From 3ccf0b317c3976c658470ee1df47f76a4487ff3c Mon Sep 17 00:00:00 2001
From: Bill Wendling
Date: Thu, 31 Mar 2011 21:32:12 +0000
Subject: [PATCH] Creating llvm - final tag.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_29@128672 91177308-0d34-0410-b5e6-96231b3b80d8
---
 final/.gitignore | 37 +
 final/CMakeLists.txt | 296 +
 final/CREDITS.TXT | 362 +
 final/LICENSE.TXT | 69 +
 final/Makefile | 250 +
 final/Makefile.common | 70 +
 final/Makefile.config.in | 359 +
 final/Makefile.rules | 2278 ++
 final/ModuleInfo.txt | 4 +
 final/README.txt | 16 +
 final/autoconf/AutoRegen.sh | 58 +
 final/autoconf/ExportMap.map | 7 +
 final/autoconf/LICENSE.TXT | 24 +
 final/autoconf/README.TXT | 49 +
 final/autoconf/config.guess | 1498 +
 final/autoconf/config.sub | 1702 ++
 final/autoconf/configure.ac | 1724 ++
 final/autoconf/depcomp | 522 +
 final/autoconf/install-sh | 322 +
 final/autoconf/ltmain.sh | 6863 +++++
 final/autoconf/m4/build_exeext.m4 | 42 +
 final/autoconf/m4/c_printf_a.m4 | 31 +
 final/autoconf/m4/check_gnu_make.m4 | 26 +
 final/autoconf/m4/config_makefile.m4 | 9 +
 final/autoconf/m4/config_project.m4 | 14 +
 final/autoconf/m4/cxx_flag_check.m4 | 2 +
 final/autoconf/m4/find_std_program.m4 | 118 +
 final/autoconf/m4/func_isinf.m4 | 36 +
 final/autoconf/m4/func_isnan.m4 | 27 +
 final/autoconf/m4/func_mmap_file.m4 | 26 +
 final/autoconf/m4/header_mmap_anonymous.m4 | 21 +
 final/autoconf/m4/huge_val.m4 | 20 +
 final/autoconf/m4/libtool.m4 | 6389 ++++
 final/autoconf/m4/link_options.m4 | 108 +
 final/autoconf/m4/linux_mixed_64_32.m4 | 17 +
 final/autoconf/m4/ltdl.m4 | 418 +
 final/autoconf/m4/need_dev_zero_for_mmap.m4 | 17 +
 final/autoconf/m4/path_perl.m4 | 16 +
 final/autoconf/m4/path_tclsh.m4 | 39 +
 final/autoconf/m4/rand48.m4 | 12 +
 final/autoconf/m4/sanity_check.m4 | 31 +
 final/autoconf/m4/single_cxx_check.m4 | 10 +
 .../autoconf/m4/visibility_inlines_hidden.m4 | 22 +
 final/autoconf/missing | 353 +
 final/autoconf/mkinstalldirs | 150 +
 final/bindings/Makefile | 16 +
 final/bindings/README.txt | 3 +
 final/bindings/ocaml/Makefile | 19 +
 final/bindings/ocaml/Makefile.ocaml | 415 +
 final/bindings/ocaml/analysis/Makefile | 19 +
 .../bindings/ocaml/analysis/analysis_ocaml.c | 72 +
 .../bindings/ocaml/analysis/llvm_analysis.ml | 22 +
 .../bindings/ocaml/analysis/llvm_analysis.mli | 46 +
 final/bindings/ocaml/bitreader/Makefile | 19 +
 .../ocaml/bitreader/bitreader_ocaml.c | 73 +
 .../ocaml/bitreader/llvm_bitreader.ml | 20 +
 .../ocaml/bitreader/llvm_bitreader.mli | 29 +
 final/bindings/ocaml/bitwriter/Makefile | 19 +
 .../ocaml/bitwriter/bitwriter_ocaml.c | 45 +
 .../ocaml/bitwriter/llvm_bitwriter.ml | 25 +
 .../ocaml/bitwriter/llvm_bitwriter.mli | 30 +
 final/bindings/ocaml/executionengine/Makefile | 19 +
 .../executionengine/executionengine_ocaml.c | 323 +
 .../executionengine/llvm_executionengine.ml | 112 +
 .../executionengine/llvm_executionengine.mli | 163 +
 final/bindings/ocaml/llvm/Makefile | 19 +
 final/bindings/ocaml/llvm/llvm.ml | 1064 +
 final/bindings/ocaml/llvm/llvm.mli | 2259 ++
 final/bindings/ocaml/llvm/llvm_ocaml.c | 1829 ++
 final/bindings/ocaml/target/Makefile | 19 +
 final/bindings/ocaml/target/llvm_target.ml | 44 +
 final/bindings/ocaml/target/llvm_target.mli | 102 +
 final/bindings/ocaml/target/target_ocaml.c | 109 +
 final/bindings/ocaml/transforms/Makefile | 18 +
 .../bindings/ocaml/transforms/scalar/Makefile | 20 +
 .../transforms/scalar/llvm_scalar_opts.ml | 72 +
 .../transforms/scalar/llvm_scalar_opts.mli | 118 +
 .../transforms/scalar/scalar_opts_ocaml.c | 146 +
 final/build-for-llvm-top.sh | 68 +
 final/cmake/README | 1 +
 final/cmake/config-ix.cmake | 385 +
 final/cmake/modules/AddLLVM.cmake | 136 +
 final/cmake/modules/AddLLVMDefinitions.cmake | 13 +
 final/cmake/modules/CMakeLists.txt | 32 +
 final/cmake/modules/CheckAtomic.cmake | 29 +
 final/cmake/modules/ChooseMSVCCRT.cmake | 106 +
 final/cmake/modules/CrossCompileLLVM.cmake | 26 +
 final/cmake/modules/FindBison.cmake | 52 +
 final/cmake/modules/GetTargetTriple.cmake | 30 +
 final/cmake/modules/HandleLLVMOptions.cmake | 185 +
 final/cmake/modules/LLVM.cmake | 40 +
 final/cmake/modules/LLVMConfig.cmake | 189 +
 final/cmake/modules/LLVMLibDeps.cmake | 68 +
 final/cmake/modules/LLVMParseArguments.cmake | 80 +
 final/cmake/modules/LLVMProcessSources.cmake | 90 +
 final/cmake/modules/TableGen.cmake | 46 +
 final/cmake/modules/VersionFromVCS.cmake | 46 +
 final/configure | 24115 ++++++++++++++++
 final/docs/AliasAnalysis.html | 1064 +
 final/docs/BitCodeFormat.html | 1487 +
 final/docs/Bugpoint.html | 250 +
 final/docs/CFEBuildInstrs.html | 29 +
 final/docs/CMake.html | 486 +
 final/docs/CodeGenerator.html | 2817 ++
 final/docs/CodingStandards.html | 1499 +
 final/docs/CommandGuide/FileCheck.pod | 245 +
 final/docs/CommandGuide/Makefile | 103 +
 final/docs/CommandGuide/bugpoint.pod | 171 +
 final/docs/CommandGuide/html/manpage.css | 256 +
 final/docs/CommandGuide/index.html | 158 +
 final/docs/CommandGuide/lit.pod | 354 +
 final/docs/CommandGuide/llc.pod | 201 +
 final/docs/CommandGuide/lli.pod | 219 +
 final/docs/CommandGuide/llvm-ar.pod | 406 +
 final/docs/CommandGuide/llvm-as.pod | 77 +
 final/docs/CommandGuide/llvm-bcanalyzer.pod | 315 +
 final/docs/CommandGuide/llvm-config.pod | 131 +
 final/docs/CommandGuide/llvm-diff.pod | 53 +
 final/docs/CommandGuide/llvm-dis.pod | 60 +
 final/docs/CommandGuide/llvm-extract.pod | 73 +
 final/docs/CommandGuide/llvm-ld.pod | 234 +
 final/docs/CommandGuide/llvm-link.pod | 79 +
 final/docs/CommandGuide/llvm-nm.pod | 122 +
 final/docs/CommandGuide/llvm-prof.pod | 57 +
 final/docs/CommandGuide/llvm-ranlib.pod | 52 +
 final/docs/CommandGuide/llvmc.pod | 190 +
 final/docs/CommandGuide/llvmgcc.pod | 76 +
 final/docs/CommandGuide/llvmgxx.pod | 85 +
 final/docs/CommandGuide/manpage.css | 256 +
 final/docs/CommandGuide/opt.pod | 143 +
 final/docs/CommandGuide/tblgen.pod | 115 +
 final/docs/CommandLine.html | 1979 ++
 final/docs/CompilerDriver.html | 756 +
 final/docs/CompilerDriverTutorial.html | 126 +
 final/docs/CompilerWriterInfo.html | 263 +
 final/docs/DebuggingJITedCode.html | 152 +
 final/docs/DeveloperPolicy.html | 618 +
 final/docs/ExceptionHandling.html | 644 +
 final/docs/ExtendedIntegerResults.txt | 133 +
 final/docs/ExtendingLLVM.html | 391 +
 final/docs/FAQ.html | 938 +
 final/docs/GCCFEBuildInstrs.html | 279 +
 final/docs/GarbageCollection.html | 1387 +
 final/docs/GetElementPtr.html | 739 +
 final/docs/GettingStarted.html | 1702 ++
 final/docs/GettingStartedVS.html | 366 +
 final/docs/GoldPlugin.html | 213 +
 .../2000-11-18-EarlyDesignIdeas.txt | 74 +
 .../2000-11-18-EarlyDesignIdeasResp.txt | 199 +
 .../2000-12-06-EncodingIdea.txt | 30 +
 .../2000-12-06-MeetingSummary.txt | 83 +
 .../2001-01-31-UniversalIRIdea.txt | 39 +
 .../2001-02-06-TypeNotationDebate.txt | 67 +
 .../2001-02-06-TypeNotationDebateResp1.txt | 75 +
 .../2001-02-06-TypeNotationDebateResp2.txt | 53 +
 .../2001-02-06-TypeNotationDebateResp4.txt | 89 +
 .../2001-02-09-AdveComments.txt | 120 +
 .../2001-02-09-AdveCommentsResponse.txt | 245 +
 .../2001-02-13-Reference-Memory.txt | 39 +
 .../2001-02-13-Reference-MemoryResponse.txt | 47 +
 .../2001-04-16-DynamicCompilation.txt | 49 +
 .../2001-05-18-ExceptionHandling.txt | 202 +
 .../2001-05-19-ExceptionResponse.txt | 45 +
 .../2001-06-01-GCCOptimizations.txt | 63 +
 .../2001-06-01-GCCOptimizations2.txt | 71 +
 .../2001-06-20-.NET-Differences.txt | 30 +
 .../2001-07-06-LoweringIRForCodeGen.txt | 31 +
 .../2001-09-18-OptimizeExceptions.txt | 56 +
 .../2002-05-12-InstListChange.txt | 55 +
 .../2002-06-25-MegaPatchInfo.txt | 72 +
 .../2003-01-23-CygwinNotes.txt | 28 +
 .../2003-06-25-Reoptimizer1.txt | 137 +
 .../2003-06-26-Reoptimizer2.txt | 110 +
 .../2007-OriginalClangReadme.txt | 178 +
 final/docs/HowToReleaseLLVM.html | 626 +
 final/docs/HowToSubmitABug.html | 347 +
 final/docs/LangRef.html | 7799 +++++
 final/docs/Lexicon.html | 277 +
 final/docs/LinkTimeOptimization.html | 390 +
 final/docs/Makefile | 130 +
 final/docs/MakefileGuide.html | 1034 +
 final/docs/Packaging.html | 118 +
 final/docs/Passes.html | 2249 ++
 final/docs/ProgrammersManual.html | 3965 +++
 final/docs/Projects.html | 460 +
 final/docs/ReleaseNotes.html | 1306 +
 final/docs/SourceLevelDebugging.html | 1783 ++
 final/docs/SystemLibrary.html | 319 +
 final/docs/TableGenFundamentals.html | 911 +
 final/docs/TestingGuide.html | 1196 +
 final/docs/UsingLibraries.html | 443 +
 final/docs/WritingAnLLVMBackend.html | 2556 ++
 final/docs/WritingAnLLVMPass.html | 1928 ++
 final/docs/doxygen.cfg.in | 1419 +
 final/docs/doxygen.css | 378 +
 final/docs/doxygen.footer | 13 +
 final/docs/doxygen.header | 9 +
 final/docs/doxygen.intro | 18 +
 final/docs/img/Debugging.gif | Bin 0 -> 20390 bytes
 final/docs/img/libdeps.gif | Bin 0 -> 52679 bytes
 final/docs/img/lines.gif | Bin 0 -> 91 bytes
 final/docs/img/objdeps.gif | Bin 0 -> 16201 bytes
 final/docs/img/venusflytrap.jpg | Bin 0 -> 56606 bytes
 final/docs/index.html | 293 +
 final/docs/llvm.css | 100 +
 final/docs/re_format.7 | 756 +
 final/docs/tutorial/LangImpl1.html | 348 +
 final/docs/tutorial/LangImpl2.html | 1233 +
 final/docs/tutorial/LangImpl3.html | 1269 +
 final/docs/tutorial/LangImpl4.html | 1137 +
 final/docs/tutorial/LangImpl5-cfg.png | Bin 0 -> 38586 bytes
 final/docs/tutorial/LangImpl5.html | 1780 ++
 final/docs/tutorial/LangImpl6.html | 1817 ++
 final/docs/tutorial/LangImpl7.html | 2167 ++
 final/docs/tutorial/LangImpl8.html | 365 +
 final/docs/tutorial/Makefile | 28 +
 final/docs/tutorial/OCamlLangImpl1.html | 365 +
 final/docs/tutorial/OCamlLangImpl2.html | 1045 +
 final/docs/tutorial/OCamlLangImpl3.html | 1093 +
 final/docs/tutorial/OCamlLangImpl4.html | 1029 +
 final/docs/tutorial/OCamlLangImpl5.html | 1569 +
 final/docs/tutorial/OCamlLangImpl6.html | 1574 +
 final/docs/tutorial/OCamlLangImpl7.html | 1907 ++
 final/docs/tutorial/OCamlLangImpl8.html | 365 +
 final/docs/tutorial/index.html | 48 +
 final/examples/BrainF/BrainF.cpp | 468 +
 final/examples/BrainF/BrainF.h | 94 +
 final/examples/BrainF/BrainFDriver.cpp | 160 +
 final/examples/BrainF/CMakeLists.txt | 6 +
 final/examples/BrainF/Makefile | 15 +
 final/examples/CMakeLists.txt | 13 +
 final/examples/ExceptionDemo/CMakeLists.txt | 6 +
 .../examples/ExceptionDemo/ExceptionDemo.cpp | 2030 ++
 final/examples/ExceptionDemo/Makefile | 16 +
 final/examples/Fibonacci/CMakeLists.txt | 5 +
 final/examples/Fibonacci/Makefile | 17 +
 final/examples/Fibonacci/fibonacci.cpp | 137 +
 final/examples/HowToUseJIT/CMakeLists.txt | 5 +
 final/examples/HowToUseJIT/HowToUseJIT.cpp | 124 +
 final/examples/HowToUseJIT/Makefile | 15 +
 final/examples/Kaleidoscope/CMakeLists.txt | 6 +
 .../Kaleidoscope/Chapter2/CMakeLists.txt | 3 +
 final/examples/Kaleidoscope/Chapter2/Makefile | 13 +
 final/examples/Kaleidoscope/Chapter2/toy.cpp | 398 +
 .../Kaleidoscope/Chapter3/CMakeLists.txt | 5 +
 final/examples/Kaleidoscope/Chapter3/Makefile | 15 +
 final/examples/Kaleidoscope/Chapter3/toy.cpp | 563 +
 .../Kaleidoscope/Chapter4/CMakeLists.txt | 5 +
 final/examples/Kaleidoscope/Chapter4/Makefile | 15 +
 final/examples/Kaleidoscope/Chapter4/toy.cpp | 613 +
 .../Kaleidoscope/Chapter5/CMakeLists.txt | 5 +
 final/examples/Kaleidoscope/Chapter5/Makefile | 15 +
 final/examples/Kaleidoscope/Chapter5/toy.cpp | 858 +
 .../Kaleidoscope/Chapter6/CMakeLists.txt | 5 +
 final/examples/Kaleidoscope/Chapter6/Makefile | 15 +
 final/examples/Kaleidoscope/Chapter6/toy.cpp | 976 +
 .../Kaleidoscope/Chapter7/CMakeLists.txt | 6 +
 final/examples/Kaleidoscope/Chapter7/Makefile | 16 +
 final/examples/Kaleidoscope/Chapter7/toy.cpp | 1142 +
 final/examples/Kaleidoscope/Makefile | 15 +
 final/examples/Makefile | 32 +
 final/examples/ModuleMaker/CMakeLists.txt | 5 +
 final/examples/ModuleMaker/Makefile | 14 +
 final/examples/ModuleMaker/ModuleMaker.cpp | 64 +
 final/examples/ModuleMaker/README.txt | 8 +
 .../OCaml-Kaleidoscope/Chapter2/Makefile | 22 +
 .../OCaml-Kaleidoscope/Chapter2/_tags | 1 +
 .../OCaml-Kaleidoscope/Chapter2/ast.ml | 25 +
 .../OCaml-Kaleidoscope/Chapter2/lexer.ml | 52 +
 .../OCaml-Kaleidoscope/Chapter2/parser.ml | 122 +
 .../OCaml-Kaleidoscope/Chapter2/token.ml | 15 +
 .../OCaml-Kaleidoscope/Chapter2/toplevel.ml | 34 +
 .../OCaml-Kaleidoscope/Chapter2/toy.ml | 21 +
 .../OCaml-Kaleidoscope/Chapter3/Makefile | 24 +
 .../OCaml-Kaleidoscope/Chapter3/_tags | 2 +
 .../OCaml-Kaleidoscope/Chapter3/ast.ml | 25 +
 .../OCaml-Kaleidoscope/Chapter3/codegen.ml | 100 +
 .../OCaml-Kaleidoscope/Chapter3/lexer.ml | 52 +
 .../Chapter3/myocamlbuild.ml | 6 +
 .../OCaml-Kaleidoscope/Chapter3/parser.ml | 122 +
 .../OCaml-Kaleidoscope/Chapter3/token.ml | 15 +
 .../OCaml-Kaleidoscope/Chapter3/toplevel.ml | 39 +
 .../OCaml-Kaleidoscope/Chapter3/toy.ml | 26 +
 .../OCaml-Kaleidoscope/Chapter4/Makefile | 25 +
 .../OCaml-Kaleidoscope/Chapter4/_tags | 4 +
 .../OCaml-Kaleidoscope/Chapter4/ast.ml | 25 +
 .../OCaml-Kaleidoscope/Chapter4/bindings.c | 7 +
 .../OCaml-Kaleidoscope/Chapter4/codegen.ml | 103 +
 .../OCaml-Kaleidoscope/Chapter4/lexer.ml | 52 +
 .../Chapter4/myocamlbuild.ml | 10 +
 .../OCaml-Kaleidoscope/Chapter4/parser.ml | 122 +
 .../OCaml-Kaleidoscope/Chapter4/token.ml | 15 +
 .../OCaml-Kaleidoscope/Chapter4/toplevel.ml | 49 +
 .../OCaml-Kaleidoscope/Chapter4/toy.ml | 53 +
 .../OCaml-Kaleidoscope/Chapter5/Makefile | 25 +
 .../OCaml-Kaleidoscope/Chapter5/_tags | 4 +
 .../OCaml-Kaleidoscope/Chapter5/ast.ml | 31 +
 .../OCaml-Kaleidoscope/Chapter5/bindings.c | 7 +
 .../OCaml-Kaleidoscope/Chapter5/codegen.ml | 225 +
 .../OCaml-Kaleidoscope/Chapter5/lexer.ml | 57 +
 .../Chapter5/myocamlbuild.ml | 10 +
 .../OCaml-Kaleidoscope/Chapter5/parser.ml | 158 +
 .../OCaml-Kaleidoscope/Chapter5/token.ml | 19 +
 .../OCaml-Kaleidoscope/Chapter5/toplevel.ml | 49 +
 .../OCaml-Kaleidoscope/Chapter5/toy.ml | 53 +
 .../OCaml-Kaleidoscope/Chapter6/Makefile | 34 +
 .../OCaml-Kaleidoscope/Chapter6/_tags | 4 +
 .../OCaml-Kaleidoscope/Chapter6/ast.ml | 36 +
 .../OCaml-Kaleidoscope/Chapter6/bindings.c | 13 +
 .../OCaml-Kaleidoscope/Chapter6/codegen.ml | 251 +
 .../OCaml-Kaleidoscope/Chapter6/lexer.ml | 59 +
 .../Chapter6/myocamlbuild.ml | 10 +
 .../OCaml-Kaleidoscope/Chapter6/parser.ml | 195 +
 .../OCaml-Kaleidoscope/Chapter6/token.ml | 22 +
 .../OCaml-Kaleidoscope/Chapter6/toplevel.ml | 49 +
 .../OCaml-Kaleidoscope/Chapter6/toy.ml | 53 +
 .../OCaml-Kaleidoscope/Chapter7/Makefile | 34 +
 .../OCaml-Kaleidoscope/Chapter7/_tags | 4 +
 .../OCaml-Kaleidoscope/Chapter7/ast.ml | 39 +
 .../OCaml-Kaleidoscope/Chapter7/bindings.c | 13 +
 .../OCaml-Kaleidoscope/Chapter7/codegen.ml | 370 +
 .../OCaml-Kaleidoscope/Chapter7/lexer.ml | 60 +
 .../Chapter7/myocamlbuild.ml | 10 +
 .../OCaml-Kaleidoscope/Chapter7/parser.ml | 221 +
 .../OCaml-Kaleidoscope/Chapter7/token.ml | 25 +
 .../OCaml-Kaleidoscope/Chapter7/toplevel.ml | 49 +
 .../OCaml-Kaleidoscope/Chapter7/toy.ml | 57 +
 final/examples/OCaml-Kaleidoscope/Makefile | 15 +
 final/examples/ParallelJIT/CMakeLists.txt | 9 +
 final/examples/ParallelJIT/Makefile | 17 +
 final/examples/ParallelJIT/ParallelJIT.cpp | 304 +
 final/include/llvm-c/Analysis.h | 55 +
 final/include/llvm-c/BitReader.h | 66 +
 final/include/llvm-c/BitWriter.h | 46 +
 final/include/llvm-c/Core.h | 1177 +
 final/include/llvm-c/EnhancedDisassembly.h | 513 +
 final/include/llvm-c/ExecutionEngine.h | 152 +
 final/include/llvm-c/Initialization.h | 40 +
 final/include/llvm-c/LinkTimeOptimizer.h | 58 +
 final/include/llvm-c/Target.h | 172 +
 final/include/llvm-c/Transforms/IPO.h | 76 +
 final/include/llvm-c/Transforms/Scalar.h | 102 +
 final/include/llvm-c/lto.h | 278 +
 final/include/llvm/ADT/APFloat.h | 452 +
 final/include/llvm/ADT/APInt.h | 1720 ++
 final/include/llvm/ADT/APSInt.h | 266 +
 final/include/llvm/ADT/ArrayRef.h | 137 +
 final/include/llvm/ADT/BitVector.h | 433 +
 final/include/llvm/ADT/DAGDeltaAlgorithm.h | 75 +
 final/include/llvm/ADT/DeltaAlgorithm.h | 91 +
 final/include/llvm/ADT/DenseMap.h | 533 +
 final/include/llvm/ADT/DenseMapInfo.h | 165 +
 final/include/llvm/ADT/DenseSet.h | 129 +
 final/include/llvm/ADT/DepthFirstIterator.h | 268 +
 final/include/llvm/ADT/EquivalenceClasses.h | 281 +
 final/include/llvm/ADT/FoldingSet.h | 684 +
 final/include/llvm/ADT/GraphTraits.h | 103 +
 final/include/llvm/ADT/ImmutableIntervalMap.h | 242 +
 final/include/llvm/ADT/ImmutableList.h | 222 +
 final/include/llvm/ADT/ImmutableMap.h | 261 +
 final/include/llvm/ADT/ImmutableSet.h | 1084 +
 final/include/llvm/ADT/InMemoryStruct.h | 77 +
 final/include/llvm/ADT/IndexedMap.h | 87 +
 final/include/llvm/ADT/IntEqClasses.h | 88 +
 final/include/llvm/ADT/IntervalMap.h | 2139 ++
 final/include/llvm/ADT/IntrusiveRefCntPtr.h | 230 +
 final/include/llvm/ADT/NullablePtr.h | 52 +
 final/include/llvm/ADT/Optional.h | 120 +
 final/include/llvm/ADT/OwningPtr.h | 133 +
 final/include/llvm/ADT/PointerIntPair.h | 163 +
 final/include/llvm/ADT/PointerUnion.h | 445 +
 final/include/llvm/ADT/PostOrderIterator.h | 230 +
 final/include/llvm/ADT/PriorityQueue.h | 84 +
 final/include/llvm/ADT/SCCIterator.h | 220 +
 final/include/llvm/ADT/STLExtras.h | 306 +
 final/include/llvm/ADT/ScopedHashTable.h | 256 +
 final/include/llvm/ADT/SetOperations.h | 71 +
 final/include/llvm/ADT/SetVector.h | 178 +
 final/include/llvm/ADT/SmallBitVector.h | 461 +
 final/include/llvm/ADT/SmallPtrSet.h | 294 +
 final/include/llvm/ADT/SmallSet.h | 118 +
 final/include/llvm/ADT/SmallString.h | 72 +
 final/include/llvm/ADT/SmallVector.h | 752 +
 final/include/llvm/ADT/SparseBitVector.h | 906 +
 final/include/llvm/ADT/Statistic.h | 135 +
 final/include/llvm/ADT/StringExtras.h | 171 +
 final/include/llvm/ADT/StringMap.h | 493 +
 final/include/llvm/ADT/StringRef.h | 458 +
 final/include/llvm/ADT/StringSet.h | 38 +
 final/include/llvm/ADT/StringSwitch.h | 126 +
 final/include/llvm/ADT/Trie.h | 336 +
 final/include/llvm/ADT/Triple.h | 333 +
 final/include/llvm/ADT/Twine.h | 465 +
 final/include/llvm/ADT/UniqueVector.h | 89 +
 final/include/llvm/ADT/ValueMap.h | 368 +
 final/include/llvm/ADT/VectorExtras.h | 41 +
 final/include/llvm/ADT/ilist.h | 708 +
 final/include/llvm/ADT/ilist_node.h | 106 +
 final/include/llvm/AbstractTypeUser.h | 205 +
 final/include/llvm/Analysis/AliasAnalysis.h | 507 +
 final/include/llvm/Analysis/AliasSetTracker.h | 440 +
 final/include/llvm/Analysis/CFGPrinter.h | 111 +
 final/include/llvm/Analysis/CallGraph.h | 375 +
 final/include/llvm/Analysis/CaptureTracking.h | 33 +
 final/include/llvm/Analysis/CodeMetrics.h | 95 +
 final/include/llvm/Analysis/ConstantFolding.h | 82 +
 .../include/llvm/Analysis/ConstantsScanner.h | 93 +
 final/include/llvm/Analysis/DIBuilder.h | 464 +
 .../llvm/Analysis/DOTGraphTraitsPass.h | 83 +
 final/include/llvm/Analysis/DebugInfo.h | 742 +
 final/include/llvm/Analysis/DomPrinter.h | 30 +
 .../include/llvm/Analysis/DominanceFrontier.h | 189 +
 .../llvm/Analysis/DominatorInternals.h | 289 +
 final/include/llvm/Analysis/Dominators.h | 876 +
 final/include/llvm/Analysis/FindUsedTypes.h | 66 +
 final/include/llvm/Analysis/IVUsers.h | 175 +
 final/include/llvm/Analysis/InlineCost.h | 182 +
 .../llvm/Analysis/InstructionSimplify.h | 143 +
 final/include/llvm/Analysis/Interval.h | 153 +
 .../include/llvm/Analysis/IntervalIterator.h | 259 +
 .../include/llvm/Analysis/IntervalPartition.h | 111 +
 final/include/llvm/Analysis/LazyValueInfo.h | 81 +
 .../llvm/Analysis/LibCallAliasAnalysis.h | 73 +
 .../include/llvm/Analysis/LibCallSemantics.h | 167 +
 final/include/llvm/Analysis/Lint.h | 51 +
 final/include/llvm/Analysis/Loads.h | 51 +
 .../llvm/Analysis/LoopDependenceAnalysis.h | 124 +
 final/include/llvm/Analysis/LoopInfo.h | 1100 +
 final/include/llvm/Analysis/LoopPass.h | 158 +
 final/include/llvm/Analysis/MemoryBuiltins.h | 84 +
 .../llvm/Analysis/MemoryDependenceAnalysis.h | 378 +
 final/include/llvm/Analysis/PHITransAddr.h | 117 +
 final/include/llvm/Analysis/Passes.h | 198 +
 final/include/llvm/Analysis/PathNumbering.h | 304 +
 final/include/llvm/Analysis/PathProfileInfo.h | 113 +
 final/include/llvm/Analysis/PostDominators.h | 137 +
 final/include/llvm/Analysis/ProfileInfo.h | 248 +
 .../include/llvm/Analysis/ProfileInfoLoader.h | 84 +
 .../include/llvm/Analysis/ProfileInfoTypes.h | 60 +
 final/include/llvm/Analysis/RegionInfo.h | 683 +
 final/include/llvm/Analysis/RegionIterator.h | 342 +
 final/include/llvm/Analysis/RegionPass.h | 126 +
 final/include/llvm/Analysis/RegionPrinter.h | 26 +
 final/include/llvm/Analysis/ScalarEvolution.h | 748 +
 .../llvm/Analysis/ScalarEvolutionExpander.h | 215 +
 .../Analysis/ScalarEvolutionExpressions.h | 491 +
 .../Analysis/ScalarEvolutionNormalization.h | 78 +
 .../include/llvm/Analysis/SparsePropagation.h | 206 +
 final/include/llvm/Analysis/Trace.h | 119 +
 final/include/llvm/Analysis/ValueTracking.h | 163 +
 final/include/llvm/Analysis/Verifier.h | 75 +
 final/include/llvm/Argument.h | 88 +
 .../llvm/Assembly/AssemblyAnnotationWriter.h | 63 +
 final/include/llvm/Assembly/Parser.h | 65 +
 final/include/llvm/Assembly/PrintModulePass.h | 42 +
 final/include/llvm/Assembly/Writer.h | 78 +
 final/include/llvm/Attributes.h | 290 +
 final/include/llvm/AutoUpgrade.h | 48 +
 final/include/llvm/BasicBlock.h | 269 +
 final/include/llvm/Bitcode/Archive.h | 546 +
 final/include/llvm/Bitcode/BitCodes.h | 185 +
 final/include/llvm/Bitcode/BitstreamReader.h | 642 +
 final/include/llvm/Bitcode/BitstreamWriter.h | 533 +
 final/include/llvm/Bitcode/LLVMBitCodes.h | 266 +
 final/include/llvm/Bitcode/ReaderWriter.h | 145 +
 final/include/llvm/CMakeLists.txt | 21 +
 final/include/llvm/CallGraphSCCPass.h | 104 +
 final/include/llvm/CallingConv.h | 103 +
 final/include/llvm/CodeGen/Analysis.h | 85 +
 final/include/llvm/CodeGen/AsmPrinter.h | 462 +
 final/include/llvm/CodeGen/BinaryObject.h | 353 +
 final/include/llvm/CodeGen/CalcSpillWeights.h | 83 +
 final/include/llvm/CodeGen/CallingConvLower.h | 300 +
 final/include/llvm/CodeGen/EdgeBundles.h | 61 +
 final/include/llvm/CodeGen/FastISel.h | 348 +
 .../llvm/CodeGen/FunctionLoweringInfo.h | 224 +
 final/include/llvm/CodeGen/GCMetadata.h | 193 +
 .../include/llvm/CodeGen/GCMetadataPrinter.h | 73 +
 final/include/llvm/CodeGen/GCStrategy.h | 142 +
 final/include/llvm/CodeGen/GCs.h | 35 +
 final/include/llvm/CodeGen/ISDOpcodes.h | 799 +
 .../include/llvm/CodeGen/IntrinsicLowering.h | 59 +
 final/include/llvm/CodeGen/JITCodeEmitter.h | 343 +
 .../llvm/CodeGen/LatencyPriorityQueue.h | 100 +
 .../llvm/CodeGen/LinkAllAsmWriterComponents.h | 37 +
 .../llvm/CodeGen/LinkAllCodegenComponents.h | 59 +
 final/include/llvm/CodeGen/LiveInterval.h | 578 +
 .../llvm/CodeGen/LiveIntervalAnalysis.h | 465 +
 .../include/llvm/CodeGen/LiveStackAnalysis.h | 97 +
 final/include/llvm/CodeGen/LiveVariables.h | 316 +
 final/include/llvm/CodeGen/MachORelocation.h | 56 +
 .../include/llvm/CodeGen/MachineBasicBlock.h | 493 +
 .../include/llvm/CodeGen/MachineCodeEmitter.h | 332 +
 final/include/llvm/CodeGen/MachineCodeInfo.h | 53 +
 .../llvm/CodeGen/MachineConstantPool.h | 170 +
 .../include/llvm/CodeGen/MachineDominators.h | 202 +
 final/include/llvm/CodeGen/MachineFrameInfo.h | 551 +
 final/include/llvm/CodeGen/MachineFunction.h | 478 +
 .../llvm/CodeGen/MachineFunctionAnalysis.h | 54 +
 .../llvm/CodeGen/MachineFunctionPass.h | 59 +
 final/include/llvm/CodeGen/MachineInstr.h | 578 +
 .../llvm/CodeGen/MachineInstrBuilder.h | 258 +
 .../llvm/CodeGen/MachineJumpTableInfo.h | 125 +
 final/include/llvm/CodeGen/MachineLocation.h | 98 +
 final/include/llvm/CodeGen/MachineLoopInfo.h | 178 +
 .../include/llvm/CodeGen/MachineLoopRanges.h | 112 +
 .../include/llvm/CodeGen/MachineMemOperand.h | 165 +
 .../include/llvm/CodeGen/MachineModuleInfo.h | 366 +
 .../llvm/CodeGen/MachineModuleInfoImpls.h | 97 +
 final/include/llvm/CodeGen/MachineOperand.h | 564 +
 .../llvm/CodeGen/MachinePassRegistry.h | 156 +
 .../llvm/CodeGen/MachineRegisterInfo.h | 404 +
 .../include/llvm/CodeGen/MachineRelocation.h | 342 +
 .../include/llvm/CodeGen/MachineSSAUpdater.h | 115 +
 .../include/llvm/CodeGen/ObjectCodeEmitter.h | 171 +
 final/include/llvm/CodeGen/PBQP/Graph.h | 425 +
 .../include/llvm/CodeGen/PBQP/HeuristicBase.h | 246 +
 .../llvm/CodeGen/PBQP/HeuristicSolver.h | 616 +
 .../llvm/CodeGen/PBQP/Heuristics/Briggs.h | 464 +
 final/include/llvm/CodeGen/PBQP/Math.h | 288 +
 final/include/llvm/CodeGen/PBQP/Solution.h | 94 +
 final/include/llvm/CodeGen/Passes.h | 241 +
 .../llvm/CodeGen/ProcessImplicitDefs.h | 45 +
 .../include/llvm/CodeGen/PseudoSourceValue.h | 112 +
 final/include/llvm/CodeGen/RegAllocPBQP.h | 167 +
 final/include/llvm/CodeGen/RegAllocRegistry.h | 66 +
 .../include/llvm/CodeGen/RegisterCoalescer.h | 244 +
 .../include/llvm/CodeGen/RegisterScavenging.h | 168 +
 final/include/llvm/CodeGen/RuntimeLibcalls.h | 313 +
 final/include/llvm/CodeGen/ScheduleDAG.h | 718 +
 .../llvm/CodeGen/ScheduleHazardRecognizer.h | 93 +
 .../include/llvm/CodeGen/SchedulerRegistry.h | 111 +
 .../llvm/CodeGen/ScoreboardHazardRecognizer.h | 129 +
 final/include/llvm/CodeGen/SelectionDAG.h | 1051 +
 final/include/llvm/CodeGen/SelectionDAGISel.h | 313 +
 .../include/llvm/CodeGen/SelectionDAGNodes.h | 1781 ++
 final/include/llvm/CodeGen/SlotIndexes.h | 728 +
 .../CodeGen/TargetLoweringObjectFileImpl.h | 206 +
 final/include/llvm/CodeGen/ValueTypes.h | 690 +
 final/include/llvm/CodeGen/ValueTypes.td | 78 +
 final/include/llvm/CompilerDriver/Action.h | 54 +
 .../llvm/CompilerDriver/AutoGenerated.h | 40 +
 .../llvm/CompilerDriver/BuiltinOptions.h | 39 +
 final/include/llvm/CompilerDriver/Common.td | 124 +
 .../llvm/CompilerDriver/CompilationGraph.h | 330 +
 final/include/llvm/CompilerDriver/Error.h | 29 +
 final/include/llvm/CompilerDriver/Main.h | 21 +
 final/include/llvm/CompilerDriver/Main.inc | 23 +
 final/include/llvm/CompilerDriver/Tool.h | 100 +
 final/include/llvm/Config/AsmParsers.def.in | 29 +
 final/include/llvm/Config/AsmPrinters.def.in | 29 +
 .../include/llvm/Config/Disassemblers.def.in | 29 +
 final/include/llvm/Config/Targets.def.in | 28 +
 final/include/llvm/Config/config.h.cmake | 737 +
 final/include/llvm/Config/config.h.in | 687 +
 final/include/llvm/Config/llvm-config.h.cmake | 100 +
 final/include/llvm/Config/llvm-config.h.in | 97 +
 final/include/llvm/Constant.h | 164 +
 final/include/llvm/Constants.h | 971 +
 final/include/llvm/DerivedTypes.h | 513 +
 .../llvm/ExecutionEngine/ExecutionEngine.h | 574 +
 .../llvm/ExecutionEngine/GenericValue.h | 44 +
 .../llvm/ExecutionEngine/Interpreter.h | 38 +
 final/include/llvm/ExecutionEngine/JIT.h | 38 +
 .../llvm/ExecutionEngine/JITEventListener.h | 82 +
 .../llvm/ExecutionEngine/JITMemoryManager.h | 179 +
 final/include/llvm/ExecutionEngine/MCJIT.h | 38 +
 final/include/llvm/Function.h | 437 +
 final/include/llvm/GVMaterializer.h | 66 +
 final/include/llvm/GlobalAlias.h | 100 +
 final/include/llvm/GlobalValue.h | 296 +
 final/include/llvm/GlobalVariable.h | 180 +
 final/include/llvm/InitializePasses.h | 232 +
 final/include/llvm/InlineAsm.h | 256 +
 final/include/llvm/InstrTypes.h | 854 +
 final/include/llvm/Instruction.def | 196 +
 final/include/llvm/Instruction.h | 390 +
 final/include/llvm/Instructions.h | 3186 ++
 final/include/llvm/IntrinsicInst.h | 306 +
 final/include/llvm/Intrinsics.h | 80 +
 final/include/llvm/Intrinsics.td | 493 +
 final/include/llvm/IntrinsicsARM.td | 391 +
 final/include/llvm/IntrinsicsAlpha.td | 18 +
 final/include/llvm/IntrinsicsCellSPU.td | 242 +
 final/include/llvm/IntrinsicsPTX.td | 32 +
 final/include/llvm/IntrinsicsPowerPC.td | 465 +
 final/include/llvm/IntrinsicsX86.td | 1584 +
 final/include/llvm/IntrinsicsXCore.td | 55 +
 final/include/llvm/LLVMContext.h | 107 +
 final/include/llvm/LinkAllPasses.h | 165 +
 final/include/llvm/LinkAllVMCore.h | 53 +
 final/include/llvm/Linker.h | 299 +
 final/include/llvm/MC/EDInstInfo.h | 29 +
 final/include/llvm/MC/MCAsmInfo.h | 479 +
 final/include/llvm/MC/MCAsmInfoCOFF.h | 24 +
 final/include/llvm/MC/MCAsmInfoDarwin.h | 32 +
 final/include/llvm/MC/MCAsmLayout.h | 104 +
 final/include/llvm/MC/MCAssembler.h | 916 +
 final/include/llvm/MC/MCCodeEmitter.h | 42 +
 final/include/llvm/MC/MCContext.h | 322 +
 final/include/llvm/MC/MCDirectives.h | 56 +
 final/include/llvm/MC/MCDisassembler.h | 60 +
 final/include/llvm/MC/MCDwarf.h | 279 +
 final/include/llvm/MC/MCELFObjectWriter.h | 47 +
 final/include/llvm/MC/MCELFSymbolFlags.h | 57 +
 final/include/llvm/MC/MCExpr.h | 432 +
 final/include/llvm/MC/MCFixup.h | 97 +
 final/include/llvm/MC/MCFixupKindInfo.h | 43 +
 final/include/llvm/MC/MCInst.h | 170 +
 final/include/llvm/MC/MCInstPrinter.h | 51 +
 final/include/llvm/MC/MCLabel.h | 56 +
 final/include/llvm/MC/MCMachOSymbolFlags.h | 46 +
 final/include/llvm/MC/MCMachObjectWriter.h | 65 +
 final/include/llvm/MC/MCObjectStreamer.h | 83 +
 final/include/llvm/MC/MCObjectWriter.h | 198 +
 final/include/llvm/MC/MCParser/AsmCond.h | 40 +
 final/include/llvm/MC/MCParser/AsmLexer.h | 70 +
 final/include/llvm/MC/MCParser/MCAsmLexer.h | 181 +
 final/include/llvm/MC/MCParser/MCAsmParser.h | 137 +
 .../llvm/MC/MCParser/MCAsmParserExtension.h | 80 +
 .../llvm/MC/MCParser/MCParsedAsmOperand.h | 37 +
 final/include/llvm/MC/MCSection.h | 76 +
 final/include/llvm/MC/MCSectionCOFF.h | 69 +
 final/include/llvm/MC/MCSectionELF.h | 87 +
 final/include/llvm/MC/MCSectionMachO.h | 179 +
 final/include/llvm/MC/MCStreamer.h | 544 +
 final/include/llvm/MC/MCSymbol.h | 163 +
 final/include/llvm/MC/MCValue.h | 87 +
 final/include/llvm/MC/SectionKind.h | 240 +
 final/include/llvm/Metadata.h | 237 +
 final/include/llvm/Module.h | 556 +
 final/include/llvm/Object/MachOFormat.h | 367 +
 final/include/llvm/Object/MachOObject.h | 180 +
 final/include/llvm/Object/ObjectFile.h | 262 +
 final/include/llvm/OperandTraits.h | 197 +
 final/include/llvm/Operator.h | 285 +
 final/include/llvm/Pass.h | 371 +
 final/include/llvm/PassAnalysisSupport.h | 250 +
 final/include/llvm/PassManager.h | 111 +
 final/include/llvm/PassManagers.h | 460 +
 final/include/llvm/PassRegistry.h | 84 +
 final/include/llvm/PassSupport.h | 336 +
 final/include/llvm/Support/AIXDataTypesFix.h | 25 +
 final/include/llvm/Support/AlignOf.h | 60 +
 final/include/llvm/Support/Allocator.h | 239 +
 final/include/llvm/Support/Atomic.h | 39 +
 final/include/llvm/Support/CFG.h | 341 +
 final/include/llvm/Support/COFF.h | 298 +
 final/include/llvm/Support/CallSite.h | 301 +
 final/include/llvm/Support/Casting.h | 241 +
 final/include/llvm/Support/CommandLine.h | 1391 +
 final/include/llvm/Support/Compiler.h | 129 +
 final/include/llvm/Support/ConstantFolder.h | 228 +
 final/include/llvm/Support/ConstantRange.h | 265 +
 .../llvm/Support/CrashRecoveryContext.h | 92 +
 final/include/llvm/Support/DOTGraphTraits.h | 161 +
 final/include/llvm/Support/DataFlow.h | 103 +
 final/include/llvm/Support/DataTypes.h.cmake | 189 +
 final/include/llvm/Support/DataTypes.h.in | 111 +
 final/include/llvm/Support/Debug.h | 101 +
 final/include/llvm/Support/DebugLoc.h | 80 +
 final/include/llvm/Support/Disassembler.h | 35 +
 final/include/llvm/Support/Dwarf.h | 677 +
 final/include/llvm/Support/DynamicLibrary.h | 86 +
 final/include/llvm/Support/ELF.h | 824 +
 final/include/llvm/Support/Endian.h | 213 +
 final/include/llvm/Support/Errno.h | 34 +
 final/include/llvm/Support/ErrorHandling.h | 103 +
 final/include/llvm/Support/FEnv.h | 56 +
 final/include/llvm/Support/FileSystem.h | 690 +
 final/include/llvm/Support/FileUtilities.h | 72 +
 final/include/llvm/Support/Format.h | 154 +
 final/include/llvm/Support/FormattedStream.h | 154 +
 .../llvm/Support/GetElementPtrTypeIterator.h | 113 +
 final/include/llvm/Support/GraphWriter.h | 357 +
 final/include/llvm/Support/Host.h | 66 +
 final/include/llvm/Support/IRBuilder.h | 1224 +
 final/include/llvm/Support/IRReader.h | 110 +
 final/include/llvm/Support/IncludeFile.h | 79 +
 final/include/llvm/Support/InstIterator.h | 147 +
 final/include/llvm/Support/InstVisitor.h | 217 +
 final/include/llvm/Support/LICENSE.TXT | 6 +
 final/include/llvm/Support/LeakDetector.h | 92 +
 final/include/llvm/Support/MachO.h | 696 +
 final/include/llvm/Support/ManagedStatic.h | 110 +
 final/include/llvm/Support/MathExtras.h | 460 +
 final/include/llvm/Support/Memory.h | 96 +
 final/include/llvm/Support/MemoryBuffer.h | 119 +
 final/include/llvm/Support/MemoryObject.h | 70 +
 final/include/llvm/Support/Mutex.h | 154 +
 final/include/llvm/Support/MutexGuard.h | 41 +
 final/include/llvm/Support/NoFolder.h | 288 +
 final/include/llvm/Support/OutputBuffer.h | 166 +
 final/include/llvm/Support/PassNameParser.h | 137 +
 final/include/llvm/Support/Path.h | 16 +
 final/include/llvm/Support/PathV1.h | 755 +
 final/include/llvm/Support/PathV2.h | 347 +
 final/include/llvm/Support/PatternMatch.h | 665 +
 final/include/llvm/Support/PluginLoader.h | 37 +
 .../llvm/Support/PointerLikeTypeTraits.h | 81 +
 .../include/llvm/Support/PredIteratorCache.h | 70 +
 final/include/llvm/Support/PrettyStackTrace.h | 71 +
 final/include/llvm/Support/Process.h | 146 +
 final/include/llvm/Support/Program.h | 157 +
 final/include/llvm/Support/RWMutex.h | 173 +
 final/include/llvm/Support/Recycler.h | 117 +
 .../include/llvm/Support/RecyclingAllocator.h | 73 +
 final/include/llvm/Support/Regex.h | 81 +
 final/include/llvm/Support/Registry.h | 223 +
 final/include/llvm/Support/RegistryParser.h | 55 +
 final/include/llvm/Support/SMLoc.h | 44 +
 final/include/llvm/Support/Signals.h | 59 +
 final/include/llvm/Support/Solaris.h | 40 +
 final/include/llvm/Support/SourceMgr.h | 185 +
 final/include/llvm/Support/StandardPasses.h | 242 +
 final/include/llvm/Support/StringPool.h | 139 +
 final/include/llvm/Support/SwapByteOrder.h | 101 +
 final/include/llvm/Support/SystemUtils.h | 44 +
 final/include/llvm/Support/TargetFolder.h | 242 +
 final/include/llvm/Support/ThreadLocal.h | 54 +
 final/include/llvm/Support/Threading.h | 59 +
 final/include/llvm/Support/TimeValue.h | 382 +
 final/include/llvm/Support/Timer.h | 194 +
 final/include/llvm/Support/ToolOutputFile.h | 62 +
 final/include/llvm/Support/TypeBuilder.h | 399 +
 final/include/llvm/Support/Valgrind.h | 32 +
 final/include/llvm/Support/ValueHandle.h | 408 +
 .../llvm/Support/circular_raw_ostream.h | 171 +
 final/include/llvm/Support/raw_os_ostream.h | 42 +
 final/include/llvm/Support/raw_ostream.h | 491 +
 final/include/llvm/Support/system_error.h | 910 +
 final/include/llvm/Support/type_traits.h | 126 +
 final/include/llvm/SymbolTableListTraits.h | 79 +
 final/include/llvm/Target/Mangler.h | 75 +
 final/include/llvm/Target/SubtargetFeature.h | 119 +
 final/include/llvm/Target/Target.td | 711 +
 final/include/llvm/Target/TargetAsmBackend.h | 123 +
 final/include/llvm/Target/TargetAsmInfo.h | 75 +
 final/include/llvm/Target/TargetAsmLexer.h | 89 +
 final/include/llvm/Target/TargetAsmParser.h | 90 +
 final/include/llvm/Target/TargetCallingConv.h | 140 +
 .../include/llvm/Target/TargetCallingConv.td | 135 +
 final/include/llvm/Target/TargetData.h | 336 +
 .../include/llvm/Target/TargetELFWriterInfo.h | 123 +
 .../include/llvm/Target/TargetFrameLowering.h | 196 +
 final/include/llvm/Target/TargetInstrDesc.h | 513 +
 final/include/llvm/Target/TargetInstrInfo.h | 713 +
 .../llvm/Target/TargetInstrItineraries.h | 248 +
 .../include/llvm/Target/TargetIntrinsicInfo.h | 64 +
 final/include/llvm/Target/TargetJITInfo.h | 142 +
 final/include/llvm/Target/TargetLibraryInfo.h | 75 +
 final/include/llvm/Target/TargetLowering.h | 1864 ++
 .../llvm/Target/TargetLoweringObjectFile.h | 240 +
 final/include/llvm/Target/TargetMachine.h | 390 +
 final/include/llvm/Target/TargetOpcodes.h | 86 +
 final/include/llvm/Target/TargetOptions.h | 162 +
 .../include/llvm/Target/TargetRegisterInfo.h | 828 +
 final/include/llvm/Target/TargetRegistry.h | 779 +
 final/include/llvm/Target/TargetSchedule.td | 130 +
 final/include/llvm/Target/TargetSelect.h | 125 +
 .../include/llvm/Target/TargetSelectionDAG.td | 893 +
 .../llvm/Target/TargetSelectionDAGInfo.h | 101 +
 final/include/llvm/Target/TargetSubtarget.h | 67 +
 final/include/llvm/Transforms/IPO.h | 212 +
 .../include/llvm/Transforms/IPO/InlinerPass.h | 97 +
 .../include/llvm/Transforms/Instrumentation.h | 33 +
 final/include/llvm/Transforms/Scalar.h | 348 +
 .../llvm/Transforms/Utils/AddrModeMatcher.h | 108 +
 .../llvm/Transforms/Utils/BasicBlockUtils.h | 186 +
 .../llvm/Transforms/Utils/BasicInliner.h | 55 +
 .../llvm/Transforms/Utils/BuildLibCalls.h | 110 +
 final/include/llvm/Transforms/Utils/Cloning.h | 219 +
 .../llvm/Transforms/Utils/FunctionUtils.h | 41 +
 final/include/llvm/Transforms/Utils/Local.h | 162 +
 .../llvm/Transforms/Utils/PromoteMemToReg.h | 45 +
 .../llvm/Transforms/Utils/SSAUpdater.h | 163 +
 .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 469 +
 .../Transforms/Utils/UnifyFunctionExitNodes.h | 51 +
 .../llvm/Transforms/Utils/UnrollLoop.h | 29 +
 .../llvm/Transforms/Utils/ValueMapper.h | 50 +
 final/include/llvm/Type.h | 564 +
 final/include/llvm/TypeSymbolTable.h | 152 +
 final/include/llvm/Use.h | 222 +
 final/include/llvm/User.h | 175 +
 final/include/llvm/Value.h | 409 +
 final/include/llvm/ValueSymbolTable.h | 133 +
 final/lib/Analysis/AliasAnalysis.cpp | 383 +
 final/lib/Analysis/AliasAnalysisCounter.cpp | 173 +
 final/lib/Analysis/AliasAnalysisEvaluator.cpp | 304 +
 final/lib/Analysis/AliasDebugger.cpp | 138 +
 final/lib/Analysis/AliasSetTracker.cpp | 648 +
 final/lib/Analysis/Analysis.cpp | 103 +
 final/lib/Analysis/BasicAliasAnalysis.cpp | 1170 +
 final/lib/Analysis/CFGPrinter.cpp | 165 +
 final/lib/Analysis/CMakeLists.txt | 59 +
 final/lib/Analysis/CaptureTracking.cpp | 147 +
 final/lib/Analysis/ConstantFolding.cpp | 1400 +
 final/lib/Analysis/DIBuilder.cpp | 801 +
 final/lib/Analysis/DbgInfoPrinter.cpp | 224 +
 final/lib/Analysis/DebugInfo.cpp | 948 +
 final/lib/Analysis/DomPrinter.cpp | 232 +
 final/lib/Analysis/DominanceFrontier.cpp | 137 +
 final/lib/Analysis/IPA/CMakeLists.txt | 7 +
 final/lib/Analysis/IPA/CallGraph.cpp | 340 +
 final/lib/Analysis/IPA/CallGraphSCCPass.cpp | 608 +
 final/lib/Analysis/IPA/FindUsedTypes.cpp | 103 +
 final/lib/Analysis/IPA/GlobalsModRef.cpp | 609 +
 final/lib/Analysis/IPA/IPA.cpp | 29 +
 final/lib/Analysis/IPA/Makefile | 15 +
 final/lib/Analysis/IVUsers.cpp | 263 +
 final/lib/Analysis/InlineCost.cpp | 648 +
 final/lib/Analysis/InstCount.cpp | 87 +
 final/lib/Analysis/InstructionSimplify.cpp | 2170 ++
 final/lib/Analysis/Interval.cpp | 58 +
 final/lib/Analysis/IntervalPartition.cpp | 114 +
 final/lib/Analysis/LazyValueInfo.cpp | 1125 +
 final/lib/Analysis/LibCallAliasAnalysis.cpp | 137 +
 final/lib/Analysis/LibCallSemantics.cpp | 63 +
 final/lib/Analysis/Lint.cpp | 658 +
 final/lib/Analysis/Loads.cpp | 235 +
 final/lib/Analysis/LoopDependenceAnalysis.cpp | 358 +
 final/lib/Analysis/LoopInfo.cpp | 419 +
 final/lib/Analysis/LoopPass.cpp | 403 +
 final/lib/Analysis/Makefile | 16 +
 final/lib/Analysis/MemDepPrinter.cpp | 167 +
 final/lib/Analysis/MemoryBuiltins.cpp | 207 +
 .../lib/Analysis/MemoryDependenceAnalysis.cpp | 1324 +
 final/lib/Analysis/ModuleDebugInfoPrinter.cpp | 87 +
 final/lib/Analysis/NoAliasAnalysis.cpp | 88 +
 final/lib/Analysis/PHITransAddr.cpp | 441 +
 final/lib/Analysis/PathNumbering.cpp | 525 +
 final/lib/Analysis/PathProfileInfo.cpp | 434 +
 final/lib/Analysis/PathProfileVerifier.cpp | 207 +
 final/lib/Analysis/PostDominators.cpp | 102 +
 final/lib/Analysis/ProfileEstimatorPass.cpp | 426 +
 final/lib/Analysis/ProfileInfo.cpp | 1105 +
 final/lib/Analysis/ProfileInfoLoader.cpp | 158 +
 final/lib/Analysis/ProfileInfoLoaderPass.cpp | 267 +
 final/lib/Analysis/ProfileVerifierPass.cpp | 382 +
 final/lib/Analysis/README.txt | 30 +
 final/lib/Analysis/RegionInfo.cpp | 851 +
 final/lib/Analysis/RegionPass.cpp | 275 +
 final/lib/Analysis/RegionPrinter.cpp | 220 +
 final/lib/Analysis/ScalarEvolution.cpp | 6317 ++++
 .../Analysis/ScalarEvolutionAliasAnalysis.cpp | 173 +
 .../lib/Analysis/ScalarEvolutionExpander.cpp | 1376 +
 .../Analysis/ScalarEvolutionNormalization.cpp | 183 +
 final/lib/Analysis/SparsePropagation.cpp | 347 +
 final/lib/Analysis/Trace.cpp | 51 +
 final/lib/Analysis/TypeBasedAliasAnalysis.cpp | 299 +
 final/lib/Analysis/ValueTracking.cpp | 1743 ++
 final/lib/Archive/Archive.cpp | 261 +
 final/lib/Archive/ArchiveInternals.h | 89 +
 final/lib/Archive/ArchiveReader.cpp | 630 +
 final/lib/Archive/ArchiveWriter.cpp | 489 +
 final/lib/Archive/CMakeLists.txt | 5 +
 final/lib/Archive/Makefile | 17 +
 final/lib/AsmParser/CMakeLists.txt | 6 +
 final/lib/AsmParser/LLLexer.cpp | 874 +
 final/lib/AsmParser/LLLexer.h | 90 +
 final/lib/AsmParser/LLParser.cpp | 3976 +++
 final/lib/AsmParser/LLParser.h | 378 +
 final/lib/AsmParser/LLToken.h | 148 +
 final/lib/AsmParser/Makefile | 14 +
 final/lib/AsmParser/Parser.cpp | 62 +
 final/lib/Bitcode/CMakeLists.txt | 2 +
 final/lib/Bitcode/Makefile | 14 +
 final/lib/Bitcode/Reader/BitReader.cpp | 88 +
 final/lib/Bitcode/Reader/BitcodeReader.cpp | 2690 ++
 final/lib/Bitcode/Reader/BitcodeReader.h | 282 +
 final/lib/Bitcode/Reader/CMakeLists.txt | 4 +
 final/lib/Bitcode/Reader/Makefile | 15 +
 final/lib/Bitcode/Writer/BitWriter.cpp | 40 +
 final/lib/Bitcode/Writer/BitcodeWriter.cpp | 1672 ++
 .../lib/Bitcode/Writer/BitcodeWriterPass.cpp | 41 +
 final/lib/Bitcode/Writer/CMakeLists.txt | 6 +
 final/lib/Bitcode/Writer/Makefile | 15 +
 final/lib/Bitcode/Writer/ValueEnumerator.cpp | 511 +
 final/lib/Bitcode/Writer/ValueEnumerator.h | 156 +
 final/lib/CMakeLists.txt | 14 +
 .../lib/CodeGen/AggressiveAntiDepBreaker.cpp | 963 +
 final/lib/CodeGen/AggressiveAntiDepBreaker.h | 184 +
 final/lib/CodeGen/AllocationOrder.cpp | 68 +
 final/lib/CodeGen/AllocationOrder.h | 56 +
 final/lib/CodeGen/Analysis.cpp | 303 +
 final/lib/CodeGen/AntiDepBreaker.h | 59 +
 final/lib/CodeGen/AsmPrinter/ARMException.cpp | 87 +
 final/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 1904 ++
 .../CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 318 +
 .../AsmPrinter/AsmPrinterInlineAsm.cpp | 409 +
 final/lib/CodeGen/AsmPrinter/CMakeLists.txt | 13 +
 final/lib/CodeGen/AsmPrinter/DIE.cpp | 368 +
 final/lib/CodeGen/AsmPrinter/DIE.h | 434 +
 .../CodeGen/AsmPrinter/DwarfCFIException.cpp | 138 +
 final/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3792 +++
 final/lib/CodeGen/AsmPrinter/DwarfDebug.h | 602 +
 .../lib/CodeGen/AsmPrinter/DwarfException.cpp | 676 +
 final/lib/CodeGen/AsmPrinter/DwarfException.h | 274 +
 .../AsmPrinter/DwarfTableException.cpp | 349 +
 final/lib/CodeGen/AsmPrinter/Makefile | 13 +
 .../lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 166 +
 final/lib/CodeGen/BranchFolding.cpp | 1341 +
 final/lib/CodeGen/BranchFolding.h | 116 +
 final/lib/CodeGen/CMakeLists.txt | 99 +
 final/lib/CodeGen/CalcSpillWeights.cpp | 227 +
 final/lib/CodeGen/CallingConvLower.cpp | 177 +
 final/lib/CodeGen/CodeGen.cpp | 61 +
 final/lib/CodeGen/CodePlacementOpt.cpp | 425 +
 final/lib/CodeGen/CriticalAntiDepBreaker.cpp | 668 +
 final/lib/CodeGen/CriticalAntiDepBreaker.h | 106 +
 .../CodeGen/DeadMachineInstructionElim.cpp | 196 +
 final/lib/CodeGen/DwarfEHPrepare.cpp | 670 +
 final/lib/CodeGen/ELF.h | 227 +
 final/lib/CodeGen/ELFCodeEmitter.cpp | 205 +
 final/lib/CodeGen/ELFCodeEmitter.h | 78 +
 final/lib/CodeGen/ELFWriter.cpp | 1103 +
 final/lib/CodeGen/ELFWriter.h | 251 +
 final/lib/CodeGen/EdgeBundles.cpp | 86 +
 final/lib/CodeGen/ExpandISelPseudos.cpp | 82 +
 final/lib/CodeGen/GCMetadata.cpp | 213 +
 final/lib/CodeGen/GCMetadataPrinter.cpp | 27 +
 final/lib/CodeGen/GCStrategy.cpp | 416 +
 final/lib/CodeGen/IfConversion.cpp | 1528 +
 final/lib/CodeGen/InlineSpiller.cpp | 439 +
 final/lib/CodeGen/IntrinsicLowering.cpp | 564 +
 final/lib/CodeGen/LLVMTargetMachine.cpp | 453 +
 final/lib/CodeGen/LatencyPriorityQueue.cpp | 152 +
 final/lib/CodeGen/LiveDebugVariables.cpp | 711 +
 final/lib/CodeGen/LiveDebugVariables.h | 63 +
 final/lib/CodeGen/LiveInterval.cpp | 801 +
 final/lib/CodeGen/LiveIntervalAnalysis.cpp | 2162 ++
 final/lib/CodeGen/LiveIntervalUnion.cpp | 315 +
 final/lib/CodeGen/LiveIntervalUnion.h | 258 +
 final/lib/CodeGen/LiveRangeEdit.cpp | 130 +
 final/lib/CodeGen/LiveRangeEdit.h | 135 +
 final/lib/CodeGen/LiveStackAnalysis.cpp | 82 +
 final/lib/CodeGen/LiveVariables.cpp | 772 +
 .../lib/CodeGen/LocalStackSlotAllocation.cpp | 359 +
 final/lib/CodeGen/LowerSubregs.cpp | 223 +
 final/lib/CodeGen/MachineBasicBlock.cpp | 692 +
 final/lib/CodeGen/MachineCSE.cpp | 531 +
 final/lib/CodeGen/MachineDominators.cpp | 59 +
 final/lib/CodeGen/MachineFunction.cpp | 752 +
 final/lib/CodeGen/MachineFunctionAnalysis.cpp | 58 +
 final/lib/CodeGen/MachineFunctionPass.cpp | 56 +
 .../CodeGen/MachineFunctionPrinterPass.cpp | 60 +
 final/lib/CodeGen/MachineInstr.cpp | 1671 ++
 final/lib/CodeGen/MachineLICM.cpp | 1202 +
 final/lib/CodeGen/MachineLoopInfo.cpp | 83 +
 final/lib/CodeGen/MachineLoopRanges.cpp | 116 +
 final/lib/CodeGen/MachineModuleInfo.cpp | 567 +
 final/lib/CodeGen/MachineModuleInfoImpls.cpp | 45 +
 final/lib/CodeGen/MachinePassRegistry.cpp | 41 +
 final/lib/CodeGen/MachineRegisterInfo.cpp | 241 +
 final/lib/CodeGen/MachineSSAUpdater.cpp | 372 +
 final/lib/CodeGen/MachineSink.cpp | 607 +
 final/lib/CodeGen/MachineVerifier.cpp | 1216 +
 final/lib/CodeGen/Makefile | 22 +
 final/lib/CodeGen/ObjectCodeEmitter.cpp | 141 +
 final/lib/CodeGen/OcamlGC.cpp | 37 +
 final/lib/CodeGen/OptimizePHIs.cpp | 190 +
 final/lib/CodeGen/PHIElimination.cpp | 427 +
 final/lib/CodeGen/PHIEliminationUtils.cpp | 61 +
 final/lib/CodeGen/PHIEliminationUtils.h | 25 +
 final/lib/CodeGen/Passes.cpp | 68 +
 final/lib/CodeGen/PeepholeOptimizer.cpp | 372 +
 final/lib/CodeGen/PostRASchedulerList.cpp | 694 +
 final/lib/CodeGen/PreAllocSplitting.cpp | 1430 +
 final/lib/CodeGen/ProcessImplicitDefs.cpp | 298 +
 final/lib/CodeGen/PrologEpilogInserter.cpp | 850 +
 final/lib/CodeGen/PrologEpilogInserter.h | 177 +
 final/lib/CodeGen/PseudoSourceValue.cpp | 134 +
 final/lib/CodeGen/README.txt | 199 +
 final/lib/CodeGen/RegAllocBase.h | 182 +
 final/lib/CodeGen/RegAllocBasic.cpp | 537 +
 final/lib/CodeGen/RegAllocFast.cpp | 1076 +
 final/lib/CodeGen/RegAllocGreedy.cpp | 1286 +
 final/lib/CodeGen/RegAllocLinearScan.cpp | 1538 +
 final/lib/CodeGen/RegAllocPBQP.cpp | 720 +
 final/lib/CodeGen/RegisterCoalescer.cpp | 197 +
 final/lib/CodeGen/RegisterScavenging.cpp | 389 +
 final/lib/CodeGen/RenderMachineFunction.cpp | 1014 +
 final/lib/CodeGen/RenderMachineFunction.h | 338 +
 final/lib/CodeGen/ScheduleDAG.cpp | 595 +
 final/lib/CodeGen/ScheduleDAGEmit.cpp | 67 +
 final/lib/CodeGen/ScheduleDAGInstrs.cpp | 693 +
 final/lib/CodeGen/ScheduleDAGInstrs.h | 207 +
 final/lib/CodeGen/ScheduleDAGPrinter.cpp | 99 +
 .../CodeGen/ScoreboardHazardRecognizer.cpp | 243 +
 final/lib/CodeGen/SelectionDAG/CMakeLists.txt | 23 +
 .../lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7641 +++++
 final/lib/CodeGen/SelectionDAG/FastISel.cpp | 1251 +
 .../SelectionDAG/FunctionLoweringInfo.cpp | 477 +
 .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 897 +
 final/lib/CodeGen/SelectionDAG/InstrEmitter.h | 142 +
 .../lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3577 +++
 .../SelectionDAG/LegalizeFloatTypes.cpp | 1428 +
 .../SelectionDAG/LegalizeIntegerTypes.cpp | 2615 ++
 .../CodeGen/SelectionDAG/LegalizeTypes.cpp | 1153 +
 .../lib/CodeGen/SelectionDAG/LegalizeTypes.h | 747 +
 .../SelectionDAG/LegalizeTypesGeneric.cpp | 480 +
 .../SelectionDAG/LegalizeVectorOps.cpp | 290 +
 .../SelectionDAG/LegalizeVectorTypes.cpp | 2585 ++
 final/lib/CodeGen/SelectionDAG/Makefile | 13 +
 .../lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 114 +
 .../lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 54 +
 .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 636 +
 .../CodeGen/SelectionDAG/ScheduleDAGList.cpp | 265 +
 .../SelectionDAG/ScheduleDAGRRList.cpp | 2715 ++
 .../SelectionDAG/ScheduleDAGSDNodes.cpp | 763 +
 .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 151 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6623 +++++
 .../SelectionDAG/SelectionDAGBuilder.cpp | 6484 +++++
 .../SelectionDAG/SelectionDAGBuilder.h | 543 +
 .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2783 ++
 .../SelectionDAG/SelectionDAGPrinter.cpp | 302 +
 .../CodeGen/SelectionDAG/TargetLowering.cpp | 3214 ++
 .../SelectionDAG/TargetSelectionDAGInfo.cpp | 23 +
 final/lib/CodeGen/ShadowStackGC.cpp | 451 +
 final/lib/CodeGen/ShrinkWrapping.cpp | 1152 +
 .../lib/CodeGen/SimpleRegisterCoalescing.cpp | 1789 ++
 final/lib/CodeGen/SimpleRegisterCoalescing.h | 193 +
 final/lib/CodeGen/SjLjEHPrepare.cpp | 592 +
 final/lib/CodeGen/SlotIndexes.cpp | 181 +
 final/lib/CodeGen/SpillPlacement.cpp | 324 +
 final/lib/CodeGen/SpillPlacement.h | 113 +
 final/lib/CodeGen/Spiller.cpp | 243 +
 final/lib/CodeGen/Spiller.h | 56 +
 final/lib/CodeGen/SplitKit.cpp | 921 +
 final/lib/CodeGen/SplitKit.h | 346 +
 final/lib/CodeGen/Splitter.cpp | 827 +
 final/lib/CodeGen/Splitter.h | 101 +
 final/lib/CodeGen/StackProtector.cpp | 262 +
 final/lib/CodeGen/StackSlotColoring.cpp | 768 +
 final/lib/CodeGen/StrongPHIElimination.cpp | 829 +
 final/lib/CodeGen/TailDuplication.cpp | 658 +
 final/lib/CodeGen/TargetInstrInfoImpl.cpp | 449 +
 .../CodeGen/TargetLoweringObjectFileImpl.cpp | 1026 +
 .../lib/CodeGen/TwoAddressInstructionPass.cpp | 1527 +
 final/lib/CodeGen/UnreachableBlockElim.cpp | 212 +
 final/lib/CodeGen/VirtRegMap.cpp | 354 +
 final/lib/CodeGen/VirtRegMap.h | 523 +
 final/lib/CodeGen/VirtRegRewriter.cpp | 2604 ++
 final/lib/CodeGen/VirtRegRewriter.h | 32 +
 final/lib/CompilerDriver/Action.cpp | 134 +
 final/lib/CompilerDriver/BuiltinOptions.cpp | 61 +
 final/lib/CompilerDriver/CMakeLists.txt | 10 +
 final/lib/CompilerDriver/CompilationGraph.cpp | 655 +
 final/lib/CompilerDriver/Main.cpp | 146 +
 final/lib/CompilerDriver/Makefile | 20 +
 final/lib/CompilerDriver/Tool.cpp | 95 +
 final/lib/ExecutionEngine/CMakeLists.txt | 8 +
 final/lib/ExecutionEngine/ExecutionEngine.cpp | 1103 +
 .../ExecutionEngineBindings.cpp | 254 +
 .../Interpreter/CMakeLists.txt | 17 +
 .../ExecutionEngine/Interpreter/Execution.cpp | 1350 +
 .../Interpreter/ExternalFunctions.cpp | 492 +
 .../Interpreter/Interpreter.cpp | 98 +
 .../ExecutionEngine/Interpreter/Interpreter.h | 242 +
 .../lib/ExecutionEngine/Interpreter/Makefile | 13 +
 final/lib/ExecutionEngine/JIT/CMakeLists.txt | 13 +
 final/lib/ExecutionEngine/JIT/Intercept.cpp | 161 +
 final/lib/ExecutionEngine/JIT/JIT.cpp | 843 +
 final/lib/ExecutionEngine/JIT/JIT.h | 237 +
 .../JIT/JITDebugRegisterer.cpp | 212 +
 .../ExecutionEngine/JIT/JITDebugRegisterer.h | 116 +
 .../ExecutionEngine/JIT/JITDwarfEmitter.cpp | 598 +
 .../lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 73 +
 final/lib/ExecutionEngine/JIT/JITEmitter.cpp | 1305 +
 .../ExecutionEngine/JIT/JITMemoryManager.cpp | 727 +
 final/lib/ExecutionEngine/JIT/Makefile | 38 +
 .../JIT/OProfileJITEventListener.cpp | 192 +
 .../lib/ExecutionEngine/JIT/TargetSelect.cpp | 91 +
 .../lib/ExecutionEngine/MCJIT/CMakeLists.txt | 4 +
 final/lib/ExecutionEngine/MCJIT/MCJIT.cpp | 92 +
 final/lib/ExecutionEngine/MCJIT/MCJIT.h | 68 +
 final/lib/ExecutionEngine/MCJIT/Makefile | 13 +
 .../ExecutionEngine/MCJIT/TargetSelect.cpp | 91 +
 final/lib/ExecutionEngine/Makefile | 13 +
 final/lib/Linker/CMakeLists.txt | 6 +
 final/lib/Linker/LinkArchives.cpp | 198 +
 final/lib/Linker/LinkItems.cpp | 241 +
 final/lib/Linker/LinkModules.cpp | 1301 +
 final/lib/Linker/Linker.cpp | 174 +
 final/lib/Linker/Makefile | 15 +
 final/lib/MC/CMakeLists.txt | 40 +
 final/lib/MC/ELFObjectWriter.cpp | 1494 +
 final/lib/MC/ELFObjectWriter.h | 391 +
 final/lib/MC/MCAsmInfo.cpp | 108 +
 final/lib/MC/MCAsmInfoCOFF.cpp | 37 +
 final/lib/MC/MCAsmInfoDarwin.cpp | 59 +
 final/lib/MC/MCAsmStreamer.cpp | 974 +
 final/lib/MC/MCAssembler.cpp | 949 +
 final/lib/MC/MCCodeEmitter.cpp | 18 +
 final/lib/MC/MCContext.cpp | 312 +
 final/lib/MC/MCDisassembler.cpp | 14 +
 final/lib/MC/MCDisassembler/CMakeLists.txt | 7 +
 .../lib/MC/MCDisassembler/EDDisassembler.cpp | 402 +
 final/lib/MC/MCDisassembler/EDDisassembler.h | 273 +
 final/lib/MC/MCDisassembler/EDInfo.h | 73 +
 final/lib/MC/MCDisassembler/EDInst.cpp | 209 +
 final/lib/MC/MCDisassembler/EDInst.h | 182 +
 final/lib/MC/MCDisassembler/EDOperand.cpp | 293 +
 final/lib/MC/MCDisassembler/EDOperand.h | 91 +
 final/lib/MC/MCDisassembler/EDToken.cpp | 210 +
 final/lib/MC/MCDisassembler/EDToken.h | 139 +
 final/lib/MC/MCDisassembler/Makefile | 14 +
 final/lib/MC/MCDwarf.cpp | 814 +
 final/lib/MC/MCELF.cpp | 72 +
 final/lib/MC/MCELF.h | 35 +
 final/lib/MC/MCELFObjectTargetWriter.cpp | 23 +
 final/lib/MC/MCELFStreamer.cpp | 383 +
 final/lib/MC/MCELFStreamer.h | 268 +
 final/lib/MC/MCExpr.cpp | 556 +
 final/lib/MC/MCInst.cpp | 66 +
 final/lib/MC/MCInstPrinter.cpp | 26 +
 final/lib/MC/MCLabel.cpp | 21 +
 final/lib/MC/MCLoggingStreamer.cpp | 248 +
 final/lib/MC/MCMachOStreamer.cpp | 405 +
 final/lib/MC/MCMachObjectTargetWriter.cpp | 22 +
 final/lib/MC/MCNullStreamer.cpp | 104 +
 final/lib/MC/MCObjectStreamer.cpp | 270 +
 final/lib/MC/MCObjectWriter.cpp | 80 +
 final/lib/MC/MCParser/AsmLexer.cpp | 430 +
 final/lib/MC/MCParser/AsmParser.cpp | 2497 ++
 final/lib/MC/MCParser/CMakeLists.txt | 11 +
 final/lib/MC/MCParser/COFFAsmParser.cpp | 144 +
 final/lib/MC/MCParser/DarwinAsmParser.cpp | 661 +
 final/lib/MC/MCParser/ELFAsmParser.cpp | 533 +
 final/lib/MC/MCParser/MCAsmLexer.cpp | 27 +
 final/lib/MC/MCParser/MCAsmParser.cpp | 44 +
 .../lib/MC/MCParser/MCAsmParserExtension.cpp | 22 +
 final/lib/MC/MCParser/Makefile | 15 +
 final/lib/MC/MCParser/TargetAsmParser.cpp | 19 +
 final/lib/MC/MCPureStreamer.cpp | 234 +
 final/lib/MC/MCSection.cpp | 22 +
 final/lib/MC/MCSectionCOFF.cpp | 84 +
 final/lib/MC/MCSectionELF.cpp | 150 +
 final/lib/MC/MCSectionMachO.cpp | 299 +
 final/lib/MC/MCStreamer.cpp | 315 +
 final/lib/MC/MCSymbol.cpp | 80 +
 final/lib/MC/MCValue.cpp | 36 +
 final/lib/MC/MachObjectWriter.cpp | 1598 +
 final/lib/MC/Makefile | 16 +
 final/lib/MC/TargetAsmBackend.cpp | 37 +
 final/lib/MC/WinCOFFObjectWriter.cpp | 877 +
 final/lib/MC/WinCOFFStreamer.cpp | 395 +
 final/lib/Makefile | 17 +
 final/lib/Object/CMakeLists.txt | 6 +
 final/lib/Object/COFFObjectFile.cpp | 375 +
 final/lib/Object/ELFObjectFile.cpp | 686 +
 final/lib/Object/MachOObject.cpp | 342 +
 final/lib/Object/Makefile | 14 +
 final/lib/Object/ObjectFile.cpp | 71 +
 final/lib/Support/APFloat.cpp | 3564 +++
 final/lib/Support/APInt.cpp | 2904 ++
 final/lib/Support/APSInt.cpp | 23 +
 final/lib/Support/Allocator.cpp | 180 +
 final/lib/Support/Atomic.cpp | 112 +
 final/lib/Support/CMakeLists.txt | 103 +
 final/lib/Support/COPYRIGHT.regex | 54 +
 final/lib/Support/CommandLine.cpp | 1295 +
 final/lib/Support/ConstantRange.cpp | 702 +
 final/lib/Support/CrashRecoveryContext.cpp | 230 +
 final/lib/Support/DAGDeltaAlgorithm.cpp | 357 +
 final/lib/Support/Debug.cpp | 134 +
 final/lib/Support/DeltaAlgorithm.cpp | 114 +
 final/lib/Support/Disassembler.cpp | 75 +
 final/lib/Support/Dwarf.cpp | 652 +
 final/lib/Support/DynamicLibrary.cpp | 170 +
 final/lib/Support/Errno.cpp | 74 +
 final/lib/Support/ErrorHandling.cpp | 100 +
 final/lib/Support/FileUtilities.cpp | 281 +
 final/lib/Support/FoldingSet.cpp | 421 +
 final/lib/Support/FormattedStream.cpp | 101 +
 final/lib/Support/GraphWriter.cpp | 200 +
 final/lib/Support/Host.cpp | 307 +
 final/lib/Support/IncludeFile.cpp | 20 +
 final/lib/Support/IntEqClasses.cpp | 70 +
 final/lib/Support/IntervalMap.cpp | 161 +
 final/lib/Support/IsInf.cpp | 49 +
 final/lib/Support/IsNAN.cpp | 33 +
 final/lib/Support/Makefile | 22 +
 final/lib/Support/ManagedStatic.cpp | 75 +
 final/lib/Support/Memory.cpp | 74 +
 final/lib/Support/MemoryBuffer.cpp | 298 +
 final/lib/Support/MemoryObject.cpp | 34 +
 final/lib/Support/Mutex.cpp | 157 +
 final/lib/Support/Path.cpp | 283 +
 final/lib/Support/PathV2.cpp | 774 +
 final/lib/Support/PluginLoader.cpp | 47 +
 final/lib/Support/PrettyStackTrace.cpp | 133 +
 final/lib/Support/Process.cpp | 33 +
 final/lib/Support/Program.cpp | 56 +
 final/lib/Support/README.txt.system | 43 +
 final/lib/Support/RWMutex.cpp | 157 +
 final/lib/Support/Regex.cpp | 168 +
 .../SearchForAddressOfSpecialSymbol.cpp | 73 +
 final/lib/Support/Signals.cpp | 34 +
 final/lib/Support/SmallPtrSet.cpp | 226 +
 final/lib/Support/SmallVector.cpp | 40 +
 final/lib/Support/SourceMgr.cpp | 230 +
 final/lib/Support/Statistic.cpp | 152 +
 final/lib/Support/StringExtras.cpp | 81 +
 final/lib/Support/StringMap.cpp | 215 +
 final/lib/Support/StringPool.cpp | 35 +
 final/lib/Support/StringRef.cpp | 415 +
 final/lib/Support/SystemUtils.cpp | 55 +
 final/lib/Support/TargetRegistry.cpp | 92 +
 final/lib/Support/ThreadLocal.cpp | 84 +
 final/lib/Support/Threading.cpp | 116 +
 final/lib/Support/TimeValue.cpp | 57 +
 final/lib/Support/Timer.cpp | 393 +
 final/lib/Support/ToolOutputFile.cpp | 43 +
 final/lib/Support/Triple.cpp | 640 +
 final/lib/Support/Twine.cpp | 160 +
 final/lib/Support/Unix/Host.inc | 97 +
 final/lib/Support/Unix/Memory.inc | 151 +
 final/lib/Support/Unix/Mutex.inc | 43 +
 final/lib/Support/Unix/Path.inc | 887 +
 final/lib/Support/Unix/PathV2.inc | 507 +
 final/lib/Support/Unix/Process.inc | 295 +
 final/lib/Support/Unix/Program.inc | 424 +
 final/lib/Support/Unix/README.txt | 16 +
 final/lib/Support/Unix/RWMutex.inc | 43 +
 final/lib/Support/Unix/Signals.inc | 303 +
 final/lib/Support/Unix/ThreadLocal.inc | 26 +
 final/lib/Support/Unix/TimeValue.inc | 56 +
 final/lib/Support/Unix/Unix.h | 87 +
 final/lib/Support/Unix/system_error.inc | 34 +
 final/lib/Support/Valgrind.cpp | 54 +
 final/lib/Support/Windows/DynamicLibrary.inc | 166 +
 final/lib/Support/Windows/Host.inc | 23 +
 final/lib/Support/Windows/Memory.inc | 73 +
 final/lib/Support/Windows/Mutex.inc | 58 +
 final/lib/Support/Windows/Path.inc | 931 +
 final/lib/Support/Windows/PathV2.inc | 750 +
 final/lib/Support/Windows/Process.inc | 222 +
 final/lib/Support/Windows/Program.inc | 403 +
 final/lib/Support/Windows/RWMutex.inc | 58 +
 final/lib/Support/Windows/Signals.inc | 328 +
 final/lib/Support/Windows/ThreadLocal.inc | 54 +
 final/lib/Support/Windows/TimeValue.inc | 51 +
 final/lib/Support/Windows/Windows.h | 120 +
 .../lib/Support/Windows/explicit_symbols.inc | 66 +
 final/lib/Support/Windows/system_error.inc | 142 +
 final/lib/Support/circular_raw_ostream.cpp | 45 +
 final/lib/Support/raw_os_ostream.cpp | 30 +
 final/lib/Support/raw_ostream.cpp | 763 +
 final/lib/Support/regcclass.h | 70 +
 final/lib/Support/regcname.h | 139 +
 final/lib/Support/regcomp.c | 1525 +
 final/lib/Support/regengine.inc | 1034 +
 final/lib/Support/regerror.c | 135 +
 final/lib/Support/regex2.h | 157 +
 final/lib/Support/regex_impl.h | 108 +
 final/lib/Support/regexec.c | 162 +
 final/lib/Support/regfree.c | 72 +
 final/lib/Support/regstrlcpy.c | 52 +
 final/lib/Support/regutils.h | 53 +
 final/lib/Support/system_error.cpp | 130 +
 final/lib/Target/ARM/ARM.h | 63 +
 final/lib/Target/ARM/ARM.td | 215 +
 final/lib/Target/ARM/ARMAddressingModes.h | 585 +
 final/lib/Target/ARM/ARMAsmBackend.cpp | 512 +
 final/lib/Target/ARM/ARMAsmPrinter.cpp | 1725 ++
 final/lib/Target/ARM/ARMAsmPrinter.h | 114 +
 final/lib/Target/ARM/ARMBaseInfo.h | 249 +
 final/lib/Target/ARM/ARMBaseInstrInfo.cpp | 2331 ++
 final/lib/Target/ARM/ARMBaseInstrInfo.h | 528 +
 final/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 1248 +
 final/lib/Target/ARM/ARMBaseRegisterInfo.h | 209 +
 final/lib/Target/ARM/ARMBuildAttrs.h | 131 +
 final/lib/Target/ARM/ARMCallingConv.h | 160 +
 final/lib/Target/ARM/ARMCallingConv.td | 164 +
 final/lib/Target/ARM/ARMCodeEmitter.cpp | 1887 ++
 .../lib/Target/ARM/ARMConstantIslandPass.cpp | 1900 ++
 final/lib/Target/ARM/ARMConstantPoolValue.cpp | 130 +
 final/lib/Target/ARM/ARMConstantPoolValue.h | 122 +
 final/lib/Target/ARM/ARMELFWriterInfo.cpp | 83 +
 final/lib/Target/ARM/ARMELFWriterInfo.h | 58 +
 final/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 1241 +
 final/lib/Target/ARM/ARMFastISel.cpp | 1920 ++
 final/lib/Target/ARM/ARMFixupKinds.h | 97 +
 final/lib/Target/ARM/ARMFrameLowering.cpp | 1041 +
 final/lib/Target/ARM/ARMFrameLowering.h | 75 +
 final/lib/Target/ARM/ARMGlobalMerge.cpp | 223 +
 final/lib/Target/ARM/ARMHazardRecognizer.cpp | 118 +
 final/lib/Target/ARM/ARMHazardRecognizer.h | 54 +
 final/lib/Target/ARM/ARMISelDAGToDAG.cpp | 2869 ++
 final/lib/Target/ARM/ARMISelLowering.cpp | 7055 +++++
 final/lib/Target/ARM/ARMISelLowering.h | 490 +
 final/lib/Target/ARM/ARMInstrFormats.td | 1815 ++
 final/lib/Target/ARM/ARMInstrInfo.cpp | 61 +
 final/lib/Target/ARM/ARMInstrInfo.h | 44 +
 final/lib/Target/ARM/ARMInstrInfo.td | 3940 +++
 final/lib/Target/ARM/ARMInstrNEON.td | 4852 ++++
 final/lib/Target/ARM/ARMInstrThumb.td | 1550 +
 final/lib/Target/ARM/ARMInstrThumb2.td | 3432 +++
 final/lib/Target/ARM/ARMInstrVFP.td | 1113 +
 final/lib/Target/ARM/ARMJITInfo.cpp | 336 +
 final/lib/Target/ARM/ARMJITInfo.h | 183 +
 .../lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 1838 ++
 final/lib/Target/ARM/ARMMCAsmInfo.cpp | 80 +
 final/lib/Target/ARM/ARMMCAsmInfo.h | 31 +
 final/lib/Target/ARM/ARMMCCodeEmitter.cpp | 1263 +
 final/lib/Target/ARM/ARMMCExpr.cpp | 73 +
 final/lib/Target/ARM/ARMMCExpr.h | 73 +
 final/lib/Target/ARM/ARMMCInstLower.cpp | 115 +
 final/lib/Target/ARM/ARMMachineFunctionInfo.h | 250 +
 final/lib/Target/ARM/ARMPerfectShuffle.h | 6586 +++++
 final/lib/Target/ARM/ARMRegisterInfo.cpp | 40 +
 final/lib/Target/ARM/ARMRegisterInfo.h | 33 +
 final/lib/Target/ARM/ARMRegisterInfo.td | 559 +
 final/lib/Target/ARM/ARMRelocations.h | 62 +
 final/lib/Target/ARM/ARMSchedule.td | 261 +
 final/lib/Target/ARM/ARMScheduleA8.td | 1034 +
 final/lib/Target/ARM/ARMScheduleA9.td | 1824 ++
 final/lib/Target/ARM/ARMScheduleV6.td | 294 +
 final/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 134 +
 final/lib/Target/ARM/ARMSelectionDAGInfo.h | 42 +
 final/lib/Target/ARM/ARMSubtarget.cpp | 254 +
 final/lib/Target/ARM/ARMSubtarget.h | 234 +
 final/lib/Target/ARM/ARMTargetMachine.cpp | 202 +
 final/lib/Target/ARM/ARMTargetMachine.h | 143 +
 final/lib/Target/ARM/ARMTargetObjectFile.cpp | 47 +
 final/lib/Target/ARM/ARMTargetObjectFile.h | 38 +
 .../lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 152 +
 .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1866 ++
 final/lib/Target/ARM/AsmParser/CMakeLists.txt | 7 +
 final/lib/Target/ARM/AsmParser/Makefile | 15 +
 final/lib/Target/ARM/CMakeLists.txt | 69 +
 .../ARM/Disassembler/ARMDisassembler.cpp | 548 +
 .../Target/ARM/Disassembler/ARMDisassembler.h | 99 +
 .../ARM/Disassembler/ARMDisassemblerCore.cpp | 3247 +++
 .../ARM/Disassembler/ARMDisassemblerCore.h | 262 +
 .../Target/ARM/Disassembler/CMakeLists.txt | 14 +
 final/lib/Target/ARM/Disassembler/Makefile | 16 +
 .../ARM/Disassembler/ThumbDisassemblerCore.h | 2251 ++
 .../Target/ARM/InstPrinter/ARMInstPrinter.cpp | 714 +
 .../Target/ARM/InstPrinter/ARMInstPrinter.h | 112 +
 .../lib/Target/ARM/InstPrinter/CMakeLists.txt | 6 +
 final/lib/Target/ARM/InstPrinter/Makefile | 15 +
 final/lib/Target/ARM/MLxExpansionPass.cpp | 315 +
 final/lib/Target/ARM/Makefile | 25 +
 final/lib/Target/ARM/NEONMoveFix.cpp | 148 +
 final/lib/Target/ARM/README-Thumb.txt | 267 +
 final/lib/Target/ARM/README-Thumb2.txt | 6 +
 final/lib/Target/ARM/README.txt | 683 +
 .../Target/ARM/TargetInfo/ARMTargetInfo.cpp | 23 +
 .../lib/Target/ARM/TargetInfo/CMakeLists.txt | 7 +
 final/lib/Target/ARM/TargetInfo/Makefile | 15 +
 final/lib/Target/ARM/Thumb1FrameLowering.cpp | 358 +
 final/lib/Target/ARM/Thumb1FrameLowering.h | 52 +
 final/lib/Target/ARM/Thumb1InstrInfo.cpp | 111 +
 final/lib/Target/ARM/Thumb1InstrInfo.h | 59 +
 final/lib/Target/ARM/Thumb1RegisterInfo.cpp | 705 +
 final/lib/Target/ARM/Thumb1RegisterInfo.h | 64 +
 final/lib/Target/ARM/Thumb2ITBlockPass.cpp | 255 +
 final/lib/Target/ARM/Thumb2InstrInfo.cpp | 625 +
 final/lib/Target/ARM/Thumb2InstrInfo.h | 78 +
 final/lib/Target/ARM/Thumb2RegisterInfo.cpp | 63 +
 final/lib/Target/ARM/Thumb2RegisterInfo.h | 43 +
 final/lib/Target/ARM/Thumb2SizeReduction.cpp | 779 +
 final/lib/Target/Alpha/Alpha.h | 53 +
 final/lib/Target/Alpha/Alpha.td | 68 +
 final/lib/Target/Alpha/AlphaAsmPrinter.cpp | 166 +
 .../lib/Target/Alpha/AlphaBranchSelector.cpp | 66 +
 final/lib/Target/Alpha/AlphaCallingConv.td | 38 +
 final/lib/Target/Alpha/AlphaFrameLowering.cpp | 143 +
 final/lib/Target/Alpha/AlphaFrameLowering.h | 43 +
 final/lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 426 +
 final/lib/Target/Alpha/AlphaISelLowering.cpp | 973 +
 final/lib/Target/Alpha/AlphaISelLowering.h | 145 +
 final/lib/Target/Alpha/AlphaInstrFormats.td | 268 +
 final/lib/Target/Alpha/AlphaInstrInfo.cpp | 379 +
 final/lib/Target/Alpha/AlphaInstrInfo.h | 82 +
 final/lib/Target/Alpha/AlphaInstrInfo.td | 1157 +
 final/lib/Target/Alpha/AlphaLLRP.cpp | 158 +
 final/lib/Target/Alpha/AlphaMCAsmInfo.cpp | 23 +
 final/lib/Target/Alpha/AlphaMCAsmInfo.h | 29 +
 .../Target/Alpha/AlphaMachineFunctionInfo.h | 62 +
 final/lib/Target/Alpha/AlphaRegisterInfo.cpp | 207 +
 final/lib/Target/Alpha/AlphaRegisterInfo.h | 57 +
 final/lib/Target/Alpha/AlphaRegisterInfo.td | 171 +
 final/lib/Target/Alpha/AlphaRelocations.h | 31 +
 final/lib/Target/Alpha/AlphaSchedule.td | 85 +
 .../Target/Alpha/AlphaSelectionDAGInfo.cpp | 23 +
 .../lib/Target/Alpha/AlphaSelectionDAGInfo.h | 31 +
 final/lib/Target/Alpha/AlphaSubtarget.cpp | 25 +
 final/lib/Target/Alpha/AlphaSubtarget.h | 46 +
final/lib/Target/Alpha/AlphaTargetMachine.cpp | 54 +
final/lib/Target/Alpha/AlphaTargetMachine.h | 65 +
final/lib/Target/Alpha/CMakeLists.txt | 28 +
final/lib/Target/Alpha/Makefile | 23 +
final/lib/Target/Alpha/README.txt | 42 +
.../Alpha/TargetInfo/AlphaTargetInfo.cpp | 20 +
.../Target/Alpha/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/Alpha/TargetInfo/Makefile | 15 +
final/lib/Target/Blackfin/Blackfin.h | 38 +
final/lib/Target/Blackfin/Blackfin.td | 202 +
.../Target/Blackfin/BlackfinAsmPrinter.cpp | 156 +
.../Target/Blackfin/BlackfinCallingConv.td | 30 +
.../Target/Blackfin/BlackfinFrameLowering.cpp | 124 +
.../Target/Blackfin/BlackfinFrameLowering.h | 46 +
.../Target/Blackfin/BlackfinISelDAGToDAG.cpp | 180 +
.../Target/Blackfin/BlackfinISelLowering.cpp | 664 +
.../Target/Blackfin/BlackfinISelLowering.h | 87 +
.../Target/Blackfin/BlackfinInstrFormats.td | 34 +
.../lib/Target/Blackfin/BlackfinInstrInfo.cpp | 253 +
final/lib/Target/Blackfin/BlackfinInstrInfo.h | 78 +
.../lib/Target/Blackfin/BlackfinInstrInfo.td | 862 +
.../Target/Blackfin/BlackfinIntrinsicInfo.cpp | 104 +
.../Target/Blackfin/BlackfinIntrinsicInfo.h | 32 +
.../lib/Target/Blackfin/BlackfinIntrinsics.td | 34 +
.../lib/Target/Blackfin/BlackfinMCAsmInfo.cpp | 22 +
final/lib/Target/Blackfin/BlackfinMCAsmInfo.h | 29 +
.../Target/Blackfin/BlackfinRegisterInfo.cpp | 355 +
.../Target/Blackfin/BlackfinRegisterInfo.h | 80 +
.../Target/Blackfin/BlackfinRegisterInfo.td | 365 +
.../Blackfin/BlackfinSelectionDAGInfo.cpp | 24 +
.../Blackfin/BlackfinSelectionDAGInfo.h | 31 +
.../lib/Target/Blackfin/BlackfinSubtarget.cpp | 36 +
final/lib/Target/Blackfin/BlackfinSubtarget.h | 45 +
.../Target/Blackfin/BlackfinTargetMachine.cpp | 43 +
.../Target/Blackfin/BlackfinTargetMachine.h | 67 +
final/lib/Target/Blackfin/CMakeLists.txt | 28 +
final/lib/Target/Blackfin/Makefile | 24 +
final/lib/Target/Blackfin/README.txt | 244 +
.../TargetInfo/BlackfinTargetInfo.cpp | 21 +
.../Target/Blackfin/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/Blackfin/TargetInfo/Makefile | 15 +
final/lib/Target/CBackend/CBackend.cpp | 3591 +++
final/lib/Target/CBackend/CMakeLists.txt | 5 +
final/lib/Target/CBackend/CTargetMachine.h | 40 +
final/lib/Target/CBackend/Makefile | 16 +
.../TargetInfo/CBackendTargetInfo.cpp | 19 +
.../Target/CBackend/TargetInfo/CMakeLists.txt | 6 +
final/lib/Target/CBackend/TargetInfo/Makefile | 15 +
final/lib/Target/CMakeLists.txt | 57 +
final/lib/Target/CellSPU/CMakeLists.txt | 29 +
final/lib/Target/CellSPU/CellSDKIntrinsics.td | 449 +
final/lib/Target/CellSPU/Makefile | 21 +
final/lib/Target/CellSPU/README.txt | 92 +
final/lib/Target/CellSPU/SPU.h | 35 +
final/lib/Target/CellSPU/SPU.td | 66 +
final/lib/Target/CellSPU/SPU128InstrInfo.td | 41 +
final/lib/Target/CellSPU/SPU64InstrInfo.td | 408 +
final/lib/Target/CellSPU/SPUAsmPrinter.cpp | 334 +
final/lib/Target/CellSPU/SPUCallingConv.td | 57 +
final/lib/Target/CellSPU/SPUFrameLowering.cpp | 276 +
final/lib/Target/CellSPU/SPUFrameLowering.h | 94 +
.../Target/CellSPU/SPUHazardRecognizers.cpp | 141 +
.../lib/Target/CellSPU/SPUHazardRecognizers.h | 41 +
final/lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1204 +
final/lib/Target/CellSPU/SPUISelLowering.cpp | 3258 +++
final/lib/Target/CellSPU/SPUISelLowering.h | 189 +
final/lib/Target/CellSPU/SPUInstrBuilder.h | 43 +
final/lib/Target/CellSPU/SPUInstrFormats.td | 320 +
final/lib/Target/CellSPU/SPUInstrInfo.cpp | 449 +
final/lib/Target/CellSPU/SPUInstrInfo.h | 81 +
final/lib/Target/CellSPU/SPUInstrInfo.td | 4482 +++
final/lib/Target/CellSPU/SPUMCAsmInfo.cpp | 39 +
final/lib/Target/CellSPU/SPUMCAsmInfo.h | 28 +
final/lib/Target/CellSPU/SPUMachineFunction.h | 49 +
final/lib/Target/CellSPU/SPUMathInstr.td | 97 +
final/lib/Target/CellSPU/SPUNodes.td | 159 +
final/lib/Target/CellSPU/SPUNopFiller.cpp | 153 +
final/lib/Target/CellSPU/SPUOperands.td | 664 +
final/lib/Target/CellSPU/SPURegisterInfo.cpp | 371 +
final/lib/Target/CellSPU/SPURegisterInfo.h | 104 +
final/lib/Target/CellSPU/SPURegisterInfo.td | 429 +
final/lib/Target/CellSPU/SPURegisterNames.h | 18 +
final/lib/Target/CellSPU/SPUSchedule.td | 59 +
.../Target/CellSPU/SPUSelectionDAGInfo.cpp | 23 +
.../lib/Target/CellSPU/SPUSelectionDAGInfo.h | 31 +
final/lib/Target/CellSPU/SPUSubtarget.cpp | 57 +
final/lib/Target/CellSPU/SPUSubtarget.h | 94 +
final/lib/Target/CellSPU/SPUTargetMachine.cpp | 70 +
final/lib/Target/CellSPU/SPUTargetMachine.h | 90 +
.../Target/CellSPU/TargetInfo/CMakeLists.txt | 7 +
.../CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20 +
final/lib/Target/CellSPU/TargetInfo/Makefile | 15 +
final/lib/Target/CppBackend/CMakeLists.txt | 5 +
final/lib/Target/CppBackend/CPPBackend.cpp | 2056 ++
.../lib/Target/CppBackend/CPPTargetMachine.h | 43 +
final/lib/Target/CppBackend/Makefile | 16 +
.../CppBackend/TargetInfo/CMakeLists.txt | 6 +
.../TargetInfo/CppBackendTargetInfo.cpp | 26 +
.../lib/Target/CppBackend/TargetInfo/Makefile | 15 +
.../Target/MBlaze/AsmParser/CMakeLists.txt | 8 +
.../MBlaze/AsmParser/MBlazeAsmLexer.cpp | 127 +
.../MBlaze/AsmParser/MBlazeAsmParser.cpp | 568 +
final/lib/Target/MBlaze/AsmParser/Makefile | 15 +
final/lib/Target/MBlaze/CMakeLists.txt | 40 +
.../Target/MBlaze/Disassembler/CMakeLists.txt | 16 +
.../Disassembler/MBlazeDisassembler.cpp | 647 +
.../MBlaze/Disassembler/MBlazeDisassembler.h | 55 +
final/lib/Target/MBlaze/Disassembler/Makefile | 16 +
.../Target/MBlaze/InstPrinter/CMakeLists.txt | 8 +
.../MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 69 +
.../MBlaze/InstPrinter/MBlazeInstPrinter.h | 43 +
final/lib/Target/MBlaze/InstPrinter/Makefile | 16 +
final/lib/Target/MBlaze/MBlaze.h | 47 +
final/lib/Target/MBlaze/MBlaze.td | 94 +
final/lib/Target/MBlaze/MBlazeAsmBackend.cpp | 163 +
final/lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 335 +
final/lib/Target/MBlaze/MBlazeCallingConv.td | 28 +
.../Target/MBlaze/MBlazeDelaySlotFiller.cpp | 258 +
.../lib/Target/MBlaze/MBlazeELFWriterInfo.cpp | 111 +
final/lib/Target/MBlaze/MBlazeELFWriterInfo.h | 58 +
.../lib/Target/MBlaze/MBlazeFrameLowering.cpp | 450 +
final/lib/Target/MBlaze/MBlazeFrameLowering.h | 53 +
.../lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 277 +
.../lib/Target/MBlaze/MBlazeISelLowering.cpp | 1171 +
final/lib/Target/MBlaze/MBlazeISelLowering.h | 190 +
final/lib/Target/MBlaze/MBlazeInstrFPU.td | 224 +
final/lib/Target/MBlaze/MBlazeInstrFSL.td | 229 +
final/lib/Target/MBlaze/MBlazeInstrFormats.td | 204 +
final/lib/Target/MBlaze/MBlazeInstrInfo.cpp | 291 +
final/lib/Target/MBlaze/MBlazeInstrInfo.h | 294 +
final/lib/Target/MBlaze/MBlazeInstrInfo.td | 881 +
.../lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 113 +
final/lib/Target/MBlaze/MBlazeIntrinsicInfo.h | 33 +
final/lib/Target/MBlaze/MBlazeIntrinsics.td | 131 +
final/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp | 22 +
final/lib/Target/MBlaze/MBlazeMCAsmInfo.h | 30 +
.../lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp | 223 +
final/lib/Target/MBlaze/MBlazeMCInstLower.cpp | 166 +
final/lib/Target/MBlaze/MBlazeMCInstLower.h | 50 +
.../lib/Target/MBlaze/MBlazeMachineFunction.h | 170 +
.../lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 340 +
final/lib/Target/MBlaze/MBlazeRegisterInfo.h | 80 +
final/lib/Target/MBlaze/MBlazeRegisterInfo.td | 190 +
final/lib/Target/MBlaze/MBlazeRelocations.h | 47 +
final/lib/Target/MBlaze/MBlazeSchedule.td | 64 +
.../Target/MBlaze/MBlazeSelectionDAGInfo.cpp | 23 +
.../Target/MBlaze/MBlazeSelectionDAGInfo.h | 31 +
final/lib/Target/MBlaze/MBlazeSubtarget.cpp | 31 +
final/lib/Target/MBlaze/MBlazeSubtarget.h | 79 +
.../lib/Target/MBlaze/MBlazeTargetMachine.cpp | 114 +
final/lib/Target/MBlaze/MBlazeTargetMachine.h | 79 +
.../Target/MBlaze/MBlazeTargetObjectFile.cpp | 90 +
.../Target/MBlaze/MBlazeTargetObjectFile.h | 40 +
final/lib/Target/MBlaze/Makefile | 25 +
final/lib/Target/MBlaze/TODO | 26 +
.../Target/MBlaze/TargetInfo/CMakeLists.txt | 8 +
.../MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 19 +
final/lib/Target/MBlaze/TargetInfo/Makefile | 15 +
final/lib/Target/MSP430/CMakeLists.txt | 29 +
.../Target/MSP430/InstPrinter/CMakeLists.txt | 6 +
.../MSP430/InstPrinter/MSP430InstPrinter.cpp | 113 +
.../MSP430/InstPrinter/MSP430InstPrinter.h | 43 +
final/lib/Target/MSP430/InstPrinter/Makefile | 15 +
final/lib/Target/MSP430/MSP430.h | 55 +
final/lib/Target/MSP430/MSP430.td | 66 +
final/lib/Target/MSP430/MSP430AsmPrinter.cpp | 179 +
.../Target/MSP430/MSP430BranchSelector.cpp | 180 +
final/lib/Target/MSP430/MSP430CallingConv.td | 37 +
.../lib/Target/MSP430/MSP430FrameLowering.cpp | 223 +
final/lib/Target/MSP430/MSP430FrameLowering.h | 53 +
.../lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 492 +
.../lib/Target/MSP430/MSP430ISelLowering.cpp | 1199 +
final/lib/Target/MSP430/MSP430ISelLowering.h | 185 +
final/lib/Target/MSP430/MSP430InstrFormats.td | 211 +
final/lib/Target/MSP430/MSP430InstrInfo.cpp | 333 +
final/lib/Target/MSP430/MSP430InstrInfo.h | 89 +
final/lib/Target/MSP430/MSP430InstrInfo.td | 1211 +
final/lib/Target/MSP430/MSP430MCAsmInfo.cpp | 26 +
final/lib/Target/MSP430/MSP430MCAsmInfo.h | 29 +
final/lib/Target/MSP430/MSP430MCInstLower.cpp | 150 +
final/lib/Target/MSP430/MSP430MCInstLower.h | 50 +
.../Target/MSP430/MSP430MachineFunctionInfo.h | 46 +
.../lib/Target/MSP430/MSP430RegisterInfo.cpp | 245 +
final/lib/Target/MSP430/MSP430RegisterInfo.h | 61 +
final/lib/Target/MSP430/MSP430RegisterInfo.td | 119 +
.../Target/MSP430/MSP430SelectionDAGInfo.cpp | 23 +
.../Target/MSP430/MSP430SelectionDAGInfo.h | 31 +
final/lib/Target/MSP430/MSP430Subtarget.cpp | 25 +
final/lib/Target/MSP430/MSP430Subtarget.h | 38 +
.../lib/Target/MSP430/MSP430TargetMachine.cpp | 52 +
final/lib/Target/MSP430/MSP430TargetMachine.h | 69 +
final/lib/Target/MSP430/Makefile | 24 +
final/lib/Target/MSP430/README.txt | 40 +
.../Target/MSP430/TargetInfo/CMakeLists.txt | 7 +
.../MSP430/TargetInfo/MSP430TargetInfo.cpp | 20 +
final/lib/Target/MSP430/TargetInfo/Makefile | 15 +
final/lib/Target/Makefile | 20 +
final/lib/Target/Mangler.cpp | 235 +
final/lib/Target/Mips/CMakeLists.txt | 28 +
final/lib/Target/Mips/Makefile | 24 +
final/lib/Target/Mips/Mips.h | 41 +
final/lib/Target/Mips/Mips.td | 93 +
final/lib/Target/Mips/MipsAsmPrinter.cpp | 399 +
final/lib/Target/Mips/MipsCallingConv.td | 86 +
final/lib/Target/Mips/MipsDelaySlotFiller.cpp | 82 +
final/lib/Target/Mips/MipsFrameLowering.cpp | 390 +
final/lib/Target/Mips/MipsFrameLowering.h | 48 +
final/lib/Target/Mips/MipsISelDAGToDAG.cpp | 525 +
final/lib/Target/Mips/MipsISelLowering.cpp | 1654 ++
final/lib/Target/Mips/MipsISelLowering.h | 174 +
final/lib/Target/Mips/MipsInstrFPU.td | 316 +
final/lib/Target/Mips/MipsInstrFormats.td | 182 +
final/lib/Target/Mips/MipsInstrInfo.cpp | 522 +
final/lib/Target/Mips/MipsInstrInfo.h | 235 +
final/lib/Target/Mips/MipsInstrInfo.td | 681 +
final/lib/Target/Mips/MipsMCAsmInfo.cpp | 27 +
final/lib/Target/Mips/MipsMCAsmInfo.h | 30 +
final/lib/Target/Mips/MipsMachineFunction.h | 147 +
final/lib/Target/Mips/MipsRegisterInfo.cpp | 258 +
final/lib/Target/Mips/MipsRegisterInfo.h | 70 +
final/lib/Target/Mips/MipsRegisterInfo.td | 264 +
final/lib/Target/Mips/MipsSchedule.td | 63 +
.../lib/Target/Mips/MipsSelectionDAGInfo.cpp | 23 +
final/lib/Target/Mips/MipsSelectionDAGInfo.h | 31 +
final/lib/Target/Mips/MipsSubtarget.cpp | 51 +
final/lib/Target/Mips/MipsSubtarget.h | 125 +
final/lib/Target/Mips/MipsTargetMachine.cpp | 77 +
final/lib/Target/Mips/MipsTargetMachine.h | 78 +
.../lib/Target/Mips/MipsTargetObjectFile.cpp | 102 +
final/lib/Target/Mips/MipsTargetObjectFile.h | 41 +
.../lib/Target/Mips/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/Mips/TargetInfo/Makefile | 15 +
.../Target/Mips/TargetInfo/MipsTargetInfo.cpp | 21 +
final/lib/Target/PTX/CMakeLists.txt | 26 +
final/lib/Target/PTX/Makefile | 26 +
final/lib/Target/PTX/PTX.h | 49 +
final/lib/Target/PTX/PTX.td | 83 +
final/lib/Target/PTX/PTXAsmPrinter.cpp | 402 +
final/lib/Target/PTX/PTXFrameLowering.cpp | 24 +
final/lib/Target/PTX/PTXFrameLowering.h | 44 +
final/lib/Target/PTX/PTXISelDAGToDAG.cpp | 180 +
final/lib/Target/PTX/PTXISelLowering.cpp | 239 +
final/lib/Target/PTX/PTXISelLowering.h | 67 +
final/lib/Target/PTX/PTXInstrFormats.td | 24 +
final/lib/Target/PTX/PTXInstrInfo.cpp | 97 +
final/lib/Target/PTX/PTXInstrInfo.h | 75 +
final/lib/Target/PTX/PTXInstrInfo.td | 396 +
final/lib/Target/PTX/PTXIntrinsicInstrInfo.td | 35 +
final/lib/Target/PTX/PTXMCAsmInfo.cpp | 30 +
final/lib/Target/PTX/PTXMCAsmInfo.h | 28 +
final/lib/Target/PTX/PTXMCAsmStreamer.cpp | 543 +
final/lib/Target/PTX/PTXMFInfoExtract.cpp | 96 +
final/lib/Target/PTX/PTXMachineFunctionInfo.h | 82 +
final/lib/Target/PTX/PTXRegisterInfo.cpp | 19 +
final/lib/Target/PTX/PTXRegisterInfo.h | 63 +
final/lib/Target/PTX/PTXRegisterInfo.td | 271 +
final/lib/Target/PTX/PTXSubtarget.cpp | 46 +
final/lib/Target/PTX/PTXSubtarget.h | 62 +
final/lib/Target/PTX/PTXTargetMachine.cpp | 71 +
final/lib/Target/PTX/PTXTargetMachine.h | 60 +
.../lib/Target/PTX/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/PTX/TargetInfo/Makefile | 15 +
.../Target/PTX/TargetInfo/PTXTargetInfo.cpp | 21 +
final/lib/Target/PowerPC/CMakeLists.txt | 37 +
.../Target/PowerPC/InstPrinter/CMakeLists.txt | 6 +
final/lib/Target/PowerPC/InstPrinter/Makefile | 16 +
.../PowerPC/InstPrinter/PPCInstPrinter.cpp | 292 +
.../PowerPC/InstPrinter/PPCInstPrinter.h | 69 +
final/lib/Target/PowerPC/Makefile | 24 +
final/lib/Target/PowerPC/PPC.h | 93 +
final/lib/Target/PowerPC/PPC.td | 112 +
final/lib/Target/PowerPC/PPCAsmBackend.cpp | 119 +
final/lib/Target/PowerPC/PPCAsmPrinter.cpp | 696 +
.../lib/Target/PowerPC/PPCBranchSelector.cpp | 174 +
final/lib/Target/PowerPC/PPCCallingConv.td | 132 +
final/lib/Target/PowerPC/PPCCodeEmitter.cpp | 261 +
final/lib/Target/PowerPC/PPCFixupKinds.h | 45 +
final/lib/Target/PowerPC/PPCFrameLowering.cpp | 971 +
final/lib/Target/PowerPC/PPCFrameLowering.h | 322 +
.../Target/PowerPC/PPCHazardRecognizers.cpp | 308 +
.../lib/Target/PowerPC/PPCHazardRecognizers.h | 73 +
final/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 1077 +
final/lib/Target/PowerPC/PPCISelLowering.cpp | 5634 ++++
final/lib/Target/PowerPC/PPCISelLowering.h | 489 +
final/lib/Target/PowerPC/PPCInstr64Bit.td | 744 +
final/lib/Target/PowerPC/PPCInstrAltivec.td | 695 +
final/lib/Target/PowerPC/PPCInstrBuilder.h | 43 +
final/lib/Target/PowerPC/PPCInstrFormats.td | 907 +
final/lib/Target/PowerPC/PPCInstrInfo.cpp | 651 +
final/lib/Target/PowerPC/PPCInstrInfo.h | 146 +
final/lib/Target/PowerPC/PPCInstrInfo.td | 1477 +
final/lib/Target/PowerPC/PPCJITInfo.cpp | 446 +
final/lib/Target/PowerPC/PPCJITInfo.h | 49 +
final/lib/Target/PowerPC/PPCMCAsmInfo.cpp | 58 +
final/lib/Target/PowerPC/PPCMCAsmInfo.h | 31 +
final/lib/Target/PowerPC/PPCMCCodeEmitter.cpp | 195 +
final/lib/Target/PowerPC/PPCMCInstLower.cpp | 172 +
.../Target/PowerPC/PPCMachineFunctionInfo.h | 132 +
final/lib/Target/PowerPC/PPCPerfectShuffle.h | 6586 +++++
final/lib/Target/PowerPC/PPCPredicates.cpp | 31 +
final/lib/Target/PowerPC/PPCPredicates.h | 39 +
final/lib/Target/PowerPC/PPCRegisterInfo.cpp | 694 +
final/lib/Target/PowerPC/PPCRegisterInfo.h | 75 +
final/lib/Target/PowerPC/PPCRegisterInfo.td | 389 +
final/lib/Target/PowerPC/PPCRelocations.h | 56 +
final/lib/Target/PowerPC/PPCSchedule.td | 505 +
final/lib/Target/PowerPC/PPCScheduleG3.td | 64 +
final/lib/Target/PowerPC/PPCScheduleG4.td | 74 +
final/lib/Target/PowerPC/PPCScheduleG4Plus.td | 80 +
final/lib/Target/PowerPC/PPCScheduleG5.td | 84 +
.../Target/PowerPC/PPCSelectionDAGInfo.cpp | 23 +
.../lib/Target/PowerPC/PPCSelectionDAGInfo.h | 31 +
final/lib/Target/PowerPC/PPCSubtarget.cpp | 142 +
final/lib/Target/PowerPC/PPCSubtarget.h | 151 +
final/lib/Target/PowerPC/PPCTargetMachine.cpp | 147 +
final/lib/Target/PowerPC/PPCTargetMachine.h | 93 +
final/lib/Target/PowerPC/README.txt | 939 +
final/lib/Target/PowerPC/README_ALTIVEC.txt | 211 +
.../Target/PowerPC/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/PowerPC/TargetInfo/Makefile | 15 +
.../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 23 +
final/lib/Target/README.txt | 2268 ++
final/lib/Target/Sparc/CMakeLists.txt | 28 +
final/lib/Target/Sparc/DelaySlotFiller.cpp | 323 +
final/lib/Target/Sparc/FPMover.cpp | 141 +
final/lib/Target/Sparc/Makefile | 23 +
final/lib/Target/Sparc/README.txt | 59 +
final/lib/Target/Sparc/Sparc.h | 121 +
final/lib/Target/Sparc/Sparc.td | 72 +
final/lib/Target/Sparc/SparcAsmPrinter.cpp | 251 +
final/lib/Target/Sparc/SparcCallingConv.td | 36 +
final/lib/Target/Sparc/SparcFrameLowering.cpp | 80 +
final/lib/Target/Sparc/SparcFrameLowering.h | 41 +
final/lib/Target/Sparc/SparcISelDAGToDAG.cpp | 212 +
final/lib/Target/Sparc/SparcISelLowering.cpp | 1297 +
final/lib/Target/Sparc/SparcISelLowering.h | 109 +
final/lib/Target/Sparc/SparcInstrFormats.td | 114 +
final/lib/Target/Sparc/SparcInstrInfo.cpp | 342 +
final/lib/Target/Sparc/SparcInstrInfo.h | 97 +
final/lib/Target/Sparc/SparcInstrInfo.td | 825 +
final/lib/Target/Sparc/SparcMCAsmInfo.cpp | 34 +
final/lib/Target/Sparc/SparcMCAsmInfo.h | 29 +
.../Target/Sparc/SparcMachineFunctionInfo.h | 47 +
final/lib/Target/Sparc/SparcRegisterInfo.cpp | 134 +
final/lib/Target/Sparc/SparcRegisterInfo.h | 59 +
final/lib/Target/Sparc/SparcRegisterInfo.td | 175 +
.../Target/Sparc/SparcSelectionDAGInfo.cpp | 23 +
.../lib/Target/Sparc/SparcSelectionDAGInfo.h | 31 +
final/lib/Target/Sparc/SparcSubtarget.cpp | 34 +
final/lib/Target/Sparc/SparcSubtarget.h | 54 +
final/lib/Target/Sparc/SparcTargetMachine.cpp | 67 +
final/lib/Target/Sparc/SparcTargetMachine.h | 78 +
.../Target/Sparc/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/Sparc/TargetInfo/Makefile | 15 +
.../Sparc/TargetInfo/SparcTargetInfo.cpp | 21 +
final/lib/Target/SubtargetFeature.cpp | 384 +
final/lib/Target/SystemZ/CMakeLists.txt | 26 +
final/lib/Target/SystemZ/Makefile | 23 +
final/lib/Target/SystemZ/SystemZ.h | 61 +
final/lib/Target/SystemZ/SystemZ.td | 61 +
.../lib/Target/SystemZ/SystemZAsmPrinter.cpp | 223 +
.../lib/Target/SystemZ/SystemZCallingConv.td | 46 +
.../Target/SystemZ/SystemZFrameLowering.cpp | 386 +
.../lib/Target/SystemZ/SystemZFrameLowering.h | 57 +
.../Target/SystemZ/SystemZISelDAGToDAG.cpp | 779 +
.../Target/SystemZ/SystemZISelLowering.cpp | 863 +
.../lib/Target/SystemZ/SystemZISelLowering.h | 150 +
.../lib/Target/SystemZ/SystemZInstrBuilder.h | 128 +
final/lib/Target/SystemZ/SystemZInstrFP.td | 340 +
.../lib/Target/SystemZ/SystemZInstrFormats.td | 133 +
final/lib/Target/SystemZ/SystemZInstrInfo.cpp | 435 +
final/lib/Target/SystemZ/SystemZInstrInfo.h | 110 +
final/lib/Target/SystemZ/SystemZInstrInfo.td | 1147 +
final/lib/Target/SystemZ/SystemZMCAsmInfo.cpp | 30 +
final/lib/Target/SystemZ/SystemZMCAsmInfo.h | 30 +
.../SystemZ/SystemZMachineFunctionInfo.h | 51 +
final/lib/Target/SystemZ/SystemZOperands.td | 325 +
.../Target/SystemZ/SystemZRegisterInfo.cpp | 128 +
.../lib/Target/SystemZ/SystemZRegisterInfo.h | 57 +
.../lib/Target/SystemZ/SystemZRegisterInfo.td | 478 +
.../SystemZ/SystemZSelectionDAGInfo.cpp | 23 +
.../Target/SystemZ/SystemZSelectionDAGInfo.h | 31 +
final/lib/Target/SystemZ/SystemZSubtarget.cpp | 47 +
final/lib/Target/SystemZ/SystemZSubtarget.h | 45 +
.../Target/SystemZ/SystemZTargetMachine.cpp | 44 +
.../lib/Target/SystemZ/SystemZTargetMachine.h | 67 +
.../Target/SystemZ/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/SystemZ/TargetInfo/Makefile | 15 +
.../SystemZ/TargetInfo/SystemZTargetInfo.cpp | 19 +
final/lib/Target/Target.cpp | 106 +
final/lib/Target/TargetAsmInfo.cpp | 27 +
final/lib/Target/TargetAsmLexer.cpp | 14 +
final/lib/Target/TargetData.cpp | 639 +
final/lib/Target/TargetELFWriterInfo.cpp | 25 +
final/lib/Target/TargetFrameLowering.cpp | 53 +
final/lib/Target/TargetInstrInfo.cpp | 178 +
final/lib/Target/TargetIntrinsicInfo.cpp | 30 +
final/lib/Target/TargetLibraryInfo.cpp | 61 +
final/lib/Target/TargetLoweringObjectFile.cpp | 349 +
final/lib/Target/TargetMachine.cpp | 306 +
final/lib/Target/TargetRegisterInfo.cpp | 150 +
final/lib/Target/TargetSubtarget.cpp | 33 +
final/lib/Target/X86/AsmParser/CMakeLists.txt | 7 +
final/lib/Target/X86/AsmParser/Makefile | 15 +
.../lib/Target/X86/AsmParser/X86AsmLexer.cpp | 165 +
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 1034 +
final/lib/Target/X86/CMakeLists.txt | 64 +
.../Target/X86/Disassembler/CMakeLists.txt | 14 +
final/lib/Target/X86/Disassembler/Makefile | 16 +
.../X86/Disassembler/X86Disassembler.cpp | 554 +
.../Target/X86/Disassembler/X86Disassembler.h | 155 +
.../X86/Disassembler/X86DisassemblerDecoder.c | 1385 +
.../X86/Disassembler/X86DisassemblerDecoder.h | 515 +
.../X86DisassemblerDecoderCommon.h | 356 +
.../lib/Target/X86/InstPrinter/CMakeLists.txt | 8 +
final/lib/Target/X86/InstPrinter/Makefile | 15 +
.../X86/InstPrinter/X86ATTInstPrinter.cpp | 127 +
.../X86/InstPrinter/X86ATTInstPrinter.h | 81 +
.../X86/InstPrinter/X86InstComments.cpp | 260 +
.../Target/X86/InstPrinter/X86InstComments.h | 25 +
.../X86/InstPrinter/X86IntelInstPrinter.cpp | 139 +
.../X86/InstPrinter/X86IntelInstPrinter.h | 95 +
final/lib/Target/X86/Makefile | 25 +
final/lib/Target/X86/README-FPStack.txt | 85 +
final/lib/Target/X86/README-MMX.txt | 71 +
final/lib/Target/X86/README-SSE.txt | 937 +
final/lib/Target/X86/README-UNIMPLEMENTED.txt | 14 +
final/lib/Target/X86/README-X86-64.txt | 229 +
final/lib/Target/X86/README.txt | 1949 ++
final/lib/Target/X86/SSEDomainFix.cpp | 506 +
.../lib/Target/X86/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/X86/TargetInfo/Makefile | 16 +
.../Target/X86/TargetInfo/X86TargetInfo.cpp | 23 +
final/lib/Target/X86/Utils/CMakeLists.txt | 6 +
final/lib/Target/X86/Utils/Makefile | 15 +
.../lib/Target/X86/Utils/X86ShuffleDecode.cpp | 190 +
final/lib/Target/X86/Utils/X86ShuffleDecode.h | 87 +
final/lib/Target/X86/X86.h | 100 +
final/lib/Target/X86/X86.td | 224 +
final/lib/Target/X86/X86AsmBackend.cpp | 441 +
final/lib/Target/X86/X86AsmPrinter.cpp | 728 +
final/lib/Target/X86/X86AsmPrinter.h | 87 +
.../Target/X86/X86COFFMachineModuleInfo.cpp | 20 +
.../lib/Target/X86/X86COFFMachineModuleInfo.h | 46 +
final/lib/Target/X86/X86CallingConv.td | 420 +
final/lib/Target/X86/X86CodeEmitter.cpp | 997 +
.../X86/X86CompilationCallback_Win64.asm | 68 +
final/lib/Target/X86/X86ELFWriterInfo.cpp | 153 +
final/lib/Target/X86/X86ELFWriterInfo.h | 59 +
final/lib/Target/X86/X86FastISel.cpp | 1956 ++
final/lib/Target/X86/X86FixupKinds.h | 33 +
final/lib/Target/X86/X86FloatingPoint.cpp | 1521 +
final/lib/Target/X86/X86FrameLowering.cpp | 1016 +
final/lib/Target/X86/X86FrameLowering.h | 65 +
final/lib/Target/X86/X86ISelDAGToDAG.cpp | 2007 ++
final/lib/Target/X86/X86ISelLowering.cpp | 12814 ++++++++
final/lib/Target/X86/X86ISelLowering.h | 939 +
final/lib/Target/X86/X86Instr3DNow.td | 77 +
final/lib/Target/X86/X86InstrArithmetic.td | 1125 +
final/lib/Target/X86/X86InstrBuilder.h | 184 +
final/lib/Target/X86/X86InstrCMovSetCC.td | 104 +
final/lib/Target/X86/X86InstrCompiler.td | 1626 ++
final/lib/Target/X86/X86InstrControl.td | 294 +
final/lib/Target/X86/X86InstrExtension.td | 172 +
final/lib/Target/X86/X86InstrFMA.td | 60 +
final/lib/Target/X86/X86InstrFPStack.td | 684 +
final/lib/Target/X86/X86InstrFormats.td | 528 +
final/lib/Target/X86/X86InstrFragmentsSIMD.td | 461 +
final/lib/Target/X86/X86InstrInfo.cpp | 3194 ++
final/lib/Target/X86/X86InstrInfo.h | 882 +
final/lib/Target/X86/X86InstrInfo.td | 1628 ++
final/lib/Target/X86/X86InstrMMX.td | 454 +
final/lib/Target/X86/X86InstrSSE.td | 5881 ++++
final/lib/Target/X86/X86InstrShiftRotate.td | 746 +
final/lib/Target/X86/X86InstrSystem.td | 400 +
final/lib/Target/X86/X86InstrVMX.td | 54 +
final/lib/Target/X86/X86JITInfo.cpp | 574 +
final/lib/Target/X86/X86JITInfo.h | 81 +
final/lib/Target/X86/X86MCAsmInfo.cpp | 116 +
final/lib/Target/X86/X86MCAsmInfo.h | 38 +
final/lib/Target/X86/X86MCCodeEmitter.cpp | 1033 +
final/lib/Target/X86/X86MCInstLower.cpp | 696 +
final/lib/Target/X86/X86MCInstLower.h | 52 +
final/lib/Target/X86/X86MachObjectWriter.cpp | 32 +
final/lib/Target/X86/X86MachineFunctionInfo.h | 135 +
final/lib/Target/X86/X86RegisterInfo.cpp | 868 +
final/lib/Target/X86/X86RegisterInfo.h | 150 +
final/lib/Target/X86/X86RegisterInfo.td | 853 +
final/lib/Target/X86/X86Relocations.h | 52 +
final/lib/Target/X86/X86SelectionDAGInfo.cpp | 259 +
final/lib/Target/X86/X86SelectionDAGInfo.h | 56 +
final/lib/Target/X86/X86Subtarget.cpp | 376 +
final/lib/Target/X86/X86Subtarget.h | 259 +
final/lib/Target/X86/X86TargetMachine.cpp | 265 +
final/lib/Target/X86/X86TargetMachine.h | 134 +
final/lib/Target/X86/X86TargetObjectFile.cpp | 118 +
final/lib/Target/X86/X86TargetObjectFile.h | 54 +
final/lib/Target/XCore/CMakeLists.txt | 27 +
final/lib/Target/XCore/Makefile | 24 +
final/lib/Target/XCore/README.txt | 7 +
.../Target/XCore/TargetInfo/CMakeLists.txt | 7 +
final/lib/Target/XCore/TargetInfo/Makefile | 16 +
.../XCore/TargetInfo/XCoreTargetInfo.cpp | 19 +
final/lib/Target/XCore/XCore.h | 41 +
final/lib/Target/XCore/XCore.td | 46 +
final/lib/Target/XCore/XCoreAsmPrinter.cpp | 280 +
final/lib/Target/XCore/XCoreCallingConv.td | 36 +
final/lib/Target/XCore/XCoreFrameLowering.cpp | 387 +
final/lib/Target/XCore/XCoreFrameLowering.h | 59 +
final/lib/Target/XCore/XCoreISelDAGToDAG.cpp | 215 +
final/lib/Target/XCore/XCoreISelLowering.cpp | 1620 ++
final/lib/Target/XCore/XCoreISelLowering.h | 203 +
final/lib/Target/XCore/XCoreInstrFormats.td | 120 +
final/lib/Target/XCore/XCoreInstrInfo.cpp | 395 +
final/lib/Target/XCore/XCoreInstrInfo.h | 85 +
final/lib/Target/XCore/XCoreInstrInfo.td | 1121 +
final/lib/Target/XCore/XCoreMCAsmInfo.cpp | 29 +
final/lib/Target/XCore/XCoreMCAsmInfo.h | 30 +
.../Target/XCore/XCoreMachineFunctionInfo.h | 69 +
final/lib/Target/XCore/XCoreRegisterInfo.cpp | 324 +
final/lib/Target/XCore/XCoreRegisterInfo.h | 80 +
final/lib/Target/XCore/XCoreRegisterInfo.td | 91 +
.../Target/XCore/XCoreSelectionDAGInfo.cpp | 23 +
.../lib/Target/XCore/XCoreSelectionDAGInfo.h | 31 +
final/lib/Target/XCore/XCoreSubtarget.cpp | 20 +
final/lib/Target/XCore/XCoreSubtarget.h | 39 +
final/lib/Target/XCore/XCoreTargetMachine.cpp | 45 +
final/lib/Target/XCore/XCoreTargetMachine.h | 62 +
.../Target/XCore/XCoreTargetObjectFile.cpp | 65 +
.../lib/Target/XCore/XCoreTargetObjectFile.h | 25 +
final/lib/Transforms/CMakeLists.txt | 6 +
final/lib/Transforms/Hello/CMakeLists.txt | 3 +
final/lib/Transforms/Hello/Hello.cpp | 64 +
final/lib/Transforms/Hello/Hello.exports | 0
final/lib/Transforms/Hello/Makefile | 24 +
.../lib/Transforms/IPO/ArgumentPromotion.cpp | 906 +
final/lib/Transforms/IPO/CMakeLists.txt | 24 +
final/lib/Transforms/IPO/ConstantMerge.cpp | 190 +
.../IPO/DeadArgumentElimination.cpp | 1003 +
.../Transforms/IPO/DeadTypeElimination.cpp | 111 +
final/lib/Transforms/IPO/ExtractGV.cpp | 80 +
final/lib/Transforms/IPO/FunctionAttrs.cpp | 380 +
final/lib/Transforms/IPO/GlobalDCE.cpp | 211 +
final/lib/Transforms/IPO/GlobalOpt.cpp | 2728 ++
.../Transforms/IPO/IPConstantPropagation.cpp | 279 +
final/lib/Transforms/IPO/IPO.cpp | 118 +
final/lib/Transforms/IPO/InlineAlways.cpp | 85 +
final/lib/Transforms/IPO/InlineSimple.cpp | 118 +
final/lib/Transforms/IPO/Inliner.cpp | 572 +
final/lib/Transforms/IPO/Internalize.cpp | 194 +
final/lib/Transforms/IPO/LoopExtractor.cpp | 248 +
final/lib/Transforms/IPO/LowerSetJmp.cpp | 547 +
final/lib/Transforms/IPO/Makefile | 15 +
final/lib/Transforms/IPO/MergeFunctions.cpp | 868 +
final/lib/Transforms/IPO/PartialInlining.cpp | 182 +
final/lib/Transforms/IPO/PruneEH.cpp | 257 +
.../Transforms/IPO/StripDeadPrototypes.cpp | 73 +
final/lib/Transforms/IPO/StripSymbols.cpp | 408 +
.../lib/Transforms/IPO/StructRetPromotion.cpp | 357 +
.../lib/Transforms/InstCombine/CMakeLists.txt | 15 +
.../lib/Transforms/InstCombine/InstCombine.h | 356 +
.../InstCombine/InstCombineAddSub.cpp | 697 +
.../InstCombine/InstCombineAndOrXor.cpp | 2240 ++
.../InstCombine/InstCombineCalls.cpp | 1259 +
.../InstCombine/InstCombineCasts.cpp | 1709 ++
.../InstCombine/InstCombineCompares.cpp | 2817 ++
.../InstCombineLoadStoreAlloca.cpp | 642 +
.../InstCombine/InstCombineMulDivRem.cpp | 622 +
.../Transforms/InstCombine/InstCombinePHI.cpp | 891 +
.../InstCombine/InstCombineSelect.cpp | 815 +
.../InstCombine/InstCombineShifts.cpp | 746 +
.../InstCombineSimplifyDemanded.cpp | 1145 +
.../InstCombine/InstCombineVectorOps.cpp | 567 +
.../InstCombine/InstCombineWorklist.h | 106 +
.../InstCombine/InstructionCombining.cpp | 1662 ++
final/lib/Transforms/InstCombine/Makefile | 15 +
.../Transforms/Instrumentation/CMakeLists.txt | 7 +
.../Instrumentation/EdgeProfiling.cpp | 117 +
.../Instrumentation/Instrumentation.cpp | 32 +
final/lib/Transforms/Instrumentation/Makefile | 15 +
.../Instrumentation/MaximumSpanningTree.h | 108 +
.../Instrumentation/OptimalEdgeProfiling.cpp | 225 +
.../Instrumentation/PathProfiling.cpp | 1423 +
.../Instrumentation/ProfilingUtils.cpp | 133 +
.../Instrumentation/ProfilingUtils.h | 34 +
final/lib/Transforms/Makefile | 20 +
final/lib/Transforms/Scalar/ADCE.cpp | 97 +
.../Transforms/Scalar/BasicBlockPlacement.cpp | 152 +
final/lib/Transforms/Scalar/CMakeLists.txt | 33 +
.../lib/Transforms/Scalar/CodeGenPrepare.cpp | 1113 +
final/lib/Transforms/Scalar/ConstantProp.cpp | 91 +
.../Scalar/CorrelatedValuePropagation.cpp | 206 +
final/lib/Transforms/Scalar/DCE.cpp | 136 +
.../Scalar/DeadStoreElimination.cpp | 741 +
final/lib/Transforms/Scalar/EarlyCSE.cpp | 470 +
final/lib/Transforms/Scalar/GVN.cpp | 2044 ++
.../lib/Transforms/Scalar/IndVarSimplify.cpp | 1043 +
final/lib/Transforms/Scalar/JumpThreading.cpp | 1576 +
final/lib/Transforms/Scalar/LICM.cpp | 789 +
final/lib/Transforms/Scalar/LoopDeletion.cpp | 246 +
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 606 +
.../Transforms/Scalar/LoopInstSimplify.cpp | 170 +
final/lib/Transforms/Scalar/LoopRotation.cpp | 348 +
.../Transforms/Scalar/LoopStrengthReduce.cpp | 3845 +++
.../lib/Transforms/Scalar/LoopUnrollPass.cpp | 182 +
final/lib/Transforms/Scalar/LoopUnswitch.cpp | 1045 +
final/lib/Transforms/Scalar/LowerAtomic.cpp | 139 +
final/lib/Transforms/Scalar/Makefile | 15 +
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 946 +
final/lib/Transforms/Scalar/Reassociate.cpp | 1101 +
final/lib/Transforms/Scalar/Reg2Mem.cpp | 134 +
final/lib/Transforms/Scalar/SCCP.cpp | 2010 ++
final/lib/Transforms/Scalar/Scalar.cpp | 159 +
.../Scalar/ScalarReplAggregates.cpp | 2336 ++
.../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 329 +
.../Transforms/Scalar/SimplifyLibCalls.cpp | 2388 ++
final/lib/Transforms/Scalar/Sink.cpp | 274 +
.../lib/Transforms/Scalar/TailDuplication.cpp | 373 +
.../Scalar/TailRecursionElimination.cpp | 630 +
.../lib/Transforms/Utils/AddrModeMatcher.cpp | 582 +
.../lib/Transforms/Utils/BasicBlockUtils.cpp | 540 +
final/lib/Transforms/Utils/BasicInliner.cpp | 182 +
.../Transforms/Utils/BreakCriticalEdges.cpp | 407 +
final/lib/Transforms/Utils/BuildLibCalls.cpp | 483 +
final/lib/Transforms/Utils/CMakeLists.txt | 29 +
final/lib/Transforms/Utils/CloneFunction.cpp | 586 +
final/lib/Transforms/Utils/CloneLoop.cpp | 128 +
final/lib/Transforms/Utils/CloneModule.cpp | 137 +
final/lib/Transforms/Utils/CodeExtractor.cpp | 795 +
.../lib/Transforms/Utils/DemoteRegToStack.cpp | 146 +
final/lib/Transforms/Utils/InlineFunction.cpp | 709 +
.../lib/Transforms/Utils/InstructionNamer.cpp | 64 +
final/lib/Transforms/Utils/LCSSA.cpp | 268 +
final/lib/Transforms/Utils/Local.cpp | 757 +
final/lib/Transforms/Utils/LoopSimplify.cpp | 753 +
final/lib/Transforms/Utils/LoopUnroll.cpp | 388 +
final/lib/Transforms/Utils/LowerInvoke.cpp | 613 +
final/lib/Transforms/Utils/LowerSwitch.cpp | 323 +
final/lib/Transforms/Utils/Makefile | 15 +
final/lib/Transforms/Utils/Mem2Reg.cpp | 90 +
.../Utils/PromoteMemoryToRegister.cpp | 1119 +
final/lib/Transforms/Utils/SSAUpdater.cpp | 511 +
final/lib/Transforms/Utils/SimplifyCFG.cpp | 2566 ++
.../Transforms/Utils/SimplifyInstructions.cpp | 94 +
.../Utils/UnifyFunctionExitNodes.cpp | 141 +
final/lib/Transforms/Utils/Utils.cpp | 37 +
final/lib/Transforms/Utils/ValueMapper.cpp | 141 +
final/lib/VMCore/AsmWriter.cpp | 2169 ++
final/lib/VMCore/Attributes.cpp | 349 +
final/lib/VMCore/AutoUpgrade.cpp | 1406 +
final/lib/VMCore/BasicBlock.cpp | 310 +
final/lib/VMCore/CMakeLists.txt | 38 +
final/lib/VMCore/ConstantFold.cpp | 2322 ++
final/lib/VMCore/ConstantFold.h | 56 +
final/lib/VMCore/Constants.cpp | 2228 ++
final/lib/VMCore/ConstantsContext.h | 845 +
final/lib/VMCore/Core.cpp | 2241 ++
final/lib/VMCore/DebugLoc.cpp | 288 +
final/lib/VMCore/Dominators.cpp | 107 +
final/lib/VMCore/Function.cpp | 409 +
final/lib/VMCore/GVMaterializer.cpp | 18 +
final/lib/VMCore/Globals.cpp | 261 +
final/lib/VMCore/IRBuilder.cpp | 119 +
final/lib/VMCore/InlineAsm.cpp | 289 +
final/lib/VMCore/Instruction.cpp | 436 +
final/lib/VMCore/Instructions.cpp | 3332 +++
final/lib/VMCore/IntrinsicInst.cpp | 73 +
final/lib/VMCore/LLVMContext.cpp | 143 +
final/lib/VMCore/LLVMContextImpl.cpp | 115 +
final/lib/VMCore/LLVMContextImpl.h | 250 +
final/lib/VMCore/LeakDetector.cpp | 69 +
final/lib/VMCore/LeaksContext.h | 92 +
final/lib/VMCore/Makefile | 34 +
final/lib/VMCore/Metadata.cpp | 565 +
final/lib/VMCore/Module.cpp | 473 +
final/lib/VMCore/Pass.cpp | 293 +
final/lib/VMCore/PassManager.cpp | 1849 ++
final/lib/VMCore/PassRegistry.cpp | 208 +
final/lib/VMCore/PrintModulePass.cpp | 101 +
final/lib/VMCore/SymbolTableListTraitsImpl.h | 118 +
final/lib/VMCore/Type.cpp | 1230 +
final/lib/VMCore/TypeSymbolTable.cpp | 169 +
final/lib/VMCore/TypesContext.h | 425 +
final/lib/VMCore/Use.cpp | 146 +
final/lib/VMCore/User.cpp | 81 +
final/lib/VMCore/Value.cpp | 640 +
final/lib/VMCore/ValueSymbolTable.cpp | 117 +
final/lib/VMCore/ValueTypes.cpp | 211 +
final/lib/VMCore/Verifier.cpp | 1994 ++
final/llvm.spec.in | 67 +
final/projects/CMakeLists.txt | 11 +
final/projects/Makefile | 28 +
final/projects/sample/Makefile | 18 +
final/projects/sample/Makefile.common.in | 22 +
final/projects/sample/autoconf/AutoRegen.sh | 52 +
final/projects/sample/autoconf/LICENSE.TXT | 24 +
final/projects/sample/autoconf/config.guess | 1388 +
final/projects/sample/autoconf/config.sub | 1489 +
final/projects/sample/autoconf/configure.ac | 73 +
final/projects/sample/configure | 2722 ++
final/projects/sample/docs/index.html | 6 +
final/projects/sample/include/sample.h | 8 +
final/projects/sample/lib/Makefile | 13 +
final/projects/sample/lib/sample/Makefile | 16 +
final/projects/sample/lib/sample/sample.c | 25 +
final/projects/sample/tools/Makefile | 13 +
final/projects/sample/tools/sample/Makefile | 23 +
final/projects/sample/tools/sample/main.c | 14 +
final/runtime/Makefile | 31 +
final/runtime/README.txt | 4 +
final/runtime/libprofile/BasicBlockTracing.c | 67 +
final/runtime/libprofile/CommonProfiling.c | 130 +
final/runtime/libprofile/EdgeProfiling.c | 45 +
final/runtime/libprofile/Makefile | 22 +
.../runtime/libprofile/OptimalEdgeProfiling.c | 45 +
final/runtime/libprofile/PathProfiling.c | 266 +
final/runtime/libprofile/Profiling.h | 36 +
final/runtime/libprofile/libprofile.exports | 7 +
.../BasicAA/2003-02-26-AccessSizeTest.ll | 18 +
.../Analysis/BasicAA/2003-03-04-GEPCrash.ll | 7 +
.../Analysis/BasicAA/2003-04-22-GEPProblem.ll | 15 +
.../Analysis/BasicAA/2003-04-25-GEPCrash.ll | 7 +
.../BasicAA/2003-05-21-GEP-Problem.ll | 21 +
.../Analysis/BasicAA/2003-06-01-AliasCrash.ll | 11 +
.../BasicAA/2003-07-03-BasicAACrash.ll | 10 +
.../BasicAA/2003-09-19-LocalArgument.ll | 12 +
.../BasicAA/2003-11-04-SimpleCases.ll | 16 +
.../BasicAA/2003-12-11-ConstExprGEP.ll | 18 +
.../BasicAA/2004-07-28-MustAliasbug.ll | 10 +
.../BasicAA/2004-12-08-BasicAACrash.ll | 22 +
.../BasicAA/2004-12-08-BasicAACrash2.ll | 20 +
.../BasicAA/2005-03-09-BrokenBasicAA.ll | 15 +
.../BasicAA/2006-03-03-BadArraySubscript.ll | 31 +
.../BasicAA/2006-11-03-BasicAAVectorCrash.ll | 48 +
.../2007-01-13-BasePointerBadNoAlias.ll | 35 +
.../BasicAA/2007-08-01-NoAliasAndCalls.ll | 10 +
.../BasicAA/2007-08-01-NoAliasAndGEP.ll | 17 +
.../BasicAA/2007-08-05-GetOverloadedModRef.ll | 17 +
.../BasicAA/2007-10-24-ArgumentsGlobals.ll | 14 +
.../Analysis/BasicAA/2007-11-05-SizeCrash.ll | 34 +
.../BasicAA/2007-12-08-OutOfBoundsCrash.ll | 31 +
.../test/Analysis/BasicAA/2008-04-15-Byval.ll | 18 +
.../BasicAA/2008-06-02-GEPTailCrash.ll | 15 +
.../Analysis/BasicAA/2008-11-23-NoaliasRet.ll | 12 +
.../Analysis/BasicAA/2009-03-04-GEPNoalias.ll | 13 +
.../BasicAA/2009-10-13-AtomicModRef.ll | 17 +
.../BasicAA/2009-10-13-GEP-BaseNoAlias.ll | 30 +
.../2010-09-15-GEP-SignedArithmetic.ll | 15 +
.../BasicAA/args-rets-allocas-loads.ll | 310 +
final/test/Analysis/BasicAA/byval.ll | 18 +
final/test/Analysis/BasicAA/cas.ll | 15 +
.../Analysis/BasicAA/constant-over-index.ll | 28 +
final/test/Analysis/BasicAA/dg.exp | 3 +
final/test/Analysis/BasicAA/empty.ll | 10 +
final/test/Analysis/BasicAA/featuretest.ll | 127 +
.../BasicAA/full-store-partial-alias.ll | 33 +
final/test/Analysis/BasicAA/gcsetest.ll | 46 +
final/test/Analysis/BasicAA/gep-alias.ll | 171 +
.../Analysis/BasicAA/getmodrefinfo-cs-cs.ll | 26 +
final/test/Analysis/BasicAA/global-size.ll | 40 +
final/test/Analysis/BasicAA/modref.ll | 136 +
final/test/Analysis/BasicAA/no-escape-call.ll | 23 +
final/test/Analysis/BasicAA/nocapture.ll | 14 +
final/test/Analysis/BasicAA/phi-aa.ll | 29 +
final/test/Analysis/BasicAA/phi-and-select.ll | 73 +
final/test/Analysis/BasicAA/pure-const-dce.ll | 33 +
final/test/Analysis/BasicAA/store-promote.ll | 54 +
.../test/Analysis/BasicAA/tailcall-modref.ll | 16 +
.../Analysis/BasicAA/unreachable-block.ll | 16 +
.../CallGraph/2008-09-09-DirectCall.ll | 13 +
.../CallGraph/2008-09-09-UsedByGlobal.ll | 7 +
final/test/Analysis/CallGraph/dg.exp | 3 +
.../Dominators/2006-10-02-BreakCritEdges.ll | 20 +
.../Dominators/2007-01-14-BreakCritEdges.ll | 187 +
.../Dominators/2007-07-11-SplitBlock.ll | 21 +
.../Dominators/2007-07-12-SplitBlock.ll | 13 +
final/test/Analysis/Dominators/dg.exp | 3 +
.../GlobalsModRef/2008-09-03-ReadGlobals.ll | 18 +
.../test/Analysis/GlobalsModRef/aliastest.ll | 9 +
.../GlobalsModRef/chaining-analysis.ll | 20 +
final/test/Analysis/GlobalsModRef/dg.exp | 3 +
.../Analysis/GlobalsModRef/indirect-global.ll | 20 +
.../test/Analysis/GlobalsModRef/modreftest.ll | 13 +
final/test/Analysis/GlobalsModRef/purecse.ll | 23 +
.../Analysis/LoopDependenceAnalysis/alias.ll | 44 +
.../Analysis/LoopDependenceAnalysis/dg.exp | 3 +
.../LoopDependenceAnalysis/siv-strong.ll | 110 +
.../siv-weak-crossing.ll | 118 +
.../LoopDependenceAnalysis/siv-weak-zero.ll | 56 +
.../Analysis/LoopDependenceAnalysis/ziv.ll | 63 +
.../LoopInfo/2003-05-15-NestingProblem.ll | 30 +
final/test/Analysis/LoopInfo/dg.exp | 3 +
.../2006-09-26-PostDominanceFrontier.ll | 97 +
.../2007-04-17-PostDominanceFrontier.ll | 692 +
.../2007-04-20-PostDom-Reset.ll | 28 +
final/test/Analysis/PostDominators/dg.exp | 3 +
final/test/Analysis/PostDominators/pr1098.ll | 14 +
.../test/Analysis/PostDominators/pr6047_a.ll | 15 +
.../test/Analysis/PostDominators/pr6047_b.ll | 19 +
.../test/Analysis/PostDominators/pr6047_c.ll | 147 +
.../test/Analysis/PostDominators/pr6047_d.ll | 24 +
final/test/Analysis/Profiling/dg.exp | 4 +
.../test/Analysis/Profiling/edge-profiling.ll | 139 +
.../Profiling/profiling-tool-chain.ll | 212 +
.../RegionInfo/20100809_bb_not_in_domtree.ll | 20 +
final/test/Analysis/RegionInfo/block_sort.ll | 42 +
final/test/Analysis/RegionInfo/cond_loop.ll | 33 +
.../RegionInfo/condition_complicated.ll | 60 +
.../RegionInfo/condition_complicated_2.ll | 44 +
.../RegionInfo/condition_forward_edge.ll | 26 +
.../RegionInfo/condition_same_exit.ll | 31 +
.../Analysis/RegionInfo/condition_simple.ll | 28 +
final/test/Analysis/RegionInfo/dg.exp | 3 +
.../Analysis/RegionInfo/exit_in_condition.ll | 38 +
.../test/Analysis/RegionInfo/infinite_loop.ll | 20 +
.../Analysis/RegionInfo/infinite_loop_2.ll | 36 +
.../Analysis/RegionInfo/infinite_loop_3.ll | 52 +
.../Analysis/RegionInfo/infinite_loop_4.ll | 48 +
.../RegionInfo/loop_with_condition.ll | 46 +
final/test/Analysis/RegionInfo/loops_1.ll | 40 +
final/test/Analysis/RegionInfo/loops_2.ll | 49 +
final/test/Analysis/RegionInfo/mix_1.ll | 69 +
.../RegionInfo/multiple_exiting_edge.ll | 38 +
.../test/Analysis/RegionInfo/nested_loops.ll | 33 +
final/test/Analysis/RegionInfo/next.ll | 49 +
final/test/Analysis/RegionInfo/paper.ll | 55 +
.../RegionInfo/two_loops_same_header.ll | 46 +
.../2007-07-15-NegativeStride.ll | 21 +
.../2007-08-06-MisinterpretBranch.ll | 18 +
.../ScalarEvolution/2007-08-06-Unsigned.ll | 30 +
.../2007-09-27-LargeStepping.ll | 22 +
.../2007-11-14-SignedAddRec.ll | 24 +
.../2007-11-18-OrInstruction.ll | 21 +
.../2008-02-11-ReversedCondition.ll | 15 +
.../2008-02-12-SMAXTripCount.ll | 16 +
.../ScalarEvolution/2008-02-15-UMax.ll | 17 +
.../2008-05-25-NegativeStepToZero.ll | 22 +
.../2008-06-12-BinomialInt64.ll | 43 +
.../2008-07-12-UnneededSelect1.ll | 36 +
.../2008-07-12-UnneededSelect2.ll | 30 +
.../2008-07-19-InfiniteLoop.ll | 15 +
.../ScalarEvolution/2008-07-19-WrappingIV.ll | 15 +
.../2008-07-29-SGTTripCount.ll | 28 +
.../ScalarEvolution/2008-07-29-SMinExpr.ll | 26 +
.../ScalarEvolution/2008-08-04-IVOverflow.ll | 27 +
.../ScalarEvolution/2008-08-04-LongAddRec.ll | 58 +
.../2008-11-02-QuadraticCrash.ll | 21 +
.../ScalarEvolution/2008-11-15-CubicOOM.ll | 19 +
.../2008-11-18-LessThanOrEqual.ll | 31 +
.../ScalarEvolution/2008-11-18-Stride1.ll | 35 +
.../ScalarEvolution/2008-11-18-Stride2.ll | 34 +
.../ScalarEvolution/2008-12-08-FiniteSGE.ll | 24 +
.../2008-12-11-SMaxOverflow.ll | 30 +
.../2008-12-14-StrideAndSigned.ll | 22 +
.../ScalarEvolution/2008-12-15-DontUseSDiv.ll | 21 +
.../2009-01-02-SignedNegativeStride.ll | 40 +
.../ScalarEvolution/2009-04-22-TruncCast.ll | 37 +
.../2009-05-09-PointerEdgeCount.ll | 28 +
.../2009-07-04-GroupConstantsWidthMismatch.ll | 16 +
.../2010-09-03-RequiredTransitive.ll | 24 +
.../ScalarEvolution/SolveQuadraticEquation.ll | 32 +
.../test/Analysis/ScalarEvolution/and-xor.ll | 8 +
.../avoid-infinite-recursion-0.ll | 30 +
.../avoid-infinite-recursion-1.ll | 354 +
.../Analysis/ScalarEvolution/avoid-smax-0.ll | 35 +
.../Analysis/ScalarEvolution/avoid-smax-1.ll | 236 +
final/test/Analysis/ScalarEvolution/dg.exp | 3 +
.../Analysis/ScalarEvolution/div-overflow.ll | 10 +
.../test/Analysis/ScalarEvolution/do-loop.ll | 18 +
final/test/Analysis/ScalarEvolution/fold.ll | 62 +
.../ScalarEvolution/max-trip-count.ll | 72 +
.../Analysis/ScalarEvolution/nsw-offset.ll | 77 +
final/test/Analysis/ScalarEvolution/nsw.ll | 106 +
.../ScalarEvolution/pointer-sign-bits.ll | 220 +
final/test/Analysis/ScalarEvolution/pr3909.ll | 30 +
.../test/Analysis/ScalarEvolution/scev-aa.ll | 215 +
.../Analysis/ScalarEvolution/sext-inreg.ll | 30 +
.../Analysis/ScalarEvolution/sext-iv-0.ll | 31 +
.../Analysis/ScalarEvolution/sext-iv-1.ll | 100 +
.../Analysis/ScalarEvolution/sext-iv-2.ll | 74 +
final/test/Analysis/ScalarEvolution/sle.ll | 27 +
final/test/Analysis/ScalarEvolution/smax.ll | 12 +
.../Analysis/ScalarEvolution/trip-count.ll | 29 +
.../Analysis/ScalarEvolution/trip-count10.ll | 126 +
.../Analysis/ScalarEvolution/trip-count2.ll | 35 +
.../Analysis/ScalarEvolution/trip-count3.ll | 78 +
.../Analysis/ScalarEvolution/trip-count4.ll | 24 +
.../Analysis/ScalarEvolution/trip-count5.ll | 48 +
.../Analysis/ScalarEvolution/trip-count6.ll | 37 +
.../Analysis/ScalarEvolution/trip-count7.ll | 150 +
.../Analysis/ScalarEvolution/trip-count8.ll | 37 +
.../Analysis/ScalarEvolution/trip-count9.ll | 408 +
.../Analysis/ScalarEvolution/undefined.ll | 39 +
.../ScalarEvolution/unreachable-code.ll | 13 +
.../ScalarEvolution/unsimplified-loop.ll | 29 +
.../test/Analysis/ScalarEvolution/xor-and.ll | 12 +
.../Analysis/ScalarEvolution/zext-wrap.ll | 24 +
.../TypeBasedAliasAnalysis/aliastest.ll | 62 +
.../argument-promotion.ll | 31 +
.../Analysis/TypeBasedAliasAnalysis/dg.exp | 3 +
.../Analysis/TypeBasedAliasAnalysis/dse.ll | 66 +
.../TypeBasedAliasAnalysis/functionattrs.ll | 81 +
.../gvn-nonlocal-type-mismatch.ll | 91 +
.../Analysis/TypeBasedAliasAnalysis/licm.ll | 61 +
.../TypeBasedAliasAnalysis/memcpyopt.ll | 23 +
.../TypeBasedAliasAnalysis/precedence.ll | 46 +
.../Analysis/TypeBasedAliasAnalysis/sink.ll | 20 +
final/test/Archive/GNU.a | Bin 0 -> 4210 bytes
final/test/Archive/IsNAN.o | Bin 0 -> 2280 bytes
final/test/Archive/MacOSX.a | Bin 0 -> 4166 bytes
final/test/Archive/README.txt | 24 +
final/test/Archive/SVR4.a | Bin 0 -> 4214 bytes
final/test/Archive/dg.exp | 3 +
final/test/Archive/evenlen | 1 +
final/test/Archive/extract.ll | 16 +
final/test/Archive/oddlen | 1 +
final/test/Archive/toc_GNU.ll | 8 +
final/test/Archive/toc_MacOSX.ll | 9 +
final/test/Archive/toc_SVR4.ll | 8 +
final/test/Archive/toc_xpg4.ll | 8 +
.../Archive/very_long_bytecode_file_name.bc | Bin 0 -> 1465 bytes
final/test/Archive/xpg4.a | Bin 0 -> 4214 bytes
.../2002-01-24-BadSymbolTableAssert.ll | 11 +
.../2002-01-24-ValueRefineAbsType.ll | 23 +
.../test/Assembler/2002-02-19-TypeParsing.ll | 3 +
.../Assembler/2002-03-08-NameCollision.ll | 15 +
.../Assembler/2002-03-08-NameCollision2.ll | 12 +
.../Assembler/2002-04-04-PureVirtMethCall.ll | 6 +
.../Assembler/2002-04-04-PureVirtMethCall2.ll | 5 +
.../test/Assembler/2002-04-05-TypeParsing.ll | 3 +
.../Assembler/2002-04-07-HexFloatConstants.ll | 16 +
.../test/Assembler/2002-04-07-InfConstant.ll | 9 +
.../test/Assembler/2002-04-29-NameBinding.ll | 18 +
.../Assembler/2002-05-02-InvalidForwardRef.ll | 10 +
final/test/Assembler/2002-05-02-ParseError.ll | 7 +
.../2002-07-08-HugePerformanceProblem.ll | 67 +
.../Assembler/2002-07-14-InternalLossage.ll | 9 +
final/test/Assembler/2002-07-14-OpaqueType.ll | 10 +
.../2002-07-25-ParserAssertionFailure.ll | 13 +
.../Assembler/2002-07-25-QuoteInString.ll | 5 +
.../Assembler/2002-07-25-ReturnPtrFunction.ll | 15 +
.../Assembler/2002-07-31-SlashInString.ll | 5 +
.../Assembler/2002-08-15-CastAmbiguity.ll | 6 +
.../2002-08-15-ConstantExprProblem.ll | 16 +
.../2002-08-15-UnresolvedGlobalReference.ll | 8 +
.../Assembler/2002-08-16-ConstExprInlined.ll | 22 +
.../Assembler/2002-08-19-BytecodeReader.ll | 17 +
.../Assembler/2002-08-22-DominanceProblem.ll | 17 +
.../2002-10-08-LargeArrayPerformance.ll | 8 +
.../2002-10-13-ConstantEncodingProblem.ll | 5 +
final/test/Assembler/2002-10-15-NameClash.ll | 7 +
.../Assembler/2002-12-15-GlobalResolve.ll | 7 +
.../Assembler/2003-01-30-UnsignedString.ll | 4 +
.../2003-04-15-ConstantInitAssertion.ll | 4 +
.../2003-04-25-UnresolvedGlobalReference.ll | 7 +
.../2003-05-03-BytecodeReaderProblem.ll | 6 +
.../Assembler/2003-05-12-MinIntProblem.ll | 5 +
.../Assembler/2003-05-15-AssemblerProblem.ll | 14 +
final/test/Assembler/2003-05-15-SwitchBug.ll | 11 +
.../Assembler/2003-05-21-ConstantShiftExpr.ll | 4 +
.../Assembler/2003-05-21-EmptyStructTest.ll | 6 +
.../2003-05-21-MalformedShiftCrash.ll | 4 +
.../2003-05-21-MalformedStructCrash.ll | 4 +
.../Assembler/2003-06-17-InvokeDisassemble.ll | 9 +
.../2003-06-30-RecursiveTypeProblem.ll | 3 +
.../2003-08-20-ConstantExprGEP-Fold.ll | 16 +
.../2003-08-21-ConstantExprCast-Fold.ll | 4 +
.../2003-10-04-NotMergingGlobalConstants.ll | 6 +
.../Assembler/2003-11-05-ConstantExprShift.ll | 5 +
.../Assembler/2003-11-11-ImplicitRename.ll | 8 +
.../Assembler/2003-11-12-ConstantExprCast.ll | 10 +
.../Assembler/2003-11-24-SymbolTableCrash.ll | 10 +
.../2003-12-30-TypeMapInvalidMemory.ll | 55 +
.../2004-01-11-getelementptrfolding.ll | 12 +
.../test/Assembler/2004-01-20-MaxLongLong.ll | 4 +
.../test/Assembler/2004-02-01-NegativeZero.ll | 5 +
.../2004-02-27-SelfUseAssertError.ll | 25 +
.../2004-03-07-FunctionAddressAlignment.ll | 15 +
.../2004-03-30-UnclosedFunctionCrash.ll | 3 +
.../2004-04-04-GetElementPtrIndexTypes.ll | 10 +
.../test/Assembler/2004-06-07-VerifierBug.ll | 11 +
.../Assembler/2004-10-22-BCWriterUndefBug.ll | 5 +
.../Assembler/2004-11-28-InvalidTypeCrash.ll | 4 +
.../2005-01-03-FPConstantDisassembly.ll | 6 +
.../2005-01-31-CallingAggregateFunction.ll | 8 +
.../Assembler/2005-02-09-AsmWriterStoreBug.ll | 14 +
.../Assembler/2005-05-05-OpaqueUndefValues.ll | 4 +
.../Assembler/2005-12-21-ZeroInitVector.ll | 6 +
.../Assembler/2006-05-26-VarargsCallEncode.ll | 8 +
.../Assembler/2006-09-28-CrashOnInvalid.ll | 8 +
.../test/Assembler/2006-12-09-Cast-To-Bool.ll | 6 +
.../2007-01-02-Undefined-Arg-Type.ll | 9 +
.../Assembler/2007-01-05-Cmp-ConstExpr.ll | 18 +
.../Assembler/2007-01-16-CrashOnBadCast.ll | 7 +
.../Assembler/2007-01-16-CrashOnBadCast2.ll | 4 +
.../2007-03-18-InvalidNumberedVar.ll | 9 +
final/test/Assembler/2007-03-19-NegValue.ll | 7 +
.../test/Assembler/2007-04-20-AlignedLoad.ll | 7 +
.../test/Assembler/2007-04-20-AlignedStore.ll | 7 +
.../2007-04-25-AssemblerFoldExternWeak.ll | 6 +
final/test/Assembler/2007-05-21-Escape.ll | 22 +
.../2007-07-19-ParamAttrAmbiguity.ll | 9 +
.../2007-07-30-AutoUpgradeZextSext.ll | 12 +
.../test/Assembler/2007-08-06-AliasInvalid.ll | 9 +
.../test/Assembler/2007-09-10-AliasFwdRef.ll | 9 +
final/test/Assembler/2007-09-29-GC.ll | 12 +
.../Assembler/2007-11-26-AttributeOverload.ll | 4 +
.../2007-11-27-AutoUpgradeAttributes.ll | 3 +
.../Assembler/2007-12-11-AddressSpaces.ll | 25 +
.../test/Assembler/2008-01-11-VarargAttrs.ll | 10 +
.../Assembler/2008-02-18-IntPointerCrash.ll | 6 +
.../2008-02-20-MultipleReturnValue.ll | 22 +
final/test/Assembler/2008-07-10-APInt.ll | 9 +
.../Assembler/2008-09-02-FunctionNotes.ll | 14 +
.../Assembler/2008-09-02-FunctionNotes2.ll | 6 +
final/test/Assembler/2008-09-29-RetAttr.ll | 13 +
.../2008-10-14-NamedTypeOnInteger.ll | 6 +
.../test/Assembler/2008-10-14-QuoteInName.ll | 3 +
.../Assembler/2009-02-01-UnnamedForwardRef.ll | 6 +
final/test/Assembler/2009-02-28-CastOpc.ll | 8 +
.../Assembler/2009-02-28-StripOpaqueName.ll | 6 +
.../Assembler/2009-03-24-ZextConstantExpr.ll | 11 +
final/test/Assembler/2009-04-25-AliasGEP.ll | 8 +
final/test/Assembler/2009-07-24-ZeroArgGEP.ll | 5 +
...-02-05-FunctionLocalMetadataBecomesNull.ll | 25 +
final/test/Assembler/AutoUpgradeIntrinsics.ll | 81 +
.../Assembler/AutoUpgradeMMXIntrinsics.ll | 223 +
final/test/Assembler/ConstantExprFold.ll | 31 +
final/test/Assembler/ConstantExprFoldCast.ll | 14 +
.../test/Assembler/MultipleReturnValueType.ll | 13 +
.../Assembler/aggregate-constant-values.ll | 27 +
.../aggregate-return-single-value.ll | 14 +
final/test/Assembler/align-inst-alloca.ll | 6 +
final/test/Assembler/align-inst-load.ll | 6 +
final/test/Assembler/align-inst-store.ll | 6 +
final/test/Assembler/align-inst.ll | 10 +
final/test/Assembler/alignstack.ll | 36 +
final/test/Assembler/anon-functions.ll | 26 +
final/test/Assembler/bcwrap.ll | 9 +
final/test/Assembler/comment.ll | 20 +
final/test/Assembler/dg.exp | 3 +
.../Assembler/extractvalue-invalid-idx.ll | 8 +
final/test/Assembler/flags.ll | 276 +
.../test/Assembler/functionlocal-metadata.ll | 44 +
final/test/Assembler/getelementptr.ll | 22 +
final/test/Assembler/getelementptr_struct.ll | 12 +
final/test/Assembler/huge-array.ll | 5 +
final/test/Assembler/insertextractvalue.ll | 29 +
.../test/Assembler/insertvalue-invalid-idx.ll | 7 +
final/test/Assembler/metadata.ll | 22 +
final/test/Assembler/numbered-values.ll | 16 +
final/test/Assembler/private.ll | 9 +
final/test/Assembler/select.ll | 9 +
final/test/Assembler/unnamed-addr.ll | 18 +
final/test/Assembler/unnamed.ll | 51 +
final/test/Assembler/vbool-cmp.ll | 15 +
final/test/Assembler/vector-cmp.ll | 16 +
final/test/Assembler/vector-select.ll | 11 +
final/test/Assembler/vector-shift.ll | 32 +
final/test/Assembler/x86mmx.ll | 8 +
final/test/Bindings/Ocaml/analysis.ml | 51 +
final/test/Bindings/Ocaml/bitreader.ml | 78 +
final/test/Bindings/Ocaml/bitwriter.ml | 47 +
final/test/Bindings/Ocaml/dg.exp | 5 +
final/test/Bindings/Ocaml/executionengine.ml | 115 +
final/test/Bindings/Ocaml/ext_exc.ml | 17 +
final/test/Bindings/Ocaml/scalar_opts.ml | 78 +
final/test/Bindings/Ocaml/target.ml | 60 +
final/test/Bindings/Ocaml/vmcore.ml | 1331 +
.../test/Bitcode/2006-12-11-Cast-ConstExpr.ll | 10 +
.../2009-06-11-FirstClassAggregateConstant.ll | 12 +
final/test/Bitcode/AutoUpgradeGlobals.ll | 3 +
final/test/Bitcode/AutoUpgradeGlobals.ll.bc | Bin 0 -> 312 bytes
final/test/Bitcode/AutoUpgradeIntrinsics.ll | 10 +
.../test/Bitcode/AutoUpgradeIntrinsics.ll.bc | Bin 0 -> 800 bytes
final/test/Bitcode/dg.exp | 3 +
final/test/Bitcode/extractelement.ll | 8 +
final/test/Bitcode/flags.ll | 27 +
final/test/Bitcode/memcpy.ll | 23 +
final/test/Bitcode/metadata-2.ll | 87 +
final/test/Bitcode/metadata.ll | 6 +
final/test/Bitcode/neon-intrinsics.ll | 213 +
final/test/Bitcode/neon-intrinsics.ll.bc | Bin 0 -> 5764 bytes
final/test/Bitcode/null-type.ll | 2 +
final/test/Bitcode/null-type.ll.bc | Bin 0 -> 312 bytes
final/test/Bitcode/sse2_loadl_pd.ll | 2 +
final/test/Bitcode/sse2_loadl_pd.ll.bc | Bin 0 -> 532 bytes
final/test/Bitcode/sse2_movl_dq.ll | 2 +
final/test/Bitcode/sse2_movl_dq.ll.bc | Bin 0 -> 480 bytes
final/test/Bitcode/sse2_movs_d.ll | 2 +
final/test/Bitcode/sse2_movs_d.ll.bc | Bin 0 -> 476 bytes
final/test/Bitcode/sse2_punpck_qdq.ll | 3 +
final/test/Bitcode/sse2_punpck_qdq.ll.bc | Bin 0 -> 576 bytes
final/test/Bitcode/sse2_shuf_pd.ll | 2 +
final/test/Bitcode/sse2_shuf_pd.ll.bc | Bin 0 -> 584 bytes
final/test/Bitcode/sse2_unpck_pd.ll | 3 +
final/test/Bitcode/sse2_unpck_pd.ll.bc | Bin 0 -> 572 bytes
final/test/Bitcode/sse41_pmulld.ll | 2 +
final/test/Bitcode/sse41_pmulld.ll.bc | Bin 0 -> 560 bytes
final/test/Bitcode/ssse3_palignr.ll | 1 +
final/test/Bitcode/ssse3_palignr.ll.bc | Bin 0 -> 1504 bytes
.../test/BugPoint/crash-narrowfunctiontest.ll | 13 +
final/test/BugPoint/dg.exp | 3 +
final/test/BugPoint/metadata.ll | 35 +
final/test/BugPoint/remove_arguments_test.ll | 18 +
final/test/CMakeLists.txt | 97 +
.../test/CodeGen/ARM/2006-11-10-CycleInDAG.ll | 20 +
.../CodeGen/ARM/2007-01-19-InfiniteLoop.ll | 108 +
.../CodeGen/ARM/2007-03-07-CombinerCrash.ll | 21 +
.../test/CodeGen/ARM/2007-03-13-InstrSched.ll | 51 +
.../ARM/2007-03-21-JoinIntervalsCrash.ll | 96 +
.../ARM/2007-03-26-RegScavengerAssert.ll | 947 +
.../ARM/2007-03-27-RegScavengerAssert.ll | 35 +
.../ARM/2007-03-30-RegScavengerAssert.ll | 101 +
.../ARM/2007-04-02-RegScavengerAssert.ll | 55 +
final/test/CodeGen/ARM/2007-04-03-PEIBug.ll | 12 +
.../CodeGen/ARM/2007-04-03-UndefinedSymbol.ll | 99 +
.../CodeGen/ARM/2007-04-30-CombinerCrash.ll | 32 +
.../ARM/2007-05-03-BadPostIndexedLd.ll | 113 +
.../CodeGen/ARM/2007-05-07-jumptoentry.ll | 58 +
.../CodeGen/ARM/2007-05-07-tailmerge-1.ll | 66 +
.../CodeGen/ARM/2007-05-09-tailmerge-2.ll | 67 +
.../ARM/2007-05-14-InlineAsmCstCrash.ll | 6 +
.../ARM/2007-05-14-RegScavengerAssert.ll | 30 +
.../CodeGen/ARM/2007-05-22-tailmerge-3.ll | 69 +
.../ARM/2007-05-23-BadPreIndexedStore.ll | 34 +
.../2007-05-31-RegScavengerInfiniteLoop.ll | 237 +
final/test/CodeGen/ARM/2007-08-15-ReuseBug.ll | 106 +
.../ARM/2008-02-04-LocalRegAllocBug.ll | 19 +
.../CodeGen/ARM/2008-02-29-RegAllocLocal.ll | 21 +
.../CodeGen/ARM/2008-03-05-SxtInRegBug.ll | 14 +
.../ARM/2008-03-07-RegScavengerAssert.ll | 20 +
.../CodeGen/ARM/2008-04-04-ScavengerAssert.ll | 60 +
.../CodeGen/ARM/2008-04-10-ScavengerAssert.ll | 258 +
.../CodeGen/ARM/2008-04-11-PHIofImpDef.ll | 3544 +++
.../ARM/2008-05-19-LiveIntervalsBug.ll | 55 +
.../CodeGen/ARM/2008-05-19-ScavengerAssert.ll | 22 +
final/test/CodeGen/ARM/2008-07-17-Fdiv.ll | 6 +
.../ARM/2008-07-24-CodeGenPrepCrash.ll | 9 +
.../CodeGen/ARM/2008-08-07-AsmPrintBug.ll | 13 +
.../CodeGen/ARM/2008-09-14-CoalescerBug.ll | 29 +
.../CodeGen/ARM/2008-09-17-CoalescerBug.ll | 17 +
.../CodeGen/ARM/2008-11-18-ScavengerAssert.ll | 16 +
.../test/CodeGen/ARM/2009-02-16-SpillerBug.ll | 117 +
.../ARM/2009-02-22-SoftenFloatVaArg.ll | 20 +
.../test/CodeGen/ARM/2009-02-27-SpillerBug.ll | 229 +
.../test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 78 +
.../CodeGen/ARM/2009-03-09-AddrModeBug.ll | 13 +
.../CodeGen/ARM/2009-04-06-AsmModifier.ll | 20 +
.../CodeGen/ARM/2009-04-08-AggregateAddr.ll | 18 +
final/test/CodeGen/ARM/2009-04-08-FREM.ll | 9 +
.../test/CodeGen/ARM/2009-04-08-FloatUndef.ll | 11 +
.../CodeGen/ARM/2009-04-09-RegScavengerAsm.ll | 14 +
.../CodeGen/ARM/2009-05-05-DAGCombineBug.ll | 11 +
.../CodeGen/ARM/2009-05-07-RegAllocLocal.ll | 12 +
.../ARM/2009-05-11-CodePlacementCrash.ll | 30 +
.../CodeGen/ARM/2009-05-18-InlineAsmMem.ll | 9 +
.../test/CodeGen/ARM/2009-06-02-ISelCrash.ll | 62 +
.../CodeGen/ARM/2009-06-04-MissingLiveIn.ll | 263 +
.../ARM/2009-06-12-RegScavengerAssert.ll | 77 +
.../ARM/2009-06-15-RegScavengerAssert.ll | 344 +
.../ARM/2009-06-19-RegScavengerAssert.ll | 30 +
.../CodeGen/ARM/2009-06-22-CoalescerBug.ll | 43 +
.../ARM/2009-06-30-RegScavengerAssert.ll | 122 +
.../ARM/2009-06-30-RegScavengerAssert2.ll | 116 +
.../ARM/2009-06-30-RegScavengerAssert3.ll | 128 +
.../ARM/2009-06-30-RegScavengerAssert4.ll | 128 +
.../ARM/2009-06-30-RegScavengerAssert5.ll | 99 +
.../test/CodeGen/ARM/2009-07-01-CommuteBug.ll | 130 +
.../ARM/2009-07-09-asm-p-constraint.ll | 7 +
.../CodeGen/ARM/2009-07-18-RewriterBug.ll | 1323 +
.../CodeGen/ARM/2009-07-22-ScavengerAssert.ll | 94 +
.../CodeGen/ARM/2009-07-22-SchedulerAssert.ll | 95 +
.../CodeGen/ARM/2009-07-29-VFP3Registers.ll | 108 +
.../ARM/2009-08-02-RegScavengerAssert-Neon.ll | 29 +
.../ARM/2009-08-04-RegScavengerAssert-2.ll | 33 +
.../ARM/2009-08-04-RegScavengerAssert.ll | 25 +
.../2009-08-15-RegScavenger-EarlyClobber.ll | 42 +
.../ARM/2009-08-15-RegScavengerAssert.ll | 10 +
.../test/CodeGen/ARM/2009-08-21-PostRAKill.ll | 40 +
.../CodeGen/ARM/2009-08-21-PostRAKill2.ll | 38 +
.../CodeGen/ARM/2009-08-21-PostRAKill3.ll | 31 +
.../CodeGen/ARM/2009-08-23-linkerprivate.ll | 8 +
.../CodeGen/ARM/2009-08-26-ScalarToVector.ll | 27 +
.../CodeGen/ARM/2009-08-27-ScalarToVector.ll | 35 +
.../CodeGen/ARM/2009-08-29-ExtractEltf32.ll | 25 +
.../CodeGen/ARM/2009-08-29-TooLongSplat.ll | 23 +
.../test/CodeGen/ARM/2009-08-31-LSDA-Name.ll | 103 +
.../CodeGen/ARM/2009-08-31-TwoRegShuffle.ll | 9 +
final/test/CodeGen/ARM/2009-09-09-AllOnes.ll | 10 +
.../test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll | 18 +
final/test/CodeGen/ARM/2009-09-10-postdec.ll | 11 +
.../CodeGen/ARM/2009-09-13-InvalidSubreg.ll | 61 +
.../CodeGen/ARM/2009-09-13-InvalidSuperReg.ll | 41 +
.../ARM/2009-09-20-LiveIntervalsBug.ll | 34 +
.../ARM/2009-09-21-LiveVariablesBug.ll | 14 +
.../ARM/2009-09-22-LiveVariablesBug.ll | 23 +
.../ARM/2009-09-23-LiveVariablesBug.ll | 21 +
.../CodeGen/ARM/2009-09-24-spill-align.ll | 17 +
.../CodeGen/ARM/2009-09-27-CoalescerBug.ll | 24 +
.../CodeGen/ARM/2009-09-28-LdStOptiBug.ll | 19 +
.../CodeGen/ARM/2009-10-02-NEONSubregsBug.ll | 63 +
final/test/CodeGen/ARM/2009-10-16-Scope.ll | 32 +
.../CodeGen/ARM/2009-10-21-InvalidFNeg.ll | 48 +
.../CodeGen/ARM/2009-10-27-double-align.ll | 14 +
final/test/CodeGen/ARM/2009-10-30.ll | 17 +
.../test/CodeGen/ARM/2009-11-01-NeonMoves.ll | 40 +
.../CodeGen/ARM/2009-11-02-NegativeLane.ll | 21 +
.../ARM/2009-11-07-SubRegAsmPrinting.ll | 66 +
.../CodeGen/ARM/2009-11-13-CoalescerCrash.ll | 20 +
.../CodeGen/ARM/2009-11-13-ScavengerAssert.ll | 42 +
.../ARM/2009-11-13-ScavengerAssert2.ll | 123 +
.../CodeGen/ARM/2009-11-13-VRRewriterCrash.ll | 113 +
.../ARM/2009-11-30-LiveVariablesBug.ll | 41 +
.../test/CodeGen/ARM/2009-12-02-vtrn-undef.ll | 33 +
.../CodeGen/ARM/2010-03-04-eabi-fp-spill.ll | 65 +
.../CodeGen/ARM/2010-03-04-stm-undef-addr.ll | 54 +
final/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll | 15 +
.../ARM/2010-04-07-DbgValueOtherTargets.ll | 28 +
.../test/CodeGen/ARM/2010-04-09-NeonSelect.ll | 23 +
.../CodeGen/ARM/2010-04-13-v2f64SplitArg.ll | 7 +
.../CodeGen/ARM/2010-04-14-SplitVector.ll | 16 +
.../ARM/2010-04-15-ScavengerDebugValue.ll | 26 +
.../CodeGen/ARM/2010-05-14-IllegalType.ll | 10 +
.../CodeGen/ARM/2010-05-17-FastAllocCrash.ll | 105 +
.../CodeGen/ARM/2010-05-18-LocalAllocCrash.ll | 36 +
.../CodeGen/ARM/2010-05-18-PostIndexBug.ll | 25 +
final/test/CodeGen/ARM/2010-05-19-Shuffles.ll | 21 +
.../CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 45 +
.../CodeGen/ARM/2010-05-21-BuildVector.ll | 43 +
.../CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll | 19 +
.../CodeGen/ARM/2010-06-21-LdStMultipleBug.ll | 148 +
.../CodeGen/ARM/2010-06-21-nondarwin-tc.ll | 146 +
.../ARM/2010-06-25-Thumb2ITInvalidIterator.ll | 75 +
.../ARM/2010-06-29-PartialRedefFastAlloc.ll | 25 +
.../CodeGen/ARM/2010-06-29-SubregImpDefs.ll | 15 +
.../CodeGen/ARM/2010-07-26-GlobalMerge.ll | 95 +
final/test/CodeGen/ARM/2010-08-04-EHCrash.ll | 65 +
.../CodeGen/ARM/2010-08-04-StackVariable.ll | 124 +
.../test/CodeGen/ARM/2010-09-21-OptCmpBug.ll | 84 +
.../ARM/2010-09-29-mc-asm-header-test.ll | 13 +
.../ARM/2010-10-19-mc-elf-objheader.ll | 37 +
.../test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll | 31 +
.../ARM/2010-11-15-SpillEarlyClobber.ll | 85 +
.../CodeGen/ARM/2010-11-29-PrologueBug.ll | 28 +
.../test/CodeGen/ARM/2010-11-30-reloc-movt.ll | 42 +
final/test/CodeGen/ARM/2010-12-07-PEIBug.ll | 40 +
final/test/CodeGen/ARM/2010-12-08-tpsoft.ll | 52 +
.../test/CodeGen/ARM/2010-12-13-reloc-pic.ll | 100 +
.../test/CodeGen/ARM/2010-12-15-elf-lcomm.ll | 35 +
.../ARM/2010-12-17-LocalStackSlotCrash.ll | 15 +
.../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 126 +
.../CodeGen/ARM/2011-02-04-AntidepMultidef.ll | 128 +
.../CodeGen/ARM/2011-02-07-AntidepClobber.ll | 89 +
final/test/CodeGen/ARM/addrmode.ll | 15 +
final/test/CodeGen/ARM/aliases.ll | 31 +
final/test/CodeGen/ARM/align.ll | 42 +
final/test/CodeGen/ARM/alloca.ll | 12 +
final/test/CodeGen/ARM/argaddr.ll | 19 +
.../CodeGen/ARM/arguments-nosplit-double.ll | 9 +
.../test/CodeGen/ARM/arguments-nosplit-i64.ll | 9 +
final/test/CodeGen/ARM/arguments.ll | 43 +
final/test/CodeGen/ARM/arguments2.ll | 9 +
final/test/CodeGen/ARM/arguments3.ll | 9 +
final/test/CodeGen/ARM/arguments4.ll | 9 +
final/test/CodeGen/ARM/arguments5.ll | 9 +
final/test/CodeGen/ARM/arguments6.ll | 9 +
final/test/CodeGen/ARM/arguments7.ll | 9 +
final/test/CodeGen/ARM/arguments8.ll | 9 +
.../CodeGen/ARM/arguments_f64_backfill.ll | 9 +
.../test/CodeGen/ARM/arm-and-tst-peephole.ll | 112 +
final/test/CodeGen/ARM/arm-asm.ll | 7 +
final/test/CodeGen/ARM/arm-frameaddr.ll | 17 +
final/test/CodeGen/ARM/arm-negative-stride.ll | 47 +
final/test/CodeGen/ARM/arm-returnaddr.ll | 26 +
final/test/CodeGen/ARM/armv4.ll | 13 +
final/test/CodeGen/ARM/atomic-cmp.ll | 17 +
.../test/CodeGen/ARM/available_externally.ll | 16 +
final/test/CodeGen/ARM/bfc.ll | 25 +
final/test/CodeGen/ARM/bfi.ll | 64 +
final/test/CodeGen/ARM/bfx.ll | 28 +
final/test/CodeGen/ARM/bic.ll | 17 +
final/test/CodeGen/ARM/bits.ll | 41 +
final/test/CodeGen/ARM/bswap-inline-asm.ll | 9 +
final/test/CodeGen/ARM/bx_fold.ll | 31 +
final/test/CodeGen/ARM/call-tc.ll | 87 +
final/test/CodeGen/ARM/call.ll | 36 +
final/test/CodeGen/ARM/call_nolink.ll | 52 +
final/test/CodeGen/ARM/carry.ll | 21 +
final/test/CodeGen/ARM/clz.ll | 10 +
final/test/CodeGen/ARM/code-placement.ll | 79 +
final/test/CodeGen/ARM/compare-call.ll | 20 +
final/test/CodeGen/ARM/constants.ll | 46 +
final/test/CodeGen/ARM/crash-O0.ll | 28 +
final/test/CodeGen/ARM/crash.ll | 29 +
final/test/CodeGen/ARM/cse-libcalls.ll | 30 +
final/test/CodeGen/ARM/ctors_dtors.ll | 25 +
final/test/CodeGen/ARM/ctz.ll | 11 +
final/test/CodeGen/ARM/dg.exp | 5 +
final/test/CodeGen/ARM/div.ll | 34 +
final/test/CodeGen/ARM/dyn-stackalloc.ll | 56 +
final/test/CodeGen/ARM/extloadi1.ll | 20 +
final/test/CodeGen/ARM/fabss.ll | 27 +
final/test/CodeGen/ARM/fadds.ll | 23 +
final/test/CodeGen/ARM/fast-isel-crash.ll | 21 +
final/test/CodeGen/ARM/fast-isel-static.ll | 30 +
final/test/CodeGen/ARM/fast-isel.ll | 16 +
final/test/CodeGen/ARM/fcopysign.ll | 58 +
final/test/CodeGen/ARM/fdivs.ll | 23 +
final/test/CodeGen/ARM/fixunsdfdi.ll | 29 +
final/test/CodeGen/ARM/flag-crash.ll | 27 +
final/test/CodeGen/ARM/fmacs.ll | 51 +
final/test/CodeGen/ARM/fmdrr-fmrrd.ll | 13 +
final/test/CodeGen/ARM/fmscs.ll | 35 +
final/test/CodeGen/ARM/fmuls.ll | 23 +
final/test/CodeGen/ARM/fnegs.ll | 53 +
final/test/CodeGen/ARM/fnmacs.ll | 35 +
final/test/CodeGen/ARM/fnmscs.ll | 71 +
final/test/CodeGen/ARM/fnmul.ll | 11 +
final/test/CodeGen/ARM/fnmuls.ll | 21 +
final/test/CodeGen/ARM/formal.ll | 8 +
final/test/CodeGen/ARM/fp.ll | 78 +
final/test/CodeGen/ARM/fp16.ll | 32 +
final/test/CodeGen/ARM/fp_convert.ll | 48 +
final/test/CodeGen/ARM/fparith.ll | 101 +
final/test/CodeGen/ARM/fpcmp-opt.ll | 83 +
final/test/CodeGen/ARM/fpcmp.ll | 71 +
final/test/CodeGen/ARM/fpcmp_ueq.ll | 16 +
final/test/CodeGen/ARM/fpconsts.ll | 33 +
final/test/CodeGen/ARM/fpconv.ll | 102 +
final/test/CodeGen/ARM/fpmem.ll | 23 +
final/test/CodeGen/ARM/fpow.ll | 9 +
final/test/CodeGen/ARM/fpowi.ll | 15 +
final/test/CodeGen/ARM/fptoint.ll | 49 +
final/test/CodeGen/ARM/fsubs.ll | 13 +
final/test/CodeGen/ARM/global-merge.ll | 24 +
final/test/CodeGen/ARM/globals.ll | 75 +
final/test/CodeGen/ARM/hardfloat_neon.ll | 13 +
final/test/CodeGen/ARM/hello.ll | 14 +
final/test/CodeGen/ARM/hidden-vis-2.ll | 12 +
final/test/CodeGen/ARM/hidden-vis-3.ll | 17 +
final/test/CodeGen/ARM/hidden-vis.ll | 23 +
final/test/CodeGen/ARM/iabs.ll | 16 +
final/test/CodeGen/ARM/ifcvt1.ll | 15 +
final/test/CodeGen/ARM/ifcvt10.ll | 43 +
final/test/CodeGen/ARM/ifcvt11.ll | 59 +
final/test/CodeGen/ARM/ifcvt2.ll | 42 +
final/test/CodeGen/ARM/ifcvt3.ll | 19 +
final/test/CodeGen/ARM/ifcvt4.ll | 38 +
final/test/CodeGen/ARM/ifcvt5.ll | 25 +
final/test/CodeGen/ARM/ifcvt6.ll | 20 +
final/test/CodeGen/ARM/ifcvt7.ll | 32 +
final/test/CodeGen/ARM/ifcvt8.ll | 19 +
final/test/CodeGen/ARM/ifcvt9.ll | 12 +
.../CodeGen/ARM/illegal-vector-bitcast.ll | 14 +
final/test/CodeGen/ARM/imm.ll | 16 +
final/test/CodeGen/ARM/indirectbr.ll | 64 +
final/test/CodeGen/ARM/inlineasm-imm-arm.ll | 31 +
final/test/CodeGen/ARM/inlineasm.ll | 11 +
final/test/CodeGen/ARM/inlineasm2.ll | 11 +
final/test/CodeGen/ARM/inlineasm3.ll | 25 +
final/test/CodeGen/ARM/insn-sched1.ll | 11 +
final/test/CodeGen/ARM/ispositive.ll | 10 +
final/test/CodeGen/ARM/large-stack.ll | 20 +
final/test/CodeGen/ARM/ldm.ll | 46 +
final/test/CodeGen/ARM/ldr.ll | 71 +
final/test/CodeGen/ARM/ldr_ext.ll | 37 +
final/test/CodeGen/ARM/ldr_frame.ll | 31 +
final/test/CodeGen/ARM/ldr_post.ll | 12 +
final/test/CodeGen/ARM/ldr_pre.ll | 19 +
final/test/CodeGen/ARM/ldrd.ll | 22 +
final/test/CodeGen/ARM/ldst-f32-2-i32.ll | 28 +
final/test/CodeGen/ARM/load-global.ll | 50 +
final/test/CodeGen/ARM/load.ll | 34 +
final/test/CodeGen/ARM/long-setcc.ll | 17 +
final/test/CodeGen/ARM/long.ll | 90 +
final/test/CodeGen/ARM/long_shift.ll | 45 +
final/test/CodeGen/ARM/lsr-code-insertion.ll | 60 +
.../test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 642 +
final/test/CodeGen/ARM/lsr-scale-addr-mode.ll | 19 +
final/test/CodeGen/ARM/machine-cse-cmp.ll | 18 +
final/test/CodeGen/ARM/machine-licm.ll | 66 +
final/test/CodeGen/ARM/mem.ll | 14 +
final/test/CodeGen/ARM/memcpy-inline.ll | 16 +
final/test/CodeGen/ARM/memfunc.ll | 16 +
final/test/CodeGen/ARM/mls.ll | 16 +
final/test/CodeGen/ARM/movt-movw-global.ll | 20 +
final/test/CodeGen/ARM/movt.ll | 19 +
final/test/CodeGen/ARM/mul.ll | 22 +
final/test/CodeGen/ARM/mul_const.ll | 43 +
final/test/CodeGen/ARM/mulhi.ll | 22 +
.../test/CodeGen/ARM/mult-alt-generic-arm.ll | 323 +
final/test/CodeGen/ARM/mvn.ll | 74 +
final/test/CodeGen/ARM/neon_arith1.ll | 7 +
final/test/CodeGen/ARM/neon_div.ll | 48 +
final/test/CodeGen/ARM/neon_ld1.ll | 22 +
final/test/CodeGen/ARM/neon_ld2.ll | 23 +
final/test/CodeGen/ARM/neon_minmax.ll | 81 +
final/test/CodeGen/ARM/neon_shift.ll | 11 +
final/test/CodeGen/ARM/pack.ll | 88 +
final/test/CodeGen/ARM/phi.ll | 23 +
final/test/CodeGen/ARM/pr3502.ll | 24 +
final/test/CodeGen/ARM/prefetch.ll | 61 +
final/test/CodeGen/ARM/private.ll | 22 +
final/test/CodeGen/ARM/reg_sequence.ll | 348 +
final/test/CodeGen/ARM/ret0.ll | 5 +
final/test/CodeGen/ARM/ret_arg1.ll | 5 +
final/test/CodeGen/ARM/ret_arg2.ll | 6 +
final/test/CodeGen/ARM/ret_arg3.ll | 5 +
final/test/CodeGen/ARM/ret_arg4.ll | 5 +
final/test/CodeGen/ARM/ret_arg5.ll | 5 +
final/test/CodeGen/ARM/ret_f32_arg2.ll | 6 +
final/test/CodeGen/ARM/ret_f32_arg5.ll | 6 +
final/test/CodeGen/ARM/ret_f64_arg2.ll | 6 +
.../test/CodeGen/ARM/ret_f64_arg_reg_split.ll | 6 +
final/test/CodeGen/ARM/ret_f64_arg_split.ll | 6 +
final/test/CodeGen/ARM/ret_f64_arg_stack.ll | 6 +
final/test/CodeGen/ARM/ret_i128_arg2.ll | 6 +
final/test/CodeGen/ARM/ret_i64_arg2.ll | 6 +
final/test/CodeGen/ARM/ret_i64_arg3.ll | 6 +
final/test/CodeGen/ARM/ret_i64_arg_split.ll | 6 +
final/test/CodeGen/ARM/ret_void.ll | 6 +
final/test/CodeGen/ARM/rev.ll | 30 +
final/test/CodeGen/ARM/sbfx.ll | 47 +
final/test/CodeGen/ARM/section.ll | 7 +
final/test/CodeGen/ARM/select-imm.ll | 78 +
final/test/CodeGen/ARM/select.ll | 115 +
final/test/CodeGen/ARM/select_xform.ll | 60 +
final/test/CodeGen/ARM/shifter_operand.ll | 72 +
final/test/CodeGen/ARM/smul.ll | 36 +
final/test/CodeGen/ARM/spill-q.ll | 91 +
final/test/CodeGen/ARM/stack-frame.ll | 13 +
final/test/CodeGen/ARM/stm.ll | 16 +
final/test/CodeGen/ARM/str_post.ll | 22 +
final/test/CodeGen/ARM/str_pre-2.ll | 13 +
final/test/CodeGen/ARM/str_pre.ll | 18 +
final/test/CodeGen/ARM/str_trunc.ll | 16 +
final/test/CodeGen/ARM/sub.ll | 29 +
final/test/CodeGen/ARM/sxt_rot.ll | 29 +
final/test/CodeGen/ARM/t2-imm.ll | 9 +
final/test/CodeGen/ARM/tail-opts.ll | 67 +
final/test/CodeGen/ARM/thread_pointer.ll | 10 +
final/test/CodeGen/ARM/thumb1-varalloc.ll | 40 +
final/test/CodeGen/ARM/tls1.ll | 20 +
final/test/CodeGen/ARM/tls2.ll | 27 +
final/test/CodeGen/ARM/tls3.ll | 11 +
final/test/CodeGen/ARM/trap.ll | 12 +
final/test/CodeGen/ARM/trunc_ldr.ll | 24 +
.../CodeGen/ARM/truncstore-dag-combine.ll | 18 +
final/test/CodeGen/ARM/tst_teq.ll | 18 +
final/test/CodeGen/ARM/uint64tof64.ll | 17 +
final/test/CodeGen/ARM/umulo-32.ll | 14 +
.../test/CodeGen/ARM/unaligned_load_store.ll | 29 +
final/test/CodeGen/ARM/unord.ll | 14 +
final/test/CodeGen/ARM/uxt_rot.ll | 24 +
final/test/CodeGen/ARM/uxtb.ll | 74 +
final/test/CodeGen/ARM/va_arg.ll | 41 +
final/test/CodeGen/ARM/vaba.ll | 221 +
final/test/CodeGen/ARM/vabd.ll | 207 +
final/test/CodeGen/ARM/vabs.ll | 131 +
final/test/CodeGen/ARM/vadd.ll | 279 +
final/test/CodeGen/ARM/vargs.ll | 12 +
final/test/CodeGen/ARM/vargs_align.ll | 23 +
final/test/CodeGen/ARM/vbits.ll | 547 +
final/test/CodeGen/ARM/vbsl.ll | 105 +
final/test/CodeGen/ARM/vceq.ll | 92 +
final/test/CodeGen/ARM/vcge.ll | 203 +
final/test/CodeGen/ARM/vcgt.ll | 197 +
final/test/CodeGen/ARM/vcnt.ll | 132 +
final/test/CodeGen/ARM/vcombine.ll | 72 +
final/test/CodeGen/ARM/vcvt.ll | 158 +
final/test/CodeGen/ARM/vdup.ll | 263 +
final/test/CodeGen/ARM/vector-DAGCombine.ll | 107 +
final/test/CodeGen/ARM/vext.ll | 135 +
final/test/CodeGen/ARM/vfcmp.ll | 139 +
final/test/CodeGen/ARM/vfp.ll | 155 +
final/test/CodeGen/ARM/vget_lane.ll | 233 +
final/test/CodeGen/ARM/vhadd.ll | 249 +
final/test/CodeGen/ARM/vhsub.ll | 125 +
final/test/CodeGen/ARM/vicmp.ll | 113 +
final/test/CodeGen/ARM/vld1.ll | 142 +
final/test/CodeGen/ARM/vld2.ll | 155 +
final/test/CodeGen/ARM/vld3.ll | 158 +
final/test/CodeGen/ARM/vld4.ll | 160 +
final/test/CodeGen/ARM/vlddup.ll | 212 +
final/test/CodeGen/ARM/vldlane.ll | 506 +
final/test/CodeGen/ARM/vminmax.ll | 293 +
final/test/CodeGen/ARM/vmla.ll | 215 +
final/test/CodeGen/ARM/vmls.ll | 215 +
final/test/CodeGen/ARM/vmov.ll | 355 +
final/test/CodeGen/ARM/vmul.ll | 341 +
final/test/CodeGen/ARM/vneg.ll | 121 +
final/test/CodeGen/ARM/vpadal.ll | 125 +
final/test/CodeGen/ARM/vpadd.ll | 155 +
final/test/CodeGen/ARM/vpminmax.ll | 147 +
final/test/CodeGen/ARM/vqadd.ll | 165 +
final/test/CodeGen/ARM/vqdmul.ll | 281 +
final/test/CodeGen/ARM/vqshl.ll | 531 +
final/test/CodeGen/ARM/vqshrn.ll | 169 +
final/test/CodeGen/ARM/vqsub.ll | 165 +
final/test/CodeGen/ARM/vrec.ll | 119 +
final/test/CodeGen/ARM/vrev.ll | 149 +
final/test/CodeGen/ARM/vshift.ll | 432 +
final/test/CodeGen/ARM/vshiftins.ll | 155 +
final/test/CodeGen/ARM/vshl.ll | 654 +
final/test/CodeGen/ARM/vshll.ll | 83 +
final/test/CodeGen/ARM/vshrn.ll | 57 +
final/test/CodeGen/ARM/vsra.ll | 341 +
final/test/CodeGen/ARM/vst1.ll | 130 +
final/test/CodeGen/ARM/vst2.ll | 122 +
final/test/CodeGen/ARM/vst3.ll | 128 +
final/test/CodeGen/ARM/vst4.ll | 127 +
final/test/CodeGen/ARM/vstlane.ll | 364 +
final/test/CodeGen/ARM/vsub.ll | 279 +
final/test/CodeGen/ARM/vtbl.ll | 109 +
final/test/CodeGen/ARM/vtrn.ll | 124 +
final/test/CodeGen/ARM/vuzp.ll | 102 +
final/test/CodeGen/ARM/vzip.ll | 102 +
final/test/CodeGen/ARM/weak.ll | 16 +
final/test/CodeGen/ARM/weak2.ll | 18 +
.../Alpha/2005-07-12-TwoMallocCalls.ll | 17 +
.../CodeGen/Alpha/2005-12-12-MissingFCMov.ll | 40 +
.../CodeGen/Alpha/2006-01-18-MissedGlobal.ll | 27 +
.../CodeGen/Alpha/2006-01-26-VaargBreak.ll | 14 +
.../test/CodeGen/Alpha/2006-04-04-zextload.ll | 34 +
.../Alpha/2006-07-03-ASMFormalLowering.ll | 18 +
.../test/CodeGen/Alpha/2006-11-01-vastart.ll | 15 +
.../test/CodeGen/Alpha/2007-11-27-mulneg3.ll | 13 +
.../CodeGen/Alpha/2008-11-10-smul_lohi.ll | 22 +
final/test/CodeGen/Alpha/2008-11-12-Add128.ll | 14 +
.../Alpha/2009-07-16-PromoteFloatCompare.ll | 6 +
.../Alpha/2010-04-07-DbgValueOtherTargets.ll | 28 +
.../CodeGen/Alpha/2010-08-01-mulreduce64.ll | 11 +
final/test/CodeGen/Alpha/add.ll | 178 +
final/test/CodeGen/Alpha/add128.ll | 9 +
final/test/CodeGen/Alpha/bic.ll | 9 +
final/test/CodeGen/Alpha/bsr.ll | 12 +
final/test/CodeGen/Alpha/call_adj.ll | 13 +
final/test/CodeGen/Alpha/cmov.ll | 23 +
final/test/CodeGen/Alpha/cmpbge.ll | 16 +
final/test/CodeGen/Alpha/ctlz.ll | 14 +
final/test/CodeGen/Alpha/ctlz_e.ll | 11 +
final/test/CodeGen/Alpha/ctpop.ll | 17 +
final/test/CodeGen/Alpha/dg.exp | 5 +
final/test/CodeGen/Alpha/eqv.ll | 10 +
final/test/CodeGen/Alpha/i32_sub_1.ll | 9 +
.../CodeGen/Alpha/illegal-element-type.ll | 23 +
final/test/CodeGen/Alpha/jmp_table.ll | 99 +
final/test/CodeGen/Alpha/mb.ll | 8 +
final/test/CodeGen/Alpha/mul128.ll | 7 +
final/test/CodeGen/Alpha/mul5.ll | 33 +
final/test/CodeGen/Alpha/neg1.ll | 7 +
final/test/CodeGen/Alpha/not.ll | 8 +
final/test/CodeGen/Alpha/ornot.ll | 10 +
final/test/CodeGen/Alpha/private.ll | 21 +
final/test/CodeGen/Alpha/rpcc.ll | 9 +
final/test/CodeGen/Alpha/srl_and.ll | 10 +
final/test/CodeGen/Alpha/sub128.ll | 9 +
final/test/CodeGen/Alpha/weak.ll | 16 +
final/test/CodeGen/Alpha/wmb.ll | 8 +
final/test/CodeGen/Alpha/zapnot.ll | 9 +
final/test/CodeGen/Alpha/zapnot2.ll | 9 +
final/test/CodeGen/Alpha/zapnot3.ll | 15 +
final/test/CodeGen/Alpha/zapnot4.ll | 7 +
.../Blackfin/2009-08-04-LowerExtract-Live.ll | 15 +
.../Blackfin/2009-08-11-RegScavenger-CSR.ll | 17 +
.../Blackfin/2009-08-15-LiveIn-SubReg.ll | 19 +
.../Blackfin/2009-08-15-MissingDead.ll | 25 +
.../Blackfin/2009-08-15-SetCC-Undef.ll | 16 +
final/test/CodeGen/Blackfin/add-overflow.ll | 18 +
final/test/CodeGen/Blackfin/add.ll | 5 +
final/test/CodeGen/Blackfin/addsub-i128.ll | 42 +
final/test/CodeGen/Blackfin/basic-i1.ll | 51 +
final/test/CodeGen/Blackfin/basic-i16.ll | 36 +
final/test/CodeGen/Blackfin/basic-i32.ll | 51 +
final/test/CodeGen/Blackfin/basic-i64.ll | 51 +
final/test/CodeGen/Blackfin/basic-i8.ll | 51 +
final/test/CodeGen/Blackfin/basictest.ll | 19 +
final/test/CodeGen/Blackfin/burg.ll | 19 +
final/test/CodeGen/Blackfin/cmp-small-imm.ll | 6 +
final/test/CodeGen/Blackfin/cmp64.ll | 17 +
final/test/CodeGen/Blackfin/ct32.ll | 20 +
final/test/CodeGen/Blackfin/ct64.ll | 20 +
final/test/CodeGen/Blackfin/ctlz16.ll | 18 +
final/test/CodeGen/Blackfin/ctlz64.ll | 15 +
final/test/CodeGen/Blackfin/ctpop16.ll | 18 +
final/test/CodeGen/Blackfin/cttz16.ll | 18 +
final/test/CodeGen/Blackfin/cycles.ll | 17 +
final/test/CodeGen/Blackfin/dg.exp | 5 +
final/test/CodeGen/Blackfin/double-cast.ll | 8 +
final/test/CodeGen/Blackfin/frameindex.ll | 10 +
final/test/CodeGen/Blackfin/i17mem.ll | 9 +
final/test/CodeGen/Blackfin/i1mem.ll | 9 +
final/test/CodeGen/Blackfin/i1ops.ll | 10 +
final/test/CodeGen/Blackfin/i216mem.ll | 9 +
final/test/CodeGen/Blackfin/i248mem.ll | 9 +
final/test/CodeGen/Blackfin/i256mem.ll | 9 +
final/test/CodeGen/Blackfin/i256param.ll | 7 +
final/test/CodeGen/Blackfin/i56param.ll | 8 +
final/test/CodeGen/Blackfin/i8mem.ll | 10 +
final/test/CodeGen/Blackfin/inline-asm.ll | 38 +
final/test/CodeGen/Blackfin/int-setcc.ll | 80 +
final/test/CodeGen/Blackfin/invalid-apint.ll | 15 +
final/test/CodeGen/Blackfin/jumptable.ll | 53 +
final/test/CodeGen/Blackfin/large-switch.ll | 187 +
final/test/CodeGen/Blackfin/load-i16.ll | 13 +
final/test/CodeGen/Blackfin/logic-i16.ll | 16 +
final/test/CodeGen/Blackfin/many-args.ll | 23 +
final/test/CodeGen/Blackfin/mulhu.ll | 106 +
final/test/CodeGen/Blackfin/printf.ll | 10 +
final/test/CodeGen/Blackfin/printf2.ll | 8 +
final/test/CodeGen/Blackfin/promote-logic.ll | 42 +
final/test/CodeGen/Blackfin/promote-setcc.ll | 37 +
final/test/CodeGen/Blackfin/sdiv.ll | 5 +
final/test/CodeGen/Blackfin/simple-select.ll | 11 +
final/test/CodeGen/Blackfin/switch.ll | 18 +
final/test/CodeGen/Blackfin/switch2.ll | 16 +
final/test/CodeGen/Blackfin/sync-intr.ll | 16 +
.../CBackend/2002-05-16-NameCollide.ll | 8 +
.../CBackend/2002-05-21-MissingReturn.ll | 20 +
.../CBackend/2002-08-19-ConstPointerRef.ll | 7 +
.../CBackend/2002-08-19-ConstantExpr.ll | 8 +
.../CBackend/2002-08-19-DataPointer.ll | 4 +
.../CBackend/2002-08-19-FunctionPointer.ll | 5 +
.../CBackend/2002-08-19-HardConstantExpr.ll | 5 +
.../CBackend/2002-08-20-RecursiveTypes.ll | 3 +
.../CBackend/2002-08-20-UnnamedArgument.ll | 10 +
.../CBackend/2002-08-26-IndirectCallTest.ll | 17 +
.../2002-08-30-StructureOrderingTest.ll | 8 +
.../CBackend/2002-09-20-ArrayTypeFailure.ll | 7 +
.../CBackend/2002-09-20-VarArgPrototypes.ll | 6 +
.../CBackend/2002-10-15-OpaqueTypeProblem.ll | 6 +
.../CodeGen/CBackend/2002-10-16-External.ll | 4 +
.../2002-10-30-FunctionPointerAlloca.ll | 10 +
.../CBackend/2002-11-06-PrintEscaped.ll | 11 +
.../CBackend/2003-05-12-IntegerSizeWarning.ll | 8 +
.../CBackend/2003-05-13-VarArgFunction.ll | 11 +
.../CBackend/2003-05-31-MissingStructName.ll | 5 +
.../CBackend/2003-06-01-NullPointerType.ll | 9 +
.../CBackend/2003-06-11-HexConstant.ll | 4 +
.../2003-06-11-LiteralStringProblem.ll | 3 +
.../CBackend/2003-06-28-InvokeSupport.ll | 17 +
.../CBackend/2003-06-28-LinkOnceGlobalVars.ll | 3 +
.../CBackend/2003-10-12-NANGlobalInits.ll | 5 +
.../CodeGen/CBackend/2003-10-23-UnusedType.ll | 8 +
.../CBackend/2003-10-28-CastToPtrToStruct.ll | 12 +
.../CBackend/2003-11-21-ConstantShiftExpr.ll | 13 +
.../CBackend/2004-02-13-FrameReturnAddress.ll | 16 +
.../2004-02-15-PreexistingExternals.ll | 18 +
.../2004-02-26-FPNotPrintableConstants.ll | 11 +
.../CBackend/2004-02-26-LinkOnceFunctions.ll | 6 +
.../CBackend/2004-08-09-va-end-null.ll | 10 +
.../2004-11-13-FunctionPointerCast.ll | 12 +
.../CBackend/2004-12-03-ExternStatics.ll | 10 +
.../2004-12-28-LogicalConstantExprs.ll | 5 +
.../CBackend/2005-02-14-VolatileOperations.ll | 8 +
.../CBackend/2005-03-08-RecursiveTypeCrash.ll | 5 +
.../2005-07-14-NegationToMinusMinus.ll | 18 +
.../test/CodeGen/CBackend/2005-08-23-Fmod.ll | 7 +
.../CBackend/2005-09-27-VolatileFuncPtr.ll | 10 +
.../CBackend/2006-12-11-Float-Bitcast.ll | 49 +
.../CBackend/2007-01-08-ParamAttr-ICmp.ll | 26 +
.../CBackend/2007-01-15-NamedArrayType.ll | 11 +
.../CBackend/2007-01-17-StackSaveNRestore.ll | 12 +
.../CodeGen/CBackend/2007-02-05-memset.ll | 13 +
.../CBackend/2007-02-23-NameConflicts.ll | 14 +
.../CBackend/2007-07-11-PackedStruct.ll | 9 +
.../CBackend/2008-02-01-UnalignedLoadStore.ll | 15 +
.../CBackend/2008-05-21-MRV-InlineAsm.ll | 19 +
.../CBackend/2008-05-31-BoolOverflow.ll | 14 +
.../CBackend/2008-06-04-IndirectMem.ll | 12 +
.../2008-10-21-PPCLongDoubleConstant.ll | 29 +
final/test/CodeGen/CBackend/dg.exp | 5 +
final/test/CodeGen/CBackend/fneg.ll | 7 +
final/test/CodeGen/CBackend/pr2408.ll | 12 +
final/test/CodeGen/CBackend/vectors.ll | 37 +
final/test/CodeGen/CPP/2007-06-16-Funcname.ll | 7 +
.../CodeGen/CPP/2009-05-01-Long-Double.ll | 13 +
final/test/CodeGen/CPP/2009-05-04-CondBr.ll | 28 +
final/test/CodeGen/CPP/dg.exp | 5 +
final/test/CodeGen/CPP/llvm2cpp.ll | 756 +
.../test/CodeGen/CellSPU/2009-01-01-BrCond.ll | 31 +
.../2010-04-07-DbgValueOtherTargets.ll | 28 +
final/test/CodeGen/CellSPU/and_ops.ll | 279 +
final/test/CodeGen/CellSPU/arg_ret.ll | 34 +
final/test/CodeGen/CellSPU/bigstack.ll | 17 +
final/test/CodeGen/CellSPU/bss.ll | 11 +
final/test/CodeGen/CellSPU/call.ll | 53 +
final/test/CodeGen/CellSPU/call_indirect.ll | 49 +
final/test/CodeGen/CellSPU/crash.ll | 8 +
final/test/CodeGen/CellSPU/ctpop.ll | 30 +
final/test/CodeGen/CellSPU/dg.exp | 5 +
final/test/CodeGen/CellSPU/div_ops.ll | 22 +
final/test/CodeGen/CellSPU/dp_farith.ll | 102 +
final/test/CodeGen/CellSPU/eqv.ll | 152 +
final/test/CodeGen/CellSPU/extract_elt.ll | 277 +
final/test/CodeGen/CellSPU/fcmp32.ll | 36 +
final/test/CodeGen/CellSPU/fcmp64.ll | 7 +
final/test/CodeGen/CellSPU/fdiv.ll | 22 +
final/test/CodeGen/CellSPU/fneg-fabs.ll | 42 +
final/test/CodeGen/CellSPU/i64ops.ll | 57 +
final/test/CodeGen/CellSPU/i8ops.ll | 25 +
final/test/CodeGen/CellSPU/icmp16.ll | 350 +
final/test/CodeGen/CellSPU/icmp32.ll | 350 +
final/test/CodeGen/CellSPU/icmp64.ll | 146 +
final/test/CodeGen/CellSPU/icmp8.ll | 286 +
final/test/CodeGen/CellSPU/immed16.ll | 40 +
final/test/CodeGen/CellSPU/immed32.ll | 83 +
final/test/CodeGen/CellSPU/immed64.ll | 95 +
final/test/CodeGen/CellSPU/int2fp.ll | 41 +
.../test/CodeGen/CellSPU/intrinsics_branch.ll | 150 +
.../test/CodeGen/CellSPU/intrinsics_float.ll | 94 +
.../CodeGen/CellSPU/intrinsics_logical.ll | 49 +
final/test/CodeGen/CellSPU/jumptable.ll | 21 +
final/test/CodeGen/CellSPU/loads.ll | 59 +
.../test/CodeGen/CellSPU/mul-with-overflow.ll | 15 +
final/test/CodeGen/CellSPU/mul_ops.ll | 88 +
final/test/CodeGen/CellSPU/nand.ll | 121 +
final/test/CodeGen/CellSPU/or_ops.ll | 264 +
final/test/CodeGen/CellSPU/private.ll | 22 +
final/test/CodeGen/CellSPU/rotate_ops.ll | 172 +
final/test/CodeGen/CellSPU/select_bits.ll | 569 +
final/test/CodeGen/CellSPU/sext128.ll | 71 +
final/test/CodeGen/CellSPU/shift_ops.ll | 344 +
final/test/CodeGen/CellSPU/shuffles.ll | 67 +
final/test/CodeGen/CellSPU/sp_farith.ll | 90 +
final/test/CodeGen/CellSPU/stores.ll | 181 +
final/test/CodeGen/CellSPU/storestruct.ll | 13 +
final/test/CodeGen/CellSPU/struct_1.ll | 144 +
final/test/CodeGen/CellSPU/sub_ops.ll | 26 +
final/test/CodeGen/CellSPU/trunc.ll | 94 +
.../CellSPU/useful-harnesses/README.txt | 5 +
.../CellSPU/useful-harnesses/i32operations.c | 69 +
.../CellSPU/useful-harnesses/i64operations.c | 673 +
.../CellSPU/useful-harnesses/i64operations.h | 43 +
.../CellSPU/useful-harnesses/lit.local.cfg | 1 +
.../CellSPU/useful-harnesses/vecoperations.c | 179 +
final/test/CodeGen/CellSPU/v2f32.ll | 74 +
final/test/CodeGen/CellSPU/v2i32.ll | 77 +
final/test/CodeGen/CellSPU/vec_const.ll | 154 +
final/test/CodeGen/CellSPU/vecinsert.ll | 131 +
.../2002-04-14-UnexpectedUnsignedType.ll | 13 +
.../2002-04-16-StackFrameSizeAlignment.ll | 14 +
.../CodeGen/Generic/2003-05-27-phifcmpd.ll | 19 +
.../Generic/2003-05-27-useboolinotherbb.ll | 13 +
.../Generic/2003-05-27-usefsubasbool.ll | 14 +
.../CodeGen/Generic/2003-05-28-ManyArgs.ll | 153 +
.../CodeGen/Generic/2003-05-30-BadFoldGEP.ll | 39 +
.../Generic/2003-05-30-BadPreselectPhi.ll | 33 +
.../CodeGen/Generic/2003-07-06-BadIntCmp.ll | 51 +
.../Generic/2003-07-07-BadLongConst.ll | 20 +
.../Generic/2003-07-08-BadCastToBool.ll | 34 +
.../Generic/2003-07-29-BadConstSbyte.ll | 40 +
.../Generic/2004-02-08-UnwindSupport.ll | 17 +
.../2004-05-09-LiveVarPartialRegister.ll | 13 +
.../Generic/2005-01-18-SetUO-InfLoop.ll | 20 +
.../CodeGen/Generic/2005-04-09-GlobalInPHI.ll | 20 +
.../Generic/2005-07-12-memcpy-i64-length.ll | 11 +
.../Generic/2005-10-18-ZeroSizeStackObject.ll | 6 +
.../CodeGen/Generic/2005-10-21-longlonggtu.ll | 16 +
.../test/CodeGen/Generic/2005-12-01-Crash.ll | 20 +
.../Generic/2005-12-12-ExpandSextInreg.ll | 7 +
.../Generic/2006-01-12-BadSetCCFold.ll | 35 +
.../2006-01-18-InvalidBranchOpcodeAssert.ll | 15 +
.../Generic/2006-02-12-InsertLibcall.ll | 60 +
.../Generic/2006-03-01-dagcombineinfloop.ll | 95 +
.../CodeGen/Generic/2006-04-26-SetCCAnd.ll | 40 +
.../Generic/2006-04-28-Sign-extend-bool.ll | 9 +
.../Generic/2006-05-06-GEP-Cast-Sink-Crash.ll | 29 +
.../Generic/2006-06-12-LowerSwitchCrash.ll | 10 +
.../2006-06-13-ComputeMaskedBitsCrash.ll | 35 +
.../Generic/2006-06-28-SimplifySetCCCrash.ll | 279 +
.../CodeGen/Generic/2006-07-03-schedulers.ll | 32 +
.../Generic/2006-08-30-CoalescerCrash.ll | 112 +
.../Generic/2006-09-02-LocalAllocCrash.ll | 117 +
.../Generic/2006-09-06-SwitchLowering.ll | 96 +
.../CodeGen/Generic/2006-10-27-CondFolding.ll | 21 +
.../test/CodeGen/Generic/2006-10-29-Crash.ll | 22 +
.../Generic/2006-11-20-DAGCombineCrash.ll | 41 +
.../Generic/2007-01-15-LoadSelectCycle.ll | 12 +
.../test/CodeGen/Generic/2007-02-25-invoke.ll | 12 +
.../2007-04-08-MultipleFrameIndices.ll | 11 +
.../Generic/2007-04-13-SwitchLowerBadPhi.ll | 24 +
.../CodeGen/Generic/2007-04-17-lsr-crash.ll | 35 +
.../Generic/2007-04-27-InlineAsm-X-Dest.ll | 8 +
.../Generic/2007-04-27-LargeMemObject.ll | 13 +
.../2007-04-30-LandingPadBranchFolding.ll | 59 +
.../CodeGen/Generic/2007-05-03-EHTypeInfo.ll | 12 +
.../Generic/2007-05-15-InfiniteRecursion.ll | 90 +
.../Generic/2007-11-21-UndeadIllegalNode.ll | 157 +
.../CodeGen/Generic/2007-12-17-InvokeAsm.ll | 13 +
.../Generic/2007-12-31-UnusedSelector.ll | 34 +
.../Generic/2008-01-25-dag-combine-mul.ll | 31 +
.../CodeGen/Generic/2008-01-30-LoadCrash.ll | 19 +
final/test/CodeGen/Generic/2008-02-04-Ctlz.ll | 21 +
.../Generic/2008-02-04-ExtractSubvector.ll | 14 +
.../CodeGen/Generic/2008-02-20-MatchingMem.ll | 9 +
.../CodeGen/Generic/2008-02-25-NegateZero.ll | 14 +
.../Generic/2008-02-26-NegatableCrash.ll | 50 +
.../Generic/2008-08-07-PtrToInt-SmallerInt.ll | 5 +
.../CodeGen/Generic/2009-03-17-LSR-APInt.ll | 92 +
.../2009-03-29-SoftFloatVectorExtract.ll | 10 +
.../CodeGen/Generic/2009-04-10-SinkCrash.ll | 16 +
.../Generic/2009-04-28-i128-cmp-crash.ll | 32 +
.../Generic/2009-06-03-UnreachableSplitPad.ll | 15 +
.../Generic/2009-11-16-BadKillsCrash.ll | 73 +
.../Generic/2010-07-27-DAGCombineCrash.ll | 6 +
.../CodeGen/Generic/2010-11-04-BigByval.ll | 11 +
.../test/CodeGen/Generic/2010-ZeroSizedArg.ll | 17 +
.../Generic/2011-01-06-BigNumberCrash.ll | 15 +
.../CodeGen/Generic/2011-02-12-shuffle.ll | 32 +
final/test/CodeGen/Generic/APIntLoadStore.ll | 2049 ++
final/test/CodeGen/Generic/APIntParam.ll | 1537 +
final/test/CodeGen/Generic/APIntSextParam.ll | 1537 +
final/test/CodeGen/Generic/APIntZextParam.ll | 1537 +
final/test/CodeGen/Generic/BasicInstrs.ll | 54 +
final/test/CodeGen/Generic/BurgBadRegAlloc.ll | 829 +
.../CodeGen/Generic/ConstantExprLowering.ll | 22 +
final/test/CodeGen/Generic/Makefile | 23 +
.../CodeGen/Generic/add-with-overflow-128.ll | 34 +
.../CodeGen/Generic/add-with-overflow-24.ll | 42 +
.../test/CodeGen/Generic/add-with-overflow.ll | 41 +
final/test/CodeGen/Generic/addr-label.ll | 81 +
.../CodeGen/Generic/asm-large-immediate.ll | 8 +
.../test/CodeGen/Generic/badCallArgLRLLVM.ll | 31 +
final/test/CodeGen/Generic/badFoldGEP.ll | 27 +
final/test/CodeGen/Generic/badarg6.ll | 32 +
final/test/CodeGen/Generic/badlive.ll | 28 +
final/test/CodeGen/Generic/bool-to-double.ll | 6 +
final/test/CodeGen/Generic/bool-vector.ll | 11 +
final/test/CodeGen/Generic/call-ret0.ll | 10 +
final/test/CodeGen/Generic/call-ret42.ll | 10 +
final/test/CodeGen/Generic/call-void.ll | 11 +
final/test/CodeGen/Generic/call2-ret0.ll | 16 +
final/test/CodeGen/Generic/cast-fp.ll | 33 +
final/test/CodeGen/Generic/constindices.ll | 44 +
final/test/CodeGen/Generic/crash.ll | 40 +
final/test/CodeGen/Generic/dbg_value.ll | 13 +
final/test/CodeGen/Generic/dg.exp | 3 +
final/test/CodeGen/Generic/div-neg-power-2.ll | 7 +
.../test/CodeGen/Generic/empty-load-store.ll | 18 +
.../CodeGen/Generic/externally_available.ll | 10 +
final/test/CodeGen/Generic/fastcall.ll | 14 +
final/test/CodeGen/Generic/fneg-fabs.ll | 26 +
.../test/CodeGen/Generic/fp-to-int-invalid.ll | 18 +
final/test/CodeGen/Generic/fp_to_int.ll | 81 +
final/test/CodeGen/Generic/fpowi-promote.ll | 11 +
final/test/CodeGen/Generic/fwdtwice.ll | 29 +
final/test/CodeGen/Generic/getresult-undef.ll | 6 +
final/test/CodeGen/Generic/global-ret0.ll | 8 +
final/test/CodeGen/Generic/hello.ll | 11 +
final/test/CodeGen/Generic/i128-addsub.ll | 39 +
final/test/CodeGen/Generic/i128-arith.ll | 11 +
.../Generic/inline-asm-special-strings.ll | 6 +
final/test/CodeGen/Generic/intrinsics.ll | 40 +
final/test/CodeGen/Generic/invalid-memcpy.ll | 19 +
final/test/CodeGen/Generic/isunord.ll | 9 +
.../CodeGen/Generic/legalize-dbg-value.ll | 25 +
.../CodeGen/Generic/llvm-ct-intrinsics.ll | 62 +
...e-return-values-cross-block-with-invoke.ll | 18 +
final/test/CodeGen/Generic/negintconst.ll | 47 +
final/test/CodeGen/Generic/nested-select.ll | 19 +
final/test/CodeGen/Generic/overflow.ll | 220 +
final/test/CodeGen/Generic/pr2625.ll | 17 +
final/test/CodeGen/Generic/pr3288.ll | 67 +
final/test/CodeGen/Generic/print-add.ll | 18 +
final/test/CodeGen/Generic/print-arith-fp.ll | 61 +
final/test/CodeGen/Generic/print-arith-int.ll | 84 +
final/test/CodeGen/Generic/print-int.ll | 13 +
final/test/CodeGen/Generic/print-mul-exp.ll | 55 +
final/test/CodeGen/Generic/print-mul.ll | 32 +
final/test/CodeGen/Generic/print-shift.ll | 34 +
final/test/CodeGen/Generic/ret0.ll | 5 +
final/test/CodeGen/Generic/ret42.ll | 5 +
final/test/CodeGen/Generic/select-cc.ll | 9 +
final/test/CodeGen/Generic/select.ll | 187 +
final/test/CodeGen/Generic/shift-int64.ll | 12 +
final/test/CodeGen/Generic/spillccr.ll | 49 +
.../test/CodeGen/Generic/stacksave-restore.ll | 14 +
final/test/CodeGen/Generic/storetrunc-fp.ll | 8 +
.../CodeGen/Generic/switch-lower-feature.ll | 63 +
final/test/CodeGen/Generic/switch-lower.ll | 348 +
final/test/CodeGen/Generic/trap.ll | 9 +
final/test/CodeGen/Generic/v-split.ll | 11 +
final/test/CodeGen/Generic/vector-casts.ll | 45 +
.../CodeGen/Generic/vector-constantexpr.ll | 7 +
.../Generic/vector-identity-shuffle.ll | 17 +
final/test/CodeGen/Generic/vector.ll | 154 +
.../MBlaze/2010-04-07-DbgValueOtherTargets.ll | 28 +
final/test/CodeGen/MBlaze/brind.ll | 72 +
final/test/CodeGen/MBlaze/callind.ll | 80 +
final/test/CodeGen/MBlaze/cc.ll | 266 +
final/test/CodeGen/MBlaze/dg.exp | 5 +
final/test/CodeGen/MBlaze/div.ll | 75 +
final/test/CodeGen/MBlaze/fpu.ll | 66 +
final/test/CodeGen/MBlaze/fsl.ll | 323 +
final/test/CodeGen/MBlaze/imm.ll | 70 +
final/test/CodeGen/MBlaze/intr.ll | 48 +
final/test/CodeGen/MBlaze/jumptable.ll | 79 +
final/test/CodeGen/MBlaze/loop.ll | 46 +
final/test/CodeGen/MBlaze/mul.ll | 51 +
final/test/CodeGen/MBlaze/mul64.ll | 23 +
final/test/CodeGen/MBlaze/select.ll | 15 +
final/test/CodeGen/MBlaze/shift.ll | 115 +
final/test/CodeGen/MBlaze/svol.ll | 80 +
.../CodeGen/MSP430/2009-05-10-CyclicDAG.ll | 32 +
final/test/CodeGen/MSP430/2009-05-17-Rot.ll | 17 +
final/test/CodeGen/MSP430/2009-05-17-Shift.ll | 15 +
.../CodeGen/MSP430/2009-05-19-DoubleSplit.ll | 11 +
.../MSP430/2009-08-25-DynamicStackAlloc.ll | 30 +
.../CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll | 22 +
.../CodeGen/MSP430/2009-10-10-OrImpDef.ll | 14 +
.../CodeGen/MSP430/2009-11-05-8BitLibcalls.ll | 22 +
.../CodeGen/MSP430/2009-11-08-InvalidResNo.ll | 64 +
.../test/CodeGen/MSP430/2009-11-20-NewNode.ll | 36 +
.../CodeGen/MSP430/2009-12-21-FrameAddr.ll | 13 +
.../CodeGen/MSP430/2009-12-22-InlineAsm.ll | 29 +
.../MSP430/2010-04-07-DbgValueOtherTargets.ll | 28 +
.../CodeGen/MSP430/2010-05-01-CombinerAnd.ll | 27 +
final/test/CodeGen/MSP430/AddrMode-bis-rx.ll | 74 +
final/test/CodeGen/MSP430/AddrMode-bis-xr.ll | 81 +
final/test/CodeGen/MSP430/AddrMode-mov-rx.ll | 67 +
final/test/CodeGen/MSP430/AddrMode-mov-xr.ll | 67 +
final/test/CodeGen/MSP430/Inst16mi.ll | 48 +
final/test/CodeGen/MSP430/Inst16mm.ll | 69 +
final/test/CodeGen/MSP430/Inst16mr.ll | 58 +
final/test/CodeGen/MSP430/Inst16ri.ll | 37 +
final/test/CodeGen/MSP430/Inst16rm.ll | 46 +
final/test/CodeGen/MSP430/Inst16rr.ll | 45 +
final/test/CodeGen/MSP430/Inst8mi.ll | 48 +
final/test/CodeGen/MSP430/Inst8mm.ll | 55 +
final/test/CodeGen/MSP430/Inst8mr.ll | 58 +
final/test/CodeGen/MSP430/Inst8ri.ll | 37 +
final/test/CodeGen/MSP430/Inst8rm.ll | 46 +
final/test/CodeGen/MSP430/Inst8rr.ll | 46 +
final/test/CodeGen/MSP430/bit.ll | 166 +
final/test/CodeGen/MSP430/dg.exp | 5 +
final/test/CodeGen/MSP430/indirectbr.ll | 41 +
final/test/CodeGen/MSP430/indirectbr2.ll | 29 +
final/test/CodeGen/MSP430/inline-asm.ll | 26 +
.../CodeGen/MSP430/mult-alt-generic-msp430.ll | 323 +
final/test/CodeGen/MSP430/postinc.ll | 114 +
final/test/CodeGen/MSP430/setcc.ll | 116 +
final/test/CodeGen/MSP430/shifts.ll | 51 +
final/test/CodeGen/Mips/2008-06-05-Carry.ll | 19 +
final/test/CodeGen/Mips/2008-07-03-SRet.ll | 17 +
final/test/CodeGen/Mips/2008-07-05-ByVal.ll | 18 +
final/test/CodeGen/Mips/2008-07-06-fadd64.ll | 10 +
.../test/CodeGen/Mips/2008-07-07-FPExtend.ll | 10 +
.../test/CodeGen/Mips/2008-07-07-Float2Int.ll | 16 +
.../Mips/2008-07-07-IntDoubleConvertions.ll | 33 +
.../Mips/2008-07-15-InternalConstant.ll | 22 +
.../CodeGen/Mips/2008-07-15-SmallSection.ll | 32 +
.../CodeGen/Mips/2008-07-16-SignExtInReg.ll | 19 +
final/test/CodeGen/Mips/2008-07-22-Cstpool.ll | 12 +
final/test/CodeGen/Mips/2008-07-23-fpcmp.ll | 34 +
final/test/CodeGen/Mips/2008-07-29-icmp.ll | 11 +
.../test/CodeGen/Mips/2008-07-31-fcopysign.ll | 14 +
.../test/CodeGen/Mips/2008-08-01-AsmInline.ll | 17 +
.../CodeGen/Mips/2008-08-03-ReturnDouble.ll | 18 +
final/test/CodeGen/Mips/2008-08-03-fabs64.ll | 16 +
.../CodeGen/Mips/2008-08-04-Bitconvert.ll | 18 +
final/test/CodeGen/Mips/2008-08-06-Alloca.ll | 16 +
final/test/CodeGen/Mips/2008-08-07-CC.ll | 12 +
final/test/CodeGen/Mips/2008-08-07-FPRound.ll | 10 +
final/test/CodeGen/Mips/2008-08-08-bswap.ll | 12 +
final/test/CodeGen/Mips/2008-08-08-ctlz.ll | 12 +
.../CodeGen/Mips/2008-10-13-LegalizerBug.ll | 29 +
.../CodeGen/Mips/2008-11-10-xint_to_fp.ll | 55 +
.../CodeGen/Mips/2009-11-16-CstPoolLoad.ll | 10 +
.../Mips/2010-04-07-DbgValueOtherTargets.ll | 28 +
final/test/CodeGen/Mips/2010-07-20-Select.ll | 21 +
final/test/CodeGen/Mips/2010-07-20-Switch.ll | 33 +
.../CodeGen/Mips/2010-11-09-CountLeading.ll | 33 +
final/test/CodeGen/Mips/2010-11-09-Mul.ll | 15 +
final/test/CodeGen/Mips/blockaddr.ll | 26 +
final/test/CodeGen/Mips/cmov.ll | 15 +
final/test/CodeGen/Mips/dg.exp | 5 +
final/test/CodeGen/Mips/divrem.ll | 51 +
final/test/CodeGen/Mips/largeimm1.ll | 13 +
final/test/CodeGen/Mips/madd-msub.ll | 65 +
final/test/CodeGen/Mips/o32_cc.ll | 325 +
final/test/CodeGen/Mips/private.ll | 21 +
final/test/CodeGen/Mips/rotate.ll | 40 +
final/test/CodeGen/PTX/add.ll | 71 +
final/test/CodeGen/PTX/dg.exp | 5 +
final/test/CodeGen/PTX/exit.ll | 14 +
final/test/CodeGen/PTX/intrinsic.ll | 43 +
final/test/CodeGen/PTX/ld.ll | 422 +
final/test/CodeGen/PTX/mov.ll | 62 +
final/test/CodeGen/PTX/mul.ll | 39 +
final/test/CodeGen/PTX/options.ll | 9 +
final/test/CodeGen/PTX/ret.ll | 7 +
final/test/CodeGen/PTX/shl.ll | 22 +
final/test/CodeGen/PTX/shr.ll | 43 +
final/test/CodeGen/PTX/st.ll | 382 +
final/test/CodeGen/PTX/sub.ll | 71 +
.../CodeGen/PowerPC/2004-11-29-ShrCrash.ll | 6 +
.../CodeGen/PowerPC/2004-11-30-shift-crash.ll | 7 +
.../PowerPC/2004-11-30-shr-var-crash.ll | 9 +
.../PowerPC/2004-12-12-ZeroSizeCommon.ll | 4 +
.../PowerPC/2005-01-14-SetSelectCrash.ll | 8 +
.../CodeGen/PowerPC/2005-01-14-UndefLong.ll | 5 +
.../PowerPC/2005-08-12-rlwimi-crash.ll | 13 +
.../2005-09-02-LegalizeDuplicatesCalls.ll | 11 +
.../PowerPC/2005-10-08-ArithmeticRotate.ll | 11 +
.../PowerPC/2005-11-30-vastart-crash.ll | 18 +
.../PowerPC/2006-01-11-darwin-fp-argument.ll | 10 +
.../PowerPC/2006-01-20-ShiftPartsCrash.ll | 19 +
.../PowerPC/2006-04-01-FloatDoubleExtend.ll | 9 +
.../CodeGen/PowerPC/2006-04-05-splat-ish.ll | 10 +
.../PowerPC/2006-04-19-vmaddfp-crash.ll | 58 +
.../PowerPC/2006-05-12-rlwimi-crash.ll | 55 +
.../PowerPC/2006-07-07-ComputeMaskedBits.ll | 17 +
.../PowerPC/2006-07-19-stwbrx-crash.ll | 11 +
.../CodeGen/PowerPC/2006-08-11-RetVector.ll | 8 +
.../PowerPC/2006-08-15-SelectionCrash.ll | 30 +
.../CodeGen/PowerPC/2006-09-28-shift_64.ll | 25 +
.../2006-10-11-combiner-aa-regression.ll | 23 +
.../CodeGen/PowerPC/2006-10-13-Miscompile.ll | 16 +
.../PowerPC/2006-10-17-brcc-miscompile.ll | 20 +
.../PowerPC/2006-10-17-ppc64-alloca.ll | 7 +
.../2006-11-10-DAGCombineMiscompile.ll | 13 +
.../PowerPC/2006-11-29-AltivecFPSplat.ll | 10 +
.../CodeGen/PowerPC/2006-12-07-LargeAlloca.ll | 24 +
.../CodeGen/PowerPC/2006-12-07-SelectCrash.ll | 22 +
.../PowerPC/2007-01-04-ArgExtension.ll | 10 +
.../CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 27 +
.../CodeGen/PowerPC/2007-01-29-lbrx-asm.ll | 7 +
.../PowerPC/2007-01-31-InlineAsmAddrMode.ll | 24 +
.../CodeGen/PowerPC/2007-02-16-AlignPacked.ll | 4 +
.../2007-02-16-InlineAsmNConstraint.ll | 11 +
.../PowerPC/2007-02-23-lr-saved-twice.ll | 14 +
.../test/CodeGen/PowerPC/2007-03-24-cntlzd.ll | 12 +
.../PowerPC/2007-03-30-SpillerCrash.ll | 1801 ++
.../2007-04-24-InlineAsm-I-Modifier.ll | 14 +
.../2007-04-30-InlineAsmEarlyClobber.ll | 31 +
.../2007-05-03-InlineAsm-S-Constraint.ll | 12 +
.../2007-05-14-InlineAsmSelectCrash.ll | 25 +
.../CodeGen/PowerPC/2007-05-22-tailmerge-3.ll | 68 +
.../PowerPC/2007-05-30-dagcombine-miscomp.ll | 14 +
.../CodeGen/PowerPC/2007-06-28-BCCISelBug.ll | 85 +
.../PowerPC/2007-08-04-CoalescerAssert.ll | 28 +
.../CodeGen/PowerPC/2007-09-04-AltivecDST.ll | 15 +
.../PowerPC/2007-09-07-LoadStoreIdxForms.ll | 16 +
.../CodeGen/PowerPC/2007-09-08-unaligned.ll | 53 +
.../PowerPC/2007-09-11-RegCoalescerAssert.ll | 9 +
.../PowerPC/2007-09-12-LiveIntervalsAssert.ll | 15 +
.../2007-10-16-InlineAsmFrameOffset.ll | 14 +
.../PowerPC/2007-10-18-PtrArithmetic.ll | 22 +
.../PowerPC/2007-10-21-LocalRegAllocAssert.ll | 27 +
.../2007-10-21-LocalRegAllocAssert2.ll | 25 +
.../PowerPC/2007-11-04-CoalescerCrash.ll | 148 +
.../PowerPC/2007-11-16-landingpad-split.ll | 59 +
.../PowerPC/2007-11-19-VectorSplitting.ll | 16 +
.../PowerPC/2008-02-05-LiveIntervalsAssert.ll | 67 +
.../PowerPC/2008-02-09-LocalRegAllocAssert.ll | 10 +
.../PowerPC/2008-03-05-RegScavengerAssert.ll | 18 +
.../PowerPC/2008-03-17-RegScavengerCrash.ll | 31 +
.../PowerPC/2008-03-18-RegScavengerAssert.ll | 6 +
.../PowerPC/2008-03-24-AddressRegImm.ll | 25 +
.../PowerPC/2008-03-24-CoalescerBug.ll | 30 +
.../PowerPC/2008-03-26-CoalescerBug.ll | 28 +
.../PowerPC/2008-04-10-LiveIntervalCrash.ll | 100 +
.../PowerPC/2008-04-16-CoalescerBug.ll | 14 +
.../PowerPC/2008-04-23-CoalescerCrash.ll | 89 +
.../CodeGen/PowerPC/2008-05-01-ppc_fp128.ll | 15 +
.../PowerPC/2008-06-19-LegalizerCrash.ll | 6 +
.../PowerPC/2008-06-21-F128LoadStore.ll | 10 +
.../PowerPC/2008-06-23-LiveVariablesCrash.ll | 25 +
.../PowerPC/2008-07-10-SplatMiscompile.ll | 10 +
.../test/CodeGen/PowerPC/2008-07-15-Bswap.ll | 386 +
final/test/CodeGen/PowerPC/2008-07-15-Fabs.ll | 19 +
.../PowerPC/2008-07-15-SignExtendInreg.ll | 17 +
final/test/CodeGen/PowerPC/2008-07-17-Fneg.ll | 18 +
.../CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll | 11 +
.../PowerPC/2008-09-12-CoalescerBug.ll | 254 +
.../PowerPC/2008-10-17-AsmMatchingOperands.ll | 11 +
.../PowerPC/2008-10-28-UnprocessedNode.ll | 11 +
.../CodeGen/PowerPC/2008-10-28-f128-i32.ll | 33 +
.../PowerPC/2008-10-30-IllegalShift.ll | 14 +
.../PowerPC/2008-10-31-PPCF128Libcalls.ll | 44 +
.../PowerPC/2008-12-02-LegalizeTypeAssert.ll | 18 +
final/test/CodeGen/PowerPC/2008-12-12-EH.ll | 9 +
.../PowerPC/2009-01-16-DeclareISelBug.ll | 19 +
.../test/CodeGen/PowerPC/2009-03-17-LSRBug.ll | 51 +
.../PowerPC/2009-05-28-LegalizeBRCC.ll | 15 +
.../PowerPC/2009-07-16-InlineAsm-M-Operand.ll | 16 +
...009-08-17-inline-asm-addr-mode-breakage.ll | 25 +
.../PowerPC/2009-08-23-linkerprivate.ll | 8 +
.../CodeGen/PowerPC/2009-09-18-carrybit.ll | 62 +
.../PowerPC/2009-11-15-ProcImpDefsBug.ll | 105 +
.../CodeGen/PowerPC/2009-11-15-ReMatBug.ll | 155 +
.../CodeGen/PowerPC/2009-11-25-ImpDefBug.ll | 56 +
.../CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll | 20 +
.../test/CodeGen/PowerPC/2010-02-12-saveCR.ll | 30 +
.../PowerPC/2010-03-09-indirect-call.ll | 19 +
.../PowerPC/2010-04-01-MachineCSEBug.ll | 70 +
.../2010-04-07-DbgValueOtherTargets.ll | 28 +
.../CodeGen/PowerPC/2010-05-03-retaddr1.ll | 24 +
.../PowerPC/2010-10-11-Fast-Varargs.ll | 16 +
.../PowerPC/2010-12-18-PPCStackRefs.ll | 22 +
final/test/CodeGen/PowerPC/Atomics-32.ll | 749 +
final/test/CodeGen/PowerPC/Atomics-64.ll | 773 +
final/test/CodeGen/PowerPC/Frames-alloca.ll | 30 +
final/test/CodeGen/PowerPC/Frames-large.ll | 52 +
final/test/CodeGen/PowerPC/Frames-leaf.ll | 37 +
final/test/CodeGen/PowerPC/Frames-small.ll | 29 +
.../test/CodeGen/PowerPC/LargeAbsoluteAddr.ll | 17 +
final/test/CodeGen/PowerPC/addc.ll | 33 +
final/test/CodeGen/PowerPC/addi-reassoc.ll | 19 +
final/test/CodeGen/PowerPC/align.ll | 46 +
final/test/CodeGen/PowerPC/and-branch.ll | 17 +
final/test/CodeGen/PowerPC/and-elim.ll | 18 +
final/test/CodeGen/PowerPC/and-imm.ll | 14 +
final/test/CodeGen/PowerPC/and_add.ll | 15 +
final/test/CodeGen/PowerPC/and_sext.ll | 28 +
final/test/CodeGen/PowerPC/and_sra.ll | 27 +
final/test/CodeGen/PowerPC/atomic-1.ll | 21 +
final/test/CodeGen/PowerPC/atomic-2.ll | 21 +
.../CodeGen/PowerPC/available-externally.ll | 71 +
.../CodeGen/PowerPC/big-endian-actual-args.ll | 9 +
.../CodeGen/PowerPC/big-endian-call-result.ll | 13 +
.../CodeGen/PowerPC/big-endian-formal-args.ll | 15 +
final/test/CodeGen/PowerPC/branch-opt.ll | 71 +
.../test/CodeGen/PowerPC/bswap-load-store.ll | 51 +
.../CodeGen/PowerPC/buildvec_canonicalize.ll | 24 +
final/test/CodeGen/PowerPC/calls.ll | 32 +
final/test/CodeGen/PowerPC/cmp-cmp.ll | 13 +
.../test/CodeGen/PowerPC/compare-duplicate.ll | 11 +
final/test/CodeGen/PowerPC/compare-simm.ll | 14 +
final/test/CodeGen/PowerPC/constants.ll | 52 +
final/test/CodeGen/PowerPC/cr_spilling.ll | 27 +
final/test/CodeGen/PowerPC/cttz.ll | 11 +
final/test/CodeGen/PowerPC/darwin-labels.ll | 6 +
final/test/CodeGen/PowerPC/delete-node.ll | 22 +
final/test/CodeGen/PowerPC/dg.exp | 5 +
final/test/CodeGen/PowerPC/div-2.ll | 30 +
final/test/CodeGen/PowerPC/empty-functions.ll | 12 +
.../test/CodeGen/PowerPC/eqv-andc-orc-nor.ll | 93 +
final/test/CodeGen/PowerPC/extsh.ll | 8 +
final/test/CodeGen/PowerPC/fabs.ll | 7 +
final/test/CodeGen/PowerPC/fma.ll | 54 +
final/test/CodeGen/PowerPC/fnabs.ll | 10 +
final/test/CodeGen/PowerPC/fneg.ll | 12 +
final/test/CodeGen/PowerPC/fold-li.ll | 15 +
final/test/CodeGen/PowerPC/fp-branch.ll | 21 +
final/test/CodeGen/PowerPC/fp-int-fp.ll | 27 +
final/test/CodeGen/PowerPC/fp_to_uint.ll | 8 +
final/test/CodeGen/PowerPC/fpcopy.ll | 7 +
final/test/CodeGen/PowerPC/frounds.ll | 19 +
final/test/CodeGen/PowerPC/fsqrt.ll | 19 +
final/test/CodeGen/PowerPC/hello.ll | 12 +
final/test/CodeGen/PowerPC/hidden-vis-2.ll | 12 +
final/test/CodeGen/PowerPC/hidden-vis.ll | 9 +
final/test/CodeGen/PowerPC/i128-and-beyond.ll | 8 +
final/test/CodeGen/PowerPC/i64_fp.ll | 26 +
final/test/CodeGen/PowerPC/iabs.ll | 15 +
.../CodeGen/PowerPC/illegal-element-type.ll | 23 +
final/test/CodeGen/PowerPC/indirectbr.ll | 55 +
final/test/CodeGen/PowerPC/inlineasm-copy.ll | 14 +
final/test/CodeGen/PowerPC/int-fp-conv-0.ll | 17 +
final/test/CodeGen/PowerPC/int-fp-conv-1.ll | 11 +
final/test/CodeGen/PowerPC/invalid-memcpy.ll | 20 +
.../CodeGen/PowerPC/inverted-bool-compares.ll | 13 +
final/test/CodeGen/PowerPC/ispositive.ll | 10 +
final/test/CodeGen/PowerPC/itofp128.ll | 14 +
final/test/CodeGen/PowerPC/lha.ll | 8 +
.../CodeGen/PowerPC/load-constant-addr.ll | 9 +
final/test/CodeGen/PowerPC/long-compare.ll | 9 +
.../test/CodeGen/PowerPC/longdbl-truncate.ll | 9 +
final/test/CodeGen/PowerPC/lsr-postinc-pos.ll | 32 +
final/test/CodeGen/PowerPC/mask64.ll | 27 +
.../test/CodeGen/PowerPC/mem-rr-addr-mode.ll | 18 +
final/test/CodeGen/PowerPC/mem_update.ll | 68 +
final/test/CodeGen/PowerPC/mul-neg-power-2.ll | 8 +
.../test/CodeGen/PowerPC/mul-with-overflow.ll | 15 +
final/test/CodeGen/PowerPC/mulhs.ll | 17 +
.../PowerPC/mult-alt-generic-powerpc.ll | 321 +
.../PowerPC/mult-alt-generic-powerpc64.ll | 321 +
.../CodeGen/PowerPC/multiple-return-values.ll | 17 +
final/test/CodeGen/PowerPC/neg.ll | 7 +
final/test/CodeGen/PowerPC/no-dead-strip.ll | 8 +
.../CodeGen/PowerPC/or-addressing-mode.ll | 22 +
final/test/CodeGen/PowerPC/ppc-prologue.ll | 28 +
final/test/CodeGen/PowerPC/ppcf128-1-opt.ll | 29 +
final/test/CodeGen/PowerPC/ppcf128-1.ll | 92 +
final/test/CodeGen/PowerPC/ppcf128-2.ll | 14 +
final/test/CodeGen/PowerPC/ppcf128-3.ll | 32 +
final/test/CodeGen/PowerPC/ppcf128-4.ll | 10 +
.../test/CodeGen/PowerPC/pr3711_widen_bit.ll | 18 +
final/test/CodeGen/PowerPC/private.ll | 24 +
.../CodeGen/PowerPC/reg-coalesce-simple.ll | 12 +
final/test/CodeGen/PowerPC/retaddr.ll | 15 +
final/test/CodeGen/PowerPC/return-val-i128.ll | 36 +
final/test/CodeGen/PowerPC/rlwimi-commute.ll | 34 +
final/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll | 28 +
final/test/CodeGen/PowerPC/rlwimi.ll | 70 +
final/test/CodeGen/PowerPC/rlwimi2.ll | 29 +
final/test/CodeGen/PowerPC/rlwimi3.ll | 25 +
final/test/CodeGen/PowerPC/rlwinm.ll | 61 +
final/test/CodeGen/PowerPC/rlwinm2.ll | 28 +
final/test/CodeGen/PowerPC/rotl-2.ll | 38 +
final/test/CodeGen/PowerPC/rotl-64.ll | 20 +
final/test/CodeGen/PowerPC/rotl.ll | 37 +
final/test/CodeGen/PowerPC/sections.ll | 8 +
final/test/CodeGen/PowerPC/select-cc.ll | 9 +
final/test/CodeGen/PowerPC/select_lt0.ll | 50 +
final/test/CodeGen/PowerPC/setcc_no_zext.ll | 9 +
final/test/CodeGen/PowerPC/seteq-0.ll | 9 +
final/test/CodeGen/PowerPC/shift128.ll | 14 +
final/test/CodeGen/PowerPC/shl_elim.ll | 11 +
final/test/CodeGen/PowerPC/shl_sext.ll | 18 +
final/test/CodeGen/PowerPC/sign_ext_inreg1.ll | 12 +
final/test/CodeGen/PowerPC/small-arguments.ll | 52 +
final/test/CodeGen/PowerPC/stack-protector.ll | 25 +
final/test/CodeGen/PowerPC/stfiwx-2.ll | 11 +
final/test/CodeGen/PowerPC/stfiwx.ll | 24 +
final/test/CodeGen/PowerPC/store-load-fwd.ll | 8 +
final/test/CodeGen/PowerPC/stubs.ll | 22 +
final/test/CodeGen/PowerPC/subc.ll | 25 +
final/test/CodeGen/PowerPC/tailcall1-64.ll | 11 +
final/test/CodeGen/PowerPC/tailcall1.ll | 11 +
final/test/CodeGen/PowerPC/tailcallpic1.ll | 14 +
final/test/CodeGen/PowerPC/trampoline.ll | 166 +
final/test/CodeGen/PowerPC/unsafe-math.ll | 10 +
final/test/CodeGen/PowerPC/varargs.ll | 22 +
final/test/CodeGen/PowerPC/vcmp-fold.ll | 22 +
.../test/CodeGen/PowerPC/vec_auto_constant.ll | 36 +
final/test/CodeGen/PowerPC/vec_br_cmp.ll | 22 +
.../PowerPC/vec_buildvector_loadstore.ll | 37 +
final/test/CodeGen/PowerPC/vec_call.ll | 11 +
final/test/CodeGen/PowerPC/vec_constants.ll | 44 +
final/test/CodeGen/PowerPC/vec_fneg.ll | 8 +
final/test/CodeGen/PowerPC/vec_insert.ll | 8 +
final/test/CodeGen/PowerPC/vec_misaligned.ll | 37 +
final/test/CodeGen/PowerPC/vec_mul.ll | 23 +
.../test/CodeGen/PowerPC/vec_perf_shuffle.ll | 36 +
final/test/CodeGen/PowerPC/vec_shift.ll | 10 +
final/test/CodeGen/PowerPC/vec_shuffle.ll | 504 +
final/test/CodeGen/PowerPC/vec_splat.ll | 71 +
.../CodeGen/PowerPC/vec_splat_constant.ll | 24 +
final/test/CodeGen/PowerPC/vec_vrsave.ll | 14 +
final/test/CodeGen/PowerPC/vec_zero.ll | 9 +
.../PowerPC/vector-identity-shuffle.ll | 17 +
final/test/CodeGen/PowerPC/vector.ll | 158 +
.../SPARC/2006-01-22-BitConvertLegalize.ll | 12 +
.../CodeGen/SPARC/2007-05-09-JumpTables.ll | 30 +
.../SPARC/2007-07-05-LiveIntervalAssert.ll | 11 +
.../2008-10-10-InlineAsmMemoryOperand.ll | 16 +
.../SPARC/2008-10-10-InlineAsmRegOperand.ll | 14 +
final/test/CodeGen/SPARC/2009-08-28-PIC.ll | 9 +
.../CodeGen/SPARC/2009-08-28-WeakLinkage.ll | 6 +
.../SPARC/2010-04-07-DbgValueOtherTargets.ll | 28 +
final/test/CodeGen/SPARC/2011-01-11-CC.ll | 105 +
final/test/CodeGen/SPARC/2011-01-11-Call.ll | 13 +
.../CodeGen/SPARC/2011-01-11-FrameAddr.ll | 64 +
.../CodeGen/SPARC/2011-01-19-DelaySlot.ll | 90 +
.../CodeGen/SPARC/2011-01-21-ByValArgs.ll | 18 +
final/test/CodeGen/SPARC/2011-01-22-SRet.ll | 37 +
final/test/CodeGen/SPARC/basictest.ll | 26 +
final/test/CodeGen/SPARC/ctpop.ll | 10 +
final/test/CodeGen/SPARC/dg.exp | 5 +
.../CodeGen/SPARC/mult-alt-generic-sparc.ll | 323 +
final/test/CodeGen/SPARC/private.ll | 21 +
final/test/CodeGen/SystemZ/00-RetVoid.ll | 6 +
final/test/CodeGen/SystemZ/01-RetArg.ll | 6 +
final/test/CodeGen/SystemZ/01-RetImm.ll | 49 +
final/test/CodeGen/SystemZ/02-MemArith.ll | 133 +
final/test/CodeGen/SystemZ/02-RetAdd.ll | 6 +
final/test/CodeGen/SystemZ/02-RetAddImm.ll | 6 +
final/test/CodeGen/SystemZ/02-RetAnd.ll | 7 +
final/test/CodeGen/SystemZ/02-RetAndImm.ll | 28 +
final/test/CodeGen/SystemZ/02-RetNeg.ll | 7 +
final/test/CodeGen/SystemZ/02-RetOr.ll | 6 +
final/test/CodeGen/SystemZ/02-RetOrImm.ll | 28 +
final/test/CodeGen/SystemZ/02-RetSub.ll | 7 +
final/test/CodeGen/SystemZ/02-RetSubImm.ll | 7 +
final/test/CodeGen/SystemZ/02-RetXor.ll | 6 +
final/test/CodeGen/SystemZ/02-RetXorImm.ll | 6 +
.../CodeGen/SystemZ/03-RetAddImmSubreg.ll | 42 +
final/test/CodeGen/SystemZ/03-RetAddSubreg.ll | 22 +
.../CodeGen/SystemZ/03-RetAndImmSubreg.ll | 38 +
final/test/CodeGen/SystemZ/03-RetAndSubreg.ll | 21 +
final/test/CodeGen/SystemZ/03-RetArgSubreg.ll | 19 +
final/test/CodeGen/SystemZ/03-RetImmSubreg.ll | 42 +
.../CodeGen/SystemZ/03-RetNegImmSubreg.ll | 8 +
.../test/CodeGen/SystemZ/03-RetOrImmSubreg.ll | 60 +
final/test/CodeGen/SystemZ/03-RetOrSubreg.ll | 23 +
.../CodeGen/SystemZ/03-RetSubImmSubreg.ll | 42 +
final/test/CodeGen/SystemZ/03-RetSubSubreg.ll | 22 +
.../CodeGen/SystemZ/03-RetXorImmSubreg.ll | 58 +
final/test/CodeGen/SystemZ/03-RetXorSubreg.ll | 23 +
final/test/CodeGen/SystemZ/04-RetShifts.ll | 121 +
final/test/CodeGen/SystemZ/05-LoadAddr.ll | 11 +
final/test/CodeGen/SystemZ/05-MemImmStores.ll | 50 +
.../test/CodeGen/SystemZ/05-MemLoadsStores.ll | 44 +
.../CodeGen/SystemZ/05-MemLoadsStores16.ll | 85 +
final/test/CodeGen/SystemZ/05-MemRegLoads.ll | 75 +
final/test/CodeGen/SystemZ/05-MemRegStores.ll | 79 +
final/test/CodeGen/SystemZ/06-CallViaStack.ll | 17 +
final/test/CodeGen/SystemZ/06-FrameIdxLoad.ll | 16 +
final/test/CodeGen/SystemZ/06-LocalFrame.ll | 13 +
final/test/CodeGen/SystemZ/06-SimpleCall.ll | 12 +
final/test/CodeGen/SystemZ/07-BrCond.ll | 141 +
final/test/CodeGen/SystemZ/07-BrCond32.ll | 142 +
final/test/CodeGen/SystemZ/07-BrUnCond.ll | 18 +
final/test/CodeGen/SystemZ/07-CmpImm.ll | 137 +
final/test/CodeGen/SystemZ/07-CmpImm32.ll | 139 +
final/test/CodeGen/SystemZ/07-SelectCC.ll | 11 +
final/test/CodeGen/SystemZ/08-DivRem.ll | 55 +
final/test/CodeGen/SystemZ/08-DivRemMemOp.ll | 64 +
final/test/CodeGen/SystemZ/08-SimpleMuls.ll | 29 +
.../test/CodeGen/SystemZ/09-DynamicAlloca.ll | 14 +
final/test/CodeGen/SystemZ/09-Globals.ll | 23 +
final/test/CodeGen/SystemZ/09-Switches.ll | 39 +
final/test/CodeGen/SystemZ/10-FuncsPic.ll | 27 +
final/test/CodeGen/SystemZ/10-GlobalsPic.ll | 29 +
final/test/CodeGen/SystemZ/11-BSwap.ll | 74 +
.../SystemZ/2009-05-29-InvalidRetResult.ll | 12 +
.../CodeGen/SystemZ/2009-06-02-And32Imm.ll | 14 +
.../test/CodeGen/SystemZ/2009-06-02-Rotate.ll | 13 +
.../SystemZ/2009-06-05-InvalidArgLoad.ll | 19 +
.../test/CodeGen/SystemZ/2009-07-04-Shl32.ll | 27 +
.../test/CodeGen/SystemZ/2009-07-05-Shifts.ll | 25 +
.../2009-07-10-BadIncomingArgOffset.ll | 24 +
.../SystemZ/2009-07-11-FloatBitConvert.ll | 16 +
.../SystemZ/2009-07-11-InvalidRIISel.ll | 32 +
.../2009-08-21-InlineAsmRConstraint.ll | 21 +
.../CodeGen/SystemZ/2009-08-22-FCopySign.ll | 22 +
.../test/CodeGen/SystemZ/2010-01-04-DivMem.ll | 50 +
.../2010-04-07-DbgValueOtherTargets.ll | 28 +
final/test/CodeGen/SystemZ/dg.exp | 5 +
.../CodeGen/Thumb/2007-01-31-RegInfoAssert.ll | 16 +
.../Thumb/2007-02-02-JoinIntervalsCrash.ll | 27 +
final/test/CodeGen/Thumb/2007-03-06-AddR7.ll | 117 +
.../Thumb/2007-05-05-InvalidPushPop.ll | 41 +
.../Thumb/2009-06-18-ThumbCommuteMul.ll | 8 +
.../test/CodeGen/Thumb/2009-07-19-SPDecBug.ll | 33 +
.../CodeGen/Thumb/2009-07-20-TwoAddrBug.ll | 11 +
.../CodeGen/Thumb/2009-07-27-PEIAssert.ll | 26 +
.../Thumb/2009-08-12-ConstIslandAssert.ll | 737 +
.../CodeGen/Thumb/2009-08-12-RegInfoAssert.ll | 40 +
.../test/CodeGen/Thumb/2009-08-20-ISelBug.ll | 66 +
.../Thumb/2009-12-17-pre-regalloc-taildup.ll | 66 +
.../2010-01-15-local-alloc-spill-physical.ll | 20 +
.../Thumb/2010-04-07-DbgValueOtherTargets.ll | 28 +
.../CodeGen/Thumb/2010-06-18-SibCallCrash.ll | 8 +
.../CodeGen/Thumb/2010-07-01-FuncAlign.ll | 6 +
.../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 147 +
final/test/CodeGen/Thumb/2011-EpilogueBug.ll | 17 +
final/test/CodeGen/Thumb/asmprinter-bug.ll | 288 +
final/test/CodeGen/Thumb/barrier.ll | 25 +
final/test/CodeGen/Thumb/dg.exp | 5 +
final/test/CodeGen/Thumb/dyn-stackalloc.ll | 74 +
final/test/CodeGen/Thumb/fpconv.ll | 61 +
final/test/CodeGen/Thumb/fpow.ll | 9 +
final/test/CodeGen/Thumb/frame_thumb.ll | 9 +
final/test/CodeGen/Thumb/iabs.ll | 17 +
.../test/CodeGen/Thumb/inlineasm-imm-thumb.ll | 43 +
final/test/CodeGen/Thumb/ispositive.ll | 11 +
final/test/CodeGen/Thumb/large-stack.ll | 35 +
final/test/CodeGen/Thumb/ldr_ext.ll | 57 +
final/test/CodeGen/Thumb/ldr_frame.ll | 41 +
final/test/CodeGen/Thumb/long-setcc.ll | 17 +
final/test/CodeGen/Thumb/long.ll | 76 +
final/test/CodeGen/Thumb/long_shift.ll | 26 +
final/test/CodeGen/Thumb/mul.ll | 22 +
final/test/CodeGen/Thumb/pop.ll | 13 +
final/test/CodeGen/Thumb/push.ll | 10 +
final/test/CodeGen/Thumb/select.ll | 55 +
final/test/CodeGen/Thumb/stack-frame.ll | 13 +
final/test/CodeGen/Thumb/thumb-imm.ll | 10 +
final/test/CodeGen/Thumb/trap.ll | 12 +
final/test/CodeGen/Thumb/tst_teq.ll | 17 +
final/test/CodeGen/Thumb/unord.ll | 14 +
final/test/CodeGen/Thumb/vargs.ll | 36 +
.../Thumb2/2009-07-17-CrossRegClassCopy.ll | 35 +
.../test/CodeGen/Thumb2/2009-07-21-ISelBug.ll | 36 +
.../CodeGen/Thumb2/2009-07-23-CPIslandBug.ll | 22 +
.../CodeGen/Thumb2/2009-07-30-PEICrash.ll | 193 +
.../CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll | 85 +
.../CodeGen/Thumb2/2009-08-02-CoalescerBug.ll | 46 +
.../Thumb2/2009-08-04-CoalescerAssert.ll | 29 +
.../CodeGen/Thumb2/2009-08-04-CoalescerBug.ll | 153 +
.../Thumb2/2009-08-04-ScavengerAssert.ll | 508 +
.../Thumb2/2009-08-04-SubregLoweringBug.ll | 34 +
.../Thumb2/2009-08-04-SubregLoweringBug2.ll | 42 +
.../Thumb2/2009-08-04-SubregLoweringBug3.ll | 54 +
.../CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 29 +
.../CodeGen/Thumb2/2009-08-07-CoalescerBug.ll | 16 +
.../CodeGen/Thumb2/2009-08-07-NeonFPBug.ll | 80 +
.../Thumb2/2009-08-08-ScavengerAssert.ll | 20 +
.../test/CodeGen/Thumb2/2009-08-10-ISelBug.ll | 15 +
.../CodeGen/Thumb2/2009-08-21-PostRAKill4.ll | 26 +
.../CodeGen/Thumb2/2009-09-01-PostRAProlog.ll | 106 +
.../CodeGen/Thumb2/2009-09-28-ITBlockBug.ll | 152 +
.../Thumb2/2009-10-15-ITBlockBranch.ll | 41 +
.../Thumb2/2009-11-01-CopyReg2RegBug.ll | 29 +
.../Thumb2/2009-11-11-ScavengerAssert.ll | 85 +
.../test/CodeGen/Thumb2/2009-11-13-STRDBug.ll | 20 +
.../CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll | 128 +
.../Thumb2/2010-01-06-TailDuplicateLabels.ll | 89 +
.../Thumb2/2010-01-19-RemovePredicates.ll | 53 +
.../CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 76 +
.../CodeGen/Thumb2/2010-02-24-BigStack.ll | 15 +
.../CodeGen/Thumb2/2010-03-08-addi12-ccout.ll | 266 +
.../CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 68 +
.../CodeGen/Thumb2/2010-04-15-DynAllocBug.ll | 18 +
.../CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll | 73 +
final/test/CodeGen/Thumb2/2010-05-24-rsbs.ll | 9 +
.../Thumb2/2010-06-14-NEONCoalescer.ll | 42 +
.../CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll | 35 +
.../CodeGen/Thumb2/2010-06-21-TailMergeBug.ll | 127 +
.../Thumb2/2010-08-10-VarSizedAllocaBug.ll | 59 +
.../CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 34 +
.../Thumb2/2010-12-03-AddSPNarrowing.ll | 11 +
final/test/CodeGen/Thumb2/bfi.ll | 51 +
final/test/CodeGen/Thumb2/bfx.ll | 28 +
.../test/CodeGen/Thumb2/buildvector-crash.ll | 17 +
final/test/CodeGen/Thumb2/carry.ll | 22 +
final/test/CodeGen/Thumb2/cortex-fp.ll | 24 +
final/test/CodeGen/Thumb2/crash.ll | 49 +
.../CodeGen/Thumb2/cross-rc-coalescing-1.ll | 52 +
.../CodeGen/Thumb2/cross-rc-coalescing-2.ll | 75 +
final/test/CodeGen/Thumb2/dg.exp | 5 +
final/test/CodeGen/Thumb2/div.ll | 45 +
final/test/CodeGen/Thumb2/frameless.ll | 6 +
final/test/CodeGen/Thumb2/frameless2.ll | 12 +
final/test/CodeGen/Thumb2/ifcvt-neon.ll | 29 +
final/test/CodeGen/Thumb2/large-stack.ll | 39 +
final/test/CodeGen/Thumb2/ldr-str-imm12.ll | 76 +
final/test/CodeGen/Thumb2/lsr-deficiency.ll | 41 +
final/test/CodeGen/Thumb2/machine-licm.ll | 122 +
final/test/CodeGen/Thumb2/mul_const.ll | 18 +
final/test/CodeGen/Thumb2/pic-load.ll | 21 +
final/test/CodeGen/Thumb2/thumb2-adc.ll | 48 +
final/test/CodeGen/Thumb2/thumb2-add.ll | 50 +
final/test/CodeGen/Thumb2/thumb2-add2.ll | 41 +
final/test/CodeGen/Thumb2/thumb2-add3.ll | 9 +
final/test/CodeGen/Thumb2/thumb2-add4.ll | 46 +
final/test/CodeGen/Thumb2/thumb2-add5.ll | 42 +
final/test/CodeGen/Thumb2/thumb2-add6.ll | 9 +
final/test/CodeGen/Thumb2/thumb2-and.ll | 42 +
final/test/CodeGen/Thumb2/thumb2-and2.ll | 41 +
final/test/CodeGen/Thumb2/thumb2-asr.ll | 8 +
final/test/CodeGen/Thumb2/thumb2-asr2.ll | 8 +
final/test/CodeGen/Thumb2/thumb2-barrier.ll | 31 +
final/test/CodeGen/Thumb2/thumb2-bcc.ll | 19 +
final/test/CodeGen/Thumb2/thumb2-bfc.ll | 32 +
final/test/CodeGen/Thumb2/thumb2-bic.ll | 105 +
final/test/CodeGen/Thumb2/thumb2-branch.ll | 61 +
final/test/CodeGen/Thumb2/thumb2-call-tc.ll | 38 +
final/test/CodeGen/Thumb2/thumb2-call.ll | 27 +
final/test/CodeGen/Thumb2/thumb2-cbnz.ll | 33 +
final/test/CodeGen/Thumb2/thumb2-clz.ll | 10 +
final/test/CodeGen/Thumb2/thumb2-cmn.ll | 72 +
 final/test/CodeGen/Thumb2/thumb2-cmn2.ll | 33 +
 final/test/CodeGen/Thumb2/thumb2-cmp.ll | 55 +
 final/test/CodeGen/Thumb2/thumb2-cmp2.ll | 49 +
 final/test/CodeGen/Thumb2/thumb2-eor.ll | 56 +
 final/test/CodeGen/Thumb2/thumb2-eor2.ll | 41 +
 final/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll | 87 +
 final/test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 84 +
 final/test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 94 +
 final/test/CodeGen/Thumb2/thumb2-ifcvt3.ll | 31 +
 final/test/CodeGen/Thumb2/thumb2-jtb.ll | 120 +
 final/test/CodeGen/Thumb2/thumb2-ldm.ll | 40 +
 final/test/CodeGen/Thumb2/thumb2-ldr.ll | 72 +
 final/test/CodeGen/Thumb2/thumb2-ldr_ext.ll | 28 +
 final/test/CodeGen/Thumb2/thumb2-ldr_post.ll | 12 +
 final/test/CodeGen/Thumb2/thumb2-ldr_pre.ll | 28 +
 final/test/CodeGen/Thumb2/thumb2-ldrb.ll | 72 +
 final/test/CodeGen/Thumb2/thumb2-ldrd.ll | 12 +
 final/test/CodeGen/Thumb2/thumb2-ldrh.ll | 71 +
 final/test/CodeGen/Thumb2/thumb2-lsl.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-lsl2.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-lsr.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-lsr2.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-lsr3.ll | 19 +
 final/test/CodeGen/Thumb2/thumb2-mla.ll | 17 +
 final/test/CodeGen/Thumb2/thumb2-mls.ll | 19 +
 final/test/CodeGen/Thumb2/thumb2-mov.ll | 266 +
 final/test/CodeGen/Thumb2/thumb2-mul.ll | 26 +
 final/test/CodeGen/Thumb2/thumb2-mulhi.ll | 20 +
 final/test/CodeGen/Thumb2/thumb2-mvn.ll | 33 +
 final/test/CodeGen/Thumb2/thumb2-mvn2.ll | 49 +
 final/test/CodeGen/Thumb2/thumb2-neg.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-orn.ll | 72 +
 final/test/CodeGen/Thumb2/thumb2-orn2.ll | 38 +
 final/test/CodeGen/Thumb2/thumb2-orr.ll | 42 +
 final/test/CodeGen/Thumb2/thumb2-orr2.ll | 42 +
 final/test/CodeGen/Thumb2/thumb2-pack.ll | 97 +
 final/test/CodeGen/Thumb2/thumb2-rev.ll | 23 +
 final/test/CodeGen/Thumb2/thumb2-rev16.ll | 32 +
 final/test/CodeGen/Thumb2/thumb2-ror.ll | 11 +
 final/test/CodeGen/Thumb2/thumb2-ror2.ll | 11 +
 final/test/CodeGen/Thumb2/thumb2-rsb.ll | 35 +
 final/test/CodeGen/Thumb2/thumb2-rsb2.ll | 41 +
 final/test/CodeGen/Thumb2/thumb2-sbc.ll | 54 +
 final/test/CodeGen/Thumb2/thumb2-select.ll | 98 +
 .../CodeGen/Thumb2/thumb2-select_xform.ll | 39 +
 final/test/CodeGen/Thumb2/thumb2-shifter.ll | 48 +
 final/test/CodeGen/Thumb2/thumb2-smla.ll | 11 +
 final/test/CodeGen/Thumb2/thumb2-smul.ll | 24 +
 final/test/CodeGen/Thumb2/thumb2-spill-q.ll | 91 +
 final/test/CodeGen/Thumb2/thumb2-str.ll | 76 +
 final/test/CodeGen/Thumb2/thumb2-str_post.ll | 22 +
 final/test/CodeGen/Thumb2/thumb2-str_pre.ll | 21 +
 final/test/CodeGen/Thumb2/thumb2-strb.ll | 76 +
 final/test/CodeGen/Thumb2/thumb2-strh.ll | 76 +
 final/test/CodeGen/Thumb2/thumb2-sub.ll | 49 +
 final/test/CodeGen/Thumb2/thumb2-sub2.ll | 8 +
 final/test/CodeGen/Thumb2/thumb2-sub3.ll | 55 +
 final/test/CodeGen/Thumb2/thumb2-sub4.ll | 42 +
 final/test/CodeGen/Thumb2/thumb2-sub5.ll | 9 +
 final/test/CodeGen/Thumb2/thumb2-sxt_rot.ll | 31 +
 final/test/CodeGen/Thumb2/thumb2-tbb.ll | 57 +
 final/test/CodeGen/Thumb2/thumb2-tbh.ll | 84 +
 final/test/CodeGen/Thumb2/thumb2-teq.ll | 93 +
 final/test/CodeGen/Thumb2/thumb2-teq2.ll | 71 +
 final/test/CodeGen/Thumb2/thumb2-tst.ll | 92 +
 final/test/CodeGen/Thumb2/thumb2-tst2.ll | 71 +
 final/test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 28 +
 final/test/CodeGen/Thumb2/thumb2-uxtb.ll | 141 +
 final/test/CodeGen/Thumb2/tls1.ll | 20 +
 final/test/CodeGen/Thumb2/tls2.ll | 29 +
 .../X86/2003-08-03-CallArgLiveRanges.ll | 18 +
 .../CodeGen/X86/2003-08-23-DeadBlockTest.ll | 12 +
 .../test/CodeGen/X86/2003-11-03-GlobalBool.ll | 4 +
 final/test/CodeGen/X86/2004-02-12-Memcpy.ll | 25 +
 .../X86/2004-02-13-FrameReturnAddress.ll | 18 +
 .../X86/2004-02-14-InefficientStackPointer.ll | 5 +
 final/test/CodeGen/X86/2004-02-22-Casts.ll | 12 +
 .../test/CodeGen/X86/2004-03-30-Select-Max.ll | 8 +
 .../X86/2004-04-09-SameValueCoalescing.ll | 13 +
 .../CodeGen/X86/2004-04-13-FPCMOV-Crash.ll | 7 +
 .../CodeGen/X86/2004-06-10-StackifierCrash.ll | 6 +
 .../CodeGen/X86/2004-10-08-SelectSetCCFold.ll | 8 +
 .../test/CodeGen/X86/2005-01-17-CycleInDAG.ll | 17 +
 .../X86/2005-02-14-IllegalAssembler.ll | 5 +
 .../CodeGen/X86/2005-05-08-FPStackifierPHI.ll | 38 +
 .../CodeGen/X86/2006-01-19-ISelFoldingBug.ll | 20 +
 .../CodeGen/X86/2006-03-01-InstrSchedBug.ll | 12 +
 .../CodeGen/X86/2006-03-02-InstrSchedBug.ll | 12 +
 .../CodeGen/X86/2006-04-04-CrossBlockCrash.ll | 50 +
 .../CodeGen/X86/2006-04-27-ISelFoldingBug.ll | 29 +
 .../X86/2006-05-01-SchedCausingSpills.ll | 76 +
 .../CodeGen/X86/2006-05-02-InstrSched1.ll | 23 +
 .../CodeGen/X86/2006-05-02-InstrSched2.ll | 24 +
 .../X86/2006-05-08-CoalesceSubRegClass.ll | 25 +
 .../test/CodeGen/X86/2006-05-08-InstrSched.ll | 25 +
 .../test/CodeGen/X86/2006-05-11-InstrSched.ll | 51 +
 .../test/CodeGen/X86/2006-05-17-VectorArg.ll | 15 +
 final/test/CodeGen/X86/2006-05-22-FPSetEQ.ll | 10 +
 .../test/CodeGen/X86/2006-05-25-CycleInDAG.ll | 20 +
 .../X86/2006-07-10-InlineAsmAConstraint.ll | 8 +
 .../X86/2006-07-12-InlineAsmQConstraint.ll | 12 +
 final/test/CodeGen/X86/2006-07-19-ATTAsm.ll | 49 +
 .../test/CodeGen/X86/2006-07-20-InlineAsm.ll | 23 +
 .../2006-07-28-AsmPrint-Long-As-Pointer.ll | 5 +
 .../CodeGen/X86/2006-07-31-SingleRegClass.ll | 10 +
 .../test/CodeGen/X86/2006-08-07-CycleInDAG.ll | 31 +
 .../test/CodeGen/X86/2006-08-16-CycleInDAG.ll | 23 +
 .../CodeGen/X86/2006-08-21-ExtraMovInst.ll | 17 +
 .../test/CodeGen/X86/2006-09-01-CycleInDAG.ll | 131 +
 .../CodeGen/X86/2006-10-02-BoolRetCrash.ll | 7 +
 .../X86/2006-10-07-ScalarSSEMiscompile.ll | 15 +
 .../test/CodeGen/X86/2006-10-09-CycleInDAG.ll | 11 +
 .../X86/2006-10-10-FindModifiedNodeSlotBug.ll | 28 +
 .../test/CodeGen/X86/2006-10-12-CycleInDAG.ll | 41 +
 .../test/CodeGen/X86/2006-10-13-CycleInDAG.ll | 19 +
 .../2006-10-19-SwitchUnnecessaryBranching.ll | 29 +
 final/test/CodeGen/X86/2006-11-12-CSRetCC.ll | 59 +
 .../CodeGen/X86/2006-11-17-IllegalMove.ll | 42 +
 .../CodeGen/X86/2006-11-27-SelectLegalize.ll | 9 +
 final/test/CodeGen/X86/2006-11-28-Memcpy.ll | 34 +
 .../CodeGen/X86/2006-12-16-InlineAsmCrash.ll | 30 +
 .../CodeGen/X86/2006-12-19-IntelSyntax.ll | 86 +
 .../test/CodeGen/X86/2007-01-08-InstrSched.ll | 22 +
 .../CodeGen/X86/2007-01-08-X86-64-Pointer.ll | 22 +
 .../CodeGen/X86/2007-01-13-StackPtrIndex.ll | 461 +
 .../CodeGen/X86/2007-01-29-InlineAsm-ir.ll | 7 +
 .../test/CodeGen/X86/2007-02-04-OrAddrMode.ll | 21 +
 .../test/CodeGen/X86/2007-02-16-BranchFold.ll | 95 +
 .../X86/2007-02-19-LiveIntervalAssert.ll | 21 +
 .../X86/2007-02-23-DAGCombine-Miscompile.ll | 13 +
 .../CodeGen/X86/2007-02-25-FastCCStack.ll | 5 +
 .../CodeGen/X86/2007-03-01-SpillerCrash.ll | 86 +
 .../CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll | 73 +
 .../test/CodeGen/X86/2007-03-16-InlineAsm.ll | 27 +
 .../X86/2007-03-18-LiveIntervalAssert.ll | 7 +
 .../2007-03-24-InlineAsmMultiRegConstraint.ll | 11 +
 .../X86/2007-03-24-InlineAsmPModifier.ll | 10 +
 .../X86/2007-03-24-InlineAsmVectorOp.ll | 11 +
 .../X86/2007-03-24-InlineAsmXConstraint.ll | 9 +
 .../CodeGen/X86/2007-03-26-CoalescerBug.ll | 49 +
 .../CodeGen/X86/2007-04-08-InlineAsmCrash.ll | 18 +
 .../X86/2007-04-11-InlineAsmVectorResult.ll | 21 +
 .../X86/2007-04-17-LiveIntervalAssert.ll | 42 +
 .../test/CodeGen/X86/2007-04-24-Huge-Stack.ll | 19 +
 .../CodeGen/X86/2007-04-24-VectorCrash.ll | 63 +
 .../test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll | 64 +
 .../X86/2007-04-27-InlineAsm-IntMemInput.ll | 12 +
 .../CodeGen/X86/2007-05-05-Personality.ll | 35 +
 .../CodeGen/X86/2007-05-05-VecCastExpand.ll | 21 +
 .../test/CodeGen/X86/2007-05-07-InvokeSRet.ll | 15 +
 .../X86/2007-05-14-LiveIntervalAssert.ll | 27 +
 final/test/CodeGen/X86/2007-05-15-maskmovq.ll | 14 +
 .../CodeGen/X86/2007-05-17-ShuffleISelBug.ll | 23 +
 .../X86/2007-06-04-X86-64-CtorAsmBugs.ll | 28 +
 .../test/CodeGen/X86/2007-06-04-tailmerge4.ll | 454 +
 .../CodeGen/X86/2007-06-05-LSR-Dominator.ll | 129 +
 final/test/CodeGen/X86/2007-06-15-IntToMMX.ll | 16 +
 .../CodeGen/X86/2007-06-28-X86-64-isel.ll | 16 +
 .../CodeGen/X86/2007-06-29-DAGCombinerBug.ll | 50 +
 .../X86/2007-06-29-VecFPConstantCSEBug.ll | 11 +
 .../test/CodeGen/X86/2007-07-03-GR64ToVR64.ll | 18 +
 .../CodeGen/X86/2007-07-10-StackerAssert.ll | 41 +
 .../CodeGen/X86/2007-07-18-Vector-Extract.ll | 17 +
 .../X86/2007-08-01-LiveVariablesBug.ll | 8 +
 .../X86/2007-08-09-IllegalX86-64Asm.ll | 235 +
 .../CodeGen/X86/2007-08-10-SignExtSubreg.ll | 10 +
 .../X86/2007-08-13-AppendingLinkage.ll | 12 +
 .../test/CodeGen/X86/2007-09-05-InvalidAsm.ll | 49 +
 .../CodeGen/X86/2007-09-06-ExtWeakAliasee.ll | 4 +
 .../CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 65 +
 .../CodeGen/X86/2007-09-18-ShuffleXformBug.ll | 30 +
 .../CodeGen/X86/2007-09-27-LDIntrinsics.ll | 30 +
 .../CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll | 20 +
 .../CodeGen/X86/2007-10-05-3AddrConvert.ll | 48 +
 .../X86/2007-10-12-CoalesceExtSubReg.ll | 32 +
 .../CodeGen/X86/2007-10-12-SpillerUnfold1.ll | 45 +
 .../CodeGen/X86/2007-10-12-SpillerUnfold2.ll | 57 +
 .../CodeGen/X86/2007-10-14-CoalescerCrash.ll | 28 +
 .../CodeGen/X86/2007-10-15-CoalescerCrash.ll | 400 +
 .../CodeGen/X86/2007-10-16-CoalescerCrash.ll | 31 +
 .../test/CodeGen/X86/2007-10-17-IllegalAsm.ll | 87 +
 .../CodeGen/X86/2007-10-19-SpillerUnfold.ll | 84 +
 .../X86/2007-10-28-inlineasm-q-modifier.ll | 11 +
 .../CodeGen/X86/2007-10-29-ExtendSetCC.ll | 17 +
 final/test/CodeGen/X86/2007-10-30-LSRCrash.ll | 48 +
 .../X86/2007-10-31-extractelement-i64.ll | 82 +
 .../test/CodeGen/X86/2007-11-01-ISelCrash.ll | 12 +
 final/test/CodeGen/X86/2007-11-02-BadAsm.ll | 144 +
 .../X86/2007-11-03-x86-64-q-constraint.ll | 9 +
 .../X86/2007-11-04-LiveIntervalCrash.ll | 37 +
 .../X86/2007-11-04-LiveVariablesBug.ll | 16 +
 .../X86/2007-11-04-rip-immediate-constant.ll | 11 +
 .../test/CodeGen/X86/2007-11-06-InstrSched.ll | 25 +
 final/test/CodeGen/X86/2007-11-07-MulBy4.ll | 129 +
 .../CodeGen/X86/2007-11-30-LoadFolding-Bug.ll | 86 +
 .../CodeGen/X86/2007-12-11-FoldImpDefSpill.ll | 680 +
 .../CodeGen/X86/2007-12-16-BURRSchedCrash.ll | 35 +
 .../test/CodeGen/X86/2007-12-18-LoadCSEBug.ll | 28 +
 .../test/CodeGen/X86/2008-01-08-IllegalCMP.ll | 17 +
 .../CodeGen/X86/2008-01-08-SchedulerCrash.ll | 32 +
 .../CodeGen/X86/2008-01-09-LongDoubleSin.ll | 11 +
 .../X86/2008-01-16-FPStackifierAssert.ll | 35 +
 .../X86/2008-01-16-InvalidDAGCombineXform.ll | 30 +
 .../test/CodeGen/X86/2008-01-16-Trampoline.ll | 14 +
 .../test/CodeGen/X86/2008-02-05-ISelCrash.ll | 12 +
 .../CodeGen/X86/2008-02-06-LoadFoldingBug.ll | 20 +
 .../CodeGen/X86/2008-02-08-LoadFoldingBug.ll | 99 +
 .../CodeGen/X86/2008-02-14-BitMiscompile.ll | 8 +
 .../CodeGen/X86/2008-02-18-TailMergingBug.ll | 219 +
 .../X86/2008-02-20-InlineAsmClobber.ll | 24 +
 .../X86/2008-02-22-LocalRegAllocBug.ll | 55 +
 final/test/CodeGen/X86/2008-02-22-ReMatBug.ll | 49 +
 .../CodeGen/X86/2008-02-25-InlineAsmBug.ll | 33 +
 .../X86/2008-02-25-X86-64-CoalescerBug.ll | 55 +
 .../CodeGen/X86/2008-02-26-AsmDirectMemOp.ll | 17 +
 .../CodeGen/X86/2008-02-27-DeadSlotElimBug.ll | 66 +
 final/test/CodeGen/X86/2008-02-27-PEICrash.ll | 33 +
 .../CodeGen/X86/2008-03-06-frem-fpstack.ll | 7 +
 final/test/CodeGen/X86/2008-03-07-APIntBug.ll | 94 +
 .../CodeGen/X86/2008-03-10-RegAllocInfLoop.ll | 14 +
 .../X86/2008-03-12-ThreadLocalAlias.ll | 37 +
 .../X86/2008-03-13-TwoAddrPassCrash.ll | 68 +
 .../CodeGen/X86/2008-03-14-SpillerCrash.ll | 48 +
 .../CodeGen/X86/2008-03-18-CoalescerBug.ll | 51 +
 .../CodeGen/X86/2008-03-19-DAGCombinerBug.ll | 14 +
 .../X86/2008-03-23-DarwinAsmComments.ll | 49 +
 .../CodeGen/X86/2008-03-25-TwoAddrPassBug.ll | 24 +
 .../X86/2008-03-31-SpillerFoldingBug.ll | 40 +
 .../test/CodeGen/X86/2008-04-02-unnamedEH.ll | 16 +
 .../CodeGen/X86/2008-04-08-CoalescerCrash.ll | 19 +
 .../CodeGen/X86/2008-04-09-BranchFolding.ll | 48 +
 .../CodeGen/X86/2008-04-15-LiveVariableBug.ll | 50 +
 .../CodeGen/X86/2008-04-16-CoalescerBug.ll | 33 +
 final/test/CodeGen/X86/2008-04-16-ReMatBug.ll | 46 +
 .../CodeGen/X86/2008-04-17-CoalescerBug.ll | 171 +
 .../test/CodeGen/X86/2008-04-24-MemCpyBug.ll | 12 +
 .../X86/2008-04-24-pblendw-fold-crash.ll | 15 +
 .../X86/2008-04-26-Asm-Optimize-Imm.ll | 11 +
 .../CodeGen/X86/2008-04-28-CoalescerBug.ll | 167 +
 .../CodeGen/X86/2008-04-28-CyclicSchedUnit.ll | 6 +
 .../X86/2008-05-01-InvalidOrdCompare.ll | 15 +
 .../test/CodeGen/X86/2008-05-09-PHIElimBug.ll | 25 +
 .../X86/2008-05-09-ShuffleLoweringBug.ll | 10 +
 .../CodeGen/X86/2008-05-12-tailmerge-5.ll | 145 +
 .../CodeGen/X86/2008-05-21-CoalescerBug.ll | 98 +
 .../X86/2008-05-22-FoldUnalignedLoad.ll | 11 +
 .../CodeGen/X86/2008-05-28-CoalescerBug.ll | 10 +
 .../X86/2008-05-28-LocalRegAllocBug.ll | 30 +
 .../X86/2008-06-04-MemCpyLoweringBug.ll | 19 +
 .../X86/2008-06-13-NotVolatileLoadStore.ll | 23 +
 .../X86/2008-06-13-VolatileLoadStore.ll | 22 +
 .../test/CodeGen/X86/2008-06-16-SubregsBug.ll | 14 +
 .../test/CodeGen/X86/2008-06-18-BadShuffle.ll | 10 +
 .../test/CodeGen/X86/2008-06-25-VecISelBug.ll | 9 +
 .../X86/2008-07-07-DanglingDeadInsts.ll | 99 +
 .../X86/2008-07-09-ELFSectionAttributes.ll | 13 +
 final/test/CodeGen/X86/2008-07-11-SHLBy1.ll | 5 +
 .../test/CodeGen/X86/2008-07-11-SpillerBug.ll | 52 +
 .../CodeGen/X86/2008-07-16-CoalescerCrash.ll | 34 +
 .../CodeGen/X86/2008-07-19-movups-spills.ll | 639 +
 .../CodeGen/X86/2008-07-22-CombinerCrash.ll | 16 +
 final/test/CodeGen/X86/2008-07-23-VSetCC.ll | 32 +
 .../test/CodeGen/X86/2008-08-05-SpillerBug.ll | 44 +
 .../test/CodeGen/X86/2008-08-06-CmpStride.ll | 23 +
 .../CodeGen/X86/2008-08-06-RewriterBug.ll | 40 +
 .../CodeGen/X86/2008-08-17-UComiCodeGenBug.ll | 9 +
 .../CodeGen/X86/2008-08-19-SubAndFetch.ll | 14 +
 .../CodeGen/X86/2008-08-23-64Bit-maskmovq.ll | 29 +
 .../CodeGen/X86/2008-08-23-X86-64AsmBug.ll | 59 +
 .../X86/2008-08-25-AsmRegTypeMismatch.ll | 18 +
 .../CodeGen/X86/2008-08-31-EH_RETURN32.ll | 17 +
 .../CodeGen/X86/2008-08-31-EH_RETURN64.ll | 17 +
 .../CodeGen/X86/2008-09-05-sinttofp-2xi32.ll | 35 +
 .../CodeGen/X86/2008-09-09-LinearScanBug.ll | 65 +
 .../CodeGen/X86/2008-09-11-CoalescerBug.ll | 38 +
 .../CodeGen/X86/2008-09-11-CoalescerBug2.ll | 33 +
 .../CodeGen/X86/2008-09-17-inline-asm-1.ll | 30 +
 .../CodeGen/X86/2008-09-18-inline-asm-2.ll | 27 +
 .../CodeGen/X86/2008-09-19-RegAllocBug.ll | 22 +
 .../CodeGen/X86/2008-09-25-sseregparm-1.ll | 19 +
 .../CodeGen/X86/2008-09-26-FrameAddrBug.ll | 16 +
 final/test/CodeGen/X86/2008-09-29-ReMatBug.ll | 85 +
 .../CodeGen/X86/2008-09-29-VolatileBug.ll | 15 +
 .../CodeGen/X86/2008-10-02-Atomics32-2.ll | 969 +
 .../test/CodeGen/X86/2008-10-06-MMXISelBug.ll | 12 +
 .../CodeGen/X86/2008-10-06-x87ld-nan-1.ll | 13 +
 .../CodeGen/X86/2008-10-06-x87ld-nan-2.ll | 18 +
 .../test/CodeGen/X86/2008-10-07-SSEISelBug.ll | 22 +
 .../test/CodeGen/X86/2008-10-11-CallCrash.ll | 21 +
 .../CodeGen/X86/2008-10-13-CoalescerBug.ll | 42 +
 .../test/CodeGen/X86/2008-10-16-VecUnaryOp.ll | 8 +
 .../X86/2008-10-17-Asm64bitRConstraint.ll | 9 +
 .../CodeGen/X86/2008-10-20-AsmDoubleInI32.ll | 11 +
 .../CodeGen/X86/2008-10-24-FlippedCompare.ll | 17 +
 .../CodeGen/X86/2008-10-27-CoalescerBug.ll | 52 +
 .../X86/2008-10-27-StackRealignment.ll | 22 +
 .../CodeGen/X86/2008-10-29-ExpandVAARG.ll | 10 +
 final/test/CodeGen/X86/2008-11-03-F80VAARG.ll | 17 +
 final/test/CodeGen/X86/2008-11-06-testb.ll | 26 +
 .../CodeGen/X86/2008-11-13-inlineasm-3.ll | 19 +
 final/test/CodeGen/X86/2008-11-29-ULT-Sign.ll | 22 +
 .../CodeGen/X86/2008-12-01-SpillerAssert.ll | 15 +
 .../2008-12-01-loop-iv-used-outside-loop.ll | 30 +
 .../X86/2008-12-02-IllegalResultType.ll | 37 +
 .../CodeGen/X86/2008-12-02-dagcombine-1.ll | 19 +
 .../CodeGen/X86/2008-12-02-dagcombine-2.ll | 17 +
 .../CodeGen/X86/2008-12-02-dagcombine-3.ll | 18 +
 .../CodeGen/X86/2008-12-05-SpillerCrash.ll | 237 +
 .../CodeGen/X86/2008-12-12-PrivateEHSymbol.ll | 12 +
 final/test/CodeGen/X86/2008-12-16-BadShift.ll | 19 +
 .../CodeGen/X86/2008-12-16-dagcombine-4.ll | 14 +
 .../CodeGen/X86/2008-12-19-EarlyClobberBug.ll | 38 +
 .../CodeGen/X86/2008-12-22-dagcombine-5.ll | 14 +
 .../CodeGen/X86/2008-12-23-crazy-address.ll | 33 +
 .../CodeGen/X86/2008-12-23-dagcombine-6.ll | 24 +
 .../CodeGen/X86/2009-01-13-DoubleUpdate.ll | 21 +
 .../CodeGen/X86/2009-01-16-SchedulerBug.ll | 50 +
 final/test/CodeGen/X86/2009-01-16-UIntToFP.ll | 31 +
 .../X86/2009-01-18-ConstantExprCrash.ll | 36 +
 final/test/CodeGen/X86/2009-01-25-NoSSE.ll | 20 +
 .../test/CodeGen/X86/2009-01-26-WrongCheck.ll | 16 +
 .../CodeGen/X86/2009-01-27-NullStrings.ll | 7 +
 .../X86/2009-01-29-LocalRegAllocBug.ll | 38 +
 final/test/CodeGen/X86/2009-01-31-BigShift.ll | 9 +
 .../test/CodeGen/X86/2009-01-31-BigShift2.ll | 11 +
 .../test/CodeGen/X86/2009-01-31-BigShift3.ll | 31 +
 .../test/CodeGen/X86/2009-02-01-LargeMask.ll | 32 +
 .../CodeGen/X86/2009-02-03-AnalyzedTwice.ll | 30 +
 .../CodeGen/X86/2009-02-04-sext-i64-gep.ll | 9 +
 .../CodeGen/X86/2009-02-05-CoalescerBug.ll | 14 +
 .../CodeGen/X86/2009-02-08-CoalescerBug.ll | 22 +
 .../X86/2009-02-09-ivs-different-sizes.ll | 33 +
 .../X86/2009-02-11-codegenprepare-reuse.ll | 35 +
 .../CodeGen/X86/2009-02-12-DebugInfoVLA.ll | 85 +
 .../2009-02-12-InlineAsm-nieZ-constraints.ll | 18 +
 .../test/CodeGen/X86/2009-02-12-SpillerBug.ll | 29 +
 .../X86/2009-02-20-PreAllocSplit-Crash.ll | 71 +
 .../X86/2009-02-21-ExtWeakInitializer.ll | 20 +
 .../test/CodeGen/X86/2009-02-25-CommuteBug.ll | 14 +
 .../CodeGen/X86/2009-02-26-MachineLICMBug.ll | 56 +
 final/test/CodeGen/X86/2009-03-03-BTHang.ll | 37 +
 .../X86/2009-03-03-BitcastLongDouble.ll | 14 +
 .../CodeGen/X86/2009-03-05-burr-list-crash.ll | 35 +
 .../CodeGen/X86/2009-03-07-FPConstSelect.ll | 12 +
 .../test/CodeGen/X86/2009-03-09-APIntCrash.ll | 25 +
 .../test/CodeGen/X86/2009-03-09-SpillerBug.ll | 18 +
 .../CodeGen/X86/2009-03-10-CoalescerBug.ll | 28 +
 .../test/CodeGen/X86/2009-03-12-CPAlignBug.ll | 37 +
 .../test/CodeGen/X86/2009-03-13-PHIElimBug.ll | 32 +
 .../CodeGen/X86/2009-03-16-PHIElimInLPad.ll | 24 +
 .../test/CodeGen/X86/2009-03-16-SpillerBug.ll | 167 +
 .../CodeGen/X86/2009-03-23-LinearScanBug.ll | 23 +
 .../CodeGen/X86/2009-03-23-MultiUseSched.ll | 242 +
 final/test/CodeGen/X86/2009-03-23-i80-fp80.ll | 14 +
 final/test/CodeGen/X86/2009-03-25-TestBug.ll | 25 +
 .../CodeGen/X86/2009-03-26-NoImplicitFPBug.ll | 12 +
 .../CodeGen/X86/2009-04-09-InlineAsmCrash.ll | 165 +
 .../X86/2009-04-12-FastIselOverflowCrash.ll | 21 +
 final/test/CodeGen/X86/2009-04-12-picrel.ll | 13 +
 .../CodeGen/X86/2009-04-13-2AddrAssert-2.ll | 15 +
 .../CodeGen/X86/2009-04-13-2AddrAssert.ll | 16 +
 .../CodeGen/X86/2009-04-14-IllegalRegs.ll | 35 +
 .../CodeGen/X86/2009-04-16-SpillerUnfold.ll | 141 +
 .../CodeGen/X86/2009-04-20-LinearScanOpt.ll | 121 +
 .../CodeGen/X86/2009-04-21-NoReloadImpDef.ll | 31 +
 final/test/CodeGen/X86/2009-04-24.ll | 12 +
 .../CodeGen/X86/2009-04-25-CoalescerBug.ll | 19 +
 .../CodeGen/X86/2009-04-27-CoalescerAssert.ll | 1457 +
 .../X86/2009-04-27-LiveIntervalsAssert.ll | 24 +
 .../X86/2009-04-27-LiveIntervalsAssert2.ll | 23 +
 .../X86/2009-04-27-LiveIntervalsBug.ll | 165 +
 .../X86/2009-04-29-IndirectDestOperands.ll | 22 +
 .../CodeGen/X86/2009-04-29-LinearScanBug.ll | 215 +
 .../CodeGen/X86/2009-04-29-RegAllocAssert.ll | 117 +
 final/test/CodeGen/X86/2009-04-scale.ll | 23 +
 .../X86/2009-05-08-InlineAsmIOffset.ll | 17 +
 .../CodeGen/X86/2009-05-11-tailmerge-crash.ll | 23 +
 .../2009-05-19-SingleElementExtractElement.ll | 16 +
 .../X86/2009-05-23-available_externally.ll | 19 +
 .../X86/2009-05-23-dagcombine-shifts.ll | 20 +
 .../CodeGen/X86/2009-05-28-DAGCombineCrash.ll | 15 +
 final/test/CodeGen/X86/2009-05-30-ISelBug.ll | 28 +
 .../CodeGen/X86/2009-06-02-RewriterBug.ll | 362 +
 .../X86/2009-06-03-Win64DisableRedZone.ll | 8 +
 .../CodeGen/X86/2009-06-03-Win64SpillXMM.ll | 10 +
 .../CodeGen/X86/2009-06-04-VirtualLiveIn.ll | 48 +
 .../X86/2009-06-05-ScalarToVectorByteMMX.ll | 7 +
 .../CodeGen/X86/2009-06-05-VZextByteShort.ll | 37 +
 .../X86/2009-06-05-VariableIndexInsert.ll | 11 +
 .../CodeGen/X86/2009-06-05-sitofpCrash.ll | 13 +
 .../CodeGen/X86/2009-06-06-ConcatVectors.ll | 8 +
 .../X86/2009-06-07-ExpandMMXBitcast.ll | 10 +
 ...2-x86_64-tail-call-conv-out-of-sync-bug.ll | 15 +
 .../CodeGen/X86/2009-06-15-not-a-tail-call.ll | 14 +
 .../X86/2009-06-18-movlp-shuffle-register.ll | 9 +
 .../CodeGen/X86/2009-07-06-TwoAddrAssert.ll | 137 +
 .../test/CodeGen/X86/2009-07-07-SplitICmp.ll | 8 +
 .../X86/2009-07-09-ExtractBoolFromVector.ll | 11 +
 .../CodeGen/X86/2009-07-15-CoalescerBug.ll | 958 +
 .../CodeGen/X86/2009-07-16-CoalescerBug.ll | 210 +
 .../X86/2009-07-17-StackColoringBug.ll | 55 +
 .../X86/2009-07-19-AsmExtraOperands.ll | 11 +
 .../CodeGen/X86/2009-07-20-CoalescerBug.ll | 165 +
 .../CodeGen/X86/2009-07-20-DAGCombineBug.ll | 29 +
 .../X86/2009-08-02-mmx-scalar-to-vector.ll | 12 +
 .../X86/2009-08-06-branchfolder-crash.ll | 142 +
 .../test/CodeGen/X86/2009-08-06-inlineasm.ll | 30 +
 .../test/CodeGen/X86/2009-08-08-CastError.ll | 9 +
 .../test/CodeGen/X86/2009-08-12-badswitch.ll | 176 +
 .../X86/2009-08-14-Win64MemoryIndirectArg.ll | 57 +
 .../X86/2009-08-19-LoadNarrowingMiscompile.ll | 15 +
 .../CodeGen/X86/2009-08-23-SubRegReuseUndo.ll | 69 +
 .../CodeGen/X86/2009-08-23-linkerprivate.ll | 8 +
 .../CodeGen/X86/2009-09-10-LoadFoldingBug.ll | 47 +
 .../CodeGen/X86/2009-09-10-SpillComments.ll | 108 +
 .../CodeGen/X86/2009-09-16-CoalescerBug.ll | 64 +
 .../CodeGen/X86/2009-09-19-earlyclobber.ll | 15 +
 .../X86/2009-09-21-NoSpillLoopCount.ll | 36 +
 .../CodeGen/X86/2009-09-22-CoalescerBug.ll | 124 +
 .../X86/2009-09-23-LiveVariablesBug.ll | 91 +
 .../CodeGen/X86/2009-10-08-MachineLICMBug.ll | 264 +
 .../X86/2009-10-14-LiveVariablesBug.ll | 15 +
 final/test/CodeGen/X86/2009-10-16-Scope.ll | 32 +
 .../CodeGen/X86/2009-10-19-EmergencySpill.ll | 54 +
 .../X86/2009-10-19-atomic-cmp-eflags.ll | 69 +
 .../CodeGen/X86/2009-10-25-RewriterBug.ll | 171 +
 .../X86/2009-11-04-SubregCoalescingBug.ll | 15 +
 .../X86/2009-11-13-VirtRegRewriterBug.ll | 133 +
 .../CodeGen/X86/2009-11-16-MachineLICM.ll | 42 +
 .../CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 28 +
 .../X86/2009-11-17-UpdateTerminator.ll | 52 +
 .../CodeGen/X86/2009-11-18-TwoAddrKill.ll | 29 +
 .../test/CodeGen/X86/2009-11-25-ImpDefBug.ll | 116 +
 .../CodeGen/X86/2009-12-01-EarlyClobberBug.ll | 38 +
 .../CodeGen/X86/2009-12-11-TLSNoRedZone.ll | 63 +
 final/test/CodeGen/X86/20090313-signext.ll | 19 +
 final/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll | 15 +
 final/test/CodeGen/X86/2010-01-07-ISelBug.ll | 27 +
 .../CodeGen/X86/2010-01-07-UAMemFeature.ll | 11 +
 .../CodeGen/X86/2010-01-08-Atomic64Bug.ll | 29 +
 .../CodeGen/X86/2010-01-11-ExtraPHIArg.ll | 97 +
 .../test/CodeGen/X86/2010-01-13-OptExtBug.ll | 46 +
 .../X86/2010-01-15-SelectionDAGCycle.ll | 28 +
 final/test/CodeGen/X86/2010-01-18-DbgValue.ll | 48 +
 .../test/CodeGen/X86/2010-01-19-OptExtBug.ll | 57 +
 .../CodeGen/X86/2010-02-01-DbgValueCrash.ll | 33 +
 .../CodeGen/X86/2010-02-01-TaillCallCrash.ll | 12 +
 .../test/CodeGen/X86/2010-02-03-DualUndef.ll | 27 +
 .../CodeGen/X86/2010-02-04-SchedulerBug.ll | 28 +
 .../CodeGen/X86/2010-02-11-NonTemporal.ll | 22 +
 .../X86/2010-02-12-CoalescerBug-Impdef.ll | 260 +
 .../CodeGen/X86/2010-02-15-ImplicitDefBug.ll | 80 +
 .../X86/2010-02-19-TailCallRetAddrBug.ll | 55 +
 .../CodeGen/X86/2010-02-23-DAGCombineBug.ll | 18 +
 .../CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll | 20 +
 .../X86/2010-02-23-RematImplicitSubreg.ll | 49 +
 .../X86/2010-02-23-SingleDefPhiJoin.ll | 146 +
 final/test/CodeGen/X86/2010-03-04-Mul8Bug.ll | 25 +
 .../CodeGen/X86/2010-03-05-ConstantFoldCFG.ll | 42 +
 .../CodeGen/X86/2010-03-05-EFLAGS-Redef.ll | 49 +
 final/test/CodeGen/X86/2010-03-17-ISelBug.ll | 67 +
 .../X86/2010-04-06-SSEDomainFixCrash.ll | 64 +
 .../X86/2010-04-07-DbgValueOtherTargets.ll | 28 +
 .../CodeGen/X86/2010-04-08-CoalescerBug.ll | 26 +
 .../X86/2010-04-13-AnalyzeBranchCrash.ll | 42 +
 .../CodeGen/X86/2010-04-21-CoalescerBug.ll | 15 +
 .../CodeGen/X86/2010-04-23-mmx-movdq2q.ll | 93 +
 .../CodeGen/X86/2010-04-29-CoalescerCrash.ll | 142 +
 .../X86/2010-04-30-LocalAlloc-LandingPad.ll | 143 +
 .../X86/2010-05-03-CoalescerSubRegClobber.ll | 33 +
 .../X86/2010-05-05-LocalAllocEarlyClobber.ll | 32 +
 .../X86/2010-05-06-LocalInlineAsmClobber.ll | 10 +
 .../test/CodeGen/X86/2010-05-07-ldconvert.ll | 27 +
 .../CodeGen/X86/2010-05-10-DAGCombinerBug.ll | 11 +
 .../CodeGen/X86/2010-05-12-FastAllocKills.ll | 59 +
 .../CodeGen/X86/2010-05-16-nosseconversion.ll | 12 +
 .../CodeGen/X86/2010-05-25-DotDebugLoc.ll | 242 +
 .../CodeGen/X86/2010-05-26-DotDebugLoc.ll | 66 +
 .../CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll | 16 +
 final/test/CodeGen/X86/2010-05-28-Crash.ll | 44 +
 .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 53 +
 .../X86/2010-06-09-FastAllocRegisters.ll | 17 +
 .../X86/2010-06-14-fast-isel-fs-load.ll | 6 +
 .../X86/2010-06-15-FastAllocEarlyCLobber.ll | 29 +
 .../X86/2010-06-24-g-constraint-crash.ll | 15 +
 .../X86/2010-06-25-CoalescerSubRegDefDead.ll | 39 +
 .../CodeGen/X86/2010-06-25-asm-RA-crash.ll | 19 +
 .../test/CodeGen/X86/2010-06-28-DbgEntryPC.ll | 108 +
 .../X86/2010-06-28-FastAllocTiedOperand.ll | 22 +
 .../X86/2010-06-28-matched-g-constraint.ll | 11 +
 .../test/CodeGen/X86/2010-07-02-UnfoldBug.ll | 99 +
 .../CodeGen/X86/2010-07-02-asm-alignstack.ll | 31 +
 final/test/CodeGen/X86/2010-07-06-DbgCrash.ll | 29 +
 final/test/CodeGen/X86/2010-07-06-asm-RIP.ll | 21 +
 .../CodeGen/X86/2010-07-11-FPStackLoneUse.ll | 28 +
 .../X86/2010-07-13-indirectXconstraint.ll | 18 +
 final/test/CodeGen/X86/2010-07-15-Crash.ll | 12 +
 .../CodeGen/X86/2010-07-29-SetccSimplify.ll | 14 +
 .../X86/2010-08-04-MaskedSignedCompare.ll | 36 +
 .../test/CodeGen/X86/2010-08-04-MingWCrash.ll | 39 +
 .../CodeGen/X86/2010-08-04-StackVariable.ll | 124 +
 .../CodeGen/X86/2010-08-10-DbgConstant.ll | 25 +
 .../2010-09-01-RemoveCopyByCommutingDef.ll | 28 +
 .../CodeGen/X86/2010-09-16-EmptyFilename.ll | 29 +
 final/test/CodeGen/X86/2010-09-16-asmcrash.ll | 56 +
 .../X86/2010-09-17-SideEffectsInChain.ll | 26 +
 .../X86/2010-09-30-CMOV-JumpTable-PHI.ll | 71 +
 .../test/CodeGen/X86/2010-10-08-cmpxchg8b.ll | 28 +
 .../CodeGen/X86/2010-11-02-DbgParameter.ll | 35 +
 final/test/CodeGen/X86/2010-11-09-MOVLPS.ll | 66 +
 .../CodeGen/X86/2010-11-18-SelectOfExtload.ll | 15 +
 final/test/CodeGen/X86/2010-12-02-MC-Set.ll | 22 +
 .../X86/2011-01-07-LegalizeTypesCrash.ll | 19 +
 .../CodeGen/X86/2011-01-10-DagCombineHang.ll | 15 +
 .../X86/2011-01-24-DbgValue-Before-Use.ll | 103 +
 .../X86/2011-02-04-FastRegallocNoFP.ll | 14 +
 .../2011-02-21-VirtRegRewriter-KillSubReg.ll | 50 +
 .../test/CodeGen/X86/2011-02-23-UnfoldBug.ll | 42 +
 final/test/CodeGen/X86/2011-02-27-Fpextend.ll | 7 +
 .../CodeGen/X86/2011-03-02-DAGCombiner.ll | 51 +
 .../X86/2011-03-09-Physreg-Coalescing.ll | 22 +
 final/test/CodeGen/X86/3addr-16bit.ll | 95 +
 final/test/CodeGen/X86/3addr-or.ll | 61 +
 final/test/CodeGen/X86/Atomics-32.ll | 818 +
 final/test/CodeGen/X86/Atomics-64.ll | 1015 +
 final/test/CodeGen/X86/GC/alloc_loop.ll | 53 +
 final/test/CodeGen/X86/GC/argpromotion.ll | 19 +
 final/test/CodeGen/X86/GC/badreadproto.ll | 13 +
 final/test/CodeGen/X86/GC/badrootproto.ll | 13 +
 final/test/CodeGen/X86/GC/badwriteproto.ll | 22 +
 final/test/CodeGen/X86/GC/deadargelim.ll | 16 +
 final/test/CodeGen/X86/GC/dg.exp | 5 +
 final/test/CodeGen/X86/GC/fat.ll | 10 +
 final/test/CodeGen/X86/GC/inline.ll | 23 +
 final/test/CodeGen/X86/GC/inline2.ll | 24 +
 final/test/CodeGen/X86/GC/lower_gcroot.ll | 11 +
 final/test/CodeGen/X86/GC/outside.ll | 10 +
 final/test/CodeGen/X86/GC/simple_ocaml.ll | 42 +
 .../test/CodeGen/X86/MachineSink-CritEdge.ll | 58 +
 final/test/CodeGen/X86/MachineSink-PHIUse.ll | 39 +
 final/test/CodeGen/X86/SIMD/dg.exp | 5 +
 final/test/CodeGen/X86/SIMD/notvunpcklpd.ll | 20 +
 final/test/CodeGen/X86/SIMD/notvunpcklps.ll | 20 +
 final/test/CodeGen/X86/SIMD/vunpcklpd.ll | 20 +
 final/test/CodeGen/X86/SIMD/vunpcklps.ll | 20 +
 final/test/CodeGen/X86/SwitchLowering.ll | 28 +
 final/test/CodeGen/X86/abi-isel.ll | 9660 +++++++
 final/test/CodeGen/X86/add-of-carry.ll | 34 +
 final/test/CodeGen/X86/add.ll | 135 +
 final/test/CodeGen/X86/adde-carry.ll | 26 +
 .../test/CodeGen/X86/addr-label-difference.ll | 26 +
 final/test/CodeGen/X86/aliases.ll | 38 +
 final/test/CodeGen/X86/aligned-comm.ll | 6 +
 final/test/CodeGen/X86/alignment.ll | 43 +
 final/test/CodeGen/X86/all-ones-vector.ll | 14 +
 final/test/CodeGen/X86/alldiv-divdi3.ll | 17 +
 .../CodeGen/X86/alloca-align-rounding-32.ll | 15 +
 .../test/CodeGen/X86/alloca-align-rounding.ll | 15 +
 final/test/CodeGen/X86/and-or-fold.ll | 26 +
 final/test/CodeGen/X86/and-su.ll | 53 +
 final/test/CodeGen/X86/andimm8.ll | 19 +
 final/test/CodeGen/X86/anyext.ll | 18 +
 final/test/CodeGen/X86/apm.ll | 35 +
 final/test/CodeGen/X86/arg-cast.ll | 18 +
 final/test/CodeGen/X86/asm-block-labels.ll | 41 +
 final/test/CodeGen/X86/asm-global-imm.ll | 23 +
 final/test/CodeGen/X86/asm-indirect-mem.ll | 11 +
 final/test/CodeGen/X86/asm-modifier-P.ll | 79 +
 final/test/CodeGen/X86/asm-modifier.ll | 41 +
 final/test/CodeGen/X86/atomic_add.ll | 217 +
 final/test/CodeGen/X86/atomic_op.ll | 143 +
 final/test/CodeGen/X86/attribute-sections.ll | 18 +
 final/test/CodeGen/X86/avoid-lea-scale2.ll | 10 +
 final/test/CodeGen/X86/avoid-loop-align-2.ll | 49 +
 final/test/CodeGen/X86/avoid-loop-align.ll | 39 +
 final/test/CodeGen/X86/avx-128.ll | 12 +
 final/test/CodeGen/X86/avx-256.ll | 15 +
 final/test/CodeGen/X86/avx-intrinsics-x86.ll | 2578 ++
 .../test/CodeGen/X86/avx-intrinsics-x86_64.ll | 50 +
 final/test/CodeGen/X86/barrier-sse.ll | 21 +
 final/test/CodeGen/X86/barrier.ll | 7 +
 final/test/CodeGen/X86/bc-extract.ll | 27 +
 final/test/CodeGen/X86/bigstructret.ll | 17 +
 final/test/CodeGen/X86/bigstructret2.ll | 12 +
 final/test/CodeGen/X86/bit-test-shift.ll | 13 +
 .../test/CodeGen/X86/bitcast-int-to-vector.ll | 10 +
 final/test/CodeGen/X86/bitcast.ll | 24 +
 final/test/CodeGen/X86/bitcast2.ll | 13 +
 final/test/CodeGen/X86/br-fold.ll | 20 +
 final/test/CodeGen/X86/brcond.ll | 108 +
 .../CodeGen/X86/break-anti-dependencies.ll | 34 +
 final/test/CodeGen/X86/break-sse-dep.ll | 63 +
 final/test/CodeGen/X86/bss_pagealigned.ll | 21 +
 final/test/CodeGen/X86/bswap-inline-asm.ll | 87 +
 final/test/CodeGen/X86/bswap.ll | 27 +
 final/test/CodeGen/X86/bt.ll | 442 +
 final/test/CodeGen/X86/byval.ll | 17 +
 final/test/CodeGen/X86/byval2.ll | 45 +
 final/test/CodeGen/X86/byval3.ll | 53 +
 final/test/CodeGen/X86/byval4.ll | 59 +
 final/test/CodeGen/X86/byval5.ll | 67 +
 final/test/CodeGen/X86/byval6.ll | 16 +
 final/test/CodeGen/X86/byval7.ll | 22 +
 final/test/CodeGen/X86/call-imm.ll | 18 +
 final/test/CodeGen/X86/call-push.ll | 29 +
 .../CodeGen/X86/change-compare-stride-0.ll | 77 +
 .../CodeGen/X86/change-compare-stride-1.ll | 86 +
 .../X86/change-compare-stride-trickiness-0.ll | 29 +
 .../X86/change-compare-stride-trickiness-1.ll | 28 +
 .../X86/change-compare-stride-trickiness-2.ll | 58 +
 final/test/CodeGen/X86/clz.ll | 33 +
 final/test/CodeGen/X86/cmov.ll | 157 +
 final/test/CodeGen/X86/cmp.ll | 92 +
 final/test/CodeGen/X86/coalesce-esp.ll | 36 +
 final/test/CodeGen/X86/coalescer-commute1.ll | 26 +
 final/test/CodeGen/X86/coalescer-commute2.ll | 39 +
 final/test/CodeGen/X86/coalescer-commute3.ll | 24 +
 final/test/CodeGen/X86/coalescer-commute4.ll | 30 +
 final/test/CodeGen/X86/coalescer-commute5.ll | 21 +
 final/test/CodeGen/X86/coalescer-cross.ll | 41 +
 final/test/CodeGen/X86/coalescer-remat.ll | 15 +
 final/test/CodeGen/X86/code_placement.ll | 136 +
 final/test/CodeGen/X86/code_placement_eh.ll | 45 +
 .../test/CodeGen/X86/codegen-prepare-cast.ll | 24 +
 .../CodeGen/X86/codegen-prepare-extload.ll | 21 +
 final/test/CodeGen/X86/codemodel.ll | 67 +
 final/test/CodeGen/X86/combine-lds.ll | 6 +
 final/test/CodeGen/X86/combiner-aa-0.ll | 20 +
 final/test/CodeGen/X86/combiner-aa-1.ll | 23 +
 final/test/CodeGen/X86/commute-intrinsic.ll | 15 +
 final/test/CodeGen/X86/commute-two-addr.ll | 62 +
 final/test/CodeGen/X86/compare-add.ll | 8 +
 final/test/CodeGen/X86/compare-inf.ll | 76 +
 final/test/CodeGen/X86/compare_folding.ll | 11 +
 final/test/CodeGen/X86/compiler_used.ll | 9 +
 final/test/CodeGen/X86/complex-asm.ll | 17 +
 final/test/CodeGen/X86/complex-fca.ll | 14 +
 .../CodeGen/X86/conditional-indecrement.ll | 89 +
 .../test/CodeGen/X86/constant-pool-remat-0.ll | 21 +
 .../test/CodeGen/X86/constant-pool-sharing.ll | 20 +
 final/test/CodeGen/X86/constpool.ll | 16 +
 .../X86/convert-2-addr-3-addr-inc64.ll | 26 +
 final/test/CodeGen/X86/copysign-zero.ll | 14 +
 final/test/CodeGen/X86/crash-O0.ll | 31 +
 final/test/CodeGen/X86/crash.ll | 201 +
 .../test/CodeGen/X86/critical-edge-split-2.ll | 29 +
 final/test/CodeGen/X86/cstring.ll | 4 +
 final/test/CodeGen/X86/ctpop-combine.ll | 40 +
 final/test/CodeGen/X86/dag-rauw-cse.ll | 9 +
 .../CodeGen/X86/dagcombine-buildvector.ll | 27 +
 final/test/CodeGen/X86/dagcombine-cse.ll | 27 +
 final/test/CodeGen/X86/darwin-bzero.ll | 8 +
 .../test/CodeGen/X86/darwin-no-dead-strip.ll | 7 +
 final/test/CodeGen/X86/darwin-quote.ll | 15 +
 final/test/CodeGen/X86/darwin-stub.ll | 12 +
 final/test/CodeGen/X86/dbg-byval-parameter.ll | 45 +
 final/test/CodeGen/X86/dbg-merge-loc-entry.ll | 71 +
 .../X86/dbg-value-inlined-parameter.ll | 86 +
 final/test/CodeGen/X86/dbg-value-location.ll | 70 +
 final/test/CodeGen/X86/dbg-value-range.ll | 56 +
 final/test/CodeGen/X86/dg.exp | 5 +
 final/test/CodeGen/X86/discontiguous-loops.ll | 72 +
 final/test/CodeGen/X86/divide-by-constant.ll | 62 +
 final/test/CodeGen/X86/divrem.ll | 58 +
 final/test/CodeGen/X86/dll-linkage.ll | 14 +
 final/test/CodeGen/X86/dllexport.ll | 12 +
 final/test/CodeGen/X86/dollar-name.ll | 18 +
 final/test/CodeGen/X86/dyn-stackalloc.ll | 19 +
 final/test/CodeGen/X86/empty-functions.ll | 15 +
 .../CodeGen/X86/empty-struct-return-type.ll | 15 +
 final/test/CodeGen/X86/epilogue.ll | 11 +
 final/test/CodeGen/X86/extend.ll | 18 +
 final/test/CodeGen/X86/extern_weak.ll | 13 +
 final/test/CodeGen/X86/extmul128.ll | 14 +
 final/test/CodeGen/X86/extmul64.ll | 14 +
 final/test/CodeGen/X86/extract-combine.ll | 15 +
 final/test/CodeGen/X86/extract-extract.ll | 24 +
 .../CodeGen/X86/extractelement-from-arg.ll | 7 +
 final/test/CodeGen/X86/extractelement-load.ll | 9 +
 .../CodeGen/X86/extractelement-shuffle.ll | 13 +
 final/test/CodeGen/X86/extractps.ll | 27 +
 final/test/CodeGen/X86/fabs.ll | 28 +
 final/test/CodeGen/X86/fast-cc-callee-pops.ll | 13 +
 .../CodeGen/X86/fast-cc-merge-stack-adj.ll | 13 +
 .../test/CodeGen/X86/fast-cc-pass-in-regs.ll | 29 +
 final/test/CodeGen/X86/fast-isel-atomic.ll | 17 +
 .../fast-isel-avoid-unnecessary-pic-base.ll | 23 +
 final/test/CodeGen/X86/fast-isel-bail.ll | 14 +
 final/test/CodeGen/X86/fast-isel-bc.ll | 23 +
 final/test/CodeGen/X86/fast-isel-call.ll | 13 +
 .../test/CodeGen/X86/fast-isel-cmp-branch.ll | 30 +
 final/test/CodeGen/X86/fast-isel-constpool.ll | 17 +
 final/test/CodeGen/X86/fast-isel-fneg.ll | 16 +
 final/test/CodeGen/X86/fast-isel-gep.ll | 109 +
 final/test/CodeGen/X86/fast-isel-gv.ll | 24 +
 final/test/CodeGen/X86/fast-isel-i1.ll | 19 +
 final/test/CodeGen/X86/fast-isel-mem.ll | 34 +
 final/test/CodeGen/X86/fast-isel-shift-imm.ll | 8 +
 final/test/CodeGen/X86/fast-isel-tailcall.ll | 13 +
 final/test/CodeGen/X86/fast-isel-tls.ll | 10 +
 final/test/CodeGen/X86/fast-isel-x86.ll | 33 +
 final/test/CodeGen/X86/fast-isel.ll | 94 +
 .../CodeGen/X86/fastcall-correct-mangling.ll | 9 +
 final/test/CodeGen/X86/fastcc-2.ll | 10 +
 final/test/CodeGen/X86/fastcc-byval.ll | 20 +
 final/test/CodeGen/X86/fastcc-sret.ll | 23 +
 final/test/CodeGen/X86/fastcc.ll | 20 +
 final/test/CodeGen/X86/fastcc3struct.ll | 15 +
 .../CodeGen/X86/field-extract-use-trunc.ll | 39 +
 final/test/CodeGen/X86/fildll.ll | 12 +
 final/test/CodeGen/X86/fltused.ll | 19 +
 final/test/CodeGen/X86/fmul-zero.ll | 9 +
 final/test/CodeGen/X86/fold-add.ll | 25 +
 final/test/CodeGen/X86/fold-and-shift.ll | 21 +
 final/test/CodeGen/X86/fold-call-2.ll | 10 +
 final/test/CodeGen/X86/fold-call-3.ll | 45 +
 final/test/CodeGen/X86/fold-call.ll | 10 +
 final/test/CodeGen/X86/fold-imm.ll | 14 +
 final/test/CodeGen/X86/fold-load.ll | 47 +
 final/test/CodeGen/X86/fold-mul-lohi.ll | 31 +
 final/test/CodeGen/X86/fold-pcmpeqd-0.ll | 105 +
 final/test/CodeGen/X86/fold-pcmpeqd-1.ll | 11 +
 final/test/CodeGen/X86/fold-pcmpeqd-2.ll | 94 +
 final/test/CodeGen/X86/fold-sext-trunc.ll | 20 +
 final/test/CodeGen/X86/force-align-stack.ll | 21 +
 final/test/CodeGen/X86/fp-elim.ll | 44 +
 .../test/CodeGen/X86/fp-immediate-shorten.ll | 9 +
 final/test/CodeGen/X86/fp-in-intregs.ll | 22 +
 final/test/CodeGen/X86/fp-stack-2results.ll | 60 +
 final/test/CodeGen/X86/fp-stack-O0-crash.ll | 30 +
 final/test/CodeGen/X86/fp-stack-compare.ll | 11 +
 final/test/CodeGen/X86/fp-stack-direct-ret.ll | 11 +
 final/test/CodeGen/X86/fp-stack-ret-conv.ll | 17 +
 final/test/CodeGen/X86/fp-stack-ret-store.ll | 26 +
 final/test/CodeGen/X86/fp-stack-ret.ll | 25 +
 final/test/CodeGen/X86/fp-stack-retcopy.ll | 12 +
 final/test/CodeGen/X86/fp-stack-set-st1.ll | 7 +
 final/test/CodeGen/X86/fp-stack.ll | 25 +
 final/test/CodeGen/X86/fp2sint.ll | 18 +
 final/test/CodeGen/X86/fp_constant_op.ll | 46 +
 final/test/CodeGen/X86/fp_load_cast_fold.ll | 20 +
 final/test/CodeGen/X86/fp_load_fold.ll | 40 +
 final/test/CodeGen/X86/fsxor-alignment.ll | 14 +
 final/test/CodeGen/X86/full-lsr.ll | 34 +
 final/test/CodeGen/X86/ga-offset.ll | 18 +
 final/test/CodeGen/X86/gather-addresses.ll | 41 +
 final/test/CodeGen/X86/ghc-cc.ll | 45 +
 final/test/CodeGen/X86/ghc-cc64.ll | 86 +
 final/test/CodeGen/X86/global-sections-tls.ll | 14 +
 final/test/CodeGen/X86/global-sections.ll | 160 +
 .../CodeGen/X86/h-register-addressing-32.ll | 53 +
 .../CodeGen/X86/h-register-addressing-64.ll | 53 +
 final/test/CodeGen/X86/h-register-store.ll | 47 +
 final/test/CodeGen/X86/h-registers-0.ll | 106 +
 final/test/CodeGen/X86/h-registers-1.ll | 39 +
 final/test/CodeGen/X86/h-registers-2.ll | 15 +
 final/test/CodeGen/X86/h-registers-3.ll | 12 +
 final/test/CodeGen/X86/hidden-vis-2.ll | 10 +
 final/test/CodeGen/X86/hidden-vis-3.ll | 19 +
 final/test/CodeGen/X86/hidden-vis-4.ll | 12 +
 final/test/CodeGen/X86/hidden-vis-pic.ll | 55 +
 final/test/CodeGen/X86/hidden-vis.ll | 24 +
 final/test/CodeGen/X86/i128-and-beyond.ll | 8 +
 final/test/CodeGen/X86/i128-immediate.ll | 5 +
 final/test/CodeGen/X86/i128-mul.ll | 12 +
 final/test/CodeGen/X86/i128-ret.ll | 10 +
 final/test/CodeGen/X86/i256-add.ll | 18 +
 final/test/CodeGen/X86/i2k.ll | 9 +
 final/test/CodeGen/X86/i64-mem-copy.ll | 17 +
 final/test/CodeGen/X86/iabs.ll | 16 +
 final/test/CodeGen/X86/illegal-insert.ll | 18 +
 .../CodeGen/X86/illegal-vector-args-return.ll | 14 +
 final/test/CodeGen/X86/imul-lea-2.ll | 15 +
 final/test/CodeGen/X86/imul-lea.ll | 10 +
 final/test/CodeGen/X86/inline-asm-2addr.ll | 9 +
 .../CodeGen/X86/inline-asm-R-constraint.ll | 18 +
 .../CodeGen/X86/inline-asm-flag-clobber.ll | 19 +
 final/test/CodeGen/X86/inline-asm-fpstack.ll | 88 +
 final/test/CodeGen/X86/inline-asm-fpstack2.ll | 21 +
 final/test/CodeGen/X86/inline-asm-fpstack3.ll | 20 +
 final/test/CodeGen/X86/inline-asm-fpstack4.ll | 24 +
 final/test/CodeGen/X86/inline-asm-fpstack5.ll | 15 +
 final/test/CodeGen/X86/inline-asm-h.ll | 12 +
 .../test/CodeGen/X86/inline-asm-modifier-n.ll | 8 +
 final/test/CodeGen/X86/inline-asm-mrv.ll | 35 +
 final/test/CodeGen/X86/inline-asm-out-regs.ll | 40 +
 final/test/CodeGen/X86/inline-asm-pic.ll | 10 +
 final/test/CodeGen/X86/inline-asm-ptr-cast.ll | 27 +
 final/test/CodeGen/X86/inline-asm-q-regs.ll | 10 +
 final/test/CodeGen/X86/inline-asm-tied.ll | 19 +
 final/test/CodeGen/X86/inline-asm-x-scalar.ll | 24 +
 final/test/CodeGen/X86/inline-asm.ll | 25 +
 .../test/CodeGen/X86/ins_subreg_coalesce-1.ll | 30 +
 .../test/CodeGen/X86/ins_subreg_coalesce-2.ll | 7 +
 .../test/CodeGen/X86/ins_subreg_coalesce-3.ll | 92 +
 final/test/CodeGen/X86/insert-positions.ll | 69 +
 .../CodeGen/X86/insertelement-copytoregs.ll | 11 +
 .../CodeGen/X86/insertelement-legalize.ll | 10 +
 final/test/CodeGen/X86/int-intrinsic.ll | 20 +
 .../CodeGen/X86/invalid-shift-immediate.ll | 30 +
 final/test/CodeGen/X86/isel-sink.ll | 18 +
 final/test/CodeGen/X86/isel-sink2.ll | 17 +
 final/test/CodeGen/X86/isel-sink3.ll | 25 +
 final/test/CodeGen/X86/isint.ll | 31 +
 final/test/CodeGen/X86/isnan.ll | 9 +
 final/test/CodeGen/X86/isnan2.ll | 11 +
 final/test/CodeGen/X86/ispositive.ll | 9 +
 .../CodeGen/X86/iv-users-in-other-loops.ll | 296 +
 final/test/CodeGen/X86/jump_sign.ll | 20 +
 final/test/CodeGen/X86/label-redefinition.ll | 15 +
 final/test/CodeGen/X86/large-gep-scale.ll | 12 +
 final/test/CodeGen/X86/ldzero.ll | 43 +
 final/test/CodeGen/X86/lea-2.ll | 13 +
 final/test/CodeGen/X86/lea-3.ll | 22 +
 final/test/CodeGen/X86/lea-4.ll | 19 +
 final/test/CodeGen/X86/lea-recursion.ll | 47 +
 final/test/CodeGen/X86/lea.ll | 35 +
 final/test/CodeGen/X86/leaf-fp-elim.ll | 30 +
 .../X86/legalize-fmp-oeq-vector-select.ll | 11 +
 final/test/CodeGen/X86/legalize-sub-zero-2.ll | 41 +
 final/test/CodeGen/X86/legalize-sub-zero.ll | 35 +
 final/test/CodeGen/X86/legalizedag_vec.ll | 17 +
 final/test/CodeGen/X86/lfence.ll | 8 +
 final/test/CodeGen/X86/licm-nested.ll | 89 +
 final/test/CodeGen/X86/licm-symbol.ll | 39 +
 final/test/CodeGen/X86/limited-prec.ll | 60 +
 final/test/CodeGen/X86/live-out-reg-info.ll | 20 +
 .../CodeGen/X86/liveness-local-regalloc.ll | 60 +
 final/test/CodeGen/X86/lock-inst-encoding.ll | 22 +
 final/test/CodeGen/X86/long-setcc.ll | 18 +
 final/test/CodeGen/X86/longlong-deadload.ll | 11 +
 final/test/CodeGen/X86/loop-blocks.ll | 208 +
 final/test/CodeGen/X86/loop-hoist.ll | 27 +
 .../CodeGen/X86/loop-strength-reduce-2.ll | 45 +
 .../CodeGen/X86/loop-strength-reduce-3.ll | 33 +
 .../test/CodeGen/X86/loop-strength-reduce.ll | 33 +
 .../test/CodeGen/X86/loop-strength-reduce2.ll | 29 +
 .../test/CodeGen/X86/loop-strength-reduce3.ll | 37 +
 .../test/CodeGen/X86/loop-strength-reduce4.ll | 63 +
 .../test/CodeGen/X86/loop-strength-reduce5.ll | 23 +
 .../test/CodeGen/X86/loop-strength-reduce6.ll | 66 +
 .../test/CodeGen/X86/loop-strength-reduce7.ll | 44 +
 .../test/CodeGen/X86/loop-strength-reduce8.ll | 84 +
 final/test/CodeGen/X86/lsr-delayed-fold.ll | 178 +
 final/test/CodeGen/X86/lsr-i386.ll | 44 +
 .../test/CodeGen/X86/lsr-interesting-step.ll | 51 +
 final/test/CodeGen/X86/lsr-loop-exit-cond.ll | 138 +
 final/test/CodeGen/X86/lsr-negative-stride.ll | 51 +
 final/test/CodeGen/X86/lsr-nonaffine.ll | 23 +
 final/test/CodeGen/X86/lsr-normalization.ll | 99 +
 final/test/CodeGen/X86/lsr-overflow.ll | 27 +
 .../test/CodeGen/X86/lsr-quadratic-expand.ll | 22 +
 .../CodeGen/X86/lsr-redundant-addressing.ll | 45 +
 final/test/CodeGen/X86/lsr-reuse-trunc.ll | 60 +
 final/test/CodeGen/X86/lsr-reuse.ll | 751 +
 final/test/CodeGen/X86/lsr-sort.ll | 23 +
 final/test/CodeGen/X86/lsr-static-addr.ll | 31 +
 final/test/CodeGen/X86/lsr-wrap.ll | 37 +
 final/test/CodeGen/X86/machine-cse.ll | 79 +
 final/test/CodeGen/X86/masked-iv-safe.ll | 244 +
 final/test/CodeGen/X86/masked-iv-unsafe.ll | 386 +
 final/test/CodeGen/X86/maskmovdqu.ll | 11 +
 .../test/CodeGen/X86/mcinst-lowering-cmp0.ll | 68 +
 final/test/CodeGen/X86/mcinst-lowering.ll | 26 +
 final/test/CodeGen/X86/memcmp.ll | 111 +
 final/test/CodeGen/X86/memcpy-2.ll | 167 +
 final/test/CodeGen/X86/memcpy.ll | 81 +
 final/test/CodeGen/X86/memmove-4.ll | 12 +
 final/test/CodeGen/X86/memset-2.ll | 39 +
 final/test/CodeGen/X86/memset-3.ll | 12 +
 final/test/CodeGen/X86/memset.ll | 18 +
 final/test/CodeGen/X86/memset64-on-x86-32.ll | 12 +
 final/test/CodeGen/X86/mfence.ll | 20 +
 final/test/CodeGen/X86/mingw-alloca.ll | 30 +
 final/test/CodeGen/X86/misaligned-memset.ll | 15 +
 final/test/CodeGen/X86/mmx-arg-passing.ll | 27 +
 final/test/CodeGen/X86/mmx-arg-passing2.ll | 28 +
 final/test/CodeGen/X86/mmx-arith.ll | 309 +
 final/test/CodeGen/X86/mmx-bitcast-to-i64.ll | 31 +
 final/test/CodeGen/X86/mmx-builtins.ll | 1324 +
 final/test/CodeGen/X86/mmx-copy-gprs.ll | 19 +
 final/test/CodeGen/X86/mmx-emms.ll | 11 +
 final/test/CodeGen/X86/mmx-insert-element.ll | 9 +
 final/test/CodeGen/X86/mmx-pinsrw.ll | 15 +
 final/test/CodeGen/X86/mmx-punpckhdq.ll | 31 +
 final/test/CodeGen/X86/mmx-s2v.ll | 15 +
 final/test/CodeGen/X86/mmx-shift.ll | 32 +
 final/test/CodeGen/X86/mmx-shuffle.ll | 31 +
 final/test/CodeGen/X86/mmx-vzmovl-2.ll | 29 +
 final/test/CodeGen/X86/mmx-vzmovl.ll | 15 +
 final/test/CodeGen/X86/movfs.ll | 8 +
 final/test/CodeGen/X86/movgs.ll | 58 +
 final/test/CodeGen/X86/mul-legalize.ll | 24 +
 final/test/CodeGen/X86/mul-remat.ll | 8 +
 final/test/CodeGen/X86/mul-shift-reassoc.ll | 12 +
 final/test/CodeGen/X86/mul128.ll | 6 +
 final/test/CodeGen/X86/mul64.ll | 6 +
 .../test/CodeGen/X86/mult-alt-generic-i686.ll | 321 +
 .../CodeGen/X86/mult-alt-generic-x86_64.ll | 321 +
 final/test/CodeGen/X86/mult-alt-x86.ll | 358 +
 .../CodeGen/X86/multiple-loop-post-inc.ll | 304 +
 .../X86/multiple-return-values-cross-block.ll | 15 +
 .../CodeGen/X86/multiple-return-values.ll | 16 +
 final/test/CodeGen/X86/nancvt.ll | 183 +
 final/test/CodeGen/X86/narrow-shl-load.ll | 83 +
 final/test/CodeGen/X86/narrow_op-1.ll | 23 +
 final/test/CodeGen/X86/neg-shl-add.ll | 17 +
 final/test/CodeGen/X86/neg_fp.ll | 12 +
 final/test/CodeGen/X86/negate-add-zero.ll | 1145 +
 final/test/CodeGen/X86/negative-sin.ll | 12 +
 .../X86/negative-stride-fptosi-user.ll | 25 +
 final/test/CodeGen/X86/negative-subscript.ll | 10 +
 final/test/CodeGen/X86/negative_zero.ll | 6 +
 final/test/CodeGen/X86/nobt.ll | 70 +
 final/test/CodeGen/X86/nofence.ll | 27 +
 final/test/CodeGen/X86/nosse-error1.ll | 33 +
 final/test/CodeGen/X86/nosse-error2.ll | 33 +
 final/test/CodeGen/X86/nosse-varargs.ll | 46 +
 final/test/CodeGen/X86/object-size.ll | 55 +
 final/test/CodeGen/X86/opt-ext-uses.ll | 19 +
 final/test/CodeGen/X86/optimize-max-0.ll | 461 +
 final/test/CodeGen/X86/optimize-max-1.ll | 78 +
 final/test/CodeGen/X86/optimize-max-2.ll | 30 +
 final/test/CodeGen/X86/optimize-max-3.ll | 77 +
 final/test/CodeGen/X86/or-address.ll | 90 +
 final/test/CodeGen/X86/or-branch.ll | 19 +
 final/test/CodeGen/X86/overlap-shift.ll | 19 +
 final/test/CodeGen/X86/packed_struct.ll | 34 +
 final/test/CodeGen/X86/palignr-2.ll | 28 +
 final/test/CodeGen/X86/palignr.ll | 58 +
 final/test/CodeGen/X86/peep-test-0.ll | 22 +
 final/test/CodeGen/X86/peep-test-1.ll | 23 +
 final/test/CodeGen/X86/peep-test-2.ll | 17 +
 final/test/CodeGen/X86/peep-test-3.ll | 89 +
 .../CodeGen/X86/peep-vector-extract-concat.ll | 11 +
 .../CodeGen/X86/peep-vector-extract-insert.ll | 12 +
 final/test/CodeGen/X86/personality.ll | 50 +
 final/test/CodeGen/X86/phi-bit-propagation.ll | 55 +
 .../CodeGen/X86/phi-immediate-factoring.ll | 54 +
 .../CodeGen/X86/phys-reg-local-regalloc.ll | 51 +
 .../CodeGen/X86/phys_subreg_coalesce-2.ll | 30 +
 .../CodeGen/X86/phys_subreg_coalesce-3.ll | 35 +
 .../test/CodeGen/X86/phys_subreg_coalesce.ll | 24 +
 final/test/CodeGen/X86/pic-load-remat.ll | 47 +
 final/test/CodeGen/X86/pic.ll | 208 +
 final/test/CodeGen/X86/pic_jumptable.ll | 83 +
 final/test/CodeGen/X86/pmul.ll | 32 +
 final/test/CodeGen/X86/pmulld.ll | 26 +
 final/test/CodeGen/X86/popcnt.ll | 38 +
 .../test/CodeGen/X86/postalloc-coalescing.ll | 35 +
 final/test/CodeGen/X86/postra-licm.ll | 185 +
 final/test/CodeGen/X86/powi.ll | 11 +
 final/test/CodeGen/X86/pr1462.ll | 25 +
 final/test/CodeGen/X86/pr1489.ll | 55 +
 final/test/CodeGen/X86/pr1505.ll | 12 +
 final/test/CodeGen/X86/pr1505b.ll | 59 +
 final/test/CodeGen/X86/pr2177.ll | 35 +
 final/test/CodeGen/X86/pr2182.ll | 24 +
 final/test/CodeGen/X86/pr2326.ll | 24 +
 final/test/CodeGen/X86/pr2623.ll | 44 +
 final/test/CodeGen/X86/pr2656.ll | 23 +
 final/test/CodeGen/X86/pr2659.ll | 40 +
 final/test/CodeGen/X86/pr2849.ll | 38 +
 final/test/CodeGen/X86/pr2924.ll | 24 +
 final/test/CodeGen/X86/pr2982.ll | 26 +
 final/test/CodeGen/X86/pr3154.ll | 104 +
 final/test/CodeGen/X86/pr3216.ll | 14 +
 final/test/CodeGen/X86/pr3241.ll | 29 +
 final/test/CodeGen/X86/pr3243.ll | 15 +
 final/test/CodeGen/X86/pr3244.ll | 26 +
 final/test/CodeGen/X86/pr3250.ll | 17 +
 final/test/CodeGen/X86/pr3317.ll | 46 +
 final/test/CodeGen/X86/pr3366.ll | 21 +
 final/test/CodeGen/X86/pr3457.ll | 16 +
 final/test/CodeGen/X86/pr3495-2.ll | 54 +
 final/test/CodeGen/X86/pr3495.ll | 79 +
 final/test/CodeGen/X86/pr3522.ll | 30 +
 final/test/CodeGen/X86/pr7882.ll | 17 +
 final/test/CodeGen/X86/pr9127.ll | 13 +
 final/test/CodeGen/X86/pre-split1.ll | 24 +
 final/test/CodeGen/X86/pre-split10.ll | 51 +
 final/test/CodeGen/X86/pre-split11.ll | 34 +
 final/test/CodeGen/X86/pre-split2.ll | 26 +
 final/test/CodeGen/X86/pre-split3.ll | 26 +
 final/test/CodeGen/X86/pre-split4.ll | 26 +
 final/test/CodeGen/X86/pre-split5.ll | 56 +
 final/test/CodeGen/X86/pre-split6.ll | 36 +
 final/test/CodeGen/X86/pre-split7.ll | 34 +
 final/test/CodeGen/X86/pre-split8.ll | 35 +
 final/test/CodeGen/X86/pre-split9.ll | 38 +
 final/test/CodeGen/X86/prefetch.ll | 16 +
 final/test/CodeGen/X86/private-2.ll | 13 +
 final/test/CodeGen/X86/private.ll | 20 +
 final/test/CodeGen/X86/promote-assert-zext.ll | 22 +
 final/test/CodeGen/X86/promote-i16.ll | 11 +
 final/test/CodeGen/X86/ptrtoint-constexpr.ll | 14 +
 final/test/CodeGen/X86/rdtsc.ll | 8 +
 final/test/CodeGen/X86/red-zone.ll | 25 +
 final/test/CodeGen/X86/red-zone2.ll | 9 +
 final/test/CodeGen/X86/regpressure.ll | 114 +
 final/test/CodeGen/X86/rem-2.ll | 7 +
 final/test/CodeGen/X86/rem.ll | 22 +
 final/test/CodeGen/X86/remat-constant.ll | 15 +
 final/test/CodeGen/X86/remat-mov-0.ll | 34 +
 final/test/CodeGen/X86/remat-scalar-zero.ll | 96 +
 final/test/CodeGen/X86/ret-addr.ll | 22 +
 final/test/CodeGen/X86/ret-i64-0.ll | 5 +
 final/test/CodeGen/X86/ret-mmx.ll | 26 +
 final/test/CodeGen/X86/rip-rel-address.ll | 14 +
 final/test/CodeGen/X86/rodata-relocs.ll | 23 +
 final/test/CodeGen/X86/rot16.ll | 85 +
 final/test/CodeGen/X86/rot32.ll | 85 +
 final/test/CodeGen/X86/rot64.ll | 73 +
 final/test/CodeGen/X86/rotate.ll | 100 +
 final/test/CodeGen/X86/rotate2.ll | 19 +
 final/test/CodeGen/X86/scalar-extract.ll | 13 +
 .../X86/scalar-min-max-fill-operand.ll | 27 +
 final/test/CodeGen/X86/scalar_sse_minmax.ll | 44 +
 final/test/CodeGen/X86/scalar_widen_div.ll | 183 +
 final/test/CodeGen/X86/scalarize-bitcast.ll | 29 +
 final/test/CodeGen/X86/scev-interchange.ll | 338 +
 final/test/CodeGen/X86/select.ll | 220 +
 final/test/CodeGen/X86/setcc.ll | 36 +
 final/test/CodeGen/X86/setoeq.ll | 11 +
 final/test/CodeGen/X86/setuge.ll | 13 +
 final/test/CodeGen/X86/sext-i1.ll | 63 +
 final/test/CodeGen/X86/sext-load.ll | 9 +
 final/test/CodeGen/X86/sext-ret-val.ll | 16 +
 final/test/CodeGen/X86/sext-subreg.ll | 17 +
 final/test/CodeGen/X86/sext-trunc.ll | 9 +
 final/test/CodeGen/X86/sfence.ll | 8 +
 final/test/CodeGen/X86/shift-and.ll | 24 +
 final/test/CodeGen/X86/shift-coalesce.ll | 15 +
 final/test/CodeGen/X86/shift-codegen.ll | 27 +
 final/test/CodeGen/X86/shift-combine.ll | 15 +
 final/test/CodeGen/X86/shift-double.ll | 41 +
 final/test/CodeGen/X86/shift-folding.ll | 28 +
 final/test/CodeGen/X86/shift-i128.ll | 9 +
 final/test/CodeGen/X86/shift-i256.ll | 9 +
 final/test/CodeGen/X86/shift-one.ll | 10 +
 final/test/CodeGen/X86/shift-parts.ll | 22 +
 final/test/CodeGen/X86/shl-anyext.ll | 40 +
 final/test/CodeGen/X86/shl_elim.ll | 13 +
 final/test/CodeGen/X86/shrink-fp-const1.ll | 7 +
 final/test/CodeGen/X86/shrink-fp-const2.ll | 7 +
 final/test/CodeGen/X86/sibcall-2.ll | 52 +
 final/test/CodeGen/X86/sibcall-3.ll | 16 +
 final/test/CodeGen/X86/sibcall-4.ll | 13 +
 final/test/CodeGen/X86/sibcall-5.ll | 31 +
 final/test/CodeGen/X86/sibcall.ll | 333 +
 final/test/CodeGen/X86/sincos.ll | 48 +
 final/test/CodeGen/X86/sink-hoist.ll | 174 +
 final/test/CodeGen/X86/small-byval-memcpy.ll | 20 +
 .../test/CodeGen/X86/smul-with-overflow-2.ll | 20 +
 .../test/CodeGen/X86/smul-with-overflow-3.ll | 23 +
 final/test/CodeGen/X86/smul-with-overflow.ll | 23 +
 final/test/CodeGen/X86/soft-fp.ll | 27 +
 final/test/CodeGen/X86/splat-scalar-load.ll | 17 +
 final/test/CodeGen/X86/split-eh-lpad-edges.ll | 34 +
 final/test/CodeGen/X86/split-vector-rem.ll | 15 +
 final/test/CodeGen/X86/sret.ll | 23 +
 final/test/CodeGen/X86/sse-align-0.ll | 13 +
 final/test/CodeGen/X86/sse-align-1.ll | 10 +
 final/test/CodeGen/X86/sse-align-10.ll | 6 +
 final/test/CodeGen/X86/sse-align-11.ll | 13 +
 final/test/CodeGen/X86/sse-align-12.ll | 57 +
 final/test/CodeGen/X86/sse-align-2.ll | 12 +
 final/test/CodeGen/X86/sse-align-3.ll | 15 +
 final/test/CodeGen/X86/sse-align-4.ll | 10 +
 final/test/CodeGen/X86/sse-align-5.ll | 6 +
 final/test/CodeGen/X86/sse-align-6.ll | 7 +
 final/test/CodeGen/X86/sse-align-7.ll | 8 +
 final/test/CodeGen/X86/sse-align-8.ll | 6 +
 final/test/CodeGen/X86/sse-align-9.ll | 10 +
 final/test/CodeGen/X86/sse-commute.ll | 20 +
 final/test/CodeGen/X86/sse-fcopysign.ll | 16 +
 final/test/CodeGen/X86/sse-load-ret.ll | 19 +
 final/test/CodeGen/X86/sse-minmax.ll | 932 +
 final/test/CodeGen/X86/sse-varargs.ll | 9 +
 final/test/CodeGen/X86/sse1.ll | 45 +
 final/test/CodeGen/X86/sse2.ll | 224 +
 final/test/CodeGen/X86/sse3.ll | 277 +
 final/test/CodeGen/X86/sse41.ll | 251 +
 final/test/CodeGen/X86/sse42.ll | 38 +
 final/test/CodeGen/X86/sse_reload_fold.ll | 125 +
 final/test/CodeGen/X86/stack-align.ll | 51 +
 .../test/CodeGen/X86/stack-protector-linux.ll | 28 +
 final/test/CodeGen/X86/stdarg.ll | 21 +
 final/test/CodeGen/X86/stdcall-notailcall.ll | 13 +
 final/test/CodeGen/X86/stdcall.ll | 16 +
 final/test/CodeGen/X86/store-empty-member.ll | 14 +
 final/test/CodeGen/X86/store-fp-constant.ll | 19 +
 .../test/CodeGen/X86/store-global-address.ll | 10 +
 final/test/CodeGen/X86/store-narrow.ll | 168 +
 final/test/CodeGen/X86/store_op_load_fold.ll | 13 +
 final/test/CodeGen/X86/store_op_load_fold2.ll | 24 +
 final/test/CodeGen/X86/storetrunc-fp.ll | 8 +
 .../CodeGen/X86/stride-nine-with-base-reg.ll | 38 +
 final/test/CodeGen/X86/stride-reuse.ll | 31 +
 final/test/CodeGen/X86/sub-with-overflow.ll | 41 +
 final/test/CodeGen/X86/subreg-to-reg-0.ll | 11 +
 final/test/CodeGen/X86/subreg-to-reg-1.ll | 13 +
 final/test/CodeGen/X86/subreg-to-reg-2.ll | 25 +
 final/test/CodeGen/X86/subreg-to-reg-3.ll | 10 +
 final/test/CodeGen/X86/subreg-to-reg-4.ll | 135 +
 final/test/CodeGen/X86/subreg-to-reg-6.ll | 29 +
 final/test/CodeGen/X86/switch-bt.ll | 81 +
 .../CodeGen/X86/switch-crit-edge-constant.ll | 52 +
 final/test/CodeGen/X86/switch-or.ll | 22 +
 final/test/CodeGen/X86/switch-zextload.ll | 34 +
 final/test/CodeGen/X86/swizzle.ll | 19 +
 final/test/CodeGen/X86/tail-opts.ll | 428 +
 final/test/CodeGen/X86/tailcall-fastisel.ll | 19 +
 final/test/CodeGen/X86/tailcall-i1.ll | 6 +
 final/test/CodeGen/X86/tailcall-largecode.ll | 71 +
 .../CodeGen/X86/tailcall-returndup-void.ll | 37 +
 final/test/CodeGen/X86/tailcall-ri64.ll | 24 +
 final/test/CodeGen/X86/tailcall-stackalign.ll | 23 +
 final/test/CodeGen/X86/tailcall-structret.ll | 6 +
 final/test/CodeGen/X86/tailcall-void.ll | 6 +
 final/test/CodeGen/X86/tailcall1.ll | 40 +
 final/test/CodeGen/X86/tailcallbyval.ll | 18 +
 final/test/CodeGen/X86/tailcallbyval64.ll | 42 +
 final/test/CodeGen/X86/tailcallfp.ll | 5 +
 final/test/CodeGen/X86/tailcallfp2.ll | 27 +
 final/test/CodeGen/X86/tailcallpic1.ll | 12 +
 final/test/CodeGen/X86/tailcallpic2.ll | 12 +
 final/test/CodeGen/X86/tailcallstack64.ll | 28 +
 final/test/CodeGen/X86/test-nofold.ll | 42 +
 final/test/CodeGen/X86/test-shrink-bug.ll | 23 +
 final/test/CodeGen/X86/test-shrink.ll | 159 +
 final/test/CodeGen/X86/testl-commute.ll | 56 +
 final/test/CodeGen/X86/tls-pic.ll | 67 +
 final/test/CodeGen/X86/tls1.ll | 12 +
 final/test/CodeGen/X86/tls10.ll | 13 +
 final/test/CodeGen/X86/tls11.ll | 12 +
 final/test/CodeGen/X86/tls12.ll | 12 +
 final/test/CodeGen/X86/tls13.ll | 24 +
 final/test/CodeGen/X86/tls14.ll | 24 +
 final/test/CodeGen/X86/tls15.ll | 18 +
 final/test/CodeGen/X86/tls2.ll | 13 +
 final/test/CodeGen/X86/tls3.ll | 14 +
 final/test/CodeGen/X86/tls4.ll | 13 +
 final/test/CodeGen/X86/tls5.ll | 12 +
 final/test/CodeGen/X86/tls6.ll | 13 +
 final/test/CodeGen/X86/tls7.ll | 12 +
 final/test/CodeGen/X86/tls8.ll | 13 +
 final/test/CodeGen/X86/tls9.ll | 12 +
 final/test/CodeGen/X86/tlv-1.ll | 35 +
 final/test/CodeGen/X86/tlv-2.ll | 32 +
 final/test/CodeGen/X86/trap.ll | 9 +
 final/test/CodeGen/X86/trunc-to-bool.ll | 59 +
 final/test/CodeGen/X86/twoaddr-coalesce-2.ll | 15 +
 final/test/CodeGen/X86/twoaddr-coalesce.ll | 24 +
 final/test/CodeGen/X86/twoaddr-lea.ll | 47 +
 final/test/CodeGen/X86/twoaddr-pass-sink.ll | 29 +
 final/test/CodeGen/X86/twoaddr-remat.ll | 67 +
 final/test/CodeGen/X86/uint64-to-float.ll | 21 +
 final/test/CodeGen/X86/uint_to_fp-2.ll | 8 +
 final/test/CodeGen/X86/uint_to_fp.ll | 14 +
 final/test/CodeGen/X86/umul-with-carry.ll | 26 +
 final/test/CodeGen/X86/umul-with-overflow.ll | 14 +
 final/test/CodeGen/X86/unaligned-load.ll | 36 +
 final/test/CodeGen/X86/unknown-location.ll | 34 +
 .../CodeGen/X86/unreachable-loop-sinking.ll | 30 +
 final/test/CodeGen/X86/urem-i8-constant.ll | 6 +
 final/test/CodeGen/X86/use-add-flags.ll | 57 +
 final/test/CodeGen/X86/v-binop-widen.ll | 12 +
 final/test/CodeGen/X86/v-binop-widen2.ll | 40 +
 final/test/CodeGen/X86/v2f32.ll | 115 +
 final/test/CodeGen/X86/v4f32-immediate.ll | 5 +
 .../X86/variable-sized-darwin-bzero.ll | 8 +
 final/test/CodeGen/X86/variadic-node-pic.ll | 11 +
 final/test/CodeGen/X86/vec-sign.ll | 30 +
 final/test/CodeGen/X86/vec-trunc-store.ll | 15 +
 final/test/CodeGen/X86/vec_add.ll | 7 +
 final/test/CodeGen/X86/vec_align.ll | 35 +
 final/test/CodeGen/X86/vec_anyext.ll | 77 +
 final/test/CodeGen/X86/vec_call.ll | 13 +
 final/test/CodeGen/X86/vec_cast.ll | 56 +
 final/test/CodeGen/X86/vec_clear.ll | 13 +
 final/test/CodeGen/X86/vec_compare-2.ll | 29 +
 final/test/CodeGen/X86/vec_compare.ll | 43 +
 final/test/CodeGen/X86/vec_ctbits.ll | 18 +
 final/test/CodeGen/X86/vec_ext_inreg.ll | 37 +
 final/test/CodeGen/X86/vec_extract-sse4.ll | 31 +
 final/test/CodeGen/X86/vec_extract.ll | 36 +
 final/test/CodeGen/X86/vec_fneg.ll | 11 +
 final/test/CodeGen/X86/vec_i64.ll | 22 +
 final/test/CodeGen/X86/vec_ins_extract-1.ll | 25 +
 final/test/CodeGen/X86/vec_ins_extract.ll | 52 +
 final/test/CodeGen/X86/vec_insert-2.ll | 25 +
 final/test/CodeGen/X86/vec_insert-3.ll | 6 +
 final/test/CodeGen/X86/vec_insert-4.ll | 11 +
 final/test/CodeGen/X86/vec_insert-5.ll | 33 +
 final/test/CodeGen/X86/vec_insert-6.ll | 8 +
 final/test/CodeGen/X86/vec_insert-7.ll | 15 +
 final/test/CodeGen/X86/vec_insert-8.ll | 15 +
 final/test/CodeGen/X86/vec_insert-9.ll | 9 +
 final/test/CodeGen/X86/vec_insert.ll | 19 +
 final/test/CodeGen/X86/vec_loadsingles.ll | 12 +
 final/test/CodeGen/X86/vec_logical.ll | 42 +
 final/test/CodeGen/X86/vec_return.ll | 12 +
 final/test/CodeGen/X86/vec_set-2.ll | 19 +
 final/test/CodeGen/X86/vec_set-3.ll | 17 +
 final/test/CodeGen/X86/vec_set-4.ll | 24 +
 final/test/CodeGen/X86/vec_set-5.ll | 28 +
 final/test/CodeGen/X86/vec_set-6.ll | 12 +
 final/test/CodeGen/X86/vec_set-7.ll | 11 +
 final/test/CodeGen/X86/vec_set-8.ll | 13 +
 final/test/CodeGen/X86/vec_set-9.ll | 9 +
 final/test/CodeGen/X86/vec_set-A.ll | 6 +
 final/test/CodeGen/X86/vec_set-B.ll | 24 +
 final/test/CodeGen/X86/vec_set-C.ll | 8 +
 final/test/CodeGen/X86/vec_set-D.ll | 7 +
 final/test/CodeGen/X86/vec_set-E.ll | 9 +
 final/test/CodeGen/X86/vec_set-F.ll | 19 +
 final/test/CodeGen/X86/vec_set-G.ll | 9 +
 final/test/CodeGen/X86/vec_set-H.ll | 15 +
 final/test/CodeGen/X86/vec_set-I.ll | 10 +
 final/test/CodeGen/X86/vec_set-J.ll | 10 +
 final/test/CodeGen/X86/vec_set.ll | 15 +
 final/test/CodeGen/X86/vec_sext.ll | 69 +
 final/test/CodeGen/X86/vec_shift.ll | 34 +
 final/test/CodeGen/X86/vec_shift2.ll | 17 +
 final/test/CodeGen/X86/vec_shift3.ll | 26 +
 final/test/CodeGen/X86/vec_shift4.ll | 25 +
 final/test/CodeGen/X86/vec_shuffle-11.ll | 11 +
 final/test/CodeGen/X86/vec_shuffle-14.ll | 42 +
 final/test/CodeGen/X86/vec_shuffle-15.ll | 81 +
 final/test/CodeGen/X86/vec_shuffle-16.ll | 27 +
 final/test/CodeGen/X86/vec_shuffle-17.ll | 16 +
 final/test/CodeGen/X86/vec_shuffle-18.ll | 25 +
 final/test/CodeGen/X86/vec_shuffle-19.ll | 8 +
 final/test/CodeGen/X86/vec_shuffle-20.ll | 7 +
 final/test/CodeGen/X86/vec_shuffle-22.ll | 15 +
 final/test/CodeGen/X86/vec_shuffle-23.ll | 18 +
 final/test/CodeGen/X86/vec_shuffle-24.ll | 18 +
 final/test/CodeGen/X86/vec_shuffle-25.ll | 34 +
 final/test/CodeGen/X86/vec_shuffle-26.ll | 29 +
 final/test/CodeGen/X86/vec_shuffle-27.ll | 38 +
 final/test/CodeGen/X86/vec_shuffle-28.ll | 12 +
 final/test/CodeGen/X86/vec_shuffle-30.ll | 22 +
 final/test/CodeGen/X86/vec_shuffle-31.ll | 8 +
 final/test/CodeGen/X86/vec_shuffle-34.ll | 7 +
 final/test/CodeGen/X86/vec_shuffle-35.ll | 20 +
 final/test/CodeGen/X86/vec_shuffle-36.ll | 16 +
 final/test/CodeGen/X86/vec_shuffle-37.ll | 25 +
 final/test/CodeGen/X86/vec_shuffle.ll | 45 +
 final/test/CodeGen/X86/vec_splat-2.ll | 26 +
 final/test/CodeGen/X86/vec_splat-3.ll | 55 +
 final/test/CodeGen/X86/vec_splat-4.ll | 104 +
 final/test/CodeGen/X86/vec_splat.ll | 22 +
 final/test/CodeGen/X86/vec_ss_load_fold.ll | 72 +
 final/test/CodeGen/X86/vec_zero-2.ll | 24 +
 final/test/CodeGen/X86/vec_zero.ll | 18 +
 final/test/CodeGen/X86/vec_zero_cse.ll | 35 +
 final/test/CodeGen/X86/vec_zext.ll | 69 +
 final/test/CodeGen/X86/vector-intrinsics.ll | 52 +
 final/test/CodeGen/X86/vector-rem.ll | 15 +
 final/test/CodeGen/X86/vector-variable-idx.ll | 11 +
final/test/CodeGen/X86/vector.ll | 156 +
final/test/CodeGen/X86/vfcmp.ll | 15 +
final/test/CodeGen/X86/visibility.ll | 11 +
final/test/CodeGen/X86/volatile.ll | 17 +
final/test/CodeGen/X86/vortex-bug.ll | 21 +
final/test/CodeGen/X86/vshift-1.ll | 79 +
final/test/CodeGen/X86/vshift-2.ll | 78 +
final/test/CodeGen/X86/vshift-3.ll | 67 +
final/test/CodeGen/X86/vshift-4.ll | 85 +
final/test/CodeGen/X86/vshift-5.ll | 56 +
final/test/CodeGen/X86/vshift_scalar.ll | 11 +
final/test/CodeGen/X86/vshift_split.ll | 8 +
final/test/CodeGen/X86/vshift_split2.ll | 11 +
final/test/CodeGen/X86/vsplit-and.ll | 22 +
final/test/CodeGen/X86/weak.ll | 4 +
final/test/CodeGen/X86/wide-integer-fold.ll | 12 +
final/test/CodeGen/X86/widen_arith-1.ll | 46 +
final/test/CodeGen/X86/widen_arith-2.ll | 59 +
final/test/CodeGen/X86/widen_arith-3.ll | 52 +
final/test/CodeGen/X86/widen_arith-4.ll | 49 +
final/test/CodeGen/X86/widen_arith-5.ll | 50 +
final/test/CodeGen/X86/widen_arith-6.ll | 49 +
final/test/CodeGen/X86/widen_cast-1.ll | 44 +
final/test/CodeGen/X86/widen_cast-2.ll | 46 +
final/test/CodeGen/X86/widen_cast-3.ll | 17 +
final/test/CodeGen/X86/widen_cast-4.ll | 67 +
final/test/CodeGen/X86/widen_cast-5.ll | 13 +
final/test/CodeGen/X86/widen_cast-6.ll | 10 +
final/test/CodeGen/X86/widen_conv-1.ll | 13 +
final/test/CodeGen/X86/widen_conv-2.ll | 12 +
final/test/CodeGen/X86/widen_conv-3.ll | 11 +
final/test/CodeGen/X86/widen_conv-4.ll | 11 +
final/test/CodeGen/X86/widen_extract-1.ll | 12 +
final/test/CodeGen/X86/widen_load-0.ll | 27 +
final/test/CodeGen/X86/widen_load-1.ll | 45 +
final/test/CodeGen/X86/widen_load-2.ll | 179 +
final/test/CodeGen/X86/widen_shuffle-1.ll | 48 +
final/test/CodeGen/X86/win64_params.ll | 11 +
final/test/CodeGen/X86/win64_vararg.ll | 53 +
final/test/CodeGen/X86/win_chkstk.ll | 46 +
final/test/CodeGen/X86/x86-64-and-mask.ll | 49 +
final/test/CodeGen/X86/x86-64-arg.ll | 15 +
final/test/CodeGen/X86/x86-64-asm.ll | 12 +
.../CodeGen/X86/x86-64-dead-stack-adjust.ll | 12 +
final/test/CodeGen/X86/x86-64-disp.ll | 14 +
final/test/CodeGen/X86/x86-64-extend-shift.ll | 10 +
final/test/CodeGen/X86/x86-64-frameaddr.ll | 10 +
final/test/CodeGen/X86/x86-64-gv-offset.ll | 14 +
final/test/CodeGen/X86/x86-64-jumps.ll | 45 +
final/test/CodeGen/X86/x86-64-malloc.ll | 12 +
final/test/CodeGen/X86/x86-64-mem.ll | 36 +
final/test/CodeGen/X86/x86-64-pic-1.ll | 10 +
final/test/CodeGen/X86/x86-64-pic-10.ll | 12 +
final/test/CodeGen/X86/x86-64-pic-11.ll | 8 +
final/test/CodeGen/X86/x86-64-pic-2.ll | 11 +
final/test/CodeGen/X86/x86-64-pic-3.ll | 14 +
final/test/CodeGen/X86/x86-64-pic-4.ll | 10 +
final/test/CodeGen/X86/x86-64-pic-5.ll | 11 +
final/test/CodeGen/X86/x86-64-pic-6.ll | 11 +
final/test/CodeGen/X86/x86-64-pic-7.ll | 9 +
final/test/CodeGen/X86/x86-64-pic-8.ll | 10 +
final/test/CodeGen/X86/x86-64-pic-9.ll | 13 +
final/test/CodeGen/X86/x86-64-ret0.ll | 8 +
final/test/CodeGen/X86/x86-64-shortint.ll | 12 +
final/test/CodeGen/X86/x86-64-sret-return.ll | 63 +
final/test/CodeGen/X86/x86-64-tls-1.ll | 6 +
final/test/CodeGen/X86/x86-64-varargs.ll | 11 +
final/test/CodeGen/X86/x86-frameaddr.ll | 9 +
final/test/CodeGen/X86/x86-frameaddr2.ll | 9 +
final/test/CodeGen/X86/x86-store-gv-addr.ll | 10 +
final/test/CodeGen/X86/x86_64-mul-by-const.ll | 9 +
final/test/CodeGen/X86/xmm-r64.ll | 12 +
final/test/CodeGen/X86/xor-icmp.ll | 67 +
final/test/CodeGen/X86/xor.ll | 145 +
final/test/CodeGen/X86/zero-remat.ll | 40 +
final/test/CodeGen/X86/zext-extract_subreg.ll | 60 +
final/test/CodeGen/X86/zext-inreg-0.ll | 66 +
final/test/CodeGen/X86/zext-inreg-1.ll | 18 +
final/test/CodeGen/X86/zext-sext.ll | 54 +
final/test/CodeGen/X86/zext-shl.ll | 25 +
final/test/CodeGen/X86/zext-trunc.ll | 13 +
final/test/CodeGen/XCore/2008-11-17-Shl64.ll | 6 +
final/test/CodeGen/XCore/2009-01-08-Crash.ll | 12 +
.../CodeGen/XCore/2009-01-14-Remat-Crash.ll | 18 +
.../CodeGen/XCore/2009-03-27-v2f64-param.ll | 6 +
.../test/CodeGen/XCore/2009-07-15-store192.ll | 7 +
.../CodeGen/XCore/2010-02-25-LSR-Crash.ll | 26 +
.../XCore/2010-04-07-DbgValueOtherTargets.ll | 28 +
.../CodeGen/XCore/2011-01-31-DAGCombineBug.ll | 10 +
final/test/CodeGen/XCore/addsub64.ll | 59 +
final/test/CodeGen/XCore/ashr.ll | 76 +
final/test/CodeGen/XCore/basictest.ll | 6 +
final/test/CodeGen/XCore/bigstructret.ll | 43 +
final/test/CodeGen/XCore/bitrev.ll | 8 +
final/test/CodeGen/XCore/constants.ll | 11 +
final/test/CodeGen/XCore/cos.ll | 16 +
final/test/CodeGen/XCore/dg.exp | 5 +
final/test/CodeGen/XCore/events.ll | 24 +
final/test/CodeGen/XCore/exp.ll | 16 +
final/test/CodeGen/XCore/exp2.ll | 16 +
final/test/CodeGen/XCore/fneg.ll | 7 +
final/test/CodeGen/XCore/getid.ll | 8 +
final/test/CodeGen/XCore/globals.ll | 92 +
final/test/CodeGen/XCore/indirectbr.ll | 45 +
final/test/CodeGen/XCore/ladd_lsub_combine.ll | 67 +
final/test/CodeGen/XCore/load.ll | 39 +
final/test/CodeGen/XCore/log.ll | 16 +
final/test/CodeGen/XCore/log10.ll | 16 +
final/test/CodeGen/XCore/log2.ll | 16 +
final/test/CodeGen/XCore/mul64.ll | 52 +
final/test/CodeGen/XCore/pow.ll | 16 +
final/test/CodeGen/XCore/powi.ll | 16 +
final/test/CodeGen/XCore/private.ll | 21 +
final/test/CodeGen/XCore/resources.ll | 176 +
final/test/CodeGen/XCore/sext.ll | 32 +
final/test/CodeGen/XCore/sin.ll | 16 +
final/test/CodeGen/XCore/sqrt.ll | 16 +
final/test/CodeGen/XCore/store.ll | 35 +
final/test/CodeGen/XCore/switch.ll | 24 +
final/test/CodeGen/XCore/switch_long.ll | 132 +
final/test/CodeGen/XCore/tls.ll | 20 +
final/test/CodeGen/XCore/trampoline.ll | 37 +
final/test/CodeGen/XCore/trap.ll | 11 +
final/test/CodeGen/XCore/unaligned_load.ll | 31 +
final/test/CodeGen/XCore/unaligned_store.ll | 18 +
.../CodeGen/XCore/unaligned_store_combine.ll | 12 +
.../test/DebugInfo/2009-01-15-dbg_declare.ll | 16 +
final/test/DebugInfo/2009-01-15-member.ll | 30 +
...2009-10-08-DebugInfo-NullGlobalVariable.ll | 72 +
final/test/DebugInfo/2009-10-16-Phi.ll | 13 +
.../2009-11-03-InsertExtractValue.ll | 11 +
.../2009-11-05-DeadGlobalVariable.ll | 17 +
.../2009-11-06-InvalidDerivedType.ll | 13 +
.../2009-11-06-NamelessGlobalVariable.ll | 8 +
final/test/DebugInfo/2009-11-10-CurrentFn.ll | 20 +
.../test/DebugInfo/2009-11-10-ParentScope.ll | 26 +
final/test/DebugInfo/2010-01-05-DbgScope.ll | 18 +
final/test/DebugInfo/2010-01-19-DbgScope.ll | 28 +
final/test/DebugInfo/2010-03-12-llc-crash.ll | 20 +
final/test/DebugInfo/2010-03-19-DbgDeclare.ll | 12 +
final/test/DebugInfo/2010-03-24-MemberFn.ll | 62 +
.../2010-03-30-InvalidDbgInfoCrash.ll | 30 +
.../DebugInfo/2010-04-06-NestedFnDbgInfo.ll | 89 +
final/test/DebugInfo/2010-04-13-PubType.ll | 47 +
final/test/DebugInfo/2010-04-19-FramePtr.ll | 30 +
.../test/DebugInfo/2010-04-25-CU-entry_pc.ll | 9 +
.../DebugInfo/2010-05-03-DisableFramePtr.ll | 34 +
final/test/DebugInfo/2010-05-03-OriginDIE.ll | 86 +
final/test/DebugInfo/2010-05-10-MultipleCU.ll | 44 +
.../DebugInfo/2010-06-29-InlinedFnLocalVar.ll | 52 +
final/test/DebugInfo/2010-07-19-Crash.ll | 24 +
final/test/DebugInfo/2010-10-01-crash.ll | 21 +
final/test/DebugInfo/dg.exp | 3 +
final/test/DebugInfo/inheritance.ll | 151 +
final/test/DebugInfo/printdbginfo2.ll | 66 +
.../ExecutionEngine/2002-12-16-ArgTest.ll | 37 +
.../ExecutionEngine/2003-01-04-ArgumentBug.ll | 13 +
.../ExecutionEngine/2003-01-04-LoopTest.ll | 20 +
.../ExecutionEngine/2003-01-04-PhiTest.ll | 12 +
.../ExecutionEngine/2003-01-09-SARTest.ll | 11 +
.../test/ExecutionEngine/2003-01-10-FUCOM.ll | 10 +
.../2003-01-15-AlignmentTest.ll | 17 +
.../2003-05-06-LivenessClobber.ll | 19 +
.../2003-05-07-ArgumentTest.ll | 11 +
.../2003-05-11-PHIRegAllocBug.ll | 15 +
.../ExecutionEngine/2003-06-04-bzip2-bug.ll | 19 +
.../test/ExecutionEngine/2003-06-05-PHIBug.ll | 17 +
.../2003-08-15-AllocaAssertion.ll | 11 +
.../2003-08-21-EnvironmentTest.ll | 21 +
.../2003-08-23-RegisterAllocatePhysReg.ll | 34 +
...8-PHINode-ConstantExpr-CondCode-Failure.ll | 23 +
.../ExecutionEngine/2005-12-02-TailCallBug.ll | 22 +
.../2007-12-10-APIntLoadStore.ll | 19 +
.../2008-06-05-APInt-OverAShr.ll | 59 +
.../ExecutionEngine/2010-01-15-UndefValue.ll | 8 +
final/test/ExecutionEngine/dg.exp | 3 +
final/test/ExecutionEngine/fpbitcast.ll | 20 +
final/test/ExecutionEngine/hello.ll | 11 +
final/test/ExecutionEngine/hello2.ll | 17 +
final/test/ExecutionEngine/simplesttest.ll | 6 +
final/test/ExecutionEngine/simpletest.ll | 11 +
final/test/ExecutionEngine/stubs.ll | 35 +
final/test/ExecutionEngine/test-arith.ll | 34 +
final/test/ExecutionEngine/test-branch.ll | 12 +
final/test/ExecutionEngine/test-call.ll | 22 +
final/test/ExecutionEngine/test-cast.ll | 109 +
.../test/ExecutionEngine/test-constantexpr.ll | 12 +
final/test/ExecutionEngine/test-fp.ll | 23 +
final/test/ExecutionEngine/test-loadstore.ll | 31 +
final/test/ExecutionEngine/test-logical.ll | 18 +
final/test/ExecutionEngine/test-loop.ll | 14 +
final/test/ExecutionEngine/test-malloc.ll | 13 +
final/test/ExecutionEngine/test-phi.ll | 34 +
final/test/ExecutionEngine/test-ret.ll | 46 +
final/test/ExecutionEngine/test-setcond-fp.ll | 24 +
.../test/ExecutionEngine/test-setcond-int.ll | 69 +
final/test/ExecutionEngine/test-shift.ll | 32 +
final/test/Feature/NamedMDNode.ll | 9 +
final/test/Feature/NamedMDNode2.ll | 7 +
final/test/Feature/README.txt | 6 +
final/test/Feature/aliases.ll | 32 +
final/test/Feature/alignment.ll | 25 +
final/test/Feature/basictest.ll | 31 +
final/test/Feature/callingconventions.ll | 50 +
final/test/Feature/calltest.ll | 32 +
final/test/Feature/casttest.ll | 12 +
final/test/Feature/cfgstructures.ll | 53 +
final/test/Feature/constexpr.ll | 80 +
final/test/Feature/constpointer.ll | 31 +
final/test/Feature/dg.exp | 3 +
final/test/Feature/escaped_label.ll | 11 +
final/test/Feature/float.ll | 6 +
final/test/Feature/fold-fpcast.ll | 18 +
final/test/Feature/forwardreftest.ll | 29 +
final/test/Feature/global_section.ll | 10 +
final/test/Feature/globalredefinition.ll | 18 +
final/test/Feature/globalredefinition3.ll | 4 +
final/test/Feature/globalvars.ll | 18 +
final/test/Feature/indirectcall.ll | 49 +
final/test/Feature/indirectcall2.ll | 22 +
final/test/Feature/inlineasm.ll | 13 +
final/test/Feature/instructions.ll | 24 +
final/test/Feature/intrinsics.ll | 62 +
final/test/Feature/linker_private_linkages.ll | 7 +
final/test/Feature/llvm2cpp.exp | 3 +
final/test/Feature/load_module.ll | 12 +
final/test/Feature/md_on_instruction.ll | 22 +
final/test/Feature/memorymarkers.ll | 36 +
final/test/Feature/metadata.ll | 17 +
final/test/Feature/newcasts.ll | 33 +
final/test/Feature/noalias-ret.ll | 6 +
final/test/Feature/opaquetypes.ll | 55 +
final/test/Feature/packed.ll | 15 +
final/test/Feature/packed_struct.ll | 33 +
final/test/Feature/paramattrs.ll | 22 +
final/test/Feature/ppcld.ll | 26 +
final/test/Feature/properties.ll | 7 +
final/test/Feature/prototype.ll | 11 +
final/test/Feature/recursivetype.ll | 103 +
final/test/Feature/simplecalltest.ll | 24 +
final/test/Feature/small.ll | 11 +
final/test/Feature/smallest.ll | 4 +
final/test/Feature/sparcld.ll | 24 +
final/test/Feature/terminators.ll | 43 +
final/test/Feature/testalloca.ll | 22 +
final/test/Feature/testconstants.ll | 29 +
final/test/Feature/testlogical.ll | 11 +
final/test/Feature/testmemory.ll | 36 +
final/test/Feature/testtype.ll | 21 +
final/test/Feature/testvarargs.ll | 12 +
final/test/Feature/undefined.ll | 17 +
final/test/Feature/unreachable.ll | 15 +
final/test/Feature/varargs.ll | 29 +
final/test/Feature/varargs_new.ll | 38 +
.../Feature/vector-cast-constant-exprs.ll | 37 +
final/test/Feature/weak_constant.ll | 38 +
final/test/Feature/weirdnames.ll | 9 +
final/test/Feature/x86ld.ll | 26 +
.../test/FrontendAda/Support/element_copy.ads | 8 +
final/test/FrontendAda/Support/fat_fields.ads | 6 +
.../FrontendAda/Support/global_constant.ads | 4 +
final/test/FrontendAda/Support/non_lvalue.ads | 11 +
final/test/FrontendAda/Support/real_cst.ads | 4 +
.../FrontendAda/Support/unc_constructor.ads | 8 +
final/test/FrontendAda/Support/var_offset.ads | 9 +
final/test/FrontendAda/Support/var_size.ads | 7 +
final/test/FrontendAda/array_constructor.adb | 6 +
final/test/FrontendAda/array_range_ref.adb | 7 +
final/test/FrontendAda/array_ref.adb | 11 +
final/test/FrontendAda/array_size.adb | 10 +
final/test/FrontendAda/asm.adb | 6 +
final/test/FrontendAda/constant_fold.ads | 4 +
final/test/FrontendAda/debug_var_size.ads | 8 +
final/test/FrontendAda/dg.exp | 6 +
final/test/FrontendAda/element_copy.adb | 8 +
final/test/FrontendAda/emit_var.ads | 5 +
final/test/FrontendAda/fat_fields.adb | 10 +
final/test/FrontendAda/field_order.ads | 7 +
final/test/FrontendAda/global_constant.adb | 5 +
final/test/FrontendAda/init_size.ads | 12 +
.../FrontendAda/negative_field_offset.adb | 16 +
final/test/FrontendAda/non_bitfield.ads | 12 +
final/test/FrontendAda/non_lvalue.adb | 7 +
final/test/FrontendAda/placeholder.adb | 12 +
final/test/FrontendAda/real_cst.adb | 8 +
final/test/FrontendAda/switch.adb | 12 +
final/test/FrontendAda/unc_constructor.adb | 9 +
final/test/FrontendAda/var_offset.adb | 7 +
final/test/FrontendAda/var_size.adb | 7 +
final/test/FrontendAda/vce.adb | 7 +
final/test/FrontendAda/vce_lv.adb | 9 +
.../FrontendC++/2003-11-02-WeakLinkage.cpp | 13 +
.../2003-11-18-PtrMemConstantInitializer.cpp | 14 +
.../2003-11-25-ReturningOpaqueByValue.cpp | 12 +
.../2003-11-27-MultipleInheritanceThunk.cpp | 28 +
.../2003-11-29-DuplicatedCleanupTest.cpp | 41 +
.../2003-12-08-ArrayOfPtrToMemberFunc.cpp | 12 +
.../2004-01-11-DynamicInitializedConstant.cpp | 6 +
.../2004-03-08-ReinterpretCastCopy.cpp | 21 +
.../2004-03-09-UnmangledBuiltinMethods.cpp | 8 +
.../2004-03-15-CleanupsAndGotos.cpp | 14 +
.../2004-06-08-LateTemplateInstantiation.cpp | 19 +
.../FrontendC++/2004-09-27-CompilerCrash.cpp | 13 +
.../2004-09-27-DidntEmitTemplate.cpp | 23 +
.../2004-11-27-EmitsUnusedInlineFunctions.cpp | 7 +
.../2004-11-27-ExceptionCleanupAssertion.cpp | 14 +
.../2004-11-27-FriendDefaultArgCrash.cpp | 9 +
...04-11-27-InlineAsmFunctionRedefinition.cpp | 26 +
.../2005-01-03-StaticInitializers.cpp | 8 +
.../FrontendC++/2005-02-11-AnonymousUnion.cpp | 32 +
.../FrontendC++/2005-02-13-BadDynamicInit.cpp | 9 +
.../FrontendC++/2005-02-14-BitFieldOffset.cpp | 11 +
.../2005-02-19-BitfieldStructCrash.cpp | 14 +
...2005-02-19-UnnamedVirtualThunkArgument.cpp | 22 +
.../2005-02-20-BrokenReferenceTest.cpp | 11 +
.../2005-02-27-PlacementArrayNewCrash.cpp | 8 +
.../2005-07-21-VirtualBaseAccess.cpp | 14 +
.../FrontendC++/2006-03-01-GimplifyCrash.cpp | 14 +
.../2006-03-06-C++RecurseCrash.cpp | 24 +
final/test/FrontendC++/2006-09-08-powi.cpp | 7 +
.../2006-09-12-OpaqueStructCrash.cpp | 28 +
.../2006-09-27-Debug-Protection.cpp | 12 +
.../FrontendC++/2006-10-30-ClassBitfield.cpp | 16 +
.../FrontendC++/2006-11-06-StackTrace.cpp | 38 +
.../FrontendC++/2006-11-20-GlobalSymbols.cpp | 10 +
.../2006-11-30-ConstantExprCrash.cpp | 27 +
.../test/FrontendC++/2006-11-30-Pubnames.cpp | 22 +
.../FrontendC++/2007-01-02-UnboundedArray.cpp | 14 +
.../2007-01-06-ELF-Thunk-Sections.cpp | 49 +
.../FrontendC++/2007-01-06-PtrMethodInit.cpp | 75 +
.../2007-03-27-FunctionVarRename.cpp | 17 +
.../2007-04-05-PackedBitFields-1.cpp | 23 +
.../2007-04-05-PackedBitFieldsOverlap-2.cpp | 24 +
.../2007-04-05-PackedBitFieldsOverlap.cpp | 24 +
.../2007-04-05-PackedBitFieldsSmall.cpp | 27 +
.../2007-04-05-StructPackedFieldUnpacked.cpp | 25 +
.../FrontendC++/2007-04-10-PackedUnion.cpp | 41 +
.../2007-04-11-InlineStorageClassC++.cpp | 44 +
.../FrontendC++/2007-04-14-FNoBuiltin.cpp | 7 +
.../test/FrontendC++/2007-04-31-TryCatch.cpp | 12 +
.../FrontendC++/2007-05-03-VectorInit.cpp | 17 +
.../2007-05-16-ReverseBitFieldCrash.cpp | 24 +
.../FrontendC++/2007-05-23-TryFinally.cpp | 16 +
.../FrontendC++/2007-07-04-NestedCatches.cpp | 32 +
.../FrontendC++/2007-07-29-RestrictPtrArg.cpp | 6 +
.../FrontendC++/2007-07-29-RestrictRefArg.cpp | 6 +
.../FrontendC++/2007-08-01-RestrictMethod.cpp | 13 +
.../2007-09-10-RecursiveTypeResolution.cpp | 88 +
.../FrontendC++/2007-10-01-StructResize.cpp | 14 +
.../FrontendC++/2008-01-11-BadWarning.cpp | 6 +
final/test/FrontendC++/2008-01-12-VecInit.cpp | 6 +
.../FrontendC++/2008-05-07-CrazyOffsetOf.cpp | 8 +
.../FrontendC++/2008-10-29-WrongOffset.cpp | 489 +
.../2009-02-07-VolatileArrayRefHack.cpp | 7 +
.../FrontendC++/2009-02-16-CtorNames-dbg.cpp | 10 +
final/test/FrontendC++/2009-03-17-dbg.cpp | 16 +
.../FrontendC++/2009-04-21-DtorNames-dbg.cpp | 32 +
final/test/FrontendC++/2009-04-23-bool2.cpp | 15 +
.../2009-05-04-PureConstNounwind.cpp | 8 +
.../FrontendC++/2009-06-16-DebugInfoCrash.cpp | 10 +
.../2009-06-20-DarwinPPCLayout.cpp | 32 +
.../FrontendC++/2009-06-30-ByrefBlock.cpp | 11 +
.../FrontendC++/2009-07-15-LineNumbers.cpp | 27 +
.../2009-07-16-PrivateCopyConstructor.cpp | 15 +
final/test/FrontendC++/2009-07-16-Using.cpp | 8 +
.../FrontendC++/2009-08-05-ZeroInitWidth.cpp | 12 +
.../FrontendC++/2009-08-11-VectorRetTy.cpp | 13 +
.../FrontendC++/2009-09-04-modify-crash.cpp | 7 +
.../FrontendC++/2009-09-09-packed-layout.cpp | 18 +
final/test/FrontendC++/2009-10-27-crash.cpp | 43 +
.../FrontendC++/2009-12-23-MissingSext.cpp | 16 +
.../2010-02-17-DbgArtificialArg.cpp | 16 +
.../2010-03-22-empty-baseclass.cpp | 134 +
.../2010-04-30-OptimizedMethod-Dbg.cpp | 18 +
.../FrontendC++/2010-05-10-Var-DbgInfo.cpp | 43 +
.../2010-05-11-alwaysinlineinstantiation.cpp | 31 +
.../2010-05-12-PtrToMember-Dbg.cpp | 17 +
.../FrontendC++/2010-06-21-LocalVarDbg.cpp | 13 +
.../FrontendC++/2010-06-22-BitfieldInit.cpp | 20 +
.../FrontendC++/2010-06-22-ZeroBitfield.cpp | 5 +
final/test/FrontendC++/2010-07-19-nowarn.cpp | 21 +
final/test/FrontendC++/2010-07-23-DeclLoc.cpp | 86 +
.../test/FrontendC++/2010-08-31-ByValArg.cpp | 53 +
final/test/FrontendC++/alignstack.cpp | 23 +
final/test/FrontendC++/dg.exp | 5 +
final/test/FrontendC++/integration-O2.cpp | 19 +
final/test/FrontendC++/m64-ptr.cpp | 19 +
final/test/FrontendC++/member-alignment.cpp | 20 +
.../test/FrontendC++/ptr-to-method-devirt.cpp | 14 +
final/test/FrontendC++/thunk-linkonce-odr.cpp | 33 +
final/test/FrontendC++/varargs.cpp | 19 +
final/test/FrontendC++/weak-external.cpp | 17 +
.../x86-64-abi-sret-vs-2word-struct-param.cpp | 27 +
.../2002-01-23-LoadQISIReloadFailure.c | 11 +
.../FrontendC/2002-01-24-ComplexSpaceInType.c | 11 +
.../FrontendC/2002-01-24-HandleCallInsnSEGV.c | 9 +
.../FrontendC/2002-02-13-ConditionalInCall.c | 11 +
.../test/FrontendC/2002-02-13-ReloadProblem.c | 18 +
.../2002-02-13-TypeVarNameCollision.c | 16 +
.../test/FrontendC/2002-02-13-UnnamedLocal.c | 21 +
.../FrontendC/2002-02-14-EntryNodePreds.c | 37 +
.../test/FrontendC/2002-02-16-RenamingTest.c | 18 +
.../FrontendC/2002-02-17-ArgumentAddress.c | 39 +
.../test/FrontendC/2002-02-18-64bitConstant.c | 10 +
final/test/FrontendC/2002-02-18-StaticData.c | 13 +
.../FrontendC/2002-03-11-LargeCharInString.c | 10 +
.../2002-03-12-ArrayInitialization.c | 19 +
.../FrontendC/2002-03-12-StructInitialize.c | 14 +
.../FrontendC/2002-03-12-StructInitializer.c | 18 +
.../test/FrontendC/2002-03-14-BrokenPHINode.c | 19 +
final/test/FrontendC/2002-03-14-BrokenSSA.c | 17 +
.../FrontendC/2002-03-14-QuotesInStrConst.c | 10 +
final/test/FrontendC/2002-04-07-SwitchStmt.c | 22 +
final/test/FrontendC/2002-04-08-LocalArray.c | 14 +
.../test/FrontendC/2002-04-09-StructRetVal.c | 12 +
.../FrontendC/2002-04-10-StructParameters.c | 25 +
.../test/FrontendC/2002-05-23-StaticValues.c | 15 +
.../FrontendC/2002-05-23-TypeNameCollision.c | 19 +
final/test/FrontendC/2002-05-24-Alloca.c | 11 +
.../2002-06-25-FWriteInterfaceFailure.c | 7 +
.../test/FrontendC/2002-07-14-MiscListTests.c | 71 +
final/test/FrontendC/2002-07-14-MiscTests.c | 57 +
final/test/FrontendC/2002-07-14-MiscTests2.c | 13 +
final/test/FrontendC/2002-07-14-MiscTests3.c | 187 +
.../FrontendC/2002-07-16-HardStringInit.c | 8 +
.../FrontendC/2002-07-17-StringConstant.c | 4 +
final/test/FrontendC/2002-07-29-Casts.c | 86 +
.../FrontendC/2002-07-30-SubregSetAssertion.c | 12 +
final/test/FrontendC/2002-07-30-UnionTest.c | 22 +
.../FrontendC/2002-07-30-VarArgsCallFailure.c | 8 +
final/test/FrontendC/2002-07-31-BadAssert.c | 16 +
.../test/FrontendC/2002-07-31-SubregFailure.c | 14 +
final/test/FrontendC/2002-08-02-UnionTest.c | 19 +
.../FrontendC/2002-08-19-RecursiveLocals.c | 18 +
.../test/FrontendC/2002-09-08-PointerShifts.c | 6 +
.../test/FrontendC/2002-09-18-UnionProblem.c | 26 +
final/test/FrontendC/2002-09-19-StarInLabel.c | 9 +
.../FrontendC/2002-10-12-TooManyArguments.c | 8 +
.../FrontendC/2002-12-15-GlobalBoolTest.c | 5 +
.../FrontendC/2002-12-15-GlobalConstantTest.c | 8 +
.../FrontendC/2002-12-15-GlobalRedefinition.c | 5 +
.../FrontendC/2002-12-15-StructParameters.c | 18 +
final/test/FrontendC/2003-01-30-UnionInit.c | 8 +
.../test/FrontendC/2003-03-03-DeferredType.c | 12 +
final/test/FrontendC/2003-06-22-UnionCrash.c | 13 +
.../2003-06-23-GCC-fold-infinite-recursion.c | 6 +
final/test/FrontendC/2003-06-26-CFECrash.c | 19 +
.../2003-06-29-MultipleFunctionDefinition.c | 8 +
.../2003-07-22-ArrayAccessTypeSafety.c | 7 +
.../2003-08-06-BuiltinSetjmpLongjmp.c | 14 +
.../2003-08-17-DeadCodeShortCircuit.c | 7 +
final/test/FrontendC/2003-08-18-SigSetJmp.c | 10 +
.../test/FrontendC/2003-08-18-StructAsValue.c | 11 +
.../FrontendC/2003-08-20-BadBitfieldRef.c | 8 +
.../FrontendC/2003-08-20-PrototypeMismatch.c | 15 +
final/test/FrontendC/2003-08-20-vfork-bug.c | 6 +
.../2003-08-21-BinOp-Type-Mismatch.c | 10 +
final/test/FrontendC/2003-08-21-StmtExpr.c | 12 +
final/test/FrontendC/2003-08-21-WideString.c | 7 +
.../FrontendC/2003-08-23-LocalUnionTest.c | 11 +
.../FrontendC/2003-08-29-BitFieldStruct.c | 13 +
.../test/FrontendC/2003-08-29-HugeCharConst.c | 5 +
.../FrontendC/2003-08-29-StructLayoutBug.c | 10 +
.../2003-08-30-AggregateInitializer.c | 16 +
.../2003-08-30-LargeIntegerBitfieldMember.c | 9 +
.../test/FrontendC/2003-09-18-BitfieldTests.c | 30 +
.../test/FrontendC/2003-09-30-StructLayout.c | 18 +
.../FrontendC/2003-10-02-UnionLValueError.c | 13 +
.../FrontendC/2003-10-06-NegateExprType.c | 8 +
.../2003-10-09-UnionInitializerBug.c | 17 +
final/test/FrontendC/2003-10-28-ident.c | 4 +
final/test/FrontendC/2003-10-29-AsmRename.c | 22 +
.../2003-11-01-C99-CompoundLiteral.c | 8 +
.../FrontendC/2003-11-01-EmptyStructCrash.c | 6 +
.../FrontendC/2003-11-01-GlobalUnionInit.c | 7 +
.../FrontendC/2003-11-03-AddrArrayElement.c | 11 +
final/test/FrontendC/2003-11-04-EmptyStruct.c | 6 +
final/test/FrontendC/2003-11-04-OutOfMemory.c | 9 +
.../2003-11-08-PointerSubNotGetelementptr.c | 9 +
final/test/FrontendC/2003-11-12-VoidString.c | 4 +
final/test/FrontendC/2003-11-13-TypeSafety.c | 5 +
.../FrontendC/2003-11-16-StaticArrayInit.c | 8 +
.../FrontendC/2003-11-18-CondExprLValue.c | 9 +
.../FrontendC/2003-11-19-AddressOfRegister.c | 12 +
.../test/FrontendC/2003-11-19-BitFieldArray.c | 12 +
final/test/FrontendC/2003-11-20-Bitfields.c | 12 +
.../FrontendC/2003-11-20-ComplexDivision.c | 7 +
.../test/FrontendC/2003-11-20-UnionBitfield.c | 12 +
.../test/FrontendC/2003-11-26-PointerShift.c | 6 +
.../FrontendC/2003-11-27-ConstructorCast.c | 14 +
.../2003-11-27-UnionCtorInitialization.c | 16 +
.../2003-12-14-ExternInlineSupport.c | 3 +
.../FrontendC/2004-01-01-UnknownInitSize.c | 14 +
.../2004-01-08-ExternInlineRedefine.c | 14 +
.../FrontendC/2004-02-12-LargeAggregateCopy.c | 8 +
.../2004-02-13-BuiltinFrameReturnAddress.c | 8 +
.../test/FrontendC/2004-02-13-IllegalVararg.c | 13 +
final/test/FrontendC/2004-02-13-Memset.c | 9 +
.../FrontendC/2004-02-14-ZeroInitializer.c | 4 +
final/test/FrontendC/2004-02-20-Builtins.c | 8 +
.../FrontendC/2004-03-07-ComplexDivEquals.c | 6 +
.../FrontendC/2004-03-07-ExternalConstant.c | 7 +
.../2004-03-09-LargeArrayInitializers.c | 32 +
.../FrontendC/2004-03-15-SimpleIndirectGoto.c | 23 +
.../FrontendC/2004-03-16-AsmRegisterCrash.c | 10 +
final/test/FrontendC/2004-05-07-VarArrays.c | 5 +
.../FrontendC/2004-05-21-IncompleteEnum.c | 5 +
.../FrontendC/2004-06-08-OpaqueStructArg.c | 7 +
.../FrontendC/2004-06-17-UnorderedBuiltins.c | 24 +
.../FrontendC/2004-06-17-UnorderedCompares.c | 21 +
...04-06-18-VariableLengthArrayOfStructures.c | 10 +
.../test/FrontendC/2004-07-06-FunctionCast.c | 10 +
.../FrontendC/2004-08-06-LargeStructTest.c | 19 +
.../2004-11-25-UnnamedBitfieldPadding.c | 8 +
.../2004-11-27-InvalidConstantExpr.c | 10 +
.../2004-11-27-StaticFunctionRedeclare.c | 15 +
.../2004-11-27-VariableSizeInStructure.c | 11 +
.../test/FrontendC/2005-01-02-ConstantInits.c | 24 +
.../FrontendC/2005-01-02-PointerDifference.c | 3 +
.../FrontendC/2005-01-02-VAArgError-ICE.c | 10 +
.../FrontendC/2005-02-20-AggregateSAVEEXPR.c | 19 +
.../FrontendC/2005-02-27-MarkGlobalConstant.c | 10 +
.../test/FrontendC/2005-03-05-OffsetOfHack.c | 12 +
.../2005-03-06-OffsetOfStructCrash.c | 14 +
final/test/FrontendC/2005-03-11-Prefetch.c | 6 +
final/test/FrontendC/2005-04-09-ComplexOps.c | 9 +
.../test/FrontendC/2005-05-06-CountBuiltins.c | 17 +
.../FrontendC/2005-05-10-GlobalUnionInit.c | 6 +
.../2005-06-15-ExpandGotoInternalProblem.c | 14 +
final/test/FrontendC/2005-07-20-SqrtNoErrno.c | 11 +
.../FrontendC/2005-07-26-UnionInitCrash.c | 3 +
.../2005-07-28-IncorrectWeakGlobal.c | 5 +
.../FrontendC/2005-09-20-ComplexConstants.c | 4 +
.../test/FrontendC/2005-09-24-AsmUserPrefix.c | 8 +
.../test/FrontendC/2005-09-24-BitFieldCrash.c | 33 +
.../2005-10-18-VariableSizedElementCrash.c | 9 +
.../test/FrontendC/2005-12-04-AttributeUsed.c | 8 +
.../2005-12-04-DeclarationLineNumbers.c | 23 +
final/test/FrontendC/2006-01-13-Includes.c | 8 +
final/test/FrontendC/2006-01-13-StackSave.c | 11 +
.../2006-01-16-BitCountIntrinsicsUnsigned.c | 9 +
.../test/FrontendC/2006-01-23-FileScopeAsm.c | 8 +
.../FrontendC/2006-03-03-MissingInitializer.c | 11 +
final/test/FrontendC/2006-03-16-VectorCtor.c | 10 +
final/test/FrontendC/2006-03-17-KnRMismatch.c | 8 +
.../2006-05-01-AppleAlignmentPragma.c | 12 +
.../FrontendC/2006-05-19-SingleEltReturn.c | 23 +
final/test/FrontendC/2006-07-31-PR854.c | 11 +
.../FrontendC/2006-09-11-BitfieldRefCrash.c | 12 +
.../FrontendC/2006-09-18-fwrite-cast-crash.c | 15 +
.../2006-09-21-IncompleteElementType.c | 3 +
.../test/FrontendC/2006-09-25-DebugFilename.c | 6 +
.../test/FrontendC/2006-09-25-DebugFilename.h | 6 +
final/test/FrontendC/2006-09-28-SimpleAsm.c | 10 +
final/test/FrontendC/2006-10-30-ArrayCrash.c | 17 +
.../test/FrontendC/2006-12-14-ordered_expr.c | 6 +
final/test/FrontendC/2007-01-06-KNR-Proto.c | 10 +
final/test/FrontendC/2007-01-20-VectorICE.c | 11 +
.../FrontendC/2007-01-24-InlineAsmCModifier.c | 10 +
.../test/FrontendC/2007-02-04-AddrLValue-2.c | 13 +
final/test/FrontendC/2007-02-04-AddrLValue.c | 23 +
final/test/FrontendC/2007-02-04-EmptyStruct.c | 9 +
.../FrontendC/2007-02-04-WITH_SIZE_EXPR.c | 21 +
final/test/FrontendC/2007-02-05-nested.c | 54 +
final/test/FrontendC/2007-02-07-AddrLabel.c | 10 +
.../2007-02-16-VariableSizeStructArg.c | 7 +
final/test/FrontendC/2007-02-16-VoidPtrDiff.c | 5 +
.../FrontendC/2007-02-16-WritableStrings.c | 7 +
final/test/FrontendC/2007-02-25-C-DotDotDot.c | 12 +
.../FrontendC/2007-03-01-VarSizeArrayIdx.c | 7 +
final/test/FrontendC/2007-03-05-DataLayout.c | 53 +
.../FrontendC/2007-03-06-VarSizeInStruct1.c | 8 +
.../FrontendC/2007-03-06-VarSizeInStruct2.c | 8 +
.../2007-03-26-BitfieldAfterZeroWidth.c | 6 +
.../FrontendC/2007-03-26-ZeroWidthBitfield.c | 2 +
.../FrontendC/2007-03-27-ArrayCompatible.c | 10 +
.../FrontendC/2007-03-27-VarLengthArray.c | 7 +
.../FrontendC/2007-04-05-PackedBitFields-2.c | 16 +
.../FrontendC/2007-04-05-PackedBitFields.c | 16 +
.../test/FrontendC/2007-04-05-PackedStruct.c | 18 +
.../2007-04-05-PadBeforeZeroLengthField.c | 9 +
.../FrontendC/2007-04-05-UnPackedStruct.c | 16 +
.../FrontendC/2007-04-11-InlineAsmStruct.c | 9 +
.../FrontendC/2007-04-11-InlineAsmUnion.c | 7 +
.../2007-04-11-InlineStorageClassC89.c | 46 +
.../2007-04-11-InlineStorageClassC99.c | 46 +
final/test/FrontendC/2007-04-11-PR1321.c | 12 +
.../FrontendC/2007-04-13-InlineAsmStruct2.c | 9 +
.../FrontendC/2007-04-13-InlineAsmUnion2.c | 7 +
final/test/FrontendC/2007-04-14-FNoBuiltin.c | 7 +
.../FrontendC/2007-04-17-ZeroSizeBitFields.c | 4 +
.../FrontendC/2007-04-24-VolatileStructCopy.c | 10 +
.../test/FrontendC/2007-04-24-bit-not-expr.c | 7 +
final/test/FrontendC/2007-04-24-str-const.c | 17 +
.../FrontendC/2007-05-07-NestedStructReturn.c | 13 +
.../FrontendC/2007-05-07-PaddingElements.c | 12 +
final/test/FrontendC/2007-05-08-PCH.c | 7 +
final/test/FrontendC/2007-05-11-str-const.c | 5 +
.../FrontendC/2007-05-15-PaddingElement.c | 23 +
final/test/FrontendC/2007-05-16-EmptyStruct.c | 5 +
final/test/FrontendC/2007-05-29-UnionCopy.c | 18 +
.../FrontendC/2007-06-05-NoInlineAttribute.c | 13 +
.../FrontendC/2007-06-15-AnnotateAttribute.c | 24 +
.../FrontendC/2007-06-18-SextAttrAggregate.c | 11 +
.../FrontendC/2007-07-29-RestrictPtrArg.c | 6 +
.../FrontendC/2007-08-01-LoadStoreAlign.c | 17 +
final/test/FrontendC/2007-08-21-ComplexCst.c | 3 +
final/test/FrontendC/2007-08-22-CTTZ.c | 6 +
final/test/FrontendC/2007-09-05-ConstCtor.c | 14 +
final/test/FrontendC/2007-09-12-PragmaPack.c | 30 +
.../test/FrontendC/2007-09-14-NegatePointer.c | 7 +
final/test/FrontendC/2007-09-17-WeakRef.c | 10 +
.../FrontendC/2007-09-20-GcrootAttribute.c | 29 +
final/test/FrontendC/2007-09-26-Alignment.c | 7 +
.../FrontendC/2007-09-27-ComplexIntCompare.c | 17 +
.../FrontendC/2007-09-28-PackedUnionMember.c | 38 +
.../test/FrontendC/2007-10-01-BuildArrayRef.c | 20 +
.../test/FrontendC/2007-10-02-VolatileArray.c | 7 +
final/test/FrontendC/2007-10-15-VoidPtr.c | 4 +
final/test/FrontendC/2007-10-30-Volatile.c | 6 +
.../test/FrontendC/2007-11-07-AlignedMemcpy.c | 4 +
.../FrontendC/2007-11-07-CopyAggregateAlign.c | 3 +
.../FrontendC/2007-11-07-ZeroAggregateAlign.c | 3 +
final/test/FrontendC/2007-11-27-SExtZExt.c | 12 +
.../FrontendC/2007-11-28-GlobalInitializer.c | 8 +
final/test/FrontendC/2007-12-16-AsmNoUnwind.c | 3 +
final/test/FrontendC/2007-12-VarArrayDebug.c | 18 +
.../test/FrontendC/2008-01-04-WideBitfield.c | 13 +
.../FrontendC/2008-01-07-UnusualIntSize.c | 11 +
.../FrontendC/2008-01-11-ChainConsistency.c | 3 +
.../FrontendC/2008-01-21-PackedBitFields.c | 7 +
.../FrontendC/2008-01-21-PackedStructField.c | 18 +
.../2008-01-24-StructAlignAndBitFields.c | 4 +
.../test/FrontendC/2008-01-25-ByValReadNone.c | 15 +
.../FrontendC/2008-01-25-ZeroSizedAggregate.c | 39 +
final/test/FrontendC/2008-01-28-PragmaMark.c | 6 +
final/test/FrontendC/2008-01-28-UnionSize.c | 24 +
.../FrontendC/2008-02-11-AnnotateBuiltin.c | 7 +
.../test/FrontendC/2008-03-03-CtorAttrType.c | 6 +
final/test/FrontendC/2008-03-05-syncPtr.c | 27 +
.../2008-03-24-BitField-And-Alloca.c | 89 +
.../FrontendC/2008-03-26-PackedBitFields.c | 7 +
.../test/FrontendC/2008-04-08-NoExceptions.c | 7 +
final/test/FrontendC/2008-05-06-CFECrash.c | 4 +
.../FrontendC/2008-05-12-TempUsedBeforeDef.c | 10 +
.../test/FrontendC/2008-05-19-AlwaysInline.c | 12 +
.../FrontendC/2008-07-08-FAbsAttributes.c | 4 +
final/test/FrontendC/2008-07-29-EHLabel.ll | 282 +
.../test/FrontendC/2008-08-07-AlignPadding1.c | 29 +
.../test/FrontendC/2008-08-07-AlignPadding2.c | 18 +
final/test/FrontendC/2008-08-07-GEPIntToPtr.c | 14 +
final/test/FrontendC/2008-09-03-WeakAlias.c | 9 +
.../test/FrontendC/2008-10-13-FrontendCrash.c | 9 +
.../test/FrontendC/2008-10-30-ZeroPlacement.c | 9 +
final/test/FrontendC/2008-11-02-WeakAlias.c | 5 +
.../FrontendC/2008-11-08-InstCombineSelect.c | 17 +
.../2008-11-11-AnnotateStructFieldAttribute.c | 18 +
.../FrontendC/2008-12-23-AsmIntPointerTie.c | 9 +
.../test/FrontendC/2009-01-05-BlockInlining.c | 28 +
final/test/FrontendC/2009-01-20-k8.c | 4 +
.../FrontendC/2009-01-21-InvalidIterator.c | 74 +
.../2009-02-13-zerosize-union-field-ppc.c | 14 +
.../2009-02-13-zerosize-union-field.c | 14 +
.../test/FrontendC/2009-02-17-BitField-dbg.c | 14 +
.../test/FrontendC/2009-03-01-MallocNoAlias.c | 3 +
.../FrontendC/2009-03-08-ZeroEltStructCrash.c | 14 +
.../FrontendC/2009-03-09-WeakDeclarations-1.c | 22 +
final/test/FrontendC/2009-03-13-dbg.c | 5 +
final/test/FrontendC/2009-04-22-UnknownSize.c | 4 +
.../FrontendC/2009-04-28-UnionArrayCrash.c | 11 +
final/test/FrontendC/2009-05-04-EnumInreg.c | 18 +
.../test/FrontendC/2009-05-17-AlwaysInline.c | 17 +
.../test/FrontendC/2009-06-14-HighlyAligned.c | 8 +
.../2009-06-18-StaticInitTailPadPack.c | 26 +
final/test/FrontendC/2009-07-14-VoidPtr.c | 6 +
.../FrontendC/2009-07-15-pad-wchar_t-array.c | 7 +
.../test/FrontendC/2009-07-17-VoidParameter.c | 4 +
.../test/FrontendC/2009-07-22-StructLayout.c | 34 +
.../2009-08-11-AsmBlocksComplexJumpTarget.c | 10 +
final/test/FrontendC/2009-09-24-SqrtErrno.c | 12 +
.../FrontendC/2009-12-07-BitFieldAlignment.c | 15 +
final/test/FrontendC/2010-01-05-LinkageName.c | 15 +
final/test/FrontendC/2010-01-13-MemBarrier.c | 11 +
.../FrontendC/2010-01-14-FnType-DebugInfo.c | 4 +
.../FrontendC/2010-01-14-StaticVariable.c | 12 +
.../test/FrontendC/2010-01-18-Inlined-Debug.c | 12 +
final/test/FrontendC/2010-02-10-PointerName.c | 7 +
.../test/FrontendC/2010-02-15-DbgStaticVar.c | 13 +
final/test/FrontendC/2010-02-16-DbgVarScope.c | 30 +
.../FrontendC/2010-02-18-Dbg-VectorType.c | 9 +
final/test/FrontendC/2010-03-10-arm-asmreg.c | 15 +
final/test/FrontendC/2010-03-5-LexicalScope.c | 10 +
.../FrontendC/2010-05-14-Optimized-VarType.c | 23 +
final/test/FrontendC/2010-05-18-asmsched.c | 17 +
final/test/FrontendC/2010-05-18-palignr.c | 24 +
.../test/FrontendC/2010-05-26-AsmSideEffect.c | 12 +
final/test/FrontendC/2010-05-31-palignr.c | 24 +
final/test/FrontendC/2010-06-11-SaveExpr.c | 8 +
final/test/FrontendC/2010-06-17-asmcrash.c | 16 +
final/test/FrontendC/2010-06-28-DbgLocalVar.c | 14 +
final/test/FrontendC/2010-06-28-nowarn.c | 21 +
.../FrontendC/2010-07-08-DeclDebugLineNo.c | 10 +
.../2010-07-14-overconservative-align.c | 14 +
final/test/FrontendC/2010-07-14-ref-off-end.c | 27 +
.../FrontendC/2010-07-27-MinNoFoldConst.c | 18 +
.../test/FrontendC/2010-08-12-asm-aggr-arg.c | 16 +
final/test/FrontendC/2010-11-16-asmblock.c | 16 +
.../test/FrontendC/2010-12-01-CommonGlobal.c | 7 +
final/test/FrontendC/2011-02-21-DATA-common.c | 5 +
final/test/FrontendC/Atomics-no64bit.c | 190 +
final/test/FrontendC/Atomics.c | 236 +
final/test/FrontendC/BasicInstrs.c | 26 +
final/test/FrontendC/alignstack.c | 23 +
final/test/FrontendC/always-inline.c | 12 +
final/test/FrontendC/arrayderef.c | 17 +
final/test/FrontendC/asm-reg-var-local.c | 32 +
final/test/FrontendC/attribute_constructor.c | 6 +
final/test/FrontendC/block-copy.c | 20 +
final/test/FrontendC/crash-invalid-array.c | 17 +
final/test/FrontendC/cstring-align.c | 11 +
final/test/FrontendC/dg.exp | 5 +
final/test/FrontendC/exact-div-expr.c | 6 +
final/test/FrontendC/extern-weak.c | 12 +
final/test/FrontendC/fp-logical.c | 15 +
final/test/FrontendC/func-aligned.c | 7 +
final/test/FrontendC/funccall.c | 17 +
final/test/FrontendC/hidden-visibility.c | 3 +
final/test/FrontendC/implicit-arg.c | 10 +
final/test/FrontendC/inline-asm-function.c | 6 +
final/test/FrontendC/inline-asm-mrv.c | 12 +
final/test/FrontendC/libcalls-d.c | 14 +
final/test/FrontendC/libcalls-ld.c | 17 +
final/test/FrontendC/libcalls.c | 14 +
final/test/FrontendC/misaligned-param.c | 15 +
final/test/FrontendC/nested-functions.c | 18 +
final/test/FrontendC/pr2394.c | 6 +
final/test/FrontendC/pr3518.c | 24 +
final/test/FrontendC/pr4349.c | 38 +
final/test/FrontendC/pr5406.c | 20 +
final/test/FrontendC/ptr-rotate.c | 7 +
final/test/FrontendC/redef-ext-inline.c | 6 +
final/test/FrontendC/sret.c | 15 +
final/test/FrontendC/sret2.c | 9 +
final/test/FrontendC/unaligned-memcpy.c | 5 +
final/test/FrontendC/union-align.c | 17 +
final/test/FrontendC/vla-1.c | 9 +
final/test/FrontendC/vla-2.c | 10 +
final/test/FrontendC/wchar-const.c | 9 +
final/test/FrontendC/weak_constant.c | 12 +
.../2008-11-03-OptionOverride.f90 | 4 +
.../2009-02-09-FloorDivExpr.f90 | 32 +
final/test/FrontendFortran/cpow.f90 | 18 +
final/test/FrontendFortran/dg.exp | 6 +
.../2007-10-03-MetadataPointers.mm | 7 +
.../2010-08-02-NonPODObjectValue.mm | 27 +
.../FrontendObjC++/2010-08-04-Template.mm | 10 +
.../FrontendObjC++/2010-08-06-X.Y-syntax.mm | 16 +
final/test/FrontendObjC++/dg.exp | 5 +
final/test/FrontendObjC/2007-04-03-ObjcEH.m | 29 +
final/test/FrontendObjC/2007-05-02-Strong.m | 23 +
final/test/FrontendObjC/2007-09-25-EH.m | 27 +
.../FrontendObjC/2007-10-17-SJLJExceptions.m | 24 +
.../FrontendObjC/2007-10-18-ProDescriptor.m | 19 +
.../FrontendObjC/2007-10-23-GC-WriteBarrier.m | 9 +
final/test/FrontendObjC/2008-10-3-EhValue.m | 50 +
final/test/FrontendObjC/2008-11-12-Metadata.m | 14 +
.../FrontendObjC/2008-11-24-ConstCFStrings.m | 11 +
final/test/FrontendObjC/2008-11-25-Blocks.m | 17 +
.../FrontendObjC/2009-01-26-WriteBarrier-2.m | 14 +
.../FrontendObjC/2009-02-05-VolatileProp.m | 11 +
.../test/FrontendObjC/2009-04-14-AsmSection.m | 9 +
.../2009-04-27-bitfield-vs-ivar.m | 44 +
.../FrontendObjC/2009-04-28-bitfield-vs-vbc.m | 127 +
final/test/FrontendObjC/2009-08-05-utf16.m | 5 +
.../test/FrontendObjC/2009-08-17-DebugInfo.m | 28 +
final/test/FrontendObjC/2009-11-30-Objc-ID.m | 14 +
.../FrontendObjC/2010-02-01-utf16-with-null.m | 5 +
.../2010-02-11-fwritable-stringsBug.m | 17 +
.../FrontendObjC/2010-02-23-DbgInheritance.m | 9 +
.../test/FrontendObjC/2010-03-17-StructRef.m | 43 +
.../2010-06-04-UnnamedCFString-dbg.m | 6 +
.../2011-03-02-ConstCFStringLiteralAlign.m | 11 +
final/test/FrontendObjC/dg.exp | 5 +
final/test/Integer/2007-01-19-TruncSext.ll | 29 +
final/test/Integer/BitArith.ll | 25 +
final/test/Integer/BitBit.ll | 24 +
final/test/Integer/BitCast.ll | 26 +
final/test/Integer/BitIcmp.ll | 43 +
final/test/Integer/BitMem.ll | 29 +
final/test/Integer/BitMisc.ll | 24 +
final/test/Integer/BitPacked.ll | 21 +
final/test/Integer/alignment_bt.ll | 21 +
final/test/Integer/basictest_bt.ll | 31 +
final/test/Integer/cfgstructures_bt.ll | 56 +
final/test/Integer/constexpr_bt.ll | 84 +
final/test/Integer/constpointer_bt.ll | 32 +
final/test/Integer/dg.exp | 3 +
final/test/Integer/fold-fpcast_bt.ll | 33 +
final/test/Integer/forwardreftest_bt.ll | 33 +
final/test/Integer/globalredefinition_bt.ll | 18 +
final/test/Integer/globalvars_bt.ll | 26 +
final/test/Integer/indirectcall2_bt.ll | 24 +
final/test/Integer/indirectcall_bt.ll | 52 +
final/test/Integer/instructions_bt.ll | 26 +
final/test/Integer/newcasts_bt.ll | 28 +
final/test/Integer/opaquetypes_bt.ll | 58 +
final/test/Integer/packed_bt.ll | 16 +
final/test/Integer/packed_struct_bt.ll | 33 +
final/test/Integer/paramattrs_bt.ll | 19 +
final/test/Integer/properties_bt.ll | 9 +
final/test/Integer/prototype_bt.ll | 13 +
final/test/Integer/recursivetype_bt.ll | 108 +
final/test/Integer/simplecalltest_bt.ll | 28 +
final/test/Integer/small_bt.ll | 13 +
final/test/Integer/testalloca_bt.ll | 28 +
final/test/Integer/testarith_bt.ll | 21 +
final/test/Integer/testconstants_bt.ll | 32 +
final/test/Integer/testicmp_bt.ll | 23 +
final/test/Integer/testlogical_bt.ll | 13 +
final/test/Integer/testlogical_new_bt.ll | 15 +
final/test/Integer/testmemory_bt.ll | 45 +
final/test/Integer/testswitch_bt.ll | 24 +
final/test/Integer/testvarargs_bt.ll | 14 +
final/test/Integer/undefined_bt.ll | 18 +
final/test/Integer/unreachable_bt.ll | 16 +
final/test/Integer/varargs_bt.ll | 23 +
final/test/Integer/varargs_new_bt.ll | 32 +
final/test/LLVMC/Alias.td | 24 +
final/test/LLVMC/AppendCmdHook.td | 29 +
final/test/LLVMC/C++/dash-x.cpp | 10 +
final/test/LLVMC/C++/dg.exp | 5 +
final/test/LLVMC/C++/filelist.cpp | 3 +
final/test/LLVMC/C++/hello.cpp | 9 +
final/test/LLVMC/C++/just-compile.cpp | 10 +
final/test/LLVMC/C++/together.cpp | 10 +
final/test/LLVMC/C++/unknown_suffix.unk | 9 +
final/test/LLVMC/C/dg.exp | 5 +
final/test/LLVMC/C/emit-llvm-opt.c | 9 +
final/test/LLVMC/C/emit-llvm.c | 8 +
final/test/LLVMC/C/hello.c | 13 +
final/test/LLVMC/C/include.c | 10 +
final/test/LLVMC/C/opt-test.c | 13 +
final/test/LLVMC/C/sink.c | 13 +
final/test/LLVMC/C/wall.c | 13 +
final/test/LLVMC/EmptyCompilationGraph.td | 8 +
final/test/LLVMC/EnvParentheses.td | 18 +
final/test/LLVMC/ForwardAs.td | 21 +
final/test/LLVMC/ForwardTransformedValue.td | 27 +
final/test/LLVMC/ForwardValue.td | 24 +
final/test/LLVMC/HookWithArguments.td | 20 +
final/test/LLVMC/HookWithInFile.td | 16 +
final/test/LLVMC/Init.td | 25 +
final/test/LLVMC/LanguageMap.td | 29 +
final/test/LLVMC/MultiValuedOption.td | 24 +
final/test/LLVMC/MultipleCompilationGraphs.td | 10 +
final/test/LLVMC/MultipleOutputLanguages.td | 27 +
final/test/LLVMC/NoActions.td | 16 +
final/test/LLVMC/NoCompilationGraph.td | 6 +
final/test/LLVMC/ObjC++/dg.exp | 5 +
final/test/LLVMC/ObjC++/hello.mm | 8 +
final/test/LLVMC/ObjC/dg.exp | 5 +
final/test/LLVMC/ObjC/hello.m | 12 +
final/test/LLVMC/OneOrMore.td | 25 +
final/test/LLVMC/OptionPreprocessor.td | 67 +
final/test/LLVMC/OutputSuffixHook.td | 24 +
final/test/LLVMC/TestWarnings.td | 8 +
final/test/LLVMC/dg.exp | 3 +
final/test/LLVMC/test_data/false.c | 10 +
final/test/LLVMC/test_data/false.cpp | 16 +
final/test/LLVMC/test_data/false2.cpp | 5 +
final/test/LLVMC/test_data/together.c | 5 +
final/test/Linker/2002-07-17-GlobalFail.ll | 8 +
final/test/Linker/2002-07-17-LinkTest2.ll | 10 +
final/test/Linker/2002-08-20-ConstantExpr.ll | 9 +
final/test/Linker/2003-01-30-LinkerRename.ll | 9 +
.../Linker/2003-01-30-LinkerTypeRename.ll | 9 +
final/test/Linker/2003-04-21-Linkage.ll | 14 +
final/test/Linker/2003-04-23-LinkOnceLost.ll | 10 +
.../Linker/2003-04-26-NullPtrLinkProblem.ll | 17 +
final/test/Linker/2003-05-15-TypeProblem.ll | 10 +
final/test/Linker/2003-05-31-LinkerRename.ll | 17 +
.../Linker/2003-06-02-TypeResolveProblem.ll | 7 +
.../Linker/2003-06-02-TypeResolveProblem2.ll | 7 +
.../Linker/2003-08-20-OpaqueTypeResolve.ll | 8 +
.../Linker/2003-08-23-GlobalVarLinking.ll | 10 +
.../2003-08-23-RecursiveOpaqueTypeResolve.ll | 9 +
.../test/Linker/2003-08-24-InheritPtrSize.ll | 9 +
.../Linker/2003-08-28-TypeResolvesGlobal.ll | 12 +
.../Linker/2003-08-28-TypeResolvesGlobal2.ll | 18 +
.../Linker/2003-08-28-TypeResolvesGlobal3.ll | 15 +
.../2003-10-21-ConflictingTypesTolerance.ll | 6 +
.../test/Linker/2003-10-27-LinkOncePromote.ll | 8 +
.../test/Linker/2003-11-18-TypeResolution.ll | 20 +
.../Linker/2004-02-17-WeakStrongLinkage.ll | 7 +
.../test/Linker/2004-05-07-TypeResolution1.ll | 35 +
.../test/Linker/2004-05-07-TypeResolution2.ll | 15 +
.../test/Linker/2004-12-03-DisagreeingType.ll | 9 +
.../Linker/2005-02-12-ConstantGlobals-2.ll | 8 +
.../test/Linker/2005-02-12-ConstantGlobals.ll | 8 +
.../2005-12-06-AppendingZeroLengthArrays.ll | 10 +
.../test/Linker/2006-01-19-ConstantPacked.ll | 13 +
.../Linker/2006-06-15-GlobalVarAlignment.ll | 7 +
.../test/Linker/2008-03-05-AliasReference.ll | 17 +
.../test/Linker/2008-03-05-AliasReference2.ll | 11 +
.../Linker/2008-03-07-DroppedSection_a.ll | 11 +
.../Linker/2008-03-07-DroppedSection_b.ll | 10 +
.../Linker/2008-06-13-LinkOnceRedefinition.ll | 8 +
final/test/Linker/2008-06-26-AddressSpace.ll | 9 +
final/test/Linker/2008-07-06-AliasFnDecl.ll | 14 +
final/test/Linker/2008-07-06-AliasFnDecl2.ll | 13 +
final/test/Linker/2008-07-06-AliasWeakDest.ll | 18 +
.../test/Linker/2008-07-06-AliasWeakDest2.ll | 18 +
final/test/Linker/2009-09-03-mdnode.ll | 30 +
final/test/Linker/2009-09-03-mdnode2.ll | 25 +
final/test/Linker/AppendingLinkage.ll | 15 +
final/test/Linker/AppendingLinkage2.ll | 8 +
final/test/Linker/ConstantGlobals1.ll | 9 +
final/test/Linker/ConstantGlobals2.ll | 9 +
final/test/Linker/ConstantGlobals3.ll | 8 +
final/test/Linker/LinkOnce.ll | 8 +
final/test/Linker/PR8300.ll | 13 +
final/test/Linker/available_externally_a.ll | 5 +
final/test/Linker/available_externally_b.ll | 4 +
final/test/Linker/basiclink.ll | 13 +
final/test/Linker/dg.exp | 3 +
final/test/Linker/link-archive.ll | 16 +
final/test/Linker/link-global-to-func.ll | 13 +
final/test/Linker/link-messages.ll | 11 +
final/test/Linker/linkmdnode.ll | 13 +
final/test/Linker/linkmdnode2.ll | 22 +
final/test/Linker/linknamedmdnode.ll | 6 +
final/test/Linker/linknamedmdnode2.ll | 6 +
final/test/Linker/metadata-a.ll | 15 +
final/test/Linker/metadata-b.ll | 9 +
.../Linker/partial-type-refinement-link.ll | 20 +
final/test/Linker/partial-type-refinement.ll | 24 +
final/test/Linker/redefinition.ll | 10 +
final/test/Linker/testlink1.ll | 42 +
final/test/Linker/testlink2.ll | 41 +
final/test/Linker/unnamed-addr1-a.ll | 27 +
final/test/Linker/unnamed-addr1-b.ll | 12 +
final/test/Linker/weakextern.ll | 11 +
final/test/MC/ARM/arm_fixups.s | 7 +
final/test/MC/ARM/arm_instructions.s | 284 +
final/test/MC/ARM/arm_word_directive.s | 6 +
final/test/MC/ARM/bracket-darwin.s | 5 +
final/test/MC/ARM/bracket-exprs.s | 15 +
final/test/MC/ARM/darwin-ARM-reloc.s | 171 +
final/test/MC/ARM/darwin-Thumb-reloc.s | 139 +
final/test/MC/ARM/dg.exp | 5 +
final/test/MC/ARM/elf-eflags-eabi.s | 13 +
final/test/MC/ARM/elf-movt.s | 39 +
final/test/MC/ARM/elf-reloc-01.ll | 71 +
final/test/MC/ARM/elf-reloc-02.ll | 51 +
final/test/MC/ARM/elf-reloc-03.ll | 98 +
final/test/MC/ARM/full_line_comment.s | 8 +
final/test/MC/ARM/hilo-16bit-relocations.s | 20 +
final/test/MC/ARM/neon-abs-encoding.s | 31 +
final/test/MC/ARM/neon-absdiff-encoding.s | 82 +
final/test/MC/ARM/neon-add-encoding.s | 137 +
final/test/MC/ARM/neon-bitcount-encoding.s | 31 +
final/test/MC/ARM/neon-bitwise-encoding.s | 47 +
final/test/MC/ARM/neon-cmp-encoding.s | 115 +
final/test/MC/ARM/neon-convert-encoding.s | 38 +
final/test/MC/ARM/neon-dup-encoding.s | 27 +
final/test/MC/ARM/neon-minmax-encoding.s | 58 +
final/test/MC/ARM/neon-mov-encoding.s | 117 +
final/test/MC/ARM/neon-mul-accum-encoding.s | 67 +
final/test/MC/ARM/neon-mul-encoding.s | 56 +
final/test/MC/ARM/neon-neg-encoding.s | 30 +
final/test/MC/ARM/neon-pairwise-encoding.s | 86 +
final/test/MC/ARM/neon-reciprocal-encoding.s | 26 +
final/test/MC/ARM/neon-reverse-encoding.s | 26 +
final/test/MC/ARM/neon-satshift-encoding.s | 150 +
final/test/MC/ARM/neon-shift-encoding.s | 174 +
final/test/MC/ARM/neon-shiftaccum-encoding.s | 98 +
final/test/MC/ARM/neon-shuffle-encoding.s | 46 +
final/test/MC/ARM/neon-sub-encoding.s | 108 +
final/test/MC/ARM/neon-table-encoding.s | 19 +
final/test/MC/ARM/neon-vld-encoding.s | 110 +
final/test/MC/ARM/neon-vst-encoding.s | 101 +
final/test/MC/ARM/neont2-abs-encoding.s | 33 +
final/test/MC/ARM/neont2-absdiff-encoding.s | 86 +
final/test/MC/ARM/neont2-add-encoding.s | 138 +
final/test/MC/ARM/neont2-bitcount-encoding.s | 34 +
final/test/MC/ARM/neont2-bitwise-encoding.s | 49 +
final/test/MC/ARM/neont2-cmp-encoding.s | 36 +
final/test/MC/ARM/neont2-convert-encoding.s | 40 +
final/test/MC/ARM/neont2-dup-encoding.s | 29 +
final/test/MC/ARM/neont2-minmax-encoding.s | 60 +
final/test/MC/ARM/neont2-mov-encoding.s | 119 +
final/test/MC/ARM/neont2-mul-accum-encoding.s | 69 +
final/test/MC/ARM/neont2-mul-encoding.s | 58 +
final/test/MC/ARM/neont2-neg-encoding.s | 32 +
final/test/MC/ARM/neont2-pairwise-encoding.s | 89 +
.../test/MC/ARM/neont2-reciprocal-encoding.s | 28 +
final/test/MC/ARM/neont2-reverse-encoding.s | 26 +
final/test/MC/ARM/neont2-satshift-encoding.s | 152 +
final/test/MC/ARM/neont2-shift-encoding.s | 162 +
.../test/MC/ARM/neont2-shiftaccum-encoding.s | 100 +
final/test/MC/ARM/neont2-shuffle-encoding.s | 48 +
final/test/MC/ARM/neont2-sub-encoding.s | 46 +
final/test/MC/ARM/neont2-table-encoding.s | 21 +
final/test/MC/ARM/neont2-vld-encoding.s | 112 +
final/test/MC/ARM/neont2-vst-encoding.s | 103 +
final/test/MC/ARM/prefetch.ll | 58 +
final/test/MC/ARM/reg-list.s | 8 +
final/test/MC/ARM/simple-encoding.ll | 237 +
final/test/MC/ARM/simple-fp-encoding.s | 236 +
final/test/MC/ARM/thumb.s | 70 +
final/test/MC/ARM/thumb2.s | 286 +
final/test/MC/ARM/thumb2_instructions.s | 12 +
final/test/MC/AsmParser/assignment.s | 11 +
final/test/MC/AsmParser/conditional_asm.s | 12 +
final/test/MC/AsmParser/dash-n.s | 7 +
final/test/MC/AsmParser/dg.exp | 5 +
final/test/MC/AsmParser/directive_abort.s | 6 +
final/test/MC/AsmParser/directive_align.s | 16 +
final/test/MC/AsmParser/directive_ascii.s | 41 +
final/test/MC/AsmParser/directive_comm.s | 8 +
.../MC/AsmParser/directive_darwin_section.s | 4 +
final/test/MC/AsmParser/directive_desc.s | 8 +
final/test/MC/AsmParser/directive_elf_size.s | 8 +
final/test/MC/AsmParser/directive_file.s | 8 +
final/test/MC/AsmParser/directive_fill.s | 17 +
final/test/MC/AsmParser/directive_include.s | 9 +
final/test/MC/AsmParser/directive_lcomm.s | 10 +
final/test/MC/AsmParser/directive_line.s | 5 +
final/test/MC/AsmParser/directive_loc.s | 8 +
final/test/MC/AsmParser/directive_lsym.s | 13 +
final/test/MC/AsmParser/directive_org.s | 11 +
final/test/MC/AsmParser/directive_set.s | 12 +
final/test/MC/AsmParser/directive_space.s | 11 +
.../directive_subsections_via_symbols.s | 6 +
.../MC/AsmParser/directive_symbol_attrs.s | 7 +
final/test/MC/AsmParser/directive_tbss.s | 7 +
final/test/MC/AsmParser/directive_tdata.s | 9 +
.../MC/AsmParser/directive_thread_init_func.s | 7 +
final/test/MC/AsmParser/directive_tlv.s | 13 +
final/test/MC/AsmParser/directive_values.s | 58 +
final/test/MC/AsmParser/directive_zerofill.s | 14 +
.../MC/AsmParser/dollars-in-identifiers.s | 7 +
final/test/MC/AsmParser/equ.s | 9 +
.../test/MC/AsmParser/expr_symbol_modifiers.s | 14 +
final/test/MC/AsmParser/exprs-invalid.s | 13 +
final/test/MC/AsmParser/exprs.s | 77 +
final/test/MC/AsmParser/floating-literals.s | 35 +
final/test/MC/AsmParser/hello.s | 28 +
final/test/MC/AsmParser/ifdef.s | 29 +
final/test/MC/AsmParser/ifndef.s | 29 +
final/test/MC/AsmParser/labels.s | 59 +
.../MC/AsmParser/macro-def-in-instantiation.s | 13 +
final/test/MC/AsmParser/macros-parsing.s | 23 +
final/test/MC/AsmParser/macros.s | 39 +
final/test/MC/AsmParser/rename.s | 10 +
final/test/MC/AsmParser/section.s | 107 +
final/test/MC/AsmParser/variables-invalid.s | 17 +
final/test/MC/AsmParser/variables.s | 15 +
final/test/MC/COFF/align-nops.s | 50 +
final/test/MC/COFF/basic-coff.s | 133 +
final/test/MC/COFF/bss.s | 15 +
final/test/MC/COFF/dg.exp | 5 +
final/test/MC/COFF/module-asm.ll | 26 +
final/test/MC/COFF/simple-fixups.s | 50 +
final/test/MC/COFF/switch-relocations.ll | 37 +
final/test/MC/COFF/symbol-alias.s | 62 +
final/test/MC/COFF/symbol-fragment-offset.s | 187 +
final/test/MC/COFF/weak.s | 51 +
final/test/MC/Disassembler/ARM/arm-tests.txt | 141 +
final/test/MC/Disassembler/ARM/dg.exp | 6 +
final/test/MC/Disassembler/ARM/neon-tests.txt | 61 +
.../test/MC/Disassembler/ARM/thumb-tests.txt | 123 +
final/test/MC/Disassembler/MBlaze/dg.exp | 6 +
.../MC/Disassembler/MBlaze/mblaze_branch.txt | 119 +
.../MC/Disassembler/MBlaze/mblaze_fpu.txt | 47 +
.../MC/Disassembler/MBlaze/mblaze_fsl.txt | 338 +
.../MC/Disassembler/MBlaze/mblaze_imm.txt | 121 +
.../MC/Disassembler/MBlaze/mblaze_memory.txt | 65 +
.../Disassembler/MBlaze/mblaze_operands.txt | 197 +
.../MC/Disassembler/MBlaze/mblaze_pattern.txt | 14 +
.../MC/Disassembler/MBlaze/mblaze_shift.txt | 29 +
.../MC/Disassembler/MBlaze/mblaze_special.txt | 105 +
.../MC/Disassembler/MBlaze/mblaze_typea.txt | 74 +
.../MC/Disassembler/MBlaze/mblaze_typeb.txt | 56 +
final/test/MC/Disassembler/X86/dg.exp | 6 +
final/test/MC/Disassembler/X86/enhanced.txt | 6 +
.../test/MC/Disassembler/X86/simple-tests.txt | 68 +
.../MC/Disassembler/X86/truncated-input.txt | 4 +
final/test/MC/ELF/abs.s | 16 +
final/test/MC/ELF/alias-reloc.s | 52 +
final/test/MC/ELF/alias.s | 85 +
final/test/MC/ELF/align-bss.s | 17 +
final/test/MC/ELF/align-nops.s | 40 +
final/test/MC/ELF/align-size.s | 13 +
final/test/MC/ELF/align-text.s | 19 +
final/test/MC/ELF/align.s | 32 +
final/test/MC/ELF/bad-section.s | 9 +
final/test/MC/ELF/basic-elf-32.s | 78 +
final/test/MC/ELF/basic-elf-64.s | 82 +
final/test/MC/ELF/bracket-exprs.s | 15 +
final/test/MC/ELF/bracket.s | 8 +
final/test/MC/ELF/bss.ll | 8 +
final/test/MC/ELF/call-abs.s | 24 +
final/test/MC/ELF/cfi-advance-loc2.s | 45 +
final/test/MC/ELF/cfi-def-cfa-offset.s | 46 +
final/test/MC/ELF/cfi-def-cfa-register.s | 41 +
final/test/MC/ELF/cfi-def-cfa.s | 42 +
final/test/MC/ELF/cfi-offset.s | 42 +
final/test/MC/ELF/cfi-remember.s | 45 +
final/test/MC/ELF/cfi-zero-addr-delta.s | 48 +
final/test/MC/ELF/cfi.s | 674 +
final/test/MC/ELF/comdat.s | 86 +
final/test/MC/ELF/common.s | 88 +
final/test/MC/ELF/common2.s | 21 +
final/test/MC/ELF/debug-line.s | 22 +
final/test/MC/ELF/debug-loc.s | 32 +
final/test/MC/ELF/dg.exp | 5 +
final/test/MC/ELF/diff.s | 15 +
final/test/MC/ELF/diff2.s | 13 +
final/test/MC/ELF/elf_directive_previous.s | 13 +
final/test/MC/ELF/elf_directive_section.s | 23 +
final/test/MC/ELF/empty-dwarf-lines.s | 21 +
final/test/MC/ELF/empty.s | 70 +
final/test/MC/ELF/entsize.ll | 44 +
final/test/MC/ELF/entsize.s | 69 +
final/test/MC/ELF/file.s | 23 +
final/test/MC/ELF/global-offset.s | 18 +
final/test/MC/ELF/got.s | 25 +
final/test/MC/ELF/ident.s | 17 +
final/test/MC/ELF/invalid-symver.s | 7 +
final/test/MC/ELF/leb128.s | 19 +
final/test/MC/ELF/local-reloc.s | 31 +
final/test/MC/ELF/merge.s | 97 +
final/test/MC/ELF/n_bytes.s | 20 +
final/test/MC/ELF/no-fixup.s | 16 +
final/test/MC/ELF/noexec.s | 24 +
final/test/MC/ELF/norelocation.s | 18 +
final/test/MC/ELF/org.s | 13 +
final/test/MC/ELF/pic-diff.s | 29 +
final/test/MC/ELF/plt.s | 14 +
final/test/MC/ELF/pr9292.s | 26 +
final/test/MC/ELF/relax-arith.s | 75 +
final/test/MC/ELF/relax-crash.s | 11 +
final/test/MC/ELF/relax.s | 27 +
final/test/MC/ELF/relocation-386.s | 226 +
final/test/MC/ELF/relocation-pc.s | 33 +
final/test/MC/ELF/relocation.s | 114 +
final/test/MC/ELF/rename.s | 46 +
final/test/MC/ELF/section-quoting.s | 10 +
final/test/MC/ELF/section.s | 110 +
final/test/MC/ELF/set.s | 34 +
final/test/MC/ELF/sleb.s | 29 +
final/test/MC/ELF/symref.s | 165 +
final/test/MC/ELF/tls-i386.s | 74 +
final/test/MC/ELF/tls.s | 48 +
final/test/MC/ELF/type.s | 32 +
final/test/MC/ELF/uleb.s | 22 +
final/test/MC/ELF/undef.s | 46 +
final/test/MC/ELF/undef2.s | 10 +
final/test/MC/ELF/weak-relocation.s | 15 +
final/test/MC/ELF/weak.s | 30 +
final/test/MC/ELF/weakref-plt.s | 8 +
final/test/MC/ELF/weakref-reloc.s | 49 +
final/test/MC/ELF/weakref.s | 234 +
final/test/MC/ELF/zero.s | 16 +
final/test/MC/MBlaze/dg.exp | 5 +
final/test/MC/MBlaze/mblaze_branch.s | 197 +
final/test/MC/MBlaze/mblaze_fpu.s | 77 +
final/test/MC/MBlaze/mblaze_fsl.s | 568 +
final/test/MC/MBlaze/mblaze_imm.s | 194 +
final/test/MC/MBlaze/mblaze_memory.s | 107 +
final/test/MC/MBlaze/mblaze_operands.s | 328 +
final/test/MC/MBlaze/mblaze_pattern.s | 22 +
final/test/MC/MBlaze/mblaze_shift.s | 47 +
final/test/MC/MBlaze/mblaze_special.s | 167 +
final/test/MC/MBlaze/mblaze_typea.s | 122 +
final/test/MC/MBlaze/mblaze_typeb.s | 92 +
final/test/MC/MachO/absolutize.s | 213 +
final/test/MC/MachO/comm-1.s | 114 +
.../test/MC/MachO/darwin-complex-difference.s | 129 +
.../test/MC/MachO/darwin-x86_64-diff-relocs.s | 329 +
.../MC/MachO/darwin-x86_64-reloc-offsets.s | 343 +
final/test/MC/MachO/darwin-x86_64-reloc.s | 405 +
final/test/MC/MachO/data.s | 67 +
final/test/MC/MachO/dg.exp | 6 +
final/test/MC/MachO/diff-with-two-sections.s | 64 +
final/test/MC/MachO/direction_labels.s | 95 +
final/test/MC/MachO/empty-dwarf-lines.s | 25 +
final/test/MC/MachO/indirect-symbols.s | 188 +
final/test/MC/MachO/jcc.s | 106 +
final/test/MC/MachO/lcomm-attributes.s | 136 +
final/test/MC/MachO/loc.s | 25 +
final/test/MC/MachO/pcrel-to-other-section.s | 107 +
final/test/MC/MachO/relax-jumps.s | 31 +
final/test/MC/MachO/relax-recompute-align.s | 37 +
final/test/MC/MachO/reloc-diff.s | 55 +
final/test/MC/MachO/reloc-pcrel-offset.s | 14 +
final/test/MC/MachO/reloc-pcrel.s | 62 +
final/test/MC/MachO/reloc.s | 292 +
final/test/MC/MachO/section-align-1.s | 87 +
final/test/MC/MachO/section-align-2.s | 137 +
final/test/MC/MachO/section-flags.s | 14 +
final/test/MC/MachO/string-table.s | 100 +
final/test/MC/MachO/symbol-diff.s | 122 +
final/test/MC/MachO/symbol-flags.s | 341 +
final/test/MC/MachO/symbol-indirect.s | 268 +
final/test/MC/MachO/symbols-1.s | 310 +
final/test/MC/MachO/tbss.s | 114 +
final/test/MC/MachO/tdata.s | 106 +
final/test/MC/MachO/thread_init_func.s | 63 +
final/test/MC/MachO/tls.s | 270 +
final/test/MC/MachO/tlv-reloc.s | 174 +
final/test/MC/MachO/tlv.s | 110 +
final/test/MC/MachO/values.s | 135 +
final/test/MC/MachO/weakdef.s | 141 +
final/test/MC/MachO/x86_32-optimal_nop.s | 257 +
final/test/MC/MachO/x86_32-sections.s | 536 +
final/test/MC/MachO/x86_32-symbols.s | 1041 +
final/test/MC/MachO/x86_64-sections.s | 561 +
final/test/MC/MachO/x86_64-symbols.s | 998 +
final/test/MC/MachO/zerofill-1.s | 121 +
final/test/MC/MachO/zerofill-2.s | 103 +
final/test/MC/MachO/zerofill-3.s | 141 +
final/test/MC/MachO/zerofill-4.s | 35 +
final/test/MC/MachO/zerofill-5.s | 109 +
final/test/MC/MachO/zerofill-sect-align.s | 15 +
final/test/MC/X86/3DNow.s | 92 +
final/test/MC/X86/dg.exp | 5 +
final/test/MC/X86/x86-32-avx.s | 3283 +++
final/test/MC/X86/x86-32-coverage.s | 19564 +++++++++++++
final/test/MC/X86/x86-32-fma3.s | 674 +
final/test/MC/X86/x86-32.s | 826 +
final/test/MC/X86/x86-64.s | 991 +
final/test/MC/X86/x86_64-avx-clmul-encoding.s | 42 +
final/test/MC/X86/x86_64-avx-encoding.s | 3318 +++
final/test/MC/X86/x86_64-encoding.s | 157 +
final/test/MC/X86/x86_64-fma3-encoding.s | 674 +
final/test/MC/X86/x86_64-imm-widths.s | 105 +
final/test/MC/X86/x86_directives.s | 6 +
final/test/MC/X86/x86_errors.s | 5 +
final/test/MC/X86/x86_operands.s | 58 +
final/test/Makefile | 196 +
final/test/Makefile.tests | 80 +
.../trivial-object-test.coff-i386 | Bin 0 -> 346 bytes
.../trivial-object-test.coff-x86-64 | Bin 0 -> 347 bytes
.../trivial-object-test.elf-i386 | Bin 0 -> 716 bytes
.../trivial-object-test.elf-x86-64 | Bin 0 -> 1024 bytes
.../trivial-object-test.macho-i386 | Bin 0 -> 552 bytes
.../trivial-object-test.macho-x86-64 | Bin 0 -> 552 bytes
final/test/Object/dg.exp | 3 +
.../test/Object/nm-trivial-object.test-broken | 19 +
.../Object/objdump-trivial-object.test-broken | 54 +
final/test/Other/2002-01-31-CallGraph.ll | 13 +
.../Other/2002-02-24-InlineBrokePHINodes.ll | 23 +
final/test/Other/2002-03-11-ConstPropCrash.ll | 24 +
.../Other/2003-02-19-LoopInfoNestingBug.ll | 29 +
.../2004-08-16-PackedConstantInlineStore.ll | 8 +
.../Other/2004-08-16-PackedGlobalConstant.ll | 11 +
final/test/Other/2004-08-16-PackedSelect.ll | 13 +
final/test/Other/2004-08-16-PackedSimple.ll | 13 +
.../Other/2004-08-20-PackedControlFlow.ll | 22 +
final/test/Other/2006-02-05-PassManager.ll | 5 +
...007-04-24-eliminate-mostly-empty-blocks.ll | 309 +
final/test/Other/2007-06-05-PassID.ll | 11 +
final/test/Other/2007-06-28-PassManager.ll | 7 +
final/test/Other/2007-09-10-PassManager.ll | 32 +
final/test/Other/2008-02-14-PassManager.ll | 5 +
.../Other/2008-06-04-FieldSizeInPacked.ll | 14 +
final/test/Other/2008-10-06-RemoveDeadPass.ll | 11 +
final/test/Other/2008-10-15-MissingSpace.ll | 7 +
final/test/Other/2009-03-31-CallGraph.ll | 31 +
.../Other/2009-06-05-no-implicit-float.ll | 4 +
.../Other/2009-09-14-function-elements.ll | 6 +
final/test/Other/2010-05-06-Printer.ll | 6 +
final/test/Other/close-stderr.ll | 11 +
final/test/Other/constant-fold-gep.ll | 459 +
final/test/Other/dg.exp | 3 +
final/test/Other/extract.ll | 27 +
.../Other/inline-asm-newline-terminator.ll | 6 +
.../test/Other/invalid-commandline-option.ll | 3 +
final/test/Other/lint.ll | 167 +
final/test/Scripts/README.txt | 2 +
final/test/Scripts/coff-dump.py | 590 +
final/test/Scripts/coff-dump.py.bat | 7 +
final/test/Scripts/common_dump.py | 46 +
final/test/Scripts/elf-dump | 231 +
final/test/Scripts/elf-dump.bat | 7 +
final/test/Scripts/ignore | 10 +
final/test/Scripts/macho-dumpx | 294 +
final/test/Scripts/macho-dumpx.bat | 7 +
final/test/TableGen/2003-08-03-PassCode.td | 8 +
final/test/TableGen/2006-09-18-LargeInt.td | 6 +
.../TableGen/2010-03-24-PrematureDefaults.td | 44 +
final/test/TableGen/AnonDefinitionOnDemand.td | 13 +
final/test/TableGen/BitsInitOverflow.td | 5 +
final/test/TableGen/CStyleComment.td | 14 +
final/test/TableGen/Dag.td | 71 +
final/test/TableGen/DefmInherit.td | 33 +
final/test/TableGen/DefmInsideMultiClass.td | 25 +
final/test/TableGen/FieldAccess.td | 16 +
final/test/TableGen/ForwardRef.td | 16 +
final/test/TableGen/GeneralList.td | 9 +
final/test/TableGen/Include.inc | 4 +
final/test/TableGen/Include.td | 7 +
final/test/TableGen/IntBitInit.td | 6 +
final/test/TableGen/LazyChange.td | 11 +
final/test/TableGen/LetInsideMultiClasses.td | 29 +
final/test/TableGen/ListArgs.td | 12 +
final/test/TableGen/ListArgsSimple.td | 9 +
final/test/TableGen/ListConversion.td | 11 +
final/test/TableGen/ListManip.td | 12 +
final/test/TableGen/ListSlices.td | 19 +
final/test/TableGen/MultiClass.td | 26 +
final/test/TableGen/MultiClassDefName.td | 13 +
final/test/TableGen/MultiClassInherit.td | 65 +
final/test/TableGen/Slice.td | 88 +
final/test/TableGen/String.td | 6 +
final/test/TableGen/SuperSubclassSameName.td | 21 +
final/test/TableGen/TargetInstrInfo.td | 148 +
final/test/TableGen/TargetInstrSpec.td | 98 +
final/test/TableGen/TemplateArgRename.td | 18 +
final/test/TableGen/Tree.td | 19 +
final/test/TableGen/TreeNames.td | 18 +
final/test/TableGen/UnsetBitInit.td | 11 +
final/test/TableGen/UnterminatedComment.td | 6 +
final/test/TableGen/cast.td | 91 +
final/test/TableGen/defmclass.td | 50 +
final/test/TableGen/dg.exp | 3 +
final/test/TableGen/eq.td | 14 +
final/test/TableGen/eqbit.td | 11 +
final/test/TableGen/foreach.td | 32 +
final/test/TableGen/if.td | 45 +
final/test/TableGen/ifbit.td | 11 +
final/test/TableGen/lisp.td | 22 +
final/test/TableGen/nested-comment.td | 12 +
final/test/TableGen/strconcat.td | 11 +
final/test/TableGen/subst.td | 30 +
final/test/TableGen/subst2.td | 16 +
final/test/TableGen/usevalname.td | 24 +
final/test/TestRunner.sh | 5 +
.../ADCE/2002-01-31-UseStuckAround.ll | 13 +
.../Transforms/ADCE/2002-05-22-PHITest.ll | 16 +
.../ADCE/2002-05-23-ZeroArgPHITest.ll | 32 +
.../ADCE/2002-05-28-Crash-distilled.ll | 17 +
.../test/Transforms/ADCE/2002-05-28-Crash.ll | 54 +
.../ADCE/2002-07-17-AssertionFailure.ll | 13 +
.../ADCE/2002-07-17-PHIAssertion.ll | 48 +
.../Transforms/ADCE/2002-07-29-Segfault.ll | 9 +
.../ADCE/2003-01-22-PredecessorProblem.ll | 25 +
.../ADCE/2003-04-25-PHIPostDominateProblem.ll | 35 +
.../Transforms/ADCE/2003-06-11-InvalidCFG.ll | 28 +
.../ADCE/2003-06-24-BadSuccessor.ll | 91 +
.../ADCE/2003-06-24-BasicFunctionality.ll | 41 +
.../ADCE/2003-09-10-UnwindInstFail.ll | 19 +
.../ADCE/2003-09-15-InfLoopCrash.ll | 9 +
.../2003-11-16-MissingPostDominanceInfo.ll | 18 +
.../ADCE/2004-05-04-UnreachableBlock.ll | 16 +
.../ADCE/2005-02-17-PHI-Invoke-Crash.ll | 45 +
final/test/Transforms/ADCE/basictest.ll | 19 +
final/test/Transforms/ADCE/basictest1.ll | 97 +
final/test/Transforms/ADCE/basictest2.ll | 97 +
final/test/Transforms/ADCE/dce_pure_call.ll | 8 +
final/test/Transforms/ADCE/dce_pure_invoke.ll | 15 +
final/test/Transforms/ADCE/dg.exp | 3 +
.../Transforms/ADCE/unreachable-function.ll | 5 +
.../2008-02-01-ReturnAttrs.ll | 15 +
.../2008-07-02-array-indexing.ll | 25 +
.../ArgumentPromotion/2008-09-07-CGUpdate.ll | 12 +
.../2008-09-08-CGUpdateSelfEdge.ll | 25 +
.../ArgumentPromotion/aggregate-promote.ll | 24 +
.../Transforms/ArgumentPromotion/attrs.ll | 25 +
.../Transforms/ArgumentPromotion/basictest.ll | 23 +
.../Transforms/ArgumentPromotion/byval-2.ll | 26 +
.../Transforms/ArgumentPromotion/byval.ll | 25 +
.../ArgumentPromotion/callgraph-update.ll | 23 +
.../Transforms/ArgumentPromotion/chained.ll | 17 +
.../ArgumentPromotion/control-flow.ll | 19 +
.../ArgumentPromotion/control-flow2.ll | 22 +
.../Transforms/ArgumentPromotion/crash.ll | 59 +
.../test/Transforms/ArgumentPromotion/dg.exp | 3 +
.../Transforms/ArgumentPromotion/pr3085.ll | 1944 ++
.../Transforms/BlockPlacement/basictest.ll | 15 +
final/test/Transforms/BlockPlacement/dg.exp | 3 +
.../2007-10-19-InlineAsmDirectives.ll | 18 +
.../2004-03-13-LoopExtractorCrash.ll | 75 +
.../2004-03-14-DominanceProblem.ll | 33 +
.../2004-03-14-NoSwitchSupport.ll | 28 +
.../CodeExtractor/2004-03-17-MissedLiveIns.ll | 47 +
.../2004-03-17-OutputMismatch.ll | 20 +
.../2004-03-17-UpdatePHIsOutsideRegion.ll | 23 +
.../2004-03-18-InvokeHandling.ll | 194 +
.../2004-08-12-BlockExtractPHI.ll | 26 +
.../CodeExtractor/2004-11-12-InvokeExtract.ll | 15 +
final/test/Transforms/CodeExtractor/dg.exp | 3 +
.../CodeGenPrepare/2008-11-24-RAUW-Self.ll | 511 +
final/test/Transforms/CodeGenPrepare/basic.ll | 29 +
final/test/Transforms/CodeGenPrepare/dg.exp | 3 +
.../2002-05-03-DivideByZeroException.ll | 15 +
.../ConstProp/2002-05-03-NotOperator.ll | 19 +
.../ConstProp/2002-09-03-SetCC-Bools.ll | 20 +
.../ConstProp/2003-05-12-DivideError.ll | 15 +
.../ConstProp/2005-01-28-SetCCGEP.ll | 10 +
.../ConstProp/2006-11-30-vector-cast.ll | 10 +
.../ConstProp/2006-12-01-TruncBoolBug.ll | 7 +
.../ConstProp/2006-12-01-bool-casts.ll | 15 +
.../ConstProp/2007-02-05-BitCast.ll | 7 +
.../Transforms/ConstProp/2007-02-23-sdiv.ll | 5 +
.../Transforms/ConstProp/2007-11-23-cttz.ll | 8 +
.../ConstProp/2008-07-07-VectorCompare.ll | 28 +
.../2009-06-20-constexpr-zero-lhs.ll | 11 +
.../ConstProp/2009-09-01-GEP-Crash.ll | 24 +
final/test/Transforms/ConstProp/basictest.ll | 42 +
final/test/Transforms/ConstProp/bitcast.ll | 10 +
final/test/Transforms/ConstProp/bswap.ll | 25 +
final/test/Transforms/ConstProp/calls.ll | 56 +
.../Transforms/ConstProp/constant-expr.ll | 111 +
final/test/Transforms/ConstProp/dg.exp | 3 +
final/test/Transforms/ConstProp/div-zero.ll | 12 +
.../test/Transforms/ConstProp/extractvalue.ll | 68 +
.../Transforms/ConstProp/float-to-ptr-cast.ll | 15 +
.../test/Transforms/ConstProp/insertvalue.ll | 68 +
final/test/Transforms/ConstProp/loads.ll | 139 +
.../test/Transforms/ConstProp/logicaltest.ll | 35 +
.../test/Transforms/ConstProp/overflow-ops.ll | 183 +
final/test/Transforms/ConstProp/phi.ll | 17 +
final/test/Transforms/ConstProp/remtest.ll | 24 +
.../ConstantMerge/2002-09-23-CPR-Update.ll | 13 +
.../2003-10-28-MergeExternalConstants.ll | 7 +
.../ConstantMerge/2011-01-15-EitherOrder.ll | 18 +
final/test/Transforms/ConstantMerge/dg.exp | 3 +
.../Transforms/ConstantMerge/dont-merge.ll | 44 +
.../Transforms/ConstantMerge/merge-both.ll | 26 +
.../Transforms/ConstantMerge/unnamed-addr.ll | 40 +
.../2010-09-02-Trunc.ll | 25 +
.../2010-09-26-MergeConstantRange.ll | 82 +
.../CorrelatedValuePropagation/basic.ll | 82 +
.../CorrelatedValuePropagation/crash.ll | 37 +
.../CorrelatedValuePropagation/dg.exp | 3 +
.../CorrelatedValuePropagation/non-null.ll | 103 +
.../DeadArgElim/2006-06-27-struct-ret.ll | 11 +
.../DeadArgElim/2007-02-07-FuncRename.ll | 11 +
.../DeadArgElim/2007-10-18-VarargsReturn.ll | 12 +
.../DeadArgElim/2007-12-20-ParamAttrs.ll | 20 +
.../2008-01-16-VarargsParamAttrs.ll | 31 +
.../DeadArgElim/2008-06-23-DeadAfterLive.ll | 23 +
.../DeadArgElim/2009-03-17-MRE-Invoke.ll | 26 +
.../DeadArgElim/2010-04-30-DbgInfo.ll | 68 +
.../test/Transforms/DeadArgElim/basictest.ll | 36 +
final/test/Transforms/DeadArgElim/canon.ll | 24 +
.../Transforms/DeadArgElim/dead_vaargs.ll | 12 +
.../Transforms/DeadArgElim/deadexternal.ll | 52 +
.../test/Transforms/DeadArgElim/deadretval.ll | 18 +
.../Transforms/DeadArgElim/deadretval2.ll | 59 +
final/test/Transforms/DeadArgElim/dg.exp | 3 +
.../test/Transforms/DeadArgElim/keepalive.ll | 30 +
.../Transforms/DeadArgElim/multdeadretval.ll | 68 +
.../2011-03-25-DSEMiscompile.ll | 23 +
.../DeadStoreElimination/PartialStore.ll | 71 +
.../DeadStoreElimination/const-pointers.ll | 39 +
.../Transforms/DeadStoreElimination/crash.ll | 74 +
.../Transforms/DeadStoreElimination/dg.exp | 3 +
.../Transforms/DeadStoreElimination/free.ll | 40 +
.../DeadStoreElimination/lifetime.ll | 37 +
.../DeadStoreElimination/memintrinsics.ll | 47 +
.../DeadStoreElimination/no-targetdata.ll | 15 +
.../Transforms/DeadStoreElimination/simple.ll | 238 +
final/test/Transforms/EarlyCSE/basic.ll | 121 +
final/test/Transforms/EarlyCSE/dg.exp | 3 +
.../FunctionAttrs/2008-09-03-Mutual.ll | 11 +
.../FunctionAttrs/2008-09-03-ReadNone.ll | 18 +
.../FunctionAttrs/2008-09-03-ReadOnly.ll | 9 +
.../FunctionAttrs/2008-09-13-VolatileRead.ll | 9 +
.../FunctionAttrs/2008-10-04-LocalMemory.ll | 64 +
.../FunctionAttrs/2008-12-29-Constant.ll | 8 +
.../FunctionAttrs/2008-12-31-NoCapture.ll | 101 +
.../FunctionAttrs/2009-01-02-LocalStores.ll | 14 +
.../FunctionAttrs/2009-05-06-Malloc.ll | 7 +
.../FunctionAttrs/2010-10-30-volatile.ll | 10 +
final/test/Transforms/FunctionAttrs/dg.exp | 3 +
.../GVN/2007-07-25-DominatedLoop.ll | 86 +
.../Transforms/GVN/2007-07-25-InfiniteLoop.ll | 14 +
final/test/Transforms/GVN/2007-07-25-Loop.ll | 15 +
.../Transforms/GVN/2007-07-25-NestedLoop.ll | 38 +
.../GVN/2007-07-25-SinglePredecessor.ll | 29 +
.../GVN/2007-07-26-InterlockingLoops.ll | 33 +
.../Transforms/GVN/2007-07-26-NonRedundant.ll | 16 +
.../Transforms/GVN/2007-07-26-PhiErasure.ll | 28 +
.../Transforms/GVN/2007-07-30-PredIDom.ll | 274 +
.../Transforms/GVN/2007-07-31-NoDomInherit.ll | 313 +
.../Transforms/GVN/2007-07-31-RedundantPhi.ll | 22 +
.../Transforms/GVN/2008-02-12-UndefLoad.ll | 20 +
.../test/Transforms/GVN/2008-02-13-NewPHI.ll | 22 +
.../GVN/2008-02-24-NonDominatedMemcpy.ll | 25 +
.../Transforms/GVN/2008-02-26-MemCpySize.ll | 46 +
.../Transforms/GVN/2008-07-02-Unreachable.ll | 35 +
.../Transforms/GVN/2008-12-09-SelfRemove.ll | 26 +
.../Transforms/GVN/2008-12-12-RLE-Crash.ll | 35 +
.../GVN/2008-12-14-rle-reanalyze.ll | 18 +
.../Transforms/GVN/2008-12-15-CacheVisited.ll | 28 +
.../GVN/2009-01-21-SortInvalidation.ll | 55 +
.../GVN/2009-01-22-SortInvalidation.ll | 100 +
.../Transforms/GVN/2009-02-17-LoadPRECrash.ll | 193 +
.../Transforms/GVN/2009-03-10-PREOnVoid.ll | 82 +
.../Transforms/GVN/2009-06-17-InvalidPRE.ll | 72 +
.../GVN/2009-07-13-MemDepSortFail.ll | 67 +
.../GVN/2009-11-12-MemDepMallocBitCast.ll | 15 +
.../GVN/2010-03-31-RedundantPHIs.ll | 42 +
.../test/Transforms/GVN/2010-05-08-OneBit.ll | 63 +
.../Transforms/GVN/2010-11-13-Simplify.ll | 15 +
final/test/Transforms/GVN/basic.ll | 10 +
final/test/Transforms/GVN/bitcast-of-call.ll | 12 +
final/test/Transforms/GVN/calls-nonlocal.ll | 49 +
final/test/Transforms/GVN/calls-readonly.ll | 29 +
final/test/Transforms/GVN/condprop.ll | 55 +
final/test/Transforms/GVN/crash-no-aa.ll | 16 +
final/test/Transforms/GVN/crash.ll | 153 +
final/test/Transforms/GVN/dg.exp | 3 +
final/test/Transforms/GVN/invariant-simple.ll | 36 +
final/test/Transforms/GVN/lifetime-simple.ll | 20 +
.../test/Transforms/GVN/load-constant-mem.ll | 13 +
final/test/Transforms/GVN/load-pre-align.ll | 44 +
final/test/Transforms/GVN/load-pre-licm.ll | 39 +
final/test/Transforms/GVN/local-pre.ll | 18 +
final/test/Transforms/GVN/lpre-call-wrap-2.ll | 40 +
final/test/Transforms/GVN/lpre-call-wrap.ll | 55 +
final/test/Transforms/GVN/mixed.ll | 13 +
final/test/Transforms/GVN/non-local-offset.ll | 59 +
.../test/Transforms/GVN/nonescaping-malloc.ll | 108 +
.../Transforms/GVN/null-aliases-nothing.ll | 20 +
final/test/Transforms/GVN/phi-translate.ll | 31 +
final/test/Transforms/GVN/pre-basic-add.ll | 27 +
final/test/Transforms/GVN/pre-load.ll | 391 +
final/test/Transforms/GVN/pre-single-pred.ll | 45 +
final/test/Transforms/GVN/preserve-tbaa.ll | 28 +
final/test/Transforms/GVN/rle-must-alias.ll | 46 +
.../Transforms/GVN/rle-no-phi-translate.ll | 28 +
final/test/Transforms/GVN/rle-nonlocal.ll | 24 +
.../test/Transforms/GVN/rle-phi-translate.ll | 146 +
.../test/Transforms/GVN/rle-semidominated.ll | 19 +
final/test/Transforms/GVN/rle.ll | 546 +
.../GlobalDCE/2002-07-17-CastRef.ll | 11 +
.../GlobalDCE/2002-07-17-ConstantRef.ll | 13 +
.../GlobalDCE/2002-08-17-FunctionDGE.ll | 17 +
.../GlobalDCE/2002-08-17-WorkListTest.ll | 12 +
.../GlobalDCE/2002-09-12-Redeletion.ll | 11 +
.../GlobalDCE/2003-07-01-SelfReference.ll | 11 +
.../2003-10-09-PreserveWeakGlobals.ll | 6 +
.../GlobalDCE/2009-01-05-DeadAliases.ll | 8 +
.../GlobalDCE/2009-02-17-AliasUsesAliasee.ll | 4 +
.../Transforms/GlobalDCE/2009-09-03-MDNode.ll | 264 +
.../Transforms/GlobalDCE/basicvariabletest.ll | 5 +
final/test/Transforms/GlobalDCE/dg.exp | 3 +
.../GlobalDCE/externally_available.ll | 10 +
.../GlobalOpt/2004-10-10-CastStoreOnce.ll | 17 +
.../2005-06-15-LocalizeConstExprCrash.ll | 10 +
.../Transforms/GlobalOpt/2005-09-27-Crash.ll | 27 +
.../GlobalOpt/2006-07-07-InlineAsmCrash.ll | 135 +
.../2006-11-01-ShrinkGlobalPhiCrash.ll | 33 +
.../Transforms/GlobalOpt/2007-04-05-Crash.ll | 34 +
.../Transforms/GlobalOpt/2007-05-13-Crash.ll | 74 +
.../GlobalOpt/2007-06-04-PackedStruct.ll | 36 +
.../GlobalOpt/2007-11-09-GEP-GEP-Crash.ll | 19 +
.../Transforms/GlobalOpt/2008-01-03-Crash.ll | 26 +
.../GlobalOpt/2008-01-13-OutOfRangeSROA.ll | 16 +
.../GlobalOpt/2008-01-29-VolatileGlobal.ll | 9 +
.../GlobalOpt/2008-02-16-NestAttr.ll | 57 +
.../GlobalOpt/2008-04-26-SROA-Global-Align.ll | 32 +
.../GlobalOpt/2008-07-17-addrspace.ll | 28 +
.../GlobalOpt/2008-12-16-HeapSRACrash-2.ll | 24 +
.../GlobalOpt/2008-12-16-HeapSRACrash.ll | 27 +
.../GlobalOpt/2009-01-13-phi-user.ll | 35 +
.../GlobalOpt/2009-02-15-BitcastAlias.ll | 10 +
.../GlobalOpt/2009-02-15-ResolveAlias.ll | 24 +
.../Transforms/GlobalOpt/2009-03-05-dbg.ll | 76 +
.../GlobalOpt/2009-03-06-Anonymous.ll | 11 +
.../GlobalOpt/2009-03-07-PromotePtrToBool.ll | 17 +
.../GlobalOpt/2009-06-01-RecursivePHI.ll | 122 +
.../2009-11-16-BrokenPerformHeapAllocSRoA.ll | 26 +
...2009-11-16-MallocSingleStoreToGlobalVar.ll | 30 +
.../GlobalOpt/2010-02-25-MallocPromote.ll | 18 +
.../GlobalOpt/2010-02-26-MallocSROA.ll | 27 +
.../GlobalOpt/2010-10-19-WeakOdr.ll | 16 +
.../Transforms/GlobalOpt/alias-resolve.ll | 19 +
final/test/Transforms/GlobalOpt/basictest.ll | 9 +
.../GlobalOpt/constantexpr-dangle.ll | 13 +
.../GlobalOpt/constantfold-initializers.ll | 8 +
final/test/Transforms/GlobalOpt/crash.ll | 66 +
.../GlobalOpt/ctor-list-opt-constexpr.ll | 23 +
.../GlobalOpt/ctor-list-opt-inbounds.ll | 23 +
.../Transforms/GlobalOpt/ctor-list-opt.ll | 100 +
.../test/Transforms/GlobalOpt/deadglobal-2.ll | 11 +
final/test/Transforms/GlobalOpt/deadglobal.ll | 9 +
final/test/Transforms/GlobalOpt/dg.exp | 3 +
.../Transforms/GlobalOpt/globalsra-partial.ll | 24 +
.../GlobalOpt/globalsra-unknown-index.ll | 41 +
final/test/Transforms/GlobalOpt/globalsra.ll | 24 +
final/test/Transforms/GlobalOpt/heap-sra-1.ll | 38 +
final/test/Transforms/GlobalOpt/heap-sra-2.ll | 38 +
final/test/Transforms/GlobalOpt/heap-sra-3.ll | 39 +
final/test/Transforms/GlobalOpt/heap-sra-4.ll | 39 +
.../test/Transforms/GlobalOpt/heap-sra-phi.ll | 43 +
.../test/Transforms/GlobalOpt/integer-bool.ll | 23 +
final/test/Transforms/GlobalOpt/iterate.ll | 11 +
.../Transforms/GlobalOpt/load-store-global.ll | 15 +
.../Transforms/GlobalOpt/malloc-promote-1.ll | 24 +
.../Transforms/GlobalOpt/malloc-promote-2.ll | 19 +
final/test/Transforms/GlobalOpt/memcpy.ll | 15 +
final/test/Transforms/GlobalOpt/memset.ll | 21 +
final/test/Transforms/GlobalOpt/metadata.ll | 26 +
final/test/Transforms/GlobalOpt/phi-select.ll | 31 +
.../GlobalOpt/storepointer-compare.ll | 30 +
.../test/Transforms/GlobalOpt/storepointer.ll | 19 +
.../test/Transforms/GlobalOpt/trivialstore.ll | 19 +
final/test/Transforms/GlobalOpt/undef-init.ll | 17 +
.../test/Transforms/GlobalOpt/unnamed-addr.ll | 54 +
.../IPConstantProp/2008-06-09-WeakProp.ll | 15 +
.../IPConstantProp/2009-09-24-byval-ptr.ll | 40 +
.../IPConstantProp/dangling-block-address.ll | 42 +
.../test/Transforms/IPConstantProp/deadarg.ll | 6 +
final/test/Transforms/IPConstantProp/dg.exp | 3 +
.../Transforms/IPConstantProp/recursion.ll | 12 +
.../IPConstantProp/return-argument.ll | 49 +
.../IPConstantProp/return-constant.ll | 26 +
.../IPConstantProp/return-constants.ll | 41 +
.../IPConstantProp/user-with-multiple-uses.ll | 30 +
.../2002-09-09-PointerIndVar.ll | 17 +
.../IndVarSimplify/2003-04-16-ExprAnalysis.ll | 17 +
.../IndVarSimplify/2003-09-12-MultiplePred.ll | 15 +
.../IndVarSimplify/2003-09-23-NotAtTop.ll | 20 +
.../2003-12-10-RemoveInstrCrash.ll | 18 +
.../IndVarSimplify/2003-12-15-Crash.ll | 24 +
.../IndVarSimplify/2003-12-21-IndVarSize.ll | 15 +
.../2004-03-10-PHIInsertionBug.ll | 26 +
.../2004-04-05-InvokeCastCrash.ll | 283 +
.../2004-04-07-ScalarEvolutionCrash.ll | 27 +
.../IndVarSimplify/2005-02-11-InvokeCrash.ll | 23 +
.../2005-02-17-TruncateExprCrash.ll | 61 +
.../2005-02-26-ExitValueCompute.ll | 20 +
.../2005-06-15-InstMoveCrash.ll | 37 +
.../IndVarSimplify/2005-11-18-Crash.ll | 17 +
.../2006-03-31-NegativeStride.ll | 22 +
.../2006-06-16-Indvar-LCSSA-Crash.ll | 22 +
.../IndVarSimplify/2006-09-20-LFTR-Crash.ll | 44 +
.../IndVarSimplify/2006-12-10-BitCast.ll | 33 +
.../IndVarSimplify/2007-01-06-TripCount.ll | 38 +
.../2007-06-06-DeleteDanglesPtr.ll | 117 +
.../IndVarSimplify/2007-11-23-BitcastCrash.ll | 20 +
.../2008-06-15-SCEVExpanderBug.ll | 17 +
.../IndVarSimplify/2008-09-02-IVType.ll | 58 +
.../2008-10-03-CouldNotCompute.ll | 32 +
.../2008-11-25-APFloatAssert.ll | 11 +
.../2009-04-14-shorten_iv_vars.ll | 114 +
.../2009-04-15-shorten-iv-vars-2.ll | 160 +
.../IndVarSimplify/2009-04-22-IndvarCrash.ll | 35 +
.../IndVarSimplify/2009-04-27-Floating.ll | 18 +
.../IndVarSimplify/2009-05-24-useafterfree.ll | 41 +
.../Transforms/IndVarSimplify/ada-loops.ll | 90 +
.../Transforms/IndVarSimplify/addrec-gep.ll | 78 +
.../IndVarSimplify/ashr-tripcount.ll | 107 +
.../Transforms/IndVarSimplify/avoid-i0.ll | 126 +
.../IndVarSimplify/casted-argument.ll | 50 +
.../Transforms/IndVarSimplify/complex-scev.ll | 29 +
final/test/Transforms/IndVarSimplify/crash.ll | 55 +
.../Transforms/IndVarSimplify/dangling-use.ll | 41 +
final/test/Transforms/IndVarSimplify/dg.exp | 3 +
.../IndVarSimplify/divide-pointer.ll | 95 +
.../IndVarSimplify/eliminate-comparison.ll | 108 +
.../IndVarSimplify/eliminate-max.ll | 52 +
.../IndVarSimplify/eliminate-rem.ll | 121 +
.../IndVarSimplify/exit_value_tests.ll | 114 +
.../IndVarSimplify/floating-point-iv.ll | 92 +
.../IndVarSimplify/gep-with-mul-base.ll | 59 +
.../Transforms/IndVarSimplify/indirectbr.ll | 39 +
.../IndVarSimplify/interesting-invoke-use.ll | 57 +
.../iterationCount_zext_or_trunc.ll | 25 +
.../test/Transforms/IndVarSimplify/iv-sext.ll | 143 +
.../test/Transforms/IndVarSimplify/iv-zext.ll | 33 +
.../IndVarSimplify/lftr-other-uses.ll | 36 +
.../Transforms/IndVarSimplify/lftr-promote.ll | 38 +
.../Transforms/IndVarSimplify/lftr_simple.ll | 22 +
.../IndVarSimplify/loop_evaluate10.ll | 47 +
.../IndVarSimplify/loop_evaluate11.ll | 36 +
.../IndVarSimplify/loop_evaluate7.ll | 61 +
.../IndVarSimplify/loop_evaluate8.ll | 63 +
.../IndVarSimplify/loop_evaluate9.ll | 78 +
.../IndVarSimplify/loop_evaluate_1.ll | 21 +
.../IndVarSimplify/loop_evaluate_2.ll | 28 +
.../IndVarSimplify/loop_evaluate_3.ll | 19 +
.../IndVarSimplify/loop_evaluate_4.ll | 20 +
.../IndVarSimplify/loop_evaluate_5.ll | 32 +
.../IndVarSimplify/loop_evaluate_6.ll | 31 +
.../Transforms/IndVarSimplify/masked-iv.ll | 26 +
.../Transforms/IndVarSimplify/max-pointer.ll | 39 +
.../phi-uses-value-multiple-times.ll | 33 +
.../IndVarSimplify/pointer-indvars.ll | 15 +
.../test/Transforms/IndVarSimplify/pointer.ll | 38 +
.../IndVarSimplify/polynomial-expand.ll | 38 +
.../preserve-gep-loop-variant.ll | 42 +
.../IndVarSimplify/preserve-gep-nested.ll | 75 +
.../IndVarSimplify/preserve-gep-remainder.ll | 20 +
.../Transforms/IndVarSimplify/preserve-gep.ll | 39 +
.../IndVarSimplify/preserve-signed-wrap.ll | 38 +
.../promote-iv-to-eliminate-casts.ll | 99 +
.../IndVarSimplify/shrunk-constant.ll | 15 +
.../IndVarSimplify/signed-trip-count.ll | 30 +
.../IndVarSimplify/single-element-range.ll | 27 +
.../Transforms/IndVarSimplify/sink-alloca.ll | 31 +
.../IndVarSimplify/sink-trapping.ll | 19 +
.../Transforms/IndVarSimplify/subtract.ll | 15 +
.../IndVarSimplify/tripcount_compute.ll | 162 +
.../IndVarSimplify/tripcount_infinite.ll | 38 +
final/test/Transforms/IndVarSimplify/udiv.ll | 162 +
.../test/Transforms/IndVarSimplify/uglygep.ll | 40 +
.../IndVarSimplify/variable-stride-ivs-0.ll | 43 +
.../IndVarSimplify/variable-stride-ivs-1.ll | 43 +
.../Inline/2003-09-14-InlineValue.ll | 21 +
.../Inline/2003-09-22-PHINodeInlineFail.ll | 16 +
.../2003-09-22-PHINodesInExceptionDest.ll | 25 +
.../2003-09-22-PHINodesInNormalInvokeDest.ll | 23 +
.../2003-10-13-AllocaDominanceProblem.ll | 19 +
...2003-10-26-InlineInvokeExceptionDestPhi.ll | 20 +
.../Inline/2004-04-15-InlineDeletesCall.ll | 20 +
.../Inline/2004-04-20-InlineLinkOnce.ll | 11 +
.../2004-10-17-InlineFunctionWithoutReturn.ll | 11 +
.../Inline/2006-01-14-CallGraphUpdate.ll | 25 +
.../Inline/2006-07-12-InlinePruneCGUpdate.ll | 840 +
.../Inline/2006-11-09-InlineCGUpdate-2.ll | 245 +
.../Inline/2006-11-09-InlineCGUpdate.ll | 338 +
.../Transforms/Inline/2007-04-15-InlineEH.ll | 49 +
.../Transforms/Inline/2007-06-06-NoInline.ll | 46 +
.../Inline/2007-06-25-WeakInline.ll | 14 +
.../Inline/2007-12-19-InlineNoUnwind.ll | 19 +
.../Transforms/Inline/2008-03-04-StructRet.ll | 28 +
.../Transforms/Inline/2008-03-07-Inline-2.ll | 53 +
.../Transforms/Inline/2008-03-07-Inline.ll | 57 +
.../Inline/2008-09-02-AlwaysInline.ll | 10 +
.../Transforms/Inline/2008-09-02-NoInline.ll | 10 +
.../Inline/2008-10-30-AlwaysInline.ll | 14 +
.../Inline/2008-11-04-AlwaysInline.ll | 7 +
.../2009-01-08-NoInlineDynamicAlloca.ll | 36 +
.../Inline/2009-01-13-RecursiveInlineCrash.ll | 293 +
.../Inline/2009-05-07-CallUsingSelfCrash.ll | 20 +
.../Transforms/Inline/2010-05-12-ValueMap.ll | 28 +
.../Inline/2010-05-31-ByvalTailcall.ll | 24 +
final/test/Transforms/Inline/PR4909.ll | 15 +
final/test/Transforms/Inline/alloca-in-scc.ll | 31 +
final/test/Transforms/Inline/alloca_test.ll | 23 +
.../Inline/always_inline_dyn_alloca.ll | 15 +
final/test/Transforms/Inline/array_merge.ll | 26 +
final/test/Transforms/Inline/basictest.ll | 47 +
final/test/Transforms/Inline/byval.ll | 106 +
.../Transforms/Inline/callgraph-update.ll | 33 +
final/test/Transforms/Inline/casts.ll | 19 +
.../Transforms/Inline/cfg_preserve_test.ll | 16 +
final/test/Transforms/Inline/crash.ll | 119 +
final/test/Transforms/Inline/crash2.ll | 29 +
final/test/Transforms/Inline/delete-call.ll | 22 +
.../test/Transforms/Inline/devirtualize-2.ll | 44 +
.../test/Transforms/Inline/devirtualize-3.ll | 79 +
final/test/Transforms/Inline/devirtualize.ll | 182 +
final/test/Transforms/Inline/dg.exp | 3 +
.../Transforms/Inline/dynamic_alloca_test.ll | 35 +
.../Transforms/Inline/externally_available.ll | 16 +
.../Transforms/Inline/gvn-inline-iteration.ll | 23 +
.../Transforms/Inline/inline-invoke-tail.ll | 35 +
final/test/Transforms/Inline/inline-tail.ll | 15 +
.../test/Transforms/Inline/inline_cleanup.ll | 63 +
.../Transforms/Inline/inline_constprop.ll | 14 +
final/test/Transforms/Inline/inline_dce.ll | 25 +
final/test/Transforms/Inline/inline_prune.ll | 45 +
final/test/Transforms/Inline/invoke_test-1.ll | 24 +
final/test/Transforms/Inline/invoke_test-2.ll | 30 +
final/test/Transforms/Inline/invoke_test-3.ll | 32 +
final/test/Transforms/Inline/nested-inline.ll | 111 +
.../Inline/noinline-recursive-fn.ll | 73 +
final/test/Transforms/Inline/noinline.ll | 18 +
.../InstCombine/2002-03-11-InstCombineHang.ll | 9 +
.../InstCombine/2002-05-14-SubFailure.ll | 10 +
.../InstCombine/2002-08-02-CastTest.ll | 11 +
.../InstCombine/2002-12-05-MissedConstProp.ll | 11 +
.../InstCombine/2003-05-26-CastMiscompile.ll | 8 +
.../InstCombine/2003-05-27-ConstExprCrash.ll | 10 +
.../2003-06-05-BranchInvertInfLoop.ll | 16 +
.../2003-07-21-ExternalConstant.ll | 44 +
.../InstCombine/2003-08-12-AllocaNonNull.ll | 20 +
.../2003-09-09-VolatileLoadElim.ll | 7 +
.../InstCombine/2003-10-29-CallSiteResolve.ll | 15 +
.../InstCombine/2003-11-03-VarargsCallBug.ll | 13 +
.../2004-01-13-InstCombineInvokePHI.ll | 28 +
.../2004-02-23-ShiftShiftOverflow.ll | 15 +
.../2004-03-13-InstCombineInfLoop.ll | 13 +
...004-04-04-InstCombineReplaceAllUsesWith.ll | 10 +
.../InstCombine/2004-05-07-UnsizedCastLoad.ll | 10 +
.../InstCombine/2004-07-27-ConstantExprMul.ll | 9 +
.../InstCombine/2004-08-09-RemInfLoop.ll | 9 +
.../InstCombine/2004-08-10-BoolSetCC.ll | 8 +
.../InstCombine/2004-09-20-BadLoadCombine.ll | 18 +
.../InstCombine/2004-09-20-BadLoadCombine2.ll | 25 +
.../2004-09-28-BadShiftAndSetCC.ll | 9 +
.../InstCombine/2004-11-22-Missed-and-fold.ll | 10 +
...004-11-27-SetCCForCastLargerAndConstant.ll | 192 +
.../InstCombine/2004-12-08-RemInfiniteLoop.ll | 7 +
.../InstCombine/2005-03-04-ShiftOverflow.ll | 9 +
.../InstCombine/2005-04-07-UDivSelectCrash.ll | 8 +
.../InstCombine/2005-06-15-DivSelectCrash.ll | 10 +
.../InstCombine/2005-06-15-ShiftSetCCCrash.ll | 9 +
.../InstCombine/2005-06-16-RangeCrash.ll | 9 +
.../2005-06-16-SetCCOrSetCCMiscompile.ll | 14 +
.../InstCombine/2005-07-07-DeadPHILoop.ll | 14 +
.../2006-02-13-DemandedMiscompile.ll | 10 +
.../InstCombine/2006-02-28-Crash.ll | 8 +
.../InstCombine/2006-03-30-ExtractElement.ll | 8 +
.../2006-04-28-ShiftShiftLongLong.ll | 13 +
.../2006-05-04-DemandedBitCrash.ll | 51 +
.../InstCombine/2006-09-15-CastToBool.ll | 14 +
...06-10-19-SignedToUnsignedCastAndConst-2.ll | 10 +
.../Transforms/InstCombine/2006-10-20-mask.ll | 11 +
.../InstCombine/2006-10-26-VectorReassoc.ll | 9 +
.../InstCombine/2006-11-03-Memmove64.ll | 16 +
.../InstCombine/2006-11-10-ashr-miscompile.ll | 9 +
.../2006-12-01-BadFPVectorXform.ll | 9 +
.../InstCombine/2006-12-05-fp-to-int-ext.ll | 12 +
.../2006-12-08-Phi-ICmp-Op-Fold.ll | 51 +
.../InstCombine/2006-12-08-Select-ICmp.ll | 41 +
.../InstCombine/2006-12-15-Range-Test.ll | 31 +
.../InstCombine/2006-12-23-Select-Cmp-Cmp.ll | 30 +
.../2007-01-13-ExtCompareMiscompile.ll | 10 +
.../InstCombine/2007-01-14-FcmpSelf.ll | 6 +
.../InstCombine/2007-01-18-VectorInfLoop.ll | 7 +
.../InstCombine/2007-01-27-AndICmp.ll | 8 +
.../InstCombine/2007-02-01-LoadSinkAlloca.ll | 45 +
.../InstCombine/2007-02-07-PointerCast.ll | 22 +
.../InstCombine/2007-02-23-PhiFoldInfLoop.ll | 31 +
.../InstCombine/2007-03-13-CompareMerge.ll | 9 +
.../2007-03-19-BadTruncChangePR1261.ll | 10 +
.../InstCombine/2007-03-21-SignedRangeTest.ll | 7 +
.../InstCombine/2007-03-25-BadShiftMask.ll | 29 +
.../InstCombine/2007-03-25-DoubleShift.ll | 9 +
.../InstCombine/2007-03-26-BadShiftMask.ll | 35 +
.../2007-04-04-BadFoldBitcastIntoMalloc.ll | 19 +
.../2007-04-08-SingleEltVectorCrash.ll | 7 +
.../InstCombine/2007-05-04-Crash.ll | 30 +
.../InstCombine/2007-05-10-icmp-or.ll | 8 +
.../InstCombine/2007-05-14-Crash.ll | 18 +
.../InstCombine/2007-05-18-CastFoldBug.ll | 10 +
.../InstCombine/2007-06-06-AshrSignBit.ll | 22 +
.../2007-06-21-DivCompareMiscomp.ll | 9 +
.../InstCombine/2007-08-02-InfiniteLoop.ll | 10 +
.../InstCombine/2007-09-10-AliasConstFold.ll | 13 +
.../InstCombine/2007-09-11-Trampoline.ll | 24 +
.../InstCombine/2007-09-17-AliasConstFold2.ll | 14 +
.../InstCombine/2007-10-10-EliminateMemCpy.ll | 21 +
.../InstCombine/2007-10-12-Crash.ll | 38 +
.../InstCombine/2007-10-28-stacksave.ll | 47 +
.../InstCombine/2007-10-31-RangeCrash.ll | 35 +
.../InstCombine/2007-10-31-StringCrash.ll | 21 +
.../2007-11-07-OpaqueAlignCrash.ll | 22 +
.../InstCombine/2007-11-15-CompareMiscomp.ll | 10 +
.../InstCombine/2007-11-22-IcmpCrash.ll | 16 +
.../2007-11-25-CompatibleAttributes.ll | 12 +
.../2007-12-10-ConstFoldCompare.ll | 9 +
.../InstCombine/2007-12-12-GEPScale.ll | 10 +
.../InstCombine/2007-12-16-AsmNoUnwind.ll | 7 +
.../InstCombine/2007-12-18-AddSelCmpSub.ll | 29 +
.../InstCombine/2007-12-28-IcmpSub2.ll | 89 +
.../2008-01-06-BitCastAttributes.ll | 23 +
.../InstCombine/2008-01-06-CastCrash.ll | 10 +
.../InstCombine/2008-01-06-VoidCast.ll | 10 +
.../InstCombine/2008-01-13-AndCmpCmp.ll | 9 +
.../2008-01-13-NoBitCastAttributes.ll | 15 +
.../InstCombine/2008-01-14-DoubleNest.ll | 24 +
.../2008-01-14-VarArgTrampoline.ll | 24 +
.../2008-01-21-MismatchedCastAndCompare.ll | 20 +
.../InstCombine/2008-01-21-MulTrunc.ll | 17 +
.../InstCombine/2008-01-27-FloatSelect.ll | 7 +
.../InstCombine/2008-01-29-AddICmp.ll | 20 +
.../InstCombine/2008-02-13-MulURem.ll | 8 +
.../InstCombine/2008-02-16-SDivOverflow.ll | 14 +
.../InstCombine/2008-02-16-SDivOverflow2.ll | 9 +
.../InstCombine/2008-02-23-MulSub.ll | 9 +
.../InstCombine/2008-02-28-OrFCmpCrash.ll | 16 +
.../InstCombine/2008-03-13-IntToPtr.ll | 9 +
.../InstCombine/2008-04-22-ByValBitcast.ll | 15 +
.../InstCombine/2008-04-28-VolatileStore.ll | 8 +
.../2008-04-29-VolatileLoadDontMerge.ll | 25 +
.../2008-04-29-VolatileLoadMerge.ll | 21 +
.../InstCombine/2008-05-08-LiveStoreDelete.ll | 25 +
.../InstCombine/2008-05-08-StrLenSink.ll | 32 +
.../InstCombine/2008-05-09-SinkOfInvoke.ll | 33 +
.../InstCombine/2008-05-17-InfLoop.ll | 23 +
.../InstCombine/2008-05-18-FoldIntToPtr.ll | 13 +
.../InstCombine/2008-05-22-IDivVector.ll | 6 +
.../InstCombine/2008-05-22-NegValVector.ll | 8 +
.../InstCombine/2008-05-23-CompareFold.ll | 11 +
.../InstCombine/2008-05-31-AddBool.ll | 7 +
.../InstCombine/2008-05-31-Bools.ll | 24 +
.../InstCombine/2008-06-05-ashr-crash.ll | 7 +
.../InstCombine/2008-06-08-ICmpPHI.ll | 47 +
.../2008-06-13-InfiniteLoopStore.ll | 20 +
.../2008-06-13-ReadOnlyCallStore.ll | 19 +
.../InstCombine/2008-06-19-UncondLoad.ll | 16 +
.../InstCombine/2008-06-21-CompareMiscomp.ll | 11 +
.../InstCombine/2008-06-24-StackRestore.ll | 39 +
.../InstCombine/2008-07-08-AndICmp.ll | 10 +
.../InstCombine/2008-07-08-ShiftOneAndOne.ll | 10 +
.../InstCombine/2008-07-08-SubAnd.ll | 9 +
.../2008-07-08-VolatileLoadMerge.ll | 26 +
.../InstCombine/2008-07-09-SubAndError.ll | 9 +
.../InstCombine/2008-07-10-CastSextBool.ll | 17 +
.../InstCombine/2008-07-10-ICmpBinOp.ll | 19 +
.../InstCombine/2008-07-11-RemAnd.ll | 9 +
.../InstCombine/2008-07-13-DivZero.ll | 16 +
.../Transforms/InstCombine/2008-07-16-fsub.ll | 8 +
.../InstCombine/2008-07-16-sse2_storel_dq.ll | 13 +
.../Transforms/InstCombine/2008-08-05-And.ll | 23 +
.../InstCombine/2008-08-17-ICmpXorSignbit.ll | 41 +
.../InstCombine/2008-09-02-VectorCrash.ll | 27 +
.../InstCombine/2008-09-29-FoldingOr.ll | 10 +
.../InstCombine/2008-10-11-DivCompareFold.ll | 8 +
.../2008-10-23-ConstFoldWithoutMask.ll | 8 +
.../2008-11-01-SRemDemandedBits.ll | 8 +
.../Transforms/InstCombine/2008-11-08-FCmp.ll | 47 +
.../InstCombine/2008-11-20-DivMulRem.ll | 67 +
.../InstCombine/2008-11-27-IDivVector.ll | 11 +
.../InstCombine/2008-11-27-MultiplyIntVec.ll | 11 +
.../InstCombine/2008-11-27-UDivNegative.ll | 6 +
.../InstCombine/2008-12-17-SRemNegConstVec.ll | 7 +
.../InstCombine/2009-01-05-i128-crash.ll | 27 +
.../InstCombine/2009-01-08-AlignAlloca.ll | 28 +
.../2009-01-16-PointerAddrSpace.ll | 11 +
...2009-01-19-fmod-constant-float-specials.ll | 315 +
.../2009-01-19-fmod-constant-float.ll | 75 +
.../InstCombine/2009-01-24-EmptyStruct.ll | 18 +
.../InstCombine/2009-01-31-InfIterate.ll | 22 +
.../InstCombine/2009-01-31-Pressure.ll | 22 +
.../InstCombine/2009-02-04-FPBitcast.ll | 12 +
.../2009-02-20-InstCombine-SROA.ll | 278 +
.../InstCombine/2009-02-21-LoadCST.ll | 12 +
.../2009-02-25-CrashZeroSizeArray.ll | 38 +
.../2009-03-18-vector-ashr-crash.ll | 11 +
.../InstCombine/2009-03-20-AShrOverShift.ll | 9 +
.../InstCombine/2009-03-24-InfLoop.ll | 9 +
.../InstCombine/2009-04-07-MulPromoteToI96.ll | 13 +
.../InstCombine/2009-05-23-FCmpToICmp.ll | 9 +
.../InstCombine/2009-06-11-StoreAddrSpace.ll | 7 +
.../2009-06-16-SRemDemandedBits.ll | 9 +
.../InstCombine/2009-07-02-MaskedIntVector.ll | 15 +
.../InstCombine/2009-12-17-CmpSelectNull.ll | 16 +
.../InstCombine/2010-01-28-NegativeSRem.ll | 19 +
.../InstCombine/2010-03-03-ExtElim.ll | 18 +
.../InstCombine/2010-11-01-lshr-mask.ll | 46 +
.../InstCombine/2010-11-21-SizeZeroTypeGEP.ll | 17 +
.../InstCombine/2010-11-23-Distributed.ll | 23 +
.../InstCombine/2011-02-14-InfLoop.ll | 19 +
.../2011-02-16-InsertelementHang.ll | 11 +
.../Transforms/InstCombine/CPP_min_max.ll | 34 +
.../test/Transforms/InstCombine/IntPtrCast.ll | 10 +
.../Transforms/InstCombine/JavaCompare.ll | 14 +
final/test/Transforms/InstCombine/README.txt | 4 +
.../test/Transforms/InstCombine/add-shrink.ll | 14 +
.../test/Transforms/InstCombine/add-sitofp.ll | 9 +
final/test/Transforms/InstCombine/add.ll | 301 +
final/test/Transforms/InstCombine/add2.ll | 43 +
final/test/Transforms/InstCombine/add3.ll | 21 +
.../test/Transforms/InstCombine/addnegneg.ll | 12 +
.../InstCombine/adjust-for-sminmax.ll | 85 +
.../Transforms/InstCombine/align-2d-gep.ll | 44 +
.../test/Transforms/InstCombine/align-addr.ll | 60 +
.../Transforms/InstCombine/align-external.ll | 22 +
final/test/Transforms/InstCombine/alloca.ll | 46 +
.../Transforms/InstCombine/and-compare.ll | 11 +
final/test/Transforms/InstCombine/and-fcmp.ll | 34 +
.../test/Transforms/InstCombine/and-not-or.ll | 34 +
.../test/Transforms/InstCombine/and-or-and.ll | 61 +
.../test/Transforms/InstCombine/and-or-not.ll | 46 +
final/test/Transforms/InstCombine/and-or.ll | 39 +
.../Transforms/InstCombine/and-xor-merge.ll | 19 +
final/test/Transforms/InstCombine/and.ll | 255 +
final/test/Transforms/InstCombine/and2.ll | 37 +
.../test/Transforms/InstCombine/apint-add1.ll | 34 +
.../test/Transforms/InstCombine/apint-add2.ll | 46 +
.../InstCombine/apint-and-compare.ll | 16 +
.../InstCombine/apint-and-or-and.ll | 50 +
.../InstCombine/apint-and-xor-merge.ll | 22 +
.../test/Transforms/InstCombine/apint-and1.ll | 57 +
.../test/Transforms/InstCombine/apint-and2.ll | 82 +
.../InstCombine/apint-call-cast-target.ll | 17 +
.../InstCombine/apint-cast-and-cast.ll | 15 +
.../InstCombine/apint-cast-cast-to-and.ll | 8 +
.../test/Transforms/InstCombine/apint-cast.ll | 30 +
.../test/Transforms/InstCombine/apint-div1.ll | 22 +
.../test/Transforms/InstCombine/apint-div2.ll | 22 +
.../test/Transforms/InstCombine/apint-mul1.ll | 11 +
.../test/Transforms/InstCombine/apint-mul2.ll | 12 +
.../test/Transforms/InstCombine/apint-not.ll | 42 +
.../test/Transforms/InstCombine/apint-or1.ll | 36 +
.../test/Transforms/InstCombine/apint-or2.ll | 35 +
.../test/Transforms/InstCombine/apint-rem1.ll | 22 +
.../test/Transforms/InstCombine/apint-rem2.ll | 22 +
.../Transforms/InstCombine/apint-select.ll | 44 +
.../InstCombine/apint-shift-simplify.ll | 23 +
.../Transforms/InstCombine/apint-shift.ll | 184 +
.../Transforms/InstCombine/apint-shl-trunc.ll | 14 +
.../test/Transforms/InstCombine/apint-sub.ll | 141 +
.../test/Transforms/InstCombine/apint-xor1.ll | 50 +
.../test/Transforms/InstCombine/apint-xor2.ll | 51 +
.../Transforms/InstCombine/apint-zext1.ll | 11 +
.../Transforms/InstCombine/apint-zext2.ll | 11 +
final/test/Transforms/InstCombine/ashr-nop.ll | 8 +
.../test/Transforms/InstCombine/badmalloc.ll | 20 +
.../test/Transforms/InstCombine/binop-cast.ll | 9 +
.../test/Transforms/InstCombine/bit-checks.ll | 372 +
.../Transforms/InstCombine/bit-tracking.ll | 26 +
.../InstCombine/bitcast-sext-vector.ll | 11 +
.../Transforms/InstCombine/bitcast-store.ll | 21 +
.../InstCombine/bitcast-vec-canon.ll | 22 +
.../InstCombine/bitcast-vec-uniform.ll | 70 +
.../InstCombine/bitcast-vector-fold.ll | 33 +
final/test/Transforms/InstCombine/bitcast.ll | 105 +
final/test/Transforms/InstCombine/bitcount.ll | 19 +
final/test/Transforms/InstCombine/bittest.ll | 30 +
.../test/Transforms/InstCombine/bswap-fold.ll | 75 +
final/test/Transforms/InstCombine/bswap.ll | 74 +
.../InstCombine/call-cast-target.ll | 14 +
.../Transforms/InstCombine/call-intrinsics.ll | 19 +
final/test/Transforms/InstCombine/call.ll | 132 +
final/test/Transforms/InstCombine/call2.ll | 27 +
.../InstCombine/canonicalize_branch.ll | 44 +
.../Transforms/InstCombine/cast-mul-select.ll | 41 +
final/test/Transforms/InstCombine/cast-set.ll | 65 +
final/test/Transforms/InstCombine/cast.ll | 651 +
final/test/Transforms/InstCombine/cast_ptr.ll | 79 +
.../Transforms/InstCombine/compare-signs.ll | 58 +
.../InstCombine/constant-fold-compare.ll | 8 +
.../InstCombine/constant-fold-gep.ll | 74 +
final/test/Transforms/InstCombine/crash.ll | 372 +
.../Transforms/InstCombine/dce-iterate.ll | 24 +
final/test/Transforms/InstCombine/deadcode.ll | 24 +
final/test/Transforms/InstCombine/dg.exp | 3 +
final/test/Transforms/InstCombine/div.ll | 84 +
.../InstCombine/enforce-known-alignment.ll | 18 +
final/test/Transforms/InstCombine/exact.ll | 154 +
.../Transforms/InstCombine/extractvalue.ll | 107 +
.../Transforms/InstCombine/fcmp-select.ll | 53 +
.../Transforms/InstCombine/fcmp-special.ll | 155 +
.../InstCombine/fold-bin-operand.ll | 14 +
.../test/Transforms/InstCombine/fold-calls.ll | 19 +
.../InstCombine/fold-vector-select.ll | 13 +
.../InstCombine/fold-vector-zero.ll | 35 +
.../Transforms/InstCombine/fp-ret-bitcast.ll | 28 +
final/test/Transforms/InstCombine/fpcast.ll | 15 +
final/test/Transforms/InstCombine/fpextend.ll | 36 +
final/test/Transforms/InstCombine/fsub.ll | 23 +
final/test/Transforms/InstCombine/gepgep.ll | 13 +
.../Transforms/InstCombine/getelementptr.ll | 479 +
.../Transforms/InstCombine/hoist_instr.ll | 18 +
final/test/Transforms/InstCombine/icmp.ll | 496 +
final/test/Transforms/InstCombine/idioms.ll | 32 +
.../test/Transforms/InstCombine/intrinsics.ll | 190 +
.../test/Transforms/InstCombine/invariant.ll | 16 +
.../Transforms/InstCombine/known_align.ll | 27 +
final/test/Transforms/InstCombine/load-cmp.ll | 112 +
.../Transforms/InstCombine/load-select.ll | 16 +
final/test/Transforms/InstCombine/load.ll | 98 +
final/test/Transforms/InstCombine/load3.ll | 27 +
.../InstCombine/loadstore-alignment.ll | 67 +
.../Transforms/InstCombine/logical-select.ll | 68 +
final/test/Transforms/InstCombine/lshr-phi.ll | 35 +
.../InstCombine/malloc-free-delete.ll | 25 +
final/test/Transforms/InstCombine/malloc.ll | 7 +
final/test/Transforms/InstCombine/malloc2.ll | 22 +
final/test/Transforms/InstCombine/malloc3.ll | 26 +
.../Transforms/InstCombine/memcpy-to-load.ll | 14 +
final/test/Transforms/InstCombine/memcpy.ll | 19 +
final/test/Transforms/InstCombine/memmove.ll | 42 +
final/test/Transforms/InstCombine/memset.ll | 15 +
final/test/Transforms/InstCombine/memset2.ll | 15 +
.../test/Transforms/InstCombine/memset_chk.ll | 18 +
.../Transforms/InstCombine/mul-masked-bits.ll | 10 +
final/test/Transforms/InstCombine/mul.ll | 116 +
.../Transforms/InstCombine/multi-use-or.ll | 24 +
final/test/Transforms/InstCombine/narrow.ll | 18 +
.../Transforms/InstCombine/neon-intrinsics.ll | 25 +
.../test/Transforms/InstCombine/no-negzero.ll | 33 +
final/test/Transforms/InstCombine/not-fcmp.ll | 10 +
final/test/Transforms/InstCombine/not.ll | 54 +
final/test/Transforms/InstCombine/nothrow.ll | 8 +
final/test/Transforms/InstCombine/nsw.ll | 39 +
final/test/Transforms/InstCombine/objsize.ll | 162 +
.../Transforms/InstCombine/odr-linkage.ll | 19 +
final/test/Transforms/InstCombine/or-fcmp.ll | 58 +
.../test/Transforms/InstCombine/or-to-xor.ll | 42 +
final/test/Transforms/InstCombine/or-xor.ll | 94 +
final/test/Transforms/InstCombine/or.ll | 392 +
final/test/Transforms/InstCombine/overflow.ll | 133 +
.../Transforms/InstCombine/phi-merge-gep.ll | 102 +
final/test/Transforms/InstCombine/phi.ll | 546 +
final/test/Transforms/InstCombine/pr2645-0.ll | 33 +
final/test/Transforms/InstCombine/pr2645-1.ll | 39 +
final/test/Transforms/InstCombine/pr2996.ll | 12 +
final/test/Transforms/InstCombine/pr8547.ll | 26 +
.../InstCombine/preserve-sminmax.ll | 32 +
.../Transforms/InstCombine/ptr-int-cast.ll | 29 +
final/test/Transforms/InstCombine/rem.ll | 88 +
final/test/Transforms/InstCombine/sdiv-1.ll | 22 +
final/test/Transforms/InstCombine/sdiv-2.ll | 28 +
.../test/Transforms/InstCombine/sdiv-shift.ll | 9 +
final/test/Transforms/InstCombine/select-2.ll | 18 +
.../Transforms/InstCombine/select-crash.ll | 20 +
.../InstCombine/select-load-call.ll | 15 +
final/test/Transforms/InstCombine/select.ll | 726 +
final/test/Transforms/InstCombine/set.ll | 171 +
.../InstCombine/setcc-strength-reduce.ll | 37 +
final/test/Transforms/InstCombine/sext.ll | 128 +
.../test/Transforms/InstCombine/shift-sra.ll | 78 +
final/test/Transforms/InstCombine/shift.ll | 487 +
.../InstCombine/shufflemask-undef.ll | 109 +
.../InstCombine/shufflevec-constant.ll | 14 +
.../InstCombine/signed-comparison.ll | 28 +
final/test/Transforms/InstCombine/signext.ll | 87 +
.../simplify-demanded-bits-pointer.ll | 84 +
.../InstCombine/sink_instruction.ll | 56 +
final/test/Transforms/InstCombine/sitofp.ll | 55 +
final/test/Transforms/InstCombine/sqrt.ll | 32 +
.../InstCombine/srem-simplify-bug.ll | 9 +
final/test/Transforms/InstCombine/srem.ll | 8 +
final/test/Transforms/InstCombine/srem1.ll | 18 +
.../Transforms/InstCombine/stack-overalign.ll | 29 +
.../InstCombine/stacksaverestore.ll | 56 +
final/test/Transforms/InstCombine/store.ll | 85 +
.../test/Transforms/InstCombine/strcpy_chk.ll | 13 +
final/test/Transforms/InstCombine/sub.ll | 303 +
final/test/Transforms/InstCombine/trunc.ll | 119 +
.../InstCombine/udiv-simplify-bug-0.ll | 14 +
.../InstCombine/udiv-simplify-bug-1.ll | 20 +
.../udiv_select_to_select_shift.ll | 17 +
.../InstCombine/udivrem-change-width.ll | 21 +
.../InstCombine/urem-simplify-bug.ll | 32 +
final/test/Transforms/InstCombine/urem.ll | 8 +
.../InstCombine/vec_demanded_elts.ll | 138 +
.../Transforms/InstCombine/vec_extract_elt.ll | 9 +
.../Transforms/InstCombine/vec_insertelt.ll | 7 +
.../test/Transforms/InstCombine/vec_narrow.ll | 12 +
final/test/Transforms/InstCombine/vec_sext.ll | 22 +
.../Transforms/InstCombine/vec_shuffle.ll | 112 +
.../Transforms/InstCombine/vector-casts.ll | 151 +
.../Transforms/InstCombine/vector-srem.ll | 9 +
.../Transforms/InstCombine/volatile_store.ll | 14 +
.../test/Transforms/InstCombine/xor-undef.ll | 6 +
final/test/Transforms/InstCombine/xor.ll | 193 +
final/test/Transforms/InstCombine/xor2.ll | 53 +
.../InstCombine/zero-point-zero-add.ll | 15 +
.../InstCombine/zeroext-and-reduce.ll | 10 +
.../InstCombine/zext-bool-add-sub.ll | 29 +
.../test/Transforms/InstCombine/zext-fold.ll | 12 +
.../Transforms/InstCombine/zext-or-icmp.ll | 35 +
final/test/Transforms/InstCombine/zext.ll | 11 +
.../InstSimplify/2010-12-20-Boolean.ll | 29 +
.../InstSimplify/2010-12-20-Distribute.ll | 62 +
.../InstSimplify/2011-01-14-Thread.ll | 9 +
.../InstSimplify/2011-02-01-Vector.ll | 8 +
final/test/Transforms/InstSimplify/compare.ll | 295 +
final/test/Transforms/InstSimplify/dg.exp | 3 +
.../Transforms/InstSimplify/exact-nsw-nuw.ll | 44 +
final/test/Transforms/InstSimplify/fdiv.ll | 17 +
.../Transforms/InstSimplify/reassociate.ll | 186 +
.../Internalize/2008-05-09-AllButMain.ll | 27 +
.../2008-05-09-AllButMain.ll.apifile | 2 +
.../2009-01-05-InternalizeAliases.ll | 10 +
.../Internalize/available_externally.ll | 16 +
final/test/Transforms/Internalize/dg.exp | 3 +
.../JumpThreading/2008-11-27-EntryMunge.ll | 13 +
.../JumpThreading/2010-08-26-and.ll | 162 +
.../Transforms/JumpThreading/and-and-cond.ll | 37 +
.../test/Transforms/JumpThreading/and-cond.ll | 35 +
final/test/Transforms/JumpThreading/basic.ll | 478 +
.../JumpThreading/branch-no-const.ll | 21 +
.../test/Transforms/JumpThreading/compare.ll | 30 +
final/test/Transforms/JumpThreading/crash.ll | 513 +
.../JumpThreading/degenerate-phi.ll | 24 +
final/test/Transforms/JumpThreading/dg.exp | 3 +
.../Transforms/JumpThreading/indirectbr.ll | 94 +
.../test/Transforms/JumpThreading/lvi-load.ll | 49 +
.../JumpThreading/no-irreducible-loops.ll | 38 +
.../test/Transforms/JumpThreading/or-undef.ll | 69 +
final/test/Transforms/JumpThreading/select.ll | 123 +
.../Transforms/JumpThreading/thread-loads.ll | 41 +
.../LCSSA/2006-06-03-IncorrectIDFPhis.ll | 23 +
.../2006-06-12-MultipleExitsSameBlock.ll | 27 +
.../LCSSA/2006-07-09-NoDominator.ll | 24 +
.../LCSSA/2006-10-31-UnreachableBlock-2.ll | 145 +
.../LCSSA/2006-10-31-UnreachableBlock.ll | 184 +
.../Transforms/LCSSA/2007-07-12-LICM-2.ll | 17 +
.../Transforms/LCSSA/2007-07-12-LICM-3.ll | 24 +
.../test/Transforms/LCSSA/2007-07-12-LICM.ll | 14 +
final/test/Transforms/LCSSA/basictest.ll | 24 +
final/test/Transforms/LCSSA/dg.exp | 3 +
final/test/Transforms/LCSSA/indirectbr.ll | 542 +
final/test/Transforms/LCSSA/invoke-dest.ll | 143 +
.../test/Transforms/LCSSA/unreachable-use.ll | 27 +
.../LICM/2003-02-26-LoopExitNotDominated.ll | 18 +
.../LICM/2003-02-27-NestedLoopExitBlocks.ll | 17 +
.../2003-02-27-PreheaderExitNodeUpdate.ll | 16 +
.../LICM/2003-02-27-PreheaderProblem.ll | 24 +
.../LICM/2003-02-27-StoreSinkPHIs.ll | 15 +
.../LICM/2003-02-28-PromoteDifferentType.ll | 15 +
.../Transforms/LICM/2003-05-02-LoadHoist.ll | 23 +
.../LICM/2003-12-11-SinkingToPHI.ll | 16 +
.../2004-09-14-AliasAnalysisInvalidate.ll | 19 +
.../LICM/2004-11-17-UndefIndexCrash.ll | 20 +
.../LICM/2005-03-24-LICM-Aggregate-Crash.ll | 9 +
.../LICM/2006-09-12-DeadUserOfSunkInstr.ll | 148 +
.../LICM/2007-05-22-VolatileSink.ll | 56 +
.../Transforms/LICM/2007-07-30-AliasSet.ll | 39 +
.../LICM/2007-09-17-PromoteValue.ll | 61 +
.../LICM/2007-09-24-PromoteNullValue.ll | 46 +
.../LICM/2007-10-01-PromoteSafeValue.ll | 21 +
.../LICM/2008-05-20-AliasSetVAArg.ll | 30 +
.../LICM/2008-07-22-LoadGlobalConstant.ll | 39 +
.../LICM/2009-12-10-LICM-Indbr-Crash.ll | 21 +
final/test/Transforms/LICM/Preserve-LCSSA.ll | 25 +
final/test/Transforms/LICM/basictest.ll | 15 +
final/test/Transforms/LICM/crash.ll | 74 +
final/test/Transforms/LICM/dg.exp | 3 +
final/test/Transforms/LICM/hoisting.ll | 66 +
.../test/Transforms/LICM/no-preheader-test.ll | 20 +
final/test/Transforms/LICM/scalar_promote.ll | 150 +
final/test/Transforms/LICM/sinking.ll | 249 +
.../LoopDeletion/2007-07-23-InfiniteLoop.ll | 13 +
.../Transforms/LoopDeletion/2008-05-06-Phi.ll | 109 +
final/test/Transforms/LoopDeletion/dcetest.ll | 36 +
final/test/Transforms/LoopDeletion/dg.exp | 3 +
.../LoopDeletion/multiple-exit-conditions.ll | 27 +
.../Transforms/LoopDeletion/multiple-exits.ll | 26 +
.../LoopDeletion/simplify-then-delete.ll | 65 +
final/test/Transforms/LoopIdiom/basic.ll | 349 +
final/test/Transforms/LoopIdiom/debug-line.ll | 49 +
final/test/Transforms/LoopIdiom/dg.exp | 3 +
.../LoopRotate/2009-01-25-SingleEntryPhi.ll | 21 +
.../test/Transforms/LoopRotate/PhiRename-1.ll | 95 +
.../LoopRotate/PhiSelfRefernce-1.ll | 39 +
final/test/Transforms/LoopRotate/basic.ll | 35 +
final/test/Transforms/LoopRotate/crash.ll | 139 +
final/test/Transforms/LoopRotate/dbgvalue.ll | 59 +
final/test/Transforms/LoopRotate/dg.exp | 3 +
.../test/Transforms/LoopRotate/indirectbr.ll | 43 +
.../Transforms/LoopRotate/phi-duplicate.ll | 40 +
final/test/Transforms/LoopRotate/pr2639.ll | 38 +
.../Transforms/LoopRotate/preserve-scev.ll | 47 +
.../LoopSimplify/2003-04-25-AssertFail.ll | 20 +
.../2003-05-12-PreheaderExitOfChild.ll | 42 +
.../LoopSimplify/2003-08-15-PreheadersFail.ll | 52 +
.../2003-12-10-ExitBlocksProblem.ll | 36 +
.../2004-02-05-DominatorInfoCorruption.ll | 14 +
.../2004-03-15-IncorrectDomUpdate.ll | 11 +
.../2004-04-01-IncorrectDomUpdate.ll | 20 +
...2004-04-12-LoopSimplify-SwitchBackedges.ll | 18 +
...004-04-13-LoopSimplifyUpdateDomFrontier.ll | 18 +
.../LoopSimplify/2007-10-28-InvokeCrash.ll | 892 +
.../2010-07-15-IncorrectDomFrontierUpdate.ll | 20 +
.../2010-12-26-PHIInfiniteLoop.ll | 43 +
.../test/Transforms/LoopSimplify/basictest.ll | 16 +
final/test/Transforms/LoopSimplify/dg.exp | 3 +
.../Transforms/LoopSimplify/hardertest.ll | 15 +
.../LoopSimplify/indirectbr-backedge.ll | 35 +
.../Transforms/LoopSimplify/indirectbr.ll | 100 +
.../Transforms/LoopSimplify/merge-exits.ll | 44 +
.../LoopSimplify/phi-node-simplify.ll | 55 +
.../Transforms/LoopSimplify/preserve-scev.ll | 50 +
.../LoopSimplify/single-backedge.ll | 20 +
.../LoopSimplify/unreachable-loop-pred.ll | 20 +
.../LoopStrengthReduce/2005-08-15-AddRecIV.ll | 57 +
.../2005-08-17-OutOfLoopVariant.ll | 17 +
.../2005-09-12-UsesOutOutsideOfLoop.ll | 32 +
.../2007-04-23-UseIterator.ll | 71 +
.../2008-08-13-CmpStride.ll | 31 +
.../LoopStrengthReduce/2008-08-14-ShadowIV.ll | 99 +
.../LoopStrengthReduce/2008-09-09-Overflow.ll | 48 +
...9-01-13-nonconstant-stride-outside-loop.ll | 39 +
.../2009-04-28-no-reduce-mul.ll | 48 +
.../LoopStrengthReduce/2009-11-10-LSRCrash.ll | 130 +
.../LoopStrengthReduce/count-to-zero.ll | 42 +
.../Transforms/LoopStrengthReduce/dead-phi.ll | 21 +
.../test/Transforms/LoopStrengthReduce/dg.exp | 3 +
.../LoopStrengthReduce/different-type-ivs.ll | 25 +
.../dont-hoist-simple-loop-constants.ll | 23 +
.../dont_insert_redundant_ops.ll | 36 +
.../LoopStrengthReduce/dont_reduce_bytes.ll | 22 +
.../LoopStrengthReduce/dont_reverse.ll | 21 +
.../exit_compare_live_range.ll | 21 +
.../hoist-parent-preheader.ll | 32 +
.../invariant_value_first.ll | 23 +
.../invariant_value_first_arg.ll | 20 +
.../LoopStrengthReduce/nested-reduce.ll | 49 +
.../LoopStrengthReduce/nonlinear-postinc.ll | 44 +
.../LoopStrengthReduce/ops_after_indvar.ll | 24 +
.../phi_node_update_multiple_preds.ll | 28 +
.../Transforms/LoopStrengthReduce/pr2537.ll | 21 +
.../Transforms/LoopStrengthReduce/pr2570.ll | 287 +
.../Transforms/LoopStrengthReduce/pr3086.ll | 29 +
.../Transforms/LoopStrengthReduce/pr3399.ll | 32 +
.../Transforms/LoopStrengthReduce/pr3571.ll | 27 +
.../quadradic-exit-value.ll | 18 +
.../LoopStrengthReduce/related_indvars.ll | 27 +
.../LoopStrengthReduce/remove_indvar.ll | 21 +
.../share_code_in_preheader.ll | 27 +
.../LoopStrengthReduce/share_ivs.ll | 24 +
.../Transforms/LoopStrengthReduce/uglygep.ll | 66 +
.../use_postinc_value_outside_loop.ll | 29 +
.../var_stride_used_by_compare.ll | 41 +
.../LoopStrengthReduce/variable_stride.ll | 18 +
.../2004-05-13-DontUnrollTooMuch.ll | 14 +
.../2005-03-06-BadLoopInfoUpdate.ll | 22 +
.../LoopUnroll/2006-08-24-MultiBlockLoop.ll | 16 +
.../LoopUnroll/2007-04-16-PhiUpdate.ll | 17 +
.../LoopUnroll/2007-05-05-UnrollMiscomp.ll | 36 +
.../LoopUnroll/2007-05-09-UnknownTripCount.ll | 18 +
.../Transforms/LoopUnroll/2007-11-05-Crash.ll | 295 +
final/test/Transforms/LoopUnroll/basic.ll | 24 +
final/test/Transforms/LoopUnroll/dg.exp | 3 +
.../LoopUnroll/shifted-tripcount.ll | 28 +
.../LoopUnswitch/2006-06-13-SingleEntryPHI.ll | 35 +
.../LoopUnswitch/2006-06-27-DeadSwitchCase.ll | 25 +
.../LoopUnswitch/2007-05-09-Unreachable.ll | 28 +
.../Transforms/LoopUnswitch/2007-05-09-tl.ll | 95 +
.../LoopUnswitch/2007-07-12-ExitDomInfo.ll | 45 +
.../LoopUnswitch/2007-07-13-DomInfo.ll | 27 +
.../LoopUnswitch/2007-07-18-DomInfo.ll | 66 +
.../Transforms/LoopUnswitch/2007-08-01-Dom.ll | 30 +
.../LoopUnswitch/2007-08-01-LCSSA.ll | 55 +
.../LoopUnswitch/2007-10-04-DomFrontier.ll | 29 +
.../LoopUnswitch/2008-06-02-DomInfo.ll | 26 +
.../LoopUnswitch/2008-06-17-DomFrontier.ll | 22 +
.../LoopUnswitch/2008-11-03-Invariant.ll | 36 +
.../LoopUnswitch/2010-11-18-LCSSA.ll | 28 +
.../test/Transforms/LoopUnswitch/basictest.ll | 31 +
final/test/Transforms/LoopUnswitch/crash.ll | 66 +
final/test/Transforms/LoopUnswitch/dg.exp | 3 +
.../Transforms/LoopUnswitch/infinite-loop.ll | 53 +
.../LoopUnswitch/preserve-analyses.ll | 129 +
.../Transforms/LowerAtomic/atomic-load.ll | 40 +
.../Transforms/LowerAtomic/atomic-swap.ll | 26 +
final/test/Transforms/LowerAtomic/barrier.ll | 10 +
final/test/Transforms/LowerAtomic/dg.exp | 3 +
.../LowerInvoke/2003-12-10-Crash.ll | 22 +
.../LowerInvoke/2004-02-29-PHICrash.ll | 15 +
.../LowerInvoke/2005-08-03-InvokeWithPHI.ll | 17 +
.../2005-08-03-InvokeWithPHIUse.ll | 15 +
.../2008-02-14-CritEdgePhiCrash.ll | 14 +
.../test/Transforms/LowerInvoke/basictest.ll | 30 +
final/test/Transforms/LowerInvoke/dg.exp | 3 +
.../2003-11-05-DominanceProperties.ll | 16 +
final/test/Transforms/LowerSetJmp/dg.exp | 3 +
.../test/Transforms/LowerSetJmp/simpletest.ll | 32 +
.../LowerSwitch/2003-05-01-PHIProblem.ll | 15 +
.../LowerSwitch/2003-08-23-EmptySwitch.ll | 9 +
.../2004-03-13-SwitchIsDefaultCrash.ll | 19 +
final/test/Transforms/LowerSwitch/dg.exp | 3 +
final/test/Transforms/LowerSwitch/feature.ll | 51 +
.../Mem2Reg/2002-03-28-UninitializedVal.ll | 11 +
.../2002-05-01-ShouldNotPromoteThisAlloca.ll | 12 +
.../Mem2Reg/2003-04-10-DFNotFound.ll | 10 +
.../Mem2Reg/2003-04-18-DeadBlockProblem.ll | 16 +
.../2003-04-24-MultipleIdenticalSuccessors.ll | 16 +
.../Mem2Reg/2003-06-26-IterativePromote.ll | 16 +
.../Mem2Reg/2003-10-05-DeadPHIInsertion.ll | 22 +
.../Mem2Reg/2005-06-30-ReadBeforeWrite.ll | 47 +
.../Transforms/Mem2Reg/2005-11-28-Crash.ll | 62 +
.../Mem2Reg/2007-08-27-VolatileLoadsStores.ll | 47 +
.../Transforms/Mem2Reg/ConvertDebugInfo.ll | 45 +
.../Transforms/Mem2Reg/ConvertDebugInfo2.ll | 52 +
.../Mem2Reg/PromoteMemToRegister.ll | 18 +
.../Transforms/Mem2Reg/UndefValuesMerge.ll | 13 +
final/test/Transforms/Mem2Reg/crash.ll | 41 +
final/test/Transforms/Mem2Reg/dg.exp | 3 +
.../MemCpyOpt/2008-02-24-MultipleUseofSRet.ll | 34 +
.../MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll | 20 +
final/test/Transforms/MemCpyOpt/align.ll | 19 +
final/test/Transforms/MemCpyOpt/crash.ll | 58 +
final/test/Transforms/MemCpyOpt/dg.exp | 3 +
.../test/Transforms/MemCpyOpt/form-memset.ll | 222 +
.../Transforms/MemCpyOpt/loadstore-sret.ll | 25 +
.../Transforms/MemCpyOpt/memcpy-to-memset.ll | 19 +
final/test/Transforms/MemCpyOpt/memcpy.ll | 111 +
final/test/Transforms/MemCpyOpt/memmove.ll | 37 +
final/test/Transforms/MemCpyOpt/smaller.ll | 28 +
final/test/Transforms/MemCpyOpt/sret.ll | 28 +
.../MergeFunc/2011-02-08-RemoveEqual.ll | 276 +
final/test/Transforms/MergeFunc/dg.exp | 3 +
final/test/Transforms/MergeFunc/fold-weak.ll | 17 +
.../Transforms/MergeFunc/phi-speculation1.ll | 29 +
.../Transforms/MergeFunc/phi-speculation2.ll | 29 +
final/test/Transforms/MergeFunc/vector.ll | 76 +
.../MergeFunc/vectors-and-arrays.ll | 18 +
.../PruneEH/2003-09-14-ExternalCall.ll | 11 +
.../PruneEH/2003-11-21-PHIUpdate.ll | 15 +
.../Transforms/PruneEH/2008-06-02-Weak.ll | 12 +
.../Transforms/PruneEH/2008-09-05-CGUpdate.ll | 1445 +
final/test/Transforms/PruneEH/dg.exp | 3 +
.../test/Transforms/PruneEH/recursivetest.ll | 20 +
.../Transforms/PruneEH/simplenoreturntest.ll | 13 +
final/test/Transforms/PruneEH/simpletest.ll | 19 +
.../2002-05-15-AgressiveSubMove.ll | 9 +
.../Reassociate/2002-05-15-MissedTree.ll | 9 +
.../Reassociate/2002-05-15-SubReassociate.ll | 12 +
.../Reassociate/2002-05-15-SubReassociate2.ll | 13 +
.../2002-07-09-DominanceProblem.ll | 10 +
.../Reassociate/2003-08-12-InfiniteLoop.ll | 9 +
.../Reassociate/2005-08-24-Crash.ll | 13 +
.../2005-09-01-ArrayOutOfBounds.ll | 23 +
.../2006-04-27-ReassociateVector.ll | 8 +
.../Reassociate/2011-01-26-UseAfterFree.ll | 35 +
.../test/Transforms/Reassociate/basictest.ll | 216 +
final/test/Transforms/Reassociate/crash.ll | 44 +
final/test/Transforms/Reassociate/dg.exp | 3 +
final/test/Transforms/Reassociate/inverses.ll | 34 +
final/test/Transforms/Reassociate/looptest.ll | 50 +
.../test/Transforms/Reassociate/mulfactor.ll | 14 +
.../test/Transforms/Reassociate/mulfactor2.ll | 15 +
final/test/Transforms/Reassociate/negation.ll | 21 +
.../Transforms/Reassociate/optional-flags.ll | 29 +
final/test/Transforms/Reassociate/otherops.ll | 28 +
.../Transforms/Reassociate/shift-factor.ll | 12 +
.../test/Transforms/Reassociate/shifttest.ll | 12 +
final/test/Transforms/Reassociate/subtest.ll | 11 +
final/test/Transforms/Reassociate/subtest2.ll | 13 +
.../Transforms/SCCP/2002-05-02-EdgeFailure.ll | 26 +
.../SCCP/2002-05-02-MissSecondInst.ll | 8 +
.../SCCP/2002-05-20-MissedIncomingValue.ll | 19 +
.../SCCP/2002-05-21-InvalidSimplify.ll | 33 +
.../SCCP/2002-08-30-GetElementPtrTest.ll | 9 +
.../SCCP/2003-06-24-OverdefinedPHIValue.ll | 30 +
.../SCCP/2003-08-26-InvokeHandling.ll | 18 +
.../Transforms/SCCP/2004-11-16-DeadInvoke.ll | 13 +
.../SCCP/2004-12-10-UndefBranchBug.ll | 12 +
.../SCCP/2006-10-23-IPSCCP-Crash.ll | 103 +
.../Transforms/SCCP/2006-12-04-PackedType.ll | 140 +
.../Transforms/SCCP/2006-12-19-UndefBug.ll | 8 +
.../Transforms/SCCP/2007-05-16-InvokeCrash.ll | 41 +
.../SCCP/2008-01-27-UndefCorrelate.ll | 36 +
.../SCCP/2008-04-22-multiple-ret-sccp.ll | 11 +
.../SCCP/2008-05-23-UndefCallFold.ll | 14 +
.../SCCP/2009-01-14-IPSCCP-Invoke.ll | 28 +
.../SCCP/2009-05-27-VectorOperandZero.ll | 10 +
final/test/Transforms/SCCP/apint-array.ll | 23 +
final/test/Transforms/SCCP/apint-basictest.ll | 16 +
.../test/Transforms/SCCP/apint-basictest2.ll | 17 +
.../test/Transforms/SCCP/apint-basictest3.ll | 23 +
.../test/Transforms/SCCP/apint-basictest4.ll | 25 +
final/test/Transforms/SCCP/apint-bigarray.ll | 23 +
final/test/Transforms/SCCP/apint-bigint.ll | 9 +
final/test/Transforms/SCCP/apint-bigint2.ll | 18 +
final/test/Transforms/SCCP/apint-ipsccp1.ll | 24 +
final/test/Transforms/SCCP/apint-ipsccp2.ll | 19 +
final/test/Transforms/SCCP/apint-ipsccp3.ll | 23 +
final/test/Transforms/SCCP/apint-ipsccp4.ll | 49 +
final/test/Transforms/SCCP/apint-load.ll | 36 +
final/test/Transforms/SCCP/apint-phi.ll | 19 +
final/test/Transforms/SCCP/apint-select.ll | 21 +
final/test/Transforms/SCCP/calltest.ll | 21 +
final/test/Transforms/SCCP/crash.ll | 29 +
final/test/Transforms/SCCP/dg.exp | 3 +
.../test/Transforms/SCCP/ipsccp-addr-taken.ll | 28 +
final/test/Transforms/SCCP/ipsccp-basic.ll | 206 +
final/test/Transforms/SCCP/loadtest.ll | 33 +
final/test/Transforms/SCCP/logical-nuke.ll | 9 +
final/test/Transforms/SCCP/phitest.ll | 20 +
final/test/Transforms/SCCP/retvalue-undef.ll | 32 +
final/test/Transforms/SCCP/sccptest.ll | 58 +
final/test/Transforms/SCCP/select.ll | 12 +
final/test/Transforms/SCCP/undef-resolve.ll | 106 +
.../SRETPromotion/2008-03-11-attributes.ll | 7 +
.../2008-06-04-function-pointer-passing.ll | 24 +
.../SRETPromotion/2008-06-05-non-call-use.ll | 20 +
.../Transforms/SRETPromotion/basictest.ll | 33 +
final/test/Transforms/SRETPromotion/dg.exp | 3 +
.../ScalarRepl/2003-05-29-ArrayFail.ll | 13 +
.../ScalarRepl/2003-09-12-IncorrectPromote.ll | 13 +
.../ScalarRepl/2003-10-29-ArrayProblem.ll | 16 +
.../2006-11-07-InvalidArrayPromote.ll | 20 +
.../ScalarRepl/2007-05-24-LargeAggregate.ll | 27 +
.../ScalarRepl/2007-05-29-MemcpyPreserve.ll | 23 +
.../ScalarRepl/2007-11-03-bigendian_apint.ll | 30 +
.../ScalarRepl/2008-01-29-PromoteBug.ll | 21 +
.../2008-02-28-SubElementExtractCrash.ll | 16 +
.../ScalarRepl/2008-06-05-loadstore-agg.ll | 33 +
.../ScalarRepl/2008-06-22-LargeArray.ll | 17 +
.../2008-08-22-out-of-range-array-promote.ll | 22 +
.../ScalarRepl/2008-09-22-vector-gep.ll | 25 +
.../2009-02-02-ScalarPromoteOutOfRange.ll | 16 +
.../ScalarRepl/2009-02-05-LoadFCA.ll | 20 +
.../ScalarRepl/2009-03-04-MemCpyAlign.ll | 20 +
.../ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll | 184 +
.../ScalarRepl/2009-12-11-NeonTypes.ll | 89 +
.../ScalarRepl/2010-01-18-SelfCopy.ll | 18 +
.../Transforms/ScalarRepl/AggregatePromote.ll | 51 +
.../Transforms/ScalarRepl/DifferingTypes.ll | 16 +
.../Transforms/ScalarRepl/address-space.ll | 35 +
final/test/Transforms/ScalarRepl/arraytest.ll | 11 +
final/test/Transforms/ScalarRepl/badarray.ll | 57 +
final/test/Transforms/ScalarRepl/basictest.ll | 30 +
.../Transforms/ScalarRepl/bitfield-sroa.ll | 17 +
.../Transforms/ScalarRepl/copy-aggregate.ll | 108 +
final/test/Transforms/ScalarRepl/crash.ll | 260 +
final/test/Transforms/ScalarRepl/debuginfo.ll | 106 +
final/test/Transforms/ScalarRepl/dg.exp | 3 +
.../ScalarRepl/load-store-aggregate.ll | 31 +
.../Transforms/ScalarRepl/memcpy-align.ll | 32 +
.../ScalarRepl/memcpy-from-global.ll | 96 +
.../memset-aggregate-byte-leader.ll | 23 +
.../Transforms/ScalarRepl/memset-aggregate.ll | 66 +
.../ScalarRepl/nonzero-first-index.ll | 53 +
.../Transforms/ScalarRepl/not-a-vector.ll | 20 +
.../test/Transforms/ScalarRepl/phi-select.ll | 153 +
.../Transforms/ScalarRepl/phinodepromote.ll | 34 +
.../Transforms/ScalarRepl/select_promote.ll | 18 +
final/test/Transforms/ScalarRepl/sroa-fca.ll | 21 +
final/test/Transforms/ScalarRepl/sroa_two.ll | 13 +
.../Transforms/ScalarRepl/union-fp-int.ll | 14 +
.../Transforms/ScalarRepl/union-packed.ll | 14 +
.../Transforms/ScalarRepl/union-pointer.ll | 41 +
.../Transforms/ScalarRepl/vector_memcpy.ll | 30 +
.../Transforms/ScalarRepl/vector_promote.ll | 100 +
final/test/Transforms/ScalarRepl/volatile.ll | 12 +
.../SimplifyCFG/2002-05-05-EmptyBlockMerge.ll | 22 +
.../SimplifyCFG/2002-05-21-PHIElimination.ll | 19 +
.../SimplifyCFG/2002-06-24-PHINode.ll | 14 +
.../SimplifyCFG/2002-09-24-PHIAssertion.ll | 13 +
.../SimplifyCFG/2003-03-07-DominateProblem.ll | 17 +
.../SimplifyCFG/2003-08-05-InvokeCrash.ll | 13 +
.../SimplifyCFG/2003-08-05-MishandleInvoke.ll | 12 +
.../SimplifyCFG/2003-08-17-BranchFold.ll | 22 +
.../2003-08-17-BranchFoldOrdering.ll | 26 +
.../SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll | 58 +
.../SimplifyCFG/2003-08-17-FoldSwitch.ll | 80 +
.../2004-12-10-SimplifyCFGCrash.ll | 40 +
.../SimplifyCFG/2005-06-16-PHICrash.ll | 95 +
.../SimplifyCFG/2005-08-01-PHIUpdateFail.ll | 71 +
.../SimplifyCFG/2005-08-03-PHIFactorCrash.ll | 75 +
.../SimplifyCFG/2005-10-02-InvokeSimplify.ll | 15 +
.../2005-12-03-IncorrectPHIFold.ll | 124 +
.../SimplifyCFG/2006-02-17-InfiniteUnroll.ll | 27 +
.../SimplifyCFG/2006-06-12-InfLoop.ll | 413 +
.../SimplifyCFG/2006-08-03-Crash.ll | 98 +
.../SimplifyCFG/2006-10-19-UncondDiv.ll | 28 +
.../SimplifyCFG/2006-10-29-InvokeCrash.ll | 555 +
.../SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll | 131 +
.../SimplifyCFG/2007-11-22-InvokeNoUnwind.ll | 14 +
.../SimplifyCFG/2007-12-21-Crash.ll | 37 +
 .../SimplifyCFG/2008-01-02-hoist-fp-add.ll | 26 +
 .../2008-04-23-MergeMultipleResultRet.ll | 43 +
 .../2008-04-27-MultipleReturnCrash.ll | 30 +
 .../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 131 +
 .../2008-07-13-InfLoopMiscompile.ll | 36 +
 .../SimplifyCFG/2008-09-08-MultiplePred.ll | 60 +
 .../2008-09-17-SpeculativeHoist.ll | 18 +
 ...008-10-03-SpeculativelyExecuteBeforePHI.ll | 36 +
 .../SimplifyCFG/2008-12-06-SingleEntryPhi.ll | 13 +
 .../SimplifyCFG/2008-12-16-DCECond.ll | 46 +
 .../SimplifyCFG/2009-01-18-PHIPropCrash.ll | 30 +
 ...01-19-UnconditionalTrappingConstantExpr.ll | 31 +
 .../2009-03-05-Speculative-Hoist-Dbg.ll | 108 +
 .../SimplifyCFG/2009-05-12-externweak.ll | 47 +
 .../SimplifyCFG/2009-06-15-InvokeCrash.ll | 557 +
 .../SimplifyCFG/2010-03-30-InvokeCrash.ll | 18 +
 .../2010-10-24-OnlyUnwindInEntry.ll | 6 +
 final/test/Transforms/SimplifyCFG/BrUnwind.ll | 15 +
 .../test/Transforms/SimplifyCFG/DeadSetCC.ll | 28 +
 .../SimplifyCFG/EqualPHIEdgeBlockMerge.ll | 18 +
 .../test/Transforms/SimplifyCFG/HoistCode.ll | 11 +
 .../Transforms/SimplifyCFG/MagicPointer.ll | 75 +
 .../Transforms/SimplifyCFG/PhiBlockMerge.ll | 25 +
 .../Transforms/SimplifyCFG/PhiBlockMerge2.ll | 27 +
 .../Transforms/SimplifyCFG/PhiEliminate.ll | 27 +
 .../Transforms/SimplifyCFG/PhiEliminate2.ll | 14 +
 .../Transforms/SimplifyCFG/PhiNoEliminate.ll | 27 +
 .../Transforms/SimplifyCFG/SpeculativeExec.ll | 21 +
 .../SimplifyCFG/UncondBranchToReturn.ll | 33 +
 .../SimplifyCFG/UnreachableEliminate.ll | 33 +
 .../test/Transforms/SimplifyCFG/basictest.ll | 43 +
 .../SimplifyCFG/branch-branch-dbginfo.ll | 70 +
 .../SimplifyCFG/branch-cond-merge.ll | 19 +
 .../SimplifyCFG/branch-cond-prop.ll | 17 +
 .../SimplifyCFG/branch-fold-test.ll | 17 +
 .../Transforms/SimplifyCFG/branch-fold.ll | 13 +
 .../SimplifyCFG/branch-phi-thread.ll | 66 +
 .../Transforms/SimplifyCFG/branch_fold_dbg.ll | 122 +
 final/test/Transforms/SimplifyCFG/dbginfo.ll | 71 +
 final/test/Transforms/SimplifyCFG/dg.exp | 3 +
 .../Transforms/SimplifyCFG/duplicate-phis.ll | 21 +
 .../SimplifyCFG/hoist-common-code.dbg.ll | 33 +
 .../SimplifyCFG/hoist-common-code.ll | 18 +
 .../test/Transforms/SimplifyCFG/indirectbr.ll | 182 +
 .../Transforms/SimplifyCFG/invoke_unwind.ll | 32 +
 .../SimplifyCFG/iterative-simplify.ll | 100 +
 .../Transforms/SimplifyCFG/noreturn-call.ll | 11 +
 .../Transforms/SimplifyCFG/return-merge.ll | 19 +
 .../test/Transforms/SimplifyCFG/select-gep.ll | 40 +
 .../SimplifyCFG/speculate-with-offset.ll | 94 +
 .../SimplifyCFG/switch-on-const-select.ll | 138 +
 .../SimplifyCFG/switch-simplify-crash.ll | 108 +
 .../Transforms/SimplifyCFG/switch-to-icmp.ll | 39 +
 .../Transforms/SimplifyCFG/switch_create.ll | 481 +
 .../SimplifyCFG/switch_formation.dbg.ll | 50 +
 .../SimplifyCFG/switch_switch_fold.ll | 47 +
 .../SimplifyCFG/switch_switch_fold_dbginfo.ll | 116 +
 .../Transforms/SimplifyCFG/switch_thread.ll | 79 +
 .../SimplifyCFG/trapping-load-unreachable.ll | 44 +
 .../SimplifyCFG/two-entry-phi-return.dbg.ll | 28 +
 .../SimplifyCFG/two-entry-phi-return.ll | 15 +
 .../2005-05-20-sprintf-crash.ll | 11 +
 .../2007-04-06-strchr-miscompile.ll | 29 +
 .../SimplifyLibCalls/2008-05-19-memcmp.ll | 14 +
 .../SimplifyLibCalls/2009-01-04-Annotate.ll | 12 +
 .../2009-02-11-NotInitialized.ll | 13 +
 .../SimplifyLibCalls/2009-02-12-StrTo.ll | 14 +
 .../2009-05-30-memcmp-byte.ll | 14 +
 .../SimplifyLibCalls/2009-07-28-Exit.ll | 22 +
 .../SimplifyLibCalls/2009-07-29-Exit2.ll | 24 +
 .../2010-05-30-memcpy-Struct.ll | 20 +
 final/test/Transforms/SimplifyLibCalls/FFS.ll | 36 +
 .../Transforms/SimplifyLibCalls/FPrintF.ll | 28 +
 .../test/Transforms/SimplifyLibCalls/FPuts.ll | 29 +
 .../Transforms/SimplifyLibCalls/IsDigit.ll | 21 +
 .../Transforms/SimplifyLibCalls/MemCpy.ll | 20 +
 .../Transforms/SimplifyLibCalls/PR7357.ll | 16 +
 .../Transforms/SimplifyLibCalls/Printf.ll | 36 +
 .../test/Transforms/SimplifyLibCalls/Puts.ll | 15 +
 .../Transforms/SimplifyLibCalls/SPrintF.ll | 40 +
 .../Transforms/SimplifyLibCalls/StrCat.ll | 33 +
 .../Transforms/SimplifyLibCalls/StrChr.ll | 26 +
 .../Transforms/SimplifyLibCalls/StrCmp.ll | 28 +
 .../Transforms/SimplifyLibCalls/StrCpy.ll | 37 +
 .../Transforms/SimplifyLibCalls/StrLen.ll | 56 +
 .../Transforms/SimplifyLibCalls/StrNCat.ll | 31 +
 .../Transforms/SimplifyLibCalls/StrNCmp.ll | 35 +
 .../Transforms/SimplifyLibCalls/StrNCpy.ll | 29 +
 .../Transforms/SimplifyLibCalls/StrPBrk.ll | 25 +
 .../Transforms/SimplifyLibCalls/StrRChr.ll | 23 +
 .../Transforms/SimplifyLibCalls/StrSpn.ll | 41 +
 .../Transforms/SimplifyLibCalls/StrStr.ll | 60 +
 .../Transforms/SimplifyLibCalls/ToAscii.ll | 21 +
 final/test/Transforms/SimplifyLibCalls/abs.ll | 11 +
 final/test/Transforms/SimplifyLibCalls/dg.exp | 3 +
 .../test/Transforms/SimplifyLibCalls/exp2.ll | 38 +
 .../test/Transforms/SimplifyLibCalls/floor.ll | 41 +
 .../Transforms/SimplifyLibCalls/iprintf.ll | 71 +
 .../Transforms/SimplifyLibCalls/memcmp.ll | 35 +
 .../Transforms/SimplifyLibCalls/memmove.ll | 12 +
 .../Transforms/SimplifyLibCalls/memset-64.ll | 12 +
 .../Transforms/SimplifyLibCalls/memset.ll | 12 +
 .../SimplifyLibCalls/pow-to-sqrt.ll | 33 +
 .../test/Transforms/SimplifyLibCalls/pow2.ll | 37 +
 .../SimplifyLibCalls/weak-symbols.ll | 26 +
 final/test/Transforms/Sink/basic.ll | 22 +
 final/test/Transforms/Sink/dg.exp | 3 +
 .../StripSymbols/2007-01-15-llvm.used.ll | 15 +
 .../StripSymbols/2010-06-30-StripDebug.ll | 28 +
 .../StripSymbols/2010-07-01-DeadDbgInfo.ll | 47 +
 .../StripSymbols/2010-08-25-crash.ll | 19 +
 final/test/Transforms/StripSymbols/dg.exp | 3 +
 .../2010-06-26-MultipleReturnValues.ll | 20 +
 .../TailCallElim/accum_recursion.ll | 74 +
 .../test/Transforms/TailCallElim/ackermann.ll | 25 +
 final/test/Transforms/TailCallElim/dg.exp | 3 +
 .../TailCallElim/dont-tce-tail-marked-call.ll | 13 +
 .../TailCallElim/dont_reorder_load.ll | 64 +
 .../test/Transforms/TailCallElim/dup_tail.ll | 23 +
 .../Transforms/TailCallElim/inf-recursion.ll | 34 +
 .../TailCallElim/intervening-inst.ll | 17 +
 .../TailCallElim/move_alloca_for_tail_call.ll | 15 +
 .../test/Transforms/TailCallElim/nocapture.ll | 25 +
 .../Transforms/TailCallElim/reorder_load.ll | 101 +
 .../TailCallElim/return_constant.ll | 17 +
 .../TailCallElim/trivial_codegen_tailcall.ll | 11 +
 .../TailDup/2003-06-24-Simpleloop.ll | 15 +
 .../TailDup/2003-07-22-InfiniteLoop.ll | 11 +
 .../TailDup/2003-08-23-InvalidatedPointers.ll | 29 +
 .../TailDup/2003-08-31-UnreachableBlocks.ll | 17 +
 .../TailDup/2004-04-01-DemoteRegToStack.ll | 20 +
 .../TailDup/2008-05-13-InfiniteLoop.ll | 26 +
 .../TailDup/2008-06-11-AvoidDupLoopHeader.ll | 27 +
 .../Transforms/TailDup/2009-07-31-phicrash.ll | 14 +
 final/test/Transforms/TailDup/MergeTest.ll | 27 +
 .../test/Transforms/TailDup/PHIUpdateTest.ll | 16 +
 final/test/Transforms/TailDup/basictest.ll | 20 +
 final/test/Transforms/TailDup/basictest2.ll | 15 +
 final/test/Transforms/TailDup/dg.exp | 3 +
 final/test/Transforms/TailDup/if-tail-dup.ll | 49 +
 final/test/Unit/lit.cfg | 83 +
 final/test/Unit/lit.site.cfg.in | 22 +
 final/test/Verifier/2002-04-13-RetTypes.ll | 10 +
 .../2002-11-05-GetelementptrPointers.ll | 9 +
 .../2004-05-21-SwitchConstantMismatch.ll | 13 +
 .../2005-03-21-UndefinedTypeReference.ll | 7 +
 final/test/Verifier/2006-07-11-StoreStruct.ll | 11 +
 final/test/Verifier/2006-10-15-AddrLabel.ll | 8 +
 .../Verifier/2006-12-12-IntrinsicDefine.ll | 7 +
 .../Verifier/2007-12-21-InvokeParamAttrs.ll | 10 +
 final/test/Verifier/2008-01-11-VarargAttrs.ll | 10 +
 final/test/Verifier/2008-03-01-AllocaSized.ll | 8 +
 .../Verifier/2008-08-22-MemCpyAlignment.ll | 11 +
 final/test/Verifier/2008-11-15-RetVoid.ll | 5 +
 .../test/Verifier/2009-05-29-InvokeResult1.ll | 15 +
 .../test/Verifier/2009-05-29-InvokeResult2.ll | 16 +
 .../test/Verifier/2009-05-29-InvokeResult3.ll | 19 +
 .../Verifier/2010-08-07-PointerIntrinsic.ll | 21 +
 final/test/Verifier/AmbiguousPhi.ll | 10 +
 final/test/Verifier/PhiGrouping.ll | 17 +
 final/test/Verifier/README.txt | 3 +
 final/test/Verifier/SelfReferential.ll | 9 +
 final/test/Verifier/aliasing-chain.ll | 6 +
 final/test/Verifier/byval-1.ll | 2 +
 final/test/Verifier/byval-2.ll | 4 +
 final/test/Verifier/byval-4.ll | 4 +
 final/test/Verifier/dg.exp | 3 +
 final/test/Verifier/gcread-ptrptr.ll | 13 +
 final/test/Verifier/gcroot-alloca.ll | 14 +
 final/test/Verifier/gcroot-meta.ll | 16 +
 final/test/Verifier/gcroot-ptrptr.ll | 14 +
 final/test/Verifier/gcwrite-ptrptr.ll | 13 +
 final/test/Verifier/invoke-1.ll | 10 +
 final/test/Verifier/invoke-2.ll | 14 +
 final/test/lib/llvm.exp | 313 +
 final/test/lib/llvm2cpp.exp | 100 +
 final/test/lit.cfg | 308 +
 final/test/lit.site.cfg.in | 20 +
 final/test/site.exp.in | 28 +
 final/tools/CMakeLists.txt | 53 +
 final/tools/Makefile | 76 +
 final/tools/bugpoint-passes/CMakeLists.txt | 5 +
 final/tools/bugpoint-passes/Makefile | 23 +
 final/tools/bugpoint-passes/TestPasses.cpp | 75 +
 final/tools/bugpoint-passes/bugpoint.exports | 0
 final/tools/bugpoint/BugDriver.cpp | 246 +
 final/tools/bugpoint/BugDriver.h | 330 +
 final/tools/bugpoint/CMakeLists.txt | 14 +
 final/tools/bugpoint/CrashDebugger.cpp | 667 +
 final/tools/bugpoint/ExecutionDriver.cpp | 515 +
 final/tools/bugpoint/ExtractFunction.cpp | 369 +
 final/tools/bugpoint/FindBugs.cpp | 113 +
 final/tools/bugpoint/ListReducer.h | 201 +
 final/tools/bugpoint/Makefile | 16 +
 final/tools/bugpoint/Miscompilation.cpp | 1082 +
 final/tools/bugpoint/OptimizerDriver.cpp | 265 +
 final/tools/bugpoint/ToolRunner.cpp | 969 +
 final/tools/bugpoint/ToolRunner.h | 247 +
 final/tools/bugpoint/bugpoint.cpp | 184 +
 final/tools/edis/CMakeLists.txt | 11 +
 final/tools/edis/EDMain.cpp | 284 +
 final/tools/edis/EnhancedDisassembly.exports | 36 +
 final/tools/edis/Makefile | 54 +
 final/tools/gold/Makefile | 31 +
 final/tools/gold/README.txt | 21 +
 final/tools/gold/gold-plugin.cpp | 503 +
 final/tools/gold/gold.exports | 1 +
 final/tools/llc/CMakeLists.txt | 5 +
 final/tools/llc/Makefile | 21 +
 final/tools/llc/llc.cpp | 348 +
 final/tools/lli/CMakeLists.txt | 5 +
 final/tools/lli/Makefile | 15 +
 final/tools/lli/lli.cpp | 270 +
 final/tools/llvm-ar/CMakeLists.txt | 8 +
 final/tools/llvm-ar/Makefile | 25 +
 final/tools/llvm-ar/llvm-ar.cpp | 781 +
 final/tools/llvm-as/CMakeLists.txt | 6 +
 final/tools/llvm-as/Makefile | 17 +
 final/tools/llvm-as/llvm-as.cpp | 119 +
 final/tools/llvm-bcanalyzer/CMakeLists.txt | 6 +
 final/tools/llvm-bcanalyzer/Makefile | 17 +
 .../tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 635 +
 final/tools/llvm-config/CMakeLists.txt | 162 +
 final/tools/llvm-config/Makefile | 131 +
 final/tools/llvm-config/find-cycles.pl | 170 +
 final/tools/llvm-config/llvm-config.in.in | 463 +
 final/tools/llvm-diff/CMakeLists.txt | 6 +
 final/tools/llvm-diff/DifferenceEngine.cpp | 676 +
 final/tools/llvm-diff/DifferenceEngine.h | 179 +
 final/tools/llvm-diff/Makefile | 17 +
 final/tools/llvm-diff/llvm-diff.cpp | 314 +
 final/tools/llvm-dis/CMakeLists.txt | 6 +
 final/tools/llvm-dis/Makefile | 17 +
 final/tools/llvm-dis/llvm-dis.cpp | 141 +
 final/tools/llvm-extract/CMakeLists.txt | 5 +
 final/tools/llvm-extract/Makefile | 18 +
 final/tools/llvm-extract/llvm-extract.cpp | 174 +
 final/tools/llvm-ld/CMakeLists.txt | 8 +
 final/tools/llvm-ld/Makefile | 15 +
 final/tools/llvm-ld/Optimize.cpp | 128 +
 final/tools/llvm-ld/llvm-ld.cpp | 732 +
 final/tools/llvm-link/CMakeLists.txt | 5 +
 final/tools/llvm-link/Makefile | 17 +
 final/tools/llvm-link/llvm-link.cpp | 141 +
 final/tools/llvm-mc/CMakeLists.txt | 6 +
 final/tools/llvm-mc/Disassembler.cpp | 346 +
 final/tools/llvm-mc/Disassembler.h | 40 +
 final/tools/llvm-mc/Makefile | 24 +
 final/tools/llvm-mc/llvm-mc.cpp | 451 +
 final/tools/llvm-nm/CMakeLists.txt | 5 +
 final/tools/llvm-nm/Makefile | 17 +
 final/tools/llvm-nm/llvm-nm.cpp | 361 +
 final/tools/llvm-objdump/CMakeLists.txt | 11 +
 final/tools/llvm-objdump/Makefile | 17 +
 final/tools/llvm-objdump/llvm-objdump.cpp | 255 +
 final/tools/llvm-prof/CMakeLists.txt | 5 +
 final/tools/llvm-prof/Makefile | 17 +
 final/tools/llvm-prof/llvm-prof.cpp | 293 +
 final/tools/llvm-ranlib/CMakeLists.txt | 6 +
 final/tools/llvm-ranlib/Makefile | 18 +
 final/tools/llvm-ranlib/llvm-ranlib.cpp | 101 +
 final/tools/llvm-shlib/Makefile | 120 +
 final/tools/llvm-stub/CMakeLists.txt | 3 +
 final/tools/llvm-stub/Makefile | 13 +
 final/tools/llvm-stub/llvm-stub.c | 77 +
 final/tools/llvmc/CMakeLists.txt | 4 +
 final/tools/llvmc/Makefile | 18 +
 final/tools/llvmc/doc/LLVMC-Reference.rst | 842 +
 final/tools/llvmc/doc/LLVMC-Tutorial.rst | 129 +
 final/tools/llvmc/doc/Makefile | 33 +
 final/tools/llvmc/doc/img/lines.gif | Bin 0 -> 91 bytes
 final/tools/llvmc/examples/Hello/Hello.cpp | 29 +
 final/tools/llvmc/examples/Hello/Makefile | 14 +
 final/tools/llvmc/examples/Makefile | 14 +
 final/tools/llvmc/examples/Simple/Makefile | 15 +
 final/tools/llvmc/examples/Simple/Simple.cpp | 2 +
 final/tools/llvmc/examples/Simple/Simple.td | 41 +
 .../llvmc/examples/Skeleton/AutoGenerated.td | 7 +
 final/tools/llvmc/examples/Skeleton/Hooks.cpp | 12 +
 final/tools/llvmc/examples/Skeleton/Main.cpp | 15 +
 final/tools/llvmc/examples/Skeleton/Makefile | 20 +
 final/tools/llvmc/examples/Skeleton/README | 6 +
 final/tools/llvmc/examples/mcc16/Hooks.cpp | 109 +
 final/tools/llvmc/examples/mcc16/Main.cpp | 57 +
 final/tools/llvmc/examples/mcc16/Makefile | 15 +
 final/tools/llvmc/examples/mcc16/PIC16.td | 234 +
 final/tools/llvmc/examples/mcc16/README | 75 +
 final/tools/llvmc/src/AutoGenerated.td | 17 +
 final/tools/llvmc/src/Base.td.in | 461 +
 final/tools/llvmc/src/Clang.td | 87 +
 final/tools/llvmc/src/Hooks.cpp | 194 +
 final/tools/llvmc/src/Main.cpp | 16 +
 final/tools/llvmc/src/Makefile | 14 +
 final/tools/lto/LTOCodeGenerator.cpp | 415 +
 final/tools/lto/LTOCodeGenerator.h | 72 +
 final/tools/lto/LTOModule.cpp | 748 +
 final/tools/lto/LTOModule.h | 120 +
 final/tools/lto/Makefile | 58 +
 final/tools/lto/lto.cpp | 291 +
 final/tools/lto/lto.exports | 27 +
 final/tools/macho-dump/CMakeLists.txt | 5 +
 final/tools/macho-dump/Makefile | 23 +
 final/tools/macho-dump/macho-dump.cpp | 391 +
 final/tools/opt/AnalysisWrappers.cpp | 94 +
 final/tools/opt/CMakeLists.txt | 8 +
 final/tools/opt/GraphPrinters.cpp | 120 +
 final/tools/opt/Makefile | 14 +
 final/tools/opt/PrintSCC.cpp | 112 +
 final/tools/opt/opt.cpp | 696 +
 final/unittests/ADT/APFloatTest.cpp | 579 +
 final/unittests/ADT/APIntTest.cpp | 369 +
 final/unittests/ADT/BitVectorTest.cpp | 201 +
 final/unittests/ADT/DAGDeltaAlgorithmTest.cpp | 122 +
 final/unittests/ADT/DeltaAlgorithmTest.cpp | 100 +
 final/unittests/ADT/DenseMapTest.cpp | 179 +
 final/unittests/ADT/DenseSetTest.cpp | 30 +
 final/unittests/ADT/FoldingSet.cpp | 39 +
 final/unittests/ADT/ImmutableSetTest.cpp | 201 +
 final/unittests/ADT/IntEqClassesTest.cpp | 107 +
 final/unittests/ADT/IntervalMapTest.cpp | 716 +
 final/unittests/ADT/Makefile | 23 +
 final/unittests/ADT/SmallBitVectorTest.cpp | 196 +
 final/unittests/ADT/SmallStringTest.cpp | 48 +
 final/unittests/ADT/SmallVectorTest.cpp | 410 +
 final/unittests/ADT/SparseBitVectorTest.cpp | 36 +
 final/unittests/ADT/StringMapTest.cpp | 207 +
 final/unittests/ADT/StringRefTest.cpp | 282 +
 final/unittests/ADT/TripleTest.cpp | 270 +
 final/unittests/ADT/TwineTest.cpp | 83 +
 final/unittests/ADT/ilistTest.cpp | 44 +
 final/unittests/Analysis/Makefile | 15 +
 .../Analysis/ScalarEvolutionTest.cpp | 82 +
 final/unittests/CMakeLists.txt | 144 +
 .../ExecutionEngine/ExecutionEngineTest.cpp | 129 +
 .../JIT/JITEventListenerTest.cpp | 238 +
 .../JIT/JITMemoryManagerTest.cpp | 279 +
 .../unittests/ExecutionEngine/JIT/JITTest.cpp | 779 +
 .../ExecutionEngine/JIT/JITTests.def | 4 +
 final/unittests/ExecutionEngine/JIT/Makefile | 18 +
 .../ExecutionEngine/JIT/MultiJITTest.cpp | 164 +
 final/unittests/ExecutionEngine/Makefile | 18 +
 final/unittests/Makefile | 17 +
 final/unittests/Makefile.unittest | 62 +
 final/unittests/Support/AllocatorTest.cpp | 143 +
 final/unittests/Support/Casting.cpp | 154 +
 final/unittests/Support/CommandLineTest.cpp | 60 +
 final/unittests/Support/ConstantRangeTest.cpp | 440 +
 final/unittests/Support/EndianTest.cpp | 72 +
 final/unittests/Support/LeakDetectorTest.cpp | 31 +
 final/unittests/Support/Makefile | 15 +
 final/unittests/Support/MathExtrasTest.cpp | 104 +
 final/unittests/Support/Path.cpp | 253 +
 final/unittests/Support/RegexTest.cpp | 94 +
 final/unittests/Support/SwapByteOrderTest.cpp | 128 +
 final/unittests/Support/TimeValue.cpp | 23 +
 final/unittests/Support/TypeBuilderTest.cpp | 253 +
 final/unittests/Support/ValueHandleTest.cpp | 411 +
 final/unittests/Support/raw_ostream_test.cpp | 146 +
 final/unittests/Transforms/Makefile | 17 +
 final/unittests/Transforms/Utils/Cloning.cpp | 141 +
 final/unittests/Transforms/Utils/Local.cpp | 60 +
 final/unittests/Transforms/Utils/Makefile | 15 +
 final/unittests/VMCore/ConstantsTest.cpp | 122 +
 final/unittests/VMCore/DerivedTypesTest.cpp | 88 +
 final/unittests/VMCore/InstructionsTest.cpp | 111 +
 final/unittests/VMCore/Makefile | 15 +
 final/unittests/VMCore/MetadataTest.cpp | 145 +
 final/unittests/VMCore/PassManagerTest.cpp | 548 +
 final/unittests/VMCore/ValueMapTest.cpp | 294 +
 final/unittests/VMCore/VerifierTest.cpp | 64 +
 final/utils/CollectDebugInfoUsingLLDB.py | 182 +
 final/utils/CompareDebugInfo.py | 182 +
 final/utils/DSAclean.py | 32 +
 final/utils/DSAextract.py | 111 +
 final/utils/FileCheck/CMakeLists.txt | 11 +
 final/utils/FileCheck/FileCheck.cpp | 757 +
 final/utils/FileCheck/Makefile | 21 +
 final/utils/FileUpdate/CMakeLists.txt | 11 +
 final/utils/FileUpdate/FileUpdate.cpp | 87 +
 final/utils/FileUpdate/Makefile | 21 +
 final/utils/GenLibDeps.pl | 386 +
 final/utils/GetRepositoryPath | 27 +
 final/utils/GetSourceVersion | 27 +
 final/utils/KillTheDoctor/CMakeLists.txt | 5 +
 final/utils/KillTheDoctor/KillTheDoctor.cpp | 596 +
 final/utils/Makefile | 22 +
 final/utils/Misc/zkill | 276 +
 final/utils/NLT.schema | 8 +
 final/utils/NewNightlyTest.pl | 836 +
 final/utils/NightlyTest.gnuplot | 214 +
 final/utils/NightlyTestTemplate.html | 244 +
 final/utils/PerfectShuffle/Makefile | 18 +
 final/utils/PerfectShuffle/PerfectShuffle.cpp | 572 +
 final/utils/TableGen/ARMDecoderEmitter.cpp | 1857 ++
 final/utils/TableGen/ARMDecoderEmitter.h | 50 +
 final/utils/TableGen/AsmMatcherEmitter.cpp | 2371 ++
 final/utils/TableGen/AsmMatcherEmitter.h | 33 +
 final/utils/TableGen/AsmWriterEmitter.cpp | 803 +
 final/utils/TableGen/AsmWriterEmitter.h | 55 +
 final/utils/TableGen/AsmWriterInst.cpp | 233 +
 final/utils/TableGen/AsmWriterInst.h | 113 +
 final/utils/TableGen/CMakeLists.txt | 56 +
 final/utils/TableGen/CallingConvEmitter.cpp | 212 +
 final/utils/TableGen/CallingConvEmitter.h | 38 +
 final/utils/TableGen/ClangASTNodesEmitter.cpp | 165 +
 final/utils/TableGen/ClangASTNodesEmitter.h | 84 +
 final/utils/TableGen/ClangAttrEmitter.cpp | 708 +
 final/utils/TableGen/ClangAttrEmitter.h | 101 +
 .../TableGen/ClangDiagnosticsEmitter.cpp | 296 +
 .../utils/TableGen/ClangDiagnosticsEmitter.h | 46 +
 .../utils/TableGen/ClangSACheckersEmitter.cpp | 230 +
 final/utils/TableGen/ClangSACheckersEmitter.h | 31 +
 final/utils/TableGen/CodeEmitterGen.cpp | 295 +
 final/utils/TableGen/CodeEmitterGen.h | 49 +
 final/utils/TableGen/CodeGenDAGPatterns.cpp | 3179 ++
 final/utils/TableGen/CodeGenDAGPatterns.h | 758 +
 final/utils/TableGen/CodeGenInstruction.cpp | 537 +
 final/utils/TableGen/CodeGenInstruction.h | 316 +
 final/utils/TableGen/CodeGenIntrinsics.h | 87 +
 final/utils/TableGen/CodeGenRegisters.h | 101 +
 final/utils/TableGen/CodeGenTarget.cpp | 582 +
 final/utils/TableGen/CodeGenTarget.h | 269 +
 final/utils/TableGen/DAGISelEmitter.cpp | 159 +
 final/utils/TableGen/DAGISelEmitter.h | 38 +
 final/utils/TableGen/DAGISelMatcher.cpp | 401 +
 final/utils/TableGen/DAGISelMatcher.h | 1114 +
 .../utils/TableGen/DAGISelMatcherEmitter.cpp | 827 +
 final/utils/TableGen/DAGISelMatcherGen.cpp | 909 +
 final/utils/TableGen/DAGISelMatcherOpt.cpp | 514 +
 final/utils/TableGen/DisassemblerEmitter.cpp | 138 +
 final/utils/TableGen/DisassemblerEmitter.h | 28 +
 final/utils/TableGen/EDEmitter.cpp | 920 +
 final/utils/TableGen/EDEmitter.h | 34 +
 final/utils/TableGen/FastISelEmitter.cpp | 653 +
 final/utils/TableGen/FastISelEmitter.h | 39 +
 .../utils/TableGen/FixedLenDecoderEmitter.cpp | 1372 +
 final/utils/TableGen/FixedLenDecoderEmitter.h | 56 +
 final/utils/TableGen/InstrEnumEmitter.cpp | 48 +
 final/utils/TableGen/InstrEnumEmitter.h | 33 +
 final/utils/TableGen/InstrInfoEmitter.cpp | 336 +
 final/utils/TableGen/InstrInfoEmitter.h | 66 +
 final/utils/TableGen/IntrinsicEmitter.cpp | 679 +
 final/utils/TableGen/IntrinsicEmitter.h | 63 +
 .../TableGen/LLVMCConfigurationEmitter.cpp | 3105 ++
 .../TableGen/LLVMCConfigurationEmitter.h | 34 +
 final/utils/TableGen/Makefile | 20 +
 final/utils/TableGen/NeonEmitter.cpp | 1522 +
 final/utils/TableGen/NeonEmitter.h | 180 +
 final/utils/TableGen/OptParserEmitter.cpp | 194 +
 final/utils/TableGen/OptParserEmitter.h | 34 +
 final/utils/TableGen/Record.cpp | 1564 +
 final/utils/TableGen/Record.h | 1507 +
 final/utils/TableGen/RegisterInfoEmitter.cpp | 1009 +
 final/utils/TableGen/RegisterInfoEmitter.h | 40 +
 final/utils/TableGen/StringMatcher.cpp | 149 +
 final/utils/TableGen/StringMatcher.h | 54 +
 final/utils/TableGen/StringToOffsetTable.h | 81 +
 final/utils/TableGen/SubtargetEmitter.cpp | 663 +
 final/utils/TableGen/SubtargetEmitter.h | 70 +
 final/utils/TableGen/TGLexer.cpp | 442 +
 final/utils/TableGen/TGLexer.h | 122 +
 final/utils/TableGen/TGParser.cpp | 2151 ++
 final/utils/TableGen/TGParser.h | 118 +
 final/utils/TableGen/TGValueTypes.cpp | 106 +
 final/utils/TableGen/TableGen.cpp | 377 +
 final/utils/TableGen/TableGenBackend.cpp | 25 +
 final/utils/TableGen/TableGenBackend.h | 43 +
 final/utils/TableGen/X86DisassemblerShared.h | 38 +
 .../utils/TableGen/X86DisassemblerTables.cpp | 601 +
 final/utils/TableGen/X86DisassemblerTables.h | 291 +
 final/utils/TableGen/X86ModRMFilters.h | 197 +
 final/utils/TableGen/X86RecognizableInstr.cpp | 1006 +
 final/utils/TableGen/X86RecognizableInstr.h | 240 +
 final/utils/Target/ARM/analyze-match-table.py | 61 +
 final/utils/UpdateCMakeLists.pl | 119 +
 final/utils/bugpoint/RemoteRunSafely.sh | 105 +
 final/utils/buildit/GNUmakefile | 135 +
 final/utils/buildit/build_llvm | 375 +
 final/utils/cgiplotNLT.pl | 68 +
 final/utils/check-each-file | 150 +
 final/utils/codegen-diff | 135 +
 final/utils/count/CMakeLists.txt | 3 +
 final/utils/count/Makefile | 20 +
 final/utils/count/count.c | 50 +
 final/utils/countloc.sh | 40 +
 final/utils/crosstool/ARM/README | 37 +
 .../crosstool/ARM/build-install-linux.sh | 200 +
 final/utils/crosstool/create-snapshots.sh | 41 +
 final/utils/emacs/README | 27 +
 final/utils/emacs/emacs.el | 39 +
 final/utils/emacs/llvm-mode.el | 133 +
 final/utils/emacs/tablegen-mode.el | 122 +
 final/utils/findmisopt | 178 +
 final/utils/findoptdiff | 101 +
 final/utils/findsym.pl | 33 +
 final/utils/fpcmp/Makefile | 16 +
 final/utils/fpcmp/fpcmp.cpp | 43 +
 final/utils/getsrcs.sh | 34 +
 final/utils/git/find-rev | 50 +
 final/utils/importNLT.pl | 86 +
 final/utils/jedit/README | 14 +
 final/utils/jedit/tablegen.xml | 39 +
 final/utils/kate/README | 12 +
 final/utils/kate/llvm.xml | 255 +
 final/utils/lint/common_lint.py | 97 +
 final/utils/lint/cpp_lint.py | 94 +
 final/utils/lint/generic_lint.py | 24 +
 .../utils/lint/remove_trailing_whitespace.sh | 6 +
 final/utils/lit/TODO | 9 +
 final/utils/lit/lit.py | 5 +
 .../lit/lit/ExampleTests.ObjDir/lit.site.cfg | 15 +
 .../lit/lit/ExampleTests/Clang/fsyntax-only.c | 4 +
 .../utils/lit/lit/ExampleTests/Clang/lit.cfg | 47 +
 .../LLVM.InTree/test/Bar/bar-test.ll | 3 +
 .../ExampleTests/LLVM.InTree/test/Bar/dg.exp | 6 +
 .../lit/ExampleTests/LLVM.InTree/test/lit.cfg | 151 +
 .../LLVM.InTree/test/lit.site.cfg | 10 +
 .../ExampleTests/LLVM.InTree/test/site.exp | 28 +
 .../ExampleTests/LLVM.OutOfTree/lit.local.cfg | 1 +
 .../LLVM.OutOfTree/obj/test/Foo/lit.local.cfg | 0
 .../LLVM.OutOfTree/obj/test/lit.site.cfg | 11 +
 .../LLVM.OutOfTree/obj/test/site.exp | 28 +
 .../LLVM.OutOfTree/src/test/Foo/data.txt | 1 +
 .../LLVM.OutOfTree/src/test/Foo/dg.exp | 6 +
 .../LLVM.OutOfTree/src/test/Foo/pct-S.ll | 1 +
 .../LLVM.OutOfTree/src/test/lit.cfg | 151 +
 .../lit/ExampleTests/ShExternal/lit.local.cfg | 6 +
 .../lit/ExampleTests/ShInternal/lit.local.cfg | 6 +
 .../lit/ExampleTests/TclTest/lit.local.cfg | 5 +
 .../lit/ExampleTests/TclTest/stderr-pipe.ll | 1 +
 .../lit/ExampleTests/TclTest/tcl-redir-1.ll | 7 +
 final/utils/lit/lit/ExampleTests/fail.c | 2 +
 final/utils/lit/lit/ExampleTests/lit.cfg | 26 +
 final/utils/lit/lit/ExampleTests/pass.c | 1 +
 .../lit/ExampleTests/required-and-missing.c | 4 +
 .../lit/ExampleTests/required-and-present.c | 2 +
 final/utils/lit/lit/ExampleTests/xfail.c | 2 +
 final/utils/lit/lit/ExampleTests/xpass.c | 2 +
 final/utils/lit/lit/LitConfig.py | 131 +
 final/utils/lit/lit/LitFormats.py | 3 +
 final/utils/lit/lit/LitTestCase.py | 30 +
 final/utils/lit/lit/ProgressBar.py | 267 +
 final/utils/lit/lit/ShCommands.py | 85 +
 final/utils/lit/lit/ShUtil.py | 353 +
 final/utils/lit/lit/TclUtil.py | 322 +
 final/utils/lit/lit/Test.py | 79 +
 final/utils/lit/lit/TestFormats.py | 228 +
 final/utils/lit/lit/TestRunner.py | 600 +
 final/utils/lit/lit/TestingConfig.py | 103 +
 final/utils/lit/lit/Util.py | 141 +
 final/utils/lit/lit/__init__.py | 10 +
 final/utils/lit/lit/main.py | 648 +
 final/utils/lit/setup.py | 70 +
 final/utils/llvm-lit/CMakeLists.txt | 12 +
 final/utils/llvm-lit/Makefile | 22 +
 final/utils/llvm-lit/llvm-lit.in | 27 +
 final/utils/llvm-native-gcc | 249 +
 final/utils/llvm-native-gxx | 249 +
 final/utils/llvm.grm | 418 +
 final/utils/llvmbuild | 749 +
 final/utils/llvmdo | 184 +
 final/utils/llvmgrep | 39 +
 final/utils/makellvm | 145 +
 final/utils/not/CMakeLists.txt | 11 +
 final/utils/not/Makefile | 21 +
 final/utils/not/not.cpp | 27 +
 final/utils/parseNLT.pl | 34 +
 final/utils/plotNLT.pl | 53 +
 final/utils/profile.pl | 74 +
 final/utils/release/test-release.sh | 381 +
 final/utils/test_debuginfo.pl | 61 +
 final/utils/unittest/CMakeLists.txt | 41 +
 final/utils/unittest/Makefile | 13 +
 final/utils/unittest/UnitTestMain/Makefile | 32 +
 .../utils/unittest/UnitTestMain/TestMain.cpp | 42 +
 final/utils/unittest/googletest/LICENSE.TXT | 28 +
 final/utils/unittest/googletest/Makefile | 39 +
 final/utils/unittest/googletest/README.LLVM | 31 +
 .../unittest/googletest/gtest-death-test.cc | 1172 +
 .../unittest/googletest/gtest-filepath.cc | 380 +
 final/utils/unittest/googletest/gtest-port.cc | 711 +
 .../unittest/googletest/gtest-test-part.cc | 110 +
 .../unittest/googletest/gtest-typed-test.cc | 110 +
 final/utils/unittest/googletest/gtest.cc | 4675 +++
 .../include/gtest/gtest-death-test.h | 283 +
 .../googletest/include/gtest/gtest-message.h | 230 +
 .../include/gtest/gtest-param-test.h | 1397 +
 .../googletest/include/gtest/gtest-spi.h | 232 +
 .../include/gtest/gtest-test-part.h | 176 +
 .../include/gtest/gtest-typed-test.h | 259 +
 .../unittest/googletest/include/gtest/gtest.h | 2054 ++
 .../include/gtest/gtest_pred_impl.h | 368 +
 .../googletest/include/gtest/gtest_prod.h | 58 +
 .../internal/gtest-death-test-internal.h | 275 +
 .../include/gtest/internal/gtest-filepath.h | 210 +
 .../gtest/internal/gtest-internal-inl.h | 1074 +
 .../include/gtest/internal/gtest-internal.h | 943 +
 .../include/gtest/internal/gtest-linked_ptr.h | 243 +
 .../internal/gtest-param-util-generated.h | 4820 +++
 .../include/gtest/internal/gtest-param-util.h | 619 +
 .../include/gtest/internal/gtest-port.h | 1502 +
 .../include/gtest/internal/gtest-string.h | 350 +
 .../include/gtest/internal/gtest-tuple.h | 968 +
 .../include/gtest/internal/gtest-type-util.h | 3321 +++
 final/utils/valgrind/i386-pc-linux-gnu.supp | 41 +
 final/utils/valgrind/x86_64-pc-linux-gnu.supp | 46 +
 final/utils/vim/README | 43 +
 final/utils/vim/llvm.vim | 110 +
 final/utils/vim/tablegen.vim | 54 +
 final/utils/vim/vimrc | 221 +
 final/utils/webNLT.pl | 83 +
 final/website/index.html | 26 +
 8270 files changed, 1242821 insertions(+)
 create mode 100644 final/.gitignore
 create mode 100644 final/CMakeLists.txt
 create mode 100644 final/CREDITS.TXT
 create mode 100644 final/LICENSE.TXT
 create mode 100644 final/Makefile
 create mode 100644 final/Makefile.common
 create mode 100644 final/Makefile.config.in
 create mode 100644 final/Makefile.rules
 create mode 100644 final/ModuleInfo.txt
 create mode 100644 final/README.txt
 create mode 100755 final/autoconf/AutoRegen.sh
 create mode 100644 final/autoconf/ExportMap.map
 create mode 100644 final/autoconf/LICENSE.TXT
 create mode 100644 final/autoconf/README.TXT
 create mode 100755 final/autoconf/config.guess
 create mode 100755 final/autoconf/config.sub
 create mode 100644 final/autoconf/configure.ac
 create mode 100755 final/autoconf/depcomp
 create mode 100755 final/autoconf/install-sh
 create mode 100644 final/autoconf/ltmain.sh
 create mode 100644 final/autoconf/m4/build_exeext.m4
 create mode 100644 final/autoconf/m4/c_printf_a.m4
 create mode 100644 final/autoconf/m4/check_gnu_make.m4
 create mode 100644 final/autoconf/m4/config_makefile.m4
 create mode 100644 final/autoconf/m4/config_project.m4
 create mode 100644 final/autoconf/m4/cxx_flag_check.m4
 create mode 100644 final/autoconf/m4/find_std_program.m4
 create mode 100644 final/autoconf/m4/func_isinf.m4
 create mode 100644 final/autoconf/m4/func_isnan.m4
 create mode 100644 final/autoconf/m4/func_mmap_file.m4
 create mode 100644 final/autoconf/m4/header_mmap_anonymous.m4
 create mode 100644 final/autoconf/m4/huge_val.m4
 create mode 100644 final/autoconf/m4/libtool.m4
 create mode 100644 final/autoconf/m4/link_options.m4
 create mode 100644 final/autoconf/m4/linux_mixed_64_32.m4
 create mode 100644 final/autoconf/m4/ltdl.m4
 create mode 100644 final/autoconf/m4/need_dev_zero_for_mmap.m4
 create mode 100644 final/autoconf/m4/path_perl.m4
 create mode 100644 final/autoconf/m4/path_tclsh.m4
 create mode 100644 final/autoconf/m4/rand48.m4
 create mode 100644 final/autoconf/m4/sanity_check.m4
 create mode 100644 final/autoconf/m4/single_cxx_check.m4
 create mode 100644 final/autoconf/m4/visibility_inlines_hidden.m4
 create mode 100755 final/autoconf/missing
 create mode 100755 final/autoconf/mkinstalldirs
 create mode 100644 final/bindings/Makefile
 create mode 100644 final/bindings/README.txt
 create mode 100644 final/bindings/ocaml/Makefile
 create mode 100644 final/bindings/ocaml/Makefile.ocaml
 create mode 100644 final/bindings/ocaml/analysis/Makefile
 create mode 100644 final/bindings/ocaml/analysis/analysis_ocaml.c
 create mode 100644 final/bindings/ocaml/analysis/llvm_analysis.ml
 create mode 100644 final/bindings/ocaml/analysis/llvm_analysis.mli
 create mode 100644 final/bindings/ocaml/bitreader/Makefile
 create mode 100644 final/bindings/ocaml/bitreader/bitreader_ocaml.c
 create mode 100644 final/bindings/ocaml/bitreader/llvm_bitreader.ml
 create mode 100644 final/bindings/ocaml/bitreader/llvm_bitreader.mli
 create mode 100644 final/bindings/ocaml/bitwriter/Makefile
 create mode 100644 final/bindings/ocaml/bitwriter/bitwriter_ocaml.c
 create mode 100644 final/bindings/ocaml/bitwriter/llvm_bitwriter.ml
 create mode 100644 final/bindings/ocaml/bitwriter/llvm_bitwriter.mli
 create mode 100644 final/bindings/ocaml/executionengine/Makefile
 create mode 100644 final/bindings/ocaml/executionengine/executionengine_ocaml.c
 create mode 100644 final/bindings/ocaml/executionengine/llvm_executionengine.ml
 create mode 100644 final/bindings/ocaml/executionengine/llvm_executionengine.mli
 create mode 100644 final/bindings/ocaml/llvm/Makefile
 create mode 100644 final/bindings/ocaml/llvm/llvm.ml
 create mode 100644 final/bindings/ocaml/llvm/llvm.mli
 create mode 100644 final/bindings/ocaml/llvm/llvm_ocaml.c
 create mode 100644 final/bindings/ocaml/target/Makefile
 create mode 100644 final/bindings/ocaml/target/llvm_target.ml
 create mode 100644 final/bindings/ocaml/target/llvm_target.mli
 create mode 100644 final/bindings/ocaml/target/target_ocaml.c
 create mode 100644 final/bindings/ocaml/transforms/Makefile
 create mode 100644 final/bindings/ocaml/transforms/scalar/Makefile
 create mode 100644 final/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
 create mode 100644 final/bindings/ocaml/transforms/scalar/llvm_scalar_opts.mli
 create mode 100644 final/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
 create mode 100755 final/build-for-llvm-top.sh
 create mode 100644 final/cmake/README
 create mode 100755 final/cmake/config-ix.cmake
 create mode 100755 final/cmake/modules/AddLLVM.cmake
 create mode 100644 final/cmake/modules/AddLLVMDefinitions.cmake
 create mode 100644 final/cmake/modules/CMakeLists.txt
 create mode 100644 final/cmake/modules/CheckAtomic.cmake
 create mode 100644 final/cmake/modules/ChooseMSVCCRT.cmake
 create mode 100644 final/cmake/modules/CrossCompileLLVM.cmake
 create mode 100755 final/cmake/modules/FindBison.cmake
 create mode 100644 final/cmake/modules/GetTargetTriple.cmake
 create mode 100644 final/cmake/modules/HandleLLVMOptions.cmake
 create mode 100644 final/cmake/modules/LLVM.cmake
 create mode 100755 final/cmake/modules/LLVMConfig.cmake
 create mode 100644 final/cmake/modules/LLVMLibDeps.cmake
 create mode 100644 final/cmake/modules/LLVMParseArguments.cmake
 create mode 100644 final/cmake/modules/LLVMProcessSources.cmake
 create mode 100644 final/cmake/modules/TableGen.cmake
 create mode 100644 final/cmake/modules/VersionFromVCS.cmake
 create mode 100755 final/configure
 create mode 100644 final/docs/AliasAnalysis.html
 create mode 100644 final/docs/BitCodeFormat.html
 create mode 100644 final/docs/Bugpoint.html
 create mode 100644 final/docs/CFEBuildInstrs.html
 create mode 100644 final/docs/CMake.html
 create mode 100644 final/docs/CodeGenerator.html
 create mode 100644 final/docs/CodingStandards.html
 create mode 100644 final/docs/CommandGuide/FileCheck.pod
 create mode 100644 final/docs/CommandGuide/Makefile
 create mode 100644 final/docs/CommandGuide/bugpoint.pod
 create mode 100644 final/docs/CommandGuide/html/manpage.css
 create mode 100644 final/docs/CommandGuide/index.html
 create mode 100644 final/docs/CommandGuide/lit.pod
 create mode 100644 final/docs/CommandGuide/llc.pod
 create mode 100644 final/docs/CommandGuide/lli.pod
 create mode 100644 final/docs/CommandGuide/llvm-ar.pod
 create mode 100644 final/docs/CommandGuide/llvm-as.pod
 create mode 100644 final/docs/CommandGuide/llvm-bcanalyzer.pod
 create mode 100644 final/docs/CommandGuide/llvm-config.pod
 create mode 100644 final/docs/CommandGuide/llvm-diff.pod
 create mode 100644 final/docs/CommandGuide/llvm-dis.pod
 create mode 100644 final/docs/CommandGuide/llvm-extract.pod
 create mode 100644 final/docs/CommandGuide/llvm-ld.pod
 create mode 100644 final/docs/CommandGuide/llvm-link.pod
 create mode 100644 final/docs/CommandGuide/llvm-nm.pod
 create mode 100644 final/docs/CommandGuide/llvm-prof.pod
 create mode 100644 final/docs/CommandGuide/llvm-ranlib.pod
 create mode 100644 final/docs/CommandGuide/llvmc.pod
 create mode 100644 final/docs/CommandGuide/llvmgcc.pod
 create mode 100644 final/docs/CommandGuide/llvmgxx.pod
 create mode 100644 final/docs/CommandGuide/manpage.css
 create mode 100644 final/docs/CommandGuide/opt.pod
 create mode 100644 final/docs/CommandGuide/tblgen.pod
 create mode 100644 final/docs/CommandLine.html
 create mode 100644 final/docs/CompilerDriver.html
 create mode 100644 final/docs/CompilerDriverTutorial.html
 create mode 100644 final/docs/CompilerWriterInfo.html
 create mode 100644 final/docs/DebuggingJITedCode.html
 create mode 100644 final/docs/DeveloperPolicy.html
 create mode 100644 final/docs/ExceptionHandling.html
 create mode 100644 final/docs/ExtendedIntegerResults.txt
 create mode 100644 final/docs/ExtendingLLVM.html
 create mode 100644 final/docs/FAQ.html
 create mode 100644 final/docs/GCCFEBuildInstrs.html
 create mode 100644 final/docs/GarbageCollection.html
 create mode 100644 final/docs/GetElementPtr.html
 create mode 100644 final/docs/GettingStarted.html
 create mode 100644 final/docs/GettingStartedVS.html
 create mode 100644 final/docs/GoldPlugin.html
 create mode 100644 final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt
 create mode 100644 final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt
 create mode 100644 final/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt
 create mode 100644 final/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt
 create mode 100644 final/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-09-AdveComments.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt
 create mode 100644 final/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt
 create mode 100644 final/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt
 create mode 100644 final/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt
 create mode 100644 final/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt
 create mode 100644 final/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt
 create mode 100644 final/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
 create mode 100644 final/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt
 create mode 100644 final/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt
 create mode 100644 final/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt
 create mode 100644 final/docs/HistoricalNotes/2002-05-12-InstListChange.txt
 create mode 100644 final/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt
 create mode 100644 final/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt
 create mode 100644 final/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt
 create mode 100644 final/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt
 create mode 100644 final/docs/HistoricalNotes/2007-OriginalClangReadme.txt
 create mode 100644 final/docs/HowToReleaseLLVM.html
 create mode 100644 final/docs/HowToSubmitABug.html
 create mode 100644 final/docs/LangRef.html
 create mode 100644 final/docs/Lexicon.html
 create mode 100644 final/docs/LinkTimeOptimization.html
 create mode 100644 final/docs/Makefile
 create mode 100644 final/docs/MakefileGuide.html
 create mode 100644 final/docs/Packaging.html
 create mode 100644 final/docs/Passes.html
 create mode 100644 final/docs/ProgrammersManual.html
 create mode 100644 final/docs/Projects.html
 create mode 100644 final/docs/ReleaseNotes.html
 create mode 100644 final/docs/SourceLevelDebugging.html
 create mode 100644 final/docs/SystemLibrary.html
 create mode 100644 final/docs/TableGenFundamentals.html
 create mode 100644 final/docs/TestingGuide.html
 create mode 100644 final/docs/UsingLibraries.html
 create mode 100644 final/docs/WritingAnLLVMBackend.html
 create mode 100644 final/docs/WritingAnLLVMPass.html
 create mode 100644 final/docs/doxygen.cfg.in
 create mode 100644 final/docs/doxygen.css
 create mode 100644 final/docs/doxygen.footer
 create mode 100644 final/docs/doxygen.header
 create mode 100644 final/docs/doxygen.intro
 create mode 100644 final/docs/img/Debugging.gif
 create mode 100644 final/docs/img/libdeps.gif
 create mode 100644 final/docs/img/lines.gif
 create mode 100644 final/docs/img/objdeps.gif
 create mode 100644 final/docs/img/venusflytrap.jpg
 create mode 100644 final/docs/index.html
 create mode 100644 final/docs/llvm.css
 create mode 100644 final/docs/re_format.7
 create mode 100644 final/docs/tutorial/LangImpl1.html
 create mode 100644 final/docs/tutorial/LangImpl2.html
 create mode 100644 final/docs/tutorial/LangImpl3.html
 create mode 100644 final/docs/tutorial/LangImpl4.html
 create mode 100644 final/docs/tutorial/LangImpl5-cfg.png
 create mode 100644 final/docs/tutorial/LangImpl5.html
 create mode 100644 final/docs/tutorial/LangImpl6.html
 create mode 100644 final/docs/tutorial/LangImpl7.html
 create mode 100644 final/docs/tutorial/LangImpl8.html
 create mode 100644 final/docs/tutorial/Makefile
 create mode 100644 final/docs/tutorial/OCamlLangImpl1.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl2.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl3.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl4.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl5.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl6.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl7.html
 create mode 100644 final/docs/tutorial/OCamlLangImpl8.html
 create mode 100644 final/docs/tutorial/index.html
 create mode 100644 final/examples/BrainF/BrainF.cpp
 create mode 100644 final/examples/BrainF/BrainF.h
 create mode 100644 final/examples/BrainF/BrainFDriver.cpp
 create mode 100644 final/examples/BrainF/CMakeLists.txt
 create mode 100644 final/examples/BrainF/Makefile
 create mode 100644 final/examples/CMakeLists.txt
 create mode 100644 final/examples/ExceptionDemo/CMakeLists.txt
 create mode 100644 final/examples/ExceptionDemo/ExceptionDemo.cpp
 create mode 100644 final/examples/ExceptionDemo/Makefile
 create mode 100644 final/examples/Fibonacci/CMakeLists.txt
 create mode 100644 final/examples/Fibonacci/Makefile
 create mode 100644 final/examples/Fibonacci/fibonacci.cpp
 create mode 100644 final/examples/HowToUseJIT/CMakeLists.txt
 create mode 100644 final/examples/HowToUseJIT/HowToUseJIT.cpp
 create mode 100644 final/examples/HowToUseJIT/Makefile
 create mode 100644 final/examples/Kaleidoscope/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter2/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter2/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter2/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Chapter3/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter3/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter3/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Chapter4/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter4/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter4/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Chapter5/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter5/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter5/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Chapter6/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter6/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter6/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Chapter7/CMakeLists.txt
 create mode 100644 final/examples/Kaleidoscope/Chapter7/Makefile
 create mode 100644 final/examples/Kaleidoscope/Chapter7/toy.cpp
 create mode 100644 final/examples/Kaleidoscope/Makefile
 create mode 100644 final/examples/Makefile
 create mode 100644 final/examples/ModuleMaker/CMakeLists.txt
 create mode 100644 final/examples/ModuleMaker/Makefile
 create mode 100644 final/examples/ModuleMaker/ModuleMaker.cpp
 create mode 100644 final/examples/ModuleMaker/README.txt
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter2/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter3/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/bindings.c
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter4/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/bindings.c
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter5/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/bindings.c
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter6/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/Makefile
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/_tags
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/ast.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/bindings.c
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/parser.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/token.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Chapter7/toy.ml
 create mode 100644 final/examples/OCaml-Kaleidoscope/Makefile
 create mode 100644 final/examples/ParallelJIT/CMakeLists.txt
 create mode 100644 final/examples/ParallelJIT/Makefile
 create mode 100644 final/examples/ParallelJIT/ParallelJIT.cpp
 create mode 100644 final/include/llvm-c/Analysis.h
 create mode 100644 final/include/llvm-c/BitReader.h
 create mode 100644 final/include/llvm-c/BitWriter.h
 create mode 100644 final/include/llvm-c/Core.h
 create mode 100644 final/include/llvm-c/EnhancedDisassembly.h
 create mode 100644 final/include/llvm-c/ExecutionEngine.h
 create mode 100644 final/include/llvm-c/Initialization.h
 create mode 100644 final/include/llvm-c/LinkTimeOptimizer.h
 create mode 100644 final/include/llvm-c/Target.h
 create mode 100644 final/include/llvm-c/Transforms/IPO.h
 create mode 100644 final/include/llvm-c/Transforms/Scalar.h
 create mode 100644 final/include/llvm-c/lto.h
 create mode 100644 final/include/llvm/ADT/APFloat.h
 create mode 100644 final/include/llvm/ADT/APInt.h
 create mode 100644 final/include/llvm/ADT/APSInt.h
 create mode 100644 final/include/llvm/ADT/ArrayRef.h
 create mode 100644 final/include/llvm/ADT/BitVector.h
 create mode 100644 final/include/llvm/ADT/DAGDeltaAlgorithm.h
 create mode 100644 final/include/llvm/ADT/DeltaAlgorithm.h
 create mode 100644 final/include/llvm/ADT/DenseMap.h
 create mode 100644 final/include/llvm/ADT/DenseMapInfo.h
 create mode 100644 final/include/llvm/ADT/DenseSet.h
 create mode 100644 final/include/llvm/ADT/DepthFirstIterator.h
 create mode 100644 final/include/llvm/ADT/EquivalenceClasses.h
 create mode 100644 final/include/llvm/ADT/FoldingSet.h
 create mode 100644 final/include/llvm/ADT/GraphTraits.h
 create mode 100644 final/include/llvm/ADT/ImmutableIntervalMap.h
 create mode 100644 final/include/llvm/ADT/ImmutableList.h
 create mode 100644 final/include/llvm/ADT/ImmutableMap.h
 create mode 100644 final/include/llvm/ADT/ImmutableSet.h
 create mode 100644 final/include/llvm/ADT/InMemoryStruct.h
 create mode 100644 final/include/llvm/ADT/IndexedMap.h
 create mode 100644 final/include/llvm/ADT/IntEqClasses.h
 create mode 100644 final/include/llvm/ADT/IntervalMap.h
 create mode 100644 final/include/llvm/ADT/IntrusiveRefCntPtr.h
 create mode 100644 final/include/llvm/ADT/NullablePtr.h
 create mode 100644 final/include/llvm/ADT/Optional.h
 create mode 100644 final/include/llvm/ADT/OwningPtr.h
 create mode 100644 final/include/llvm/ADT/PointerIntPair.h
 create mode 100644 final/include/llvm/ADT/PointerUnion.h
 create mode 100644 final/include/llvm/ADT/PostOrderIterator.h
 create mode 100644 final/include/llvm/ADT/PriorityQueue.h
 create mode 100644 final/include/llvm/ADT/SCCIterator.h
 create mode 100644 final/include/llvm/ADT/STLExtras.h
 create mode 100644 final/include/llvm/ADT/ScopedHashTable.h
 create mode 100644 final/include/llvm/ADT/SetOperations.h
 create mode 100644 final/include/llvm/ADT/SetVector.h
 create mode 100644 final/include/llvm/ADT/SmallBitVector.h
 create mode 100644 final/include/llvm/ADT/SmallPtrSet.h
 create mode 100644 final/include/llvm/ADT/SmallSet.h
 create mode 100644 final/include/llvm/ADT/SmallString.h
 create mode 100644 final/include/llvm/ADT/SmallVector.h
 create mode 100644 final/include/llvm/ADT/SparseBitVector.h
 create mode 100644 final/include/llvm/ADT/Statistic.h
 create mode 100644 final/include/llvm/ADT/StringExtras.h
 create mode 100644 final/include/llvm/ADT/StringMap.h
 create mode 100644 final/include/llvm/ADT/StringRef.h
 create mode 100644 final/include/llvm/ADT/StringSet.h
 create mode 100644 final/include/llvm/ADT/StringSwitch.h
 create mode 100644 final/include/llvm/ADT/Trie.h
 create mode 100644 final/include/llvm/ADT/Triple.h
 create mode 100644 final/include/llvm/ADT/Twine.h
 create mode 100644 final/include/llvm/ADT/UniqueVector.h
 create mode 100644 final/include/llvm/ADT/ValueMap.h
 create mode 100644 final/include/llvm/ADT/VectorExtras.h
 create mode 100644 final/include/llvm/ADT/ilist.h
 create mode 100644 final/include/llvm/ADT/ilist_node.h
 create mode 100644 final/include/llvm/AbstractTypeUser.h
 create mode 100644 final/include/llvm/Analysis/AliasAnalysis.h
 create mode 100644 final/include/llvm/Analysis/AliasSetTracker.h
 create mode 100644 final/include/llvm/Analysis/CFGPrinter.h
 create mode 100644 final/include/llvm/Analysis/CallGraph.h
 create mode 100644 final/include/llvm/Analysis/CaptureTracking.h
 create mode 100644 final/include/llvm/Analysis/CodeMetrics.h
 create mode 100644 final/include/llvm/Analysis/ConstantFolding.h
 create mode 100644 final/include/llvm/Analysis/ConstantsScanner.h
 create mode 100644 final/include/llvm/Analysis/DIBuilder.h
 create mode 100644 final/include/llvm/Analysis/DOTGraphTraitsPass.h
 create mode 100644 final/include/llvm/Analysis/DebugInfo.h
 create mode 100644 final/include/llvm/Analysis/DomPrinter.h
 create mode 100644 final/include/llvm/Analysis/DominanceFrontier.h
 create mode 100644 final/include/llvm/Analysis/DominatorInternals.h
 create mode 100644 final/include/llvm/Analysis/Dominators.h
 create mode 100644 final/include/llvm/Analysis/FindUsedTypes.h
 create mode 100644 final/include/llvm/Analysis/IVUsers.h
 create mode 100644 final/include/llvm/Analysis/InlineCost.h
 create mode 100644 final/include/llvm/Analysis/InstructionSimplify.h
 create mode 100644 final/include/llvm/Analysis/Interval.h
 create mode 100644 final/include/llvm/Analysis/IntervalIterator.h
 create mode 100644 final/include/llvm/Analysis/IntervalPartition.h
 create mode 100644 final/include/llvm/Analysis/LazyValueInfo.h
 create mode 100644 final/include/llvm/Analysis/LibCallAliasAnalysis.h
 create mode 100644 final/include/llvm/Analysis/LibCallSemantics.h
 create mode 100644 final/include/llvm/Analysis/Lint.h
 create mode 100644 final/include/llvm/Analysis/Loads.h
 create mode 100644 final/include/llvm/Analysis/LoopDependenceAnalysis.h
 create mode 100644 final/include/llvm/Analysis/LoopInfo.h
 create mode 100644 final/include/llvm/Analysis/LoopPass.h
 create mode 100644 final/include/llvm/Analysis/MemoryBuiltins.h
 create mode 100644 final/include/llvm/Analysis/MemoryDependenceAnalysis.h
 create mode 100644 final/include/llvm/Analysis/PHITransAddr.h
 create mode 100644 final/include/llvm/Analysis/Passes.h
 create mode 100644 final/include/llvm/Analysis/PathNumbering.h
 create mode 100644 final/include/llvm/Analysis/PathProfileInfo.h
 create mode 100644 final/include/llvm/Analysis/PostDominators.h
 create mode 100644 final/include/llvm/Analysis/ProfileInfo.h
 create mode 100644 final/include/llvm/Analysis/ProfileInfoLoader.h
 create mode 100644 final/include/llvm/Analysis/ProfileInfoTypes.h
 create mode 100644 final/include/llvm/Analysis/RegionInfo.h
 create mode 100644 final/include/llvm/Analysis/RegionIterator.h
 create mode 100644 final/include/llvm/Analysis/RegionPass.h
 create mode 100644 final/include/llvm/Analysis/RegionPrinter.h
 create mode 100644 final/include/llvm/Analysis/ScalarEvolution.h
 create mode 100644 final/include/llvm/Analysis/ScalarEvolutionExpander.h
 create mode 100644 final/include/llvm/Analysis/ScalarEvolutionExpressions.h
 create mode 100644 final/include/llvm/Analysis/ScalarEvolutionNormalization.h
 create mode 100644 final/include/llvm/Analysis/SparsePropagation.h
 create mode 100644 final/include/llvm/Analysis/Trace.h
 create mode 100644 final/include/llvm/Analysis/ValueTracking.h
 create mode 100644 final/include/llvm/Analysis/Verifier.h
 create mode 100644 final/include/llvm/Argument.h
 create mode 100644 final/include/llvm/Assembly/AssemblyAnnotationWriter.h
 create mode 100644 final/include/llvm/Assembly/Parser.h
 create mode 100644 final/include/llvm/Assembly/PrintModulePass.h
 create mode 100644 final/include/llvm/Assembly/Writer.h
 create mode 100644 final/include/llvm/Attributes.h
 create mode 100644 final/include/llvm/AutoUpgrade.h
 create mode 100644 final/include/llvm/BasicBlock.h
 create mode 100644 final/include/llvm/Bitcode/Archive.h
 create mode 100644 final/include/llvm/Bitcode/BitCodes.h
 create mode 100644 final/include/llvm/Bitcode/BitstreamReader.h
 create mode 100644 final/include/llvm/Bitcode/BitstreamWriter.h
 create mode 100644 final/include/llvm/Bitcode/LLVMBitCodes.h
 create mode 100644 final/include/llvm/Bitcode/ReaderWriter.h
 create mode 100644 final/include/llvm/CMakeLists.txt
 create mode 100644 final/include/llvm/CallGraphSCCPass.h
 create mode 100644 final/include/llvm/CallingConv.h
 create mode 100644 final/include/llvm/CodeGen/Analysis.h
 create mode 100644 final/include/llvm/CodeGen/AsmPrinter.h
 create mode 100644 final/include/llvm/CodeGen/BinaryObject.h
 create mode 100644 final/include/llvm/CodeGen/CalcSpillWeights.h
 create mode 100644 final/include/llvm/CodeGen/CallingConvLower.h
 create mode 100644 final/include/llvm/CodeGen/EdgeBundles.h
 create mode 100644 final/include/llvm/CodeGen/FastISel.h
 create mode 100644 final/include/llvm/CodeGen/FunctionLoweringInfo.h
 create mode 100644 final/include/llvm/CodeGen/GCMetadata.h
 create mode 100644 final/include/llvm/CodeGen/GCMetadataPrinter.h
 create mode 100644 final/include/llvm/CodeGen/GCStrategy.h
 create mode 100644 final/include/llvm/CodeGen/GCs.h
 create mode 100644 final/include/llvm/CodeGen/ISDOpcodes.h
 create mode 100644 final/include/llvm/CodeGen/IntrinsicLowering.h
 create mode 100644 final/include/llvm/CodeGen/JITCodeEmitter.h
 create mode 100644 final/include/llvm/CodeGen/LatencyPriorityQueue.h
 create mode 100644 final/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
 create mode 100644 final/include/llvm/CodeGen/LinkAllCodegenComponents.h
 create mode 100644 final/include/llvm/CodeGen/LiveInterval.h
 create mode 100644 final/include/llvm/CodeGen/LiveIntervalAnalysis.h
 create mode 100644 final/include/llvm/CodeGen/LiveStackAnalysis.h
 create mode 100644 final/include/llvm/CodeGen/LiveVariables.h
 create mode 100644 final/include/llvm/CodeGen/MachORelocation.h
 create mode 100644 final/include/llvm/CodeGen/MachineBasicBlock.h
 create mode 100644 final/include/llvm/CodeGen/MachineCodeEmitter.h
 create mode 100644 final/include/llvm/CodeGen/MachineCodeInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineConstantPool.h
 create mode 100644 final/include/llvm/CodeGen/MachineDominators.h
 create mode 100644 final/include/llvm/CodeGen/MachineFrameInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineFunction.h
 create mode 100644 final/include/llvm/CodeGen/MachineFunctionAnalysis.h
 create mode 100644 final/include/llvm/CodeGen/MachineFunctionPass.h
 create mode 100644 final/include/llvm/CodeGen/MachineInstr.h
 create mode 100644 final/include/llvm/CodeGen/MachineInstrBuilder.h
 create mode 100644 final/include/llvm/CodeGen/MachineJumpTableInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineLocation.h
 create mode 100644 final/include/llvm/CodeGen/MachineLoopInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineLoopRanges.h
 create mode 100644 final/include/llvm/CodeGen/MachineMemOperand.h
 create mode 100644 final/include/llvm/CodeGen/MachineModuleInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineModuleInfoImpls.h
 create mode 100644 final/include/llvm/CodeGen/MachineOperand.h
 create mode 100644 final/include/llvm/CodeGen/MachinePassRegistry.h
 create mode 100644 final/include/llvm/CodeGen/MachineRegisterInfo.h
 create mode 100644 final/include/llvm/CodeGen/MachineRelocation.h
 create mode 100644 final/include/llvm/CodeGen/MachineSSAUpdater.h
 create mode 100644 final/include/llvm/CodeGen/ObjectCodeEmitter.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/Graph.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/HeuristicBase.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/HeuristicSolver.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/Math.h
 create mode 100644 final/include/llvm/CodeGen/PBQP/Solution.h
 create mode 100644 final/include/llvm/CodeGen/Passes.h
 create mode 100644 final/include/llvm/CodeGen/ProcessImplicitDefs.h
 create mode 100644 final/include/llvm/CodeGen/PseudoSourceValue.h
 create mode 100644 final/include/llvm/CodeGen/RegAllocPBQP.h
 create mode 100644 final/include/llvm/CodeGen/RegAllocRegistry.h
 create mode 100644 final/include/llvm/CodeGen/RegisterCoalescer.h
 create mode 100644 final/include/llvm/CodeGen/RegisterScavenging.h
 create mode 100644 final/include/llvm/CodeGen/RuntimeLibcalls.h
 create mode 100644 final/include/llvm/CodeGen/ScheduleDAG.h
 create mode 100644 final/include/llvm/CodeGen/ScheduleHazardRecognizer.h
 create mode 100644 final/include/llvm/CodeGen/SchedulerRegistry.h
 create mode 100644 final/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
 create mode 100644 final/include/llvm/CodeGen/SelectionDAG.h
 create mode 100644 final/include/llvm/CodeGen/SelectionDAGISel.h
 create mode 100644 final/include/llvm/CodeGen/SelectionDAGNodes.h
 create mode 100644 final/include/llvm/CodeGen/SlotIndexes.h
 create mode 100644 final/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
 create mode 100644 final/include/llvm/CodeGen/ValueTypes.h
 create mode 100644 final/include/llvm/CodeGen/ValueTypes.td
 create mode 100644 final/include/llvm/CompilerDriver/Action.h
 create mode 100644 final/include/llvm/CompilerDriver/AutoGenerated.h
 create mode 100644 final/include/llvm/CompilerDriver/BuiltinOptions.h
final/include/llvm/CompilerDriver/BuiltinOptions.h create mode 100644 final/include/llvm/CompilerDriver/Common.td create mode 100644 final/include/llvm/CompilerDriver/CompilationGraph.h create mode 100644 final/include/llvm/CompilerDriver/Error.h create mode 100644 final/include/llvm/CompilerDriver/Main.h create mode 100644 final/include/llvm/CompilerDriver/Main.inc create mode 100644 final/include/llvm/CompilerDriver/Tool.h create mode 100644 final/include/llvm/Config/AsmParsers.def.in create mode 100644 final/include/llvm/Config/AsmPrinters.def.in create mode 100644 final/include/llvm/Config/Disassemblers.def.in create mode 100644 final/include/llvm/Config/Targets.def.in create mode 100644 final/include/llvm/Config/config.h.cmake create mode 100644 final/include/llvm/Config/config.h.in create mode 100644 final/include/llvm/Config/llvm-config.h.cmake create mode 100644 final/include/llvm/Config/llvm-config.h.in create mode 100644 final/include/llvm/Constant.h create mode 100644 final/include/llvm/Constants.h create mode 100644 final/include/llvm/DerivedTypes.h create mode 100644 final/include/llvm/ExecutionEngine/ExecutionEngine.h create mode 100644 final/include/llvm/ExecutionEngine/GenericValue.h create mode 100644 final/include/llvm/ExecutionEngine/Interpreter.h create mode 100644 final/include/llvm/ExecutionEngine/JIT.h create mode 100644 final/include/llvm/ExecutionEngine/JITEventListener.h create mode 100644 final/include/llvm/ExecutionEngine/JITMemoryManager.h create mode 100644 final/include/llvm/ExecutionEngine/MCJIT.h create mode 100644 final/include/llvm/Function.h create mode 100644 final/include/llvm/GVMaterializer.h create mode 100644 final/include/llvm/GlobalAlias.h create mode 100644 final/include/llvm/GlobalValue.h create mode 100644 final/include/llvm/GlobalVariable.h create mode 100644 final/include/llvm/InitializePasses.h create mode 100644 final/include/llvm/InlineAsm.h create mode 100644 final/include/llvm/InstrTypes.h create mode 100644 final/include/llvm/Instruction.def create mode 100644 final/include/llvm/Instruction.h create mode 100644 final/include/llvm/Instructions.h create mode 100644 final/include/llvm/IntrinsicInst.h create mode 100644 final/include/llvm/Intrinsics.h create mode 100644 final/include/llvm/Intrinsics.td create mode 100644 final/include/llvm/IntrinsicsARM.td create mode 100644 final/include/llvm/IntrinsicsAlpha.td create mode 100644 final/include/llvm/IntrinsicsCellSPU.td create mode 100644 final/include/llvm/IntrinsicsPTX.td create mode 100644 final/include/llvm/IntrinsicsPowerPC.td create mode 100644 final/include/llvm/IntrinsicsX86.td create mode 100644 final/include/llvm/IntrinsicsXCore.td create mode 100644 final/include/llvm/LLVMContext.h create mode 100644 final/include/llvm/LinkAllPasses.h create mode 100644 final/include/llvm/LinkAllVMCore.h create mode 100644 final/include/llvm/Linker.h create mode 100644 final/include/llvm/MC/EDInstInfo.h create mode 100644 final/include/llvm/MC/MCAsmInfo.h create mode 100644 final/include/llvm/MC/MCAsmInfoCOFF.h create mode 100644 final/include/llvm/MC/MCAsmInfoDarwin.h create mode 100644 final/include/llvm/MC/MCAsmLayout.h create mode 100644 final/include/llvm/MC/MCAssembler.h create mode 100644 final/include/llvm/MC/MCCodeEmitter.h create mode 100644 final/include/llvm/MC/MCContext.h create mode 100644 final/include/llvm/MC/MCDirectives.h create mode 100644 final/include/llvm/MC/MCDisassembler.h create mode 100644 final/include/llvm/MC/MCDwarf.h create mode 100644 
final/include/llvm/MC/MCELFObjectWriter.h create mode 100644 final/include/llvm/MC/MCELFSymbolFlags.h create mode 100644 final/include/llvm/MC/MCExpr.h create mode 100644 final/include/llvm/MC/MCFixup.h create mode 100644 final/include/llvm/MC/MCFixupKindInfo.h create mode 100644 final/include/llvm/MC/MCInst.h create mode 100644 final/include/llvm/MC/MCInstPrinter.h create mode 100644 final/include/llvm/MC/MCLabel.h create mode 100644 final/include/llvm/MC/MCMachOSymbolFlags.h create mode 100644 final/include/llvm/MC/MCMachObjectWriter.h create mode 100644 final/include/llvm/MC/MCObjectStreamer.h create mode 100644 final/include/llvm/MC/MCObjectWriter.h create mode 100644 final/include/llvm/MC/MCParser/AsmCond.h create mode 100644 final/include/llvm/MC/MCParser/AsmLexer.h create mode 100644 final/include/llvm/MC/MCParser/MCAsmLexer.h create mode 100644 final/include/llvm/MC/MCParser/MCAsmParser.h create mode 100644 final/include/llvm/MC/MCParser/MCAsmParserExtension.h create mode 100644 final/include/llvm/MC/MCParser/MCParsedAsmOperand.h create mode 100644 final/include/llvm/MC/MCSection.h create mode 100644 final/include/llvm/MC/MCSectionCOFF.h create mode 100644 final/include/llvm/MC/MCSectionELF.h create mode 100644 final/include/llvm/MC/MCSectionMachO.h create mode 100644 final/include/llvm/MC/MCStreamer.h create mode 100644 final/include/llvm/MC/MCSymbol.h create mode 100644 final/include/llvm/MC/MCValue.h create mode 100644 final/include/llvm/MC/SectionKind.h create mode 100644 final/include/llvm/Metadata.h create mode 100644 final/include/llvm/Module.h create mode 100644 final/include/llvm/Object/MachOFormat.h create mode 100644 final/include/llvm/Object/MachOObject.h create mode 100644 final/include/llvm/Object/ObjectFile.h create mode 100644 final/include/llvm/OperandTraits.h create mode 100644 final/include/llvm/Operator.h create mode 100644 final/include/llvm/Pass.h create mode 100644 final/include/llvm/PassAnalysisSupport.h create mode 100644 final/include/llvm/PassManager.h create mode 100644 final/include/llvm/PassManagers.h create mode 100644 final/include/llvm/PassRegistry.h create mode 100644 final/include/llvm/PassSupport.h create mode 100644 final/include/llvm/Support/AIXDataTypesFix.h create mode 100644 final/include/llvm/Support/AlignOf.h create mode 100644 final/include/llvm/Support/Allocator.h create mode 100644 final/include/llvm/Support/Atomic.h create mode 100644 final/include/llvm/Support/CFG.h create mode 100644 final/include/llvm/Support/COFF.h create mode 100644 final/include/llvm/Support/CallSite.h create mode 100644 final/include/llvm/Support/Casting.h create mode 100644 final/include/llvm/Support/CommandLine.h create mode 100644 final/include/llvm/Support/Compiler.h create mode 100644 final/include/llvm/Support/ConstantFolder.h create mode 100644 final/include/llvm/Support/ConstantRange.h create mode 100644 final/include/llvm/Support/CrashRecoveryContext.h create mode 100644 final/include/llvm/Support/DOTGraphTraits.h create mode 100644 final/include/llvm/Support/DataFlow.h create mode 100644 final/include/llvm/Support/DataTypes.h.cmake create mode 100644 final/include/llvm/Support/DataTypes.h.in create mode 100644 final/include/llvm/Support/Debug.h create mode 100644 final/include/llvm/Support/DebugLoc.h create mode 100644 final/include/llvm/Support/Disassembler.h create mode 100644 final/include/llvm/Support/Dwarf.h create mode 100644 final/include/llvm/Support/DynamicLibrary.h create mode 100644 final/include/llvm/Support/ELF.h create mode 100644 
final/include/llvm/Support/Endian.h create mode 100644 final/include/llvm/Support/Errno.h create mode 100644 final/include/llvm/Support/ErrorHandling.h create mode 100644 final/include/llvm/Support/FEnv.h create mode 100644 final/include/llvm/Support/FileSystem.h create mode 100644 final/include/llvm/Support/FileUtilities.h create mode 100644 final/include/llvm/Support/Format.h create mode 100644 final/include/llvm/Support/FormattedStream.h create mode 100644 final/include/llvm/Support/GetElementPtrTypeIterator.h create mode 100644 final/include/llvm/Support/GraphWriter.h create mode 100644 final/include/llvm/Support/Host.h create mode 100644 final/include/llvm/Support/IRBuilder.h create mode 100644 final/include/llvm/Support/IRReader.h create mode 100644 final/include/llvm/Support/IncludeFile.h create mode 100644 final/include/llvm/Support/InstIterator.h create mode 100644 final/include/llvm/Support/InstVisitor.h create mode 100644 final/include/llvm/Support/LICENSE.TXT create mode 100644 final/include/llvm/Support/LeakDetector.h create mode 100644 final/include/llvm/Support/MachO.h create mode 100644 final/include/llvm/Support/ManagedStatic.h create mode 100644 final/include/llvm/Support/MathExtras.h create mode 100644 final/include/llvm/Support/Memory.h create mode 100644 final/include/llvm/Support/MemoryBuffer.h create mode 100644 final/include/llvm/Support/MemoryObject.h create mode 100644 final/include/llvm/Support/Mutex.h create mode 100644 final/include/llvm/Support/MutexGuard.h create mode 100644 final/include/llvm/Support/NoFolder.h create mode 100644 final/include/llvm/Support/OutputBuffer.h create mode 100644 final/include/llvm/Support/PassNameParser.h create mode 100644 final/include/llvm/Support/Path.h create mode 100644 final/include/llvm/Support/PathV1.h create mode 100644 final/include/llvm/Support/PathV2.h create mode 100644 final/include/llvm/Support/PatternMatch.h create mode 100644 final/include/llvm/Support/PluginLoader.h create mode 100644 final/include/llvm/Support/PointerLikeTypeTraits.h create mode 100644 final/include/llvm/Support/PredIteratorCache.h create mode 100644 final/include/llvm/Support/PrettyStackTrace.h create mode 100644 final/include/llvm/Support/Process.h create mode 100644 final/include/llvm/Support/Program.h create mode 100644 final/include/llvm/Support/RWMutex.h create mode 100644 final/include/llvm/Support/Recycler.h create mode 100644 final/include/llvm/Support/RecyclingAllocator.h create mode 100644 final/include/llvm/Support/Regex.h create mode 100644 final/include/llvm/Support/Registry.h create mode 100644 final/include/llvm/Support/RegistryParser.h create mode 100644 final/include/llvm/Support/SMLoc.h create mode 100644 final/include/llvm/Support/Signals.h create mode 100644 final/include/llvm/Support/Solaris.h create mode 100644 final/include/llvm/Support/SourceMgr.h create mode 100644 final/include/llvm/Support/StandardPasses.h create mode 100644 final/include/llvm/Support/StringPool.h create mode 100644 final/include/llvm/Support/SwapByteOrder.h create mode 100644 final/include/llvm/Support/SystemUtils.h create mode 100644 final/include/llvm/Support/TargetFolder.h create mode 100644 final/include/llvm/Support/ThreadLocal.h create mode 100644 final/include/llvm/Support/Threading.h create mode 100644 final/include/llvm/Support/TimeValue.h create mode 100644 final/include/llvm/Support/Timer.h create mode 100644 final/include/llvm/Support/ToolOutputFile.h create mode 100644 final/include/llvm/Support/TypeBuilder.h create mode 100644 
final/include/llvm/Support/Valgrind.h create mode 100644 final/include/llvm/Support/ValueHandle.h create mode 100644 final/include/llvm/Support/circular_raw_ostream.h create mode 100644 final/include/llvm/Support/raw_os_ostream.h create mode 100644 final/include/llvm/Support/raw_ostream.h create mode 100644 final/include/llvm/Support/system_error.h create mode 100644 final/include/llvm/Support/type_traits.h create mode 100644 final/include/llvm/SymbolTableListTraits.h create mode 100644 final/include/llvm/Target/Mangler.h create mode 100644 final/include/llvm/Target/SubtargetFeature.h create mode 100644 final/include/llvm/Target/Target.td create mode 100644 final/include/llvm/Target/TargetAsmBackend.h create mode 100644 final/include/llvm/Target/TargetAsmInfo.h create mode 100644 final/include/llvm/Target/TargetAsmLexer.h create mode 100644 final/include/llvm/Target/TargetAsmParser.h create mode 100644 final/include/llvm/Target/TargetCallingConv.h create mode 100644 final/include/llvm/Target/TargetCallingConv.td create mode 100644 final/include/llvm/Target/TargetData.h create mode 100644 final/include/llvm/Target/TargetELFWriterInfo.h create mode 100644 final/include/llvm/Target/TargetFrameLowering.h create mode 100644 final/include/llvm/Target/TargetInstrDesc.h create mode 100644 final/include/llvm/Target/TargetInstrInfo.h create mode 100644 final/include/llvm/Target/TargetInstrItineraries.h create mode 100644 final/include/llvm/Target/TargetIntrinsicInfo.h create mode 100644 final/include/llvm/Target/TargetJITInfo.h create mode 100644 final/include/llvm/Target/TargetLibraryInfo.h create mode 100644 final/include/llvm/Target/TargetLowering.h create mode 100644 final/include/llvm/Target/TargetLoweringObjectFile.h create mode 100644 final/include/llvm/Target/TargetMachine.h create mode 100644 final/include/llvm/Target/TargetOpcodes.h create mode 100644 final/include/llvm/Target/TargetOptions.h create mode 100644 final/include/llvm/Target/TargetRegisterInfo.h create mode 100644 final/include/llvm/Target/TargetRegistry.h create mode 100644 final/include/llvm/Target/TargetSchedule.td create mode 100644 final/include/llvm/Target/TargetSelect.h create mode 100644 final/include/llvm/Target/TargetSelectionDAG.td create mode 100644 final/include/llvm/Target/TargetSelectionDAGInfo.h create mode 100644 final/include/llvm/Target/TargetSubtarget.h create mode 100644 final/include/llvm/Transforms/IPO.h create mode 100644 final/include/llvm/Transforms/IPO/InlinerPass.h create mode 100644 final/include/llvm/Transforms/Instrumentation.h create mode 100644 final/include/llvm/Transforms/Scalar.h create mode 100644 final/include/llvm/Transforms/Utils/AddrModeMatcher.h create mode 100644 final/include/llvm/Transforms/Utils/BasicBlockUtils.h create mode 100644 final/include/llvm/Transforms/Utils/BasicInliner.h create mode 100644 final/include/llvm/Transforms/Utils/BuildLibCalls.h create mode 100644 final/include/llvm/Transforms/Utils/Cloning.h create mode 100644 final/include/llvm/Transforms/Utils/FunctionUtils.h create mode 100644 final/include/llvm/Transforms/Utils/Local.h create mode 100644 final/include/llvm/Transforms/Utils/PromoteMemToReg.h create mode 100644 final/include/llvm/Transforms/Utils/SSAUpdater.h create mode 100644 final/include/llvm/Transforms/Utils/SSAUpdaterImpl.h create mode 100644 final/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h create mode 100644 final/include/llvm/Transforms/Utils/UnrollLoop.h create mode 100644 final/include/llvm/Transforms/Utils/ValueMapper.h create mode 
100644 final/include/llvm/Type.h create mode 100644 final/include/llvm/TypeSymbolTable.h create mode 100644 final/include/llvm/Use.h create mode 100644 final/include/llvm/User.h create mode 100644 final/include/llvm/Value.h create mode 100644 final/include/llvm/ValueSymbolTable.h create mode 100644 final/lib/Analysis/AliasAnalysis.cpp create mode 100644 final/lib/Analysis/AliasAnalysisCounter.cpp create mode 100644 final/lib/Analysis/AliasAnalysisEvaluator.cpp create mode 100644 final/lib/Analysis/AliasDebugger.cpp create mode 100644 final/lib/Analysis/AliasSetTracker.cpp create mode 100644 final/lib/Analysis/Analysis.cpp create mode 100644 final/lib/Analysis/BasicAliasAnalysis.cpp create mode 100644 final/lib/Analysis/CFGPrinter.cpp create mode 100644 final/lib/Analysis/CMakeLists.txt create mode 100644 final/lib/Analysis/CaptureTracking.cpp create mode 100644 final/lib/Analysis/ConstantFolding.cpp create mode 100644 final/lib/Analysis/DIBuilder.cpp create mode 100644 final/lib/Analysis/DbgInfoPrinter.cpp create mode 100644 final/lib/Analysis/DebugInfo.cpp create mode 100644 final/lib/Analysis/DomPrinter.cpp create mode 100644 final/lib/Analysis/DominanceFrontier.cpp create mode 100644 final/lib/Analysis/IPA/CMakeLists.txt create mode 100644 final/lib/Analysis/IPA/CallGraph.cpp create mode 100644 final/lib/Analysis/IPA/CallGraphSCCPass.cpp create mode 100644 final/lib/Analysis/IPA/FindUsedTypes.cpp create mode 100644 final/lib/Analysis/IPA/GlobalsModRef.cpp create mode 100644 final/lib/Analysis/IPA/IPA.cpp create mode 100644 final/lib/Analysis/IPA/Makefile create mode 100644 final/lib/Analysis/IVUsers.cpp create mode 100644 final/lib/Analysis/InlineCost.cpp create mode 100644 final/lib/Analysis/InstCount.cpp create mode 100644 final/lib/Analysis/InstructionSimplify.cpp create mode 100644 final/lib/Analysis/Interval.cpp create mode 100644 final/lib/Analysis/IntervalPartition.cpp create mode 100644 final/lib/Analysis/LazyValueInfo.cpp create mode 100644 final/lib/Analysis/LibCallAliasAnalysis.cpp create mode 100644 final/lib/Analysis/LibCallSemantics.cpp create mode 100644 final/lib/Analysis/Lint.cpp create mode 100644 final/lib/Analysis/Loads.cpp create mode 100644 final/lib/Analysis/LoopDependenceAnalysis.cpp create mode 100644 final/lib/Analysis/LoopInfo.cpp create mode 100644 final/lib/Analysis/LoopPass.cpp create mode 100644 final/lib/Analysis/Makefile create mode 100644 final/lib/Analysis/MemDepPrinter.cpp create mode 100644 final/lib/Analysis/MemoryBuiltins.cpp create mode 100644 final/lib/Analysis/MemoryDependenceAnalysis.cpp create mode 100644 final/lib/Analysis/ModuleDebugInfoPrinter.cpp create mode 100644 final/lib/Analysis/NoAliasAnalysis.cpp create mode 100644 final/lib/Analysis/PHITransAddr.cpp create mode 100644 final/lib/Analysis/PathNumbering.cpp create mode 100644 final/lib/Analysis/PathProfileInfo.cpp create mode 100644 final/lib/Analysis/PathProfileVerifier.cpp create mode 100644 final/lib/Analysis/PostDominators.cpp create mode 100644 final/lib/Analysis/ProfileEstimatorPass.cpp create mode 100644 final/lib/Analysis/ProfileInfo.cpp create mode 100644 final/lib/Analysis/ProfileInfoLoader.cpp create mode 100644 final/lib/Analysis/ProfileInfoLoaderPass.cpp create mode 100644 final/lib/Analysis/ProfileVerifierPass.cpp create mode 100644 final/lib/Analysis/README.txt create mode 100644 final/lib/Analysis/RegionInfo.cpp create mode 100644 final/lib/Analysis/RegionPass.cpp create mode 100644 final/lib/Analysis/RegionPrinter.cpp create mode 100644 
final/lib/Analysis/ScalarEvolution.cpp create mode 100644 final/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp create mode 100644 final/lib/Analysis/ScalarEvolutionExpander.cpp create mode 100644 final/lib/Analysis/ScalarEvolutionNormalization.cpp create mode 100644 final/lib/Analysis/SparsePropagation.cpp create mode 100644 final/lib/Analysis/Trace.cpp create mode 100644 final/lib/Analysis/TypeBasedAliasAnalysis.cpp create mode 100644 final/lib/Analysis/ValueTracking.cpp create mode 100644 final/lib/Archive/Archive.cpp create mode 100644 final/lib/Archive/ArchiveInternals.h create mode 100644 final/lib/Archive/ArchiveReader.cpp create mode 100644 final/lib/Archive/ArchiveWriter.cpp create mode 100644 final/lib/Archive/CMakeLists.txt create mode 100644 final/lib/Archive/Makefile create mode 100644 final/lib/AsmParser/CMakeLists.txt create mode 100644 final/lib/AsmParser/LLLexer.cpp create mode 100644 final/lib/AsmParser/LLLexer.h create mode 100644 final/lib/AsmParser/LLParser.cpp create mode 100644 final/lib/AsmParser/LLParser.h create mode 100644 final/lib/AsmParser/LLToken.h create mode 100644 final/lib/AsmParser/Makefile create mode 100644 final/lib/AsmParser/Parser.cpp create mode 100644 final/lib/Bitcode/CMakeLists.txt create mode 100644 final/lib/Bitcode/Makefile create mode 100644 final/lib/Bitcode/Reader/BitReader.cpp create mode 100644 final/lib/Bitcode/Reader/BitcodeReader.cpp create mode 100644 final/lib/Bitcode/Reader/BitcodeReader.h create mode 100644 final/lib/Bitcode/Reader/CMakeLists.txt create mode 100644 final/lib/Bitcode/Reader/Makefile create mode 100644 final/lib/Bitcode/Writer/BitWriter.cpp create mode 100644 final/lib/Bitcode/Writer/BitcodeWriter.cpp create mode 100644 final/lib/Bitcode/Writer/BitcodeWriterPass.cpp create mode 100644 final/lib/Bitcode/Writer/CMakeLists.txt create mode 100644 final/lib/Bitcode/Writer/Makefile create mode 100644 final/lib/Bitcode/Writer/ValueEnumerator.cpp create mode 100644 final/lib/Bitcode/Writer/ValueEnumerator.h create mode 100644 final/lib/CMakeLists.txt create mode 100644 final/lib/CodeGen/AggressiveAntiDepBreaker.cpp create mode 100644 final/lib/CodeGen/AggressiveAntiDepBreaker.h create mode 100644 final/lib/CodeGen/AllocationOrder.cpp create mode 100644 final/lib/CodeGen/AllocationOrder.h create mode 100644 final/lib/CodeGen/Analysis.cpp create mode 100644 final/lib/CodeGen/AntiDepBreaker.h create mode 100644 final/lib/CodeGen/AsmPrinter/ARMException.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/AsmPrinter.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/CMakeLists.txt create mode 100644 final/lib/CodeGen/AsmPrinter/DIE.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/DIE.h create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfDebug.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfDebug.h create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfException.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfException.h create mode 100644 final/lib/CodeGen/AsmPrinter/DwarfTableException.cpp create mode 100644 final/lib/CodeGen/AsmPrinter/Makefile create mode 100644 final/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp create mode 100644 final/lib/CodeGen/BranchFolding.cpp create mode 100644 final/lib/CodeGen/BranchFolding.h create mode 100644 final/lib/CodeGen/CMakeLists.txt create mode 100644 
final/lib/CodeGen/CalcSpillWeights.cpp create mode 100644 final/lib/CodeGen/CallingConvLower.cpp create mode 100644 final/lib/CodeGen/CodeGen.cpp create mode 100644 final/lib/CodeGen/CodePlacementOpt.cpp create mode 100644 final/lib/CodeGen/CriticalAntiDepBreaker.cpp create mode 100644 final/lib/CodeGen/CriticalAntiDepBreaker.h create mode 100644 final/lib/CodeGen/DeadMachineInstructionElim.cpp create mode 100644 final/lib/CodeGen/DwarfEHPrepare.cpp create mode 100644 final/lib/CodeGen/ELF.h create mode 100644 final/lib/CodeGen/ELFCodeEmitter.cpp create mode 100644 final/lib/CodeGen/ELFCodeEmitter.h create mode 100644 final/lib/CodeGen/ELFWriter.cpp create mode 100644 final/lib/CodeGen/ELFWriter.h create mode 100644 final/lib/CodeGen/EdgeBundles.cpp create mode 100644 final/lib/CodeGen/ExpandISelPseudos.cpp create mode 100644 final/lib/CodeGen/GCMetadata.cpp create mode 100644 final/lib/CodeGen/GCMetadataPrinter.cpp create mode 100644 final/lib/CodeGen/GCStrategy.cpp create mode 100644 final/lib/CodeGen/IfConversion.cpp create mode 100644 final/lib/CodeGen/InlineSpiller.cpp create mode 100644 final/lib/CodeGen/IntrinsicLowering.cpp create mode 100644 final/lib/CodeGen/LLVMTargetMachine.cpp create mode 100644 final/lib/CodeGen/LatencyPriorityQueue.cpp create mode 100644 final/lib/CodeGen/LiveDebugVariables.cpp create mode 100644 final/lib/CodeGen/LiveDebugVariables.h create mode 100644 final/lib/CodeGen/LiveInterval.cpp create mode 100644 final/lib/CodeGen/LiveIntervalAnalysis.cpp create mode 100644 final/lib/CodeGen/LiveIntervalUnion.cpp create mode 100644 final/lib/CodeGen/LiveIntervalUnion.h create mode 100644 final/lib/CodeGen/LiveRangeEdit.cpp create mode 100644 final/lib/CodeGen/LiveRangeEdit.h create mode 100644 final/lib/CodeGen/LiveStackAnalysis.cpp create mode 100644 final/lib/CodeGen/LiveVariables.cpp create mode 100644 final/lib/CodeGen/LocalStackSlotAllocation.cpp create mode 100644 final/lib/CodeGen/LowerSubregs.cpp create mode 100644 final/lib/CodeGen/MachineBasicBlock.cpp create mode 100644 final/lib/CodeGen/MachineCSE.cpp create mode 100644 final/lib/CodeGen/MachineDominators.cpp create mode 100644 final/lib/CodeGen/MachineFunction.cpp create mode 100644 final/lib/CodeGen/MachineFunctionAnalysis.cpp create mode 100644 final/lib/CodeGen/MachineFunctionPass.cpp create mode 100644 final/lib/CodeGen/MachineFunctionPrinterPass.cpp create mode 100644 final/lib/CodeGen/MachineInstr.cpp create mode 100644 final/lib/CodeGen/MachineLICM.cpp create mode 100644 final/lib/CodeGen/MachineLoopInfo.cpp create mode 100644 final/lib/CodeGen/MachineLoopRanges.cpp create mode 100644 final/lib/CodeGen/MachineModuleInfo.cpp create mode 100644 final/lib/CodeGen/MachineModuleInfoImpls.cpp create mode 100644 final/lib/CodeGen/MachinePassRegistry.cpp create mode 100644 final/lib/CodeGen/MachineRegisterInfo.cpp create mode 100644 final/lib/CodeGen/MachineSSAUpdater.cpp create mode 100644 final/lib/CodeGen/MachineSink.cpp create mode 100644 final/lib/CodeGen/MachineVerifier.cpp create mode 100644 final/lib/CodeGen/Makefile create mode 100644 final/lib/CodeGen/ObjectCodeEmitter.cpp create mode 100644 final/lib/CodeGen/OcamlGC.cpp create mode 100644 final/lib/CodeGen/OptimizePHIs.cpp create mode 100644 final/lib/CodeGen/PHIElimination.cpp create mode 100644 final/lib/CodeGen/PHIEliminationUtils.cpp create mode 100644 final/lib/CodeGen/PHIEliminationUtils.h create mode 100644 final/lib/CodeGen/Passes.cpp create mode 100644 final/lib/CodeGen/PeepholeOptimizer.cpp create mode 100644 
final/lib/CodeGen/PostRASchedulerList.cpp create mode 100644 final/lib/CodeGen/PreAllocSplitting.cpp create mode 100644 final/lib/CodeGen/ProcessImplicitDefs.cpp create mode 100644 final/lib/CodeGen/PrologEpilogInserter.cpp create mode 100644 final/lib/CodeGen/PrologEpilogInserter.h create mode 100644 final/lib/CodeGen/PseudoSourceValue.cpp create mode 100644 final/lib/CodeGen/README.txt create mode 100644 final/lib/CodeGen/RegAllocBase.h create mode 100644 final/lib/CodeGen/RegAllocBasic.cpp create mode 100644 final/lib/CodeGen/RegAllocFast.cpp create mode 100644 final/lib/CodeGen/RegAllocGreedy.cpp create mode 100644 final/lib/CodeGen/RegAllocLinearScan.cpp create mode 100644 final/lib/CodeGen/RegAllocPBQP.cpp create mode 100644 final/lib/CodeGen/RegisterCoalescer.cpp create mode 100644 final/lib/CodeGen/RegisterScavenging.cpp create mode 100644 final/lib/CodeGen/RenderMachineFunction.cpp create mode 100644 final/lib/CodeGen/RenderMachineFunction.h create mode 100644 final/lib/CodeGen/ScheduleDAG.cpp create mode 100644 final/lib/CodeGen/ScheduleDAGEmit.cpp create mode 100644 final/lib/CodeGen/ScheduleDAGInstrs.cpp create mode 100644 final/lib/CodeGen/ScheduleDAGInstrs.h create mode 100644 final/lib/CodeGen/ScheduleDAGPrinter.cpp create mode 100644 final/lib/CodeGen/ScoreboardHazardRecognizer.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/CMakeLists.txt create mode 100644 final/lib/CodeGen/SelectionDAG/DAGCombiner.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/FastISel.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/InstrEmitter.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/InstrEmitter.h create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeTypes.h create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/Makefile create mode 100644 final/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h create mode 100644 final/lib/CodeGen/SelectionDAG/SDNodeOrdering.h create mode 100644 final/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h create mode 100644 final/lib/CodeGen/SelectionDAG/SelectionDAG.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h create mode 100644 final/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/TargetLowering.cpp create mode 100644 final/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp create mode 100644 final/lib/CodeGen/ShadowStackGC.cpp create mode 100644 final/lib/CodeGen/ShrinkWrapping.cpp create mode 100644 final/lib/CodeGen/SimpleRegisterCoalescing.cpp create mode 100644 
final/lib/CodeGen/SimpleRegisterCoalescing.h create mode 100644 final/lib/CodeGen/SjLjEHPrepare.cpp create mode 100644 final/lib/CodeGen/SlotIndexes.cpp create mode 100644 final/lib/CodeGen/SpillPlacement.cpp create mode 100644 final/lib/CodeGen/SpillPlacement.h create mode 100644 final/lib/CodeGen/Spiller.cpp create mode 100644 final/lib/CodeGen/Spiller.h create mode 100644 final/lib/CodeGen/SplitKit.cpp create mode 100644 final/lib/CodeGen/SplitKit.h create mode 100644 final/lib/CodeGen/Splitter.cpp create mode 100644 final/lib/CodeGen/Splitter.h create mode 100644 final/lib/CodeGen/StackProtector.cpp create mode 100644 final/lib/CodeGen/StackSlotColoring.cpp create mode 100644 final/lib/CodeGen/StrongPHIElimination.cpp create mode 100644 final/lib/CodeGen/TailDuplication.cpp create mode 100644 final/lib/CodeGen/TargetInstrInfoImpl.cpp create mode 100644 final/lib/CodeGen/TargetLoweringObjectFileImpl.cpp create mode 100644 final/lib/CodeGen/TwoAddressInstructionPass.cpp create mode 100644 final/lib/CodeGen/UnreachableBlockElim.cpp create mode 100644 final/lib/CodeGen/VirtRegMap.cpp create mode 100644 final/lib/CodeGen/VirtRegMap.h create mode 100644 final/lib/CodeGen/VirtRegRewriter.cpp create mode 100644 final/lib/CodeGen/VirtRegRewriter.h create mode 100644 final/lib/CompilerDriver/Action.cpp create mode 100644 final/lib/CompilerDriver/BuiltinOptions.cpp create mode 100644 final/lib/CompilerDriver/CMakeLists.txt create mode 100644 final/lib/CompilerDriver/CompilationGraph.cpp create mode 100644 final/lib/CompilerDriver/Main.cpp create mode 100644 final/lib/CompilerDriver/Makefile create mode 100644 final/lib/CompilerDriver/Tool.cpp create mode 100644 final/lib/ExecutionEngine/CMakeLists.txt create mode 100644 final/lib/ExecutionEngine/ExecutionEngine.cpp create mode 100644 final/lib/ExecutionEngine/ExecutionEngineBindings.cpp create mode 100644 final/lib/ExecutionEngine/Interpreter/CMakeLists.txt create mode 100644 final/lib/ExecutionEngine/Interpreter/Execution.cpp create mode 100644 final/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp create mode 100644 final/lib/ExecutionEngine/Interpreter/Interpreter.cpp create mode 100644 final/lib/ExecutionEngine/Interpreter/Interpreter.h create mode 100644 final/lib/ExecutionEngine/Interpreter/Makefile create mode 100644 final/lib/ExecutionEngine/JIT/CMakeLists.txt create mode 100644 final/lib/ExecutionEngine/JIT/Intercept.cpp create mode 100644 final/lib/ExecutionEngine/JIT/JIT.cpp create mode 100644 final/lib/ExecutionEngine/JIT/JIT.h create mode 100644 final/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp create mode 100644 final/lib/ExecutionEngine/JIT/JITDebugRegisterer.h create mode 100644 final/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp create mode 100644 final/lib/ExecutionEngine/JIT/JITDwarfEmitter.h create mode 100644 final/lib/ExecutionEngine/JIT/JITEmitter.cpp create mode 100644 final/lib/ExecutionEngine/JIT/JITMemoryManager.cpp create mode 100644 final/lib/ExecutionEngine/JIT/Makefile create mode 100644 final/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp create mode 100644 final/lib/ExecutionEngine/JIT/TargetSelect.cpp create mode 100644 final/lib/ExecutionEngine/MCJIT/CMakeLists.txt create mode 100644 final/lib/ExecutionEngine/MCJIT/MCJIT.cpp create mode 100644 final/lib/ExecutionEngine/MCJIT/MCJIT.h create mode 100644 final/lib/ExecutionEngine/MCJIT/Makefile create mode 100644 final/lib/ExecutionEngine/MCJIT/TargetSelect.cpp create mode 100644 final/lib/ExecutionEngine/Makefile create mode 100644 
final/lib/Linker/CMakeLists.txt create mode 100644 final/lib/Linker/LinkArchives.cpp create mode 100644 final/lib/Linker/LinkItems.cpp create mode 100644 final/lib/Linker/LinkModules.cpp create mode 100644 final/lib/Linker/Linker.cpp create mode 100644 final/lib/Linker/Makefile create mode 100644 final/lib/MC/CMakeLists.txt create mode 100644 final/lib/MC/ELFObjectWriter.cpp create mode 100644 final/lib/MC/ELFObjectWriter.h create mode 100644 final/lib/MC/MCAsmInfo.cpp create mode 100644 final/lib/MC/MCAsmInfoCOFF.cpp create mode 100644 final/lib/MC/MCAsmInfoDarwin.cpp create mode 100644 final/lib/MC/MCAsmStreamer.cpp create mode 100644 final/lib/MC/MCAssembler.cpp create mode 100644 final/lib/MC/MCCodeEmitter.cpp create mode 100644 final/lib/MC/MCContext.cpp create mode 100644 final/lib/MC/MCDisassembler.cpp create mode 100644 final/lib/MC/MCDisassembler/CMakeLists.txt create mode 100644 final/lib/MC/MCDisassembler/EDDisassembler.cpp create mode 100644 final/lib/MC/MCDisassembler/EDDisassembler.h create mode 100644 final/lib/MC/MCDisassembler/EDInfo.h create mode 100644 final/lib/MC/MCDisassembler/EDInst.cpp create mode 100644 final/lib/MC/MCDisassembler/EDInst.h create mode 100644 final/lib/MC/MCDisassembler/EDOperand.cpp create mode 100644 final/lib/MC/MCDisassembler/EDOperand.h create mode 100644 final/lib/MC/MCDisassembler/EDToken.cpp create mode 100644 final/lib/MC/MCDisassembler/EDToken.h create mode 100644 final/lib/MC/MCDisassembler/Makefile create mode 100644 final/lib/MC/MCDwarf.cpp create mode 100644 final/lib/MC/MCELF.cpp create mode 100644 final/lib/MC/MCELF.h create mode 100644 final/lib/MC/MCELFObjectTargetWriter.cpp create mode 100644 final/lib/MC/MCELFStreamer.cpp create mode 100644 final/lib/MC/MCELFStreamer.h create mode 100644 final/lib/MC/MCExpr.cpp create mode 100644 final/lib/MC/MCInst.cpp create mode 100644 final/lib/MC/MCInstPrinter.cpp create mode 100644 final/lib/MC/MCLabel.cpp create mode 100644 final/lib/MC/MCLoggingStreamer.cpp create mode 100644 final/lib/MC/MCMachOStreamer.cpp create mode 100644 final/lib/MC/MCMachObjectTargetWriter.cpp create mode 100644 final/lib/MC/MCNullStreamer.cpp create mode 100644 final/lib/MC/MCObjectStreamer.cpp create mode 100644 final/lib/MC/MCObjectWriter.cpp create mode 100644 final/lib/MC/MCParser/AsmLexer.cpp create mode 100644 final/lib/MC/MCParser/AsmParser.cpp create mode 100644 final/lib/MC/MCParser/CMakeLists.txt create mode 100644 final/lib/MC/MCParser/COFFAsmParser.cpp create mode 100644 final/lib/MC/MCParser/DarwinAsmParser.cpp create mode 100644 final/lib/MC/MCParser/ELFAsmParser.cpp create mode 100644 final/lib/MC/MCParser/MCAsmLexer.cpp create mode 100644 final/lib/MC/MCParser/MCAsmParser.cpp create mode 100644 final/lib/MC/MCParser/MCAsmParserExtension.cpp create mode 100644 final/lib/MC/MCParser/Makefile create mode 100644 final/lib/MC/MCParser/TargetAsmParser.cpp create mode 100644 final/lib/MC/MCPureStreamer.cpp create mode 100644 final/lib/MC/MCSection.cpp create mode 100644 final/lib/MC/MCSectionCOFF.cpp create mode 100644 final/lib/MC/MCSectionELF.cpp create mode 100644 final/lib/MC/MCSectionMachO.cpp create mode 100644 final/lib/MC/MCStreamer.cpp create mode 100644 final/lib/MC/MCSymbol.cpp create mode 100644 final/lib/MC/MCValue.cpp create mode 100644 final/lib/MC/MachObjectWriter.cpp create mode 100644 final/lib/MC/Makefile create mode 100644 final/lib/MC/TargetAsmBackend.cpp create mode 100644 final/lib/MC/WinCOFFObjectWriter.cpp create mode 100644 final/lib/MC/WinCOFFStreamer.cpp create mode 100644 
final/lib/Makefile create mode 100644 final/lib/Object/CMakeLists.txt create mode 100644 final/lib/Object/COFFObjectFile.cpp create mode 100644 final/lib/Object/ELFObjectFile.cpp create mode 100644 final/lib/Object/MachOObject.cpp create mode 100644 final/lib/Object/Makefile create mode 100644 final/lib/Object/ObjectFile.cpp create mode 100644 final/lib/Support/APFloat.cpp create mode 100644 final/lib/Support/APInt.cpp create mode 100644 final/lib/Support/APSInt.cpp create mode 100644 final/lib/Support/Allocator.cpp create mode 100644 final/lib/Support/Atomic.cpp create mode 100644 final/lib/Support/CMakeLists.txt create mode 100644 final/lib/Support/COPYRIGHT.regex create mode 100644 final/lib/Support/CommandLine.cpp create mode 100644 final/lib/Support/ConstantRange.cpp create mode 100644 final/lib/Support/CrashRecoveryContext.cpp create mode 100644 final/lib/Support/DAGDeltaAlgorithm.cpp create mode 100644 final/lib/Support/Debug.cpp create mode 100644 final/lib/Support/DeltaAlgorithm.cpp create mode 100644 final/lib/Support/Disassembler.cpp create mode 100644 final/lib/Support/Dwarf.cpp create mode 100644 final/lib/Support/DynamicLibrary.cpp create mode 100644 final/lib/Support/Errno.cpp create mode 100644 final/lib/Support/ErrorHandling.cpp create mode 100644 final/lib/Support/FileUtilities.cpp create mode 100644 final/lib/Support/FoldingSet.cpp create mode 100644 final/lib/Support/FormattedStream.cpp create mode 100644 final/lib/Support/GraphWriter.cpp create mode 100644 final/lib/Support/Host.cpp create mode 100644 final/lib/Support/IncludeFile.cpp create mode 100644 final/lib/Support/IntEqClasses.cpp create mode 100644 final/lib/Support/IntervalMap.cpp create mode 100644 final/lib/Support/IsInf.cpp create mode 100644 final/lib/Support/IsNAN.cpp create mode 100644 final/lib/Support/Makefile create mode 100644 final/lib/Support/ManagedStatic.cpp create mode 100644 final/lib/Support/Memory.cpp create mode 100644 final/lib/Support/MemoryBuffer.cpp create mode 100644 final/lib/Support/MemoryObject.cpp create mode 100644 final/lib/Support/Mutex.cpp create mode 100644 final/lib/Support/Path.cpp create mode 100644 final/lib/Support/PathV2.cpp create mode 100644 final/lib/Support/PluginLoader.cpp create mode 100644 final/lib/Support/PrettyStackTrace.cpp create mode 100644 final/lib/Support/Process.cpp create mode 100644 final/lib/Support/Program.cpp create mode 100644 final/lib/Support/README.txt.system create mode 100644 final/lib/Support/RWMutex.cpp create mode 100644 final/lib/Support/Regex.cpp create mode 100644 final/lib/Support/SearchForAddressOfSpecialSymbol.cpp create mode 100644 final/lib/Support/Signals.cpp create mode 100644 final/lib/Support/SmallPtrSet.cpp create mode 100644 final/lib/Support/SmallVector.cpp create mode 100644 final/lib/Support/SourceMgr.cpp create mode 100644 final/lib/Support/Statistic.cpp create mode 100644 final/lib/Support/StringExtras.cpp create mode 100644 final/lib/Support/StringMap.cpp create mode 100644 final/lib/Support/StringPool.cpp create mode 100644 final/lib/Support/StringRef.cpp create mode 100644 final/lib/Support/SystemUtils.cpp create mode 100644 final/lib/Support/TargetRegistry.cpp create mode 100644 final/lib/Support/ThreadLocal.cpp create mode 100644 final/lib/Support/Threading.cpp create mode 100644 final/lib/Support/TimeValue.cpp create mode 100644 final/lib/Support/Timer.cpp create mode 100644 final/lib/Support/ToolOutputFile.cpp create mode 100644 final/lib/Support/Triple.cpp create mode 100644 final/lib/Support/Twine.cpp create mode 
100644 final/lib/Support/Unix/Host.inc create mode 100644 final/lib/Support/Unix/Memory.inc create mode 100644 final/lib/Support/Unix/Mutex.inc create mode 100644 final/lib/Support/Unix/Path.inc create mode 100644 final/lib/Support/Unix/PathV2.inc create mode 100644 final/lib/Support/Unix/Process.inc create mode 100644 final/lib/Support/Unix/Program.inc create mode 100644 final/lib/Support/Unix/README.txt create mode 100644 final/lib/Support/Unix/RWMutex.inc create mode 100644 final/lib/Support/Unix/Signals.inc create mode 100644 final/lib/Support/Unix/ThreadLocal.inc create mode 100644 final/lib/Support/Unix/TimeValue.inc create mode 100644 final/lib/Support/Unix/Unix.h create mode 100644 final/lib/Support/Unix/system_error.inc create mode 100644 final/lib/Support/Valgrind.cpp create mode 100644 final/lib/Support/Windows/DynamicLibrary.inc create mode 100644 final/lib/Support/Windows/Host.inc create mode 100644 final/lib/Support/Windows/Memory.inc create mode 100644 final/lib/Support/Windows/Mutex.inc create mode 100644 final/lib/Support/Windows/Path.inc create mode 100644 final/lib/Support/Windows/PathV2.inc create mode 100644 final/lib/Support/Windows/Process.inc create mode 100644 final/lib/Support/Windows/Program.inc create mode 100644 final/lib/Support/Windows/RWMutex.inc create mode 100644 final/lib/Support/Windows/Signals.inc create mode 100644 final/lib/Support/Windows/ThreadLocal.inc create mode 100644 final/lib/Support/Windows/TimeValue.inc create mode 100644 final/lib/Support/Windows/Windows.h create mode 100644 final/lib/Support/Windows/explicit_symbols.inc create mode 100644 final/lib/Support/Windows/system_error.inc create mode 100644 final/lib/Support/circular_raw_ostream.cpp create mode 100644 final/lib/Support/raw_os_ostream.cpp create mode 100644 final/lib/Support/raw_ostream.cpp create mode 100644 final/lib/Support/regcclass.h create mode 100644 final/lib/Support/regcname.h create mode 100644 final/lib/Support/regcomp.c create mode 100644 final/lib/Support/regengine.inc create mode 100644 final/lib/Support/regerror.c create mode 100644 final/lib/Support/regex2.h create mode 100644 final/lib/Support/regex_impl.h create mode 100644 final/lib/Support/regexec.c create mode 100644 final/lib/Support/regfree.c create mode 100644 final/lib/Support/regstrlcpy.c create mode 100644 final/lib/Support/regutils.h create mode 100644 final/lib/Support/system_error.cpp create mode 100644 final/lib/Target/ARM/ARM.h create mode 100644 final/lib/Target/ARM/ARM.td create mode 100644 final/lib/Target/ARM/ARMAddressingModes.h create mode 100644 final/lib/Target/ARM/ARMAsmBackend.cpp create mode 100644 final/lib/Target/ARM/ARMAsmPrinter.cpp create mode 100644 final/lib/Target/ARM/ARMAsmPrinter.h create mode 100644 final/lib/Target/ARM/ARMBaseInfo.h create mode 100644 final/lib/Target/ARM/ARMBaseInstrInfo.cpp create mode 100644 final/lib/Target/ARM/ARMBaseInstrInfo.h create mode 100644 final/lib/Target/ARM/ARMBaseRegisterInfo.cpp create mode 100644 final/lib/Target/ARM/ARMBaseRegisterInfo.h create mode 100644 final/lib/Target/ARM/ARMBuildAttrs.h create mode 100644 final/lib/Target/ARM/ARMCallingConv.h create mode 100644 final/lib/Target/ARM/ARMCallingConv.td create mode 100644 final/lib/Target/ARM/ARMCodeEmitter.cpp create mode 100644 final/lib/Target/ARM/ARMConstantIslandPass.cpp create mode 100644 final/lib/Target/ARM/ARMConstantPoolValue.cpp create mode 100644 final/lib/Target/ARM/ARMConstantPoolValue.h create mode 100644 final/lib/Target/ARM/ARMELFWriterInfo.cpp create mode 100644 
final/lib/Target/ARM/ARMELFWriterInfo.h create mode 100644 final/lib/Target/ARM/ARMExpandPseudoInsts.cpp create mode 100644 final/lib/Target/ARM/ARMFastISel.cpp create mode 100644 final/lib/Target/ARM/ARMFixupKinds.h create mode 100644 final/lib/Target/ARM/ARMFrameLowering.cpp create mode 100644 final/lib/Target/ARM/ARMFrameLowering.h create mode 100644 final/lib/Target/ARM/ARMGlobalMerge.cpp create mode 100644 final/lib/Target/ARM/ARMHazardRecognizer.cpp create mode 100644 final/lib/Target/ARM/ARMHazardRecognizer.h create mode 100644 final/lib/Target/ARM/ARMISelDAGToDAG.cpp create mode 100644 final/lib/Target/ARM/ARMISelLowering.cpp create mode 100644 final/lib/Target/ARM/ARMISelLowering.h create mode 100644 final/lib/Target/ARM/ARMInstrFormats.td create mode 100644 final/lib/Target/ARM/ARMInstrInfo.cpp create mode 100644 final/lib/Target/ARM/ARMInstrInfo.h create mode 100644 final/lib/Target/ARM/ARMInstrInfo.td create mode 100644 final/lib/Target/ARM/ARMInstrNEON.td create mode 100644 final/lib/Target/ARM/ARMInstrThumb.td create mode 100644 final/lib/Target/ARM/ARMInstrThumb2.td create mode 100644 final/lib/Target/ARM/ARMInstrVFP.td create mode 100644 final/lib/Target/ARM/ARMJITInfo.cpp create mode 100644 final/lib/Target/ARM/ARMJITInfo.h create mode 100644 final/lib/Target/ARM/ARMLoadStoreOptimizer.cpp create mode 100644 final/lib/Target/ARM/ARMMCAsmInfo.cpp create mode 100644 final/lib/Target/ARM/ARMMCAsmInfo.h create mode 100644 final/lib/Target/ARM/ARMMCCodeEmitter.cpp create mode 100644 final/lib/Target/ARM/ARMMCExpr.cpp create mode 100644 final/lib/Target/ARM/ARMMCExpr.h create mode 100644 final/lib/Target/ARM/ARMMCInstLower.cpp create mode 100644 final/lib/Target/ARM/ARMMachineFunctionInfo.h create mode 100644 final/lib/Target/ARM/ARMPerfectShuffle.h create mode 100644 final/lib/Target/ARM/ARMRegisterInfo.cpp create mode 100644 final/lib/Target/ARM/ARMRegisterInfo.h create mode 100644 final/lib/Target/ARM/ARMRegisterInfo.td create mode 100644 final/lib/Target/ARM/ARMRelocations.h create mode 100644 final/lib/Target/ARM/ARMSchedule.td create mode 100644 final/lib/Target/ARM/ARMScheduleA8.td create mode 100644 final/lib/Target/ARM/ARMScheduleA9.td create mode 100644 final/lib/Target/ARM/ARMScheduleV6.td create mode 100644 final/lib/Target/ARM/ARMSelectionDAGInfo.cpp create mode 100644 final/lib/Target/ARM/ARMSelectionDAGInfo.h create mode 100644 final/lib/Target/ARM/ARMSubtarget.cpp create mode 100644 final/lib/Target/ARM/ARMSubtarget.h create mode 100644 final/lib/Target/ARM/ARMTargetMachine.cpp create mode 100644 final/lib/Target/ARM/ARMTargetMachine.h create mode 100644 final/lib/Target/ARM/ARMTargetObjectFile.cpp create mode 100644 final/lib/Target/ARM/ARMTargetObjectFile.h create mode 100644 final/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp create mode 100644 final/lib/Target/ARM/AsmParser/ARMAsmParser.cpp create mode 100644 final/lib/Target/ARM/AsmParser/CMakeLists.txt create mode 100644 final/lib/Target/ARM/AsmParser/Makefile create mode 100644 final/lib/Target/ARM/CMakeLists.txt create mode 100644 final/lib/Target/ARM/Disassembler/ARMDisassembler.cpp create mode 100644 final/lib/Target/ARM/Disassembler/ARMDisassembler.h create mode 100644 final/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp create mode 100644 final/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h create mode 100644 final/lib/Target/ARM/Disassembler/CMakeLists.txt create mode 100644 final/lib/Target/ARM/Disassembler/Makefile create mode 100644 final/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h 
create mode 100644 final/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp create mode 100644 final/lib/Target/ARM/InstPrinter/ARMInstPrinter.h create mode 100644 final/lib/Target/ARM/InstPrinter/CMakeLists.txt create mode 100644 final/lib/Target/ARM/InstPrinter/Makefile create mode 100644 final/lib/Target/ARM/MLxExpansionPass.cpp create mode 100644 final/lib/Target/ARM/Makefile create mode 100644 final/lib/Target/ARM/NEONMoveFix.cpp create mode 100644 final/lib/Target/ARM/README-Thumb.txt create mode 100644 final/lib/Target/ARM/README-Thumb2.txt create mode 100644 final/lib/Target/ARM/README.txt create mode 100644 final/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp create mode 100644 final/lib/Target/ARM/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/ARM/TargetInfo/Makefile create mode 100644 final/lib/Target/ARM/Thumb1FrameLowering.cpp create mode 100644 final/lib/Target/ARM/Thumb1FrameLowering.h create mode 100644 final/lib/Target/ARM/Thumb1InstrInfo.cpp create mode 100644 final/lib/Target/ARM/Thumb1InstrInfo.h create mode 100644 final/lib/Target/ARM/Thumb1RegisterInfo.cpp create mode 100644 final/lib/Target/ARM/Thumb1RegisterInfo.h create mode 100644 final/lib/Target/ARM/Thumb2ITBlockPass.cpp create mode 100644 final/lib/Target/ARM/Thumb2InstrInfo.cpp create mode 100644 final/lib/Target/ARM/Thumb2InstrInfo.h create mode 100644 final/lib/Target/ARM/Thumb2RegisterInfo.cpp create mode 100644 final/lib/Target/ARM/Thumb2RegisterInfo.h create mode 100644 final/lib/Target/ARM/Thumb2SizeReduction.cpp create mode 100644 final/lib/Target/Alpha/Alpha.h create mode 100644 final/lib/Target/Alpha/Alpha.td create mode 100644 final/lib/Target/Alpha/AlphaAsmPrinter.cpp create mode 100644 final/lib/Target/Alpha/AlphaBranchSelector.cpp create mode 100644 final/lib/Target/Alpha/AlphaCallingConv.td create mode 100644 final/lib/Target/Alpha/AlphaFrameLowering.cpp create mode 100644 final/lib/Target/Alpha/AlphaFrameLowering.h create mode 100644 final/lib/Target/Alpha/AlphaISelDAGToDAG.cpp create mode 100644 final/lib/Target/Alpha/AlphaISelLowering.cpp create mode 100644 final/lib/Target/Alpha/AlphaISelLowering.h create mode 100644 final/lib/Target/Alpha/AlphaInstrFormats.td create mode 100644 final/lib/Target/Alpha/AlphaInstrInfo.cpp create mode 100644 final/lib/Target/Alpha/AlphaInstrInfo.h create mode 100644 final/lib/Target/Alpha/AlphaInstrInfo.td create mode 100644 final/lib/Target/Alpha/AlphaLLRP.cpp create mode 100644 final/lib/Target/Alpha/AlphaMCAsmInfo.cpp create mode 100644 final/lib/Target/Alpha/AlphaMCAsmInfo.h create mode 100644 final/lib/Target/Alpha/AlphaMachineFunctionInfo.h create mode 100644 final/lib/Target/Alpha/AlphaRegisterInfo.cpp create mode 100644 final/lib/Target/Alpha/AlphaRegisterInfo.h create mode 100644 final/lib/Target/Alpha/AlphaRegisterInfo.td create mode 100644 final/lib/Target/Alpha/AlphaRelocations.h create mode 100644 final/lib/Target/Alpha/AlphaSchedule.td create mode 100644 final/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp create mode 100644 final/lib/Target/Alpha/AlphaSelectionDAGInfo.h create mode 100644 final/lib/Target/Alpha/AlphaSubtarget.cpp create mode 100644 final/lib/Target/Alpha/AlphaSubtarget.h create mode 100644 final/lib/Target/Alpha/AlphaTargetMachine.cpp create mode 100644 final/lib/Target/Alpha/AlphaTargetMachine.h create mode 100644 final/lib/Target/Alpha/CMakeLists.txt create mode 100644 final/lib/Target/Alpha/Makefile create mode 100644 final/lib/Target/Alpha/README.txt create mode 100644 final/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp 
create mode 100644 final/lib/Target/Alpha/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/Alpha/TargetInfo/Makefile create mode 100644 final/lib/Target/Blackfin/Blackfin.h create mode 100644 final/lib/Target/Blackfin/Blackfin.td create mode 100644 final/lib/Target/Blackfin/BlackfinAsmPrinter.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinCallingConv.td create mode 100644 final/lib/Target/Blackfin/BlackfinFrameLowering.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinFrameLowering.h create mode 100644 final/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinISelLowering.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinISelLowering.h create mode 100644 final/lib/Target/Blackfin/BlackfinInstrFormats.td create mode 100644 final/lib/Target/Blackfin/BlackfinInstrInfo.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinInstrInfo.h create mode 100644 final/lib/Target/Blackfin/BlackfinInstrInfo.td create mode 100644 final/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinIntrinsicInfo.h create mode 100644 final/lib/Target/Blackfin/BlackfinIntrinsics.td create mode 100644 final/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinMCAsmInfo.h create mode 100644 final/lib/Target/Blackfin/BlackfinRegisterInfo.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinRegisterInfo.h create mode 100644 final/lib/Target/Blackfin/BlackfinRegisterInfo.td create mode 100644 final/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h create mode 100644 final/lib/Target/Blackfin/BlackfinSubtarget.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinSubtarget.h create mode 100644 final/lib/Target/Blackfin/BlackfinTargetMachine.cpp create mode 100644 final/lib/Target/Blackfin/BlackfinTargetMachine.h create mode 100644 final/lib/Target/Blackfin/CMakeLists.txt create mode 100644 final/lib/Target/Blackfin/Makefile create mode 100644 final/lib/Target/Blackfin/README.txt create mode 100644 final/lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp create mode 100644 final/lib/Target/Blackfin/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/Blackfin/TargetInfo/Makefile create mode 100644 final/lib/Target/CBackend/CBackend.cpp create mode 100644 final/lib/Target/CBackend/CMakeLists.txt create mode 100644 final/lib/Target/CBackend/CTargetMachine.h create mode 100644 final/lib/Target/CBackend/Makefile create mode 100644 final/lib/Target/CBackend/TargetInfo/CBackendTargetInfo.cpp create mode 100644 final/lib/Target/CBackend/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/CBackend/TargetInfo/Makefile create mode 100644 final/lib/Target/CMakeLists.txt create mode 100644 final/lib/Target/CellSPU/CMakeLists.txt create mode 100644 final/lib/Target/CellSPU/CellSDKIntrinsics.td create mode 100644 final/lib/Target/CellSPU/Makefile create mode 100644 final/lib/Target/CellSPU/README.txt create mode 100644 final/lib/Target/CellSPU/SPU.h create mode 100644 final/lib/Target/CellSPU/SPU.td create mode 100644 final/lib/Target/CellSPU/SPU128InstrInfo.td create mode 100644 final/lib/Target/CellSPU/SPU64InstrInfo.td create mode 100644 final/lib/Target/CellSPU/SPUAsmPrinter.cpp create mode 100644 final/lib/Target/CellSPU/SPUCallingConv.td create mode 100644 final/lib/Target/CellSPU/SPUFrameLowering.cpp create mode 100644 final/lib/Target/CellSPU/SPUFrameLowering.h 
create mode 100644 final/lib/Target/CellSPU/SPUHazardRecognizers.cpp create mode 100644 final/lib/Target/CellSPU/SPUHazardRecognizers.h create mode 100644 final/lib/Target/CellSPU/SPUISelDAGToDAG.cpp create mode 100644 final/lib/Target/CellSPU/SPUISelLowering.cpp create mode 100644 final/lib/Target/CellSPU/SPUISelLowering.h create mode 100644 final/lib/Target/CellSPU/SPUInstrBuilder.h create mode 100644 final/lib/Target/CellSPU/SPUInstrFormats.td create mode 100644 final/lib/Target/CellSPU/SPUInstrInfo.cpp create mode 100644 final/lib/Target/CellSPU/SPUInstrInfo.h create mode 100644 final/lib/Target/CellSPU/SPUInstrInfo.td create mode 100644 final/lib/Target/CellSPU/SPUMCAsmInfo.cpp create mode 100644 final/lib/Target/CellSPU/SPUMCAsmInfo.h create mode 100644 final/lib/Target/CellSPU/SPUMachineFunction.h create mode 100644 final/lib/Target/CellSPU/SPUMathInstr.td create mode 100644 final/lib/Target/CellSPU/SPUNodes.td create mode 100644 final/lib/Target/CellSPU/SPUNopFiller.cpp create mode 100644 final/lib/Target/CellSPU/SPUOperands.td create mode 100644 final/lib/Target/CellSPU/SPURegisterInfo.cpp create mode 100644 final/lib/Target/CellSPU/SPURegisterInfo.h create mode 100644 final/lib/Target/CellSPU/SPURegisterInfo.td create mode 100644 final/lib/Target/CellSPU/SPURegisterNames.h create mode 100644 final/lib/Target/CellSPU/SPUSchedule.td create mode 100644 final/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp create mode 100644 final/lib/Target/CellSPU/SPUSelectionDAGInfo.h create mode 100644 final/lib/Target/CellSPU/SPUSubtarget.cpp create mode 100644 final/lib/Target/CellSPU/SPUSubtarget.h create mode 100644 final/lib/Target/CellSPU/SPUTargetMachine.cpp create mode 100644 final/lib/Target/CellSPU/SPUTargetMachine.h create mode 100644 final/lib/Target/CellSPU/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp create mode 100644 final/lib/Target/CellSPU/TargetInfo/Makefile create mode 100644 final/lib/Target/CppBackend/CMakeLists.txt create mode 100644 final/lib/Target/CppBackend/CPPBackend.cpp create mode 100644 final/lib/Target/CppBackend/CPPTargetMachine.h create mode 100644 final/lib/Target/CppBackend/Makefile create mode 100644 final/lib/Target/CppBackend/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/CppBackend/TargetInfo/CppBackendTargetInfo.cpp create mode 100644 final/lib/Target/CppBackend/TargetInfo/Makefile create mode 100644 final/lib/Target/MBlaze/AsmParser/CMakeLists.txt create mode 100644 final/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp create mode 100644 final/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp create mode 100644 final/lib/Target/MBlaze/AsmParser/Makefile create mode 100644 final/lib/Target/MBlaze/CMakeLists.txt create mode 100644 final/lib/Target/MBlaze/Disassembler/CMakeLists.txt create mode 100644 final/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp create mode 100644 final/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h create mode 100644 final/lib/Target/MBlaze/Disassembler/Makefile create mode 100644 final/lib/Target/MBlaze/InstPrinter/CMakeLists.txt create mode 100644 final/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp create mode 100644 final/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h create mode 100644 final/lib/Target/MBlaze/InstPrinter/Makefile create mode 100644 final/lib/Target/MBlaze/MBlaze.h create mode 100644 final/lib/Target/MBlaze/MBlaze.td create mode 100644 final/lib/Target/MBlaze/MBlazeAsmBackend.cpp create mode 100644 
final/lib/Target/MBlaze/MBlazeAsmPrinter.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeCallingConv.td create mode 100644 final/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeELFWriterInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeELFWriterInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeFrameLowering.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeFrameLowering.h create mode 100644 final/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeISelLowering.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeISelLowering.h create mode 100644 final/lib/Target/MBlaze/MBlazeInstrFPU.td create mode 100644 final/lib/Target/MBlaze/MBlazeInstrFSL.td create mode 100644 final/lib/Target/MBlaze/MBlazeInstrFormats.td create mode 100644 final/lib/Target/MBlaze/MBlazeInstrInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeInstrInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeInstrInfo.td create mode 100644 final/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeIntrinsicInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeIntrinsics.td create mode 100644 final/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeMCAsmInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeMCInstLower.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeMCInstLower.h create mode 100644 final/lib/Target/MBlaze/MBlazeMachineFunction.h create mode 100644 final/lib/Target/MBlaze/MBlazeRegisterInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeRegisterInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeRegisterInfo.td create mode 100644 final/lib/Target/MBlaze/MBlazeRelocations.h create mode 100644 final/lib/Target/MBlaze/MBlazeSchedule.td create mode 100644 final/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h create mode 100644 final/lib/Target/MBlaze/MBlazeSubtarget.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeSubtarget.h create mode 100644 final/lib/Target/MBlaze/MBlazeTargetMachine.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeTargetMachine.h create mode 100644 final/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp create mode 100644 final/lib/Target/MBlaze/MBlazeTargetObjectFile.h create mode 100644 final/lib/Target/MBlaze/Makefile create mode 100644 final/lib/Target/MBlaze/TODO create mode 100644 final/lib/Target/MBlaze/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp create mode 100644 final/lib/Target/MBlaze/TargetInfo/Makefile create mode 100644 final/lib/Target/MSP430/CMakeLists.txt create mode 100644 final/lib/Target/MSP430/InstPrinter/CMakeLists.txt create mode 100644 final/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.cpp create mode 100644 final/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h create mode 100644 final/lib/Target/MSP430/InstPrinter/Makefile create mode 100644 final/lib/Target/MSP430/MSP430.h create mode 100644 final/lib/Target/MSP430/MSP430.td create mode 100644 final/lib/Target/MSP430/MSP430AsmPrinter.cpp create mode 100644 final/lib/Target/MSP430/MSP430BranchSelector.cpp create mode 100644 final/lib/Target/MSP430/MSP430CallingConv.td create mode 100644 final/lib/Target/MSP430/MSP430FrameLowering.cpp create mode 100644 final/lib/Target/MSP430/MSP430FrameLowering.h create mode 100644 
final/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp create mode 100644 final/lib/Target/MSP430/MSP430ISelLowering.cpp create mode 100644 final/lib/Target/MSP430/MSP430ISelLowering.h create mode 100644 final/lib/Target/MSP430/MSP430InstrFormats.td create mode 100644 final/lib/Target/MSP430/MSP430InstrInfo.cpp create mode 100644 final/lib/Target/MSP430/MSP430InstrInfo.h create mode 100644 final/lib/Target/MSP430/MSP430InstrInfo.td create mode 100644 final/lib/Target/MSP430/MSP430MCAsmInfo.cpp create mode 100644 final/lib/Target/MSP430/MSP430MCAsmInfo.h create mode 100644 final/lib/Target/MSP430/MSP430MCInstLower.cpp create mode 100644 final/lib/Target/MSP430/MSP430MCInstLower.h create mode 100644 final/lib/Target/MSP430/MSP430MachineFunctionInfo.h create mode 100644 final/lib/Target/MSP430/MSP430RegisterInfo.cpp create mode 100644 final/lib/Target/MSP430/MSP430RegisterInfo.h create mode 100644 final/lib/Target/MSP430/MSP430RegisterInfo.td create mode 100644 final/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp create mode 100644 final/lib/Target/MSP430/MSP430SelectionDAGInfo.h create mode 100644 final/lib/Target/MSP430/MSP430Subtarget.cpp create mode 100644 final/lib/Target/MSP430/MSP430Subtarget.h create mode 100644 final/lib/Target/MSP430/MSP430TargetMachine.cpp create mode 100644 final/lib/Target/MSP430/MSP430TargetMachine.h create mode 100644 final/lib/Target/MSP430/Makefile create mode 100644 final/lib/Target/MSP430/README.txt create mode 100644 final/lib/Target/MSP430/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp create mode 100644 final/lib/Target/MSP430/TargetInfo/Makefile create mode 100644 final/lib/Target/Makefile create mode 100644 final/lib/Target/Mangler.cpp create mode 100644 final/lib/Target/Mips/CMakeLists.txt create mode 100644 final/lib/Target/Mips/Makefile create mode 100644 final/lib/Target/Mips/Mips.h create mode 100644 final/lib/Target/Mips/Mips.td create mode 100644 final/lib/Target/Mips/MipsAsmPrinter.cpp create mode 100644 final/lib/Target/Mips/MipsCallingConv.td create mode 100644 final/lib/Target/Mips/MipsDelaySlotFiller.cpp create mode 100644 final/lib/Target/Mips/MipsFrameLowering.cpp create mode 100644 final/lib/Target/Mips/MipsFrameLowering.h create mode 100644 final/lib/Target/Mips/MipsISelDAGToDAG.cpp create mode 100644 final/lib/Target/Mips/MipsISelLowering.cpp create mode 100644 final/lib/Target/Mips/MipsISelLowering.h create mode 100644 final/lib/Target/Mips/MipsInstrFPU.td create mode 100644 final/lib/Target/Mips/MipsInstrFormats.td create mode 100644 final/lib/Target/Mips/MipsInstrInfo.cpp create mode 100644 final/lib/Target/Mips/MipsInstrInfo.h create mode 100644 final/lib/Target/Mips/MipsInstrInfo.td create mode 100644 final/lib/Target/Mips/MipsMCAsmInfo.cpp create mode 100644 final/lib/Target/Mips/MipsMCAsmInfo.h create mode 100644 final/lib/Target/Mips/MipsMachineFunction.h create mode 100644 final/lib/Target/Mips/MipsRegisterInfo.cpp create mode 100644 final/lib/Target/Mips/MipsRegisterInfo.h create mode 100644 final/lib/Target/Mips/MipsRegisterInfo.td create mode 100644 final/lib/Target/Mips/MipsSchedule.td create mode 100644 final/lib/Target/Mips/MipsSelectionDAGInfo.cpp create mode 100644 final/lib/Target/Mips/MipsSelectionDAGInfo.h create mode 100644 final/lib/Target/Mips/MipsSubtarget.cpp create mode 100644 final/lib/Target/Mips/MipsSubtarget.h create mode 100644 final/lib/Target/Mips/MipsTargetMachine.cpp create mode 100644 final/lib/Target/Mips/MipsTargetMachine.h create mode 100644 
final/lib/Target/Mips/MipsTargetObjectFile.cpp create mode 100644 final/lib/Target/Mips/MipsTargetObjectFile.h create mode 100644 final/lib/Target/Mips/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/Mips/TargetInfo/Makefile create mode 100644 final/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp create mode 100644 final/lib/Target/PTX/CMakeLists.txt create mode 100644 final/lib/Target/PTX/Makefile create mode 100644 final/lib/Target/PTX/PTX.h create mode 100644 final/lib/Target/PTX/PTX.td create mode 100644 final/lib/Target/PTX/PTXAsmPrinter.cpp create mode 100644 final/lib/Target/PTX/PTXFrameLowering.cpp create mode 100644 final/lib/Target/PTX/PTXFrameLowering.h create mode 100644 final/lib/Target/PTX/PTXISelDAGToDAG.cpp create mode 100644 final/lib/Target/PTX/PTXISelLowering.cpp create mode 100644 final/lib/Target/PTX/PTXISelLowering.h create mode 100644 final/lib/Target/PTX/PTXInstrFormats.td create mode 100644 final/lib/Target/PTX/PTXInstrInfo.cpp create mode 100644 final/lib/Target/PTX/PTXInstrInfo.h create mode 100644 final/lib/Target/PTX/PTXInstrInfo.td create mode 100644 final/lib/Target/PTX/PTXIntrinsicInstrInfo.td create mode 100644 final/lib/Target/PTX/PTXMCAsmInfo.cpp create mode 100644 final/lib/Target/PTX/PTXMCAsmInfo.h create mode 100644 final/lib/Target/PTX/PTXMCAsmStreamer.cpp create mode 100644 final/lib/Target/PTX/PTXMFInfoExtract.cpp create mode 100644 final/lib/Target/PTX/PTXMachineFunctionInfo.h create mode 100644 final/lib/Target/PTX/PTXRegisterInfo.cpp create mode 100644 final/lib/Target/PTX/PTXRegisterInfo.h create mode 100644 final/lib/Target/PTX/PTXRegisterInfo.td create mode 100644 final/lib/Target/PTX/PTXSubtarget.cpp create mode 100644 final/lib/Target/PTX/PTXSubtarget.h create mode 100644 final/lib/Target/PTX/PTXTargetMachine.cpp create mode 100644 final/lib/Target/PTX/PTXTargetMachine.h create mode 100644 final/lib/Target/PTX/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/PTX/TargetInfo/Makefile create mode 100644 final/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp create mode 100644 final/lib/Target/PowerPC/CMakeLists.txt create mode 100644 final/lib/Target/PowerPC/InstPrinter/CMakeLists.txt create mode 100644 final/lib/Target/PowerPC/InstPrinter/Makefile create mode 100644 final/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp create mode 100644 final/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h create mode 100644 final/lib/Target/PowerPC/Makefile create mode 100644 final/lib/Target/PowerPC/PPC.h create mode 100644 final/lib/Target/PowerPC/PPC.td create mode 100644 final/lib/Target/PowerPC/PPCAsmBackend.cpp create mode 100644 final/lib/Target/PowerPC/PPCAsmPrinter.cpp create mode 100644 final/lib/Target/PowerPC/PPCBranchSelector.cpp create mode 100644 final/lib/Target/PowerPC/PPCCallingConv.td create mode 100644 final/lib/Target/PowerPC/PPCCodeEmitter.cpp create mode 100644 final/lib/Target/PowerPC/PPCFixupKinds.h create mode 100644 final/lib/Target/PowerPC/PPCFrameLowering.cpp create mode 100644 final/lib/Target/PowerPC/PPCFrameLowering.h create mode 100644 final/lib/Target/PowerPC/PPCHazardRecognizers.cpp create mode 100644 final/lib/Target/PowerPC/PPCHazardRecognizers.h create mode 100644 final/lib/Target/PowerPC/PPCISelDAGToDAG.cpp create mode 100644 final/lib/Target/PowerPC/PPCISelLowering.cpp create mode 100644 final/lib/Target/PowerPC/PPCISelLowering.h create mode 100644 final/lib/Target/PowerPC/PPCInstr64Bit.td create mode 100644 final/lib/Target/PowerPC/PPCInstrAltivec.td create mode 100644 
final/lib/Target/PowerPC/PPCInstrBuilder.h create mode 100644 final/lib/Target/PowerPC/PPCInstrFormats.td create mode 100644 final/lib/Target/PowerPC/PPCInstrInfo.cpp create mode 100644 final/lib/Target/PowerPC/PPCInstrInfo.h create mode 100644 final/lib/Target/PowerPC/PPCInstrInfo.td create mode 100644 final/lib/Target/PowerPC/PPCJITInfo.cpp create mode 100644 final/lib/Target/PowerPC/PPCJITInfo.h create mode 100644 final/lib/Target/PowerPC/PPCMCAsmInfo.cpp create mode 100644 final/lib/Target/PowerPC/PPCMCAsmInfo.h create mode 100644 final/lib/Target/PowerPC/PPCMCCodeEmitter.cpp create mode 100644 final/lib/Target/PowerPC/PPCMCInstLower.cpp create mode 100644 final/lib/Target/PowerPC/PPCMachineFunctionInfo.h create mode 100644 final/lib/Target/PowerPC/PPCPerfectShuffle.h create mode 100644 final/lib/Target/PowerPC/PPCPredicates.cpp create mode 100644 final/lib/Target/PowerPC/PPCPredicates.h create mode 100644 final/lib/Target/PowerPC/PPCRegisterInfo.cpp create mode 100644 final/lib/Target/PowerPC/PPCRegisterInfo.h create mode 100644 final/lib/Target/PowerPC/PPCRegisterInfo.td create mode 100644 final/lib/Target/PowerPC/PPCRelocations.h create mode 100644 final/lib/Target/PowerPC/PPCSchedule.td create mode 100644 final/lib/Target/PowerPC/PPCScheduleG3.td create mode 100644 final/lib/Target/PowerPC/PPCScheduleG4.td create mode 100644 final/lib/Target/PowerPC/PPCScheduleG4Plus.td create mode 100644 final/lib/Target/PowerPC/PPCScheduleG5.td create mode 100644 final/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp create mode 100644 final/lib/Target/PowerPC/PPCSelectionDAGInfo.h create mode 100644 final/lib/Target/PowerPC/PPCSubtarget.cpp create mode 100644 final/lib/Target/PowerPC/PPCSubtarget.h create mode 100644 final/lib/Target/PowerPC/PPCTargetMachine.cpp create mode 100644 final/lib/Target/PowerPC/PPCTargetMachine.h create mode 100644 final/lib/Target/PowerPC/README.txt create mode 100644 final/lib/Target/PowerPC/README_ALTIVEC.txt create mode 100644 final/lib/Target/PowerPC/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/PowerPC/TargetInfo/Makefile create mode 100644 final/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp create mode 100644 final/lib/Target/README.txt create mode 100644 final/lib/Target/Sparc/CMakeLists.txt create mode 100644 final/lib/Target/Sparc/DelaySlotFiller.cpp create mode 100644 final/lib/Target/Sparc/FPMover.cpp create mode 100644 final/lib/Target/Sparc/Makefile create mode 100644 final/lib/Target/Sparc/README.txt create mode 100644 final/lib/Target/Sparc/Sparc.h create mode 100644 final/lib/Target/Sparc/Sparc.td create mode 100644 final/lib/Target/Sparc/SparcAsmPrinter.cpp create mode 100644 final/lib/Target/Sparc/SparcCallingConv.td create mode 100644 final/lib/Target/Sparc/SparcFrameLowering.cpp create mode 100644 final/lib/Target/Sparc/SparcFrameLowering.h create mode 100644 final/lib/Target/Sparc/SparcISelDAGToDAG.cpp create mode 100644 final/lib/Target/Sparc/SparcISelLowering.cpp create mode 100644 final/lib/Target/Sparc/SparcISelLowering.h create mode 100644 final/lib/Target/Sparc/SparcInstrFormats.td create mode 100644 final/lib/Target/Sparc/SparcInstrInfo.cpp create mode 100644 final/lib/Target/Sparc/SparcInstrInfo.h create mode 100644 final/lib/Target/Sparc/SparcInstrInfo.td create mode 100644 final/lib/Target/Sparc/SparcMCAsmInfo.cpp create mode 100644 final/lib/Target/Sparc/SparcMCAsmInfo.h create mode 100644 final/lib/Target/Sparc/SparcMachineFunctionInfo.h create mode 100644 final/lib/Target/Sparc/SparcRegisterInfo.cpp create mode 100644 
final/lib/Target/Sparc/SparcRegisterInfo.h create mode 100644 final/lib/Target/Sparc/SparcRegisterInfo.td create mode 100644 final/lib/Target/Sparc/SparcSelectionDAGInfo.cpp create mode 100644 final/lib/Target/Sparc/SparcSelectionDAGInfo.h create mode 100644 final/lib/Target/Sparc/SparcSubtarget.cpp create mode 100644 final/lib/Target/Sparc/SparcSubtarget.h create mode 100644 final/lib/Target/Sparc/SparcTargetMachine.cpp create mode 100644 final/lib/Target/Sparc/SparcTargetMachine.h create mode 100644 final/lib/Target/Sparc/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/Sparc/TargetInfo/Makefile create mode 100644 final/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp create mode 100644 final/lib/Target/SubtargetFeature.cpp create mode 100644 final/lib/Target/SystemZ/CMakeLists.txt create mode 100644 final/lib/Target/SystemZ/Makefile create mode 100644 final/lib/Target/SystemZ/SystemZ.h create mode 100644 final/lib/Target/SystemZ/SystemZ.td create mode 100644 final/lib/Target/SystemZ/SystemZAsmPrinter.cpp create mode 100644 final/lib/Target/SystemZ/SystemZCallingConv.td create mode 100644 final/lib/Target/SystemZ/SystemZFrameLowering.cpp create mode 100644 final/lib/Target/SystemZ/SystemZFrameLowering.h create mode 100644 final/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp create mode 100644 final/lib/Target/SystemZ/SystemZISelLowering.cpp create mode 100644 final/lib/Target/SystemZ/SystemZISelLowering.h create mode 100644 final/lib/Target/SystemZ/SystemZInstrBuilder.h create mode 100644 final/lib/Target/SystemZ/SystemZInstrFP.td create mode 100644 final/lib/Target/SystemZ/SystemZInstrFormats.td create mode 100644 final/lib/Target/SystemZ/SystemZInstrInfo.cpp create mode 100644 final/lib/Target/SystemZ/SystemZInstrInfo.h create mode 100644 final/lib/Target/SystemZ/SystemZInstrInfo.td create mode 100644 final/lib/Target/SystemZ/SystemZMCAsmInfo.cpp create mode 100644 final/lib/Target/SystemZ/SystemZMCAsmInfo.h create mode 100644 final/lib/Target/SystemZ/SystemZMachineFunctionInfo.h create mode 100644 final/lib/Target/SystemZ/SystemZOperands.td create mode 100644 final/lib/Target/SystemZ/SystemZRegisterInfo.cpp create mode 100644 final/lib/Target/SystemZ/SystemZRegisterInfo.h create mode 100644 final/lib/Target/SystemZ/SystemZRegisterInfo.td create mode 100644 final/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp create mode 100644 final/lib/Target/SystemZ/SystemZSelectionDAGInfo.h create mode 100644 final/lib/Target/SystemZ/SystemZSubtarget.cpp create mode 100644 final/lib/Target/SystemZ/SystemZSubtarget.h create mode 100644 final/lib/Target/SystemZ/SystemZTargetMachine.cpp create mode 100644 final/lib/Target/SystemZ/SystemZTargetMachine.h create mode 100644 final/lib/Target/SystemZ/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/SystemZ/TargetInfo/Makefile create mode 100644 final/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp create mode 100644 final/lib/Target/Target.cpp create mode 100644 final/lib/Target/TargetAsmInfo.cpp create mode 100644 final/lib/Target/TargetAsmLexer.cpp create mode 100644 final/lib/Target/TargetData.cpp create mode 100644 final/lib/Target/TargetELFWriterInfo.cpp create mode 100644 final/lib/Target/TargetFrameLowering.cpp create mode 100644 final/lib/Target/TargetInstrInfo.cpp create mode 100644 final/lib/Target/TargetIntrinsicInfo.cpp create mode 100644 final/lib/Target/TargetLibraryInfo.cpp create mode 100644 final/lib/Target/TargetLoweringObjectFile.cpp create mode 100644 final/lib/Target/TargetMachine.cpp create mode 100644 
final/lib/Target/TargetRegisterInfo.cpp create mode 100644 final/lib/Target/TargetSubtarget.cpp create mode 100644 final/lib/Target/X86/AsmParser/CMakeLists.txt create mode 100644 final/lib/Target/X86/AsmParser/Makefile create mode 100644 final/lib/Target/X86/AsmParser/X86AsmLexer.cpp create mode 100644 final/lib/Target/X86/AsmParser/X86AsmParser.cpp create mode 100644 final/lib/Target/X86/CMakeLists.txt create mode 100644 final/lib/Target/X86/Disassembler/CMakeLists.txt create mode 100644 final/lib/Target/X86/Disassembler/Makefile create mode 100644 final/lib/Target/X86/Disassembler/X86Disassembler.cpp create mode 100644 final/lib/Target/X86/Disassembler/X86Disassembler.h create mode 100644 final/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c create mode 100644 final/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h create mode 100644 final/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h create mode 100644 final/lib/Target/X86/InstPrinter/CMakeLists.txt create mode 100644 final/lib/Target/X86/InstPrinter/Makefile create mode 100644 final/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp create mode 100644 final/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h create mode 100644 final/lib/Target/X86/InstPrinter/X86InstComments.cpp create mode 100644 final/lib/Target/X86/InstPrinter/X86InstComments.h create mode 100644 final/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp create mode 100644 final/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h create mode 100644 final/lib/Target/X86/Makefile create mode 100644 final/lib/Target/X86/README-FPStack.txt create mode 100644 final/lib/Target/X86/README-MMX.txt create mode 100644 final/lib/Target/X86/README-SSE.txt create mode 100644 final/lib/Target/X86/README-UNIMPLEMENTED.txt create mode 100644 final/lib/Target/X86/README-X86-64.txt create mode 100644 final/lib/Target/X86/README.txt create mode 100644 final/lib/Target/X86/SSEDomainFix.cpp create mode 100644 final/lib/Target/X86/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/X86/TargetInfo/Makefile create mode 100644 final/lib/Target/X86/TargetInfo/X86TargetInfo.cpp create mode 100644 final/lib/Target/X86/Utils/CMakeLists.txt create mode 100644 final/lib/Target/X86/Utils/Makefile create mode 100644 final/lib/Target/X86/Utils/X86ShuffleDecode.cpp create mode 100644 final/lib/Target/X86/Utils/X86ShuffleDecode.h create mode 100644 final/lib/Target/X86/X86.h create mode 100644 final/lib/Target/X86/X86.td create mode 100644 final/lib/Target/X86/X86AsmBackend.cpp create mode 100644 final/lib/Target/X86/X86AsmPrinter.cpp create mode 100644 final/lib/Target/X86/X86AsmPrinter.h create mode 100644 final/lib/Target/X86/X86COFFMachineModuleInfo.cpp create mode 100644 final/lib/Target/X86/X86COFFMachineModuleInfo.h create mode 100644 final/lib/Target/X86/X86CallingConv.td create mode 100644 final/lib/Target/X86/X86CodeEmitter.cpp create mode 100644 final/lib/Target/X86/X86CompilationCallback_Win64.asm create mode 100644 final/lib/Target/X86/X86ELFWriterInfo.cpp create mode 100644 final/lib/Target/X86/X86ELFWriterInfo.h create mode 100644 final/lib/Target/X86/X86FastISel.cpp create mode 100644 final/lib/Target/X86/X86FixupKinds.h create mode 100644 final/lib/Target/X86/X86FloatingPoint.cpp create mode 100644 final/lib/Target/X86/X86FrameLowering.cpp create mode 100644 final/lib/Target/X86/X86FrameLowering.h create mode 100644 final/lib/Target/X86/X86ISelDAGToDAG.cpp create mode 100644 final/lib/Target/X86/X86ISelLowering.cpp create mode 100644 
final/lib/Target/X86/X86ISelLowering.h create mode 100644 final/lib/Target/X86/X86Instr3DNow.td create mode 100644 final/lib/Target/X86/X86InstrArithmetic.td create mode 100644 final/lib/Target/X86/X86InstrBuilder.h create mode 100644 final/lib/Target/X86/X86InstrCMovSetCC.td create mode 100644 final/lib/Target/X86/X86InstrCompiler.td create mode 100644 final/lib/Target/X86/X86InstrControl.td create mode 100644 final/lib/Target/X86/X86InstrExtension.td create mode 100644 final/lib/Target/X86/X86InstrFMA.td create mode 100644 final/lib/Target/X86/X86InstrFPStack.td create mode 100644 final/lib/Target/X86/X86InstrFormats.td create mode 100644 final/lib/Target/X86/X86InstrFragmentsSIMD.td create mode 100644 final/lib/Target/X86/X86InstrInfo.cpp create mode 100644 final/lib/Target/X86/X86InstrInfo.h create mode 100644 final/lib/Target/X86/X86InstrInfo.td create mode 100644 final/lib/Target/X86/X86InstrMMX.td create mode 100644 final/lib/Target/X86/X86InstrSSE.td create mode 100644 final/lib/Target/X86/X86InstrShiftRotate.td create mode 100644 final/lib/Target/X86/X86InstrSystem.td create mode 100644 final/lib/Target/X86/X86InstrVMX.td create mode 100644 final/lib/Target/X86/X86JITInfo.cpp create mode 100644 final/lib/Target/X86/X86JITInfo.h create mode 100644 final/lib/Target/X86/X86MCAsmInfo.cpp create mode 100644 final/lib/Target/X86/X86MCAsmInfo.h create mode 100644 final/lib/Target/X86/X86MCCodeEmitter.cpp create mode 100644 final/lib/Target/X86/X86MCInstLower.cpp create mode 100644 final/lib/Target/X86/X86MCInstLower.h create mode 100644 final/lib/Target/X86/X86MachObjectWriter.cpp create mode 100644 final/lib/Target/X86/X86MachineFunctionInfo.h create mode 100644 final/lib/Target/X86/X86RegisterInfo.cpp create mode 100644 final/lib/Target/X86/X86RegisterInfo.h create mode 100644 final/lib/Target/X86/X86RegisterInfo.td create mode 100644 final/lib/Target/X86/X86Relocations.h create mode 100644 final/lib/Target/X86/X86SelectionDAGInfo.cpp create mode 100644 final/lib/Target/X86/X86SelectionDAGInfo.h create mode 100644 final/lib/Target/X86/X86Subtarget.cpp create mode 100644 final/lib/Target/X86/X86Subtarget.h create mode 100644 final/lib/Target/X86/X86TargetMachine.cpp create mode 100644 final/lib/Target/X86/X86TargetMachine.h create mode 100644 final/lib/Target/X86/X86TargetObjectFile.cpp create mode 100644 final/lib/Target/X86/X86TargetObjectFile.h create mode 100644 final/lib/Target/XCore/CMakeLists.txt create mode 100644 final/lib/Target/XCore/Makefile create mode 100644 final/lib/Target/XCore/README.txt create mode 100644 final/lib/Target/XCore/TargetInfo/CMakeLists.txt create mode 100644 final/lib/Target/XCore/TargetInfo/Makefile create mode 100644 final/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp create mode 100644 final/lib/Target/XCore/XCore.h create mode 100644 final/lib/Target/XCore/XCore.td create mode 100644 final/lib/Target/XCore/XCoreAsmPrinter.cpp create mode 100644 final/lib/Target/XCore/XCoreCallingConv.td create mode 100644 final/lib/Target/XCore/XCoreFrameLowering.cpp create mode 100644 final/lib/Target/XCore/XCoreFrameLowering.h create mode 100644 final/lib/Target/XCore/XCoreISelDAGToDAG.cpp create mode 100644 final/lib/Target/XCore/XCoreISelLowering.cpp create mode 100644 final/lib/Target/XCore/XCoreISelLowering.h create mode 100644 final/lib/Target/XCore/XCoreInstrFormats.td create mode 100644 final/lib/Target/XCore/XCoreInstrInfo.cpp create mode 100644 final/lib/Target/XCore/XCoreInstrInfo.h create mode 100644 final/lib/Target/XCore/XCoreInstrInfo.td create mode 
100644 final/lib/Target/XCore/XCoreMCAsmInfo.cpp create mode 100644 final/lib/Target/XCore/XCoreMCAsmInfo.h create mode 100644 final/lib/Target/XCore/XCoreMachineFunctionInfo.h create mode 100644 final/lib/Target/XCore/XCoreRegisterInfo.cpp create mode 100644 final/lib/Target/XCore/XCoreRegisterInfo.h create mode 100644 final/lib/Target/XCore/XCoreRegisterInfo.td create mode 100644 final/lib/Target/XCore/XCoreSelectionDAGInfo.cpp create mode 100644 final/lib/Target/XCore/XCoreSelectionDAGInfo.h create mode 100644 final/lib/Target/XCore/XCoreSubtarget.cpp create mode 100644 final/lib/Target/XCore/XCoreSubtarget.h create mode 100644 final/lib/Target/XCore/XCoreTargetMachine.cpp create mode 100644 final/lib/Target/XCore/XCoreTargetMachine.h create mode 100644 final/lib/Target/XCore/XCoreTargetObjectFile.cpp create mode 100644 final/lib/Target/XCore/XCoreTargetObjectFile.h create mode 100644 final/lib/Transforms/CMakeLists.txt create mode 100644 final/lib/Transforms/Hello/CMakeLists.txt create mode 100644 final/lib/Transforms/Hello/Hello.cpp create mode 100644 final/lib/Transforms/Hello/Hello.exports create mode 100644 final/lib/Transforms/Hello/Makefile create mode 100644 final/lib/Transforms/IPO/ArgumentPromotion.cpp create mode 100644 final/lib/Transforms/IPO/CMakeLists.txt create mode 100644 final/lib/Transforms/IPO/ConstantMerge.cpp create mode 100644 final/lib/Transforms/IPO/DeadArgumentElimination.cpp create mode 100644 final/lib/Transforms/IPO/DeadTypeElimination.cpp create mode 100644 final/lib/Transforms/IPO/ExtractGV.cpp create mode 100644 final/lib/Transforms/IPO/FunctionAttrs.cpp create mode 100644 final/lib/Transforms/IPO/GlobalDCE.cpp create mode 100644 final/lib/Transforms/IPO/GlobalOpt.cpp create mode 100644 final/lib/Transforms/IPO/IPConstantPropagation.cpp create mode 100644 final/lib/Transforms/IPO/IPO.cpp create mode 100644 final/lib/Transforms/IPO/InlineAlways.cpp create mode 100644 final/lib/Transforms/IPO/InlineSimple.cpp create mode 100644 final/lib/Transforms/IPO/Inliner.cpp create mode 100644 final/lib/Transforms/IPO/Internalize.cpp create mode 100644 final/lib/Transforms/IPO/LoopExtractor.cpp create mode 100644 final/lib/Transforms/IPO/LowerSetJmp.cpp create mode 100644 final/lib/Transforms/IPO/Makefile create mode 100644 final/lib/Transforms/IPO/MergeFunctions.cpp create mode 100644 final/lib/Transforms/IPO/PartialInlining.cpp create mode 100644 final/lib/Transforms/IPO/PruneEH.cpp create mode 100644 final/lib/Transforms/IPO/StripDeadPrototypes.cpp create mode 100644 final/lib/Transforms/IPO/StripSymbols.cpp create mode 100644 final/lib/Transforms/IPO/StructRetPromotion.cpp create mode 100644 final/lib/Transforms/InstCombine/CMakeLists.txt create mode 100644 final/lib/Transforms/InstCombine/InstCombine.h create mode 100644 final/lib/Transforms/InstCombine/InstCombineAddSub.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineCalls.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineCasts.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineCompares.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombinePHI.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineSelect.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineShifts.cpp create mode 100644 
final/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineVectorOps.cpp create mode 100644 final/lib/Transforms/InstCombine/InstCombineWorklist.h create mode 100644 final/lib/Transforms/InstCombine/InstructionCombining.cpp create mode 100644 final/lib/Transforms/InstCombine/Makefile create mode 100644 final/lib/Transforms/Instrumentation/CMakeLists.txt create mode 100644 final/lib/Transforms/Instrumentation/EdgeProfiling.cpp create mode 100644 final/lib/Transforms/Instrumentation/Instrumentation.cpp create mode 100644 final/lib/Transforms/Instrumentation/Makefile create mode 100644 final/lib/Transforms/Instrumentation/MaximumSpanningTree.h create mode 100644 final/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp create mode 100644 final/lib/Transforms/Instrumentation/PathProfiling.cpp create mode 100644 final/lib/Transforms/Instrumentation/ProfilingUtils.cpp create mode 100644 final/lib/Transforms/Instrumentation/ProfilingUtils.h create mode 100644 final/lib/Transforms/Makefile create mode 100644 final/lib/Transforms/Scalar/ADCE.cpp create mode 100644 final/lib/Transforms/Scalar/BasicBlockPlacement.cpp create mode 100644 final/lib/Transforms/Scalar/CMakeLists.txt create mode 100644 final/lib/Transforms/Scalar/CodeGenPrepare.cpp create mode 100644 final/lib/Transforms/Scalar/ConstantProp.cpp create mode 100644 final/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp create mode 100644 final/lib/Transforms/Scalar/DCE.cpp create mode 100644 final/lib/Transforms/Scalar/DeadStoreElimination.cpp create mode 100644 final/lib/Transforms/Scalar/EarlyCSE.cpp create mode 100644 final/lib/Transforms/Scalar/GVN.cpp create mode 100644 final/lib/Transforms/Scalar/IndVarSimplify.cpp create mode 100644 final/lib/Transforms/Scalar/JumpThreading.cpp create mode 100644 final/lib/Transforms/Scalar/LICM.cpp create mode 100644 final/lib/Transforms/Scalar/LoopDeletion.cpp create mode 100644 final/lib/Transforms/Scalar/LoopIdiomRecognize.cpp create mode 100644 final/lib/Transforms/Scalar/LoopInstSimplify.cpp create mode 100644 final/lib/Transforms/Scalar/LoopRotation.cpp create mode 100644 final/lib/Transforms/Scalar/LoopStrengthReduce.cpp create mode 100644 final/lib/Transforms/Scalar/LoopUnrollPass.cpp create mode 100644 final/lib/Transforms/Scalar/LoopUnswitch.cpp create mode 100644 final/lib/Transforms/Scalar/LowerAtomic.cpp create mode 100644 final/lib/Transforms/Scalar/Makefile create mode 100644 final/lib/Transforms/Scalar/MemCpyOptimizer.cpp create mode 100644 final/lib/Transforms/Scalar/Reassociate.cpp create mode 100644 final/lib/Transforms/Scalar/Reg2Mem.cpp create mode 100644 final/lib/Transforms/Scalar/SCCP.cpp create mode 100644 final/lib/Transforms/Scalar/Scalar.cpp create mode 100644 final/lib/Transforms/Scalar/ScalarReplAggregates.cpp create mode 100644 final/lib/Transforms/Scalar/SimplifyCFGPass.cpp create mode 100644 final/lib/Transforms/Scalar/SimplifyLibCalls.cpp create mode 100644 final/lib/Transforms/Scalar/Sink.cpp create mode 100644 final/lib/Transforms/Scalar/TailDuplication.cpp create mode 100644 final/lib/Transforms/Scalar/TailRecursionElimination.cpp create mode 100644 final/lib/Transforms/Utils/AddrModeMatcher.cpp create mode 100644 final/lib/Transforms/Utils/BasicBlockUtils.cpp create mode 100644 final/lib/Transforms/Utils/BasicInliner.cpp create mode 100644 final/lib/Transforms/Utils/BreakCriticalEdges.cpp create mode 100644 final/lib/Transforms/Utils/BuildLibCalls.cpp create mode 100644 
final/lib/Transforms/Utils/CMakeLists.txt create mode 100644 final/lib/Transforms/Utils/CloneFunction.cpp create mode 100644 final/lib/Transforms/Utils/CloneLoop.cpp create mode 100644 final/lib/Transforms/Utils/CloneModule.cpp create mode 100644 final/lib/Transforms/Utils/CodeExtractor.cpp create mode 100644 final/lib/Transforms/Utils/DemoteRegToStack.cpp create mode 100644 final/lib/Transforms/Utils/InlineFunction.cpp create mode 100644 final/lib/Transforms/Utils/InstructionNamer.cpp create mode 100644 final/lib/Transforms/Utils/LCSSA.cpp create mode 100644 final/lib/Transforms/Utils/Local.cpp create mode 100644 final/lib/Transforms/Utils/LoopSimplify.cpp create mode 100644 final/lib/Transforms/Utils/LoopUnroll.cpp create mode 100644 final/lib/Transforms/Utils/LowerInvoke.cpp create mode 100644 final/lib/Transforms/Utils/LowerSwitch.cpp create mode 100644 final/lib/Transforms/Utils/Makefile create mode 100644 final/lib/Transforms/Utils/Mem2Reg.cpp create mode 100644 final/lib/Transforms/Utils/PromoteMemoryToRegister.cpp create mode 100644 final/lib/Transforms/Utils/SSAUpdater.cpp create mode 100644 final/lib/Transforms/Utils/SimplifyCFG.cpp create mode 100644 final/lib/Transforms/Utils/SimplifyInstructions.cpp create mode 100644 final/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp create mode 100644 final/lib/Transforms/Utils/Utils.cpp create mode 100644 final/lib/Transforms/Utils/ValueMapper.cpp create mode 100644 final/lib/VMCore/AsmWriter.cpp create mode 100644 final/lib/VMCore/Attributes.cpp create mode 100644 final/lib/VMCore/AutoUpgrade.cpp create mode 100644 final/lib/VMCore/BasicBlock.cpp create mode 100644 final/lib/VMCore/CMakeLists.txt create mode 100644 final/lib/VMCore/ConstantFold.cpp create mode 100644 final/lib/VMCore/ConstantFold.h create mode 100644 final/lib/VMCore/Constants.cpp create mode 100644 final/lib/VMCore/ConstantsContext.h create mode 100644 final/lib/VMCore/Core.cpp create mode 100644 final/lib/VMCore/DebugLoc.cpp create mode 100644 final/lib/VMCore/Dominators.cpp create mode 100644 final/lib/VMCore/Function.cpp create mode 100644 final/lib/VMCore/GVMaterializer.cpp create mode 100644 final/lib/VMCore/Globals.cpp create mode 100644 final/lib/VMCore/IRBuilder.cpp create mode 100644 final/lib/VMCore/InlineAsm.cpp create mode 100644 final/lib/VMCore/Instruction.cpp create mode 100644 final/lib/VMCore/Instructions.cpp create mode 100644 final/lib/VMCore/IntrinsicInst.cpp create mode 100644 final/lib/VMCore/LLVMContext.cpp create mode 100644 final/lib/VMCore/LLVMContextImpl.cpp create mode 100644 final/lib/VMCore/LLVMContextImpl.h create mode 100644 final/lib/VMCore/LeakDetector.cpp create mode 100644 final/lib/VMCore/LeaksContext.h create mode 100644 final/lib/VMCore/Makefile create mode 100644 final/lib/VMCore/Metadata.cpp create mode 100644 final/lib/VMCore/Module.cpp create mode 100644 final/lib/VMCore/Pass.cpp create mode 100644 final/lib/VMCore/PassManager.cpp create mode 100644 final/lib/VMCore/PassRegistry.cpp create mode 100644 final/lib/VMCore/PrintModulePass.cpp create mode 100644 final/lib/VMCore/SymbolTableListTraitsImpl.h create mode 100644 final/lib/VMCore/Type.cpp create mode 100644 final/lib/VMCore/TypeSymbolTable.cpp create mode 100644 final/lib/VMCore/TypesContext.h create mode 100644 final/lib/VMCore/Use.cpp create mode 100644 final/lib/VMCore/User.cpp create mode 100644 final/lib/VMCore/Value.cpp create mode 100644 final/lib/VMCore/ValueSymbolTable.cpp create mode 100644 final/lib/VMCore/ValueTypes.cpp create mode 100644 
final/lib/VMCore/Verifier.cpp create mode 100644 final/llvm.spec.in create mode 100644 final/projects/CMakeLists.txt create mode 100644 final/projects/Makefile create mode 100644 final/projects/sample/Makefile create mode 100644 final/projects/sample/Makefile.common.in create mode 100755 final/projects/sample/autoconf/AutoRegen.sh create mode 100644 final/projects/sample/autoconf/LICENSE.TXT create mode 100755 final/projects/sample/autoconf/config.guess create mode 100755 final/projects/sample/autoconf/config.sub create mode 100644 final/projects/sample/autoconf/configure.ac create mode 100755 final/projects/sample/configure create mode 100644 final/projects/sample/docs/index.html create mode 100644 final/projects/sample/include/sample.h create mode 100644 final/projects/sample/lib/Makefile create mode 100644 final/projects/sample/lib/sample/Makefile create mode 100644 final/projects/sample/lib/sample/sample.c create mode 100644 final/projects/sample/tools/Makefile create mode 100644 final/projects/sample/tools/sample/Makefile create mode 100644 final/projects/sample/tools/sample/main.c create mode 100644 final/runtime/Makefile create mode 100644 final/runtime/README.txt create mode 100644 final/runtime/libprofile/BasicBlockTracing.c create mode 100644 final/runtime/libprofile/CommonProfiling.c create mode 100644 final/runtime/libprofile/EdgeProfiling.c create mode 100644 final/runtime/libprofile/Makefile create mode 100644 final/runtime/libprofile/OptimalEdgeProfiling.c create mode 100644 final/runtime/libprofile/PathProfiling.c create mode 100644 final/runtime/libprofile/Profiling.h create mode 100644 final/runtime/libprofile/libprofile.exports create mode 100644 final/test/Analysis/BasicAA/2003-02-26-AccessSizeTest.ll create mode 100644 final/test/Analysis/BasicAA/2003-03-04-GEPCrash.ll create mode 100644 final/test/Analysis/BasicAA/2003-04-22-GEPProblem.ll create mode 100644 final/test/Analysis/BasicAA/2003-04-25-GEPCrash.ll create mode 100644 final/test/Analysis/BasicAA/2003-05-21-GEP-Problem.ll create mode 100644 final/test/Analysis/BasicAA/2003-06-01-AliasCrash.ll create mode 100644 final/test/Analysis/BasicAA/2003-07-03-BasicAACrash.ll create mode 100644 final/test/Analysis/BasicAA/2003-09-19-LocalArgument.ll create mode 100644 final/test/Analysis/BasicAA/2003-11-04-SimpleCases.ll create mode 100644 final/test/Analysis/BasicAA/2003-12-11-ConstExprGEP.ll create mode 100644 final/test/Analysis/BasicAA/2004-07-28-MustAliasbug.ll create mode 100644 final/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll create mode 100644 final/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll create mode 100644 final/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll create mode 100644 final/test/Analysis/BasicAA/2006-03-03-BadArraySubscript.ll create mode 100644 final/test/Analysis/BasicAA/2006-11-03-BasicAAVectorCrash.ll create mode 100644 final/test/Analysis/BasicAA/2007-01-13-BasePointerBadNoAlias.ll create mode 100644 final/test/Analysis/BasicAA/2007-08-01-NoAliasAndCalls.ll create mode 100644 final/test/Analysis/BasicAA/2007-08-01-NoAliasAndGEP.ll create mode 100644 final/test/Analysis/BasicAA/2007-08-05-GetOverloadedModRef.ll create mode 100644 final/test/Analysis/BasicAA/2007-10-24-ArgumentsGlobals.ll create mode 100644 final/test/Analysis/BasicAA/2007-11-05-SizeCrash.ll create mode 100644 final/test/Analysis/BasicAA/2007-12-08-OutOfBoundsCrash.ll create mode 100644 final/test/Analysis/BasicAA/2008-04-15-Byval.ll create mode 100644 final/test/Analysis/BasicAA/2008-06-02-GEPTailCrash.ll create 
mode 100644 final/test/Analysis/BasicAA/2008-11-23-NoaliasRet.ll create mode 100644 final/test/Analysis/BasicAA/2009-03-04-GEPNoalias.ll create mode 100644 final/test/Analysis/BasicAA/2009-10-13-AtomicModRef.ll create mode 100644 final/test/Analysis/BasicAA/2009-10-13-GEP-BaseNoAlias.ll create mode 100644 final/test/Analysis/BasicAA/2010-09-15-GEP-SignedArithmetic.ll create mode 100644 final/test/Analysis/BasicAA/args-rets-allocas-loads.ll create mode 100644 final/test/Analysis/BasicAA/byval.ll create mode 100644 final/test/Analysis/BasicAA/cas.ll create mode 100644 final/test/Analysis/BasicAA/constant-over-index.ll create mode 100644 final/test/Analysis/BasicAA/dg.exp create mode 100644 final/test/Analysis/BasicAA/empty.ll create mode 100644 final/test/Analysis/BasicAA/featuretest.ll create mode 100644 final/test/Analysis/BasicAA/full-store-partial-alias.ll create mode 100644 final/test/Analysis/BasicAA/gcsetest.ll create mode 100644 final/test/Analysis/BasicAA/gep-alias.ll create mode 100644 final/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll create mode 100644 final/test/Analysis/BasicAA/global-size.ll create mode 100644 final/test/Analysis/BasicAA/modref.ll create mode 100644 final/test/Analysis/BasicAA/no-escape-call.ll create mode 100644 final/test/Analysis/BasicAA/nocapture.ll create mode 100644 final/test/Analysis/BasicAA/phi-aa.ll create mode 100644 final/test/Analysis/BasicAA/phi-and-select.ll create mode 100644 final/test/Analysis/BasicAA/pure-const-dce.ll create mode 100644 final/test/Analysis/BasicAA/store-promote.ll create mode 100644 final/test/Analysis/BasicAA/tailcall-modref.ll create mode 100644 final/test/Analysis/BasicAA/unreachable-block.ll create mode 100644 final/test/Analysis/CallGraph/2008-09-09-DirectCall.ll create mode 100644 final/test/Analysis/CallGraph/2008-09-09-UsedByGlobal.ll create mode 100644 final/test/Analysis/CallGraph/dg.exp create mode 100644 final/test/Analysis/Dominators/2006-10-02-BreakCritEdges.ll create mode 100644 final/test/Analysis/Dominators/2007-01-14-BreakCritEdges.ll create mode 100644 final/test/Analysis/Dominators/2007-07-11-SplitBlock.ll create mode 100644 final/test/Analysis/Dominators/2007-07-12-SplitBlock.ll create mode 100644 final/test/Analysis/Dominators/dg.exp create mode 100644 final/test/Analysis/GlobalsModRef/2008-09-03-ReadGlobals.ll create mode 100644 final/test/Analysis/GlobalsModRef/aliastest.ll create mode 100644 final/test/Analysis/GlobalsModRef/chaining-analysis.ll create mode 100644 final/test/Analysis/GlobalsModRef/dg.exp create mode 100644 final/test/Analysis/GlobalsModRef/indirect-global.ll create mode 100644 final/test/Analysis/GlobalsModRef/modreftest.ll create mode 100644 final/test/Analysis/GlobalsModRef/purecse.ll create mode 100644 final/test/Analysis/LoopDependenceAnalysis/alias.ll create mode 100644 final/test/Analysis/LoopDependenceAnalysis/dg.exp create mode 100644 final/test/Analysis/LoopDependenceAnalysis/siv-strong.ll create mode 100644 final/test/Analysis/LoopDependenceAnalysis/siv-weak-crossing.ll create mode 100644 final/test/Analysis/LoopDependenceAnalysis/siv-weak-zero.ll create mode 100644 final/test/Analysis/LoopDependenceAnalysis/ziv.ll create mode 100644 final/test/Analysis/LoopInfo/2003-05-15-NestingProblem.ll create mode 100644 final/test/Analysis/LoopInfo/dg.exp create mode 100644 final/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll create mode 100644 final/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll create mode 100644 
final/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll create mode 100644 final/test/Analysis/PostDominators/dg.exp create mode 100644 final/test/Analysis/PostDominators/pr1098.ll create mode 100644 final/test/Analysis/PostDominators/pr6047_a.ll create mode 100644 final/test/Analysis/PostDominators/pr6047_b.ll create mode 100644 final/test/Analysis/PostDominators/pr6047_c.ll create mode 100644 final/test/Analysis/PostDominators/pr6047_d.ll create mode 100644 final/test/Analysis/Profiling/dg.exp create mode 100644 final/test/Analysis/Profiling/edge-profiling.ll create mode 100644 final/test/Analysis/Profiling/profiling-tool-chain.ll create mode 100644 final/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll create mode 100644 final/test/Analysis/RegionInfo/block_sort.ll create mode 100644 final/test/Analysis/RegionInfo/cond_loop.ll create mode 100644 final/test/Analysis/RegionInfo/condition_complicated.ll create mode 100644 final/test/Analysis/RegionInfo/condition_complicated_2.ll create mode 100644 final/test/Analysis/RegionInfo/condition_forward_edge.ll create mode 100644 final/test/Analysis/RegionInfo/condition_same_exit.ll create mode 100644 final/test/Analysis/RegionInfo/condition_simple.ll create mode 100644 final/test/Analysis/RegionInfo/dg.exp create mode 100644 final/test/Analysis/RegionInfo/exit_in_condition.ll create mode 100644 final/test/Analysis/RegionInfo/infinite_loop.ll create mode 100644 final/test/Analysis/RegionInfo/infinite_loop_2.ll create mode 100644 final/test/Analysis/RegionInfo/infinite_loop_3.ll create mode 100644 final/test/Analysis/RegionInfo/infinite_loop_4.ll create mode 100644 final/test/Analysis/RegionInfo/loop_with_condition.ll create mode 100644 final/test/Analysis/RegionInfo/loops_1.ll create mode 100644 final/test/Analysis/RegionInfo/loops_2.ll create mode 100644 final/test/Analysis/RegionInfo/mix_1.ll create mode 100644 final/test/Analysis/RegionInfo/multiple_exiting_edge.ll create mode 100644 final/test/Analysis/RegionInfo/nested_loops.ll create mode 100644 final/test/Analysis/RegionInfo/next.ll create mode 100644 final/test/Analysis/RegionInfo/paper.ll create mode 100644 final/test/Analysis/RegionInfo/two_loops_same_header.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll create mode 100644 final/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll create mode 
100644 final/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll create mode 100644 final/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll create mode 100644 final/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll create mode 100644 final/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll create mode 100644 final/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll create mode 100644 final/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll create mode 100644 final/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll create mode 100644 final/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll create mode 100644 final/test/Analysis/ScalarEvolution/and-xor.ll create mode 100644 final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll create mode 100644 final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll create mode 100644 final/test/Analysis/ScalarEvolution/avoid-smax-0.ll create mode 100644 final/test/Analysis/ScalarEvolution/avoid-smax-1.ll create mode 100644 final/test/Analysis/ScalarEvolution/dg.exp create mode 100644 final/test/Analysis/ScalarEvolution/div-overflow.ll create mode 100644 final/test/Analysis/ScalarEvolution/do-loop.ll create mode 100644 final/test/Analysis/ScalarEvolution/fold.ll create mode 100644 final/test/Analysis/ScalarEvolution/max-trip-count.ll create mode 100644 final/test/Analysis/ScalarEvolution/nsw-offset.ll create mode 100644 final/test/Analysis/ScalarEvolution/nsw.ll create mode 100644 final/test/Analysis/ScalarEvolution/pointer-sign-bits.ll create mode 100644 final/test/Analysis/ScalarEvolution/pr3909.ll create mode 100644 final/test/Analysis/ScalarEvolution/scev-aa.ll create mode 100644 final/test/Analysis/ScalarEvolution/sext-inreg.ll create mode 100644 final/test/Analysis/ScalarEvolution/sext-iv-0.ll create mode 100644 final/test/Analysis/ScalarEvolution/sext-iv-1.ll create mode 100644 final/test/Analysis/ScalarEvolution/sext-iv-2.ll create mode 100644 final/test/Analysis/ScalarEvolution/sle.ll create mode 100644 final/test/Analysis/ScalarEvolution/smax.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count10.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count2.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count3.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count4.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count5.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count6.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count7.ll create mode 100644 final/test/Analysis/ScalarEvolution/trip-count8.ll 
create mode 100644 final/test/Analysis/ScalarEvolution/trip-count9.ll create mode 100644 final/test/Analysis/ScalarEvolution/undefined.ll create mode 100644 final/test/Analysis/ScalarEvolution/unreachable-code.ll create mode 100644 final/test/Analysis/ScalarEvolution/unsimplified-loop.ll create mode 100644 final/test/Analysis/ScalarEvolution/xor-and.ll create mode 100644 final/test/Analysis/ScalarEvolution/zext-wrap.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/dg.exp create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/dse.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/licm.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/precedence.ll create mode 100644 final/test/Analysis/TypeBasedAliasAnalysis/sink.ll create mode 100644 final/test/Archive/GNU.a create mode 100644 final/test/Archive/IsNAN.o create mode 100644 final/test/Archive/MacOSX.a create mode 100644 final/test/Archive/README.txt create mode 100644 final/test/Archive/SVR4.a create mode 100644 final/test/Archive/dg.exp create mode 100644 final/test/Archive/evenlen create mode 100644 final/test/Archive/extract.ll create mode 100644 final/test/Archive/oddlen create mode 100644 final/test/Archive/toc_GNU.ll create mode 100644 final/test/Archive/toc_MacOSX.ll create mode 100644 final/test/Archive/toc_SVR4.ll create mode 100644 final/test/Archive/toc_xpg4.ll create mode 100644 final/test/Archive/very_long_bytecode_file_name.bc create mode 100644 final/test/Archive/xpg4.a create mode 100644 final/test/Assembler/2002-01-24-BadSymbolTableAssert.ll create mode 100644 final/test/Assembler/2002-01-24-ValueRefineAbsType.ll create mode 100644 final/test/Assembler/2002-02-19-TypeParsing.ll create mode 100644 final/test/Assembler/2002-03-08-NameCollision.ll create mode 100644 final/test/Assembler/2002-03-08-NameCollision2.ll create mode 100644 final/test/Assembler/2002-04-04-PureVirtMethCall.ll create mode 100644 final/test/Assembler/2002-04-04-PureVirtMethCall2.ll create mode 100644 final/test/Assembler/2002-04-05-TypeParsing.ll create mode 100644 final/test/Assembler/2002-04-07-HexFloatConstants.ll create mode 100644 final/test/Assembler/2002-04-07-InfConstant.ll create mode 100644 final/test/Assembler/2002-04-29-NameBinding.ll create mode 100644 final/test/Assembler/2002-05-02-InvalidForwardRef.ll create mode 100644 final/test/Assembler/2002-05-02-ParseError.ll create mode 100644 final/test/Assembler/2002-07-08-HugePerformanceProblem.ll create mode 100644 final/test/Assembler/2002-07-14-InternalLossage.ll create mode 100644 final/test/Assembler/2002-07-14-OpaqueType.ll create mode 100644 final/test/Assembler/2002-07-25-ParserAssertionFailure.ll create mode 100644 final/test/Assembler/2002-07-25-QuoteInString.ll create mode 100644 final/test/Assembler/2002-07-25-ReturnPtrFunction.ll create mode 100644 final/test/Assembler/2002-07-31-SlashInString.ll create mode 100644 final/test/Assembler/2002-08-15-CastAmbiguity.ll create mode 100644 final/test/Assembler/2002-08-15-ConstantExprProblem.ll create mode 100644 final/test/Assembler/2002-08-15-UnresolvedGlobalReference.ll create mode 100644 
final/test/Assembler/2002-08-16-ConstExprInlined.ll create mode 100644 final/test/Assembler/2002-08-19-BytecodeReader.ll create mode 100644 final/test/Assembler/2002-08-22-DominanceProblem.ll create mode 100644 final/test/Assembler/2002-10-08-LargeArrayPerformance.ll create mode 100644 final/test/Assembler/2002-10-13-ConstantEncodingProblem.ll create mode 100644 final/test/Assembler/2002-10-15-NameClash.ll create mode 100644 final/test/Assembler/2002-12-15-GlobalResolve.ll create mode 100644 final/test/Assembler/2003-01-30-UnsignedString.ll create mode 100644 final/test/Assembler/2003-04-15-ConstantInitAssertion.ll create mode 100644 final/test/Assembler/2003-04-25-UnresolvedGlobalReference.ll create mode 100644 final/test/Assembler/2003-05-03-BytecodeReaderProblem.ll create mode 100644 final/test/Assembler/2003-05-12-MinIntProblem.ll create mode 100644 final/test/Assembler/2003-05-15-AssemblerProblem.ll create mode 100644 final/test/Assembler/2003-05-15-SwitchBug.ll create mode 100644 final/test/Assembler/2003-05-21-ConstantShiftExpr.ll create mode 100644 final/test/Assembler/2003-05-21-EmptyStructTest.ll create mode 100644 final/test/Assembler/2003-05-21-MalformedShiftCrash.ll create mode 100644 final/test/Assembler/2003-05-21-MalformedStructCrash.ll create mode 100644 final/test/Assembler/2003-06-17-InvokeDisassemble.ll create mode 100644 final/test/Assembler/2003-06-30-RecursiveTypeProblem.ll create mode 100644 final/test/Assembler/2003-08-20-ConstantExprGEP-Fold.ll create mode 100644 final/test/Assembler/2003-08-21-ConstantExprCast-Fold.ll create mode 100644 final/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll create mode 100644 final/test/Assembler/2003-11-05-ConstantExprShift.ll create mode 100644 final/test/Assembler/2003-11-11-ImplicitRename.ll create mode 100644 final/test/Assembler/2003-11-12-ConstantExprCast.ll create mode 100644 final/test/Assembler/2003-11-24-SymbolTableCrash.ll create mode 100644 final/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll create mode 100644 final/test/Assembler/2004-01-11-getelementptrfolding.ll create mode 100644 final/test/Assembler/2004-01-20-MaxLongLong.ll create mode 100644 final/test/Assembler/2004-02-01-NegativeZero.ll create mode 100644 final/test/Assembler/2004-02-27-SelfUseAssertError.ll create mode 100644 final/test/Assembler/2004-03-07-FunctionAddressAlignment.ll create mode 100644 final/test/Assembler/2004-03-30-UnclosedFunctionCrash.ll create mode 100644 final/test/Assembler/2004-04-04-GetElementPtrIndexTypes.ll create mode 100644 final/test/Assembler/2004-06-07-VerifierBug.ll create mode 100644 final/test/Assembler/2004-10-22-BCWriterUndefBug.ll create mode 100644 final/test/Assembler/2004-11-28-InvalidTypeCrash.ll create mode 100644 final/test/Assembler/2005-01-03-FPConstantDisassembly.ll create mode 100644 final/test/Assembler/2005-01-31-CallingAggregateFunction.ll create mode 100644 final/test/Assembler/2005-02-09-AsmWriterStoreBug.ll create mode 100644 final/test/Assembler/2005-05-05-OpaqueUndefValues.ll create mode 100644 final/test/Assembler/2005-12-21-ZeroInitVector.ll create mode 100644 final/test/Assembler/2006-05-26-VarargsCallEncode.ll create mode 100644 final/test/Assembler/2006-09-28-CrashOnInvalid.ll create mode 100644 final/test/Assembler/2006-12-09-Cast-To-Bool.ll create mode 100644 final/test/Assembler/2007-01-02-Undefined-Arg-Type.ll create mode 100644 final/test/Assembler/2007-01-05-Cmp-ConstExpr.ll create mode 100644 final/test/Assembler/2007-01-16-CrashOnBadCast.ll create mode 100644 
final/test/Assembler/2007-01-16-CrashOnBadCast2.ll create mode 100644 final/test/Assembler/2007-03-18-InvalidNumberedVar.ll create mode 100644 final/test/Assembler/2007-03-19-NegValue.ll create mode 100644 final/test/Assembler/2007-04-20-AlignedLoad.ll create mode 100644 final/test/Assembler/2007-04-20-AlignedStore.ll create mode 100644 final/test/Assembler/2007-04-25-AssemblerFoldExternWeak.ll create mode 100644 final/test/Assembler/2007-05-21-Escape.ll create mode 100644 final/test/Assembler/2007-07-19-ParamAttrAmbiguity.ll create mode 100644 final/test/Assembler/2007-07-30-AutoUpgradeZextSext.ll create mode 100644 final/test/Assembler/2007-08-06-AliasInvalid.ll create mode 100644 final/test/Assembler/2007-09-10-AliasFwdRef.ll create mode 100644 final/test/Assembler/2007-09-29-GC.ll create mode 100644 final/test/Assembler/2007-11-26-AttributeOverload.ll create mode 100644 final/test/Assembler/2007-11-27-AutoUpgradeAttributes.ll create mode 100644 final/test/Assembler/2007-12-11-AddressSpaces.ll create mode 100644 final/test/Assembler/2008-01-11-VarargAttrs.ll create mode 100644 final/test/Assembler/2008-02-18-IntPointerCrash.ll create mode 100644 final/test/Assembler/2008-02-20-MultipleReturnValue.ll create mode 100644 final/test/Assembler/2008-07-10-APInt.ll create mode 100644 final/test/Assembler/2008-09-02-FunctionNotes.ll create mode 100644 final/test/Assembler/2008-09-02-FunctionNotes2.ll create mode 100644 final/test/Assembler/2008-09-29-RetAttr.ll create mode 100644 final/test/Assembler/2008-10-14-NamedTypeOnInteger.ll create mode 100644 final/test/Assembler/2008-10-14-QuoteInName.ll create mode 100644 final/test/Assembler/2009-02-01-UnnamedForwardRef.ll create mode 100644 final/test/Assembler/2009-02-28-CastOpc.ll create mode 100644 final/test/Assembler/2009-02-28-StripOpaqueName.ll create mode 100644 final/test/Assembler/2009-03-24-ZextConstantExpr.ll create mode 100644 final/test/Assembler/2009-04-25-AliasGEP.ll create mode 100644 final/test/Assembler/2009-07-24-ZeroArgGEP.ll create mode 100644 final/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll create mode 100644 final/test/Assembler/AutoUpgradeIntrinsics.ll create mode 100644 final/test/Assembler/AutoUpgradeMMXIntrinsics.ll create mode 100644 final/test/Assembler/ConstantExprFold.ll create mode 100644 final/test/Assembler/ConstantExprFoldCast.ll create mode 100644 final/test/Assembler/MultipleReturnValueType.ll create mode 100644 final/test/Assembler/aggregate-constant-values.ll create mode 100644 final/test/Assembler/aggregate-return-single-value.ll create mode 100644 final/test/Assembler/align-inst-alloca.ll create mode 100644 final/test/Assembler/align-inst-load.ll create mode 100644 final/test/Assembler/align-inst-store.ll create mode 100644 final/test/Assembler/align-inst.ll create mode 100644 final/test/Assembler/alignstack.ll create mode 100644 final/test/Assembler/anon-functions.ll create mode 100644 final/test/Assembler/bcwrap.ll create mode 100644 final/test/Assembler/comment.ll create mode 100644 final/test/Assembler/dg.exp create mode 100644 final/test/Assembler/extractvalue-invalid-idx.ll create mode 100644 final/test/Assembler/flags.ll create mode 100644 final/test/Assembler/functionlocal-metadata.ll create mode 100644 final/test/Assembler/getelementptr.ll create mode 100644 final/test/Assembler/getelementptr_struct.ll create mode 100644 final/test/Assembler/huge-array.ll create mode 100644 final/test/Assembler/insertextractvalue.ll create mode 100644 
final/test/Assembler/insertvalue-invalid-idx.ll create mode 100644 final/test/Assembler/metadata.ll create mode 100644 final/test/Assembler/numbered-values.ll create mode 100644 final/test/Assembler/private.ll create mode 100644 final/test/Assembler/select.ll create mode 100644 final/test/Assembler/unnamed-addr.ll create mode 100644 final/test/Assembler/unnamed.ll create mode 100644 final/test/Assembler/vbool-cmp.ll create mode 100644 final/test/Assembler/vector-cmp.ll create mode 100644 final/test/Assembler/vector-select.ll create mode 100644 final/test/Assembler/vector-shift.ll create mode 100644 final/test/Assembler/x86mmx.ll create mode 100644 final/test/Bindings/Ocaml/analysis.ml create mode 100644 final/test/Bindings/Ocaml/bitreader.ml create mode 100644 final/test/Bindings/Ocaml/bitwriter.ml create mode 100644 final/test/Bindings/Ocaml/dg.exp create mode 100644 final/test/Bindings/Ocaml/executionengine.ml create mode 100644 final/test/Bindings/Ocaml/ext_exc.ml create mode 100644 final/test/Bindings/Ocaml/scalar_opts.ml create mode 100644 final/test/Bindings/Ocaml/target.ml create mode 100644 final/test/Bindings/Ocaml/vmcore.ml create mode 100644 final/test/Bitcode/2006-12-11-Cast-ConstExpr.ll create mode 100644 final/test/Bitcode/2009-06-11-FirstClassAggregateConstant.ll create mode 100644 final/test/Bitcode/AutoUpgradeGlobals.ll create mode 100644 final/test/Bitcode/AutoUpgradeGlobals.ll.bc create mode 100644 final/test/Bitcode/AutoUpgradeIntrinsics.ll create mode 100644 final/test/Bitcode/AutoUpgradeIntrinsics.ll.bc create mode 100644 final/test/Bitcode/dg.exp create mode 100644 final/test/Bitcode/extractelement.ll create mode 100644 final/test/Bitcode/flags.ll create mode 100644 final/test/Bitcode/memcpy.ll create mode 100644 final/test/Bitcode/metadata-2.ll create mode 100644 final/test/Bitcode/metadata.ll create mode 100644 final/test/Bitcode/neon-intrinsics.ll create mode 100644 final/test/Bitcode/neon-intrinsics.ll.bc create mode 100644 final/test/Bitcode/null-type.ll create mode 100644 final/test/Bitcode/null-type.ll.bc create mode 100644 final/test/Bitcode/sse2_loadl_pd.ll create mode 100644 final/test/Bitcode/sse2_loadl_pd.ll.bc create mode 100644 final/test/Bitcode/sse2_movl_dq.ll create mode 100644 final/test/Bitcode/sse2_movl_dq.ll.bc create mode 100644 final/test/Bitcode/sse2_movs_d.ll create mode 100644 final/test/Bitcode/sse2_movs_d.ll.bc create mode 100644 final/test/Bitcode/sse2_punpck_qdq.ll create mode 100644 final/test/Bitcode/sse2_punpck_qdq.ll.bc create mode 100644 final/test/Bitcode/sse2_shuf_pd.ll create mode 100644 final/test/Bitcode/sse2_shuf_pd.ll.bc create mode 100644 final/test/Bitcode/sse2_unpck_pd.ll create mode 100644 final/test/Bitcode/sse2_unpck_pd.ll.bc create mode 100644 final/test/Bitcode/sse41_pmulld.ll create mode 100644 final/test/Bitcode/sse41_pmulld.ll.bc create mode 100644 final/test/Bitcode/ssse3_palignr.ll create mode 100644 final/test/Bitcode/ssse3_palignr.ll.bc create mode 100644 final/test/BugPoint/crash-narrowfunctiontest.ll create mode 100644 final/test/BugPoint/dg.exp create mode 100644 final/test/BugPoint/metadata.ll create mode 100644 final/test/BugPoint/remove_arguments_test.ll create mode 100644 final/test/CMakeLists.txt create mode 100644 final/test/CodeGen/ARM/2006-11-10-CycleInDAG.ll create mode 100644 final/test/CodeGen/ARM/2007-01-19-InfiniteLoop.ll create mode 100644 final/test/CodeGen/ARM/2007-03-07-CombinerCrash.ll create mode 100644 final/test/CodeGen/ARM/2007-03-13-InstrSched.ll create mode 100644 
 create mode 100644 final/test/CodeGen/ARM/2007-03-21-JoinIntervalsCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2007-03-27-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2007-03-30-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2007-04-02-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2007-04-03-PEIBug.ll
 create mode 100644 final/test/CodeGen/ARM/2007-04-03-UndefinedSymbol.ll
 create mode 100644 final/test/CodeGen/ARM/2007-04-30-CombinerCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-14-InlineAsmCstCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-14-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-23-BadPreIndexedStore.ll
 create mode 100644 final/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
 create mode 100644 final/test/CodeGen/ARM/2007-08-15-ReuseBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
 create mode 100644 final/test/CodeGen/ARM/2008-03-05-SxtInRegBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-03-07-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2008-04-11-PHIofImpDef.ll
 create mode 100644 final/test/CodeGen/ARM/2008-05-19-LiveIntervalsBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-05-19-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2008-07-17-Fdiv.ll
 create mode 100644 final/test/CodeGen/ARM/2008-07-24-CodeGenPrepCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2008-08-07-AsmPrintBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-09-17-CoalescerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2008-11-18-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-02-16-SpillerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-02-22-SoftenFloatVaArg.ll
 create mode 100644 final/test/CodeGen/ARM/2009-02-27-SpillerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-03-09-AddrModeBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-04-06-AsmModifier.ll
 create mode 100644 final/test/CodeGen/ARM/2009-04-08-AggregateAddr.ll
 create mode 100644 final/test/CodeGen/ARM/2009-04-08-FREM.ll
 create mode 100644 final/test/CodeGen/ARM/2009-04-08-FloatUndef.ll
 create mode 100644 final/test/CodeGen/ARM/2009-04-09-RegScavengerAsm.ll
 create mode 100644 final/test/CodeGen/ARM/2009-05-05-DAGCombineBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
 create mode 100644 final/test/CodeGen/ARM/2009-05-11-CodePlacementCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2009-05-18-InlineAsmMem.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-02-ISelCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-04-MissingLiveIn.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-15-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-19-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll
 create mode 100644 final/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-01-CommuteBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-09-asm-p-constraint.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-18-RewriterBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-21-PostRAKill.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-23-linkerprivate.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
 create mode 100644 final/test/CodeGen/ARM/2009-08-31-TwoRegShuffle.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-09-AllOnes.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-09-fpcmp-ole.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-10-postdec.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-13-InvalidSubreg.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-13-InvalidSuperReg.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-20-LiveIntervalsBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-21-LiveVariablesBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-22-LiveVariablesBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-23-LiveVariablesBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-24-spill-align.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-27-CoalescerBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-10-02-NEONSubregsBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-10-16-Scope.ll
 create mode 100644 final/test/CodeGen/ARM/2009-10-21-InvalidFNeg.ll
 create mode 100644 final/test/CodeGen/ARM/2009-10-27-double-align.ll
 create mode 100644 final/test/CodeGen/ARM/2009-10-30.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-02-NegativeLane.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-13-CoalescerCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-13-ScavengerAssert.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-13-ScavengerAssert2.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-13-VRRewriterCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll
 create mode 100644 final/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
 create mode 100644 final/test/CodeGen/ARM/2010-03-04-eabi-fp-spill.ll
 create mode 100644 final/test/CodeGen/ARM/2010-03-04-stm-undef-addr.ll
 create mode 100644 final/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll
 create mode 100644 final/test/CodeGen/ARM/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/ARM/2010-04-09-NeonSelect.ll
 create mode 100644 final/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
 create mode 100644 final/test/CodeGen/ARM/2010-04-14-SplitVector.ll
 create mode 100644 final/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-14-IllegalType.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-17-FastAllocCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-19-Shuffles.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2010-05-21-BuildVector.ll
 create mode 100644 final/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
 create mode 100644 final/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
 create mode 100755 final/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
 create mode 100644 final/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
 create mode 100644 final/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
 create mode 100644 final/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
 create mode 100644 final/test/CodeGen/ARM/2010-07-26-GlobalMerge.ll
 create mode 100644 final/test/CodeGen/ARM/2010-08-04-EHCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2010-08-04-StackVariable.ll
 create mode 100644 final/test/CodeGen/ARM/2010-09-21-OptCmpBug.ll
 create mode 100644 final/test/CodeGen/ARM/2010-09-29-mc-asm-header-test.ll
 create mode 100644 final/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
 create mode 100644 final/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll
 create mode 100644 final/test/CodeGen/ARM/2010-11-15-SpillEarlyClobber.ll
 create mode 100644 final/test/CodeGen/ARM/2010-11-29-PrologueBug.ll
 create mode 100644 final/test/CodeGen/ARM/2010-11-30-reloc-movt.ll
 create mode 100644 final/test/CodeGen/ARM/2010-12-07-PEIBug.ll
 create mode 100644 final/test/CodeGen/ARM/2010-12-08-tpsoft.ll
 create mode 100644 final/test/CodeGen/ARM/2010-12-13-reloc-pic.ll
 create mode 100644 final/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll
 create mode 100644 final/test/CodeGen/ARM/2010-12-17-LocalStackSlotCrash.ll
 create mode 100644 final/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll
 create mode 100644 final/test/CodeGen/ARM/2011-02-04-AntidepMultidef.ll
 create mode 100644 final/test/CodeGen/ARM/2011-02-07-AntidepClobber.ll
 create mode 100644 final/test/CodeGen/ARM/addrmode.ll
 create mode 100644 final/test/CodeGen/ARM/aliases.ll
 create mode 100644 final/test/CodeGen/ARM/align.ll
 create mode 100644 final/test/CodeGen/ARM/alloca.ll
 create mode 100644 final/test/CodeGen/ARM/argaddr.ll
 create mode 100644 final/test/CodeGen/ARM/arguments-nosplit-double.ll
 create mode 100644 final/test/CodeGen/ARM/arguments-nosplit-i64.ll
 create mode 100644 final/test/CodeGen/ARM/arguments.ll
 create mode 100644 final/test/CodeGen/ARM/arguments2.ll
 create mode 100644 final/test/CodeGen/ARM/arguments3.ll
 create mode 100644 final/test/CodeGen/ARM/arguments4.ll
 create mode 100644 final/test/CodeGen/ARM/arguments5.ll
 create mode 100644 final/test/CodeGen/ARM/arguments6.ll
 create mode 100644 final/test/CodeGen/ARM/arguments7.ll
 create mode 100644 final/test/CodeGen/ARM/arguments8.ll
 create mode 100644 final/test/CodeGen/ARM/arguments_f64_backfill.ll
 create mode 100644 final/test/CodeGen/ARM/arm-and-tst-peephole.ll
 create mode 100644 final/test/CodeGen/ARM/arm-asm.ll
 create mode 100644 final/test/CodeGen/ARM/arm-frameaddr.ll
 create mode 100644 final/test/CodeGen/ARM/arm-negative-stride.ll
 create mode 100644 final/test/CodeGen/ARM/arm-returnaddr.ll
 create mode 100644 final/test/CodeGen/ARM/armv4.ll
 create mode 100644 final/test/CodeGen/ARM/atomic-cmp.ll
 create mode 100644 final/test/CodeGen/ARM/available_externally.ll
 create mode 100644 final/test/CodeGen/ARM/bfc.ll
 create mode 100644 final/test/CodeGen/ARM/bfi.ll
 create mode 100644 final/test/CodeGen/ARM/bfx.ll
 create mode 100644 final/test/CodeGen/ARM/bic.ll
 create mode 100644 final/test/CodeGen/ARM/bits.ll
 create mode 100644 final/test/CodeGen/ARM/bswap-inline-asm.ll
 create mode 100644 final/test/CodeGen/ARM/bx_fold.ll
 create mode 100644 final/test/CodeGen/ARM/call-tc.ll
 create mode 100644 final/test/CodeGen/ARM/call.ll
 create mode 100644 final/test/CodeGen/ARM/call_nolink.ll
 create mode 100644 final/test/CodeGen/ARM/carry.ll
 create mode 100644 final/test/CodeGen/ARM/clz.ll
 create mode 100644 final/test/CodeGen/ARM/code-placement.ll
 create mode 100644 final/test/CodeGen/ARM/compare-call.ll
 create mode 100644 final/test/CodeGen/ARM/constants.ll
 create mode 100644 final/test/CodeGen/ARM/crash-O0.ll
 create mode 100644 final/test/CodeGen/ARM/crash.ll
 create mode 100644 final/test/CodeGen/ARM/cse-libcalls.ll
 create mode 100644 final/test/CodeGen/ARM/ctors_dtors.ll
 create mode 100644 final/test/CodeGen/ARM/ctz.ll
 create mode 100644 final/test/CodeGen/ARM/dg.exp
 create mode 100644 final/test/CodeGen/ARM/div.ll
 create mode 100644 final/test/CodeGen/ARM/dyn-stackalloc.ll
 create mode 100644 final/test/CodeGen/ARM/extloadi1.ll
 create mode 100644 final/test/CodeGen/ARM/fabss.ll
 create mode 100644 final/test/CodeGen/ARM/fadds.ll
 create mode 100644 final/test/CodeGen/ARM/fast-isel-crash.ll
 create mode 100644 final/test/CodeGen/ARM/fast-isel-static.ll
 create mode 100644 final/test/CodeGen/ARM/fast-isel.ll
 create mode 100644 final/test/CodeGen/ARM/fcopysign.ll
 create mode 100644 final/test/CodeGen/ARM/fdivs.ll
 create mode 100644 final/test/CodeGen/ARM/fixunsdfdi.ll
 create mode 100644 final/test/CodeGen/ARM/flag-crash.ll
 create mode 100644 final/test/CodeGen/ARM/fmacs.ll
 create mode 100644 final/test/CodeGen/ARM/fmdrr-fmrrd.ll
 create mode 100644 final/test/CodeGen/ARM/fmscs.ll
 create mode 100644 final/test/CodeGen/ARM/fmuls.ll
 create mode 100644 final/test/CodeGen/ARM/fnegs.ll
 create mode 100644 final/test/CodeGen/ARM/fnmacs.ll
 create mode 100644 final/test/CodeGen/ARM/fnmscs.ll
 create mode 100644 final/test/CodeGen/ARM/fnmul.ll
 create mode 100644 final/test/CodeGen/ARM/fnmuls.ll
 create mode 100644 final/test/CodeGen/ARM/formal.ll
 create mode 100644 final/test/CodeGen/ARM/fp.ll
 create mode 100644 final/test/CodeGen/ARM/fp16.ll
 create mode 100644 final/test/CodeGen/ARM/fp_convert.ll
 create mode 100644 final/test/CodeGen/ARM/fparith.ll
 create mode 100644 final/test/CodeGen/ARM/fpcmp-opt.ll
 create mode 100644 final/test/CodeGen/ARM/fpcmp.ll
 create mode 100644 final/test/CodeGen/ARM/fpcmp_ueq.ll
 create mode 100644 final/test/CodeGen/ARM/fpconsts.ll
 create mode 100644 final/test/CodeGen/ARM/fpconv.ll
 create mode 100644 final/test/CodeGen/ARM/fpmem.ll
 create mode 100644 final/test/CodeGen/ARM/fpow.ll
 create mode 100644 final/test/CodeGen/ARM/fpowi.ll
 create mode 100644 final/test/CodeGen/ARM/fptoint.ll
 create mode 100644 final/test/CodeGen/ARM/fsubs.ll
 create mode 100644 final/test/CodeGen/ARM/global-merge.ll
 create mode 100644 final/test/CodeGen/ARM/globals.ll
 create mode 100644 final/test/CodeGen/ARM/hardfloat_neon.ll
 create mode 100644 final/test/CodeGen/ARM/hello.ll
 create mode 100644 final/test/CodeGen/ARM/hidden-vis-2.ll
 create mode 100644 final/test/CodeGen/ARM/hidden-vis-3.ll
 create mode 100644 final/test/CodeGen/ARM/hidden-vis.ll
 create mode 100644 final/test/CodeGen/ARM/iabs.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt1.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt10.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt11.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt2.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt3.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt4.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt5.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt6.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt7.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt8.ll
 create mode 100644 final/test/CodeGen/ARM/ifcvt9.ll
 create mode 100644 final/test/CodeGen/ARM/illegal-vector-bitcast.ll
 create mode 100644 final/test/CodeGen/ARM/imm.ll
 create mode 100644 final/test/CodeGen/ARM/indirectbr.ll
 create mode 100644 final/test/CodeGen/ARM/inlineasm-imm-arm.ll
 create mode 100644 final/test/CodeGen/ARM/inlineasm.ll
 create mode 100644 final/test/CodeGen/ARM/inlineasm2.ll
 create mode 100644 final/test/CodeGen/ARM/inlineasm3.ll
 create mode 100644 final/test/CodeGen/ARM/insn-sched1.ll
 create mode 100644 final/test/CodeGen/ARM/ispositive.ll
 create mode 100644 final/test/CodeGen/ARM/large-stack.ll
 create mode 100644 final/test/CodeGen/ARM/ldm.ll
 create mode 100644 final/test/CodeGen/ARM/ldr.ll
 create mode 100644 final/test/CodeGen/ARM/ldr_ext.ll
 create mode 100644 final/test/CodeGen/ARM/ldr_frame.ll
 create mode 100644 final/test/CodeGen/ARM/ldr_post.ll
 create mode 100644 final/test/CodeGen/ARM/ldr_pre.ll
 create mode 100644 final/test/CodeGen/ARM/ldrd.ll
 create mode 100644 final/test/CodeGen/ARM/ldst-f32-2-i32.ll
 create mode 100644 final/test/CodeGen/ARM/load-global.ll
 create mode 100644 final/test/CodeGen/ARM/load.ll
 create mode 100644 final/test/CodeGen/ARM/long-setcc.ll
 create mode 100644 final/test/CodeGen/ARM/long.ll
 create mode 100644 final/test/CodeGen/ARM/long_shift.ll
 create mode 100644 final/test/CodeGen/ARM/lsr-code-insertion.ll
 create mode 100644 final/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
 create mode 100644 final/test/CodeGen/ARM/lsr-scale-addr-mode.ll
 create mode 100644 final/test/CodeGen/ARM/machine-cse-cmp.ll
 create mode 100644 final/test/CodeGen/ARM/machine-licm.ll
 create mode 100644 final/test/CodeGen/ARM/mem.ll
 create mode 100644 final/test/CodeGen/ARM/memcpy-inline.ll
 create mode 100644 final/test/CodeGen/ARM/memfunc.ll
 create mode 100644 final/test/CodeGen/ARM/mls.ll
 create mode 100644 final/test/CodeGen/ARM/movt-movw-global.ll
 create mode 100644 final/test/CodeGen/ARM/movt.ll
 create mode 100644 final/test/CodeGen/ARM/mul.ll
 create mode 100644 final/test/CodeGen/ARM/mul_const.ll
 create mode 100644 final/test/CodeGen/ARM/mulhi.ll
 create mode 100644 final/test/CodeGen/ARM/mult-alt-generic-arm.ll
 create mode 100644 final/test/CodeGen/ARM/mvn.ll
 create mode 100644 final/test/CodeGen/ARM/neon_arith1.ll
 create mode 100644 final/test/CodeGen/ARM/neon_div.ll
 create mode 100644 final/test/CodeGen/ARM/neon_ld1.ll
 create mode 100644 final/test/CodeGen/ARM/neon_ld2.ll
 create mode 100644 final/test/CodeGen/ARM/neon_minmax.ll
 create mode 100644 final/test/CodeGen/ARM/neon_shift.ll
 create mode 100644 final/test/CodeGen/ARM/pack.ll
 create mode 100644 final/test/CodeGen/ARM/phi.ll
 create mode 100644 final/test/CodeGen/ARM/pr3502.ll
 create mode 100644 final/test/CodeGen/ARM/prefetch.ll
 create mode 100644 final/test/CodeGen/ARM/private.ll
 create mode 100644 final/test/CodeGen/ARM/reg_sequence.ll
 create mode 100644 final/test/CodeGen/ARM/ret0.ll
 create mode 100644 final/test/CodeGen/ARM/ret_arg1.ll
 create mode 100644 final/test/CodeGen/ARM/ret_arg2.ll
 create mode 100644 final/test/CodeGen/ARM/ret_arg3.ll
 create mode 100644 final/test/CodeGen/ARM/ret_arg4.ll
 create mode 100644 final/test/CodeGen/ARM/ret_arg5.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f32_arg2.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f32_arg5.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f64_arg2.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f64_arg_reg_split.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f64_arg_split.ll
 create mode 100644 final/test/CodeGen/ARM/ret_f64_arg_stack.ll
 create mode 100644 final/test/CodeGen/ARM/ret_i128_arg2.ll
 create mode 100644 final/test/CodeGen/ARM/ret_i64_arg2.ll
 create mode 100644 final/test/CodeGen/ARM/ret_i64_arg3.ll
 create mode 100644 final/test/CodeGen/ARM/ret_i64_arg_split.ll
 create mode 100644 final/test/CodeGen/ARM/ret_void.ll
 create mode 100644 final/test/CodeGen/ARM/rev.ll
 create mode 100644 final/test/CodeGen/ARM/sbfx.ll
 create mode 100644 final/test/CodeGen/ARM/section.ll
 create mode 100644 final/test/CodeGen/ARM/select-imm.ll
 create mode 100644 final/test/CodeGen/ARM/select.ll
 create mode 100644 final/test/CodeGen/ARM/select_xform.ll
 create mode 100644 final/test/CodeGen/ARM/shifter_operand.ll
 create mode 100644 final/test/CodeGen/ARM/smul.ll
 create mode 100644 final/test/CodeGen/ARM/spill-q.ll
 create mode 100644 final/test/CodeGen/ARM/stack-frame.ll
 create mode 100644 final/test/CodeGen/ARM/stm.ll
 create mode 100644 final/test/CodeGen/ARM/str_post.ll
 create mode 100644 final/test/CodeGen/ARM/str_pre-2.ll
 create mode 100644 final/test/CodeGen/ARM/str_pre.ll
 create mode 100644 final/test/CodeGen/ARM/str_trunc.ll
 create mode 100644 final/test/CodeGen/ARM/sub.ll
 create mode 100644 final/test/CodeGen/ARM/sxt_rot.ll
 create mode 100644 final/test/CodeGen/ARM/t2-imm.ll
 create mode 100644 final/test/CodeGen/ARM/tail-opts.ll
 create mode 100644 final/test/CodeGen/ARM/thread_pointer.ll
 create mode 100644 final/test/CodeGen/ARM/thumb1-varalloc.ll
 create mode 100644 final/test/CodeGen/ARM/tls1.ll
 create mode 100644 final/test/CodeGen/ARM/tls2.ll
 create mode 100644 final/test/CodeGen/ARM/tls3.ll
 create mode 100644 final/test/CodeGen/ARM/trap.ll
 create mode 100644 final/test/CodeGen/ARM/trunc_ldr.ll
 create mode 100644 final/test/CodeGen/ARM/truncstore-dag-combine.ll
 create mode 100644 final/test/CodeGen/ARM/tst_teq.ll
 create mode 100644 final/test/CodeGen/ARM/uint64tof64.ll
 create mode 100644 final/test/CodeGen/ARM/umulo-32.ll
 create mode 100644 final/test/CodeGen/ARM/unaligned_load_store.ll
 create mode 100644 final/test/CodeGen/ARM/unord.ll
 create mode 100644 final/test/CodeGen/ARM/uxt_rot.ll
 create mode 100644 final/test/CodeGen/ARM/uxtb.ll
 create mode 100644 final/test/CodeGen/ARM/va_arg.ll
 create mode 100644 final/test/CodeGen/ARM/vaba.ll
 create mode 100644 final/test/CodeGen/ARM/vabd.ll
 create mode 100644 final/test/CodeGen/ARM/vabs.ll
 create mode 100644 final/test/CodeGen/ARM/vadd.ll
 create mode 100644 final/test/CodeGen/ARM/vargs.ll
 create mode 100644 final/test/CodeGen/ARM/vargs_align.ll
 create mode 100644 final/test/CodeGen/ARM/vbits.ll
 create mode 100644 final/test/CodeGen/ARM/vbsl.ll
 create mode 100644 final/test/CodeGen/ARM/vceq.ll
 create mode 100644 final/test/CodeGen/ARM/vcge.ll
 create mode 100644 final/test/CodeGen/ARM/vcgt.ll
 create mode 100644 final/test/CodeGen/ARM/vcnt.ll
 create mode 100644 final/test/CodeGen/ARM/vcombine.ll
 create mode 100644 final/test/CodeGen/ARM/vcvt.ll
 create mode 100644 final/test/CodeGen/ARM/vdup.ll
 create mode 100644 final/test/CodeGen/ARM/vector-DAGCombine.ll
 create mode 100644 final/test/CodeGen/ARM/vext.ll
 create mode 100644 final/test/CodeGen/ARM/vfcmp.ll
 create mode 100644 final/test/CodeGen/ARM/vfp.ll
 create mode 100644 final/test/CodeGen/ARM/vget_lane.ll
 create mode 100644 final/test/CodeGen/ARM/vhadd.ll
 create mode 100644 final/test/CodeGen/ARM/vhsub.ll
 create mode 100644 final/test/CodeGen/ARM/vicmp.ll
 create mode 100644 final/test/CodeGen/ARM/vld1.ll
 create mode 100644 final/test/CodeGen/ARM/vld2.ll
 create mode 100644 final/test/CodeGen/ARM/vld3.ll
 create mode 100644 final/test/CodeGen/ARM/vld4.ll
 create mode 100644 final/test/CodeGen/ARM/vlddup.ll
 create mode 100644 final/test/CodeGen/ARM/vldlane.ll
 create mode 100644 final/test/CodeGen/ARM/vminmax.ll
 create mode 100644 final/test/CodeGen/ARM/vmla.ll
 create mode 100644 final/test/CodeGen/ARM/vmls.ll
 create mode 100644 final/test/CodeGen/ARM/vmov.ll
 create mode 100644 final/test/CodeGen/ARM/vmul.ll
 create mode 100644 final/test/CodeGen/ARM/vneg.ll
 create mode 100644 final/test/CodeGen/ARM/vpadal.ll
 create mode 100644 final/test/CodeGen/ARM/vpadd.ll
 create mode 100644 final/test/CodeGen/ARM/vpminmax.ll
 create mode 100644 final/test/CodeGen/ARM/vqadd.ll
 create mode 100644 final/test/CodeGen/ARM/vqdmul.ll
 create mode 100644 final/test/CodeGen/ARM/vqshl.ll
 create mode 100644 final/test/CodeGen/ARM/vqshrn.ll
 create mode 100644 final/test/CodeGen/ARM/vqsub.ll
 create mode 100644 final/test/CodeGen/ARM/vrec.ll
 create mode 100644 final/test/CodeGen/ARM/vrev.ll
 create mode 100644 final/test/CodeGen/ARM/vshift.ll
 create mode 100644 final/test/CodeGen/ARM/vshiftins.ll
 create mode 100644 final/test/CodeGen/ARM/vshl.ll
 create mode 100644 final/test/CodeGen/ARM/vshll.ll
 create mode 100644 final/test/CodeGen/ARM/vshrn.ll
 create mode 100644 final/test/CodeGen/ARM/vsra.ll
 create mode 100644 final/test/CodeGen/ARM/vst1.ll
 create mode 100644 final/test/CodeGen/ARM/vst2.ll
 create mode 100644 final/test/CodeGen/ARM/vst3.ll
 create mode 100644 final/test/CodeGen/ARM/vst4.ll
 create mode 100644 final/test/CodeGen/ARM/vstlane.ll
 create mode 100644 final/test/CodeGen/ARM/vsub.ll
 create mode 100644 final/test/CodeGen/ARM/vtbl.ll
 create mode 100644 final/test/CodeGen/ARM/vtrn.ll
 create mode 100644 final/test/CodeGen/ARM/vuzp.ll
 create mode 100644 final/test/CodeGen/ARM/vzip.ll
 create mode 100644 final/test/CodeGen/ARM/weak.ll
 create mode 100644 final/test/CodeGen/ARM/weak2.ll
 create mode 100644 final/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
 create mode 100644 final/test/CodeGen/Alpha/2005-12-12-MissingFCMov.ll
 create mode 100644 final/test/CodeGen/Alpha/2006-01-18-MissedGlobal.ll
 create mode 100644 final/test/CodeGen/Alpha/2006-01-26-VaargBreak.ll
 create mode 100644 final/test/CodeGen/Alpha/2006-04-04-zextload.ll
 create mode 100644 final/test/CodeGen/Alpha/2006-07-03-ASMFormalLowering.ll
 create mode 100644 final/test/CodeGen/Alpha/2006-11-01-vastart.ll
 create mode 100644 final/test/CodeGen/Alpha/2007-11-27-mulneg3.ll
 create mode 100644 final/test/CodeGen/Alpha/2008-11-10-smul_lohi.ll
 create mode 100644 final/test/CodeGen/Alpha/2008-11-12-Add128.ll
 create mode 100644 final/test/CodeGen/Alpha/2009-07-16-PromoteFloatCompare.ll
 create mode 100644 final/test/CodeGen/Alpha/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/Alpha/2010-08-01-mulreduce64.ll
 create mode 100644 final/test/CodeGen/Alpha/add.ll
 create mode 100644 final/test/CodeGen/Alpha/add128.ll
 create mode 100644 final/test/CodeGen/Alpha/bic.ll
 create mode 100644 final/test/CodeGen/Alpha/bsr.ll
 create mode 100644 final/test/CodeGen/Alpha/call_adj.ll
 create mode 100644 final/test/CodeGen/Alpha/cmov.ll
 create mode 100644 final/test/CodeGen/Alpha/cmpbge.ll
 create mode 100644 final/test/CodeGen/Alpha/ctlz.ll
 create mode 100644 final/test/CodeGen/Alpha/ctlz_e.ll
 create mode 100644 final/test/CodeGen/Alpha/ctpop.ll
 create mode 100644 final/test/CodeGen/Alpha/dg.exp
 create mode 100644 final/test/CodeGen/Alpha/eqv.ll
 create mode 100644 final/test/CodeGen/Alpha/i32_sub_1.ll
 create mode 100644 final/test/CodeGen/Alpha/illegal-element-type.ll
 create mode 100644 final/test/CodeGen/Alpha/jmp_table.ll
 create mode 100644 final/test/CodeGen/Alpha/mb.ll
 create mode 100644 final/test/CodeGen/Alpha/mul128.ll
 create mode 100644 final/test/CodeGen/Alpha/mul5.ll
 create mode 100644 final/test/CodeGen/Alpha/neg1.ll
 create mode 100644 final/test/CodeGen/Alpha/not.ll
 create mode 100644 final/test/CodeGen/Alpha/ornot.ll
 create mode 100644 final/test/CodeGen/Alpha/private.ll
 create mode 100644 final/test/CodeGen/Alpha/rpcc.ll
 create mode 100644 final/test/CodeGen/Alpha/srl_and.ll
 create mode 100644 final/test/CodeGen/Alpha/sub128.ll
 create mode 100644 final/test/CodeGen/Alpha/weak.ll
 create mode 100644 final/test/CodeGen/Alpha/wmb.ll
 create mode 100644 final/test/CodeGen/Alpha/zapnot.ll
 create mode 100644 final/test/CodeGen/Alpha/zapnot2.ll
 create mode 100644 final/test/CodeGen/Alpha/zapnot3.ll
 create mode 100644 final/test/CodeGen/Alpha/zapnot4.ll
 create mode 100644 final/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll
 create mode 100644 final/test/CodeGen/Blackfin/2009-08-11-RegScavenger-CSR.ll
 create mode 100644 final/test/CodeGen/Blackfin/2009-08-15-LiveIn-SubReg.ll
 create mode 100644 final/test/CodeGen/Blackfin/2009-08-15-MissingDead.ll
 create mode 100644 final/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll
 create mode 100644 final/test/CodeGen/Blackfin/add-overflow.ll
 create mode 100644 final/test/CodeGen/Blackfin/add.ll
 create mode 100644 final/test/CodeGen/Blackfin/addsub-i128.ll
 create mode 100644 final/test/CodeGen/Blackfin/basic-i1.ll
 create mode 100644 final/test/CodeGen/Blackfin/basic-i16.ll
 create mode 100644 final/test/CodeGen/Blackfin/basic-i32.ll
 create mode 100644 final/test/CodeGen/Blackfin/basic-i64.ll
 create mode 100644 final/test/CodeGen/Blackfin/basic-i8.ll
 create mode 100644 final/test/CodeGen/Blackfin/basictest.ll
 create mode 100644 final/test/CodeGen/Blackfin/burg.ll
 create mode 100644 final/test/CodeGen/Blackfin/cmp-small-imm.ll
 create mode 100644 final/test/CodeGen/Blackfin/cmp64.ll
 create mode 100644 final/test/CodeGen/Blackfin/ct32.ll
 create mode 100644 final/test/CodeGen/Blackfin/ct64.ll
 create mode 100644 final/test/CodeGen/Blackfin/ctlz16.ll
 create mode 100644 final/test/CodeGen/Blackfin/ctlz64.ll
 create mode 100644 final/test/CodeGen/Blackfin/ctpop16.ll
 create mode 100644 final/test/CodeGen/Blackfin/cttz16.ll
 create mode 100644 final/test/CodeGen/Blackfin/cycles.ll
 create mode 100644 final/test/CodeGen/Blackfin/dg.exp
 create mode 100644 final/test/CodeGen/Blackfin/double-cast.ll
 create mode 100644 final/test/CodeGen/Blackfin/frameindex.ll
 create mode 100644 final/test/CodeGen/Blackfin/i17mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/i1mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/i1ops.ll
 create mode 100644 final/test/CodeGen/Blackfin/i216mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/i248mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/i256mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/i256param.ll
 create mode 100644 final/test/CodeGen/Blackfin/i56param.ll
 create mode 100644 final/test/CodeGen/Blackfin/i8mem.ll
 create mode 100644 final/test/CodeGen/Blackfin/inline-asm.ll
 create mode 100644 final/test/CodeGen/Blackfin/int-setcc.ll
 create mode 100644 final/test/CodeGen/Blackfin/invalid-apint.ll
 create mode 100644 final/test/CodeGen/Blackfin/jumptable.ll
 create mode 100644 final/test/CodeGen/Blackfin/large-switch.ll
 create mode 100644 final/test/CodeGen/Blackfin/load-i16.ll
 create mode 100644 final/test/CodeGen/Blackfin/logic-i16.ll
 create mode 100644 final/test/CodeGen/Blackfin/many-args.ll
 create mode 100644 final/test/CodeGen/Blackfin/mulhu.ll
 create mode 100644 final/test/CodeGen/Blackfin/printf.ll
 create mode 100644 final/test/CodeGen/Blackfin/printf2.ll
 create mode 100644 final/test/CodeGen/Blackfin/promote-logic.ll
 create mode 100644 final/test/CodeGen/Blackfin/promote-setcc.ll
 create mode 100644 final/test/CodeGen/Blackfin/sdiv.ll
 create mode 100644 final/test/CodeGen/Blackfin/simple-select.ll
 create mode 100644 final/test/CodeGen/Blackfin/switch.ll
 create mode 100644 final/test/CodeGen/Blackfin/switch2.ll
 create mode 100644 final/test/CodeGen/Blackfin/sync-intr.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-05-16-NameCollide.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-05-21-MissingReturn.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-19-ConstPointerRef.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-19-ConstantExpr.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-19-DataPointer.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-19-FunctionPointer.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-19-HardConstantExpr.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-20-UnnamedArgument.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-26-IndirectCallTest.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-08-30-StructureOrderingTest.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-09-20-ArrayTypeFailure.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-09-20-VarArgPrototypes.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-10-16-External.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
 create mode 100644 final/test/CodeGen/CBackend/2002-11-06-PrintEscaped.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-05-12-IntegerSizeWarning.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-05-13-VarArgFunction.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-05-31-MissingStructName.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-06-01-NullPointerType.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-06-11-HexConstant.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-06-11-LiteralStringProblem.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-06-28-InvokeSupport.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-06-28-LinkOnceGlobalVars.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-10-12-NANGlobalInits.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-10-23-UnusedType.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-10-28-CastToPtrToStruct.ll
 create mode 100644 final/test/CodeGen/CBackend/2003-11-21-ConstantShiftExpr.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-02-13-FrameReturnAddress.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-02-15-PreexistingExternals.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-02-26-FPNotPrintableConstants.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-02-26-LinkOnceFunctions.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-08-09-va-end-null.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-11-13-FunctionPointerCast.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-12-03-ExternStatics.ll
 create mode 100644 final/test/CodeGen/CBackend/2004-12-28-LogicalConstantExprs.ll
 create mode 100644 final/test/CodeGen/CBackend/2005-02-14-VolatileOperations.ll
 create mode 100644 final/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
 create mode 100644 final/test/CodeGen/CBackend/2005-07-14-NegationToMinusMinus.ll
 create mode 100644 final/test/CodeGen/CBackend/2005-08-23-Fmod.ll
 create mode 100644 final/test/CodeGen/CBackend/2005-09-27-VolatileFuncPtr.ll
 create mode 100644 final/test/CodeGen/CBackend/2006-12-11-Float-Bitcast.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-01-08-ParamAttr-ICmp.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-01-17-StackSaveNRestore.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-02-05-memset.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-02-23-NameConflicts.ll
 create mode 100644 final/test/CodeGen/CBackend/2007-07-11-PackedStruct.ll
 create mode 100644 final/test/CodeGen/CBackend/2008-02-01-UnalignedLoadStore.ll
 create mode 100644 final/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
 create mode 100644 final/test/CodeGen/CBackend/2008-05-31-BoolOverflow.ll
 create mode 100644 final/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
 create mode 100644 final/test/CodeGen/CBackend/2008-10-21-PPCLongDoubleConstant.ll
 create mode 100644 final/test/CodeGen/CBackend/dg.exp
 create mode 100644 final/test/CodeGen/CBackend/fneg.ll
 create mode 100644 final/test/CodeGen/CBackend/pr2408.ll
 create mode 100644 final/test/CodeGen/CBackend/vectors.ll
 create mode 100644 final/test/CodeGen/CPP/2007-06-16-Funcname.ll
 create mode 100644 final/test/CodeGen/CPP/2009-05-01-Long-Double.ll
 create mode 100644 final/test/CodeGen/CPP/2009-05-04-CondBr.ll
 create mode 100644 final/test/CodeGen/CPP/dg.exp
 create mode 100644 final/test/CodeGen/CPP/llvm2cpp.ll
 create mode 100644 final/test/CodeGen/CellSPU/2009-01-01-BrCond.ll
 create mode 100644 final/test/CodeGen/CellSPU/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/CellSPU/and_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/arg_ret.ll
 create mode 100644 final/test/CodeGen/CellSPU/bigstack.ll
 create mode 100644 final/test/CodeGen/CellSPU/bss.ll
 create mode 100644 final/test/CodeGen/CellSPU/call.ll
 create mode 100644 final/test/CodeGen/CellSPU/call_indirect.ll
 create mode 100644 final/test/CodeGen/CellSPU/crash.ll
 create mode 100644 final/test/CodeGen/CellSPU/ctpop.ll
 create mode 100644 final/test/CodeGen/CellSPU/dg.exp
 create mode 100644 final/test/CodeGen/CellSPU/div_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/dp_farith.ll
 create mode 100644 final/test/CodeGen/CellSPU/eqv.ll
 create mode 100644 final/test/CodeGen/CellSPU/extract_elt.ll
 create mode 100644 final/test/CodeGen/CellSPU/fcmp32.ll
 create mode 100644 final/test/CodeGen/CellSPU/fcmp64.ll
 create mode 100644 final/test/CodeGen/CellSPU/fdiv.ll
 create mode 100644 final/test/CodeGen/CellSPU/fneg-fabs.ll
 create mode 100644 final/test/CodeGen/CellSPU/i64ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/i8ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/icmp16.ll
 create mode 100644 final/test/CodeGen/CellSPU/icmp32.ll
 create mode 100644 final/test/CodeGen/CellSPU/icmp64.ll
 create mode 100644 final/test/CodeGen/CellSPU/icmp8.ll
 create mode 100644 final/test/CodeGen/CellSPU/immed16.ll
 create mode 100644 final/test/CodeGen/CellSPU/immed32.ll
 create mode 100644 final/test/CodeGen/CellSPU/immed64.ll
 create mode 100644 final/test/CodeGen/CellSPU/int2fp.ll
 create mode 100644 final/test/CodeGen/CellSPU/intrinsics_branch.ll
 create mode 100644 final/test/CodeGen/CellSPU/intrinsics_float.ll
 create mode 100644 final/test/CodeGen/CellSPU/intrinsics_logical.ll
 create mode 100644 final/test/CodeGen/CellSPU/jumptable.ll
 create mode 100644 final/test/CodeGen/CellSPU/loads.ll
 create mode 100644 final/test/CodeGen/CellSPU/mul-with-overflow.ll
 create mode 100644 final/test/CodeGen/CellSPU/mul_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/nand.ll
 create mode 100644 final/test/CodeGen/CellSPU/or_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/private.ll
 create mode 100644 final/test/CodeGen/CellSPU/rotate_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/select_bits.ll
 create mode 100644 final/test/CodeGen/CellSPU/sext128.ll
 create mode 100644 final/test/CodeGen/CellSPU/shift_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/shuffles.ll
 create mode 100644 final/test/CodeGen/CellSPU/sp_farith.ll
 create mode 100644 final/test/CodeGen/CellSPU/stores.ll
 create mode 100644 final/test/CodeGen/CellSPU/storestruct.ll
 create mode 100644 final/test/CodeGen/CellSPU/struct_1.ll
 create mode 100644 final/test/CodeGen/CellSPU/sub_ops.ll
 create mode 100644 final/test/CodeGen/CellSPU/trunc.ll
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/README.txt
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/i32operations.c
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/i64operations.c
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/i64operations.h
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/lit.local.cfg
 create mode 100644 final/test/CodeGen/CellSPU/useful-harnesses/vecoperations.c
 create mode 100644 final/test/CodeGen/CellSPU/v2f32.ll
 create mode 100644 final/test/CodeGen/CellSPU/v2i32.ll
 create mode 100644 final/test/CodeGen/CellSPU/vec_const.ll
 create mode 100644 final/test/CodeGen/CellSPU/vecinsert.ll
 create mode 100644 final/test/CodeGen/Generic/2002-04-14-UnexpectedUnsignedType.ll
 create mode 100644 final/test/CodeGen/Generic/2002-04-16-StackFrameSizeAlignment.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-27-phifcmpd.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-27-useboolinotherbb.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-27-usefsubasbool.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-28-ManyArgs.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-30-BadFoldGEP.ll
 create mode 100644 final/test/CodeGen/Generic/2003-05-30-BadPreselectPhi.ll
 create mode 100644 final/test/CodeGen/Generic/2003-07-06-BadIntCmp.ll
 create mode 100644 final/test/CodeGen/Generic/2003-07-07-BadLongConst.ll
 create mode 100644 final/test/CodeGen/Generic/2003-07-08-BadCastToBool.ll
 create mode 100644 final/test/CodeGen/Generic/2003-07-29-BadConstSbyte.ll
 create mode 100644 final/test/CodeGen/Generic/2004-02-08-UnwindSupport.ll
 create mode 100644 final/test/CodeGen/Generic/2004-05-09-LiveVarPartialRegister.ll
 create mode 100644 final/test/CodeGen/Generic/2005-01-18-SetUO-InfLoop.ll
 create mode 100644 final/test/CodeGen/Generic/2005-04-09-GlobalInPHI.ll
 create mode 100644 final/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
 create mode 100644 final/test/CodeGen/Generic/2005-10-18-ZeroSizeStackObject.ll
 create mode 100644 final/test/CodeGen/Generic/2005-10-21-longlonggtu.ll
 create mode 100644 final/test/CodeGen/Generic/2005-12-01-Crash.ll
 create mode 100644 final/test/CodeGen/Generic/2005-12-12-ExpandSextInreg.ll
 create mode 100644 final/test/CodeGen/Generic/2006-01-12-BadSetCCFold.ll
 create mode 100644 final/test/CodeGen/Generic/2006-01-18-InvalidBranchOpcodeAssert.ll
 create mode 100644 final/test/CodeGen/Generic/2006-02-12-InsertLibcall.ll
 create mode 100644 final/test/CodeGen/Generic/2006-03-01-dagcombineinfloop.ll
 create mode 100644 final/test/CodeGen/Generic/2006-04-26-SetCCAnd.ll
 create mode 100644 final/test/CodeGen/Generic/2006-04-28-Sign-extend-bool.ll
 create mode 100644 final/test/CodeGen/Generic/2006-05-06-GEP-Cast-Sink-Crash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-06-12-LowerSwitchCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-06-13-ComputeMaskedBitsCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-06-28-SimplifySetCCCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-07-03-schedulers.ll
 create mode 100644 final/test/CodeGen/Generic/2006-08-30-CoalescerCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-09-06-SwitchLowering.ll
 create mode 100644 final/test/CodeGen/Generic/2006-10-27-CondFolding.ll
 create mode 100644 final/test/CodeGen/Generic/2006-10-29-Crash.ll
 create mode 100644 final/test/CodeGen/Generic/2006-11-20-DAGCombineCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2007-01-15-LoadSelectCycle.ll
 create mode 100644 final/test/CodeGen/Generic/2007-02-25-invoke.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-08-MultipleFrameIndices.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-13-SwitchLowerBadPhi.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-17-lsr-crash.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-27-InlineAsm-X-Dest.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-27-LargeMemObject.ll
 create mode 100644 final/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
 create mode 100644 final/test/CodeGen/Generic/2007-05-03-EHTypeInfo.ll
 create mode 100644 final/test/CodeGen/Generic/2007-05-15-InfiniteRecursion.ll
 create mode 100644 final/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
 create mode 100644 final/test/CodeGen/Generic/2007-12-17-InvokeAsm.ll
 create mode 100644 final/test/CodeGen/Generic/2007-12-31-UnusedSelector.ll
 create mode 100644 final/test/CodeGen/Generic/2008-01-25-dag-combine-mul.ll
 create mode 100644 final/test/CodeGen/Generic/2008-01-30-LoadCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2008-02-04-Ctlz.ll
 create mode 100644 final/test/CodeGen/Generic/2008-02-04-ExtractSubvector.ll
 create mode 100644 final/test/CodeGen/Generic/2008-02-20-MatchingMem.ll
 create mode 100644 final/test/CodeGen/Generic/2008-02-25-NegateZero.ll
 create mode 100644 final/test/CodeGen/Generic/2008-02-26-NegatableCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2008-08-07-PtrToInt-SmallerInt.ll
 create mode 100644 final/test/CodeGen/Generic/2009-03-17-LSR-APInt.ll
 create mode 100644 final/test/CodeGen/Generic/2009-03-29-SoftFloatVectorExtract.ll
 create mode 100644 final/test/CodeGen/Generic/2009-04-10-SinkCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2009-04-28-i128-cmp-crash.ll
 create mode 100644 final/test/CodeGen/Generic/2009-06-03-UnreachableSplitPad.ll
 create mode 100644 final/test/CodeGen/Generic/2009-11-16-BadKillsCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2010-07-27-DAGCombineCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2010-11-04-BigByval.ll
 create mode 100644 final/test/CodeGen/Generic/2010-ZeroSizedArg.ll
 create mode 100644 final/test/CodeGen/Generic/2011-01-06-BigNumberCrash.ll
 create mode 100644 final/test/CodeGen/Generic/2011-02-12-shuffle.ll
 create mode 100644 final/test/CodeGen/Generic/APIntLoadStore.ll
 create mode 100644 final/test/CodeGen/Generic/APIntParam.ll
 create mode 100644 final/test/CodeGen/Generic/APIntSextParam.ll
 create mode 100644 final/test/CodeGen/Generic/APIntZextParam.ll
 create mode 100644 final/test/CodeGen/Generic/BasicInstrs.ll
 create mode 100644 final/test/CodeGen/Generic/BurgBadRegAlloc.ll
 create mode 100644 final/test/CodeGen/Generic/ConstantExprLowering.ll
 create mode 100644 final/test/CodeGen/Generic/Makefile
 create mode 100644 final/test/CodeGen/Generic/add-with-overflow-128.ll
 create mode 100644 final/test/CodeGen/Generic/add-with-overflow-24.ll
 create mode 100644 final/test/CodeGen/Generic/add-with-overflow.ll
 create mode 100644 final/test/CodeGen/Generic/addr-label.ll
 create mode 100644 final/test/CodeGen/Generic/asm-large-immediate.ll
 create mode 100644 final/test/CodeGen/Generic/badCallArgLRLLVM.ll
 create mode 100644 final/test/CodeGen/Generic/badFoldGEP.ll
 create mode 100644 final/test/CodeGen/Generic/badarg6.ll
 create mode 100644 final/test/CodeGen/Generic/badlive.ll
 create mode 100644 final/test/CodeGen/Generic/bool-to-double.ll
 create mode 100644 final/test/CodeGen/Generic/bool-vector.ll
 create mode 100644 final/test/CodeGen/Generic/call-ret0.ll
 create mode 100644 final/test/CodeGen/Generic/call-ret42.ll
 create mode 100644 final/test/CodeGen/Generic/call-void.ll
 create mode 100644 final/test/CodeGen/Generic/call2-ret0.ll
 create mode 100644 final/test/CodeGen/Generic/cast-fp.ll
 create mode 100644 final/test/CodeGen/Generic/constindices.ll
 create mode 100644 final/test/CodeGen/Generic/crash.ll
 create mode 100644 final/test/CodeGen/Generic/dbg_value.ll
 create mode 100644 final/test/CodeGen/Generic/dg.exp
 create mode 100644 final/test/CodeGen/Generic/div-neg-power-2.ll
 create mode 100644 final/test/CodeGen/Generic/empty-load-store.ll
 create mode 100644 final/test/CodeGen/Generic/externally_available.ll
 create mode 100644 final/test/CodeGen/Generic/fastcall.ll
 create mode 100644 final/test/CodeGen/Generic/fneg-fabs.ll
 create mode 100644 final/test/CodeGen/Generic/fp-to-int-invalid.ll
 create mode 100644 final/test/CodeGen/Generic/fp_to_int.ll
 create mode 100644 final/test/CodeGen/Generic/fpowi-promote.ll
 create mode 100644 final/test/CodeGen/Generic/fwdtwice.ll
 create mode 100644 final/test/CodeGen/Generic/getresult-undef.ll
 create mode 100644 final/test/CodeGen/Generic/global-ret0.ll
 create mode 100644 final/test/CodeGen/Generic/hello.ll
 create mode 100644 final/test/CodeGen/Generic/i128-addsub.ll
 create mode 100644 final/test/CodeGen/Generic/i128-arith.ll
 create mode 100644 final/test/CodeGen/Generic/inline-asm-special-strings.ll
 create mode 100644 final/test/CodeGen/Generic/intrinsics.ll
 create mode 100644 final/test/CodeGen/Generic/invalid-memcpy.ll
 create mode 100644 final/test/CodeGen/Generic/isunord.ll
 create mode 100644 final/test/CodeGen/Generic/legalize-dbg-value.ll
 create mode 100644 final/test/CodeGen/Generic/llvm-ct-intrinsics.ll
 create mode 100644 final/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
 create mode 100644 final/test/CodeGen/Generic/negintconst.ll
 create mode 100644 final/test/CodeGen/Generic/nested-select.ll
 create mode 100644 final/test/CodeGen/Generic/overflow.ll
 create mode 100644 final/test/CodeGen/Generic/pr2625.ll
 create mode 100644 final/test/CodeGen/Generic/pr3288.ll
 create mode 100644 final/test/CodeGen/Generic/print-add.ll
 create mode 100644 final/test/CodeGen/Generic/print-arith-fp.ll
 create mode 100644 final/test/CodeGen/Generic/print-arith-int.ll
 create mode 100644 final/test/CodeGen/Generic/print-int.ll
 create mode 100644 final/test/CodeGen/Generic/print-mul-exp.ll
 create mode 100644 final/test/CodeGen/Generic/print-mul.ll
 create mode 100644 final/test/CodeGen/Generic/print-shift.ll
 create mode 100644 final/test/CodeGen/Generic/ret0.ll
 create mode 100644 final/test/CodeGen/Generic/ret42.ll
 create mode 100644 final/test/CodeGen/Generic/select-cc.ll
 create mode 100644 final/test/CodeGen/Generic/select.ll
 create mode 100644 final/test/CodeGen/Generic/shift-int64.ll
 create mode 100644 final/test/CodeGen/Generic/spillccr.ll
 create mode 100644 final/test/CodeGen/Generic/stacksave-restore.ll
 create mode 100644 final/test/CodeGen/Generic/storetrunc-fp.ll
 create mode 100644 final/test/CodeGen/Generic/switch-lower-feature.ll
 create mode 100644 final/test/CodeGen/Generic/switch-lower.ll
 create mode 100644 final/test/CodeGen/Generic/trap.ll
 create mode 100644 final/test/CodeGen/Generic/v-split.ll
 create mode 100644 final/test/CodeGen/Generic/vector-casts.ll
 create mode 100644 final/test/CodeGen/Generic/vector-constantexpr.ll
 create mode 100644 final/test/CodeGen/Generic/vector-identity-shuffle.ll
 create mode 100644 final/test/CodeGen/Generic/vector.ll
 create mode 100644 final/test/CodeGen/MBlaze/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/MBlaze/brind.ll
 create mode 100644 final/test/CodeGen/MBlaze/callind.ll
 create mode 100644 final/test/CodeGen/MBlaze/cc.ll
 create mode 100644 final/test/CodeGen/MBlaze/dg.exp
 create mode 100644 final/test/CodeGen/MBlaze/div.ll
 create mode 100644 final/test/CodeGen/MBlaze/fpu.ll
 create mode 100644 final/test/CodeGen/MBlaze/fsl.ll
 create mode 100644 final/test/CodeGen/MBlaze/imm.ll
 create mode 100644 final/test/CodeGen/MBlaze/intr.ll
 create mode 100644 final/test/CodeGen/MBlaze/jumptable.ll
 create mode 100644 final/test/CodeGen/MBlaze/loop.ll
 create mode 100644 final/test/CodeGen/MBlaze/mul.ll
 create mode 100644 final/test/CodeGen/MBlaze/mul64.ll
 create mode 100644 final/test/CodeGen/MBlaze/select.ll
 create mode 100644 final/test/CodeGen/MBlaze/shift.ll
 create mode 100644 final/test/CodeGen/MBlaze/svol.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-05-10-CyclicDAG.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-05-17-Rot.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-05-17-Shift.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-05-19-DoubleSplit.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-08-25-DynamicStackAlloc.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-09-18-AbsoluteAddr.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-10-10-OrImpDef.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-11-05-8BitLibcalls.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-11-08-InvalidResNo.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-11-20-NewNode.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-12-21-FrameAddr.ll
 create mode 100644 final/test/CodeGen/MSP430/2009-12-22-InlineAsm.ll
 create mode 100644 final/test/CodeGen/MSP430/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/MSP430/2010-05-01-CombinerAnd.ll
 create mode 100644 final/test/CodeGen/MSP430/AddrMode-bis-rx.ll
 create mode 100644 final/test/CodeGen/MSP430/AddrMode-bis-xr.ll
 create mode 100644 final/test/CodeGen/MSP430/AddrMode-mov-rx.ll
 create mode 100644 final/test/CodeGen/MSP430/AddrMode-mov-xr.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16mi.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16mm.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16mr.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16ri.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16rm.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst16rr.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8mi.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8mm.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8mr.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8ri.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8rm.ll
 create mode 100644 final/test/CodeGen/MSP430/Inst8rr.ll
 create mode 100644 final/test/CodeGen/MSP430/bit.ll
 create mode 100644 final/test/CodeGen/MSP430/dg.exp
 create mode 100644 final/test/CodeGen/MSP430/indirectbr.ll
 create mode 100644 final/test/CodeGen/MSP430/indirectbr2.ll
 create mode 100644 final/test/CodeGen/MSP430/inline-asm.ll
 create mode 100644 final/test/CodeGen/MSP430/mult-alt-generic-msp430.ll
 create mode 100644 final/test/CodeGen/MSP430/postinc.ll
 create mode 100644 final/test/CodeGen/MSP430/setcc.ll
 create mode 100644 final/test/CodeGen/MSP430/shifts.ll
 create mode 100644 final/test/CodeGen/Mips/2008-06-05-Carry.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-03-SRet.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-05-ByVal.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-06-fadd64.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-07-FPExtend.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-07-Float2Int.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-07-IntDoubleConvertions.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-15-InternalConstant.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-15-SmallSection.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-22-Cstpool.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-23-fpcmp.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-29-icmp.ll
 create mode 100644 final/test/CodeGen/Mips/2008-07-31-fcopysign.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-01-AsmInline.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-03-ReturnDouble.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-03-fabs64.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-04-Bitconvert.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-06-Alloca.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-07-CC.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-07-FPRound.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-08-bswap.ll
 create mode 100644 final/test/CodeGen/Mips/2008-08-08-ctlz.ll
 create mode 100644 final/test/CodeGen/Mips/2008-10-13-LegalizerBug.ll
 create mode 100644 final/test/CodeGen/Mips/2008-11-10-xint_to_fp.ll
 create mode 100644 final/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll
 create mode 100644 final/test/CodeGen/Mips/2010-04-07-DbgValueOtherTargets.ll
 create mode 100644 final/test/CodeGen/Mips/2010-07-20-Select.ll
 create mode 100644 final/test/CodeGen/Mips/2010-07-20-Switch.ll
 create mode 100644 final/test/CodeGen/Mips/2010-11-09-CountLeading.ll
 create mode 100644 final/test/CodeGen/Mips/2010-11-09-Mul.ll
 create mode 100644 final/test/CodeGen/Mips/blockaddr.ll
 create mode 100755 final/test/CodeGen/Mips/cmov.ll
 create mode 100644 final/test/CodeGen/Mips/dg.exp
 create mode 100644 final/test/CodeGen/Mips/divrem.ll
 create mode 100644 final/test/CodeGen/Mips/largeimm1.ll
 create mode 100644 final/test/CodeGen/Mips/madd-msub.ll
 create mode 100644 final/test/CodeGen/Mips/o32_cc.ll
 create mode 100644 final/test/CodeGen/Mips/private.ll
 create mode 100644 final/test/CodeGen/Mips/rotate.ll
 create mode 100644 final/test/CodeGen/PTX/add.ll
 create mode 100644 final/test/CodeGen/PTX/dg.exp
 create mode 100644 final/test/CodeGen/PTX/exit.ll
 create mode 100644 final/test/CodeGen/PTX/intrinsic.ll
 create mode 100644 final/test/CodeGen/PTX/ld.ll
 create mode 100644 final/test/CodeGen/PTX/mov.ll
 create mode 100644 final/test/CodeGen/PTX/mul.ll
 create mode 100644 final/test/CodeGen/PTX/options.ll
 create mode 100644 final/test/CodeGen/PTX/ret.ll
 create mode 100644 final/test/CodeGen/PTX/shl.ll
 create mode 100644 final/test/CodeGen/PTX/shr.ll
 create mode 100644 final/test/CodeGen/PTX/st.ll
 create mode 100644 final/test/CodeGen/PTX/sub.ll
 create mode 100644 final/test/CodeGen/PowerPC/2004-11-29-ShrCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2004-11-30-shift-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2004-11-30-shr-var-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2004-12-12-ZeroSizeCommon.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-01-14-SetSelectCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-01-14-UndefLong.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-08-12-rlwimi-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-09-02-LegalizeDuplicatesCalls.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-10-08-ArithmeticRotate.ll
 create mode 100644 final/test/CodeGen/PowerPC/2005-11-30-vastart-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-01-11-darwin-fp-argument.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-01-20-ShiftPartsCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-04-01-FloatDoubleExtend.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-04-05-splat-ish.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-04-19-vmaddfp-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-05-12-rlwimi-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-07-07-ComputeMaskedBits.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-07-19-stwbrx-crash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-08-11-RetVector.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-08-15-SelectionCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-09-28-shift_64.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-10-11-combiner-aa-regression.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-10-13-Miscompile.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-10-17-brcc-miscompile.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-10-17-ppc64-alloca.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-11-10-DAGCombineMiscompile.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-11-29-AltivecFPSplat.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-12-07-LargeAlloca.ll
 create mode 100644 final/test/CodeGen/PowerPC/2006-12-07-SelectCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-01-04-ArgExtension.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-01-29-lbrx-asm.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-01-31-InlineAsmAddrMode.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-02-16-AlignPacked.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-02-16-InlineAsmNConstraint.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-02-23-lr-saved-twice.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-03-24-cntlzd.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-03-30-SpillerCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-04-24-InlineAsm-I-Modifier.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-04-30-InlineAsmEarlyClobber.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-05-03-InlineAsm-S-Constraint.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-05-14-InlineAsmSelectCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-06-28-BCCISelBug.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-08-04-CoalescerAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-09-04-AltivecDST.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-09-07-LoadStoreIdxForms.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-09-08-unaligned.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-09-11-RegCoalescerAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-09-12-LiveIntervalsAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-10-16-InlineAsmFrameOffset.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-10-18-PtrArithmetic.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-10-21-LocalRegAllocAssert2.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-11-04-CoalescerCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-11-16-landingpad-split.ll
 create mode 100644 final/test/CodeGen/PowerPC/2007-11-19-VectorSplitting.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-02-05-LiveIntervalsAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-02-09-LocalRegAllocAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-05-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-17-RegScavengerCrash.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-18-RegScavengerAssert.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-24-AddressRegImm.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
 create mode 100644 final/test/CodeGen/PowerPC/2008-03-26-CoalescerBug.ll
create mode 100644 final/test/CodeGen/PowerPC/2008-04-10-LiveIntervalCrash.ll create mode 100644 final/test/CodeGen/PowerPC/2008-04-16-CoalescerBug.ll create mode 100644 final/test/CodeGen/PowerPC/2008-04-23-CoalescerCrash.ll create mode 100644 final/test/CodeGen/PowerPC/2008-05-01-ppc_fp128.ll create mode 100644 final/test/CodeGen/PowerPC/2008-06-19-LegalizerCrash.ll create mode 100644 final/test/CodeGen/PowerPC/2008-06-21-F128LoadStore.ll create mode 100644 final/test/CodeGen/PowerPC/2008-06-23-LiveVariablesCrash.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-15-Bswap.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-15-Fabs.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-17-Fneg.ll create mode 100644 final/test/CodeGen/PowerPC/2008-07-24-PPC64-CCBug.ll create mode 100644 final/test/CodeGen/PowerPC/2008-09-12-CoalescerBug.ll create mode 100644 final/test/CodeGen/PowerPC/2008-10-17-AsmMatchingOperands.ll create mode 100644 final/test/CodeGen/PowerPC/2008-10-28-UnprocessedNode.ll create mode 100644 final/test/CodeGen/PowerPC/2008-10-28-f128-i32.ll create mode 100644 final/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll create mode 100644 final/test/CodeGen/PowerPC/2008-10-31-PPCF128Libcalls.ll create mode 100644 final/test/CodeGen/PowerPC/2008-12-02-LegalizeTypeAssert.ll create mode 100644 final/test/CodeGen/PowerPC/2008-12-12-EH.ll create mode 100644 final/test/CodeGen/PowerPC/2009-01-16-DeclareISelBug.ll create mode 100644 final/test/CodeGen/PowerPC/2009-03-17-LSRBug.ll create mode 100644 final/test/CodeGen/PowerPC/2009-05-28-LegalizeBRCC.ll create mode 100644 final/test/CodeGen/PowerPC/2009-07-16-InlineAsm-M-Operand.ll create mode 100644 final/test/CodeGen/PowerPC/2009-08-17-inline-asm-addr-mode-breakage.ll create mode 100644 final/test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll create mode 100644 final/test/CodeGen/PowerPC/2009-09-18-carrybit.ll create mode 100644 final/test/CodeGen/PowerPC/2009-11-15-ProcImpDefsBug.ll create mode 100644 final/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll create mode 100644 final/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll create mode 100644 final/test/CodeGen/PowerPC/2010-02-04-EmptyGlobal.ll create mode 100644 final/test/CodeGen/PowerPC/2010-02-12-saveCR.ll create mode 100644 final/test/CodeGen/PowerPC/2010-03-09-indirect-call.ll create mode 100644 final/test/CodeGen/PowerPC/2010-04-01-MachineCSEBug.ll create mode 100644 final/test/CodeGen/PowerPC/2010-04-07-DbgValueOtherTargets.ll create mode 100644 final/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll create mode 100644 final/test/CodeGen/PowerPC/2010-10-11-Fast-Varargs.ll create mode 100644 final/test/CodeGen/PowerPC/2010-12-18-PPCStackRefs.ll create mode 100644 final/test/CodeGen/PowerPC/Atomics-32.ll create mode 100644 final/test/CodeGen/PowerPC/Atomics-64.ll create mode 100644 final/test/CodeGen/PowerPC/Frames-alloca.ll create mode 100644 final/test/CodeGen/PowerPC/Frames-large.ll create mode 100644 final/test/CodeGen/PowerPC/Frames-leaf.ll create mode 100644 final/test/CodeGen/PowerPC/Frames-small.ll create mode 100644 final/test/CodeGen/PowerPC/LargeAbsoluteAddr.ll create mode 100644 final/test/CodeGen/PowerPC/addc.ll create mode 100644 final/test/CodeGen/PowerPC/addi-reassoc.ll create mode 100644 final/test/CodeGen/PowerPC/align.ll create mode 100644 final/test/CodeGen/PowerPC/and-branch.ll create mode 100644 
final/test/CodeGen/PowerPC/and-elim.ll create mode 100644 final/test/CodeGen/PowerPC/and-imm.ll create mode 100644 final/test/CodeGen/PowerPC/and_add.ll create mode 100644 final/test/CodeGen/PowerPC/and_sext.ll create mode 100644 final/test/CodeGen/PowerPC/and_sra.ll create mode 100644 final/test/CodeGen/PowerPC/atomic-1.ll create mode 100644 final/test/CodeGen/PowerPC/atomic-2.ll create mode 100644 final/test/CodeGen/PowerPC/available-externally.ll create mode 100644 final/test/CodeGen/PowerPC/big-endian-actual-args.ll create mode 100644 final/test/CodeGen/PowerPC/big-endian-call-result.ll create mode 100644 final/test/CodeGen/PowerPC/big-endian-formal-args.ll create mode 100644 final/test/CodeGen/PowerPC/branch-opt.ll create mode 100644 final/test/CodeGen/PowerPC/bswap-load-store.ll create mode 100644 final/test/CodeGen/PowerPC/buildvec_canonicalize.ll create mode 100644 final/test/CodeGen/PowerPC/calls.ll create mode 100644 final/test/CodeGen/PowerPC/cmp-cmp.ll create mode 100644 final/test/CodeGen/PowerPC/compare-duplicate.ll create mode 100644 final/test/CodeGen/PowerPC/compare-simm.ll create mode 100644 final/test/CodeGen/PowerPC/constants.ll create mode 100644 final/test/CodeGen/PowerPC/cr_spilling.ll create mode 100644 final/test/CodeGen/PowerPC/cttz.ll create mode 100644 final/test/CodeGen/PowerPC/darwin-labels.ll create mode 100644 final/test/CodeGen/PowerPC/delete-node.ll create mode 100644 final/test/CodeGen/PowerPC/dg.exp create mode 100644 final/test/CodeGen/PowerPC/div-2.ll create mode 100644 final/test/CodeGen/PowerPC/empty-functions.ll create mode 100644 final/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll create mode 100644 final/test/CodeGen/PowerPC/extsh.ll create mode 100644 final/test/CodeGen/PowerPC/fabs.ll create mode 100644 final/test/CodeGen/PowerPC/fma.ll create mode 100644 final/test/CodeGen/PowerPC/fnabs.ll create mode 100644 final/test/CodeGen/PowerPC/fneg.ll create mode 100644 final/test/CodeGen/PowerPC/fold-li.ll create mode 100644 final/test/CodeGen/PowerPC/fp-branch.ll create mode 100644 final/test/CodeGen/PowerPC/fp-int-fp.ll create mode 100644 final/test/CodeGen/PowerPC/fp_to_uint.ll create mode 100644 final/test/CodeGen/PowerPC/fpcopy.ll create mode 100644 final/test/CodeGen/PowerPC/frounds.ll create mode 100644 final/test/CodeGen/PowerPC/fsqrt.ll create mode 100644 final/test/CodeGen/PowerPC/hello.ll create mode 100644 final/test/CodeGen/PowerPC/hidden-vis-2.ll create mode 100644 final/test/CodeGen/PowerPC/hidden-vis.ll create mode 100644 final/test/CodeGen/PowerPC/i128-and-beyond.ll create mode 100644 final/test/CodeGen/PowerPC/i64_fp.ll create mode 100644 final/test/CodeGen/PowerPC/iabs.ll create mode 100644 final/test/CodeGen/PowerPC/illegal-element-type.ll create mode 100644 final/test/CodeGen/PowerPC/indirectbr.ll create mode 100644 final/test/CodeGen/PowerPC/inlineasm-copy.ll create mode 100644 final/test/CodeGen/PowerPC/int-fp-conv-0.ll create mode 100644 final/test/CodeGen/PowerPC/int-fp-conv-1.ll create mode 100644 final/test/CodeGen/PowerPC/invalid-memcpy.ll create mode 100644 final/test/CodeGen/PowerPC/inverted-bool-compares.ll create mode 100644 final/test/CodeGen/PowerPC/ispositive.ll create mode 100644 final/test/CodeGen/PowerPC/itofp128.ll create mode 100644 final/test/CodeGen/PowerPC/lha.ll create mode 100644 final/test/CodeGen/PowerPC/load-constant-addr.ll create mode 100644 final/test/CodeGen/PowerPC/long-compare.ll create mode 100644 final/test/CodeGen/PowerPC/longdbl-truncate.ll create mode 100644 
final/test/CodeGen/PowerPC/lsr-postinc-pos.ll create mode 100644 final/test/CodeGen/PowerPC/mask64.ll create mode 100644 final/test/CodeGen/PowerPC/mem-rr-addr-mode.ll create mode 100644 final/test/CodeGen/PowerPC/mem_update.ll create mode 100644 final/test/CodeGen/PowerPC/mul-neg-power-2.ll create mode 100644 final/test/CodeGen/PowerPC/mul-with-overflow.ll create mode 100644 final/test/CodeGen/PowerPC/mulhs.ll create mode 100644 final/test/CodeGen/PowerPC/mult-alt-generic-powerpc.ll create mode 100644 final/test/CodeGen/PowerPC/mult-alt-generic-powerpc64.ll create mode 100644 final/test/CodeGen/PowerPC/multiple-return-values.ll create mode 100644 final/test/CodeGen/PowerPC/neg.ll create mode 100644 final/test/CodeGen/PowerPC/no-dead-strip.ll create mode 100644 final/test/CodeGen/PowerPC/or-addressing-mode.ll create mode 100644 final/test/CodeGen/PowerPC/ppc-prologue.ll create mode 100644 final/test/CodeGen/PowerPC/ppcf128-1-opt.ll create mode 100644 final/test/CodeGen/PowerPC/ppcf128-1.ll create mode 100644 final/test/CodeGen/PowerPC/ppcf128-2.ll create mode 100644 final/test/CodeGen/PowerPC/ppcf128-3.ll create mode 100644 final/test/CodeGen/PowerPC/ppcf128-4.ll create mode 100644 final/test/CodeGen/PowerPC/pr3711_widen_bit.ll create mode 100644 final/test/CodeGen/PowerPC/private.ll create mode 100644 final/test/CodeGen/PowerPC/reg-coalesce-simple.ll create mode 100644 final/test/CodeGen/PowerPC/retaddr.ll create mode 100644 final/test/CodeGen/PowerPC/return-val-i128.ll create mode 100644 final/test/CodeGen/PowerPC/rlwimi-commute.ll create mode 100644 final/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll create mode 100644 final/test/CodeGen/PowerPC/rlwimi.ll create mode 100644 final/test/CodeGen/PowerPC/rlwimi2.ll create mode 100644 final/test/CodeGen/PowerPC/rlwimi3.ll create mode 100644 final/test/CodeGen/PowerPC/rlwinm.ll create mode 100644 final/test/CodeGen/PowerPC/rlwinm2.ll create mode 100644 final/test/CodeGen/PowerPC/rotl-2.ll create mode 100644 final/test/CodeGen/PowerPC/rotl-64.ll create mode 100644 final/test/CodeGen/PowerPC/rotl.ll create mode 100644 final/test/CodeGen/PowerPC/sections.ll create mode 100644 final/test/CodeGen/PowerPC/select-cc.ll create mode 100644 final/test/CodeGen/PowerPC/select_lt0.ll create mode 100644 final/test/CodeGen/PowerPC/setcc_no_zext.ll create mode 100644 final/test/CodeGen/PowerPC/seteq-0.ll create mode 100644 final/test/CodeGen/PowerPC/shift128.ll create mode 100644 final/test/CodeGen/PowerPC/shl_elim.ll create mode 100644 final/test/CodeGen/PowerPC/shl_sext.ll create mode 100644 final/test/CodeGen/PowerPC/sign_ext_inreg1.ll create mode 100644 final/test/CodeGen/PowerPC/small-arguments.ll create mode 100644 final/test/CodeGen/PowerPC/stack-protector.ll create mode 100644 final/test/CodeGen/PowerPC/stfiwx-2.ll create mode 100644 final/test/CodeGen/PowerPC/stfiwx.ll create mode 100644 final/test/CodeGen/PowerPC/store-load-fwd.ll create mode 100644 final/test/CodeGen/PowerPC/stubs.ll create mode 100644 final/test/CodeGen/PowerPC/subc.ll create mode 100644 final/test/CodeGen/PowerPC/tailcall1-64.ll create mode 100644 final/test/CodeGen/PowerPC/tailcall1.ll create mode 100644 final/test/CodeGen/PowerPC/tailcallpic1.ll create mode 100644 final/test/CodeGen/PowerPC/trampoline.ll create mode 100644 final/test/CodeGen/PowerPC/unsafe-math.ll create mode 100644 final/test/CodeGen/PowerPC/varargs.ll create mode 100644 final/test/CodeGen/PowerPC/vcmp-fold.ll create mode 100644 final/test/CodeGen/PowerPC/vec_auto_constant.ll create mode 100644 
final/test/CodeGen/PowerPC/vec_br_cmp.ll create mode 100644 final/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll create mode 100644 final/test/CodeGen/PowerPC/vec_call.ll create mode 100644 final/test/CodeGen/PowerPC/vec_constants.ll create mode 100644 final/test/CodeGen/PowerPC/vec_fneg.ll create mode 100644 final/test/CodeGen/PowerPC/vec_insert.ll create mode 100644 final/test/CodeGen/PowerPC/vec_misaligned.ll create mode 100644 final/test/CodeGen/PowerPC/vec_mul.ll create mode 100644 final/test/CodeGen/PowerPC/vec_perf_shuffle.ll create mode 100644 final/test/CodeGen/PowerPC/vec_shift.ll create mode 100644 final/test/CodeGen/PowerPC/vec_shuffle.ll create mode 100644 final/test/CodeGen/PowerPC/vec_splat.ll create mode 100644 final/test/CodeGen/PowerPC/vec_splat_constant.ll create mode 100644 final/test/CodeGen/PowerPC/vec_vrsave.ll create mode 100644 final/test/CodeGen/PowerPC/vec_zero.ll create mode 100644 final/test/CodeGen/PowerPC/vector-identity-shuffle.ll create mode 100644 final/test/CodeGen/PowerPC/vector.ll create mode 100644 final/test/CodeGen/SPARC/2006-01-22-BitConvertLegalize.ll create mode 100644 final/test/CodeGen/SPARC/2007-05-09-JumpTables.ll create mode 100644 final/test/CodeGen/SPARC/2007-07-05-LiveIntervalAssert.ll create mode 100644 final/test/CodeGen/SPARC/2008-10-10-InlineAsmMemoryOperand.ll create mode 100644 final/test/CodeGen/SPARC/2008-10-10-InlineAsmRegOperand.ll create mode 100644 final/test/CodeGen/SPARC/2009-08-28-PIC.ll create mode 100644 final/test/CodeGen/SPARC/2009-08-28-WeakLinkage.ll create mode 100644 final/test/CodeGen/SPARC/2010-04-07-DbgValueOtherTargets.ll create mode 100755 final/test/CodeGen/SPARC/2011-01-11-CC.ll create mode 100644 final/test/CodeGen/SPARC/2011-01-11-Call.ll create mode 100644 final/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll create mode 100644 final/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll create mode 100644 final/test/CodeGen/SPARC/2011-01-21-ByValArgs.ll create mode 100644 final/test/CodeGen/SPARC/2011-01-22-SRet.ll create mode 100644 final/test/CodeGen/SPARC/basictest.ll create mode 100644 final/test/CodeGen/SPARC/ctpop.ll create mode 100644 final/test/CodeGen/SPARC/dg.exp create mode 100644 final/test/CodeGen/SPARC/mult-alt-generic-sparc.ll create mode 100644 final/test/CodeGen/SPARC/private.ll create mode 100644 final/test/CodeGen/SystemZ/00-RetVoid.ll create mode 100644 final/test/CodeGen/SystemZ/01-RetArg.ll create mode 100644 final/test/CodeGen/SystemZ/01-RetImm.ll create mode 100644 final/test/CodeGen/SystemZ/02-MemArith.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetAdd.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetAddImm.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetAnd.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetAndImm.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetNeg.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetOr.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetOrImm.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetSub.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetSubImm.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetXor.ll create mode 100644 final/test/CodeGen/SystemZ/02-RetXorImm.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetAddSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetAndSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetArgSubreg.ll create mode 
100644 final/test/CodeGen/SystemZ/03-RetImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetNegImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetOrSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetSubSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/03-RetXorSubreg.ll create mode 100644 final/test/CodeGen/SystemZ/04-RetShifts.ll create mode 100644 final/test/CodeGen/SystemZ/05-LoadAddr.ll create mode 100644 final/test/CodeGen/SystemZ/05-MemImmStores.ll create mode 100644 final/test/CodeGen/SystemZ/05-MemLoadsStores.ll create mode 100644 final/test/CodeGen/SystemZ/05-MemLoadsStores16.ll create mode 100644 final/test/CodeGen/SystemZ/05-MemRegLoads.ll create mode 100644 final/test/CodeGen/SystemZ/05-MemRegStores.ll create mode 100644 final/test/CodeGen/SystemZ/06-CallViaStack.ll create mode 100644 final/test/CodeGen/SystemZ/06-FrameIdxLoad.ll create mode 100644 final/test/CodeGen/SystemZ/06-LocalFrame.ll create mode 100644 final/test/CodeGen/SystemZ/06-SimpleCall.ll create mode 100644 final/test/CodeGen/SystemZ/07-BrCond.ll create mode 100644 final/test/CodeGen/SystemZ/07-BrCond32.ll create mode 100644 final/test/CodeGen/SystemZ/07-BrUnCond.ll create mode 100644 final/test/CodeGen/SystemZ/07-CmpImm.ll create mode 100644 final/test/CodeGen/SystemZ/07-CmpImm32.ll create mode 100644 final/test/CodeGen/SystemZ/07-SelectCC.ll create mode 100644 final/test/CodeGen/SystemZ/08-DivRem.ll create mode 100644 final/test/CodeGen/SystemZ/08-DivRemMemOp.ll create mode 100644 final/test/CodeGen/SystemZ/08-SimpleMuls.ll create mode 100644 final/test/CodeGen/SystemZ/09-DynamicAlloca.ll create mode 100644 final/test/CodeGen/SystemZ/09-Globals.ll create mode 100644 final/test/CodeGen/SystemZ/09-Switches.ll create mode 100644 final/test/CodeGen/SystemZ/10-FuncsPic.ll create mode 100644 final/test/CodeGen/SystemZ/10-GlobalsPic.ll create mode 100644 final/test/CodeGen/SystemZ/11-BSwap.ll create mode 100644 final/test/CodeGen/SystemZ/2009-05-29-InvalidRetResult.ll create mode 100644 final/test/CodeGen/SystemZ/2009-06-02-And32Imm.ll create mode 100644 final/test/CodeGen/SystemZ/2009-06-02-Rotate.ll create mode 100644 final/test/CodeGen/SystemZ/2009-06-05-InvalidArgLoad.ll create mode 100644 final/test/CodeGen/SystemZ/2009-07-04-Shl32.ll create mode 100644 final/test/CodeGen/SystemZ/2009-07-05-Shifts.ll create mode 100644 final/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll create mode 100644 final/test/CodeGen/SystemZ/2009-07-11-FloatBitConvert.ll create mode 100644 final/test/CodeGen/SystemZ/2009-07-11-InvalidRIISel.ll create mode 100644 final/test/CodeGen/SystemZ/2009-08-21-InlineAsmRConstraint.ll create mode 100644 final/test/CodeGen/SystemZ/2009-08-22-FCopySign.ll create mode 100644 final/test/CodeGen/SystemZ/2010-01-04-DivMem.ll create mode 100644 final/test/CodeGen/SystemZ/2010-04-07-DbgValueOtherTargets.ll create mode 100644 final/test/CodeGen/SystemZ/dg.exp create mode 100644 final/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll create mode 100644 final/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll create mode 100644 final/test/CodeGen/Thumb/2007-03-06-AddR7.ll create mode 100644 final/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll create mode 100644 final/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll create mode 100644 
final/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll create mode 100644 final/test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll create mode 100644 final/test/CodeGen/Thumb/2009-07-27-PEIAssert.ll create mode 100644 final/test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll create mode 100644 final/test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll create mode 100644 final/test/CodeGen/Thumb/2009-08-20-ISelBug.ll create mode 100644 final/test/CodeGen/Thumb/2009-12-17-pre-regalloc-taildup.ll create mode 100644 final/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll create mode 100644 final/test/CodeGen/Thumb/2010-04-07-DbgValueOtherTargets.ll create mode 100644 final/test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll create mode 100644 final/test/CodeGen/Thumb/2010-07-01-FuncAlign.ll create mode 100644 final/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll create mode 100644 final/test/CodeGen/Thumb/2011-EpilogueBug.ll create mode 100644 final/test/CodeGen/Thumb/asmprinter-bug.ll create mode 100644 final/test/CodeGen/Thumb/barrier.ll create mode 100644 final/test/CodeGen/Thumb/dg.exp create mode 100644 final/test/CodeGen/Thumb/dyn-stackalloc.ll create mode 100644 final/test/CodeGen/Thumb/fpconv.ll create mode 100644 final/test/CodeGen/Thumb/fpow.ll create mode 100644 final/test/CodeGen/Thumb/frame_thumb.ll create mode 100644 final/test/CodeGen/Thumb/iabs.ll create mode 100644 final/test/CodeGen/Thumb/inlineasm-imm-thumb.ll create mode 100644 final/test/CodeGen/Thumb/ispositive.ll create mode 100644 final/test/CodeGen/Thumb/large-stack.ll create mode 100644 final/test/CodeGen/Thumb/ldr_ext.ll create mode 100644 final/test/CodeGen/Thumb/ldr_frame.ll create mode 100644 final/test/CodeGen/Thumb/long-setcc.ll create mode 100644 final/test/CodeGen/Thumb/long.ll create mode 100644 final/test/CodeGen/Thumb/long_shift.ll create mode 100644 final/test/CodeGen/Thumb/mul.ll create mode 100644 final/test/CodeGen/Thumb/pop.ll create mode 100644 final/test/CodeGen/Thumb/push.ll create mode 100644 final/test/CodeGen/Thumb/select.ll create mode 100644 final/test/CodeGen/Thumb/stack-frame.ll create mode 100644 final/test/CodeGen/Thumb/thumb-imm.ll create mode 100644 final/test/CodeGen/Thumb/trap.ll create mode 100644 final/test/CodeGen/Thumb/tst_teq.ll create mode 100644 final/test/CodeGen/Thumb/unord.ll create mode 100644 final/test/CodeGen/Thumb/vargs.ll create mode 100644 final/test/CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll create mode 100644 final/test/CodeGen/Thumb2/2009-07-21-ISelBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-07-30-PEICrash.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug2.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug3.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-07-CoalescerBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll create mode 100644 
final/test/CodeGen/Thumb2/2009-08-10-ISelBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-08-21-PostRAKill4.ll create mode 100644 final/test/CodeGen/Thumb2/2009-09-01-PostRAProlog.ll create mode 100644 final/test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll create mode 100644 final/test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll create mode 100644 final/test/CodeGen/Thumb2/2009-11-13-STRDBug.ll create mode 100644 final/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll create mode 100644 final/test/CodeGen/Thumb2/2010-01-06-TailDuplicateLabels.ll create mode 100644 final/test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll create mode 100644 final/test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll create mode 100644 final/test/CodeGen/Thumb2/2010-02-24-BigStack.ll create mode 100644 final/test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll create mode 100644 final/test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll create mode 100644 final/test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll create mode 100644 final/test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll create mode 100644 final/test/CodeGen/Thumb2/2010-05-24-rsbs.ll create mode 100644 final/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll create mode 100644 final/test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll create mode 100644 final/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll create mode 100644 final/test/CodeGen/Thumb2/2010-08-10-VarSizedAllocaBug.ll create mode 100644 final/test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll create mode 100644 final/test/CodeGen/Thumb2/2010-12-03-AddSPNarrowing.ll create mode 100644 final/test/CodeGen/Thumb2/bfi.ll create mode 100644 final/test/CodeGen/Thumb2/bfx.ll create mode 100644 final/test/CodeGen/Thumb2/buildvector-crash.ll create mode 100644 final/test/CodeGen/Thumb2/carry.ll create mode 100644 final/test/CodeGen/Thumb2/cortex-fp.ll create mode 100644 final/test/CodeGen/Thumb2/crash.ll create mode 100644 final/test/CodeGen/Thumb2/cross-rc-coalescing-1.ll create mode 100644 final/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll create mode 100644 final/test/CodeGen/Thumb2/dg.exp create mode 100644 final/test/CodeGen/Thumb2/div.ll create mode 100644 final/test/CodeGen/Thumb2/frameless.ll create mode 100644 final/test/CodeGen/Thumb2/frameless2.ll create mode 100644 final/test/CodeGen/Thumb2/ifcvt-neon.ll create mode 100644 final/test/CodeGen/Thumb2/large-stack.ll create mode 100644 final/test/CodeGen/Thumb2/ldr-str-imm12.ll create mode 100644 final/test/CodeGen/Thumb2/lsr-deficiency.ll create mode 100644 final/test/CodeGen/Thumb2/machine-licm.ll create mode 100644 final/test/CodeGen/Thumb2/mul_const.ll create mode 100644 final/test/CodeGen/Thumb2/pic-load.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-adc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add3.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add4.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add5.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-add6.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-and.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-and2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-asr.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-asr2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-barrier.ll create mode 
100644 final/test/CodeGen/Thumb2/thumb2-bcc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-bfc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-bic.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-branch.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-call-tc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-call.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-cbnz.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-clz.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-cmn.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-cmn2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-cmp.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-cmp2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-eor.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-eor2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ifcvt1.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ifcvt2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ifcvt3.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-jtb.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldm.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldr.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldr_ext.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldr_post.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldr_pre.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldrb.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldrd.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ldrh.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-lsl.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-lsl2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-lsr.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-lsr2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-lsr3.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mla.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mls.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mov.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mul.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mulhi.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mvn.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-mvn2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-neg.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-orn.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-orn2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-orr.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-orr2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-pack.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-rev.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-rev16.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ror.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-ror2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-rsb.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-rsb2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sbc.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-select.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-select_xform.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-shifter.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-smla.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-smul.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-spill-q.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-str.ll create 
mode 100644 final/test/CodeGen/Thumb2/thumb2-str_post.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-str_pre.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-strb.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-strh.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sub.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sub2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sub3.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sub4.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sub5.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-sxt_rot.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-tbb.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-tbh.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-teq.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-teq2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-tst.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-tst2.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-uxt_rot.ll create mode 100644 final/test/CodeGen/Thumb2/thumb2-uxtb.ll create mode 100644 final/test/CodeGen/Thumb2/tls1.ll create mode 100644 final/test/CodeGen/Thumb2/tls2.ll create mode 100644 final/test/CodeGen/X86/2003-08-03-CallArgLiveRanges.ll create mode 100644 final/test/CodeGen/X86/2003-08-23-DeadBlockTest.ll create mode 100644 final/test/CodeGen/X86/2003-11-03-GlobalBool.ll create mode 100644 final/test/CodeGen/X86/2004-02-12-Memcpy.ll create mode 100644 final/test/CodeGen/X86/2004-02-13-FrameReturnAddress.ll create mode 100644 final/test/CodeGen/X86/2004-02-14-InefficientStackPointer.ll create mode 100644 final/test/CodeGen/X86/2004-02-22-Casts.ll create mode 100644 final/test/CodeGen/X86/2004-03-30-Select-Max.ll create mode 100644 final/test/CodeGen/X86/2004-04-09-SameValueCoalescing.ll create mode 100644 final/test/CodeGen/X86/2004-04-13-FPCMOV-Crash.ll create mode 100644 final/test/CodeGen/X86/2004-06-10-StackifierCrash.ll create mode 100644 final/test/CodeGen/X86/2004-10-08-SelectSetCCFold.ll create mode 100644 final/test/CodeGen/X86/2005-01-17-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2005-02-14-IllegalAssembler.ll create mode 100644 final/test/CodeGen/X86/2005-05-08-FPStackifierPHI.ll create mode 100644 final/test/CodeGen/X86/2006-01-19-ISelFoldingBug.ll create mode 100644 final/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll create mode 100644 final/test/CodeGen/X86/2006-03-02-InstrSchedBug.ll create mode 100644 final/test/CodeGen/X86/2006-04-04-CrossBlockCrash.ll create mode 100644 final/test/CodeGen/X86/2006-04-27-ISelFoldingBug.ll create mode 100644 final/test/CodeGen/X86/2006-05-01-SchedCausingSpills.ll create mode 100644 final/test/CodeGen/X86/2006-05-02-InstrSched1.ll create mode 100644 final/test/CodeGen/X86/2006-05-02-InstrSched2.ll create mode 100644 final/test/CodeGen/X86/2006-05-08-CoalesceSubRegClass.ll create mode 100644 final/test/CodeGen/X86/2006-05-08-InstrSched.ll create mode 100644 final/test/CodeGen/X86/2006-05-11-InstrSched.ll create mode 100644 final/test/CodeGen/X86/2006-05-17-VectorArg.ll create mode 100644 final/test/CodeGen/X86/2006-05-22-FPSetEQ.ll create mode 100644 final/test/CodeGen/X86/2006-05-25-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-07-10-InlineAsmAConstraint.ll create mode 100644 final/test/CodeGen/X86/2006-07-12-InlineAsmQConstraint.ll create mode 100644 final/test/CodeGen/X86/2006-07-19-ATTAsm.ll create mode 100644 final/test/CodeGen/X86/2006-07-20-InlineAsm.ll create mode 100644 
final/test/CodeGen/X86/2006-07-28-AsmPrint-Long-As-Pointer.ll create mode 100644 final/test/CodeGen/X86/2006-07-31-SingleRegClass.ll create mode 100644 final/test/CodeGen/X86/2006-08-07-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-08-16-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-08-21-ExtraMovInst.ll create mode 100644 final/test/CodeGen/X86/2006-09-01-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-10-02-BoolRetCrash.ll create mode 100644 final/test/CodeGen/X86/2006-10-07-ScalarSSEMiscompile.ll create mode 100644 final/test/CodeGen/X86/2006-10-09-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-10-10-FindModifiedNodeSlotBug.ll create mode 100644 final/test/CodeGen/X86/2006-10-12-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-10-13-CycleInDAG.ll create mode 100644 final/test/CodeGen/X86/2006-10-19-SwitchUnnecessaryBranching.ll create mode 100644 final/test/CodeGen/X86/2006-11-12-CSRetCC.ll create mode 100644 final/test/CodeGen/X86/2006-11-17-IllegalMove.ll create mode 100644 final/test/CodeGen/X86/2006-11-27-SelectLegalize.ll create mode 100644 final/test/CodeGen/X86/2006-11-28-Memcpy.ll create mode 100644 final/test/CodeGen/X86/2006-12-16-InlineAsmCrash.ll create mode 100644 final/test/CodeGen/X86/2006-12-19-IntelSyntax.ll create mode 100644 final/test/CodeGen/X86/2007-01-08-InstrSched.ll create mode 100644 final/test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll create mode 100644 final/test/CodeGen/X86/2007-01-13-StackPtrIndex.ll create mode 100644 final/test/CodeGen/X86/2007-01-29-InlineAsm-ir.ll create mode 100644 final/test/CodeGen/X86/2007-02-04-OrAddrMode.ll create mode 100644 final/test/CodeGen/X86/2007-02-16-BranchFold.ll create mode 100644 final/test/CodeGen/X86/2007-02-19-LiveIntervalAssert.ll create mode 100644 final/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll create mode 100644 final/test/CodeGen/X86/2007-02-25-FastCCStack.ll create mode 100644 final/test/CodeGen/X86/2007-03-01-SpillerCrash.ll create mode 100644 final/test/CodeGen/X86/2007-03-15-GEP-Idx-Sink.ll create mode 100644 final/test/CodeGen/X86/2007-03-16-InlineAsm.ll create mode 100644 final/test/CodeGen/X86/2007-03-18-LiveIntervalAssert.ll create mode 100644 final/test/CodeGen/X86/2007-03-24-InlineAsmMultiRegConstraint.ll create mode 100644 final/test/CodeGen/X86/2007-03-24-InlineAsmPModifier.ll create mode 100644 final/test/CodeGen/X86/2007-03-24-InlineAsmVectorOp.ll create mode 100644 final/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll create mode 100644 final/test/CodeGen/X86/2007-03-26-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2007-04-08-InlineAsmCrash.ll create mode 100644 final/test/CodeGen/X86/2007-04-11-InlineAsmVectorResult.ll create mode 100644 final/test/CodeGen/X86/2007-04-17-LiveIntervalAssert.ll create mode 100644 final/test/CodeGen/X86/2007-04-24-Huge-Stack.ll create mode 100644 final/test/CodeGen/X86/2007-04-24-VectorCrash.ll create mode 100644 final/test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll create mode 100644 final/test/CodeGen/X86/2007-04-27-InlineAsm-IntMemInput.ll create mode 100644 final/test/CodeGen/X86/2007-05-05-Personality.ll create mode 100644 final/test/CodeGen/X86/2007-05-05-VecCastExpand.ll create mode 100644 final/test/CodeGen/X86/2007-05-07-InvokeSRet.ll create mode 100644 final/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll create mode 100644 final/test/CodeGen/X86/2007-05-15-maskmovq.ll create mode 100644 final/test/CodeGen/X86/2007-05-17-ShuffleISelBug.ll create mode 100644 
final/test/CodeGen/X86/2007-06-04-X86-64-CtorAsmBugs.ll create mode 100644 final/test/CodeGen/X86/2007-06-04-tailmerge4.ll create mode 100644 final/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll create mode 100644 final/test/CodeGen/X86/2007-06-15-IntToMMX.ll create mode 100644 final/test/CodeGen/X86/2007-06-28-X86-64-isel.ll create mode 100644 final/test/CodeGen/X86/2007-06-29-DAGCombinerBug.ll create mode 100644 final/test/CodeGen/X86/2007-06-29-VecFPConstantCSEBug.ll create mode 100644 final/test/CodeGen/X86/2007-07-03-GR64ToVR64.ll create mode 100644 final/test/CodeGen/X86/2007-07-10-StackerAssert.ll create mode 100644 final/test/CodeGen/X86/2007-07-18-Vector-Extract.ll create mode 100644 final/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll create mode 100644 final/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll create mode 100644 final/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll create mode 100644 final/test/CodeGen/X86/2007-08-13-AppendingLinkage.ll create mode 100644 final/test/CodeGen/X86/2007-09-05-InvalidAsm.ll create mode 100644 final/test/CodeGen/X86/2007-09-06-ExtWeakAliasee.ll create mode 100644 final/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll create mode 100644 final/test/CodeGen/X86/2007-09-18-ShuffleXformBug.ll create mode 100644 final/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll create mode 100644 final/test/CodeGen/X86/2007-10-04-AvoidEFLAGSCopy.ll create mode 100644 final/test/CodeGen/X86/2007-10-05-3AddrConvert.ll create mode 100644 final/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll create mode 100644 final/test/CodeGen/X86/2007-10-12-SpillerUnfold1.ll create mode 100644 final/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll create mode 100644 final/test/CodeGen/X86/2007-10-14-CoalescerCrash.ll create mode 100644 final/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll create mode 100644 final/test/CodeGen/X86/2007-10-16-CoalescerCrash.ll create mode 100644 final/test/CodeGen/X86/2007-10-17-IllegalAsm.ll create mode 100644 final/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll create mode 100644 final/test/CodeGen/X86/2007-10-28-inlineasm-q-modifier.ll create mode 100644 final/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll create mode 100644 final/test/CodeGen/X86/2007-10-30-LSRCrash.ll create mode 100644 final/test/CodeGen/X86/2007-10-31-extractelement-i64.ll create mode 100644 final/test/CodeGen/X86/2007-11-01-ISelCrash.ll create mode 100644 final/test/CodeGen/X86/2007-11-02-BadAsm.ll create mode 100644 final/test/CodeGen/X86/2007-11-03-x86-64-q-constraint.ll create mode 100644 final/test/CodeGen/X86/2007-11-04-LiveIntervalCrash.ll create mode 100644 final/test/CodeGen/X86/2007-11-04-LiveVariablesBug.ll create mode 100644 final/test/CodeGen/X86/2007-11-04-rip-immediate-constant.ll create mode 100644 final/test/CodeGen/X86/2007-11-06-InstrSched.ll create mode 100644 final/test/CodeGen/X86/2007-11-07-MulBy4.ll create mode 100644 final/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll create mode 100644 final/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll create mode 100644 final/test/CodeGen/X86/2007-12-16-BURRSchedCrash.ll create mode 100644 final/test/CodeGen/X86/2007-12-18-LoadCSEBug.ll create mode 100644 final/test/CodeGen/X86/2008-01-08-IllegalCMP.ll create mode 100644 final/test/CodeGen/X86/2008-01-08-SchedulerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-01-09-LongDoubleSin.ll create mode 100644 final/test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll create mode 100644 final/test/CodeGen/X86/2008-01-16-InvalidDAGCombineXform.ll create mode 100644 
final/test/CodeGen/X86/2008-01-16-Trampoline.ll create mode 100644 final/test/CodeGen/X86/2008-02-05-ISelCrash.ll create mode 100644 final/test/CodeGen/X86/2008-02-06-LoadFoldingBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-08-LoadFoldingBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-14-BitMiscompile.ll create mode 100644 final/test/CodeGen/X86/2008-02-18-TailMergingBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-20-InlineAsmClobber.ll create mode 100644 final/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-22-ReMatBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-25-InlineAsmBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-26-AsmDirectMemOp.ll create mode 100644 final/test/CodeGen/X86/2008-02-27-DeadSlotElimBug.ll create mode 100644 final/test/CodeGen/X86/2008-02-27-PEICrash.ll create mode 100644 final/test/CodeGen/X86/2008-03-06-frem-fpstack.ll create mode 100644 final/test/CodeGen/X86/2008-03-07-APIntBug.ll create mode 100644 final/test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll create mode 100644 final/test/CodeGen/X86/2008-03-12-ThreadLocalAlias.ll create mode 100644 final/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll create mode 100644 final/test/CodeGen/X86/2008-03-14-SpillerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-03-18-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-03-19-DAGCombinerBug.ll create mode 100644 final/test/CodeGen/X86/2008-03-23-DarwinAsmComments.ll create mode 100644 final/test/CodeGen/X86/2008-03-25-TwoAddrPassBug.ll create mode 100644 final/test/CodeGen/X86/2008-03-31-SpillerFoldingBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-02-unnamedEH.ll create mode 100644 final/test/CodeGen/X86/2008-04-08-CoalescerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-04-09-BranchFolding.ll create mode 100644 final/test/CodeGen/X86/2008-04-15-LiveVariableBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-16-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-16-ReMatBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-17-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-24-MemCpyBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-24-pblendw-fold-crash.ll create mode 100644 final/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll create mode 100644 final/test/CodeGen/X86/2008-04-28-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-04-28-CyclicSchedUnit.ll create mode 100644 final/test/CodeGen/X86/2008-05-01-InvalidOrdCompare.ll create mode 100644 final/test/CodeGen/X86/2008-05-09-PHIElimBug.ll create mode 100644 final/test/CodeGen/X86/2008-05-09-ShuffleLoweringBug.ll create mode 100644 final/test/CodeGen/X86/2008-05-12-tailmerge-5.ll create mode 100644 final/test/CodeGen/X86/2008-05-21-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-05-22-FoldUnalignedLoad.ll create mode 100644 final/test/CodeGen/X86/2008-05-28-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll create mode 100644 final/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll create mode 100644 final/test/CodeGen/X86/2008-06-13-NotVolatileLoadStore.ll create mode 100644 final/test/CodeGen/X86/2008-06-13-VolatileLoadStore.ll create mode 100644 final/test/CodeGen/X86/2008-06-16-SubregsBug.ll create mode 100644 final/test/CodeGen/X86/2008-06-18-BadShuffle.ll create mode 100644 
final/test/CodeGen/X86/2008-06-25-VecISelBug.ll create mode 100644 final/test/CodeGen/X86/2008-07-07-DanglingDeadInsts.ll create mode 100644 final/test/CodeGen/X86/2008-07-09-ELFSectionAttributes.ll create mode 100644 final/test/CodeGen/X86/2008-07-11-SHLBy1.ll create mode 100644 final/test/CodeGen/X86/2008-07-11-SpillerBug.ll create mode 100644 final/test/CodeGen/X86/2008-07-16-CoalescerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-07-19-movups-spills.ll create mode 100644 final/test/CodeGen/X86/2008-07-22-CombinerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-07-23-VSetCC.ll create mode 100644 final/test/CodeGen/X86/2008-08-05-SpillerBug.ll create mode 100644 final/test/CodeGen/X86/2008-08-06-CmpStride.ll create mode 100644 final/test/CodeGen/X86/2008-08-06-RewriterBug.ll create mode 100644 final/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll create mode 100644 final/test/CodeGen/X86/2008-08-19-SubAndFetch.ll create mode 100644 final/test/CodeGen/X86/2008-08-23-64Bit-maskmovq.ll create mode 100644 final/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll create mode 100644 final/test/CodeGen/X86/2008-08-25-AsmRegTypeMismatch.ll create mode 100644 final/test/CodeGen/X86/2008-08-31-EH_RETURN32.ll create mode 100644 final/test/CodeGen/X86/2008-08-31-EH_RETURN64.ll create mode 100644 final/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll create mode 100644 final/test/CodeGen/X86/2008-09-09-LinearScanBug.ll create mode 100644 final/test/CodeGen/X86/2008-09-11-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll create mode 100644 final/test/CodeGen/X86/2008-09-17-inline-asm-1.ll create mode 100644 final/test/CodeGen/X86/2008-09-18-inline-asm-2.ll create mode 100644 final/test/CodeGen/X86/2008-09-19-RegAllocBug.ll create mode 100644 final/test/CodeGen/X86/2008-09-25-sseregparm-1.ll create mode 100644 final/test/CodeGen/X86/2008-09-26-FrameAddrBug.ll create mode 100644 final/test/CodeGen/X86/2008-09-29-ReMatBug.ll create mode 100644 final/test/CodeGen/X86/2008-09-29-VolatileBug.ll create mode 100644 final/test/CodeGen/X86/2008-10-02-Atomics32-2.ll create mode 100644 final/test/CodeGen/X86/2008-10-06-MMXISelBug.ll create mode 100644 final/test/CodeGen/X86/2008-10-06-x87ld-nan-1.ll create mode 100644 final/test/CodeGen/X86/2008-10-06-x87ld-nan-2.ll create mode 100644 final/test/CodeGen/X86/2008-10-07-SSEISelBug.ll create mode 100644 final/test/CodeGen/X86/2008-10-11-CallCrash.ll create mode 100644 final/test/CodeGen/X86/2008-10-13-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-10-16-VecUnaryOp.ll create mode 100644 final/test/CodeGen/X86/2008-10-17-Asm64bitRConstraint.ll create mode 100644 final/test/CodeGen/X86/2008-10-20-AsmDoubleInI32.ll create mode 100644 final/test/CodeGen/X86/2008-10-24-FlippedCompare.ll create mode 100644 final/test/CodeGen/X86/2008-10-27-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2008-10-27-StackRealignment.ll create mode 100644 final/test/CodeGen/X86/2008-10-29-ExpandVAARG.ll create mode 100644 final/test/CodeGen/X86/2008-11-03-F80VAARG.ll create mode 100644 final/test/CodeGen/X86/2008-11-06-testb.ll create mode 100644 final/test/CodeGen/X86/2008-11-13-inlineasm-3.ll create mode 100644 final/test/CodeGen/X86/2008-11-29-ULT-Sign.ll create mode 100644 final/test/CodeGen/X86/2008-12-01-SpillerAssert.ll create mode 100644 final/test/CodeGen/X86/2008-12-01-loop-iv-used-outside-loop.ll create mode 100644 final/test/CodeGen/X86/2008-12-02-IllegalResultType.ll create mode 100644 
final/test/CodeGen/X86/2008-12-02-dagcombine-1.ll create mode 100644 final/test/CodeGen/X86/2008-12-02-dagcombine-2.ll create mode 100644 final/test/CodeGen/X86/2008-12-02-dagcombine-3.ll create mode 100644 final/test/CodeGen/X86/2008-12-05-SpillerCrash.ll create mode 100644 final/test/CodeGen/X86/2008-12-12-PrivateEHSymbol.ll create mode 100644 final/test/CodeGen/X86/2008-12-16-BadShift.ll create mode 100644 final/test/CodeGen/X86/2008-12-16-dagcombine-4.ll create mode 100644 final/test/CodeGen/X86/2008-12-19-EarlyClobberBug.ll create mode 100644 final/test/CodeGen/X86/2008-12-22-dagcombine-5.ll create mode 100644 final/test/CodeGen/X86/2008-12-23-crazy-address.ll create mode 100644 final/test/CodeGen/X86/2008-12-23-dagcombine-6.ll create mode 100644 final/test/CodeGen/X86/2009-01-13-DoubleUpdate.ll create mode 100644 final/test/CodeGen/X86/2009-01-16-SchedulerBug.ll create mode 100644 final/test/CodeGen/X86/2009-01-16-UIntToFP.ll create mode 100644 final/test/CodeGen/X86/2009-01-18-ConstantExprCrash.ll create mode 100644 final/test/CodeGen/X86/2009-01-25-NoSSE.ll create mode 100644 final/test/CodeGen/X86/2009-01-26-WrongCheck.ll create mode 100644 final/test/CodeGen/X86/2009-01-27-NullStrings.ll create mode 100644 final/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll create mode 100644 final/test/CodeGen/X86/2009-01-31-BigShift.ll create mode 100644 final/test/CodeGen/X86/2009-01-31-BigShift2.ll create mode 100644 final/test/CodeGen/X86/2009-01-31-BigShift3.ll create mode 100644 final/test/CodeGen/X86/2009-02-01-LargeMask.ll create mode 100644 final/test/CodeGen/X86/2009-02-03-AnalyzedTwice.ll create mode 100644 final/test/CodeGen/X86/2009-02-04-sext-i64-gep.ll create mode 100644 final/test/CodeGen/X86/2009-02-05-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-02-08-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-02-09-ivs-different-sizes.ll create mode 100644 final/test/CodeGen/X86/2009-02-11-codegenprepare-reuse.ll create mode 100644 final/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll create mode 100644 final/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll create mode 100644 final/test/CodeGen/X86/2009-02-12-SpillerBug.ll create mode 100644 final/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll create mode 100644 final/test/CodeGen/X86/2009-02-21-ExtWeakInitializer.ll create mode 100644 final/test/CodeGen/X86/2009-02-25-CommuteBug.ll create mode 100644 final/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-03-BTHang.ll create mode 100644 final/test/CodeGen/X86/2009-03-03-BitcastLongDouble.ll create mode 100644 final/test/CodeGen/X86/2009-03-05-burr-list-crash.ll create mode 100644 final/test/CodeGen/X86/2009-03-07-FPConstSelect.ll create mode 100644 final/test/CodeGen/X86/2009-03-09-APIntCrash.ll create mode 100644 final/test/CodeGen/X86/2009-03-09-SpillerBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-10-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-12-CPAlignBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-13-PHIElimBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll create mode 100644 final/test/CodeGen/X86/2009-03-16-SpillerBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-23-LinearScanBug.ll create mode 100644 final/test/CodeGen/X86/2009-03-23-MultiUseSched.ll create mode 100644 final/test/CodeGen/X86/2009-03-23-i80-fp80.ll create mode 100644 final/test/CodeGen/X86/2009-03-25-TestBug.ll create mode 100644 
final/test/CodeGen/X86/2009-03-26-NoImplicitFPBug.ll create mode 100644 final/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll create mode 100644 final/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll create mode 100644 final/test/CodeGen/X86/2009-04-12-picrel.ll create mode 100644 final/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll create mode 100644 final/test/CodeGen/X86/2009-04-13-2AddrAssert.ll create mode 100644 final/test/CodeGen/X86/2009-04-14-IllegalRegs.ll create mode 100644 final/test/CodeGen/X86/2009-04-16-SpillerUnfold.ll create mode 100644 final/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll create mode 100644 final/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll create mode 100644 final/test/CodeGen/X86/2009-04-24.ll create mode 100644 final/test/CodeGen/X86/2009-04-25-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-04-27-CoalescerAssert.ll create mode 100644 final/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert.ll create mode 100644 final/test/CodeGen/X86/2009-04-27-LiveIntervalsAssert2.ll create mode 100644 final/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll create mode 100644 final/test/CodeGen/X86/2009-04-29-IndirectDestOperands.ll create mode 100644 final/test/CodeGen/X86/2009-04-29-LinearScanBug.ll create mode 100644 final/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll create mode 100644 final/test/CodeGen/X86/2009-04-scale.ll create mode 100644 final/test/CodeGen/X86/2009-05-08-InlineAsmIOffset.ll create mode 100644 final/test/CodeGen/X86/2009-05-11-tailmerge-crash.ll create mode 100644 final/test/CodeGen/X86/2009-05-19-SingleElementExtractElement.ll create mode 100644 final/test/CodeGen/X86/2009-05-23-available_externally.ll create mode 100644 final/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll create mode 100644 final/test/CodeGen/X86/2009-05-28-DAGCombineCrash.ll create mode 100644 final/test/CodeGen/X86/2009-05-30-ISelBug.ll create mode 100644 final/test/CodeGen/X86/2009-06-02-RewriterBug.ll create mode 100644 final/test/CodeGen/X86/2009-06-03-Win64DisableRedZone.ll create mode 100644 final/test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll create mode 100644 final/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll create mode 100644 final/test/CodeGen/X86/2009-06-05-ScalarToVectorByteMMX.ll create mode 100644 final/test/CodeGen/X86/2009-06-05-VZextByteShort.ll create mode 100644 final/test/CodeGen/X86/2009-06-05-VariableIndexInsert.ll create mode 100644 final/test/CodeGen/X86/2009-06-05-sitofpCrash.ll create mode 100644 final/test/CodeGen/X86/2009-06-06-ConcatVectors.ll create mode 100644 final/test/CodeGen/X86/2009-06-07-ExpandMMXBitcast.ll create mode 100644 final/test/CodeGen/X86/2009-06-12-x86_64-tail-call-conv-out-of-sync-bug.ll create mode 100644 final/test/CodeGen/X86/2009-06-15-not-a-tail-call.ll create mode 100644 final/test/CodeGen/X86/2009-06-18-movlp-shuffle-register.ll create mode 100644 final/test/CodeGen/X86/2009-07-06-TwoAddrAssert.ll create mode 100644 final/test/CodeGen/X86/2009-07-07-SplitICmp.ll create mode 100644 final/test/CodeGen/X86/2009-07-09-ExtractBoolFromVector.ll create mode 100644 final/test/CodeGen/X86/2009-07-15-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-07-16-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-07-17-StackColoringBug.ll create mode 100644 final/test/CodeGen/X86/2009-07-19-AsmExtraOperands.ll create mode 100644 final/test/CodeGen/X86/2009-07-20-CoalescerBug.ll create mode 100644 final/test/CodeGen/X86/2009-07-20-DAGCombineBug.ll create mode 100644 
create mode 100644 final/test/CodeGen/X86/2009-08-02-mmx-scalar-to-vector.ll
create mode 100644 final/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
create mode 100644 final/test/CodeGen/X86/2009-08-06-inlineasm.ll
create mode 100644 final/test/CodeGen/X86/2009-08-08-CastError.ll
create mode 100644 final/test/CodeGen/X86/2009-08-12-badswitch.ll
create mode 100644 final/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
create mode 100644 final/test/CodeGen/X86/2009-08-19-LoadNarrowingMiscompile.ll
create mode 100644 final/test/CodeGen/X86/2009-08-23-SubRegReuseUndo.ll
create mode 100644 final/test/CodeGen/X86/2009-08-23-linkerprivate.ll
create mode 100644 final/test/CodeGen/X86/2009-09-10-LoadFoldingBug.ll
create mode 100644 final/test/CodeGen/X86/2009-09-10-SpillComments.ll
create mode 100644 final/test/CodeGen/X86/2009-09-16-CoalescerBug.ll
create mode 100644 final/test/CodeGen/X86/2009-09-19-earlyclobber.ll
create mode 100644 final/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
create mode 100644 final/test/CodeGen/X86/2009-09-22-CoalescerBug.ll
create mode 100644 final/test/CodeGen/X86/2009-09-23-LiveVariablesBug.ll
create mode 100644 final/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
create mode 100644 final/test/CodeGen/X86/2009-10-14-LiveVariablesBug.ll
create mode 100644 final/test/CodeGen/X86/2009-10-16-Scope.ll
create mode 100644 final/test/CodeGen/X86/2009-10-19-EmergencySpill.ll
create mode 100644 final/test/CodeGen/X86/2009-10-19-atomic-cmp-eflags.ll
create mode 100644 final/test/CodeGen/X86/2009-10-25-RewriterBug.ll
create mode 100644 final/test/CodeGen/X86/2009-11-04-SubregCoalescingBug.ll
create mode 100644 final/test/CodeGen/X86/2009-11-13-VirtRegRewriterBug.ll
create mode 100644 final/test/CodeGen/X86/2009-11-16-MachineLICM.ll
create mode 100644 final/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
create mode 100644 final/test/CodeGen/X86/2009-11-17-UpdateTerminator.ll
create mode 100644 final/test/CodeGen/X86/2009-11-18-TwoAddrKill.ll
create mode 100644 final/test/CodeGen/X86/2009-11-25-ImpDefBug.ll
create mode 100644 final/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
create mode 100644 final/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
create mode 100644 final/test/CodeGen/X86/20090313-signext.ll
create mode 100644 final/test/CodeGen/X86/2010-01-05-ZExt-Shl.ll
create mode 100644 final/test/CodeGen/X86/2010-01-07-ISelBug.ll
create mode 100644 final/test/CodeGen/X86/2010-01-07-UAMemFeature.ll
create mode 100644 final/test/CodeGen/X86/2010-01-08-Atomic64Bug.ll
create mode 100644 final/test/CodeGen/X86/2010-01-11-ExtraPHIArg.ll
create mode 100644 final/test/CodeGen/X86/2010-01-13-OptExtBug.ll
create mode 100644 final/test/CodeGen/X86/2010-01-15-SelectionDAGCycle.ll
create mode 100644 final/test/CodeGen/X86/2010-01-18-DbgValue.ll
create mode 100644 final/test/CodeGen/X86/2010-01-19-OptExtBug.ll
create mode 100644 final/test/CodeGen/X86/2010-02-01-DbgValueCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-02-01-TaillCallCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-02-03-DualUndef.ll
create mode 100644 final/test/CodeGen/X86/2010-02-04-SchedulerBug.ll
create mode 100644 final/test/CodeGen/X86/2010-02-11-NonTemporal.ll
create mode 100644 final/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
create mode 100644 final/test/CodeGen/X86/2010-02-15-ImplicitDefBug.ll
create mode 100644 final/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll
create mode 100644 final/test/CodeGen/X86/2010-02-23-DAGCombineBug.ll
create mode 100644 final/test/CodeGen/X86/2010-02-23-DIV8rDefinesAX.ll
create mode 100644 final/test/CodeGen/X86/2010-02-23-RematImplicitSubreg.ll
create mode 100644 final/test/CodeGen/X86/2010-02-23-SingleDefPhiJoin.ll
create mode 100644 final/test/CodeGen/X86/2010-03-04-Mul8Bug.ll
create mode 100644 final/test/CodeGen/X86/2010-03-05-ConstantFoldCFG.ll
create mode 100644 final/test/CodeGen/X86/2010-03-05-EFLAGS-Redef.ll
create mode 100644 final/test/CodeGen/X86/2010-03-17-ISelBug.ll
create mode 100644 final/test/CodeGen/X86/2010-04-06-SSEDomainFixCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-04-07-DbgValueOtherTargets.ll
create mode 100644 final/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
create mode 100644 final/test/CodeGen/X86/2010-04-13-AnalyzeBranchCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-04-21-CoalescerBug.ll
create mode 100644 final/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
create mode 100644 final/test/CodeGen/X86/2010-04-29-CoalescerCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-04-30-LocalAlloc-LandingPad.ll
create mode 100644 final/test/CodeGen/X86/2010-05-03-CoalescerSubRegClobber.ll
create mode 100644 final/test/CodeGen/X86/2010-05-05-LocalAllocEarlyClobber.ll
create mode 100644 final/test/CodeGen/X86/2010-05-06-LocalInlineAsmClobber.ll
create mode 100644 final/test/CodeGen/X86/2010-05-07-ldconvert.ll
create mode 100644 final/test/CodeGen/X86/2010-05-10-DAGCombinerBug.ll
create mode 100644 final/test/CodeGen/X86/2010-05-12-FastAllocKills.ll
create mode 100644 final/test/CodeGen/X86/2010-05-16-nosseconversion.ll
create mode 100644 final/test/CodeGen/X86/2010-05-25-DotDebugLoc.ll
create mode 100644 final/test/CodeGen/X86/2010-05-26-DotDebugLoc.ll
create mode 100644 final/test/CodeGen/X86/2010-05-26-FP_TO_INT-crash.ll
create mode 100644 final/test/CodeGen/X86/2010-05-28-Crash.ll
create mode 100644 final/test/CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll
create mode 100644 final/test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll
create mode 100644 final/test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
create mode 100644 final/test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
create mode 100644 final/test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
create mode 100644 final/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
create mode 100644 final/test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
create mode 100644 final/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll
create mode 100644 final/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
create mode 100644 final/test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
create mode 100644 final/test/CodeGen/X86/2010-07-02-UnfoldBug.ll
create mode 100644 final/test/CodeGen/X86/2010-07-02-asm-alignstack.ll
create mode 100644 final/test/CodeGen/X86/2010-07-06-DbgCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-07-06-asm-RIP.ll
create mode 100644 final/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
create mode 100644 final/test/CodeGen/X86/2010-07-13-indirectXconstraint.ll
create mode 100644 final/test/CodeGen/X86/2010-07-15-Crash.ll
create mode 100644 final/test/CodeGen/X86/2010-07-29-SetccSimplify.ll
create mode 100644 final/test/CodeGen/X86/2010-08-04-MaskedSignedCompare.ll
create mode 100644 final/test/CodeGen/X86/2010-08-04-MingWCrash.ll
create mode 100644 final/test/CodeGen/X86/2010-08-04-StackVariable.ll
create mode 100644 final/test/CodeGen/X86/2010-08-10-DbgConstant.ll
create mode 100644 final/test/CodeGen/X86/2010-09-01-RemoveCopyByCommutingDef.ll
create mode 100644 final/test/CodeGen/X86/2010-09-16-EmptyFilename.ll
create mode 100644 final/test/CodeGen/X86/2010-09-16-asmcrash.ll
create mode 100644 final/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
create mode 100644 final/test/CodeGen/X86/2010-09-30-CMOV-JumpTable-PHI.ll
create mode 100644 final/test/CodeGen/X86/2010-10-08-cmpxchg8b.ll
create mode 100644 final/test/CodeGen/X86/2010-11-02-DbgParameter.ll
create mode 100644 final/test/CodeGen/X86/2010-11-09-MOVLPS.ll
create mode 100644 final/test/CodeGen/X86/2010-11-18-SelectOfExtload.ll
create mode 100644 final/test/CodeGen/X86/2010-12-02-MC-Set.ll
create mode 100644 final/test/CodeGen/X86/2011-01-07-LegalizeTypesCrash.ll
create mode 100644 final/test/CodeGen/X86/2011-01-10-DagCombineHang.ll
create mode 100644 final/test/CodeGen/X86/2011-01-24-DbgValue-Before-Use.ll
create mode 100644 final/test/CodeGen/X86/2011-02-04-FastRegallocNoFP.ll
create mode 100644 final/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll
create mode 100644 final/test/CodeGen/X86/2011-02-23-UnfoldBug.ll
create mode 100644 final/test/CodeGen/X86/2011-02-27-Fpextend.ll
create mode 100644 final/test/CodeGen/X86/2011-03-02-DAGCombiner.ll
create mode 100644 final/test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll
create mode 100644 final/test/CodeGen/X86/3addr-16bit.ll
create mode 100644 final/test/CodeGen/X86/3addr-or.ll
create mode 100644 final/test/CodeGen/X86/Atomics-32.ll
create mode 100644 final/test/CodeGen/X86/Atomics-64.ll
create mode 100644 final/test/CodeGen/X86/GC/alloc_loop.ll
create mode 100644 final/test/CodeGen/X86/GC/argpromotion.ll
create mode 100644 final/test/CodeGen/X86/GC/badreadproto.ll
create mode 100644 final/test/CodeGen/X86/GC/badrootproto.ll
create mode 100644 final/test/CodeGen/X86/GC/badwriteproto.ll
create mode 100644 final/test/CodeGen/X86/GC/deadargelim.ll
create mode 100644 final/test/CodeGen/X86/GC/dg.exp
create mode 100644 final/test/CodeGen/X86/GC/fat.ll
create mode 100644 final/test/CodeGen/X86/GC/inline.ll
create mode 100644 final/test/CodeGen/X86/GC/inline2.ll
create mode 100644 final/test/CodeGen/X86/GC/lower_gcroot.ll
create mode 100644 final/test/CodeGen/X86/GC/outside.ll
create mode 100644 final/test/CodeGen/X86/GC/simple_ocaml.ll
create mode 100644 final/test/CodeGen/X86/MachineSink-CritEdge.ll
create mode 100644 final/test/CodeGen/X86/MachineSink-PHIUse.ll
create mode 100644 final/test/CodeGen/X86/SIMD/dg.exp
create mode 100644 final/test/CodeGen/X86/SIMD/notvunpcklpd.ll
create mode 100644 final/test/CodeGen/X86/SIMD/notvunpcklps.ll
create mode 100644 final/test/CodeGen/X86/SIMD/vunpcklpd.ll
create mode 100644 final/test/CodeGen/X86/SIMD/vunpcklps.ll
create mode 100644 final/test/CodeGen/X86/SwitchLowering.ll
create mode 100644 final/test/CodeGen/X86/abi-isel.ll
create mode 100644 final/test/CodeGen/X86/add-of-carry.ll
create mode 100644 final/test/CodeGen/X86/add.ll
create mode 100644 final/test/CodeGen/X86/adde-carry.ll
create mode 100644 final/test/CodeGen/X86/addr-label-difference.ll
create mode 100644 final/test/CodeGen/X86/aliases.ll
create mode 100644 final/test/CodeGen/X86/aligned-comm.ll
create mode 100644 final/test/CodeGen/X86/alignment.ll
create mode 100644 final/test/CodeGen/X86/all-ones-vector.ll
create mode 100644 final/test/CodeGen/X86/alldiv-divdi3.ll
create mode 100644 final/test/CodeGen/X86/alloca-align-rounding-32.ll
create mode 100644 final/test/CodeGen/X86/alloca-align-rounding.ll
create mode 100644 final/test/CodeGen/X86/and-or-fold.ll
create mode 100644 final/test/CodeGen/X86/and-su.ll
create mode 100644 final/test/CodeGen/X86/andimm8.ll
create mode 100644 final/test/CodeGen/X86/anyext.ll
create mode 100644 final/test/CodeGen/X86/apm.ll
create mode 100644 final/test/CodeGen/X86/arg-cast.ll
create mode 100644 final/test/CodeGen/X86/asm-block-labels.ll
create mode 100644 final/test/CodeGen/X86/asm-global-imm.ll
create mode 100644 final/test/CodeGen/X86/asm-indirect-mem.ll
create mode 100644 final/test/CodeGen/X86/asm-modifier-P.ll
create mode 100644 final/test/CodeGen/X86/asm-modifier.ll
create mode 100644 final/test/CodeGen/X86/atomic_add.ll
create mode 100644 final/test/CodeGen/X86/atomic_op.ll
create mode 100644 final/test/CodeGen/X86/attribute-sections.ll
create mode 100644 final/test/CodeGen/X86/avoid-lea-scale2.ll
create mode 100644 final/test/CodeGen/X86/avoid-loop-align-2.ll
create mode 100644 final/test/CodeGen/X86/avoid-loop-align.ll
create mode 100644 final/test/CodeGen/X86/avx-128.ll
create mode 100644 final/test/CodeGen/X86/avx-256.ll
create mode 100644 final/test/CodeGen/X86/avx-intrinsics-x86.ll
create mode 100644 final/test/CodeGen/X86/avx-intrinsics-x86_64.ll
create mode 100644 final/test/CodeGen/X86/barrier-sse.ll
create mode 100644 final/test/CodeGen/X86/barrier.ll
create mode 100644 final/test/CodeGen/X86/bc-extract.ll
create mode 100644 final/test/CodeGen/X86/bigstructret.ll
create mode 100644 final/test/CodeGen/X86/bigstructret2.ll
create mode 100644 final/test/CodeGen/X86/bit-test-shift.ll
create mode 100644 final/test/CodeGen/X86/bitcast-int-to-vector.ll
create mode 100644 final/test/CodeGen/X86/bitcast.ll
create mode 100644 final/test/CodeGen/X86/bitcast2.ll
create mode 100644 final/test/CodeGen/X86/br-fold.ll
create mode 100644 final/test/CodeGen/X86/brcond.ll
create mode 100644 final/test/CodeGen/X86/break-anti-dependencies.ll
create mode 100644 final/test/CodeGen/X86/break-sse-dep.ll
create mode 100644 final/test/CodeGen/X86/bss_pagealigned.ll
create mode 100644 final/test/CodeGen/X86/bswap-inline-asm.ll
create mode 100644 final/test/CodeGen/X86/bswap.ll
create mode 100644 final/test/CodeGen/X86/bt.ll
create mode 100644 final/test/CodeGen/X86/byval.ll
create mode 100644 final/test/CodeGen/X86/byval2.ll
create mode 100644 final/test/CodeGen/X86/byval3.ll
create mode 100644 final/test/CodeGen/X86/byval4.ll
create mode 100644 final/test/CodeGen/X86/byval5.ll
create mode 100644 final/test/CodeGen/X86/byval6.ll
create mode 100644 final/test/CodeGen/X86/byval7.ll
create mode 100644 final/test/CodeGen/X86/call-imm.ll
create mode 100644 final/test/CodeGen/X86/call-push.ll
create mode 100644 final/test/CodeGen/X86/change-compare-stride-0.ll
create mode 100644 final/test/CodeGen/X86/change-compare-stride-1.ll
create mode 100644 final/test/CodeGen/X86/change-compare-stride-trickiness-0.ll
create mode 100644 final/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
create mode 100644 final/test/CodeGen/X86/change-compare-stride-trickiness-2.ll
create mode 100644 final/test/CodeGen/X86/clz.ll
create mode 100644 final/test/CodeGen/X86/cmov.ll
create mode 100644 final/test/CodeGen/X86/cmp.ll
create mode 100644 final/test/CodeGen/X86/coalesce-esp.ll
create mode 100644 final/test/CodeGen/X86/coalescer-commute1.ll
create mode 100644 final/test/CodeGen/X86/coalescer-commute2.ll
create mode 100644 final/test/CodeGen/X86/coalescer-commute3.ll
create mode 100644 final/test/CodeGen/X86/coalescer-commute4.ll
create mode 100644 final/test/CodeGen/X86/coalescer-commute5.ll
create mode 100644 final/test/CodeGen/X86/coalescer-cross.ll
create mode 100644 final/test/CodeGen/X86/coalescer-remat.ll
create mode 100644 final/test/CodeGen/X86/code_placement.ll
create mode 100644 final/test/CodeGen/X86/code_placement_eh.ll
create mode 100644 final/test/CodeGen/X86/codegen-prepare-cast.ll
create mode 100644 final/test/CodeGen/X86/codegen-prepare-extload.ll
create mode 100644 final/test/CodeGen/X86/codemodel.ll
create mode 100644 final/test/CodeGen/X86/combine-lds.ll
create mode 100644 final/test/CodeGen/X86/combiner-aa-0.ll
create mode 100644 final/test/CodeGen/X86/combiner-aa-1.ll
create mode 100644 final/test/CodeGen/X86/commute-intrinsic.ll
create mode 100644 final/test/CodeGen/X86/commute-two-addr.ll
create mode 100644 final/test/CodeGen/X86/compare-add.ll
create mode 100644 final/test/CodeGen/X86/compare-inf.ll
create mode 100644 final/test/CodeGen/X86/compare_folding.ll
create mode 100644 final/test/CodeGen/X86/compiler_used.ll
create mode 100644 final/test/CodeGen/X86/complex-asm.ll
create mode 100644 final/test/CodeGen/X86/complex-fca.ll
create mode 100644 final/test/CodeGen/X86/conditional-indecrement.ll
create mode 100644 final/test/CodeGen/X86/constant-pool-remat-0.ll
create mode 100644 final/test/CodeGen/X86/constant-pool-sharing.ll
create mode 100644 final/test/CodeGen/X86/constpool.ll
create mode 100644 final/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
create mode 100644 final/test/CodeGen/X86/copysign-zero.ll
create mode 100644 final/test/CodeGen/X86/crash-O0.ll
create mode 100644 final/test/CodeGen/X86/crash.ll
create mode 100644 final/test/CodeGen/X86/critical-edge-split-2.ll
create mode 100644 final/test/CodeGen/X86/cstring.ll
create mode 100644 final/test/CodeGen/X86/ctpop-combine.ll
create mode 100644 final/test/CodeGen/X86/dag-rauw-cse.ll
create mode 100644 final/test/CodeGen/X86/dagcombine-buildvector.ll
create mode 100644 final/test/CodeGen/X86/dagcombine-cse.ll
create mode 100644 final/test/CodeGen/X86/darwin-bzero.ll
create mode 100644 final/test/CodeGen/X86/darwin-no-dead-strip.ll
create mode 100644 final/test/CodeGen/X86/darwin-quote.ll
create mode 100644 final/test/CodeGen/X86/darwin-stub.ll
create mode 100644 final/test/CodeGen/X86/dbg-byval-parameter.ll
create mode 100644 final/test/CodeGen/X86/dbg-merge-loc-entry.ll
create mode 100644 final/test/CodeGen/X86/dbg-value-inlined-parameter.ll
create mode 100644 final/test/CodeGen/X86/dbg-value-location.ll
create mode 100644 final/test/CodeGen/X86/dbg-value-range.ll
create mode 100644 final/test/CodeGen/X86/dg.exp
create mode 100644 final/test/CodeGen/X86/discontiguous-loops.ll
create mode 100644 final/test/CodeGen/X86/divide-by-constant.ll
create mode 100644 final/test/CodeGen/X86/divrem.ll
create mode 100644 final/test/CodeGen/X86/dll-linkage.ll
create mode 100644 final/test/CodeGen/X86/dllexport.ll
create mode 100644 final/test/CodeGen/X86/dollar-name.ll
create mode 100644 final/test/CodeGen/X86/dyn-stackalloc.ll
create mode 100644 final/test/CodeGen/X86/empty-functions.ll
create mode 100644 final/test/CodeGen/X86/empty-struct-return-type.ll
create mode 100644 final/test/CodeGen/X86/epilogue.ll
create mode 100644 final/test/CodeGen/X86/extend.ll
create mode 100644 final/test/CodeGen/X86/extern_weak.ll
create mode 100644 final/test/CodeGen/X86/extmul128.ll
create mode 100644 final/test/CodeGen/X86/extmul64.ll
create mode 100644 final/test/CodeGen/X86/extract-combine.ll
create mode 100644 final/test/CodeGen/X86/extract-extract.ll
create mode 100644 final/test/CodeGen/X86/extractelement-from-arg.ll
create mode 100644 final/test/CodeGen/X86/extractelement-load.ll
create mode 100644 final/test/CodeGen/X86/extractelement-shuffle.ll
create mode 100644 final/test/CodeGen/X86/extractps.ll
create mode 100644 final/test/CodeGen/X86/fabs.ll
create mode 100644 final/test/CodeGen/X86/fast-cc-callee-pops.ll
create mode 100644 final/test/CodeGen/X86/fast-cc-merge-stack-adj.ll
create mode 100644 final/test/CodeGen/X86/fast-cc-pass-in-regs.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-atomic.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-avoid-unnecessary-pic-base.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-bail.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-bc.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-call.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-cmp-branch.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-constpool.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-fneg.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-gep.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-gv.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-i1.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-mem.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-shift-imm.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-tailcall.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-tls.ll
create mode 100644 final/test/CodeGen/X86/fast-isel-x86.ll
create mode 100644 final/test/CodeGen/X86/fast-isel.ll
create mode 100644 final/test/CodeGen/X86/fastcall-correct-mangling.ll
create mode 100644 final/test/CodeGen/X86/fastcc-2.ll
create mode 100644 final/test/CodeGen/X86/fastcc-byval.ll
create mode 100644 final/test/CodeGen/X86/fastcc-sret.ll
create mode 100644 final/test/CodeGen/X86/fastcc.ll
create mode 100644 final/test/CodeGen/X86/fastcc3struct.ll
create mode 100644 final/test/CodeGen/X86/field-extract-use-trunc.ll
create mode 100644 final/test/CodeGen/X86/fildll.ll
create mode 100644 final/test/CodeGen/X86/fltused.ll
create mode 100644 final/test/CodeGen/X86/fmul-zero.ll
create mode 100644 final/test/CodeGen/X86/fold-add.ll
create mode 100644 final/test/CodeGen/X86/fold-and-shift.ll
create mode 100644 final/test/CodeGen/X86/fold-call-2.ll
create mode 100644 final/test/CodeGen/X86/fold-call-3.ll
create mode 100644 final/test/CodeGen/X86/fold-call.ll
create mode 100644 final/test/CodeGen/X86/fold-imm.ll
create mode 100644 final/test/CodeGen/X86/fold-load.ll
create mode 100644 final/test/CodeGen/X86/fold-mul-lohi.ll
create mode 100644 final/test/CodeGen/X86/fold-pcmpeqd-0.ll
create mode 100644 final/test/CodeGen/X86/fold-pcmpeqd-1.ll
create mode 100644 final/test/CodeGen/X86/fold-pcmpeqd-2.ll
create mode 100644 final/test/CodeGen/X86/fold-sext-trunc.ll
create mode 100644 final/test/CodeGen/X86/force-align-stack.ll
create mode 100644 final/test/CodeGen/X86/fp-elim.ll
create mode 100644 final/test/CodeGen/X86/fp-immediate-shorten.ll
create mode 100644 final/test/CodeGen/X86/fp-in-intregs.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-2results.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-O0-crash.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-compare.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-direct-ret.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-ret-conv.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-ret-store.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-ret.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-retcopy.ll
create mode 100644 final/test/CodeGen/X86/fp-stack-set-st1.ll
create mode 100644 final/test/CodeGen/X86/fp-stack.ll
create mode 100644 final/test/CodeGen/X86/fp2sint.ll
create mode 100644 final/test/CodeGen/X86/fp_constant_op.ll
create mode 100644 final/test/CodeGen/X86/fp_load_cast_fold.ll
create mode 100644 final/test/CodeGen/X86/fp_load_fold.ll
create mode 100644 final/test/CodeGen/X86/fsxor-alignment.ll
create mode 100644 final/test/CodeGen/X86/full-lsr.ll
create mode 100644 final/test/CodeGen/X86/ga-offset.ll
create mode 100644 final/test/CodeGen/X86/gather-addresses.ll
create mode 100644 final/test/CodeGen/X86/ghc-cc.ll
create mode 100644 final/test/CodeGen/X86/ghc-cc64.ll
create mode 100644 final/test/CodeGen/X86/global-sections-tls.ll
create mode 100644 final/test/CodeGen/X86/global-sections.ll
create mode 100644 final/test/CodeGen/X86/h-register-addressing-32.ll
create mode 100644 final/test/CodeGen/X86/h-register-addressing-64.ll
create mode 100644 final/test/CodeGen/X86/h-register-store.ll
create mode 100644 final/test/CodeGen/X86/h-registers-0.ll
create mode 100644 final/test/CodeGen/X86/h-registers-1.ll
create mode 100644 final/test/CodeGen/X86/h-registers-2.ll
create mode 100644 final/test/CodeGen/X86/h-registers-3.ll
create mode 100644 final/test/CodeGen/X86/hidden-vis-2.ll
create mode 100644 final/test/CodeGen/X86/hidden-vis-3.ll
create mode 100644 final/test/CodeGen/X86/hidden-vis-4.ll
create mode 100644 final/test/CodeGen/X86/hidden-vis-pic.ll
create mode 100644 final/test/CodeGen/X86/hidden-vis.ll
create mode 100644 final/test/CodeGen/X86/i128-and-beyond.ll
create mode 100644 final/test/CodeGen/X86/i128-immediate.ll
create mode 100644 final/test/CodeGen/X86/i128-mul.ll
create mode 100644 final/test/CodeGen/X86/i128-ret.ll
create mode 100644 final/test/CodeGen/X86/i256-add.ll
create mode 100644 final/test/CodeGen/X86/i2k.ll
create mode 100644 final/test/CodeGen/X86/i64-mem-copy.ll
create mode 100644 final/test/CodeGen/X86/iabs.ll
create mode 100644 final/test/CodeGen/X86/illegal-insert.ll
create mode 100644 final/test/CodeGen/X86/illegal-vector-args-return.ll
create mode 100644 final/test/CodeGen/X86/imul-lea-2.ll
create mode 100644 final/test/CodeGen/X86/imul-lea.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-2addr.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-R-constraint.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-flag-clobber.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-fpstack.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-fpstack2.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-fpstack3.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-fpstack4.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-fpstack5.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-h.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-modifier-n.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-mrv.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-out-regs.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-pic.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-ptr-cast.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-q-regs.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-tied.ll
create mode 100644 final/test/CodeGen/X86/inline-asm-x-scalar.ll
create mode 100644 final/test/CodeGen/X86/inline-asm.ll
create mode 100644 final/test/CodeGen/X86/ins_subreg_coalesce-1.ll
create mode 100644 final/test/CodeGen/X86/ins_subreg_coalesce-2.ll
create mode 100644 final/test/CodeGen/X86/ins_subreg_coalesce-3.ll
create mode 100644 final/test/CodeGen/X86/insert-positions.ll
create mode 100644 final/test/CodeGen/X86/insertelement-copytoregs.ll
create mode 100644 final/test/CodeGen/X86/insertelement-legalize.ll
create mode 100644 final/test/CodeGen/X86/int-intrinsic.ll
create mode 100644 final/test/CodeGen/X86/invalid-shift-immediate.ll
create mode 100644 final/test/CodeGen/X86/isel-sink.ll
create mode 100644 final/test/CodeGen/X86/isel-sink2.ll
create mode 100644 final/test/CodeGen/X86/isel-sink3.ll
create mode 100644 final/test/CodeGen/X86/isint.ll
create mode 100644 final/test/CodeGen/X86/isnan.ll
create mode 100644 final/test/CodeGen/X86/isnan2.ll
create mode 100644 final/test/CodeGen/X86/ispositive.ll
create mode 100644 final/test/CodeGen/X86/iv-users-in-other-loops.ll
create mode 100644 final/test/CodeGen/X86/jump_sign.ll
create mode 100644 final/test/CodeGen/X86/label-redefinition.ll
create mode 100644 final/test/CodeGen/X86/large-gep-scale.ll
create mode 100644 final/test/CodeGen/X86/ldzero.ll
create mode 100644 final/test/CodeGen/X86/lea-2.ll
create mode 100644 final/test/CodeGen/X86/lea-3.ll
create mode 100644 final/test/CodeGen/X86/lea-4.ll
create mode 100644 final/test/CodeGen/X86/lea-recursion.ll
create mode 100644 final/test/CodeGen/X86/lea.ll
create mode 100644 final/test/CodeGen/X86/leaf-fp-elim.ll
create mode 100644 final/test/CodeGen/X86/legalize-fmp-oeq-vector-select.ll
create mode 100644 final/test/CodeGen/X86/legalize-sub-zero-2.ll
create mode 100644 final/test/CodeGen/X86/legalize-sub-zero.ll
create mode 100644 final/test/CodeGen/X86/legalizedag_vec.ll
create mode 100644 final/test/CodeGen/X86/lfence.ll
create mode 100644 final/test/CodeGen/X86/licm-nested.ll
create mode 100644 final/test/CodeGen/X86/licm-symbol.ll
create mode 100644 final/test/CodeGen/X86/limited-prec.ll
create mode 100644 final/test/CodeGen/X86/live-out-reg-info.ll
create mode 100644 final/test/CodeGen/X86/liveness-local-regalloc.ll
create mode 100644 final/test/CodeGen/X86/lock-inst-encoding.ll
create mode 100644 final/test/CodeGen/X86/long-setcc.ll
create mode 100644 final/test/CodeGen/X86/longlong-deadload.ll
create mode 100644 final/test/CodeGen/X86/loop-blocks.ll
create mode 100644 final/test/CodeGen/X86/loop-hoist.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce-2.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce-3.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce2.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce3.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce4.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce5.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce6.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce7.ll
create mode 100644 final/test/CodeGen/X86/loop-strength-reduce8.ll
create mode 100644 final/test/CodeGen/X86/lsr-delayed-fold.ll
create mode 100644 final/test/CodeGen/X86/lsr-i386.ll
create mode 100644 final/test/CodeGen/X86/lsr-interesting-step.ll
create mode 100644 final/test/CodeGen/X86/lsr-loop-exit-cond.ll
create mode 100644 final/test/CodeGen/X86/lsr-negative-stride.ll
create mode 100644 final/test/CodeGen/X86/lsr-nonaffine.ll
create mode 100644 final/test/CodeGen/X86/lsr-normalization.ll
create mode 100644 final/test/CodeGen/X86/lsr-overflow.ll
create mode 100644 final/test/CodeGen/X86/lsr-quadratic-expand.ll
create mode 100644 final/test/CodeGen/X86/lsr-redundant-addressing.ll
create mode 100644 final/test/CodeGen/X86/lsr-reuse-trunc.ll
create mode 100644 final/test/CodeGen/X86/lsr-reuse.ll
create mode 100644 final/test/CodeGen/X86/lsr-sort.ll
create mode 100644 final/test/CodeGen/X86/lsr-static-addr.ll
create mode 100644 final/test/CodeGen/X86/lsr-wrap.ll
create mode 100644 final/test/CodeGen/X86/machine-cse.ll
create mode 100644 final/test/CodeGen/X86/masked-iv-safe.ll
create mode 100644 final/test/CodeGen/X86/masked-iv-unsafe.ll
create mode 100644 final/test/CodeGen/X86/maskmovdqu.ll
create mode 100644 final/test/CodeGen/X86/mcinst-lowering-cmp0.ll
create mode 100644 final/test/CodeGen/X86/mcinst-lowering.ll
create mode 100644 final/test/CodeGen/X86/memcmp.ll
create mode 100644 final/test/CodeGen/X86/memcpy-2.ll
create mode 100644 final/test/CodeGen/X86/memcpy.ll
create mode 100644 final/test/CodeGen/X86/memmove-4.ll
create mode 100644 final/test/CodeGen/X86/memset-2.ll
create mode 100644 final/test/CodeGen/X86/memset-3.ll
create mode 100644 final/test/CodeGen/X86/memset.ll
create mode 100644 final/test/CodeGen/X86/memset64-on-x86-32.ll
create mode 100644 final/test/CodeGen/X86/mfence.ll
create mode 100644 final/test/CodeGen/X86/mingw-alloca.ll
create mode 100644 final/test/CodeGen/X86/misaligned-memset.ll
create mode 100644 final/test/CodeGen/X86/mmx-arg-passing.ll
create mode 100644 final/test/CodeGen/X86/mmx-arg-passing2.ll
create mode 100644 final/test/CodeGen/X86/mmx-arith.ll
create mode 100644 final/test/CodeGen/X86/mmx-bitcast-to-i64.ll
create mode 100644 final/test/CodeGen/X86/mmx-builtins.ll
create mode 100644 final/test/CodeGen/X86/mmx-copy-gprs.ll
create mode 100644 final/test/CodeGen/X86/mmx-emms.ll
create mode 100644 final/test/CodeGen/X86/mmx-insert-element.ll
create mode 100644 final/test/CodeGen/X86/mmx-pinsrw.ll
create mode 100644 final/test/CodeGen/X86/mmx-punpckhdq.ll
create mode 100644 final/test/CodeGen/X86/mmx-s2v.ll
create mode 100644 final/test/CodeGen/X86/mmx-shift.ll
create mode 100644 final/test/CodeGen/X86/mmx-shuffle.ll
create mode 100644 final/test/CodeGen/X86/mmx-vzmovl-2.ll
create mode 100644 final/test/CodeGen/X86/mmx-vzmovl.ll
create mode 100644 final/test/CodeGen/X86/movfs.ll
create mode 100644 final/test/CodeGen/X86/movgs.ll
create mode 100644 final/test/CodeGen/X86/mul-legalize.ll
create mode 100644 final/test/CodeGen/X86/mul-remat.ll
create mode 100644 final/test/CodeGen/X86/mul-shift-reassoc.ll
create mode 100644 final/test/CodeGen/X86/mul128.ll
create mode 100644 final/test/CodeGen/X86/mul64.ll
create mode 100644 final/test/CodeGen/X86/mult-alt-generic-i686.ll
create mode 100644 final/test/CodeGen/X86/mult-alt-generic-x86_64.ll
create mode 100644 final/test/CodeGen/X86/mult-alt-x86.ll
create mode 100644 final/test/CodeGen/X86/multiple-loop-post-inc.ll
create mode 100644 final/test/CodeGen/X86/multiple-return-values-cross-block.ll
create mode 100644 final/test/CodeGen/X86/multiple-return-values.ll
create mode 100644 final/test/CodeGen/X86/nancvt.ll
create mode 100644 final/test/CodeGen/X86/narrow-shl-load.ll
create mode 100644 final/test/CodeGen/X86/narrow_op-1.ll
create mode 100644 final/test/CodeGen/X86/neg-shl-add.ll
create mode 100644 final/test/CodeGen/X86/neg_fp.ll
create mode 100644 final/test/CodeGen/X86/negate-add-zero.ll
create mode 100644 final/test/CodeGen/X86/negative-sin.ll
create mode 100644 final/test/CodeGen/X86/negative-stride-fptosi-user.ll
create mode 100644 final/test/CodeGen/X86/negative-subscript.ll
create mode 100644 final/test/CodeGen/X86/negative_zero.ll
create mode 100644 final/test/CodeGen/X86/nobt.ll
create mode 100644 final/test/CodeGen/X86/nofence.ll
create mode 100644 final/test/CodeGen/X86/nosse-error1.ll
create mode 100644 final/test/CodeGen/X86/nosse-error2.ll
create mode 100644 final/test/CodeGen/X86/nosse-varargs.ll
create mode 100644 final/test/CodeGen/X86/object-size.ll
create mode 100644 final/test/CodeGen/X86/opt-ext-uses.ll
create mode 100644 final/test/CodeGen/X86/optimize-max-0.ll
create mode 100644 final/test/CodeGen/X86/optimize-max-1.ll
create mode 100644 final/test/CodeGen/X86/optimize-max-2.ll
create mode 100644 final/test/CodeGen/X86/optimize-max-3.ll
create mode 100644 final/test/CodeGen/X86/or-address.ll
create mode 100644 final/test/CodeGen/X86/or-branch.ll
create mode 100644 final/test/CodeGen/X86/overlap-shift.ll
create mode 100644 final/test/CodeGen/X86/packed_struct.ll
create mode 100644 final/test/CodeGen/X86/palignr-2.ll
create mode 100644 final/test/CodeGen/X86/palignr.ll
create mode 100644 final/test/CodeGen/X86/peep-test-0.ll
create mode 100644 final/test/CodeGen/X86/peep-test-1.ll
create mode 100644 final/test/CodeGen/X86/peep-test-2.ll
create mode 100644 final/test/CodeGen/X86/peep-test-3.ll
create mode 100644 final/test/CodeGen/X86/peep-vector-extract-concat.ll
create mode 100644 final/test/CodeGen/X86/peep-vector-extract-insert.ll
create mode 100644 final/test/CodeGen/X86/personality.ll
create mode 100644 final/test/CodeGen/X86/phi-bit-propagation.ll
create mode 100644 final/test/CodeGen/X86/phi-immediate-factoring.ll
create mode 100644 final/test/CodeGen/X86/phys-reg-local-regalloc.ll
create mode 100644 final/test/CodeGen/X86/phys_subreg_coalesce-2.ll
create mode 100644 final/test/CodeGen/X86/phys_subreg_coalesce-3.ll
create mode 100644 final/test/CodeGen/X86/phys_subreg_coalesce.ll
create mode 100644 final/test/CodeGen/X86/pic-load-remat.ll
create mode 100644 final/test/CodeGen/X86/pic.ll
create mode 100644 final/test/CodeGen/X86/pic_jumptable.ll
create mode 100644 final/test/CodeGen/X86/pmul.ll
create mode 100644 final/test/CodeGen/X86/pmulld.ll
create mode 100644 final/test/CodeGen/X86/popcnt.ll
create mode 100644 final/test/CodeGen/X86/postalloc-coalescing.ll
create mode 100644 final/test/CodeGen/X86/postra-licm.ll
create mode 100644 final/test/CodeGen/X86/powi.ll
create mode 100644 final/test/CodeGen/X86/pr1462.ll
create mode 100644 final/test/CodeGen/X86/pr1489.ll
create mode 100644 final/test/CodeGen/X86/pr1505.ll
create mode 100644 final/test/CodeGen/X86/pr1505b.ll
create mode 100644 final/test/CodeGen/X86/pr2177.ll
create mode 100644 final/test/CodeGen/X86/pr2182.ll
create mode 100644 final/test/CodeGen/X86/pr2326.ll
create mode 100644 final/test/CodeGen/X86/pr2623.ll
create mode 100644 final/test/CodeGen/X86/pr2656.ll
create mode 100644 final/test/CodeGen/X86/pr2659.ll
create mode 100644 final/test/CodeGen/X86/pr2849.ll
create mode 100644 final/test/CodeGen/X86/pr2924.ll
create mode 100644 final/test/CodeGen/X86/pr2982.ll
create mode 100644 final/test/CodeGen/X86/pr3154.ll
create mode 100644 final/test/CodeGen/X86/pr3216.ll
create mode 100644 final/test/CodeGen/X86/pr3241.ll
create mode 100644 final/test/CodeGen/X86/pr3243.ll
create mode 100644 final/test/CodeGen/X86/pr3244.ll
create mode 100644 final/test/CodeGen/X86/pr3250.ll
create mode 100644 final/test/CodeGen/X86/pr3317.ll
create mode 100644 final/test/CodeGen/X86/pr3366.ll
create mode 100644 final/test/CodeGen/X86/pr3457.ll
create mode 100644 final/test/CodeGen/X86/pr3495-2.ll
create mode 100644 final/test/CodeGen/X86/pr3495.ll
create mode 100644 final/test/CodeGen/X86/pr3522.ll
create mode 100644 final/test/CodeGen/X86/pr7882.ll
create mode 100644 final/test/CodeGen/X86/pr9127.ll
create mode 100644 final/test/CodeGen/X86/pre-split1.ll
create mode 100644 final/test/CodeGen/X86/pre-split10.ll
create mode 100644 final/test/CodeGen/X86/pre-split11.ll
create mode 100644 final/test/CodeGen/X86/pre-split2.ll
create mode 100644 final/test/CodeGen/X86/pre-split3.ll
create mode 100644 final/test/CodeGen/X86/pre-split4.ll
create mode 100644 final/test/CodeGen/X86/pre-split5.ll
create mode 100644 final/test/CodeGen/X86/pre-split6.ll
create mode 100644 final/test/CodeGen/X86/pre-split7.ll
create mode 100644 final/test/CodeGen/X86/pre-split8.ll
create mode 100644 final/test/CodeGen/X86/pre-split9.ll
create mode 100644 final/test/CodeGen/X86/prefetch.ll
create mode 100644 final/test/CodeGen/X86/private-2.ll
create mode 100644 final/test/CodeGen/X86/private.ll
create mode 100644 final/test/CodeGen/X86/promote-assert-zext.ll
create mode 100644 final/test/CodeGen/X86/promote-i16.ll
create mode 100644 final/test/CodeGen/X86/ptrtoint-constexpr.ll
create mode 100644 final/test/CodeGen/X86/rdtsc.ll
create mode 100644 final/test/CodeGen/X86/red-zone.ll
create mode 100644 final/test/CodeGen/X86/red-zone2.ll
create mode 100644 final/test/CodeGen/X86/regpressure.ll
create mode 100644 final/test/CodeGen/X86/rem-2.ll
create mode 100644 final/test/CodeGen/X86/rem.ll
create mode 100644 final/test/CodeGen/X86/remat-constant.ll
create mode 100644 final/test/CodeGen/X86/remat-mov-0.ll
create mode 100644 final/test/CodeGen/X86/remat-scalar-zero.ll
create mode 100644 final/test/CodeGen/X86/ret-addr.ll
create mode 100644 final/test/CodeGen/X86/ret-i64-0.ll
create mode 100644 final/test/CodeGen/X86/ret-mmx.ll
create mode 100644 final/test/CodeGen/X86/rip-rel-address.ll
create mode 100644 final/test/CodeGen/X86/rodata-relocs.ll
create mode 100644 final/test/CodeGen/X86/rot16.ll
create mode 100644 final/test/CodeGen/X86/rot32.ll
create mode 100644 final/test/CodeGen/X86/rot64.ll
create mode 100644 final/test/CodeGen/X86/rotate.ll
create mode 100644 final/test/CodeGen/X86/rotate2.ll
create mode 100644 final/test/CodeGen/X86/scalar-extract.ll
create mode 100644 final/test/CodeGen/X86/scalar-min-max-fill-operand.ll
create mode 100644 final/test/CodeGen/X86/scalar_sse_minmax.ll
create mode 100644 final/test/CodeGen/X86/scalar_widen_div.ll
create mode 100644 final/test/CodeGen/X86/scalarize-bitcast.ll
create mode 100644 final/test/CodeGen/X86/scev-interchange.ll
create mode 100644 final/test/CodeGen/X86/select.ll
create mode 100644 final/test/CodeGen/X86/setcc.ll
create mode 100644 final/test/CodeGen/X86/setoeq.ll
create mode 100644 final/test/CodeGen/X86/setuge.ll
create mode 100644 final/test/CodeGen/X86/sext-i1.ll
create mode 100644 final/test/CodeGen/X86/sext-load.ll
create mode 100644 final/test/CodeGen/X86/sext-ret-val.ll
create mode 100644 final/test/CodeGen/X86/sext-subreg.ll
create mode 100644 final/test/CodeGen/X86/sext-trunc.ll
create mode 100644 final/test/CodeGen/X86/sfence.ll
create mode 100644 final/test/CodeGen/X86/shift-and.ll
create mode 100644 final/test/CodeGen/X86/shift-coalesce.ll
create mode 100644 final/test/CodeGen/X86/shift-codegen.ll
create mode 100644 final/test/CodeGen/X86/shift-combine.ll
create mode 100644 final/test/CodeGen/X86/shift-double.ll
create mode 100644 final/test/CodeGen/X86/shift-folding.ll
create mode 100644 final/test/CodeGen/X86/shift-i128.ll
create mode 100644 final/test/CodeGen/X86/shift-i256.ll
create mode 100644 final/test/CodeGen/X86/shift-one.ll
create mode 100644 final/test/CodeGen/X86/shift-parts.ll
create mode 100644 final/test/CodeGen/X86/shl-anyext.ll
create mode 100644 final/test/CodeGen/X86/shl_elim.ll
create mode 100644 final/test/CodeGen/X86/shrink-fp-const1.ll
create mode 100644 final/test/CodeGen/X86/shrink-fp-const2.ll
create mode 100644 final/test/CodeGen/X86/sibcall-2.ll
create mode 100644 final/test/CodeGen/X86/sibcall-3.ll
create mode 100644 final/test/CodeGen/X86/sibcall-4.ll
create mode 100644 final/test/CodeGen/X86/sibcall-5.ll
create mode 100644 final/test/CodeGen/X86/sibcall.ll
create mode 100644 final/test/CodeGen/X86/sincos.ll
create mode 100644 final/test/CodeGen/X86/sink-hoist.ll
create mode 100644 final/test/CodeGen/X86/small-byval-memcpy.ll
create mode 100644 final/test/CodeGen/X86/smul-with-overflow-2.ll
create mode 100644 final/test/CodeGen/X86/smul-with-overflow-3.ll
create mode 100644 final/test/CodeGen/X86/smul-with-overflow.ll
create mode 100644 final/test/CodeGen/X86/soft-fp.ll
create mode 100644 final/test/CodeGen/X86/splat-scalar-load.ll
create mode 100644 final/test/CodeGen/X86/split-eh-lpad-edges.ll
create mode 100644 final/test/CodeGen/X86/split-vector-rem.ll
create mode 100644 final/test/CodeGen/X86/sret.ll
create mode 100644 final/test/CodeGen/X86/sse-align-0.ll
create mode 100644 final/test/CodeGen/X86/sse-align-1.ll
create mode 100644 final/test/CodeGen/X86/sse-align-10.ll
create mode 100644 final/test/CodeGen/X86/sse-align-11.ll
create mode 100644 final/test/CodeGen/X86/sse-align-12.ll
create mode 100644 final/test/CodeGen/X86/sse-align-2.ll
create mode 100644 final/test/CodeGen/X86/sse-align-3.ll
create mode 100644 final/test/CodeGen/X86/sse-align-4.ll
create mode 100644 final/test/CodeGen/X86/sse-align-5.ll
create mode 100644 final/test/CodeGen/X86/sse-align-6.ll
create mode 100644 final/test/CodeGen/X86/sse-align-7.ll
create mode 100644 final/test/CodeGen/X86/sse-align-8.ll
create mode 100644 final/test/CodeGen/X86/sse-align-9.ll
create mode 100644 final/test/CodeGen/X86/sse-commute.ll
create mode 100644 final/test/CodeGen/X86/sse-fcopysign.ll
create mode 100644 final/test/CodeGen/X86/sse-load-ret.ll
create mode 100644 final/test/CodeGen/X86/sse-minmax.ll
create mode 100644 final/test/CodeGen/X86/sse-varargs.ll
create mode 100644 final/test/CodeGen/X86/sse1.ll
create mode 100644 final/test/CodeGen/X86/sse2.ll
create mode 100644 final/test/CodeGen/X86/sse3.ll
create mode 100644 final/test/CodeGen/X86/sse41.ll
create mode 100644 final/test/CodeGen/X86/sse42.ll
create mode 100644 final/test/CodeGen/X86/sse_reload_fold.ll
create mode 100644 final/test/CodeGen/X86/stack-align.ll
create mode 100644 final/test/CodeGen/X86/stack-protector-linux.ll
create mode 100644 final/test/CodeGen/X86/stdarg.ll
create mode 100644 final/test/CodeGen/X86/stdcall-notailcall.ll
create mode 100644 final/test/CodeGen/X86/stdcall.ll
create mode 100644 final/test/CodeGen/X86/store-empty-member.ll
create mode 100644 final/test/CodeGen/X86/store-fp-constant.ll
create mode 100644 final/test/CodeGen/X86/store-global-address.ll
create mode 100644 final/test/CodeGen/X86/store-narrow.ll
create mode 100644 final/test/CodeGen/X86/store_op_load_fold.ll
create mode 100644 final/test/CodeGen/X86/store_op_load_fold2.ll
create mode 100644 final/test/CodeGen/X86/storetrunc-fp.ll
create mode 100644 final/test/CodeGen/X86/stride-nine-with-base-reg.ll
create mode 100644 final/test/CodeGen/X86/stride-reuse.ll
create mode 100644 final/test/CodeGen/X86/sub-with-overflow.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-0.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-1.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-2.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-3.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-4.ll
create mode 100644 final/test/CodeGen/X86/subreg-to-reg-6.ll
create mode 100644 final/test/CodeGen/X86/switch-bt.ll
create mode 100644 final/test/CodeGen/X86/switch-crit-edge-constant.ll
create mode 100644 final/test/CodeGen/X86/switch-or.ll
create mode 100644 final/test/CodeGen/X86/switch-zextload.ll
create mode 100644 final/test/CodeGen/X86/swizzle.ll
create mode 100644 final/test/CodeGen/X86/tail-opts.ll
create mode 100644 final/test/CodeGen/X86/tailcall-fastisel.ll
create mode 100644 final/test/CodeGen/X86/tailcall-i1.ll
create mode 100644 final/test/CodeGen/X86/tailcall-largecode.ll
create mode 100644 final/test/CodeGen/X86/tailcall-returndup-void.ll
create mode 100644 final/test/CodeGen/X86/tailcall-ri64.ll
create mode 100644 final/test/CodeGen/X86/tailcall-stackalign.ll
create mode 100644 final/test/CodeGen/X86/tailcall-structret.ll
create mode 100644 final/test/CodeGen/X86/tailcall-void.ll
create mode 100644 final/test/CodeGen/X86/tailcall1.ll
create mode 100644 final/test/CodeGen/X86/tailcallbyval.ll
create mode 100644 final/test/CodeGen/X86/tailcallbyval64.ll
create mode 100644 final/test/CodeGen/X86/tailcallfp.ll
create mode 100644 final/test/CodeGen/X86/tailcallfp2.ll
create mode 100644 final/test/CodeGen/X86/tailcallpic1.ll
create mode 100644 final/test/CodeGen/X86/tailcallpic2.ll
create mode 100644 final/test/CodeGen/X86/tailcallstack64.ll
create mode 100644 final/test/CodeGen/X86/test-nofold.ll
create mode 100644 final/test/CodeGen/X86/test-shrink-bug.ll
create mode 100644 final/test/CodeGen/X86/test-shrink.ll
create mode 100644 final/test/CodeGen/X86/testl-commute.ll
create mode 100644 final/test/CodeGen/X86/tls-pic.ll
create mode 100644 final/test/CodeGen/X86/tls1.ll
create mode 100644 final/test/CodeGen/X86/tls10.ll
create mode 100644 final/test/CodeGen/X86/tls11.ll
create mode 100644 final/test/CodeGen/X86/tls12.ll
create mode 100644 final/test/CodeGen/X86/tls13.ll
create mode 100644 final/test/CodeGen/X86/tls14.ll
create mode 100644 final/test/CodeGen/X86/tls15.ll
create mode 100644 final/test/CodeGen/X86/tls2.ll
create mode 100644 final/test/CodeGen/X86/tls3.ll
create mode 100644 final/test/CodeGen/X86/tls4.ll
create mode 100644 final/test/CodeGen/X86/tls5.ll
create mode 100644 final/test/CodeGen/X86/tls6.ll
create mode 100644 final/test/CodeGen/X86/tls7.ll
create mode 100644 final/test/CodeGen/X86/tls8.ll
create mode 100644 final/test/CodeGen/X86/tls9.ll
create mode 100644 final/test/CodeGen/X86/tlv-1.ll
create mode 100644 final/test/CodeGen/X86/tlv-2.ll
create mode 100644 final/test/CodeGen/X86/trap.ll
create mode 100644 final/test/CodeGen/X86/trunc-to-bool.ll
create mode 100644 final/test/CodeGen/X86/twoaddr-coalesce-2.ll
create mode 100644 final/test/CodeGen/X86/twoaddr-coalesce.ll
create mode 100644 final/test/CodeGen/X86/twoaddr-lea.ll
create mode 100644 final/test/CodeGen/X86/twoaddr-pass-sink.ll
create mode 100644 final/test/CodeGen/X86/twoaddr-remat.ll
create mode 100644 final/test/CodeGen/X86/uint64-to-float.ll
create mode 100644 final/test/CodeGen/X86/uint_to_fp-2.ll
create mode 100644 final/test/CodeGen/X86/uint_to_fp.ll
create mode 100644 final/test/CodeGen/X86/umul-with-carry.ll
create mode 100644 final/test/CodeGen/X86/umul-with-overflow.ll
create mode 100644 final/test/CodeGen/X86/unaligned-load.ll
create mode 100644 final/test/CodeGen/X86/unknown-location.ll
create mode 100644 final/test/CodeGen/X86/unreachable-loop-sinking.ll
create mode 100644 final/test/CodeGen/X86/urem-i8-constant.ll
create mode 100644 final/test/CodeGen/X86/use-add-flags.ll
create mode 100644 final/test/CodeGen/X86/v-binop-widen.ll
create mode 100644 final/test/CodeGen/X86/v-binop-widen2.ll
create mode 100644 final/test/CodeGen/X86/v2f32.ll
create mode 100644 final/test/CodeGen/X86/v4f32-immediate.ll
create mode 100644 final/test/CodeGen/X86/variable-sized-darwin-bzero.ll
create mode 100644 final/test/CodeGen/X86/variadic-node-pic.ll
create mode 100644 final/test/CodeGen/X86/vec-sign.ll
create mode 100644 final/test/CodeGen/X86/vec-trunc-store.ll
create mode 100644 final/test/CodeGen/X86/vec_add.ll
create mode 100644 final/test/CodeGen/X86/vec_align.ll
create mode 100644 final/test/CodeGen/X86/vec_anyext.ll
create mode 100644 final/test/CodeGen/X86/vec_call.ll
create mode 100644 final/test/CodeGen/X86/vec_cast.ll
create mode 100644 final/test/CodeGen/X86/vec_clear.ll
create mode 100644 final/test/CodeGen/X86/vec_compare-2.ll
create mode 100644 final/test/CodeGen/X86/vec_compare.ll
create mode 100644 final/test/CodeGen/X86/vec_ctbits.ll
create mode 100644 final/test/CodeGen/X86/vec_ext_inreg.ll
create mode 100644 final/test/CodeGen/X86/vec_extract-sse4.ll
create mode 100644 final/test/CodeGen/X86/vec_extract.ll
create mode 100644 final/test/CodeGen/X86/vec_fneg.ll
create mode 100644 final/test/CodeGen/X86/vec_i64.ll
create mode 100644 final/test/CodeGen/X86/vec_ins_extract-1.ll
create mode 100644 final/test/CodeGen/X86/vec_ins_extract.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-2.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-3.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-4.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-5.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-6.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-7.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-8.ll
create mode 100644 final/test/CodeGen/X86/vec_insert-9.ll
create mode 100644 final/test/CodeGen/X86/vec_insert.ll
create mode 100644 final/test/CodeGen/X86/vec_loadsingles.ll
create mode 100644 final/test/CodeGen/X86/vec_logical.ll
create mode 100644 final/test/CodeGen/X86/vec_return.ll
create mode 100644 final/test/CodeGen/X86/vec_set-2.ll
create mode 100644 final/test/CodeGen/X86/vec_set-3.ll
create mode 100644 final/test/CodeGen/X86/vec_set-4.ll
create mode 100644 final/test/CodeGen/X86/vec_set-5.ll
create mode 100644 final/test/CodeGen/X86/vec_set-6.ll
create mode 100644 final/test/CodeGen/X86/vec_set-7.ll
create mode 100644 final/test/CodeGen/X86/vec_set-8.ll
create mode 100644 final/test/CodeGen/X86/vec_set-9.ll
create mode 100644 final/test/CodeGen/X86/vec_set-A.ll
create mode 100644 final/test/CodeGen/X86/vec_set-B.ll
create mode 100644 final/test/CodeGen/X86/vec_set-C.ll
create mode 100644 final/test/CodeGen/X86/vec_set-D.ll
create mode 100644 final/test/CodeGen/X86/vec_set-E.ll
create mode 100644 final/test/CodeGen/X86/vec_set-F.ll
create mode 100644 final/test/CodeGen/X86/vec_set-G.ll
create mode 100644 final/test/CodeGen/X86/vec_set-H.ll
create mode 100644 final/test/CodeGen/X86/vec_set-I.ll
create mode 100644 final/test/CodeGen/X86/vec_set-J.ll
create mode 100644 final/test/CodeGen/X86/vec_set.ll
create mode 100644 final/test/CodeGen/X86/vec_sext.ll
create mode 100644 final/test/CodeGen/X86/vec_shift.ll
create mode 100644 final/test/CodeGen/X86/vec_shift2.ll
create mode 100644 final/test/CodeGen/X86/vec_shift3.ll
create mode 100644 final/test/CodeGen/X86/vec_shift4.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-11.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-14.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-15.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-16.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-17.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-18.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-19.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-20.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-22.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-23.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-24.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-25.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-26.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-27.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-28.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-30.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-31.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-34.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-35.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-36.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle-37.ll
create mode 100644 final/test/CodeGen/X86/vec_shuffle.ll
create mode 100644 final/test/CodeGen/X86/vec_splat-2.ll
create mode 100644 final/test/CodeGen/X86/vec_splat-3.ll
create mode 100644 final/test/CodeGen/X86/vec_splat-4.ll
create mode 100644 final/test/CodeGen/X86/vec_splat.ll
create mode 100644 final/test/CodeGen/X86/vec_ss_load_fold.ll
create mode 100644 final/test/CodeGen/X86/vec_zero-2.ll
create mode 100644 final/test/CodeGen/X86/vec_zero.ll
create mode 100644 final/test/CodeGen/X86/vec_zero_cse.ll
create mode 100644 final/test/CodeGen/X86/vec_zext.ll
create mode 100644 final/test/CodeGen/X86/vector-intrinsics.ll
create mode 100644 final/test/CodeGen/X86/vector-rem.ll
create mode 100644 final/test/CodeGen/X86/vector-variable-idx.ll
create mode 100644 final/test/CodeGen/X86/vector.ll
create mode 100644 final/test/CodeGen/X86/vfcmp.ll
create mode 100644 final/test/CodeGen/X86/visibility.ll
create mode 100644 final/test/CodeGen/X86/volatile.ll
create mode 100644 final/test/CodeGen/X86/vortex-bug.ll
create mode 100644 final/test/CodeGen/X86/vshift-1.ll
create mode 100644 final/test/CodeGen/X86/vshift-2.ll
create mode 100644 final/test/CodeGen/X86/vshift-3.ll
create mode 100644 final/test/CodeGen/X86/vshift-4.ll
create mode 100644 final/test/CodeGen/X86/vshift-5.ll
create mode 100644 final/test/CodeGen/X86/vshift_scalar.ll
create mode 100644 final/test/CodeGen/X86/vshift_split.ll
create mode 100644 final/test/CodeGen/X86/vshift_split2.ll
create mode 100644 final/test/CodeGen/X86/vsplit-and.ll
create mode 100644 final/test/CodeGen/X86/weak.ll
create mode 100644 final/test/CodeGen/X86/wide-integer-fold.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-1.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-2.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-3.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-4.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-5.ll
create mode 100644 final/test/CodeGen/X86/widen_arith-6.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-1.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-2.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-3.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-4.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-5.ll
create mode 100644 final/test/CodeGen/X86/widen_cast-6.ll
create mode 100644 final/test/CodeGen/X86/widen_conv-1.ll
create mode 100644 final/test/CodeGen/X86/widen_conv-2.ll
create mode 100644 final/test/CodeGen/X86/widen_conv-3.ll
create mode 100644 final/test/CodeGen/X86/widen_conv-4.ll
create mode 100644 final/test/CodeGen/X86/widen_extract-1.ll
create mode 100644 final/test/CodeGen/X86/widen_load-0.ll
create mode 100644 final/test/CodeGen/X86/widen_load-1.ll
create mode 100644 final/test/CodeGen/X86/widen_load-2.ll
create mode 100644 final/test/CodeGen/X86/widen_shuffle-1.ll
create mode 100644 final/test/CodeGen/X86/win64_params.ll
create mode 100644 final/test/CodeGen/X86/win64_vararg.ll
create mode 100644 final/test/CodeGen/X86/win_chkstk.ll
create mode 100644 final/test/CodeGen/X86/x86-64-and-mask.ll
create mode 100644 final/test/CodeGen/X86/x86-64-arg.ll
create mode 100644 final/test/CodeGen/X86/x86-64-asm.ll
create mode 100644 final/test/CodeGen/X86/x86-64-dead-stack-adjust.ll
create mode 100644 final/test/CodeGen/X86/x86-64-disp.ll
create mode 100644 final/test/CodeGen/X86/x86-64-extend-shift.ll
create mode 100644 final/test/CodeGen/X86/x86-64-frameaddr.ll
create mode 100644 final/test/CodeGen/X86/x86-64-gv-offset.ll
create mode 100644 final/test/CodeGen/X86/x86-64-jumps.ll
create mode 100644 final/test/CodeGen/X86/x86-64-malloc.ll
create mode 100644 final/test/CodeGen/X86/x86-64-mem.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-1.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-10.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-11.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-2.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-3.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-4.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-5.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-6.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-7.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-8.ll
create mode 100644 final/test/CodeGen/X86/x86-64-pic-9.ll
create mode 100644 final/test/CodeGen/X86/x86-64-ret0.ll
create mode 100644 final/test/CodeGen/X86/x86-64-shortint.ll
create mode 100644 final/test/CodeGen/X86/x86-64-sret-return.ll
create mode 100644 final/test/CodeGen/X86/x86-64-tls-1.ll
create mode 100644 final/test/CodeGen/X86/x86-64-varargs.ll
create mode 100644 final/test/CodeGen/X86/x86-frameaddr.ll
create mode 100644 final/test/CodeGen/X86/x86-frameaddr2.ll
create mode 100644 final/test/CodeGen/X86/x86-store-gv-addr.ll
create mode 100644 final/test/CodeGen/X86/x86_64-mul-by-const.ll
create mode 100644 final/test/CodeGen/X86/xmm-r64.ll
create mode 100644 final/test/CodeGen/X86/xor-icmp.ll
create mode 100644 final/test/CodeGen/X86/xor.ll
create mode 100644 final/test/CodeGen/X86/zero-remat.ll
create mode 100644 final/test/CodeGen/X86/zext-extract_subreg.ll
create mode 100644 final/test/CodeGen/X86/zext-inreg-0.ll
create mode 100644 final/test/CodeGen/X86/zext-inreg-1.ll
create mode 100644 final/test/CodeGen/X86/zext-sext.ll
create mode 100644 final/test/CodeGen/X86/zext-shl.ll
create mode 100644 final/test/CodeGen/X86/zext-trunc.ll
create mode 100644 final/test/CodeGen/XCore/2008-11-17-Shl64.ll
create mode 100644 final/test/CodeGen/XCore/2009-01-08-Crash.ll
create mode 100644 final/test/CodeGen/XCore/2009-01-14-Remat-Crash.ll
create mode 100644 final/test/CodeGen/XCore/2009-03-27-v2f64-param.ll
create mode 100644 final/test/CodeGen/XCore/2009-07-15-store192.ll
create mode 100644 final/test/CodeGen/XCore/2010-02-25-LSR-Crash.ll
create mode 100644 final/test/CodeGen/XCore/2010-04-07-DbgValueOtherTargets.ll
create mode 100644 final/test/CodeGen/XCore/2011-01-31-DAGCombineBug.ll
create mode 100644 final/test/CodeGen/XCore/addsub64.ll
create mode 100644 final/test/CodeGen/XCore/ashr.ll
create mode 100644 final/test/CodeGen/XCore/basictest.ll
create mode 100644 final/test/CodeGen/XCore/bigstructret.ll
create mode 100644 final/test/CodeGen/XCore/bitrev.ll
create mode 100644 final/test/CodeGen/XCore/constants.ll
create mode 100644 final/test/CodeGen/XCore/cos.ll
create mode 100644 final/test/CodeGen/XCore/dg.exp
create mode 100644 final/test/CodeGen/XCore/events.ll
create mode 100644 final/test/CodeGen/XCore/exp.ll
create mode 100644 final/test/CodeGen/XCore/exp2.ll
create mode 100644 final/test/CodeGen/XCore/fneg.ll
create mode 100644 final/test/CodeGen/XCore/getid.ll
create mode 100644 final/test/CodeGen/XCore/globals.ll
create mode 100644 final/test/CodeGen/XCore/indirectbr.ll
create mode 100644 final/test/CodeGen/XCore/ladd_lsub_combine.ll
create mode 100644 final/test/CodeGen/XCore/load.ll
create mode 100644 final/test/CodeGen/XCore/log.ll
create mode 100644 final/test/CodeGen/XCore/log10.ll
create mode 100644 final/test/CodeGen/XCore/log2.ll
create mode 100644 final/test/CodeGen/XCore/mul64.ll
create mode 100644 final/test/CodeGen/XCore/pow.ll
create mode 100644 final/test/CodeGen/XCore/powi.ll
create mode 100644 final/test/CodeGen/XCore/private.ll
create mode 100644 final/test/CodeGen/XCore/resources.ll
create mode 100644 final/test/CodeGen/XCore/sext.ll
create mode 100644 final/test/CodeGen/XCore/sin.ll
create mode 100644 final/test/CodeGen/XCore/sqrt.ll
create mode 100644 final/test/CodeGen/XCore/store.ll
create mode 100644 final/test/CodeGen/XCore/switch.ll
create mode 100644 final/test/CodeGen/XCore/switch_long.ll
create mode 100644 final/test/CodeGen/XCore/tls.ll
create mode 100644 final/test/CodeGen/XCore/trampoline.ll
create mode 100644 final/test/CodeGen/XCore/trap.ll
create mode 100644 final/test/CodeGen/XCore/unaligned_load.ll
create mode 100644 final/test/CodeGen/XCore/unaligned_store.ll
create mode 100644 final/test/CodeGen/XCore/unaligned_store_combine.ll
create mode 100644 final/test/DebugInfo/2009-01-15-dbg_declare.ll
create mode 100644 final/test/DebugInfo/2009-01-15-member.ll
create mode 100644 final/test/DebugInfo/2009-10-08-DebugInfo-NullGlobalVariable.ll
create mode 100644 final/test/DebugInfo/2009-10-16-Phi.ll
create mode 100644 final/test/DebugInfo/2009-11-03-InsertExtractValue.ll
create mode 100644 final/test/DebugInfo/2009-11-05-DeadGlobalVariable.ll
create mode 100644 final/test/DebugInfo/2009-11-06-InvalidDerivedType.ll
create mode 100644 final/test/DebugInfo/2009-11-06-NamelessGlobalVariable.ll
create mode 100644 final/test/DebugInfo/2009-11-10-CurrentFn.ll
create mode 100644 final/test/DebugInfo/2009-11-10-ParentScope.ll
create mode 100644 final/test/DebugInfo/2010-01-05-DbgScope.ll
create mode 100644 final/test/DebugInfo/2010-01-19-DbgScope.ll
create mode 100644 final/test/DebugInfo/2010-03-12-llc-crash.ll
create mode 100644 final/test/DebugInfo/2010-03-19-DbgDeclare.ll
create mode 100644 final/test/DebugInfo/2010-03-24-MemberFn.ll
create mode 100644 final/test/DebugInfo/2010-03-30-InvalidDbgInfoCrash.ll
create mode 100644 final/test/DebugInfo/2010-04-06-NestedFnDbgInfo.ll
create mode 100644 final/test/DebugInfo/2010-04-13-PubType.ll
create mode 100644 final/test/DebugInfo/2010-04-19-FramePtr.ll
create mode 100644 final/test/DebugInfo/2010-04-25-CU-entry_pc.ll
create mode 100644 final/test/DebugInfo/2010-05-03-DisableFramePtr.ll
create mode 100644 final/test/DebugInfo/2010-05-03-OriginDIE.ll
create mode 100644 final/test/DebugInfo/2010-05-10-MultipleCU.ll
create mode 100644 final/test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
create mode 100644 final/test/DebugInfo/2010-07-19-Crash.ll
create mode 100644 final/test/DebugInfo/2010-10-01-crash.ll
create mode 100644 final/test/DebugInfo/dg.exp
create mode 100644 final/test/DebugInfo/inheritance.ll
create mode 100644 final/test/DebugInfo/printdbginfo2.ll
create mode 100644 final/test/ExecutionEngine/2002-12-16-ArgTest.ll
create mode 100644 final/test/ExecutionEngine/2003-01-04-ArgumentBug.ll
create mode 100644 final/test/ExecutionEngine/2003-01-04-LoopTest.ll
create mode 100644 final/test/ExecutionEngine/2003-01-04-PhiTest.ll
create mode 100644 final/test/ExecutionEngine/2003-01-09-SARTest.ll
create mode 100644 final/test/ExecutionEngine/2003-01-10-FUCOM.ll
create mode 100644 final/test/ExecutionEngine/2003-01-15-AlignmentTest.ll
create mode 100644 final/test/ExecutionEngine/2003-05-06-LivenessClobber.ll
create mode 100644 final/test/ExecutionEngine/2003-05-07-ArgumentTest.ll
create mode 100644 final/test/ExecutionEngine/2003-05-11-PHIRegAllocBug.ll
create mode 100644 final/test/ExecutionEngine/2003-06-04-bzip2-bug.ll
create mode 100644 final/test/ExecutionEngine/2003-06-05-PHIBug.ll
create mode 100644 final/test/ExecutionEngine/2003-08-15-AllocaAssertion.ll
create mode 100644 final/test/ExecutionEngine/2003-08-21-EnvironmentTest.ll
create mode 100644 final/test/ExecutionEngine/2003-08-23-RegisterAllocatePhysReg.ll
create mode 100644 final/test/ExecutionEngine/2003-10-18-PHINode-ConstantExpr-CondCode-Failure.ll
create mode 100644 final/test/ExecutionEngine/2005-12-02-TailCallBug.ll
create mode 100644 final/test/ExecutionEngine/2007-12-10-APIntLoadStore.ll
create mode 100644 final/test/ExecutionEngine/2008-06-05-APInt-OverAShr.ll
create mode 100644 final/test/ExecutionEngine/2010-01-15-UndefValue.ll
create mode 100644 final/test/ExecutionEngine/dg.exp
create mode 100644 final/test/ExecutionEngine/fpbitcast.ll
create mode 100644 final/test/ExecutionEngine/hello.ll
create mode 100644 final/test/ExecutionEngine/hello2.ll
create mode 100644 final/test/ExecutionEngine/simplesttest.ll
create mode 100644 final/test/ExecutionEngine/simpletest.ll
create mode 100644 final/test/ExecutionEngine/stubs.ll
create mode 100644 final/test/ExecutionEngine/test-arith.ll
create mode 100644 final/test/ExecutionEngine/test-branch.ll
create mode 100644 final/test/ExecutionEngine/test-call.ll
create mode 100644 final/test/ExecutionEngine/test-cast.ll
create mode 100644 final/test/ExecutionEngine/test-constantexpr.ll
create mode 100644 final/test/ExecutionEngine/test-fp.ll
create mode 100644 final/test/ExecutionEngine/test-loadstore.ll
create mode 100644 final/test/ExecutionEngine/test-logical.ll
create mode 100644 final/test/ExecutionEngine/test-loop.ll
create mode 100644 final/test/ExecutionEngine/test-malloc.ll
create mode 100644 final/test/ExecutionEngine/test-phi.ll
create mode 100644 final/test/ExecutionEngine/test-ret.ll
create mode 100644 final/test/ExecutionEngine/test-setcond-fp.ll
create mode 100644 final/test/ExecutionEngine/test-setcond-int.ll
create mode 100644 final/test/ExecutionEngine/test-shift.ll
create mode 100644 final/test/Feature/NamedMDNode.ll
create mode 100644 final/test/Feature/NamedMDNode2.ll
create mode 100644 final/test/Feature/README.txt
create mode 100644 final/test/Feature/aliases.ll
create mode 100644 final/test/Feature/alignment.ll
create mode 100644 final/test/Feature/basictest.ll
create mode 100644 final/test/Feature/callingconventions.ll
create mode 100644 final/test/Feature/calltest.ll
create mode 100644 final/test/Feature/casttest.ll
create mode 100644 final/test/Feature/cfgstructures.ll
create mode 100644 final/test/Feature/constexpr.ll
create mode 100644 final/test/Feature/constpointer.ll
create mode 100644 final/test/Feature/dg.exp
create mode 100644 final/test/Feature/escaped_label.ll
create mode 100644 final/test/Feature/float.ll
create mode 100644 final/test/Feature/fold-fpcast.ll
create mode 100644 final/test/Feature/forwardreftest.ll
create mode 100644 final/test/Feature/global_section.ll
create mode 100644 final/test/Feature/globalredefinition.ll
create mode 100644 final/test/Feature/globalredefinition3.ll
create mode 100644 final/test/Feature/globalvars.ll
create mode 100644 final/test/Feature/indirectcall.ll
create mode 100644 final/test/Feature/indirectcall2.ll
create mode 100644 final/test/Feature/inlineasm.ll
create mode 100644 final/test/Feature/instructions.ll
create mode 100644 final/test/Feature/intrinsics.ll
create mode 100644 final/test/Feature/linker_private_linkages.ll
create mode 100644 final/test/Feature/llvm2cpp.exp
create mode 100644 final/test/Feature/load_module.ll
create mode 100644 final/test/Feature/md_on_instruction.ll
create mode 100644 final/test/Feature/memorymarkers.ll
create mode 100644 final/test/Feature/metadata.ll
create mode 100644 final/test/Feature/newcasts.ll
create mode 100644 final/test/Feature/noalias-ret.ll
create mode 100644 final/test/Feature/opaquetypes.ll
create mode 100644 final/test/Feature/packed.ll
create mode 100644 final/test/Feature/packed_struct.ll
create mode 100644 final/test/Feature/paramattrs.ll
create mode 100644 final/test/Feature/ppcld.ll
create mode 100644 final/test/Feature/properties.ll
create mode 100644 final/test/Feature/prototype.ll
create mode 100644 final/test/Feature/recursivetype.ll
create mode 100644 final/test/Feature/simplecalltest.ll
create mode 100644 final/test/Feature/small.ll
create mode 100644 final/test/Feature/smallest.ll
create mode 100644 final/test/Feature/sparcld.ll
create mode 100644 final/test/Feature/terminators.ll
create mode 100644 final/test/Feature/testalloca.ll
create mode 100644 final/test/Feature/testconstants.ll
create mode 100644 final/test/Feature/testlogical.ll
create mode 100644 final/test/Feature/testmemory.ll
create mode 100644 final/test/Feature/testtype.ll
create mode 100644 final/test/Feature/testvarargs.ll
create mode 100644 final/test/Feature/undefined.ll
create mode 100644 final/test/Feature/unreachable.ll
create mode 100644 final/test/Feature/varargs.ll
create mode 100644 final/test/Feature/varargs_new.ll
create mode 100644 final/test/Feature/vector-cast-constant-exprs.ll
create mode 100644 final/test/Feature/weak_constant.ll
create mode 100644 final/test/Feature/weirdnames.ll
create mode 100644 final/test/Feature/x86ld.ll
create mode 100644 final/test/FrontendAda/Support/element_copy.ads
create mode 100644 final/test/FrontendAda/Support/fat_fields.ads
create mode 100644 final/test/FrontendAda/Support/global_constant.ads
create mode 100644 final/test/FrontendAda/Support/non_lvalue.ads
create mode 100644 final/test/FrontendAda/Support/real_cst.ads
create mode 100644 final/test/FrontendAda/Support/unc_constructor.ads
create mode 100644 final/test/FrontendAda/Support/var_offset.ads
create mode 100644 final/test/FrontendAda/Support/var_size.ads
create mode 100644 final/test/FrontendAda/array_constructor.adb
create mode 100644 final/test/FrontendAda/array_range_ref.adb
create mode 100644 final/test/FrontendAda/array_ref.adb
create mode 100644 final/test/FrontendAda/array_size.adb
create mode 100644 final/test/FrontendAda/asm.adb
create mode 100644 final/test/FrontendAda/constant_fold.ads
create mode 100644 final/test/FrontendAda/debug_var_size.ads
create mode 100644 final/test/FrontendAda/dg.exp
create mode 100644 final/test/FrontendAda/element_copy.adb
create mode 100644 final/test/FrontendAda/emit_var.ads
create mode 100644 final/test/FrontendAda/fat_fields.adb
create mode 100644 final/test/FrontendAda/field_order.ads
create mode 100644 final/test/FrontendAda/global_constant.adb
create mode 100644 final/test/FrontendAda/init_size.ads
create mode 100644 final/test/FrontendAda/negative_field_offset.adb
create mode 100644 final/test/FrontendAda/non_bitfield.ads
create mode 100644 final/test/FrontendAda/non_lvalue.adb
create mode 100644 final/test/FrontendAda/placeholder.adb
create mode 100644 final/test/FrontendAda/real_cst.adb
create mode 100644 final/test/FrontendAda/switch.adb
create mode 100644 final/test/FrontendAda/unc_constructor.adb
create mode 100644 final/test/FrontendAda/var_offset.adb
create mode 100644 final/test/FrontendAda/var_size.adb
create mode 100644 final/test/FrontendAda/vce.adb
create mode 100644 final/test/FrontendAda/vce_lv.adb
create mode 100644 final/test/FrontendC++/2003-11-02-WeakLinkage.cpp
create mode 100644 final/test/FrontendC++/2003-11-18-PtrMemConstantInitializer.cpp
create mode 100644 final/test/FrontendC++/2003-11-25-ReturningOpaqueByValue.cpp
create mode 100644 final/test/FrontendC++/2003-11-27-MultipleInheritanceThunk.cpp
create mode 100644 final/test/FrontendC++/2003-11-29-DuplicatedCleanupTest.cpp
create mode 100644 final/test/FrontendC++/2003-12-08-ArrayOfPtrToMemberFunc.cpp
create mode 100644 final/test/FrontendC++/2004-01-11-DynamicInitializedConstant.cpp
create mode 100644 final/test/FrontendC++/2004-03-08-ReinterpretCastCopy.cpp
create mode 100644 final/test/FrontendC++/2004-03-09-UnmangledBuiltinMethods.cpp
create mode 100644 final/test/FrontendC++/2004-03-15-CleanupsAndGotos.cpp
create mode 100644 final/test/FrontendC++/2004-06-08-LateTemplateInstantiation.cpp
create mode 100644 final/test/FrontendC++/2004-09-27-CompilerCrash.cpp
create mode 100644 final/test/FrontendC++/2004-09-27-DidntEmitTemplate.cpp
create mode 100644 final/test/FrontendC++/2004-11-27-EmitsUnusedInlineFunctions.cpp
create mode 100644 final/test/FrontendC++/2004-11-27-ExceptionCleanupAssertion.cpp
create mode 100644 final/test/FrontendC++/2004-11-27-FriendDefaultArgCrash.cpp
create mode 100644 final/test/FrontendC++/2004-11-27-InlineAsmFunctionRedefinition.cpp
create mode 100644 final/test/FrontendC++/2005-01-03-StaticInitializers.cpp
create mode 100644 final/test/FrontendC++/2005-02-11-AnonymousUnion.cpp
create mode 100644 final/test/FrontendC++/2005-02-13-BadDynamicInit.cpp
create mode 100644 final/test/FrontendC++/2005-02-14-BitFieldOffset.cpp
create mode 100644 final/test/FrontendC++/2005-02-19-BitfieldStructCrash.cpp
create mode 100644 final/test/FrontendC++/2005-02-19-UnnamedVirtualThunkArgument.cpp
create mode 100644 final/test/FrontendC++/2005-02-20-BrokenReferenceTest.cpp
create mode 100644 final/test/FrontendC++/2005-02-27-PlacementArrayNewCrash.cpp
create mode 100644 final/test/FrontendC++/2005-07-21-VirtualBaseAccess.cpp
create mode 100644 final/test/FrontendC++/2006-03-01-GimplifyCrash.cpp
create mode 100644 final/test/FrontendC++/2006-03-06-C++RecurseCrash.cpp
create mode 100644 final/test/FrontendC++/2006-09-08-powi.cpp
create mode 100644 final/test/FrontendC++/2006-09-12-OpaqueStructCrash.cpp
create mode 100644 final/test/FrontendC++/2006-09-27-Debug-Protection.cpp
create mode 100644 final/test/FrontendC++/2006-10-30-ClassBitfield.cpp
create mode 100644 final/test/FrontendC++/2006-11-06-StackTrace.cpp
create mode 100644 final/test/FrontendC++/2006-11-20-GlobalSymbols.cpp
create mode 100644 final/test/FrontendC++/2006-11-30-ConstantExprCrash.cpp
create mode 100644 final/test/FrontendC++/2006-11-30-Pubnames.cpp
create mode 100644 final/test/FrontendC++/2007-01-02-UnboundedArray.cpp
create mode 100644 final/test/FrontendC++/2007-01-06-ELF-Thunk-Sections.cpp
create mode 100644 final/test/FrontendC++/2007-01-06-PtrMethodInit.cpp
create mode 100644 final/test/FrontendC++/2007-03-27-FunctionVarRename.cpp
create mode 100644 final/test/FrontendC++/2007-04-05-PackedBitFields-1.cpp
create mode 100644 final/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap-2.cpp
create mode 100644 final/test/FrontendC++/2007-04-05-PackedBitFieldsOverlap.cpp
create mode 100644 final/test/FrontendC++/2007-04-05-PackedBitFieldsSmall.cpp
create mode 100644 final/test/FrontendC++/2007-04-05-StructPackedFieldUnpacked.cpp
create mode 100644 final/test/FrontendC++/2007-04-10-PackedUnion.cpp
create mode 100644 final/test/FrontendC++/2007-04-11-InlineStorageClassC++.cpp
create mode 100644 final/test/FrontendC++/2007-04-14-FNoBuiltin.cpp
create mode 100644 final/test/FrontendC++/2007-04-31-TryCatch.cpp
create mode 100644 final/test/FrontendC++/2007-05-03-VectorInit.cpp
create mode 100644 final/test/FrontendC++/2007-05-16-ReverseBitFieldCrash.cpp
create mode 100644 final/test/FrontendC++/2007-05-23-TryFinally.cpp
create mode 100644 final/test/FrontendC++/2007-07-04-NestedCatches.cpp
create mode 100644 final/test/FrontendC++/2007-07-29-RestrictPtrArg.cpp
create mode 100644 final/test/FrontendC++/2007-07-29-RestrictRefArg.cpp
create mode 100644 final/test/FrontendC++/2007-08-01-RestrictMethod.cpp
create mode 100644 final/test/FrontendC++/2007-09-10-RecursiveTypeResolution.cpp
create mode 100644 final/test/FrontendC++/2007-10-01-StructResize.cpp
create mode 100644 final/test/FrontendC++/2008-01-11-BadWarning.cpp
create mode 100644 final/test/FrontendC++/2008-01-12-VecInit.cpp
create mode 100644 final/test/FrontendC++/2008-05-07-CrazyOffsetOf.cpp
create mode 100644 final/test/FrontendC++/2008-10-29-WrongOffset.cpp
create mode 100644 final/test/FrontendC++/2009-02-07-VolatileArrayRefHack.cpp
create mode 100644 final/test/FrontendC++/2009-02-16-CtorNames-dbg.cpp
create mode 100644 final/test/FrontendC++/2009-03-17-dbg.cpp
create mode 100644 final/test/FrontendC++/2009-04-21-DtorNames-dbg.cpp
create mode 100644 final/test/FrontendC++/2009-04-23-bool2.cpp
create mode 100644 final/test/FrontendC++/2009-05-04-PureConstNounwind.cpp
create mode 100644 final/test/FrontendC++/2009-06-16-DebugInfoCrash.cpp
create mode 100644 final/test/FrontendC++/2009-06-20-DarwinPPCLayout.cpp
create mode 100644 final/test/FrontendC++/2009-06-30-ByrefBlock.cpp
create mode 100644 final/test/FrontendC++/2009-07-15-LineNumbers.cpp
create mode 100644 final/test/FrontendC++/2009-07-16-PrivateCopyConstructor.cpp
create mode 100644 final/test/FrontendC++/2009-07-16-Using.cpp
create mode 100644 final/test/FrontendC++/2009-08-05-ZeroInitWidth.cpp
create mode 100644
final/test/FrontendC++/2009-08-11-VectorRetTy.cpp create mode 100644 final/test/FrontendC++/2009-09-04-modify-crash.cpp create mode 100644 final/test/FrontendC++/2009-09-09-packed-layout.cpp create mode 100644 final/test/FrontendC++/2009-10-27-crash.cpp create mode 100644 final/test/FrontendC++/2009-12-23-MissingSext.cpp create mode 100644 final/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp create mode 100644 final/test/FrontendC++/2010-03-22-empty-baseclass.cpp create mode 100644 final/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp create mode 100644 final/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp create mode 100644 final/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp create mode 100644 final/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp create mode 100644 final/test/FrontendC++/2010-06-21-LocalVarDbg.cpp create mode 100644 final/test/FrontendC++/2010-06-22-BitfieldInit.cpp create mode 100644 final/test/FrontendC++/2010-06-22-ZeroBitfield.cpp create mode 100644 final/test/FrontendC++/2010-07-19-nowarn.cpp create mode 100644 final/test/FrontendC++/2010-07-23-DeclLoc.cpp create mode 100644 final/test/FrontendC++/2010-08-31-ByValArg.cpp create mode 100644 final/test/FrontendC++/alignstack.cpp create mode 100644 final/test/FrontendC++/dg.exp create mode 100644 final/test/FrontendC++/integration-O2.cpp create mode 100644 final/test/FrontendC++/m64-ptr.cpp create mode 100644 final/test/FrontendC++/member-alignment.cpp create mode 100644 final/test/FrontendC++/ptr-to-method-devirt.cpp create mode 100644 final/test/FrontendC++/thunk-linkonce-odr.cpp create mode 100644 final/test/FrontendC++/varargs.cpp create mode 100644 final/test/FrontendC++/weak-external.cpp create mode 100644 final/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp create mode 100644 final/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c create mode 100644 final/test/FrontendC/2002-01-24-ComplexSpaceInType.c create mode 100644 final/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c create mode 100644 final/test/FrontendC/2002-02-13-ConditionalInCall.c create mode 100644 final/test/FrontendC/2002-02-13-ReloadProblem.c create mode 100644 final/test/FrontendC/2002-02-13-TypeVarNameCollision.c create mode 100644 final/test/FrontendC/2002-02-13-UnnamedLocal.c create mode 100644 final/test/FrontendC/2002-02-14-EntryNodePreds.c create mode 100644 final/test/FrontendC/2002-02-16-RenamingTest.c create mode 100644 final/test/FrontendC/2002-02-17-ArgumentAddress.c create mode 100644 final/test/FrontendC/2002-02-18-64bitConstant.c create mode 100644 final/test/FrontendC/2002-02-18-StaticData.c create mode 100644 final/test/FrontendC/2002-03-11-LargeCharInString.c create mode 100644 final/test/FrontendC/2002-03-12-ArrayInitialization.c create mode 100644 final/test/FrontendC/2002-03-12-StructInitialize.c create mode 100644 final/test/FrontendC/2002-03-12-StructInitializer.c create mode 100644 final/test/FrontendC/2002-03-14-BrokenPHINode.c create mode 100644 final/test/FrontendC/2002-03-14-BrokenSSA.c create mode 100644 final/test/FrontendC/2002-03-14-QuotesInStrConst.c create mode 100644 final/test/FrontendC/2002-04-07-SwitchStmt.c create mode 100644 final/test/FrontendC/2002-04-08-LocalArray.c create mode 100644 final/test/FrontendC/2002-04-09-StructRetVal.c create mode 100644 final/test/FrontendC/2002-04-10-StructParameters.c create mode 100644 final/test/FrontendC/2002-05-23-StaticValues.c create mode 100644 final/test/FrontendC/2002-05-23-TypeNameCollision.c create mode 100644 
final/test/FrontendC/2002-05-24-Alloca.c create mode 100644 final/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c create mode 100644 final/test/FrontendC/2002-07-14-MiscListTests.c create mode 100644 final/test/FrontendC/2002-07-14-MiscTests.c create mode 100644 final/test/FrontendC/2002-07-14-MiscTests2.c create mode 100644 final/test/FrontendC/2002-07-14-MiscTests3.c create mode 100644 final/test/FrontendC/2002-07-16-HardStringInit.c create mode 100644 final/test/FrontendC/2002-07-17-StringConstant.c create mode 100644 final/test/FrontendC/2002-07-29-Casts.c create mode 100644 final/test/FrontendC/2002-07-30-SubregSetAssertion.c create mode 100644 final/test/FrontendC/2002-07-30-UnionTest.c create mode 100644 final/test/FrontendC/2002-07-30-VarArgsCallFailure.c create mode 100644 final/test/FrontendC/2002-07-31-BadAssert.c create mode 100644 final/test/FrontendC/2002-07-31-SubregFailure.c create mode 100644 final/test/FrontendC/2002-08-02-UnionTest.c create mode 100644 final/test/FrontendC/2002-08-19-RecursiveLocals.c create mode 100644 final/test/FrontendC/2002-09-08-PointerShifts.c create mode 100644 final/test/FrontendC/2002-09-18-UnionProblem.c create mode 100644 final/test/FrontendC/2002-09-19-StarInLabel.c create mode 100644 final/test/FrontendC/2002-10-12-TooManyArguments.c create mode 100644 final/test/FrontendC/2002-12-15-GlobalBoolTest.c create mode 100644 final/test/FrontendC/2002-12-15-GlobalConstantTest.c create mode 100644 final/test/FrontendC/2002-12-15-GlobalRedefinition.c create mode 100644 final/test/FrontendC/2002-12-15-StructParameters.c create mode 100644 final/test/FrontendC/2003-01-30-UnionInit.c create mode 100644 final/test/FrontendC/2003-03-03-DeferredType.c create mode 100644 final/test/FrontendC/2003-06-22-UnionCrash.c create mode 100644 final/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c create mode 100644 final/test/FrontendC/2003-06-26-CFECrash.c create mode 100644 final/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c create mode 100644 final/test/FrontendC/2003-07-22-ArrayAccessTypeSafety.c create mode 100644 final/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c create mode 100644 final/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c create mode 100644 final/test/FrontendC/2003-08-18-SigSetJmp.c create mode 100644 final/test/FrontendC/2003-08-18-StructAsValue.c create mode 100644 final/test/FrontendC/2003-08-20-BadBitfieldRef.c create mode 100644 final/test/FrontendC/2003-08-20-PrototypeMismatch.c create mode 100644 final/test/FrontendC/2003-08-20-vfork-bug.c create mode 100644 final/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c create mode 100644 final/test/FrontendC/2003-08-21-StmtExpr.c create mode 100644 final/test/FrontendC/2003-08-21-WideString.c create mode 100644 final/test/FrontendC/2003-08-23-LocalUnionTest.c create mode 100644 final/test/FrontendC/2003-08-29-BitFieldStruct.c create mode 100644 final/test/FrontendC/2003-08-29-HugeCharConst.c create mode 100644 final/test/FrontendC/2003-08-29-StructLayoutBug.c create mode 100644 final/test/FrontendC/2003-08-30-AggregateInitializer.c create mode 100644 final/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c create mode 100644 final/test/FrontendC/2003-09-18-BitfieldTests.c create mode 100644 final/test/FrontendC/2003-09-30-StructLayout.c create mode 100644 final/test/FrontendC/2003-10-02-UnionLValueError.c create mode 100644 final/test/FrontendC/2003-10-06-NegateExprType.c create mode 100644 final/test/FrontendC/2003-10-09-UnionInitializerBug.c create mode 100644 
final/test/FrontendC/2003-10-28-ident.c create mode 100644 final/test/FrontendC/2003-10-29-AsmRename.c create mode 100644 final/test/FrontendC/2003-11-01-C99-CompoundLiteral.c create mode 100644 final/test/FrontendC/2003-11-01-EmptyStructCrash.c create mode 100644 final/test/FrontendC/2003-11-01-GlobalUnionInit.c create mode 100644 final/test/FrontendC/2003-11-03-AddrArrayElement.c create mode 100644 final/test/FrontendC/2003-11-04-EmptyStruct.c create mode 100644 final/test/FrontendC/2003-11-04-OutOfMemory.c create mode 100644 final/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c create mode 100644 final/test/FrontendC/2003-11-12-VoidString.c create mode 100644 final/test/FrontendC/2003-11-13-TypeSafety.c create mode 100644 final/test/FrontendC/2003-11-16-StaticArrayInit.c create mode 100644 final/test/FrontendC/2003-11-18-CondExprLValue.c create mode 100644 final/test/FrontendC/2003-11-19-AddressOfRegister.c create mode 100644 final/test/FrontendC/2003-11-19-BitFieldArray.c create mode 100644 final/test/FrontendC/2003-11-20-Bitfields.c create mode 100644 final/test/FrontendC/2003-11-20-ComplexDivision.c create mode 100644 final/test/FrontendC/2003-11-20-UnionBitfield.c create mode 100644 final/test/FrontendC/2003-11-26-PointerShift.c create mode 100644 final/test/FrontendC/2003-11-27-ConstructorCast.c create mode 100644 final/test/FrontendC/2003-11-27-UnionCtorInitialization.c create mode 100644 final/test/FrontendC/2003-12-14-ExternInlineSupport.c create mode 100644 final/test/FrontendC/2004-01-01-UnknownInitSize.c create mode 100644 final/test/FrontendC/2004-01-08-ExternInlineRedefine.c create mode 100644 final/test/FrontendC/2004-02-12-LargeAggregateCopy.c create mode 100644 final/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c create mode 100644 final/test/FrontendC/2004-02-13-IllegalVararg.c create mode 100644 final/test/FrontendC/2004-02-13-Memset.c create mode 100644 final/test/FrontendC/2004-02-14-ZeroInitializer.c create mode 100644 final/test/FrontendC/2004-02-20-Builtins.c create mode 100644 final/test/FrontendC/2004-03-07-ComplexDivEquals.c create mode 100644 final/test/FrontendC/2004-03-07-ExternalConstant.c create mode 100644 final/test/FrontendC/2004-03-09-LargeArrayInitializers.c create mode 100644 final/test/FrontendC/2004-03-15-SimpleIndirectGoto.c create mode 100644 final/test/FrontendC/2004-03-16-AsmRegisterCrash.c create mode 100644 final/test/FrontendC/2004-05-07-VarArrays.c create mode 100644 final/test/FrontendC/2004-05-21-IncompleteEnum.c create mode 100644 final/test/FrontendC/2004-06-08-OpaqueStructArg.c create mode 100644 final/test/FrontendC/2004-06-17-UnorderedBuiltins.c create mode 100644 final/test/FrontendC/2004-06-17-UnorderedCompares.c create mode 100644 final/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c create mode 100644 final/test/FrontendC/2004-07-06-FunctionCast.c create mode 100644 final/test/FrontendC/2004-08-06-LargeStructTest.c create mode 100644 final/test/FrontendC/2004-11-25-UnnamedBitfieldPadding.c create mode 100644 final/test/FrontendC/2004-11-27-InvalidConstantExpr.c create mode 100644 final/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c create mode 100644 final/test/FrontendC/2004-11-27-VariableSizeInStructure.c create mode 100644 final/test/FrontendC/2005-01-02-ConstantInits.c create mode 100644 final/test/FrontendC/2005-01-02-PointerDifference.c create mode 100644 final/test/FrontendC/2005-01-02-VAArgError-ICE.c create mode 100644 final/test/FrontendC/2005-02-20-AggregateSAVEEXPR.c create mode 100644 
final/test/FrontendC/2005-02-27-MarkGlobalConstant.c create mode 100644 final/test/FrontendC/2005-03-05-OffsetOfHack.c create mode 100644 final/test/FrontendC/2005-03-06-OffsetOfStructCrash.c create mode 100644 final/test/FrontendC/2005-03-11-Prefetch.c create mode 100644 final/test/FrontendC/2005-04-09-ComplexOps.c create mode 100644 final/test/FrontendC/2005-05-06-CountBuiltins.c create mode 100644 final/test/FrontendC/2005-05-10-GlobalUnionInit.c create mode 100644 final/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c create mode 100644 final/test/FrontendC/2005-07-20-SqrtNoErrno.c create mode 100644 final/test/FrontendC/2005-07-26-UnionInitCrash.c create mode 100644 final/test/FrontendC/2005-07-28-IncorrectWeakGlobal.c create mode 100644 final/test/FrontendC/2005-09-20-ComplexConstants.c create mode 100644 final/test/FrontendC/2005-09-24-AsmUserPrefix.c create mode 100644 final/test/FrontendC/2005-09-24-BitFieldCrash.c create mode 100644 final/test/FrontendC/2005-10-18-VariableSizedElementCrash.c create mode 100644 final/test/FrontendC/2005-12-04-AttributeUsed.c create mode 100644 final/test/FrontendC/2005-12-04-DeclarationLineNumbers.c create mode 100644 final/test/FrontendC/2006-01-13-Includes.c create mode 100644 final/test/FrontendC/2006-01-13-StackSave.c create mode 100644 final/test/FrontendC/2006-01-16-BitCountIntrinsicsUnsigned.c create mode 100644 final/test/FrontendC/2006-01-23-FileScopeAsm.c create mode 100644 final/test/FrontendC/2006-03-03-MissingInitializer.c create mode 100644 final/test/FrontendC/2006-03-16-VectorCtor.c create mode 100644 final/test/FrontendC/2006-03-17-KnRMismatch.c create mode 100644 final/test/FrontendC/2006-05-01-AppleAlignmentPragma.c create mode 100644 final/test/FrontendC/2006-05-19-SingleEltReturn.c create mode 100644 final/test/FrontendC/2006-07-31-PR854.c create mode 100644 final/test/FrontendC/2006-09-11-BitfieldRefCrash.c create mode 100644 final/test/FrontendC/2006-09-18-fwrite-cast-crash.c create mode 100644 final/test/FrontendC/2006-09-21-IncompleteElementType.c create mode 100644 final/test/FrontendC/2006-09-25-DebugFilename.c create mode 100644 final/test/FrontendC/2006-09-25-DebugFilename.h create mode 100644 final/test/FrontendC/2006-09-28-SimpleAsm.c create mode 100644 final/test/FrontendC/2006-10-30-ArrayCrash.c create mode 100644 final/test/FrontendC/2006-12-14-ordered_expr.c create mode 100644 final/test/FrontendC/2007-01-06-KNR-Proto.c create mode 100644 final/test/FrontendC/2007-01-20-VectorICE.c create mode 100644 final/test/FrontendC/2007-01-24-InlineAsmCModifier.c create mode 100644 final/test/FrontendC/2007-02-04-AddrLValue-2.c create mode 100644 final/test/FrontendC/2007-02-04-AddrLValue.c create mode 100644 final/test/FrontendC/2007-02-04-EmptyStruct.c create mode 100644 final/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c create mode 100644 final/test/FrontendC/2007-02-05-nested.c create mode 100644 final/test/FrontendC/2007-02-07-AddrLabel.c create mode 100644 final/test/FrontendC/2007-02-16-VariableSizeStructArg.c create mode 100644 final/test/FrontendC/2007-02-16-VoidPtrDiff.c create mode 100644 final/test/FrontendC/2007-02-16-WritableStrings.c create mode 100644 final/test/FrontendC/2007-02-25-C-DotDotDot.c create mode 100644 final/test/FrontendC/2007-03-01-VarSizeArrayIdx.c create mode 100644 final/test/FrontendC/2007-03-05-DataLayout.c create mode 100644 final/test/FrontendC/2007-03-06-VarSizeInStruct1.c create mode 100644 final/test/FrontendC/2007-03-06-VarSizeInStruct2.c create mode 100644 
final/test/FrontendC/2007-03-26-BitfieldAfterZeroWidth.c create mode 100644 final/test/FrontendC/2007-03-26-ZeroWidthBitfield.c create mode 100644 final/test/FrontendC/2007-03-27-ArrayCompatible.c create mode 100644 final/test/FrontendC/2007-03-27-VarLengthArray.c create mode 100644 final/test/FrontendC/2007-04-05-PackedBitFields-2.c create mode 100644 final/test/FrontendC/2007-04-05-PackedBitFields.c create mode 100644 final/test/FrontendC/2007-04-05-PackedStruct.c create mode 100644 final/test/FrontendC/2007-04-05-PadBeforeZeroLengthField.c create mode 100644 final/test/FrontendC/2007-04-05-UnPackedStruct.c create mode 100644 final/test/FrontendC/2007-04-11-InlineAsmStruct.c create mode 100644 final/test/FrontendC/2007-04-11-InlineAsmUnion.c create mode 100644 final/test/FrontendC/2007-04-11-InlineStorageClassC89.c create mode 100644 final/test/FrontendC/2007-04-11-InlineStorageClassC99.c create mode 100644 final/test/FrontendC/2007-04-11-PR1321.c create mode 100644 final/test/FrontendC/2007-04-13-InlineAsmStruct2.c create mode 100644 final/test/FrontendC/2007-04-13-InlineAsmUnion2.c create mode 100644 final/test/FrontendC/2007-04-14-FNoBuiltin.c create mode 100644 final/test/FrontendC/2007-04-17-ZeroSizeBitFields.c create mode 100644 final/test/FrontendC/2007-04-24-VolatileStructCopy.c create mode 100644 final/test/FrontendC/2007-04-24-bit-not-expr.c create mode 100644 final/test/FrontendC/2007-04-24-str-const.c create mode 100644 final/test/FrontendC/2007-05-07-NestedStructReturn.c create mode 100644 final/test/FrontendC/2007-05-07-PaddingElements.c create mode 100644 final/test/FrontendC/2007-05-08-PCH.c create mode 100644 final/test/FrontendC/2007-05-11-str-const.c create mode 100644 final/test/FrontendC/2007-05-15-PaddingElement.c create mode 100644 final/test/FrontendC/2007-05-16-EmptyStruct.c create mode 100644 final/test/FrontendC/2007-05-29-UnionCopy.c create mode 100644 final/test/FrontendC/2007-06-05-NoInlineAttribute.c create mode 100644 final/test/FrontendC/2007-06-15-AnnotateAttribute.c create mode 100644 final/test/FrontendC/2007-06-18-SextAttrAggregate.c create mode 100644 final/test/FrontendC/2007-07-29-RestrictPtrArg.c create mode 100644 final/test/FrontendC/2007-08-01-LoadStoreAlign.c create mode 100644 final/test/FrontendC/2007-08-21-ComplexCst.c create mode 100644 final/test/FrontendC/2007-08-22-CTTZ.c create mode 100644 final/test/FrontendC/2007-09-05-ConstCtor.c create mode 100644 final/test/FrontendC/2007-09-12-PragmaPack.c create mode 100644 final/test/FrontendC/2007-09-14-NegatePointer.c create mode 100644 final/test/FrontendC/2007-09-17-WeakRef.c create mode 100644 final/test/FrontendC/2007-09-20-GcrootAttribute.c create mode 100644 final/test/FrontendC/2007-09-26-Alignment.c create mode 100644 final/test/FrontendC/2007-09-27-ComplexIntCompare.c create mode 100644 final/test/FrontendC/2007-09-28-PackedUnionMember.c create mode 100644 final/test/FrontendC/2007-10-01-BuildArrayRef.c create mode 100644 final/test/FrontendC/2007-10-02-VolatileArray.c create mode 100644 final/test/FrontendC/2007-10-15-VoidPtr.c create mode 100644 final/test/FrontendC/2007-10-30-Volatile.c create mode 100644 final/test/FrontendC/2007-11-07-AlignedMemcpy.c create mode 100644 final/test/FrontendC/2007-11-07-CopyAggregateAlign.c create mode 100644 final/test/FrontendC/2007-11-07-ZeroAggregateAlign.c create mode 100644 final/test/FrontendC/2007-11-27-SExtZExt.c create mode 100644 final/test/FrontendC/2007-11-28-GlobalInitializer.c create mode 100644 
final/test/FrontendC/2007-12-16-AsmNoUnwind.c create mode 100644 final/test/FrontendC/2007-12-VarArrayDebug.c create mode 100644 final/test/FrontendC/2008-01-04-WideBitfield.c create mode 100644 final/test/FrontendC/2008-01-07-UnusualIntSize.c create mode 100644 final/test/FrontendC/2008-01-11-ChainConsistency.c create mode 100644 final/test/FrontendC/2008-01-21-PackedBitFields.c create mode 100644 final/test/FrontendC/2008-01-21-PackedStructField.c create mode 100644 final/test/FrontendC/2008-01-24-StructAlignAndBitFields.c create mode 100644 final/test/FrontendC/2008-01-25-ByValReadNone.c create mode 100644 final/test/FrontendC/2008-01-25-ZeroSizedAggregate.c create mode 100644 final/test/FrontendC/2008-01-28-PragmaMark.c create mode 100644 final/test/FrontendC/2008-01-28-UnionSize.c create mode 100644 final/test/FrontendC/2008-02-11-AnnotateBuiltin.c create mode 100644 final/test/FrontendC/2008-03-03-CtorAttrType.c create mode 100644 final/test/FrontendC/2008-03-05-syncPtr.c create mode 100644 final/test/FrontendC/2008-03-24-BitField-And-Alloca.c create mode 100644 final/test/FrontendC/2008-03-26-PackedBitFields.c create mode 100644 final/test/FrontendC/2008-04-08-NoExceptions.c create mode 100644 final/test/FrontendC/2008-05-06-CFECrash.c create mode 100644 final/test/FrontendC/2008-05-12-TempUsedBeforeDef.c create mode 100644 final/test/FrontendC/2008-05-19-AlwaysInline.c create mode 100644 final/test/FrontendC/2008-07-08-FAbsAttributes.c create mode 100644 final/test/FrontendC/2008-07-29-EHLabel.ll create mode 100644 final/test/FrontendC/2008-08-07-AlignPadding1.c create mode 100644 final/test/FrontendC/2008-08-07-AlignPadding2.c create mode 100644 final/test/FrontendC/2008-08-07-GEPIntToPtr.c create mode 100644 final/test/FrontendC/2008-09-03-WeakAlias.c create mode 100644 final/test/FrontendC/2008-10-13-FrontendCrash.c create mode 100644 final/test/FrontendC/2008-10-30-ZeroPlacement.c create mode 100644 final/test/FrontendC/2008-11-02-WeakAlias.c create mode 100644 final/test/FrontendC/2008-11-08-InstCombineSelect.c create mode 100644 final/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c create mode 100644 final/test/FrontendC/2008-12-23-AsmIntPointerTie.c create mode 100644 final/test/FrontendC/2009-01-05-BlockInlining.c create mode 100644 final/test/FrontendC/2009-01-20-k8.c create mode 100644 final/test/FrontendC/2009-01-21-InvalidIterator.c create mode 100644 final/test/FrontendC/2009-02-13-zerosize-union-field-ppc.c create mode 100644 final/test/FrontendC/2009-02-13-zerosize-union-field.c create mode 100644 final/test/FrontendC/2009-02-17-BitField-dbg.c create mode 100644 final/test/FrontendC/2009-03-01-MallocNoAlias.c create mode 100644 final/test/FrontendC/2009-03-08-ZeroEltStructCrash.c create mode 100644 final/test/FrontendC/2009-03-09-WeakDeclarations-1.c create mode 100644 final/test/FrontendC/2009-03-13-dbg.c create mode 100644 final/test/FrontendC/2009-04-22-UnknownSize.c create mode 100644 final/test/FrontendC/2009-04-28-UnionArrayCrash.c create mode 100644 final/test/FrontendC/2009-05-04-EnumInreg.c create mode 100644 final/test/FrontendC/2009-05-17-AlwaysInline.c create mode 100644 final/test/FrontendC/2009-06-14-HighlyAligned.c create mode 100644 final/test/FrontendC/2009-06-18-StaticInitTailPadPack.c create mode 100644 final/test/FrontendC/2009-07-14-VoidPtr.c create mode 100644 final/test/FrontendC/2009-07-15-pad-wchar_t-array.c create mode 100644 final/test/FrontendC/2009-07-17-VoidParameter.c create mode 100644 
final/test/FrontendC/2009-07-22-StructLayout.c create mode 100644 final/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c create mode 100644 final/test/FrontendC/2009-09-24-SqrtErrno.c create mode 100644 final/test/FrontendC/2009-12-07-BitFieldAlignment.c create mode 100644 final/test/FrontendC/2010-01-05-LinkageName.c create mode 100644 final/test/FrontendC/2010-01-13-MemBarrier.c create mode 100644 final/test/FrontendC/2010-01-14-FnType-DebugInfo.c create mode 100644 final/test/FrontendC/2010-01-14-StaticVariable.c create mode 100644 final/test/FrontendC/2010-01-18-Inlined-Debug.c create mode 100644 final/test/FrontendC/2010-02-10-PointerName.c create mode 100644 final/test/FrontendC/2010-02-15-DbgStaticVar.c create mode 100644 final/test/FrontendC/2010-02-16-DbgVarScope.c create mode 100644 final/test/FrontendC/2010-02-18-Dbg-VectorType.c create mode 100644 final/test/FrontendC/2010-03-10-arm-asmreg.c create mode 100644 final/test/FrontendC/2010-03-5-LexicalScope.c create mode 100644 final/test/FrontendC/2010-05-14-Optimized-VarType.c create mode 100644 final/test/FrontendC/2010-05-18-asmsched.c create mode 100644 final/test/FrontendC/2010-05-18-palignr.c create mode 100644 final/test/FrontendC/2010-05-26-AsmSideEffect.c create mode 100644 final/test/FrontendC/2010-05-31-palignr.c create mode 100644 final/test/FrontendC/2010-06-11-SaveExpr.c create mode 100644 final/test/FrontendC/2010-06-17-asmcrash.c create mode 100644 final/test/FrontendC/2010-06-28-DbgLocalVar.c create mode 100644 final/test/FrontendC/2010-06-28-nowarn.c create mode 100644 final/test/FrontendC/2010-07-08-DeclDebugLineNo.c create mode 100644 final/test/FrontendC/2010-07-14-overconservative-align.c create mode 100644 final/test/FrontendC/2010-07-14-ref-off-end.c create mode 100644 final/test/FrontendC/2010-07-27-MinNoFoldConst.c create mode 100644 final/test/FrontendC/2010-08-12-asm-aggr-arg.c create mode 100644 final/test/FrontendC/2010-11-16-asmblock.c create mode 100644 final/test/FrontendC/2010-12-01-CommonGlobal.c create mode 100644 final/test/FrontendC/2011-02-21-DATA-common.c create mode 100644 final/test/FrontendC/Atomics-no64bit.c create mode 100644 final/test/FrontendC/Atomics.c create mode 100644 final/test/FrontendC/BasicInstrs.c create mode 100644 final/test/FrontendC/alignstack.c create mode 100644 final/test/FrontendC/always-inline.c create mode 100644 final/test/FrontendC/arrayderef.c create mode 100644 final/test/FrontendC/asm-reg-var-local.c create mode 100644 final/test/FrontendC/attribute_constructor.c create mode 100644 final/test/FrontendC/block-copy.c create mode 100644 final/test/FrontendC/crash-invalid-array.c create mode 100644 final/test/FrontendC/cstring-align.c create mode 100644 final/test/FrontendC/dg.exp create mode 100644 final/test/FrontendC/exact-div-expr.c create mode 100644 final/test/FrontendC/extern-weak.c create mode 100644 final/test/FrontendC/fp-logical.c create mode 100644 final/test/FrontendC/func-aligned.c create mode 100644 final/test/FrontendC/funccall.c create mode 100644 final/test/FrontendC/hidden-visibility.c create mode 100644 final/test/FrontendC/implicit-arg.c create mode 100644 final/test/FrontendC/inline-asm-function.c create mode 100644 final/test/FrontendC/inline-asm-mrv.c create mode 100644 final/test/FrontendC/libcalls-d.c create mode 100644 final/test/FrontendC/libcalls-ld.c create mode 100644 final/test/FrontendC/libcalls.c create mode 100644 final/test/FrontendC/misaligned-param.c create mode 100644 final/test/FrontendC/nested-functions.c create mode 
100644 final/test/FrontendC/pr2394.c create mode 100644 final/test/FrontendC/pr3518.c create mode 100644 final/test/FrontendC/pr4349.c create mode 100644 final/test/FrontendC/pr5406.c create mode 100644 final/test/FrontendC/ptr-rotate.c create mode 100644 final/test/FrontendC/redef-ext-inline.c create mode 100644 final/test/FrontendC/sret.c create mode 100644 final/test/FrontendC/sret2.c create mode 100644 final/test/FrontendC/unaligned-memcpy.c create mode 100644 final/test/FrontendC/union-align.c create mode 100644 final/test/FrontendC/vla-1.c create mode 100644 final/test/FrontendC/vla-2.c create mode 100644 final/test/FrontendC/wchar-const.c create mode 100644 final/test/FrontendC/weak_constant.c create mode 100644 final/test/FrontendFortran/2008-11-03-OptionOverride.f90 create mode 100644 final/test/FrontendFortran/2009-02-09-FloorDivExpr.f90 create mode 100644 final/test/FrontendFortran/cpow.f90 create mode 100644 final/test/FrontendFortran/dg.exp create mode 100644 final/test/FrontendObjC++/2007-10-03-MetadataPointers.mm create mode 100644 final/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm create mode 100644 final/test/FrontendObjC++/2010-08-04-Template.mm create mode 100644 final/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm create mode 100644 final/test/FrontendObjC++/dg.exp create mode 100644 final/test/FrontendObjC/2007-04-03-ObjcEH.m create mode 100644 final/test/FrontendObjC/2007-05-02-Strong.m create mode 100644 final/test/FrontendObjC/2007-09-25-EH.m create mode 100644 final/test/FrontendObjC/2007-10-17-SJLJExceptions.m create mode 100644 final/test/FrontendObjC/2007-10-18-ProDescriptor.m create mode 100644 final/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m create mode 100644 final/test/FrontendObjC/2008-10-3-EhValue.m create mode 100644 final/test/FrontendObjC/2008-11-12-Metadata.m create mode 100644 final/test/FrontendObjC/2008-11-24-ConstCFStrings.m create mode 100644 final/test/FrontendObjC/2008-11-25-Blocks.m create mode 100644 final/test/FrontendObjC/2009-01-26-WriteBarrier-2.m create mode 100644 final/test/FrontendObjC/2009-02-05-VolatileProp.m create mode 100644 final/test/FrontendObjC/2009-04-14-AsmSection.m create mode 100644 final/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m create mode 100644 final/test/FrontendObjC/2009-04-28-bitfield-vs-vbc.m create mode 100644 final/test/FrontendObjC/2009-08-05-utf16.m create mode 100644 final/test/FrontendObjC/2009-08-17-DebugInfo.m create mode 100644 final/test/FrontendObjC/2009-11-30-Objc-ID.m create mode 100644 final/test/FrontendObjC/2010-02-01-utf16-with-null.m create mode 100644 final/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m create mode 100644 final/test/FrontendObjC/2010-02-23-DbgInheritance.m create mode 100644 final/test/FrontendObjC/2010-03-17-StructRef.m create mode 100644 final/test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m create mode 100644 final/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m create mode 100644 final/test/FrontendObjC/dg.exp create mode 100644 final/test/Integer/2007-01-19-TruncSext.ll create mode 100644 final/test/Integer/BitArith.ll create mode 100644 final/test/Integer/BitBit.ll create mode 100644 final/test/Integer/BitCast.ll create mode 100644 final/test/Integer/BitIcmp.ll create mode 100644 final/test/Integer/BitMem.ll create mode 100644 final/test/Integer/BitMisc.ll create mode 100644 final/test/Integer/BitPacked.ll create mode 100644 final/test/Integer/alignment_bt.ll create mode 100644 final/test/Integer/basictest_bt.ll create mode 100644 
final/test/Integer/cfgstructures_bt.ll create mode 100644 final/test/Integer/constexpr_bt.ll create mode 100644 final/test/Integer/constpointer_bt.ll create mode 100644 final/test/Integer/dg.exp create mode 100644 final/test/Integer/fold-fpcast_bt.ll create mode 100644 final/test/Integer/forwardreftest_bt.ll create mode 100644 final/test/Integer/globalredefinition_bt.ll create mode 100644 final/test/Integer/globalvars_bt.ll create mode 100644 final/test/Integer/indirectcall2_bt.ll create mode 100644 final/test/Integer/indirectcall_bt.ll create mode 100644 final/test/Integer/instructions_bt.ll create mode 100644 final/test/Integer/newcasts_bt.ll create mode 100644 final/test/Integer/opaquetypes_bt.ll create mode 100644 final/test/Integer/packed_bt.ll create mode 100644 final/test/Integer/packed_struct_bt.ll create mode 100644 final/test/Integer/paramattrs_bt.ll create mode 100644 final/test/Integer/properties_bt.ll create mode 100644 final/test/Integer/prototype_bt.ll create mode 100644 final/test/Integer/recursivetype_bt.ll create mode 100644 final/test/Integer/simplecalltest_bt.ll create mode 100644 final/test/Integer/small_bt.ll create mode 100644 final/test/Integer/testalloca_bt.ll create mode 100644 final/test/Integer/testarith_bt.ll create mode 100644 final/test/Integer/testconstants_bt.ll create mode 100644 final/test/Integer/testicmp_bt.ll create mode 100644 final/test/Integer/testlogical_bt.ll create mode 100644 final/test/Integer/testlogical_new_bt.ll create mode 100644 final/test/Integer/testmemory_bt.ll create mode 100644 final/test/Integer/testswitch_bt.ll create mode 100644 final/test/Integer/testvarargs_bt.ll create mode 100644 final/test/Integer/undefined_bt.ll create mode 100644 final/test/Integer/unreachable_bt.ll create mode 100644 final/test/Integer/varargs_bt.ll create mode 100644 final/test/Integer/varargs_new_bt.ll create mode 100644 final/test/LLVMC/Alias.td create mode 100644 final/test/LLVMC/AppendCmdHook.td create mode 100644 final/test/LLVMC/C++/dash-x.cpp create mode 100644 final/test/LLVMC/C++/dg.exp create mode 100644 final/test/LLVMC/C++/filelist.cpp create mode 100644 final/test/LLVMC/C++/hello.cpp create mode 100644 final/test/LLVMC/C++/just-compile.cpp create mode 100644 final/test/LLVMC/C++/together.cpp create mode 100644 final/test/LLVMC/C++/unknown_suffix.unk create mode 100644 final/test/LLVMC/C/dg.exp create mode 100644 final/test/LLVMC/C/emit-llvm-opt.c create mode 100644 final/test/LLVMC/C/emit-llvm.c create mode 100644 final/test/LLVMC/C/hello.c create mode 100644 final/test/LLVMC/C/include.c create mode 100644 final/test/LLVMC/C/opt-test.c create mode 100644 final/test/LLVMC/C/sink.c create mode 100644 final/test/LLVMC/C/wall.c create mode 100644 final/test/LLVMC/EmptyCompilationGraph.td create mode 100644 final/test/LLVMC/EnvParentheses.td create mode 100644 final/test/LLVMC/ForwardAs.td create mode 100644 final/test/LLVMC/ForwardTransformedValue.td create mode 100644 final/test/LLVMC/ForwardValue.td create mode 100644 final/test/LLVMC/HookWithArguments.td create mode 100644 final/test/LLVMC/HookWithInFile.td create mode 100644 final/test/LLVMC/Init.td create mode 100644 final/test/LLVMC/LanguageMap.td create mode 100644 final/test/LLVMC/MultiValuedOption.td create mode 100644 final/test/LLVMC/MultipleCompilationGraphs.td create mode 100644 final/test/LLVMC/MultipleOutputLanguages.td create mode 100644 final/test/LLVMC/NoActions.td create mode 100644 final/test/LLVMC/NoCompilationGraph.td create mode 100644 final/test/LLVMC/ObjC++/dg.exp create 
mode 100644 final/test/LLVMC/ObjC++/hello.mm create mode 100644 final/test/LLVMC/ObjC/dg.exp create mode 100644 final/test/LLVMC/ObjC/hello.m create mode 100644 final/test/LLVMC/OneOrMore.td create mode 100644 final/test/LLVMC/OptionPreprocessor.td create mode 100644 final/test/LLVMC/OutputSuffixHook.td create mode 100644 final/test/LLVMC/TestWarnings.td create mode 100644 final/test/LLVMC/dg.exp create mode 100644 final/test/LLVMC/test_data/false.c create mode 100644 final/test/LLVMC/test_data/false.cpp create mode 100644 final/test/LLVMC/test_data/false2.cpp create mode 100644 final/test/LLVMC/test_data/together.c create mode 100644 final/test/Linker/2002-07-17-GlobalFail.ll create mode 100644 final/test/Linker/2002-07-17-LinkTest2.ll create mode 100644 final/test/Linker/2002-08-20-ConstantExpr.ll create mode 100644 final/test/Linker/2003-01-30-LinkerRename.ll create mode 100644 final/test/Linker/2003-01-30-LinkerTypeRename.ll create mode 100644 final/test/Linker/2003-04-21-Linkage.ll create mode 100644 final/test/Linker/2003-04-23-LinkOnceLost.ll create mode 100644 final/test/Linker/2003-04-26-NullPtrLinkProblem.ll create mode 100644 final/test/Linker/2003-05-15-TypeProblem.ll create mode 100644 final/test/Linker/2003-05-31-LinkerRename.ll create mode 100644 final/test/Linker/2003-06-02-TypeResolveProblem.ll create mode 100644 final/test/Linker/2003-06-02-TypeResolveProblem2.ll create mode 100644 final/test/Linker/2003-08-20-OpaqueTypeResolve.ll create mode 100644 final/test/Linker/2003-08-23-GlobalVarLinking.ll create mode 100644 final/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll create mode 100644 final/test/Linker/2003-08-24-InheritPtrSize.ll create mode 100644 final/test/Linker/2003-08-28-TypeResolvesGlobal.ll create mode 100644 final/test/Linker/2003-08-28-TypeResolvesGlobal2.ll create mode 100644 final/test/Linker/2003-08-28-TypeResolvesGlobal3.ll create mode 100644 final/test/Linker/2003-10-21-ConflictingTypesTolerance.ll create mode 100644 final/test/Linker/2003-10-27-LinkOncePromote.ll create mode 100644 final/test/Linker/2003-11-18-TypeResolution.ll create mode 100644 final/test/Linker/2004-02-17-WeakStrongLinkage.ll create mode 100644 final/test/Linker/2004-05-07-TypeResolution1.ll create mode 100644 final/test/Linker/2004-05-07-TypeResolution2.ll create mode 100644 final/test/Linker/2004-12-03-DisagreeingType.ll create mode 100644 final/test/Linker/2005-02-12-ConstantGlobals-2.ll create mode 100644 final/test/Linker/2005-02-12-ConstantGlobals.ll create mode 100644 final/test/Linker/2005-12-06-AppendingZeroLengthArrays.ll create mode 100644 final/test/Linker/2006-01-19-ConstantPacked.ll create mode 100644 final/test/Linker/2006-06-15-GlobalVarAlignment.ll create mode 100644 final/test/Linker/2008-03-05-AliasReference.ll create mode 100644 final/test/Linker/2008-03-05-AliasReference2.ll create mode 100644 final/test/Linker/2008-03-07-DroppedSection_a.ll create mode 100644 final/test/Linker/2008-03-07-DroppedSection_b.ll create mode 100644 final/test/Linker/2008-06-13-LinkOnceRedefinition.ll create mode 100644 final/test/Linker/2008-06-26-AddressSpace.ll create mode 100644 final/test/Linker/2008-07-06-AliasFnDecl.ll create mode 100644 final/test/Linker/2008-07-06-AliasFnDecl2.ll create mode 100644 final/test/Linker/2008-07-06-AliasWeakDest.ll create mode 100644 final/test/Linker/2008-07-06-AliasWeakDest2.ll create mode 100644 final/test/Linker/2009-09-03-mdnode.ll create mode 100644 final/test/Linker/2009-09-03-mdnode2.ll create mode 100644 
final/test/Linker/AppendingLinkage.ll create mode 100644 final/test/Linker/AppendingLinkage2.ll create mode 100644 final/test/Linker/ConstantGlobals1.ll create mode 100644 final/test/Linker/ConstantGlobals2.ll create mode 100644 final/test/Linker/ConstantGlobals3.ll create mode 100644 final/test/Linker/LinkOnce.ll create mode 100644 final/test/Linker/PR8300.ll create mode 100644 final/test/Linker/available_externally_a.ll create mode 100644 final/test/Linker/available_externally_b.ll create mode 100644 final/test/Linker/basiclink.ll create mode 100644 final/test/Linker/dg.exp create mode 100644 final/test/Linker/link-archive.ll create mode 100644 final/test/Linker/link-global-to-func.ll create mode 100644 final/test/Linker/link-messages.ll create mode 100644 final/test/Linker/linkmdnode.ll create mode 100644 final/test/Linker/linkmdnode2.ll create mode 100644 final/test/Linker/linknamedmdnode.ll create mode 100644 final/test/Linker/linknamedmdnode2.ll create mode 100644 final/test/Linker/metadata-a.ll create mode 100644 final/test/Linker/metadata-b.ll create mode 100644 final/test/Linker/partial-type-refinement-link.ll create mode 100644 final/test/Linker/partial-type-refinement.ll create mode 100644 final/test/Linker/redefinition.ll create mode 100644 final/test/Linker/testlink1.ll create mode 100644 final/test/Linker/testlink2.ll create mode 100644 final/test/Linker/unnamed-addr1-a.ll create mode 100644 final/test/Linker/unnamed-addr1-b.ll create mode 100644 final/test/Linker/weakextern.ll create mode 100644 final/test/MC/ARM/arm_fixups.s create mode 100644 final/test/MC/ARM/arm_instructions.s create mode 100644 final/test/MC/ARM/arm_word_directive.s create mode 100644 final/test/MC/ARM/bracket-darwin.s create mode 100644 final/test/MC/ARM/bracket-exprs.s create mode 100644 final/test/MC/ARM/darwin-ARM-reloc.s create mode 100644 final/test/MC/ARM/darwin-Thumb-reloc.s create mode 100644 final/test/MC/ARM/dg.exp create mode 100644 final/test/MC/ARM/elf-eflags-eabi.s create mode 100644 final/test/MC/ARM/elf-movt.s create mode 100644 final/test/MC/ARM/elf-reloc-01.ll create mode 100644 final/test/MC/ARM/elf-reloc-02.ll create mode 100644 final/test/MC/ARM/elf-reloc-03.ll create mode 100644 final/test/MC/ARM/full_line_comment.s create mode 100644 final/test/MC/ARM/hilo-16bit-relocations.s create mode 100644 final/test/MC/ARM/neon-abs-encoding.s create mode 100644 final/test/MC/ARM/neon-absdiff-encoding.s create mode 100644 final/test/MC/ARM/neon-add-encoding.s create mode 100644 final/test/MC/ARM/neon-bitcount-encoding.s create mode 100644 final/test/MC/ARM/neon-bitwise-encoding.s create mode 100644 final/test/MC/ARM/neon-cmp-encoding.s create mode 100644 final/test/MC/ARM/neon-convert-encoding.s create mode 100644 final/test/MC/ARM/neon-dup-encoding.s create mode 100644 final/test/MC/ARM/neon-minmax-encoding.s create mode 100644 final/test/MC/ARM/neon-mov-encoding.s create mode 100644 final/test/MC/ARM/neon-mul-accum-encoding.s create mode 100644 final/test/MC/ARM/neon-mul-encoding.s create mode 100644 final/test/MC/ARM/neon-neg-encoding.s create mode 100644 final/test/MC/ARM/neon-pairwise-encoding.s create mode 100644 final/test/MC/ARM/neon-reciprocal-encoding.s create mode 100644 final/test/MC/ARM/neon-reverse-encoding.s create mode 100644 final/test/MC/ARM/neon-satshift-encoding.s create mode 100644 final/test/MC/ARM/neon-shift-encoding.s create mode 100644 final/test/MC/ARM/neon-shiftaccum-encoding.s create mode 100644 final/test/MC/ARM/neon-shuffle-encoding.s create mode 100644 
final/test/MC/ARM/neon-sub-encoding.s create mode 100644 final/test/MC/ARM/neon-table-encoding.s create mode 100644 final/test/MC/ARM/neon-vld-encoding.s create mode 100644 final/test/MC/ARM/neon-vst-encoding.s create mode 100644 final/test/MC/ARM/neont2-abs-encoding.s create mode 100644 final/test/MC/ARM/neont2-absdiff-encoding.s create mode 100644 final/test/MC/ARM/neont2-add-encoding.s create mode 100644 final/test/MC/ARM/neont2-bitcount-encoding.s create mode 100644 final/test/MC/ARM/neont2-bitwise-encoding.s create mode 100644 final/test/MC/ARM/neont2-cmp-encoding.s create mode 100644 final/test/MC/ARM/neont2-convert-encoding.s create mode 100644 final/test/MC/ARM/neont2-dup-encoding.s create mode 100644 final/test/MC/ARM/neont2-minmax-encoding.s create mode 100644 final/test/MC/ARM/neont2-mov-encoding.s create mode 100644 final/test/MC/ARM/neont2-mul-accum-encoding.s create mode 100644 final/test/MC/ARM/neont2-mul-encoding.s create mode 100644 final/test/MC/ARM/neont2-neg-encoding.s create mode 100644 final/test/MC/ARM/neont2-pairwise-encoding.s create mode 100644 final/test/MC/ARM/neont2-reciprocal-encoding.s create mode 100644 final/test/MC/ARM/neont2-reverse-encoding.s create mode 100644 final/test/MC/ARM/neont2-satshift-encoding.s create mode 100644 final/test/MC/ARM/neont2-shift-encoding.s create mode 100644 final/test/MC/ARM/neont2-shiftaccum-encoding.s create mode 100644 final/test/MC/ARM/neont2-shuffle-encoding.s create mode 100644 final/test/MC/ARM/neont2-sub-encoding.s create mode 100644 final/test/MC/ARM/neont2-table-encoding.s create mode 100644 final/test/MC/ARM/neont2-vld-encoding.s create mode 100644 final/test/MC/ARM/neont2-vst-encoding.s create mode 100644 final/test/MC/ARM/prefetch.ll create mode 100644 final/test/MC/ARM/reg-list.s create mode 100644 final/test/MC/ARM/simple-encoding.ll create mode 100644 final/test/MC/ARM/simple-fp-encoding.s create mode 100644 final/test/MC/ARM/thumb.s create mode 100644 final/test/MC/ARM/thumb2.s create mode 100644 final/test/MC/ARM/thumb2_instructions.s create mode 100644 final/test/MC/AsmParser/assignment.s create mode 100644 final/test/MC/AsmParser/conditional_asm.s create mode 100644 final/test/MC/AsmParser/dash-n.s create mode 100644 final/test/MC/AsmParser/dg.exp create mode 100644 final/test/MC/AsmParser/directive_abort.s create mode 100644 final/test/MC/AsmParser/directive_align.s create mode 100644 final/test/MC/AsmParser/directive_ascii.s create mode 100644 final/test/MC/AsmParser/directive_comm.s create mode 100644 final/test/MC/AsmParser/directive_darwin_section.s create mode 100644 final/test/MC/AsmParser/directive_desc.s create mode 100644 final/test/MC/AsmParser/directive_elf_size.s create mode 100644 final/test/MC/AsmParser/directive_file.s create mode 100644 final/test/MC/AsmParser/directive_fill.s create mode 100644 final/test/MC/AsmParser/directive_include.s create mode 100644 final/test/MC/AsmParser/directive_lcomm.s create mode 100644 final/test/MC/AsmParser/directive_line.s create mode 100644 final/test/MC/AsmParser/directive_loc.s create mode 100644 final/test/MC/AsmParser/directive_lsym.s create mode 100644 final/test/MC/AsmParser/directive_org.s create mode 100644 final/test/MC/AsmParser/directive_set.s create mode 100644 final/test/MC/AsmParser/directive_space.s create mode 100644 final/test/MC/AsmParser/directive_subsections_via_symbols.s create mode 100644 final/test/MC/AsmParser/directive_symbol_attrs.s create mode 100644 final/test/MC/AsmParser/directive_tbss.s create mode 100644 
final/test/MC/AsmParser/directive_tdata.s create mode 100644 final/test/MC/AsmParser/directive_thread_init_func.s create mode 100644 final/test/MC/AsmParser/directive_tlv.s create mode 100644 final/test/MC/AsmParser/directive_values.s create mode 100644 final/test/MC/AsmParser/directive_zerofill.s create mode 100644 final/test/MC/AsmParser/dollars-in-identifiers.s create mode 100644 final/test/MC/AsmParser/equ.s create mode 100644 final/test/MC/AsmParser/expr_symbol_modifiers.s create mode 100644 final/test/MC/AsmParser/exprs-invalid.s create mode 100644 final/test/MC/AsmParser/exprs.s create mode 100644 final/test/MC/AsmParser/floating-literals.s create mode 100644 final/test/MC/AsmParser/hello.s create mode 100644 final/test/MC/AsmParser/ifdef.s create mode 100644 final/test/MC/AsmParser/ifndef.s create mode 100644 final/test/MC/AsmParser/labels.s create mode 100644 final/test/MC/AsmParser/macro-def-in-instantiation.s create mode 100644 final/test/MC/AsmParser/macros-parsing.s create mode 100644 final/test/MC/AsmParser/macros.s create mode 100644 final/test/MC/AsmParser/rename.s create mode 100644 final/test/MC/AsmParser/section.s create mode 100644 final/test/MC/AsmParser/variables-invalid.s create mode 100644 final/test/MC/AsmParser/variables.s create mode 100644 final/test/MC/COFF/align-nops.s create mode 100644 final/test/MC/COFF/basic-coff.s create mode 100644 final/test/MC/COFF/bss.s create mode 100644 final/test/MC/COFF/dg.exp create mode 100644 final/test/MC/COFF/module-asm.ll create mode 100644 final/test/MC/COFF/simple-fixups.s create mode 100644 final/test/MC/COFF/switch-relocations.ll create mode 100644 final/test/MC/COFF/symbol-alias.s create mode 100644 final/test/MC/COFF/symbol-fragment-offset.s create mode 100644 final/test/MC/COFF/weak.s create mode 100644 final/test/MC/Disassembler/ARM/arm-tests.txt create mode 100644 final/test/MC/Disassembler/ARM/dg.exp create mode 100644 final/test/MC/Disassembler/ARM/neon-tests.txt create mode 100644 final/test/MC/Disassembler/ARM/thumb-tests.txt create mode 100644 final/test/MC/Disassembler/MBlaze/dg.exp create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_branch.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_fpu.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_fsl.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_imm.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_memory.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_operands.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_pattern.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_shift.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_special.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_typea.txt create mode 100644 final/test/MC/Disassembler/MBlaze/mblaze_typeb.txt create mode 100644 final/test/MC/Disassembler/X86/dg.exp create mode 100644 final/test/MC/Disassembler/X86/enhanced.txt create mode 100644 final/test/MC/Disassembler/X86/simple-tests.txt create mode 100644 final/test/MC/Disassembler/X86/truncated-input.txt create mode 100644 final/test/MC/ELF/abs.s create mode 100644 final/test/MC/ELF/alias-reloc.s create mode 100644 final/test/MC/ELF/alias.s create mode 100644 final/test/MC/ELF/align-bss.s create mode 100644 final/test/MC/ELF/align-nops.s create mode 100644 final/test/MC/ELF/align-size.s create mode 100644 final/test/MC/ELF/align-text.s create mode 100644 final/test/MC/ELF/align.s create mode 100644 
final/test/MC/ELF/bad-section.s
create mode 100644 final/test/MC/ELF/basic-elf-32.s
create mode 100644 final/test/MC/ELF/basic-elf-64.s
create mode 100644 final/test/MC/ELF/bracket-exprs.s
create mode 100644 final/test/MC/ELF/bracket.s
create mode 100644 final/test/MC/ELF/bss.ll
create mode 100644 final/test/MC/ELF/call-abs.s
create mode 100644 final/test/MC/ELF/cfi-advance-loc2.s
create mode 100644 final/test/MC/ELF/cfi-def-cfa-offset.s
create mode 100644 final/test/MC/ELF/cfi-def-cfa-register.s
create mode 100644 final/test/MC/ELF/cfi-def-cfa.s
create mode 100644 final/test/MC/ELF/cfi-offset.s
create mode 100644 final/test/MC/ELF/cfi-remember.s
create mode 100644 final/test/MC/ELF/cfi-zero-addr-delta.s
create mode 100644 final/test/MC/ELF/cfi.s
create mode 100644 final/test/MC/ELF/comdat.s
create mode 100644 final/test/MC/ELF/common.s
create mode 100644 final/test/MC/ELF/common2.s
create mode 100644 final/test/MC/ELF/debug-line.s
create mode 100644 final/test/MC/ELF/debug-loc.s
create mode 100644 final/test/MC/ELF/dg.exp
create mode 100644 final/test/MC/ELF/diff.s
create mode 100644 final/test/MC/ELF/diff2.s
create mode 100644 final/test/MC/ELF/elf_directive_previous.s
create mode 100644 final/test/MC/ELF/elf_directive_section.s
create mode 100644 final/test/MC/ELF/empty-dwarf-lines.s
create mode 100644 final/test/MC/ELF/empty.s
create mode 100644 final/test/MC/ELF/entsize.ll
create mode 100644 final/test/MC/ELF/entsize.s
create mode 100644 final/test/MC/ELF/file.s
create mode 100644 final/test/MC/ELF/global-offset.s
create mode 100644 final/test/MC/ELF/got.s
create mode 100644 final/test/MC/ELF/ident.s
create mode 100644 final/test/MC/ELF/invalid-symver.s
create mode 100644 final/test/MC/ELF/leb128.s
create mode 100644 final/test/MC/ELF/local-reloc.s
create mode 100644 final/test/MC/ELF/merge.s
create mode 100644 final/test/MC/ELF/n_bytes.s
create mode 100644 final/test/MC/ELF/no-fixup.s
create mode 100644 final/test/MC/ELF/noexec.s
create mode 100644 final/test/MC/ELF/norelocation.s
create mode 100644 final/test/MC/ELF/org.s
create mode 100644 final/test/MC/ELF/pic-diff.s
create mode 100644 final/test/MC/ELF/plt.s
create mode 100644 final/test/MC/ELF/pr9292.s
create mode 100644 final/test/MC/ELF/relax-arith.s
create mode 100644 final/test/MC/ELF/relax-crash.s
create mode 100644 final/test/MC/ELF/relax.s
create mode 100644 final/test/MC/ELF/relocation-386.s
create mode 100644 final/test/MC/ELF/relocation-pc.s
create mode 100644 final/test/MC/ELF/relocation.s
create mode 100644 final/test/MC/ELF/rename.s
create mode 100644 final/test/MC/ELF/section-quoting.s
create mode 100644 final/test/MC/ELF/section.s
create mode 100644 final/test/MC/ELF/set.s
create mode 100644 final/test/MC/ELF/sleb.s
create mode 100644 final/test/MC/ELF/symref.s
create mode 100644 final/test/MC/ELF/tls-i386.s
create mode 100644 final/test/MC/ELF/tls.s
create mode 100644 final/test/MC/ELF/type.s
create mode 100644 final/test/MC/ELF/uleb.s
create mode 100644 final/test/MC/ELF/undef.s
create mode 100644 final/test/MC/ELF/undef2.s
create mode 100644 final/test/MC/ELF/weak-relocation.s
create mode 100644 final/test/MC/ELF/weak.s
create mode 100644 final/test/MC/ELF/weakref-plt.s
create mode 100644 final/test/MC/ELF/weakref-reloc.s
create mode 100644 final/test/MC/ELF/weakref.s
create mode 100644 final/test/MC/ELF/zero.s
create mode 100644 final/test/MC/MBlaze/dg.exp
create mode 100644 final/test/MC/MBlaze/mblaze_branch.s
create mode 100644 final/test/MC/MBlaze/mblaze_fpu.s
create mode 100644 final/test/MC/MBlaze/mblaze_fsl.s
create mode 100644 final/test/MC/MBlaze/mblaze_imm.s
create mode 100644 final/test/MC/MBlaze/mblaze_memory.s
create mode 100644 final/test/MC/MBlaze/mblaze_operands.s
create mode 100644 final/test/MC/MBlaze/mblaze_pattern.s
create mode 100644 final/test/MC/MBlaze/mblaze_shift.s
create mode 100644 final/test/MC/MBlaze/mblaze_special.s
create mode 100644 final/test/MC/MBlaze/mblaze_typea.s
create mode 100644 final/test/MC/MBlaze/mblaze_typeb.s
create mode 100644 final/test/MC/MachO/absolutize.s
create mode 100644 final/test/MC/MachO/comm-1.s
create mode 100644 final/test/MC/MachO/darwin-complex-difference.s
create mode 100644 final/test/MC/MachO/darwin-x86_64-diff-relocs.s
create mode 100644 final/test/MC/MachO/darwin-x86_64-reloc-offsets.s
create mode 100644 final/test/MC/MachO/darwin-x86_64-reloc.s
create mode 100644 final/test/MC/MachO/data.s
create mode 100644 final/test/MC/MachO/dg.exp
create mode 100644 final/test/MC/MachO/diff-with-two-sections.s
create mode 100644 final/test/MC/MachO/direction_labels.s
create mode 100644 final/test/MC/MachO/empty-dwarf-lines.s
create mode 100644 final/test/MC/MachO/indirect-symbols.s
create mode 100644 final/test/MC/MachO/jcc.s
create mode 100644 final/test/MC/MachO/lcomm-attributes.s
create mode 100644 final/test/MC/MachO/loc.s
create mode 100644 final/test/MC/MachO/pcrel-to-other-section.s
create mode 100644 final/test/MC/MachO/relax-jumps.s
create mode 100644 final/test/MC/MachO/relax-recompute-align.s
create mode 100644 final/test/MC/MachO/reloc-diff.s
create mode 100644 final/test/MC/MachO/reloc-pcrel-offset.s
create mode 100644 final/test/MC/MachO/reloc-pcrel.s
create mode 100644 final/test/MC/MachO/reloc.s
create mode 100644 final/test/MC/MachO/section-align-1.s
create mode 100644 final/test/MC/MachO/section-align-2.s
create mode 100644 final/test/MC/MachO/section-flags.s
create mode 100644 final/test/MC/MachO/string-table.s
create mode 100644 final/test/MC/MachO/symbol-diff.s
create mode 100644 final/test/MC/MachO/symbol-flags.s
create mode 100644 final/test/MC/MachO/symbol-indirect.s
create mode 100644 final/test/MC/MachO/symbols-1.s
create mode 100644 final/test/MC/MachO/tbss.s
create mode 100644 final/test/MC/MachO/tdata.s
create mode 100644 final/test/MC/MachO/thread_init_func.s
create mode 100644 final/test/MC/MachO/tls.s
create mode 100644 final/test/MC/MachO/tlv-reloc.s
create mode 100644 final/test/MC/MachO/tlv.s
create mode 100644 final/test/MC/MachO/values.s
create mode 100644 final/test/MC/MachO/weakdef.s
create mode 100644 final/test/MC/MachO/x86_32-optimal_nop.s
create mode 100644 final/test/MC/MachO/x86_32-sections.s
create mode 100644 final/test/MC/MachO/x86_32-symbols.s
create mode 100644 final/test/MC/MachO/x86_64-sections.s
create mode 100644 final/test/MC/MachO/x86_64-symbols.s
create mode 100644 final/test/MC/MachO/zerofill-1.s
create mode 100644 final/test/MC/MachO/zerofill-2.s
create mode 100644 final/test/MC/MachO/zerofill-3.s
create mode 100644 final/test/MC/MachO/zerofill-4.s
create mode 100644 final/test/MC/MachO/zerofill-5.s
create mode 100644 final/test/MC/MachO/zerofill-sect-align.s
create mode 100644 final/test/MC/X86/3DNow.s
create mode 100644 final/test/MC/X86/dg.exp
create mode 100644 final/test/MC/X86/x86-32-avx.s
create mode 100644 final/test/MC/X86/x86-32-coverage.s
create mode 100644 final/test/MC/X86/x86-32-fma3.s
create mode 100644 final/test/MC/X86/x86-32.s
create mode 100644 final/test/MC/X86/x86-64.s
create mode 100644 final/test/MC/X86/x86_64-avx-clmul-encoding.s
create mode 100644 final/test/MC/X86/x86_64-avx-encoding.s
create mode 100644 final/test/MC/X86/x86_64-encoding.s
create mode 100644 final/test/MC/X86/x86_64-fma3-encoding.s
create mode 100644 final/test/MC/X86/x86_64-imm-widths.s
create mode 100644 final/test/MC/X86/x86_directives.s
create mode 100644 final/test/MC/X86/x86_errors.s
create mode 100644 final/test/MC/X86/x86_operands.s
create mode 100644 final/test/Makefile
create mode 100644 final/test/Makefile.tests
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.coff-i386
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.coff-x86-64
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.elf-i386
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.elf-x86-64
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.macho-i386
create mode 100644 final/test/Object/TestObjectFiles/trivial-object-test.macho-x86-64
create mode 100644 final/test/Object/dg.exp
create mode 100644 final/test/Object/nm-trivial-object.test-broken
create mode 100644 final/test/Object/objdump-trivial-object.test-broken
create mode 100644 final/test/Other/2002-01-31-CallGraph.ll
create mode 100644 final/test/Other/2002-02-24-InlineBrokePHINodes.ll
create mode 100644 final/test/Other/2002-03-11-ConstPropCrash.ll
create mode 100644 final/test/Other/2003-02-19-LoopInfoNestingBug.ll
create mode 100644 final/test/Other/2004-08-16-PackedConstantInlineStore.ll
create mode 100644 final/test/Other/2004-08-16-PackedGlobalConstant.ll
create mode 100644 final/test/Other/2004-08-16-PackedSelect.ll
create mode 100644 final/test/Other/2004-08-16-PackedSimple.ll
create mode 100644 final/test/Other/2004-08-20-PackedControlFlow.ll
create mode 100644 final/test/Other/2006-02-05-PassManager.ll
create mode 100644 final/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll
create mode 100644 final/test/Other/2007-06-05-PassID.ll
create mode 100644 final/test/Other/2007-06-28-PassManager.ll
create mode 100644 final/test/Other/2007-09-10-PassManager.ll
create mode 100644 final/test/Other/2008-02-14-PassManager.ll
create mode 100644 final/test/Other/2008-06-04-FieldSizeInPacked.ll
create mode 100644 final/test/Other/2008-10-06-RemoveDeadPass.ll
create mode 100644 final/test/Other/2008-10-15-MissingSpace.ll
create mode 100644 final/test/Other/2009-03-31-CallGraph.ll
create mode 100644 final/test/Other/2009-06-05-no-implicit-float.ll
create mode 100644 final/test/Other/2009-09-14-function-elements.ll
create mode 100644 final/test/Other/2010-05-06-Printer.ll
create mode 100644 final/test/Other/close-stderr.ll
create mode 100644 final/test/Other/constant-fold-gep.ll
create mode 100644 final/test/Other/dg.exp
create mode 100644 final/test/Other/extract.ll
create mode 100644 final/test/Other/inline-asm-newline-terminator.ll
create mode 100644 final/test/Other/invalid-commandline-option.ll
create mode 100644 final/test/Other/lint.ll
create mode 100644 final/test/Scripts/README.txt
create mode 100755 final/test/Scripts/coff-dump.py
create mode 100644 final/test/Scripts/coff-dump.py.bat
create mode 100644 final/test/Scripts/common_dump.py
create mode 100755 final/test/Scripts/elf-dump
create mode 100644 final/test/Scripts/elf-dump.bat
create mode 100755 final/test/Scripts/ignore
create mode 100755 final/test/Scripts/macho-dumpx
create mode 100644 final/test/Scripts/macho-dumpx.bat
create mode 100644 final/test/TableGen/2003-08-03-PassCode.td
create mode 100644 final/test/TableGen/2006-09-18-LargeInt.td
create mode 100644 final/test/TableGen/2010-03-24-PrematureDefaults.td
create mode 100644 final/test/TableGen/AnonDefinitionOnDemand.td
create mode 100644 final/test/TableGen/BitsInitOverflow.td
create mode 100644 final/test/TableGen/CStyleComment.td
create mode 100644 final/test/TableGen/Dag.td
create mode 100644 final/test/TableGen/DefmInherit.td
create mode 100644 final/test/TableGen/DefmInsideMultiClass.td
create mode 100644 final/test/TableGen/FieldAccess.td
create mode 100644 final/test/TableGen/ForwardRef.td
create mode 100644 final/test/TableGen/GeneralList.td
create mode 100644 final/test/TableGen/Include.inc
create mode 100644 final/test/TableGen/Include.td
create mode 100644 final/test/TableGen/IntBitInit.td
create mode 100644 final/test/TableGen/LazyChange.td
create mode 100644 final/test/TableGen/LetInsideMultiClasses.td
create mode 100644 final/test/TableGen/ListArgs.td
create mode 100644 final/test/TableGen/ListArgsSimple.td
create mode 100644 final/test/TableGen/ListConversion.td
create mode 100644 final/test/TableGen/ListManip.td
create mode 100644 final/test/TableGen/ListSlices.td
create mode 100644 final/test/TableGen/MultiClass.td
create mode 100644 final/test/TableGen/MultiClassDefName.td
create mode 100644 final/test/TableGen/MultiClassInherit.td
create mode 100644 final/test/TableGen/Slice.td
create mode 100644 final/test/TableGen/String.td
create mode 100644 final/test/TableGen/SuperSubclassSameName.td
create mode 100644 final/test/TableGen/TargetInstrInfo.td
create mode 100644 final/test/TableGen/TargetInstrSpec.td
create mode 100644 final/test/TableGen/TemplateArgRename.td
create mode 100644 final/test/TableGen/Tree.td
create mode 100644 final/test/TableGen/TreeNames.td
create mode 100644 final/test/TableGen/UnsetBitInit.td
create mode 100644 final/test/TableGen/UnterminatedComment.td
create mode 100644 final/test/TableGen/cast.td
create mode 100644 final/test/TableGen/defmclass.td
create mode 100644 final/test/TableGen/dg.exp
create mode 100644 final/test/TableGen/eq.td
create mode 100644 final/test/TableGen/eqbit.td
create mode 100644 final/test/TableGen/foreach.td
create mode 100644 final/test/TableGen/if.td
create mode 100644 final/test/TableGen/ifbit.td
create mode 100644 final/test/TableGen/lisp.td
create mode 100644 final/test/TableGen/nested-comment.td
create mode 100644 final/test/TableGen/strconcat.td
create mode 100644 final/test/TableGen/subst.td
create mode 100644 final/test/TableGen/subst2.td
create mode 100644 final/test/TableGen/usevalname.td
create mode 100755 final/test/TestRunner.sh
create mode 100644 final/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
create mode 100644 final/test/Transforms/ADCE/2002-05-22-PHITest.ll
create mode 100644 final/test/Transforms/ADCE/2002-05-23-ZeroArgPHITest.ll
create mode 100644 final/test/Transforms/ADCE/2002-05-28-Crash-distilled.ll
create mode 100644 final/test/Transforms/ADCE/2002-05-28-Crash.ll
create mode 100644 final/test/Transforms/ADCE/2002-07-17-AssertionFailure.ll
create mode 100644 final/test/Transforms/ADCE/2002-07-17-PHIAssertion.ll
create mode 100644 final/test/Transforms/ADCE/2002-07-29-Segfault.ll
create mode 100644 final/test/Transforms/ADCE/2003-01-22-PredecessorProblem.ll
create mode 100644 final/test/Transforms/ADCE/2003-04-25-PHIPostDominateProblem.ll
create mode 100644 final/test/Transforms/ADCE/2003-06-11-InvalidCFG.ll
create mode 100644 final/test/Transforms/ADCE/2003-06-24-BadSuccessor.ll
create mode 100644 final/test/Transforms/ADCE/2003-06-24-BasicFunctionality.ll
create mode 100644 final/test/Transforms/ADCE/2003-09-10-UnwindInstFail.ll
create mode 100644 final/test/Transforms/ADCE/2003-09-15-InfLoopCrash.ll
create mode 100644 final/test/Transforms/ADCE/2003-11-16-MissingPostDominanceInfo.ll
create mode 100644 final/test/Transforms/ADCE/2004-05-04-UnreachableBlock.ll
create mode 100644 final/test/Transforms/ADCE/2005-02-17-PHI-Invoke-Crash.ll
create mode 100644 final/test/Transforms/ADCE/basictest.ll
create mode 100644 final/test/Transforms/ADCE/basictest1.ll
create mode 100644 final/test/Transforms/ADCE/basictest2.ll
create mode 100644 final/test/Transforms/ADCE/dce_pure_call.ll
create mode 100644 final/test/Transforms/ADCE/dce_pure_invoke.ll
create mode 100644 final/test/Transforms/ADCE/dg.exp
create mode 100644 final/test/Transforms/ADCE/unreachable-function.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/2008-07-02-array-indexing.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/2008-09-07-CGUpdate.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/aggregate-promote.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/attrs.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/basictest.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/byval-2.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/byval.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/callgraph-update.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/chained.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/control-flow.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/control-flow2.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/crash.ll
create mode 100644 final/test/Transforms/ArgumentPromotion/dg.exp
create mode 100644 final/test/Transforms/ArgumentPromotion/pr3085.ll
create mode 100644 final/test/Transforms/BlockPlacement/basictest.ll
create mode 100644 final/test/Transforms/BlockPlacement/dg.exp
create mode 100644 final/test/Transforms/BranchFolding/2007-10-19-InlineAsmDirectives.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-13-LoopExtractorCrash.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-14-DominanceProblem.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-14-NoSwitchSupport.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-17-MissedLiveIns.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-17-UpdatePHIsOutsideRegion.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-03-18-InvokeHandling.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-08-12-BlockExtractPHI.ll
create mode 100644 final/test/Transforms/CodeExtractor/2004-11-12-InvokeExtract.ll
create mode 100644 final/test/Transforms/CodeExtractor/dg.exp
create mode 100644 final/test/Transforms/CodeGenPrepare/2008-11-24-RAUW-Self.ll
create mode 100644 final/test/Transforms/CodeGenPrepare/basic.ll
create mode 100644 final/test/Transforms/CodeGenPrepare/dg.exp
create mode 100644 final/test/Transforms/ConstProp/2002-05-03-DivideByZeroException.ll
create mode 100644 final/test/Transforms/ConstProp/2002-05-03-NotOperator.ll
create mode 100644 final/test/Transforms/ConstProp/2002-09-03-SetCC-Bools.ll
create mode 100644 final/test/Transforms/ConstProp/2003-05-12-DivideError.ll
create mode 100644 final/test/Transforms/ConstProp/2005-01-28-SetCCGEP.ll
create mode 100644 final/test/Transforms/ConstProp/2006-11-30-vector-cast.ll
create mode 100644 final/test/Transforms/ConstProp/2006-12-01-TruncBoolBug.ll
create mode 100644 final/test/Transforms/ConstProp/2006-12-01-bool-casts.ll
create mode 100644 final/test/Transforms/ConstProp/2007-02-05-BitCast.ll
create mode 100644 final/test/Transforms/ConstProp/2007-02-23-sdiv.ll
create mode 100644 final/test/Transforms/ConstProp/2007-11-23-cttz.ll
create mode 100644 final/test/Transforms/ConstProp/2008-07-07-VectorCompare.ll
create mode 100644 final/test/Transforms/ConstProp/2009-06-20-constexpr-zero-lhs.ll
create mode 100644 final/test/Transforms/ConstProp/2009-09-01-GEP-Crash.ll
create mode 100644 final/test/Transforms/ConstProp/basictest.ll
create mode 100644 final/test/Transforms/ConstProp/bitcast.ll
create mode 100644 final/test/Transforms/ConstProp/bswap.ll
create mode 100644 final/test/Transforms/ConstProp/calls.ll
create mode 100644 final/test/Transforms/ConstProp/constant-expr.ll
create mode 100644 final/test/Transforms/ConstProp/dg.exp
create mode 100644 final/test/Transforms/ConstProp/div-zero.ll
create mode 100644 final/test/Transforms/ConstProp/extractvalue.ll
create mode 100644 final/test/Transforms/ConstProp/float-to-ptr-cast.ll
create mode 100644 final/test/Transforms/ConstProp/insertvalue.ll
create mode 100644 final/test/Transforms/ConstProp/loads.ll
create mode 100644 final/test/Transforms/ConstProp/logicaltest.ll
create mode 100644 final/test/Transforms/ConstProp/overflow-ops.ll
create mode 100644 final/test/Transforms/ConstProp/phi.ll
create mode 100644 final/test/Transforms/ConstProp/remtest.ll
create mode 100644 final/test/Transforms/ConstantMerge/2002-09-23-CPR-Update.ll
create mode 100644 final/test/Transforms/ConstantMerge/2003-10-28-MergeExternalConstants.ll
create mode 100644 final/test/Transforms/ConstantMerge/2011-01-15-EitherOrder.ll
create mode 100644 final/test/Transforms/ConstantMerge/dg.exp
create mode 100644 final/test/Transforms/ConstantMerge/dont-merge.ll
create mode 100644 final/test/Transforms/ConstantMerge/merge-both.ll
create mode 100644 final/test/Transforms/ConstantMerge/unnamed-addr.ll
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/2010-09-02-Trunc.ll
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/2010-09-26-MergeConstantRange.ll
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/basic.ll
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/crash.ll
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/dg.exp
create mode 100644 final/test/Transforms/CorrelatedValuePropagation/non-null.ll
create mode 100644 final/test/Transforms/DeadArgElim/2006-06-27-struct-ret.ll
create mode 100644 final/test/Transforms/DeadArgElim/2007-02-07-FuncRename.ll
create mode 100644 final/test/Transforms/DeadArgElim/2007-10-18-VarargsReturn.ll
create mode 100644 final/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
create mode 100644 final/test/Transforms/DeadArgElim/2008-01-16-VarargsParamAttrs.ll
create mode 100644 final/test/Transforms/DeadArgElim/2008-06-23-DeadAfterLive.ll
create mode 100644 final/test/Transforms/DeadArgElim/2009-03-17-MRE-Invoke.ll
create mode 100644 final/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
create mode 100644 final/test/Transforms/DeadArgElim/basictest.ll
create mode 100644 final/test/Transforms/DeadArgElim/canon.ll
create mode 100644 final/test/Transforms/DeadArgElim/dead_vaargs.ll
create mode 100644 final/test/Transforms/DeadArgElim/deadexternal.ll
create mode 100644 final/test/Transforms/DeadArgElim/deadretval.ll
create mode 100644 final/test/Transforms/DeadArgElim/deadretval2.ll
create mode 100644 final/test/Transforms/DeadArgElim/dg.exp
create mode 100644 final/test/Transforms/DeadArgElim/keepalive.ll
create mode 100644 final/test/Transforms/DeadArgElim/multdeadretval.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/PartialStore.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/const-pointers.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/crash.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/dg.exp
create mode 100644 final/test/Transforms/DeadStoreElimination/free.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/lifetime.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/memintrinsics.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/no-targetdata.ll
create mode 100644 final/test/Transforms/DeadStoreElimination/simple.ll
create mode 100644 final/test/Transforms/EarlyCSE/basic.ll
create mode 100644 final/test/Transforms/EarlyCSE/dg.exp
create mode 100644 final/test/Transforms/FunctionAttrs/2008-09-03-Mutual.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-09-03-ReadNone.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-09-03-ReadOnly.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-09-13-VolatileRead.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-12-29-Constant.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2008-12-31-NoCapture.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2009-01-02-LocalStores.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
create mode 100644 final/test/Transforms/FunctionAttrs/2010-10-30-volatile.ll
create mode 100644 final/test/Transforms/FunctionAttrs/dg.exp
create mode 100644 final/test/Transforms/GVN/2007-07-25-DominatedLoop.ll
create mode 100644 final/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
create mode 100644 final/test/Transforms/GVN/2007-07-25-Loop.ll
create mode 100644 final/test/Transforms/GVN/2007-07-25-NestedLoop.ll
create mode 100644 final/test/Transforms/GVN/2007-07-25-SinglePredecessor.ll
create mode 100644 final/test/Transforms/GVN/2007-07-26-InterlockingLoops.ll
create mode 100644 final/test/Transforms/GVN/2007-07-26-NonRedundant.ll
create mode 100644 final/test/Transforms/GVN/2007-07-26-PhiErasure.ll
create mode 100644 final/test/Transforms/GVN/2007-07-30-PredIDom.ll
create mode 100644 final/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
create mode 100644 final/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
create mode 100644 final/test/Transforms/GVN/2008-02-12-UndefLoad.ll
create mode 100644 final/test/Transforms/GVN/2008-02-13-NewPHI.ll
create mode 100644 final/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
create mode 100644 final/test/Transforms/GVN/2008-02-26-MemCpySize.ll
create mode 100644 final/test/Transforms/GVN/2008-07-02-Unreachable.ll
create mode 100644 final/test/Transforms/GVN/2008-12-09-SelfRemove.ll
create mode 100644 final/test/Transforms/GVN/2008-12-12-RLE-Crash.ll
create mode 100644 final/test/Transforms/GVN/2008-12-14-rle-reanalyze.ll
create mode 100644 final/test/Transforms/GVN/2008-12-15-CacheVisited.ll
create mode 100644 final/test/Transforms/GVN/2009-01-21-SortInvalidation.ll
create mode 100644 final/test/Transforms/GVN/2009-01-22-SortInvalidation.ll
create mode 100644 final/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
create mode 100644 final/test/Transforms/GVN/2009-03-10-PREOnVoid.ll
create mode 100644 final/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
create mode 100644 final/test/Transforms/GVN/2009-07-13-MemDepSortFail.ll
create mode 100644 final/test/Transforms/GVN/2009-11-12-MemDepMallocBitCast.ll
create mode 100644 final/test/Transforms/GVN/2010-03-31-RedundantPHIs.ll
create mode 100644 final/test/Transforms/GVN/2010-05-08-OneBit.ll
create mode 100644 final/test/Transforms/GVN/2010-11-13-Simplify.ll
create mode 100644 final/test/Transforms/GVN/basic.ll
create mode 100644 final/test/Transforms/GVN/bitcast-of-call.ll
create mode 100644 final/test/Transforms/GVN/calls-nonlocal.ll
create mode 100644 final/test/Transforms/GVN/calls-readonly.ll
create mode 100644 final/test/Transforms/GVN/condprop.ll
create mode 100644 final/test/Transforms/GVN/crash-no-aa.ll
create mode 100644 final/test/Transforms/GVN/crash.ll
create mode 100644 final/test/Transforms/GVN/dg.exp
create mode 100644 final/test/Transforms/GVN/invariant-simple.ll
create mode 100644 final/test/Transforms/GVN/lifetime-simple.ll
create mode 100644 final/test/Transforms/GVN/load-constant-mem.ll
create mode 100644 final/test/Transforms/GVN/load-pre-align.ll
create mode 100644 final/test/Transforms/GVN/load-pre-licm.ll
create mode 100644 final/test/Transforms/GVN/local-pre.ll
create mode 100644 final/test/Transforms/GVN/lpre-call-wrap-2.ll
create mode 100644 final/test/Transforms/GVN/lpre-call-wrap.ll
create mode 100644 final/test/Transforms/GVN/mixed.ll
create mode 100644 final/test/Transforms/GVN/non-local-offset.ll
create mode 100644 final/test/Transforms/GVN/nonescaping-malloc.ll
create mode 100644 final/test/Transforms/GVN/null-aliases-nothing.ll
create mode 100644 final/test/Transforms/GVN/phi-translate.ll
create mode 100644 final/test/Transforms/GVN/pre-basic-add.ll
create mode 100644 final/test/Transforms/GVN/pre-load.ll
create mode 100644 final/test/Transforms/GVN/pre-single-pred.ll
create mode 100644 final/test/Transforms/GVN/preserve-tbaa.ll
create mode 100644 final/test/Transforms/GVN/rle-must-alias.ll
create mode 100644 final/test/Transforms/GVN/rle-no-phi-translate.ll
create mode 100644 final/test/Transforms/GVN/rle-nonlocal.ll
create mode 100644 final/test/Transforms/GVN/rle-phi-translate.ll
create mode 100644 final/test/Transforms/GVN/rle-semidominated.ll
create mode 100644 final/test/Transforms/GVN/rle.ll
create mode 100644 final/test/Transforms/GlobalDCE/2002-07-17-CastRef.ll
create mode 100644 final/test/Transforms/GlobalDCE/2002-07-17-ConstantRef.ll
create mode 100644 final/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
create mode 100644 final/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
create mode 100644 final/test/Transforms/GlobalDCE/2002-09-12-Redeletion.ll
create mode 100644 final/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
create mode 100644 final/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
create mode 100644 final/test/Transforms/GlobalDCE/2009-01-05-DeadAliases.ll
create mode 100644 final/test/Transforms/GlobalDCE/2009-02-17-AliasUsesAliasee.ll
create mode 100644 final/test/Transforms/GlobalDCE/2009-09-03-MDNode.ll
create mode 100644 final/test/Transforms/GlobalDCE/basicvariabletest.ll
create mode 100644 final/test/Transforms/GlobalDCE/dg.exp
create mode 100644 final/test/Transforms/GlobalDCE/externally_available.ll
create mode 100644 final/test/Transforms/GlobalOpt/2004-10-10-CastStoreOnce.ll
create mode 100644 final/test/Transforms/GlobalOpt/2005-06-15-LocalizeConstExprCrash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2006-07-07-InlineAsmCrash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2006-11-01-ShrinkGlobalPhiCrash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2007-06-04-PackedStruct.ll
create mode 100644 final/test/Transforms/GlobalOpt/2007-11-09-GEP-GEP-Crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-01-03-Crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-02-16-NestAttr.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
create mode 100644 final/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-02-15-BitcastAlias.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-06-01-RecursivePHI.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-11-16-BrokenPerformHeapAllocSRoA.ll
create mode 100644 final/test/Transforms/GlobalOpt/2009-11-16-MallocSingleStoreToGlobalVar.ll
create mode 100644 final/test/Transforms/GlobalOpt/2010-02-25-MallocPromote.ll
create mode 100644 final/test/Transforms/GlobalOpt/2010-02-26-MallocSROA.ll
create mode 100644 final/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
create mode 100644 final/test/Transforms/GlobalOpt/alias-resolve.ll
create mode 100644 final/test/Transforms/GlobalOpt/basictest.ll
create mode 100644 final/test/Transforms/GlobalOpt/constantexpr-dangle.ll
create mode 100644 final/test/Transforms/GlobalOpt/constantfold-initializers.ll
create mode 100644 final/test/Transforms/GlobalOpt/crash.ll
create mode 100644 final/test/Transforms/GlobalOpt/ctor-list-opt-constexpr.ll
create mode 100644 final/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
create mode 100644 final/test/Transforms/GlobalOpt/ctor-list-opt.ll
create mode 100644 final/test/Transforms/GlobalOpt/deadglobal-2.ll
create mode 100644 final/test/Transforms/GlobalOpt/deadglobal.ll
create mode 100644 final/test/Transforms/GlobalOpt/dg.exp
create mode 100644 final/test/Transforms/GlobalOpt/globalsra-partial.ll
create mode 100644 final/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
create mode 100644 final/test/Transforms/GlobalOpt/globalsra.ll
create mode 100644 final/test/Transforms/GlobalOpt/heap-sra-1.ll
create mode 100644 final/test/Transforms/GlobalOpt/heap-sra-2.ll
create mode 100644 final/test/Transforms/GlobalOpt/heap-sra-3.ll
create mode 100644 final/test/Transforms/GlobalOpt/heap-sra-4.ll
create mode 100644 final/test/Transforms/GlobalOpt/heap-sra-phi.ll
create mode 100644 final/test/Transforms/GlobalOpt/integer-bool.ll
create mode 100644 final/test/Transforms/GlobalOpt/iterate.ll
create mode 100644 final/test/Transforms/GlobalOpt/load-store-global.ll
create mode 100644 final/test/Transforms/GlobalOpt/malloc-promote-1.ll
create mode 100644 final/test/Transforms/GlobalOpt/malloc-promote-2.ll
create mode 100644 final/test/Transforms/GlobalOpt/memcpy.ll
create mode 100644 final/test/Transforms/GlobalOpt/memset.ll
create mode 100644 final/test/Transforms/GlobalOpt/metadata.ll
create mode 100644 final/test/Transforms/GlobalOpt/phi-select.ll
create mode 100644 final/test/Transforms/GlobalOpt/storepointer-compare.ll
create mode 100644 final/test/Transforms/GlobalOpt/storepointer.ll
create mode 100644 final/test/Transforms/GlobalOpt/trivialstore.ll
create mode 100644 final/test/Transforms/GlobalOpt/undef-init.ll
create mode 100644 final/test/Transforms/GlobalOpt/unnamed-addr.ll
create mode 100644 final/test/Transforms/IPConstantProp/2008-06-09-WeakProp.ll
create mode 100644 final/test/Transforms/IPConstantProp/2009-09-24-byval-ptr.ll
create mode 100644 final/test/Transforms/IPConstantProp/dangling-block-address.ll
create mode 100644 final/test/Transforms/IPConstantProp/deadarg.ll
create mode 100644 final/test/Transforms/IPConstantProp/dg.exp
create mode 100644 final/test/Transforms/IPConstantProp/recursion.ll
create mode 100644 final/test/Transforms/IPConstantProp/return-argument.ll
create mode 100644 final/test/Transforms/IPConstantProp/return-constant.ll
create mode 100644 final/test/Transforms/IPConstantProp/return-constants.ll
create mode 100644 final/test/Transforms/IPConstantProp/user-with-multiple-uses.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2002-09-09-PointerIndVar.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-04-16-ExprAnalysis.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-09-12-MultiplePred.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-09-23-NotAtTop.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-12-10-RemoveInstrCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-12-15-Crash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2003-12-21-IndVarSize.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2004-03-10-PHIInsertionBug.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2004-04-07-ScalarEvolutionCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2005-02-11-InvokeCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2005-02-17-TruncateExprCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2005-02-26-ExitValueCompute.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2005-06-15-InstMoveCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2005-11-18-Crash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2006-03-31-NegativeStride.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2006-06-16-Indvar-LCSSA-Crash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2006-09-20-LFTR-Crash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2006-12-10-BitCast.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2007-01-06-TripCount.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2007-06-06-DeleteDanglesPtr.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2007-11-23-BitcastCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2008-06-15-SCEVExpanderBug.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2008-10-03-CouldNotCompute.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2008-11-25-APFloatAssert.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2009-04-14-shorten_iv_vars.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2009-04-15-shorten-iv-vars-2.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2009-04-22-IndvarCrash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2009-04-27-Floating.ll
create mode 100644 final/test/Transforms/IndVarSimplify/2009-05-24-useafterfree.ll
create mode 100644 final/test/Transforms/IndVarSimplify/ada-loops.ll
create mode 100644 final/test/Transforms/IndVarSimplify/addrec-gep.ll
create mode 100644 final/test/Transforms/IndVarSimplify/ashr-tripcount.ll
create mode 100644 final/test/Transforms/IndVarSimplify/avoid-i0.ll
create mode 100644 final/test/Transforms/IndVarSimplify/casted-argument.ll
create mode 100644 final/test/Transforms/IndVarSimplify/complex-scev.ll
create mode 100644 final/test/Transforms/IndVarSimplify/crash.ll
create mode 100644 final/test/Transforms/IndVarSimplify/dangling-use.ll
create mode 100644 final/test/Transforms/IndVarSimplify/dg.exp
create mode 100644 final/test/Transforms/IndVarSimplify/divide-pointer.ll
create mode 100644 final/test/Transforms/IndVarSimplify/eliminate-comparison.ll
create mode 100644 final/test/Transforms/IndVarSimplify/eliminate-max.ll
create mode 100644 final/test/Transforms/IndVarSimplify/eliminate-rem.ll
create mode 100644 final/test/Transforms/IndVarSimplify/exit_value_tests.ll
create mode 100644 final/test/Transforms/IndVarSimplify/floating-point-iv.ll
create mode 100644 final/test/Transforms/IndVarSimplify/gep-with-mul-base.ll
create mode 100644 final/test/Transforms/IndVarSimplify/indirectbr.ll
create mode 100644 final/test/Transforms/IndVarSimplify/interesting-invoke-use.ll
create mode 100644 final/test/Transforms/IndVarSimplify/iterationCount_zext_or_trunc.ll
create mode 100644 final/test/Transforms/IndVarSimplify/iv-sext.ll
create mode 100644 final/test/Transforms/IndVarSimplify/iv-zext.ll
create mode 100644 final/test/Transforms/IndVarSimplify/lftr-other-uses.ll
create mode 100644 final/test/Transforms/IndVarSimplify/lftr-promote.ll
create mode 100644 final/test/Transforms/IndVarSimplify/lftr_simple.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate10.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate11.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate7.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate8.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate9.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_1.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_2.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_3.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_4.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_5.ll
create mode 100644 final/test/Transforms/IndVarSimplify/loop_evaluate_6.ll
create mode 100644 final/test/Transforms/IndVarSimplify/masked-iv.ll
create mode 100644 final/test/Transforms/IndVarSimplify/max-pointer.ll
create mode 100644 final/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
create mode 100644 final/test/Transforms/IndVarSimplify/pointer-indvars.ll
create mode 100644 final/test/Transforms/IndVarSimplify/pointer.ll
create mode 100644 final/test/Transforms/IndVarSimplify/polynomial-expand.ll
create mode 100644 final/test/Transforms/IndVarSimplify/preserve-gep-loop-variant.ll
create mode 100644 final/test/Transforms/IndVarSimplify/preserve-gep-nested.ll
create mode 100644 final/test/Transforms/IndVarSimplify/preserve-gep-remainder.ll
create mode 100644 final/test/Transforms/IndVarSimplify/preserve-gep.ll
create mode 100644 final/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
create mode 100644 final/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
create mode 100644 final/test/Transforms/IndVarSimplify/shrunk-constant.ll
create mode 100644 final/test/Transforms/IndVarSimplify/signed-trip-count.ll
create mode 100644 final/test/Transforms/IndVarSimplify/single-element-range.ll
create mode 100644 final/test/Transforms/IndVarSimplify/sink-alloca.ll
create mode 100644 final/test/Transforms/IndVarSimplify/sink-trapping.ll
create mode 100644 final/test/Transforms/IndVarSimplify/subtract.ll
create mode 100644 final/test/Transforms/IndVarSimplify/tripcount_compute.ll
create mode 100644 final/test/Transforms/IndVarSimplify/tripcount_infinite.ll
create mode 100644 final/test/Transforms/IndVarSimplify/udiv.ll
create mode 100644 final/test/Transforms/IndVarSimplify/uglygep.ll
create mode 100644 final/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
create mode 100644 final/test/Transforms/IndVarSimplify/variable-stride-ivs-1.ll
create mode 100644 final/test/Transforms/Inline/2003-09-14-InlineValue.ll
create mode 100644 final/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll
create mode 100644 final/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll
create mode 100644 final/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll
create mode 100644 final/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll
create mode 100644 final/test/Transforms/Inline/2003-10-26-InlineInvokeExceptionDestPhi.ll
create mode 100644 final/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll
create mode 100644 final/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll
create mode 100644 final/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll
create mode 100644 final/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll
create mode 100644 final/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll
create mode 100644 final/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll
create mode 100644 final/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll
create mode 100644 final/test/Transforms/Inline/2007-04-15-InlineEH.ll
create mode 100644 final/test/Transforms/Inline/2007-06-06-NoInline.ll
create mode 100644 final/test/Transforms/Inline/2007-06-25-WeakInline.ll
create mode 100644 final/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll
create mode 100644 final/test/Transforms/Inline/2008-03-04-StructRet.ll
create mode 100644 final/test/Transforms/Inline/2008-03-07-Inline-2.ll
create mode 100644 final/test/Transforms/Inline/2008-03-07-Inline.ll
create mode 100644 final/test/Transforms/Inline/2008-09-02-AlwaysInline.ll
create mode 100644 final/test/Transforms/Inline/2008-09-02-NoInline.ll
create mode 100644 final/test/Transforms/Inline/2008-10-30-AlwaysInline.ll
create mode 100644 final/test/Transforms/Inline/2008-11-04-AlwaysInline.ll
create mode 100644 final/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll
create mode 100644 final/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll
create mode 100644 final/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll
create mode 100644 final/test/Transforms/Inline/2010-05-12-ValueMap.ll
create mode 100644 final/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
create mode 100644 final/test/Transforms/Inline/PR4909.ll
create mode 100644 final/test/Transforms/Inline/alloca-in-scc.ll
create mode 100644 final/test/Transforms/Inline/alloca_test.ll
create mode 100644 final/test/Transforms/Inline/always_inline_dyn_alloca.ll
create mode 100644 final/test/Transforms/Inline/array_merge.ll
create mode 100644 final/test/Transforms/Inline/basictest.ll
create mode 100644 final/test/Transforms/Inline/byval.ll
create mode 100644 final/test/Transforms/Inline/callgraph-update.ll
create mode 100644 final/test/Transforms/Inline/casts.ll
create mode 100644 final/test/Transforms/Inline/cfg_preserve_test.ll
create mode 100644 final/test/Transforms/Inline/crash.ll
create mode 100644 final/test/Transforms/Inline/crash2.ll
create mode 100644 final/test/Transforms/Inline/delete-call.ll
create mode 100644 final/test/Transforms/Inline/devirtualize-2.ll
create mode 100644 final/test/Transforms/Inline/devirtualize-3.ll
create mode 100644 final/test/Transforms/Inline/devirtualize.ll
create mode 100644 final/test/Transforms/Inline/dg.exp
create mode 100644 final/test/Transforms/Inline/dynamic_alloca_test.ll
create mode 100644 final/test/Transforms/Inline/externally_available.ll
create mode 100644 final/test/Transforms/Inline/gvn-inline-iteration.ll
create mode 100644 final/test/Transforms/Inline/inline-invoke-tail.ll
create mode 100644 final/test/Transforms/Inline/inline-tail.ll
create mode 100644 final/test/Transforms/Inline/inline_cleanup.ll
create mode 100644 final/test/Transforms/Inline/inline_constprop.ll
create mode 100644 final/test/Transforms/Inline/inline_dce.ll
create mode 100644 final/test/Transforms/Inline/inline_prune.ll
create mode 100644 final/test/Transforms/Inline/invoke_test-1.ll
create mode 100644 final/test/Transforms/Inline/invoke_test-2.ll
create mode 100644 final/test/Transforms/Inline/invoke_test-3.ll
create mode 100644 final/test/Transforms/Inline/nested-inline.ll
create mode 100644 final/test/Transforms/Inline/noinline-recursive-fn.ll
create mode 100644 final/test/Transforms/Inline/noinline.ll
create mode 100644 final/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
create mode 100644 final/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
create mode 100644 final/test/Transforms/InstCombine/2002-08-02-CastTest.ll
create mode 100644 final/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
create mode 100644 final/test/Transforms/InstCombine/2003-05-26-CastMiscompile.ll
create mode 100644 final/test/Transforms/InstCombine/2003-05-27-ConstExprCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2003-06-05-BranchInvertInfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2003-07-21-ExternalConstant.ll
create mode 100644 final/test/Transforms/InstCombine/2003-08-12-AllocaNonNull.ll
create mode 100644 final/test/Transforms/InstCombine/2003-09-09-VolatileLoadElim.ll
create mode 100644 final/test/Transforms/InstCombine/2003-10-29-CallSiteResolve.ll
create mode 100644 final/test/Transforms/InstCombine/2003-11-03-VarargsCallBug.ll
create mode 100644 final/test/Transforms/InstCombine/2004-01-13-InstCombineInvokePHI.ll
create mode 100644 final/test/Transforms/InstCombine/2004-02-23-ShiftShiftOverflow.ll
create mode 100644 final/test/Transforms/InstCombine/2004-03-13-InstCombineInfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2004-04-04-InstCombineReplaceAllUsesWith.ll
create mode 100644 final/test/Transforms/InstCombine/2004-05-07-UnsizedCastLoad.ll
create mode 100644 final/test/Transforms/InstCombine/2004-07-27-ConstantExprMul.ll
create mode 100644 final/test/Transforms/InstCombine/2004-08-09-RemInfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2004-08-10-BoolSetCC.ll
create mode 100644 final/test/Transforms/InstCombine/2004-09-20-BadLoadCombine.ll
create mode 100644 final/test/Transforms/InstCombine/2004-09-20-BadLoadCombine2.ll
create mode 100644 final/test/Transforms/InstCombine/2004-09-28-BadShiftAndSetCC.ll
create mode 100644 final/test/Transforms/InstCombine/2004-11-22-Missed-and-fold.ll
create mode 100644 final/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
create mode 100644 final/test/Transforms/InstCombine/2004-12-08-RemInfiniteLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2005-03-04-ShiftOverflow.ll
create mode 100644 final/test/Transforms/InstCombine/2005-04-07-UDivSelectCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2005-06-15-DivSelectCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2005-06-15-ShiftSetCCCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2005-06-16-RangeCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2005-06-16-SetCCOrSetCCMiscompile.ll
create mode 100644 final/test/Transforms/InstCombine/2005-07-07-DeadPHILoop.ll
create mode 100644 final/test/Transforms/InstCombine/2006-02-13-DemandedMiscompile.ll
create mode 100644 final/test/Transforms/InstCombine/2006-02-28-Crash.ll
create mode 100644 final/test/Transforms/InstCombine/2006-03-30-ExtractElement.ll
create mode 100644 final/test/Transforms/InstCombine/2006-04-28-ShiftShiftLongLong.ll
create mode 100644 final/test/Transforms/InstCombine/2006-05-04-DemandedBitCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2006-09-15-CastToBool.ll
create mode 100644 final/test/Transforms/InstCombine/2006-10-19-SignedToUnsignedCastAndConst-2.ll
create mode 100644 final/test/Transforms/InstCombine/2006-10-20-mask.ll
create mode 100644 final/test/Transforms/InstCombine/2006-10-26-VectorReassoc.ll
create mode 100644 final/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
create mode 100644 final/test/Transforms/InstCombine/2006-11-10-ashr-miscompile.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-05-fp-to-int-ext.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-08-Phi-ICmp-Op-Fold.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-08-Select-ICmp.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-15-Range-Test.ll
create mode 100644 final/test/Transforms/InstCombine/2006-12-23-Select-Cmp-Cmp.ll
create mode 100644 final/test/Transforms/InstCombine/2007-01-13-ExtCompareMiscompile.ll
create mode 100644 final/test/Transforms/InstCombine/2007-01-14-FcmpSelf.ll
create mode 100644 final/test/Transforms/InstCombine/2007-01-18-VectorInfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
create mode 100644 final/test/Transforms/InstCombine/2007-02-01-LoadSinkAlloca.ll
create mode 100644 final/test/Transforms/InstCombine/2007-02-07-PointerCast.ll
create mode 100644 final/test/Transforms/InstCombine/2007-02-23-PhiFoldInfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-13-CompareMerge.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-19-BadTruncChangePR1261.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-21-SignedRangeTest.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-25-BadShiftMask.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-25-DoubleShift.ll
create mode 100644 final/test/Transforms/InstCombine/2007-03-26-BadShiftMask.ll
create mode 100644 final/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
create mode 100644 final/test/Transforms/InstCombine/2007-04-08-SingleEltVectorCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-05-04-Crash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-05-10-icmp-or.ll
create mode 100644 final/test/Transforms/InstCombine/2007-05-14-Crash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
create mode 100644 final/test/Transforms/InstCombine/2007-06-06-AshrSignBit.ll
create mode 100644 final/test/Transforms/InstCombine/2007-06-21-DivCompareMiscomp.ll
create mode 100644 final/test/Transforms/InstCombine/2007-08-02-InfiniteLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2007-09-10-AliasConstFold.ll
create mode 100644 final/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
create mode 100644 final/test/Transforms/InstCombine/2007-09-17-AliasConstFold2.ll
create mode 100644 final/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
create mode 100644 final/test/Transforms/InstCombine/2007-10-12-Crash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-10-28-stacksave.ll
create mode 100644 final/test/Transforms/InstCombine/2007-10-31-RangeCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-10-31-StringCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-11-15-CompareMiscomp.ll
create mode 100644 final/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
create mode 100644 final/test/Transforms/InstCombine/2007-12-10-ConstFoldCompare.ll
create mode 100644 final/test/Transforms/InstCombine/2007-12-12-GEPScale.ll
create mode 100644 final/test/Transforms/InstCombine/2007-12-16-AsmNoUnwind.ll
create mode 100644 final/test/Transforms/InstCombine/2007-12-18-AddSelCmpSub.ll
create mode 100644 final/test/Transforms/InstCombine/2007-12-28-IcmpSub2.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-06-CastCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-06-VoidCast.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-13-AndCmpCmp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-14-DoubleNest.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-14-VarArgTrampoline.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-21-MismatchedCastAndCompare.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-21-MulTrunc.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-27-FloatSelect.ll
create mode 100644 final/test/Transforms/InstCombine/2008-01-29-AddICmp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-02-13-MulURem.ll
create mode 100644 final/test/Transforms/InstCombine/2008-02-16-SDivOverflow.ll
create mode 100644 final/test/Transforms/InstCombine/2008-02-16-SDivOverflow2.ll
create mode 100644 final/test/Transforms/InstCombine/2008-02-23-MulSub.ll
create mode 100644 final/test/Transforms/InstCombine/2008-02-28-OrFCmpCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2008-03-13-IntToPtr.ll
create mode 100644 final/test/Transforms/InstCombine/2008-04-22-ByValBitcast.ll
create mode 100644 final/test/Transforms/InstCombine/2008-04-28-VolatileStore.ll
create mode 100644 final/test/Transforms/InstCombine/2008-04-29-VolatileLoadDontMerge.ll
create mode 100644 final/test/Transforms/InstCombine/2008-04-29-VolatileLoadMerge.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-08-LiveStoreDelete.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-08-StrLenSink.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-09-SinkOfInvoke.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-17-InfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-18-FoldIntToPtr.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-22-IDivVector.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-22-NegValVector.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-23-CompareFold.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-31-AddBool.ll
create mode 100644 final/test/Transforms/InstCombine/2008-05-31-Bools.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-05-ashr-crash.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-08-ICmpPHI.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-13-InfiniteLoopStore.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-13-ReadOnlyCallStore.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-19-UncondLoad.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-21-CompareMiscomp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-06-24-StackRestore.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-08-SubAnd.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-08-VolatileLoadMerge.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-09-SubAndError.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-10-ICmpBinOp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-11-RemAnd.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-13-DivZero.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-16-fsub.ll
create mode 100644 final/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
create mode 100644 final/test/Transforms/InstCombine/2008-08-05-And.ll
create mode 100644 final/test/Transforms/InstCombine/2008-08-17-ICmpXorSignbit.ll
create mode 100644 final/test/Transforms/InstCombine/2008-09-02-VectorCrash.ll
create mode 100644 final/test/Transforms/InstCombine/2008-09-29-FoldingOr.ll
create mode 100644 final/test/Transforms/InstCombine/2008-10-11-DivCompareFold.ll
create mode 100644 final/test/Transforms/InstCombine/2008-10-23-ConstFoldWithoutMask.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-01-SRemDemandedBits.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-08-FCmp.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-20-DivMulRem.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-27-IDivVector.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-27-MultiplyIntVec.ll
create mode 100644 final/test/Transforms/InstCombine/2008-11-27-UDivNegative.ll
create mode 100644 final/test/Transforms/InstCombine/2008-12-17-SRemNegConstVec.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-05-i128-crash.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-08-AlignAlloca.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-16-PointerAddrSpace.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-19-fmod-constant-float-specials.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-19-fmod-constant-float.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-24-EmptyStruct.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-31-InfIterate.ll
create mode 100644 final/test/Transforms/InstCombine/2009-01-31-Pressure.ll
create mode 100644 final/test/Transforms/InstCombine/2009-02-04-FPBitcast.ll
create mode 100644 final/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
create mode 100644 final/test/Transforms/InstCombine/2009-02-21-LoadCST.ll
create mode 100644 final/test/Transforms/InstCombine/2009-02-25-CrashZeroSizeArray.ll
create mode 100644 final/test/Transforms/InstCombine/2009-03-18-vector-ashr-crash.ll
create mode 100644 final/test/Transforms/InstCombine/2009-03-20-AShrOverShift.ll
create mode 100644 final/test/Transforms/InstCombine/2009-03-24-InfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2009-04-07-MulPromoteToI96.ll
create mode 100644 final/test/Transforms/InstCombine/2009-05-23-FCmpToICmp.ll
create mode 100644 final/test/Transforms/InstCombine/2009-06-11-StoreAddrSpace.ll
create mode 100644 final/test/Transforms/InstCombine/2009-06-16-SRemDemandedBits.ll
create mode 100644 final/test/Transforms/InstCombine/2009-07-02-MaskedIntVector.ll
create mode 100644 final/test/Transforms/InstCombine/2009-12-17-CmpSelectNull.ll
create mode 100644 final/test/Transforms/InstCombine/2010-01-28-NegativeSRem.ll
create mode 100644 final/test/Transforms/InstCombine/2010-03-03-ExtElim.ll
create mode 100644 final/test/Transforms/InstCombine/2010-11-01-lshr-mask.ll
create mode 100644 final/test/Transforms/InstCombine/2010-11-21-SizeZeroTypeGEP.ll
create mode 100644 final/test/Transforms/InstCombine/2010-11-23-Distributed.ll
create mode 100644 final/test/Transforms/InstCombine/2011-02-14-InfLoop.ll
create mode 100644 final/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
create mode 100644 final/test/Transforms/InstCombine/CPP_min_max.ll
create mode 100644 final/test/Transforms/InstCombine/IntPtrCast.ll
create mode 100644 final/test/Transforms/InstCombine/JavaCompare.ll
create mode 100644 final/test/Transforms/InstCombine/README.txt
create mode 100644 final/test/Transforms/InstCombine/add-shrink.ll
create mode 100644 final/test/Transforms/InstCombine/add-sitofp.ll
create mode 100644 final/test/Transforms/InstCombine/add.ll
create mode 100644 final/test/Transforms/InstCombine/add2.ll
create mode 100644 final/test/Transforms/InstCombine/add3.ll
create mode 100644 final/test/Transforms/InstCombine/addnegneg.ll
create mode 100644 final/test/Transforms/InstCombine/adjust-for-sminmax.ll
create mode 100644 final/test/Transforms/InstCombine/align-2d-gep.ll
create mode 100644 final/test/Transforms/InstCombine/align-addr.ll
create mode 100644 final/test/Transforms/InstCombine/align-external.ll
create mode 100644 final/test/Transforms/InstCombine/alloca.ll
create mode 100644 final/test/Transforms/InstCombine/and-compare.ll
create mode 100644 final/test/Transforms/InstCombine/and-fcmp.ll
create mode 100644 final/test/Transforms/InstCombine/and-not-or.ll
create mode 100644 final/test/Transforms/InstCombine/and-or-and.ll
create mode 100644 final/test/Transforms/InstCombine/and-or-not.ll
create mode 100644 final/test/Transforms/InstCombine/and-or.ll
create mode 100644 final/test/Transforms/InstCombine/and-xor-merge.ll
create mode 100644 final/test/Transforms/InstCombine/and.ll
create mode 100644 final/test/Transforms/InstCombine/and2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-add1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-add2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-and-compare.ll
create mode 100644 final/test/Transforms/InstCombine/apint-and-or-and.ll
create mode 100644 final/test/Transforms/InstCombine/apint-and-xor-merge.ll
create mode 100644 final/test/Transforms/InstCombine/apint-and1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-and2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-call-cast-target.ll
create mode 100644 final/test/Transforms/InstCombine/apint-cast-and-cast.ll
create mode 100644 final/test/Transforms/InstCombine/apint-cast-cast-to-and.ll
create mode 100644 final/test/Transforms/InstCombine/apint-cast.ll
create mode 100644 final/test/Transforms/InstCombine/apint-div1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-div2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-mul1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-mul2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-not.ll
create mode 100644 final/test/Transforms/InstCombine/apint-or1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-or2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-rem1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-rem2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-select.ll
create mode 100644 final/test/Transforms/InstCombine/apint-shift-simplify.ll
create mode 100644 final/test/Transforms/InstCombine/apint-shift.ll
create mode 100644 final/test/Transforms/InstCombine/apint-shl-trunc.ll
create mode 100644 final/test/Transforms/InstCombine/apint-sub.ll
create mode 100644 final/test/Transforms/InstCombine/apint-xor1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-xor2.ll
create mode 100644 final/test/Transforms/InstCombine/apint-zext1.ll
create mode 100644 final/test/Transforms/InstCombine/apint-zext2.ll
create mode 100644 final/test/Transforms/InstCombine/ashr-nop.ll
create mode 100644 final/test/Transforms/InstCombine/badmalloc.ll
create mode 100644 final/test/Transforms/InstCombine/binop-cast.ll
create mode 100644 final/test/Transforms/InstCombine/bit-checks.ll
create mode 100644 final/test/Transforms/InstCombine/bit-tracking.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast-sext-vector.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast-store.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast-vec-canon.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast-vec-uniform.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast-vector-fold.ll
create mode 100644 final/test/Transforms/InstCombine/bitcast.ll
create mode 100644 final/test/Transforms/InstCombine/bitcount.ll
create mode 100644 final/test/Transforms/InstCombine/bittest.ll
create mode 100644 final/test/Transforms/InstCombine/bswap-fold.ll
create mode 100644 final/test/Transforms/InstCombine/bswap.ll
create mode 100644 final/test/Transforms/InstCombine/call-cast-target.ll
create mode 100644 final/test/Transforms/InstCombine/call-intrinsics.ll
create mode 100644 final/test/Transforms/InstCombine/call.ll
create mode 100644 final/test/Transforms/InstCombine/call2.ll
create mode 100644 final/test/Transforms/InstCombine/canonicalize_branch.ll
create mode 100644 final/test/Transforms/InstCombine/cast-mul-select.ll
create mode 100644 final/test/Transforms/InstCombine/cast-set.ll
create mode 100644 final/test/Transforms/InstCombine/cast.ll
create mode 100644 final/test/Transforms/InstCombine/cast_ptr.ll
create mode 100644 final/test/Transforms/InstCombine/compare-signs.ll
create mode 100644 final/test/Transforms/InstCombine/constant-fold-compare.ll
create mode 100644 final/test/Transforms/InstCombine/constant-fold-gep.ll
create mode 100644 final/test/Transforms/InstCombine/crash.ll
create mode 100644 final/test/Transforms/InstCombine/dce-iterate.ll
create mode 100644 final/test/Transforms/InstCombine/deadcode.ll
create mode 100644 final/test/Transforms/InstCombine/dg.exp
create mode 100644 final/test/Transforms/InstCombine/div.ll
create mode 100644 final/test/Transforms/InstCombine/enforce-known-alignment.ll
create mode 100644 final/test/Transforms/InstCombine/exact.ll
create mode 100644 final/test/Transforms/InstCombine/extractvalue.ll
create mode 100644 final/test/Transforms/InstCombine/fcmp-select.ll
create mode 100644 final/test/Transforms/InstCombine/fcmp-special.ll
create mode 100644 final/test/Transforms/InstCombine/fold-bin-operand.ll
create mode 100644 final/test/Transforms/InstCombine/fold-calls.ll
create mode 100644 final/test/Transforms/InstCombine/fold-vector-select.ll
create mode 100644 final/test/Transforms/InstCombine/fold-vector-zero.ll
create mode 100644 final/test/Transforms/InstCombine/fp-ret-bitcast.ll
create mode 100644 final/test/Transforms/InstCombine/fpcast.ll
create mode 100644 final/test/Transforms/InstCombine/fpextend.ll
create mode 100644 final/test/Transforms/InstCombine/fsub.ll
create mode 100644 final/test/Transforms/InstCombine/gepgep.ll
create mode 100644 final/test/Transforms/InstCombine/getelementptr.ll
create mode 100644 final/test/Transforms/InstCombine/hoist_instr.ll
create mode 100644 final/test/Transforms/InstCombine/icmp.ll
create mode 100644 final/test/Transforms/InstCombine/idioms.ll
create mode 100644 final/test/Transforms/InstCombine/intrinsics.ll
create mode 100644 final/test/Transforms/InstCombine/invariant.ll
create mode 100644 final/test/Transforms/InstCombine/known_align.ll
create mode 100644 final/test/Transforms/InstCombine/load-cmp.ll
create mode 100644 final/test/Transforms/InstCombine/load-select.ll
create mode 100644 final/test/Transforms/InstCombine/load.ll
create mode 100644 final/test/Transforms/InstCombine/load3.ll
create mode 100644 final/test/Transforms/InstCombine/loadstore-alignment.ll
create mode 100644 final/test/Transforms/InstCombine/logical-select.ll
create mode 100644 final/test/Transforms/InstCombine/lshr-phi.ll
create mode 100644 final/test/Transforms/InstCombine/malloc-free-delete.ll
create mode 100644 final/test/Transforms/InstCombine/malloc.ll
create mode 100644 final/test/Transforms/InstCombine/malloc2.ll
create mode 100644 final/test/Transforms/InstCombine/malloc3.ll
create mode 100644 final/test/Transforms/InstCombine/memcpy-to-load.ll
create mode 100644 final/test/Transforms/InstCombine/memcpy.ll
create mode 100644 final/test/Transforms/InstCombine/memmove.ll
create mode 100644 final/test/Transforms/InstCombine/memset.ll
create mode 100644 final/test/Transforms/InstCombine/memset2.ll
create mode 100644 final/test/Transforms/InstCombine/memset_chk.ll
create mode 100644 final/test/Transforms/InstCombine/mul-masked-bits.ll
create mode 100644 final/test/Transforms/InstCombine/mul.ll
create mode 100644 final/test/Transforms/InstCombine/multi-use-or.ll
create mode 100644 final/test/Transforms/InstCombine/narrow.ll
create mode 100644 final/test/Transforms/InstCombine/neon-intrinsics.ll
create mode 100644 final/test/Transforms/InstCombine/no-negzero.ll
create mode 100644 final/test/Transforms/InstCombine/not-fcmp.ll
create mode 100644 final/test/Transforms/InstCombine/not.ll
create mode 100644 final/test/Transforms/InstCombine/nothrow.ll
create mode 100644 final/test/Transforms/InstCombine/nsw.ll
create mode 100644 final/test/Transforms/InstCombine/objsize.ll
create mode 100644 final/test/Transforms/InstCombine/odr-linkage.ll
create mode 100644 final/test/Transforms/InstCombine/or-fcmp.ll
create mode 100644 final/test/Transforms/InstCombine/or-to-xor.ll
create mode 100644 final/test/Transforms/InstCombine/or-xor.ll
create mode 100644 final/test/Transforms/InstCombine/or.ll
create mode 100644 final/test/Transforms/InstCombine/overflow.ll
create mode 100644 final/test/Transforms/InstCombine/phi-merge-gep.ll
create mode 100644 final/test/Transforms/InstCombine/phi.ll
create mode 100644 final/test/Transforms/InstCombine/pr2645-0.ll
create mode 100644 final/test/Transforms/InstCombine/pr2645-1.ll
create mode 100644 final/test/Transforms/InstCombine/pr2996.ll
create mode 100644 final/test/Transforms/InstCombine/pr8547.ll
create mode 100644 final/test/Transforms/InstCombine/preserve-sminmax.ll
create mode 100644 final/test/Transforms/InstCombine/ptr-int-cast.ll
create mode 100644 final/test/Transforms/InstCombine/rem.ll
create mode 100644 final/test/Transforms/InstCombine/sdiv-1.ll
create mode 100644 final/test/Transforms/InstCombine/sdiv-2.ll
create mode 100644 final/test/Transforms/InstCombine/sdiv-shift.ll
create mode 100644 final/test/Transforms/InstCombine/select-2.ll
create mode 100644 final/test/Transforms/InstCombine/select-crash.ll
create mode 100644 final/test/Transforms/InstCombine/select-load-call.ll
create mode 100644 final/test/Transforms/InstCombine/select.ll
create mode 100644 final/test/Transforms/InstCombine/set.ll
create mode 100644 final/test/Transforms/InstCombine/setcc-strength-reduce.ll
create mode 100644 final/test/Transforms/InstCombine/sext.ll
create mode 100644 final/test/Transforms/InstCombine/shift-sra.ll
create mode 100644 final/test/Transforms/InstCombine/shift.ll
create mode 100644 final/test/Transforms/InstCombine/shufflemask-undef.ll
create mode 100644 final/test/Transforms/InstCombine/shufflevec-constant.ll
create mode 100644 final/test/Transforms/InstCombine/signed-comparison.ll
create mode 100644 final/test/Transforms/InstCombine/signext.ll
create mode 100644 final/test/Transforms/InstCombine/simplify-demanded-bits-pointer.ll
create mode 100644 final/test/Transforms/InstCombine/sink_instruction.ll
create mode 100644 final/test/Transforms/InstCombine/sitofp.ll
create mode 100644 final/test/Transforms/InstCombine/sqrt.ll
create mode 100644 final/test/Transforms/InstCombine/srem-simplify-bug.ll
create mode 100644 final/test/Transforms/InstCombine/srem.ll
create mode 100644 final/test/Transforms/InstCombine/srem1.ll
create mode 100644 final/test/Transforms/InstCombine/stack-overalign.ll
create mode 100644 final/test/Transforms/InstCombine/stacksaverestore.ll
create mode 100644 final/test/Transforms/InstCombine/store.ll
create mode 100644 final/test/Transforms/InstCombine/strcpy_chk.ll
create mode 100644 final/test/Transforms/InstCombine/sub.ll
create mode 100644 final/test/Transforms/InstCombine/trunc.ll
create mode 100644 final/test/Transforms/InstCombine/udiv-simplify-bug-0.ll
create mode 100644 final/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
create mode 100644 final/test/Transforms/InstCombine/udiv_select_to_select_shift.ll
create mode 100644 final/test/Transforms/InstCombine/udivrem-change-width.ll
create mode 100644 final/test/Transforms/InstCombine/urem-simplify-bug.ll
create mode 100644 final/test/Transforms/InstCombine/urem.ll
create mode 100644 final/test/Transforms/InstCombine/vec_demanded_elts.ll
create mode 100644 final/test/Transforms/InstCombine/vec_extract_elt.ll
create mode 100644 final/test/Transforms/InstCombine/vec_insertelt.ll
create mode 100644 final/test/Transforms/InstCombine/vec_narrow.ll
create mode 100644 final/test/Transforms/InstCombine/vec_sext.ll
create mode 100644 final/test/Transforms/InstCombine/vec_shuffle.ll
create mode 100644 final/test/Transforms/InstCombine/vector-casts.ll
create mode 100644 final/test/Transforms/InstCombine/vector-srem.ll
create mode 100644 final/test/Transforms/InstCombine/volatile_store.ll
create mode 100644 final/test/Transforms/InstCombine/xor-undef.ll
create mode 100644 final/test/Transforms/InstCombine/xor.ll
create mode 100644 final/test/Transforms/InstCombine/xor2.ll
create mode 100644 final/test/Transforms/InstCombine/zero-point-zero-add.ll
create mode 100644 final/test/Transforms/InstCombine/zeroext-and-reduce.ll
create mode 100644 final/test/Transforms/InstCombine/zext-bool-add-sub.ll
create mode 100644 final/test/Transforms/InstCombine/zext-fold.ll
create mode 100644 final/test/Transforms/InstCombine/zext-or-icmp.ll
create mode 100644 final/test/Transforms/InstCombine/zext.ll
create mode 100644 final/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
create mode 100644 final/test/Transforms/InstSimplify/2010-12-20-Distribute.ll
create mode 100644 final/test/Transforms/InstSimplify/2011-01-14-Thread.ll
create mode 100644 final/test/Transforms/InstSimplify/2011-02-01-Vector.ll
create mode 100644 final/test/Transforms/InstSimplify/compare.ll
create mode 100644 final/test/Transforms/InstSimplify/dg.exp
create mode 100644 final/test/Transforms/InstSimplify/exact-nsw-nuw.ll
create mode 100644 final/test/Transforms/InstSimplify/fdiv.ll
create mode 100644 final/test/Transforms/InstSimplify/reassociate.ll
create mode 100644 final/test/Transforms/Internalize/2008-05-09-AllButMain.ll
create mode 100644 final/test/Transforms/Internalize/2008-05-09-AllButMain.ll.apifile
create mode 100644 final/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
create mode 100644 final/test/Transforms/Internalize/available_externally.ll
create mode 100644 final/test/Transforms/Internalize/dg.exp
create mode 100644 final/test/Transforms/JumpThreading/2008-11-27-EntryMunge.ll
create mode 100644 final/test/Transforms/JumpThreading/2010-08-26-and.ll
create mode 100644
final/test/Transforms/JumpThreading/and-and-cond.ll create mode 100644 final/test/Transforms/JumpThreading/and-cond.ll create mode 100644 final/test/Transforms/JumpThreading/basic.ll create mode 100644 final/test/Transforms/JumpThreading/branch-no-const.ll create mode 100644 final/test/Transforms/JumpThreading/compare.ll create mode 100644 final/test/Transforms/JumpThreading/crash.ll create mode 100644 final/test/Transforms/JumpThreading/degenerate-phi.ll create mode 100644 final/test/Transforms/JumpThreading/dg.exp create mode 100644 final/test/Transforms/JumpThreading/indirectbr.ll create mode 100644 final/test/Transforms/JumpThreading/lvi-load.ll create mode 100644 final/test/Transforms/JumpThreading/no-irreducible-loops.ll create mode 100644 final/test/Transforms/JumpThreading/or-undef.ll create mode 100644 final/test/Transforms/JumpThreading/select.ll create mode 100644 final/test/Transforms/JumpThreading/thread-loads.ll create mode 100644 final/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll create mode 100644 final/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll create mode 100644 final/test/Transforms/LCSSA/2006-07-09-NoDominator.ll create mode 100644 final/test/Transforms/LCSSA/2006-10-31-UnreachableBlock-2.ll create mode 100644 final/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll create mode 100644 final/test/Transforms/LCSSA/2007-07-12-LICM-2.ll create mode 100644 final/test/Transforms/LCSSA/2007-07-12-LICM-3.ll create mode 100644 final/test/Transforms/LCSSA/2007-07-12-LICM.ll create mode 100644 final/test/Transforms/LCSSA/basictest.ll create mode 100644 final/test/Transforms/LCSSA/dg.exp create mode 100644 final/test/Transforms/LCSSA/indirectbr.ll create mode 100644 final/test/Transforms/LCSSA/invoke-dest.ll create mode 100644 final/test/Transforms/LCSSA/unreachable-use.ll create mode 100644 final/test/Transforms/LICM/2003-02-26-LoopExitNotDominated.ll create mode 100644 final/test/Transforms/LICM/2003-02-27-NestedLoopExitBlocks.ll create mode 100644 final/test/Transforms/LICM/2003-02-27-PreheaderExitNodeUpdate.ll create mode 100644 final/test/Transforms/LICM/2003-02-27-PreheaderProblem.ll create mode 100644 final/test/Transforms/LICM/2003-02-27-StoreSinkPHIs.ll create mode 100644 final/test/Transforms/LICM/2003-02-28-PromoteDifferentType.ll create mode 100644 final/test/Transforms/LICM/2003-05-02-LoadHoist.ll create mode 100644 final/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll create mode 100644 final/test/Transforms/LICM/2004-09-14-AliasAnalysisInvalidate.ll create mode 100644 final/test/Transforms/LICM/2004-11-17-UndefIndexCrash.ll create mode 100644 final/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll create mode 100644 final/test/Transforms/LICM/2006-09-12-DeadUserOfSunkInstr.ll create mode 100644 final/test/Transforms/LICM/2007-05-22-VolatileSink.ll create mode 100644 final/test/Transforms/LICM/2007-07-30-AliasSet.ll create mode 100644 final/test/Transforms/LICM/2007-09-17-PromoteValue.ll create mode 100644 final/test/Transforms/LICM/2007-09-24-PromoteNullValue.ll create mode 100644 final/test/Transforms/LICM/2007-10-01-PromoteSafeValue.ll create mode 100644 final/test/Transforms/LICM/2008-05-20-AliasSetVAArg.ll create mode 100644 final/test/Transforms/LICM/2008-07-22-LoadGlobalConstant.ll create mode 100644 final/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll create mode 100644 final/test/Transforms/LICM/Preserve-LCSSA.ll create mode 100644 final/test/Transforms/LICM/basictest.ll create mode 100644 
final/test/Transforms/LICM/crash.ll create mode 100644 final/test/Transforms/LICM/dg.exp create mode 100644 final/test/Transforms/LICM/hoisting.ll create mode 100644 final/test/Transforms/LICM/no-preheader-test.ll create mode 100644 final/test/Transforms/LICM/scalar_promote.ll create mode 100644 final/test/Transforms/LICM/sinking.ll create mode 100644 final/test/Transforms/LoopDeletion/2007-07-23-InfiniteLoop.ll create mode 100644 final/test/Transforms/LoopDeletion/2008-05-06-Phi.ll create mode 100644 final/test/Transforms/LoopDeletion/dcetest.ll create mode 100644 final/test/Transforms/LoopDeletion/dg.exp create mode 100644 final/test/Transforms/LoopDeletion/multiple-exit-conditions.ll create mode 100644 final/test/Transforms/LoopDeletion/multiple-exits.ll create mode 100644 final/test/Transforms/LoopDeletion/simplify-then-delete.ll create mode 100644 final/test/Transforms/LoopIdiom/basic.ll create mode 100644 final/test/Transforms/LoopIdiom/debug-line.ll create mode 100644 final/test/Transforms/LoopIdiom/dg.exp create mode 100644 final/test/Transforms/LoopRotate/2009-01-25-SingleEntryPhi.ll create mode 100644 final/test/Transforms/LoopRotate/PhiRename-1.ll create mode 100644 final/test/Transforms/LoopRotate/PhiSelfRefernce-1.ll create mode 100644 final/test/Transforms/LoopRotate/basic.ll create mode 100644 final/test/Transforms/LoopRotate/crash.ll create mode 100644 final/test/Transforms/LoopRotate/dbgvalue.ll create mode 100644 final/test/Transforms/LoopRotate/dg.exp create mode 100644 final/test/Transforms/LoopRotate/indirectbr.ll create mode 100644 final/test/Transforms/LoopRotate/phi-duplicate.ll create mode 100644 final/test/Transforms/LoopRotate/pr2639.ll create mode 100644 final/test/Transforms/LoopRotate/preserve-scev.ll create mode 100644 final/test/Transforms/LoopSimplify/2003-04-25-AssertFail.ll create mode 100644 final/test/Transforms/LoopSimplify/2003-05-12-PreheaderExitOfChild.ll create mode 100644 final/test/Transforms/LoopSimplify/2003-08-15-PreheadersFail.ll create mode 100644 final/test/Transforms/LoopSimplify/2003-12-10-ExitBlocksProblem.ll create mode 100644 final/test/Transforms/LoopSimplify/2004-02-05-DominatorInfoCorruption.ll create mode 100644 final/test/Transforms/LoopSimplify/2004-03-15-IncorrectDomUpdate.ll create mode 100644 final/test/Transforms/LoopSimplify/2004-04-01-IncorrectDomUpdate.ll create mode 100644 final/test/Transforms/LoopSimplify/2004-04-12-LoopSimplify-SwitchBackedges.ll create mode 100644 final/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll create mode 100644 final/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll create mode 100644 final/test/Transforms/LoopSimplify/2010-07-15-IncorrectDomFrontierUpdate.ll create mode 100644 final/test/Transforms/LoopSimplify/2010-12-26-PHIInfiniteLoop.ll create mode 100644 final/test/Transforms/LoopSimplify/basictest.ll create mode 100644 final/test/Transforms/LoopSimplify/dg.exp create mode 100644 final/test/Transforms/LoopSimplify/hardertest.ll create mode 100644 final/test/Transforms/LoopSimplify/indirectbr-backedge.ll create mode 100644 final/test/Transforms/LoopSimplify/indirectbr.ll create mode 100644 final/test/Transforms/LoopSimplify/merge-exits.ll create mode 100644 final/test/Transforms/LoopSimplify/phi-node-simplify.ll create mode 100644 final/test/Transforms/LoopSimplify/preserve-scev.ll create mode 100644 final/test/Transforms/LoopSimplify/single-backedge.ll create mode 100644 final/test/Transforms/LoopSimplify/unreachable-loop-pred.ll create mode 100644 
final/test/Transforms/LoopStrengthReduce/2005-08-15-AddRecIV.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2005-08-17-OutOfLoopVariant.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2005-09-12-UsesOutOutsideOfLoop.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2007-04-23-UseIterator.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2008-08-13-CmpStride.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2008-08-14-ShadowIV.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2008-09-09-Overflow.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2009-01-13-nonconstant-stride-outside-loop.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2009-04-28-no-reduce-mul.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/2009-11-10-LSRCrash.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/count-to-zero.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dead-phi.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dg.exp create mode 100644 final/test/Transforms/LoopStrengthReduce/different-type-ivs.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dont-hoist-simple-loop-constants.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dont_insert_redundant_ops.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dont_reduce_bytes.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/dont_reverse.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/exit_compare_live_range.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/hoist-parent-preheader.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/invariant_value_first.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/invariant_value_first_arg.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/nested-reduce.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/nonlinear-postinc.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/ops_after_indvar.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/phi_node_update_multiple_preds.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/pr2537.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/pr2570.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/pr3086.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/pr3399.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/pr3571.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/related_indvars.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/remove_indvar.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/share_code_in_preheader.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/share_ivs.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/uglygep.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/use_postinc_value_outside_loop.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/var_stride_used_by_compare.ll create mode 100644 final/test/Transforms/LoopStrengthReduce/variable_stride.ll create mode 100644 final/test/Transforms/LoopUnroll/2004-05-13-DontUnrollTooMuch.ll create mode 100644 final/test/Transforms/LoopUnroll/2005-03-06-BadLoopInfoUpdate.ll create mode 100644 final/test/Transforms/LoopUnroll/2006-08-24-MultiBlockLoop.ll create mode 100644 
final/test/Transforms/LoopUnroll/2007-04-16-PhiUpdate.ll create mode 100644 final/test/Transforms/LoopUnroll/2007-05-05-UnrollMiscomp.ll create mode 100644 final/test/Transforms/LoopUnroll/2007-05-09-UnknownTripCount.ll create mode 100644 final/test/Transforms/LoopUnroll/2007-11-05-Crash.ll create mode 100644 final/test/Transforms/LoopUnroll/basic.ll create mode 100644 final/test/Transforms/LoopUnroll/dg.exp create mode 100644 final/test/Transforms/LoopUnroll/shifted-tripcount.ll create mode 100644 final/test/Transforms/LoopUnswitch/2006-06-13-SingleEntryPHI.ll create mode 100644 final/test/Transforms/LoopUnswitch/2006-06-27-DeadSwitchCase.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-05-09-Unreachable.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-05-09-tl.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-07-12-ExitDomInfo.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-07-13-DomInfo.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-07-18-DomInfo.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-08-01-Dom.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-08-01-LCSSA.ll create mode 100644 final/test/Transforms/LoopUnswitch/2007-10-04-DomFrontier.ll create mode 100644 final/test/Transforms/LoopUnswitch/2008-06-02-DomInfo.ll create mode 100644 final/test/Transforms/LoopUnswitch/2008-06-17-DomFrontier.ll create mode 100644 final/test/Transforms/LoopUnswitch/2008-11-03-Invariant.ll create mode 100644 final/test/Transforms/LoopUnswitch/2010-11-18-LCSSA.ll create mode 100644 final/test/Transforms/LoopUnswitch/basictest.ll create mode 100644 final/test/Transforms/LoopUnswitch/crash.ll create mode 100644 final/test/Transforms/LoopUnswitch/dg.exp create mode 100644 final/test/Transforms/LoopUnswitch/infinite-loop.ll create mode 100644 final/test/Transforms/LoopUnswitch/preserve-analyses.ll create mode 100644 final/test/Transforms/LowerAtomic/atomic-load.ll create mode 100644 final/test/Transforms/LowerAtomic/atomic-swap.ll create mode 100644 final/test/Transforms/LowerAtomic/barrier.ll create mode 100644 final/test/Transforms/LowerAtomic/dg.exp create mode 100644 final/test/Transforms/LowerInvoke/2003-12-10-Crash.ll create mode 100644 final/test/Transforms/LowerInvoke/2004-02-29-PHICrash.ll create mode 100644 final/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHI.ll create mode 100644 final/test/Transforms/LowerInvoke/2005-08-03-InvokeWithPHIUse.ll create mode 100644 final/test/Transforms/LowerInvoke/2008-02-14-CritEdgePhiCrash.ll create mode 100644 final/test/Transforms/LowerInvoke/basictest.ll create mode 100644 final/test/Transforms/LowerInvoke/dg.exp create mode 100644 final/test/Transforms/LowerSetJmp/2003-11-05-DominanceProperties.ll create mode 100644 final/test/Transforms/LowerSetJmp/dg.exp create mode 100644 final/test/Transforms/LowerSetJmp/simpletest.ll create mode 100644 final/test/Transforms/LowerSwitch/2003-05-01-PHIProblem.ll create mode 100644 final/test/Transforms/LowerSwitch/2003-08-23-EmptySwitch.ll create mode 100644 final/test/Transforms/LowerSwitch/2004-03-13-SwitchIsDefaultCrash.ll create mode 100644 final/test/Transforms/LowerSwitch/dg.exp create mode 100644 final/test/Transforms/LowerSwitch/feature.ll create mode 100644 final/test/Transforms/Mem2Reg/2002-03-28-UninitializedVal.ll create mode 100644 final/test/Transforms/Mem2Reg/2002-05-01-ShouldNotPromoteThisAlloca.ll create mode 100644 final/test/Transforms/Mem2Reg/2003-04-10-DFNotFound.ll create mode 100644 
final/test/Transforms/Mem2Reg/2003-04-18-DeadBlockProblem.ll create mode 100644 final/test/Transforms/Mem2Reg/2003-04-24-MultipleIdenticalSuccessors.ll create mode 100644 final/test/Transforms/Mem2Reg/2003-06-26-IterativePromote.ll create mode 100644 final/test/Transforms/Mem2Reg/2003-10-05-DeadPHIInsertion.ll create mode 100644 final/test/Transforms/Mem2Reg/2005-06-30-ReadBeforeWrite.ll create mode 100644 final/test/Transforms/Mem2Reg/2005-11-28-Crash.ll create mode 100644 final/test/Transforms/Mem2Reg/2007-08-27-VolatileLoadsStores.ll create mode 100644 final/test/Transforms/Mem2Reg/ConvertDebugInfo.ll create mode 100644 final/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll create mode 100644 final/test/Transforms/Mem2Reg/PromoteMemToRegister.ll create mode 100644 final/test/Transforms/Mem2Reg/UndefValuesMerge.ll create mode 100644 final/test/Transforms/Mem2Reg/crash.ll create mode 100644 final/test/Transforms/Mem2Reg/dg.exp create mode 100644 final/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll create mode 100644 final/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll create mode 100644 final/test/Transforms/MemCpyOpt/align.ll create mode 100644 final/test/Transforms/MemCpyOpt/crash.ll create mode 100644 final/test/Transforms/MemCpyOpt/dg.exp create mode 100644 final/test/Transforms/MemCpyOpt/form-memset.ll create mode 100644 final/test/Transforms/MemCpyOpt/loadstore-sret.ll create mode 100644 final/test/Transforms/MemCpyOpt/memcpy-to-memset.ll create mode 100644 final/test/Transforms/MemCpyOpt/memcpy.ll create mode 100644 final/test/Transforms/MemCpyOpt/memmove.ll create mode 100644 final/test/Transforms/MemCpyOpt/smaller.ll create mode 100644 final/test/Transforms/MemCpyOpt/sret.ll create mode 100644 final/test/Transforms/MergeFunc/2011-02-08-RemoveEqual.ll create mode 100644 final/test/Transforms/MergeFunc/dg.exp create mode 100644 final/test/Transforms/MergeFunc/fold-weak.ll create mode 100644 final/test/Transforms/MergeFunc/phi-speculation1.ll create mode 100644 final/test/Transforms/MergeFunc/phi-speculation2.ll create mode 100644 final/test/Transforms/MergeFunc/vector.ll create mode 100644 final/test/Transforms/MergeFunc/vectors-and-arrays.ll create mode 100644 final/test/Transforms/PruneEH/2003-09-14-ExternalCall.ll create mode 100644 final/test/Transforms/PruneEH/2003-11-21-PHIUpdate.ll create mode 100644 final/test/Transforms/PruneEH/2008-06-02-Weak.ll create mode 100644 final/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll create mode 100644 final/test/Transforms/PruneEH/dg.exp create mode 100644 final/test/Transforms/PruneEH/recursivetest.ll create mode 100644 final/test/Transforms/PruneEH/simplenoreturntest.ll create mode 100644 final/test/Transforms/PruneEH/simpletest.ll create mode 100644 final/test/Transforms/Reassociate/2002-05-15-AgressiveSubMove.ll create mode 100644 final/test/Transforms/Reassociate/2002-05-15-MissedTree.ll create mode 100644 final/test/Transforms/Reassociate/2002-05-15-SubReassociate.ll create mode 100644 final/test/Transforms/Reassociate/2002-05-15-SubReassociate2.ll create mode 100644 final/test/Transforms/Reassociate/2002-07-09-DominanceProblem.ll create mode 100644 final/test/Transforms/Reassociate/2003-08-12-InfiniteLoop.ll create mode 100644 final/test/Transforms/Reassociate/2005-08-24-Crash.ll create mode 100644 final/test/Transforms/Reassociate/2005-09-01-ArrayOutOfBounds.ll create mode 100644 final/test/Transforms/Reassociate/2006-04-27-ReassociateVector.ll create mode 100644 
final/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll create mode 100644 final/test/Transforms/Reassociate/basictest.ll create mode 100644 final/test/Transforms/Reassociate/crash.ll create mode 100644 final/test/Transforms/Reassociate/dg.exp create mode 100644 final/test/Transforms/Reassociate/inverses.ll create mode 100644 final/test/Transforms/Reassociate/looptest.ll create mode 100644 final/test/Transforms/Reassociate/mulfactor.ll create mode 100644 final/test/Transforms/Reassociate/mulfactor2.ll create mode 100644 final/test/Transforms/Reassociate/negation.ll create mode 100644 final/test/Transforms/Reassociate/optional-flags.ll create mode 100644 final/test/Transforms/Reassociate/otherops.ll create mode 100644 final/test/Transforms/Reassociate/shift-factor.ll create mode 100644 final/test/Transforms/Reassociate/shifttest.ll create mode 100644 final/test/Transforms/Reassociate/subtest.ll create mode 100644 final/test/Transforms/Reassociate/subtest2.ll create mode 100644 final/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll create mode 100644 final/test/Transforms/SCCP/2002-05-02-MissSecondInst.ll create mode 100644 final/test/Transforms/SCCP/2002-05-20-MissedIncomingValue.ll create mode 100644 final/test/Transforms/SCCP/2002-05-21-InvalidSimplify.ll create mode 100644 final/test/Transforms/SCCP/2002-08-30-GetElementPtrTest.ll create mode 100644 final/test/Transforms/SCCP/2003-06-24-OverdefinedPHIValue.ll create mode 100644 final/test/Transforms/SCCP/2003-08-26-InvokeHandling.ll create mode 100644 final/test/Transforms/SCCP/2004-11-16-DeadInvoke.ll create mode 100644 final/test/Transforms/SCCP/2004-12-10-UndefBranchBug.ll create mode 100644 final/test/Transforms/SCCP/2006-10-23-IPSCCP-Crash.ll create mode 100644 final/test/Transforms/SCCP/2006-12-04-PackedType.ll create mode 100644 final/test/Transforms/SCCP/2006-12-19-UndefBug.ll create mode 100644 final/test/Transforms/SCCP/2007-05-16-InvokeCrash.ll create mode 100644 final/test/Transforms/SCCP/2008-01-27-UndefCorrelate.ll create mode 100644 final/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll create mode 100644 final/test/Transforms/SCCP/2008-05-23-UndefCallFold.ll create mode 100644 final/test/Transforms/SCCP/2009-01-14-IPSCCP-Invoke.ll create mode 100644 final/test/Transforms/SCCP/2009-05-27-VectorOperandZero.ll create mode 100644 final/test/Transforms/SCCP/apint-array.ll create mode 100644 final/test/Transforms/SCCP/apint-basictest.ll create mode 100644 final/test/Transforms/SCCP/apint-basictest2.ll create mode 100644 final/test/Transforms/SCCP/apint-basictest3.ll create mode 100644 final/test/Transforms/SCCP/apint-basictest4.ll create mode 100644 final/test/Transforms/SCCP/apint-bigarray.ll create mode 100644 final/test/Transforms/SCCP/apint-bigint.ll create mode 100644 final/test/Transforms/SCCP/apint-bigint2.ll create mode 100644 final/test/Transforms/SCCP/apint-ipsccp1.ll create mode 100644 final/test/Transforms/SCCP/apint-ipsccp2.ll create mode 100644 final/test/Transforms/SCCP/apint-ipsccp3.ll create mode 100644 final/test/Transforms/SCCP/apint-ipsccp4.ll create mode 100644 final/test/Transforms/SCCP/apint-load.ll create mode 100644 final/test/Transforms/SCCP/apint-phi.ll create mode 100644 final/test/Transforms/SCCP/apint-select.ll create mode 100644 final/test/Transforms/SCCP/calltest.ll create mode 100644 final/test/Transforms/SCCP/crash.ll create mode 100644 final/test/Transforms/SCCP/dg.exp create mode 100644 final/test/Transforms/SCCP/ipsccp-addr-taken.ll create mode 100644 
final/test/Transforms/SCCP/ipsccp-basic.ll create mode 100644 final/test/Transforms/SCCP/loadtest.ll create mode 100644 final/test/Transforms/SCCP/logical-nuke.ll create mode 100644 final/test/Transforms/SCCP/phitest.ll create mode 100644 final/test/Transforms/SCCP/retvalue-undef.ll create mode 100644 final/test/Transforms/SCCP/sccptest.ll create mode 100644 final/test/Transforms/SCCP/select.ll create mode 100644 final/test/Transforms/SCCP/undef-resolve.ll create mode 100644 final/test/Transforms/SRETPromotion/2008-03-11-attributes.ll create mode 100644 final/test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll create mode 100644 final/test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll create mode 100644 final/test/Transforms/SRETPromotion/basictest.ll create mode 100644 final/test/Transforms/SRETPromotion/dg.exp create mode 100644 final/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll create mode 100644 final/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll create mode 100644 final/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll create mode 100644 final/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll create mode 100644 final/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll create mode 100644 final/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll create mode 100644 final/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll create mode 100644 final/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll create mode 100644 final/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll create mode 100644 final/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll create mode 100644 final/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll create mode 100644 final/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll create mode 100644 final/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll create mode 100644 final/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll create mode 100644 final/test/Transforms/ScalarRepl/AggregatePromote.ll create mode 100644 final/test/Transforms/ScalarRepl/DifferingTypes.ll create mode 100644 final/test/Transforms/ScalarRepl/address-space.ll create mode 100644 final/test/Transforms/ScalarRepl/arraytest.ll create mode 100644 final/test/Transforms/ScalarRepl/badarray.ll create mode 100644 final/test/Transforms/ScalarRepl/basictest.ll create mode 100644 final/test/Transforms/ScalarRepl/bitfield-sroa.ll create mode 100644 final/test/Transforms/ScalarRepl/copy-aggregate.ll create mode 100644 final/test/Transforms/ScalarRepl/crash.ll create mode 100644 final/test/Transforms/ScalarRepl/debuginfo.ll create mode 100644 final/test/Transforms/ScalarRepl/dg.exp create mode 100644 final/test/Transforms/ScalarRepl/load-store-aggregate.ll create mode 100644 final/test/Transforms/ScalarRepl/memcpy-align.ll create mode 100644 final/test/Transforms/ScalarRepl/memcpy-from-global.ll create mode 100644 final/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll create mode 100644 final/test/Transforms/ScalarRepl/memset-aggregate.ll create mode 100644 
final/test/Transforms/ScalarRepl/nonzero-first-index.ll create mode 100644 final/test/Transforms/ScalarRepl/not-a-vector.ll create mode 100644 final/test/Transforms/ScalarRepl/phi-select.ll create mode 100644 final/test/Transforms/ScalarRepl/phinodepromote.ll create mode 100644 final/test/Transforms/ScalarRepl/select_promote.ll create mode 100644 final/test/Transforms/ScalarRepl/sroa-fca.ll create mode 100644 final/test/Transforms/ScalarRepl/sroa_two.ll create mode 100644 final/test/Transforms/ScalarRepl/union-fp-int.ll create mode 100644 final/test/Transforms/ScalarRepl/union-packed.ll create mode 100644 final/test/Transforms/ScalarRepl/union-pointer.ll create mode 100644 final/test/Transforms/ScalarRepl/vector_memcpy.ll create mode 100644 final/test/Transforms/ScalarRepl/vector_promote.ll create mode 100644 final/test/Transforms/ScalarRepl/volatile.ll create mode 100644 final/test/Transforms/SimplifyCFG/2002-05-05-EmptyBlockMerge.ll create mode 100644 final/test/Transforms/SimplifyCFG/2002-05-21-PHIElimination.ll create mode 100644 final/test/Transforms/SimplifyCFG/2002-06-24-PHINode.ll create mode 100644 final/test/Transforms/SimplifyCFG/2002-09-24-PHIAssertion.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-03-07-DominateProblem.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-05-InvokeCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-05-MishandleInvoke.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-17-BranchFold.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-17-BranchFoldOrdering.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch-dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll create mode 100644 final/test/Transforms/SimplifyCFG/2004-12-10-SimplifyCFGCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2005-06-16-PHICrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2005-08-01-PHIUpdateFail.ll create mode 100644 final/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2005-10-02-InvokeSimplify.ll create mode 100644 final/test/Transforms/SimplifyCFG/2005-12-03-IncorrectPHIFold.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-02-17-InfiniteUnroll.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-06-12-InfLoop.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-08-03-Crash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-10-19-UncondDiv.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-10-29-InvokeCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2006-12-08-Ptr-ICmp-Branch.ll create mode 100644 final/test/Transforms/SimplifyCFG/2007-11-22-InvokeNoUnwind.ll create mode 100644 final/test/Transforms/SimplifyCFG/2007-12-21-Crash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-01-02-hoist-fp-add.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-09-08-MultiplePred.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-09-17-SpeculativeHoist.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-10-03-SpeculativelyExecuteBeforePHI.ll 
create mode 100644 final/test/Transforms/SimplifyCFG/2008-12-06-SingleEntryPhi.ll create mode 100644 final/test/Transforms/SimplifyCFG/2008-12-16-DCECond.ll create mode 100644 final/test/Transforms/SimplifyCFG/2009-01-18-PHIPropCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2009-01-19-UnconditionalTrappingConstantExpr.ll create mode 100644 final/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/2009-05-12-externweak.ll create mode 100644 final/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2010-03-30-InvokeCrash.ll create mode 100644 final/test/Transforms/SimplifyCFG/2010-10-24-OnlyUnwindInEntry.ll create mode 100644 final/test/Transforms/SimplifyCFG/BrUnwind.ll create mode 100644 final/test/Transforms/SimplifyCFG/DeadSetCC.ll create mode 100644 final/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll create mode 100644 final/test/Transforms/SimplifyCFG/HoistCode.ll create mode 100644 final/test/Transforms/SimplifyCFG/MagicPointer.ll create mode 100644 final/test/Transforms/SimplifyCFG/PhiBlockMerge.ll create mode 100644 final/test/Transforms/SimplifyCFG/PhiBlockMerge2.ll create mode 100644 final/test/Transforms/SimplifyCFG/PhiEliminate.ll create mode 100644 final/test/Transforms/SimplifyCFG/PhiEliminate2.ll create mode 100644 final/test/Transforms/SimplifyCFG/PhiNoEliminate.ll create mode 100644 final/test/Transforms/SimplifyCFG/SpeculativeExec.ll create mode 100644 final/test/Transforms/SimplifyCFG/UncondBranchToReturn.ll create mode 100644 final/test/Transforms/SimplifyCFG/UnreachableEliminate.ll create mode 100644 final/test/Transforms/SimplifyCFG/basictest.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-branch-dbginfo.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-cond-merge.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-cond-prop.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-fold-test.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-fold.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch-phi-thread.ll create mode 100644 final/test/Transforms/SimplifyCFG/branch_fold_dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/dbginfo.ll create mode 100644 final/test/Transforms/SimplifyCFG/dg.exp create mode 100644 final/test/Transforms/SimplifyCFG/duplicate-phis.ll create mode 100644 final/test/Transforms/SimplifyCFG/hoist-common-code.dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/hoist-common-code.ll create mode 100644 final/test/Transforms/SimplifyCFG/indirectbr.ll create mode 100644 final/test/Transforms/SimplifyCFG/invoke_unwind.ll create mode 100644 final/test/Transforms/SimplifyCFG/iterative-simplify.ll create mode 100644 final/test/Transforms/SimplifyCFG/noreturn-call.ll create mode 100644 final/test/Transforms/SimplifyCFG/return-merge.ll create mode 100644 final/test/Transforms/SimplifyCFG/select-gep.ll create mode 100644 final/test/Transforms/SimplifyCFG/speculate-with-offset.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch-on-const-select.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch-simplify-crash.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch-to-icmp.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch_create.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch_formation.dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch_switch_fold.ll create mode 
100644 final/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll create mode 100644 final/test/Transforms/SimplifyCFG/switch_thread.ll create mode 100644 final/test/Transforms/SimplifyCFG/trapping-load-unreachable.ll create mode 100644 final/test/Transforms/SimplifyCFG/two-entry-phi-return.dbg.ll create mode 100644 final/test/Transforms/SimplifyCFG/two-entry-phi-return.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2005-05-20-sprintf-crash.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2007-04-06-strchr-miscompile.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2008-05-19-memcmp.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-01-04-Annotate.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-02-11-NotInitialized.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-05-30-memcmp-byte.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-07-28-Exit.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2009-07-29-Exit2.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/FFS.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/FPrintF.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/FPuts.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/IsDigit.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/MemCpy.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/PR7357.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/Printf.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/Puts.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/SPrintF.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrCat.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrChr.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrCmp.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrCpy.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrLen.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrNCat.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrNCmp.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrNCpy.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrPBrk.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrRChr.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrSpn.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/StrStr.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/ToAscii.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/abs.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/dg.exp create mode 100644 final/test/Transforms/SimplifyLibCalls/exp2.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/floor.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/iprintf.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/memcmp.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/memmove.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/memset-64.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/memset.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/pow-to-sqrt.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/pow2.ll create mode 100644 final/test/Transforms/SimplifyLibCalls/weak-symbols.ll create mode 100644 
final/test/Transforms/Sink/basic.ll create mode 100644 final/test/Transforms/Sink/dg.exp create mode 100644 final/test/Transforms/StripSymbols/2007-01-15-llvm.used.ll create mode 100644 final/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll create mode 100644 final/test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll create mode 100644 final/test/Transforms/StripSymbols/2010-08-25-crash.ll create mode 100644 final/test/Transforms/StripSymbols/dg.exp create mode 100644 final/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll create mode 100644 final/test/Transforms/TailCallElim/accum_recursion.ll create mode 100644 final/test/Transforms/TailCallElim/ackermann.ll create mode 100644 final/test/Transforms/TailCallElim/dg.exp create mode 100644 final/test/Transforms/TailCallElim/dont-tce-tail-marked-call.ll create mode 100644 final/test/Transforms/TailCallElim/dont_reorder_load.ll create mode 100644 final/test/Transforms/TailCallElim/dup_tail.ll create mode 100644 final/test/Transforms/TailCallElim/inf-recursion.ll create mode 100644 final/test/Transforms/TailCallElim/intervening-inst.ll create mode 100644 final/test/Transforms/TailCallElim/move_alloca_for_tail_call.ll create mode 100644 final/test/Transforms/TailCallElim/nocapture.ll create mode 100644 final/test/Transforms/TailCallElim/reorder_load.ll create mode 100644 final/test/Transforms/TailCallElim/return_constant.ll create mode 100644 final/test/Transforms/TailCallElim/trivial_codegen_tailcall.ll create mode 100644 final/test/Transforms/TailDup/2003-06-24-Simpleloop.ll create mode 100644 final/test/Transforms/TailDup/2003-07-22-InfiniteLoop.ll create mode 100644 final/test/Transforms/TailDup/2003-08-23-InvalidatedPointers.ll create mode 100644 final/test/Transforms/TailDup/2003-08-31-UnreachableBlocks.ll create mode 100644 final/test/Transforms/TailDup/2004-04-01-DemoteRegToStack.ll create mode 100644 final/test/Transforms/TailDup/2008-05-13-InfiniteLoop.ll create mode 100644 final/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll create mode 100644 final/test/Transforms/TailDup/2009-07-31-phicrash.ll create mode 100644 final/test/Transforms/TailDup/MergeTest.ll create mode 100644 final/test/Transforms/TailDup/PHIUpdateTest.ll create mode 100644 final/test/Transforms/TailDup/basictest.ll create mode 100644 final/test/Transforms/TailDup/basictest2.ll create mode 100644 final/test/Transforms/TailDup/dg.exp create mode 100644 final/test/Transforms/TailDup/if-tail-dup.ll create mode 100644 final/test/Unit/lit.cfg create mode 100644 final/test/Unit/lit.site.cfg.in create mode 100644 final/test/Verifier/2002-04-13-RetTypes.ll create mode 100644 final/test/Verifier/2002-11-05-GetelementptrPointers.ll create mode 100644 final/test/Verifier/2004-05-21-SwitchConstantMismatch.ll create mode 100644 final/test/Verifier/2005-03-21-UndefinedTypeReference.ll create mode 100644 final/test/Verifier/2006-07-11-StoreStruct.ll create mode 100644 final/test/Verifier/2006-10-15-AddrLabel.ll create mode 100644 final/test/Verifier/2006-12-12-IntrinsicDefine.ll create mode 100644 final/test/Verifier/2007-12-21-InvokeParamAttrs.ll create mode 100644 final/test/Verifier/2008-01-11-VarargAttrs.ll create mode 100644 final/test/Verifier/2008-03-01-AllocaSized.ll create mode 100644 final/test/Verifier/2008-08-22-MemCpyAlignment.ll create mode 100644 final/test/Verifier/2008-11-15-RetVoid.ll create mode 100644 final/test/Verifier/2009-05-29-InvokeResult1.ll create mode 100644 final/test/Verifier/2009-05-29-InvokeResult2.ll create mode 100644 
final/test/Verifier/2009-05-29-InvokeResult3.ll create mode 100644 final/test/Verifier/2010-08-07-PointerIntrinsic.ll create mode 100644 final/test/Verifier/AmbiguousPhi.ll create mode 100644 final/test/Verifier/PhiGrouping.ll create mode 100644 final/test/Verifier/README.txt create mode 100644 final/test/Verifier/SelfReferential.ll create mode 100644 final/test/Verifier/aliasing-chain.ll create mode 100644 final/test/Verifier/byval-1.ll create mode 100644 final/test/Verifier/byval-2.ll create mode 100644 final/test/Verifier/byval-4.ll create mode 100644 final/test/Verifier/dg.exp create mode 100644 final/test/Verifier/gcread-ptrptr.ll create mode 100644 final/test/Verifier/gcroot-alloca.ll create mode 100644 final/test/Verifier/gcroot-meta.ll create mode 100644 final/test/Verifier/gcroot-ptrptr.ll create mode 100644 final/test/Verifier/gcwrite-ptrptr.ll create mode 100644 final/test/Verifier/invoke-1.ll create mode 100644 final/test/Verifier/invoke-2.ll create mode 100644 final/test/lib/llvm.exp create mode 100644 final/test/lib/llvm2cpp.exp create mode 100644 final/test/lit.cfg create mode 100644 final/test/lit.site.cfg.in create mode 100644 final/test/site.exp.in create mode 100644 final/tools/CMakeLists.txt create mode 100644 final/tools/Makefile create mode 100644 final/tools/bugpoint-passes/CMakeLists.txt create mode 100644 final/tools/bugpoint-passes/Makefile create mode 100644 final/tools/bugpoint-passes/TestPasses.cpp create mode 100644 final/tools/bugpoint-passes/bugpoint.exports create mode 100644 final/tools/bugpoint/BugDriver.cpp create mode 100644 final/tools/bugpoint/BugDriver.h create mode 100644 final/tools/bugpoint/CMakeLists.txt create mode 100644 final/tools/bugpoint/CrashDebugger.cpp create mode 100644 final/tools/bugpoint/ExecutionDriver.cpp create mode 100644 final/tools/bugpoint/ExtractFunction.cpp create mode 100644 final/tools/bugpoint/FindBugs.cpp create mode 100644 final/tools/bugpoint/ListReducer.h create mode 100644 final/tools/bugpoint/Makefile create mode 100644 final/tools/bugpoint/Miscompilation.cpp create mode 100644 final/tools/bugpoint/OptimizerDriver.cpp create mode 100644 final/tools/bugpoint/ToolRunner.cpp create mode 100644 final/tools/bugpoint/ToolRunner.h create mode 100644 final/tools/bugpoint/bugpoint.cpp create mode 100644 final/tools/edis/CMakeLists.txt create mode 100644 final/tools/edis/EDMain.cpp create mode 100644 final/tools/edis/EnhancedDisassembly.exports create mode 100644 final/tools/edis/Makefile create mode 100644 final/tools/gold/Makefile create mode 100644 final/tools/gold/README.txt create mode 100644 final/tools/gold/gold-plugin.cpp create mode 100644 final/tools/gold/gold.exports create mode 100644 final/tools/llc/CMakeLists.txt create mode 100644 final/tools/llc/Makefile create mode 100644 final/tools/llc/llc.cpp create mode 100644 final/tools/lli/CMakeLists.txt create mode 100644 final/tools/lli/Makefile create mode 100644 final/tools/lli/lli.cpp create mode 100644 final/tools/llvm-ar/CMakeLists.txt create mode 100644 final/tools/llvm-ar/Makefile create mode 100644 final/tools/llvm-ar/llvm-ar.cpp create mode 100644 final/tools/llvm-as/CMakeLists.txt create mode 100644 final/tools/llvm-as/Makefile create mode 100644 final/tools/llvm-as/llvm-as.cpp create mode 100644 final/tools/llvm-bcanalyzer/CMakeLists.txt create mode 100644 final/tools/llvm-bcanalyzer/Makefile create mode 100644 final/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp create mode 100644 final/tools/llvm-config/CMakeLists.txt create mode 100644 
final/tools/llvm-config/Makefile create mode 100755 final/tools/llvm-config/find-cycles.pl create mode 100644 final/tools/llvm-config/llvm-config.in.in create mode 100644 final/tools/llvm-diff/CMakeLists.txt create mode 100644 final/tools/llvm-diff/DifferenceEngine.cpp create mode 100644 final/tools/llvm-diff/DifferenceEngine.h create mode 100644 final/tools/llvm-diff/Makefile create mode 100644 final/tools/llvm-diff/llvm-diff.cpp create mode 100644 final/tools/llvm-dis/CMakeLists.txt create mode 100644 final/tools/llvm-dis/Makefile create mode 100644 final/tools/llvm-dis/llvm-dis.cpp create mode 100644 final/tools/llvm-extract/CMakeLists.txt create mode 100644 final/tools/llvm-extract/Makefile create mode 100644 final/tools/llvm-extract/llvm-extract.cpp create mode 100644 final/tools/llvm-ld/CMakeLists.txt create mode 100644 final/tools/llvm-ld/Makefile create mode 100644 final/tools/llvm-ld/Optimize.cpp create mode 100644 final/tools/llvm-ld/llvm-ld.cpp create mode 100644 final/tools/llvm-link/CMakeLists.txt create mode 100644 final/tools/llvm-link/Makefile create mode 100644 final/tools/llvm-link/llvm-link.cpp create mode 100644 final/tools/llvm-mc/CMakeLists.txt create mode 100644 final/tools/llvm-mc/Disassembler.cpp create mode 100644 final/tools/llvm-mc/Disassembler.h create mode 100644 final/tools/llvm-mc/Makefile create mode 100644 final/tools/llvm-mc/llvm-mc.cpp create mode 100644 final/tools/llvm-nm/CMakeLists.txt create mode 100644 final/tools/llvm-nm/Makefile create mode 100644 final/tools/llvm-nm/llvm-nm.cpp create mode 100644 final/tools/llvm-objdump/CMakeLists.txt create mode 100644 final/tools/llvm-objdump/Makefile create mode 100644 final/tools/llvm-objdump/llvm-objdump.cpp create mode 100644 final/tools/llvm-prof/CMakeLists.txt create mode 100644 final/tools/llvm-prof/Makefile create mode 100644 final/tools/llvm-prof/llvm-prof.cpp create mode 100644 final/tools/llvm-ranlib/CMakeLists.txt create mode 100644 final/tools/llvm-ranlib/Makefile create mode 100644 final/tools/llvm-ranlib/llvm-ranlib.cpp create mode 100644 final/tools/llvm-shlib/Makefile create mode 100644 final/tools/llvm-stub/CMakeLists.txt create mode 100644 final/tools/llvm-stub/Makefile create mode 100644 final/tools/llvm-stub/llvm-stub.c create mode 100644 final/tools/llvmc/CMakeLists.txt create mode 100644 final/tools/llvmc/Makefile create mode 100644 final/tools/llvmc/doc/LLVMC-Reference.rst create mode 100644 final/tools/llvmc/doc/LLVMC-Tutorial.rst create mode 100644 final/tools/llvmc/doc/Makefile create mode 100644 final/tools/llvmc/doc/img/lines.gif create mode 100644 final/tools/llvmc/examples/Hello/Hello.cpp create mode 100644 final/tools/llvmc/examples/Hello/Makefile create mode 100644 final/tools/llvmc/examples/Makefile create mode 100644 final/tools/llvmc/examples/Simple/Makefile create mode 100644 final/tools/llvmc/examples/Simple/Simple.cpp create mode 100644 final/tools/llvmc/examples/Simple/Simple.td create mode 100644 final/tools/llvmc/examples/Skeleton/AutoGenerated.td create mode 100644 final/tools/llvmc/examples/Skeleton/Hooks.cpp create mode 100644 final/tools/llvmc/examples/Skeleton/Main.cpp create mode 100644 final/tools/llvmc/examples/Skeleton/Makefile create mode 100644 final/tools/llvmc/examples/Skeleton/README create mode 100644 final/tools/llvmc/examples/mcc16/Hooks.cpp create mode 100644 final/tools/llvmc/examples/mcc16/Main.cpp create mode 100644 final/tools/llvmc/examples/mcc16/Makefile create mode 100644 final/tools/llvmc/examples/mcc16/PIC16.td create mode 100644 
final/tools/llvmc/examples/mcc16/README create mode 100644 final/tools/llvmc/src/AutoGenerated.td create mode 100644 final/tools/llvmc/src/Base.td.in create mode 100644 final/tools/llvmc/src/Clang.td create mode 100644 final/tools/llvmc/src/Hooks.cpp create mode 100644 final/tools/llvmc/src/Main.cpp create mode 100644 final/tools/llvmc/src/Makefile create mode 100644 final/tools/lto/LTOCodeGenerator.cpp create mode 100644 final/tools/lto/LTOCodeGenerator.h create mode 100644 final/tools/lto/LTOModule.cpp create mode 100644 final/tools/lto/LTOModule.h create mode 100644 final/tools/lto/Makefile create mode 100644 final/tools/lto/lto.cpp create mode 100644 final/tools/lto/lto.exports create mode 100644 final/tools/macho-dump/CMakeLists.txt create mode 100644 final/tools/macho-dump/Makefile create mode 100644 final/tools/macho-dump/macho-dump.cpp create mode 100644 final/tools/opt/AnalysisWrappers.cpp create mode 100644 final/tools/opt/CMakeLists.txt create mode 100644 final/tools/opt/GraphPrinters.cpp create mode 100644 final/tools/opt/Makefile create mode 100644 final/tools/opt/PrintSCC.cpp create mode 100644 final/tools/opt/opt.cpp create mode 100644 final/unittests/ADT/APFloatTest.cpp create mode 100644 final/unittests/ADT/APIntTest.cpp create mode 100644 final/unittests/ADT/BitVectorTest.cpp create mode 100644 final/unittests/ADT/DAGDeltaAlgorithmTest.cpp create mode 100644 final/unittests/ADT/DeltaAlgorithmTest.cpp create mode 100644 final/unittests/ADT/DenseMapTest.cpp create mode 100644 final/unittests/ADT/DenseSetTest.cpp create mode 100644 final/unittests/ADT/FoldingSet.cpp create mode 100644 final/unittests/ADT/ImmutableSetTest.cpp create mode 100644 final/unittests/ADT/IntEqClassesTest.cpp create mode 100644 final/unittests/ADT/IntervalMapTest.cpp create mode 100644 final/unittests/ADT/Makefile create mode 100644 final/unittests/ADT/SmallBitVectorTest.cpp create mode 100644 final/unittests/ADT/SmallStringTest.cpp create mode 100644 final/unittests/ADT/SmallVectorTest.cpp create mode 100644 final/unittests/ADT/SparseBitVectorTest.cpp create mode 100644 final/unittests/ADT/StringMapTest.cpp create mode 100644 final/unittests/ADT/StringRefTest.cpp create mode 100644 final/unittests/ADT/TripleTest.cpp create mode 100644 final/unittests/ADT/TwineTest.cpp create mode 100644 final/unittests/ADT/ilistTest.cpp create mode 100644 final/unittests/Analysis/Makefile create mode 100644 final/unittests/Analysis/ScalarEvolutionTest.cpp create mode 100644 final/unittests/CMakeLists.txt create mode 100644 final/unittests/ExecutionEngine/ExecutionEngineTest.cpp create mode 100644 final/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp create mode 100644 final/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp create mode 100644 final/unittests/ExecutionEngine/JIT/JITTest.cpp create mode 100644 final/unittests/ExecutionEngine/JIT/JITTests.def create mode 100644 final/unittests/ExecutionEngine/JIT/Makefile create mode 100644 final/unittests/ExecutionEngine/JIT/MultiJITTest.cpp create mode 100644 final/unittests/ExecutionEngine/Makefile create mode 100644 final/unittests/Makefile create mode 100644 final/unittests/Makefile.unittest create mode 100644 final/unittests/Support/AllocatorTest.cpp create mode 100644 final/unittests/Support/Casting.cpp create mode 100644 final/unittests/Support/CommandLineTest.cpp create mode 100644 final/unittests/Support/ConstantRangeTest.cpp create mode 100644 final/unittests/Support/EndianTest.cpp create mode 100644 final/unittests/Support/LeakDetectorTest.cpp 
create mode 100644 final/unittests/Support/Makefile create mode 100644 final/unittests/Support/MathExtrasTest.cpp create mode 100644 final/unittests/Support/Path.cpp create mode 100644 final/unittests/Support/RegexTest.cpp create mode 100644 final/unittests/Support/SwapByteOrderTest.cpp create mode 100644 final/unittests/Support/TimeValue.cpp create mode 100644 final/unittests/Support/TypeBuilderTest.cpp create mode 100644 final/unittests/Support/ValueHandleTest.cpp create mode 100644 final/unittests/Support/raw_ostream_test.cpp create mode 100644 final/unittests/Transforms/Makefile create mode 100644 final/unittests/Transforms/Utils/Cloning.cpp create mode 100644 final/unittests/Transforms/Utils/Local.cpp create mode 100644 final/unittests/Transforms/Utils/Makefile create mode 100644 final/unittests/VMCore/ConstantsTest.cpp create mode 100644 final/unittests/VMCore/DerivedTypesTest.cpp create mode 100644 final/unittests/VMCore/InstructionsTest.cpp create mode 100644 final/unittests/VMCore/Makefile create mode 100644 final/unittests/VMCore/MetadataTest.cpp create mode 100644 final/unittests/VMCore/PassManagerTest.cpp create mode 100644 final/unittests/VMCore/ValueMapTest.cpp create mode 100644 final/unittests/VMCore/VerifierTest.cpp create mode 100755 final/utils/CollectDebugInfoUsingLLDB.py create mode 100755 final/utils/CompareDebugInfo.py create mode 100755 final/utils/DSAclean.py create mode 100644 final/utils/DSAextract.py create mode 100644 final/utils/FileCheck/CMakeLists.txt create mode 100644 final/utils/FileCheck/FileCheck.cpp create mode 100644 final/utils/FileCheck/Makefile create mode 100644 final/utils/FileUpdate/CMakeLists.txt create mode 100644 final/utils/FileUpdate/FileUpdate.cpp create mode 100644 final/utils/FileUpdate/Makefile create mode 100755 final/utils/GenLibDeps.pl create mode 100755 final/utils/GetRepositoryPath create mode 100755 final/utils/GetSourceVersion create mode 100644 final/utils/KillTheDoctor/CMakeLists.txt create mode 100644 final/utils/KillTheDoctor/KillTheDoctor.cpp create mode 100644 final/utils/Makefile create mode 100755 final/utils/Misc/zkill create mode 100644 final/utils/NLT.schema create mode 100755 final/utils/NewNightlyTest.pl create mode 100644 final/utils/NightlyTest.gnuplot create mode 100644 final/utils/NightlyTestTemplate.html create mode 100644 final/utils/PerfectShuffle/Makefile create mode 100644 final/utils/PerfectShuffle/PerfectShuffle.cpp create mode 100644 final/utils/TableGen/ARMDecoderEmitter.cpp create mode 100644 final/utils/TableGen/ARMDecoderEmitter.h create mode 100644 final/utils/TableGen/AsmMatcherEmitter.cpp create mode 100644 final/utils/TableGen/AsmMatcherEmitter.h create mode 100644 final/utils/TableGen/AsmWriterEmitter.cpp create mode 100644 final/utils/TableGen/AsmWriterEmitter.h create mode 100644 final/utils/TableGen/AsmWriterInst.cpp create mode 100644 final/utils/TableGen/AsmWriterInst.h create mode 100644 final/utils/TableGen/CMakeLists.txt create mode 100644 final/utils/TableGen/CallingConvEmitter.cpp create mode 100644 final/utils/TableGen/CallingConvEmitter.h create mode 100644 final/utils/TableGen/ClangASTNodesEmitter.cpp create mode 100644 final/utils/TableGen/ClangASTNodesEmitter.h create mode 100644 final/utils/TableGen/ClangAttrEmitter.cpp create mode 100644 final/utils/TableGen/ClangAttrEmitter.h create mode 100644 final/utils/TableGen/ClangDiagnosticsEmitter.cpp create mode 100644 final/utils/TableGen/ClangDiagnosticsEmitter.h create mode 100644 final/utils/TableGen/ClangSACheckersEmitter.cpp 
create mode 100644 final/utils/TableGen/ClangSACheckersEmitter.h create mode 100644 final/utils/TableGen/CodeEmitterGen.cpp create mode 100644 final/utils/TableGen/CodeEmitterGen.h create mode 100644 final/utils/TableGen/CodeGenDAGPatterns.cpp create mode 100644 final/utils/TableGen/CodeGenDAGPatterns.h create mode 100644 final/utils/TableGen/CodeGenInstruction.cpp create mode 100644 final/utils/TableGen/CodeGenInstruction.h create mode 100644 final/utils/TableGen/CodeGenIntrinsics.h create mode 100644 final/utils/TableGen/CodeGenRegisters.h create mode 100644 final/utils/TableGen/CodeGenTarget.cpp create mode 100644 final/utils/TableGen/CodeGenTarget.h create mode 100644 final/utils/TableGen/DAGISelEmitter.cpp create mode 100644 final/utils/TableGen/DAGISelEmitter.h create mode 100644 final/utils/TableGen/DAGISelMatcher.cpp create mode 100644 final/utils/TableGen/DAGISelMatcher.h create mode 100644 final/utils/TableGen/DAGISelMatcherEmitter.cpp create mode 100644 final/utils/TableGen/DAGISelMatcherGen.cpp create mode 100644 final/utils/TableGen/DAGISelMatcherOpt.cpp create mode 100644 final/utils/TableGen/DisassemblerEmitter.cpp create mode 100644 final/utils/TableGen/DisassemblerEmitter.h create mode 100644 final/utils/TableGen/EDEmitter.cpp create mode 100644 final/utils/TableGen/EDEmitter.h create mode 100644 final/utils/TableGen/FastISelEmitter.cpp create mode 100644 final/utils/TableGen/FastISelEmitter.h create mode 100644 final/utils/TableGen/FixedLenDecoderEmitter.cpp create mode 100644 final/utils/TableGen/FixedLenDecoderEmitter.h create mode 100644 final/utils/TableGen/InstrEnumEmitter.cpp create mode 100644 final/utils/TableGen/InstrEnumEmitter.h create mode 100644 final/utils/TableGen/InstrInfoEmitter.cpp create mode 100644 final/utils/TableGen/InstrInfoEmitter.h create mode 100644 final/utils/TableGen/IntrinsicEmitter.cpp create mode 100644 final/utils/TableGen/IntrinsicEmitter.h create mode 100644 final/utils/TableGen/LLVMCConfigurationEmitter.cpp create mode 100644 final/utils/TableGen/LLVMCConfigurationEmitter.h create mode 100644 final/utils/TableGen/Makefile create mode 100644 final/utils/TableGen/NeonEmitter.cpp create mode 100644 final/utils/TableGen/NeonEmitter.h create mode 100644 final/utils/TableGen/OptParserEmitter.cpp create mode 100644 final/utils/TableGen/OptParserEmitter.h create mode 100644 final/utils/TableGen/Record.cpp create mode 100644 final/utils/TableGen/Record.h create mode 100644 final/utils/TableGen/RegisterInfoEmitter.cpp create mode 100644 final/utils/TableGen/RegisterInfoEmitter.h create mode 100644 final/utils/TableGen/StringMatcher.cpp create mode 100644 final/utils/TableGen/StringMatcher.h create mode 100644 final/utils/TableGen/StringToOffsetTable.h create mode 100644 final/utils/TableGen/SubtargetEmitter.cpp create mode 100644 final/utils/TableGen/SubtargetEmitter.h create mode 100644 final/utils/TableGen/TGLexer.cpp create mode 100644 final/utils/TableGen/TGLexer.h create mode 100644 final/utils/TableGen/TGParser.cpp create mode 100644 final/utils/TableGen/TGParser.h create mode 100644 final/utils/TableGen/TGValueTypes.cpp create mode 100644 final/utils/TableGen/TableGen.cpp create mode 100644 final/utils/TableGen/TableGenBackend.cpp create mode 100644 final/utils/TableGen/TableGenBackend.h create mode 100644 final/utils/TableGen/X86DisassemblerShared.h create mode 100644 final/utils/TableGen/X86DisassemblerTables.cpp create mode 100644 final/utils/TableGen/X86DisassemblerTables.h create mode 100644 final/utils/TableGen/X86ModRMFilters.h 
create mode 100644 final/utils/TableGen/X86RecognizableInstr.cpp create mode 100644 final/utils/TableGen/X86RecognizableInstr.h create mode 100644 final/utils/Target/ARM/analyze-match-table.py create mode 100755 final/utils/UpdateCMakeLists.pl create mode 100644 final/utils/bugpoint/RemoteRunSafely.sh create mode 100644 final/utils/buildit/GNUmakefile create mode 100755 final/utils/buildit/build_llvm create mode 100755 final/utils/cgiplotNLT.pl create mode 100755 final/utils/check-each-file create mode 100755 final/utils/codegen-diff create mode 100644 final/utils/count/CMakeLists.txt create mode 100644 final/utils/count/Makefile create mode 100644 final/utils/count/count.c create mode 100755 final/utils/countloc.sh create mode 100644 final/utils/crosstool/ARM/README create mode 100755 final/utils/crosstool/ARM/build-install-linux.sh create mode 100755 final/utils/crosstool/create-snapshots.sh create mode 100644 final/utils/emacs/README create mode 100644 final/utils/emacs/emacs.el create mode 100644 final/utils/emacs/llvm-mode.el create mode 100644 final/utils/emacs/tablegen-mode.el create mode 100755 final/utils/findmisopt create mode 100755 final/utils/findoptdiff create mode 100755 final/utils/findsym.pl create mode 100644 final/utils/fpcmp/Makefile create mode 100644 final/utils/fpcmp/fpcmp.cpp create mode 100755 final/utils/getsrcs.sh create mode 100755 final/utils/git/find-rev create mode 100644 final/utils/importNLT.pl create mode 100644 final/utils/jedit/README create mode 100644 final/utils/jedit/tablegen.xml create mode 100644 final/utils/kate/README create mode 100644 final/utils/kate/llvm.xml create mode 100644 final/utils/lint/common_lint.py create mode 100755 final/utils/lint/cpp_lint.py create mode 100755 final/utils/lint/generic_lint.py create mode 100755 final/utils/lint/remove_trailing_whitespace.sh create mode 100644 final/utils/lit/TODO create mode 100755 final/utils/lit/lit.py create mode 100644 final/utils/lit/lit/ExampleTests.ObjDir/lit.site.cfg create mode 100644 final/utils/lit/lit/ExampleTests/Clang/fsyntax-only.c create mode 100644 final/utils/lit/lit/ExampleTests/Clang/lit.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/bar-test.ll create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.InTree/test/Bar/dg.exp create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.InTree/test/lit.site.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.InTree/test/site.exp create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/lit.local.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/Foo/lit.local.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/lit.site.cfg create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/obj/test/site.exp create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/data.txt create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/dg.exp create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/Foo/pct-S.ll create mode 100644 final/utils/lit/lit/ExampleTests/LLVM.OutOfTree/src/test/lit.cfg create mode 100644 final/utils/lit/lit/ExampleTests/ShExternal/lit.local.cfg create mode 100644 final/utils/lit/lit/ExampleTests/ShInternal/lit.local.cfg create mode 100644 final/utils/lit/lit/ExampleTests/TclTest/lit.local.cfg create mode 100644 final/utils/lit/lit/ExampleTests/TclTest/stderr-pipe.ll create 
mode 100644 final/utils/lit/lit/ExampleTests/TclTest/tcl-redir-1.ll create mode 100644 final/utils/lit/lit/ExampleTests/fail.c create mode 100644 final/utils/lit/lit/ExampleTests/lit.cfg create mode 100644 final/utils/lit/lit/ExampleTests/pass.c create mode 100644 final/utils/lit/lit/ExampleTests/required-and-missing.c create mode 100644 final/utils/lit/lit/ExampleTests/required-and-present.c create mode 100644 final/utils/lit/lit/ExampleTests/xfail.c create mode 100644 final/utils/lit/lit/ExampleTests/xpass.c create mode 100644 final/utils/lit/lit/LitConfig.py create mode 100644 final/utils/lit/lit/LitFormats.py create mode 100644 final/utils/lit/lit/LitTestCase.py create mode 100644 final/utils/lit/lit/ProgressBar.py create mode 100644 final/utils/lit/lit/ShCommands.py create mode 100644 final/utils/lit/lit/ShUtil.py create mode 100644 final/utils/lit/lit/TclUtil.py create mode 100644 final/utils/lit/lit/Test.py create mode 100644 final/utils/lit/lit/TestFormats.py create mode 100644 final/utils/lit/lit/TestRunner.py create mode 100644 final/utils/lit/lit/TestingConfig.py create mode 100644 final/utils/lit/lit/Util.py create mode 100644 final/utils/lit/lit/__init__.py create mode 100755 final/utils/lit/lit/main.py create mode 100644 final/utils/lit/setup.py create mode 100644 final/utils/llvm-lit/CMakeLists.txt create mode 100644 final/utils/llvm-lit/Makefile create mode 100755 final/utils/llvm-lit/llvm-lit.in create mode 100755 final/utils/llvm-native-gcc create mode 100755 final/utils/llvm-native-gxx create mode 100644 final/utils/llvm.grm create mode 100755 final/utils/llvmbuild create mode 100755 final/utils/llvmdo create mode 100755 final/utils/llvmgrep create mode 100755 final/utils/makellvm create mode 100644 final/utils/not/CMakeLists.txt create mode 100644 final/utils/not/Makefile create mode 100644 final/utils/not/not.cpp create mode 100644 final/utils/parseNLT.pl create mode 100644 final/utils/plotNLT.pl create mode 100755 final/utils/profile.pl create mode 100755 final/utils/release/test-release.sh create mode 100755 final/utils/test_debuginfo.pl create mode 100644 final/utils/unittest/CMakeLists.txt create mode 100644 final/utils/unittest/Makefile create mode 100644 final/utils/unittest/UnitTestMain/Makefile create mode 100644 final/utils/unittest/UnitTestMain/TestMain.cpp create mode 100644 final/utils/unittest/googletest/LICENSE.TXT create mode 100644 final/utils/unittest/googletest/Makefile create mode 100644 final/utils/unittest/googletest/README.LLVM create mode 100644 final/utils/unittest/googletest/gtest-death-test.cc create mode 100644 final/utils/unittest/googletest/gtest-filepath.cc create mode 100644 final/utils/unittest/googletest/gtest-port.cc create mode 100644 final/utils/unittest/googletest/gtest-test-part.cc create mode 100644 final/utils/unittest/googletest/gtest-typed-test.cc create mode 100644 final/utils/unittest/googletest/gtest.cc create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-death-test.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-message.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-param-test.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-spi.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-test-part.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest-typed-test.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest.h create mode 100644 
final/utils/unittest/googletest/include/gtest/gtest_pred_impl.h create mode 100644 final/utils/unittest/googletest/include/gtest/gtest_prod.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-death-test-internal.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-filepath.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-internal-inl.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-internal.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-linked_ptr.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-param-util-generated.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-param-util.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-port.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-string.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-tuple.h create mode 100644 final/utils/unittest/googletest/include/gtest/internal/gtest-type-util.h create mode 100644 final/utils/valgrind/i386-pc-linux-gnu.supp create mode 100644 final/utils/valgrind/x86_64-pc-linux-gnu.supp create mode 100644 final/utils/vim/README create mode 100644 final/utils/vim/llvm.vim create mode 100644 final/utils/vim/tablegen.vim create mode 100644 final/utils/vim/vimrc create mode 100755 final/utils/webNLT.pl create mode 100644 final/website/index.html diff --git a/final/.gitignore b/final/.gitignore new file mode 100644 index 00000000000..2e2713a48ae --- /dev/null +++ b/final/.gitignore @@ -0,0 +1,37 @@ +#==============================================================================# +# This file specifies intentionally untracked files that git should ignore. +# See: http://www.kernel.org/pub/software/scm/git/docs/gitignore.html +# +# This file is intentionally different from the output of `git svn show-ignore`, +# as most of those are useless. +#==============================================================================# + +#==============================================================================# +# File extensions to be ignored anywhere in the tree. +#==============================================================================# +# Temp files created by most text editors. +*~ +# Merge files created by git. +*.orig +# Byte compiled python modules. +*.pyc + +#==============================================================================# +# Explicit files to ignore (only matches one). +#==============================================================================# +.gitusers +cscope.files +cscope.out +autoconf/aclocal.m4 +autoconf/autom4te.cache + +#==============================================================================# +# Directories to ignore (do not add trailing '/'s, they skip symlinks). +#==============================================================================# +# External projects that are tracked independently. +projects/* +!projects/sample +!projects/CMakeLists.txt +!projects/Makefile +# Clang, which is tracked independently. +tools/clang diff --git a/final/CMakeLists.txt b/final/CMakeLists.txt new file mode 100644 index 00000000000..2d201df2f06 --- /dev/null +++ b/final/CMakeLists.txt @@ -0,0 +1,296 @@ +# See docs/CMake.html for instructions about how to build LLVM with CMake. 
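For orientation, a minimal out-of-source configure-and-build sequence consistent with the docs/CMake.html reference above and with the in-source-build check just below might look like this (the build directory name and the target list are illustrative assumptions, not part of this patch):

    mkdir build && cd build
    cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_TARGETS_TO_BUILD=X86 ../llvm
    make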
+ +project(LLVM) +cmake_minimum_required(VERSION 2.8) + +# Add path for custom modules +set(CMAKE_MODULE_PATH + ${CMAKE_MODULE_PATH} + "${CMAKE_CURRENT_SOURCE_DIR}/cmake" + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" + ) + +set(PACKAGE_VERSION "2.9") + +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + +include(VersionFromVCS) + +option(LLVM_APPEND_VC_REV + "Append the version control system revision id to LLVM version" OFF) + +if( LLVM_APPEND_VC_REV ) + add_version_info_from_vcs(PACKAGE_VERSION) +endif() + +set(PACKAGE_NAME llvm) +set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") +set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu") + +if( CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE ) + message(FATAL_ERROR "In-source builds are not allowed. +CMake would overwrite the makefiles distributed with LLVM. +Please create a directory and run cmake from there, passing the path +to this source directory as the last argument. +This process created the file `CMakeCache.txt' and the directory `CMakeFiles'. +Please delete them.") +endif() + +string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE) + +set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(LLVM_MAIN_INCLUDE_DIR ${LLVM_MAIN_SRC_DIR}/include) +set(LLVM_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) +set(LLVM_TOOLS_BINARY_DIR ${LLVM_BINARY_DIR}/bin) +set(LLVM_EXAMPLES_BINARY_DIR ${LLVM_BINARY_DIR}/examples) +set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name (32/64)" ) + +if( NOT CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR ) + file(GLOB_RECURSE + tablegenned_files_on_include_dir + "${LLVM_MAIN_SRC_DIR}/include/llvm/*.gen") + file(GLOB_RECURSE + tablegenned_files_on_lib_dir + "${LLVM_MAIN_SRC_DIR}/lib/Target/*.inc") + if( tablegenned_files_on_include_dir OR tablegenned_files_on_lib_dir) + message(FATAL_ERROR "Apparently there is a previous in-source build, +probably as the result of running `configure' and `make' on +${LLVM_MAIN_SRC_DIR}. +This may cause problems. The suspicious files are: +${tablegenned_files_on_lib_dir} +${tablegenned_files_on_include_dir} +Please clean the source directory.") + endif() +endif() + +set(LLVM_ALL_TARGETS + Alpha + ARM + Blackfin + CBackend + CellSPU + CppBackend + Mips + MBlaze + MSP430 + PowerPC + PTX + Sparc + SystemZ + X86 + XCore + ) + +if( MSVC ) + set(LLVM_TARGETS_TO_BUILD X86 + CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") +else( MSVC ) + set(LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} + CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") +endif( MSVC ) + +option(LLVM_ENABLE_CBE_PRINTF_A "Set to ON if CBE is enabled for printf %a output" ON) +if(LLVM_ENABLE_CBE_PRINTF_A) + set(ENABLE_CBE_PRINTF_A 1) +endif() + +option(LLVM_ENABLE_TIMESTAMPS "Enable embedding timestamp information in build" ON) +if(LLVM_ENABLE_TIMESTAMPS) + set(ENABLE_TIMESTAMPS 1) +endif() + +option(LLVM_ENABLE_FFI "Use libffi to call external functions from the interpreter" OFF) +set(FFI_LIBRARY_DIR "" CACHE PATH "Additional directory, where CMake should search for libffi.so") +set(FFI_INCLUDE_DIR "" CACHE PATH "Additional directory, where CMake should search for ffi.h or ffi/ffi.h") + +set(LLVM_TARGET_ARCH "host" + CACHE STRING "Set target to use for LLVM JIT or use \"host\" for automatic detection.") + +option(LLVM_ENABLE_THREADS "Use threads if available." 
ON) + +if( LLVM_TARGETS_TO_BUILD STREQUAL "all" ) + set( LLVM_TARGETS_TO_BUILD ${LLVM_ALL_TARGETS} ) +endif() + +set(LLVM_ENUM_TARGETS "") +foreach(c ${LLVM_TARGETS_TO_BUILD}) + list(FIND LLVM_ALL_TARGETS ${c} idx) + if( idx LESS 0 ) + message(FATAL_ERROR "The target `${c}' does not exist. + It should be one of\n${LLVM_ALL_TARGETS}") + else() + set(LLVM_ENUM_TARGETS "${LLVM_ENUM_TARGETS}LLVM_TARGET(${c})\n") + endif() +endforeach(c) + +# Produce llvm/Config/Targets.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Targets.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/Targets.def + ) + +set(llvm_builded_incs_dir ${LLVM_BINARY_DIR}/include/llvm) + +include(AddLLVMDefinitions) + +option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) + +# MSVC has a gazillion warnings with this. +if( MSVC ) + option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF) +else( MSVC ) + option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) +endif() + +option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) + +if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) + option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF) +else() + option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON) +endif() + +# All options referred to from HandleLLVMOptions have to be specified +# BEFORE this include, otherwise options will not be correctly set on +# the first CMake run. +include(config-ix) +include(HandleLLVMOptions) + +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake + ${LLVM_BINARY_DIR}/include/llvm/Config/config.h) + +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/llvm-config.h.cmake + ${LLVM_BINARY_DIR}/include/llvm/Config/llvm-config.h) + +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake + ${LLVM_BINARY_DIR}/include/llvm/Support/DataTypes.h) + +set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR} ) +set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib ) +set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib ) + +set(CMAKE_INCLUDE_CURRENT_DIR ON) + +include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR}) + +if( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + SET(CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} "-include llvm/Support/Solaris.h") +endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) + +include(AddLLVM) +include(TableGen) + +if( MINGW ) + get_system_libs(LLVM_SYSTEM_LIBS_LIST) + foreach(l ${LLVM_SYSTEM_LIBS_LIST}) + set(LLVM_SYSTEM_LIBS "${LLVM_SYSTEM_LIBS} -l${l}") + endforeach() + set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}") + set(CMAKE_C_STANDARD_LIBRARIES "${CMAKE_C_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}") +endif() + +if( MINGW ) + # People report that -O3 is unreliable on MinGW. The traditional + # build also uses -O2 for that reason: + llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") +endif() + +# Put this before tblgen. Else we have a circular dependence. +add_subdirectory(lib/Support) + +set(LLVM_TABLEGEN "tblgen" CACHE + STRING "Native TableGen executable.
Saves building one when cross-compiling.") +# Effective tblgen executable to be used: +set(LLVM_TABLEGEN_EXE ${LLVM_TABLEGEN}) + +add_subdirectory(utils/TableGen) + +if( CMAKE_CROSSCOMPILING ) + # This adds a dependency on target `tblgen', so must go after utils/TableGen + include( CrossCompileLLVM ) +endif( CMAKE_CROSSCOMPILING ) + +add_subdirectory(include/llvm) + +add_subdirectory(lib) + +add_subdirectory(utils/FileCheck) +add_subdirectory(utils/FileUpdate) +add_subdirectory(utils/count) +add_subdirectory(utils/not) +add_subdirectory(utils/llvm-lit) + +add_subdirectory(projects) + +option(LLVM_BUILD_TOOLS + "Build the LLVM tools. If OFF, just generate build targets." ON) +option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON) +if( LLVM_INCLUDE_TOOLS ) + add_subdirectory(tools) +endif() + +option(LLVM_BUILD_EXAMPLES + "Build the LLVM example programs. If OFF, just generate build targets." OFF) +option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) +if( LLVM_INCLUDE_EXAMPLES ) + add_subdirectory(examples) +endif() + +option(LLVM_BUILD_TESTS + "Build LLVM unit tests. If OFF, just generate build targets." OFF) +if( LLVM_INCLUDE_TESTS ) + add_subdirectory(test) + add_subdirectory(utils/unittest) + add_subdirectory(unittests) + if (MSVC) + # This utility is used to prevent crashing tests from calling Dr. Watson on + # Windows. + add_subdirectory(utils/KillTheDoctor) + endif() +endif() + +add_subdirectory(cmake/modules) + +install(DIRECTORY include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "*.td" + PATTERN "*.inc" + PATTERN "LICENSE.TXT" + PATTERN ".svn" EXCLUDE + ) + +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.def" + PATTERN "*.h" + PATTERN "*.gen" + PATTERN "*.inc" + # Exclude include/llvm/CMakeFiles/intrinsics_gen.dir, matched by "*.def" + PATTERN "CMakeFiles" EXCLUDE + PATTERN ".svn" EXCLUDE + ) + +# TODO: make and install documentation. + +set(CPACK_PACKAGE_VENDOR "LLVM") +set(CPACK_PACKAGE_VERSION_MAJOR 2) +set(CPACK_PACKAGE_VERSION_MINOR 9) +add_version_info_from_vcs(CPACK_PACKAGE_VERSION_PATCH) +include(CPack) + +# Workaround for MSVS10 to avoid the Dialog Hell +# FIXME: This could be removed with a future version of CMake. +if(MSVC_VERSION EQUAL 1600) + set(LLVM_SLN_FILENAME "${CMAKE_CURRENT_BINARY_DIR}/LLVM.sln") + if( EXISTS "${LLVM_SLN_FILENAME}" ) + file(APPEND "${LLVM_SLN_FILENAME}" "\n# This should be regenerated!\n") + endif() +endif() diff --git a/final/CREDITS.TXT b/final/CREDITS.TXT new file mode 100644 index 00000000000..ab01dde338a --- /dev/null +++ b/final/CREDITS.TXT @@ -0,0 +1,362 @@ +This file is a partial list of people who have contributed to the LLVM +project. If you have contributed a patch or made some other contribution to +LLVM, please submit a patch to this file to add yourself, and it will be +done! + +The list is sorted by surname and formatted to allow easy grepping and +beautification by scripts. The fields are: name (N), email (E), web-address +(W), PGP key ID and fingerprint (P), description (D), and snail-mail address +(S).
+ + +N: Vikram Adve +E: vadve@cs.uiuc.edu +W: http://www.cs.uiuc.edu/~vadve/ +D: The Sparc64 backend, provider of much wisdom, and motivator for LLVM + +N: Owen Anderson +E: resistor@mac.com +D: LCSSA pass and related LoopUnswitch work +D: GVNPRE pass, TargetData refactoring, random improvements + +N: Henrik Bach +D: MingW Win32 API portability layer + +N: Nate Begeman +E: natebegeman@mac.com +D: PowerPC backend developer +D: Target-independent code generator and analysis improvements + +N: Daniel Berlin +E: dberlin@dberlin.org +D: ET-Forest implementation. +D: Sparse bitmap + +N: Neil Booth +E: neil@daikokuya.co.uk +D: APFloat implementation. + +N: Misha Brukman +E: brukman+llvm@uiuc.edu +W: http://misha.brukman.net +D: Portions of X86 and Sparc JIT compilers, PowerPC backend +D: Incremental bitcode loader + +N: Cameron Buschardt +E: buschard@uiuc.edu +D: The `mem2reg' pass - promotes values stored in memory to registers + +N: Chandler Carruth +E: chandlerc@gmail.com +D: LinkTimeOptimizer for Linux, via binutils integration, and C API + +N: Casey Carter +E: ccarter@uiuc.edu +D: Fixes to the Reassociation pass, various improvement patches + +N: Evan Cheng +E: evan.cheng@apple.com +D: ARM and X86 backends +D: Instruction scheduler improvements +D: Register allocator improvements +D: Loop optimizer improvements +D: Target-independent code generator improvements + +N: Dan Villiom Podlaski Christiansen +E: danchr@gmail.com +E: danchr@cs.au.dk +W: http://villiom.dk +D: LLVM Makefile improvements +D: Clang diagnostic & driver tweaks +S: Aarhus, Denmark + +N: Jeff Cohen +E: jeffc@jolt-lang.org +W: http://jolt-lang.org +D: Native Win32 API portability layer + +N: John T. Criswell +E: criswell@uiuc.edu +D: Original Autoconf support, documentation improvements, bug fixes + +N: Stefanus Du Toit +E: stefanus.dutoit@rapidmind.com +D: Bug fixes and minor improvements + +N: Rafael Avila de Espindola +E: rafael.espindola@gmail.com +D: The ARM backend + +N: Alkis Evlogimenos +E: alkis@evlogimenos.com +D: Linear scan register allocator, many codegen improvements, Java frontend + +N: Ryan Flynn +E: pizza@parseerror.com +D: Miscellaneous bug fixes + +N: Brian Gaeke +E: gaeke@uiuc.edu +W: http://www.students.uiuc.edu/~gaeke/ +D: Portions of X86 static and JIT compilers; initial SparcV8 backend +D: Dynamic trace optimizer +D: FreeBSD/X86 compatibility fixes, the llvm-nm tool + +N: Nicolas Geoffray +E: nicolas.geoffray@lip6.fr +W: http://www-src.lip6.fr/homepages/Nicolas.Geoffray/ +D: PPC backend fixes for Linux + +N: Louis Gerbarg +D: Portions of the PowerPC backend + +N: Saem Ghani +E: saemghani@gmail.com +D: Callgraph class cleanups + +N: Mikhail Glushenkov +E: foldr@codedgers.com +D: Author of llvmc2 + +N: Dan Gohman +E: gohman@apple.com +D: Miscellaneous bug fixes + +N: David Goodwin +E: david@goodwinz.net +D: Thumb-2 code generator + +N: David Greene +E: greened@obbligato.org +D: Miscellaneous bug fixes +D: Register allocation refactoring + +N: Gabor Greif +E: ggreif@gmail.com +D: Improvements for space efficiency + +N: James Grosbach +E: grosbach@apple.com +D: SjLj exception handling support +D: General fixes and improvements for the ARM back-end + +N: Lang Hames +E: lhames@gmail.com +D: PBQP-based register allocator + +N: Gordon Henriksen +E: gordonhenriksen@mac.com +D: Pluggable GC support +D: C interface +D: Ocaml bindings + +N: Raul Fernandes Herbster +E: raul@dsc.ufcg.edu.br +D: JIT support for ARM + +N: Paolo Invernizzi +E: arathorn@fastwebnet.it +D: Visual C++ compatibility fixes + +N: Patrick 
Jenkins +E: patjenk@wam.umd.edu +D: Nightly Tester + +N: Dale Johannesen +E: dalej@apple.com +D: ARM constant islands improvements +D: Tail merging improvements +D: Rewrite X87 back end +D: Use APFloat for floating point constants widely throughout compiler +D: Implement X87 long double + +N: Brad Jones +E: kungfoomaster@nondot.org +D: Support for packed types + +N: Rod Kay +E: rkay@auroraux.org +D: Author of LLVM Ada bindings + +N: Eric Kidd +W: http://randomhacks.net/ +D: llvm-config script + +N: Anton Korobeynikov +E: asl@math.spbu.ru +D: Mingw32 fixes, cross-compiling support, stdcall/fastcall calling conv. +D: x86/linux PIC codegen, aliases, regparm/visibility attributes +D: Switch lowering refactoring + +N: Sumant Kowshik +E: kowshik@uiuc.edu +D: Author of the original C backend + +N: Benjamin Kramer +E: benny.kra@gmail.com +D: Miscellaneous bug fixes + +N: Christopher Lamb +E: christopher.lamb@gmail.com +D: aligned load/store support, parts of noalias and restrict support +D: vreg subreg infrastructure, X86 codegen improvements based on subregs +D: address spaces + +N: Jim Laskey +E: jlaskey@apple.com +D: Improvements to the PPC backend, instruction scheduling +D: Debug and Dwarf implementation +D: Auto upgrade mangler +D: llvm-gcc4 svn wrangler + +N: Chris Lattner +E: sabre@nondot.org +W: http://nondot.org/~sabre/ +D: Primary architect of LLVM + +N: Tanya Lattner (Tanya Brethour) +E: tonic@nondot.org +W: http://nondot.org/~tonic/ +D: The initial llvm-ar tool, converted regression testsuite to dejagnu +D: Modulo scheduling in the SparcV9 backend +D: Release manager (1.7+) + +N: Andrew Lenharth +E: alenhar2@cs.uiuc.edu +W: http://www.lenharth.org/~andrewl/ +D: Alpha backend +D: Sampling based profiling + +N: Nick Lewycky +E: nicholas@mxc.ca +D: PredicateSimplifier pass + +N: Bruno Cardoso Lopes +E: bruno.cardoso@gmail.com +W: http://www.brunocardoso.org +D: The Mips backend + +N: Duraid Madina +E: duraid@octopus.com.au +W: http://kinoko.c.u-tokyo.ac.jp/~duraid/ +D: IA64 backend, BigBlock register allocator + +N: Michael McCracken +E: michael.mccracken@gmail.com +D: Line number support for llvmgcc + +N: Vladimir Merzliakov +E: wanderer@rsu.ru +D: Test suite fixes for FreeBSD + +N: Scott Michel +E: scottm@aero.org +D: Added STI Cell SPU backend. + +N: Takumi Nakamura +E: geek4civic@gmail.com +E: chapuni@hf.rim.or.jp +D: Cygwin and MinGW support. +S: Yokohama, Japan + +N: Edward O'Callaghan +E: eocallaghan@auroraux.org +W: http://www.auroraux.org +D: Add Clang support with various other improvements to utils/NewNightlyTest.pl +D: Fix and maintain Solaris & AuroraUX support for llvm, various build warnings +D: and error clean ups. 
+ +N: Morten Ofstad +E: morten@hue.no +D: Visual C++ compatibility fixes + +N: Jakob Stoklund Olesen +E: stoklund@2pi.dk +D: Machine code verifier +D: Blackfin backend + +N: Richard Osborne +E: richard@xmos.com +D: XCore backend + +N: Devang Patel +E: dpatel@apple.com +D: LTO tool, PassManager rewrite, Loop Pass Manager, Loop Rotate +D: GCC PCH Integration (llvm-gcc), llvm-gcc improvements +D: Optimizer improvements, Loop Index Split + +N: Sandeep Patel +E: deeppatel1987@gmail.com +D: ARM calling conventions rewrite, hard float support + +N: Wesley Peck +E: peckw@wesleypeck.com +W: http://wesleypeck.com/ +D: MicroBlaze backend + +N: Vladimir Prus +W: http://vladimir_prus.blogspot.com +E: ghost@cs.msu.su +D: Made inst_iterator behave like a proper iterator, LowerConstantExprs pass + +N: Roman Samoilov +E: roman@codedgers.com +D: MSIL backend + +N: Duncan Sands +E: baldrick@free.fr +D: Ada support in llvm-gcc +D: Dragonegg plugin +D: Exception handling improvements +D: Type legalizer rewrite + +N: Ruchira Sasanka +E: sasanka@uiuc.edu +D: Graph coloring register allocator for the Sparc64 backend + +N: Arnold Schwaighofer +E: arnold.schwaighofer@gmail.com +D: Tail call optimization for the x86 backend + +N: Shantonu Sen +E: ssen@apple.com +D: Miscellaneous bug fixes + +N: Anand Shukla +E: ashukla@cs.uiuc.edu +D: The `paths' pass + +N: Michael J. Spencer +E: bigcheesegs@gmail.com +D: Shepherding Windows COFF support into MC. +D: Lots of Windows stuff. + +N: Reid Spencer +E: rspencer@reidspencer.com +W: http://reidspencer.com/ +D: Lots of stuff, see: http://wiki.llvm.org/index.php/User:Reid + +N: Edwin Torok +E: edwintorok@gmail.com +D: Miscellaneous bug fixes + +N: Adam Treat +E: manyoso@yahoo.com +D: C++ bugs filed, and C++ front-end bug fixes. + +N: Lauro Ramos Venancio +E: lauro.venancio@indt.org.br +D: ARM backend improvements +D: Thread Local Storage implementation + +N: Xerxes Ranby +E: xerxes@zafena.se +D: Cmake dependency chain and various bug fixes + +N: Bill Wendling +E: wendling@apple.com +D: Bunches of stuff + +N: Bob Wilson +E: bob.wilson@acm.org +D: Advanced SIMD (NEON) support in the ARM backend diff --git a/final/LICENSE.TXT b/final/LICENSE.TXT new file mode 100644 index 00000000000..b8d2c742096 --- /dev/null +++ b/final/LICENSE.TXT @@ -0,0 +1,69 @@ +============================================================================== +LLVM Release License +============================================================================== +University of Illinois/NCSA +Open Source License + +Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. + +============================================================================== +Copyrights and Licenses for Third Party Software Distributed with LLVM: +============================================================================== +The LLVM software contains code written by third parties. Such software will +have its own individual LICENSE.TXT file in the directory in which it appears. +This file will describe the copyrights, license, and restrictions which apply +to that code. + +The disclaimer of warranty in the University of Illinois Open Source License +applies to all code in the LLVM Distribution, and nothing in any of the +other licenses gives permission to use the names of the LLVM Team or the +University of Illinois to endorse or promote products derived from this +Software. + +The following pieces of software have additional or alternate copyrights, +licenses, and/or restrictions: + +Program Directory +------- --------- +Autoconf llvm/autoconf + llvm/projects/ModuleMaker/autoconf + llvm/projects/sample/autoconf +CellSPU backend llvm/lib/Target/CellSPU/README.txt +Google Test llvm/utils/unittest/googletest +OpenBSD regex llvm/lib/Support/{reg*, COPYRIGHT.regex} diff --git a/final/Makefile b/final/Makefile new file mode 100644 index 00000000000..dbb759dd5fc --- /dev/null +++ b/final/Makefile @@ -0,0 +1,250 @@ +#===- ./Makefile -------------------------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +LEVEL := . + +# Top-Level LLVM Build Stages: +# 1. Build lib/Support, which is used by utils (tblgen). +# 2. Build utils, which is used by VMCore. +# 3. Build VMCore, which builds the Intrinsics.inc file used by libs. +# 4. Build libs, which are needed by llvm-config. +# 5. Build llvm-config, which determines inter-lib dependencies for tools. +# 6. Build tools, runtime, docs. +# +# When cross-compiling, there are some things (tablegen) that need to +# be built for the build system first. + +# If "RC_ProjectName" exists in the environment, and its value is +# "llvmCore", then this is an "Apple-style" build; search for +# "Apple-style" in the comments for more info. Anything else is a +# normal build. +ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore) # Normal build (not "Apple-style").
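As an aside on the dispatch line above: a self-contained sketch of the same findstring test (the file name and echo text are hypothetical) behaves like this:

    # check.mk -- mirrors the RC_ProjectName dispatch above
    ifneq ($(findstring llvmCore, $(RC_ProjectName)),llvmCore)
    all: ; @echo "normal build"
    else
    all: ; @echo "Apple-style build"
    endif

'make -f check.mk' prints "normal build", while 'make -f check.mk RC_ProjectName=llvmCore' takes the Apple-style branch, just as the real Makefile falls through to its closing else block.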
+ +ifeq ($(BUILD_DIRS_ONLY),1) + DIRS := lib/Support utils + OPTIONAL_DIRS := +else + DIRS := lib/Support utils lib/VMCore lib tools/llvm-shlib \ + tools/llvm-config tools runtime docs unittests + OPTIONAL_DIRS := projects bindings +endif + +ifeq ($(BUILD_EXAMPLES),1) + OPTIONAL_DIRS += examples +endif + +EXTRA_DIST := test unittests llvm.spec include win32 Xcode + +include $(LEVEL)/Makefile.config + +ifneq ($(ENABLE_SHARED),1) + DIRS := $(filter-out tools/llvm-shlib, $(DIRS)) +endif + +ifneq ($(ENABLE_DOCS),1) + DIRS := $(filter-out docs, $(DIRS)) +endif + +ifeq ($(MAKECMDGOALS),libs-only) + DIRS := $(filter-out tools runtime docs, $(DIRS)) + OPTIONAL_DIRS := +endif + +ifeq ($(MAKECMDGOALS),install-libs) + DIRS := $(filter-out tools runtime docs, $(DIRS)) + OPTIONAL_DIRS := $(filter bindings, $(OPTIONAL_DIRS)) +endif + +ifeq ($(MAKECMDGOALS),tools-only) + DIRS := $(filter-out runtime docs, $(DIRS)) + OPTIONAL_DIRS := +endif + +ifeq ($(MAKECMDGOALS),install-clang) + DIRS := tools/clang/tools/driver tools/clang/lib/Headers \ + tools/clang/runtime tools/clang/docs \ + tools/lto + OPTIONAL_DIRS := + NO_INSTALL = 1 +endif + +ifeq ($(MAKECMDGOALS),install-clang-c) + DIRS := tools/clang/tools/driver tools/clang/lib/Headers \ + tools/clang/tools/libclang tools/clang/tools/c-index-test \ + tools/clang/include/clang-c + OPTIONAL_DIRS := + NO_INSTALL = 1 +endif + +ifeq ($(MAKECMDGOALS),clang-only) + DIRS := $(filter-out tools runtime docs unittests, $(DIRS)) \ + tools/clang tools/lto + OPTIONAL_DIRS := +endif + +ifeq ($(MAKECMDGOALS),unittests) + DIRS := $(filter-out tools runtime docs, $(DIRS)) utils unittests + OPTIONAL_DIRS := +endif + +# Use the NO_INSTALL define in each directory's Makefile to decide +# whether that directory is installed or not +ifeq ($(MAKECMDGOALS),install) + OPTIONAL_DIRS := $(filter bindings, $(OPTIONAL_DIRS)) +endif + +# Don't build unittests when ONLY_TOOLS is set. +ifneq ($(ONLY_TOOLS),) + DIRS := $(filter-out unittests, $(DIRS)) +endif + +# If we're cross-compiling, build the build-hosted tools first +ifeq ($(LLVM_CROSS_COMPILING),1) +all:: cross-compile-build-tools + +clean:: + $(Verb) rm -rf BuildTools + +cross-compile-build-tools: + $(Verb) if [ ! -f BuildTools/Makefile ]; then \ + $(MKDIR) BuildTools; \ + cd BuildTools ; \ + unset CFLAGS ; \ + unset CXXFLAGS ; \ + $(PROJ_SRC_DIR)/configure --build=$(BUILD_TRIPLE) \ + --host=$(BUILD_TRIPLE) --target=$(BUILD_TRIPLE); \ + cd .. ; \ + fi; \ + (unset SDKROOT; \ + $(MAKE) -C BuildTools \ + BUILD_DIRS_ONLY=1 \ + UNIVERSAL= \ + ENABLE_OPTIMIZED=$(ENABLE_OPTIMIZED) \ + ENABLE_PROFILING=$(ENABLE_PROFILING) \ + ENABLE_COVERAGE=$(ENABLE_COVERAGE) \ + DISABLE_ASSERTIONS=$(DISABLE_ASSERTIONS) \ + ENABLE_EXPENSIVE_CHECKS=$(ENABLE_EXPENSIVE_CHECKS) \ + CFLAGS= \ + CXXFLAGS= \ + ) || exit 1; +endif + +# Include the main makefile machinery.
+include $(LLVM_SRC_ROOT)/Makefile.rules + +# Specify options to pass to configure script when we're +# running the dist-check target +DIST_CHECK_CONFIG_OPTIONS = --with-llvmgccdir=$(LLVMGCCDIR) + +.PHONY: debug-opt-prof +debug-opt-prof: + $(Echo) Building Debug Version + $(Verb) $(MAKE) + $(Echo) + $(Echo) Building Optimized Version + $(Echo) + $(Verb) $(MAKE) ENABLE_OPTIMIZED=1 + $(Echo) + $(Echo) Building Profiling Version + $(Echo) + $(Verb) $(MAKE) ENABLE_PROFILING=1 + +dist-hook:: + $(Echo) Eliminating files constructed by configure + $(Verb) $(RM) -f \ + $(TopDistDir)/include/llvm/Config/config.h \ + $(TopDistDir)/include/llvm/Support/DataTypes.h + +clang-only: all +tools-only: all +libs-only: all +install-clang: install +install-clang-c: install +install-libs: install + +#------------------------------------------------------------------------ +# Make sure the generated headers are up-to-date. This must be kept in +# sync with the AC_CONFIG_HEADER invocations in autoconf/configure.ac +#------------------------------------------------------------------------ +FilesToConfig := \ + include/llvm/Config/config.h \ + include/llvm/Config/Targets.def \ + include/llvm/Config/AsmPrinters.def \ + include/llvm/Config/AsmParsers.def \ + include/llvm/Config/Disassemblers.def \ + include/llvm/Support/DataTypes.h \ + tools/llvmc/src/Base.td +FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig)) + +all-local:: $(FilesToConfigPATH) +$(FilesToConfigPATH) : $(LLVM_OBJ_ROOT)/% : $(LLVM_SRC_ROOT)/%.in + $(Echo) Regenerating $* + $(Verb) cd $(LLVM_OBJ_ROOT) && $(ConfigStatusScript) $* +.PRECIOUS: $(FilesToConfigPATH) + +# NOTE: This needs to remain as the last target definition in this file so +# that it gets executed last. +ifneq ($(BUILD_DIRS_ONLY),1) +all:: + $(Echo) '*****' Completed $(BuildMode) Build +ifneq ($(ENABLE_OPTIMIZED),1) + $(Echo) '*****' Note: Debug build can be 10 times slower than an + $(Echo) '*****' optimized build. Use 'make ENABLE_OPTIMIZED=1' to + $(Echo) '*****' make an optimized build. Alternatively you can + $(Echo) '*****' configure with --enable-optimized. +endif +endif + +check-llvm2cpp: + $(Verb)$(MAKE) check TESTSUITE=Feature RUNLLVM2CPP=1 + +srpm: $(LLVM_OBJ_ROOT)/llvm.spec + rpmbuild -bs $(LLVM_OBJ_ROOT)/llvm.spec + +rpm: $(LLVM_OBJ_ROOT)/llvm.spec + rpmbuild -bb --target $(TARGET_TRIPLE) $(LLVM_OBJ_ROOT)/llvm.spec + +show-footprint: + $(Verb) du -sk $(LibDir) + $(Verb) du -sk $(ToolDir) + $(Verb) du -sk $(ExmplDir) + $(Verb) du -sk $(ObjDir) + +build-for-llvm-top: + $(Verb) if test ! -f ./config.status ; then \ + ./configure --prefix="$(LLVM_TOP)/install" \ + --with-llvm-gcc="$(LLVM_TOP)/llvm-gcc" ; \ + fi + $(Verb) $(MAKE) tools-only + +SVN = svn +SVN-UPDATE-OPTIONS = +AWK = awk +SUB-SVN-DIRS = $(AWK) '/\?\ \ \ \ \ \ / {print $$2}' \ + | LC_ALL=C xargs $(SVN) info 2>/dev/null \ + | $(AWK) '/Path:\ / {print $$2}' + +update: + $(SVN) $(SVN-UPDATE-OPTIONS) update $(LLVM_SRC_ROOT) + @ $(SVN) status $(LLVM_SRC_ROOT) | $(SUB-SVN-DIRS) | xargs $(SVN) $(SVN-UPDATE-OPTIONS) update + +happiness: update all check-all + +.PHONY: srpm rpm update happiness + +# declare all targets at this level to be serial: + +.NOTPARALLEL: + +else # Building "Apple-style." +# In an Apple-style build, once configuration is done, lines marked +# "Apple-style" are removed with sed! Please don't remove these! +# Look for the string "Apple-style" in utils/buildit/build_llvm. +include $(shell find . -name GNUmakefile) # Building "Apple-style." +endif # Building "Apple-style." 
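The FilesToConfigPATH rule near the end of the Makefile above is a GNU Make static pattern rule that maps each generated header back to its .in template. A minimal self-contained sketch of the same technique (the directory names are invented, and the cp stands in for the real $(ConfigStatusScript) invocation):

    OUT := build/config.h build/DataTypes.h
    all: $(OUT)
    # Each build/FOO is regenerated from templates/FOO.in; $* is the stem.
    $(OUT): build/% : templates/%.in
    	@echo "Regenerating $*"
    	@mkdir -p $(dir $@)
    	cp $< $@
    .PRECIOUS: $(OUT)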
diff --git a/final/Makefile.common b/final/Makefile.common new file mode 100644 index 00000000000..e1f52036f6f --- /dev/null +++ b/final/Makefile.common @@ -0,0 +1,70 @@ +#===-- Makefile.common - Common make rules for LLVM --------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +# +# This file is included by all of the LLVM makefiles. This file defines common +# rules to do things like compile a .cpp file or generate dependency info. +# These are platform-dependent, so this is the file used to specify these +# system-dependent operations. +# +# The following functionality can be set by setting incoming variables. +# The variable $(LEVEL) *must* be set: +# +# 1. LEVEL - The level of the current subdirectory from the top of the +# source directory. This level should be expressed as a path, for +# example, ../.. for two levels deep. +# +# 2. DIRS - A list of subdirectories to be built. Fake targets are set up +# so that the targets "all", "install", and "clean" each build +# the subdirectories before the local target. DIRS are guaranteed to be +# built in order. +# +# 3. PARALLEL_DIRS - A list of subdirectories to be built, but that may be +# built in any order. All DIRS are built in order before PARALLEL_DIRS are +# built, which are then built in any order. +# +# 4. Source - If specified, this sets the source code filenames. If this +# is not set, it defaults to be all of the .cpp, .c, .y, and .l files +# in the current directory. Also, if you want to build files in addition +# to the local files, you can use the ExtraSource variable. +# +# 5. SourceDir - If specified, this specifies a directory that the source files +# are in, if they are not in the current directory. This should include a +# trailing / character. +# +# 6. LLVM_SRC_ROOT - If specified, points to the top of the LLVM source tree. +# +# 7. PROJ_SRC_DIR - The directory which contains the current set of Makefiles +# and usually the source code too (unless SourceDir is set). +# +# 8. PROJ_SRC_ROOT - The root directory of the source code being compiled. +# +# 9. PROJ_OBJ_DIR - The directory where object code should be placed. +# +# 10. PROJ_OBJ_ROOT - The root directory for where object code should be +# placed. +# +# For building LLVM itself, +# LLVM_SRC_ROOT = PROJ_SRC_ROOT +# +#===-----------------------------------------------------------------------==== + +# +# Configuration file to set paths specific to local installation of LLVM +# +ifndef LLVM_OBJ_ROOT +include $(LEVEL)/Makefile.config +else +include $(LLVM_OBJ_ROOT)/Makefile.config +endif + +# +# Include all of the build rules used for making LLVM +# +include $(LLVM_SRC_ROOT)/Makefile.rules + diff --git a/final/Makefile.config.in b/final/Makefile.config.in new file mode 100644 index 00000000000..5c737580632 --- /dev/null +++ b/final/Makefile.config.in @@ -0,0 +1,359 @@ +#===-- Makefile.config - Local configuration for LLVM ------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +# +# This file is included by Makefile.common. It defines paths and other +# values specific to a particular installation of LLVM.
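For context, a directory-level makefile in the scheme Makefile.common describes above typically just sets the documented variables and pulls in the common rules; a minimal sketch, with hypothetical directory names:

    LEVEL := ../..
    DIRS := lib tools
    include $(LEVEL)/Makefile.common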
+# +#===------------------------------------------------------------------------===# + +# Define LLVM specific info and directories based on the autoconf variables +LLVMPackageName := @PACKAGE_NAME@ +LLVMVersion := @PACKAGE_VERSION@ +LLVM_CONFIGTIME := @LLVM_CONFIGTIME@ + +########################################################################### +# Directory Configuration +# This section of the Makefile determines what is where. To be +# specific, there are several locations that need to be defined: +# +# o LLVM_SRC_ROOT : The root directory of the LLVM source code. +# o LLVM_OBJ_ROOT : The root directory containing the built LLVM code. +# +# o PROJ_SRC_DIR : The directory containing the code to build. +# o PROJ_SRC_ROOT : The root directory of the code to build. +# +# o PROJ_OBJ_DIR : The directory in which compiled code will be placed. +# o PROJ_OBJ_ROOT : The root directory in which compiled code is placed. +# +########################################################################### + +PWD := @BINPWD@ +# Set the project name to LLVM if it's not defined +ifndef PROJECT_NAME +PROJECT_NAME := $(LLVMPackageName) +endif + +# The macro below is expanded when 'realpath' is not built-in. +# Built-in 'realpath' is available on GNU Make 3.81. +realpath = $(shell cd $(1); $(PWD)) + +PROJ_OBJ_DIR := $(call realpath, .) +PROJ_OBJ_ROOT := $(call realpath, $(PROJ_OBJ_DIR)/$(LEVEL)) + +ifeq ($(PROJECT_NAME),llvm) +LLVM_SRC_ROOT := $(call realpath, @abs_top_srcdir@) +LLVM_OBJ_ROOT := $(call realpath, @abs_top_builddir@) +PROJ_SRC_ROOT := $(LLVM_SRC_ROOT) +PROJ_SRC_DIR := $(call realpath, $(LLVM_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))) +prefix := @prefix@ +PROJ_prefix := $(prefix) +PROJ_VERSION := $(LLVMVersion) +else +ifndef PROJ_SRC_ROOT +$(error Projects must define PROJ_SRC_ROOT) +endif +ifndef PROJ_OBJ_ROOT +$(error Projects must define PROJ_OBJ_ROOT) +endif +ifndef PROJ_INSTALL_ROOT +$(error Projects must define PROJ_INSTALL_ROOT) +endif +ifndef LLVM_SRC_ROOT +$(error Projects must define LLVM_SRC_ROOT) +endif +ifndef LLVM_OBJ_ROOT +$(error Projects must define LLVM_OBJ_ROOT) +endif +PROJ_SRC_DIR := $(call realpath, $(PROJ_SRC_ROOT)/$(patsubst $(PROJ_OBJ_ROOT)%,%,$(PROJ_OBJ_DIR))) +prefix := $(PROJ_INSTALL_ROOT) +PROJ_prefix := $(prefix) +ifndef PROJ_VERSION +PROJ_VERSION := 1.0 +endif +endif + +LLVMMAKE := $(LLVM_SRC_ROOT)/make + +PROJ_bindir := $(PROJ_prefix)/bin +PROJ_libdir := $(PROJ_prefix)/lib +PROJ_datadir := $(PROJ_prefix)/share +PROJ_docsdir := $(PROJ_prefix)/docs/llvm +PROJ_etcdir := $(PROJ_prefix)/etc/llvm +PROJ_includedir := $(PROJ_prefix)/include +PROJ_infodir := $(PROJ_prefix)/info +PROJ_mandir := $(PROJ_prefix)/share/man + +# Determine if we're on a unix-type operating system +LLVM_ON_UNIX:=@LLVM_ON_UNIX@ +LLVM_ON_WIN32:=@LLVM_ON_WIN32@ + +# Host operating system on which LLVM will be run. +OS=@OS@ +HOST_OS=@HOST_OS@ +# Target operating system which LLVM will compile for. +TARGET_OS=@TARGET_OS@ + +# Target hardware architecture +ARCH=@ARCH@ + +# Indicates whether we're cross-compiling LLVM or not +LLVM_CROSS_COMPILING=@LLVM_CROSS_COMPILING@ + +# Executable file extension for build platform (mainly for +# tablegen call if we're cross-compiling). +BUILD_EXEEXT=@BUILD_EXEEXT@ + +# Compilers for the build platform (mainly for tablegen +# call if we're cross-compiling).
+BUILD_CC=@BUILD_CC@ +BUILD_CXX=@BUILD_CXX@ + +# Triple for configuring build tools when cross-compiling +BUILD_TRIPLE=@build@ + +# Target triple (cpu-vendor-os) for which we should generate code +TARGET_TRIPLE=@target@ + +# Extra options to compile LLVM with +EXTRA_OPTIONS=@EXTRA_OPTIONS@ + +# Endian-ness of the target +ENDIAN=@ENDIAN@ + +# Path to the C++ compiler to use. This is an optional setting, which defaults +# to whatever your gmake defaults to. +CXX = @CXX@ + +# Path to the CC binary, which is used by testcases for native builds. +CC := @CC@ + +# Linker flags. +LDFLAGS+=@LDFLAGS@ + +# Path to the library archiver program. +AR_PATH = @AR@ +AR = @AR@ + +# Path to the nm program +NM_PATH = @NM@ + +# The pathnames of the programs we require to build +CMP := @CMP@ +CP := @CP@ +DATE := @DATE@ +FIND := @FIND@ +GREP := @GREP@ +INSTALL := @INSTALL@ +MKDIR := $(LLVM_SRC_ROOT)/autoconf/mkinstalldirs +MV := @MV@ +RANLIB := @RANLIB@ +RM := @RM@ +SED := @SED@ +TAR := @TAR@ + +# Paths to miscellaneous programs we hope are present but might not be +PERL := @PERL@ +BZIP2 := @BZIP2@ +CAT := @CAT@ +DOT := @DOT@ +DOXYGEN := @DOXYGEN@ +GROFF := @GROFF@ +GZIPBIN := @GZIPBIN@ +OCAMLC := @OCAMLC@ +OCAMLOPT := @OCAMLOPT@ +OCAMLDEP := @OCAMLDEP@ +OCAMLDOC := @OCAMLDOC@ +GAS := @GAS@ +POD2HTML := @POD2HTML@ +POD2MAN := @POD2MAN@ +PDFROFF := @PDFROFF@ +RUNTEST := @RUNTEST@ +TCLSH := @TCLSH@ +ZIP := @ZIP@ + +HAVE_PERL := @HAVE_PERL@ +HAVE_PTHREAD := @HAVE_PTHREAD@ + +LIBS := @LIBS@ + +# Targets that we should build +TARGETS_TO_BUILD=@TARGETS_TO_BUILD@ + +# Path to location for LLVM C/C++ front-end. You can modify this if you +# want to override the value set by configure. +LLVMGCCDIR := @LLVMGCCDIR@ + +# Full pathnames of LLVM C/C++ front-end 'cc1' and 'cc1plus' binaries: +LLVMGCC := @LLVMGCC@ +LLVMGXX := @LLVMGXX@ +LLVMCC1 := @LLVMCC1@ +LLVMCC1PLUS := @LLVMCC1PLUS@ +LLVMGCC_LANGS := @LLVMGCC_LANGS@ +LLVMGCC_DRAGONEGG := @LLVMGCC_DRAGONEGG@ + +# Information on Clang, if configured. +CLANGPATH := @CLANGPATH@ +CLANGXXPATH := @CLANGXXPATH@ +ENABLE_BUILT_CLANG := @ENABLE_BUILT_CLANG@ + +# The LLVM capable compiler to use. +LLVMCC_OPTION := @LLVMCC_OPTION@ + +# The flag used to emit LLVM IR. +LLVMCC_EMITIR_FLAG = @LLVMCC_EMITIR_FLAG@ +LLVMCC_DISABLEOPT_FLAGS := @LLVMCC_DISABLEOPT_FLAGS@ + +# Path to directory where object files should be stored during a build. +# Set OBJ_ROOT to "." if you do not want to use a separate place for +# object files. +OBJ_ROOT := . + +# What to pass as rpath flag to g++ +RPATH := @RPATH@ + +# What to pass as -rdynamic flag to g++ +RDYNAMIC := @RDYNAMIC@ + +# These are options that can either be enabled here, or can be enabled on the +# make command line (ie, make ENABLE_PROFILING=1): + +# When ENABLE_OPTIMIZED is enabled, LLVM code is optimized and output is put +# into the "Release" directories. Otherwise, LLVM code is not optimized and +# output is put in the "Debug" directories. +#ENABLE_OPTIMIZED = 1 +@ENABLE_OPTIMIZED@ + +# When ENABLE_PROFILING is enabled, profile instrumentation is done +# and output is put into the "<Flavor>+Profile" directories, where +# <Flavor> is either Debug or Release depending on how other build +# flags are set. Otherwise, output is put in the <Flavor> +# directories. +#ENABLE_PROFILING = 1 +@ENABLE_PROFILING@ + +# When DISABLE_ASSERTIONS is enabled, builds of all of the LLVM code will +# exclude assertion checks, otherwise they are included.
+#DISABLE_ASSERTIONS = 1 +@DISABLE_ASSERTIONS@ + +# When ENABLE_EXPENSIVE_CHECKS is enabled, builds of all of the LLVM +# code will include expensive checks, otherwise they are excluded. +#ENABLE_EXPENSIVE_CHECKS = 0 +@ENABLE_EXPENSIVE_CHECKS@ + +# When DEBUG_RUNTIME is enabled, the runtime libraries will retain debug +# symbols. +#DEBUG_RUNTIME = 1 +@DEBUG_RUNTIME@ + +# When DEBUG_SYMBOLS is enabled, the compiler libraries will retain debug +# symbols. +#DEBUG_SYMBOLS = 1 +@DEBUG_SYMBOLS@ + +# The compiler flags to use for optimized builds. +OPTIMIZE_OPTION := @OPTIMIZE_OPTION@ + +# When ENABLE_PROFILING is enabled, the llvm source base is built with profile +# information to allow gprof to be used to get execution frequencies. +#ENABLE_PROFILING = 1 + +# When ENABLE_DOCS is disabled, docs/ will not be built. +ENABLE_DOCS = @ENABLE_DOCS@ + +# When ENABLE_DOXYGEN is enabled, the doxygen documentation will be built +ENABLE_DOXYGEN = @ENABLE_DOXYGEN@ + +# Do we want to enable threads? +ENABLE_THREADS := @ENABLE_THREADS@ + +# Do we want to build with position independent code? +ENABLE_PIC := @ENABLE_PIC@ + +# Do we want to build a shared library and link the tools with it? +ENABLE_SHARED := @ENABLE_SHARED@ + +# Do we want to link the stdc++ into a shared library? (Cygming) +ENABLE_EMBED_STDCXX := @ENABLE_EMBED_STDCXX@ + +# Use -fvisibility-inlines-hidden? +ENABLE_VISIBILITY_INLINES_HIDDEN := @ENABLE_VISIBILITY_INLINES_HIDDEN@ + +# Do we want to allow timestamping information into builds? +ENABLE_TIMESTAMPS := @ENABLE_TIMESTAMPS@ + +# This option tells the Makefiles to produce verbose output. +# It essentially prints the commands that make is executing +#VERBOSE = 1 + +# Enable JIT for this platform +TARGET_HAS_JIT = @TARGET_HAS_JIT@ + +# Environment variable to set to change the runtime shared library search path. +SHLIBPATH_VAR = @SHLIBPATH_VAR@ + +# Shared library extension for host platform. +SHLIBEXT = @SHLIBEXT@ + +# Executable file extension for host platform. +EXEEXT = @EXEEXT@ + +# Things we just assume are "there" +ECHO := echo + +# Get the options for causing archives to link all their content instead of +# just missing symbols, and the inverse of that. This is used for certain LLVM +# tools that permit loadable modules. It ensures that the LLVM symbols will be +# available to those loadable modules. +LINKALL := @LINKALL@ +NOLINKALL := @NOLINKALL@ + +# Get the value of HUGE_VAL_SANITY which will be either "yes" or "no" depending +# on the check. +HUGE_VAL_SANITY = @HUGE_VAL_SANITY@ + +# Bindings that we should build +BINDINGS_TO_BUILD := @BINDINGS_TO_BUILD@ +ALL_BINDINGS := @ALL_BINDINGS@ +OCAML_LIBDIR := @OCAML_LIBDIR@ + +# When compiling under Mingw/Cygwin, executables such as tblgen +# expect Windows paths, whereas the build system uses Unix paths. +# The function SYSPATH transforms Unix paths into Windows paths. +ifneq (,$(findstring -mno-cygwin, $(CXX))) + SYSPATH = $(shell echo $(1) | cygpath -m -f -) +else + SYSPATH = $(1) +endif + +# Location of the plugin header file for gold. +BINUTILS_INCDIR := @BINUTILS_INCDIR@ + +# When ENABLE_LLVMC_DYNAMIC is enabled, LLVMC will link libCompilerDriver +# dynamically. This is needed to make dynamic plugins work on some targets +# (Windows). +ENABLE_LLVMC_DYNAMIC = 0 +#@ENABLE_LLVMC_DYNAMIC@ + +# When ENABLE_LLVMC_DYNAMIC_PLUGINS is enabled, LLVMC will have dynamic plugin +# support (via the -load option). 
+ENABLE_LLVMC_DYNAMIC_PLUGINS = 1 +#@ENABLE_LLVMC_DYNAMIC_PLUGINS@ + +# Optional flags supported by the compiler +# -Wno-missing-field-initializers +NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@ +# -Wno-variadic-macros +NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@ + +# Was polly found in tools/polly? +LLVM_HAS_POLLY = @LLVM_HAS_POLLY@ +# Flags supported by the linker. +# bfd ld / gold --version-script=file +HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@ diff --git a/final/Makefile.rules b/final/Makefile.rules new file mode 100644 index 00000000000..5fc77a5e510 --- /dev/null +++ b/final/Makefile.rules @@ -0,0 +1,2278 @@ +#===-- Makefile.rules - Common make rules for LLVM ---------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# +# +# This file is included by all of the LLVM makefiles. For details on how to use +# it properly, please see the document MakefileGuide.html in the docs directory. +# +#===-----------------------------------------------------------------------====# + +################################################################################ +# TARGETS: Define standard targets that can be invoked +################################################################################ + +#-------------------------------------------------------------------- +# Define the various target sets +#-------------------------------------------------------------------- +RecursiveTargets := all clean clean-all install uninstall install-bytecode \ + unitcheck +LocalTargets := all-local clean-local clean-all-local check-local \ + install-local printvars uninstall-local \ + install-bytecode-local +TopLevelTargets := check dist dist-check dist-clean dist-gzip dist-bzip2 \ + dist-zip unittests +UserTargets := $(RecursiveTargets) $(LocalTargets) $(TopLevelTargets) +InternalTargets := preconditions distdir dist-hook + +################################################################################ +# INITIALIZATION: Basic things the makefile needs +################################################################################ + +#-------------------------------------------------------------------- +# Set the VPATH so that we can find source files. +#-------------------------------------------------------------------- +VPATH=$(PROJ_SRC_DIR) + +#-------------------------------------------------------------------- +# Reset the list of suffixes we know how to build. +#-------------------------------------------------------------------- +.SUFFIXES: +.SUFFIXES: .c .cpp .cc .h .hpp .o .a .bc .td .ps .dot .ll .m .mm +.SUFFIXES: $(SHLIBEXT) $(SUFFIXES) + +#-------------------------------------------------------------------- +# Mark all of these targets as phony to avoid implicit rule search +#-------------------------------------------------------------------- +.PHONY: $(UserTargets) $(InternalTargets) + +#-------------------------------------------------------------------- +# Make sure all the user-target rules are double colon rules and +# they are defined first. 
+#-------------------------------------------------------------------- + +$(UserTargets):: + +################################################################################ +# PRECONDITIONS: that which must be built/checked first +################################################################################ + +SrcMakefiles := $(filter %Makefile %Makefile.tests,\ + $(wildcard $(PROJ_SRC_DIR)/Makefile*)) +ObjMakefiles := $(subst $(PROJ_SRC_DIR),$(PROJ_OBJ_DIR),$(SrcMakefiles)) +ConfigureScript := $(PROJ_SRC_ROOT)/configure +ConfigStatusScript := $(PROJ_OBJ_ROOT)/config.status +MakefileConfigIn := $(strip $(wildcard $(PROJ_SRC_ROOT)/Makefile.config.in)) +MakefileCommonIn := $(strip $(wildcard $(PROJ_SRC_ROOT)/Makefile.common.in)) +MakefileConfig := $(PROJ_OBJ_ROOT)/Makefile.config +MakefileCommon := $(PROJ_OBJ_ROOT)/Makefile.common +PreConditions := $(ConfigStatusScript) $(ObjMakefiles) +ifneq ($(MakefileCommonIn),) +PreConditions += $(MakefileCommon) +endif + +ifneq ($(MakefileConfigIn),) +PreConditions += $(MakefileConfig) +endif + +preconditions: $(PreConditions) + +#------------------------------------------------------------------------ +# Make sure the BUILT_SOURCES are built first +#------------------------------------------------------------------------ +$(filter-out clean clean-local,$(UserTargets)):: $(BUILT_SOURCES) + +clean-all-local:: +ifneq ($(strip $(BUILT_SOURCES)),) + -$(Verb) $(RM) -f $(BUILT_SOURCES) +endif + +ifneq ($(PROJ_OBJ_ROOT),$(PROJ_SRC_ROOT)) +spotless: + $(Verb) if test -x config.status ; then \ + $(EchoCmd) Wiping out $(PROJ_OBJ_ROOT) ; \ + $(MKDIR) .spotless.save ; \ + $(MV) config.status .spotless.save ; \ + $(MV) mklib .spotless.save ; \ + $(MV) projects .spotless.save ; \ + $(RM) -rf * ; \ + $(MV) .spotless.save/config.status . ; \ + $(MV) .spotless.save/mklib . ; \ + $(MV) .spotless.save/projects . 
; \ + $(RM) -rf .spotless.save ; \ + $(EchoCmd) Rebuilding configuration of $(PROJ_OBJ_ROOT) ; \ + $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \ + $(ConfigStatusScript) ; \ + else \ + $(EchoCmd) "make spotless" can only be run from $(PROJ_OBJ_ROOT); \ + fi +else +spotless: + $(EchoCmd) "spotless target not supported for objdir == srcdir" +endif + +$(BUILT_SOURCES) : $(ObjMakefiles) + +#------------------------------------------------------------------------ +# Make sure we're not using a stale configuration +#------------------------------------------------------------------------ +reconfigure: + $(Echo) Reconfiguring $(PROJ_OBJ_ROOT) + $(Verb) cd $(PROJ_OBJ_ROOT) && \ + if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \ + $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \ + fi ; \ + $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \ + $(ConfigStatusScript) + +.PRECIOUS: $(ConfigStatusScript) +$(ConfigStatusScript): $(ConfigureScript) + $(Echo) Reconfiguring with $< + $(Verb) cd $(PROJ_OBJ_ROOT) && \ + if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \ + $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \ + fi ; \ + $(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \ + $(ConfigStatusScript) + +#------------------------------------------------------------------------ +# Make sure the configuration makefile is up to date +#------------------------------------------------------------------------ +ifneq ($(MakefileConfigIn),) +$(MakefileConfig): $(MakefileConfigIn) $(ConfigStatusScript) + $(Echo) Regenerating $@ + $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ConfigStatusScript) Makefile.config +endif + +ifneq ($(MakefileCommonIn),) +$(MakefileCommon): $(MakefileCommonIn) $(ConfigStatusScript) + $(Echo) Regenerating $@ + $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ConfigStatusScript) Makefile.common +endif + +#------------------------------------------------------------------------ +# If the Makefile in the source tree has been updated, copy it over into the +# build tree. But, only do this if the source and object makefiles differ +#------------------------------------------------------------------------ +ifndef PROJ_MAKEFILE +PROJ_MAKEFILE := $(PROJ_SRC_DIR)/Makefile +endif + +ifneq ($(PROJ_OBJ_DIR),$(PROJ_SRC_DIR)) + +Makefile: $(PROJ_MAKEFILE) $(ExtraMakefiles) + $(Echo) "Updating Makefile" + $(Verb) $(MKDIR) $(@D) + $(Verb) $(CP) -f $< $@ + +# Copy the Makefile.* files unless we're in the root directory which avoids +# the copying of Makefile.config.in or other things that should be explicitly +# taken care of. +$(PROJ_OBJ_DIR)/Makefile% : $(PROJ_MAKEFILE)% + @case '$?' 
in \
+	  *Makefile.rules) ;; \
+	  *.in) ;; \
+	  *) $(EchoCmd) "Updating $(@F)" ; \
+	     $(MKDIR) $(@D) ; \
+	     $(CP) -f $< $@ ;; \
+	esac
+
+endif
+
+#------------------------------------------------------------------------
+# Set up the basic dependencies
+#------------------------------------------------------------------------
+$(UserTargets):: $(PreConditions)
+
+all:: all-local
+clean:: clean-local
+clean-all:: clean-local clean-all-local
+install:: install-local
+uninstall:: uninstall-local
+install-local:: all-local
+install-bytecode:: install-bytecode-local
+
+###############################################################################
+# LLVMC: Provide rules for compiling llvmc-based driver
+###############################################################################
+
+ifdef LLVMC_BASED_DRIVER
+
+TOOLNAME = $(LLVMC_BASED_DRIVER)
+
+LLVMLIBS = CompilerDriver.a
+LINK_COMPONENTS = support
+
+endif # LLVMC_BASED_DRIVER
+
+###############################################################################
+# VARIABLES: Set up various variables based on configuration data
+###############################################################################
+
+# Set a variable if this invocation of make is for a "cleaning" target
+ifneq ($(strip $(filter clean clean-local dist-clean,$(MAKECMDGOALS))),)
+  IS_CLEANING_TARGET=1
+endif
+
+#--------------------------------------------------------------------
+# Variables derived from configuration we are building
+#--------------------------------------------------------------------
+
+CPP.Defines :=
+ifeq ($(ENABLE_OPTIMIZED),1)
+  BuildMode := Release
+  # Don't use -fomit-frame-pointer on Darwin or FreeBSD.
+  ifneq ($(HOST_OS),FreeBSD)
+  ifneq ($(HOST_OS),Darwin)
+    OmitFramePointer := -fomit-frame-pointer
+  endif
+  endif
+
+  # Darwin requires -fstrict-aliasing to be explicitly enabled.
+  # Avoid -fstrict-aliasing on Darwin for now; there are unresolved issues
+  # with -fstrict-aliasing and ipa-type-escape radr://6756684
+  #ifeq ($(HOST_OS),Darwin)
+  #  EXTRA_OPTIONS += -fstrict-aliasing -Wstrict-aliasing
+  #endif
+  CXX.Flags += $(OPTIMIZE_OPTION) $(OmitFramePointer)
+  C.Flags   += $(OPTIMIZE_OPTION) $(OmitFramePointer)
+  LD.Flags  += $(OPTIMIZE_OPTION)
+  ifdef DEBUG_SYMBOLS
+    BuildMode := $(BuildMode)+Debug
+    CXX.Flags += -g
+    C.Flags   += -g
+    LD.Flags  += -g
+    KEEP_SYMBOLS := 1
+  endif
+else
+  ifdef NO_DEBUG_SYMBOLS
+    BuildMode := Unoptimized
+    CXX.Flags +=
+    C.Flags   +=
+    LD.Flags  +=
+    KEEP_SYMBOLS := 1
+  else
+    BuildMode := Debug
+    CXX.Flags += -g
+    C.Flags   += -g
+    LD.Flags  += -g
+    KEEP_SYMBOLS := 1
+  endif
+endif
+
+ifeq ($(ENABLE_PROFILING),1)
+  BuildMode := $(BuildMode)+Profile
+  CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags)) -pg -g
+  C.Flags   := $(filter-out -fomit-frame-pointer,$(C.Flags)) -pg -g
+  LD.Flags  := $(filter-out -fomit-frame-pointer,$(LD.Flags)) -pg -g
+  KEEP_SYMBOLS := 1
+endif
+
+#ifeq ($(ENABLE_VISIBILITY_INLINES_HIDDEN),1)
+#  CXX.Flags += -fvisibility-inlines-hidden
+#endif
+
+ifdef ENABLE_EXPENSIVE_CHECKS
+  # GNU libstdc++ uses RTTI if you define _GLIBCXX_DEBUG, which we do below
+  # when ENABLE_EXPENSIVE_CHECKS=1.
+  # See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=40160
+  REQUIRES_RTTI := 1
+endif
+
+# If REQUIRES_EH=1 is specified then don't disable exceptions
+ifndef REQUIRES_EH
+  CXX.Flags += -fno-exceptions
+else
+  # If the library requires EH, it also requires RTTI.
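+  # (For example, a project Makefile that needs exceptions would set
+  #  REQUIRES_EH := 1 before including Makefile.rules; the -fno-exceptions
+  #  above and -fno-rtti below are then left out. Illustrative usage.)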
+  REQUIRES_RTTI := 1
+endif
+
+ifdef REQUIRES_FRAME_POINTER
+  CXX.Flags := $(filter-out -fomit-frame-pointer,$(CXX.Flags))
+  C.Flags   := $(filter-out -fomit-frame-pointer,$(C.Flags))
+  LD.Flags  := $(filter-out -fomit-frame-pointer,$(LD.Flags))
+endif
+
+# If REQUIRES_RTTI=1 is specified then don't disable run-time type id.
+ifneq ($(REQUIRES_RTTI), 1)
+  CXX.Flags += -fno-rtti
+endif
+
+ifeq ($(ENABLE_COVERAGE),1)
+  BuildMode := $(BuildMode)+Coverage
+  CXX.Flags += -ftest-coverage -fprofile-arcs
+  C.Flags   += -ftest-coverage -fprofile-arcs
+endif
+
+# If DISABLE_ASSERTIONS=1 is specified (make command line or configured),
+# then disable assertions by defining the appropriate preprocessor symbols.
+ifeq ($(DISABLE_ASSERTIONS),1)
+  CPP.Defines += -DNDEBUG
+else
+  BuildMode := $(BuildMode)+Asserts
+  CPP.Defines += -D_DEBUG
+endif
+
+# If ENABLE_EXPENSIVE_CHECKS=1 is specified (make command line or
+# configured), then enable expensive checks by defining the
+# appropriate preprocessor symbols.
+ifeq ($(ENABLE_EXPENSIVE_CHECKS),1)
+  BuildMode := $(BuildMode)+Checks
+  CPP.Defines += -D_GLIBCXX_DEBUG -DXDEBUG
+endif
+
+# LOADABLE_MODULE implies several other things so we force them to be
+# defined/on.
+ifdef LOADABLE_MODULE
+  SHARED_LIBRARY := 1
+  LINK_LIBS_IN_SHARED := 1
+endif
+
+ifdef SHARED_LIBRARY
+  ENABLE_PIC := 1
+  PIC_FLAG = "(PIC)"
+endif
+
+ifeq ($(ENABLE_PIC),1)
+  ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+    # Nothing. Win32 defaults to PIC and warns when given -fPIC
+  else
+    ifeq ($(HOST_OS),Darwin)
+      # Common symbols not allowed in dylib files
+      CXX.Flags += -fno-common
+      C.Flags   += -fno-common
+    else
+      # Linux and others; pass -fPIC
+      CXX.Flags += -fPIC
+      C.Flags   += -fPIC
+    endif
+  endif
+else
+  ifeq ($(HOST_OS),Darwin)
+    CXX.Flags += -mdynamic-no-pic
+    C.Flags   += -mdynamic-no-pic
+  endif
+endif
+
+# Support a makefile variable that keeps timestamps and other
+# non-deterministic info out of the build.
+ifeq ($(ENABLE_TIMESTAMPS),1)
+  DOTDIR_TIMESTAMP_COMMAND := $(DATE)
+else
+  DOTDIR_TIMESTAMP_COMMAND := echo 'Created.'
+endif
+
+ifeq ($(HOST_OS),MingW)
+  # Work around PR4957
+  CPP.Defines += -D__NO_CTYPE_INLINE
+  ifeq ($(LLVM_CROSS_COMPILING),1)
+    # Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=525016
+    ifdef TOOLNAME
+      LD.Flags += -Wl,--allow-multiple-definition
+    endif
+  endif
+endif
+
+CXX.Flags     += -Woverloaded-virtual
+CPP.BaseFlags += $(CPP.Defines)
+AR.Flags      := cru
+
+# Make floating point IEEE-compliant on Alpha.
+ifeq ($(ARCH),Alpha)
+  CXX.Flags     += -mieee
+  CPP.BaseFlags += -mieee
+ifeq ($(ENABLE_PIC),0)
+  CXX.Flags     += -fPIC
+  CPP.BaseFlags += -fPIC
+endif
+
+  LD.Flags += -Wl,--no-relax
+endif
+
+# GNU ld for PECOFF accepts the following options but ignores them:
+#   --version-script
+#   --export-dynamic
+#   --rpath
+# FIXME: autoconf should be aware of them.
+ifneq (,$(filter $(HOST_OS),Cygwin MingW)) + HAVE_LINK_VERSION_SCRIPT := 0 + RPATH := + RDYNAMIC := -Wl,--export-all-symbols +endif + +#-------------------------------------------------------------------- +# Directory locations +#-------------------------------------------------------------------- +TargetMode := +ifeq ($(LLVM_CROSS_COMPILING),1) + BuildLLVMToolDir := $(LLVM_OBJ_ROOT)/BuildTools/$(BuildMode)/bin +endif + +ObjRootDir := $(PROJ_OBJ_DIR)/$(BuildMode) +ObjDir := $(ObjRootDir) +LibDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/lib +ToolDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/bin +ExmplDir := $(PROJ_OBJ_ROOT)/$(BuildMode)/examples +LLVMLibDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/lib +LLVMToolDir := $(LLVM_OBJ_ROOT)/$(BuildMode)/bin +LLVMExmplDir:= $(LLVM_OBJ_ROOT)/$(BuildMode)/examples + +#-------------------------------------------------------------------- +# Locations of shared libraries +#-------------------------------------------------------------------- + +SharedPrefix := lib +SharedLibDir := $(LibDir) +LLVMSharedLibDir := $(LLVMLibDir) + +# Win32.DLL prefers to be located on the "PATH" of binaries. +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) + SharedLibDir := $(ToolDir) + LLVMSharedLibDir := $(LLVMToolDir) + + ifeq ($(HOST_OS),Cygwin) + SharedPrefix := cyg + else + SharedPrefix := + endif +endif + +#-------------------------------------------------------------------- +# LLVM Capable Compiler +#-------------------------------------------------------------------- + +ifeq ($(LLVMCC_OPTION),llvm-gcc) + LLVMCC := $(LLVMGCC) + LLVMCXX := $(LLVMGXX) +else + ifeq ($(LLVMCC_OPTION),clang) + ifneq ($(CLANGPATH),) + LLVMCC := $(CLANGPATH) + LLVMCXX := $(CLANGXXPATH) + else + ifeq ($(ENABLE_BUILT_CLANG),1) + LLVMCC := $(LLVMToolDir)/clang + LLVMCXX := $(LLVMToolDir)/clang++ + endif + endif + endif +endif + +#-------------------------------------------------------------------- +# Full Paths To Compiled Tools and Utilities +#-------------------------------------------------------------------- +EchoCmd = $(ECHO) llvm[$(MAKELEVEL)]: +Echo = @$(EchoCmd) +ifndef LLVMAS +LLVMAS := $(LLVMToolDir)/llvm-as$(EXEEXT) +endif +ifndef TBLGEN + ifeq ($(LLVM_CROSS_COMPILING),1) + TBLGEN := $(BuildLLVMToolDir)/tblgen$(BUILD_EXEEXT) + else + TBLGEN := $(LLVMToolDir)/tblgen$(EXEEXT) + endif +endif +LLVM_CONFIG := $(LLVMToolDir)/llvm-config +ifndef LLVMLD +LLVMLD := $(LLVMToolDir)/llvm-ld$(EXEEXT) +endif +ifndef LLVMDIS +LLVMDIS := $(LLVMToolDir)/llvm-dis$(EXEEXT) +endif +ifndef LLI +LLI := $(LLVMToolDir)/lli$(EXEEXT) +endif +ifndef LLC +LLC := $(LLVMToolDir)/llc$(EXEEXT) +endif +ifndef LOPT +LOPT := $(LLVMToolDir)/opt$(EXEEXT) +endif +ifndef LBUGPOINT +LBUGPOINT := $(LLVMToolDir)/bugpoint$(EXEEXT) +endif + +#-------------------------------------------------------------------- +# Adjust to user's request +#-------------------------------------------------------------------- + +ifeq ($(HOST_OS),Darwin) + DARWIN_VERSION := `sw_vers -productVersion` + # Strip a number like 10.4.7 to 10.4 + DARWIN_VERSION := $(shell echo $(DARWIN_VERSION)| sed -E 's/(10.[0-9]).*/\1/') + # Get "4" out of 10.4 for later pieces in the makefile. 
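+# (e.g. a DARWIN_VERSION of 10.6 yields a DARWIN_MAJVERS of 6; the values
+#  here are illustrative, whatever sw_vers reports is what gets used.)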
+  DARWIN_MAJVERS := $(shell echo $(DARWIN_VERSION)| sed -E 's/10.([0-9]).*/\1/')
+
+  LoadableModuleOptions := -Wl,-flat_namespace -Wl,-undefined,suppress
+  SharedLinkOptions := -dynamiclib
+  ifneq ($(ARCH),ARM)
+    SharedLinkOptions += -mmacosx-version-min=$(DARWIN_VERSION)
+  endif
+else
+  SharedLinkOptions=-shared
+endif
+
+ifeq ($(TARGET_OS),Darwin)
+  ifneq ($(ARCH),ARM)
+    TargetCommonOpts += -mmacosx-version-min=$(DARWIN_VERSION)
+  endif
+endif
+
+ifdef SHARED_LIBRARY
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS),Darwin)
+  LD.Flags += $(RPATH) -Wl,'$$ORIGIN'
+endif
+endif
+endif
+
+ifdef TOOL_VERBOSE
+  C.Flags += -v
+  CXX.Flags += -v
+  LD.Flags += -v
+  VERBOSE := 1
+endif
+
+# Adjust settings for verbose mode
+ifndef VERBOSE
+  Verb := @
+  AR.Flags += >/dev/null 2>/dev/null
+  ConfigureScriptFLAGS += >$(PROJ_OBJ_DIR)/configure.out 2>&1
+else
+  ConfigureScriptFLAGS :=
+endif
+
+# By default, strip symbol information from executable
+ifndef KEEP_SYMBOLS
+  Strip := $(PLATFORMSTRIPOPTS)
+  StripWarnMsg := "(without symbols)"
+  Install.StripFlag += -s
+endif
+
+ifdef TOOL_NO_EXPORTS
+  DynamicFlag :=
+else
+  DynamicFlag := $(RDYNAMIC)
+endif
+
+# Adjust linker flags for building an executable
+ifneq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+ifneq ($(HOST_OS), Darwin)
+ifdef TOOLNAME
+  LD.Flags += $(RPATH) -Wl,'$$ORIGIN/../lib'
+  ifdef EXAMPLE_TOOL
+    LD.Flags += $(RPATH) -Wl,$(ExmplDir) $(DynamicFlag)
+  else
+    LD.Flags += $(RPATH) -Wl,$(ToolDir) $(DynamicFlag)
+  endif
+endif
+else
+ifneq ($(DARWIN_MAJVERS),4)
+  LD.Flags += $(RPATH) -Wl,@executable_path/../lib
+endif
+endif
+endif
+
+
+#----------------------------------------------------------
+# Options To Invoke Tools
+#----------------------------------------------------------
+
+ifndef NO_PEDANTIC
+CompileCommonOpts += -pedantic -Wno-long-long
+endif
+CompileCommonOpts += -Wall -W -Wno-unused-parameter -Wwrite-strings \
+                     $(EXTRA_OPTIONS)
+# Enable cast-qual for C++; the workaround is to use const_cast.
+CXX.Flags += -Wcast-qual
+
+ifeq ($(HOST_OS),HP-UX)
+  CompileCommonOpts := -D_REENTRANT -D_HPUX_SOURCE
+endif
+
+# If we are building a universal binary on Mac OS X, pass extra options. This
+# is useful to people who want to link the LLVM libraries into their universal
+# apps.
+#
+# The following can be optionally specified:
+#   UNIVERSAL_SDK_PATH variable can be specified as a path to the SDK to use.
+#      For Mac OS X 10.4 Intel machines, the traditional one is:
+#      UNIVERSAL_SDK_PATH=/Developer/SDKs/MacOSX10.4u.sdk/
+#   UNIVERSAL_ARCH can be optionally specified to be a list of architectures
+#      to build for, e.g. UNIVERSAL_ARCH="i386 ppc ppc64". This defaults to
+#      i386/ppc only.
+ifdef UNIVERSAL
+  ifndef UNIVERSAL_ARCH
+    UNIVERSAL_ARCH := i386 ppc
+  endif
+  UNIVERSAL_ARCH_OPTIONS := $(UNIVERSAL_ARCH:%=-arch %)
+  CompileCommonOpts += $(UNIVERSAL_ARCH_OPTIONS)
+  ifdef UNIVERSAL_SDK_PATH
+    CompileCommonOpts += -isysroot $(UNIVERSAL_SDK_PATH)
+  endif
+
+  # Universal builds cannot compute dependencies automatically.
+  DISABLE_AUTO_DEPENDENCIES=1
+else
+  ifeq ($(TARGET_OS),Darwin)
+    ifeq ($(ARCH),x86_64)
+      TargetCommonOpts = -m64
+    else
+      ifeq ($(ARCH),x86)
+        TargetCommonOpts = -m32
+      endif
+    endif
+  endif
+endif
+
+ifeq ($(HOST_OS),SunOS)
+CPP.BaseFlags += -include llvm/Support/Solaris.h
+endif
+
+ifeq ($(HOST_OS),AuroraUX)
+CPP.BaseFlags += -include llvm/Support/Solaris.h
+endif # !HOST_OS - AuroraUX.
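+# (Illustrative usage of the knobs above: "make VERBOSE=1" prints the full
+#  commands being executed instead of only the terse llvm[$(MAKELEVEL)]:
+#  summaries, and "make TOOL_VERBOSE=1" additionally passes -v to the
+#  compiler and linker.)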
+
+LD.Flags      += -L$(LibDir) -L$(LLVMLibDir)
+CPP.BaseFlags += -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -D__STDC_CONSTANT_MACROS
+# All -I flags should go here, so that they don't confuse llvm-config.
+CPP.Flags     += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \
+                 $(patsubst %,-I%/include,\
+                 $(PROJ_OBJ_ROOT) $(PROJ_SRC_ROOT) \
+                 $(LLVM_OBJ_ROOT) $(LLVM_SRC_ROOT))) \
+                 $(CPP.BaseFlags)
+
+ifeq ($(BUILD_COMPONENT), 1)
+  Compile.C     = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) -c
+  Compile.CXX   = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+                  $(CPPFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) -c
+  Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \
+                  $(CompileCommonOpts) $(CXX.Flags) -E
+  Link          = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \
+                  $(LD.Flags) $(LDFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
+else
+  Compile.C     = $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) -c
+  Compile.CXX   = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
+                  $(TargetCommonOpts) $(CompileCommonOpts) -c
+  Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \
+                  $(CompileCommonOpts) $(CXX.Flags) -E
+  Link          = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LD.Flags) \
+                  $(LDFLAGS) $(TargetCommonOpts) $(CompileCommonOpts) $(Strip)
+endif
+
+BCCompile.C   = $(LLVMCC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \
+                $(TargetCommonOpts) $(CompileCommonOpts)
+Preprocess.C  = $(CC) $(CPP.Flags) $(C.Flags) $(CPPFLAGS) \
+                $(TargetCommonOpts) $(CompileCommonOpts) -E
+
+BCCompile.CXX = $(LLVMCXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \
+                $(TargetCommonOpts) $(CompileCommonOpts)
+
+ProgInstall   = $(INSTALL) $(Install.StripFlag) -m 0755
+ScriptInstall = $(INSTALL) -m 0755
+DataInstall   = $(INSTALL) -m 0644
+
+# When compiling under Mingw/Cygwin, the tblgen tool expects Windows
+# paths. In this case, the SYSPATH function (defined in
+# Makefile.config) transforms Unix paths into Windows paths.
+TableGen = $(TBLGEN) -I $(call SYSPATH, $(PROJ_SRC_DIR)) \
+           -I $(call SYSPATH, $(LLVM_SRC_ROOT)/include) \
+           -I $(call SYSPATH, $(PROJ_SRC_ROOT)/include) \
+           -I $(call SYSPATH, $(PROJ_SRC_ROOT)/lib/Target)
+
+Archive    = $(AR) $(AR.Flags)
+LArchive   = $(LLVMToolDir)/llvm-ar rcsf
+ifdef RANLIB
+Ranlib     = $(RANLIB)
+else
+Ranlib     = ranlib
+endif
+
+AliasTool  = ln -s
+
+#----------------------------------------------------------
+# Get the list of source files and compute object file
+# names from them.
+#----------------------------------------------------------
+
+ifndef SOURCES
+  Sources := $(notdir $(wildcard $(PROJ_SRC_DIR)/*.cpp \
+             $(PROJ_SRC_DIR)/*.cc $(PROJ_SRC_DIR)/*.c))
+else
+  Sources := $(SOURCES)
+endif
+
+ifdef BUILT_SOURCES
+Sources += $(filter %.cpp %.c %.cc,$(BUILT_SOURCES))
+endif
+
+BaseNameSources := $(sort $(basename $(Sources)))
+
+ObjectsO  := $(BaseNameSources:%=$(ObjDir)/%.o)
+ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc)
+
+#----------------------------------------------------------
+# For Mingw MSYS bash and Python/w32:
+#
+# $(ECHOPATH) prints a DOSish path string.
+#   ex) $(ECHOPATH) /include/sys/types.h
+#         --> C:/mingw/include/sys/types.h
+# The built-in "echo" does not transform paths to DOSish paths.
+#
+# FIXME: This would not be needed if MSYS provided a suitable python.
+#---------------------------------------------------------- + +ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE))) + ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])" +else + ECHOPATH := $(Verb)$(ECHO) +endif + +############################################################################### +# DIRECTORIES: Handle recursive descent of directory structure +############################################################################### + +#--------------------------------------------------------- +# Provide rules to make install dirs. This must be early +# in the file so they get built before dependencies +#--------------------------------------------------------- + +$(DESTDIR)$(PROJ_bindir) $(DESTDIR)$(PROJ_libdir) $(DESTDIR)$(PROJ_includedir) $(DESTDIR)$(PROJ_etcdir):: + $(Verb) $(MKDIR) $@ + +# To create other directories, as needed, and timestamp their creation +%/.dir: + $(Verb) $(MKDIR) $* > /dev/null + $(Verb) $(DOTDIR_TIMESTAMP_COMMAND) > $@ + +.PRECIOUS: $(ObjDir)/.dir $(LibDir)/.dir $(ToolDir)/.dir $(ExmplDir)/.dir +.PRECIOUS: $(LLVMLibDir)/.dir $(LLVMToolDir)/.dir $(LLVMExmplDir)/.dir + +#--------------------------------------------------------- +# Handle the DIRS options for sequential construction +#--------------------------------------------------------- + +SubDirs := +ifdef DIRS +SubDirs += $(DIRS) + +ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT)) +$(RecursiveTargets):: + $(Verb) for dir in $(DIRS); do \ + if ([ ! -f $$dir/Makefile ] || \ + command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \ + $(MKDIR) $$dir; \ + $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \ + fi; \ + ($(MAKE) -C $$dir $@ ) || exit 1; \ + done +else +$(RecursiveTargets):: + $(Verb) for dir in $(DIRS); do \ + ($(MAKE) -C $$dir $@ ) || exit 1; \ + done +endif + +endif + +#--------------------------------------------------------- +# Handle the EXPERIMENTAL_DIRS options ensuring success +# after each directory is built. +#--------------------------------------------------------- +ifdef EXPERIMENTAL_DIRS +$(RecursiveTargets):: + $(Verb) for dir in $(EXPERIMENTAL_DIRS); do \ + if ([ ! 
-f $$dir/Makefile ] || \
+	      command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
+	    $(MKDIR) $$dir; \
+	    $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
+	  fi; \
+	  ($(MAKE) -C $$dir $@ ) || exit 0; \
+	done
+endif
+
+#-----------------------------------------------------------
+# Handle the OPTIONAL_PARALLEL_DIRS options for optional parallel construction
+#-----------------------------------------------------------
+ifdef OPTIONAL_PARALLEL_DIRS
+  PARALLEL_DIRS += $(foreach T,$(OPTIONAL_PARALLEL_DIRS),$(shell test -d $(PROJ_SRC_DIR)/$(T) && echo "$(T)"))
+endif
+
+#-----------------------------------------------------------
+# Handle the PARALLEL_DIRS options for parallel construction
+#-----------------------------------------------------------
+ifdef PARALLEL_DIRS
+
+SubDirs += $(PARALLEL_DIRS)
+
+# Unfortunately, this list must be maintained if new recursive targets are added
+all      :: $(addsuffix /.makeall      ,$(PARALLEL_DIRS))
+clean    :: $(addsuffix /.makeclean    ,$(PARALLEL_DIRS))
+clean-all:: $(addsuffix /.makeclean-all,$(PARALLEL_DIRS))
+install  :: $(addsuffix /.makeinstall  ,$(PARALLEL_DIRS))
+uninstall:: $(addsuffix /.makeuninstall,$(PARALLEL_DIRS))
+install-bytecode :: $(addsuffix /.makeinstall-bytecode,$(PARALLEL_DIRS))
+unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS))
+
+ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T))
+
+$(ParallelTargets) :
+	$(Verb) if ([ ! -f $(@D)/Makefile ] || \
+	            command test $(@D)/Makefile -ot \
+	            $(PROJ_SRC_DIR)/$(@D)/Makefile ); then \
+	  $(MKDIR) $(@D); \
+	  $(CP) $(PROJ_SRC_DIR)/$(@D)/Makefile $(@D)/Makefile; \
+	fi; \
+	$(MAKE) -C $(@D) $(subst $(@D)/.make,,$@)
+endif
+
+#---------------------------------------------------------
+# Handle the OPTIONAL_DIRS options for directories that may
+# or may not exist.
+#---------------------------------------------------------
+ifdef OPTIONAL_DIRS
+
+SubDirs += $(OPTIONAL_DIRS)
+
+ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
+$(RecursiveTargets)::
+	$(Verb) for dir in $(OPTIONAL_DIRS); do \
+	  if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
+	    if ([ ! -f $$dir/Makefile ] || \
+	        command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
+	      $(MKDIR) $$dir; \
+	      $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
+	    fi; \
+	    ($(MAKE) -C$$dir $@ ) || exit 1; \
+	  fi \
+	done
+else
+$(RecursiveTargets)::
+	$(Verb) for dir in $(OPTIONAL_DIRS); do \
+	  if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
+	    ($(MAKE) -C$$dir $@ ) || exit 1; \
+	  fi \
+	done
+endif
+endif
+
+#---------------------------------------------------------
+# Handle the CONFIG_FILES options
+#---------------------------------------------------------
+ifdef CONFIG_FILES
+
+ifdef NO_INSTALL
+install-local::
+	$(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+	$(Echo) Uninstall circumvented with NO_INSTALL
+else
+install-local:: $(DESTDIR)$(PROJ_etcdir) $(CONFIG_FILES)
+	$(Echo) Installing Configuration Files To $(DESTDIR)$(PROJ_etcdir)
+	$(Verb)for file in $(CONFIG_FILES); do \
+	  if test -f $(PROJ_OBJ_DIR)/$${file} ; then \
+	    $(DataInstall) $(PROJ_OBJ_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
+	  elif test -f $(PROJ_SRC_DIR)/$${file} ; then \
+	    $(DataInstall) $(PROJ_SRC_DIR)/$${file} $(DESTDIR)$(PROJ_etcdir) ; \
+	  else \
+	    $(ECHO) Error: cannot find config file $${file}. ; \
+	  fi \
+	done
+
+uninstall-local::
+	$(Echo) Uninstalling Configuration Files From $(DESTDIR)$(PROJ_etcdir)
+	$(Verb)for file in $(CONFIG_FILES); do \
+	  $(RM) -f $(DESTDIR)$(PROJ_etcdir)/$${file} ; \
+	done
+endif
+
+endif
+
+###############################################################################
+# Set up variables for building libraries
+###############################################################################
+
+#---------------------------------------------------------
+# Define various command line options pertaining to the
+# libraries needed when linking. There are "Proj" libs
+# (defined by the user's project) and "LLVM" libs (defined
+# by the LLVM project).
+#---------------------------------------------------------
+
+ifdef USEDLIBS
+ProjLibsOptions := $(patsubst %.a.o, -l%, $(addsuffix .o, $(USEDLIBS)))
+ProjLibsOptions := $(patsubst %.o, $(LibDir)/%.o, $(ProjLibsOptions))
+ProjUsedLibs    := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(USEDLIBS)))
+ProjLibsPaths   := $(addprefix $(LibDir)/,$(ProjUsedLibs))
+endif
+
+ifdef LLVMLIBS
+LLVMLibsOptions := $(patsubst %.a.o, -l%, $(addsuffix .o, $(LLVMLIBS)))
+LLVMLibsOptions := $(patsubst %.o, $(LLVMLibDir)/%.o, $(LLVMLibsOptions))
+LLVMUsedLibs    := $(patsubst %.a.o, lib%.a, $(addsuffix .o, $(LLVMLIBS)))
+LLVMLibsPaths   := $(addprefix $(LLVMLibDir)/,$(LLVMUsedLibs))
+endif
+
+# A loadable module for Win32 requires all symbols to be resolved at link
+# time; linking in the "all" component makes every symbol in LLVM.dll
+# available.
+ifeq ($(ENABLE_SHARED),1)
+  ifdef LOADABLE_MODULE
+    ifneq (,$(filter $(HOST_OS),Cygwin MingW))
+      LINK_COMPONENTS += all
+    endif
+  endif
+endif
+
+ifndef IS_CLEANING_TARGET
+ifdef LINK_COMPONENTS
+
+# If LLVM_CONFIG doesn't exist, build it. This can happen if you do a make
+# clean in tools, then do a make in tools (instead of at the top level).
+$(LLVM_CONFIG):
+	@echo "*** llvm-config doesn't exist - rebuilding it."
+	@$(MAKE) -C $(PROJ_OBJ_ROOT)/tools/llvm-config
+
+$(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT): $(LLVM_CONFIG)
+
+ifeq ($(ENABLE_SHARED), 1)
+# We can use the "auto-import" feature to get rid of dllimport.
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+LLVMLibsOptions += -Wl,--enable-auto-import,--enable-runtime-pseudo-reloc \
+                   -L $(SharedLibDir)
+endif
+LLVMLibsOptions += -lLLVM-$(LLVMVersion)
+LLVMLibsPaths   += $(SharedLibDir)/$(SharedPrefix)LLVM-$(LLVMVersion)$(SHLIBEXT)
+else
+
+ifndef NO_LLVM_CONFIG
+LLVMConfigLibs := $(shell $(LLVM_CONFIG) --libs $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibs),Error)
+$(error llvm-config --libs failed)
+endif
+LLVMLibsOptions += $(LLVMConfigLibs)
+LLVMConfigLibfiles := $(shell $(LLVM_CONFIG) --libfiles $(LINK_COMPONENTS) || echo Error)
+ifeq ($(LLVMConfigLibfiles),Error)
+$(error llvm-config --libfiles failed)
+endif
+LLVMLibsPaths += $(LLVM_CONFIG) $(LLVMConfigLibfiles)
+endif
+
+endif
+endif
+endif
+
+# Set up the library exports file.
+ifdef EXPORTED_SYMBOL_FILE
+
+# First, set up the native export file, which may differ from the source
+# export file.
+
+ifeq ($(HOST_OS),Darwin)
+# Darwin convention prefixes symbols with underscores.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+	$(Verb) sed -e 's/^/_/' < $< > $@
+clean-local::
+	-$(Verb) $(RM) -f $(NativeExportsFile)
+else
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+# Gold and BFD ld require a version script rather than a plain list.
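+# (For illustration: given a hypothetical exports file containing the two
+#  lines "LLVMFoo" and "LLVMBar", the rule below generates roughly:
+#      {
+#       global:
+#       LLVMFoo;
+#       LLVMBar;
+#       local: *;
+#      };
+#  on every platform except OpenBSD, which omits the "local: *;" line.)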
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).map
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+	$(Verb) echo "{" > $@
+	$(Verb) grep -q "\<" $< && echo " global:" >> $@ || :
+	$(Verb) sed -e 's/$$/;/' -e 's/^/ /' < $< >> $@
+ifneq ($(HOST_OS),OpenBSD)
+	$(Verb) echo " local: *;" >> $@
+endif
+	$(Verb) echo "};" >> $@
+clean-local::
+	-$(Verb) $(RM) -f $(NativeExportsFile)
+else
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# GNU ld Win32 accepts .DEF files that contain "DATA" entries.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE:.exports=.def))
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+	$(Echo) Generating $(notdir $@)
+	$(Verb) $(ECHO) "EXPORTS" > $@
+	$(Verb) $(CAT) $< >> $@
+clean-local::
+	-$(Verb) $(RM) -f $(NativeExportsFile)
+else
+# Default behavior: just use the exports file verbatim.
+NativeExportsFile := $(EXPORTED_SYMBOL_FILE)
+endif
+endif
+endif
+
+# Now add the linker command-line options to use the native export file.
+
+# Darwin
+ifeq ($(HOST_OS),Darwin)
+LLVMLibsOptions += -Wl,-exported_symbols_list,$(NativeExportsFile)
+endif
+
+# gold, bfd ld, etc.
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+LLVMLibsOptions += -Wl,--version-script,$(NativeExportsFile)
+endif
+
+# Windows
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW))
+# LLVMLibsOptions is invalidated while processing tools/llvm-shlib, so add
+# the export file to SharedLinkOptions instead.
+SharedLinkOptions += $(NativeExportsFile)
+endif
+
+endif
+
+###############################################################################
+# Library Build Rules: Four ways to build a library
+###############################################################################
+
+#---------------------------------------------------------
+# Bytecode Module Targets:
+#   If the user set MODULE_NAME then they want to build a
+#   bytecode module from the sources. We compile all the
+#   sources and link them together into a single bytecode
+#   module.
+#---------------------------------------------------------
+
+ifdef MODULE_NAME
+ifeq ($(strip $(LLVMCC)),)
+$(warning Modules require an LLVM-capable compiler but none is available ****)
+else
+
+Module     := $(LibDir)/$(MODULE_NAME).bc
+LinkModule := $(LLVMLD) -r
+
+
+ifdef EXPORTED_SYMBOL_FILE
+LinkModule += -internalize-public-api-file=$(EXPORTED_SYMBOL_FILE)
+endif
+
+$(Module): $(BUILT_SOURCES) $(ObjectsBC) $(LibDir)/.dir $(LLVMLD)
+	$(Echo) Building $(BuildMode) Bytecode Module $(notdir $@)
+	$(Verb) $(LinkModule) -o $@ $(ObjectsBC)
+
+all-local:: $(Module)
+
+clean-local::
+ifneq ($(strip $(Module)),)
+	-$(Verb) $(RM) -f $(Module)
+endif
+
+ifdef BYTECODE_DESTINATION
+ModuleDestDir := $(BYTECODE_DESTINATION)
+else
+ModuleDestDir := $(DESTDIR)$(PROJ_libdir)
+endif
+
+ifdef NO_INSTALL
+install-local::
+	$(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+	$(Echo) Uninstall circumvented with NO_INSTALL
+else
+DestModule := $(ModuleDestDir)/$(MODULE_NAME).bc
+
+install-module:: $(DestModule)
+install-local:: $(DestModule)
+
+$(DestModule): $(ModuleDestDir) $(Module)
+	$(Echo) Installing $(BuildMode) Bytecode Module $(DestModule)
+	$(Verb) $(DataInstall) $(Module) $(DestModule)
+
+uninstall-local::
+	$(Echo) Uninstalling $(BuildMode) Bytecode Module $(DestModule)
+	-$(Verb) $(RM) -f $(DestModule)
+endif
+
+endif
+endif
+
+# if we're building a library ...
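+# (A minimal project Makefile driving these rules might look like, with
+#  MyHello as a hypothetical name:
+#      LEVEL := ../..
+#      LIBRARYNAME := MyHello
+#      LOADABLE_MODULE := 1
+#      include $(LEVEL)/Makefile.common
+#  Makefile.common in turn includes this file.)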
+ifdef LIBRARYNAME + +# Make sure there isn't any extraneous whitespace on the LIBRARYNAME option +LIBRARYNAME := $(strip $(LIBRARYNAME)) +ifdef LOADABLE_MODULE +BaseLibName.A := $(LIBRARYNAME).a +BaseLibName.SO := $(LIBRARYNAME)$(SHLIBEXT) +else +BaseLibName.A := lib$(LIBRARYNAME).a +BaseLibName.SO := $(SharedPrefix)$(LIBRARYNAME)$(SHLIBEXT) +endif +LibName.A := $(LibDir)/$(BaseLibName.A) +LibName.SO := $(SharedLibDir)/$(BaseLibName.SO) +LibName.O := $(LibDir)/$(LIBRARYNAME).o +LibName.BCA:= $(LibDir)/lib$(LIBRARYNAME).bca + +#--------------------------------------------------------- +# Shared Library Targets: +# If the user asked for a shared library to be built +# with the SHARED_LIBRARY variable, then we provide +# targets for building them. +#--------------------------------------------------------- +ifdef SHARED_LIBRARY + +all-local:: $(LibName.SO) + +ifdef EXPORTED_SYMBOL_FILE +$(LibName.SO): $(NativeExportsFile) +endif + +ifdef LINK_LIBS_IN_SHARED +ifdef LOADABLE_MODULE +SharedLibKindMessage := "Loadable Module" +SharedLinkOptions := $(LoadableModuleOptions) $(SharedLinkOptions) +else +SharedLibKindMessage := "Shared Library" +endif +$(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(SharedLibDir)/.dir + $(Echo) Linking $(BuildMode) $(SharedLibKindMessage) \ + $(notdir $@) + $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) \ + $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS) +else +$(LibName.SO): $(ObjectsO) $(SharedLibDir)/.dir + $(Echo) Linking $(BuildMode) Shared Library $(notdir $@) + $(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO) +endif + +clean-local:: +ifneq ($(strip $(LibName.SO)),) + -$(Verb) $(RM) -f $(LibName.SO) +endif + +ifdef NO_INSTALL +install-local:: + $(Echo) Install circumvented with NO_INSTALL +uninstall-local:: + $(Echo) Uninstall circumvented with NO_INSTALL +else + +# Win32.DLL prefers to be located on the "PATH" of binaries. +ifeq ($(HOST_OS), $(filter $(HOST_OS), Cygwin MingW)) +DestSharedLibDir := $(DESTDIR)$(PROJ_bindir) +else +DestSharedLibDir := $(DESTDIR)$(PROJ_libdir) +endif +DestSharedLib := $(DestSharedLibDir)/$(BaseLibName.SO) + +install-local:: $(DestSharedLib) + +$(DestSharedLib): $(LibName.SO) $(DestSharedLibDir) + $(Echo) Installing $(BuildMode) Shared Library $(DestSharedLib) + $(Verb) $(INSTALL) $(LibName.SO) $(DestSharedLib) + +uninstall-local:: + $(Echo) Uninstalling $(BuildMode) Shared Library $(DestSharedLib) + -$(Verb) $(RM) -f $(DestSharedLibDir)/$(SharedPrefix)$(LIBRARYNAME).* +endif +endif + +#--------------------------------------------------------- +# Bytecode Library Targets: +# If the user asked for a bytecode library to be built +# with the BYTECODE_LIBRARY variable, then we provide +# targets for building them. 
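+# (A project opts in by setting BYTECODE_LIBRARY := 1 in its Makefile; note
+#  that this path requires an LLVM-capable compiler (LLVMCC/LLVMCXX above) to
+#  have been configured, since the .bc objects come from llvm-gcc or clang.)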
+#---------------------------------------------------------
+ifdef BYTECODE_LIBRARY
+ifeq ($(strip $(LLVMCC)),)
+$(warning Bytecode libraries require an LLVM-capable compiler but none is available ****)
+else
+
+all-local:: $(LibName.BCA)
+
+ifdef EXPORTED_SYMBOL_FILE
+BCLinkLib = $(LLVMLD) -internalize-public-api-file=$(EXPORTED_SYMBOL_FILE)
+
+$(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir $(LLVMLD) \
+                $(LLVMToolDir)/llvm-ar
+	$(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@) \
+	  "(internalize)"
+	$(Verb) $(BCLinkLib) -o $(ObjDir)/$(LIBRARYNAME).internalize $(ObjectsBC)
+	$(Verb) $(RM) -f $@
+	$(Verb) $(LArchive) $@ $(ObjDir)/$(LIBRARYNAME).internalize.bc
+else
+$(LibName.BCA): $(ObjectsBC) $(LibDir)/.dir \
+                $(LLVMToolDir)/llvm-ar
+	$(Echo) Building $(BuildMode) Bytecode Archive $(notdir $@)
+	$(Verb) $(RM) -f $@
+	$(Verb) $(LArchive) $@ $(ObjectsBC)
+
+endif
+
+clean-local::
+ifneq ($(strip $(LibName.BCA)),)
+	-$(Verb) $(RM) -f $(LibName.BCA)
+endif
+
+ifdef BYTECODE_DESTINATION
+BytecodeDestDir := $(BYTECODE_DESTINATION)
+else
+BytecodeDestDir := $(DESTDIR)$(PROJ_libdir)
+endif
+
+DestBytecodeLib = $(BytecodeDestDir)/lib$(LIBRARYNAME).bca
+
+install-bytecode-local:: $(DestBytecodeLib)
+
+ifdef NO_INSTALL
+install-local::
+	$(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+	$(Echo) Uninstall circumvented with NO_INSTALL
+else
+install-local:: $(DestBytecodeLib)
+
+$(DestBytecodeLib): $(LibName.BCA) $(BytecodeDestDir)
+	$(Echo) Installing $(BuildMode) Bytecode Archive $(DestBytecodeLib)
+	$(Verb) $(DataInstall) $(LibName.BCA) $(DestBytecodeLib)
+
+uninstall-local::
+	$(Echo) Uninstalling $(BuildMode) Bytecode Archive $(DestBytecodeLib)
+	-$(Verb) $(RM) -f $(DestBytecodeLib)
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Library Targets:
+#   If neither BUILD_ARCHIVE nor LOADABLE_MODULE is specified, default to
+#   building an archive.
+#---------------------------------------------------------
+ifndef NO_BUILD_ARCHIVE
+ifndef BUILD_ARCHIVE
+ifndef LOADABLE_MODULE
+BUILD_ARCHIVE = 1
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Archive Library Targets:
+#   If the user wanted a regular archive library built,
+#   then we provide targets for building them.
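+# (For example, with illustrative names: LIBRARYNAME := MyLib together with
+#  the default BUILD_ARCHIVE = 1 above produces $(LibDir)/libMyLib.a via
+#  $(Archive) and $(Ranlib) in the rules below.)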
+#---------------------------------------------------------
+ifdef BUILD_ARCHIVE
+
+all-local:: $(LibName.A)
+
+$(LibName.A): $(ObjectsO) $(LibDir)/.dir
+	$(Echo) Building $(BuildMode) Archive Library $(notdir $@)
+	-$(Verb) $(RM) -f $@
+	$(Verb) $(Archive) $@ $(ObjectsO)
+	$(Verb) $(Ranlib) $@
+
+clean-local::
+ifneq ($(strip $(LibName.A)),)
+	-$(Verb) $(RM) -f $(LibName.A)
+endif
+
+ifdef NO_INSTALL
+install-local::
+	$(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+	$(Echo) Uninstall circumvented with NO_INSTALL
+else
+ifdef NO_INSTALL_ARCHIVES
+install-local::
+	$(Echo) Install circumvented with NO_INSTALL
+uninstall-local::
+	$(Echo) Uninstall circumvented with NO_INSTALL
+else
+DestArchiveLib := $(DESTDIR)$(PROJ_libdir)/lib$(LIBRARYNAME).a
+
+install-local:: $(DestArchiveLib)
+
+$(DestArchiveLib): $(LibName.A) $(DESTDIR)$(PROJ_libdir)
+	$(Echo) Installing $(BuildMode) Archive Library $(DestArchiveLib)
+	$(Verb) $(MKDIR) $(DESTDIR)$(PROJ_libdir)
+	$(Verb) $(INSTALL) $(LibName.A) $(DestArchiveLib)
+
+uninstall-local::
+	$(Echo) Uninstalling $(BuildMode) Archive Library $(DestArchiveLib)
+	-$(Verb) $(RM) -f $(DestArchiveLib)
+endif
+endif
+endif
+
+# endif LIBRARYNAME
+endif
+
+###############################################################################
+# Tool Build Rules: Build executable tool based on TOOLNAME option
+###############################################################################
+
+ifdef TOOLNAME
+
+#---------------------------------------------------------
+# Set up variables for building a tool.
+#---------------------------------------------------------
+TOOLEXENAME := $(strip $(TOOLNAME))$(EXEEXT)
+ifdef EXAMPLE_TOOL
+ToolBuildPath   := $(ExmplDir)/$(TOOLEXENAME)
+else
+ToolBuildPath   := $(ToolDir)/$(TOOLEXENAME)
+endif
+
+# TOOLALIAS is a name to symlink (or copy) the tool to.
+ifdef TOOLALIAS
+ifdef EXAMPLE_TOOL
+ToolAliasBuildPath := $(ExmplDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+else
+ToolAliasBuildPath := $(ToolDir)/$(strip $(TOOLALIAS))$(EXEEXT)
+endif
+endif
+
+#---------------------------------------------------------
+# Prune Exports
+#---------------------------------------------------------
+
+# If the tool opts in with TOOL_NO_EXPORTS, optimize startup time of the app by
+# not exporting all of the weak symbols from the binary. This reduces dyld
+# startup time by 4x on Darwin in some cases.
+ifdef TOOL_NO_EXPORTS
+ifeq ($(HOST_OS),Darwin)
+
+# Tiger tools don't support this.
+ifneq ($(DARWIN_MAJVERS),4)
+LD.Flags += -Wl,-exported_symbol,_main
+endif
+endif
+
+ifeq ($(HOST_OS), $(filter $(HOST_OS), Linux NetBSD FreeBSD))
+ifneq ($(ARCH), Mips)
+  LD.Flags += -Wl,--version-script=$(LLVM_SRC_ROOT)/autoconf/ExportMap.map
+endif
+endif
+endif
+
+#---------------------------------------------------------
+# Tool Order File Support
+#---------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_ORDER_FILE
+
+LD.Flags += -Wl,-order_file,$(TOOL_ORDER_FILE)
+
+endif
+endif
+
+#---------------------------------------------------------
+# Tool Version Info Support
+#---------------------------------------------------------
+
+ifeq ($(HOST_OS),Darwin)
+ifdef TOOL_INFO_PLIST
+
+LD.Flags += -Wl,-sectcreate,__TEXT,__info_plist,$(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ToolBuildPath): $(ObjDir)/$(TOOL_INFO_PLIST)
+
+$(ObjDir)/$(TOOL_INFO_PLIST): $(PROJ_SRC_DIR)/$(TOOL_INFO_PLIST).in $(ObjDir)/.dir
+	$(Echo) "Creating $(TOOLNAME) '$(TOOL_INFO_PLIST)' file..."
+ $(Verb)sed -e "s#@TOOL_INFO_UTI@#$(TOOL_INFO_UTI)#g" \ + -e "s#@TOOL_INFO_NAME@#$(TOOL_INFO_NAME)#g" \ + -e "s#@TOOL_INFO_VERSION@#$(TOOL_INFO_VERSION)#g" \ + -e "s#@TOOL_INFO_BUILD_VERSION@#$(TOOL_INFO_BUILD_VERSION)#g" \ + $< > $@ + +endif +endif + +#--------------------------------------------------------- +# Provide targets for building the tools +#--------------------------------------------------------- +all-local:: $(ToolBuildPath) $(ToolAliasBuildPath) + +clean-local:: +ifneq ($(strip $(ToolBuildPath)),) + -$(Verb) $(RM) -f $(ToolBuildPath) +endif +ifneq ($(strip $(ToolAliasBuildPath)),) + -$(Verb) $(RM) -f $(ToolAliasBuildPath) +endif + +ifdef EXAMPLE_TOOL +$(ToolBuildPath): $(ExmplDir)/.dir +else +$(ToolBuildPath): $(ToolDir)/.dir +endif + +$(ToolBuildPath): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) + $(Echo) Linking $(BuildMode) executable $(TOOLNAME) $(StripWarnMsg) + $(Verb) $(Link) -o $@ $(TOOLLINKOPTS) $(ObjectsO) $(ProjLibsOptions) \ + $(LLVMLibsOptions) $(ExtraLibs) $(TOOLLINKOPTSB) $(LIBS) + $(Echo) ======= Finished Linking $(BuildMode) Executable $(TOOLNAME) \ + $(StripWarnMsg) + +ifneq ($(strip $(ToolAliasBuildPath)),) +$(ToolAliasBuildPath): $(ToolBuildPath) + $(Echo) Creating $(BuildMode) Alias $(TOOLALIAS) $(StripWarnMsg) + $(Verb) $(RM) -f $(ToolAliasBuildPath) + $(Verb) $(AliasTool) $(TOOLEXENAME) $(ToolAliasBuildPath) + $(Echo) ======= Finished Creating $(BuildMode) Alias $(TOOLALIAS) \ + $(StripWarnMsg) +endif + +ifdef NO_INSTALL +install-local:: + $(Echo) Install circumvented with NO_INSTALL +uninstall-local:: + $(Echo) Uninstall circumvented with NO_INSTALL +else +DestTool = $(DESTDIR)$(PROJ_bindir)/$(TOOLEXENAME) + +install-local:: $(DestTool) + +$(DestTool): $(ToolBuildPath) $(DESTDIR)$(PROJ_bindir) + $(Echo) Installing $(BuildMode) $(DestTool) + $(Verb) $(ProgInstall) $(ToolBuildPath) $(DestTool) + +uninstall-local:: + $(Echo) Uninstalling $(BuildMode) $(DestTool) + -$(Verb) $(RM) -f $(DestTool) + +# TOOLALIAS install. +ifdef TOOLALIAS +DestToolAlias = $(DESTDIR)$(PROJ_bindir)/$(TOOLALIAS)$(EXEEXT) + +install-local:: $(DestToolAlias) + +$(DestToolAlias): $(DestTool) + $(Echo) Installing $(BuildMode) $(DestToolAlias) + $(Verb) $(RM) -f $(DestToolAlias) + $(Verb) $(AliasTool) $(TOOLEXENAME) $(DestToolAlias) + +uninstall-local:: + $(Echo) Uninstalling $(BuildMode) $(DestToolAlias) + -$(Verb) $(RM) -f $(DestToolAlias) +endif + +endif +endif + +############################################################################### +# Object Build Rules: Build object files based on sources +############################################################################### + +# FIXME: This should be checking for "if not GCC or ICC", not for "if HP-UX" +ifeq ($(HOST_OS),HP-UX) + DISABLE_AUTO_DEPENDENCIES=1 +endif + +# Provide rule sets for when dependency generation is enabled +ifndef DISABLE_AUTO_DEPENDENCIES + +#--------------------------------------------------------- +# Create .o files in the ObjDir directory from the .cpp and .c files... +#--------------------------------------------------------- + +DEPEND_OPTIONS = -MMD -MP -MF "$(ObjDir)/$*.d.tmp" \ + -MT "$(ObjDir)/$*.o" -MT "$(ObjDir)/$*.d" + +# If the build succeeded, move the dependency file over, otherwise +# remove it. 
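+# (Illustrative expansion for a hypothetical foo.cpp in a Debug build: the
+#  compile rule below runs roughly
+#      if g++ ... -MMD -MP -MF Debug/foo.d.tmp -MT Debug/foo.o \
+#           -MT Debug/foo.d -c foo.cpp -o Debug/foo.o;
+#      then mv -f Debug/foo.d.tmp Debug/foo.d;
+#      else rm Debug/foo.d.tmp; exit 1; fi
+#  so a failed compile never leaves a stale dependency file behind.)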
+DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.d.tmp" "$(ObjDir)/$*.d"; \ + else $(RM) "$(ObjDir)/$*.d.tmp"; exit 1; fi + +$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE) + $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG) + $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ + $(DEPEND_MOVEFILE) + +$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE) + $(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG) + $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ + $(DEPEND_MOVEFILE) + +$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE) + $(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG) + $(Verb) if $(Compile.CXX) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ + $(DEPEND_MOVEFILE) + +$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE) + $(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG) + $(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ + $(DEPEND_MOVEFILE) + +$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(PROJ_MAKEFILE) + $(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG) + $(Verb) if $(Compile.C) $(DEPEND_OPTIONS) $< -o $(ObjDir)/$*.o ; \ + $(DEPEND_MOVEFILE) + +#--------------------------------------------------------- +# Create .bc files in the ObjDir directory from .cpp .cc and .c files... +#--------------------------------------------------------- + +BC_DEPEND_OPTIONS = -MMD -MP -MF "$(ObjDir)/$*.bc.d.tmp" \ + -MT "$(ObjDir)/$*.ll" -MT "$(ObjDir)/$*.bc.d" + +# If the build succeeded, move the dependency file over, otherwise +# remove it. +BC_DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.bc.d.tmp" "$(ObjDir)/$*.bc.d"; \ + else $(RM) "$(ObjDir)/$*.bc.d.tmp"; exit 1; fi + +$(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)" + $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ + $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $(BC_DEPEND_MOVEFILE) + +$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)" + $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ + $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $(BC_DEPEND_MOVEFILE) + +$(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)" + $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ + $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $(BC_DEPEND_MOVEFILE) + +$(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) + $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)" + $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \ + $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $(BC_DEPEND_MOVEFILE) + +$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) + $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)" + $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \ + $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $(BC_DEPEND_MOVEFILE) + +# Provide alternate rule sets if dependencies are disabled +else + +$(ObjDir)/%.o: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cpp for $(BuildMode) build" $(PIC_FLAG) + $(Compile.CXX) $< -o $@ + +$(ObjDir)/%.o: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.mm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.CXX) $< -o $@ + +$(ObjDir)/%.o: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cc for $(BuildMode) build" $(PIC_FLAG) + 
$(Compile.CXX) $< -o $@ + +$(ObjDir)/%.o: %.c $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.c for $(BuildMode) build" $(PIC_FLAG) + $(Compile.C) $< -o $@ + +$(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.m for $(BuildMode) build" $(PIC_FLAG) + $(Compile.C) $< -o $@ + +$(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)" + $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + +$(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)" + $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + +$(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) + $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)" + $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + +$(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) + $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)" + $(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + +$(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) + $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)" + $(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + +endif + + +## Rules for building preprocessed (.i/.ii) outputs. +$(BuildMode)/%.ii: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cpp for $(BuildMode) build to .ii file" + $(Verb) $(Preprocess.CXX) $< -o $@ + +$(BuildMode)/%.ii: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.mm for $(BuildMode) build to .ii file" + $(Verb) $(Preprocess.CXX) $< -o $@ + +$(BuildMode)/%.ii: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cc for $(BuildMode) build to .ii file" + $(Verb) $(Preprocess.CXX) $< -o $@ + +$(BuildMode)/%.i: %.c $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.c for $(BuildMode) build to .i file" + $(Verb) $(Preprocess.C) $< -o $@ + +$(BuildMode)/%.i: %.m $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.m for $(BuildMode) build to .i file" + $(Verb) $(Preprocess.C) $< -o $@ + + +$(ObjDir)/%.s: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cpp to asm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.CXX) $< -o $@ -S + +$(ObjDir)/%.s: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.mm to asm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.CXX) $< -o $@ -S + +$(ObjDir)/%.s: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.cc to asm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.CXX) $< -o $@ -S + +$(ObjDir)/%.s: %.c $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.c to asm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.C) $< -o $@ -S + +$(ObjDir)/%.s: %.m $(ObjDir)/.dir $(BUILT_SOURCES) + $(Echo) "Compiling $*.m to asm for $(BuildMode) build" $(PIC_FLAG) + $(Compile.C) $< -o $@ -S + + +# make the C and C++ compilers strip debug info out of bytecode libraries. 
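+# (Concretely, with illustrative paths: Debug/foo.bc is produced from
+#  Debug/foo.ll by "opt Debug/foo.ll -std-compile-opts -strip-debug -o
+#  Debug/foo.bc", where the -strip-debug pass is included only when
+#  DEBUG_RUNTIME is not set, as the two rules below show.)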
+ifdef DEBUG_RUNTIME
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
+	$(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
+	$(Verb) $(LOPT) $< -std-compile-opts -o $@
+else
+$(ObjectsBC): $(ObjDir)/%.bc: $(ObjDir)/%.ll $(LOPT)
+	$(Echo) "Compiling $*.ll to $*.bc for $(BuildMode) build (bytecode)"
+	$(Verb) $(LOPT) $< -std-compile-opts -strip-debug -o $@
+endif
+
+
+#---------------------------------------------------------
+# Provide rule to build .bc files from .ll sources,
+# regardless of dependencies
+#---------------------------------------------------------
+$(ObjDir)/%.bc: %.ll $(ObjDir)/.dir $(LLVMAS)
+	$(Echo) "Compiling $*.ll for $(BuildMode) build"
+	$(Verb) $(LLVMAS) $< -f -o $@
+
+###############################################################################
+# TABLEGEN: Provide rules for running tblgen to produce *.inc files
+###############################################################################
+
+ifdef TARGET
+TABLEGEN_INC_FILES_COMMON = 1
+endif
+
+ifdef LLVMC_BASED_DRIVER
+TABLEGEN_INC_FILES_COMMON = 1
+endif
+
+ifdef TABLEGEN_INC_FILES_COMMON
+
+INCFiles := $(filter %.inc,$(BUILT_SOURCES))
+INCTMPFiles := $(INCFiles:%=$(ObjDir)/%.tmp)
+.PRECIOUS: $(INCTMPFiles) $(INCFiles)
+
+# INCFiles rule: All of the tblgen generated files are emitted to
+# $(ObjDir)/%.inc.tmp, instead of emitting them directly to %.inc. This allows
+# us to only "touch" the real file if the contents of it change. IOW, if
+# tblgen is modified, all of the .inc.tmp files are regenerated, but no
+# dependencies of the .inc files are, unless the contents of the .inc file
+# change.
+$(INCFiles) : %.inc : $(ObjDir)/%.inc.tmp
+	$(Verb) $(CMP) -s $@ $< || $(CP) $< $@
+
+endif # TABLEGEN_INC_FILES_COMMON
+
+ifdef TARGET
+
+TDFiles := $(strip $(wildcard $(PROJ_SRC_DIR)/*.td) \
+           $(LLVM_SRC_ROOT)/include/llvm/Target/Target.td \
+           $(LLVM_SRC_ROOT)/include/llvm/Target/TargetCallingConv.td \
+           $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSchedule.td \
+           $(LLVM_SRC_ROOT)/include/llvm/Target/TargetSelectionDAG.td \
+           $(LLVM_SRC_ROOT)/include/llvm/CodeGen/ValueTypes.td) \
+           $(wildcard $(LLVM_SRC_ROOT)/include/llvm/Intrinsics*.td)
+
+# All of these files depend on tblgen and the .td files.
+$(INCTMPFiles) : $(TBLGEN) $(TDFiles)
+
+$(TARGET:%=$(ObjDir)/%GenRegisterNames.inc.tmp): \
+$(ObjDir)/%GenRegisterNames.inc.tmp : %.td $(ObjDir)/.dir
+	$(Echo) "Building $(<F) register names with tblgen"
+	$(Verb) $(TableGen) -gen-register-enums -o $(call SYSPATH, $@) $<
+
+# (The tblgen rules for the remaining generated .inc kinds, register info,
+# instruction info, asm writer, DAG ISel, subtarget, intrinsics and so on,
+# follow the same pattern as the rule above.)
+
+endif # TARGET
+
+ifneq ($(DOT),false)
+%.ps: %.dot
+	$(Echo) "Building $@"
+	$(Verb) $(DOT) -Tps < $< > $@
+else
+%.ps: %.dot
+	$(Echo) "Cannot build $@: The program dot is not installed"
+endif
+
+# This rule ensures that header files that are removed still have a rule for
+# which they can be "generated." This allows make to ignore them and
+# reproduce the dependency lists.
+%.h:: ;
+%.hpp:: ;
+
+# Define clean-local to clean the current directory. Note that this uses a
+# very conservative approach ensuring that empty variables do not cause
+# errors or disastrous removal.
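+# (The $(strip ...) guards matter: with an unset SHLIBEXT, for instance, the
+#  guarded command below would otherwise expand to "rm -f *" and delete the
+#  whole directory's contents.)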
+clean-local::
+ifneq ($(strip $(ObjRootDir)),)
+	-$(Verb) $(RM) -rf $(ObjRootDir)
+endif
+	-$(Verb) $(RM) -f core core.[0-9][0-9]* *.o *.d *~ *.flc
+ifneq ($(strip $(SHLIBEXT)),) # Extra paranoia - make real sure SHLIBEXT is set
+	-$(Verb) $(RM) -f *$(SHLIBEXT)
+endif
+
+clean-all-local::
+	-$(Verb) $(RM) -rf Debug Release Profile
+
+
+###############################################################################
+# DEPENDENCIES: Include the dependency files if we should
+###############################################################################
+ifndef DISABLE_AUTO_DEPENDENCIES
+
+# If it's not one of the cleaning targets
+ifndef IS_CLEANING_TARGET
+
+# Get the list of dependency files
+DependSourceFiles := $(basename $(filter %.cpp %.c %.cc %.m %.mm, $(Sources)))
+DependFiles := $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.d)
+
+# Include bitcode dependency files if using bitcode libraries
+ifdef BYTECODE_LIBRARY
+DependFiles += $(DependSourceFiles:%=$(PROJ_OBJ_DIR)/$(BuildMode)/%.bc.d)
+endif
+
+-include $(DependFiles) ""
+
+endif
+
+endif
+
+###############################################################################
+# CHECK: Running the test suite
+###############################################################################
+
+check::
+	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+	  if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+	    $(EchoCmd) Running test suite ; \
+	    $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local \
+	      TESTSUITE=$(TESTSUITE) ; \
+	  else \
+	    $(EchoCmd) No Makefile in test directory ; \
+	  fi ; \
+	else \
+	  $(EchoCmd) No test directory ; \
+	fi
+
+check-lit:: check
+
+check-dg::
+	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+	  if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+	    $(EchoCmd) Running test suite ; \
+	    $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-dg ; \
+	  else \
+	    $(EchoCmd) No Makefile in test directory ; \
+	  fi ; \
+	else \
+	  $(EchoCmd) No test directory ; \
+	fi
+
+check-all::
+	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/test" ; then \
+	  if test -f "$(PROJ_OBJ_ROOT)/test/Makefile" ; then \
+	    $(EchoCmd) Running test suite ; \
+	    $(MAKE) -C $(PROJ_OBJ_ROOT)/test check-local-all ; \
+	  else \
+	    $(EchoCmd) No Makefile in test directory ; \
+	  fi ; \
+	else \
+	  $(EchoCmd) No test directory ; \
+	fi
+
+###############################################################################
+# UNITTESTS: Running the unittests test suite
+###############################################################################
+
+unittests::
+	$(Verb) if test -d "$(PROJ_OBJ_ROOT)/unittests" ; then \
+	  if test -f "$(PROJ_OBJ_ROOT)/unittests/Makefile" ; then \
+	    $(EchoCmd) Running unittests test suite ; \
+	    $(MAKE) -C $(PROJ_OBJ_ROOT)/unittests unitcheck; \
+	  else \
+	    $(EchoCmd) No Makefile in unittests directory ; \
+	  fi ; \
+	else \
+	  $(EchoCmd) No unittests directory ; \
+	fi
+
+###############################################################################
+# DISTRIBUTION: Handle construction of a distribution tarball
+###############################################################################
+
+#------------------------------------------------------------------------
+# Define distribution related variables
+#------------------------------------------------------------------------
+DistName    := $(PROJECT_NAME)-$(PROJ_VERSION)
+DistDir     := $(PROJ_OBJ_ROOT)/$(DistName)
+TopDistDir  := $(PROJ_OBJ_ROOT)/$(DistName)
+DistTarGZip := $(PROJ_OBJ_ROOT)/$(DistName).tar.gz
+DistZip     := $(PROJ_OBJ_ROOT)/$(DistName).zip
+DistTarBZ2  := $(PROJ_OBJ_ROOT)/$(DistName).tar.bz2
$(PROJ_OBJ_ROOT)/$(DistName).tar.bz2 +DistAlways := CREDITS.TXT LICENSE.TXT README.txt README AUTHORS COPYING \ + ChangeLog INSTALL NEWS Makefile Makefile.common Makefile.rules \ + Makefile.config.in configure autoconf +DistOther := $(notdir $(wildcard \ + $(PROJ_SRC_DIR)/*.h \ + $(PROJ_SRC_DIR)/*.td \ + $(PROJ_SRC_DIR)/*.def \ + $(PROJ_SRC_DIR)/*.ll \ + $(PROJ_SRC_DIR)/*.in)) +DistSubDirs := $(SubDirs) +DistSources = $(Sources) $(EXTRA_DIST) +DistFiles = $(DistAlways) $(DistSources) $(DistOther) + +#------------------------------------------------------------------------ +# We MUST build distribution with OBJ_DIR != SRC_DIR +#------------------------------------------------------------------------ +ifeq ($(PROJ_SRC_DIR),$(PROJ_OBJ_DIR)) +dist dist-check dist-clean dist-gzip dist-bzip2 dist-zip :: + $(Echo) ERROR: Target $@ only available with OBJ_DIR != SRC_DIR + +else + +#------------------------------------------------------------------------ +# Prevent attempt to run dist targets from anywhere but the top level +#------------------------------------------------------------------------ +ifneq ($(LEVEL),.) +dist dist-check dist-clean dist-gzip dist-bzip2 dist-zip :: + $(Echo) ERROR: You must run $@ from $(PROJ_OBJ_ROOT) +else + +#------------------------------------------------------------------------ +# Provide the top level targets +#------------------------------------------------------------------------ + +dist-gzip:: $(DistTarGZip) + +$(DistTarGZip) : $(TopDistDir)/.makedistdir + $(Echo) Packing gzipped distribution tar file. + $(Verb) cd $(PROJ_OBJ_ROOT) ; $(TAR) chf - "$(DistName)" | \ + $(GZIP) -c > "$(DistTarGZip)" + +dist-bzip2:: $(DistTarBZ2) + +$(DistTarBZ2) : $(TopDistDir)/.makedistdir + $(Echo) Packing bzipped distribution tar file. + $(Verb) cd $(PROJ_OBJ_ROOT) ; $(TAR) chf - $(DistName) | \ + $(BZIP2) -c >$(DistTarBZ2) + +dist-zip:: $(DistZip) + +$(DistZip) : $(TopDistDir)/.makedistdir + $(Echo) Packing zipped distribution file. + $(Verb) rm -f $(DistZip) + $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ZIP) -rq $(DistZip) $(DistName) + +dist :: $(DistTarGZip) $(DistTarBZ2) $(DistZip) + $(Echo) ===== DISTRIBUTION PACKAGING SUCESSFUL ===== + +DistCheckDir := $(PROJ_OBJ_ROOT)/_distcheckdir + +dist-check:: $(DistTarGZip) + $(Echo) Checking distribution tar file. + $(Verb) if test -d $(DistCheckDir) ; then \ + $(RM) -rf $(DistCheckDir) ; \ + fi + $(Verb) $(MKDIR) $(DistCheckDir) + $(Verb) cd $(DistCheckDir) && \ + $(MKDIR) $(DistCheckDir)/build && \ + $(MKDIR) $(DistCheckDir)/install && \ + gunzip -c $(DistTarGZip) | $(TAR) xf - && \ + cd build && \ + ../$(DistName)/configure --prefix="$(DistCheckDir)/install" \ + --srcdir=../$(DistName) $(DIST_CHECK_CONFIG_OPTIONS) && \ + $(MAKE) all && \ + $(MAKE) check && \ + $(MAKE) unittests && \ + $(MAKE) install && \ + $(MAKE) uninstall && \ + $(MAKE) dist-clean && \ + $(EchoCmd) ===== $(DistTarGZip) Ready For Distribution ===== + +dist-clean:: + $(Echo) Cleaning distribution files + -$(Verb) $(RM) -rf $(DistTarGZip) $(DistTarBZ2) $(DistZip) $(DistName) \ + $(DistCheckDir) + +endif + +#------------------------------------------------------------------------ +# Provide the recursive distdir target for building the distribution directory +#------------------------------------------------------------------------ +distdir: $(DistDir)/.makedistdir +$(DistDir)/.makedistdir: $(DistSources) + $(Verb) if test "$(DistDir)" = "$(TopDistDir)" ; then \ + if test -d "$(DistDir)" ; then \ + find $(DistDir) -type d ! 
-perm -200 -exec chmod u+w {} ';' || \ + exit 1 ; \ + fi ; \ + $(EchoCmd) Removing old $(DistDir) ; \ + $(RM) -rf $(DistDir); \ + $(EchoCmd) Making 'all' to verify build ; \ + $(MAKE) ENABLE_OPTIMIZED=1 all ; \ + fi + $(Echo) Building Distribution Directory $(DistDir) + $(Verb) $(MKDIR) $(DistDir) + $(Verb) srcdirstrip=`echo "$(PROJ_SRC_DIR)" | sed 's|.|.|g'`; \ + srcrootstrip=`echo "$(PROJ_SRC_ROOT)" | sed 's|.|.|g'`; \ + for file in $(DistFiles) ; do \ + case "$$file" in \ + $(PROJ_SRC_DIR)/*) \ + file=`echo "$$file" | sed "s#^$$srcdirstrip/##"` \ + ;; \ + $(PROJ_SRC_ROOT)/*) \ + file=`echo "$$file" | \ + sed "s#^$$srcrootstrip/##"` \ + ;; \ + esac; \ + if test -f "$(PROJ_SRC_DIR)/$$file" || \ + test -d "$(PROJ_SRC_DIR)/$$file" ; then \ + from_dir="$(PROJ_SRC_DIR)" ; \ + elif test -f "$$file" || test -d "$$file" ; then \ + from_dir=. ; \ + fi ; \ + to_dir=`echo "$$file" | sed -e 's#/[^/]*$$##'` ; \ + if test "$$to_dir" != "$$file" && test "$$to_dir" != "."; then \ + to_dir="$(DistDir)/$$dir"; \ + $(MKDIR) "$$to_dir" ; \ + else \ + to_dir="$(DistDir)"; \ + fi; \ + mid_dir=`echo "$$file" | sed -n -e 's#^\(.*\)/[^/]*$$#\1#p'`; \ + if test -n "$$mid_dir" ; then \ + $(MKDIR) "$$to_dir/$$mid_dir" || exit 1; \ + fi ; \ + if test -d "$$from_dir/$$file"; then \ + if test -d "$(PROJ_SRC_DIR)/$$file" && \ + test "$$from_dir" != "$(PROJ_SRC_DIR)" ; then \ + cd $(PROJ_SRC_DIR) ; \ + $(TAR) cf - $$file --exclude .svn --exclude CVS | \ + ( cd $$to_dir ; $(TAR) xf - ) ; \ + cd $(PROJ_OBJ_DIR) ; \ + else \ + cd $$from_dir ; \ + $(TAR) cf - $$file --exclude .svn --exclude CVS | \ + ( cd $$to_dir ; $(TAR) xf - ) ; \ + cd $(PROJ_OBJ_DIR) ; \ + fi; \ + elif test -f "$$from_dir/$$file" ; then \ + $(CP) -p "$$from_dir/$$file" "$(DistDir)/$$file" || exit 1; \ + elif test -L "$$from_dir/$$file" ; then \ + $(CP) -pd "$$from_dir/$$file" $(DistDir)/$$file || exit 1; \ + elif echo "$(DistAlways)" | grep -v "$$file" >/dev/null ; then \ + $(EchoCmd) "===== WARNING: Distribution Source " \ + "$$from_dir/$$file Not Found!" ; \ + elif test "$(Verb)" != '@' ; then \ + $(EchoCmd) "Skipping non-existent $$from_dir/$$file" ; \ + fi; \ + done + $(Verb) for subdir in $(DistSubDirs) ; do \ + if test "$$subdir" \!= "." ; then \ + new_distdir="$(DistDir)/$$subdir" ; \ + test -d "$$new_distdir" || $(MKDIR) "$$new_distdir" || exit 1; \ + ( cd $$subdir && $(MAKE) ENABLE_OPTIMIZED=1 \ + DistDir="$$new_distdir" distdir ) || exit 1; \ + fi; \ + done + $(Verb) if test "$(DistDir)" = "$(TopDistDir)" ; then \ + $(EchoCmd) Eliminating CVS/.svn directories from distribution ; \ + $(RM) -rf `find $(TopDistDir) -type d \( -name CVS -o \ + -name .svn \) -print` ;\ + $(MAKE) dist-hook ; \ + $(FIND) $(TopDistDir) -type d ! -perm -777 -exec chmod a+rwx {} \; \ + -o ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; \ + -o ! -type d ! -perm -400 -exec chmod a+r {} \; \ + -o ! -type d ! -perm -444 -exec \ + $(SHELL) $(INSTALL_SH) -c -m a+r {} {} \; \ + || chmod -R a+r $(DistDir) ; \ + fi + +# This is invoked by distdir target, define it as a no-op to avoid errors if not +# defined by user. +dist-hook:: + +endif + +############################################################################### +# TOP LEVEL - targets only to apply at the top level directory +############################################################################### + +ifeq ($(LEVEL),.) 
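The distdir recipe above copies each directory with a tar pipe, packing
on one side and unpacking on the other, rather than using cp -r; layout
and permissions survive the trip and the exclude filters keep VCS
metadata out of the distribution. A minimal standalone sketch of the
idiom in sh (the paths are illustrative, and the option placement, as
used in the recipe above, assumes GNU tar):

    mkdir -p /tmp/distcopy
    ( cd src && tar cf - . --exclude .svn --exclude CVS ) \
      | ( cd /tmp/distcopy && tar xf - )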
+ +#------------------------------------------------------------------------ +# Install support for the project's include files: +#------------------------------------------------------------------------ +ifdef NO_INSTALL +install-local:: + $(Echo) Install circumvented with NO_INSTALL +uninstall-local:: + $(Echo) Uninstall circumvented with NO_INSTALL +else +install-local:: + $(Echo) Installing include files + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir) + $(Verb) if test -d "$(PROJ_SRC_ROOT)/include" ; then \ + cd $(PROJ_SRC_ROOT)/include && \ + for hdr in `find . -type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + -o -name '*.td' \ + ')' -print | grep -v CVS | \ + grep -v .svn` ; do \ + instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \ + if test \! -d "$$instdir" ; then \ + $(EchoCmd) Making install directory $$instdir ; \ + $(MKDIR) $$instdir ;\ + fi ; \ + $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \ + done ; \ + fi +ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT)) + $(Verb) if test -d "$(PROJ_OBJ_ROOT)/include" ; then \ + cd $(PROJ_OBJ_ROOT)/include && \ + for hdr in `find . -type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + -o -name '*.td' \ + ')' -print | grep -v CVS | \ + grep -v .svn` ; do \ + instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \ + if test \! -d "$$instdir" ; then \ + $(EchoCmd) Making install directory $$instdir ; \ + $(MKDIR) $$instdir ;\ + fi ; \ + $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \ + done ; \ + fi +endif + +uninstall-local:: + $(Echo) Uninstalling include files + $(Verb) if [ -d "$(PROJ_SRC_ROOT)/include" ] ; then \ + cd $(PROJ_SRC_ROOT)/include && \ + $(RM) -f `find . -path '*/Internal' -prune -o '(' -type f \ + '!' '(' -name '*~' -o -name '.#*' \ + -o -name '*.in' ')' -print ')' | \ + grep -v CVS | sed 's#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \ + cd $(PROJ_SRC_ROOT)/include && \ + $(RM) -f `find . 
-path '*/Internal' -prune -o '(' -type f -name '*.in' \
+	  -print ')' | sed 's#\.in$$##;s#^#$(DESTDIR)$(PROJ_includedir)/#'` ; \
+	fi
+endif
+endif
+
+check-line-length:
+	@echo searching for overlength lines in files: $(Sources)
+	@echo
+	@echo
+	egrep -n '.{81}' $(Sources) /dev/null
+
+check-for-tabs:
+	@echo searching for tabs in files: $(Sources)
+	@echo
+	@echo
+	egrep -n '	' $(Sources) /dev/null
+
+check-footprint:
+	@ls -l $(LibDir) | awk '\
+	  BEGIN { sum = 0; } \
+	        { sum += $$5; } \
+	  END   { printf("Libraries: %6.3f MBytes\n", sum/(1024.0*1024.0)); }'
+	@ls -l $(ToolDir) | awk '\
+	  BEGIN { sum = 0; } \
+	        { sum += $$5; } \
+	  END   { printf("Programs: %6.3f MBytes\n", sum/(1024.0*1024.0)); }'
+#------------------------------------------------------------------------
+# Print out the directories used for building
+#------------------------------------------------------------------------
+printvars::
+	$(Echo) "BuildMode    : " '$(BuildMode)'
+	$(Echo) "PROJ_SRC_ROOT: " '$(PROJ_SRC_ROOT)'
+	$(Echo) "PROJ_SRC_DIR : " '$(PROJ_SRC_DIR)'
+	$(Echo) "PROJ_OBJ_ROOT: " '$(PROJ_OBJ_ROOT)'
+	$(Echo) "PROJ_OBJ_DIR : " '$(PROJ_OBJ_DIR)'
+	$(Echo) "LLVM_SRC_ROOT: " '$(LLVM_SRC_ROOT)'
+	$(Echo) "LLVM_OBJ_ROOT: " '$(LLVM_OBJ_ROOT)'
+	$(Echo) "PROJ_prefix  : " '$(PROJ_prefix)'
+	$(Echo) "PROJ_bindir  : " '$(PROJ_bindir)'
+	$(Echo) "PROJ_libdir  : " '$(PROJ_libdir)'
+	$(Echo) "PROJ_etcdir  : " '$(PROJ_etcdir)'
+	$(Echo) "PROJ_includedir : " '$(PROJ_includedir)'
+	$(Echo) "UserTargets  : " '$(UserTargets)'
+	$(Echo) "ObjMakefiles : " '$(ObjMakefiles)'
+	$(Echo) "SrcMakefiles : " '$(SrcMakefiles)'
+	$(Echo) "ObjDir       : " '$(ObjDir)'
+	$(Echo) "LibDir       : " '$(LibDir)'
+	$(Echo) "ToolDir      : " '$(ToolDir)'
+	$(Echo) "ExmplDir     : " '$(ExmplDir)'
+	$(Echo) "Sources      : " '$(Sources)'
+	$(Echo) "TDFiles      : " '$(TDFiles)'
+	$(Echo) "INCFiles     : " '$(INCFiles)'
+	$(Echo) "INCTMPFiles  : " '$(INCTMPFiles)'
+	$(Echo) "PreConditions: " '$(PreConditions)'
+	$(Echo) "Compile.CXX  : " '$(Compile.CXX)'
+	$(Echo) "Compile.C    : " '$(Compile.C)'
+	$(Echo) "Archive      : " '$(Archive)'
+	$(Echo) "YaccFiles    : " '$(YaccFiles)'
+	$(Echo) "LexFiles     : " '$(LexFiles)'
+	$(Echo) "Module       : " '$(Module)'
+	$(Echo) "FilesToConfig: " '$(FilesToConfigPATH)'
+	$(Echo) "SubDirs      : " '$(SubDirs)'
+	$(Echo) "ProjLibsPaths: " '$(ProjLibsPaths)'
+	$(Echo) "ProjLibsOptions: " '$(ProjLibsOptions)'
+
+###
+# Debugging
+
+# General debugging rule, use 'make make-print-XXX' to print the
+# definition, value and origin of XXX.
+make-print-%:
+	$(error PRINT: $(value $*) = "$($*)" (from $(origin $*)))
diff --git a/final/ModuleInfo.txt b/final/ModuleInfo.txt
new file mode 100644
index 00000000000..40607c71a94
--- /dev/null
+++ b/final/ModuleInfo.txt
@@ -0,0 +1,4 @@
+DepModule:
+BuildCmd: ./build-for-llvm-top.sh
+CleanCmd: make clean -C ../build.llvm
+InstallCmd: make install -C ../build.llvm
diff --git a/final/README.txt b/final/README.txt
new file mode 100644
index 00000000000..0dad9f5e0a6
--- /dev/null
+++ b/final/README.txt
@@ -0,0 +1,16 @@
+Low Level Virtual Machine (LLVM)
+================================
+
+This directory and its subdirectories contain source code for the Low Level
+Virtual Machine, a toolkit for the construction of highly optimized compilers,
+optimizers, and runtime environments.
+
+LLVM is open source software. You may freely distribute it under the terms of
+the license agreement found in LICENSE.txt.
+
+Please see the HTML documentation provided in docs/index.html for further
+assistance with LLVM.
+ +If you're writing a package for LLVM, see docs/Packaging.html for our +suggestions. + diff --git a/final/autoconf/AutoRegen.sh b/final/autoconf/AutoRegen.sh new file mode 100755 index 00000000000..7809667ac5f --- /dev/null +++ b/final/autoconf/AutoRegen.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +die() { + echo "$@" 1>&2 + exit 1 +} + +clean() { + echo $1 | sed -e 's/\\//g' +} + +### NOTE: ############################################################ +### These variables specify the tool versions we want to use. +### Periods should be escaped with backslash for use by grep. +### +### If you update these, please also update docs/GettingStarted.html +want_autoconf_version='2\.60' +want_autoheader_version=$want_autoconf_version +want_aclocal_version='1\.9\.6' +want_libtool_version='1\.5\.22' +### END NOTE ######################################################### + +outfile=configure +configfile=configure.ac + +want_autoconf_version_clean=$(clean $want_autoconf_version) +want_autoheader_version_clean=$(clean $want_autoheader_version) +want_aclocal_version_clean=$(clean $want_aclocal_version) +want_libtool_version_clean=$(clean $want_libtool_version) + +test -d autoconf && test -f autoconf/$configfile && cd autoconf +test -f $configfile || die "Can't find 'autoconf' dir; please cd into it first" +autoconf --version | grep $want_autoconf_version > /dev/null +test $? -eq 0 || die "Your autoconf was not detected as being $want_autoconf_version_clean" +aclocal --version | grep '^aclocal.*'$want_aclocal_version > /dev/null +test $? -eq 0 || die "Your aclocal was not detected as being $want_aclocal_version_clean" +autoheader --version | grep '^autoheader.*'$want_autoheader_version > /dev/null +test $? -eq 0 || die "Your autoheader was not detected as being $want_autoheader_version_clean" +libtool --version | grep $want_libtool_version > /dev/null +test $? -eq 0 || die "Your libtool was not detected as being $want_libtool_version_clean" +echo "" +echo "### NOTE: ############################################################" +echo "### If you get *any* warnings from autoconf below you MUST fix the" +echo "### scripts in the m4 directory because there are future forward" +echo "### compatibility or platform support issues at risk. Please do NOT" +echo "### commit any configure script that was generated with warnings" +echo "### present. You should get just three 'Regenerating..' lines." +echo "######################################################################" +echo "" +echo "Regenerating aclocal.m4 with aclocal $want_aclocal_version_clean" +cwd=`pwd` +aclocal --force -I $cwd/m4 || die "aclocal failed" +echo "Regenerating configure with autoconf $want_autoconf_version_clean" +autoconf --force --warnings=all -o ../$outfile $configfile || die "autoconf failed" +cd .. 
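The version probes earlier in AutoRegen.sh all follow one pattern: grep
the tool's --version output for a dot-escaped version string and die on
a mismatch. A sketch of the same check folded into a reusable helper
(the helper name is illustrative; die and clean are the functions
defined at the top of the script):

    require_version() {
        # $1 = tool name, $2 = version pattern with escaped dots, e.g. '2\.60'
        "$1" --version | grep "$2" > /dev/null ||
            die "Your $1 was not detected as being $(clean "$2")"
    }
    require_version autoconf "$want_autoconf_version"
    require_version aclocal "^aclocal.*$want_aclocal_version"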
+echo "Regenerating config.h.in with autoheader $want_autoheader_version_clean" +autoheader --warnings=all -I autoconf -I autoconf/m4 autoconf/$configfile || die "autoheader failed" +exit 0 diff --git a/final/autoconf/ExportMap.map b/final/autoconf/ExportMap.map new file mode 100644 index 00000000000..17b185fed91 --- /dev/null +++ b/final/autoconf/ExportMap.map @@ -0,0 +1,7 @@ +{ + global: main; + __progname; + environ; + + local: *; +}; diff --git a/final/autoconf/LICENSE.TXT b/final/autoconf/LICENSE.TXT new file mode 100644 index 00000000000..72fdd39edcc --- /dev/null +++ b/final/autoconf/LICENSE.TXT @@ -0,0 +1,24 @@ +------------------------------------------------------------------------------ +Autoconf Files +------------------------------------------------------------------------------ +All autoconf files are licensed under the LLVM license with the following +additions: + +llvm/autoconf/install-sh: + This script is licensed under the LLVM license, with the following + additional copyrights and restrictions: + + Copyright 1991 by the Massachusetts Institute of Technology + + Permission to use, copy, modify, distribute, and sell this software and its + documentation for any purpose is hereby granted without fee, provided that + the above copyright notice appear in all copies and that both that + copyright notice and this permission notice appear in supporting + documentation, and that the name of M.I.T. not be used in advertising or + publicity pertaining to distribution of the software without specific, + written prior permission. M.I.T. makes no representations about the + suitability of this software for any purpose. It is provided "as is" + without express or implied warranty. + +Please see the source files for additional copyrights. + diff --git a/final/autoconf/README.TXT b/final/autoconf/README.TXT new file mode 100644 index 00000000000..3dabdf7b879 --- /dev/null +++ b/final/autoconf/README.TXT @@ -0,0 +1,49 @@ +Upgrading Libtool +=============================================================================== + +If you are in the mood to upgrade libtool, you must do the following: + + 1. Get the new version of libtool and put it in + 2. configure/build/install libtool with --prefix= + 3. Copy /ltdl.m4 to llvm/autoconf/m4 + 4. Copy /share/aclocal/libtool.m4 to llvm/autoconf/m4/libtool.m4 + 5. Copy /share/libtool/ltmain.sh to llvm/autoconf/ltmain.sh + 6. Copy /share/libtool/libltdl/ltdl.c to llvm/lib/System + 7. Copy /share/libtool/libltdl/ltdl.h to llvm/lib/System + 8. Edit the ltdl.h file to #include "llvm/Config/config.h" at the very top. You + might also need to resolve some compiler warnings (typically about + comparison of signed vs. unsigned values). But, you won't find out about + those until you build LLVM (step 13). + 9. Edit the llvm/autoconf/m4/libtool.m4 file so that: + a) in AC_PROB_LIBTOOL macro, the value of LIBTOOL is set to + $(top_builddir)/mklib, not $(top_builddir)/libtool + b) in AC_LIBTOOL_SETUP macro, the variable default_ofile is set to + "mklib" instead of "libtool" + c) s/AC_ENABLE_SHARED_DEFAULT/enable_shared_default/g + d) s/AC_ENABLE_STATIC_DEFAULT/enable_static_default/g + e) s/AC_ENABLE_FAST_INSTALL_DEFAULT/enable_fast_install_default/g +10. Run "autoupdate libtool.m4 ltdl.m4" in the llvm/autoconf/m4 directory. + This should correctly update the macro definitions in the libtool m4 + files to match the version of autoconf that LLVM uses. This converts + AC_HELP_STRING to AS_HELP_STRING and AC_TRY_LINK to AC_LINK_IFELSE, amongst + other things. 
You may need to manually adjust the files. +11. Run AutoRegen.sh to get the new macros into configure script +12. If there are any warnings from AutoRegen.sh, go to step 9. +13. Rebuild LLVM, making sure it reconfigures +14. Test the JIT which uses libltdl +15. If it all works, only THEN commit the changes. + +Upgrading autoconf +=============================================================================== + +If you are in the mood to upgrade autoconf, you should: + + 1. Consider not upgrading. + 2. No really, this is a hassle, you don't want to do it. + 3. Get the new version of autoconf and put it in + 4. configure/build/install autoconf with --prefix= + 5. Run autoupdate on all the m4 macros in llvm/autoconf/m4 + 6. Run autoupdate on llvm/autoconf/configure.ac + 7. Regenerate configure script with AutoRegen.sh + 8. If there are any warnings from AutoRegen.sh, fix them and go to step 7. + 9. Test, test, test. diff --git a/final/autoconf/config.guess b/final/autoconf/config.guess new file mode 100755 index 00000000000..865fe53d6b1 --- /dev/null +++ b/final/autoconf/config.guess @@ -0,0 +1,1498 @@ +#! /bin/sh +# Attempt to guess a canonical system name. +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, +# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 +# Free Software Foundation, Inc. + +timestamp='2009-09-18' + +# This file is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA +# 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + + +# Originally written by Per Bothner. Please send patches (context +# diff format) to and include a ChangeLog +# entry. +# +# This script attempts to guess a canonical system name similar to +# config.sub. If it succeeds, it prints the system name on stdout, and +# exits with 0. Otherwise, it exits with 1. +# +# You can get the latest version of this script from: +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD + +me=`echo "$0" | sed -e 's,.*/,,'` + +usage="\ +Usage: $0 [OPTION] + +Output the configuration name of the system \`$me' is run on. + +Operation modes: + -h, --help print this help, then exit + -t, --time-stamp print date of last modification, then exit + -v, --version print version number, then exit + +Report bugs and patches to ." + +version="\ +GNU config.guess ($timestamp) + +Originally written by Per Bothner. +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. + +This is free software; see the source for copying conditions. There is NO +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
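The argument handling that follows is the classic portable-sh option
loop: inspect $1, act on it, shift, and repeat until the first
non-option. A standalone sketch of the same shape, with hypothetical
flags that are not part of config.guess:

    prog=${0##*/}
    while test $# -gt 0 ; do
      case $1 in
        -h | --help ) echo "usage: $prog [-h] [-v] [FILE]" ; exit 0 ;;
        -v )          verbose=yes ; shift ;;
        -- )          shift ; break ;;   # explicit end of options
        -* )          echo "$prog: invalid option $1" >&2 ; exit 1 ;;
        * )           break ;;           # first operand; stop scanning
      esac
    done
    test "$verbose" = yes && echo "operands: $*"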
+ +help=" +Try \`$me --help' for more information." + +# Parse command line +while test $# -gt 0 ; do + case $1 in + --time-stamp | --time* | -t ) + echo "$timestamp" ; exit ;; + --version | -v ) + echo "$version" ; exit ;; + --help | --h* | -h ) + echo "$usage"; exit ;; + -- ) # Stop option processing + shift; break ;; + - ) # Use stdin as input. + break ;; + -* ) + echo "$me: invalid option $1$help" >&2 + exit 1 ;; + * ) + break ;; + esac +done + +if test $# != 0; then + echo "$me: too many arguments$help" >&2 + exit 1 +fi + +trap 'exit 1' 1 2 15 + +# CC_FOR_BUILD -- compiler used by this script. Note that the use of a +# compiler to aid in system detection is discouraged as it requires +# temporary files to be created and, as you can see below, it is a +# headache to deal with in a portable fashion. + +# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still +# use `HOST_CC' if defined, but it is deprecated. + +# Portable tmp directory creation inspired by the Autoconf team. + +set_cc_for_build=' +trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; +trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; +: ${TMPDIR=/tmp} ; + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; +dummy=$tmp/dummy ; +tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; +case $CC_FOR_BUILD,$HOST_CC,$CC in + ,,) echo "int x;" > $dummy.c ; + for c in cc gcc c89 c99 ; do + if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then + CC_FOR_BUILD="$c"; break ; + fi ; + done ; + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found ; + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; +esac ; set_cc_for_build= ;' + +# This is needed to find uname on a Pyramid OSx when run in the BSD universe. +# (ghazi@noc.rutgers.edu 1994-08-24) +if (test -f /.attbin/uname) >/dev/null 2>&1 ; then + PATH=$PATH:/.attbin ; export PATH +fi + +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown + +# Note: order is significant - the case branches are not exclusive. + +case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in + *:NetBSD:*:*) + # NetBSD (nbsd) targets should (where applicable) match one or + # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*, + # *-*-netbsdecoff* and *-*-netbsd*. For targets that recently + # switched to ELF, *-*-netbsd* would select the old + # object file format. This provides both forward + # compatibility and a consistent mechanism for selecting the + # object file format. + # + # Note: NetBSD doesn't particularly care about the vendor + # portion of the name. We always set it to "unknown". 
+ sysctl="sysctl -n hw.machine_arch" + UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + case "${UNAME_MACHINE_ARCH}" in + armeb) machine=armeb-unknown ;; + arm*) machine=arm-unknown ;; + sh3el) machine=shl-unknown ;; + sh3eb) machine=sh-unknown ;; + sh5el) machine=sh5le-unknown ;; + *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + esac + # The Operating System including object format, if it has switched + # to ELF recently, or will in the future. + case "${UNAME_MACHINE_ARCH}" in + arm*|i386|m68k|ns32k|sh3*|sparc|vax) + eval $set_cc_for_build + if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ELF__ + then + # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout). + # Return netbsd for either. FIX? + os=netbsd + else + os=netbsdelf + fi + ;; + *) + os=netbsd + ;; + esac + # The OS release + # Debian GNU/NetBSD machines have a different userland, and + # thus, need a distinct triplet. However, they do not need + # kernel version information, so it can be replaced with a + # suitable tag, in the style of linux-gnu. + case "${UNAME_VERSION}" in + Debian*) + release='-gnu' + ;; + *) + release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + ;; + esac + # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: + # contains redundant information, the shorter form: + # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. + echo "${machine}-${os}${release}" + exit ;; + *:OpenBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + exit ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit ;; + *:SolidBSD:*:*) + echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + exit ;; + macppc:MirBSD:*:*) + echo powerpc-unknown-mirbsd${UNAME_RELEASE} + exit ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit ;; + alpha:OSF1:*:*) + case $UNAME_RELEASE in + *4.0) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac + # According to Compaq, /usr/sbin/psrinfo has been available on + # OSF/1 and Tru64 systems produced since 1995. I hope that + # covers most systems running today. This code pipes the CPU + # types through head -n 1, so we only detect the type of CPU 0. + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case "$ALPHA_CPU_TYPE" in + "EV4 (21064)") + UNAME_MACHINE="alpha" ;; + "EV4.5 (21064)") + UNAME_MACHINE="alpha" ;; + "LCA4 (21066/21068)") + UNAME_MACHINE="alpha" ;; + "EV5 (21164)") + UNAME_MACHINE="alphaev5" ;; + "EV5.6 (21164A)") + UNAME_MACHINE="alphaev56" ;; + "EV5.6 (21164PC)") + UNAME_MACHINE="alphapca56" ;; + "EV5.7 (21164PC)") + UNAME_MACHINE="alphapca57" ;; + "EV6 (21264)") + UNAME_MACHINE="alphaev6" ;; + "EV6.7 (21264A)") + UNAME_MACHINE="alphaev67" ;; + "EV6.8CB (21264C)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8AL (21264B)") + UNAME_MACHINE="alphaev68" ;; + "EV6.8CX (21264D)") + UNAME_MACHINE="alphaev68" ;; + "EV6.9A (21264/EV69A)") + UNAME_MACHINE="alphaev69" ;; + "EV7 (21364)") + UNAME_MACHINE="alphaev7" ;; + "EV7.9 (21364A)") + UNAME_MACHINE="alphaev79" ;; + esac + # A Pn.n version is a patched version. + # A Vn.n version is a released version. + # A Tn.n version is a released field test version. + # A Xn.n version is an unreleased experimental baselevel. + # 1.2 uses "1.2" for uname -r. 
+ echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + exit ;; + Alpha\ *:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # Should we change UNAME_MACHINE based on the output of uname instead + # of the specific Alpha model? + echo alpha-pc-interix + exit ;; + 21064:Windows_NT:50:3) + echo alpha-dec-winnt3.5 + exit ;; + Amiga*:UNIX_System_V:4.0:*) + echo m68k-unknown-sysv4 + exit ;; + *:[Aa]miga[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-amigaos + exit ;; + *:[Mm]orph[Oo][Ss]:*:*) + echo ${UNAME_MACHINE}-unknown-morphos + exit ;; + *:OS/390:*:*) + echo i370-ibm-openedition + exit ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit ;; + arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) + echo arm-acorn-riscix${UNAME_RELEASE} + exit ;; + arm:riscos:*:*|arm:RISCOS:*:*) + echo arm-unknown-riscos + exit ;; + SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) + echo hppa1.1-hitachi-hiuxmpp + exit ;; + Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) + # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. + if test "`(/bin/universe) 2>/dev/null`" = att ; then + echo pyramid-pyramid-sysv3 + else + echo pyramid-pyramid-bsd + fi + exit ;; + NILE*:*:*:dcosx) + echo pyramid-pyramid-svr4 + exit ;; + DRS?6000:unix:4.0:6*) + echo sparc-icl-nx6 + exit ;; + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) + case `/usr/bin/uname -p` in + sparc) echo sparc-icl-nx7; exit ;; + esac ;; + s390x:SunOS:*:*) + echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4H:SunOS:5.*:*) + echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) + echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) + AUX_ARCH="i386" + echo ${AUX_ARCH}-pc-auroraux`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) + eval $set_cc_for_build + SUN_ARCH="i386" + # If there is a compiler, see if it is configured for 64-bit objects. + # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. + # This test works for both compilers. + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + SUN_ARCH="x86_64" + fi + fi + echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:6*:*) + # According to config.sub, this is the proper way to canonicalize + # SunOS6. Hard to guess exactly what SunOS6 will be like, but + # it's likely to be more like Solaris than SunOS4. + echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + sun4*:SunOS:*:*) + case "`/usr/bin/arch -k`" in + Series*|S4*) + UNAME_RELEASE=`uname -v` + ;; + esac + # Japanese Language versions have a version number like `4.1.3-JL'. 
+	echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'`
+	exit ;;
+    sun3*:SunOS:*:*)
+	echo m68k-sun-sunos${UNAME_RELEASE}
+	exit ;;
+    sun*:*:4.2BSD:*)
+	UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null`
+	test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3
+	case "`/bin/arch`" in
+	    sun3)
+		echo m68k-sun-sunos${UNAME_RELEASE}
+		;;
+	    sun4)
+		echo sparc-sun-sunos${UNAME_RELEASE}
+		;;
+	esac
+	exit ;;
+    aushp:SunOS:*:*)
+	echo sparc-auspex-sunos${UNAME_RELEASE}
+	exit ;;
+    # The situation for MiNT is a little confusing.  The machine name
+    # can be virtually everything (everything which is not
+    # "atarist" or "atariste" at least should have a processor
+    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
+    # to the lowercase version "mint" (or "freemint").  Finally
+    # the system name "TOS" denotes a system which is actually not
+    # MiNT.  But MiNT is downward compatible to TOS, so this should
+    # be no problem.
+    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
+	echo m68k-atari-mint${UNAME_RELEASE}
+	exit ;;
+    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
+	echo m68k-milan-mint${UNAME_RELEASE}
+	exit ;;
+    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
+	echo m68k-hades-mint${UNAME_RELEASE}
+	exit ;;
+    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
+	echo m68k-unknown-mint${UNAME_RELEASE}
+	exit ;;
+    m68k:machten:*:*)
+	echo m68k-apple-machten${UNAME_RELEASE}
+	exit ;;
+    powerpc:machten:*:*)
+	echo powerpc-apple-machten${UNAME_RELEASE}
+	exit ;;
+    RISC*:Mach:*:*)
+	echo mips-dec-mach_bsd4.3
+	exit ;;
+    RISC*:ULTRIX:*:*)
+	echo mips-dec-ultrix${UNAME_RELEASE}
+	exit ;;
+    VAX*:ULTRIX*:*:*)
+	echo vax-dec-ultrix${UNAME_RELEASE}
+	exit ;;
+    2020:CLIX:*:* | 2430:CLIX:*:*)
+	echo clipper-intergraph-clix${UNAME_RELEASE}
+	exit ;;
+    mips:*:*:UMIPS | mips:*:*:RISCos)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+#ifdef __cplusplus
+#include <stdio.h>  /* for printf() prototype */
+	int main (int argc, char *argv[]) {
+#else
+	int main (argc, argv) int argc; char *argv[]; {
+#endif
+	#if defined (host_mips) && defined (MIPSEB)
+	#if defined (SYSTYPE_SYSV)
+	  printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_SVR4)
+	  printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0);
+	#endif
+	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
+	  printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0);
+	#endif
+	#endif
+	  exit (-1);
+	}
+EOF
+	$CC_FOR_BUILD -o $dummy $dummy.c &&
+	  dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` &&
+	  SYSTEM_NAME=`$dummy $dummyarg` &&
+	    { echo "$SYSTEM_NAME"; exit; }
+	echo mips-mips-riscos${UNAME_RELEASE}
+	exit ;;
+    Motorola:PowerMAX_OS:*:*)
+	echo powerpc-motorola-powermax
+	exit ;;
+    Motorola:*:4.3:PL8-*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
+	echo powerpc-harris-powermax
+	exit ;;
+    Night_Hawk:Power_UNIX:*:*)
+	echo powerpc-harris-powerunix
+	exit ;;
+    m88k:CX/UX:7*:*)
+	echo m88k-harris-cxux7
+	exit ;;
+    m88k:*:4*:R4*)
+	echo m88k-motorola-sysv4
+	exit ;;
+    m88k:*:3*:R3*)
+	echo m88k-motorola-sysv3
+	exit ;;
+    AViiON:dgux:*:*)
+	# DG/UX returns AViiON for all architectures
+	UNAME_PROCESSOR=`/usr/bin/uname -p`
+	if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ]
+	then
+	    if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \
+	       [ ${TARGET_BINARY_INTERFACE}x = x ]
+	    then
+		echo m88k-dg-dgux${UNAME_RELEASE}
+	    else
+		echo m88k-dg-dguxbcs${UNAME_RELEASE}
+	    fi
+	else
+	    echo i586-dg-dgux${UNAME_RELEASE}
+	fi
+	exit ;;
+    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
+	echo m88k-dolphin-sysv3
+	exit ;;
+    M88*:*:R3*:*)
+	# Delta 88k system running SVR3
+	echo m88k-motorola-sysv3
+	exit ;;
+    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
+	echo m88k-tektronix-sysv3
+	exit ;;
+    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
+	echo m68k-tektronix-bsd
+	exit ;;
+    *:IRIX*:*:*)
+	echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'`
+	exit ;;
+    ????????:AIX?:[12].1:2)	# AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
+	echo romp-ibm-aix	# uname -m gives an 8 hex-code CPU id
+	exit ;;			# Note that: echo "'`uname -s`'" gives 'AIX '
+    i*86:AIX:*:*)
+	echo i386-ibm-aix
+	exit ;;
+    ia64:AIX:*:*)
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${UNAME_MACHINE}-ibm-aix${IBM_REV}
+	exit ;;
+    *:AIX:2:3)
+	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
+		eval $set_cc_for_build
+		sed 's/^		//' << EOF >$dummy.c
+		#include <sys/systemcfg.h>
+
+		main()
+		{
+			if (!__power_pc())
+				exit(1);
+			puts("powerpc-ibm-aix3.2.5");
+			exit(0);
+		}
+EOF
+		if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy`
+		then
+			echo "$SYSTEM_NAME"
+		else
+			echo rs6000-ibm-aix3.2.5
+		fi
+	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
+		echo rs6000-ibm-aix3.2.4
+	else
+		echo rs6000-ibm-aix3.2
+	fi
+	exit ;;
+    *:AIX:*:[456])
+	IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'`
+	if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then
+		IBM_ARCH=rs6000
+	else
+		IBM_ARCH=powerpc
+	fi
+	if [ -x /usr/bin/oslevel ] ; then
+		IBM_REV=`/usr/bin/oslevel`
+	else
+		IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE}
+	fi
+	echo ${IBM_ARCH}-ibm-aix${IBM_REV}
+	exit ;;
+    *:AIX:*:*)
+	echo rs6000-ibm-aix
+	exit ;;
+    ibmrt:4.4BSD:*|romp-ibm:BSD:*)
+	echo romp-ibm-bsd4.4
+	exit ;;
+    ibmrt:*BSD:*|romp-ibm:BSD:*)	# covers RT/PC BSD and
+	echo romp-ibm-bsd${UNAME_RELEASE}	# 4.3 with uname added to
+	exit ;;				# report: romp-ibm BSD 4.3
+    *:BOSX:*:*)
+	echo rs6000-bull-bosx
+	exit ;;
+    DPX/2?00:B.O.S.:*:*)
+	echo m68k-bull-sysv3
+	exit ;;
+    9000/[34]??:4.3bsd:1.*:*)
+	echo m68k-hp-bsd
+	exit ;;
+    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
+	echo m68k-hp-bsd4.4
+	exit ;;
+    9000/[34678]??:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	case "${UNAME_MACHINE}" in
+	    9000/31? )            HP_ARCH=m68000 ;;
+	    9000/[34]?? )         HP_ARCH=m68k ;;
+	    9000/[678][0-9][0-9])
+		if [ -x /usr/bin/getconf ]; then
+		    sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null`
+		    sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null`
+		    case "${sc_cpu_version}" in
+		      523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0
+		      528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1
+		      532)                      # CPU_PA_RISC2_0
+			case "${sc_kernel_bits}" in
+			  32) HP_ARCH="hppa2.0n" ;;
+			  64) HP_ARCH="hppa2.0w" ;;
+			  '') HP_ARCH="hppa2.0" ;;   # HP-UX 10.20
+			esac ;;
+		    esac
+		fi
+		if [ "${HP_ARCH}" = "" ]; then
+		    eval $set_cc_for_build
+		    sed 's/^              //' << EOF >$dummy.c
+
+              #define _HPUX_SOURCE
+              #include <stdlib.h>
+              #include <unistd.h>
+
+              int main ()
+              {
+              #if defined(_SC_KERNEL_BITS)
+                  long bits = sysconf(_SC_KERNEL_BITS);
+              #endif
+                  long cpu  = sysconf (_SC_CPU_VERSION);
+
+                  switch (cpu)
+                  {
+                  case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
+                  case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
+                  case CPU_PA_RISC2_0:
+              #if defined(_SC_KERNEL_BITS)
+                      switch (bits)
+                      {
+                      case 64: puts ("hppa2.0w"); break;
+                      case 32: puts ("hppa2.0n"); break;
+                      default: puts ("hppa2.0"); break;
+                      } break;
+              #else  /* !defined(_SC_KERNEL_BITS) */
+                      puts ("hppa2.0"); break;
+              #endif
+                  default: puts ("hppa1.0"); break;
+                  }
+                  exit (0);
+              }
+EOF
+		    (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy`
+		    test -z "$HP_ARCH" && HP_ARCH=hppa
+		fi ;;
+	esac
+	if [ ${HP_ARCH} = "hppa2.0w" ]
+	then
+	    eval $set_cc_for_build
+
+	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
+	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
+	    # generating 64-bit code.  GNU and HP use different nomenclature:
+	    #
+	    # $ CC_FOR_BUILD=cc ./config.guess
+	    # => hppa2.0w-hp-hpux11.23
+	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
+	    # => hppa64-hp-hpux11.23
+
+	    if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) |
+		grep -q __LP64__
+	    then
+		HP_ARCH="hppa2.0w"
+	    else
+		HP_ARCH="hppa64"
+	    fi
+	fi
+	echo ${HP_ARCH}-hp-hpux${HPUX_REV}
+	exit ;;
+    ia64:HP-UX:*:*)
+	HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'`
+	echo ia64-hp-hpux${HPUX_REV}
+	exit ;;
+    3050*:HI-UX:*:*)
+	eval $set_cc_for_build
+	sed 's/^	//' << EOF >$dummy.c
+	#include <unistd.h>
+	int
+	main ()
+	{
+	  long cpu = sysconf (_SC_CPU_VERSION);
+	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
+	     true for CPU_PA_RISC1_0. CPU_IS_PA_RISC returns correct
+	     results, however.
*/ + if (CPU_IS_PA_RISC (cpu)) + { + switch (cpu) + { + case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break; + case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break; + case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break; + default: puts ("hppa-hitachi-hiuxwe2"); break; + } + } + else if (CPU_IS_HP_MC68K (cpu)) + puts ("m68k-hitachi-hiuxwe2"); + else puts ("unknown-hitachi-hiuxwe2"); + exit (0); + } +EOF + $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + { echo "$SYSTEM_NAME"; exit; } + echo unknown-hitachi-hiuxwe2 + exit ;; + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + echo hppa1.1-hp-bsd + exit ;; + 9000/8??:4.3bsd:*:*) + echo hppa1.0-hp-bsd + exit ;; + *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) + echo hppa1.0-hp-mpeix + exit ;; + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + echo hppa1.1-hp-osf + exit ;; + hp8??:OSF1:*:*) + echo hppa1.0-hp-osf + exit ;; + i*86:OSF1:*:*) + if [ -x /usr/sbin/sysversion ] ; then + echo ${UNAME_MACHINE}-unknown-osf1mk + else + echo ${UNAME_MACHINE}-unknown-osf1 + fi + exit ;; + parisc*:Lites*:*:*) + echo hppa1.1-hp-lites + exit ;; + C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) + echo c1-convex-bsd + exit ;; + C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) + if getsysinfo -f scalar_acc + then echo c32-convex-bsd + else echo c2-convex-bsd + fi + exit ;; + C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) + echo c34-convex-bsd + exit ;; + C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) + echo c38-convex-bsd + exit ;; + C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) + echo c4-convex-bsd + exit ;; + CRAY*Y-MP:*:*:*) + echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*[A-Z]90:*:*:*) + echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ + -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ + -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*TS:*:*:*) + echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*T3E:*:*:*) + echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + CRAY*SV1:*:*:*) + echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + *:UNICOS/mp:*:*) + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + exit ;; + F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit ;; + i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) + echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + exit ;; + sparc*:BSD/OS:*:*) + echo sparc-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:BSD/OS:*:*) + echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + exit ;; + *:FreeBSD:*:*) + case ${UNAME_MACHINE} in + pc98) + echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + amd64) + echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + *) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + esac + exit ;; + i*:CYGWIN*:*) + 
echo ${UNAME_MACHINE}-pc-cygwin + exit ;; + *:MINGW*:*) + echo ${UNAME_MACHINE}-pc-mingw32 + exit ;; + i*:windows32*:*) + # uname -m includes "-pc" on this system. + echo ${UNAME_MACHINE}-mingw32 + exit ;; + i*:PW*:*) + echo ${UNAME_MACHINE}-pc-pw32 + exit ;; + *:Interix*:[3456]*) + case ${UNAME_MACHINE} in + x86) + echo i586-pc-interix${UNAME_RELEASE} + exit ;; + EM64T | authenticamd | genuineintel) + echo x86_64-unknown-interix${UNAME_RELEASE} + exit ;; + IA64) + echo ia64-unknown-interix${UNAME_RELEASE} + exit ;; + esac ;; + [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) + echo i${UNAME_MACHINE}-pc-mks + exit ;; + 8664:Windows_NT:*) + echo x86_64-pc-mks + exit ;; + i*:Windows_NT*:* | Pentium*:Windows_NT*:*) + # How do we know it's Interix rather than the generic POSIX subsystem? + # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we + # UNAME_MACHINE based on the output of uname instead of i386? + echo i586-pc-interix + exit ;; + i*:UWIN*:*) + echo ${UNAME_MACHINE}-pc-uwin + exit ;; + amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit ;; + p*:CYGWIN*:*) + echo powerpcle-unknown-cygwin + exit ;; + prep*:SunOS:5.*:*) + echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + exit ;; + *:GNU:*:*) + # the GNU system + echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + exit ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit ;; + i*86:Minix:*:*) + echo ${UNAME_MACHINE}-pc-minix + exit ;; + alpha:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + EV5) UNAME_MACHINE=alphaev5 ;; + EV56) UNAME_MACHINE=alphaev56 ;; + PCA56) UNAME_MACHINE=alphapca56 ;; + PCA57) UNAME_MACHINE=alphapca56 ;; + EV6) UNAME_MACHINE=alphaev6 ;; + EV67) UNAME_MACHINE=alphaev67 ;; + EV68*) UNAME_MACHINE=alphaev68 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_EABI__ + then + echo ${UNAME_MACHINE}-unknown-linux-gnu + else + echo ${UNAME_MACHINE}-unknown-linux-gnueabi + fi + exit ;; + avr32*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + cris:Linux:*:*) + echo cris-axis-linux-gnu + exit ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-gnu + exit ;; + frv:Linux:*:*) + echo frv-unknown-linux-gnu + exit ;; + i*86:Linux:*:*) + echo ${UNAME_MACHINE}-pc-linux-gnu + exit ;; + ia64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + m68*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + mips:Linux:*:* | mips64:Linux:*:*) + eval $set_cc_for_build + sed 's/^ //' << EOF >$dummy.c + #undef CPU + #undef ${UNAME_MACHINE} + #undef ${UNAME_MACHINE}el + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) + CPU=${UNAME_MACHINE}el + #else + #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) + CPU=${UNAME_MACHINE} + #else + CPU= + #endif + #endif +EOF + eval "`$CC_FOR_BUILD -E $dummy.c 2>/dev/null | sed -n ' + /^CPU/{ + s: ::g + p + }'`" + test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; } + ;; + or32:Linux:*:*) + echo or32-unknown-linux-gnu + exit ;; + padre:Linux:*:*) + echo sparc-unknown-linux-gnu + exit ;; + parisc64:Linux:*:* | hppa64:Linux:*:*) + echo hppa64-unknown-linux-gnu + exit ;; + parisc:Linux:*:* | hppa:Linux:*:*) + # Look for CPU level + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) echo hppa1.1-unknown-linux-gnu ;; + PA8*) echo hppa2.0-unknown-linux-gnu ;; + *) echo hppa-unknown-linux-gnu ;; + esac + exit ;; + ppc64:Linux:*:*) + echo powerpc64-unknown-linux-gnu + exit ;; + ppc:Linux:*:*) + echo powerpc-unknown-linux-gnu + exit ;; + s390:Linux:*:* | s390x:Linux:*:*) + echo ${UNAME_MACHINE}-ibm-linux + exit ;; + sh64*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sh*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + sparc:Linux:*:* | sparc64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + vax:Linux:*:*) + echo ${UNAME_MACHINE}-dec-linux-gnu + exit ;; + x86_64:Linux:*:*) + echo x86_64-unknown-linux-gnu + exit ;; + xtensa*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; + i*86:DYNIX/ptx:4*:*) + # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. + # earlier versions are messed up and put the nodename in both + # sysname and nodename. + echo i386-sequent-sysv4 + exit ;; + i*86:UNIX_SV:4.2MP:2.*) + # Unixware is an offshoot of SVR4, but it has its own version + # number series starting with 2... + # I am not positive that other SVR4 systems won't match this, + # I just have to hope. -- rms. + # Use sysv4.2uw... so that sysv4* matches it. + echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + exit ;; + i*86:OS/2:*:*) + # If we were able to find `uname', then EMX Unix compatibility + # is probably installed. 
+	echo ${UNAME_MACHINE}-pc-os2-emx
+	exit ;;
+    i*86:XTS-300:*:STOP)
+	echo ${UNAME_MACHINE}-unknown-stop
+	exit ;;
+    i*86:atheos:*:*)
+	echo ${UNAME_MACHINE}-unknown-atheos
+	exit ;;
+    i*86:syllable:*:*)
+	echo ${UNAME_MACHINE}-pc-syllable
+	exit ;;
+    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
+	echo i386-unknown-lynxos${UNAME_RELEASE}
+	exit ;;
+    i*86:*DOS:*:*)
+	echo ${UNAME_MACHINE}-pc-msdosdjgpp
+	exit ;;
+    i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*)
+	UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'`
+	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
+		echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL}
+	else
+		echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL}
+	fi
+	exit ;;
+    i*86:*:5:[678]*)
+	# UnixWare 7.x, OpenUNIX and OpenServer 6.
+	case `/bin/uname -X | grep "^Machine"` in
+	    *486*)	     UNAME_MACHINE=i486 ;;
+	    *Pentium)	     UNAME_MACHINE=i586 ;;
+	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
+	esac
+	echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}
+	exit ;;
+    i*86:*:3.2:*)
+	if test -f /usr/options/cb.name; then
+		UNAME_REL=`sed -n 's/.*Version //p' </usr/options/cb.name`
+		echo ${UNAME_MACHINE}-pc-isc$UNAME_REL
+	elif /bin/uname -X 2>/dev/null >/dev/null ; then
+		UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')`
+		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
+		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
+			&& UNAME_MACHINE=i586
+		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
+			&& UNAME_MACHINE=i686
+		echo ${UNAME_MACHINE}-pc-sco$UNAME_REL
+	else
+		echo ${UNAME_MACHINE}-pc-sysv32
+	fi
+	exit ;;
+    pc:*:*:*)
+	# Left here for compatibility:
+	# uname -m prints for DJGPP always 'pc', but it prints nothing about
+	# the processor, so we play safe by assuming i586.
+	# Note: whatever this is, it MUST be the same as what config.sub
+	# prints for the "djgpp" host, or else GDB configury will decide that
+	# this is a cross-build.
+	echo i586-pc-msdosdjgpp
+	exit ;;
+    Intel:Mach:3*:*)
+	echo i386-pc-mach3
+	exit ;;
+    paragon:*:*:*)
+	echo i860-intel-osf1
+	exit ;;
+    i860:*:4.*:*) # i860-SVR4
+	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
+	  echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4
+	else # Add other i860-SVR4 vendors below as they are discovered.
+ echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + fi + exit ;; + mini*:CTIX:SYS*5:*) + # "miniframe" + echo m68010-convergent-sysv + exit ;; + mc68k:UNIX:SYSTEM5:3.51m) + echo m68k-convergent-sysv + exit ;; + M680?0:D-NIX:5.3:*) + echo m68k-diab-dnix + exit ;; + M68*:*:R3V[5678]*:*) + test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) + OS_REL='' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4; exit; } ;; + NCR*:*:4.2:* | MPRAS*:*:4.2:*) + OS_REL='.3' + test -r /etc/.relid \ + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ + && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ + && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) + echo m68k-unknown-lynxos${UNAME_RELEASE} + exit ;; + mc68030:UNIX_System_V:4.*:*) + echo m68k-atari-sysv4 + exit ;; + TSUNAMI:LynxOS:2.*:*) + echo sparc-unknown-lynxos${UNAME_RELEASE} + exit ;; + rs6000:LynxOS:2.*:*) + echo rs6000-unknown-lynxos${UNAME_RELEASE} + exit ;; + PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) + echo powerpc-unknown-lynxos${UNAME_RELEASE} + exit ;; + SM[BE]S:UNIX_SV:*:*) + echo mips-dde-sysv${UNAME_RELEASE} + exit ;; + RM*:ReliantUNIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + RM*:SINIX-*:*:*) + echo mips-sni-sysv4 + exit ;; + *:SINIX-*:*:*) + if uname -p 2>/dev/null >/dev/null ; then + UNAME_MACHINE=`(uname -p) 2>/dev/null` + echo ${UNAME_MACHINE}-sni-sysv4 + else + echo ns32k-sni-sysv + fi + exit ;; + PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + # says + echo i586-unisys-sysv4 + exit ;; + *:UNIX_System_V:4*:FTX*) + # From Gerald Hewes . + # How about differentiating between stratus architectures? -djm + echo hppa1.1-stratus-sysv4 + exit ;; + *:*:*:FTX*) + # From seanf@swdc.stratus.com. + echo i860-stratus-sysv4 + exit ;; + i*86:VOS:*:*) + # From Paul.Green@stratus.com. + echo ${UNAME_MACHINE}-stratus-vos + exit ;; + *:VOS:*:*) + # From Paul.Green@stratus.com. + echo hppa1.1-stratus-vos + exit ;; + mc68*:A/UX:*:*) + echo m68k-apple-aux${UNAME_RELEASE} + exit ;; + news*:NEWS-OS:6*:*) + echo mips-sony-newsos6 + exit ;; + R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) + if [ -d /usr/nec ]; then + echo mips-nec-sysv${UNAME_RELEASE} + else + echo mips-unknown-sysv${UNAME_RELEASE} + fi + exit ;; + BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. + echo powerpc-be-beos + exit ;; + BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. + echo powerpc-apple-beos + exit ;; + BePC:BeOS:*:*) # BeOS running on Intel PC compatible. + echo i586-pc-beos + exit ;; + BePC:Haiku:*:*) # Haiku running on Intel PC compatible. 
+	echo i586-pc-haiku
+	exit ;;
+    SX-4:SUPER-UX:*:*)
+	echo sx4-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-5:SUPER-UX:*:*)
+	echo sx5-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-6:SUPER-UX:*:*)
+	echo sx6-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-7:SUPER-UX:*:*)
+	echo sx7-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8:SUPER-UX:*:*)
+	echo sx8-nec-superux${UNAME_RELEASE}
+	exit ;;
+    SX-8R:SUPER-UX:*:*)
+	echo sx8r-nec-superux${UNAME_RELEASE}
+	exit ;;
+    Power*:Rhapsody:*:*)
+	echo powerpc-apple-rhapsody${UNAME_RELEASE}
+	exit ;;
+    *:Rhapsody:*:*)
+	echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE}
+	exit ;;
+    *:Darwin:*:*)
+	UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown
+	case $UNAME_PROCESSOR in
+	    i386)
+		eval $set_cc_for_build
+		if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then
+		  if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
+		      (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \
+		      grep IS_64BIT_ARCH >/dev/null
+		  then
+		      UNAME_PROCESSOR="x86_64"
+		  fi
+		fi ;;
+	    unknown) UNAME_PROCESSOR=powerpc ;;
+	esac
+	echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE}
+	exit ;;
+    *:procnto*:*:* | *:QNX:[0123456789]*:*)
+	UNAME_PROCESSOR=`uname -p`
+	if test "$UNAME_PROCESSOR" = "x86"; then
+		UNAME_PROCESSOR=i386
+		UNAME_MACHINE=pc
+	fi
+	echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE}
+	exit ;;
+    *:QNX:*:4*)
+	echo i386-pc-qnx
+	exit ;;
+    NSE-?:NONSTOP_KERNEL:*:*)
+	echo nse-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    NSR-?:NONSTOP_KERNEL:*:*)
+	echo nsr-tandem-nsk${UNAME_RELEASE}
+	exit ;;
+    *:NonStop-UX:*:*)
+	echo mips-compaq-nonstopux
+	exit ;;
+    BS2000:POSIX*:*:*)
+	echo bs2000-siemens-sysv
+	exit ;;
+    DS/*:UNIX_System_V:*:*)
+	echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE}
+	exit ;;
+    *:Plan9:*:*)
+	# "uname -m" is not consistent, so use $cputype instead. 386
+	# is converted to i386 for consistency with other x86
+	# operating systems.
+	if test "$cputype" = "386"; then
+	    UNAME_MACHINE=i386
+	else
+	    UNAME_MACHINE="$cputype"
+	fi
+	echo ${UNAME_MACHINE}-unknown-plan9
+	exit ;;
+    *:TOPS-10:*:*)
+	echo pdp10-unknown-tops10
+	exit ;;
+    *:TENEX:*:*)
+	echo pdp10-unknown-tenex
+	exit ;;
+    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
+	echo pdp10-dec-tops20
+	exit ;;
+    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
+	echo pdp10-xkl-tops20
+	exit ;;
+    *:TOPS-20:*:*)
+	echo pdp10-unknown-tops20
+	exit ;;
+    *:ITS:*:*)
+	echo pdp10-unknown-its
+	exit ;;
+    SEI:*:*:SEIUX)
+	echo mips-sei-seiux${UNAME_RELEASE}
+	exit ;;
+    *:DragonFly:*:*)
+	echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`
+	exit ;;
+    *:*VMS:*:*)
+	UNAME_MACHINE=`(uname -p) 2>/dev/null`
+	case "${UNAME_MACHINE}" in
+	    A*) echo alpha-dec-vms ; exit ;;
+	    I*) echo ia64-dec-vms ; exit ;;
+	    V*) echo vax-dec-vms ; exit ;;
+	esac ;;
+    *:XENIX:*:SysV)
+	echo i386-pc-xenix
+	exit ;;
+    i*86:skyos:*:*)
+	echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//'
+	exit ;;
+    i*86:rdos:*:*)
+	echo ${UNAME_MACHINE}-pc-rdos
+	exit ;;
+    i*86:AROS:*:*)
+	echo ${UNAME_MACHINE}-pc-aros
+	exit ;;
+esac
+
+#echo '(No uname command or uname output not recognized.)' 1>&2
+#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
+
+eval $set_cc_for_build
+cat >$dummy.c <<EOF
+#ifdef _SEQUENT_
+# include <sys/types.h>
+# include <sys/utsname.h>
+#endif
+main ()
+{
+#if defined (sony)
+#if defined (MIPSEB)
+  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
+     I don't know....  */
+  printf ("mips-sony-bsd\n"); exit (0);
+#else
+#include <sys/param.h>
+  printf ("m68k-sony-newsos%s\n",
+#ifdef NEWSOS4
+	"4"
+#else
+	""
+#endif
+	); exit (0);
+#endif
+#endif
+
+#if defined (__arm) && defined (__acorn) && defined (__unix)
+  printf ("arm-acorn-riscix\n"); exit (0);
+#endif
+
+#if defined (hp300) && !defined (hpux)
+  printf ("m68k-hp-bsd\n"); exit (0);
+#endif
+
+#if defined (NeXT)
+#if !defined (__ARCHITECTURE__)
+#define __ARCHITECTURE__ "m68k"
+#endif
+  int version;
+  version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`;
+  if (version < 4)
+    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
+  else
+    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
+  exit (0);
+#endif
+
+#if defined (MULTIMAX) || defined (n16)
+#if defined (UMAXV)
+  printf ("ns32k-encore-sysv\n"); exit (0);
+#else
+#if defined (CMU)
+  printf ("ns32k-encore-mach\n"); exit (0);
+#else
+  printf ("ns32k-encore-bsd\n"); exit (0);
+#endif
+#endif
+#endif
+
+#if defined (__386BSD__)
+  printf ("i386-pc-bsd\n"); exit (0);
+#endif
+
+#if defined (sequent)
+#if defined (i386)
+  printf ("i386-sequent-dynix\n"); exit (0);
+#endif
+#if defined (ns32000)
+  printf ("ns32k-sequent-dynix\n"); exit (0);
+#endif
+#endif
+
+#if defined (_SEQUENT_)
+    struct utsname un;
+
+    uname(&un);
+
+    if (strncmp(un.version, "V2", 2) == 0) {
+	printf ("i386-sequent-ptx2\n"); exit (0);
+    }
+    if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
+	printf ("i386-sequent-ptx1\n"); exit (0);
+    }
+    printf ("i386-sequent-ptx\n"); exit (0);
+
+#endif
+
+#if defined (vax)
+# if !defined (ultrix)
+#  include <sys/param.h>
+#  if defined (BSD)
+#   if BSD == 43
+      printf ("vax-dec-bsd4.3\n"); exit (0);
+#   else
+#    if BSD == 199006
+      printf ("vax-dec-bsd4.3reno\n"); exit (0);
+#    else
+      printf ("vax-dec-bsd\n"); exit (0);
+#    endif
+#   endif
+#  else
+    printf ("vax-dec-bsd\n"); exit (0);
+#  endif
+# else
+    printf ("vax-dec-ultrix\n"); exit (0);
+# endif
+#endif
+
+#if defined (alliant) && defined (i860)
+  printf ("i860-alliant-bsd\n"); exit (0);
+#endif
+
+  exit (1);
+}
+EOF
+
+$CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null && SYSTEM_NAME=`$dummy` &&
+	{ echo "$SYSTEM_NAME"; exit; }
+
+# Apollos put the system type in the environment.
+
+test -d /usr/apollo && { echo ${ISP}-apollo-${SYSTYPE}; exit; }
+
+# Convex versions that predate uname can use getsysinfo(1)
+
+if [ -x /usr/convex/getsysinfo ]
+then
+	case `getsysinfo -f cpu_type` in
+	c1*)
+		echo c1-convex-bsd
+		exit ;;
+	c2*)
+		if getsysinfo -f scalar_acc
+		then echo c32-convex-bsd
+		else echo c2-convex-bsd
+		fi
+		exit ;;
+	c34*)
+		echo c34-convex-bsd
+		exit ;;
+	c38*)
+		echo c38-convex-bsd
+		exit ;;
+	c4*)
+		echo c4-convex-bsd
+		exit ;;
+	esac
+fi
+
+cat >&2 <<EOF
+$0: unable to guess system type
+
+This script, last modified $timestamp, has failed to recognize
+the operating system you are using. It is advised that you
+download the most up to date version of the config scripts from
+
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD
+and
+  http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+If the version you run ($me) is already up to date, please
+send the following data and any information you think might be
+pertinent to <config-patches@gnu.org> in order to provide the needed
+information to handle your system.
+
+config.guess timestamp = $timestamp
+
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null`
+
+hostinfo               = `(hostinfo) 2>/dev/null`
+/bin/universe          = `(/bin/universe) 2>/dev/null`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null`
+/bin/arch              = `(/bin/arch) 2>/dev/null`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null`
+
+UNAME_MACHINE = ${UNAME_MACHINE}
+UNAME_RELEASE = ${UNAME_RELEASE}
+UNAME_SYSTEM  = ${UNAME_SYSTEM}
+UNAME_VERSION = ${UNAME_VERSION}
+EOF
+
+exit 1
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "timestamp='"
+# time-stamp-format: "%:y-%02m-%02d"
+# time-stamp-end: "'"
+# End:
diff --git a/final/autoconf/config.sub b/final/autoconf/config.sub
new file mode 100755
index 00000000000..183976a066a
--- /dev/null
+++ b/final/autoconf/config.sub
@@ -0,0 +1,1702 @@
+#! /bin/sh
+# Configuration validation subroutine script.
+#   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+#   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
+#   Free Software Foundation, Inc.
+
+timestamp='2009-08-19'
+
+# This file is (in principle) common to ALL GNU software.
+# The presence of a machine in this file suggests that SOME GNU software
+# can handle that machine.  It does not imply ALL GNU software can.
+#
+# This file is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+# 02110-1301, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+
+# Please send patches to <config-patches@gnu.org>.  Submit a context
+# diff and a properly formatted GNU ChangeLog entry.
+#
+# Configuration subroutine to validate and canonicalize a configuration type.
+# Supply the specified configuration type as an argument.
+# If it is invalid, we print an error message on stderr and exit with code 1.
+# Otherwise, we print the canonical config type on stdout and succeed.
+
+# You can get the latest version of this script from:
+# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD
+
+# This file is supposed to be the same for all GNU packages
+# and recognize all the CPU types, system types and aliases
+# that are meaningful with *any* GNU software.
+# Each package is responsible for reporting which valid configurations
+# it does not support.  The user should be able to distinguish
+# a failure to support a valid configuration from a meaningless
+# configuration.
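+#
+# For illustration, a few canonicalizations as derived from the mapping
+# rules below (alias on the left, canonical form on the right):
+#   i686-linux  =>  i686-pc-linux-gnu
+#   sun4        =>  sparc-sun-sunos4.1.1
+#   amd64       =>  x86_64-pc-none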
+
+# The goal of this file is to map all the various variations of a given
+# machine specification into a single specification in the form:
+#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
+# or in some cases, the newer four-part form:
+#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
+# It is wrong to echo any other type of specification.
+
+me=`echo "$0" | sed -e 's,.*/,,'`
+
+usage="\
+Usage: $0 [OPTION] CPU-MFR-OPSYS
+       $0 [OPTION] ALIAS
+
+Canonicalize a configuration name.
+
+Operation modes:
+  -h, --help         print this help, then exit
+  -t, --time-stamp   print date of last modification, then exit
+  -v, --version      print version number, then exit
+
+Report bugs and patches to <config-patches@gnu.org>."
+
+version="\
+GNU config.sub ($timestamp)
+
+Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
+
+help="
+Try \`$me --help' for more information."
+
+# Parse command line
+while test $# -gt 0 ; do
+  case $1 in
+    --time-stamp | --time* | -t )
+       echo "$timestamp" ; exit ;;
+    --version | -v )
+       echo "$version" ; exit ;;
+    --help | --h* | -h )
+       echo "$usage"; exit ;;
+    -- )     # Stop option processing
+       shift; break ;;
+    - )	# Use stdin as input.
+       break ;;
+    -* )
+       echo "$me: invalid option $1$help"
+       exit 1 ;;
+
+    *local*)
+       # First pass through any local machine types.
+       echo $1
+       exit ;;
+
+    * )
+       break ;;
+  esac
+done
+
+case $# in
+ 0) echo "$me: missing argument$help" >&2
+    exit 1;;
+ 1) ;;
+ *) echo "$me: too many arguments$help" >&2
+    exit 1;;
+esac
+
+# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any).
+# Here we must recognize all the valid KERNEL-OS combinations.
+maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
+case $maybe_os in
+  nto-qnx* | linux-gnu* | linux-dietlibc | linux-newlib* | linux-uclibc* | \
+  uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | \
+  kopensolaris*-gnu* | \
+  storm-chaos* | os2-emx* | rtmk-nova*)
+    os=-$maybe_os
+    basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
+    ;;
+  *)
+    basic_machine=`echo $1 | sed 's/-[^-]*$//'`
+    if [ $basic_machine != $1 ]
+    then os=`echo $1 | sed 's/.*-/-/'`
+    else os=; fi
+    ;;
+esac
+
+### Let's recognize common machines as not being operating systems so
+### that things like config.sub decstation-3100 work.  We also
+### recognize some manufacturers as not being operating systems, so we
+### can provide default operating systems below.
+case $os in
+	-sun*os*)
+		# Prevent following clause from handling this invalid input.
+ ;; + -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ + -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ + -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ + -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ + -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ + -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ + -apple | -axis | -knuth | -cray | -microblaze) + os= + basic_machine=$1 + ;; + -bluegene*) + os=-cnk + ;; + -sim | -cisco | -oki | -wec | -winbond) + os= + basic_machine=$1 + ;; + -scout) + ;; + -wrs) + os=-vxworks + basic_machine=$1 + ;; + -chorusos*) + os=-chorusos + basic_machine=$1 + ;; + -chorusrdb) + os=-chorusrdb + basic_machine=$1 + ;; + -hiux*) + os=-hiuxwe2 + ;; + -sco6) + os=-sco5v6 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5) + os=-sco3.2v5 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco4) + os=-sco3.2v4 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2.[4-9]*) + os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco3.2v[4-9]*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco5v6*) + # Don't forget version if it is 3.2v4 or newer. + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -sco*) + os=-sco3.2v2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -udk*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -isc) + os=-isc2.2 + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -clix*) + basic_machine=clipper-intergraph + ;; + -isc*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` + ;; + -lynx*) + os=-lynxos + ;; + -ptx*) + basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` + ;; + -windowsnt*) + os=`echo $os | sed -e 's/windowsnt/winnt/'` + ;; + -psos*) + os=-psos + ;; + -mint | -mint[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; +esac + +# Decode aliases for certain CPU-COMPANY combinations. +case $basic_machine in + # Recognize the basic CPU types without company name. + # Some are omitted here because they have special meanings below. 
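+	# (A bare CPU name accepted here is padded with an "unknown"
+	# manufacturer: e.g. "mips64el" becomes mips64el-unknown, and a
+	# default OS, if any, is attached further below.)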
+ 1750a | 580 \ + | a29k \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ + | am33_2.0 \ + | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \ + | bfin \ + | c4x | clipper \ + | d10v | d30v | dlx | dsp16xx \ + | fido | fr30 | frv \ + | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | i370 | i860 | i960 | ia64 \ + | ip2k | iq2000 \ + | lm32 \ + | m32c | m32r | m32rle | m68000 | m68k | m88k \ + | maxq | mb | microblaze | mcore | mep | metag \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipstx39 | mipstx39el \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nios | nios2 \ + | ns16k | ns32k \ + | or32 \ + | pdp10 | pdp11 | pj | pjl \ + | powerpc | powerpc64 | powerpc64le | powerpcle | ppcbe \ + | pyramid \ + | score \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ + | spu | strongarm \ + | tahoe | thumb | tic4x | tic80 | tron \ + | v850 | v850e \ + | we32k \ + | x86 | xc16x | xscale | xscalee[bl] | xstormy16 | xtensa \ + | z8k | z80) + basic_machine=$basic_machine-unknown + ;; + m6811 | m68hc11 | m6812 | m68hc12) + # Motorola 68HC11/12. + basic_machine=$basic_machine-unknown + os=-none + ;; + m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) + ;; + ms1) + basic_machine=mt-unknown + ;; + + # We use `pc' rather than `unknown' + # because (1) that's what they normally are, and + # (2) the word "unknown" tends to confuse beginning users. + i*86 | x86_64) + basic_machine=$basic_machine-pc + ;; + # Object if more than one company name word. + *-*-*) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; + # Recognize the basic CPU types with company name. 
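+	# (These CPU-company pairs pass through unchanged: e.g. sparc-sun
+	# stays sparc-sun, and only a missing OS field is defaulted later,
+	# to -sunos4.1.1 in Sun's case.)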
+ 580-* \ + | a29k-* \ + | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ + | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ + | alphapca5[67]-* | alpha64pca5[67]-* | arc-* \ + | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ + | avr-* | avr32-* \ + | bfin-* | bs2000-* \ + | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ + | clipper-* | craynv-* | cydra-* \ + | d10v-* | d30v-* | dlx-* \ + | elxsi-* \ + | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ + | h8300-* | h8500-* \ + | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ + | i*86-* | i860-* | i960-* | ia64-* \ + | ip2k-* | iq2000-* \ + | lm32-* \ + | m32c-* | m32r-* | m32rle-* \ + | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ + | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \ + | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ + | mips16-* \ + | mips64-* | mips64el-* \ + | mips64octeon-* | mips64octeonel-* \ + | mips64orion-* | mips64orionel-* \ + | mips64r5900-* | mips64r5900el-* \ + | mips64vr-* | mips64vrel-* \ + | mips64vr4100-* | mips64vr4100el-* \ + | mips64vr4300-* | mips64vr4300el-* \ + | mips64vr5000-* | mips64vr5000el-* \ + | mips64vr5900-* | mips64vr5900el-* \ + | mipsisa32-* | mipsisa32el-* \ + | mipsisa32r2-* | mipsisa32r2el-* \ + | mipsisa64-* | mipsisa64el-* \ + | mipsisa64r2-* | mipsisa64r2el-* \ + | mipsisa64sb1-* | mipsisa64sb1el-* \ + | mipsisa64sr71k-* | mipsisa64sr71kel-* \ + | mipstx39-* | mipstx39el-* \ + | mmix-* \ + | mt-* \ + | msp430-* \ + | nios-* | nios2-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ + | orion-* \ + | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ + | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ + | pyramid-* \ + | romp-* | rs6000-* \ + | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ + | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ + | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ + | sparclite-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | strongarm-* | sv1-* | sx?-* \ + | tahoe-* | thumb-* \ + | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* | tile-* \ + | tron-* \ + | v850-* | v850e-* | vax-* \ + | we32k-* \ + | x86-* | x86_64-* | xc16x-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa*-* \ + | ymp-* \ + | z8k-* | z80-*) + ;; + # Recognize the basic CPU types without company name, with glob match. + xtensa*) + basic_machine=$basic_machine-unknown + ;; + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
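+	# (e.g. "amiga" expands to m68k-unknown, while "sun3os4" fills in
+	# all three fields at once: m68k-sun plus os=-sunos4.)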
+ 386bsd) + basic_machine=i386-unknown + os=-bsd + ;; + 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + basic_machine=m68000-att + ;; + 3b*) + basic_machine=we32k-att + ;; + a29khif) + basic_machine=a29k-amd + os=-udi + ;; + abacus) + basic_machine=abacus-unknown + ;; + adobe68k) + basic_machine=m68010-adobe + os=-scout + ;; + alliant | fx80) + basic_machine=fx80-alliant + ;; + altos | altos3068) + basic_machine=m68k-altos + ;; + am29k) + basic_machine=a29k-none + os=-bsd + ;; + amd64) + basic_machine=x86_64-pc + ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + amdahl) + basic_machine=580-amdahl + os=-sysv + ;; + amiga | amiga-*) + basic_machine=m68k-unknown + ;; + amigaos | amigados) + basic_machine=m68k-unknown + os=-amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + os=-sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + os=-sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + os=-bsd + ;; + aros) + basic_machine=i386-pc + os=-aros + ;; + aux) + basic_machine=m68k-apple + os=-aux + ;; + balance) + basic_machine=ns32k-sequent + os=-dynix + ;; + blackfin) + basic_machine=bfin-unknown + os=-linux + ;; + blackfin-*) + basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + bluegene*) + basic_machine=powerpc-ibm + os=-cnk + ;; + c90) + basic_machine=c90-cray + os=-unicos + ;; + cegcc) + basic_machine=arm-unknown + os=-cegcc + ;; + convex-c1) + basic_machine=c1-convex + os=-bsd + ;; + convex-c2) + basic_machine=c2-convex + os=-bsd + ;; + convex-c32) + basic_machine=c32-convex + os=-bsd + ;; + convex-c34) + basic_machine=c34-convex + os=-bsd + ;; + convex-c38) + basic_machine=c38-convex + os=-bsd + ;; + cray | j90) + basic_machine=j90-cray + os=-unicos + ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16) + basic_machine=cr16-unknown + os=-elf + ;; + crds | unos) + basic_machine=m68k-crds + ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; + cris | cris-* | etrax*) + basic_machine=cris-axis + ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; + da30 | da30-*) + basic_machine=m68k-da30 + ;; + decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) + basic_machine=mips-dec + ;; + decsystem10* | dec10*) + basic_machine=pdp10-dec + os=-tops10 + ;; + decsystem20* | dec20*) + basic_machine=pdp10-dec + os=-tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + basic_machine=m68k-motorola + ;; + delta88) + basic_machine=m88k-motorola + os=-sysv3 + ;; + dicos) + basic_machine=i686-pc + os=-dicos + ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; + dpx20 | dpx20-*) + basic_machine=rs6000-bull + os=-bosx + ;; + dpx2* | dpx2*-bull) + basic_machine=m68k-bull + os=-sysv3 + ;; + ebmon29k) + basic_machine=a29k-amd + os=-ebmon + ;; + elxsi) + basic_machine=elxsi-elxsi + os=-bsd + ;; + encore | umax | mmax) + basic_machine=ns32k-encore + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + os=-ose + ;; + fx2800) + basic_machine=i860-alliant + ;; + genix) + basic_machine=ns32k-ns + ;; + gmicro) + basic_machine=tron-gmicro + os=-sysv + ;; + go32) + basic_machine=i386-pc + os=-go32 + ;; + h3050r* | hiux*) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + h8300hms) + basic_machine=h8300-hitachi + os=-hms + ;; + h8300xray) + basic_machine=h8300-hitachi + os=-xray + ;; + h8500hms) + basic_machine=h8500-hitachi + os=-hms + ;; + harris) + basic_machine=m88k-harris + os=-sysv3 + ;; + hp300-*) + 
basic_machine=m68k-hp + ;; + hp300bsd) + basic_machine=m68k-hp + os=-bsd + ;; + hp300hpux) + basic_machine=m68k-hp + os=-hpux + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + basic_machine=m68000-hp + ;; + hp9k3[2-9][0-9]) + basic_machine=m68k-hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + basic_machine=hppa1.1-hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + basic_machine=hppa1.1-hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + basic_machine=hppa1.0-hp + ;; + hppa-next) + os=-nextstep3 + ;; + hppaosf) + basic_machine=hppa1.1-hp + os=-osf + ;; + hppro) + basic_machine=hppa1.1-hp + os=-proelf + ;; + i370-ibm* | ibm*) + basic_machine=i370-ibm + ;; +# I'm not sure what "Sysv32" means. Should this be sysv3.2? + i*86v32) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv32 + ;; + i*86v4*) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv4 + ;; + i*86v) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-sysv + ;; + i*86sol2) + basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` + os=-solaris2 + ;; + i386mach) + basic_machine=i386-mach + os=-mach + ;; + i386-vsta | vsta) + basic_machine=i386-unknown + os=-vsta + ;; + iris | iris4d) + basic_machine=mips-sgi + case $os in + -irix*) + ;; + *) + os=-irix4 + ;; + esac + ;; + isi68 | isi) + basic_machine=m68k-isi + os=-sysv + ;; + m68knommu) + basic_machine=m68k-unknown + os=-linux + ;; + m68knommu-*) + basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + m88k-omron*) + basic_machine=m88k-omron + ;; + magnum | m3230) + basic_machine=mips-mips + os=-sysv + ;; + merlin) + basic_machine=ns32k-utek + os=-sysv + ;; + microblaze) + basic_machine=microblaze-xilinx + ;; + mingw32) + basic_machine=i386-pc + os=-mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + os=-mingw32ce + ;; + miniframe) + basic_machine=m68000-convergent + ;; + *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) + basic_machine=m68k-atari + os=-mint + ;; + mips3*-*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` + ;; + mips3*) + basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown + ;; + monitor) + basic_machine=m68k-rom68k + os=-coff + ;; + morphos) + basic_machine=powerpc-unknown + os=-morphos + ;; + msdos) + basic_machine=i386-pc + os=-msdos + ;; + ms1-*) + basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` + ;; + mvs) + basic_machine=i370-ibm + os=-mvs + ;; + ncr3000) + basic_machine=i486-ncr + os=-sysv4 + ;; + netbsd386) + basic_machine=i386-unknown + os=-netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + os=-linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + os=-newsos + ;; + news1000) + basic_machine=m68030-sony + os=-newsos + ;; + news-3600 | risc-news) + basic_machine=mips-sony + os=-newsos + ;; + necv70) + basic_machine=v70-nec + os=-sysv + ;; + next | m*-next ) + basic_machine=m68k-next + case $os in + -nextstep* ) + ;; + -ns2*) + os=-nextstep2 + ;; + *) + os=-nextstep3 + ;; + esac + ;; + nh3000) + basic_machine=m68k-harris + os=-cxux + ;; + nh[45]000) + basic_machine=m88k-harris + os=-cxux + ;; + nindy960) + basic_machine=i960-intel + os=-nindy + ;; + mon960) + basic_machine=i960-intel + os=-mon960 + 
;; + nonstopux) + basic_machine=mips-compaq + os=-nonstopux + ;; + np1) + basic_machine=np1-gould + ;; + nsr-tandem) + basic_machine=nsr-tandem + ;; + op50n-* | op60c-*) + basic_machine=hppa1.1-oki + os=-proelf + ;; + openrisc | openrisc-*) + basic_machine=or32-unknown + ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + os=-ose + ;; + os68k) + basic_machine=m68k-none + os=-os68k + ;; + pa-hitachi) + basic_machine=hppa1.1-hitachi + os=-hiuxwe2 + ;; + paragon) + basic_machine=i860-intel + os=-osf + ;; + parisc) + basic_machine=hppa-unknown + os=-linux + ;; + parisc-*) + basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` + os=-linux + ;; + pbd) + basic_machine=sparc-tti + ;; + pbb) + basic_machine=m68k-tti + ;; + pc532 | pc532-*) + basic_machine=ns32k-pc532 + ;; + pc98) + basic_machine=i386-pc + ;; + pc98-*) + basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium | p5 | k5 | k6 | nexgen | viac3) + basic_machine=i586-pc + ;; + pentiumpro | p6 | 6x86 | athlon | athlon_*) + basic_machine=i686-pc + ;; + pentiumii | pentium2 | pentiumiii | pentium3) + basic_machine=i686-pc + ;; + pentium4) + basic_machine=i786-pc + ;; + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumpro-* | p6-* | 6x86-* | athlon-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pentium4-*) + basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + pn) + basic_machine=pn-gould + ;; + power) basic_machine=power-ibm + ;; + ppc) basic_machine=powerpc-unknown + ;; + ppc-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppcle | powerpclittle | ppc-le | powerpc-little) + basic_machine=powerpcle-unknown + ;; + ppcle-* | powerpclittle-*) + basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64) basic_machine=powerpc64-unknown + ;; + ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ppc64le | powerpc64little | ppc64-le | powerpc64-little) + basic_machine=powerpc64le-unknown + ;; + ppc64le-* | powerpc64little-*) + basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; + ps2) + basic_machine=i386-ibm + ;; + pw32) + basic_machine=i586-unknown + os=-pw32 + ;; + rdos) + basic_machine=i386-pc + os=-rdos + ;; + rom68k) + basic_machine=m68k-rom68k + os=-coff + ;; + rm[46]00) + basic_machine=mips-siemens + ;; + rtpc | rtpc-*) + basic_machine=romp-ibm + ;; + s390 | s390-*) + basic_machine=s390-ibm + ;; + s390x | s390x-*) + basic_machine=s390x-ibm + ;; + sa29200) + basic_machine=a29k-amd + os=-udi + ;; + sb1) + basic_machine=mipsisa64sb1-unknown + ;; + sb1el) + basic_machine=mipsisa64sb1el-unknown + ;; + sde) + basic_machine=mipsisa32-sde + os=-elf + ;; + sei) + basic_machine=mips-sei + os=-seiux + ;; + sequent) + basic_machine=i386-sequent + ;; + sh) + basic_machine=sh-hitachi + os=-hms + ;; + sh5el) + basic_machine=sh5le-unknown + ;; + sh64) + basic_machine=sh64-unknown + ;; + sparclite-wrs | simso-wrs) + basic_machine=sparclite-wrs + os=-vxworks + ;; + sps7) + basic_machine=m68k-bull + os=-sysv2 + ;; + spur) + basic_machine=spur-unknown + ;; + st2000) + basic_machine=m68k-tandem + ;; + stratus) + basic_machine=i860-stratus + os=-sysv4 + ;; + sun2) + basic_machine=m68000-sun + ;; + sun2os3) + basic_machine=m68000-sun + 
os=-sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + os=-sunos4 + ;; + sun3os3) + basic_machine=m68k-sun + os=-sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + os=-sunos4 + ;; + sun4os3) + basic_machine=sparc-sun + os=-sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + os=-sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + os=-solaris2 + ;; + sun3 | sun3-*) + basic_machine=m68k-sun + ;; + sun4) + basic_machine=sparc-sun + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + ;; + sv1) + basic_machine=sv1-cray + os=-unicos + ;; + symmetry) + basic_machine=i386-sequent + os=-dynix + ;; + t3e) + basic_machine=alphaev5-cray + os=-unicos + ;; + t90) + basic_machine=t90-cray + os=-unicos + ;; + tic54x | c54x*) + basic_machine=tic54x-unknown + os=-coff + ;; + tic55x | c55x*) + basic_machine=tic55x-unknown + os=-coff + ;; + tic6x | c6x*) + basic_machine=tic6x-unknown + os=-coff + ;; + tile*) + basic_machine=tile-unknown + os=-linux-gnu + ;; + tx39) + basic_machine=mipstx39-unknown + ;; + tx39el) + basic_machine=mipstx39el-unknown + ;; + toad1) + basic_machine=pdp10-xkl + os=-tops20 + ;; + tower | tower-32) + basic_machine=m68k-ncr + ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; + udi29k) + basic_machine=a29k-amd + os=-udi + ;; + ultra3) + basic_machine=a29k-nyu + os=-sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + os=-none + ;; + vaxv) + basic_machine=vax-dec + os=-sysv + ;; + vms) + basic_machine=vax-dec + os=-vms + ;; + vpp*|vx|vx-*) + basic_machine=f301-fujitsu + ;; + vxworks960) + basic_machine=i960-wrs + os=-vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + os=-vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + os=-vxworks + ;; + w65*) + basic_machine=w65-wdc + os=-none + ;; + w89k-*) + basic_machine=hppa1.1-winbond + os=-proelf + ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; + xps | xps100) + basic_machine=xps100-honeywell + ;; + ymp) + basic_machine=ymp-cray + os=-unicos + ;; + z8k-*-coff) + basic_machine=z8k-unknown + os=-sim + ;; + z80-*-coff) + basic_machine=z80-unknown + os=-sim + ;; + none) + basic_machine=none-none + os=-none + ;; + +# Here we handle the default manufacturer of certain CPU types. It is in +# some cases the only manufacturer, in others, it is the most popular. + w89k) + basic_machine=hppa1.1-winbond + ;; + op50n) + basic_machine=hppa1.1-oki + ;; + op60c) + basic_machine=hppa1.1-oki + ;; + romp) + basic_machine=romp-ibm + ;; + mmix) + basic_machine=mmix-knuth + ;; + rs6000) + basic_machine=rs6000-ibm + ;; + vax) + basic_machine=vax-dec + ;; + pdp10) + # there are many clones, so DEC is not a safe bet + basic_machine=pdp10-unknown + ;; + pdp11) + basic_machine=pdp11-dec + ;; + we32k) + basic_machine=we32k-att + ;; + sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) + basic_machine=sh-unknown + ;; + sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) + basic_machine=sparc-sun + ;; + cydra) + basic_machine=cydra-cydrome + ;; + orion) + basic_machine=orion-highlevel + ;; + orion105) + basic_machine=clipper-highlevel + ;; + mac | mpw | mac-mpw) + basic_machine=m68k-apple + ;; + pmac | pmac-mpw) + basic_machine=powerpc-apple + ;; + *-unknown) + # Make sure to match an already-canonicalized machine name. + ;; + *) + echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 + exit 1 + ;; +esac + +# Here we canonicalize certain aliases for manufacturers. 
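+# (e.g. a "digital" vendor field, as in vax-digital, is rewritten to the
+# canonical "dec", and "commodore" to "cbm".)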
+case $basic_machine in + *-digital*) + basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` + ;; + *-commodore*) + basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + ;; + *) + ;; +esac + +# Decode manufacturer-specific aliases for certain operating systems. + +if [ x"$os" != x"" ] +then +case $os in + # First match some system type aliases + # that might get confused with valid system types. + # -solaris* is a basic system type, with this one exception. + -solaris1 | -solaris1.*) + os=`echo $os | sed -e 's|solaris1|sunos4|'` + ;; + -auroraux) + os=-auroraux + ;; + -solaris) + os=-solaris2 + ;; + -svr4*) + os=-sysv4 + ;; + -unixware*) + os=-sysv4.2uw + ;; + -gnu/linux*) + os=`echo $os | sed -e 's|gnu/linux|linux-gnu|'` + ;; + # First accept the basic system types. + # The portable systems comes first. + # Each alternative MUST END IN A *, to match a version number. + # -sysv* is not here because it comes later, after sysvr4. + -gnu* | -bsd* | -mach* | -minix* | -genix* | -ultrix* | -irix* \ + | -*vms* | -sco* | -esix* | -isc* | -aix* | -cnk* | -sunos | -sunos[34]*\ + | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* | -sym* \ + | -kopensolaris* \ + | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ + | -aos* | -aros* \ + | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ + | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ + | -openbsd* | -solidbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ + | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ + | -chorusos* | -chorusrdb* | -cegcc* \ + | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ + | -mingw32* | -linux-gnu* | -linux-newlib* | -linux-uclibc* \ + | -uxpv* | -beos* | -mpeix* | -udk* \ + | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ + | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ + | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ + | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ + | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ + | -skyos* | -haiku* | -rdos* | -toppers* | -drops*) + # Remember, each alternative MUST END IN *, to match a version number. 
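+	# (Thus "-freebsd8.2" is accepted here with its version intact,
+	# while a bare "-linux" falls through and is normalized to
+	# -linux-gnu by a clause further down.)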
+ ;; + -qnx*) + case $basic_machine in + x86-* | i*86-*) + ;; + *) + os=-nto$os + ;; + esac + ;; + -nto-qnx*) + ;; + -nto*) + os=`echo $os | sed -e 's|nto|nto-qnx|'` + ;; + -sim | -es1800* | -hms* | -xray | -os68k* | -none* | -v88r* \ + | -windows* | -osx | -abug | -netware* | -os9* | -beos* | -haiku* \ + | -macos* | -mpw* | -magic* | -mmixware* | -mon960* | -lnews*) + ;; + -mac*) + os=`echo $os | sed -e 's|mac|macos|'` + ;; + -linux-dietlibc) + os=-linux-dietlibc + ;; + -linux*) + os=`echo $os | sed -e 's|linux|linux-gnu|'` + ;; + -sunos5*) + os=`echo $os | sed -e 's|sunos5|solaris2|'` + ;; + -sunos6*) + os=`echo $os | sed -e 's|sunos6|solaris3|'` + ;; + -opened*) + os=-openedition + ;; + -os400*) + os=-os400 + ;; + -wince*) + os=-wince + ;; + -osfrose*) + os=-osfrose + ;; + -osf*) + os=-osf + ;; + -utek*) + os=-bsd + ;; + -dynix*) + os=-bsd + ;; + -acis*) + os=-aos + ;; + -atheos*) + os=-atheos + ;; + -syllable*) + os=-syllable + ;; + -386bsd) + os=-bsd + ;; + -ctix* | -uts*) + os=-sysv + ;; + -nova*) + os=-rtmk-nova + ;; + -ns2 ) + os=-nextstep2 + ;; + -nsk*) + os=-nsk + ;; + # Preserve the version number of sinix5. + -sinix5.*) + os=`echo $os | sed -e 's|sinix|sysv|'` + ;; + -sinix*) + os=-sysv4 + ;; + -tpf*) + os=-tpf + ;; + -triton*) + os=-sysv3 + ;; + -oss*) + os=-sysv3 + ;; + -svr4) + os=-sysv4 + ;; + -svr3) + os=-sysv3 + ;; + -sysvr4) + os=-sysv4 + ;; + # This must come after -sysvr4. + -sysv*) + ;; + -ose*) + os=-ose + ;; + -es1800*) + os=-ose + ;; + -xenix) + os=-xenix + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + os=-mint + ;; + -aros*) + os=-aros + ;; + -kaos*) + os=-kaos + ;; + -zvmoe) + os=-zvmoe + ;; + -dicos*) + os=-dicos + ;; + -none) + ;; + *) + # Get rid of the `-' at the beginning of $os. + os=`echo $os | sed 's/[^-]*-//'` + echo Invalid configuration \`$1\': system \`$os\' not recognized 1>&2 + exit 1 + ;; +esac +else + +# Here we handle the default operating systems that come with various machines. +# The value should be what the vendor currently ships out the door with their +# machine or put another way, the most popular os provided with the machine. + +# Note that if you're going to try to match "-MANUFACTURER" here (say, +# "-sun"), then you have to tell the case statement up towards the top +# that MANUFACTURER isn't an operating system. Otherwise, code above +# will signal an error saying that MANUFACTURER isn't an operating +# system, and we'll never get to this point. + +case $basic_machine in + score-*) + os=-elf + ;; + spu-*) + os=-elf + ;; + *-acorn) + os=-riscix1.2 + ;; + arm*-rebel) + os=-linux + ;; + arm*-semi) + os=-aout + ;; + c4x-* | tic4x-*) + os=-coff + ;; + # This must come before the *-dec entry. + pdp10-*) + os=-tops20 + ;; + pdp11-*) + os=-none + ;; + *-dec | vax-*) + os=-ultrix4.2 + ;; + m68*-apollo) + os=-domain + ;; + i386-sun) + os=-sunos4.0.2 + ;; + m68000-sun) + os=-sunos3 + # This also exists in the configure program, but was not the + # default. + # os=-sunos4 + ;; + m68*-cisco) + os=-aout + ;; + mep-*) + os=-elf + ;; + mips*-cisco) + os=-elf + ;; + mips*-*) + os=-elf + ;; + or32-*) + os=-coff + ;; + *-tti) # must be before sparc entry or we get the wrong os. 
+ os=-sysv3 + ;; + sparc-* | *-sun) + os=-sunos4.1.1 + ;; + *-be) + os=-beos + ;; + *-haiku) + os=-haiku + ;; + *-ibm) + os=-aix + ;; + *-knuth) + os=-mmixware + ;; + *-wec) + os=-proelf + ;; + *-winbond) + os=-proelf + ;; + *-oki) + os=-proelf + ;; + *-hp) + os=-hpux + ;; + *-hitachi) + os=-hiux + ;; + i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) + os=-sysv + ;; + *-cbm) + os=-amigaos + ;; + *-dg) + os=-dgux + ;; + *-dolphin) + os=-sysv3 + ;; + m68k-ccur) + os=-rtu + ;; + m88k-omron*) + os=-luna + ;; + *-next ) + os=-nextstep + ;; + *-sequent) + os=-ptx + ;; + *-crds) + os=-unos + ;; + *-ns) + os=-genix + ;; + i370-*) + os=-mvs + ;; + *-next) + os=-nextstep3 + ;; + *-gould) + os=-sysv + ;; + *-highlevel) + os=-bsd + ;; + *-encore) + os=-bsd + ;; + *-sgi) + os=-irix + ;; + *-siemens) + os=-sysv4 + ;; + *-masscomp) + os=-rtu + ;; + f30[01]-fujitsu | f700-fujitsu) + os=-uxpv + ;; + *-rom68k) + os=-coff + ;; + *-*bug) + os=-coff + ;; + *-apple) + os=-macos + ;; + *-atari*) + os=-mint + ;; + *) + os=-none + ;; +esac +fi + +# Here we handle the case where we know the os, and the CPU type, but not the +# manufacturer. We pick the logical manufacturer. +vendor=unknown +case $basic_machine in + *-unknown) + case $os in + -riscix*) + vendor=acorn + ;; + -sunos*) + vendor=sun + ;; + -cnk*|-aix*) + vendor=ibm + ;; + -beos*) + vendor=be + ;; + -hpux*) + vendor=hp + ;; + -mpeix*) + vendor=hp + ;; + -hiux*) + vendor=hitachi + ;; + -unos*) + vendor=crds + ;; + -dgux*) + vendor=dg + ;; + -luna*) + vendor=omron + ;; + -genix*) + vendor=ns + ;; + -mvs* | -opened*) + vendor=ibm + ;; + -os400*) + vendor=ibm + ;; + -ptx*) + vendor=sequent + ;; + -tpf*) + vendor=ibm + ;; + -vxsim* | -vxworks* | -windiss*) + vendor=wrs + ;; + -aux*) + vendor=apple + ;; + -hms*) + vendor=hitachi + ;; + -mpw* | -macos*) + vendor=apple + ;; + -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + vendor=atari + ;; + -vos*) + vendor=stratus + ;; + esac + basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` + ;; +esac + +echo $basic_machine$os +exit + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "timestamp='" +# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-end: "'" +# End: diff --git a/final/autoconf/configure.ac b/final/autoconf/configure.ac new file mode 100644 index 00000000000..27d9bd4949f --- /dev/null +++ b/final/autoconf/configure.ac @@ -0,0 +1,1724 @@ +dnl === configure.ac --------------------------------------------------------=== +dnl The LLVM Compiler Infrastructure +dnl +dnl This file is distributed under the University of Illinois Open Source +dnl License. See LICENSE.TXT for details. +dnl +dnl===-----------------------------------------------------------------------=== +dnl This is the LLVM configuration script. It is processed by the autoconf +dnl program to produce a script named configure. This script contains the +dnl configuration checks that LLVM needs in order to support multiple platforms. +dnl This file is composed of 10 sections per the recommended organization of +dnl autoconf input defined in the autoconf documentation. As this file evolves, +dnl please keep the various types of checks within their sections. The sections +dnl are as follows: +dnl +dnl SECTION 1: Initialization & Setup +dnl SECTION 2: Architecture, target, and host checks +dnl SECTION 3: Command line arguments for the configure script. 
+dnl SECTION 4: Check for programs we need and that they are the right version
+dnl SECTION 5: Check for libraries
+dnl SECTION 6: Check for header files
+dnl SECTION 7: Check for types and structures
+dnl SECTION 8: Check for specific functions needed
+dnl SECTION 9: Additional checks, variables, etc.
+dnl SECTION 10: Specify the output files and generate it
+dnl
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 1: Initialization & Setup
+dnl===
+dnl===-----------------------------------------------------------------------===
+dnl Initialize autoconf and define the package name, version number and
+dnl email address for reporting bugs.
+AC_INIT([[llvm]],[[2.9]],[llvmbugs@cs.uiuc.edu])
+
+dnl Provide a copyright substitution and ensure the copyright notice is included
+dnl in the output of --version option of the generated configure script.
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.])
+
+dnl Indicate that we require autoconf 2.59 or later. This is needed because we
+dnl use some autoconf macros only available in 2.59.
+AC_PREREQ(2.59)
+
+dnl Verify that the source directory is valid. This makes sure that we are
+dnl configuring LLVM and not some other package (it validates the --srcdir
+dnl argument).
+AC_CONFIG_SRCDIR([lib/VMCore/Module.cpp])
+
+dnl Place all of the extra autoconf files into the config subdirectory. Tell
+dnl various tools where the m4 autoconf macros are.
+AC_CONFIG_AUX_DIR([autoconf])
+
+dnl Quit if the source directory has already been configured.
+dnl NOTE: This relies upon undocumented autoconf behavior.
+if test ${srcdir} != "." ; then
+  if test -f ${srcdir}/include/llvm/Config/config.h ; then
+    AC_MSG_ERROR([Already configured in ${srcdir}])
+  fi
+fi
+
+dnl Configure all of the projects present in our source tree. While we could
+dnl just AC_CONFIG_SUBDIRS on the set of directories in projects that have a
+dnl configure script, that usage of the AC_CONFIG_SUBDIRS macro is deprecated.
+dnl Instead we match on the known projects.
+
+dnl
+dnl One tricky part of doing this is that some projects depend upon other
+dnl projects.  For example, several projects rely upon the LLVM test suite.
+dnl We want to configure those projects first so that their object trees are
+dnl created before running the configure scripts of projects that depend upon
+dnl them.
+dnl
+
+dnl Several projects use llvm-gcc, so configure that first
+if test -d ${srcdir}/projects/llvm-gcc ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-gcc])
+fi
+
+dnl Several projects use the LLVM test suite, so configure it next.
+if test -d ${srcdir}/projects/test-suite ; then
+  AC_CONFIG_SUBDIRS([projects/test-suite])
+fi
+
+dnl llvm-test is the old name of the test-suite, kept here for backwards
+dnl compatibility
+if test -d ${srcdir}/projects/llvm-test ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-test])
+fi
+
+dnl Some projects use poolalloc; configure that next
+if test -d ${srcdir}/projects/poolalloc ; then
+  AC_CONFIG_SUBDIRS([projects/poolalloc])
+fi
+
+if test -d ${srcdir}/projects/llvm-poolalloc ; then
+  AC_CONFIG_SUBDIRS([projects/llvm-poolalloc])
+fi
+
+dnl Check for all other projects
+for i in `ls ${srcdir}/projects`
+do
+  if test -d ${srcdir}/projects/${i} ; then
+    case ${i} in
+      sample)       AC_CONFIG_SUBDIRS([projects/sample])    ;;
+      privbracket)  AC_CONFIG_SUBDIRS([projects/privbracket]) ;;
+      llvm-stacker) AC_CONFIG_SUBDIRS([projects/llvm-stacker]) ;;
+      llvm-reopt)   AC_CONFIG_SUBDIRS([projects/llvm-reopt]);;
+      llvm-java)    AC_CONFIG_SUBDIRS([projects/llvm-java]) ;;
+      llvm-tv)      AC_CONFIG_SUBDIRS([projects/llvm-tv])   ;;
+      safecode)     AC_CONFIG_SUBDIRS([projects/safecode])  ;;
+      llvm-kernel)  AC_CONFIG_SUBDIRS([projects/llvm-kernel]) ;;
+      llvm-gcc)       ;;
+      test-suite)     ;;
+      llvm-test)      ;;
+      poolalloc)      ;;
+      llvm-poolalloc) ;;
+      *)
+        AC_MSG_WARN([Unknown project (${i}) won't be configured automatically])
+        ;;
+    esac
+  fi
+done
+
+dnl Allow the build of polly to be disabled, even if it is checked out into
+dnl tools/polly.
+AC_ARG_ENABLE(polly,
+              AS_HELP_STRING([--enable-polly],
+                             [Use polly if available (default is YES)]),,
+                             enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_POLLY,[1]) ;;
+  no)  AC_SUBST(ENABLE_POLLY,[0]) ;;
+  default) AC_SUBST(ENABLE_POLLY,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-polly. Use "yes" or "no"]) ;;
+esac
+
+
+dnl Check if polly is checked out into tools/polly and configure it if
+dnl available.
+if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then
+  AC_SUBST(LLVM_HAS_POLLY,1)
+  AC_CONFIG_SUBDIRS([tools/polly])
+fi
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 2: Architecture, target, and host checks
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl Check the target for which we're compiling and the host that will do the
+dnl compilations. This will tell us which LLVM compiler will be used for
+dnl compiling SSA into object code. This needs to be done early because
+dnl following tests depend on it.
+AC_CANONICAL_TARGET
+
+dnl Determine the platform type and cache its value. This helps us configure
+dnl the System library to the correct build platform.
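+dnl (The llvm_cv_link_all_option values cached below are the linker flags
+dnl that force every member of a static archive into the link: GNU ld spells
+dnl this --whole-archive, Darwin's linker -all_load, and Solaris ld
+dnl -z allextract. They surface in the Makefiles as the LINKALL/NOLINKALL
+dnl variables set further down.)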
+AC_CACHE_CHECK([type of operating system we're going to host on],
+               [llvm_cv_os_type],
+[case $host in
+  *-*-aix*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="AIX"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-irix*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="IRIX"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-cygwin*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Cygwin"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-darwin*)
+    llvm_cv_link_all_option="-Wl,-all_load"
+    llvm_cv_no_link_all_option="-Wl,-noall_load"
+    llvm_cv_os_type="Darwin"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-minix*)
+    llvm_cv_link_all_option="-Wl,-all_load"
+    llvm_cv_no_link_all_option="-Wl,-noall_load"
+    llvm_cv_os_type="Minix"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-freebsd*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="FreeBSD"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-openbsd*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="OpenBSD"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-netbsd*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="NetBSD"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-dragonfly*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="DragonFly"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-hpux*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="HP-UX"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-interix*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Interix"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-linux*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Linux"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-solaris*)
+    llvm_cv_link_all_option="-Wl,-z,allextract"
+    llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+    llvm_cv_os_type="SunOS"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-auroraux*)
+    llvm_cv_link_all_option="-Wl,-z,allextract"
+    llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+    llvm_cv_os_type="AuroraUX"
+    llvm_cv_platform_type="Unix" ;;
+  *-*-win32*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Win32"
+    llvm_cv_platform_type="Win32" ;;
+  *-*-mingw*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="MingW"
+    llvm_cv_platform_type="Win32" ;;
+  *-*-haiku*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Haiku"
+    llvm_cv_platform_type="Unix" ;;
+  *-unknown-eabi*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Freestanding"
+    llvm_cv_platform_type="Unix" ;;
+  *-unknown-elf*)
+    llvm_cv_link_all_option="-Wl,--whole-archive"
+    llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+    llvm_cv_os_type="Freestanding"
+    llvm_cv_platform_type="Unix" ;;
+  *)
+    llvm_cv_link_all_option=""
+    llvm_cv_no_link_all_option=""
+    llvm_cv_os_type="Unknown"
+    llvm_cv_platform_type="Unknown" ;;
+esac])
+
+AC_CACHE_CHECK([type of operating system we're going to target],
+               [llvm_cv_target_os_type],
+[case $target in
+  *-*-aix*)
+    llvm_cv_target_os_type="AIX" ;;
+  *-*-irix*)
+    llvm_cv_target_os_type="IRIX" ;;
+  *-*-cygwin*)
+    llvm_cv_target_os_type="Cygwin" ;;
+  *-*-darwin*)
+    llvm_cv_target_os_type="Darwin" ;;
+  *-*-minix*)
+    llvm_cv_target_os_type="Minix" ;;
+  *-*-freebsd*)
+    llvm_cv_target_os_type="FreeBSD" ;;
+  *-*-openbsd*)
+    llvm_cv_target_os_type="OpenBSD" ;;
+  *-*-netbsd*)
+    llvm_cv_target_os_type="NetBSD" ;;
+  *-*-dragonfly*)
+    llvm_cv_target_os_type="DragonFly" ;;
+  *-*-hpux*)
+    llvm_cv_target_os_type="HP-UX" ;;
+  *-*-interix*)
+    llvm_cv_target_os_type="Interix" ;;
+  *-*-linux*)
+    llvm_cv_target_os_type="Linux" ;;
+  *-*-solaris*)
+    llvm_cv_target_os_type="SunOS" ;;
+  *-*-auroraux*)
+    llvm_cv_target_os_type="AuroraUX" ;;
+  *-*-win32*)
+    llvm_cv_target_os_type="Win32" ;;
+  *-*-mingw*)
+    llvm_cv_target_os_type="MingW" ;;
+  *-*-haiku*)
+    llvm_cv_target_os_type="Haiku" ;;
+  *-unknown-eabi*)
+    llvm_cv_target_os_type="Freestanding" ;;
+  *)
+    llvm_cv_target_os_type="Unknown" ;;
+esac])
+
+dnl Make sure we aren't attempting to configure for an unknown system
+if test "$llvm_cv_os_type" = "Unknown" ; then
+  AC_MSG_ERROR([Operating system is unknown, configure can't continue])
+fi
+
+dnl Set the "OS" Makefile variable based on the platform type so the
+dnl makefile can configure itself to specific build hosts
+AC_SUBST(OS,$llvm_cv_os_type)
+AC_SUBST(HOST_OS,$llvm_cv_os_type)
+AC_SUBST(TARGET_OS,$llvm_cv_target_os_type)
+
+dnl Set the LINKALL and NOLINKALL Makefile variables based on the platform
+AC_SUBST(LINKALL,$llvm_cv_link_all_option)
+AC_SUBST(NOLINKALL,$llvm_cv_no_link_all_option)
+
+dnl Set the "LLVM_ON_*" variables based on llvm_cv_platform_type
+dnl This is used by lib/Support to determine the basic kind of implementation
+dnl to use.
+case $llvm_cv_platform_type in
+  Unix)
+    AC_DEFINE([LLVM_ON_UNIX],[1],[Define if this is Unixish platform])
+    AC_SUBST(LLVM_ON_UNIX,[1])
+    AC_SUBST(LLVM_ON_WIN32,[0])
+    ;;
+  Win32)
+    AC_DEFINE([LLVM_ON_WIN32],[1],[Define if this is Win32ish platform])
+    AC_SUBST(LLVM_ON_UNIX,[0])
+    AC_SUBST(LLVM_ON_WIN32,[1])
+    ;;
+esac
+
+dnl Determine what our target architecture is and configure accordingly.
+dnl This will allow Makefiles to make a distinction between the hardware and
+dnl the OS.
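+dnl (For example, a $target of x86_64-apple-darwin10 maps to "x86_64" below,
+dnl and powerpc64-unknown-linux-gnu to "PowerPC".)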
+AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
+[case $target in
+  i?86-*)                 llvm_cv_target_arch="x86" ;;
+  amd64-* | x86_64-*)     llvm_cv_target_arch="x86_64" ;;
+  sparc*-*)               llvm_cv_target_arch="Sparc" ;;
+  powerpc*-*)             llvm_cv_target_arch="PowerPC" ;;
+  alpha*-*)               llvm_cv_target_arch="Alpha" ;;
+  arm*-*)                 llvm_cv_target_arch="ARM" ;;
+  mips-*)                 llvm_cv_target_arch="Mips" ;;
+  xcore-*)                llvm_cv_target_arch="XCore" ;;
+  msp430-*)               llvm_cv_target_arch="MSP430" ;;
+  s390x-*)                llvm_cv_target_arch="SystemZ" ;;
+  bfin-*)                 llvm_cv_target_arch="Blackfin" ;;
+  mblaze-*)               llvm_cv_target_arch="MBlaze" ;;
+  ptx-*)                  llvm_cv_target_arch="PTX" ;;
+  *)                      llvm_cv_target_arch="Unknown" ;;
+esac])
+
+if test "$llvm_cv_target_arch" = "Unknown" ; then
+  AC_MSG_WARN([Configuring LLVM for an unknown target architecture])
+fi
+
+# Determine the LLVM native architecture for the target
+case "$llvm_cv_target_arch" in
+    x86)     LLVM_NATIVE_ARCH="X86" ;;
+    x86_64)  LLVM_NATIVE_ARCH="X86" ;;
+    *)       LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;;
+esac
+
+dnl Define a substitution, ARCH, for the target architecture
+AC_SUBST(ARCH,$llvm_cv_target_arch)
+
+dnl Check for the endianness of the target
+AC_C_BIGENDIAN(AC_SUBST([ENDIAN],[big]),AC_SUBST([ENDIAN],[little]))
+
+dnl Check for build platform executable suffix if we're crosscompiling
+if test "$cross_compiling" = yes; then
+  AC_SUBST(LLVM_CROSS_COMPILING, [1])
+  AC_BUILD_EXEEXT
+  ac_build_prefix=${build_alias}-
+  AC_CHECK_PROG(BUILD_CXX, ${ac_build_prefix}g++, ${ac_build_prefix}g++)
+  if test -z "$BUILD_CXX"; then
+     AC_CHECK_PROG(BUILD_CXX, g++, g++)
+     if test -z "$BUILD_CXX"; then
+       AC_CHECK_PROG(BUILD_CXX, c++, c++, , , /usr/ucb/c++)
+     fi
+  fi
+else
+  AC_SUBST(LLVM_CROSS_COMPILING, [0])
+fi
+
+dnl Check to see if there's a .svn or .git directory indicating that this
+dnl build is being done from a checkout. This sets up several defaults for
+dnl the command line switches. When we build with a checkout directory,
+dnl we get a debug with assertions turned on. Without, we assume a source
+dnl release and we get an optimized build without assertions.
+dnl See --enable-optimized and --enable-assertions below
+if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then
+  cvsbuild="yes"
+  optimize="no"
+  AC_SUBST(CVSBUILD,[[CVSBUILD=1]])
+else
+  cvsbuild="no"
+  optimize="yes"
+fi
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 3: Command line arguments for the configure script.
+dnl=== +dnl===-----------------------------------------------------------------------=== + +dnl --enable-optimized : check whether they want to do an optimized build: +AC_ARG_ENABLE(optimized, AS_HELP_STRING( + --enable-optimized,[Compile with optimizations enabled (default is YES)]),,enableval=$optimize) +if test ${enableval} = "no" ; then + AC_SUBST(ENABLE_OPTIMIZED,[[]]) +else + AC_SUBST(ENABLE_OPTIMIZED,[[ENABLE_OPTIMIZED=1]]) +fi + +dnl --enable-profiling : check whether they want to do a profile build: +AC_ARG_ENABLE(profiling, AS_HELP_STRING( + --enable-profiling,[Compile with profiling enabled (default is NO)]),,enableval="no") +if test ${enableval} = "no" ; then + AC_SUBST(ENABLE_PROFILING,[[]]) +else + AC_SUBST(ENABLE_PROFILING,[[ENABLE_PROFILING=1]]) +fi + +dnl --enable-assertions : check whether they want to turn on assertions or not: +AC_ARG_ENABLE(assertions,AS_HELP_STRING( + --enable-assertions,[Compile with assertion checks enabled (default is NO)]),, enableval="no") +if test ${enableval} = "yes" ; then + AC_SUBST(DISABLE_ASSERTIONS,[[]]) +else + AC_SUBST(DISABLE_ASSERTIONS,[[DISABLE_ASSERTIONS=1]]) +fi + +dnl --enable-expensive-checks : check whether they want to turn on expensive debug checks: +AC_ARG_ENABLE(expensive-checks,AS_HELP_STRING( + --enable-expensive-checks,[Compile with expensive debug checks enabled (default is NO)]),, enableval="no") +if test ${enableval} = "yes" ; then + AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[ENABLE_EXPENSIVE_CHECKS=1]]) + AC_SUBST(EXPENSIVE_CHECKS,[[yes]]) +else + AC_SUBST(ENABLE_EXPENSIVE_CHECKS,[[]]) + AC_SUBST(EXPENSIVE_CHECKS,[[no]]) +fi + +dnl --enable-debug-runtime : should runtime libraries have debug symbols? +AC_ARG_ENABLE(debug-runtime, + AS_HELP_STRING(--enable-debug-runtime,[Build runtime libs with debug symbols (default is NO)]),,enableval=no) +if test ${enableval} = "no" ; then + AC_SUBST(DEBUG_RUNTIME,[[]]) +else + AC_SUBST(DEBUG_RUNTIME,[[DEBUG_RUNTIME=1]]) +fi + +dnl --enable-debug-symbols : should even optimized compiler libraries +dnl have debug symbols? 
+AC_ARG_ENABLE(debug-symbols, + AS_HELP_STRING(--enable-debug-symbols,[Build compiler with debug symbols (default is NO if optimization is on and YES if it's off)]),,enableval=no) +if test ${enableval} = "no" ; then + AC_SUBST(DEBUG_SYMBOLS,[[]]) +else + AC_SUBST(DEBUG_SYMBOLS,[[DEBUG_SYMBOLS=1]]) +fi + +dnl --enable-jit: check whether they want to enable the jit +AC_ARG_ENABLE(jit, + AS_HELP_STRING(--enable-jit, + [Enable Just In Time Compiling (default is YES)]),, + enableval=default) +if test ${enableval} = "no" +then + AC_SUBST(JIT,[[]]) +else + case "$llvm_cv_target_arch" in + x86) AC_SUBST(TARGET_HAS_JIT,1) ;; + Sparc) AC_SUBST(TARGET_HAS_JIT,0) ;; + PowerPC) AC_SUBST(TARGET_HAS_JIT,1) ;; + x86_64) AC_SUBST(TARGET_HAS_JIT,1) ;; + Alpha) AC_SUBST(TARGET_HAS_JIT,0) ;; + ARM) AC_SUBST(TARGET_HAS_JIT,1) ;; + Mips) AC_SUBST(TARGET_HAS_JIT,0) ;; + XCore) AC_SUBST(TARGET_HAS_JIT,0) ;; + MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;; + SystemZ) AC_SUBST(TARGET_HAS_JIT,0) ;; + Blackfin) AC_SUBST(TARGET_HAS_JIT,0) ;; + MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;; + PTX) AC_SUBST(TARGET_HAS_JIT,0) ;; + *) AC_SUBST(TARGET_HAS_JIT,0) ;; + esac +fi + +dnl Allow enablement of building and installing docs +AC_ARG_ENABLE(docs, + AS_HELP_STRING([--enable-docs], + [Build documents (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_DOCS,[1]) ;; + no) AC_SUBST(ENABLE_DOCS,[0]) ;; + default) AC_SUBST(ENABLE_DOCS,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-docs. Use "yes" or "no"]) ;; +esac + +dnl Allow enablement of doxygen generated documentation +AC_ARG_ENABLE(doxygen, + AS_HELP_STRING([--enable-doxygen], + [Build doxygen documentation (default is NO)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_DOXYGEN,[1]) ;; + no) AC_SUBST(ENABLE_DOXYGEN,[0]) ;; + default) AC_SUBST(ENABLE_DOXYGEN,[0]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-doxygen. Use "yes" or "no"]) ;; +esac + +dnl Allow disablement of threads +AC_ARG_ENABLE(threads, + AS_HELP_STRING([--enable-threads], + [Use threads if available (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_THREADS,[1]) ;; + no) AC_SUBST(ENABLE_THREADS,[0]) ;; + default) AC_SUBST(ENABLE_THREADS,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-threads. Use "yes" or "no"]) ;; +esac +AC_DEFINE_UNQUOTED([ENABLE_THREADS],$ENABLE_THREADS,[Define if threads enabled]) + +dnl Allow disablement of pthread.h +AC_ARG_ENABLE(pthreads, + AS_HELP_STRING([--enable-pthreads], + [Use pthreads if available (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_PTHREADS,[1]) ;; + no) AC_SUBST(ENABLE_PTHREADS,[0]) ;; + default) AC_SUBST(ENABLE_PTHREADS,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-pthreads. Use "yes" or "no"]) ;; +esac + +dnl Allow building without position independent code +AC_ARG_ENABLE(pic, + AS_HELP_STRING([--enable-pic], + [Build LLVM with Position Independent Code (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_PIC,[1]) ;; + no) AC_SUBST(ENABLE_PIC,[0]) ;; + default) AC_SUBST(ENABLE_PIC,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-pic. Use "yes" or "no"]) ;; +esac +AC_DEFINE_UNQUOTED([ENABLE_PIC],$ENABLE_PIC, + [Define if position independent code is enabled]) + +dnl Allow building a shared library and linking tools against it. 
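+dnl (A typical invocation exercising several of the switches in this section
+dnl might be:
+dnl   ./configure --enable-optimized --enable-assertions --enable-shared
+dnl Each switch feeds a Makefile substitution, e.g. ENABLE_OPTIMIZED,
+dnl DISABLE_ASSERTIONS, and ENABLE_SHARED, consumed by the build.)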
+AC_ARG_ENABLE(shared,
+  AS_HELP_STRING([--enable-shared],
+                 [Build a shared library and link tools against it (default is NO)]),,
+                 enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_SHARED,[1]) ;;
+  no)  AC_SUBST(ENABLE_SHARED,[0]) ;;
+  default) AC_SUBST(ENABLE_SHARED,[0]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-shared. Use "yes" or "no"]) ;;
+esac
+
+dnl Allow libstdc++ to be embedded in LLVM.dll.
+AC_ARG_ENABLE(embed-stdcxx,
+  AS_HELP_STRING([--enable-embed-stdcxx],
+                 [Build a shared library with embedded libstdc++ for Win32 DLL (default is YES)]),,
+                 enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+  no)  AC_SUBST(ENABLE_EMBED_STDCXX,[0]) ;;
+  default) AC_SUBST(ENABLE_EMBED_STDCXX,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-embed-stdcxx. Use "yes" or "no"]) ;;
+esac
+
+dnl Enable embedding timestamp information into build.
+AC_ARG_ENABLE(timestamps,
+  AS_HELP_STRING([--enable-timestamps],
+                 [Enable embedding timestamp information in build (default is YES)]),,
+                 enableval=default)
+case "$enableval" in
+  yes) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+  no)  AC_SUBST(ENABLE_TIMESTAMPS,[0]) ;;
+  default) AC_SUBST(ENABLE_TIMESTAMPS,[1]) ;;
+  *) AC_MSG_ERROR([Invalid setting for --enable-timestamps. Use "yes" or "no"]) ;;
+esac
+AC_DEFINE_UNQUOTED([ENABLE_TIMESTAMPS],$ENABLE_TIMESTAMPS,
+                   [Define if timestamp information (e.g., __DATE__) is allowed])
+
+dnl Allow specific targets to be specified for building (or not)
+TARGETS_TO_BUILD=""
+AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
+    [Build specific host targets: all or target1,target2,... Valid targets are:
+     host, x86, x86_64, sparc, powerpc, alpha, arm, mips, spu,
+     xcore, msp430, systemz, blackfin, ptx, cbe, and cpp (default=all)]),,
+    enableval=all)
+if test "$enableval" = host-only ; then
+  enableval=host
+fi
+case "$enableval" in
+  all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;;
+  *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
+      case "$a_target" in
+        x86)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+        x86_64)   TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+        sparc)    TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+        powerpc)  TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+        alpha)    TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+        arm)      TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+        mips)     TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+        spu)      TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+        xcore)    TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
+        msp430)   TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
+        systemz)  TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;;
+        blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;;
+        cbe)      TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;;
+        cpp)      TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
+        mblaze)   TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+        ptx)      TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
+        host) case "$llvm_cv_target_arch" in
+            x86)         TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+            x86_64)      TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
+            Sparc)       TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;;
+            PowerPC)     TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;;
+            Alpha)       TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;;
+            ARM)         TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;;
+            Mips)        TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;;
+            MBlaze)      TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
+            CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;;
+            XCore)
TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; + *) AC_MSG_ERROR([Can not set target to build]) ;; + esac ;; + *) AC_MSG_ERROR([Unrecognized target $a_target]) ;; + esac + done + ;; +esac +AC_SUBST(TARGETS_TO_BUILD,$TARGETS_TO_BUILD) + +# Determine whether we are building LLVM support for the native architecture. +# If so, define LLVM_NATIVE_ARCH to that LLVM target. +for a_target in $TARGETS_TO_BUILD; do + if test "$a_target" = "$LLVM_NATIVE_ARCH"; then + AC_DEFINE_UNQUOTED(LLVM_NATIVE_ARCH, $LLVM_NATIVE_ARCH, + [LLVM architecture name for the native architecture, if available]) + LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target" + LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo" + LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter" + AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET, + [LLVM name for the native Target init function, if available]) + AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO, + [LLVM name for the native TargetInfo init function, if available]) + AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER, + [LLVM name for the native AsmPrinter init function, if available]) + fi +done + +# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual +# target feature def files. +LLVM_ENUM_TARGETS="" +LLVM_ENUM_ASM_PRINTERS="" +LLVM_ENUM_ASM_PARSERS="" +LLVM_ENUM_DISASSEMBLERS="" +for target_to_build in $TARGETS_TO_BUILD; do + LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS" + if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then + LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS"; + fi + if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then + LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS"; + fi + if test -f ${srcdir}/lib/Target/${target_to_build}/Disassembler/Makefile ; then + LLVM_ENUM_DISASSEMBLERS="LLVM_DISASSEMBLER($target_to_build) $LLVM_ENUM_DISASSEMBLERS"; + fi +done +AC_SUBST(LLVM_ENUM_TARGETS) +AC_SUBST(LLVM_ENUM_ASM_PRINTERS) +AC_SUBST(LLVM_ENUM_ASM_PARSERS) +AC_SUBST(LLVM_ENUM_DISASSEMBLERS) + +dnl Prevent the CBackend from using printf("%a") for floating point so older +dnl C compilers that cannot deal with the 0x0p+0 hex floating point format +dnl can still compile the CBE's output +AC_ARG_ENABLE([cbe-printf-a],AS_HELP_STRING([--enable-cbe-printf-a], + [Enable C Backend output with hex floating point via %a (default is YES)]),, + enableval=default) +case "$enableval" in + yes) AC_SUBST(ENABLE_CBE_PRINTF_A,[1]) ;; + no) AC_SUBST(ENABLE_CBE_PRINTF_A,[0]) ;; + default) AC_SUBST(ENABLE_CBE_PRINTF_A,[1]) ;; + *) AC_MSG_ERROR([Invalid setting for --enable-cbe-printf-a. Use "yes" or "no"]) ;; +esac +AC_DEFINE_UNQUOTED([ENABLE_CBE_PRINTF_A],$ENABLE_CBE_PRINTF_A, + [Define if CBE is enabled for printf %a output]) + +dnl Allow a specific llvm-gcc/llvm-g++ pair to be used with this LLVM config. +AC_ARG_WITH(llvmgccdir, + AS_HELP_STRING([--with-llvmgccdir], + [Specify location of llvm-gcc install dir (default searches PATH)]),, + withval=default) +case "$withval" in + default) WITH_LLVMGCCDIR=default ;; + /* | [[A-Za-z]]:[[\\/]]*) WITH_LLVMGCCDIR=$withval ;; + *) AC_MSG_ERROR([Invalid path for --with-llvmgccdir. 
Provide full path]) ;; +esac + +dnl Allow a specific llvm-gcc compiler to be used with this LLVM config. +AC_ARG_WITH(llvmgcc, + AS_HELP_STRING([--with-llvmgcc], + [Specify location of llvm-gcc driver (default searches PATH)]), + LLVMGCC=$with_llvmgcc + WITH_LLVMGCCDIR="",) + +dnl Allow a specific llvm-g++ compiler to be used with this LLVM config. +AC_ARG_WITH(llvmgxx, + AS_HELP_STRING([--with-llvmgxx], + [Specify location of llvm-g++ driver (default searches PATH)]), + LLVMGXX=$with_llvmgxx + WITH_LLVMGCCDIR="",) + +if test -n "$LLVMGCC"; then + LLVMGCCCOMMAND="$LLVMGCC" +fi + +if test -n "$LLVMGXX"; then + LLVMGXXCOMMAND="$LLVMGXX" +fi + +if test -n "$LLVMGCC" && test -z "$LLVMGXX"; then + AC_MSG_ERROR([Invalid llvm-g++. Use --with-llvmgxx when --with-llvmgcc is used]); +fi + +if test -n "$LLVMGXX" && test -z "$LLVMGCC"; then + AC_MSG_ERROR([Invalid llvm-gcc. Use --with-llvmgcc when --with-llvmgxx is used]); +fi + +dnl Allow a specific Clang compiler to be used with this LLVM config. +AC_ARG_WITH(clang, + AS_HELP_STRING([--with-clang], + [Specify location of clang compiler (default is --with-built-clang)]), + [],[with_clang=default]) + +dnl Enable use of the built Clang. +AC_ARG_WITH(built-clang, + AS_HELP_STRING([--with-built-clang], + [Use the compiled Clang as the LLVM compiler (default=check)]), + [],[with_built_clang=check]) + +dnl Select the Clang compiler option. +dnl +dnl If --with-clang is given, always honor that; otherwise honor +dnl --with-built-clang, or check if we have the clang sources. +AC_MSG_CHECKING([clang compiler]) +WITH_CLANGPATH="" +WITH_BUILT_CLANG=0 +if test "$with_clang" != "default"; then + WITH_CLANGPATH="$with_clang" + if ! test -x "$WITH_CLANGPATH"; then + AC_MSG_ERROR([invalid --with-clang, path does not specify an executable]) + fi +elif test "$with_built_clang" = "yes"; then + WITH_BUILT_CLANG=1 +elif test "$with_built_clang" = "no"; then + WITH_BUILT_CLANG=0 +else + if test "$with_built_clang" != "check"; then + AC_MSG_ERROR([invalid value for --with-built-clang.]) + fi + + if test -f ${srcdir}/tools/clang/README.txt; then + WITH_BUILT_CLANG=1 + fi +fi + +if ! test -z "$WITH_CLANGPATH"; then + AC_MSG_RESULT([$WITH_CLANGPATH]) + WITH_CLANGXXPATH=`"$WITH_CLANGPATH" --print-prog-name=clang++` +elif test "$WITH_BUILT_CLANG" = "1"; then + AC_MSG_RESULT([built]) +else + AC_MSG_RESULT([none]) +fi +AC_SUBST(CLANGPATH,$WITH_CLANGPATH) +AC_SUBST(CLANGXXPATH,$WITH_CLANGXXPATH) +AC_SUBST(ENABLE_BUILT_CLANG,$WITH_BUILT_CLANG) + +dnl Override the option to use for optimized builds. 
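+dnl For example (an illustrative invocation; any flags the compiler accepts
+dnl may be substituted), to optimize at -O2 while keeping debug info:
+dnl   ./configure --with-optimize-option="-O2 -g"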
+AC_ARG_WITH(optimize-option,
+  AS_HELP_STRING([--with-optimize-option],
+                 [Select the compiler options to use for optimized builds]),,
+                 withval=default)
+AC_MSG_CHECKING([optimization flags])
+case "$withval" in
+  default)
+    case "$llvm_cv_os_type" in
+    FreeBSD) optimize_option=-O2 ;;
+    MingW) optimize_option=-O2 ;;
+    *) optimize_option=-O3 ;;
+    esac ;;
+  *) optimize_option="$withval" ;;
+esac
+AC_SUBST(OPTIMIZE_OPTION,$optimize_option)
+AC_MSG_RESULT([$optimize_option])
+
+dnl Specify extra build options
+AC_ARG_WITH(extra-options,
+  AS_HELP_STRING([--with-extra-options],
+                 [Specify additional options to compile LLVM with]),,
+                 withval=default)
+case "$withval" in
+  default) EXTRA_OPTIONS= ;;
+  *) EXTRA_OPTIONS=$withval ;;
+esac
+AC_SUBST(EXTRA_OPTIONS,$EXTRA_OPTIONS)
+
+dnl Allow specific bindings to be specified for building (or not)
+AC_ARG_ENABLE([bindings],AS_HELP_STRING([--enable-bindings],
+    [Build specific language bindings: all,auto,none,{binding-name} (default=auto)]),,
+    enableval=default)
+BINDINGS_TO_BUILD=""
+case "$enableval" in
+  yes | default | auto) BINDINGS_TO_BUILD="auto" ;;
+  all ) BINDINGS_TO_BUILD="ocaml" ;;
+  none | no) BINDINGS_TO_BUILD="" ;;
+  *)for a_binding in `echo $enableval|sed -e 's/,/ /g' ` ; do
+      case "$a_binding" in
+        ocaml) BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" ;;
+        *) AC_MSG_ERROR([Unrecognized binding $a_binding]) ;;
+      esac
+  done
+  ;;
+esac
+
+dnl Allow the ocaml libdir to be overridden. This could go in a configure
+dnl script for bindings/ocaml/configure, except that its auto value depends on
+dnl OCAMLC, which is found here to support tests.
+AC_ARG_WITH([ocaml-libdir],
+  [AS_HELP_STRING([--with-ocaml-libdir],
+    [Specify install location for ocaml bindings (default is stdlib)])],
+  [],
+  [withval=auto])
+case "$withval" in
+  auto) with_ocaml_libdir="$withval" ;;
+  /* | [[A-Za-z]]:[[\\/]]*) with_ocaml_libdir="$withval" ;;
+  *) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
+esac
+
+AC_ARG_WITH(clang-resource-dir,
+  AS_HELP_STRING([--with-clang-resource-dir],
+    [Relative directory from the Clang binary for resource files]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CLANG_RESOURCE_DIR,"$withval",
+                   [Relative directory for resource files])
+
+AC_ARG_WITH(c-include-dirs,
+  AS_HELP_STRING([--with-c-include-dirs],
+    [Colon separated list of directories clang will search for headers]),,
+    withval="")
+AC_DEFINE_UNQUOTED(C_INCLUDE_DIRS,"$withval",
+                   [Directories clang will search for headers])
+
+AC_ARG_WITH(cxx-include-root,
+  AS_HELP_STRING([--with-cxx-include-root],
+    [Directory with the libstdc++ headers.]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_ROOT,"$withval",
+                   [Directory with the libstdc++ headers.])
+
+AC_ARG_WITH(cxx-include-arch,
+  AS_HELP_STRING([--with-cxx-include-arch],
+    [Architecture of the libstdc++ headers.]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_ARCH,"$withval",
+                   [Architecture of the libstdc++ headers.])
+
+AC_ARG_WITH(cxx-include-32bit-dir,
+  AS_HELP_STRING([--with-cxx-include-32bit-dir],
+    [32 bit multilib directory.]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_32BIT_DIR,"$withval",
+                   [32 bit multilib directory.])
+
+AC_ARG_WITH(cxx-include-64bit-dir,
+  AS_HELP_STRING([--with-cxx-include-64bit-dir],
+    [64 bit multilib directory.]),,
+    withval="")
+AC_DEFINE_UNQUOTED(CXX_INCLUDE_64BIT_DIR,"$withval",
+                   [64 bit multilib directory.])
+
+dnl Allow linking of LLVM with GPLv3 binutils code.
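+dnl For example, to point the gold plugin at the plugin-api.h shipped in a
+dnl binutils source tree (the path below is hypothetical):
+dnl   ./configure --with-binutils-include=/path/to/binutils/include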
+AC_ARG_WITH(binutils-include,
+  AS_HELP_STRING([--with-binutils-include],
+    [Specify path to binutils/include/ containing plugin-api.h file for gold plugin.]),,
+    withval=default)
+case "$withval" in
+  default) WITH_BINUTILS_INCDIR=default ;;
+  /* | [[A-Za-z]]:[[\\/]]*) WITH_BINUTILS_INCDIR=$withval ;;
+  *) AC_MSG_ERROR([Invalid path for --with-binutils-include. Provide full path]) ;;
+esac
+if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
+  AC_SUBST(BINUTILS_INCDIR,$WITH_BINUTILS_INCDIR)
+  if test ! -f "$WITH_BINUTILS_INCDIR/plugin-api.h"; then
+    echo "$WITH_BINUTILS_INCDIR/plugin-api.h"
+    AC_MSG_ERROR([Invalid path to directory containing plugin-api.h.]);
+  fi
+fi
+
+dnl --enable-libffi : check whether the user wants to turn on libffi:
+AC_ARG_ENABLE(libffi,AS_HELP_STRING(
+  --enable-libffi,[Check for the presence of libffi (default is NO)]),
+  [case "$enableval" in
+    yes) llvm_cv_enable_libffi="yes" ;;
+    no)  llvm_cv_enable_libffi="no"  ;;
+    *) AC_MSG_ERROR([Invalid setting for --enable-libffi. Use "yes" or "no"]) ;;
+  esac],
+  llvm_cv_enable_libffi=no)
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 4: Check for programs we need and that they are the right version
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl Check for compilation tools
+AC_PROG_CPP
+AC_PROG_CC(gcc)
+AC_PROG_CXX(g++)
+
+AC_PROG_NM
+AC_SUBST(NM)
+
+dnl Check for the tools that the makefiles require
+AC_CHECK_GNU_MAKE
+AC_PROG_LN_S
+AC_PATH_PROG(CMP, [cmp], [cmp])
+AC_PATH_PROG(CP, [cp], [cp])
+AC_PATH_PROG(DATE, [date], [date])
+AC_PATH_PROG(FIND, [find], [find])
+AC_PATH_PROG(GREP, [grep], [grep])
+AC_PATH_PROG(MKDIR,[mkdir],[mkdir])
+AC_PATH_PROG(MV, [mv], [mv])
+AC_PROG_RANLIB
+AC_CHECK_TOOL(AR, ar, false)
+AC_PATH_PROG(RM, [rm], [rm])
+AC_PATH_PROG(SED, [sed], [sed])
+AC_PATH_PROG(TAR, [tar], [gtar])
+AC_PATH_PROG(BINPWD,[pwd], [pwd])
+
+dnl Looking for misc. graph plotting software
+AC_PATH_PROG(GRAPHVIZ, [Graphviz], [echo Graphviz])
+if test "$GRAPHVIZ" != "echo Graphviz" ; then
+  AC_DEFINE([HAVE_GRAPHVIZ],[1],[Define if the Graphviz program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    GRAPHVIZ=`echo $GRAPHVIZ | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_GRAPHVIZ],"$GRAPHVIZ${EXEEXT}",
+   [Define to path to Graphviz program if found or 'echo Graphviz' otherwise])
+fi
+AC_PATH_PROG(DOT, [dot], [echo dot])
+if test "$DOT" != "echo dot" ; then
+  AC_DEFINE([HAVE_DOT],[1],[Define if the dot program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    DOT=`echo $DOT | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_DOT],"$DOT${EXEEXT}",
+   [Define to path to dot program if found or 'echo dot' otherwise])
+fi
+AC_PATH_PROG(FDP, [fdp], [echo fdp])
+if test "$FDP" != "echo fdp" ; then
+  AC_DEFINE([HAVE_FDP],[1],[Define if the fdp program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    FDP=`echo $FDP | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_FDP],"$FDP${EXEEXT}",
+   [Define to path to fdp program if found or 'echo fdp' otherwise])
+fi
+AC_PATH_PROG(NEATO, [neato], [echo neato])
+if test "$NEATO" != "echo neato" ; then
+  AC_DEFINE([HAVE_NEATO],[1],[Define if the neato program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    NEATO=`echo $NEATO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_NEATO],"$NEATO${EXEEXT}",
+   [Define to path to neato program if found or 'echo neato' otherwise])
+fi
+AC_PATH_PROG(TWOPI, [twopi], [echo twopi])
+if test "$TWOPI" != "echo twopi" ; then
+  AC_DEFINE([HAVE_TWOPI],[1],[Define if the twopi program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    TWOPI=`echo $TWOPI | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_TWOPI],"$TWOPI${EXEEXT}",
+   [Define to path to twopi program if found or 'echo twopi' otherwise])
+fi
+AC_PATH_PROG(CIRCO, [circo], [echo circo])
+if test "$CIRCO" != "echo circo" ; then
+  AC_DEFINE([HAVE_CIRCO],[1],[Define if the circo program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    CIRCO=`echo $CIRCO | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_CIRCO],"$CIRCO${EXEEXT}",
+   [Define to path to circo program if found or 'echo circo' otherwise])
+fi
+AC_PATH_PROGS(GV, [gv gsview32], [echo gv])
+if test "$GV" != "echo gv" ; then
+  AC_DEFINE([HAVE_GV],[1],[Define if the gv program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+  if test "$llvm_cv_os_type" = "MingW" ; then
+    GV=`echo $GV | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' `
+  fi
+  AC_DEFINE_UNQUOTED([LLVM_PATH_GV],"$GV${EXEEXT}",
+   [Define to path to gv program if found or 'echo gv' otherwise])
+fi
+AC_PATH_PROG(DOTTY, [dotty], [echo dotty])
+if test "$DOTTY" != "echo dotty" ; then
+  AC_DEFINE([HAVE_DOTTY],[1],[Define if the dotty program is available])
+  dnl If we're targeting for mingw we should emit windows paths, not msys
+
if test "$llvm_cv_os_type" = "MingW" ; then + DOTTY=`echo $DOTTY | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_DOTTY],"$DOTTY${EXEEXT}", + [Define to path to dotty program if found or 'echo dotty' otherwise]) +fi +AC_PATH_PROG(XDOT_PY, [xdot.py], [echo xdot.py]) +if test "$XDOT_PY" != "echo xdot.py" ; then + AC_DEFINE([HAVE_XDOT_PY],[1],[Define if the xdot.py program is available]) + dnl If we're targeting for mingw we should emit windows paths, not msys + if test "$llvm_cv_os_type" = "MingW" ; then + XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([[A-Za-z]]\)\//\1:\//' ` + fi + AC_DEFINE_UNQUOTED([LLVM_PATH_XDOT_PY],"$XDOT_PY${EXEEXT}", + [Define to path to xdot.py program if found or 'echo xdot.py' otherwise]) +fi + +dnl Look for a sufficiently recent version of Perl. +LLVM_PROG_PERL([5.006]) +AC_SUBST(PERL) +if test x"$PERL" = xnone; then + AC_SUBST(HAVE_PERL,0) + AC_MSG_ERROR([perl is required but was not found, please install it]) +else + AC_SUBST(HAVE_PERL,1) +fi + +dnl Find the install program +AC_PROG_INSTALL +dnl Prepend src dir to install path dir if it's a relative path +dnl This is a hack for installs that take place in something other +dnl than the top level. +case "$INSTALL" in + [[\\/$]]* | ?:[[\\/]]* ) ;; + *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;; +esac + +dnl Checks for documentation and testing tools that we can do without. If these +dnl are not found then they are set to "true" which always succeeds but does +dnl nothing. This just lets the build output show that we could have done +dnl something if the tool was available. +AC_PATH_PROG(BZIP2, [bzip2]) +AC_PATH_PROG(CAT, [cat]) +AC_PATH_PROG(DOXYGEN, [doxygen]) +AC_PATH_PROG(GROFF, [groff]) +AC_PATH_PROG(GZIPBIN, [gzip]) +AC_PATH_PROG(POD2HTML, [pod2html]) +AC_PATH_PROG(POD2MAN, [pod2man]) +AC_PATH_PROG(PDFROFF, [pdfroff]) +AC_PATH_PROG(RUNTEST, [runtest]) +DJ_AC_PATH_TCLSH +AC_PATH_PROG(ZIP, [zip]) +AC_PATH_PROGS(OCAMLC, [ocamlc]) +AC_PATH_PROGS(OCAMLOPT, [ocamlopt]) +AC_PATH_PROGS(OCAMLDEP, [ocamldep]) +AC_PATH_PROGS(OCAMLDOC, [ocamldoc]) +AC_PATH_PROGS(GAS, [gas as]) + +dnl Get the version of the linker in use. +AC_LINK_GET_VERSION + +dnl Determine whether the linker supports the -R option. +AC_LINK_USE_R + +dnl Determine whether the linker supports the -export-dynamic option. +AC_LINK_EXPORT_DYNAMIC + +dnl Determine whether the linker supports the --version-script option. +AC_LINK_VERSION_SCRIPT + +dnl Check for libtool and the library that has dlopen function (which must come +dnl before the AC_PROG_LIBTOOL check in order to enable dlopening libraries with +dnl libtool). +AC_LIBTOOL_DLOPEN +AC_LIB_LTDL + +if test "$WITH_LLVMGCCDIR" = "default" ; then + LLVMGCC="llvm-gcc${EXEEXT}" + LLVMGXX="llvm-g++${EXEEXT}" + LLVMGCCCOMMAND="$LLVMGCC" + LLVMGXXCOMMAND="$LLVMGXX" + AC_SUBST(LLVMGCCCOMMAND,$LLVMGCCCOMMAND) + AC_SUBST(LLVMGXXCOMMAND,$LLVMGXXCOMMAND) + AC_PATH_PROG(LLVMGCC, $LLVMGCC, []) + AC_PATH_PROG(LLVMGXX, $LLVMGXX, []) +else + if test -z "$LLVMGCC"; then + LLVMGCC="$WITH_LLVMGCCDIR/bin/llvm-gcc${EXEEXT}" + LLVMGCCCOMMAND="$LLVMGCC" + fi + if test -z "$LLVMGXX"; then + LLVMGXX="$WITH_LLVMGCCDIR/bin/llvm-g++${EXEEXT}" + LLVMGXXCOMMAND="$LLVMGXX" + fi + + AC_SUBST(LLVMGCC,$LLVMGCC) + AC_SUBST(LLVMGXX,$LLVMGXX) + AC_SUBST(LLVMGCCCOMMAND,$LLVMGCCCOMMAND) + AC_SUBST(LLVMGXXCOMMAND,$LLVMGXXCOMMAND) +fi + +dnl Select the LLVM capable compiler to use, we default to using llvm-gcc if +dnl found, otherwise clang if available. 
+AC_ARG_WITH(llvmcc, + AS_HELP_STRING([--with-llvmcc=], + [Choose the LLVM capable compiler to use (llvm-gcc, clang, or none; default=check)]), + [],[with_llvmcc=check]) +AC_MSG_CHECKING([LLVM capable compiler]) +if test "$with_llvmcc" != "check"; then + if (test "$with_llvmcc" != "llvm-gcc" && + test "$with_llvmcc" != "clang" && + test "$with_llvmcc" != "none"); then + AC_MSG_ERROR([invalid value for --with-llvmcc, expected 'llvm-gcc', 'clang', or 'none'.]) + fi + WITH_LLVMCC="$with_llvmcc" +elif test -n "$LLVMGCC"; then + WITH_LLVMCC=llvm-gcc +elif test -n "$WITH_CLANGPATH" || test "$WITH_BUILT_CLANG" -ne "0"; then + WITH_LLVMCC=clang +else + WITH_LLVMCC=none +fi +AC_MSG_RESULT([$WITH_LLVMCC]) +AC_SUBST(LLVMCC_OPTION,$WITH_LLVMCC) + +AC_MSG_CHECKING([tool compatibility]) + +dnl Ensure that compilation tools are GCC or a GNU compatible compiler such as +dnl ICC; we use GCC specific options in the makefiles so the compiler needs +dnl to support those options. +dnl "icc" emits gcc signatures +dnl "icc -no-gcc" emits no gcc signature BUT is still compatible +ICC=no +IXX=no +case $CC in + icc*|icpc*) + ICC=yes + IXX=yes + ;; + *) + ;; +esac + +if test "$GCC" != "yes" && test "$ICC" != "yes" +then + AC_MSG_ERROR([gcc|icc required but not found]) +fi + +dnl Ensure that compilation tools are compatible with GCC extensions +if test "$GXX" != "yes" && test "$IXX" != "yes" +then + AC_MSG_ERROR([g++|clang++|icc required but not found]) +fi + +dnl Verify that GCC is version 3.0 or higher +if test "$GCC" = "yes" +then + AC_COMPILE_IFELSE([[#if !defined(__GNUC__) || __GNUC__ < 3 +#error Unsupported GCC version +#endif +]], [], [AC_MSG_ERROR([gcc 3.x required, but you have a lower version])]) +fi + +dnl Check for GNU Make. We use its extensions, so don't build without it +if test -z "$llvm_cv_gnu_make_command" +then + AC_MSG_ERROR([GNU Make required but not found]) +fi + +dnl Tool compatibility is okay if we make it here. +AC_MSG_RESULT([ok]) + +dnl Check optional compiler flags. +AC_MSG_CHECKING([optional compiler flags]) +CXX_FLAG_CHECK(NO_VARIADIC_MACROS, [-Wno-variadic-macros]) +CXX_FLAG_CHECK(NO_MISSING_FIELD_INITIALIZERS, [-Wno-missing-field-initializers]) +AC_MSG_RESULT([$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS]) + +dnl===-----------------------------------------------------------------------=== +dnl=== +dnl=== SECTION 5: Check for libraries +dnl=== +dnl===-----------------------------------------------------------------------=== + +AC_CHECK_LIB(m,sin) +if test "$llvm_cv_os_type" = "MingW" ; then + AC_CHECK_LIB(imagehlp, main) + AC_CHECK_LIB(psapi, main) +fi + +dnl dlopen() is required for plugin support. +AC_SEARCH_LIBS(dlopen,dl,AC_DEFINE([HAVE_DLOPEN],[1], + [Define if dlopen() is available on this platform.]), + AC_MSG_WARN([dlopen() not found - disabling plugin support])) + +dnl libffi is optional; used to call external functions from the interpreter +if test "$llvm_cv_enable_libffi" = "yes" ; then + AC_SEARCH_LIBS(ffi_call,ffi,AC_DEFINE([HAVE_FFI_CALL],[1], + [Define if libffi is available on this platform.]), + AC_MSG_ERROR([libffi not found - configure without --enable-libffi to compile without it])) +fi + +dnl mallinfo is optional; the code can compile (minus features) without it +AC_SEARCH_LIBS(mallinfo,malloc,AC_DEFINE([HAVE_MALLINFO],[1], + [Define if mallinfo() is available on this platform.])) + +dnl pthread locking functions are optional - but llvm will not be thread-safe +dnl without locks. 
+if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then + AC_CHECK_LIB(pthread, pthread_mutex_init) + AC_SEARCH_LIBS(pthread_mutex_lock,pthread, + AC_DEFINE([HAVE_PTHREAD_MUTEX_LOCK],[1], + [Have pthread_mutex_lock])) + AC_SEARCH_LIBS(pthread_rwlock_init,pthread, + AC_DEFINE([HAVE_PTHREAD_RWLOCK_INIT],[1], + [Have pthread_rwlock_init])) + AC_SEARCH_LIBS(pthread_getspecific,pthread, + AC_DEFINE([HAVE_PTHREAD_GETSPECIFIC],[1], + [Have pthread_getspecific])) +fi + +dnl Allow extra x86-disassembler library +AC_ARG_WITH(udis86, + AS_HELP_STRING([--with-udis86=], + [Use udis86 external x86 disassembler library]), + [ + AC_SUBST(USE_UDIS86, [1]) + case "$withval" in + /usr/lib|yes) ;; + *) LDFLAGS="$LDFLAGS -L${withval}" ;; + esac + AC_CHECK_LIB(udis86, ud_init, [], [ + echo "Error! You need to have libudis86 around." + exit -1 + ]) + ], + AC_SUBST(USE_UDIS86, [0])) +AC_DEFINE_UNQUOTED([USE_UDIS86],$USE_UDIS86, + [Define if use udis86 library]) + +dnl Allow OProfile support for JIT output. +AC_ARG_WITH(oprofile, + AS_HELP_STRING([--with-oprofile=], + [Tell OProfile >= 0.9.4 how to symbolize JIT output]), + [ + AC_SUBST(USE_OPROFILE, [1]) + case "$withval" in + /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + AC_SUBST(USE_OPROFILE, [0]) ;; + *) llvm_cv_oppath="${withval}/lib/oprofile" + CPPFLAGS="-I${withval}/include";; + esac + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + dnl Work around http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=537744: + dnl libbfd is not included properly in libopagent in some Debian + dnl versions. If libbfd isn't found at all, we assume opagent works + dnl anyway. + AC_SEARCH_LIBS(bfd_init, bfd, [], []) + AC_SEARCH_LIBS(op_open_agent, opagent, [], [ + echo "Error! You need to have libopagent around." + exit -1 + ]) + AC_CHECK_HEADER([opagent.h], [], [ + echo "Error! You need to have opagent.h around." + exit -1 + ]) + fi + ], + [ + AC_SUBST(USE_OPROFILE, [0]) + ]) +AC_DEFINE_UNQUOTED([USE_OPROFILE],$USE_OPROFILE, + [Define if we have the oprofile JIT-support library]) + +dnl===-----------------------------------------------------------------------=== +dnl=== +dnl=== SECTION 6: Check for header files +dnl=== +dnl===-----------------------------------------------------------------------=== + +dnl First, use autoconf provided macros for specific headers that we need +dnl We don't check for ancient stuff or things that are guaranteed to be there +dnl by the C++ standard. We always use the versions of C headers. +dnl Generally we're looking for POSIX headers. +AC_HEADER_DIRENT +AC_HEADER_MMAP_ANONYMOUS +AC_HEADER_STAT +AC_HEADER_STDC +AC_HEADER_SYS_WAIT +AC_HEADER_TIME + +AC_CHECK_HEADERS([dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h]) +AC_CHECK_HEADERS([malloc.h setjmp.h signal.h stdint.h termios.h unistd.h]) +AC_CHECK_HEADERS([utime.h windows.h]) +AC_CHECK_HEADERS([sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h]) +AC_CHECK_HEADERS([sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h]) +AC_CHECK_HEADERS([valgrind/valgrind.h]) +AC_CHECK_HEADERS([fenv.h]) +if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then + AC_CHECK_HEADERS(pthread.h, + AC_SUBST(HAVE_PTHREAD, 1), + AC_SUBST(HAVE_PTHREAD, 0)) +else + AC_SUBST(HAVE_PTHREAD, 0) +fi + +dnl Try to find ffi.h. +if test "$llvm_cv_enable_libffi" = "yes" ; then + AC_CHECK_HEADERS([ffi.h ffi/ffi.h]) +fi + +dnl Try to find Darwin specific crash reporting libraries. 
+AC_CHECK_HEADERS([CrashReporterClient.h]) + +dnl Try to find Darwin specific crash reporting global. +AC_MSG_CHECKING([__crashreporter_info__]) +AC_LINK_IFELSE( + AC_LANG_SOURCE( + [[extern const char *__crashreporter_info__; + int main() { + __crashreporter_info__ = "test"; + return 0; + } + ]]), + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_CRASHREPORTER_INFO, 1, Can use __crashreporter_info__), + AC_MSG_RESULT(no) + AC_DEFINE(HAVE_CRASHREPORTER_INFO, 0, + Define if __crashreporter_info__ exists.)) + +dnl===-----------------------------------------------------------------------=== +dnl=== +dnl=== SECTION 7: Check for types and structures +dnl=== +dnl===-----------------------------------------------------------------------=== + +AC_HUGE_VAL_CHECK +AC_TYPE_PID_T +AC_TYPE_SIZE_T +AC_DEFINE_UNQUOTED([RETSIGTYPE],[void],[Define as the return type of signal handlers (`int' or `void').]) +AC_STRUCT_TM +AC_CHECK_TYPES([int64_t],,AC_MSG_ERROR([Type int64_t required but not found])) +AC_CHECK_TYPES([uint64_t],, + AC_CHECK_TYPES([u_int64_t],, + AC_MSG_ERROR([Type uint64_t or u_int64_t required but not found]))) + +dnl===-----------------------------------------------------------------------=== +dnl=== +dnl=== SECTION 8: Check for specific functions needed +dnl=== +dnl===-----------------------------------------------------------------------=== + +AC_CHECK_FUNCS([backtrace ceilf floorf roundf rintf nearbyintf getcwd ]) +AC_CHECK_FUNCS([powf fmodf strtof round ]) +AC_CHECK_FUNCS([getpagesize getrusage getrlimit setrlimit gettimeofday ]) +AC_CHECK_FUNCS([isatty mkdtemp mkstemp ]) +AC_CHECK_FUNCS([mktemp posix_spawn realpath sbrk setrlimit strdup ]) +AC_CHECK_FUNCS([strerror strerror_r setenv ]) +AC_CHECK_FUNCS([strtoll strtoq sysconf malloc_zone_statistics ]) +AC_CHECK_FUNCS([setjmp longjmp sigsetjmp siglongjmp writev]) +AC_C_PRINTF_A +AC_FUNC_RAND48 + +dnl Check the declaration "Secure API" on Windows environments. +AC_CHECK_DECLS([strerror_s]) + +dnl Check symbols in libgcc.a for JIT on Mingw. +if test "$llvm_cv_os_type" = "MingW" ; then + AC_CHECK_LIB(gcc,_alloca,AC_DEFINE([HAVE__ALLOCA],[1],[Have host's _alloca])) + AC_CHECK_LIB(gcc,__alloca,AC_DEFINE([HAVE___ALLOCA],[1],[Have host's __alloca])) + AC_CHECK_LIB(gcc,__chkstk,AC_DEFINE([HAVE___CHKSTK],[1],[Have host's __chkstk])) + AC_CHECK_LIB(gcc,___chkstk,AC_DEFINE([HAVE____CHKSTK],[1],[Have host's ___chkstk])) + + AC_CHECK_LIB(gcc,__ashldi3,AC_DEFINE([HAVE___ASHLDI3],[1],[Have host's __ashldi3])) + AC_CHECK_LIB(gcc,__ashrdi3,AC_DEFINE([HAVE___ASHRDI3],[1],[Have host's __ashrdi3])) + AC_CHECK_LIB(gcc,__divdi3,AC_DEFINE([HAVE___DIVDI3],[1],[Have host's __divdi3])) + AC_CHECK_LIB(gcc,__fixdfdi,AC_DEFINE([HAVE___FIXDFDI],[1],[Have host's __fixdfdi])) + AC_CHECK_LIB(gcc,__fixsfdi,AC_DEFINE([HAVE___FIXSFDI],[1],[Have host's __fixsfdi])) + AC_CHECK_LIB(gcc,__floatdidf,AC_DEFINE([HAVE___FLOATDIDF],[1],[Have host's __floatdidf])) + AC_CHECK_LIB(gcc,__lshrdi3,AC_DEFINE([HAVE___LSHRDI3],[1],[Have host's __lshrdi3])) + AC_CHECK_LIB(gcc,__moddi3,AC_DEFINE([HAVE___MODDI3],[1],[Have host's __moddi3])) + AC_CHECK_LIB(gcc,__udivdi3,AC_DEFINE([HAVE___UDIVDI3],[1],[Have host's __udivdi3])) + AC_CHECK_LIB(gcc,__umoddi3,AC_DEFINE([HAVE___UMODDI3],[1],[Have host's __umoddi3])) + + AC_CHECK_LIB(gcc,__main,AC_DEFINE([HAVE___MAIN],[1],[Have host's __main])) + AC_CHECK_LIB(gcc,__cmpdi2,AC_DEFINE([HAVE___CMPDI2],[1],[Have host's __cmpdi2])) +fi + +dnl Check for variations in the Standard C++ library and STL. These macros are +dnl provided by LLVM in the autoconf/m4 directory. 
+AC_FUNC_ISNAN
+AC_FUNC_ISINF
+
+dnl Check for mmap support. We also need to know if /dev/zero is required to
+dnl be opened for allocating RWX memory.
+dnl Make sure we aren't attempting to configure for an unknown system
+if test "$llvm_cv_platform_type" = "Unix" ; then
+  AC_FUNC_MMAP
+  AC_FUNC_MMAP_FILE
+  AC_NEED_DEV_ZERO_FOR_MMAP
+
+  if test "$ac_cv_func_mmap_fixed_mapped" = "no"
+  then
+    AC_MSG_WARN([mmap() of a fixed address required but not supported])
+  fi
+  if test "$ac_cv_func_mmap_file" = "no"
+  then
+    AC_MSG_WARN([mmap() of files required but not found])
+  fi
+fi
+
+dnl atomic builtins are required for threading support.
+AC_MSG_CHECKING(for GCC atomic builtins)
+dnl Since we'll be using these atomic builtins in C++ files we should test
+dnl the C++ compiler.
+AC_LANG_PUSH([C++])
+AC_LINK_IFELSE(
+  AC_LANG_SOURCE(
+    [[int main() {
+        volatile unsigned long val = 1;
+        __sync_synchronize();
+        __sync_val_compare_and_swap(&val, 1, 0);
+        __sync_add_and_fetch(&val, 1);
+        __sync_sub_and_fetch(&val, 1);
+        return 0;
+      }
+    ]]),
+  AC_LANG_POP([C++])
+  AC_MSG_RESULT(yes)
+  AC_DEFINE(LLVM_MULTITHREADED, 1, Build multithreading support into LLVM),
+  AC_MSG_RESULT(no)
+  AC_DEFINE(LLVM_MULTITHREADED, 0, Build multithreading support into LLVM)
+  AC_MSG_WARN([LLVM will be built thread-unsafe because atomic builtins are missing]))
+
+dnl===-----------------------------------------------------------------------===
+dnl===
+dnl=== SECTION 9: Additional checks, variables, etc.
+dnl===
+dnl===-----------------------------------------------------------------------===
+
+dnl Handle 32-bit linux systems running a 64-bit kernel.
+dnl This has to come after section 4 because it invokes the compiler.
+if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then
+  AC_IS_LINUX_MIXED
+  if test "$llvm_cv_linux_mixed" = "yes"; then
+    llvm_cv_target_arch="x86"
+    ARCH="x86"
+  fi
+fi
+
+dnl Check whether __dso_handle is present
+AC_CHECK_FUNCS([__dso_handle])
+
+dnl Check whether llvm-gcc is based on dragonegg
+AC_CACHE_CHECK([whether llvm-gcc is dragonegg],[llvm_cv_llvmgcc_dragonegg],
+[llvm_cv_llvmgcc_dragonegg="no"
+if test -n "$LLVMGCC" ; then
+  cp /dev/null conftest.c
+  $LLVMGCC -fplugin-arg-dragonegg-emit-ir -S -o - conftest.c > /dev/null 2>&1
+  if test $? -eq 0 ; then
+    llvm_cv_llvmgcc_dragonegg="yes"
+  fi
+  rm conftest.c
+fi])
+
+dnl Set the flags needed to emit LLVM IR and to disable optimizations
+dnl in llvmgcc
+if test "$llvm_cv_llvmgcc_dragonegg" = "yes" ; then
+  LLVMCC_EMITIR_FLAG="-fplugin-arg-dragonegg-emit-ir"
+  LLVMCC_DISABLEOPT_FLAGS="-fplugin-arg-dragonegg-disable-llvm-optzns"
+else
+  LLVMCC_EMITIR_FLAG="-emit-llvm"
+  LLVMCC_DISABLEOPT_FLAGS="-mllvm -disable-llvm-optzns"
+fi
+
+AC_SUBST(LLVMCC_EMITIR_FLAG)
+
+dnl See if the llvm-gcc executable can compile to LLVM assembly
+AC_CACHE_CHECK([whether llvm-gcc is sane],[llvm_cv_llvmgcc_sanity],
+[llvm_cv_llvmgcc_sanity="no"
+if test -n "$LLVMGCC" ; then
+  cp /dev/null conftest.c
+  $LLVMGCC "$LLVMCC_EMITIR_FLAG" -S -o - conftest.c | \
+      grep 'target datalayout =' > /dev/null 2>&1
+  if test $? -eq 0 ; then
+    llvm_cv_llvmgcc_sanity="yes"
+  fi
+  rm conftest.c
+fi])
+
+dnl Since we have a sane llvm-gcc, identify it and its sub-tools
+dnl Furthermore, add some information about the tools
+if test "$llvm_cv_llvmgcc_sanity" = "yes" ; then
+  AC_MSG_CHECKING([llvm-gcc component support])
+  llvmcc1path=`$LLVMGCC --print-prog-name=cc1`
+  AC_SUBST(LLVMCC1,$llvmcc1path)
+  llvmcc1pluspath=`$LLVMGCC --print-prog-name=cc1plus`
+  AC_SUBST(LLVMCC1PLUS,$llvmcc1pluspath)
+  llvmgccdir=`echo "$llvmcc1path" | sed 's,/libexec/.*,,'`
+  AC_SUBST(LLVMGCCDIR,$llvmgccdir)
+  llvmgcclangs=[`$LLVMGCC -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'`]
+  AC_SUBST(LLVMGCC_LANGS,$llvmgcclangs)
+  AC_SUBST(LLVMGCC_DRAGONEGG,$llvm_cv_llvmgcc_dragonegg)
+  AC_SUBST(LLVMCC_DISABLEOPT_FLAGS)
+  AC_MSG_RESULT([ok])
+fi
+
+dnl Propagate the shared library extension that the libltdl checks did to
+dnl the Makefiles so we can use it there too
+AC_SUBST(SHLIBEXT,$libltdl_cv_shlibext)
+
+dnl Propagate the run-time library path variable that the libltdl
+dnl checks found to the Makefiles so we can use it there too
+AC_SUBST(SHLIBPATH_VAR,$libltdl_cv_shlibpath_var)
+
+# Translate the various configuration directories and other basic
+# information into substitutions that will end up in Makefile.config.in
+# so that these configured values can be used by the makefiles
+if test "${prefix}" = "NONE" ; then
+  prefix="/usr/local"
+fi
+eval LLVM_PREFIX="${prefix}";
+eval LLVM_BINDIR="${prefix}/bin";
+eval LLVM_LIBDIR="${prefix}/lib";
+eval LLVM_DATADIR="${prefix}/share/llvm";
+eval LLVM_DOCSDIR="${prefix}/share/doc/llvm";
+eval LLVM_ETCDIR="${prefix}/etc/llvm";
+eval LLVM_INCLUDEDIR="${prefix}/include";
+eval LLVM_INFODIR="${prefix}/info";
+eval LLVM_MANDIR="${prefix}/man";
+LLVM_CONFIGTIME=`date`
+AC_SUBST(LLVM_PREFIX)
+AC_SUBST(LLVM_BINDIR)
+AC_SUBST(LLVM_LIBDIR)
+AC_SUBST(LLVM_DATADIR)
+AC_SUBST(LLVM_DOCSDIR)
+AC_SUBST(LLVM_ETCDIR)
+AC_SUBST(LLVM_INCLUDEDIR)
+AC_SUBST(LLVM_INFODIR)
+AC_SUBST(LLVM_MANDIR)
+AC_SUBST(LLVM_CONFIGTIME)
+
+# Place the various directories into the config.h file as #defines so that we
+# can know about the installation paths within LLVM.
+AC_DEFINE_UNQUOTED(LLVM_PREFIX,"$LLVM_PREFIX",
+                   [Installation prefix directory])
+AC_DEFINE_UNQUOTED(LLVM_BINDIR, "$LLVM_BINDIR",
+                   [Installation directory for binary executables])
+AC_DEFINE_UNQUOTED(LLVM_LIBDIR, "$LLVM_LIBDIR",
+                   [Installation directory for libraries])
+AC_DEFINE_UNQUOTED(LLVM_DATADIR, "$LLVM_DATADIR",
+                   [Installation directory for data files])
+AC_DEFINE_UNQUOTED(LLVM_DOCSDIR, "$LLVM_DOCSDIR",
+                   [Installation directory for documentation])
+AC_DEFINE_UNQUOTED(LLVM_ETCDIR, "$LLVM_ETCDIR",
+                   [Installation directory for config files])
+AC_DEFINE_UNQUOTED(LLVM_INCLUDEDIR, "$LLVM_INCLUDEDIR",
+                   [Installation directory for include files])
+AC_DEFINE_UNQUOTED(LLVM_INFODIR, "$LLVM_INFODIR",
+                   [Installation directory for .info files])
+AC_DEFINE_UNQUOTED(LLVM_MANDIR, "$LLVM_MANDIR",
+                   [Installation directory for man pages])
+AC_DEFINE_UNQUOTED(LLVM_CONFIGTIME, "$LLVM_CONFIGTIME",
+                   [Time at which LLVM was configured])
+AC_DEFINE_UNQUOTED(LLVM_HOSTTRIPLE, "$host",
+                   [Host triple we were built on])
+
+# Determine which bindings to build.
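+# For instance (an illustrative invocation), ./configure --enable-bindings=ocaml
+# selects the ocaml bindings explicitly; the prerequisite checks below then
+# insist on ocamlc and ocamldep being present.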
+if test "$BINDINGS_TO_BUILD" = auto ; then + BINDINGS_TO_BUILD="" + if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then + BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" + fi +fi +AC_SUBST(BINDINGS_TO_BUILD,$BINDINGS_TO_BUILD) + +# This isn't really configurey, but it avoids having to repeat the list in +# other files. +AC_SUBST(ALL_BINDINGS,ocaml) + +# Do any work necessary to ensure that bindings have what they need. +binding_prereqs_failed=0 +for a_binding in $BINDINGS_TO_BUILD ; do + case "$a_binding" in + ocaml) + if test "x$OCAMLC" = x ; then + AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc]) + binding_prereqs_failed=1 + fi + if test "x$OCAMLDEP" = x ; then + AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamldep not found. Try configure OCAMLDEP=/path/to/ocamldep]) + binding_prereqs_failed=1 + fi + if test "x$OCAMLOPT" = x ; then + AC_MSG_WARN([--enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt]) + dnl ocamlopt is optional! + fi + if test "x$with_ocaml_libdir" != xauto ; then + AC_SUBST(OCAML_LIBDIR,$with_ocaml_libdir) + else + ocaml_stdlib="`"$OCAMLC" -where`" + if test "$LLVM_PREFIX" '<' "$ocaml_stdlib" -a "$ocaml_stdlib" '<' "$LLVM_PREFIX~" + then + # ocaml stdlib is beneath our prefix; use stdlib + AC_SUBST(OCAML_LIBDIR,$ocaml_stdlib) + else + # ocaml stdlib is outside our prefix; use libdir/ocaml + AC_SUBST(OCAML_LIBDIR,$LLVM_LIBDIR/ocaml) + fi + fi + ;; + esac +done +if test "$binding_prereqs_failed" = 1 ; then + AC_MSG_ERROR([Prequisites for bindings not satisfied. Fix them or use configure --disable-bindings.]) +fi + +dnl Determine whether the compiler supports -fvisibility-inlines-hidden. +AC_CXX_USE_VISIBILITY_INLINES_HIDDEN + +dnl Determine linker rpath flag +if test "$llvm_cv_link_use_r" = "yes" ; then + RPATH="-Wl,-R" +else + RPATH="-Wl,-rpath" +fi +AC_SUBST(RPATH) + +dnl Determine linker rdynamic flag +if test "$llvm_cv_link_use_export_dynamic" = "yes" ; then + RDYNAMIC="-Wl,-export-dynamic" +else + RDYNAMIC="" +fi +AC_SUBST(RDYNAMIC) + +dnl===-----------------------------------------------------------------------=== +dnl=== +dnl=== SECTION 10: Specify the output files and generate it +dnl=== +dnl===-----------------------------------------------------------------------=== + +dnl Configure header files +dnl WARNING: dnl If you add or remove any of the following config headers, then +dnl you MUST also update Makefile.rules so that the variable FilesToConfig +dnl contains the same list of files as AC_CONFIG_HEADERS below. This ensures the +dnl files can be updated automatically when their *.in sources change. +AC_CONFIG_HEADERS([include/llvm/Config/config.h include/llvm/Config/llvm-config.h]) +AH_TOP([#ifndef CONFIG_H +#define CONFIG_H]) +AH_BOTTOM([#endif]) + +AC_CONFIG_FILES([include/llvm/Config/Targets.def]) +AC_CONFIG_FILES([include/llvm/Config/AsmPrinters.def]) +AC_CONFIG_FILES([include/llvm/Config/AsmParsers.def]) +AC_CONFIG_FILES([include/llvm/Config/Disassemblers.def]) +AC_CONFIG_HEADERS([include/llvm/Support/DataTypes.h]) + +dnl Configure the makefile's configuration data +AC_CONFIG_FILES([Makefile.config]) + +dnl Configure the RPM spec file for LLVM +AC_CONFIG_FILES([llvm.spec]) + +dnl Configure llvmc's Base plugin +AC_CONFIG_FILES([tools/llvmc/src/Base.td]) + +dnl Do the first stage of configuration for llvm-config.in. 
+AC_CONFIG_FILES([tools/llvm-config/llvm-config.in])
+
+dnl Do special configuration of Makefiles
+AC_CONFIG_COMMANDS([setup],,[llvm_src="${srcdir}"])
+AC_CONFIG_MAKEFILE(Makefile)
+AC_CONFIG_MAKEFILE(Makefile.common)
+AC_CONFIG_MAKEFILE(examples/Makefile)
+AC_CONFIG_MAKEFILE(lib/Makefile)
+AC_CONFIG_MAKEFILE(runtime/Makefile)
+AC_CONFIG_MAKEFILE(test/Makefile)
+AC_CONFIG_MAKEFILE(test/Makefile.tests)
+AC_CONFIG_MAKEFILE(unittests/Makefile)
+AC_CONFIG_MAKEFILE(tools/Makefile)
+AC_CONFIG_MAKEFILE(utils/Makefile)
+AC_CONFIG_MAKEFILE(projects/Makefile)
+AC_CONFIG_MAKEFILE(bindings/Makefile)
+AC_CONFIG_MAKEFILE(bindings/ocaml/Makefile.ocaml)
+
+dnl Finally, crank out the output
+AC_OUTPUT
diff --git a/final/autoconf/depcomp b/final/autoconf/depcomp
new file mode 100755
index 00000000000..11e2d3bfe1c
--- /dev/null
+++ b/final/autoconf/depcomp
@@ -0,0 +1,522 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2004-05-31.23
+
+# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+case $1 in
+  '')
+     echo "$0: No command. Try \`$0 --help' for more information." 1>&2
+     exit 1;
+     ;;
+  -h | --h*)
+    cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAM ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by `PROGRAM ARGS'.
+  object      Object file output by `PROGRAM ARGS'.
+  DEPDIR      Directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+  libtool     Whether libtool is used (yes/no).
+
+Report bugs to <bug-automake@gnu.org>.
+EOF
+    exit 0
+    ;;
+  -v | --v*)
+    echo "depcomp $scriptversion"
+    exit 0
+    ;;
+esac
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+  sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
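+# A typical invocation, as automake-generated rules drive it (file names
+# below are illustrative only):
+#   depmode=gcc3 source=foo.c object=foo.o depfile=.deps/foo.Po \
+#   tmpdepfile=.deps/foo.TPo ./depcomp gcc -c -o foo.o foo.c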
+if test "$depmode" = hp; then + # HP compiler uses -M and no extra arg. + gccflag=-M + depmode=gcc +fi + +if test "$depmode" = dashXmstdout; then + # This is just like dashmstdout with a different argument. + dashmflag=-xM + depmode=dashmstdout +fi + +case "$depmode" in +gcc3) +## gcc 3 implements dependency tracking that does exactly what +## we want. Yay! Note: for some reason libtool 1.4 doesn't like +## it if -MD -MP comes after the -MF stuff. Hmm. + "$@" -MT "$object" -MD -MP -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + mv "$tmpdepfile" "$depfile" + ;; + +gcc) +## There are various ways to get dependency output from gcc. Here's +## why we pick this rather obscure method: +## - Don't want to use -MD because we'd like the dependencies to end +## up in a subdir. Having to rename by hand is ugly. +## (We might end up doing this anyway to support other compilers.) +## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like +## -MM, not -M (despite what the docs say). +## - Using -M directly means running the compiler twice (even worse +## than renaming). + if test -z "$gccflag"; then + gccflag=-MD, + fi + "$@" -Wp,"$gccflag$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + echo "$object : \\" > "$depfile" + alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz +## The second -e expression handles DOS-style file names with drive letters. + sed -e 's/^[^:]*: / /' \ + -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile" +## This next piece of magic avoids the `deleted header file' problem. +## The problem is that when a header file which appears in a .P file +## is deleted, the dependency causes make to die (because there is +## typically no way to rebuild the header). We avoid this by adding +## dummy dependencies for each header file. Too bad gcc doesn't do +## this for us directly. + tr ' ' ' +' < "$tmpdepfile" | +## Some versions of gcc put a space before the `:'. On the theory +## that the space means something, we add a space to the output as +## well. +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +hp) + # This case exists only to let depend.m4 do its work. It works by + # looking at the text of this script. This case will never be run, + # since it is checked for above. + exit 1 + ;; + +sgi) + if test "$libtool" = yes; then + "$@" "-Wp,-MDupdate,$tmpdepfile" + else + "$@" -MDupdate "$tmpdepfile" + fi + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + + if test -f "$tmpdepfile"; then # yes, the sourcefile depend on other files + echo "$object : \\" > "$depfile" + + # Clip off the initial element (the dependent). Don't try to be + # clever and replace this with sed code, as IRIX sed won't handle + # lines with more than a fixed number of characters (4096 in + # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines; + # the IRIX cc adds comments like `#:fec' to the end of the + # dependency line. + tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \ + tr ' +' ' ' >> $depfile + echo >> $depfile + + # The second pass generates a dummy entry for each header file. 
+ tr ' ' ' +' < "$tmpdepfile" \ + | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \ + >> $depfile + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +aix) + # The C for AIX Compiler uses -M and outputs the dependencies + # in a .u file. In older versions, this file always lives in the + # current directory. Also, the AIX compiler puts `$object:' at the + # start of each line; $object doesn't have directory information. + # Version 6 uses the directory in both cases. + stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'` + tmpdepfile="$stripped.u" + if test "$libtool" = yes; then + "$@" -Wc,-M + else + "$@" -M + fi + stat=$? + + if test -f "$tmpdepfile"; then : + else + stripped=`echo "$stripped" | sed 's,^.*/,,'` + tmpdepfile="$stripped.u" + fi + + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + + if test -f "$tmpdepfile"; then + outname="$stripped.o" + # Each line is of the form `foo.o: dependent.h'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile" + sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile" + else + # The sourcefile does not contain any dependencies, so just + # store a dummy comment line, to avoid errors with the Makefile + # "include basename.Plo" scheme. + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +icc) + # Intel's C compiler understands `-MD -MF file'. However on + # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c + # ICC 7.0 will fill foo.d with something like + # foo.o: sub/foo.c + # foo.o: sub/foo.h + # which is wrong. We want: + # sub/foo.o: sub/foo.c + # sub/foo.o: sub/foo.h + # sub/foo.c: + # sub/foo.h: + # ICC 7.1 will output + # foo.o: sub/foo.c sub/foo.h + # and will wrap long lines using \ : + # foo.o: sub/foo.c ... \ + # sub/foo.h ... \ + # ... + + "$@" -MD -MF "$tmpdepfile" + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile" + exit $stat + fi + rm -f "$depfile" + # Each line is of the form `foo.o: dependent.h', + # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'. + # Do two passes, one to just change these to + # `$object: dependent.h' and one to simply `dependent.h:'. + sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile" + # Some versions of the HPUX 10.20 sed can't process this invocation + # correctly. Breaking it into two sed invocations is a workaround. + sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" | + sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +tru64) + # The Tru64 compiler uses -MD to generate dependencies as a side + # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'. + # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put + # dependencies in `foo.d' instead, so we check for that too. + # Subdirectories are respected. + dir=`echo "$object" | sed -e 's|/[^/]*$|/|'` + test "x$dir" = "x$object" && dir= + base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'` + + if test "$libtool" = yes; then + # Dependencies are output in .lo.d with libtool 1.4. + # With libtool 1.5 they are output both in $dir.libs/$base.o.d + # and in $dir.libs/$base.o.d and $dir$base.o.d. We process the + # latter, because the former will be cleaned when $dir.libs is + # erased. 
+ tmpdepfile1="$dir.libs/$base.lo.d" + tmpdepfile2="$dir$base.o.d" + tmpdepfile3="$dir.libs/$base.d" + "$@" -Wc,-MD + else + tmpdepfile1="$dir$base.o.d" + tmpdepfile2="$dir$base.d" + tmpdepfile3="$dir$base.d" + "$@" -MD + fi + + stat=$? + if test $stat -eq 0; then : + else + rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3" + exit $stat + fi + + if test -f "$tmpdepfile1"; then + tmpdepfile="$tmpdepfile1" + elif test -f "$tmpdepfile2"; then + tmpdepfile="$tmpdepfile2" + else + tmpdepfile="$tmpdepfile3" + fi + if test -f "$tmpdepfile"; then + sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile" + # That's a tab and a space in the []. + sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile" + else + echo "#dummy" > "$depfile" + fi + rm -f "$tmpdepfile" + ;; + +#nosideeffect) + # This comment above is used by automake to tell side-effect + # dependency tracking mechanisms from slower ones. + +dashmstdout) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o. + "$@" || exit $? + + # Remove the call to Libtool. + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + test -z "$dashmflag" && dashmflag=-M + # Require at least two characters before searching for `:' + # in the target name. This is to cope with DOS-style filenames: + # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise. + "$@" $dashmflag | + sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + tr ' ' ' +' < "$tmpdepfile" | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +dashXmstdout) + # This case only exists to satisfy depend.m4. It is never actually + # run, as this mode is specially recognized in the preamble. + exit 1 + ;; + +makedepend) + "$@" || exit $? + # Remove any Libtool call + if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + # X makedepend + shift + cleared=no + for arg in "$@"; do + case $cleared in + no) + set ""; shift + cleared=yes ;; + esac + case "$arg" in + -D*|-I*) + set fnord "$@" "$arg"; shift ;; + # Strip any option that makedepend may not understand. Remove + # the object too, otherwise makedepend will parse it as a source file. + -*|$object) + ;; + *) + set fnord "$@" "$arg"; shift ;; + esac + done + obj_suffix="`echo $object | sed 's/^.*\././'`" + touch "$tmpdepfile" + ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@" + rm -f "$depfile" + cat < "$tmpdepfile" > "$depfile" + sed '1,2d' "$tmpdepfile" | tr ' ' ' +' | \ +## Some versions of the HPUX 10.20 sed can't process this invocation +## correctly. Breaking it into two sed invocations is a workaround. + sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" "$tmpdepfile".bak + ;; + +cpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout. + "$@" || exit $? + + # Remove the call to Libtool. 
+ if test "$libtool" = yes; then + while test $1 != '--mode=compile'; do + shift + done + shift + fi + + # Remove `-o $object'. + IFS=" " + for arg + do + case $arg in + -o) + shift + ;; + $object) + shift + ;; + *) + set fnord "$@" "$arg" + shift # fnord + shift # $arg + ;; + esac + done + + "$@" -E | + sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' | + sed '$ s: \\$::' > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + cat < "$tmpdepfile" >> "$depfile" + sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +msvisualcpp) + # Important note: in order to support this mode, a compiler *must* + # always write the preprocessed file to stdout, regardless of -o, + # because we must use -o when running libtool. + "$@" || exit $? + IFS=" " + for arg + do + case "$arg" in + "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI") + set fnord "$@" + shift + shift + ;; + *) + set fnord "$@" "$arg" + shift + shift + ;; + esac + done + "$@" -E | + sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile" + rm -f "$depfile" + echo "$object : \\" > "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile" + echo " " >> "$depfile" + . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile" + rm -f "$tmpdepfile" + ;; + +none) + exec "$@" + ;; + +*) + echo "Unknown depmode $depmode" 1>&2 + exit 1 + ;; +esac + +exit 0 + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/final/autoconf/install-sh b/final/autoconf/install-sh new file mode 100755 index 00000000000..dd97db7aa1c --- /dev/null +++ b/final/autoconf/install-sh @@ -0,0 +1,322 @@ +#!/bin/sh +# install - install a program, script, or datafile + +scriptversion=2004-09-10.20 + +# This originates from X11R5 (mit/util/scripts/install.sh), which was +# later released in X11R6 (xc/config/util/install.sh) with the +# following copyright and license. +# +# Copyright (C) 1994 X Consortium +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN +# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC- +# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +# Except as contained in this notice, the name of the X Consortium shall not +# be used in advertising or otherwise to promote the sale, use or other deal- +# ings in this Software without prior written authorization from the X Consor- +# tium. 
+# +# +# FSF changes to this file are in the public domain. +# +# Calling this script install-sh is preferred over install.sh, to prevent +# `make' implicit rules from creating a file called install from it +# when there is no Makefile. +# +# This script is compatible with the BSD install script, but was written +# from scratch. It can only install one file at a time, a restriction +# shared with many OS's install programs. + +# set DOITPROG to echo to test this script + +# Don't use :- since 4.3BSD and earlier shells don't like it. +doit="${DOITPROG-}" + +# put in absolute paths if you don't have them in your path; or use env. vars. + +mvprog="${MVPROG-mv}" +cpprog="${CPPROG-cp}" +chmodprog="${CHMODPROG-chmod}" +chownprog="${CHOWNPROG-chown}" +chgrpprog="${CHGRPPROG-chgrp}" +stripprog="${STRIPPROG-strip}" +rmprog="${RMPROG-rm}" +mkdirprog="${MKDIRPROG-mkdir}" + +chmodcmd="$chmodprog 0755" +chowncmd= +chgrpcmd= +stripcmd= +rmcmd="$rmprog -f" +mvcmd="$mvprog" +src= +dst= +dir_arg= +dstarg= +no_target_directory= + +usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE + or: $0 [OPTION]... SRCFILES... DIRECTORY + or: $0 [OPTION]... -t DIRECTORY SRCFILES... + or: $0 [OPTION]... -d DIRECTORIES... + +In the 1st form, copy SRCFILE to DSTFILE. +In the 2nd and 3rd, copy all SRCFILES to DIRECTORY. +In the 4th, create DIRECTORIES. + +Options: +-c (ignored) +-d create directories instead of installing files. +-g GROUP $chgrpprog installed files to GROUP. +-m MODE $chmodprog installed files to MODE. +-o USER $chownprog installed files to USER. +-s $stripprog installed files. +-t DIRECTORY install into DIRECTORY. +-T report an error if DSTFILE is a directory. +--help display this help and exit. +--version display version info and exit. + +Environment variables override the default commands: + CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG +" + +while test -n "$1"; do + case $1 in + -c) shift + continue;; + + -d) dir_arg=true + shift + continue;; + + -g) chgrpcmd="$chgrpprog $2" + shift + shift + continue;; + + --help) echo "$usage"; exit 0;; + + -m) chmodcmd="$chmodprog $2" + shift + shift + continue;; + + -o) chowncmd="$chownprog $2" + shift + shift + continue;; + + -s) stripcmd=$stripprog + shift + continue;; + + -t) dstarg=$2 + shift + shift + continue;; + + -T) no_target_directory=true + shift + continue;; + + --version) echo "$0 $scriptversion"; exit 0;; + + *) # When -d is used, all remaining arguments are directories to create. + # When -t is used, the destination is already specified. + test -n "$dir_arg$dstarg" && break + # Otherwise, the last argument is the destination. Remove it from $@. + for arg + do + if test -n "$dstarg"; then + # $@ is not empty: it contains at least $arg. + set fnord "$@" "$dstarg" + shift # fnord + fi + shift # arg + dstarg=$arg + done + break;; + esac +done + +if test -z "$1"; then + if test -z "$dir_arg"; then + echo "$0: no input file specified." >&2 + exit 1 + fi + # It's OK to call `install-sh -d' without argument. + # This can happen when creating conditional directories. + exit 0 +fi + +for src +do + # Protect names starting with `-'. + case $src in + -*) src=./$src ;; + esac + + if test -n "$dir_arg"; then + dst=$src + src= + + if test -d "$dst"; then + mkdircmd=: + chmodcmd= + else + mkdircmd=$mkdirprog + fi + else + # Waiting for this to be detected by the "$cpprog $src $dsttmp" command + # might cause directories to be created, which would be especially bad + # if $src (and thus $dsttmp) contains '*'. + if test ! -f "$src" && test ! 
-d "$src"; then + echo "$0: $src does not exist." >&2 + exit 1 + fi + + if test -z "$dstarg"; then + echo "$0: no destination specified." >&2 + exit 1 + fi + + dst=$dstarg + # Protect names starting with `-'. + case $dst in + -*) dst=./$dst ;; + esac + + # If destination is a directory, append the input filename; won't work + # if double slashes aren't ignored. + if test -d "$dst"; then + if test -n "$no_target_directory"; then + echo "$0: $dstarg: Is a directory" >&2 + exit 1 + fi + dst=$dst/`basename "$src"` + fi + fi + + # This sed command emulates the dirname command. + dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` + + # Make sure that the destination directory exists. + + # Skip lots of stat calls in the usual case. + if test ! -d "$dstdir"; then + defaultIFS=' + ' + IFS="${IFS-$defaultIFS}" + + oIFS=$IFS + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'` + IFS=$oIFS + + pathcomp= + + while test $# -ne 0 ; do + pathcomp=$pathcomp$1 + shift + if test ! -d "$pathcomp"; then + $mkdirprog "$pathcomp" + # mkdir can fail with a `File exist' error in case several + # install-sh are creating the directory concurrently. This + # is OK. + test -d "$pathcomp" || exit + fi + pathcomp=$pathcomp/ + done + fi + + if test -n "$dir_arg"; then + $doit $mkdircmd "$dst" \ + && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; } + + else + dstfile=`basename "$dst"` + + # Make a couple of temp file names in the proper directory. + dsttmp=$dstdir/_inst.$$_ + rmtmp=$dstdir/_rm.$$_ + + # Trap to clean up those temp files at exit. + trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0 + trap '(exit $?); exit' 1 2 13 15 + + # Copy the file name to the temp name. + $doit $cpprog "$src" "$dsttmp" && + + # and set any options; do chmod last to preserve setuid bits. + # + # If any of these fail, we abort the whole thing. If we want to + # ignore errors from any of these, just make sure not to ignore + # errors from the above "$doit $cpprog $src $dsttmp" command. + # + { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \ + && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \ + && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \ + && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } && + + # Now rename the file to the real destination. + { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \ + || { + # The rename failed, perhaps because mv can't rename something else + # to itself, or perhaps because mv is so ancient that it does not + # support -f. + + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + if test -f "$dstdir/$dstfile"; then + $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \ + || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \ + || { + echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2 + (exit 1); exit + } + else + : + fi + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dstdir/$dstfile" + } + } + fi || { (exit 1); exit; } +done + +# The final little trick to "correctly" pass the exit status to the exit trap. 
+{ + (exit 0); exit +} + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/final/autoconf/ltmain.sh b/final/autoconf/ltmain.sh new file mode 100644 index 00000000000..06823e057a5 --- /dev/null +++ b/final/autoconf/ltmain.sh @@ -0,0 +1,6863 @@ +# ltmain.sh - Provide generalized library-building support services. +# NOTE: Changing this file will not affect anything until you rerun configure. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005 +# Free Software Foundation, Inc. +# Originally by Gordon Matzigkeit , 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +basename="s,^.*/,,g" + +# Work around backward compatibility issue on IRIX 6.5. On IRIX 6.4+, sh +# is ksh but when the shell is invoked as "sh" and the current value of +# the _XPG environment variable is not equal to 1 (one), the special +# positional parameter $0, within a function call, is the name of the +# function. +progpath="$0" + +# The name of this program: +progname=`echo "$progpath" | $SED $basename` +modename="$progname" + +# Global variables: +EXIT_SUCCESS=0 +EXIT_FAILURE=1 + +PROGRAM=ltmain.sh +PACKAGE=libtool +VERSION=1.5.22 +TIMESTAMP=" (1.1220.2.365 2005/12/18 22:14:06)" + +# See if we are running on zsh, and set the options which allow our +# commands through without removal of \ escapes. +if test -n "${ZSH_VERSION+set}" ; then + setopt NO_GLOB_SUBST +fi + +# Check that we have a working $echo. +if test "X$1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X$1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then + # Yippee, $echo works! + : +else + # Restart under the correct shell, and then maybe $echo will work. + exec $SHELL "$progpath" --no-reexec ${1+"$@"} +fi + +if test "X$1" = X--fallback-echo; then + # used as fallback echo + shift + cat <&2 + $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit $EXIT_FAILURE +fi + +# Global variables. 
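+# A note on two of the variables set below: --dry-run turns `run' into
+# `:' so guarded commands such as `$run $rm $removelist' become no-ops,
+# while `show' (normally $echo, or `:' under --quiet) prints what would
+# have been executed, e.g.:
+#   libtool --dry-run --mode=compile gcc -c foo.c   # prints, runs nothing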
+mode=$default_mode +nonopt= +prev= +prevopt= +run= +show="$echo" +show_help= +execute_dlfiles= +duplicate_deps=no +preserve_args= +lo2o="s/\\.lo\$/.${objext}/" +o2lo="s/\\.${objext}\$/.lo/" + +##################################### +# Shell function definitions: +# This seems to be the best place for them + +# func_mktempdir [string] +# Make a temporary directory that won't clash with other running +# libtool processes, and avoids race conditions if possible. If +# given, STRING is the basename for that directory. +func_mktempdir () +{ + my_template="${TMPDIR-/tmp}/${1-$progname}" + + if test "$run" = ":"; then + # Return a directory name, but don't create it in dry-run mode + my_tmpdir="${my_template}-$$" + else + + # If mktemp works, use that first and foremost + my_tmpdir=`mktemp -d "${my_template}-XXXXXXXX" 2>/dev/null` + + if test ! -d "$my_tmpdir"; then + # Failing that, at least try and use $RANDOM to avoid a race + my_tmpdir="${my_template}-${RANDOM-0}$$" + + save_mktempdir_umask=`umask` + umask 0077 + $mkdir "$my_tmpdir" + umask $save_mktempdir_umask + fi + + # If we're not in dry-run mode, bomb out on failure + test -d "$my_tmpdir" || { + $echo "cannot create temporary directory \`$my_tmpdir'" 1>&2 + exit $EXIT_FAILURE + } + fi + + $echo "X$my_tmpdir" | $Xsed +} + + +# func_win32_libid arg +# return the library type of file 'arg' +# +# Need a lot of goo to handle *both* DLLs and import libs +# Has to be a shell function in order to 'eat' the argument +# that is supplied when $file_magic_command is called. +func_win32_libid () +{ + win32_libid_type="unknown" + win32_fileres=`file -L $1 2>/dev/null` + case $win32_fileres in + *ar\ archive\ import\ library*) # definitely import + win32_libid_type="x86 archive import" + ;; + *ar\ archive*) # could be an import, or static + if eval $OBJDUMP -f $1 | $SED -e '10q' 2>/dev/null | \ + $EGREP -e 'file format pe-i386(.*architecture: i386)?' >/dev/null ; then + win32_nmres=`eval $NM -f posix -A $1 | \ + $SED -n -e '1,100{/ I /{s,.*,import,;p;q;};}'` + case $win32_nmres in + import*) win32_libid_type="x86 archive import";; + *) win32_libid_type="x86 archive static";; + esac + fi + ;; + *DLL*) + win32_libid_type="x86 DLL" + ;; + *executable*) # but shell scripts are "executable" too... + case $win32_fileres in + *MS\ Windows\ PE\ Intel*) + win32_libid_type="x86 DLL" + ;; + esac + ;; + esac + $echo $win32_libid_type +} + + +# func_infer_tag arg +# Infer tagged configuration to use if any are available and +# if one wasn't chosen via the "--tag" command line option. +# Only attempt this if the compiler in the base compile +# command doesn't match the default compiler. +# arg is usually of the form 'gcc ...' +func_infer_tag () +{ + if test -n "$available_tags" && test -z "$tagname"; then + CC_quoted= + for arg in $CC; do + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + CC_quoted="$CC_quoted $arg" + done + case $@ in + # Blanks in the command may have been stripped by the calling shell, + # but not from the CC environment variable when configure was run. + " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*) ;; + # Blanks at the start of $base_compile will cause this to fail + # if we don't check for them as well. + *) + for z in $available_tags; do + if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $z$" < "$progpath" > /dev/null; then + # Evaluate the configuration. 
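+	  # Tagged configurations live verbatim inside the generated
+	  # `libtool' script between marker comments, e.g.:
+	  #   # ### BEGIN LIBTOOL TAG CONFIG: CXX
+	  #   CC="g++"
+	  #   ...
+	  #   # ### END LIBTOOL TAG CONFIG: CXX
+	  # The sed below cuts that region out so it can be evaled.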
+ eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$z'$/,/^# ### END LIBTOOL TAG CONFIG: '$z'$/p' < $progpath`" + CC_quoted= + for arg in $CC; do + # Double-quote args containing other shell metacharacters. + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + CC_quoted="$CC_quoted $arg" + done + case "$@ " in + " $CC "* | "$CC "* | " `$echo $CC` "* | "`$echo $CC` "* | " $CC_quoted"* | "$CC_quoted "* | " `$echo $CC_quoted` "* | "`$echo $CC_quoted` "*) + # The compiler in the base compile command matches + # the one in the tagged configuration. + # Assume this is the tagged configuration we want. + tagname=$z + break + ;; + esac + fi + done + # If $tagname still isn't set, then no tagged configuration + # was found and let the user know that the "--tag" command + # line option must be used. + if test -z "$tagname"; then + $echo "$modename: unable to infer tagged configuration" + $echo "$modename: specify a tag with \`--tag'" 1>&2 + exit $EXIT_FAILURE +# else +# $echo "$modename: using $tagname tagged configuration" + fi + ;; + esac + fi +} + + +# func_extract_an_archive dir oldlib +func_extract_an_archive () +{ + f_ex_an_ar_dir="$1"; shift + f_ex_an_ar_oldlib="$1" + + $show "(cd $f_ex_an_ar_dir && $AR x $f_ex_an_ar_oldlib)" + $run eval "(cd \$f_ex_an_ar_dir && $AR x \$f_ex_an_ar_oldlib)" || exit $? + if ($AR t "$f_ex_an_ar_oldlib" | sort | sort -uc >/dev/null 2>&1); then + : + else + $echo "$modename: ERROR: object name conflicts: $f_ex_an_ar_dir/$f_ex_an_ar_oldlib" 1>&2 + exit $EXIT_FAILURE + fi +} + +# func_extract_archives gentop oldlib ... +func_extract_archives () +{ + my_gentop="$1"; shift + my_oldlibs=${1+"$@"} + my_oldobjs="" + my_xlib="" + my_xabs="" + my_xdir="" + my_status="" + + $show "${rm}r $my_gentop" + $run ${rm}r "$my_gentop" + $show "$mkdir $my_gentop" + $run $mkdir "$my_gentop" + my_status=$? + if test "$my_status" -ne 0 && test ! -d "$my_gentop"; then + exit $my_status + fi + + for my_xlib in $my_oldlibs; do + # Extract the objects. + case $my_xlib in + [\\/]* | [A-Za-z]:[\\/]*) my_xabs="$my_xlib" ;; + *) my_xabs=`pwd`"/$my_xlib" ;; + esac + my_xlib=`$echo "X$my_xlib" | $Xsed -e 's%^.*/%%'` + my_xdir="$my_gentop/$my_xlib" + + $show "${rm}r $my_xdir" + $run ${rm}r "$my_xdir" + $show "$mkdir $my_xdir" + $run $mkdir "$my_xdir" + exit_status=$? + if test "$exit_status" -ne 0 && test ! -d "$my_xdir"; then + exit $exit_status + fi + case $host in + *-darwin*) + $show "Extracting $my_xabs" + # Do not bother doing anything if just a dry run + if test -z "$run"; then + darwin_orig_dir=`pwd` + cd $my_xdir || exit $? 
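+	  # Mach-O archives may be "fat" (multi-architecture); lipo
+	  # reports the slices, e.g. (hypothetical library):
+	  #   lipo -info libfoo.a
+	  #   => Architectures in the fat file: libfoo.a are: i386 ppc
+	  # Each slice is thinned out and extracted on its own below, and
+	  # the per-architecture objects are re-merged with `lipo -create'.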
+ darwin_archive=$my_xabs + darwin_curdir=`pwd` + darwin_base_archive=`$echo "X$darwin_archive" | $Xsed -e 's%^.*/%%'` + darwin_arches=`lipo -info "$darwin_archive" 2>/dev/null | $EGREP Architectures 2>/dev/null` + if test -n "$darwin_arches"; then + darwin_arches=`echo "$darwin_arches" | $SED -e 's/.*are://'` + darwin_arch= + $show "$darwin_base_archive has multiple architectures $darwin_arches" + for darwin_arch in $darwin_arches ; do + mkdir -p "unfat-$$/${darwin_base_archive}-${darwin_arch}" + lipo -thin $darwin_arch -output "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" "${darwin_archive}" + cd "unfat-$$/${darwin_base_archive}-${darwin_arch}" + func_extract_an_archive "`pwd`" "${darwin_base_archive}" + cd "$darwin_curdir" + $rm "unfat-$$/${darwin_base_archive}-${darwin_arch}/${darwin_base_archive}" + done # $darwin_arches + ## Okay now we have a bunch of thin objects, gotta fatten them up :) + darwin_filelist=`find unfat-$$ -type f -name \*.o -print -o -name \*.lo -print| xargs basename | sort -u | $NL2SP` + darwin_file= + darwin_files= + for darwin_file in $darwin_filelist; do + darwin_files=`find unfat-$$ -name $darwin_file -print | $NL2SP` + lipo -create -output "$darwin_file" $darwin_files + done # $darwin_filelist + ${rm}r unfat-$$ + cd "$darwin_orig_dir" + else + cd "$darwin_orig_dir" + func_extract_an_archive "$my_xdir" "$my_xabs" + fi # $darwin_arches + fi # $run + ;; + *) + func_extract_an_archive "$my_xdir" "$my_xabs" + ;; + esac + my_oldobjs="$my_oldobjs "`find $my_xdir -name \*.$objext -print -o -name \*.lo -print | $NL2SP` + done + func_extract_archives_result="$my_oldobjs" +} +# End of Shell function definitions +##################################### + +# Darwin sucks +eval std_shrext=\"$shrext_cmds\" + +disable_libs=no + +# Parse our command line options once, thoroughly. +while test "$#" -gt 0 +do + arg="$1" + shift + + case $arg in + -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; + *) optarg= ;; + esac + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + execute_dlfiles) + execute_dlfiles="$execute_dlfiles $arg" + ;; + tag) + tagname="$arg" + preserve_args="${preserve_args}=$arg" + + # Check whether tagname contains only valid characters + case $tagname in + *[!-_A-Za-z0-9,/]*) + $echo "$progname: invalid tag name: $tagname" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + case $tagname in + CC) + # Don't test for the "default" C tag, as we know, it's there, but + # not specially marked. + ;; + *) + if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "$progpath" > /dev/null; then + taglist="$taglist $tagname" + # Evaluate the configuration. + eval "`${SED} -n -e '/^# ### BEGIN LIBTOOL TAG CONFIG: '$tagname'$/,/^# ### END LIBTOOL TAG CONFIG: '$tagname'$/p' < $progpath`" + else + $echo "$progname: ignoring unknown tag $tagname" 1>&2 + fi + ;; + esac + ;; + *) + eval "$prev=\$arg" + ;; + esac + + prev= + prevopt= + continue + fi + + # Have we seen a non-optional argument yet? + case $arg in + --help) + show_help=yes + ;; + + --version) + $echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" + $echo + $echo "Copyright (C) 2005 Free Software Foundation, Inc." + $echo "This is free software; see the source for copying conditions. There is NO" + $echo "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." + exit $? 
+ ;; + + --config) + ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $progpath + # Now print the configurations for the tags. + for tagname in $taglist; do + ${SED} -n -e "/^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$/,/^# ### END LIBTOOL TAG CONFIG: $tagname$/p" < "$progpath" + done + exit $? + ;; + + --debug) + $echo "$progname: enabling shell trace mode" + set -x + preserve_args="$preserve_args $arg" + ;; + + --dry-run | -n) + run=: + ;; + + --features) + $echo "host: $host" + if test "$build_libtool_libs" = yes; then + $echo "enable shared libraries" + else + $echo "disable shared libraries" + fi + if test "$build_old_libs" = yes; then + $echo "enable static libraries" + else + $echo "disable static libraries" + fi + exit $? + ;; + + --finish) mode="finish" ;; + + --mode) prevopt="--mode" prev=mode ;; + --mode=*) mode="$optarg" ;; + + --preserve-dup-deps) duplicate_deps="yes" ;; + + --quiet | --silent) + show=: + preserve_args="$preserve_args $arg" + ;; + + --tag) + prevopt="--tag" + prev=tag + preserve_args="$preserve_args --tag" + ;; + --tag=*) + set tag "$optarg" ${1+"$@"} + shift + prev=tag + preserve_args="$preserve_args --tag" + ;; + + -dlopen) + prevopt="-dlopen" + prev=execute_dlfiles + ;; + + -*) + $echo "$modename: unrecognized option \`$arg'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + ;; + + *) + nonopt="$arg" + break + ;; + esac +done + +if test -n "$prevopt"; then + $echo "$modename: option \`$prevopt' requires an argument" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE +fi + +case $disable_libs in +no) + ;; +shared) + build_libtool_libs=no + build_old_libs=yes + ;; +static) + build_old_libs=`case $build_libtool_libs in yes) echo no;; *) echo yes;; esac` + ;; +esac + +# If this variable is set in any of the actions, the command in it +# will be execed at the end. This prevents here-documents from being +# left over by shells. +exec_cmd= + +if test -z "$show_help"; then + + # Infer the operation mode. + if test -z "$mode"; then + $echo "*** Warning: inferring the mode of operation is deprecated." 1>&2 + $echo "*** Future versions of Libtool will require --mode=MODE be specified." 1>&2 + case $nonopt in + *cc | cc* | *++ | gcc* | *-gcc* | g++* | xlc*) + mode=link + for arg + do + case $arg in + -c) + mode=compile + break + ;; + esac + done + ;; + *db | *dbx | *strace | *truss) + mode=execute + ;; + *install*|cp|mv) + mode=install + ;; + *rm) + mode=uninstall + ;; + *) + # If we have no mode, but dlfiles were specified, then do execute mode. + test -n "$execute_dlfiles" && mode=execute + + # Just use the default operation mode. + if test -z "$mode"; then + if test -n "$nonopt"; then + $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 + else + $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 + fi + fi + ;; + esac + fi + + # Only execute mode is allowed to have -dlopen flags. + if test -n "$execute_dlfiles" && test "$mode" != execute; then + $echo "$modename: unrecognized option \`-dlopen'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Change the help message to a mode-specific one. + generic_help="$help" + help="Try \`$modename --help --mode=$mode' for more information." + + # These modes are in order of execution frequency so that they run quickly. + case $mode in + # libtool compile mode + compile) + modename="$modename: compile" + # Get the compilation command and the source file. 
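+    # In this mode the remaining arguments are the real compiler command
+    # plus a single source file, e.g. (illustrative):
+    #   libtool --mode=compile gcc -c foo.c
+    # which typically compiles foo.c twice (PIC and non-PIC) and writes
+    # foo.lo, a descriptor pointing at .libs/foo.o and foo.o.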
+ base_compile= + srcfile="$nonopt" # always keep a non-empty value in "srcfile" + suppress_opt=yes + suppress_output= + arg_mode=normal + libobj= + later= + + for arg + do + case $arg_mode in + arg ) + # do not "continue". Instead, add this to base_compile + lastarg="$arg" + arg_mode=normal + ;; + + target ) + libobj="$arg" + arg_mode=normal + continue + ;; + + normal ) + # Accept any command-line options. + case $arg in + -o) + if test -n "$libobj" ; then + $echo "$modename: you cannot specify \`-o' more than once" 1>&2 + exit $EXIT_FAILURE + fi + arg_mode=target + continue + ;; + + -static | -prefer-pic | -prefer-non-pic) + later="$later $arg" + continue + ;; + + -no-suppress) + suppress_opt=no + continue + ;; + + -Xcompiler) + arg_mode=arg # the next one goes into the "base_compile" arg list + continue # The current "srcfile" will either be retained or + ;; # replaced later. I would guess that would be a bug. + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` + lastarg= + save_ifs="$IFS"; IFS=',' + for arg in $args; do + IFS="$save_ifs" + + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, so we specify it separately. + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + lastarg="$lastarg $arg" + done + IFS="$save_ifs" + lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` + + # Add the arguments to base_compile. + base_compile="$base_compile $lastarg" + continue + ;; + + * ) + # Accept the current argument as the source file. + # The previous "srcfile" becomes the current argument. + # + lastarg="$srcfile" + srcfile="$arg" + ;; + esac # case $arg + ;; + esac # case $arg_mode + + # Aesthetically quote the previous argument. + lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` + + case $lastarg in + # Double-quote args containing other shell metacharacters. + # Many Bourne shells cannot handle close brackets correctly + # in scan sets, and some SunOS ksh mistreat backslash-escaping + # in scan sets (worked around with variable expansion), + # and furthermore cannot handle '|' '&' '(' ')' in scan sets + # at all, so we specify them separately. + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + lastarg="\"$lastarg\"" + ;; + esac + + base_compile="$base_compile $lastarg" + done # for arg + + case $arg_mode in + arg) + $echo "$modename: you must specify an argument for -Xcompile" + exit $EXIT_FAILURE + ;; + target) + $echo "$modename: you must specify a target with \`-o'" 1>&2 + exit $EXIT_FAILURE + ;; + *) + # Get the name of the library object. + [ -z "$libobj" ] && libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` + ;; + esac + + # Recognize several different file suffixes. 
+ # If the user specifies -o file.o, it is replaced with file.lo + xform='[cCFSifmso]' + case $libobj in + *.ada) xform=ada ;; + *.adb) xform=adb ;; + *.ads) xform=ads ;; + *.asm) xform=asm ;; + *.c++) xform=c++ ;; + *.cc) xform=cc ;; + *.ii) xform=ii ;; + *.class) xform=class ;; + *.cpp) xform=cpp ;; + *.cxx) xform=cxx ;; + *.f90) xform=f90 ;; + *.for) xform=for ;; + *.java) xform=java ;; + esac + + libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` + + case $libobj in + *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; + *) + $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + func_infer_tag $base_compile + + for arg in $later; do + case $arg in + -static) + build_old_libs=yes + continue + ;; + + -prefer-pic) + pic_mode=yes + continue + ;; + + -prefer-non-pic) + pic_mode=no + continue + ;; + esac + done + + qlibobj=`$echo "X$libobj" | $Xsed -e "$sed_quote_subst"` + case $qlibobj in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qlibobj="\"$qlibobj\"" ;; + esac + test "X$libobj" != "X$qlibobj" \ + && $echo "X$libobj" | grep '[]~#^*{};<>?"'"'"' &()|`$[]' \ + && $echo "$modename: libobj name \`$libobj' may not contain shell special characters." + objname=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` + xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$obj"; then + xdir= + else + xdir=$xdir/ + fi + lobj=${xdir}$objdir/$objname + + if test -z "$base_compile"; then + $echo "$modename: you must specify a compilation command" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Delete any leftover library objects. + if test "$build_old_libs" = yes; then + removelist="$obj $lobj $libobj ${libobj}T" + else + removelist="$lobj $libobj ${libobj}T" + fi + + $run $rm $removelist + trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15 + + # On Cygwin there's no "real" PIC flag so we must build both object types + case $host_os in + cygwin* | mingw* | pw32* | os2*) + pic_mode=default + ;; + esac + if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then + # non-PIC code in shared libraries is not supported + pic_mode=default + fi + + # Calculate the filename of the output object if compiler does + # not support -o with -c + if test "$compiler_c_o" = no; then + output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext} + lockfile="$output_obj.lock" + removelist="$removelist $output_obj $lockfile" + trap "$run $rm $removelist; exit $EXIT_FAILURE" 1 2 15 + else + output_obj= + need_locks=no + lockfile= + fi + + # Lock this critical section if it is needed + # We use this script file to make the link, it avoids creating a new file + if test "$need_locks" = yes; then + until $run ln "$progpath" "$lockfile" 2>/dev/null; do + $show "Waiting for $lockfile to be removed" + sleep 2 + done + elif test "$need_locks" = warn; then + if test -f "$lockfile"; then + $echo "\ +*** ERROR, $lockfile exists and contains: +`cat $lockfile 2>/dev/null` + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." 
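+      # The lock in question is taken above by hard-linking this script
+      # to "$output_obj.lock"; `ln' is atomic, so two concurrent compiles
+      # of the same object cannot both acquire it.  With need_locks=yes a
+      # second `make -j' job simply polls:
+      #   Waiting for foo.o.lock to be removed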
+ + $run $rm $removelist + exit $EXIT_FAILURE + fi + $echo "$srcfile" > "$lockfile" + fi + + if test -n "$fix_srcfile_path"; then + eval srcfile=\"$fix_srcfile_path\" + fi + qsrcfile=`$echo "X$srcfile" | $Xsed -e "$sed_quote_subst"` + case $qsrcfile in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qsrcfile="\"$qsrcfile\"" ;; + esac + + $run $rm "$libobj" "${libobj}T" + + # Create a libtool object file (analogous to a ".la" file), + # but don't create it if we're doing a dry run. + test -z "$run" && cat > ${libobj}T </dev/null`" != "X$srcfile"; then + $echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed, then go on to compile the next one + if test -n "$output_obj" && test "X$output_obj" != "X$lobj"; then + $show "$mv $output_obj $lobj" + if $run $mv $output_obj $lobj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Append the name of the PIC object to the libtool object file. + test -z "$run" && cat >> ${libobj}T <> ${libobj}T </dev/null`" != "X$srcfile"; then + $echo "\ +*** ERROR, $lockfile contains: +`cat $lockfile 2>/dev/null` + +but it should contain: +$srcfile + +This indicates that another process is trying to use the same +temporary object file, and libtool could not work around it because +your compiler does not support \`-c' and \`-o' together. If you +repeat this compilation, it may succeed, by chance, but you had better +avoid parallel builds (make -j) in this platform, or get a better +compiler." + + $run $rm $removelist + exit $EXIT_FAILURE + fi + + # Just move the object if needed + if test -n "$output_obj" && test "X$output_obj" != "X$obj"; then + $show "$mv $output_obj $obj" + if $run $mv $output_obj $obj; then : + else + error=$? + $run $rm $removelist + exit $error + fi + fi + + # Append the name of the non-PIC object the libtool object file. + # Only append if the libtool object file exists. + test -z "$run" && cat >> ${libobj}T <> ${libobj}T <&2 + fi + if test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=yes + else + if test -z "$pic_flag" && test -n "$link_static_flag"; then + dlopen_self=$dlopen_self_static + fi + prefer_static_libs=built + fi + build_libtool_libs=no + build_old_libs=yes + break + ;; + esac + done + + # See if our shared archives depend on static archives. + test -n "$old_archive_from_new_cmds" && build_old_libs=yes + + # Go through the arguments, transforming them on the way. + while test "$#" -gt 0; do + arg="$1" + shift + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test + ;; + *) qarg=$arg ;; + esac + libtool_args="$libtool_args $qarg" + + # If the previous option needs an argument, assign it. + if test -n "$prev"; then + case $prev in + output) + compile_command="$compile_command @OUTPUT@" + finalize_command="$finalize_command @OUTPUT@" + ;; + esac + + case $prev in + dlfiles|dlprefiles) + if test "$preload" = no; then + # Add the symbol object into the linking commands. 
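+	  # @SYMFILE@ (like @OUTPUT@ above) is a placeholder substituted
+	  # later: when symbols must be preloaded for -dlopen/-dlpreopen,
+	  # it is replaced by the generated symbol-table object (something
+	  # like .libs/progS.o, name illustrative); otherwise it is
+	  # simply removed.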
+ compile_command="$compile_command @SYMFILE@" + finalize_command="$finalize_command @SYMFILE@" + preload=yes + fi + case $arg in + *.la | *.lo) ;; # We handle these cases below. + force) + if test "$dlself" = no; then + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + self) + if test "$prev" = dlprefiles; then + dlself=yes + elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then + dlself=yes + else + dlself=needless + export_dynamic=yes + fi + prev= + continue + ;; + *) + if test "$prev" = dlfiles; then + dlfiles="$dlfiles $arg" + else + dlprefiles="$dlprefiles $arg" + fi + prev= + continue + ;; + esac + ;; + expsyms) + export_symbols="$arg" + if test ! -f "$arg"; then + $echo "$modename: symbol file \`$arg' does not exist" + exit $EXIT_FAILURE + fi + prev= + continue + ;; + expsyms_regex) + export_symbols_regex="$arg" + prev= + continue + ;; + inst_prefix) + inst_prefix_dir="$arg" + prev= + continue + ;; + precious_regex) + precious_files_regex="$arg" + prev= + continue + ;; + release) + release="-$arg" + prev= + continue + ;; + objectlist) + if test -f "$arg"; then + save_arg=$arg + moreargs= + for fil in `cat $save_arg` + do +# moreargs="$moreargs $fil" + arg=$fil + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + pic_object= + non_pic_object= + + # Read the .lo file + # If there is no directory component, then add one. + case $arg in + */* | *\\*) . $arg ;; + *) . ./$arg ;; + esac + + if test -z "$pic_object" || \ + test -z "$non_pic_object" || + test "$pic_object" = none && \ + test "$non_pic_object" = none; then + $echo "$modename: cannot find name of object for \`$arg'" 1>&2 + exit $EXIT_FAILURE + fi + + # Extract subdirectory from the argument. + xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$arg"; then + xdir= + else + xdir="$xdir/" + fi + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + dlfiles="$dlfiles $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + dlprefiles="$dlprefiles $pic_object" + prev= + fi + + # A PIC object. + libobjs="$libobjs $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + non_pic_objects="$non_pic_objects $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + non_pic_objects="$non_pic_objects $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if test -z "$run"; then + $echo "$modename: \`$arg' is not a valid libtool object" 1>&2 + exit $EXIT_FAILURE + else + # Dry-run case. + + # Extract subdirectory from the argument. 
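+	      # e.g. arg=sub/dir/foo.lo gives xdir="sub/dir/", while a
+	      # bare foo.lo leaves xdir empty: the sed strips the final
+	      # path component, and when nothing was stripped ($xdir =
+	      # $arg) there was no directory part at all.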
+ xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$arg"; then + xdir= + else + xdir="$xdir/" + fi + + pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"` + non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"` + libobjs="$libobjs $pic_object" + non_pic_objects="$non_pic_objects $non_pic_object" + fi + fi + done + else + $echo "$modename: link input file \`$save_arg' does not exist" + exit $EXIT_FAILURE + fi + arg=$save_arg + prev= + continue + ;; + rpath | xrpath) + # We need an absolute path. + case $arg in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit $EXIT_FAILURE + ;; + esac + if test "$prev" = rpath; then + case "$rpath " in + *" $arg "*) ;; + *) rpath="$rpath $arg" ;; + esac + else + case "$xrpath " in + *" $arg "*) ;; + *) xrpath="$xrpath $arg" ;; + esac + fi + prev= + continue + ;; + xcompiler) + compiler_flags="$compiler_flags $qarg" + prev= + compile_command="$compile_command $qarg" + finalize_command="$finalize_command $qarg" + continue + ;; + xlinker) + linker_flags="$linker_flags $qarg" + compiler_flags="$compiler_flags $wl$qarg" + prev= + compile_command="$compile_command $wl$qarg" + finalize_command="$finalize_command $wl$qarg" + continue + ;; + xcclinker) + linker_flags="$linker_flags $qarg" + compiler_flags="$compiler_flags $qarg" + prev= + compile_command="$compile_command $qarg" + finalize_command="$finalize_command $qarg" + continue + ;; + shrext) + shrext_cmds="$arg" + prev= + continue + ;; + darwin_framework|darwin_framework_skip) + test "$prev" = "darwin_framework" && compiler_flags="$compiler_flags $arg" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + prev= + continue + ;; + *) + eval "$prev=\"\$arg\"" + prev= + continue + ;; + esac + fi # test -n "$prev" + + prevarg="$arg" + + case $arg in + -all-static) + if test -n "$link_static_flag"; then + compile_command="$compile_command $link_static_flag" + finalize_command="$finalize_command $link_static_flag" + fi + continue + ;; + + -allow-undefined) + # FIXME: remove this flag sometime in the future. + $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 + continue + ;; + + -avoid-version) + avoid_version=yes + continue + ;; + + -dlopen) + prev=dlfiles + continue + ;; + + -dlpreopen) + prev=dlprefiles + continue + ;; + + -export-dynamic) + export_dynamic=yes + continue + ;; + + -export-symbols | -export-symbols-regex) + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: more than one -exported-symbols argument is not allowed" + exit $EXIT_FAILURE + fi + if test "X$arg" = "X-export-symbols"; then + prev=expsyms + else + prev=expsyms_regex + fi + continue + ;; + + -framework|-arch|-isysroot) + case " $CC " in + *" ${arg} ${1} "* | *" ${arg} ${1} "*) + prev=darwin_framework_skip ;; + *) compiler_flags="$compiler_flags $arg" + prev=darwin_framework ;; + esac + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + continue + ;; + + -inst-prefix-dir) + prev=inst_prefix + continue + ;; + + # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* + # so, if we see these flags be careful not to treat them like -L + -L[A-Z][A-Z]*:*) + case $with_gcc/$host in + no/*-*-irix* | /*-*-irix*) + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + ;; + esac + continue + ;; + + -L*) + dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` + # We need an absolute path. 
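+      # e.g. -L../lib is resolved via `cd "../lib" && pwd` to an absolute
+      # path such as /home/user/project/lib (hypothetical); if the
+      # directory cannot be entered it is kept literally and remembered
+      # in notinst_path.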
+ case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 + absdir="$dir" + notinst_path="$notinst_path $dir" + fi + dir="$absdir" + ;; + esac + case "$deplibs " in + *" -L$dir "*) ;; + *) + deplibs="$deplibs -L$dir" + lib_search_path="$lib_search_path $dir" + ;; + esac + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + testbindir=`$echo "X$dir" | $Xsed -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$dir:"*) ;; + *) dllsearchpath="$dllsearchpath:$dir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + *) dllsearchpath="$dllsearchpath:$testbindir";; + esac + ;; + esac + continue + ;; + + -l*) + if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-beos*) + # These systems don't actually have a C or math library (as such) + continue + ;; + *-*-os2*) + # These systems don't actually have a C library (as such) + test "X$arg" = "X-lc" && continue + ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + test "X$arg" = "X-lc" && continue + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C and math libraries are in the System framework + deplibs="$deplibs -framework System" + continue + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + test "X$arg" = "X-lc" && continue + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + test "X$arg" = "X-lc" && continue + ;; + esac + elif test "X$arg" = "X-lc_r"; then + case $host in + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc_r directly, use -pthread flag. + continue + ;; + esac + fi + deplibs="$deplibs $arg" + continue + ;; + + # Tru64 UNIX uses -model [arg] to determine the layout of C++ + # classes, name mangling, and exception handling. + -model) + compile_command="$compile_command $arg" + compiler_flags="$compiler_flags $arg" + finalize_command="$finalize_command $arg" + prev=xcompiler + continue + ;; + + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe) + compiler_flags="$compiler_flags $arg" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + continue + ;; + + -module) + module=yes + continue + ;; + + # -64, -mips[0-9] enable 64-bit mode on the SGI compiler + # -r[0-9][0-9]* specifies the processor on the SGI compiler + # -xarch=*, -xtarget=* enable 64-bit mode on the Sun compiler + # +DA*, +DD* enable 64-bit mode on the HP compiler + # -q* pass through compiler args for the IBM compiler + # -m* pass through architecture-specific compiler args for GCC + # -m*, -t[45]*, -txscale* pass through architecture-specific + # compiler args for GCC + # -pg pass through profiling flag for GCC + # @file GCC response files + -64|-mips[0-9]|-r[0-9][0-9]*|-xarch=*|-xtarget=*|+DA*|+DD*|-q*|-m*|-pg| \ + -t[45]*|-txscale*|@*) + + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. 
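+      # e.g. a single argument containing a space, such as
+      # -DMSG='two words', reaches this loop as `-DMSG=two words' and is
+      # re-quoted to "-DMSG=two words" so the later eval still sees one
+      # word ($sed_quote_subst first backslashes any embedded ` " $ \).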
+ arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + compiler_flags="$compiler_flags $arg" + continue + ;; + + -shrext) + prev=shrext + continue + ;; + + -no-fast-install) + fast_install=no + continue + ;; + + -no-install) + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + # The PATH hackery in wrapper scripts is required on Windows + # in order for the loader to find any dlls it needs. + $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2 + $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2 + fast_install=no + ;; + *) no_install=yes ;; + esac + continue + ;; + + -no-undefined) + allow_undefined=no + continue + ;; + + -objectlist) + prev=objectlist + continue + ;; + + -o) prev=output ;; + + -precious-files-regex) + prev=precious_regex + continue + ;; + + -release) + prev=release + continue + ;; + + -rpath) + prev=rpath + continue + ;; + + -R) + prev=xrpath + continue + ;; + + -R*) + dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + $echo "$modename: only absolute run-paths are allowed" 1>&2 + exit $EXIT_FAILURE + ;; + esac + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + continue + ;; + + -static) + # The effects of -static are defined in a previous loop. + # We used to do the same as -all-static on platforms that + # didn't have a PIC flag, but the assumption that the effects + # would be equivalent was wrong. It would break on at least + # Digital Unix and AIX. + continue + ;; + + -thread-safe) + thread_safe=yes + continue + ;; + + -version-info) + prev=vinfo + continue + ;; + -version-number) + prev=vinfo + vinfo_number=yes + continue + ;; + + -Wc,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Wl,*) + args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'` + arg= + save_ifs="$IFS"; IFS=',' + for flag in $args; do + IFS="$save_ifs" + case $flag in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + flag="\"$flag\"" + ;; + esac + arg="$arg $wl$flag" + compiler_flags="$compiler_flags $wl$flag" + linker_flags="$linker_flags $flag" + done + IFS="$save_ifs" + arg=`$echo "X$arg" | $Xsed -e "s/^ //"` + ;; + + -Xcompiler) + prev=xcompiler + continue + ;; + + -Xlinker) + prev=xlinker + continue + ;; + + -XCClinker) + prev=xcclinker + continue + ;; + + # Some other compiler flag. + -* | +*) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + + *.$objext) + # A standard object. + objs="$objs $arg" + ;; + + *.lo) + # A libtool-controlled object. + + # Check to see that this really is a libtool object. + if (${SED} -e '2q' $arg | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + pic_object= + non_pic_object= + + # Read the .lo file + # If there is no directory component, then add one. 
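+	  # A .lo file is itself a small shell fragment, roughly:
+	  #   # foo.lo - a libtool object file
+	  #   pic_object='.libs/foo.o'
+	  #   non_pic_object='foo.o'
+	  # so sourcing it here fills in $pic_object and $non_pic_object.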
+ case $arg in + */* | *\\*) . $arg ;; + *) . ./$arg ;; + esac + + if test -z "$pic_object" || \ + test -z "$non_pic_object" || + test "$pic_object" = none && \ + test "$non_pic_object" = none; then + $echo "$modename: cannot find name of object for \`$arg'" 1>&2 + exit $EXIT_FAILURE + fi + + # Extract subdirectory from the argument. + xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$arg"; then + xdir= + else + xdir="$xdir/" + fi + + if test "$pic_object" != none; then + # Prepend the subdirectory the object is found in. + pic_object="$xdir$pic_object" + + if test "$prev" = dlfiles; then + if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then + dlfiles="$dlfiles $pic_object" + prev= + continue + else + # If libtool objects are unsupported, then we need to preload. + prev=dlprefiles + fi + fi + + # CHECK ME: I think I busted this. -Ossama + if test "$prev" = dlprefiles; then + # Preload the old-style object. + dlprefiles="$dlprefiles $pic_object" + prev= + fi + + # A PIC object. + libobjs="$libobjs $pic_object" + arg="$pic_object" + fi + + # Non-PIC object. + if test "$non_pic_object" != none; then + # Prepend the subdirectory the object is found in. + non_pic_object="$xdir$non_pic_object" + + # A standard non-PIC object + non_pic_objects="$non_pic_objects $non_pic_object" + if test -z "$pic_object" || test "$pic_object" = none ; then + arg="$non_pic_object" + fi + else + # If the PIC object exists, use it instead. + # $xdir was prepended to $pic_object above. + non_pic_object="$pic_object" + non_pic_objects="$non_pic_objects $non_pic_object" + fi + else + # Only an error if not doing a dry-run. + if test -z "$run"; then + $echo "$modename: \`$arg' is not a valid libtool object" 1>&2 + exit $EXIT_FAILURE + else + # Dry-run case. + + # Extract subdirectory from the argument. + xdir=`$echo "X$arg" | $Xsed -e 's%/[^/]*$%%'` + if test "X$xdir" = "X$arg"; then + xdir= + else + xdir="$xdir/" + fi + + pic_object=`$echo "X${xdir}${objdir}/${arg}" | $Xsed -e "$lo2o"` + non_pic_object=`$echo "X${xdir}${arg}" | $Xsed -e "$lo2o"` + libobjs="$libobjs $pic_object" + non_pic_objects="$non_pic_objects $non_pic_object" + fi + fi + ;; + + *.$libext) + # An archive. + deplibs="$deplibs $arg" + old_deplibs="$old_deplibs $arg" + continue + ;; + + *.la) + # A libtool-controlled library. + + if test "$prev" = dlfiles; then + # This library was specified with -dlopen. + dlfiles="$dlfiles $arg" + prev= + elif test "$prev" = dlprefiles; then + # The library was specified with -dlpreopen. + dlprefiles="$dlprefiles $arg" + prev= + else + deplibs="$deplibs $arg" + fi + continue + ;; + + # Some other compiler argument. + *) + # Unknown arguments in both finalize_command and compile_command need + # to be aesthetically quoted because they are evaled later. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + ;; + esac # arg + + # Now actually substitute the argument into the commands. 
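+    # Two command lines are built in parallel: compile_command links
+    # against the uninstalled libraries (e.g. ./.libs/libfoo.so) so the
+    # result runs from the build tree, while finalize_command links
+    # against the eventual install locations and is used to relink at
+    # `make install' time.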
+ if test -n "$arg"; then + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + done # argument parsing loop + + if test -n "$prev"; then + $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then + eval arg=\"$export_dynamic_flag_spec\" + compile_command="$compile_command $arg" + finalize_command="$finalize_command $arg" + fi + + oldlibs= + # calculate the name of the file, without its directory + outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` + libobjs_save="$libobjs" + + if test -n "$shlibpath_var"; then + # get the directories listed in $shlibpath_var + eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` + else + shlib_search_path= + fi + eval sys_lib_search_path=\"$sys_lib_search_path_spec\" + eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" + + output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` + if test "X$output_objdir" = "X$output"; then + output_objdir="$objdir" + else + output_objdir="$output_objdir/$objdir" + fi + # Create the object directory. + if test ! -d "$output_objdir"; then + $show "$mkdir $output_objdir" + $run $mkdir $output_objdir + exit_status=$? + if test "$exit_status" -ne 0 && test ! -d "$output_objdir"; then + exit $exit_status + fi + fi + + # Determine the type of output + case $output in + "") + $echo "$modename: you must specify an output file" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + ;; + *.$libext) linkmode=oldlib ;; + *.lo | *.$objext) linkmode=obj ;; + *.la) linkmode=lib ;; + *) linkmode=prog ;; # Anything else should be a program. + esac + + case $host in + *cygwin* | *mingw* | *pw32*) + # don't eliminate duplications in $postdeps and $predeps + duplicate_compiler_generated_deps=yes + ;; + *) + duplicate_compiler_generated_deps=$duplicate_deps + ;; + esac + specialdeplibs= + + libs= + # Find all interdependent deplibs by searching for libraries + # that are linked more than once (e.g. -la -lb -la) + for deplib in $deplibs; do + if test "X$duplicate_deps" = "Xyes" ; then + case "$libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + libs="$libs $deplib" + done + + if test "$linkmode" = lib; then + libs="$predeps $libs $compiler_lib_search_path $postdeps" + + # Compute libraries that are listed more than once in $predeps + # $postdeps and mark them as special (i.e., whose duplicates are + # not to be eliminated). 
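+      # e.g. with postdeps="-lgcc -lc -lgcc", the second -lgcc is seen
+      # again in the loop below and -lgcc is marked in specialdeplibs, so
+      # the usual duplicate elimination keeps both occurrences (static
+      # link order can genuinely require the repetition).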
+ pre_post_deps= + if test "X$duplicate_compiler_generated_deps" = "Xyes" ; then + for pre_post_dep in $predeps $postdeps; do + case "$pre_post_deps " in + *" $pre_post_dep "*) specialdeplibs="$specialdeplibs $pre_post_deps" ;; + esac + pre_post_deps="$pre_post_deps $pre_post_dep" + done + fi + pre_post_deps= + fi + + deplibs= + newdependency_libs= + newlib_search_path= + need_relink=no # whether we're linking any uninstalled libtool libraries + notinst_deplibs= # not-installed libtool libraries + case $linkmode in + lib) + passes="conv link" + for file in $dlfiles $dlprefiles; do + case $file in + *.la) ;; + *) + $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2 + exit $EXIT_FAILURE + ;; + esac + done + ;; + prog) + compile_deplibs= + finalize_deplibs= + alldeplibs=no + newdlfiles= + newdlprefiles= + passes="conv scan dlopen dlpreopen link" + ;; + *) passes="conv" + ;; + esac + for pass in $passes; do + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan"; then + libs="$deplibs" + deplibs= + fi + if test "$linkmode" = prog; then + case $pass in + dlopen) libs="$dlfiles" ;; + dlpreopen) libs="$dlprefiles" ;; + link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; + esac + fi + if test "$pass" = dlopen; then + # Collect dlpreopened libraries + save_deplibs="$deplibs" + deplibs= + fi + for deplib in $libs; do + lib= + found=no + case $deplib in + -mt|-mthreads|-kthread|-Kthread|-pthread|-pthreads|--thread-safe) + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + compiler_flags="$compiler_flags $deplib" + fi + continue + ;; + -l*) + if test "$linkmode" != lib && test "$linkmode" != prog; then + $echo "$modename: warning: \`-l' is ignored for archives/objects" 1>&2 + continue + fi + name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` + for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do + for search_ext in .la $std_shrext .so .a; do + # Search the libtool library + lib="$searchdir/lib${name}${search_ext}" + if test -f "$lib"; then + if test "$search_ext" = ".la"; then + found=yes + else + found=no + fi + break 2 + fi + done + done + if test "$found" != yes; then + # deplib doesn't seem to be a libtool library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + else # deplib is a libtool library + # If $allow_libtool_libs_with_static_runtimes && $deplib is a stdlib, + # We need to do some special things here, and not later. + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $deplib "*) + if (${SED} -e '2q' $lib | + grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + library_names= + old_library= + case $lib in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + for l in $old_library $library_names; do + ll="$l" + done + if test "X$ll" = "X$old_library" ; then # only static version available + found=no + ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` + test "X$ladir" = "X$lib" && ladir="." 
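+		  # Only the static archive exists, so point $lib straight
+		  # at it: e.g. /usr/lib/libfoo.la with
+		  # old_library='libfoo.a' yields /usr/lib/libfoo.a
+		  # (names hypothetical).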
+ lib=$ladir/$old_library + if test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + deplibs="$deplib $deplibs" + test "$linkmode" = lib && newdependency_libs="$deplib $newdependency_libs" + fi + continue + fi + fi + ;; + *) ;; + esac + fi + fi + ;; # -l + -L*) + case $linkmode in + lib) + deplibs="$deplib $deplibs" + test "$pass" = conv && continue + newdependency_libs="$deplib $newdependency_libs" + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + ;; + prog) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + if test "$pass" = scan; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` + ;; + *) + $echo "$modename: warning: \`-L' is ignored for archives/objects" 1>&2 + ;; + esac # linkmode + continue + ;; # -L + -R*) + if test "$pass" = link; then + dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` + # Make sure the xrpath contains only unique directories. + case "$xrpath " in + *" $dir "*) ;; + *) xrpath="$xrpath $dir" ;; + esac + fi + deplibs="$deplib $deplibs" + continue + ;; + *.la) lib="$deplib" ;; + *.$libext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + continue + fi + case $linkmode in + lib) + valid_a_lib=no + case $deplibs_check_method in + match_pattern*) + set dummy $deplibs_check_method + match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` + if eval $echo \"$deplib\" 2>/dev/null \ + | $SED 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + valid_a_lib=yes + fi + ;; + pass_all) + valid_a_lib=yes + ;; + esac + if test "$valid_a_lib" != yes; then + $echo + $echo "*** Warning: Trying to link with static lib archive $deplib." + $echo "*** I have the capability to make that library automatically link in when" + $echo "*** you link to this library. But I can only do this if you have a" + $echo "*** shared version of the library, which you do not appear to have" + $echo "*** because the file extensions .$libext of this argument makes me believe" + $echo "*** that it is just a static archive that I should not used here." + else + $echo + $echo "*** Warning: Linking the shared library $output against the" + $echo "*** static library $deplib is not portable!" + deplibs="$deplib $deplibs" + fi + continue + ;; + prog) + if test "$pass" != link; then + deplibs="$deplib $deplibs" + else + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + fi + continue + ;; + esac # linkmode + ;; # *.$libext + *.lo | *.$objext) + if test "$pass" = conv; then + deplibs="$deplib $deplibs" + elif test "$linkmode" = prog; then + if test "$pass" = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then + # If there is no dlopen support or we're linking statically, + # we need to preload. + newdlprefiles="$newdlprefiles $deplib" + compile_deplibs="$deplib $compile_deplibs" + finalize_deplibs="$deplib $finalize_deplibs" + else + newdlfiles="$newdlfiles $deplib" + fi + fi + continue + ;; + %DEPLIBS%) + alldeplibs=yes + continue + ;; + esac # case $deplib + if test "$found" = yes || test -f "$lib"; then : + else + $echo "$modename: cannot find the library \`$lib' or unhandled argument \`$deplib'" 1>&2 + exit $EXIT_FAILURE + fi + + # Check to see that this really is a libtool archive. 
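+      # A libtool .la file opens with a header whose second line reads
+      # something like:
+      #   # Generated by ltmain.sh - GNU libtool 1.5.22 ...
+      # which is exactly what the `sed -e 2q | grep' probe below checks.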
+ if (${SED} -e '2q' $lib | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit $EXIT_FAILURE + fi + + ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` + test "X$ladir" = "X$lib" && ladir="." + + dlname= + dlopen= + dlpreopen= + libdir= + library_names= + old_library= + # If the library was installed with an old release of libtool, + # it will not redefine variables installed, or shouldnotlink + installed=yes + shouldnotlink=no + avoidtemprpath= + + + # Read the .la file + case $lib in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + + if test "$linkmode,$pass" = "lib,link" || + test "$linkmode,$pass" = "prog,scan" || + { test "$linkmode" != prog && test "$linkmode" != lib; }; then + test -n "$dlopen" && dlfiles="$dlfiles $dlopen" + test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" + fi + + if test "$pass" = conv; then + # Only check for convenience libraries + deplibs="$lib $deplibs" + if test -z "$libdir"; then + if test -z "$old_library"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit $EXIT_FAILURE + fi + # It is a libtool convenience library, so add in its objects. + convenience="$convenience $ladir/$objdir/$old_library" + old_convenience="$old_convenience $ladir/$objdir/$old_library" + tmp_libs= + for deplib in $dependency_libs; do + deplibs="$deplib $deplibs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + elif test "$linkmode" != prog && test "$linkmode" != lib; then + $echo "$modename: \`$lib' is not a convenience library" 1>&2 + exit $EXIT_FAILURE + fi + continue + fi # $pass = conv + + + # Get the name of the library we link against. + linklib= + for l in $old_library $library_names; do + linklib="$l" + done + if test -z "$linklib"; then + $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 + exit $EXIT_FAILURE + fi + + # This library was specified with -dlopen. + if test "$pass" = dlopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2 + exit $EXIT_FAILURE + fi + if test -z "$dlname" || + test "$dlopen_support" != yes || + test "$build_libtool_libs" = no; then + # If there is no dlname, no dlopen support or we're linking + # statically, we need to preload. We also need to preload any + # dependent libraries so libltdl's deplib preloader doesn't + # bomb out in the load deplibs phase. + dlprefiles="$dlprefiles $lib $dependency_libs" + else + newdlfiles="$newdlfiles $lib" + fi + continue + fi # $pass = dlopen + + # We need an absolute path. + case $ladir in + [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; + *) + abs_ladir=`cd "$ladir" && pwd` + if test -z "$abs_ladir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 + $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 + abs_ladir="$ladir" + fi + ;; + esac + laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + + # Find the relevant object directory and library name. + if test "X$installed" = Xyes; then + if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then + $echo "$modename: warning: library \`$lib' was moved." 1>&2 + dir="$ladir" + absdir="$abs_ladir" + libdir="$abs_ladir" + else + dir="$libdir" + absdir="$libdir" + fi + test "X$hardcode_automatic" = Xyes && avoidtemprpath=yes + else + if test ! 
-f "$ladir/$objdir/$linklib" && test -f "$abs_ladir/$linklib"; then + dir="$ladir" + absdir="$abs_ladir" + # Remove this search path later + notinst_path="$notinst_path $abs_ladir" + else + dir="$ladir/$objdir" + absdir="$abs_ladir/$objdir" + # Remove this search path later + notinst_path="$notinst_path $abs_ladir" + fi + fi # $installed = yes + name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + + # This library was specified with -dlpreopen. + if test "$pass" = dlpreopen; then + if test -z "$libdir"; then + $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2 + exit $EXIT_FAILURE + fi + # Prefer using a static library (so that no silly _DYNAMIC symbols + # are required to link). + if test -n "$old_library"; then + newdlprefiles="$newdlprefiles $dir/$old_library" + # Otherwise, use the dlname, so that lt_dlopen finds it. + elif test -n "$dlname"; then + newdlprefiles="$newdlprefiles $dir/$dlname" + else + newdlprefiles="$newdlprefiles $dir/$linklib" + fi + fi # $pass = dlpreopen + + if test -z "$libdir"; then + # Link the convenience library + if test "$linkmode" = lib; then + deplibs="$dir/$old_library $deplibs" + elif test "$linkmode,$pass" = "prog,link"; then + compile_deplibs="$dir/$old_library $compile_deplibs" + finalize_deplibs="$dir/$old_library $finalize_deplibs" + else + deplibs="$lib $deplibs" # used for prog,scan pass + fi + continue + fi + + + if test "$linkmode" = prog && test "$pass" != link; then + newlib_search_path="$newlib_search_path $ladir" + deplibs="$lib $deplibs" + + linkalldeplibs=no + if test "$link_all_deplibs" != no || test -z "$library_names" || + test "$build_libtool_libs" = no; then + linkalldeplibs=yes + fi + + tmp_libs= + for deplib in $dependency_libs; do + case $deplib in + -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test + esac + # Need to link against all dependency_libs? + if test "$linkalldeplibs" = yes; then + deplibs="$deplib $deplibs" + else + # Need to hardcode shared library paths + # or/and link against static libraries + newdependency_libs="$deplib $newdependency_libs" + fi + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done # for deplib + continue + fi # $linkmode = prog... + + if test "$linkmode,$pass" = "prog,link"; then + if test -n "$library_names" && + { test "$prefer_static_libs" = no || test -z "$old_library"; }; then + # We need to hardcode the library path + if test -n "$shlibpath_var" && test -z "$avoidtemprpath" ; then + # Make sure the rpath contains only unique directories. + case "$temp_rpath " in + *" $dir "*) ;; + *" $absdir "*) ;; + *) temp_rpath="$temp_rpath $absdir" ;; + esac + fi + + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + fi # $linkmode,$pass = prog,link... 
+ + if test "$alldeplibs" = yes && + { test "$deplibs_check_method" = pass_all || + { test "$build_libtool_libs" = yes && + test -n "$library_names"; }; }; then + # We only need to search for static libraries + continue + fi + fi + + link_static=no # Whether the deplib will be linked statically + use_static_libs=$prefer_static_libs + if test "$use_static_libs" = built && test "$installed" = yes ; then + use_static_libs=no + fi + if test -n "$library_names" && + { test "$use_static_libs" = no || test -z "$old_library"; }; then + if test "$installed" = no; then + notinst_deplibs="$notinst_deplibs $lib" + need_relink=yes + fi + # This is a shared library + + # Warn about portability, can't link against -module's on + # some systems (darwin) + if test "$shouldnotlink" = yes && test "$pass" = link ; then + $echo + if test "$linkmode" = prog; then + $echo "*** Warning: Linking the executable $output against the loadable module" + else + $echo "*** Warning: Linking the shared library $output against the loadable module" + fi + $echo "*** $linklib is not portable!" + fi + if test "$linkmode" = lib && + test "$hardcode_into_libs" = yes; then + # Hardcode the library path. + # Skip directories that are in the system default run-time + # search path. + case " $sys_lib_dlsearch_path " in + *" $absdir "*) ;; + *) + case "$compile_rpath " in + *" $absdir "*) ;; + *) compile_rpath="$compile_rpath $absdir" + esac + ;; + esac + case " $sys_lib_dlsearch_path " in + *" $libdir "*) ;; + *) + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" + esac + ;; + esac + fi + + if test -n "$old_archive_from_expsyms_cmds"; then + # figure out the soname + set dummy $library_names + realname="$2" + shift; shift + libname=`eval \\$echo \"$libname_spec\"` + # use dlname if we got it. it's perfectly good, no? + if test -n "$dlname"; then + soname="$dlname" + elif test -n "$soname_spec"; then + # bleh windows + case $host in + *cygwin* | mingw*) + major=`expr $current - $age` + versuffix="-$major" + ;; + esac + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + + # Make a new name for the extract_expsyms_cmds to use + soroot="$soname" + soname=`$echo $soroot | ${SED} -e 's/^.*\///'` + newlib="libimp-`$echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a" + + # If the library has no export list, then create one now + if test -f "$output_objdir/$soname-def"; then : + else + $show "extracting exported symbol list from \`$soname'" + save_ifs="$IFS"; IFS='~' + cmds=$extract_expsyms_cmds + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + fi + + # Create $newlib + if test -f "$output_objdir/$newlib"; then :; else + $show "generating import library for \`$soname'" + save_ifs="$IFS"; IFS='~' + cmds=$old_archive_from_expsyms_cmds + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || exit $? 
+ done + IFS="$save_ifs" + fi + # make sure the library variables are pointing to the new library + dir=$output_objdir + linklib=$newlib + fi # test -n "$old_archive_from_expsyms_cmds" + + if test "$linkmode" = prog || test "$mode" != relink; then + add_shlibpath= + add_dir= + add= + lib_linked=yes + case $hardcode_action in + immediate | unsupported) + if test "$hardcode_direct" = no; then + add="$dir/$linklib" + case $host in + *-*-sco3.2v5.0.[024]*) add_dir="-L$dir" ;; + *-*-sysv4*uw2*) add_dir="-L$dir" ;; + *-*-sysv5OpenUNIX* | *-*-sysv5UnixWare7.[01].[10]* | \ + *-*-unixware7*) add_dir="-L$dir" ;; + *-*-darwin* ) + # if the lib is a module then we can not link against + # it, someone is ignoring the new warnings I added + if /usr/bin/file -L $add 2> /dev/null | + $EGREP ": [^:]* bundle" >/dev/null ; then + $echo "** Warning, lib $linklib is a module, not a shared library" + if test -z "$old_library" ; then + $echo + $echo "** And there doesn't seem to be a static archive available" + $echo "** The link will probably fail, sorry" + else + add="$dir/$old_library" + fi + fi + esac + elif test "$hardcode_minus_L" = no; then + case $host in + *-*-sunos*) add_shlibpath="$dir" ;; + esac + add_dir="-L$dir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = no; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + relink) + if test "$hardcode_direct" = yes; then + add="$dir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$dir" + # Try looking first in the location we're being installed to. + if test -n "$inst_prefix_dir"; then + case $libdir in + [\\/]*) + add_dir="$add_dir -L$inst_prefix_dir$libdir" + ;; + esac + fi + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + add_shlibpath="$dir" + add="-l$name" + else + lib_linked=no + fi + ;; + *) lib_linked=no ;; + esac + + if test "$lib_linked" != yes; then + $echo "$modename: configuration error: unsupported hardcode properties" + exit $EXIT_FAILURE + fi + + if test -n "$add_shlibpath"; then + case :$compile_shlibpath: in + *":$add_shlibpath:"*) ;; + *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; + esac + fi + if test "$linkmode" = prog; then + test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" + test -n "$add" && compile_deplibs="$add $compile_deplibs" + else + test -n "$add_dir" && deplibs="$add_dir $deplibs" + test -n "$add" && deplibs="$add $deplibs" + if test "$hardcode_direct" != yes && \ + test "$hardcode_minus_L" != yes && \ + test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + fi + fi + fi + + if test "$linkmode" = prog || test "$mode" = relink; then + add_shlibpath= + add_dir= + add= + # Finalize command for both is simple: just hardcode it. + if test "$hardcode_direct" = yes; then + add="$libdir/$linklib" + elif test "$hardcode_minus_L" = yes; then + add_dir="-L$libdir" + add="-l$name" + elif test "$hardcode_shlibpath_var" = yes; then + case :$finalize_shlibpath: in + *":$libdir:"*) ;; + *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; + esac + add="-l$name" + elif test "$hardcode_automatic" = yes; then + if test -n "$inst_prefix_dir" && + test -f "$inst_prefix_dir$libdir/$linklib" ; then + add="$inst_prefix_dir$libdir/$linklib" + else + add="$libdir/$linklib" + fi + else + # We cannot seem to hardcode it, guess we'll fake it. + add_dir="-L$libdir" + # Try looking first in the location we're being installed to. 
+ if test -n "$inst_prefix_dir"; then
+ case $libdir in
+ [\\/]*)
+ add_dir="$add_dir -L$inst_prefix_dir$libdir"
+ ;;
+ esac
+ fi
+ add="-l$name"
+ fi
+
+ if test "$linkmode" = prog; then
+ test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs"
+ test -n "$add" && finalize_deplibs="$add $finalize_deplibs"
+ else
+ test -n "$add_dir" && deplibs="$add_dir $deplibs"
+ test -n "$add" && deplibs="$add $deplibs"
+ fi
+ fi
+ elif test "$linkmode" = prog; then
+ # Here we assume that one of hardcode_direct or hardcode_minus_L
+ # is not unsupported. This is valid on all known static and
+ # shared platforms.
+ if test "$hardcode_direct" != unsupported; then
+ test -n "$old_library" && linklib="$old_library"
+ compile_deplibs="$dir/$linklib $compile_deplibs"
+ finalize_deplibs="$dir/$linklib $finalize_deplibs"
+ else
+ compile_deplibs="-l$name -L$dir $compile_deplibs"
+ finalize_deplibs="-l$name -L$dir $finalize_deplibs"
+ fi
+ elif test "$build_libtool_libs" = yes; then
+ # Not a shared library
+ if test "$deplibs_check_method" != pass_all; then
+ # We're trying to link a shared library against a static one
+ # but the system doesn't support it.
+
+ # Just print a warning and add the library to dependency_libs so
+ # that the program can be linked against the static library.
+ $echo
+ $echo "*** Warning: This system cannot link to static lib archive $lib."
+ $echo "*** I have the capability to make that library automatically link in when"
+ $echo "*** you link to this library. But I can only do this if you have a"
+ $echo "*** shared version of the library, which you do not appear to have."
+ if test "$module" = yes; then
+ $echo "*** But since you are trying to build a module library, libtool will still create"
+ $echo "*** a static module that should work as long as the dlopening application"
+ $echo "*** is linked with the -dlopen flag to resolve symbols at runtime."
+ if test -z "$global_symbol_pipe"; then
+ $echo
+ $echo "*** However, this would only work if libtool was able to extract symbol"
+ $echo "*** lists from a program, using \`nm' or equivalent, but libtool could"
+ $echo "*** not find such a program. So, this module is probably useless."
+ $echo "*** \`nm' from GNU binutils and a full rebuild may help."
+ fi
+ if test "$build_old_libs" = no; then
+ build_libtool_libs=module
+ build_old_libs=yes
+ else
+ build_libtool_libs=no
+ fi
+ fi
+ else
+ deplibs="$dir/$old_library $deplibs"
+ link_static=yes
+ fi
+ fi # link shared/static library?
+
+ if test "$linkmode" = lib; then
+ if test -n "$dependency_libs" &&
+ { test "$hardcode_into_libs" != yes ||
+ test "$build_old_libs" = yes ||
+ test "$link_static" = yes; }; then
+ # Extract -R from dependency_libs
+ temp_deplibs=
+ for libdir in $dependency_libs; do
+ case $libdir in
+ -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'`
+ case " $xrpath " in
+ *" $temp_xrpath "*) ;;
+ *) xrpath="$xrpath $temp_xrpath";;
+ esac;;
+ *) temp_deplibs="$temp_deplibs $libdir";;
+ esac
+ done
+ dependency_libs="$temp_deplibs"
+ fi
+
+ newlib_search_path="$newlib_search_path $absdir"
+ # Link against this library
+ test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs"
+ # ...
and its dependency_libs + tmp_libs= + for deplib in $dependency_libs; do + newdependency_libs="$deplib $newdependency_libs" + if test "X$duplicate_deps" = "Xyes" ; then + case "$tmp_libs " in + *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; + esac + fi + tmp_libs="$tmp_libs $deplib" + done + + if test "$link_all_deplibs" != no; then + # Add the search paths of all dependency libraries + for deplib in $dependency_libs; do + case $deplib in + -L*) path="$deplib" ;; + *.la) + dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` + test "X$dir" = "X$deplib" && dir="." + # We need an absolute path. + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; + *) + absdir=`cd "$dir" && pwd` + if test -z "$absdir"; then + $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 + absdir="$dir" + fi + ;; + esac + if grep "^installed=no" $deplib > /dev/null; then + path="$absdir/$objdir" + else + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit $EXIT_FAILURE + fi + if test "$absdir" != "$libdir"; then + $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 + fi + path="$absdir" + fi + depdepl= + case $host in + *-*-darwin*) + # we do not want to link against static libs, + # but need to link against shared + eval deplibrary_names=`${SED} -n -e 's/^library_names=\(.*\)$/\1/p' $deplib` + if test -n "$deplibrary_names" ; then + for tmp in $deplibrary_names ; do + depdepl=$tmp + done + if test -f "$path/$depdepl" ; then + depdepl="$path/$depdepl" + fi + # do not add paths which are already there + case " $newlib_search_path " in + *" $path "*) ;; + *) newlib_search_path="$newlib_search_path $path";; + esac + fi + path="" + ;; + *) + path="-L$path" + ;; + esac + ;; + -l*) + case $host in + *-*-darwin*) + # Again, we only want to link against shared libraries + eval tmp_libs=`$echo "X$deplib" | $Xsed -e "s,^\-l,,"` + for tmp in $newlib_search_path ; do + if test -f "$tmp/lib$tmp_libs.dylib" ; then + eval depdepl="$tmp/lib$tmp_libs.dylib" + break + fi + done + path="" + ;; + *) continue ;; + esac + ;; + *) continue ;; + esac + case " $deplibs " in + *" $path "*) ;; + *) deplibs="$path $deplibs" ;; + esac + case " $deplibs " in + *" $depdepl "*) ;; + *) deplibs="$depdepl $deplibs" ;; + esac + done + fi # link_all_deplibs != no + fi # linkmode = lib + done # for deplib in $libs + dependency_libs="$newdependency_libs" + if test "$pass" = dlpreopen; then + # Link the dlpreopened libraries before other libraries + for deplib in $save_deplibs; do + deplibs="$deplib $deplibs" + done + fi + if test "$pass" != dlopen; then + if test "$pass" != conv; then + # Make sure lib_search_path contains only unique directories. 
+ lib_search_path= + for dir in $newlib_search_path; do + case "$lib_search_path " in + *" $dir "*) ;; + *) lib_search_path="$lib_search_path $dir" ;; + esac + done + newlib_search_path= + fi + + if test "$linkmode,$pass" != "prog,link"; then + vars="deplibs" + else + vars="compile_deplibs finalize_deplibs" + fi + for var in $vars dependency_libs; do + # Add libraries to $var in reverse order + eval tmp_libs=\"\$$var\" + new_libs= + for deplib in $tmp_libs; do + # FIXME: Pedantically, this is the right thing to do, so + # that some nasty dependency loop isn't accidentally + # broken: + #new_libs="$deplib $new_libs" + # Pragmatically, this seems to cause very few problems in + # practice: + case $deplib in + -L*) new_libs="$deplib $new_libs" ;; + -R*) ;; + *) + # And here is the reason: when a library appears more + # than once as an explicit dependence of a library, or + # is implicitly linked in more than once by the + # compiler, it is considered special, and multiple + # occurrences thereof are not removed. Compare this + # with having the same library being listed as a + # dependency of multiple other libraries: in this case, + # we know (pedantically, we assume) the library does not + # need to be listed more than once, so we keep only the + # last copy. This is not always right, but it is rare + # enough that we require users that really mean to play + # such unportable linking tricks to link the library + # using -Wl,-lname, so that libtool does not consider it + # for duplicate removal. + case " $specialdeplibs " in + *" $deplib "*) new_libs="$deplib $new_libs" ;; + *) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$deplib $new_libs" ;; + esac + ;; + esac + ;; + esac + done + tmp_libs= + for deplib in $new_libs; do + case $deplib in + -L*) + case " $tmp_libs " in + *" $deplib "*) ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + ;; + *) tmp_libs="$tmp_libs $deplib" ;; + esac + done + eval $var=\"$tmp_libs\" + done # for var + fi + # Last step: remove runtime libs from dependency_libs + # (they stay in deplibs) + tmp_libs= + for i in $dependency_libs ; do + case " $predeps $postdeps $compiler_lib_search_path " in + *" $i "*) + i="" + ;; + esac + if test -n "$i" ; then + tmp_libs="$tmp_libs $i" + fi + done + dependency_libs=$tmp_libs + done # for pass + if test "$linkmode" = prog; then + dlfiles="$newdlfiles" + dlprefiles="$newdlprefiles" + fi + + case $linkmode in + oldlib) + if test -n "$deplibs"; then + $echo "$modename: warning: \`-l' and \`-L' are ignored for archives" 1>&2 + fi + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info/-version-number' is ignored for archives" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 + fi + + if test -n "$export_symbols" || test -n "$export_symbols_regex"; then + $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 + fi + + # Now set the variables for building old libraries. + build_libtool_libs=no + oldlibs="$output" + objs="$objs$old_deplibs" + ;; + + lib) + # Make sure we only generate libraries of the form `libNAME.la'. 
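+ # For instance (names are illustrative), outputname=libfoo.la takes
+ # the first branch below, while a plain foo.la is only accepted for
+ # -module libraries and, if $need_lib_prefix requires it, gains the
+ # `lib' prefix via $libname_spec.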
+ case $outputname in + lib*) + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + ;; + *) + if test "$module" = no; then + $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + if test "$need_lib_prefix" != no; then + # Add the "lib" prefix for modules if required + name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + eval shared_ext=\"$shrext_cmds\" + eval libname=\"$libname_spec\" + else + libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` + fi + ;; + esac + + if test -n "$objs"; then + if test "$deplibs_check_method" != pass_all; then + $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1 + exit $EXIT_FAILURE + else + $echo + $echo "*** Warning: Linking the shared library $output against the non-libtool" + $echo "*** objects $objs is not portable!" + libobjs="$libobjs $objs" + fi + fi + + if test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 + fi + + set dummy $rpath + if test "$#" -gt 2; then + $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 + fi + install_libdir="$2" + + oldlibs= + if test -z "$rpath"; then + if test "$build_libtool_libs" = yes; then + # Building a libtool convenience library. + # Some compilers have problems with a `.al' extension so + # convenience libraries should have the same extension an + # archive normally would. + oldlibs="$output_objdir/$libname.$libext $oldlibs" + build_libtool_libs=convenience + build_old_libs=yes + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info/-version-number' is ignored for convenience libraries" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 + fi + else + + # Parse the version information argument. + save_ifs="$IFS"; IFS=':' + set dummy $vinfo 0 0 0 + IFS="$save_ifs" + + if test -n "$8"; then + $echo "$modename: too many parameters to \`-version-info'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # convert absolute version numbers to libtool ages + # this retains compatibility with .la files and attempts + # to make the code below a bit more comprehensible + + case $vinfo_number in + yes) + number_major="$2" + number_minor="$3" + number_revision="$4" + # + # There are really only two kinds -- those that + # use the current revision as the major version + # and those that subtract age and use age as + # a minor version. But, then there is irix + # which has an extra 1 added just for fun + # + case $version_type in + darwin|linux|osf|windows) + current=`expr $number_major + $number_minor` + age="$number_minor" + revision="$number_revision" + ;; + freebsd-aout|freebsd-elf|sunos) + current="$number_major" + revision="$number_minor" + age="0" + ;; + irix|nonstopux) + current=`expr $number_major + $number_minor - 1` + age="$number_minor" + revision="$number_minor" + ;; + esac + ;; + no) + current="$2" + revision="$3" + age="$4" + ;; + esac + + # Check that each of the things are valid numbers. 
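+ # Worked example (illustrative values): `-version-info 5:1:2' sets
+ # current=5, revision=1, age=2 directly, while `-version-number 3:2:1'
+ # under linux-style versioning is converted above to current=3+2=5,
+ # age=2, revision=1; the linux branch further below then derives
+ # major=.3 and versuffix=.3.2.1.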
+ case $current in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + $echo "$modename: CURRENT \`$current' must be a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + case $revision in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + $echo "$modename: REVISION \`$revision' must be a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + case $age in + 0|[1-9]|[1-9][0-9]|[1-9][0-9][0-9]|[1-9][0-9][0-9][0-9]|[1-9][0-9][0-9][0-9][0-9]) ;; + *) + $echo "$modename: AGE \`$age' must be a nonnegative integer" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + if test "$age" -gt "$current"; then + $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 + $echo "$modename: \`$vinfo' is not valid version information" 1>&2 + exit $EXIT_FAILURE + fi + + # Calculate the version variables. + major= + versuffix= + verstring= + case $version_type in + none) ;; + + darwin) + # Like Linux, but with the current version available in + # verstring for coding it into the library header + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + # Darwin ld doesn't like 0 for these options... + minor_current=`expr $current + 1` + verstring="${wl}-compatibility_version ${wl}$minor_current ${wl}-current_version ${wl}$minor_current.$revision" + ;; + + freebsd-aout) + major=".$current" + versuffix=".$current.$revision"; + ;; + + freebsd-elf) + major=".$current" + versuffix=".$current"; + ;; + + irix | nonstopux) + major=`expr $current - $age + 1` + + case $version_type in + nonstopux) verstring_prefix=nonstopux ;; + *) verstring_prefix=sgi ;; + esac + verstring="$verstring_prefix$major.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$revision + while test "$loop" -ne 0; do + iface=`expr $revision - $loop` + loop=`expr $loop - 1` + verstring="$verstring_prefix$major.$iface:$verstring" + done + + # Before this point, $major must not contain `.'. + major=.$major + versuffix="$major.$revision" + ;; + + linux) + major=.`expr $current - $age` + versuffix="$major.$age.$revision" + ;; + + osf) + major=.`expr $current - $age` + versuffix=".$current.$age.$revision" + verstring="$current.$age.$revision" + + # Add in all the interfaces that we are compatible with. + loop=$age + while test "$loop" -ne 0; do + iface=`expr $current - $loop` + loop=`expr $loop - 1` + verstring="$verstring:${iface}.0" + done + + # Make executables depend on our current version. + verstring="$verstring:${current}.0" + ;; + + sunos) + major=".$current" + versuffix=".$current.$revision" + ;; + + windows) + # Use '-' rather than '.', since we only want one + # extension on DOS 8.3 filesystems. + major=`expr $current - $age` + versuffix="-$major" + ;; + + *) + $echo "$modename: unknown library version type \`$version_type'" 1>&2 + $echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 + exit $EXIT_FAILURE + ;; + esac + + # Clear the version info if we defaulted, and they specified a release. 
+ if test -z "$vinfo" && test -n "$release"; then + major= + case $version_type in + darwin) + # we can't check for "0.0" in archive_cmds due to quoting + # problems, so we reset it completely + verstring= + ;; + *) + verstring="0.0" + ;; + esac + if test "$need_version" = no; then + versuffix= + else + versuffix=".0.0" + fi + fi + + # Remove version info from name if versioning should be avoided + if test "$avoid_version" = yes && test "$need_version" = no; then + major= + versuffix= + verstring="" + fi + + # Check to see if the archive will have undefined symbols. + if test "$allow_undefined" = yes; then + if test "$allow_undefined_flag" = unsupported; then + $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 + build_libtool_libs=no + build_old_libs=yes + fi + else + # Don't allow undefined symbols. + allow_undefined_flag="$no_undefined_flag" + fi + fi + + if test "$mode" != relink; then + # Remove our outputs, but don't remove object files since they + # may have been created when compiling PIC objects. + removelist= + tempremovelist=`$echo "$output_objdir/*"` + for p in $tempremovelist; do + case $p in + *.$objext) + ;; + $output_objdir/$outputname | $output_objdir/$libname.* | $output_objdir/${libname}${release}.*) + if test "X$precious_files_regex" != "X"; then + if echo $p | $EGREP -e "$precious_files_regex" >/dev/null 2>&1 + then + continue + fi + fi + removelist="$removelist $p" + ;; + *) ;; + esac + done + if test -n "$removelist"; then + $show "${rm}r $removelist" + $run ${rm}r $removelist + fi + fi + + # Now set the variables for building old libraries. + if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then + oldlibs="$oldlibs $output_objdir/$libname.$libext" + + # Transform .lo files to .o files. + oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` + fi + + # Eliminate all temporary directories. + for path in $notinst_path; do + lib_search_path=`$echo "$lib_search_path " | ${SED} -e "s% $path % %g"` + deplibs=`$echo "$deplibs " | ${SED} -e "s% -L$path % %g"` + dependency_libs=`$echo "$dependency_libs " | ${SED} -e "s% -L$path % %g"` + done + + if test -n "$xrpath"; then + # If the user specified any rpath flags, then add them. + temp_xrpath= + for libdir in $xrpath; do + temp_xrpath="$temp_xrpath -R$libdir" + case "$finalize_rpath " in + *" $libdir "*) ;; + *) finalize_rpath="$finalize_rpath $libdir" ;; + esac + done + if test "$hardcode_into_libs" != yes || test "$build_old_libs" = yes; then + dependency_libs="$temp_xrpath $dependency_libs" + fi + fi + + # Make sure dlfiles contains only unique files that won't be dlpreopened + old_dlfiles="$dlfiles" + dlfiles= + for lib in $old_dlfiles; do + case " $dlprefiles $dlfiles " in + *" $lib "*) ;; + *) dlfiles="$dlfiles $lib" ;; + esac + done + + # Make sure dlprefiles contains only unique files + old_dlprefiles="$dlprefiles" + dlprefiles= + for lib in $old_dlprefiles; do + case "$dlprefiles " in + *" $lib "*) ;; + *) dlprefiles="$dlprefiles $lib" ;; + esac + done + + if test "$build_libtool_libs" = yes; then + if test -n "$rpath"; then + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*) + # these systems don't actually have a c library (as such)! + ;; + *-*-rhapsody* | *-*-darwin1.[012]) + # Rhapsody C library is in the System framework + deplibs="$deplibs -framework System" + ;; + *-*-netbsd*) + # Don't link with libc until the a.out ld.so is fixed. 
+ ;; + *-*-openbsd* | *-*-freebsd* | *-*-dragonfly*) + # Do not include libc due to us having libc/libc_r. + ;; + *-*-sco3.2v5* | *-*-sco5v6*) + # Causes problems with __ctype + ;; + *-*-sysv4.2uw2* | *-*-sysv5* | *-*-unixware* | *-*-OpenUNIX*) + # Compiler inserts libc in the correct place for threads to work + ;; + *) + # Add libc to deplibs on all other systems if necessary. + if test "$build_libtool_need_lc" = "yes"; then + deplibs="$deplibs -lc" + fi + ;; + esac + fi + + # Transform deplibs into only deplibs that can be linked in shared. + name_save=$name + libname_save=$libname + release_save=$release + versuffix_save=$versuffix + major_save=$major + # I'm not sure if I'm treating the release correctly. I think + # release should show up in the -l (ie -lgmp5) so we don't want to + # add it in twice. Is that correct? + release="" + versuffix="" + major="" + newdeplibs= + droppeddeps=no + case $deplibs_check_method in + pass_all) + # Don't check for shared/static. Everything works. + # This might be a little naive. We might want to check + # whether the library exists or not. But this is on + # osf3 & osf4 and I'm not really sure... Just + # implementing what was already the behavior. + newdeplibs=$deplibs + ;; + test_compile) + # This code stresses the "libraries are programs" paradigm to its + # limits. Maybe even breaks it. We compile a program, linking it + # against the deplibs as a proxy for the library. Then we can check + # whether they linked in statically or dynamically with ldd. + $rm conftest.c + cat > conftest.c </dev/null` + for potent_lib in $potential_libs; do + # Follow soft links. + if ls -lLd "$potent_lib" 2>/dev/null \ + | grep " -> " >/dev/null; then + continue + fi + # The statement above tries to avoid entering an + # endless loop below, in case of cyclic links. + # We might still enter an endless loop, since a link + # loop can be closed while we follow links, + # but so what? + potlib="$potent_lib" + while test -h "$potlib" 2>/dev/null; do + potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` + case $potliblink in + [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; + *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; + esac + done + if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ + | ${SED} 10q \ + | $EGREP "$file_magic_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + $echo + $echo "*** Warning: linker path does not have real file for library $a_deplib." + $echo "*** I have the capability to make that library automatically link in when" + $echo "*** you link to this library. But I can only do this if you have a" + $echo "*** shared version of the library, which you do not appear to have" + $echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $echo "*** with $libname but no candidates were found. (...for file magic test)" + else + $echo "*** with $libname and none of the candidates passed a file format test" + $echo "*** using a file magic. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. + ;; + match_pattern*) + set dummy $deplibs_check_method + match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` + for a_deplib in $deplibs; do + name=`expr $a_deplib : '-l\(.*\)'` + # If $name is empty we are operating on a -L argument. 
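+ # (e.g. a_deplib=-lfoo yields name=foo via the expr above, whereas
+ # a_deplib=-L/opt/lib leaves $name empty; values are illustrative.)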
+ if test -n "$name" && test "$name" != "0"; then + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + case " $predeps $postdeps " in + *" $a_deplib "*) + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + ;; + esac + fi + if test -n "$a_deplib" ; then + libname=`eval \\$echo \"$libname_spec\"` + for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do + potential_libs=`ls $i/$libname[.-]* 2>/dev/null` + for potent_lib in $potential_libs; do + potlib="$potent_lib" # see symlink-check above in file_magic test + if eval $echo \"$potent_lib\" 2>/dev/null \ + | ${SED} 10q \ + | $EGREP "$match_pattern_regex" > /dev/null; then + newdeplibs="$newdeplibs $a_deplib" + a_deplib="" + break 2 + fi + done + done + fi + if test -n "$a_deplib" ; then + droppeddeps=yes + $echo + $echo "*** Warning: linker path does not have real file for library $a_deplib." + $echo "*** I have the capability to make that library automatically link in when" + $echo "*** you link to this library. But I can only do this if you have a" + $echo "*** shared version of the library, which you do not appear to have" + $echo "*** because I did check the linker path looking for a file starting" + if test -z "$potlib" ; then + $echo "*** with $libname but no candidates were found. (...for regex pattern test)" + else + $echo "*** with $libname and none of the candidates passed a file format test" + $echo "*** using a regex pattern. Last file checked: $potlib" + fi + fi + else + # Add a -L argument. + newdeplibs="$newdeplibs $a_deplib" + fi + done # Gone through all deplibs. + ;; + none | unknown | *) + newdeplibs="" + tmp_deplibs=`$echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ + -e 's/ -[LR][^ ]*//g'` + if test "X$allow_libtool_libs_with_static_runtimes" = "Xyes" ; then + for i in $predeps $postdeps ; do + # can't use Xsed below, because $i might contain '/' + tmp_deplibs=`$echo "X $tmp_deplibs" | ${SED} -e "1s,^X,," -e "s,$i,,"` + done + fi + if $echo "X $tmp_deplibs" | $Xsed -e 's/[ ]//g' \ + | grep . >/dev/null; then + $echo + if test "X$deplibs_check_method" = "Xnone"; then + $echo "*** Warning: inter-library dependencies are not supported in this platform." + else + $echo "*** Warning: inter-library dependencies are not known to be supported." + fi + $echo "*** All declared inter-library dependencies are being dropped." + droppeddeps=yes + fi + ;; + esac + versuffix=$versuffix_save + major=$major_save + release=$release_save + libname=$libname_save + name=$name_save + + case $host in + *-*-rhapsody* | *-*-darwin1.[012]) + # On Rhapsody replace the C library is the System framework + newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'` + ;; + esac + + if test "$droppeddeps" = yes; then + if test "$module" = yes; then + $echo + $echo "*** Warning: libtool could not satisfy all declared inter-library" + $echo "*** dependencies of module $libname. Therefore, libtool will create" + $echo "*** a static module, that should work as long as the dlopening" + $echo "*** application is linked with the -dlopen flag." + if test -z "$global_symbol_pipe"; then + $echo + $echo "*** However, this would only work if libtool was able to extract symbol" + $echo "*** lists from a program, using \`nm' or equivalent, but libtool could" + $echo "*** not find such a program. So, this module is probably useless." + $echo "*** \`nm' from GNU binutils and a full rebuild may help." 
+ fi + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + else + $echo "*** The inter-library dependencies that have been dropped here will be" + $echo "*** automatically added whenever a program is linked with this library" + $echo "*** or is declared to -dlopen it." + + if test "$allow_undefined" = no; then + $echo + $echo "*** Since this library must not contain undefined symbols," + $echo "*** because either the platform does not support them or" + $echo "*** it was explicitly requested with -no-undefined," + $echo "*** libtool will only create a static version of it." + if test "$build_old_libs" = no; then + oldlibs="$output_objdir/$libname.$libext" + build_libtool_libs=module + build_old_libs=yes + else + build_libtool_libs=no + fi + fi + fi + fi + # Done checking deplibs! + deplibs=$newdeplibs + fi + + + # move library search paths that coincide with paths to not yet + # installed libraries to the beginning of the library search list + new_libs= + for path in $notinst_path; do + case " $new_libs " in + *" -L$path/$objdir "*) ;; + *) + case " $deplibs " in + *" -L$path/$objdir "*) + new_libs="$new_libs -L$path/$objdir" ;; + esac + ;; + esac + done + for deplib in $deplibs; do + case $deplib in + -L*) + case " $new_libs " in + *" $deplib "*) ;; + *) new_libs="$new_libs $deplib" ;; + esac + ;; + *) new_libs="$new_libs $deplib" ;; + esac + done + deplibs="$new_libs" + + + # All the library-specific variables (install_libdir is set above). + library_names= + old_library= + dlname= + + # Test again, we may have decided not to build it any more + if test "$build_libtool_libs" = yes; then + if test "$hardcode_into_libs" = yes; then + # Hardcode the library paths + hardcode_libdirs= + dep_rpath= + rpath="$finalize_rpath" + test "$mode" != relink && rpath="$compile_rpath$rpath" + for libdir in $rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + dep_rpath="$dep_rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + if test -n "$hardcode_libdir_flag_spec_ld"; then + eval dep_rpath=\"$hardcode_libdir_flag_spec_ld\" + else + eval dep_rpath=\"$hardcode_libdir_flag_spec\" + fi + fi + if test -n "$runpath_var" && test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" + fi + test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" + fi + + shlibpath="$finalize_shlibpath" + test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" + if test -n "$shlibpath"; then + eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" + fi + + # Get the real and link names of the library. 
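+ # For a linux-style library (illustrative values), library_names
+ # below might expand to `libfoo.so.3.2.1 libfoo.so.3 libfoo.so';
+ # realname then becomes libfoo.so.3.2.1 and the remaining names are
+ # collected in linknames to be symlinked to it afterwards.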
+ eval shared_ext=\"$shrext_cmds\" + eval library_names=\"$library_names_spec\" + set dummy $library_names + realname="$2" + shift; shift + + if test -n "$soname_spec"; then + eval soname=\"$soname_spec\" + else + soname="$realname" + fi + if test -z "$dlname"; then + dlname=$soname + fi + + lib="$output_objdir/$realname" + linknames= + for link + do + linknames="$linknames $link" + done + + # Use standard objects if they are pic + test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + + # Prepare the list of exported symbols + if test -z "$export_symbols"; then + if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then + $show "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $run $rm $export_symbols + cmds=$export_symbols_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + if len=`expr "X$cmd" : ".*"` && + test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then + $show "$cmd" + $run eval "$cmd" || exit $? + skipped_export=false + else + # The command line is too long to execute in one step. + $show "using reloadable object file for export list..." + skipped_export=: + # Break out early, otherwise skipped_export may be + # set to false by a later but shorter cmd. + break + fi + done + IFS="$save_ifs" + if test -n "$export_symbols_regex"; then + $show "$EGREP -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" + $run eval '$EGREP -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' + $show "$mv \"${export_symbols}T\" \"$export_symbols\"" + $run eval '$mv "${export_symbols}T" "$export_symbols"' + fi + fi + fi + + if test -n "$export_symbols" && test -n "$include_expsyms"; then + $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' + fi + + tmp_deplibs= + for test_deplib in $deplibs; do + case " $convenience " in + *" $test_deplib "*) ;; + *) + tmp_deplibs="$tmp_deplibs $test_deplib" + ;; + esac + done + deplibs="$tmp_deplibs" + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${outputname}x" + generated="$generated $gentop" + + func_extract_archives $gentop $convenience + libobjs="$libobjs $func_extract_archives_result" + fi + fi + + if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then + eval flag=\"$thread_safe_flag_spec\" + linker_flags="$linker_flags $flag" + fi + + # Make a backup of the uninstalled library when relinking + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? + fi + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + eval test_cmds=\"$module_expsym_cmds\" + cmds=$module_expsym_cmds + else + eval test_cmds=\"$module_cmds\" + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + eval test_cmds=\"$archive_expsym_cmds\" + cmds=$archive_expsym_cmds + else + eval test_cmds=\"$archive_cmds\" + cmds=$archive_cmds + fi + fi + + if test "X$skipped_export" != "X:" && + len=`expr "X$test_cmds" : ".*" 2>/dev/null` && + test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then + : + else + # The command line is too long to link in one step, link piecewise. 
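+ # Sketch of the piecewise scheme below (file names illustrative):
+ # the objects are batched into reloadable objects
+ # $output_objdir/$output_la-1.$objext, -2.$objext, ..., each batch
+ # kept under $max_cmd_len, and every reloadable object after the
+ # first links in its predecessor, so the last one stands in for the
+ # whole object list.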
+ $echo "creating reloadable object files..." + + # Save the value of $output and $libobjs because we want to + # use them later. If we have whole_archive_flag_spec, we + # want to use save_libobjs as it was before + # whole_archive_flag_spec was expanded, because we can't + # assume the linker understands whole_archive_flag_spec. + # This may have to be revisited, in case too many + # convenience libraries get linked in and end up exceeding + # the spec. + if test -z "$convenience" || test -z "$whole_archive_flag_spec"; then + save_libobjs=$libobjs + fi + save_output=$output + output_la=`$echo "X$output" | $Xsed -e "$basename"` + + # Clear the reloadable object creation command queue and + # initialize k to one. + test_cmds= + concat_cmds= + objlist= + delfiles= + last_robj= + k=1 + output=$output_objdir/$output_la-${k}.$objext + # Loop over the list of objects to be linked. + for obj in $save_libobjs + do + eval test_cmds=\"$reload_cmds $objlist $last_robj\" + if test "X$objlist" = X || + { len=`expr "X$test_cmds" : ".*" 2>/dev/null` && + test "$len" -le "$max_cmd_len"; }; then + objlist="$objlist $obj" + else + # The command $test_cmds is almost too long, add a + # command to the queue. + if test "$k" -eq 1 ; then + # The first file doesn't have a previous command to add. + eval concat_cmds=\"$reload_cmds $objlist $last_robj\" + else + # All subsequent reloadable object files will link in + # the last one created. + eval concat_cmds=\"\$concat_cmds~$reload_cmds $objlist $last_robj\" + fi + last_robj=$output_objdir/$output_la-${k}.$objext + k=`expr $k + 1` + output=$output_objdir/$output_la-${k}.$objext + objlist=$obj + len=1 + fi + done + # Handle the remaining objects by creating one last + # reloadable object file. All subsequent reloadable object + # files will link in the last one created. + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\${concat_cmds}$reload_cmds $objlist $last_robj\" + + if ${skipped_export-false}; then + $show "generating symbol list for \`$libname.la'" + export_symbols="$output_objdir/$libname.exp" + $run $rm $export_symbols + libobjs=$output + # Append the command to create the export file. + eval concat_cmds=\"\$concat_cmds~$export_symbols_cmds\" + fi + + # Set up a command to remove the reloadable object files + # after they are used. + i=0 + while test "$i" -lt "$k" + do + i=`expr $i + 1` + delfiles="$delfiles $output_objdir/$output_la-${i}.$objext" + done + + $echo "creating a temporary reloadable object file: $output" + + # Loop through the commands generated above and execute them. + save_ifs="$IFS"; IFS='~' + for cmd in $concat_cmds; do + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + libobjs=$output + # Restore the value of output. + output=$save_output + + if test -n "$convenience" && test -n "$whole_archive_flag_spec"; then + eval libobjs=\"\$libobjs $whole_archive_flag_spec\" + fi + # Expand the library linking commands again to reset the + # value of $libobjs for piecewise linking. + + # Do each of the archive commands. + if test "$module" = yes && test -n "$module_cmds" ; then + if test -n "$export_symbols" && test -n "$module_expsym_cmds"; then + cmds=$module_expsym_cmds + else + cmds=$module_cmds + fi + else + if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then + cmds=$archive_expsym_cmds + else + cmds=$archive_cmds + fi + fi + + # Append the command to remove the reloadable object files + # to the just-reset $cmds. 
+ eval cmds=\"\$cmds~\$rm $delfiles\" + fi + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || { + lt_exit=$? + + # Restore the uninstalled library and exit + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)' + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + + # Restore the uninstalled library and exit + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? + + if test -n "$convenience"; then + if test -z "$whole_archive_flag_spec"; then + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + fi + fi + + exit $EXIT_SUCCESS + fi + + # Create links to the real library. + for linkname in $linknames; do + if test "$realname" != "$linkname"; then + $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" + $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? + fi + done + + # If -module or -export-dynamic was specified, set the dlname. + if test "$module" = yes || test "$export_dynamic" = yes; then + # On all known operating systems, these are identical. + dlname="$soname" + fi + fi + ;; + + obj) + if test -n "$deplibs"; then + $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 + fi + + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 + fi + + if test -n "$rpath"; then + $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 + fi + + if test -n "$xrpath"; then + $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 + fi + + if test -n "$vinfo"; then + $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 + fi + + if test -n "$release"; then + $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 + fi + + case $output in + *.lo) + if test -n "$objs$old_deplibs"; then + $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 + exit $EXIT_FAILURE + fi + libobj="$output" + obj=`$echo "X$output" | $Xsed -e "$lo2o"` + ;; + *) + libobj= + obj="$output" + ;; + esac + + # Delete the old objects. + $run $rm $obj $libobj + + # Objects from convenience libraries. This assumes + # single-version convenience libraries. Whenever we create + # different ones for PIC/non-PIC, this we'll have to duplicate + # the extraction. + reload_conv_objs= + gentop= + # reload_cmds runs $LD directly, so let us get rid of + # -Wl from whole_archive_flag_spec + wl= + + if test -n "$convenience"; then + if test -n "$whole_archive_flag_spec"; then + eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" + else + gentop="$output_objdir/${obj}x" + generated="$generated $gentop" + + func_extract_archives $gentop $convenience + reload_conv_objs="$reload_objs $func_extract_archives_result" + fi + fi + + # Create the old-style object. + reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test + + output="$obj" + cmds=$reload_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + + # Exit if we aren't doing a library object file. 
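+ # (libobj is empty here when $output was a plain object such as
+ # foo.o rather than a libtool object foo.lo; example names are
+ # illustrative.)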
+ if test -z "$libobj"; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit $EXIT_SUCCESS
+ fi
+
+ if test "$build_libtool_libs" != yes; then
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ # Create an invalid libtool object if no PIC, so that we don't
+ # accidentally link it into a program.
+ # $show "echo timestamp > $libobj"
+ # $run eval "echo timestamp > $libobj" || exit $?
+ exit $EXIT_SUCCESS
+ fi
+
+ if test -n "$pic_flag" || test "$pic_mode" != default; then
+ # Only do commands if we really have different PIC objects.
+ reload_objs="$libobjs $reload_conv_objs"
+ output="$libobj"
+ cmds=$reload_cmds
+ save_ifs="$IFS"; IFS='~'
+ for cmd in $cmds; do
+ IFS="$save_ifs"
+ eval cmd=\"$cmd\"
+ $show "$cmd"
+ $run eval "$cmd" || exit $?
+ done
+ IFS="$save_ifs"
+ fi
+
+ if test -n "$gentop"; then
+ $show "${rm}r $gentop"
+ $run ${rm}r $gentop
+ fi
+
+ exit $EXIT_SUCCESS
+ ;;
+
+ prog)
+ case $host in
+ *cygwin*) output=`$echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;;
+ esac
+ if test -n "$vinfo"; then
+ $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2
+ fi
+
+ if test -n "$release"; then
+ $echo "$modename: warning: \`-release' is ignored for programs" 1>&2
+ fi
+
+ if test "$preload" = yes; then
+ if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown &&
+ test "$dlopen_self_static" = unknown; then
+ $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support."
+ fi
+ fi
+
+ case $host in
+ *-*-rhapsody* | *-*-darwin1.[012])
+ # On Rhapsody, replace the C library with the System framework
+ compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+ finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'`
+ ;;
+ esac
+
+ case $host in
+ *darwin*)
+ # Don't allow lazy linking; it breaks C++ global constructors
+ if test "$tagname" = CXX ; then
+ compile_command="$compile_command ${wl}-bind_at_load"
+ finalize_command="$finalize_command ${wl}-bind_at_load"
+ fi
+ ;;
+ esac
+
+
+ # move library search paths that coincide with paths to not yet
+ # installed libraries to the beginning of the library search list
+ new_libs=
+ for path in $notinst_path; do
+ case " $new_libs " in
+ *" -L$path/$objdir "*) ;;
+ *)
+ case " $compile_deplibs " in
+ *" -L$path/$objdir "*)
+ new_libs="$new_libs -L$path/$objdir" ;;
+ esac
+ ;;
+ esac
+ done
+ for deplib in $compile_deplibs; do
+ case $deplib in
+ -L*)
+ case " $new_libs " in
+ *" $deplib "*) ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ ;;
+ *) new_libs="$new_libs $deplib" ;;
+ esac
+ done
+ compile_deplibs="$new_libs"
+
+
+ compile_command="$compile_command $compile_deplibs"
+ finalize_command="$finalize_command $finalize_deplibs"
+
+ if test -n "$rpath$xrpath"; then
+ # If the user specified any rpath flags, then add them.
+ for libdir in $rpath $xrpath; do
+ # This is the magic to use -rpath.
+ case "$finalize_rpath " in
+ *" $libdir "*) ;;
+ *) finalize_rpath="$finalize_rpath $libdir" ;;
+ esac
+ done
+ fi
+
+ # Now hardcode the library paths
+ rpath=
+ hardcode_libdirs=
+ for libdir in $compile_rpath $finalize_rpath; do
+ if test -n "$hardcode_libdir_flag_spec"; then
+ if test -n "$hardcode_libdir_separator"; then
+ if test -z "$hardcode_libdirs"; then
+ hardcode_libdirs="$libdir"
+ else
+ # Just accumulate the unique libdirs.
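+ # (e.g. with hardcode_libdir_separator=: and libdirs /usr/lib then
+ # /opt/lib, hardcode_libdirs accumulates to /usr/lib:/opt/lib;
+ # values are illustrative.)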
+ case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$perm_rpath " in + *" $libdir "*) ;; + *) perm_rpath="$perm_rpath $libdir" ;; + esac + fi + case $host in + *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) + testbindir=`$echo "X$libdir" | $Xsed -e 's*/lib$*/bin*'` + case :$dllsearchpath: in + *":$libdir:"*) ;; + *) dllsearchpath="$dllsearchpath:$libdir";; + esac + case :$dllsearchpath: in + *":$testbindir:"*) ;; + *) dllsearchpath="$dllsearchpath:$testbindir";; + esac + ;; + esac + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + compile_rpath="$rpath" + + rpath= + hardcode_libdirs= + for libdir in $finalize_rpath; do + if test -n "$hardcode_libdir_flag_spec"; then + if test -n "$hardcode_libdir_separator"; then + if test -z "$hardcode_libdirs"; then + hardcode_libdirs="$libdir" + else + # Just accumulate the unique libdirs. + case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in + *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) + ;; + *) + hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" + ;; + esac + fi + else + eval flag=\"$hardcode_libdir_flag_spec\" + rpath="$rpath $flag" + fi + elif test -n "$runpath_var"; then + case "$finalize_perm_rpath " in + *" $libdir "*) ;; + *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; + esac + fi + done + # Substitute the hardcoded libdirs into the rpath. + if test -n "$hardcode_libdir_separator" && + test -n "$hardcode_libdirs"; then + libdir="$hardcode_libdirs" + eval rpath=\" $hardcode_libdir_flag_spec\" + fi + finalize_rpath="$rpath" + + if test -n "$libobjs" && test "$build_old_libs" = yes; then + # Transform all the library objects into standard objects. + compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` + fi + + dlsyms= + if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then + if test -n "$NM" && test -n "$global_symbol_pipe"; then + dlsyms="${outputname}S.c" + else + $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 + fi + fi + + if test -n "$dlsyms"; then + case $dlsyms in + "") ;; + *.c) + # Discover the nlist of each of the dlfiles. + nlist="$output_objdir/${outputname}.nm" + + $show "$rm $nlist ${nlist}S ${nlist}T" + $run $rm "$nlist" "${nlist}S" "${nlist}T" + + # Parse the name list into a source file. + $show "creating $output_objdir/$dlsyms" + + test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ +/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ +/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ + +#ifdef __cplusplus +extern \"C\" { +#endif + +/* Prevent the only kind of declaration conflicts we can make. */ +#define lt_preloaded_symbols some_other_symbol + +/* External symbol declarations for the compiler. 
*/\
+"
+
+          if test "$dlself" = yes; then
+            $show "generating symbol list for \`$output'"
+
+            test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist"
+
+            # Add our own program objects to the symbol list.
+            progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP`
+            for arg in $progfiles; do
+              $show "extracting global C symbols from \`$arg'"
+              $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+            done
+
+            if test -n "$exclude_expsyms"; then
+              $run eval '$EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T'
+              $run eval '$mv "$nlist"T "$nlist"'
+            fi
+
+            if test -n "$export_symbols_regex"; then
+              $run eval '$EGREP -e "$export_symbols_regex" "$nlist" > "$nlist"T'
+              $run eval '$mv "$nlist"T "$nlist"'
+            fi
+
+            # Prepare the list of exported symbols
+            if test -z "$export_symbols"; then
+              export_symbols="$output_objdir/$outputname.exp"
+              $run $rm $export_symbols
+              $run eval "${SED} -n -e '/^: @PROGRAM@ $/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"'
+              case $host in
+              *cygwin* | *mingw* )
+                $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                $run eval 'cat "$export_symbols" >> "$output_objdir/$outputname.def"'
+                ;;
+              esac
+            else
+              $run eval "${SED} -e 's/\([].[*^$]\)/\\\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$outputname.exp"'
+              $run eval 'grep -f "$output_objdir/$outputname.exp" < "$nlist" > "$nlist"T'
+              $run eval 'mv "$nlist"T "$nlist"'
+              case $host in
+              *cygwin* | *mingw* )
+                $run eval "echo EXPORTS "'> "$output_objdir/$outputname.def"'
+                $run eval 'cat "$nlist" >> "$output_objdir/$outputname.def"'
+                ;;
+              esac
+            fi
+          fi
+
+          for arg in $dlprefiles; do
+            $show "extracting global C symbols from \`$arg'"
+            name=`$echo "$arg" | ${SED} -e 's%^.*/%%'`
+            $run eval '$echo ": $name " >> "$nlist"'
+            $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'"
+          done
+
+          if test -z "$run"; then
+            # Make sure we have at least an empty file.
+            test -f "$nlist" || : > "$nlist"
+
+            if test -n "$exclude_expsyms"; then
+              $EGREP -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T
+              $mv "$nlist"T "$nlist"
+            fi
+
+            # Try sorting and uniquifying the output.
+            if grep -v "^: " < "$nlist" |
+                if sort -k 3 </dev/null >/dev/null 2>&1; then
+                  sort -k 3
+                else
+                  sort +2
+                fi |
+                uniq > "$nlist"S; then
+              :
+            else
+              grep -v "^: " < "$nlist" > "$nlist"S
+            fi
+
+            if test -f "$nlist"S; then
+              eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"'
+            else
+              $echo '/* NONE */' >> "$output_objdir/$dlsyms"
+            fi
+
+            $echo >> "$output_objdir/$dlsyms" "\
+
+#undef lt_preloaded_symbols
+
+#if defined (__STDC__) && __STDC__
+# define lt_ptr void *
+#else
+# define lt_ptr char *
+# define const
+#endif
+
+/* The mapping between symbol names and symbols.
*/ +" + + case $host in + *cygwin* | *mingw* ) + $echo >> "$output_objdir/$dlsyms" "\ +/* DATA imports from DLLs on WIN32 can't be const, because + runtime relocations are performed -- see ld's documentation + on pseudo-relocs */ +struct { +" + ;; + * ) + $echo >> "$output_objdir/$dlsyms" "\ +const struct { +" + ;; + esac + + + $echo >> "$output_objdir/$dlsyms" "\ + const char *name; + lt_ptr address; +} +lt_preloaded_symbols[] = +{\ +" + + eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms" + + $echo >> "$output_objdir/$dlsyms" "\ + {0, (lt_ptr) 0} +}; + +/* This works around a problem in FreeBSD linker */ +#ifdef FREEBSD_WORKAROUND +static const void *lt_preloaded_setup() { + return lt_preloaded_symbols; +} +#endif + +#ifdef __cplusplus +} +#endif\ +" + fi + + pic_flag_for_symtable= + case $host in + # compiling the symbol table file with pic_flag works around + # a FreeBSD bug that causes programs to crash when -lm is + # linked before any other PIC object. But we must not use + # pic_flag when linking with -static. The problem exists in + # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. + *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag -DFREEBSD_WORKAROUND";; + esac;; + *-*-hpux*) + case "$compile_command " in + *" -static "*) ;; + *) pic_flag_for_symtable=" $pic_flag";; + esac + esac + + # Now compile the dynamic symbol file. + $show "(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" + $run eval '(cd $output_objdir && $LTCC $LTCFLAGS -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? + + # Clean up the generated files. + $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" + $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" + + # Transform the symbol file into the correct name. + case $host in + *cygwin* | *mingw* ) + if test -f "$output_objdir/${outputname}.def" ; then + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}.def $output_objdir/${outputname}S.${objext}%"` + else + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + fi + ;; + * ) + compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` + ;; + esac + ;; + *) + $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 + exit $EXIT_FAILURE + ;; + esac + else + # We keep going just in case the user didn't refer to + # lt_preloaded_symbols. The linker will fail if global_symbol_pipe + # really was required. + + # Nullify the symbol file. + compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` + finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` + fi + + if test "$need_relink" = no || test "$build_libtool_libs" != yes; then + # Replace the output file specification. + compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + link_command="$compile_command$compile_rpath" + + # We have no uninstalled library dependencies, so finalize right now. 
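+      # (Illustrative: after the @OUTPUT@ substitution above, link_command
+      # is an ordinary compiler-driver invocation, e.g. something like
+      #   gcc -o hello hello.o -L/usr/local/lib -lfoo
+      # with the exact flags depending on the configured toolchain.)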
+ $show "$link_command" + $run eval "$link_command" + exit_status=$? + + # Delete the generated files. + if test -n "$dlsyms"; then + $show "$rm $output_objdir/${outputname}S.${objext}" + $run $rm "$output_objdir/${outputname}S.${objext}" + fi + + exit $exit_status + fi + + if test -n "$shlibpath_var"; then + # We should set the shlibpath_var + rpath= + for dir in $temp_rpath; do + case $dir in + [\\/]* | [A-Za-z]:[\\/]*) + # Absolute path. + rpath="$rpath$dir:" + ;; + *) + # Relative path: add a thisdir entry. + rpath="$rpath\$thisdir/$dir:" + ;; + esac + done + temp_rpath="$rpath" + fi + + if test -n "$compile_shlibpath$finalize_shlibpath"; then + compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" + fi + if test -n "$finalize_shlibpath"; then + finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" + fi + + compile_var= + finalize_var= + if test -n "$runpath_var"; then + if test -n "$perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $perm_rpath; do + rpath="$rpath$dir:" + done + compile_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + if test -n "$finalize_perm_rpath"; then + # We should set the runpath_var. + rpath= + for dir in $finalize_perm_rpath; do + rpath="$rpath$dir:" + done + finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " + fi + fi + + if test "$no_install" = yes; then + # We don't need to create a wrapper script. + link_command="$compile_var$compile_command$compile_rpath" + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` + # Delete the old output file. + $run $rm $output + # Link the executable and exit + $show "$link_command" + $run eval "$link_command" || exit $? + exit $EXIT_SUCCESS + fi + + if test "$hardcode_action" = relink; then + # Fast installation is not supported + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + + $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 + $echo "$modename: \`$output' will be relinked during installation" 1>&2 + else + if test "$fast_install" != no; then + link_command="$finalize_var$compile_command$finalize_rpath" + if test "$fast_install" = yes; then + relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` + else + # fast_install is set to needless + relink_command= + fi + else + link_command="$compile_var$compile_command$compile_rpath" + relink_command="$finalize_var$finalize_command$finalize_rpath" + fi + fi + + # Replace the output file specification. + link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` + + # Delete the old output files. + $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname + + $show "$link_command" + $run eval "$link_command" || exit $? + + # Now create the wrapper script. + $show "creating $output" + + # Quote the relink command for shipping. 
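+      # (Sketch: the loop below prefixes relink_command with an assignment
+      # for each variable in $variables_saved_for_relink, e.g.
+      #   PATH="/usr/bin:/bin"; export PATH; (cd /build/dir; ...)
+      # so that the command behaves identically when replayed later.)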
+      if test -n "$relink_command"; then
+        # Preserve any variables that may affect compiler behavior
+        for var in $variables_saved_for_relink; do
+          if eval test -z \"\${$var+set}\"; then
+            relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command"
+          elif eval var_value=\$$var; test -z "$var_value"; then
+            relink_command="$var=; export $var; $relink_command"
+          else
+            var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"`
+            relink_command="$var=\"$var_value\"; export $var; $relink_command"
+          fi
+        done
+        relink_command="(cd `pwd`; $relink_command)"
+        relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"`
+      fi
+
+      # Quote $echo for shipping.
+      if test "X$echo" = "X$SHELL $progpath --fallback-echo"; then
+        case $progpath in
+        [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $progpath --fallback-echo";;
+        *) qecho="$SHELL `pwd`/$progpath --fallback-echo";;
+        esac
+        qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"`
+      else
+        qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"`
+      fi
+
+      # Only actually do things if our run command is non-null.
+      if test -z "$run"; then
+        # win32 will think the script is a binary if it has
+        # a .exe suffix, so we strip it off here.
+        case $output in
+          *.exe) output=`$echo $output|${SED} 's,.exe$,,'` ;;
+        esac
+        # test for cygwin because mv fails w/o .exe extensions
+        case $host in
+          *cygwin*)
+            exeext=.exe
+            outputname=`$echo $outputname|${SED} 's,.exe$,,'` ;;
+          *) exeext= ;;
+        esac
+        case $host in
+          *cygwin* | *mingw* )
+            output_name=`basename $output`
+            output_path=`dirname $output`
+            cwrappersource="$output_path/$objdir/lt-$output_name.c"
+            cwrapper="$output_path/$output_name.exe"
+            $rm $cwrappersource $cwrapper
+            trap "$rm $cwrappersource $cwrapper; exit $EXIT_FAILURE" 1 2 15
+
+            cat > $cwrappersource <<EOF
+/* $cwrappersource - temporary wrapper executable for $objdir/$outputname
+   Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP
+
+   The $output program cannot be directly executed until all the libtool
+   libraries that it depends on are installed.
+
+   This wrapper executable should never be moved out of the build directory.
+   If it is, it will not operate correctly.
+
+   Currently, it simply execs the wrapper *script* "$SHELL $output",
+   but could eventually absorb all of the scripts functionality and
+   exec $objdir/$outputname directly.
+*/
+EOF
+            cat >> $cwrappersource<<"EOF"
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/stat.h>
+
+#if defined(PATH_MAX)
+# define LT_PATHMAX PATH_MAX
+#elif defined(MAXPATHLEN)
+# define LT_PATHMAX MAXPATHLEN
+#else
+# define LT_PATHMAX 1024
+#endif
+
+#ifndef DIR_SEPARATOR
+# define DIR_SEPARATOR '/'
+# define PATH_SEPARATOR ':'
+#endif
+
+#if defined (_WIN32) || defined (__MSDOS__) || defined (__DJGPP__) || \
+  defined (__OS2__)
+# define HAVE_DOS_BASED_FILE_SYSTEM
+# ifndef DIR_SEPARATOR_2
+#  define DIR_SEPARATOR_2 '\\'
+# endif
+# ifndef PATH_SEPARATOR_2
+#  define PATH_SEPARATOR_2 ';'
+# endif
+#endif
+
+#ifndef DIR_SEPARATOR_2
+# define IS_DIR_SEPARATOR(ch) ((ch) == DIR_SEPARATOR)
+#else /* DIR_SEPARATOR_2 */
+# define IS_DIR_SEPARATOR(ch) \
+        (((ch) == DIR_SEPARATOR) || ((ch) == DIR_SEPARATOR_2))
+#endif /* DIR_SEPARATOR_2 */
+
+#ifndef PATH_SEPARATOR_2
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR)
+#else /* PATH_SEPARATOR_2 */
+# define IS_PATH_SEPARATOR(ch) ((ch) == PATH_SEPARATOR_2)
+#endif /* PATH_SEPARATOR_2 */
+
+#define XMALLOC(type, num)      ((type *) xmalloc ((num) * sizeof(type)))
+#define XFREE(stale) do { \
+  if (stale) { free ((void *) stale); stale = 0; } \
+} while (0)
+
+/* -DDEBUG is fairly common in CFLAGS. */
+#undef DEBUG
+#if defined DEBUGWRAPPER
+# define DEBUG(format, ...) fprintf(stderr, format, __VA_ARGS__)
+#else
+# define DEBUG(format, ...)
+#endif
+
+const char *program_name = NULL;
+
+void * xmalloc (size_t num);
+char * xstrdup (const char *string);
+const char * base_name (const char *name);
+char * find_executable(const char *wrapper);
+int check_executable(const char *path);
+char * strendzap(char *str, const char *pat);
+void lt_fatal (const char *message, ...);
+
+int
+main (int argc, char *argv[])
+{
+  char **newargz;
+  int i;
+
+  program_name = (char *) xstrdup (base_name (argv[0]));
+  DEBUG("(main) argv[0] : %s\n",argv[0]);
+  DEBUG("(main) program_name : %s\n",program_name);
+  newargz = XMALLOC(char *, argc+2);
+EOF
+
+            cat >> $cwrappersource <<EOF
+  newargz[0] = (char *) xstrdup("$SHELL");
+EOF
+
+            cat >> $cwrappersource <<"EOF"
+  newargz[1] = find_executable(argv[0]);
+  if (newargz[1] == NULL)
+    lt_fatal("Couldn't find %s", argv[0]);
+  DEBUG("(main) found exe at : %s\n",newargz[1]);
+  /* we know the script has the same name, without the .exe */
+  /* so make sure newargz[1] doesn't end in .exe */
+  strendzap(newargz[1],".exe");
+  for (i = 1; i < argc; i++)
+    newargz[i+1] = xstrdup(argv[i]);
+  newargz[argc+1] = NULL;
+
+  for (i=0; i<argc+1; i++)
+  {
+    DEBUG("(main) newargz[%d] : %s\n",i,newargz[i]);
+    ;
+  }
+
+EOF
+
+            case $host_os in
+              mingw*)
+                cat >> $cwrappersource <<EOF
+  execv("$SHELL",(char const **)newargz);
+EOF
+                ;;
+              *)
+                cat >> $cwrappersource <<EOF
+  execv("$SHELL",newargz);
+EOF
+                ;;
+            esac
+
+            cat >> $cwrappersource <<"EOF"
+  return 127;
+}
+
+void *
+xmalloc (size_t num)
+{
+  void * p = (void *) malloc (num);
+  if (!p)
+    lt_fatal ("Memory exhausted");
+
+  return p;
+}
+
+char *
+xstrdup (const char *string)
+{
+  return string ? strcpy ((char *) xmalloc (strlen (string) + 1), string) : NULL;
+}
+
+const char *
+base_name (const char *name)
+{
+  const char *base;
+
+#if defined (HAVE_DOS_BASED_FILE_SYSTEM)
+  /* Skip over the disk name in MSDOS pathnames. */
+  if (isalpha ((unsigned char)name[0]) && name[1] == ':')
+    name += 2;
+#endif
+
+  for (base = name; *name; name++)
+    if (IS_DIR_SEPARATOR (*name))
+      base = name + 1;
+  return base;
+}
+
+int
+check_executable(const char * path)
+{
+  struct stat st;
+
+  DEBUG("(check_executable) : %s\n", path ? (*path ? path : "EMPTY!") : "NULL!");
+  if ((!path) || (!*path))
+    return 0;
+
+  if ((stat (path, &st) >= 0) &&
+      (
+        /* MinGW & native WIN32 do not support S_IXOTH or S_IXGRP */
+#if defined (S_IXOTH)
+       ((st.st_mode & S_IXOTH) == S_IXOTH) ||
+#endif
+#if defined (S_IXGRP)
+       ((st.st_mode & S_IXGRP) == S_IXGRP) ||
+#endif
+       ((st.st_mode & S_IXUSR) == S_IXUSR))
+      )
+    return 1;
+  else
+    return 0;
+}
+
+/* Searches for the full path of the wrapper.  Returns
+   newly allocated full path name if found, NULL otherwise */
+char *
+find_executable (const char* wrapper)
+{
+  int has_slash = 0;
+  const char* p;
+  const char* p_next;
+  /* static buffer for getcwd */
+  char tmp[LT_PATHMAX + 1];
+  int tmp_len;
+  char* concat_name;
+
+  DEBUG("(find_executable) : %s\n", wrapper ? (*wrapper ? wrapper : "EMPTY!") : "NULL!");
+
+  if ((wrapper == NULL) || (*wrapper == '\0'))
+    return NULL;
+
+  /* Absolute path?
*/ +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + if (isalpha ((unsigned char)wrapper[0]) && wrapper[1] == ':') + { + concat_name = xstrdup (wrapper); + if (check_executable(concat_name)) + return concat_name; + XFREE(concat_name); + } + else + { +#endif + if (IS_DIR_SEPARATOR (wrapper[0])) + { + concat_name = xstrdup (wrapper); + if (check_executable(concat_name)) + return concat_name; + XFREE(concat_name); + } +#if defined (HAVE_DOS_BASED_FILE_SYSTEM) + } +#endif + + for (p = wrapper; *p; p++) + if (*p == '/') + { + has_slash = 1; + break; + } + if (!has_slash) + { + /* no slashes; search PATH */ + const char* path = getenv ("PATH"); + if (path != NULL) + { + for (p = path; *p; p = p_next) + { + const char* q; + size_t p_len; + for (q = p; *q; q++) + if (IS_PATH_SEPARATOR(*q)) + break; + p_len = q - p; + p_next = (*q == '\0' ? q : q + 1); + if (p_len == 0) + { + /* empty path: current directory */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal ("getcwd failed"); + tmp_len = strlen(tmp); + concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + } + else + { + concat_name = XMALLOC(char, p_len + 1 + strlen(wrapper) + 1); + memcpy (concat_name, p, p_len); + concat_name[p_len] = '/'; + strcpy (concat_name + p_len + 1, wrapper); + } + if (check_executable(concat_name)) + return concat_name; + XFREE(concat_name); + } + } + /* not found in PATH; assume curdir */ + } + /* Relative path | not found in path: prepend cwd */ + if (getcwd (tmp, LT_PATHMAX) == NULL) + lt_fatal ("getcwd failed"); + tmp_len = strlen(tmp); + concat_name = XMALLOC(char, tmp_len + 1 + strlen(wrapper) + 1); + memcpy (concat_name, tmp, tmp_len); + concat_name[tmp_len] = '/'; + strcpy (concat_name + tmp_len + 1, wrapper); + + if (check_executable(concat_name)) + return concat_name; + XFREE(concat_name); + return NULL; +} + +char * +strendzap(char *str, const char *pat) +{ + size_t len, patlen; + + assert(str != NULL); + assert(pat != NULL); + + len = strlen(str); + patlen = strlen(pat); + + if (patlen <= len) + { + str += len - patlen; + if (strcmp(str, pat) == 0) + *str = '\0'; + } + return str; +} + +static void +lt_error_core (int exit_status, const char * mode, + const char * message, va_list ap) +{ + fprintf (stderr, "%s: %s: ", program_name, mode); + vfprintf (stderr, message, ap); + fprintf (stderr, ".\n"); + + if (exit_status >= 0) + exit (exit_status); +} + +void +lt_fatal (const char *message, ...) +{ + va_list ap; + va_start (ap, message); + lt_error_core (EXIT_FAILURE, "FATAL", message, ap); + va_end (ap); +} +EOF + # we should really use a build-platform specific compiler + # here, but OTOH, the wrappers (shell script and this C one) + # are only useful if you want to execute the "real" binary. + # Since the "real" binary is built for $host, then this + # wrapper might as well be built for $host, too. + $run $LTCC $LTCFLAGS -s -o $cwrapper $cwrappersource + ;; + esac + $rm $output + trap "$rm $output; exit $EXIT_FAILURE" 1 2 15 + + $echo > $output "\ +#! $SHELL + +# $output - temporary wrapper script for $objdir/$outputname +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# The $output program cannot be directly executed until all the libtool +# libraries that it depends on are installed. +# +# This wrapper script should never be moved out of the build directory. +# If it is, it will not operate correctly. + +# Sed substitution that helps us do robust quoting. 
It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='${SED} -e 1s/^X//' +sed_quote_subst='$sed_quote_subst' + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +relink_command=\"$relink_command\" + +# This environment variable determines our operation mode. +if test \"\$libtool_install_magic\" = \"$magic\"; then + # install mode needs the following variable: + notinst_deplibs='$notinst_deplibs' +else + # When we are sourced in execute mode, \$file and \$echo are already set. + if test \"\$libtool_execute_magic\" != \"$magic\"; then + echo=\"$qecho\" + file=\"\$0\" + # Make sure echo works. + if test \"X\$1\" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift + elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then + # Yippee, \$echo works! + : + else + # Restart under the correct shell, and then maybe \$echo will work. + exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} + fi + fi\ +" + $echo >> $output "\ + + # Find the directory that this script lives in. + thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` + test \"x\$thisdir\" = \"x\$file\" && thisdir=. + + # Follow symbolic links until we get to the real thisdir. + file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\` + while test -n \"\$file\"; do + destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` + + # If there was a directory component, then change thisdir. + if test \"x\$destdir\" != \"x\$file\"; then + case \"\$destdir\" in + [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; + *) thisdir=\"\$thisdir/\$destdir\" ;; + esac + fi + + file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` + file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\` + done + + # Try to get the absolute directory name. + absdir=\`cd \"\$thisdir\" && pwd\` + test -n \"\$absdir\" && thisdir=\"\$absdir\" +" + + if test "$fast_install" = yes; then + $echo >> $output "\ + program=lt-'$outputname'$exeext + progdir=\"\$thisdir/$objdir\" + + if test ! -f \"\$progdir/\$program\" || \\ + { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ + test \"X\$file\" != \"X\$progdir/\$program\"; }; then + + file=\"\$\$-\$program\" + + if test ! -d \"\$progdir\"; then + $mkdir \"\$progdir\" + else + $rm \"\$progdir/\$file\" + fi" + + $echo >> $output "\ + + # relink executable if necessary + if test -n \"\$relink_command\"; then + if relink_command_output=\`eval \$relink_command 2>&1\`; then : + else + $echo \"\$relink_command_output\" >&2 + $rm \"\$progdir/\$file\" + exit $EXIT_FAILURE + fi + fi + + $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || + { $rm \"\$progdir/\$program\"; + $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } + $rm \"\$progdir/\$file\" + fi" + else + $echo >> $output "\ + program='$outputname' + progdir=\"\$thisdir/$objdir\" +" + fi + + $echo >> $output "\ + + if test -f \"\$progdir/\$program\"; then" + + # Export our shlibpath_var if we have one. 
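+        # (For example, on ELF hosts shlibpath_var is typically
+        # LD_LIBRARY_PATH; the wrapper prepends the uninstalled library
+        # directories recorded in $temp_rpath so the freshly built
+        # program can locate its not-yet-installed libraries.)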
+ if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then + $echo >> $output "\ + # Add our own library path to $shlibpath_var + $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" + + # Some systems cannot cope with colon-terminated $shlibpath_var + # The second colon is a workaround for a bug in BeOS R4 sed + $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` + + export $shlibpath_var +" + fi + + # fixup the dll searchpath if we need to. + if test -n "$dllsearchpath"; then + $echo >> $output "\ + # Add the dll search path components to the executable PATH + PATH=$dllsearchpath:\$PATH +" + fi + + $echo >> $output "\ + if test \"\$libtool_execute_magic\" != \"$magic\"; then + # Run the actual program with our arguments. +" + case $host in + # Backslashes separate directories on plain windows + *-*-mingw | *-*-os2*) + $echo >> $output "\ + exec \"\$progdir\\\\\$program\" \${1+\"\$@\"} +" + ;; + + *) + $echo >> $output "\ + exec \"\$progdir/\$program\" \${1+\"\$@\"} +" + ;; + esac + $echo >> $output "\ + \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" + exit $EXIT_FAILURE + fi + else + # The program doesn't exist. + \$echo \"\$0: error: \\\`\$progdir/\$program' does not exist\" 1>&2 + \$echo \"This script is just a wrapper for \$program.\" 1>&2 + $echo \"See the $PACKAGE documentation for more information.\" 1>&2 + exit $EXIT_FAILURE + fi +fi\ +" + chmod +x $output + fi + exit $EXIT_SUCCESS + ;; + esac + + # See if we need to build an old-fashioned archive. + for oldlib in $oldlibs; do + + if test "$build_libtool_libs" = convenience; then + oldobjs="$libobjs_save" + addlibs="$convenience" + build_libtool_libs=no + else + if test "$build_libtool_libs" = module; then + oldobjs="$libobjs_save" + build_libtool_libs=no + else + oldobjs="$old_deplibs $non_pic_objects" + fi + addlibs="$old_convenience" + fi + + if test -n "$addlibs"; then + gentop="$output_objdir/${outputname}x" + generated="$generated $gentop" + + func_extract_archives $gentop $addlibs + oldobjs="$oldobjs $func_extract_archives_result" + fi + + # Do each command in the archive commands. + if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then + cmds=$old_archive_from_new_cmds + else + # POSIX demands no paths to be encoded in archives. We have + # to avoid creating archives with duplicate basenames if we + # might have to extract them afterwards, e.g., when creating a + # static archive out of a convenience library, or when linking + # the entirety of a libtool archive into another (currently + # not supported by libtool). + if (for obj in $oldobjs + do + $echo "X$obj" | $Xsed -e 's%^.*/%%' + done | sort | sort -uc >/dev/null 2>&1); then + : + else + $echo "copying selected object files to avoid basename conflicts..." + + if test -z "$gentop"; then + gentop="$output_objdir/${outputname}x" + generated="$generated $gentop" + + $show "${rm}r $gentop" + $run ${rm}r "$gentop" + $show "$mkdir $gentop" + $run $mkdir "$gentop" + exit_status=$? + if test "$exit_status" -ne 0 && test ! -d "$gentop"; then + exit $exit_status + fi + fi + + save_oldobjs=$oldobjs + oldobjs= + counter=1 + for obj in $save_oldobjs + do + objbase=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` + case " $oldobjs " in + " ") oldobjs=$obj ;; + *[\ /]"$objbase "*) + while :; do + # Make sure we don't pick an alternate name that also + # overlaps. + newobj=lt$counter-$objbase + counter=`expr $counter + 1` + case " $oldobjs " in + *[\ /]"$newobj "*) ;; + *) if test ! 
-f "$gentop/$newobj"; then break; fi ;; + esac + done + $show "ln $obj $gentop/$newobj || cp $obj $gentop/$newobj" + $run ln "$obj" "$gentop/$newobj" || + $run cp "$obj" "$gentop/$newobj" + oldobjs="$oldobjs $gentop/$newobj" + ;; + *) oldobjs="$oldobjs $obj" ;; + esac + done + fi + + eval cmds=\"$old_archive_cmds\" + + if len=`expr "X$cmds" : ".*"` && + test "$len" -le "$max_cmd_len" || test "$max_cmd_len" -le -1; then + cmds=$old_archive_cmds + else + # the command line is too long to link in one step, link in parts + $echo "using piecewise archive linking..." + save_RANLIB=$RANLIB + RANLIB=: + objlist= + concat_cmds= + save_oldobjs=$oldobjs + + # Is there a better way of finding the last object in the list? + for obj in $save_oldobjs + do + last_oldobj=$obj + done + for obj in $save_oldobjs + do + oldobjs="$objlist $obj" + objlist="$objlist $obj" + eval test_cmds=\"$old_archive_cmds\" + if len=`expr "X$test_cmds" : ".*" 2>/dev/null` && + test "$len" -le "$max_cmd_len"; then + : + else + # the above command should be used before it gets too long + oldobjs=$objlist + if test "$obj" = "$last_oldobj" ; then + RANLIB=$save_RANLIB + fi + test -z "$concat_cmds" || concat_cmds=$concat_cmds~ + eval concat_cmds=\"\${concat_cmds}$old_archive_cmds\" + objlist= + fi + done + RANLIB=$save_RANLIB + oldobjs=$objlist + if test "X$oldobjs" = "X" ; then + eval cmds=\"\$concat_cmds\" + else + eval cmds=\"\$concat_cmds~\$old_archive_cmds\" + fi + fi + fi + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + eval cmd=\"$cmd\" + IFS="$save_ifs" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$generated"; then + $show "${rm}r$generated" + $run ${rm}r$generated + fi + + # Now create the libtool archive. + case $output in + *.la) + old_library= + test "$build_old_libs" = yes && old_library="$libname.$libext" + $show "creating $output" + + # Preserve any variables that may affect compiler behavior + for var in $variables_saved_for_relink; do + if eval test -z \"\${$var+set}\"; then + relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" + elif eval var_value=\$$var; test -z "$var_value"; then + relink_command="$var=; export $var; $relink_command" + else + var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` + relink_command="$var=\"$var_value\"; export $var; $relink_command" + fi + done + # Quote the link command for shipping. + relink_command="(cd `pwd`; $SHELL $progpath $preserve_args --mode=relink $libtool_args @inst_prefix_dir@)" + relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` + if test "$hardcode_automatic" = yes ; then + relink_command= + fi + + + # Only create the output if not a dry run. 
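+      # (Sketch of what follows: two flavors of the .la file are written,
+      # the build-tree copy and an "installed" variant saved as
+      # $output_objdir/${outputname}i; they differ mainly in whether the
+      # dependency paths point at build or install locations and in the
+      # installed=yes/no flag.)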
+ if test -z "$run"; then + for installed in no yes; do + if test "$installed" = yes; then + if test -z "$install_libdir"; then + break + fi + output="$output_objdir/$outputname"i + # Replace all uninstalled libtool libraries with the installed ones + newdependency_libs= + for deplib in $dependency_libs; do + case $deplib in + *.la) + name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` + if test -z "$libdir"; then + $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 + exit $EXIT_FAILURE + fi + newdependency_libs="$newdependency_libs $libdir/$name" + ;; + *) newdependency_libs="$newdependency_libs $deplib" ;; + esac + done + dependency_libs="$newdependency_libs" + newdlfiles= + for lib in $dlfiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit $EXIT_FAILURE + fi + newdlfiles="$newdlfiles $libdir/$name" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` + eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` + if test -z "$libdir"; then + $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 + exit $EXIT_FAILURE + fi + newdlprefiles="$newdlprefiles $libdir/$name" + done + dlprefiles="$newdlprefiles" + else + newdlfiles= + for lib in $dlfiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + newdlfiles="$newdlfiles $abs" + done + dlfiles="$newdlfiles" + newdlprefiles= + for lib in $dlprefiles; do + case $lib in + [\\/]* | [A-Za-z]:[\\/]*) abs="$lib" ;; + *) abs=`pwd`"/$lib" ;; + esac + newdlprefiles="$newdlprefiles $abs" + done + dlprefiles="$newdlprefiles" + fi + $rm $output + # place dlname in correct position for cygwin + tdlname=$dlname + case $host,$output,$installed,$module,$dlname in + *cygwin*,*lai,yes,no,*.dll | *mingw*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;; + esac + $echo > $output "\ +# $outputname - a libtool library file +# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP +# +# Please DO NOT delete this file! +# It is necessary for linking the library. + +# The name that we can dlopen(3). +dlname='$tdlname' + +# Names of this library. +library_names='$library_names' + +# The name of the static archive. +old_library='$old_library' + +# Libraries that this one depends upon. +dependency_libs='$dependency_libs' + +# Version information for $libname. +current=$current +age=$age +revision=$revision + +# Is this an already installed library? +installed=$installed + +# Should we warn about portability when linking against -modules? +shouldnotlink=$module + +# Files to dlopen/dlpreopen +dlopen='$dlfiles' +dlpreopen='$dlprefiles' + +# Directory that this library needs to be installed in: +libdir='$install_libdir'" + if test "$installed" = no && test "$need_relink" = yes; then + $echo >> $output "\ +relink_command=\"$relink_command\"" + fi + done + fi + + # Do a symbolic link so that the libtool archive can be found in + # LD_LIBRARY_PATH before the program is installed. + $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" + $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? 
+ ;; + esac + exit $EXIT_SUCCESS + ;; + + # libtool install mode + install) + modename="$modename: install" + + # There may be an optional sh(1) argument at the beginning of + # install_prog (especially on Windows NT). + if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || + # Allow the use of GNU shtool's install command. + $echo "X$nonopt" | grep shtool > /dev/null; then + # Aesthetically quote it. + arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + install_prog="$arg " + arg="$1" + shift + else + install_prog= + arg=$nonopt + fi + + # The real first argument should be the name of the installation program. + # Aesthetically quote it. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog$arg" + + # We need to accept at least all the BSD install flags. + dest= + files= + opts= + prev= + install_type= + isdir=no + stripme= + for arg + do + if test -n "$dest"; then + files="$files $dest" + dest=$arg + continue + fi + + case $arg in + -d) isdir=yes ;; + -f) + case " $install_prog " in + *[\\\ /]cp\ *) ;; + *) prev=$arg ;; + esac + ;; + -g | -m | -o) prev=$arg ;; + -s) + stripme=" -s" + continue + ;; + -*) + ;; + *) + # If the previous option needed an argument, then skip it. + if test -n "$prev"; then + prev= + else + dest=$arg + continue + fi + ;; + esac + + # Aesthetically quote the argument. + arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` + case $arg in + *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") + arg="\"$arg\"" + ;; + esac + install_prog="$install_prog $arg" + done + + if test -z "$install_prog"; then + $echo "$modename: you must specify an install program" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + if test -n "$prev"; then + $echo "$modename: the \`$prev' option requires an argument" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + if test -z "$files"; then + if test -z "$dest"; then + $echo "$modename: no file or destination specified" 1>&2 + else + $echo "$modename: you must specify a destination" 1>&2 + fi + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + # Strip any trailing slash from the destination. + dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` + + # Check to see that the destination is a directory. + test -d "$dest" && isdir=yes + if test "$isdir" = yes; then + destdir="$dest" + destname= + else + destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` + test "X$destdir" = "X$dest" && destdir=. + destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` + + # Not a directory, so check to see that there is only one file specified. + set dummy $files + if test "$#" -gt 2; then + $echo "$modename: \`$dest' is not a directory" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + fi + case $destdir in + [\\/]* | [A-Za-z]:[\\/]*) ;; + *) + for file in $files; do + case $file in + *.lo) ;; + *) + $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + ;; + esac + done + ;; + esac + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + staticlibs= + future_libdirs= + current_libdirs= + for file in $files; do + + # Do each installation. + case $file in + *.$libext) + # Do the static libraries later. + staticlibs="$staticlibs $file" + ;; + + *.la) + # Check to see that this really is a libtool archive. 
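+        # (The test below keys on the header that appears as the second
+        # line of every .la file, "# Generated by $PROGRAM - GNU $PACKAGE";
+        # only the "Generated by" prefix and the package name are matched.)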
+ if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : + else + $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + library_names= + old_library= + relink_command= + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Add the libdir to current_libdirs if it is the destination. + if test "X$destdir" = "X$libdir"; then + case "$current_libdirs " in + *" $libdir "*) ;; + *) current_libdirs="$current_libdirs $libdir" ;; + esac + else + # Note the libdir as a future libdir. + case "$future_libdirs " in + *" $libdir "*) ;; + *) future_libdirs="$future_libdirs $libdir" ;; + esac + fi + + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ + test "X$dir" = "X$file/" && dir= + dir="$dir$objdir" + + if test -n "$relink_command"; then + # Determine the prefix the user has applied to our future dir. + inst_prefix_dir=`$echo "$destdir" | $SED "s%$libdir\$%%"` + + # Don't allow the user to place us outside of our expected + # location b/c this prevents finding dependent libraries that + # are installed to the same prefix. + # At present, this check doesn't affect windows .dll's that + # are installed into $libdir/../bin (currently, that works fine) + # but it's something to keep an eye on. + if test "$inst_prefix_dir" = "$destdir"; then + $echo "$modename: error: cannot install \`$file' to a directory not ending in $libdir" 1>&2 + exit $EXIT_FAILURE + fi + + if test -n "$inst_prefix_dir"; then + # Stick the inst_prefix_dir data into the link command. + relink_command=`$echo "$relink_command" | $SED "s%@inst_prefix_dir@%-inst-prefix-dir $inst_prefix_dir%"` + else + relink_command=`$echo "$relink_command" | $SED "s%@inst_prefix_dir@%%"` + fi + + $echo "$modename: warning: relinking \`$file'" 1>&2 + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + exit $EXIT_FAILURE + fi + fi + + # See the names of the shared library. + set dummy $library_names + if test -n "$2"; then + realname="$2" + shift + shift + + srcname="$realname" + test -n "$relink_command" && srcname="$realname"T + + # Install the shared library and build the symlinks. + $show "$install_prog $dir/$srcname $destdir/$realname" + $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? + if test -n "$stripme" && test -n "$striplib"; then + $show "$striplib $destdir/$realname" + $run eval "$striplib $destdir/$realname" || exit $? + fi + + if test "$#" -gt 0; then + # Delete the old symlinks, and create new ones. + # Try `ln -sf' first, because the `ln' binary might depend on + # the symlink we replace! Solaris /bin/ln does not understand -f, + # so we also need to try rm && ln -s. + for linkname + do + if test "$linkname" != "$realname"; then + $show "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })" + $run eval "(cd $destdir && { $LN_S -f $realname $linkname || { $rm $linkname && $LN_S $realname $linkname; }; })" + fi + done + fi + + # Do each command in the postinstall commands. + lib="$destdir/$realname" + cmds=$postinstall_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || { + lt_exit=$? 
+ + # Restore the uninstalled library and exit + if test "$mode" = relink; then + $run eval '(cd $output_objdir && $rm ${realname}T && $mv ${realname}U $realname)' + fi + + exit $lt_exit + } + done + IFS="$save_ifs" + fi + + # Install the pseudo-library for information purposes. + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + instname="$dir/$name"i + $show "$install_prog $instname $destdir/$name" + $run eval "$install_prog $instname $destdir/$name" || exit $? + + # Maybe install the static library, too. + test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" + ;; + + *.lo) + # Install (i.e. copy) a libtool object. + + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # Deduce the name of the destination old-style object file. + case $destfile in + *.lo) + staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` + ;; + *.$objext) + staticdest="$destfile" + destfile= + ;; + *) + $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + # Install the libtool object if requested. + if test -n "$destfile"; then + $show "$install_prog $file $destfile" + $run eval "$install_prog $file $destfile" || exit $? + fi + + # Install the old object if enabled. + if test "$build_old_libs" = yes; then + # Deduce the name of the old-style object file. + staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` + + $show "$install_prog $staticobj $staticdest" + $run eval "$install_prog \$staticobj \$staticdest" || exit $? + fi + exit $EXIT_SUCCESS + ;; + + *) + # Figure out destination file name, if it wasn't already specified. + if test -n "$destname"; then + destfile="$destdir/$destname" + else + destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + destfile="$destdir/$destfile" + fi + + # If the file is missing, and there is a .exe on the end, strip it + # because it is most likely a libtool script we actually want to + # install + stripped_ext="" + case $file in + *.exe) + if test ! -f "$file"; then + file=`$echo $file|${SED} 's,.exe$,,'` + stripped_ext=".exe" + fi + ;; + esac + + # Do a test to see if this is really a libtool program. + case $host in + *cygwin*|*mingw*) + wrapper=`$echo $file | ${SED} -e 's,.exe$,,'` + ;; + *) + wrapper=$file + ;; + esac + if (${SED} -e '4q' $wrapper | grep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then + notinst_deplibs= + relink_command= + + # Note that it is not necessary on cygwin/mingw to append a dot to + # foo even if both foo and FILE.exe exist: automatic-append-.exe + # behavior happens only for exec(3), not for open(2)! Also, sourcing + # `FILE.' does not work on cygwin managed mounts. + # + # If there is no directory component, then add one. + case $wrapper in + */* | *\\*) . ${wrapper} ;; + *) . ./${wrapper} ;; + esac + + # Check the variables that should have been set. + if test -z "$notinst_deplibs"; then + $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2 + exit $EXIT_FAILURE + fi + + finalize=yes + for lib in $notinst_deplibs; do + # Check to see that each library is installed. + libdir= + if test -f "$lib"; then + # If there is no directory component, then add one. + case $lib in + */* | *\\*) . $lib ;; + *) . ./$lib ;; + esac + fi + libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test + if test -n "$libdir" && test ! 
-f "$libfile"; then + $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 + finalize=no + fi + done + + relink_command= + # Note that it is not necessary on cygwin/mingw to append a dot to + # foo even if both foo and FILE.exe exist: automatic-append-.exe + # behavior happens only for exec(3), not for open(2)! Also, sourcing + # `FILE.' does not work on cygwin managed mounts. + # + # If there is no directory component, then add one. + case $wrapper in + */* | *\\*) . ${wrapper} ;; + *) . ./${wrapper} ;; + esac + + outputname= + if test "$fast_install" = no && test -n "$relink_command"; then + if test "$finalize" = yes && test -z "$run"; then + tmpdir=`func_mktempdir` + file=`$echo "X$file$stripped_ext" | $Xsed -e 's%^.*/%%'` + outputname="$tmpdir/$file" + # Replace the output file specification. + relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` + + $show "$relink_command" + if $run eval "$relink_command"; then : + else + $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 + ${rm}r "$tmpdir" + continue + fi + file="$outputname" + else + $echo "$modename: warning: cannot relink \`$file'" 1>&2 + fi + else + # Install the binary that we compiled earlier. + file=`$echo "X$file$stripped_ext" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` + fi + fi + + # remove .exe since cygwin /usr/bin/install will append another + # one anyway + case $install_prog,$host in + */usr/bin/install*,*cygwin*) + case $file:$destfile in + *.exe:*.exe) + # this is ok + ;; + *.exe:*) + destfile=$destfile.exe + ;; + *:*.exe) + destfile=`$echo $destfile | ${SED} -e 's,.exe$,,'` + ;; + esac + ;; + esac + $show "$install_prog$stripme $file $destfile" + $run eval "$install_prog\$stripme \$file \$destfile" || exit $? + test -n "$outputname" && ${rm}r "$tmpdir" + ;; + esac + done + + for file in $staticlibs; do + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + + # Set up the ranlib parameters. + oldlib="$destdir/$name" + + $show "$install_prog $file $oldlib" + $run eval "$install_prog \$file \$oldlib" || exit $? + + if test -n "$stripme" && test -n "$old_striplib"; then + $show "$old_striplib $oldlib" + $run eval "$old_striplib $oldlib" || exit $? + fi + + # Do each command in the postinstall commands. + cmds=$old_postinstall_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || exit $? + done + IFS="$save_ifs" + done + + if test -n "$future_libdirs"; then + $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 + fi + + if test -n "$current_libdirs"; then + # Maybe just do a dry run. + test -n "$run" && current_libdirs=" -n$current_libdirs" + exec_cmd='$SHELL $progpath $preserve_args --finish$current_libdirs' + else + exit $EXIT_SUCCESS + fi + ;; + + # libtool finish mode + finish) + modename="$modename: finish" + libdirs="$nonopt" + admincmds= + + if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then + for dir + do + libdirs="$libdirs $dir" + done + + for libdir in $libdirs; do + if test -n "$finish_cmds"; then + # Do each command in the finish commands. + cmds=$finish_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" || admincmds="$admincmds + $cmd" + done + IFS="$save_ifs" + fi + if test -n "$finish_eval"; then + # Do the single finish_eval. 
+        eval cmds=\"$finish_eval\"
+        $run eval "$cmds" || admincmds="$admincmds
+       $cmds"
+      fi
+    done
+  fi
+
+  # Exit here if they wanted silent mode.
+  test "$show" = : && exit $EXIT_SUCCESS
+
+  $echo "X----------------------------------------------------------------------" | $Xsed
+  $echo "Libraries have been installed in:"
+  for libdir in $libdirs; do
+    $echo "   $libdir"
+  done
+  $echo
+  $echo "If you ever happen to want to link against installed libraries"
+  $echo "in a given directory, LIBDIR, you must either use libtool, and"
+  $echo "specify the full pathname of the library, or use the \`-LLIBDIR'"
+  $echo "flag during linking and do at least one of the following:"
+  if test -n "$shlibpath_var"; then
+    $echo "   - add LIBDIR to the \`$shlibpath_var' environment variable"
+    $echo "     during execution"
+  fi
+  if test -n "$runpath_var"; then
+    $echo "   - add LIBDIR to the \`$runpath_var' environment variable"
+    $echo "     during linking"
+  fi
+  if test -n "$hardcode_libdir_flag_spec"; then
+    libdir=LIBDIR
+    eval flag=\"$hardcode_libdir_flag_spec\"
+
+    $echo "   - use the \`$flag' linker flag"
+  fi
+  if test -n "$admincmds"; then
+    $echo "   - have your system administrator run these commands:$admincmds"
+  fi
+  if test -f /etc/ld.so.conf; then
+    $echo "   - have your system administrator add LIBDIR to \`/etc/ld.so.conf'"
+  fi
+  $echo
+  $echo "See any operating system documentation about shared libraries for"
+  $echo "more information, such as the ld(1) and ld.so(8) manual pages."
+  $echo "X----------------------------------------------------------------------" | $Xsed
+  exit $EXIT_SUCCESS
+  ;;
+
+# libtool execute mode
+execute)
+  modename="$modename: execute"
+
+  # The first argument is the command name.
+  cmd="$nonopt"
+  if test -z "$cmd"; then
+    $echo "$modename: you must specify a COMMAND" 1>&2
+    $echo "$help"
+    exit $EXIT_FAILURE
+  fi
+
+  # Handle -dlopen flags immediately.
+  for file in $execute_dlfiles; do
+    if test ! -f "$file"; then
+      $echo "$modename: \`$file' is not a file" 1>&2
+      $echo "$help" 1>&2
+      exit $EXIT_FAILURE
+    fi
+
+    dir=
+    case $file in
+    *.la)
+      # Check to see that this really is a libtool archive.
+      if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then :
+      else
+        $echo "$modename: \`$file' is not a valid libtool archive" 1>&2
+        $echo "$help" 1>&2
+        exit $EXIT_FAILURE
+      fi
+
+      # Read the libtool library.
+      dlname=
+      library_names=
+
+      # If there is no directory component, then add one.
+      case $file in
+      */* | *\\*) . $file ;;
+      *) . ./$file ;;
+      esac
+
+      # Skip this library if it cannot be dlopened.
+      if test -z "$dlname"; then
+        # Warn if it was a shared library.
+        test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'"
+        continue
+      fi
+
+      dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+      test "X$dir" = "X$file" && dir=.
+
+      if test -f "$dir/$objdir/$dlname"; then
+        dir="$dir/$objdir"
+      else
+        $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2
+        exit $EXIT_FAILURE
+      fi
+      ;;
+
+    *.lo)
+      # Just add the directory containing the .lo file.
+      dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`
+      test "X$dir" = "X$file" && dir=.
+      ;;
+
+    *)
+      $echo "$modename: warning: \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2
+      continue
+      ;;
+    esac
+
+    # Get the absolute pathname.
+    absdir=`cd "$dir" && pwd`
+    test -n "$absdir" && dir="$absdir"
+
+    # Now add the directory to shlibpath_var.
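+    # (Illustrative: for `libtool --mode=execute -dlopen libfoo.la prog',
+    # dir resolves to the .libs directory holding the uninstalled shared
+    # libfoo, and it is prepended to $shlibpath_var below.)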
+ if eval "test -z \"\$$shlibpath_var\""; then + eval "$shlibpath_var=\"\$dir\"" + else + eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" + fi + done + + # This variable tells wrapper scripts just to set shlibpath_var + # rather than running their programs. + libtool_execute_magic="$magic" + + # Check if any of the arguments is a wrapper script. + args= + for file + do + case $file in + -*) ;; + *) + # Do a test to see if this is really a libtool program. + if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + # If there is no directory component, then add one. + case $file in + */* | *\\*) . $file ;; + *) . ./$file ;; + esac + + # Transform arg to wrapped name. + file="$progdir/$program" + fi + ;; + esac + # Quote arguments (to preserve shell metacharacters). + file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` + args="$args \"$file\"" + done + + if test -z "$run"; then + if test -n "$shlibpath_var"; then + # Export the shlibpath_var. + eval "export $shlibpath_var" + fi + + # Restore saved environment variables + if test "${save_LC_ALL+set}" = set; then + LC_ALL="$save_LC_ALL"; export LC_ALL + fi + if test "${save_LANG+set}" = set; then + LANG="$save_LANG"; export LANG + fi + + # Now prepare to actually exec the command. + exec_cmd="\$cmd$args" + else + # Display what would be done. + if test -n "$shlibpath_var"; then + eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" + $echo "export $shlibpath_var" + fi + $echo "$cmd$args" + exit $EXIT_SUCCESS + fi + ;; + + # libtool clean and uninstall mode + clean | uninstall) + modename="$modename: $mode" + rm="$nonopt" + files= + rmforce= + exit_status=0 + + # This variable tells wrapper scripts just to set variables rather + # than running their programs. + libtool_install_magic="$magic" + + for arg + do + case $arg in + -f) rm="$rm $arg"; rmforce=yes ;; + -*) rm="$rm $arg" ;; + *) files="$files $arg" ;; + esac + done + + if test -z "$rm"; then + $echo "$modename: you must specify an RM program" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + fi + + rmdirs= + + origobjdir="$objdir" + for file in $files; do + dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` + if test "X$dir" = "X$file"; then + dir=. + objdir="$origobjdir" + else + objdir="$dir/$origobjdir" + fi + name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` + test "$mode" = uninstall && objdir="$dir" + + # Remember objdir for removal later, being careful to avoid duplicates + if test "$mode" = clean; then + case " $rmdirs " in + *" $objdir "*) ;; + *) rmdirs="$rmdirs $objdir" ;; + esac + fi + + # Don't error if the file doesn't exist and rm -f was used. + if (test -L "$file") >/dev/null 2>&1 \ + || (test -h "$file") >/dev/null 2>&1 \ + || test -f "$file"; then + : + elif test -d "$file"; then + exit_status=1 + continue + elif test "$rmforce" = yes; then + continue + fi + + rmfiles="$file" + + case $name in + *.la) + # Possibly a libtool archive, so verify it. + if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + . $dir/$name + + # Delete the libtool libraries and symlinks. 
+ for n in $library_names; do + rmfiles="$rmfiles $objdir/$n" + done + test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" + + case "$mode" in + clean) + case " $library_names " in + # " " in the beginning catches empty $dlname + *" $dlname "*) ;; + *) rmfiles="$rmfiles $objdir/$dlname" ;; + esac + test -n "$libdir" && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" + ;; + uninstall) + if test -n "$library_names"; then + # Do each command in the postuninstall commands. + cmds=$postuninstall_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" + if test "$?" -ne 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done + IFS="$save_ifs" + fi + + if test -n "$old_library"; then + # Do each command in the old_postuninstall commands. + cmds=$old_postuninstall_cmds + save_ifs="$IFS"; IFS='~' + for cmd in $cmds; do + IFS="$save_ifs" + eval cmd=\"$cmd\" + $show "$cmd" + $run eval "$cmd" + if test "$?" -ne 0 && test "$rmforce" != yes; then + exit_status=1 + fi + done + IFS="$save_ifs" + fi + # FIXME: should reinstall the best remaining shared library. + ;; + esac + fi + ;; + + *.lo) + # Possibly a libtool object, so verify it. + if (${SED} -e '2q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + + # Read the .lo file + . $dir/$name + + # Add PIC object to the list of files to remove. + if test -n "$pic_object" \ + && test "$pic_object" != none; then + rmfiles="$rmfiles $dir/$pic_object" + fi + + # Add non-PIC object to the list of files to remove. + if test -n "$non_pic_object" \ + && test "$non_pic_object" != none; then + rmfiles="$rmfiles $dir/$non_pic_object" + fi + fi + ;; + + *) + if test "$mode" = clean ; then + noexename=$name + case $file in + *.exe) + file=`$echo $file|${SED} 's,.exe$,,'` + noexename=`$echo $name|${SED} 's,.exe$,,'` + # $file with .exe has already been added to rmfiles, + # add $file without .exe + rmfiles="$rmfiles $file" + ;; + esac + # Do a test to see if this is a libtool program. + if (${SED} -e '4q' $file | grep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then + relink_command= + . $dir/$noexename + + # note $name still contains .exe if it was in $file originally + # as does the version of $file that was added into $rmfiles + rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" + if test "$fast_install" = yes && test -n "$relink_command"; then + rmfiles="$rmfiles $objdir/lt-$name" + fi + if test "X$noexename" != "X$name" ; then + rmfiles="$rmfiles $objdir/lt-${noexename}.c" + fi + fi + fi + ;; + esac + $show "$rm $rmfiles" + $run $rm $rmfiles || exit_status=1 + done + objdir="$origobjdir" + + # Try to remove the ${objdir}s in the directories where we deleted files + for dir in $rmdirs; do + if test -d "$dir"; then + $show "rmdir $dir" + $run rmdir $dir >/dev/null 2>&1 + fi + done + + exit $exit_status + ;; + + "") + $echo "$modename: you must specify a MODE" 1>&2 + $echo "$generic_help" 1>&2 + exit $EXIT_FAILURE + ;; + esac + + if test -z "$exec_cmd"; then + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$generic_help" 1>&2 + exit $EXIT_FAILURE + fi +fi # test -z "$show_help" + +if test -n "$exec_cmd"; then + eval exec $exec_cmd + exit $EXIT_FAILURE +fi + +# We need to display help for each of the modes. +case $mode in +"") $echo \ +"Usage: $modename [OPTION]... [MODE-ARG]... + +Provide generalized library-building support services. 
+ + --config show all configuration variables + --debug enable verbose shell tracing +-n, --dry-run display commands without modifying any files + --features display basic configuration information and exit + --finish same as \`--mode=finish' + --help display this help message and exit + --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] + --quiet same as \`--silent' + --silent don't print informational messages + --tag=TAG use configuration variables from tag TAG + --version print version information + +MODE must be one of the following: + + clean remove files from the build directory + compile compile a source file into a libtool object + execute automatically set library path, then run a program + finish complete the installation of libtool libraries + install install libraries or executables + link create a library or an executable + uninstall remove libraries from an installed directory + +MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for +a more detailed description of MODE. + +Report bugs to ." + exit $EXIT_SUCCESS + ;; + +clean) + $echo \ +"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... + +Remove files from the build directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, object or program, all the files associated +with it are deleted. Otherwise, only FILE itself is deleted using RM." + ;; + +compile) + $echo \ +"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE + +Compile a source file into a libtool library object. + +This mode accepts the following additional options: + + -o OUTPUT-FILE set the output file name to OUTPUT-FILE + -prefer-pic try to building PIC objects only + -prefer-non-pic try to building non-PIC objects only + -static always build a \`.o' file suitable for static linking + +COMPILE-COMMAND is a command to be used in creating a \`standard' object file +from the given SOURCEFILE. + +The output file name is determined by removing the directory component from +SOURCEFILE, then substituting the C source code suffix \`.c' with the +library object suffix, \`.lo'." + ;; + +execute) + $echo \ +"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... + +Automatically set library path, then run a program. + +This mode accepts the following additional options: + + -dlopen FILE add the directory containing FILE to the library path + +This mode sets the library path environment variable according to \`-dlopen' +flags. + +If any of the ARGS are libtool executable wrappers, then they are translated +into their corresponding uninstalled binary, and any of their required library +directories are added to the library path. + +Then, COMMAND is executed, with ARGS as arguments." + ;; + +finish) + $echo \ +"Usage: $modename [OPTION]... --mode=finish [LIBDIR]... + +Complete the installation of libtool libraries. + +Each LIBDIR is a directory that contains libtool libraries. + +The commands that this mode executes may require superuser privileges. Use +the \`--dry-run' option if you just want to see what would be executed." + ;; + +install) + $echo \ +"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... + +Install executables or libraries. + +INSTALL-COMMAND is the installation command. The first component should be +either the \`install' or \`cp' program. 
+ +The rest of the components are interpreted as arguments to that command (only +BSD-compatible install options are recognized)." + ;; + +link) + $echo \ +"Usage: $modename [OPTION]... --mode=link LINK-COMMAND... + +Link object files or libraries together to form another library, or to +create an executable program. + +LINK-COMMAND is a command using the C compiler that you would use to create +a program from several object files. + +The following components of LINK-COMMAND are treated specially: + + -all-static do not do any dynamic linking at all + -avoid-version do not add a version suffix if possible + -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime + -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols + -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) + -export-symbols SYMFILE + try to export only the symbols listed in SYMFILE + -export-symbols-regex REGEX + try to export only the symbols matching REGEX + -LLIBDIR search LIBDIR for required installed libraries + -lNAME OUTPUT-FILE requires the installed library libNAME + -module build a library that can dlopened + -no-fast-install disable the fast-install mode + -no-install link a not-installable executable + -no-undefined declare that a library does not refer to external symbols + -o OUTPUT-FILE create OUTPUT-FILE from the specified objects + -objectlist FILE Use a list of object files found in FILE to specify objects + -precious-files-regex REGEX + don't remove output files matching REGEX + -release RELEASE specify package release information + -rpath LIBDIR the created library will eventually be installed in LIBDIR + -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries + -static do not do any dynamic linking of libtool libraries + -version-info CURRENT[:REVISION[:AGE]] + specify library version info [each variable defaults to 0] + +All other options (arguments beginning with \`-') are ignored. + +Every other argument is treated as a filename. Files ending in \`.la' are +treated as uninstalled libtool libraries, other files are standard or library +object files. + +If the OUTPUT-FILE ends in \`.la', then a libtool library is created, +only library objects (\`.lo' files) may be specified, and \`-rpath' is +required, except when creating a convenience library. + +If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created +using \`ar' and \`ranlib', or on Windows using \`lib'. + +If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file +is created, otherwise an executable program is created." + ;; + +uninstall) + $echo \ +"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... + +Remove libraries from an installation directory. + +RM is the name of the program to use to delete files associated with each FILE +(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed +to RM. + +If FILE is a libtool library, all the files associated with it are deleted. +Otherwise, only FILE itself is deleted using RM." + ;; + +*) + $echo "$modename: invalid operation mode \`$mode'" 1>&2 + $echo "$help" 1>&2 + exit $EXIT_FAILURE + ;; +esac + +$echo +$echo "Try \`$modename --help' for more information about other modes." + +exit $? + +# The TAGs below are defined such that we never get into a situation +# in which we disable both kinds of libraries. 
Given conflicting +# choices, we go for a static library, that is the most portable, +# since we can't tell whether shared libraries were disabled because +# the user asked for that or because the platform doesn't support +# them. This is particularly important on AIX, because we don't +# support having both static and shared libraries enabled at the same +# time on that platform, so we default to a shared-only configuration. +# If a disable-shared tag is given, we'll fallback to a static-only +# configuration. But we'll never go from static-only to shared-only. + +# ### BEGIN LIBTOOL TAG CONFIG: disable-shared +disable_libs=shared +# ### END LIBTOOL TAG CONFIG: disable-shared + +# ### BEGIN LIBTOOL TAG CONFIG: disable-static +disable_libs=static +# ### END LIBTOOL TAG CONFIG: disable-static + +# Local Variables: +# mode:shell-script +# sh-indentation:2 +# End: diff --git a/final/autoconf/m4/build_exeext.m4 b/final/autoconf/m4/build_exeext.m4 new file mode 100644 index 00000000000..1bdecc1ba57 --- /dev/null +++ b/final/autoconf/m4/build_exeext.m4 @@ -0,0 +1,42 @@ +# Check for the extension used for executables on build platform. +# This is necessary for cross-compiling where the build platform +# may differ from the host platform. +AC_DEFUN([AC_BUILD_EXEEXT], +[ +AC_MSG_CHECKING([for executable suffix on build platform]) +AC_CACHE_VAL(ac_cv_build_exeext, +[if test "$CYGWIN" = yes || test "$MINGW32" = yes; then + ac_cv_build_exeext=.exe +else + ac_build_prefix=${build_alias}- + + AC_CHECK_PROG(BUILD_CC, ${ac_build_prefix}gcc, ${ac_build_prefix}gcc) + if test -z "$BUILD_CC"; then + AC_CHECK_PROG(BUILD_CC, gcc, gcc) + if test -z "$BUILD_CC"; then + AC_CHECK_PROG(BUILD_CC, cc, cc, , , /usr/ucb/cc) + fi + fi + test -z "$BUILD_CC" && AC_MSG_ERROR([no acceptable cc found in \$PATH]) + ac_build_link='${BUILD_CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&AS_MESSAGE_LOG_FD' + rm -f conftest* + echo 'int main () { return 0; }' > conftest.$ac_ext + ac_cv_build_exeext= + if AC_TRY_EVAL(ac_build_link); then + for file in conftest.*; do + case $file in + *.c | *.o | *.obj | *.dSYM) ;; + *) ac_cv_build_exeext=`echo $file | sed -e s/conftest//` ;; + esac + done + else + AC_MSG_ERROR([installation or configuration problem: compiler cannot create executables.]) + fi + rm -f conftest* + test x"${ac_cv_build_exeext}" = x && ac_cv_build_exeext=blank +fi]) +BUILD_EXEEXT="" +test x"${ac_cv_build_exeext}" != xblank && BUILD_EXEEXT=${ac_cv_build_exeext} +AC_MSG_RESULT(${ac_cv_build_exeext}) +ac_build_exeext=$BUILD_EXEEXT +AC_SUBST(BUILD_EXEEXT)]) diff --git a/final/autoconf/m4/c_printf_a.m4 b/final/autoconf/m4/c_printf_a.m4 new file mode 100644 index 00000000000..61bac8c9dd1 --- /dev/null +++ b/final/autoconf/m4/c_printf_a.m4 @@ -0,0 +1,31 @@ +# +# Determine if the printf() functions have the %a format character. 
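+# (%a is C99 hexadecimal floating-point output; for example, on an
+# IEEE-754 double, printf("%a", 0.1) prints 0x1.999999999999ap-4, which
+# is exactly the round-trip the test program below relies on.)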
+# This is modified from:
+# http://www.gnu.org/software/ac-archive/htmldoc/ac_cxx_have_ext_slist.html
+AC_DEFUN([AC_C_PRINTF_A],
+[AC_CACHE_CHECK([if printf has the %a format character],[llvm_cv_c_printf_a],
+[AC_LANG_PUSH([C])
+ AC_RUN_IFELSE([
+  AC_LANG_PROGRAM([[
+#include <stdio.h>
+#include <stdlib.h>
+]],[[
+volatile double A, B;
+char Buffer[100];
+A = 1;
+A /= 10.0;
+sprintf(Buffer, "%a", A);
+B = atof(Buffer);
+if (A != B)
+  return (1);
+if (A != 0x1.999999999999ap-4)
+  return (1);
+return (0);]])],
+  llvm_cv_c_printf_a=yes,
+  llvm_cv_c_printf_a=no,
+  llvm_cv_c_printf_a=no)
+ AC_LANG_POP([C])])
+ if test "$llvm_cv_c_printf_a" = "yes"; then
+  AC_DEFINE([HAVE_PRINTF_A],[1],[Define to have the %a format string])
+ fi
+])
diff --git a/final/autoconf/m4/check_gnu_make.m4 b/final/autoconf/m4/check_gnu_make.m4
new file mode 100644
index 00000000000..7355e1c85bb
--- /dev/null
+++ b/final/autoconf/m4/check_gnu_make.m4
@@ -0,0 +1,26 @@
+#
+# Check for GNU Make. This is originally from
+# http://www.gnu.org/software/ac-archive/htmldoc/check_gnu_make.html
+#
+AC_DEFUN([AC_CHECK_GNU_MAKE],
+[AC_CACHE_CHECK([for GNU make],[llvm_cv_gnu_make_command],
dnl Search all the common names for GNU make
+[llvm_cv_gnu_make_command=''
+ for a in "$MAKE" make gmake gnumake ; do
+  if test -z "$a" ; then continue ; fi ;
+  if ( sh -c "$a --version" 2> /dev/null | grep GNU 2>&1 > /dev/null )
+  then
+   llvm_cv_gnu_make_command=$a ;
+   break;
+  fi
+ done])
dnl If there was a GNU version, then set @ifGNUmake@ to the empty string,
dnl '#' otherwise
+ if test "x$llvm_cv_gnu_make_command" != "x" ; then
+  ifGNUmake='' ;
+ else
+  ifGNUmake='#' ;
+  AC_MSG_RESULT("Not found");
+ fi
+ AC_SUBST(ifGNUmake)
+])
diff --git a/final/autoconf/m4/config_makefile.m4 b/final/autoconf/m4/config_makefile.m4
new file mode 100644
index 00000000000..b1eaffdcd85
--- /dev/null
+++ b/final/autoconf/m4/config_makefile.m4
@@ -0,0 +1,9 @@
+#
+# Configure a Makefile without clobbering it if it exists and is not out of
+# date. This macro is unique to LLVM.
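+# A typical use from configure.ac looks like (illustrative only; the
+# path argument here is hypothetical):
+#   AC_CONFIG_MAKEFILE(lib/Support/Makefile)
+# which, at config.status time, copies ${srcdir}/lib/Support/Makefile
+# into the corresponding place in the object tree.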
+# +AC_DEFUN([AC_CONFIG_MAKEFILE], +[AC_CONFIG_COMMANDS($1, + [${llvm_src}/autoconf/mkinstalldirs `dirname $1` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/$1 $1]) +]) diff --git a/final/autoconf/m4/config_project.m4 b/final/autoconf/m4/config_project.m4 new file mode 100644 index 00000000000..eea7faf165c --- /dev/null +++ b/final/autoconf/m4/config_project.m4 @@ -0,0 +1,14 @@ +# +# Provide the arguments and other processing needed for an LLVM project +# +AC_DEFUN([LLVM_CONFIG_PROJECT], + [AC_ARG_WITH([llvmsrc], + AS_HELP_STRING([--with-llvmsrc],[Location of LLVM Source Code]), + [llvm_src="$withval"],[llvm_src="]$1["]) + AC_SUBST(LLVM_SRC,$llvm_src) + AC_ARG_WITH([llvmobj], + AS_HELP_STRING([--with-llvmobj],[Location of LLVM Object Code]), + [llvm_obj="$withval"],[llvm_obj="]$2["]) + AC_SUBST(LLVM_OBJ,$llvm_obj) + AC_CONFIG_COMMANDS([setup],,[llvm_src="${LLVM_SRC}"]) +]) diff --git a/final/autoconf/m4/cxx_flag_check.m4 b/final/autoconf/m4/cxx_flag_check.m4 new file mode 100644 index 00000000000..ab09f2af5cf --- /dev/null +++ b/final/autoconf/m4/cxx_flag_check.m4 @@ -0,0 +1,2 @@ +AC_DEFUN([CXX_FLAG_CHECK], + [AC_SUBST($1, `$CXX $2 -fsyntax-only -xc /dev/null 2>/dev/null && echo $2`)]) diff --git a/final/autoconf/m4/find_std_program.m4 b/final/autoconf/m4/find_std_program.m4 new file mode 100644 index 00000000000..c789df8e641 --- /dev/null +++ b/final/autoconf/m4/find_std_program.m4 @@ -0,0 +1,118 @@ +dnl Check for a standard program that has a bin, include and lib directory +dnl +dnl Parameters: +dnl $1 - prefix directory to check +dnl $2 - program name to check +dnl $3 - header file to check +dnl $4 - library file to check +AC_DEFUN([CHECK_STD_PROGRAM], +[m4_define([allcapsname],translit($2,a-z,A-Z)) +if test -n "$1" -a -d "$1" -a -n "$2" -a -d "$1/bin" -a -x "$1/bin/$2" ; then + AC_SUBST([USE_]allcapsname(),["USE_]allcapsname()[ = 1"]) + AC_SUBST(allcapsname(),[$1/bin/$2]) + AC_SUBST(allcapsname()[_BIN],[$1/bin]) + AC_SUBST(allcapsname()[_DIR],[$1]) + if test -n "$3" -a -d "$1/include" -a -f "$1/include/$3" ; then + AC_SUBST(allcapsname()[_INC],[$1/include]) + fi + if test -n "$4" -a -d "$1/lib" -a -f "$1/lib/$4" ; then + AC_SUBST(allcapsname()[_LIB],[$1/lib]) + fi +fi +]) + +dnl Find a program via --with options, in the path, or well known places +dnl +dnl Parameters: +dnl $1 - program's executable name +dnl $2 - header file name to check (optional) +dnl $3 - library file name to check (optional) +dnl $4 - alternate (long) name for the program +AC_DEFUN([FIND_STD_PROGRAM], +[m4_define([allcapsname],translit($1,a-z,A-Z)) +m4_define([stdprog_long_name],ifelse($4,,translit($1,[ !@#$%^&*()-+={}[]:;"',./?],[-]),translit($4,[ !@#$%^&*()-+={}[]:;"',./?],[-]))) +AC_MSG_CHECKING([for ]stdprog_long_name()[ bin/lib/include locations]) +AC_ARG_WITH($1, + AS_HELP_STRING([--with-]stdprog_long_name()[=DIR], + [Specify that the ]stdprog_long_name()[ install prefix is DIR]), + $1[pfxdir=$withval],$1[pfxdir=nada]) +AC_ARG_WITH($1[-bin], + AS_HELP_STRING([--with-]stdprog_long_name()[-bin=DIR], + [Specify that the ]stdprog_long_name()[ binary is in DIR]), + $1[bindir=$withval],$1[bindir=nada]) +AC_ARG_WITH($1[-lib], + AS_HELP_STRING([--with-]stdprog_long_name()[-lib=DIR], + [Specify that ]stdprog_long_name()[ libraries are in DIR]), + $1[libdir=$withval],$1[libdir=nada]) +AC_ARG_WITH($1[-inc], + AS_HELP_STRING([--with-]stdprog_long_name()[-inc=DIR], + [Specify that the ]stdprog_long_name()[ includes are in DIR]), + $1[incdir=$withval],$1[incdir=nada]) +eval pfxval=\$\{$1pfxdir\} +eval 
binval=\$\{$1bindir\}
+eval incval=\$\{$1incdir\}
+eval libval=\$\{$1libdir\}
+if test "${pfxval}" != "nada" ; then
+  CHECK_STD_PROGRAM(${pfxval},$1,$2,$3)
+elif test "${binval}" != "nada" ; then
+  if test "${libval}" != "nada" ; then
+    if test "${incval}" != "nada" ; then
+      if test -d "${binval}" ; then
+        if test -d "${incval}" ; then
+          if test -d "${libval}" ; then
+            AC_SUBST(allcapsname(),${binval}/$1)
+            AC_SUBST(allcapsname()[_BIN],${binval})
+            AC_SUBST(allcapsname()[_INC],${incval})
+            AC_SUBST(allcapsname()[_LIB],${libval})
+            AC_SUBST([USE_]allcapsname(),["USE_]allcapsname()[ = 1"])
+            AC_MSG_RESULT([found via --with options])
+          else
+            AC_MSG_RESULT([failed])
+            AC_MSG_ERROR([The --with-]$1[-libdir value must be a directory])
+          fi
+        else
+          AC_MSG_RESULT([failed])
+          AC_MSG_ERROR([The --with-]$1[-incdir value must be a directory])
+        fi
+      else
+        AC_MSG_RESULT([failed])
+        AC_MSG_ERROR([The --with-]$1[-bindir value must be a directory])
+      fi
+    else
+      AC_MSG_RESULT([failed])
+      AC_MSG_ERROR([The --with-]$1[-incdir option must be specified])
+    fi
+  else
+    AC_MSG_RESULT([failed])
+    AC_MSG_ERROR([The --with-]$1[-libdir option must be specified])
+  fi
+else
+  tmppfxdir=`which $1 2>&1`
+  if test -n "$tmppfxdir" -a -d "${tmppfxdir%*$1}" -a \
+     -d "${tmppfxdir%*$1}/.." ; then
+    tmppfxdir=`cd "${tmppfxdir%*$1}/.." ; pwd`
+    CHECK_STD_PROGRAM($tmppfxdir,$1,$2,$3)
+    AC_MSG_RESULT([found in PATH at ]$tmppfxdir)
+  else
+    checkresult="yes"
+    eval checkval=\$\{"USE_"allcapsname()\}
+    CHECK_STD_PROGRAM([/usr],$1,$2,$3)
+    if test -z "${checkval}" ; then
+      CHECK_STD_PROGRAM([/usr/local],$1,$2,$3)
+      if test -z "${checkval}" ; then
+        CHECK_STD_PROGRAM([/sw],$1,$2,$3)
+        if test -z "${checkval}" ; then
+          CHECK_STD_PROGRAM([/opt],$1,$2,$3)
+          if test -z "${checkval}" ; then
+            CHECK_STD_PROGRAM([/],$1,$2,$3)
+            if test -z "${checkval}" ; then
+              checkresult="no"
+            fi
+          fi
+        fi
+      fi
+    fi
+    AC_MSG_RESULT($checkresult)
+  fi
+fi
+])
diff --git a/final/autoconf/m4/func_isinf.m4 b/final/autoconf/m4/func_isinf.m4
new file mode 100644
index 00000000000..c936bf920d0
--- /dev/null
+++ b/final/autoconf/m4/func_isinf.m4
@@ -0,0 +1,36 @@
+#
+# This function determines if the isinf function is available on this
+# platform.
+#
+AC_DEFUN([AC_FUNC_ISINF],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_math_h],
+                    [isinf], [<math.h>],
+                    [float f; isinf(f);])
+if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then
+  AC_DEFINE([HAVE_ISINF_IN_MATH_H],1,[Set to 1 if the isinf function is found in <math.h>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_isinf_in_cmath],
+                    [isinf], [<cmath>],
+                    [float f; isinf(f);])
+if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then
+  AC_DEFINE([HAVE_ISINF_IN_CMATH],1,[Set to 1 if the isinf function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_std_isinf_in_cmath],
+                    [std::isinf], [<cmath>],
+                    [float f; std::isinf(f);])
+if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then
+  AC_DEFINE([HAVE_STD_ISINF_IN_CMATH],1,[Set to 1 if the std::isinf function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_finite_in_ieeefp_h],
+                    [finite], [<ieeefp.h>],
+                    [float f; finite(f);])
+if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then
+  AC_DEFINE([HAVE_FINITE_IN_IEEEFP_H],1,[Set to 1 if the finite function is found in <ieeefp.h>])
+fi
+
+])
+
+
diff --git a/final/autoconf/m4/func_isnan.m4 b/final/autoconf/m4/func_isnan.m4
new file mode 100644
index 00000000000..eb5ca0daeb5
--- /dev/null
+++ b/final/autoconf/m4/func_isnan.m4
@@ -0,0 +1,27 @@
+#
+# This function determines if the isnan function is available on this
+# platform.
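+# (The check is made in several spellings because older C/C++ toolchains
+# disagreed on where isnan lives; configure.ac simply invokes
+#   AC_FUNC_ISNAN
+# and client code keys off HAVE_ISNAN_IN_MATH_H, HAVE_ISNAN_IN_CMATH and
+# HAVE_STD_ISNAN_IN_CMATH from the generated config.h.)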
+#
+AC_DEFUN([AC_FUNC_ISNAN],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_isnan_in_math_h],
+                    [isnan], [<math.h>],
+                    [float f; isnan(f);])
+
+if test "$ac_cv_func_isnan_in_math_h" = "yes" ; then
+  AC_DEFINE([HAVE_ISNAN_IN_MATH_H],1,[Set to 1 if the isnan function is found in <math.h>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_isnan_in_cmath],
+                    [isnan], [<cmath>],
+                    [float f; isnan(f);])
+if test "$ac_cv_func_isnan_in_cmath" = "yes" ; then
+  AC_DEFINE([HAVE_ISNAN_IN_CMATH],1,[Set to 1 if the isnan function is found in <cmath>])
+fi
+
+AC_SINGLE_CXX_CHECK([ac_cv_func_std_isnan_in_cmath],
+                    [std::isnan], [<cmath>],
+                    [float f; std::isnan(f);])
+if test "$ac_cv_func_std_isnan_in_cmath" = "yes" ; then
+  AC_DEFINE([HAVE_STD_ISNAN_IN_CMATH],1,[Set to 1 if the std::isnan function is found in <cmath>])
+fi
+])
diff --git a/final/autoconf/m4/func_mmap_file.m4 b/final/autoconf/m4/func_mmap_file.m4
new file mode 100644
index 00000000000..372c87fbe5c
--- /dev/null
+++ b/final/autoconf/m4/func_mmap_file.m4
@@ -0,0 +1,26 @@
+#
+# Check for the ability to mmap a file.
+#
+AC_DEFUN([AC_FUNC_MMAP_FILE],
+[AC_CACHE_CHECK(for mmap of files,
+ac_cv_func_mmap_file,
+[ AC_LANG_PUSH([C])
+  AC_RUN_IFELSE([
+    AC_LANG_PROGRAM([[
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+]],[[
+  int fd;
+  fd = creat ("foo",0777);
+  fd = (int) mmap (0, 1, PROT_READ, MAP_SHARED, fd, 0);
+  unlink ("foo");
+  return (fd != (int) MAP_FAILED);]])],
+  [ac_cv_func_mmap_file=yes],[ac_cv_func_mmap_file=no],[ac_cv_func_mmap_file=no])
+  AC_LANG_POP([C])
+])
+if test "$ac_cv_func_mmap_file" = yes; then
+   AC_DEFINE([HAVE_MMAP_FILE],[],[Define if mmap() can map files into memory])
+   AC_SUBST(MMAP_FILE,[yes])
+fi
+])
diff --git a/final/autoconf/m4/header_mmap_anonymous.m4 b/final/autoconf/m4/header_mmap_anonymous.m4
new file mode 100644
index 00000000000..2270d29557b
--- /dev/null
+++ b/final/autoconf/m4/header_mmap_anonymous.m4
@@ -0,0 +1,21 @@
+#
+# Check for anonymous mmap macros. This is modified from
+# http://www.gnu.org/software/ac-archive/htmldoc/ac_cxx_have_ext_slist.html
+#
+AC_DEFUN([AC_HEADER_MMAP_ANONYMOUS],
+[AC_CACHE_CHECK(for MAP_ANONYMOUS vs. MAP_ANON,
+ac_cv_header_mmap_anon,
+[ AC_LANG_PUSH([C])
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+    [[#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>]],
+    [[mmap (0, 1, PROT_READ, MAP_ANONYMOUS, -1, 0); return (0);]])],
+  ac_cv_header_mmap_anon=yes,
+  ac_cv_header_mmap_anon=no)
+  AC_LANG_POP([C])
+])
+if test "$ac_cv_header_mmap_anon" = yes; then
+   AC_DEFINE([HAVE_MMAP_ANONYMOUS],[1],[Define if mmap() uses MAP_ANONYMOUS to map anonymous pages, or undefine if it uses MAP_ANON])
+fi
+])
diff --git a/final/autoconf/m4/huge_val.m4 b/final/autoconf/m4/huge_val.m4
new file mode 100644
index 00000000000..5fffbfc8d37
--- /dev/null
+++ b/final/autoconf/m4/huge_val.m4
@@ -0,0 +1,20 @@
+#
+# This function determines if the HUGE_VAL macro is compilable with the
+# -pedantic switch or not. XCode < 2.4.1 doesn't get it right.
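+# (The probe below compiles `double x = HUGE_VAL;' under -pedantic, runs
+# it to confirm the value behaves as an ordinary infinity, and substitutes
+# the verdict as HUGE_VAL_SANITY=yes or no; configure.ac just calls
+#   AC_HUGE_VAL_CHECK
+# where needed.)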
+# +AC_DEFUN([AC_HUGE_VAL_CHECK],[ + AC_CACHE_CHECK([for HUGE_VAL sanity], [ac_cv_huge_val_sanity],[ + AC_LANG_PUSH([C++]) + ac_save_CXXFLAGS=$CXXFLAGS + CXXFLAGS=-pedantic + AC_RUN_IFELSE( + AC_LANG_PROGRAM( + [#include ], + [double x = HUGE_VAL; return x != x; ]), + [ac_cv_huge_val_sanity=yes],[ac_cv_huge_val_sanity=no], + [ac_cv_huge_val_sanity=yes]) + CXXFLAGS=$ac_save_CXXFLAGS + AC_LANG_POP([C++]) + ]) + AC_SUBST(HUGE_VAL_SANITY,$ac_cv_huge_val_sanity) +]) diff --git a/final/autoconf/m4/libtool.m4 b/final/autoconf/m4/libtool.m4 new file mode 100644 index 00000000000..a8b5e6a94fc --- /dev/null +++ b/final/autoconf/m4/libtool.m4 @@ -0,0 +1,6389 @@ +# libtool.m4 - Configure libtool for the host system. -*-Autoconf-*- +## Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005 +## Free Software Foundation, Inc. +## Originally by Gordon Matzigkeit , 1996 +## +## This file is free software; the Free Software Foundation gives +## unlimited permission to copy and/or distribute it, with or without +## modifications, as long as this notice is preserved. + +# serial 48 AC_PROG_LIBTOOL + + +# AC_PROVIDE_IFELSE(MACRO-NAME, IF-PROVIDED, IF-NOT-PROVIDED) +# ----------------------------------------------------------- +# If this macro is not defined by Autoconf, define it here. +m4_ifdef([AC_PROVIDE_IFELSE], + [], + [m4_define([AC_PROVIDE_IFELSE], + [m4_ifdef([AC_PROVIDE_$1], + [$2], [$3])])]) + + +# AC_PROG_LIBTOOL +# --------------- +AC_DEFUN([AC_PROG_LIBTOOL], +[AC_REQUIRE([_AC_PROG_LIBTOOL])dnl +dnl If AC_PROG_CXX has already been expanded, run AC_LIBTOOL_CXX +dnl immediately, otherwise, hook it in at the end of AC_PROG_CXX. + AC_PROVIDE_IFELSE([AC_PROG_CXX], + [AC_LIBTOOL_CXX], + [define([AC_PROG_CXX], defn([AC_PROG_CXX])[AC_LIBTOOL_CXX + ])]) +dnl And a similar setup for Fortran 77 support + AC_PROVIDE_IFELSE([AC_PROG_F77], + [AC_LIBTOOL_F77], + [define([AC_PROG_F77], defn([AC_PROG_F77])[AC_LIBTOOL_F77 +])]) + +dnl Quote A][M_PROG_GCJ so that aclocal doesn't bring it in needlessly. +dnl If either AC_PROG_GCJ or A][M_PROG_GCJ have already been expanded, run +dnl AC_LIBTOOL_GCJ immediately, otherwise, hook it in at the end of both. + AC_PROVIDE_IFELSE([AC_PROG_GCJ], + [AC_LIBTOOL_GCJ], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ], + [AC_LIBTOOL_GCJ], + [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ], + [AC_LIBTOOL_GCJ], + [ifdef([AC_PROG_GCJ], + [define([AC_PROG_GCJ], defn([AC_PROG_GCJ])[AC_LIBTOOL_GCJ])]) + ifdef([A][M_PROG_GCJ], + [define([A][M_PROG_GCJ], defn([A][M_PROG_GCJ])[AC_LIBTOOL_GCJ])]) + ifdef([LT_AC_PROG_GCJ], + [define([LT_AC_PROG_GCJ], + defn([LT_AC_PROG_GCJ])[AC_LIBTOOL_GCJ])])])]) +])])# AC_PROG_LIBTOOL + + +# _AC_PROG_LIBTOOL +# ---------------- +AC_DEFUN([_AC_PROG_LIBTOOL], +[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl +AC_BEFORE([$0],[AC_LIBTOOL_CXX])dnl +AC_BEFORE([$0],[AC_LIBTOOL_F77])dnl +AC_BEFORE([$0],[AC_LIBTOOL_GCJ])dnl + +# This can be used to rebuild libtool when needed +LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh" + +# Always use our own libtool. 
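+# (In this tree the generated script is called `mklib' -- see
+# default_ofile below -- hence $(top_builddir)/mklib rather than the
+# conventional ./libtool.)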
+LIBTOOL='$(SHELL) $(top_builddir)/mklib' +AC_SUBST(LIBTOOL)dnl + +# Prevent multiple expansion +define([AC_PROG_LIBTOOL], []) +])# _AC_PROG_LIBTOOL + + +# AC_LIBTOOL_SETUP +# ---------------- +AC_DEFUN([AC_LIBTOOL_SETUP], +[AC_PREREQ(2.60)dnl +AC_REQUIRE([AC_ENABLE_SHARED])dnl +AC_REQUIRE([AC_ENABLE_STATIC])dnl +AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl +AC_REQUIRE([AC_CANONICAL_HOST])dnl +AC_REQUIRE([AC_CANONICAL_BUILD])dnl +AC_REQUIRE([AC_PROG_CC])dnl +AC_REQUIRE([AC_PROG_LD])dnl +AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl +AC_REQUIRE([AC_PROG_NM])dnl + +AC_REQUIRE([AC_PROG_LN_S])dnl +AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl +# Autoconf 2.13's AC_OBJEXT and AC_EXEEXT macros only works for C compilers! +AC_REQUIRE([AC_OBJEXT])dnl +AC_REQUIRE([AC_EXEEXT])dnl +dnl + +AC_LIBTOOL_SYS_MAX_CMD_LEN +AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE +AC_LIBTOOL_OBJDIR + +AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl +_LT_AC_PROG_ECHO_BACKSLASH + +case $host_os in +aix3*) + # AIX sometimes has problems with the GCC collect2 program. For some + # reason, if we set the COLLECT_NAMES environment variable, the problems + # vanish in a puff of smoke. + if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES + fi + ;; +esac + +# Sed substitution that helps us do robust quoting. It backslashifies +# metacharacters that are still active within double-quoted strings. +Xsed='sed -e 1s/^X//' +[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'] + +# Same as above, but do not quote variable references. +[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'] + +# Sed substitution to delay expansion of an escaped shell variable in a +# double_quote_subst'ed string. +delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' + +# Sed substitution to avoid accidental globbing in evaled expressions +no_glob_subst='s/\*/\\\*/g' + +# Constants: +rm="rm -f" + +# Global variables: +default_ofile=mklib +can_build_shared=yes + +# All known linkers require a `.a' archive for static linking (except MSVC, +# which needs '.lib'). +libext=a +ltmain="$ac_aux_dir/ltmain.sh" +ofile="$default_ofile" +with_gnu_ld="$lt_cv_prog_gnu_ld" + +AC_CHECK_TOOL(AR, ar, false) +AC_CHECK_TOOL(RANLIB, ranlib, :) +AC_CHECK_TOOL(STRIP, strip, :) + +old_CC="$CC" +old_CFLAGS="$CFLAGS" + +# Set sane defaults for various variables +test -z "$AR" && AR=ar +test -z "$AR_FLAGS" && AR_FLAGS=cru +test -z "$AS" && AS=as +test -z "$CC" && CC=cc +test -z "$LTCC" && LTCC=$CC +test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS +test -z "$DLLTOOL" && DLLTOOL=dlltool +test -z "$LD" && LD=ld +test -z "$LN_S" && LN_S="ln -s" +test -z "$MAGIC_CMD" && MAGIC_CMD=file +test -z "$NM" && NM=nm +test -z "$SED" && SED=sed +test -z "$OBJDUMP" && OBJDUMP=objdump +test -z "$RANLIB" && RANLIB=: +test -z "$STRIP" && STRIP=: +test -z "$ac_objext" && ac_objext=o + +# Determine commands to create old-style static archives. 
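+# (With the defaults above this expands to roughly `ar cru $oldlib
+# <objects>', with `chmod 644 $oldlib' run after install; a `$RANLIB
+# $oldlib' step is appended below whenever ranlib is available.)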
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs' +old_postinstall_cmds='chmod 644 $oldlib' +old_postuninstall_cmds= + +if test -n "$RANLIB"; then + case $host_os in + openbsd*) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$oldlib" + ;; + *) + old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$oldlib" + ;; + esac + old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" +fi + +_LT_CC_BASENAME([$compiler]) + +# Only perform the check for file, if the check method requires it +case $deplibs_check_method in +file_magic*) + if test "$file_magic_cmd" = '$MAGIC_CMD'; then + AC_PATH_MAGIC + fi + ;; +esac + +AC_PROVIDE_IFELSE([AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no) +AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL], +enable_win32_dll=yes, enable_win32_dll=no) + +AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock],[avoid locking (might break parallel builds)])]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +AC_ARG_WITH([pic], + [AS_HELP_STRING([--with-pic],[try to use only PIC/non-PIC objects @<:@default=use both@:>@])], + [pic_mode="$withval"], + [pic_mode=default]) +test -z "$pic_mode" && pic_mode=default + +# Use C for the default configuration in the libtool script +tagname= +AC_LIBTOOL_LANG_C_CONFIG +_LT_AC_TAGCONFIG +])# AC_LIBTOOL_SETUP + + +# _LT_AC_SYS_COMPILER +# ------------------- +AC_DEFUN([_LT_AC_SYS_COMPILER], +[AC_REQUIRE([AC_PROG_CC])dnl + +# If no C compiler was specified, use CC. +LTCC=${LTCC-"$CC"} + +# If no C compiler flags were specified, use CFLAGS. +LTCFLAGS=${LTCFLAGS-"$CFLAGS"} + +# Allow CC to be a program name with arguments. +compiler=$CC +])# _LT_AC_SYS_COMPILER + + +# _LT_CC_BASENAME(CC) +# ------------------- +# Calculate cc_basename. Skip known compiler wrappers and cross-prefix. +AC_DEFUN([_LT_CC_BASENAME], +[for cc_temp in $1""; do + case $cc_temp in + compile | *[[\\/]]compile | ccache | *[[\\/]]ccache ) ;; + distcc | *[[\\/]]distcc | purify | *[[\\/]]purify ) ;; + \-*) ;; + *) break;; + esac +done +cc_basename=`$echo "X$cc_temp" | $Xsed -e 's%.*/%%' -e "s%^$host_alias-%%"` +]) + + +# _LT_COMPILER_BOILERPLATE +# ------------------------ +# Check for compiler boilerplate output or warnings with +# the simple compiler test code. +AC_DEFUN([_LT_COMPILER_BOILERPLATE], +[ac_outfile=conftest.$ac_objext +printf "$lt_simple_compile_test_code" >conftest.$ac_ext +eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_compiler_boilerplate=`cat conftest.err` +$rm conftest* +])# _LT_COMPILER_BOILERPLATE + + +# _LT_LINKER_BOILERPLATE +# ---------------------- +# Check for linker boilerplate output or warnings with +# the simple link test code. +AC_DEFUN([_LT_LINKER_BOILERPLATE], +[ac_outfile=conftest.$ac_objext +printf "$lt_simple_link_test_code" >conftest.$ac_ext +eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err +_lt_linker_boilerplate=`cat conftest.err` +$rm conftest* +])# _LT_LINKER_BOILERPLATE + + +# _LT_AC_SYS_LIBPATH_AIX +# ---------------------- +# Links a minimal program and checks the executable +# for the system default hardcoded library path. In most cases, +# this is /usr/lib:/lib, but when the MPI compilers are used +# the location of the communication and MPI libs are included too. +# If we don't find anything, use the default library path according +# to the aix ld manual. 
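+# (For example, a line of the form `0      /usr/lib:/lib' in the Import
+# File Strings section of `dump -H' output yields
+# aix_libpath=/usr/lib:/lib; the sed script below extracts that field.)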
+AC_DEFUN([_LT_AC_SYS_LIBPATH_AIX], +[AC_LINK_IFELSE(AC_LANG_PROGRAM,[ +aix_libpath=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; } +}'` +# Check for a 64-bit object if we didn't find anything. +if test -z "$aix_libpath"; then aix_libpath=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e '/Import File Strings/,/^$/ { /^0/ { s/^0 *\(.*\)$/\1/; p; } +}'`; fi],[]) +if test -z "$aix_libpath"; then aix_libpath="/usr/lib:/lib"; fi +])# _LT_AC_SYS_LIBPATH_AIX + + +# _LT_AC_SHELL_INIT(ARG) +# ---------------------- +AC_DEFUN([_LT_AC_SHELL_INIT], +[ifdef([AC_DIVERSION_NOTICE], + [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)], + [AC_DIVERT_PUSH(NOTICE)]) +$1 +AC_DIVERT_POP +])# _LT_AC_SHELL_INIT + + +# _LT_AC_PROG_ECHO_BACKSLASH +# -------------------------- +# Add some code to the start of the generated configure script which +# will find an echo command which doesn't interpret backslashes. +AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH], +[_LT_AC_SHELL_INIT([ +# Check that we are running under the correct shell. +SHELL=${CONFIG_SHELL-/bin/sh} + +case X$ECHO in +X*--fallback-echo) + # Remove one level of quotation (which was required for Make). + ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','` + ;; +esac + +echo=${ECHO-echo} +if test "X[$]1" = X--no-reexec; then + # Discard the --no-reexec flag, and continue. + shift +elif test "X[$]1" = X--fallback-echo; then + # Avoid inline document here, it may be left over + : +elif test "X`($echo '\t') 2>/dev/null`" = 'X\t' ; then + # Yippee, $echo works! + : +else + # Restart under the correct shell. + exec $SHELL "[$]0" --no-reexec ${1+"[$]@"} +fi + +if test "X[$]1" = X--fallback-echo; then + # used as fallback echo + shift + cat </dev/null 2>&1 && unset CDPATH + +if test -z "$ECHO"; then +if test "X${echo_test_string+set}" != Xset; then +# find a string as large as possible, as long as the shell can cope with it + for cmd in 'sed 50q "[$]0"' 'sed 20q "[$]0"' 'sed 10q "[$]0"' 'sed 2q "[$]0"' 'echo test'; do + # expected sizes: less than 2Kb, 1Kb, 512 bytes, 16 bytes, ... + if (echo_test_string=`eval $cmd`) 2>/dev/null && + echo_test_string=`eval $cmd` && + (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null + then + break + fi + done +fi + +if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + : +else + # The Solaris, AIX, and Digital Unix default echo programs unquote + # backslashes. This makes it impossible to quote backslashes using + # echo "$something" | sed 's/\\/\\\\/g' + # + # So, first we look for a working echo in the user's PATH. + + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for dir in $PATH /usr/ucb; do + IFS="$lt_save_ifs" + if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && + test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$dir/echo" + break + fi + done + IFS="$lt_save_ifs" + + if test "X$echo" = Xecho; then + # We didn't find a better echo, so look for alternatives. + if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # This shell has a builtin print -r that does the trick. 
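+    # (ksh's `print -r' writes its arguments raw, without interpreting
+    # backslash escapes -- exactly the behavior the probes above demand.)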
+ echo='print -r' + elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && + test "X$CONFIG_SHELL" != X/bin/ksh; then + # If we have ksh, try running configure again with it. + ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} + export ORIGINAL_CONFIG_SHELL + CONFIG_SHELL=/bin/ksh + export CONFIG_SHELL + exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"} + else + # Try using printf. + echo='printf %s\n' + if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && + echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + # Cool, printf works + : + elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL + export CONFIG_SHELL + SHELL="$CONFIG_SHELL" + export SHELL + echo="$CONFIG_SHELL [$]0 --fallback-echo" + elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && + test "X$echo_testing_string" = 'X\t' && + echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && + test "X$echo_testing_string" = "X$echo_test_string"; then + echo="$CONFIG_SHELL [$]0 --fallback-echo" + else + # maybe with a smaller string... + prev=: + + for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do + if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null + then + break + fi + prev="$cmd" + done + + if test "$prev" != 'sed 50q "[$]0"'; then + echo_test_string=`eval $prev` + export echo_test_string + exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"} + else + # Oops. We lost completely, so just stick with echo. + echo=echo + fi + fi + fi + fi +fi +fi + +# Copy echo and quote the copy suitably for passing to libtool from +# the Makefile, instead of quoting the original, which is used later. +ECHO=$echo +if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then + ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo" +fi + +AC_SUBST(ECHO) +])])# _LT_AC_PROG_ECHO_BACKSLASH + + +# _LT_AC_LOCK +# ----------- +AC_DEFUN([_LT_AC_LOCK], +[AC_ARG_ENABLE([libtool-lock], + [AS_HELP_STRING([--disable-libtool-lock],[avoid locking (might break parallel builds)])]) +test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes + +# Some flags need to be propagated to the compiler or linker for good +# libtool support. +case $host in +ia64-*-hpux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.$ac_objext` in + *ELF-32*) + HPUX_IA64_MODE="32" + ;; + *ELF-64*) + HPUX_IA64_MODE="64" + ;; + esac + fi + rm -rf conftest* + ;; +*-*-irix6*) + # Find out which ABI we are using. 
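+  # (Same trick as the cases above: compile a trivial object, let
+  # /usr/bin/file classify it as 32-bit, N32, or 64-bit ELF, and pick the
+  # matching -melf*/-32/-n32/-64 linker flag below.)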
+ echo '[#]line __oline__ "configure"' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + if test "$lt_cv_prog_gnu_ld" = yes; then + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -melf32bsmip" + ;; + *N32*) + LD="${LD-ld} -melf32bmipn32" + ;; + *64-bit*) + LD="${LD-ld} -melf64bmip" + ;; + esac + else + case `/usr/bin/file conftest.$ac_objext` in + *32-bit*) + LD="${LD-ld} -32" + ;; + *N32*) + LD="${LD-ld} -n32" + ;; + *64-bit*) + LD="${LD-ld} -64" + ;; + esac + fi + fi + rm -rf conftest* + ;; + +x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*|s390*-*linux*|sparc*-*linux*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *32-bit*) + case $host in + x86_64-*linux*) + LD="${LD-ld} -m elf_i386" + ;; + ppc64-*linux*|powerpc64-*linux*) + LD="${LD-ld} -m elf32ppclinux" + ;; + s390x-*linux*) + LD="${LD-ld} -m elf_s390" + ;; + sparc64-*linux*) + LD="${LD-ld} -m elf32_sparc" + ;; + esac + ;; + *64-bit*) + case $host in + x86_64-*linux*) + LD="${LD-ld} -m elf_x86_64" + ;; + ppc*-*linux*|powerpc*-*linux*) + LD="${LD-ld} -m elf64ppc" + ;; + s390*-*linux*) + LD="${LD-ld} -m elf64_s390" + ;; + sparc*-*linux*) + LD="${LD-ld} -m elf64_sparc" + ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +*-*-sco3.2v5*) + # On SCO OpenServer 5, we need -belf to get full-featured binaries. + SAVE_CFLAGS="$CFLAGS" + CFLAGS="$CFLAGS -belf" + AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, + [AC_LANG_PUSH(C) + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) + AC_LANG_POP]) + if test x"$lt_cv_cc_needs_belf" != x"yes"; then + # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf + CFLAGS="$SAVE_CFLAGS" + fi + ;; +sparc*-*solaris*) + # Find out which ABI we are using. + echo 'int i;' > conftest.$ac_ext + if AC_TRY_EVAL(ac_compile); then + case `/usr/bin/file conftest.o` in + *64-bit*) + case $lt_cv_prog_gnu_ld in + yes*) LD="${LD-ld} -m elf64_sparc" ;; + *) LD="${LD-ld} -64" ;; + esac + ;; + esac + fi + rm -rf conftest* + ;; + +AC_PROVIDE_IFELSE([AC_LIBTOOL_WIN32_DLL], +[*-*-cygwin* | *-*-mingw* | *-*-pw32*) + AC_CHECK_TOOL(DLLTOOL, dlltool, false) + AC_CHECK_TOOL(AS, as, false) + AC_CHECK_TOOL(OBJDUMP, objdump, false) + ;; + ]) +esac + +need_locks="$enable_libtool_lock" + +])# _LT_AC_LOCK + + +# AC_LIBTOOL_COMPILER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [OUTPUT-FILE], [ACTION-SUCCESS], [ACTION-FAILURE]) +# ---------------------------------------------------------------- +# Check whether the given compiler option works +AC_DEFUN([AC_LIBTOOL_COMPILER_OPTION], +[AC_REQUIRE([LT_AC_PROG_SED]) +AC_CACHE_CHECK([$1], [$2], + [$2=no + ifelse([$4], , [ac_outfile=conftest.$ac_objext], [ac_outfile=$4]) + printf "$lt_simple_compile_test_code" > conftest.$ac_ext + lt_compiler_flag="$3" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + # The option is referenced via a variable to avoid confusing sed. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>conftest.err) + ac_status=$? 
+ cat conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:__oline__: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s "$ac_outfile"; then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings other than the usual output. + $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' >conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + fi + $rm conftest* +]) + +if test x"[$]$2" = xyes; then + ifelse([$5], , :, [$5]) +else + ifelse([$6], , :, [$6]) +fi +])# AC_LIBTOOL_COMPILER_OPTION + + +# AC_LIBTOOL_LINKER_OPTION(MESSAGE, VARIABLE-NAME, FLAGS, +# [ACTION-SUCCESS], [ACTION-FAILURE]) +# ------------------------------------------------------------ +# Check whether the given compiler option works +AC_DEFUN([AC_LIBTOOL_LINKER_OPTION], +[AC_CACHE_CHECK([$1], [$2], + [$2=no + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS $3" + printf "$lt_simple_link_test_code" > conftest.$ac_ext + if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then + # The linker can only warn and ignore the option if not recognized + # So say no if there are warnings + if test -s conftest.err; then + # Append any errors to the config.log. + cat conftest.err 1>&AS_MESSAGE_LOG_FD + $echo "X$_lt_linker_boilerplate" | $Xsed -e '/^$/d' > conftest.exp + $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2 + if diff conftest.exp conftest.er2 >/dev/null; then + $2=yes + fi + else + $2=yes + fi + fi + $rm conftest* + LDFLAGS="$save_LDFLAGS" +]) + +if test x"[$]$2" = xyes; then + ifelse([$4], , :, [$4]) +else + ifelse([$5], , :, [$5]) +fi +])# AC_LIBTOOL_LINKER_OPTION + + +# AC_LIBTOOL_SYS_MAX_CMD_LEN +# -------------------------- +AC_DEFUN([AC_LIBTOOL_SYS_MAX_CMD_LEN], +[# find the maximum length of command line arguments +AC_MSG_CHECKING([the maximum length of command line arguments]) +AC_CACHE_VAL([lt_cv_sys_max_cmd_len], [dnl + i=0 + teststring="ABCD" + + case $build_os in + msdosdjgpp*) + # On DJGPP, this test can blow up pretty badly due to problems in libc + # (any single argument exceeding 2000 bytes causes a buffer overrun + # during glob expansion). Even if it were fixed, the result of this + # check would be larger than it should be. + lt_cv_sys_max_cmd_len=12288; # 12K is about right + ;; + + gnu*) + # Under GNU Hurd, this test is not required because there is + # no limit to the length of command line arguments. + # Libtool will interpret -1 as no limit whatsoever + lt_cv_sys_max_cmd_len=-1; + ;; + + cygwin* | mingw*) + # On Win9x/ME, this test blows up -- it succeeds, but takes + # about 5 minutes as the teststring grows exponentially. + # Worse, since 9x/ME are not pre-emptively multitasking, + # you end up with a "frozen" computer, even though with patience + # the test eventually succeeds (with a max line length of 256k). + # Instead, let's just punt: use the minimum linelength reported by + # all of the supported platforms: 8192 (on NT/2K/XP). + lt_cv_sys_max_cmd_len=8192; + ;; + + amigaos*) + # On AmigaOS with pdksh, this test takes hours, literally. + # So we just punt and use a minimum line length of 8192. + lt_cv_sys_max_cmd_len=8192; + ;; + + netbsd* | freebsd* | openbsd* | darwin* | dragonfly*) + # This has been around since 386BSD, at least. Likely further. 
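+    # (For instance, `sysctl -n kern.argmax' printing 262144 gives a raw
+    # limit of 262144, which the expr arithmetic below scales to 3/4 as a
+    # safety zone.)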
+ if test -x /sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax` + elif test -x /usr/sbin/sysctl; then + lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax` + else + lt_cv_sys_max_cmd_len=65536 # usable default for all BSDs + fi + # And add a safety zone + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4` + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3` + ;; + + interix*) + # We know the value 262144 and hardcode it with a safety zone (like BSD) + lt_cv_sys_max_cmd_len=196608 + ;; + + osf*) + # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure + # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not + # nice to cause kernel panics so lets avoid the loop below. + # First set a reasonable default. + lt_cv_sys_max_cmd_len=16384 + # + if test -x /sbin/sysconfig; then + case `/sbin/sysconfig -q proc exec_disable_arg_limit` in + *1*) lt_cv_sys_max_cmd_len=-1 ;; + esac + fi + ;; + sco3.2v5*) + lt_cv_sys_max_cmd_len=102400 + ;; + sysv5* | sco5v6* | sysv4.2uw2*) + kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null` + if test -n "$kargmax"; then + lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[[ ]]//'` + else + lt_cv_sys_max_cmd_len=32768 + fi + ;; + *) + # If test is not a shell built-in, we'll probably end up computing a + # maximum length that is only half of the actual maximum length, but + # we can't tell. + SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}} + while (test "X"`$SHELL [$]0 --fallback-echo "X$teststring" 2>/dev/null` \ + = "XX$teststring") >/dev/null 2>&1 && + new_result=`expr "X$teststring" : ".*" 2>&1` && + lt_cv_sys_max_cmd_len=$new_result && + test $i != 17 # 1/2 MB should be enough + do + i=`expr $i + 1` + teststring=$teststring$teststring + done + teststring= + # Add a significant safety factor because C++ compilers can tack on massive + # amounts of additional arguments before passing them to the linker. + # It appears as though 1/2 is a usable value. + lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2` + ;; + esac +]) +if test -n $lt_cv_sys_max_cmd_len ; then + AC_MSG_RESULT($lt_cv_sys_max_cmd_len) +else + AC_MSG_RESULT(none) +fi +])# AC_LIBTOOL_SYS_MAX_CMD_LEN + + +# _LT_AC_CHECK_DLFCN +# ------------------ +AC_DEFUN([_LT_AC_CHECK_DLFCN], +[AC_CHECK_HEADERS(dlfcn.h)dnl +])# _LT_AC_CHECK_DLFCN + + +# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, +# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) +# --------------------------------------------------------------------- +AC_DEFUN([_LT_AC_TRY_DLOPEN_SELF], +[AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl +if test "$cross_compiling" = yes; then : + [$4] +else + lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 + lt_status=$lt_dlunknown + cat > conftest.$ac_ext < +#endif + +#include + +#ifdef RTLD_GLOBAL +# define LT_DLGLOBAL RTLD_GLOBAL +#else +# ifdef DL_GLOBAL +# define LT_DLGLOBAL DL_GLOBAL +# else +# define LT_DLGLOBAL 0 +# endif +#endif + +/* We may have to define LT_DLLAZY_OR_NOW in the command line if we + find out it does not work in some platform. 
*/ +#ifndef LT_DLLAZY_OR_NOW +# ifdef RTLD_LAZY +# define LT_DLLAZY_OR_NOW RTLD_LAZY +# else +# ifdef DL_LAZY +# define LT_DLLAZY_OR_NOW DL_LAZY +# else +# ifdef RTLD_NOW +# define LT_DLLAZY_OR_NOW RTLD_NOW +# else +# ifdef DL_NOW +# define LT_DLLAZY_OR_NOW DL_NOW +# else +# define LT_DLLAZY_OR_NOW 0 +# endif +# endif +# endif +# endif +#endif + +#ifdef __cplusplus +extern "C" void exit (int); +#endif + +void fnord() { int i=42;} +int main () +{ + void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); + int status = $lt_dlunknown; + + if (self) + { + if (dlsym (self,"fnord")) status = $lt_dlno_uscore; + else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; + /* dlclose (self); */ + } + else + puts (dlerror ()); + + exit (status); +}] +EOF + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then + (./conftest; exit; ) >&AS_MESSAGE_LOG_FD 2>/dev/null + lt_status=$? + case x$lt_status in + x$lt_dlno_uscore) $1 ;; + x$lt_dlneed_uscore) $2 ;; + x$lt_dlunknown|x*) $3 ;; + esac + else : + # compilation failed + $3 + fi +fi +rm -fr conftest* +])# _LT_AC_TRY_DLOPEN_SELF + + +# AC_LIBTOOL_DLOPEN_SELF +# ---------------------- +AC_DEFUN([AC_LIBTOOL_DLOPEN_SELF], +[AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl +if test "x$enable_dlopen" != xyes; then + enable_dlopen=unknown + enable_dlopen_self=unknown + enable_dlopen_self_static=unknown +else + lt_cv_dlopen=no + lt_cv_dlopen_libs= + + case $host_os in + beos*) + lt_cv_dlopen="load_add_on" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ;; + + mingw* | pw32*) + lt_cv_dlopen="LoadLibrary" + lt_cv_dlopen_libs= + ;; + + cygwin*) + lt_cv_dlopen="dlopen" + lt_cv_dlopen_libs= + ;; + + darwin*) + # if libdl is installed we need to link against it + AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"],[ + lt_cv_dlopen="dyld" + lt_cv_dlopen_libs= + lt_cv_dlopen_self=yes + ]) + ;; + + *) + AC_CHECK_FUNC([shl_load], + [lt_cv_dlopen="shl_load"], + [AC_CHECK_LIB([dld], [shl_load], + [lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-dld"], + [AC_CHECK_FUNC([dlopen], + [lt_cv_dlopen="dlopen"], + [AC_CHECK_LIB([dl], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], + [AC_CHECK_LIB([svld], [dlopen], + [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], + [AC_CHECK_LIB([dld], [dld_link], + [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) + ]) + ]) + ]) + ]) + ]) + ;; + esac + + if test "x$lt_cv_dlopen" != xno; then + enable_dlopen=yes + else + enable_dlopen=no + fi + + case $lt_cv_dlopen in + dlopen) + save_CPPFLAGS="$CPPFLAGS" + test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" + + save_LDFLAGS="$LDFLAGS" + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" + + save_LIBS="$LIBS" + LIBS="$lt_cv_dlopen_libs $LIBS" + + AC_CACHE_CHECK([whether a program can dlopen itself], + lt_cv_dlopen_self, [dnl + _LT_AC_TRY_DLOPEN_SELF( + lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, + lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) + ]) + + if test "x$lt_cv_dlopen_self" = xyes; then + wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\" + AC_CACHE_CHECK([whether a statically linked program can dlopen itself], + lt_cv_dlopen_self_static, [dnl + _LT_AC_TRY_DLOPEN_SELF( + lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, + lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) + ]) + fi + + CPPFLAGS="$save_CPPFLAGS" + LDFLAGS="$save_LDFLAGS" + LIBS="$save_LIBS" + ;; + esac + + case $lt_cv_dlopen_self in + yes|no) 
enable_dlopen_self=$lt_cv_dlopen_self ;; + *) enable_dlopen_self=unknown ;; + esac + + case $lt_cv_dlopen_self_static in + yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; + *) enable_dlopen_self_static=unknown ;; + esac +fi +])# AC_LIBTOOL_DLOPEN_SELF + + +# AC_LIBTOOL_PROG_CC_C_O([TAGNAME]) +# --------------------------------- +# Check to see if options -c and -o are simultaneously supported by compiler +AC_DEFUN([AC_LIBTOOL_PROG_CC_C_O], +[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl +AC_CACHE_CHECK([if $compiler supports -c -o file.$ac_objext], + [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)], + [_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=no + $rm -r conftest 2>/dev/null + mkdir conftest + cd conftest + mkdir out + printf "$lt_simple_compile_test_code" > conftest.$ac_ext + + lt_compiler_flag="-o out/conftest2.$ac_objext" + # Insert the option either (1) after the last *FLAGS variable, or + # (2) before a word containing "conftest.", or (3) at the end. + # Note that $ac_compile itself does not contain backslashes and begins + # with a dollar sign (not a hyphen), so the echo should work correctly. + lt_compile=`echo "$ac_compile" | $SED \ + -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ + -e 's: [[^ ]]*conftest\.: $lt_compiler_flag&:; t' \ + -e 's:$: $lt_compiler_flag:'` + (eval echo "\"\$as_me:__oline__: $lt_compile\"" >&AS_MESSAGE_LOG_FD) + (eval "$lt_compile" 2>out/conftest.err) + ac_status=$? + cat out/conftest.err >&AS_MESSAGE_LOG_FD + echo "$as_me:__oline__: \$? = $ac_status" >&AS_MESSAGE_LOG_FD + if (exit $ac_status) && test -s out/conftest2.$ac_objext + then + # The compiler can only warn and ignore the option if not recognized + # So say no if there are warnings + $echo "X$_lt_compiler_boilerplate" | $Xsed -e '/^$/d' > out/conftest.exp + $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2 + if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 >/dev/null; then + _LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)=yes + fi + fi + chmod u+w . 2>&AS_MESSAGE_LOG_FD + $rm conftest* + # SGI C++ compiler will create directory out/ii_files/ for + # template instantiation + test -d out/ii_files && $rm out/ii_files/* && rmdir out/ii_files + $rm out/* && rmdir out + cd .. + rmdir conftest + $rm conftest* +]) +])# AC_LIBTOOL_PROG_CC_C_O + + +# AC_LIBTOOL_SYS_HARD_LINK_LOCKS([TAGNAME]) +# ----------------------------------------- +# Check to see if we can do hard links to lock some files if needed +AC_DEFUN([AC_LIBTOOL_SYS_HARD_LINK_LOCKS], +[AC_REQUIRE([_LT_AC_LOCK])dnl + +hard_links="nottested" +if test "$_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1)" = no && test "$need_locks" != no; then + # do not overwrite the value of need_locks provided by the user + AC_MSG_CHECKING([if we can lock with hard links]) + hard_links=yes + $rm conftest* + ln conftest.a conftest.b 2>/dev/null && hard_links=no + touch conftest.a + ln conftest.a conftest.b 2>&5 || hard_links=no + ln conftest.a conftest.b 2>/dev/null && hard_links=no + AC_MSG_RESULT([$hard_links]) + if test "$hard_links" = no; then + AC_MSG_WARN([`$CC' does not support `-c -o', so `make -j' may be unsafe]) + need_locks=warn + fi +else + need_locks=no +fi +])# AC_LIBTOOL_SYS_HARD_LINK_LOCKS + + +# AC_LIBTOOL_OBJDIR +# ----------------- +AC_DEFUN([AC_LIBTOOL_OBJDIR], +[AC_CACHE_CHECK([for objdir], [lt_cv_objdir], +[rm -f .libs 2>/dev/null +mkdir .libs 2>/dev/null +if test -d .libs; then + lt_cv_objdir=.libs +else + # MS-DOS does not allow filenames that begin with a dot. 
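+  # (hence the `_libs' fallback below; wherever the mkdir probe above
+  # succeeds, the usual `.libs' is used.)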
+ lt_cv_objdir=_libs +fi +rmdir .libs 2>/dev/null]) +objdir=$lt_cv_objdir +])# AC_LIBTOOL_OBJDIR + + +# AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH([TAGNAME]) +# ---------------------------------------------- +# Check hardcoding attributes. +AC_DEFUN([AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH], +[AC_MSG_CHECKING([how to hardcode library paths into programs]) +_LT_AC_TAGVAR(hardcode_action, $1)= +if test -n "$_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)" || \ + test -n "$_LT_AC_TAGVAR(runpath_var, $1)" || \ + test "X$_LT_AC_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then + + # We can hardcode non-existant directories. + if test "$_LT_AC_TAGVAR(hardcode_direct, $1)" != no && + # If the only mechanism to avoid hardcoding is shlibpath_var, we + # have to relink, otherwise we might link with an installed library + # when we should be linking with a yet-to-be-installed one + ## test "$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)" != no && + test "$_LT_AC_TAGVAR(hardcode_minus_L, $1)" != no; then + # Linking always hardcodes the temporary library directory. + _LT_AC_TAGVAR(hardcode_action, $1)=relink + else + # We can link without hardcoding, and we can hardcode nonexisting dirs. + _LT_AC_TAGVAR(hardcode_action, $1)=immediate + fi +else + # We cannot hardcode anything, or else we can only hardcode existing + # directories. + _LT_AC_TAGVAR(hardcode_action, $1)=unsupported +fi +AC_MSG_RESULT([$_LT_AC_TAGVAR(hardcode_action, $1)]) + +if test "$_LT_AC_TAGVAR(hardcode_action, $1)" = relink; then + # Fast installation is not supported + enable_fast_install=no +elif test "$shlibpath_overrides_runpath" = yes || + test "$enable_shared" = no; then + # Fast installation is not necessary + enable_fast_install=needless +fi +])# AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH + + +# AC_LIBTOOL_SYS_LIB_STRIP +# ------------------------ +AC_DEFUN([AC_LIBTOOL_SYS_LIB_STRIP], +[striplib= +old_striplib= +AC_MSG_CHECKING([whether stripping libraries is possible]) +if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then + test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" + test -z "$striplib" && striplib="$STRIP --strip-unneeded" + AC_MSG_RESULT([yes]) +else +# FIXME - insert some real tests, host_os isn't really good enough + case $host_os in + darwin*) + if test -n "$STRIP" ; then + striplib="$STRIP -x" + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) +fi + ;; + *) + AC_MSG_RESULT([no]) + ;; + esac +fi +])# AC_LIBTOOL_SYS_LIB_STRIP + + +# AC_LIBTOOL_SYS_DYNAMIC_LINKER +# ----------------------------- +# PORTME Fill in your ld.so characteristics +AC_DEFUN([AC_LIBTOOL_SYS_DYNAMIC_LINKER], +[AC_MSG_CHECKING([dynamic linker characteristics]) +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +if test "$GCC" = yes; then + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` + if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. 
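+  # (i.e. `a;b;c' becomes `a b c' via the first sed branch; otherwise the
+  # list is split on the native $PATH_SEPARATOR, normally `:'.)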
+    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'`
+  else
+    sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+  fi
+else
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix4* | aix5*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependent libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[[01]] | aix4.[[01]].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+           echo ' yes '
+           echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then
+	:
+      else
+	can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so we currently cannot
+    # hardcode the correct soname into the executable.  Probably we can
+    # add versioning support to collect2, so additional links can be
+    # useful in the future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  library_names_spec='$libname.ixlibrary $libname.a'
+  # Create ${libname}_ixlibrary.a entries in /sys/libs.
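+  # (The finish_eval below symlinks each installed .ixlibrary into
+  # /sys/libs under a ${libname}_ixlibrary.a name.)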
+ finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([[^/]]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[[45]]*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$host_os in + yes,cygwin* | yes,mingw* | yes,pw32*) + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $rm \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib" + ;; + mingw*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` + if echo "$sys_lib_search_path_spec" | [grep ';[c-zC-Z]:/' >/dev/null]; then + # It is most probably a Windows format PATH printed by + # mingw gcc, but we are running on Cygwin. Gcc prints its search + # path with ; separators, and with drive letters. We can handle the + # drive letters (cygwin fileutils understands them), so leave them, + # especially as we might pass files found there to a mingw objdump, + # which wouldn't understand a cygwinified path. Ahh. + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext}' + ;; + esac + ;; + + *) + library_names_spec='${libname}`echo ${release} | $SED -e 's/[[.]]/-/g'`${versuffix}${shared_ext} $libname.lib' + ;; + esac + dynamic_linker='Win32 ld.exe' + # FIXME: first we should search . 
and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='.dylib'
+  # The output of Apple's 'gcc -print-search-dirs' is formatted differently,
+  # so it is parsed specially here.
+  if test "$GCC" = yes; then
+    sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"`
+  else
+    sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib'
+  fi
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd1*)
+  dynamic_linker=no
+  ;;
+
+kfreebsd*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='GNU ld.so'
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[[123]]*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[[01]]* | freebsdelf3.[[01]]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[[2-9]]* | freebsdelf3.[[2-9]]* | \
+  freebsd4.[[0-5]] | freebsdelf4.[[0-5]] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  freebsd*) # from 4.6 on
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
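+  # (sunos-style versioning: the full version is embedded in the library
+  # name, while the soname carries only the major number.)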
+ version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555. + postinstall_cmds='chmod 555 $lib' + ;; + +interix3*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. 
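+# (Any Linux host not matched above, i.e. any modern ELF system, takes
+# this branch.)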
+linux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \[$]2)); skip = 1; } { if (!skip) print \[$]0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. + dynamic_linker='GNU/Linux ld.so' + ;; + +knetbsd*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='GNU ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +nto-qnx*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
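+  # Illustrative example: on openbsd3.3 the check below sets
+  # need_version=yes, so installed names keep a numeric suffix such as
+  # libfoo.so.1.0.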
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[[89]] | openbsd2.[[89]].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + export_dynamic_flag_spec='${wl}-Blargedynsym' + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + shlibpath_overrides_runpath=no + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + shlibpath_overrides_runpath=yes + case $host_os in + 
sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +AC_MSG_RESULT([$dynamic_linker]) +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi +])# AC_LIBTOOL_SYS_DYNAMIC_LINKER + + +# _LT_AC_TAGCONFIG +# ---------------- +AC_DEFUN([_LT_AC_TAGCONFIG], +[AC_ARG_WITH([tags], + [AS_HELP_STRING([--with-tags@<:@=TAGS@:>@],[include additional configurations @<:@automatic@:>@])], + [tagnames="$withval"]) + +if test -f "$ltmain" && test -n "$tagnames"; then + if test ! -f "${ofile}"; then + AC_MSG_WARN([output file `$ofile' does not exist]) + fi + + if test -z "$LTCC"; then + eval "`$SHELL ${ofile} --config | grep '^LTCC='`" + if test -z "$LTCC"; then + AC_MSG_WARN([output file `$ofile' does not look like a libtool script]) + else + AC_MSG_WARN([using `LTCC=$LTCC', extracted from `$ofile']) + fi + fi + if test -z "$LTCFLAGS"; then + eval "`$SHELL ${ofile} --config | grep '^LTCFLAGS='`" + fi + + # Extract list of available tagged configurations in $ofile. + # Note that this assumes the entire list is on one line. + available_tags=`grep "^available_tags=" "${ofile}" | $SED -e 's/available_tags=\(.*$\)/\1/' -e 's/\"//g'` + + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for tagname in $tagnames; do + IFS="$lt_save_ifs" + # Check whether tagname contains only valid characters + case `$echo "X$tagname" | $Xsed -e 's:[[-_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890,/]]::g'` in + "") ;; + *) AC_MSG_ERROR([invalid tag name: $tagname]) + ;; + esac + + if grep "^# ### BEGIN LIBTOOL TAG CONFIG: $tagname$" < "${ofile}" > /dev/null + then + AC_MSG_ERROR([tag name "$tagname" already exists]) + fi + + # Update the list of available tags. + if test -n "$tagname"; then + echo appending configuration tag \"$tagname\" to $ofile + + case $tagname in + CXX) + if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + AC_LIBTOOL_LANG_CXX_CONFIG + else + tagname="" + fi + ;; + + F77) + if test -n "$F77" && test "X$F77" != "Xno"; then + AC_LIBTOOL_LANG_F77_CONFIG + else + tagname="" + fi + ;; + + GCJ) + if test -n "$GCJ" && test "X$GCJ" != "Xno"; then + AC_LIBTOOL_LANG_GCJ_CONFIG + else + tagname="" + fi + ;; + + RC) + AC_LIBTOOL_LANG_RC_CONFIG + ;; + + *) + AC_MSG_ERROR([Unsupported tag name: $tagname]) + ;; + esac + + # Append the new tag name to the list of available tags. + if test -n "$tagname" ; then + available_tags="$available_tags $tagname" + fi + fi + done + IFS="$lt_save_ifs" + + # Now substitute the updated list of available tags. 
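+  # (The sed command below rewrites the available_tags= line inside the
+  # generated libtool script, e.g. to available_tags=" CXX F77".)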
+ if eval "sed -e 's/^available_tags=.*\$/available_tags=\"$available_tags\"/' \"$ofile\" > \"${ofile}T\""; then + mv "${ofile}T" "$ofile" + chmod +x "$ofile" + else + rm -f "${ofile}T" + AC_MSG_ERROR([unable to update list of available tagged configurations.]) + fi +fi +])# _LT_AC_TAGCONFIG + + +# AC_LIBTOOL_DLOPEN +# ----------------- +# enable checks for dlopen support +AC_DEFUN([AC_LIBTOOL_DLOPEN], + [AC_BEFORE([$0],[AC_LIBTOOL_SETUP]) +])# AC_LIBTOOL_DLOPEN + + +# AC_LIBTOOL_WIN32_DLL +# -------------------- +# declare package support for building win32 DLLs +AC_DEFUN([AC_LIBTOOL_WIN32_DLL], +[AC_BEFORE([$0], [AC_LIBTOOL_SETUP]) +])# AC_LIBTOOL_WIN32_DLL + + +# AC_ENABLE_SHARED([DEFAULT]) +# --------------------------- +# implement the --enable-shared flag +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +AC_DEFUN([AC_ENABLE_SHARED], +[define([enable_shared_default], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE([shared], + AS_HELP_STRING([--enable-shared@<:@=PKGS@:>@],[build shared libraries @<:@default=enable_shared_default@:>@]), + [p=${PACKAGE-default} + case $enableval in + yes) enable_shared=yes ;; + no) enable_shared=no ;; + *) + enable_shared=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_shared=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_shared=]enable_shared_default) +])# AC_ENABLE_SHARED + + +# AC_DISABLE_SHARED +# ----------------- +# set the default shared flag to --disable-shared +AC_DEFUN([AC_DISABLE_SHARED], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_SHARED(no) +])# AC_DISABLE_SHARED + + +# AC_ENABLE_STATIC([DEFAULT]) +# --------------------------- +# implement the --enable-static flag +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +AC_DEFUN([AC_ENABLE_STATIC], +[define([enable_static_default], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE([static], + AS_HELP_STRING([--enable-static@<:@=PKGS@:>@],[build static libraries @<:@default=enable_static_default@:>@]), + [p=${PACKAGE-default} + case $enableval in + yes) enable_static=yes ;; + no) enable_static=no ;; + *) + enable_static=no + # Look at the argument we got. We use all the common list separators. + lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR," + for pkg in $enableval; do + IFS="$lt_save_ifs" + if test "X$pkg" = "X$p"; then + enable_static=yes + fi + done + IFS="$lt_save_ifs" + ;; + esac], + [enable_static=]enable_static_default) +])# AC_ENABLE_STATIC + + +# AC_DISABLE_STATIC +# ----------------- +# set the default static flag to --disable-static +AC_DEFUN([AC_DISABLE_STATIC], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl +AC_ENABLE_STATIC(no) +])# AC_DISABLE_STATIC + + +# AC_ENABLE_FAST_INSTALL([DEFAULT]) +# --------------------------------- +# implement the --enable-fast-install flag +# DEFAULT is either `yes' or `no'. If omitted, it defaults to `yes'. +AC_DEFUN([AC_ENABLE_FAST_INSTALL], +[define([enable_Fast_install_default], ifelse($1, no, no, yes))dnl +AC_ARG_ENABLE([fast-install], + AS_HELP_STRING([--enable-fast-install@<:@=PKGS@:>@],[optimize for fast installation @<:@default=enable_Fast_install_default@:>@]), + [p=${PACKAGE-default} + case $enableval in + yes) enable_fast_install=yes ;; + no) enable_fast_install=no ;; + *) + enable_fast_install=no + # Look at the argument we got. We use all the common list separators. 
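+    # Illustrative example: --enable-fast-install=foo,bar enables the
+    # feature only when $PACKAGE is foo or bar.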
+    lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+    for pkg in $enableval; do
+      IFS="$lt_save_ifs"
+      if test "X$pkg" = "X$p"; then
+	enable_fast_install=yes
+      fi
+    done
+    IFS="$lt_save_ifs"
+    ;;
+  esac],
+  [enable_fast_install=]enable_Fast_install_default)
+])# AC_ENABLE_FAST_INSTALL
+
+
+# AC_DISABLE_FAST_INSTALL
+# -----------------------
+# set the default to --disable-fast-install
+AC_DEFUN([AC_DISABLE_FAST_INSTALL],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+AC_ENABLE_FAST_INSTALL(no)
+])# AC_DISABLE_FAST_INSTALL
+
+
+# AC_LIBTOOL_PICMODE([MODE])
+# --------------------------
+# implement the --with-pic flag
+# MODE is either `yes' or `no'.  If omitted, it defaults to `both'.
+AC_DEFUN([AC_LIBTOOL_PICMODE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+pic_mode=ifelse($#,1,$1,default)
+])# AC_LIBTOOL_PICMODE
+
+
+# AC_PROG_EGREP
+# -------------
+# This is predefined starting with Autoconf 2.54, so this conditional
+# definition can be removed once we require Autoconf 2.54 or later.
+m4_ifndef([AC_PROG_EGREP], [AC_DEFUN([AC_PROG_EGREP],
+[AC_CACHE_CHECK([for egrep], [ac_cv_prog_egrep],
+   [if echo a | (grep -E '(a|b)') >/dev/null 2>&1
+    then ac_cv_prog_egrep='grep -E'
+    else ac_cv_prog_egrep='egrep'
+    fi])
+ EGREP=$ac_cv_prog_egrep
+ AC_SUBST([EGREP])
+])])
+
+
+# AC_PATH_TOOL_PREFIX
+# -------------------
+# find a file program which can recognise a shared library
+AC_DEFUN([AC_PATH_TOOL_PREFIX],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_MSG_CHECKING([for $1])
+AC_CACHE_VAL(lt_cv_path_MAGIC_CMD,
+[case $MAGIC_CMD in
+[[\\/*] | ?:[\\/]*])
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+dnl $ac_dummy forces splitting on constant user-supplied paths.
+dnl POSIX.2 word splitting is done only on the output of word expansions,
+dnl not every word.  This closes a longstanding sh security hole.
+  ac_dummy="ifelse([$2], , $PATH, [$2])"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/$1; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/$1"
+      if test -n "$file_magic_test_file"; then
+	case $deplibs_check_method in
+	"file_magic "*)
+	  file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+	  MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+	  if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+	    $EGREP "$file_magic_regex" > /dev/null; then
+	    :
+	  else
+	    cat <<EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.
Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+EOF
+	  fi ;;
+	esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac])
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  AC_MSG_RESULT($MAGIC_CMD)
+else
+  AC_MSG_RESULT(no)
+fi
+])# AC_PATH_TOOL_PREFIX
+
+
+# AC_PATH_MAGIC
+# -------------
+# find a file program which can recognise a shared library
+AC_DEFUN([AC_PATH_MAGIC],
+[AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin$PATH_SEPARATOR$PATH)
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    AC_PATH_TOOL_PREFIX(file, /usr/bin$PATH_SEPARATOR$PATH)
+  else
+    MAGIC_CMD=:
+  fi
+fi
+])# AC_PATH_MAGIC
+
+
+# AC_PROG_LD
+# ----------
+# find the pathname to the GNU or non-GNU linker
+AC_DEFUN([AC_PROG_LD],
+[AC_ARG_WITH([gnu-ld],
+    [AS_HELP_STRING([--with-gnu-ld],[assume the C compiler uses GNU ld @<:@default=no@:>@])],
+    [test "$withval" = no || with_gnu_ld=yes],
+    [with_gnu_ld=no])
+AC_REQUIRE([LT_AC_PROG_SED])dnl
+AC_REQUIRE([AC_PROG_CC])dnl
+AC_REQUIRE([AC_CANONICAL_HOST])dnl
+AC_REQUIRE([AC_CANONICAL_BUILD])dnl
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  AC_MSG_CHECKING([for ld used by $CC])
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [[\\/]]* | ?:[[\\/]]*)
+      re_direlt='/[[^/]][[^/]]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`echo $ac_prog| $SED 's%\\\\%/%g'`
+      while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do
+	ac_prog=`echo $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  AC_MSG_CHECKING([for GNU ld])
+else
+  AC_MSG_CHECKING([for non-GNU ld])
+fi
+AC_CACHE_VAL(lt_cv_path_LD,
+[if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+	test "$with_gnu_ld" != no && break
+	;;
+      *)
+	test "$with_gnu_ld" != yes && break
+	;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi])
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  AC_MSG_RESULT($LD)
+else
+  AC_MSG_RESULT(no)
+fi
+test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH])
+AC_PROG_LD_GNU
+])# AC_PROG_LD
+
+
+# AC_PROG_LD_GNU
+# --------------
+AC_DEFUN([AC_PROG_LD_GNU],
+[AC_REQUIRE([AC_PROG_EGREP])dnl
+AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld,
+[# I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac])
+with_gnu_ld=$lt_cv_prog_gnu_ld
+])# AC_PROG_LD_GNU
+
+
+# AC_PROG_LD_RELOAD_FLAG
+# ----------------------
+# find reload flag for linker
+#   -- PORTME Some linkers may need a different reload flag.
+AC_DEFUN([AC_PROG_LD_RELOAD_FLAG],
+[AC_CACHE_CHECK([for $LD option to reload object files],
+  lt_cv_ld_reload_flag,
+  [lt_cv_ld_reload_flag='-r'])
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  darwin*)
+    if test "$GCC" = yes; then
+      reload_cmds='$CC -nostdlib ${wl}-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+])# AC_PROG_LD_RELOAD_FLAG
+
+
+# AC_DEPLIBS_CHECK_METHOD
+# -----------------------
+# how to check for library dependencies
+#  -- PORTME fill in with the dynamic library characteristics
+AC_DEFUN([AC_DEPLIBS_CHECK_METHOD],
+[AC_CACHE_CHECK([how to recognise dependent libraries],
+lt_cv_deplibs_check_method,
+[lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix4* | aix5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[[45]]*)
+  lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump'.
+  lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
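+      # (The file_magic pattern below accepts FreeBSD, OpenBSD, and
+      # DragonFly a.out shared libraries on x86.)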
+ lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD|DragonFly)/i[[3-9]]86 (compact )?demand paged shared library' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` + ;; + esac + else + lt_cv_deplibs_check_method=pass_all + fi + ;; + +gnu*) + lt_cv_deplibs_check_method=pass_all + ;; + +hpux10.20* | hpux11*) + lt_cv_file_magic_cmd=/usr/bin/file + case $host_cpu in + ia64*) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|ELF-[[0-9]][[0-9]]) shared object file - IA64' + lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so + ;; + hppa*64*) + [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) shared object file - PA-RISC [0-9].[0-9]'] + lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl + ;; + *) + lt_cv_deplibs_check_method='file_magic (s[[0-9]][[0-9]][[0-9]]|PA-RISC[[0-9]].[[0-9]]) shared library' + lt_cv_file_magic_test_file=/usr/lib/libc.sl + ;; + esac + ;; + +interix3*) + # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|\.a)$' + ;; + +irix5* | irix6* | nonstopux*) + case $LD in + *-32|*"-32 ") libmagic=32-bit;; + *-n32|*"-n32 ") libmagic=N32;; + *-64|*"-64 ") libmagic=64-bit;; + *) libmagic=never-match;; + esac + lt_cv_deplibs_check_method=pass_all + ;; + +# This must be Linux ELF. +linux*) + lt_cv_deplibs_check_method=pass_all + ;; + +netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so|_pic\.a)$' + fi + ;; + +newos6*) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (executable|dynamic lib)' + lt_cv_file_magic_cmd=/usr/bin/file + lt_cv_file_magic_test_file=/usr/lib/libnls.so + ;; + +nto-qnx*) + lt_cv_deplibs_check_method=unknown + ;; + +openbsd*) + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|\.so|_pic\.a)$' + else + lt_cv_deplibs_check_method='match_pattern /lib[[^/]]+(\.so\.[[0-9]]+\.[[0-9]]+|_pic\.a)$' + fi + ;; + +osf3* | osf4* | osf5*) + lt_cv_deplibs_check_method=pass_all + ;; + +solaris*) + lt_cv_deplibs_check_method=pass_all + ;; + +sysv4 | sysv4.3*) + case $host_vendor in + motorola) + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[ML]]SB (shared object|dynamic lib) M[[0-9]][[0-9]]* Version [[0-9]]' + lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` + ;; + ncr) + lt_cv_deplibs_check_method=pass_all + ;; + sequent) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method='file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB (shared object|dynamic lib )' + ;; + sni) + lt_cv_file_magic_cmd='/bin/file' + lt_cv_deplibs_check_method="file_magic ELF [[0-9]][[0-9]]*-bit [[LM]]SB dynamic lib" + lt_cv_file_magic_test_file=/lib/libc.so + ;; + siemens) + lt_cv_deplibs_check_method=pass_all + ;; + pc) + lt_cv_deplibs_check_method=pass_all + ;; + esac + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + lt_cv_deplibs_check_method=pass_all + ;; +esac +]) +file_magic_cmd=$lt_cv_file_magic_cmd +deplibs_check_method=$lt_cv_deplibs_check_method +test -z "$deplibs_check_method" && deplibs_check_method=unknown +])# AC_DEPLIBS_CHECK_METHOD + + +# AC_PROG_NM +# ---------- +# find the pathname to a BSD-compatible name lister +AC_DEFUN([AC_PROG_NM], 
+[AC_CACHE_CHECK([for BSD-compatible nm], lt_cv_path_NM,
+[if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+	# Check to see if the nm accepts a BSD-compat flag.
+	# Adding the `sed 1q' prevents false positives on HP-UX, which says:
+	#   nm: unknown option "B" ignored
+	# Tru64's nm complains that /dev/null is an invalid object file
+	case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+	*/dev/null* | *'Invalid file or object type'*)
+	  lt_cv_path_NM="$tmp_nm -B"
+	  break
+	  ;;
+	*)
+	  case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+	  */dev/null*)
+	    lt_cv_path_NM="$tmp_nm -p"
+	    break
+	    ;;
+	  *)
+	    lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+	    continue # so that we can try to find one that supports BSD flags
+	    ;;
+	  esac
+	  ;;
+	esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm
+fi])
+NM="$lt_cv_path_NM"
+])# AC_PROG_NM
+
+
+# AC_CHECK_LIBM
+# -------------
+# check for math library
+AC_DEFUN([AC_CHECK_LIBM],
+[AC_REQUIRE([AC_CANONICAL_HOST])dnl
+LIBM=
+case $host in
+*-*-beos* | *-*-cygwin* | *-*-pw32* | *-*-darwin*)
+  # These systems don't have libm, or don't need it
+  ;;
+*-ncr-sysv4.3*)
+  AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw")
+  AC_CHECK_LIB(m, cos, LIBM="$LIBM -lm")
+  ;;
+*)
+  AC_CHECK_LIB(m, cos, LIBM="-lm")
+  ;;
+esac
+])# AC_CHECK_LIBM
+
+
+# AC_LIBLTDL_CONVENIENCE([DIRECTORY])
+# -----------------------------------
+# sets LIBLTDL to the link flags for the libltdl convenience library and
+# LTDLINCL to the include flags for the libltdl header and adds
+# --enable-ltdl-convenience to the configure arguments.  Note that
+# AC_CONFIG_SUBDIRS is not called here.  If DIRECTORY is not provided,
+# it is assumed to be `libltdl'.  LIBLTDL will be prefixed with
+# '${top_builddir}/' and LTDLINCL will be prefixed with '${top_srcdir}/'
+# (note the single quotes!).  If your package is not flat and you're not
+# using automake, define top_builddir and top_srcdir appropriately in
+# the Makefiles.
+AC_DEFUN([AC_LIBLTDL_CONVENIENCE],
+[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl
+  case $enable_ltdl_convenience in
+  no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;;
+  "") enable_ltdl_convenience=yes
+      ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;;
+  esac
+  LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la
+  LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl'])
+  # For backwards non-gettext consistent compatibility...
+  INCLTDL="$LTDLINCL"
+])# AC_LIBLTDL_CONVENIENCE
+
+
+# AC_LIBLTDL_INSTALLABLE([DIRECTORY])
+# -----------------------------------
+# sets LIBLTDL to the link flags for the libltdl installable library and
+# LTDLINCL to the include flags for the libltdl header and adds
+# --enable-ltdl-install to the configure arguments.  Note that
+# AC_CONFIG_SUBDIRS is not called here.  If DIRECTORY is not provided,
+# and an installed libltdl is not found, it is assumed to be `libltdl'.
+# LIBLTDL will be prefixed with '${top_builddir}/' and LTDLINCL with
+# '${top_srcdir}/' (note the single quotes!).
If your package is not +# flat and you're not using automake, define top_builddir and top_srcdir +# appropriately in the Makefiles. +# In the future, this macro may have to be called after AC_PROG_LIBTOOL. +AC_DEFUN([AC_LIBLTDL_INSTALLABLE], +[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl + AC_CHECK_LIB(ltdl, lt_dlinit, + [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no], + [if test x"$enable_ltdl_install" = xno; then + AC_MSG_WARN([libltdl not installed, but installation disabled]) + else + enable_ltdl_install=yes + fi + ]) + if test x"$enable_ltdl_install" = x"yes"; then + ac_configure_args="$ac_configure_args --enable-ltdl-install" + LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la + LTDLINCL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) + else + ac_configure_args="$ac_configure_args --enable-ltdl-install=no" + LIBLTDL="-lltdl" + LTDLINCL= + fi + # For backwards non-gettext consistent compatibility... + INCLTDL="$LTDLINCL" +])# AC_LIBLTDL_INSTALLABLE + + +# AC_LIBTOOL_CXX +# -------------- +# enable support for C++ libraries +AC_DEFUN([AC_LIBTOOL_CXX], +[AC_REQUIRE([_LT_AC_LANG_CXX]) +])# AC_LIBTOOL_CXX + + +# _LT_AC_LANG_CXX +# --------------- +AC_DEFUN([_LT_AC_LANG_CXX], +[AC_REQUIRE([AC_PROG_CXX]) +AC_REQUIRE([_LT_AC_PROG_CXXCPP]) +_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}CXX]) +])# _LT_AC_LANG_CXX + +# _LT_AC_PROG_CXXCPP +# ------------------ +AC_DEFUN([_LT_AC_PROG_CXXCPP], +[ +AC_REQUIRE([AC_PROG_CXX]) +if test -n "$CXX" && ( test "X$CXX" != "Xno" && + ( (test "X$CXX" = "Xg++" && `g++ -v >/dev/null 2>&1` ) || + (test "X$CXX" != "Xg++"))) ; then + AC_PROG_CXXCPP +fi +])# _LT_AC_PROG_CXXCPP + +# AC_LIBTOOL_F77 +# -------------- +# enable support for Fortran 77 libraries +AC_DEFUN([AC_LIBTOOL_F77], +[AC_REQUIRE([_LT_AC_LANG_F77]) +])# AC_LIBTOOL_F77 + + +# _LT_AC_LANG_F77 +# --------------- +AC_DEFUN([_LT_AC_LANG_F77], +[AC_REQUIRE([AC_PROG_F77]) +_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}F77]) +])# _LT_AC_LANG_F77 + + +# AC_LIBTOOL_GCJ +# -------------- +# enable support for GCJ libraries +AC_DEFUN([AC_LIBTOOL_GCJ], +[AC_REQUIRE([_LT_AC_LANG_GCJ]) +])# AC_LIBTOOL_GCJ + + +# _LT_AC_LANG_GCJ +# --------------- +AC_DEFUN([_LT_AC_LANG_GCJ], +[AC_PROVIDE_IFELSE([AC_PROG_GCJ],[], + [AC_PROVIDE_IFELSE([A][M_PROG_GCJ],[], + [AC_PROVIDE_IFELSE([LT_AC_PROG_GCJ],[], + [ifdef([AC_PROG_GCJ],[AC_REQUIRE([AC_PROG_GCJ])], + [ifdef([A][M_PROG_GCJ],[AC_REQUIRE([A][M_PROG_GCJ])], + [AC_REQUIRE([A][C_PROG_GCJ_OR_A][M_PROG_GCJ])])])])])]) +_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}GCJ]) +])# _LT_AC_LANG_GCJ + + +# AC_LIBTOOL_RC +# ------------- +# enable support for Windows resource files +AC_DEFUN([AC_LIBTOOL_RC], +[AC_REQUIRE([LT_AC_PROG_RC]) +_LT_AC_SHELL_INIT([tagnames=${tagnames+${tagnames},}RC]) +])# AC_LIBTOOL_RC + + +# AC_LIBTOOL_LANG_C_CONFIG +# ------------------------ +# Ensure that the configuration vars for the C compiler are +# suitably defined. Those variables are subsequently used by +# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'. +AC_DEFUN([AC_LIBTOOL_LANG_C_CONFIG], [_LT_AC_LANG_C_CONFIG]) +AC_DEFUN([_LT_AC_LANG_C_CONFIG], +[lt_save_CC="$CC" +AC_LANG_PUSH(C) + +# Source file extension for C test sources. +ac_ext=c + +# Object file extension for compiled C test sources. 
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}\n'
+
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+AC_LIBTOOL_PROG_COMPILER_NO_RTTI($1)
+AC_LIBTOOL_PROG_COMPILER_PIC($1)
+AC_LIBTOOL_PROG_CC_C_O($1)
+AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1)
+AC_LIBTOOL_PROG_LD_SHLIBS($1)
+AC_LIBTOOL_SYS_DYNAMIC_LINKER($1)
+AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1)
+AC_LIBTOOL_SYS_LIB_STRIP
+AC_LIBTOOL_DLOPEN_SELF
+
+# Report which library types will actually be built
+AC_MSG_CHECKING([if libtool supports shared libraries])
+AC_MSG_RESULT([$can_build_shared])
+
+AC_MSG_CHECKING([whether to build shared libraries])
+test "$can_build_shared" = "no" && enable_shared=no
+
+# On AIX, shared libraries and static libraries use the same namespace, and
+# are all built from PIC.
+case $host_os in
+aix3*)
+  test "$enable_shared" = yes && enable_static=no
+  if test -n "$RANLIB"; then
+    archive_cmds="$archive_cmds~\$RANLIB \$lib"
+    postinstall_cmds='$RANLIB $lib'
+  fi
+  ;;
+
+aix4* | aix5*)
+  if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+    test "$enable_shared" = yes && enable_static=no
+  fi
+  ;;
+esac
+AC_MSG_RESULT([$enable_shared])
+
+AC_MSG_CHECKING([whether to build static libraries])
+# Make sure either enable_shared or enable_static is yes.
+test "$enable_shared" = yes || enable_static=yes
+AC_MSG_RESULT([$enable_static])
+
+AC_LIBTOOL_CONFIG($1)
+
+AC_LANG_POP
+CC="$lt_save_CC"
+])# AC_LIBTOOL_LANG_C_CONFIG
+
+
+# AC_LIBTOOL_LANG_CXX_CONFIG
+# --------------------------
+# Ensure that the configuration vars for the C++ compiler are
+# suitably defined.  Those variables are subsequently used by
+# AC_LIBTOOL_CONFIG to write the compiler configuration to `libtool'.
+AC_DEFUN([AC_LIBTOOL_LANG_CXX_CONFIG], [_LT_AC_LANG_CXX_CONFIG(CXX)])
+AC_DEFUN([_LT_AC_LANG_CXX_CONFIG],
+[AC_LANG_PUSH(C++)
+AC_REQUIRE([AC_PROG_CXX])
+AC_REQUIRE([_LT_AC_PROG_CXXCPP])
+
+_LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no
+_LT_AC_TAGVAR(allow_undefined_flag, $1)=
+_LT_AC_TAGVAR(always_export_symbols, $1)=no
+_LT_AC_TAGVAR(archive_expsym_cmds, $1)=
+_LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_direct, $1)=no
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)=
+_LT_AC_TAGVAR(hardcode_libdir_separator, $1)=
+_LT_AC_TAGVAR(hardcode_minus_L, $1)=no
+_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported
+_LT_AC_TAGVAR(hardcode_automatic, $1)=no
+_LT_AC_TAGVAR(module_cmds, $1)=
+_LT_AC_TAGVAR(module_expsym_cmds, $1)=
+_LT_AC_TAGVAR(link_all_deplibs, $1)=unknown
+_LT_AC_TAGVAR(old_archive_cmds, $1)=$old_archive_cmds
+_LT_AC_TAGVAR(no_undefined_flag, $1)=
+_LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no
+
+# Dependencies to place before and after the object being linked:
+_LT_AC_TAGVAR(predep_objects, $1)=
+_LT_AC_TAGVAR(postdep_objects, $1)=
+_LT_AC_TAGVAR(predeps, $1)=
+_LT_AC_TAGVAR(postdeps, $1)=
+_LT_AC_TAGVAR(compiler_lib_search_path, $1)=
+
+# Source file extension for C++ test sources.
+ac_ext=cpp
+
+# Object file extension for compiled C++ test sources.
+objext=o
+_LT_AC_TAGVAR(objext, $1)=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;\n"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(int, char *[[]]) { return(0); }\n'
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+_LT_AC_SYS_COMPILER
+
+# save warnings/boilerplate of simple test code
+_LT_COMPILER_BOILERPLATE
+_LT_LINKER_BOILERPLATE
+
+# Allow CC to be a program name with arguments.
+lt_save_CC=$CC
+lt_save_LD=$LD
+lt_save_GCC=$GCC
+GCC=$GXX
+lt_save_with_gnu_ld=$with_gnu_ld
+lt_save_path_LD=$lt_cv_path_LD
+if test -n "${lt_cv_prog_gnu_ldcxx+set}"; then
+  lt_cv_prog_gnu_ld=$lt_cv_prog_gnu_ldcxx
+else
+  $as_unset lt_cv_prog_gnu_ld
+fi
+if test -n "${lt_cv_path_LDCXX+set}"; then
+  lt_cv_path_LD=$lt_cv_path_LDCXX
+else
+  $as_unset lt_cv_path_LD
+fi
+test -z "${LDCXX+set}" || LD=$LDCXX
+CC=${CXX-"c++"}
+compiler=$CC
+_LT_AC_TAGVAR(compiler, $1)=$CC
+_LT_CC_BASENAME([$compiler])
+
+# We don't want -fno-exceptions when compiling C++ code, so set the
+# no_builtin_flag separately
+if test "$GXX" = yes; then
+  _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin'
+else
+  _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=
+fi
+
+if test "$GXX" = yes; then
+  # Set up default GNU C++ configuration
+
+  AC_PROG_LD
+
+  # Check if GNU C++ uses GNU ld as the underlying linker, since the
+  # archiving commands below assume that GNU ld is being used.
+  if test "$with_gnu_ld" = yes; then
+    _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib'
+    _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+
+    _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir'
+    _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic'
+
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    # XXX I think wlarc can be eliminated in ltcf-cxx, but I need to
+    #     investigate it a little bit more. (MM)
+    wlarc='${wl}'
+
+    # ancient GNU ld didn't support --whole-archive et al.
+    if eval "`$CC -print-prog-name=ld` --help 2>&1" | \
+	grep 'no-whole-archive' > /dev/null; then
+      _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive'
+    else
+      _LT_AC_TAGVAR(whole_archive_flag_spec, $1)=
+    fi
+  else
+    with_gnu_ld=no
+    wlarc=
+
+    # A generic and very simple default shared library creation
+    # command for GNU C++ for the case where it uses the native
+    # linker, instead of GNU ld.  If possible, this setting should
+    # be overridden to take advantage of the native linker features on
+    # the platform it is being used on.
+    _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib'
+  fi
+
+  # Commands to make compiler produce verbose output that lists
+  # what "hidden" libraries, object files and flags are used when
+  # linking a shared library.
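+  # (Only the -L search-path lines are kept from the compiler's verbose
+  # output; see the grep in the command below.)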
+ output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' + +else + GXX=no + with_gnu_ld=no + wlarc= +fi + +# PORTME: fill in a description of your system's C++ link characteristics +AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +_LT_AC_TAGVAR(ld_shlibs, $1)=yes +case $host_os in + aix3*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + aix4* | aix5*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) + for ld_flag in $LDFLAGS; do + case $ld_flag in + *-brtl*) + aix_use_runtimelinking=yes + break + ;; + esac + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_AC_TAGVAR(archive_cmds, $1)='' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + + if test "$GXX" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + else + # We have old collect2 + _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + _LT_AC_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an empty executable. 
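+      # (_LT_AC_SYS_LIBPATH_AIX links a trivial test program and reads the
+      # default libpath back out of its loader section.)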
+ _LT_AC_SYS_LIBPATH_AIX + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an empty executable. + _LT_AC_SYS_LIBPATH_AIX + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + # Exported symbols can be pulled into shared objects from archives + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared libraries. + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + beos*) + if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + chorus*) + case $cc_basename in + *) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + + cygwin* | mingw* | pw32*) + # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_AC_TAGVAR(always_export_symbols, $1)=no + _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + + if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + # If the export-symbols file already is a .def file (1st line + # is EXPORTS), use it as is; otherwise, prepend... 
+ _LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then + cp $export_symbols $output_objdir/$soname.def; + else + echo EXPORTS > $output_objdir/$soname.def; + cat $export_symbols >> $output_objdir/$soname.def; + fi~ + $CC -shared -nostdlib $output_objdir/$soname.def $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + darwin* | rhapsody*) + case $host_os in + rhapsody* | darwin1.[[012]]) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress' + ;; + *) # Darwin 1.3 on + if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' + else + case ${MACOSX_DEPLOYMENT_TARGET} in + 10.[[012]]) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' + ;; + 10.*) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup' + ;; + esac + fi + ;; + esac + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_automatic, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + + if test "$GXX" = yes ; then + lt_int_apple_cc_single_mod=no + output_verbose_link_cmd='echo' + if $CC -dumpspecs 2>&1 | $EGREP 'single_module' >/dev/null ; then + lt_int_apple_cc_single_mod=yes + fi + if test "X$lt_int_apple_cc_single_mod" = Xyes ; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring' + fi + _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' + # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds + if test "X$lt_int_apple_cc_single_mod" = Xyes ; then + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib -single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + else + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -r -keep_private_externs -nostdlib -o ${lib}-master.o $libobjs~$CC -dynamiclib $allow_undefined_flag -o $lib ${lib}-master.o $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + fi + _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + else + case $cc_basename in + xlc*) + output_verbose_link_cmd='echo' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib 
$libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring' + _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' + # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj ${wl}-single_module $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + ;; + *) + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + fi + ;; + + dgux*) + case $cc_basename in + ec++*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + ghcx*) + # Green Hills C++ Compiler + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + freebsd[[12]]*) + # C++ shared libraries reported to be fairly broken before switch to ELF + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + freebsd-elf*) + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + ;; + freebsd* | kfreebsd*-gnu | dragonfly*) + # FreeBSD 3 and later use GNU C++ and GNU ld with standard ELF + # conventions + _LT_AC_TAGVAR(ld_shlibs, $1)=yes + ;; + gnu*) + ;; + hpux9*) + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -b ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
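+        # Illustration (made-up aCC verbose output): given a link line like
+        #   /usr/ccs/bin/ld -b -L/opt/aCC/lib conftest.o /opt/aCC/lib/crt0.o -lc
+        # the filter below drops every *.o word except conftest.o itself
+        # (so the stray crt0.o goes away) and echoes the rest unchanged.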
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "[[-]]L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + ;; + *) + if test "$GXX" = yes; then + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -nostdlib -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + hpux10*|hpux11*) + if test $with_gnu_ld = no; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir' + ;; + *) + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + esac + fi + case $host_cpu in + hppa*64*|ia64*) + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes # Not in the search PATH, + # but as the default + # location of the library. + ;; + esac + + case $cc_basename in + CC*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + aCC*) + case $host_cpu in + hppa*64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`($CC -b $CFLAGS -v conftest.$objext 2>&1) | grep "\-L"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + ;; + *) + if test "$GXX" = yes; then + if test $with_gnu_ld = no; then + case $host_cpu in + hppa*64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + ia64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags' + ;; + esac + fi + else + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + interix3*) + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc. + # Instead, shared libraries are loaded at an image base (0x10000000 by + # default) and relocated if they conflict, which is a slow very memory + # consuming and fragmenting process. To avoid this, we pick a random, + # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link + # time. Moving up from 0x10000000 also allows more sbrk(2) space. + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib' + ;; + irix5* | irix6*) + case $cc_basename in + CC*) + # SGI C++ + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -all -multigot $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + + # Archives containing C++ object files must be created using + # "CC -ar", where "CC" is the IRIX C++ compiler. This is + # necessary to make sure instantiated templates are included + # in the archive. 
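+        # For a hypothetical libfoo.a built from a.o and b.o, the command
+        # below expands to roughly
+        #   CC -ar -WR,-u -o libfoo.a a.o b.o
+        # whereas a plain "ar" archive would miss the template instances
+        # the IRIX compiler generates on the side.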
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -ar -WR,-u -o $oldlib $oldobjs' + ;; + *) + if test "$GXX" = yes; then + if test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` -o $lib' + fi + fi + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + ;; + esac + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + linux*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib ${wl}-retain-symbols-file,$export_symbols; mv \$templib $lib' + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC $CFLAGS -v conftest.$objext -o libconftest$shared_ext 2>&1 | grep "ld"`; rm -f libconftest$shared_ext; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath,$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. + _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + ;; + icpc*) + # Intel C++ + with_gnu_ld=yes + # version 8.0 and above of icpc choke on multiply defined symbols + # if we add $predep_objects and $postdep_objects, however 7.1 and + # earlier do not add the objects themselves. 
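+        # The version test below keys on the compiler banner; icpc -V
+        # prints a line of roughly this shape (sample, abbreviated):
+        #   Intel(R) C++ Compiler ..., Version 7.1 Build ...
+        # so the "Version 7." glob selects the old behaviour.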
+ case `$CC -V 2>&1` in + *"Version 7."*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + *) # Version 8.0 or newer + tmp_idyn= + case $host_cpu in + ia64*) tmp_idyn=' -i_dynamic';; + esac + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared'"$tmp_idyn"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + ;; + esac + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive$convenience ${wl}--no-whole-archive' + ;; + pgCC*) + # Portland Group C++ compiler + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname ${wl}-retain-symbols-file ${wl}$export_symbols -o $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive' + ;; + cxx*) + # Compaq C++ + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $wl$soname -o $lib ${wl}-retain-symbols-file $wl$export_symbols' + + runpath_var=LD_RUN_PATH + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. 
+ output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld .*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + ;; + esac + ;; + lynxos*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + m88k*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + mvs*) + case $cc_basename in + cxx*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $predep_objects $libobjs $deplibs $postdep_objects $linker_flags' + wlarc= + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + fi + # Workaround some broken pre-1.5 toolchains + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep conftest.$objext | $SED -e "s:-lgcc -lc -lgcc::"' + ;; + openbsd2*) + # C++ shared libraries are fairly broken + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + openbsd*) + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -o $lib' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-retain-symbols-file,$export_symbols -o $lib' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + fi + output_verbose_link_cmd='echo' + ;; + osf3*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # "CC -Bstatic", where "CC" is the KAI C++ compiler. 
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -Bstatic -o $oldlib $oldobjs' + + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname $soname `test -n "$verstring" && echo ${wl}-set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"' + + else + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + osf4* | osf5*) + case $cc_basename in + KCC*) + # Kuck and Associates, Inc. (KAI) C++ Compiler + + # KCC will only create a shared library if the output file + # ends with ".so" (or ".sl" for HP-UX), so rename the library + # to its proper name (with version) after linking. + _LT_AC_TAGVAR(archive_cmds, $1)='tempext=`echo $shared_ext | $SED -e '\''s/\([[^()0-9A-Za-z{}]]\)/\\\\\1/g'\''`; templib=`echo $lib | $SED -e "s/\${tempext}\..*/.so/"`; $CC $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags --soname $soname -o \$templib; mv \$templib $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Archives containing C++ object files must be created using + # the KAI C++ compiler. 
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -o $oldlib $oldobjs' + ;; + RCC*) + # Rational C++ 2.4.1 + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + cxx*) + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done~ + echo "-hidden">> $lib.exp~ + $CC -shared$allow_undefined_flag $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags -msym -soname $soname -Wl,-input -Wl,$lib.exp `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~ + $rm $lib.exp' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + # + # There doesn't appear to be a way to prevent this compiler from + # explicitly linking system object files so we need to strip them + # from the output so that they don't get included in the library + # dependencies. + output_verbose_link_cmd='templist=`$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "ld" | grep -v "ld:"`; templist=`echo $templist | $SED "s/\(^.*ld.*\)\( .*ld.*$\)/\1/"`; list=""; for z in $templist; do case $z in conftest.$objext) list="$list $z";; *.$objext);; *) list="$list $z";;esac; done; echo $list' + ;; + *) + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib ${allow_undefined_flag} $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. 
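+          # To preview what this captures, one can run (illustrative, GNU
+          # g++ assumed):
+          #   g++ -shared -v conftest.o 2>&1 | grep '\-L'
+          # which keeps only the verbose driver lines that carry the
+          # hidden -L library search directories.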
+          output_verbose_link_cmd='$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep "\-L"'
+
+        else
+          # FIXME: insert proper C++ library support
+          _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        fi
+        ;;
+    esac
+    ;;
+  psos*)
+    # FIXME: insert proper C++ library support
+    _LT_AC_TAGVAR(ld_shlibs, $1)=no
+    ;;
+  sunos4*)
+    case $cc_basename in
+      CC*)
+        # Sun C++ 4.x
+        # FIXME: insert proper C++ library support
+        _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        ;;
+      lcc*)
+        # Lucid
+        # FIXME: insert proper C++ library support
+        _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        ;;
+      *)
+        # FIXME: insert proper C++ library support
+        _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        ;;
+    esac
+    ;;
+  solaris*)
+    case $cc_basename in
+      CC*)
+        # Sun C++ 4.2, 5.x and Centerline C++
+        _LT_AC_TAGVAR(archive_cmds_need_lc,$1)=yes
+        _LT_AC_TAGVAR(no_undefined_flag, $1)=' -zdefs'
+        _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G${allow_undefined_flag} -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags'
+        _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+        $CC -G${allow_undefined_flag} ${wl}-M ${wl}$lib.exp -h$soname -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp'
+
+        _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+        _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+        case $host_os in
+          solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+          *)
+            # The C++ compiler is used as the linker, so we must use $wl
+            # to pass the commands through to the underlying system
+            # linker. We must also pass each convenience library through
+            # to the system linker between allextract/defaultextract.
+            # The C++ compiler will combine linker options, so we
+            # cannot just pass the convenience library names through
+            # without $wl.
+            # Supported since Solaris 2.6 (maybe 2.5.1?)
+            _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract'
+            ;;
+        esac
+        _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+
+        output_verbose_link_cmd='echo'
+
+        # Archives containing C++ object files must be created using
+        # "CC -xar", where "CC" is the Sun C++ compiler.  This is
+        # necessary to make sure instantiated templates are included
+        # in the archive.
+        _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC -xar -o $oldlib $oldobjs'
+        ;;
+      gcx*)
+        # Green Hills C++ Compiler
+        _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib'
+
+        # The C++ compiler must be used to create the archive.
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='$CC $LDFLAGS -archive -o $oldlib $oldobjs' + ;; + *) + # GNU C++ compiler with Solaris linker + if test "$GXX" = yes && test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-z ${wl}defs' + if $CC --version | grep -v '^2\.7' > /dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $CC -shared -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd="$CC -shared $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" + else + # g++ 2.7 appears to require `-G' NOT `-shared' on this + # platform. + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G -nostdlib $LDFLAGS $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags ${wl}-h $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ + $CC -G -nostdlib ${wl}-M $wl$lib.exp -o $lib $predep_objects $libobjs $deplibs $postdep_objects $compiler_flags~$rm $lib.exp' + + # Commands to make compiler produce verbose output that lists + # what "hidden" libraries, object files and flags are used when + # linking a shared library. + output_verbose_link_cmd="$CC -G $CFLAGS -v conftest.$objext 2>&1 | grep \"\-L\"" + fi + + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $wl$libdir' + fi + ;; + esac + ;; + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7* | sco3.2v5.0.[[024]]*) + _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. + # For security reasons, it is highly recommended that you always + # use absolute paths for naming shared libraries, and exclude the + # DT_RUNPATH tag from executables and libraries. But doing so + # requires that you compile everything twice, which is a pain. + # So that behaviour is only enabled if SCOABSPATH is set to a + # non-empty value in the environment. Most likely only useful for + # creating official distributions of packages. 
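+      # (Example with hypothetical paths: with SCOABSPATH=set in the
+      # environment, the ${SCOABSPATH:+...} expansions below link with
+      # -h,/usr/local/lib/libfoo.so.1 rather than -h,libfoo.so.1, and the
+      # test -z "$SCOABSPATH" guard then emits no ${wl}-R,$libdir entry.)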
+ # This is a hack until libtool officially supports absolute path + # names for shared libraries. + _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + case $cc_basename in + CC*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + ;; + vxworks*) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + *) + # FIXME: insert proper C++ library support + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; +esac +AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)]) +test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + +_LT_AC_TAGVAR(GCC, $1)="$GXX" +_LT_AC_TAGVAR(LD, $1)="$LD" + +## CAVEAT EMPTOR: +## There is no encapsulation within the following macros, do not change +## the running order or otherwise move them around unless you know exactly +## what you are doing... +AC_LIBTOOL_POSTDEP_PREDEP($1) +AC_LIBTOOL_PROG_COMPILER_PIC($1) +AC_LIBTOOL_PROG_CC_C_O($1) +AC_LIBTOOL_SYS_HARD_LINK_LOCKS($1) +AC_LIBTOOL_PROG_LD_SHLIBS($1) +AC_LIBTOOL_SYS_DYNAMIC_LINKER($1) +AC_LIBTOOL_PROG_LD_HARDCODE_LIBPATH($1) + +AC_LIBTOOL_CONFIG($1) + +AC_LANG_POP +CC=$lt_save_CC +LDCXX=$LD +LD=$lt_save_LD +GCC=$lt_save_GCC +with_gnu_ldcxx=$with_gnu_ld +with_gnu_ld=$lt_save_with_gnu_ld +lt_cv_path_LDCXX=$lt_cv_path_LD +lt_cv_path_LD=$lt_save_path_LD +lt_cv_prog_gnu_ldcxx=$lt_cv_prog_gnu_ld +lt_cv_prog_gnu_ld=$lt_save_with_gnu_ld +])# AC_LIBTOOL_LANG_CXX_CONFIG + +# AC_LIBTOOL_POSTDEP_PREDEP([TAGNAME]) +# ------------------------------------ +# Figure out "hidden" library dependencies from verbose +# compiler output when linking a shared library. +# Parse the compiler output and extract the necessary +# objects, libraries and library flags. +AC_DEFUN([AC_LIBTOOL_POSTDEP_PREDEP],[ +dnl we can't use the lt_simple_compile_test_code here, +dnl because it contains code intended for an executable, +dnl not a library. It's possible we should let each +dnl tag define a new lt_????_link_test_code variable, +dnl but it's only used here... +ifelse([$1],[],[cat > conftest.$ac_ext < conftest.$ac_ext < conftest.$ac_ext < conftest.$ac_ext <> "$cfgfile" +ifelse([$1], [], +[#! $SHELL + +# `$echo "$cfgfile" | sed 's%^.*/%%'` - Provide generalized library-building support services. 
+# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) +# NOTE: Changes made to this file will be lost: look at ltmain.sh. +# +# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +# Free Software Foundation, Inc. +# +# This file is part of GNU Libtool: +# Originally by Gordon Matzigkeit , 1996 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +# +# As a special exception to the GNU General Public License, if you +# distribute this file as part of a program that contains a +# configuration script generated by Autoconf, you may include it under +# the same distribution terms that you use for the rest of that program. + +# A sed program that does not truncate output. +SED=$lt_SED + +# Sed that helps us avoid accidentally triggering echo(1) options like -n. +Xsed="$SED -e 1s/^X//" + +# The HP-UX ksh and POSIX shell print the target directory to stdout +# if CDPATH is set. +(unset CDPATH) >/dev/null 2>&1 && unset CDPATH + +# The names of the tagged configurations supported by this script. +available_tags= + +# ### BEGIN LIBTOOL CONFIG], +[# ### BEGIN LIBTOOL TAG CONFIG: $tagname]) + +# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: + +# Shell to use when invoking shell scripts. +SHELL=$lt_SHELL + +# Whether or not to build shared libraries. +build_libtool_libs=$enable_shared + +# Whether or not to build static libraries. +build_old_libs=$enable_static + +# Whether or not to add -lc for building shared libraries. +build_libtool_need_lc=$_LT_AC_TAGVAR(archive_cmds_need_lc, $1) + +# Whether or not to disallow shared libs when runtime libs are static +allow_libtool_libs_with_static_runtimes=$_LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1) + +# Whether or not to optimize for fast installation. +fast_install=$enable_fast_install + +# The host system. +host_alias=$host_alias +host=$host +host_os=$host_os + +# The build system. +build_alias=$build_alias +build=$build +build_os=$build_os + +# An echo program that does not interpret backslashes. +echo=$lt_echo + +# The archiver. +AR=$lt_AR +AR_FLAGS=$lt_AR_FLAGS + +# A C compiler. +LTCC=$lt_LTCC + +# LTCC compiler flags. +LTCFLAGS=$lt_LTCFLAGS + +# A language-specific compiler. +CC=$lt_[]_LT_AC_TAGVAR(compiler, $1) + +# Is the compiler the GNU C compiler? +with_gcc=$_LT_AC_TAGVAR(GCC, $1) + +# An ERE matcher. +EGREP=$lt_EGREP + +# The linker used to build libraries. +LD=$lt_[]_LT_AC_TAGVAR(LD, $1) + +# Whether we need hard or soft links. +LN_S=$lt_LN_S + +# A BSD-compatible nm program. +NM=$lt_NM + +# A symbol stripping program +STRIP=$lt_STRIP + +# Used to examine libraries when file_magic_cmd begins "file" +MAGIC_CMD=$MAGIC_CMD + +# Used on cygwin: DLL creation program. +DLLTOOL="$DLLTOOL" + +# Used on cygwin: object dumper. +OBJDUMP="$OBJDUMP" + +# Used on cygwin: assembler. 
+AS="$AS" + +# The name of the directory that contains temporary libtool files. +objdir=$objdir + +# How to create reloadable object files. +reload_flag=$lt_reload_flag +reload_cmds=$lt_reload_cmds + +# How to pass a linker flag through the compiler. +wl=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) + +# Object file suffix (normally "o"). +objext="$ac_objext" + +# Old archive suffix (normally "a"). +libext="$libext" + +# Shared library suffix (normally ".so"). +shrext_cmds='$shrext_cmds' + +# Executable file suffix (normally ""). +exeext="$exeext" + +# Additional compiler flags for building library objects. +pic_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) +pic_mode=$pic_mode + +# What is the maximum length of a command? +max_cmd_len=$lt_cv_sys_max_cmd_len + +# Does compiler simultaneously support -c and -o options? +compiler_c_o=$lt_[]_LT_AC_TAGVAR(lt_cv_prog_compiler_c_o, $1) + +# Must we lock files when doing compilation? +need_locks=$lt_need_locks + +# Do we need the lib prefix for modules? +need_lib_prefix=$need_lib_prefix + +# Do we need a version for libraries? +need_version=$need_version + +# Whether dlopen is supported. +dlopen_support=$enable_dlopen + +# Whether dlopen of programs is supported. +dlopen_self=$enable_dlopen_self + +# Whether dlopen of statically linked programs is supported. +dlopen_self_static=$enable_dlopen_self_static + +# Compiler flag to prevent dynamic linking. +link_static_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_static, $1) + +# Compiler flag to turn off builtin functions. +no_builtin_flag=$lt_[]_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) + +# Compiler flag to allow reflexive dlopens. +export_dynamic_flag_spec=$lt_[]_LT_AC_TAGVAR(export_dynamic_flag_spec, $1) + +# Compiler flag to generate shared objects directly from archives. +whole_archive_flag_spec=$lt_[]_LT_AC_TAGVAR(whole_archive_flag_spec, $1) + +# Compiler flag to generate thread-safe objects. +thread_safe_flag_spec=$lt_[]_LT_AC_TAGVAR(thread_safe_flag_spec, $1) + +# Library versioning type. +version_type=$version_type + +# Format of library name prefix. +libname_spec=$lt_libname_spec + +# List of archive names. First name is the real one, the rest are links. +# The last name is the one that the linker finds with -lNAME. +library_names_spec=$lt_library_names_spec + +# The coded name of the library, if different from the real name. +soname_spec=$lt_soname_spec + +# Commands used to build and install an old-style archive. +RANLIB=$lt_RANLIB +old_archive_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_cmds, $1) +old_postinstall_cmds=$lt_old_postinstall_cmds +old_postuninstall_cmds=$lt_old_postuninstall_cmds + +# Create an old-style archive from a shared archive. +old_archive_from_new_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_new_cmds, $1) + +# Create a temporary old-style archive to link instead of a shared archive. +old_archive_from_expsyms_cmds=$lt_[]_LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1) + +# Commands used to build and install a shared archive. +archive_cmds=$lt_[]_LT_AC_TAGVAR(archive_cmds, $1) +archive_expsym_cmds=$lt_[]_LT_AC_TAGVAR(archive_expsym_cmds, $1) +postinstall_cmds=$lt_postinstall_cmds +postuninstall_cmds=$lt_postuninstall_cmds + +# Commands used to build a loadable module (assumed same as above if empty) +module_cmds=$lt_[]_LT_AC_TAGVAR(module_cmds, $1) +module_expsym_cmds=$lt_[]_LT_AC_TAGVAR(module_expsym_cmds, $1) + +# Commands to strip libraries. 
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+# Dependencies to place before the objects being linked to create a
+# shared library.
+predep_objects=$lt_[]_LT_AC_TAGVAR(predep_objects, $1)
+
+# Dependencies to place after the objects being linked to create a
+# shared library.
+postdep_objects=$lt_[]_LT_AC_TAGVAR(postdep_objects, $1)
+
+# Dependencies to place before the objects being linked to create a
+# shared library.
+predeps=$lt_[]_LT_AC_TAGVAR(predeps, $1)
+
+# Dependencies to place after the objects being linked to create a
+# shared library.
+postdeps=$lt_[]_LT_AC_TAGVAR(postdeps, $1)
+
+# The library search path used internally by the compiler when linking
+# a shared library.
+compiler_lib_search_path=$lt_[]_LT_AC_TAGVAR(compiler_lib_search_path, $1)
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method == file_magic.
+file_magic_cmd=$lt_file_magic_cmd
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_[]_LT_AC_TAGVAR(allow_undefined_flag, $1)
+
+# Flag that forces no undefined symbols.
+no_undefined_flag=$lt_[]_LT_AC_TAGVAR(no_undefined_flag, $1)
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# Same as above, but a single script fragment to be evaled but not shown.
+finish_eval=$lt_finish_eval
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
+
+# Transform the output of nm into a proper C declaration.
+global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
+
+# Transform the output of nm into a C name address pair.
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
+
+# This is the shared library runtime path variable.
+runpath_var=$runpath_var
+
+# This is the shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$_LT_AC_TAGVAR(hardcode_action, $1)
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist.
+hardcode_libdir_flag_spec=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)
+
+# If ld is used when linking, flag to hardcode \$libdir into
+# a binary during linking. This must work even if \$libdir does
+# not exist.
+hardcode_libdir_flag_spec_ld=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)
+
+# Whether we need a single -rpath flag with a separated argument.
+hardcode_libdir_separator=$lt_[]_LT_AC_TAGVAR(hardcode_libdir_separator, $1)
+
+# Set to yes if using DIR/libNAME${shared_ext} during linking hardcodes DIR into the
+# resulting binary.
+hardcode_direct=$_LT_AC_TAGVAR(hardcode_direct, $1)
+
+# Set to yes if using the -LDIR flag during linking hardcodes DIR into the
+# resulting binary.
+hardcode_minus_L=$_LT_AC_TAGVAR(hardcode_minus_L, $1)
+
+# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into
+# the resulting binary.
+hardcode_shlibpath_var=$_LT_AC_TAGVAR(hardcode_shlibpath_var, $1)
+
+# Set to yes if building a shared library automatically hardcodes DIR into the library
+# and all subsequent libraries and executables linked against it.
+hardcode_automatic=$_LT_AC_TAGVAR(hardcode_automatic, $1) + +# Variables whose values should be saved in libtool wrapper scripts and +# restored at relink time. +variables_saved_for_relink="$variables_saved_for_relink" + +# Whether libtool must link a program against all its dependency libraries. +link_all_deplibs=$_LT_AC_TAGVAR(link_all_deplibs, $1) + +# Compile-time system search path for libraries +sys_lib_search_path_spec=$lt_sys_lib_search_path_spec + +# Run-time system search path for libraries +sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec + +# Fix the shell variable \$srcfile for the compiler. +fix_srcfile_path="$_LT_AC_TAGVAR(fix_srcfile_path, $1)" + +# Set to yes if exported symbols are required. +always_export_symbols=$_LT_AC_TAGVAR(always_export_symbols, $1) + +# The commands to list exported symbols. +export_symbols_cmds=$lt_[]_LT_AC_TAGVAR(export_symbols_cmds, $1) + +# The commands to extract the exported symbol list from a shared archive. +extract_expsyms_cmds=$lt_extract_expsyms_cmds + +# Symbols that should not be listed in the preloaded symbols. +exclude_expsyms=$lt_[]_LT_AC_TAGVAR(exclude_expsyms, $1) + +# Symbols that must always be exported. +include_expsyms=$lt_[]_LT_AC_TAGVAR(include_expsyms, $1) + +ifelse([$1],[], +[# ### END LIBTOOL CONFIG], +[# ### END LIBTOOL TAG CONFIG: $tagname]) + +__EOF__ + +ifelse([$1],[], [ + case $host_os in + aix3*) + cat <<\EOF >> "$cfgfile" + +# AIX sometimes has problems with the GCC collect2 program. For some +# reason, if we set the COLLECT_NAMES environment variable, the problems +# vanish in a puff of smoke. +if test "X${COLLECT_NAMES+set}" != Xset; then + COLLECT_NAMES= + export COLLECT_NAMES +fi +EOF + ;; + esac + + # We use sed instead of cat because bash on DJGPP gets confused if + # if finds mixed CR/LF and LF-only lines. Since sed operates in + # text mode, it properly converts lines to CR/LF. This bash problem + # is reportedly fixed, but why not run on old versions too? + sed '$q' "$ltmain" >> "$cfgfile" || (rm -f "$cfgfile"; exit 1) + + mv -f "$cfgfile" "$ofile" || \ + (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile") + chmod +x "$ofile" +]) +else + # If there is no Makefile yet, we rely on a make rule to execute + # `config.status --recheck' to rerun these tests and create the + # libtool script then. + ltmain_in=`echo $ltmain | sed -e 's/\.sh$/.in/'` + if test -f "$ltmain_in"; then + test -f Makefile && make "$ltmain" + fi +fi +])# AC_LIBTOOL_CONFIG + + +# AC_LIBTOOL_PROG_COMPILER_NO_RTTI([TAGNAME]) +# ------------------------------------------- +AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_NO_RTTI], +[AC_REQUIRE([_LT_AC_SYS_COMPILER])dnl + +_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)= + +if test "$GCC" = yes; then + _LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)=' -fno-builtin' + + AC_LIBTOOL_COMPILER_OPTION([if $compiler supports -fno-rtti -fno-exceptions], + lt_cv_prog_compiler_rtti_exceptions, + [-fno-rtti -fno-exceptions], [], + [_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1) -fno-rtti -fno-exceptions"]) +fi +])# AC_LIBTOOL_PROG_COMPILER_NO_RTTI + + +# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE +# --------------------------------- +AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], +[AC_REQUIRE([AC_CANONICAL_HOST]) +AC_REQUIRE([AC_PROG_NM]) +AC_REQUIRE([AC_OBJEXT]) +# Check for command to grab the raw symbol name followed by C symbol from nm. 
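+# As an illustration (hypothetical nm output), a line such as
+#   0000000000601040 D nm_test_var
+# is rewritten by the pipe constructed below into
+#   D nm_test_var nm_test_var
+# which global_symbol_to_cdecl then turns into "extern int nm_test_var;"
+# and global_symbol_to_c_name_address into:
+#   {"nm_test_var", (lt_ptr) &nm_test_var},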
+AC_MSG_CHECKING([command to parse $NM output from $compiler object])
+AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe],
+[
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[[BCDEGRST]]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([[_A-Za-z]][[_A-Za-z0-9]]*\)'
+
+# Transform an extracted symbol line into a proper C declaration
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[[BCDT]]'
+  ;;
+cygwin* | mingw* | pw32*)
+  symcode='[[ABCDGISTW]]'
+  ;;
+hpux*) # Its linker distinguishes data from code symbols
+  if test "$host_cpu" = ia64; then
+    symcode='[[ABCDEGRST]]'
+  fi
+  lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+  lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+  ;;
+linux*)
+  if test "$host_cpu" = ia64; then
+    symcode='[[ABCDGIRSTW]]'
+    lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+    lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([[^ ]]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([[^ ]]*\) \([[^ ]]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'"
+  fi
+  ;;
+irix* | nonstopux*)
+  symcode='[[BCDEGRST]]'
+  ;;
+osf*)
+  symcode='[[BCDEGQRST]]'
+  ;;
+solaris*)
+  symcode='[[BDRT]]'
+  ;;
+sco3.2v5*)
+  symcode='[[DT]]'
+  ;;
+sysv4.2uw2*)
+  symcode='[[DT]]'
+  ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+  symcode='[[ABDT]]'
+  ;;
+sysv4)
+  symcode='[[DFNSTU]]'
+  ;;
+esac
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+  symcode='[[ABCDGIRSTW]]' ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[[ ]]\($symcode$symcode*\)[[ ]][[ ]]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+EOF
+
+  if AC_TRY_EVAL(ac_compile); then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+        mv -f "$nlist"T "$nlist"
+      else
+        rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if grep ' nm_test_var$' "$nlist" >/dev/null; then
+        if grep ' nm_test_func$' "$nlist" >/dev/null; then
+          cat <<EOF > conftest.$ac_ext
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+EOF
+          # Now generate the symbol file.
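+          # (With the probe symbols above, this appends lines such as
+          #   extern int nm_test_var;
+          #   extern int nm_test_func;
+          # to the C stub being assembled; grep -v main drops main itself.)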
+ eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext' + + cat <> conftest.$ac_ext +#if defined (__STDC__) && __STDC__ +# define lt_ptr_t void * +#else +# define lt_ptr_t char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr_t address; +} +lt_preloaded_symbols[[]] = +{ +EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext + cat <<\EOF >> conftest.$ac_ext + {0, (lt_ptr_t) 0} +}; + +#ifdef __cplusplus +} +#endif +EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_save_LIBS="$LIBS" + lt_save_CFLAGS="$CFLAGS" + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$_LT_AC_TAGVAR(lt_prog_compiler_no_builtin_flag, $1)" + if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS="$lt_save_LIBS" + CFLAGS="$lt_save_CFLAGS" + else + echo "cannot find nm_test_func in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot find nm_test_var in $nlist" >&AS_MESSAGE_LOG_FD + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD + fi + else + echo "$progname: failed program was:" >&AS_MESSAGE_LOG_FD + cat conftest.$ac_ext >&5 + fi + rm -f conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done +]) +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + AC_MSG_RESULT(failed) +else + AC_MSG_RESULT(ok) +fi +]) # AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE + + +# AC_LIBTOOL_PROG_COMPILER_PIC([TAGNAME]) +# --------------------------------------- +AC_DEFUN([AC_LIBTOOL_PROG_COMPILER_PIC], +[_LT_AC_TAGVAR(lt_prog_compiler_wl, $1)= +_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= +_LT_AC_TAGVAR(lt_prog_compiler_static, $1)= + +AC_MSG_CHECKING([for $compiler option to produce PIC]) + ifelse([$1],[CXX],[ + # C++ specific cases for pic, static, wl, etc. + if test "$GXX" = yes; then + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + amigaos*) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + mingw* | os2* | pw32*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + ;; + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + *djgpp*) + # DJGPP does not support shared libraries at all + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + interix3*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. 
+ ;; + sysv4*MP*) + if test -d /usr/nec; then + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + hpux*) + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + case $host_os in + aix4* | aix5*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + chorus*) + case $cc_basename in + cxch68*) + # Green Hills C++ Compiler + # _LT_AC_TAGVAR(lt_prog_compiler_static, $1)="--no_auto_instantiation -u __main -u __premain -u _abort -r $COOL_DIR/lib/libOrb.a $MVME_DIR/lib/CC/libC.a $MVME_DIR/lib/classix/libcx.s.a" + ;; + esac + ;; + darwin*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + case $cc_basename in + xlc*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + esac + ;; + dgux*) + case $cc_basename in + ec++*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + ghcx*) + # Green Hills C++ Compiler + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + freebsd* | kfreebsd*-gnu | dragonfly*) + # FreeBSD uses GNU C++ + ;; + hpux9* | hpux10* | hpux11*) + case $cc_basename in + CC*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + if test "$host_cpu" != ia64; then + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + fi + ;; + aCC*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + ;; + *) + ;; + esac + ;; + interix*) + # This is c89, which is MS Visual C++ (no shared libs) + # Anyone wants to do a port? + ;; + irix5* | irix6* | nonstopux*) + case $cc_basename in + CC*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + # CC pic flag -KPIC is the default. + ;; + *) + ;; + esac + ;; + linux*) + case $cc_basename in + KCC*) + # KAI C++ Compiler + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + icpc* | ecpc*) + # Intel C++ + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgCC*) + # Portland Group C++ compiler. + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + cxx*) + # Compaq C++ + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. 
+ _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + lynxos*) + ;; + m88k*) + ;; + mvs*) + case $cc_basename in + cxx*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-W c,exportall' + ;; + *) + ;; + esac + ;; + netbsd*) + ;; + osf3* | osf4* | osf5*) + case $cc_basename in + KCC*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='--backend -Wl,' + ;; + RCC*) + # Rational C++ 2.4.1 + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + cxx*) + # Digital/Compaq C++ + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # Make sure the PIC flag is empty. It appears that all Alpha + # Linux and Compaq Tru64 Unix objects are PIC. + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + *) + ;; + esac + ;; + psos*) + ;; + solaris*) + case $cc_basename in + CC*) + # Sun C++ 4.2, 5.x and Centerline C++ + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + ;; + gcx*) + # Green Hills C++ Compiler + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + ;; + *) + ;; + esac + ;; + sunos4*) + case $cc_basename in + CC*) + # Sun C++ 4.x + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + lcc*) + # Lucid + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + ;; + *) + ;; + esac + ;; + tandem*) + case $cc_basename in + NCC*) + # NonStop-UX NCC 3.20 + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + ;; + *) + ;; + esac + ;; + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + case $cc_basename in + CC*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + esac + ;; + vxworks*) + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +], +[ + if test "$GCC" = yes; then + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static' + + case $host_os in + aix*) + # All AIX code is PIC. + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + amigaos*) + # FIXME: we need at least 68020 code to build shared libraries, but + # adding the `-m68020' flag to GCC prevents building anything better, + # like `-m68040'. + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-m68020 -resident32 -malways-restore-a4' + ;; + + beos* | cygwin* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*) + # PIC is the default for these OSes. + ;; + + mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + ;; + + darwin* | rhapsody*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fno-common' + ;; + + interix3*) + # Interix 3.x gcc -fpic/-fPIC options generate broken code. + # Instead, we relocate shared libraries at runtime. + ;; + + msdosdjgpp*) + # Just because we use GCC doesn't mean we suddenly get shared libraries + # on systems that don't support them. 
+ _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + enable_shared=no + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=-Kconform_pic + fi + ;; + + hpux*) + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + ;; + + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fPIC' + ;; + esac + else + # PORTME Check for flag to pass linker flags through the system compiler. + case $host_os in + aix*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + if test "$host_cpu" = ia64; then + # AIX 5 now supports IA64 processor + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + else + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-bnso -bI:/lib/syscalls.exp' + fi + ;; + darwin*) + # PIC is the default on this platform + # Common symbols not allowed in MH_DYLIB files + case $cc_basename in + xlc*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-qnocommon' + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + ;; + esac + ;; + + mingw* | pw32* | os2*) + # This hack is so that the source file can tell whether it is being + # built for inclusion in a dll (and should export symbols for example). + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-DDLL_EXPORT' + ;; + + hpux9* | hpux10* | hpux11*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but + # not for PA HP-UX. + case $host_cpu in + hppa*64*|ia64*) + # +Z the default + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='+Z' + ;; + esac + # Is there a better lt_prog_compiler_static that works with the bundled CC? + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='${wl}-a ${wl}archive' + ;; + + irix5* | irix6* | nonstopux*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # PIC (with -KPIC) is the default. + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + newsos6) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + linux*) + case $cc_basename in + icc* | ecc*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-static' + ;; + pgcc* | pgf77* | pgf90* | pgf95*) + # Portland Group compilers (*not* the Pentium gcc compiler, + # which looks to be a dead project) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-fpic' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + ccc*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All Alpha code is PIC. + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + esac + ;; + + osf3* | osf4* | osf5*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + # All OSF/1 code is PIC. 
+ _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-non_shared' + ;; + + solaris*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + case $cc_basename in + f77* | f90* | f95*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ';; + *) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,';; + esac + ;; + + sunos4*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Qoption ld ' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-PIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4 | sysv4.2uw2* | sysv4.3*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + sysv4*MP*) + if test -d /usr/nec ;then + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-Kconform_pic' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + fi + ;; + + sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-KPIC' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + unicos*) + _LT_AC_TAGVAR(lt_prog_compiler_wl, $1)='-Wl,' + _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + + uts4*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)='-pic' + _LT_AC_TAGVAR(lt_prog_compiler_static, $1)='-Bstatic' + ;; + + *) + _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no + ;; + esac + fi +]) +AC_MSG_RESULT([$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)]) + +# +# Check to make sure the PIC flag actually works. +# +if test -n "$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)"; then + AC_LIBTOOL_COMPILER_OPTION([if $compiler PIC flag $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) works], + _LT_AC_TAGVAR(lt_prog_compiler_pic_works, $1), + [$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])], [], + [case $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) in + "" | " "*) ;; + *) _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)=" $_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)" ;; + esac], + [_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= + _LT_AC_TAGVAR(lt_prog_compiler_can_build_shared, $1)=no]) +fi +case $host_os in + # For platforms which do not support PIC, -DPIC is meaningless: + *djgpp*) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)= + ;; + *) + _LT_AC_TAGVAR(lt_prog_compiler_pic, $1)="$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1)ifelse([$1],[],[ -DPIC],[ifelse([$1],[CXX],[ -DPIC],[])])" + ;; +esac + +# +# Check to make sure the static flag actually works. +# +wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) eval lt_tmp_static_flag=\"$_LT_AC_TAGVAR(lt_prog_compiler_static, $1)\" +AC_LIBTOOL_LINKER_OPTION([if $compiler static flag $lt_tmp_static_flag works], + _LT_AC_TAGVAR(lt_prog_compiler_static_works, $1), + $lt_tmp_static_flag, + [], + [_LT_AC_TAGVAR(lt_prog_compiler_static, $1)=]) +]) + + +# AC_LIBTOOL_PROG_LD_SHLIBS([TAGNAME]) +# ------------------------------------ +# See if the linker supports building shared libraries. +AC_DEFUN([AC_LIBTOOL_PROG_LD_SHLIBS], +[AC_MSG_CHECKING([whether the $compiler linker ($LD) supports shared libraries]) +ifelse([$1],[CXX],[ + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + case $host_os in + aix4* | aix5*) + # If we're using GNU nm, then we don't want the "-C" option. 
+ # -C means demangle to AIX nm, but means don't demangle with GNU nm + if $NM -V 2>&1 | grep 'GNU' > /dev/null; then + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' + else + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' + fi + ;; + pw32*) + _LT_AC_TAGVAR(export_symbols_cmds, $1)="$ltdll_cmds" + ;; + cygwin* | mingw*) + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/;/^.* __nm__/s/^.* __nm__\([[^ ]]*\) [[^ ]]*/\1 DATA/;/^I /d;/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols' + ;; + *) + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + ;; + esac +],[ + runpath_var= + _LT_AC_TAGVAR(allow_undefined_flag, $1)= + _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=no + _LT_AC_TAGVAR(archive_cmds, $1)= + _LT_AC_TAGVAR(archive_expsym_cmds, $1)= + _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)= + _LT_AC_TAGVAR(old_archive_from_expsyms_cmds, $1)= + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= + _LT_AC_TAGVAR(thread_safe_flag_spec, $1)= + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)= + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_minus_L, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_AC_TAGVAR(link_all_deplibs, $1)=unknown + _LT_AC_TAGVAR(hardcode_automatic, $1)=no + _LT_AC_TAGVAR(module_cmds, $1)= + _LT_AC_TAGVAR(module_expsym_cmds, $1)= + _LT_AC_TAGVAR(always_export_symbols, $1)=no + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED '\''s/.* //'\'' | sort | uniq > $export_symbols' + # include_expsyms should be a list of space-separated symbols to be *always* + # included in the symbol list + _LT_AC_TAGVAR(include_expsyms, $1)= + # exclude_expsyms can be an extended regexp of symbols to exclude + # it will be wrapped by ` (' and `)$', so one must not match beginning or + # end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', + # as well as any symbol that contains `d'. + _LT_AC_TAGVAR(exclude_expsyms, $1)="_GLOBAL_OFFSET_TABLE_" + # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out + # platforms (ab)use it in PIC code, but their linkers get confused if + # the symbol is explicitly referenced. Since portable code cannot + # rely on this symbol name, it's probably fine to never include it in + # preloaded symbol tables. + extract_expsyms_cmds= + # Just being paranoid about ensuring that cc_basename is set. + _LT_CC_BASENAME([$compiler]) + case $host_os in + cygwin* | mingw* | pw32*) + # FIXME: the MSVC++ port hasn't been tested in a loooong time + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. 
+ if test "$GCC" != yes; then + with_gnu_ld=no + fi + ;; + interix*) + # we just hope/assume this is gcc and not c89 (= MSVC++) + with_gnu_ld=yes + ;; + openbsd*) + with_gnu_ld=no + ;; + esac + + _LT_AC_TAGVAR(ld_shlibs, $1)=yes + if test "$with_gnu_ld" = yes; then + # If archive_cmds runs LD, not CC, wlarc should be empty + wlarc='${wl}' + + # Set some defaults for GNU ld with shared library support. These + # are reset later if shared libraries are not supported. Putting them + # here allows them to be overridden if necessary. + runpath_var=LD_RUN_PATH + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}--rpath ${wl}$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}--export-dynamic' + # ancient GNU ld didn't support --whole-archive et. al. + if $LD --help 2>&1 | grep 'no-whole-archive' > /dev/null; then + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' + else + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= + fi + supports_anon_versioning=no + case `$LD -v 2>/dev/null` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.10.*) ;; # catch versions < 2.11 + *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ... + *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ... + *\ 2.11.*) ;; # other 2.11 versions + *) supports_anon_versioning=yes ;; + esac + + # See if GNU ld supports shared libraries. + case $host_os in + aix3* | aix4* | aix5*) + # On AIX/PPC, the GNU linker is very broken + if test "$host_cpu" != ia64; then + _LT_AC_TAGVAR(ld_shlibs, $1)=no + cat <&2 + +*** Warning: the GNU linker, at least up to release 2.9.1, is reported +*** to be unable to reliably create shared libraries on AIX. +*** Therefore, libtool is disabling shared libraries support. If you +*** really care for shared libraries, you may want to modify your PATH +*** so that a non-GNU linker is found, and then restart. + +EOF + fi + ;; + + amigaos*) + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + + # Samuel A. Falvo II reports + # that the semantics of dynamic libraries on AmigaOS, at least up + # to version 4, is to share data among multiple programs linked + # with the same dynamic library. Since this doesn't match the + # behavior of shared libraries on other platforms, we can't use + # them. + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + + beos*) + if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + # Joseph Beckenbach says some releases of gcc + # support --undefined. This deserves some investigation. FIXME + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + cygwin* | mingw* | pw32*) + # _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1) is actually meaningless, + # as there is no search path for DLLs. 
+        _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+        _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported
+        _LT_AC_TAGVAR(always_export_symbols, $1)=no
+        _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes
+        _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM $libobjs $convenience | $global_symbol_pipe | $SED -e '\''/^[[BCDGRS]] /s/.* \([[^ ]]*\)/\1 DATA/'\'' | $SED -e '\''/^[[AITW]] /s/.* //'\'' | sort | uniq > $export_symbols'
+
+        if $LD --help 2>&1 | grep 'auto-import' > /dev/null; then
+          _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+          # If the export-symbols file already is a .def file (1st line
+          # is EXPORTS), use it as is; otherwise, prepend...
+          _LT_AC_TAGVAR(archive_expsym_cmds, $1)='if test "x`$SED 1q $export_symbols`" = xEXPORTS; then
+            cp $export_symbols $output_objdir/$soname.def;
+          else
+            echo EXPORTS > $output_objdir/$soname.def;
+            cat $export_symbols >> $output_objdir/$soname.def;
+          fi~
+          $CC -shared $output_objdir/$soname.def $libobjs $deplibs $compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib -Xlinker $lib'
+        else
+          _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        fi
+        ;;
+
+      interix3*)
+        _LT_AC_TAGVAR(hardcode_direct, $1)=no
+        _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+        _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir'
+        _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E'
+        # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+        # Instead, shared libraries are loaded at an image base (0x10000000 by
+        # default) and relocated if they conflict, which is a slow, very
+        # memory-consuming and fragmenting process. To avoid this, we pick a
+        # random, 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000
+        # at link time. Moving up from 0x10000000 also allows more sbrk(2)
+        # space.
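+        # (Worked example of the `expr` in the commands below:
+        # ${RANDOM-$$} % 4096 / 2 yields a slot in 0..2047; multiplying by
+        # 262144 (256 KiB) and adding 1342177280 (0x50000000) produces a
+        # base in the 0x50000000..0x6FFC0000 range described above.)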
+        _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+        _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed "s,^,_," $export_symbols >$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs $compiler_flags ${wl}-h,$soname ${wl}--retain-symbols-file,$output_objdir/$soname.expsym ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+        ;;
+
+      linux*)
+        if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then
+          tmp_addflag=
+          case $cc_basename,$host_cpu in
+          pgcc*)                # Portland Group C compiler
+            _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+            tmp_addflag=' $pic_flag'
+            ;;
+          pgf77* | pgf90* | pgf95*)  # Portland Group f77 and f90 compilers
+            _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}--whole-archive`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}--no-whole-archive'
+            tmp_addflag=' $pic_flag -Mnomain' ;;
+          ecc*,ia64* | icc*,ia64*)   # Intel C compiler on ia64
+            tmp_addflag=' -i_dynamic' ;;
+          efc*,ia64* | ifort*,ia64*) # Intel Fortran compiler on ia64
+            tmp_addflag=' -i_dynamic -nofor_main' ;;
+          ifc* | ifort*)             # Intel Fortran compiler
+            tmp_addflag=' -nofor_main' ;;
+          esac
+          _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+          if test $supports_anon_versioning = yes; then
+            _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $output_objdir/$libname.ver~
+  cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $output_objdir/$libname.ver~
+  $echo "local: *; };" >> $output_objdir/$libname.ver~
+            $CC -shared'"$tmp_addflag"' $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-version-script ${wl}$output_objdir/$libname.ver -o $lib'
+          fi
+        else
+          _LT_AC_TAGVAR(ld_shlibs, $1)=no
+        fi
+        ;;
+
+      netbsd*)
+        if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then
+          _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+          wlarc=
+        else
+          _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib'
+          _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+        fi
+        ;;
+
+      solaris*)
+        if $LD -v 2>&1 | grep 'BFD 2\.8' > /dev/null; then
+          _LT_AC_TAGVAR(ld_shlibs, $1)=no
+          cat <<EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems. Therefore, libtool
+*** is disabling shared libraries support. We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer. Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+ +EOF + elif $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*) + case `$LD -v 2>&1` in + *\ [[01]].* | *\ 2.[[0-9]].* | *\ 2.1[[0-5]].*) + _LT_AC_TAGVAR(ld_shlibs, $1)=no + cat <<_LT_EOF 1>&2 + +*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not +*** reliably create shared libraries on SCO systems. Therefore, libtool +*** is disabling shared libraries support. We urge you to upgrade GNU +*** binutils to release 2.16.91.0.3 or newer. Another option is to modify +*** your PATH or compiler configuration so that the native linker is +*** used, and then restart. + +_LT_EOF + ;; + *) + if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-rpath,$libdir`' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname,\${SCOABSPATH:+${install_libdir}/}$soname,-retain-symbols-file,$export_symbols -o $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + ;; + + sunos4*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' + wlarc= + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + if $LD --help 2>&1 | grep ': supported targets:.* elf' > /dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' + else + _LT_AC_TAGVAR(ld_shlibs, $1)=no + fi + ;; + esac + + if test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no; then + runpath_var= + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)= + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)= + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)= + fi + else + # PORTME fill in a description of your system's linker (not GNU ld) + case $host_os in + aix3*) + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_AC_TAGVAR(always_export_symbols, $1)=yes + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' + # Note: this linker hardcodes the directories in LIBPATH if there + # are no directories specified by -L. + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then + # Neither direct hardcoding nor static linking is supported with a + # broken collect2. + _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported + fi + ;; + + aix4* | aix5*) + if test "$host_cpu" = ia64; then + # On IA64, the linker does run time linking by default, so we don't + # have to do anything special. + aix_use_runtimelinking=no + exp_sym_flag='-Bexport' + no_entry_flag="" + else + # If we're using GNU nm, then we don't want the "-C" option. 
+ # -C means demangle to AIX nm, but means don't demangle with GNU nm + if $NM -V 2>&1 | grep 'GNU' > /dev/null; then + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' + else + _LT_AC_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | awk '\''{ if (((\[$]2 == "T") || (\[$]2 == "D") || (\[$]2 == "B")) && ([substr](\[$]3,1,1) != ".")) { print \[$]3 } }'\'' | sort -u > $export_symbols' + fi + aix_use_runtimelinking=no + + # Test if we are trying to use run time linking or normal + # AIX style linking. If -brtl is somewhere in LDFLAGS, we + # need to do runtime linking. + case $host_os in aix4.[[23]]|aix4.[[23]].*|aix5*) + for ld_flag in $LDFLAGS; do + if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then + aix_use_runtimelinking=yes + break + fi + done + ;; + esac + + exp_sym_flag='-bexport' + no_entry_flag='-bnoentry' + fi + + # When large executables or shared objects are built, AIX ld can + # have problems creating the table of contents. If linking a library + # or program results in "error TOC overflow" add -mminimal-toc to + # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not + # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. + + _LT_AC_TAGVAR(archive_cmds, $1)='' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + + if test "$GCC" = yes; then + case $host_os in aix4.[[012]]|aix4.[[012]].*) + # We only want to do this on AIX 4.2 and lower, the check + # below for broken collect2 doesn't work under 4.3+ + collect2name=`${CC} -print-prog-name=collect2` + if test -f "$collect2name" && \ + strings "$collect2name" | grep resolve_lib_name >/dev/null + then + # We have reworked collect2 + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + else + # We have old collect2 + _LT_AC_TAGVAR(hardcode_direct, $1)=unsupported + # It fails to find uninstalled libraries when the uninstalled + # path is not listed in the libpath. Setting hardcode_minus_L + # to unsupported forces relinking + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)= + fi + ;; + esac + shared_flag='-shared' + if test "$aix_use_runtimelinking" = yes; then + shared_flag="$shared_flag "'${wl}-G' + fi + else + # not using gcc + if test "$host_cpu" = ia64; then + # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release + # chokes on -Wl,-G. The following line is correct: + shared_flag='-G' + else + if test "$aix_use_runtimelinking" = yes; then + shared_flag='${wl}-G' + else + shared_flag='${wl}-bM:SRE' + fi + fi + fi + + # It seems that -bexpall does not export symbols beginning with + # underscore (_), so it is better to generate a list of symbols to export. + _LT_AC_TAGVAR(always_export_symbols, $1)=yes + if test "$aix_use_runtimelinking" = yes; then + # Warning - without using the other runtime loading flags (-brtl), + # -berok will link without error, but may produce a broken library. + _LT_AC_TAGVAR(allow_undefined_flag, $1)='-berok' + # Determine the default libpath from the value encoded in an empty executable. 
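+          # (Sketch: _LT_AC_SYS_LIBPATH_AIX, defined elsewhere in this
+          # file, links an empty test program and reads the loader section
+          # of the resulting binary -- roughly `dump -H` filtered for the
+          # "Import File Strings" block -- to recover the toolchain's
+          # default libpath, falling back to /usr/lib:/lib.)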
+ _LT_AC_SYS_LIBPATH_AIX + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != "x"; then echo "${wl}${allow_undefined_flag}"; else :; fi` '"\${wl}$exp_sym_flag:\$export_symbols $shared_flag" + else + if test "$host_cpu" = ia64; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-R $libdir:/usr/lib:/lib' + _LT_AC_TAGVAR(allow_undefined_flag, $1)="-z nodefs" + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols" + else + # Determine the default libpath from the value encoded in an empty executable. + _LT_AC_SYS_LIBPATH_AIX + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-blibpath:$libdir:'"$aix_libpath" + # Warning - without using the other run time loading flags, + # -berok will link without error, but may produce a broken library. + _LT_AC_TAGVAR(no_undefined_flag, $1)=' ${wl}-bernotok' + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-berok' + # Exported symbols can be pulled into shared objects from archives + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='$convenience' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes + # This is similar to how AIX traditionally builds its shared libraries. + _LT_AC_TAGVAR(archive_expsym_cmds, $1)="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs ${wl}-bnoentry $compiler_flags ${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS $output_objdir/$libname$release.a $output_objdir/$soname' + fi + fi + ;; + + amigaos*) + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + # see comment about different semantics on the GNU ld section + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + + bsdi[[45]]*) + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)=-rdynamic + ;; + + cygwin* | mingw* | pw32*) + # When not using gcc, we currently assume that we are using + # Microsoft Visual C++. + # hardcode_libdir_flag_spec is actually meaningless, as there is + # no search path for DLLs. + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)=' ' + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + # Tell ltmain to make .lib files, not .a files. + libext=lib + # Tell ltmain to make .dll files, not .so files. + shrext_cmds=".dll" + # FIXME: Setting linknames here is a bad hack. + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | $SED -e '\''s/ -lc$//'\''` -link -dll~linknames=' + # The linker will automatically build a .lib file if we build a DLL. + _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='true' + # FIXME: Should let the user specify the lib program. 
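+      # (With hypothetical inputs, the librarian command below expands to
+      # something like: lib /OUT:foo.lib a.obj b.obj)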
+ _LT_AC_TAGVAR(old_archive_cmds, $1)='lib /OUT:$oldlib$oldobjs$old_deplibs' + _LT_AC_TAGVAR(fix_srcfile_path, $1)='`cygpath -w "$srcfile"`' + _LT_AC_TAGVAR(enable_shared_with_static_runtimes, $1)=yes + ;; + + darwin* | rhapsody*) + case $host_os in + rhapsody* | darwin1.[[012]]) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}suppress' + ;; + *) # Darwin 1.3 on + if test -z ${MACOSX_DEPLOYMENT_TARGET} ; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' + else + case ${MACOSX_DEPLOYMENT_TARGET} in + 10.[[012]]) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-flat_namespace ${wl}-undefined ${wl}suppress' + ;; + 10.*) + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-undefined ${wl}dynamic_lookup' + ;; + esac + fi + ;; + esac + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_automatic, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=unsupported + _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + if test "$GCC" = yes ; then + output_verbose_link_cmd='echo' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring' + _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' + # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -dynamiclib $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags -install_name $rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + else + case $cc_basename in + xlc*) + output_verbose_link_cmd='echo' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}`echo $rpath/$soname` $verstring' + _LT_AC_TAGVAR(module_cmds, $1)='$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags' + # Don't fix this by using the ld -exported_symbols_list flag, it doesn't exist in older darwin lds + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC -qmkshrobj $allow_undefined_flag -o $lib $libobjs $deplibs $compiler_flags ${wl}-install_name ${wl}$rpath/$soname $verstring~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + _LT_AC_TAGVAR(module_expsym_cmds, $1)='sed -e "s,#.*,," -e "s,^[ ]*,," -e "s,^\(..*\),_&," < $export_symbols > $output_objdir/${libname}-symbols.expsym~$CC $allow_undefined_flag -o $lib -bundle $libobjs $deplibs$compiler_flags~nmedit -s $output_objdir/${libname}-symbols.expsym ${lib}' + ;; + *) + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + fi + ;; + + dgux*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + freebsd1*) + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + + # 
FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor + # support. Future versions do this automatically, but an explicit c++rt0.o + # does not break anything, and helps significantly (at the cost of a little + # extra space). + freebsd2.2*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # Unfortunately, older versions of FreeBSD 2 do not have this feature. + freebsd2*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + # FreeBSD 3 and greater uses gcc -shared to do shared libraries. + freebsd* | kfreebsd*-gnu | dragonfly*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + hpux9*) + if test "$GCC" = yes; then + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$CC -shared -fPIC ${wl}+b ${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs $compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' + fi + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + ;; + + hpux10*) + if test "$GCC" = yes -a "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' + fi + if test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. 
+ _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + fi + ;; + + hpux11*) + if test "$GCC" = yes -a "$with_gnu_ld" = no; then + case $host_cpu in + hppa*64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared -fPIC ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + else + case $host_cpu in + hppa*64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' + ;; + ia64*) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags' + ;; + esac + fi + if test "$with_gnu_ld" = no; then + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}+b ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + + case $host_cpu in + hppa*64*|ia64*) + _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='+b $libdir' + _LT_AC_TAGVAR(hardcode_direct, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + *) + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + + # hardcode_minus_L: Not really in the search PATH, + # but as the default location of the library. + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + ;; + esac + fi + ;; + + irix5* | irix6* | nonstopux*) + if test "$GCC" = yes; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec_ld, $1)='-rpath $libdir' + fi + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + ;; + + netbsd*) + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out + else + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF + fi + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + newsos6) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + openbsd*) + _LT_AC_TAGVAR(hardcode_direct, $1)=yes + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs 
$deplibs $compiler_flags ${wl}-retain-symbols-file,$export_symbols' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-E' + else + case $host_os in + openbsd[[01]].* | openbsd2.[[0-7]] | openbsd2.[[0-7]].*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir' + ;; + *) + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared $pic_flag -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath,$libdir' + ;; + esac + fi + ;; + + os2*) + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes + _LT_AC_TAGVAR(allow_undefined_flag, $1)=unsupported + _LT_AC_TAGVAR(archive_cmds, $1)='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' + _LT_AC_TAGVAR(old_archive_From_new_cmds, $1)='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' + ;; + + osf3*) + if test "$GCC" = yes; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + else + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + fi + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=: + ;; + + osf4* | osf5*) # as osf3* with the addition of -msym flag + if test "$GCC" = yes; then + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' ${wl}-expect_unresolved ${wl}\*' + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='${wl}-rpath ${wl}$libdir' + else + _LT_AC_TAGVAR(allow_undefined_flag, $1)=' -expect_unresolved \*' + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='for i in `cat $export_symbols`; do printf "%s %s\\n" -exported_symbol "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ + $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib~$rm $lib.exp' + + # Both c and cxx compiler support -rpath directly + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-rpath $libdir' + fi + _LT_AC_TAGVAR(hardcode_libdir_separator, 
$1)=:
+      ;;
+
+    solaris*)
+      _LT_AC_TAGVAR(no_undefined_flag, $1)=' -z text'
+      if test "$GCC" = yes; then
+        wlarc='${wl}'
+        _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+        _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+          $CC -shared ${wl}-M ${wl}$lib.exp ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$rm $lib.exp'
+      else
+        wlarc=''
+        _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags'
+        _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$echo "{ global:" > $lib.exp~cat $export_symbols | $SED -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~
+        $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp'
+      fi
+      _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-R$libdir'
+      _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+      case $host_os in
+      solaris2.[[0-5]] | solaris2.[[0-5]].*) ;;
+      *)
+        # The compiler driver will combine linker options so we
+        # cannot just pass the convenience library names through
+        # without $wl, iff we do not link with $LD.
+        # Luckily, gcc supports the same syntax we need for Sun Studio.
+        # Supported since Solaris 2.6 (maybe 2.5.1?)
+        case $wlarc in
+        '')
+          _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='-z allextract$convenience -z defaultextract' ;;
+        *)
+          _LT_AC_TAGVAR(whole_archive_flag_spec, $1)='${wl}-z ${wl}allextract`for conv in $convenience\"\"; do test -n \"$conv\" && new_convenience=\"$new_convenience,$conv\"; done; $echo \"$new_convenience\"` ${wl}-z ${wl}defaultextract' ;;
+        esac ;;
+      esac
+      _LT_AC_TAGVAR(link_all_deplibs, $1)=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+        # Use $CC to link under sequent, because it throws in some extra .o
+        # files that make .init and .fini sections work.
+        _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+        _LT_AC_TAGVAR(archive_cmds, $1)='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags'
+      fi
+      _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir'
+      _LT_AC_TAGVAR(hardcode_direct, $1)=yes
+      _LT_AC_TAGVAR(hardcode_minus_L, $1)=yes
+      _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+        sni)
+          _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+          _LT_AC_TAGVAR(hardcode_direct, $1)=yes # is this really true???
+        ;;
+        siemens)
+          ## LD is ld; it makes a PLAMLIB
+          ## CC just makes a GrossModule.
+ _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(reload_cmds, $1)='$CC -r -o $output$reload_objs' + _LT_AC_TAGVAR(hardcode_direct, $1)=no + ;; + motorola) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_direct, $1)=no #Motorola manual says yes, but my tests say they lie + ;; + esac + runpath_var='LD_RUN_PATH' + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + sysv4.3*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='-Bexport' + ;; + + sysv4*MP*) + if test -d /usr/nec; then + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var=LD_RUN_PATH + hardcode_runpath_var=yes + _LT_AC_TAGVAR(ld_shlibs, $1)=yes + fi + ;; + + sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[[01]].[[10]]* | unixware7*) + _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + sysv5* | sco3.2v5* | sco5v6*) + # Note: We can NOT use -z defs as we might desire, because we do not + # link with -lc, and that would cause any symbols used from libc to + # always be unresolved, which means just about no library would + # ever link correctly. If we're not using GNU ld we use -z text + # though, which does catch some bad symbols but isn't as heavy-handed + # as -z defs. 
+ _LT_AC_TAGVAR(no_undefined_flag, $1)='${wl}-z,text' + _LT_AC_TAGVAR(allow_undefined_flag, $1)='${wl}-z,nodefs' + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='`test -z "$SCOABSPATH" && echo ${wl}-R,$libdir`' + _LT_AC_TAGVAR(hardcode_libdir_separator, $1)=':' + _LT_AC_TAGVAR(link_all_deplibs, $1)=yes + _LT_AC_TAGVAR(export_dynamic_flag_spec, $1)='${wl}-Bexport' + runpath_var='LD_RUN_PATH' + + if test "$GCC" = yes; then + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -shared ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -shared ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + else + _LT_AC_TAGVAR(archive_cmds, $1)='$CC -G ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + _LT_AC_TAGVAR(archive_expsym_cmds, $1)='$CC -G ${wl}-Bexport:$export_symbols ${wl}-h,\${SCOABSPATH:+${install_libdir}/}$soname -o $lib $libobjs $deplibs $compiler_flags' + fi + ;; + + uts4*) + _LT_AC_TAGVAR(archive_cmds, $1)='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' + _LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)='-L$libdir' + _LT_AC_TAGVAR(hardcode_shlibpath_var, $1)=no + ;; + + *) + _LT_AC_TAGVAR(ld_shlibs, $1)=no + ;; + esac + fi +]) +AC_MSG_RESULT([$_LT_AC_TAGVAR(ld_shlibs, $1)]) +test "$_LT_AC_TAGVAR(ld_shlibs, $1)" = no && can_build_shared=no + +# +# Do we need to explicitly link libc? +# +case "x$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)" in +x|xyes) + # Assume -lc should be added + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes + + if test "$enable_shared" = yes && test "$GCC" = yes; then + case $_LT_AC_TAGVAR(archive_cmds, $1) in + *'~'*) + # FIXME: we may have to deal with multi-command sequences. + ;; + '$CC '*) + # Test whether the compiler implicitly links with -lc since on some + # systems, -lgcc has to come before -lc. If gcc already passes -lc + # to ld, don't add -lc before -lgcc. + AC_MSG_CHECKING([whether -lc should be explicitly linked in]) + $rm conftest* + printf "$lt_simple_compile_test_code" > conftest.$ac_ext + + if AC_TRY_EVAL(ac_compile) 2>conftest.err; then + soname=conftest + lib=conftest + libobjs=conftest.$ac_objext + deplibs= + wl=$_LT_AC_TAGVAR(lt_prog_compiler_wl, $1) + pic_flag=$_LT_AC_TAGVAR(lt_prog_compiler_pic, $1) + compiler_flags=-v + linker_flags=-v + verstring= + output_objdir=. + libname=conftest + lt_save_allow_undefined_flag=$_LT_AC_TAGVAR(allow_undefined_flag, $1) + _LT_AC_TAGVAR(allow_undefined_flag, $1)= + if AC_TRY_EVAL(_LT_AC_TAGVAR(archive_cmds, $1) 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) + then + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=no + else + _LT_AC_TAGVAR(archive_cmds_need_lc, $1)=yes + fi + _LT_AC_TAGVAR(allow_undefined_flag, $1)=$lt_save_allow_undefined_flag + else + cat conftest.err 1>&5 + fi + $rm conftest* + AC_MSG_RESULT([$_LT_AC_TAGVAR(archive_cmds_need_lc, $1)]) + ;; + esac + fi + ;; +esac +])# AC_LIBTOOL_PROG_LD_SHLIBS + + +# _LT_AC_FILE_LTDLL_C +# ------------------- +# Be careful that the start marker always follows a newline. 
+AC_DEFUN([_LT_AC_FILE_LTDLL_C], [
+# /* ltdll.c starts here */
+# #define WIN32_LEAN_AND_MEAN
+# #include <windows.h>
+# #undef WIN32_LEAN_AND_MEAN
+# #include <stdio.h>
+#
+# #ifndef __CYGWIN__
+# # ifdef __CYGWIN32__
+# # define __CYGWIN__ __CYGWIN32__
+# # endif
+# #endif
+#
+# #ifdef __cplusplus
+# extern "C" {
+# #endif
+# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved);
+# #ifdef __cplusplus
+# }
+# #endif
+#
+# #ifdef __CYGWIN__
+# #include <cygwin/cygwin_dll.h>
+# DECLARE_CYGWIN_DLL( DllMain );
+# #endif
+# HINSTANCE __hDllInstance_base;
+#
+# BOOL APIENTRY
+# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved)
+# {
+#   __hDllInstance_base = hInst;
+#   return TRUE;
+# }
+# /* ltdll.c ends here */
+])# _LT_AC_FILE_LTDLL_C
+
+
+# _LT_AC_TAGVAR(VARNAME, [TAGNAME])
+# ---------------------------------
+AC_DEFUN([_LT_AC_TAGVAR], [ifelse([$2], [], [$1], [$1_$2])])
+
+
+# old names
+AC_DEFUN([AM_PROG_LIBTOOL],   [AC_PROG_LIBTOOL])
+AC_DEFUN([AM_ENABLE_SHARED],  [AC_ENABLE_SHARED($@)])
+AC_DEFUN([AM_ENABLE_STATIC],  [AC_ENABLE_STATIC($@)])
+AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)])
+AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)])
+AC_DEFUN([AM_PROG_LD],        [AC_PROG_LD])
+AC_DEFUN([AM_PROG_NM],        [AC_PROG_NM])
+
+# This is just to silence aclocal about the macro not being used
+ifelse([AC_DISABLE_FAST_INSTALL])
+
+AC_DEFUN([LT_AC_PROG_GCJ],
+[AC_CHECK_TOOL(GCJ, gcj, no)
+  test "x${GCJFLAGS+set}" = xset || GCJFLAGS="-g -O2"
+  AC_SUBST(GCJFLAGS)
+])
+
+AC_DEFUN([LT_AC_PROG_RC],
+[AC_CHECK_TOOL(RC, windres, no)
+])
+
+############################################################
+# NOTE: This macro has been submitted for inclusion into   #
+#  GNU Autoconf as AC_PROG_SED.  When it is available in   #
+#  a released version of Autoconf we should remove this    #
+#  macro and use it instead.                               #
+############################################################
+# LT_AC_PROG_SED
+# --------------
+# Check for a fully-functional sed program that truncates
+# as few characters as possible.  Prefer GNU sed if found.
+AC_DEFUN([LT_AC_PROG_SED],
+[AC_MSG_CHECKING([for a sed that does not truncate output])
+AC_CACHE_VAL(lt_cv_path_SED,
+[# Loop through the user's path and test for sed and gsed.
+# Then use that list of sed's as ones to test for truncation.
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  test -z "$as_dir" && as_dir=.
+  for lt_ac_prog in sed gsed; do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      if $as_executable_p "$as_dir/$lt_ac_prog$ac_exec_ext"; then
+        lt_ac_sed_list="$lt_ac_sed_list $as_dir/$lt_ac_prog$ac_exec_ext"
+      fi
+    done
+  done
+done
+lt_ac_max=0
+lt_ac_count=0
+# Add /usr/xpg4/bin/sed as it is typically found on Solaris
+# along with /bin/sed that truncates output.
+for lt_ac_sed in $lt_ac_sed_list /usr/xpg4/bin/sed; do
+  test ! -f $lt_ac_sed && continue
+  cat /dev/null > conftest.in
+  lt_ac_count=0
+  echo $ECHO_N "0123456789$ECHO_C" >conftest.in
+  # Check for GNU sed and select it if it is found.
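+  # (For a non-GNU sed, the loop below doubles conftest.in on every pass,
+  # so the 10-character seed grows as 20, 40, 80, ... characters; within
+  # about a dozen passes the candidate sed is probed with lines past the
+  # 10000-character mark mentioned inside the loop.)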
+ if "$lt_ac_sed" --version 2>&1 < /dev/null | grep 'GNU' > /dev/null; then + lt_cv_path_SED=$lt_ac_sed + break + fi + while true; do + cat conftest.in conftest.in >conftest.tmp + mv conftest.tmp conftest.in + cp conftest.in conftest.nl + echo >>conftest.nl + $lt_ac_sed -e 's/a$//' < conftest.nl >conftest.out || break + cmp -s conftest.out conftest.nl || break + # 10000 chars as input seems more than enough + test $lt_ac_count -gt 10 && break + lt_ac_count=`expr $lt_ac_count + 1` + if test $lt_ac_count -gt $lt_ac_max; then + lt_ac_max=$lt_ac_count + lt_cv_path_SED=$lt_ac_sed + fi + done +done +]) +SED=$lt_cv_path_SED +AC_MSG_RESULT([$SED]) +]) diff --git a/final/autoconf/m4/link_options.m4 b/final/autoconf/m4/link_options.m4 new file mode 100644 index 00000000000..4c5f2f435d0 --- /dev/null +++ b/final/autoconf/m4/link_options.m4 @@ -0,0 +1,108 @@ +# +# Get the linker version string. +# +# This macro is specific to LLVM. +# +AC_DEFUN([AC_LINK_GET_VERSION], + [AC_CACHE_CHECK([for linker version],[llvm_cv_link_version], + [ + version_string="$(ld -v 2>&1 | head -1)" + + # Check for ld64. + if (echo "$version_string" | grep -q "ld64"); then + llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#") + else + llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#") + fi + ]) + AC_DEFINE_UNQUOTED([HOST_LINK_VERSION],"$llvm_cv_link_version", + [Linker version detected at compile time.]) +]) + +# +# Determine if the system can handle the -R option being passed to the linker. +# +# This macro is specific to LLVM. +# +AC_DEFUN([AC_LINK_USE_R], +[AC_CACHE_CHECK([for compiler -Wl,-R option],[llvm_cv_link_use_r], +[ AC_LANG_PUSH([C]) + oldcflags="$CFLAGS" + CFLAGS="$CFLAGS -Wl,-R." + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])], + [llvm_cv_link_use_r=yes],[llvm_cv_link_use_r=no]) + CFLAGS="$oldcflags" + AC_LANG_POP([C]) +]) +if test "$llvm_cv_link_use_r" = yes ; then + AC_DEFINE([HAVE_LINK_R],[1],[Define if you can use -Wl,-R. to pass -R. to the linker, in order to add the current directory to the dynamic linker search path.]) + fi +]) + +# +# Determine if the system can handle the -R option being passed to the linker. +# +# This macro is specific to LLVM. +# +AC_DEFUN([AC_LINK_EXPORT_DYNAMIC], +[AC_CACHE_CHECK([for compiler -Wl,-export-dynamic option], + [llvm_cv_link_use_export_dynamic], +[ AC_LANG_PUSH([C]) + oldcflags="$CFLAGS" + CFLAGS="$CFLAGS -Wl,-export-dynamic" + AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])], + [llvm_cv_link_use_export_dynamic=yes],[llvm_cv_link_use_export_dynamic=no]) + CFLAGS="$oldcflags" + AC_LANG_POP([C]) +]) +if test "$llvm_cv_link_use_export_dynamic" = yes ; then + AC_DEFINE([HAVE_LINK_EXPORT_DYNAMIC],[1],[Define if you can use -Wl,-export-dynamic.]) + fi +]) + +# +# Determine if the system can handle the --version-script option being +# passed to the linker. +# +# This macro is specific to LLVM. +# +AC_DEFUN([AC_LINK_VERSION_SCRIPT], +[AC_CACHE_CHECK([for compiler -Wl,--version-script option], + [llvm_cv_link_use_version_script], +[ AC_LANG_PUSH([C]) + oldcflags="$CFLAGS" + + # The following code is from the autoconf manual, + # "11.13: Limitations of Usual Tools". + # Create a temporary directory $tmp in $TMPDIR (default /tmp). + # Use mktemp if possible; otherwise fall back on mkdir, + # with $RANDOM to make collisions less likely. 
+  : ${TMPDIR=/tmp}
+  {
+    tmp=`
+      (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null
+    ` &&
+    test -n "$tmp" && test -d "$tmp"
+  } || {
+    tmp=$TMPDIR/foo$$-$RANDOM
+    (umask 077 && mkdir "$tmp")
+  } || exit $?
+
+  echo "{" > "$tmp/export.map"
+  echo "  global: main;" >> "$tmp/export.map"
+  echo "  local: *;" >> "$tmp/export.map"
+  echo "};" >> "$tmp/export.map"
+
+  CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map"
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])],
+    [llvm_cv_link_use_version_script=yes],[llvm_cv_link_use_version_script=no])
+  rm "$tmp/export.map"
+  rmdir "$tmp"
+  CFLAGS="$oldcflags"
+  AC_LANG_POP([C])
+])
+if test "$llvm_cv_link_use_version_script" = yes ; then
+  AC_SUBST(HAVE_LINK_VERSION_SCRIPT,1)
+  fi
+])
+
diff --git a/final/autoconf/m4/linux_mixed_64_32.m4 b/final/autoconf/m4/linux_mixed_64_32.m4
new file mode 100644
index 00000000000..123491f87e5
--- /dev/null
+++ b/final/autoconf/m4/linux_mixed_64_32.m4
@@ -0,0 +1,17 @@
+#
+# Some Linux machines run a 64-bit kernel with a 32-bit userspace. 'uname -m'
+# shows these as x86_64. Ask the system 'gcc' what it thinks.
+#
+AC_DEFUN([AC_IS_LINUX_MIXED],
+[AC_CACHE_CHECK(for 32-bit userspace on 64-bit system,llvm_cv_linux_mixed,
+[ AC_LANG_PUSH([C])
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
+   [[#ifndef __x86_64__
+       error: Not x86-64 even if uname says so!
+      #endif
+   ]])],
+   [llvm_cv_linux_mixed=no],
+   [llvm_cv_linux_mixed=yes])
+  AC_LANG_POP([C])
+])
+])
diff --git a/final/autoconf/m4/ltdl.m4 b/final/autoconf/m4/ltdl.m4
new file mode 100644
index 00000000000..bc9e2ad2419
--- /dev/null
+++ b/final/autoconf/m4/ltdl.m4
@@ -0,0 +1,418 @@
+## ltdl.m4 - Configure ltdl for the target system. -*-Autoconf-*-
+## Copyright (C) 1999-2000 Free Software Foundation, Inc.
+##
+## This file is free software; the Free Software Foundation gives
+## unlimited permission to copy and/or distribute it, with or without
+## modifications, as long as this notice is preserved.
+
+# serial 7 AC_LIB_LTDL
+
+# AC_WITH_LTDL
+# ------------
+# Clients of libltdl can use this macro to allow the installer to
+# choose between a shipped copy of the ltdl sources or a preinstalled
+# version of the library.
+AC_DEFUN([AC_WITH_LTDL],
+[AC_REQUIRE([AC_LIB_LTDL])
+AC_SUBST([LIBLTDL])
+AC_SUBST([INCLTDL])
+
+# Unless the user asks us to check, assume no installed ltdl exists.
+use_installed_libltdl=no
+
+AC_ARG_WITH([included_ltdl],
+    [  --with-included-ltdl  use the GNU ltdl sources included here])
+
+if test "x$with_included_ltdl" != xyes; then
+  # We are not being forced to use the included libltdl sources, so
+  # decide whether there is a useful installed version we can use.
+  AC_CHECK_HEADER([ltdl.h],
+      [AC_CHECK_LIB([ltdl], [lt_dlcaller_register],
+          [with_included_ltdl=no],
+          [with_included_ltdl=yes])
+    ])
+fi
+
+if test "x$enable_ltdl_install" != xyes; then
+  # If the user did not specify an installable libltdl, then default
+  # to a convenience lib.
+  AC_LIBLTDL_CONVENIENCE
+fi
+
+if test "x$with_included_ltdl" = xno; then
+  # If the included ltdl is not to be used, then use the
+  # preinstalled libltdl we found.
+  AC_DEFINE([HAVE_LTDL], [1],
+    [Define this if a modern libltdl is already installed])
+  LIBLTDL=-lltdl
+fi
+
+# Report our decision...
+AC_MSG_CHECKING([whether to use included libltdl])
+AC_MSG_RESULT([$with_included_ltdl])
+
+AC_CONFIG_SUBDIRS([libltdl])
+])# AC_WITH_LTDL
+
+
+# AC_LIB_LTDL
+# -----------
+# Perform all the checks necessary for compilation of the ltdl objects
+# -- including compiler checks and header checks.
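+# (Usage sketch: a configure.ac normally invokes AC_WITH_LTDL above, which
+# AC_REQUIREs this macro, rather than calling AC_LIB_LTDL directly.)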
+AC_DEFUN([AC_LIB_LTDL],
+[AC_PREREQ(2.60)
+AC_REQUIRE([AC_PROG_CC])
+AC_REQUIRE([AC_C_CONST])
+AC_REQUIRE([AC_HEADER_STDC])
+AC_REQUIRE([AC_HEADER_DIRENT])
+AC_REQUIRE([_LT_AC_CHECK_DLFCN])
+AC_REQUIRE([AC_LTDL_ENABLE_INSTALL])
+AC_REQUIRE([AC_LTDL_SHLIBEXT])
+AC_REQUIRE([AC_LTDL_SHLIBPATH])
+AC_REQUIRE([AC_LTDL_SYSSEARCHPATH])
+AC_REQUIRE([AC_LTDL_OBJDIR])
+AC_REQUIRE([AC_LTDL_DLPREOPEN])
+AC_REQUIRE([AC_LTDL_DLLIB])
+AC_REQUIRE([AC_LTDL_SYMBOL_USCORE])
+AC_REQUIRE([AC_LTDL_DLSYM_USCORE])
+AC_REQUIRE([AC_LTDL_SYS_DLOPEN_DEPLIBS])
+AC_REQUIRE([AC_LTDL_FUNC_ARGZ])
+
+AC_CHECK_HEADERS([assert.h ctype.h errno.h malloc.h memory.h stdlib.h \
+                  stdio.h unistd.h])
+AC_CHECK_HEADERS([dl.h sys/dl.h dld.h mach-o/dyld.h])
+AC_CHECK_HEADERS([string.h strings.h], [break])
+
+AC_CHECK_FUNCS([strchr index], [break])
+AC_CHECK_FUNCS([strrchr rindex], [break])
+AC_CHECK_FUNCS([memcpy bcopy], [break])
+AC_CHECK_FUNCS([memmove strcmp])
+AC_CHECK_FUNCS([closedir opendir readdir])
+])# AC_LIB_LTDL
+
+
+# AC_LTDL_ENABLE_INSTALL
+# ----------------------
+AC_DEFUN([AC_LTDL_ENABLE_INSTALL],
+[AC_ARG_ENABLE([ltdl-install],
+    [AS_HELP_STRING([--enable-ltdl-install],[install libltdl])])
+
+AM_CONDITIONAL(INSTALL_LTDL, test x"${enable_ltdl_install-no}" != xno)
+AM_CONDITIONAL(CONVENIENCE_LTDL, test x"${enable_ltdl_convenience-no}" != xno)
+])# AC_LTDL_ENABLE_INSTALL
+
+
+# AC_LTDL_SYS_DLOPEN_DEPLIBS
+# --------------------------
+AC_DEFUN([AC_LTDL_SYS_DLOPEN_DEPLIBS],
+[AC_REQUIRE([AC_CANONICAL_HOST])
+AC_CACHE_CHECK([whether deplibs are loaded by dlopen],
+  [libltdl_cv_sys_dlopen_deplibs],
+  [# PORTME does your system automatically load deplibs for dlopen?
+  # or its logical equivalent (e.g. shl_load for HP-UX < 11)
+  # For now, we just catch OSes we know something about -- in the
+  # future, we'll try to test this programmatically.
+  libltdl_cv_sys_dlopen_deplibs=unknown
+  case "$host_os" in
+  aix3*|aix4.1.*|aix4.2.*)
+    # Unknown whether this is true for these versions of AIX, but
+    # we want this `case' here to explicitly catch those versions.
+    libltdl_cv_sys_dlopen_deplibs=unknown
+    ;;
+  aix[[45]]*)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  darwin*)
+    # Assuming the user has installed a libdl from somewhere, this is true
+    # If you are looking for one, see http://www.opendarwin.org/projects/dlcompat
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu)
+    # GNU and its variants, using gnu ld.so (Glibc)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  hpux10*|hpux11*)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  interix*)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  irix[[12345]]*|irix6.[[01]]*)
+    # Catch all versions of IRIX before 6.2, and indicate that we don't
+    # know how it worked for any of those versions.
+    libltdl_cv_sys_dlopen_deplibs=unknown
+    ;;
+  irix*)
+    # The case above catches anything before 6.2, and it's known that
+    # at 6.2 and later dlopen does load deplibs.
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  netbsd*)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  openbsd*)
+    libltdl_cv_sys_dlopen_deplibs=yes
+    ;;
+  osf[[1234]]*)
+    # dlopen did load deplibs (at least at 4.x), but until the 5.x series,
+    # it did *not* use an RPATH in a shared library to find objects the
+    # library depends on, so we explicitly say `no'.
+ libltdl_cv_sys_dlopen_deplibs=no + ;; + osf5.0|osf5.0a|osf5.1) + # dlopen *does* load deplibs and with the right loader patch applied + # it even uses RPATH in a shared library to search for shared objects + # that the library depends on, but there's no easy way to know if that + # patch is installed. Since this is the case, all we can really + # say is unknown -- it depends on the patch being installed. If + # it is, this changes to `yes'. Without it, it would be `no'. + libltdl_cv_sys_dlopen_deplibs=unknown + ;; + osf*) + # the two cases above should catch all versions of osf <= 5.1. Read + # the comments above for what we know about them. + # At > 5.1, deplibs are loaded *and* any RPATH in a shared library + # is used to find them so we can finally say `yes'. + libltdl_cv_sys_dlopen_deplibs=yes + ;; + solaris*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + esac + ]) +if test "$libltdl_cv_sys_dlopen_deplibs" != yes; then + AC_DEFINE([LTDL_DLOPEN_DEPLIBS], [1], + [Define if the OS needs help to load dependent libraries for dlopen().]) +fi +])# AC_LTDL_SYS_DLOPEN_DEPLIBS + + +# AC_LTDL_SHLIBEXT +# ---------------- +AC_DEFUN([AC_LTDL_SHLIBEXT], +[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER]) +AC_CACHE_CHECK([which extension is used for loadable modules], + [libltdl_cv_shlibext], +[ +module=yes +eval libltdl_cv_shlibext=$shrext_cmds + ]) +if test -n "$libltdl_cv_shlibext"; then + AC_DEFINE_UNQUOTED([LTDL_SHLIB_EXT], ["$libltdl_cv_shlibext"], + [Define to the extension used for shared libraries, say, ".so".]) +fi +])# AC_LTDL_SHLIBEXT + + +# AC_LTDL_SHLIBPATH +# ----------------- +AC_DEFUN([AC_LTDL_SHLIBPATH], +[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER]) +AC_CACHE_CHECK([which variable specifies run-time library path], + [libltdl_cv_shlibpath_var], [libltdl_cv_shlibpath_var="$shlibpath_var"]) +if test -n "$libltdl_cv_shlibpath_var"; then + AC_DEFINE_UNQUOTED([LTDL_SHLIBPATH_VAR], ["$libltdl_cv_shlibpath_var"], + [Define to the name of the environment variable that determines the dynamic library search path.]) +fi +])# AC_LTDL_SHLIBPATH + + +# AC_LTDL_SYSSEARCHPATH +# --------------------- +AC_DEFUN([AC_LTDL_SYSSEARCHPATH], +[AC_REQUIRE([AC_LIBTOOL_SYS_DYNAMIC_LINKER]) +AC_CACHE_CHECK([for the default library search path], + [libltdl_cv_sys_search_path], + [libltdl_cv_sys_search_path="$sys_lib_dlsearch_path_spec"]) +if test -n "$libltdl_cv_sys_search_path"; then + sys_search_path= + for dir in $libltdl_cv_sys_search_path; do + if test -z "$sys_search_path"; then + sys_search_path="$dir" + else + sys_search_path="$sys_search_path$PATH_SEPARATOR$dir" + fi + done + AC_DEFINE_UNQUOTED([LTDL_SYSSEARCHPATH], ["$sys_search_path"], + [Define to the system default library search path.]) +fi +])# AC_LTDL_SYSSEARCHPATH + + +# AC_LTDL_OBJDIR +# -------------- +AC_DEFUN([AC_LTDL_OBJDIR], +[AC_CACHE_CHECK([for objdir], + [libltdl_cv_objdir], + [libltdl_cv_objdir="$objdir" + if test -n "$objdir"; then + : + else + rm -f .libs 2>/dev/null + mkdir .libs 2>/dev/null + if test -d .libs; then + libltdl_cv_objdir=.libs + else + # MS-DOS does not allow filenames that begin with a dot. 
+      libltdl_cv_objdir=_libs
+    fi
+  rmdir .libs 2>/dev/null
+  fi
+  ])
+AC_DEFINE_UNQUOTED([LTDL_OBJDIR], ["$libltdl_cv_objdir/"],
+  [Define to the sub-directory in which libtool stores uninstalled libraries.])
+])# AC_LTDL_OBJDIR
+
+
+# AC_LTDL_DLPREOPEN
+# -----------------
+AC_DEFUN([AC_LTDL_DLPREOPEN],
+[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])
+AC_CACHE_CHECK([whether libtool supports -dlopen/-dlpreopen],
+  [libltdl_cv_preloaded_symbols],
+  [if test -n "$lt_cv_sys_global_symbol_pipe"; then
+    libltdl_cv_preloaded_symbols=yes
+  else
+    libltdl_cv_preloaded_symbols=no
+  fi
+  ])
+if test x"$libltdl_cv_preloaded_symbols" = xyes; then
+  AC_DEFINE([HAVE_PRELOADED_SYMBOLS], [1],
+    [Define if libtool can extract symbol lists from object files.])
+fi
+])# AC_LTDL_DLPREOPEN
+
+
+# AC_LTDL_DLLIB
+# -------------
+AC_DEFUN([AC_LTDL_DLLIB],
+[LIBADD_DL=
+AC_SUBST(LIBADD_DL)
+AC_LANG_PUSH([C])
+
+AC_CHECK_FUNC([shl_load],
+      [AC_DEFINE([HAVE_SHL_LOAD], [1],
+                 [Define if you have the shl_load function.])],
+  [AC_CHECK_LIB([dld], [shl_load],
+        [AC_DEFINE([HAVE_SHL_LOAD], [1],
+                   [Define if you have the shl_load function.])
+         LIBADD_DL="$LIBADD_DL -ldld"],
+    [AC_CHECK_LIB([dl], [dlopen],
+          [AC_DEFINE([HAVE_LIBDL], [1],
+                     [Define if you have the libdl library or equivalent.])
+           LIBADD_DL="-ldl" libltdl_cv_lib_dl_dlopen="yes"],
+      [AC_LINK_IFELSE([AC_LANG_PROGRAM([[#if HAVE_DLFCN_H
+#  include <dlfcn.h>
+#endif
+      ]], [[dlopen(0, 0);]])],[AC_DEFINE([HAVE_LIBDL], [1],
+                             [Define if you have the libdl library or equivalent.]) libltdl_cv_func_dlopen="yes"],[AC_CHECK_LIB([svld], [dlopen],
+              [AC_DEFINE([HAVE_LIBDL], [1],
+                         [Define if you have the libdl library or equivalent.])
+               LIBADD_DL="-lsvld" libltdl_cv_func_dlopen="yes"],
+          [AC_CHECK_LIB([dld], [dld_link],
+                [AC_DEFINE([HAVE_DLD], [1],
+                           [Define if you have the GNU dld library.])
+                 LIBADD_DL="$LIBADD_DL -ldld"],
+            [AC_CHECK_FUNC([_dyld_func_lookup],
+                  [AC_DEFINE([HAVE_DYLD], [1],
+                             [Define if you have the _dyld_func_lookup function.])])
+            ])
+          ])
+        ])
+      ])
+    ])
+])
+
+if test x"$libltdl_cv_func_dlopen" = xyes || test x"$libltdl_cv_lib_dl_dlopen" = xyes
+then
+  lt_save_LIBS="$LIBS"
+  LIBS="$LIBS $LIBADD_DL"
+  AC_CHECK_FUNCS([dlerror])
+  LIBS="$lt_save_LIBS"
+fi
+AC_LANG_POP
+])# AC_LTDL_DLLIB
+
+
+# AC_LTDL_SYMBOL_USCORE
+# ---------------------
+# does the compiler prefix global symbols with an underscore?
+AC_DEFUN([AC_LTDL_SYMBOL_USCORE],
+[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])
+AC_CACHE_CHECK([for _ prefix in compiled symbols],
+  [ac_cv_sys_symbol_underscore],
+  [ac_cv_sys_symbol_underscore=no
+  cat > conftest.$ac_ext <<EOF
+void nm_test_func(){}
+int main(){nm_test_func;return 0;}
+EOF
+  if AC_TRY_EVAL(ac_compile); then
+    # Now try to grab the symbols.
+    ac_nlist=conftest.nm
+    if AC_TRY_EVAL(NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist) && test -s "$ac_nlist"; then
+      # See whether the symbols have a leading underscore.
+      if grep '^. _nm_test_func' "$ac_nlist" >/dev/null; then
+        ac_cv_sys_symbol_underscore=yes
+      else
+        if grep '^. nm_test_func ' "$ac_nlist" >/dev/null; then
+          :
+        else
+          echo "configure: cannot find nm_test_func in $ac_nlist" >&AS_MESSAGE_LOG_FD
+        fi
+      fi
+    else
+      echo "configure: cannot run $lt_cv_sys_global_symbol_pipe" >&AS_MESSAGE_LOG_FD
+    fi
+  else
+    echo "configure: failed program was:" >&AS_MESSAGE_LOG_FD
+    cat conftest.c >&AS_MESSAGE_LOG_FD
+  fi
+  rm -rf conftest*
+  ])
+])# AC_LTDL_SYMBOL_USCORE
+
+
+# AC_LTDL_DLSYM_USCORE
+# --------------------
+AC_DEFUN([AC_LTDL_DLSYM_USCORE],
+[AC_REQUIRE([AC_LTDL_SYMBOL_USCORE])
+if test x"$ac_cv_sys_symbol_underscore" = xyes; then
+  if test x"$libltdl_cv_func_dlopen" = xyes ||
+     test x"$libltdl_cv_lib_dl_dlopen" = xyes ; then
+        AC_CACHE_CHECK([whether we have to add an underscore for dlsym],
+          [libltdl_cv_need_uscore],
+          [libltdl_cv_need_uscore=unknown
+          save_LIBS="$LIBS"
+          LIBS="$LIBS $LIBADD_DL"
+          _LT_AC_TRY_DLOPEN_SELF(
+            [libltdl_cv_need_uscore=no], [libltdl_cv_need_uscore=yes],
+            [], [libltdl_cv_need_uscore=cross])
+          LIBS="$save_LIBS"
+          ])
+  fi
+fi
+
+if test x"$libltdl_cv_need_uscore" = xyes; then
+  AC_DEFINE([NEED_USCORE], [1],
+    [Define if dlsym() requires a leading underscore in symbol names.])
+fi
+])# AC_LTDL_DLSYM_USCORE
+
+# AC_LTDL_FUNC_ARGZ
+# -----------------
+AC_DEFUN([AC_LTDL_FUNC_ARGZ],
+[AC_CHECK_HEADERS([argz.h])
+
+AC_CHECK_TYPES([error_t],
+  [],
+  [AC_DEFINE([error_t], [int],
+    [Define to a type to use for `error_t' if it is not otherwise available.])],
+  [#if HAVE_ARGZ_H
+#  include <argz.h>
+#endif])
+
+AC_CHECK_FUNCS([argz_append argz_create_sep argz_insert argz_next argz_stringify])
+])# AC_LTDL_FUNC_ARGZ
diff --git a/final/autoconf/m4/need_dev_zero_for_mmap.m4 b/final/autoconf/m4/need_dev_zero_for_mmap.m4
new file mode 100644
index 00000000000..57b32283017
--- /dev/null
+++ b/final/autoconf/m4/need_dev_zero_for_mmap.m4
@@ -0,0 +1,17 @@
+#
+# When allocating RWX memory, check whether we need to use /dev/zero
+# as the file descriptor or not.
+#
+AC_DEFUN([AC_NEED_DEV_ZERO_FOR_MMAP],
+[AC_CACHE_CHECK([if /dev/zero is needed for mmap],
+ac_cv_need_dev_zero_for_mmap,
+[if test "$llvm_cv_os_type" = "Interix" ; then
+   ac_cv_need_dev_zero_for_mmap=yes
+ else
+   ac_cv_need_dev_zero_for_mmap=no
+ fi
+])
+if test "$ac_cv_need_dev_zero_for_mmap" = yes; then
+  AC_DEFINE([NEED_DEV_ZERO_FOR_MMAP],[1],
+   [Define if /dev/zero should be used when mapping RWX memory, or undefine if it's not necessary])
+fi])
diff --git a/final/autoconf/m4/path_perl.m4 b/final/autoconf/m4/path_perl.m4
new file mode 100644
index 00000000000..406656cb032
--- /dev/null
+++ b/final/autoconf/m4/path_perl.m4
@@ -0,0 +1,16 @@
+dnl Check for a reasonable version of Perl.
+dnl   $1 - Minimum Perl version.  Typically 5.006.
+dnl
+AC_DEFUN([LLVM_PROG_PERL], [
+AC_PATH_PROG(PERL, [perl], [none])
+if test "$PERL" != "none"; then
+  AC_MSG_CHECKING(for Perl $1 or newer)
+  if $PERL -e 'use $1;' 2>&1 > /dev/null; then
+    AC_MSG_RESULT(yes)
+  else
+    PERL=none
+    AC_MSG_RESULT(not found)
+  fi
+fi
+])
+
diff --git a/final/autoconf/m4/path_tclsh.m4 b/final/autoconf/m4/path_tclsh.m4
new file mode 100644
index 00000000000..85433de71cc
--- /dev/null
+++ b/final/autoconf/m4/path_tclsh.m4
@@ -0,0 +1,39 @@
+dnl This macro checks for tclsh which is required to run dejagnu. On some
+dnl platforms (notably FreeBSD), tclsh is named tclshX.Y - this handles
+dnl that for us so we can get the latest installed tclsh version.
+dnl
+AC_DEFUN([DJ_AC_PATH_TCLSH], [
+no_itcl=true
+AC_MSG_CHECKING(for the tclsh program in tclinclude directory)
+AC_ARG_WITH(tclinclude,
+  AS_HELP_STRING([--with-tclinclude],
+                 [directory where tcl headers are]),
+  [with_tclinclude=${withval}],[with_tclinclude=''])
+AC_CACHE_VAL(ac_cv_path_tclsh,[
+dnl first check to see if --with-tclinclude was specified
+if test x"${with_tclinclude}" != x ; then
+  if test -f ${with_tclinclude}/tclsh ; then
+    ac_cv_path_tclsh=`(cd ${with_tclinclude}; pwd)`
+  elif test -f ${with_tclinclude}/src/tclsh ; then
+    ac_cv_path_tclsh=`(cd ${with_tclinclude}/src; pwd)`
+  else
+    AC_MSG_ERROR([${with_tclinclude} directory doesn't contain tclsh])
+  fi
+fi])
+
+dnl see if one is installed
+if test x"${ac_cv_path_tclsh}" = x ; then
+   AC_MSG_RESULT(none)
+   AC_PATH_PROGS([TCLSH],[tclsh8.4 tclsh8.4.8 tclsh8.4.7 tclsh8.4.6 tclsh8.4.5 tclsh8.4.4 tclsh8.4.3 tclsh8.4.2 tclsh8.4.1 tclsh8.4.0 tclsh8.3 tclsh8.3.5 tclsh8.3.4 tclsh8.3.3 tclsh8.3.2 tclsh8.3.1 tclsh8.3.0 tclsh])
+   if test x"${TCLSH}" = x ; then
+     ac_cv_path_tclsh='';
+   else
+     ac_cv_path_tclsh="${TCLSH}";
+   fi
+else
+   AC_MSG_RESULT(${ac_cv_path_tclsh})
+   TCLSH="${ac_cv_path_tclsh}"
+   AC_SUBST(TCLSH)
+fi
+])
+
diff --git a/final/autoconf/m4/rand48.m4 b/final/autoconf/m4/rand48.m4
new file mode 100644
index 00000000000..56705d85c9c
--- /dev/null
+++ b/final/autoconf/m4/rand48.m4
@@ -0,0 +1,12 @@
+#
+# This function determines if the srand48, drand48, and lrand48 functions are
+# available on this platform.
+#
+AC_DEFUN([AC_FUNC_RAND48],[
+AC_SINGLE_CXX_CHECK([ac_cv_func_rand48],
+                    [srand48/lrand48/drand48], [<stdlib.h>],
+                    [srand48(0);lrand48();drand48();])
+if test "$ac_cv_func_rand48" = "yes" ; then
+AC_DEFINE([HAVE_RAND48],1,[Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h>])
+fi
+])
diff --git a/final/autoconf/m4/sanity_check.m4 b/final/autoconf/m4/sanity_check.m4
new file mode 100644
index 00000000000..639fccca246
--- /dev/null
+++ b/final/autoconf/m4/sanity_check.m4
@@ -0,0 +1,31 @@
+dnl Check a program for version sanity. The test runs a program, passes it an
+dnl argument to make it print out some identification string, and filters that
+dnl output with a regular expression. If the output is non-empty, the program
+dnl passes the sanity check.
+dnl   $1 - Name or full path of the program to run
+dnl   $2 - Argument to pass to print out identification string
+dnl   $3 - grep RE to match identification string
+dnl   $4 - set to 1 to make errors only a warning
+AC_DEFUN([CHECK_PROGRAM_SANITY],
+[
+AC_MSG_CHECKING([sanity for program ]$1)
+sanity="0"
+sanity_path=`which $1 2>/dev/null`
+if test "$?" -eq 0 -a -x "$sanity_path" ; then
+  sanity=`$1 $2 2>&1 | grep "$3"`
+  if test -z "$sanity" ; then
+    AC_MSG_RESULT([no])
+    sanity="0"
+    if test "$4" -eq 1 ; then
+      AC_MSG_WARN([Program ]$1[ failed to pass sanity check.])
+    else
+      AC_MSG_ERROR([Program ]$1[ failed to pass sanity check.])
+    fi
+  else
+    AC_MSG_RESULT([yes])
+    sanity="1"
+  fi
+else
+  AC_MSG_RESULT([not found])
+fi
+])
diff --git a/final/autoconf/m4/single_cxx_check.m4 b/final/autoconf/m4/single_cxx_check.m4
new file mode 100644
index 00000000000..21efa4bed35
--- /dev/null
+++ b/final/autoconf/m4/single_cxx_check.m4
@@ -0,0 +1,10 @@
+dnl AC_SINGLE_CXX_CHECK(CACHEVAR, FUNCTION, HEADER, PROGRAM)
+dnl                     $1,       $2,       $3,     $4,
+dnl
+AC_DEFUN([AC_SINGLE_CXX_CHECK],
+ [AC_CACHE_CHECK([for $2 in $3], [$1],
+  [AC_LANG_PUSH([C++])
+   AC_COMPILE_IFELSE(AC_LANG_PROGRAM([#include $3],[$4]),[$1=yes],[$1=no])
+   AC_LANG_POP([C++])])
+ ])
+
diff --git a/final/autoconf/m4/visibility_inlines_hidden.m4 b/final/autoconf/m4/visibility_inlines_hidden.m4
new file mode 100644
index 00000000000..42ddbe9128b
--- /dev/null
+++ b/final/autoconf/m4/visibility_inlines_hidden.m4
@@ -0,0 +1,22 @@
+#
+# Determine if the compiler accepts -fvisibility-inlines-hidden
+#
+# This macro is specific to LLVM.
+#
+AC_DEFUN([AC_CXX_USE_VISIBILITY_INLINES_HIDDEN],
+[AC_CACHE_CHECK([for compiler -fvisibility-inlines-hidden option],
+                [llvm_cv_cxx_visibility_inlines_hidden],
+[ AC_LANG_PUSH([C++])
+  oldcxxflags="$CXXFLAGS"
+  CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM()],
+    [llvm_cv_cxx_visibility_inlines_hidden=yes],[llvm_cv_cxx_visibility_inlines_hidden=no])
+  CXXFLAGS="$oldcxxflags"
+  AC_LANG_POP([C++])
+])
+if test "$llvm_cv_cxx_visibility_inlines_hidden" = yes ; then
+  AC_SUBST([ENABLE_VISIBILITY_INLINES_HIDDEN],[1])
+else
+  AC_SUBST([ENABLE_VISIBILITY_INLINES_HIDDEN],[0])
+fi
+])
diff --git a/final/autoconf/missing b/final/autoconf/missing
new file mode 100755
index 00000000000..64b5f901dd5
--- /dev/null
+++ b/final/autoconf/missing
@@ -0,0 +1,353 @@
+#! /bin/sh
+# Common stub for a few missing GNU programs while installing.
+
+scriptversion=2004-09-07.08
+
+# Copyright (C) 1996, 1997, 1999, 2000, 2002, 2003, 2004
+#   Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try \`$0 --help' for more information"
+  exit 1
+fi
+
+run=:
+
+# In the cases where this matters, `missing' is being run in the
+# srcdir already.
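+# Prefer the modern `configure.ac' input name; fall back to the historical
+# `configure.in' spelling when that is all the package provides.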
+if test -f configure.ac; then
+  configure_ac=configure.ac
+else
+  configure_ac=configure.in
+fi
+
+msg="missing on your system"
+
+case "$1" in
+--run)
+  # Try to run requested program, and just exit if it succeeds.
+  run=
+  shift
+  "$@" && exit 0
+  # Exit code 63 means version mismatch.  This often happens
+  # when the user tries to use an ancient version of a tool on
+  # a file that requires a minimum version.  In this case we
+  # should proceed as if the program had been absent, or as if
+  # --run hadn't been passed.
+  if test $? = 63; then
+    run=:
+    msg="probably too old"
+  fi
+  ;;
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
+error status if there is no known handling for PROGRAM.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+  --run           try to run the given command, and emulate it if it fails
+
+Supported PROGRAM values:
+  aclocal      touch file \`aclocal.m4'
+  autoconf     touch file \`configure'
+  autoheader   touch file \`config.h.in'
+  automake     touch all \`Makefile.in' files
+  bison        create \`y.tab.[ch]', if possible, from existing .[ch]
+  flex         create \`lex.yy.c', if possible, from existing .c
+  help2man     touch the output file
+  lex          create \`lex.yy.c', if possible, from existing .c
+  makeinfo     touch the output file
+  tar          try tar, gnutar, gtar, then tar without non-portable flags
+  yacc         create \`y.tab.[ch]', if possible, from existing .[ch]
+
+Send bug reports to <bug-automake@gnu.org>."
+    exit 0
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing $scriptversion (GNU Automake)"
+    exit 0
+    ;;
+
+  -*)
+    echo 1>&2 "$0: Unknown \`$1' option"
+    echo 1>&2 "Try \`$0 --help' for more information"
+    exit 1
+    ;;
+
+esac
+
+# Now exit if we have it, but it failed.  Also exit now if we
+# don't have it and --version was passed (most likely to detect
+# the program).
+case "$1" in
+  lex|yacc)
+    # Not GNU programs, they don't have --version.
+    ;;
+
+  tar)
+    if test -n "$run"; then
+       echo 1>&2 "ERROR: \`tar' requires --run"
+       exit 1
+    elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
+       exit 1
+    fi
+    ;;
+
+  *)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    elif test "x$2" = "x--version" || test "x$2" = "x--help"; then
+       # Could not run --version or --help.  This is probably someone
+       # running `$TOOL --version' or `$TOOL --help' to check whether
+       # $TOOL exists and not knowing $TOOL uses missing.
+       exit 1
+    fi
+    ;;
+esac
+
+# If it does not exist, or fails to run (possibly an outdated version),
+# try to emulate it.
+case "$1" in
+  aclocal*)
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`acinclude.m4' or \`${configure_ac}'.  You might want
+         to install the \`Automake' and \`Perl' packages.  Grab them from
+         any GNU archive site."
+    touch aclocal.m4
+    ;;
+
+  autoconf)
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`${configure_ac}'.  You might want to install the
+         \`Autoconf' and \`GNU m4' packages.  Grab them from any GNU
+         archive site."
+    touch configure
+    ;;
+
+  autoheader)
+    echo 1>&2 "\
+WARNING: \`$1' is $msg.  You should only need it if
+         you modified \`acconfig.h' or \`${configure_ac}'.  You might want
+         to install the \`Autoconf' and \`GNU m4' packages.  Grab them
+         from any GNU archive site."
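+    # Extract the header names from AC_CONFIG_HEADER/AM_CONFIG_HEADER in the
+    # configure input; the loop below touches each corresponding template.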
+ files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}` + test -z "$files" && files="config.h" + touch_files= + for f in $files; do + case "$f" in + *:*) touch_files="$touch_files "`echo "$f" | + sed -e 's/^[^:]*://' -e 's/:.*//'`;; + *) touch_files="$touch_files $f.in";; + esac + done + touch $touch_files + ;; + + automake*) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'. + You might want to install the \`Automake' and \`Perl' packages. + Grab them from any GNU archive site." + find . -type f -name Makefile.am -print | + sed 's/\.am$/.in/' | + while read f; do touch "$f"; done + ;; + + autom4te) + echo 1>&2 "\ +WARNING: \`$1' is needed, but is $msg. + You might have modified some files without having the + proper tools for further handling them. + You can get \`$1' as part of \`Autoconf' from any GNU + archive site." + + file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'` + test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'` + if test -f "$file"; then + touch $file + else + test -z "$file" || exec >$file + echo "#! /bin/sh" + echo "# Created by GNU Automake missing as a replacement of" + echo "# $ $@" + echo "exit 0" + chmod +x $file + exit 1 + fi + ;; + + bison|yacc) + echo 1>&2 "\ +WARNING: \`$1' $msg. You should only need it if + you modified a \`.y' file. You may need the \`Bison' package + in order for those modifications to take effect. You can get + \`Bison' from any GNU archive site." + rm -f y.tab.c y.tab.h + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.y) + SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.c + fi + SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" y.tab.h + fi + ;; + esac + fi + if [ ! -f y.tab.h ]; then + echo >y.tab.h + fi + if [ ! -f y.tab.c ]; then + echo 'main() { return 0; }' >y.tab.c + fi + ;; + + lex|flex) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.l' file. You may need the \`Flex' package + in order for those modifications to take effect. You can get + \`Flex' from any GNU archive site." + rm -f lex.yy.c + if [ $# -ne 1 ]; then + eval LASTARG="\${$#}" + case "$LASTARG" in + *.l) + SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'` + if [ -f "$SRCFILE" ]; then + cp "$SRCFILE" lex.yy.c + fi + ;; + esac + fi + if [ ! -f lex.yy.c ]; then + echo 'main() { return 0; }' >lex.yy.c + fi + ;; + + help2man) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a dependency of a manual page. You may need the + \`Help2man' package in order for those modifications to take + effect. You can get \`Help2man' from any GNU archive site." + + file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'` + fi + if [ -f "$file" ]; then + touch $file + else + test -z "$file" || exec >$file + echo ".ab help2man is required to generate this page" + exit 1 + fi + ;; + + makeinfo) + echo 1>&2 "\ +WARNING: \`$1' is $msg. You should only need it if + you modified a \`.texi' or \`.texinfo' file, or any other file + indirectly affecting the aspect of the manual. The spurious + call might also be the consequence of using a buggy \`make' (AIX, + DU, IRIX). You might want to install the \`Texinfo' package or + the \`GNU make' package. Grab either from any GNU archive site." 
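+    # Recover the output file name: prefer an explicit -o argument, else fall
+    # back to the @setfilename directive in the texinfo source.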
+ file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'` + if test -z "$file"; then + file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'` + file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file` + fi + touch $file + ;; + + tar) + shift + + # We have already tried tar in the generic part. + # Look for gnutar/gtar before invocation to avoid ugly error + # messages. + if (gnutar --version > /dev/null 2>&1); then + gnutar "$@" && exit 0 + fi + if (gtar --version > /dev/null 2>&1); then + gtar "$@" && exit 0 + fi + firstarg="$1" + if shift; then + case "$firstarg" in + *o*) + firstarg=`echo "$firstarg" | sed s/o//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + case "$firstarg" in + *h*) + firstarg=`echo "$firstarg" | sed s/h//` + tar "$firstarg" "$@" && exit 0 + ;; + esac + fi + + echo 1>&2 "\ +WARNING: I can't seem to be able to run \`tar' with the given arguments. + You may want to install GNU tar or Free paxutils, or check the + command line arguments." + exit 1 + ;; + + *) + echo 1>&2 "\ +WARNING: \`$1' is needed, and is $msg. + You might have modified some files without having the + proper tools for further handling them. Check the \`README' file, + it often tells you about the needed prerequisites for installing + this package. You may also peek at any GNU archive site, in case + some other package would contain this missing \`$1' program." + exit 1 + ;; +esac + +exit 0 + +# Local variables: +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/final/autoconf/mkinstalldirs b/final/autoconf/mkinstalldirs new file mode 100755 index 00000000000..1ee2d580177 --- /dev/null +++ b/final/autoconf/mkinstalldirs @@ -0,0 +1,150 @@ +#! /bin/sh +# mkinstalldirs --- make directory hierarchy + +scriptversion=2004-02-15.20 + +# Original author: Noah Friedman +# Created: 1993-05-16 +# Public domain. +# +# This file is maintained in Automake, please report +# bugs to or send patches to +# . + +errstatus=0 +dirmode="" + +usage="\ +Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ... + +Create each directory DIR (with mode MODE, if specified), including all +leading file name components. + +Report bugs to ." + +# process command line arguments +while test $# -gt 0 ; do + case $1 in + -h | --help | --h*) # -h for help + echo "$usage" + exit 0 + ;; + -m) # -m PERM arg + shift + test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } + dirmode=$1 + shift + ;; + --version) + echo "$0 $scriptversion" + exit 0 + ;; + --) # stop option processing + shift + break + ;; + -*) # unknown option + echo "$usage" 1>&2 + exit 1 + ;; + *) # first non-opt arg + break + ;; + esac +done + +for file +do + if test -d "$file"; then + shift + else + break + fi +done + +case $# in + 0) exit 0 ;; +esac + +# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and +# mkdir -p a/c at the same time, both will detect that a is missing, +# one will create a, then the other will try to create a and die with +# a "File exists" error. This is a problem when calling mkinstalldirs +# from a parallel make. We use --version in the probe to restrict +# ourselves to GNU mkdir, which is thread-safe. +case $dirmode in + '') + if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then + # echo "mkdir -p -- $*" + exec mkdir -p -- "$@" + else + # On NextStep and OpenStep, the `mkdir' command does not + # recognize any option. 
It will interpret all options as + # directories to create, and then abort because `.' already + # exists. + test -d ./-p && rmdir ./-p + test -d ./--version && rmdir ./--version + fi + ;; + *) + if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 && + test ! -d ./--version; then + # echo "mkdir -m $dirmode -p -- $*" + exec mkdir -m "$dirmode" -p -- "$@" + else + # Clean up after NextStep and OpenStep mkdir. + for d in ./-m ./-p ./--version "./$dirmode"; + do + test -d $d && rmdir $d + done + fi + ;; +esac + +for file +do + set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` + shift + + pathcomp= + for d + do + pathcomp="$pathcomp$d" + case $pathcomp in + -*) pathcomp=./$pathcomp ;; + esac + + if test ! -d "$pathcomp"; then + # echo "mkdir $pathcomp" + + mkdir "$pathcomp" || lasterr=$? + + if test ! -d "$pathcomp"; then + errstatus=$lasterr + else + if test ! -z "$dirmode"; then + # echo "chmod $dirmode $pathcomp" + lasterr="" + chmod "$dirmode" "$pathcomp" || lasterr=$? + + if test ! -z "$lasterr"; then + errstatus=$lasterr + fi + fi + fi + fi + + pathcomp="$pathcomp/" + done +done + +exit $errstatus + +# Local Variables: +# mode: shell-script +# sh-indentation: 2 +# eval: (add-hook 'write-file-hooks 'time-stamp) +# time-stamp-start: "scriptversion=" +# time-stamp-format: "%:y-%02m-%02d.%02H" +# time-stamp-end: "$" +# End: diff --git a/final/bindings/Makefile b/final/bindings/Makefile new file mode 100644 index 00000000000..c545b28854c --- /dev/null +++ b/final/bindings/Makefile @@ -0,0 +1,16 @@ +##===- bindings/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := .. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS = $(BINDINGS_TO_BUILD) + +include $(LEVEL)/Makefile.common diff --git a/final/bindings/README.txt b/final/bindings/README.txt new file mode 100644 index 00000000000..7693cb2cead --- /dev/null +++ b/final/bindings/README.txt @@ -0,0 +1,3 @@ +This directory contains bindings for the LLVM compiler infrastructure to allow +programs written in languages other than C or C++ to take advantage of the LLVM +infrastructure--for instance, a self-hosted compiler front-end. diff --git a/final/bindings/ocaml/Makefile b/final/bindings/ocaml/Makefile new file mode 100644 index 00000000000..a89caefb4de --- /dev/null +++ b/final/bindings/ocaml/Makefile @@ -0,0 +1,19 @@ +##===- bindings/ocaml/Makefile -----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. 
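+
+# Build the `llvm' binding first; every other subdirectory here compiles
+# against its interfaces.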
+DIRS = llvm bitreader bitwriter analysis target executionengine transforms
+ExtraMakefiles = $(PROJ_OBJ_DIR)/Makefile.ocaml
+
+ocamldoc:
+	$(Verb) for i in $(DIRS) ; do \
+	  $(MAKE) -C $$i ocamldoc; \
+	done
+
+include $(LEVEL)/Makefile.common
diff --git a/final/bindings/ocaml/Makefile.ocaml b/final/bindings/ocaml/Makefile.ocaml
new file mode 100644
index 00000000000..40ecc9c08e0
--- /dev/null
+++ b/final/bindings/ocaml/Makefile.ocaml
@@ -0,0 +1,415 @@
+##===- bindings/ocaml/Makefile.ocaml -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# An ocaml library is a unique project type in the context of LLVM, so rules are
+# here rather than in Makefile.rules.
+#
+# Reference materials on installing ocaml libraries:
+#
+#   https://fedoraproject.org/wiki/Packaging/OCaml
+#   http://pkg-ocaml-maint.alioth.debian.org/ocaml_packaging_policy.txt
+#
+##===----------------------------------------------------------------------===##
+
+include $(LEVEL)/Makefile.config
+
+# CFLAGS needs to be set before Makefile.rules is included.
+CXX.Flags += -I"$(shell $(OCAMLC) -where)"
+C.Flags += -I"$(shell $(OCAMLC) -where)"
+
+include $(LEVEL)/Makefile.common
+
+# Intentionally ignore PROJ_prefix here. We want the ocaml stdlib. However, the
+# user can override this with OCAML_LIBDIR or configure --with-ocaml-libdir=.
+PROJ_libocamldir := $(DESTDIR)$(OCAML_LIBDIR)
+OcamlDir := $(LibDir)/ocaml
+
+# Info from llvm-config and similar
+ifndef IS_CLEANING_TARGET
+ifdef UsedComponents
+UsedLibs = $(shell $(LLVM_CONFIG) --libs $(UsedComponents))
+UsedLibNames = $(shell $(LLVM_CONFIG) --libnames $(UsedComponents))
+endif
+endif
+
+# Tools
+OCAMLCFLAGS += -I $(ObjDir) -I $(OcamlDir)
+ifndef IS_CLEANING_TARGET
+ifneq ($(ObjectsO),)
+OCAMLAFLAGS += $(patsubst %,-cclib %, \
+                 $(filter-out -L$(LibDir),-l$(LIBRARYNAME) \
+                              $(shell $(LLVM_CONFIG) --ldflags)) \
+                 $(UsedLibs))
+else
+OCAMLAFLAGS += $(patsubst %,-cclib %, \
+                 $(filter-out -L$(LibDir),$(shell $(LLVM_CONFIG) --ldflags)) \
+                 $(UsedLibs))
+endif
+endif
+
+# -g was introduced in 3.10.0.
+#ifneq ($(ENABLE_OPTIMIZED),1) +# OCAMLDEBUGFLAG := -g +#endif + +Compile.CMI := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o) +Compile.CMO := $(strip $(OCAMLC) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o) +Archive.CMA := $(strip $(OCAMLC) -a -custom $(OCAMLAFLAGS) $(OCAMLDEBUGFLAG) \ + -o) + +Compile.CMX := $(strip $(OCAMLOPT) -c $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG) -o) +Archive.CMXA := $(strip $(OCAMLOPT) -a $(OCAMLAFLAGS) $(OCAMLDEBUGFLAG) -o) + +ifdef OCAMLOPT +Archive.EXE := $(strip $(OCAMLOPT) -cc $(CXX) $(OCAMLCFLAGS) $(UsedOcamLibs:%=%.cmxa) $(OCAMLDEBUGFLAG) -o) +else +Archive.EXE := $(strip $(OCAMLC) -cc $(CXX) $(OCAMLCFLAGS) $(OCAMLDEBUGFLAG:%=%.cma) -o) +endif + +# Source files +ifndef OcamlSources1 +OcamlSources1 := $(sort $(wildcard $(PROJ_SRC_DIR)/*.ml)) +endif + +ifndef OcamlHeaders1 +OcamlHeaders1 := $(sort $(wildcard $(PROJ_SRC_DIR)/*.mli)) +endif + +OcamlSources2 := $(filter-out $(ExcludeSources),$(OcamlSources1)) +OcamlHeaders2 := $(filter-out $(ExcludeHeaders),$(OcamlHeaders1)) + +OcamlSources := $(OcamlSources2:$(PROJ_SRC_DIR)/%=$(ObjDir)/%) +OcamlHeaders := $(OcamlHeaders2:$(PROJ_SRC_DIR)/%=$(ObjDir)/%) + +# Intermediate files +ObjectsCMI := $(OcamlSources:%.ml=%.cmi) +ObjectsCMO := $(OcamlSources:%.ml=%.cmo) +ObjectsCMX := $(OcamlSources:%.ml=%.cmx) + +ifdef LIBRARYNAME +LibraryCMA := $(ObjDir)/$(LIBRARYNAME).cma +LibraryCMXA := $(ObjDir)/$(LIBRARYNAME).cmxa +endif + +ifdef TOOLNAME +ToolEXE := $(ObjDir)/$(TOOLNAME)$(EXEEXT) +endif + +# Output files +# The .cmo files are the only intermediates; all others are to be installed. +OutputsCMI := $(ObjectsCMI:$(ObjDir)/%.cmi=$(OcamlDir)/%.cmi) +OutputsCMX := $(ObjectsCMX:$(ObjDir)/%.cmx=$(OcamlDir)/%.cmx) +OutputLibs := $(UsedLibNames:%=$(OcamlDir)/%) + +ifdef LIBRARYNAME +LibraryA := $(OcamlDir)/lib$(LIBRARYNAME).a +OutputCMA := $(LibraryCMA:$(ObjDir)/%.cma=$(OcamlDir)/%.cma) +OutputCMXA := $(LibraryCMXA:$(ObjDir)/%.cmxa=$(OcamlDir)/%.cmxa) +endif + +ifdef TOOLNAME +ifdef EXAMPLE_TOOL +OutputEXE := $(ExmplDir)/$(strip $(TOOLNAME))$(EXEEXT) +else +OutputEXE := $(ToolDir)/$(strip $(TOOLNAME))$(EXEEXT) +endif +endif + +# Installation targets +DestLibs := $(UsedLibNames:%=$(PROJ_libocamldir)/%) + +ifdef LIBRARYNAME +DestA := $(PROJ_libocamldir)/lib$(LIBRARYNAME).a +DestCMA := $(PROJ_libocamldir)/$(LIBRARYNAME).cma +DestCMXA := $(PROJ_libocamldir)/$(LIBRARYNAME).cmxa +endif + +##===- Dependencies -------------------------------------------------------===## +# Copy the sources into the intermediate directory because older ocamlc doesn't +# support -o except when linking (outputs are placed next to inputs). 
+ +$(ObjDir)/%.mli: $(PROJ_SRC_DIR)/%.mli $(ObjDir)/.dir + $(Verb) $(CP) -f $< $@ + +$(ObjDir)/%.ml: $(PROJ_SRC_DIR)/%.ml $(ObjDir)/.dir + $(Verb) $(CP) -f $< $@ + +$(ObjectsCMI): $(UsedOcamlInterfaces:%=$(OcamlDir)/%.cmi) + +ifdef LIBRARYNAME +$(ObjDir)/$(LIBRARYNAME).ocamldep: $(OcamlSources) $(OcamlHeaders) \ + $(OcamlDir)/.dir $(ObjDir)/.dir + $(Verb) $(OCAMLDEP) $(OCAMLCFLAGS) $(OcamlSources) $(OcamlHeaders) > $@ + +-include $(ObjDir)/$(LIBRARYNAME).ocamldep +endif + +ifdef TOOLNAME +$(ObjDir)/$(TOOLNAME).ocamldep: $(OcamlSources) $(OcamlHeaders) \ + $(OcamlDir)/.dir $(ObjDir)/.dir + $(Verb) $(OCAMLDEP) $(OCAMLCFLAGS) $(OcamlSources) $(OcamlHeaders) > $@ + +-include $(ObjDir)/$(TOOLNAME).ocamldep +endif + +##===- Build static library from C sources --------------------------------===## + +ifdef LibraryA +all-local:: $(LibraryA) +clean-local:: clean-a +install-local:: install-a +uninstall-local:: uninstall-a + +$(LibraryA): $(ObjectsO) $(OcamlDir)/.dir + $(Echo) "Building $(BuildMode) $(notdir $@)" + -$(Verb) $(RM) -f $@ + $(Verb) $(Archive) $@ $(ObjectsO) + $(Verb) $(Ranlib) $@ + +clean-a:: + -$(Verb) $(RM) -f $(LibraryA) + +install-a:: $(LibraryA) + $(Echo) "Installing $(BuildMode) $(DestA)" + $(Verb) $(MKDIR) $(PROJ_libocamldir) + $(Verb) $(INSTALL) $(LibraryA) $(DestA) + $(Verb) + +uninstall-a:: + $(Echo) "Uninstalling $(DestA)" + -$(Verb) $(RM) -f $(DestA) +endif + + +##===- Deposit dependent libraries adjacent to Ocaml libs -----------------===## + +all-local:: build-deplibs +clean-local:: clean-deplibs +install-local:: install-deplibs +uninstall-local:: uninstall-deplibs + +build-deplibs: $(OutputLibs) + +$(OcamlDir)/%.a: $(LibDir)/%.a + $(Verb) ln -sf $< $@ + +$(OcamlDir)/%.o: $(LibDir)/%.o + $(Verb) ln -sf $< $@ + +clean-deplibs: + $(Verb) $(RM) -f $(OutputLibs) + +install-deplibs: + $(Verb) $(MKDIR) $(PROJ_libocamldir) + $(Verb) for i in $(DestLibs:$(PROJ_libocamldir)/%=%); do \ + ln -sf "$(PROJ_libdir)/$$i" "$(PROJ_libocamldir)/$$i"; \ + done + +uninstall-deplibs: + $(Verb) $(RM) -f $(DestLibs) + + +##===- Build ocaml interfaces (.mli's -> .cmi's) --------------------------===## + +ifneq ($(OcamlHeaders),) +all-local:: build-cmis +clean-local:: clean-cmis +install-local:: install-cmis +uninstall-local:: uninstall-cmis + +build-cmis: $(OutputsCMI) + +$(OcamlDir)/%.cmi: $(ObjDir)/%.cmi $(OcamlDir)/.dir + $(Verb) $(CP) -f $< $@ + +$(ObjDir)/%.cmi: $(ObjDir)/%.mli $(ObjDir)/.dir + $(Echo) "Compiling $(notdir $<) for $(BuildMode) build" + $(Verb) $(Compile.CMI) $@ $< + +clean-cmis:: + -$(Verb) $(RM) -f $(OutputsCMI) + +# Also install the .mli's (headers) as documentation. 
+install-cmis: $(OutputsCMI) $(OcamlHeaders) + $(Verb) $(MKDIR) $(PROJ_libocamldir) + $(Verb) for i in $(OutputsCMI:$(OcamlDir)/%=%); do \ + $(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \ + $(DataInstall) $(OcamlDir)/$$i "$(PROJ_libocamldir)/$$i"; \ + done + $(Verb) for i in $(OcamlHeaders:$(ObjDir)/%=%); do \ + $(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \ + $(DataInstall) $(ObjDir)/$$i "$(PROJ_libocamldir)/$$i"; \ + done + +uninstall-cmis:: + $(Verb) for i in $(OutputsCMI:$(OcamlDir)/%=%); do \ + $(EchoCmd) "Uninstalling $(PROJ_libocamldir)/$$i"; \ + $(RM) -f "$(PROJ_libocamldir)/$$i"; \ + done + $(Verb) for i in $(OcamlHeaders:$(ObjDir)/%=%); do \ + $(EchoCmd) "Uninstalling $(PROJ_libocamldir)/$$i"; \ + $(RM) -f "$(PROJ_libocamldir)/$$i"; \ + done +endif + + +##===- Build ocaml bytecode archive (.ml's -> .cmo's -> .cma) -------------===## + +$(ObjDir)/%.cmo: $(ObjDir)/%.ml + $(Echo) "Compiling $(notdir $<) for $(BuildMode) build" + $(Verb) $(Compile.CMO) $@ $< + +ifdef LIBRARYNAME +all-local:: $(OutputCMA) +clean-local:: clean-cma +install-local:: install-cma +uninstall-local:: uninstall-cma + +$(OutputCMA): $(LibraryCMA) $(OcamlDir)/.dir + $(Verb) $(CP) -f $< $@ + +$(LibraryCMA): $(ObjectsCMO) $(OcamlDir)/.dir + $(Echo) "Archiving $(notdir $@) for $(BuildMode) build" + $(Verb) $(Archive.CMA) $@ $(ObjectsCMO) + +clean-cma:: + $(Verb) $(RM) -f $(OutputCMA) $(UsedLibNames:%=$(OcamlDir)/%) + +install-cma:: $(OutputCMA) + $(Echo) "Installing $(BuildMode) $(DestCMA)" + $(Verb) $(MKDIR) $(PROJ_libocamldir) + $(Verb) $(DataInstall) $(OutputCMA) "$(DestCMA)" + +uninstall-cma:: + $(Echo) "Uninstalling $(DestCMA)" + -$(Verb) $(RM) -f $(DestCMA) +endif + +##===- Build optimized ocaml archive (.ml's -> .cmx's -> .cmxa, .a) -------===## + +# The ocamlopt compiler is supported on a set of targets disjoint from LLVM's. +# If unavailable, 'configure' will not define OCAMLOPT in Makefile.config. 
+ifdef OCAMLOPT + +$(OcamlDir)/%.cmx: $(ObjDir)/%.cmx + $(Verb) $(CP) -f $< $@ + +$(ObjDir)/%.cmx: $(ObjDir)/%.ml + $(Echo) "Compiling optimized $(notdir $<) for $(BuildMode) build" + $(Verb) $(Compile.CMX) $@ $< + +ifdef LIBRARYNAME +all-local:: $(OutputCMXA) $(OutputsCMX) +clean-local:: clean-cmxa +install-local:: install-cmxa +uninstall-local:: uninstall-cmxa + +$(OutputCMXA): $(LibraryCMXA) + $(Verb) $(CP) -f $< $@ + $(Verb) $(CP) -f $(<:.cmxa=.a) $(@:.cmxa=.a) + +$(LibraryCMXA): $(ObjectsCMX) + $(Echo) "Archiving $(notdir $@) for $(BuildMode) build" + $(Verb) $(Archive.CMXA) $@ $(ObjectsCMX) + $(Verb) $(RM) -f $(@:.cmxa=.o) + +clean-cmxa:: + $(Verb) $(RM) -f $(OutputCMXA) $(OutputCMXA:.cmxa=.a) $(OutputsCMX) + +install-cmxa:: $(OutputCMXA) $(OutputsCMX) + $(Verb) $(MKDIR) $(PROJ_libocamldir) + $(Echo) "Installing $(BuildMode) $(DestCMXA)" + $(Verb) $(DataInstall) $(OutputCMXA) $(DestCMXA) + $(Echo) "Installing $(BuildMode) $(DestCMXA:.cmxa=.a)" + $(Verb) $(DataInstall) $(OutputCMXA:.cmxa=.a) $(DestCMXA:.cmxa=.a) + $(Verb) for i in $(OutputsCMX:$(OcamlDir)/%=%); do \ + $(EchoCmd) "Installing $(BuildMode) $(PROJ_libocamldir)/$$i"; \ + $(DataInstall) $(OcamlDir)/$$i "$(PROJ_libocamldir)/$$i"; \ + done + +uninstall-cmxa:: + $(Echo) "Uninstalling $(DestCMXA)" + $(Verb) $(RM) -f $(DestCMXA) + $(Echo) "Uninstalling $(DestCMXA:.cmxa=.a)" + $(Verb) $(RM) -f $(DestCMXA:.cmxa=.a) + $(Verb) for i in $(OutputsCMX:$(OcamlDir)/%=%); do \ + $(EchoCmd) "Uninstalling $(PROJ_libocamldir)/$$i"; \ + $(RM) -f $(PROJ_libocamldir)/$$i; \ + done +endif +endif + +##===- Build executables --------------------------------------------------===## + +ifdef TOOLNAME +all-local:: $(OutputEXE) +clean-local:: clean-exe + +$(OutputEXE): $(ToolEXE) $(OcamlDir)/.dir + $(Verb) $(CP) -f $< $@ + +ifndef OCAMLOPT +$(ToolEXE): $(ObjectsCMO) $(OcamlDir)/.dir + $(Echo) "Archiving $(notdir $@) for $(BuildMode) build" + $(Verb) $(Archive.EXE) $@ $(ObjectsCMO) +else +$(ToolEXE): $(ObjectsCMX) $(OcamlDir)/.dir + $(Echo) "Archiving $(notdir $@) for $(BuildMode) build" + $(Verb) $(Archive.EXE) $@ $(ObjectsCMX) +endif +endif + +##===- Generate documentation ---------------------------------------------===## + +$(ObjDir)/$(LIBRARYNAME).odoc: $(ObjectsCMI) + $(Echo) "Documenting $(notdir $@)" + $(Verb) $(OCAMLDOC) -I $(ObjDir) -I $(OcamlDir) -dump $@ $(OcamlHeaders) + +ocamldoc: $(ObjDir)/$(LIBRARYNAME).odoc + +##===- Debugging gunk -----------------------------------------------------===## +printvars:: printcamlvars + +printcamlvars:: + $(Echo) "LLVM_CONFIG : " '$(LLVM_CONFIG)' + $(Echo) "OCAMLCFLAGS : " '$(OCAMLCFLAGS)' + $(Echo) "OCAMLAFLAGS : " '$(OCAMLAFLAGS)' + $(Echo) "OCAMLC : " '$(OCAMLC)' + $(Echo) "OCAMLOPT : " '$(OCAMLOPT)' + $(Echo) "OCAMLDEP : " '$(OCAMLDEP)' + $(Echo) "Compile.CMI : " '$(Compile.CMI)' + $(Echo) "Compile.CMO : " '$(Compile.CMO)' + $(Echo) "Archive.CMA : " '$(Archive.CMA)' + $(Echo) "Compile.CMX : " '$(Compile.CMX)' + $(Echo) "Archive.CMXA : " '$(Archive.CMXA)' + $(Echo) "CAML_LIBDIR : " '$(CAML_LIBDIR)' + $(Echo) "LibraryCMA : " '$(LibraryCMA)' + $(Echo) "LibraryCMXA : " '$(LibraryCMXA)' + $(Echo) "OcamlSources1: " '$(OcamlSources1)' + $(Echo) "OcamlSources2: " '$(OcamlSources2)' + $(Echo) "OcamlSources : " '$(OcamlSources)' + $(Echo) "OcamlHeaders1: " '$(OcamlHeaders1)' + $(Echo) "OcamlHeaders2: " '$(OcamlHeaders2)' + $(Echo) "OcamlHeaders : " '$(OcamlHeaders)' + $(Echo) "ObjectsCMI : " '$(ObjectsCMI)' + $(Echo) "ObjectsCMO : " '$(ObjectsCMO)' + $(Echo) "ObjectsCMX : " '$(ObjectsCMX)' + $(Echo) 
"OCAML_LIBDIR : " '$(OCAML_LIBDIR)' + $(Echo) "DestA : " '$(DestA)' + $(Echo) "DestCMA : " '$(DestCMA)' + $(Echo) "DestCMXA : " '$(DestCMXA)' + $(Echo) "UsedLibs : " '$(UsedLibs)' + $(Echo) "UsedLibNames : " '$(UsedLibNames)' + +.PHONY: printcamlvars build-cmis \ + clean-a clean-cmis clean-cma clean-cmxa \ + install-a install-cmis install-cma install-cmxa \ + install-exe \ + uninstall-a uninstall-cmis uninstall-cma uninstall-cmxa \ + uninstall-exe diff --git a/final/bindings/ocaml/analysis/Makefile b/final/bindings/ocaml/analysis/Makefile new file mode 100644 index 00000000000..cbfcb246704 --- /dev/null +++ b/final/bindings/ocaml/analysis/Makefile @@ -0,0 +1,19 @@ +##===- bindings/ocaml/analysis/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml Llvm_analysis interface. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +LIBRARYNAME := llvm_analysis +UsedComponents := analysis +UsedOcamlInterfaces := llvm + +include ../Makefile.ocaml diff --git a/final/bindings/ocaml/analysis/analysis_ocaml.c b/final/bindings/ocaml/analysis/analysis_ocaml.c new file mode 100644 index 00000000000..97167055f70 --- /dev/null +++ b/final/bindings/ocaml/analysis/analysis_ocaml.c @@ -0,0 +1,72 @@ +/*===-- analysis_ocaml.c - LLVM Ocaml Glue ----------------------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file glues LLVM's ocaml interface to its C interface. These functions *| +|* are by and large transparent wrappers to the corresponding C functions. *| +|* *| +|* Note that these functions intentionally take liberties with the CAMLparamX *| +|* macros, since most of the parameters are not GC heap objects. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#include "llvm-c/Analysis.h" +#include "caml/alloc.h" +#include "caml/mlvalues.h" +#include "caml/memory.h" + + +/* Llvm.llmodule -> string option */ +CAMLprim value llvm_verify_module(LLVMModuleRef M) { + CAMLparam0(); + CAMLlocal2(String, Option); + + char *Message; + int Result = LLVMVerifyModule(M, LLVMReturnStatusAction, &Message); + + if (0 == Result) { + Option = Val_int(0); + } else { + Option = alloc(1, 0); + String = copy_string(Message); + Store_field(Option, 0, String); + } + + LLVMDisposeMessage(Message); + + CAMLreturn(Option); +} + +/* Llvm.llvalue -> bool */ +CAMLprim value llvm_verify_function(LLVMValueRef Fn) { + return Val_bool(LLVMVerifyFunction(Fn, LLVMReturnStatusAction) == 0); +} + +/* Llvm.llmodule -> unit */ +CAMLprim value llvm_assert_valid_module(LLVMModuleRef M) { + LLVMVerifyModule(M, LLVMAbortProcessAction, 0); + return Val_unit; +} + +/* Llvm.llvalue -> unit */ +CAMLprim value llvm_assert_valid_function(LLVMValueRef Fn) { + LLVMVerifyFunction(Fn, LLVMAbortProcessAction); + return Val_unit; +} + +/* Llvm.llvalue -> unit */ +CAMLprim value llvm_view_function_cfg(LLVMValueRef Fn) { + LLVMViewFunctionCFG(Fn); + return Val_unit; +} + +/* Llvm.llvalue -> unit */ +CAMLprim value llvm_view_function_cfg_only(LLVMValueRef Fn) { + LLVMViewFunctionCFGOnly(Fn); + return Val_unit; +} diff --git a/final/bindings/ocaml/analysis/llvm_analysis.ml b/final/bindings/ocaml/analysis/llvm_analysis.ml new file mode 100644 index 00000000000..fc4d2037074 --- /dev/null +++ b/final/bindings/ocaml/analysis/llvm_analysis.ml @@ -0,0 +1,22 @@ +(*===-- llvm_analysis.ml - LLVM Ocaml Interface -----------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + + +external verify_module : Llvm.llmodule -> string option = "llvm_verify_module" + +external verify_function : Llvm.llvalue -> bool = "llvm_verify_function" + +external assert_valid_module : Llvm.llmodule -> unit + = "llvm_assert_valid_module" + +external assert_valid_function : Llvm.llvalue -> unit + = "llvm_assert_valid_function" +external view_function_cfg : Llvm.llvalue -> unit = "llvm_view_function_cfg" +external view_function_cfg_only : Llvm.llvalue -> unit + = "llvm_view_function_cfg_only" diff --git a/final/bindings/ocaml/analysis/llvm_analysis.mli b/final/bindings/ocaml/analysis/llvm_analysis.mli new file mode 100644 index 00000000000..793f482e3c6 --- /dev/null +++ b/final/bindings/ocaml/analysis/llvm_analysis.mli @@ -0,0 +1,46 @@ +(*===-- llvm_analysis.mli - LLVM Ocaml Interface ----------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + +(** Intermediate representation analysis. + + This interface provides an ocaml API for LLVM IR analyses, the classes in + the Analysis library. *) + +(** [verify_module m] returns [None] if the module [m] is valid, and + [Some reason] if it is invalid. [reason] is a string containing a + human-readable validation report. See [llvm::verifyModule]. 
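+    A small illustrative sketch, assuming a module [m] is in scope (the
+    [report] binding is ours, not part of the API):
+    {[
+    match Llvm_analysis.verify_module m with
+    | None -> ()
+    | Some report -> prerr_endline report
+    ]}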
 *)
+external verify_module : Llvm.llmodule -> string option = "llvm_verify_module"
+
+(** [verify_function f] returns [true] if the function [f] is valid, and
+    [false] if it is invalid. See [llvm::verifyFunction]. *)
+external verify_function : Llvm.llvalue -> bool = "llvm_verify_function"
+
+(** [assert_valid_module m] returns if the module [m] is valid, but prints a
+    validation report to [stderr] and aborts the program if it is invalid. See
+    [llvm::verifyModule]. *)
+external assert_valid_module : Llvm.llmodule -> unit
+                             = "llvm_assert_valid_module"
+
+(** [assert_valid_function f] returns if the function [f] is valid, but prints
+    a validation report to [stderr] and aborts the program if it is invalid.
+    See [llvm::verifyFunction]. *)
+external assert_valid_function : Llvm.llvalue -> unit
+                               = "llvm_assert_valid_function"
+
+(** [view_function_cfg f] opens up a ghostscript window displaying the CFG of
+    the current function with the code for each basic block inside.
+    See [llvm::Function::viewCFG]. *)
+external view_function_cfg : Llvm.llvalue -> unit = "llvm_view_function_cfg"
+
+(** [view_function_cfg_only f] works just like [view_function_cfg], but does
+    not include the contents of basic blocks in the nodes.
+    See [llvm::Function::viewCFGOnly]. *)
+external view_function_cfg_only : Llvm.llvalue -> unit
+                                = "llvm_view_function_cfg_only"
diff --git a/final/bindings/ocaml/bitreader/Makefile b/final/bindings/ocaml/bitreader/Makefile
new file mode 100644
index 00000000000..a1c7de895cf
--- /dev/null
+++ b/final/bindings/ocaml/bitreader/Makefile
@@ -0,0 +1,19 @@
+##===- bindings/ocaml/bitreader/Makefile -------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This is the makefile for the Objective Caml Llvm_bitreader interface.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../../..
+LIBRARYNAME := llvm_bitreader
+UsedComponents := bitreader
+UsedOcamlInterfaces := llvm
+
+include ../Makefile.ocaml
diff --git a/final/bindings/ocaml/bitreader/bitreader_ocaml.c b/final/bindings/ocaml/bitreader/bitreader_ocaml.c
new file mode 100644
index 00000000000..ef72ce213d8
--- /dev/null
+++ b/final/bindings/ocaml/bitreader/bitreader_ocaml.c
@@ -0,0 +1,73 @@
+/*===-- bitreader_ocaml.c - LLVM Ocaml Glue ---------------------*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source     *|
+|* License. See LICENSE.TXT for details.                                     *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file glues LLVM's ocaml interface to its C interface. These functions *|
+|* are by and large transparent wrappers to the corresponding C functions.   *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/BitReader.h"
+#include "caml/alloc.h"
+#include "caml/fail.h"
+#include "caml/memory.h"
+
+
+/* Can't use the recommended caml_named_value mechanism for backwards
+   compatibility reasons. This is largely equivalent.
*/ +static value llvm_bitreader_error_exn; + +CAMLprim value llvm_register_bitreader_exns(value Error) { + llvm_bitreader_error_exn = Field(Error, 0); + register_global_root(&llvm_bitreader_error_exn); + return Val_unit; +} + +static void llvm_raise(value Prototype, char *Message) { + CAMLparam1(Prototype); + CAMLlocal1(CamlMessage); + + CamlMessage = copy_string(Message); + LLVMDisposeMessage(Message); + + raise_with_arg(Prototype, CamlMessage); + abort(); /* NOTREACHED */ +#ifdef CAMLnoreturn + CAMLnoreturn; /* Silences warnings, but is missing in some versions. */ +#endif +} + + +/*===-- Modules -----------------------------------------------------------===*/ + +/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ +CAMLprim value llvm_get_module(LLVMContextRef C, LLVMMemoryBufferRef MemBuf) { + CAMLparam0(); + CAMLlocal2(Variant, MessageVal); + char *Message; + + LLVMModuleRef M; + if (LLVMGetBitcodeModuleInContext(C, MemBuf, &M, &Message)) + llvm_raise(llvm_bitreader_error_exn, Message); + + CAMLreturn((value) M); +} + +/* Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule */ +CAMLprim value llvm_parse_bitcode(LLVMContextRef C, + LLVMMemoryBufferRef MemBuf) { + CAMLparam0(); + CAMLlocal2(Variant, MessageVal); + LLVMModuleRef M; + char *Message; + + if (LLVMParseBitcodeInContext(C, MemBuf, &M, &Message)) + llvm_raise(llvm_bitreader_error_exn, Message); + + CAMLreturn((value) M); +} diff --git a/final/bindings/ocaml/bitreader/llvm_bitreader.ml b/final/bindings/ocaml/bitreader/llvm_bitreader.ml new file mode 100644 index 00000000000..8b9d01d8fb0 --- /dev/null +++ b/final/bindings/ocaml/bitreader/llvm_bitreader.ml @@ -0,0 +1,20 @@ +(*===-- llvm_bitreader.ml - LLVM Ocaml Interface ----------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + + +exception Error of string + +external register_exns : exn -> unit = "llvm_register_bitreader_exns" +let _ = register_exns (Error "") + +external get_module : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule + = "llvm_get_module" + +external parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule + = "llvm_parse_bitcode" diff --git a/final/bindings/ocaml/bitreader/llvm_bitreader.mli b/final/bindings/ocaml/bitreader/llvm_bitreader.mli new file mode 100644 index 00000000000..1d333191c1d --- /dev/null +++ b/final/bindings/ocaml/bitreader/llvm_bitreader.mli @@ -0,0 +1,29 @@ +(*===-- llvm_bitreader.mli - LLVM Ocaml Interface ---------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + +(** Bitcode reader. + + This interface provides an ocaml API for the LLVM bitcode reader, the + classes in the Bitreader library. *) + +exception Error of string + +(** [get_module context mb] reads the bitcode for a new module [m] from the + memory buffer [mb] in the context [context]. Returns [m] if successful, or + raises [Error msg] otherwise, where [msg] is a description of the error + encountered. See the function [llvm::getBitcodeModule]. 
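+    An illustrative sketch (the "hello.bc" file name is assumed):
+    {[
+    let ctx = Llvm.create_context () in
+    let mb = Llvm.MemoryBuffer.of_file "hello.bc" in
+    let m = Llvm_bitreader.get_module ctx mb in
+    Llvm.dump_module m
+    ]}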
*) +val get_module : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule + + +(** [parse_bitcode context mb] parses the bitcode for a new module [m] from the + memory buffer [mb] in the context [context]. Returns [m] if successful, or + raises [Error msg] otherwise, where [msg] is a description of the error + encountered. See the function [llvm::ParseBitcodeFile]. *) +val parse_bitcode : Llvm.llcontext -> Llvm.llmemorybuffer -> Llvm.llmodule + diff --git a/final/bindings/ocaml/bitwriter/Makefile b/final/bindings/ocaml/bitwriter/Makefile new file mode 100644 index 00000000000..cec0a59c31b --- /dev/null +++ b/final/bindings/ocaml/bitwriter/Makefile @@ -0,0 +1,19 @@ +##===- bindings/ocaml/bitwriter/Makefile -------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml Llvm_bitwriter interface. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +LIBRARYNAME := llvm_bitwriter +UsedComponents := bitwriter +UsedOcamlInterfaces := llvm + +include ../Makefile.ocaml diff --git a/final/bindings/ocaml/bitwriter/bitwriter_ocaml.c b/final/bindings/ocaml/bitwriter/bitwriter_ocaml.c new file mode 100644 index 00000000000..53c93cbdfe9 --- /dev/null +++ b/final/bindings/ocaml/bitwriter/bitwriter_ocaml.c @@ -0,0 +1,45 @@ +/*===-- bitwriter_ocaml.c - LLVM Ocaml Glue ---------------------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file glues LLVM's ocaml interface to its C interface. These functions *| +|* are by and large transparent wrappers to the corresponding C functions. *| +|* *| +|* Note that these functions intentionally take liberties with the CAMLparamX *| +|* macros, since most of the parameters are not GC heap objects. 
*|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/BitWriter.h"
+#include "llvm-c/Core.h"
+#include "caml/alloc.h"
+#include "caml/mlvalues.h"
+#include "caml/memory.h"
+
+/*===-- Modules -----------------------------------------------------------===*/
+
+/* Llvm.llmodule -> string -> bool */
+CAMLprim value llvm_write_bitcode_file(value M, value Path) {
+  int res = LLVMWriteBitcodeToFile((LLVMModuleRef) M, String_val(Path));
+  return Val_bool(res == 0);
+}
+
+/* ?unbuffered:bool -> Llvm.llmodule -> Unix.file_descr -> bool */
+CAMLprim value llvm_write_bitcode_to_fd(value U, value M, value FD) {
+  int Unbuffered;
+  int res;
+
+  if (U == Val_int(0)) {
+    /* The optional argument was omitted; default to buffered writes. */
+    Unbuffered = 0;
+  } else {
+    Unbuffered = Bool_val(Field(U, 0));
+  }
+
+  res = LLVMWriteBitcodeToFD((LLVMModuleRef) M, Int_val(FD), 0, Unbuffered);
+  return Val_bool(res == 0);
+}
diff --git a/final/bindings/ocaml/bitwriter/llvm_bitwriter.ml b/final/bindings/ocaml/bitwriter/llvm_bitwriter.ml
new file mode 100644
index 00000000000..3e69a3cc8fb
--- /dev/null
+++ b/final/bindings/ocaml/bitwriter/llvm_bitwriter.ml
@@ -0,0 +1,25 @@
+(*===-- llvm_bitwriter.ml - LLVM Ocaml Interface ----------------*- C++ -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===
+ *
+ * This interface provides an ocaml API for the LLVM bitcode writer, the
+ * classes in the Bitwriter library.
+ *
+ *===----------------------------------------------------------------------===*)
+
+
+(* Writes the bitcode for the module to the given path. Returns true if
+   successful. *)
+external write_bitcode_file : Llvm.llmodule -> string -> bool
+  = "llvm_write_bitcode_file"
+
+external write_bitcode_to_fd : ?unbuffered:bool -> Llvm.llmodule
+                               -> Unix.file_descr -> bool
+  = "llvm_write_bitcode_to_fd"
+
+let output_bitcode ?unbuffered channel m =
+  write_bitcode_to_fd ?unbuffered m (Unix.descr_of_out_channel channel)
diff --git a/final/bindings/ocaml/bitwriter/llvm_bitwriter.mli b/final/bindings/ocaml/bitwriter/llvm_bitwriter.mli
new file mode 100644
index 00000000000..ea9a87600a7
--- /dev/null
+++ b/final/bindings/ocaml/bitwriter/llvm_bitwriter.mli
@@ -0,0 +1,30 @@
+(*===-- llvm_bitwriter.mli - LLVM Ocaml Interface ---------------*- C++ -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*)
+
+(** Bitcode writer.
+
+    This interface provides an ocaml API for the LLVM bitcode writer, the
+    classes in the Bitwriter library. *)
+
+(** [write_bitcode_file m path] writes the bitcode for module [m] to the file
+    at [path]. Returns [true] if successful, [false] otherwise. *)
+external write_bitcode_file : Llvm.llmodule -> string -> bool
+  = "llvm_write_bitcode_file"
+
+(** [write_bitcode_to_fd ~unbuffered m fd] writes the bitcode for module [m]
+    to the file descriptor [fd]. If [unbuffered] is [true], the descriptor is
+    flushed after every write. Returns [true] if successful, [false]
+    otherwise. *)
+external write_bitcode_to_fd : ?unbuffered:bool -> Llvm.llmodule
+                               -> Unix.file_descr -> bool
+  = "llvm_write_bitcode_to_fd"
+
+(** [output_bitcode ~unbuffered c m] writes the bitcode for module [m]
+    to the channel [c].
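+    For instance (a sketch; [m] is assumed to be an existing module and
+    "out.bc" a hypothetical output path):
+    {[
+      let oc = open_out_bin "out.bc" in
+      let ok = output_bitcode oc m in
+      close_out oc;
+      if not ok then failwith "bitcode emission failed"
+    ]}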
If [unbuffered] is [true], after every write the fd + will be flushed. Returns [true] if successful, [false] otherwise. *) +val output_bitcode : ?unbuffered:bool -> out_channel -> Llvm.llmodule -> bool diff --git a/final/bindings/ocaml/executionengine/Makefile b/final/bindings/ocaml/executionengine/Makefile new file mode 100644 index 00000000000..5fa3f22048f --- /dev/null +++ b/final/bindings/ocaml/executionengine/Makefile @@ -0,0 +1,19 @@ +##===- bindings/ocaml/executionengine/Makefile --------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml Llvm_executionengine interface. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +LIBRARYNAME := llvm_executionengine +UsedComponents := executionengine jit interpreter native +UsedOcamlInterfaces := llvm llvm_target + +include ../Makefile.ocaml diff --git a/final/bindings/ocaml/executionengine/executionengine_ocaml.c b/final/bindings/ocaml/executionengine/executionengine_ocaml.c new file mode 100644 index 00000000000..5b1e32efefc --- /dev/null +++ b/final/bindings/ocaml/executionengine/executionengine_ocaml.c @@ -0,0 +1,323 @@ +/*===-- executionengine_ocaml.c - LLVM Ocaml Glue ---------------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file glues LLVM's ocaml interface to its C interface. These functions *| +|* are by and large transparent wrappers to the corresponding C functions. *| +|* *| +|* Note that these functions intentionally take liberties with the CAMLparamX *| +|* macros, since most of the parameters are not GC heap objects. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#include "llvm-c/ExecutionEngine.h" +#include "llvm-c/Target.h" +#include "caml/alloc.h" +#include "caml/custom.h" +#include "caml/fail.h" +#include "caml/memory.h" +#include +#include + +/* Force the LLVM interpreter and JIT to be linked in. */ +void llvm_initialize(void) { + LLVMLinkInInterpreter(); + LLVMLinkInJIT(); +} + +/* unit -> bool */ +CAMLprim value llvm_initialize_native_target(value Unit) { + return Val_bool(LLVMInitializeNativeTarget()); +} + +/* Can't use the recommended caml_named_value mechanism for backwards + compatibility reasons. This is largely equivalent. */ +static value llvm_ee_error_exn; + +CAMLprim value llvm_register_ee_exns(value Error) { + llvm_ee_error_exn = Field(Error, 0); + register_global_root(&llvm_ee_error_exn); + return Val_unit; +} + +static void llvm_raise(value Prototype, char *Message) { + CAMLparam1(Prototype); + CAMLlocal1(CamlMessage); + + CamlMessage = copy_string(Message); + LLVMDisposeMessage(Message); + + raise_with_arg(Prototype, CamlMessage); + abort(); /* NOTREACHED */ +#ifdef CAMLnoreturn + CAMLnoreturn; /* Silences warnings, but is missing in some versions. */ +#endif +} + + +/*--... 
Operations on generic values .......................................--*/ + +#define Genericvalue_val(v) (*(LLVMGenericValueRef *)(Data_custom_val(v))) + +static void llvm_finalize_generic_value(value GenVal) { + LLVMDisposeGenericValue(Genericvalue_val(GenVal)); +} + +static struct custom_operations generic_value_ops = { + (char *) "LLVMGenericValue", + llvm_finalize_generic_value, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default +}; + +static value alloc_generic_value(LLVMGenericValueRef Ref) { + value Val = alloc_custom(&generic_value_ops, sizeof(LLVMGenericValueRef), 0, 1); + Genericvalue_val(Val) = Ref; + return Val; +} + +/* Llvm.lltype -> float -> t */ +CAMLprim value llvm_genericvalue_of_float(LLVMTypeRef Ty, value N) { + CAMLparam1(N); + CAMLreturn(alloc_generic_value( + LLVMCreateGenericValueOfFloat(Ty, Double_val(N)))); +} + +/* 'a -> t */ +CAMLprim value llvm_genericvalue_of_pointer(value V) { + CAMLparam1(V); + CAMLreturn(alloc_generic_value(LLVMCreateGenericValueOfPointer(Op_val(V)))); +} + +/* Llvm.lltype -> int -> t */ +CAMLprim value llvm_genericvalue_of_int(LLVMTypeRef Ty, value Int) { + return alloc_generic_value(LLVMCreateGenericValueOfInt(Ty, Int_val(Int), 1)); +} + +/* Llvm.lltype -> int32 -> t */ +CAMLprim value llvm_genericvalue_of_int32(LLVMTypeRef Ty, value Int32) { + CAMLparam1(Int32); + CAMLreturn(alloc_generic_value( + LLVMCreateGenericValueOfInt(Ty, Int32_val(Int32), 1))); +} + +/* Llvm.lltype -> nativeint -> t */ +CAMLprim value llvm_genericvalue_of_nativeint(LLVMTypeRef Ty, value NatInt) { + CAMLparam1(NatInt); + CAMLreturn(alloc_generic_value( + LLVMCreateGenericValueOfInt(Ty, Nativeint_val(NatInt), 1))); +} + +/* Llvm.lltype -> int64 -> t */ +CAMLprim value llvm_genericvalue_of_int64(LLVMTypeRef Ty, value Int64) { + CAMLparam1(Int64); + CAMLreturn(alloc_generic_value( + LLVMCreateGenericValueOfInt(Ty, Int64_val(Int64), 1))); +} + +/* Llvm.lltype -> t -> float */ +CAMLprim value llvm_genericvalue_as_float(LLVMTypeRef Ty, value GenVal) { + CAMLparam1(GenVal); + CAMLreturn(copy_double( + LLVMGenericValueToFloat(Ty, Genericvalue_val(GenVal)))); +} + +/* t -> 'a */ +CAMLprim value llvm_genericvalue_as_pointer(value GenVal) { + return Val_op(LLVMGenericValueToPointer(Genericvalue_val(GenVal))); +} + +/* t -> int */ +CAMLprim value llvm_genericvalue_as_int(value GenVal) { + assert(LLVMGenericValueIntWidth(Genericvalue_val(GenVal)) <= 8 * sizeof(value) + && "Generic value too wide to treat as an int!"); + return Val_int(LLVMGenericValueToInt(Genericvalue_val(GenVal), 1)); +} + +/* t -> int32 */ +CAMLprim value llvm_genericvalue_as_int32(value GenVal) { + CAMLparam1(GenVal); + assert(LLVMGenericValueIntWidth(Genericvalue_val(GenVal)) <= 32 + && "Generic value too wide to treat as an int32!"); + CAMLreturn(copy_int32(LLVMGenericValueToInt(Genericvalue_val(GenVal), 1))); +} + +/* t -> int64 */ +CAMLprim value llvm_genericvalue_as_int64(value GenVal) { + CAMLparam1(GenVal); + assert(LLVMGenericValueIntWidth(Genericvalue_val(GenVal)) <= 64 + && "Generic value too wide to treat as an int64!"); + CAMLreturn(copy_int64(LLVMGenericValueToInt(Genericvalue_val(GenVal), 1))); +} + +/* t -> nativeint */ +CAMLprim value llvm_genericvalue_as_nativeint(value GenVal) { + CAMLparam1(GenVal); + assert(LLVMGenericValueIntWidth(Genericvalue_val(GenVal)) <= 8 * sizeof(value) + && "Generic value too wide to treat as a nativeint!"); + CAMLreturn(copy_nativeint(LLVMGenericValueToInt(Genericvalue_val(GenVal),1))); +} + + +/*--... 
Operations on execution engines ....................................--*/

+/* llmodule -> ExecutionEngine.t */
+CAMLprim LLVMExecutionEngineRef llvm_ee_create(LLVMModuleRef M) {
+  LLVMExecutionEngineRef Interp;
+  char *Error;
+  if (LLVMCreateExecutionEngineForModule(&Interp, M, &Error))
+    llvm_raise(llvm_ee_error_exn, Error);
+  return Interp;
+}
+
+/* llmodule -> ExecutionEngine.t */
+CAMLprim LLVMExecutionEngineRef
+llvm_ee_create_interpreter(LLVMModuleRef M) {
+  LLVMExecutionEngineRef Interp;
+  char *Error;
+  if (LLVMCreateInterpreterForModule(&Interp, M, &Error))
+    llvm_raise(llvm_ee_error_exn, Error);
+  return Interp;
+}
+
+/* llmodule -> int -> ExecutionEngine.t */
+CAMLprim LLVMExecutionEngineRef
+llvm_ee_create_jit(LLVMModuleRef M, value OptLevel) {
+  LLVMExecutionEngineRef JIT;
+  char *Error;
+  if (LLVMCreateJITCompilerForModule(&JIT, M, Int_val(OptLevel), &Error))
+    llvm_raise(llvm_ee_error_exn, Error);
+  return JIT;
+}
+
+/* ExecutionEngine.t -> unit */
+CAMLprim value llvm_ee_dispose(LLVMExecutionEngineRef EE) {
+  LLVMDisposeExecutionEngine(EE);
+  return Val_unit;
+}
+
+/* llmodule -> ExecutionEngine.t -> unit */
+CAMLprim value llvm_ee_add_module(LLVMModuleRef M, LLVMExecutionEngineRef EE) {
+  LLVMAddModule(EE, M);
+  return Val_unit;
+}
+
+/* llmodule -> ExecutionEngine.t -> llmodule */
+CAMLprim LLVMModuleRef llvm_ee_remove_module(LLVMModuleRef M,
+                                             LLVMExecutionEngineRef EE) {
+  LLVMModuleRef RemovedModule;
+  char *Error;
+  if (LLVMRemoveModule(EE, M, &RemovedModule, &Error))
+    llvm_raise(llvm_ee_error_exn, Error);
+  return RemovedModule;
+}
+
+/* string -> ExecutionEngine.t -> llvalue option */
+CAMLprim value llvm_ee_find_function(value Name, LLVMExecutionEngineRef EE) {
+  CAMLparam1(Name);
+  CAMLlocal1(Option);
+  LLVMValueRef Found;
+  if (LLVMFindFunction(EE, String_val(Name), &Found))
+    CAMLreturn(Val_unit);
+  Option = alloc(1, 0);
+  Field(Option, 0) = Val_op(Found);
+  CAMLreturn(Option);
+}
+
+/* llvalue -> GenericValue.t array -> ExecutionEngine.t -> GenericValue.t */
+CAMLprim value llvm_ee_run_function(LLVMValueRef F, value Args,
+                                    LLVMExecutionEngineRef EE) {
+  unsigned NumArgs;
+  LLVMGenericValueRef Result, *GVArgs;
+  unsigned I;
+
+  NumArgs = Wosize_val(Args);
+  GVArgs = (LLVMGenericValueRef*) malloc(NumArgs * sizeof(LLVMGenericValueRef));
+  for (I = 0; I != NumArgs; ++I)
+    GVArgs[I] = Genericvalue_val(Field(Args, I));
+
+  Result = LLVMRunFunction(EE, F, NumArgs, GVArgs);
+
+  free(GVArgs);
+  return alloc_generic_value(Result);
+}
+
+/* ExecutionEngine.t -> unit */
+CAMLprim value llvm_ee_run_static_ctors(LLVMExecutionEngineRef EE) {
+  LLVMRunStaticConstructors(EE);
+  return Val_unit;
+}
+
+/* ExecutionEngine.t -> unit */
+CAMLprim value llvm_ee_run_static_dtors(LLVMExecutionEngineRef EE) {
+  LLVMRunStaticDestructors(EE);
+  return Val_unit;
+}
+
+/* llvalue -> string array -> (string * string) array -> ExecutionEngine.t ->
+   int */
+CAMLprim value llvm_ee_run_function_as_main(LLVMValueRef F,
+                                            value Args, value Env,
+                                            LLVMExecutionEngineRef EE) {
+  CAMLparam2(Args, Env);
+  int I, NumArgs, NumEnv, EnvSize, Result;
+  const char **CArgs, **CEnv;
+  char *CEnvBuf, *Pos;
+
+  NumArgs = Wosize_val(Args);
+  NumEnv = Wosize_val(Env);
+
+  /* Build the argument vector. */
+  CArgs = (const char **) malloc(NumArgs * sizeof(char*));
+  for (I = 0; I != NumArgs; ++I)
+    CArgs[I] = String_val(Field(Args, I));
+
+  /* Compute the size of the environment string buffer.
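+     Each (name, value) pair contributes strlen(name) + strlen(value) + 2
+     bytes, accounting for the '=' separator and the terminating NUL.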
*/ + for (I = 0, EnvSize = 0; I != NumEnv; ++I) { + EnvSize += strlen(String_val(Field(Field(Env, I), 0))) + 1; + EnvSize += strlen(String_val(Field(Field(Env, I), 1))) + 1; + } + + /* Build the environment. */ + CEnv = (const char **) malloc((NumEnv + 1) * sizeof(char*)); + CEnvBuf = (char*) malloc(EnvSize); + Pos = CEnvBuf; + for (I = 0; I != NumEnv; ++I) { + char *Name = String_val(Field(Field(Env, I), 0)), + *Value = String_val(Field(Field(Env, I), 1)); + int NameLen = strlen(Name), + ValueLen = strlen(Value); + + CEnv[I] = Pos; + memcpy(Pos, Name, NameLen); + Pos += NameLen; + *Pos++ = '='; + memcpy(Pos, Value, ValueLen); + Pos += ValueLen; + *Pos++ = '\0'; + } + CEnv[NumEnv] = NULL; + + Result = LLVMRunFunctionAsMain(EE, F, NumArgs, CArgs, CEnv); + + free(CArgs); + free(CEnv); + free(CEnvBuf); + + CAMLreturn(Val_int(Result)); +} + +/* llvalue -> ExecutionEngine.t -> unit */ +CAMLprim value llvm_ee_free_machine_code(LLVMValueRef F, + LLVMExecutionEngineRef EE) { + LLVMFreeMachineCodeForFunction(EE, F); + return Val_unit; +} + diff --git a/final/bindings/ocaml/executionengine/llvm_executionengine.ml b/final/bindings/ocaml/executionengine/llvm_executionengine.ml new file mode 100644 index 00000000000..a8535b24640 --- /dev/null +++ b/final/bindings/ocaml/executionengine/llvm_executionengine.ml @@ -0,0 +1,112 @@ +(*===-- llvm_executionengine.ml - LLVM Ocaml Interface ----------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + + +exception Error of string + +external register_exns: exn -> unit + = "llvm_register_ee_exns" + + +module GenericValue = struct + type t + + external of_float: Llvm.lltype -> float -> t + = "llvm_genericvalue_of_float" + external of_pointer: 'a -> t + = "llvm_genericvalue_of_pointer" + external of_int32: Llvm.lltype -> int32 -> t + = "llvm_genericvalue_of_int32" + external of_int: Llvm.lltype -> int -> t + = "llvm_genericvalue_of_int" + external of_nativeint: Llvm.lltype -> nativeint -> t + = "llvm_genericvalue_of_nativeint" + external of_int64: Llvm.lltype -> int64 -> t + = "llvm_genericvalue_of_int64" + + external as_float: Llvm.lltype -> t -> float + = "llvm_genericvalue_as_float" + external as_pointer: t -> 'a + = "llvm_genericvalue_as_pointer" + external as_int32: t -> int32 + = "llvm_genericvalue_as_int32" + external as_int: t -> int + = "llvm_genericvalue_as_int" + external as_nativeint: t -> nativeint + = "llvm_genericvalue_as_nativeint" + external as_int64: t -> int64 + = "llvm_genericvalue_as_int64" +end + + +module ExecutionEngine = struct + type t + + (* FIXME: Ocaml is not running this setup code unless we use 'val' in the + interface, which causes the emission of a stub for each function; + using 'external' in the module allows direct calls into + ocaml_executionengine.c. This is hardly fatal, but it is unnecessary + overhead on top of the two stubs that are already invoked for each + call into LLVM. 
*)
+  let _ = register_exns (Error "")
+
+  external create: Llvm.llmodule -> t
+    = "llvm_ee_create"
+  external create_interpreter: Llvm.llmodule -> t
+    = "llvm_ee_create_interpreter"
+  external create_jit: Llvm.llmodule -> int -> t
+    = "llvm_ee_create_jit"
+  external dispose: t -> unit
+    = "llvm_ee_dispose"
+  external add_module: Llvm.llmodule -> t -> unit
+    = "llvm_ee_add_module"
+  external remove_module: Llvm.llmodule -> t -> Llvm.llmodule
+    = "llvm_ee_remove_module"
+  external find_function: string -> t -> Llvm.llvalue option
+    = "llvm_ee_find_function"
+  external run_function: Llvm.llvalue -> GenericValue.t array -> t ->
+                         GenericValue.t
+    = "llvm_ee_run_function"
+  external run_static_ctors: t -> unit
+    = "llvm_ee_run_static_ctors"
+  external run_static_dtors: t -> unit
+    = "llvm_ee_run_static_dtors"
+  external run_function_as_main: Llvm.llvalue -> string array ->
+                                 (string * string) array -> t -> int
+    = "llvm_ee_run_function_as_main"
+  external free_machine_code: Llvm.llvalue -> t -> unit
+    = "llvm_ee_free_machine_code"
+
+  external target_data: t -> Llvm_target.TargetData.t
+    = "LLVMGetExecutionEngineTargetData"
+
+  (* The following are not bound. Patches are welcome.
+
+     add_global_mapping: llvalue -> llgenericvalue -> t -> unit
+     clear_all_global_mappings: t -> unit
+     update_global_mapping: llvalue -> llgenericvalue -> t -> unit
+     get_pointer_to_global_if_available: llvalue -> t -> llgenericvalue
+     get_pointer_to_global: llvalue -> t -> llgenericvalue
+     get_pointer_to_function: llvalue -> t -> llgenericvalue
+     get_pointer_to_function_or_stub: llvalue -> t -> llgenericvalue
+     get_global_value_at_address: llgenericvalue -> t -> llvalue option
+     store_value_to_memory: llgenericvalue -> llgenericvalue -> lltype -> unit
+     initialize_memory: llvalue -> llgenericvalue -> t -> unit
+     recompile_and_relink_function: llvalue -> t -> llgenericvalue
+     get_or_emit_global_variable: llvalue -> t -> llgenericvalue
+     disable_lazy_compilation: t -> unit
+     lazy_compilation_enabled: t -> bool
+     install_lazy_function_creator: (string -> llgenericvalue) -> t -> unit
+
+   *)
+end
+
+external initialize_native_target : unit -> bool
+  = "llvm_initialize_native_target"
diff --git a/final/bindings/ocaml/executionengine/llvm_executionengine.mli b/final/bindings/ocaml/executionengine/llvm_executionengine.mli
new file mode 100644
index 00000000000..166b7bcddca
--- /dev/null
+++ b/final/bindings/ocaml/executionengine/llvm_executionengine.mli
@@ -0,0 +1,163 @@
+(*===-- llvm_executionengine.mli - LLVM Ocaml Interface ---------*- C++ -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*)
+
+(** JIT Interpreter.
+
+    This interface provides an ocaml API for the LLVM execution engine
+    (JIT/interpreter), the classes in the ExecutionEngine library. *)
+
+exception Error of string
+
+module GenericValue: sig
+  (** [GenericValue.t] is a boxed union type used to portably pass arguments to
+      and receive values from the execution engine. It supports only a limited
+      selection of types; for more complex argument types, it is necessary to
+      generate a stub function by hand or to pass parameters by reference.
+      See the struct [llvm::GenericValue]. *)
+  type t
+
+  (** [of_float fpty n] boxes the float [n] in a float-valued generic value
+      according to the floating point type [fpty].
See the fields
+      [llvm::GenericValue::DoubleVal] and [llvm::GenericValue::FloatVal]. *)
+  val of_float : Llvm.lltype -> float -> t
+
+  (** [of_pointer v] boxes the pointer value [v] in a generic value. See the
+      field [llvm::GenericValue::PointerVal]. *)
+  val of_pointer : 'a -> t
+
+  (** [of_int32 ty n] boxes the int32 [n] in a generic value with the bitwidth
+      of the integer type [ty]. See the field [llvm::GenericValue::IntVal]. *)
+  val of_int32 : Llvm.lltype -> int32 -> t
+
+  (** [of_int ty n] boxes the int [n] in a generic value with the bitwidth of
+      the integer type [ty]. See the field [llvm::GenericValue::IntVal]. *)
+  val of_int : Llvm.lltype -> int -> t
+
+  (** [of_nativeint ty n] boxes the native int [n] in a generic value with the
+      bitwidth of the integer type [ty]. See the field
+      [llvm::GenericValue::IntVal]. *)
+  val of_nativeint : Llvm.lltype -> nativeint -> t
+
+  (** [of_int64 ty n] boxes the int64 [n] in a generic value with the bitwidth
+      of the integer type [ty]. See the field [llvm::GenericValue::IntVal]. *)
+  val of_int64 : Llvm.lltype -> int64 -> t
+
+  (** [as_float fpty gv] unboxes the floating point-valued generic value [gv]
+      of floating point type [fpty]. See the fields
+      [llvm::GenericValue::DoubleVal] and [llvm::GenericValue::FloatVal]. *)
+  val as_float : Llvm.lltype -> t -> float
+
+  (** [as_pointer gv] unboxes the pointer-valued generic value [gv]. See the
+      field [llvm::GenericValue::PointerVal]. *)
+  val as_pointer : t -> 'a
+
+  (** [as_int32 gv] unboxes the integer-valued generic value [gv] as an
+      [int32]. Is invalid if [gv] has a bitwidth greater than 32 bits. See the
+      field [llvm::GenericValue::IntVal]. *)
+  val as_int32 : t -> int32
+
+  (** [as_int gv] unboxes the integer-valued generic value [gv] as an [int].
+      Is invalid if [gv] has a bitwidth greater than the host word size; note
+      that the most significant bit may be lost in the conversion to the OCaml
+      [int]. See the field [llvm::GenericValue::IntVal]. *)
+  val as_int : t -> int
+
+  (** [as_nativeint gv] unboxes the integer-valued generic value [gv] as a
+      [nativeint]. Is invalid if [gv] has a bitwidth greater than that of
+      [nativeint]. See the field [llvm::GenericValue::IntVal]. *)
+  val as_nativeint : t -> nativeint
+
+  (** [as_int64 gv] unboxes the integer-valued generic value [gv] as an
+      [int64]. Is invalid if [gv] has a bitwidth greater than 64 bits. See the
+      field [llvm::GenericValue::IntVal]. *)
+  val as_int64 : t -> int64
+end
+
+
+module ExecutionEngine: sig
+  (** An execution engine is either a JIT compiler or an interpreter, capable
+      of directly loading an LLVM module and executing its functions without
+      first invoking a static compiler and generating a native executable. *)
+  type t
+
+  (** [create m] creates a new execution engine, taking ownership of the
+      module [m] if successful. Creates a JIT if possible, else falls back to
+      an interpreter. Raises [Error msg] if an error occurs. The execution
+      engine is not garbage collected and must be destroyed with [dispose ee].
+      See the function [llvm::EngineBuilder::create]. *)
+  val create : Llvm.llmodule -> t
+
+  (** [create_interpreter m] creates a new interpreter, taking ownership of
+      the module [m] if successful. Raises [Error msg] if an error occurs. The
+      execution engine is not garbage collected and must be destroyed with
+      [dispose ee].
+      See the function [llvm::EngineBuilder::create]. *)
+  val create_interpreter : Llvm.llmodule -> t
+
+  (** [create_jit m optlevel] creates a new JIT (just-in-time compiler) for
+      the module [m] at the desired optimization level [optlevel], taking
+      ownership of [m] if successful. Raises [Error msg] if an error occurs.
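+      For instance, a sketch, assuming [m] is an existing module and that
+      optimization level [3] corresponds to [llvm::CodeGenOpt::Aggressive]:
+      {[ let ee = ExecutionEngine.create_jit m 3 ]}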
The execution
+      engine is not garbage collected and must be destroyed with [dispose ee].
+      See the function [llvm::EngineBuilder::create]. *)
+  val create_jit : Llvm.llmodule -> int -> t
+
+  (** [dispose ee] releases the memory used by the execution engine and must
+      be invoked to avoid memory leaks. *)
+  val dispose : t -> unit
+
+  (** [add_module m ee] adds the module [m] to the execution engine [ee]. *)
+  val add_module : Llvm.llmodule -> t -> unit
+
+  (** [remove_module m ee] removes the module [m] from the execution engine
+      [ee] and returns it. Raises [Error msg] if an error occurs. *)
+  val remove_module : Llvm.llmodule -> t -> Llvm.llmodule
+
+  (** [find_function n ee] finds the function named [n] defined in any of the
+      modules owned by the execution engine [ee]. Returns [None] if the
+      function is not found and [Some f] otherwise. *)
+  val find_function : string -> t -> Llvm.llvalue option
+
+  (** [run_function f args ee] synchronously executes the function [f] with
+      the arguments [args], which must be compatible with the parameter types.
+      Returns the result as a generic value. *)
+  val run_function : Llvm.llvalue -> GenericValue.t array -> t ->
+                     GenericValue.t
+
+  (** [run_static_ctors ee] executes the static constructors of each module in
+      the execution engine [ee]. *)
+  val run_static_ctors : t -> unit
+
+  (** [run_static_dtors ee] executes the static destructors of each module in
+      the execution engine [ee]. *)
+  val run_static_dtors : t -> unit
+
+  (** [run_function_as_main f args env ee] executes the function [f] as a main
+      function, passing it [argc] and [argv] according to the string array
+      [args], and [envp] as specified by the array [env]. Returns the integer
+      return value of the function. *)
+  val run_function_as_main : Llvm.llvalue -> string array ->
+                             (string * string) array -> t -> int
+
+  (** [free_machine_code f ee] releases the memory in the execution engine
+      [ee] used to store the machine code for the function [f]. *)
+  val free_machine_code : Llvm.llvalue -> t -> unit
+
+  (** [target_data ee] is the target data owned by the execution engine
+      [ee]. *)
+  val target_data : t -> Llvm_target.TargetData.t
+end
+
+(** [initialize_native_target ()] initializes the native target for the host,
+    so that the JIT can generate code for it. See the function
+    [LLVMInitializeNativeTarget]. *)
+val initialize_native_target : unit -> bool
diff --git a/final/bindings/ocaml/llvm/Makefile b/final/bindings/ocaml/llvm/Makefile
new file mode 100644
index 00000000000..99e347bc131
--- /dev/null
+++ b/final/bindings/ocaml/llvm/Makefile
@@ -0,0 +1,19 @@
+##===- bindings/ocaml/llvm/Makefile ------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This is the makefile for the Objective Caml Llvm interface.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../../..
+LIBRARYNAME := llvm
+UsedComponents := core
+UsedOcamLibs := llvm
+
+include ../Makefile.ocaml
diff --git a/final/bindings/ocaml/llvm/llvm.ml b/final/bindings/ocaml/llvm/llvm.ml
new file mode 100644
index 00000000000..462eb201694
--- /dev/null
+++ b/final/bindings/ocaml/llvm/llvm.ml
@@ -0,0 +1,1064 @@
+(*===-- llvm/llvm.ml - LLVM Ocaml Interface --------------------------------===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ * + *===----------------------------------------------------------------------===*) + + +type llcontext +type llmodule +type lltype +type lltypehandle +type llvalue +type lluse +type llbasicblock +type llbuilder +type llmemorybuffer + +module TypeKind = struct + type t = + | Void + | Float + | Double + | X86fp80 + | Fp128 + | Ppc_fp128 + | Label + | Integer + | Function + | Struct + | Array + | Pointer + | Opaque + | Vector + | Metadata +end + +module Linkage = struct + type t = + | External + | Available_externally + | Link_once + | Link_once_odr + | Weak + | Weak_odr + | Appending + | Internal + | Private + | Dllimport + | Dllexport + | External_weak + | Ghost + | Common + | Linker_private +end + +module Visibility = struct + type t = + | Default + | Hidden + | Protected +end + +module CallConv = struct + let c = 0 + let fast = 8 + let cold = 9 + let x86_stdcall = 64 + let x86_fastcall = 65 +end + +module Attribute = struct + type t = + | Zext + | Sext + | Noreturn + | Inreg + | Structret + | Nounwind + | Noalias + | Byval + | Nest + | Readnone + | Readonly + | Noinline + | Alwaysinline + | Optsize + | Ssp + | Sspreq + | Alignment of int + | Nocapture + | Noredzone + | Noimplicitfloat + | Naked + | Inlinehint + | Stackalignment of int +end + +module Icmp = struct + type t = + | Eq + | Ne + | Ugt + | Uge + | Ult + | Ule + | Sgt + | Sge + | Slt + | Sle +end + +module Fcmp = struct + type t = + | False + | Oeq + | Ogt + | Oge + | Olt + | Ole + | One + | Ord + | Uno + | Ueq + | Ugt + | Uge + | Ult + | Ule + | Une + | True +end + +exception IoError of string + +external register_exns : exn -> unit = "llvm_register_core_exns" +let _ = register_exns (IoError "") + +type ('a, 'b) llpos = +| At_end of 'a +| Before of 'b + +type ('a, 'b) llrev_pos = +| At_start of 'a +| After of 'b + +(*===-- Contexts ----------------------------------------------------------===*) +external create_context : unit -> llcontext = "llvm_create_context" +external dispose_context : llcontext -> unit = "llvm_dispose_context" +external global_context : unit -> llcontext = "llvm_global_context" +external mdkind_id : llcontext -> string -> int = "llvm_mdkind_id" + +(*===-- Modules -----------------------------------------------------------===*) +external create_module : llcontext -> string -> llmodule = "llvm_create_module" +external dispose_module : llmodule -> unit = "llvm_dispose_module" +external target_triple: llmodule -> string + = "llvm_target_triple" +external set_target_triple: string -> llmodule -> unit + = "llvm_set_target_triple" +external data_layout: llmodule -> string + = "llvm_data_layout" +external set_data_layout: string -> llmodule -> unit + = "llvm_set_data_layout" +external define_type_name : string -> lltype -> llmodule -> bool + = "llvm_add_type_name" +external delete_type_name : string -> llmodule -> unit + = "llvm_delete_type_name" +external type_by_name : llmodule -> string -> lltype option + = "llvm_type_by_name" +external dump_module : llmodule -> unit = "llvm_dump_module" +external set_module_inline_asm : llmodule -> string -> unit + = "llvm_set_module_inline_asm" + +(*===-- Types -------------------------------------------------------------===*) +external classify_type : lltype -> TypeKind.t = "llvm_classify_type" +external type_context : lltype -> llcontext = "llvm_type_context" + +(*--... 
Operations on integer types ........................................--*) +external i1_type : llcontext -> lltype = "llvm_i1_type" +external i8_type : llcontext -> lltype = "llvm_i8_type" +external i16_type : llcontext -> lltype = "llvm_i16_type" +external i32_type : llcontext -> lltype = "llvm_i32_type" +external i64_type : llcontext -> lltype = "llvm_i64_type" + +external integer_type : llcontext -> int -> lltype = "llvm_integer_type" +external integer_bitwidth : lltype -> int = "llvm_integer_bitwidth" + +(*--... Operations on real types ...........................................--*) +external float_type : llcontext -> lltype = "llvm_float_type" +external double_type : llcontext -> lltype = "llvm_double_type" +external x86fp80_type : llcontext -> lltype = "llvm_x86fp80_type" +external fp128_type : llcontext -> lltype = "llvm_fp128_type" +external ppc_fp128_type : llcontext -> lltype = "llvm_ppc_fp128_type" + +(*--... Operations on function types .......................................--*) +external function_type : lltype -> lltype array -> lltype = "llvm_function_type" +external var_arg_function_type : lltype -> lltype array -> lltype + = "llvm_var_arg_function_type" +external is_var_arg : lltype -> bool = "llvm_is_var_arg" +external return_type : lltype -> lltype = "LLVMGetReturnType" +external param_types : lltype -> lltype array = "llvm_param_types" + +(*--... Operations on struct types .........................................--*) +external struct_type : llcontext -> lltype array -> lltype = "llvm_struct_type" +external packed_struct_type : llcontext -> lltype array -> lltype + = "llvm_packed_struct_type" +external struct_element_types : lltype -> lltype array + = "llvm_struct_element_types" +external is_packed : lltype -> bool = "llvm_is_packed" + +(*--... Operations on pointer, vector, and array types .....................--*) +external array_type : lltype -> int -> lltype = "llvm_array_type" +external pointer_type : lltype -> lltype = "llvm_pointer_type" +external qualified_pointer_type : lltype -> int -> lltype + = "llvm_qualified_pointer_type" +external vector_type : lltype -> int -> lltype = "llvm_vector_type" + +external element_type : lltype -> lltype = "LLVMGetElementType" +external array_length : lltype -> int = "llvm_array_length" +external address_space : lltype -> int = "llvm_address_space" +external vector_size : lltype -> int = "llvm_vector_size" + +(*--... Operations on other types ..........................................--*) +external opaque_type : llcontext -> lltype = "llvm_opaque_type" +external void_type : llcontext -> lltype = "llvm_void_type" +external label_type : llcontext -> lltype = "llvm_label_type" + +(*--... Operations on type handles .........................................--*) +external handle_to_type : lltype -> lltypehandle = "llvm_handle_to_type" +external type_of_handle : lltypehandle -> lltype = "llvm_type_of_handle" +external refine_type : lltype -> lltype -> unit = "llvm_refine_type" + + +(*===-- Values ------------------------------------------------------------===*) +external type_of : llvalue -> lltype = "llvm_type_of" +external value_name : llvalue -> string = "llvm_value_name" +external set_value_name : string -> llvalue -> unit = "llvm_set_value_name" +external dump_value : llvalue -> unit = "llvm_dump_value" +external replace_all_uses_with : llvalue -> llvalue -> unit + = "LLVMReplaceAllUsesWith" + +(*--... 
Operations on uses .................................................--*)
+external use_begin : llvalue -> lluse option = "llvm_use_begin"
+external use_succ : lluse -> lluse option = "llvm_use_succ"
+external user : lluse -> llvalue = "llvm_user"
+external used_value : lluse -> llvalue = "llvm_used_value"
+
+let iter_uses f v =
+  let rec aux = function
+    | None -> ()
+    | Some u ->
+        f u;
+        aux (use_succ u)
+  in
+  aux (use_begin v)
+
+let fold_left_uses f init v =
+  let rec aux init u =
+    match u with
+    | None -> init
+    | Some u -> aux (f init u) (use_succ u)
+  in
+  aux init (use_begin v)
+
+let fold_right_uses f v init =
+  let rec aux u init =
+    match u with
+    | None -> init
+    | Some u -> f u (aux (use_succ u) init)
+  in
+  aux (use_begin v) init
+
+
+(*--... Operations on users ................................................--*)
+external operand : llvalue -> int -> llvalue = "llvm_operand"
+external set_operand : llvalue -> int -> llvalue -> unit = "llvm_set_operand"
+external num_operands : llvalue -> int = "llvm_num_operands"
+
+(*--... Operations on constants of (mostly) any type .......................--*)
+external is_constant : llvalue -> bool = "llvm_is_constant"
+external const_null : lltype -> llvalue = "LLVMConstNull"
+external const_all_ones : (*int|vec*)lltype -> llvalue = "LLVMConstAllOnes"
+external const_pointer_null : lltype -> llvalue = "LLVMConstPointerNull"
+external undef : lltype -> llvalue = "LLVMGetUndef"
+external is_null : llvalue -> bool = "llvm_is_null"
+external is_undef : llvalue -> bool = "llvm_is_undef"
+
+(*--... Operations on instructions .........................................--*)
+external has_metadata : llvalue -> bool = "llvm_has_metadata"
+external metadata : llvalue -> int -> llvalue option = "llvm_metadata"
+external set_metadata : llvalue -> int -> llvalue -> unit = "llvm_set_metadata"
+external clear_metadata : llvalue -> int -> unit = "llvm_clear_metadata"
+
+(*--... Operations on metadata .............................................--*)
+external mdstring : llcontext -> string -> llvalue = "llvm_mdstring"
+external mdnode : llcontext -> llvalue array -> llvalue = "llvm_mdnode"
+
+(*--... Operations on scalar constants .....................................--*)
+external const_int : lltype -> int -> llvalue = "llvm_const_int"
+external const_of_int64 : lltype -> Int64.t -> bool -> llvalue
+                        = "llvm_const_of_int64"
+external const_int_of_string : lltype -> string -> int -> llvalue
+                             = "llvm_const_int_of_string"
+external const_float : lltype -> float -> llvalue = "llvm_const_float"
+external const_float_of_string : lltype -> string -> llvalue
+                               = "llvm_const_float_of_string"
+
+(*--... Operations on composite constants ..................................--*)
+external const_string : llcontext -> string -> llvalue = "llvm_const_string"
+external const_stringz : llcontext -> string -> llvalue = "llvm_const_stringz"
+external const_array : lltype -> llvalue array -> llvalue = "llvm_const_array"
+external const_struct : llcontext -> llvalue array -> llvalue
+                      = "llvm_const_struct"
+external const_packed_struct : llcontext -> llvalue array -> llvalue
+                             = "llvm_const_packed_struct"
+external const_vector : llvalue array -> llvalue = "llvm_const_vector"
+
+(*--...
Constant expressions ...............................................--*) +external align_of : lltype -> llvalue = "LLVMAlignOf" +external size_of : lltype -> llvalue = "LLVMSizeOf" +external const_neg : llvalue -> llvalue = "LLVMConstNeg" +external const_nsw_neg : llvalue -> llvalue = "LLVMConstNSWNeg" +external const_nuw_neg : llvalue -> llvalue = "LLVMConstNUWNeg" +external const_fneg : llvalue -> llvalue = "LLVMConstFNeg" +external const_not : llvalue -> llvalue = "LLVMConstNot" +external const_add : llvalue -> llvalue -> llvalue = "LLVMConstAdd" +external const_nsw_add : llvalue -> llvalue -> llvalue = "LLVMConstNSWAdd" +external const_nuw_add : llvalue -> llvalue -> llvalue = "LLVMConstNUWAdd" +external const_fadd : llvalue -> llvalue -> llvalue = "LLVMConstFAdd" +external const_sub : llvalue -> llvalue -> llvalue = "LLVMConstSub" +external const_nsw_sub : llvalue -> llvalue -> llvalue = "LLVMConstNSWSub" +external const_nuw_sub : llvalue -> llvalue -> llvalue = "LLVMConstNUWSub" +external const_fsub : llvalue -> llvalue -> llvalue = "LLVMConstFSub" +external const_mul : llvalue -> llvalue -> llvalue = "LLVMConstMul" +external const_nsw_mul : llvalue -> llvalue -> llvalue = "LLVMConstNSWMul" +external const_nuw_mul : llvalue -> llvalue -> llvalue = "LLVMConstNUWMul" +external const_fmul : llvalue -> llvalue -> llvalue = "LLVMConstFMul" +external const_udiv : llvalue -> llvalue -> llvalue = "LLVMConstUDiv" +external const_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstSDiv" +external const_exact_sdiv : llvalue -> llvalue -> llvalue = "LLVMConstExactSDiv" +external const_fdiv : llvalue -> llvalue -> llvalue = "LLVMConstFDiv" +external const_urem : llvalue -> llvalue -> llvalue = "LLVMConstURem" +external const_srem : llvalue -> llvalue -> llvalue = "LLVMConstSRem" +external const_frem : llvalue -> llvalue -> llvalue = "LLVMConstFRem" +external const_and : llvalue -> llvalue -> llvalue = "LLVMConstAnd" +external const_or : llvalue -> llvalue -> llvalue = "LLVMConstOr" +external const_xor : llvalue -> llvalue -> llvalue = "LLVMConstXor" +external const_icmp : Icmp.t -> llvalue -> llvalue -> llvalue + = "llvm_const_icmp" +external const_fcmp : Fcmp.t -> llvalue -> llvalue -> llvalue + = "llvm_const_fcmp" +external const_shl : llvalue -> llvalue -> llvalue = "LLVMConstShl" +external const_lshr : llvalue -> llvalue -> llvalue = "LLVMConstLShr" +external const_ashr : llvalue -> llvalue -> llvalue = "LLVMConstAShr" +external const_gep : llvalue -> llvalue array -> llvalue = "llvm_const_gep" +external const_in_bounds_gep : llvalue -> llvalue array -> llvalue + = "llvm_const_in_bounds_gep" +external const_trunc : llvalue -> lltype -> llvalue = "LLVMConstTrunc" +external const_sext : llvalue -> lltype -> llvalue = "LLVMConstSExt" +external const_zext : llvalue -> lltype -> llvalue = "LLVMConstZExt" +external const_fptrunc : llvalue -> lltype -> llvalue = "LLVMConstFPTrunc" +external const_fpext : llvalue -> lltype -> llvalue = "LLVMConstFPExt" +external const_uitofp : llvalue -> lltype -> llvalue = "LLVMConstUIToFP" +external const_sitofp : llvalue -> lltype -> llvalue = "LLVMConstSIToFP" +external const_fptoui : llvalue -> lltype -> llvalue = "LLVMConstFPToUI" +external const_fptosi : llvalue -> lltype -> llvalue = "LLVMConstFPToSI" +external const_ptrtoint : llvalue -> lltype -> llvalue = "LLVMConstPtrToInt" +external const_inttoptr : llvalue -> lltype -> llvalue = "LLVMConstIntToPtr" +external const_bitcast : llvalue -> lltype -> llvalue = "LLVMConstBitCast" +external const_zext_or_bitcast : 
llvalue -> lltype -> llvalue + = "LLVMConstZExtOrBitCast" +external const_sext_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstSExtOrBitCast" +external const_trunc_or_bitcast : llvalue -> lltype -> llvalue + = "LLVMConstTruncOrBitCast" +external const_pointercast : llvalue -> lltype -> llvalue + = "LLVMConstPointerCast" +external const_intcast : llvalue -> lltype -> llvalue = "LLVMConstIntCast" +external const_fpcast : llvalue -> lltype -> llvalue = "LLVMConstFPCast" +external const_select : llvalue -> llvalue -> llvalue -> llvalue + = "LLVMConstSelect" +external const_extractelement : llvalue -> llvalue -> llvalue + = "LLVMConstExtractElement" +external const_insertelement : llvalue -> llvalue -> llvalue -> llvalue + = "LLVMConstInsertElement" +external const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue + = "LLVMConstShuffleVector" +external const_extractvalue : llvalue -> int array -> llvalue + = "llvm_const_extractvalue" +external const_insertvalue : llvalue -> llvalue -> int array -> llvalue + = "llvm_const_insertvalue" +external const_inline_asm : lltype -> string -> string -> bool -> bool -> + llvalue + = "llvm_const_inline_asm" +external block_address : llvalue -> llbasicblock -> llvalue = "LLVMBlockAddress" + +(*--... Operations on global variables, functions, and aliases (globals) ...--*) +external global_parent : llvalue -> llmodule = "LLVMGetGlobalParent" +external is_declaration : llvalue -> bool = "llvm_is_declaration" +external linkage : llvalue -> Linkage.t = "llvm_linkage" +external set_linkage : Linkage.t -> llvalue -> unit = "llvm_set_linkage" +external section : llvalue -> string = "llvm_section" +external set_section : string -> llvalue -> unit = "llvm_set_section" +external visibility : llvalue -> Visibility.t = "llvm_visibility" +external set_visibility : Visibility.t -> llvalue -> unit = "llvm_set_visibility" +external alignment : llvalue -> int = "llvm_alignment" +external set_alignment : int -> llvalue -> unit = "llvm_set_alignment" +external is_global_constant : llvalue -> bool = "llvm_is_global_constant" +external set_global_constant : bool -> llvalue -> unit + = "llvm_set_global_constant" + +(*--... 
Operations on global variables .....................................--*) +external declare_global : lltype -> string -> llmodule -> llvalue + = "llvm_declare_global" +external declare_qualified_global : lltype -> string -> int -> llmodule -> + llvalue + = "llvm_declare_qualified_global" +external define_global : string -> llvalue -> llmodule -> llvalue + = "llvm_define_global" +external define_qualified_global : string -> llvalue -> int -> llmodule -> + llvalue + = "llvm_define_qualified_global" +external lookup_global : string -> llmodule -> llvalue option + = "llvm_lookup_global" +external delete_global : llvalue -> unit = "llvm_delete_global" +external global_initializer : llvalue -> llvalue = "LLVMGetInitializer" +external set_initializer : llvalue -> llvalue -> unit = "llvm_set_initializer" +external remove_initializer : llvalue -> unit = "llvm_remove_initializer" +external is_thread_local : llvalue -> bool = "llvm_is_thread_local" +external set_thread_local : bool -> llvalue -> unit = "llvm_set_thread_local" +external global_begin : llmodule -> (llmodule, llvalue) llpos + = "llvm_global_begin" +external global_succ : llvalue -> (llmodule, llvalue) llpos + = "llvm_global_succ" +external global_end : llmodule -> (llmodule, llvalue) llrev_pos + = "llvm_global_end" +external global_pred : llvalue -> (llmodule, llvalue) llrev_pos + = "llvm_global_pred" + +let rec iter_global_range f i e = + if i = e then () else + match i with + | At_end _ -> raise (Invalid_argument "Invalid global variable range.") + | Before bb -> + f bb; + iter_global_range f (global_succ bb) e + +let iter_globals f m = + iter_global_range f (global_begin m) (At_end m) + +let rec fold_left_global_range f init i e = + if i = e then init else + match i with + | At_end _ -> raise (Invalid_argument "Invalid global variable range.") + | Before bb -> fold_left_global_range f (f init bb) (global_succ bb) e + +let fold_left_globals f init m = + fold_left_global_range f init (global_begin m) (At_end m) + +let rec rev_iter_global_range f i e = + if i = e then () else + match i with + | At_start _ -> raise (Invalid_argument "Invalid global variable range.") + | After bb -> + f bb; + rev_iter_global_range f (global_pred bb) e + +let rev_iter_globals f m = + rev_iter_global_range f (global_end m) (At_start m) + +let rec fold_right_global_range f i e init = + if i = e then init else + match i with + | At_start _ -> raise (Invalid_argument "Invalid global variable range.") + | After bb -> fold_right_global_range f (global_pred bb) e (f bb init) + +let fold_right_globals f m init = + fold_right_global_range f (global_end m) (At_start m) init + +(*--... Operations on aliases ..............................................--*) +external add_alias : llmodule -> lltype -> llvalue -> string -> llvalue + = "llvm_add_alias" + +(*--... 
Operations on functions ............................................--*) +external declare_function : string -> lltype -> llmodule -> llvalue + = "llvm_declare_function" +external define_function : string -> lltype -> llmodule -> llvalue + = "llvm_define_function" +external lookup_function : string -> llmodule -> llvalue option + = "llvm_lookup_function" +external delete_function : llvalue -> unit = "llvm_delete_function" +external is_intrinsic : llvalue -> bool = "llvm_is_intrinsic" +external function_call_conv : llvalue -> int = "llvm_function_call_conv" +external set_function_call_conv : int -> llvalue -> unit + = "llvm_set_function_call_conv" +external gc : llvalue -> string option = "llvm_gc" +external set_gc : string option -> llvalue -> unit = "llvm_set_gc" +external function_begin : llmodule -> (llmodule, llvalue) llpos + = "llvm_function_begin" +external function_succ : llvalue -> (llmodule, llvalue) llpos + = "llvm_function_succ" +external function_end : llmodule -> (llmodule, llvalue) llrev_pos + = "llvm_function_end" +external function_pred : llvalue -> (llmodule, llvalue) llrev_pos + = "llvm_function_pred" + +let rec iter_function_range f i e = + if i = e then () else + match i with + | At_end _ -> raise (Invalid_argument "Invalid function range.") + | Before fn -> + f fn; + iter_function_range f (function_succ fn) e + +let iter_functions f m = + iter_function_range f (function_begin m) (At_end m) + +let rec fold_left_function_range f init i e = + if i = e then init else + match i with + | At_end _ -> raise (Invalid_argument "Invalid function range.") + | Before fn -> fold_left_function_range f (f init fn) (function_succ fn) e + +let fold_left_functions f init m = + fold_left_function_range f init (function_begin m) (At_end m) + +let rec rev_iter_function_range f i e = + if i = e then () else + match i with + | At_start _ -> raise (Invalid_argument "Invalid function range.") + | After fn -> + f fn; + rev_iter_function_range f (function_pred fn) e + +let rev_iter_functions f m = + rev_iter_function_range f (function_end m) (At_start m) + +let rec fold_right_function_range f i e init = + if i = e then init else + match i with + | At_start _ -> raise (Invalid_argument "Invalid function range.") + | After fn -> fold_right_function_range f (function_pred fn) e (f fn init) + +let fold_right_functions f m init = + fold_right_function_range f (function_end m) (At_start m) init + +external llvm_add_function_attr : llvalue -> int -> unit + = "llvm_add_function_attr" +external llvm_remove_function_attr : llvalue -> int -> unit + = "llvm_remove_function_attr" + +let pack_attr (attr:Attribute.t) : int = + match attr with + Attribute.Zext -> 1 lsl 0 + | Attribute.Sext -> 1 lsl 1 + | Attribute.Noreturn -> 1 lsl 2 + | Attribute.Inreg -> 1 lsl 3 + | Attribute.Structret -> 1 lsl 4 + | Attribute.Nounwind -> 1 lsl 5 + | Attribute.Noalias -> 1 lsl 6 + | Attribute.Byval -> 1 lsl 7 + | Attribute.Nest -> 1 lsl 8 + | Attribute.Readnone -> 1 lsl 9 + | Attribute.Readonly -> 1 lsl 10 + | Attribute.Noinline -> 1 lsl 11 + | Attribute.Alwaysinline -> 1 lsl 12 + | Attribute.Optsize -> 1 lsl 13 + | Attribute.Ssp -> 1 lsl 14 + | Attribute.Sspreq -> 1 lsl 15 + | Attribute.Alignment n -> n lsl 16 + | Attribute.Nocapture -> 1 lsl 21 + | Attribute.Noredzone -> 1 lsl 22 + | Attribute.Noimplicitfloat -> 1 lsl 23 + | Attribute.Naked -> 1 lsl 24 + | Attribute.Inlinehint -> 1 lsl 25 + | Attribute.Stackalignment n -> n lsl 26 + +let add_function_attr llval attr = + llvm_add_function_attr llval (pack_attr attr) + +let 
remove_function_attr llval attr = + llvm_remove_function_attr llval (pack_attr attr) + +(*--... Operations on params ...............................................--*) +external params : llvalue -> llvalue array = "llvm_params" +external param : llvalue -> int -> llvalue = "llvm_param" +external param_parent : llvalue -> llvalue = "LLVMGetParamParent" +external param_begin : llvalue -> (llvalue, llvalue) llpos = "llvm_param_begin" +external param_succ : llvalue -> (llvalue, llvalue) llpos = "llvm_param_succ" +external param_end : llvalue -> (llvalue, llvalue) llrev_pos = "llvm_param_end" +external param_pred : llvalue -> (llvalue, llvalue) llrev_pos ="llvm_param_pred" + +let rec iter_param_range f i e = + if i = e then () else + match i with + | At_end _ -> raise (Invalid_argument "Invalid parameter range.") + | Before p -> + f p; + iter_param_range f (param_succ p) e + +let iter_params f fn = + iter_param_range f (param_begin fn) (At_end fn) + +let rec fold_left_param_range f init i e = + if i = e then init else + match i with + | At_end _ -> raise (Invalid_argument "Invalid parameter range.") + | Before p -> fold_left_param_range f (f init p) (param_succ p) e + +let fold_left_params f init fn = + fold_left_param_range f init (param_begin fn) (At_end fn) + +let rec rev_iter_param_range f i e = + if i = e then () else + match i with + | At_start _ -> raise (Invalid_argument "Invalid parameter range.") + | After p -> + f p; + rev_iter_param_range f (param_pred p) e + +let rev_iter_params f fn = + rev_iter_param_range f (param_end fn) (At_start fn) + +let rec fold_right_param_range f init i e = + if i = e then init else + match i with + | At_start _ -> raise (Invalid_argument "Invalid parameter range.") + | After p -> fold_right_param_range f (f p init) (param_pred p) e + +let fold_right_params f fn init = + fold_right_param_range f init (param_end fn) (At_start fn) + +external llvm_add_param_attr : llvalue -> int -> unit + = "llvm_add_param_attr" +external llvm_remove_param_attr : llvalue -> int -> unit + = "llvm_remove_param_attr" + +let add_param_attr llval attr = + llvm_add_param_attr llval (pack_attr attr) + +let remove_param_attr llval attr = + llvm_remove_param_attr llval (pack_attr attr) + +external set_param_alignment : llvalue -> int -> unit + = "llvm_set_param_alignment" + +(*--... 
Operations on basic blocks .........................................--*) +external value_of_block : llbasicblock -> llvalue = "LLVMBasicBlockAsValue" +external value_is_block : llvalue -> bool = "llvm_value_is_block" +external block_of_value : llvalue -> llbasicblock = "LLVMValueAsBasicBlock" +external block_parent : llbasicblock -> llvalue = "LLVMGetBasicBlockParent" +external basic_blocks : llvalue -> llbasicblock array = "llvm_basic_blocks" +external entry_block : llvalue -> llbasicblock = "LLVMGetEntryBasicBlock" +external delete_block : llbasicblock -> unit = "llvm_delete_block" +external append_block : llcontext -> string -> llvalue -> llbasicblock + = "llvm_append_block" +external insert_block : llcontext -> string -> llbasicblock -> llbasicblock + = "llvm_insert_block" +external block_begin : llvalue -> (llvalue, llbasicblock) llpos + = "llvm_block_begin" +external block_succ : llbasicblock -> (llvalue, llbasicblock) llpos + = "llvm_block_succ" +external block_end : llvalue -> (llvalue, llbasicblock) llrev_pos + = "llvm_block_end" +external block_pred : llbasicblock -> (llvalue, llbasicblock) llrev_pos + = "llvm_block_pred" + +let rec iter_block_range f i e = + if i = e then () else + match i with + | At_end _ -> raise (Invalid_argument "Invalid block range.") + | Before bb -> + f bb; + iter_block_range f (block_succ bb) e + +let iter_blocks f fn = + iter_block_range f (block_begin fn) (At_end fn) + +let rec fold_left_block_range f init i e = + if i = e then init else + match i with + | At_end _ -> raise (Invalid_argument "Invalid block range.") + | Before bb -> fold_left_block_range f (f init bb) (block_succ bb) e + +let fold_left_blocks f init fn = + fold_left_block_range f init (block_begin fn) (At_end fn) + +let rec rev_iter_block_range f i e = + if i = e then () else + match i with + | At_start _ -> raise (Invalid_argument "Invalid block range.") + | After bb -> + f bb; + rev_iter_block_range f (block_pred bb) e + +let rev_iter_blocks f fn = + rev_iter_block_range f (block_end fn) (At_start fn) + +let rec fold_right_block_range f init i e = + if i = e then init else + match i with + | At_start _ -> raise (Invalid_argument "Invalid block range.") + | After bb -> fold_right_block_range f (f bb init) (block_pred bb) e + +let fold_right_blocks f fn init = + fold_right_block_range f init (block_end fn) (At_start fn) + +(*--... 
Operations on instructions .........................................--*) +external instr_parent : llvalue -> llbasicblock = "LLVMGetInstructionParent" +external instr_begin : llbasicblock -> (llbasicblock, llvalue) llpos + = "llvm_instr_begin" +external instr_succ : llvalue -> (llbasicblock, llvalue) llpos + = "llvm_instr_succ" +external instr_end : llbasicblock -> (llbasicblock, llvalue) llrev_pos + = "llvm_instr_end" +external instr_pred : llvalue -> (llbasicblock, llvalue) llrev_pos + = "llvm_instr_pred" + +let rec iter_instrs_range f i e = + if i = e then () else + match i with + | At_end _ -> raise (Invalid_argument "Invalid instruction range.") + | Before i -> + f i; + iter_instrs_range f (instr_succ i) e + +let iter_instrs f bb = + iter_instrs_range f (instr_begin bb) (At_end bb) + +let rec fold_left_instrs_range f init i e = + if i = e then init else + match i with + | At_end _ -> raise (Invalid_argument "Invalid instruction range.") + | Before i -> fold_left_instrs_range f (f init i) (instr_succ i) e + +let fold_left_instrs f init bb = + fold_left_instrs_range f init (instr_begin bb) (At_end bb) + +let rec rev_iter_instrs_range f i e = + if i = e then () else + match i with + | At_start _ -> raise (Invalid_argument "Invalid instruction range.") + | After i -> + f i; + rev_iter_instrs_range f (instr_pred i) e + +let rev_iter_instrs f bb = + rev_iter_instrs_range f (instr_end bb) (At_start bb) + +let rec fold_right_instr_range f i e init = + if i = e then init else + match i with + | At_start _ -> raise (Invalid_argument "Invalid instruction range.") + | After i -> fold_right_instr_range f (instr_pred i) e (f i init) + +let fold_right_instrs f bb init = + fold_right_instr_range f (instr_end bb) (At_start bb) init + + +(*--... Operations on call sites ...........................................--*) +external instruction_call_conv: llvalue -> int + = "llvm_instruction_call_conv" +external set_instruction_call_conv: int -> llvalue -> unit + = "llvm_set_instruction_call_conv" + +external llvm_add_instruction_param_attr : llvalue -> int -> int -> unit + = "llvm_add_instruction_param_attr" +external llvm_remove_instruction_param_attr : llvalue -> int -> int -> unit + = "llvm_remove_instruction_param_attr" + +let add_instruction_param_attr llval i attr = + llvm_add_instruction_param_attr llval i (pack_attr attr) + +let remove_instruction_param_attr llval i attr = + llvm_remove_instruction_param_attr llval i (pack_attr attr) + +(*--... Operations on call instructions (only) .............................--*) +external is_tail_call : llvalue -> bool = "llvm_is_tail_call" +external set_tail_call : bool -> llvalue -> unit = "llvm_set_tail_call" + +(*--... 
Operations on phi nodes ............................................--*) +external add_incoming : (llvalue * llbasicblock) -> llvalue -> unit + = "llvm_add_incoming" +external incoming : llvalue -> (llvalue * llbasicblock) list = "llvm_incoming" + + +(*===-- Instruction builders ----------------------------------------------===*) +external builder : llcontext -> llbuilder = "llvm_builder" +external position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit + = "llvm_position_builder" +external insertion_block : llbuilder -> llbasicblock = "llvm_insertion_block" +external insert_into_builder : llvalue -> string -> llbuilder -> unit + = "llvm_insert_into_builder" + +let builder_at context ip = + let b = builder context in + position_builder ip b; + b + +let builder_before context i = builder_at context (Before i) +let builder_at_end context bb = builder_at context (At_end bb) + +let position_before i = position_builder (Before i) +let position_at_end bb = position_builder (At_end bb) + + +(*--... Metadata ...........................................................--*) +external set_current_debug_location : llbuilder -> llvalue -> unit + = "llvm_set_current_debug_location" +external clear_current_debug_location : llbuilder -> unit + = "llvm_clear_current_debug_location" +external current_debug_location : llbuilder -> llvalue option + = "llvm_current_debug_location" +external set_inst_debug_location : llbuilder -> llvalue -> unit + = "llvm_set_inst_debug_location" + + +(*--... Terminators ........................................................--*) +external build_ret_void : llbuilder -> llvalue = "llvm_build_ret_void" +external build_ret : llvalue -> llbuilder -> llvalue = "llvm_build_ret" +external build_aggregate_ret : llvalue array -> llbuilder -> llvalue + = "llvm_build_aggregate_ret" +external build_br : llbasicblock -> llbuilder -> llvalue = "llvm_build_br" +external build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder -> + llvalue = "llvm_build_cond_br" +external build_switch : llvalue -> llbasicblock -> int -> llbuilder -> llvalue + = "llvm_build_switch" +external add_case : llvalue -> llvalue -> llbasicblock -> unit + = "llvm_add_case" +external build_indirect_br : llvalue -> int -> llbuilder -> llvalue + = "llvm_build_indirect_br" +external add_destination : llvalue -> llbasicblock -> unit + = "llvm_add_destination" +external build_invoke : llvalue -> llvalue array -> llbasicblock -> + llbasicblock -> string -> llbuilder -> llvalue + = "llvm_build_invoke_bc" "llvm_build_invoke_nat" +external build_unwind : llbuilder -> llvalue = "llvm_build_unwind" +external build_unreachable : llbuilder -> llvalue = "llvm_build_unreachable" + +(*--... 
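Example: using a builder ..........................................--*)
+
+(* A usage sketch, not part of the bindings themselves: [ctx] and [fn]
+   are assumed to be a context and a function that already has an entry
+   block. Position a builder at the end of the entry block and emit a
+   [ret void] terminator.
+
+     let emit_ret_void ctx fn =
+       let b = builder_at_end ctx (entry_block fn) in
+       ignore (build_ret_void b)
+*)
+
+(*--... 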
Arithmetic .........................................................--*) +external build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_add" +external build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_add" +external build_nuw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nuw_add" +external build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fadd" +external build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_sub" +external build_nsw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_sub" +external build_nuw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nuw_sub" +external build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fsub" +external build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_mul" +external build_nsw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_mul" +external build_nuw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nuw_mul" +external build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fmul" +external build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_udiv" +external build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_sdiv" +external build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_exact_sdiv" +external build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fdiv" +external build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_urem" +external build_srem : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_srem" +external build_frem : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_frem" +external build_shl : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_shl" +external build_lshr : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_lshr" +external build_ashr : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_ashr" +external build_and : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_and" +external build_or : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_or" +external build_xor : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_xor" +external build_neg : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_neg" +external build_nsw_neg : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nsw_neg" +external build_nuw_neg : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_nuw_neg" +external build_fneg : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_fneg" +external build_not : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_not" + +(*--... 
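Example: arithmetic builders ......................................--*)
+
+(* A usage sketch, not part of the bindings themselves: given a function
+   [fn] with at least two integer parameters and a builder [b] positioned
+   inside it, emit the sum of the first two parameters, then its negation.
+
+     let emit_sum_neg fn b =
+       let sum = build_add (param fn 0) (param fn 1) "sum" b in
+       build_neg sum "negsum" b
+*)
+
+(*--... 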
Memory .............................................................--*) +external build_alloca : lltype -> string -> llbuilder -> llvalue + = "llvm_build_alloca" +external build_array_alloca : lltype -> llvalue -> string -> llbuilder -> + llvalue = "llvm_build_array_alloca" +external build_load : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_load" +external build_store : llvalue -> llvalue -> llbuilder -> llvalue + = "llvm_build_store" +external build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue + = "llvm_build_gep" +external build_in_bounds_gep : llvalue -> llvalue array -> string -> + llbuilder -> llvalue = "llvm_build_in_bounds_gep" +external build_struct_gep : llvalue -> int -> string -> llbuilder -> llvalue + = "llvm_build_struct_gep" + +external build_global_string : string -> string -> llbuilder -> llvalue + = "llvm_build_global_string" +external build_global_stringptr : string -> string -> llbuilder -> llvalue + = "llvm_build_global_stringptr" + +(*--... Casts ..............................................................--*) +external build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_trunc" +external build_zext : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_zext" +external build_sext : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_sext" +external build_fptoui : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fptoui" +external build_fptosi : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fptosi" +external build_uitofp : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_uitofp" +external build_sitofp : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_sitofp" +external build_fptrunc : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fptrunc" +external build_fpext : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fpext" +external build_ptrtoint : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_prttoint" +external build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_inttoptr" +external build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_bitcast" +external build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_zext_or_bitcast" +external build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_sext_or_bitcast" +external build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder -> + llvalue = "llvm_build_trunc_or_bitcast" +external build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_pointercast" +external build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_intcast" +external build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_fpcast" + +(*--... Comparisons ........................................................--*) +external build_icmp : Icmp.t -> llvalue -> llvalue -> string -> + llbuilder -> llvalue = "llvm_build_icmp" +external build_fcmp : Fcmp.t -> llvalue -> llvalue -> string -> + llbuilder -> llvalue = "llvm_build_fcmp" + +(*--... 
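Example: stack slots ..............................................--*)
+
+(* A usage sketch, not part of the bindings themselves: with a context
+   [ctx], an [i32] value [v] and a builder [b], spill [v] to a fresh
+   stack slot and reload it.
+
+     let spill_reload ctx v b =
+       let slot = build_alloca (i32_type ctx) "slot" b in
+       ignore (build_store v slot b);
+       build_load slot "reloaded" b
+*)
+
+(*--... 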
Miscellaneous instructions .........................................--*) +external build_phi : (llvalue * llbasicblock) list -> string -> llbuilder -> + llvalue = "llvm_build_phi" +external build_call : llvalue -> llvalue array -> string -> llbuilder -> llvalue + = "llvm_build_call" +external build_select : llvalue -> llvalue -> llvalue -> string -> llbuilder -> + llvalue = "llvm_build_select" +external build_va_arg : llvalue -> lltype -> string -> llbuilder -> llvalue + = "llvm_build_va_arg" +external build_extractelement : llvalue -> llvalue -> string -> llbuilder -> + llvalue = "llvm_build_extractelement" +external build_insertelement : llvalue -> llvalue -> llvalue -> string -> + llbuilder -> llvalue = "llvm_build_insertelement" +external build_shufflevector : llvalue -> llvalue -> llvalue -> string -> + llbuilder -> llvalue = "llvm_build_shufflevector" +external build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue + = "llvm_build_extractvalue" +external build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder -> + llvalue = "llvm_build_insertvalue" + +external build_is_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_null" +external build_is_not_null : llvalue -> string -> llbuilder -> llvalue + = "llvm_build_is_not_null" +external build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue + = "llvm_build_ptrdiff" + + +(*===-- Memory buffers ----------------------------------------------------===*) + +module MemoryBuffer = struct + external of_file : string -> llmemorybuffer = "llvm_memorybuffer_of_file" + external of_stdin : unit -> llmemorybuffer = "llvm_memorybuffer_of_stdin" + external dispose : llmemorybuffer -> unit = "llvm_memorybuffer_dispose" +end + + +(*===-- Pass Manager ------------------------------------------------------===*) + +module PassManager = struct + type 'a t + type any = [ `Module | `Function ] + external create : unit -> [ `Module ] t = "llvm_passmanager_create" + external create_function : llmodule -> [ `Function ] t + = "LLVMCreateFunctionPassManager" + external run_module : llmodule -> [ `Module ] t -> bool + = "llvm_passmanager_run_module" + external initialize : [ `Function ] t -> bool = "llvm_passmanager_initialize" + external run_function : llvalue -> [ `Function ] t -> bool + = "llvm_passmanager_run_function" + external finalize : [ `Function ] t -> bool = "llvm_passmanager_finalize" + external dispose : [< any ] t -> unit = "llvm_passmanager_dispose" +end + + +(*===-- Non-Externs -------------------------------------------------------===*) +(* These functions are built using the externals, so must be declared late. *) + +let concat2 sep arr = + let s = ref "" in + if 0 < Array.length arr then begin + s := !s ^ arr.(0); + for i = 1 to (Array.length arr) - 1 do + s := !s ^ sep ^ arr.(i) + done + end; + !s + +let rec string_of_lltype ty = + (* FIXME: stop infinite recursion! 
:) *) + match classify_type ty with + TypeKind.Integer -> "i" ^ string_of_int (integer_bitwidth ty) + | TypeKind.Pointer -> (string_of_lltype (element_type ty)) ^ "*" + | TypeKind.Struct -> + let s = "{ " ^ (concat2 ", " ( + Array.map string_of_lltype (struct_element_types ty) + )) ^ " }" in + if is_packed ty + then "<" ^ s ^ ">" + else s + | TypeKind.Array -> "[" ^ (string_of_int (array_length ty)) ^ + " x " ^ (string_of_lltype (element_type ty)) ^ "]" + | TypeKind.Vector -> "<" ^ (string_of_int (vector_size ty)) ^ + " x " ^ (string_of_lltype (element_type ty)) ^ ">" + | TypeKind.Opaque -> "opaque" + | TypeKind.Function -> string_of_lltype (return_type ty) ^ + " (" ^ (concat2 ", " ( + Array.map string_of_lltype (param_types ty) + )) ^ ")" + | TypeKind.Label -> "label" + | TypeKind.Ppc_fp128 -> "ppc_fp128" + | TypeKind.Fp128 -> "fp128" + | TypeKind.X86fp80 -> "x86_fp80" + | TypeKind.Double -> "double" + | TypeKind.Float -> "float" + | TypeKind.Void -> "void" + | TypeKind.Metadata -> "metadata" diff --git a/final/bindings/ocaml/llvm/llvm.mli b/final/bindings/ocaml/llvm/llvm.mli new file mode 100644 index 00000000000..9b037aae7a4 --- /dev/null +++ b/final/bindings/ocaml/llvm/llvm.mli @@ -0,0 +1,2259 @@ +(*===-- llvm/llvm.mli - LLVM Ocaml Interface -------------------------------===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + +(** Core API. + + This interface provides an ocaml API for the LLVM intermediate + representation, the classes in the VMCore library. *) + + +(** {6 Abstract types} + + These abstract types correlate directly to the LLVM VMCore classes. *) + +(** The top-level container for all LLVM global data. See the + [llvm::LLVMContext] class. *) +type llcontext + +(** The top-level container for all other LLVM Intermediate Representation (IR) + objects. See the [llvm::Module] class. *) +type llmodule + +(** Each value in the LLVM IR has a type, an instance of [lltype]. See the + [llvm::Type] class. *) +type lltype + +(** When building recursive types using {!refine_type}, [lltype] values may + become invalid; use [lltypehandle] to resolve this problem. See the + [llvm::AbstractTypeHolder] class. *) +type lltypehandle + +(** Any value in the LLVM IR. Functions, instructions, global variables, + constants, and much more are all [llvalues]. See the [llvm::Value] class. + This type covers a wide range of subclasses. *) +type llvalue + +(** Used to store users and usees of values. See the [llvm::Use] class. *) +type lluse + +(** A basic block in LLVM IR. See the [llvm::BasicBlock] class. *) +type llbasicblock + +(** Used to generate instructions in the LLVM IR. See the [llvm::LLVMBuilder] + class. *) +type llbuilder + +(** Used to efficiently handle large buffers of read-only binary data. + See the [llvm::MemoryBuffer] class. *) +type llmemorybuffer + +(** The kind of an [lltype], the result of [classify_type ty]. See the + [llvm::Type::TypeID] enumeration. *) +module TypeKind : sig + type t = + Void + | Float + | Double + | X86fp80 + | Fp128 + | Ppc_fp128 + | Label + | Integer + | Function + | Struct + | Array + | Pointer + | Opaque + | Vector + | Metadata +end + +(** The linkage of a global value, accessed with {!linkage} and + {!set_linkage}. See [llvm::GlobalValue::LinkageTypes]. 
*)
+module Linkage : sig
+  type t =
+    External
+  | Available_externally
+  | Link_once
+  | Link_once_odr
+  | Weak
+  | Weak_odr
+  | Appending
+  | Internal
+  | Private
+  | Dllimport
+  | Dllexport
+  | External_weak
+  | Ghost
+  | Common
+  | Linker_private
+end
+
+(** The linker visibility of a global value, accessed with {!visibility} and
+    {!set_visibility}. See [llvm::GlobalValue::VisibilityTypes]. *)
+module Visibility : sig
+  type t =
+    Default
+  | Hidden
+  | Protected
+end
+
+(** The following calling convention values may be accessed with
+    {!function_call_conv} and {!set_function_call_conv}. Calling
+    conventions are open-ended. *)
+module CallConv : sig
+  val c : int             (** [c] is the C calling convention. *)
+  val fast : int          (** [fast] is the calling convention to allow LLVM
+                              maximum optimization opportunities. Use only with
+                              internal linkage. *)
+  val cold : int          (** [cold] is the calling convention for
+                              callee-save. *)
+  val x86_stdcall : int   (** [x86_stdcall] is the familiar stdcall calling
+                              convention from C. *)
+  val x86_fastcall : int  (** [x86_fastcall] is the familiar fastcall calling
+                              convention from C. *)
+end
+
+module Attribute : sig
+  type t =
+  | Zext
+  | Sext
+  | Noreturn
+  | Inreg
+  | Structret
+  | Nounwind
+  | Noalias
+  | Byval
+  | Nest
+  | Readnone
+  | Readonly
+  | Noinline
+  | Alwaysinline
+  | Optsize
+  | Ssp
+  | Sspreq
+  | Alignment of int
+  | Nocapture
+  | Noredzone
+  | Noimplicitfloat
+  | Naked
+  | Inlinehint
+  | Stackalignment of int
+end
+
+(** The predicate for an integer comparison ([icmp]) instruction.
+    See the [llvm::ICmpInst::Predicate] enumeration. *)
+module Icmp : sig
+  type t =
+  | Eq
+  | Ne
+  | Ugt
+  | Uge
+  | Ult
+  | Ule
+  | Sgt
+  | Sge
+  | Slt
+  | Sle
+end
+
+(** The predicate for a floating-point comparison ([fcmp]) instruction.
+    See the [llvm::FCmpInst::Predicate] enumeration. *)
+module Fcmp : sig
+  type t =
+  | False
+  | Oeq
+  | Ogt
+  | Oge
+  | Olt
+  | Ole
+  | One
+  | Ord
+  | Uno
+  | Ueq
+  | Ugt
+  | Uge
+  | Ult
+  | Ule
+  | Une
+  | True
+end
+
+
+(** {6 Iteration} *)
+
+(** [Before b] and [At_end a] specify positions from the start of the ['b] list
+    of [a]. [llpos] is used to specify positions in and for forward iteration
+    through the various value lists maintained by the LLVM IR. *)
+type ('a, 'b) llpos =
+| At_end of 'a
+| Before of 'b
+
+(** [After b] and [At_start a] specify positions from the end of the ['b] list
+    of [a]. [llrev_pos] is used for reverse iteration through the various value
+    lists maintained by the LLVM IR. *)
+type ('a, 'b) llrev_pos =
+| At_start of 'a
+| After of 'b
+
+
+(** {6 Exceptions} *)
+
+exception IoError of string
+
+
+(** {6 Contexts} *)
+
+(** [create_context ()] creates a context for storing the "global" state in
+    LLVM. See the constructor [llvm::LLVMContext]. *)
+val create_context : unit -> llcontext
+
+(** [dispose_context c] destroys the context [c]. See the destructor
+    [llvm::LLVMContext::~LLVMContext]. *)
+val dispose_context : llcontext -> unit
+
+(** See the function [llvm::getGlobalContext]. *)
+val global_context : unit -> llcontext
+
+(** [mdkind_id context name] returns the MDKind ID that corresponds to the
+    name [name] in the context [context]. See the function
+    [llvm::LLVMContext::getMDKindID]. *)
+val mdkind_id : llcontext -> string -> int
+
+
+(** {6 Modules} *)
+
+(** [create_module context id] creates a module with the supplied module ID in
+    the context [context]. Modules are not garbage collected; it is mandatory
+    to call {!dispose_module} to free memory. 
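+    A minimal lifecycle sketch (illustrative only):
+    {[
+      let c = create_context () in
+      let m = create_module c "mymodule" in
+      (* ... build IR in m ... *)
+      dispose_module m;
+      dispose_context c
+    ]}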
See the constructor
+    [llvm::Module::Module]. *)
+val create_module : llcontext -> string -> llmodule
+
+(** [dispose_module m] destroys a module [m] and all of the IR objects it
+    contained. All references to subordinate objects are invalidated;
+    referencing them will invoke undefined behavior. See the destructor
+    [llvm::Module::~Module]. *)
+val dispose_module : llmodule -> unit
+
+(** [target_triple m] is the target specifier for the module [m], something like
+    [i686-apple-darwin8]. See the method [llvm::Module::getTargetTriple]. *)
+val target_triple: llmodule -> string
+
+
+(** [set_target_triple triple m] changes the target specifier for the module
+    [m] to the string [triple]. See the method
+    [llvm::Module::setTargetTriple]. *)
+val set_target_triple: string -> llmodule -> unit
+
+
+(** [data_layout m] is the data layout specifier for the module [m], something
+    like [e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-...-a0:0:64-f80:128:128]. See the
+    method [llvm::Module::getDataLayout]. *)
+val data_layout: llmodule -> string
+
+
+(** [set_data_layout s m] changes the data layout specifier for the module [m]
+    to the string [s]. See the method [llvm::Module::setDataLayout]. *)
+val set_data_layout: string -> llmodule -> unit
+
+
+(** [define_type_name name ty m] adds a named type to the module's symbol table.
+    Returns [true] if successful. If such a name already exists, then no entry
+    is added and [false] is returned. See the [llvm::Module::addTypeName]
+    method. *)
+val define_type_name : string -> lltype -> llmodule -> bool
+
+
+(** [delete_type_name name] removes a type name from the module's symbol
+    table. *)
+val delete_type_name : string -> llmodule -> unit
+
+
+(** [type_by_name m n] returns the type in the module [m] named [n], or [None]
+    if it does not exist. See the method [llvm::Module::getTypeByName]. *)
+val type_by_name : llmodule -> string -> lltype option
+
+
+(** [dump_module m] prints the .ll representation of the module [m] to standard
+    error. See the method [llvm::Module::dump]. *)
+val dump_module : llmodule -> unit
+
+(** [set_module_inline_asm m asm] sets the inline assembler for the module. See
+    the method [llvm::Module::setModuleInlineAsm]. *)
+val set_module_inline_asm : llmodule -> string -> unit
+
+
+
+(** {6 Types} *)
+
+(** [classify_type ty] returns the {!TypeKind.t} corresponding to the type [ty].
+    See the method [llvm::Type::getTypeID]. *)
+val classify_type : lltype -> TypeKind.t
+
+(** [type_context ty] returns the {!llcontext} corresponding to the type [ty].
+    See the method [llvm::Type::getContext]. *)
+val type_context : lltype -> llcontext
+
+(** [string_of_lltype ty] returns a string describing the type [ty]. *)
+val string_of_lltype : lltype -> string
+
+(** {7 Operations on integer types} *)
+
+(** [i1_type c] returns an integer type of bitwidth 1 in the context [c]. See
+    [llvm::Type::Int1Ty]. *)
+val i1_type : llcontext -> lltype
+
+(** [i8_type c] returns an integer type of bitwidth 8 in the context [c]. See
+    [llvm::Type::Int8Ty]. *)
+val i8_type : llcontext -> lltype
+
+(** [i16_type c] returns an integer type of bitwidth 16 in the context [c]. See
+    [llvm::Type::Int16Ty]. *)
+val i16_type : llcontext -> lltype
+
+(** [i32_type c] returns an integer type of bitwidth 32 in the context [c]. See
+    [llvm::Type::Int32Ty]. *)
+val i32_type : llcontext -> lltype
+
+(** [i64_type c] returns an integer type of bitwidth 64 in the context [c]. See
+    [llvm::Type::Int64Ty]. 
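+    For instance (a sketch, given a context [c]), these are equivalent:
+    {[
+      let t1 = i64_type c
+      let t2 = integer_type c 64
+    ]}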
*)
+val i64_type : llcontext -> lltype
+
+(** [integer_type c n] returns an integer type of bitwidth [n] in the context
+    [c]. See the method [llvm::IntegerType::get]. *)
+val integer_type : llcontext -> int -> lltype
+
+(** [integer_bitwidth ty] returns the number of bits in the integer type [ty].
+    See the method [llvm::IntegerType::getBitWidth]. *)
+val integer_bitwidth : lltype -> int
+
+
+(** {7 Operations on real types} *)
+
+(** [float_type c] returns the IEEE 32-bit floating point type in the context
+    [c]. See [llvm::Type::FloatTy]. *)
+val float_type : llcontext -> lltype
+
+(** [double_type c] returns the IEEE 64-bit floating point type in the context
+    [c]. See [llvm::Type::DoubleTy]. *)
+val double_type : llcontext -> lltype
+
+(** [x86fp80_type c] returns the x87 80-bit floating point type in the context
+    [c]. See [llvm::Type::X86_FP80Ty]. *)
+val x86fp80_type : llcontext -> lltype
+
+(** [fp128_type c] returns the IEEE 128-bit floating point type in the context
+    [c]. See [llvm::Type::FP128Ty]. *)
+val fp128_type : llcontext -> lltype
+
+(** [ppc_fp128_type c] returns the PowerPC 128-bit floating point type in the
+    context [c]. See [llvm::Type::PPC_FP128Ty]. *)
+val ppc_fp128_type : llcontext -> lltype
+
+
+(** {7 Operations on function types} *)
+
+(** [function_type ret_ty param_tys] returns the function type returning
+    [ret_ty] and taking [param_tys] as parameters.
+    See the method [llvm::FunctionType::get]. *)
+val function_type : lltype -> lltype array -> lltype
+
+(** [var_arg_function_type ret_ty param_tys] is just like
+    [function_type ret_ty param_tys] except that it returns the function type
+    which also takes a variable number of arguments.
+    See the method [llvm::FunctionType::get]. *)
+val var_arg_function_type : lltype -> lltype array -> lltype
+
+
+(** [is_var_arg fty] returns [true] if [fty] is a varargs function type, [false]
+    otherwise. See the method [llvm::FunctionType::isVarArg]. *)
+val is_var_arg : lltype -> bool
+
+(** [return_type fty] gets the return type of the function type [fty].
+    See the method [llvm::FunctionType::getReturnType]. *)
+val return_type : lltype -> lltype
+
+(** [param_types fty] gets the parameter types of the function type [fty].
+    See the method [llvm::FunctionType::getParamType]. *)
+val param_types : lltype -> lltype array
+
+
+(** {7 Operations on struct types} *)
+
+(** [struct_type context tys] returns the structure type in the context
+    [context] containing the types in the array [tys]. See the method
+    [llvm::StructType::get]. *)
+val struct_type : llcontext -> lltype array -> lltype
+
+
+(** [packed_struct_type context tys] returns the packed structure type in the
+    context [context] containing the types in the array [tys]. See the method
+    [llvm::StructType::get]. *)
+val packed_struct_type : llcontext -> lltype array -> lltype
+
+
+(** [struct_element_types sty] returns the constituent types of the struct type
+    [sty]. See the method [llvm::StructType::getElementType]. *)
+val struct_element_types : lltype -> lltype array
+
+
+(** [is_packed sty] returns [true] if the structure type [sty] is packed,
+    [false] otherwise. See the method [llvm::StructType::isPacked]. *)
+val is_packed : lltype -> bool
+
+
+(** {7 Operations on pointer, vector, and array types} *)
+
+(** [array_type ty n] returns the array type containing [n] elements of type
+    [ty]. See the method [llvm::ArrayType::get]. 
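+    For example (a sketch, given a context [c]), a ten-element byte array type:
+    {[
+      let ty = array_type (i8_type c) 10
+    ]}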
*)
+val array_type : lltype -> int -> lltype
+
+(** [pointer_type ty] returns the pointer type referencing objects of type
+    [ty] in the default address space (0).
+    See the method [llvm::PointerType::getUnqual]. *)
+val pointer_type : lltype -> lltype
+
+(** [qualified_pointer_type ty as] returns the pointer type referencing objects
+    of type [ty] in address space [as].
+    See the method [llvm::PointerType::get]. *)
+val qualified_pointer_type : lltype -> int -> lltype
+
+
+(** [vector_type ty n] returns the vector type containing [n] elements of the
+    primitive type [ty]. See the method [llvm::VectorType::get]. *)
+val vector_type : lltype -> int -> lltype
+
+(** [element_type ty] returns the element type of the pointer, vector, or array
+    type [ty]. See the method [llvm::SequentialType::getElementType]. *)
+val element_type : lltype -> lltype
+
+(** [array_length aty] returns the element count of the array type [aty].
+    See the method [llvm::ArrayType::getNumElements]. *)
+val array_length : lltype -> int
+
+(** [address_space pty] returns the address space qualifier of the pointer type
+    [pty]. See the method [llvm::PointerType::getAddressSpace]. *)
+val address_space : lltype -> int
+
+(** [vector_size ty] returns the element count of the vector type [ty].
+    See the method [llvm::VectorType::getNumElements]. *)
+val vector_size : lltype -> int
+
+
+(** {7 Operations on other types} *)
+
+(** [opaque_type c] creates a new opaque type distinct from any other in the
+    context [c]. Opaque types are useful for building recursive types in
+    combination with {!refine_type}. See [llvm::OpaqueType::get]. *)
+val opaque_type : llcontext -> lltype
+
+(** [void_type c] creates a type of a function which does not return any
+    value in the context [c]. See [llvm::Type::VoidTy]. *)
+val void_type : llcontext -> lltype
+
+(** [label_type c] creates a type of a basic block in the context [c]. See
+    [llvm::Type::LabelTy]. *)
+val label_type : llcontext -> lltype
+
+(** {7 Operations on type handles} *)
+
+(** [handle_to_type ty] creates a handle to the type [ty]. If [ty] is later
+    refined as a result of a call to {!refine_type}, the handle will be updated;
+    any bare [lltype] references will become invalid.
+    See the class [llvm::PATypeHolder]. *)
+val handle_to_type : lltype -> lltypehandle
+
+(** [type_of_handle tyh] resolves the type handle [tyh].
+    See the method [llvm::PATypeHolder::get()]. *)
+val type_of_handle : lltypehandle -> lltype
+
+(** [refine_type opaque_ty ty] replaces the abstract type [opaque_ty] with the
+    concrete type [ty] in all users. Warning: This may invalidate {!lltype}
+    values! Use {!lltypehandle} to manipulate potentially abstract types. See
+    the method [llvm::Type::refineAbstractType]. *)
+val refine_type : lltype -> lltype -> unit
+
+
+(* {6 Values} *)
+
+(** [type_of v] returns the type of the value [v].
+    See the method [llvm::Value::getType]. *)
+val type_of : llvalue -> lltype
+
+(** [value_name v] returns the name of the value [v]. For global values, this is
+    the symbol name. For instructions and basic blocks, it is the SSA register
+    name. It is meaningless for constants.
+    See the method [llvm::Value::getName]. *)
+val value_name : llvalue -> string
+
+(** [set_value_name n v] sets the name of the value [v] to [n]. See the method
+    [llvm::Value::setName]. *)
+val set_value_name : string -> llvalue -> unit
+
+(** [dump_value v] prints the .ll representation of the value [v] to standard
+    error. See the method [llvm::Value::dump]. 
*)
+val dump_value : llvalue -> unit
+
+(** [replace_all_uses_with old new] replaces all uses of the value [old]
+ * with the value [new]. See the method [llvm::Value::replaceAllUsesWith]. *)
+val replace_all_uses_with : llvalue -> llvalue -> unit
+
+
+
+(* {6 Uses} *)
+
+(** [use_begin v] returns the first position in the use list for the value [v].
+    [use_begin] and [use_succ] can be used to iterate over the use list in
+    order. See the method [llvm::Value::use_begin]. *)
+val use_begin : llvalue -> lluse option
+
+(** [use_succ u] returns the use list position succeeding [u].
+    See the method [llvm::use_value_iterator::operator++]. *)
+val use_succ : lluse -> lluse option
+
+(** [user u] returns the user of the use [u].
+    See the method [llvm::Use::getUser]. *)
+val user : lluse -> llvalue
+
+(** [used_value u] returns the usee of the use [u].
+    See the method [llvm::Use::getUsedValue]. *)
+val used_value : lluse -> llvalue
+
+(** [iter_uses f v] applies function [f] to each of the users of the value [v]
+    in order. Tail recursive. *)
+val iter_uses : (lluse -> unit) -> llvalue -> unit
+
+(** [fold_left_uses f init v] is [f (... (f init u1) ...) uN] where
+    [u1,...,uN] are the users of the value [v]. Tail recursive. *)
+val fold_left_uses : ('a -> lluse -> 'a) -> 'a -> llvalue -> 'a
+
+(** [fold_right_uses f v init] is [f u1 (... (f uN init) ...)] where
+    [u1,...,uN] are the users of the value [v]. Not tail recursive. *)
+val fold_right_uses : (lluse -> 'a -> 'a) -> llvalue -> 'a -> 'a
+
+
+(* {6 Users} *)
+
+(** [operand v i] returns the operand at index [i] for the value [v]. See the
+    method [llvm::User::getOperand]. *)
+val operand : llvalue -> int -> llvalue
+
+(** [set_operand v i o] sets the operand of the value [v] at the index [i] to
+    the value [o].
+    See the method [llvm::User::setOperand]. *)
+val set_operand : llvalue -> int -> llvalue -> unit
+
+(** [num_operands v] returns the number of operands for the value [v].
+    See the method [llvm::User::getNumOperands]. *)
+val num_operands : llvalue -> int
+
+(** {7 Operations on constants of (mostly) any type} *)
+
+(** [is_constant v] returns [true] if the value [v] is a constant, [false]
+    otherwise. Similar to [llvm::isa]. *)
+val is_constant : llvalue -> bool
+
+(** [const_null ty] returns the constant null (zero) of the type [ty].
+    See the method [llvm::Constant::getNullValue]. *)
+val const_null : lltype -> llvalue
+
+(** [const_all_ones ty] returns the constant '-1' of the integer or vector type
+    [ty]. See the method [llvm::Constant::getAllOnesValue]. *)
+val const_all_ones : (*int|vec*)lltype -> llvalue
+
+(** [const_pointer_null ty] returns the constant null (zero) pointer of the type
+    [ty]. See the method [llvm::ConstantPointerNull::get]. *)
+val const_pointer_null : lltype -> llvalue
+
+(** [undef ty] returns the undefined value of the type [ty].
+    See the method [llvm::UndefValue::get]. *)
+val undef : lltype -> llvalue
+
+(** [is_null v] returns [true] if the value [v] is the null (zero) value.
+    See the method [llvm::Constant::isNullValue]. *)
+val is_null : llvalue -> bool
+
+(** [is_undef v] returns [true] if the value [v] is an undefined value, [false]
+    otherwise. Similar to [llvm::isa]. *)
+val is_undef : llvalue -> bool
+
+
+(** {7 Operations on instructions} *)
+
+(** [has_metadata i] returns whether or not the instruction [i] has any
+    metadata attached to it. See the function
+    [llvm::Instruction::hasMetadata]. 
*)
+val has_metadata : llvalue -> bool
+
+(** [metadata i kind] optionally returns the metadata associated with the
+    kind [kind] in the instruction [i]. See the function
+    [llvm::Instruction::getMetadata]. *)
+val metadata : llvalue -> int -> llvalue option
+
+(** [set_metadata i kind md] sets the metadata [md] of kind [kind] in the
+    instruction [i]. See the function [llvm::Instruction::setMetadata]. *)
+val set_metadata : llvalue -> int -> llvalue -> unit
+
+(** [clear_metadata i kind] clears the metadata of kind [kind] in the
+    instruction [i]. See the function [llvm::Instruction::setMetadata]. *)
+val clear_metadata : llvalue -> int -> unit
+
+
+(** {7 Operations on metadata} *)
+
+(** [mdstring c s] returns the MDString of the string [s] in the context [c].
+    See the method [llvm::MDNode::get]. *)
+val mdstring : llcontext -> string -> llvalue
+
+(** [mdnode c elts] returns the MDNode containing the values [elts] in the
+    context [c].
+    See the method [llvm::MDNode::get]. *)
+val mdnode : llcontext -> llvalue array -> llvalue
+
+
+(** {7 Operations on scalar constants} *)
+
+(** [const_int ty i] returns the integer constant of type [ty] and value [i].
+    See the method [llvm::ConstantInt::get]. *)
+val const_int : lltype -> int -> llvalue
+
+(** [const_of_int64 ty i sext] returns the integer constant of type [ty] and
+    value [i], sign-extending [i] if [sext] is [true]. See the method
+    [llvm::ConstantInt::get]. *)
+val const_of_int64 : lltype -> Int64.t -> bool -> llvalue
+
+
+(** [const_int_of_string ty s r] returns the integer constant of type [ty] and
+ * value [s], with the radix [r]. See the method [llvm::ConstantInt::get]. *)
+val const_int_of_string : lltype -> string -> int -> llvalue
+
+
+(** [const_float ty n] returns the floating point constant of type [ty] and
+    value [n]. See the method [llvm::ConstantFP::get]. *)
+val const_float : lltype -> float -> llvalue
+
+(** [const_float_of_string ty s] returns the floating point constant of type
+    [ty] and value [s]. See the method [llvm::ConstantFP::get]. *)
+val const_float_of_string : lltype -> string -> llvalue
+
+
+
+(** {7 Operations on composite constants} *)
+
+(** [const_string c s] returns the constant [i8] array with the values of the
+    characters in the string [s] in the context [c]. The array is not
+    null-terminated (but see {!const_stringz}). This value can in turn be used
+    as the initializer for a global variable. See the method
+    [llvm::ConstantArray::get]. *)
+val const_string : llcontext -> string -> llvalue
+
+(** [const_stringz c s] returns the constant [i8] array with the values of the
+    characters in the string [s] and a null terminator in the context [c]. This
+    value can in turn be used as the initializer for a global variable.
+    See the method [llvm::ConstantArray::get]. *)
+val const_stringz : llcontext -> string -> llvalue
+
+(** [const_array ty elts] returns the constant array of type
+    [array_type ty (Array.length elts)] and containing the values [elts].
+    This value can in turn be used as the initializer for a global variable.
+    See the method [llvm::ConstantArray::get]. *)
+val const_array : lltype -> llvalue array -> llvalue
+
+(** [const_struct context elts] returns the structured constant of type
+    [struct_type (Array.map type_of elts)] and containing the values [elts]
+    in the context [context]. This value can in turn be used as the initializer
+    for a global variable. See the method [llvm::ConstantStruct::get]. 
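+    For example (a sketch, given a context [c]):
+    {[
+      let i32 = i32_type c in
+      let pair = const_struct c [| const_int i32 1; const_int i32 2 |]
+    ]}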
*)
+val const_struct : llcontext -> llvalue array -> llvalue
+
+
+(** [const_packed_struct context elts] returns the structured constant of
+    type {!packed_struct_type} [(Array.map type_of elts)] and containing the
+    values [elts] in the context [context]. This value can in turn be used as
+    the initializer for a global variable. See the method
+    [llvm::ConstantStruct::get]. *)
+val const_packed_struct : llcontext -> llvalue array -> llvalue
+
+
+(** [const_vector elts] returns the vector constant of type
+    [vector_type (type_of elts.(0)) (Array.length elts)] and containing the
+    values [elts]. See the method [llvm::ConstantVector::get]. *)
+val const_vector : llvalue array -> llvalue
+
+
+(** {7 Constant expressions} *)
+
+(** [align_of ty] returns the alignof constant for the type [ty]. This is
+    equivalent to [const_ptrtoint (const_gep (const_null (pointer_type {i8,ty}))
+    (const_int i32_type 0) (const_int i32_type 1)) i32_type], but considerably
+    more readable. See the method [llvm::ConstantExpr::getAlignOf]. *)
+val align_of : lltype -> llvalue
+
+(** [size_of ty] returns the sizeof constant for the type [ty]. This is
+    equivalent to [const_ptrtoint (const_gep (const_null (pointer_type ty))
+    (const_int i32_type 1)) i64_type], but considerably more readable.
+    See the method [llvm::ConstantExpr::getSizeOf]. *)
+val size_of : lltype -> llvalue
+
+(** [const_neg c] returns the arithmetic negation of the constant [c].
+    See the method [llvm::ConstantExpr::getNeg]. *)
+val const_neg : llvalue -> llvalue
+
+(** [const_nsw_neg c] returns the arithmetic negation of the constant [c] with
+    no signed wrapping. The result is undefined if the negation overflows.
+    See the method [llvm::ConstantExpr::getNSWNeg]. *)
+val const_nsw_neg : llvalue -> llvalue
+
+(** [const_nuw_neg c] returns the arithmetic negation of the constant [c] with
+    no unsigned wrapping. The result is undefined if the negation overflows.
+    See the method [llvm::ConstantExpr::getNUWNeg]. *)
+val const_nuw_neg : llvalue -> llvalue
+
+(** [const_fneg c] returns the arithmetic negation of the constant float [c].
+    See the method [llvm::ConstantExpr::getFNeg]. *)
+val const_fneg : llvalue -> llvalue
+
+(** [const_not c] returns the bitwise inverse of the constant [c].
+    See the method [llvm::ConstantExpr::getNot]. *)
+val const_not : llvalue -> llvalue
+
+(** [const_add c1 c2] returns the constant sum of two constants.
+    See the method [llvm::ConstantExpr::getAdd]. *)
+val const_add : llvalue -> llvalue -> llvalue
+
+(** [const_nsw_add c1 c2] returns the constant sum of two constants with no
+    signed wrapping. The result is undefined if the sum overflows.
+    See the method [llvm::ConstantExpr::getNSWAdd]. *)
+val const_nsw_add : llvalue -> llvalue -> llvalue
+
+(** [const_nuw_add c1 c2] returns the constant sum of two constants with no
+    unsigned wrapping. The result is undefined if the sum overflows.
+    See the method [llvm::ConstantExpr::getNUWAdd]. *)
+val const_nuw_add : llvalue -> llvalue -> llvalue
+
+(** [const_fadd c1 c2] returns the constant sum of two constant floats.
+    See the method [llvm::ConstantExpr::getFAdd]. *)
+val const_fadd : llvalue -> llvalue -> llvalue
+
+(** [const_sub c1 c2] returns the constant difference, [c1 - c2], of two
+    constants. See the method [llvm::ConstantExpr::getSub]. *)
+val const_sub : llvalue -> llvalue -> llvalue
+
+(** [const_nsw_sub c1 c2] returns the constant difference of two constants with
+    no signed wrapping. The result is undefined if the difference overflows. 
+    See the method [llvm::ConstantExpr::getNSWSub]. *)
+val const_nsw_sub : llvalue -> llvalue -> llvalue
+
+(** [const_nuw_sub c1 c2] returns the constant difference of two constants with
+    no unsigned wrapping. The result is undefined if the difference overflows.
+    See the method [llvm::ConstantExpr::getNUWSub]. *)
+val const_nuw_sub : llvalue -> llvalue -> llvalue
+
+(** [const_fsub c1 c2] returns the constant difference, [c1 - c2], of two
+    constant floats. See the method [llvm::ConstantExpr::getFSub]. *)
+val const_fsub : llvalue -> llvalue -> llvalue
+
+(** [const_mul c1 c2] returns the constant product of two constants.
+    See the method [llvm::ConstantExpr::getMul]. *)
+val const_mul : llvalue -> llvalue -> llvalue
+
+(** [const_nsw_mul c1 c2] returns the constant product of two constants with
+    no signed wrapping. The result is undefined if the product overflows.
+    See the method [llvm::ConstantExpr::getNSWMul]. *)
+val const_nsw_mul : llvalue -> llvalue -> llvalue
+
+(** [const_nuw_mul c1 c2] returns the constant product of two constants with
+    no unsigned wrapping. The result is undefined if the product overflows.
+    See the method [llvm::ConstantExpr::getNUWMul]. *)
+val const_nuw_mul : llvalue -> llvalue -> llvalue
+
+(** [const_fmul c1 c2] returns the constant product of two constant floats.
+    See the method [llvm::ConstantExpr::getFMul]. *)
+val const_fmul : llvalue -> llvalue -> llvalue
+
+(** [const_udiv c1 c2] returns the constant quotient [c1 / c2] of two unsigned
+    integer constants.
+    See the method [llvm::ConstantExpr::getUDiv]. *)
+val const_udiv : llvalue -> llvalue -> llvalue
+
+(** [const_sdiv c1 c2] returns the constant quotient [c1 / c2] of two signed
+    integer constants.
+    See the method [llvm::ConstantExpr::getSDiv]. *)
+val const_sdiv : llvalue -> llvalue -> llvalue
+
+(** [const_exact_sdiv c1 c2] returns the constant quotient [c1 / c2] of two
+    signed integer constants. The result is undefined if the result is rounded
+    or overflows. See the method [llvm::ConstantExpr::getExactSDiv]. *)
+val const_exact_sdiv : llvalue -> llvalue -> llvalue
+
+(** [const_fdiv c1 c2] returns the constant quotient [c1 / c2] of two floating
+    point constants.
+    See the method [llvm::ConstantExpr::getFDiv]. *)
+val const_fdiv : llvalue -> llvalue -> llvalue
+
+(** [const_urem c1 c2] returns the constant remainder [c1 MOD c2] of two
+    unsigned integer constants.
+    See the method [llvm::ConstantExpr::getURem]. *)
+val const_urem : llvalue -> llvalue -> llvalue
+
+(** [const_srem c1 c2] returns the constant remainder [c1 MOD c2] of two
+    signed integer constants.
+    See the method [llvm::ConstantExpr::getSRem]. *)
+val const_srem : llvalue -> llvalue -> llvalue
+
+(** [const_frem c1 c2] returns the constant remainder [c1 MOD c2] of two
+    signed floating point constants.
+    See the method [llvm::ConstantExpr::getFRem]. *)
+val const_frem : llvalue -> llvalue -> llvalue
+
+(** [const_and c1 c2] returns the constant bitwise [AND] of two integer
+    constants.
+    See the method [llvm::ConstantExpr::getAnd]. *)
+val const_and : llvalue -> llvalue -> llvalue
+
+(** [const_or c1 c2] returns the constant bitwise [OR] of two integer
+    constants.
+    See the method [llvm::ConstantExpr::getOr]. *)
+val const_or : llvalue -> llvalue -> llvalue
+
+(** [const_xor c1 c2] returns the constant bitwise [XOR] of two integer
+    constants.
+    See the method [llvm::ConstantExpr::getXor]. 
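+    For example (a sketch, where [i32] is some integer type), this folds
+    [5 xor 3] to the constant [6]:
+    {[
+      let six = const_xor (const_int i32 5) (const_int i32 3)
+    ]}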
*)
+val const_xor : llvalue -> llvalue -> llvalue
+
+(** [const_icmp pred c1 c2] returns the constant comparison of two integer
+    constants, [c1 pred c2].
+    See the method [llvm::ConstantExpr::getICmp]. *)
+val const_icmp : Icmp.t -> llvalue -> llvalue -> llvalue
+
+
+(** [const_fcmp pred c1 c2] returns the constant comparison of two floating
+    point constants, [c1 pred c2].
+    See the method [llvm::ConstantExpr::getFCmp]. *)
+val const_fcmp : Fcmp.t -> llvalue -> llvalue -> llvalue
+
+
+(** [const_shl c1 c2] returns the constant integer [c1] left-shifted by the
+    constant integer [c2].
+    See the method [llvm::ConstantExpr::getShl]. *)
+val const_shl : llvalue -> llvalue -> llvalue
+
+(** [const_lshr c1 c2] returns the constant integer [c1] right-shifted by the
+    constant integer [c2] with zero extension.
+    See the method [llvm::ConstantExpr::getLShr]. *)
+val const_lshr : llvalue -> llvalue -> llvalue
+
+(** [const_ashr c1 c2] returns the constant integer [c1] right-shifted by the
+    constant integer [c2] with sign extension.
+    See the method [llvm::ConstantExpr::getAShr]. *)
+val const_ashr : llvalue -> llvalue -> llvalue
+
+(** [const_gep pc indices] returns the constant [getElementPtr] of [pc] with the
+    constant integer indices from the array [indices].
+    See the method [llvm::ConstantExpr::getGetElementPtr]. *)
+val const_gep : llvalue -> llvalue array -> llvalue
+
+(** [const_in_bounds_gep pc indices] returns the constant [getElementPtr] of
+    [pc] with the constant integer indices from the array [indices].
+    See the method [llvm::ConstantExpr::getInBoundsGetElementPtr]. *)
+val const_in_bounds_gep : llvalue -> llvalue array -> llvalue
+
+
+(** [const_trunc c ty] returns the constant truncation of integer constant [c]
+    to the smaller integer type [ty].
+    See the method [llvm::ConstantExpr::getTrunc]. *)
+val const_trunc : llvalue -> lltype -> llvalue
+
+(** [const_sext c ty] returns the constant sign extension of integer constant
+    [c] to the larger integer type [ty].
+    See the method [llvm::ConstantExpr::getSExt]. *)
+val const_sext : llvalue -> lltype -> llvalue
+
+(** [const_zext c ty] returns the constant zero extension of integer constant
+    [c] to the larger integer type [ty].
+    See the method [llvm::ConstantExpr::getZExt]. *)
+val const_zext : llvalue -> lltype -> llvalue
+
+(** [const_fptrunc c ty] returns the constant truncation of floating point
+    constant [c] to the smaller floating point type [ty].
+    See the method [llvm::ConstantExpr::getFPTrunc]. *)
+val const_fptrunc : llvalue -> lltype -> llvalue
+
+(** [const_fpext c ty] returns the constant extension of floating point constant
+    [c] to the larger floating point type [ty].
+    See the method [llvm::ConstantExpr::getFPExt]. *)
+val const_fpext : llvalue -> lltype -> llvalue
+
+(** [const_uitofp c ty] returns the constant floating point conversion of
+    unsigned integer constant [c] to the floating point type [ty].
+    See the method [llvm::ConstantExpr::getUIToFP]. *)
+val const_uitofp : llvalue -> lltype -> llvalue
+
+(** [const_sitofp c ty] returns the constant floating point conversion of
+    signed integer constant [c] to the floating point type [ty].
+    See the method [llvm::ConstantExpr::getSIToFP]. *)
+val const_sitofp : llvalue -> lltype -> llvalue
+
+(** [const_fptoui c ty] returns the constant unsigned integer conversion of
+    floating point constant [c] to integer type [ty].
+    See the method [llvm::ConstantExpr::getFPToUI]. 
*)
+val const_fptoui : llvalue -> lltype -> llvalue
+
+(** [const_fptosi c ty] returns the constant signed integer conversion of
+    floating point constant [c] to integer type [ty].
+    See the method [llvm::ConstantExpr::getFPToSI]. *)
+val const_fptosi : llvalue -> lltype -> llvalue
+
+(** [const_ptrtoint c ty] returns the constant integer conversion of
+    pointer constant [c] to integer type [ty].
+    See the method [llvm::ConstantExpr::getPtrToInt]. *)
+val const_ptrtoint : llvalue -> lltype -> llvalue
+
+(** [const_inttoptr c ty] returns the constant pointer conversion of
+    integer constant [c] to pointer type [ty].
+    See the method [llvm::ConstantExpr::getIntToPtr]. *)
+val const_inttoptr : llvalue -> lltype -> llvalue
+
+(** [const_bitcast c ty] returns the constant bitwise conversion of constant [c]
+    to type [ty] of equal size.
+    See the method [llvm::ConstantExpr::getBitCast]. *)
+val const_bitcast : llvalue -> lltype -> llvalue
+
+(** [const_zext_or_bitcast c ty] returns a constant zext or bitwise cast
+    conversion of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getZExtOrBitCast]. *)
+val const_zext_or_bitcast : llvalue -> lltype -> llvalue
+
+
+(** [const_sext_or_bitcast c ty] returns a constant sext or bitwise cast
+    conversion of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getSExtOrBitCast]. *)
+val const_sext_or_bitcast : llvalue -> lltype -> llvalue
+
+
+(** [const_trunc_or_bitcast c ty] returns a constant trunc or bitwise cast
+    conversion of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getTruncOrBitCast]. *)
+val const_trunc_or_bitcast : llvalue -> lltype -> llvalue
+
+
+(** [const_pointercast c ty] returns a constant bitcast or a pointer-to-int
+    cast conversion of constant [c] to type [ty] of equal size.
+    See the method [llvm::ConstantExpr::getPointerCast]. *)
+val const_pointercast : llvalue -> lltype -> llvalue
+
+
+(** [const_intcast c ty] returns a constant zext, bitcast, or trunc for integer
+    -> integer casts of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getIntCast]. *)
+val const_intcast : llvalue -> lltype -> llvalue
+
+
+(** [const_fpcast c ty] returns a constant fpext, bitcast, or fptrunc for fp ->
+    fp casts of constant [c] to type [ty].
+    See the method [llvm::ConstantExpr::getFPCast]. *)
+val const_fpcast : llvalue -> lltype -> llvalue
+
+
+(** [const_select cond t f] returns the constant conditional which returns value
+    [t] if the boolean constant [cond] is true and the value [f] otherwise.
+    See the method [llvm::ConstantExpr::getSelect]. *)
+val const_select : llvalue -> llvalue -> llvalue -> llvalue
+
+
+(** [const_extractelement vec i] returns the constant [i]th element of
+    constant vector [vec]. [i] must be a constant [i32] value, unsigned and
+    less than the size of the vector.
+    See the method [llvm::ConstantExpr::getExtractElement]. *)
+val const_extractelement : llvalue -> llvalue -> llvalue
+
+
+(** [const_insertelement vec v i] returns the constant vector with the same
+    elements as constant vector [vec] but with the [i]th element replaced by
+    the constant [v]. [v] must be a constant value with the type of the vector
+    elements. [i] must be a constant [i32] value, unsigned and less than the
+    size of the vector.
+    See the method [llvm::ConstantExpr::getInsertElement]. *)
+val const_insertelement : llvalue -> llvalue -> llvalue -> llvalue
+
+
+(** [const_shufflevector a b mask] returns a constant [shufflevector]. 
+    See the LLVM Language Reference for details on the [shufflevector]
+    instruction.
+    See the method [llvm::ConstantExpr::getShuffleVector]. *)
+val const_shufflevector : llvalue -> llvalue -> llvalue -> llvalue
+
+
+(** [const_extractvalue agg idxs] returns the constant value at the indices
+    [idxs] of the constant aggregate [agg]. Each index in [idxs] must be less
+    than the size of the aggregate.
+    See the method [llvm::ConstantExpr::getExtractValue]. *)
+val const_extractvalue : llvalue -> int array -> llvalue
+
+
+(** [const_insertvalue agg val idxs] inserts the value [val] at the specified
+    indices [idxs] in the aggregate [agg]. Each index in [idxs] must be less
+    than the size of the aggregate.
+    See the method [llvm::ConstantExpr::getInsertValue]. *)
+val const_insertvalue : llvalue -> llvalue -> int array -> llvalue
+
+
+(** [const_inline_asm ty asm con side align] inserts an inline assembly string.
+    See the method [llvm::InlineAsm::get]. *)
+val const_inline_asm : lltype -> string -> string -> bool -> bool ->
+                       llvalue
+
+
+(** [block_address f bb] returns the address of the basic block [bb] in the
+    function [f]. See the method [llvm::BlockAddress::get]. *)
+val block_address : llvalue -> llbasicblock -> llvalue
+
+
+(** {7 Operations on global variables, functions, and aliases (globals)} *)
+
+(** [global_parent g] is the enclosing module of the global value [g].
+    See the method [llvm::GlobalValue::getParent]. *)
+val global_parent : llvalue -> llmodule
+
+(** [is_declaration g] returns [true] if the global value [g] is a declaration
+    only. Returns [false] otherwise.
+    See the method [llvm::GlobalValue::isDeclaration]. *)
+val is_declaration : llvalue -> bool
+
+(** [linkage g] returns the linkage of the global value [g].
+    See the method [llvm::GlobalValue::getLinkage]. *)
+val linkage : llvalue -> Linkage.t
+
+(** [set_linkage l g] sets the linkage of the global value [g] to [l].
+    See the method [llvm::GlobalValue::setLinkage]. *)
+val set_linkage : Linkage.t -> llvalue -> unit
+
+(** [section g] returns the linker section of the global value [g].
+    See the method [llvm::GlobalValue::getSection]. *)
+val section : llvalue -> string
+
+(** [set_section s g] sets the linker section of the global value [g] to [s].
+    See the method [llvm::GlobalValue::setSection]. *)
+val set_section : string -> llvalue -> unit
+
+(** [visibility g] returns the linker visibility of the global value [g].
+    See the method [llvm::GlobalValue::getVisibility]. *)
+val visibility : llvalue -> Visibility.t
+
+(** [set_visibility v g] sets the linker visibility of the global value [g] to
+    [v]. See the method [llvm::GlobalValue::setVisibility]. *)
+val set_visibility : Visibility.t -> llvalue -> unit
+
+
+(** [alignment g] returns the required alignment of the global value [g].
+    See the method [llvm::GlobalValue::getAlignment]. *)
+val alignment : llvalue -> int
+
+(** [set_alignment n g] sets the required alignment of the global value [g] to
+    [n] bytes. See the method [llvm::GlobalValue::setAlignment]. *)
+val set_alignment : int -> llvalue -> unit
+
+
+(** {7 Operations on global variables} *)
+
+(** [declare_global ty name m] returns a new global variable of type [ty] and
+    with name [name] in module [m] in the default address space (0). If such a
+    global variable already exists, it is returned. If the type of the existing
+    global differs, then a bitcast to [ty] is returned. 
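+    For example (a sketch, given a context [c] and module [m]):
+    {[
+      let g = declare_global (i32_type c) "counter" m
+    ]}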
*)
+val declare_global : lltype -> string -> llmodule -> llvalue
+
+
+(** [declare_qualified_global ty name addrspace m] returns a new global variable
+    of type [ty] and with name [name] in module [m] in the address space
+    [addrspace]. If such a global variable already exists, it is returned. If
+    the type of the existing global differs, then a bitcast to [ty] is
+    returned. *)
+val declare_qualified_global : lltype -> string -> int -> llmodule ->
+                               llvalue
+
+
+(** [define_global name init m] returns a new global with name [name] and
+    initializer [init] in module [m] in the default address space (0). If the
+    named global already exists, it is renamed.
+    See the constructor of [llvm::GlobalVariable]. *)
+val define_global : string -> llvalue -> llmodule -> llvalue
+
+
+(** [define_qualified_global name init addrspace m] returns a new global with
+    name [name] and initializer [init] in module [m] in the address space
+    [addrspace]. If the named global already exists, it is renamed.
+    See the constructor of [llvm::GlobalVariable]. *)
+val define_qualified_global : string -> llvalue -> int -> llmodule ->
+                              llvalue
+
+
+(** [lookup_global name m] returns [Some g] if a global variable with name
+    [name] exists in module [m]. If no such global exists, returns [None].
+    See the method [llvm::Module::getNamedGlobal]. *)
+val lookup_global : string -> llmodule -> llvalue option
+
+
+(** [delete_global gv] destroys the global variable [gv].
+    See the method [llvm::GlobalVariable::eraseFromParent]. *)
+val delete_global : llvalue -> unit
+
+(** [global_begin m] returns the first position in the global variable list of
+    the module [m]. [global_begin] and [global_succ] can be used to iterate
+    over the global list in order.
+    See the method [llvm::Module::global_begin]. *)
+val global_begin : llmodule -> (llmodule, llvalue) llpos
+
+
+(** [global_succ gv] returns the global variable list position succeeding
+    [Before gv].
+    See the method [llvm::Module::global_iterator::operator++]. *)
+val global_succ : llvalue -> (llmodule, llvalue) llpos
+
+
+(** [iter_globals f m] applies function [f] to each of the global variables of
+    module [m] in order. Tail recursive. *)
+val iter_globals : (llvalue -> unit) -> llmodule -> unit
+
+(** [fold_left_globals f init m] is [f (... (f init g1) ...) gN] where
+    [g1,...,gN] are the global variables of module [m]. Tail recursive. *)
+val fold_left_globals : ('a -> llvalue -> 'a) -> 'a -> llmodule -> 'a
+
+(** [global_end m] returns the last position in the global variable list of the
+    module [m]. [global_end] and [global_pred] can be used to iterate over the
+    global list in reverse.
+    See the method [llvm::Module::global_end]. *)
+val global_end : llmodule -> (llmodule, llvalue) llrev_pos
+
+
+(** [global_pred gv] returns the global variable list position preceding
+    [After gv].
+    See the method [llvm::Module::global_iterator::operator--]. *)
+val global_pred : llvalue -> (llmodule, llvalue) llrev_pos
+
+
+(** [rev_iter_globals f m] applies function [f] to each of the global variables
+    of module [m] in reverse order. Tail recursive. *)
+val rev_iter_globals : (llvalue -> unit) -> llmodule -> unit
+
+(** [fold_right_globals f m init] is [f g1 (... (f gN init) ...)] where
+    [g1,...,gN] are the global variables of module [m]. Tail recursive. *)
+val fold_right_globals : (llvalue -> 'a -> 'a) -> llmodule -> 'a -> 'a
+
+(** [is_global_constant gv] returns [true] if the global variable [gv] is a
+    constant. Returns [false] otherwise. 
+(** [is_global_constant gv] returns [true] if the global variable [gv] is a
+    constant. Returns [false] otherwise.
+    See the method [llvm::GlobalVariable::isConstant]. *)
+val is_global_constant : llvalue -> bool
+
+(** [set_global_constant c gv] sets the global variable [gv] to be a constant if
+    [c] is [true] and not if [c] is [false].
+    See the method [llvm::GlobalVariable::setConstant]. *)
+val set_global_constant : bool -> llvalue -> unit
+
+
+(** [global_initializer gv] returns the initializer for the global variable
+    [gv]. See the method [llvm::GlobalVariable::getInitializer]. *)
+val global_initializer : llvalue -> llvalue
+
+(** [set_initializer c gv] sets the initializer for the global variable
+    [gv] to the constant [c].
+    See the method [llvm::GlobalVariable::setInitializer]. *)
+val set_initializer : llvalue -> llvalue -> unit
+
+(** [remove_initializer gv] unsets the initializer for the global variable
+    [gv].
+    See the method [llvm::GlobalVariable::setInitializer]. *)
+val remove_initializer : llvalue -> unit
+
+(** [is_thread_local gv] returns [true] if the global variable [gv] is
+    thread-local and [false] otherwise.
+    See the method [llvm::GlobalVariable::isThreadLocal]. *)
+val is_thread_local : llvalue -> bool
+
+(** [set_thread_local c gv] sets the global variable [gv] to be thread local if
+    [c] is [true] and not otherwise.
+    See the method [llvm::GlobalVariable::setThreadLocal]. *)
+val set_thread_local : bool -> llvalue -> unit
+
+
+(** {7 Operations on aliases} *)
+
+(** [add_alias m t a n] inserts an alias in the module [m] with the type [t] and
+    the aliasee [a] with the name [n].
+    See the constructor for [llvm::GlobalAlias]. *)
+val add_alias : llmodule -> lltype -> llvalue -> string -> llvalue
+
+
+(** {7 Operations on functions} *)
+
+(** [declare_function name ty m] returns a new function of type [ty] and
+    with name [name] in module [m]. If such a function already exists,
+    it is returned. If the type of the existing function differs, then a bitcast
+    to [ty] is returned. *)
+val declare_function : string -> lltype -> llmodule -> llvalue
+
+
+(** [define_function name ty m] creates a new function with name [name] and
+    type [ty] in module [m]. If the named function already exists, it is
+    renamed. An entry basic block is created in the function.
+    See the constructor of [llvm::Function]. *)
+val define_function : string -> lltype -> llmodule -> llvalue
+
+
+(** [lookup_function name m] returns [Some f] if a function with name
+    [name] exists in module [m]. If no such function exists, returns [None].
+    See the method [llvm::Module::getFunction]. *)
+val lookup_function : string -> llmodule -> llvalue option
+
+
+(** [delete_function f] destroys the function [f].
+    See the method [llvm::Function::eraseFromParent]. *)
+val delete_function : llvalue -> unit
+
+(** [function_begin m] returns the first position in the function list of the
+    module [m]. [function_begin] and [function_succ] can be used to iterate over
+    the function list in order.
+    See the method [llvm::Module::begin]. *)
+val function_begin : llmodule -> (llmodule, llvalue) llpos
+
+
+(** [function_succ gv] returns the function list position succeeding
+    [Before gv].
+    See the method [llvm::Module::iterator::operator++]. *)
+val function_succ : llvalue -> (llmodule, llvalue) llpos
+
+
+(** [iter_functions f m] applies function [f] to each of the functions of module
+    [m] in order. Tail recursive. *)
+val iter_functions : (llvalue -> unit) -> llmodule -> unit
+
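+(** Example: a minimal, illustrative sketch (assuming a context [ctx] and a
+    module [m]) that declares an external function and defines another one
+    returning the constant [42]:
+    {[
+      let i32 = i32_type ctx in
+      let puts = declare_function "puts"
+                   (function_type i32 [| pointer_type (i8_type ctx) |]) m in
+      let f = define_function "answer" (function_type i32 [| |]) m in
+      let b = builder_at_end ctx (entry_block f) in
+      ignore (build_ret (const_int i32 42) b);
+      ignore puts
+    ]} *)
+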
+(** [fold_left_functions f init m] is [f (... (f init f1) ...) fN] where
+    [f1,...,fN] are the functions of module [m]. Tail recursive. *)
+val fold_left_functions : ('a -> llvalue -> 'a) -> 'a -> llmodule -> 'a
+
+(** [function_end m] returns the last position in the function list of
+    the module [m]. [function_end] and [function_pred] can be used to iterate
+    over the function list in reverse.
+    See the method [llvm::Module::end]. *)
+val function_end : llmodule -> (llmodule, llvalue) llrev_pos
+
+
+(** [function_pred gv] returns the function list position preceding [After gv].
+    See the method [llvm::Module::iterator::operator--]. *)
+val function_pred : llvalue -> (llmodule, llvalue) llrev_pos
+
+
+(** [rev_iter_functions f m] applies function [f] to each of the functions of
+    module [m] in reverse order. Tail recursive. *)
+val rev_iter_functions : (llvalue -> unit) -> llmodule -> unit
+
+(** [fold_right_functions f m init] is [f f1 (... (f fN init) ...)] where
+    [f1,...,fN] are the functions of module [m]. Tail recursive. *)
+val fold_right_functions : (llvalue -> 'a -> 'a) -> llmodule -> 'a -> 'a
+
+(** [is_intrinsic f] returns [true] if the function [f] is an intrinsic.
+    See the method [llvm::Function::isIntrinsic]. *)
+val is_intrinsic : llvalue -> bool
+
+(** [function_call_conv f] returns the calling convention of the function [f].
+    See the method [llvm::Function::getCallingConv]. *)
+val function_call_conv : llvalue -> int
+
+(** [set_function_call_conv cc f] sets the calling convention of the function
+    [f] to the calling convention numbered [cc].
+    See the method [llvm::Function::setCallingConv]. *)
+val set_function_call_conv : int -> llvalue -> unit
+
+
+(** [gc f] returns [Some name] if the function [f] has a garbage
+    collection algorithm specified and [None] otherwise.
+    See the method [llvm::Function::getGC]. *)
+val gc : llvalue -> string option
+
+(** [set_gc gc f] sets the collection algorithm for the function [f] to
+    [gc]. See the method [llvm::Function::setGC]. *)
+val set_gc : string option -> llvalue -> unit
+
+(** [add_function_attr f a] adds the function attribute [a] to the function
+    [f]. *)
+val add_function_attr : llvalue -> Attribute.t -> unit
+
+(** [remove_function_attr f a] removes the function attribute [a] from the
+    function [f]. *)
+val remove_function_attr : llvalue -> Attribute.t -> unit
+
+(** {7 Operations on params} *)
+
+(** [params f] returns the parameters of function [f].
+    See the method [llvm::Function::getArgumentList]. *)
+val params : llvalue -> llvalue array
+
+(** [param f n] returns the [n]th parameter of function [f].
+    See the method [llvm::Function::getArgumentList]. *)
+val param : llvalue -> int -> llvalue
+
+(** [param_parent p] returns the parent function that owns the parameter.
+    See the method [llvm::Argument::getParent]. *)
+val param_parent : llvalue -> llvalue
+
+(** [param_begin f] returns the first position in the parameter list of the
+    function [f]. [param_begin] and [param_succ] can be used to iterate over
+    the parameter list in order.
+    See the method [llvm::Function::arg_begin]. *)
+val param_begin : llvalue -> (llvalue, llvalue) llpos
+
+(** [param_succ p] returns the parameter list position succeeding
+    [Before p].
+    See the method [llvm::Function::arg_iterator::operator++]. *)
+val param_succ : llvalue -> (llvalue, llvalue) llpos
+
+(** [iter_params f fn] applies function [f] to each of the parameters
+    of function [fn] in order. Tail recursive. *)
+val iter_params : (llvalue -> unit) -> llvalue -> unit
+
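+(** Example: an illustrative sketch (assuming [fn] is an existing function
+    value) that gives each parameter a readable name:
+    {[
+      Array.iteri
+        (fun i p -> set_value_name ("arg" ^ string_of_int i) p)
+        (params fn)
+    ]} *)
+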
+(** [fold_left_params f init fn] is [f (... (f init p1) ...) pN] where
+    [p1,...,pN] are the parameters of function [fn].
+    Tail recursive. *)
+val fold_left_params : ('a -> llvalue -> 'a) -> 'a -> llvalue -> 'a
+
+(** [param_end f] returns the last position in the parameter list of
+    the function [f]. [param_end] and [param_pred] can be used to iterate
+    over the parameter list in reverse.
+    See the method [llvm::Function::arg_end]. *)
+val param_end : llvalue -> (llvalue, llvalue) llrev_pos
+
+(** [param_pred p] returns the parameter list position preceding [After p].
+    See the method [llvm::Function::arg_iterator::operator--]. *)
+val param_pred : llvalue -> (llvalue, llvalue) llrev_pos
+
+
+(** [rev_iter_params f fn] applies function [f] to each of the parameters
+    of function [fn] in reverse order. Tail recursive. *)
+val rev_iter_params : (llvalue -> unit) -> llvalue -> unit
+
+(** [fold_right_params f fn init] is [f p1 (... (f pN init) ...)] where
+    [p1,...,pN] are the parameters of function [fn]. Tail recursive. *)
+val fold_right_params : (llvalue -> 'a -> 'a) -> llvalue -> 'a -> 'a
+
+(** [add_param_attr p a] adds attribute [a] to parameter [p]. *)
+val add_param_attr : llvalue -> Attribute.t -> unit
+
+(** [remove_param_attr p a] removes attribute [a] from parameter [p]. *)
+val remove_param_attr : llvalue -> Attribute.t -> unit
+
+(** [set_param_alignment p a] sets the alignment of parameter [p] to [a]. *)
+val set_param_alignment : llvalue -> int -> unit
+
+
+(** {7 Operations on basic blocks} *)
+
+(** [basic_blocks f] returns the basic blocks of the function [f].
+    See the method [llvm::Function::getBasicBlockList]. *)
+val basic_blocks : llvalue -> llbasicblock array
+
+(** [entry_block f] returns the entry basic block of the function [f].
+    See the method [llvm::Function::getEntryBlock]. *)
+val entry_block : llvalue -> llbasicblock
+
+(** [delete_block bb] deletes the basic block [bb].
+    See the method [llvm::BasicBlock::eraseFromParent]. *)
+val delete_block : llbasicblock -> unit
+
+(** [append_block c name f] creates a new basic block named [name] at the end of
+    function [f] in the context [c].
+    See the constructor of [llvm::BasicBlock]. *)
+val append_block : llcontext -> string -> llvalue -> llbasicblock
+
+
+(** [insert_block c name bb] creates a new basic block named [name] before the
+    basic block [bb] in the context [c].
+    See the constructor of [llvm::BasicBlock]. *)
+val insert_block : llcontext -> string -> llbasicblock -> llbasicblock
+
+
+(** [block_parent bb] returns the parent function that owns the basic block.
+    See the method [llvm::BasicBlock::getParent]. *)
+val block_parent : llbasicblock -> llvalue
+
+(** [block_begin f] returns the first position in the basic block list of the
+    function [f]. [block_begin] and [block_succ] can be used to iterate over
+    the basic block list in order.
+    See the method [llvm::Function::begin]. *)
+val block_begin : llvalue -> (llvalue, llbasicblock) llpos
+
+
+(** [block_succ bb] returns the basic block list position succeeding
+    [Before bb].
+    See the method [llvm::Function::iterator::operator++]. *)
+val block_succ : llbasicblock -> (llvalue, llbasicblock) llpos
+
+
+(** [iter_blocks f fn] applies function [f] to each of the basic blocks
+    of function [fn] in order. Tail recursive. *)
+val iter_blocks : (llbasicblock -> unit) -> llvalue -> unit
+
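+(** Example: an illustrative sketch (assuming a context [ctx] and a function
+    [fn]) that appends a block and prints the name of every block in [fn]:
+    {[
+      let _cleanup = append_block ctx "cleanup" fn in
+      iter_blocks
+        (fun bb -> print_endline (value_name (value_of_block bb)))
+        fn
+    ]} *)
+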
+(** [fold_left_blocks f init fn] is [f (... (f init b1) ...) bN] where
+    [b1,...,bN] are the basic blocks of function [fn]. Tail recursive. *)
+val fold_left_blocks : ('a -> llbasicblock -> 'a) -> 'a -> llvalue -> 'a
+
+(** [block_end f] returns the last position in the basic block list of
+    the function [f]. [block_end] and [block_pred] can be used to iterate
+    over the basic block list in reverse.
+    See the method [llvm::Function::end]. *)
+val block_end : llvalue -> (llvalue, llbasicblock) llrev_pos
+
+
+(** [block_pred bb] returns the basic block list position preceding [After bb].
+    See the method [llvm::Function::iterator::operator--]. *)
+val block_pred : llbasicblock -> (llvalue, llbasicblock) llrev_pos
+
+
+(** [rev_iter_blocks f fn] applies function [f] to each of the basic blocks
+    of function [fn] in reverse order. Tail recursive. *)
+val rev_iter_blocks : (llbasicblock -> unit) -> llvalue -> unit
+
+(** [fold_right_blocks f fn init] is [f b1 (... (f bN init) ...)] where
+    [b1,...,bN] are the basic blocks of function [fn]. Tail recursive. *)
+val fold_right_blocks : (llbasicblock -> 'a -> 'a) -> llvalue -> 'a -> 'a
+
+(** [value_of_block bb] losslessly casts [bb] to an [llvalue]. *)
+val value_of_block : llbasicblock -> llvalue
+
+(** [value_is_block v] returns [true] if the value [v] is a basic block and
+    [false] otherwise.
+    Similar to [llvm::isa]. *)
+val value_is_block : llvalue -> bool
+
+(** [block_of_value v] losslessly casts [v] to an [llbasicblock]. *)
+val block_of_value : llvalue -> llbasicblock
+
+
+(** {7 Operations on instructions} *)
+
+(** [instr_parent i] is the enclosing basic block of the instruction [i].
+    See the method [llvm::Instruction::getParent]. *)
+val instr_parent : llvalue -> llbasicblock
+
+(** [instr_begin bb] returns the first position in the instruction list of the
+    basic block [bb]. [instr_begin] and [instr_succ] can be used to iterate over
+    the instruction list in order.
+    See the method [llvm::BasicBlock::begin]. *)
+val instr_begin : llbasicblock -> (llbasicblock, llvalue) llpos
+
+
+(** [instr_succ i] returns the instruction list position succeeding [Before i].
+    See the method [llvm::BasicBlock::iterator::operator++]. *)
+val instr_succ : llvalue -> (llbasicblock, llvalue) llpos
+
+
+(** [iter_instrs f bb] applies function [f] to each of the instructions of basic
+    block [bb] in order. Tail recursive. *)
+val iter_instrs: (llvalue -> unit) -> llbasicblock -> unit
+
+(** [fold_left_instrs f init bb] is [f (... (f init i1) ...) iN] where
+    [i1,...,iN] are the instructions of basic block [bb]. Tail recursive. *)
+val fold_left_instrs: ('a -> llvalue -> 'a) -> 'a -> llbasicblock -> 'a
+
+(** [instr_end bb] returns the last position in the instruction list of the
+    basic block [bb]. [instr_end] and [instr_pred] can be used to iterate over
+    the instruction list in reverse.
+    See the method [llvm::BasicBlock::end]. *)
+val instr_end : llbasicblock -> (llbasicblock, llvalue) llrev_pos
+
+
+(** [instr_pred i] returns the instruction list position preceding [After i].
+    See the method [llvm::BasicBlock::iterator::operator--]. *)
+val instr_pred : llvalue -> (llbasicblock, llvalue) llrev_pos
+
+
+(** [fold_right_instrs f bb init] is [f i1 (... (f iN init) ...)] where
+    [i1,...,iN] are the instructions of basic block [bb]. Tail recursive. *)
+val fold_right_instrs: (llvalue -> 'a -> 'a) -> llbasicblock -> 'a -> 'a
+
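+(** Example: an illustrative sketch (assuming [bb] is an existing basic block)
+    that counts the instructions in a block with the fold above:
+    {[
+      let num_instrs bb =
+        fold_left_instrs (fun n _ -> n + 1) 0 bb
+    ]} *)
+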
+(** {7 Operations on call sites} *)
+
+(** [instruction_call_conv ci] is the calling convention for the call or invoke
+    instruction [ci], which may be one of the values from the module
+    {!CallConv}.
+    See the methods [llvm::CallInst::getCallingConv] and
+    [llvm::InvokeInst::getCallingConv]. *)
+val instruction_call_conv: llvalue -> int
+
+
+(** [set_instruction_call_conv cc ci] sets the calling convention for the call
+    or invoke instruction [ci] to the integer [cc], which can be one of the
+    values from the module {!CallConv}.
+    See the methods [llvm::CallInst::setCallingConv]
+    and [llvm::InvokeInst::setCallingConv]. *)
+val set_instruction_call_conv: int -> llvalue -> unit
+
+
+(** [add_instruction_param_attr ci i a] adds attribute [a] to the [i]th
+    parameter of the call or invoke instruction [ci]. [i]=0 denotes the return
+    value. *)
+val add_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
+
+(** [remove_instruction_param_attr ci i a] removes attribute [a] from the
+    [i]th parameter of the call or invoke instruction [ci]. [i]=0 denotes the
+    return value. *)
+val remove_instruction_param_attr : llvalue -> int -> Attribute.t -> unit
+
+(** {7 Operations on call instructions (only)} *)
+
+(** [is_tail_call ci] is [true] if the call instruction [ci] is flagged as
+    eligible for tail call optimization, [false] otherwise.
+    See the method [llvm::CallInst::isTailCall]. *)
+val is_tail_call : llvalue -> bool
+
+(** [set_tail_call tc ci] flags the call instruction [ci] as eligible for tail
+    call optimization if [tc] is [true], clears the flag otherwise.
+    See the method [llvm::CallInst::setTailCall]. *)
+val set_tail_call : bool -> llvalue -> unit
+
+(** {7 Operations on phi nodes} *)
+
+(** [add_incoming (v, bb) pn] adds the value [v] to the phi node [pn] for use
+    with branches from [bb]. See the method [llvm::PHINode::addIncoming]. *)
+val add_incoming : (llvalue * llbasicblock) -> llvalue -> unit
+
+
+(** [incoming pn] returns the list of value-block pairs for phi node [pn].
+    See the method [llvm::PHINode::getIncomingValue]. *)
+val incoming : llvalue -> (llvalue * llbasicblock) list
+
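+(** Example: an illustrative sketch (assuming blocks [bb1] and [bb2], values
+    [v1] and [v2] of matching type, and a builder [b] positioned in the join
+    block) that builds a phi node and later adds another incoming edge:
+    {[
+      let pn = build_phi [ (v1, bb1) ] "merge" b in
+      add_incoming (v2, bb2) pn
+    ]} *)
+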
+(** {6 Instruction builders} *)
+
+(** [builder context] creates an instruction builder with no position in
+    the context [context]. It is invalid to use this builder until its position
+    is set with {!position_before} or {!position_at_end}. See the constructor
+    for [llvm::LLVMBuilder]. *)
+val builder : llcontext -> llbuilder
+
+(** [builder_at context ip] creates an instruction builder positioned at [ip].
+    See the constructor for [llvm::LLVMBuilder]. *)
+val builder_at : llcontext -> (llbasicblock, llvalue) llpos -> llbuilder
+
+(** [builder_before context ins] creates an instruction builder positioned
+    before the instruction [ins].
+    See the constructor for [llvm::LLVMBuilder]. *)
+val builder_before : llcontext -> llvalue -> llbuilder
+
+(** [builder_at_end context bb] creates an instruction builder positioned at the
+    end of the basic block [bb].
+    See the constructor for [llvm::LLVMBuilder]. *)
+val builder_at_end : llcontext -> llbasicblock -> llbuilder
+
+(** [position_builder ip b] moves the instruction builder [b] to the position
+    [ip].
+    See the constructor for [llvm::LLVMBuilder]. *)
+val position_builder : (llbasicblock, llvalue) llpos -> llbuilder -> unit
+
+
+(** [position_before ins b] moves the instruction builder [b] to before the
+    instruction [ins]. See the method [llvm::LLVMBuilder::SetInsertPoint]. *)
+val position_before : llvalue -> llbuilder -> unit
+
+(** [position_at_end bb b] moves the instruction builder [b] to the end of the
+    basic block [bb]. See the method [llvm::LLVMBuilder::SetInsertPoint]. *)
+val position_at_end : llbasicblock -> llbuilder -> unit
+
+(** [insertion_block b] returns the basic block that the builder [b] is
+    positioned to insert into. Raises [Not_found] if the instruction builder is
+    uninitialized.
+    See the method [llvm::LLVMBuilder::GetInsertBlock]. *)
+val insertion_block : llbuilder -> llbasicblock
+
+(** [insert_into_builder i name b] inserts the specified instruction [i] at the
+    position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::Insert]. *)
+val insert_into_builder : llvalue -> string -> llbuilder -> unit
+
+
+(** {7 Metadata} *)
+
+(** [set_current_debug_location b md] sets the current debug location [md] in
+    the builder [b].
+    See the method [llvm::IRBuilder::SetDebugLocation]. *)
+val set_current_debug_location : llbuilder -> llvalue -> unit
+
+
+(** [clear_current_debug_location b] clears the current debug location in the
+    builder [b]. *)
+val clear_current_debug_location : llbuilder -> unit
+
+
+(** [current_debug_location b] returns the current debug location, or [None]
+    if none is currently set.
+    See the method [llvm::IRBuilder::GetDebugLocation]. *)
+val current_debug_location : llbuilder -> llvalue option
+
+
+(** [set_inst_debug_location b i] sets the current debug location of the builder
+    [b] on the instruction [i].
+    See the method [llvm::IRBuilder::SetInstDebugLocation]. *)
+val set_inst_debug_location : llbuilder -> llvalue -> unit
+
+
+(** {7 Terminators} *)
+
+(** [build_ret_void b] creates a
+    [ret void]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateRetVoid]. *)
+val build_ret_void : llbuilder -> llvalue
+
+(** [build_ret v b] creates a
+    [ret %v]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateRet]. *)
+val build_ret : llvalue -> llbuilder -> llvalue
+
+(** [build_aggregate_ret vs b] creates a
+    [ret {...} { %v1, %v2, ... } ]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAggregateRet]. *)
+val build_aggregate_ret : llvalue array -> llbuilder -> llvalue
+
+
+(** [build_br bb b] creates a
+    [br %bb]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateBr]. *)
+val build_br : llbasicblock -> llbuilder -> llvalue
+
+(** [build_cond_br cond tbb fbb b] creates a
+    [br %cond, %tbb, %fbb]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateCondBr]. *)
+val build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llbuilder ->
+                    llvalue
+
+(** [build_switch case elsebb count b] creates an empty
+    [switch %case, %elsebb]
+    instruction at the position specified by the instruction builder [b] with
+    space reserved for [count] cases.
+    See the method [llvm::LLVMBuilder::CreateSwitch]. *)
+val build_switch : llvalue -> llbasicblock -> int -> llbuilder -> llvalue
+
+
+(** [add_case sw onval bb] causes switch instruction [sw] to branch to [bb]
+    when its input matches the constant [onval].
+    See the method [llvm::SwitchInst::addCase]. *)
+val add_case : llvalue -> llvalue -> llbasicblock -> unit
+
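+(** Example: an illustrative sketch (assuming a context [ctx], a function [fn]
+    and an [i1] condition value [cond]) that ends the entry block with a
+    conditional branch:
+    {[
+      let tbb = append_block ctx "then" fn in
+      let fbb = append_block ctx "else" fn in
+      let b = builder_at_end ctx (entry_block fn) in
+      ignore (build_cond_br cond tbb fbb b)
+    ]} *)
+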
+(** [build_indirect_br addr count b] creates an
+    [indirectbr %addr]
+    instruction at the position specified by the instruction builder [b] with
+    space reserved for [count] destinations.
+    See the method [llvm::LLVMBuilder::CreateIndirectBr]. *)
+val build_indirect_br : llvalue -> int -> llbuilder -> llvalue
+
+
+(** [add_destination br bb] adds the basic block [bb] as a possible branch
+    location for the indirectbr instruction [br].
+    See the method [llvm::IndirectBrInst::addDestination]. *)
+val add_destination : llvalue -> llbasicblock -> unit
+
+
+(** [build_invoke fn args tobb unwindbb name b] creates an
+    [%name = invoke %fn(args) to %tobb unwind %unwindbb]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInvoke]. *)
+val build_invoke : llvalue -> llvalue array -> llbasicblock ->
+                   llbasicblock -> string -> llbuilder -> llvalue
+
+
+(** [build_unwind b] creates an
+    [unwind]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateUnwind]. *)
+val build_unwind : llbuilder -> llvalue
+
+(** [build_unreachable b] creates an
+    [unreachable]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateUnreachable]. *)
+val build_unreachable : llbuilder -> llvalue
+
+
+(** {7 Arithmetic} *)
+
+(** [build_add x y name b] creates a
+    [%name = add %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAdd]. *)
+val build_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nsw_add x y name b] creates a
+    [%name = nsw add %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNSWAdd]. *)
+val build_nsw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nuw_add x y name b] creates a
+    [%name = nuw add %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNUWAdd]. *)
+val build_nuw_add : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_fadd x y name b] creates a
+    [%name = fadd %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFAdd]. *)
+val build_fadd : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_sub x y name b] creates a
+    [%name = sub %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSub]. *)
+val build_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nsw_sub x y name b] creates a
+    [%name = nsw sub %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNSWSub]. *)
+val build_nsw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nuw_sub x y name b] creates a
+    [%name = nuw sub %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNUWSub]. *)
+val build_nuw_sub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_fsub x y name b] creates a
+    [%name = fsub %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFSub]. *)
+val build_fsub : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
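+(** Example: an illustrative sketch (assuming integer values [x] and [y] of the
+    same width and a positioned builder [b]) that computes [x + y - 1]:
+    {[
+      let sum = build_add x y "sum" b in
+      let res = build_sub sum (const_int (type_of sum) 1) "res" b in
+      ignore res
+    ]} *)
+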
+(** [build_mul x y name b] creates a
+    [%name = mul %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateMul]. *)
+val build_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nsw_mul x y name b] creates a
+    [%name = nsw mul %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNSWMul]. *)
+val build_nsw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nuw_mul x y name b] creates a
+    [%name = nuw mul %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNUWMul]. *)
+val build_nuw_mul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_fmul x y name b] creates a
+    [%name = fmul %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFMul]. *)
+val build_fmul : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_udiv x y name b] creates a
+    [%name = udiv %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateUDiv]. *)
+val build_udiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_sdiv x y name b] creates a
+    [%name = sdiv %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSDiv]. *)
+val build_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_exact_sdiv x y name b] creates a
+    [%name = exact sdiv %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateExactSDiv]. *)
+val build_exact_sdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_fdiv x y name b] creates a
+    [%name = fdiv %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFDiv]. *)
+val build_fdiv : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_urem x y name b] creates a
+    [%name = urem %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateURem]. *)
+val build_urem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_srem x y name b] creates a
+    [%name = srem %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSRem]. *)
+val build_srem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_frem x y name b] creates a
+    [%name = frem %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFRem]. *)
+val build_frem : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_shl x y name b] creates a
+    [%name = shl %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateShl]. *)
+val build_shl : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_lshr x y name b] creates a
+    [%name = lshr %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateLShr]. *)
+val build_lshr : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
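+(** Example: an illustrative sketch (assuming an [i32] value [x] and a
+    positioned builder [b]) that extracts the high 16 bits with a logical
+    shift:
+    {[
+      let hi = build_lshr x (const_int (type_of x) 16) "hi" b in
+      ignore hi
+    ]} *)
+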
+(** [build_ashr x y name b] creates a
+    [%name = ashr %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAShr]. *)
+val build_ashr : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_and x y name b] creates a
+    [%name = and %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAnd]. *)
+val build_and : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_or x y name b] creates a
+    [%name = or %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateOr]. *)
+val build_or : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_xor x y name b] creates a
+    [%name = xor %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateXor]. *)
+val build_xor : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_neg x name b] creates a
+    [%name = sub 0, %x]
+    instruction at the position specified by the instruction builder [b].
+    [-0.0] is used for floating point types to compute the correct sign.
+    See the method [llvm::LLVMBuilder::CreateNeg]. *)
+val build_neg : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nsw_neg x name b] creates a
+    [%name = nsw sub 0, %x]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNSWNeg]. *)
+val build_nsw_neg : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_nuw_neg x name b] creates a
+    [%name = nuw sub 0, %x]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateNUWNeg]. *)
+val build_nuw_neg : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_fneg x name b] creates a
+    [%name = fsub 0, %x]
+    instruction at the position specified by the instruction builder [b].
+    [-0.0] is used for floating point types to compute the correct sign.
+    See the method [llvm::LLVMBuilder::CreateFNeg]. *)
+val build_fneg : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_not x name b] creates a
+    [%name = xor %x, -1]
+    instruction at the position specified by the instruction builder [b].
+    [-1] is the correct "all ones" value for the type of [x].
+    See the method [llvm::LLVMBuilder::CreateXor]. *)
+val build_not : llvalue -> string -> llbuilder -> llvalue
+
+
+(** {7 Memory} *)
+
+(** [build_alloca ty name b] creates a
+    [%name = alloca %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAlloca]. *)
+val build_alloca : lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_array_alloca ty n name b] creates a
+    [%name = alloca %ty, %n]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateAlloca]. *)
+val build_array_alloca : lltype -> llvalue -> string -> llbuilder ->
+                         llvalue
+
+(** [build_load v name b] creates a
+    [%name = load %v]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateLoad]. *)
+val build_load : llvalue -> string -> llbuilder -> llvalue
+
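+(** Example: an illustrative sketch (assuming a context [ctx] and a positioned
+    builder [b]) that round-trips a value through a stack slot:
+    {[
+      let slot = build_alloca (i32_type ctx) "slot" b in
+      ignore (build_store (const_int (i32_type ctx) 7) slot b);
+      let v = build_load slot "v" b in
+      ignore v
+    ]} *)
+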
+(** [build_store v p b] creates a
+    [store %v, %p]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateStore]. *)
+val build_store : llvalue -> llvalue -> llbuilder -> llvalue
+
+
+(** [build_gep p indices name b] creates a
+    [%name = getelementptr %p, indices...]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateGetElementPtr]. *)
+val build_gep : llvalue -> llvalue array -> string -> llbuilder -> llvalue
+
+
+(** [build_in_bounds_gep p indices name b] creates a
+    [%name = getelementptr inbounds %p, indices...]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInBoundsGetElementPtr]. *)
+val build_in_bounds_gep : llvalue -> llvalue array -> string -> llbuilder ->
+                          llvalue
+
+(** [build_struct_gep p idx name b] creates a
+    [%name = getelementptr %p, 0, idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateStructGetElementPtr]. *)
+val build_struct_gep : llvalue -> int -> string -> llbuilder ->
+                       llvalue
+
+(** [build_global_string str name b] creates a series of instructions that adds
+    a global string at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateGlobalString]. *)
+val build_global_string : string -> string -> llbuilder -> llvalue
+
+
+(** [build_global_stringptr str name b] creates a series of instructions that
+    adds a global string pointer at the position specified by the instruction
+    builder [b].
+    See the method [llvm::LLVMBuilder::CreateGlobalStringPtr]. *)
+val build_global_stringptr : string -> string -> llbuilder -> llvalue
+
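+(** Example: an illustrative sketch (assuming a context [ctx], a pointer [arr]
+    to an array of [i32] and a positioned builder [b]) that addresses element 3
+    of the array:
+    {[
+      let i32 = i32_type ctx in
+      let idx = [| const_int i32 0; const_int i32 3 |] in
+      let elt = build_gep arr idx "elt" b in
+      ignore elt
+    ]} *)
+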
+(** {7 Casts} *)
+
+(** [build_trunc v ty name b] creates a
+    [%name = trunc %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateTrunc]. *)
+val build_trunc : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_zext v ty name b] creates a
+    [%name = zext %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateZExt]. *)
+val build_zext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_sext v ty name b] creates a
+    [%name = sext %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSExt]. *)
+val build_sext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_fptoui v ty name b] creates a
+    [%name = fptoui %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPToUI]. *)
+val build_fptoui : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_fptosi v ty name b] creates a
+    [%name = fptosi %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPToSI]. *)
+val build_fptosi : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_uitofp v ty name b] creates a
+    [%name = uitofp %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateUIToFP]. *)
+val build_uitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_sitofp v ty name b] creates a
+    [%name = sitofp %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSIToFP]. *)
+val build_sitofp : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_fptrunc v ty name b] creates a
+    [%name = fptrunc %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPTrunc]. *)
+val build_fptrunc : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_fpext v ty name b] creates a
+    [%name = fpext %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPExt]. *)
+val build_fpext : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_ptrtoint v ty name b] creates a
+    [%name = ptrtoint %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreatePtrToInt]. *)
+val build_ptrtoint : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_inttoptr v ty name b] creates a
+    [%name = inttoptr %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateIntToPtr]. *)
+val build_inttoptr : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_bitcast v ty name b] creates a
+    [%name = bitcast %v to %ty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateBitCast]. *)
+val build_bitcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_zext_or_bitcast v ty name b] creates a zext or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateZExtOrBitCast]. *)
+val build_zext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                            llvalue
+
+(** [build_sext_or_bitcast v ty name b] creates a sext or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSExtOrBitCast]. *)
+val build_sext_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                            llvalue
+
+(** [build_trunc_or_bitcast v ty name b] creates a trunc or bitcast
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateTruncOrBitCast]. *)
+val build_trunc_or_bitcast : llvalue -> lltype -> string -> llbuilder ->
+                             llvalue
+
+(** [build_pointercast v ty name b] creates a bitcast or pointer-to-int
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreatePointerCast]. *)
+val build_pointercast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_intcast v ty name b] creates a zext, bitcast, or trunc
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateIntCast]. *)
+val build_intcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_fpcast v ty name b] creates a fpext, bitcast, or fptrunc
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFPCast]. *)
+val build_fpcast : llvalue -> lltype -> string -> llbuilder -> llvalue
+
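+(** Example: an illustrative sketch (assuming a context [ctx], an [i8] value
+    [c] holding a non-negative count and a positioned builder [b]) that widens
+    the byte and converts it to a double:
+    {[
+      let wide = build_zext c (i32_type ctx) "wide" b in
+      let dbl  = build_sitofp wide (double_type ctx) "dbl" b in
+      ignore dbl
+    ]} *)
+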
+(** {7 Comparisons} *)
+
+(** [build_icmp pred x y name b] creates a
+    [%name = icmp %pred %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateICmp]. *)
+val build_icmp : Icmp.t -> llvalue -> llvalue -> string ->
+                 llbuilder -> llvalue
+
+(** [build_fcmp pred x y name b] creates a
+    [%name = fcmp %pred %x, %y]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateFCmp]. *)
+val build_fcmp : Fcmp.t -> llvalue -> llvalue -> string ->
+                 llbuilder -> llvalue
+
+
+(** {7 Miscellaneous instructions} *)
+
+(** [build_phi incoming name b] creates a
+    [%name = phi %incoming]
+    instruction at the position specified by the instruction builder [b].
+    [incoming] is a list of [(llvalue * llbasicblock)] pairs.
+    See the method [llvm::LLVMBuilder::CreatePHI]. *)
+val build_phi : (llvalue * llbasicblock) list -> string -> llbuilder ->
+                llvalue
+
+(** [build_call fn args name b] creates a
+    [%name = call %fn(args...)]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateCall]. *)
+val build_call : llvalue -> llvalue array -> string -> llbuilder -> llvalue
+
+
+(** [build_select cond thenv elsev name b] creates a
+    [%name = select %cond, %thenv, %elsev]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateSelect]. *)
+val build_select : llvalue -> llvalue -> llvalue -> string -> llbuilder ->
+                   llvalue
+
+(** [build_va_arg valist argty name b] creates a
+    [%name = va_arg %valist, %argty]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateVAArg]. *)
+val build_va_arg : llvalue -> lltype -> string -> llbuilder -> llvalue
+
+
+(** [build_extractelement vec i name b] creates a
+    [%name = extractelement %vec, %i]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateExtractElement]. *)
+val build_extractelement : llvalue -> llvalue -> string -> llbuilder ->
+                           llvalue
+
+(** [build_insertelement vec elt i name b] creates a
+    [%name = insertelement %vec, %elt, %i]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInsertElement]. *)
+val build_insertelement : llvalue -> llvalue -> llvalue -> string ->
+                          llbuilder -> llvalue
+
+(** [build_shufflevector veca vecb mask name b] creates a
+    [%name = shufflevector %veca, %vecb, %mask]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateShuffleVector]. *)
+val build_shufflevector : llvalue -> llvalue -> llvalue -> string ->
+                          llbuilder -> llvalue
+
+(** [build_extractvalue agg idx name b] creates a
+    [%name = extractvalue %agg, %idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateExtractValue]. *)
+val build_extractvalue : llvalue -> int -> string -> llbuilder -> llvalue
+
+
+(** [build_insertvalue agg val idx name b] creates a
+    [%name = insertvalue %agg, %val, %idx]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateInsertValue]. *)
+val build_insertvalue : llvalue -> llvalue -> int -> string -> llbuilder ->
+                        llvalue
+
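+(** Example: an illustrative sketch (assuming [i32] values [x] and [y] and a
+    positioned builder [b]) that selects the smaller of two signed values:
+    {[
+      let lt  = build_icmp Icmp.Slt x y "lt" b in
+      let min = build_select lt x y "min" b in
+      ignore min
+    ]} *)
+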
+(** [build_is_null val name b] creates a
+    [%name = icmp eq %val, null]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateIsNull]. *)
+val build_is_null : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_is_not_null val name b] creates a
+    [%name = icmp ne %val, null]
+    instruction at the position specified by the instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreateIsNotNull]. *)
+val build_is_not_null : llvalue -> string -> llbuilder -> llvalue
+
+
+(** [build_ptrdiff lhs rhs name b] creates a series of instructions that measure
+    the difference between two pointer values at the position specified by the
+    instruction builder [b].
+    See the method [llvm::LLVMBuilder::CreatePtrDiff]. *)
+val build_ptrdiff : llvalue -> llvalue -> string -> llbuilder -> llvalue
+
+
+(** {6 Memory buffers} *)
+
+module MemoryBuffer : sig
+  (** [of_file p] is the memory buffer containing the contents of the file at
+      path [p]. If the file could not be read, then [IoError msg] is
+      raised. *)
+  val of_file : string -> llmemorybuffer
+
+  (** [of_stdin ()] is the memory buffer containing the contents of standard
+      input. If standard input is empty, then [IoError msg] is raised. *)
+  val of_stdin : unit -> llmemorybuffer
+
+  (** Disposes of a memory buffer. *)
+  val dispose : llmemorybuffer -> unit
+end
+
+
+(** {6 Pass Managers} *)
+
+module PassManager : sig
+  (** *)
+  type 'a t
+  type any = [ `Module | `Function ]
+
+  (** [PassManager.create ()] constructs a new whole-module pass pipeline. This
+      type of pipeline is suitable for link-time optimization and whole-module
+      transformations.
+      See the constructor of [llvm::PassManager]. *)
+  val create : unit -> [ `Module ] t
+
+  (** [PassManager.create_function m] constructs a new function-by-function
+      pass pipeline over the module [m]. It does not take ownership of [m].
+      This type of pipeline is suitable for code generation and JIT compilation
+      tasks.
+      See the constructor of [llvm::FunctionPassManager]. *)
+  val create_function : llmodule -> [ `Function ] t
+
+
+  (** [run_module m pm] initializes, executes on the module [m], and finalizes
+      all of the passes scheduled in the pass manager [pm]. Returns [true] if
+      any of the passes modified the module, [false] otherwise.
+      See the [llvm::PassManager::run] method. *)
+  val run_module : llmodule -> [ `Module ] t -> bool
+
+
+  (** [initialize fpm] initializes all of the function passes scheduled in the
+      function pass manager [fpm]. Returns [true] if any of the passes modified
+      the module, [false] otherwise.
+      See the [llvm::FunctionPassManager::doInitialization] method. *)
+  val initialize : [ `Function ] t -> bool
+
+  (** [run_function f fpm] executes all of the function passes scheduled in the
+      function pass manager [fpm] over the function [f]. Returns [true] if any
+      of the passes modified [f], [false] otherwise.
+      See the [llvm::FunctionPassManager::run] method. *)
+  val run_function : llvalue -> [ `Function ] t -> bool
+
+
+  (** [finalize fpm] finalizes all of the function passes scheduled in the
+      function pass manager [fpm]. Returns [true] if any of the passes
+      modified the module, [false] otherwise.
+      See the [llvm::FunctionPassManager::doFinalization] method. *)
+  val finalize : [ `Function ] t -> bool
+
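+  (** Example: an illustrative sketch (assuming a module [m] and a function
+      [f]; the functions that schedule individual passes live in separate
+      modules and are not shown here) of driving a function pass pipeline:
+      {[
+        let fpm = PassManager.create_function m in
+        ignore (PassManager.initialize fpm);
+        ignore (PassManager.run_function f fpm);
+        ignore (PassManager.finalize fpm);
+        PassManager.dispose fpm
+      ]} *)
+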
+  (** Frees the memory of a pass pipeline. For function pipelines, does not
+      free the module.
+      See the destructor of [llvm::BasePassManager]. *)
+  val dispose : [< any ] t -> unit
+end
diff --git a/final/bindings/ocaml/llvm/llvm_ocaml.c b/final/bindings/ocaml/llvm/llvm_ocaml.c
new file mode 100644
index 00000000000..ce6cf8ea79e
--- /dev/null
+++ b/final/bindings/ocaml/llvm/llvm_ocaml.c
@@ -0,0 +1,1829 @@
+/*===-- llvm_ocaml.c - LLVM Ocaml Glue --------------------------*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file glues LLVM's ocaml interface to its C interface. These functions *|
+|* are by and large transparent wrappers to the corresponding C functions.    *|
+|*                                                                            *|
+|* Note that these functions intentionally take liberties with the CAMLparamX *|
+|* macros, since most of the parameters are not GC heap objects.              *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/Core.h"
+#include "caml/alloc.h"
+#include "caml/custom.h"
+#include "caml/memory.h"
+#include "caml/fail.h"
+#include "caml/callback.h"
+#include "llvm/Config/config.h"
+#include <assert.h>  /* headers restored by the editor; the original diff lost
+                        the angle-bracketed names, and <assert.h>/<stdlib.h>
+                        (needed for malloc/free below) are an assumption */
+#include <stdlib.h>
+
+
+/* Can't use the recommended caml_named_value mechanism for backwards
+   compatibility reasons. This is largely equivalent. */
+static value llvm_ioerror_exn;
+
+CAMLprim value llvm_register_core_exns(value IoError) {
+  llvm_ioerror_exn = Field(IoError, 0);
+  register_global_root(&llvm_ioerror_exn);
+  return Val_unit;
+}
+
+static void llvm_raise(value Prototype, char *Message) {
+  CAMLparam1(Prototype);
+  CAMLlocal1(CamlMessage);
+
+  CamlMessage = copy_string(Message);
+  LLVMDisposeMessage(Message);
+
+  raise_with_arg(Prototype, CamlMessage);
+  abort(); /* NOTREACHED */
+#ifdef CAMLnoreturn
+  CAMLnoreturn; /* Silences warnings, but is missing in some versions. */
+#endif
+}
+
+static value alloc_variant(int tag, void *Value) {
+  value Iter = alloc_small(1, tag);
+  Field(Iter, 0) = Val_op(Value);
+  return Iter;
+}
+
+/* Macro to convert the C first/next/last/prev idiom to the Ocaml llpos/
+   llrev_pos idiom.
*/ +#define DEFINE_ITERATORS(camlname, cname, pty, cty, pfun) \ + /* llmodule -> ('a, 'b) llpos */ \ + CAMLprim value llvm_##camlname##_begin(pty Mom) { \ + cty First = LLVMGetFirst##cname(Mom); \ + if (First) \ + return alloc_variant(1, First); \ + return alloc_variant(0, Mom); \ + } \ + \ + /* llvalue -> ('a, 'b) llpos */ \ + CAMLprim value llvm_##camlname##_succ(cty Kid) { \ + cty Next = LLVMGetNext##cname(Kid); \ + if (Next) \ + return alloc_variant(1, Next); \ + return alloc_variant(0, pfun(Kid)); \ + } \ + \ + /* llmodule -> ('a, 'b) llrev_pos */ \ + CAMLprim value llvm_##camlname##_end(pty Mom) { \ + cty Last = LLVMGetLast##cname(Mom); \ + if (Last) \ + return alloc_variant(1, Last); \ + return alloc_variant(0, Mom); \ + } \ + \ + /* llvalue -> ('a, 'b) llrev_pos */ \ + CAMLprim value llvm_##camlname##_pred(cty Kid) { \ + cty Prev = LLVMGetPrevious##cname(Kid); \ + if (Prev) \ + return alloc_variant(1, Prev); \ + return alloc_variant(0, pfun(Kid)); \ + } + + +/*===-- Contexts ----------------------------------------------------------===*/ + +/* unit -> llcontext */ +CAMLprim LLVMContextRef llvm_create_context(value Unit) { + return LLVMContextCreate(); +} + +/* llcontext -> unit */ +CAMLprim value llvm_dispose_context(LLVMContextRef C) { + LLVMContextDispose(C); + return Val_unit; +} + +/* unit -> llcontext */ +CAMLprim LLVMContextRef llvm_global_context(value Unit) { + return LLVMGetGlobalContext(); +} + +/* llcontext -> string -> int */ +CAMLprim value llvm_mdkind_id(LLVMContextRef C, value Name) { + unsigned MDKindID = LLVMGetMDKindIDInContext(C, String_val(Name), + caml_string_length(Name)); + return Val_int(MDKindID); +} + +/*===-- Modules -----------------------------------------------------------===*/ + +/* llcontext -> string -> llmodule */ +CAMLprim LLVMModuleRef llvm_create_module(LLVMContextRef C, value ModuleID) { + return LLVMModuleCreateWithNameInContext(String_val(ModuleID), C); +} + +/* llmodule -> unit */ +CAMLprim value llvm_dispose_module(LLVMModuleRef M) { + LLVMDisposeModule(M); + return Val_unit; +} + +/* llmodule -> string */ +CAMLprim value llvm_target_triple(LLVMModuleRef M) { + return copy_string(LLVMGetTarget(M)); +} + +/* string -> llmodule -> unit */ +CAMLprim value llvm_set_target_triple(value Trip, LLVMModuleRef M) { + LLVMSetTarget(M, String_val(Trip)); + return Val_unit; +} + +/* llmodule -> string */ +CAMLprim value llvm_data_layout(LLVMModuleRef M) { + return copy_string(LLVMGetDataLayout(M)); +} + +/* string -> llmodule -> unit */ +CAMLprim value llvm_set_data_layout(value Layout, LLVMModuleRef M) { + LLVMSetDataLayout(M, String_val(Layout)); + return Val_unit; +} + +/* string -> lltype -> llmodule -> bool */ +CAMLprim value llvm_add_type_name(value Name, LLVMTypeRef Ty, LLVMModuleRef M) { + int res = LLVMAddTypeName(M, String_val(Name), Ty); + return Val_bool(res == 0); +} + +/* string -> llmodule -> unit */ +CAMLprim value llvm_delete_type_name(value Name, LLVMModuleRef M) { + LLVMDeleteTypeName(M, String_val(Name)); + return Val_unit; +} + +/* llmodule -> string -> lltype option */ +CAMLprim value llvm_type_by_name(LLVMModuleRef M, value Name) { + CAMLparam1(Name); + LLVMTypeRef T; + if ((T = LLVMGetTypeByName(M, String_val(Name)))) { + value Option = alloc(1, 0); + Field(Option, 0) = (value) T; + CAMLreturn(Option); + } + CAMLreturn(Val_int(0)); +} + +/* llmodule -> unit */ +CAMLprim value llvm_dump_module(LLVMModuleRef M) { + LLVMDumpModule(M); + return Val_unit; +} + +/* llmodule -> string -> unit */ +CAMLprim value 
llvm_set_module_inline_asm(LLVMModuleRef M, value Asm) { + LLVMSetModuleInlineAsm(M, String_val(Asm)); + return Val_unit; +} + +/*===-- Types -------------------------------------------------------------===*/ + +/* lltype -> TypeKind.t */ +CAMLprim value llvm_classify_type(LLVMTypeRef Ty) { + return Val_int(LLVMGetTypeKind(Ty)); +} + +/* lltype -> llcontext */ +CAMLprim LLVMContextRef llvm_type_context(LLVMTypeRef Ty) { + return LLVMGetTypeContext(Ty); +} + +/*--... Operations on integer types ........................................--*/ + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i1_type (LLVMContextRef Context) { + return LLVMInt1TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i8_type (LLVMContextRef Context) { + return LLVMInt8TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i16_type (LLVMContextRef Context) { + return LLVMInt16TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i32_type (LLVMContextRef Context) { + return LLVMInt32TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_i64_type (LLVMContextRef Context) { + return LLVMInt64TypeInContext(Context); +} + +/* llcontext -> int -> lltype */ +CAMLprim LLVMTypeRef llvm_integer_type(LLVMContextRef Context, value Width) { + return LLVMIntTypeInContext(Context, Int_val(Width)); +} + +/* lltype -> int */ +CAMLprim value llvm_integer_bitwidth(LLVMTypeRef IntegerTy) { + return Val_int(LLVMGetIntTypeWidth(IntegerTy)); +} + +/*--... Operations on real types ...........................................--*/ + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_float_type(LLVMContextRef Context) { + return LLVMFloatTypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_double_type(LLVMContextRef Context) { + return LLVMDoubleTypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_x86fp80_type(LLVMContextRef Context) { + return LLVMX86FP80TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_fp128_type(LLVMContextRef Context) { + return LLVMFP128TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_ppc_fp128_type(LLVMContextRef Context) { + return LLVMPPCFP128TypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_x86mmx_type(LLVMContextRef Context) { + return LLVMX86MMXTypeInContext(Context); +} + +/*--... Operations on function types .......................................--*/ + +/* lltype -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_function_type(LLVMTypeRef RetTy, value ParamTys) { + return LLVMFunctionType(RetTy, (LLVMTypeRef *) ParamTys, + Wosize_val(ParamTys), 0); +} + +/* lltype -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_var_arg_function_type(LLVMTypeRef RetTy, + value ParamTys) { + return LLVMFunctionType(RetTy, (LLVMTypeRef *) ParamTys, + Wosize_val(ParamTys), 1); +} + +/* lltype -> bool */ +CAMLprim value llvm_is_var_arg(LLVMTypeRef FunTy) { + return Val_bool(LLVMIsFunctionVarArg(FunTy)); +} + +/* lltype -> lltype array */ +CAMLprim value llvm_param_types(LLVMTypeRef FunTy) { + value Tys = alloc(LLVMCountParamTypes(FunTy), 0); + LLVMGetParamTypes(FunTy, (LLVMTypeRef *) Tys); + return Tys; +} + +/*--... 
Operations on struct types .........................................--*/ + +/* llcontext -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_struct_type(LLVMContextRef C, value ElementTypes) { + return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes, + Wosize_val(ElementTypes), 0); +} + +/* llcontext -> lltype array -> lltype */ +CAMLprim LLVMTypeRef llvm_packed_struct_type(LLVMContextRef C, + value ElementTypes) { + return LLVMStructTypeInContext(C, (LLVMTypeRef *) ElementTypes, + Wosize_val(ElementTypes), 1); +} + +/* lltype -> lltype array */ +CAMLprim value llvm_struct_element_types(LLVMTypeRef StructTy) { + value Tys = alloc(LLVMCountStructElementTypes(StructTy), 0); + LLVMGetStructElementTypes(StructTy, (LLVMTypeRef *) Tys); + return Tys; +} + +/* lltype -> bool */ +CAMLprim value llvm_is_packed(LLVMTypeRef StructTy) { + return Val_bool(LLVMIsPackedStruct(StructTy)); +} + +/*--... Operations on array, pointer, and vector types .....................--*/ + +/* lltype -> int -> lltype */ +CAMLprim LLVMTypeRef llvm_array_type(LLVMTypeRef ElementTy, value Count) { + return LLVMArrayType(ElementTy, Int_val(Count)); +} + +/* lltype -> lltype */ +CAMLprim LLVMTypeRef llvm_pointer_type(LLVMTypeRef ElementTy) { + return LLVMPointerType(ElementTy, 0); +} + +/* lltype -> int -> lltype */ +CAMLprim LLVMTypeRef llvm_qualified_pointer_type(LLVMTypeRef ElementTy, + value AddressSpace) { + return LLVMPointerType(ElementTy, Int_val(AddressSpace)); +} + +/* lltype -> int -> lltype */ +CAMLprim LLVMTypeRef llvm_vector_type(LLVMTypeRef ElementTy, value Count) { + return LLVMVectorType(ElementTy, Int_val(Count)); +} + +/* lltype -> int */ +CAMLprim value llvm_array_length(LLVMTypeRef ArrayTy) { + return Val_int(LLVMGetArrayLength(ArrayTy)); +} + +/* lltype -> int */ +CAMLprim value llvm_address_space(LLVMTypeRef PtrTy) { + return Val_int(LLVMGetPointerAddressSpace(PtrTy)); +} + +/* lltype -> int */ +CAMLprim value llvm_vector_size(LLVMTypeRef VectorTy) { + return Val_int(LLVMGetVectorSize(VectorTy)); +} + +/*--... Operations on other types ..........................................--*/ + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_void_type (LLVMContextRef Context) { + return LLVMVoidTypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_label_type(LLVMContextRef Context) { + return LLVMLabelTypeInContext(Context); +} + +/* llcontext -> lltype */ +CAMLprim LLVMTypeRef llvm_opaque_type(LLVMContextRef Context) { + return LLVMOpaqueTypeInContext(Context); +} + +/*--... 
Operations on type handles .........................................--*/ + +#define Typehandle_val(v) (*(LLVMTypeHandleRef *)(Data_custom_val(v))) + +static void llvm_finalize_handle(value TH) { + LLVMDisposeTypeHandle(Typehandle_val(TH)); +} + +static struct custom_operations typehandle_ops = { + (char *) "LLVMTypeHandle", + llvm_finalize_handle, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default +}; + +CAMLprim value llvm_handle_to_type(LLVMTypeRef PATy) { + value TH = alloc_custom(&typehandle_ops, sizeof(LLVMBuilderRef), 0, 1); + Typehandle_val(TH) = LLVMCreateTypeHandle(PATy); + return TH; +} + +CAMLprim LLVMTypeRef llvm_type_of_handle(value TH) { + return LLVMResolveTypeHandle(Typehandle_val(TH)); +} + +CAMLprim value llvm_refine_type(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy){ + LLVMRefineType(AbstractTy, ConcreteTy); + return Val_unit; +} + + +/*===-- VALUES ------------------------------------------------------------===*/ + +/* llvalue -> lltype */ +CAMLprim LLVMTypeRef llvm_type_of(LLVMValueRef Val) { + return LLVMTypeOf(Val); +} + +/* llvalue -> string */ +CAMLprim value llvm_value_name(LLVMValueRef Val) { + return copy_string(LLVMGetValueName(Val)); +} + +/* string -> llvalue -> unit */ +CAMLprim value llvm_set_value_name(value Name, LLVMValueRef Val) { + LLVMSetValueName(Val, String_val(Name)); + return Val_unit; +} + +/* llvalue -> unit */ +CAMLprim value llvm_dump_value(LLVMValueRef Val) { + LLVMDumpValue(Val); + return Val_unit; +} + +/*--... Operations on users ................................................--*/ + +/* llvalue -> int -> llvalue */ +CAMLprim LLVMValueRef llvm_operand(LLVMValueRef V, value I) { + return LLVMGetOperand(V, Int_val(I)); +} + +/* llvalue -> int -> llvalue -> unit */ +CAMLprim value llvm_set_operand(LLVMValueRef U, value I, LLVMValueRef V) { + LLVMSetOperand(U, Int_val(I), V); + return Val_unit; +} + +/* llvalue -> int */ +CAMLprim value llvm_num_operands(LLVMValueRef V) { + return Val_int(LLVMGetNumOperands(V)); +} + +/*--... Operations on constants of (mostly) any type .......................--*/ + +/* llvalue -> bool */ +CAMLprim value llvm_is_constant(LLVMValueRef Val) { + return Val_bool(LLVMIsConstant(Val)); +} + +/* llvalue -> bool */ +CAMLprim value llvm_is_null(LLVMValueRef Val) { + return Val_bool(LLVMIsNull(Val)); +} + +/* llvalue -> bool */ +CAMLprim value llvm_is_undef(LLVMValueRef Val) { + return Val_bool(LLVMIsUndef(Val)); +} + +/*--... Operations on instructions .........................................--*/ + +/* llvalue -> bool */ +CAMLprim value llvm_has_metadata(LLVMValueRef Val) { + return Val_bool(LLVMHasMetadata(Val)); +} + +/* llvalue -> int -> llvalue option */ +CAMLprim value llvm_metadata(LLVMValueRef Val, value MDKindID) { + CAMLparam1(MDKindID); + LLVMValueRef MD; + if ((MD = LLVMGetMetadata(Val, Int_val(MDKindID)))) { + value Option = alloc(1, 0); + Field(Option, 0) = (value) MD; + CAMLreturn(Option); + } + CAMLreturn(Val_int(0)); +} + +/* llvalue -> int -> llvalue -> unit */ +CAMLprim value llvm_set_metadata(LLVMValueRef Val, value MDKindID, + LLVMValueRef MD) { + LLVMSetMetadata(Val, Int_val(MDKindID), MD); + return Val_unit; +} + +/* llvalue -> int -> unit */ +CAMLprim value llvm_clear_metadata(LLVMValueRef Val, value MDKindID) { + LLVMSetMetadata(Val, Int_val(MDKindID), NULL); + return Val_unit; +} + + +/*--... 
Operations on metadata .............................................--*/ + +/* llcontext -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_mdstring(LLVMContextRef C, value S) { + return LLVMMDStringInContext(C, String_val(S), caml_string_length(S)); +} + +/* llcontext -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_mdnode(LLVMContextRef C, value ElementVals) { + return LLVMMDNodeInContext(C, (LLVMValueRef*) Op_val(ElementVals), + Wosize_val(ElementVals)); +} + +/*--... Operations on scalar constants .....................................--*/ + +/* lltype -> int -> llvalue */ +CAMLprim LLVMValueRef llvm_const_int(LLVMTypeRef IntTy, value N) { + return LLVMConstInt(IntTy, (long long) Int_val(N), 1); +} + +/* lltype -> Int64.t -> bool -> llvalue */ +CAMLprim LLVMValueRef llvm_const_of_int64(LLVMTypeRef IntTy, value N, + value SExt) { + return LLVMConstInt(IntTy, Int64_val(N), Bool_val(SExt)); +} + +/* lltype -> string -> int -> llvalue */ +CAMLprim LLVMValueRef llvm_const_int_of_string(LLVMTypeRef IntTy, value S, + value Radix) { + return LLVMConstIntOfStringAndSize(IntTy, String_val(S), caml_string_length(S), + Int_val(Radix)); +} + +/* lltype -> float -> llvalue */ +CAMLprim LLVMValueRef llvm_const_float(LLVMTypeRef RealTy, value N) { + return LLVMConstReal(RealTy, Double_val(N)); +} + +/* lltype -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_float_of_string(LLVMTypeRef RealTy, value S) { + return LLVMConstRealOfStringAndSize(RealTy, String_val(S), + caml_string_length(S)); +} + +/*--... Operations on composite constants ..................................--*/ + +/* llcontext -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_string(LLVMContextRef Context, value Str, + value NullTerminate) { + return LLVMConstStringInContext(Context, String_val(Str), string_length(Str), + 1); +} + +/* llcontext -> string -> llvalue */ +CAMLprim LLVMValueRef llvm_const_stringz(LLVMContextRef Context, value Str, + value NullTerminate) { + return LLVMConstStringInContext(Context, String_val(Str), string_length(Str), + 0); +} + +/* lltype -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_array(LLVMTypeRef ElementTy, + value ElementVals) { + return LLVMConstArray(ElementTy, (LLVMValueRef*) Op_val(ElementVals), + Wosize_val(ElementVals)); +} + +/* llcontext -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_struct(LLVMContextRef C, value ElementVals) { + return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals), + Wosize_val(ElementVals), 0); +} + +/* llcontext -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_packed_struct(LLVMContextRef C, + value ElementVals) { + return LLVMConstStructInContext(C, (LLVMValueRef *) Op_val(ElementVals), + Wosize_val(ElementVals), 1); +} + +/* llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_vector(value ElementVals) { + return LLVMConstVector((LLVMValueRef*) Op_val(ElementVals), + Wosize_val(ElementVals)); +} + +/*--... 
Constant expressions ...............................................--*/ + +/* Icmp.t -> llvalue -> llvalue -> llvalue */ +CAMLprim LLVMValueRef llvm_const_icmp(value Pred, + LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return LLVMConstICmp(Int_val(Pred) + LLVMIntEQ, LHSConstant, RHSConstant); +} + +/* Fcmp.t -> llvalue -> llvalue -> llvalue */ +CAMLprim LLVMValueRef llvm_const_fcmp(value Pred, + LLVMValueRef LHSConstant, + LLVMValueRef RHSConstant) { + return LLVMConstFCmp(Int_val(Pred), LHSConstant, RHSConstant); +} + +/* llvalue -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_gep(LLVMValueRef ConstantVal, value Indices) { + return LLVMConstGEP(ConstantVal, (LLVMValueRef*) Op_val(Indices), + Wosize_val(Indices)); +} + +/* llvalue -> llvalue array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_in_bounds_gep(LLVMValueRef ConstantVal, + value Indices) { + return LLVMConstInBoundsGEP(ConstantVal, (LLVMValueRef*) Op_val(Indices), + Wosize_val(Indices)); +} + +/* llvalue -> int array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_extractvalue(LLVMValueRef Aggregate, + value Indices) { + CAMLparam1(Indices); + int size = Wosize_val(Indices); + int i; + LLVMValueRef result; + + unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned)); + for (i = 0; i < size; i++) { + idxs[i] = Int_val(Field(Indices, i)); + } + + result = LLVMConstExtractValue(Aggregate, idxs, size); + free(idxs); + CAMLreturnT(LLVMValueRef, result); +} + +/* llvalue -> llvalue -> int array -> llvalue */ +CAMLprim LLVMValueRef llvm_const_insertvalue(LLVMValueRef Aggregate, + LLVMValueRef Val, value Indices) { + CAMLparam1(Indices); + int size = Wosize_val(Indices); + int i; + LLVMValueRef result; + + unsigned* idxs = (unsigned*)malloc(size * sizeof(unsigned)); + for (i = 0; i < size; i++) { + idxs[i] = Int_val(Field(Indices, i)); + } + + result = LLVMConstInsertValue(Aggregate, Val, idxs, size); + free(idxs); + CAMLreturnT(LLVMValueRef, result); +} + +/* lltype -> string -> string -> bool -> bool -> llvalue */ +CAMLprim LLVMValueRef llvm_const_inline_asm(LLVMTypeRef Ty, value Asm, + value Constraints, value HasSideEffects, + value IsAlignStack) { + return LLVMConstInlineAsm(Ty, String_val(Asm), String_val(Constraints), + Bool_val(HasSideEffects), Bool_val(IsAlignStack)); +} + +/*--... 
Operations on global variables, functions, and aliases (globals) ...--*/ + +/* llvalue -> bool */ +CAMLprim value llvm_is_declaration(LLVMValueRef Global) { + return Val_bool(LLVMIsDeclaration(Global)); +} + +/* llvalue -> Linkage.t */ +CAMLprim value llvm_linkage(LLVMValueRef Global) { + return Val_int(LLVMGetLinkage(Global)); +} + +/* Linkage.t -> llvalue -> unit */ +CAMLprim value llvm_set_linkage(value Linkage, LLVMValueRef Global) { + LLVMSetLinkage(Global, Int_val(Linkage)); + return Val_unit; +} + +/* llvalue -> string */ +CAMLprim value llvm_section(LLVMValueRef Global) { + return copy_string(LLVMGetSection(Global)); +} + +/* string -> llvalue -> unit */ +CAMLprim value llvm_set_section(value Section, LLVMValueRef Global) { + LLVMSetSection(Global, String_val(Section)); + return Val_unit; +} + +/* llvalue -> Visibility.t */ +CAMLprim value llvm_visibility(LLVMValueRef Global) { + return Val_int(LLVMGetVisibility(Global)); +} + +/* Visibility.t -> llvalue -> unit */ +CAMLprim value llvm_set_visibility(value Viz, LLVMValueRef Global) { + LLVMSetVisibility(Global, Int_val(Viz)); + return Val_unit; +} + +/* llvalue -> int */ +CAMLprim value llvm_alignment(LLVMValueRef Global) { + return Val_int(LLVMGetAlignment(Global)); +} + +/* int -> llvalue -> unit */ +CAMLprim value llvm_set_alignment(value Bytes, LLVMValueRef Global) { + LLVMSetAlignment(Global, Int_val(Bytes)); + return Val_unit; +} + +/*--... Operations on uses .................................................--*/ + +/* llvalue -> lluse option */ +CAMLprim value llvm_use_begin(LLVMValueRef Val) { + CAMLparam0(); + LLVMUseRef First; + if ((First = LLVMGetFirstUse(Val))) { + value Option = alloc(1, 0); + Field(Option, 0) = (value) First; + CAMLreturn(Option); + } + CAMLreturn(Val_int(0)); +} + +/* lluse -> lluse option */ +CAMLprim value llvm_use_succ(LLVMUseRef U) { + CAMLparam0(); + LLVMUseRef Next; + if ((Next = LLVMGetNextUse(U))) { + value Option = alloc(1, 0); + Field(Option, 0) = (value) Next; + CAMLreturn(Option); + } + CAMLreturn(Val_int(0)); +} + +/* lluse -> llvalue */ +CAMLprim LLVMValueRef llvm_user(LLVMUseRef UR) { + return LLVMGetUser(UR); +} + +/* lluse -> llvalue */ +CAMLprim LLVMValueRef llvm_used_value(LLVMUseRef UR) { + return LLVMGetUsedValue(UR); +} + +/*--... 
Operations on global variables .....................................--*/
+
+DEFINE_ITERATORS(global, Global, LLVMModuleRef, LLVMValueRef,
+                 LLVMGetGlobalParent)
+
+/* lltype -> string -> llmodule -> llvalue */
+CAMLprim LLVMValueRef llvm_declare_global(LLVMTypeRef Ty, value Name,
+                                          LLVMModuleRef M) {
+  LLVMValueRef GlobalVar;
+  if ((GlobalVar = LLVMGetNamedGlobal(M, String_val(Name)))) {
+    if (LLVMGetElementType(LLVMTypeOf(GlobalVar)) != Ty)
+      return LLVMConstBitCast(GlobalVar, LLVMPointerType(Ty, 0));
+    return GlobalVar;
+  }
+  return LLVMAddGlobal(M, Ty, String_val(Name));
+}
+
+/* lltype -> string -> int -> llmodule -> llvalue */
+CAMLprim LLVMValueRef llvm_declare_qualified_global(LLVMTypeRef Ty, value Name,
+                                                    value AddressSpace,
+                                                    LLVMModuleRef M) {
+  LLVMValueRef GlobalVar;
+  if ((GlobalVar = LLVMGetNamedGlobal(M, String_val(Name)))) {
+    if (LLVMGetElementType(LLVMTypeOf(GlobalVar)) != Ty)
+      return LLVMConstBitCast(GlobalVar,
+                              LLVMPointerType(Ty, Int_val(AddressSpace)));
+    return GlobalVar;
+  }
+  return LLVMAddGlobalInAddressSpace(M, Ty, String_val(Name),
+                                     Int_val(AddressSpace));
+}
+
+/* string -> llmodule -> llvalue option */
+CAMLprim value llvm_lookup_global(value Name, LLVMModuleRef M) {
+  CAMLparam1(Name);
+  LLVMValueRef GlobalVar;
+  if ((GlobalVar = LLVMGetNamedGlobal(M, String_val(Name)))) {
+    value Option = alloc(1, 0);
+    Field(Option, 0) = (value) GlobalVar;
+    CAMLreturn(Option);
+  }
+  CAMLreturn(Val_int(0));
+}
+
+/* string -> llvalue -> llmodule -> llvalue */
+CAMLprim LLVMValueRef llvm_define_global(value Name, LLVMValueRef Initializer,
+                                         LLVMModuleRef M) {
+  LLVMValueRef GlobalVar = LLVMAddGlobal(M, LLVMTypeOf(Initializer),
+                                         String_val(Name));
+  LLVMSetInitializer(GlobalVar, Initializer);
+  return GlobalVar;
+}
+
+/* string -> llvalue -> int -> llmodule -> llvalue */
+CAMLprim LLVMValueRef llvm_define_qualified_global(value Name,
+                                                   LLVMValueRef Initializer,
+                                                   value AddressSpace,
+                                                   LLVMModuleRef M) {
+  LLVMValueRef GlobalVar = LLVMAddGlobalInAddressSpace(M,
+                                                       LLVMTypeOf(Initializer),
+                                                       String_val(Name),
+                                                       Int_val(AddressSpace));
+  LLVMSetInitializer(GlobalVar, Initializer);
+  return GlobalVar;
+}
+
+/* llvalue -> unit */
+CAMLprim value llvm_delete_global(LLVMValueRef GlobalVar) {
+  LLVMDeleteGlobal(GlobalVar);
+  return Val_unit;
+}
+
+/* llvalue -> llvalue -> unit */
+CAMLprim value llvm_set_initializer(LLVMValueRef ConstantVal,
+                                    LLVMValueRef GlobalVar) {
+  LLVMSetInitializer(GlobalVar, ConstantVal);
+  return Val_unit;
+}
+
+/* llvalue -> unit */
+CAMLprim value llvm_remove_initializer(LLVMValueRef GlobalVar) {
+  LLVMSetInitializer(GlobalVar, NULL);
+  return Val_unit;
+}
+
+/* llvalue -> bool */
+CAMLprim value llvm_is_thread_local(LLVMValueRef GlobalVar) {
+  return Val_bool(LLVMIsThreadLocal(GlobalVar));
+}
+
+/* bool -> llvalue -> unit */
+CAMLprim value llvm_set_thread_local(value IsThreadLocal,
+                                     LLVMValueRef GlobalVar) {
+  LLVMSetThreadLocal(GlobalVar, Bool_val(IsThreadLocal));
+  return Val_unit;
+}
+
+/* llvalue -> bool */
+CAMLprim value llvm_is_global_constant(LLVMValueRef GlobalVar) {
+  return Val_bool(LLVMIsGlobalConstant(GlobalVar));
+}
+
+/* bool -> llvalue -> unit */
+CAMLprim value llvm_set_global_constant(value Flag, LLVMValueRef GlobalVar) {
+  LLVMSetGlobalConstant(GlobalVar, Bool_val(Flag));
+  return Val_unit;
+}
+
+/*--... Operations on aliases ..............................................--*/
+
+/* llmodule -> lltype -> llvalue -> string -> llvalue */
+CAMLprim LLVMValueRef llvm_add_alias(LLVMModuleRef M, LLVMTypeRef Ty,
+                                     LLVMValueRef Aliasee, value Name) {
+  return LLVMAddAlias(M, Ty, Aliasee, String_val(Name));
+}
+
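+/* Illustrative sketch (not part of the original source): typical OCaml usage
+   of the global-variable stubs above; [ctx] and [m] stand for an existing
+   llcontext and llmodule:
+
+     let i32 = Llvm.i32_type ctx in
+     let g = Llvm.define_global "counter" (Llvm.const_int i32 0) m in
+     Llvm.set_global_constant false g;
+     (match Llvm.lookup_global "counter" m with
+      | Some g' -> assert (g == g')
+      | None -> assert false) */
+
+/*--... 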
Operations on functions ............................................--*/ + +DEFINE_ITERATORS(function, Function, LLVMModuleRef, LLVMValueRef, + LLVMGetGlobalParent) + +/* string -> lltype -> llmodule -> llvalue */ +CAMLprim LLVMValueRef llvm_declare_function(value Name, LLVMTypeRef Ty, + LLVMModuleRef M) { + LLVMValueRef Fn; + if ((Fn = LLVMGetNamedFunction(M, String_val(Name)))) { + if (LLVMGetElementType(LLVMTypeOf(Fn)) != Ty) + return LLVMConstBitCast(Fn, LLVMPointerType(Ty, 0)); + return Fn; + } + return LLVMAddFunction(M, String_val(Name), Ty); +} + +/* string -> llmodule -> llvalue option */ +CAMLprim value llvm_lookup_function(value Name, LLVMModuleRef M) { + CAMLparam1(Name); + LLVMValueRef Fn; + if ((Fn = LLVMGetNamedFunction(M, String_val(Name)))) { + value Option = alloc(1, 0); + Field(Option, 0) = (value) Fn; + CAMLreturn(Option); + } + CAMLreturn(Val_int(0)); +} + +/* string -> lltype -> llmodule -> llvalue */ +CAMLprim LLVMValueRef llvm_define_function(value Name, LLVMTypeRef Ty, + LLVMModuleRef M) { + LLVMValueRef Fn = LLVMAddFunction(M, String_val(Name), Ty); + LLVMAppendBasicBlockInContext(LLVMGetTypeContext(Ty), Fn, "entry"); + return Fn; +} + +/* llvalue -> unit */ +CAMLprim value llvm_delete_function(LLVMValueRef Fn) { + LLVMDeleteFunction(Fn); + return Val_unit; +} + +/* llvalue -> bool */ +CAMLprim value llvm_is_intrinsic(LLVMValueRef Fn) { + return Val_bool(LLVMGetIntrinsicID(Fn)); +} + +/* llvalue -> int */ +CAMLprim value llvm_function_call_conv(LLVMValueRef Fn) { + return Val_int(LLVMGetFunctionCallConv(Fn)); +} + +/* int -> llvalue -> unit */ +CAMLprim value llvm_set_function_call_conv(value Id, LLVMValueRef Fn) { + LLVMSetFunctionCallConv(Fn, Int_val(Id)); + return Val_unit; +} + +/* llvalue -> string option */ +CAMLprim value llvm_gc(LLVMValueRef Fn) { + const char *GC; + CAMLparam0(); + CAMLlocal2(Name, Option); + + if ((GC = LLVMGetGC(Fn))) { + Name = copy_string(GC); + + Option = alloc(1, 0); + Field(Option, 0) = Name; + CAMLreturn(Option); + } else { + CAMLreturn(Val_int(0)); + } +} + +/* string option -> llvalue -> unit */ +CAMLprim value llvm_set_gc(value GC, LLVMValueRef Fn) { + LLVMSetGC(Fn, GC == Val_int(0)? 0 : String_val(Field(GC, 0))); + return Val_unit; +} + +/* llvalue -> Attribute.t -> unit */ +CAMLprim value llvm_add_function_attr(LLVMValueRef Arg, value PA) { + LLVMAddFunctionAttr(Arg, Int_val(PA)); + return Val_unit; +} + +/* llvalue -> Attribute.t -> unit */ +CAMLprim value llvm_remove_function_attr(LLVMValueRef Arg, value PA) { + LLVMRemoveFunctionAttr(Arg, Int_val(PA)); + return Val_unit; +} +/*--... 
Operations on parameters ...........................................--*/
+
+DEFINE_ITERATORS(param, Param, LLVMValueRef, LLVMValueRef, LLVMGetParamParent)
+
+/* llvalue -> int -> llvalue */
+CAMLprim LLVMValueRef llvm_param(LLVMValueRef Fn, value Index) {
+  return LLVMGetParam(Fn, Int_val(Index));
+}
+
+/* llvalue -> llvalue array */
+CAMLprim value llvm_params(LLVMValueRef Fn) {
+  value Params = alloc(LLVMCountParams(Fn), 0);
+  LLVMGetParams(Fn, (LLVMValueRef *) Op_val(Params));
+  return Params;
+}
+
+/* llvalue -> Attribute.t -> unit */
+CAMLprim value llvm_add_param_attr(LLVMValueRef Arg, value PA) {
+  LLVMAddAttribute(Arg, Int_val(PA));
+  return Val_unit;
+}
+
+/* llvalue -> Attribute.t -> unit */
+CAMLprim value llvm_remove_param_attr(LLVMValueRef Arg, value PA) {
+  LLVMRemoveAttribute(Arg, Int_val(PA));
+  return Val_unit;
+}
+
+/* llvalue -> int -> unit */
+CAMLprim value llvm_set_param_alignment(LLVMValueRef Arg, value align) {
+  LLVMSetParamAlignment(Arg, Int_val(align));
+  return Val_unit;
+}
+
+/*--... Operations on basic blocks .........................................--*/
+
+DEFINE_ITERATORS(
+  block, BasicBlock, LLVMValueRef, LLVMBasicBlockRef, LLVMGetBasicBlockParent)
+
+/* llvalue -> llbasicblock array */
+CAMLprim value llvm_basic_blocks(LLVMValueRef Fn) {
+  value MLArray = alloc(LLVMCountBasicBlocks(Fn), 0);
+  LLVMGetBasicBlocks(Fn, (LLVMBasicBlockRef *) Op_val(MLArray));
+  return MLArray;
+}
+
+/* llbasicblock -> unit */
+CAMLprim value llvm_delete_block(LLVMBasicBlockRef BB) {
+  LLVMDeleteBasicBlock(BB);
+  return Val_unit;
+}
+
+/* llcontext -> string -> llvalue -> llbasicblock */
+CAMLprim LLVMBasicBlockRef llvm_append_block(LLVMContextRef Context, value Name,
+                                             LLVMValueRef Fn) {
+  return LLVMAppendBasicBlockInContext(Context, Fn, String_val(Name));
+}
+
+/* llcontext -> string -> llbasicblock -> llbasicblock */
+CAMLprim LLVMBasicBlockRef llvm_insert_block(LLVMContextRef Context, value Name,
+                                             LLVMBasicBlockRef BB) {
+  return LLVMInsertBasicBlockInContext(Context, BB, String_val(Name));
+}
+
+/* llvalue -> bool */
+CAMLprim value llvm_value_is_block(LLVMValueRef Val) {
+  return Val_bool(LLVMValueIsBasicBlock(Val));
+}
+
+/*--... Operations on instructions .........................................--*/
+
+DEFINE_ITERATORS(instr, Instruction, LLVMBasicBlockRef, LLVMValueRef,
+                 LLVMGetInstructionParent)
+
+
+/*--... Operations on call sites ...........................................--*/
+
+/* llvalue -> int */
+CAMLprim value llvm_instruction_call_conv(LLVMValueRef Inst) {
+  return Val_int(LLVMGetInstructionCallConv(Inst));
+}
+
+/* int -> llvalue -> unit */
+CAMLprim value llvm_set_instruction_call_conv(value CC, LLVMValueRef Inst) {
+  LLVMSetInstructionCallConv(Inst, Int_val(CC));
+  return Val_unit;
+}
+
+/* llvalue -> int -> Attribute.t -> unit */
+CAMLprim value llvm_add_instruction_param_attr(LLVMValueRef Instr,
+                                               value index,
+                                               value PA) {
+  LLVMAddInstrAttribute(Instr, Int_val(index), Int_val(PA));
+  return Val_unit;
+}
+
+/* llvalue -> int -> Attribute.t -> unit */
+CAMLprim value llvm_remove_instruction_param_attr(LLVMValueRef Instr,
+                                                  value index,
+                                                  value PA) {
+  LLVMRemoveInstrAttribute(Instr, Int_val(index), Int_val(PA));
+  return Val_unit;
+}
+
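+/* Illustrative sketch (not part of the original source): naming every
+   parameter of a function [fn] from OCaml via the stubs above:
+
+     Array.iteri
+       (fun i p -> Llvm.set_value_name (Printf.sprintf "arg%d" i) p)
+       (Llvm.params fn) */
+
+/*--... 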
Operations on call instructions (only) .............................--*/ + +/* llvalue -> bool */ +CAMLprim value llvm_is_tail_call(LLVMValueRef CallInst) { + return Val_bool(LLVMIsTailCall(CallInst)); +} + +/* bool -> llvalue -> unit */ +CAMLprim value llvm_set_tail_call(value IsTailCall, + LLVMValueRef CallInst) { + LLVMSetTailCall(CallInst, Bool_val(IsTailCall)); + return Val_unit; +} + +/*--... Operations on phi nodes ............................................--*/ + +/* (llvalue * llbasicblock) -> llvalue -> unit */ +CAMLprim value llvm_add_incoming(value Incoming, LLVMValueRef PhiNode) { + LLVMAddIncoming(PhiNode, + (LLVMValueRef*) &Field(Incoming, 0), + (LLVMBasicBlockRef*) &Field(Incoming, 1), + 1); + return Val_unit; +} + +/* llvalue -> (llvalue * llbasicblock) list */ +CAMLprim value llvm_incoming(LLVMValueRef PhiNode) { + unsigned I; + CAMLparam0(); + CAMLlocal3(Hd, Tl, Tmp); + + /* Build a tuple list of them. */ + Tl = Val_int(0); + for (I = LLVMCountIncoming(PhiNode); I != 0; ) { + Hd = alloc(2, 0); + Store_field(Hd, 0, (value) LLVMGetIncomingValue(PhiNode, --I)); + Store_field(Hd, 1, (value) LLVMGetIncomingBlock(PhiNode, I)); + + Tmp = alloc(2, 0); + Store_field(Tmp, 0, Hd); + Store_field(Tmp, 1, Tl); + Tl = Tmp; + } + + CAMLreturn(Tl); +} + + +/*===-- Instruction builders ----------------------------------------------===*/ + +#define Builder_val(v) (*(LLVMBuilderRef *)(Data_custom_val(v))) + +static void llvm_finalize_builder(value B) { + LLVMDisposeBuilder(Builder_val(B)); +} + +static struct custom_operations builder_ops = { + (char *) "IRBuilder", + llvm_finalize_builder, + custom_compare_default, + custom_hash_default, + custom_serialize_default, + custom_deserialize_default +}; + +static value alloc_builder(LLVMBuilderRef B) { + value V = alloc_custom(&builder_ops, sizeof(LLVMBuilderRef), 0, 1); + Builder_val(V) = B; + return V; +} + +/* llcontext -> llbuilder */ +CAMLprim value llvm_builder(LLVMContextRef C) { + return alloc_builder(LLVMCreateBuilderInContext(C)); +} + +/* (llbasicblock, llvalue) llpos -> llbuilder -> unit */ +CAMLprim value llvm_position_builder(value Pos, value B) { + if (Tag_val(Pos) == 0) { + LLVMBasicBlockRef BB = (LLVMBasicBlockRef) Op_val(Field(Pos, 0)); + LLVMPositionBuilderAtEnd(Builder_val(B), BB); + } else { + LLVMValueRef I = (LLVMValueRef) Op_val(Field(Pos, 0)); + LLVMPositionBuilderBefore(Builder_val(B), I); + } + return Val_unit; +} + +/* llbuilder -> llbasicblock */ +CAMLprim LLVMBasicBlockRef llvm_insertion_block(value B) { + LLVMBasicBlockRef InsertBlock = LLVMGetInsertBlock(Builder_val(B)); + if (!InsertBlock) + raise_not_found(); + return InsertBlock; +} + +/* llvalue -> string -> llbuilder -> unit */ +CAMLprim value llvm_insert_into_builder(LLVMValueRef I, value Name, value B) { + LLVMInsertIntoBuilderWithName(Builder_val(B), I, String_val(Name)); + return Val_unit; +} + +/*--... 
Metadata ...........................................................--*/
+
+/* llbuilder -> llvalue -> unit */
+CAMLprim value llvm_set_current_debug_location(value B, LLVMValueRef V) {
+  LLVMSetCurrentDebugLocation(Builder_val(B), V);
+  return Val_unit;
+}
+
+/* llbuilder -> unit */
+CAMLprim value llvm_clear_current_debug_location(value B) {
+  LLVMSetCurrentDebugLocation(Builder_val(B), NULL);
+  return Val_unit;
+}
+
+/* llbuilder -> llvalue option */
+CAMLprim value llvm_current_debug_location(value B) {
+  CAMLparam0();
+  LLVMValueRef L;
+  if ((L = LLVMGetCurrentDebugLocation(Builder_val(B)))) {
+    value Option = alloc(1, 0);
+    Field(Option, 0) = (value) L;
+    CAMLreturn(Option);
+  }
+  CAMLreturn(Val_int(0));
+}
+
+/* llbuilder -> llvalue -> unit */
+CAMLprim value llvm_set_inst_debug_location(value B, LLVMValueRef V) {
+  LLVMSetInstDebugLocation(Builder_val(B), V);
+  return Val_unit;
+}
+
+
+/*--... Terminators ........................................................--*/
+
+/* llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_ret_void(value B) {
+  return LLVMBuildRetVoid(Builder_val(B));
+}
+
+/* llvalue -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_ret(LLVMValueRef Val, value B) {
+  return LLVMBuildRet(Builder_val(B), Val);
+}
+
+/* llvalue array -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_aggregate_ret(value RetVals, value B) {
+  return LLVMBuildAggregateRet(Builder_val(B), (LLVMValueRef *) Op_val(RetVals),
+                               Wosize_val(RetVals));
+}
+
+/* llbasicblock -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_br(LLVMBasicBlockRef BB, value B) {
+  return LLVMBuildBr(Builder_val(B), BB);
+}
+
+/* llvalue -> llbasicblock -> llbasicblock -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_cond_br(LLVMValueRef If,
+                                         LLVMBasicBlockRef Then,
+                                         LLVMBasicBlockRef Else,
+                                         value B) {
+  return LLVMBuildCondBr(Builder_val(B), If, Then, Else);
+}
+
+/* llvalue -> llbasicblock -> int -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_switch(LLVMValueRef Of,
+                                        LLVMBasicBlockRef Else,
+                                        value EstimatedCount,
+                                        value B) {
+  return LLVMBuildSwitch(Builder_val(B), Of, Else, Int_val(EstimatedCount));
+}
+
+/* llvalue -> llvalue -> llbasicblock -> unit */
+CAMLprim value llvm_add_case(LLVMValueRef Switch, LLVMValueRef OnVal,
+                             LLVMBasicBlockRef Dest) {
+  LLVMAddCase(Switch, OnVal, Dest);
+  return Val_unit;
+}
+
+/* llvalue -> int -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_indirect_br(LLVMValueRef Addr,
+                                             value EstimatedDests,
+                                             value B) {
+  return LLVMBuildIndirectBr(Builder_val(B), Addr, Int_val(EstimatedDests));
+}
+
+/* llvalue -> llbasicblock -> unit */
+CAMLprim value llvm_add_destination(LLVMValueRef IndirectBr,
+                                    LLVMBasicBlockRef Dest) {
+  LLVMAddDestination(IndirectBr, Dest);
+  return Val_unit;
+}
+
+/* llvalue -> llvalue array -> llbasicblock -> llbasicblock -> string ->
+   llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_invoke_nat(LLVMValueRef Fn, value Args,
+                                            LLVMBasicBlockRef Then,
+                                            LLVMBasicBlockRef Catch,
+                                            value Name, value B) {
+  return LLVMBuildInvoke(Builder_val(B), Fn, (LLVMValueRef *) Op_val(Args),
+                         Wosize_val(Args), Then, Catch, String_val(Name));
+}
+
+/* llvalue -> llvalue array -> llbasicblock -> llbasicblock -> string ->
+   llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_invoke_bc(value Args[], int NumArgs) {
+  return llvm_build_invoke_nat((LLVMValueRef) Args[0], Args[1],
+                               (LLVMBasicBlockRef) Args[2],
+                               (LLVMBasicBlockRef) Args[3],
+                               Args[4], Args[5]);
+}
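+
+/* Illustrative sketch (not part of the original source): emitting a
+   conditional branch from OCaml with the stubs above, assuming [ctx], [fn],
+   [cond] and [builder] were created earlier:
+
+     let bb_then = Llvm.append_block ctx "then" fn in
+     let bb_else = Llvm.append_block ctx "else" fn in
+     ignore (Llvm.build_cond_br cond bb_then bb_else builder) */
+
+/* llbuilder 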
-> llvalue */ +CAMLprim LLVMValueRef llvm_build_unwind(value B) { + return LLVMBuildUnwind(Builder_val(B)); +} + +/* llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_unreachable(value B) { + return LLVMBuildUnreachable(Builder_val(B)); +} + +/*--... Arithmetic .........................................................--*/ + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_add(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildAdd(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nsw_add(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNSWAdd(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nuw_add(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNUWAdd(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fadd(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFAdd(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sub(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildSub(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nsw_sub(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNSWSub(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nuw_sub(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNUWSub(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fsub(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFSub(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_mul(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildMul(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nsw_mul(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNSWMul(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nuw_mul(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildNUWMul(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fmul(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFMul(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_udiv(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildUDiv(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sdiv(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + 
return LLVMBuildSDiv(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_exact_sdiv(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildExactSDiv(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fdiv(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFDiv(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_urem(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildURem(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_srem(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildSRem(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_frem(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFRem(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_shl(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildShl(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_lshr(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildLShr(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_ashr(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildAShr(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_and(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildAnd(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_or(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildOr(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_xor(LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildXor(Builder_val(B), LHS, RHS, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_neg(LLVMValueRef X, + value Name, value B) { + return LLVMBuildNeg(Builder_val(B), X, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nsw_neg(LLVMValueRef X, + value Name, value B) { + return LLVMBuildNSWNeg(Builder_val(B), X, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_nuw_neg(LLVMValueRef X, + value Name, value B) { + return LLVMBuildNUWNeg(Builder_val(B), X, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fneg(LLVMValueRef X, + value Name, value B) { + return LLVMBuildFNeg(Builder_val(B), X, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_not(LLVMValueRef X, + value Name, value B) { + 
return LLVMBuildNot(Builder_val(B), X, String_val(Name)); +} + +/*--... Memory .............................................................--*/ + +/* lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_alloca(LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildAlloca(Builder_val(B), Ty, String_val(Name)); +} + +/* lltype -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_array_alloca(LLVMTypeRef Ty, LLVMValueRef Size, + value Name, value B) { + return LLVMBuildArrayAlloca(Builder_val(B), Ty, Size, String_val(Name)); +} + +/* llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_load(LLVMValueRef Pointer, + value Name, value B) { + return LLVMBuildLoad(Builder_val(B), Pointer, String_val(Name)); +} + +/* llvalue -> llvalue -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_store(LLVMValueRef Value, LLVMValueRef Pointer, + value B) { + return LLVMBuildStore(Builder_val(B), Value, Pointer); +} + +/* llvalue -> llvalue array -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_gep(LLVMValueRef Pointer, value Indices, + value Name, value B) { + return LLVMBuildGEP(Builder_val(B), Pointer, + (LLVMValueRef *) Op_val(Indices), Wosize_val(Indices), + String_val(Name)); +} + +/* llvalue -> llvalue array -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_in_bounds_gep(LLVMValueRef Pointer, + value Indices, value Name, + value B) { + return LLVMBuildInBoundsGEP(Builder_val(B), Pointer, + (LLVMValueRef *) Op_val(Indices), + Wosize_val(Indices), String_val(Name)); +} + +/* llvalue -> int -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_struct_gep(LLVMValueRef Pointer, + value Index, value Name, + value B) { + return LLVMBuildStructGEP(Builder_val(B), Pointer, + Int_val(Index), String_val(Name)); +} + +/* string -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_global_string(value Str, value Name, value B) { + return LLVMBuildGlobalString(Builder_val(B), String_val(Str), + String_val(Name)); +} + +/* string -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_global_stringptr(value Str, value Name, + value B) { + return LLVMBuildGlobalStringPtr(Builder_val(B), String_val(Str), + String_val(Name)); +} + +/*--... 
Casts ..............................................................--*/ + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_trunc(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildTrunc(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_zext(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildZExt(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sext(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildSExt(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fptoui(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildFPToUI(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fptosi(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildFPToSI(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_uitofp(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildUIToFP(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sitofp(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildSIToFP(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fptrunc(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildFPTrunc(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fpext(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildFPExt(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_prttoint(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildPtrToInt(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_inttoptr(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildIntToPtr(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_bitcast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildBitCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_zext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildZExtOrBitCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sext_or_bitcast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildSExtOrBitCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_trunc_or_bitcast(LLVMValueRef X, + LLVMTypeRef Ty, value Name, + value B) { + return LLVMBuildTruncOrBitCast(Builder_val(B), X, Ty, 
String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_pointercast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildPointerCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_intcast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildIntCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fpcast(LLVMValueRef X, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildFPCast(Builder_val(B), X, Ty, String_val(Name)); +} + +/*--... Comparisons ........................................................--*/ + +/* Icmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_icmp(value Pred, + LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildICmp(Builder_val(B), Int_val(Pred) + LLVMIntEQ, LHS, RHS, + String_val(Name)); +} + +/* Fcmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_fcmp(value Pred, + LLVMValueRef LHS, LLVMValueRef RHS, + value Name, value B) { + return LLVMBuildFCmp(Builder_val(B), Int_val(Pred), LHS, RHS, + String_val(Name)); +} + +/*--... Miscellaneous instructions .........................................--*/ + +/* (llvalue * llbasicblock) list -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_phi(value Incoming, value Name, value B) { + value Hd, Tl; + LLVMValueRef FirstValue, PhiNode; + + assert(Incoming != Val_int(0) && "Empty list passed to Llvm.build_phi!"); + + Hd = Field(Incoming, 0); + FirstValue = (LLVMValueRef) Field(Hd, 0); + PhiNode = LLVMBuildPhi(Builder_val(B), LLVMTypeOf(FirstValue), + String_val(Name)); + + for (Tl = Incoming; Tl != Val_int(0); Tl = Field(Tl, 1)) { + value Hd = Field(Tl, 0); + LLVMAddIncoming(PhiNode, (LLVMValueRef*) &Field(Hd, 0), + (LLVMBasicBlockRef*) &Field(Hd, 1), 1); + } + + return PhiNode; +} + +/* llvalue -> llvalue array -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_call(LLVMValueRef Fn, value Params, + value Name, value B) { + return LLVMBuildCall(Builder_val(B), Fn, (LLVMValueRef *) Op_val(Params), + Wosize_val(Params), String_val(Name)); +} + +/* llvalue -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_select(LLVMValueRef If, + LLVMValueRef Then, LLVMValueRef Else, + value Name, value B) { + return LLVMBuildSelect(Builder_val(B), If, Then, Else, String_val(Name)); +} + +/* llvalue -> lltype -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_va_arg(LLVMValueRef List, LLVMTypeRef Ty, + value Name, value B) { + return LLVMBuildVAArg(Builder_val(B), List, Ty, String_val(Name)); +} + +/* llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_extractelement(LLVMValueRef Vec, + LLVMValueRef Idx, + value Name, value B) { + return LLVMBuildExtractElement(Builder_val(B), Vec, Idx, String_val(Name)); +} + +/* llvalue -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_insertelement(LLVMValueRef Vec, + LLVMValueRef Element, + LLVMValueRef Idx, + value Name, value B) { + return LLVMBuildInsertElement(Builder_val(B), Vec, Element, Idx, + String_val(Name)); +} + +/* llvalue -> llvalue -> llvalue -> string -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef 
llvm_build_shufflevector(LLVMValueRef V1, LLVMValueRef V2,
+                         LLVMValueRef Mask,
+                         value Name, value B) {
+  return LLVMBuildShuffleVector(Builder_val(B), V1, V2, Mask, String_val(Name));
+}
+
+/* llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_extractvalue(LLVMValueRef Aggregate,
+                                              value Idx, value Name, value B) {
+  return LLVMBuildExtractValue(Builder_val(B), Aggregate, Int_val(Idx),
+                               String_val(Name));
+}
+
+/* llvalue -> llvalue -> int -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_insertvalue(LLVMValueRef Aggregate,
+                                             LLVMValueRef Val, value Idx,
+                                             value Name, value B) {
+  return LLVMBuildInsertValue(Builder_val(B), Aggregate, Val, Int_val(Idx),
+                              String_val(Name));
+}
+
+/* llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_is_null(LLVMValueRef Val, value Name,
+                                         value B) {
+  return LLVMBuildIsNull(Builder_val(B), Val, String_val(Name));
+}
+
+/* llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_is_not_null(LLVMValueRef Val, value Name,
+                                             value B) {
+  return LLVMBuildIsNotNull(Builder_val(B), Val, String_val(Name));
+}
+
+/* llvalue -> llvalue -> string -> llbuilder -> llvalue */
+CAMLprim LLVMValueRef llvm_build_ptrdiff(LLVMValueRef LHS, LLVMValueRef RHS,
+                                         value Name, value B) {
+  return LLVMBuildPtrDiff(Builder_val(B), LHS, RHS, String_val(Name));
+}
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+/* string -> llmemorybuffer
+   raises IoError msg on error */
+CAMLprim value llvm_memorybuffer_of_file(value Path) {
+  CAMLparam1(Path);
+  char *Message;
+  LLVMMemoryBufferRef MemBuf;
+
+  if (LLVMCreateMemoryBufferWithContentsOfFile(String_val(Path),
+                                               &MemBuf, &Message))
+    llvm_raise(llvm_ioerror_exn, Message);
+
+  CAMLreturn((value) MemBuf);
+}
+
+/* unit -> llmemorybuffer
+   raises IoError msg on error */
+CAMLprim LLVMMemoryBufferRef llvm_memorybuffer_of_stdin(value Unit) {
+  char *Message;
+  LLVMMemoryBufferRef MemBuf;
+
+  if (LLVMCreateMemoryBufferWithSTDIN(&MemBuf, &Message))
+    llvm_raise(llvm_ioerror_exn, Message);
+
+  return MemBuf;
+}
+
+/* llmemorybuffer -> unit */
+CAMLprim value llvm_memorybuffer_dispose(LLVMMemoryBufferRef MemBuf) {
+  LLVMDisposeMemoryBuffer(MemBuf);
+  return Val_unit;
+}
+
+/*===-- Pass Managers -----------------------------------------------------===*/
+
+/* unit -> [ `Module ] PassManager.t */
+CAMLprim LLVMPassManagerRef llvm_passmanager_create(value Unit) {
+  return LLVMCreatePassManager();
+}
+
+/* llmodule -> [ `Module ] PassManager.t -> bool */
+CAMLprim value llvm_passmanager_run_module(LLVMModuleRef M,
+                                           LLVMPassManagerRef PM) {
+  return Val_bool(LLVMRunPassManager(PM, M));
+}
+
+/* [ `Function ] PassManager.t -> bool */
+CAMLprim value llvm_passmanager_initialize(LLVMPassManagerRef FPM) {
+  return Val_bool(LLVMInitializeFunctionPassManager(FPM));
+}
+
+/* llvalue -> [ `Function ] PassManager.t -> bool */
+CAMLprim value llvm_passmanager_run_function(LLVMValueRef F,
+                                             LLVMPassManagerRef FPM) {
+  return Val_bool(LLVMRunFunctionPassManager(FPM, F));
+}
+
+/* [ `Function ] PassManager.t -> bool */
+CAMLprim value llvm_passmanager_finalize(LLVMPassManagerRef FPM) {
+  return Val_bool(LLVMFinalizeFunctionPassManager(FPM));
+}
+
+/* PassManager.any PassManager.t -> unit */
+CAMLprim value llvm_passmanager_dispose(LLVMPassManagerRef PM) {
+  LLVMDisposePassManager(PM);
+  return Val_unit;
+}
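+
+/* Illustrative sketch (not part of the original source): the pass manager
+   stubs above combine with the Llvm_scalar_opts externals; assuming an
+   existing llmodule m:
+
+     let pm = Llvm.PassManager.create () in
+     Llvm_scalar_opts.add_constant_propagation pm;
+     ignore (Llvm.PassManager.run_module m pm);
+     Llvm.PassManager.dispose pm */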
diff --git a/final/bindings/ocaml/target/Makefile b/final/bindings/ocaml/target/Makefile
new file mode 100644
index 00000000000..3c48cd8fc20
--- /dev/null
+++ b/final/bindings/ocaml/target/Makefile
@@ -0,0 +1,19 @@
+##===- bindings/ocaml/target/Makefile ----------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This is the makefile for the Objective Caml Llvm_target interface.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../../..
+LIBRARYNAME := llvm_target
+UsedComponents := target
+UsedOcamlInterfaces := llvm
+
+include ../Makefile.ocaml
diff --git a/final/bindings/ocaml/target/llvm_target.ml b/final/bindings/ocaml/target/llvm_target.ml
new file mode 100644
index 00000000000..ea5341d5e8b
--- /dev/null
+++ b/final/bindings/ocaml/target/llvm_target.ml
@@ -0,0 +1,44 @@
+(*===-- llvm_target.ml - LLVM Ocaml Interface ------------------*- OCaml -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*)
+
+module Endian = struct
+  type t =
+  | Big
+  | Little
+end
+
+module TargetData = struct
+  type t
+
+  external create : string -> t = "llvm_targetdata_create"
+  external add : t -> [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+               = "llvm_targetdata_add"
+  external as_string : t -> string = "llvm_targetdata_as_string"
+  external invalidate_struct_layout : t -> Llvm.lltype -> unit
+                                    = "llvm_targetdata_invalidate_struct_layout"
+  external dispose : t -> unit = "llvm_targetdata_dispose"
+end
+
+external byte_order : TargetData.t -> Endian.t = "llvm_byte_order"
+external pointer_size : TargetData.t -> int = "llvm_pointer_size"
+external intptr_type : TargetData.t -> Llvm.lltype = "LLVMIntPtrType"
+external size_in_bits : TargetData.t -> Llvm.lltype -> Int64.t
+                      = "llvm_size_in_bits"
+external store_size : TargetData.t -> Llvm.lltype -> Int64.t = "llvm_store_size"
+external abi_size : TargetData.t -> Llvm.lltype -> Int64.t = "llvm_abi_size"
+external abi_align : TargetData.t -> Llvm.lltype -> int = "llvm_abi_align"
+external stack_align : TargetData.t -> Llvm.lltype -> int = "llvm_stack_align"
+external preferred_align : TargetData.t -> Llvm.lltype -> int
+                         = "llvm_preferred_align"
+external preferred_align_of_global : TargetData.t -> Llvm.llvalue -> int
+                                   = "llvm_preferred_align_of_global"
+external element_at_offset : TargetData.t -> Llvm.lltype -> Int64.t -> int
+                           = "llvm_element_at_offset"
+external offset_of_element : TargetData.t -> Llvm.lltype -> int -> Int64.t
+                           = "llvm_offset_of_element"
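+
+(* Illustrative sketch (not part of the original source): querying layout
+   facts through this module, assuming an existing llmodule [m] and lltype
+   [ty], and that Llvm.data_layout returns the module's target data string:
+
+     let td = Llvm_target.TargetData.create (Llvm.data_layout m) in
+     let bits  = Llvm_target.size_in_bits td ty in
+     let bytes = Llvm_target.abi_size td ty in
+     ignore (bits, bytes);
+     Llvm_target.TargetData.dispose td
+*)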
diff --git a/final/bindings/ocaml/target/llvm_target.mli b/final/bindings/ocaml/target/llvm_target.mli
new file mode 100644
index 00000000000..a82e1b684fa
--- /dev/null
+++ b/final/bindings/ocaml/target/llvm_target.mli
@@ -0,0 +1,102 @@
+(*===-- llvm_target.mli - LLVM Ocaml Interface -----------------*- OCaml -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*)
+
+(** Target Information.
+
+    This interface provides an ocaml API for LLVM target information,
+    the classes in the Target library. *)
+
+module Endian : sig
+  type t =
+  | Big
+  | Little
+end
+
+module TargetData : sig
+  type t
+
+  (** [TargetData.create rep] parses the target data string representation
+      [rep]. See the constructor llvm::TargetData::TargetData. *)
+  external create : string -> t = "llvm_targetdata_create"
+
+  (** [add td pm] adds the target data [td] to the pass manager [pm].
+      Does not take ownership of the target data.
+      See the method llvm::PassManagerBase::add. *)
+  external add : t -> [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+               = "llvm_targetdata_add"
+
+  (** [as_string td] is the string representation of the target data [td].
+      See the constructor llvm::TargetData::TargetData. *)
+  external as_string : t -> string = "llvm_targetdata_as_string"
+
+  (** Struct layouts are speculatively cached. If a TargetDataRef is alive when
+      types are being refined and removed, this method must be called whenever a
+      struct type is removed to avoid a dangling pointer in this cache.
+      See the method llvm::TargetData::InvalidateStructLayoutInfo. *)
+  external invalidate_struct_layout : t -> Llvm.lltype -> unit
+                                    = "llvm_targetdata_invalidate_struct_layout"
+
+  (** Deallocates a TargetData.
+      See the destructor llvm::TargetData::~TargetData. *)
+  external dispose : t -> unit = "llvm_targetdata_dispose"
+end
+
+(** Returns the byte order of a target, either [Endian.Big] or [Endian.Little].
+    See the method llvm::TargetData::isLittleEndian. *)
+external byte_order : TargetData.t -> Endian.t = "llvm_byte_order"
+
+(** Returns the pointer size in bytes for a target.
+    See the method llvm::TargetData::getPointerSize. *)
+external pointer_size : TargetData.t -> int = "llvm_pointer_size"
+
+(** Returns the integer type that is the same size as a pointer on a target.
+    See the method llvm::TargetData::getIntPtrType. *)
+external intptr_type : TargetData.t -> Llvm.lltype = "LLVMIntPtrType"
+
+(** Computes the size of a type in bits for a target.
+    See the method llvm::TargetData::getTypeSizeInBits. *)
+external size_in_bits : TargetData.t -> Llvm.lltype -> Int64.t
+                      = "llvm_size_in_bits"
+
+(** Computes the storage size of a type in bytes for a target.
+    See the method llvm::TargetData::getTypeStoreSize. *)
+external store_size : TargetData.t -> Llvm.lltype -> Int64.t = "llvm_store_size"
+
+(** Computes the ABI size of a type in bytes for a target.
+    See the method llvm::TargetData::getTypeAllocSize. *)
+external abi_size : TargetData.t -> Llvm.lltype -> Int64.t = "llvm_abi_size"
+
+(** Computes the ABI alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getABITypeAlignment. *)
+external abi_align : TargetData.t -> Llvm.lltype -> int = "llvm_abi_align"
+
+(** Computes the call frame alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getCallFrameTypeAlignment. *)
+external stack_align : TargetData.t -> Llvm.lltype -> int = "llvm_stack_align"
+
+(** Computes the preferred alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getPrefTypeAlignment. *)
+external preferred_align : TargetData.t -> Llvm.lltype -> int
+                         = "llvm_preferred_align"
+
+(** Computes the preferred alignment of a global variable in bytes for a
+    target. See the method llvm::TargetData::getPreferredAlignment. *)
+external preferred_align_of_global : TargetData.t -> Llvm.llvalue -> int
+                                   = "llvm_preferred_align_of_global"
+
+(** Computes the structure element that contains the byte offset for a target.
+    See the method llvm::StructLayout::getElementContainingOffset. *)
+external element_at_offset : TargetData.t -> Llvm.lltype -> Int64.t -> int
+                           = "llvm_element_at_offset"
+
+(** Computes the byte offset of the indexed struct element for a target.
+    See the method llvm::StructLayout::getElementOffset. *)
+external offset_of_element : TargetData.t -> Llvm.lltype -> int -> Int64.t
+                           = "llvm_offset_of_element"
diff --git a/final/bindings/ocaml/target/target_ocaml.c b/final/bindings/ocaml/target/target_ocaml.c
new file mode 100644
index 00000000000..cc20e8187a7
--- /dev/null
+++ b/final/bindings/ocaml/target/target_ocaml.c
@@ -0,0 +1,109 @@
+/*===-- target_ocaml.c - LLVM Ocaml Glue ------------------------*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file glues LLVM's ocaml interface to its C interface. These functions *|
+|* are by and large transparent wrappers to the corresponding C functions.    *|
+|*                                                                            *|
+|* Note that these functions intentionally take liberties with the CAMLparamX *|
+|* macros, since most of the parameters are not GC heap objects.              *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/Target.h"
+#include "caml/alloc.h"
+
+/* string -> TargetData.t */
+CAMLprim LLVMTargetDataRef llvm_targetdata_create(value StringRep) {
+  return LLVMCreateTargetData(String_val(StringRep));
+}
+
+/* TargetData.t -> [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_targetdata_add(LLVMTargetDataRef TD, LLVMPassManagerRef PM){
+  LLVMAddTargetData(TD, PM);
+  return Val_unit;
+}
+
+/* TargetData.t -> string */
+CAMLprim value llvm_targetdata_as_string(LLVMTargetDataRef TD) {
+  char *StringRep = LLVMCopyStringRepOfTargetData(TD);
+  value Copy = copy_string(StringRep);
+  LLVMDisposeMessage(StringRep);
+  return Copy;
+}
+
+/* TargetData.t -> Llvm.lltype -> unit */
+CAMLprim value llvm_targetdata_invalidate_struct_layout(LLVMTargetDataRef TD,
+                                                        LLVMTypeRef Ty) {
+  LLVMInvalidateStructLayout(TD, Ty);
+  return Val_unit;
+}
+
+/* TargetData.t -> unit */
+CAMLprim value llvm_targetdata_dispose(LLVMTargetDataRef TD) {
+  LLVMDisposeTargetData(TD);
+  return Val_unit;
+}
+
+/* TargetData.t -> Endian.t */
+CAMLprim value llvm_byte_order(LLVMTargetDataRef TD) {
+  return Val_int(LLVMByteOrder(TD));
+}
+
+/* TargetData.t -> int */
+CAMLprim value llvm_pointer_size(LLVMTargetDataRef TD) {
+  return Val_int(LLVMPointerSize(TD));
+}
+
+/* TargetData.t -> Llvm.lltype -> Int64.t */
+CAMLprim value llvm_size_in_bits(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return caml_copy_int64(LLVMSizeOfTypeInBits(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.lltype -> Int64.t */
+CAMLprim value llvm_store_size(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return caml_copy_int64(LLVMStoreSizeOfType(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.lltype -> Int64.t */
+CAMLprim value llvm_abi_size(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return caml_copy_int64(LLVMABISizeOfType(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.lltype -> int */
+CAMLprim value llvm_abi_align(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return Val_int(LLVMABIAlignmentOfType(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.lltype -> int */
+CAMLprim value llvm_stack_align(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return Val_int(LLVMCallFrameAlignmentOfType(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.lltype -> int */
+CAMLprim value llvm_preferred_align(LLVMTargetDataRef TD, LLVMTypeRef Ty) {
+  return Val_int(LLVMPreferredAlignmentOfType(TD, Ty));
+}
+
+/* TargetData.t -> Llvm.llvalue -> int */
+CAMLprim value llvm_preferred_align_of_global(LLVMTargetDataRef TD,
+                                              LLVMValueRef GlobalVar) {
+  return Val_int(LLVMPreferredAlignmentOfGlobal(TD, GlobalVar));
+}
+
+/* TargetData.t -> Llvm.lltype -> Int64.t -> int */
+CAMLprim value llvm_element_at_offset(LLVMTargetDataRef TD, LLVMTypeRef Ty,
                                      value Offset) {
+  return Val_int(LLVMElementAtOffset(TD, Ty, Int64_val(Offset)));
+}
+
+/* TargetData.t -> Llvm.lltype -> int -> Int64.t */
+CAMLprim value llvm_offset_of_element(LLVMTargetDataRef TD, LLVMTypeRef Ty,
+                                      value Index) {
+  return caml_copy_int64(LLVMOffsetOfElement(TD, Ty, Int_val(Index)));
+}
diff --git a/final/bindings/ocaml/transforms/Makefile b/final/bindings/ocaml/transforms/Makefile
new file mode 100644
index 00000000000..95b00c8d74a
--- /dev/null
+++ b/final/bindings/ocaml/transforms/Makefile
@@ -0,0 +1,18 @@
+##===- bindings/ocaml/transforms/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../../..
+DIRS = scalar
+
+ocamldoc:
+	$(Verb) for i in $(DIRS) ; do \
+	  $(MAKE) -C $$i ocamldoc; \
+	done
+
+include $(LEVEL)/Makefile.common
diff --git a/final/bindings/ocaml/transforms/scalar/Makefile b/final/bindings/ocaml/transforms/scalar/Makefile
new file mode 100644
index 00000000000..cbaffa4ea7a
--- /dev/null
+++ b/final/bindings/ocaml/transforms/scalar/Makefile
@@ -0,0 +1,20 @@
+##===- bindings/ocaml/transforms/scalar/Makefile -----------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+#
+# This is the makefile for the Objective Caml Llvm_scalar_opts interface.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL := ../../../..
+LIBRARYNAME := llvm_scalar_opts
+DONT_BUILD_RELINKED := 1
+UsedComponents := scalaropts
+UsedOcamlInterfaces := llvm
+
+include ../../Makefile.ocaml
diff --git a/final/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml b/final/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
new file mode 100644
index 00000000000..276e1182d05
--- /dev/null
+++ b/final/bindings/ocaml/transforms/scalar/llvm_scalar_opts.ml
@@ -0,0 +1,72 @@
+(*===-- llvm_scalar_opts.ml - LLVM Ocaml Interface -------------*- OCaml -*-===*
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is distributed under the University of Illinois Open Source
+ * License. See LICENSE.TXT for details.
+ *
+ *===----------------------------------------------------------------------===*)
+
+external add_constant_propagation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                    -> unit
+                                  = "llvm_add_constant_propagation"
+external add_sccp : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                  = "llvm_add_sccp"
+external add_dead_store_elimination : [<Llvm.PassManager.any]
+                                      Llvm.PassManager.t -> unit
+                                    = "llvm_add_dead_store_elimination"
+external add_aggressive_dce : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_aggressive_dce"
+external
+add_scalar_repl_aggregation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_scalar_repl_aggregation"
+external add_ind_var_simplification : [<Llvm.PassManager.any]
+                                      Llvm.PassManager.t -> unit
+                                    = "llvm_add_ind_var_simplification"
+external
+add_instruction_combination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_instruction_combination"
+external add_licm : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                  = "llvm_add_licm"
+external add_loop_unswitch : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                           = "llvm_add_loop_unswitch"
+external add_loop_unroll : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                         = "llvm_add_loop_unroll"
+external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                           = "llvm_add_loop_rotation"
+external
+add_memory_to_register_promotion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                   -> unit
+                                 = "llvm_add_memory_to_register_promotion"
+external
+add_memory_to_register_demotion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                  -> unit
+                                = "llvm_add_memory_to_register_demotion"
+external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                           = "llvm_add_reassociation"
+external add_jump_threading : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_jump_threading"
+external add_cfg_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                  -> unit
+                                = "llvm_add_cfg_simplification"
+external
+add_tail_call_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                          = "llvm_add_tail_call_elimination"
+external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                 = "llvm_add_gvn"
+external add_memcpy_opt : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                        = "llvm_add_memcpy_opt"
+external add_loop_deletion : [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
+                           = "llvm_add_loop_deletion"
+external
+add_lib_call_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_lib_call_simplification"
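+
+(* Illustrative sketch (not part of the original source): each external above
+   schedules one pass on a pass manager before it is run; assuming an
+   existing llmodule [m]:
+
+     let pm = Llvm.PassManager.create () in
+     add_gvn pm;
+     add_cfg_simplification pm;
+     ignore (Llvm.PassManager.run_module m pm);
+     Llvm.PassManager.dispose pm
+*)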
*)
+external add_loop_unswitch : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_unswitch"
+
+(** See the [llvm::createLoopUnrollPass] function. *)
+external add_loop_unroll : [<Llvm.PassManager.any] Llvm.PassManager.t
+                           -> unit
+                         = "llvm_add_loop_unroll"
+
+(** See the [llvm::createLoopRotatePass] function. *)
+external add_loop_rotation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_rotation"
+
+(** See the [llvm::createPromoteMemoryToRegisterPass] function. *)
+external
+add_memory_to_register_promotion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                   -> unit
+                                 = "llvm_add_memory_to_register_promotion"
+
+(** See the [llvm::createDemoteMemoryToRegisterPass] function. *)
+external
+add_memory_to_register_demotion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                  -> unit
+                                = "llvm_add_memory_to_register_demotion"
+
+(** See the [llvm::createReassociatePass] function. *)
+external add_reassociation : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_reassociation"
+
+(** See the [llvm::createJumpThreadingPass] function. *)
+external add_jump_threading : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_jump_threading"
+
+(** See the [llvm::createCFGSimplificationPass] function. *)
+external add_cfg_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                                  -> unit
+                                = "llvm_add_cfg_simplification"
+
+(** See the [llvm::createTailCallEliminationPass] function. *)
+external
+add_tail_call_elimination : [<Llvm.PassManager.any] Llvm.PassManager.t
+                            -> unit
+                          = "llvm_add_tail_call_elimination"
+
+(** See the [llvm::createGVNPass] function. *)
+external add_gvn : [<Llvm.PassManager.any] Llvm.PassManager.t
+                   -> unit
+                 = "llvm_add_gvn"
+
+(** See the [llvm::createMemCpyOptPass] function. *)
+external add_memcpy_opt : [<Llvm.PassManager.any] Llvm.PassManager.t
+                          -> unit
+                        = "llvm_add_memcpy_opt"
+
+(** See the [llvm::createLoopDeletionPass] function. *)
+external add_loop_deletion : [<Llvm.PassManager.any] Llvm.PassManager.t
+                             -> unit
+                           = "llvm_add_loop_deletion"
+
+(** See the [llvm::createSimplifyLibCallsPass] function. *)
+external
+add_lib_call_simplification : [<Llvm.PassManager.any] Llvm.PassManager.t
+                              -> unit
+                            = "llvm_add_lib_call_simplification"
diff --git a/final/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c b/final/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
new file mode 100644
index 00000000000..df44807859c
--- /dev/null
+++ b/final/bindings/ocaml/transforms/scalar/scalar_opts_ocaml.c
@@ -0,0 +1,146 @@
+/*===-- scalar_opts_ocaml.c - LLVM Ocaml Glue -------------------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This file glues LLVM's ocaml interface to its C interface. These functions *|
+|* are by and large transparent wrappers to the corresponding C functions. *|
+|* *|
+|* Note that these functions intentionally take liberties with the CAMLparamX *|
+|* macros, since most of the parameters are not GC heap objects.
*|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#include "llvm-c/Transforms/Scalar.h"
+#include "caml/mlvalues.h"
+#include "caml/misc.h"
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_constant_propagation(LLVMPassManagerRef PM) {
+  LLVMAddConstantPropagationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_sccp(LLVMPassManagerRef PM) {
+  LLVMAddSCCPPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_dead_store_elimination(LLVMPassManagerRef PM) {
+  LLVMAddDeadStoreEliminationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_aggressive_dce(LLVMPassManagerRef PM) {
+  LLVMAddAggressiveDCEPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_scalar_repl_aggregation(LLVMPassManagerRef PM) {
+  LLVMAddScalarReplAggregatesPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_ind_var_simplification(LLVMPassManagerRef PM) {
+  LLVMAddIndVarSimplifyPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_instruction_combination(LLVMPassManagerRef PM) {
+  LLVMAddInstructionCombiningPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_licm(LLVMPassManagerRef PM) {
+  LLVMAddLICMPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_unswitch(LLVMPassManagerRef PM) {
+  LLVMAddLoopUnswitchPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_unroll(LLVMPassManagerRef PM) {
+  LLVMAddLoopUnrollPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_rotation(LLVMPassManagerRef PM) {
+  LLVMAddLoopRotatePass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_memory_to_register_promotion(LLVMPassManagerRef PM) {
+  LLVMAddPromoteMemoryToRegisterPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_memory_to_register_demotion(LLVMPassManagerRef PM) {
+  LLVMAddDemoteMemoryToRegisterPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_reassociation(LLVMPassManagerRef PM) {
+  LLVMAddReassociatePass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_jump_threading(LLVMPassManagerRef PM) {
+  LLVMAddJumpThreadingPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_cfg_simplification(LLVMPassManagerRef PM) {
+  LLVMAddCFGSimplificationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_tail_call_elimination(LLVMPassManagerRef PM) {
+  LLVMAddTailCallEliminationPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_gvn(LLVMPassManagerRef PM) {
+  LLVMAddGVNPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_memcpy_opt(LLVMPassManagerRef PM) {
+  LLVMAddMemCpyOptPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_loop_deletion(LLVMPassManagerRef PM) {
+  LLVMAddLoopDeletionPass(PM);
+  return Val_unit;
+}
+
+/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
+CAMLprim value llvm_add_lib_call_simplification(LLVMPassManagerRef PM) {
+  LLVMAddSimplifyLibCallsPass(PM);
+  return Val_unit;
+}
diff --git a/final/build-for-llvm-top.sh b/final/build-for-llvm-top.sh
new file mode 100755
index 00000000000..78e3ed87f09
--- /dev/null
+++ b/final/build-for-llvm-top.sh
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# This includes the Bourne shell library from llvm-top. Since this file is
+# generally only used when building from llvm-top, it is safe to assume that
+# llvm is checked out into llvm-top in which case .. just works.
+. 
../library.sh + +# Process the options passed in to us by the build script into standard +# variables. +process_arguments "$@" + +# First, see if the build directory is there. If not, create it. +build_dir="$LLVM_TOP/build.llvm" +if test ! -d "$build_dir" ; then + mkdir -p "$build_dir" +fi + +# See if we have previously been configured by sensing the presence +# of the config.status scripts +config_status="$build_dir/config.status" +if test ! -f "$config_status" -o "$config_status" -ot "$0" ; then + # We must configure so build a list of configure options + config_options="--prefix=$PREFIX --with-llvmgccdir=$PREFIX" + if test "$OPTIMIZED" -eq 1 ; then + config_options="$config_options --enable-optimized" + else + config_options="$config_options --disable-optimized" + fi + if test "$DEBUG" -eq 1 ; then + config_options="$config_options --enable-debug" + else + config_options="$config_options --disable-debug" + fi + if test "$ASSERTIONS" -eq 1 ; then + config_options="$config_options --enable-assertions" + else + config_options="$config_options --disable-assertions" + fi + if test "$CHECKING" -eq 1 ; then + config_options="$config_options --enable-expensive-checks" + else + config_options="$config_options --disable-expensive-checks" + fi + if test "$DOXYGEN" -eq 1 ; then + config_options="$config_options --enable-doxygen" + else + config_options="$config_options --disable-doxygen" + fi + if test "$THREADS" -eq 1 ; then + config_options="$config_options --enable-threads" + else + config_options="$config_options --disable-threads" + fi + config_options="$config_options $OPTIONS_DASH $OPTIONS_DASH_DASH" + src_dir=`pwd` + cd "$build_dir" + msg 0 Configuring $module with: + msg 0 " $src_dir/configure" $config_options + $src_dir/configure $config_options || \ + die $? "Configuring $module module failed" +else + msg 0 Module $module already configured, ignoring configure options. + cd "$build_dir" +fi + +msg 0 Building $module with: +msg 0 " make" $OPTIONS_ASSIGN tools-only +make $OPTIONS_ASSIGN tools-only diff --git a/final/cmake/README b/final/cmake/README new file mode 100644 index 00000000000..4aafdbf32a3 --- /dev/null +++ b/final/cmake/README @@ -0,0 +1 @@ +See docs/CMake.html for instructions on how to build LLVM with CMake. 
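The shell script above assembles autoconf flags; the CMake route the README points to exposes the same choices as cache variables. As a rough sketch (the variable names are taken from config-ix.cmake and HandleLLVMOptions.cmake below, but the flag-to-variable pairing is an inference, not part of this patch), a preload file for cmake -C might look like:

    # llvm-init.cmake -- hypothetical cache preload; use as: cmake -C llvm-init.cmake <llvm-src>
    set(CMAKE_BUILD_TYPE Release CACHE STRING "")   # roughly --enable-optimized
    set(LLVM_ENABLE_ASSERTIONS ON CACHE BOOL "")    # roughly --enable-assertions
    set(LLVM_ENABLE_THREADS ON CACHE BOOL "")       # roughly --enable-threads
    set(LLVM_ENABLE_PIC ON CACHE BOOL "")           # position-independent code
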
diff --git a/final/cmake/config-ix.cmake b/final/cmake/config-ix.cmake new file mode 100755 index 00000000000..e2817f1316b --- /dev/null +++ b/final/cmake/config-ix.cmake @@ -0,0 +1,385 @@ +if( WIN32 AND NOT CYGWIN ) + # We consider Cygwin as another Unix + set(PURE_WINDOWS 1) +endif() + +include(CheckIncludeFile) +include(CheckLibraryExists) +include(CheckSymbolExists) +include(CheckFunctionExists) +include(CheckCXXSourceCompiles) +include(TestBigEndian) + +if( UNIX AND NOT BEOS ) + # Used by check_symbol_exists: + set(CMAKE_REQUIRED_LIBRARIES m) +endif() + +# Helper macros and functions +macro(add_cxx_include result files) + set(${result} "") + foreach (file_name ${files}) + set(${result} "${${result}}#include<${file_name}>\n") + endforeach() +endmacro(add_cxx_include files result) + +function(check_type_exists type files variable) + add_cxx_include(includes "${files}") + CHECK_CXX_SOURCE_COMPILES(" + ${includes} ${type} typeVar; + int main() { + return 0; + } + " ${variable}) +endfunction() + +# include checks +check_include_file(argz.h HAVE_ARGZ_H) +check_include_file(assert.h HAVE_ASSERT_H) +check_include_file(ctype.h HAVE_CTYPE_H) +check_include_file(dirent.h HAVE_DIRENT_H) +check_include_file(dl.h HAVE_DL_H) +check_include_file(dld.h HAVE_DLD_H) +check_include_file(dlfcn.h HAVE_DLFCN_H) +check_include_file(errno.h HAVE_ERRNO_H) +check_include_file(execinfo.h HAVE_EXECINFO_H) +check_include_file(fcntl.h HAVE_FCNTL_H) +check_include_file(inttypes.h HAVE_INTTYPES_H) +check_include_file(limits.h HAVE_LIMITS_H) +check_include_file(link.h HAVE_LINK_H) +check_include_file(malloc.h HAVE_MALLOC_H) +check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H) +check_include_file(memory.h HAVE_MEMORY_H) +check_include_file(ndir.h HAVE_NDIR_H) +if( NOT PURE_WINDOWS ) + check_include_file(pthread.h HAVE_PTHREAD_H) +endif() +check_include_file(setjmp.h HAVE_SETJMP_H) +check_include_file(signal.h HAVE_SIGNAL_H) +check_include_file(stdint.h HAVE_STDINT_H) +check_include_file(stdio.h HAVE_STDIO_H) +check_include_file(stdlib.h HAVE_STDLIB_H) +check_include_file(string.h HAVE_STRING_H) +check_include_file(strings.h HAVE_STRINGS_H) +check_include_file(sys/dir.h HAVE_SYS_DIR_H) +check_include_file(sys/dl.h HAVE_SYS_DL_H) +check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H) +check_include_file(sys/mman.h HAVE_SYS_MMAN_H) +check_include_file(sys/ndir.h HAVE_SYS_NDIR_H) +check_include_file(sys/param.h HAVE_SYS_PARAM_H) +check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H) +check_include_file(sys/stat.h HAVE_SYS_STAT_H) +check_include_file(sys/time.h HAVE_SYS_TIME_H) +check_include_file(sys/types.h HAVE_SYS_TYPES_H) +check_include_file(sys/uio.h HAVE_SYS_UIO_H) +check_include_file(sys/wait.h HAVE_SYS_WAIT_H) +check_include_file(termios.h HAVE_TERMIOS_H) +check_include_file(unistd.h HAVE_UNISTD_H) +check_include_file(utime.h HAVE_UTIME_H) +check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H) +check_include_file(windows.h HAVE_WINDOWS_H) +check_include_file(fenv.h HAVE_FENV_H) +check_include_file(mach/mach.h HAVE_MACH_MACH_H) +check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H) + +# library checks +if( NOT PURE_WINDOWS ) + check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) + check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) + check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) + check_library_exists(dl dlopen "" HAVE_LIBDL) +endif() + +# function checks +check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE) 
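# (A consumer-side sketch, added for orientation and not part of this hunk:
#  each HAVE_* result above is substituted into a generated config header via
#  configure_file() from a template -- in LLVM's tree, config.h.cmake, an
#  assumption about the layout rather than something shown here -- where a
#  line such as
#      #cmakedefine HAVE_GETPAGESIZE ${HAVE_GETPAGESIZE}
#  becomes `#define HAVE_GETPAGESIZE 1' on success and a commented-out
#  `#undef' otherwise.)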
+check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE) +check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT) +check_function_exists(isatty HAVE_ISATTY) +check_symbol_exists(index strings.h HAVE_INDEX) +check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH) +check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H) +check_symbol_exists(finite ieeefp.h HAVE_FINITE_IN_IEEEFP_H) +check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH) +check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H) +check_symbol_exists(ceilf math.h HAVE_CEILF) +check_symbol_exists(floorf math.h HAVE_FLOORF) +check_symbol_exists(fmodf math.h HAVE_FMODF) +if( HAVE_SETJMP_H ) + check_symbol_exists(longjmp setjmp.h HAVE_LONGJMP) + check_symbol_exists(setjmp setjmp.h HAVE_SETJMP) + check_symbol_exists(siglongjmp setjmp.h HAVE_SIGLONGJMP) + check_symbol_exists(sigsetjmp setjmp.h HAVE_SIGSETJMP) +endif() +if( HAVE_SYS_UIO_H ) + check_symbol_exists(writev sys/uio.h HAVE_WRITEV) +endif() +check_symbol_exists(nearbyintf math.h HAVE_NEARBYINTF) +check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO) +check_symbol_exists(malloc_zone_statistics malloc/malloc.h + HAVE_MALLOC_ZONE_STATISTICS) +check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP) +check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP) +check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP) +check_symbol_exists(closedir "sys/types.h;dirent.h" HAVE_CLOSEDIR) +check_symbol_exists(opendir "sys/types.h;dirent.h" HAVE_OPENDIR) +check_symbol_exists(readdir "sys/types.h;dirent.h" HAVE_READDIR) +check_symbol_exists(getcwd unistd.h HAVE_GETCWD) +check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY) +check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT) +check_symbol_exists(rindex strings.h HAVE_RINDEX) +check_symbol_exists(strchr string.h HAVE_STRCHR) +check_symbol_exists(strcmp string.h HAVE_STRCMP) +check_symbol_exists(strdup string.h HAVE_STRDUP) +check_symbol_exists(strrchr string.h HAVE_STRRCHR) +if( NOT PURE_WINDOWS ) + check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK) +endif() +check_symbol_exists(sbrk unistd.h HAVE_SBRK) +check_symbol_exists(srand48 stdlib.h HAVE_RAND48_SRAND48) +if( HAVE_RAND48_SRAND48 ) + check_symbol_exists(lrand48 stdlib.h HAVE_RAND48_LRAND48) + if( HAVE_RAND48_LRAND48 ) + check_symbol_exists(drand48 stdlib.h HAVE_RAND48_DRAND48) + if( HAVE_RAND48_DRAND48 ) + set(HAVE_RAND48 1 CACHE INTERNAL "are srand48/lrand48/drand48 available?") + endif() + endif() +endif() +check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL) +check_symbol_exists(strtoq stdlib.h HAVE_STRTOQ) +check_symbol_exists(strerror string.h HAVE_STRERROR) +check_symbol_exists(strerror_r string.h HAVE_STRERROR_R) +check_symbol_exists(strerror_s string.h HAVE_DECL_STRERROR_S) +check_symbol_exists(memcpy string.h HAVE_MEMCPY) +check_symbol_exists(memmove string.h HAVE_MEMMOVE) +check_symbol_exists(setenv stdlib.h HAVE_SETENV) +if( PURE_WINDOWS ) + check_symbol_exists(_chsize_s io.h HAVE__CHSIZE_S) + + check_function_exists(_alloca HAVE__ALLOCA) + check_function_exists(__alloca HAVE___ALLOCA) + check_function_exists(__chkstk HAVE___CHKSTK) + check_function_exists(___chkstk HAVE____CHKSTK) + + check_function_exists(__ashldi3 HAVE___ASHLDI3) + check_function_exists(__ashrdi3 HAVE___ASHRDI3) + check_function_exists(__divdi3 HAVE___DIVDI3) + check_function_exists(__fixdfdi HAVE___FIXDFDI) + check_function_exists(__fixsfdi HAVE___FIXSFDI) + check_function_exists(__floatdidf HAVE___FLOATDIDF) + 
check_function_exists(__lshrdi3 HAVE___LSHRDI3)
+  check_function_exists(__moddi3 HAVE___MODDI3)
+  check_function_exists(__udivdi3 HAVE___UDIVDI3)
+  check_function_exists(__umoddi3 HAVE___UMODDI3)
+
+  check_function_exists(__main HAVE___MAIN)
+  check_function_exists(__cmpdi2 HAVE___CMPDI2)
+endif()
+if( HAVE_ARGZ_H )
+  check_symbol_exists(argz_append argz.h HAVE_ARGZ_APPEND)
+  check_symbol_exists(argz_create_sep argz.h HAVE_ARGZ_CREATE_SEP)
+  check_symbol_exists(argz_insert argz.h HAVE_ARGZ_INSERT)
+  check_symbol_exists(argz_next argz.h HAVE_ARGZ_NEXT)
+  check_symbol_exists(argz_stringify argz.h HAVE_ARGZ_STRINGIFY)
+endif()
+if( HAVE_DLFCN_H )
+  if( HAVE_LIBDL )
+    list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
+  endif()
+  check_symbol_exists(dlerror dlfcn.h HAVE_DLERROR)
+  check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN)
+  if( HAVE_LIBDL )
+    list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
+  endif()
+endif()
+
+check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
+if( LLVM_USING_GLIBC )
+  add_llvm_definitions( -D_GNU_SOURCE )
+endif()
+
+# Type checks
+check_type_exists(std::bidirectional_iterator<int,int> "iterator;iostream" HAVE_BI_ITERATOR)
+check_type_exists(std::iterator<int,int,int> iterator HAVE_STD_ITERATOR)
+check_type_exists(std::forward_iterator<int,int> iterator HAVE_FWD_ITERATOR)
+
+set(headers "")
+if (HAVE_SYS_TYPES_H)
+  set(headers ${headers} "sys/types.h")
+endif()
+
+if (HAVE_INTTYPES_H)
+  set(headers ${headers} "inttypes.h")
+endif()
+
+if (HAVE_STDINT_H)
+  set(headers ${headers} "stdint.h")
+endif()
+
+check_type_exists(int64_t "${headers}" HAVE_INT64_T)
+check_type_exists(uint64_t "${headers}" HAVE_UINT64_T)
+check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T)
+check_type_exists(error_t errno.h HAVE_ERROR_T)
+
+# available programs checks
+function(llvm_find_program name)
+  string(TOUPPER ${name} NAME)
+  string(REGEX REPLACE "\\." "_" NAME ${NAME})
+  find_program(LLVM_PATH_${NAME} ${name})
+  mark_as_advanced(LLVM_PATH_${NAME})
+  if(LLVM_PATH_${NAME})
+    set(HAVE_${NAME} 1 CACHE INTERNAL "Is ${name} available ?")
+    mark_as_advanced(HAVE_${NAME})
+  else(LLVM_PATH_${NAME})
+    set(HAVE_${NAME} "" CACHE INTERNAL "Is ${name} available ?")
+  endif(LLVM_PATH_${NAME})
+endfunction()
+
+llvm_find_program(gv)
+llvm_find_program(circo)
+llvm_find_program(twopi)
+llvm_find_program(neato)
+llvm_find_program(fdp)
+llvm_find_program(dot)
+llvm_find_program(dotty)
+llvm_find_program(xdot.py)
+
+if( LLVM_ENABLE_FFI )
+  find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR})
+  if( FFI_INCLUDE_PATH )
+    set(FFI_HEADER ffi.h CACHE INTERNAL "")
+    set(HAVE_FFI_H 1 CACHE INTERNAL "")
+  else()
+    find_path(FFI_INCLUDE_PATH ffi/ffi.h PATHS ${FFI_INCLUDE_DIR})
+    if( FFI_INCLUDE_PATH )
+      set(FFI_HEADER ffi/ffi.h CACHE INTERNAL "")
+      set(HAVE_FFI_FFI_H 1 CACHE INTERNAL "")
+    endif()
+  endif()
+
+  if( NOT FFI_HEADER )
+    message(FATAL_ERROR "libffi includes are not found.")
+  endif()
+
+  find_library(FFI_LIBRARY_PATH ffi PATHS ${FFI_LIBRARY_DIR})
+  if( NOT FFI_LIBRARY_PATH )
+    message(FATAL_ERROR "libffi is not found.")
+  endif()
+
+  list(APPEND CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH})
+  list(APPEND CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH})
+  check_symbol_exists(ffi_call ${FFI_HEADER} HAVE_FFI_CALL)
+  list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH})
+  list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH})
+endif( LLVM_ENABLE_FFI )
+
+# Define LLVM_MULTITHREADED if gcc atomic builtins exist.
+include(CheckAtomic) + +if( LLVM_ENABLE_PIC ) + set(ENABLE_PIC 1) +else() + set(ENABLE_PIC 0) +endif() + +include(CheckCXXCompilerFlag) + +check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) + +include(GetTargetTriple) +get_target_triple(LLVM_HOSTTRIPLE) + +# FIXME: We don't distinguish the target and the host. :( +set(TARGET_TRIPLE "${LLVM_HOSTTRIPLE}") + +# Determine the native architecture. +string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH) +if( LLVM_NATIVE_ARCH STREQUAL "host" ) + string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE}) +endif () + +if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86") + set(LLVM_NATIVE_ARCH X86) +elseif (LLVM_NATIVE_ARCH STREQUAL "x86") + set(LLVM_NATIVE_ARCH X86) +elseif (LLVM_NATIVE_ARCH STREQUAL "amd64") + set(LLVM_NATIVE_ARCH X86) +elseif (LLVM_NATIVE_ARCH STREQUAL "x86_64") + set(LLVM_NATIVE_ARCH X86) +elseif (LLVM_NATIVE_ARCH MATCHES "sparc") + set(LLVM_NATIVE_ARCH Sparc) +elseif (LLVM_NATIVE_ARCH MATCHES "powerpc") + set(LLVM_NATIVE_ARCH PowerPC) +elseif (LLVM_NATIVE_ARCH MATCHES "alpha") + set(LLVM_NATIVE_ARCH Alpha) +elseif (LLVM_NATIVE_ARCH MATCHES "arm") + set(LLVM_NATIVE_ARCH ARM) +elseif (LLVM_NATIVE_ARCH MATCHES "mips") + set(LLVM_NATIVE_ARCH Mips) +elseif (LLVM_NATIVE_ARCH MATCHES "xcore") + set(LLVM_NATIVE_ARCH XCore) +elseif (LLVM_NATIVE_ARCH MATCHES "msp430") + set(LLVM_NATIVE_ARCH MSP430) +else () + message(STATUS + "Unknown architecture ${LLVM_NATIVE_ARCH}; lli will not JIT code") + set(LLVM_NATIVE_ARCH) +endif () + +if (LLVM_NATIVE_ARCH) + list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX) + if (NATIVE_ARCH_IDX EQUAL -1) + message(STATUS + "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code") + set(LLVM_NATIVE_ARCH) + else () + message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}") + set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target) + set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo) + set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter) + endif () +endif() + +if( MINGW ) + set(HAVE_LIBIMAGEHLP 1) + set(HAVE_LIBPSAPI 1) + # TODO: Check existence of libraries. + # include(CheckLibraryExists) + # CHECK_LIBRARY_EXISTS(imagehlp ??? . 
HAVE_LIBIMAGEHLP)
+endif( MINGW )
+
+if( MSVC )
+  set(error_t int)
+  set(mode_t "unsigned short")
+  set(LTDL_SHLIBPATH_VAR "PATH")
+  set(LTDL_SYSSEARCHPATH "")
+  set(LTDL_DLOPEN_DEPLIBS 1)
+  set(SHLIBEXT ".lib")
+  set(LTDL_OBJDIR "_libs")
+  set(HAVE_STRTOLL 1)
+  set(strtoll "_strtoi64")
+  set(strtoull "_strtoui64")
+  set(stricmp "_stricmp")
+  set(strdup "_strdup")
+else( MSVC )
+  set(LTDL_SHLIBPATH_VAR "LD_LIBRARY_PATH")
+  set(LTDL_SYSSEARCHPATH "") # TODO
+  set(LTDL_DLOPEN_DEPLIBS 0) # TODO
+endif( MSVC )
+
+# FIXME: Signal handler return type, currently hardcoded to 'void'
+set(RETSIGTYPE void)
+
+if( LLVM_ENABLE_THREADS )
+  if( HAVE_PTHREAD_H OR WIN32 )
+    set(ENABLE_THREADS 1)
+  endif()
+endif()
+
+if( ENABLE_THREADS )
+  message(STATUS "Threads enabled.")
+else( ENABLE_THREADS )
+  message(STATUS "Threads disabled.")
+endif()
+
+set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
diff --git a/final/cmake/modules/AddLLVM.cmake b/final/cmake/modules/AddLLVM.cmake
new file mode 100755
index 00000000000..764c6591c45
--- /dev/null
+++ b/final/cmake/modules/AddLLVM.cmake
@@ -0,0 +1,136 @@
+include(LLVMProcessSources)
+include(LLVMConfig)
+
+macro(add_llvm_library name)
+  llvm_process_sources( ALL_FILES ${ARGN} )
+  add_library( ${name} ${ALL_FILES} )
+  set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} )
+  if( LLVM_COMMON_DEPENDS )
+    add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} )
+  endif( LLVM_COMMON_DEPENDS )
+
+  if( BUILD_SHARED_LIBS )
+    get_system_libs(sl)
+    target_link_libraries( ${name} ${sl} )
+  endif()
+
+  install(TARGETS ${name}
+    LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
+    ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX})
+  # The LLVM Target library shall be built before its sublibraries
+  # (asmprinter, etc) because those may use tablegenned files whose
+  # generation is triggered by the main LLVM target library. Necessary
+  # for parallel builds:
+  if( CURRENT_LLVM_TARGET )
+    add_dependencies(${name} ${CURRENT_LLVM_TARGET})
+  endif()
+  set_target_properties(${name} PROPERTIES FOLDER "Libraries")
+endmacro(add_llvm_library name)
+
+
+macro(add_llvm_loadable_module name)
+  if( NOT LLVM_ON_UNIX OR CYGWIN )
+    message(STATUS "Loadable modules not supported on this platform.
+${name} ignored.")
+    # Add empty "phony" target
+    add_custom_target(${name})
+  else()
+    llvm_process_sources( ALL_FILES ${ARGN} )
+    if (MODULE)
+      set(libkind MODULE)
+    else()
+      set(libkind SHARED)
+    endif()
+
+    add_library( ${name} ${libkind} ${ALL_FILES} )
+    set_target_properties( ${name} PROPERTIES PREFIX "" )
+
+    if (APPLE)
+      # Darwin-specific linker flags for loadable modules.
+ set_target_properties(${name} PROPERTIES + LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress") + endif() + + install(TARGETS ${name} + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + endif() + + set_target_properties(${name} PROPERTIES FOLDER "Loadable modules") +endmacro(add_llvm_loadable_module name) + + +macro(add_llvm_executable name) + llvm_process_sources( ALL_FILES ${ARGN} ) + if( EXCLUDE_FROM_ALL ) + add_executable(${name} EXCLUDE_FROM_ALL ${ALL_FILES}) + else() + add_executable(${name} ${ALL_FILES}) + endif() + set(EXCLUDE_FROM_ALL OFF) + if( LLVM_USED_LIBS ) + foreach(lib ${LLVM_USED_LIBS}) + target_link_libraries( ${name} ${lib} ) + endforeach(lib) + endif( LLVM_USED_LIBS ) + if( LLVM_LINK_COMPONENTS ) + llvm_config(${name} ${LLVM_LINK_COMPONENTS}) + endif( LLVM_LINK_COMPONENTS ) + if( LLVM_COMMON_DEPENDS ) + add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) + endif( LLVM_COMMON_DEPENDS ) + if( NOT MINGW ) + get_system_libs(llvm_system_libs) + if( llvm_system_libs ) + target_link_libraries(${name} ${llvm_system_libs}) + endif() + endif() +endmacro(add_llvm_executable name) + + +macro(add_llvm_tool name) + set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR}) + if( NOT LLVM_BUILD_TOOLS ) + set(EXCLUDE_FROM_ALL ON) + endif() + add_llvm_executable(${name} ${ARGN}) + if( LLVM_BUILD_TOOLS ) + install(TARGETS ${name} RUNTIME DESTINATION bin) + endif() + set_target_properties(${name} PROPERTIES FOLDER "Tools") +endmacro(add_llvm_tool name) + + +macro(add_llvm_example name) +# set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_EXAMPLES_BINARY_DIR}) + if( NOT LLVM_BUILD_EXAMPLES ) + set(EXCLUDE_FROM_ALL ON) + endif() + add_llvm_executable(${name} ${ARGN}) + if( LLVM_BUILD_EXAMPLES ) + install(TARGETS ${name} RUNTIME DESTINATION examples) + endif() + set_target_properties(${name} PROPERTIES FOLDER "Examples") +endmacro(add_llvm_example name) + + +macro(add_llvm_utility name) + add_llvm_executable(${name} ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "Utils") +endmacro(add_llvm_utility name) + + +macro(add_llvm_target target_name) + if( TABLEGEN_OUTPUT ) + add_custom_target(${target_name}Table_gen + DEPENDS ${TABLEGEN_OUTPUT}) + add_dependencies(${target_name}Table_gen ${LLVM_COMMON_DEPENDS}) + endif( TABLEGEN_OUTPUT ) + include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR}) + add_llvm_library(LLVM${target_name} ${ARGN} ${TABLEGEN_OUTPUT}) + if ( TABLEGEN_OUTPUT ) + add_dependencies(LLVM${target_name} ${target_name}Table_gen) + set_target_properties(${target_name}Table_gen PROPERTIES FOLDER "Tablegenning") + endif (TABLEGEN_OUTPUT) + set( CURRENT_LLVM_TARGET LLVM${target_name} ) +endmacro(add_llvm_target) diff --git a/final/cmake/modules/AddLLVMDefinitions.cmake b/final/cmake/modules/AddLLVMDefinitions.cmake new file mode 100644 index 00000000000..33ac9731db5 --- /dev/null +++ b/final/cmake/modules/AddLLVMDefinitions.cmake @@ -0,0 +1,13 @@ +# There is no clear way of keeping track of compiler command-line +# options chosen via `add_definitions', so we need our own method for +# using it on tools/llvm-config/CMakeLists.txt. + +# Beware that there is no implementation of remove_llvm_definitions. 
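# For illustration (an added example, not in the original file): after
#   add_llvm_definitions( -D_GNU_SOURCE -D__STDC_LIMIT_MACROS )
# the flags reach the compiler through add_definitions() as usual, and
# LLVM_DEFINITIONS additionally accumulates the space-separated string
# " -D_GNU_SOURCE -D__STDC_LIMIT_MACROS" for tools/llvm-config to report.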
+
+macro(add_llvm_definitions)
+  # We don't want semicolons in LLVM_DEFINITIONS:
+  foreach(arg ${ARGN})
+    set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}")
+  endforeach(arg)
+  add_definitions( ${ARGN} )
+endmacro(add_llvm_definitions)
diff --git a/final/cmake/modules/CMakeLists.txt b/final/cmake/modules/CMakeLists.txt
new file mode 100644
index 00000000000..1ab94749f15
--- /dev/null
+++ b/final/cmake/modules/CMakeLists.txt
@@ -0,0 +1,32 @@
+set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake")
+
+get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
+
+configure_file(
+  LLVM.cmake
+  ${llvm_cmake_builddir}/LLVM.cmake
+  @ONLY)
+
+install(FILES
+  ${llvm_cmake_builddir}/LLVM.cmake
+  LLVMConfig.cmake
+  LLVMLibDeps.cmake
+  DESTINATION share/llvm/cmake)
+
+install(DIRECTORY .
+  DESTINATION share/llvm/cmake
+  FILES_MATCHING PATTERN *.cmake
+  PATTERN .svn EXCLUDE
+  PATTERN LLVM.cmake EXCLUDE
+  PATTERN LLVMConfig.cmake EXCLUDE
+  PATTERN LLVMLibDeps.cmake EXCLUDE
+  PATTERN FindBison.cmake EXCLUDE
+  PATTERN GetTargetTriple.cmake EXCLUDE
+  PATTERN VersionFromVCS.cmake EXCLUDE
+  PATTERN CheckAtomic.cmake EXCLUDE)
+
+install(FILES
+  ${llvm_cmake_builddir}/LLVM.cmake
+  LLVMConfig.cmake
+  LLVMLibDeps.cmake
+  DESTINATION share/llvm/cmake)
diff --git a/final/cmake/modules/CheckAtomic.cmake b/final/cmake/modules/CheckAtomic.cmake
new file mode 100644
index 00000000000..f40ff4dfbd3
--- /dev/null
+++ b/final/cmake/modules/CheckAtomic.cmake
@@ -0,0 +1,29 @@
+# atomic builtins are required for threading support.
+
+INCLUDE(CheckCXXSourceCompiles)
+
+CHECK_CXX_SOURCE_COMPILES("
+#ifdef _MSC_VER
+#include <windows.h>
+#endif
+int main() {
+#ifdef _MSC_VER
+        volatile LONG val = 1;
+        MemoryBarrier();
+        InterlockedCompareExchange(&val, 0, 1);
+        InterlockedIncrement(&val);
+        InterlockedDecrement(&val);
+#else
+        volatile unsigned long val = 1;
+        __sync_synchronize();
+        __sync_val_compare_and_swap(&val, 1, 0);
+        __sync_add_and_fetch(&val, 1);
+        __sync_sub_and_fetch(&val, 1);
+#endif
+        return 0;
+      }
+" LLVM_MULTITHREADED)
+
+if( NOT LLVM_MULTITHREADED )
+  message(STATUS "Warning: LLVM will be built thread-unsafe because atomic builtins are missing")
+endif()
diff --git a/final/cmake/modules/ChooseMSVCCRT.cmake b/final/cmake/modules/ChooseMSVCCRT.cmake
new file mode 100644
index 00000000000..eb78f45c885
--- /dev/null
+++ b/final/cmake/modules/ChooseMSVCCRT.cmake
@@ -0,0 +1,106 @@
+# The macro choose_msvc_crt() takes a list of possible
+# C runtimes to choose from, in the form of compiler flags,
+# to present to the user. (MTd for /MTd, etc)
+#
+# The macro is invoked at the end of the file.
+#
+# CMake already sets CRT flags in the CMAKE_CXX_FLAGS_* and
+# CMAKE_C_FLAGS_* variables by default. To let the user
+# override that for each build type:
+# 1. Detect which CRT is already selected, and reflect this in
+#    LLVM_USE_CRT_* so the user can have a better idea of what
+#    changes they're making.
+# 2. Replace the flags in both variables with the new flag via a regex.
+# 3. set() the variables back into the cache so the changes
+#    are user-visible.
+
+### Helper macros: ###
+macro(make_crt_regex regex crts)
+  set(${regex} "")
+  foreach(crt ${${crts}})
+    # Trying to match the beginning or end of the string with stuff
+    # like [ ^]+ didn't work, so use a bunch of parentheses instead.
+ set(${regex} "${${regex}}|(^| +)/${crt}($| +)") + endforeach(crt) + string(REGEX REPLACE "^\\|" "" ${regex} "${${regex}}") +endmacro(make_crt_regex) + +macro(get_current_crt crt_current regex flagsvar) + # Find the selected-by-CMake CRT for each build type, if any. + # Strip off the leading slash and any whitespace. + string(REGEX MATCH "${${regex}}" ${crt_current} "${${flagsvar}}") + string(REPLACE "/" " " ${crt_current} "${${crt_current}}") + string(STRIP "${${crt_current}}" ${crt_current}) +endmacro(get_current_crt) + +# Replaces or adds a flag to a variable. +# Expects 'flag' to be padded with spaces. +macro(set_flag_in_var flagsvar regex flag) + string(REGEX MATCH "${${regex}}" current_flag "${${flagsvar}}") + if("${current_flag}" STREQUAL "") + set(${flagsvar} "${${flagsvar}}${${flag}}") + else() + string(REGEX REPLACE "${${regex}}" "${${flag}}" ${flagsvar} "${${flagsvar}}") + endif() + string(STRIP "${${flagsvar}}" ${flagsvar}) + # Make sure this change gets reflected in the cache/gui. + # CMake requires the docstring parameter whenever set() touches the cache, + # so get the existing docstring and re-use that. + get_property(flagsvar_docs CACHE ${flagsvar} PROPERTY HELPSTRING) + set(${flagsvar} "${${flagsvar}}" CACHE STRING "${flagsvar_docs}" FORCE) +endmacro(set_flag_in_var) + + +macro(choose_msvc_crt MSVC_CRT) + if(LLVM_USE_CRT) + message(FATAL_ERROR + "LLVM_USE_CRT is deprecated. Use the CMAKE_BUILD_TYPE-specific +variables (LLVM_USE_CRT_DEBUG, etc) instead.") + endif() + + make_crt_regex(MSVC_CRT_REGEX ${MSVC_CRT}) + + foreach(build_type ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER "${build_type}" build) + if (NOT LLVM_USE_CRT_${build}) + get_current_crt(LLVM_USE_CRT_${build} + MSVC_CRT_REGEX + CMAKE_CXX_FLAGS_${build}) + set(LLVM_USE_CRT_${build} + "${LLVM_USE_CRT_${build}}" + CACHE STRING "Specify VC++ CRT to use for ${build_type} configurations." + FORCE) + set_property(CACHE LLVM_USE_CRT_${build} + PROPERTY STRINGS "";${${MSVC_CRT}}) + endif(NOT LLVM_USE_CRT_${build}) + endforeach(build_type) + + foreach(build_type ${CMAKE_CONFIGURATION_TYPES}) + string(TOUPPER "${build_type}" build) + if ("${LLVM_USE_CRT_${build}}" STREQUAL "") + set(flag_string " ") + else() + set(flag_string " /${LLVM_USE_CRT_${build}} ") + list(FIND ${MSVC_CRT} ${LLVM_USE_CRT_${build}} idx) + if (idx LESS 0) + message(FATAL_ERROR + "Invalid value for LLVM_USE_CRT_${build}: ${LLVM_USE_CRT_${build}}. 
Valid options are one of: ${${MSVC_CRT}}")
+      endif (idx LESS 0)
+      message(STATUS "Using ${build_type} VC++ CRT: ${LLVM_USE_CRT_${build}}")
+    endif()
+    foreach(lang C CXX)
+      set_flag_in_var(CMAKE_${lang}_FLAGS_${build} MSVC_CRT_REGEX flag_string)
+    endforeach(lang)
+  endforeach(build_type)
+endmacro(choose_msvc_crt MSVC_CRT)
+
+
+# List of valid CRTs for MSVC
+set(MSVC_CRT
+  MD
+  MDd
+  MT
+  MTd)
+
+choose_msvc_crt(MSVC_CRT)
+
diff --git a/final/cmake/modules/CrossCompileLLVM.cmake b/final/cmake/modules/CrossCompileLLVM.cmake
new file mode 100644
index 00000000000..98e60a54366
--- /dev/null
+++ b/final/cmake/modules/CrossCompileLLVM.cmake
@@ -0,0 +1,26 @@
+
+if( ${LLVM_TABLEGEN} STREQUAL "tblgen" )
+  set(CX_NATIVE_TG_DIR "${CMAKE_BINARY_DIR}/native")
+  set(LLVM_TABLEGEN_EXE "${CX_NATIVE_TG_DIR}/bin/tblgen")
+
+  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${CX_NATIVE_TG_DIR}
+    COMMENT "Creating ${CX_NATIVE_TG_DIR}...")
+
+  add_custom_command(OUTPUT ${CX_NATIVE_TG_DIR}/CMakeCache.txt
+    COMMAND ${CMAKE_COMMAND} -UCMAKE_TOOLCHAIN_FILE -DCMAKE_BUILD_TYPE=Release ${CMAKE_SOURCE_DIR}
+    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}
+    DEPENDS ${CX_NATIVE_TG_DIR}
+    COMMENT "Configuring native TableGen...")
+
+  add_custom_command(OUTPUT ${LLVM_TABLEGEN_EXE}
+    COMMAND ${CMAKE_BUILD_TOOL}
+    DEPENDS ${CX_NATIVE_TG_DIR}/CMakeCache.txt
+    WORKING_DIRECTORY ${CX_NATIVE_TG_DIR}/utils/TableGen
+    COMMENT "Building native TableGen...")
+  add_custom_target(NativeTableGen DEPENDS ${LLVM_TABLEGEN_EXE})
+
+  add_dependencies(tblgen NativeTableGen)
+
+  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES ${CX_NATIVE_TG_DIR})
+endif()
diff --git a/final/cmake/modules/FindBison.cmake b/final/cmake/modules/FindBison.cmake
new file mode 100755
index 00000000000..0320ae3ce14
--- /dev/null
+++ b/final/cmake/modules/FindBison.cmake
@@ -0,0 +1,52 @@
+# - Try to find Bison
+# Once done this will define
+#
+# BISON_FOUND - system has Bison
+# BISON_EXECUTABLE - path of the bison executable
+# BISON_VERSION - the version string, like "2.5.31"
+#
+
+MACRO(FIND_BISON)
+  FIND_PROGRAM(BISON_EXECUTABLE NAMES bison)
+
+  IF(BISON_EXECUTABLE)
+    SET(BISON_FOUND TRUE)
+
+    EXECUTE_PROCESS(COMMAND ${BISON_EXECUTABLE} --version
+      OUTPUT_VARIABLE _BISON_VERSION
+    )
+    string (REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" BISON_VERSION "${_BISON_VERSION}")
+  ENDIF(BISON_EXECUTABLE)
+
+  IF(BISON_FOUND)
+    IF(NOT Bison_FIND_QUIETLY)
+      MESSAGE(STATUS "Found Bison: ${BISON_EXECUTABLE}")
+    ENDIF(NOT Bison_FIND_QUIETLY)
+  ELSE(BISON_FOUND)
+    IF(Bison_FIND_REQUIRED)
+      MESSAGE(FATAL_ERROR "Could not find Bison")
+    ENDIF(Bison_FIND_REQUIRED)
+  ENDIF(BISON_FOUND)
+ENDMACRO(FIND_BISON)
+
+MACRO(BISON_GENERATOR _PREFIX _Y_INPUT _H_OUTPUT _CPP_OUTPUT)
+  IF(BISON_EXECUTABLE)
+    GET_FILENAME_COMPONENT(_Y_DIR ${_Y_INPUT} PATH)
+    ADD_CUSTOM_COMMAND(
+      OUTPUT ${_CPP_OUTPUT}
+      OUTPUT ${_H_OUTPUT}
+      DEPENDS ${_Y_INPUT}
+      COMMAND ${BISON_EXECUTABLE}
+      ARGS
+      -p ${_PREFIX} -o"${_CPP_OUTPUT}"
+      --defines="${_H_OUTPUT}" ${_Y_INPUT}
+      WORKING_DIRECTORY ${_Y_DIR}
+    )
+    SET_SOURCE_FILES_PROPERTIES(
+      ${_CPP_OUTPUT} ${_H_OUTPUT}
+      GENERATED
+    )
+  ELSE(BISON_EXECUTABLE)
+    MESSAGE(SEND_ERROR "Can't find bison program, and it's required")
+  ENDIF(BISON_EXECUTABLE)
+ENDMACRO(BISON_GENERATOR)
diff --git a/final/cmake/modules/GetTargetTriple.cmake b/final/cmake/modules/GetTargetTriple.cmake
new file mode 100644
index 00000000000..f4321c9b67e
--- /dev/null
+++ b/final/cmake/modules/GetTargetTriple.cmake
@@ -0,0 +1,30 @@
+# Returns the host
triple.
+# Invokes config.guess
+
+function( get_target_triple var )
+  if( MSVC )
+    if( CMAKE_CL_64 )
+      set( value "x86_64-pc-win32" )
+    else()
+      set( value "i686-pc-win32" )
+    endif()
+  elseif( MINGW AND NOT MSYS )
+    if( CMAKE_SIZEOF_VOID_P EQUAL 8 )
+      set( value "x86_64-w64-mingw32" )
+    else()
+      set( value "i686-pc-mingw32" )
+    endif()
+  else( MSVC )
+    set(config_guess ${LLVM_MAIN_SRC_DIR}/autoconf/config.guess)
+    execute_process(COMMAND sh ${config_guess}
+      RESULT_VARIABLE TT_RV
+      OUTPUT_VARIABLE TT_OUT
+      OUTPUT_STRIP_TRAILING_WHITESPACE)
+    if( NOT TT_RV EQUAL 0 )
+      message(FATAL_ERROR "Failed to execute ${config_guess}")
+    endif( NOT TT_RV EQUAL 0 )
+    set( value ${TT_OUT} )
+  endif( MSVC )
+  set( ${var} ${value} PARENT_SCOPE )
+  message(STATUS "Target triple: ${value}")
+endfunction( get_target_triple var )
diff --git a/final/cmake/modules/HandleLLVMOptions.cmake b/final/cmake/modules/HandleLLVMOptions.cmake
new file mode 100644
index 00000000000..e1f06a7ab9c
--- /dev/null
+++ b/final/cmake/modules/HandleLLVMOptions.cmake
@@ -0,0 +1,185 @@
+include(AddLLVMDefinitions)
+
+# Run-time build mode; it is used for unit tests.
+if(MSVC_IDE)
+  # Expect "$(Configuration)", "$(OutDir)", etc.
+  # It is expanded by msbuild or similar.
+  set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
+elseif(NOT CMAKE_BUILD_TYPE STREQUAL "")
+  # Expect "Release", "Debug", etc.
+  # Otherwise unit tests could not run.
+  set(RUNTIME_BUILD_MODE ${CMAKE_BUILD_TYPE})
+else()
+  # It might be "."
+  set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}")
+endif()
+
+set(LIT_ARGS_DEFAULT "-sv")
+if (MSVC OR XCODE)
+  set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar")
+endif()
+set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}"
+  CACHE STRING "Default options for lit")
+
+if( LLVM_ENABLE_ASSERTIONS )
+  # MSVC doesn't like _DEBUG on release builds. See PR 4379.
+  if( NOT MSVC )
+    add_definitions( -D_DEBUG )
+  endif()
+  # On Release builds cmake automatically defines NDEBUG, so we
+  # explicitly undefine it:
+  if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    add_definitions( -UNDEBUG )
+  endif()
+else()
+  if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" )
+    if( NOT MSVC_IDE AND NOT XCODE )
+      add_definitions( -DNDEBUG )
+    endif()
+  endif()
+endif()
+
+if(WIN32)
+  if(CYGWIN)
+    set(LLVM_ON_WIN32 0)
+    set(LLVM_ON_UNIX 1)
+  else(CYGWIN)
+    set(LLVM_ON_WIN32 1)
+    set(LLVM_ON_UNIX 0)
+
+    # This is effective only on Win32 hosts to use gnuwin32 tools.
+    set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools")
+  endif(CYGWIN)
+  set(LTDL_SHLIB_EXT ".dll")
+  set(EXEEXT ".exe")
+  # Maximum path length is 160 for non-unicode paths
+  set(MAXPATHLEN 160)
+else(WIN32)
+  if(UNIX)
+    set(LLVM_ON_WIN32 0)
+    set(LLVM_ON_UNIX 1)
+    if(APPLE)
+      set(LTDL_SHLIB_EXT ".dylib")
+    else(APPLE)
+      set(LTDL_SHLIB_EXT ".so")
+    endif(APPLE)
+    set(EXEEXT "")
+    # FIXME: Maximum path length is currently set to a 'safe' fixed value
+    set(MAXPATHLEN 2024)
+  else(UNIX)
+    MESSAGE(SEND_ERROR "Unable to determine platform")
+  endif(UNIX)
+endif(WIN32)
+
+if( LLVM_ENABLE_PIC )
+  if( XCODE )
+    # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't
+    # know how to disable this, so just force ENABLE_PIC off for now.
+    message(WARNING "-fPIC not supported with Xcode.")
+  elseif( WIN32 )
+    # On Windows all code is PIC. MinGW warns if -fPIC is used.
+  else()
+    include(CheckCXXCompilerFlag)
+    check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG)
+    if( SUPPORTS_FPIC_FLAG )
+      message(STATUS "Building with -fPIC")
+      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC")
+    else( SUPPORTS_FPIC_FLAG )
+      message(WARNING "-fPIC not supported.")
+    endif()
+  endif()
+endif()
+
+if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
+  # TODO: support other platforms and toolchains.
+  option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF)
+  if( LLVM_BUILD_32_BITS )
+    message(STATUS "Building 32 bits executables and libraries.")
+    add_llvm_definitions( -m32 )
+    list(APPEND CMAKE_EXE_LINKER_FLAGS -m32)
+    list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32)
+  endif( LLVM_BUILD_32_BITS )
+endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 )
+
+if( MSVC_IDE AND ( MSVC90 OR MSVC10 ) )
+  # Only Visual Studio 2008 and 2010 officially support /MP.
+  # Visual Studio 2005 does support it, but it is experimental there.
+  set(LLVM_COMPILER_JOBS "0" CACHE STRING
+    "Number of parallel compiler jobs. 0 means use all processors. Default is 0.")
+  if( NOT LLVM_COMPILER_JOBS STREQUAL "1" )
+    if( LLVM_COMPILER_JOBS STREQUAL "0" )
+      add_llvm_definitions( /MP )
+    else()
+      if (MSVC10)
+        message(FATAL_ERROR
+          "Due to a bug in CMake only 0 and 1 are supported for "
+          "LLVM_COMPILER_JOBS when generating for Visual Studio 2010")
+      else()
+        message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS})
+        add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} )
+      endif()
+    endif()
+  else()
+    message(STATUS "Parallel compilation disabled")
+  endif()
+endif()
+
+if( MSVC )
+  include(ChooseMSVCCRT)
+
+  # Add definitions that make MSVC much less annoying.
+  add_llvm_definitions(
+    # For some reason MS wants to deprecate a bunch of standard functions...
+    -D_CRT_SECURE_NO_DEPRECATE
+    -D_CRT_SECURE_NO_WARNINGS
+    -D_CRT_NONSTDC_NO_DEPRECATE
+    -D_CRT_NONSTDC_NO_WARNINGS
+    -D_SCL_SECURE_NO_DEPRECATE
+    -D_SCL_SECURE_NO_WARNINGS
+
+    -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
+    -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
+    -wd4224 # Suppress 'nonstandard extension used : formal parameter 'identifier' was previously defined as a type'
+    -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
+    -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
+    -wd4275 # Suppress 'An exported class was derived from a class that was not exported.'
+    -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
+    -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
+    -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
+    -wd4355 # Suppress ''this' : used in base member initializer list'
+    -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
+    -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
+    -wd4715 # Suppress ''function' : not all control paths return a value'
+    -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
+    -wd4065 # Suppress 'switch statement contains 'default' but no 'case' labels'
+
+    -w14062 # Promote "enumerator in switch of enum is not handled" to level 1 warning.
+    )
+
+  # Enable warnings
+  if (LLVM_ENABLE_WARNINGS)
+    add_llvm_definitions( /W4 /Wall )
+    if (LLVM_ENABLE_PEDANTIC)
+      # No MSVC equivalent available
+    endif (LLVM_ENABLE_PEDANTIC)
+  endif (LLVM_ENABLE_WARNINGS)
+  if (LLVM_ENABLE_WERROR)
+    add_llvm_definitions( /WX )
+  endif (LLVM_ENABLE_WERROR)
+elseif( CMAKE_COMPILER_IS_GNUCXX )
+  if (LLVM_ENABLE_WARNINGS)
+    add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings )
+    if (LLVM_ENABLE_PEDANTIC)
+      add_llvm_definitions( -pedantic -Wno-long-long )
+    endif (LLVM_ENABLE_PEDANTIC)
+  endif (LLVM_ENABLE_WARNINGS)
+  if (LLVM_ENABLE_WERROR)
+    add_llvm_definitions( -Werror )
+  endif (LLVM_ENABLE_WERROR)
+endif( MSVC )
+
+add_llvm_definitions( -D__STDC_LIMIT_MACROS )
+add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
+
+option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." ON)
diff --git a/final/cmake/modules/LLVM.cmake b/final/cmake/modules/LLVM.cmake
new file mode 100644
index 00000000000..9182afdf275
--- /dev/null
+++ b/final/cmake/modules/LLVM.cmake
@@ -0,0 +1,40 @@
+# This file provides information and services to the final user.
+
+set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@)
+
+set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
+
+set_property( GLOBAL PROPERTY LLVM_LIBS "@llvm_libs@")
+
+set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@)
+
+set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
+
+set(TARGET_TRIPLE "@TARGET_TRIPLE@")
+
+set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@)
+
+set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
+
+set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
+
+set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
+
+set(HAVE_LIBDL @HAVE_LIBDL@)
+set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD@)
+
+# We try to include using the current setting of CMAKE_MODULE_PATH,
+# which supposedly was filled by the user with the directory where
+# this file was installed:
+include( LLVMConfig OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED )
+
+# If that failed, we assume that this is an un-installed build:
+if( NOT LLVMCONFIG_INCLUDED )
+  set(CMAKE_MODULE_PATH
+    ${CMAKE_MODULE_PATH}
+    "@LLVM_SOURCE_DIR@/cmake/modules")
+  include( LLVMConfig )
+endif()
+
diff --git a/final/cmake/modules/LLVMConfig.cmake b/final/cmake/modules/LLVMConfig.cmake
new file mode 100755
index 00000000000..e8308f680b0
--- /dev/null
+++ b/final/cmake/modules/LLVMConfig.cmake
@@ -0,0 +1,189 @@
+function(get_system_libs return_var)
+  # Returns in `return_var' a list of system libraries used by LLVM.
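  # Usage sketch (an added example; `mytool' is a placeholder -- AddLLVM.cmake
  # above uses the same pattern inside add_llvm_executable):
  #   get_system_libs(llvm_system_libs)
  #   if( llvm_system_libs )
  #     target_link_libraries(mytool ${llvm_system_libs})
  #   endif()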
+  if( NOT MSVC )
+    if( MINGW )
+      set(system_libs ${system_libs} imagehlp psapi)
+    elseif( CMAKE_HOST_UNIX )
+      if( HAVE_LIBDL )
+        set(system_libs ${system_libs} ${CMAKE_DL_LIBS})
+      endif()
+      if( LLVM_ENABLE_THREADS AND HAVE_LIBPTHREAD )
+        set(system_libs ${system_libs} pthread)
+      endif()
+    endif( MINGW )
+  endif( NOT MSVC )
+  set(${return_var} ${system_libs} PARENT_SCOPE)
+endfunction(get_system_libs)
+
+
+function(is_llvm_target_library library return_var)
+  # Sets variable `return_var' to ON if `library' corresponds to an
+  # LLVM supported target, and to OFF if it doesn't.
+  set(${return_var} OFF PARENT_SCOPE)
+  string(TOUPPER "${library}" capitalized_lib)
+  string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
+  foreach(t ${targets})
+    if( capitalized_lib STREQUAL "LLVM${t}" OR
+        capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
+        capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
+        capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
+        capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR
+        capitalized_lib STREQUAL "LLVM${t}INFO" )
+      set(${return_var} ON PARENT_SCOPE)
+      break()
+    endif()
+  endforeach()
+endfunction(is_llvm_target_library)
+
+
+macro(llvm_config executable)
+  explicit_llvm_config(${executable} ${ARGN})
+endmacro(llvm_config)
+
+
+function(explicit_llvm_config executable)
+  set( link_components ${ARGN} )
+
+  explicit_map_components_to_libraries(LIBRARIES ${link_components})
+  target_link_libraries(${executable} ${LIBRARIES})
+endfunction(explicit_llvm_config)
+
+
+# This is a variant intended for the final user:
+function(llvm_map_components_to_libraries OUT_VAR)
+  explicit_map_components_to_libraries(result ${ARGN})
+  get_system_libs(sys_result)
+  set( ${OUT_VAR} ${result} ${sys_result} PARENT_SCOPE )
+endfunction(llvm_map_components_to_libraries)
+
+
+function(explicit_map_components_to_libraries out_libs)
+  set( link_components ${ARGN} )
+  get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS)
+  string(TOUPPER "${llvm_libs}" capitalized_libs)
+
+  # Expand some keywords:
+  list(FIND link_components "engine" engine_required)
+  if( NOT engine_required EQUAL -1 )
+    # TODO: as we assume we are on X86, this is `jit'.
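    # (Worked example, added for illustration: asking for `engine' appends
    #  `jit' and `native', and `native' below appends `X86'; so a hypothetical
    #    llvm_map_components_to_libraries(libs engine)
    #  resolves, after the dependency expansion against LLVMLibDeps.cmake,
    #  to LLVMJIT, LLVMX86CodeGen and their transitive dependencies, plus the
    #  system libraries from get_system_libs().)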
+    list(APPEND link_components "jit")
+    list(APPEND link_components "native")
+  endif()
+  list(FIND link_components "native" native_required)
+  if( NOT native_required EQUAL -1 )
+    list(APPEND link_components "X86")
+  endif()
+
+  # Translate symbolic component names to real libraries:
+  foreach(c ${link_components})
+    # add codegen, asmprinter, asmparser, disassembler
+    list(FIND LLVM_TARGETS_TO_BUILD ${c} idx)
+    if( NOT idx LESS 0 )
+      list(FIND llvm_libs "LLVM${c}CodeGen" idx)
+      if( NOT idx LESS 0 )
+        list(APPEND expanded_components "LLVM${c}CodeGen")
+      else()
+        list(FIND llvm_libs "LLVM${c}" idx)
+        if( NOT idx LESS 0 )
+          list(APPEND expanded_components "LLVM${c}")
+        else()
+          message(FATAL_ERROR "Target ${c} is not in the set of libraries.")
+        endif()
+      endif()
+      list(FIND llvm_libs "LLVM${c}AsmPrinter" asmidx)
+      if( NOT asmidx LESS 0 )
+        list(APPEND expanded_components "LLVM${c}AsmPrinter")
+      endif()
+      list(FIND llvm_libs "LLVM${c}AsmParser" asmidx)
+      if( NOT asmidx LESS 0 )
+        list(APPEND expanded_components "LLVM${c}AsmParser")
+      endif()
+      list(FIND llvm_libs "LLVM${c}Info" asmidx)
+      if( NOT asmidx LESS 0 )
+        list(APPEND expanded_components "LLVM${c}Info")
+      endif()
+      list(FIND llvm_libs "LLVM${c}Disassembler" asmidx)
+      if( NOT asmidx LESS 0 )
+        list(APPEND expanded_components "LLVM${c}Disassembler")
+      endif()
+    elseif( c STREQUAL "native" )
+      # already processed
+    elseif( c STREQUAL "nativecodegen" )
+      list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen")
+    elseif( c STREQUAL "backend" )
+      # same case as in `native'.
+    elseif( c STREQUAL "engine" )
+      # already processed
+    elseif( c STREQUAL "all" )
+      list(APPEND expanded_components ${llvm_libs})
+    else( NOT idx LESS 0 )
+      # Canonicalize the component name:
+      string(TOUPPER "${c}" capitalized)
+      list(FIND capitalized_libs LLVM${capitalized} lib_idx)
+      if( lib_idx LESS 0 )
+        # The component is unknown. Maybe it is an omitted target?
+        is_llvm_target_library(${c} iltl_result)
+        if( NOT iltl_result )
+          message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
+        endif()
+      else( lib_idx LESS 0 )
+        list(GET llvm_libs ${lib_idx} canonical_lib)
+        list(APPEND expanded_components ${canonical_lib})
+      endif( lib_idx LESS 0 )
+    endif( NOT idx LESS 0 )
+  endforeach(c)
+  # Expand dependencies while topologically sorting the list of libraries:
+  list(LENGTH expanded_components lst_size)
+  set(cursor 0)
+  set(processed)
+  while( cursor LESS lst_size )
+    list(GET expanded_components ${cursor} lib)
+    list(APPEND expanded_components ${MSVC_LIB_DEPS_${lib}})
+    # Remove duplicates at the front:
+    list(REVERSE expanded_components)
+    list(REMOVE_DUPLICATES expanded_components)
+    list(REVERSE expanded_components)
+    list(APPEND processed ${lib})
+    # Find the maximum index that doesn't have to be re-processed:
+    while(NOT "${expanded_components}" MATCHES "^${processed}.*" )
+      list(REMOVE_AT processed -1)
+    endwhile()
+    list(LENGTH processed cursor)
+    list(LENGTH expanded_components lst_size)
+  endwhile( cursor LESS lst_size )
+  # Return just the libraries included in this build:
+  set(result)
+  foreach(c ${expanded_components})
+    list(FIND llvm_libs ${c} lib_idx)
+    if( NOT lib_idx LESS 0 )
+      set(result ${result} ${c})
+    endif()
+  endforeach(c)
+  set(${out_libs} ${result} PARENT_SCOPE)
+endfunction(explicit_map_components_to_libraries)
+
+
+# The library dependency data is contained in the file
+# LLVMLibDeps.cmake in this directory.
It is automatically generated
+# by tools/llvm-config/CMakeLists.txt when the build comprises all the
+# targets and we are in an environment POSIX enough to build the
+# llvm-config script. This, in practice, just excludes MSVC.

+# When you remove or rename a library from the build, be sure to
+# remove its file from lib/ as well, or the GenLibDeps.pl script will
+# include it in its analysis!

+# The format generated by GenLibDeps.pl

+# LLVMARMAsmPrinter.o: LLVMARMCodeGen.o libLLVMAsmPrinter.a libLLVMCodeGen.a libLLVMCore.a libLLVMSupport.a libLLVMTarget.a

+# is translated to:

+# set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMARMCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMSupport LLVMTarget)

+# It is necessary to remove the `lib' prefix and the `.a'.

+# This 'sed' script should do the trick:
+# sed -e s'#\.a##g' -e 's#libLLVM#LLVM#g' -e 's#: # #' -e 's#\(.*\)#set(MSVC_LIB_DEPS_\1)#' ~/llvm/tools/llvm-config/LibDeps.txt

+include(LLVMLibDeps)
diff --git a/final/cmake/modules/LLVMLibDeps.cmake b/final/cmake/modules/LLVMLibDeps.cmake
new file mode 100644
index 00000000000..e70ea8aca4e
--- /dev/null
+++ b/final/cmake/modules/LLVMLibDeps.cmake
@@ -0,0 +1,68 @@
+set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMAsmPrinter LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMDisassembler LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMCore LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMTransformUtils)
+set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen
LLVMCore LLVMExecutionEngine LLVMMC LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMMCJIT LLVMExecutionEngine LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMObject LLVMSupport) +set(MSVC_LIB_DEPS_LLVMPTXCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPTXInfo LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPTXInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMTransformUtils) +set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMSupport ) +set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget) +set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMipa) +set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMMCParser LLVMSupport LLVMTarget LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport LLVMX86Utils) +set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Info 
LLVMX86Utils) +set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info) +set(MSVC_LIB_DEPS_LLVMX86Info LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMX86Utils LLVMCore LLVMSupport) +set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport) +set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa) diff --git a/final/cmake/modules/LLVMParseArguments.cmake b/final/cmake/modules/LLVMParseArguments.cmake new file mode 100644 index 00000000000..ce19be114b3 --- /dev/null +++ b/final/cmake/modules/LLVMParseArguments.cmake @@ -0,0 +1,80 @@ +# Copied from http://www.itk.org/Wiki/CMakeMacroParseArguments under +# http://creativecommons.org/licenses/by/2.5/. +# +# The PARSE_ARGUMENTS macro will take the arguments of another macro and define +# several variables. The first argument to PARSE_ARGUMENTS is a prefix to put on +# all variables it creates. The second argument is a list of argument names, and +# the third argument is a list of options. Both of these lists should be quoted. +# The remaining arguments to PARSE_ARGUMENTS are the arguments from the other +# macro that are to be parsed. +# +# PARSE_ARGUMENTS(prefix arg_names options arg1 arg2...) +# +# For each item in options, PARSE_ARGUMENTS will create a variable with that +# name, prefixed with prefix_. So, for example, if prefix is MY_MACRO and +# options is OPTION1;OPTION2, then PARSE_ARGUMENTS will create the variables +# MY_MACRO_OPTION1 and MY_MACRO_OPTION2. These variables will be set to true if +# the option exists in the command line or false otherwise. +# +# For each item in arg_names, PARSE_ARGUMENTS will create a variable with that +# name, prefixed with prefix_. Each variable will be filled with the arguments +# that occur after the given arg_name is encountered up to the next arg_name or +# the end of the arguments. All options are removed from these +# lists. PARSE_ARGUMENTS also creates a prefix_DEFAULT_ARGS variable containing +# the list of all arguments up to the first arg_name encountered. +# +# Here is a simple, albeit impractical, example of using PARSE_ARGUMENTS that +# demonstrates its behavior. +# +# SET(arguments +# hello OPTION3 world +# LIST3 foo bar +# OPTION2 +# LIST1 fuz baz +# ) +# +# PARSE_ARGUMENTS(ARG "LIST1;LIST2;LIST3" "OPTION1;OPTION2;OPTION3" ${arguments}) +# +# PARSE_ARGUMENTS creates 7 variables and sets them as follows: +# ARG_DEFAULT_ARGS: hello;world +# ARG_LIST1: fuz;baz +# ARG_LIST2: +# ARG_LIST3: foo;bar +# ARG_OPTION1: FALSE +# ARG_OPTION2: TRUE +# ARG_OPTION3: TRUE +# +# If you don't have any options, use an empty string in its place. +# PARSE_ARGUMENTS(ARG "LIST1;LIST2;LIST3" "" ${arguments}) +# Likewise if you have no lists.
+# PARSE_ARGUMENTS(ARG "" "OPTION1;OPTION2;OPTION3" ${arguments}) + +MACRO(PARSE_ARGUMENTS prefix arg_names option_names) + SET(DEFAULT_ARGS) + FOREACH(arg_name ${arg_names}) + SET(${prefix}_${arg_name}) + ENDFOREACH(arg_name) + FOREACH(option ${option_names}) + SET(${prefix}_${option} FALSE) + ENDFOREACH(option) + + SET(current_arg_name DEFAULT_ARGS) + SET(current_arg_list) + FOREACH(arg ${ARGN}) + SET(larg_names ${arg_names}) + LIST(FIND larg_names "${arg}" is_arg_name) + IF (is_arg_name GREATER -1) + SET(${prefix}_${current_arg_name} ${current_arg_list}) + SET(current_arg_name ${arg}) + SET(current_arg_list) + ELSE (is_arg_name GREATER -1) + SET(loption_names ${option_names}) + LIST(FIND loption_names "${arg}" is_option) + IF (is_option GREATER -1) + SET(${prefix}_${arg} TRUE) + ELSE (is_option GREATER -1) + SET(current_arg_list ${current_arg_list} ${arg}) + ENDIF (is_option GREATER -1) + ENDIF (is_arg_name GREATER -1) + ENDFOREACH(arg) + SET(${prefix}_${current_arg_name} ${current_arg_list}) +ENDMACRO(PARSE_ARGUMENTS) diff --git a/final/cmake/modules/LLVMProcessSources.cmake b/final/cmake/modules/LLVMProcessSources.cmake new file mode 100644 index 00000000000..270292ad3b8 --- /dev/null +++ b/final/cmake/modules/LLVMProcessSources.cmake @@ -0,0 +1,90 @@ +include(AddFileDependencies) + +function(llvm_replace_compiler_option var old new) + # Replaces a compiler option or switch `old' in `var' by `new'. + # If `old' is not in `var', appends `new' to `var'. + # Example: llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2") + # If the option already is on the variable, don't add it: + if( "${${var}}" MATCHES "(^| )${new}($| )" ) + set(n "") + else() + set(n "${new}") + endif() + if( "${${var}}" MATCHES "(^| )${old}($| )" ) + string( REGEX REPLACE "(^| )${old}($| )" " ${n} " ${var} "${${var}}" ) + else() + set( ${var} "${${var}} ${n}" ) + endif() + set( ${var} "${${var}}" PARENT_SCOPE ) +endfunction(llvm_replace_compiler_option) + +macro(add_td_sources srcs) + file(GLOB tds *.td) + if( tds ) + source_group("TableGen descriptions" FILES ${tds}) + set_source_files_properties(${tds} PROPERTIES HEADER_FILE_ONLY ON) + list(APPEND ${srcs} ${tds}) + endif() +endmacro(add_td_sources) + + +macro(add_header_files srcs) + file(GLOB hds *.h *.def) + if( hds ) + set_source_files_properties(${hds} PROPERTIES HEADER_FILE_ONLY ON) + list(APPEND ${srcs} ${hds}) + endif() +endmacro(add_header_files) + + +function(llvm_process_sources OUT_VAR) + set( sources ${ARGN} ) + llvm_check_source_file_list( ${sources} ) + # Create file dependencies on the tablegenned files, if any. Seems + # that this is not strictly needed, as dependencies of the .cpp + # sources on the tablegenned .inc files are detected and handled, + # but just in case... 
+ foreach( s ${sources} ) + set( f ${CMAKE_CURRENT_SOURCE_DIR}/${s} ) + add_file_dependencies( ${f} ${TABLEGEN_OUTPUT} ) + endforeach(s) + if( MSVC_IDE ) + # This adds .td and .h files to the Visual Studio solution: + add_td_sources(sources) + add_header_files(sources) + endif() + + # Set common compiler options: + if( NOT LLVM_REQUIRES_EH ) + if( CMAKE_COMPILER_IS_GNUCXX ) + add_definitions( -fno-exceptions ) + elseif( MSVC ) + llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/EHsc" "/EHs-c-") + add_definitions( /D_HAS_EXCEPTIONS=0 ) + endif() + endif() + if( NOT LLVM_REQUIRES_RTTI ) + if( CMAKE_COMPILER_IS_GNUCXX ) + llvm_replace_compiler_option(CMAKE_CXX_FLAGS "-frtti" "-fno-rtti") + elseif( MSVC ) + llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/GR" "/GR-") + endif() + endif() + + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" PARENT_SCOPE ) + set( ${OUT_VAR} ${sources} PARENT_SCOPE ) +endfunction(llvm_process_sources) + + +function(llvm_check_source_file_list) + set(listed ${ARGN}) + file(GLOB globbed *.cpp) + foreach(g ${globbed}) + get_filename_component(fn ${g} NAME) + list(FIND listed ${fn} idx) + if( idx LESS 0 ) + message(SEND_ERROR "Found unknown source file ${g} +Please update ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt\n") + endif() + endforeach() +endfunction(llvm_check_source_file_list) diff --git a/final/cmake/modules/TableGen.cmake b/final/cmake/modules/TableGen.cmake new file mode 100644 index 00000000000..9d67137bb42 --- /dev/null +++ b/final/cmake/modules/TableGen.cmake @@ -0,0 +1,46 @@ +# LLVM_TARGET_DEFINITIONS must contain the name of the .td file to process. +# Extra parameters for `tblgen' may come after the `ofn' parameter. +# Adds the name of the generated file to TABLEGEN_OUTPUT. + +macro(tablegen ofn) + file(GLOB local_tds "*.td") + file(GLOB_RECURSE global_tds "${LLVM_MAIN_SRC_DIR}/include/llvm/*.td") + + if (IS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + else() + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE + ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}) + endif() + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # Generate tablegen output in a temporary file. + COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR} + -I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + # The file in LLVM_TARGET_DEFINITIONS may not be in the current + # directory and local_tds may not contain it, so we must + # explicitly list it here: + DEPENDS ${LLVM_TABLEGEN_EXE} ${local_tds} ${global_tds} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} + COMMENT "Building ${ofn}..." + ) + add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} + # Only update the real output file if there are any differences. + # This prevents recompilation of all the files depending on it if there + # aren't any.
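    # For instance, with a hypothetical ofn of FooGenInstrInfo.inc the command
    # below amounts to
    #   cmake -E copy_if_different FooGenInstrInfo.inc.tmp FooGenInstrInfo.inc
    # (both paths under ${CMAKE_CURRENT_BINARY_DIR}); the real .inc is
    # rewritten only when the regenerated contents actually differ.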
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different + ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + ${CMAKE_CURRENT_BINARY_DIR}/${ofn} + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp + COMMENT "" + ) + + # `make clean' must remove all those generated files: + set_property(DIRECTORY APPEND + PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${ofn}.tmp ${ofn}) + + set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn}) + set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${ofn} + PROPERTIES GENERATED 1) +endmacro(tablegen) diff --git a/final/cmake/modules/VersionFromVCS.cmake b/final/cmake/modules/VersionFromVCS.cmake new file mode 100644 index 00000000000..81739be927a --- /dev/null +++ b/final/cmake/modules/VersionFromVCS.cmake @@ -0,0 +1,46 @@ +# Adds version control information to the variable VERS. To determine +# the version control system in use (if any), it checks for the +# existence of certain subdirectories under CMAKE_CURRENT_SOURCE_DIR. + +function(add_version_info_from_vcs VERS) + set(result ${${VERS}}) + if( EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.svn" ) + set(result "${result}svn") + # FindSubversion does not work with symlinks. See PR 8437 + if( NOT IS_SYMLINK "${CMAKE_CURRENT_SOURCE_DIR}" ) + find_package(Subversion) + endif() + if( Subversion_FOUND ) + subversion_wc_info( ${CMAKE_CURRENT_SOURCE_DIR} Project ) + if( Project_WC_REVISION ) + set(result "${result}-r${Project_WC_REVISION}") + endif() + endif() + elseif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/.git ) + set(result "${result}git") + # Try to get a ref-id + find_program(git_executable NAMES git git.exe git.cmd) + if( git_executable ) + execute_process(COMMAND ${git_executable} show-ref HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) + if( git_result EQUAL 0 ) + string(SUBSTRING ${git_output} 0 7 git_ref_id) + set(result "${result}-${git_ref_id}") + else() + execute_process(COMMAND ${git_executable} svn log --limit=1 --oneline + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + TIMEOUT 5 + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) + if( git_result EQUAL 0 ) + string(REGEX MATCH r[0-9]+ git_svn_rev ${git_output}) + set(result "${result}-svn-${git_svn_rev}") + endif() + endif() + endif() + endif() + set(${VERS} ${result} PARENT_SCOPE) +endfunction(add_version_info_from_vcs) diff --git a/final/configure b/final/configure new file mode 100755 index 00000000000..52bf988247b --- /dev/null +++ b/final/configure @@ -0,0 +1,24115 @@ +#! /bin/sh +# Guess values for system-dependent variables and create Makefiles. +# Generated by GNU Autoconf 2.60 for llvm 2.9. +# +# Report bugs to <llvmbugs@cs.uiuc.edu>. +# +# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +# 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. +# +# Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature.
+ alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + +if test "x$CONFIG_SHELL" = x; then + if (eval ":") 2>/dev/null; then + as_have_required=yes +else + as_have_required=no +fi + + if test $as_have_required = yes && (eval ": +(as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. 
+fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=\$LINENO + as_lineno_2=\$LINENO + test \"x\$as_lineno_1\" != \"x\$as_lineno_2\" && + test \"x\`expr \$as_lineno_1 + 1\`\" = \"x\$as_lineno_2\") || { (exit 1); exit 1; } +") 2> /dev/null; then + : +else + as_candidate_shells= + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in /usr/bin/posix$PATH_SEPARATOR/bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + case $as_dir in + /*) + for as_base in sh bash ksh sh5; do + as_candidate_shells="$as_candidate_shells $as_dir/$as_base" + done;; + esac +done +IFS=$as_save_IFS + + + for as_shell in $as_candidate_shells $SHELL; do + # Try only shells that exist, to save several forks. + if { test -f "$as_shell" || test -f "$as_shell.exe"; } && + { ("$as_shell") 2> /dev/null <<\_ASEOF +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +: +_ASEOF +}; then + CONFIG_SHELL=$as_shell + as_have_required=yes + if { "$as_shell" 2> /dev/null <<\_ASEOF +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + +: +(as_func_return () { + (exit $1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = "$1" ); then + : +else + exitcode=1 + echo positional parameters were not saved. 
+fi + +test $exitcode = 0) || { (exit 1); exit 1; } + +( + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2") || { (exit 1); exit 1; } + +_ASEOF +}; then + break +fi + +fi + + done + + if test "x$CONFIG_SHELL" != x; then + for as_var in BASH_ENV ENV + do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + done + export CONFIG_SHELL + exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"} +fi + + + if test $as_have_required = no; then + echo This script requires a shell more modern than all the + echo shells that I found on your system. Please install a + echo modern shell, or manually run the script under such a + echo shell if you do have one. + { (exit 1); exit 1; } +fi + + +fi + +fi + + + +(eval "as_func_return () { + (exit \$1) +} +as_func_success () { + as_func_return 0 +} +as_func_failure () { + as_func_return 1 +} +as_func_ret_success () { + return 0 +} +as_func_ret_failure () { + return 1 +} + +exitcode=0 +if as_func_success; then + : +else + exitcode=1 + echo as_func_success failed. +fi + +if as_func_failure; then + exitcode=1 + echo as_func_failure succeeded. +fi + +if as_func_ret_success; then + : +else + exitcode=1 + echo as_func_ret_success failed. +fi + +if as_func_ret_failure; then + exitcode=1 + echo as_func_ret_failure succeeded. +fi + +if ( set x; as_func_ret_success y && test x = \"\$1\" ); then + : +else + exitcode=1 + echo positional parameters were not saved. +fi + +test \$exitcode = 0") || { + echo No shell found that supports shell functions. + echo Please tell autoconf@gnu.org about your system, + echo including any error possibly output before this + echo message +} + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. 
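# (Callers combine these as, e.g., `echo $ECHO_N "checking for X... $ECHO_C"`,
# which suppresses the trailing newline under either echo dialect.)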
+ *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +# Find out whether ``test -x'' works. Don't use a zero-byte file, as +# systems may use methods other than mode bits to determine executability. +cat >conf$$.file <<_ASEOF +#! /bin/sh +exit 0 +_ASEOF +chmod +x conf$$.file +if test -x conf$$.file >/dev/null 2>&1; then + as_executable_p="test -x" +else + as_executable_p=: +fi +rm -f conf$$.file + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. +as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + + +exec 7<&0 </dev/null 6>&1 + +# Name of the host. +# hostname on some systems (SVR3.2, Linux) returns a bogus exit status, +# so uname gets run too. +ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q` + +# +# Initializations. +# +ac_default_prefix=/usr/local +ac_clean_files= +ac_config_libobj_dir=. +LIBOBJS= +cross_compiling=no +subdirs= +MFLAGS= +MAKEFLAGS= +SHELL=${CONFIG_SHELL-/bin/sh} + +# Identity of this package. +PACKAGE_NAME='llvm' +PACKAGE_TARNAME='-llvm-' +PACKAGE_VERSION='2.9' +PACKAGE_STRING='llvm 2.9' +PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu' + +ac_unique_file="lib/VMCore/Module.cpp" +# Factoring default headers for most tests.
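# (These defaults are interpolated at the top of generated test programs; a
# header probe, for example, compiles a conftest.c that begins with
# $ac_includes_default followed by the header under test.)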
+ac_includes_default="\ +#include +#if HAVE_SYS_TYPES_H +# include +#endif +#if HAVE_SYS_STAT_H +# include +#endif +#if STDC_HEADERS +# include +# include +#else +# if HAVE_STDLIB_H +# include +# endif +#endif +#if HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include +# endif +# include +#endif +#if HAVE_STRINGS_H +# include +#endif +#if HAVE_INTTYPES_H +# include +#endif +#if HAVE_STDINT_H +# include +#endif +#if HAVE_UNISTD_H +# include +#endif" + +ac_subst_vars='SHELL +PATH_SEPARATOR +PACKAGE_NAME +PACKAGE_TARNAME +PACKAGE_VERSION +PACKAGE_STRING +PACKAGE_BUGREPORT +exec_prefix +prefix +program_transform_name +bindir +sbindir +libexecdir +datarootdir +datadir +sysconfdir +sharedstatedir +localstatedir +includedir +oldincludedir +docdir +infodir +htmldir +dvidir +pdfdir +psdir +libdir +localedir +mandir +DEFS +ECHO_C +ECHO_N +ECHO_T +LIBS +build_alias +host_alias +target_alias +LLVM_COPYRIGHT +subdirs +ENABLE_POLLY +LLVM_HAS_POLLY +build +build_cpu +build_vendor +build_os +host +host_cpu +host_vendor +host_os +target +target_cpu +target_vendor +target_os +OS +HOST_OS +TARGET_OS +LINKALL +NOLINKALL +LLVM_ON_UNIX +LLVM_ON_WIN32 +ARCH +ENDIAN +CC +CFLAGS +LDFLAGS +CPPFLAGS +ac_ct_CC +EXEEXT +OBJEXT +CPP +GREP +EGREP +LLVM_CROSS_COMPILING +BUILD_CC +BUILD_EXEEXT +BUILD_CXX +CVSBUILD +ENABLE_OPTIMIZED +ENABLE_PROFILING +DISABLE_ASSERTIONS +ENABLE_EXPENSIVE_CHECKS +EXPENSIVE_CHECKS +DEBUG_RUNTIME +DEBUG_SYMBOLS +JIT +TARGET_HAS_JIT +ENABLE_DOCS +ENABLE_DOXYGEN +ENABLE_THREADS +ENABLE_PTHREADS +ENABLE_PIC +ENABLE_SHARED +ENABLE_EMBED_STDCXX +ENABLE_TIMESTAMPS +TARGETS_TO_BUILD +LLVM_ENUM_TARGETS +LLVM_ENUM_ASM_PRINTERS +LLVM_ENUM_ASM_PARSERS +LLVM_ENUM_DISASSEMBLERS +ENABLE_CBE_PRINTF_A +CLANGPATH +CLANGXXPATH +ENABLE_BUILT_CLANG +OPTIMIZE_OPTION +EXTRA_OPTIONS +BINUTILS_INCDIR +CXX +CXXFLAGS +ac_ct_CXX +NM +ifGNUmake +LN_S +CMP +CP +DATE +FIND +MKDIR +MV +RANLIB +AR +RM +SED +TAR +BINPWD +GRAPHVIZ +DOT +FDP +NEATO +TWOPI +CIRCO +GV +DOTTY +XDOT_PY +PERL +HAVE_PERL +INSTALL_PROGRAM +INSTALL_SCRIPT +INSTALL_DATA +BZIP2 +CAT +DOXYGEN +GROFF +GZIPBIN +POD2HTML +POD2MAN +PDFROFF +RUNTEST +TCLSH +ZIP +OCAMLC +OCAMLOPT +OCAMLDEP +OCAMLDOC +GAS +HAVE_LINK_VERSION_SCRIPT +INSTALL_LTDL_TRUE +INSTALL_LTDL_FALSE +CONVENIENCE_LTDL_TRUE +CONVENIENCE_LTDL_FALSE +LIBADD_DL +LLVMGCCCOMMAND +LLVMGXXCOMMAND +LLVMGCC +LLVMGXX +LLVMCC_OPTION +NO_VARIADIC_MACROS +NO_MISSING_FIELD_INITIALIZERS +USE_UDIS86 +USE_OPROFILE +HAVE_PTHREAD +HUGE_VAL_SANITY +MMAP_FILE +LLVMCC_EMITIR_FLAG +LLVMCC1 +LLVMCC1PLUS +LLVMGCCDIR +LLVMGCC_LANGS +LLVMGCC_DRAGONEGG +LLVMCC_DISABLEOPT_FLAGS +SHLIBEXT +SHLIBPATH_VAR +LLVM_PREFIX +LLVM_BINDIR +LLVM_LIBDIR +LLVM_DATADIR +LLVM_DOCSDIR +LLVM_ETCDIR +LLVM_INCLUDEDIR +LLVM_INFODIR +LLVM_MANDIR +LLVM_CONFIGTIME +BINDINGS_TO_BUILD +ALL_BINDINGS +OCAML_LIBDIR +ENABLE_VISIBILITY_INLINES_HIDDEN +RPATH +RDYNAMIC +LIBOBJS +LTLIBOBJS' +ac_subst_files='' + ac_precious_vars='build_alias +host_alias +target_alias +CC +CFLAGS +LDFLAGS +CPPFLAGS +CPP +CXX +CXXFLAGS +CCC' +ac_subdirs_all='projects/llvm-gcc +projects/test-suite +projects/llvm-test +projects/poolalloc +projects/llvm-poolalloc +projects/sample +projects/privbracket +projects/llvm-stacker +projects/llvm-reopt +projects/llvm-java +projects/llvm-tv +projects/safecode +projects/llvm-kernel +tools/polly' + +# Initialize some variables set by options. +ac_init_help= +ac_init_version=false +# The variables have the same names as the options, with +# dashes changed to underlines. 
+cache_file=/dev/null +exec_prefix=NONE +no_create= +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +verbose= +x_includes=NONE +x_libraries=NONE + +# Installation directory options. +# These are left unexpanded so users can "make install exec_prefix=/foo" +# and all the variables that are supposed to be based on exec_prefix +# by default will actually change. +# Use braces instead of parens because sh, perl, etc. also accept them. +# (The list follows the same order as the GNU Coding Standards.) +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datarootdir='${prefix}/share' +datadir='${datarootdir}' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +includedir='${prefix}/include' +oldincludedir='/usr/include' +docdir='${datarootdir}/doc/${PACKAGE_TARNAME}' +infodir='${datarootdir}/info' +htmldir='${docdir}' +dvidir='${docdir}' +pdfdir='${docdir}' +psdir='${docdir}' +libdir='${exec_prefix}/lib' +localedir='${datarootdir}/locale' +mandir='${datarootdir}/man' + +ac_prev= +ac_dashdash= +for ac_option +do + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval $ac_prev=\$ac_option + ac_prev= + continue + fi + + case $ac_option in + *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;; + *) ac_optarg=yes ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. + + case $ac_dashdash$ac_option in + --) + ac_dashdash=yes ;; + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir=$ac_optarg ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build_alias ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build_alias=$ac_optarg ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file=$ac_optarg ;; + + --config-cache | -C) + cache_file=config.cache ;; + + -datadir | --datadir | --datadi | --datad) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=*) + datadir=$ac_optarg ;; + + -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \ + | --dataroo | --dataro | --datar) + ac_prev=datarootdir ;; + -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \ + | --dataroot=* | --dataroo=* | --dataro=* | --datar=*) + datarootdir=$ac_optarg ;; + + -disable-* | --disable-*) + ac_feature=`expr "x$ac_option" : 'x-*disable-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval enable_$ac_feature=no ;; + + -docdir | --docdir | --docdi | --doc | --do) + ac_prev=docdir ;; + -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*) + docdir=$ac_optarg ;; + + -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv) + ac_prev=dvidir ;; + -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*) + dvidir=$ac_optarg ;; + + -enable-* | --enable-*) + ac_feature=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` + # Reject names that are not valid shell variable names. 
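      # (e.g. --enable-foo-bar yields ac_feature=foo-bar, which passes the expr
      # test below and becomes enable_foo_bar; a name with characters outside
      # [-_a-zA-Z0-9] is rejected.)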
+ expr "x$ac_feature" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid feature name: $ac_feature" >&2 + { (exit 1); exit 1; }; } + ac_feature=`echo $ac_feature | sed 's/-/_/g'` + eval enable_$ac_feature=\$ac_optarg ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix=$ac_optarg ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he | -h) + ac_init_help=long ;; + -help=r* | --help=r* | --hel=r* | --he=r* | -hr*) + ac_init_help=recursive ;; + -help=s* | --help=s* | --hel=s* | --he=s* | -hs*) + ac_init_help=short ;; + + -host | --host | --hos | --ho) + ac_prev=host_alias ;; + -host=* | --host=* | --hos=* | --ho=*) + host_alias=$ac_optarg ;; + + -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht) + ac_prev=htmldir ;; + -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \ + | --ht=*) + htmldir=$ac_optarg ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir=$ac_optarg ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir=$ac_optarg ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir=$ac_optarg ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir=$ac_optarg ;; + + -localedir | --localedir | --localedi | --localed | --locale) + ac_prev=localedir ;; + -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*) + localedir=$ac_optarg ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | --localst | --locals) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*) + localstatedir=$ac_optarg ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir=$ac_optarg ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. 
+ with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c | -n) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir=$ac_optarg ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix=$ac_optarg ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix=$ac_optarg ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | --program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix=$ac_optarg ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name=$ac_optarg ;; + + -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd) + ac_prev=pdfdir ;; + -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*) + pdfdir=$ac_optarg ;; + + -psdir | --psdir | --psdi | --psd | --ps) + ac_prev=psdir ;; + -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*) + psdir=$ac_optarg ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir=$ac_optarg ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir=$ac_optarg ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + 
site=$ac_optarg ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + srcdir=$ac_optarg ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir=$ac_optarg ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target_alias ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target_alias=$ac_optarg ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers | -V) + ac_init_version=: ;; + + -with-* | --with-*) + ac_package=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package| sed 's/-/_/g'` + eval with_$ac_package=\$ac_optarg ;; + + -without-* | --without-*) + ac_package=`expr "x$ac_option" : 'x-*without-\(.*\)'` + # Reject names that are not valid shell variable names. + expr "x$ac_package" : ".*[^-_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid package name: $ac_package" >&2 + { (exit 1); exit 1; }; } + ac_package=`echo $ac_package | sed 's/-/_/g'` + eval with_$ac_package=no ;; + + --x) + # Obsolete; use --with-x. + with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes=$ac_optarg ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries=$ac_optarg ;; + + -*) { echo "$as_me: error: unrecognized option: $ac_option +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; } + ;; + + *=*) + ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='` + # Reject names that are not valid shell variable names. + expr "x$ac_envvar" : ".*[^_$as_cr_alnum]" >/dev/null && + { echo "$as_me: error: invalid variable name: $ac_envvar" >&2 + { (exit 1); exit 1; }; } + eval $ac_envvar=\$ac_optarg + export $ac_envvar ;; + + *) + # FIXME: should be removed in autoconf 3.0. + echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && + echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option} + ;; + + esac +done + +if test -n "$ac_prev"; then + ac_option=--`echo $ac_prev | sed 's/_/-/g'` + { echo "$as_me: error: missing argument to $ac_option" >&2 + { (exit 1); exit 1; }; } +fi + +# Be sure to have absolute directory names. 
+for ac_var in exec_prefix prefix bindir sbindir libexecdir datarootdir \ + datadir sysconfdir sharedstatedir localstatedir includedir \ + oldincludedir docdir infodir htmldir dvidir pdfdir psdir \ + libdir localedir mandir +do + eval ac_val=\$$ac_var + case $ac_val in + [\\/$]* | ?:[\\/]* ) continue;; + NONE | '' ) case $ac_var in *prefix ) continue;; esac;; + esac + { echo "$as_me: error: expected an absolute directory name for --$ac_var: $ac_val" >&2 + { (exit 1); exit 1; }; } +done + +# There might be people who depend on the old broken behavior: `$host' +# used to hold the argument of --host etc. +# FIXME: To remove some day. +build=$build_alias +host=$host_alias +target=$target_alias + +# FIXME: To remove some day. +if test "x$host_alias" != x; then + if test "x$build_alias" = x; then + cross_compiling=maybe + echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host. + If a cross compiler is detected then cross compile mode will be used." >&2 + elif test "x$build_alias" != "x$host_alias"; then + cross_compiling=yes + fi +fi + +ac_tool_prefix= +test -n "$host_alias" && ac_tool_prefix=$host_alias- + +test "$silent" = yes && exec 6>/dev/null + + +ac_pwd=`pwd` && test -n "$ac_pwd" && +ac_ls_di=`ls -di .` && +ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` || + { echo "$as_me: error: Working directory cannot be determined" >&2 + { (exit 1); exit 1; }; } +test "X$ac_ls_di" = "X$ac_pwd_ls_di" || + { echo "$as_me: error: pwd does not report name of working directory" >&2 + { (exit 1); exit 1; }; } + + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then the parent directory. + ac_confdir=`$as_dirname -- "$0" || +$as_expr X"$0" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$0" : 'X\(//\)[^/]' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 2>/dev/null || +echo X"$0" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + srcdir=$ac_confdir + if test ! -r "$srcdir/$ac_unique_file"; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r "$srcdir/$ac_unique_file"; then + test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .." + { echo "$as_me: error: cannot find sources ($ac_unique_file) in $srcdir" >&2 + { (exit 1); exit 1; }; } +fi +ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work" +ac_abs_confdir=`( + cd "$srcdir" && test -r "./$ac_unique_file" || { echo "$as_me: error: $ac_msg" >&2 + { (exit 1); exit 1; }; } + pwd)` +# When building in place, set srcdir=. +if test "$ac_abs_confdir" = "$ac_pwd"; then + srcdir=. +fi +# Remove unnecessary trailing slashes from srcdir. +# Double slashes in file names in object file debugging info +# mess up M-x gdb in Emacs. +case $srcdir in +*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;; +esac +for ac_var in $ac_precious_vars; do + eval ac_env_${ac_var}_set=\${${ac_var}+set} + eval ac_env_${ac_var}_value=\$${ac_var} + eval ac_cv_env_${ac_var}_set=\${${ac_var}+set} + eval ac_cv_env_${ac_var}_value=\$${ac_var} +done + +# +# Report the --help message. +# +if test "$ac_init_help" = "long"; then + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat <<_ACEOF +\`configure' configures llvm 2.9 to adapt to many kinds of systems. + +Usage: $0 [OPTION]... [VAR=VALUE]... 
+ +To assign environment variables (e.g., CC, CFLAGS...), specify them as +VAR=VALUE. See below for descriptions of some of the useful variables. + +Defaults for the options are specified in brackets. + +Configuration: + -h, --help display this help and exit + --help=short display options specific to this package + --help=recursive display the short help of all the included packages + -V, --version display version information and exit + -q, --quiet, --silent do not print \`checking...' messages + --cache-file=FILE cache test results in FILE [disabled] + -C, --config-cache alias for \`--cache-file=config.cache' + -n, --no-create do not create output files + --srcdir=DIR find the sources in DIR [configure dir or \`..'] + +Installation directories: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [PREFIX] + +By default, \`make install' will install all the files in +\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc. You can specify +an installation prefix other than \`$ac_default_prefix' using \`--prefix', +for instance \`--prefix=\$HOME'. + +For better control, use the options below. + +Fine tuning of the installation directories: + --bindir=DIR user executables [EPREFIX/bin] + --sbindir=DIR system admin executables [EPREFIX/sbin] + --libexecdir=DIR program executables [EPREFIX/libexec] + --sysconfdir=DIR read-only single-machine data [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data [PREFIX/com] + --localstatedir=DIR modifiable single-machine data [PREFIX/var] + --libdir=DIR object code libraries [EPREFIX/lib] + --includedir=DIR C header files [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc [/usr/include] + --datarootdir=DIR read-only arch.-independent data root [PREFIX/share] + --datadir=DIR read-only architecture-independent data [DATAROOTDIR] + --infodir=DIR info documentation [DATAROOTDIR/info] + --localedir=DIR locale-dependent data [DATAROOTDIR/locale] + --mandir=DIR man documentation [DATAROOTDIR/man] + --docdir=DIR documentation root [DATAROOTDIR/doc/-llvm-] + --htmldir=DIR html documentation [DOCDIR] + --dvidir=DIR dvi documentation [DOCDIR] + --pdfdir=DIR pdf documentation [DOCDIR] + --psdir=DIR ps documentation [DOCDIR] +_ACEOF + + cat <<\_ACEOF + +System types: + --build=BUILD configure for building on BUILD [guessed] + --host=HOST cross-compile to build programs to run on HOST [BUILD] + --target=TARGET configure for building compilers for TARGET [HOST] +_ACEOF +fi + +if test -n "$ac_init_help"; then + case $ac_init_help in + short | recursive ) echo "Configuration of llvm 2.9:";; + esac + cat <<\_ACEOF + +Optional Features: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --enable-polly Use polly if available (default is YES) + --enable-optimized Compile with optimizations enabled (default is YES) + --enable-profiling Compile with profiling enabled (default is NO) + --enable-assertions Compile with assertion checks enabled (default is NO) + --enable-expensive-checks + Compile with expensive debug checks enabled (default + is NO) + --enable-debug-runtime Build runtime libs with debug symbols (default is + NO) + --enable-debug-symbols Build compiler with debug symbols (default is NO if + optimization is on and YES if it's off) + --enable-jit Enable Just In Time Compiling (default is YES) + --enable-docs Build documents (default is YES) + 
--enable-doxygen Build doxygen documentation (default is NO) + --enable-threads Use threads if available (default is YES) + --enable-pthreads Use pthreads if available (default is YES) + --enable-pic Build LLVM with Position Independent Code (default + is YES) + --enable-shared Build a shared library and link tools against it + (default is NO) + --enable-embed-stdcxx Build a shared library with embedded libstdc++ for + Win32 DLL (default is YES) + --enable-timestamps Enable embedding timestamp information in build + (default is YES) + --enable-targets Build specific host targets: all or + target1,target2,... Valid targets are: host, x86, + x86_64, sparc, powerpc, alpha, arm, mips, spu, + xcore, msp430, systemz, blackfin, ptx, cbe, and cpp + (default=all) + --enable-cbe-printf-a Enable C Backend output with hex floating point via + %a (default is YES) + --enable-bindings Build specific language bindings: + all,auto,none,{binding-name} (default=auto) + --enable-libffi Check for the presence of libffi (default is NO) + --enable-ltdl-install install libltdl + +Optional Packages: + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --with-llvmgccdir Specify location of llvm-gcc install dir (default + searches PATH) + --with-llvmgcc Specify location of llvm-gcc driver (default + searches PATH) + --with-llvmgxx Specify location of llvm-g++ driver (default + searches PATH) + --with-clang Specify location of clang compiler (default is + --with-built-clang) + --with-built-clang Use the compiled Clang as the LLVM compiler + (default=check) + --with-optimize-option Select the compiler options to use for optimized + builds + --with-extra-options Specify additional options to compile LLVM with + --with-ocaml-libdir Specify install location for ocaml bindings (default + is stdlib) + --with-clang-resource-dir + Relative directory from the Clang binary for + resource files + --with-c-include-dirs Colon separated list of directories clang will + search for headers + --with-cxx-include-root Directory with the libstdc++ headers. + --with-cxx-include-arch Architecture of the libstdc++ headers. + --with-cxx-include-32bit-dir + 32 bit multilib dir. + --with-cxx-include-64bit-dir + 64 bit multilib directory. + --with-binutils-include Specify path to binutils/include/ containing + plugin-api.h file for gold plugin. + --with-tclinclude directory where tcl headers are + --with-llvmcc=<name> Choose the LLVM capable compiler to use (llvm-gcc, + clang, or none; default=check) + --with-udis86=<path> Use udis86 external x86 disassembler library + --with-oprofile=<prefix> + Tell OProfile >= 0.9.4 how to symbolize JIT output + +Some influential environment variables: + CC C compiler command + CFLAGS C compiler flags + LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a + nonstandard directory + CPPFLAGS C/C++/Objective C preprocessor flags, e.g. -I<include dir> if + you have headers in a nonstandard directory + CPP C preprocessor + CXX C++ compiler command + CXXFLAGS C++ compiler flags + +Use these variables to override the choices made by `configure' or to help +it to find libraries and programs with nonstandard names/locations. + +Report bugs to <llvmbugs@cs.uiuc.edu>. +_ACEOF +ac_status=$? +fi + +if test "$ac_init_help" = "recursive"; then + # If there are subdirs, report their specific --help. + for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue + test -d "$ac_dir" || continue + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=.
ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + cd "$ac_dir" || { ac_status=$?; continue; } + # Check for guested configure. + if test -f "$ac_srcdir/configure.gnu"; then + echo && + $SHELL "$ac_srcdir/configure.gnu" --help=recursive + elif test -f "$ac_srcdir/configure"; then + echo && + $SHELL "$ac_srcdir/configure" --help=recursive + else + echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + fi || ac_status=$? + cd "$ac_pwd" || { ac_status=$?; break; } + done +fi + +test -n "$ac_init_help" && exit $ac_status +if $ac_init_version; then + cat <<\_ACEOF +llvm configure 2.9 +generated by GNU Autoconf 2.60 + +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, +2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. +This configure script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it. + +Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. +_ACEOF + exit +fi +cat >config.log <<_ACEOF +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. + +It was created by llvm $as_me 2.9, which was +generated by GNU Autoconf 2.60. Invocation command line was + + $ $0 $@ + +_ACEOF +exec 5>>config.log +{ +cat <<_ASUNAME +## --------- ## +## Platform. ## +## --------- ## + +hostname = `(hostname || uname -n) 2>/dev/null | sed 1q` +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` + +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null || echo unknown` + +/bin/arch = `(/bin/arch) 2>/dev/null || echo unknown` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null || echo unknown` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown` +/usr/bin/hostinfo = `(/usr/bin/hostinfo) 2>/dev/null || echo unknown` +/bin/machine = `(/bin/machine) 2>/dev/null || echo unknown` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null || echo unknown` +/bin/universe = `(/bin/universe) 2>/dev/null || echo unknown` + +_ASUNAME + +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + echo "PATH: $as_dir" +done +IFS=$as_save_IFS + +} >&5 + +cat >&5 <<_ACEOF + + +## ----------- ## +## Core tests. ## +## ----------- ## + +_ACEOF + + +# Keep a trace of the command line. +# Strip out --no-create and --no-recursion so they do not pile up. 
+# Strip out --silent because we don't want to record it for future runs. +# Also quote any args containing shell meta-characters. +# Make two passes to allow for proper duplicate-argument suppression. +ac_configure_args= +ac_configure_args0= +ac_configure_args1= +ac_must_keep_next=false +for ac_pass in 1 2 +do + for ac_arg + do + case $ac_arg in + -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + continue ;; + *\'*) + ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + case $ac_pass in + 1) ac_configure_args0="$ac_configure_args0 '$ac_arg'" ;; + 2) + ac_configure_args1="$ac_configure_args1 '$ac_arg'" + if test $ac_must_keep_next = true; then + ac_must_keep_next=false # Got value, back to normal. + else + case $ac_arg in + *=* | --config-cache | -C | -disable-* | --disable-* \ + | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \ + | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \ + | -with-* | --with-* | -without-* | --without-* | --x) + case "$ac_configure_args0 " in + "$ac_configure_args1"*" '$ac_arg' "* ) continue ;; + esac + ;; + -* ) ac_must_keep_next=true ;; + esac + fi + ac_configure_args="$ac_configure_args '$ac_arg'" + ;; + esac + done +done +$as_unset ac_configure_args0 || test "${ac_configure_args0+set}" != set || { ac_configure_args0=; export ac_configure_args0; } +$as_unset ac_configure_args1 || test "${ac_configure_args1+set}" != set || { ac_configure_args1=; export ac_configure_args1; } + +# When interrupted or exit'd, cleanup temporary files, and complete +# config.log. We remove comments because anyway the quotes in there +# would cause problems or look ugly. +# WARNING: Use '\'' to represent an apostrophe within the trap. +# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. +trap 'exit_status=$? + # Save into config.log some information that might help in debugging. + { + echo + + cat <<\_ASBOX +## ---------------- ## +## Cache variables. ## +## ---------------- ## +_ASBOX + echo + # The following way of writing the cache mishandles newlines in values, +( + for ac_var in `(set) 2>&1 | sed -n '\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + (set) 2>&1 | + case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + sed -n \ + "s/'\''/'\''\\\\'\'''\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p" + ;; #( + *) + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) + echo + + cat <<\_ASBOX +## ----------------- ## +## Output variables. ## +## ----------------- ## +_ASBOX + echo + for ac_var in $ac_subst_vars + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + + if test -n "$ac_subst_files"; then + cat <<\_ASBOX +## ------------------- ## +## File substitutions. 
## +## ------------------- ## +_ASBOX + echo + for ac_var in $ac_subst_files + do + eval ac_val=\$$ac_var + case $ac_val in + *\'\''*) ac_val=`echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + esac + echo "$ac_var='\''$ac_val'\''" + done | sort + echo + fi + + if test -s confdefs.h; then + cat <<\_ASBOX +## ----------- ## +## confdefs.h. ## +## ----------- ## +_ASBOX + echo + cat confdefs.h + echo + fi + test "$ac_signal" != 0 && + echo "$as_me: caught signal $ac_signal" + echo "$as_me: exit $exit_status" + } >&5 + rm -f core *.core core.conftest.* && + rm -f -r conftest* confdefs* conf$$* $ac_clean_files && + exit $exit_status +' 0 +for ac_signal in 1 2 13 15; do + trap 'ac_signal='$ac_signal'; { (exit 1); exit 1; }' $ac_signal +done +ac_signal=0 + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -f -r conftest* confdefs.h + +# Predefined preprocessor variables. + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_NAME "$PACKAGE_NAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_TARNAME "$PACKAGE_TARNAME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_VERSION "$PACKAGE_VERSION" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_STRING "$PACKAGE_STRING" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" +_ACEOF + + +# Let the site file select an alternate cache file if it wants to. +# Prefer explicitly selected file to automatically selected ones. +if test -n "$CONFIG_SITE"; then + set x "$CONFIG_SITE" +elif test "x$prefix" != xNONE; then + set x "$prefix/share/config.site" "$prefix/etc/config.site" +else + set x "$ac_default_prefix/share/config.site" \ + "$ac_default_prefix/etc/config.site" +fi +shift +for ac_site_file +do + if test -r "$ac_site_file"; then + { echo "$as_me:$LINENO: loading site script $ac_site_file" >&5 +echo "$as_me: loading site script $ac_site_file" >&6;} + sed 's/^/| /' "$ac_site_file" >&5 + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + # Some versions of bash will fail to source /dev/null (special + # files actually), so we avoid doing that. + if test -f "$cache_file"; then + { echo "$as_me:$LINENO: loading cache $cache_file" >&5 +echo "$as_me: loading cache $cache_file" >&6;} + case $cache_file in + [\\/]* | ?:[\\/]* ) . "$cache_file";; + *) . "./$cache_file";; + esac + fi +else + { echo "$as_me:$LINENO: creating cache $cache_file" >&5 +echo "$as_me: creating cache $cache_file" >&6;} + >$cache_file +fi + +# Check that the precious variables saved in the cache have kept the same +# value. 
+ac_cache_corrupted=false +for ac_var in $ac_precious_vars; do + eval ac_old_set=\$ac_cv_env_${ac_var}_set + eval ac_new_set=\$ac_env_${ac_var}_set + eval ac_old_val=\$ac_cv_env_${ac_var}_value + eval ac_new_val=\$ac_env_${ac_var}_value + case $ac_old_set,$ac_new_set in + set,) + { echo "$as_me:$LINENO: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,set) + { echo "$as_me:$LINENO: error: \`$ac_var' was not set in the previous run" >&5 +echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + ac_cache_corrupted=: ;; + ,);; + *) + if test "x$ac_old_val" != "x$ac_new_val"; then + { echo "$as_me:$LINENO: error: \`$ac_var' has changed since the previous run:" >&5 +echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { echo "$as_me:$LINENO: former value: $ac_old_val" >&5 +echo "$as_me: former value: $ac_old_val" >&2;} + { echo "$as_me:$LINENO: current value: $ac_new_val" >&5 +echo "$as_me: current value: $ac_new_val" >&2;} + ac_cache_corrupted=: + fi;; + esac + # Pass precious variables to config.status. + if test "$ac_new_set" = set; then + case $ac_new_val in + *\'*) ac_arg=$ac_var=`echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *) ac_arg=$ac_var=$ac_new_val ;; + esac + case " $ac_configure_args " in + *" '$ac_arg' "*) ;; # Avoid dups. Use of quotes ensures accuracy. + *) ac_configure_args="$ac_configure_args '$ac_arg'" ;; + esac + fi +done +if $ac_cache_corrupted; then + { echo "$as_me:$LINENO: error: changes in the environment can compromise the build" >&5 +echo "$as_me: error: changes in the environment can compromise the build" >&2;} + { { echo "$as_me:$LINENO: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&5 +echo "$as_me: error: run \`make distclean' and/or \`rm $cache_file' and start over" >&2;} + { (exit 1); exit 1; }; } +fi + + + + + + + + + + + + + + + + + + + + + + + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + + +LLVM_COPYRIGHT="Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign." + + + + + + + +ac_aux_dir= +for ac_dir in autoconf "$srcdir"/autoconf; do + if test -f "$ac_dir/install-sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f "$ac_dir/install.sh"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + elif test -f "$ac_dir/shtool"; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/shtool install -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { { echo "$as_me:$LINENO: error: cannot find install-sh or install.sh in autoconf \"$srcdir\"/autoconf" >&5 +echo "$as_me: error: cannot find install-sh or install.sh in autoconf \"$srcdir\"/autoconf" >&2;} + { (exit 1); exit 1; }; } +fi + +# These three variables are undocumented and unsupported, +# and are intended to be withdrawn in a future Autoconf release. +# They can cause serious problems if a builder's source tree is in a directory +# whose full name contains unusual characters. +ac_config_guess="$SHELL $ac_aux_dir/config.guess" # Please don't use this var. +ac_config_sub="$SHELL $ac_aux_dir/config.sub" # Please don't use this var. +ac_configure="$SHELL $ac_aux_dir/configure" # Please don't use this var. + + + +if test ${srcdir} != "." 
; then + if test -f ${srcdir}/include/llvm/Config/config.h ; then + { { echo "$as_me:$LINENO: error: Already configured in ${srcdir}" >&5 +echo "$as_me: error: Already configured in ${srcdir}" >&2;} + { (exit 1); exit 1; }; } + fi +fi + + + +if test -d ${srcdir}/projects/llvm-gcc ; then + subdirs="$subdirs projects/llvm-gcc" + +fi + +if test -d ${srcdir}/projects/test-suite ; then + subdirs="$subdirs projects/test-suite" + +fi + +if test -d ${srcdir}/projects/llvm-test ; then + subdirs="$subdirs projects/llvm-test" + +fi + +if test -d ${srcdir}/projects/poolalloc ; then + subdirs="$subdirs projects/poolalloc" + +fi + +if test -d ${srcdir}/projects/llvm-poolalloc ; then + subdirs="$subdirs projects/llvm-poolalloc" + +fi + +for i in `ls ${srcdir}/projects` +do + if test -d ${srcdir}/projects/${i} ; then + case ${i} in + sample) subdirs="$subdirs projects/sample" + ;; + privbracket) subdirs="$subdirs projects/privbracket" + ;; + llvm-stacker) subdirs="$subdirs projects/llvm-stacker" + ;; + llvm-reopt) subdirs="$subdirs projects/llvm-reopt" +;; + llvm-java) subdirs="$subdirs projects/llvm-java" + ;; + llvm-tv) subdirs="$subdirs projects/llvm-tv" + ;; + safecode) subdirs="$subdirs projects/safecode" + ;; + llvm-kernel) subdirs="$subdirs projects/llvm-kernel" + ;; + llvm-gcc) ;; + test-suite) ;; + llvm-test) ;; + poolalloc) ;; + llvm-poolalloc) ;; + *) + { echo "$as_me:$LINENO: WARNING: Unknown project (${i}) won't be configured automatically" >&5 +echo "$as_me: WARNING: Unknown project (${i}) won't be configured automatically" >&2;} + ;; + esac + fi +done + +# Check whether --enable-polly was given. +if test "${enable_polly+set}" = set; then + enableval=$enable_polly; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_POLLY=1 + ;; + no) ENABLE_POLLY=0 + ;; + default) ENABLE_POLLY=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-polly. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + + +if (test -d ${srcdir}/tools/polly) && (test $ENABLE_POLLY -eq 1) ; then + LLVM_HAS_POLLY=1 + + subdirs="$subdirs tools/polly" + +fi + + +# Make sure we can run config.sub. +$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 || + { { echo "$as_me:$LINENO: error: cannot run $SHELL $ac_aux_dir/config.sub" >&5 +echo "$as_me: error: cannot run $SHELL $ac_aux_dir/config.sub" >&2;} + { (exit 1); exit 1; }; } + +{ echo "$as_me:$LINENO: checking build system type" >&5 +echo $ECHO_N "checking build system type... 
$ECHO_C" >&6; } +if test "${ac_cv_build+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_build_alias=$build_alias +test "x$ac_build_alias" = x && + ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"` +test "x$ac_build_alias" = x && + { { echo "$as_me:$LINENO: error: cannot guess build type; you must specify one" >&5 +echo "$as_me: error: cannot guess build type; you must specify one" >&2;} + { (exit 1); exit 1; }; } +ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $ac_build_alias failed" >&2;} + { (exit 1); exit 1; }; } + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_build" >&5 +echo "${ECHO_T}$ac_cv_build" >&6; } +case $ac_cv_build in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical build" >&5 +echo "$as_me: error: invalid value of canonical build" >&2;} + { (exit 1); exit 1; }; };; +esac +build=$ac_cv_build +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_build +shift +build_cpu=$1 +build_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +build_os=$* +IFS=$ac_save_IFS +case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac + + +{ echo "$as_me:$LINENO: checking host system type" >&5 +echo $ECHO_N "checking host system type... $ECHO_C" >&6; } +if test "${ac_cv_host+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "x$host_alias" = x; then + ac_cv_host=$ac_cv_build +else + ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $host_alias failed" >&2;} + { (exit 1); exit 1; }; } +fi + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_host" >&5 +echo "${ECHO_T}$ac_cv_host" >&6; } +case $ac_cv_host in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical host" >&5 +echo "$as_me: error: invalid value of canonical host" >&2;} + { (exit 1); exit 1; }; };; +esac +host=$ac_cv_host +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_host +shift +host_cpu=$1 +host_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +host_os=$* +IFS=$ac_save_IFS +case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac + + +{ echo "$as_me:$LINENO: checking target system type" >&5 +echo $ECHO_N "checking target system type... 
$ECHO_C" >&6; } +if test "${ac_cv_target+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "x$target_alias" = x; then + ac_cv_target=$ac_cv_host +else + ac_cv_target=`$SHELL "$ac_aux_dir/config.sub" $target_alias` || + { { echo "$as_me:$LINENO: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&5 +echo "$as_me: error: $SHELL $ac_aux_dir/config.sub $target_alias failed" >&2;} + { (exit 1); exit 1; }; } +fi + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_target" >&5 +echo "${ECHO_T}$ac_cv_target" >&6; } +case $ac_cv_target in +*-*-*) ;; +*) { { echo "$as_me:$LINENO: error: invalid value of canonical target" >&5 +echo "$as_me: error: invalid value of canonical target" >&2;} + { (exit 1); exit 1; }; };; +esac +target=$ac_cv_target +ac_save_IFS=$IFS; IFS='-' +set x $ac_cv_target +shift +target_cpu=$1 +target_vendor=$2 +shift; shift +# Remember, the first character of IFS is used to create $*, +# except with old shells: +target_os=$* +IFS=$ac_save_IFS +case $target_os in *\ *) target_os=`echo "$target_os" | sed 's/ /-/g'`;; esac + + +# The aliases save the names the user supplied, while $host etc. +# will get canonicalized. +test -n "$target_alias" && + test "$program_prefix$program_suffix$program_transform_name" = \ + NONENONEs,x,x, && + program_prefix=${target_alias}- + +{ echo "$as_me:$LINENO: checking type of operating system we're going to host on" >&5 +echo $ECHO_N "checking type of operating system we're going to host on... $ECHO_C" >&6; } +if test "${llvm_cv_os_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $host in + *-*-aix*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="AIX" + llvm_cv_platform_type="Unix" ;; + *-*-irix*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="IRIX" + llvm_cv_platform_type="Unix" ;; + *-*-cygwin*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Cygwin" + llvm_cv_platform_type="Unix" ;; + *-*-darwin*) + llvm_cv_link_all_option="-Wl,-all_load" + llvm_cv_no_link_all_option="-Wl,-noall_load" + llvm_cv_os_type="Darwin" + llvm_cv_platform_type="Unix" ;; + *-*-minix*) + llvm_cv_link_all_option="-Wl,-all_load" + llvm_cv_no_link_all_option="-Wl,-noall_load" + llvm_cv_os_type="Minix" + llvm_cv_platform_type="Unix" ;; + *-*-freebsd*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="FreeBSD" + llvm_cv_platform_type="Unix" ;; + *-*-openbsd*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="OpenBSD" + llvm_cv_platform_type="Unix" ;; + *-*-netbsd*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="NetBSD" + llvm_cv_platform_type="Unix" ;; + *-*-dragonfly*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="DragonFly" + llvm_cv_platform_type="Unix" ;; + *-*-hpux*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="HP-UX" + llvm_cv_platform_type="Unix" ;; + *-*-interix*) + llvm_cv_link_all_option="-Wl,--whole-archive" + llvm_cv_no_link_all_option="-Wl,--no-whole-archive" + llvm_cv_os_type="Interix" + llvm_cv_platform_type="Unix" ;; + *-*-linux*) + 
llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Linux"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-solaris*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="SunOS"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-auroraux*)
+ llvm_cv_link_all_option="-Wl,-z,allextract"
+ llvm_cv_no_link_all_option="-Wl,-z,defaultextract"
+ llvm_cv_os_type="AuroraUX"
+ llvm_cv_platform_type="Unix" ;;
+ *-*-win32*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Win32"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-mingw*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="MingW"
+ llvm_cv_platform_type="Win32" ;;
+ *-*-haiku*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Haiku"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-eabi*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *-unknown-elf*)
+ llvm_cv_link_all_option="-Wl,--whole-archive"
+ llvm_cv_no_link_all_option="-Wl,--no-whole-archive"
+ llvm_cv_os_type="Freestanding"
+ llvm_cv_platform_type="Unix" ;;
+ *)
+ llvm_cv_link_all_option=""
+ llvm_cv_no_link_all_option=""
+ llvm_cv_os_type="Unknown"
+ llvm_cv_platform_type="Unknown" ;;
+esac
+fi
+{ echo "$as_me:$LINENO: result: $llvm_cv_os_type" >&5
+echo "${ECHO_T}$llvm_cv_os_type" >&6; }
+
+{ echo "$as_me:$LINENO: checking type of operating system we're going to target" >&5
+echo $ECHO_N "checking type of operating system we're going to target... 
$ECHO_C" >&6; } +if test "${llvm_cv_target_os_type+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $target in + *-*-aix*) + llvm_cv_target_os_type="AIX" ;; + *-*-irix*) + llvm_cv_target_os_type="IRIX" ;; + *-*-cygwin*) + llvm_cv_target_os_type="Cygwin" ;; + *-*-darwin*) + llvm_cv_target_os_type="Darwin" ;; + *-*-minix*) + llvm_cv_target_os_type="Minix" ;; + *-*-freebsd*) + llvm_cv_target_os_type="FreeBSD" ;; + *-*-openbsd*) + llvm_cv_target_os_type="OpenBSD" ;; + *-*-netbsd*) + llvm_cv_target_os_type="NetBSD" ;; + *-*-dragonfly*) + llvm_cv_target_os_type="DragonFly" ;; + *-*-hpux*) + llvm_cv_target_os_type="HP-UX" ;; + *-*-interix*) + llvm_cv_target_os_type="Interix" ;; + *-*-linux*) + llvm_cv_target_os_type="Linux" ;; + *-*-solaris*) + llvm_cv_target_os_type="SunOS" ;; + *-*-auroraux*) + llvm_cv_target_os_type="AuroraUX" ;; + *-*-win32*) + llvm_cv_target_os_type="Win32" ;; + *-*-mingw*) + llvm_cv_target_os_type="MingW" ;; + *-*-haiku*) + llvm_cv_target_os_type="Haiku" ;; + *-unknown-eabi*) + llvm_cv_target_os_type="Freestanding" ;; + *) + llvm_cv_target_os_type="Unknown" ;; +esac +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_target_os_type" >&5 +echo "${ECHO_T}$llvm_cv_target_os_type" >&6; } + +if test "$llvm_cv_os_type" = "Unknown" ; then + { { echo "$as_me:$LINENO: error: Operating system is unknown, configure can't continue" >&5 +echo "$as_me: error: Operating system is unknown, configure can't continue" >&2;} + { (exit 1); exit 1; }; } +fi + +OS=$llvm_cv_os_type + +HOST_OS=$llvm_cv_os_type + +TARGET_OS=$llvm_cv_target_os_type + + +LINKALL=$llvm_cv_link_all_option + +NOLINKALL=$llvm_cv_no_link_all_option + + +case $llvm_cv_platform_type in + Unix) + +cat >>confdefs.h <<\_ACEOF +#define LLVM_ON_UNIX 1 +_ACEOF + + LLVM_ON_UNIX=1 + + LLVM_ON_WIN32=0 + + ;; + Win32) + +cat >>confdefs.h <<\_ACEOF +#define LLVM_ON_WIN32 1 +_ACEOF + + LLVM_ON_UNIX=0 + + LLVM_ON_WIN32=1 + + ;; +esac + +{ echo "$as_me:$LINENO: checking target architecture" >&5 +echo $ECHO_N "checking target architecture... 
$ECHO_C" >&6; } +if test "${llvm_cv_target_arch+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $target in + i?86-*) llvm_cv_target_arch="x86" ;; + amd64-* | x86_64-*) llvm_cv_target_arch="x86_64" ;; + sparc*-*) llvm_cv_target_arch="Sparc" ;; + powerpc*-*) llvm_cv_target_arch="PowerPC" ;; + alpha*-*) llvm_cv_target_arch="Alpha" ;; + arm*-*) llvm_cv_target_arch="ARM" ;; + mips-*) llvm_cv_target_arch="Mips" ;; + xcore-*) llvm_cv_target_arch="XCore" ;; + msp430-*) llvm_cv_target_arch="MSP430" ;; + s390x-*) llvm_cv_target_arch="SystemZ" ;; + bfin-*) llvm_cv_target_arch="Blackfin" ;; + mblaze-*) llvm_cv_target_arch="MBlaze" ;; + ptx-*) llvm_cv_target_arch="PTX" ;; + *) llvm_cv_target_arch="Unknown" ;; +esac +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_target_arch" >&5 +echo "${ECHO_T}$llvm_cv_target_arch" >&6; } + +if test "$llvm_cv_target_arch" = "Unknown" ; then + { echo "$as_me:$LINENO: WARNING: Configuring LLVM for an unknown target archicture" >&5 +echo "$as_me: WARNING: Configuring LLVM for an unknown target archicture" >&2;} +fi + +# Determine the LLVM native architecture for the target +case "$llvm_cv_target_arch" in + x86) LLVM_NATIVE_ARCH="X86" ;; + x86_64) LLVM_NATIVE_ARCH="X86" ;; + *) LLVM_NATIVE_ARCH="$llvm_cv_target_arch" ;; +esac + +ARCH=$llvm_cv_target_arch + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. +set dummy ${ac_tool_prefix}gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_CC"; then + ac_ct_CC=$CC + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +else + CC="$ac_cv_prog_CC" +fi + +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. +set dummy ${ac_tool_prefix}cc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="${ac_tool_prefix}cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi +fi +if test -z "$CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. 
+ # However, it has the same basename, so the bogon will be chosen + # first if we set CC to just the basename; use the full file name. + shift + ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$CC"; then + if test -n "$ac_tool_prefix"; then + for ac_prog in cl.exe + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in cl.exe +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." 
>&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO: checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files a.out a.exe b.out" +# Try to create an executable without -o first, disregard a.out. +# It will help us diagnose broken compilers, and finding out an intuition +# of exeext. +{ echo "$as_me:$LINENO: checking for C compiler default output file name" >&5 +echo $ECHO_N "checking for C compiler default output file name... $ECHO_C" >&6; } +ac_link_default=`echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +# +# List of possible output files, starting from the most likely. +# The algorithm is not robust to junk in `.', hence go to wildcards (a.*) +# only as a last resort. b.out is created by i960 compilers. +ac_files='a_out.exe a.exe conftest.exe a.out conftest a.* conftest.* b.out' +# +# The IRIX 6 linker writes into existing files which may not be +# executable, retaining their permissions. Remove them first so a +# subsequent execution test works. +ac_rmfiles= +for ac_file in $ac_files +do + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + * ) ac_rmfiles="$ac_rmfiles $ac_file";; + esac +done +rm -f $ac_rmfiles + +if { (ac_try="$ac_link_default" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link_default") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. +# So ignore a value of `no', otherwise this would lead to `EXEEXT = no' +# in a Makefile. We should not override ac_cv_exeext if it was cached, +# so that the user can short-circuit this test for compilers unknown to +# Autoconf. +for ac_file in $ac_files +do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) + ;; + [ab].out ) + # We found the default executable, but exeext='' is most + # certainly right. 
+ break;; + *.* ) + if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + then :; else + ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + fi + # We set ac_cv_exeext here because the later test for it is not + # safe: cross compilers may not add the suffix if given an `-o' + # argument, so we may need to know it at that point already. + # Even if this section looks crufty: it has the advantage of + # actually working. + break;; + * ) + break;; + esac +done +test "$ac_cv_exeext" = no && ac_cv_exeext= + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: C compiler cannot create executables +See \`config.log' for more details." >&5 +echo "$as_me: error: C compiler cannot create executables +See \`config.log' for more details." >&2;} + { (exit 77); exit 77; }; } +fi + +ac_exeext=$ac_cv_exeext +{ echo "$as_me:$LINENO: result: $ac_file" >&5 +echo "${ECHO_T}$ac_file" >&6; } + +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether the C compiler works" >&5 +echo $ECHO_N "checking whether the C compiler works... $ECHO_C" >&6; } +# FIXME: These cross compiler hacks should be removed for Autoconf 3.0 +# If not cross compiling, check that we can run a simple program. +if test "$cross_compiling" != yes; then + if { ac_try='./$ac_file' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + cross_compiling=no + else + if test "$cross_compiling" = maybe; then + cross_compiling=yes + else + { { echo "$as_me:$LINENO: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot run C compiled programs. +If you meant to cross compile, use \`--host'. +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + fi + fi +fi +{ echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +rm -f a.out a.exe conftest$ac_cv_exeext b.out +ac_clean_files=$ac_clean_files_save +# Check that the compiler produces executables we can run. If not, either +# the compiler is broken, or we cross compile. +{ echo "$as_me:$LINENO: checking whether we are cross compiling" >&5 +echo $ECHO_N "checking whether we are cross compiling... $ECHO_C" >&6; } +{ echo "$as_me:$LINENO: result: $cross_compiling" >&5 +echo "${ECHO_T}$cross_compiling" >&6; } + +{ echo "$as_me:$LINENO: checking for suffix of executables" >&5 +echo $ECHO_N "checking for suffix of executables... $ECHO_C" >&6; } +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # If both `conftest.exe' and `conftest' are `present' (well, observable) +# catch `conftest.exe'. For instance with Cygwin, `ls conftest' will +# work properly (i.e., refer to `conftest.exe'), while it won't with +# `rm'. 
+for ac_file in conftest.exe conftest conftest.*; do + test -f "$ac_file" || continue + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf | *.o | *.obj ) ;; + *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` + break;; + * ) break;; + esac +done +else + { { echo "$as_me:$LINENO: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of executables: cannot compile and link +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest$ac_cv_exeext +{ echo "$as_me:$LINENO: result: $ac_cv_exeext" >&5 +echo "${ECHO_T}$ac_cv_exeext" >&6; } + +rm -f conftest.$ac_ext +EXEEXT=$ac_cv_exeext +ac_exeext=$EXEEXT +{ echo "$as_me:$LINENO: checking for suffix of object files" >&5 +echo $ECHO_N "checking for suffix of object files... $ECHO_C" >&6; } +if test "${ac_cv_objext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.o conftest.obj +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for ac_file in conftest.o conftest.obj conftest.*; do + test -f "$ac_file" || continue; + case $ac_file in + *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map | *.inf ) ;; + *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'` + break;; + esac +done +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +{ { echo "$as_me:$LINENO: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&5 +echo "$as_me: error: cannot compute suffix of object files: cannot compile +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f conftest.$ac_cv_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_objext" >&5 +echo "${ECHO_T}$ac_cv_objext" >&6; } +OBJEXT=$ac_cv_objext +ac_objext=$OBJEXT +{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include +#include +#include +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. 
+ These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_c89=$ac_arg +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6; } ;; + xno) + { echo "$as_me:$LINENO: result: unsupported" >&5 +echo "${ECHO_T}unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. 
+ cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + : +else + { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&5 +echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } +fi + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ echo "$as_me:$LINENO: checking for grep that handles long lines and -e" >&5 +echo $ECHO_N "checking for grep that handles long lines and -e... $ECHO_C" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Extract the first word of "grep ggrep" to use in msg output +if test -z "$GREP"; then +set dummy grep ggrep; ac_prog_name=$2 +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_GREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_prog in grep ggrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_GREP" && $as_executable_p "$ac_path_GREP"; } || continue + # Check for GNU ac_path_GREP and select it if it is found. + # Check for GNU $ac_path_GREP +case `"$ac_path_GREP" --version 2>&1` in +*GNU*) + ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;; +*) + ac_count=0 + echo $ECHO_N "0123456789$ECHO_C" >"conftest.in" + while : + do + cat "conftest.in" "conftest.in" >"conftest.tmp" + mv "conftest.tmp" "conftest.in" + cp "conftest.in" "conftest.nl" + echo 'GREP' >> "conftest.nl" + "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" >"conftest.out" 2>/dev/null || break + diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break + ac_count=`expr $ac_count + 1` + if test $ac_count -gt ${ac_path_GREP_max-0}; then + # Best one so far, save it but keep looking for a better one + ac_cv_path_GREP="$ac_path_GREP" + ac_path_GREP_max=$ac_count + fi + # 10*(2^10) chars as input seems more than enough + test $ac_count -gt 10 && break + done + rm -f conftest.in conftest.tmp conftest.nl conftest.out;; +esac + + + $ac_path_GREP_found && break 3 + done +done + +done +IFS=$as_save_IFS + + +fi + +GREP="$ac_cv_path_GREP" +if test -z "$GREP"; then + { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5 +echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;} + { (exit 1); exit 1; }; } +fi + +else + ac_cv_path_GREP=$GREP +fi + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_path_GREP" >&5 +echo "${ECHO_T}$ac_cv_path_GREP" >&6; } + GREP="$ac_cv_path_GREP" + + +{ echo "$as_me:$LINENO: checking for egrep" >&5 +echo $ECHO_N "checking for egrep... $ECHO_C" >&6; } +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if echo a | $GREP -E '(a|b)' >/dev/null 2>&1 + then ac_cv_path_EGREP="$GREP -E" + else + # Extract the first word of "egrep" to use in msg output +if test -z "$EGREP"; then +set dummy egrep; ac_prog_name=$2 +if test "${ac_cv_path_EGREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_path_EGREP_found=false +# Loop through the user's path and test for each of PROGNAME-LIST +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_prog in egrep; do + for ac_exec_ext in '' $ac_executable_extensions; do + ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext" + { test -f "$ac_path_EGREP" && $as_executable_p "$ac_path_EGREP"; } || continue + # Check for GNU ac_path_EGREP and select it if it is found. 
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  echo $ECHO_N "0123456789$ECHO_C" >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    echo 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    ac_count=`expr $ac_count + 1`
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+
+  $ac_path_EGREP_found && break 3
+  done
+done
+
+done
+IFS=$as_save_IFS
+
+
+fi
+
+EGREP="$ac_cv_path_EGREP"
+if test -z "$EGREP"; then
+  { { echo "$as_me:$LINENO: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&5
+echo "$as_me: error: no acceptable $ac_prog_name could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+
+   fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_path_EGREP" >&5
+echo "${ECHO_T}$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_header_stdc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_header_stdc=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then
+  :
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+		   (('a' <= (c) && (c) <= 'i') \
+		     || ('j' <= (c) && (c) <= 'r') \
+		     || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  :
+else
+  echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+
+
+
+
+
+
+
+
+
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+		  inttypes.h stdint.h unistd.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+
+#include <$ac_header>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_Header=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_Header=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_Header'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
+{ echo "$as_me:$LINENO: checking whether byte ordering is bigendian" >&5
+echo $ECHO_N "checking whether byte ordering is bigendian... $ECHO_C" >&6; }
+if test "${ac_cv_c_bigendian+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  # See if sys/param.h defines the BYTE_ORDER macro.
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/param.h>
+
+int
+main ()
+{
+#if !BYTE_ORDER || !BIG_ENDIAN || !LITTLE_ENDIAN
+ bogus endian macros
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  # It does; now see whether it defined to BIG_ENDIAN or not.
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/param.h>
+
+int
+main ()
+{
+#if BYTE_ORDER != BIG_ENDIAN
+ not big endian
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_c_bigendian=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	ac_cv_c_bigendian=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	# It does not; compile a test program.
+if test "$cross_compiling" = yes; then
+  # try to guess the endianness by grepping values into an object file
+  ac_cv_c_bigendian=unknown
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+short int ascii_mm[] = { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+short int ascii_ii[] = { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+void _ascii () { char *s = (char *) ascii_mm; s = (char *) ascii_ii; }
+short int ebcdic_ii[] = { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+short int ebcdic_mm[] = { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+void _ebcdic () { char *s = (char *) ebcdic_mm; s = (char *) ebcdic_ii; }
+int
+main ()
+{
+  _ascii (); _ebcdic ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + if grep BIGenDianSyS conftest.$ac_objext >/dev/null ; then + ac_cv_c_bigendian=yes +fi +if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then + if test "$ac_cv_c_bigendian" = unknown; then + ac_cv_c_bigendian=no + else + # finding both strings is unlikely to happen, but who knows? + ac_cv_c_bigendian=unknown + fi +fi +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ + + /* Are we little or big endian? From Harbison&Steele. */ + union + { + long int l; + char c[sizeof (long int)]; + } u; + u.l = 1; + return u.c[sizeof (long int) - 1] == 1; + + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_bigendian=no +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_c_bigendian=yes +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_bigendian" >&5 +echo "${ECHO_T}$ac_cv_c_bigendian" >&6; } +case $ac_cv_c_bigendian in + yes) + ENDIAN=big + ;; + no) + ENDIAN=little + ;; + *) + { { echo "$as_me:$LINENO: error: unknown endianness +presetting ac_cv_c_bigendian=no (or yes) will help" >&5 +echo "$as_me: error: unknown endianness +presetting ac_cv_c_bigendian=no (or yes) will help" >&2;} + { (exit 1); exit 1; }; } ;; +esac + + +if test "$cross_compiling" = yes; then + LLVM_CROSS_COMPILING=1 + + +{ echo "$as_me:$LINENO: checking for executable suffix on build platform" >&5 +echo $ECHO_N "checking for executable suffix on build platform... $ECHO_C" >&6; } +if test "${ac_cv_build_exeext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$CYGWIN" = yes || test "$MINGW32" = yes; then + ac_cv_build_exeext=.exe +else + ac_build_prefix=${build_alias}- + + # Extract the first word of "${ac_build_prefix}gcc", so it can be a program name with args. +set dummy ${ac_build_prefix}gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CC"; then + ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_BUILD_CC="${ac_build_prefix}gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +BUILD_CC=$ac_cv_prog_BUILD_CC +if test -n "$BUILD_CC"; then + { echo "$as_me:$LINENO: result: $BUILD_CC" >&5 +echo "${ECHO_T}$BUILD_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test -z "$BUILD_CC"; then + # Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CC"; then + ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_BUILD_CC="gcc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +BUILD_CC=$ac_cv_prog_BUILD_CC +if test -n "$BUILD_CC"; then + { echo "$as_me:$LINENO: result: $BUILD_CC" >&5 +echo "${ECHO_T}$BUILD_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test -z "$BUILD_CC"; then + # Extract the first word of "cc", so it can be a program name with args. +set dummy cc; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CC"; then + ac_cv_prog_BUILD_CC="$BUILD_CC" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_BUILD_CC="cc" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_BUILD_CC + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set BUILD_CC to just the basename; use the full file name. 
+ shift + ac_cv_prog_BUILD_CC="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +BUILD_CC=$ac_cv_prog_BUILD_CC +if test -n "$BUILD_CC"; then + { echo "$as_me:$LINENO: result: $BUILD_CC" >&5 +echo "${ECHO_T}$BUILD_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi + fi + test -z "$BUILD_CC" && { { echo "$as_me:$LINENO: error: no acceptable cc found in \$PATH" >&5 +echo "$as_me: error: no acceptable cc found in \$PATH" >&2;} + { (exit 1); exit 1; }; } + ac_build_link='${BUILD_CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' + rm -f conftest* + echo 'int main () { return 0; }' > conftest.$ac_ext + ac_cv_build_exeext= + if { (eval echo "$as_me:$LINENO: \"$ac_build_link\"") >&5 + (eval $ac_build_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + for file in conftest.*; do + case $file in + *.c | *.o | *.obj | *.dSYM) ;; + *) ac_cv_build_exeext=`echo $file | sed -e s/conftest//` ;; + esac + done + else + { { echo "$as_me:$LINENO: error: installation or configuration problem: compiler cannot create executables." >&5 +echo "$as_me: error: installation or configuration problem: compiler cannot create executables." >&2;} + { (exit 1); exit 1; }; } + fi + rm -f conftest* + test x"${ac_cv_build_exeext}" = x && ac_cv_build_exeext=blank +fi +fi + +BUILD_EXEEXT="" +test x"${ac_cv_build_exeext}" != xblank && BUILD_EXEEXT=${ac_cv_build_exeext} +{ echo "$as_me:$LINENO: result: ${ac_cv_build_exeext}" >&5 +echo "${ECHO_T}${ac_cv_build_exeext}" >&6; } +ac_build_exeext=$BUILD_EXEEXT + + ac_build_prefix=${build_alias}- + # Extract the first word of "${ac_build_prefix}g++", so it can be a program name with args. +set dummy ${ac_build_prefix}g++; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CXX"; then + ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_BUILD_CXX="${ac_build_prefix}g++" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +BUILD_CXX=$ac_cv_prog_BUILD_CXX +if test -n "$BUILD_CXX"; then + { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5 +echo "${ECHO_T}$BUILD_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test -z "$BUILD_CXX"; then + # Extract the first word of "g++", so it can be a program name with args. +set dummy g++; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CXX"; then + ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
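# Aside: the executable-suffix probe above compiles a trivial program and
# inspects the filename the build compiler actually produced; whatever
# remains after stripping "conftest" (".exe" under Cygwin/MinGW, nothing on
# Unix) is the suffix. A bare-bones sketch, assuming $BUILD_CC works
# (illustration only):
echo 'int main () { return 0; }' > conftest.c
$BUILD_CC -o conftest conftest.c
suffix=
for file in conftest.*; do
  case $file in
    *.c | *.o | *.obj | *.dSYM) ;;                     # skip inputs and debris
    *) suffix=`echo $file | sed -e s/conftest//` ;;    # e.g. ".exe"
  esac
done
rm -f conftest*
echo "build executable suffix: '$suffix'"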
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_BUILD_CXX="g++" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +BUILD_CXX=$ac_cv_prog_BUILD_CXX +if test -n "$BUILD_CXX"; then + { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5 +echo "${ECHO_T}$BUILD_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + if test -z "$BUILD_CXX"; then + # Extract the first word of "c++", so it can be a program name with args. +set dummy c++; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_BUILD_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$BUILD_CXX"; then + ac_cv_prog_BUILD_CXX="$BUILD_CXX" # Let the user override the test. +else + ac_prog_rejected=no +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/c++"; then + ac_prog_rejected=yes + continue + fi + ac_cv_prog_BUILD_CXX="c++" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +if test $ac_prog_rejected = yes; then + # We found a bogon in the path, so make sure we never use it. + set dummy $ac_cv_prog_BUILD_CXX + shift + if test $# != 0; then + # We chose a different compiler from the bogus one. + # However, it has the same basename, so the bogon will be chosen + # first if we set BUILD_CXX to just the basename; use the full file name. + shift + ac_cv_prog_BUILD_CXX="$as_dir/$ac_word${1+' '}$@" + fi +fi +fi +fi +BUILD_CXX=$ac_cv_prog_BUILD_CXX +if test -n "$BUILD_CXX"; then + { echo "$as_me:$LINENO: result: $BUILD_CXX" >&5 +echo "${ECHO_T}$BUILD_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + fi + fi +else + LLVM_CROSS_COMPILING=0 + +fi + +if test -d ".svn" -o -d "${srcdir}/.svn" -o -d ".git" -o -d "${srcdir}/.git"; then + cvsbuild="yes" + optimize="no" + CVSBUILD=CVSBUILD=1 + +else + cvsbuild="no" + optimize="yes" +fi + + +# Check whether --enable-optimized was given. +if test "${enable_optimized+set}" = set; then + enableval=$enable_optimized; +else + enableval=$optimize +fi + +if test ${enableval} = "no" ; then + ENABLE_OPTIMIZED= + +else + ENABLE_OPTIMIZED=ENABLE_OPTIMIZED=1 + +fi + +# Check whether --enable-profiling was given. +if test "${enable_profiling+set}" = set; then + enableval=$enable_profiling; +else + enableval="no" +fi + +if test ${enableval} = "no" ; then + ENABLE_PROFILING= + +else + ENABLE_PROFILING=ENABLE_PROFILING=1 + +fi + +# Check whether --enable-assertions was given. +if test "${enable_assertions+set}" = set; then + enableval=$enable_assertions; +else + enableval="no" +fi + +if test ${enableval} = "yes" ; then + DISABLE_ASSERTIONS= + +else + DISABLE_ASSERTIONS=DISABLE_ASSERTIONS=1 + +fi + +# Check whether --enable-expensive-checks was given. 
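# Aside: the --enable-* handlers above all use the same idiom: the shell
# variable holds either nothing or a complete make assignment, e.g.
# ENABLE_OPTIMIZED=ENABLE_OPTIMIZED=1, so that substituting it into a
# makefile template on a line of its own either defines the flag or leaves
# a blank line. A two-line sketch (illustration only):
ENABLE_OPTIMIZED=ENABLE_OPTIMIZED=1   # value is the literal text "ENABLE_OPTIMIZED=1"
echo "$ENABLE_OPTIMIZED"              # what the makefile template receives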
+if test "${enable_expensive_checks+set}" = set; then + enableval=$enable_expensive_checks; +else + enableval="no" +fi + +if test ${enableval} = "yes" ; then + ENABLE_EXPENSIVE_CHECKS=ENABLE_EXPENSIVE_CHECKS=1 + + EXPENSIVE_CHECKS=yes + +else + ENABLE_EXPENSIVE_CHECKS= + + EXPENSIVE_CHECKS=no + +fi + +# Check whether --enable-debug-runtime was given. +if test "${enable_debug_runtime+set}" = set; then + enableval=$enable_debug_runtime; +else + enableval=no +fi + +if test ${enableval} = "no" ; then + DEBUG_RUNTIME= + +else + DEBUG_RUNTIME=DEBUG_RUNTIME=1 + +fi + +# Check whether --enable-debug-symbols was given. +if test "${enable_debug_symbols+set}" = set; then + enableval=$enable_debug_symbols; +else + enableval=no +fi + +if test ${enableval} = "no" ; then + DEBUG_SYMBOLS= + +else + DEBUG_SYMBOLS=DEBUG_SYMBOLS=1 + +fi + +# Check whether --enable-jit was given. +if test "${enable_jit+set}" = set; then + enableval=$enable_jit; +else + enableval=default +fi + +if test ${enableval} = "no" +then + JIT= + +else + case "$llvm_cv_target_arch" in + x86) TARGET_HAS_JIT=1 + ;; + Sparc) TARGET_HAS_JIT=0 + ;; + PowerPC) TARGET_HAS_JIT=1 + ;; + x86_64) TARGET_HAS_JIT=1 + ;; + Alpha) TARGET_HAS_JIT=0 + ;; + ARM) TARGET_HAS_JIT=1 + ;; + Mips) TARGET_HAS_JIT=0 + ;; + XCore) TARGET_HAS_JIT=0 + ;; + MSP430) TARGET_HAS_JIT=0 + ;; + SystemZ) TARGET_HAS_JIT=0 + ;; + Blackfin) TARGET_HAS_JIT=0 + ;; + MBlaze) TARGET_HAS_JIT=0 + ;; + PTX) TARGET_HAS_JIT=0 + ;; + *) TARGET_HAS_JIT=0 + ;; + esac +fi + +# Check whether --enable-docs was given. +if test "${enable_docs+set}" = set; then + enableval=$enable_docs; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_DOCS=1 + ;; + no) ENABLE_DOCS=0 + ;; + default) ENABLE_DOCS=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-docs. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-doxygen was given. +if test "${enable_doxygen+set}" = set; then + enableval=$enable_doxygen; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_DOXYGEN=1 + ;; + no) ENABLE_DOXYGEN=0 + ;; + default) ENABLE_DOXYGEN=0 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-doxygen. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-doxygen. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-threads was given. +if test "${enable_threads+set}" = set; then + enableval=$enable_threads; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_THREADS=1 + ;; + no) ENABLE_THREADS=0 + ;; + default) ENABLE_THREADS=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-threads. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +cat >>confdefs.h <<_ACEOF +#define ENABLE_THREADS $ENABLE_THREADS +_ACEOF + + +# Check whether --enable-pthreads was given. +if test "${enable_pthreads+set}" = set; then + enableval=$enable_pthreads; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_PTHREADS=1 + ;; + no) ENABLE_PTHREADS=0 + ;; + default) ENABLE_PTHREADS=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-pthreads. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-pthreads. 
Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-pic was given. +if test "${enable_pic+set}" = set; then + enableval=$enable_pic; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_PIC=1 + ;; + no) ENABLE_PIC=0 + ;; + default) ENABLE_PIC=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-pic. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-pic. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +cat >>confdefs.h <<_ACEOF +#define ENABLE_PIC $ENABLE_PIC +_ACEOF + + +# Check whether --enable-shared was given. +if test "${enable_shared+set}" = set; then + enableval=$enable_shared; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_SHARED=1 + ;; + no) ENABLE_SHARED=0 + ;; + default) ENABLE_SHARED=0 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-shared. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-embed-stdcxx was given. +if test "${enable_embed_stdcxx+set}" = set; then + enableval=$enable_embed_stdcxx; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_EMBED_STDCXX=1 + ;; + no) ENABLE_EMBED_STDCXX=0 + ;; + default) ENABLE_EMBED_STDCXX=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-embed-stdcxx. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +# Check whether --enable-timestamps was given. +if test "${enable_timestamps+set}" = set; then + enableval=$enable_timestamps; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_TIMESTAMPS=1 + ;; + no) ENABLE_TIMESTAMPS=0 + ;; + default) ENABLE_TIMESTAMPS=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-timestamps. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +cat >>confdefs.h <<_ACEOF +#define ENABLE_TIMESTAMPS $ENABLE_TIMESTAMPS +_ACEOF + + +TARGETS_TO_BUILD="" +# Check whether --enable-targets was given. 
+if test "${enable_targets+set}" = set; then + enableval=$enable_targets; +else + enableval=all +fi + +if test "$enableval" = host-only ; then + enableval=host +fi +case "$enableval" in + all) TARGETS_TO_BUILD="X86 Sparc PowerPC Alpha ARM Mips CellSPU XCore MSP430 SystemZ Blackfin CBackend CppBackend MBlaze PTX" ;; + *)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do + case "$a_target" in + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + powerpc) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + arm) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + spu) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + xcore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + msp430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + systemz) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + cbe) TARGETS_TO_BUILD="CBackend $TARGETS_TO_BUILD" ;; + cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;; + mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; + ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; + host) case "$llvm_cv_target_arch" in + x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + x86_64) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;; + Sparc) TARGETS_TO_BUILD="Sparc $TARGETS_TO_BUILD" ;; + PowerPC) TARGETS_TO_BUILD="PowerPC $TARGETS_TO_BUILD" ;; + Alpha) TARGETS_TO_BUILD="Alpha $TARGETS_TO_BUILD" ;; + ARM) TARGETS_TO_BUILD="ARM $TARGETS_TO_BUILD" ;; + Mips) TARGETS_TO_BUILD="Mips $TARGETS_TO_BUILD" ;; + MBlaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;; + CellSPU|SPU) TARGETS_TO_BUILD="CellSPU $TARGETS_TO_BUILD" ;; + XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;; + MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;; + s390x) TARGETS_TO_BUILD="SystemZ $TARGETS_TO_BUILD" ;; + Blackfin) TARGETS_TO_BUILD="Blackfin $TARGETS_TO_BUILD" ;; + PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;; + *) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5 +echo "$as_me: error: Can not set target to build" >&2;} + { (exit 1); exit 1; }; } ;; + esac ;; + *) { { echo "$as_me:$LINENO: error: Unrecognized target $a_target" >&5 +echo "$as_me: error: Unrecognized target $a_target" >&2;} + { (exit 1); exit 1; }; } ;; + esac + done + ;; +esac +TARGETS_TO_BUILD=$TARGETS_TO_BUILD + + +# Determine whether we are building LLVM support for the native architecture. +# If so, define LLVM_NATIVE_ARCH to that LLVM target. +for a_target in $TARGETS_TO_BUILD; do + if test "$a_target" = "$LLVM_NATIVE_ARCH"; then + +cat >>confdefs.h <<_ACEOF +#define LLVM_NATIVE_ARCH $LLVM_NATIVE_ARCH +_ACEOF + + LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target" + LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo" + LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter" + +cat >>confdefs.h <<_ACEOF +#define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_NATIVE_TARGETINFO $LLVM_NATIVE_TARGETINFO +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER +_ACEOF + + fi +done + +# Build the LLVM_TARGET and LLVM_... macros for Targets.def and the individual +# target feature def files. 
+LLVM_ENUM_TARGETS="" +LLVM_ENUM_ASM_PRINTERS="" +LLVM_ENUM_ASM_PARSERS="" +LLVM_ENUM_DISASSEMBLERS="" +for target_to_build in $TARGETS_TO_BUILD; do + LLVM_ENUM_TARGETS="LLVM_TARGET($target_to_build) $LLVM_ENUM_TARGETS" + if test -f ${srcdir}/lib/Target/${target_to_build}/*AsmPrinter.cpp ; then + LLVM_ENUM_ASM_PRINTERS="LLVM_ASM_PRINTER($target_to_build) $LLVM_ENUM_ASM_PRINTERS"; + fi + if test -f ${srcdir}/lib/Target/${target_to_build}/AsmParser/Makefile ; then + LLVM_ENUM_ASM_PARSERS="LLVM_ASM_PARSER($target_to_build) $LLVM_ENUM_ASM_PARSERS"; + fi + if test -f ${srcdir}/lib/Target/${target_to_build}/Disassembler/Makefile ; then + LLVM_ENUM_DISASSEMBLERS="LLVM_DISASSEMBLER($target_to_build) $LLVM_ENUM_DISASSEMBLERS"; + fi +done + + + + + +# Check whether --enable-cbe-printf-a was given. +if test "${enable_cbe_printf_a+set}" = set; then + enableval=$enable_cbe_printf_a; +else + enableval=default +fi + +case "$enableval" in + yes) ENABLE_CBE_PRINTF_A=1 + ;; + no) ENABLE_CBE_PRINTF_A=0 + ;; + default) ENABLE_CBE_PRINTF_A=1 + ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-cbe-printf-a. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-cbe-printf-a. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; +esac + +cat >>confdefs.h <<_ACEOF +#define ENABLE_CBE_PRINTF_A $ENABLE_CBE_PRINTF_A +_ACEOF + + + +# Check whether --with-llvmgccdir was given. +if test "${with_llvmgccdir+set}" = set; then + withval=$with_llvmgccdir; +else + withval=default +fi + +case "$withval" in + default) WITH_LLVMGCCDIR=default ;; + /* | [A-Za-z]:[\\/]*) WITH_LLVMGCCDIR=$withval ;; + *) { { echo "$as_me:$LINENO: error: Invalid path for --with-llvmgccdir. Provide full path" >&5 +echo "$as_me: error: Invalid path for --with-llvmgccdir. Provide full path" >&2;} + { (exit 1); exit 1; }; } ;; +esac + + +# Check whether --with-llvmgcc was given. +if test "${with_llvmgcc+set}" = set; then + withval=$with_llvmgcc; LLVMGCC=$with_llvmgcc + WITH_LLVMGCCDIR="" +fi + + + +# Check whether --with-llvmgxx was given. +if test "${with_llvmgxx+set}" = set; then + withval=$with_llvmgxx; LLVMGXX=$with_llvmgxx + WITH_LLVMGCCDIR="" +fi + + +if test -n "$LLVMGCC"; then + LLVMGCCCOMMAND="$LLVMGCC" +fi + +if test -n "$LLVMGXX"; then + LLVMGXXCOMMAND="$LLVMGXX" +fi + +if test -n "$LLVMGCC" && test -z "$LLVMGXX"; then + { { echo "$as_me:$LINENO: error: Invalid llvm-g++. Use --with-llvmgxx when --with-llvmgcc is used" >&5 +echo "$as_me: error: Invalid llvm-g++. Use --with-llvmgxx when --with-llvmgcc is used" >&2;} + { (exit 1); exit 1; }; }; +fi + +if test -n "$LLVMGXX" && test -z "$LLVMGCC"; then + { { echo "$as_me:$LINENO: error: Invalid llvm-gcc. Use --with-llvmgcc when --with-llvmgxx is used" >&5 +echo "$as_me: error: Invalid llvm-gcc. Use --with-llvmgcc when --with-llvmgxx is used" >&2;} + { (exit 1); exit 1; }; }; +fi + + +# Check whether --with-clang was given. +if test "${with_clang+set}" = set; then + withval=$with_clang; +else + with_clang=default +fi + + + +# Check whether --with-built-clang was given. +if test "${with_built_clang+set}" = set; then + withval=$with_built_clang; +else + with_built_clang=check +fi + + +{ echo "$as_me:$LINENO: checking clang compiler" >&5 +echo $ECHO_N "checking clang compiler... $ECHO_C" >&6; } +WITH_CLANGPATH="" +WITH_BUILT_CLANG=0 +if test "$with_clang" != "default"; then + WITH_CLANGPATH="$with_clang" + if ! 
test -x "$WITH_CLANGPATH"; then + { { echo "$as_me:$LINENO: error: invalid --with-clang, path does not specify an executable" >&5 +echo "$as_me: error: invalid --with-clang, path does not specify an executable" >&2;} + { (exit 1); exit 1; }; } + fi +elif test "$with_built_clang" = "yes"; then + WITH_BUILT_CLANG=1 +elif test "$with_built_clang" = "no"; then + WITH_BUILT_CLANG=0 +else + if test "$with_built_clang" != "check"; then + { { echo "$as_me:$LINENO: error: invalid value for --with-built-clang." >&5 +echo "$as_me: error: invalid value for --with-built-clang." >&2;} + { (exit 1); exit 1; }; } + fi + + if test -f ${srcdir}/tools/clang/README.txt; then + WITH_BUILT_CLANG=1 + fi +fi + +if ! test -z "$WITH_CLANGPATH"; then + { echo "$as_me:$LINENO: result: $WITH_CLANGPATH" >&5 +echo "${ECHO_T}$WITH_CLANGPATH" >&6; } + WITH_CLANGXXPATH=`"$WITH_CLANGPATH" --print-prog-name=clang++` +elif test "$WITH_BUILT_CLANG" = "1"; then + { echo "$as_me:$LINENO: result: built" >&5 +echo "${ECHO_T}built" >&6; } +else + { echo "$as_me:$LINENO: result: none" >&5 +echo "${ECHO_T}none" >&6; } +fi +CLANGPATH=$WITH_CLANGPATH + +CLANGXXPATH=$WITH_CLANGXXPATH + +ENABLE_BUILT_CLANG=$WITH_BUILT_CLANG + + + +# Check whether --with-optimize-option was given. +if test "${with_optimize_option+set}" = set; then + withval=$with_optimize_option; +else + withval=default +fi + +{ echo "$as_me:$LINENO: checking optimization flags" >&5 +echo $ECHO_N "checking optimization flags... $ECHO_C" >&6; } +case "$withval" in + default) + case "$llvm_cv_os_type" in + FreeBSD) optimize_option=-O2 ;; + MingW) optimize_option=-O2 ;; + *) optimize_option=-O3 ;; + esac ;; + *) optimize_option="$withval" ;; +esac +OPTIMIZE_OPTION=$optimize_option + +{ echo "$as_me:$LINENO: result: $optimize_option" >&5 +echo "${ECHO_T}$optimize_option" >&6; } + + +# Check whether --with-extra-options was given. +if test "${with_extra_options+set}" = set; then + withval=$with_extra_options; +else + withval=default +fi + +case "$withval" in + default) EXTRA_OPTIONS= ;; + *) EXTRA_OPTIONS=$withval ;; +esac +EXTRA_OPTIONS=$EXTRA_OPTIONS + + +# Check whether --enable-bindings was given. +if test "${enable_bindings+set}" = set; then + enableval=$enable_bindings; +else + enableval=default +fi + +BINDINGS_TO_BUILD="" +case "$enableval" in + yes | default | auto) BINDINGS_TO_BUILD="auto" ;; + all ) BINDINGS_TO_BUILD="ocaml" ;; + none | no) BINDINGS_TO_BUILD="" ;; + *)for a_binding in `echo $enableval|sed -e 's/,/ /g' ` ; do + case "$a_binding" in + ocaml) BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" ;; + *) { { echo "$as_me:$LINENO: error: Unrecognized binding $a_binding" >&5 +echo "$as_me: error: Unrecognized binding $a_binding" >&2;} + { (exit 1); exit 1; }; } ;; + esac + done + ;; +esac + + +# Check whether --with-ocaml-libdir was given. +if test "${with_ocaml_libdir+set}" = set; then + withval=$with_ocaml_libdir; +else + withval=auto +fi + +case "$withval" in + auto) with_ocaml_libdir="$withval" ;; + /* | [A-Za-z]:[\\/]*) with_ocaml_libdir="$withval" ;; + *) { { echo "$as_me:$LINENO: error: Invalid path for --with-ocaml-libdir. Provide full path" >&5 +echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&2;} + { (exit 1); exit 1; }; } ;; +esac + + +# Check whether --with-clang-resource-dir was given. 
+if test "${with_clang_resource_dir+set}" = set; then + withval=$with_clang_resource_dir; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define CLANG_RESOURCE_DIR "$withval" +_ACEOF + + + +# Check whether --with-c-include-dirs was given. +if test "${with_c_include_dirs+set}" = set; then + withval=$with_c_include_dirs; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define C_INCLUDE_DIRS "$withval" +_ACEOF + + + +# Check whether --with-cxx-include-root was given. +if test "${with_cxx_include_root+set}" = set; then + withval=$with_cxx_include_root; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define CXX_INCLUDE_ROOT "$withval" +_ACEOF + + + +# Check whether --with-cxx-include-arch was given. +if test "${with_cxx_include_arch+set}" = set; then + withval=$with_cxx_include_arch; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define CXX_INCLUDE_ARCH "$withval" +_ACEOF + + + +# Check whether --with-cxx-include-32bit-dir was given. +if test "${with_cxx_include_32bit_dir+set}" = set; then + withval=$with_cxx_include_32bit_dir; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define CXX_INCLUDE_32BIT_DIR "$withval" +_ACEOF + + + +# Check whether --with-cxx-include-64bit-dir was given. +if test "${with_cxx_include_64bit_dir+set}" = set; then + withval=$with_cxx_include_64bit_dir; +else + withval="" +fi + + +cat >>confdefs.h <<_ACEOF +#define CXX_INCLUDE_64BIT_DIR "$withval" +_ACEOF + + + +# Check whether --with-binutils-include was given. +if test "${with_binutils_include+set}" = set; then + withval=$with_binutils_include; +else + withval=default +fi + +case "$withval" in + default) WITH_BINUTILS_INCDIR=default ;; + /* | [A-Za-z]:[\\/]*) WITH_BINUTILS_INCDIR=$withval ;; + *) { { echo "$as_me:$LINENO: error: Invalid path for --with-binutils-include. Provide full path" >&5 +echo "$as_me: error: Invalid path for --with-binutils-include. Provide full path" >&2;} + { (exit 1); exit 1; }; } ;; +esac +if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then + BINUTILS_INCDIR=$WITH_BINUTILS_INCDIR + + if test ! -f "$WITH_BINUTILS_INCDIR/plugin-api.h"; then + echo "$WITH_BINUTILS_INCDIR/plugin-api.h" + { { echo "$as_me:$LINENO: error: Invalid path to directory containing plugin-api.h." >&5 +echo "$as_me: error: Invalid path to directory containing plugin-api.h." >&2;} + { (exit 1); exit 1; }; }; + fi +fi + +# Check whether --enable-libffi was given. +if test "${enable_libffi+set}" = set; then + enableval=$enable_libffi; case "$enableval" in + yes) llvm_cv_enable_libffi="yes" ;; + no) llvm_cv_enable_libffi="no" ;; + *) { { echo "$as_me:$LINENO: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&5 +echo "$as_me: error: Invalid setting for --enable-libffi. Use \"yes\" or \"no\"" >&2;} + { (exit 1); exit 1; }; } ;; + esac +else + llvm_cv_enable_libffi=no +fi + + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu +{ echo "$as_me:$LINENO: checking how to run the C preprocessor" >&5 +echo $ECHO_N "checking how to run the C preprocessor... $ECHO_C" >&6; } +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then + if test "${ac_cv_prog_CPP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # Double quotes because CPP needs to be expanded + for CPP in "$CC -E" "$CC -E -traditional-cpp" "/lib/cpp" + do + ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifdef __STDC__ +# include +#else +# include +#endif + Syntax error +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Broken: fails on valid input. +continue +fi + +rm -f conftest.err conftest.$ac_ext + + # OK, works on sane cases. Now check whether nonexistent headers + # can be detected and how. + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + # Broken: success on invalid input. +continue +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + # Passes both tests. +ac_preproc_ok=: +break +fi + +rm -f conftest.err conftest.$ac_ext + +done +# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped. +rm -f conftest.err conftest.$ac_ext +if $ac_preproc_ok; then + break +fi + + done + ac_cv_prog_CPP=$CPP + +fi + CPP=$ac_cv_prog_CPP +else + ac_cv_prog_CPP=$CPP +fi +{ echo "$as_me:$LINENO: result: $CPP" >&5 +echo "${ECHO_T}$CPP" >&6; } +ac_preproc_ok=false +for ac_c_preproc_warn_flag in '' yes +do + # Use a header file that comes with gcc, so configuring glibc + # with a fresh cross-compiler works. + # Prefer to if __STDC__ is defined, since + # exists even on freestanding compilers. + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. "Syntax error" is here to catch this case. 
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+		     Syntax error
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  :
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Broken: fails on valid input.
+continue
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  # Broken: success on invalid input.
+continue
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+
+rm -f conftest.err conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.err conftest.$ac_ext
+if $ac_preproc_ok; then
+  :
+else
+  { { echo "$as_me:$LINENO: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&5
+echo "$as_me: error: C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details." >&2;}
+   { (exit 1); exit 1; }; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  for ac_prog in gcc
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ echo "$as_me:$LINENO: checking for $ac_word" >&5
+echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; }
+if test "${ac_cv_prog_CC+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CC="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CC=$ac_cv_prog_CC +if test -n "$CC"; then + { echo "$as_me:$LINENO: result: $CC" >&5 +echo "${ECHO_T}$CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CC" && break + done +fi +if test -z "$CC"; then + ac_ct_CC=$CC + for ac_prog in gcc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CC"; then + ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CC="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CC=$ac_cv_prog_ac_ct_CC +if test -n "$ac_ct_CC"; then + { echo "$as_me:$LINENO: result: $ac_ct_CC" >&5 +echo "${ECHO_T}$ac_ct_CC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CC" && break +done + + if test "x$ac_ct_CC" = x; then + CC="" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CC=$ac_ct_CC + fi +fi + + +test -z "$CC" && { { echo "$as_me:$LINENO: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&5 +echo "$as_me: error: no acceptable C compiler found in \$PATH +See \`config.log' for more details." >&2;} + { (exit 1); exit 1; }; } + +# Provide some information about the compiler. +echo "$as_me:$LINENO: checking for C compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +{ echo "$as_me:$LINENO: checking whether we are using the GNU C compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C compiler... $ECHO_C" >&6; } +if test "${ac_cv_c_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_c_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_c_compiler_gnu" >&6; } +GCC=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CFLAGS=${CFLAGS+set} +ac_save_CFLAGS=$CFLAGS +{ echo "$as_me:$LINENO: checking whether $CC accepts -g" >&5 +echo $ECHO_N "checking whether $CC accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_c_werror_flag=$ac_c_werror_flag + ac_c_werror_flag=yes + ac_cv_prog_cc_g=no + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? 
+ echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_c_werror_flag=$ac_save_c_werror_flag + CFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_c_werror_flag=$ac_save_c_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cc_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_g" >&6; } +if test "$ac_test_CFLAGS" = set; then + CFLAGS=$ac_save_CFLAGS +elif test $ac_cv_prog_cc_g = yes; then + if test "$GCC" = yes; then + CFLAGS="-g -O2" + else + CFLAGS="-g" + fi +else + if test "$GCC" = yes; then + CFLAGS="-O2" + else + CFLAGS= + fi +fi +{ echo "$as_me:$LINENO: checking for $CC option to accept ISO C89" >&5 +echo $ECHO_N "checking for $CC option to accept ISO C89... $ECHO_C" >&6; } +if test "${ac_cv_prog_cc_c89+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_cv_prog_cc_c89=no +ac_save_CC=$CC +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <stdarg.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ +struct buf { int x; }; +FILE * (*rcsopen) (struct buf *, struct stat *, int); +static char *e (p, i) + char **p; + int i; +{ + return p[i]; +} +static char *f (char * (*g) (char **, int), char **p, ...) +{ + char *s; + va_list v; + va_start (v,p); + s = g (p, va_arg (v,int)); + va_end (v); + return s; +} + +/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has + function prototypes and stuff, but not '\xHH' hex character constants. + These don't provoke an error unfortunately, instead are silently treated + as 'x'. The following induces an error, until -std is added to get + proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an + array size at least. It's necessary to write '\x00'==0 to get something + that's true only with -std. */ +int osf4_cc_array ['\x00' == 0 ? 1 : -1]; + +/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters + inside strings and character constants. */ +#define FOO(x) 'x' +int xlc6_cc_array[FOO(a) == 'x' ? 1 : -1]; + +int test (int i, double x); +struct s1 {int (*f) (int a);}; +struct s2 {int (*f) (double a);}; +int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); +int argc; +char **argv; +int +main () +{ +return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; + ; + return 0; +} +_ACEOF +for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ + -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +do + CC="$ac_save_CC $ac_arg" + rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cc_c89=$ac_arg +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext + test "x$ac_cv_prog_cc_c89" != "xno" && break +done +rm -f conftest.$ac_ext +CC=$ac_save_CC + +fi +# AC_CACHE_VAL +case "x$ac_cv_prog_cc_c89" in + x) + { echo "$as_me:$LINENO: result: none needed" >&5 +echo "${ECHO_T}none needed" >&6; } ;; + xno) + { echo "$as_me:$LINENO: result: unsupported" >&5 +echo "${ECHO_T}unsupported" >&6; } ;; + *) + CC="$CC $ac_cv_prog_cc_c89" + { echo "$as_me:$LINENO: result: $ac_cv_prog_cc_c89" >&5 +echo "${ECHO_T}$ac_cv_prog_cc_c89" >&6; } ;; +esac + + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ + do + # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. +set dummy $ac_tool_prefix$ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { echo "$as_me:$LINENO: result: $CXX" >&5 +echo "${ECHO_T}$CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$CXX" && break + done +fi +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { echo "$as_me:$LINENO: result: $ac_ct_CXX" >&5 +echo "${ECHO_T}$ac_ct_CXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$ac_ct_CXX" && break +done + + if test "x$ac_ct_CXX" = x; then + CXX="g++" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + CXX=$ac_ct_CXX + fi +fi + + fi +fi +# Provide some information about the compiler. +echo "$as_me:$LINENO: checking for C++ compiler version" >&5 +ac_compiler=`set X $ac_compile; echo $2` +{ (ac_try="$ac_compiler --version >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler --version >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -v >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -v >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } +{ (ac_try="$ac_compiler -V >&5" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compiler -V >&5") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } + +{ echo "$as_me:$LINENO: checking whether we are using the GNU C++ compiler" >&5 +echo $ECHO_N "checking whether we are using the GNU C++ compiler... $ECHO_C" >&6; } +if test "${ac_cv_cxx_compiler_gnu+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +#ifndef __GNUC__ + choke me +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_compiler_gnu=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_compiler_gnu=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_cxx_compiler_gnu" >&5 +echo "${ECHO_T}$ac_cv_cxx_compiler_gnu" >&6; } +GXX=`test $ac_compiler_gnu = yes && echo yes` +ac_test_CXXFLAGS=${CXXFLAGS+set} +ac_save_CXXFLAGS=$CXXFLAGS +{ echo "$as_me:$LINENO: checking whether $CXX accepts -g" >&5 +echo $ECHO_N "checking whether $CXX accepts -g... $ECHO_C" >&6; } +if test "${ac_cv_prog_cxx_g+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + CXXFLAGS="" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_prog_cxx_g=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag +fi +{ echo "$as_me:$LINENO: result: $ac_cv_prog_cxx_g" >&5 +echo "${ECHO_T}$ac_cv_prog_cxx_g" >&6; } +if test "$ac_test_CXXFLAGS" = set; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" + else + CXXFLAGS="-g" + fi +else + if test "$GXX" = yes; then + CXXFLAGS="-O2" + else + CXXFLAGS= + fi +fi +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ echo "$as_me:$LINENO: checking for BSD-compatible nm" >&5 +echo $ECHO_N "checking for BSD-compatible nm... $ECHO_C" >&6; } +if test "${lt_cv_path_NM+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$NM"; then + # Let the user override the test. + lt_cv_path_NM="$NM" +else + lt_nm_to_check="${ac_tool_prefix}nm" + if test -n "$ac_tool_prefix" && test "$build" = "$host"; then + lt_nm_to_check="$lt_nm_to_check nm" + fi + for lt_tmp_nm in $lt_nm_to_check; do + lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR + for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do + IFS="$lt_save_ifs" + test -z "$ac_dir" && ac_dir=. + tmp_nm="$ac_dir/$lt_tmp_nm" + if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then + # Check to see if the nm accepts a BSD-compat flag. 
+ # Adding the `sed 1q' prevents false positives on HP-UX, which says: + # nm: unknown option "B" ignored + # Tru64's nm complains that /dev/null is an invalid object file + case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in + */dev/null* | *'Invalid file or object type'*) + lt_cv_path_NM="$tmp_nm -B" + break + ;; + *) + case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in + */dev/null*) + lt_cv_path_NM="$tmp_nm -p" + break + ;; + *) + lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but + continue # so that we can try to find one that supports BSD flags + ;; + esac + ;; + esac + fi + done + IFS="$lt_save_ifs" + done + test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm +fi +fi +{ echo "$as_me:$LINENO: result: $lt_cv_path_NM" >&5 +echo "${ECHO_T}$lt_cv_path_NM" >&6; } +NM="$lt_cv_path_NM" + + + +{ echo "$as_me:$LINENO: checking for GNU make" >&5 +echo $ECHO_N "checking for GNU make... $ECHO_C" >&6; } +if test "${llvm_cv_gnu_make_command+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + llvm_cv_gnu_make_command='' + for a in "$MAKE" make gmake gnumake ; do + if test -z "$a" ; then continue ; fi ; + if ( sh -c "$a --version" 2> /dev/null | grep GNU 2>&1 > /dev/null ) + then + llvm_cv_gnu_make_command=$a ; + break; + fi + done +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_gnu_make_command" >&5 +echo "${ECHO_T}$llvm_cv_gnu_make_command" >&6; } + if test "x$llvm_cv_gnu_make_command" != "x" ; then + ifGNUmake='' ; + else + ifGNUmake='#' ; + { echo "$as_me:$LINENO: result: \"Not found\"" >&5 +echo "${ECHO_T}\"Not found\"" >&6; }; + fi + + +{ echo "$as_me:$LINENO: checking whether ln -s works" >&5 +echo $ECHO_N "checking whether ln -s works... $ECHO_C" >&6; } +LN_S=$as_ln_s +if test "$LN_S" = "ln -s"; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } +else + { echo "$as_me:$LINENO: result: no, using $LN_S" >&5 +echo "${ECHO_T}no, using $LN_S" >&6; } +fi + +# Extract the first word of "cmp", so it can be a program name with args. +set dummy cmp; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_CMP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $CMP in + [\\/]* | ?:[\\/]*) + ac_cv_path_CMP="$CMP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_CMP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_CMP" && ac_cv_path_CMP="cmp" + ;; +esac +fi +CMP=$ac_cv_path_CMP +if test -n "$CMP"; then + { echo "$as_me:$LINENO: result: $CMP" >&5 +echo "${ECHO_T}$CMP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "cp", so it can be a program name with args. +set dummy cp; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_CP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $CP in + [\\/]* | ?:[\\/]*) + ac_cv_path_CP="$CP" # Let the user override the test with a path. 
+ ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_CP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_CP" && ac_cv_path_CP="cp" + ;; +esac +fi +CP=$ac_cv_path_CP +if test -n "$CP"; then + { echo "$as_me:$LINENO: result: $CP" >&5 +echo "${ECHO_T}$CP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "date", so it can be a program name with args. +set dummy date; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_DATE+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $DATE in + [\\/]* | ?:[\\/]*) + ac_cv_path_DATE="$DATE" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_DATE="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_DATE" && ac_cv_path_DATE="date" + ;; +esac +fi +DATE=$ac_cv_path_DATE +if test -n "$DATE"; then + { echo "$as_me:$LINENO: result: $DATE" >&5 +echo "${ECHO_T}$DATE" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "find", so it can be a program name with args. +set dummy find; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_FIND+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $FIND in + [\\/]* | ?:[\\/]*) + ac_cv_path_FIND="$FIND" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_FIND="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_FIND" && ac_cv_path_FIND="find" + ;; +esac +fi +FIND=$ac_cv_path_FIND +if test -n "$FIND"; then + { echo "$as_me:$LINENO: result: $FIND" >&5 +echo "${ECHO_T}$FIND" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "grep", so it can be a program name with args. +set dummy grep; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_GREP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GREP in + [\\/]* | ?:[\\/]*) + ac_cv_path_GREP="$GREP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GREP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_GREP" && ac_cv_path_GREP="grep" + ;; +esac +fi +GREP=$ac_cv_path_GREP +if test -n "$GREP"; then + { echo "$as_me:$LINENO: result: $GREP" >&5 +echo "${ECHO_T}$GREP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "mkdir", so it can be a program name with args. +set dummy mkdir; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_MKDIR+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $MKDIR in + [\\/]* | ?:[\\/]*) + ac_cv_path_MKDIR="$MKDIR" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_MKDIR="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_MKDIR" && ac_cv_path_MKDIR="mkdir" + ;; +esac +fi +MKDIR=$ac_cv_path_MKDIR +if test -n "$MKDIR"; then + { echo "$as_me:$LINENO: result: $MKDIR" >&5 +echo "${ECHO_T}$MKDIR" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "mv", so it can be a program name with args. +set dummy mv; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_MV+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $MV in + [\\/]* | ?:[\\/]*) + ac_cv_path_MV="$MV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_MV="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_MV" && ac_cv_path_MV="mv" + ;; +esac +fi +MV=$ac_cv_path_MV +if test -n "$MV"; then + { echo "$as_me:$LINENO: result: $MV" >&5 +echo "${ECHO_T}$MV" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. +set dummy ${ac_tool_prefix}ranlib; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +RANLIB=$ac_cv_prog_RANLIB +if test -n "$RANLIB"; then + { echo "$as_me:$LINENO: result: $RANLIB" >&5 +echo "${ECHO_T}$RANLIB" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_RANLIB"; then + ac_ct_RANLIB=$RANLIB + # Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_RANLIB"; then + ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_RANLIB="ranlib" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB +if test -n "$ac_ct_RANLIB"; then + { echo "$as_me:$LINENO: result: $ac_ct_RANLIB" >&5 +echo "${ECHO_T}$ac_ct_RANLIB" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_RANLIB" = x; then + RANLIB=":" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + RANLIB=$ac_ct_RANLIB + fi +else + RANLIB="$ac_cv_prog_RANLIB" +fi + +if test -n "$ac_tool_prefix"; then + # Extract the first word of "${ac_tool_prefix}ar", so it can be a program name with args. +set dummy ${ac_tool_prefix}ar; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_AR+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$AR"; then + ac_cv_prog_AR="$AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_AR="${ac_tool_prefix}ar" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +AR=$ac_cv_prog_AR +if test -n "$AR"; then + { echo "$as_me:$LINENO: result: $AR" >&5 +echo "${ECHO_T}$AR" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +fi +if test -z "$ac_cv_prog_AR"; then + ac_ct_AR=$AR + # Extract the first word of "ar", so it can be a program name with args. +set dummy ar; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_prog_ac_ct_AR+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$ac_ct_AR"; then + ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test. +else +as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_prog_ac_ct_AR="ar" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + +fi +fi +ac_ct_AR=$ac_cv_prog_ac_ct_AR +if test -n "$ac_ct_AR"; then + { echo "$as_me:$LINENO: result: $ac_ct_AR" >&5 +echo "${ECHO_T}$ac_ct_AR" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + if test "x$ac_ct_AR" = x; then + AR="false" + else + case $cross_compiling:$ac_tool_warned in +yes:) +{ echo "$as_me:$LINENO: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&5 +echo "$as_me: WARNING: In the future, Autoconf will not detect cross-tools +whose name does not start with the host triplet. If you think this +configuration is useful to you, please write to autoconf@gnu.org." >&2;} +ac_tool_warned=yes ;; +esac + AR=$ac_ct_AR + fi +else + AR="$ac_cv_prog_AR" +fi + +# Extract the first word of "rm", so it can be a program name with args. +set dummy rm; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_RM+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $RM in + [\\/]* | ?:[\\/]*) + ac_cv_path_RM="$RM" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_RM="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_RM" && ac_cv_path_RM="rm" + ;; +esac +fi +RM=$ac_cv_path_RM +if test -n "$RM"; then + { echo "$as_me:$LINENO: result: $RM" >&5 +echo "${ECHO_T}$RM" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "sed", so it can be a program name with args. +set dummy sed; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_path_SED+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $SED in + [\\/]* | ?:[\\/]*) + ac_cv_path_SED="$SED" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_SED="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_SED" && ac_cv_path_SED="sed" + ;; +esac +fi +SED=$ac_cv_path_SED +if test -n "$SED"; then + { echo "$as_me:$LINENO: result: $SED" >&5 +echo "${ECHO_T}$SED" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "tar", so it can be a program name with args. +set dummy tar; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_TAR+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $TAR in + [\\/]* | ?:[\\/]*) + ac_cv_path_TAR="$TAR" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_TAR="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_TAR" && ac_cv_path_TAR="gtar" + ;; +esac +fi +TAR=$ac_cv_path_TAR +if test -n "$TAR"; then + { echo "$as_me:$LINENO: result: $TAR" >&5 +echo "${ECHO_T}$TAR" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "pwd", so it can be a program name with args. +set dummy pwd; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_BINPWD+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $BINPWD in + [\\/]* | ?:[\\/]*) + ac_cv_path_BINPWD="$BINPWD" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_BINPWD="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_BINPWD" && ac_cv_path_BINPWD="pwd" + ;; +esac +fi +BINPWD=$ac_cv_path_BINPWD +if test -n "$BINPWD"; then + { echo "$as_me:$LINENO: result: $BINPWD" >&5 +echo "${ECHO_T}$BINPWD" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + +# Extract the first word of "Graphviz", so it can be a program name with args. +set dummy Graphviz; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_path_GRAPHVIZ+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GRAPHVIZ in + [\\/]* | ?:[\\/]*) + ac_cv_path_GRAPHVIZ="$GRAPHVIZ" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GRAPHVIZ="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_GRAPHVIZ" && ac_cv_path_GRAPHVIZ="echo Graphviz" + ;; +esac +fi +GRAPHVIZ=$ac_cv_path_GRAPHVIZ +if test -n "$GRAPHVIZ"; then + { echo "$as_me:$LINENO: result: $GRAPHVIZ" >&5 +echo "${ECHO_T}$GRAPHVIZ" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$GRAPHVIZ" != "echo Graphviz" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GRAPHVIZ 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + GRAPHVIZ=`echo $GRAPHVIZ | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_GRAPHVIZ "$GRAPHVIZ${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "dot", so it can be a program name with args. +set dummy dot; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_DOT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $DOT in + [\\/]* | ?:[\\/]*) + ac_cv_path_DOT="$DOT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_DOT="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_DOT" && ac_cv_path_DOT="echo dot" + ;; +esac +fi +DOT=$ac_cv_path_DOT +if test -n "$DOT"; then + { echo "$as_me:$LINENO: result: $DOT" >&5 +echo "${ECHO_T}$DOT" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$DOT" != "echo dot" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DOT 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + DOT=`echo $DOT | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_DOT "$DOT${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "fdp", so it can be a program name with args. +set dummy fdp; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_FDP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $FDP in + [\\/]* | ?:[\\/]*) + ac_cv_path_FDP="$FDP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_FDP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_FDP" && ac_cv_path_FDP="echo fdp" + ;; +esac +fi +FDP=$ac_cv_path_FDP +if test -n "$FDP"; then + { echo "$as_me:$LINENO: result: $FDP" >&5 +echo "${ECHO_T}$FDP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$FDP" != "echo fdp" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FDP 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + FDP=`echo $FDP | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_FDP "$FDP${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "neato", so it can be a program name with args. +set dummy neato; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_NEATO+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $NEATO in + [\\/]* | ?:[\\/]*) + ac_cv_path_NEATO="$NEATO" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_NEATO="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_NEATO" && ac_cv_path_NEATO="echo neato" + ;; +esac +fi +NEATO=$ac_cv_path_NEATO +if test -n "$NEATO"; then + { echo "$as_me:$LINENO: result: $NEATO" >&5 +echo "${ECHO_T}$NEATO" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$NEATO" != "echo neato" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_NEATO 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + NEATO=`echo $NEATO | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_NEATO "$NEATO${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "twopi", so it can be a program name with args. +set dummy twopi; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_TWOPI+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $TWOPI in + [\\/]* | ?:[\\/]*) + ac_cv_path_TWOPI="$TWOPI" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_TWOPI="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_TWOPI" && ac_cv_path_TWOPI="echo twopi" + ;; +esac +fi +TWOPI=$ac_cv_path_TWOPI +if test -n "$TWOPI"; then + { echo "$as_me:$LINENO: result: $TWOPI" >&5 +echo "${ECHO_T}$TWOPI" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$TWOPI" != "echo twopi" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_TWOPI 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + TWOPI=`echo $TWOPI | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_TWOPI "$TWOPI${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "circo", so it can be a program name with args. +set dummy circo; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_CIRCO+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $CIRCO in + [\\/]* | ?:[\\/]*) + ac_cv_path_CIRCO="$CIRCO" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_CIRCO="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_CIRCO" && ac_cv_path_CIRCO="echo circo" + ;; +esac +fi +CIRCO=$ac_cv_path_CIRCO +if test -n "$CIRCO"; then + { echo "$as_me:$LINENO: result: $CIRCO" >&5 +echo "${ECHO_T}$CIRCO" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$CIRCO" != "echo circo" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_CIRCO 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + CIRCO=`echo $CIRCO | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_CIRCO "$CIRCO${EXEEXT}" +_ACEOF + +fi +for ac_prog in gv gsview32 +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_GV+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GV in + [\\/]* | ?:[\\/]*) + ac_cv_path_GV="$GV" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GV="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +GV=$ac_cv_path_GV +if test -n "$GV"; then + { echo "$as_me:$LINENO: result: $GV" >&5 +echo "${ECHO_T}$GV" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$GV" && break +done +test -n "$GV" || GV="echo gv" + +if test "$GV" != "echo gv" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_GV 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + GV=`echo $GV | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_GV "$GV${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "dotty", so it can be a program name with args. +set dummy dotty; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_DOTTY+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $DOTTY in + [\\/]* | ?:[\\/]*) + ac_cv_path_DOTTY="$DOTTY" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_DOTTY="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_DOTTY" && ac_cv_path_DOTTY="echo dotty" + ;; +esac +fi +DOTTY=$ac_cv_path_DOTTY +if test -n "$DOTTY"; then + { echo "$as_me:$LINENO: result: $DOTTY" >&5 +echo "${ECHO_T}$DOTTY" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$DOTTY" != "echo dotty" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DOTTY 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + DOTTY=`echo $DOTTY | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_DOTTY "$DOTTY${EXEEXT}" +_ACEOF + +fi +# Extract the first word of "xdot.py", so it can be a program name with args. +set dummy xdot.py; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_XDOT_PY+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $XDOT_PY in + [\\/]* | ?:[\\/]*) + ac_cv_path_XDOT_PY="$XDOT_PY" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_XDOT_PY="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_XDOT_PY" && ac_cv_path_XDOT_PY="echo xdot.py" + ;; +esac +fi +XDOT_PY=$ac_cv_path_XDOT_PY +if test -n "$XDOT_PY"; then + { echo "$as_me:$LINENO: result: $XDOT_PY" >&5 +echo "${ECHO_T}$XDOT_PY" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$XDOT_PY" != "echo xdot.py" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_XDOT_PY 1 +_ACEOF + + if test "$llvm_cv_os_type" = "MingW" ; then + XDOT_PY=`echo $XDOT_PY | sed 's/^\/\([A-Za-z]\)\//\1:\//' ` + fi + +cat >>confdefs.h <<_ACEOF +#define LLVM_PATH_XDOT_PY "$XDOT_PY${EXEEXT}" +_ACEOF + +fi + + +# Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_PERL+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $PERL in + [\\/]* | ?:[\\/]*) + ac_cv_path_PERL="$PERL" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_PERL="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + test -z "$ac_cv_path_PERL" && ac_cv_path_PERL="none" + ;; +esac +fi +PERL=$ac_cv_path_PERL +if test -n "$PERL"; then + { echo "$as_me:$LINENO: result: $PERL" >&5 +echo "${ECHO_T}$PERL" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +if test "$PERL" != "none"; then + { echo "$as_me:$LINENO: checking for Perl 5.006 or newer" >&5 +echo $ECHO_N "checking for Perl 5.006 or newer... $ECHO_C" >&6; } + if $PERL -e 'use 5.006;' 2>&1 > /dev/null; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + else + PERL=none + { echo "$as_me:$LINENO: result: not found" >&5 +echo "${ECHO_T}not found" >&6; } + fi +fi + + +if test x"$PERL" = xnone; then + HAVE_PERL=0 + + { { echo "$as_me:$LINENO: error: perl is required but was not found, please install it" >&5 +echo "$as_me: error: perl is required but was not found, please install it" >&2;} + { (exit 1); exit 1; }; } +else + HAVE_PERL=1 + +fi + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AmigaOS /C/install, which installs bootblocks on floppy discs +# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# OS/2's system install, which has a completely different semantic +# ./install, which can be erroneously created by make from ./install.sh. 
+{ echo "$as_me:$LINENO: checking for a BSD-compatible install" >&5 +echo $ECHO_N "checking for a BSD-compatible install... $ECHO_C" >&6; } +if test -z "$INSTALL"; then +if test "${ac_cv_path_install+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + # Account for people who put trailing slashes in PATH elements. +case $as_dir/ in + ./ | .// | /cC/* | \ + /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \ + ?:\\/os2\\/install\\/* | ?:\\/OS2\\/INSTALL\\/* | \ + /usr/ucb/* ) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + # Don't use installbsd from OSF since it installs stuff as root + # by default. + for ac_prog in ginstall scoinst install; do + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_executable_p "$as_dir/$ac_prog$ac_exec_ext"; }; then + if test $ac_prog = install && + grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + : + elif test $ac_prog = install && + grep pwplus "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then + # program-specific install script used by HP pwplus--don't use. + : + else + ac_cv_path_install="$as_dir/$ac_prog$ac_exec_ext -c" + break 3 + fi + fi + done + done + ;; +esac +done +IFS=$as_save_IFS + + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL=$ac_cv_path_install + else + # As a last resort, use the slow shell script. Don't cache a + # value for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the value is a relative name. + INSTALL=$ac_install_sh + fi +fi +{ echo "$as_me:$LINENO: result: $INSTALL" >&5 +echo "${ECHO_T}$INSTALL" >&6; } + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. +test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +case "$INSTALL" in + [\\/$]* | ?:[\\/]* ) ;; + *) INSTALL="\\\$(TOPSRCDIR)/$INSTALL" ;; +esac + +# Extract the first word of "bzip2", so it can be a program name with args. +set dummy bzip2; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_BZIP2+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $BZIP2 in + [\\/]* | ?:[\\/]*) + ac_cv_path_BZIP2="$BZIP2" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_BZIP2="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +BZIP2=$ac_cv_path_BZIP2 +if test -n "$BZIP2"; then + { echo "$as_me:$LINENO: result: $BZIP2" >&5 +echo "${ECHO_T}$BZIP2" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "cat", so it can be a program name with args. 
+set dummy cat; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_CAT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $CAT in + [\\/]* | ?:[\\/]*) + ac_cv_path_CAT="$CAT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_CAT="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +CAT=$ac_cv_path_CAT +if test -n "$CAT"; then + { echo "$as_me:$LINENO: result: $CAT" >&5 +echo "${ECHO_T}$CAT" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "doxygen", so it can be a program name with args. +set dummy doxygen; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_DOXYGEN+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $DOXYGEN in + [\\/]* | ?:[\\/]*) + ac_cv_path_DOXYGEN="$DOXYGEN" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_DOXYGEN="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +DOXYGEN=$ac_cv_path_DOXYGEN +if test -n "$DOXYGEN"; then + { echo "$as_me:$LINENO: result: $DOXYGEN" >&5 +echo "${ECHO_T}$DOXYGEN" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "groff", so it can be a program name with args. +set dummy groff; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_GROFF+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GROFF in + [\\/]* | ?:[\\/]*) + ac_cv_path_GROFF="$GROFF" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GROFF="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +GROFF=$ac_cv_path_GROFF +if test -n "$GROFF"; then + { echo "$as_me:$LINENO: result: $GROFF" >&5 +echo "${ECHO_T}$GROFF" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "gzip", so it can be a program name with args. +set dummy gzip; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_path_GZIPBIN+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GZIPBIN in + [\\/]* | ?:[\\/]*) + ac_cv_path_GZIPBIN="$GZIPBIN" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GZIPBIN="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +GZIPBIN=$ac_cv_path_GZIPBIN +if test -n "$GZIPBIN"; then + { echo "$as_me:$LINENO: result: $GZIPBIN" >&5 +echo "${ECHO_T}$GZIPBIN" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "pod2html", so it can be a program name with args. +set dummy pod2html; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_POD2HTML+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $POD2HTML in + [\\/]* | ?:[\\/]*) + ac_cv_path_POD2HTML="$POD2HTML" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_POD2HTML="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +POD2HTML=$ac_cv_path_POD2HTML +if test -n "$POD2HTML"; then + { echo "$as_me:$LINENO: result: $POD2HTML" >&5 +echo "${ECHO_T}$POD2HTML" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "pod2man", so it can be a program name with args. +set dummy pod2man; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_POD2MAN+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $POD2MAN in + [\\/]* | ?:[\\/]*) + ac_cv_path_POD2MAN="$POD2MAN" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_POD2MAN="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +POD2MAN=$ac_cv_path_POD2MAN +if test -n "$POD2MAN"; then + { echo "$as_me:$LINENO: result: $POD2MAN" >&5 +echo "${ECHO_T}$POD2MAN" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "pdfroff", so it can be a program name with args. +set dummy pdfroff; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_path_PDFROFF+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $PDFROFF in + [\\/]* | ?:[\\/]*) + ac_cv_path_PDFROFF="$PDFROFF" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_PDFROFF="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +PDFROFF=$ac_cv_path_PDFROFF +if test -n "$PDFROFF"; then + { echo "$as_me:$LINENO: result: $PDFROFF" >&5 +echo "${ECHO_T}$PDFROFF" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +# Extract the first word of "runtest", so it can be a program name with args. +set dummy runtest; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_RUNTEST+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $RUNTEST in + [\\/]* | ?:[\\/]*) + ac_cv_path_RUNTEST="$RUNTEST" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_RUNTEST="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +RUNTEST=$ac_cv_path_RUNTEST +if test -n "$RUNTEST"; then + { echo "$as_me:$LINENO: result: $RUNTEST" >&5 +echo "${ECHO_T}$RUNTEST" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + +no_itcl=true +{ echo "$as_me:$LINENO: checking for the tclsh program in tclinclude directory" >&5 +echo $ECHO_N "checking for the tclsh program in tclinclude directory... $ECHO_C" >&6; } + +# Check whether --with-tclinclude was given. +if test "${with_tclinclude+set}" = set; then + withval=$with_tclinclude; with_tclinclude=${withval} +else + with_tclinclude='' +fi + +if test "${ac_cv_path_tclsh+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + +if test x"${with_tclinclude}" != x ; then + if test -f ${with_tclinclude}/tclsh ; then + ac_cv_path_tclsh=`(cd ${with_tclinclude}; pwd)` + elif test -f ${with_tclinclude}/src/tclsh ; then + ac_cv_path_tclsh=`(cd ${with_tclinclude}/src; pwd)` + else + { { echo "$as_me:$LINENO: error: ${with_tclinclude} directory doesn't contain tclsh" >&5 +echo "$as_me: error: ${with_tclinclude} directory doesn't contain tclsh" >&2;} + { (exit 1); exit 1; }; } + fi +fi +fi + + +if test x"${ac_cv_path_tclsh}" = x ; then + { echo "$as_me:$LINENO: result: none" >&5 +echo "${ECHO_T}none" >&6; } + for ac_prog in tclsh8.4 tclsh8.4.8 tclsh8.4.7 tclsh8.4.6 tclsh8.4.5 tclsh8.4.4 tclsh8.4.3 tclsh8.4.2 tclsh8.4.1 tclsh8.4.0 tclsh8.3 tclsh8.3.5 tclsh8.3.4 tclsh8.3.3 tclsh8.3.2 tclsh8.3.1 tclsh8.3.0 tclsh +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... 
$ECHO_C" >&6; } +if test "${ac_cv_path_TCLSH+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $TCLSH in + [\\/]* | ?:[\\/]*) + ac_cv_path_TCLSH="$TCLSH" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_TCLSH="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +TCLSH=$ac_cv_path_TCLSH +if test -n "$TCLSH"; then + { echo "$as_me:$LINENO: result: $TCLSH" >&5 +echo "${ECHO_T}$TCLSH" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$TCLSH" && break +done + + if test x"${TCLSH}" = x ; then + ac_cv_path_tclsh=''; + else + ac_cv_path_tclsh="${TCLSH}"; + fi +else + { echo "$as_me:$LINENO: result: ${ac_cv_path_tclsh}" >&5 +echo "${ECHO_T}${ac_cv_path_tclsh}" >&6; } + TCLSH="${ac_cv_path_tclsh}" + +fi + +# Extract the first word of "zip", so it can be a program name with args. +set dummy zip; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_ZIP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $ZIP in + [\\/]* | ?:[\\/]*) + ac_cv_path_ZIP="$ZIP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_ZIP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +ZIP=$ac_cv_path_ZIP +if test -n "$ZIP"; then + { echo "$as_me:$LINENO: result: $ZIP" >&5 +echo "${ECHO_T}$ZIP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +for ac_prog in ocamlc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_OCAMLC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $OCAMLC in + [\\/]* | ?:[\\/]*) + ac_cv_path_OCAMLC="$OCAMLC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_OCAMLC="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +OCAMLC=$ac_cv_path_OCAMLC +if test -n "$OCAMLC"; then + { echo "$as_me:$LINENO: result: $OCAMLC" >&5 +echo "${ECHO_T}$OCAMLC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$OCAMLC" && break +done + +for ac_prog in ocamlopt +do + # Extract the first word of "$ac_prog", so it can be a program name with args. 
+set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_OCAMLOPT+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $OCAMLOPT in + [\\/]* | ?:[\\/]*) + ac_cv_path_OCAMLOPT="$OCAMLOPT" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_OCAMLOPT="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +OCAMLOPT=$ac_cv_path_OCAMLOPT +if test -n "$OCAMLOPT"; then + { echo "$as_me:$LINENO: result: $OCAMLOPT" >&5 +echo "${ECHO_T}$OCAMLOPT" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$OCAMLOPT" && break +done + +for ac_prog in ocamldep +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_OCAMLDEP+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $OCAMLDEP in + [\\/]* | ?:[\\/]*) + ac_cv_path_OCAMLDEP="$OCAMLDEP" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_OCAMLDEP="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +OCAMLDEP=$ac_cv_path_OCAMLDEP +if test -n "$OCAMLDEP"; then + { echo "$as_me:$LINENO: result: $OCAMLDEP" >&5 +echo "${ECHO_T}$OCAMLDEP" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$OCAMLDEP" && break +done + +for ac_prog in ocamldoc +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_OCAMLDOC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $OCAMLDOC in + [\\/]* | ?:[\\/]*) + ac_cv_path_OCAMLDOC="$OCAMLDOC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_OCAMLDOC="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +OCAMLDOC=$ac_cv_path_OCAMLDOC +if test -n "$OCAMLDOC"; then + { echo "$as_me:$LINENO: result: $OCAMLDOC" >&5 +echo "${ECHO_T}$OCAMLDOC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$OCAMLDOC" && break +done + +for ac_prog in gas as +do + # Extract the first word of "$ac_prog", so it can be a program name with args. +set dummy $ac_prog; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_GAS+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $GAS in + [\\/]* | ?:[\\/]*) + ac_cv_path_GAS="$GAS" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_GAS="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +GAS=$ac_cv_path_GAS +if test -n "$GAS"; then + { echo "$as_me:$LINENO: result: $GAS" >&5 +echo "${ECHO_T}$GAS" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + test -n "$GAS" && break +done + + +{ echo "$as_me:$LINENO: checking for linker version" >&5 +echo $ECHO_N "checking for linker version... $ECHO_C" >&6; } +if test "${llvm_cv_link_version+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + + version_string="$(ld -v 2>&1 | head -1)" + + # Check for ld64. + if (echo "$version_string" | grep -q "ld64"); then + llvm_cv_link_version=$(echo "$version_string" | sed -e "s#.*ld64-\([^ ]*\)#\1#") + else + llvm_cv_link_version=$(echo "$version_string" | sed -e "s#[^0-9]*\([0-9.]*\).*#\1#") + fi + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_link_version" >&5 +echo "${ECHO_T}$llvm_cv_link_version" >&6; } + +cat >>confdefs.h <<_ACEOF +#define HOST_LINK_VERSION "$llvm_cv_link_version" +_ACEOF + + + +{ echo "$as_me:$LINENO: checking for compiler -Wl,-R option" >&5 +echo $ECHO_N "checking for compiler -Wl,-R option... $ECHO_C" >&6; } +if test "${llvm_cv_link_use_r+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + oldcflags="$CFLAGS" + CFLAGS="$CFLAGS -Wl,-R." + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_link_use_r=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_link_use_r=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS="$oldcflags" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_r" >&5 +echo "${ECHO_T}$llvm_cv_link_use_r" >&6; } +if test "$llvm_cv_link_use_r" = yes ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LINK_R 1 +_ACEOF + + fi + + +{ echo "$as_me:$LINENO: checking for compiler -Wl,-export-dynamic option" >&5 +echo $ECHO_N "checking for compiler -Wl,-export-dynamic option... $ECHO_C" >&6; } +if test "${llvm_cv_link_use_export_dynamic+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + oldcflags="$CFLAGS" + CFLAGS="$CFLAGS -Wl,-export-dynamic" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_link_use_export_dynamic=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_link_use_export_dynamic=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS="$oldcflags" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_export_dynamic" >&5 +echo "${ECHO_T}$llvm_cv_link_use_export_dynamic" >&6; } +if test "$llvm_cv_link_use_export_dynamic" = yes ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LINK_EXPORT_DYNAMIC 1 +_ACEOF + + fi + + +{ echo "$as_me:$LINENO: checking for compiler -Wl,--version-script option" >&5 +echo $ECHO_N "checking for compiler -Wl,--version-script option... $ECHO_C" >&6; } +if test "${llvm_cv_link_use_version_script+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + oldcflags="$CFLAGS" + + # The following code is from the autoconf manual, + # "11.13: Limitations of Usual Tools". + # Create a temporary directory $tmp in $TMPDIR (default /tmp). + # Use mktemp if possible; otherwise fall back on mkdir, + # with $RANDOM to make collisions less likely. + : ${TMPDIR=/tmp} + { + tmp=` + (umask 077 && mktemp -d "$TMPDIR/fooXXXXXX") 2>/dev/null + ` && + test -n "$tmp" && test -d "$tmp" + } || { + tmp=$TMPDIR/foo$$-$RANDOM + (umask 077 && mkdir "$tmp") + } || exit $? + + echo "{" > "$tmp/export.map" + echo " global: main;" >> "$tmp/export.map" + echo " local: *;" >> "$tmp/export.map" + echo "};" >> "$tmp/export.map" + + CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_link_use_version_script=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_link_use_version_script=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + rm "$tmp/export.map" + rmdir "$tmp" + CFLAGS="$oldcflags" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_version_script" >&5 +echo "${ECHO_T}$llvm_cv_link_use_version_script" >&6; } +if test "$llvm_cv_link_use_version_script" = yes ; then + HAVE_LINK_VERSION_SCRIPT=1 + + fi + + + + +{ echo "$as_me:$LINENO: checking for an ANSI C-conforming const" >&5 +echo $ECHO_N "checking for an ANSI C-conforming const... $ECHO_C" >&6; } +if test "${ac_cv_c_const+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ +/* FIXME: Include the comments suggested by Paul. */ +#ifndef __cplusplus + /* Ultrix mips cc rejects this. */ + typedef int charset[2]; + const charset x; + /* SunOS 4.1.1 cc rejects this. */ + char const *const *ccp; + char **p; + /* NEC SVR4.0.2 mips cc rejects this. */ + struct point {int x, y;}; + static struct point const zero = {0,0}; + /* AIX XL C 1.02.0.0 rejects this. + It does not let you subtract one const X* pointer from another in + an arm of an if-expression whose if-part is not a constant + expression */ + const char *g = "string"; + ccp = &g + (g ? g-g : 0); + /* HPUX 7.0 cc rejects these. */ + ++ccp; + p = (char**) ccp; + ccp = (char const *const *) p; + { /* SCO 3.2v4 cc rejects this. */ + char *t; + char const *s = 0 ? (char *) 0 : (char const *) 0; + + *t++ = 0; + if (s) return 0; + } + { /* Someone thinks the Sun supposedly-ANSI compiler will reject this. */ + int x[] = {25, 17}; + const int *foo = &x[0]; + ++foo; + } + { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */ + typedef const int *iptr; + iptr p = 0; + ++p; + } + { /* AIX XL C 1.02.0.0 rejects this saying + "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */ + struct s { int j; const int *ap[3]; }; + struct s *b; b->j = 5; + } + { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ + const int foo = 10; + if (!foo) return 0; + } + return !x[0] && !zero.x; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_c_const=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_c_const=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_c_const" >&5 +echo "${ECHO_T}$ac_cv_c_const" >&6; } +if test $ac_cv_c_const = no; then + +cat >>confdefs.h <<\_ACEOF +#define const +_ACEOF + +fi + + + + + + +ac_header_dirent=no +for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do + as_ac_Header=`echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_hdr that defines DIR" >&5 +echo $ECHO_N "checking for $ac_hdr that defines DIR... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +#include <$ac_hdr> + +int +main () +{ +if ((DIR *) 0) +return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_hdr" | $as_tr_cpp` 1 +_ACEOF + +ac_header_dirent=$ac_hdr; break +fi + +done +# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. +if test $ac_header_dirent = dirent.h; then + { echo "$as_me:$LINENO: checking for library containing opendir" >&5 +echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; } +if test "${ac_cv_search_opendir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' dir; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_opendir=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_opendir+set}" = set; then + break +fi +done +if test "${ac_cv_search_opendir+set}" = set; then + : +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5 +echo "${ECHO_T}$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +else + { echo "$as_me:$LINENO: checking for library containing opendir" >&5 +echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; } +if test "${ac_cv_search_opendir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' x; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_opendir=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_opendir+set}" = set; then + break +fi +done +if test "${ac_cv_search_opendir+set}" = set; then + : +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5 +echo "${ECHO_T}$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +fi + + +for ac_header in dlfcn.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +# Check whether --enable-ltdl-install was given. +if test "${enable_ltdl_install+set}" = set; then + enableval=$enable_ltdl_install; +fi + + + + +if test x"${enable_ltdl_install-no}" != xno; then + INSTALL_LTDL_TRUE= + INSTALL_LTDL_FALSE='#' +else + INSTALL_LTDL_TRUE='#' + INSTALL_LTDL_FALSE= +fi + + + +if test x"${enable_ltdl_convenience-no}" != xno; then + CONVENIENCE_LTDL_TRUE= + CONVENIENCE_LTDL_FALSE='#' +else + CONVENIENCE_LTDL_TRUE='#' + CONVENIENCE_LTDL_FALSE= +fi + + +{ echo "$as_me:$LINENO: checking dynamic linker characteristics" >&5 +echo $ECHO_N "checking dynamic linker characteristics... $ECHO_C" >&6; } +library_names_spec= +libname_spec='lib$name' +soname_spec= +shrext_cmds=".so" +postinstall_cmds= +postuninstall_cmds= +finish_cmds= +finish_eval= +shlibpath_var= +shlibpath_overrides_runpath=unknown +version_type=none +dynamic_linker="$host_os ld.so" +sys_lib_dlsearch_path_spec="/lib /usr/lib" +if test "$GCC" = yes; then + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` + if echo "$sys_lib_search_path_spec" | grep ';' >/dev/null ; then + # if the path contains ";" then we assume it to be the separator + # otherwise default to the standard path separator (i.e. ":") - it is + # assumed that no part of a normal pathname contains ";" but that should + # okay in the real world where ";" in dirpaths is itself problematic. + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi +else + sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" +fi +need_lib_prefix=unknown +hardcode_into_libs=no + +# when you set need_version to no, make sure it does not cause -set_version +# flags to be left without arguments +need_version=unknown + +case $host_os in +aix3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a' + shlibpath_var=LIBPATH + + # AIX 3 has no versioning support, so we append a major version to the name. + soname_spec='${libname}${release}${shared_ext}$major' + ;; + +aix4* | aix5*) + version_type=linux + need_lib_prefix=no + need_version=no + hardcode_into_libs=yes + if test "$host_cpu" = ia64; then + # AIX 5 supports IA64 + library_names_spec='${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext}$versuffix $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + else + # With GCC up to 2.95.x, collect2 would create an import file + # for dependence libraries. The import file would start with + # the line `#! .'. This would cause the generated library to + # depend on `.', always an invalid library. This was fixed in + # development snapshots of GCC prior to 3.0. 
+ case $host_os in + aix4 | aix4.[01] | aix4.[01].*) + if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' + echo ' yes ' + echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then + : + else + can_build_shared=no + fi + ;; + esac + # AIX (on Power*) has no versioning support, so currently we can not hardcode correct + # soname into executable. Probably we can add versioning support to + # collect2, so additional links can be useful in future. + if test "$aix_use_runtimelinking" = yes; then + # If using run time linking (on AIX 4.2 or later) use lib.so + # instead of lib.a to let people know that these are not + # typical AIX shared libraries. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + else + # We preserve .a as extension for shared libraries through AIX4.2 + # and later when we are not doing run time linking. + library_names_spec='${libname}${release}.a $libname.a' + soname_spec='${libname}${release}${shared_ext}$major' + fi + shlibpath_var=LIBPATH + fi + ;; + +amigaos*) + library_names_spec='$libname.ixlibrary $libname.a' + # Create ${libname}_ixlibrary.a entries in /sys/libs. + finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | $Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || exit 1; done' + ;; + +beos*) + library_names_spec='${libname}${shared_ext}' + dynamic_linker="$host_os ld.so" + shlibpath_var=LIBRARY_PATH + ;; + +bsdi[45]*) + version_type=linux + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" + sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" + # the default ld.so.conf also contains /usr/contrib/lib and + # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow + # libtool to hard-code these into programs + ;; + +cygwin* | mingw* | pw32*) + version_type=windows + shrext_cmds=".dll" + need_version=no + need_lib_prefix=no + + case $GCC,$host_os in + yes,cygwin* | yes,mingw* | yes,pw32*) + library_names_spec='$libname.dll.a' + # DLL is installed to $(libdir)/../bin by postinstall_cmds + postinstall_cmds='base_file=`basename \${file}`~ + dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i;echo \$dlname'\''`~ + dldir=$destdir/`dirname \$dlpath`~ + test -d \$dldir || mkdir -p \$dldir~ + $install_prog $dir/$dlname \$dldir/$dlname~ + chmod a+x \$dldir/$dlname' + postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. 
$file; echo \$dlname'\''`~ + dlpath=$dir/\$dldll~ + $rm \$dlpath' + shlibpath_overrides_runpath=yes + + case $host_os in + cygwin*) + # Cygwin DLLs use 'cyg' prefix rather than 'lib' + soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + sys_lib_search_path_spec="/usr/lib /lib/w32api /lib /usr/local/lib" + ;; + mingw*) + # MinGW DLLs use traditional 'lib' prefix + soname_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | $SED -e "s/^libraries://" -e "s,=/,/,g"` + if echo "$sys_lib_search_path_spec" | grep ';[c-zC-Z]:/' >/dev/null; then + # It is most probably a Windows format PATH printed by + # mingw gcc, but we are running on Cygwin. Gcc prints its search + # path with ; separators, and with drive letters. We can handle the + # drive letters (cygwin fileutils understands them), so leave them, + # especially as we might pass files found there to a mingw objdump, + # which wouldn't understand a cygwinified path. Ahh. + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e 's/;/ /g'` + else + sys_lib_search_path_spec=`echo "$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"` + fi + ;; + pw32*) + # pw32 DLLs use 'pw' prefix rather than 'lib' + library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}' + ;; + esac + ;; + + *) + library_names_spec='${libname}`echo ${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib' + ;; + esac + dynamic_linker='Win32 ld.exe' + # FIXME: first we should search . and the directory the executable is in + shlibpath_var=PATH + ;; + +darwin* | rhapsody*) + dynamic_linker="$host_os dyld" + version_type=darwin + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${versuffix}$shared_ext ${libname}${release}${major}$shared_ext ${libname}$shared_ext' + soname_spec='${libname}${release}${major}$shared_ext' + shlibpath_overrides_runpath=yes + shlibpath_var=DYLD_LIBRARY_PATH + shrext_cmds='.dylib' + # Apple's gcc prints 'gcc -print-search-dirs' doesn't operate the same. + if test "$GCC" = yes; then + sys_lib_search_path_spec=`$CC -print-search-dirs | tr "\n" "$PATH_SEPARATOR" | sed -e 's/libraries:/@libraries:/' | tr "@" "\n" | grep "^libraries:" | sed -e "s/^libraries://" -e "s,=/,/,g" -e "s,$PATH_SEPARATOR, ,g" -e "s,.*,& /lib /usr/lib /usr/local/lib,g"` + else + sys_lib_search_path_spec='/lib /usr/lib /usr/local/lib' + fi + sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib' + ;; + +dgux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname$shared_ext' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +freebsd1*) + dynamic_linker=no + ;; + +kfreebsd*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='GNU ld.so' + ;; + +freebsd* | dragonfly*) + # DragonFly does not have aout. When/if they implement a new + # versioning mechanism, adjust this. 
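+ # (Illustrative note, not part of libtool's generated text: on a
+ # hypothetical FreeBSD 4.x host the probe below would see
+ #   $ /usr/bin/objformat
+ #   elf
+ # and select version_type=freebsd-elf; a.out-era systems report "aout"
+ # instead, which is also what the freebsd[123]* fallback assumes.)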
+ if test -x /usr/bin/objformat; then + objformat=`/usr/bin/objformat` + else + case $host_os in + freebsd[123]*) objformat=aout ;; + *) objformat=elf ;; + esac + fi + version_type=freebsd-$objformat + case $version_type in + freebsd-elf*) + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + need_version=no + need_lib_prefix=no + ;; + freebsd-*) + library_names_spec='${libname}${release}${shared_ext}$versuffix $libname${shared_ext}$versuffix' + need_version=yes + ;; + esac + shlibpath_var=LD_LIBRARY_PATH + case $host_os in + freebsd2*) + shlibpath_overrides_runpath=yes + ;; + freebsd3.[01]* | freebsdelf3.[01]*) + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + freebsd3.[2-9]* | freebsdelf3.[2-9]* | \ + freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1) + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + freebsd*) # from 4.6 on + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + esac + ;; + +gnu*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}${major} ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + ;; + +hpux9* | hpux10* | hpux11*) + # Give a soname corresponding to the major version so that dld.sl refuses to + # link against other versions. + version_type=sunos + need_lib_prefix=no + need_version=no + case $host_cpu in + ia64*) + shrext_cmds='.so' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.so" + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + if test "X$HPUX_IA64_MODE" = X32; then + sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 /usr/local/lib" + else + sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64" + fi + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + hppa*64*) + shrext_cmds='.sl' + hardcode_into_libs=yes + dynamic_linker="$host_os dld.sl" + shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH + shlibpath_overrides_runpath=yes # Unless +noenvvar is specified. + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64" + sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec + ;; + *) + shrext_cmds='.sl' + dynamic_linker="$host_os dld.sl" + shlibpath_var=SHLIB_PATH + shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + ;; + esac + # HP-UX runs *really* slowly unless shared libraries are mode 555. 
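+ # (Illustrative note: for a hypothetical libfoo the hook below amounts to
+ #   chmod 555 /opt/lib/libfoo.sl
+ # i.e. read/execute for everyone; dld reportedly maps non-writable
+ # libraries shared instead of giving each process a private copy.)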
+ postinstall_cmds='chmod 555 $lib' + ;; + +interix3*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + ;; + +irix5* | irix6* | nonstopux*) + case $host_os in + nonstopux*) version_type=nonstopux ;; + *) + if test "$lt_cv_prog_gnu_ld" = yes; then + version_type=linux + else + version_type=irix + fi ;; + esac + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} $libname${shared_ext}' + case $host_os in + irix5* | nonstopux*) + libsuff= shlibsuff= + ;; + *) + case $LD in # libtool.m4 will add one of these switches to LD + *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ") + libsuff= shlibsuff= libmagic=32-bit;; + *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ") + libsuff=32 shlibsuff=N32 libmagic=N32;; + *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ") + libsuff=64 shlibsuff=64 libmagic=64-bit;; + *) libsuff= shlibsuff= libmagic=never-match;; + esac + ;; + esac + shlibpath_var=LD_LIBRARY${shlibsuff}_PATH + shlibpath_overrides_runpath=no + sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" + sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" + hardcode_into_libs=yes + ;; + +# No shared lib support for Linux oldld, aout, or coff. +linux*oldld* | linux*aout* | linux*coff*) + dynamic_linker=no + ;; + +# This must be Linux ELF. +linux*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + # This implies no fast_install, which is unacceptable. + # Some rework will be needed to allow for fast_install + # before this can be enabled. + hardcode_into_libs=yes + + # Append ld.so.conf contents to the search path + if test -f /etc/ld.so.conf; then + lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < /etc/ld.so.conf | $SED -e 's/#.*//;s/[:, ]/ /g;s/=[^=]*$//;s/=[^= ]* / /g;/^$/d' | tr '\n' ' '` + sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra" + fi + + # We used to test for /lib/ld.so.1 and disable shared libraries on + # powerpc, because MkLinux only supported shared libraries with the + # GNU dynamic linker. Since this was broken with cross compilers, + # most powerpc-linux boxes support dynamic linking these days and + # people can always --disable-shared, the test was removed, and we + # assume the GNU/Linux dynamic linker is in use. 
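+ # (Illustrative note: given a hypothetical /etc/ld.so.conf of
+ #   include /etc/ld.so.conf.d/*.conf
+ #   /opt/lib
+ # the awk stage above inlines the included fragments in place of the
+ # include line, and the sed/tr stages strip comments and separators, so
+ # lt_ld_extra becomes a single space-separated list such as
+ #   "/opt/foo /opt/lib ".)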
+ dynamic_linker='GNU/Linux ld.so' + ;; + +knetbsd*-gnu) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=no + hardcode_into_libs=yes + dynamic_linker='GNU ld.so' + ;; + +netbsd*) + version_type=sunos + need_lib_prefix=no + need_version=no + if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + dynamic_linker='NetBSD (a.out) ld.so' + else + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major ${libname}${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + dynamic_linker='NetBSD ld.elf_so' + fi + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + ;; + +newsos6) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +nto-qnx*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + ;; + +openbsd*) + version_type=sunos + sys_lib_dlsearch_path_spec="/usr/lib" + need_lib_prefix=no + # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs. 
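+  # Illustrative note (not part of the generated script).  The
+  # "echo __ELF__ | $CC -E -" probe used below works because ELF
+  # toolchains predefine __ELF__: the preprocessor rewrites the token
+  # (typically to 1), so the grep finds nothing; on a.out toolchains the
+  # token survives and the grep succeeds.
+  echo __ELF__ | $CC -E - 2>/dev/null | grep __ELF__ >&5 || : # demo probe
+  # The version handling announced above follows.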
+ case $host_os in + openbsd3.3 | openbsd3.3.*) need_version=yes ;; + *) need_version=no ;; + esac + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' + shlibpath_var=LD_LIBRARY_PATH + if test -z "`echo __ELF__ | $CC -E - | grep __ELF__`" || test "$host_os-$host_cpu" = "openbsd2.8-powerpc"; then + case $host_os in + openbsd2.[89] | openbsd2.[89].*) + shlibpath_overrides_runpath=no + ;; + *) + shlibpath_overrides_runpath=yes + ;; + esac + else + shlibpath_overrides_runpath=yes + fi + ;; + +os2*) + libname_spec='$name' + shrext_cmds=".dll" + need_lib_prefix=no + library_names_spec='$libname${shared_ext} $libname.a' + dynamic_linker='OS/2 ld.exe' + shlibpath_var=LIBPATH + ;; + +osf3* | osf4* | osf5*) + version_type=osf + need_lib_prefix=no + need_version=no + soname_spec='${libname}${release}${shared_ext}$major' + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + shlibpath_var=LD_LIBRARY_PATH + sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" + sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" + ;; + +solaris*) + version_type=linux + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + hardcode_into_libs=yes + # ldd complains unless libraries are executable + postinstall_cmds='chmod +x $lib' + ;; + +sunos4*) + version_type=sunos + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${shared_ext}$versuffix' + finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' + shlibpath_var=LD_LIBRARY_PATH + shlibpath_overrides_runpath=yes + if test "$with_gnu_ld" = yes; then + need_lib_prefix=no + fi + need_version=yes + ;; + +sysv4 | sysv4.3*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + case $host_vendor in + sni) + shlibpath_overrides_runpath=no + need_lib_prefix=no + export_dynamic_flag_spec='${wl}-Blargedynsym' + runpath_var=LD_RUN_PATH + ;; + siemens) + need_lib_prefix=no + ;; + motorola) + need_lib_prefix=no + need_version=no + shlibpath_overrides_runpath=no + sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' + ;; + esac + ;; + +sysv4*MP*) + if test -d /usr/nec ;then + version_type=linux + library_names_spec='$libname${shared_ext}.$versuffix $libname${shared_ext}.$major $libname${shared_ext}' + soname_spec='$libname${shared_ext}.$major' + shlibpath_var=LD_LIBRARY_PATH + fi + ;; + +sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + version_type=freebsd-elf + need_lib_prefix=no + need_version=no + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext} $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + hardcode_into_libs=yes + if test "$with_gnu_ld" = yes; then + sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib /usr/lib /lib' + shlibpath_overrides_runpath=no + else + sys_lib_search_path_spec='/usr/ccs/lib /usr/lib' + shlibpath_overrides_runpath=yes + case $host_os in + 
sco3.2v5*) + sys_lib_search_path_spec="$sys_lib_search_path_spec /lib" + ;; + esac + fi + sys_lib_dlsearch_path_spec='/usr/lib' + ;; + +uts4*) + version_type=linux + library_names_spec='${libname}${release}${shared_ext}$versuffix ${libname}${release}${shared_ext}$major $libname${shared_ext}' + soname_spec='${libname}${release}${shared_ext}$major' + shlibpath_var=LD_LIBRARY_PATH + ;; + +*) + dynamic_linker=no + ;; +esac +{ echo "$as_me:$LINENO: result: $dynamic_linker" >&5 +echo "${ECHO_T}$dynamic_linker" >&6; } +test "$dynamic_linker" = no && can_build_shared=no + +variables_saved_for_relink="PATH $shlibpath_var $runpath_var" +if test "$GCC" = yes; then + variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" +fi + + +{ echo "$as_me:$LINENO: checking which extension is used for loadable modules" >&5 +echo $ECHO_N "checking which extension is used for loadable modules... $ECHO_C" >&6; } +if test "${libltdl_cv_shlibext+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + +module=yes +eval libltdl_cv_shlibext=$shrext_cmds + +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_shlibext" >&5 +echo "${ECHO_T}$libltdl_cv_shlibext" >&6; } +if test -n "$libltdl_cv_shlibext"; then + +cat >>confdefs.h <<_ACEOF +#define LTDL_SHLIB_EXT "$libltdl_cv_shlibext" +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking which variable specifies run-time library path" >&5 +echo $ECHO_N "checking which variable specifies run-time library path... $ECHO_C" >&6; } +if test "${libltdl_cv_shlibpath_var+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + libltdl_cv_shlibpath_var="$shlibpath_var" +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_shlibpath_var" >&5 +echo "${ECHO_T}$libltdl_cv_shlibpath_var" >&6; } +if test -n "$libltdl_cv_shlibpath_var"; then + +cat >>confdefs.h <<_ACEOF +#define LTDL_SHLIBPATH_VAR "$libltdl_cv_shlibpath_var" +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking for the default library search path" >&5 +echo $ECHO_N "checking for the default library search path... $ECHO_C" >&6; } +if test "${libltdl_cv_sys_search_path+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + libltdl_cv_sys_search_path="$sys_lib_dlsearch_path_spec" +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_sys_search_path" >&5 +echo "${ECHO_T}$libltdl_cv_sys_search_path" >&6; } +if test -n "$libltdl_cv_sys_search_path"; then + sys_search_path= + for dir in $libltdl_cv_sys_search_path; do + if test -z "$sys_search_path"; then + sys_search_path="$dir" + else + sys_search_path="$sys_search_path$PATH_SEPARATOR$dir" + fi + done + +cat >>confdefs.h <<_ACEOF +#define LTDL_SYSSEARCHPATH "$sys_search_path" +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for objdir" >&5 +echo $ECHO_N "checking for objdir... $ECHO_C" >&6; } +if test "${libltdl_cv_objdir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + libltdl_cv_objdir="$objdir" + if test -n "$objdir"; then + : + else + rm -f .libs 2>/dev/null + mkdir .libs 2>/dev/null + if test -d .libs; then + libltdl_cv_objdir=.libs + else + # MS-DOS does not allow filenames that begin with a dot. + libltdl_cv_objdir=_libs + fi + rmdir .libs 2>/dev/null + fi + +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_objdir" >&5 +echo "${ECHO_T}$libltdl_cv_objdir" >&6; } + +cat >>confdefs.h <<_ACEOF +#define LTDL_OBJDIR "$libltdl_cv_objdir/" +_ACEOF + + + + + + +# Check for command to grab the raw symbol name followed by C symbol from nm. 
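+# Illustrative recap (not part of the generated script; the values shown
+# are hypothetical for a typical ELF/glibc host).  The probes above have
+# recorded results that libltdl will compile against, e.g.:
+#   #define LTDL_SHLIB_EXT ".so"
+#   #define LTDL_SHLIBPATH_VAR "LD_LIBRARY_PATH"
+#   #define LTDL_SYSSEARCHPATH "/lib:/usr/lib"
+#   #define LTDL_OBJDIR ".libs/"
+grep 'LTDL_' confdefs.h >&5 2>/dev/null || : # demo: log what was recorded
+# The nm-parsing check announced above comes next.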
+{ echo "$as_me:$LINENO: checking command to parse $NM output from $compiler object" >&5 +echo $ECHO_N "checking command to parse $NM output from $compiler object... $ECHO_C" >&6; } +if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + +# These are sane defaults that work on at least a few old systems. +# [They come from Ultrix. What could be older than Ultrix?!! ;)] + +# Character class describing NM global symbol codes. +symcode='[BCDEGRST]' + +# Regexp to match symbols that can be accessed directly from C. +sympat='\([_A-Za-z][_A-Za-z0-9]*\)' + +# Transform an extracted symbol line into a proper C declaration +lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern int \1;/p'" + +# Transform an extracted symbol line into symbol name and symbol address +lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + +# Define system-specific variables. +case $host_os in +aix*) + symcode='[BCDT]' + ;; +cygwin* | mingw* | pw32*) + symcode='[ABCDGISTW]' + ;; +hpux*) # Its linker distinguishes data from code symbols + if test "$host_cpu" = ia64; then + symcode='[ABCDEGRST]' + fi + lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + ;; +linux*) + if test "$host_cpu" = ia64; then + symcode='[ABCDGIRSTW]' + lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" + lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\) $/ {\\\"\1\\\", (lt_ptr) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/ {\"\2\", (lt_ptr) \&\2},/p'" + fi + ;; +irix* | nonstopux*) + symcode='[BCDEGRST]' + ;; +osf*) + symcode='[BCDEGQRST]' + ;; +solaris*) + symcode='[BDRT]' + ;; +sco3.2v5*) + symcode='[DT]' + ;; +sysv4.2uw2*) + symcode='[DT]' + ;; +sysv5* | sco5v6* | unixware* | OpenUNIX*) + symcode='[ABDT]' + ;; +sysv4) + symcode='[DFNSTU]' + ;; +esac + +# Handle CRLF in mingw tool chain +opt_cr= +case $build_os in +mingw*) + opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp + ;; +esac + +# If we're using GNU nm, then use its standard symbol codes. +case `$NM -V 2>&1` in +*GNU* | *'with BFD'*) + symcode='[ABCDGIRSTW]' ;; +esac + +# Try without a prefix undercore, then with it. +for ac_symprfx in "" "_"; do + + # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol. + symxfrm="\\1 $ac_symprfx\\2 \\2" + + # Write the raw and C identifiers. + lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'" + + # Check to see that the pipe works correctly. + pipe_works=no + + rm -f conftest* + cat > conftest.$ac_ext <&5 + (eval $ac_compile) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; then + # Now try to grab the symbols. + nlist=conftest.nm + if { (eval echo "$as_me:$LINENO: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist\"") >&5 + (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $nlist) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && test -s "$nlist"; then + # Try sorting and uniquifying the output. 
+ if sort "$nlist" | uniq > "$nlist"T; then + mv -f "$nlist"T "$nlist" + else + rm -f "$nlist"T + fi + + # Make sure that we snagged all the symbols we need. + if grep ' nm_test_var$' "$nlist" >/dev/null; then + if grep ' nm_test_func$' "$nlist" >/dev/null; then + cat < conftest.$ac_ext +#ifdef __cplusplus +extern "C" { +#endif + +EOF + # Now generate the symbol file. + eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | grep -v main >> conftest.$ac_ext' + + cat <> conftest.$ac_ext +#if defined (__STDC__) && __STDC__ +# define lt_ptr_t void * +#else +# define lt_ptr_t char * +# define const +#endif + +/* The mapping between symbol names and symbols. */ +const struct { + const char *name; + lt_ptr_t address; +} +lt_preloaded_symbols[] = +{ +EOF + $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" | grep -v main >> conftest.$ac_ext + cat <<\EOF >> conftest.$ac_ext + {0, (lt_ptr_t) 0} +}; + +#ifdef __cplusplus +} +#endif +EOF + # Now try linking the two files. + mv conftest.$ac_objext conftstm.$ac_objext + lt_save_LIBS="$LIBS" + lt_save_CFLAGS="$CFLAGS" + LIBS="conftstm.$ac_objext" + CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag" + if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 + (eval $ac_link) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && test -s conftest${ac_exeext}; then + pipe_works=yes + fi + LIBS="$lt_save_LIBS" + CFLAGS="$lt_save_CFLAGS" + else + echo "cannot find nm_test_func in $nlist" >&5 + fi + else + echo "cannot find nm_test_var in $nlist" >&5 + fi + else + echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5 + fi + else + echo "$progname: failed program was:" >&5 + cat conftest.$ac_ext >&5 + fi + rm -f conftest* conftst* + + # Do not use the global_symbol_pipe unless it works. + if test "$pipe_works" = yes; then + break + else + lt_cv_sys_global_symbol_pipe= + fi +done + +fi + +if test -z "$lt_cv_sys_global_symbol_pipe"; then + lt_cv_sys_global_symbol_to_cdecl= +fi +if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; then + { echo "$as_me:$LINENO: result: failed" >&5 +echo "${ECHO_T}failed" >&6; } +else + { echo "$as_me:$LINENO: result: ok" >&5 +echo "${ECHO_T}ok" >&6; } +fi + + +{ echo "$as_me:$LINENO: checking whether libtool supports -dlopen/-dlpreopen" >&5 +echo $ECHO_N "checking whether libtool supports -dlopen/-dlpreopen... $ECHO_C" >&6; } +if test "${libltdl_cv_preloaded_symbols+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test -n "$lt_cv_sys_global_symbol_pipe"; then + libltdl_cv_preloaded_symbols=yes + else + libltdl_cv_preloaded_symbols=no + fi + +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_preloaded_symbols" >&5 +echo "${ECHO_T}$libltdl_cv_preloaded_symbols" >&6; } +if test x"$libltdl_cv_preloaded_symbols" = xyes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PRELOADED_SYMBOLS 1 +_ACEOF + +fi + +LIBADD_DL= + +ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +{ echo "$as_me:$LINENO: checking for shl_load" >&5 +echo $ECHO_N "checking for shl_load... $ECHO_C" >&6; } +if test "${ac_cv_func_shl_load+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +/* Define shl_load to an innocuous variant, in case declares shl_load. + For example, HP-UX 11i declares gettimeofday. */ +#define shl_load innocuous_shl_load + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char shl_load (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef shl_load + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_shl_load || defined __stub___shl_load +choke me +#endif + +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_shl_load=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_shl_load=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_shl_load" >&5 +echo "${ECHO_T}$ac_cv_func_shl_load" >&6; } +if test $ac_cv_func_shl_load = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SHL_LOAD 1 +_ACEOF + +else + { echo "$as_me:$LINENO: checking for shl_load in -ldld" >&5 +echo $ECHO_N "checking for shl_load in -ldld... $ECHO_C" >&6; } +if test "${ac_cv_lib_dld_shl_load+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char shl_load (); +int +main () +{ +return shl_load (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_dld_shl_load=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_dld_shl_load=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_dld_shl_load" >&5 +echo "${ECHO_T}$ac_cv_lib_dld_shl_load" >&6; } +if test $ac_cv_lib_dld_shl_load = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_SHL_LOAD 1 +_ACEOF + + LIBADD_DL="$LIBADD_DL -ldld" +else + { echo "$as_me:$LINENO: checking for dlopen in -ldl" >&5 +echo $ECHO_N "checking for dlopen in -ldl... $ECHO_C" >&6; } +if test "${ac_cv_lib_dl_dlopen+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldl $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_dl_dlopen=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_dl_dlopen=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_dl_dlopen" >&5 +echo "${ECHO_T}$ac_cv_lib_dl_dlopen" >&6; } +if test $ac_cv_lib_dl_dlopen = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LIBDL 1 +_ACEOF + + LIBADD_DL="-ldl" libltdl_cv_lib_dl_dlopen="yes" +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#if HAVE_DLFCN_H +# include +#endif + +int +main () +{ +dlopen(0, 0); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LIBDL 1 +_ACEOF + libltdl_cv_func_dlopen="yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { echo "$as_me:$LINENO: checking for dlopen in -lsvld" >&5 +echo $ECHO_N "checking for dlopen in -lsvld... $ECHO_C" >&6; } +if test "${ac_cv_lib_svld_dlopen+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lsvld $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_svld_dlopen=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_svld_dlopen=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_svld_dlopen" >&5 +echo "${ECHO_T}$ac_cv_lib_svld_dlopen" >&6; } +if test $ac_cv_lib_svld_dlopen = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_LIBDL 1 +_ACEOF + + LIBADD_DL="-lsvld" libltdl_cv_func_dlopen="yes" +else + { echo "$as_me:$LINENO: checking for dld_link in -ldld" >&5 +echo $ECHO_N "checking for dld_link in -ldld... $ECHO_C" >&6; } +if test "${ac_cv_lib_dld_dld_link+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ldld $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dld_link (); +int +main () +{ +return dld_link (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_dld_dld_link=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_dld_dld_link=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_dld_dld_link" >&5 +echo "${ECHO_T}$ac_cv_lib_dld_dld_link" >&6; } +if test $ac_cv_lib_dld_dld_link = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DLD 1 +_ACEOF + + LIBADD_DL="$LIBADD_DL -ldld" +else + { echo "$as_me:$LINENO: checking for _dyld_func_lookup" >&5 +echo $ECHO_N "checking for _dyld_func_lookup... $ECHO_C" >&6; } +if test "${ac_cv_func__dyld_func_lookup+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. 
*/ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define _dyld_func_lookup to an innocuous variant, in case declares _dyld_func_lookup. + For example, HP-UX 11i declares gettimeofday. */ +#define _dyld_func_lookup innocuous__dyld_func_lookup + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char _dyld_func_lookup (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef _dyld_func_lookup + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char _dyld_func_lookup (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub__dyld_func_lookup || defined __stub____dyld_func_lookup +choke me +#endif + +int +main () +{ +return _dyld_func_lookup (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func__dyld_func_lookup=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func__dyld_func_lookup=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func__dyld_func_lookup" >&5 +echo "${ECHO_T}$ac_cv_func__dyld_func_lookup" >&6; } +if test $ac_cv_func__dyld_func_lookup = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DYLD 1 +_ACEOF + +fi + + +fi + + +fi + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + +fi + + +fi + + +fi + + +if test x"$libltdl_cv_func_dlopen" = xyes || test x"$libltdl_cv_lib_dl_dlopen" = xyes +then + lt_save_LIBS="$LIBS" + LIBS="$LIBS $LIBADD_DL" + +for ac_func in dlerror +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/
+/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $ac_func innocuous_$ac_func
+
+/* System header to define __stub macros and hopefully few prototypes,
+    which can conflict with char $ac_func (); below.
+    Prefer <limits.h> to <assert.h> if __STDC__ is defined, since
+    <limits.h> exists even on freestanding compilers.  */
+
+#ifdef __STDC__
+# include <limits.h>
+#else
+# include <assert.h>
+#endif
+
+#undef $ac_func
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $ac_func ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$ac_func || defined __stub___$ac_func
+choke me
+#endif
+
+int
+main ()
+{
+return $ac_func ();
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  eval "$as_ac_var=yes"
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+	eval "$as_ac_var=no"
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+ac_res=`eval echo '${'$as_ac_var'}'`
+	       { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+if test `eval echo '${'$as_ac_var'}'` = yes; then
+  cat >>confdefs.h <<_ACEOF
+#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
+  LIBS="$lt_save_LIBS"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+{ echo "$as_me:$LINENO: checking for _ prefix in compiled symbols" >&5
+echo $ECHO_N "checking for _ prefix in compiled symbols... $ECHO_C" >&6; }
+if test "${ac_cv_sys_symbol_underscore+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_cv_sys_symbol_underscore=no
+  cat > conftest.$ac_ext <<EOF
+void nm_test_func(){}
+int main(){nm_test_func;return 0;}
+EOF
+  if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; then
+    # Now try to grab the symbols.
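+    # Illustrative note (not part of the generated script).  $ac_nlist
+    # below will hold symbol-pipe output: "T _nm_test_func nm_test_func"
+    # on underscore-prefixing hosts (a.out, Mach-O) versus
+    # "T nm_test_func nm_test_func" on ELF hosts, which is what the two
+    # greps further down distinguish:
+    echo 'T _nm_test_func nm_test_func' | grep '^. _nm_test_func' >&5 && : # demo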
+    ac_nlist=conftest.nm
+    if { (eval echo "$as_me:$LINENO: \"$NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist\"") >&5
+  (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \> $ac_nlist) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && test -s "$ac_nlist"; then
+      # See whether the symbols have a leading underscore.
+      if grep '^. _nm_test_func' "$ac_nlist" >/dev/null; then
+        ac_cv_sys_symbol_underscore=yes
+      else
+        if grep '^. nm_test_func ' "$ac_nlist" >/dev/null; then
+          :
+        else
+          echo "configure: cannot find nm_test_func in $ac_nlist" >&5
+        fi
+      fi
+    else
+      echo "configure: cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    echo "configure: failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -rf conftest*
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_sys_symbol_underscore" >&5
+echo "${ECHO_T}$ac_cv_sys_symbol_underscore" >&6; }
+
+
+if test x"$ac_cv_sys_symbol_underscore" = xyes; then
+  if test x"$libltdl_cv_func_dlopen" = xyes ||
+     test x"$libltdl_cv_lib_dl_dlopen" = xyes ; then
+	{ echo "$as_me:$LINENO: checking whether we have to add an underscore for dlsym" >&5
+echo $ECHO_N "checking whether we have to add an underscore for dlsym... $ECHO_C" >&6; }
+if test "${libltdl_cv_need_uscore+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  libltdl_cv_need_uscore=unknown
+          save_LIBS="$LIBS"
+          LIBS="$LIBS $LIBADD_DL"
+	  if test "$cross_compiling" = yes; then :
+  libltdl_cv_need_uscore=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+# define LT_DLGLOBAL RTLD_GLOBAL
+#else
+# ifdef DL_GLOBAL
+#  define LT_DLGLOBAL DL_GLOBAL
+# else
+#  define LT_DLGLOBAL 0
+# endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+# ifdef RTLD_LAZY
+#  define LT_DLLAZY_OR_NOW RTLD_LAZY
+# else
+#  ifdef DL_LAZY
+#   define LT_DLLAZY_OR_NOW DL_LAZY
+#  else
+#   ifdef RTLD_NOW
+#    define LT_DLLAZY_OR_NOW RTLD_NOW
+#   else
+#    ifdef DL_NOW
+#     define LT_DLLAZY_OR_NOW DL_NOW
+#    else
+#     define LT_DLLAZY_OR_NOW 0
+#    endif
+#   endif
+#  endif
+# endif
+#endif
+
+#ifdef __cplusplus
+extern "C" void exit (int);
+#endif
+
+void fnord() { int i=42;}
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore;
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+    exit (status);
+}
+EOF
+  if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
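+    # Illustrative note (not part of the generated script).  The conftest
+    # program above reports through its exit status: 1 ($lt_dlno_uscore)
+    # when dlsym finds "fnord" as-is, 2 ($lt_dlneed_uscore) when only
+    # "_fnord" resolves, and 0 ($lt_dlunknown) otherwise; the case below
+    # decodes that status.
+    echo "demo: conftest exit status = $lt_status" >&5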
+ case x$lt_status in + x$lt_dlno_uscore) libltdl_cv_need_uscore=no ;; + x$lt_dlneed_uscore) libltdl_cv_need_uscore=yes ;; + x$lt_dlunknown|x*) ;; + esac + else : + # compilation failed + + fi +fi +rm -fr conftest* + + LIBS="$save_LIBS" + +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_need_uscore" >&5 +echo "${ECHO_T}$libltdl_cv_need_uscore" >&6; } + fi +fi + +if test x"$libltdl_cv_need_uscore" = xyes; then + +cat >>confdefs.h <<\_ACEOF +#define NEED_USCORE 1 +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking whether deplibs are loaded by dlopen" >&5 +echo $ECHO_N "checking whether deplibs are loaded by dlopen... $ECHO_C" >&6; } +if test "${libltdl_cv_sys_dlopen_deplibs+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + # PORTME does your system automatically load deplibs for dlopen? + # or its logical equivalent (e.g. shl_load for HP-UX < 11) + # For now, we just catch OSes we know something about -- in the + # future, we'll try test this programmatically. + libltdl_cv_sys_dlopen_deplibs=unknown + case "$host_os" in + aix3*|aix4.1.*|aix4.2.*) + # Unknown whether this is true for these versions of AIX, but + # we want this `case' here to explicitly catch those versions. + libltdl_cv_sys_dlopen_deplibs=unknown + ;; + aix[45]*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + darwin*) + # Assuming the user has installed a libdl from somewhere, this is true + # If you are looking for one http://www.opendarwin.org/projects/dlcompat + libltdl_cv_sys_dlopen_deplibs=yes + ;; + gnu* | linux* | kfreebsd*-gnu | knetbsd*-gnu) + # GNU and its variants, using gnu ld.so (Glibc) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + hpux10*|hpux11*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + interix*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + irix[12345]*|irix6.[01]*) + # Catch all versions of IRIX before 6.2, and indicate that we don't + # know how it worked for any of those versions. + libltdl_cv_sys_dlopen_deplibs=unknown + ;; + irix*) + # The case above catches anything before 6.2, and it's known that + # at 6.2 and later dlopen does load deplibs. + libltdl_cv_sys_dlopen_deplibs=yes + ;; + netbsd*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + openbsd*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + osf[1234]*) + # dlopen did load deplibs (at least at 4.x), but until the 5.x series, + # it did *not* use an RPATH in a shared library to find objects the + # library depends on, so we explictly say `no'. + libltdl_cv_sys_dlopen_deplibs=no + ;; + osf5.0|osf5.0a|osf5.1) + # dlopen *does* load deplibs and with the right loader patch applied + # it even uses RPATH in a shared library to search for shared objects + # that the library depends on, but there's no easy way to know if that + # patch is installed. Since this is the case, all we can really + # say is unknown -- it depends on the patch being installed. If + # it is, this changes to `yes'. Without it, it would be `no'. + libltdl_cv_sys_dlopen_deplibs=unknown + ;; + osf*) + # the two cases above should catch all versions of osf <= 5.1. Read + # the comments above for what we know about them. + # At > 5.1, deplibs are loaded *and* any RPATH in a shared library + # is used to find them so we can finally say `yes'. 
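+    # Illustrative note (not part of the generated script).  "Deplibs
+    # loaded by dlopen" means dlopen("libA.so") also maps the libraries
+    # libA itself was linked against (its DT_NEEDED entries on ELF); one
+    # way to inspect them by hand, assuming binutils, is:
+    #   readelf -d libA.so | grep NEEDED
+    # When the cached answer is not "yes", LTDL_DLOPEN_DEPLIBS is defined
+    # further below so libltdl loads dependencies itself; hence, for
+    # modern OSF, the "yes" that follows.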
+ libltdl_cv_sys_dlopen_deplibs=yes + ;; + solaris*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*) + libltdl_cv_sys_dlopen_deplibs=yes + ;; + esac + +fi +{ echo "$as_me:$LINENO: result: $libltdl_cv_sys_dlopen_deplibs" >&5 +echo "${ECHO_T}$libltdl_cv_sys_dlopen_deplibs" >&6; } +if test "$libltdl_cv_sys_dlopen_deplibs" != yes; then + +cat >>confdefs.h <<\_ACEOF +#define LTDL_DLOPEN_DEPLIBS 1 +_ACEOF + +fi + + +for ac_header in argz.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +{ echo "$as_me:$LINENO: checking for error_t" >&5 +echo $ECHO_N "checking for error_t... $ECHO_C" >&6; } +if test "${ac_cv_type_error_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#if HAVE_ARGZ_H +# include +#endif + +typedef error_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_error_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_error_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_error_t" >&5 +echo "${ECHO_T}$ac_cv_type_error_t" >&6; } +if test $ac_cv_type_error_t = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_ERROR_T 1 +_ACEOF + + +else + +cat >>confdefs.h <<\_ACEOF +#define error_t int +_ACEOF + +fi + + + + + + + +for ac_func in argz_append argz_create_sep argz_insert argz_next argz_stringify +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + + + + + + + + + + + + + + + + + + + + + + + + +for ac_header in assert.h ctype.h errno.h malloc.h memory.h stdlib.h \ + stdio.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + +for ac_header in dl.h sys/dl.h dld.h mach-o/dyld.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
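# Editor's note (not part of the patch): each header gets two independent
# probes -- a compile of the header together with $ac_includes_default
# ("usability") and a bare preprocessor run ("presence"). The warning
# cascade above fires only when the two disagree, e.g. a header that
# preprocesses cleanly but needs a prerequisite include to compile. A
# hand-rolled version of the two probes (assumes gcc; header arbitrary):
echo '#include <sys/socket.h>' > hdr.c
gcc -c hdr.c -o /dev/null 2>/dev/null && echo "usable"  || echo "not usable"
gcc -E hdr.c >/dev/null 2>/dev/null   && echo "present" || echo "not present"
rm -f hdr.c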
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +for ac_header in string.h strings.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + break +fi + +done + + + + +for ac_func in strchr index +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. 
*/ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + + +for ac_func in strrchr rindex +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. 
+ Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + + +for ac_func in memcpy bcopy +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + break +fi +done + + + +for ac_func in memmove strcmp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + +for ac_func in closedir opendir readdir +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
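# Editor's note (not part of the patch): strchr/index, strrchr/rindex and
# memcpy/bcopy are probed as ordered fallback pairs -- the `break` after a
# successful check stops the loop, so only the first function found gets its
# HAVE_* macro. memmove/strcmp and closedir/opendir/readdir carry no `break`
# and are each checked unconditionally. After a configure run the outcome
# can be inspected in the generated config header (path assumed from this
# tree's layout):
grep -E 'HAVE_(STRCHR|INDEX|STRRCHR|RINDEX|MEMCPY|BCOPY)' include/llvm/Config/config.h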
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + +if test "$WITH_LLVMGCCDIR" = "default" ; then + LLVMGCC="llvm-gcc${EXEEXT}" + LLVMGXX="llvm-g++${EXEEXT}" + LLVMGCCCOMMAND="$LLVMGCC" + LLVMGXXCOMMAND="$LLVMGXX" + LLVMGCCCOMMAND=$LLVMGCCCOMMAND + + LLVMGXXCOMMAND=$LLVMGXXCOMMAND + + # Extract the first word of "$LLVMGCC", so it can be a program name with args. +set dummy $LLVMGCC; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_LLVMGCC+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $LLVMGCC in + [\\/]* | ?:[\\/]*) + ac_cv_path_LLVMGCC="$LLVMGCC" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_LLVMGCC="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +LLVMGCC=$ac_cv_path_LLVMGCC +if test -n "$LLVMGCC"; then + { echo "$as_me:$LINENO: result: $LLVMGCC" >&5 +echo "${ECHO_T}$LLVMGCC" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + + # Extract the first word of "$LLVMGXX", so it can be a program name with args. +set dummy $LLVMGXX; ac_word=$2 +{ echo "$as_me:$LINENO: checking for $ac_word" >&5 +echo $ECHO_N "checking for $ac_word... $ECHO_C" >&6; } +if test "${ac_cv_path_LLVMGXX+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + case $LLVMGXX in + [\\/]* | ?:[\\/]*) + ac_cv_path_LLVMGXX="$LLVMGXX" # Let the user override the test with a path. + ;; + *) + as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. 
+ for ac_exec_ext in '' $ac_executable_extensions; do + if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_executable_p "$as_dir/$ac_word$ac_exec_ext"; }; then + ac_cv_path_LLVMGXX="$as_dir/$ac_word$ac_exec_ext" + echo "$as_me:$LINENO: found $as_dir/$ac_word$ac_exec_ext" >&5 + break 2 + fi +done +done +IFS=$as_save_IFS + + ;; +esac +fi +LLVMGXX=$ac_cv_path_LLVMGXX +if test -n "$LLVMGXX"; then + { echo "$as_me:$LINENO: result: $LLVMGXX" >&5 +echo "${ECHO_T}$LLVMGXX" >&6; } +else + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } +fi + + +else + if test -z "$LLVMGCC"; then + LLVMGCC="$WITH_LLVMGCCDIR/bin/llvm-gcc${EXEEXT}" + LLVMGCCCOMMAND="$LLVMGCC" + fi + if test -z "$LLVMGXX"; then + LLVMGXX="$WITH_LLVMGCCDIR/bin/llvm-g++${EXEEXT}" + LLVMGXXCOMMAND="$LLVMGXX" + fi + + LLVMGCC=$LLVMGCC + + LLVMGXX=$LLVMGXX + + LLVMGCCCOMMAND=$LLVMGCCCOMMAND + + LLVMGXXCOMMAND=$LLVMGXXCOMMAND + +fi + + +# Check whether --with-llvmcc was given. +if test "${with_llvmcc+set}" = set; then + withval=$with_llvmcc; +else + with_llvmcc=check +fi + +{ echo "$as_me:$LINENO: checking LLVM capable compiler" >&5 +echo $ECHO_N "checking LLVM capable compiler... $ECHO_C" >&6; } +if test "$with_llvmcc" != "check"; then + if (test "$with_llvmcc" != "llvm-gcc" && + test "$with_llvmcc" != "clang" && + test "$with_llvmcc" != "none"); then + { { echo "$as_me:$LINENO: error: invalid value for --with-llvmcc, expected 'llvm-gcc', 'clang', or 'none'." >&5 +echo "$as_me: error: invalid value for --with-llvmcc, expected 'llvm-gcc', 'clang', or 'none'." >&2;} + { (exit 1); exit 1; }; } + fi + WITH_LLVMCC="$with_llvmcc" +elif test -n "$LLVMGCC"; then + WITH_LLVMCC=llvm-gcc +elif test -n "$WITH_CLANGPATH" || test "$WITH_BUILT_CLANG" -ne "0"; then + WITH_LLVMCC=clang +else + WITH_LLVMCC=none +fi +{ echo "$as_me:$LINENO: result: $WITH_LLVMCC" >&5 +echo "${ECHO_T}$WITH_LLVMCC" >&6; } +LLVMCC_OPTION=$WITH_LLVMCC + + +{ echo "$as_me:$LINENO: checking tool compatibility" >&5 +echo $ECHO_N "checking tool compatibility... $ECHO_C" >&6; } + +ICC=no +IXX=no +case $CC in + icc*|icpc*) + ICC=yes + IXX=yes + ;; + *) + ;; +esac + +if test "$GCC" != "yes" && test "$ICC" != "yes" +then + { { echo "$as_me:$LINENO: error: gcc|icc required but not found" >&5 +echo "$as_me: error: gcc|icc required but not found" >&2;} + { (exit 1); exit 1; }; } +fi + +if test "$GXX" != "yes" && test "$IXX" != "yes" +then + { { echo "$as_me:$LINENO: error: g++|clang++|icc required but not found" >&5 +echo "$as_me: error: g++|clang++|icc required but not found" >&2;} + { (exit 1); exit 1; }; } +fi + +if test "$GCC" = "yes" +then + cat >conftest.$ac_ext <<_ACEOF +#if !defined(__GNUC__) || __GNUC__ < 3 +#error Unsupported GCC version +#endif + +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
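# Editor's note (not part of the patch): --with-llvmcc picks the compiler
# the test suite treats as "LLVM capable"; the validation above accepts
# exactly llvm-gcc, clang or none. Left unset, the fallback order is
# llvm-gcc if one was found, then clang (external or just-built), then none:
./configure --with-llvmcc=clang    # force clang
./configure --with-llvmcc=none     # build and test without an LLVM-capable cc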
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + : +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { { echo "$as_me:$LINENO: error: gcc 3.x required, but you have a lower version" >&5 +echo "$as_me: error: gcc 3.x required, but you have a lower version" >&2;} + { (exit 1); exit 1; }; } +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi + +if test -z "$llvm_cv_gnu_make_command" +then + { { echo "$as_me:$LINENO: error: GNU Make required but not found" >&5 +echo "$as_me: error: GNU Make required but not found" >&2;} + { (exit 1); exit 1; }; } +fi + +{ echo "$as_me:$LINENO: result: ok" >&5 +echo "${ECHO_T}ok" >&6; } + +{ echo "$as_me:$LINENO: checking optional compiler flags" >&5 +echo $ECHO_N "checking optional compiler flags... $ECHO_C" >&6; } +NO_VARIADIC_MACROS=`$CXX -Wno-variadic-macros -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-variadic-macros` + +NO_MISSING_FIELD_INITIALIZERS=`$CXX -Wno-missing-field-initializers -fsyntax-only -xc /dev/null 2>/dev/null && echo -Wno-missing-field-initializers` + +{ echo "$as_me:$LINENO: result: $NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&5 +echo "${ECHO_T}$NO_VARIADIC_MACROS $NO_MISSING_FIELD_INITIALIZERS" >&6; } + + + +{ echo "$as_me:$LINENO: checking for sin in -lm" >&5 +echo $ECHO_N "checking for sin in -lm... $ECHO_C" >&6; } +if test "${ac_cv_lib_m_sin+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lm $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char sin (); +int +main () +{ +return sin (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_m_sin=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_m_sin=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_m_sin" >&5 +echo "${ECHO_T}$ac_cv_lib_m_sin" >&6; } +if test $ac_cv_lib_m_sin = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBM 1 +_ACEOF + + LIBS="-lm $LIBS" + +fi + +if test "$llvm_cv_os_type" = "MingW" ; then + +{ echo "$as_me:$LINENO: checking for main in -limagehlp" >&5 +echo $ECHO_N "checking for main in -limagehlp... $ECHO_C" >&6; } +if test "${ac_cv_lib_imagehlp_main+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-limagehlp $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_imagehlp_main=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_imagehlp_main=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_imagehlp_main" >&5 +echo "${ECHO_T}$ac_cv_lib_imagehlp_main" >&6; } +if test $ac_cv_lib_imagehlp_main = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBIMAGEHLP 1 +_ACEOF + + LIBS="-limagehlp $LIBS" + +fi + + +{ echo "$as_me:$LINENO: checking for main in -lpsapi" >&5 +echo $ECHO_N "checking for main in -lpsapi... $ECHO_C" >&6; } +if test "${ac_cv_lib_psapi_main+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpsapi $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + + +int +main () +{ +return main (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_psapi_main=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_psapi_main=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_psapi_main" >&5 +echo "${ECHO_T}$ac_cv_lib_psapi_main" >&6; } +if test $ac_cv_lib_psapi_main = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPSAPI 1 +_ACEOF + + LIBS="-lpsapi $LIBS" + +fi + +fi + +{ echo "$as_me:$LINENO: checking for library containing dlopen" >&5 +echo $ECHO_N "checking for library containing dlopen... $ECHO_C" >&6; } +if test "${ac_cv_search_dlopen+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char dlopen (); +int +main () +{ +return dlopen (); + ; + return 0; +} +_ACEOF +for ac_lib in '' dl; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_dlopen=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_dlopen+set}" = set; then + break +fi +done +if test "${ac_cv_search_dlopen+set}" = set; then + : +else + ac_cv_search_dlopen=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_dlopen" >&5 +echo "${ECHO_T}$ac_cv_search_dlopen" >&6; } +ac_res=$ac_cv_search_dlopen +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_DLOPEN 1 +_ACEOF + +else + { echo "$as_me:$LINENO: WARNING: dlopen() not found - disabling plugin support" >&5 +echo "$as_me: WARNING: dlopen() not found - disabling plugin support" >&2;} +fi + + +if test "$llvm_cv_enable_libffi" = "yes" ; then + { echo "$as_me:$LINENO: checking for library containing ffi_call" >&5 +echo $ECHO_N "checking for library containing ffi_call... $ECHO_C" >&6; } +if test "${ac_cv_search_ffi_call+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ffi_call (); +int +main () +{ +return ffi_call (); + ; + return 0; +} +_ACEOF +for ac_lib in '' ffi; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_ffi_call=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_ffi_call+set}" = set; then + break +fi +done +if test "${ac_cv_search_ffi_call+set}" = set; then + : +else + ac_cv_search_ffi_call=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_ffi_call" >&5 +echo "${ECHO_T}$ac_cv_search_ffi_call" >&6; } +ac_res=$ac_cv_search_ffi_call +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FFI_CALL 1 +_ACEOF + +else + { { echo "$as_me:$LINENO: error: libffi not found - configure without --enable-libffi to compile without it" >&5 +echo "$as_me: error: libffi not found - configure without --enable-libffi to compile without it" >&2;} + { (exit 1); exit 1; }; } +fi + +fi + +{ echo "$as_me:$LINENO: checking for library containing mallinfo" >&5 +echo $ECHO_N "checking for library containing mallinfo... $ECHO_C" >&6; } +if test "${ac_cv_search_mallinfo+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char mallinfo (); +int +main () +{ +return mallinfo (); + ; + return 0; +} +_ACEOF +for ac_lib in '' malloc; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_mallinfo=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_mallinfo+set}" = set; then + break +fi +done +if test "${ac_cv_search_mallinfo+set}" = set; then + : +else + ac_cv_search_mallinfo=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_mallinfo" >&5 +echo "${ECHO_T}$ac_cv_search_mallinfo" >&6; } +ac_res=$ac_cv_search_mallinfo +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MALLINFO 1 +_ACEOF + +fi + + +if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then + +{ echo "$as_me:$LINENO: checking for pthread_mutex_init in -lpthread" >&5 +echo $ECHO_N "checking for pthread_mutex_init in -lpthread... $ECHO_C" >&6; } +if test "${ac_cv_lib_pthread_pthread_mutex_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpthread $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_mutex_init (); +int +main () +{ +return pthread_mutex_init (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_pthread_pthread_mutex_init=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_pthread_pthread_mutex_init=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_pthread_pthread_mutex_init" >&5 +echo "${ECHO_T}$ac_cv_lib_pthread_pthread_mutex_init" >&6; } +if test $ac_cv_lib_pthread_pthread_mutex_init = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBPTHREAD 1 +_ACEOF + + LIBS="-lpthread $LIBS" + +fi + + { echo "$as_me:$LINENO: checking for library containing pthread_mutex_lock" >&5 +echo $ECHO_N "checking for library containing pthread_mutex_lock... 
$ECHO_C" >&6; } +if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_mutex_lock (); +int +main () +{ +return pthread_mutex_lock (); + ; + return 0; +} +_ACEOF +for ac_lib in '' pthread; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_pthread_mutex_lock=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then + break +fi +done +if test "${ac_cv_search_pthread_mutex_lock+set}" = set; then + : +else + ac_cv_search_pthread_mutex_lock=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_mutex_lock" >&5 +echo "${ECHO_T}$ac_cv_search_pthread_mutex_lock" >&6; } +ac_res=$ac_cv_search_pthread_mutex_lock +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PTHREAD_MUTEX_LOCK 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for library containing pthread_rwlock_init" >&5 +echo $ECHO_N "checking for library containing pthread_rwlock_init... $ECHO_C" >&6; } +if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char pthread_rwlock_init (); +int +main () +{ +return pthread_rwlock_init (); + ; + return 0; +} +_ACEOF +for ac_lib in '' pthread; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_pthread_rwlock_init=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then + break +fi +done +if test "${ac_cv_search_pthread_rwlock_init+set}" = set; then + : +else + ac_cv_search_pthread_rwlock_init=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_rwlock_init" >&5 +echo "${ECHO_T}$ac_cv_search_pthread_rwlock_init" >&6; } +ac_res=$ac_cv_search_pthread_rwlock_init +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PTHREAD_RWLOCK_INIT 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for library containing pthread_getspecific" >&5 +echo $ECHO_N "checking for library containing pthread_getspecific... $ECHO_C" >&6; } +if test "${ac_cv_search_pthread_getspecific+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char pthread_getspecific (); +int +main () +{ +return pthread_getspecific (); + ; + return 0; +} +_ACEOF +for ac_lib in '' pthread; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_pthread_getspecific=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_pthread_getspecific+set}" = set; then + break +fi +done +if test "${ac_cv_search_pthread_getspecific+set}" = set; then + : +else + ac_cv_search_pthread_getspecific=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_getspecific" >&5 +echo "${ECHO_T}$ac_cv_search_pthread_getspecific" >&6; } +ac_res=$ac_cv_search_pthread_getspecific +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PTHREAD_GETSPECIFIC 1 +_ACEOF + +fi + +fi + + +# Check whether --with-udis86 was given. +if test "${with_udis86+set}" = set; then + withval=$with_udis86; + USE_UDIS86=1 + + case "$withval" in + /usr/lib|yes) ;; + *) LDFLAGS="$LDFLAGS -L${withval}" ;; + esac + +{ echo "$as_me:$LINENO: checking for ud_init in -ludis86" >&5 +echo $ECHO_N "checking for ud_init in -ludis86... $ECHO_C" >&6; } +if test "${ac_cv_lib_udis86_ud_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-ludis86 $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ud_init (); +int +main () +{ +return ud_init (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_udis86_ud_init=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_udis86_ud_init=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_udis86_ud_init" >&5 +echo "${ECHO_T}$ac_cv_lib_udis86_ud_init" >&6; } +if test $ac_cv_lib_udis86_ud_init = yes; then + cat >>confdefs.h <<_ACEOF +#define HAVE_LIBUDIS86 1 +_ACEOF + + LIBS="-ludis86 $LIBS" + +else + + echo "Error! You need to have libudis86 around." + exit -1 + +fi + + +else + USE_UDIS86=0 + +fi + + +cat >>confdefs.h <<_ACEOF +#define USE_UDIS86 $USE_UDIS86 +_ACEOF + + + +# Check whether --with-oprofile was given. +if test "${with_oprofile+set}" = set; then + withval=$with_oprofile; + USE_OPROFILE=1 + + case "$withval" in + /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;; + no) llvm_cv_oppath= + USE_OPROFILE=0 + ;; + *) llvm_cv_oppath="${withval}/lib/oprofile" + CPPFLAGS="-I${withval}/include";; + esac + if test -n "$llvm_cv_oppath" ; then + LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}" + { echo "$as_me:$LINENO: checking for library containing bfd_init" >&5 +echo $ECHO_N "checking for library containing bfd_init... $ECHO_C" >&6; } +if test "${ac_cv_search_bfd_init+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char bfd_init (); +int +main () +{ +return bfd_init (); + ; + return 0; +} +_ACEOF +for ac_lib in '' bfd; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_bfd_init=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_bfd_init+set}" = set; then + break +fi +done +if test "${ac_cv_search_bfd_init+set}" = set; then + : +else + ac_cv_search_bfd_init=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_bfd_init" >&5 +echo "${ECHO_T}$ac_cv_search_bfd_init" >&6; } +ac_res=$ac_cv_search_bfd_init +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + + { echo "$as_me:$LINENO: checking for library containing op_open_agent" >&5 +echo $ECHO_N "checking for library containing op_open_agent... $ECHO_C" >&6; } +if test "${ac_cv_search_op_open_agent+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char op_open_agent (); +int +main () +{ +return op_open_agent (); + ; + return 0; +} +_ACEOF +for ac_lib in '' opagent; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_op_open_agent=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_op_open_agent+set}" = set; then + break +fi +done +if test "${ac_cv_search_op_open_agent+set}" = set; then + : +else + ac_cv_search_op_open_agent=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_op_open_agent" >&5 +echo "${ECHO_T}$ac_cv_search_op_open_agent" >&6; } +ac_res=$ac_cv_search_op_open_agent +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +else + + echo "Error! You need to have libopagent around." 
+  exit -1
+
+fi
+
+  if test "${ac_cv_header_opagent_h+set}" = set; then
+  { echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking opagent.h usability" >&5
+echo $ECHO_N "checking opagent.h usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+$ac_includes_default
+#include <opagent.h>
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+  { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_header_compiler=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_compiler=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5
+echo "${ECHO_T}$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ echo "$as_me:$LINENO: checking opagent.h presence" >&5
+echo $ECHO_N "checking opagent.h presence... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <opagent.h>
+_ACEOF
+if { (ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } >/dev/null; then
+  if test -s conftest.err; then
+    ac_cpp_err=$ac_c_preproc_warn_flag
+    ac_cpp_err=$ac_cpp_err$ac_c_werror_flag
+  else
+    ac_cpp_err=
+  fi
+else
+  ac_cpp_err=yes
+fi
+if test -z "$ac_cpp_err"; then
+  ac_header_preproc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_header_preproc=no
+fi
+
+rm -f conftest.err conftest.$ac_ext
+{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5
+echo "${ECHO_T}$ac_header_preproc" >&6; }
+
+# So?  What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in
+  yes:no: )
+    { echo "$as_me:$LINENO: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&5
+echo "$as_me: WARNING: opagent.h: accepted by the compiler, rejected by the preprocessor!" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the compiler's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the compiler's result" >&2;}
+    ac_header_preproc=yes
+    ;;
+  no:yes:* )
+    { echo "$as_me:$LINENO: WARNING: opagent.h: present but cannot be compiled" >&5
+echo "$as_me: WARNING: opagent.h: present but cannot be compiled" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: check for missing prerequisite headers?" >&5
+echo "$as_me: WARNING: opagent.h: check for missing prerequisite headers?" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: see the Autoconf documentation" >&5
+echo "$as_me: WARNING: opagent.h: see the Autoconf documentation" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&5
+echo "$as_me: WARNING: opagent.h: section \"Present But Cannot Be Compiled\"" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: proceeding with the preprocessor's result" >&5
+echo "$as_me: WARNING: opagent.h: proceeding with the preprocessor's result" >&2;}
+    { echo "$as_me:$LINENO: WARNING: opagent.h: in the future, the compiler will take precedence" >&5
+echo "$as_me: WARNING: opagent.h: in the future, the compiler will take precedence" >&2;}
+    ( cat <<\_ASBOX
+## ----------------------------------- ##
+## Report this to llvmbugs@cs.uiuc.edu ##
+## ----------------------------------- ##
+_ASBOX
+     ) | sed "s/^/$as_me: WARNING: /" >&2
+    ;;
+esac
+{ echo "$as_me:$LINENO: checking for opagent.h" >&5
+echo $ECHO_N "checking for opagent.h... $ECHO_C" >&6; }
+if test "${ac_cv_header_opagent_h+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_cv_header_opagent_h=$ac_header_preproc
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_opagent_h" >&5
+echo "${ECHO_T}$ac_cv_header_opagent_h" >&6; }
+
+fi
+if test $ac_cv_header_opagent_h = yes; then
+  :
+else
+
+  echo "Error! You need to have opagent.h around."
+  exit -1
+
+fi
+
+
+  fi
+
+else
+
+  USE_OPROFILE=0
+
+
+fi
+
+
+cat >>confdefs.h <<_ACEOF
+#define USE_OPROFILE $USE_OPROFILE
+_ACEOF
+
+
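For orientation: the machine-generated block above is what Autoconf expands from the --with-oprofile handling in configure.ac, which this same patch adds under final/autoconf/. A rough sketch of that input, reconstructed here from the shell it produces rather than copied verbatim (quoting and the help string are approximate):

    AC_ARG_WITH(oprofile,
      AS_HELP_STRING([--with-oprofile=<prefix>],
        [Tell OProfile >= 0.9.4 how to symbolize JIT output]),
      [
        AC_SUBST(USE_OPROFILE, [1])
        case "$withval" in
          /usr|yes) llvm_cv_oppath=/usr/lib/oprofile ;;
          no) llvm_cv_oppath=
              AC_SUBST(USE_OPROFILE, [0]) ;;
          *) llvm_cv_oppath="${withval}/lib/oprofile"
             CPPFLAGS="-I${withval}/include" ;;
        esac
        if test -n "$llvm_cv_oppath" ; then
          LIBS="$LIBS -L${llvm_cv_oppath} -Wl,-rpath,${llvm_cv_oppath}"
          AC_SEARCH_LIBS(bfd_init, bfd)
          AC_SEARCH_LIBS(op_open_agent, opagent, [], [
            echo "Error! You need to have libopagent around."
            exit -1
          ])
          AC_CHECK_HEADER([opagent.h], [], [
            echo "Error! You need to have opagent.h around."
            exit -1
          ])
        fi
      ],
      [
        AC_SUBST(USE_OPROFILE, [0])
      ])
    AC_DEFINE_UNQUOTED([USE_OPROFILE], $USE_OPROFILE,
                       [Define if we have the oprofile JIT-support library])

AC_SEARCH_LIBS is what expands into the "checking for library containing ..." cache loops seen earlier, and AC_CHECK_HEADER expands into the usability/presence double probe for opagent.h just above.
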
+
+
+
+
+
+
+ac_header_dirent=no
+for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h; do
+  as_ac_Header=`echo "ac_cv_header_dirent_$ac_hdr" | $as_tr_sh`
+{ echo "$as_me:$LINENO: checking for $ac_hdr that defines DIR" >&5
+echo $ECHO_N "checking for $ac_hdr that defines DIR... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <$ac_hdr>
+
+int
+main ()
+{
+if ((DIR *) 0)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_Header=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_Header=no" +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_hdr" | $as_tr_cpp` 1 +_ACEOF + +ac_header_dirent=$ac_hdr; break +fi + +done +# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. +if test $ac_header_dirent = dirent.h; then + { echo "$as_me:$LINENO: checking for library containing opendir" >&5 +echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; } +if test "${ac_cv_search_opendir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' dir; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_opendir=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_opendir+set}" = set; then + break +fi +done +if test "${ac_cv_search_opendir+set}" = set; then + : +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5 +echo "${ECHO_T}$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +else + { echo "$as_me:$LINENO: checking for library containing opendir" >&5 +echo $ECHO_N "checking for library containing opendir... $ECHO_C" >&6; } +if test "${ac_cv_search_opendir+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_func_search_save_LIBS=$LIBS +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char opendir (); +int +main () +{ +return opendir (); + ; + return 0; +} +_ACEOF +for ac_lib in '' x; do + if test -z "$ac_lib"; then + ac_res="none required" + else + ac_res=-l$ac_lib + LIBS="-l$ac_lib $ac_func_search_save_LIBS" + fi + rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_search_opendir=$ac_res +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext + if test "${ac_cv_search_opendir+set}" = set; then + break +fi +done +if test "${ac_cv_search_opendir+set}" = set; then + : +else + ac_cv_search_opendir=no +fi +rm conftest.$ac_ext +LIBS=$ac_func_search_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_search_opendir" >&5 +echo "${ECHO_T}$ac_cv_search_opendir" >&6; } +ac_res=$ac_cv_search_opendir +if test "$ac_res" != no; then + test "$ac_res" = "none required" || LIBS="$ac_res $LIBS" + +fi + +fi + +{ echo "$as_me:$LINENO: checking for MAP_ANONYMOUS vs. MAP_ANON" >&5 +echo $ECHO_N "checking for MAP_ANONYMOUS vs. MAP_ANON... 
&6; }">
$ECHO_C" >&6; }
+if test "${ac_cv_header_mmap_anon+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+int
+main ()
+{
+mmap (0, 1, PROT_READ, MAP_ANONYMOUS, -1, 0); return (0);
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+  { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_header_mmap_anon=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_cv_header_mmap_anon=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_mmap_anon" >&5
+echo "${ECHO_T}$ac_cv_header_mmap_anon" >&6; }
+if test "$ac_cv_header_mmap_anon" = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_MMAP_ANONYMOUS 1
+_ACEOF
+
+fi
+
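&6; }">
The MAP_ANONYMOUS vs. MAP_ANON probe above compiles a one-line mmap() call: glibc-style systems spell the anonymous-mapping flag MAP_ANONYMOUS, older BSD-derived systems only MAP_ANON, and HAVE_MMAP_ANONYMOUS records which spelling compiled. Per the diffstat, the macro behind it ships in this patch as final/autoconf/m4/header_mmap_anonymous.m4; a sketch of its shape (reconstructed from the expansion, not a verbatim copy):

    AC_DEFUN([AC_HEADER_MMAP_ANONYMOUS],
    [AC_CACHE_CHECK([for MAP_ANONYMOUS vs. MAP_ANON],
      [ac_cv_header_mmap_anon],
      [AC_LANG_PUSH([C])
       AC_COMPILE_IFELSE([AC_LANG_PROGRAM(
         [[#include <sys/mman.h>
           #include <unistd.h>
           #include <fcntl.h>]],
         [[mmap (0, 1, PROT_READ, MAP_ANONYMOUS, -1, 0); return (0);]])],
         [ac_cv_header_mmap_anon=yes],
         [ac_cv_header_mmap_anon=no])
       AC_LANG_POP([C])])
     if test "$ac_cv_header_mmap_anon" = yes; then
       AC_DEFINE([HAVE_MMAP_ANONYMOUS], [1],
         [Define if mmap() uses MAP_ANONYMOUS macro, undefined if MAP_ANON])
     fi
    ])

The AC_LANG_PUSH/AC_LANG_POP pair is what produces the duplicated ac_ext/ac_cpp/ac_compile/ac_link assignments visible in the expanded shell above.
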
+{ echo "$as_me:$LINENO: checking whether stat file-mode macros are broken" >&5
+echo $ECHO_N "checking whether stat file-mode macros are broken... $ECHO_C" >&6; }
+if test "${ac_cv_header_stat_broken+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if defined S_ISBLK && defined S_IFDIR
+# if S_ISBLK (S_IFDIR)
+You lose.
+# endif
+#endif
+
+#if defined S_ISBLK && defined S_IFCHR
+# if S_ISBLK (S_IFCHR)
+You lose.
+# endif
+#endif
+
+#if defined S_ISLNK && defined S_IFREG
+# if S_ISLNK (S_IFREG)
+You lose.
+# endif
+#endif
+
+#if defined S_ISSOCK && defined S_IFREG
+# if S_ISSOCK (S_IFREG)
+You lose.
+# endif
+#endif
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "You lose" >/dev/null 2>&1; then
+  ac_cv_header_stat_broken=yes
+else
+  ac_cv_header_stat_broken=no
+fi
+rm -f conftest*
+
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stat_broken" >&5
+echo "${ECHO_T}$ac_cv_header_stat_broken" >&6; }
+if test $ac_cv_header_stat_broken = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STAT_MACROS_BROKEN 1
+_ACEOF
+
+fi
+
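The "stat file-mode macros" test is Autoconf's stock AC_HEADER_STAT. It runs the S_IS*/S_IF* cross-checks above through the preprocessor only and greps the output for the literal string "You lose": if any S_IS*() macro wrongly accepts a mismatched S_IF* constant, STAT_MACROS_BROKEN is defined so portable code can fall back to comparing (st_mode & S_IFMT) directly. In configure.ac the whole expanded block is a single line:

    dnl Expands to the preprocess-and-grep probe seen above.
    AC_HEADER_STAT
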
+{ echo "$as_me:$LINENO: checking for ANSI C header files" >&5
+echo $ECHO_N "checking for ANSI C header files... $ECHO_C" >&6; }
+if test "${ac_cv_header_stdc+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+  { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_header_stdc=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_cv_header_stdc=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+  # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "memchr" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+  $EGREP "free" >/dev/null 2>&1; then
+  :
+else
+  ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+  # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+  if test "$cross_compiling" = yes; then
+  :
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+  (('a' <= (c) && (c) <= 'i') \
+    || ('j' <= (c) && (c) <= 'r') \
+    || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+  int i;
+  for (i = 0; i < 256; i++)
+    if (XOR (islower (i), ISLOWER (i))
+	|| toupper (i) != TOUPPER (i))
+      return 2;
+  return 0;
+}
+_ACEOF
+rm -f conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && { ac_try='./conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  :
+else
+  echo "$as_me: program exited with status $ac_status" >&5
+echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+( exit $ac_status )
+ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext
+fi
+
+
+fi
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_stdc" >&5
+echo "${ECHO_T}$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define STDC_HEADERS 1
+_ACEOF
+
+fi
+
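The ANSI-header cascade above is stock AC_HEADER_STDC, and it works in three stages: compile a program including stdlib.h, stdarg.h, string.h and float.h; run string.h and stdlib.h through the preprocessor and grep for memchr and free; and finally link and run the islower/toupper consistency program, a stage that is skipped when cross-compiling (the `if test "$cross_compiling" = yes` branch). Only if every stage passes is STDC_HEADERS defined. The block that follows is its POSIX companion for sys/wait.h. In configure.ac terms both are one-liners:

    dnl Three-stage ANSI header probe; defines STDC_HEADERS on success.
    AC_HEADER_STDC
    dnl WIFEXITED/WEXITSTATUS sanity probe; defines HAVE_SYS_WAIT_H.
    AC_HEADER_SYS_WAIT
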
&5">
+{ echo "$as_me:$LINENO: checking for sys/wait.h that is POSIX.1 compatible" >&5
+echo $ECHO_N "checking for sys/wait.h that is POSIX.1 compatible... $ECHO_C" >&6; }
+if test "${ac_cv_header_sys_wait_h+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/wait.h>
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned int) (stat_val) >> 8)
+#endif
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+int
+main ()
+{
+  int s;
+  wait (&s);
+  s = WIFEXITED (s) ? WEXITSTATUS (s) : 1;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+  { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_header_sys_wait_h=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_cv_header_sys_wait_h=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_sys_wait_h" >&5
+echo "${ECHO_T}$ac_cv_header_sys_wait_h" >&6; }
+if test $ac_cv_header_sys_wait_h = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_SYS_WAIT_H 1
+_ACEOF
+
+fi
+
+{ echo "$as_me:$LINENO: checking whether time.h and sys/time.h may both be included" >&5
+echo $ECHO_N "checking whether time.h and sys/time.h may both be included... $ECHO_C" >&6; }
+if test "${ac_cv_header_time+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+
+int
+main ()
+{
+if ((struct tm *) 0)
+return 0;
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext
+if { (ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_compile") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+  { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+  { ac_try='test -s conftest.$ac_objext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_header_time=yes
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+  ac_cv_header_time=no
+fi
+
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_header_time" >&5
+echo "${ECHO_T}$ac_cv_header_time" >&6; }
+if test $ac_cv_header_time = yes; then
+
+cat >>confdefs.h <<\_ACEOF
+#define TIME_WITH_SYS_TIME 1
+_ACEOF
+
+fi
+
+
+
+
+
+
+
+
+for ac_header in dlfcn.h execinfo.h fcntl.h inttypes.h limits.h link.h
+do
+as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh`
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  { echo "$as_me:$LINENO: checking for $ac_header" >&5
+echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; }
+if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+fi
+ac_res=`eval echo '${'$as_ac_Header'}'`
+  { echo "$as_me:$LINENO: result: $ac_res" >&5
+echo "${ECHO_T}$ac_res" >&6; }
+else
+  # Is the header compilable?
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5
+echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; }
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.
*/ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + + +for ac_header in malloc.h setjmp.h signal.h stdint.h termios.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + +for ac_header in utime.h windows.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + + +for ac_header in sys/mman.h sys/param.h sys/resource.h sys/time.h sys/uio.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? 
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + + + +for ac_header in sys/types.h sys/ioctl.h malloc/malloc.h mach/mach.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! 
-s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in valgrind/valgrind.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_header in fenv.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +if test "$ENABLE_THREADS" -eq 1 && test "$ENABLE_PTHREADS" -eq 1 ; then + +for ac_header in pthread.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? 
+{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + HAVE_PTHREAD=1 + +else + HAVE_PTHREAD=0 + +fi + +done + +else + HAVE_PTHREAD=0 + +fi + +if test "$llvm_cv_enable_libffi" = "yes" ; then + + +for ac_header in ffi.h ffi/ffi.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! 
-s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + +fi + + +for ac_header in CrashReporterClient.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... 
$ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +{ echo "$as_me:$LINENO: checking __crashreporter_info__" >&5 +echo $ECHO_N "checking __crashreporter_info__... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +extern const char *__crashreporter_info__; + int main() { + __crashreporter_info__ = "test"; + return 0; + } + +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_CRASHREPORTER_INFO 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define HAVE_CRASHREPORTER_INFO 0 +_ACEOF + +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + + + + { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5 +echo $ECHO_N "checking for HUGE_VAL sanity... $ECHO_C" >&6; } +if test "${ac_cv_huge_val_sanity+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + ac_save_CXXFLAGS=$CXXFLAGS + CXXFLAGS=-pedantic + if test "$cross_compiling" = yes; then + ac_cv_huge_val_sanity=yes +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include +int +main () +{ +double x = HUGE_VAL; return x != x; + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_huge_val_sanity=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_huge_val_sanity=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + + CXXFLAGS=$ac_save_CXXFLAGS + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_huge_val_sanity" >&5 +echo "${ECHO_T}$ac_cv_huge_val_sanity" >&6; } + HUGE_VAL_SANITY=$ac_cv_huge_val_sanity + + +{ echo "$as_me:$LINENO: checking for pid_t" >&5 +echo $ECHO_N "checking for pid_t... $ECHO_C" >&6; } +if test "${ac_cv_type_pid_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef pid_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_pid_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_pid_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_pid_t" >&5 +echo "${ECHO_T}$ac_cv_type_pid_t" >&6; } +if test $ac_cv_type_pid_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define pid_t int +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for size_t" >&5 +echo $ECHO_N "checking for size_t... $ECHO_C" >&6; } +if test "${ac_cv_type_size_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default +typedef size_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_size_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_size_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_size_t" >&5 +echo "${ECHO_T}$ac_cv_type_size_t" >&6; } +if test $ac_cv_type_size_t = yes; then + : +else + +cat >>confdefs.h <<_ACEOF +#define size_t unsigned int +_ACEOF + +fi + + +cat >>confdefs.h <<_ACEOF +#define RETSIGTYPE void +_ACEOF + +{ echo "$as_me:$LINENO: checking whether struct tm is in sys/time.h or time.h" >&5 +echo $ECHO_N "checking whether struct tm is in sys/time.h or time.h... $ECHO_C" >&6; } +if test "${ac_cv_struct_tm+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <sys/types.h> +#include <time.h> + +int +main () +{ +struct tm *tp; tp->tm_sec; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_struct_tm=time.h +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_struct_tm=sys/time.h +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_struct_tm" >&5 +echo "${ECHO_T}$ac_cv_struct_tm" >&6; } +if test $ac_cv_struct_tm = sys/time.h; then + +cat >>confdefs.h <<\_ACEOF +#define TM_IN_SYS_TIME 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for int64_t" >&5 +echo $ECHO_N "checking for int64_t... $ECHO_C" >&6; } +if test "${ac_cv_type_int64_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef int64_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_int64_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_int64_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_int64_t" >&5 +echo "${ECHO_T}$ac_cv_type_int64_t" >&6; } +if test $ac_cv_type_int64_t = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_INT64_T 1 +_ACEOF + + +else + { { echo "$as_me:$LINENO: error: Type int64_t required but not found" >&5 +echo "$as_me: error: Type int64_t required but not found" >&2;} + { (exit 1); exit 1; }; } +fi + +{ echo "$as_me:$LINENO: checking for uint64_t" >&5 +echo $ECHO_N "checking for uint64_t... $ECHO_C" >&6; } +if test "${ac_cv_type_uint64_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef uint64_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? 
+ grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_uint64_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_uint64_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_uint64_t" >&5 +echo "${ECHO_T}$ac_cv_type_uint64_t" >&6; } +if test $ac_cv_type_uint64_t = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_UINT64_T 1 +_ACEOF + + +else + { echo "$as_me:$LINENO: checking for u_int64_t" >&5 +echo $ECHO_N "checking for u_int64_t... $ECHO_C" >&6; } +if test "${ac_cv_type_u_int64_t+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +typedef u_int64_t ac__type_new_; +int +main () +{ +if ((ac__type_new_ *) 0) + return 0; +if (sizeof (ac__type_new_)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_u_int64_t=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_type_u_int64_t=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_type_u_int64_t" >&5 +echo "${ECHO_T}$ac_cv_type_u_int64_t" >&6; } +if test $ac_cv_type_u_int64_t = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_U_INT64_T 1 +_ACEOF + + +else + { { echo "$as_me:$LINENO: error: Type uint64_t or u_int64_t required but not found" >&5 +echo "$as_me: error: Type uint64_t or u_int64_t required but not found" >&2;} + { (exit 1); exit 1; }; } +fi + +fi + + + + + + + + + + +for ac_func in backtrace ceilf floorf roundf rintf nearbyintf getcwd +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + +for ac_func in powf fmodf strtof round +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + + +for ac_func in getpagesize getrusage getrlimit setrlimit gettimeofday +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + +for ac_func in isatty mkdtemp mkstemp +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + + + +for ac_func in mktemp posix_spawn realpath sbrk setrlimit strdup +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + +for ac_func in strerror strerror_r setenv +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + +for ac_func in strtoll strtoq sysconf malloc_zone_statistics +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + + + + + +for ac_func in setjmp longjmp sigsetjmp siglongjmp writev +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case declares $ac_func. + For example, HP-UX 11i declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer to if __STDC__ is defined, since + exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include +#else +# include +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + +{ echo "$as_me:$LINENO: checking if printf has the %a format character" >&5 +echo $ECHO_N "checking if printf has the %a format character... $ECHO_C" >&6; } +if test "${llvm_cv_c_printf_a+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "$cross_compiling" = yes; then + llvmac_cv_c_printf_a=no +else + cat >conftest.$ac_ext <<_ACEOF + + /* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +#include +#include + +int +main () +{ + +volatile double A, B; +char Buffer[100]; +A = 1; +A /= 10.0; +sprintf(Buffer, "%a", A); +B = atof(Buffer); +if (A != B) + return (1); +if (A != 0x1.999999999999ap-4) + return (1); +return (0); + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_c_printf_a=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +llvmac_cv_c_printf_a=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_c_printf_a" >&5 +echo "${ECHO_T}$llvm_cv_c_printf_a" >&6; } + if test "$llvm_cv_c_printf_a" = "yes"; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_PRINTF_A 1 +_ACEOF + + fi + + +{ echo "$as_me:$LINENO: checking for srand48/lrand48/drand48 in " >&5 +echo $ECHO_N "checking for srand48/lrand48/drand48 in ... $ECHO_C" >&6; } +if test "${ac_cv_func_rand48+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include +int +main () +{ +srand48(0);lrand48();drand48(); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_rand48=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_rand48=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_rand48" >&5 +echo "${ECHO_T}$ac_cv_func_rand48" >&6; } + +if test "$ac_cv_func_rand48" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_RAND48 1 +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking whether strerror_s is declared" >&5 +echo $ECHO_N "checking whether strerror_s is declared... $ECHO_C" >&6; } +if test "${ac_cv_have_decl_strerror_s+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +#ifndef strerror_s + char *p = (char *) strerror_s; + return !p; +#endif + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_have_decl_strerror_s=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_have_decl_strerror_s=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +fi +{ echo "$as_me:$LINENO: result: $ac_cv_have_decl_strerror_s" >&5 +echo "${ECHO_T}$ac_cv_have_decl_strerror_s" >&6; } +if test $ac_cv_have_decl_strerror_s = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_STRERROR_S 1 +_ACEOF + + +else + cat >>confdefs.h <<_ACEOF +#define HAVE_DECL_STRERROR_S 0 +_ACEOF + + +fi + + + +if test "$llvm_cv_os_type" = "MingW" ; then + { echo "$as_me:$LINENO: checking for _alloca in -lgcc" >&5 +echo $ECHO_N "checking for _alloca in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc__alloca+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char _alloca (); +int +main () +{ +return _alloca (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc__alloca=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc__alloca=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc__alloca" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc__alloca" >&6; } +if test $ac_cv_lib_gcc__alloca = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE__ALLOCA 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __alloca in -lgcc" >&5 +echo $ECHO_N "checking for __alloca in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___alloca+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __alloca (); +int +main () +{ +return __alloca (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___alloca=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___alloca=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___alloca" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___alloca" >&6; } +if test $ac_cv_lib_gcc___alloca = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___ALLOCA 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __chkstk in -lgcc" >&5 +echo $ECHO_N "checking for __chkstk in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___chkstk+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __chkstk (); +int +main () +{ +return __chkstk (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___chkstk=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___chkstk=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___chkstk" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___chkstk" >&6; } +if test $ac_cv_lib_gcc___chkstk = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___CHKSTK 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for ___chkstk in -lgcc" >&5 +echo $ECHO_N "checking for ___chkstk in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc____chkstk+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char ___chkstk (); +int +main () +{ +return ___chkstk (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc____chkstk=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc____chkstk=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc____chkstk" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc____chkstk" >&6; } +if test $ac_cv_lib_gcc____chkstk = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE____CHKSTK 1 +_ACEOF + +fi + + + { echo "$as_me:$LINENO: checking for __ashldi3 in -lgcc" >&5 +echo $ECHO_N "checking for __ashldi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___ashldi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __ashldi3 (); +int +main () +{ +return __ashldi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___ashldi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___ashldi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashldi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___ashldi3" >&6; } +if test $ac_cv_lib_gcc___ashldi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___ASHLDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __ashrdi3 in -lgcc" >&5 +echo $ECHO_N "checking for __ashrdi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___ashrdi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __ashrdi3 (); +int +main () +{ +return __ashrdi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___ashrdi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___ashrdi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___ashrdi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___ashrdi3" >&6; } +if test $ac_cv_lib_gcc___ashrdi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___ASHRDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __divdi3 in -lgcc" >&5 +echo $ECHO_N "checking for __divdi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___divdi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __divdi3 (); +int +main () +{ +return __divdi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___divdi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___divdi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___divdi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___divdi3" >&6; } +if test $ac_cv_lib_gcc___divdi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___DIVDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __fixdfdi in -lgcc" >&5 +echo $ECHO_N "checking for __fixdfdi in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___fixdfdi+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __fixdfdi (); +int +main () +{ +return __fixdfdi (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___fixdfdi=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___fixdfdi=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixdfdi" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___fixdfdi" >&6; } +if test $ac_cv_lib_gcc___fixdfdi = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___FIXDFDI 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __fixsfdi in -lgcc" >&5 +echo $ECHO_N "checking for __fixsfdi in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___fixsfdi+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __fixsfdi (); +int +main () +{ +return __fixsfdi (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___fixsfdi=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___fixsfdi=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___fixsfdi" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___fixsfdi" >&6; } +if test $ac_cv_lib_gcc___fixsfdi = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___FIXSFDI 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __floatdidf in -lgcc" >&5 +echo $ECHO_N "checking for __floatdidf in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___floatdidf+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __floatdidf (); +int +main () +{ +return __floatdidf (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___floatdidf=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___floatdidf=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___floatdidf" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___floatdidf" >&6; } +if test $ac_cv_lib_gcc___floatdidf = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___FLOATDIDF 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __lshrdi3 in -lgcc" >&5 +echo $ECHO_N "checking for __lshrdi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___lshrdi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __lshrdi3 (); +int +main () +{ +return __lshrdi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___lshrdi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___lshrdi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___lshrdi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___lshrdi3" >&6; } +if test $ac_cv_lib_gcc___lshrdi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___LSHRDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __moddi3 in -lgcc" >&5 +echo $ECHO_N "checking for __moddi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___moddi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __moddi3 (); +int +main () +{ +return __moddi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___moddi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___moddi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___moddi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___moddi3" >&6; } +if test $ac_cv_lib_gcc___moddi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___MODDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __udivdi3 in -lgcc" >&5 +echo $ECHO_N "checking for __udivdi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___udivdi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __udivdi3 (); +int +main () +{ +return __udivdi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___udivdi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___udivdi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___udivdi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___udivdi3" >&6; } +if test $ac_cv_lib_gcc___udivdi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___UDIVDI3 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __umoddi3 in -lgcc" >&5 +echo $ECHO_N "checking for __umoddi3 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___umoddi3+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +#ifdef __cplusplus +extern "C" +#endif +char __umoddi3 (); +int +main () +{ +return __umoddi3 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___umoddi3=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___umoddi3=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___umoddi3" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___umoddi3" >&6; } +if test $ac_cv_lib_gcc___umoddi3 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___UMODDI3 1 +_ACEOF + +fi + + + { echo "$as_me:$LINENO: checking for __main in -lgcc" >&5 +echo $ECHO_N "checking for __main in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___main+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __main (); +int +main () +{ +return __main (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___main=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___main=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___main" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___main" >&6; } +if test $ac_cv_lib_gcc___main = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___MAIN 1 +_ACEOF + +fi + + { echo "$as_me:$LINENO: checking for __cmpdi2 in -lgcc" >&5 +echo $ECHO_N "checking for __cmpdi2 in -lgcc... $ECHO_C" >&6; } +if test "${ac_cv_lib_gcc___cmpdi2+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_check_lib_save_LIBS=$LIBS +LIBS="-lgcc $LIBS" +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char __cmpdi2 (); +int +main () +{ +return __cmpdi2 (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_lib_gcc___cmpdi2=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_lib_gcc___cmpdi2=no +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ echo "$as_me:$LINENO: result: $ac_cv_lib_gcc___cmpdi2" >&5 +echo "${ECHO_T}$ac_cv_lib_gcc___cmpdi2" >&6; } +if test $ac_cv_lib_gcc___cmpdi2 = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE___CMPDI2 1 +_ACEOF + +fi + +fi + + +{ echo "$as_me:$LINENO: checking for isnan in <math.h>" >&5 +echo $ECHO_N "checking for isnan in <math.h>... $ECHO_C" >&6; } +if test "${ac_cv_func_isnan_in_math_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h.
*/ +#include <math.h> +int +main () +{ +float f; isnan(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_isnan_in_math_h=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_isnan_in_math_h=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_math_h" >&5 +echo "${ECHO_T}$ac_cv_func_isnan_in_math_h" >&6; } + + +if test "$ac_cv_func_isnan_in_math_h" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ISNAN_IN_MATH_H 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for isnan in <cmath>" >&5 +echo $ECHO_N "checking for isnan in <cmath>... $ECHO_C" >&6; } +if test "${ac_cv_func_isnan_in_cmath+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <cmath> +int +main () +{ +float f; isnan(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_isnan_in_cmath=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_isnan_in_cmath=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_isnan_in_cmath" >&5 +echo "${ECHO_T}$ac_cv_func_isnan_in_cmath" >&6; } + +if test "$ac_cv_func_isnan_in_cmath" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ISNAN_IN_CMATH 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for std::isnan in <cmath>" >&5 +echo $ECHO_N "checking for std::isnan in <cmath>... $ECHO_C" >&6; } +if test "${ac_cv_func_std_isnan_in_cmath+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <cmath> +int +main () +{ +float f; std::isnan(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_std_isnan_in_cmath=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_std_isnan_in_cmath=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isnan_in_cmath" >&5 +echo "${ECHO_T}$ac_cv_func_std_isnan_in_cmath" >&6; } + +if test "$ac_cv_func_std_isnan_in_cmath" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STD_ISNAN_IN_CMATH 1 +_ACEOF + +fi + + +{ echo "$as_me:$LINENO: checking for isinf in <math.h>" >&5 +echo $ECHO_N "checking for isinf in <math.h>...
&6; }">
$ECHO_C" >&6; } +if test "${ac_cv_func_isinf_in_math_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <math.h> +int +main () +{ +float f; isinf(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_isinf_in_math_h=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_isinf_in_math_h=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_math_h" >&5 +echo "${ECHO_T}$ac_cv_func_isinf_in_math_h" >&6; } + +if test "$ac_cv_func_isinf_in_math_h" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ISINF_IN_MATH_H 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for isinf in <cmath>" >&5 +echo $ECHO_N "checking for isinf in <cmath>... $ECHO_C" >&6; } +if test "${ac_cv_func_isinf_in_cmath+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <cmath> +int +main () +{ +float f; isinf(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test !
-s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_isinf_in_cmath=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_isinf_in_cmath=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_isinf_in_cmath" >&5 +echo "${ECHO_T}$ac_cv_func_isinf_in_cmath" >&6; } + +if test "$ac_cv_func_isinf_in_cmath" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_ISINF_IN_CMATH 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for std::isinf in <cmath>" >&5 +echo $ECHO_N "checking for std::isinf in <cmath>... $ECHO_C" >&6; } +if test "${ac_cv_func_std_isinf_in_cmath+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <cmath> +int +main () +{ +float f; std::isinf(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_std_isinf_in_cmath=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_std_isinf_in_cmath=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_std_isinf_in_cmath" >&5 +echo "${ECHO_T}$ac_cv_func_std_isinf_in_cmath" >&6; } + +if test "$ac_cv_func_std_isinf_in_cmath" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_STD_ISINF_IN_CMATH 1 +_ACEOF + +fi + +{ echo "$as_me:$LINENO: checking for finite in <ieeefp.h>" >&5 +echo $ECHO_N "checking for finite in <ieeefp.h>... $ECHO_C" >&6; } +if test "${ac_cv_func_finite_in_ieeefp_h+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <ieeefp.h> +int +main () +{ +float f; finite(f); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_finite_in_ieeefp_h=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_cv_func_finite_in_ieeefp_h=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_finite_in_ieeefp_h" >&5 +echo "${ECHO_T}$ac_cv_func_finite_in_ieeefp_h" >&6; } + +if test "$ac_cv_func_finite_in_ieeefp_h" = "yes" ; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_FINITE_IN_IEEEFP_H 1 +_ACEOF + +fi + + + +if test "$llvm_cv_platform_type" = "Unix" ; then + + +for ac_header in stdlib.h unistd.h +do +as_ac_Header=`echo "ac_cv_header_$ac_header" | $as_tr_sh` +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + { echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + +for ac_func in getpagesize +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... 
&6; }">
$ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + +{ echo "$as_me:$LINENO: checking for working mmap" >&5 +echo $ECHO_N "checking for working mmap... $ECHO_C" >&6; } +if test "${ac_cv_func_mmap_fixed_mapped+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$cross_compiling" = yes; then + ac_cv_func_mmap_fixed_mapped=no +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +/* malloc might have been renamed as rpl_malloc. */ +#undef malloc + +/* Thanks to Mike Haertel and Jim Avera for this test.
+ Here is a matrix of mmap possibilities: + mmap private not fixed + mmap private fixed at somewhere currently unmapped + mmap private fixed at somewhere already mapped + mmap shared not fixed + mmap shared fixed at somewhere currently unmapped + mmap shared fixed at somewhere already mapped + For private mappings, we should verify that changes cannot be read() + back from the file, nor mmap's back from the file at a different + address. (There have been systems where private was not correctly + implemented like the infamous i386 svr4.0, and systems where the + VM page cache was not coherent with the file system buffer cache + like early versions of FreeBSD and possibly contemporary NetBSD.) + For shared mappings, we should conversely verify that changes get + propagated back to all the places they're supposed to be. + + Grep wants private fixed already mapped. + The main things grep needs to know about mmap are: + * does it exist and is it safe to write into the mmap'd area + * how to use it (BSD variants) */ + +#include <fcntl.h> +#include <sys/mman.h> + +#if !STDC_HEADERS && !HAVE_STDLIB_H +char *malloc (); +#endif + +/* This mess was copied from the GNU getpagesize.h. */ +#if !HAVE_GETPAGESIZE +/* Assume that all systems that can run configure have sys/param.h. */ +# if !HAVE_SYS_PARAM_H +# define HAVE_SYS_PARAM_H 1 +# endif + +# ifdef _SC_PAGESIZE +# define getpagesize() sysconf(_SC_PAGESIZE) +# else /* no _SC_PAGESIZE */ +# if HAVE_SYS_PARAM_H +# include <sys/param.h> +# ifdef EXEC_PAGESIZE +# define getpagesize() EXEC_PAGESIZE +# else /* no EXEC_PAGESIZE */ +# ifdef NBPG +# define getpagesize() NBPG * CLSIZE +# ifndef CLSIZE +# define CLSIZE 1 +# endif /* no CLSIZE */ +# else /* no NBPG */ +# ifdef NBPC +# define getpagesize() NBPC +# else /* no NBPC */ +# ifdef PAGESIZE +# define getpagesize() PAGESIZE +# endif /* PAGESIZE */ +# endif /* no NBPC */ +# endif /* no NBPG */ +# endif /* no EXEC_PAGESIZE */ +# else /* no HAVE_SYS_PARAM_H */ +# define getpagesize() 8192 /* punt totally */ +# endif /* no HAVE_SYS_PARAM_H */ +# endif /* no _SC_PAGESIZE */ + +#endif /* no HAVE_GETPAGESIZE */ + +int +main () +{ + char *data, *data2, *data3; + int i, pagesize; + int fd; + + pagesize = getpagesize (); + + /* First, make a file with some known garbage in it. */ + data = (char *) malloc (pagesize); + if (!data) + return 1; + for (i = 0; i < pagesize; ++i) + *(data + i) = rand (); + umask (0); + fd = creat ("conftest.mmap", 0600); + if (fd < 0) + return 1; + if (write (fd, data, pagesize) != pagesize) + return 1; + close (fd); + + /* Next, try to mmap the file at a fixed address which already has + something else allocated at it. If we can, also make sure that + we see the same garbage. */ + fd = open ("conftest.mmap", O_RDWR); + if (fd < 0) + return 1; + data2 = (char *) malloc (2 * pagesize); + if (!data2) + return 1; + data2 += (pagesize - ((long int) data2 & (pagesize - 1))) & (pagesize - 1); + if (data2 != mmap (data2, pagesize, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED, fd, 0L)) + return 1; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data2 + i)) + return 1; + + /* Finally, make sure that changes to the mapped area do not + percolate back to the file as seen by read(). (This is a bug on + some variants of i386 svr4.0.)
*/ + for (i = 0; i < pagesize; ++i) + *(data2 + i) = *(data2 + i) + 1; + data3 = (char *) malloc (pagesize); + if (!data3) + return 1; + if (read (fd, data3, pagesize) != pagesize) + return 1; + for (i = 0; i < pagesize; ++i) + if (*(data + i) != *(data3 + i)) + return 1; + close (fd); + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_mmap_fixed_mapped=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_func_mmap_fixed_mapped=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_mmap_fixed_mapped" >&5 +echo "${ECHO_T}$ac_cv_func_mmap_fixed_mapped" >&6; } +if test $ac_cv_func_mmap_fixed_mapped = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MMAP 1 +_ACEOF + +fi +rm -f conftest.mmap + + { echo "$as_me:$LINENO: checking for mmap of files" >&5 +echo $ECHO_N "checking for mmap of files... $ECHO_C" >&6; } +if test "${ac_cv_func_mmap_file+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + if test "$cross_compiling" = yes; then + ac_cv_func_mmap_file=no +else + cat >conftest.$ac_ext <<_ACEOF + + /* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <fcntl.h> + +int +main () +{ + + int fd; + fd = creat ("foo",0777); + fd = (int) mmap (0, 1, PROT_READ, MAP_SHARED, fd, 0); + unlink ("foo"); + return (fd != (int) MAP_FAILED); + ; + return 0; +} +_ACEOF +rm -f conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { ac_try='./conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_func_mmap_file=yes +else + echo "$as_me: program exited with status $ac_status" >&5 +echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +( exit $ac_status ) +ac_cv_func_mmap_file=no +fi +rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext conftest.$ac_objext conftest.$ac_ext +fi + + + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_func_mmap_file" >&5 +echo "${ECHO_T}$ac_cv_func_mmap_file" >&6; } +if test "$ac_cv_func_mmap_file" = yes; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_MMAP_FILE +_ACEOF + + MMAP_FILE=yes + +fi + + { echo "$as_me:$LINENO: checking if /dev/zero is needed for mmap" >&5 +echo $ECHO_N "checking if /dev/zero is needed for mmap... $ECHO_C" >&6; } +if test "${ac_cv_need_dev_zero_for_mmap+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + if test "$llvm_cv_os_type" = "Interix" ; then + ac_cv_need_dev_zero_for_mmap=yes + else + ac_cv_need_dev_zero_for_mmap=no + fi + +fi +{ echo "$as_me:$LINENO: result: $ac_cv_need_dev_zero_for_mmap" >&5 +echo "${ECHO_T}$ac_cv_need_dev_zero_for_mmap" >&6; } +if test "$ac_cv_need_dev_zero_for_mmap" = yes; then + +cat >>confdefs.h <<\_ACEOF +#define NEED_DEV_ZERO_FOR_MMAP 1 +_ACEOF + +fi + + if test "$ac_cv_func_mmap_fixed_mapped" = "no" + then + { echo "$as_me:$LINENO: WARNING: mmap() of a fixed address required but not supported" >&5 +echo "$as_me: WARNING: mmap() of a fixed address required but not supported" >&2;} + fi + if test "$ac_cv_func_mmap_file" = "no" + then + { echo "$as_me:$LINENO: WARNING: mmap() of files required but not found" >&5 +echo "$as_me: WARNING: mmap() of files required but not found" >&2;} + fi +fi + +{ echo "$as_me:$LINENO: checking for GCC atomic builtins" >&5 +echo $ECHO_N "checking for GCC atomic builtins... $ECHO_C" >&6; } +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +int main() { + volatile unsigned long val = 1; + __sync_synchronize(); + __sync_val_compare_and_swap(&val, 1, 0); + __sync_add_and_fetch(&val, 1); + __sync_sub_and_fetch(&val, 1); + return 0; + } + +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define LLVM_MULTITHREADED 1 +_ACEOF + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + +cat >>confdefs.h <<\_ACEOF +#define LLVM_MULTITHREADED 0 +_ACEOF + + { echo "$as_me:$LINENO: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&5 +echo "$as_me: WARNING: LLVM will be built thread-unsafe because atomic builtins are missing" >&2;} +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + + +if test "$llvm_cv_os_type" = "Linux" -a "$llvm_cv_target_arch" = "x86_64" ; then + { echo "$as_me:$LINENO: checking for 32-bit userspace on 64-bit system" >&5 +echo $ECHO_N "checking for 32-bit userspace on 64-bit system... $ECHO_C" >&6; } +if test "${llvm_cv_linux_mixed+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#ifndef __x86_64__ + error: Not x86-64 even if uname says so! + #endif + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? 
= $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_linux_mixed=no +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_linux_mixed=yes +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_linux_mixed" >&5 +echo "${ECHO_T}$llvm_cv_linux_mixed" >&6; } + + if test "$llvm_cv_linux_mixed" = "yes"; then + llvm_cv_target_arch="x86" + ARCH="x86" + fi +fi + + +for ac_func in __dso_handle +do +as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` +{ echo "$as_me:$LINENO: checking for $ac_func" >&5 +echo $ECHO_N "checking for $ac_func... $ECHO_C" >&6; } +if { as_var=$as_ac_var; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +/* Define $ac_func to an innocuous variant, in case <limits.h> declares $ac_func. + For example, HP-UX 11i <limits.h> declares gettimeofday. */ +#define $ac_func innocuous_$ac_func + +/* System header to define __stub macros and hopefully few prototypes, + which can conflict with char $ac_func (); below. + Prefer <limits.h> to <assert.h> if __STDC__ is defined, since + <limits.h> exists even on freestanding compilers. */ + +#ifdef __STDC__ +# include <limits.h> +#else +# include <assert.h> +#endif + +#undef $ac_func + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_func (); +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. */ +#if defined __stub_$ac_func || defined __stub___$ac_func +choke me +#endif + +int +main () +{ +return $ac_func (); + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest$ac_exeext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + eval "$as_ac_var=yes" +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + eval "$as_ac_var=no" +fi + +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +ac_res=`eval echo '${'$as_ac_var'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +if test `eval echo '${'$as_ac_var'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + +{ echo "$as_me:$LINENO: checking whether llvm-gcc is dragonegg" >&5 +echo $ECHO_N "checking whether llvm-gcc is dragonegg... $ECHO_C" >&6; } +if test "${llvm_cv_llvmgcc_dragonegg+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + llvm_cv_llvmgcc_dragonegg="no" +if test -n "$LLVMGCC" ; then + cp /dev/null conftest.c + $LLVMGCC -fplugin-arg-dragonegg-emit-ir -S -o - conftest.c > /dev/null 2>&1 + if test $? -eq 0 ; then + llvm_cv_llvmgcc_dragonegg="yes" + fi + rm conftest.c +fi +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_llvmgcc_dragonegg" >&5 +echo "${ECHO_T}$llvm_cv_llvmgcc_dragonegg" >&6; } + +if test "$llvm_cv_llvmgcc_dragonegg" = "yes" ; then + LLVMCC_EMITIR_FLAG="-fplugin-arg-dragonegg-emit-ir" + LLVMCC_DISABLEOPT_FLAGS="-fplugin-arg-dragonegg-disable-llvm-optzns" +else + LLVMCC_EMITIR_FLAG="-emit-llvm" + LLVMCC_DISABLEOPT_FLAGS="-mllvm -disable-llvm-optzns" +fi + + + +{ echo "$as_me:$LINENO: checking whether llvm-gcc is sane" >&5 +echo $ECHO_N "checking whether llvm-gcc is sane... $ECHO_C" >&6; } +if test "${llvm_cv_llvmgcc_sanity+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + llvm_cv_llvmgcc_sanity="no" +if test -n "$LLVMGCC" ; then + cp /dev/null conftest.c + $LLVMGCC "$LLVMCC_EMITIR_FLAG" -S -o - conftest.c | \ + grep 'target datalayout =' > /dev/null 2>&1 + if test $? -eq 0 ; then + llvm_cv_llvmgcc_sanity="yes" + fi + rm conftest.c +fi +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_llvmgcc_sanity" >&5 +echo "${ECHO_T}$llvm_cv_llvmgcc_sanity" >&6; } + +if test "$llvm_cv_llvmgcc_sanity" = "yes" ; then + { echo "$as_me:$LINENO: checking llvm-gcc component support" >&5 +echo $ECHO_N "checking llvm-gcc component support... 
&6; }">
$ECHO_C" >&6; } + llvmcc1path=`$LLVMGCC --print-prog-name=cc1` + LLVMCC1=$llvmcc1path + + llvmcc1pluspath=`$LLVMGCC --print-prog-name=cc1plus` + LLVMCC1PLUS=$llvmcc1pluspath + + llvmgccdir=`echo "$llvmcc1path" | sed 's,/libexec/.*,,'` + LLVMGCCDIR=$llvmgccdir + + llvmgcclangs=`$LLVMGCC -v --help 2>&1 | grep '^Configured with:' | sed 's/^.*--enable-languages=\([^ ]*\).*/\1/'` + LLVMGCC_LANGS=$llvmgcclangs + + LLVMGCC_DRAGONEGG=$llvm_cv_llvmgcc_dragonegg + + + { echo "$as_me:$LINENO: result: ok" >&5 +echo "${ECHO_T}ok" >&6; } +fi + +SHLIBEXT=$libltdl_cv_shlibext + + +SHLIBPATH_VAR=$libltdl_cv_shlibpath_var + + +# Translate the various configuration directories and other basic +# information into substitutions that will end up in Makefile.config.in so +# that these configured values can be used by the makefiles +if test "${prefix}" = "NONE" ; then + prefix="/usr/local" +fi +eval LLVM_PREFIX="${prefix}"; +eval LLVM_BINDIR="${prefix}/bin"; +eval LLVM_LIBDIR="${prefix}/lib"; +eval LLVM_DATADIR="${prefix}/share/llvm"; +eval LLVM_DOCSDIR="${prefix}/share/doc/llvm"; +eval LLVM_ETCDIR="${prefix}/etc/llvm"; +eval LLVM_INCLUDEDIR="${prefix}/include"; +eval LLVM_INFODIR="${prefix}/info"; +eval LLVM_MANDIR="${prefix}/man"; +LLVM_CONFIGTIME=`date` + + + + + + + + + + +# Place the various directories into the config.h file as #defines so that we +# can know about the installation paths within LLVM. + +cat >>confdefs.h <<_ACEOF +#define LLVM_PREFIX "$LLVM_PREFIX" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_BINDIR "$LLVM_BINDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_LIBDIR "$LLVM_LIBDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_DATADIR "$LLVM_DATADIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_DOCSDIR "$LLVM_DOCSDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_ETCDIR "$LLVM_ETCDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_INCLUDEDIR "$LLVM_INCLUDEDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_INFODIR "$LLVM_INFODIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_MANDIR "$LLVM_MANDIR" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_CONFIGTIME "$LLVM_CONFIGTIME" +_ACEOF + + +cat >>confdefs.h <<_ACEOF +#define LLVM_HOSTTRIPLE "$host" +_ACEOF + + +# Determine which bindings to build. +if test "$BINDINGS_TO_BUILD" = auto ; then + BINDINGS_TO_BUILD="" + if test "x$OCAMLC" != x -a "x$OCAMLDEP" != x ; then + BINDINGS_TO_BUILD="ocaml $BINDINGS_TO_BUILD" + fi +fi +BINDINGS_TO_BUILD=$BINDINGS_TO_BUILD + + +# This isn't really configurey, but it avoids having to repeat the list in +# other files. +ALL_BINDINGS=ocaml + + +# Do any work necessary to ensure that bindings have what they need. +binding_prereqs_failed=0 +for a_binding in $BINDINGS_TO_BUILD ; do + case "$a_binding" in + ocaml) + if test "x$OCAMLC" = x ; then + { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc" >&5 +echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamlc not found. Try configure OCAMLC=/path/to/ocamlc" >&2;} + binding_prereqs_failed=1 + fi + if test "x$OCAMLDEP" = x ; then + { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamldep not found. Try configure OCAMLDEP=/path/to/ocamldep" >&5 +echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamldep not found.
Try configure OCAMLDEP=/path/to/ocamldep" >&2;} + binding_prereqs_failed=1 + fi + if test "x$OCAMLOPT" = x ; then + { echo "$as_me:$LINENO: WARNING: --enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt" >&5 +echo "$as_me: WARNING: --enable-bindings=ocaml specified, but ocamlopt not found. Try configure OCAMLOPT=/path/to/ocamlopt" >&2;} + fi + if test "x$with_ocaml_libdir" != xauto ; then + OCAML_LIBDIR=$with_ocaml_libdir + + else + ocaml_stdlib="`"$OCAMLC" -where`" + if test "$LLVM_PREFIX" '<' "$ocaml_stdlib" -a "$ocaml_stdlib" '<' "$LLVM_PREFIX~" + then + # ocaml stdlib is beneath our prefix; use stdlib + OCAML_LIBDIR=$ocaml_stdlib + + else + # ocaml stdlib is outside our prefix; use libdir/ocaml + OCAML_LIBDIR=$LLVM_LIBDIR/ocaml + + fi + fi + ;; + esac +done +if test "$binding_prereqs_failed" = 1 ; then + { { echo "$as_me:$LINENO: error: Prerequisites for bindings not satisfied. Fix them or use configure --disable-bindings." >&5 +echo "$as_me: error: Prerequisites for bindings not satisfied. Fix them or use configure --disable-bindings." >&2;} + { (exit 1); exit 1; }; } +fi + +{ echo "$as_me:$LINENO: checking for compiler -fvisibility-inlines-hidden option" >&5 +echo $ECHO_N "checking for compiler -fvisibility-inlines-hidden option... $ECHO_C" >&6; } +if test "${llvm_cv_cxx_visibility_inlines_hidden+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + oldcxxflags="$CXXFLAGS" + CXXFLAGS="$CXXFLAGS -fvisibility-inlines-hidden" + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ + +int +main () +{ + + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_cxx_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$?
= $ac_status" >&5 + (exit $ac_status); }; }; then + llvm_cv_cxx_visibility_inlines_hidden=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + llvm_cv_cxx_visibility_inlines_hidden=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + CXXFLAGS="$oldcxxflags" + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + +fi +{ echo "$as_me:$LINENO: result: $llvm_cv_cxx_visibility_inlines_hidden" >&5 +echo "${ECHO_T}$llvm_cv_cxx_visibility_inlines_hidden" >&6; } +if test "$llvm_cv_cxx_visibility_inlines_hidden" = yes ; then + ENABLE_VISIBILITY_INLINES_HIDDEN=1 + +else + ENABLE_VISIBILITY_INLINES_HIDDEN=0 + +fi + + +if test "$llvm_cv_link_use_r" = "yes" ; then + RPATH="-Wl,-R" +else + RPATH="-Wl,-rpath" +fi + + +if test "$llvm_cv_link_use_export_dynamic" = "yes" ; then + RDYNAMIC="-Wl,-export-dynamic" +else + RDYNAMIC="" +fi + + + +ac_config_headers="$ac_config_headers include/llvm/Config/config.h include/llvm/Config/llvm-config.h" + + + + + + +ac_config_files="$ac_config_files include/llvm/Config/Targets.def" + +ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def" + +ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def" + +ac_config_files="$ac_config_files include/llvm/Config/Disassemblers.def" + +ac_config_headers="$ac_config_headers include/llvm/Support/DataTypes.h" + + +ac_config_files="$ac_config_files Makefile.config" + + +ac_config_files="$ac_config_files llvm.spec" + + +ac_config_files="$ac_config_files tools/llvmc/src/Base.td" + + +ac_config_files="$ac_config_files tools/llvm-config/llvm-config.in" + + +ac_config_commands="$ac_config_commands setup" + +ac_config_commands="$ac_config_commands Makefile" + + +ac_config_commands="$ac_config_commands Makefile.common" + + +ac_config_commands="$ac_config_commands examples/Makefile" + + +ac_config_commands="$ac_config_commands lib/Makefile" + + +ac_config_commands="$ac_config_commands runtime/Makefile" + + +ac_config_commands="$ac_config_commands test/Makefile" + + +ac_config_commands="$ac_config_commands test/Makefile.tests" + + +ac_config_commands="$ac_config_commands unittests/Makefile" + + +ac_config_commands="$ac_config_commands tools/Makefile" + + +ac_config_commands="$ac_config_commands utils/Makefile" + + +ac_config_commands="$ac_config_commands projects/Makefile" + + +ac_config_commands="$ac_config_commands bindings/Makefile" + + +ac_config_commands="$ac_config_commands bindings/ocaml/Makefile.ocaml" + + + +cat >confcache <<\_ACEOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs, see configure's option --config-cache. +# It is not useful on other systems. If it contains results you don't +# want to keep, you may remove or edit it. +# +# config.status only pays attention to the cache file if you give it +# the --recheck option to rerun configure. +# +# `ac_cv_env_foo' variables (set or unset) will be overridden when +# loading this file, other *unset* `ac_cv_foo' will be assigned the +# following values. + +_ACEOF + +# The following way of writing the cache mishandles newlines in values, +# but we know of no workaround that is simple, portable, and efficient. +# So, we kill variables containing newlines. 
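+# For instance, the sed script below rewrites a cached line such as
+# "ac_cv_func_mmap_file=yes" into "ac_cv_func_mmap_file=${ac_cv_func_mmap_file=yes}",
+# an assignment-with-default, so re-loading the cache file never overrides a
+# value that is already set in the environment.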
+# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +( + for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; do + eval ac_val=\$$ac_var + case $ac_val in #( + *${as_nl}*) + case $ac_var in #( + *_cv_*) { echo "$as_me:$LINENO: WARNING: Cache variable $ac_var contains a newline." >&5 +echo "$as_me: WARNING: Cache variable $ac_var contains a newline." >&2;} ;; + esac + case $ac_var in #( + _ | IFS | as_nl) ;; #( + *) $as_unset $ac_var ;; + esac ;; + esac + done + + (set) 2>&1 | + case $as_nl`(ac_space=' '; set) 2>&1` in #( + *${as_nl}ac_space=\ *) + # `set' does not quote correctly, so add quotes (double-quote + # substitution turns \\\\ into \\, and sed turns \\ into \). + sed -n \ + "s/'/'\\\\''/g; + s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p" + ;; #( + *) + # `set' quotes correctly as required by POSIX, so do not add quotes. + sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p" + ;; + esac | + sort +) | + sed ' + /^ac_cv_env_/b end + t clear + :clear + s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + t end + s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ + :end' >>confcache +if diff "$cache_file" confcache >/dev/null 2>&1; then :; else + if test -w "$cache_file"; then + test "x$cache_file" != "x/dev/null" && + { echo "$as_me:$LINENO: updating cache $cache_file" >&5 +echo "$as_me: updating cache $cache_file" >&6;} + cat confcache >$cache_file + else + { echo "$as_me:$LINENO: not updating unwritable cache $cache_file" >&5 +echo "$as_me: not updating unwritable cache $cache_file" >&6;} + fi +fi +rm -f confcache + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +DEFS=-DHAVE_CONFIG_H + +ac_libobjs= +ac_ltlibobjs= +for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue + # 1. Remove the extension, and $U if already installed. + ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' + ac_i=`echo "$ac_i" | sed "$ac_script"` + # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR + # will be set to the directory where LIBOBJS objects are built. + ac_libobjs="$ac_libobjs \${LIBOBJDIR}$ac_i\$U.$ac_objext" + ac_ltlibobjs="$ac_ltlibobjs \${LIBOBJDIR}$ac_i"'$U.lo' +done +LIBOBJS=$ac_libobjs + +LTLIBOBJS=$ac_ltlibobjs + + +if test -z "${INSTALL_LTDL_TRUE}" && test -z "${INSTALL_LTDL_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"INSTALL_LTDL\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"INSTALL_LTDL\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi +if test -z "${CONVENIENCE_LTDL_TRUE}" && test -z "${CONVENIENCE_LTDL_FALSE}"; then + { { echo "$as_me:$LINENO: error: conditional \"CONVENIENCE_LTDL\" was never defined. +Usually this means the macro was only invoked conditionally." >&5 +echo "$as_me: error: conditional \"CONVENIENCE_LTDL\" was never defined. +Usually this means the macro was only invoked conditionally." >&2;} + { (exit 1); exit 1; }; } +fi + +: ${CONFIG_STATUS=./config.status} +ac_clean_files_save=$ac_clean_files +ac_clean_files="$ac_clean_files $CONFIG_STATUS" +{ echo "$as_me:$LINENO: creating $CONFIG_STATUS" >&5 +echo "$as_me: creating $CONFIG_STATUS" >&6;} +cat >$CONFIG_STATUS <<_ACEOF +#! $SHELL +# Generated by $as_me. +# Run this file to recreate the current configuration. 
+# Compiler output produced by configure, useful for debugging +# configure, is in config.log if it exists. + +debug=false +ac_cs_recheck=false +ac_cs_silent=false +SHELL=\${CONFIG_SHELL-$SHELL} +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +## --------------------- ## +## M4sh Initialization. ## +## --------------------- ## + +# Be Bourne compatible +if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then + emulate sh + NULLCMD=: + # Zsh 3.x and 4.x performs word splitting on ${1+"$@"}, which + # is contrary to our usage. Disable this feature. + alias -g '${1+"$@"}'='"$@"' + setopt NO_GLOB_SUBST +else + case `(set -o) 2>/dev/null` in *posix*) set -o posix;; esac +fi +BIN_SH=xpg4; export BIN_SH # for Tru64 +DUALCASE=1; export DUALCASE # for MKS sh + + +# PATH needs CR +# Avoid depending upon Character Ranges. +as_cr_letters='abcdefghijklmnopqrstuvwxyz' +as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ' +as_cr_Letters=$as_cr_letters$as_cr_LETTERS +as_cr_digits='0123456789' +as_cr_alnum=$as_cr_Letters$as_cr_digits + +# The user is always right. +if test "${PATH_SEPARATOR+set}" != set; then + echo "#! /bin/sh" >conf$$.sh + echo "exit 0" >>conf$$.sh + chmod +x conf$$.sh + if (PATH="/nonexistent;."; conf$$.sh) >/dev/null 2>&1; then + PATH_SEPARATOR=';' + else + PATH_SEPARATOR=: + fi + rm -f conf$$.sh +fi + +# Support unset when possible. +if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then + as_unset=unset +else + as_unset=false +fi + + +# IFS +# We need space, tab and new line, in precisely that order. Quoting is +# there to prevent editors from complaining about space-tab. +# (If _AS_PATH_WALK were called with IFS unset, it would disable word +# splitting by setting IFS to empty value.) +as_nl=' +' +IFS=" "" $as_nl" + +# Find who we are. Look in the path if we contain no directory separator. +case $0 in + *[\\/]* ) as_myself=$0 ;; + *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR +for as_dir in $PATH +do + IFS=$as_save_IFS + test -z "$as_dir" && as_dir=. + test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break +done +IFS=$as_save_IFS + + ;; +esac +# We did not find ourselves, most probably we were run as `sh COMMAND' +# in which case we are not to be found in the path. +if test "x$as_myself" = x; then + as_myself=$0 +fi +if test ! -f "$as_myself"; then + echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + { (exit 1); exit 1; } +fi + +# Work around bugs in pre-3.0 UWIN ksh. +for as_var in ENV MAIL MAILPATH +do ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var +done +PS1='$ ' +PS2='> ' +PS4='+ ' + +# NLS nuisances. +for as_var in \ + LANG LANGUAGE LC_ADDRESS LC_ALL LC_COLLATE LC_CTYPE LC_IDENTIFICATION \ + LC_MEASUREMENT LC_MESSAGES LC_MONETARY LC_NAME LC_NUMERIC LC_PAPER \ + LC_TELEPHONE LC_TIME +do + if (set +x; test -z "`(eval $as_var=C; export $as_var) 2>&1`"); then + eval $as_var=C; export $as_var + else + ($as_unset $as_var) >/dev/null 2>&1 && $as_unset $as_var + fi +done + +# Required to use basename. +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then + as_basename=basename +else + as_basename=false +fi + + +# Name of the executable. +as_me=`$as_basename -- "$0" || +$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ + X"$0" : 'X\(//\)$' \| \ + X"$0" : 'X\(/\)' \| . 
2>/dev/null || +echo X/"$0" | + sed '/^.*\/\([^/][^/]*\)\/*$/{ + s//\1/ + q + } + /^X\/\(\/\/\)$/{ + s//\1/ + q + } + /^X\/\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + +# CDPATH. +$as_unset CDPATH + + + + as_lineno_1=$LINENO + as_lineno_2=$LINENO + test "x$as_lineno_1" != "x$as_lineno_2" && + test "x`expr $as_lineno_1 + 1`" = "x$as_lineno_2" || { + + # Create $as_me.lineno as a copy of $as_myself, but with $LINENO + # uniformly replaced by the line number. The first 'sed' inserts a + # line-number line after each line using $LINENO; the second 'sed' + # does the real work. The second script uses 'N' to pair each + # line-number line with the line containing $LINENO, and appends + # trailing '-' during substitution so that $LINENO is not a special + # case at line end. + # (Raja R Harinath suggested sed '=', and Paul Eggert wrote the + # scripts with optimization help from Paolo Bonzini. Blame Lee + # E. McMahon (1931-1989) for sed's syntax. :-) + sed -n ' + p + /[$]LINENO/= + ' <$as_myself | + sed ' + s/[$]LINENO.*/&-/ + t lineno + b + :lineno + N + :loop + s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/ + t loop + s/-\n.*// + ' >$as_me.lineno && + chmod +x "$as_me.lineno" || + { echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2 + { (exit 1); exit 1; }; } + + # Don't try to exec as it changes $[0], causing all sort of problems + # (the dirname of $[0] is not the place where we might find the + # original and so on. Autoconf is especially sensitive to this). + . "./$as_me.lineno" + # Exit status is that of the last command. + exit +} + + +if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then + as_dirname=dirname +else + as_dirname=false +fi + +ECHO_C= ECHO_N= ECHO_T= +case `echo -n x` in +-n*) + case `echo 'x\c'` in + *c*) ECHO_T=' ';; # ECHO_T is single tab character. + *) ECHO_C='\c';; + esac;; +*) + ECHO_N='-n';; +esac + +if expr a : '\(a\)' >/dev/null 2>&1 && + test "X`expr 00001 : '.*\(...\)'`" = X001; then + as_expr=expr +else + as_expr=false +fi + +rm -f conf$$ conf$$.exe conf$$.file +if test -d conf$$.dir; then + rm -f conf$$.dir/conf$$.file +else + rm -f conf$$.dir + mkdir conf$$.dir +fi +echo >conf$$.file +if ln -s conf$$.file conf$$ 2>/dev/null; then + as_ln_s='ln -s' + # ... but there are two gotchas: + # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail. + # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable. + # In both cases, we have to default to `cp -p'. + ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe || + as_ln_s='cp -p' +elif ln conf$$.file conf$$ 2>/dev/null; then + as_ln_s=ln +else + as_ln_s='cp -p' +fi +rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file +rmdir conf$$.dir 2>/dev/null + +if mkdir -p . 2>/dev/null; then + as_mkdir_p=: +else + test -d ./-p && rmdir ./-p + as_mkdir_p=false +fi + +# Find out whether ``test -x'' works. Don't use a zero-byte file, as +# systems may use methods other than mode bits to determine executability. +cat >conf$$.file <<_ASEOF +#! /bin/sh +exit 0 +_ASEOF +chmod +x conf$$.file +if test -x conf$$.file >/dev/null 2>&1; then + as_executable_p="test -x" +else + as_executable_p=: +fi +rm -f conf$$.file + +# Sed expression to map a string onto a valid CPP name. +as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'" + +# Sed expression to map a string onto a valid variable name. 
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'" + + +exec 6>&1 + +# Save the log message, to keep $[0] and so on meaningful, and to +# report actual input values of CONFIG_FILES etc. instead of their +# values after options handling. +ac_log=" +This file was extended by llvm $as_me 2.9, which was +generated by GNU Autoconf 2.60. Invocation command line was + + CONFIG_FILES = $CONFIG_FILES + CONFIG_HEADERS = $CONFIG_HEADERS + CONFIG_LINKS = $CONFIG_LINKS + CONFIG_COMMANDS = $CONFIG_COMMANDS + $ $0 $@ + +on `(hostname || uname -n) 2>/dev/null | sed 1q` +" + +_ACEOF + +cat >>$CONFIG_STATUS <<_ACEOF +# Files that config.status was made for. +config_files="$ac_config_files" +config_headers="$ac_config_headers" +config_commands="$ac_config_commands" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +ac_cs_usage="\ +\`$as_me' instantiates files from templates according to the +current configuration. + +Usage: $0 [OPTIONS] [FILE]... + + -h, --help print this help, then exit + -V, --version print version number, then exit + -q, --quiet do not print progress messages + -d, --debug don't remove temporary files + --recheck update $as_me by reconfiguring in the same conditions + --file=FILE[:TEMPLATE] + instantiate the configuration file FILE + --header=FILE[:TEMPLATE] + instantiate the configuration header FILE + +Configuration files: +$config_files + +Configuration headers: +$config_headers + +Configuration commands: +$config_commands + +Report bugs to ." + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +ac_cs_version="\\ +llvm config.status 2.9 +configured by $0, generated by GNU Autoconf 2.60, + with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" + +Copyright (C) 2006 Free Software Foundation, Inc. +This config.status script is free software; the Free Software Foundation +gives unlimited permission to copy, distribute and modify it." + +ac_pwd='$ac_pwd' +srcdir='$srcdir' +INSTALL='$INSTALL' +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If no file are specified by the user, then we need to provide default +# value. By we need to know if files were specified by the user. +ac_need_defaults=: +while test $# != 0 +do + case $1 in + --*=*) + ac_option=`expr "X$1" : 'X\([^=]*\)='` + ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'` + ac_shift=: + ;; + *) + ac_option=$1 + ac_optarg=$2 + ac_shift=shift + ;; + esac + + case $ac_option in + # Handling of the options. + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + ac_cs_recheck=: ;; + --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) + echo "$ac_cs_version"; exit ;; + --debug | --debu | --deb | --de | --d | -d ) + debug=: ;; + --file | --fil | --fi | --f ) + $ac_shift + CONFIG_FILES="$CONFIG_FILES $ac_optarg" + ac_need_defaults=false;; + --header | --heade | --head | --hea ) + $ac_shift + CONFIG_HEADERS="$CONFIG_HEADERS $ac_optarg" + ac_need_defaults=false;; + --he | --h) + # Conflict between --help and --header + { echo "$as_me: error: ambiguous option: $1 +Try \`$0 --help' for more information." >&2 + { (exit 1); exit 1; }; };; + --help | --hel | -h ) + echo "$ac_cs_usage"; exit ;; + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil | --si | --s) + ac_cs_silent=: ;; + + # This is an error. + -*) { echo "$as_me: error: unrecognized option: $1 +Try \`$0 --help' for more information." 
>&2 + { (exit 1); exit 1; }; } ;; + + *) ac_config_targets="$ac_config_targets $1" + ac_need_defaults=false ;; + + esac + shift +done + +ac_configure_extra_args= + +if $ac_cs_silent; then + exec 6>/dev/null + ac_configure_extra_args="$ac_configure_extra_args --silent" +fi + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +if \$ac_cs_recheck; then + echo "running CONFIG_SHELL=$SHELL $SHELL $0 "$ac_configure_args \$ac_configure_extra_args " --no-create --no-recursion" >&6 + CONFIG_SHELL=$SHELL + export CONFIG_SHELL + exec $SHELL "$0"$ac_configure_args \$ac_configure_extra_args --no-create --no-recursion +fi + +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +exec 5>>config.log +{ + echo + sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX +## Running $as_me. ## +_ASBOX + echo "$ac_log" +} >&5 + +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF +# +# INIT-COMMANDS +# +llvm_src="${srcdir}" + +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF + +# Handling of arguments. +for ac_config_target in $ac_config_targets +do + case $ac_config_target in + "include/llvm/Config/config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/config.h" ;; + "include/llvm/Config/llvm-config.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Config/llvm-config.h" ;; + "include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;; + "include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;; + "include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;; + "include/llvm/Config/Disassemblers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Disassemblers.def" ;; + "include/llvm/Support/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/Support/DataTypes.h" ;; + "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;; + "llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;; + "tools/llvmc/src/Base.td") CONFIG_FILES="$CONFIG_FILES tools/llvmc/src/Base.td" ;; + "tools/llvm-config/llvm-config.in") CONFIG_FILES="$CONFIG_FILES tools/llvm-config/llvm-config.in" ;; + "setup") CONFIG_COMMANDS="$CONFIG_COMMANDS setup" ;; + "Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile" ;; + "Makefile.common") CONFIG_COMMANDS="$CONFIG_COMMANDS Makefile.common" ;; + "examples/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS examples/Makefile" ;; + "lib/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS lib/Makefile" ;; + "runtime/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS runtime/Makefile" ;; + "test/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS test/Makefile" ;; + "test/Makefile.tests") CONFIG_COMMANDS="$CONFIG_COMMANDS test/Makefile.tests" ;; + "unittests/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS unittests/Makefile" ;; + "tools/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS tools/Makefile" ;; + "utils/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS utils/Makefile" ;; + "projects/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS projects/Makefile" ;; + "bindings/Makefile") CONFIG_COMMANDS="$CONFIG_COMMANDS bindings/Makefile" ;; + "bindings/ocaml/Makefile.ocaml") CONFIG_COMMANDS="$CONFIG_COMMANDS bindings/ocaml/Makefile.ocaml" ;; + + *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 +echo "$as_me: error: invalid argument: $ac_config_target" >&2;} + { (exit 1); exit 1; }; };; + esac +done + + +# If the user did not use the arguments to specify the items to instantiate, +# then the envvar interface is used. Set only those that are not. 
+# We use the long form for the default assignment because of an extremely +# bizarre bug on SunOS 4.1.3. +if $ac_need_defaults; then + test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers + test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands +fi + +# Have a temporary directory for convenience. Make it in the build tree +# simply because there is no reason against having it here, and in addition, +# creating and moving files from /tmp can sometimes cause problems. +# Hook for its removal unless debugging. +# Note that there is a small window in which the directory will not be cleaned: +# after its creation but before its name has been assigned to `$tmp'. +$debug || +{ + tmp= + trap 'exit_status=$? + { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status +' 0 + trap '{ (exit 1); exit 1; }' 1 2 13 15 +} +# Create a (secure) tmp directory for tmp files. + +{ + tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` && + test -n "$tmp" && test -d "$tmp" +} || +{ + tmp=./conf$$-$RANDOM + (umask 077 && mkdir "$tmp") +} || +{ + echo "$me: cannot create a temporary directory in ." >&2 + { (exit 1); exit 1; } +} + +# +# Set up the sed scripts for CONFIG_FILES section. +# + +# No need to generate the scripts if there are no CONFIG_FILES. +# This happens for instance when ./config.status config.h +if test -n "$CONFIG_FILES"; then + +_ACEOF + + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +SHELL!$SHELL$ac_delim +PATH_SEPARATOR!$PATH_SEPARATOR$ac_delim +PACKAGE_NAME!$PACKAGE_NAME$ac_delim +PACKAGE_TARNAME!$PACKAGE_TARNAME$ac_delim +PACKAGE_VERSION!$PACKAGE_VERSION$ac_delim +PACKAGE_STRING!$PACKAGE_STRING$ac_delim +PACKAGE_BUGREPORT!$PACKAGE_BUGREPORT$ac_delim +exec_prefix!$exec_prefix$ac_delim +prefix!$prefix$ac_delim +program_transform_name!$program_transform_name$ac_delim +bindir!$bindir$ac_delim +sbindir!$sbindir$ac_delim +libexecdir!$libexecdir$ac_delim +datarootdir!$datarootdir$ac_delim +datadir!$datadir$ac_delim +sysconfdir!$sysconfdir$ac_delim +sharedstatedir!$sharedstatedir$ac_delim +localstatedir!$localstatedir$ac_delim +includedir!$includedir$ac_delim +oldincludedir!$oldincludedir$ac_delim +docdir!$docdir$ac_delim +infodir!$infodir$ac_delim +htmldir!$htmldir$ac_delim +dvidir!$dvidir$ac_delim +pdfdir!$pdfdir$ac_delim +psdir!$psdir$ac_delim +libdir!$libdir$ac_delim +localedir!$localedir$ac_delim +mandir!$mandir$ac_delim +DEFS!$DEFS$ac_delim +ECHO_C!$ECHO_C$ac_delim +ECHO_N!$ECHO_N$ac_delim +ECHO_T!$ECHO_T$ac_delim +LIBS!$LIBS$ac_delim +build_alias!$build_alias$ac_delim +host_alias!$host_alias$ac_delim +target_alias!$target_alias$ac_delim +LLVM_COPYRIGHT!$LLVM_COPYRIGHT$ac_delim +subdirs!$subdirs$ac_delim +ENABLE_POLLY!$ENABLE_POLLY$ac_delim +LLVM_HAS_POLLY!$LLVM_HAS_POLLY$ac_delim +build!$build$ac_delim +build_cpu!$build_cpu$ac_delim +build_vendor!$build_vendor$ac_delim +build_os!$build_os$ac_delim +host!$host$ac_delim +host_cpu!$host_cpu$ac_delim +host_vendor!$host_vendor$ac_delim +host_os!$host_os$ac_delim +target!$target$ac_delim +target_cpu!$target_cpu$ac_delim +target_vendor!$target_vendor$ac_delim +target_os!$target_os$ac_delim +OS!$OS$ac_delim +HOST_OS!$HOST_OS$ac_delim +TARGET_OS!$TARGET_OS$ac_delim +LINKALL!$LINKALL$ac_delim +NOLINKALL!$NOLINKALL$ac_delim +LLVM_ON_UNIX!$LLVM_ON_UNIX$ac_delim +LLVM_ON_WIN32!$LLVM_ON_WIN32$ac_delim +ARCH!$ARCH$ac_delim +ENDIAN!$ENDIAN$ac_delim +CC!$CC$ac_delim 
+CFLAGS!$CFLAGS$ac_delim +LDFLAGS!$LDFLAGS$ac_delim +CPPFLAGS!$CPPFLAGS$ac_delim +ac_ct_CC!$ac_ct_CC$ac_delim +EXEEXT!$EXEEXT$ac_delim +OBJEXT!$OBJEXT$ac_delim +CPP!$CPP$ac_delim +GREP!$GREP$ac_delim +EGREP!$EGREP$ac_delim +LLVM_CROSS_COMPILING!$LLVM_CROSS_COMPILING$ac_delim +BUILD_CC!$BUILD_CC$ac_delim +BUILD_EXEEXT!$BUILD_EXEEXT$ac_delim +BUILD_CXX!$BUILD_CXX$ac_delim +CVSBUILD!$CVSBUILD$ac_delim +ENABLE_OPTIMIZED!$ENABLE_OPTIMIZED$ac_delim +ENABLE_PROFILING!$ENABLE_PROFILING$ac_delim +DISABLE_ASSERTIONS!$DISABLE_ASSERTIONS$ac_delim +ENABLE_EXPENSIVE_CHECKS!$ENABLE_EXPENSIVE_CHECKS$ac_delim +EXPENSIVE_CHECKS!$EXPENSIVE_CHECKS$ac_delim +DEBUG_RUNTIME!$DEBUG_RUNTIME$ac_delim +DEBUG_SYMBOLS!$DEBUG_SYMBOLS$ac_delim +JIT!$JIT$ac_delim +TARGET_HAS_JIT!$TARGET_HAS_JIT$ac_delim +ENABLE_DOCS!$ENABLE_DOCS$ac_delim +ENABLE_DOXYGEN!$ENABLE_DOXYGEN$ac_delim +ENABLE_THREADS!$ENABLE_THREADS$ac_delim +ENABLE_PTHREADS!$ENABLE_PTHREADS$ac_delim +ENABLE_PIC!$ENABLE_PIC$ac_delim +ENABLE_SHARED!$ENABLE_SHARED$ac_delim +ENABLE_EMBED_STDCXX!$ENABLE_EMBED_STDCXX$ac_delim +ENABLE_TIMESTAMPS!$ENABLE_TIMESTAMPS$ac_delim +TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim +LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim +LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-1.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +CEOF$ac_eof +_ACEOF + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim +LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim +ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim +CLANGPATH!$CLANGPATH$ac_delim +CLANGXXPATH!$CLANGXXPATH$ac_delim +ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim +OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim +EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim +BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim +CXX!$CXX$ac_delim +CXXFLAGS!$CXXFLAGS$ac_delim +ac_ct_CXX!$ac_ct_CXX$ac_delim +NM!$NM$ac_delim +ifGNUmake!$ifGNUmake$ac_delim +LN_S!$LN_S$ac_delim +CMP!$CMP$ac_delim +CP!$CP$ac_delim +DATE!$DATE$ac_delim +FIND!$FIND$ac_delim +MKDIR!$MKDIR$ac_delim +MV!$MV$ac_delim +RANLIB!$RANLIB$ac_delim +AR!$AR$ac_delim +RM!$RM$ac_delim +SED!$SED$ac_delim +TAR!$TAR$ac_delim +BINPWD!$BINPWD$ac_delim +GRAPHVIZ!$GRAPHVIZ$ac_delim +DOT!$DOT$ac_delim +FDP!$FDP$ac_delim +NEATO!$NEATO$ac_delim +TWOPI!$TWOPI$ac_delim +CIRCO!$CIRCO$ac_delim +GV!$GV$ac_delim +DOTTY!$DOTTY$ac_delim +XDOT_PY!$XDOT_PY$ac_delim +PERL!$PERL$ac_delim +HAVE_PERL!$HAVE_PERL$ac_delim +INSTALL_PROGRAM!$INSTALL_PROGRAM$ac_delim +INSTALL_SCRIPT!$INSTALL_SCRIPT$ac_delim +INSTALL_DATA!$INSTALL_DATA$ac_delim +BZIP2!$BZIP2$ac_delim +CAT!$CAT$ac_delim +DOXYGEN!$DOXYGEN$ac_delim +GROFF!$GROFF$ac_delim +GZIPBIN!$GZIPBIN$ac_delim +POD2HTML!$POD2HTML$ac_delim 
+POD2MAN!$POD2MAN$ac_delim +PDFROFF!$PDFROFF$ac_delim +RUNTEST!$RUNTEST$ac_delim +TCLSH!$TCLSH$ac_delim +ZIP!$ZIP$ac_delim +OCAMLC!$OCAMLC$ac_delim +OCAMLOPT!$OCAMLOPT$ac_delim +OCAMLDEP!$OCAMLDEP$ac_delim +OCAMLDOC!$OCAMLDOC$ac_delim +GAS!$GAS$ac_delim +HAVE_LINK_VERSION_SCRIPT!$HAVE_LINK_VERSION_SCRIPT$ac_delim +INSTALL_LTDL_TRUE!$INSTALL_LTDL_TRUE$ac_delim +INSTALL_LTDL_FALSE!$INSTALL_LTDL_FALSE$ac_delim +CONVENIENCE_LTDL_TRUE!$CONVENIENCE_LTDL_TRUE$ac_delim +CONVENIENCE_LTDL_FALSE!$CONVENIENCE_LTDL_FALSE$ac_delim +LIBADD_DL!$LIBADD_DL$ac_delim +LLVMGCCCOMMAND!$LLVMGCCCOMMAND$ac_delim +LLVMGXXCOMMAND!$LLVMGXXCOMMAND$ac_delim +LLVMGCC!$LLVMGCC$ac_delim +LLVMGXX!$LLVMGXX$ac_delim +LLVMCC_OPTION!$LLVMCC_OPTION$ac_delim +NO_VARIADIC_MACROS!$NO_VARIADIC_MACROS$ac_delim +NO_MISSING_FIELD_INITIALIZERS!$NO_MISSING_FIELD_INITIALIZERS$ac_delim +USE_UDIS86!$USE_UDIS86$ac_delim +USE_OPROFILE!$USE_OPROFILE$ac_delim +HAVE_PTHREAD!$HAVE_PTHREAD$ac_delim +HUGE_VAL_SANITY!$HUGE_VAL_SANITY$ac_delim +MMAP_FILE!$MMAP_FILE$ac_delim +LLVMCC_EMITIR_FLAG!$LLVMCC_EMITIR_FLAG$ac_delim +LLVMCC1!$LLVMCC1$ac_delim +LLVMCC1PLUS!$LLVMCC1PLUS$ac_delim +LLVMGCCDIR!$LLVMGCCDIR$ac_delim +LLVMGCC_LANGS!$LLVMGCC_LANGS$ac_delim +LLVMGCC_DRAGONEGG!$LLVMGCC_DRAGONEGG$ac_delim +LLVMCC_DISABLEOPT_FLAGS!$LLVMCC_DISABLEOPT_FLAGS$ac_delim +SHLIBEXT!$SHLIBEXT$ac_delim +SHLIBPATH_VAR!$SHLIBPATH_VAR$ac_delim +LLVM_PREFIX!$LLVM_PREFIX$ac_delim +LLVM_BINDIR!$LLVM_BINDIR$ac_delim +LLVM_LIBDIR!$LLVM_LIBDIR$ac_delim +LLVM_DATADIR!$LLVM_DATADIR$ac_delim +LLVM_DOCSDIR!$LLVM_DOCSDIR$ac_delim +LLVM_ETCDIR!$LLVM_ETCDIR$ac_delim +LLVM_INCLUDEDIR!$LLVM_INCLUDEDIR$ac_delim +LLVM_INFODIR!$LLVM_INFODIR$ac_delim +LLVM_MANDIR!$LLVM_MANDIR$ac_delim +LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim +BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim +ALL_BINDINGS!$ALL_BINDINGS$ac_delim +OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! " + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +CEOF$ac_eof +_ACEOF + + +ac_delim='%!_!# ' +for ac_last_try in false false false false false :; do + cat >conf$$subs.sed <<_ACEOF +ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim +RPATH!$RPATH$ac_delim +RDYNAMIC!$RDYNAMIC$ac_delim +LIBOBJS!$LIBOBJS$ac_delim +LTLIBOBJS!$LTLIBOBJS$ac_delim +_ACEOF + + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 5; then + break + elif $ac_last_try; then + { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 +echo "$as_me: error: could not make $CONFIG_STATUS" >&2;} + { (exit 1); exit 1; }; } + else + ac_delim="$ac_delim!$ac_delim _$ac_delim!! 
" + fi +done + +ac_eof=`sed -n '/^CEOF[0-9]*$/s/CEOF/0/p' conf$$subs.sed` +if test -n "$ac_eof"; then + ac_eof=`echo "$ac_eof" | sort -nru | sed 1q` + ac_eof=`expr $ac_eof + 1` +fi + +cat >>$CONFIG_STATUS <<_ACEOF +cat >"\$tmp/subs-3.sed" <<\CEOF$ac_eof +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end +_ACEOF +sed ' +s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g +s/^/s,@/; s/!/@,|#_!!_#|/ +:n +t n +s/'"$ac_delim"'$/,g/; t +s/$/\\/; p +N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n +' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF +:end +s/|#_!!_#|//g +CEOF$ac_eof +_ACEOF + + +# VPATH may cause trouble with some makes, so we remove $(srcdir), +# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and +# trailing colons and then remove the whole line if VPATH becomes empty +# (actually we leave an empty line to preserve line numbers). +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=/{ +s/:*\$(srcdir):*/:/ +s/:*\${srcdir}:*/:/ +s/:*@srcdir@:*/:/ +s/^\([^=]*=[ ]*\):*/\1/ +s/:*$// +s/^[^=]*=[ ]*$// +}' +fi + +cat >>$CONFIG_STATUS <<\_ACEOF +fi # test -n "$CONFIG_FILES" + + +for ac_tag in :F $CONFIG_FILES :H $CONFIG_HEADERS :C $CONFIG_COMMANDS +do + case $ac_tag in + :[FHLC]) ac_mode=$ac_tag; continue;; + esac + case $ac_mode$ac_tag in + :[FHL]*:*);; + :L* | :C*:*) { { echo "$as_me:$LINENO: error: Invalid tag $ac_tag." >&5 +echo "$as_me: error: Invalid tag $ac_tag." >&2;} + { (exit 1); exit 1; }; };; + :[FH]-) ac_tag=-:-;; + :[FH]*) ac_tag=$ac_tag:$ac_tag.in;; + esac + ac_save_IFS=$IFS + IFS=: + set x $ac_tag + IFS=$ac_save_IFS + shift + ac_file=$1 + shift + + case $ac_mode in + :L) ac_source=$1;; + :[FH]) + ac_file_inputs= + for ac_f + do + case $ac_f in + -) ac_f="$tmp/stdin";; + *) # Look for the file first in the build tree, then in the source tree + # (if the path is not absolute). The absolute path cannot be DOS-style, + # because $ac_f cannot contain `:'. + test -f "$ac_f" || + case $ac_f in + [\\/$]*) false;; + *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";; + esac || + { { echo "$as_me:$LINENO: error: cannot find input file: $ac_f" >&5 +echo "$as_me: error: cannot find input file: $ac_f" >&2;} + { (exit 1); exit 1; }; };; + esac + ac_file_inputs="$ac_file_inputs $ac_f" + done + + # Let's still pretend it is `configure' which instantiates (i.e., don't + # use $as_me), people would be surprised to read: + # /* config.h. Generated by config.status. */ + configure_input="Generated from "`IFS=: + echo $* | sed 's|^[^:]*/||;s|:[^:]*/|, |g'`" by configure." + if test x"$ac_file" != x-; then + configure_input="$ac_file. $configure_input" + { echo "$as_me:$LINENO: creating $ac_file" >&5 +echo "$as_me: creating $ac_file" >&6;} + fi + + case $ac_tag in + *:-:* | *:-) cat >"$tmp/stdin";; + esac + ;; + esac + + ac_dir=`$as_dirname -- "$ac_file" || +$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$ac_file" : 'X\(//\)[^/]' \| \ + X"$ac_file" : 'X\(//\)$' \| \ + X"$ac_file" : 'X\(/\)' \| . 
2>/dev/null || +echo X"$ac_file" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + case $ac_mode in + :F) + # + # CONFIG_FILE + # + + case $INSTALL in + [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;; + *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;; + esac +_ACEOF + +cat >>$CONFIG_STATUS <<\_ACEOF +# If the template does not know about datarootdir, expand it. +# FIXME: This hack should be removed a few years after 2.60. +ac_datarootdir_hack=; ac_datarootdir_seen= + +case `sed -n '/datarootdir/ { + p + q +} +/@datadir@/p +/@docdir@/p +/@infodir@/p +/@localedir@/p +/@mandir@/p +' $ac_file_inputs` in +*datarootdir*) ac_datarootdir_seen=yes;; +*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) + { echo "$as_me:$LINENO: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} +_ACEOF +cat >>$CONFIG_STATUS <<_ACEOF + ac_datarootdir_hack=' + s&@datadir@&$datadir&g + s&@docdir@&$docdir&g + s&@infodir@&$infodir&g + s&@localedir@&$localedir&g + s&@mandir@&$mandir&g + s&\\\${datarootdir}&$datarootdir&g' ;; +esac +_ACEOF + +# Neutralize VPATH when `$srcdir' = `.'. +# Shell code in configure.ac might set extrasub. +# FIXME: do we really want to maintain this feature? 
+cat >>$CONFIG_STATUS <<_ACEOF + sed "$ac_vpsub +$extrasub +_ACEOF +cat >>$CONFIG_STATUS <<\_ACEOF +:t +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b +s&@configure_input@&$configure_input&;t t +s&@top_builddir@&$ac_top_builddir_sub&;t t +s&@srcdir@&$ac_srcdir&;t t +s&@abs_srcdir@&$ac_abs_srcdir&;t t +s&@top_srcdir@&$ac_top_srcdir&;t t +s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t +s&@builddir@&$ac_builddir&;t t +s&@abs_builddir@&$ac_abs_builddir&;t t +s&@abs_top_builddir@&$ac_abs_top_builddir&;t t +s&@INSTALL@&$ac_INSTALL&;t t +$ac_datarootdir_hack +" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" | sed -f "$tmp/subs-3.sed" >$tmp/out + +test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && + { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && + { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } && + { echo "$as_me:$LINENO: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&5 +echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +which seems to be undefined. Please make sure it is defined." >&2;} + + rm -f "$tmp/stdin" + case $ac_file in + -) cat "$tmp/out"; rm -f "$tmp/out";; + *) rm -f "$ac_file"; mv "$tmp/out" $ac_file;; + esac + ;; + :H) + # + # CONFIG_HEADER + # +_ACEOF + +# Transform confdefs.h into a sed script `conftest.defines', that +# substitutes the proper values into config.h.in to produce config.h. +rm -f conftest.defines conftest.tail +# First, append a space to every undef/define line, to ease matching. +echo 's/$/ /' >conftest.defines +# Then, protect against being on the right side of a sed subst, or in +# an unquoted here document, in config.status. If some macros were +# called several times there might be several #defines for the same +# symbol, which is useless. But do not sort them, since the last +# AC_DEFINE must be honored. +ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]* +# These sed commands are passed to sed as "A NAME B PARAMS C VALUE D", where +# NAME is the cpp macro being defined, VALUE is the value it is being given. +# PARAMS is the parameter list in the macro definition--in most cases, it's +# just an empty string. +ac_dA='s,^\\([ #]*\\)[^ ]*\\([ ]*' +ac_dB='\\)[ (].*,\\1define\\2' +ac_dC=' ' +ac_dD=' ,' + +uniq confdefs.h | + sed -n ' + t rset + :rset + s/^[ ]*#[ ]*define[ ][ ]*// + t ok + d + :ok + s/[\\&,]/\\&/g + s/^\('"$ac_word_re"'\)\(([^()]*)\)[ ]*\(.*\)/ '"$ac_dA"'\1'"$ac_dB"'\2'"${ac_dC}"'\3'"$ac_dD"'/p + s/^\('"$ac_word_re"'\)[ ]*\(.*\)/'"$ac_dA"'\1'"$ac_dB$ac_dC"'\2'"$ac_dD"'/p + ' >>conftest.defines + +# Remove the space that was appended to ease matching. +# Then replace #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +# (The regexp can be short, since the line contains either #define or #undef.) +echo 's/ $// +s,^[ #]*u.*,/* & */,' >>conftest.defines + +# Break up conftest.defines: +ac_max_sed_lines=50 + +# First sed command is: sed -f defines.sed $ac_file_inputs >"$tmp/out1" +# Second one is: sed -f defines.sed "$tmp/out1" >"$tmp/out2" +# Third one will be: sed -f defines.sed "$tmp/out2" >"$tmp/out1" +# et cetera. 
+ac_in='$ac_file_inputs' +ac_out='"$tmp/out1"' +ac_nxt='"$tmp/out2"' + +while : +do + # Write a here document: + cat >>$CONFIG_STATUS <<_ACEOF + # First, check the format of the line: + cat >"\$tmp/defines.sed" <<\\CEOF +/^[ ]*#[ ]*undef[ ][ ]*$ac_word_re[ ]*\$/b def +/^[ ]*#[ ]*define[ ][ ]*$ac_word_re[( ]/b def +b +:def +_ACEOF + sed ${ac_max_sed_lines}q conftest.defines >>$CONFIG_STATUS + echo 'CEOF + sed -f "$tmp/defines.sed"' "$ac_in >$ac_out" >>$CONFIG_STATUS + ac_in=$ac_out; ac_out=$ac_nxt; ac_nxt=$ac_in + sed 1,${ac_max_sed_lines}d conftest.defines >conftest.tail + grep . conftest.tail >/dev/null || break + rm -f conftest.defines + mv conftest.tail conftest.defines +done +rm -f conftest.defines conftest.tail + +echo "ac_result=$ac_in" >>$CONFIG_STATUS +cat >>$CONFIG_STATUS <<\_ACEOF + if test x"$ac_file" != x-; then + echo "/* $configure_input */" >"$tmp/config.h" + cat "$ac_result" >>"$tmp/config.h" + if diff $ac_file "$tmp/config.h" >/dev/null 2>&1; then + { echo "$as_me:$LINENO: $ac_file is unchanged" >&5 +echo "$as_me: $ac_file is unchanged" >&6;} + else + rm -f $ac_file + mv "$tmp/config.h" $ac_file + fi + else + echo "/* $configure_input */" + cat "$ac_result" + fi + rm -f "$tmp/out12" + ;; + + :C) { echo "$as_me:$LINENO: executing $ac_file commands" >&5 +echo "$as_me: executing $ac_file commands" >&6;} + ;; + esac + + + case $ac_file$ac_mode in + "Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile Makefile ;; + "Makefile.common":C) ${llvm_src}/autoconf/mkinstalldirs `dirname Makefile.common` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/Makefile.common Makefile.common ;; + "examples/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname examples/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/examples/Makefile examples/Makefile ;; + "lib/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname lib/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/lib/Makefile lib/Makefile ;; + "runtime/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname runtime/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/runtime/Makefile runtime/Makefile ;; + "test/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile test/Makefile ;; + "test/Makefile.tests":C) ${llvm_src}/autoconf/mkinstalldirs `dirname test/Makefile.tests` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/test/Makefile.tests test/Makefile.tests ;; + "unittests/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname unittests/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/unittests/Makefile unittests/Makefile ;; + "tools/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname tools/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/tools/Makefile tools/Makefile ;; + "utils/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname utils/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/utils/Makefile utils/Makefile ;; + "projects/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname projects/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/projects/Makefile projects/Makefile ;; + "bindings/Makefile":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/Makefile` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c 
${srcdir}/bindings/Makefile bindings/Makefile ;; + "bindings/ocaml/Makefile.ocaml":C) ${llvm_src}/autoconf/mkinstalldirs `dirname bindings/ocaml/Makefile.ocaml` + ${SHELL} ${llvm_src}/autoconf/install-sh -m 0644 -c ${srcdir}/bindings/ocaml/Makefile.ocaml bindings/ocaml/Makefile.ocaml ;; + + esac +done # for ac_tag + + +{ (exit 0); exit 0; } +_ACEOF +chmod +x $CONFIG_STATUS +ac_clean_files=$ac_clean_files_save + + +# configure is writing to config.log, and then calls config.status. +# config.status does its own redirection, appending to config.log. +# Unfortunately, on DOS this fails, as config.log is still kept open +# by configure, so config.status won't be able to write to it; its +# output is simply discarded. So we exec the FD to /dev/null, +# effectively closing config.log, so it can be properly (re)opened and +# appended to by config.status. When coming back to configure, we +# need to make the FD available again. +if test "$no_create" != yes; then + ac_cs_success=: + ac_config_status_args= + test "$silent" = yes && + ac_config_status_args="$ac_config_status_args --quiet" + exec 5>/dev/null + $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false + exec 5>>config.log + # Use ||, not &&, to avoid exiting from the if with $? = 1, which + # would make configure fail if this is the last instruction. + $ac_cs_success || { (exit 1); exit 1; } +fi + +# +# CONFIG_SUBDIRS section. +# +if test "$no_recursion" != yes; then + + # Remove --cache-file and --srcdir arguments so they do not pile up. + ac_sub_configure_args= + ac_prev= + eval "set x $ac_configure_args" + shift + for ac_arg + do + if test -n "$ac_prev"; then + ac_prev= + continue + fi + case $ac_arg in + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* \ + | --c=*) + ;; + --config-cache | -C) + ;; + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + ;; + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + ;; + *) + case $ac_arg in + *\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + ac_sub_configure_args="$ac_sub_configure_args '$ac_arg'" ;; + esac + done + + # Always prepend --prefix to ensure using the same prefix + # in subdir configurations. + ac_arg="--prefix=$prefix" + case $ac_arg in + *\'*) ac_arg=`echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + ac_sub_configure_args="$ac_arg $ac_sub_configure_args" + + ac_popdir=`pwd` + for ac_dir in : $subdirs; do test "x$ac_dir" = x: && continue + + # Do not complain, so a configure script can configure whichever + # parts of a large source tree are present. 
+ test -d "$srcdir/$ac_dir" || continue + + ac_msg="=== configuring in $ac_dir (`pwd`/$ac_dir)" + echo "$as_me:$LINENO: $ac_msg" >&5 + echo "$ac_msg" >&6 + { as_dir="$ac_dir" + case $as_dir in #( + -*) as_dir=./$as_dir;; + esac + test -d "$as_dir" || { $as_mkdir_p && mkdir -p "$as_dir"; } || { + as_dirs= + while :; do + case $as_dir in #( + *\'*) as_qdir=`echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #( + *) as_qdir=$as_dir;; + esac + as_dirs="'$as_qdir' $as_dirs" + as_dir=`$as_dirname -- "$as_dir" || +$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ + X"$as_dir" : 'X\(//\)[^/]' \| \ + X"$as_dir" : 'X\(//\)$' \| \ + X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || +echo X"$as_dir" | + sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ + s//\1/ + q + } + /^X\(\/\/\)[^/].*/{ + s//\1/ + q + } + /^X\(\/\/\)$/{ + s//\1/ + q + } + /^X\(\/\).*/{ + s//\1/ + q + } + s/.*/./; q'` + test -d "$as_dir" && break + done + test -z "$as_dirs" || eval "mkdir $as_dirs" + } || test -d "$as_dir" || { { echo "$as_me:$LINENO: error: cannot create directory $as_dir" >&5 +echo "$as_me: error: cannot create directory $as_dir" >&2;} + { (exit 1); exit 1; }; }; } + ac_builddir=. + +case "$ac_dir" in +.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; +*) + ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` + # A ".." for each directory in $ac_dir_suffix. + ac_top_builddir_sub=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,/..,g;s,/,,'` + case $ac_top_builddir_sub in + "") ac_top_builddir_sub=. ac_top_build_prefix= ;; + *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; + esac ;; +esac +ac_abs_top_builddir=$ac_pwd +ac_abs_builddir=$ac_pwd$ac_dir_suffix +# for backward compatibility: +ac_top_builddir=$ac_top_build_prefix + +case $srcdir in + .) # We are building in place. + ac_srcdir=. + ac_top_srcdir=$ac_top_builddir_sub + ac_abs_top_srcdir=$ac_pwd ;; + [\\/]* | ?:[\\/]* ) # Absolute name. + ac_srcdir=$srcdir$ac_dir_suffix; + ac_top_srcdir=$srcdir + ac_abs_top_srcdir=$srcdir ;; + *) # Relative name. + ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix + ac_top_srcdir=$ac_top_build_prefix$srcdir + ac_abs_top_srcdir=$ac_pwd/$srcdir ;; +esac +ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix + + + cd "$ac_dir" + + # Check for guested configure; otherwise get Cygnus style configure. + if test -f "$ac_srcdir/configure.gnu"; then + ac_sub_configure=$ac_srcdir/configure.gnu + elif test -f "$ac_srcdir/configure"; then + ac_sub_configure=$ac_srcdir/configure + elif test -f "$ac_srcdir/configure.in"; then + # This should be Cygnus configure. + ac_sub_configure=$ac_aux_dir/configure + else + { echo "$as_me:$LINENO: WARNING: no configuration information is in $ac_dir" >&5 +echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2;} + ac_sub_configure= + fi + + # The recursion is here. + if test -n "$ac_sub_configure"; then + # Make the cache file name correct relative to the subdirectory. + case $cache_file in + [\\/]* | ?:[\\/]* ) ac_sub_cache_file=$cache_file ;; + *) # Relative name. + ac_sub_cache_file=$ac_top_build_prefix$cache_file ;; + esac + + { echo "$as_me:$LINENO: running $SHELL $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_srcdir" >&5 +echo "$as_me: running $SHELL $ac_sub_configure $ac_sub_configure_args --cache-file=$ac_sub_cache_file --srcdir=$ac_srcdir" >&6;} + # The eval makes quoting arguments work. 
+ eval "\$SHELL \"\$ac_sub_configure\" $ac_sub_configure_args \ + --cache-file=\"\$ac_sub_cache_file\" --srcdir=\"\$ac_srcdir\"" || + { { echo "$as_me:$LINENO: error: $ac_sub_configure failed for $ac_dir" >&5 +echo "$as_me: error: $ac_sub_configure failed for $ac_dir" >&2;} + { (exit 1); exit 1; }; } + fi + + cd "$ac_popdir" + done +fi + diff --git a/final/docs/AliasAnalysis.html b/final/docs/AliasAnalysis.html new file mode 100644 index 00000000000..7baa9467697 --- /dev/null +++ b/final/docs/AliasAnalysis.html @@ -0,0 +1,1064 @@ + + + + LLVM Alias Analysis Infrastructure + + + + +
+ LLVM Alias Analysis Infrastructure +
+ +
    +
  1. Introduction
  2. AliasAnalysis Class Overview
  3. Writing a new AliasAnalysis Implementation
  4. Using alias analysis results
  5. Existing alias analysis implementations and clients
  6. Memory Dependence Analysis
+ +
+

Written by Chris Lattner

+
+ + + + + +
+ +

Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt +to determine whether or not two pointers can ever point to the same object in +memory. There are many different algorithms for alias analysis and many +different ways of classifying them: flow-sensitive vs flow-insensitive, +context-sensitive vs context-insensitive, field-sensitive vs field-insensitive, +unification-based vs subset-based, etc. Traditionally, alias analyses respond +to a query with a Must, May, or No alias response, +indicating that two pointers always point to the same object, might point to the +same object, or are known to never point to the same object.

+ +

The LLVM AliasAnalysis +class is the primary interface used by clients and implementations of alias +analyses in the LLVM system. This class is the common interface between clients +of alias analysis information and the implementations providing it, and is +designed to support a wide range of implementations and clients (but currently +all clients are assumed to be flow-insensitive). In addition to simple alias +analysis information, this class exposes Mod/Ref information from those +implementations which can provide it, allowing for powerful analyses and +transformations to work well together.

+ +

This document contains information necessary to successfully implement this +interface, to use it, and to test both sides. It also explains some of the finer +points about what exactly results mean. If you feel that something is unclear +or should be added, please let me +know.

+ +
+ + + + + +
+ +

The AliasAnalysis +class defines the interface that the various alias analysis implementations +should support. This class exports two important enums: AliasResult +and ModRefResult which represent the result of an alias query or a +mod/ref query, respectively.
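For reference, here is a sketch of those two enums as they appear in this release's include/llvm/Analysis/AliasAnalysis.h (paraphrased from the header; the exact numeric values are incidental):

+enum AliasResult {
+  NoAlias = 0,    // The two memory objects never overlap.
+  MayAlias,       // The two memory objects may or may not overlap.
+  PartialAlias,   // The objects overlap but do not start at the same address.
+  MustAlias       // The two memory objects always start at the same address.
+};
+
+enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };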

+ +

The AliasAnalysis interface exposes information about memory, +represented in several different ways. In particular, memory objects are +represented as a starting address and size, and function calls are represented +as the actual call or invoke instructions that perform the +call. The AliasAnalysis interface also exposes some helper methods +which allow you to get mod/ref information for arbitrary instructions.

+ +

All AliasAnalysis interfaces require that in queries involving +multiple values, values which are not +constants are all defined within the +same function.

+ +
+ + + + +
+ +

Most importantly, the AliasAnalysis class provides several methods +which are used to query whether or not two memory objects alias, whether +function calls can modify or read a memory object, etc. For all of these +queries, memory objects are represented as a pair of their starting address (a +symbolic LLVM Value*) and a static size.

+ +

Representing memory objects as a starting address and a size is critically +important for correct Alias Analyses. For example, consider this (silly, but +possible) C code:

+ +
+
+int i;
+char C[2];
+char A[10]; 
+/* ... */
+for (i = 0; i != 10; ++i) {
+  C[0] = A[i];          /* One byte store */
+  C[1] = A[9-i];        /* One byte store */
+}
+
+
+ +

In this case, the basicaa pass will disambiguate the stores to +C[0] and C[1] because they are accesses to two distinct +locations one byte apart, and the accesses are each one byte. In this case, the +LICM pass can use store motion to remove the stores from the loop. In +contrast, the following code:

+ +
+
+int i;
+char C[2];
+char A[10]; 
+/* ... */
+for (i = 0; i != 10; ++i) {
+  ((short*)C)[0] = A[i];  /* Two byte store! */
+  C[1] = A[9-i];          /* One byte store */
+}
+
+
+ +

In this case, the two stores to C do alias each other, because the access to +the &C[0] element is a two byte access. If size information weren't +available in the query, even in the first case the analysis would have to +conservatively assume that the accesses alias.
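As a sketch of what these queries look like through the interface (hypothetical client code: AA is an AliasAnalysis reference, and PtrC0 and PtrC1 stand in for the Value*s of &C[0] and &C[1] above):

+// One-byte accesses at addresses one byte apart: basicaa can prove NoAlias.
+AliasAnalysis::AliasResult R1 = AA.alias(PtrC0, 1, PtrC1, 1);
+
+// Widening the first access to two bytes makes the ranges overlap, so a
+// correct analysis must not return NoAlias for this query.
+AliasAnalysis::AliasResult R2 = AA.alias(PtrC0, 2, PtrC1, 1);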

+ +
+ + + + +
+

The alias method is the primary interface used to determine whether +or not two memory objects alias each other. It takes two memory objects as +input and returns MustAlias, PartialAlias, MayAlias, or NoAlias as +appropriate.

+ +

Like all AliasAnalysis interfaces, the alias method requires +that either the two pointer values be defined within the same function, or at +least one of the values is a constant.

+
+ + + + +
+

The NoAlias response may be used when there is never an immediate dependence +between any memory reference based on one pointer and any memory +reference based on the other. The most obvious example is when the two +pointers point to non-overlapping memory ranges. Another is when the two +pointers are only ever used for reading memory. Another is when the memory is +freed and reallocated between accesses through one pointer and accesses through +the other -- in this case, there is a dependence, but it's mediated by the free +and reallocation.

+ +

An exception to this is the +noalias keyword; the "irrelevant" +dependencies are ignored.

+ +

The MayAlias response is used whenever the two pointers might refer to the +same object.

+ +

The PartialAlias response is used when the two memory objects are known +to be overlapping in some way, but do not start at the same address.

+ +

The MustAlias response may only be returned if the two memory objects are +guaranteed to always start at exactly the same location. A MustAlias response +implies that the pointers compare equal.
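To make the four responses concrete, here is an illustrative sketch in the style of the C examples above (a conservative implementation may of course answer MayAlias to any of these queries):

+char Buf[8];
+char *P = &Buf[0], *Q = &Buf[2], *R = &Buf[0];
+/* alias(P, 2, Q, 2) --> NoAlias      (disjoint two-byte ranges)          */
+/* alias(P, 4, Q, 4) --> PartialAlias (overlapping, different start)      */
+/* alias(P, 1, R, 4) --> MustAlias    (identical starting address)        */
+/* alias(P, 4, U, 4) --> MayAlias    (U is a pointer we know nothing of)  */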

+ +
+ + + + +
+ +

The getModRefInfo methods return information about whether the +execution of an instruction can read or modify a memory location. Mod/Ref +information is always conservative: if an instruction might read or write +a location, ModRef is returned.

+ +

The AliasAnalysis class also provides a getModRefInfo +method for testing dependencies between function calls. This method takes two +call sites (CS1 & CS2), and returns NoModRef if neither call writes to memory +read or written by the other, Ref if CS1 reads memory written by CS2, Mod if CS1 +writes to memory read or written by CS2, or ModRef if CS1 might read or write +memory written to by CS2. Note that this relation is not commutative.
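A sketch of both forms of the query (hypothetical client code: CS1 and CS2 are call sites, L is a load instruction, and LoadSize is the size of the loaded location):

+// Can the call at CS1 modify the memory that load L reads?
+if (AA.getModRefInfo(CS1, L->getPointerOperand(), LoadSize) &
+    AliasAnalysis::Mod) {
+  // Conservatively treat the call as clobbering the loaded location.
+}
+
+// Do two calls interfere with each other at all?
+if (AA.getModRefInfo(CS1, CS2) == AliasAnalysis::NoModRef) {
+  // Neither call reads or writes memory touched by the other.
+}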

+ +
+ + + + + +
+ +

+Several other tidbits of information are often collected by various alias +analysis implementations and can be put to good use by various clients. +

+ +
+ + +
+ The pointsToConstantMemory method +
+ +
+ +

The pointsToConstantMemory method returns true if and only if the +analysis can prove that the pointer only points to unchanging memory locations +(functions, constant global variables, and the null pointer). This information +can be used to refine mod/ref information: it is impossible for an unchanging +memory location to be modified.
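For example, a client might use it to strip the Mod bit from a mod/ref result (a sketch only; Ptr and MRI are hypothetical):

+// Unchanging memory can never be modified, so drop any Mod bit.
+if (AA.pointsToConstantMemory(Ptr))
+  MRI = AliasAnalysis::ModRefResult(MRI & ~AliasAnalysis::Mod);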

+ +
+ + + + +
+ +

These methods are used to provide very simple mod/ref information for +function calls. The doesNotAccessMemory method returns true for a +function if the analysis can prove that the function never reads or writes to +memory, or if the function only reads from constant memory. Functions with this +property are side-effect free and only depend on their input arguments, allowing +them to be eliminated if they form common subexpressions or be hoisted out of +loops. Many common functions behave this way (e.g., sin and +cos) but many others do not (e.g., acos, which modifies the +errno variable).

+ +

The onlyReadsMemory method returns true for a function if analysis +can prove that (at most) the function only reads from non-volatile memory. +Functions with this property are side-effect free, only depending on their input +arguments and the state of memory when they are called. This property allows +calls to these functions to be eliminated and moved around, as long as there is +no store instruction that changes the contents of memory. Note that all +functions that satisfy the doesNotAccessMemory method also satisfy +onlyReadsMemory.
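A sketch of how a client might use these predicates when considering code motion of a call to a function F (illustrative only):

+if (AA.doesNotAccessMemory(F)) {
+  // Acts as a pure function of its arguments: identical calls can be
+  // CSE'd, and the call can be hoisted out of loops freely.
+} else if (AA.onlyReadsMemory(F)) {
+  // Side-effect free, but its result may depend on memory: the call may
+  // only move across code that provably does not store to memory.
+}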

+ +
+ + + + + +
+ +

Writing a new alias analysis implementation for LLVM is quite +straightforward. There are already several implementations that you can use +for examples, and the following information should help fill in any details. +For examples, take a look at the various alias analysis +implementations included with LLVM.

+ +
+ + + + +
+ +

The first step is to determine what type of LLVM pass you need to use for your Alias +Analysis. As is the case with most other analyses and transformations, the +answer should be fairly obvious from what type of problem you are trying to +solve:

+ +
  1. If you require interprocedural analysis, it should be a Pass.
  2. If you are a function-local analysis, subclass FunctionPass.
  3. If you don't need to look at the program at all, subclass ImmutablePass.
+ +

In addition to the pass that you subclass, you should also inherit from the +AliasAnalysis interface, of course, and use the +RegisterAnalysisGroup template to register as an implementation of +AliasAnalysis.
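A registration skeleton might look roughly like this (a sketch in the older static-registration style; the pass name MyAA and the "my-aa" flag are hypothetical, and newer code uses the INITIALIZE_AG_PASS macro instead):

+
+namespace {
+  struct MyAA : public FunctionPass, public AliasAnalysis {
+    static char ID;
+    MyAA() : FunctionPass(ID) {}
+    // ... overridden AliasAnalysis methods go here ...
+  };
+}
+char MyAA::ID = 0;
+static RegisterPass<MyAA> X("my-aa", "My alias analysis");
+static RegisterAnalysisGroup<AliasAnalysis> Y(X);
+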

+ +
+ + + + +
+ +

Your subclass of AliasAnalysis is required to invoke two methods on the AliasAnalysis base class: getAnalysisUsage and InitializeAliasAnalysis. In particular, your implementation of getAnalysisUsage should explicitly call into the AliasAnalysis::getAnalysisUsage method in addition to declaring any pass dependencies your pass has. Thus you should have something like this:

+ +
+
+void getAnalysisUsage(AnalysisUsage &AU) const {
+  AliasAnalysis::getAnalysisUsage(AU);
+  // declare your dependencies here.
+}
+
+
+ +

Additionally, you must invoke the InitializeAliasAnalysis method from your analysis run method (run for a Pass, runOnFunction for a FunctionPass, or InitializePass for an ImmutablePass). For example (as part of a Pass):

+ +
+
+bool run(Module &M) {
+  InitializeAliasAnalysis(this);
+  // Perform analysis here...
+  return false;
+}
+
+
+ +
+ + + + +
+ +

All of the AliasAnalysis +virtual methods default to providing chaining to another +alias analysis implementation, which ends up returning conservatively correct +information (returning "May" Alias and "Mod/Ref" for alias and mod/ref queries +respectively). Depending on the capabilities of the analysis you are +implementing, you just override the interfaces you can improve.

+ +
+ + + + + + +
+ +

With only two special exceptions (the basicaa and no-aa +passes) every alias analysis pass chains to another alias analysis +implementation (for example, the user can specify "-basicaa -ds-aa +-licm" to get the maximum benefit from both alias +analyses). The alias analysis class automatically takes care of most of this +for methods that you don't override. For methods that you do override, in code +paths that return a conservative MayAlias or Mod/Ref result, simply return +whatever the superclass computes. For example:

+ +
+
+AliasAnalysis::AliasResult alias(const Value *V1, unsigned V1Size,
+                                 const Value *V2, unsigned V2Size) {
+  if (...)
+    return NoAlias;
+  ...
+
+  // Couldn't determine a must or no-alias result.
+  return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
+}
+
+
+ +

In addition to analysis queries, you must make sure to unconditionally pass LLVM update notification methods to the superclass as well if you override them, which allows all alias analyses in a chain to be updated.
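For example, an implementation that overrides deleteValue should forward the notification down the chain after updating its own state (a sketch):

+
+void deleteValue(Value *V) {
+  // First drop any entries for V from this analysis's data structures...
+  // ...then unconditionally notify the rest of the chain.
+  AliasAnalysis::deleteValue(V);
+}
+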

+ +
+ + + + + +
+

+Alias analysis information is initially computed for a static snapshot of the +program, but clients will use this information to make transformations to the +code. All but the most trivial forms of alias analysis will need to have their +analysis results updated to reflect the changes made by these transformations. +

+ +

+The AliasAnalysis interface exposes four methods which are used to +communicate program changes from the clients to the analysis implementations. +Various alias analysis implementations should use these methods to ensure that +their internal data structures are kept up-to-date as the program changes (for +example, when an instruction is deleted), and clients of alias analysis must be +sure to call these interfaces appropriately. +

+
+ + +
The deleteValue method
+ +
+The deleteValue method is called by transformations when they remove an +instruction or any other value from the program (including values that do not +use pointers). Typically alias analyses keep data structures that have entries +for each value in the program. When this method is called, they should remove +any entries for the specified value, if they exist. +
+ + +
The copyValue method
+ +
+The copyValue method is used when a new value is introduced into the +program. There is no way to introduce a value into the program that did not +exist before (this doesn't make sense for a safe compiler transformation), so +this is the only way to introduce a new value. This method indicates that the +new value has exactly the same properties as the value being copied. +
+ + +
The replaceWithNewValue method
+ +
This method is a simple helper that makes clients easier to write. It is implemented by copying the old analysis information to the new value and then deleting the old value. This method cannot be overridden by alias analysis implementations.
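From the client side, the update calls compose roughly like this (a sketch; OldPtr and NewPtr are assumed to be values being rewritten by some transformation):

+
+AA.copyValue(OldPtr, NewPtr);       // NewPtr inherits OldPtr's info
+OldPtr->replaceAllUsesWith(NewPtr);
+AA.deleteValue(OldPtr);             // drop the stale entry
+// ...or, combining the copy and delete, the helper:
+// AA.replaceWithNewValue(OldPtr, NewPtr);
+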
+ + +
The addEscapingUse method
+ +
+

The addEscapingUse method is used when the uses of a pointer value have changed in ways that may invalidate precomputed analysis information. Implementations may either use this callback to provide conservative responses for pointers whose uses have changed since analysis time, or may recompute some or all of their internal state to continue providing accurate responses.

+ +

In general, any new use of a pointer value is considered an escaping use, +and must be reported through this callback, except for the +uses below:

+ +
  • A bitcast or getelementptr of the pointer
  • A store through the pointer (but not a store of the pointer)
  • A load through the pointer
+
+ + + + +
+ +

From the LLVM perspective, the only thing you need to do to provide an +efficient alias analysis is to make sure that alias analysis queries are +serviced quickly. The actual calculation of the alias analysis results (the +"run" method) is only performed once, but many (perhaps duplicate) queries may +be performed. Because of this, try to move as much computation to the run +method as possible (within reason).

+ +
+ + + + +
+ +

The AliasAnalysis infrastructure has several limitations which make +writing a new AliasAnalysis implementation difficult.

+ +

There is no way to override the default alias analysis. It would +be very useful to be able to do something like "opt -my-aa -O2" and +have it use -my-aa for all passes which need AliasAnalysis, but there +is currently no support for that, short of changing the source code +and recompiling. Similarly, there is also no way of setting a chain +of analyses as the default.

+ +

There is no way for transform passes to declare that they preserve AliasAnalysis implementations. The AliasAnalysis interface includes deleteValue and copyValue methods which are intended to allow a pass to keep an AliasAnalysis consistent; however, there's no way for a pass to declare in its getAnalysisUsage that it does so. Some passes attempt to use AU.addPreserved&lt;AliasAnalysis&gt;; however, this doesn't actually have any effect.

AliasAnalysisCounter (-count-aa) and AliasDebugger +(-debug-aa) are implemented as ModulePass classes, so if your +alias analysis uses FunctionPass, it won't be able to use +these utilities. If you try to use them, the pass manager will +silently route alias analysis queries directly to +BasicAliasAnalysis instead.

+ +

Similarly, the opt -p option introduces ModulePass +passes between each pass, which prevents the use of FunctionPass +alias analysis passes.

+ +

The AliasAnalysis API does have functions for notifying implementations when values are deleted or copied; however, these aren't sufficient. There are many other ways that LLVM IR can be modified which could be relevant to AliasAnalysis implementations but which cannot be expressed.

+ +

The AliasAnalysisDebugger utility seems to suggest that +AliasAnalysis implementations can expect that they will be +informed of any relevant Value before it appears in an +alias query. However, popular clients such as GVN don't +support this, and are known to trigger errors when run with the +AliasAnalysisDebugger.

+ +

Due to several of the above limitations, the most obvious use for +the AliasAnalysisCounter utility, collecting stats on all +alias queries in a compilation, doesn't work, even if the +AliasAnalysis implementations don't use FunctionPass. +There's no way to set a default, much less a default sequence, +and there's no way to preserve it.

+ +

The AliasSetTracker class (which is used by LICM) makes a non-deterministic number of alias queries. This can cause stats collected by AliasAnalysisCounter to fluctuate among identical runs, for example. Another consequence is that debugging techniques involving pausing execution after a predetermined number of queries can be unreliable.

+ +

Many alias queries can be reformulated in terms of other alias +queries. When multiple AliasAnalysis queries are chained together, +it would make sense to start those queries from the beginning of the chain, +with care taken to avoid infinite looping, however currently an +implementation which wants to do this can only start such queries +from itself.

+ +
+ + + + + +
+ +

There are several different ways to use alias analysis results. In order of +preference, these are...

+ +
+ + + + +
+ +

The memdep pass uses alias analysis to provide high-level dependence +information about memory-using instructions. This will tell you which store +feeds into a load, for example. It uses caching and other techniques to be +efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations. +

+ +
+ + + + +
+ +

Many transformations need information about alias sets that are active +in some scope, rather than information about pairwise aliasing. The AliasSetTracker class +is used to efficiently build these Alias Sets from the pairwise alias analysis +information provided by the AliasAnalysis interface.

+ +

First you initialize the AliasSetTracker by using the "add" methods +to add information about various potentially aliasing instructions in the scope +you are interested in. Once all of the alias sets are completed, your pass +should simply iterate through the constructed alias sets, using the +AliasSetTracker begin()/end() methods.
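A typical construction loop might look like this (a sketch; BB is assumed to range over the basic blocks in the scope of interest, and method names follow AliasSetTracker.h):

+
+AliasSetTracker Tracker(AA);
+for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+  Tracker.add(&*I);  // fold each instruction into the alias sets
+
+for (AliasSetTracker::iterator AS = Tracker.begin(), ASE = Tracker.end();
+     AS != ASE; ++AS) {
+  if (!AS->isMod()) {
+    // Every pointer in this set is only read within the scope.
+  }
+}
+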

+ +

The AliasSets formed by the AliasSetTracker are guaranteed +to be disjoint, calculate mod/ref information and volatility for the set, and +keep track of whether or not all of the pointers in the set are Must aliases. +The AliasSetTracker also makes sure that sets are properly folded due to call +instructions, and can provide a list of pointers in each set.

+ +

As an example user of this, the Loop Invariant Code Motion pass uses AliasSetTrackers to calculate alias sets for each loop nest. If an AliasSet in a loop is not modified, then all load instructions from that set may be hoisted out of the loop. If any alias sets are stored to and are must alias sets, then the stores may be sunk out of the loop, promoting the memory location to a register for the duration of the loop nest. Both of these transformations only apply if the pointer argument is loop-invariant.

+ +
+ + +
+ The AliasSetTracker implementation +
+ +
+ +

The AliasSetTracker class is implemented to be as efficient as possible. It +uses the union-find algorithm to efficiently merge AliasSets when a pointer is +inserted into the AliasSetTracker that aliases multiple sets. The primary data +structure is a hash table mapping pointers to the AliasSet they are in.

+ +

The AliasSetTracker class must maintain a list of all of the LLVM Value*'s that are in each AliasSet. Since the hash table already has entries for each LLVM Value* of interest, the AliasSets thread the linked list through these hash-table nodes to avoid having to allocate memory unnecessarily, and to make merging alias sets extremely efficient (the linked list merge is constant time).

+ +

You shouldn't need to understand these details if you are just a client of +the AliasSetTracker, but if you look at the code, hopefully this brief +description will help make sense of why things are designed the way they +are.

+ +
+ + + + +
+ +

If neither of these utility classes is what your pass needs, you should use the interfaces exposed by the AliasAnalysis class directly. Try to use the higher-level methods when possible (e.g., use mod/ref information instead of the alias method directly if possible) to get the best precision and efficiency.

+ +
+ + + + + +
+ +

If you're going to be working with the LLVM alias analysis infrastructure, you should know what clients and implementations of alias analysis are available. In particular, if you are implementing an alias analysis, you should be aware of the clients that are useful for monitoring and evaluating different implementations.

+ +
+ + + + +
+ +

This section lists the various implementations of the AliasAnalysis +interface. With the exception of the -no-aa and +-basicaa implementations, all of these chain to other alias analysis implementations.

+ +
+ + + + +
+ +

The -no-aa pass is just like what it sounds: an alias analysis that +never returns any useful information. This pass can be useful if you think that +alias analysis is doing something wrong and are trying to narrow down a +problem.

+ +
+ + + + +
+ +

The -basicaa pass is an aggressive local analysis that "knows" +many important facts:

+ +
  • Distinct globals, stack allocations, and heap allocations can never alias.
  • Globals, stack allocations, and heap allocations never alias the null pointer.
  • Different fields of a structure do not alias.
  • Indexes into arrays with statically differing subscripts cannot alias.
  • Many common standard C library functions never access memory or only read memory.
  • Pointers that obviously point to constant globals return true for pointsToConstantMemory.
  • Function calls cannot modify or reference stack allocations if they never escape from the function that allocates them (a common case for automatic arrays).
+ +
+ + + + +
+ +

This pass implements a simple context-sensitive mod/ref and alias analysis +for internal global variables that don't "have their address taken". If a +global does not have its address taken, the pass knows that no pointers alias +the global. This pass also keeps track of functions that it knows never access +memory or never read memory. This allows certain optimizations (e.g. GVN) to +eliminate call instructions entirely. +

+ +

The real power of this pass is that it provides context-sensitive mod/ref +information for call instructions. This allows the optimizer to know that +calls to a function do not clobber or read the value of the global, allowing +loads and stores to be eliminated.

+ +

Note that this pass is somewhat limited in its scope (it only supports non-address-taken globals), but it is a very quick analysis.

+
+ + + + +
+ +

The -steens-aa pass implements a variation on the well-known +"Steensgaard's algorithm" for interprocedural alias analysis. Steensgaard's +algorithm is a unification-based, flow-insensitive, context-insensitive, and +field-insensitive alias analysis that is also very scalable (effectively linear +time).

+ +

The LLVM -steens-aa pass implements a "speculatively +field-sensitive" version of Steensgaard's algorithm using the Data +Structure Analysis framework. This gives it substantially more precision than +the standard algorithm while maintaining excellent analysis scalability.

+ +

Note that -steens-aa is available in the optional "poolalloc" module; it is not part of the LLVM core.

+ +
+ + + + +
+ +

The -ds-aa pass implements the full Data Structure Analysis +algorithm. Data Structure Analysis is a modular unification-based, +flow-insensitive, context-sensitive, and speculatively +field-sensitive alias analysis that is also quite scalable, usually at +O(n*log(n)).

+ +

This algorithm is capable of responding to a full variety of alias analysis +queries, and can provide context-sensitive mod/ref information as well. The +only major facility not implemented so far is support for must-alias +information.

+ +

Note that -ds-aa is available in the optional "poolalloc" module; it is not part of the LLVM core.

+ +
+ + + + +
+ +

The -scev-aa pass implements AliasAnalysis queries by +translating them into ScalarEvolution queries. This gives it a +more complete understanding of getelementptr instructions +and loop induction variables than other alias analyses have.

+ +
+ + + + +
+LLVM includes several alias-analysis driven transformations which can be used +with any of the implementations above. +
+ + + + +
+ +

The -adce pass, which implements Aggressive Dead Code Elimination, uses the AliasAnalysis interface to delete calls to functions that do not have side-effects and are not used.

+ +
+ + + + + +
+ +

The -licm pass implements various Loop Invariant Code Motion related +transformations. It uses the AliasAnalysis interface for several +different transformations:

+ +
  • It uses mod/ref information to hoist or sink load instructions out of loops if there are no instructions in the loop that modify the loaded memory.
  • It uses mod/ref information to hoist function calls out of loops that do not write to memory and are loop-invariant.
  • It uses alias information to promote memory objects that are loaded and stored to in loops to live in a register instead. It can do this if there are no may aliases to the loaded/stored memory location.
+ +
+ + + + +
+

The -argpromotion pass promotes by-reference arguments to be passed by value instead. In particular, if a pointer argument is only loaded from, it passes the loaded value to the function instead of the address. This pass uses alias information to make sure that the value loaded from the argument pointer is not modified between the entry of the function and any load of the pointer.

+
+ + + + +
+ +

These passes use AliasAnalysis information to reason about loads and stores. +

+ +
+ + + + +
+ +

These passes are useful for evaluating the various alias analysis +implementations. You can use them with commands like 'opt -ds-aa +-aa-eval foo.bc -disable-output -stats'.

+ +
+ + + + +
+ +

The -print-alias-sets pass is exposed as part of the +opt tool to print out the Alias Sets formed by the AliasSetTracker class. This is useful if you're using +the AliasSetTracker class. To use it, use something like:

+ +
+
+% opt -ds-aa -print-alias-sets -disable-output
+
+
+ +
+ + + + + +
+ +

The -count-aa pass is useful to see how many queries a particular +pass is making and what responses are returned by the alias analysis. As an +example,

+ +
+
+% opt -basicaa -count-aa -ds-aa -count-aa -licm
+
+
+ +

will print out how many queries (and what responses are returned) by the +-licm pass (of the -ds-aa pass) and how many queries are made +of the -basicaa pass by the -ds-aa pass. This can be useful +when debugging a transformation or an alias analysis implementation.

+ +
+ + + + +
+ +

The -aa-eval pass simply iterates through all pairs of pointers in a +function and asks an alias analysis whether or not the pointers alias. This +gives an indication of the precision of the alias analysis. Statistics are +printed indicating the percent of no/may/must aliases found (a more precise +algorithm will have a lower number of may aliases).

+ +
+ + + + + +
+ +

If you're just looking to be a client of alias analysis information, consider +using the Memory Dependence Analysis interface instead. MemDep is a lazy, +caching layer on top of alias analysis that is able to answer the question of +what preceding memory operations a given instruction depends on, either at an +intra- or inter-block level. Because of its laziness and caching +policy, using MemDep can be a significant performance win over accessing alias +analysis directly.
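For instance, a MemDep client might ask what a given load depends on like this (a sketch from within a pass that requires MemoryDependenceAnalysis; LI is an assumed load instruction):

+
+MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+MemDepResult Dep = MDA.getDependency(LI);
+if (Dep.isDef()) {
+  // Dep.getInst() is the instruction (e.g., a store) that produces
+  // the value this load reads.
+}
+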

+ +
+ + + +
+
+ Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/BitCodeFormat.html b/final/docs/BitCodeFormat.html new file mode 100644 index 00000000000..0b8747c261f --- /dev/null +++ b/final/docs/BitCodeFormat.html @@ -0,0 +1,1487 @@ + + + + + LLVM Bitcode File Format + + + +
LLVM Bitcode File Format
+
  1. Abstract
  2. Overview
  3. Bitstream Format
     1. Magic Numbers
     2. Primitives
     3. Abbreviation IDs
     4. Blocks
     5. Data Records
     6. Abbreviations
     7. Standard Blocks
  4. Bitcode Wrapper Format
  5. LLVM IR Encoding
     1. Basics
     2. MODULE_BLOCK Contents
     3. PARAMATTR_BLOCK Contents
     4. TYPE_BLOCK Contents
     5. CONSTANTS_BLOCK Contents
     6. FUNCTION_BLOCK Contents
     7. TYPE_SYMTAB_BLOCK Contents
     8. VALUE_SYMTAB_BLOCK Contents
     9. METADATA_BLOCK Contents
     10. METADATA_ATTACHMENT Contents
+
+

Written by Chris Lattner, + Joshua Haberman, + and Peter S. Housel. +

+
+ + + + + +
+ +

This document describes the LLVM bitstream file format and the encoding of +the LLVM IR into it.

+ +
+ + + + + +
+ +

+What is commonly known as the LLVM bitcode file format (also, sometimes +anachronistically known as bytecode) is actually two things: a bitstream container format +and an encoding of LLVM IR into the container format.

+ +

The bitstream format is an abstract encoding of structured data, very similar to XML in some ways. Like XML, bitstream files contain tags and nested structures, and you can parse the file without having to understand the tags. Unlike XML, the bitstream format is a binary encoding, and unlike XML it provides a mechanism for the file to self-describe "abbreviations", which are effectively size optimizations for the content.

+ +

LLVM IR files may be optionally embedded into a wrapper structure that makes it easy to embed extra data +along with LLVM IR files.

+ +

This document first describes the LLVM bitstream format, describes the +wrapper format, then describes the record structure used by LLVM IR files. +

+ +
+ + + + + +
+ +

+The bitstream format is literally a stream of bits, with a very simple +structure. This structure consists of the following concepts: +

+ +
  • A "magic number" that identifies the contents of the stream.
  • Encoding primitives like variable bit-rate integers.
  • Blocks, which define nested content.
  • Data Records, which describe entities within the file.
  • Abbreviations, which specify compression optimizations for the file.
+ +

Note that the llvm-bcanalyzer tool can be +used to dump and inspect arbitrary bitstreams, which is very useful for +understanding the encoding.

+ +
+ + + + +
+ +

The first two bytes of a bitcode file are 'BC' (0x42, 0x43). +The second two bytes are an application-specific magic number. Generic +bitcode tools can look at only the first two bytes to verify the file is +bitcode, while application-specific programs will want to look at all four.

+ +
+ + + + +
+ +

+A bitstream literally consists of a stream of bits, which are read in order +starting with the least significant bit of each byte. The stream is made up of a +number of primitive values that encode a stream of unsigned integer values. +These integers are encoded in two ways: either as Fixed +Width Integers or as Variable Width +Integers. +

+ +
+ + + + +
+ +

Fixed-width integer values have their low bits emitted directly to the file. + For example, a 3-bit integer value encodes 1 as 001. Fixed width integers + are used when there are a well-known number of options for a field. For + example, boolean values are usually encoded with a 1-bit wide integer. +

+ +
+ + + + +
+ +

Variable-width integer (VBR) values encode values of arbitrary size, +optimizing for the case where the values are small. Given a 4-bit VBR field, +any 3-bit value (0 through 7) is encoded directly, with the high bit set to +zero. Values larger than N-1 bits emit their bits in a series of N-1 bit +chunks, where all but the last set the high bit.

+ +

For example, the value 27 (0x1B) is encoded as 1011 0011 when emitted as a +vbr4 value. The first set of four bits indicates the value 3 (011) with a +continuation piece (indicated by a high bit of 1). The next word indicates a +value of 24 (011 << 3) with no continuation. The sum (3+24) yields the value +27. +
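The chunking rule can be captured in a few lines of C++ (a sketch, not LLVM's actual bitstream writer):

+
+// Split Val into (N-1)-bit chunks, low chunk first; set the high bit
+// of every chunk except the last to signal continuation.
+static void EmitVBR(uint64_t Val, unsigned N, std::vector<unsigned> &Out) {
+  uint64_t Mask = (1ULL << (N - 1)) - 1;
+  do {
+    unsigned Chunk = unsigned(Val & Mask);
+    Val >>= (N - 1);
+    Out.push_back(Chunk | (Val ? 1u << (N - 1) : 0));
+  } while (Val);
+}
+// EmitVBR(27, 4, Out) yields the chunks 1011 then 0011, matching the
+// vbr4 example above.
+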

+ +
+ + + + +
+ +

6-bit characters encode common characters into a fixed 6-bit field. They +represent the following characters with the following 6-bit values:

+ +
+
+'a' .. 'z' —  0 .. 25
+'A' .. 'Z' — 26 .. 51
+'0' .. '9' — 52 .. 61
+       '.' — 62
+       '_' — 63
+
+
+ +

This encoding is only suitable for encoding characters and strings that +consist only of the above characters. It is completely incapable of encoding +characters not in the set.
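A decoder for the table above is direct (a sketch):

+
+static char DecodeChar6(unsigned V) {
+  if (V < 26) return char('a' + V);        //  0 .. 25
+  if (V < 52) return char('A' + (V - 26)); // 26 .. 51
+  if (V < 62) return char('0' + (V - 52)); // 52 .. 61
+  return V == 62 ? '.' : '_';              // 62, 63
+}
+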

+ +
+ + + + +
+ +

Occasionally, it is useful to emit zero bits until the bitstream is a +multiple of 32 bits. This ensures that the bit position in the stream can be +represented as a multiple of 32-bit words.

+ +
+ + + + + +
+ +

+A bitstream is a sequential series of Blocks and +Data Records. Both of these start with an +abbreviation ID encoded as a fixed-bitwidth field. The width is specified by +the current block, as described below. The value of the abbreviation ID +specifies either a builtin ID (which have special meanings, defined below) or +one of the abbreviation IDs defined for the current block by the stream itself. +

+ +

+The set of builtin abbrev IDs is: +

+ +
  • 0 - END_BLOCK — This abbrev ID marks the end of the current block.
  • 1 - ENTER_SUBBLOCK — This abbrev ID marks the beginning of a new block.
  • 2 - DEFINE_ABBREV — This defines a new abbreviation.
  • 3 - UNABBREV_RECORD — This ID specifies the definition of an unabbreviated record.
+ +

Abbreviation IDs 4 and above are defined by the stream itself, and specify +an abbreviated record encoding.

+ +
+ + + + +
+ +

Blocks in a bitstream denote nested regions of the stream, and are identified by a content-specific id number (for example, LLVM IR uses an ID of 12 to represent function bodies). Block IDs 0-7 are reserved for standard blocks whose meaning is defined by Bitcode; block IDs 8 and greater are application specific. Nested blocks capture the hierarchical structure of the data encoded in them, and various properties are associated with blocks as the file is parsed. Block definitions allow the reader to efficiently skip blocks in constant time if the reader wants a summary of blocks, or if it wants to efficiently skip data it does not understand. The LLVM IR reader uses this mechanism to skip function bodies, lazily reading them on demand.

+ +

+When reading and encoding the stream, several properties are maintained for the +block. In particular, each block maintains: +

+ +
  1. A current abbrev id width. This value starts at 2 at the beginning of the stream, and is set every time a block record is entered. The block entry specifies the abbrev id width for the body of the block.
  2. A set of abbreviations. Abbreviations may be defined within a block, in which case they are only defined in that block (neither subblocks nor enclosing blocks see the abbreviation). Abbreviations can also be defined inside a BLOCKINFO block, in which case they are defined in all blocks that match the ID that the BLOCKINFO block is describing.
+ +

+As sub blocks are entered, these properties are saved and the new sub-block has +its own set of abbreviations, and its own abbrev id width. When a sub-block is +popped, the saved values are restored. +

+ +
+ + + + +
+ +

[ENTER_SUBBLOCK, blockid(vbr8), newabbrevlen(vbr4), <align32bits>, blocklen(32)]

+ +

+The ENTER_SUBBLOCK abbreviation ID specifies the start of a new block +record. The blockid value is encoded as an 8-bit VBR identifier, and +indicates the type of block being entered, which can be +a standard block or an application-specific block. +The newabbrevlen value is a 4-bit VBR, which specifies the abbrev id +width for the sub-block. The blocklen value is a 32-bit aligned value +that specifies the size of the subblock in 32-bit words. This value allows the +reader to skip over the entire block in one jump. +

+ +
+ + + + +
+ +

[END_BLOCK, <align32bits>]

+ +

+The END_BLOCK abbreviation ID specifies the end of the current block +record. Its end is aligned to 32-bits to ensure that the size of the block is +an even multiple of 32-bits. +

+ +
+ + + + + + +
+

+Data records consist of a record code and a number of (up to) 64-bit +integer values. The interpretation of the code and values is +application specific and may vary between different block types. +Records can be encoded either using an unabbrev record, or with an +abbreviation. In the LLVM IR format, for example, there is a record +which encodes the target triple of a module. The code is +MODULE_CODE_TRIPLE, and the values of the record are the +ASCII codes for the characters in the string. +

+ +
+ + + + +
+ +

[UNABBREV_RECORD, code(vbr6), numops(vbr6), op0(vbr6), op1(vbr6), ...]

+ +

+An UNABBREV_RECORD provides a default fallback encoding, which is both +completely general and extremely inefficient. It can describe an arbitrary +record by emitting the code and operands as VBRs. +

+ +

+For example, emitting an LLVM IR target triple as an unabbreviated record +requires emitting the UNABBREV_RECORD abbrevid, a vbr6 for the +MODULE_CODE_TRIPLE code, a vbr6 for the length of the string, which is +equal to the number of operands, and a vbr6 for each character. Because there +are no letters with values less than 32, each letter would need to be emitted as +at least a two-part VBR, which means that each letter would require at least 12 +bits. This is not an efficient encoding, but it is fully general. +

+ +
+ + + + +
+ +

[<abbrevid>, fields...]

+ +

An abbreviated record is an abbreviation ID followed by a set of fields that are encoded according to the abbreviation definition. This allows records to be encoded significantly more densely than records encoded with the UNABBREV_RECORD type, and allows the abbreviation types to be specified in the stream itself, which allows the files to be completely self describing. The actual encoding of abbreviations is defined below.

+ +

The record code, which is the first field of an abbreviated record, +may be encoded in the abbreviation definition (as a literal +operand) or supplied in the abbreviated record (as a Fixed or VBR +operand value).

+ +
+ + + + +
+

+Abbreviations are an important form of compression for bitstreams. The idea is +to specify a dense encoding for a class of records once, then use that encoding +to emit many records. It takes space to emit the encoding into the file, but +the space is recouped (hopefully plus some) when the records that use it are +emitted. +

+ +

+Abbreviations can be determined dynamically per client, per file. Because the +abbreviations are stored in the bitstream itself, different streams of the same +format can contain different sets of abbreviations according to the needs +of the specific stream. +As a concrete example, LLVM IR files usually emit an abbreviation +for binary operators. If a specific LLVM module contained no or few binary +operators, the abbreviation does not need to be emitted. +

+
+ + + + +
+ +

[DEFINE_ABBREV, numabbrevops(vbr5), abbrevop0, abbrevop1, ...]

+ +

+A DEFINE_ABBREV record adds an abbreviation to the list of currently +defined abbreviations in the scope of this block. This definition only exists +inside this immediate block — it is not visible in subblocks or enclosing +blocks. Abbreviations are implicitly assigned IDs sequentially starting from 4 +(the first application-defined abbreviation ID). Any abbreviations defined in a +BLOCKINFO record for the particular block type +receive IDs first, in order, followed by any +abbreviations defined within the block itself. Abbreviated data records +reference this ID to indicate what abbreviation they are invoking. +

+ +

+An abbreviation definition consists of the DEFINE_ABBREV abbrevid +followed by a VBR that specifies the number of abbrev operands, then the abbrev +operands themselves. Abbreviation operands come in three forms. They all start +with a single bit that indicates whether the abbrev operand is a literal operand +(when the bit is 1) or an encoding operand (when the bit is 0). +

+ +
  1. Literal operands — [1(1), litvalue(vbr8)] — Literal operands specify that the value in the result is always a single specific value. This specific value is emitted as a vbr8 after the bit indicating that it is a literal operand.
  2. Encoding info without data — [0(1), encoding(3)] — Operand encodings that do not have extra data are just emitted as their code.
  3. Encoding info with data — [0(1), encoding(3), value(vbr5)] — Operand encodings that do have extra data are emitted as their code, followed by the extra data.
+ +

The possible operand encodings are:

+ +
  • Fixed (code 1): The field should be emitted as a fixed-width value, whose width is specified by the operand's extra data.
  • VBR (code 2): The field should be emitted as a variable-width value, whose width is specified by the operand's extra data.
  • Array (code 3): This field is an array of values. The array operand has no extra data, but expects another operand to follow it, indicating the element type of the array. When reading an array in an abbreviated record, the first integer is a vbr6 that indicates the array length, followed by the encoded elements of the array. An array may only occur as the last operand of an abbreviation (except for the one final operand that gives the array's type).
  • Char6 (code 4): This field should be emitted as a char6-encoded value. This operand type takes no extra data. Char6 encoding is normally used as an array element type.
  • Blob (code 5): This field is emitted as a vbr6, followed by padding to a 32-bit boundary (for alignment) and an array of 8-bit objects. The array of bytes is further followed by tail padding to ensure that its total length is a multiple of 4 bytes. This makes it very efficient for the reader to decode the data without having to make a copy of it: it can use a pointer to the data in the mapped-in file and poke directly at it. A blob may only occur as the last operand of an abbreviation.
+ +

+For example, target triples in LLVM modules are encoded as a record of the +form [TRIPLE, 'a', 'b', 'c', 'd']. Consider if the bitstream emitted +the following abbrev entry: +

+ +
+
+[0, Fixed, 4]
+[0, Array]
+[0, Char6]
+
+
+ +

+When emitting a record with this abbreviation, the above entry would be emitted +as: +

+ +
+

+[4(abbrevwidth), 2(4), 4(vbr6), 0(6), 1(6), 2(6), 3(6)]

+
+ +

These values are:

+ +
  1. The first value, 4, is the abbreviation ID for this abbreviation.
  2. The second value, 2, is the record code for TRIPLE records within LLVM IR file MODULE_BLOCK blocks.
  3. The third value, 4, is the length of the array.
  4. The rest of the values are the char6 encoded values for "abcd".
+ +

With this abbreviation, the triple is emitted with only 37 bits (assuming an abbrev id width of 3). Without the abbreviation, significantly more space would be required to emit the target triple. Also, because the TRIPLE value is not emitted as a literal in the abbreviation, the abbreviation can also be used for any other string value.

+ +
+ + + + +
+ +

+In addition to the basic block structure and record encodings, the bitstream +also defines specific built-in block types. These block types specify how the +stream is to be decoded or other metadata. In the future, new standard blocks +may be added. Block IDs 0-7 are reserved for standard blocks. +

+ +
+ + + + +
+ +

+The BLOCKINFO block allows the description of metadata for other +blocks. The currently specified records are: +

+ +
+
+[SETBID (#1), blockid]
+[DEFINE_ABBREV, ...]
+[BLOCKNAME, ...name...]
+[SETRECORDNAME, RecordID, ...name...]
+
+
+ +

+The SETBID record (code 1) indicates which block ID is being +described. SETBID records can occur multiple times throughout the +block to change which block ID is being described. There must be +a SETBID record prior to any other records. +

+ +

+Standard DEFINE_ABBREV records can occur inside BLOCKINFO +blocks, but unlike their occurrence in normal blocks, the abbreviation is +defined for blocks matching the block ID we are describing, not the +BLOCKINFO block itself. The abbreviations defined +in BLOCKINFO blocks receive abbreviation IDs as described +in DEFINE_ABBREV. +

+ +

The BLOCKNAME record (code 2) can optionally occur in this block. The elements of +the record are the bytes of the string name of the block. llvm-bcanalyzer can use +this to dump out bitcode files symbolically.

+ +

The SETRECORDNAME record (code 3) can also optionally occur in this block. The +first operand value is a record ID number, and the rest of the elements of the record are +the bytes for the string name of the record. llvm-bcanalyzer can use +this to dump out bitcode files symbolically.

+ +

+Note that although the data in BLOCKINFO blocks is described as +"metadata," the abbreviations they contain are essential for parsing records +from the corresponding blocks. It is not safe to skip them. +

+ +
+ + + + + +
+ +

+Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper +structure. This structure contains a simple header that indicates the offset +and size of the embedded BC file. This allows additional information to be +stored alongside the BC file. The structure of this file header is: +

+ +
+

+[Magic(32), Version(32), Offset(32), Size(32), CPUType(32)]

+
+ +

Each of the fields is a 32-bit field stored in little-endian form (as with the rest of the bitcode file fields). The Magic number is always 0x0B17C0DE and the version is currently always 0. The Offset field is the offset in bytes to the start of the bitcode stream in the file, and the Size field is the size in bytes of the stream. CPUType is a target-specific value that can be used to encode the CPU of the target.
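Viewed as a C struct, the header looks roughly like this (a sketch; the struct and field names are illustrative, not LLVM API):

+
+struct BitcodeWrapperHeader { // all fields are little-endian uint32
+  uint32_t Magic;    // always 0x0B17C0DE
+  uint32_t Version;  // currently always 0
+  uint32_t Offset;   // byte offset of the bitcode stream in the file
+  uint32_t Size;     // byte size of the bitcode stream
+  uint32_t CPUType;  // target-specific CPU encoding
+};
+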

+ +
+ + + + + +
+ +

+LLVM IR is encoded into a bitstream by defining blocks and records. It uses +blocks for things like constant pools, functions, symbol tables, etc. It uses +records for things like instructions, global variable descriptors, type +descriptions, etc. This document does not describe the set of abbreviations +that the writer uses, as these are fully self-described in the file, and the +reader is not allowed to build in any knowledge of this. +

+ +
+ + + + + + + +
+ +

+The magic number for LLVM IR files is: +

+ +
+

+[0x0(4), 0xC(4), 0xE(4), 0xD(4)]

+
+ +

+When combined with the bitcode magic number and viewed as bytes, this is +"BC 0xC0DE". +

+ +
+ + + + +
+ +

Variable Width Integer encoding is an efficient way to encode arbitrary sized unsigned values, but is extremely inefficient for encoding signed values, as signed values are otherwise treated as maximally large unsigned values.

+ +

+As such, signed VBR values of a specific width are emitted as follows: +

+ +
  • Positive values are emitted as VBRs of the specified width, but with their value shifted left by one.
  • Negative values are emitted as VBRs of the specified width, but the negated value is shifted left by one, and the low bit is set.
+ +

+With this encoding, small positive and small negative values can both +be emitted efficiently. Signed VBR encoding is used in +CST_CODE_INTEGER and CST_CODE_WIDE_INTEGER records +within CONSTANTS_BLOCK blocks. +
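The mapping itself is a zigzag-style transform (a sketch):

+
+// Map a signed value onto the unsigned value that is VBR-emitted.
+// (The INT64_MIN edge case is ignored in this sketch.)
+static uint64_t SignedVBRValue(int64_t V) {
+  if (V >= 0)
+    return uint64_t(V) << 1;        // positive: value << 1
+  return (uint64_t(-V) << 1) | 1;   // negative: (-value << 1) | 1
+}
+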

+ +
+ + + + + +
+ +

+LLVM IR is defined with the following blocks: +

+ +
  • 8 — MODULE_BLOCK — This is the top-level block that contains the entire module, and describes a variety of per-module information.
  • 9 — PARAMATTR_BLOCK — This enumerates the parameter attributes.
  • 10 — TYPE_BLOCK — This describes all of the types in the module.
  • 11 — CONSTANTS_BLOCK — This describes constants for a module or function.
  • 12 — FUNCTION_BLOCK — This describes a function body.
  • 13 — TYPE_SYMTAB_BLOCK — This describes the type symbol table.
  • 14 — VALUE_SYMTAB_BLOCK — This describes a value symbol table.
  • 15 — METADATA_BLOCK — This describes metadata items.
  • 16 — METADATA_ATTACHMENT — This contains records associating metadata with function instruction values.
+ +
+ + + + +
+ +

The MODULE_BLOCK block (id 8) is the top-level block for LLVM +bitcode files, and each bitcode file must contain exactly one. In +addition to records (described below) containing information +about the module, a MODULE_BLOCK block may contain the +following sub-blocks: +

+ + + +
+ + + + +
+ +

[VERSION, version#]

+ +

The VERSION record (code 1) contains a single value +indicating the format version. Only version 0 is supported at this +time.

+
+ + + + +
+

[TRIPLE, ...string...]

+ +

The TRIPLE record (code 2) contains a variable number of +values representing the bytes of the target triple +specification string.

+
+ + + + +
+

[DATALAYOUT, ...string...]

+ +

The DATALAYOUT record (code 3) contains a variable number of +values representing the bytes of the target datalayout +specification string.

+
+ + + + +
+

[ASM, ...string...]

+ +

The ASM record (code 4) contains a variable number of +values representing the bytes of module asm strings, with +individual assembly blocks separated by newline (ASCII 10) characters.

+
+ + + + +
+

[SECTIONNAME, ...string...]

+ +

The SECTIONNAME record (code 5) contains a variable number +of values representing the bytes of a single section name +string. There should be one SECTIONNAME record for each +section name referenced (e.g., in global variable or function +section attributes) within the module. These records can be +referenced by the 1-based index in the section fields of +GLOBALVAR or FUNCTION records.

+
+ + + + +
+

[DEPLIB, ...string...]

+ +

The DEPLIB record (code 6) contains a variable number of +values representing the bytes of a single dependent library name +string, one of the libraries mentioned in a deplibs +declaration. There should be one DEPLIB record for each +library name referenced.

+
+ + + + +
+

[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal]

+ +

The GLOBALVAR record (code 7) marks the declaration or +definition of a global variable. The operand fields are:

+ +
  • pointer type: The type index of the pointer type used to point to this global variable
  • isconst: Non-zero if the variable is treated as constant within the module, or zero if it is not
  • initid: If non-zero, the value index of the initializer for this variable, plus 1.
  • linkage: An encoding of the linkage type for this variable:
      • external: code 0
      • weak: code 1
      • appending: code 2
      • internal: code 3
      • linkonce: code 4
      • dllimport: code 5
      • dllexport: code 6
      • extern_weak: code 7
      • common: code 8
      • private: code 9
      • weak_odr: code 10
      • linkonce_odr: code 11
      • available_externally: code 12
      • linker_private: code 13
  • alignment: The logarithm base 2 of the variable's requested alignment, plus 1
  • section: If non-zero, the 1-based section index in the table of MODULE_CODE_SECTIONNAME entries.
  • visibility: If present, an encoding of the visibility of this variable:
      • default: code 0
      • hidden: code 1
      • protected: code 2
  • threadlocal: If present and non-zero, indicates that the variable is thread_local
  • unnamed_addr: If present and non-zero, indicates that the variable has unnamed_addr
+
+ + + + +
+ +

[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc]

+ +

The FUNCTION record (code 8) marks the declaration or +definition of a function. The operand fields are:

+ +
  • type: The type index of the function type describing this function
  • callingconv: The calling convention number:
      • ccc: code 0
      • fastcc: code 8
      • coldcc: code 9
      • x86_stdcallcc: code 64
      • x86_fastcallcc: code 65
      • arm_apcscc: code 66
      • arm_aapcscc: code 67
      • arm_aapcs_vfpcc: code 68
  • isproto: Non-zero if this entry represents a declaration rather than a definition
  • linkage: An encoding of the linkage type for this function
  • paramattr: If nonzero, the 1-based parameter attribute index into the table of PARAMATTR_CODE_ENTRY entries.
  • alignment: The logarithm base 2 of the function's requested alignment, plus 1
  • section: If non-zero, the 1-based section index in the table of MODULE_CODE_SECTIONNAME entries.
  • visibility: An encoding of the visibility of this function
  • gc: If present and nonzero, the 1-based garbage collector index in the table of MODULE_CODE_GCNAME entries.
  • unnamed_addr: If present and non-zero, indicates that the function has unnamed_addr
+
+ + + + +
+ +

[ALIAS, alias type, aliasee val#, linkage, visibility]

+ +

The ALIAS record (code 9) marks the definition of an +alias. The operand fields are

+ +
  • alias type: The type index of the alias
  • aliasee val#: The value index of the aliased value
  • linkage: An encoding of the linkage type for this alias
  • visibility: If present, an encoding of the visibility of the alias
+
+ + + + +
+

[PURGEVALS, numvals]

+ +

The PURGEVALS record (code 10) resets the module-level +value list to the size given by the single operand value. Module-level +value list items are added by GLOBALVAR, FUNCTION, +and ALIAS records. After a PURGEVALS record is seen, +new value indices will start from the given numvals value.

+
+ + + + +
+

[GCNAME, ...string...]

+ +

The GCNAME record (code 11) contains a variable number of +values representing the bytes of a single garbage collector name +string. There should be one GCNAME record for each garbage +collector name referenced in function gc attributes within +the module. These records can be referenced by 1-based index in the gc +fields of FUNCTION records.

+
+ + + + +
+ +

The PARAMATTR_BLOCK block (id 9) contains a table of +entries describing the attributes of function parameters. These +entries are referenced by 1-based index in the paramattr field +of module block FUNCTION +records, or within the attr field of function block INST_INVOKE and INST_CALL records.

+ +

Entries within PARAMATTR_BLOCK are constructed to ensure that each is unique (i.e., no two indices represent equivalent attribute lists).

+ +
+ + + + + +
+ +

[ENTRY, paramidx0, attr0, paramidx1, attr1...]

+ +

The ENTRY record (code 1) contains an even number of +values describing a unique set of function parameter attributes. Each +paramidx value indicates which set of attributes is +represented, with 0 representing the return value attributes, +0xFFFFFFFF representing function attributes, and other values +representing 1-based function parameters. Each attr value is a +bitmap with the following interpretation: +

+ +
  • bit 0: zeroext
  • bit 1: signext
  • bit 2: noreturn
  • bit 3: inreg
  • bit 4: sret
  • bit 5: nounwind
  • bit 6: noalias
  • bit 7: byval
  • bit 8: nest
  • bit 9: readnone
  • bit 10: readonly
  • bit 11: noinline
  • bit 12: alwaysinline
  • bit 13: optsize
  • bit 14: ssp
  • bit 15: sspreq
  • bits 16–31: align n
  • bit 32: nocapture
  • bit 33: noredzone
  • bit 34: noimplicitfloat
  • bit 35: naked
  • bit 36: inlinehint
  • bits 37–39: alignstack n, represented as the logarithm base 2 of the requested alignment, plus 1
+
+ + + + +
+ +

The TYPE_BLOCK block (id 10) contains records which +constitute a table of type operator entries used to represent types +referenced within an LLVM module. Each record (with the exception of +NUMENTRY) generates a +single type table entry, which may be referenced by 0-based index from +instructions, constants, metadata, type symbol table entries, or other +type operator records. +

+ +

Entries within TYPE_BLOCK are constructed to ensure that each entry is unique (i.e., no two indices represent structurally equivalent types).

+ +
+ + + + +
+ +

[NUMENTRY, numentries]

+ +

The NUMENTRY record (code 1) contains a single value which +indicates the total number of type code entries in the type table of +the module. If present, NUMENTRY should be the first record +in the block. +

+
+ + + + +
+ +

[VOID]

+ +

The VOID record (code 2) adds a void type to the +type table. +

+
+ + + + +
+ +

[FLOAT]

+ +

The FLOAT record (code 3) adds a float (32-bit +floating point) type to the type table. +

+
+ + + + +
+ +

[DOUBLE]

+ +

The DOUBLE record (code 4) adds a double (64-bit +floating point) type to the type table. +

+
+ + + + +
+ +

[LABEL]

+ +

The LABEL record (code 5) adds a label type to +the type table. +

+
+ + + + +
+ +

[OPAQUE]

+ +

The OPAQUE record (code 6) adds an opaque type to +the type table. Note that distinct opaque types are not +unified. +

+
+ + + + +
+ +

[INTEGER, width]

+ +

The INTEGER record (code 7) adds an integer type to the +type table. The single width field indicates the width of the +integer type. +

+
+ + + + +
+ +

[POINTER, pointee type, address space]

+ +

The POINTER record (code 8) adds a pointer type to the +type table. The operand fields are

+ +
  • pointee type: The type index of the pointed-to type
  • address space: If supplied, the target-specific numbered address space where the pointed-to object resides. Otherwise, the default address space is zero.
+
+ + + + +
+ +

[FUNCTION, vararg, ignored, retty, ...paramty... ]

+ +

The FUNCTION record (code 9) adds a function type to the +type table. The operand fields are

+ +
  • vararg: Non-zero if the type represents a varargs function
  • ignored: This value field is present for backward compatibility only, and is ignored
  • retty: The type index of the function's return type
  • paramty: Zero or more type indices representing the parameter types of the function
+ +
+ + + + +
+ +

[STRUCT, ispacked, ...eltty...]

+ +

The STRUCT record (code 10) adds a struct type to the +type table. The operand fields are

+ +
  • ispacked: Non-zero if the type represents a packed structure
  • eltty: Zero or more type indices representing the element types of the structure
+
+ + + + +
+ +

[ARRAY, numelts, eltty]

+ +

The ARRAY record (code 11) adds an array type to the type +table. The operand fields are

+ +
  • numelts: The number of elements in arrays of this type
  • eltty: The type index of the array element type
+
+ + + + +
+ +

[VECTOR, numelts, eltty]

+ +

The VECTOR record (code 12) adds a vector type to the type +table. The operand fields are

+ +
  • numelts: The number of elements in vectors of this type
  • eltty: The type index of the vector element type
+
+ + + + +
+ +

[X86_FP80]

+ +

The X86_FP80 record (code 13) adds an x86_fp80 (80-bit +floating point) type to the type table. +

+
+ + + + +
+ +

[FP128]

+ +

The FP128 record (code 14) adds an fp128 (128-bit +floating point) type to the type table. +

+
+ + + + +
+ +

[PPC_FP128]

+ +

The PPC_FP128 record (code 15) adds a ppc_fp128 +(128-bit floating point) type to the type table. +

+
+ + + + +
+ +

[METADATA]

+ +

The METADATA record (code 16) adds a metadata +type to the type table. +

+
+ + + + +
+ +

The CONSTANTS_BLOCK block (id 11) ... +

+ +
+ + + + + +
+ +

The FUNCTION_BLOCK block (id 12) ... +

+ +

In addition to the record types described below, a +FUNCTION_BLOCK block may contain the following sub-blocks: +

+ + + +
+ + + + + +
+ +

The TYPE_SYMTAB_BLOCK block (id 13) contains entries which +map between module-level named types and their corresponding type +indices. +

+ +
+ + + + +
+ +

[ENTRY, typeid, ...string...]

+ +

The ENTRY record (code 1) contains a variable number of +values, with the first giving the type index of the designated type, +and the remaining values giving the character codes of the type +name. Each entry corresponds to a single named type. +

+
+ + + + + +
+ +

The VALUE_SYMTAB_BLOCK block (id 14) ... +

+ +
+ + + + + +
+ +

The METADATA_BLOCK block (id 15) ... +

+ +
+ + + + + +
+ +

The METADATA_ATTACHMENT block (id 16) ... +

+ +
+ + + +
+
Chris Lattner
+The LLVM Compiler Infrastructure
+Last modified: $Date$ +
+ + diff --git a/final/docs/Bugpoint.html b/final/docs/Bugpoint.html new file mode 100644 index 00000000000..bf75b5ba44c --- /dev/null +++ b/final/docs/Bugpoint.html @@ -0,0 +1,250 @@ + + + + LLVM bugpoint tool: design and usage + + + +
+ LLVM bugpoint tool: design and usage +
+ + + +
+

Written by Chris Lattner

+
+ + + + + +
+ +

bugpoint narrows down the source of problems in LLVM tools and +passes. It can be used to debug three types of failures: optimizer crashes, +miscompilations by optimizers, or bad native code generation (including problems +in the static and JIT compilers). It aims to reduce large test cases to small, +useful ones. For example, if opt crashes while optimizing a +file, it will identify the optimization (or combination of optimizations) that +causes the crash, and reduce the file down to a small example which triggers the +crash.

+ +

For detailed case scenarios, such as debugging opt, llvm-ld, or one of the LLVM code generators, see the How To Submit a Bug Report document.

+ +
+ + + + + +
+ +

bugpoint is designed to be a useful tool without requiring any +hooks into the LLVM infrastructure at all. It works with any and all LLVM +passes and code generators, and does not need to "know" how they work. Because +of this, it may appear to do stupid things or miss obvious +simplifications. bugpoint is also designed to trade off programmer +time for computer time in the compiler-debugging process; consequently, it may +take a long period of (unattended) time to reduce a test case, but we feel it +is still worth it. Note that bugpoint is generally very quick unless +debugging a miscompilation where each test of the program (which requires +executing it) takes a long time.

+ +
+ + + + +
+ +

bugpoint reads each .bc or .ll file specified on +the command line and links them together into a single module, called the test +program. If any LLVM passes are specified on the command line, it runs these +passes on the test program. If any of the passes crash, or if they produce +malformed output (which causes the verifier to abort), bugpoint starts +the crash debugger.

+ +

Otherwise, if the -output option was not specified, +bugpoint runs the test program with the C backend (which is assumed to +generate good code) to generate a reference output. Once bugpoint has +a reference output for the test program, it tries executing it with the +selected code generator. If the selected code generator crashes, +bugpoint starts the crash debugger on the +code generator. Otherwise, if the resulting output differs from the reference +output, it assumes the difference resulted from a code generator failure, and +starts the code generator debugger.

+ +

Finally, if the output of the selected code generator matches the reference +output, bugpoint runs the test program after all of the LLVM passes +have been applied to it. If its output differs from the reference output, it +assumes the difference resulted from a failure in one of the LLVM passes, and +enters the miscompilation debugger. +Otherwise, there is no problem bugpoint can debug.

+ +
+ + + + +
+ +

If an optimizer or code generator crashes, bugpoint will try as hard +as it can to reduce the list of passes (for optimizer crashes) and the size of +the test program. First, bugpoint figures out which combination of +optimizer passes triggers the bug. This is useful when debugging a problem +exposed by opt, for example, because it runs over 38 passes.

+ +

Next, bugpoint tries removing functions from the test program, to +reduce its size. Usually it is able to reduce a test program to a single +function, when debugging intraprocedural optimizations. Once the number of +functions has been reduced, it attempts to delete various edges in the control +flow graph, to reduce the size of the function as much as possible. Finally, +bugpoint deletes any individual LLVM instructions whose absence does +not eliminate the failure. At the end, bugpoint should tell you what +passes crash, give you a bitcode file, and give you instructions on how to +reproduce the failure with opt or llc.

+ +
+ + + + +
+ +

The code generator debugger attempts to narrow down the amount of code that +is being miscompiled by the selected code generator. To do this, it takes the +test program and partitions it into two pieces: one piece which it compiles +with the C backend (into a shared object), and one piece which it runs with +either the JIT or the static LLC compiler. It uses several techniques to +reduce the amount of code pushed through the LLVM code generator, to reduce the +potential scope of the problem. After it is finished, it emits two bitcode +files (called "test" [to be compiled with the code generator] and "safe" [to be +compiled with the C backend], respectively), and instructions for reproducing +the problem. The code generator debugger assumes that the C backend produces +good code.

+ +
+ + + + +
+ +

The miscompilation debugger works similarly to the code generator debugger. +It works by splitting the test program into two pieces, running the +optimizations specified on one piece, linking the two pieces back together, and +then executing the result. It attempts to narrow down the list of passes to +the one (or few) which are causing the miscompilation, then reduce the portion +of the test program which is being miscompiled. The miscompilation debugger +assumes that the selected code generator is working properly.

+ +
+ + + + + +
+ +bugpoint can be a remarkably useful tool, but it sometimes works in +non-obvious ways. Here are some hints and tips:

+ +

    +
  1. In the code generator and miscompilation debuggers, bugpoint only works with programs that have deterministic output. Thus, if the program outputs argv[0], the date, time, or any other "random" data, bugpoint may misinterpret differences in that output as the result of a miscompilation. Programs should be temporarily modified to disable outputs that are likely to vary from run to run.
  2. In the code generator and miscompilation debuggers, debugging will go + faster if you manually modify the program or its inputs to reduce the + runtime, but still exhibit the problem. + +
  3. bugpoint is extremely useful when working on a new optimization: it helps track down regressions quickly. To avoid having to relink bugpoint every time you change your optimization, however, have bugpoint dynamically load your optimization with the -load option (see the combined example after this list).
  4. bugpoint can generate a lot of output and run for a long period of time. It is often useful to capture the output of the program to a file. For example, in the C shell, you can run:

    + +
    +

    bugpoint ... |& tee bugpoint.log

    +
    + +

    to get a copy of bugpoint's output in the file + bugpoint.log, as well as on your terminal.

    + +
  5. bugpoint cannot debug problems with the LLVM linker. If + bugpoint crashes before you see its "All input ok" message, + you might try llvm-link -v on the same set of input files. If + that also crashes, you may be experiencing a linker bug. + +
  6. bugpoint is useful for proactively finding bugs in LLVM. + Invoking bugpoint with the -find-bugs option will cause + the list of specified optimizations to be randomized and applied to the + program. This process will repeat until a bug is found or the user + kills bugpoint. + +
  7. bugpoint does not understand the -O option + that is used to specify optimization level to opt. You + can use e.g.

    + +
    +

    opt -O2 -debug-pass=Arguments foo.bc -disable-output

    +
    + +

    to get a list of passes that are used with -O2 and + then pass this list to bugpoint.

    + +
+ +
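As a combined sketch of hints 3 and 4 above, a C shell session that loads a dynamically built pass and logs the output might look like this; the plugin file name and pass name here are hypothetical:

bugpoint -load ./MyPass.so -mypass foo.bc |& tee bugpoint.log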
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/CFEBuildInstrs.html b/final/docs/CFEBuildInstrs.html new file mode 100644 index 00000000000..ed2f295b7af --- /dev/null +++ b/final/docs/CFEBuildInstrs.html @@ -0,0 +1,29 @@ + + + + + + Building the LLVM C/C++ Front-End + + + +
+This page has moved here. +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + LLVM Compiler Infrastructure
+ Last modified: $Date: 2008-02-13 17:46:10 +0100 (Wed, 13 Feb 2008) $ +
+ + + diff --git a/final/docs/CMake.html b/final/docs/CMake.html new file mode 100644 index 00000000000..c88b1249021 --- /dev/null +++ b/final/docs/CMake.html @@ -0,0 +1,486 @@ + + + + Building LLVM with CMake + + + +
+ Building LLVM with CMake +
+ + + +
+

Written by Oscar Fuentes

+
+ + + + + +
+ +

CMake is a cross-platform build-generator tool. CMake does not build the project itself; it generates the files needed by your build tool (GNU make, Visual Studio, etc) for building LLVM.

+ +

If you are really anxious about getting a functional LLVM build, + go to the Quick start section. If you + are a CMake novice, start on Basic CMake + usage and then go back to the Quick + start once you know what you are + doing. The Options and variables section + is a reference for customizing your build. If you already have + experience with CMake, this is the recommended starting point. +

+ + + + + +
+ +

Here we use the command-line, non-interactive CMake interface.

+ +
    + +
  1. Download + and install CMake. Version 2.8 is the minimum required.

    + +
  2. Open a shell. Your development tools must be reachable from this + shell through the PATH environment variable.

    + +
  3. Create a directory to contain the build. Building LLVM directly in the source directory is not supported. cd to this directory:

    +
    +

    mkdir mybuilddir

    +

    cd mybuilddir

    +
    + +
  4. Execute this command on the shell + replacing path/to/llvm/source/root with the path to the + root of your LLVM source tree:

    +
    +

    cmake path/to/llvm/source/root

    +
    + +

    CMake will detect your development environment, perform a series of tests, and generate the files required for building LLVM. CMake will use default values for all build parameters. See the Options and variables section for fine-tuning your build.

    + +

    This can fail if CMake can't detect your toolset, or if it thinks that the environment is not sane enough. In this case, make sure that the toolset that you intend to use is the only one reachable from the shell and that the shell itself is the correct one for your development environment. For instance, CMake will refuse to build MinGW makefiles if you have a POSIX shell reachable through the PATH environment variable. You can force CMake to use a given build tool; see the Usage section.

    + +
+ +
+ + + + + +
+ +

This section explains basic aspects of CMake, mostly focusing on those options you may need in your day-to-day usage.

+ +

CMake comes with extensive documentation, in the form of HTML files and in the cmake executable itself. Execute cmake --help for further help options.

+ +

CMake needs to know which build tool it shall generate files for (GNU make, Visual Studio, Xcode, etc). If not specified on the command line, it tries to guess based on your environment. Once the build tool is identified, CMake uses the corresponding Generator to create the files for it. You can explicitly specify the generator with the command line option -G "Name of the generator". To list the available generators on your platform, execute

+ +
+

cmake --help

+
+ +

This will list the generator names at the end of the help text. Generator names are case-sensitive. Example:

+ +
+

cmake -G "Visual Studio 8 2005" path/to/llvm/source/root

+
+ +

For a given development platform there can be more than one adequate generator. If you use Visual Studio, "NMake Makefiles" is a generator you can use for building with NMake. By default, CMake chooses the most specific generator supported by your development environment. If you want an alternative generator, you must tell CMake with the -G option.

+ +

TODO: explain variables and cache. Move explanation here from + #options section.

+ +
+ + + + + +
+ +

Variables customize how the build will be generated. Options are + boolean variables, with possible values ON/OFF. Options and + variables are defined on the CMake command line like this:

+ +
+

cmake -DVARIABLE=value path/to/llvm/source

+
+ +

You can set a variable after the initial CMake invocation to change its value. You can also undefine a variable:

+ +
+

cmake -UVARIABLE path/to/llvm/source

+
+ +

Variables are stored in the CMake cache, a file named CMakeCache.txt at the root of the build directory. Do not hand-edit it.

+ +

Variables are listed below with their type appended after a colon. It is also valid to write the variable and its type on the CMake command line:

+ +
+

cmake -DVARIABLE:TYPE=value path/to/llvm/source

+
+ +
+ + + + +
+ +

Listed here are some of the frequently used CMake variables, along with a brief explanation and LLVM-specific notes; a combined example follows the list. For full documentation, check the CMake docs or execute cmake --help-variable VARIABLE_NAME.

+ +
+
CMAKE_BUILD_TYPE:STRING
+ +
Sets the build type for make based generators. Possible + values are Release, Debug, RelWithDebInfo and MinSizeRel. On + systems like Visual Studio the user sets the build type with the IDE + settings.
+ +
CMAKE_INSTALL_PREFIX:PATH
+
Path where LLVM will be installed if "make install" is invoked + or the "INSTALL" target is built.
+ +
LLVM_LIBDIR_SUFFIX:STRING
+
Extra suffix to append to the directory where libraries are to + be installed. On a 64-bit architecture, one could use + -DLLVM_LIBDIR_SUFFIX=64 to install libraries to /usr/lib64.
+ +
CMAKE_C_FLAGS:STRING
+
Extra flags to use when compiling C source files.
+ +
CMAKE_CXX_FLAGS:STRING
+
Extra flags to use when compiling C++ source files.
+ +
BUILD_SHARED_LIBS:BOOL
+
Flag indicating if shared libraries will be built. Its default value is OFF. Shared libraries are not supported on Windows and not recommended on other OSes.
+
+ +
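As an illustration, a release build that installs under /opt/llvm (a prefix chosen only for this example) combines these variables like so:

cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt/llvm path/to/llvm/source/root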
+ + + + +
+ +
+
LLVM_TARGETS_TO_BUILD:STRING
+
Semicolon-separated list of targets to build, or all for building all targets. Case-sensitive. For Visual C++ it defaults to X86; in other cases it defaults to all. Example: -DLLVM_TARGETS_TO_BUILD="X86;PowerPC;Alpha".
+ +
LLVM_BUILD_TOOLS:BOOL
+
Build LLVM tools. Defaults to ON. Targets for building each tool are generated in any case. You can build a tool separately by invoking its target. For example, you can build llvm-as with a makefile-based system by executing make llvm-as at the root of your build directory.
+ +
LLVM_INCLUDE_TOOLS:BOOL
+
Generate build targets for the LLVM tools. Defaults to ON. You can use this option to disable the generation of build targets for the LLVM tools.
+ +
LLVM_BUILD_EXAMPLES:BOOL
+
Build LLVM examples. Defaults to OFF. Targets for building each + example are generated in any case. See documentation + for LLVM_BUILD_TOOLS above for more details.
+ +
LLVM_INCLUDE_EXAMPLES:BOOL
+
Generate build targets for the LLVM examples. Defaults to ON. You can use this option to disable the generation of build targets for the LLVM examples.
+ +
LLVM_BUILD_TESTS:BOOL
+
Build LLVM unit tests. Defaults to OFF. Targets for building + each unit test are generated in any case. You can build a specific + unit test with the target UnitTestNameTests (where at this + time UnitTestName can be ADT, Analysis, ExecutionEngine, + JIT, Support, Transform, VMCore; see the subdirectories + of unittests for an updated list.) It is possible to build + all unit tests with the target UnitTests.
+ +
LLVM_INCLUDE_TESTS:BOOL
+
Generate build targets for the LLVM unit tests. Defaults to ON. You can use this option to disable the generation of build targets for the LLVM unit tests.
+ +
LLVM_APPEND_VC_REV:BOOL
+
Append version control revision info (svn revision number or git + revision id) to LLVM version string (stored in the PACKAGE_VERSION + macro). For this to work cmake must be invoked before the + build. Defaults to OFF.
+ +
LLVM_ENABLE_THREADS:BOOL
+
Build with threads support, if available. Defaults to ON.
+ +
LLVM_ENABLE_ASSERTIONS:BOOL
+
Enables code assertions. Defaults to OFF if and only if + CMAKE_BUILD_TYPE is Release.
+ +
LLVM_ENABLE_PIC:BOOL
+
Add the -fPIC flag for the compiler command-line, if the + compiler supports this flag. Some systems, like Windows, do not + need this flag. Defaults to ON.
+ +
LLVM_ENABLE_WARNINGS:BOOL
+
Enable all compiler warnings. Defaults to ON.
+ +
LLVM_ENABLE_PEDANTIC:BOOL
+
Enable pedantic mode. This disables compiler-specific extensions, if possible. Defaults to ON.
+ +
LLVM_ENABLE_WERROR:BOOL
+
Stop and fail build, if a compiler warning is + triggered. Defaults to OFF.
+ +
LLVM_BUILD_32_BITS:BOOL
+
Build 32-bit executables and libraries on 64-bit systems. This option is available only on some 64-bit Unix systems. Defaults to OFF.
+ +
LLVM_TARGET_ARCH:STRING
+
LLVM target to use for native code generation. This is required + for JIT generation. It defaults to "host", meaning that it shall + pick the architecture of the machine where LLVM is being built. If + you are cross-compiling, set it to the target architecture + name.
+ +
LLVM_TABLEGEN:STRING
+
Full path to a native TableGen executable (usually named tblgen). This is intended for cross-compiling: if the user sets this variable, no native TableGen will be created.
+ +
LLVM_LIT_ARGS:STRING
+
Arguments given to lit. + make check and make clang-test are affected. + By default, "-sv --no-progress-bar" + on Visual C++ and Xcode, + "-sv" on others.
+ +
LLVM_LIT_TOOLS_DIR:STRING
+
The path to GnuWin32 tools for tests. Valid on a Windows host. Defaults to "", in which case Lit seeks tools via %PATH%. If set, Lit looks for tools (e.g. grep, sort, etc.) in LLVM_LIT_TOOLS_DIR first, without requiring GnuWin32 on %PATH%.
+ +
LLVM_ENABLE_FFI:BOOL
+
Indicates whether the LLVM Interpreter will be linked with the Foreign Function Interface library. If the library or its headers are installed in a custom location, you can set the variables FFI_INCLUDE_DIR and FFI_LIBRARY_DIR. Defaults to OFF.
+
+ +
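For instance, a build limited to the X86 backend with assertions enabled (values chosen only for illustration) would be configured as:

cmake -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_ASSERTIONS=ON path/to/llvm/source/root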
+ + + + + +
+ +

Testing is performed when the check target is built. For instance, if you are using makefiles, execute this command at the top level of your build directory:

+ +
+

make check

+
+ +

On Visual Studio, you may run the tests by building the "check" project.

+ +
+ + + + + +
+ +

See this wiki page for generic instructions on how to cross-compile with CMake. It goes into detailed explanations and may seem daunting, but it is not. The wiki page includes several examples with toolchain files. Go directly to this section for a quick solution.

+ +

Also see the LLVM-specific variables + section for variables used when cross-compiling.

+ +
+ + + + + +
+ +

The most difficult part of adding LLVM to the build of a project + is to determine the set of LLVM libraries corresponding to the set + of required LLVM features. What follows is an example of how to + obtain this information:

+ +
+
+    # A convenience variable:
+    set(LLVM_ROOT "" CACHE PATH "Root of LLVM install.")
+    # A bit of a sanity check:
+    if( NOT EXISTS ${LLVM_ROOT}/include/llvm )
+    message(FATAL_ERROR "LLVM_ROOT (${LLVM_ROOT}) is not a valid LLVM install")
+    endif()
+    # We incorporate the CMake features provided by LLVM:
+    set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake")
+    include(LLVM)
+    # Now set the header and library paths:
+    include_directories( ${LLVM_ROOT}/include )
+    link_directories( ${LLVM_ROOT}/lib )
+    # Let's suppose we want to build a JIT compiler with support for
+    # binary code (no interpreter):
+    llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native)
+    # Finally, we link the LLVM libraries to our executable:
+    target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES})
+    
+
+ +

This assumes that LLVM_ROOT points to an install of LLVM. The procedure also works for uninstalled builds, although we need to take care to add an include_directories for the location of the headers in the LLVM source directory (if we are building out-of-source).

+ +
+ + + + + + + +
+ +

Notes for specific compilers and/or platforms.

+ +
+ + + +
+ +
+
LLVM_COMPILER_JOBS:STRING
+
Specifies the maximum number of parallel compiler jobs to use per project when building with msbuild or Visual Studio. Only supported for the Visual Studio 2008 and Visual Studio 2010 CMake generators. 0 means use all processors. Default is 0.
+
+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Oscar Fuentes
+ LLVM Compiler Infrastructure
+ Last modified: $Date: 2010-08-09 03:59:36 +0100 (Mon, 9 Aug 2010) $ +
+ + + diff --git a/final/docs/CodeGenerator.html b/final/docs/CodeGenerator.html new file mode 100644 index 00000000000..a84534644cf --- /dev/null +++ b/final/docs/CodeGenerator.html @@ -0,0 +1,2817 @@ + + + + + The LLVM Target-Independent Code Generator + + + + + + + +
+ The LLVM Target-Independent Code Generator +
+ +
    +
  1. Introduction
  2. Target description classes
  3. The "Machine" Code Generator classes
  4. The "MC" Layer
  5. Target-independent code generation algorithms
  6. Implementing a Native Assembler
  7. Target-specific Implementation Notes
+ +
+

Written by the LLVM Team.

+
+ +
+

Warning: This is a work in progress.

+
+ + + + + +
+ +

The LLVM target-independent code generator is a framework that provides a + suite of reusable components for translating the LLVM internal representation + to the machine code for a specified target—either in assembly form + (suitable for a static compiler) or in binary machine code format (usable for + a JIT compiler). The LLVM target-independent code generator consists of six + main components:

+ +
    +
  1. Abstract target description interfaces which capture important properties about various aspects of the machine, independently of how they will be used. These interfaces are defined in include/llvm/Target/.
  2. Classes used to represent the code being generated for a target. These classes are intended to be abstract enough to represent the machine code for any target machine. These classes are defined in include/llvm/CodeGen/. At this level, concepts like "constant pool entries" and "jump tables" are explicitly exposed.
  3. Classes and algorithms used to represent code at the object file level, the MC Layer. These classes represent assembly level constructs like labels, sections, and instructions. At this level, concepts like "constant pool entries" and "jump tables" don't exist.
  4. Target-independent algorithms used to implement various phases of native code generation (register allocation, scheduling, stack frame representation, etc). This code lives in lib/CodeGen/.
  5. Implementations of the abstract target description interfaces for particular targets. These machine descriptions make use of the components provided by LLVM, and can optionally provide custom target-specific passes, to build complete code generators for a specific target. Target descriptions live in lib/Target/.
  6. The target-independent JIT components. The LLVM JIT is completely target independent (it uses the TargetJITInfo structure to interface with the target for target-specific issues). The code for the target-independent JIT lives in lib/ExecutionEngine/JIT.
+ +

Depending on which part of the code generator you are interested in working + on, different pieces of this will be useful to you. In any case, you should + be familiar with the target description + and machine code representation classes. If you + want to add a backend for a new target, you will need + to implement the target description classes for + your new target and understand the LLVM code + representation. If you are interested in implementing a + new code generation algorithm, it should only + depend on the target-description and machine code representation classes, + ensuring that it is portable.

+ +
+ + + + +
+ +

The two pieces of the LLVM code generator are the high-level interface to the + code generator and the set of reusable components that can be used to build + target-specific backends. The two most important interfaces + (TargetMachine + and TargetData) are the only ones that are + required to be defined for a backend to fit into the LLVM system, but the + others must be defined if the reusable code generator components are going to + be used.

+ +

This design has two important implications. The first is that LLVM can + support completely non-traditional code generation targets. For example, the + C backend does not require register allocation, instruction selection, or any + of the other standard components provided by the system. As such, it only + implements these two interfaces, and does its own thing. Another example of + a code generator like this is a (purely hypothetical) backend that converts + LLVM to the GCC RTL form and uses GCC to emit machine code for a target.

+ +

This design also implies that it is possible to design and implement + radically different code generators in the LLVM system that do not make use + of any of the built-in components. Doing so is not recommended at all, but + could be required for radically different targets that do not fit into the + LLVM machine description model: FPGAs for example.

+ +
+ + + + +
+ +

The LLVM target-independent code generator is designed to support efficient + and quality code generation for standard register-based microprocessors. + Code generation in this model is divided into the following stages:

+ +
    +
  1. Instruction Selection — This phase determines an efficient way to express the input LLVM code in the target instruction set. This stage produces the initial code for the program in the target instruction set, then makes use of virtual registers in SSA form and physical registers that represent any required register assignments due to target constraints or calling conventions. This step turns the LLVM code into a DAG of target instructions.
  2. Scheduling and Formation — This phase takes the DAG of target instructions produced by the instruction selection phase, determines an ordering of the instructions, then emits the instructions as MachineInstrs with that ordering. Note that we describe this in the instruction selection section because it operates on a SelectionDAG.
  3. SSA-based Machine Code Optimizations — This optional stage consists of a series of machine-code optimizations that operate on the SSA-form produced by the instruction selector. Optimizations like modulo-scheduling or peephole optimization work here.
  4. Register Allocation — The target code is transformed from an infinite virtual register file in SSA form to the concrete register file used by the target. This phase introduces spill code and eliminates all virtual register references from the program.
  5. Prolog/Epilog Code Insertion — Once the machine code has been generated for the function and the amount of stack space required is known (used for LLVM alloca's and spill slots), the prolog and epilog code for the function can be inserted and "abstract stack location references" can be eliminated. This stage is responsible for implementing optimizations like frame-pointer elimination and stack packing.
  6. Late Machine Code Optimizations — Optimizations that operate on "final" machine code can go here, such as spill code scheduling and peephole optimizations.
  7. Code Emission — The final stage actually puts out the code for the current function, either in the target assembler format or in machine code.
+ +

The code generator is based on the assumption that the instruction selector + will use an optimal pattern matching selector to create high-quality + sequences of native instructions. Alternative code generator designs based + on pattern expansion and aggressive iterative peephole optimization are much + slower. This design permits efficient compilation (important for JIT + environments) and aggressive optimization (used when generating code offline) + by allowing components of varying levels of sophistication to be used for any + step of compilation.

+ +

In addition to these stages, target implementations can insert arbitrary + target-specific passes into the flow. For example, the X86 target uses a + special pass to handle the 80x87 floating point stack architecture. Other + targets with unusual requirements can be supported with custom passes as + needed.

+ +
+ + + + +
+ +

The target description classes require a detailed description of the target + architecture. These target descriptions often have a large amount of common + information (e.g., an add instruction is almost identical to a + sub instruction). In order to allow the maximum amount of + commonality to be factored out, the LLVM code generator uses + the TableGen tool to describe big + chunks of the target machine, which allows the use of domain-specific and + target-specific abstractions to reduce the amount of repetition.

+ +

As LLVM continues to be developed and refined, we plan to move more and more + of the target description to the .td form. Doing so gives us a + number of advantages. The most important is that it makes it easier to port + LLVM because it reduces the amount of C++ code that has to be written, and + the surface area of the code generator that needs to be understood before + someone can get something working. Second, it makes it easier to change + things. In particular, if tables and other things are all emitted + by tblgen, we only need a change in one place (tblgen) to + update all of the targets to a new interface.

+ +
+ + + + + +
+ +

The LLVM target description classes (located in the + include/llvm/Target directory) provide an abstract description of + the target machine independent of any particular client. These classes are + designed to capture the abstract properties of the target (such as the + instructions and registers it has), and do not incorporate any particular + pieces of code generation algorithms.

+ +

All of the target description classes (except the + TargetData class) are designed to be + subclassed by the concrete target implementation, and have virtual methods + implemented. To get to these implementations, the + TargetMachine class provides accessors + that should be implemented by the target.

+ +
+ + + + +
+ +

The TargetMachine class provides virtual methods that are used to + access the target-specific implementations of the various target description + classes via the get*Info methods (getInstrInfo, + getRegisterInfo, getFrameInfo, etc.). This class is + designed to be specialized by a concrete target implementation + (e.g., X86TargetMachine) which implements the various virtual + methods. The only required target description class is + the TargetData class, but if the code + generator components are to be used, the other interfaces should be + implemented as well.

+ +
+ + + + +
+ +

The TargetData class is the only required target description class, and it is the only class that is not extensible (you cannot derive a new class from it). TargetData specifies information about how the target lays out memory for structures, the alignment requirements for various data types, the size of pointers in the target, and whether the target is little-endian or big-endian.

+ +
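As a small sketch of the kinds of queries clients make against this interface, the fragment below asks for the pointer size and byte order; TM is assumed to be a reference to some TargetMachine, and exact method signatures may vary between LLVM versions:

+// Query target layout properties through TargetData.
+const TargetData *TD = TM.getTargetData();
+unsigned PtrSize = TD->getPointerSize();     // pointer size, in bytes
+bool IsLittleEndian = TD->isLittleEndian();  // target byte order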
+ + + + +
+ +

The TargetLowering class is used by SelectionDAG based instruction + selectors primarily to describe how LLVM code should be lowered to + SelectionDAG operations. Among other things, this class indicates:

+ +
    +
  • an initial register class to use for various ValueTypes,
  • which operations are natively supported by the target machine,
  • the return type of setcc operations,
  • the type to use for shift amounts, and
  • various high-level characteristics, like whether it is profitable to turn division by a constant into a multiplication sequence
+ +
+ + + + +
+ +

The TargetRegisterInfo class is used to describe the register file + of the target and any interactions between the registers.

+ +

Registers are represented in the code generator by unsigned integers. Physical registers (those that actually exist in the target description) are unique small numbers, and virtual registers are generally large. Note that register #0 is reserved as a flag value.

+ +

Each register in the processor description has an associated + TargetRegisterDesc entry, which provides a textual name for the + register (used for assembly output and debugging dumps) and a set of aliases + (used to indicate whether one register overlaps with another).

+ +

In addition to the per-register description, the TargetRegisterInfo + class exposes a set of processor specific register classes (instances of the + TargetRegisterClass class). Each register class contains sets of + registers that have the same properties (for example, they are all 32-bit + integer registers). Each SSA virtual register created by the instruction + selector has an associated register class. When the register allocator runs, + it replaces virtual registers with a physical register in the set.

+ +

The target-specific implementations of these classes are auto-generated from a TableGen description of the register file.

+ +
+ + + + +
+ +

The TargetInstrInfo class is used to describe the machine + instructions supported by the target. It is essentially an array of + TargetInstrDescriptor objects, each of which describes one + instruction the target supports. Descriptors define things like the mnemonic + for the opcode, the number of operands, the list of implicit register uses + and defs, whether the instruction has certain target-independent properties + (accesses memory, is commutable, etc), and holds any target-specific + flags.

+ +
+ + + + +
+ +

The TargetFrameInfo class is used to provide information about the + stack frame layout of the target. It holds the direction of stack growth, the + known stack alignment on entry to each function, and the offset to the local + area. The offset to the local area is the offset from the stack pointer on + function entry to the first location where function data (local variables, + spill locations) can be stored.

+ +
+ + + + +
+ +

The TargetSubtarget class is used to provide information about the + specific chip set being targeted. A sub-target informs code generation of + which instructions are supported, instruction latencies and instruction + execution itinerary; i.e., which processing units are used, in what order, + and for how long.

+ +
+ + + + + +
+ +

The TargetJITInfo class exposes an abstract interface used by the + Just-In-Time code generator to perform target-specific activities, such as + emitting stubs. If a TargetMachine supports JIT code generation, it + should provide one of these objects through the getJITInfo + method.

+ +
+ + + + + +
+ +

At the high-level, LLVM code is translated to a machine specific + representation formed out of + MachineFunction, + MachineBasicBlock, + and MachineInstr instances (defined + in include/llvm/CodeGen). This representation is completely target + agnostic, representing instructions in their most abstract form: an opcode + and a series of operands. This representation is designed to support both an + SSA representation for machine code, as well as a register allocated, non-SSA + form.

+ +
+ + + + +
+ +

Target machine instructions are represented as instances of the + MachineInstr class. This class is an extremely abstract way of + representing machine instructions. In particular, it only keeps track of an + opcode number and a set of operands.

+ +

The opcode number is a simple unsigned integer that only has meaning to a + specific backend. All of the instructions for a target should be defined in + the *InstrInfo.td file for the target. The opcode enum values are + auto-generated from this description. The MachineInstr class does + not have any information about how to interpret the instruction (i.e., what + the semantics of the instruction are); for that you must refer to the + TargetInstrInfo class.

+ +

The operands of a machine instruction can be of several different types: a + register reference, a constant integer, a basic block reference, etc. In + addition, a machine operand should be marked as a def or a use of the value + (though only registers are allowed to be defs).

+ +

By convention, the LLVM code generator orders instruction operands so that all register definitions come before the register uses, even on architectures that are normally printed in other orders. For example, the SPARC add instruction: "add %i1, %i2, %i3" adds the "%i1" and "%i2" registers and stores the result into the "%i3" register. In the LLVM code generator, the operands should be stored as "%i3, %i1, %i2": with the destination first.

+ +

Keeping destination (definition) operands at the beginning of the operand + list has several advantages. In particular, the debugging printer will print + the instruction like this:

+ +
+
+%r3 = add %i1, %i2
+
+
+ +

Also, if the first operand is a def, it is easier to create instructions whose only def is the first operand.

+ +
+ + + + +
+ +

Machine instructions are created by using the BuildMI functions, located in the include/llvm/CodeGen/MachineInstrBuilder.h file. The BuildMI functions make it easy to build arbitrary machine instructions. Usage of the BuildMI functions looks like this:

+ +
+
+// Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42')
+// instruction.  The '1' specifies how many operands will be added.
+MachineInstr *MI = BuildMI(X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create the same instr, but insert it at the end of a basic block.
+MachineBasicBlock &MBB = ...
+BuildMI(MBB, X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create the same instr, but insert it before a specified iterator point.
+MachineBasicBlock::iterator MBBI = ...
+BuildMI(MBB, MBBI, X86::MOV32ri, 1, DestReg).addImm(42);
+
+// Create a 'cmp Reg, 0' instruction, no destination reg.
+MI = BuildMI(X86::CMP32ri, 2).addReg(Reg).addImm(0);
+// Create an 'sahf' instruction which takes no operands and stores nothing.
+MI = BuildMI(X86::SAHF, 0);
+
+// Create a self looping branch instruction.
+BuildMI(MBB, X86::JNE, 1).addMBB(&MBB);
+
+
+ +

The key thing to remember with the BuildMI functions is that you have to specify the number of operands that the machine instruction will take. This allows for efficient memory allocation. Also, operands default to being uses of values, not definitions. If you need to add a definition operand (other than the optional destination register), you must explicitly mark it as such:

+ +
+
+MI.addReg(Reg, RegState::Define);
+
+
+ +
+ + + + +
+ +

One important issue that the code generator needs to be aware of is the + presence of fixed registers. In particular, there are often places in the + instruction stream where the register allocator must arrange for a + particular value to be in a particular register. This can occur due to + limitations of the instruction set (e.g., the X86 can only do a 32-bit divide + with the EAX/EDX registers), or external factors like + calling conventions. In any case, the instruction selector should emit code + that copies a virtual register into or out of a physical register when + needed.

+ +

For example, consider this simple LLVM example:

+ +
+
+define i32 @test(i32 %X, i32 %Y) {
+  %Z = udiv i32 %X, %Y
+  ret i32 %Z
+}
+
+
+ +

The X86 instruction selector produces this machine code for the div + and ret (use "llc X.bc -march=x86 -print-machineinstrs" to + get this):

+ +
+
+;; Start of div
+%EAX = mov %reg1024           ;; Copy X (in reg1024) into EAX
+%reg1027 = sar %reg1024, 31
+%EDX = mov %reg1027           ;; Sign extend X into EDX
+idiv %reg1025                 ;; Divide by Y (in reg1025)
+%reg1026 = mov %EAX           ;; Read the result (Z) out of EAX
+
+;; Start of ret
+%EAX = mov %reg1026           ;; 32-bit return value goes in EAX
+ret
+
+
+ +

By the end of code generation, the register allocator has coalesced the + registers and deleted the resultant identity moves producing the following + code:

+ +
+
+;; X is in EAX, Y is in ECX
+mov %EAX, %EDX
+sar %EDX, 31
+idiv %ECX
+ret 
+
+
+ +

This approach is extremely general (if it can handle the X86 architecture, it + can handle anything!) and allows all of the target specific knowledge about + the instruction stream to be isolated in the instruction selector. Note that + physical registers should have a short lifetime for good code generation, and + all physical registers are assumed dead on entry to and exit from basic + blocks (before register allocation). Thus, if you need a value to be live + across basic block boundaries, it must live in a virtual + register.

+ +
+ + + + +
+ +

MachineInstr's are initially selected in SSA-form, and are + maintained in SSA-form until register allocation happens. For the most part, + this is trivially simple since LLVM is already in SSA form; LLVM PHI nodes + become machine code PHI nodes, and virtual registers are only allowed to have + a single definition.

+ +

After register allocation, machine code is no longer in SSA-form because + there are no virtual registers left in the code.

+ +
+ + + + +
+ +

The MachineBasicBlock class contains a list of machine instructions + (MachineInstr instances). It roughly + corresponds to the LLVM code input to the instruction selector, but there can + be a one-to-many mapping (i.e. one LLVM basic block can map to multiple + machine basic blocks). The MachineBasicBlock class has a + "getBasicBlock" method, which returns the LLVM basic block that it + comes from.

+ +
+ + + + +
+ +

The MachineFunction class contains a list of machine basic blocks (MachineBasicBlock instances). It corresponds one-to-one with the LLVM function input to the instruction selector. In addition to a list of basic blocks, the MachineFunction contains a MachineConstantPool, a MachineFrameInfo, a MachineFunctionInfo, and a MachineRegisterInfo. See include/llvm/CodeGen/MachineFunction.h for more information.

+ +
+ + + + + + +
+ +

The MC Layer is used to represent and process code at the raw machine code level, devoid of "high level" information like "constant pools", "jump tables", "global variables" or anything like that. At this level, LLVM handles things like label names, machine instructions, and sections in the object file. The code in this layer is used for a number of important purposes: the tail end of the code generator uses it to write a .s or .o file, and it is also used by the llvm-mc tool to implement standalone machine code assemblers and disassemblers.

+ +

This section describes some of the important classes. There are also a number of important subsystems that interact at this layer; they are described later in this manual.

+ +
+ + + + + +
+ +

MCStreamer is best thought of as an assembler API. It is an abstract API which is implemented in different ways (e.g. to output a .s file, output an ELF .o file, etc) but whose methods correspond directly to what you see in a .s file. MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, SwitchSection, EmitValue (for .byte, .word), etc, which directly correspond to assembly level directives. It also has an EmitInstruction method, which is used to output an MCInst to the streamer.

+ +
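A minimal sketch of driving this API, assuming an MCStreamer named Out and an MCContext named Ctx already exist; the method names come from the description above, but treat exact parameter lists as version-dependent:

+// Emit the equivalent of "foo:" followed by ".byte 4".
+MCSymbol *Foo = Ctx.GetOrCreateSymbol(StringRef("foo"));
+Out.EmitLabel(Foo);                                // foo:
+Out.EmitValue(MCConstantExpr::Create(4, Ctx), 1);  // .byte 4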

This API is most important for two clients: the llvm-mc stand-alone assembler is effectively a parser that parses a line, then invokes a method on MCStreamer. In the code generator, the Code Emission phase lowers higher-level LLVM IR and Machine* constructs down to the MC layer, emitting directives through MCStreamer.

+ +

+On the implementation side of MCStreamer, there are two major implementations: +one for writing out a .s file (MCAsmStreamer), and one for writing out a .o +file (MCObjectStreamer). MCAsmStreamer is a straight-forward implementation +that prints out a directive for each method (e.g. EmitValue -> .byte), but +MCObjectStreamer implements a full assembler. +

+ +
+ + + + +
+ +

The MCContext class is the owner of a variety of uniqued data structures at the MC layer, including symbols, sections, etc. As such, this is the class that you interact with to create symbols and sections. This class cannot be subclassed.

+ +
+ + + + +
+ +

+The MCSymbol class represents a symbol (aka label) in the assembly file. There +are two interesting kinds of symbols: assembler temporary symbols, and normal +symbols. Assembler temporary symbols are used and processed by the assembler +but are discarded when the object file is produced. The distinction is usually +represented by adding a prefix to the label, for example "L" labels are +assembler temporary labels in MachO. +

+ +

MCSymbols are created by MCContext and uniqued there. This means that +MCSymbols can be compared for pointer equivalence to find out if they are the +same symbol. Note that pointer inequality does not guarantee the labels will +end up at different addresses though. It's perfectly legal to output something +like this to the .s file:

+ +

+  foo:
+  bar:
+    .byte 4
+
+ +

In this case, both the foo and bar symbols will have the same address.

+ +
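Because of this uniquing, looking up the same name twice yields the same pointer; a small sketch, assuming an MCContext named Ctx:

+MCSymbol *A = Ctx.GetOrCreateSymbol(StringRef("foo"));
+MCSymbol *B = Ctx.GetOrCreateSymbol(StringRef("foo"));
+assert(A == B && "same name, same uniqued MCSymbol");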
+ + + + +
+ +

+The MCSection class represents an object-file specific section. It is subclassed +by object file specific implementations (e.g. MCSectionMachO, +MCSectionCOFF, MCSectionELF) and these are created and uniqued +by MCContext. The MCStreamer has a notion of the current section, which can be +changed with the SwitchToSection method (which corresponds to a ".section" +directive in a .s file). +

+ +
+ + + + +
+ +

+The MCInst class is a target-independent representation of an instruction. It +is a simple class (much more so than MachineInstr) +that holds a target-specific opcode and a vector of MCOperands. MCOperand, in +turn, is a simple discriminated union of three cases: 1) a simple immediate, +2) a target register ID, 3) a symbolic expression (e.g. "Lfoo-Lbar+42") as an +MCExpr. +

+ +
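A sketch of building one by hand follows; the opcode and register numbers are placeholders, and CreateReg and CreateImm are MCOperand's factory functions for two of the three operand cases:

+MCInst Inst;
+Inst.setOpcode(Opcode);                      // target-specific opcode number
+Inst.addOperand(MCOperand::CreateReg(Reg));  // case 2: a target register ID
+Inst.addOperand(MCOperand::CreateImm(42));   // case 1: a simple immediate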

MCInst is the common currency used to represent machine instructions at the +MC layer. It is the type used by the instruction encoder, the instruction +printer, and the type generated by the assembly parser and disassembler. +

+ +
+ + + + + + +
+ +

This section documents the phases described in the + high-level design of the code generator. + It explains how they work and some of the rationale behind their design.

+ +
+ + + + +
+ +

Instruction Selection is the process of translating LLVM code presented to + the code generator into target-specific machine instructions. There are + several well-known ways to do this in the literature. LLVM uses a + SelectionDAG based instruction selector.

+ +

Portions of the DAG instruction selector are generated from the target + description (*.td) files. Our goal is for the entire instruction + selector to be generated from these .td files, though currently + there are still things that require custom C++ code.

+ +
+ + + + +
+ +

The SelectionDAG provides an abstraction for code representation in a way + that is amenable to instruction selection using automatic techniques + (e.g. dynamic-programming based optimal pattern matching selectors). It is + also well-suited to other phases of code generation; in particular, + instruction scheduling (SelectionDAG's are very close to scheduling DAGs + post-selection). Additionally, the SelectionDAG provides a host + representation where a large variety of very-low-level (but + target-independent) optimizations may be + performed; ones which require extensive information about the instructions + efficiently supported by the target.

+ +

The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the + SDNode class. The primary payload of the SDNode is its + operation code (Opcode) that indicates what operation the node performs and + the operands to the operation. The various operation node types are + described at the top of the include/llvm/CodeGen/SelectionDAGNodes.h + file.

+ +

Although most operations define a single value, each node in the graph may define multiple values. For example, a combined div/rem operation will define both the quotient and the remainder. Many other situations require multiple values as well. Each node also has some number of operands, which are edges to the node defining the used value. Because nodes may define multiple values, edges are represented by instances of the SDValue class, which is a <SDNode, unsigned> pair, indicating the node and result value being used, respectively. Each value produced by an SDNode has an associated MVT (Machine Value Type) indicating what the type of the value is.

+ +
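In code, the pair nature of SDValue shows up as a node pointer plus a result number; a sketch, assuming N is some SDNode*:

+SDValue Op = N->getOperand(0);   // edge to the node defining operand #0
+SDNode *Def = Op.getNode();      // the SDNode half of the pair
+unsigned ResNo = Op.getResNo();  // which of Def's results is used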

SelectionDAGs contain two different kinds of values: those that represent + data flow and those that represent control flow dependencies. Data values + are simple edges with an integer or floating point value type. Control edges + are represented as "chain" edges which are of type MVT::Other. + These edges provide an ordering between nodes that have side effects (such as + loads, stores, calls, returns, etc). All nodes that have side effects should + take a token chain as input and produce a new one as output. By convention, + token chain inputs are always operand #0, and chain results are always the + last value produced by an operation.

+ +

A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is + always a marker node with an Opcode of ISD::EntryToken. The Root + node is the final side-effecting node in the token chain. For example, in a + single basic block function it would be the return node.

+ +

One important concept for SelectionDAGs is the notion of a "legal" vs. + "illegal" DAG. A legal DAG for a target is one that only uses supported + operations and supported types. On a 32-bit PowerPC, for example, a DAG with + a value of type i1, i8, i16, or i64 would be illegal, as would a DAG that + uses a SREM or UREM operation. The + legalize types and + legalize operations phases are + responsible for turning an illegal DAG into a legal DAG.

+ +
+ + + + +
+ +

SelectionDAG-based instruction selection consists of the following steps:

+ +
    +
  1. Build initial DAG — This stage performs a simple translation from the input LLVM code to an illegal SelectionDAG.
  2. Optimize SelectionDAG — This stage performs simple optimizations on the SelectionDAG to simplify it, and recognize meta instructions (like rotates and div/rem pairs) for targets that support these meta operations. This makes the resultant code more efficient and the select instructions from DAG phase (below) simpler.
  3. Legalize SelectionDAG Types — This stage transforms SelectionDAG nodes to eliminate any types that are unsupported on the target.
  4. Optimize SelectionDAG — The SelectionDAG optimizer is run to clean up redundancies exposed by type legalization.
  5. Legalize SelectionDAG Ops — This stage transforms SelectionDAG nodes to eliminate any operations that are unsupported on the target.
  6. Optimize SelectionDAG — The SelectionDAG optimizer is run to eliminate inefficiencies introduced by operation legalization.
  7. Select instructions from DAG — Finally, the target instruction selector matches the DAG operations to target instructions. This process translates the target-independent input DAG into another DAG of target instructions.
  8. SelectionDAG Scheduling and Formation — The last phase assigns a linear order to the instructions in the target-instruction DAG and emits them into the MachineFunction being compiled. This step uses traditional prepass scheduling techniques.
+ +

After all of these steps are complete, the SelectionDAG is destroyed and the + rest of the code generation passes are run.

+ +

One great way to visualize what is going on here is to take advantage of a + few LLC command line options. The following options pop up a window + displaying the SelectionDAG at specific times (if you only get errors printed + to the console while using this, you probably + need to configure your system + to add support for it).

+ +
    +
  • -view-dag-combine1-dags displays the DAG after being built, before the first optimization pass.
  • -view-legalize-dags displays the DAG before Legalization.
  • -view-dag-combine2-dags displays the DAG before the second optimization pass.
  • -view-isel-dags displays the DAG before the Select phase.
  • -view-sched-dags displays the DAG before Scheduling.
+ +

The -view-sunit-dags option displays the Scheduler's dependency graph. This graph is based on the final SelectionDAG, with nodes that must be scheduled together bundled into a single scheduling-unit node, and with immediate operands and other nodes that aren't relevant for scheduling omitted.

+ +
+ + + + +
+ +

The initial SelectionDAG is naïvely peephole expanded from the LLVM input by the SelectionDAGLowering class in the lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp file. The intent of this pass is to expose as many low-level, target-specific details to the SelectionDAG as possible. This pass is mostly hard-coded (e.g. an LLVM add turns into an SDNode add while a getelementptr is expanded into the obvious arithmetic). This pass requires target-specific hooks to lower calls, returns, varargs, etc. For these features, the TargetLowering interface is used.

+ +
+ + + + +
+ +

The Legalize phase is in charge of converting a DAG to only use the types + that are natively supported by the target.

+ +

There are two main ways of converting values of unsupported scalar types to + values of supported types: converting small types to larger types + ("promoting"), and breaking up large integer types into smaller ones + ("expanding"). For example, a target might require that all f32 values are + promoted to f64 and that all i1/i8/i16 values are promoted to i32. The same + target might require that all i64 values be expanded into pairs of i32 + values. These changes can insert sign and zero extensions as needed to make + sure that the final code has the same behavior as the input.

+ +

There are two main ways of converting values of unsupported vector types to values of supported types: splitting vector types, multiple times if necessary, until a legal type is found, and extending vector types by adding elements to the end to round them out to legal types ("widening"). If a vector gets split all the way down to single-element parts with no supported vector type being found, the elements are converted to scalars ("scalarizing").

+ +

A target implementation tells the legalizer which types are supported (and + which register class to use for them) by calling the + addRegisterClass method in its TargetLowering constructor.

+ +
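A sketch of such a constructor fragment follows; the target name and register class names are placeholders for whatever a real target defines:

+// In a hypothetical target's TargetLowering constructor:
+addRegisterClass(MVT::i32, MyTarget::GPRRegisterClass);  // i32 is legal, lives in GPRs
+addRegisterClass(MVT::f64, MyTarget::FPRRegisterClass);  // f64 is legal, lives in FPRs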
+ + + + +
+ +

The Legalize phase is in charge of converting a DAG to only use the + operations that are natively supported by the target.

+ +

Targets often have weird constraints, such as not supporting every operation + on every supported datatype (e.g. X86 does not support byte conditional moves + and PowerPC does not support sign-extending loads from a 16-bit memory + location). Legalize takes care of this by open-coding another sequence of + operations to emulate the operation ("expansion"), by promoting one type to a + larger type that supports the operation ("promotion"), or by using a + target-specific hook to implement the legalization ("custom").

+ +

A target implementation tells the legalizer which operations are not + supported (and which of the above three actions to take) by calling the + setOperationAction method in its TargetLowering + constructor.

+ +
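Continuing the hypothetical constructor sketch from the previous section, the three actions described above might be requested like this; the operation and type choices are purely illustrative:

+setOperationAction(ISD::SREM,   MVT::i32, Expand);   // open-code as a sequence
+setOperationAction(ISD::MUL,    MVT::i16, Promote);  // perform in a wider type
+setOperationAction(ISD::SELECT, MVT::i32, Custom);   // target hook lowers it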

Prior to the existence of the Legalize passes, we required that every target selector supported and handled every operator and type even if they were not natively supported. The introduction of the Legalize phases allows all of the canonicalization patterns to be shared across targets, and makes it very easy to optimize the canonicalized code because it is still in the form of a DAG.

+ +
+ + + + +
+ +

The SelectionDAG optimization phase is run multiple times for code + generation, immediately after the DAG is built and once after each + legalization. The first run of the pass allows the initial code to be + cleaned up (e.g. performing optimizations that depend on knowing that the + operators have restricted type inputs). Subsequent runs of the pass clean up + the messy code generated by the Legalize passes, which allows Legalize to be + very simple (it can focus on making code legal instead of focusing on + generating good and legal code).

+ +

One important class of optimizations performed is optimizing inserted sign + and zero extension instructions. We currently use ad-hoc techniques, but + could move to more rigorous techniques in the future. Here are some good + papers on the subject:

+ +

"Widening + integer arithmetic"
+ Kevin Redwine and Norman Ramsey
+ International Conference on Compiler Construction (CC) 2004

+ +

"Effective + sign extension elimination"
+ Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani
+ Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design + and Implementation.

+ +
+ + + + +
+ +

The Select phase is the bulk of the target-specific code for instruction + selection. This phase takes a legal SelectionDAG as input, pattern matches + the instructions supported by the target to this DAG, and produces a new DAG + of target code. For example, consider the following LLVM fragment:

+ +
+
+%t1 = fadd float %W, %X
+%t2 = fmul float %t1, %Y
+%t3 = fadd float %t2, %Z
+
+
+ +

This LLVM code corresponds to a SelectionDAG that looks basically like + this:

+ +
+
+(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z)
+
+
+ +

If a target supports floating point multiply-and-add (FMA) operations, one of + the adds can be merged with the multiply. On the PowerPC, for example, the + output of the instruction selector might look like this DAG:

+ +
+
+(FMADDS (FADDS W, X), Y, Z)
+
+
+ +

The FMADDS instruction is a ternary instruction that multiplies its +first two operands and adds the third (as single-precision floating-point +numbers). The FADDS instruction is a simple binary single-precision +add instruction. To perform this pattern match, the PowerPC backend includes +the following instruction definitions:

+ +
+
+def FMADDS : AForm_1<59, 29,
+                    (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB),
+                    "fmadds $FRT, $FRA, $FRC, $FRB",
+                    [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC),
+                                           F4RC:$FRB))]>;
+def FADDS : AForm_2<59, 21,
+                    (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB),
+                    "fadds $FRT, $FRA, $FRB",
+                    [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>;
+
+
+ +

The portion of the instruction definition in bold indicates the pattern used + to match the instruction. The DAG operators + (like fmul/fadd) are defined in + the include/llvm/Target/TargetSelectionDAG.td file. " + F4RC" is the register class of the input and result values.

+ +

The TableGen DAG instruction selector generator reads the instruction + patterns in the .td file and automatically builds parts of the + pattern matching code for your target. It has the following strengths:

+ +
    +
  • At compiler-compiler time, it analyzes your instruction patterns and tells you if your patterns make sense or not.
  • It can handle arbitrary constraints on operands for the pattern match. In particular, it is straightforward to say things like "match any immediate that is a 13-bit sign-extended value". For examples, see the immSExt16 and related tblgen classes in the PowerPC backend.
  • It knows several important identities for the patterns defined. For example, it knows that addition is commutative, so it allows the FMADDS pattern above to match "(fadd X, (fmul Y, Z))" as well as "(fadd (fmul X, Y), Z)", without the target author having to specially handle this case.
  • It has a full-featured type-inferencing system. In particular, you should rarely have to explicitly tell the system what type parts of your patterns are. In the FMADDS case above, we didn't have to tell tblgen that all of the nodes in the pattern are of type 'f32'. It was able to infer and propagate this knowledge from the fact that F4RC has type 'f32'.
  • Targets can define their own (and rely on built-in) "pattern fragments". Pattern fragments are chunks of reusable patterns that get inlined into your patterns during compiler-compiler time. For example, the integer "(not x)" operation is actually defined as a pattern fragment that expands as "(xor x, -1)", since the SelectionDAG does not have a native 'not' operation. Targets can define their own short-hand fragments as they see fit. See the definition of 'not' and 'ineg' for examples.
  • In addition to instructions, targets can specify arbitrary patterns that map to one or more instructions using the 'Pat' class. For example, the PowerPC has no way to load an arbitrary integer immediate into a register in one instruction. To tell tblgen how to do this, it defines:

+// Arbitrary immediate support.  Implement in terms of LIS/ORI.
+def : Pat<(i32 imm:$imm),
+          (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>;

    If none of the single-instruction patterns for loading an immediate into a register match, this will be used. This rule says "match an arbitrary i32 immediate, turning it into an ORI ('or a 16-bit immediate') and an LIS ('load 16-bit immediate, where the immediate is shifted to the left 16 bits') instruction". To make this work, the LO16/HI16 node transformations are used to manipulate the input immediate (in this case, take the high or low 16 bits of the immediate).
  • While the system does automate a lot, it still allows you to write custom C++ code to match special cases if there is something that is hard to express.
+ +

While it has many strengths, the system currently has some limitations, primarily because it is a work in progress and is not yet finished:

• Overall, there is no way to define or match SelectionDAG nodes that define multiple values (e.g. SMUL_LOHI, LOAD, CALL, etc). This is the biggest reason that you currently still have to write custom C++ code for your instruction selector.
• There is no great way to support matching complex addressing modes yet. In the future, we will extend pattern fragments to allow them to define multiple values (e.g. the four operands of the X86 addressing mode, which are currently matched with custom C++ code). In addition, we'll extend fragments so that a fragment can match multiple different patterns.
• We don't automatically infer flags like isStore/isLoad yet.
• We don't automatically generate the set of supported registers and operations for the Legalizer yet.
• We don't have a way of tying in custom legalized nodes yet.

Despite these limitations, the instruction selector generator is still quite useful for most of the binary and logical operations in typical instruction sets. If you run into any problems or can't figure out how to do something, please let Chris know!

SelectionDAG Scheduling and Formation Phase

The scheduling phase takes the DAG of target instructions from the selection phase and assigns an order. The scheduler can pick an order depending on various constraints of the machine (e.g., an order that minimizes register pressure, or one that covers instruction latencies). Once an order is established, the DAG is converted to a list of MachineInstrs and the SelectionDAG is destroyed.

Note that this phase is logically separate from the instruction selection phase, but is tied to it closely in the code because it operates on SelectionDAGs.

Future directions for the SelectionDAG

1. Optional function-at-a-time selection.
2. Auto-generate entire selector from .td file.

SSA-based Machine Code Optimizations

To Be Written

Live Intervals

Live Intervals are the ranges (intervals) where a variable is live. They are used by some register allocator passes to determine if two or more virtual registers which require the same physical register are live at the same point in the program (i.e., they conflict). When this situation occurs, one virtual register must be spilled.

Live Variable Analysis

The first step in determining the live intervals of variables is to calculate the set of registers that are immediately dead after the instruction (i.e., the instruction calculates the value, but it is never used) and the set of registers that are used by the instruction, but are never used after the instruction (i.e., they are killed). Live variable information is computed for each virtual register and register allocatable physical register in the function. This is done in a very efficient manner because it uses SSA to sparsely compute lifetime information for virtual registers (which are in SSA form) and only has to track physical registers within a block. Before register allocation, LLVM can assume that physical registers are only live within a single basic block. This allows it to do a single, local analysis to resolve physical register lifetimes within each basic block. If a physical register is not register allocatable (e.g., a stack pointer or condition codes), it is not tracked.

Physical registers may be live in to or out of a function. Live in values are typically arguments in registers. Live out values are typically return values in registers. Live in values are marked as such, and are given a dummy "defining" instruction during live intervals analysis. If the last basic block of a function is a return, then it's marked as using all live out values in the function.

PHI nodes need to be handled specially, because the calculation of the live variable information from a depth first traversal of the CFG of the function won't guarantee that a virtual register used by the PHI node is defined before it's used. When a PHI node is encountered, only the definition is handled, because the uses will be handled in other basic blocks.

For each PHI node of the current basic block, we simulate an assignment at the end of the current basic block and traverse the successor basic blocks. If a successor basic block has a PHI node and one of the PHI node's operands is coming from the current basic block, then the variable is marked as alive within the current basic block and all of its predecessor basic blocks, until the basic block with the defining instruction is encountered.

Live Intervals Analysis

We now have the information available to perform the live intervals analysis and build the live intervals themselves. We start off by numbering the basic blocks and machine instructions. We then handle the "live-in" values. These are in physical registers, so the physical register is assumed to be killed by the end of the basic block. Live intervals for virtual registers are computed for some ordering of the machine instructions [1, N]. A live interval is an interval [i, j), where 1 <= i <= j < N, for which a variable is live.

More to come...

Register Allocation

The Register Allocation problem consists in mapping a program Pv, that can use an unbounded number of virtual registers, to a program Pp that contains a finite (possibly small) number of physical registers. Each target architecture has a different number of physical registers. If the number of physical registers is not enough to accommodate all the virtual registers, some of them will have to be mapped into memory. These virtuals are called spilled virtuals.

How registers are represented in LLVM

In LLVM, physical registers are denoted by integer numbers that normally range from 1 to 1023. To see how this numbering is defined for a particular architecture, you can read the GenRegisterNames.inc file for that architecture. For instance, by inspecting lib/Target/X86/X86GenRegisterNames.inc we see that the 32-bit register EAX is denoted by 15, and the MMX register MM0 is mapped to 48.
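As a quick illustration, the numbering can be inspected at runtime. The following is a minimal sketch, assuming TRI is a valid const TargetRegisterInfo* and that llvm/Support/Debug.h is included for dbgs(); the loop itself is hypothetical helper code, not part of any LLVM API:

// Dump every physical register number together with its name.
// Register number 0 is reserved to mean "no register".
for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg)
  dbgs() << Reg << " => " << TRI->getName(Reg) << "\n";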

Some architectures contain registers that share the same physical location. A notable example is the X86 platform. For instance, in the X86 architecture, the registers EAX, AX and AL share the first eight bits. These physical registers are marked as aliased in LLVM. Given a particular architecture, you can check which registers are aliased by inspecting its RegisterInfo.td file. Moreover, the method TargetRegisterInfo::getAliasSet(p_reg) returns an array containing all the physical registers aliased to the register p_reg.
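For illustration, here is a minimal sketch of walking such an alias set, assuming TRI is a valid const TargetRegisterInfo*, PReg is a physical register number, and the returned array is null-terminated:

// Print every register that shares storage with PReg.
if (const unsigned *Aliases = TRI->getAliasSet(PReg))
  for (unsigned i = 0; Aliases[i] != 0; ++i)
    dbgs() << TRI->getName(Aliases[i]) << " aliases "
           << TRI->getName(PReg) << "\n";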

Physical registers, in LLVM, are grouped in Register Classes. Elements in the same register class are functionally equivalent, and can be interchangeably used. Each virtual register can only be mapped to physical registers of a particular class. For instance, in the X86 architecture, some virtuals can only be allocated to 8 bit registers. A register class is described by TargetRegisterClass objects. To discover if a virtual register is compatible with a given physical register, this code can be used:

bool RegMapping_Fer::compatible_class(MachineFunction &mf,
                                      unsigned v_reg,
                                      unsigned p_reg) {
  assert(TargetRegisterInfo::isPhysicalRegister(p_reg) &&
         "Target register must be physical");
  const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg);
  return trc->contains(p_reg);
}

Sometimes, mostly for debugging purposes, it is useful to change the number of physical registers available in the target architecture. This must be done statically, inside the TargetRegisterInfo.td file. Just grep for RegisterClass, the last parameter of which is a list of registers. Just commenting some out is one simple way to avoid them being used. A more polite way is to explicitly exclude some registers from the allocation order. See the definition of the GR8 register class in lib/Target/X86/X86RegisterInfo.td for an example of this.

Virtual registers are also denoted by integer numbers. Contrary to physical registers, different virtual registers never share the same number. Whereas physical registers are statically defined in a TargetRegisterInfo.td file and cannot be created by the application developer, that is not the case with virtual registers. In order to create new virtual registers, use the method MachineRegisterInfo::createVirtualRegister(). This method will return a new virtual register. Use an IndexedMap<Foo, VirtReg2IndexFunctor> to hold information per virtual register. If you need to enumerate all virtual registers, use the function TargetRegisterInfo::index2VirtReg() to find the virtual register numbers:

for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
  unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i);
  stuff(VirtReg);
}

Before register allocation, the operands of an instruction are mostly virtual registers, although physical registers may also be used. In order to check if a given machine operand is a register, use the boolean function MachineOperand::isRegister(). To obtain the integer code of a register, use MachineOperand::getReg(). An instruction may define or use a register. For instance, ADD reg:1026 := reg:1025 reg:1024 defines register 1026, and uses registers 1025 and 1024. Given a register operand, the method MachineOperand::isUse() informs if that register is being used by the instruction. The method MachineOperand::isDef() informs if that register is being defined.
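Putting these predicates together, a minimal sketch that tallies the register operands of an instruction might look like this (MI is assumed to be a valid MachineInstr*):

// Count how many register operands MI defines and how many it uses.
unsigned NumDefs = 0, NumUses = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  const MachineOperand &MO = MI->getOperand(i);
  if (!MO.isRegister())
    continue;                 // immediate, frame index, symbol, etc.
  if (MO.isDef())
    ++NumDefs;                // register written by MI
  if (MO.isUse())
    ++NumUses;                // register read by MI
}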

We will call physical registers present in the LLVM bitcode before register allocation pre-colored registers. Pre-colored registers are used in many different situations, for instance, to pass parameters of function calls, and to store results of particular instructions. There are two types of pre-colored registers: the ones implicitly defined, and those explicitly defined. Explicitly defined registers are normal operands, and can be accessed with MachineInstr::getOperand(int)::getReg(). In order to check which registers are implicitly defined by an instruction, use TargetInstrInfo::get(opcode)::ImplicitDefs, where opcode is the opcode of the target instruction. One important difference between explicit and implicit physical registers is that the latter are defined statically for each instruction, whereas the former may vary depending on the program being compiled. For example, an instruction that represents a function call will always implicitly define or use the same set of physical registers. To read the registers implicitly used by an instruction, use TargetInstrInfo::get(opcode)::ImplicitUses. Pre-colored registers impose constraints on any register allocation algorithm. The register allocator must make sure that none of them are overwritten by the values of virtual registers while still alive.

Mapping virtual registers to physical registers

There are two ways to map virtual registers to physical registers (or to memory slots). The first way, which we will call direct mapping, is based on the use of methods of the classes TargetRegisterInfo and MachineOperand. The second way, which we will call indirect mapping, relies on the VirtRegMap class to insert the loads and stores that move values to and from memory.

The direct mapping provides more flexibility to the developer of the register allocator; however, it is more error prone, and demands more implementation work. Basically, the programmer will have to specify where load and store instructions should be inserted in the target function being compiled in order to get and store values in memory. To assign a physical register to a virtual register present in a given operand, use MachineOperand::setReg(p_reg). To insert a store instruction, use TargetInstrInfo::storeRegToStackSlot(...), and to insert a load instruction, use TargetInstrInfo::loadRegFromStackSlot(...).

The indirect mapping shields the application developer from the complexities of inserting load and store instructions. In order to map a virtual register to a physical one, use VirtRegMap::assignVirt2Phys(vreg, preg). In order to map a certain virtual register to memory, use VirtRegMap::assignVirt2StackSlot(vreg). This method will return the stack slot where vreg's value will be located. If it is necessary to map another virtual register to the same stack slot, use VirtRegMap::assignVirt2StackSlot(vreg, stack_location). One important point to consider when using the indirect mapping is that even if a virtual register is mapped to memory, it still needs to be mapped to a physical register. This physical register is the location where the virtual register is supposed to be found before being stored or after being reloaded.

If the indirect strategy is used, after all the virtual registers have been mapped to physical registers or stack slots, it is necessary to use a spiller object to place load and store instructions in the code. Every virtual that has been mapped to a stack slot will be stored to memory after being defined and will be loaded before being used. The implementation of the spiller tries to recycle load/store instructions, avoiding unnecessary instructions. For an example of how to invoke the spiller, see RegAllocLinearScan::runOnMachineFunction in lib/CodeGen/RegAllocLinearScan.cpp.

Handling two address instructions

With very rare exceptions (e.g., function calls), the LLVM machine code instructions are three address instructions. That is, each instruction is expected to define at most one register, and to use at most two registers. However, some architectures use two address instructions. In this case, the defined register is also one of the used registers. For instance, an instruction such as ADD %EAX, %EBX, in X86 is actually equivalent to %EAX = %EAX + %EBX.

In order to produce correct code, LLVM must convert three address instructions that represent two address instructions into true two address instructions. LLVM provides the pass TwoAddressInstructionPass for this specific purpose. It must be run before register allocation takes place. After its execution, the resulting code may no longer be in SSA form. This happens, for instance, in situations where an instruction such as %a = ADD %b %c is converted to two instructions such as:

%a = MOVE %b
%a = ADD %a %c

Notice that, internally, the second instruction is represented as ADD %a[def/use] %c. I.e., the register operand %a is both used and defined by the instruction.

The SSA Deconstruction Phase

An important transformation that happens during register allocation is called the SSA Deconstruction Phase. The SSA form simplifies many analyses that are performed on the control flow graph of programs. However, traditional instruction sets do not implement PHI instructions. Thus, in order to generate executable code, compilers must replace PHI instructions with other instructions that preserve their semantics.

There are many ways in which PHI instructions can safely be removed from the target code. The most traditional PHI deconstruction algorithm replaces PHI instructions with copy instructions. That is the strategy adopted by LLVM. The SSA deconstruction algorithm is implemented in lib/CodeGen/PHIElimination.cpp. In order to invoke this pass, the identifier PHIEliminationID must be marked as required in the code of the register allocator.
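For illustration, a hypothetical register allocator pass named RAMyAllocator would mark the requirement in its getAnalysisUsage method; a minimal sketch:

// Ask the pass manager to run SSA deconstruction before this allocator.
void RAMyAllocator::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequiredID(PHIEliminationID);
  MachineFunctionPass::getAnalysisUsage(AU);
}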

Instruction folding

Instruction folding is an optimization performed during register allocation that removes unnecessary copy instructions. For instance, a sequence of instructions such as:

%EBX = LOAD %mem_address
%EAX = COPY %EBX

can be safely substituted by the single instruction:

%EAX = LOAD %mem_address

Instructions can be folded with the TargetRegisterInfo::foldMemoryOperand(...) method. Care must be taken when folding instructions; a folded instruction can be quite different from the original instruction. See LiveIntervals::addIntervalsForSpills in lib/CodeGen/LiveIntervalAnalysis.cpp for an example of its use.

Built in register allocators

The LLVM infrastructure provides the application developer with three different register allocators:

• Linear Scan — The default allocator. This is the well-known linear scan register allocator. Whereas the Simple and Local algorithms use a direct mapping implementation technique, the Linear Scan implementation uses a spiller in order to place loads and stores.
• Fast — This register allocator is the default for debug builds. It allocates registers on a basic block level, attempting to keep values in registers and reusing registers as appropriate.
• PBQP — A Partitioned Boolean Quadratic Programming (PBQP) based register allocator. This allocator works by constructing a PBQP problem representing the register allocation problem under consideration, solving this using a PBQP solver, and mapping the solution back to a register assignment.

The type of register allocator used in llc can be chosen with the command line option -regalloc=...:

$ llc -regalloc=linearscan file.bc -o ln.s
$ llc -regalloc=fast file.bc -o fa.s
$ llc -regalloc=pbqp file.bc -o pbqp.s
Prolog/Epilog Code Insertion

To Be Written

Late Machine Code Optimizations

To Be Written

Code Emission

The code emission step of code generation is responsible for lowering from the code generator abstractions (like MachineFunction, MachineInstr, etc) down to the abstractions used by the MC layer (MCInst, MCStreamer, etc). This is done with a combination of several different classes: the (misnamed) target-independent AsmPrinter class, target-specific subclasses of AsmPrinter (such as SparcAsmPrinter), and the TargetLoweringObjectFile class.

Since the MC layer works at the level of abstraction of object files, it doesn't have a notion of functions, global variables etc. Instead, it thinks about labels, directives, and instructions. A key class used at this time is the MCStreamer class. This is an abstract API that is implemented in different ways (e.g. to output a .s file, output an ELF .o file, etc) that is effectively an "assembler API". MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, SwitchSection, etc, which directly correspond to assembly level directives.
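For illustration, a minimal sketch of this directive-level interface, assuming Streamer is an MCStreamer& and Ctx is its MCContext& (the symbol name is invented):

// Roughly the equivalent of emitting ".globl my_func" and "my_func:".
MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef("my_func"));
Streamer.EmitSymbolAttribute(Sym, MCSA_Global);
Streamer.EmitLabel(Sym);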

If you are interested in implementing a code generator for a target, there are three important things that you have to implement for your target:

1. First, you need a subclass of AsmPrinter for your target. This class implements the general lowering process converting MachineFunctions into MC label constructs. The AsmPrinter base class provides a number of useful methods and routines, and also allows you to override the lowering process in some important ways. You should get much of the lowering for free if you are implementing an ELF, COFF, or MachO target, because the TargetLoweringObjectFile class implements much of the common logic.
2. Second, you need to implement an instruction printer for your target. The instruction printer takes an MCInst and renders it to a raw_ostream as text. Most of this is automatically generated from the .td file (when you specify something like "add $dst, $src1, $src2" in the instructions), but you need to implement routines to print operands.
3. Third, you need to implement code that lowers a MachineInstr to an MCInst, usually implemented in "<target>MCInstLower.cpp". This lowering process is often target specific, and is responsible for turning jump table entries, constant pool indices, global variable addresses, etc into MCLabels as appropriate. This translation layer is also responsible for expanding pseudo ops used by the code generator into the actual machine instructions they correspond to. The MCInsts that are generated by this are fed into the instruction printer or the encoder.

Finally, at your choosing, you can also implement a subclass of MCCodeEmitter which lowers MCInsts into machine code bytes and relocations. This is important if you want to support direct .o file emission, or would like to implement an assembler for your target.

Implementing a Native Assembler

Though you're probably reading this because you want to write or maintain a compiler backend, LLVM also fully supports building a native assembler. We've tried hard to automate the generation of the assembler from the .td files (in particular the instruction syntax and encodings), which means that a large part of the manual and repetitive data entry can be factored and shared with the compiler.

Instruction Parsing

To Be Written

Instruction Alias Processing

Once the instruction is parsed, it enters the MatchInstructionImpl function. The MatchInstructionImpl function performs alias processing and then does actual matching.

Alias processing is the phase that canonicalizes different lexical forms of the same instructions down to one representation. There are several different kinds of aliases that can be implemented, and they are listed below in the order that they are processed (which is in order from simplest/weakest to most complex/powerful). Generally you want to use the first alias mechanism that meets the needs of your instruction, because it will allow a more concise description.

Mnemonic Aliases

The first phase of alias processing is simple instruction mnemonic remapping for classes of instructions which are allowed with two different mnemonics. This phase is a simple and unconditional remapping from one input mnemonic to one output mnemonic. It isn't possible for this form of alias to look at the operands at all, so the remapping must apply for all forms of a given mnemonic. Mnemonic aliases are defined simply; for example, X86 has:

def : MnemonicAlias<"cbw",     "cbtw">;
def : MnemonicAlias<"smovq",   "movsq">;
def : MnemonicAlias<"fldcww",  "fldcw">;
def : MnemonicAlias<"fucompi", "fucomip">;
def : MnemonicAlias<"ud2a",    "ud2">;

... and many others. With a MnemonicAlias definition, the mnemonic is remapped simply and directly. Though MnemonicAliases can't look at any aspect of the instruction (such as the operands), they can depend on global modes (the same ones supported by the matcher), through a Requires clause:

def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>;
def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>;

In this example, the mnemonic is mapped to a different one depending on the current instruction set.

Instruction Aliases

The most general phase of alias processing occurs while matching is happening: it provides new forms for the matcher to match along with a specific instruction to generate. An instruction alias has two parts: the string to match and the instruction to generate. For example:

def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8  :$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX32rr8  GR32:$dst, GR8  :$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX64rr8  GR64:$dst, GR8  :$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>;
def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>;

This shows a powerful example of the instruction aliases, matching the same mnemonic in multiple different ways depending on what operands are present in the assembly. The result of instruction aliases can include operands in a different order than the destination instruction, and can use an input multiple times, for example:

def : InstAlias<"clrb $reg", (XOR8rr  GR8 :$reg, GR8 :$reg)>;
def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>;
def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>;
def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>;

This example also shows that tied operands are only listed once. In the X86 backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied to the output). InstAliases take a flattened operand list without duplicates for tied operands. The result of an instruction alias can also use immediates and fixed physical registers which are added as simple immediate operands in the result, for example:

// Fixed Immediate operand.
def : InstAlias<"aad", (AAD8i8 10)>;

// Fixed register operand.
def : InstAlias<"fcomi", (COM_FIr ST1)>;

// Simple alias.
def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>;

Instruction aliases can also have a Requires clause to make them subtarget specific.

Instruction Matching

To Be Written

Target-specific Implementation Notes

This section of the document explains features or design decisions that are specific to the code generator for a particular target. First we start with a table that summarizes what features are supported by each target.

Target Feature Matrix

Note that this table does not include the C backend or Cpp backends, since they do not use the target independent code generator infrastructure. It also doesn't list features that are not supported fully by any target yet. It considers a feature to be supported if at least one subtarget supports it. A feature being supported means that it is useful and works for most cases; it does not indicate that there are zero known bugs in the implementation. Here is the key:

Key: Unknown, No support, Partial Support, Complete Support (distinguished by cell color in the original table)

Here is the table:

Targets: ARM, Alpha, Blackfin, CellSPU, MBlaze, MSP430, Mips, PTX, PowerPC, Sparc, SystemZ, X86, XCore.
Features: is generally reliable, assembly parser, disassembler, inline asm (*), jit (*), .o file writing, tail calls.
(The per-target support levels were indicated by cell color and are not reproduced here; the * entries refer to the notes below.)
Is Generally Reliable

This box indicates whether the target is considered to be production quality. This indicates that the target has been used as a static compiler to compile large amounts of code by a variety of different people and is in continuous use.

Assembly Parser

This box indicates whether the target supports parsing target specific .s files by implementing the MCAsmParser interface. This is required for llvm-mc to be able to act as a native assembler and is required for inline assembly support in the native .o file writer.

Disassembler

This box indicates whether the target supports the MCDisassembler API for disassembling machine opcode bytes into MCInsts.

Inline Asm

This box indicates whether the target supports most popular inline assembly constraints and modifiers.

X86 lacks reliable support for inline assembly constraints relating to the X86 floating point stack.

JIT Support

This box indicates whether the target supports the JIT compiler through the ExecutionEngine interface.

The ARM backend has basic support for integer code in ARM codegen mode, but lacks NEON and full Thumb support.

.o File Writing

This box indicates whether the target supports writing .o files (e.g. MachO, ELF, and/or COFF) directly from the target. Note that the target also must include an assembly parser and general inline assembly support for full inline assembly support in the .o writer.

Targets that don't support this feature can obviously still write out .o files; they just rely on having an external assembler to translate from a .s file to a .o file (as is the case for many C compilers).

Tail Calls

This box indicates whether the target supports guaranteed tail calls. These are calls marked "tail" and use the fastcc calling convention. Please see the tail call section for more details.

Tail call optimization

Tail call optimization, callee reusing the stack of the caller, is currently supported on x86/x86-64 and PowerPC. It is performed if:

• Caller and callee have the calling convention fastcc or cc 10 (GHC call convention).
• The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void).
• Option -tailcallopt is enabled.
• Platform specific constraints are met.

x86/x86-64 constraints:

• No variable argument lists are used.
• On x86-64 when generating GOT/PIC code only module-local calls (visibility = hidden or protected) are supported.

PowerPC constraints:

• No variable argument lists are used.
• No byval parameters are used.
• On ppc32/64 GOT/PIC only module-local calls (visibility = hidden or protected) are supported.

Example:

Call as llc -tailcallopt test.ll.

declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4)

define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
  %l1 = add i32 %in1, %in2
  %tmp = tail call fastcc i32 @tailcallee(i32 %in1 inreg, i32 %in2 inreg, i32 %in1, i32 %l1)
  ret i32 %tmp
}

Implications of -tailcallopt:

To support tail call optimization in situations where the callee has more arguments than the caller a 'callee pops arguments' convention is used. This currently causes each fastcc call that is not tail call optimized (because one or more of above constraints are not met) to be followed by a readjustment of the stack. So performance might be worse in such cases.

Sibling call optimization

Sibling call optimization is a restricted form of tail call optimization. Unlike tail call optimization described in the previous section, it can be performed automatically on any tail calls when the -tailcallopt option is not specified.

Sibling call optimization is currently performed on x86/x86-64 when the following constraints are met:

• Caller and callee have the same calling convention. It can be either c or fastcc.
• The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void).
• Caller and callee have matching return type or the callee result is not used.
• If any of the callee arguments are being passed on the stack, they must be available in caller's own incoming argument stack and the frame offsets must be the same.

Example:

declare i32 @bar(i32, i32)

define i32 @foo(i32 %a, i32 %b, i32 %c) {
entry:
  %0 = tail call i32 @bar(i32 %a, i32 %b)
  ret i32 %0
}
The X86 backend

The X86 code generator lives in the lib/Target/X86 directory. This code generator is capable of targeting a variety of x86-32 and x86-64 processors, and includes support for ISA extensions such as MMX and SSE.

X86 Target Triples supported

The following are the known target triples that are supported by the X86 backend. This is not an exhaustive list, and it would be useful to add those that people test.

• i686-pc-linux-gnu — Linux
• i386-unknown-freebsd5.3 — FreeBSD 5.3
• i686-pc-cygwin — Cygwin on Win32
• i686-pc-mingw32 — MingW on Win32
• i386-pc-mingw32msvc — MingW crosscompiler on Linux
• i686-apple-darwin* — Apple Darwin on X86
• x86_64-unknown-linux-gnu — Linux
X86 Calling Conventions supported

The following target-specific calling conventions are known to the backend:

• x86_StdCall — stdcall calling convention seen on Microsoft Windows platform (CC ID = 64).
• x86_FastCall — fastcall calling convention seen on Microsoft Windows platform (CC ID = 65).
Representing X86 addressing modes in MachineInstrs

The x86 has a very flexible way of accessing memory. It is capable of forming memory addresses of the following expression directly in integer instructions (which use ModR/M addressing):

SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32

In order to represent this, LLVM tracks no less than 5 operands for each memory operand of this form. This means that the "load" form of 'mov' has the following MachineOperands in this order:

Index:        0     |    1        2       3           4          5
Meaning:   DestReg, | BaseReg,  Scale, IndexReg, Displacement, Segment
OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg,  SignExtImm,   PhysReg

Stores, and all other instructions, treat the five memory operands in the same way and in the same order. If the segment register is unspecified (regno = 0), then no segment override is generated. "Lea" operations do not have a segment register specified, so they only have 4 operands for their memory reference.
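For illustration, a minimal sketch of unpacking these operands from a load-form instruction (MI is assumed to point at such an instruction, and the immediate accessors apply when the displacement is a plain integer):

// DestReg plus the five memory operands of a "load" form.
unsigned DestReg = MI->getOperand(0).getReg();
unsigned BaseReg = MI->getOperand(1).getReg();
int64_t  Scale   = MI->getOperand(2).getImm();  // 1, 2, 4 or 8
unsigned IdxReg  = MI->getOperand(3).getReg();
int64_t  Disp    = MI->getOperand(4).getImm();  // sign-extended
unsigned SegReg  = MI->getOperand(5).getReg();  // 0 => no override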

X86 address spaces supported

x86 has an experimental feature which provides the ability to perform loads and stores to different address spaces via the x86 segment registers. A segment override prefix byte on an instruction causes the instruction's memory access to go to the specified segment. LLVM address space 0 is the default address space, which includes the stack, and any unqualified memory accesses in a program. Address spaces 1-255 are currently reserved for user-defined code. The GS-segment is represented by address space 256, while the FS-segment is represented by address space 257. Other x86 segments have yet to be allocated address space numbers.

While these address spaces may seem similar to TLS via the thread_local keyword, and often use the same underlying hardware, there are some fundamental differences.

The thread_local keyword applies to global variables and specifies that they are to be allocated in thread-local memory. There are no type qualifiers involved, and these variables can be pointed to with normal pointers and accessed with normal loads and stores. The thread_local keyword is target-independent at the LLVM IR level (though LLVM doesn't yet have implementations of it for some configurations).

Special address spaces, in contrast, apply to static types. Every load and store has a particular address space in its address operand type, and this is what determines which address space is accessed. LLVM ignores these special address space qualifiers on global variables, and does not provide a way to directly allocate storage in them. At the LLVM IR level, the behavior of these special address spaces depends in part on the underlying OS or runtime environment, and they are specific to x86 (and LLVM doesn't yet handle them correctly in some cases).
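For illustration, a minimal sketch of building such a qualified pointer type through the C++ API (address space 257 is the FS-segment, as described above; const-qualification of Type varies between LLVM versions):

LLVMContext Context;
Type *Int32Ty = Type::getInt32Ty(Context);
// Pointer type i32 addrspace(257)*: loads and stores through it
// get an FS segment override on x86.
PointerType *FSPtrTy = PointerType::get(Int32Ty, 257);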

Some operating systems and runtime environments use (or may in the future use) the FS/GS-segment registers for various low-level purposes, so care should be taken when considering them.

Instruction naming

An instruction name consists of the base name, a default operand size, and a character per operand with an optional special size. For example:

ADD8rr      -> add, 8-bit register, 8-bit register
IMUL16rmi   -> imul, 16-bit register, 16-bit memory, 16-bit immediate
IMUL16rmi8  -> imul, 16-bit register, 16-bit memory, 8-bit immediate
MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
The PowerPC backend

The PowerPC code generator lives in the lib/Target/PowerPC directory. The code generation is retargetable to several variations or subtargets of the PowerPC ISA, including ppc32, ppc64 and altivec.

LLVM PowerPC ABI

LLVM follows the AIX PowerPC ABI, with two deviations. First, LLVM uses PC-relative (PIC) or static addressing for accessing global values, so no TOC (r2) is used. Second, r31 is used as a frame pointer to allow dynamic growth of a stack frame. LLVM takes advantage of having no TOC to provide space to save the frame pointer in the PowerPC linkage area of the caller frame. Other details can be found in the PowerPC ABI specification. Note: that document describes the 32 bit ABI. The 64 bit ABI is similar except space for GPRs is 8 bytes wide (not 4) and r13 is reserved for system use.

Frame Layout

The size of a PowerPC frame is usually fixed for the duration of a function's invocation. Since the frame is fixed size, all references into the frame can be accessed via fixed offsets from the stack pointer. The exception to this is when dynamic alloca or variable sized arrays are present; then a base pointer (r31) is used as a proxy for the stack pointer and the stack pointer is free to grow or shrink. A base pointer is also used if llvm-gcc is not passed the -fomit-frame-pointer flag. The stack pointer is always aligned to 16 bytes, so that space allocated for altivec vectors will be properly aligned.

An invocation frame is laid out as follows (low memory at top):

Linkage
Parameter area
Dynamic area
Locals area
Saved registers area
Previous Frame

The linkage area is used by a callee to save special registers prior to allocating its own frame. Only three entries are relevant to LLVM. The first entry is the previous stack pointer (sp), aka link. This allows probing tools like gdb or exception handlers to quickly scan the frames in the stack. A function epilog can also use the link to pop the frame from the stack. The third entry in the linkage area is used to save the return address from the lr register. Finally, as mentioned above, the last entry is used to save the previous frame pointer (r31). The entries in the linkage area are the size of a GPR, thus the linkage area is 24 bytes long in 32 bit mode and 48 bytes in 64 bit mode.


32 bit linkage area

Offset  Contents
0       Saved SP (r1)
4       Saved CR
8       Saved LR
12      Reserved
16      Reserved
20      Saved FP (r31)

64 bit linkage area

Offset  Contents
0       Saved SP (r1)
8       Saved CR
16      Saved LR
24      Reserved
32      Reserved
40      Saved FP (r31)

The parameter area is used to store arguments being passed to a callee function. Following the PowerPC ABI, the first few arguments are actually passed in registers, with the space in the parameter area unused. However, if there are not enough registers or the callee is a thunk or vararg function, these register arguments can be spilled into the parameter area. Thus, the parameter area must be large enough to store all the parameters for the largest call sequence made by the caller. The size must also be minimally large enough to spill registers r3-r10. This allows callees blind to the call signature, such as thunks and vararg functions, enough space to cache the argument registers. Therefore, the parameter area is minimally 32 bytes (64 bytes in 64 bit mode.) Also note that since the parameter area is a fixed offset from the top of the frame, a callee can access its spilled arguments using fixed offsets from the stack pointer (or base pointer.)

Combining the information about the linkage and parameter areas with the alignment requirements, a stack frame is minimally 64 bytes in 32 bit mode and 128 bytes in 64 bit mode.

The dynamic area starts out as size zero. If a function uses dynamic alloca then space is added to the stack, the linkage and parameter areas are shifted to top of stack, and the new space is available immediately below the linkage and parameter areas. The cost of shifting the linkage and parameter areas is minor since only the link value needs to be copied. The link value can be easily fetched by adding the original frame size to the base pointer. Note that allocations in the dynamic space need to observe 16 byte alignment.

The locals area is where the llvm compiler reserves space for local variables.

The saved registers area is where the llvm compiler spills callee saved registers on entry to the callee.

Prolog/Epilog

The llvm prolog and epilog are the same as described in the PowerPC ABI, with the following exceptions. Callee saved registers are spilled after the frame is created. This allows the llvm epilog/prolog support to be common with other targets. The base pointer callee saved register r31 is saved in the TOC slot of linkage area. This simplifies allocation of space for the base pointer and makes it convenient to locate programmatically and during debugging.

Dynamic Allocation

TODO - More to come.

Chris Lattner
The LLVM Compiler Infrastructure
Last modified: $Date$

diff --git a/final/docs/CodingStandards.html b/final/docs/CodingStandards.html
new file mode 100644
index 00000000000..f290712eebf
--- /dev/null
+++ b/final/docs/CodingStandards.html
@@ -0,0 +1,1499 @@

LLVM Coding Standards
1. Introduction
2. Mechanical Source Issues
   1. Source Code Formatting
      1. Commenting
      2. Comment Formatting
      3. #include Style
      4. Source Code Width
      5. Use Spaces Instead of Tabs
      6. Indent Code Consistently
   2. Compiler Issues
      1. Treat Compiler Warnings Like Errors
      2. Write Portable Code
      3. Do not use RTTI or Exceptions
      4. Use of class/struct Keywords
3. Style Issues
   1. The High-Level Issues
      1. A Public Header File is a Module
      2. #include as Little as Possible
      3. Keep "internal" Headers Private
      4. Use Early Exits and continue to Simplify Code
      5. Don't use else after a return
      6. Turn Predicate Loops into Predicate Functions
   2. The Low-Level Issues
      1. Name Types, Functions, Variables, and Enumerators Properly
      2. Assert Liberally
      3. Do not use 'using namespace std'
      4. Provide a virtual method anchor for classes in headers
      5. Don't evaluate end() every time through a loop
      6. #include <iostream> is forbidden
      7. Use raw_ostream
      8. Avoid std::endl
   3. Microscopic Details
      1. Spaces Before Parentheses
      2. Prefer Preincrement
      3. Namespace Indentation
      4. Anonymous Namespaces
4. See Also

Written by Chris Lattner

Introduction

This document attempts to describe a few coding standards that are being used in the LLVM source tree. Although no coding standards should be regarded as absolute requirements to be followed in all instances, coding standards can be useful.

This document intentionally does not prescribe fixed standards for religious issues such as brace placement and space usage. For issues like this, follow the golden rule:

If you are adding a significant body of source to a project, feel free to use whatever style you are most comfortable with. If you are extending, enhancing, or bug fixing already implemented code, use the style that is already being used so that the source is uniform and easy to follow.

The ultimate goal of these guidelines is to increase the readability and maintainability of our common source base. If you have suggestions for topics to be included, please mail them to Chris.

Mechanical Source Issues

Source Code Formatting

Commenting

Comments are one critical part of readability and maintainability. Everyone knows they should comment their code, and so should you. When writing comments, write them as English prose, which means they should use proper capitalization, punctuation, etc. Although we all should probably comment our code more than we do, there are a few very critical places where documentation is very useful:

File Headers

Every source file should have a header on it that describes the basic purpose of the file. If a file does not have a header, it should not be checked into Subversion. Most source trees will probably have a standard file header format. The standard format for the LLVM source tree looks like this:

//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the Instruction class, which is the
// base class for all of the VM instructions.
//
//===----------------------------------------------------------------------===//

A few things to note about this particular format: The "-*- C++ -*-" string on the first line is there to tell Emacs that the source file is a C++ file, not a C file (Emacs assumes .h files are C files by default). Note that this tag is not necessary in .cpp files. The name of the file is also on the first line, along with a very short description of the purpose of the file. This is important when printing out code and flipping through lots of pages.

The next section in the file is a concise note that defines the license that the file is released under. This makes it perfectly clear what terms the source code can be distributed under and should not be modified in any way.

The main body of the description does not have to be very long in most cases. Here it's only two lines. If an algorithm is being implemented or something tricky is going on, a reference to the paper where it is published should be included, as well as any notes or "gotchas" in the code to watch out for.

Class overviews

Classes are one fundamental part of a good object oriented design. As such, a class definition should have a comment block that explains what the class is used for... if it's not obvious. If it's so completely obvious your grandma could figure it out, it's probably safe to leave it out. Naming classes something sane goes a long way towards avoiding writing documentation.

Method information

Methods defined in a class (as well as any global functions) should also be documented properly. A quick note about what it does and a description of the borderline behaviour is all that is necessary here (unless something particularly tricky or insidious is going on). The hope is that people can figure out how to use your interfaces without reading the code itself... that is the goal metric.

Good things to talk about here are what happens when something unexpected happens: does the method return null? Abort? Format your hard disk?

Comment Formatting

In general, prefer C++ style (//) comments. They take less space, require less typing, don't have nesting problems, etc. There are a few cases when it is useful to use C style (/* */) comments however:

1. When writing C code: Obviously if you are writing C code, use C style comments.
2. When writing a header file that may be #included by a C source file.
3. When writing a source file that is used by a tool that only accepts C style comments.

To comment out a large block of code, use #if 0 and #endif. These nest properly and are better behaved in general than C style comments.

#include Style

Immediately after the header file comment (and include guards if working on a header file), the minimal list of #includes required by the file should be listed. We prefer these #includes to be listed in this order:

1. Main Module Header
2. Local/Private Headers
3. llvm/*
4. llvm/Analysis/*
5. llvm/Assembly/*
6. llvm/Bitcode/*
7. llvm/CodeGen/*
8. ...
9. Support/*
10. Config/*
11. System #includes

and each category should be sorted by name.

The "Main Module Header" file applies to .cpp files which implement an interface defined by a .h file. This #include should always be included first regardless of where it lives on the file system. By including a header file first in the .cpp files that implement the interfaces, we ensure that the header does not have any hidden dependencies which are not explicitly #included in the header, but should be. It is also a form of documentation in the .cpp file to indicate where the interfaces it implements are defined.

Source Code Width

Write your code to fit within 80 columns of text. This helps those of us who like to print out code and look at your code in an xterm without resizing it.

The longer answer is that there must be some limit to the width of the code in order to reasonably allow developers to have multiple files side-by-side in windows on a modest display. If you are going to pick a width limit, it is somewhat arbitrary but you might as well pick something standard. Going with 90 columns (for example) instead of 80 columns wouldn't add any significant value and would be detrimental to printing out code. Also many other projects have standardized on 80 columns, so some people have already configured their editors for it (vs something else, like 90 columns).

This is one of many contentious issues in coding standards, but it is not up for debate.

Use Spaces Instead of Tabs

In all cases, prefer spaces to tabs in source files. People have different preferred indentation levels, and different styles of indentation that they like; this is fine. What isn't fine is that different editors/viewers expand tabs out to different tab stops. This can cause your code to look completely unreadable, and it is not worth dealing with.

As always, follow the Golden Rule above: follow the style of existing code if you are modifying and extending it. If you like four spaces of indentation, DO NOT do that in the middle of a chunk of code with two spaces of indentation. Also, do not reindent a whole source file: it makes for incredible diffs that are absolutely worthless.

Indent Code Consistently

Okay, in your first year of programming you were told that indentation is important. If you didn't believe and internalize this then, now is the time. Just do it.

Compiler Issues

Treat Compiler Warnings Like Errors

If your code has compiler warnings in it, something is wrong — you aren't casting values correctly, you have "questionable" constructs in your code, or you are doing something legitimately wrong. Compiler warnings can cover up legitimate errors in output and make dealing with a translation unit difficult.

It is not possible to prevent all warnings from all compilers, nor is it desirable. Instead, pick a standard compiler (like gcc) that provides a good thorough set of warnings, and stick to it. At least in the case of gcc, it is possible to work around any spurious errors by changing the syntax of the code slightly. For example, a warning that annoys me occurs when I write code like this:

if (V = getValue()) {
  ...
}

gcc will warn me that I probably want to use the == operator, and that I probably mistyped it. In most cases, I haven't, and I really don't want the spurious errors. To fix this particular problem, I rewrite the code like this:

if ((V = getValue())) {
  ...
}

which shuts gcc up. Any gcc warning that annoys you can be fixed by massaging the code appropriately.

These are the gcc warnings that I prefer to enable:

-Wall -Winline -W -Wwrite-strings -Wno-unused
Write Portable Code

In almost all cases, it is possible and within reason to write completely portable code. If there are cases where it isn't possible to write portable code, isolate it behind a well defined (and well documented) interface.

In practice, this means that you shouldn't assume much about the host compiler, and Visual Studio tends to be the lowest common denominator. If advanced features are used, they should only be an implementation detail of a library which has a simple exposed API, and preferably be buried in libSystem.

Do not use RTTI or Exceptions

In an effort to reduce code and executable size, LLVM does not use RTTI (e.g. dynamic_cast<>) or exceptions. These two language features violate the general C++ principle of "you only pay for what you use", causing executable bloat even if exceptions are never used in the code base, or if RTTI is never used for a class. Because of this, we turn them off globally in the code.

That said, LLVM does make extensive use of a hand-rolled form of RTTI that uses templates like isa<>, cast<>, and dyn_cast<>. This form of RTTI is opt-in and can be added to any class. It is also substantially more efficient than dynamic_cast<>.
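A minimal sketch of the three templates, assuming V is a valid Value*:

if (isa<Instruction>(V)) {
  // Query only: V is known to be an Instruction in this block.
}
if (Instruction *I = dyn_cast<Instruction>(V)) {
  // Checked cast: I is non-null only if V really is an Instruction.
}
Instruction *I2 = cast<Instruction>(V); // asserts if V is not an Instruction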

Use of class/struct Keywords

In C++, the class and struct keywords can be used almost interchangeably. The only difference is when they are used to declare a class: class makes all members private by default while struct makes all members public by default.

Unfortunately, not all compilers follow the rules and some will generate different symbols based on whether class or struct was used to declare the symbol. This can lead to problems at link time.

So, the rule for LLVM is to always use the class keyword, unless all members are public and the type is a C++ POD type, in which case struct is allowed.
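For illustration, under this rule (type names invented):

// All members public and a POD type: struct is allowed.
struct LineColPair {
  unsigned Line, Col;
};

// Has private state and invariants: use class.
class SymbolEntry {
  unsigned ID;
public:
  explicit SymbolEntry(unsigned id) : ID(id) {}
  unsigned getID() const { return ID; }
};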

Style Issues

The High-Level Issues

A Public Header File is a Module

C++ doesn't do too well in the modularity department. There is no real encapsulation or data hiding (unless you use expensive protocol classes), but it is what we have to work with. When you write a public header file (in the LLVM source tree, they live in the top level "include" directory), you are defining a module of functionality.

Ideally, modules should be completely independent of each other, and their header files should only #include the absolute minimum number of headers possible. A module is not just a class, a function, or a namespace: it's a collection of these that defines an interface. This interface may be several functions, classes, or data structures, but the important issue is how they work together.

In general, a module should be implemented by one or more .cpp files. Each of these .cpp files should include the header that defines their interface first. This ensures that all of the dependences of the module header have been properly added to the module header itself, and are not implicit. System headers should be included after user headers for a translation unit.

#include as Little as Possible

#include hurts compile time performance. Don't do it unless you have to, especially in header files.

But wait! Sometimes you need to have the definition of a class to use it, or to inherit from it. In these cases go ahead and #include that header file. Be aware however that there are many cases where you don't need to have the full definition of a class. If you are using a pointer or reference to a class, you don't need the header file. If you are simply returning a class instance from a prototyped function or method, you don't need it. In fact, for most cases, you simply don't need the definition of a class. And not #include'ing speeds up compilation.
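
For example, this hypothetical header needs only a forward declaration of Bar, not its full definition:

+class Bar;                      // forward declaration is sufficient
+
+Bar *lookupBar(unsigned ID);    // returns a pointer: no definition needed
+void processBar(const Bar &B);  // takes a reference: no definition needed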

It is easy to go overboard on this recommendation, however. You must include all of the header files that you are using — you can include them either directly or indirectly (through another header file). To make sure that you don't accidentally forget to include a header file in your module header, make sure to include your module header first in the implementation file (as mentioned above). This way there won't be any hidden dependencies that you'll find out about later.

Keep "internal" Headers Private

Many modules have a complex implementation that causes them to use more than one implementation (.cpp) file. It is often tempting to put the internal communication interface (helper classes, extra functions, etc.) in the public module header file. Don't do this!

If you really need to do something like this, put a private header file in the same directory as the source files, and include it locally. This ensures that your private interface remains private and undisturbed by outsiders.
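
A minimal sketch of this layout (the file and directory names are hypothetical):

+// lib/Transforms/Foo/FooInternals.h -- helpers shared only by this
+// module's .cpp files; deliberately not placed under include/.
+
+// lib/Transforms/Foo/Foo.cpp:
+#include "FooInternals.h"   // quoted, directory-local include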

Note however, that it's okay to put extra implementation methods in a public class itself. Just make them private (or protected) and all is well.

Use Early Exits and continue to Simplify Code

When reading code, keep in mind how much state and how many previous decisions have to be remembered by the reader to understand a block of code. Aim to reduce indentation where possible when it doesn't make it more difficult to understand the code. One great way to do this is by making use of early exits and the continue keyword in long loops. As an example of using an early exit from a function, consider this "bad" code:

+Value *DoSomething(Instruction *I) {
+  if (!isa<TerminatorInst>(I) &&
+      I->hasOneUse() && SomeOtherThing(I)) {
+    ... some long code ....
+  }
+  
+  return 0;
+}

This code has several problems if the body of the 'if' is large. When you're looking at the top of the function, it isn't immediately clear that this only does interesting things with non-terminator instructions, and only applies to things with the other predicates. Second, it is relatively difficult to describe (in comments) why these predicates are important because the if statement makes it difficult to lay out the comments. Third, when you're deep within the body of the code, it is indented an extra level. Finally, when reading the top of the function, it isn't clear what the result is if the predicate isn't true; you have to read to the end of the function to know that it returns null.

It is much preferred to format the code like this:

+Value *DoSomething(Instruction *I) {
+  // Terminators never need 'something' done to them because ... 
+  if (isa<TerminatorInst>(I))
+    return 0;
+
+  // We conservatively avoid transforming instructions with multiple uses
+  // because goats like cheese.
+  if (!I->hasOneUse())
+    return 0;
+
+  // This is really just here for example.
+  if (!SomeOtherThing(I))
+    return 0;
+    
+  ... some long code ....
+}

This fixes these problems. A similar problem frequently happens in for loops. A silly example is something like this:

+  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(II)) {
+      Value *LHS = BO->getOperand(0);
+      Value *RHS = BO->getOperand(1);
+      if (LHS != RHS) {
+        ...
+      }
+    }
+  }

When you have very, very small loops, this sort of structure is fine. But if it exceeds 10-15 lines, it becomes difficult for people to read and understand at a glance. The problem with this sort of code is that it gets very nested very quickly, which means that the reader of the code has to keep a lot of context in their brain to remember what is going on in the loop, because they don't know if/when the if conditions will have elses, etc. It is strongly preferred to structure the loop like this:

+  for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+    BinaryOperator *BO = dyn_cast<BinaryOperator>(II);
+    if (!BO) continue;
+    
+    Value *LHS = BO->getOperand(0);
+    Value *RHS = BO->getOperand(1);
+    if (LHS == RHS) continue;
+
+    ...
+  }

This has all the benefits of using early exits for functions: it reduces nesting of the loop, it makes it easier to describe why the conditions are true, and it makes it obvious to the reader that there is no else coming up that they have to push context into their brain for. If a loop is large, this can be a big understandability win.

Don't use else after a return

For reasons similar to those above (reduction of indentation and easier reading), please do not use 'else' or 'else if' after something that interrupts control flow — like return, break, continue, goto, etc. For example, this is bad:

+  case 'J': {
+    if (Signed) {
+      Type = Context.getsigjmp_bufType();
+      if (Type.isNull()) {
+        Error = ASTContext::GE_Missing_sigjmp_buf;
+        return QualType();
+      } else {
+        break;
+      }
+    } else {
+      Type = Context.getjmp_bufType();
+      if (Type.isNull()) {
+        Error = ASTContext::GE_Missing_jmp_buf;
+        return QualType();
+      } else {
+        break;
+      }
+    }
+  }
+  }

It is better to write it like this:

+  case 'J':
+    if (Signed) {
+      Type = Context.getsigjmp_bufType();
+      if (Type.isNull()) {
+        Error = ASTContext::GE_Missing_sigjmp_buf;
+        return QualType();
+      }
+    } else {
+      Type = Context.getjmp_bufType();
+      if (Type.isNull()) {
+        Error = ASTContext::GE_Missing_jmp_buf;
+        return QualType();
+      }
+    }
+    break;

Or better yet (in this case) as:

+  case 'J':
+    if (Signed)
+      Type = Context.getsigjmp_bufType();
+    else
+      Type = Context.getjmp_bufType();
+    
+    if (Type.isNull()) {
+      Error = Signed ? ASTContext::GE_Missing_sigjmp_buf :
+                       ASTContext::GE_Missing_jmp_buf;
+      return QualType();
+    }
+    break;

The idea is to reduce indentation and the amount of code you have to keep track of when reading the code.

Turn Predicate Loops into Predicate Functions

It is very common to write small loops that just compute a boolean value. There are a number of ways that people commonly write these, but an example of this sort of thing is:

+  bool FoundFoo = false;
+  for (unsigned i = 0, e = BarList.size(); i != e; ++i)
+    if (BarList[i]->isFoo()) {
+      FoundFoo = true;
+      break;
+    }
+    
+  if (FoundFoo) {
+    ...
+  }

This sort of code is awkward to write, and is almost always a bad sign. Instead of this sort of loop, we strongly prefer to use a predicate function (which may be static) that uses early exits to compute the predicate. We prefer the code to be structured like this:

+/// ListContainsFoo - Return true if the specified list has an element that is
+/// a foo.
+static bool ListContainsFoo(const std::vector<Bar*> &List) {
+  for (unsigned i = 0, e = List.size(); i != e; ++i)
+    if (List[i]->isFoo())
+      return true;
+  return false;
+}
+...
+
+  if (ListContainsFoo(BarList)) {
+    ...
+  }

There are many reasons for doing this: it reduces indentation and factors out code which can often be shared by other code that checks for the same predicate. More importantly, it forces you to pick a name for the function, and forces you to write a comment for it. In this silly example, this doesn't add much value. However, if the condition is complex, this can make it a lot easier for the reader to understand the code that queries for this predicate. Instead of being faced with the in-line details of how we check to see if the BarList contains a foo, we can trust the function name and continue reading with better locality.

The Low-Level Issues

Name Types, Functions, Variables, and Enumerators Properly

Poorly-chosen names can mislead the reader and cause bugs. We cannot stress enough how important it is to use descriptive names. Pick names that match the semantics and role of the underlying entities, within reason. Avoid abbreviations unless they are well known. After picking a good name, make sure to use consistent capitalization for the name, as inconsistency requires clients to either memorize the APIs or to look it up to find the exact spelling.

In general, names should be in camel case (e.g. TextFileReader and isLValue()). Different kinds of declarations have different rules:

  • Type names (including classes, structs, enums, typedefs, etc.) should be nouns and start with an upper-case letter (e.g. TextFileReader).

  • Function names should be verb phrases (as they represent actions), and command-like functions should be imperative. The name should be camel case and start with a lower-case letter (e.g. openFile() or isFoo()).

  • Enum declarations (e.g. enum Foo {...}) are types, so they should follow the naming conventions for types. A common use for enums is as a discriminator for a union, or an indicator of a subclass. When an enum is used for something like this, it should have a Kind suffix (e.g. ValueKind).

  • Enumerators (e.g. enum { Foo, Bar }) and public member variables should start with an upper-case letter, just like types. Unless the enumerators are defined in their own small namespace or inside a class, enumerators should have a prefix corresponding to the enum declaration name. For example, enum ValueKind { ... }; may contain enumerators like VK_Argument, VK_BasicBlock, etc. Enumerators that are just convenience constants are exempt from the requirement for a prefix. For instance:

    +enum {
    +  MaxSize = 42,
    +  Density = 12
    +};

As an exception, classes that mimic STL classes can have member names in STL's style of lower-case words separated by underscores (e.g. begin(), push_back(), and empty()).

Here are some examples of good and bad names:

+class VehicleMaker {
+  ...
+  Factory<Tire> F;            // Bad -- abbreviation and non-descriptive.
+  Factory<Tire> Factory;      // Better.
+  Factory<Tire> TireFactory;  // Even better -- if VehicleMaker has more than one
+                              // kind of factory.
+};
+
+Vehicle MakeVehicle(VehicleType Type) {
+  VehicleMaker M;                         // Might be OK if it has a short life-span.
+  Tire tmp1 = M.makeTire();               // Bad -- 'tmp1' provides no information.
+  Light headlight = M.makeLight("head");  // Good -- descriptive.
+  ...
+}
Assert Liberally

Use the "assert" macro to its fullest. Check all of your +preconditions and assumptions, you never know when a bug (not necessarily even +yours) might be caught early by an assertion, which reduces debugging time +dramatically. The "<cassert>" header file is probably already +included by the header files you are using, so it doesn't cost anything to use +it.

+ +

To further assist with debugging, make sure to put some kind of error message +in the assertion statement, which is printed if the assertion is tripped. This +helps the poor debugger make sense of why an assertion is being made and +enforced, and hopefully what to do about it. Here is one complete example:

+inline Value *getOperand(unsigned i) { 
+  assert(i < Operands.size() && "getOperand() out of range!");
+  return Operands[i]; 
+}

Here are more examples:

+assert(Ty->isPointerType() && "Can't allocate a non pointer type!");
+
+assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!");
+
+assert(idx < getNumSuccessors() && "Successor # out of range!");
+
+assert(V1.getType() == V2.getType() && "Constant types must be identical!");
+
+assert(isa<PHINode>(Succ->front()) && "Only works on PHId BBs!");

You get the idea.


Please be aware that, when adding assert statements, not all compilers are aware of the semantics of the assert. In some places, asserts are used to indicate a piece of code that should not be reached. These are typically of the form:

+assert(0 && "Some helpful error message");

When used in a function that returns a value, they should be followed with a return statement and a comment indicating that this line is never reached. This will prevent a compiler which is unable to deduce that the assert statement never returns from generating a warning.

+assert(0 && "Some helpful error message");
+// Not reached
+return 0;

Another issue is that values used only by assertions will produce an "unused value" warning when assertions are disabled. For example, this code will warn:

+unsigned Size = V.size();
+assert(Size > 42 && "Vector smaller than it should be");
+
+bool NewToSet = MySet.insert(Value);
+assert(NewToSet && "The value shouldn't be in the set yet");

These two cases are interestingly different. In the first case, the call to V.size() is only useful for the assert, and we don't want it executed when assertions are disabled. Code like this should move the call into the assert itself. In the second case, the side effects of the call must happen whether the assert is enabled or not. In this case, the value should be cast to void to disable the warning. To be specific, it is preferred to write the code like this:

+assert(V.size() > 42 && "Vector smaller than it should be");
+
+bool NewToSet = MySet.insert(Value); (void)NewToSet;
+assert(NewToSet && "The value shouldn't be in the set yet");
Do not use 'using namespace std'

In LLVM, we prefer to explicitly prefix all identifiers from the standard namespace with an "std::" prefix, rather than rely on "using namespace std;".

In header files, adding a 'using namespace XXX' directive pollutes the namespace of any source file that #includes the header. This is clearly a bad thing.

In implementation files (e.g. .cpp files), the rule is more of a stylistic rule, but is still important. Basically, using explicit namespace prefixes makes the code clearer, because it is immediately obvious what facilities are being used and where they are coming from. It also makes the code more portable, because namespace clashes cannot occur between LLVM code and other namespaces. The portability rule is important because different standard library implementations expose different symbols (potentially ones they shouldn't), and future revisions to the C++ standard will add more symbols to the std namespace. As such, we never use 'using namespace std;' in LLVM.

The exception to the general rule (i.e. it's not an exception for the std namespace) is for implementation files. For example, all of the code in the LLVM project lives in the 'llvm' namespace. As such, it is ok, and actually clearer, for the .cpp files to have a 'using namespace llvm;' directive at the top, after the #includes. This reduces indentation in the body of the file for source editors that indent based on braces, and keeps the conceptual context cleaner. The general form of this rule is that any .cpp file that implements code in any namespace may use that namespace (and its parents'), but should not use any others.
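
A sketch of how this looks in practice (sortValues is a hypothetical function living in the 'llvm' namespace):

+// Foo.cpp -- implements code in the 'llvm' namespace.
+#include <algorithm>
+#include <vector>
+
+namespace llvm { void sortValues(std::vector<int> &V); }
+
+using namespace llvm;   // ok: this file implements 'llvm' code
+
+void llvm::sortValues(std::vector<int> &V) {
+  std::sort(V.begin(), V.end());   // std:: is still spelled out explicitly
+}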

Provide a virtual method anchor for classes in headers

If a class is defined in a header file and has a v-table (either it has virtual methods or it derives from classes with virtual methods), it must always have at least one out-of-line virtual method in the class. Without this, the compiler will copy the vtable and RTTI into every .o file that #includes the header, bloating .o file sizes and increasing link times.
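
A common way to satisfy this rule is a dummy out-of-line virtual method, often called an anchor (a sketch; the class is hypothetical):

+// In Widget.h:
+class Widget {
+public:
+  virtual void anchor();       // declared here, defined out-of-line
+  virtual void draw() {}       // other virtual methods may stay inline
+};
+
+// In Widget.cpp:
+void Widget::anchor() {}       // vtable and RTTI are emitted here, once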

Don't evaluate end() every time through a loop

Because C++ doesn't have a standard "foreach" loop (though it can be emulated with macros and may be coming in C++0x) we end up writing a lot of loops that manually iterate from begin to end on a variety of containers or through other data structures. One common mistake is to write a loop in this style:

+  BasicBlock *BB = ...
+  for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I)
+     ... use I ...

The problem with this construct is that it evaluates "BB->end()" every time through the loop. Instead of writing the loop like this, we strongly prefer loops to be written so that they evaluate it once before the loop starts. A convenient way to do this is like so:

+  BasicBlock *BB = ...
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+     ... use I ...

The observant may quickly point out that these two loops may have different semantics: if the container (a basic block in this case) is being mutated, then "BB->end()" may change its value every time through the loop and the second loop may not in fact be correct. If you actually do depend on this behavior, please write the loop in the first form and add a comment indicating that you did it intentionally.

Why do we prefer the second form (when correct)? Writing the loop in the first form has two problems. First it may be less efficient than evaluating it at the start of the loop. In this case, the cost is probably minor — a few extra loads every time through the loop. However, if the base expression is more complex, then the cost can rise quickly. I've seen loops where the end expression was actually something like: "SomeMap[x]->end()" and map lookups really aren't cheap. By writing it in the second form consistently, you eliminate the issue entirely and don't even have to think about it.

The second (even bigger) issue is that writing the loop in the first form hints to the reader that the loop is mutating the container (a fact that a comment would handily confirm!). If you write the loop in the second form, it is immediately obvious without even looking at the body of the loop that the container isn't being modified, which makes it easier to read the code and understand what it does.

While the second form of the loop is a few extra keystrokes, we do strongly prefer it.

#include <iostream> is forbidden

The use of #include <iostream> in library files is hereby forbidden. The primary reason for doing this is to support clients using LLVM libraries as part of larger systems. In particular, we statically link LLVM into some dynamic libraries. Even if LLVM isn't used, the static constructors are run whenever an application starts up that uses the dynamic library. There are two problems with this:

  1. The time to run the static c'tors impacts startup time of applications — a critical time for GUI apps.

  2. The static c'tors cause the app to pull many extra pages of memory off the disk: both the code for the static c'tors in each .o file and the small amount of data that gets touched. In addition, touched/dirty pages put more pressure on the VM system on low-memory machines.

Note that using the other stream headers (<sstream> for example) is not problematic in this regard — just <iostream>. However, raw_ostream provides various APIs that are better performing for almost every use than std::ostream style APIs. Therefore new code should always use raw_ostream for writing, or the llvm::MemoryBuffer API for reading files.

Use raw_ostream

LLVM includes a lightweight, simple, and efficient stream implementation in llvm/Support/raw_ostream.h, which provides all of the common features of std::ostream. All new code should use raw_ostream instead of ostream.

Unlike std::ostream, raw_ostream is not a template and can be forward declared as class raw_ostream. Public headers should generally not include the raw_ostream header, but use forward declarations and constant references to raw_ostream instances.
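
For example, a .cpp file might use it like this (printCount is a hypothetical helper; errs() and outs() are raw_ostream's standard-error and standard-output streams):

+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static void printCount(raw_ostream &OS, unsigned N) {
+  OS << "count = " << N << '\n';
+}
+
+// Callers pass outs() or errs(), e.g.: printCount(errs(), 42);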

Avoid std::endl

The std::endl modifier, when used with iostreams, outputs a newline to the output stream specified. In addition to doing this, however, it also flushes the output stream. In other words, these are equivalent:

+std::cout << std::endl;
+std::cout << '\n' << std::flush;

Most of the time, you probably have no reason to flush the output stream, so it's better to use a literal '\n'.
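
That is, prefer the first of these two lines, and write the flush out explicitly on the rare occasion you actually need it:

+std::cout << '\n';                // newline only -- usually all you need
+std::cout << '\n' << std::flush;  // newline plus a deliberate flush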

Microscopic Details

This section describes preferred low-level formatting guidelines along with reasoning on why we prefer them.

Spaces Before Parentheses

We prefer to put a space before an open parenthesis only in control flow statements, but not in normal function call expressions and function-like macros. For example, this is good:

+if (x) ...
+for (i = 0; i != 100; ++i) ...
+while (llvm_rocks) ...
+
+somefunc(42);
+assert(3 != 4 && "laws of math are failing me");
+  
+a = foo(42, 92) + bar(x);

and this is bad:

+if(x) ...
+for(i = 0; i != 100; ++i) ...
+while(llvm_rocks) ...
+
+somefunc (42);
+assert (3 != 4 && "laws of math are failing me");
+  
+a = foo (42, 92) + bar (x);

The reason for doing this is not completely arbitrary. This style makes control flow operators stand out more, and makes expressions flow better. The function call operator binds very tightly as a postfix operator. Putting a space after a function name (as in the last example) visually detaches the name from its argument list, which makes it easy to group the expression incorrectly. More specifically, it is easy to misread the "a" example as:

+a = foo ((42, 92) + bar) (x);

when skimming through the code. By avoiding a space in a function call, we avoid this misinterpretation.

Prefer Preincrement

Hard and fast rule: Preincrement (++X) may be no slower than postincrement (X++) and could very well be a lot faster. Use preincrement whenever possible.

The semantics of postincrement include making a copy of the value being incremented, returning it, and then preincrementing the "work value". For primitive types, this isn't a big deal, but for iterators it can be a huge issue (for example, some iterators contain stack and set objects in them; copying such an iterator invokes the copy ctors of those members as well). In general, get in the habit of always using preincrement, and you won't have a problem.
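
To see where the cost comes from, here is the canonical shape of the two operators for some iterator-like type T (a sketch; advance() stands in for the real increment logic):

+class T {
+  void advance() { /* move to the next element */ }
+public:
+  T &operator++() {         // preincrement: no copy is made
+    advance();
+    return *this;
+  }
+  T operator++(int) {       // postincrement: copies the whole object,
+    T Tmp(*this);           // including any internal stacks or sets
+    advance();
+    return Tmp;
+  }
+};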

Namespace Indentation

In general, we strive to reduce indentation wherever possible. This is useful because we want code to fit into 80 columns without wrapping horribly, but also because it makes it easier to understand the code. Namespaces are a funny thing: they are often large, and we often desire to put lots of stuff into them (so they can be large). Other times they are tiny, because they just hold an enum or something similar. In order to balance this, we use different approaches for small versus large namespaces.

If a namespace definition is small and easily fits on a screen (say, less than 35 lines of code), then you should indent its body. Here's an example:

+namespace llvm {
+  namespace X86 {
+    /// RelocationType - An enum for the x86 relocation codes. Note that
+    /// the terminology here doesn't follow x86 convention - word means
+    /// 32-bit and dword means 64-bit.
+    enum RelocationType {
+      /// reloc_pcrel_word - PC relative relocation, add the relocated value to
+      /// the value already in memory, after we adjust it for where the PC is.
+      reloc_pcrel_word = 0,
+
+      /// reloc_picrel_word - PIC base relative relocation, add the relocated
+      /// value to the value already in memory, after we adjust it for where the
+      /// PIC base is.
+      reloc_picrel_word = 1,
+      
+      /// reloc_absolute_word, reloc_absolute_dword - Absolute relocation, just
+      /// add the relocated value to the value already in memory.
+      reloc_absolute_word = 2,
+      reloc_absolute_dword = 3
+    };
+  }
+}

Since the body is small, indenting adds value because it makes it very clear where the namespace starts and ends, and it is easy to take in the whole thing in one "gulp" when reading the code. If the blob of code in the namespace is larger (as it typically is in a header in the llvm or clang namespaces), do not indent the code, and add a comment indicating what namespace is being closed. For example:

+namespace llvm {
+namespace knowledge {
+
+/// Grokable - This class represents things that Smith can have an intimate
+/// understanding of and contains the data associated with it.
+class Grokable {
+...
+public:
+  explicit Grokable() { ... }
+  virtual ~Grokable() = 0;
+  
+  ...
+
+};
+
+} // end namespace knowledge
+} // end namespace llvm

Because the class is large, we don't expect that the reader can easily understand the entire concept in a glance, and the end of the file (where the namespaces end) may be a long way away from the place they open. As such, indenting the contents of the namespace doesn't add any value, and detracts from the readability of the class. In these cases it is best to not indent the contents of the namespace.

Anonymous Namespaces

After talking about namespaces in general, you may be wondering about anonymous namespaces in particular. Anonymous namespaces are a great language feature that tells the C++ compiler that the contents of the namespace are only visible within the current translation unit, allowing more aggressive optimization and eliminating the possibility of symbol name collisions. Anonymous namespaces are to C++ as "static" is to C functions and global variables. While "static" is available in C++, anonymous namespaces are more general: they can make entire classes private to a file.

The problem with anonymous namespaces is that they naturally want to encourage indentation of their body, and they reduce locality of reference: if you see a random function definition in a C++ file, it is easy to see if it is marked static, but seeing if it is in an anonymous namespace requires scanning a big chunk of the file.

Because of this, we have a simple guideline: make anonymous namespaces as small as possible, and only use them for class declarations. For example, this is good:

+namespace {
+  class StringSort {
+  ...
+  public:
+    StringSort(...)
+    bool operator<(const char *RHS) const;
+  };
+} // end anonymous namespace
+
+static void Helper() { 
+  ... 
+}
+
+bool StringSort::operator<(const char *RHS) const {
+  ...
+}
+

This is bad:

+namespace {
+class StringSort {
+...
+public:
+  StringSort(...)
+  bool operator<(const char *RHS) const;
+};
+
+void Helper() { 
+  ... 
+}
+
+bool StringSort::operator<(const char *RHS) const {
+  ...
+}
+
+} // end anonymous namespace

This is bad specifically because if you're looking at "Helper" in the middle of a large C++ file, you have no immediate way to tell if it is local to the file. When it is marked static explicitly, this is immediately obvious. Also, there is no reason to enclose the definition of "operator<" in the namespace just because it was declared there.

See Also

A lot of these comments and recommendations have been culled from other sources. Two particularly important books for our work are:

  1. Effective C++ by Scott Meyers. Also interesting and useful are "More Effective C++" and "Effective STL" by the same author.

  2. Large-Scale C++ Software Design by John Lakos

If you get some free time and you haven't read them, do so; you might learn something.

+ +
Chris Lattner
LLVM Compiler Infrastructure
Last modified: $Date$
+ + + diff --git a/final/docs/CommandGuide/FileCheck.pod b/final/docs/CommandGuide/FileCheck.pod new file mode 100644 index 00000000000..3ccaa63e176 --- /dev/null +++ b/final/docs/CommandGuide/FileCheck.pod @@ -0,0 +1,245 @@ + +=pod + +=head1 NAME + +FileCheck - Flexible pattern matching file verifier + +=head1 SYNOPSIS + +B I [I<--check-prefix=XXX>] [I<--strict-whitespace>] + +=head1 DESCRIPTION + +B reads two files (one from standard input, and one specified on the +command line) and uses one to verify the other. This behavior is particularly +useful for the testsuite, which wants to verify that the output of some tool +(e.g. llc) contains the expected information (for example, a movsd from esp or +whatever is interesting). This is similar to using grep, but it is optimized +for matching multiple different inputs in one file in a specific order. + +The I file specifies the file that contains the patterns to +match. The file to verify is always read from standard input. + +=head1 OPTIONS + +=over + +=item B<-help> + +Print a summary of command line options. + +=item B<--check-prefix> I + +FileCheck searches the contents of I for patterns to match. By +default, these patterns are prefixed with "CHECK:". If you'd like to use a +different prefix (e.g. because the same input file is checking multiple +different tool or options), the B<--check-prefix> argument allows you to specify +a specific prefix to match. + +=item B<--strict-whitespace> + +By default, FileCheck canonicalizes input horizontal whitespace (spaces and +tabs) which causes it to ignore these differences (a space will match a tab). +The --strict-whitespace argument disables this behavior. + +=item B<-version> + +Show the version number of this program. + +=back + +=head1 EXIT STATUS + +If B verifies that the file matches the expected contents, it exits +with 0. Otherwise, if not, or if an error occurs, it will exit with a non-zero +value. + +=head1 TUTORIAL + +FileCheck is typically used from LLVM regression tests, being invoked on the RUN +line of the test. A simple example of using FileCheck from a RUN line looks +like this: + + ; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s + +This syntax says to pipe the current file ("%s") into llvm-as, pipe that into +llc, then pipe the output of llc into FileCheck. This means that FileCheck will +be verifying its standard input (the llc output) against the filename argument +specified (the original .ll file specified by "%s"). To see how this works, +lets look at the rest of the .ll file (after the RUN line): + + define void @sub1(i32* %p, i32 %v) { + entry: + ; CHECK: sub1: + ; CHECK: subl + %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v) + ret void + } + + define void @inc4(i64* %p) { + entry: + ; CHECK: inc4: + ; CHECK: incq + %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1) + ret void + } + +Here you can see some "CHECK:" lines specified in comments. Now you can see +how the file is piped into llvm-as, then llc, and the machine code output is +what we are verifying. FileCheck checks the machine code output to verify that +it matches what the "CHECK:" lines specify. + +The syntax of the CHECK: lines is very simple: they are fixed strings that +must occur in order. FileCheck defaults to ignoring horizontal whitespace +differences (e.g. a space is allowed to match a tab) but otherwise, the contents +of the CHECK: line is required to match some thing in the test file exactly. 
+ +One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the "sub1:" and "inc4:" labels, it will not match unless there +is a "subl" in between those labels. If it existed somewhere else in the file, +that would not count: "grep subl" matches if subl exists anywhere in the +file. + + + +=head2 The FileCheck -check-prefix option + +The FileCheck -check-prefix option allows multiple test configurations to be +driven from one .ll file. This is useful in many circumstances, for example, +testing different architectural variants with llc. Here's a simple example: + + ; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X32 + ; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \ + ; RUN: | FileCheck %s -check-prefix=X64 + + define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind { + %tmp1 = insertelement <4 x i32>; %tmp, i32 %s, i32 1 + ret <4 x i32> %tmp1 + ; X32: pinsrd_1: + ; X32: pinsrd $1, 4(%esp), %xmm0 + + ; X64: pinsrd_1: + ; X64: pinsrd $1, %edi, %xmm0 + } + +In this case, we're testing that we get the expected code generation with +both 32-bit and 64-bit code generation. + + + +=head2 The "CHECK-NEXT:" directive + +Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If +you specified a custom check prefix, just use "-NEXT:". For +example, something like this works as you'd expect: + + define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) { + %tmp3 = load <2 x double>* %A, align 16 + %tmp7 = insertelement <2 x double> undef, double %B, i32 0 + %tmp9 = shufflevector <2 x double> %tmp3, + <2 x double> %tmp7, + <2 x i32> < i32 0, i32 2 > + store <2 x double> %tmp9, <2 x double>* %r, align 16 + ret void + + ; CHECK: t2: + ; CHECK: movl 8(%esp), %eax + ; CHECK-NEXT: movapd (%eax), %xmm0 + ; CHECK-NEXT: movhpd 12(%esp), %xmm0 + ; CHECK-NEXT: movl 4(%esp), %eax + ; CHECK-NEXT: movapd %xmm0, (%eax) + ; CHECK-NEXT: ret + } + +CHECK-NEXT: directives reject the input unless there is exactly one newline +between it an the previous directive. A CHECK-NEXT cannot be the first +directive in a file. + + + +=head2 The "CHECK-NOT:" directive + +The CHECK-NOT: directive is used to verify that a string doesn't occur +between two matches (or before the first match, or after the last match). For +example, to verify that a load is removed by a transformation, a test like this +can be used: + + define i8 @coerce_offset0(i32 %V, i32* %P) { + store i32 %V, i32* %P + + %P2 = bitcast i32* %P to i8* + %P3 = getelementptr i8* %P2, i32 2 + + %A = load i8* %P3 + ret i8 %A + ; CHECK: @coerce_offset0 + ; CHECK-NOT: load + ; CHECK: ret i8 + } + + + +=head2 FileCheck Pattern Matching Syntax + +The CHECK: and CHECK-NOT: directives both take a pattern to match. For most +uses of FileCheck, fixed string matching is perfectly sufficient. For some +things, a more flexible form of matching is desired. To support this, FileCheck +allows you to specify regular expressions in matching strings, surrounded by +double braces: B<{{yourregex}}>. Because we want to use fixed string +matching for a majority of what we do, FileCheck has been designed to support +mixing and matching fixed string matching with regular expressions. 
This allows +you to write things like this: + + ; CHECK: movhpd {{[0-9]+}}(%esp), {{%xmm[0-7]}} + +In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed. + +Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +B<{{[{][{]}}> as your pattern. + + + +=head2 FileCheck Variables + +It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any register, +but verify that that register is used consistently later. To do this, FileCheck +allows named variables to be defined and substituted into patterns. Here is a +simple example: + + ; CHECK: test5: + ; CHECK: notw [[REGISTER:%[a-z]+]] + ; CHECK: andw {{.*}}[[REGISTER]] + +The first check line matches a regex (%[a-z]+) and captures it into +the variables "REGISTER". The second line verifies that whatever is in REGISTER +occurs later in the file after an "andw". FileCheck variable references are +always contained in [[ ]] pairs, are named, and their names can be +formed with the regex "[a-zA-Z_][a-zA-Z0-9_]*". If a colon follows the +name, then it is a definition of the variable, if not, it is a use. + +FileCheck variables can be defined multiple times, and uses always get the +latest value. Note that variables are all read at the start of a "CHECK" line +and are all defined at the end. This means that if you have something like +"CHECK: [[XYZ:.*]]x[[XYZ]]" that the check line will read the previous +value of the XYZ variable and define a new one after the match is performed. If +you need to do something like this you can probably take advantage of the fact +that FileCheck is not actually line-oriented when it matches, this allows you to +define two separate CHECK lines that match on the same line. + + + +=head1 AUTHORS + +Maintained by The LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/Makefile b/final/docs/CommandGuide/Makefile new file mode 100644 index 00000000000..2c2d0760e79 --- /dev/null +++ b/final/docs/CommandGuide/Makefile @@ -0,0 +1,103 @@ +##===- docs/CommandGuide/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +ifdef BUILD_FOR_WEBSITE +# This special case is for keeping the CommandGuide on the LLVM web site +# up to date automatically as the documents are checked in. It must build +# the POD files to HTML only and keep them in the src directories. It must also +# build in an unconfigured tree, hence the ifdef. To use this, run +# make -s BUILD_FOR_WEBSITE=1 inside the cvs commit script. +SRC_DOC_DIR= +DST_HTML_DIR=html/ +DST_MAN_DIR=man/man1/ +DST_PS_DIR=ps/ + +# If we are in BUILD_FOR_WEBSITE mode, default to the all target. +all:: html man ps + +clean: + rm -f pod2htm*.*~~ $(HTML) $(MAN) $(PS) + +# To create other directories, as needed, and timestamp their creation +%/.dir: + -mkdir $* > /dev/null + date > $@ + +else + +# Otherwise, if not in BUILD_FOR_WEBSITE mode, use the project info. +LEVEL := ../.. 
+include $(LEVEL)/Makefile.common + +SRC_DOC_DIR=$(PROJ_SRC_DIR)/ +DST_HTML_DIR=$(PROJ_OBJ_DIR)/ +DST_MAN_DIR=$(PROJ_OBJ_DIR)/ +DST_PS_DIR=$(PROJ_OBJ_DIR)/ + +endif + + +POD := $(wildcard $(SRC_DOC_DIR)*.pod) +HTML := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_HTML_DIR)%.html, $(POD)) +MAN := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_MAN_DIR)%.1, $(POD)) +PS := $(patsubst $(SRC_DOC_DIR)%.pod, $(DST_PS_DIR)%.ps, $(POD)) + +# The set of man pages we will not install +NO_INSTALL_MANS = $(DST_MAN_DIR)FileCheck.1 + +# The set of man pages that we will install +INSTALL_MANS = $(filter-out $(NO_INSTALL_MANS), $(MAN)) + +.SUFFIXES: +.SUFFIXES: .html .pod .1 .ps + +$(DST_HTML_DIR)%.html: %.pod $(DST_HTML_DIR)/.dir + pod2html --css=manpage.css --htmlroot=. \ + --podpath=. --noindex --infile=$< --outfile=$@ --title=$* + +$(DST_MAN_DIR)%.1: %.pod $(DST_MAN_DIR)/.dir + pod2man --release=CVS --center="LLVM Command Guide" $< $@ + +$(DST_PS_DIR)%.ps: $(DST_MAN_DIR)%.1 $(DST_PS_DIR)/.dir + groff -Tps -man $< > $@ + + +html: $(HTML) +man: $(MAN) +ps: $(PS) + +EXTRA_DIST := $(POD) index.html + +clean-local:: + $(Verb) $(RM) -f pod2htm*.*~~ $(HTML) $(MAN) $(PS) + +HTML_DIR := $(DESTDIR)$(PROJ_docsdir)/html/CommandGuide +MAN_DIR := $(DESTDIR)$(PROJ_mandir)/man1 +PS_DIR := $(DESTDIR)$(PROJ_docsdir)/ps + +install-local:: $(HTML) $(INSTALL_MANS) $(PS) + $(Echo) Installing HTML CommandGuide Documentation + $(Verb) $(MKDIR) $(HTML_DIR) + $(Verb) $(DataInstall) $(HTML) $(HTML_DIR) + $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/index.html $(HTML_DIR) + $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/manpage.css $(HTML_DIR) + $(Echo) Installing MAN CommandGuide Documentation + $(Verb) $(MKDIR) $(MAN_DIR) + $(Verb) $(DataInstall) $(INSTALL_MANS) $(MAN_DIR) + $(Echo) Installing PS CommandGuide Documentation + $(Verb) $(MKDIR) $(PS_DIR) + $(Verb) $(DataInstall) $(PS) $(PS_DIR) + +uninstall-local:: + $(Echo) Uninstalling CommandGuide Documentation + $(Verb) $(RM) -rf $(HTML_DIR) $(MAN_DIR) $(PS_DIR) + +printvars:: + $(Echo) "POD : " '$(POD)' + $(Echo) "HTML : " '$(HTML)' diff --git a/final/docs/CommandGuide/bugpoint.pod b/final/docs/CommandGuide/bugpoint.pod new file mode 100644 index 00000000000..1870a0d84b6 --- /dev/null +++ b/final/docs/CommandGuide/bugpoint.pod @@ -0,0 +1,171 @@ +=pod + +=head1 NAME + +bugpoint - automatic test case reduction tool + +=head1 SYNOPSIS + +B [I] [I] [I] B<--args> +I + +=head1 DESCRIPTION + +B narrows down the source of problems in LLVM tools and passes. It +can be used to debug three types of failures: optimizer crashes, miscompilations +by optimizers, or bad native code generation (including problems in the static +and JIT compilers). It aims to reduce large test cases to small, useful ones. +For more information on the design and inner workings of B, as well as +advice for using bugpoint, see F in the LLVM +distribution. + +=head1 OPTIONS + +=over + +=item B<--additional-so> F + +Load the dynamic shared object F into the test program whenever it is +run. This is useful if you are debugging programs which depend on non-LLVM +libraries (such as the X or curses libraries) to run. + +=item B<--append-exit-code>=I<{true,false}> + +Append the test programs exit code to the output file so that a change in exit +code is considered a test failure. Defaults to false. + +=item B<--args> I + +Pass all arguments specified after -args to the test program whenever it runs. 
+Note that if any of the I start with a '-', you should use: + + bugpoint [bugpoint args] --args -- [program args] + +The "--" right after the B<--args> option tells B to consider any +options starting with C<-> to be part of the B<--args> option, not as options to +B itself. + +=item B<--tool-args> I + +Pass all arguments specified after --tool-args to the LLVM tool under test +(B, B, etc.) whenever it runs. You should use this option in the +following way: + + bugpoint [bugpoint args] --tool-args -- [tool args] + +The "--" right after the B<--tool-args> option tells B to consider any +options starting with C<-> to be part of the B<--tool-args> option, not as +options to B itself. (See B<--args>, above.) + +=item B<--safe-tool-args> I + +Pass all arguments specified after B<--safe-tool-args> to the "safe" execution +tool. + +=item B<--gcc-tool-args> I + +Pass all arguments specified after B<--gcc-tool-args> to the invocation of +B. + +=item B<--opt-args> I + +Pass all arguments specified after B<--opt-args> to the invocation of B. + +=item B<--disable-{dce,simplifycfg}> + +Do not run the specified passes to clean up and reduce the size of the test +program. By default, B uses these passes internally when attempting to +reduce test programs. If you're trying to find a bug in one of these passes, +B may crash. + +=item B<--enable-valgrind> + +Use valgrind to find faults in the optimization phase. This will allow +bugpoint to find otherwise asymptomatic problems caused by memory +mis-management. + +=item B<-find-bugs> + +Continually randomize the specified passes and run them on the test program +until a bug is found or the user kills B. + +=item B<-help> + +Print a summary of command line options. + +=item B<--input> F + +Open F and redirect the standard input of the test program, whenever +it runs, to come from that file. + +=item B<--load> F + +Load the dynamic object F into B itself. This object should +register new optimization passes. Once loaded, the object will add new command +line options to enable various optimizations. To see the new complete list of +optimizations, use the B<-help> and B<--load> options together; for example: + + bugpoint --load myNewPass.so -help + +=item B<--mlimit> F + +Specifies an upper limit on memory usage of the optimization and codegen. Set +to zero to disable the limit. + +=item B<--output> F + +Whenever the test program produces output on its standard output stream, it +should match the contents of F (the "reference output"). If you +do not use this option, B will attempt to generate a reference output +by compiling the program with the "safe" backend and running it. + +=item B<--profile-info-file> F + +Profile file loaded by B<--profile-loader>. + +=item B<--run-{int,jit,llc,cbe,custom}> + +Whenever the test program is compiled, B should generate code for it +using the specified code generator. These options allow you to choose the +interpreter, the JIT compiler, the static native code compiler, the C +backend, or a custom command (see B<--exec-command>) respectively. + +=item B<--safe-{llc,cbe,custom}> + +When debugging a code generator, B should use the specified code +generator as the "safe" code generator. This is a known-good code generator +used to generate the "reference output" if it has not been provided, and to +compile portions of the program that as they are excluded from the testcase. +These options allow you to choose the +static native code compiler, the C backend, or a custom command, +(see B<--exec-command>) respectively. 
The interpreter and the JIT backends +cannot currently be used as the "safe" backends. + +=item B<--exec-command> I + +This option defines the command to use with the B<--run-custom> and +B<--safe-custom> options to execute the bitcode testcase. This can +be useful for cross-compilation. + +=item B<--safe-path> I + +This option defines the path to the command to execute with the +B<--safe-{int,jit,llc,cbe,custom}> +option. + +=back + +=head1 EXIT STATUS + +If B succeeds in finding a problem, it will exit with 0. Otherwise, +if an error occurs, it will exit with a non-zero value. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/html/manpage.css b/final/docs/CommandGuide/html/manpage.css new file mode 100644 index 00000000000..b2003434901 --- /dev/null +++ b/final/docs/CommandGuide/html/manpage.css @@ -0,0 +1,256 @@ +/* Based on http://www.perldoc.com/css/perldoc.css */ + +@import url("../llvm.css"); + +body { font-family: Arial,Helvetica; } + +blockquote { margin: 10pt; } + +h1, a { color: #336699; } + + +/*** Top menu style ****/ +.mmenuon { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #ff6600; font-size: 10pt; + } +.mmenuoff { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #ffffff; font-size: 10pt; +} +.cpyright { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #ffffff; font-size: xx-small; +} +.cpyrightText { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #ffffff; font-size: xx-small; +} +.sections { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 11pt; +} +.dsections { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 12pt; +} +.slink { + font-family: Arial,Helvetica; font-weight: normal; text-decoration: none; + color: #000000; font-size: 9pt; +} + +.slink2 { font-family: Arial,Helvetica; text-decoration: none; color: #336699; } + +.maintitle { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 18pt; +} +.dblArrow { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: small; +} +.menuSec { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: small; +} + +.newstext { + font-family: Arial,Helvetica; font-size: small; +} + +.linkmenu { + font-family: Arial,Helvetica; color: #000000; font-weight: bold; + text-decoration: none; +} + +P { + font-family: Arial,Helvetica; +} + +PRE { + font-size: 10pt; +} +.quote { + font-family: Times; text-decoration: none; + color: #000000; font-size: 9pt; font-style: italic; +} +.smstd { font-family: Arial,Helvetica; color: #000000; font-size: x-small; } +.std { font-family: Arial,Helvetica; color: #000000; } +.meerkatTitle { + font-family: sans-serif; font-size: x-small; color: black; } + +.meerkatDescription { font-family: sans-serif; font-size: 10pt; color: black } +.meerkatCategory { + font-family: sans-serif; font-size: 9pt; font-weight: bold; font-style: italic; + color: brown; } +.meerkatChannel { + font-family: sans-serif; font-size: 9pt; font-style: italic; color: brown; } +.meerkatDate { font-family: sans-serif; font-size: xx-small; color: #336699; } + +.tocTitle { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #333333; font-size: 10pt; +} + 
+.toc-item { + font-family: Arial,Helvetica; font-weight: bold; + color: #336699; font-size: 10pt; text-decoration: underline; +} + +.perlVersion { + font-family: Arial,Helvetica; font-weight: bold; + color: #336699; font-size: 10pt; text-decoration: none; +} + +.podTitle { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #000000; +} + +.docTitle { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #000000; font-size: 10pt; +} +.dotDot { + font-family: Arial,Helvetica; font-weight: bold; + color: #000000; font-size: 9pt; +} + +.docSec { + font-family: Arial,Helvetica; font-weight: normal; + color: #333333; font-size: 9pt; +} +.docVersion { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 10pt; +} + +.docSecs-on { + font-family: Arial,Helvetica; font-weight: normal; text-decoration: none; + color: #ff0000; font-size: 10pt; +} +.docSecs-off { + font-family: Arial,Helvetica; font-weight: normal; text-decoration: none; + color: #333333; font-size: 10pt; +} + +h2 { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: medium; +} +h1 { + font-family: Verdana,Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: large; +} + +DL { + font-family: Arial,Helvetica; font-weight: normal; text-decoration: none; + color: #333333; font-size: 10pt; +} + +UL > LI > A { + font-family: Arial,Helvetica; font-weight: bold; + color: #336699; font-size: 10pt; +} + +.moduleInfo { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #333333; font-size: 11pt; +} + +.moduleInfoSec { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 10pt; +} + +.moduleInfoVal { + font-family: Arial,Helvetica; font-weight: normal; text-decoration: underline; + color: #000000; font-size: 10pt; +} + +.cpanNavTitle { + font-family: Arial,Helvetica; font-weight: bold; + color: #ffffff; font-size: 10pt; +} +.cpanNavLetter { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #333333; font-size: 9pt; +} +.cpanCat { + font-family: Arial,Helvetica; font-weight: bold; text-decoration: none; + color: #336699; font-size: 9pt; +} + +.bttndrkblue-bkgd-top { + background-color: #225688; + background-image: url(/global/mvc_objects/images/bttndrkblue_bgtop.gif); +} +.bttndrkblue-bkgd-left { + background-color: #225688; + background-image: url(/global/mvc_objects/images/bttndrkblue_bgleft.gif); +} +.bttndrkblue-bkgd { + padding-top: 0px; + padding-bottom: 0px; + margin-bottom: 0px; + margin-top: 0px; + background-repeat: no-repeat; + background-color: #225688; + background-image: url(/global/mvc_objects/images/bttndrkblue_bgmiddle.gif); + vertical-align: top; +} +.bttndrkblue-bkgd-right { + background-color: #225688; + background-image: url(/global/mvc_objects/images/bttndrkblue_bgright.gif); +} +.bttndrkblue-bkgd-bottom { + background-color: #225688; + background-image: url(/global/mvc_objects/images/bttndrkblue_bgbottom.gif); +} +.bttndrkblue-text a { + color: #ffffff; + text-decoration: none; +} +a.bttndrkblue-text:hover { + color: #ffDD3C; + text-decoration: none; +} +.bg-ltblue { + background-color: #f0f5fa; +} + +.border-left-b { + background: #f0f5fa url(/i/corner-leftline.gif) repeat-y; +} + +.border-right-b { + background: #f0f5fa url(/i/corner-rightline.gif) repeat-y; +} + +.border-top-b { + background: #f0f5fa 
url(/i/corner-topline.gif) repeat-x; +} + +.border-bottom-b { + background: #f0f5fa url(/i/corner-botline.gif) repeat-x; +} + +.border-right-w { + background: #ffffff url(/i/corner-rightline.gif) repeat-y; +} + +.border-top-w { + background: #ffffff url(/i/corner-topline.gif) repeat-x; +} + +.border-bottom-w { + background: #ffffff url(/i/corner-botline.gif) repeat-x; +} + +.bg-white { + background-color: #ffffff; +} + +.border-left-w { + background: #ffffff url(/i/corner-leftline.gif) repeat-y; +} diff --git a/final/docs/CommandGuide/index.html b/final/docs/CommandGuide/index.html new file mode 100644 index 00000000000..8854fbbebe6 --- /dev/null +++ b/final/docs/CommandGuide/index.html @@ -0,0 +1,158 @@ + + + + LLVM Command Guide + + + + +
+ LLVM Command Guide +

These documents are HTML versions of the man pages for all of the LLVM tools. These pages describe how to use the LLVM commands and what their options are. Note that these pages do not describe all of the options available for all tools. To get a complete listing, pass the -help (general options) or -help-hidden (general+debugging options) arguments to the tool you are interested in.

Basic Commands

  • llvm-as - assemble a human-readable .ll file into bytecode
  • llvm-dis - disassemble a bytecode file into a human-readable .ll file
  • opt - run a series of LLVM-to-LLVM optimizations on a bytecode file
  • llc - generate native machine code for a bytecode file
  • lli - directly run a program compiled to bytecode using a JIT compiler or interpreter
  • llvm-link - link several bytecode files into one
  • llvm-ar - archive bytecode files
  • llvm-ranlib - create an index for archives made with llvm-ar
  • llvm-nm - print out the names and types of symbols in a bytecode file
  • llvm-prof - format raw `llvmprof.out' data into a human-readable report
  • llvm-ld - general purpose linker with loadable runtime optimization support
  • llvm-config - print out LLVM compilation options, libraries, etc. as configured
  • llvmc - a generic customizable compiler driver
  • llvm-diff - structurally compare two modules

Frontend Commands

  • llvm-gcc - GCC-based C front-end for LLVM
  • llvm-g++ - GCC-based C++ front-end for LLVM

Debugging Tools

  • bugpoint - automatic test-case reducer
  • llvm-extract - extract a function from an LLVM bytecode file
  • llvm-bcanalyzer - bytecode analyzer (analyzes the binary encoding itself, not the program it represents)

Developer Tools

  • FileCheck - Flexible file verifier used extensively by the testing harness
  • tblgen - target description reader and generator
  • lit - LLVM Integrated Tester, for running tests

LLVM Compiler Infrastructure
Last modified: $Date$
+ + + diff --git a/final/docs/CommandGuide/lit.pod b/final/docs/CommandGuide/lit.pod new file mode 100644 index 00000000000..989a5d7acb5 --- /dev/null +++ b/final/docs/CommandGuide/lit.pod @@ -0,0 +1,354 @@ +=pod + +=head1 NAME + +lit - LLVM Integrated Tester + +=head1 SYNOPSIS + +B [I] [I] + +=head1 DESCRIPTION + +B is a portable tool for executing LLVM and Clang style test suites, +summarizing their results, and providing indication of failures. B is +designed to be a lightweight testing tool with as simple a user interface as +possible. + +B should be run with one or more I to run specified on the command +line. Tests can be either individual test files or directories to search for +tests (see L<"TEST DISCOVERY">). + +Each specified test will be executed (potentially in parallel) and once all +tests have been run B will print summary information on the number of tests +which passed or failed (see L<"TEST STATUS RESULTS">). The B program will +execute with a non-zero exit code if any tests fail. + +By default B will use a succinct progress display and will only print +summary information for test failures. See L<"OUTPUT OPTIONS"> for options +controlling the B progress display and output. + +B also includes a number of options for controlling how tests are exected +(specific features may depend on the particular test format). See L<"EXECUTION +OPTIONS"> for more information. + +Finally, B also supports additional options for only running a subset of +the options specified on the command line, see L<"SELECTION OPTIONS"> for +more information. + +Users interested in the B architecture or designing a B testing +implementation should see L<"LIT ARCHITECTURE"> + +=head1 GENERAL OPTIONS + +=over + +=item B<-h>, B<--help> + +Show the B help message. + +=item B<-j> I, B<--threads>=I + +Run I tests in parallel. By default, this is automatically chosen to match +the number of detected available CPUs. + +=item B<--config-prefix>=I + +Search for I and I when searching for test suites, +instead of I and I. + +=item B<--param> I, B<--param> I=I + +Add a user defined parameter I with the given I (or the empty +string if not given). The meaning and use of these parameters is test suite +dependent. + +=back + +=head1 OUTPUT OPTIONS + +=over + +=item B<-q>, B<--quiet> + +Suppress any output except for test failures. + +=item B<-s>, B<--succinct> + +Show less output, for example don't show information on tests that pass. + +=item B<-v>, B<--verbose> + +Show more information on test failures, for example the entire test output +instead of just the test result. + +=item B<--no-progress-bar> + +Do not use curses based progress bar. + +=back + +=head1 EXECUTION OPTIONS + +=over + +=item B<--path>=I + +Specify an addition I to use when searching for executables in tests. + +=item B<--vg> + +Run individual tests under valgrind (using the memcheck tool). The +I<--error-exitcode> argument for valgrind is used so that valgrind failures will +cause the program to exit with a non-zero status. + +=item B<--vg-arg>=I + +When I<--vg> is used, specify an additional argument to pass to valgrind itself. + +=item B<--time-tests> + +Track the wall time individual tests take to execute and includes the results in +the summary output. This is useful for determining which tests in a test suite +take the most time to execute. Note that this option is most useful with I<-j +1>. + +=back + +=head1 SELECTION OPTIONS + +=over + +=item B<--max-tests>=I + +Run at most I tests and then terminate. 
+ +=item B<--max-time>=I + +Spend at most I seconds (approximately) running tests and then terminate. + +=item B<--shuffle> + +Run the tests in a random order. + +=back + +=head1 ADDITIONAL OPTIONS + +=over + +=item B<--debug> + +Run B in debug mode, for debugging configuration issues and B itself. + +=item B<--show-suites> + +List the discovered test suites as part of the standard output. + +=item B<--no-tcl-as-sh> + +Run Tcl scripts internally (instead of converting to shell scripts). + +=item B<--repeat>=I + +Run each test I times. Currently this is primarily useful for timing tests; +other results are not collated in any reasonable fashion. + +=back + +=head1 EXIT STATUS + +B will exit with an exit code of 1 if there are any FAIL or XPASS +results. Otherwise, it will exit with the status 0. Other exit codes are used for +non-test related failures (for example a user error or an internal program +error). + +=head1 TEST DISCOVERY + +The inputs passed to B can be either individual tests, or entire +directories or hierarchies of tests to run. When B starts up, the first +thing it does is convert the inputs into a complete list of tests to run as part +of I. + +In the B model, every test must exist inside some I. B +resolves the inputs specified on the command line to test suites by searching +upwards from the input path until it finds a I or I +file. These files serve as both a marker of test suites and as configuration +files which B loads in order to understand how to find and run the tests +inside the test suite. + +Once B has mapped the inputs into test suites it traverses the list of +inputs adding tests for individual files and recursively searching for tests in +directories. + +This behavior makes it easy to specify a subset of tests to run, while still +allowing the test suite configuration to control exactly how tests are +interpreted. In addition, B always identifies tests by the test suite they +are in, and their relative path inside the test suite. For appropriately +configured projects, this allows B to provide convenient and flexible +support for out-of-tree builds. + +=head1 TEST STATUS RESULTS + +Each test ultimately produces one of the following six results: + +=over + +=item B + +The test succeeded. + +=item B + +The test failed, but that is expected. This is used for test formats which allow +specifying that a test does not currently work, but wish to leave it in the test +suite. + +=item B + +The test succeeded, but it was expected to fail. This is used for tests which +were specified as expected to fail, but are now succeeding (generally because +the feature they test was broken and has been fixed). + +=item B + +The test failed. + +=item B + +The test result could not be determined. For example, this occurs when the test +could not be run, the test itself is invalid, or the test was interrupted. + +=item B + +The test is not supported in this environment. This is used by test formats +which can report unsupported tests. + +=back + +Depending on the test format, tests may produce additional information about +their status (generally only for failures). See the L +section for more information. + +=head1 LIT INFRASTRUCTURE + +This section describes the B testing architecture for users interested in +creating a new B testing implementation, or extending an existing one. + +B proper is primarily an infrastructure for discovering and running +arbitrary tests, and for exposing a single convenient interface to these +tests.
B itself doesn't know how to run tests; rather, this logic is +defined by I. + +=head2 TEST SUITES + +As described in L<"TEST DISCOVERY">, tests are always located inside a I. Test suites serve to define the format of the tests they contain, the +logic for finding those tests, and any additional information to run the tests. + +B identifies test suites as directories containing I or +I files (see also B<--config-prefix>). Test suites are initially +discovered by recursively searching up the directory hierarchy for all the input +files passed on the command line. You can use B<--show-suites> to display the +discovered test suites at startup. + +Once a test suite is discovered, its config file is loaded. Config files +themselves are Python modules which will be executed. When the config file is +executed, two important global variables are predefined: + +=over + +=item B + +The global B configuration object (a I instance), which defines +the builtin test formats, global configuration parameters, and other helper +routines for implementing test configurations. + +=item B + +This is the config object (a I instance) for the test suite, +which the config file is expected to populate. The following variables are also +available on the I object, some of which must be set by the config while +others are optional or predefined: + +B I<[required]> The name of the test suite, for use in reports and +diagnostics. + +B I<[required]> The test format object which will be used to +discover and run tests in the test suite. Generally this will be a builtin test +format available from the I module. + +B The filesystem path to the test suite root. For out-of-dir +builds this is the directory that will be scanned for tests. + +B For out-of-dir builds, the path to the test suite root inside +the object directory. This is where tests will be run and temporary output files +placed. + +B A dictionary representing the environment to use when executing +tests in the suite. + +B For B test formats which scan directories for tests, this +variable is a list of suffixes to identify test files. Used by: I, +I. + +B For B test formats which substitute variables into a test +script, the list of substitutions to perform. Used by: I, I. + +B Mark an unsupported directory; all tests within it will be +reported as unsupported. Used by: I, I. + +B The parent configuration; this is the config object for the directory +containing the test suite, or None. + +B The config is actually cloned for every subdirectory inside a test +suite, to allow local configuration on a per-directory basis. The I +variable can be set to a Python function which will be called whenever a +configuration is cloned (for a subdirectory). The function should take three +arguments: (1) the parent configuration, (2) the new configuration (which the +I function will generally modify), and (3) the test path to the new +directory being scanned. + +=back + +=head2 TEST DISCOVERY + +Once test suites are located, B recursively traverses the source directory +(following I) looking for tests. When B enters a +sub-directory, it first checks to see if a nested test suite is defined in that +directory. If so, it loads that test suite recursively; otherwise it +instantiates a local test config for the directory (see L<"LOCAL CONFIGURATION +FILES">). + +Tests are identified by the test suite they are contained within, and the +relative path inside that suite.
Note that the relative path may not refer to an +actual file on disk; some test formats (such as I) define "virtual +tests" which have a path that contains both the path to the actual test file and +a subpath to identify the virtual test. + +=head2 LOCAL CONFIGURATION FILES + +When B loads a subdirectory in a test suite, it instantiates a local test +configuration by cloning the configuration for the parent directory -- the root +of this configuration chain will always be a test suite. Once the test +configuration is cloned, B checks for a I file in the +subdirectory. If present, this file will be loaded and can be used to specialize +the configuration for each individual directory. This facility can be used to +define subdirectories of optional tests, or to change other configuration +parameters -- for example, to change the test format, or the suffixes which +identify test files. + +=head2 LIT EXAMPLE TESTS + +The B distribution contains several example implementations of test suites +in the I directory. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Written by Daniel Dunbar and maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/llc.pod b/final/docs/CommandGuide/llc.pod new file mode 100644 index 00000000000..eb26ec00fd7 --- /dev/null +++ b/final/docs/CommandGuide/llc.pod @@ -0,0 +1,201 @@ +=pod + +=head1 NAME + +llc - LLVM static compiler + +=head1 SYNOPSIS + +B [I] [I] + +=head1 DESCRIPTION + +The B command compiles LLVM source inputs into assembly language for a +specified architecture. The assembly language output can then be passed through +a native assembler and linker to generate a native executable. + +The choice of architecture for the output assembly code is automatically +determined from the input file, unless the B<-march> option is used to override +the default. + +=head1 OPTIONS + +If I is - or omitted, B reads from standard input. Otherwise, it +will read from I. Inputs can be in either the LLVM assembly language +format (.ll) or the LLVM bitcode format (.bc). + +If the B<-o> option is omitted, then B will send its output to standard +output if the input is from standard input. If the B<-o> option specifies -, +then the output will also be sent to standard output. + +If no B<-o> option is specified and an input file other than - is specified, +then B creates the output filename by taking the input filename, +removing any existing F<.bc> extension, and adding a F<.s> suffix. + +Other B options are as follows: + +=head2 End-user Options + +=over + +=item B<-help> + +Print a summary of command line options. + +=item B<-O>=I + +Generate code at different optimization levels. These correspond to the I<-O0>, +I<-O1>, I<-O2>, I<-O3>, and I<-O4> optimization levels used by B and +B. + +=item B<-mtriple>=I + +Override the target triple specified in the input file with the specified +string. + +=item B<-march>=I + +Specify the architecture for which to generate assembly, overriding the target +encoded in the input file. See the output of B for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture. + +=item B<-mcpu>=I + +Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture.
For a list of available CPUs, use: +B /dev/null | llc -march=xyz -mcpu=help> + +=item B<-mattr>=I + +Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use: +B /dev/null | llc -march=xyz -mattr=help> + +=item B<--disable-fp-elim> + +Disable frame pointer elimination optimization. + +=item B<--disable-excess-fp-precision> + +Disable optimizations that may produce excess precision for floating point. +Note that this option can dramatically slow down code on some systems +(e.g. X86). + +=item B<--enable-no-infs-fp-math> + +Enable optimizations that assume no Inf values. + +=item B<--enable-no-nans-fp-math> + +Enable optimizations that assume no NAN values. + +=item B<--enable-unsafe-fp-math> + +Enable optimizations that make unsafe assumptions about IEEE math (e.g. that +addition is associative) or may not work for all input ranges. These +optimizations allow the code generator to make use of some instructions which +would otherwise not be usable (such as fsin on X86). + +=item B<--enable-correct-eh-support> + +Instruct the B pass to insert code for correct exception handling +support. This is expensive and is by default omitted for efficiency. + +=item B<--stats> + +Print statistics recorded by code-generation passes. + +=item B<--time-passes> + +Record the amount of time needed for each pass and print a report to standard +error. + +=item B<--load>=F + +Dynamically load F (a path to a dynamically shared object) that +implements an LLVM target. This will permit the target name to be used with the +B<-march> option so that code can be generated for that target. + +=back + +=head2 Tuning/Configuration Options + +=over + +=item B<--print-machineinstrs> + +Print generated machine code between compilation phases (useful for debugging). + +=item B<--regalloc>=I + +Specify the register allocator to use. The default I is I. +Valid register allocators are: + +=over + +=item I + +Very simple "always spill" register allocator + +=item I + +Local register allocator + +=item I + +Linear scan global register allocator + +=item I + +Iterative scan global register allocator + +=back + +=item B<--spiller>=I + +Specify the spiller to use for register allocators that support it. Currently +this option is used only by the linear scan register allocator. The default +I is I. Valid spillers are: + +=over + +=item I + +Simple spiller + +=item I + +Local spiller + +=back + +=back + +=head2 Intel IA-32-specific Options + +=over + +=item B<--x86-asm-syntax=att|intel> + +Specify whether to emit assembly code in AT&T syntax (the default) or intel +syntax. + +=back + +=head1 EXIT STATUS + +If B succeeds, it will exit with 0. Otherwise, if an error occurs, +it will exit with a non-zero value. + +=head1 SEE ALSO + +L + +=head1 AUTHORS + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/lli.pod b/final/docs/CommandGuide/lli.pod new file mode 100644 index 00000000000..52a2721e7d7 --- /dev/null +++ b/final/docs/CommandGuide/lli.pod @@ -0,0 +1,219 @@ +=pod + +=head1 NAME + +lli - directly execute programs from LLVM bitcode + +=head1 SYNOPSIS + +B [I] [I] [I] + +=head1 DESCRIPTION + +B directly executes programs in LLVM bitcode format. It takes a program +in LLVM bitcode format and executes it using a just-in-time compiler, if one is +available for the current architecture, or an interpreter. 
B takes all of +the same code generator options as L, but they are only effective when +B is using the just-in-time compiler. + +If I is not specified, then B reads the LLVM bitcode for the +program from standard input. + +The optional I specified on the command line are passed to the program as +arguments. + +=head1 GENERAL OPTIONS + +=over + +=item B<-fake-argv0>=I + +Override the C value passed into the executing program. + +=item B<-force-interpreter>=I<{false,true}> + +If set to true, use the interpreter even if a just-in-time compiler is available +for this architecture. Defaults to false. + +=item B<-help> + +Print a summary of command line options. + +=item B<-load>=I + +Causes B to load the plugin (shared object) named I and use +it for optimization. + +=item B<-stats> + +Print statistics from the code-generation passes. This is only meaningful for +the just-in-time compiler, at present. + +=item B<-time-passes> + +Record the amount of time needed for each code-generation pass and print it to +standard error. + +=item B<-version> + +Print out the version of B and exit without doing anything else. + +=back + +=head1 TARGET OPTIONS + +=over + +=item B<-mtriple>=I + +Override the target triple specified in the input bitcode file with the +specified string. This may result in a crash if you pick an +architecture which is not compatible with the current system. + +=item B<-march>=I + +Specify the architecture for which to generate assembly, overriding the target +encoded in the bitcode file. See the output of B for a list of +valid architectures. By default this is inferred from the target triple or +autodetected to the current architecture. + +=item B<-mcpu>=I + +Specify a specific chip in the current architecture to generate code for. +By default this is inferred from the target triple and autodetected to +the current architecture. For a list of available CPUs, use: +B /dev/null | llc -march=xyz -mcpu=help> + +=item B<-mattr>=I + +Override or control specific attributes of the target, such as whether SIMD +operations are enabled or not. The default set of attributes is set by the +current CPU. For a list of available attributes, use: +B /dev/null | llc -march=xyz -mattr=help> + +=back + + +=head1 FLOATING POINT OPTIONS + +=over + +=item B<-disable-excess-fp-precision> + +Disable optimizations that may increase floating point precision. + +=item B<-enable-no-infs-fp-math> + +Enable optimizations that assume no Inf values. + +=item B<-enable-no-nans-fp-math> + +Enable optimizations that assume no NAN values. + +=item B<-enable-unsafe-fp-math> + +Causes B to enable optimizations that may decrease floating point +precision. + +=item B<-soft-float> + +Causes B to generate software floating point library calls instead of +equivalent hardware instructions. + +=back + +=head1 CODE GENERATION OPTIONS + +=over + +=item B<-code-model>=I + +Choose the code model from: + + default: Target default code model + small: Small code model + kernel: Kernel code model + medium: Medium code model + large: Large code model + +=item B<-disable-post-RA-scheduler> + +Disable scheduling after register allocation. + +=item B<-disable-spill-fusing> + +Disable fusing of spill code into instructions. + +=item B<-enable-correct-eh-support> + +Make the -lowerinvoke pass insert expensive, but correct, EH code. + +=item B<-jit-enable-eh> + +Exception handling should be enabled in the just-in-time compiler. + +=item B<-join-liveintervals> + +Coalesce copies (default=true). 
+ +=item B<-nozero-initialized-in-bss> + +Don't place zero-initialized symbols into the BSS section. + +=item B<-pre-RA-sched>=I + +Instruction schedulers available (before register allocation): + + =default: Best scheduler for the target + =none: No scheduling: breadth first sequencing + =simple: Simple two pass scheduling: minimize critical path and maximize processor utilization + =simple-noitin: Simple two pass scheduling: Same as simple except using generic latency + =list-burr: Bottom-up register reduction list scheduling + =list-tdrr: Top-down register reduction list scheduling + =list-td: Top-down list scheduler + +=item B<-regalloc>=I + +Register allocator to use (default=linearscan) + + =bigblock: Big-block register allocator + =linearscan: Linear scan register allocator + =local: Local register allocator + =simple: Simple register allocator + +=item B<-relocation-model>=I + +Choose relocation model from: + + =default: Target default relocation model + =static: Non-relocatable code + =pic: Fully relocatable, position independent code + =dynamic-no-pic: Relocatable external references, non-relocatable code + +=item B<-spiller> + +Spiller to use (default=local) + + =simple: Simple spiller + =local: Local spiller + +=item B<-x86-asm-syntax>=I + +Choose style of code to emit from X86 backend: + + =att: Emit AT&T-style assembly + =intel: Emit Intel-style assembly + +=back + +=head1 EXIT STATUS + +If B fails to load the program, it will exit with an exit code of 1. +Otherwise, it will return the exit code of the program it executes. + +=head1 SEE ALSO + +L + +=head1 AUTHOR + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/llvm-ar.pod b/final/docs/CommandGuide/llvm-ar.pod new file mode 100644 index 00000000000..63ba43f6f6f --- /dev/null +++ b/final/docs/CommandGuide/llvm-ar.pod @@ -0,0 +1,406 @@ +=pod + +=head1 NAME + +llvm-ar - LLVM archiver + +=head1 SYNOPSIS + +B [-]{dmpqrtx}[Rabfikouz] [relpos] [count] [files...] + + +=head1 DESCRIPTION + +The B command is similar to the common Unix utility, C. It +archives several files together into a single file. The intent for this is +to produce archive libraries of LLVM bitcode that can be linked into an +LLVM program. However, the archive can contain any kind of file. By default, +B generates a symbol table that makes linking faster because +only the symbol table needs to be consulted, not each individual file member +of the archive. + +The B command can be used to I both SVR4 and BSD style archive +files. However, it cannot be used to write them. While the B command +produces files that are I identical to the format used by other C +implementations, it has two significant departures in order to make the +archive appropriate for LLVM. The first departure is that B only +uses BSD4.4 style long path names (stored immediately after the header) and +never contains a string table for long names. The second departure is that the +symbol table is formatted for efficient construction of an in-memory data +structure that permits rapid (red-black tree) lookups. Consequently, archives +produced with B usually won't be readable or editable with any +C implementation or useful for linking. Using the C modifier to flatten +file names will make the archive readable by other C implementations +but not for linking because the symbol table format for LLVM is unique.
If an +SVR4 or BSD style archive is used with the C (replace) or C (quick +update) operations, the archive will be reconstructed in LLVM format. This +means that the string table will be dropped (in deference to BSD 4.4 long names) +and an LLVM symbol table will be added (by default). The system symbol table +will be retained. + +Here's where B departs from previous C implementations: + +=over + +=item I + +Since B is intended to archive bitcode files, the symbol table +won't make much sense to anything but LLVM. Consequently, the symbol table's +format has been simplified. It consists simply of a sequence of pairs +of a file member index number as an LSB 4-byte integer and a null-terminated +string. + +=item I + +Some C implementations (SVR4) use a separate file member to record long +path names (> 15 characters). B takes the BSD 4.4 and Mac OS X +approach which is to simply store the full path name immediately preceding +the data for the file. The path name is null terminated and may contain the +slash (/) character. + +=item I + +B can compress the members of an archive to save space. The +compression used depends on what's available on the platform and what choices +the LLVM Compressor utility makes. It generally favors bzip2 but will select +between "no compression" and bzip2 depending on what makes sense for the +file's content. + +=item I + +Most C implementations do not recurse through directories but simply +ignore directories if they are presented to the program in the F +option. B, however, can recurse through directory structures and +add all the files under a directory, if requested. + +=item I + +When B prints out the verbose table of contents (C option), it +precedes the usual output with a character indicating the basic kind of +content in the file. A blank means the file is a regular file. A 'Z' means +the file is compressed. A 'B' means the file is an LLVM bitcode file. An +'S' means the file is the symbol table. + +=back + +=head1 OPTIONS + +The options to B are compatible with other C implementations. +However, there are a few modifiers (F) that are not found in other +Cs. The options to B specify a single basic operation to +perform on the archive, a variety of modifiers for that operation, the +name of the archive file, and an optional list of file names. These options +are used to determine how B should process the archive file. + +The Operations and Modifiers are explained in the sections below. The minimal +set of options is at least one operation and the name of the archive. Typically +archive files end with a C<.a> suffix, but this is not required. Following +the F comes a list of F that indicate the specific members +of the archive to operate on. If the F option is not specified, it +generally means either "none" or "all" members, depending on the operation. + +=head2 Operations + +=over + +=item d + +Delete files from the archive. No modifiers are applicable to this operation. +The F options specify which members should be removed from the +archive. It is not an error if a specified file does not appear in the archive. +If no F are specified, the archive is not modified. + +=item m[abi] + +Move files from one location in the archive to another. The F, F, and +F modifiers apply to this operation. The F will all be moved +to the location given by the modifiers. If no modifiers are used, the files +will be moved to the end of the archive. If no F are specified, the +archive is not modified.
The F modifier applies to this +operation. This operation simply prints the F indicated to the +standard output. If no F are specified, the entire archive is printed. +Printing bitcode files is ill-advised as they might confuse your terminal +settings. The F

operation is used. This modifier defeats the default and allows the +bitcode members to be printed. + +=item [N] + +This option is ignored by B but provided for compatibility. + +=item [o] + +When extracting files, this option will cause B to preserve the +original modification times of the files it writes. + +=item [P] + +Use full path names when matching. + +=item [R] + +This modifier instructs the F option to recursively process directories. +Without F, directories are ignored and only those F that refer to +files will be added to the archive. When F is used, any directories specified +with F will be scanned (recursively) to find files to be added to the +archive. Any file whose name begins with a dot will not be added. + +=item [u] + +When replacing existing files in the archive, only replace those files that have +a newer time stamp than the time stamp of the member in the archive. + +=item [z] + +When inserting or replacing any file in the archive, compress the file first. +This +modifier is safe to use when (previously) compressed bitcode files are added to +the archive; the compressed bitcode files will not be doubly compressed. + +=back + +=head2 Modifiers (generic) + +The modifiers below may be applied to any operation. + +=over + +=item [c] + +For all operations, B will always create the archive if it doesn't +exist. Normally, B will print a warning message indicating that the +archive is being created. Using this modifier turns off that warning. + +=item [s] + +This modifier requests that an archive index (or symbol table) be added to the +archive. This is the default mode of operation. The symbol table will contain +all the externally visible functions and global variables defined by all the +bitcode files in the archive. Using this modifier is more efficient than using +L which also creates the symbol table. + +=item [S] + +This modifier is the opposite of the F modifier. It instructs B to +not build the symbol table. If both F and F are used, the last modifier to +occur in the options will prevail. + +=item [v] + +This modifier instructs B to be verbose about what it is doing. Each +editing operation taken against the archive will produce a line of output saying +what is being done. + +=back + +=head1 STANDARDS + +The B utility is intended to provide a superset of the IEEE Std 1003.2 +(POSIX.2) functionality for C. B can read both SVR4 and BSD4.4 (or +Mac OS X) archives. If the C modifier is given to the C or C operations +then B will write SVR4 compatible archives. Without this modifier, +B will write BSD4.4 compatible archives that have long names +immediately after the header and indicated using the "#1/ddd" notation for the +name in the header. + +=head1 FILE FORMAT + +The file format for LLVM Archive files is similar to that of BSD 4.4 or Mac OS X +archive files. In fact, except for the symbol table, the C commands on those +operating systems should be able to read LLVM archive files. The details of the +file format follow. + +Each archive begins with the archive magic number which is the eight printable +characters "!<arch>\n" where \n represents the newline character (0x0A). +Following the magic number, the file is composed of even-length members that +begin with an archive header and end with a \n padding character if necessary +(to make the length even). Each file member is composed of a header (defined +below), an optional newline-terminated "long file name" and the contents of +the file. + +The fields of the header are described in the items below.
All fields of the +header contain only ASCII characters, are left justified and are right padded +with space characters. + +=over + +=item name - char[16] + +This field of the header provides the name of the archive member. If the name is +longer than 15 characters or contains a slash (/) character, then this field +contains C<#1/nnn> where C provides the length of the name and the C<#1/> +is literal. In this case, the actual name of the file is provided in the C +bytes immediately following the header. If the name is 15 characters or less, it +is contained directly in this field and terminated with a slash (/) character. + +=item date - char[12] + +This field provides the date of modification of the file in the form of a +decimal encoded number that provides the number of seconds since the epoch +(since 00:00:00 Jan 1, 1970) per POSIX specifications. + +=item uid - char[6] + +This field provides the user id of the file encoded as a decimal ASCII string. +This field might not make much sense on non-Unix systems. On Unix, it is the +same value as the st_uid field of the stat structure returned by the stat(2) +operating system call. + +=item gid - char[6] + +This field provides the group id of the file encoded as a decimal ASCII string. +This field might not make much sense on non-Unix systems. On Unix, it is the +same value as the st_gid field of the stat structure returned by the stat(2) +operating system call. + +=item mode - char[8] + +This field provides the access mode of the file encoded as an octal ASCII +string. This field might not make much sense on non-Unix systems. On Unix, it +is the same value as the st_mode field of the stat structure returned by the +stat(2) operating system call. + +=item size - char[10] + +This field provides the size of the file, in bytes, encoded as a decimal ASCII +string. If the size field is negative (starts with a minus sign, 0x2D), then +the archive member is stored in compressed form. The first byte of the archive +member's data indicates the compression type used. A value of 0 (0x30) indicates +that no compression was used. A value of 2 (0x32) indicates that bzip2 +compression was used. + +=item fmag - char[2] + +This field is the archive file member magic number. Its content is always the +two characters back tick (0x60) and newline (0x0A). This provides some measure +of utility in identifying archive files that have been corrupted. + +=back + +The LLVM symbol table has the special name "#_LLVM_SYM_TAB_#". It is presumed +that no regular archive member file will want this name. The LLVM symbol table +is simply composed of a sequence of triplets: byte offset, length of symbol, +and the symbol itself. Symbols are not null or newline terminated. Here are +the details on each of these items: + +=over + +=item offset - vbr encoded 32-bit integer + +The offset item provides the offset into the archive file where the bitcode +member is stored that is associated with the symbol. The offset value is 0 +based at the start of the first "normal" file member. To derive the actual +file offset of the member, you must add the number of bytes occupied by the file +signature (8 bytes) and the symbol tables. The value of this item is encoded +using variable bit rate encoding to reduce the size of the symbol table. +Variable bit rate encoding uses the high bit (0x80) of each byte to indicate +if there are more bytes to follow. The remaining 7 bits in each byte carry bits +from the value. The final byte does not have the high bit set.
+ +=item length - vbr encoded 32-bit integer + +The length item provides the length of the symbol that follows. Like this +I item, the length is variable bit rate encoded. + +=item symbol - character array + +The symbol item provides the text of the symbol that is associated with the +I. The symbol is not terminated by any character. Its length is provided +by the I field. Note that it is allowed (but unwise) to use non-printing +characters (even 0x00) in the symbol. This allows for multiple encodings of +symbol names. + +=back + +=head1 EXIT STATUS + +If B succeeds, it will exit with 0. A usage error results +in an exit code of 1. A hard error (typically a file system error) results in an +exit code of 2. Miscellaneous or unknown errors result in an +exit code of 3. + +=head1 SEE ALSO + +L, ar(1) + +=head1 AUTHORS + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/llvm-as.pod b/final/docs/CommandGuide/llvm-as.pod new file mode 100644 index 00000000000..185c009698f --- /dev/null +++ b/final/docs/CommandGuide/llvm-as.pod @@ -0,0 +1,77 @@ +=pod + +=head1 NAME + +llvm-as - LLVM assembler + +=head1 SYNOPSIS + +B [I] [I] + +=head1 DESCRIPTION + +B is the LLVM assembler. It reads a file containing human-readable +LLVM assembly language, translates it to LLVM bitcode, and writes the result +into a file or to standard output. + +If F is omitted or is C<->, then B reads its input from +standard input. + +If an output file is not specified with the B<-o> option, then +B sends its output to a file or standard output by following +these rules: + +=over + +=item * + +If the input is standard input, then the output is standard output. + +=item * + +If the input is a file that ends with C<.ll>, then the output file is of +the same name, except that the suffix is changed to C<.bc>. + +=item * + +If the input is a file that does not end with the C<.ll> suffix, then the +output file has the same name as the input file, except that the C<.bc> +suffix is appended. + +=back + +=head1 OPTIONS + +=over + +=item B<-f> + +Enable binary output on terminals. Normally, B will refuse to +write raw bitcode output if the output stream is a terminal. With this option, +B will write raw bitcode regardless of the output device. + +=item B<-help> + +Print a summary of command line options. + +=item B<-o> F + +Specify the output file name. If F is C<->, then B +sends its output to standard output. + +=back + +=head1 EXIT STATUS + +If B succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value. + +=head1 SEE ALSO + +L, L + +=head1 AUTHORS + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/llvm-bcanalyzer.pod b/final/docs/CommandGuide/llvm-bcanalyzer.pod new file mode 100644 index 00000000000..b0bc0cddba8 --- /dev/null +++ b/final/docs/CommandGuide/llvm-bcanalyzer.pod @@ -0,0 +1,315 @@ +=pod + +=head1 NAME + +llvm-bcanalyzer - LLVM bitcode analyzer + +=head1 SYNOPSIS + +B [I] [F] + +=head1 DESCRIPTION + +The B command is a small utility for analyzing bitcode files. +The tool reads a bitcode file (such as generated with the B tool) and +produces a statistical report on the contents of the bitcode file. The tool +can also dump a low-level but human-readable version of the bitcode file. +This tool is probably not of much interest or utility except for those working +directly with the bitcode file format. Most LLVM users can just ignore +this tool. + +If F is omitted or is C<->, then B reads its input +from standard input.
This is useful for combining the tool into a pipeline. +Output is written to the standard output. + +=head1 OPTIONS + +=over + +=item B<-nodetails> + +Causes B to abbreviate its output by writing out only a module +level summary. The details for individual functions are not displayed. + +=item B<-dump> + +Causes B to dump the bitcode in a human readable format. This +format is significantly different from LLVM assembly and provides details about +the encoding of the bitcode file. + +=item B<-verify> + +Causes B to verify the module produced by reading the +bitcode. This ensures that the statistics generated are based on a consistent +module. + +=item B<-help> + +Print a summary of command line options. + +=back + +=head1 EXIT STATUS + +If B succeeds, it will exit with 0. Otherwise, if an error +occurs, it will exit with a non-zero value, usually 1. + +=head1 SUMMARY OUTPUT DEFINITIONS + +The following items are always printed by llvm-bcanalyzer. They comprise the +summary output. + +=over + +=item B + +This just provides the name of the module for which bitcode analysis is being +generated. + +=item B + +The bitcode version (not LLVM version) of the file read by the analyzer. + +=item B + +The size, in bytes, of the entire bitcode file. + +=item B + +The size, in bytes, of the module block. Percentage is relative to File Size. + +=item B + +The size, in bytes, of all the function blocks. Percentage is relative to File +Size. + +=item B + +The size, in bytes, of the Global Types Pool. Percentage is relative to File +Size. This is the size of the definitions of all types in the bitcode file. + +=item B + +The size, in bytes, of the Constant Pool Blocks. Percentage is relative to File +Size. + +=item B + +The size, in bytes, of the Global Variable Definitions and their initializers. +Percentage is relative to File Size. + +=item B + +The size, in bytes, of all the instruction lists in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes. + +=item B + +The size, in bytes, of all the compaction tables in all the functions. +Percentage is relative to File Size. Note that this value is also included in +the Function Bytes. + +=item B + +The size, in bytes, of all the symbol tables in all the functions. Percentage is +relative to File Size. Note that this value is also included in the Function +Bytes. + +=item B + +The size, in bytes, of the list of dependent libraries in the module. Percentage +is relative to File Size. Note that this value is also included in the Module +Global Bytes. + +=item B + +The total number of blocks of any kind in the bitcode file. + +=item B + +The total number of function definitions in the bitcode file. + +=item B + +The total number of types defined in the Global Types Pool. + +=item B + +The total number of constants (of any type) defined in the Constant Pool. + +=item B + +The total number of basic blocks defined in all functions in the bitcode file. + +=item B + +The total number of instructions defined in all functions in the bitcode file. + +=item B + +The total number of long instructions defined in all functions in the bitcode +file. Long instructions are those taking greater than 4 bytes. Typically long +instructions are GetElementPtr with several indices, PHI nodes, and calls to +functions with large numbers of arguments. + +=item B + +The total number of operands used in all instructions in the bitcode file.
+ +=item B + +The total number of compaction tables in all functions in the bitcode file. + +=item B + +The total number of symbol tables in all functions in the bitcode file. + +=item B + +The total number of dependent libraries found in the bitcode file. + +=item B + +The total size of the instructions in all functions in the bitcode file. + +=item B + +The average number of bytes per instruction across all functions in the bitcode +file. This value is computed by dividing Total Instruction Size by Number Of +Instructions. + +=item B + +The maximum value used for a type's slot number. Larger slot number values take +more bytes to encode. + +=item B + +The maximum value used for a value's slot number. Larger slot number values take +more bytes to encode. + +=item B + +The average size of a Value definition (of any type). This is computed by +dividing File Size by the total number of values of any type. + +=item B + +The average size of a global definition (constants and global variables). + +=item B + +The average number of bytes per function definition. This is computed by +dividing Function Bytes by Number Of Functions. + +=item B<# of VBR 32-bit Integers> + +The total number of 32-bit integers encoded using the Variable Bit Rate +encoding scheme. + +=item B<# of VBR 64-bit Integers> + +The total number of 64-bit integers encoded using the Variable Bit Rate encoding +scheme. + +=item B<# of VBR Compressed Bytes> + +The total number of bytes consumed by the 32-bit and 64-bit integers that use +the Variable Bit Rate encoding scheme. + +=item B<# of VBR Expanded Bytes> + +The total number of bytes that would have been consumed by the 32-bit and 64-bit +integers had they not been compressed with the Variable Bit Rate encoding +scheme. + +=item B + +The total number of bytes saved by using the Variable Bit Rate encoding scheme. +The percentage is relative to # of VBR Expanded Bytes. + +=back + +=head1 DETAILED OUTPUT DEFINITIONS + +The following definitions occur only if the -nodetails option was not given. +The detailed output provides additional information on a per-function basis. + +=over + +=item B + +The type signature of the function. + +=item B + +The total number of bytes in the function's block. + +=item B + +The number of basic blocks defined by the function. + +=item B + +The number of instructions defined by the function. + +=item B + +The number of instructions using the long instruction format in the function. + +=item B + +The number of operands used by all instructions in the function. + +=item B + +The number of bytes consumed by instructions in the function. + +=item B + +The average number of bytes consumed by the instructions in the function. This +value is computed by dividing Instruction Size by Instructions. + +=item B + +The average number of bytes used by the function per instruction. This value is +computed by dividing Byte Size by Instructions. Note that this is not the same +as Average Instruction Size. It computes a number relative to the total function +size, not just the size of the instruction list. + +=item B + +The total number of 32-bit integers found in this function (for any use). + +=item B + +The total number of 64-bit integers found in this function (for any use). + +=item B + +The total number of bytes in this function consumed by the 32-bit and 64-bit +integers that use the Variable Bit Rate encoding scheme.
+ +=item B + +The total number of bytes in this function that would have been consumed by +the 32-bit and 64-bit integers had they not been compressed with the Variable +Bit Rate encoding scheme. + +=item B + +The total number of bytes saved in this function by using the Variable Bit +Rate encoding scheme. The percentage is relative to # of VBR Expanded Bytes. + +=back + +=head1 SEE ALSO + +L, L + +=head1 AUTHORS + +Maintained by the LLVM Team (L). + +=cut diff --git a/final/docs/CommandGuide/llvm-config.pod b/final/docs/CommandGuide/llvm-config.pod new file mode 100644 index 00000000000..4e38dae3df6 --- /dev/null +++ b/final/docs/CommandGuide/llvm-config.pod @@ -0,0 +1,131 @@ +=pod + +=head1 NAME + +llvm-config - Print LLVM compilation options + +=head1 SYNOPSIS + +B I

+ +
  1. Introduction
  2. Quick Start Guide
     1. Boolean Arguments
     2. Argument Aliases
     3. Selecting an alternative from a set of possibilities
     4. Named alternatives
     5. Parsing a list of options
     6. Collecting options as a set of flags
     7. Adding freeform text to help output
  3. Reference Guide
     1. Positional Arguments
     2. Internal vs External Storage
     3. Option Attributes
     4. Option Modifiers
     5. Top-Level Classes and Functions
     6. Builtin parsers
  4. Extension Guide
     1. Writing a custom parser
     2. Exploiting external storage
     3. Dynamically adding command line options
+

Written by Chris Lattner

+
+ + + + + +
+ +

This document describes the CommandLine argument processing library. It will +show you how to use it, and what it can do. The CommandLine library uses a +declarative approach to specifying the command line options that your program +takes. By default, these option declarations implicitly hold the value parsed +for the option declared (of course this can be +changed).

+ +

Although there are a lot of command line argument parsing libraries +out there in many different languages, none of them fit well with what I needed. +By looking at the features and problems of other libraries, I designed the +CommandLine library to have the following features:

+ +
    +
  1. Speed: The CommandLine library is very quick and uses few resources. The +parsing time of the library is directly proportional to the number of arguments +parsed, not the number of options recognized. Additionally, command line +argument values are captured transparently into user-defined global variables, +which can be accessed like any other variable (and with the same +performance).

  2. Type Safe: As a user of CommandLine, you don't have to worry about +remembering the type of arguments that you want (is it an int? a string? a +bool? an enum?) and keep casting it around. Not only does this help prevent +error-prone constructs, it also leads to dramatically cleaner source code.

  3. No subclasses required: To use CommandLine, you instantiate variables that +correspond to the arguments that you would like to capture; you don't subclass a +parser. This means that you don't have to write any boilerplate +code.

  4. Globally accessible: Libraries can specify command line arguments that are +automatically enabled in any tool that links to the library. This is possible +because the application doesn't have to keep a list of arguments to pass to +the parser. This also makes supporting dynamically +loaded options trivial.

  5. Cleaner: CommandLine supports enum and other types directly, meaning that +there is less room for error and more security built into the library. You don't have to +worry about whether your integral command line argument accidentally got +assigned a value that is not valid for your enum type.

  6. Powerful: The CommandLine library supports many different types of +arguments, from simple boolean flags to scalar arguments (strings, integers, enums, doubles), to lists of +arguments. This is possible because CommandLine is...

  7. Extensible: It is very simple to add a new argument type to CommandLine. +Simply specify the parser that you want to use with the command line option when +you declare it. Custom parsers are no problem.

  8. Labor Saving: The CommandLine library cuts down on the amount of grunt work +that you, the user, have to do. For example, it automatically provides a +-help option that shows the available command line options for your +tool. Additionally, it does most of the basic correctness checking for +you.

  9. Capable: The CommandLine library can handle lots of different forms of +options often found in real programs. For example, positional arguments, ls style grouping options (to allow processing 'ls +-lad' naturally), ld style prefix +options (to parse '-lmalloc -L/usr/lib'), and interpreter style options.
+ +

This document will hopefully let you jump in and start using CommandLine in +your utility quickly and painlessly. Additionally it should be a simple +reference manual to figure out how stuff works. If it is failing in some area +(or you want an extension to the library), nag the author, Chris Lattner.

+ +
+ + + + + +
+ +

This section of the manual runs through a simple CommandLine'ification of a +basic compiler tool. This is intended to show you how to jump into using the +CommandLine library in your own program, and show you some of the cool things it +can do.

+ +

To start out, you need to include the CommandLine header file into your +program:

+ +
+  #include "llvm/Support/CommandLine.h"
+
+ +

Additionally, you need to add this as the first line of your main +program:

+ +
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+  ...
+}
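+
+// An aside beyond the original text: ParseCommandLineOptions also accepts an
+// optional third argument, an overview string that -help prints above the
+// option listing. A minimal sketch of that call:
+//
+//   cl::ParseCommandLineOptions(argc, argv, " CommandLine compiler example\n");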
+
+ +

... which actually parses the arguments and fills in the variable +declarations.

+ +

Now that you are ready to support command line arguments, we need to tell the +system which ones we want, and what type of arguments they are. The CommandLine +library uses a declarative syntax to model command line arguments with the +global variable declarations that capture the parsed values. This means that +for every command line option that you would like to support, there should be a +global variable declaration to capture the result. For example, in a compiler, +we would like to support the Unix-standard '-o <filename>' option +to specify where to put the output. With the CommandLine library, this is +represented like this:

+ + +
+cl::opt<string> OutputFilename("o", cl::desc("Specify output filename"), cl::value_desc("filename"));
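+
+// (Aside, not part of the original example: the "cl::list" template mentioned
+// below collects every occurrence of a flag rather than a single value. The
+// option and variable names here are hypothetical.)
+// cl::list<string> Libraries("l", cl::desc("Link against library"), cl::value_desc("library"));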
+
+ +

This declares a global variable "OutputFilename" that is used to +capture the result of the "o" argument (first parameter). We specify +that this is a simple scalar option by using the "cl::opt" template (as opposed to the "cl::list" template), and tell the CommandLine library +that the data type that we are parsing is a string.

+ +

The second and third parameters (which are optional) are used to specify what +to output for the "-help" option. In this case, we get a line that +looks like this:

+ +
+USAGE: compiler [options]
+
+OPTIONS:
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+ +

Because we specified that the command line option should parse using the +string data type, the variable declared is automatically usable as a +real string in all contexts that a normal C++ string object may be used. For +example:

+ +
+  ...
+  std::ofstream Output(OutputFilename.c_str());
+  if (Output.good()) ...
+  ...
+
+ +

There are many different options that you can use to customize the command +line option handling library, but the above example shows the general interface +to these options. The options can be specified in any order, and are specified +with helper functions like cl::desc(...), so +there are no positional dependencies to remember. The available options are +discussed in detail in the Reference Guide.

+ +

Continuing the example, we would like to have our compiler take an input +filename as well as an output filename, but we do not want the input filename to +be specified with a hyphen (i.e., not -filename.c). To support this +style of argument, the CommandLine library allows for positional arguments to be specified for the program. +These positional arguments are filled with command line parameters that are not +in option form. We use this feature like this:

+ +
+cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+ +

This declaration indicates that the first positional argument should be +treated as the input filename. Here we use the cl::init option to specify an initial value for the +command line option, which is used if the option is not specified (if you do not +specify a cl::init modifier for an option, then +the default constructor for the data type is used to initialize the value). +Command line options default to being optional, so if we would like to require +that the user always specify an input filename, we would add the cl::Required flag, and we could eliminate the +cl::init modifier, like this:

+ +
+cl::opt<string> InputFilename(cl::Positional, cl::desc("<input file>"), cl::Required);
+
+ +

Again, the CommandLine library does not require the options to be specified +in any particular order, so the above declaration is equivalent to:

+ +
+cl::opt<string> InputFilename(cl::Positional, cl::Required, cl::desc("<input file>"));
+
+ +

By simply adding the cl::Required flag, +the CommandLine library will automatically issue an error if the argument is not +specified, which shifts all of the command line option verification code out of +your application into the library. This is just one example of how using flags +can alter the default behaviour of the library, on a per-option basis. By +adding one of the declarations above, the -help option synopsis is now +extended to:

+ +
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+ +

... indicating that an input filename is expected.

+ +
+ + + + +
+ +

In addition to input and output filenames, we would like the compiler example +to support three boolean flags: "-f" to force writing binary output to +a terminal, "--quiet" to enable quiet mode, and "-q" for +backwards compatibility with some of our users. We can support these by +declaring options of boolean type like this:

+ +
+cl::opt<bool> Force ("f", cl::desc("Enable binary output on terminals"));
+cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
+cl::opt<bool> Quiet2("q", cl::desc("Don't print informational messages"), cl::Hidden);
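+
+// (Aside, not part of the compiler example: non-boolean scalar options are
+// declared the same way, with cl::init supplying the value used when the flag
+// is absent. The "runs" option is hypothetical.)
+// cl::opt<unsigned> NumRuns("runs", cl::desc("Run the pass N times"), cl::init(1));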
+
+ +

This does what you would expect: it declares three boolean variables +("Force", "Quiet", and "Quiet2") to recognize these +options. Note that the "-q" option is specified with the "cl::Hidden" flag. This modifier prevents it +from being shown by the standard "-help" output (note that it is still +shown in the "-help-hidden" output).

+ +

The CommandLine library uses a different parser +for different data types. For example, in the string case, the argument passed +to the option is copied literally into the content of the string variable... we +obviously cannot do that in the boolean case, however, so we must use a smarter +parser. In the case of the boolean parser, it allows no value (in which case +it assigns the value of true to the variable), or it allows the values +"true" or "false" to be specified, allowing any of the +following inputs:

+ +
+ compiler -f          # No value, 'Force' == true
+ compiler -f=true     # Value specified, 'Force' == true
+ compiler -f=TRUE     # Value specified, 'Force' == true
+ compiler -f=FALSE    # Value specified, 'Force' == false
+
+ +

... you get the idea. The bool parser just turns +the string values into boolean values, and rejects things like 'compiler +-f=foo'. Similarly, the float, double, and int parsers work +like you would expect, using the 'strtol' and 'strtod' C +library calls to parse the string value into the specified data type.

+ +

With the declarations above, "compiler -help" emits this:

+ +
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -f     - Enable binary output on terminals
+  -o     - Override output filename
+  -quiet - Don't print informational messages
+  -help  - display available options (-help-hidden for more)
+
+ +

and "compiler -help-hidden" prints this:

+ +
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  -f     - Enable binary output on terminals
+  -o     - Override output filename
+  -q     - Don't print informational messages
+  -quiet - Don't print informational messages
+  -help  - display available options (-help-hidden for more)
+
+ +

This brief example has shown you how to use the 'cl::opt' class to parse simple scalar command line +arguments. In addition to simple scalar arguments, the CommandLine library also +provides primitives to support CommandLine option aliases, +and lists of options.

+ +
+ + + + +
+ +

So far, the example works well, except for the fact that we need to check the +quiet condition like this now:

+ +
+...
+  if (!Quiet && !Quiet2) printInformationalMessage(...);
+...
+
+ +

... which is a real pain! Instead of defining two values for the same +condition, we can use the "cl::alias" class to make the "-q" +option an alias for the "-quiet" option, instead of providing +a value itself:

+ +
+cl::opt<bool> Force ("f", cl::desc("Overwrite output files"));
+cl::opt<bool> Quiet ("quiet", cl::desc("Don't print informational messages"));
+cl::alias     QuietA("q", cl::desc("Alias for -quiet"), cl::aliasopt(Quiet));
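+
+// (Aside, not part of the original example: an alias can rename any option
+// type, e.g. giving the string option -o declared earlier a long form.)
+// cl::alias OutputFilenameA("output", cl::desc("Alias for -o"), cl::aliasopt(OutputFilename));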
+
+ +

The third line (which is the only one we modified from above) defines a +"-q" alias that updates the "Quiet" variable (as specified by +the cl::aliasopt modifier) whenever it is +specified. Because aliases do not hold state, the only thing the program has to +query is the Quiet variable now. Another nice feature of aliases is +that they automatically hide themselves from the -help output +(although, again, they are still visible in the -help-hidden +output).

+ +

Now the application code can simply use:

+ +
+...
+  if (!Quiet) printInformationalMessage(...);
+...
+
+ +

... which is much nicer! The "cl::alias" +can be used to specify an alternative name for any variable type, and has many +uses.

+ +
+ + + + +
+ +

So far we have seen how the CommandLine library handles builtin types like +std::string, bool and int, but how does it handle +things it doesn't know about, like enums or 'int*'s?

+ +

The answer is that it uses a table-driven generic parser (unless you specify +your own parser, as described in the Extension +Guide). This parser maps literal strings to whatever type is required, and +requires you to tell it what this mapping should be.

+ +

Let's say that we would like to add four optimization levels to our optimizer, using the standard flags "-g", "-O1", "-O2", and "-O3". We could easily implement this with boolean options like above, but there are several problems with this strategy:

+ +
    +
  1. A user could specify more than one of the options at a time, for example, +"compiler -O3 -O2". The CommandLine library would not be able to +catch this erroneous input for us.
  2. + +
  3. We would have to test 4 different variables to see which ones are set.
  4. + +
  5. This doesn't map to the numeric levels that we want... so we cannot easily +see if some level >= "-O1" is enabled.
  6. + +
+ +

To cope with these problems, we can use an enum value, and have the +CommandLine library fill it in with the appropriate level directly, which is +used like this:

+ +
+enum OptLevel {
+  g, O1, O2, O3
+};
+
+cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+  cl::values(
+    clEnumVal(g , "No optimizations, enable debugging"),
+    clEnumVal(O1, "Enable trivial optimizations"),
+    clEnumVal(O2, "Enable default optimizations"),
+    clEnumVal(O3, "Enable expensive optimizations"),
+   clEnumValEnd));
+
+...
+  if (OptimizationLevel >= O2) doPartialRedundancyElimination(...);
+...
+
+ +

This declaration defines a variable "OptimizationLevel" of the "OptLevel" enum type. This variable can be assigned any of the values that are listed in the declaration (note that the declaration list must be terminated with the "clEnumValEnd" argument!). The CommandLine library enforces that the user can only specify one of the options, and it ensures that only valid enum values can be specified. The "clEnumVal" macros ensure that the command line arguments match the enum values. With this option added, our help output now is:

+ +
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  Choose optimization level:
+    -g          - No optimizations, enable debugging
+    -O1         - Enable trivial optimizations
+    -O2         - Enable default optimizations
+    -O3         - Enable expensive optimizations
+  -f            - Enable binary output on terminals
+  -help         - display available options (-help-hidden for more)
+  -o <filename> - Specify output filename
+  -quiet        - Don't print informational messages
+
+ +

In this case, it is sort of awkward that the flag names correspond directly to enum names, because we probably don't want an enum definition named "g" in our program. Because of this, we can alternatively write this example like this:

+ +
+enum OptLevel {
+  Debug, O1, O2, O3
+};
+
+cl::opt<OptLevel> OptimizationLevel(cl::desc("Choose optimization level:"),
+  cl::values(
+   clEnumValN(Debug, "g", "No optimizations, enable debugging"),
+    clEnumVal(O1        , "Enable trivial optimizations"),
+    clEnumVal(O2        , "Enable default optimizations"),
+    clEnumVal(O3        , "Enable expensive optimizations"),
+   clEnumValEnd));
+
+...
+  if (OptimizationLevel == Debug) outputDebugInfo(...);
+...
+
+ +

By using the "clEnumValN" macro instead of "clEnumVal", we can directly specify the name that the flag should get. In general a direct mapping is nice, but sometimes you can't or don't want to preserve the mapping, and that is when "clEnumValN" is useful.

+ +
+ + + + +
+ +

Another useful argument form is a named alternative style. We shall use this +style in our compiler to specify different debug levels that can be used. +Instead of each debug level being its own switch, we want to support the +following options, of which only one can be specified at a time: +"--debug-level=none", "--debug-level=quick", +"--debug-level=detailed". To do this, we use the exact same format as +our optimization level flags, but we also specify an option name. For this +case, the code looks like this:

+ +
+enum DebugLev {
+  nodebuginfo, quick, detailed
+};
+
+// Enable Debug Options to be specified on the command line
+cl::opt<DebugLev> DebugLevel("debug_level", cl::desc("Set the debugging level:"),
+  cl::values(
+    clEnumValN(nodebuginfo, "none", "disable debug information"),
+     clEnumVal(quick,               "enable quick debug information"),
+     clEnumVal(detailed,            "enable detailed debug information"),
+    clEnumValEnd));
+
+ +

This definition defines an enumerated command line variable of type "enum +DebugLev", which works exactly the same way as before. The difference here +is just the interface exposed to the user of your program and the help output by +the "-help" option:

+ +
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  Choose optimization level:
+    -g          - No optimizations, enable debugging
+    -O1         - Enable trivial optimizations
+    -O2         - Enable default optimizations
+    -O3         - Enable expensive optimizations
+  -debug_level  - Set the debugging level:
+    =none       - disable debug information
+    =quick      - enable quick debug information
+    =detailed   - enable detailed debug information
+  -f            - Enable binary output on terminals
+  -help         - display available options (-help-hidden for more)
+  -o <filename> - Specify output filename
+  -quiet        - Don't print informational messages
+
+ +

Again, the only structural difference between the debug level declaration and +the optimization level declaration is that the debug level declaration includes +an option name ("debug_level"), which automatically changes how the +library processes the argument. The CommandLine library supports both forms so +that you can choose the form most appropriate for your application.

+ +
+ + + + +
+ +

Now that we have the standard run-of-the-mill argument types out of the way, let's get a little wild and crazy. Let's say that we want our optimizer to accept a list of optimizations to perform, allowing duplicates. For example, we might want to run: "compiler -dce -constprop -inline -dce -strip". In this case, the order of the arguments and the number of appearances is very important. This is what the "cl::list" template is for. First, start by defining an enum of the optimizations that you would like to perform:

+ +
+enum Opts {
+  // 'inline' is a C++ keyword, so name it 'inlining'
+  dce, constprop, inlining, strip
+};
+
+ +

Then define your "cl::list" variable:

+ +
+cl::list<Opts> OptimizationList(cl::desc("Available Optimizations:"),
+  cl::values(
+    clEnumVal(dce               , "Dead Code Elimination"),
+    clEnumVal(constprop         , "Constant Propagation"),
+   clEnumValN(inlining, "inline", "Procedure Integration"),
+    clEnumVal(strip             , "Strip Symbols"),
+  clEnumValEnd));
+
+ +

This defines a variable that is conceptually of the type +"std::vector<enum Opts>". Thus, you can access it with standard +vector methods:

+ +
+  for (unsigned i = 0; i != OptimizationList.size(); ++i)
+    switch (OptimizationList[i])
+       ...
+
+ +

... to iterate through the list of options specified.

+ +

Note that the "cl::list" template is +completely general and may be used with any data types or other arguments that +you can use with the "cl::opt" template. One +especially useful way to use a list is to capture all of the positional +arguments together if there may be more than one specified. In the case of a +linker, for example, the linker takes several '.o' files, and needs to +capture them into a list. This is naturally specified as:

+ +
+...
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<Input files>"), cl::OneOrMore);
+...
+
+ +

This variable works just like a "vector<string>" object. As +such, accessing the list is simple, just like above. In this example, we used +the cl::OneOrMore modifier to inform the +CommandLine library that it is an error if the user does not specify any +.o files on our command line. Again, this just reduces the amount of +checking we have to do.

+ +
+ + + + +
+ +

Instead of collecting sets of options in a list, it is also possible to gather information for enum values in a bit vector. The representation used by the cl::bits class is an unsigned integer. An enum value is represented by a 0/1 in the bit position given by the enum's ordinal value: 1 indicates that the enum was specified, 0 otherwise. As each specified value is parsed, the resulting enum's bit is set in the option's bit vector:

+ +
+  bits |= 1 << (unsigned)enum;
+
+ +

Options that are specified multiple times are redundant. Any instances after +the first are discarded.

+ +

Reworking the above list example, we could replace +cl::list with cl::bits:

+ +
+cl::bits<Opts> OptimizationBits(cl::desc("Available Optimizations:"),
+  cl::values(
+    clEnumVal(dce               , "Dead Code Elimination"),
+    clEnumVal(constprop         , "Constant Propagation"),
+   clEnumValN(inlining, "inline", "Procedure Integration"),
+    clEnumVal(strip             , "Strip Symbols"),
+  clEnumValEnd));
+
+ +

To test to see if constprop was specified, we can use the cl::bits::isSet function:

+ +
+  if (OptimizationBits.isSet(constprop)) {
+    ...
+  }
+
+ +

It's also possible to get the raw bit vector using the +cl::bits::getBits function:

+ +
+  unsigned bits = OptimizationBits.getBits();
+
+ +

Finally, if external storage is used, then the location specified must be of +type unsigned. In all other ways a cl::bits option is equivalent to a cl::list option.
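
For example, a minimal sketch of the external storage form, reusing the Opts enum from above (the variable name OptBitsStorage is illustrative):

+ +
+// External storage for cl::bits must be of type 'unsigned'.
+static unsigned OptBitsStorage;
+
+static cl::bits<Opts, unsigned> OptimizationBits(
+  cl::desc("Available Optimizations:"),
+  cl::location(OptBitsStorage),
+  cl::values(
+    clEnumVal(dce  , "Dead Code Elimination"),
+    clEnumVal(strip, "Strip Symbols"),
+  clEnumValEnd));
+
+ +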

+ +
+ + + + + +
+ +

As our program grows and becomes more mature, we may decide to put summary information about what it does into the help output. The help output is styled to look similar to a Unix man page, providing concise information about a program. Unix man pages, however, often have a description of what the program does. To add this to your CommandLine program, simply pass a third argument to the cl::ParseCommandLineOptions call in main. This additional argument is then printed as the overview information for your program, allowing you to include any additional information that you want. For example:

+ +
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv, " CommandLine compiler example\n\n"
+                              "  This program blah blah blah...\n");
+  ...
+}
+
+ +

would yield the help output:

+ +
+OVERVIEW: CommandLine compiler example
+
+  This program blah blah blah...
+
+USAGE: compiler [options] <input file>
+
+OPTIONS:
+  ...
+  -help             - display available options (-help-hidden for more)
+  -o <filename>     - Specify output filename
+
+ +
+ + + + + + +
+ +

Now that you know the basics of how to use the CommandLine library, this +section will give you the detailed information you need to tune how command line +options work, as well as information on more "advanced" command line option +processing capabilities.

+ +
+ + + + +
+ +

Positional arguments are those arguments that are not named, and are not +specified with a hyphen. Positional arguments should be used when an option is +specified by its position alone. For example, the standard Unix grep +tool takes a regular expression argument, and an optional filename to search +through (which defaults to standard input if a filename is not specified). +Using the CommandLine library, this would be specified as:

+ +
+cl::opt<string> Regex   (cl::Positional, cl::desc("<regular expression>"), cl::Required);
+cl::opt<string> Filename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
+
+ +

Given these two option declarations, the -help output for our grep +replacement would look like this:

+ +
+USAGE: spiffygrep [options] <regular expression> <input file>
+
+OPTIONS:
+  -help - display available options (-help-hidden for more)
+
+ +

... and the resultant program could be used just like the standard +grep tool.

+ +

Positional arguments are sorted by their order of construction. This means +that command line options will be ordered according to how they are listed in a +.cpp file, but will not have an ordering defined if the positional arguments +are defined in multiple .cpp files. The fix for this problem is simply to +define all of your positional arguments in one .cpp file.

+ +
+ + + + + +
+ +

Sometimes you may want to specify a value to your positional argument that starts with a hyphen (for example, searching for '-foo' in a file). At first, you will have trouble doing this, because the library will try to find an option named '-foo', and will fail (and single quotes will not save you). Note that the system grep has the same problem:

+ +
+  $ spiffygrep '-foo' test.txt
+  Unknown command line argument '-foo'.  Try: spiffygrep -help'
+
+  $ grep '-foo' test.txt
+  grep: illegal option -- f
+  grep: illegal option -- o
+  grep: illegal option -- o
+  Usage: grep -hblcnsviw pattern file . . .
+
+ +

The solution for this problem is the same for both your tool and the system +version: use the '--' marker. When the user specifies '--' on +the command line, it is telling the program that all options after the +'--' should be treated as positional arguments, not options. Thus, we +can use it like this:

+ +
+  $ spiffygrep -- -foo test.txt
+    ...output...
+
+ +
+ + + +
+

Sometimes an option can affect or modify the meaning of another option. For + example, consider gcc's -x LANG option. This tells + gcc to ignore the suffix of subsequent positional arguments and force + the file to be interpreted as if it contained source code in language + LANG. In order to handle this properly, you need to know the + absolute position of each argument, especially those in lists, so their + interaction(s) can be applied correctly. This is also useful for options like + -llibname which is actually a positional argument that starts with + a dash.

+

So, generally, the problem is that you have two cl::list variables + that interact in some way. To ensure the correct interaction, you can use the + cl::list::getPosition(optnum) method. This method returns the + absolute position (as found on the command line) of the optnum + item in the cl::list.

+

The idiom for usage is like this:

+ +
+  static cl::list<std::string> Files(cl::Positional, cl::OneOrMore);
+  static cl::list<std::string> Libraries("l", cl::ZeroOrMore);
+
+  int main(int argc, char**argv) {
+    // ...
+    std::vector<std::string>::iterator fileIt = Files.begin();
+    std::vector<std::string>::iterator libIt  = Libraries.begin();
+    unsigned libPos = 0, filePos = 0;
+    while ( 1 ) {
+      if ( libIt != Libraries.end() )
+        libPos = Libraries.getPosition( libIt - Libraries.begin() );
+      else
+        libPos = 0;
+      if ( fileIt != Files.end() )
+        filePos = Files.getPosition( fileIt - Files.begin() );
+      else
+        filePos = 0;
+
+      if ( filePos != 0 && (libPos == 0 || filePos < libPos) ) {
+        // Source File Is next
+        ++fileIt;
+      }
+      else if ( libPos != 0 && (filePos == 0 || libPos < filePos) ) {
+        // Library is next
+        ++libIt;
+      }
+      else
+        break; // we're done with the list
+    }
+  }
+ +

Note that, for compatibility reasons, the cl::opt also supports an unsigned getPosition() option that will provide the absolute position of that option. You can apply the same approach as above to a cl::opt and a cl::list option just as you can to two lists.
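
For example, here is a minimal sketch (with illustrative option names) of comparing a cl::opt position against the first element of a cl::list:

+ +
+static cl::opt<std::string>  Output("o", cl::desc("Output file"));
+static cl::list<std::string> Files(cl::Positional, cl::OneOrMore);
+
+  // After cl::ParseCommandLineOptions: was -o given, and did it
+  // appear before the first input file?
+  if (Output.getPosition() != 0 &&
+      Output.getPosition() < Files.getPosition(0)) {
+    // ... handle '-o' preceding the inputs ...
+  }
+
+ +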

+
+ + + + +
+ +

The cl::ConsumeAfter formatting option is used to construct programs that use "interpreter style" option processing. With this style of option processing, all arguments specified after the last positional argument are treated as special interpreter arguments that are not interpreted as command line options.

+ +

As a concrete example, let's say we are developing a replacement for the standard Unix Bourne shell (/bin/sh). To run /bin/sh, first you specify options to the shell itself (like -x which turns on trace output), then you specify the name of the script to run, then you specify arguments to the script. These arguments to the script are parsed by the Bourne shell command line option processor, but are not interpreted as options to the shell itself. Using the CommandLine library, we would specify this as:

+ +
+cl::opt<string> Script(cl::Positional, cl::desc("<input script>"), cl::init("-"));
+cl::list<string>  Argv(cl::ConsumeAfter, cl::desc("<program arguments>..."));
+cl::opt<bool>    Trace("x", cl::desc("Enable trace output"));
+
+ +

which automatically provides the help output:

+ +
+USAGE: spiffysh [options] <input script> <program arguments>...
+
+OPTIONS:
+  -help - display available options (-help-hidden for more)
+  -x    - Enable trace output
+
+ +

At runtime, if we run our new shell replacement as `spiffysh -x test.sh +-a -x -y bar', the Trace variable will be set to true, the +Script variable will be set to "test.sh", and the +Argv list will contain ["-a", "-x", "-y", "bar"], because they +were specified after the last positional argument (which is the script +name).

+ +

There are several limitations to when cl::ConsumeAfter options can +be specified. For example, only one cl::ConsumeAfter can be specified +per program, there must be at least one positional +argument specified, there must not be any cl::list +positional arguments, and the cl::ConsumeAfter option should be a cl::list option.

+ +
+ + + + +
+ +

By default, all command line options automatically hold the value that they +parse from the command line. This is very convenient in the common case, +especially when combined with the ability to define command line options in the +files that use them. This is called the internal storage model.

+ +

Sometimes, however, it is nice to separate the command line option processing code from the storage of the value parsed. For example, let's say that we have a '-debug' option that we would like to use to enable debug information across the entire body of our program. In this case, the boolean value controlling the debug code should be globally accessible (in a header file, for example) yet the command line option processing code should not be exposed to all of these clients (requiring lots of .cpp files to #include CommandLine.h).

+ +

To do this, set up your .h file with your option, like this for example:

+ +
+
+// DebugFlag.h - Get access to the '-debug' command line option
+//
+
+// DebugFlag - This boolean is set to true if the '-debug' command line option
+// is specified.  This should probably not be referenced directly, instead, use
+// the DEBUG macro below.
+//
+extern bool DebugFlag;
+
+// DEBUG macro - This macro should be used by code to emit debug information.
+// If the '-debug' option is specified on the command line, and if this is a
+// debug build, then the code specified as the option to the macro will be
+// executed.  Otherwise it will not be.
+#ifdef NDEBUG
+#define DEBUG(X)
+#else
+#define DEBUG(X) do { if (DebugFlag) { X; } } while (0)
+#endif
+
+
+ +

This allows clients to blissfully use the DEBUG() macro, or the +DebugFlag explicitly if they want to. Now we just need to be able to +set the DebugFlag boolean when the option is set. To do this, we pass +an additional argument to our command line argument processor, and we specify +where to fill in with the cl::location +attribute:

+ +
+
+bool DebugFlag;                  // the actual value
+static cl::opt<bool, true>       // The parser
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden, cl::location(DebugFlag));
+
+
+ +

In the above example, we specify "true" as the second argument to +the cl::opt template, indicating that the +template should not maintain a copy of the value itself. In addition to this, +we specify the cl::location attribute, so +that DebugFlag is automatically set.

+ +
+ + + + +
+ +

This section describes the basic attributes that you can specify on +options.

+ +
    + +
  • The option name attribute (which is required for all options, except positional options) specifies what the option name is. +This option is specified in simple double quotes: + +
    +cl::opt<bool> Quiet("quiet");
    +
    + +
  • + +
  • The cl::desc attribute specifies a +description for the option to be shown in the -help output for the +program.
  • + +
  • The cl::value_desc attribute +specifies a string that can be used to fine tune the -help output for +a command line option. Look here for an +example.
  • + +
• The cl::init attribute specifies an initial value for a scalar option. If this attribute is not specified then the command line option value defaults to the value created by the default constructor for the type. Warning: If you specify both cl::init and cl::location for an option, you must specify cl::location first, so that when the command-line parser sees cl::init, it knows where to put the initial value. (You will get an error at runtime if you don't put them in the right order; the correct ordering is shown in the sketch after this list.)
  • + +
• The cl::location attribute specifies where to store the value for a parsed command line option if using external storage. See the section on Internal vs External Storage for more information.
  • + +
  • The cl::aliasopt attribute +specifies which option a cl::alias option is +an alias for.
  • + +
  • The cl::values attribute specifies +the string-to-value mapping to be used by the generic parser. It takes a +clEnumValEnd terminated list of (option, value, description) triplets +that +specify the option name, the value mapped to, and the description shown in the +-help for the tool. Because the generic parser is used most +frequently with enum values, two macros are often useful: + +
      + +
1. The clEnumVal macro is used as a nice simple way to specify a triplet for an enum. This macro automatically makes the option name be the same as the enum name. The first argument to the macro is the enum value, the second is the description for the command line option.
    2. + +
3. The clEnumValN macro is used to specify macro options where the option name doesn't equal the enum name. For this macro, the first argument is the enum value, the second is the flag name, and the third is the description.
    4. + +
    + +You will get a compile time error if you try to use cl::values with a parser +that does not support it.
  • + +
• The cl::multi_val attribute specifies that this option takes multiple values (example: -sectalign segname sectname sectvalue). This attribute takes one unsigned argument - the number of values for the option. This attribute is valid only on cl::list options (and will fail with a compile-time error if you try to use it with other option types). It is allowed to use all of the usual modifiers on multi-valued options (besides cl::ValueDisallowed, obviously). A declaration sketch follows this list.
  • + +
+ +
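
To illustrate the last two attributes, here is a minimal sketch of the required cl::location/cl::init ordering and of a multi-valued option in the style of -sectalign (the variable names are illustrative, and the value count passed to cl::multi_val is assumed to follow the description above):

+ +
+// cl::location must precede cl::init so the parser knows where to
+// put the initial value.
+static bool DebugFlag;
+static cl::opt<bool, true> Debug("debug", cl::location(DebugFlag),
+                                 cl::init(false));
+
+// Each occurrence of -sectalign consumes three values.
+static cl::list<std::string> SectAlign("sectalign", cl::multi_val(3),
+                                       cl::desc("segname sectname value"));
+
+ +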
+ + + + +
+ +

Option modifiers are the flags and expressions that you pass into the +constructors for cl::opt and cl::list. These modifiers give you the ability to +tweak how options are parsed and how -help output is generated to fit +your application well.

+ +

These options fall into five main categories:

+ +
    +
  1. Hiding an option from -help output
  2. +
  3. Controlling the number of occurrences + required and allowed
  4. +
  5. Controlling whether or not a value must be + specified
  6. +
  7. Controlling other formatting options
  8. +
  9. Miscellaneous option modifiers
  10. +
+ +

It is not possible to specify two options from the same category (you'll get a runtime error) for a single option, except for options in the miscellaneous category. The CommandLine library specifies defaults for all of these settings that are the most useful in practice and the most common, which means that you usually shouldn't have to worry about these.

+ +
+ + + + +
+ +

The cl::NotHidden, cl::Hidden, and +cl::ReallyHidden modifiers are used to control whether or not an option +appears in the -help and -help-hidden output for the +compiled program:

+ +
    + +
  • The cl::NotHidden modifier +(which is the default for cl::opt and cl::list options) indicates the option is to appear +in both help listings.
  • + +
  • The cl::Hidden modifier (which is the +default for cl::alias options) indicates that +the option should not appear in the -help output, but should appear in +the -help-hidden output.
  • + +
  • The cl::ReallyHidden modifier +indicates that the option should not appear in any help output.
  • + +
+ +
+ + + + +
+ +

This group of options is used to control how many times an option is allowed (or required) to be specified on the command line of your program. Specifying a value for this setting allows the CommandLine library to do error checking for you.

+ +

The allowed values for this option group are:

+ +
    + +
  • The cl::Optional modifier (which +is the default for the cl::opt and cl::alias classes) indicates that your program will +allow either zero or one occurrence of the option to be specified.
  • + +
  • The cl::ZeroOrMore modifier +(which is the default for the cl::list class) +indicates that your program will allow the option to be specified zero or more +times.
  • + +
  • The cl::Required modifier +indicates that the specified option must be specified exactly one time.
  • + +
  • The cl::OneOrMore modifier +indicates that the option must be specified at least one time.
  • + +
  • The cl::ConsumeAfter modifier is described in the Positional arguments section.
  • + +
+ +

If an option is not specified, then the value of the option is equal to the +value specified by the cl::init attribute. If +the cl::init attribute is not specified, the +option value is initialized with the default constructor for the data type.

+ +

If an option of the cl::opt class is specified multiple times, only the last value will be retained.

+ +
+ + + + +
+ +

This group of options is used to control whether or not the option allows a +value to be present. In the case of the CommandLine library, a value is either +specified with an equal sign (e.g. '-index-depth=17') or as a trailing +string (e.g. '-o a.out').

+ +

The allowed values for this option group are:

+ +
    + +
  • The cl::ValueOptional modifier +(which is the default for bool typed options) specifies that it is +acceptable to have a value, or not. A boolean argument can be enabled just by +appearing on the command line, or it can have an explicit '-foo=true'. +If an option is specified with this mode, it is illegal for the value to be +provided without the equal sign. Therefore '-foo true' is illegal. To +get this behavior, you must use the cl::ValueRequired modifier.
  • + +
• The cl::ValueRequired modifier (which is the default for all other types except for unnamed alternatives using the generic parser) specifies that a value must be provided. This mode informs the command line library that if an option is not provided with an equal sign, the next argument provided must be the value. This allows things like '-o a.out' to work.
  • + +
  • The cl::ValueDisallowed +modifier (which is the default for unnamed +alternatives using the generic parser) indicates that it is a runtime error +for the user to specify a value. This can be provided to disallow users from +providing options to boolean options (like '-foo=true').
  • + +
+ +

In general, the default values for this option group work just like you would +want them to. As mentioned above, you can specify the cl::ValueDisallowed modifier to a boolean +argument to restrict your command line parser. These options are mostly useful +when extending the library.
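
For example, a minimal sketch of rejecting '-quiet=true' so that only the bare '-quiet' form is accepted:

+ +
+static cl::opt<bool> Quiet("quiet",
+                           cl::desc("Don't print informational messages"),
+                           cl::ValueDisallowed);
+
+ +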

+ +
+ + + + +
+ +

The formatting option group is used to specify that the command line option +has special abilities and is otherwise different from other command line +arguments. As usual, you can only specify one of these arguments at most.

+ +
    + +
• The cl::NormalFormatting modifier (which is the default for all options) specifies that this option is "normal".
  • + +
  • The cl::Positional modifier +specifies that this is a positional argument that does not have a command line +option associated with it. See the Positional +Arguments section for more information.
  • + +
  • The cl::ConsumeAfter modifier +specifies that this option is used to capture "interpreter style" arguments. See this section for more information.
  • + +
  • The cl::Prefix modifier specifies +that this option prefixes its value. With 'Prefix' options, the equal sign does +not separate the value from the option name specified. Instead, the value is +everything after the prefix, including any equal sign if present. This is useful +for processing odd arguments like -lmalloc and -L/usr/lib in a +linker tool or -DNAME=value in a compiler tool. Here, the +'l', 'D' and 'L' options are normal string (or list) +options, that have the cl::Prefix +modifier added to allow the CommandLine library to recognize them. Note that +cl::Prefix options must not have the +cl::ValueDisallowed modifier +specified.
  • + +
• The cl::Grouping modifier is used to implement Unix-style tools (like ls) that have lots of single letter arguments, but only require a single dash. For example, the 'ls -labF' command actually enables four different options, all of which are single letters. Note that cl::Grouping options cannot have values. (Declaration sketches for both cl::Prefix and cl::Grouping follow this list.)
  • + +
+ +
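
Here is a minimal declaration sketch for both modifiers (the option and variable names are illustrative):

+ +
+// Prefix: the value is glued to the option name, as in '-L/usr/lib'.
+static cl::list<std::string> LibPaths("L", cl::Prefix,
+                                      cl::desc("Library search path"));
+
+// Grouping: single-letter flags that can share one dash, as in '-aF'.
+static cl::opt<bool> AllFiles("a", cl::Grouping,
+                              cl::desc("Show hidden files"));
+static cl::opt<bool> Classify("F", cl::Grouping,
+                              cl::desc("Append file type indicators"));
+
+ +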

The CommandLine library does not restrict how you use the cl::Prefix or cl::Grouping modifiers, but it is possible to +specify ambiguous argument settings. Thus, it is possible to have multiple +letter options that are prefix or grouping options, and they will still work as +designed.

+ +

To do this, the CommandLine library uses a greedy algorithm to parse the +input option into (potentially multiple) prefix and grouping options. The +strategy basically looks like this:

+ +
+parse(string OrigInput) {
+  1. string input = OrigInput;
+  2. if (isOption(input)) return getOption(input).parse();  // Normal option
+  3. while (!isOption(input) && !input.empty()) input.pop_back();  // Remove the last letter
+  4. if (input.empty()) return error();  // No matching option
+  5. if (getOption(input).isPrefix())
+       return getOption(input).parse(input);
+  6. while (!input.empty()) {  // Must be grouping options
+       getOption(input).parse();
+       OrigInput.erase(OrigInput.begin(), OrigInput.begin()+input.length());
+       input = OrigInput;
+       while (!isOption(input) && !input.empty()) input.pop_back();
+     }
+  7. if (!OrigInput.empty()) error();
+}

+
+ +
+ + + + +
+ +

The miscellaneous option modifiers are the only flags where you can specify +more than one flag from the set: they are not mutually exclusive. These flags +specify boolean properties that modify the option.

+ +
    + +
• The cl::CommaSeparated modifier indicates that any commas specified for an option's value should be used to split the value up into multiple values for the option. For example, these two options are equivalent when cl::CommaSeparated is specified: "-foo=a -foo=b -foo=c" and "-foo=a,b,c". This modifier only makes sense for options that are allowed to accept one or more values (i.e. cl::list options).
  • + +
• The cl::PositionalEatsArgs modifier (which only applies to positional arguments, and only makes sense for lists) indicates that the positional argument should consume any strings after it (including strings that start with a "-") up until another recognized positional argument. For example, if you have two "eating" positional arguments, "pos1" and "pos2", the string "-pos1 -foo -bar baz -pos2 -bork" would cause the "-foo -bar baz" strings to be applied to the "-pos1" option and the "-bork" string to be applied to the "-pos2" option.
  • + +
• The cl::Sink modifier is used to handle unknown options. If there is at least one option with the cl::Sink modifier specified, the parser passes unrecognized option strings to it as values instead of signaling an error. As with cl::CommaSeparated, this modifier only makes sense with a cl::list option. (Declaration sketches for cl::CommaSeparated and cl::Sink follow this list.)
  • + +
+ +
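
Here is a minimal declaration sketch for these two modifiers (the option names are illustrative):

+ +
+// '-foo=a,b,c' is split into the three values 'a', 'b', and 'c'.
+static cl::list<std::string> Foo("foo", cl::CommaSeparated);
+
+// Unrecognized option strings are collected here instead of
+// producing an error.
+static cl::list<std::string> SinkOpts(cl::Sink);
+
+ +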

So far, these are the only three miscellaneous option modifiers.

+ +
+ + + + +
+ +

Some systems, such as certain variants of Microsoft Windows and some older Unices, have a relatively low limit on command-line length. It is therefore customary to use the so-called 'response files' to circumvent this restriction. These files are mentioned on the command-line (using the "@file" syntax). The program reads these files and inserts the contents into argv, thereby working around the command-line length limits. Response files are enabled by an optional fourth argument to cl::ParseEnvironmentOptions and cl::ParseCommandLineOptions.
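
For example, a sketch of enabling response files through the fourth argument of cl::ParseCommandLineOptions:

+ +
+int main(int argc, char **argv) {
+  // The fourth (boolean) argument enables '@file' expansion.
+  cl::ParseCommandLineOptions(argc, argv,
+                              " CommandLine compiler example\n", true);
+  ...
+}
+
+ +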

+ +
+ + + + + +
+ +

Despite all of the built-in flexibility, the CommandLine option library +really only consists of one function (cl::ParseCommandLineOptions) +and three main classes: cl::opt, cl::list, and cl::alias. This section describes these three +classes in detail.

+ +
+ + + + +
+ +

The cl::ParseCommandLineOptions function is designed to be called +directly from main, and is used to fill in the values of all of the +command line option variables once argc and argv are +available.

+ +

The cl::ParseCommandLineOptions function requires two parameters +(argc and argv), but may also take an optional third parameter +which holds additional extra text to emit when the +-help option is invoked, and a fourth boolean parameter that enables +response files.

+ +
+ + + + +
+ +

The cl::ParseEnvironmentOptions function has mostly the same effects +as cl::ParseCommandLineOptions, +except that it is designed to take values for options from an environment +variable, for those cases in which reading the command line is not convenient or +desired. It fills in the values of all the command line option variables just +like cl::ParseCommandLineOptions +does.

+ +

It takes four parameters: the name of the program (since argv may +not be available, it can't just look in argv[0]), the name of the +environment variable to examine, the optional +additional extra text to emit when the +-help option is invoked, and the boolean +switch that controls whether response files +should be read.
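
For example, a sketch of reading options from an environment variable (the variable name COMPILER_FLAGS is illustrative):

+ +
+int main(int argc, char **argv) {
+  cl::ParseEnvironmentOptions("compiler", "COMPILER_FLAGS",
+                              " CommandLine compiler example\n");
+  ...
+}
+
+ +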

+ +

cl::ParseEnvironmentOptions will break the environment +variable's value up into words and then process them using +cl::ParseCommandLineOptions. +Note: Currently cl::ParseEnvironmentOptions does not support +quoting, so an environment variable containing -option "foo bar" will +be parsed as three words, -option, "foo, and bar", +which is different from what you would get from the shell with the same +input.

+ +
+ + + + +
+ +

The cl::SetVersionPrinter function is designed to be called +directly from main and before +cl::ParseCommandLineOptions. Its use is optional. It simply arranges +for a function to be called in response to the --version option instead +of having the CommandLine library print out the usual version string +for LLVM. This is useful for programs that are not part of LLVM but wish to use +the CommandLine facilities. Such programs should just define a small +function that takes no arguments and returns void and that prints out +whatever version information is appropriate for the program. Pass the address +of that function to cl::SetVersionPrinter to arrange for it to be +called when the --version option is given by the user.
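
For example, a sketch of installing a custom version printer (the tool name and version string are illustrative):

+ +
+void printMyVersion() {
+  llvm::outs() << "mytool version 1.3\n";
+}
+
+int main(int argc, char **argv) {
+  cl::SetVersionPrinter(printMyVersion);
+  cl::ParseCommandLineOptions(argc, argv);
+  ...
+}
+
+ +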

+ +
+ + + +
+ +

The cl::opt class is the class used to represent scalar command line +options, and is the one used most of the time. It is a templated class which +can take up to three arguments (all except for the first have default values +though):

+ +
+namespace cl {
+  template <class DataType, bool ExternalStorage = false,
+            class ParserClass = parser<DataType> >
+  class opt;
+}
+
+ +

The first template argument specifies what underlying data type the command +line argument is, and is used to select a default parser implementation. The +second template argument is used to specify whether the option should contain +the storage for the option (the default) or whether external storage should be +used to contain the value parsed for the option (see Internal +vs External Storage for more information).

+ +

The third template argument specifies which parser to use. The default value +selects an instantiation of the parser class based on the underlying +data type of the option. In general, this default works well for most +applications, so this option is only used when using a custom parser.

+ +
+ + + + +
+ +

The cl::list class is the class used to represent a list of command +line options. It too is a templated class which can take up to three +arguments:

+ +
+namespace cl {
+  template <class DataType, class Storage = bool,
+            class ParserClass = parser<DataType> >
+  class list;
+}
+
+ +

This class works exactly the same as the cl::opt class, except that the second argument is the type of the external storage, not a boolean value. For this class, the marker type 'bool' is used to indicate that internal storage should be used.
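
A minimal sketch of the external storage form, assuming cl::location accepts the storage object (the variable name InputStorage is illustrative):

+ +
+static std::vector<std::string> InputStorage;
+
+static cl::list<std::string, std::vector<std::string> >
+InputFilenames(cl::Positional, cl::location(InputStorage),
+               cl::desc("<Input files>"));
+
+ +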

+ +
+ + + + +
+ +

The cl::bits class is the class used to represent a list of command +line options in the form of a bit vector. It is also a templated class which +can take up to three arguments:

+ +
+namespace cl {
+  template <class DataType, class Storage = bool,
+            class ParserClass = parser<DataType> >
+  class bits;
+}
+
+ +

This class works exactly the same as the cl::list class, except that the second argument must be of type unsigned if external storage is used.

+ +
+ + + + +
+ +

The cl::alias class is a nontemplated class that is used to form +aliases for other arguments.

+ +
+namespace cl {
+  class alias;
+}
+
+ +

The cl::aliasopt attribute should be used to specify which option this is an alias for. Alias arguments default to being Hidden, and use the aliased option's parser to do the conversion from string to data.

+ +
+ + + + +
+ +

The cl::extrahelp class is a nontemplated class that allows extra +help text to be printed out for the -help option.

+ +
+namespace cl {
+  struct extrahelp;
+}
+
+ +

To use the extrahelp, simply construct one with a const char* +parameter to the constructor. The text passed to the constructor will be printed +at the bottom of the help message, verbatim. Note that multiple +cl::extrahelp can be used, but this practice is discouraged. If +your tool needs to print additional help information, put all that help into a +single cl::extrahelp instance.

+

For example:

+
+  cl::extrahelp("\nADDITIONAL HELP:\n\n  This is the extra help\n");
+
+
+ + + + +
+ +

Parsers control how the string value taken from the command line is +translated into a typed value, suitable for use in a C++ program. By default, +the CommandLine library uses an instance of parser<type> if the +command line option specifies that it uses values of type 'type'. +Because of this, custom option processing is specified with specializations of +the 'parser' class.

+ +

The CommandLine library provides the following builtin parser specializations, which are sufficient for most applications. It can, however, also be extended to work with new data types and new ways of interpreting the same data. See the Writing a Custom Parser section for more details on this type of library extension.

+ +
    + +
• The generic parser<t> parser can be used to map string values to any data type, through the use of the cl::values property, which specifies the mapping information. The most common use of this parser is for parsing enum values, which allows you to use the CommandLine library for all of the error checking to make sure that only valid enum values are specified (as opposed to accepting arbitrary strings). Despite this, however, the generic parser class can be used for any data type.
  • + +
  • The parser<bool> specialization +is used to convert boolean strings to a boolean value. Currently accepted +strings are "true", "TRUE", "True", "1", +"false", "FALSE", "False", and "0".
  • + +
• The parser<boolOrDefault> specialization is used for cases where the value is boolean, but we also need to know whether the option was specified at all. boolOrDefault is an enum with 3 values, BOU_UNSET, BOU_TRUE and BOU_FALSE. This parser accepts the same strings as parser<bool>. (A short usage sketch follows this list.)
  • + +
  • The parser<string> +specialization simply stores the parsed string into the string value +specified. No conversion or modification of the data is performed.
  • + +
  • The parser<int> specialization +uses the C strtol function to parse the string input. As such, it will +accept a decimal number (with an optional '+' or '-' prefix) which must start +with a non-zero digit. It accepts octal numbers, which are identified with a +'0' prefix digit, and hexadecimal numbers with a prefix of +'0x' or '0X'.
  • + +
  • The parser<double> and +parser<float> specializations use the standard C +strtod function to convert floating point strings into floating point +values. As such, a broad range of string formats is supported, including +exponential notation (ex: 1.7e15) and properly supports locales. +
  • + +
+ +
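
As promised above, here is a short sketch of using parser<boolOrDefault> to distinguish "not specified" from an explicit true or false (the option name is illustrative):

+ +
+static cl::opt<cl::boolOrDefault>
+UseFeature("use-feature", cl::desc("Override the feature default"));
+
+...
+  switch (UseFeature) {
+  case cl::BOU_UNSET: /* option not given: apply a default */ break;
+  case cl::BOU_TRUE:  /* '-use-feature' or '-use-feature=true' */ break;
+  case cl::BOU_FALSE: /* '-use-feature=false' */ break;
+  }
+
+ +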
+ + + + + +
+ +

Although the CommandLine library has a lot of functionality built into it already (as discussed previously), one of its true strengths lies in its extensibility. This section discusses how the CommandLine library works under the covers and illustrates how to do some simple, common, extensions.

+ +
+ + + + +
+ +

One of the simplest and most common extensions is the use of a custom parser. +As discussed previously, parsers are the portion +of the CommandLine library that turns string input from the user into a +particular parsed data type, validating the input in the process.

+ +

There are two ways to use a new parser:

+ +
    + +
  1. + +

    Specialize the cl::parser template for +your custom data type.

    + +

    This approach has the advantage that users of your custom data type will +automatically use your custom parser whenever they define an option with a value +type of your data type. The disadvantage of this approach is that it doesn't +work if your fundamental data type is something that is already supported.

    + +
  2. + +
  3. + +

    Write an independent class, using it explicitly from options that need +it.

    + +

This approach works well in situations where you would like to parse an option using special syntax for a not-very-special data-type. The drawback of this approach is that users of your parser have to be aware that they are using your parser instead of the builtin ones.

    + +
  4. + +
+ +

To guide the discussion, we will discuss a custom parser that accepts file +sizes, specified with an optional unit after the numeric size. For example, we +would like to parse "102kb", "41M", "1G" into the appropriate integer value. In +this case, the underlying data type we want to parse into is +'unsigned'. We choose approach #2 above because we don't want to make +this the default for all unsigned options.

+ +

To start out, we declare our new FileSizeParser class:

+ +
+struct FileSizeParser : public cl::basic_parser<unsigned> {
+  // parse - Return true on error.
+  bool parse(cl::Option &O, const char *ArgName, const std::string &ArgValue,
+             unsigned &Val);
+};
+
+ +

Our new class inherits from the cl::basic_parser template class to fill in the default, boilerplate code for us. We give it the data type that we parse into (the type of the last argument to the parse method), so that clients of our custom parser know what object type to pass in to the parse method. (Here we declare that we parse into 'unsigned' variables.)

+ +

For most purposes, the only method that must be implemented in a custom +parser is the parse method. The parse method is called +whenever the option is invoked, passing in the option itself, the option name, +the string to parse, and a reference to a return value. If the string to parse +is not well-formed, the parser should output an error message and return true. +Otherwise it should return false and set 'Val' to the parsed value. In +our example, we implement parse as:

+ +
+bool FileSizeParser::parse(cl::Option &O, const char *ArgName,
+                           const std::string &Arg, unsigned &Val) {
+  const char *ArgStart = Arg.c_str();
+  char *End;
+
+  // Parse integer part, leaving 'End' pointing to the first non-integer char
+  Val = (unsigned)strtol(ArgStart, &End, 0);
+
+  while (1) {
+    switch (*End++) {
+    case 0: return false;   // No error
+    case 'i':               // Ignore the 'i' in KiB if people use that
+    case 'b': case 'B':     // Ignore B suffix
+      break;
+
+    case 'g': case 'G': Val *= 1024*1024*1024; break;
+    case 'm': case 'M': Val *= 1024*1024;      break;
+    case 'k': case 'K': Val *= 1024;           break;
+
+    default:
+      // Print an error message if unrecognized character!
+      return O.error("'" + Arg + "' value invalid for file size argument!");
+    }
+  }
+}
+
+ +

This function implements a very simple parser for the kinds of strings we are +interested in. Although it has some holes (it allows "123KKK" for +example), it is good enough for this example. Note that we use the option +itself to print out the error message (the error method always returns +true) in order to get a nice error message (shown below). Now that we have our +parser class, we can use it like this:

+ +
+static cl::opt<unsigned, false, FileSizeParser>
+MFS("max-file-size", cl::desc("Maximum file size to accept"),
+    cl::value_desc("size"));
+
+ +

Which adds this to the output of our program:

+ +
+OPTIONS:
+  -help                 - display available options (-help-hidden for more)
+  ...
+  -max-file-size=<size> - Maximum file size to accept
+
+ +

And we can test that our parser works correctly now (the test program just prints out the max-file-size argument value):

+ +
+$ ./test
+MFS: 0
+$ ./test -max-file-size=123MB
+MFS: 128974848
+$ ./test -max-file-size=3G
+MFS: 3221225472
+$ ./test -max-file-size=dog
+-max-file-size option: 'dog' value invalid for file size argument!
+
+ +

It looks like it works. The error message that we get is nice and helpful, +and we seem to accept reasonable file sizes. This wraps up the "custom parser" +tutorial.

+ +
+ + + + +
+

Several of the LLVM libraries define static cl::opt instances that + will automatically be included in any program that links with that library. + This is a feature. However, sometimes it is necessary to know the value of the + command line option outside of the library. In these cases the library does or + should provide an external storage location that is accessible to users of the + library. Examples of this include the llvm::DebugFlag exported by the + lib/Support/Debug.cpp file and the llvm::TimePassesIsEnabled + flag exported by the lib/VMCore/Pass.cpp file.

+ +

TODO: complete this section

+ +
+ + + + +
+ +

TODO: fill in this section

+ +
+ + + +
+
+ Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/CompilerDriver.html b/final/docs/CompilerDriver.html new file mode 100644 index 00000000000..3c82e2b0769 --- /dev/null +++ b/final/docs/CompilerDriver.html @@ -0,0 +1,756 @@ + + + + + + +Customizing LLVMC: Reference Manual + + + +
+

Customizing LLVMC: Reference Manual

+ + + +
+

Written by Mikhail Glushenkov

+
+

Introduction

+

LLVMC is a generic compiler driver, designed to be customizable and +extensible. It plays the same role for LLVM as the gcc program +does for GCC - LLVMC's job is essentially to transform a set of input +files into a set of targets depending on configuration rules and user +options. What makes LLVMC different is that these transformation rules +are completely customizable - in fact, LLVMC knows nothing about the +specifics of transformation (even the command-line options are mostly +not hard-coded) and regards the transformation structure as an +abstract graph. The structure of this graph is completely determined +by plugins, which can be either statically or dynamically linked. This +makes it possible to easily adapt LLVMC for other purposes - for +example, as a build tool for game resources.

+

Because LLVMC employs TableGen as its configuration language, you +need to be familiar with it to customize LLVMC.

+
+
+

Compiling with LLVMC

+

LLVMC tries hard to be as compatible with gcc as possible, +although there are some small differences. Most of the time, however, +you shouldn't be able to notice them:

+
+$ # This works as expected:
+$ llvmc -O3 -Wall hello.cpp
+$ ./a.out
+hello
+
+

One nice feature of LLVMC is that one doesn't have to distinguish between +different compilers for different languages (think g++ vs. gcc) - the +right toolchain is chosen automatically based on input language names (which +are, in turn, determined from file extensions). If you want to force files +ending with ".c" to compile as C++, use the -x option, just like you would +do it with gcc:

+
+$ # hello.c is really a C++ file
+$ llvmc -x c++ hello.c
+$ ./a.out
+hello
+
+

On the other hand, when using LLVMC as a linker to combine several C++ +object files you should provide the --linker option since it's +impossible for LLVMC to choose the right linker in that case:

+
+$ llvmc -c hello.cpp
+$ llvmc hello.o
+[A lot of link-time errors skipped]
+$ llvmc --linker=c++ hello.o
+$ ./a.out
+hello
+
+

By default, LLVMC uses llvm-gcc to compile the source code. It is also +possible to choose the clang compiler with the -clang option.

+
+
+

Predefined options

+

LLVMC has some built-in options that can't be overridden in the +configuration libraries:

+
    +
  • -o FILE - Output file name.
  • +
  • -x LANGUAGE - Specify the language of the following input files +until the next -x option.
  • +
  • -load PLUGIN_NAME - Load the specified plugin DLL. Example: +-load $LLVM_DIR/Release/lib/LLVMCSimple.so.
  • +
  • -v - Enable verbose mode, i.e. print out all executed commands.
  • +
  • --save-temps - Write temporary files to the current directory and do not +delete them on exit. This option can also take an argument: the +--save-temps=obj switch will write files into the directory specified with +the -o option. The --save-temps=cwd and --save-temps switches are +both synonyms for the default behaviour.
  • +
• --temp-dir DIRECTORY - Store temporary files in the given directory. This directory is deleted on exit unless --save-temps is specified. If --save-temps=obj is also specified, --temp-dir is given precedence.
  • +
  • --check-graph - Check the compilation for common errors like mismatched +output/input language names, multiple default edges and cycles. Because of +plugins, these checks can't be performed at compile-time. Exit with code zero +if no errors were found, and return the number of found errors +otherwise. Hidden option, useful for debugging LLVMC plugins.
  • +
  • --view-graph - Show a graphical representation of the compilation graph +and exit. Requires that you have dot and gv programs installed. Hidden +option, useful for debugging LLVMC plugins.
  • +
  • --write-graph - Write a compilation-graph.dot file in the current +directory with the compilation graph description in Graphviz format (identical +to the file used by the --view-graph option). The -o option can be +used to set the output file name. Hidden option, useful for debugging LLVMC +plugins.
  • +
  • -help, -help-hidden, --version - These options have +their standard meaning.
  • +
+
+
+

Compiling LLVMC plugins

+

It's easiest to start working on your own LLVMC plugin by copying the +skeleton project which lives under $LLVMC_DIR/plugins/Simple:

+
+$ cd $LLVMC_DIR/plugins
+$ cp -r Simple MyPlugin
+$ cd MyPlugin
+$ ls
+Makefile PluginMain.cpp Simple.td
+
+

As you can see, our basic plugin consists of only two files (not counting the build script). Simple.td contains a TableGen description of the compilation graph; its format is documented in the following sections. PluginMain.cpp is just a helper file used to compile the auto-generated C++ code produced from the TableGen source. It can also contain hook definitions (see below).

+

The first thing that you should do is to change the LLVMC_PLUGIN +variable in the Makefile to avoid conflicts (since this variable +is used to name the resulting library):

+
+LLVMC_PLUGIN=MyPlugin
+
+

It is also a good idea to rename Simple.td to something less +generic:

+
+$ mv Simple.td MyPlugin.td
+
+

To build your plugin as a dynamic library, just cd to its source +directory and run make. The resulting file will be called +plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION) (in our case, +plugin_llvmc_MyPlugin.so). This library can be then loaded in with the +-load option. Example:

+
+$ cd $LLVMC_DIR/plugins/Simple
+$ make
+$ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
+
+
+
+

Compiling standalone LLVMC-based drivers

+

By default, the llvmc executable consists of a driver core plus several +statically linked plugins (Base and Clang at the moment). You can +produce a standalone LLVMC-based driver executable by linking the core with your +own plugins. The recommended way to do this is by starting with the provided +Skeleton example ($LLVMC_DIR/example/Skeleton):

+
+$ cd $LLVMC_DIR/example/
+$ cp -r Skeleton mydriver
+$ cd mydriver
+$ vim Makefile
+[...]
+$ make
+
+

If you're compiling LLVM with different source and object directories, then you +must perform the following additional steps before running make:

+
+# LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
+# LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
+$ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
+  $LLVMC_OBJ_DIR/example/mydriver/
+$ cd $LLVMC_OBJ_DIR/example/mydriver
+$ make
+
+

Another way to do the same thing is by using the following command:

+
+$ cd $LLVMC_DIR
+$ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
+
+

This works with both srcdir == objdir and srcdir != objdir, but assumes that the +plugin source directory was placed under $LLVMC_DIR/plugins.

+

Sometimes, you will want a 'bare-bones' version of LLVMC that has no +built-in plugins. It can be compiled with the following command:

+
+$ cd $LLVMC_DIR
+$ make LLVMC_BUILTIN_PLUGINS=""
+
+
+
+

Customizing LLVMC: the compilation graph

+

Each TableGen configuration file should include the common +definitions:

+
+include "llvm/CompilerDriver/Common.td"
+
+

Internally, LLVMC stores information about possible source transformations in the form of a graph. Nodes in this graph represent tools, and edges between two nodes represent a transformation path. A special "root" node is used to mark entry points for the transformations. LLVMC also assigns a weight to each edge (more on this later) to choose between several alternative edges.

+

The definition of the compilation graph (see file +plugins/Base/Base.td for an example) is just a list of edges:

+
+def CompilationGraph : CompilationGraph<[
+    Edge<"root", "llvm_gcc_c">,
+    Edge<"root", "llvm_gcc_assembler">,
+    ...
+
+    Edge<"llvm_gcc_c", "llc">,
+    Edge<"llvm_gcc_cpp", "llc">,
+    ...
+
+    OptionalEdge<"llvm_gcc_c", "opt", (case (switch_on "opt"),
+                                      (inc_weight))>,
+    OptionalEdge<"llvm_gcc_cpp", "opt", (case (switch_on "opt"),
+                                              (inc_weight))>,
+    ...
+
+    OptionalEdge<"llvm_gcc_assembler", "llvm_gcc_cpp_linker",
+        (case (input_languages_contain "c++"), (inc_weight),
+              (or (parameter_equals "linker", "g++"),
+                  (parameter_equals "linker", "c++")), (inc_weight))>,
+    ...
+
+    ]>;
+
+

As you can see, the edges can be either default or optional, where +optional edges are differentiated by an additional case expression +used to calculate the weight of this edge. Notice also that we refer +to tools via their names (as strings). This makes it possible to add +edges to an existing compilation graph in plugins without having to +know about all tool definitions used in the graph.

+

The default edges are assigned a weight of 1, and optional edges get a +weight of 0 + 2*N where N is the number of tests that evaluated to +true in the case expression. It is also possible to provide an +integer parameter to inc_weight and dec_weight - in this case, +the weight is increased (or decreased) by the provided value instead +of the default 2. It is also possible to change the default weight of +an optional edge by using the default clause of the case +construct.

+

When passing an input file through the graph, LLVMC picks the edge +with the maximum weight. To avoid ambiguity, there should be only one +default edge between two nodes (with the exception of the root node, +which gets a special treatment - there you are allowed to specify one +default edge per language).

+
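For instance, reusing the tool names from the example above, the root node may legitimately carry one default edge for each input language:

+
+// One default edge per language is allowed from the root node.
+Edge<"root", "llvm_gcc_c">,    // default edge for C inputs
+Edge<"root", "llvm_gcc_cpp">,  // default edge for C++ inputs
+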

When multiple plugins are loaded, their compilation graphs are merged together. Since multiple edges that have the same end nodes are not allowed (i.e. the graph is not a multigraph), an edge defined in several plugins will be replaced by the definition from the plugin that was loaded last. Plugin load order can be controlled by using the plugin priority feature described below.

+

To get a visual representation of the compilation graph (useful for debugging), run llvmc --view-graph. You will need dot and gsview installed for this to work properly.

+
+
+

Describing options

+

Command-line options that the plugin supports are defined by using an OptionList:

+
+def Options : OptionList<[
+(switch_option "E", (help "Help string")),
+(alias_option "quiet", "q")
+...
+]>;
+
+

As you can see, the option list is just a list of DAGs, where each DAG is an option description consisting of the option name and some properties. A plugin can define more than one option list (they are all merged together in the end), which can be handy if one wants to separate option groups syntactically. The available option types and properties follow; a combined sketch is given after the two lists.

+
    +
  • Possible option types:

    +
    +
      +
    • switch_option - a simple boolean switch without arguments, for example -O2 or -time. At most one occurrence is allowed.
    • parameter_option - an option that takes one argument, for example -std=c99. It is also allowed to use spaces instead of the equality sign: -std c99. At most one occurrence is allowed.
    • parameter_list_option - same as the above, but more than one option occurrence is allowed.
    • prefix_option - same as the parameter_option, but the option name and argument do not have to be separated. Example: -ofile. This can also be specified as -o file; however, -o=file will be parsed incorrectly (=file will be interpreted as the option value). At most one occurrence is allowed.
    • prefix_list_option - same as the above, but more than one occurrence of the option is allowed; example: -lm -lpthread.
    • alias_option - a special option type for creating aliases. Unlike other option types, aliases are not allowed to have any properties besides the aliased option name. Usage example: (alias_option "preprocess", "E")

  • Possible option properties:

    +
    +
      +
    • help - help string associated with this option. Used for -help output.
    • required - this option must be specified exactly once (or, in the case of list options without the multi_val property, at least once). Incompatible with optional and one_or_more.
    • one_or_more - the option must be specified at least once. Useful only for list options in conjunction with multi_val; for ordinary lists it is synonymous with required. Incompatible with required and optional.
    • optional - the option can be specified zero times or once. Useful only for list options in conjunction with multi_val. Incompatible with required and one_or_more.
    • hidden - the description of this option will not appear in the -help output (but will appear in the -help-hidden output).
    • really_hidden - the option will not be mentioned in any help output.
    • comma_separated - indicates that any commas specified for an option's value should be used to split the value up into multiple values for the option. This property is valid only for list options. In conjunction with forward_value, it can be used to implement option forwarding in the style of gcc's -Wa,.
    • multi_val n - this option takes n arguments (can be useful in some special cases). Usage example: (parameter_list_option "foo", (multi_val 3)); the command-line syntax is '-foo a b c'. Only list options can have this attribute; you can, however, also use the one_or_more, optional, and required properties.
    • init - this option has a default value, either a string (if it is a parameter) or a boolean (if it is a switch; as in C++, the boolean constants are called true and false). List options can't have an init attribute. Usage examples: (switch_option "foo", (init true)); (prefix_option "bar", (init "baz")).
    • extern - this option is defined in some other plugin, see below.
+
+
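As a combined sketch - the option names below are hypothetical - a single option list can exercise several of these types and properties at once:

+
+// A hypothetical option list combining several option types and properties.
+def MoreOptions : OptionList<[
+    (switch_option "fast", (help "Enable the fast code path"), (init false)),
+    (prefix_list_option "l", (help "Link in the specified library")),
+    (parameter_list_option "foo", (help "Takes three arguments"), (multi_val 3)),
+    (alias_option "quick", "fast")
+]>;
+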

External options

+

Sometimes, when linking several plugins together, one plugin needs to access options defined in some other plugin. Because of the way options are implemented, such options must be marked as extern. This is what the extern option property is for. Example:

+
+...
+(switch_option "E", (extern))
+...
+
+

If an external option has additional attributes besides 'extern', they are ignored. See also the section on plugin priorities.

+
+
+
+

Conditional evaluation

+

The 'case' construct is the main means by which programmability is achieved in LLVMC. It can be used to calculate edge weights, program actions, and modify the shell commands to be executed. The 'case' expression is designed after the similarly-named construct in functional languages and takes the form (case (test_1), statement_1, (test_2), statement_2, ... (test_N), statement_N). The statements are evaluated only if the corresponding tests evaluate to true.

+

Examples:

+
+// Edge weight calculation
+
+// Increases edge weight by 5 if "-A" is provided on the
+// command-line, and by 5 more if "-B" is also provided.
+(case
+    (switch_on "A"), (inc_weight 5),
+    (switch_on "B"), (inc_weight 5))
+
+
+// Tool command line specification
+
+// Evaluates to "cmdline1" if the option "-A" is provided on the
+// command line; to "cmdline2" if "-B" is provided;
+// otherwise to "cmdline3".
+
+(case
+    (switch_on "A"), "cmdline1",
+    (switch_on "B"), "cmdline2",
+    (default), "cmdline3")
+
+

Note the slight difference in 'case' expression handling in the contexts of edge weights and command-line specification - in the second example, the value of the "B" switch is never checked when switch "A" is enabled, and the whole expression always evaluates to "cmdline1" in that case.

+

Case expressions can also be nested, i.e. the following is legal:

+
+(case (switch_on "E"), (case (switch_on "o"), ..., (default), ...),
+      (default), ...)
+
+

You should, however, try to avoid doing that because it hurts readability. It is usually better to split tool descriptions and/or use TableGen inheritance instead.

+
    +
  • Possible tests are (a combined sketch follows this list):
      +
    • switch_on - Returns true if a given command-line switch is provided by the user. Can also be given a list as an argument; in that case, (switch_on ["foo", "bar", "baz"]) is equivalent to (and (switch_on "foo"), (switch_on "bar"), (switch_on "baz")). Example: (switch_on "opt").
    • any_switch_on - Given a list of switch options, returns true if any of the switches is turned on. Example: (any_switch_on ["foo", "bar", "baz"]) is equivalent to (or (switch_on "foo"), (switch_on "bar"), (switch_on "baz")).
    • parameter_equals - Returns true if a command-line parameter equals a given value. Example: (parameter_equals "W", "all").
    • element_in_list - Returns true if a command-line parameter list contains a given value. Example: (element_in_list "l", "pthread").
    • input_languages_contain - Returns true if a given language belongs to the current input language set. Example: (input_languages_contain "c++").
    • in_language - Evaluates to true if the input file language is equal to the argument. At the moment, it works only with cmd_line and actions (on non-join nodes). Example: (in_language "c++").
    • not_empty - Returns true if a given option (which should be either a parameter or a parameter list) is set by the user. Like switch_on, it can also be given a list as an argument. Example: (not_empty "o").
    • any_not_empty - Returns true if not_empty returns true for any of the options in the list. Example: (any_not_empty ["foo", "bar", "baz"]) is equivalent to (or (not_empty "foo"), (not_empty "bar"), (not_empty "baz")).
    • empty - The opposite of not_empty. Equivalent to (not (not_empty X)). Provided for convenience. Can also be given a list as an argument.
    • any_empty - Returns true if empty returns true for any of the options in the list. Example: (any_empty ["foo", "bar", "baz"]) is equivalent to (not (and (not_empty "foo"), (not_empty "bar"), (not_empty "baz"))).
    • single_input_file - Returns true if there was only one input file provided on the command line. Used without arguments: (single_input_file).
    • multiple_input_files - Equivalent to (not (single_input_file)) (the case of zero input files is considered an error).
    • default - Always evaluates to true. Should always be the last test in the case expression.
    • and - A standard logical combinator that returns true iff all of its arguments return true. Used like this: (and (test1), (test2), ... (testN)). Nesting of and and or is allowed, but not encouraged.
    • or - A logical combinator that returns true iff any of its arguments returns true. Example: (or (test1), (test2), ... (testN)).
    • not - Standard unary logical combinator that negates its argument. Example: (not (or (test1), (test2), ... (testN))).
+
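As a combined sketch - the switch names are hypothetical - several of these tests can be composed into a single edge-weight calculation:

+
+// Favor this path when optimizing C++ input, unless -E is also given.
+(case (and (switch_on "opt"),
+           (input_languages_contain "c++"),
+           (not (switch_on "E"))), (inc_weight 5))
+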
+
+

Writing a tool description

+

As was said earlier, nodes in the compilation graph represent tools, which are described separately. A tool definition looks like this (taken from the include/llvm/CompilerDriver/Tools.td file):

+
+def llvm_gcc_cpp : Tool<[
+    (in_language "c++"),
+    (out_language "llvm-assembler"),
+    (output_suffix "bc"),
+    (cmd_line "llvm-g++ -c $INFILE -o $OUTFILE -emit-llvm"),
+    (sink)
+    ]>;
+
+

This defines a new tool called llvm_gcc_cpp, which is an alias for llvm-g++. As you can see, a tool definition is just a list of properties; most of them should be self-explanatory. The sink property means that this tool should be passed all command-line options that aren't mentioned in the option list.

+

The complete list of all currently implemented tool properties follows.

+
    +
  • Possible tool properties:
      +
    • in_language - input language name. Can be either a string or a list, in case the tool supports multiple input languages.
    • out_language - output language name. Multiple output languages are not allowed.
    • output_suffix - output file suffix. Can also be changed dynamically, see the documentation on actions.
    • cmd_line - the actual command used to run the tool. You can use the $INFILE and $OUTFILE variables, output redirection with >, hook invocations ($CALL), environment variables (via $ENV), and the case construct.
    • join - this tool is a "join node" in the graph, i.e. it gets a list of input files and joins them together. Used for linkers.
    • sink - all command-line options that are not handled by other tools are passed to this tool.
    • actions - a single big case expression that specifies how this tool reacts to command-line options (described in more detail below).
+
+

Actions

+

A tool often needs to react to command-line options, and this is precisely what the actions property is for. The next example illustrates this feature:

+
+def llvm_gcc_linker : Tool<[
+    (in_language "object-code"),
+    (out_language "executable"),
+    (output_suffix "out"),
+    (cmd_line "llvm-gcc $INFILE -o $OUTFILE"),
+    (join),
+    (actions (case (not_empty "L"), (forward "L"),
+                   (not_empty "l"), (forward "l"),
+                   (not_empty "dummy"),
+                             [(append_cmd "-dummy1"), (append_cmd "-dummy2")]))
+    ]>;
+
+

The actions tool property is implemented on top of the omnipresent case expression. It associates one or more different actions with given conditions - in the example, the actions are forward, which forwards a given option unchanged, and append_cmd, which appends a given string to the tool execution command. Multiple actions can be associated with a single condition by using a list of actions (used in the example to append some dummy options). The same case construct can also be used in the cmd_line property to modify the tool command line.

+

The "join" property used in the example means that this tool behaves +like a linker.

+

The list of all possible actions follows.

+
    +
  • Possible actions (a combined sketch follows this list):

    +
    +
      +
    • append_cmd - Append a string to the tool invocation command. Example: (case (switch_on "pthread"), (append_cmd "-lpthread")).
    • error - Exit with an error. Example: (error "Mixing -c and -S is not allowed!").
    • warning - Print a warning. Example: (warning "Specifying both -O1 and -O2 is meaningless!").
    • forward - Forward the option unchanged. Example: (forward "Wall").
    • forward_as - Change the option's name, but forward the argument unchanged. Example: (forward_as "O0", "--disable-optimization").
    • forward_value - Forward only the option's value. Cannot be used with switch options (since they don't have values), but works fine with lists. Example: (forward_value "Wa,").
    • forward_transformed_value - As above, but applies a hook to the option's value before forwarding (see below). When forward_transformed_value is applied to a list option, the hook must have the signature std::string hooks::HookName (const std::vector<std::string>&). Example: (forward_transformed_value "m", "ConvertToMAttr").
    • output_suffix - Modify the output suffix of this tool. Example: (output_suffix "i").
    • stop_compilation - Stop compilation after this tool processes its input. Used without arguments. Example: (stop_compilation).
+
+
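As a combined sketch - the switch names are hypothetical - a single actions clause can mix several of the actions above, for example stopping after preprocessing on -E and renaming -O0 when forwarding it:

+
+(actions (case
+    (switch_on "E"),  [(stop_compilation), (output_suffix "i")],
+    (switch_on "O0"), (forward_as "O0", "--disable-optimization")))
+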
+
+

Language map

+

If you are adding support for a new language to LLVMC, you'll need to modify the language map, which defines mappings from file extensions to language names. It is used to choose the proper toolchain(s) for a given input file set. A language map definition looks like this:

+
+def LanguageMap : LanguageMap<
+    [LangToSuffixes<"c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]>,
+     LangToSuffixes<"c", ["c"]>,
+     ...
+    ]>;
+
+

For example, without those definitions the following command wouldn't work:

+
+$ llvmc hello.cpp
+llvmc: Unknown suffix: cpp
+
+

The language map entries are needed only for the tools that are linked from the root node. Since a tool can't have multiple output languages, for inner nodes of the graph the input and output languages should match. This is enforced at compile-time.

+
+
+

Option preprocessor

+

It is sometimes useful to run error-checking code before processing the compilation graph. For example, if optimization options "-O1" and "-O2" are implemented as switches, we might want to output a warning if the user invokes the driver with both of these options enabled.

+

The OptionPreprocessor feature is designed for exactly these occasions. Example (adapted from the built-in Base plugin):

+
+def Preprocess : OptionPreprocessor<
+(case (not (any_switch_on ["O0", "O1", "O2", "O3"])),
+           (set_option "O2"),
+      (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
+           (unset_option ["O0", "O1", "O2"]),
+      (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
+           (unset_option ["O0", "O1"]),
+      (and (switch_on "O1"), (switch_on "O0")),
+           (unset_option "O0"))
+>;
+
+

Here, OptionPreprocessor is used to unset all spurious -O options so that they are not forwarded to the compiler. If no optimization options are specified, -O2 is enabled.

+

OptionPreprocessor is basically a single big case expression, which is evaluated only once, right after the plugin is loaded. The only allowed actions in OptionPreprocessor are error, warning, and two special actions: unset_option and set_option. As their names suggest, they can be used to set or unset a given option. To set an option with set_option, use the two-argument form: (set_option "parameter", VALUE). Here, VALUE can be either a string, a string list, or a boolean constant.

+

For convenience, set_option and unset_option also work on lists. That is, instead of [(unset_option "A"), (unset_option "B")] you can use (unset_option ["A", "B"]). Obviously, (set_option ["A", "B"]) is valid only if both A and B are switches.

+
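As another sketch - the std parameter here is hypothetical - the two-argument form of set_option can supply a default parameter value when the user left it unset:

+
+// Default "-std" to "c99" unless the user provided a value.
+def PreprocessStd : OptionPreprocessor<
+    (case (empty "std"),
+          (set_option "std", "c99"))
+>;
+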
+
+

More advanced topics

+
+

Hooks and environment variables

+

Normally, LLVMC executes programs from the system PATH. Sometimes, this is not sufficient: for example, we may want to specify tool paths or names in the configuration file. This can be easily achieved via the hooks mechanism. To write your own hooks, just add their definitions to PluginMain.cpp or drop a .cpp file into your plugin directory. Hooks should live in the hooks namespace and have the signature std::string hooks::MyHookName ([const char* Arg0 [, const char* Arg1 [, ...]]]). They can be used from the cmd_line tool property:

+
+(cmd_line "$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)")
+
+

To pass arguments to hooks, use the following syntax:

+
+(cmd_line "$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2")
+
+

It is also possible to use environment variables in the same manner:

+
+(cmd_line "$ENV(VAR1)/path/to/file -o $ENV(VAR2)")
+
+

To change the command-line string based on user-provided options, use the case expression (documented above):

+
+(cmd_line
+  (case
+    (switch_on "E"),
+       "llvm-g++ -E -x c $INFILE -o $OUTFILE",
+    (default),
+       "llvm-g++ -c -x c $INFILE -o $OUTFILE -emit-llvm"))
+
+
+
+

How plugins are loaded

+

It is possible for LLVMC plugins to depend on each other. For example, one can create edges between nodes defined in some other plugin. To make this work, however, that plugin should be loaded first. To achieve this, the concept of plugin priority was introduced. By default, every plugin has priority zero; to specify the priority explicitly, put the following line in your plugin's TableGen file:

+
+def Priority : PluginPriority<$PRIORITY_VALUE>;
+# Where PRIORITY_VALUE is some integer > 0
+
+

Plugins are loaded in order of their (increasing) priority, starting with 0. Therefore, the plugin with the highest priority value will be loaded last.

+
+
+

Debugging

+

When writing LLVMC plugins, it can be useful to get a visual representation of the resulting compilation graph. This can be achieved via the command-line option --view-graph. This command assumes that Graphviz and Ghostview are installed. There is also a --write-graph option that creates a Graphviz source file (compilation-graph.dot) in the current directory.

+

Another useful llvmc option is --check-graph. It checks the compilation graph for common errors like mismatched output/input language names, multiple default edges and cycles. These checks can't be performed at compile-time because the plugins can load code dynamically. When invoked with --check-graph, llvmc doesn't perform any compilation tasks and returns the number of encountered errors as its status code.

+
+
+

Conditioning on the executable name

+

For now, the executable name (the value passed to the driver in argv[0]) is accessible only in the C++ code (i.e. hooks). Use the following code:

+
+#include <cstring>
+#include <string>
+
+namespace llvmc {
+// Defined by the LLVMC core.
+extern const char* ProgramName;
+}
+
+namespace hooks {
+
+std::string MyHook() {
+  //...
+  if (std::strcmp(llvmc::ProgramName, "mydriver") == 0) {
+    //...
+  }
+  //...
+}
+
+} // end namespace hooks
+
+

In general, you're encouraged not to make the behaviour dependent on the executable file name; use command-line switches instead. See, for example, how the Base plugin behaves when it needs to choose the correct linker options (think g++ vs. gcc).

+
+
+Mikhail Glushenkov
+LLVM Compiler Infrastructure
+Last modified: $Date$
+
+
diff --git a/final/docs/CompilerDriverTutorial.html b/final/docs/CompilerDriverTutorial.html
new file mode 100644
index 00000000000..317b1d12712
--- /dev/null
+++ b/final/docs/CompilerDriverTutorial.html
@@ -0,0 +1,126 @@
+Tutorial - Using LLVMC
+

Tutorial - Using LLVMC

+ + + +
+

Written by Mikhail Glushenkov

+
+

Introduction

+

LLVMC is a generic compiler driver, which plays the same role for LLVM as the gcc program does for GCC - the difference being that LLVMC is designed to be more adaptable and easier to customize. Most of LLVMC's functionality is implemented via plugins, which can be loaded dynamically or compiled in. This tutorial describes the basic usage and configuration of LLVMC.

+
+
+

Compiling with LLVMC

+

In general, LLVMC tries to be command-line compatible with gcc as much as possible, so most of the familiar options work:

+
+$ llvmc -O3 -Wall hello.cpp
+$ ./a.out
+hello
+
+

This will invoke llvm-g++ under the hood (you can see which commands are executed by using the -v option). For further help on command-line LLVMC usage, refer to the llvmc --help output.

+
+
+

Using LLVMC to generate toolchain drivers

+

LLVMC plugins are written mostly using TableGen, so you need to be familiar with it to get anything done.

+

Start by compiling example/Simple, which is a primitive wrapper for gcc:

+
+$ cd $LLVM_DIR/tools/llvmc
+$ cp -r example/Simple plugins/Simple
+
+  # NB: A less verbose way to compile standalone LLVMC-based drivers is
+  # described in the reference manual.
+
+$ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
+$ cat > hello.c
+[...]
+$ mygcc hello.c
+$ ./hello.out
+Hello
+
+

Here we link our plugin with the LLVMC core statically to form an executable file called mygcc. It is also possible to build our plugin as a dynamic library to be loaded by the llvmc executable (or any other LLVMC-based standalone driver); this is described in the reference manual.

+

The contents of the file Simple.td look like this:

+
+// Include common definitions
+include "llvm/CompilerDriver/Common.td"
+
+// Tool descriptions
+def gcc : Tool<
+[(in_language "c"),
+ (out_language "executable"),
+ (output_suffix "out"),
+ (cmd_line "gcc $INFILE -o $OUTFILE"),
+ (sink)
+]>;
+
+// Language map
+def LanguageMap : LanguageMap<[LangToSuffixes<"c", ["c"]>]>;
+
+// Compilation graph
+def CompilationGraph : CompilationGraph<[Edge<"root", "gcc">]>;
+
+

As you can see, this file consists of three parts: the tool descriptions, the language map, and the compilation graph definition.

+

At the heart of LLVMC is the idea of a compilation graph: vertices in this graph are tools, and edges represent a transformation path between two tools (for example, assembly source produced by the compiler can be transformed into executable code by an assembler). The compilation graph is basically a list of edges; a special node named root is used to mark graph entry points.

+

Tool descriptions are represented as property lists: most properties in the example above should be self-explanatory; the sink property means that all options lacking an explicit description should be forwarded to this tool.

+

The LanguageMap associates a language name with a list of suffixes and is used for deciding which toolchain corresponds to a given input file.

+

To learn more about LLVMC customization, refer to the reference manual and plugin source code in the plugins directory.

+
+
+Mikhail Glushenkov
+LLVM Compiler Infrastructure
+Last modified: $Date: 2008-12-11 11:34:48 -0600 (Thu, 11 Dec 2008) $
+
diff --git a/final/docs/CompilerWriterInfo.html b/final/docs/CompilerWriterInfo.html
new file mode 100644
index 00000000000..5d071f73262
--- /dev/null
+++ b/final/docs/CompilerWriterInfo.html
@@ -0,0 +1,263 @@
+Architecture/platform information for compiler writers
+ Architecture/platform information for compiler writers +
+ +
+

Note: This document is a work in progress. Additions and clarifications are welcome.

+
+ +
    +
  1. Hardware
      1. Alpha
      2. ARM
      3. Itanium
      4. MIPS
      5. PowerPC
      6. SPARC
      7. X86
      8. Other lists
  2. Application Binary Interface (ABI)
      1. Linux
      2. OS X
  3. Miscellaneous resources
+ +
+

Compiled by Misha Brukman

+
IBM - Official manuals and docs
Other documents, collections, notes
AMD - Official manuals and docs
Intel - Official manuals and docs
Other x86-specific information
+
+ Misha Brukman
+ LLVM Compiler Infrastructure
+ Last modified: $Date$
diff --git a/final/docs/DebuggingJITedCode.html b/final/docs/DebuggingJITedCode.html
new file mode 100644
index 00000000000..7c998bbe9c1
--- /dev/null
+++ b/final/docs/DebuggingJITedCode.html
@@ -0,0 +1,152 @@
+Debugging JITed Code With GDB
Debugging JITed Code With GDB
+
    +
  1. Example usage
  2. Background
+
Written by Reid Kleckner
+ + + + +
+ +

In order to debug code JITed by LLVM, you need GDB 7.0 or newer, which is available on most modern distributions of Linux. The version of GDB that Apple ships with Xcode has been frozen at 6.3 for a while. LLDB may be a better option for debugging JITed code on Mac OS X.

+ +

Consider debugging the following code compiled with clang and run through lli:

+ +
+#include <stdio.h>
+
+void foo() {
+    printf("%d\n", *(int*)NULL);  // Crash here
+}
+
+void bar() {
+    foo();
+}
+
+void baz() {
+    bar();
+}
+
+int main(int argc, char **argv) {
+    baz();
+}
+
+ +

Here are the commands to run that application under GDB and print the stack trace at the crash:

+ +
+# Compile foo.c to bitcode.  You can use either clang or llvm-gcc with this
+# command line.  Both require -fexceptions, or the calls are all marked
+# 'nounwind' which disables DWARF exception handling info.  Custom frontends
+# should avoid adding this attribute to JITed code, since it interferes with
+# DWARF CFA generation at the moment.
+$ clang foo.c -fexceptions -emit-llvm -c -o foo.bc
+
+# Run foo.bc under lli with -jit-emit-debug.  If you built lli in debug mode,
+# -jit-emit-debug defaults to true.
+$ $GDB_INSTALL/gdb --args lli -jit-emit-debug foo.bc
+...
+
+# Run the code.
+(gdb) run
+Starting program: /tmp/gdb/lli -jit-emit-debug foo.bc
+[Thread debugging using libthread_db enabled]
+
+Program received signal SIGSEGV, Segmentation fault.
+0x00007ffff7f55164 in foo ()
+
+# Print the backtrace, this time with symbols instead of ??.
+(gdb) bt
+#0  0x00007ffff7f55164 in foo ()
+#1  0x00007ffff7f550f9 in bar ()
+#2  0x00007ffff7f55099 in baz ()
+#3  0x00007ffff7f5502a in main ()
+#4  0x00000000007c0225 in llvm::JIT::runFunction(llvm::Function*,
+    std::vector<llvm::GenericValue,
+    std::allocator<llvm::GenericValue> > const&) ()
+#5  0x00000000007d6d98 in
+    llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,
+    std::vector<std::string,
+    std::allocator<std::string> > const&, char const* const*) ()
+#6  0x00000000004dab76 in main ()
+
+ +

As you can see, GDB can correctly unwind the stack and has the appropriate function names.

+
+ + + + +
+ +

Without special runtime support, debugging dynamically generated code with GDB (as well as most debuggers) can be quite painful. Debuggers generally read debug information from the object file of the code, but for JITed code, there is no such file to look for.

+ +

Depending on the architecture, this can impact the debugging experience in different ways. For example, on most 32-bit x86 architectures, you can simply compile with -fno-omit-frame-pointer for GCC and -disable-fp-elim for LLVM. When GDB creates a backtrace, it can properly unwind the stack, but the stack frames owned by JITed code have ??'s instead of the appropriate symbol name. However, on Linux x86_64 in particular, GDB relies on the DWARF call frame address (CFA) debug information to unwind the stack, so even if you compile your program to leave the frame pointer untouched, GDB will usually be unable to unwind the stack past any JITed code stack frames.

+ +

In order to communicate the necessary debug info to GDB, an interface for registering JITed code with debuggers has been designed and implemented for GDB and LLVM. At a high level, whenever LLVM generates new machine code, it also generates an object file in memory containing the debug information. LLVM then adds the object file to the global list of object files and calls a special function (__jit_debug_register_code) marked noinline that GDB knows about. When GDB attaches to a process, it puts a breakpoint in this function and loads all of the object files in the global list. When LLVM calls the registration function, GDB catches the breakpoint signal, loads the new object file from LLVM's memory, and resumes the execution. In this way, GDB can get the necessary debug information.

+ +

At the time of this writing, LLVM only supports architectures that use ELF object files and it only generates symbols and DWARF CFA information. However, it would be easy to add more information to the object file, so we don't need to coordinate with GDB to get better debug information.

+
+ + +
+
+ Reid Kleckner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$
diff --git a/final/docs/DeveloperPolicy.html b/final/docs/DeveloperPolicy.html
new file mode 100644
index 00000000000..ef7ba39397c
--- /dev/null
+++ b/final/docs/DeveloperPolicy.html
@@ -0,0 +1,618 @@
+LLVM Developer Policy
LLVM Developer Policy
+
    +
  1. Introduction
  2. Developer Policies
      1. Stay Informed
      2. Making a Patch
      3. Code Reviews
      4. Code Owners
      5. Test Cases
      6. Quality
      7. Obtaining Commit Access
      8. Making a Major Change
      9. Incremental Development
      10. Attribution of Changes
  3. Copyright, License, and Patents
      1. Copyright
      2. License
      3. Patents
+
Written by the LLVM Oversight Team
+ + + + +
+

This document contains the LLVM Developer Policy which defines the project's policy towards developers and their contributions. The intent of this policy is to eliminate miscommunication, rework, and confusion that might arise from the distributed nature of LLVM's development. By stating the policy in clear terms, we hope each developer can know ahead of time what to expect when making LLVM contributions. This policy covers all llvm.org subprojects, including Clang, LLDB, etc.

+

This policy is also designed to accomplish the following objectives:

+ +
    +
  1. Attract both users and developers to the LLVM project.
  2. Make life as simple and easy for contributors as possible.
  3. Keep the top of Subversion trees as stable as possible.
+ +

This policy is aimed at frequent contributors to LLVM. People interested in contributing one-off patches can do so in an informal way by sending them to the llvm-commits mailing list and engaging another developer to see it through the process.

+
+ + + + +
+

This section contains policies that pertain to frequent LLVM developers. We always welcome one-off patches from people who do not routinely contribute to LLVM, but we expect more from frequent contributors to keep the system as efficient as possible for everyone. Frequent LLVM contributors are expected to meet the following requirements in order for LLVM to maintain a high standard of quality.

+

+ + + +
+

Developers should stay informed by reading at least the "dev" mailing list for the projects you are interested in, such as llvmdev for LLVM, cfe-dev for Clang, or lldb-dev for LLDB. If you are doing anything more than just casual work on LLVM, it is suggested that you also subscribe to the "commits" mailing list for the subproject you're interested in, such as llvm-commits, cfe-commits, or lldb-commits. Reading the "commits" list and paying attention to changes being made by others is a good way to see what other people are interested in and to watch the flow of the project as a whole.

+ +

We recommend that active developers register an email account with LLVM Bugzilla and preferably subscribe to the llvm-bugs email list to keep track of bugs and enhancements occurring in LLVM. We really appreciate people who are proactive at catching incoming bugs in their components and dealing with them promptly.

+
+ + + + +
+

When making a patch for review, the goal is to make it as easy for the reviewer to read it as possible. As such, we recommend that you:

+ +
    +
  1. Make your patch against the Subversion trunk, not a branch, and not an old version of LLVM. This makes it easy to apply the patch. For information on how to check out SVN trunk, please see the Getting Started Guide.
  2. Similarly, patches should be submitted soon after they are generated. Old patches may not apply correctly if the underlying code changes between the time the patch was created and the time it is applied.
  3. Patches should be made with svn diff, or similar. If you use a different tool, make sure it uses the diff -u format and that it doesn't contain clutter which makes it hard to read.
  4. If you are modifying generated files, such as the top-level configure script, please separate out those changes into a separate patch from the rest of your changes.
+ +

When sending a patch to a mailing list, it is a good idea to send it as an attachment to the message, not embedded into the text of the message. This ensures that your mailer will not mangle the patch when it sends it (e.g. by making whitespace changes or by wrapping lines).

+ +

For Thunderbird users: Before submitting a patch, please open Preferences → Advanced → General → Config Editor, find the key mail.content_disposition_type, and set its value to 1. Without this setting, Thunderbird sends your attachment using Content-Disposition: inline rather than Content-Disposition: attachment. Apple Mail gamely displays such a file inline, making it difficult to work with for reviewers using that program.

+
+ + + +
+

LLVM has a code review policy. Code review is one way to increase the quality of software. We generally follow these policies:

+ +
    +
  1. All developers are required to have significant changes reviewed before they are committed to the repository.
  2. Code reviews are conducted by email, usually on the llvm-commits list.
  3. Code can be reviewed either before it is committed or after. We expect major changes to be reviewed before being committed, but smaller changes (or changes where the developer owns the component) can be reviewed after commit.
  4. The developer responsible for a code change is also responsible for making all necessary review-related changes.
  5. Code review can be an iterative process, which continues until the patch is ready to be committed.
+ +

Developers should participate in code reviews as both reviewers and reviewees. If someone is kind enough to review your code, you should return the favor for someone else. Note that anyone is welcome to review and give feedback on a patch, but only people with Subversion write access can approve it.

+
+ + + +
+ +

The LLVM Project relies on two features of its process to maintain rapid development in addition to the high quality of its source base: the combination of code review plus post-commit review for trusted maintainers. Having both is a great way for the project to take advantage of the fact that most people do the right thing most of the time, and only commit patches without pre-commit review when they are confident they are right.

+ +

The trick to this is that the project has to guarantee that all patches that are committed are reviewed after they go in: you don't want everyone to assume someone else will review it, allowing the patch to go unreviewed. To solve this problem, we have a notion of an 'owner' for a piece of the code. The sole responsibility of a code owner is to ensure that a commit to their area of the code is appropriately reviewed, either by themselves or by someone else. The current code owners are:

+ +
    +
  1. Evan Cheng: Code generator and all targets.
  2. Greg Clayton: LLDB.
  3. Doug Gregor: Clang Frontend Libraries.
  4. Howard Hinnant: libc++.
  5. Anton Korobeynikov: Exception handling, debug information, and Windows codegen.
  6. Ted Kremenek: Clang Static Analyzer.
  7. Chris Lattner: Everything not covered by someone else.
  8. Duncan Sands: llvm-gcc 4.2.
+ +

Note that code ownership is completely different from code review: anyone can review a piece of code, and we welcome code review from anyone who is interested. Code owners are the "last line of defense" to guarantee that all patches that are committed are actually reviewed.

+ +

Being a code owner is a somewhat unglamorous position, but it is incredibly important for the ongoing success of the project. Because people get busy, interests change, and unexpected things happen, code ownership is purely opt-in, and anyone can choose to resign their "title" at any time. For now, we do not have an official policy on how one gets elected to be a code owner.

+
+ + + +
+

Developers are required to create test cases for any bugs fixed and any new features added. Some tips for getting your testcase approved:

+ +
    +
  1. All feature and regression test cases are added to the llvm/test directory. The appropriate sub-directory should be selected (see the Testing Guide for details).
  2. Test cases should be written in LLVM assembly language unless the feature or regression being tested requires another language (e.g. the bug being fixed or feature being implemented is in the llvm-gcc C++ front-end, in which case it must be written in C++).
  3. Test cases, especially for regressions, should be reduced as much as possible, by bugpoint or manually. It is unacceptable to place an entire failing program into llvm/test as this creates a time-to-test burden on all developers. Please keep them short.
+ +

Note that llvm/test and clang/test are designed for regression and small feature tests only. More extensive test cases (e.g., entire applications, benchmarks, etc.) should be added to the llvm-test test suite. The llvm-test suite is for coverage (correctness, performance, etc.) testing, not feature or regression testing.

+
+ + + +
+

The minimum quality standards that any change must satisfy before being committed to the main development branch are:

+ +
    +
  1. Code must adhere to the LLVM Coding Standards.
  2. Code must compile cleanly (no errors, no warnings) on at least one platform.
  3. Bug fixes and new features should include a testcase so we know if the fix/feature ever regresses in the future.
  4. Code must pass the llvm/test test suite.
  5. The code must not cause regressions on a reasonable subset of llvm-test, where "reasonable" depends on the contributor's judgement and the scope of the change (more invasive changes require more testing). A reasonable subset might be something like "llvm-test/MultiSource/Benchmarks".
+ +

Additionally, the committer is responsible for addressing any problems found in the future that the change is responsible for. For example:

+ +
    +
  • The code should compile cleanly on all supported platforms.
  • The changes should not cause any correctness regressions in the llvm-test suite and must not cause any major performance regressions.
  • The change set should not cause performance or correctness regressions for the LLVM tools.
  • The changes should not cause performance or correctness regressions in code compiled by LLVM on all applicable targets.
  • You are expected to address any bugzilla bugs that result from your change.
+ +

We prefer that this be handled before submission, but we understand that it isn't possible to test all of this for every submission. Our build bots and nightly testing infrastructure normally find these problems. A good rule of thumb is to check the nightly testers for regressions the day after your change. Build bots will directly email you if a group of commits that included yours caused a failure. You are expected to check the build bot messages to see if they are your fault and, if so, fix the breakage.

+ +

Commits that violate these quality standards (e.g. are very broken) may be reverted. This is necessary when the change blocks other developers from making progress. The developer is welcome to re-commit the change after the problem has been fixed.

+
+ + + +
+ +

We grant commit access to contributors with a track record of submitting high quality patches. If you would like commit access, please send an email to Chris with the following information:

+ +
    +
  1. The user name you want to commit with, e.g. "hacker".
  2. The full name and email address you want messages to llvm-commits to come from, e.g. "J. Random Hacker <hacker@yoyodyne.com>".
  3. A "password hash" of the password you want to use, e.g. "2ACR96qjUqsyM". Note that you don't ever tell us what your password is; you just give it to us in an encrypted form. To get this, run "htpasswd" (a utility that comes with apache) in crypt mode (often enabled with "-d"), or find a web page that will do it for you.
+ +

Once you've been granted commit access, you should be able to check out an LLVM tree with an SVN URL of "https://username@llvm.org/..." instead of the normal anonymous URL of "http://llvm.org/...". The first time you commit, you'll have to type in your password. Note that you may get a warning from SVN about an untrusted key; you can ignore this. To verify that your commit access works, please do a test commit (e.g. change a comment or add a blank line). Your first commit to a repository may require the autogenerated email to be approved by a mailing list. This is normal, and will be done when the mailing list owner has time.

+ +

If you have recently been granted commit access, these policies apply:

+ +
    +
  1. You are granted commit-after-approval to all parts of LLVM. To get approval, submit a patch to llvm-commits. When approved, you may commit it yourself.
  2. You are allowed to commit patches without approval which you think are obvious. This is clearly a subjective decision - we simply expect you to use good judgement. Examples include: fixing build breakage, reverting obviously broken patches, documentation/comment changes, any other minor changes.
  3. You are allowed to commit patches without approval to those portions of LLVM that you have contributed or maintain (i.e., have been assigned responsibility for), with the proviso that such commits must not break the build. This is a "trust but verify" policy, and commits of this nature are reviewed after they are committed.
  4. Multiple violations of these policies or a single egregious violation may cause commit access to be revoked.
+ +

In any case, your changes are still subject to code review (either before or after they are committed, depending on the nature of the change). You are encouraged to review other people's patches as well, but you aren't required to.

+
+ + + +
+

When a developer begins a major new project with the aim of contributing it back to LLVM, s/he should inform the community with an email to the llvmdev email list, to the extent possible. The reason for this is to:

    +
  1. keep the community informed about future changes to LLVM,
  2. avoid duplication of effort by preventing multiple parties from working on the same thing without knowing about each other, and
  3. ensure that any technical issues around the proposed work are discussed and resolved before any significant work is done.
+ +

The design of LLVM is carefully controlled to ensure that all the pieces fit together well and are as consistent as possible. If you plan to make a major change to the way LLVM works or want to add a major new extension, it is a good idea to get consensus with the development community before you start working on it.

+ +

Once the design of the new feature is finalized, the work itself should be done as a series of incremental changes, not as a long-term development branch.

+
+ + + +
+

In the LLVM project, we do all significant changes as a series of incremental patches. We have a strong dislike for huge changes or long-term development branches. Long-term development branches have a number of drawbacks:

+ +
    +
  1. Branches must have mainline merged into them periodically. If the branch development and mainline development occur in the same pieces of code, resolving merge conflicts can take a lot of time.
  2. Other people in the community tend to ignore work on branches.
  3. Huge changes (produced when a branch is merged back onto mainline) are extremely difficult to code review.
  4. Branches are not routinely tested by our nightly tester infrastructure.
  5. Changes developed as monolithic large changes often don't work until the entire set of changes is done. Breaking it down into a set of smaller changes increases the odds that any of the work will be committed to the main repository.
+ +

To address these problems, LLVM uses an incremental development style and we require contributors to follow this practice when making a large/invasive change. Some tips:

+ +
    +
  • Large/invasive changes usually have a number of secondary changes that are required before the big change can be made (e.g. API cleanup, etc). These sorts of changes can often be done before the major change is done, independently of that work.
  • The remaining inter-related work should be decomposed into unrelated sets of changes if possible. Once this is done, define the first increment and get consensus on what the end goal of the change is.
  • Each change in the set can be stand-alone (e.g. to fix a bug), or part of a planned series of changes that works towards the development goal.
  • Each change should be kept as small as possible. This simplifies your work (into a logical progression), simplifies code review and reduces the chance that you will get negative feedback on the change. Small increments also facilitate the maintenance of a high quality code base.
  • Often, an independent precursor to a big change is to add a new API and slowly migrate clients to use the new API. Each change to use the new API is often "obvious" and can be committed without review. Once the new API is in place and used, it is much easier to replace the underlying implementation of the API. This implementation change is logically separate from the API change.
+ +

If you are interested in making a large change, and this scares you, please make sure to first discuss the change and gather consensus, then ask about the best way to go about making the change.

+
+ + + +
+

We believe in correct attribution of contributions to their contributors. However, we do not want the source code to be littered with random attributions "this code written by J. Random Hacker" (this is noisy and distracting). In practice, the revision control system keeps a perfect history of who changed what, and the CREDITS.txt file describes higher-level contributions. If you commit a patch for someone else, please say "patch contributed by J. Random Hacker!" in the commit message.

+ +

Overall, please do not add contributor names to the source code.

+
+ + + + + +
+

This section addresses the issues of copyright, license, and patents for the LLVM project. Currently, the University of Illinois is the LLVM copyright holder, and the terms of its license to LLVM users and developers are the University of Illinois/NCSA Open Source License.

+ +
+

NOTE: This section deals with legal matters but does not provide legal advice. We are not lawyers; please seek legal counsel from an attorney.

+
+
+ + + +
+ +

The LLVM project does not require copyright assignments, which means that the copyright for the code in the project is held by its respective contributors who have each agreed to release their contributed code under the terms of the LLVM License.

+ +

An implication of this is that the LLVM license is unlikely to ever change: changing it would require tracking down all the contributors to LLVM and getting them to agree that a license change is acceptable for their contribution. Since there are no plans to change the license, this is not a cause for concern.

+ +

As a contributor to the project, this means that you (or your company) retain ownership of the code you contribute, that it cannot be used in a way that contradicts the license (which is a liberal BSD-style license), and that the license for your contributions won't change without your approval in the future.

+ +
+ + + +
+

We intend to keep LLVM perpetually open source and to use a liberal open source license. All of the code in LLVM is available under the University of Illinois/NCSA Open Source License, which boils down to this:

+ +
    +
  • You can freely distribute LLVM.
  • You must retain the copyright notice if you redistribute LLVM.
  • Binaries derived from LLVM must reproduce the copyright notice (e.g. in an included readme file).
  • You can't use our names to promote your LLVM derived products.
  • There's no warranty on LLVM at all.
+ +

We believe this fosters the widest adoption of LLVM because it allows commercial products to be derived from LLVM with few restrictions and without a requirement for making any derived works also open source (i.e. LLVM's license is not a "copyleft" license like the GPL). We suggest that you read the License if further clarification is needed.

+ +

In addition to the UIUC license, the runtime library components of LLVM (compiler_rt and libc++) are also licensed under the MIT license, which does not contain the binary redistribution clause. As a user of these runtime libraries, it means that you can choose to use the code under either license (and thus don't need the binary redistribution clause), and as a contributor to the code that you agree that any contributions to these libraries be licensed under both licenses. We feel that this is important for runtime libraries, because they are implicitly linked into applications and therefore should not subject those applications to the binary redistribution clause. This also means that it is ok to move code from (e.g.) libc++ to the LLVM core without concern, but that code cannot be moved from the LLVM core to libc++ without the copyright owner's permission.

+ +

Note that the LLVM Project does distribute llvm-gcc, which is GPL. This means that anything "linked" into llvm-gcc must itself be compatible with the GPL, and must be releasable under the terms of the GPL. This implies that any code linked into llvm-gcc and distributed to others may be subject to the viral aspects of the GPL (for example, a proprietary code generator linked into llvm-gcc must be made available under the GPL). This is not a problem for code already distributed under a more liberal license (like the UIUC license), and does not affect code generated by llvm-gcc. It may be a problem if you intend to base commercial development on llvm-gcc without redistributing your source code.

+ +

We have no plans to change the license of LLVM. If you have questions or comments about the license, please contact the LLVM Developer's Mailing List.

+
+ + + +
+

To the best of our knowledge, LLVM does not infringe on any patents (we have actually removed code from LLVM in the past that was found to infringe). Having code in LLVM that infringes on patents would violate an important goal of the project by making it hard or impossible to reuse the code for arbitrary purposes (including commercial use).

+ +

When contributing code, we expect contributors to notify us of any potential for patent-related trouble with their changes. If you or your employer own the rights to a patent and would like to contribute code to LLVM that relies on it, we require that the copyright owner sign an agreement that allows any other user of LLVM to freely use your patent. Please contact the oversight group for more details.

+
+ + +
+
+ Written by the LLVM Oversight Group
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$
diff --git a/final/docs/ExceptionHandling.html b/final/docs/ExceptionHandling.html
new file mode 100644
index 00000000000..d597ffb3c8b
--- /dev/null
+++ b/final/docs/ExceptionHandling.html
@@ -0,0 +1,644 @@
+Exception Handling in LLVM
Exception Handling in LLVM
+ + + + +
+ +
+ +
+

Written by Jim Laskey

+
+ + + + + + +
+ +

This document is the central repository for all information pertaining to exception handling in LLVM. It describes the format that LLVM exception handling information takes, which is useful for those interested in creating front-ends or dealing directly with the information. Further, this document provides specific examples of what exception handling information is used for in C/C++.

+ +
+ + + + +
+ +

Exception handling for most programming languages is designed to recover from conditions that rarely occur during general use of an application. To that end, exception handling should not interfere with the main flow of an application's algorithm by performing checkpointing tasks, such as saving the current pc or register state.

+ +

The Itanium ABI Exception Handling Specification defines a methodology for providing outlying data in the form of exception tables without inlining speculative exception handling code in the flow of an application's main algorithm. Thus, the specification is said to add "zero-cost" to the normal execution of an application.

+ +

A more complete description of the Itanium ABI exception handling runtime support can be found at Itanium C++ ABI: Exception Handling. A description of the exception frame format can be found at Exception Frames, with details of the DWARF 3 specification at DWARF 3 Standard. A description of the C++ exception table formats can be found at Exception Handling Tables.

+ +
+ + + + +
+ +

Setjmp/Longjmp (SJLJ) based exception handling uses the LLVM intrinsics llvm.eh.sjlj.setjmp and llvm.eh.sjlj.longjmp to handle control flow for exception handling.

+ +

Each function that does exception processing - be it try/catch blocks or cleanups - registers itself on a global frame list. When exceptions are being unwound, the runtime uses this list to identify which functions need processing.

+ +

Landing pad selection is encoded in the call site entry of the function context. The runtime returns to the function via llvm.eh.sjlj.longjmp, where a switch table transfers control to the appropriate landing pad based on the index stored in the function context.

+ +

In contrast to DWARF exception handling, which encodes exception regions and frame information in out-of-line tables, SJLJ exception handling builds and removes the unwind frame context at runtime. This results in faster exception handling at the expense of slower execution when no exceptions are thrown. As exceptions are, by their nature, intended for uncommon code paths, DWARF exception handling is generally preferred to SJLJ.

+
+ + + + +
+ +

When an exception is thrown in LLVM code, the runtime does its best to find a handler suited to processing the circumstance.

+ +

The runtime first attempts to find an exception frame corresponding to + the function where the exception was thrown. If the programming language + (e.g. C++) supports exception handling, the exception frame contains a + reference to an exception table describing how to process the exception. If + the language (e.g. C) does not support exception handling, or if the + exception needs to be forwarded to a prior activation, the exception frame + contains information about how to unwind the current activation and restore + the state of the prior activation. This process is repeated until the + exception is handled. If the exception is not handled and no activations + remain, then the application is terminated with an appropriate error + message.

+ +

Because different programming languages have different behaviors when + handling exceptions, the exception handling ABI provides a mechanism for + supplying personalities. An exception handling personality is defined + by way of a personality function (e.g. __gxx_personality_v0 + in C++), which receives the context of the exception, an exception + structure containing the exception object type and value, and a reference + to the exception table for the current function. The personality function + for the current compile unit is specified in a common exception + frame.

+ +

The organization of an exception table is language dependent. For C++, an + exception table is organized as a series of code ranges defining what to do + if an exception occurs in that range. Typically, the information associated + with a range defines which types of exception objects (using C++ type + info) are handled in that range, and an associated action that + should take place. Actions typically pass control to a landing + pad.

+ +

A landing pad corresponds to the code found in the catch portion of + a try/catch sequence. When execution resumes at a landing + pad, it receives the exception structure and a selector corresponding to + the type of exception thrown. The selector is then used to determine + which catch should actually process the exception.

+ +
+ + + + +
+ +

At the time of this writing, only C++ exception handling support is available + in LLVM. So the remainder of this document will be somewhat C++-centric.

+ +

From the C++ developer's perspective, exceptions are defined in terms of the + throw and try/catch statements. In this section + we will describe the implementation of LLVM exception handling in terms of + C++ examples.

+ +
+ + +
+ Throw +
+ +
+ +

Languages that support exception handling typically provide a throw + operation to initiate the exception process. Internally, a throw operation + breaks down into two steps. First, a request is made to allocate exception + space for an exception structure, which must survive beyond the + current activation and which contains the type and value of the + object being thrown. Second, a call is made to the runtime to raise the + exception, passing the exception structure as an argument.

+ +

In C++, the allocation of the exception structure is done by + the __cxa_allocate_exception runtime function. The exception + raising is handled by __cxa_throw. The type of the exception is + represented using a C++ RTTI structure.

+ +
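For illustration, the two steps might look like the following hand-written IR sketch for throwing a C++ int (this is not actual llvm-g++ output; it assumes a 32-bit target and borrows @_ZTIi, the standard RTTI object for int):

+@_ZTIi = external constant i8*
+declare i8* @__cxa_allocate_exception(i32)
+declare void @__cxa_throw(i8*, i8*, i8*)
+
+define void @throw_int() {
+entry:
+  ; step 1: allocate an exception structure that outlives this activation
+  ; (4 is the size of the thrown i32 payload)
+  %ex = call i8* @__cxa_allocate_exception(i32 4)
+  %payload = bitcast i8* %ex to i32*
+  store i32 42, i32* %payload
+  ; step 2: raise the exception, passing the structure, the type of the
+  ; thrown object, and a null destructor
+  call void @__cxa_throw(i8* %ex, i8* bitcast (i8** @_ZTIi to i8*), i8* null)
+  unreachable
+}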
+ + + + +
+ +

A call within the scope of a try statement can potentially raise an + exception. In those circumstances, the LLVM C++ front-end replaces the call + with an invoke instruction. Unlike a call, the invoke has + two potential continuation points: where to continue when the call succeeds + as per normal; and where to continue if the call raises an exception, either + by a throw or the unwinding of a throw.

+ +
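For example, a potentially-throwing call to a function @foo would be emitted along these lines (%normal and %lpad are hypothetical block labels):

+  %result = invoke i32 @foo(i32 %arg)
+               to label %normal unwind label %lpad
+  ; %normal is reached if @foo returns normally; %lpad is the landing
+  ; pad, reached if an exception unwinds through the call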

The place where an invoke continues after + an exception is called a landing pad. LLVM landing pads are + conceptually alternative function entry points where an exception structure + reference and a type info index are passed in as arguments. The landing pad + saves the exception structure reference and then proceeds to select the catch + block that corresponds to the type info of the exception object.

+ +

Two LLVM intrinsic functions are used to convey information about the landing + pad to the back end.

+ +
    +
  1. llvm.eh.exception takes no + arguments and returns a pointer to the exception structure. This only + returns a sensible value if called after an invoke has branched + to a landing pad. Due to code generation limitations, it must currently + be called in the landing pad itself.
  2. + +
  3. llvm.eh.selector takes a minimum + of three arguments. The first argument is the reference to the exception + structure. The second argument is a reference to the personality function + to be used for this try/catch sequence. Each of the + remaining arguments is either a reference to the type info for + a catch statement, a filter + expression, or the number zero (0) representing + a cleanup. The exception is tested against the + arguments sequentially from first to last. The result of + the llvm.eh.selector is a + positive number if the exception matched a type info, a negative number if + it matched a filter, and zero if it matched a cleanup. If nothing is + matched, the behaviour of the program + is undefined. This only returns a sensible + value if called after an invoke has branched to a landing pad. + Due to codegen limitations, it must currently be called in the landing pad + itself. If a type info matched, then the selector value is the index of + the type info in the exception table, which can be obtained using the + llvm.eh.typeid.for + intrinsic.
  4. +
+ +

Once the landing pad has the type info selector, the code branches to the + code for the first catch. The catch then checks the value of the type info + selector against the index of type info for that catch. Since the type info + index is not known until all the type infos have been gathered in the backend, + the catch code will call the + llvm.eh.typeid.for intrinsic + to determine the index for a given type info. If the catch fails to match + the selector then control is passed on to the next catch. Note: Since the + landing pad will not be used if there is no match in the list of type infos on + the call to llvm.eh.selector, + neither the last catch nor a catch all needs to perform the check + against the selector.

+ +
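Putting the two intrinsics together, a landing pad for a single catch (int) clause might look like this simplified, hand-written sketch (declarations are omitted, and @_ZTIi is assumed to be the RTTI object for int):

+lpad:                                      ; reached via an invoke's unwind edge
+  %eh.ptr = call i8* @llvm.eh.exception()
+  %eh.sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh.ptr,
+                i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*),
+                i8* bitcast (i8** @_ZTIi to i8*))
+  ; compare the selector against the index the backend assigns to "int";
+  ; with a single catch this check is redundant (see the note above),
+  ; but it shows how llvm.eh.typeid.for is used
+  %int.id = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+  %is.int = icmp eq i32 %eh.sel, %int.id
+  br i1 %is.int, label %catch.int, label %next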

Finally, the entry and exit of catch code is bracketed with calls + to __cxa_begin_catch and __cxa_end_catch.

+ +
    +
• __cxa_begin_catch takes an exception structure reference as an + argument and returns the value of the exception object.
  • + +
  • __cxa_end_catch takes no arguments. This function:

    +
      +
    1. Locates the most recently caught exception and decrements its handler + count,
    2. +
    3. Removes the exception from the "caught" stack if the handler count + goes to zero, and
    4. +
    5. Destroys the exception if the handler count goes to zero, and the + exception was not re-thrown by throw.
    6. +
    +

    Note: a rethrow from within the catch may replace this call with + a __cxa_rethrow.

  • +
+ +
+ + + + +
+ +

To handle destructors and cleanups in try code, control may not run + directly from a landing pad to the first catch. Control may actually flow + from the landing pad to cleanup code and then to the first catch. Since the + required cleanup for each invoke in a try may be different + (e.g. an intervening constructor), there may be several landing pads for a given + try. If cleanups need to be run, an i32 0 should be passed as the + last llvm.eh.selector argument. + However, when using DWARF exception handling with C++, an i8* null + must be passed instead.

+ +
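For example, a landing pad that exists only to run cleanups might compute its selector as in this sketch (%eh.ptr and %pers, the bitcast personality reference, are assumed to be defined as in the earlier examples):

+  ; plain cleanup: the last argument is i32 0
+  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh.ptr,
+             i8* %pers, i32 0)
+  ; C++ with DWARF exception handling: pass i8* null instead
+  %sel2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh.ptr,
+              i8* %pers, i8* null)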
+ + + + +
+ +

C++ allows the specification of which exception types can be thrown from a + function. To represent this, a top-level landing pad may exist to filter out + invalid types. To express this in LLVM code, the landing pad will + call llvm.eh.selector. The + arguments are a reference to the exception structure, a reference to the + personality function, the length of the filter expression (the number of type + infos plus one), followed by the type infos themselves. + llvm.eh.selector will return a + negative value if the exception does not match any of the type infos. If no + match is found then a call to __cxa_call_unexpected should be made, + otherwise _Unwind_Resume. Each of these functions requires a + reference to the exception structure. Note that the most general form of an + llvm.eh.selector call can contain + any number of type infos, filter expressions and cleanups (though having more + than one cleanup is pointless). The LLVM C++ front-end can generate such + llvm.eh.selector calls due to + inlining creating nested exception handling scopes.

+ +
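As a sketch, the landing pad for a function declared throw(int) might look like this (again with %pers and @_ZTIi as assumptions; the filter length is 2, one type info plus one):

+filter.lpad:
+  %eh.ptr = call i8* @llvm.eh.exception()
+  %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh.ptr, i8* %pers,
+             i32 2, i8* bitcast (i8** @_ZTIi to i8*))
+  ; a negative selector means the exception is not an int, so the
+  ; exception specification was violated
+  %violated = icmp slt i32 %sel, 0
+  br i1 %violated, label %unexpected, label %resume
+
+unexpected:
+  call void @__cxa_call_unexpected(i8* %eh.ptr)
+  unreachable
+
+resume:
+  call void @_Unwind_Resume(i8* %eh.ptr)
+  unreachable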
+ + + + +
+ +

The semantics of the invoke instruction require that any exception that + unwinds through an invoke call should result in a branch to the invoke's + unwind label. However such a branch will only happen if the + llvm.eh.selector matches. Thus in + order to ensure correct operation, the front-end must only generate + llvm.eh.selector calls that are + guaranteed to always match whatever exception unwinds through the invoke. + For most languages it is enough to pass zero, indicating the presence of + a cleanup, as the + last llvm.eh.selector argument. + However for C++ this is not sufficient, because the C++ personality function + will terminate the program if it detects that unwinding the exception only + results in matches with cleanups. For C++ a null i8* should be + passed as the last llvm.eh.selector + argument instead. This is interpreted as a catch-all by the C++ personality + function, and will always match.

+ +
+ + + + +
+ +

LLVM uses several intrinsic functions (names prefixed with "llvm.eh") to + provide exception handling information at various points in generated + code.

+ +
+ + + + +
+ +
+  i8* %llvm.eh.exception()
+
+ +

This intrinsic returns a pointer to the exception structure.

+ +
+ + + + +
+ +
+  i32 %llvm.eh.selector(i8*, i8*, ...)
+
+ +

This intrinsic is used to compare the exception with the given type infos, + filters and cleanups.

+ +

llvm.eh.selector takes a minimum of + three arguments. The first argument is the reference to the exception + structure. The second argument is a reference to the personality function to + be used for this try/catch sequence. Each of the remaining arguments is + either a reference to the type info for a catch statement, + a filter expression, or the number zero + representing a cleanup. The exception is tested + against the arguments sequentially from first to last. The result of + the llvm.eh.selector is a positive + number if the exception matched a type info, a negative number if it matched + a filter, and zero if it matched a cleanup. If nothing is matched, the + behaviour of the program is undefined. If a type + info matched, then the selector value is the index of the type info in the + exception table, which can be obtained using the + llvm.eh.typeid.for intrinsic.

+ +
+ + + + +
+ +
+  i32 %llvm.eh.typeid.for(i8*)
+
+ +

This intrinsic returns the type info index in the exception table of the + current function. This value can be used to compare against the result + of llvm.eh.selector. The single + argument is a reference to a type info.

+ +
+ + + + +
+ +
+  i32 %llvm.eh.sjlj.setjmp(i8*)
+
+ +

SJLJ exception handling uses this intrinsic to force register saving for + the current function and to store the address of the following instruction + for use as a destination address by + llvm.eh.sjlj.longjmp. The buffer format and the overall + functioning of this intrinsic are compatible with the GCC + __builtin_setjmp implementation, allowing code built with the + two compilers to interoperate.

+ +

The single parameter is a pointer to a five word buffer in which the calling + context is saved. The front end places the frame pointer in the first word, + and the target implementation of this intrinsic should place the destination + address for a + llvm.eh.sjlj.longjmp in the + second word. The following three words are available for use in a + target-specific manner.

+ +
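A sketch of a call, with the buffer laid out as just described (the use of llvm.frameaddress for the frame pointer mirrors what a front end might emit, but the names here are made up):

+  %jbuf = alloca [5 x i8*]                        ; the five word buffer
+  %fp = call i8* @llvm.frameaddress(i32 0)
+  %fp.slot = getelementptr [5 x i8*]* %jbuf, i32 0, i32 0
+  store i8* %fp, i8** %fp.slot                    ; word 0: frame pointer
+  ; word 1 is filled in by the target with the longjmp destination address
+  %buf = bitcast [5 x i8*]* %jbuf to i8*
+  %r = call i32 @llvm.eh.sjlj.setjmp(i8* %buf)    ; returns 0 on the direct path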
+ + + + +
+ +
+  void %llvm.eh.sjlj.longjmp(i8*)
+
+ +

The llvm.eh.sjlj.longjmp + intrinsic is used to implement __builtin_longjmp() for SJLJ-style + exception handling. The single parameter is a pointer to a + buffer populated by + llvm.eh.sjlj.setjmp. The frame pointer and stack pointer + are restored from the buffer, then control is transferred to the + destination address.

+ +
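Continuing the sketch from llvm.eh.sjlj.setjmp above:

+  %buf = bitcast [5 x i8*]* %jbuf to i8*
+  call void @llvm.eh.sjlj.longjmp(i8* %buf)       ; does not return
+  unreachable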
+ + + +
+ +
+  i8* %llvm.eh.sjlj.lsda()
+
+ +

Used for SJLJ based exception handling, the + llvm.eh.sjlj.lsda intrinsic returns the address of the Language + Specific Data Area (LSDA) for the current function. The SJLJ front-end code + stores this address in the exception handling function context for use by the + runtime.

+ +
+ + + + +
+ +
+  void %llvm.eh.sjlj.callsite(i32)
+
+ +

For SJLJ based exception handling, the + llvm.eh.sjlj.callsite intrinsic identifies the callsite value + associated with the following invoke instruction. This is used to ensure + that landing pad entries in the LSDA are generated in the matching order.

+ +
+ + + + +
+ +
+  void %llvm.eh.sjlj.dispatchsetup(i32)
+
+ +

For SJLJ based exception handling, the + llvm.eh.sjlj.dispatchsetup intrinsic is used by targets to do + any unwind-edge setup they need. By default, no action is taken.

+ +
+ + + + +
+ +

There are two tables that are used by the exception handling runtime to + determine which actions should take place when an exception is thrown.

+ +
+ + + + +
+ +

An exception handling frame, eh_frame, is very similar to the unwind + frame used by DWARF debug info. The frame contains all the information + necessary to tear down the current frame and restore the state of the prior + frame. There is an exception handling frame for each function in a compile + unit, plus a common exception handling frame that defines information common + to all functions in the unit.

+ +

Todo - Table details here.

+ +
+ + + + +
+ +

An exception table contains information about what actions to take when an + exception is thrown in a particular part of a function's code. There is one + exception table per function, except that leaf routines and functions that + make only calls to non-throwing functions do not need an exception table.

+ +

Todo - Table details here.

+ +
+ + +
+ ToDo +
+ +
+ +
    + +
  1. Testing/Testing/Testing.
  2. + +
+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/ExtendedIntegerResults.txt b/final/docs/ExtendedIntegerResults.txt new file mode 100644 index 00000000000..44e9fbf0e76 --- /dev/null +++ b/final/docs/ExtendedIntegerResults.txt @@ -0,0 +1,133 @@ +//===----------------------------------------------------------------------===// +// Representing sign/zero extension of function results +//===----------------------------------------------------------------------===// + +Mar 25, 2009 - Initial Revision + +Most ABIs specify that functions which return small integers do so in a +specific integer GPR. This is an efficient way to go, but raises the question: +if the returned value is smaller than the register, what do the high bits hold? + +There are three (interesting) possible answers: undefined, zero extended, or +sign extended. The number of bits in question depends on the data-type that +the front-end is referencing (typically i1/i8/i16/i32). + +Knowing the answer to this is important for two reasons: 1) we want to be able +to implement the ABI correctly. If we need to sign extend the result according +to the ABI, we really really do need to do this to preserve correctness. 2) +this information is often useful for optimization purposes, and we want the +mid-level optimizers to be able to process this (e.g. eliminate redundant +extensions). + +For example, let's pretend that X86 requires the caller to properly extend the +result of a return (I'm not sure this is the case, but the argument doesn't +depend on this). Given this, we should compile this: + +int a(); +short b() { return a(); } + +into: + +_b: + subl $12, %esp + call L_a$stub + addl $12, %esp + cwtl + ret + +An optimization example is that we should be able to eliminate the explicit +sign extension in this example: + +short y(); +int z() { + return ((int)y() << 16) >> 16; +} + +_z: + subl $12, %esp + call _y + ;; movswl %ax, %eax -> not needed because eax is already sext'd + addl $12, %esp + ret + +//===----------------------------------------------------------------------===// +// What we have right now. +//===----------------------------------------------------------------------===// + +Currently, these sorts of things are modelled by compiling a function to return +the small type with a signext/zeroext marker. For example, we compile +Z into: + +define i32 @z() nounwind { +entry: + %0 = tail call signext i16 (...)* @y() nounwind + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +and b into: + +define signext i16 @b() nounwind { +entry: + %0 = tail call i32 (...)* @a() nounwind ; [#uses=1] + %retval12 = trunc i32 %0 to i16 ; [#uses=1] + ret i16 %retval12 +} + +This has some problems: 1) the actual precise semantics are really poorly +defined (see PR3779). 2) some targets might want the caller to extend, some +might want the callee to extend. 3) the mid-level optimizer doesn't know the +size of the GPR, so it doesn't know that %0 is sign extended up to 32-bits +here, and even if it did, it could not eliminate the sext. 4) the code +generator has historically assumed that the result is extended to i32, which is +a problem on PIC16 (and is also probably wrong on alpha and other 64-bit +targets). + +//===----------------------------------------------------------------------===// +// The proposal +//===----------------------------------------------------------------------===// + +I suggest that we have the front-end fully lower out the ABI issues here to +LLVM IR. This makes it 100% explicit what is going on and means that there is +no cause for confusion.
For example, the cases above should compile into: + +define i32 @z() nounwind { +entry: + %0 = tail call i32 (...)* @y() nounwind + %1 = trunc i32 %0 to i16 + %2 = sext i16 %1 to i32 + ret i32 %2 +} +define i32 @b() nounwind { +entry: + %0 = tail call i32 (...)* @a() nounwind + %retval12 = trunc i32 %0 to i16 + %tmp = sext i16 %retval12 to i32 + ret i32 %tmp +} + +In this model, no functions will return an i1/i8/i16 (and on an x86-64 target +that extends results to i64, no i32). This solves the ambiguity issue, allows us +to fully describe all possible ABIs, and now allows the optimizers to reason +about and eliminate these extensions. + +The one thing that is missing is the ability for the front-end and optimizer to +specify/infer the guarantees provided by the ABI to allow other optimizations. +For example, in the y/z case, since y is known to return a sign extended value, +the trunc/sext in z should be eliminable. + +This can be done by introducing new sext/zext attributes which mean "I know +that the result of the function is sign extended at least N bits." Given this, +and given that it is stuck on the y function, the mid-level optimizer could +easily eliminate the extensions etc. with existing functionality. + +The major disadvantage of doing this sort of thing is that it makes the ABI +lowering stuff even more explicit in the front-end, and that we would like to +eventually move to having the code generator do more of this work. However, +the sad truth of the matter is that this is a) unlikely to happen anytime in +the near future, and b) this is no worse than we have now with the existing +attributes. + +C compilers fundamentally have to reason about the target in many ways. +This is ugly and horrible, but a fact of life. + diff --git a/final/docs/ExtendingLLVM.html b/final/docs/ExtendingLLVM.html new file mode 100644 index 00000000000..647fa01d53b --- /dev/null +++ b/final/docs/ExtendingLLVM.html @@ -0,0 +1,391 @@ + + + + Extending LLVM: Adding instructions, intrinsics, types, etc. + + + + + +
+ Extending LLVM: Adding instructions, intrinsics, types, etc. +
+ +
    +
  1. Introduction and Warning
  2. +
  3. Adding a new intrinsic function
  4. +
  5. Adding a new instruction
  6. +
  7. Adding a new SelectionDAG node
  8. +
  9. Adding a new type +
      +
    1. Adding a new fundamental type
    2. +
    3. Adding a new derived type
    4. +
  10. +
+ +
+

Written by Misha Brukman, + Brad Jones, Nate Begeman, + and Chris Lattner

+
+ + + + + +
+ +

During the course of using LLVM, you may wish to customize it for your +research project or for experimentation. At this point, you may realize that +you need to add something to LLVM, whether it be a new fundamental type, a new +intrinsic function, or a whole new instruction.

+ +

When you come to this realization, stop and think. Do you really need to +extend LLVM? Is it a new fundamental capability that LLVM does not support in +its current incarnation, or can it be synthesized from pre-existing LLVM +elements? If you are not sure, ask on the LLVM-dev list. The +reason is that extending LLVM can get involved: you need to update all the +different passes that you intend to use with your extension, and there are +many LLVM analyses and transformations, so it may be quite a bit of +work.

+ +

Adding an intrinsic function is far easier than +adding an instruction, and is transparent to optimization passes. If your added +functionality can be expressed as a +function call, an intrinsic function is the method of choice for LLVM +extension.

+ +

Before you invest a significant amount of effort into a non-trivial +extension, ask on the list if what you are +looking to do can be done with already-existing infrastructure, or if maybe +someone else is already working on it. You will save yourself a lot of time and +effort by doing so.

+ +
+ + + + + +
+ +

Adding a new intrinsic function to LLVM is much easier than adding a new +instruction. Almost all extensions to LLVM should start as an intrinsic +function and then be turned into an instruction if warranted.

+ +
    +
  1. llvm/docs/LangRef.html: + Document the intrinsic. Decide whether it is code generator specific and + what the restrictions are. Talk to other people about it so that you are + sure it's a good idea.
  2. + +
3. llvm/include/llvm/Intrinsics*.td: + Add an entry for your intrinsic. Describe its memory access characteristics + for optimization (this controls whether it will be DCE'd, CSE'd, etc). Note + that any intrinsic using the llvm_anyint_ty type for an argument will + be deemed by tblgen as overloaded and the corresponding suffix + will be required on the intrinsic's name. A sketch of such an entry appears + after this list.
  4. + +
5. llvm/lib/Analysis/ConstantFolding.cpp: If it is possible to + constant fold your intrinsic, add support for it in the + canConstantFoldCallTo and ConstantFoldCall functions.
  6. + +
7. llvm/test/Regression/*: Add test cases for your intrinsic to the + test suite
  8. +
+ +
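For illustration, a hypothetical entry for an intrinsic that takes and returns an i32 and touches no memory might look like the sketch below (the name is invented; copy the conventions of the neighboring entries in Intrinsics.td):

+def int_my_intrinsic : Intrinsic<[llvm_i32_ty],   // return type
+                                 [llvm_i32_ty],   // one i32 parameter
+                                 [IntrNoMem]>;    // no memory access, so it
+                                                  // can be CSE'd and DCE'd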

Once the intrinsic has been added to the system, you must add code generator +support for it. Generally you must do the following steps:

+ +
+
Add support to the C backend in lib/Target/CBackend/
+ +
Depending on the intrinsic, there are a few ways to implement this. First, for + most intrinsics, it makes sense to add code to lower your intrinsic in + LowerIntrinsicCall in lib/CodeGen/IntrinsicLowering.cpp. + Second, if it makes sense to lower the intrinsic to an expanded sequence of + C code in all cases, just emit the expansion in visitCallInst in + Writer.cpp. If the intrinsic has some way to express it with GCC + (or any other compiler) extensions, it can be conditionally supported based + on the compiler compiling the CBE output (see llvm.prefetch for an + example). Third, if the intrinsic really has no way to be lowered, just + have the code generator emit code that prints an error message and calls + abort if executed.
+ +
Add support to the .td file for the target(s) of your choice in + lib/Target/*/*.td.
+ +
This is usually a matter of adding a pattern to the .td file that matches + the intrinsic, though it may obviously require adding the instructions you + want to generate as well. There are lots of examples in the PowerPC and X86 + backend to follow.
+
+ +
+ + + + + +
+ +

As with intrinsics, adding a new SelectionDAG node to LLVM is much easier +than adding a new instruction. New nodes are often added to help represent +instructions common to many targets. These nodes often map to an LLVM +instruction (add, sub) or intrinsic (byteswap, population count). In other +cases, new nodes have been added to allow many targets to perform a common task +(converting between floating point and integer representation) or capture more +complicated behavior in a single node (rotate).

+ +
    +
  1. include/llvm/CodeGen/SelectionDAGNodes.h: + Add an enum value for the new SelectionDAG node.
  2. +
  3. lib/CodeGen/SelectionDAG/SelectionDAG.cpp: + Add code to print the node to getOperationName. If your new node + can be evaluated at compile time when given constant arguments (such as an + add of a constant with another constant), find the getNode method + that takes the appropriate number of arguments, and add a case for your node + to the switch statement that performs constant folding for nodes that take + the same number of arguments as your new node.
  4. +
  5. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp: + Add code to legalize, + promote, and expand the node as necessary. At a minimum, you will need + to add a case statement for your node in LegalizeOp which calls + LegalizeOp on the node's operands, and returns a new node if any of the + operands changed as a result of being legalized. It is likely that not all + targets supported by the SelectionDAG framework will natively support the + new node. In this case, you must also add code in your node's case + statement in LegalizeOp to Expand your node into simpler, legal + operations. The case for ISD::UREM for expanding a remainder into + a divide, multiply, and a subtract is a good example.
  6. +
  7. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp: + If targets may support the new node being added only at certain sizes, you + will also need to add code to your node's case statement in + LegalizeOp to Promote your node's operands to a larger size, and + perform the correct operation. You will also need to add code to + PromoteOp to do this as well. For a good example, see + ISD::BSWAP, + which promotes its operand to a wider size, performs the byteswap, and then + shifts the correct bytes right to emulate the narrower byteswap in the + wider type.
  8. +
  9. lib/CodeGen/SelectionDAG/LegalizeDAG.cpp: + Add a case for your node in ExpandOp to teach the legalizer how to + perform the action represented by the new node on a value that has been + split into high and low halves. This case will be used to support your + node with a 64 bit operand on a 32 bit target.
  10. +
11. lib/CodeGen/SelectionDAG/DAGCombiner.cpp: + If your node can be combined with itself, or other existing nodes in a + peephole-like fashion, add a visit function for it, and call that function + from DAGCombiner::visit(). There are several good examples of simple combines you + can do; visitFABS and visitSRL are good starting places. +
  12. +
  13. lib/Target/PowerPC/PPCISelLowering.cpp: + Each target has an implementation of the TargetLowering class, + usually in its own file (although some targets include it in the same + file as the DAGToDAGISel). The default behavior for a target is to + assume that your new node is legal for all types that are legal for + that target. If this target does not natively support your node, then + tell the target to either Promote it (if it is supported at a larger + type) or Expand it. This will cause the code you wrote in + LegalizeOp above to decompose your new node into other legal + nodes for this target.
  14. +
15. lib/Target/TargetSelectionDAG.td: + Most current targets supported by LLVM generate code using the DAGToDAG + method, where SelectionDAG nodes are pattern matched to target-specific + nodes, which represent individual instructions. In order for the targets + to match an instruction to your new node, you must add a def for that node + to the list in this file, with the appropriate type constraints. Look at + add, bswap, and fadd for examples; a sketch appears after this list.
  16. +
  17. lib/Target/PowerPC/PPCInstrInfo.td: + Each target has a tablegen file that describes the target's instruction + set. For targets that use the DAGToDAG instruction selection framework, + add a pattern for your new node that uses one or more target nodes. + Documentation for this is a bit sparse right now, but there are several + decent examples. See the patterns for rotl in + PPCInstrInfo.td.
  18. +
  19. TODO: document complex patterns.
  20. +
  21. llvm/test/Regression/CodeGen/*: Add test cases for your new node + to the test suite. llvm/test/Regression/CodeGen/X86/bswap.ll is + a good example.
  22. +
+ +
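For reference, the def for the existing bswap node in TargetSelectionDAG.td has the following shape; a def for a new node would look much the same, with the type constraints adjusted:

+def bswap : SDNode<"ISD::BSWAP", SDTIntUnaryOp>;  // unary, integer-typed node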
+ + + + + +
+ +

WARNING: adding instructions changes the bitcode +format, and it will take some effort to maintain compatibility with +the previous version. Only add an instruction if it is absolutely +necessary.

+ +
    + +
  1. llvm/include/llvm/Instruction.def: + add a number for your instruction and an enum name
  2. + +
  3. llvm/include/llvm/Instructions.h: + add a definition for the class that will represent your instruction
  4. + +
  5. llvm/include/llvm/Support/InstVisitor.h: + add a prototype for a visitor to your new instruction type
  6. + +
  7. llvm/lib/AsmParser/Lexer.l: + add a new token to parse your instruction from assembly text file
  8. + +
  9. llvm/lib/AsmParser/llvmAsmParser.y: + add the grammar on how your instruction can be read and what it will + construct as a result
  10. + +
  11. llvm/lib/Bitcode/Reader/Reader.cpp: + add a case for your instruction and how it will be parsed from bitcode
  12. + +
  13. llvm/lib/VMCore/Instruction.cpp: + add a case for how your instruction will be printed out to assembly
  14. + +
  15. llvm/lib/VMCore/Instructions.cpp: + implement the class you defined in + llvm/include/llvm/Instructions.h
  16. + +
  17. Test your instruction
  18. + +
  19. llvm/lib/Target/*: + Add support for your instruction to code generators, or add a lowering + pass.
  20. + +
  21. llvm/test/Regression/*: add your test cases to the test suite.
  22. + +
+ +

Also, you need to implement (or modify) any analyses or passes that you want +to understand this new instruction.

+ +
+ + + + + + +
+ +

WARNING: adding new types changes the bitcode +format, and will break compatibility with currently-existing LLVM +installations. Only add new types if it is absolutely necessary.

+ +
+ + + + +
+ +
    + +
  1. llvm/include/llvm/Type.h: + add enum for the new type; add static Type* for this type
  2. + +
  3. llvm/lib/VMCore/Type.cpp: + add mapping from TypeID => Type*; + initialize the static Type*
  4. + +
  5. llvm/lib/AsmReader/Lexer.l: + add ability to parse in the type from text assembly
  6. + +
  7. llvm/lib/AsmReader/llvmAsmParser.y: + add a token for that type
  8. + +
+ +
+ + + + +
+ +
    +
  1. llvm/include/llvm/Type.h: + add enum for the new type; add a forward declaration of the type + also
  2. + +
3. llvm/include/llvm/DerivedTypes.h: + add a new class to represent the new type in the hierarchy; add a forward + declaration to the TypeMap value type
  4. + +
  5. llvm/lib/VMCore/Type.cpp: + add support for derived type to: +
    +
    +std::string getTypeDescription(const Type &Ty,
    +  std::vector<const Type*> &TypeStack)
    +bool TypesEqual(const Type *Ty, const Type *Ty2,
    +  std::map<const Type*, const Type*> & EqTypes)
    +
    +
    + add necessary member functions for type, and factory methods
  6. + +
  7. llvm/lib/AsmReader/Lexer.l: + add ability to parse in the type from text assembly
  8. + +
9. llvm/lib/Bitcode/Writer/Writer.cpp: + modify void BitcodeWriter::outputType(const Type *T) to serialize + your type
  10. + +
11. llvm/lib/Bitcode/Reader/Reader.cpp: + modify const Type *BitcodeReader::ParseType() to read your data + type
  12. + +
  13. llvm/lib/VMCore/AsmWriter.cpp: + modify +
    +
    +void calcTypeName(const Type *Ty,
    +                  std::vector<const Type*> &TypeStack,
    +                  std::map<const Type*,std::string> &TypeNames,
    +                  std::string & Result)
    +
    +
    + to output the new derived type +
  14. + + +
+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + The LLVM Compiler Infrastructure +
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/FAQ.html b/final/docs/FAQ.html new file mode 100644 index 00000000000..88c76767bd4 --- /dev/null +++ b/final/docs/FAQ.html @@ -0,0 +1,938 @@ + + + + + LLVM: Frequently Asked Questions + + + + +
+ LLVM: Frequently Asked Questions +
+ +
    +
  1. License +
      +
    1. Why are the LLVM source code and the front-end distributed under + different licenses?
    2. + +
    3. Does the University of Illinois Open Source License really qualify as an + "open source" license?
    4. + +
    5. Can I modify LLVM source code and redistribute the modified source?
    6. + +
    7. Can I modify LLVM source code and redistribute binaries or other tools + based on it, without redistributing the source?
    8. +
  2. + +
  3. Source code +
      +
    1. In what language is LLVM written?
    2. + +
    3. How portable is the LLVM source code?
    4. +
  4. + +
  5. Build Problems +
      +
    1. When I run configure, it finds the wrong C compiler.
    2. + +
    3. The configure script finds the right C compiler, but it uses + the LLVM linker from a previous build. What do I do?
    4. + +
    5. When creating a dynamic library, I get a strange GLIBC error.
    6. + +
    7. I've updated my source tree from Subversion, and now my build is trying + to use a file/directory that doesn't exist.
    8. + +
    9. I've modified a Makefile in my source tree, but my build tree keeps + using the old version. What do I do?
    10. + +
    11. I've upgraded to a new version of LLVM, and I get strange build + errors.
    12. + +
    13. I've built LLVM and am testing it, but the tests freeze.
    14. + +
    15. Why do test results differ when I perform different types of + builds?
    16. + +
    17. Compiling LLVM with GCC 3.3.2 fails, what should I do?
    18. + +
    19. Compiling LLVM with GCC succeeds, but the resulting tools do not work, + what can be wrong?
    20. + +
    21. When I use the test suite, all of the C Backend tests fail. What is + wrong?
    22. + +
    23. After Subversion update, rebuilding gives the error "No rule to make + target".
    24. + +
    25. The llvmc program gives me errors/doesn't + work.
    26. + +
    27. When I compile LLVM-GCC with srcdir == objdir, + it fails. Why?
    28. +
  6. + +
  7. Source Languages +
      +
    1. What source languages are supported?
    2. + +
    3. I'd like to write a self-hosting LLVM compiler. How + should I interface with the LLVM middle-end optimizers and back-end code + generators?
    4. + +
    5. What support is there for higher level source + language constructs for building a compiler?
    6. + +
    7. I don't understand the GetElementPtr + instruction. Help!
    8. +
    + +
  8. Using the GCC Front End +
      +
    1. When I compile software that uses a configure script, the configure + script thinks my system has all of the header files and libraries it is + testing for. How do I get configure to work correctly?
    2. + +
    3. When I compile code using the LLVM GCC front end, it complains that it + cannot find libcrtend.a?
    4. + +
    5. How can I disable all optimizations when compiling code using the LLVM + GCC front end?
    6. + +
    7. Can I use LLVM to convert C++ code to C + code?
    8. + +
    9. Can I compile C or C++ code to + platform-independent LLVM bitcode?
    10. +
    +
  9. + +
  10. Questions about code generated by the GCC front-end +
      +
    1. What is this llvm.global_ctors and + _GLOBAL__I__tmp_webcompile... stuff that happens when I + #include <iostream>?
    2. + +
    3. Where did all of my code go??
    4. + +
    5. What is this "undef" thing that shows up in + my code?
    6. + +
    7. Why does instcombine + simplifycfg turn + a call to a function with a mismatched calling convention into "unreachable"? + Why not make the verifier reject it?
    8. +
    +
  11. +
+ +
+

Written by The LLVM Team

+
+ + + +
+ License +
+ + +
+

Why are the LLVM source code and the front-end distributed under different + licenses?

+
+ +
+

The C/C++ front-ends are based on GCC and must be distributed under the GPL. + Our aim is to distribute LLVM source code under a much less + restrictive license, in particular one that does not compel users who + distribute tools based on modifying the source to redistribute the modified + source code as well.

+
+ +
+

Does the University of Illinois Open Source License really qualify as an + "open source" license?

+
+ +
+

Yes, the license + is certified by + the Open Source Initiative (OSI).

+
+ +
+

Can I modify LLVM source code and redistribute the modified source?

+
+ +
+

Yes. The modified source distribution must retain the copyright notice and + follow the three bulleted conditions listed in + the LLVM + license.

+
+ +
+

Can I modify LLVM source code and redistribute binaries or other tools based + on it, without redistributing the source?

+
+ +
+

Yes. This is why we distribute LLVM under a less restrictive license than + GPL, as explained in the first question above.

+
+ + + + + +
+

In what language is LLVM written?

+
+ +
+

All of the LLVM tools and libraries are written in C++ with extensive use of + the STL.

+
+ +
+

How portable is the LLVM source code?

+
+ +
+

The LLVM source code should be portable to most modern UNIX-like operating +systems. Most of the code is written in standard C++ with operating system +services abstracted to a support library. The tools required to build and test +LLVM have been ported to a plethora of platforms.

+ +

Some porting problems may exist in the following areas:

+ +
    +
  • The GCC front end code is not as portable as the LLVM suite, so it may not + compile as well on unsupported platforms.
  • + +
  • The LLVM build system relies heavily on UNIX shell tools, like the Bourne + Shell and sed. Porting to systems without these tools (MacOS 9, Plan 9) + will require more effort.
  • +
+ +
+ + + + + +
+

When I run configure, it finds the wrong C compiler.

+
+ +
+

The configure script attempts to locate first gcc and then + cc, unless it finds compiler paths set in CC + and CXX for the C and C++ compiler, respectively.

+ +

If configure finds the wrong compiler, either adjust your + PATH environment variable or set CC and CXX + explicitly.

+ +
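For example, in a Bourne-compatible shell (the paths below are placeholders for whichever compilers you actually want):

+% CC=/usr/bin/gcc CXX=/usr/bin/g++ ./configure ...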
+ +
+

The configure script finds the right C compiler, but it uses the + LLVM linker from a previous build. What do I do?

+
+ +
+

The configure script uses the PATH to find executables, so + if it's grabbing the wrong linker/assembler/etc, there are two ways to fix + it:

+ +
    +
  1. Adjust your PATH environment variable so that the correct + program appears first in the PATH. This may work, but may not be + convenient when you want them first in your path for other + work.

  2. + +
3. Run configure with an alternative PATH that is + correct. In a Bourne-compatible shell, the syntax would be:

    + +
    +% PATH=[the path without the bad program] ./configure ...
    +
    + +

    This is still somewhat inconvenient, but it allows configure + to do its work without having to adjust your PATH + permanently.

  4. +
+
+ +
+

When creating a dynamic library, I get a strange GLIBC error.

+
+ +
+

Under some operating systems (e.g. Linux), libtool does not work correctly if + GCC was compiled with the --disable-shared option. To work around this, + install your own version of GCC that has shared libraries enabled by + default.

+
+ +
+

I've updated my source tree from Subversion, and now my build is trying to + use a file/directory that doesn't exist.

+
+ +
+

You need to re-run configure in your object directory. When new Makefiles + are added to the source tree, they have to be copied over to the object tree + in order to be used by the build.

+
+ +
+

I've modified a Makefile in my source tree, but my build tree keeps using the + old version. What do I do?

+
+ +
+

If the Makefile already exists in your object tree, you can just run the + following command in the top level directory of your object tree:

+ +
+% ./config.status <relative path to Makefile>
+
+ +

If the Makefile is new, you will have to modify the configure script to copy + it over.

+
+ +
+

I've upgraded to a new version of LLVM, and I get strange build errors.

+
+ +
+ +

Sometimes, changes to the LLVM source code alter how the build system works. + Changes in libtool, autoconf, or header file dependencies are especially + prone to this sort of problem.

+ +

The best thing to try is to remove the old files and re-build. In most + cases, this takes care of the problem. To do this, just type make + clean and then make in the directory that fails to build.

+
+ +
+

I've built LLVM and am testing it, but the tests freeze.

+
+ +
+

This is most likely occurring because you built a profile or release + (optimized) build of LLVM and have not specified the same information on the + gmake command line.

+ +

For example, if you built LLVM with the command:

+ +
+% gmake ENABLE_PROFILING=1
+
+ +

...then you must run the tests with the following commands:

+ +
+% cd llvm/test
+% gmake ENABLE_PROFILING=1
+
+
+ +
+

Why do test results differ when I perform different types of builds?

+
+ +
+

The LLVM test suite is dependent upon several features of the LLVM tools and + libraries.

+ +

First, the debugging assertions in code are not enabled in optimized or + profiling builds. Hence, tests that used to fail may pass.

+ +

Second, some tests may rely upon debugging options or behavior that is only + available in the debug build. These tests will fail in an optimized or + profile build.

+
+ +
+

Compiling LLVM with GCC 3.3.2 fails, what should I do?

+
+ +
+

This is a bug in + GCC, and affects projects other than LLVM. Try upgrading or downgrading + your GCC.

+
+ +
+

Compiling LLVM with GCC succeeds, but the resulting tools do not work, what + can be wrong?

+
+ +
+

Several versions of GCC are known to miscompile the LLVM + codebase. Please consult your compiler version (gcc --version) to + find out whether it is broken. + If so, your only option is to upgrade GCC to a known good version.

+
+ +
+

After Subversion update, rebuilding gives the error "No rule to make + target".

+
+ +
+

If the error is of the form:

+ +
+gmake[2]: *** No rule to make target `/path/to/somefile', needed by
+`/path/to/another/file.d'.
+Stop. +
+ +

This may occur anytime files are moved within the Subversion repository or + removed entirely. In this case, the best solution is to erase all + .d files, which list dependencies for source files, and rebuild:

+ +
+% cd $LLVM_OBJ_DIR
+% rm -f `find . -name \*\.d` 
+% gmake 
+
+ +

In other cases, it may be necessary to run make clean before + rebuilding.

+
+ + + +
+

llvmc is experimental and isn't really supported. We suggest + using llvm-gcc instead.

+
+ + + +
+

The GNUmakefile in the top-level directory of LLVM-GCC is a special + Makefile used by Apple to invoke the build_gcc script after + setting up a special environment. This has the unfortunate side-effect that + trying to build LLVM-GCC with srcdir == objdir in a "non-Apple way" invokes + the GNUmakefile instead of Makefile. Because the + environment isn't set up correctly to do this, the build fails.

+ +

People not building LLVM-GCC the "Apple way" need to build LLVM-GCC with + srcdir != objdir, or simply remove the GNUmakefile entirely.

+ +

We regret the inconvenience.

+
+ + + + + + +
+

LLVM currently has full support for C and C++ source languages. These are + available through a special version of GCC that LLVM calls the + C Front End.

+ +

There is an incomplete version of a Java front end available in the + java module. There is no documentation on this yet so you'll need to + download the code, compile it, and try it.

+ +

The PyPy developers are working on integrating LLVM into the PyPy backend so + that the PyPy language can translate to LLVM.

+
+ + + +
+

Your compiler front-end will communicate with LLVM by creating a module in + the LLVM intermediate representation (IR) format. Assuming you want to write + your language's compiler in the language itself (rather than C++), there are + 3 major ways to tackle generating LLVM IR from a front-end:

+ +
    +
  • Call into the LLVM libraries code using your language's FFI + (foreign function interface). + +
      +
    • for: best tracks changes to the LLVM IR, .ll syntax, and .bc + format
    • + +
• for: enables running LLVM optimization passes without an + emit/parse overhead
    • + +
    • for: adapts well to a JIT context
    • + +
    • against: lots of ugly glue code to write
    • +
  • + +
  • Emit LLVM assembly from your compiler's native language. +
      +
    • for: very straightforward to get started
    • + +
    • against: the .ll parser is slower than the bitcode reader + when interfacing to the middle end
    • + +
    • against: you'll have to re-engineer the LLVM IR object model + and asm writer in your language
    • + +
    • against: it may be harder to track changes to the IR
    • +
  • + +
  • Emit LLVM bitcode from your compiler's native language. + +
      +
    • for: can use the more-efficient bitcode reader when + interfacing to the middle end
    • + +
    • against: you'll have to re-engineer the LLVM IR object + model and bitcode writer in your language
    • + +
    • against: it may be harder to track changes to the IR
    • +
  • +
+ +

If you go with the first option, the C bindings in include/llvm-c should help + a lot, since most languages have strong support for interfacing with C. The + most common hurdle with calling C from managed code is interfacing with the + garbage collector. The C interface was designed to require very little memory + management, and so is straightforward in this regard.

+
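As a tiny illustration of what driving the C bindings looks like (shown here from C itself; a language FFI would bind the same functions), this sketch creates a module, prints it, and frees it:

+#include "llvm-c/Core.h"
+
+int main(void) {
+  /* create an empty module and dump its textual IR to stderr */
+  LLVMModuleRef mod = LLVMModuleCreateWithName("demo");
+  LLVMDumpModule(mod);
+  LLVMDisposeModule(mod);
+  return 0;
+}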
+ + + +
+

Currently, there isn't much. LLVM supports an intermediate representation + which is useful for code representation but does not support the high-level + (abstract syntax tree) representation needed by most compilers. There are no + facilities for lexical or semantic analysis. There is, however, a mostly + implemented configuration-driven + compiler driver which simplifies the task + of running optimizations, linking, and executable generation.

+
+ + + + + + + + +
+

When I compile software that uses a configure script, the configure script + thinks my system has all of the header files and libraries it is testing for. + How do I get configure to work correctly?

+
+ +
+

The configure script is getting things wrong because the LLVM linker allows + symbols to be undefined at link time (so that they can be resolved during JIT + or translation to the C back end). That is why configure thinks your system + "has everything."

+ +

To work around this, perform the following steps:

+ +
    +
1. Make sure the CC and CXX environment variables contain the full path to + the LLVM GCC front end.
  2. + +
  3. Make sure that the regular C compiler is first in your PATH.
  4. + +
  5. Add the string "-Wl,-native" to your CFLAGS environment variable.
  6. +
+ +

This will allow the llvm-ld linker to create a native code + executable instead of a shell script that runs the JIT. Creating native code + requires standard linkage, which in turn will allow the configure script to + find out whether code fails to link because the feature isn't + available on your system.

+
+ +
+

When I compile code using the LLVM GCC front end, it complains that it cannot + find libcrtend.a. +

+
+ +
+

The only way this can happen is if you haven't installed the runtime + library. To correct this, do:

+ +
+% cd llvm/runtime
+% make clean ; make install-bytecode
+
+
+ +
+

How can I disable all optimizations when compiling code using the LLVM GCC + front end?

+
+ +
+

Passing "-Wa,-disable-opt -Wl,-disable-opt" will disable *all* cleanup and + optimizations done at the llvm level, leaving you with the truly horrible + code that you desire.

+
+ + + + +
+

Yes, you can use LLVM to convert code from any language LLVM supports to C. + Note that the generated C code will be very low level (all loops are lowered + to gotos, etc) and not very pretty (comments are stripped, original source + formatting is totally lost, variables are renamed, expressions are + regrouped), so this may not be what you're looking for. Also, there are + several limitations noted below.

+ +

Use commands like this:

+ +
    +
  1. Compile your program with llvm-g++:

    + +
    +% llvm-g++ -emit-llvm x.cpp -o program.bc -c
    +
    + +

    or:

    + +
    +% llvm-g++ a.cpp -c -emit-llvm
    +% llvm-g++ b.cpp -c -emit-llvm
    +% llvm-ld a.o b.o -o program
    +
    + +

    This will generate program and program.bc. The .bc + file is the LLVM version of the program all linked together.

  2. + +
  3. Convert the LLVM code to C code, using the LLC tool with the C + backend:

    + +
    +% llc -march=c program.bc -o program.c
    +
  4. + +
  5. Finally, compile the C file:

    + +
    +% cc x.c -lstdc++
    +
  6. + +
+ +

Using LLVM does not eliminate the need for C++ library support. If you use + the llvm-g++ front-end, the generated code will depend on g++'s C++ support + libraries in the same way that code generated from g++ would. If you use + another C++ front-end, the generated code will depend on whatever library + that front-end would normally require.

+ +

If you are working on a platform that does not provide any C++ libraries, you + may be able to manually compile libstdc++ to LLVM bitcode, statically link it + into your program, then use the commands above to convert the whole result + into C code. Alternatively, you might compile the libraries and your + application into two different chunks of C code and link them.

+ +

Note that, by default, the C back end does not support exception handling. + If you want/need it for a certain program, you can enable it by passing + "-enable-correct-eh-support" to the llc program. The resultant code will use + setjmp/longjmp to implement exception support that is relatively slow, and + not C++-ABI-conforming on most platforms, but otherwise correct.

+ +

Also, there are a number of other limitations of the C backend that cause it + to produce code that does not fully conform to the C++ ABI on most + platforms. Some of the C++ programs in LLVM's test suite are known to fail + when compiled with the C back end because of ABI incompatibilities with + standard C++ libraries.

+
+ + + +
+

No. C and C++ are inherently platform-dependent languages. The most obvious + example of this is the preprocessor. A very common way that C code is made + portable is by using the preprocessor to include platform-specific code. In + practice, information about other platforms is lost after preprocessing, so + the result is inherently dependent on the platform that the preprocessing was + targeting.

+ +

Another example is sizeof. It's common for sizeof(long) to + vary between platforms. In most C front-ends, sizeof is expanded to + a constant immediately, thus hard-wiring a platform-specific detail.

+ +

Also, since many platforms define their ABIs in terms of C, and since LLVM is + lower-level than C, front-ends currently must emit platform-specific IR in + order to have the result conform to the platform ABI.

+
+ + + + + + +
+

If you #include the <iostream> header into a C++ + translation unit, the file will probably use + the std::cin/std::cout/... global objects. However, C++ + does not guarantee an order of initialization between static objects in + different translation units, so if a static ctor/dtor in your .cpp file + used std::cout, for example, the object would not necessarily be + automatically initialized before your use.

+ +

To make std::cout and friends work correctly in these scenarios, the + STL that we use declares a static object that gets created in every + translation unit that includes <iostream>. This object has a + static constructor and destructor that initializes and destroys the global + iostream objects before they could possibly be used in the file. The code + that you see in the .ll file corresponds to the constructor and destructor + registration code. +

+ +

If you would like to make it easier to understand the LLVM code + generated by the compiler in the demo page, consider using printf() + instead of iostreams to print values.

+
+ + + + + +
+

If you are using the LLVM demo page, you may often wonder what happened to + all of the code that you typed in. Remember that the demo script is running + the code through the LLVM optimizers, so if your code doesn't actually do + anything useful, it might all be deleted.

+ +

To prevent this, make sure that the code is actually needed. For example, if + you are computing some expression, return the value from the function instead + of leaving it in a local variable. If you really want to constrain the + optimizer, you can read from and assign to volatile global + variables.

+
+ + + + + +
+

undef is the LLVM way of + representing a value that is not defined. You can get these if you do not + initialize a variable before you use it. For example, the C function:

+ +
+int X() { int i; return i; }
+
+ +

Is compiled to "ret i32 undef" because "i" never has a + value specified for it.

+
+ + + + + +
+

This is a common problem run into by authors of front-ends that are using +custom calling conventions: you need to make sure to set the right calling +convention on both the function and on each call to the function. For example, +this code:

+ +
+define fastcc void @foo() {
+        ret void
+}
+define void @bar() {
+        call void @foo()
+        ret void
+}
+
+ +

Is optimized to:

+ +
+define fastcc void @foo() {
+	ret void
+}
+define void @bar() {
+	unreachable
+}
+
+ +

... with "opt -instcombine -simplifycfg". This often bites people because +"all their code disappears". Setting the calling convention on the caller and +callee is required for indirect calls to work, so people often ask why not make +the verifier reject this sort of thing.

+ +

The answer is that this code has undefined behavior, but it is not illegal. +If we made it illegal, then every transformation that could potentially create +this would have to ensure that it doesn't, and there is valid code that can +create this sort of construct (in dead code). The sorts of things that can +cause this to happen are fairly contrived, but we still need to accept them. +Here's an example:

+ +
+define fastcc void @foo() {
+        ret void
+}
+define internal void @bar(void()* %FP, i1 %cond) {
+        br i1 %cond, label %T, label %F
+T:  
+        call void %FP()
+        ret void
+F:
+        call fastcc void %FP()
+        ret void
+}
+define void @test() {
+        %X = or i1 false, false
+        call void @bar(void()* @foo, i1 %X)
+        ret void
+} 
+
+ +

In this example, "test" always passes @foo/false into bar, which ensures that + it is dynamically called with the right calling conv (thus, the code is + perfectly well defined). If you run this through the inliner, you get this + (the explicit "or" is there so that the inliner doesn't dead code eliminate + a bunch of stuff): +

+ +
+define fastcc void @foo() {
+	ret void
+}
+define void @test() {
+	%X = or i1 false, false
+	br i1 %X, label %T.i, label %F.i
+T.i:
+	call void @foo()
+	br label %bar.exit
+F.i:
+	call fastcc void @foo()
+	br label %bar.exit
+bar.exit:
+	ret void
+}
+
+ +

Here you can see that the inlining pass made an undefined call to @foo with + the wrong calling convention. We really don't want to make the inliner have + to know about this sort of thing, so it needs to be valid code. In this case, + dead code elimination can trivially remove the undefined code. However, if %X + was an input argument to @test, the inliner would produce this: +

+ +
+define fastcc void @foo() {
+	ret void
+}
+
+define void @test(i1 %X) {
+	br i1 %X, label %T.i, label %F.i
+T.i:
+	call void @foo()
+	br label %bar.exit
+F.i:
+	call fastcc void @foo()
+	br label %bar.exit
+bar.exit:
+	ret void
+}
+
+ +

The interesting thing about this is that %X must be false for the +code to be well-defined, but no amount of dead code elimination will be able to +delete the broken call as unreachable. However, since instcombine/simplifycfg +turns the undefined call into unreachable, we end up with a branch on a +condition that goes to unreachable: a branch to unreachable can never happen, so +"-inline -instcombine -simplifycfg" is able to produce:

+ +
+define fastcc void @foo() {
+	ret void
+}
+define void @test(i1 %X) {
+F.i:
+	call fastcc void @foo()
+	ret void
+}
+
+ +
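To avoid the mismatch in the first place, a front-end simply sets the convention in both places. Here is a minimal sketch using the C++ API of this LLVM release (makeFastCC is a hypothetical front-end helper):

#include "llvm/CallingConv.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"

using namespace llvm;

// Keep the callee and every call site in agreement, as described above.
static void makeFastCC(Function *Callee, CallInst *Call) {
  Callee->setCallingConv(CallingConv::Fast);  // on the function...
  Call->setCallingConv(CallingConv::Fast);    // ...and on the call to it.
}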
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/GCCFEBuildInstrs.html b/final/docs/GCCFEBuildInstrs.html new file mode 100644 index 00000000000..8fe0c31cb64 --- /dev/null +++ b/final/docs/GCCFEBuildInstrs.html @@ -0,0 +1,279 @@ + + + + + + Building the LLVM GCC Front-End + + + +
+ Building the LLVM GCC Front-End +
+ +
1. Building llvm-gcc from Source
2. Building the Ada front-end
3. Building the Fortran front-end
4. License Information
+ +
+

Written by the LLVM Team

+
+ + +

Building llvm-gcc from Source

+ + +
+ +

This section describes how to acquire and build llvm-gcc 4.2, which is based +on the GCC 4.2.1 front-end. Supported languages are Ada, C, C++, Fortran, +Objective-C and Objective-C++. Note that the instructions for building these +front-ends are completely different (and much easier!) than those for building +llvm-gcc3 in the past.

+ +
    +
  1. Retrieve the appropriate llvm-gcc-4.2-version.source.tar.gz + archive from the LLVM web + site.

    + +

    It is also possible to download the sources of the llvm-gcc front-end from a read-only mirror using subversion. To check out the 4.2 code for the first time, use:

    + +
    +
    +svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk dst-directory
    +
    +
    + +

    After that, the code can be be updated in the destination directory + using:

    + +
    +
    svn update
    +
    + +

    The mirror is brought up to date every evening.

  2. Follow the directions in the top-level README.LLVM file for up-to-date instructions on how to build llvm-gcc. See below for building with support for Ada or Fortran.
+ +
+ + +

Building the Ada front-end

+ + +
+

Building with support for Ada amounts to following the directions in the +top-level README.LLVM file, adding ",ada" to EXTRALANGS, for example: +EXTRALANGS=,ada

+ +

There are some complications however:

+ +
  1. The only platform for which the Ada front-end is known to build is 32-bit Intel x86 running Linux. It is unlikely to build for other systems without some work.
  2. The build requires a compiler that supports Ada, C and C++. The Ada front-end is written in Ada, so an Ada compiler is needed to build it. Compilers known to work with the LLVM 2.7 release are gcc-4.2 and the 2005, 2006 and 2007 versions of the GNAT GPL Edition. GNAT GPL 2008, gcc-4.3 and later will not work. The LLVM parts of llvm-gcc are written in C++, so a C++ compiler is needed to build them. The rest of gcc is written in C. Some Linux distributions provide a version of gcc that supports all three languages (the Ada part often comes as an add-on package to the rest of gcc). Otherwise it is possible to combine two versions of gcc, one that supports Ada and C (such as the 2007 GNAT GPL Edition) and another which supports C++; see below.
  3. Because the Ada front-end is experimental, it is wise to build the compiler with checking enabled. This causes it to run much slower, but helps catch mistakes in the compiler (please report any problems using LLVM bugzilla).
  4. The Ada front-end fails to bootstrap, due to lack of LLVM support for setjmp/longjmp style exception handling (used internally by the compiler), so you must specify --disable-bootstrap.
+ +

Supposing appropriate compilers are available, llvm-gcc with Ada support can + be built on an x86-32 linux box using the following recipe:

+ +
    +
  1. Download the LLVM source and unpack it:

    + +
    +wget http://llvm.org/releases/2.7/llvm-2.7.tgz
    +tar xzf llvm-2.7.tgz
    +mv llvm-2.7 llvm
    +
    + +

    or check out the + latest version from subversion:

    + +
    svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
    + +
  2. Download the llvm-gcc-4.2 source and unpack it:

    + +
    +wget http://llvm.org/releases/2.7/llvm-gcc-4.2-2.7.source.tgz
    +tar xzf llvm-gcc-4.2-2.7.source.tgz
    +mv llvm-gcc-4.2-2.7.source llvm-gcc-4.2
    +
    + +

    or check out the + latest version from subversion:

    + +
    +svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk llvm-gcc-4.2
    +
    +
  3. Make a build directory llvm-objects for llvm and make it the current directory:

    + +
    +mkdir llvm-objects
    +cd llvm-objects
    +
    +
  4. Configure LLVM (here it is configured to install into /usr/local):

    + +
    +../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
    +
    + +

    If you have a multi-compiler setup and the C++ compiler is not the + default, then you can configure like this:

    + +
    +CXX=PATH_TO_C++_COMPILER ../llvm/configure --prefix=/usr/local --enable-optimized --enable-assertions
    +
    + +

    To compile without checking (not recommended), replace + --enable-assertions with --disable-assertions.

    + +
  5. Build LLVM:

    + +
    +make
    +
    +
  6. Install LLVM (optional):

    + +
    +make install
    +
    +
  7. Make a build directory llvm-gcc-4.2-objects for llvm-gcc and make it the current directory:

    + +
    +cd ..
    +mkdir llvm-gcc-4.2-objects
    +cd llvm-gcc-4.2-objects
    +
    +
  8. Configure llvm-gcc (here it is configured to install into /usr/local). The --enable-checking flag turns on sanity checks inside the compiler. To turn off these checks (not recommended), replace --enable-checking with --disable-checking. Additional languages can be appended to the --enable-languages switch, for example --enable-languages=ada,c,c++.

    + +
    +../llvm-gcc-4.2/configure --prefix=/usr/local --enable-languages=ada,c \
    +                          --enable-checking --enable-llvm=$PWD/../llvm-objects \
    +			  --disable-bootstrap --disable-multilib
    +
    + +

    If you have a multi-compiler setup, then you can configure like this:

    + +
    +export CC=PATH_TO_C_AND_ADA_COMPILER
    +export CXX=PATH_TO_C++_COMPILER
    +../llvm-gcc-4.2/configure --prefix=/usr/local --enable-languages=ada,c \
    +                          --enable-checking --enable-llvm=$PWD/../llvm-objects \
    +			  --disable-bootstrap --disable-multilib
    +
    +
  9. Build and install the compiler:

    + +
    +make
    +make install
    +
    +
+ +
+ + +

Building the Fortran front-end

+ + +
+

To build with support for Fortran, follow the directions in the top-level +README.LLVM file, adding ",fortran" to EXTRALANGS, for example:

+ +
+EXTRALANGS=,fortran
+
+ +
+ + +

License Information

+ + +
+

+The LLVM GCC frontend is licensed to you under the GNU General Public License +and the GNU Lesser General Public License. Please see the files COPYING and +COPYING.LIB for more details. +

+ +

+More information is available in the FAQ. +

+
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/GarbageCollection.html b/final/docs/GarbageCollection.html new file mode 100644 index 00000000000..56085ca710b --- /dev/null +++ b/final/docs/GarbageCollection.html @@ -0,0 +1,1387 @@ + + + + + Accurate Garbage Collection with LLVM + + + + + +
+ Accurate Garbage Collection with LLVM +
+ +
1. Introduction
2. Getting started
3. Core support
4. Compiler plugin interface
5. Implementing a collector runtime
6. References
+ +
+

Written by Chris Lattner and + Gordon Henriksen

+
+ + + + + +
+ +

Garbage collection is a widely used technique that frees the programmer from +having to know the lifetimes of heap objects, making software easier to produce +and maintain. Many programming languages rely on garbage collection for +automatic memory management. There are two primary forms of garbage collection: +conservative and accurate.

+ +

Conservative garbage collection often does not require any special support +from either the language or the compiler: it can handle non-type-safe +programming languages (such as C/C++) and does not require any special +information from the compiler. The +Boehm collector is +an example of a state-of-the-art conservative collector.

+ +

Accurate garbage collection requires the ability to identify all pointers in +the program at run-time (which requires that the source-language be type-safe in +most cases). Identifying pointers at run-time requires compiler support to +locate all places that hold live pointer variables at run-time, including the +processor stack and registers.

+ +

Conservative garbage collection is attractive because it does not require any +special compiler support, but it does have problems. In particular, because the +conservative garbage collector cannot know that a particular word in the +machine is a pointer, it cannot move live objects in the heap (preventing the +use of compacting and generational GC algorithms) and it can occasionally suffer +from memory leaks due to integer values that happen to point to objects in the +program. In addition, some aggressive compiler transformations can break +conservative garbage collectors (though these seem rare in practice).

+ +

Accurate garbage collectors do not suffer from any of these problems, but +they can suffer from degraded scalar optimization of the program. In particular, +because the runtime must be able to identify and update all pointers active in +the program, some optimizations are less effective. In practice, however, the +locality and performance benefits of using aggressive garbage collection +techniques dominates any low-level losses.

+ +

This document describes the mechanisms and interfaces provided by LLVM to +support accurate garbage collection.

+ +
+ + + + +
+ +

LLVM's intermediate representation provides garbage +collection intrinsics that offer support for a broad class of +collector models. For instance, the intrinsics permit:

+ +
  • semi-space collectors
  • mark-sweep collectors
  • generational collectors
  • reference counting
  • incremental collectors
  • concurrent collectors
  • cooperative collectors
+ +

We hope that the primitive support built into the LLVM IR is sufficient to +support a broad class of garbage collected languages including Scheme, ML, Java, +C#, Perl, Python, Lua, Ruby, other scripting languages, and more.

+ +

However, LLVM does not itself provide a garbage collector—this should +be part of your language's runtime library. LLVM provides a framework for +compile time code generation plugins. The role of these +plugins is to generate code and data structures which conforms to the binary +interface specified by the runtime library. This is similar to the +relationship between LLVM and DWARF debugging info, for example. The +difference primarily lies in the lack of an established standard in the domain +of garbage collection—thus the plugins.

+ +

The aspects of the binary interface with which LLVM's GC support is +concerned are:

+ +
  • Creation of GC-safe points within code where collection is allowed to execute safely.
  • Computation of the stack map. For each safe point in the code, object references within the stack frame must be identified so that the collector may traverse and perhaps update them.
  • Write barriers when storing object references to the heap. These are commonly used to optimize incremental scans in generational collectors.
  • Emission of read barriers when loading object references. These are useful for interoperating with concurrent collectors.
+ +

There are additional areas that LLVM does not directly address:

+ +
  • Registration of global roots with the runtime.
  • Registration of stack map entries with the runtime.
  • The functions used by the program to allocate memory, trigger a collection, etc.
  • Computation or compilation of type maps, or registration of them with the runtime. These are used to crawl the heap for object references.
+ +

In general, LLVM's support for GC does not include features which can be +adequately addressed with other features of the IR and does not specify a +particular binary interface. On the plus side, this means that you should be +able to integrate LLVM with an existing runtime. On the other hand, it leaves +a lot of work for the developer of a novel language. However, it's easy to get +started quickly and scale up to a more sophisticated implementation as your +compiler matures.

+ +
+ + + + + +
+ +

Using a GC with LLVM implies many things, for example:

+ +
  • Write a runtime library or find an existing one which implements a GC heap.
      1. Implement a memory allocator.
      2. Design a binary interface for the stack map, used to identify references within a stack frame on the machine stack.*
      3. Implement a stack crawler to discover functions on the call stack.*
      4. Implement a registry for global roots.
      5. Design a binary interface for type maps, used to identify references within heap objects.
      6. Implement a collection routine bringing together all of the above.
  • Emit compatible code from your compiler.
      • Initialization in the main function.
      • Use the gc "..." attribute to enable GC code generation (or F.setGC("...")).
      • Use @llvm.gcroot to mark stack roots.
      • Use @llvm.gcread and/or @llvm.gcwrite to manipulate GC references, if necessary.
      • Allocate memory using the GC allocation routine provided by the runtime library.
      • Generate type maps according to your runtime's binary interface.
  • Write a compiler plugin to interface LLVM with the runtime library.*
      • Lower @llvm.gcread and @llvm.gcwrite to appropriate code sequences.*
      • Compile LLVM's stack map to the binary form expected by the runtime.
  • Load the plugin into the compiler. Use llc -load or link the plugin statically with your language's compiler.*
  • Link program executables with the runtime.
+ +

To help with several of these tasks (those indicated with a *), LLVM +includes a highly portable, built-in ShadowStack code generator. It is compiled +into llc and works even with the interpreter and C backends.

+ +
+ + + + +
+ +

To turn the shadow stack on for your functions, first call:

+ +
F.setGC("shadow-stack");
+ +

for each function your compiler emits. Since the shadow stack is built into +LLVM, you do not need to load a plugin.

+ +

Your compiler must also use @llvm.gcroot as documented. +Don't forget to create a root for each intermediate value that is generated +when evaluating an expression. In h(f(), g()), the result of +f() could easily be collected if evaluating g() triggers a +collection.

+ +

There's no need to use @llvm.gcread and @llvm.gcwrite over +plain load and store for now. You will need them when +switching to a more advanced GC.

+ +
+ + + + +
+ +

The shadow stack doesn't imply a memory allocation algorithm. A semispace +collector or building atop malloc are great places to start, and can +be implemented with very little code.

+ +

When it comes time to collect, however, your runtime needs to traverse the +stack roots, and for this it needs to integrate with the shadow stack. Luckily, +doing so is very simple. (This code is heavily commented to help you +understand the data structure, but there are only 20 lines of meaningful +code.)

+ +
+ +
/// @brief The map for a single function's stack frame. One of these is
+///        compiled as constant data into the executable for each function.
+/// 
+/// Storage of metadata values is elided if the %metadata parameter to
+/// @llvm.gcroot is null.
+struct FrameMap {
+  int32_t NumRoots;    //< Number of roots in stack frame.
+  int32_t NumMeta;     //< Number of metadata entries. May be < NumRoots.
+  const void *Meta[0]; //< Metadata for each root.
+};
+
+/// @brief A link in the dynamic shadow stack. One of these is embedded in the
+///        stack frame of each function on the call stack.
+struct StackEntry {
+  StackEntry *Next;    //< Link to next stack entry (the caller's).
+  const FrameMap *Map; //< Pointer to constant FrameMap.
+  void *Roots[0];      //< Stack roots (in-place array).
+};
+
+/// @brief The head of the singly-linked list of StackEntries. Functions push
+///        and pop onto this in their prologue and epilogue.
+/// 
+/// Since there is only a global list, this technique is not threadsafe.
+StackEntry *llvm_gc_root_chain;
+
+/// @brief Calls Visitor(root, meta) for each GC root on the stack.
+///        root and meta are exactly the values passed to
+///        @llvm.gcroot.
+/// 
+/// Visitor could be a function to recursively mark live objects. Or it
+/// might copy them to another heap or generation.
+/// 
+/// @param Visitor A function to invoke for every GC root on the stack.
+void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
+  for (StackEntry *R = llvm_gc_root_chain; R; R = R->Next) {
+    unsigned i = 0;
+    
+    // For roots [0, NumMeta), the metadata pointer is in the FrameMap.
+    for (unsigned e = R->Map->NumMeta; i != e; ++i)
+      Visitor(&R->Roots[i], R->Map->Meta[i]);
+    
+    // For roots [NumMeta, NumRoots), the metadata pointer is null.
+    for (unsigned e = R->Map->NumRoots; i != e; ++i)
+      Visitor(&R->Roots[i], NULL);
+  }
+}
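For example, a collector entry point might drive visitGCRoots like this (a minimal sketch; markObject is a hypothetical tracing routine from your runtime, not part of the interface above):

// markObject is a hypothetical routine provided by your collector.
extern void markObject(void *Obj);

// Visitor callback: Root and Meta are exactly the values that were
// passed to @llvm.gcroot for this stack slot.
static void markRoot(void **Root, const void *Meta) {
  (void)Meta;  // a type map or tag could be consulted here
  if (*Root)
    markObject(*Root);
}

void collect(void) {
  visitGCRoots(markRoot);  // trace every live stack root
  // ... then sweep or copy, as appropriate for the collector ...
}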
+ + + + +
+ +

Unlike many GC algorithms which rely on a cooperative code generator to +compile stack maps, this algorithm carefully maintains a linked list of stack +roots [Henderson2002]. This so-called "shadow stack" +mirrors the machine stack. Maintaining this data structure is slower than using +a stack map compiled into the executable as constant data, but has a significant +portability advantage because it requires no special support from the target +code generator, and does not require tricky platform-specific code to crawl +the machine stack.

+ +

The tradeoff for this simplicity and portability is:

+ +
    +
  • High overhead per function call.
  • +
  • Not thread-safe.
  • +
+ +

Still, it's an easy way to get started. After your compiler and runtime are +up and running, writing a plugin will allow you to take +advantage of more advanced GC features of LLVM +in order to improve performance.

+ +
+ + + + + +
+ +

This section describes the garbage collection facilities provided by the +LLVM intermediate representation. The exact behavior +of these IR features is specified by the binary interface implemented by a +code generation plugin, not by this document.

+ +

These facilities are limited to those strictly necessary; they are not +intended to be a complete interface to any garbage collector. A program will +need to interface with the GC library using the facilities provided by that +program.

+ +
+ + + + +
+ define ty @name(...) gc "name" { ... +
+ +
+ +

The gc function attribute is used to specify the desired GC style +to the compiler. Its programmatic equivalent is the setGC method of +Function.

+ +

Setting gc "name" on a function triggers a search for a +matching code generation plugin "name"; it is that plugin which defines +the exact nature of the code generated to support GC. If none is found, the +compiler will raise an error.

+ +

Specifying the GC style on a per-function basis allows LLVM to link together +programs that use different garbage collection algorithms (or none at all).

+ +
+ + + + +
+ void @llvm.gcroot(i8** %ptrloc, i8* %metadata) +
+ +
+ +

The llvm.gcroot intrinsic is used to inform LLVM that a stack +variable references an object on the heap and is to be tracked for garbage +collection. The exact impact on generated code is specified by a compiler plugin.

+ +

A compiler which uses mem2reg to raise imperative code using alloca +into SSA form need only add a call to @llvm.gcroot for those variables +which a pointers into the GC heap.

+ +

It is also important to mark intermediate values with llvm.gcroot. +For example, consider h(f(), g()). Beware leaking the result of +f() in the case that g() triggers a collection.

+ +

The first argument must be a value referring to an alloca instruction +or a bitcast of an alloca. The second contains a pointer to metadata that +should be associated with the pointer, and must be a constant or global +value address. If your target collector uses tags, use a null pointer for +metadata.

+ +

The %metadata argument can be used to avoid requiring heap objects +to have 'isa' pointers or tag bits. [Appel89, Goldberg91, Tolmach94] If +specified, its value will be tracked along with the location of the pointer in +the stack frame.

+ +

Consider the following fragment of Java code:

+ +
+       {
+         Object X;   // A null-initialized reference to an object
+         ...
+       }
+
+ +

This block (which may be located in the middle of a function or in a loop +nest), could be compiled to this LLVM code:

+ +
+Entry:
+   ;; In the entry block for the function, allocate the
+   ;; stack space for X, which is an LLVM pointer.
+   %X = alloca %Object*
+   
+   ;; Tell LLVM that the stack space is a stack root.
+   ;; Java has type-tags on objects, so we pass null as metadata.
+   %tmp = bitcast %Object** %X to i8**
+   call void @llvm.gcroot(i8** %tmp, i8* null)
+   ...
+
+   ;; "CodeBlock" is the block corresponding to the start
+   ;;  of the scope above.
+CodeBlock:
+   ;; Java null-initializes pointers.
+   store %Object* null, %Object** %X
+
+   ...
+
+   ;; As the pointer goes out of scope, store a null value into
+   ;; it, to indicate that the value is no longer live.
+   store %Object* null, %Object** %X
+   ...
+
+ +
+ + + + +
+ +

Some collectors need to be informed when the mutator (the program that needs +garbage collection) either reads a pointer from or writes a pointer to a field +of a heap object. The code fragments inserted at these points are called +read barriers and write barriers, respectively. The amount of +code that needs to be executed is usually quite small and not on the critical +path of any computation, so the overall performance impact of the barrier is +tolerable.

+ +

Barriers often require access to the object pointer rather than the +derived pointer (which is a pointer to the field within the +object). Accordingly, these intrinsics take both pointers as separate arguments +for completeness. In this snippet, %object is the object pointer, and +%derived is the derived pointer:

+ +
+    ;; An array type.
+    %class.Array = type { %class.Object, i32, [0 x %class.Object*] }
+    ...
+
+    ;; Load the object pointer from a gcroot.
+    %object = load %class.Array** %object_addr
+
+    ;; Compute the derived pointer.
+    %derived = getelementptr %class.Array* %object, i32 0, i32 2, i32 %n
+ +

LLVM does not enforce this relationship between the object and derived +pointer (although a plugin might). However, it would be +an unusual collector that violated it.

+ +

The use of these intrinsics is naturally optional if the target GC does +require the corresponding barrier. Such a GC plugin will replace the intrinsic +calls with the corresponding load or store instruction if they +are used.

+ +
+ + + + +
+void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived) +
+ +
+ +

For write barriers, LLVM provides the llvm.gcwrite intrinsic +function. It has exactly the same semantics as a non-volatile store to +the derived pointer (the third argument). The exact code generated is specified +by a compiler plugin.

+ +

Many important algorithms require write barriers, including generational +and concurrent collectors. Additionally, write barriers could be used to +implement reference counting.

+ +
+ + + + +
+i8* @llvm.gcread(i8* %object, i8** %derived)
+
+ +
+ +

For read barriers, LLVM provides the llvm.gcread intrinsic function. +It has exactly the same semantics as a non-volatile load from the +derived pointer (the second argument). The exact code generated is specified by +a compiler plugin.

+ +

Read barriers are needed by fewer algorithms than write barriers, and may +have a greater performance impact since pointer reads are more frequent than +writes.

+ +
+ + + + + +
+ +

User code specifies which GC code generation to use with the gc +function attribute or, equivalently, with the setGC method of +Function.

+ +

To implement a GC plugin, it is necessary to subclass +llvm::GCStrategy, which can be accomplished in a few lines of +boilerplate code. LLVM's infrastructure provides access to several important +algorithms. For an uncontroversial collector, all that remains may be to +compile LLVM's computed stack map to assembly code (using the binary +representation expected by the runtime library). This can be accomplished in +about 100 lines of code.

+ +

This is not the appropriate place to implement a garbage collected heap or a +garbage collector itself. That code should exist in the language's runtime +library. The compiler plugin is responsible for generating code which +conforms to the binary interface defined by library, most essentially the +stack map.

+ +

To subclass llvm::GCStrategy and register it with the compiler:

+ +
// lib/MyGC/MyGC.cpp - Example LLVM GC plugin
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  class LLVM_LIBRARY_VISIBILITY MyGC : public GCStrategy {
+  public:
+    MyGC() {}
+  };
+  
+  GCRegistry::Add<MyGC>
+  X("mygc", "My bespoke garbage collector.");
+}
+ +

This boilerplate collector does nothing. More specifically:

+ +
  • llvm.gcread calls are replaced with the corresponding load instruction.
  • llvm.gcwrite calls are replaced with the corresponding store instruction.
  • No safe points are added to the code.
  • The stack map is not compiled into the executable.
+ +

Using the LLVM makefiles (like the sample +project), this code can be compiled as a plugin using a simple +makefile:

+ +
# lib/MyGC/Makefile
+
+LEVEL := ../..
+LIBRARYNAME = MyGC
+LOADABLE_MODULE = 1
+
+include $(LEVEL)/Makefile.common
+ +

Once the plugin is compiled, code using it may be compiled using llc +-load=MyGC.so (though MyGC.so may have some other +platform-specific extension):

+ +
$ cat sample.ll
+define void @f() gc "mygc" {
+entry:
+        ret void
+}
+$ llvm-as < sample.ll | llc -load=MyGC.so
+ +

It is also possible to statically link the collector plugin into tools, such +as a language-specific compiler front-end.

+ +
+ + + + +
+ +

GCStrategy provides a range of features through which a plugin +may do useful work. Some of these are callbacks, some are algorithms that can +be enabled, disabled, or customized. This matrix summarizes the supported (and +planned) features and correlates them with the collection techniques which +typically require them.

Algorithm                   Done
--------------------------------
stack map                   yes
initialize roots            yes
derived pointers            NO*
custom lowering             yes
    gcroot                  yes
    gcwrite                 yes
    gcread                  yes
safe points
    in calls                yes
    before calls            yes
    for loops               NO
    before escape           yes
emit code at safe points    NO
output
    assembly                yes
    JIT                     NO
    obj                     NO
live analysis               NO
register map                NO

* Derived pointers only pose a hazard to copying collectors.
+ +

To be clear, the collection techniques above are defined as:

+ +
Shadow Stack
    The mutator carefully maintains a linked list of stack roots.

Reference Counting
    The mutator maintains a reference count for each object and frees an object when its count falls to zero.

Mark-Sweep
    When the heap is exhausted, the collector marks reachable objects starting from the roots, then deallocates unreachable objects in a sweep phase.

Copying
    As reachability analysis proceeds, the collector copies objects from one heap area to another, compacting them in the process. Copying collectors enable highly efficient "bump pointer" allocation and can improve locality of reference.

Incremental
    (Including generational collectors.) Incremental collectors generally have all the properties of a copying collector (regardless of whether the mature heap is compacting), but bring the added complexity of requiring write barriers.

Threaded
    Denotes a multithreaded mutator; the collector must still stop the mutator ("stop the world") before beginning reachability analysis. Stopping a multithreaded mutator is a complicated problem. It generally requires highly platform-specific code in the runtime, and the production of carefully designed machine code at safe points.

Concurrent
    In this technique, the mutator and the collector run concurrently, with the goal of eliminating pause times. In a cooperative collector, the mutator further aids with collection should a pause occur, allowing collection to take advantage of multiprocessor hosts. The "stop the world" problem of threaded collectors is generally still present to a limited extent. Sophisticated marking algorithms are necessary. Read barriers may be necessary.
+ +

As the matrix indicates, LLVM's garbage collection infrastructure is already +suitable for a wide variety of collectors, but does not currently extend to +multithreaded programs. This will be added in the future as there is +interest.

+ +
+ + + + +
+ +

LLVM automatically computes a stack map. One of the most important features +of a GCStrategy is to compile this information into the executable in +the binary representation expected by the runtime library.

+ +

The stack map consists of the location and identity of each GC root in the +each function in the module. For each root:

+ +
  • RootNum: The index of the root.
  • StackOffset: The offset of the object relative to the frame pointer.
  • RootMetadata: The value passed as the %metadata parameter to the @llvm.gcroot intrinsic.
+ +

Also, for the function as a whole:

+ +
  • getFrameSize(): The overall size of the function's initial stack frame, not accounting for any dynamic allocation.
  • roots_size(): The count of roots in the function.
+ +

To access the stack map, use GCFunctionMetadata::roots_begin() and +-end() from the GCMetadataPrinter:

+ +
for (iterator I = begin(), E = end(); I != E; ++I) {
+  GCFunctionInfo *FI = *I;
+  unsigned FrameSize = FI->getFrameSize();
+  size_t RootCount = FI->roots_size();
+
+  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+                                      RE = FI->roots_end();
+                                      RI != RE; ++RI) {
+    int RootNum = RI->Num;
+    int RootStackOffset = RI->StackOffset;
+    Constant *RootMetadata = RI->Metadata;
+  }
+}
+ +

If the llvm.gcroot intrinsic is eliminated before code generation by +a custom lowering pass, LLVM will compute an empty stack map. This may be useful +for collector plugins which implement reference counting or a shadow stack.

+ +
+ + + + + +
+ +
MyGC::MyGC() {
+  InitRoots = true;
+}
+ +

When set, LLVM will automatically initialize each root to null upon +entry to the function. This prevents the GC's sweep phase from visiting +uninitialized pointers, which will almost certainly cause it to crash. This +initialization occurs before custom lowering, so the two may be used +together.

+ +

Since LLVM does not yet compute liveness information, there is no means of +distinguishing an uninitialized stack root from an initialized one. Therefore, +this feature should be used by all GC plugins. It is enabled by default.

+ +
+ + + + + +
+ +

For GCs which use barriers or unusual treatment of stack roots, these +flags allow the collector to perform arbitrary transformations of the LLVM +IR:

+ +
class MyGC : public GCStrategy {
+public:
+  MyGC() {
+    CustomRoots = true;
+    CustomReadBarriers = true;
+    CustomWriteBarriers = true;
+  }
+  
+  virtual bool initializeCustomLowering(Module &M);
+  virtual bool performCustomLowering(Function &F);
+};
+ +

If any of these flags are set, then LLVM suppresses its default lowering for +the corresponding intrinsics and instead calls +performCustomLowering.

+ +

LLVM's default action for each intrinsic is as follows:

+ +
  • llvm.gcroot: Leave it alone. The code generator must see it or the stack map will not be computed.
  • llvm.gcread: Substitute a load instruction.
  • llvm.gcwrite: Substitute a store instruction.
+ +

If CustomReadBarriers or CustomWriteBarriers are specified, +then performCustomLowering must eliminate the +corresponding barriers.

+ +

performCustomLowering must comply with the same restrictions as FunctionPass::runOnFunction. +Likewise, initializeCustomLowering has the same semantics as Pass::doInitialization(Module&).

+ +

The following can be used as a template:

+ +
#include "llvm/Module.h"
+#include "llvm/IntrinsicInst.h"
+
+bool MyGC::initializeCustomLowering(Module &M) {
+  return false;
+}
+
+bool MyGC::performCustomLowering(Function &F) {
+  bool MadeChange = false;
+  
+  for (Function::iterator BB = F.begin(), BE = F.end(); BB != BE; ++BB)
+    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; )
+      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
+        if (Function *Callee = CI->getCalledFunction())
+          switch (Callee->getIntrinsicID()) {
+          case Intrinsic::gcwrite:
+            // Handle llvm.gcwrite: a real plugin would first insert the
+            // equivalent store (plus any barrier code) before the call.
+            CI->eraseFromParent();
+            MadeChange = true;
+            break;
+          case Intrinsic::gcread:
+            // Handle llvm.gcread: likewise, insert the equivalent load
+            // (plus any barrier code) and replace the call's uses first.
+            CI->eraseFromParent();
+            MadeChange = true;
+            break;
+          case Intrinsic::gcroot:
+            // Handle llvm.gcroot: only erase it if your collector really
+            // has no use for the root; otherwise leave it for codegen.
+            CI->eraseFromParent();
+            MadeChange = true;
+            break;
+          }
+  
+  return MadeChange;
+}
+ +
+ + + + + +
+ +

LLVM can compute four kinds of safe points:

+ +
namespace GC {
+  /// PointKind - The type of a collector-safe point.
+  /// 
+  enum PointKind {
+    Loop,    //< Instr is a loop (backwards branch).
+    Return,  //< Instr is a return instruction.
+    PreCall, //< Instr is a call instruction.
+    PostCall //< Instr is the return address of a call.
+  };
+}
+ +

A collector can request any combination of the four by setting the +NeededSafePoints mask:

+ +
MyGC::MyGC() {
+  NeededSafePoints = 1 << GC::Loop
+                   | 1 << GC::Return
+                   | 1 << GC::PreCall
+                   | 1 << GC::PostCall;
+}
+ +

It can then use the following routines to access safe points.

+ +
for (iterator I = begin(), E = end(); I != E; ++I) {
+  GCFunctionInfo *MD = *I;
+  size_t PointCount = MD->size();
+
+  for (GCFunctionInfo::iterator PI = MD->begin(),
+                                PE = MD->end(); PI != PE; ++PI) {
+    GC::PointKind PointKind = PI->Kind;
+    unsigned PointNum = PI->Num;
+  }
+}
+
+ +

Almost every collector requires PostCall safe points, since these +correspond to the moments when the function is suspended during a call to a +subroutine.

+ +

Threaded programs generally require Loop safe points to guarantee +that the application will reach a safe point within a bounded amount of time, +even if it is executing a long-running loop which contains no function +calls.

+ +

Threaded collectors may also require Return and PreCall +safe points to implement "stop the world" techniques using self-modifying code, +where it is important that the program not exit the function without reaching a +safe point (because only the topmost function has been patched).

+ +
+ + + + + +
+ +

LLVM allows a plugin to print arbitrary assembly code before and after the +rest of a module's assembly code. At the end of the module, the GC can compile +the LLVM stack map into assembly code. (At the beginning, this information is not +yet computed.)

+ +

Since AsmWriter and CodeGen are separate components of LLVM, a separate +abstract base class and registry is provided for printing assembly code, the +GCMetadaPrinter and GCMetadataPrinterRegistry. The AsmWriter +will look for such a subclass if the GCStrategy sets +UsesMetadata:

+ +
MyGC::MyGC() {
+  UsesMetadata = true;
+}
+ +

This separation allows JIT-only clients to be smaller.

+ +

Note that LLVM does not currently have analogous APIs to support code +generation in the JIT, nor using the object writers.

+ +
// lib/MyGC/MyGCPrinter.cpp - Example LLVM GC printer
+
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+using namespace llvm;
+
+namespace {
+  class LLVM_LIBRARY_VISIBILITY MyGCPrinter : public GCMetadataPrinter {
+  public:
+    virtual void beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                               const TargetAsmInfo &TAI);
+  
+    virtual void finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                                const TargetAsmInfo &TAI);
+  };
+  
+  GCMetadataPrinterRegistry::Add<MyGCPrinter>
+  X("mygc", "My bespoke garbage collector.");
+}
+ +

The collector should use AsmPrinter and TargetAsmInfo to +print portable assembly code to the std::ostream. The collector itself +contains the stack map for the entire module, and may access the +GCFunctionInfo using its own begin() and end() +methods. Here's a realistic example:

+ +
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetAsmInfo.h"
+
+void MyGCPrinter::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                                const TargetAsmInfo &TAI) {
+  // Nothing to do.
+}
+
+void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                                 const TargetAsmInfo &TAI) {
+  // Set up for emitting addresses.
+  const char *AddressDirective;
+  int AddressAlignLog;
+  if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+    AddressDirective = TAI.getData32bitsDirective();
+    AddressAlignLog = 2;
+  } else {
+    AddressDirective = TAI.getData64bitsDirective();
+    AddressAlignLog = 3;
+  }
+  
+  // Put this in the data section.
+  AP.SwitchToDataSection(TAI.getDataSection());
+  
+  // For each function...
+  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+    GCFunctionInfo &MD = **FI;
+    
+    // Emit this data structure:
+    // 
+    // struct {
+    //   int32_t PointCount;
+    //   struct {
+    //     void *SafePointAddress;
+    //     int32_t LiveCount;
+    //     int32_t LiveOffsets[LiveCount];
+    //   } Points[PointCount];
+    // } __gcmap_<FUNCTIONNAME>;
+    
+    // Align to address width.
+    AP.EmitAlignment(AddressAlignLog);
+    
+    // Emit the symbol by which the stack map entry can be found.
+    std::string Symbol;
+    Symbol += TAI.getGlobalPrefix();
+    Symbol += "__gcmap_";
+    Symbol += MD.getFunction().getName();
+    if (const char *GlobalDirective = TAI.getGlobalDirective())
+      OS << GlobalDirective << Symbol << "\n";
+    OS << TAI.getGlobalPrefix() << Symbol << ":\n";
+    
+    // Emit PointCount.
+    AP.EmitInt32(MD.size());
+    AP.EOL("safe point count");
+    
+    // And each safe point...
+    for (GCFunctionInfo::iterator PI = MD.begin(),
+                                     PE = MD.end(); PI != PE; ++PI) {
+      // Align to address width.
+      AP.EmitAlignment(AddressAlignLog);
+      
+      // Emit the address of the safe point.
+      OS << AddressDirective
+         << TAI.getPrivateGlobalPrefix() << "label" << PI->Num;
+      AP.EOL("safe point address");
+      
+      // Emit the stack frame size.
+      AP.EmitInt32(MD.getFrameSize());
+      AP.EOL("stack frame size");
+      
+      // Emit the number of live roots in the function.
+      AP.EmitInt32(MD.live_size(PI));
+      AP.EOL("live root count");
+      
+      // And for each live root...
+      for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
+                                            LE = MD.live_end(PI);
+                                            LI != LE; ++LI) {
+        // Print its offset within the stack frame.
+        AP.EmitInt32(LI->StackOffset);
+        AP.EOL("stack offset");
+      }
+    }
+  }
+}
+
+ +
+ + + + + + +
+ +

[Appel89] Runtime Tags Aren't Necessary. Andrew +W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.

+ +

[Goldberg91] Tag-free garbage collection for +strongly typed programming languages. Benjamin Goldberg. ACM SIGPLAN +PLDI'91.

+ +

[Tolmach94] Tag-free garbage collection using +explicit type parameters. Andrew Tolmach. Proceedings of the 1994 ACM +conference on LISP and functional programming.

+ +

[Henderson2002] +Accurate Garbage Collection in an Uncooperative Environment. +Fergus Henderson. International Symposium on Memory Management 2002.

+ +
+ + + + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/GetElementPtr.html b/final/docs/GetElementPtr.html new file mode 100644 index 00000000000..41c45cab12d --- /dev/null +++ b/final/docs/GetElementPtr.html @@ -0,0 +1,739 @@ + + + + + The Often Misunderstood GEP Instruction + + + + + +
+ The Often Misunderstood GEP Instruction +
+ +
1. Introduction
2. Address Computation
   1. Why is the extra 0 index required?
   2. What is dereferenced by GEP?
   3. Why can you index through the first pointer but not subsequent ones?
   4. Why don't GEP x,0,0,1 and GEP x,1 alias?
   5. Why do GEP x,1,0,0 and GEP x,1 alias?
   6. Can GEP index into vector elements?
   7. What effect do address spaces have on GEPs?
   8. How is GEP different from ptrtoint, arithmetic, and inttoptr?
   9. I'm writing a backend for a target which needs custom lowering for GEP. How do I do this?
   10. How does VLA addressing work with GEPs?
3. Rules
   1. What happens if an array index is out of bounds?
   2. Can array indices be negative?
   3. Can I compare two values computed with GEPs?
   4. Can I do GEP with a different pointer type than the type of the underlying object?
   5. Can I cast an object's address to integer and add it to null?
   6. Can I compute the distance between two objects, and add that value to one address to compute the other address?
   7. Can I do type-based alias analysis on LLVM IR?
   8. What happens if a GEP computation overflows?
   9. How can I tell if my front-end is following the rules?
4. Rationale
   1. Why is GEP designed this way?
   2. Why do struct member indices always use i32?
   3. What's an uglygep?
5. Summary
+ +
+

Written by: Reid Spencer.

+
+ + + + + + +
+

This document seeks to dispel the mystery and confusion surrounding LLVM's + GetElementPtr (GEP) instruction. + Questions about the wily GEP instruction are + probably the most frequently occurring questions once a developer gets down to + coding with LLVM. Here we lay out the sources of confusion and show that the + GEP instruction is really quite simple. +

+
+ + + + +
+

When people are first confronted with the GEP instruction, they tend to + relate it to known concepts from other programming paradigms, most notably C + array indexing and field selection. GEP closely resembles C array indexing + and field selection, however it's is a little different and this leads to + the following questions.

+
+ + + +
+

Quick answer: The index stepping through the first operand.

+

The confusion with the first index usually arises from thinking about + the GetElementPtr instruction as if it was a C index operator. They aren't the + same. For example, when we write, in "C":

+ +
+
+AType *Foo;
+...
+X = &Foo->F;
+
+
+ +

it is natural to think that there is only one index, the selection of the + field F. However, in this example, Foo is a pointer. That + pointer must be indexed explicitly in LLVM. C, on the other hand, indices + through it transparently. To arrive at the same address location as the C + code, you would provide the GEP instruction with two index operands. The + first operand indexes through the pointer; the second operand indexes the + field F of the structure, just as if you wrote:

+ +
+
+X = &Foo[0].F;
+
+
+ +

Sometimes this question gets rephrased as:

+

Why is it okay to index through the first pointer, but + subsequent pointers won't be dereferenced?

+

The answer is simply because memory does not have to be accessed to + perform the computation. The first operand to the GEP instruction must be a + value of a pointer type. The value of the pointer is provided directly to + the GEP instruction as an operand without any need for accessing memory. It + must, therefore be indexed and requires an index operand. Consider this + example:

+ +
+
+struct munger_struct {
+  int f1;
+  int f2;
+};
+void munge(struct munger_struct *P) {
+  P[0].f1 = P[1].f1 + P[2].f2;
+}
+...
+munger_struct Array[3];
+...
+munge(Array);
+
+
+ +

In this "C" example, the front end compiler (llvm-gcc) will generate three + GEP instructions for the three indices through "P" in the assignment + statement. The function argument P will be the first operand of each + of these GEP instructions. The second operand indexes through that pointer. + The third operand will be the field offset into the + struct munger_struct type, for either the f1 or + f2 field. So, in LLVM assembly the munge function looks + like:

+ +
+
+define void @munge(%struct.munger_struct* %P) {
+entry:
+  %tmp1 = getelementptr %struct.munger_struct* %P, i32 1, i32 0
+  %tmp2 = load i32* %tmp1
+  %tmp6 = getelementptr %struct.munger_struct* %P, i32 2, i32 1
+  %tmp7 = load i32* %tmp6
+  %tmp8 = add i32 %tmp7, %tmp2
+  %tmp9 = getelementptr %struct.munger_struct* %P, i32 0, i32 0
+  store i32 %tmp8, i32* %tmp9
+  ret void
+}
+
+
+ +

In each case the first operand is the pointer through which the GEP + instruction starts. The same is true whether the first operand is an + argument, allocated memory, or a global variable.

+

To make this clear, let's consider a more obtuse example:

+ +
+
+%MyVar = uninitialized global i32
+...
+%idx1 = getelementptr i32* %MyVar, i64 0
+%idx2 = getelementptr i32* %MyVar, i64 1
+%idx3 = getelementptr i32* %MyVar, i64 2
+
+
+ +

These GEP instructions are simply making address computations from the + base address of MyVar. They compute, as follows (using C syntax): +

+ +
+
+idx1 = (char*) &MyVar + 0
+idx2 = (char*) &MyVar + 4
+idx3 = (char*) &MyVar + 8
+
+
+ +

Since the type i32 is known to be four bytes long, the indices + 0, 1 and 2 translate into memory offsets of 0, 4, and 8, respectively. No + memory is accessed to make these computations because the address of + %MyVar is passed directly to the GEP instructions.

+

The obtuse part of this example is in the cases of %idx2 and + %idx3. They result in the computation of addresses that point to + memory past the end of the %MyVar global, which is only one + i32 long, not three i32s long. While this is legal in LLVM, + it is inadvisable because any load or store with the pointer that results + from these GEP instructions would produce undefined results.

+
+ + + + +
+

Quick answer: there are no superfluous indices.

+

This question arises most often when the GEP instruction is applied to a + global variable which is always a pointer type. For example, consider + this:

+ +
+
+%MyStruct = uninitialized global { float*, i32 }
+...
+%idx = getelementptr { float*, i32 }* %MyStruct, i64 0, i32 1
+
+
+ +

The GEP above yields an i32* by indexing the i32 typed + field of the structure %MyStruct. When people first look at it, they + wonder why the i64 0 index is needed. However, a closer inspection + of how globals and GEPs work reveals the need. Becoming aware of the following + facts will dispel the confusion:

+
  1. The type of %MyStruct is not { float*, i32 } but rather { float*, i32 }*. That is, %MyStruct is a pointer to a structure containing a pointer to a float and an i32.
  2. Point #1 is evidenced by noticing the type of the first operand of the GEP instruction (%MyStruct) which is { float*, i32 }*.
  3. The first index, i64 0 is required to step over the global variable %MyStruct. Since the first argument to the GEP instruction must always be a value of pointer type, the first index steps through that pointer. A value of 0 means 0 elements offset from that pointer.
  4. The second index, i32 1 selects the second field of the structure (the i32).
+
+ + + +
+

Quick answer: nothing.

+

The GetElementPtr instruction dereferences nothing. That is, it doesn't + access memory in any way. That's what the Load and Store instructions are for. + GEP is only involved in the computation of addresses. For example, consider + this:

+ +
+
+%MyVar = uninitialized global { [40 x i32 ]* }
+...
+%idx = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0, i64 0, i64 17
+
+
+ +

In this example, we have a global variable, %MyVar that is a + pointer to a structure containing a pointer to an array of 40 ints. The + GEP instruction seems to be accessing the 18th integer of the structure's + array of ints. However, this is actually an illegal GEP instruction. It + won't compile. The reason is that the pointer in the structure must + be dereferenced in order to index into the array of 40 ints. Since the + GEP instruction never accesses memory, it is illegal.

+

In order to access the 18th integer in the array, you would need to do the + following:

+ +
+
+%idx1 = getelementptr { [40 x i32]* }* %MyVar, i64 0, i32 0
+%arr = load [40 x i32]** %idx1
+%idx2 = getelementptr [40 x i32]* %arr, i64 0, i64 17
+
+
+ +

In this case, we have to load the pointer in the structure with a load + instruction before we can index into the array. If the example was changed + to:

+ +
+
+%MyVar = uninitialized global { [40 x i32 ] }
+...
+%idx = getelementptr { [40 x i32] }* %MyVar, i64 0, i32 0, i64 17
+
+
+ +

then everything works fine. In this case, the structure does not contain a + pointer and the GEP instruction can index through the global variable, + into the first field of the structure and access the 18th i32 in the + array there.

+
+ + + +
+

Quick Answer: They compute different address locations.

+

If you look at the first indices in these GEP + instructions you find that they are different (0 and 1), therefore the address + computation diverges with that index. Consider this example:

+ +
+
+%MyVar = global { [10 x i32 ] }
+%idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 0, i32 0, i64 1
+%idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1
+
+
+ +

In this example, idx1 computes the address of the second integer + in the array that is in the structure in %MyVar, that is + MyVar+4. The type of idx1 is i32*. However, + idx2 computes the address of the next structure after + %MyVar. The type of idx2 is { [10 x i32] }* and its + value is equivalent to MyVar + 40 because it indexes past the ten + 4-byte integers in MyVar. Obviously, in such a situation, the + pointers don't alias.

+ +
+ + + +
+

Quick Answer: They compute the same address location.

+

These two GEP instructions will compute the same address because indexing + through the 0th element does not change the address. However, it does change + the type. Consider this example:

+ +
+
+%MyVar = global { [10 x i32 ] }
+%idx1 = getelementptr { [10 x i32 ] }* %MyVar, i64 1, i32 0, i64 0
+%idx2 = getelementptr { [10 x i32 ] }* %MyVar, i64 1
+
+
+ +

In this example, the value of %idx1 is %MyVar+40 and + its type is i32*. The value of %idx2 is also + MyVar+40 but its type is { [10 x i32] }*.

+
+ + + + +
+

This hasn't always been forcefully disallowed, though it's not recommended. + It leads to awkward special cases in the optimizers, and fundamental + inconsistency in the IR. In the future, it will probably be outright + disallowed.

+ +
+ + + + +
+

None, except that the address space qualifier on the first operand pointer + type always matches the address space qualifier on the result type.

+ +
+ + + + +
+

It's very similar; there are only subtle differences.

+ +

With ptrtoint, you have to pick an integer type. One approach is to pick i64; + this is safe on everything LLVM supports (LLVM internally assumes pointers + are never wider than 64 bits in many places), and the optimizer will actually + narrow the i64 arithmetic down to the actual pointer size on targets which + don't support 64-bit arithmetic in most cases. However, there are some cases + where it doesn't do this. With GEP you can avoid this problem. + +

Also, GEP carries additional pointer aliasing rules. It's invalid to take a + GEP from one object, address into a different separately allocated + object, and dereference it. IR producers (front-ends) must follow this rule, + and consumers (optimizers, specifically alias analysis) benefit from being + able to rely on it. See the Rules section for more + information.

+ +

And, GEP is more concise in common cases.

+ +

However, for the underlying integer computation implied, there + is no difference.

+ +
+ + + + +
+

You don't. The integer computation implied by a GEP is target-independent. + Typically what you'll need to do is make your backend pattern-match + expressions trees involving ADD, MUL, etc., which are what GEP is lowered + into. This has the advantage of letting your code work correctly in more + cases.

+ +

GEP does use target-dependent parameters for the size and layout of data + types, which targets can customize.

+ +

If you require support for addressing units which are not 8 bits, you'll + need to fix a lot of code in the backend, with GEP lowering being only a + small piece of the overall picture.

+ +
+ + + + +
+

GEPs don't natively support VLAs. LLVM's type system is entirely static, + and GEP address computations are guided by an LLVM type.

+ +

VLA indices can be implemented as linearized indices. For example, an + expression like X[a][b][c], must be effectively lowered into a form + like X[a*m+b*n+c], so that it appears to the GEP as a single-dimensional + array reference.

+ +

This means if you want to write an analysis which understands array + indices and you want to support VLAs, your code will have to be + prepared to reverse-engineer the linearization. One way to solve this + problem is to use the ScalarEvolution library, which always presents + VLA and non-VLA indexing in the same manner.

+
+ + + + + + + + +
+

There are two senses in which an array index can be out of bounds.

+ +

First, there's the array type which comes from the (static) type of + the first operand to the GEP. Indices greater than the number of elements + in the corresponding static array type are valid. There is no problem with + out of bounds indices in this sense. Indexing into an array only depends + on the size of the array element, not the number of elements.

+ +

A common example of how this is used is arrays where the size is not known. + It's common to use array types with zero length to represent these. The + fact that the static type says there are zero elements is irrelevant; it's + perfectly valid to compute arbitrary element indices, as the computation + only depends on the size of the array element, not the number of + elements. Note that zero-sized arrays are not a special case here.

+ +

This sense is unconnected with inbounds keyword. The + inbounds keyword is designed to describe low-level pointer + arithmetic overflow conditions, rather than high-level array + indexing rules. + +

Analysis passes which wish to understand array indexing should not + assume that the static array type bounds are respected.

+ +

The second sense of being out of bounds is computing an address that's + beyond the actual underlying allocated object.

+ +

With the inbounds keyword, the result value of the GEP is + undefined if the address is outside the actual underlying allocated + object and not the address one-past-the-end.

+ +

Without the inbounds keyword, there are no restrictions + on computing out-of-bounds addresses. Obviously, performing a load or + a store requires an address of allocated and sufficiently aligned + memory. But the GEP itself is only concerned with computing addresses.
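A brief sketch of the difference (hypothetical names):

; With inbounds: undefined unless the result stays within the object
; %p points into (or one past its end):
%q = getelementptr inbounds i32* %p, i64 1

; Without inbounds: any address may be computed, though dereferencing
; it still requires allocated, sufficiently aligned memory:
%r = getelementptr i32* %p, i64 1000000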

+ +
+ + + +
+

Yes. This is basically a special case of array indices being out + of bounds.

+ +
+ + + +
+

Yes. If both addresses are within the same allocated object, or + one-past-the-end, you'll get the comparison result you expect. If either + is outside of it, integer arithmetic wrapping may occur, so the + comparison may not be meaningful.
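For example (a sketch with hypothetical names), two pointers derived from the same base can be compared directly:

%a = getelementptr i32* %base, i64 %i
%b = getelementptr i32* %base, i64 %j

; Meaningful while %a and %b stay within (or one past) the same object:
%lt = icmp ult i32* %a, %b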

+ +
+ + + +
+

Yes. There are no restrictions on bitcasting a pointer value to an arbitrary + pointer type. The types in a GEP serve only to define the parameters for the + underlying integer computation. They need not correspond with the actual + type of the underlying object.

+ +

Furthermore, loads and stores don't have to use the same types as the type + of the underlying object. Types in this context serve only to specify + memory size and alignment. Beyond that there are merely a hint to the + optimizer indicating how the value will likely be used.
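As a sketch (the global @g is hypothetical), an i64 object can be addressed and loaded as if it were an array of i32:

@g = global i64 0

; The i32 types below merely set the sizes used by the computation
; and the load; they need not match the object's declared type:
%p = bitcast i64* @g to i32*
%q = getelementptr i32* %p, i64 1
%v = load i32* %q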

+ +
+ + + +
+

You can compute an address that way, but if you use GEP to do the add, + you can't use that pointer to actually access the object, unless the + object is managed outside of LLVM.

+ +

The underlying integer computation is sufficiently defined; null has a + defined value -- zero -- and you can add whatever value you want to it.

+ +

However, it's invalid to access (load from or store to) an LLVM-aware + object with such a pointer. This includes GlobalVariables, Allocas, and + objects pointed to by noalias pointers.

+ +

If you really need this functionality, you can do the arithmetic with + explicit integer instructions, and use inttoptr to convert the result to + an address. Most of GEP's special aliasing rules do not apply to pointers + computed from ptrtoint, arithmetic, and inttoptr sequences.
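A sketch of the two forms (hypothetical names; %addr is assumed to hold the object's address as an integer):

; GEP can form the address, but the result must not be used to
; access an LLVM-aware object:
%p = getelementptr i8* null, i64 %addr

; The inttoptr form does not carry GEP's special aliasing rules:
%q = inttoptr i64 %addr to i8*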

+ +
+ + + +
+

As with arithmetic on null, You can use GEP to compute an address that + way, but you can't use that pointer to actually access the object if you + do, unless the object is managed outside of LLVM.

+ +

Also as above, ptrtoint and inttoptr provide an alternative way to do this + which do not have this restriction.

+ +
+ + + +
+

You can't do type-based alias analysis using LLVM's built-in type system, + because LLVM has no restrictions on mixing types in addressing, loads or + stores.

+ +

It would be possible to add special annotations to the IR, probably using + metadata, to describe a different type system (such as the C type system), + and do type-based aliasing on top of that. This is a much bigger + undertaking though.

+ +
+ + + + +
+

If the GEP lacks the inbounds keyword, the value is the result + from evaluating the implied two's complement integer computation. However, + since there's no guarantee of where an object will be allocated in the + address space, such values have limited meaning.

+ +

If the GEP has the inbounds keyword, the result value is + undefined (a "trap value") if the GEP + overflows (i.e. wraps around the end of the address space).

+ +

As such, there are some ramifications of this for inbounds GEPs: scales + implied by array/vector/pointer indices are always known to be "nsw" since + they are signed values that are scaled by the element size. These values + are also allowed to be negative (e.g. "gep i32 *%P, i32 -1") but the + pointer itself is logically treated as an unsigned value. This means that + GEPs have an asymmetric relation between the pointer base (which is treated + as unsigned) and the offset applied to it (which is treated as signed). The + result of the additions within the offset calculation cannot have signed + overflow, but when applied to the base pointer, there can be signed + overflow. +

+ + +
+ + + + +
+

There is currently no checker for the getelementptr rules. Currently, + the only way to do this is to manually check each place in your front-end + where GetElementPtr operators are created.

+ +

It's not possible to write a checker which could find all rule + violations statically. It would be possible to write a checker which + works by instrumenting the code with dynamic checks though. Alternatively, + it would be possible to write a static checker which catches a subset of + possible problems. However, no such checker exists today.

+ +
+ + + + + + + + +
+

The design of GEP has the following goals, in rough unofficial + order of priority:

+
    +
  • Support C, C-like languages, and languages which can be + conceptually lowered into C (this covers a lot).
  • +
  • Support optimizations such as those that are common in + C compilers. In particular, GEP is a cornerstone of LLVM's + pointer aliasing model.
  • +
  • Provide a consistent method for computing addresses so that + address computations don't need to be a part of load and + store instructions in the IR.
  • +
  • Support non-C-like languages, to the extent that it doesn't + interfere with other goals.
  • +
  • Minimize target-specific information in the IR.
  • +
+

Why do struct member indices always use i32?

The specific type i32 is probably just a historical artifact; however, it's wide enough for all practical purposes, so there's been no need to change it. It doesn't necessarily imply i32 address arithmetic; it's just an identifier which identifies a field in a struct. Requiring that all struct indices be the same reduces the range of possibilities for cases where two GEPs are effectively the same but have distinct operand types.
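For example (a sketch; the type %st and the names are hypothetical), the field index is always an i32 constant regardless of the target's pointer width:

%st = type { i8, i32 }

; "i32 1" is a field identifier selecting the second member,
; not an address-arithmetic value:
%f = getelementptr %st* %s, i64 0, i32 1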

+ +
+ + + + +
+

Some LLVM optimizers operate on GEPs by internally lowering them into + more primitive integer expressions, which allows them to be combined + with other integer expressions and/or split into multiple separate + integer expressions. If they've made non-trivial changes, translating + back into LLVM IR can involve reverse-engineering the structure of + the addressing in order to fit it into the static type of the original + first operand. It isn't always possibly to fully reconstruct this + structure; sometimes the underlying addressing doesn't correspond with + the static type at all. In such cases the optimizer instead will emit + a GEP with the base pointer casted to a simple address-unit pointer, + using the name "uglygep". This isn't pretty, but it's just as + valid, and it's sufficient to preserve the pointer aliasing guarantees + that GEP provides.
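Such output might look roughly like this (a hypothetical sketch; %struct.foo and the value names are invented):

; The base is recast to a plain byte pointer and indexed by a raw
; byte offset, since the offset no longer matches the static type:
%tmp     = bitcast %struct.foo* %obj to i8*
%uglygep = getelementptr i8* %tmp, i64 %byteoffset
%result  = bitcast i8* %uglygep to i32*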

+ +
+ + + + + +
+

In summary, here's some things to always remember about the GetElementPtr + instruction:

+
    +
  1. The GEP instruction never accesses memory, it only provides pointer + computations.
  2. +
  3. The first operand to the GEP instruction is always a pointer and it must + be indexed.
  4. +
  5. There are no superfluous indices for the GEP instruction.
  6. +
  7. Trailing zero indices are superfluous for pointer aliasing, but not for + the types of the pointers.
  8. +
  9. Leading zero indices are not superfluous for pointer aliasing nor the + types of the pointers.
  10. +
+
The LLVM Compiler Infrastructure
Last modified: $Date$

diff --git a/final/docs/GettingStarted.html b/final/docs/GettingStarted.html
new file mode 100644
index 00000000000..9fa8a7793df
--- /dev/null
+++ b/final/docs/GettingStarted.html
@@ -0,0 +1,1702 @@
Getting Started with the LLVM System
Written by: John Criswell, Chris Lattner, Misha Brukman, Vikram Adve, and Guochun Shi.

Overview
Welcome to LLVM! In order to get started, you first need to know some +basic information.

+ +

First, LLVM comes in three pieces. The first piece is the LLVM +suite. This contains all of the tools, libraries, and header files +needed to use the low level virtual machine. It contains an +assembler, disassembler, bitcode analyzer and bitcode optimizer. It +also contains basic regression tests that can be used to test the LLVM +tools and the GCC front end.

+ +

The second piece is the GCC front end. This component provides a version of +GCC that compiles C and C++ code into LLVM bitcode. Currently, the GCC front +end uses the GCC parser to convert code to LLVM. Once +compiled into LLVM bitcode, a program can be manipulated with the LLVM tools +from the LLVM suite.

+ +

+There is a third, optional piece called Test Suite. It is a suite of programs +with a testing harness that can be used to further test LLVM's functionality +and performance. +


Getting Started Quickly (A Summary)
Here's the short story for getting up and running quickly with LLVM:

+ +
    +
  1. Read the documentation.
  2. +
  3. Read the documentation.
  4. +
  5. Remember that you were warned twice about reading the documentation.
  6. +
  7. Install the llvm-gcc-4.2 front end if you intend to compile C or C++ + (see Install the GCC Front End for details):
  8. +
      +
    1. cd where-you-want-the-C-front-end-to-live
    2. +
    3. gunzip --stdout llvm-gcc-4.2-version-platform.tar.gz | tar -xvf -
    4. +
    5. install-binutils-binary-from-MinGW (Windows only)
    6. +
    7. Note: If the binary extension is ".bz" use bunzip2 instead of gunzip.
    8. +
    9. Note: On Windows, use 7-Zip or a similar archiving tool.
    10. +
    11. Add llvm-gcc's "bin" directory to your PATH environment variable.
    12. +
    + +
  9. Get the LLVM Source Code +
      +
    • With the distributed files (or use SVN): +
        +
      1. cd where-you-want-llvm-to-live +
      2. gunzip --stdout llvm-version.tar.gz | tar -xvf - +
    • + +
  10. + +
  11. [Optional] Get the Test Suite Source Code +
      +
    • With the distributed files (or use SVN): +
        +
      1. cd where-you-want-llvm-to-live +
      2. cd llvm/projects +
      3. gunzip --stdout llvm-test-version.tar.gz | tar -xvf - +
      4. mv llvm-test-version test-suite +
    • + +
  12. + + +
  13. Configure the LLVM Build Environment +
      +
    1. cd where-you-want-to-build-llvm
    2. +
    3. /path/to/llvm/configure [options]
      + Some common options: + +
        +
      • --prefix=directory +

        Specify for directory the full pathname of where you + want the LLVM tools and libraries to be installed (default + /usr/local).

      • +
      • --with-llvmgccdir=directory +

        Optionally, specify for directory the full pathname of the + C/C++ front end installation to use with this LLVM configuration. If + not specified, the PATH will be searched. This is only needed if you + want to run test-suite or do some special kinds of LLVM builds.

      • +
      • --enable-spec2000=directory +

        Enable the SPEC2000 benchmarks for testing. The SPEC2000 + benchmarks should be available in + directory.

      • +
      +
  14. + +
  15. Build the LLVM Suite: +
      +
    1. gmake -k |& tee gnumake.out +    # this is csh or tcsh syntax
    2. +
    3. If you get an "internal compiler error (ICE)" or test failures, see + below.
    4. +
    + +
+ +

Consult the Getting Started with LLVM section for +detailed information on configuring and compiling LLVM. See Setting Up Your Environment for tips that simplify +working with the GCC front end and LLVM tools. Go to Program +Layout to learn about the layout of the source code tree.

+ +
+ + + + + +
+ +

Before you begin to use the LLVM system, review the requirements given below. +This may save you some trouble by knowing ahead of time what hardware and +software you will need.

+ +
+ + + + +
+ +

LLVM is known to work on the following platforms:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OSArchCompilers
AuroraUXx861GCC
Linuxx861GCC
Linuxamd64GCC
SolarisV9 (Ultrasparc)GCC
FreeBSDx861GCC
FreeBSDamd64GCC
MacOS X2PowerPCGCC
MacOS X2,9x86GCC
Cygwin/Win32x861,8, + 11GCC 3.4.X, binutils 2.20
MinGW/Win32x861,6, + 8, 10GCC 3.4.X, binutils 2.20
+ +

LLVM has partial support for the following platforms:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
OSArchCompilers
Windowsx861Visual Studio 2005 SP1 or higher4,5
AIX3,4PowerPCGCC
Linux3,5PowerPCGCC
Linux7AlphaGCC
Linux7Itanium (IA-64)GCC
HP-UX7Itanium (IA-64)HP aCC
+ +

Notes:

+ + + +

Note that you will need about 1-3 GB of space for a full LLVM build in Debug +mode, depending on the system (it is so large because of all the debugging +information and the fact that the libraries are statically linked into multiple +tools). If you do not need many of the tools and you are space-conscious, you +can pass ONLY_TOOLS="tools you need" to make. The Release build +requires considerably less space.

+ +

The LLVM suite may compile on other platforms, but it is not +guaranteed to do so. If compilation is successful, the LLVM utilities should be +able to assemble, disassemble, analyze, and optimize LLVM bitcode. Code +generation should work as well, although the generated native code may not work +on your platform.

+ +

The GCC front end is not very portable at the moment. If you want to get it +to work on another platform, you can download a copy of the source and try to compile it on your platform.

+ +
+ + + +
+

Compiling LLVM requires that you have several software packages + installed. The table below lists those required packages. The Package column + is the usual name for the software package that LLVM depends on. The Version + column provides "known to work" versions of the package. The Notes column + describes how LLVM uses the package and provides other details.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PackageVersionNotes
GNU Make3.79, 3.79.1Makefile/build processor
GCC3.4.2C/C++ compiler1
TeXinfo4.5For building the CFE
SVN≥1.3Subversion access to LLVM2
DejaGnu1.4.2Automated test suite3
tcl8.3, 8.4Automated test suite3
expect5.38.0Automated test suite3
perl≥5.6.0Nightly tester, utilities
GNU M4 + 1.4Macro processor for configuration4
GNU Autoconf2.60Configuration script builder4
GNU Automake1.9.6aclocal macro generator4
libtool1.5.22Shared library manager4
+ +

Notes:

+ + +

Additionally, your compilation host is expected to have the usual + plethora of Unix utilities. Specifically:

+
    +
  • ar - archive library builder
  • +
  • bzip2* - bzip2 command for distribution generation
  • +
  • bunzip2* - bunzip2 command for distribution checking
  • +
  • chmod - change permissions on a file
  • +
  • cat - output concatenation utility
  • +
  • cp - copy files
  • +
  • date - print the current date/time
  • +
  • echo - print to standard output
  • +
  • egrep - extended regular expression search utility
  • +
  • find - find files/dirs in a file system
  • +
  • grep - regular expression search utility
  • +
  • gzip* - gzip command for distribution generation
  • +
  • gunzip* - gunzip command for distribution checking
  • +
  • install - install directories/files
  • +
  • mkdir - create a directory
  • +
  • mv - move (rename) files
  • +
  • ranlib - symbol table builder for archive libraries
  • +
  • rm - remove (delete) files and directories
  • +
  • sed - stream editor for transforming output
  • +
  • sh - Bourne shell for make build scripts
  • +
  • tar - tape archive for distribution generation
  • +
  • test - test things in file system
  • +
  • unzip* - unzip command for distribution checking
  • +
  • zip* - zip command for distribution generation
  • +
+

Broken versions of GCC and other tools
LLVM is very demanding of the host C++ compiler, and as such tends to expose +bugs in the compiler. In particular, several versions of GCC crash when trying +to compile LLVM. We routinely use GCC 3.3.3, 3.4.0, and Apple 4.0.1 +successfully with them (however, see important notes below). Other versions +of GCC will probably work as well. GCC versions listed +here are known to not work. If you are using one of these versions, please try +to upgrade your GCC to something more recent. If you run into a problem with a +version of GCC not listed here, please let +us know. Please use the "gcc -v" command to find out which version +of GCC you are using. +

+ +

GCC versions prior to 3.0: GCC 2.96.x and before had several +problems in the STL that effectively prevent it from compiling LLVM. +

+ +

GCC 3.2.2 and 3.2.3: These versions of GCC fail to compile LLVM with a bogus template error. This was fixed in later GCCs.

+ +

GCC 3.3.2: This version of GCC suffered from a serious bug which causes it to crash in +the "convert_from_eh_region_ranges_1" GCC function.

+ +

Cygwin GCC 3.3.3: The version of GCC 3.3.3 commonly shipped with + Cygwin does not work. Please upgrade + to a newer version if possible.

+

SuSE GCC 3.3.3: The version of GCC 3.3.3 shipped with SuSE 9.1 (and + possibly others) does not compile LLVM correctly (it appears that exception + handling is broken in some cases). Please download the FSF 3.3.3 or upgrade + to a newer version of GCC.

+

GCC 3.4.0 on linux/x86 (32-bit): GCC miscompiles portions of the + code generator, causing an infinite loop in the llvm-gcc build when built + with optimizations enabled (i.e. a release build).

+

GCC 3.4.2 on linux/x86 (32-bit): GCC miscompiles portions of the code generator at -O3, as with 3.4.0. However, gcc 3.4.2 (unlike 3.4.0) correctly compiles LLVM at -O2. A workaround is to build release LLVM builds with "make ENABLE_OPTIMIZED=1 OPTIMIZE_OPTION=-O2 ..."

+

GCC 3.4.x on X86-64/amd64: GCC + miscompiles portions of LLVM.

+

GCC 3.4.4 (CodeSourcery ARM 2005q3-2): this compiler miscompiles LLVM + when building with optimizations enabled. It appears to work with + "make ENABLE_OPTIMIZED=1 OPTIMIZE_OPTION=-O1" or build a debug + build.

+

IA-64 GCC 4.0.0: The IA-64 version of GCC 4.0.0 is known to + miscompile LLVM.

+

Apple Xcode 2.3: GCC crashes when compiling LLVM at -O3 (which is the default with ENABLE_OPTIMIZED=1). To work around this, build with "ENABLE_OPTIMIZED=1 OPTIMIZE_OPTION=-O2".

+

GCC 4.1.1: GCC fails to build LLVM with template concept check errors + compiling some files. At the time of this writing, GCC mainline (4.2) + did not share the problem.

+

GCC 4.1.1 on X86-64/amd64: GCC + miscompiles portions of LLVM when compiling llvm itself into 64-bit + code. LLVM will appear to mostly work but will be buggy, e.g. failing + portions of its testsuite.

+

GCC 4.1.2 on OpenSUSE: Segfaults during the libstdc++ build; on x86_64 platforms, compiling md5.c gets a mangled constant.

+

GCC 4.1.2 (20061115 (prerelease) (Debian 4.1.1-21)) on Debian: Appears +to miscompile parts of LLVM 2.4. One symptom is ValueSymbolTable complaining +about symbols remaining in the table on destruction.

+

GCC 4.1.2 20071124 (Red Hat 4.1.2-42): Suffers from the same symptoms +as the previous one. It appears to work with ENABLE_OPTIMIZED=0 (the default).

+

Cygwin GCC 4.3.2 20080827 (beta) 2: Users reported various problems related to link errors when using this GCC version.

+

Debian GCC 4.3.2 on X86: Crashes building some files in LLVM 2.6.

+

GCC 4.3.3 (Debian 4.3.3-10) on ARM: Miscompiles parts of LLVM 2.6 +when optimizations are turned on. The symptom is an infinite loop in +FoldingSetImpl::RemoveNode while running the code generator.

+

GCC 4.3.5 and GCC 4.4.5 on ARM: These can miscompile value >> +1 even at -O0. A test failure in test/Assembler/alignstack.ll is +one symptom of the problem. +

GNU ld 2.16.X: Some 2.16.X versions of the ld linker will produce very long warning messages complaining that some ".gnu.linkonce.t.*" symbol was defined in a discarded section. You can safely ignore these messages as they are erroneous and the linkage is correct. These messages disappear using ld 2.17.

+ +

GNU binutils 2.17: Binutils 2.17 contains a bug which +causes huge link times (minutes instead of seconds) when building LLVM. We +recommend upgrading to a newer version (2.17.50.0.4 or later).

+ +

GNU Binutils 2.19.1 Gold: This version of Gold contained +a bug +which causes intermittent failures when building LLVM with position independent +code. The symptom is an error about cyclic dependencies. We recommend +upgrading to a newer version of Gold.

+ +
+ + + + + + + +
+ +

The remainder of this guide is meant to get you up and running with +LLVM and to give you some basic information about the LLVM environment.

+ +

The later sections of this guide describe the general layout of the LLVM source tree, a simple example using the LLVM tool chain, and links to find more information about LLVM or to get help via e-mail.

+
+ + + + +
+ +

Throughout this manual, the following names are used to denote paths +specific to the local system and working environment. These are not +environment variables you need to set but just strings used in the rest +of this document below. In any of the examples below, simply replace +each of these names with the appropriate pathname on your local system. +All these paths are absolute:

+ +
+
SRC_ROOT +
+ This is the top level directory of the LLVM source tree. +

+ +
OBJ_ROOT +
This is the top level directory of the LLVM object tree (i.e. the tree where object files and compiled programs will be placed; it can be the same as SRC_ROOT).

+ +
LLVMGCCDIR +
+ This is where the LLVM GCC Front End is installed. +

+ For the pre-built GCC front end binaries, the LLVMGCCDIR is + llvm-gcc/platform/llvm-gcc. +

+ +
+ + + + +
+ +

+In order to compile and use LLVM, you may need to set some environment +variables. + +

+
LLVM_LIB_SEARCH_PATH=/path/to/your/bitcode/libs
+
[Optional] This environment variable helps LLVM linking tools find the + locations of your bitcode libraries. It is provided only as a + convenience since you can specify the paths using the -L options of the + tools and the C/C++ front-end will automatically use the bitcode files + installed in its + lib directory.
+

Unpacking the LLVM Archives

+If you have the LLVM distribution, you will need to unpack it before you +can begin to compile it. LLVM is distributed as a set of two files: the LLVM +suite and the LLVM GCC front end compiled for your platform. There is an +additional test suite that is optional. Each file is a TAR archive that is +compressed with the gzip program. +

+ +

The files are as follows, with x.y marking the version number: +

+
llvm-x.y.tar.gz
+
Source release for the LLVM libraries and tools.
+ +
llvm-test-x.y.tar.gz
+
Source release for the LLVM test-suite.
+ +
llvm-gcc-4.2-x.y.source.tar.gz
+
Source release of the llvm-gcc-4.2 front end. See README.LLVM in the root + directory for build instructions.
+ +
llvm-gcc-4.2-x.y-platform.tar.gz
+
Binary release of the llvm-gcc-4.2 front end for a specific platform.
+ +

Checkout LLVM from Subversion

If you have access to our Subversion repository, you can get a fresh copy of +the entire source code. All you need to do is check it out from Subversion as +follows:

+ +
  • cd where-you-want-llvm-to-live
  • Read-Only: svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
  • Read-Write: svn co https://user@llvm.org/svn/llvm-project/llvm/trunk llvm

This will create an 'llvm' directory in the current +directory and fully populate it with the LLVM source code, Makefiles, +test directories, and local copies of documentation files.

+ +

If you want to get a specific release (as opposed to the most recent revision), you can check it out from the 'tags' directory (instead of 'trunk'). The following releases are located in the following subdirectories of the 'tags' directory:


  • Release 2.9: RELEASE_29
  • Release 2.8: RELEASE_28
  • Release 2.7: RELEASE_27
  • Release 2.6: RELEASE_26
  • Release 2.5: RELEASE_25
  • Release 2.4: RELEASE_24
  • Release 2.3: RELEASE_23
  • Release 2.2: RELEASE_22
  • Release 2.1: RELEASE_21
  • Release 2.0: RELEASE_20
  • Release 1.9: RELEASE_19
  • Release 1.8: RELEASE_18
  • Release 1.7: RELEASE_17
  • Release 1.6: RELEASE_16
  • Release 1.5: RELEASE_15
  • Release 1.4: RELEASE_14
  • Release 1.3: RELEASE_13
  • Release 1.2: RELEASE_12
  • Release 1.1: RELEASE_11
  • Release 1.0: RELEASE_1

If you would like to get the LLVM test suite (a separate package as of 1.4), +you get it from the Subversion repository:

+ +
+
+% cd llvm/projects
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+
+
+ +

By placing it in llvm/projects, it will be automatically configured by the LLVM configure script as well as automatically updated when you run svn update.

+ +

If you would like to get the GCC front end source code, you can also get it +and build it yourself. Please follow these +instructions to successfully get and build the LLVM GCC front-end.


GIT Mirror
GIT mirrors are available for a number of LLVM subprojects. These mirrors sync automatically with each Subversion commit and contain all necessary git-svn marks (so you can recreate git-svn metadata locally). Note that right now the mirrors reflect only trunk for each project. You can do a read-only GIT clone of LLVM via:

+% git clone http://llvm.org/git/llvm.git
+
+


Install the GCC Front End
Before configuring and compiling the LLVM suite (or if you want to use just the LLVM +GCC front end) you can optionally extract the front end from the binary distribution. +It is used for running the LLVM test-suite and for compiling C/C++ programs. Note that +you can optionally build llvm-gcc yourself after building the +main LLVM repository.

+ +

To install the GCC front end, do the following (on Windows, use an archival tool +like 7-zip that understands gzipped tars):

+ +
    +
  1. cd where-you-want-the-front-end-to-live
  2. +
  3. gunzip --stdout llvm-gcc-4.2-version-platform.tar.gz | tar -xvf + -
  4. +
+ +

Once the binary is uncompressed, if you're using a *nix-based system, add a symlink for +llvm-gcc and llvm-g++ to some directory in your path. If you're using a +Windows-based system, add the bin subdirectory of your front end installation directory +to your PATH environment variable. For example, if you uncompressed the binary to +c:\llvm-gcc, add c:\llvm-gcc\bin to your PATH.

+ +

If you now want to build LLVM from source, when you configure LLVM, it will +automatically detect llvm-gcc's presence (if it is in your path) enabling its +use in test-suite. Note that you can always build or install llvm-gcc at any +point after building the main LLVM repository: just reconfigure llvm and +test-suite will pick it up. +

+ +

As a convenience for Windows users, the front end binaries for MinGW/x86 include versions of the required w32api and mingw-runtime binaries. The last remaining step for Windows users is to simply uncompress the binary binutils package from MinGW into your front end installation directory. While the front end installation steps are not quite the same as a typical manual MinGW installation, they should seem similar enough to anyone who has previously installed MinGW on a Windows system.

+ +

To install binutils on Windows:

+ +
    +
  1. download GNU Binutils from MinGW Downloads
  2. +
  3. cd where-you-uncompressed-the-front-end
  4. +
  5. uncompress archived binutils directories (not the tar file) into the current directory
  6. +
+ +

The binary versions of the LLVM GCC front end may not suit all of your needs. For +example, the binary distribution may include an old version of a system header +file, not "fix" a header file that needs to be fixed for GCC, or it may be linked with +libraries not available on your system. In cases like these, you may want to try +building the GCC front end from source. Thankfully, +this is much easier now than it was in the past.

+ +

We also do not currently support updating of the GCC front end by manually overlaying +newer versions of the w32api and mingw-runtime binary packages that may become available +from MinGW. At this time, it's best to think of the MinGW LLVM GCC front end binary as +a self-contained convenience package that requires Windows users to simply download and +uncompress the GNU Binutils binary package from the MinGW project.

+ +

Regardless of your platform, if you discover that installing the LLVM GCC front end +binaries is not as easy as previously described, or you would like to suggest improvements, +please let us know how you would like to see things improved by dropping us a note on our +mailing list.


Local LLVM Configuration
Once checked out from the Subversion repository, the LLVM suite source + code must be +configured via the configure script. This script sets variables in the +various *.in files, most notably llvm/Makefile.config and +llvm/include/Config/config.h. It also populates OBJ_ROOT with +the Makefiles needed to begin building LLVM.

+ +

The following environment variables are used by the configure +script to configure the build system:

+ + + + + + + + + + + +
VariablePurpose
CCTells configure which C compiler to use. By default, + configure will look for the first GCC C compiler in + PATH. Use this variable to override + configure's default behavior.
CXXTells configure which C++ compiler to use. By default, + configure will look for the first GCC C++ compiler in + PATH. Use this variable to override + configure's default behavior.
+ +

The following options can be used to set or enable LLVM specific options:

+ +
+
--with-llvmgccdir
+
Path to the LLVM C/C++ FrontEnd to be used with this LLVM configuration. + The value of this option should specify the full pathname of the C/C++ Front + End to be used. If this option is not provided, the PATH will be searched for + a program named llvm-gcc and the C/C++ FrontEnd install directory will + be inferred from the path found. If the option is not given, and no llvm-gcc + can be found in the path then a warning will be produced by + configure indicating this situation. LLVM may still be built with + the tools-only target but attempting to build the runtime libraries + will fail as these libraries require llvm-gcc and llvm-g++. See + Install the GCC Front End for details on installing + the C/C++ Front End. See + Bootstrapping the LLVM C/C++ Front-End + for details on building the C/C++ Front End.
+
--with-tclinclude
+
Path to the tcl include directory under which tclsh can be + found. Use this if you have multiple tcl installations on your machine and you + want to use a specific one (8.x) for LLVM. LLVM only uses tcl for running the + dejagnu based test suite in llvm/test. If you don't specify this + option, the LLVM configure script will search for the tcl 8.4 and 8.3 + releases. +

+
+
--enable-optimized
+
Enables optimized compilation (debugging symbols are removed and GCC optimization flags are enabled). Note that this is the default setting if you are using the LLVM distribution. The default behavior of a Subversion checkout is to use an unoptimized build (also known as a debug build).

+
+
--enable-debug-runtime
+
+ Enables debug symbols in the runtime libraries. The default is to strip + debug symbols from the runtime libraries. +
+
--enable-jit
+
+ Compile the Just In Time (JIT) compiler functionality. This is not + available + on all platforms. The default is dependent on platform, so it is best + to explicitly enable it if you want it. +

+
+
--enable-targets=target-option
+
Controls which targets will be built and linked into llc. The default + value for target_options is "all" which builds and links all + available targets. The value "host-only" can be specified to build only a + native compiler (no cross-compiler targets available). The "native" target is + selected as the target of the build host. You can also specify a comma + separated list of target names that you want available in llc. The target + names use all lower case. The current set of targets is:
+ alpha, ia64, powerpc, skeleton, sparc, x86. +

+
--enable-doxygen
+
Look for the doxygen program and enable construction of doxygen based + documentation from the source code. This is disabled by default because + generating the documentation can take a long time and producess 100s of + megabytes of output.
+
--with-udis86
+
LLVM can use external disassembler library for various purposes (now it's + used only for examining code produced by JIT). This option will enable usage + of udis86 x86 (both 32 and 64 + bits) disassembler library.
+
+ +

To configure LLVM, follow these steps:

+ +
    +
  1. Change directory into the object root directory:

    + +
    % cd OBJ_ROOT
  2. + +
  3. Run the configure script located in the LLVM source + tree:

    + +
    +
    % SRC_ROOT/configure --prefix=/install/path [other options]
    +
  4. +

Compiling the LLVM Suite Source Code
Once you have configured LLVM, you can build it. There are three types of +builds:

+ +
+
Debug Builds +
These builds are the default when one is using a Subversion checkout and types gmake (unless the --enable-optimized option was used during configuration). The build system will compile the tools and libraries with debugging information. To get a Debug Build using the LLVM distribution, the --disable-optimized option must be passed to configure.

+ +
Release (Optimized) Builds +
+ These builds are enabled with the --enable-optimized option to + configure or by specifying ENABLE_OPTIMIZED=1 on the + gmake command line. For these builds, the build system will + compile the tools and libraries with GCC optimizations enabled and strip + debugging information from the libraries and executables it generates. + Note that Release Builds are default when using an LLVM distribution. +

+ +
Profile Builds +
+ These builds are for use with profiling. They compile profiling + information into the code for use with programs like gprof. + Profile builds must be started by specifying ENABLE_PROFILING=1 + on the gmake command line. +
+ +

Once you have LLVM configured, you can build it by entering the +OBJ_ROOT directory and issuing the following command:

+ +
% gmake
+ +

If the build fails, please check here to see if you +are using a version of GCC that is known not to compile LLVM.

+ +

+If you have multiple processors in your machine, you may wish to use some of +the parallel build options provided by GNU Make. For example, you could use the +command:

+ +
% gmake -j2
+ +

There are several special targets which are useful when working with the LLVM +source code:

+ +
+
gmake clean +
+ Removes all files generated by the build. This includes object files, + generated C/C++ files, libraries, and executables. +

+ +
gmake dist-clean +
+ Removes everything that gmake clean does, but also removes files + generated by configure. It attempts to return the source tree to the + original state in which it was shipped. +

+ +
gmake install +
+ Installs LLVM header files, libraries, tools, and documentation in a + hierarchy + under $PREFIX, specified with ./configure --prefix=[dir], which + defaults to /usr/local. +

+ +
gmake -C runtime install-bytecode +
+ Assuming you built LLVM into $OBJDIR, when this command is run, it will + install bitcode libraries into the GCC front end's bitcode library + directory. If you need to update your bitcode libraries, + this is the target to use once you've built them. +

+
+ +

Please see the Makefile Guide for further +details on these make targets and descriptions of other targets +available.

+ +

It is also possible to override default values from configure by +declaring variables on the command line. The following are some examples:

+ +
+
gmake ENABLE_OPTIMIZED=1 +
+ Perform a Release (Optimized) build. +

+ +
gmake ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1 +
+ Perform a Release (Optimized) build without assertions enabled. +

+ +
gmake ENABLE_OPTIMIZED=0 +
+ Perform a Debug build. +

+ +
gmake ENABLE_PROFILING=1 +
+ Perform a Profiling build. +

+ +
gmake VERBOSE=1 +
+ Print what gmake is doing on standard output. +

+ +
gmake TOOL_VERBOSE=1
+
Ask each tool invoked by the makefiles to print out what it is doing on + the standard output. This also implies VERBOSE=1. +

+
+ +

Every directory in the LLVM object tree includes a Makefile to build +it and any subdirectories that it contains. Entering any directory inside the +LLVM object tree and typing gmake should rebuild anything in or below +that directory that is out of date.

+ +
+ + + + +
+

It is possible to cross-compile LLVM itself. That is, you can create LLVM executables and libraries to be hosted on a platform different from the platform where they are built (a Canadian Cross build). To configure a cross-compile, supply the configure script with --build and --host options that are different. The values of these options must be legal target triples that your GCC compiler supports.

+ +

The result of such a build is executables that are not runnable on the build host (--build option) but can be executed on the compile host (--host option).

+
+ + + + +
+ +

The LLVM build system is capable of sharing a single LLVM source tree among +several LLVM builds. Hence, it is possible to build LLVM for several different +platforms or configurations using the same source tree.

+ +

This is accomplished in the typical autoconf manner:

+ +
    +
  • Change directory to where the LLVM object files should live:

    + +
    % cd OBJ_ROOT
  • + +
  • Run the configure script found in the LLVM source + directory:

    + +
    % SRC_ROOT/configure
  • +
+ +

The LLVM build will place files underneath OBJ_ROOT in directories +named after the build type:

+ +
+
Debug Builds with assertions enabled (the default) +
+
+
Tools +
OBJ_ROOT/Debug+Asserts/bin +
Libraries +
OBJ_ROOT/Debug+Asserts/lib +
+

+ +
Release Builds +
+
+
Tools +
OBJ_ROOT/Release/bin +
Libraries +
OBJ_ROOT/Release/lib +
+

+ +
Profile Builds +
+
+
Tools +
OBJ_ROOT/Profile/bin +
Libraries +
OBJ_ROOT/Profile/lib +
+

Optional Configuration Items
+If you're running on a Linux system that supports the "binfmt_misc" +module, and you have root access on the system, you can set your system up to +execute LLVM bitcode files directly. To do this, use commands like this (the +first command may not be required if you are already using the module):

+ +
+
+$ mount -t binfmt_misc none /proc/sys/fs/binfmt_misc
+$ echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register
+$ chmod u+x hello.bc   (if needed)
+$ ./hello.bc
+
+
+ +

+This allows you to execute LLVM bitcode files directly. On Debian, you +can also use this command instead of the 'echo' command above:

+

+ +
+
+$ sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
+
+
+ +

Program Layout
One useful source of information about the LLVM source base is the LLVM doxygen documentation available at http://llvm.org/doxygen/. +The following is a brief introduction to code layout:

+ +

llvm/examples
This directory contains some simple examples of how to use the LLVM IR and + JIT.

+

llvm/include
This directory contains public header files exported from the LLVM +library. The three main subdirectories of this directory are:

+ +
+
llvm/include/llvm
+
This directory contains all of the LLVM specific header files. This + directory also has subdirectories for different portions of LLVM: + Analysis, CodeGen, Target, Transforms, + etc...
+ +
llvm/include/llvm/Support
+
This directory contains generic support libraries that are provided with + LLVM but not necessarily specific to LLVM. For example, some C++ STL utilities + and a Command Line option processing library store their header files here. +
+ +
llvm/include/llvm/Config
+
This directory contains header files configured by the configure + script. They wrap "standard" UNIX and C header files. Source code can + include these header files which automatically take care of the conditional + #includes that the configure script generates.
+
+

llvm/lib
This directory contains most of the source files of the LLVM system. In LLVM, +almost all code exists in libraries, making it very easy to share code among the +different tools.

+ +
+
llvm/lib/VMCore/
+
This directory holds the core LLVM source files that implement core + classes like Instruction and BasicBlock.
+ +
llvm/lib/AsmParser/
+
This directory holds the source code for the LLVM assembly language parser + library.
+ +
llvm/lib/BitCode/
+
This directory holds code for reading and write LLVM bitcode.
+ +
llvm/lib/Analysis/
This directory contains a variety of + different program analyses, such as Dominator Information, Call Graphs, + Induction Variables, Interval Identification, Natural Loop Identification, + etc.
+ +
llvm/lib/Transforms/
+
This directory contains the source code for the LLVM to LLVM program + transformations, such as Aggressive Dead Code Elimination, Sparse Conditional + Constant Propagation, Inlining, Loop Invariant Code Motion, Dead Global + Elimination, and many others.
+ +
llvm/lib/Target/
+
This directory contains files that describe various target architectures + for code generation. For example, the llvm/lib/Target/X86 + directory holds the X86 machine description while + llvm/lib/Target/CBackend implements the LLVM-to-C converter.
+ +
llvm/lib/CodeGen/
+
This directory contains the major parts of the code generator: Instruction + Selector, Instruction Scheduling, and Register Allocation.
+ +
llvm/lib/Debugger/
+
This directory contains the source level debugger library that makes + it possible to instrument LLVM programs so that a debugger could identify + source code locations at which the program is executing.
+ +
llvm/lib/ExecutionEngine/
+
This directory contains libraries for executing LLVM bitcode directly + at runtime in both interpreted and JIT compiled fashions.
+ +
llvm/lib/Support/
+
This directory contains the source code that corresponds to the header + files located in llvm/include/Support/.
+ +
llvm/lib/System/
+
This directory contains the operating system abstraction layer that + shields LLVM from platform-specific coding.
+
+ +

llvm/projects
This directory contains projects that are not strictly part of LLVM but are + shipped with LLVM. This is also the directory where you should create your own + LLVM-based projects. See llvm/projects/sample for an example of how + to set up your own project.

+

llvm/runtime
This directory contains libraries which are compiled into LLVM bitcode and +used when linking programs with the GCC front end. Most of these libraries are +skeleton versions of real libraries; for example, libc is a stripped down +version of glibc.

+ +

Unlike the rest of the LLVM suite, this directory needs the LLVM GCC front +end to compile.

+ +

llvm/test
This directory contains feature and regression tests and other basic sanity + checks on the LLVM infrastructure. These are intended to run quickly and cover + a lot of territory without being exhaustive.

+

test-suite
This is not a directory in the normal llvm module; it is a separate + Subversion + module that must be checked out (usually to projects/test-suite). + This + module contains a comprehensive correctness, performance, and benchmarking + test + suite for LLVM. It is a separate Subversion module because not every LLVM + user is + interested in downloading or building such a comprehensive test suite. For + further details on this test suite, please see the + Testing Guide document.

+

llvm/tools
The tools directory contains the executables built out of the +libraries above, which form the main part of the user interface. You can +always get help for a tool by typing tool_name -help. The +following is a brief introduction to the most important tools. More detailed +information is in the Command Guide.

+ +
+ +
bugpoint
+
bugpoint is used to debug + optimization passes or code generation backends by narrowing down the + given test case to the minimum number of passes and/or instructions that + still cause a problem, whether it is a crash or miscompilation. See HowToSubmitABug.html for more information + on using bugpoint.
+ +
llvmc
+
The LLVM Compiler Driver. This program can + be configured to utilize both LLVM and non-LLVM compilation tools to enable + pre-processing, translation, optimization, assembly, and linking of programs + all from one command line. llvmc also takes care of processing the + dependent libraries found in bitcode. This reduces the need to get the + traditional -l<name> options right on the command line. Please + note that this tool, while functional, is still experimental and not feature + complete.
+ +
llvm-ar
+
The archiver produces an archive containing + the given LLVM bitcode files, optionally with an index for faster + lookup.
+ +
llvm-as
+
The assembler transforms the human readable LLVM assembly to LLVM + bitcode.
+ +
llvm-dis
+
The disassembler transforms the LLVM bitcode to human readable + LLVM assembly.
+ +
llvm-ld
+
llvm-ld is a general purpose and extensible linker for LLVM. + This is the linker invoked by llvmc. It performs standard link time + optimizations and allows optimization modules to be loaded and run so that + language specific optimizations can be applied at link time.
+ +
llvm-link
+
llvm-link, not surprisingly, links multiple LLVM modules into + a single program.
+ +
lli
+
lli is the LLVM interpreter, which + can directly execute LLVM bitcode (although very slowly...). For architectures + that support it (currently x86, Sparc, and PowerPC), by default, lli + will function as a Just-In-Time compiler (if the functionality was compiled + in), and will execute the code much faster than the interpreter.
+ +
llc
+
llc is the LLVM backend compiler, which + translates LLVM bitcode to a native code assembly file or to C code (with + the -march=c option).
+ +
llvm-gcc
+
llvm-gcc is a GCC-based C frontend that has been retargeted to + use LLVM as its backend instead of GCC's RTL backend. It can also emit LLVM + bitcode or assembly (with the -emit-llvm option) instead of the + usual machine code output. It works just like any other GCC compiler, + taking the typical -c, -S, -E, -o options that are typically used. + Additionally, the the source code for llvm-gcc is available as a + separate Subversion module.
+ +
opt
+
opt reads LLVM bitcode, applies a series of LLVM to LLVM + transformations (which are specified on the command line), and then outputs + the resultant bitcode. The 'opt -help' command is a good way to + get a list of the program transformations available in LLVM.
+
opt can also be used to run a specific analysis on an input + LLVM bitcode file and print out the results. It is primarily useful for + debugging analyses, or familiarizing yourself with what an analysis does.
+
+

llvm/utils
This directory contains utilities for working with LLVM source code, and some +of the utilities are actually required as part of the build process because they +are code generators for parts of LLVM infrastructure.

+ +
+
codegen-diff
codegen-diff is a script + that finds differences between code that LLC generates and code that LLI + generates. This is a useful tool if you are debugging one of them, + assuming that the other generates correct output. For the full user + manual, run `perldoc codegen-diff'.

+ +
emacs/
The emacs directory contains + syntax-highlighting files which will work with Emacs and XEmacs editors, + providing syntax highlighting support for LLVM assembly files and TableGen + description files. For information on how to use the syntax files, consult + the README file in that directory.

+ +
getsrcs.sh
The getsrcs.sh script finds and outputs all non-generated source files, which is useful if one wishes to do a lot of development across directories and does not want to individually find each file. One way to use it is to run, for example: xemacs `utils/getsrcs.sh` from the top of your LLVM source tree.

+ +
llvmgrep
+
This little tool performs an "egrep -H -n" on each source file in LLVM and + passes to it a regular expression provided on llvmgrep's command + line. This is a very efficient way of searching the source base for a + particular regular expression.
+ +
makellvm
The makellvm script compiles all + files in the current directory and then compiles and links the tool that + is the first argument. For example, assuming you are in the directory + llvm/lib/Target/Sparc, if makellvm is in your path, + simply running makellvm llc will make a build of the current + directory, switch to directory llvm/tools/llc and build it, + causing a re-linking of LLC.

+ +
NewNightlyTest.pl and + NightlyTestTemplate.html
These files are used in a + cron script to generate nightly status reports of the functionality of + tools, and the results can be seen by following the appropriate link on + the LLVM homepage.

+ +
TableGen/
The TableGen directory contains + the tool used to generate register descriptions, instruction set + descriptions, and even assemblers from common TableGen description + files.

+ +
vim/
The vim directory contains + syntax-highlighting files which will work with the VIM editor, providing + syntax highlighting support for LLVM assembly files and TableGen + description files. For information on how to use the syntax files, consult + the README file in that directory.

+ +

An Example Using the LLVM Tool Chain
This section gives an example of using LLVM. llvm-gcc3 is now obsolete, +so we only include instructions for llvm-gcc4. +

+ +

Note: The gcc4 frontend's invocation is considerably different +from the previous gcc3 frontend. In particular, the gcc4 frontend does not +create bitcode by default: gcc4 produces native code. As the example below illustrates, +the '--emit-llvm' flag is needed to produce LLVM bitcode output. For makefiles and +configure scripts, the CFLAGS variable needs '--emit-llvm' to produce bitcode +output.


Example with llvm-gcc4
1. First, create a simple C file, name it 'hello.c':

   #include <stdio.h>

   int main() {
     printf("hello world\n");
     return 0;
   }

2. Next, compile the C file into a native executable:

   % llvm-gcc hello.c -o hello

   Note that llvm-gcc works just like GCC by default. The standard -S and -c arguments work as usual (producing a native .s or .o file, respectively).

3. Next, compile the C file into a LLVM bitcode file:

   % llvm-gcc -O3 -emit-llvm hello.c -c -o hello.bc

   The -emit-llvm option can be used with the -S or -c options to emit an LLVM ".ll" or ".bc" file (respectively) for the code. This allows you to use the standard LLVM tools on the bitcode file.

   Unlike llvm-gcc3, llvm-gcc4 correctly responds to -O[0123] arguments.

4. Run the program in both forms. To run the program, use:

   % ./hello

   and

   % lli hello.bc

   The second example shows how to invoke the LLVM JIT, lli.

5. Use the llvm-dis utility to take a look at the LLVM assembly code:

   % llvm-dis < hello.bc | less

6. Compile the program to native assembly using the LLC code generator:

   % llc hello.bc -o hello.s

7. Assemble the native assembly language file into a program:

   Solaris: % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native
   Others:  % gcc hello.s -o hello.native

8. Execute the native code program:

   % ./hello.native

   Note that using llvm-gcc to compile directly to native code (i.e. when the -emit-llvm option is not present) does steps 6/7/8 for you.

Common Problems
If you are having problems building or using LLVM, or if you have any other +general questions about LLVM, please consult the Frequently +Asked Questions page.


Links
This document is just an introduction to how to use LLVM to do some simple things... there are many more interesting and complicated things that you can do that aren't documented here (but we'll gladly accept a patch if you want to write something up!). For more information about LLVM, check out:


Chris Lattner
Reid Spencer
The LLVM Compiler Infrastructure
Last modified: $Date$

diff --git a/final/docs/GettingStartedVS.html b/final/docs/GettingStartedVS.html
new file mode 100644
index 00000000000..74759047978
--- /dev/null
+++ b/final/docs/GettingStartedVS.html
@@ -0,0 +1,366 @@
Getting Started with the LLVM System using Microsoft Visual Studio

Written by: The LLVM Team

+
+ + + + + + +
+ +

Welcome to LLVM on Windows! This document only covers LLVM on Windows using + Visual Studio, not mingw or cygwin. In order to get started, you first need to + know some basic information.

+ +

There are many different projects that compose LLVM. The first is the LLVM + suite. This contains all of the tools, libraries, and header files needed to + use the low level virtual machine. It contains an assembler, disassembler, + bitcode analyzer and bitcode optimizer. It also contains a test suite that can + be used to test the LLVM tools.

+ +

Another useful project on Windows is clang. Clang is a C family ([Objective]C/C++) compiler. Clang mostly works on Windows, but does not currently understand all of the Microsoft extensions to C and C++. Because of this, clang cannot parse the C++ standard library included with Visual Studio, nor parts of the Windows Platform SDK. However, most standard C programs do compile. Clang can be used to emit bitcode, directly emit object files, or even produce linked executables using Visual Studio's link.exe.

+ +

The large LLVM test suite cannot be run on the Visual Studio port at this + time.

+ +

Most of the tools build and work. bugpoint does build, but does + not work.

+ +

Additional information about the LLVM directory structure and tool chain + can be found on the main Getting Started + page.


Requirements
Before you begin to use the LLVM system, review the requirements given + below. This may save you some trouble by knowing ahead of time what hardware + and software you will need.


Hardware
Any system that can adequately run Visual Studio .NET 2005 SP1 is fine. + The LLVM source tree and object files, libraries and executables will consume + approximately 3GB.


Software
You will need Visual Studio .NET 2005 SP1 or higher. The VS2005 SP1 beta and the plain VS2005 release still have bugs and are not completely compatible. Earlier versions of Visual Studio do not support the C++ standard well enough and will not work.

+ +

You will also need the CMake build + system since it generates the project files you will use to build with.

+ +

If you would like to run the LLVM tests you will need + Python. Versions 2.4-2.7 are known to + work. You will need "GnuWin32" + tools, too.

+ +

Do not install the LLVM directory tree into a path containing spaces (e.g. + C:\Documents and Settings\...) as the configure step will fail.


Getting Started
Here's the short story for getting up and running quickly with LLVM:

+ +
    +
  1. Read the documentation.
  2. Seriously, read the documentation.
  3. Remember that you were warned twice about reading the documentation.
  4. Get the Source Code
      +
    • With the distributed files:
        +
      1. cd where-you-want-llvm-to-live
      2. gunzip --stdout llvm-version.tar.gz | tar -xvf -
         or use WinZip
      3. cd llvm
    • With anonymous Subversion access:
        +
      1. cd where-you-want-llvm-to-live
      2. svn co http://llvm.org/svn/llvm-project/llvm/trunk llvm
      3. cd llvm
  5. Use CMake to generate up-to-date project files:
      +
    • Once CMake is installed, the simplest way is to start the CMake GUI,
      select the directory where you extracted LLVM, and accept the default
      options, which should all be fine. One option you may want to change,
      regardless of anything else, is CMAKE_INSTALL_PREFIX, which selects the
      directory that the INSTALL step copies to once compiling is complete
      (installation is not mandatory for using LLVM). Another important
      option is LLVM_TARGETS_TO_BUILD, which controls which LLVM target
      architectures are included in the build. A command-line sketch of this
      step appears after this list.
    • See the LLVM CMake guide for
      detailed information about how to configure the LLVM build.
  6. Start Visual Studio
      +
    • The directory in which you created the project files will contain an
      llvm.sln file; just double-click on that to open Visual Studio.
  7. Build the LLVM Suite:
      +
    • The projects may still be built individually, but to build them all,
      do not simply select all of them in batch build (some are meant only
      as configuration projects). Instead, select and build just the
      ALL_BUILD project to build everything, or the INSTALL project, which
      first builds ALL_BUILD and then installs the LLVM headers, libs,
      and other useful files to the directory set by CMAKE_INSTALL_PREFIX
      when you first configured CMake.
    • The Fibonacci project is a sample program that uses the JIT. Modify
      the project's debugging properties to provide a numeric command-line
      argument, or run it from the command line. The program will print the
      corresponding Fibonacci value.
  8. Test LLVM in Visual Studio:
      +
    • If %PATH% does not contain GnuWin32, you may set LLVM_LIT_TOOLS_DIR
      in CMake to the path of the GnuWin32 tools.
    • You can run the LLVM tests by building the "check" project.
  9. Test LLVM from the command line:
      +
    • The LLVM tests can be run by cd'ing to the llvm source directory
      and running:
      +
      +% llvm-lit test
      +
      Note that quite a few of these tests will fail.
      +
    • A specific test or test directory can be run with:
      +
      +% llvm-lit test/path/to/test
      +
+ +
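As a concrete illustration of the CMake step above (step 5), the following command-line sketch configures a build from a sibling directory; the generator name, install prefix, and target list are illustrative and should be adjusted to your setup:
    +
    +C:\projects\build> cmake -G "Visual Studio 8 2005" ^
    +    -DCMAKE_INSTALL_PREFIX=C:\llvm-install ^
    +    -DLLVM_TARGETS_TO_BUILD=X86 ^
    +    ..\llvm
    +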
+ + + + + +
+ +
    +
  1. First, create a simple C file, name it 'hello.c':

    +
    +#include <stdio.h>
    +int main() {
    +  printf("hello world\n");
    +  return 0;
    +}
    +
  2. Next, compile the C file into an LLVM bitcode file:

    +
    +% clang -c hello.c -emit-llvm -o hello.bc
    +

    This will create the result file hello.bc, which is the LLVM
    bitcode that corresponds to the compiled program and the library
    facilities that it required. You can execute this file directly using
    the lli tool, compile it to native assembly with llc,
    optimize or analyze it further with the opt tool, and so on (a short
    opt sketch appears after this list).

    Alternatively you can directly output an executable with clang with:

    +
    +% clang hello.c -o hello.exe
    +

    The -o hello.exe is required because clang currently outputs
    a.out when neither -o nor -c is given.

  3. Run the program using the just-in-time compiler:

    +
    +% lli hello.bc
    +
  4. Use the llvm-dis utility to take a look at the LLVM assembly
    code:

    +
    +% llvm-dis < hello.bc | more
    +
  5. Compile the program to object code using the LLC code generator:

    +
    +% llc -filetype=obj hello.bc
    +
  6. Link the object file into a binary using the Microsoft linker:

    +
    +% link hello.obj -defaultlib:libcmt
    +
  7. Execute the native code program:

    +
    +% hello.exe
    +
+ +
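As mentioned in step 2, here is a short hedged sketch of running the optimizer over the bitcode before executing it (the output file name is illustrative; -std-compile-opts selects the standard optimization pipeline in this release):
    +
    +% opt -std-compile-opts hello.bc -o hello.opt.bc
    +% lli hello.opt.bc
    +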
+ + + + + +
+ +

If you are having problems building or using LLVM, or if you have any other +general questions about LLVM, please consult the Frequently +Asked Questions page.

+ +
+ + +
+ Links +
+ + +
+ +

This document is just an introduction to how to use LLVM to do +some simple things... there are many more interesting and complicated things +that you can do that aren't documented here (but we'll gladly accept a patch +if you want to write something up!). For more information about LLVM, check +out:

+ + + +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/GoldPlugin.html b/final/docs/GoldPlugin.html new file mode 100644 index 00000000000..68c5cf19280 --- /dev/null +++ b/final/docs/GoldPlugin.html @@ -0,0 +1,213 @@ + + + + LLVM gold plugin + + + + +
LLVM gold plugin
+
    +
  1. Introduction
  2. +
  3. How to build it
  4. +
  5. Usage +
  6. +
  7. Licensing
  8. +
+
Written by Nick Lewycky
+ + + + +
+

Building with link time optimization requires cooperation from the +system linker. LTO support on Linux systems requires that you use +the gold linker which supports +LTO via plugins. This is the same mechanism used by the +GCC LTO +project.

+

The LLVM gold plugin implements the +gold plugin interface +on top of +libLTO. +The same plugin can also be used by other tools such as ar and +nm. +

+ + + +
+

You need gold with plugin support, and you need to build the LLVMgold +plugin. Check whether you have gold by running /usr/bin/ld -v: it will +report “GNU gold” if you do, or “GNU ld” if not. If you have +gold, check for plugin support by running /usr/bin/ld -plugin. If it +complains “missing argument”, then you have plugin support; if it +instead reports an error such as “unknown option”, you will need to +either build gold yourself or install a version with plugin support.
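A hypothetical check session might look like this (the version string and the exact wording of the messages are illustrative):
+
+$ /usr/bin/ld -v
+GNU gold (GNU Binutils 2.21) 1.11            # <-- gold is installed
+$ /usr/bin/ld -plugin
+/usr/bin/ld: missing argument                # <-- plugin support is present
+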

+
    +
  • To build gold with plugin support: +
    +mkdir binutils
    +cd binutils
    +cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src login
    +{enter "anoncvs" as the password}
    +cvs -z 9 -d :pserver:anoncvs@sourceware.org:/cvs/src co binutils
    +mkdir build
    +cd build
    +../src/configure --enable-gold --enable-plugins
    +make all-gold
    +
    + That should leave you with binutils/build/gold/ld-new, which supports +the -plugin option. The build will also have produced +binutils/build/binutils/ar and nm-new, which support plugins +but don't have a visible -plugin option; they instead rely on the gold plugin +being present in ../lib/bfd-plugins relative to where the binaries are +placed. +
  • Build the LLVMgold plugin: Configure LLVM with + --with-binutils-include=/path/to/binutils/src/include and run + make. +
+
+ + + +
+

The linker takes a -plugin option that points to the path of + the plugin .so file. To find out what link command gcc + would run in a given situation, run gcc -v [...] and look + for the line where it runs collect2. Replace that with + ld-new -plugin /path/to/LLVMgold.so to test it out. Once you're + ready to switch to using gold, back up your existing /usr/bin/ld + and then replace it with ld-new.
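As a sketch, testing the plugin by hand might look like this (paths are illustrative, and the trailing "..." stands for whatever arguments the collect2 line printed):
+
+$ gcc -v -o hello hello.o 2>&1 | grep collect2   # <-- find the real link command
+$ ld-new -plugin /path/to/LLVMgold.so -o hello hello.o ...
+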

+

You can produce bitcode files from llvm-gcc using + -emit-llvm or -flto, or the -O4 flag which is + synonymous with -O3 -flto.

+

llvm-gcc has a -use-gold-plugin option which looks + for the gold plugin in the same directories as it looks for cc1 and + passes the -plugin option to ld. It will not look for an alternate + linker, which is why you need gold to be the installed system linker in your + path.

+

If you want ar and nm to work seamlessly as well, install + LLVMgold.so to /usr/lib/bfd-plugins. If you built your + own gold, be sure to install the ar and nm-new you built to + /usr/bin. +
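A hypothetical install sequence matching the description above (paths assume the build layout from the previous section):
+
+$ mkdir -p /usr/lib/bfd-plugins
+$ cp LLVMgold.so /usr/lib/bfd-plugins/
+$ cp binutils/build/binutils/ar binutils/build/binutils/nm-new /usr/bin/
+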

+

+ + + + +
+

The following is a worked example of using the gold plugin to mix + LLVM bitcode and native code. +

+--- a.c ---
+#include <stdio.h>
+
+extern void foo1(void);
+extern void foo4(void);
+
+void foo2(void) {
+  printf("Foo2\n");
+}
+
+void foo3(void) {
+  foo4();
+}
+
+int main(void) {
+  foo1();
+}
+
+--- b.c ---
+#include <stdio.h>
+
+extern void foo2(void);
+
+void foo1(void) {
+  foo2();
+}
+
+void foo4(void) {
+  printf("Foo4");
+}
+
+--- command lines ---
+$ llvm-gcc -flto a.c -c -o a.o              # <-- a.o is LLVM bitcode file
+$ ar q a.a a.o                              # <-- a.a is an archive with LLVM bitcode
+$ llvm-gcc b.c -c -o b.o                    # <-- b.o is native object file
+$ llvm-gcc -use-gold-plugin a.a b.o -o main # <-- link with LLVMgold plugin
+
+

Gold informs the plugin that foo3 is never referenced outside the IR, + leading LLVM to delete that function. However, unlike in the + libLTO + example, gold does not currently eliminate foo4.

+
+ + + + +
+

Once your system ld, ar and nm all support LLVM + bitcode, everything is in place for an easy-to-use LTO build of autotooled + projects:

+
    +
  • Follow the instructions on how to build LLVMgold.so.
  • +
  • Install the newly built binutils to $PREFIX
  • +
  • Copy Release/lib/LLVMgold.so to + $PREFIX/libexec/gcc/x86_64-unknown-linux-gnu/4.2.1/ and + $PREFIX/lib/bfd-plugins/
  • +
  • Set environment variables ($PREFIX is where you installed llvm-gcc and + binutils): +
    +export CC="$PREFIX/bin/llvm-gcc -use-gold-plugin"
    +export CXX="$PREFIX/bin/llvm-g++ -use-gold-plugin"
    +export AR="$PREFIX/bin/ar"
    +export NM="$PREFIX/bin/nm"
    +export RANLIB=/bin/true #ranlib is not needed, and doesn't support .bc files in .a
    +export CFLAGS="-O4"
    +
    +
  • +
  • Or you can just set your path: +
    +export PATH="$PREFIX/bin:$PATH"
    +export CC="llvm-gcc -use-gold-plugin"
    +export CXX="llvm-g++ -use-gold-plugin"
    +export RANLIB=/bin/true
    +export CFLAGS="-O4"
    +
    +
  • +
  • Configure & build the project as usual: ./configure && make && make check
  • +
+

The environment variable settings may work for non-autotooled projects + too, but you may need to set the LD environment variable as well.
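For instance, a non-autotooled build might need something like the following (assuming you installed the plugin-aware gold as $PREFIX/bin/ld):
+
+export LD="$PREFIX/bin/ld"
+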

+
+ + + + +
+

Gold is licensed under the GPLv3. LLVMgold uses the interface file +plugin-api.h from gold, which means that the resulting LLVMgold.so +binary is also GPLv3. It can still be used to link non-GPLv3 programs, just +as gold itself could without the plugin.

+
+ + +
+
+ Valid CSS + Valid HTML 4.01 + Nick Lewycky
+ The LLVM Compiler Infrastructure
+ Last modified: $Date: 2010-04-16 23:58:21 -0800 (Fri, 16 Apr 2010) $ +
+ + diff --git a/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt b/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt new file mode 100644 index 00000000000..f0861811920 --- /dev/null +++ b/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeas.txt @@ -0,0 +1,74 @@ +Date: Sat, 18 Nov 2000 09:19:35 -0600 (CST) +From: Vikram Adve +To: Chris Lattner +Subject: a few thoughts + +I've been mulling over the virtual machine problem and I had some +thoughts about some things for us to think about discuss: + +1. We need to be clear on our goals for the VM. Do we want to emphasize + portability and safety like the Java VM? Or shall we focus on the + architecture interface first (i.e., consider the code generation and + processor issues), since the architecture interface question is also + important for portable Java-type VMs? + + This is important because the audiences for these two goals are very + different. Architects and many compiler people care much more about + the second question. The Java compiler and OS community care much more + about the first one. + + Also, while the architecture interface question is important for + Java-type VMs, the design constraints are very different. + + +2. Design issues to consider (an initial list that we should continue + to modify). Note that I'm not trying to suggest actual solutions here, + but just various directions we can pursue: + + a. A single-assignment VM, which we've both already been thinking about. + + b. A strongly-typed VM. One question is do we need the types to be + explicitly declared or should they be inferred by the dynamic compiler? + + c. How do we get more high-level information into the VM while keeping + to a low-level VM design? + + o Explicit array references as operands? An alternative is + to have just an array type, and let the index computations be + separate 3-operand instructions. + + o Explicit instructions to handle aliasing, e.g.s: + -- an instruction to say "I speculate that these two values are not + aliased, but check at runtime", like speculative execution in + EPIC? + -- or an instruction to check whether two values are aliased and + execute different code depending on the answer, somewhat like + predicated code in EPIC + + o (This one is a difficult but powerful idea.) + A "thread-id" field on every instruction that allows the static + compiler to generate a set of parallel threads, and then have + the runtime compiler and hardware do what they please with it. + This has very powerful uses, but thread-id on every instruction + is expensive in terms of instruction size and code size. + We would need to compactly encode it somehow. + + Also, this will require some reading on at least two other + projects: + -- Multiscalar architecture from Wisconsin + -- Simultaneous multithreading architecture from Washington + + o Or forget all this and stick to a traditional instruction set? + + +BTW, on an unrelated note, after the meeting yesterday, I did remember +that you had suggested doing instruction scheduling on SSA form instead +of a dependence DAG earlier in the semester. When we talked about +it yesterday, I didn't remember where the idea had come from but I +remembered later. Just giving credit where its due... + +Perhaps you can save the above as a file under RCS so you and I can +continue to expand on this. 
+ +--Vikram + diff --git a/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt b/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt new file mode 100644 index 00000000000..1c725f5aa71 --- /dev/null +++ b/final/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt @@ -0,0 +1,199 @@ +Date: Sun, 19 Nov 2000 16:23:57 -0600 (CST) +From: Chris Lattner +To: Vikram Adve +Subject: Re: a few thoughts + +Okay... here are a few of my thoughts on this (it's good to know that we +think so alike!): + +> 1. We need to be clear on our goals for the VM. Do we want to emphasize +> portability and safety like the Java VM? Or shall we focus on the +> architecture interface first (i.e., consider the code generation and +> processor issues), since the architecture interface question is also +> important for portable Java-type VMs? + +I forsee the architecture looking kinda like this: (which is completely +subject to change) + +1. The VM code is NOT guaranteed safe in a java sense. Doing so makes it + basically impossible to support C like languages. Besides that, + certifying a register based language as safe at run time would be a + pretty expensive operation to have to do. Additionally, we would like + to be able to statically eliminate many bounds checks in Java + programs... for example. + + 2. Instead, we can do the following (eventually): + * Java bytecode is used as our "safe" representation (to avoid + reinventing something that we don't add much value to). When the + user chooses to execute Java bytecodes directly (ie, not + precompiled) the runtime compiler can do some very simple + transformations (JIT style) to convert it into valid input for our + VM. Performance is not wonderful, but it works right. + * The file is scheduled to be compiled (rigorously) at a later + time. This could be done by some background process or by a second + processor in the system during idle time or something... + * To keep things "safe" ie to enforce a sandbox on Java/foreign code, + we could sign the generated VM code with a host specific private + key. Then before the code is executed/loaded, we can check to see if + the trusted compiler generated the code. This would be much quicker + than having to validate consistency (especially if bounds checks have + been removed, for example) + +> This is important because the audiences for these two goals are very +> different. Architects and many compiler people care much more about +> the second question. The Java compiler and OS community care much more +> about the first one. + +3. By focusing on a more low level virtual machine, we have much more room + for value add. The nice safe "sandbox" VM can be provided as a layer + on top of it. It also lets us focus on the more interesting compilers + related projects. + +> 2. Design issues to consider (an initial list that we should continue +> to modify). Note that I'm not trying to suggest actual solutions here, +> but just various directions we can pursue: + +Understood. :) + +> a. A single-assignment VM, which we've both already been thinking +> about. + +Yup, I think that this makes a lot of sense. I am still intrigued, +however, by the prospect of a minimally allocated VM representation... I +think that it could have definate advantages for certain applications +(think very small machines, like PDAs). I don't, however, think that our +initial implementations should focus on this. :) + +Here are some other auxilliary goals that I think we should consider: + +1. 
Primary goal: Support a high performance dynamic compilation + system. This means that we have an "ideal" division of labor between + the runtime and static compilers. Of course, the other goals of the + system somewhat reduce the importance of this point (f.e. portability + reduces performance, but hopefully not much) +2. Portability to different processors. Since we are most familiar with + x86 and solaris, I think that these two are excellent candidates when + we get that far... +3. Support for all languages & styles of programming (general purpose + VM). This is the point that disallows java style bytecodes, where all + array refs are checked for bounds, etc... +4. Support linking between different language families. For example, call + C functions directly from Java without using the nasty/slow/gross JNI + layer. This involves several subpoints: + A. Support for languages that require garbage collectors and integration + with languages that don't. As a base point, we could insist on + always using a conservative GC, but implement free as a noop, f.e. + +> b. A strongly-typed VM. One question is do we need the types to be +> explicitly declared or should they be inferred by the dynamic +> compiler? + + B. This is kind of similar to another idea that I have: make OOP + constructs (virtual function tables, class heirarchies, etc) explicit + in the VM representation. I believe that the number of additional + constructs would be fairly low, but would give us lots of important + information... something else that would/could be important is to + have exceptions as first class types so that they would be handled in + a uniform way for the entire VM... so that C functions can call Java + functions for example... + +> c. How do we get more high-level information into the VM while keeping +> to a low-level VM design? +> o Explicit array references as operands? An alternative is +> to have just an array type, and let the index computations be +> separate 3-operand instructions. + + C. In the model I was thinking of (subject to change of course), we + would just have an array type (distinct from the pointer + types). This would allow us to have arbitrarily complex index + expressions, while still distinguishing "load" from "Array load", + for example. Perhaps also, switch jump tables would be first class + types as well? This would allow better reasoning about the program. + +5. Support dynamic loading of code from various sources. Already + mentioned above was the example of loading java bytecodes, but we want + to support dynamic loading of VM code as well. This makes the job of + the runtime compiler much more interesting: it can do interprocedural + optimizations that the static compiler can't do, because it doesn't + have all of the required information (for example, inlining from + shared libraries, etc...) + +6. Define a set of generally useful annotations to add to the VM + representation. For example, a function can be analysed to see if it + has any sideeffects when run... also, the MOD/REF sets could be + calculated, etc... we would have to determine what is reasonable. This + would generally be used to make IP optimizations cheaper for the + runtime compiler... + +> o Explicit instructions to handle aliasing, e.g.s: +> -- an instruction to say "I speculate that these two values are not +> aliased, but check at runtime", like speculative execution in +> EPIC? 
+> -- or an instruction to check whether two values are aliased and +> execute different code depending on the answer, somewhat like +> predicated code in EPIC + +These are also very good points... if this can be determined at compile +time. I think that an epic style of representation (not the instruction +packing, just the information presented) could be a very interesting model +to use... more later... + +> o (This one is a difficult but powerful idea.) +> A "thread-id" field on every instruction that allows the static +> compiler to generate a set of parallel threads, and then have +> the runtime compiler and hardware do what they please with it. +> This has very powerful uses, but thread-id on every instruction +> is expensive in terms of instruction size and code size. +> We would need to compactly encode it somehow. + +Yes yes yes! :) I think it would be *VERY* useful to include this kind +of information (which EPIC architectures *implicitly* encode. The trend +that we are seeing supports this greatly: + +1. Commodity processors are getting massive SIMD support: + * Intel/Amd MMX/MMX2 + * AMD's 3Dnow! + * Intel's SSE/SSE2 + * Sun's VIS +2. SMP is becoming much more common, especially in the server space. +3. Multiple processors on a die are right around the corner. + +If nothing else, not designing this in would severely limit our future +expansion of the project... + +> Also, this will require some reading on at least two other +> projects: +> -- Multiscalar architecture from Wisconsin +> -- Simultaneous multithreading architecture from Washington +> +> o Or forget all this and stick to a traditional instruction set? + +Heh... :) Well, from a pure research point of view, it is almost more +attactive to go with the most extreme/different ISA possible. On one axis +you get safety and conservatism, and on the other you get degree of +influence that the results have. Of course the problem with pure research +is that often times there is no concrete product of the research... :) + +> BTW, on an unrelated note, after the meeting yesterday, I did remember +> that you had suggested doing instruction scheduling on SSA form instead +> of a dependence DAG earlier in the semester. When we talked about +> it yesterday, I didn't remember where the idea had come from but I +> remembered later. Just giving credit where its due... + +:) Thanks. + +> Perhaps you can save the above as a file under RCS so you and I can +> continue to expand on this. + +I think it makes sense to do so when we get our ideas more formalized and +bounce it back and forth a couple of times... then I'll do a more formal +writeup of our goals and ideas. Obviously our first implementation will +not want to do all of the stuff that I pointed out above... be we will +want to design the project so that we do not artificially limit ourselves +at sometime in the future... + +Anyways, let me know what you think about these ideas... and if they sound +reasonable... + +-Chris + diff --git a/final/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt b/final/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt new file mode 100644 index 00000000000..8c452924dd1 --- /dev/null +++ b/final/docs/HistoricalNotes/2000-12-06-EncodingIdea.txt @@ -0,0 +1,30 @@ +From: Chris Lattner [mailto:sabre@nondot.org] +Sent: Wednesday, December 06, 2000 6:41 PM +To: Vikram S. 
Adve +Subject: Additional idea with respect to encoding + +Here's another idea with respect to keeping the common case instruction +size down (less than 32 bits ideally): + +Instead of encoding an instruction to operate on two register numbers, +have it operate on two negative offsets based on the current register +number. Therefore, instead of using: + +r57 = add r55, r56 (r57 is the implicit dest register, of course) + +We could use: + +r57 = add -2, -1 + +My guess is that most SSA references are to recent values (especially if +they correspond to expressions like (x+y*z+p*q/ ...), so the negative +numbers would tend to stay small, even at the end of the procedure (where +the implicit register destination number could be quite large). Of course +the negative sign is reduntant, so you would be storing small integers +almost all of the time, and 5-6 bits worth of register number would be +plenty for most cases... + +What do you think? + +-Chris + diff --git a/final/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt b/final/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt new file mode 100644 index 00000000000..b66e18556f5 --- /dev/null +++ b/final/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt @@ -0,0 +1,83 @@ +SUMMARY +------- + +We met to discuss the LLVM instruction format and bytecode representation: + +ISSUES RESOLVED +--------------- + +1. We decided that we shall use a flat namespace to represent our + variables in SSA form, as opposed to having a two dimensional namespace + of the original variable and the SSA instance subscript. + +ARGUMENT AGAINST: + * A two dimensional namespace would be valuable when doing alias + analysis because the extra information can help limit the scope of + analysis. + +ARGUMENT FOR: + * Including this information would require that all users of the LLVM + bytecode would have to parse and handle it. This would slow down the + common case and inflate the instruction representation with another + infinite variable space. + +REASONING: + * It was decided that because original variable sources could be + reconstructed from SSA form in linear time, that it would be an + unjustified expense for the common case to include the extra + information for one optimization. Alias analysis itself is typically + greater than linear in asymptotic complexity, so this extra analaysis + would not affect the runtime of the optimization in a significant + way. Additionally, this would be an unlikely optimization to do at + runtime. + + +IDEAS TO CONSIDER +----------------- + +1. Including dominator information in the LLVM bytecode + representation. This is one example of an analysis result that may be + packaged with the bytecodes themselves. As a conceptual implementation + idea, we could include an immediate dominator number for each basic block + in the LLVM bytecode program. Basic blocks could be numbered according + to the order of occurance in the bytecode representation. + +2. Including loop header and body information. This would facilitate + detection of intervals and natural loops. + +UNRESOLVED ISSUES +----------------- + +1. Will oSUIF provide enough of an infrastructure to support the research + that we will be doing? We know that it has less than stellar + performance, but hope that this will be of little importance for our + static compiler. This could affect us if we decided to do some IP + research. Also we do not yet understand the level of exception support + currently implemented. + +2. 
Should we consider the requirements of a direct hardware implementation + of the LLVM when we design it? If so, several design issues should + have their priorities shifted. The other option is to focus on a + software layer interpreting the LLVM in all cases. + +3. Should we use some form of packetized format to improve forward + compatibility? For example, we could design the system to encode a + packet type and length field before analysis information, to allow a + runtime to skip information that it didn't understand in a bytecode + stream. The obvious benefit would be for compatibility, the drawback + is that it would tend to splinter that 'standard' LLVM definition. + +4. Should we use fixed length instructions or variable length + instructions? Fetching variable length instructions is expensive (for + either hardware or software based LLVM runtimes), but we have several + 'infinite' spaces that instructions operate in (SSA register numbers, + type spaces, or packet length [if packets were implemented]). Several + options were mentioned including: + A. Using 16 or 32 bit numbers, which would be 'big enough' + B. A scheme similar to how UTF-8 works, to encode infinite numbers + while keeping small number small. + C. Use something similar to Huffman encoding, so that the most common + numbers are the smallest. + +-Chris + diff --git a/final/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt b/final/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt new file mode 100644 index 00000000000..111706a3447 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-01-31-UniversalIRIdea.txt @@ -0,0 +1,39 @@ +Date: Wed, 31 Jan 2001 12:04:33 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: another thought + +I have a budding idea about making LLVM a little more ambitious: a +customizable runtime system that can be used to implement language-specific +virtual machines for many different languages. E.g., a C vm, a C++ vm, a +Java vm, a Lisp vm, .. + +The idea would be that LLVM would provide a standard set of runtime features +(some low-level like standard assembly instructions with code generation and +static and runtime optimization; some higher-level like type-safety and +perhaps a garbage collection library). Each language vm would select the +runtime features needed for that language, extending or customizing them as +needed. Most of the machine-dependent code-generation and optimization +features as well as low-level machine-independent optimizations (like PRE) +could be provided by LLVM and should be sufficient for any language, +simplifying the language compiler. (This would also help interoperability +between languages.) Also, some or most of the higher-level +machine-independent features like type-safety and access safety should be +reusable by different languages, with minor extensions. The language +compiler could then focus on language-specific analyses and optimizations. + +The risk is that this sounds like a universal IR -- something that the +compiler community has tried and failed to develop for decades, and is +universally skeptical about. No matter what we say, we won't be able to +convince anyone that we have a universal IR that will work. We need to +think about whether LLVM is different or if has something novel that might +convince people. E.g., the idea of providing a package of separable +features that different languages select from. Also, using SSA with or +without type-safety as the intermediate representation. 
+ +One interesting starting point would be to discuss how a JVM would be +implemented on top of LLVM a bit more. That might give us clues on how to +structure LLVM to support one or more language VMs. + +--Vikram + diff --git a/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt new file mode 100644 index 00000000000..c09cf1f03cc --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebate.txt @@ -0,0 +1,67 @@ +Date: Tue, 6 Feb 2001 20:27:37 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: Type notation debate... + +This is the way that I am currently planning on implementing types: + +Primitive Types: +type ::= void|bool|sbyte|ubyte|short|ushort|int|uint|long|ulong + +Method: +typelist ::= typelisth | /*empty*/ +typelisth ::= type | typelisth ',' type +type ::= type (typelist) + +Arrays (without and with size): +type ::= '[' type ']' | '[' INT ',' type ']' + +Pointer: +type ::= type '*' + +Structure: +type ::= '{' typelist '}' + +Packed: +type ::= '<' INT ',' type '>' + +Simple examples: + +[[ %4, int ]] - array of (array of 4 (int)) +[ { int, int } ] - Array of structure +[ < %4, int > ] - Array of 128 bit SIMD packets +int (int, [[int, %4]]) - Method taking a 2d array and int, returning int + + +Okay before you comment, please look at: + +http://www.research.att.com/~bs/devXinterview.html + +Search for "In another interview, you defined the C declarator syntax as +an experiment that failed. However, this syntactic construct has been +around for 27 years and perhaps more; why do you consider it problematic +(except for its cumbersome syntax)?" and read that response for me. :) + +Now with this syntax, his example would be represented as: + +[ %10, bool (int, int) * ] * + +vs + +bool (*(*)[10])(int, int) + +in C. + +Basically, my argument for this type construction system is that it is +VERY simple to use and understand (although it IS different than C, it is +very simple and straightforward, which C is NOT). In fact, I would assert +that most programmers TODAY do not understand pointers to member +functions, and have to look up an example when they have to write them. + +In my opinion, it is critically important to have clear and concise type +specifications, because types are going to be all over the programs. + +Let me know your thoughts on this. :) + +-Chris + diff --git a/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt new file mode 100644 index 00000000000..8bfefbf69f6 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp1.txt @@ -0,0 +1,75 @@ +Date: Thu, 8 Feb 2001 08:42:04 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: RE: Type notation debate... + +Chris, + +> Okay before you comment, please look at: +> +> http://www.research.att.com/~bs/devXinterview.html + +I read this argument. Even before that, I was already in agreement with you +and him that the C declarator syntax is difficult and confusing. + +But in fact, if you read the entire answer carefully, he came to the same +conclusion I do: that you have to go with familiar syntax over logical +syntax because familiarity is such a strong force: + + "However, familiarity is a strong force. To compare, in English, we +live +more or less happily with the absurd rules for "to be" (am, are, is, been, +was, were, ...) and all attempts to simplify are treated with contempt or +(preferably) humor. 
It be a curious world and it always beed." + +> Basically, my argument for this type construction system is that it is +> VERY simple to use and understand (although it IS different than C, it is +> very simple and straightforward, which C is NOT). In fact, I would assert +> that most programmers TODAY do not understand pointers to member +> functions, and have to look up an example when they have to write them. + +Again, I don't disagree with this at all. But to some extent this +particular problem is inherently difficult. Your syntax for the above +example may be easier for you to read because this is the way you have been +thinking about it. Honestly, I don't find it much easier than the C syntax. +In either case, I would have to look up an example to write pointers to +member functions. + +But pointers to member functions are nowhere near as common as arrays. And +the old array syntax: + type [ int, int, ...] +is just much more familiar and clear to people than anything new you +introduce, no matter how logical it is. Introducing a new syntax that may +make function pointers easier but makes arrays much more difficult seems +very risky to me. + +> In my opinion, it is critically important to have clear and concise type +> specifications, because types are going to be all over the programs. + +I absolutely agree. But the question is, what is more clear and concise? +The syntax programmers are used to out of years of experience or a new +syntax that they have never seen that has a more logical structure. I think +the answer is the former. Sometimes, you have to give up a better idea +because you can't overcome sociological barriers to it. Qwerty keyboards +and Windows are two classic examples of bad technology that are difficult to +root out. + +P.S. Also, while I agree that most your syntax is more logical, there is +one part that isn't: + +Arrays (without and with size): +type ::= '[' type ']' | '[' INT ',' type ']'. + +The arrays with size lists the dimensions and the type in a single list. +That is just too confusing: + [10, 40, int] +This seems to be a 3-D array where the third dimension is something strange. +It is too confusing to have a list of 3 things, some of which are dimensions +and one is a type. Either of the following would be better: + + array [10, 40] of int +or + int [10, 40] + +--Vikram + diff --git a/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt new file mode 100644 index 00000000000..6e9784158a3 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp2.txt @@ -0,0 +1,53 @@ +Date: Thu, 8 Feb 2001 14:31:05 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: RE: Type notation debate... + +> Arrays (without and with size): +> type ::= '[' type ']' | '[' INT ',' type ']'. +> +> The arrays with size lists the dimensions and the type in a single list. +> That is just too confusing: + +> [10, 40, int] +> This seems to be a 3-D array where the third dimension is something strange. +> It is too confusing to have a list of 3 things, some of which are dimensions +> and one is a type. + +The above grammar indicates that there is only one integer parameter, ie +the upper bound. The lower bound is always implied to be zero, for +several reasons: + +* As a low level VM, we want to expose addressing computations + explicitly. 
Since the lower bound must always be known in a high level + language statically, the language front end can do the translation + automatically. +* This fits more closely with what Java needs, ie what we need in the + short term. Java arrays are always zero based. + +If a two element list is too confusing, I would recommend an alternate +syntax of: + +type ::= '[' type ']' | '[' INT 'x' type ']'. + +For example: + [12 x int] + [12x int] + [ 12 x [ 4x int ]] + +Which is syntactically nicer, and more explicit. + +> Either of the following would be better: +> array [10, 40] of int + +I considered this approach for arrays in general (ie array of int/ array +of 12 int), but found that it made declarations WAY too long. Remember +that because of the nature of llvm, you get a lot of types strewn all over +the program, and using the 'typedef' like facility is not a wonderful +option, because then types aren't explicit anymore. + +I find this email interesting, because you contradict the previous email +you sent, where you recommend that we stick to C syntax.... + +-Chris + diff --git a/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt new file mode 100644 index 00000000000..7b9032742a2 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt @@ -0,0 +1,89 @@ +> But in fact, if you read the entire answer carefully, he came to the same +> conclusion I do: that you have to go with familiar syntax over logical +> syntax because familiarity is such a strong force: +> "However, familiarity is a strong force. To compare, in English, we +live +> more or less happily with the absurd rules for "to be" (am, are, is, been, +> was, were, ...) and all attempts to simplify are treated with contempt or +> (preferably) humor. It be a curious world and it always beed." + +Although you have to remember that his situation was considerably +different than ours. He was in a position where he was designing a high +level language that had to be COMPATIBLE with C. Our language is such +that a new person would have to learn the new, different, syntax +anyways. Making them learn about the type system does not seem like much +of a stretch from learning the opcodes and how SSA form works, and how +everything ties together... + +> > Basically, my argument for this type construction system is that it is +> > VERY simple to use and understand (although it IS different than C, it is +> > very simple and straightforward, which C is NOT). In fact, I would assert +> > that most programmers TODAY do not understand pointers to member +> > functions, and have to look up an example when they have to write them. + +> Again, I don't disagree with this at all. But to some extent this +> particular problem is inherently difficult. Your syntax for the above +> example may be easier for you to read because this is the way you have been +> thinking about it. Honestly, I don't find it much easier than the C syntax. +> In either case, I would have to look up an example to write pointers to +> member functions. + +I would argue that because the lexical structure of the language is self +consistent, any person who spent a significant amount of time programming +in LLVM directly would understand how to do it without looking it up in a +manual. The reason this does not work for C is because you rarely have to +declare these pointers, and the syntax is inconsistent with the method +declaration and calling syntax. 
+ +> But pointers to member functions are nowhere near as common as arrays. + +Very true. If you're implementing an object oriented language, however, +remember that you have to do all the pointer to member function stuff +yourself.... so everytime you invoke a virtual method one is involved +(instead of having C++ hide it for you behind "syntactic sugar"). + +> And the old array syntax: +> type [ int, int, ...] +> is just much more familiar and clear to people than anything new you +> introduce, no matter how logical it is. + +Erm... excuse me but how is this the "old array syntax"? If you are +arguing for consistency with C, you should be asking for 'type int []', +which is significantly different than the above (beside the above +introduces a new operator and duplicates information +needlessly). Basically what I am suggesting is exactly the above without +the fluff. So instead of: + + type [ int, int, ...] + +you use: + + type [ int ] + +> Introducing a new syntax that may +> make function pointers easier but makes arrays much more difficult seems +> very risky to me. + +This is not about function pointers. This is about consistency in the +type system, and consistency with the rest of the language. The point +above does not make arrays any more difficult to use, and makes the +structure of types much more obvious than the "c way". + +> > In my opinion, it is critically important to have clear and concise type +> > specifications, because types are going to be all over the programs. +> +> I absolutely agree. But the question is, what is more clear and concise? +> The syntax programmers are used to out of years of experience or a new +> syntax that they have never seen that has a more logical structure. I think +> the answer is the former. Sometimes, you have to give up a better idea +> because you can't overcome sociological barriers to it. Qwerty keyboards +> and Windows are two classic examples of bad technology that are difficult to +> root out. + +Very true, but you seem to be advocating a completely different Type +system than C has, in addition to it not offering the advantages of clear +structure that the system I recommended does... so you seem to not have a +problem with changing this, just with what I change it to. :) + +-Chris + diff --git a/final/docs/HistoricalNotes/2001-02-09-AdveComments.txt b/final/docs/HistoricalNotes/2001-02-09-AdveComments.txt new file mode 100644 index 00000000000..5503233c1ed --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-09-AdveComments.txt @@ -0,0 +1,120 @@ +Ok, here are my comments and suggestions about the LLVM instruction set. +We should discuss some now, but can discuss many of them later, when we +revisit synchronization, type inference, and other issues. +(We have discussed some of the comments already.) + + +o We should consider eliminating the type annotation in cases where it is + essentially obvious from the instruction type, e.g., in br, it is obvious + that the first arg. should be a bool and the other args should be labels: + + br bool , label , label + + I think your point was that making all types explicit improves clarity + and readability. I agree to some extent, but it also comes at the cost + of verbosity. And when the types are obvious from people's experience + (e.g., in the br instruction), it doesn't seem to help as much. + + +o On reflection, I really like your idea of having the two different switch + types (even though they encode implementation techniques rather than + semantics). 
It should simplify building the CFG and my guess is it could + enable some significant optimizations, though we should think about which. + + +o In the lookup-indirect form of the switch, is there a reason not to make + the val-type uint? Most HLL switch statements (including Java and C++) + require that anyway. And it would also make the val-type uniform + in the two forms of the switch. + + I did see the switch-on-bool examples and, while cute, we can just use + the branch instructions in that particular case. + + +o I agree with your comment that we don't need 'neg'. + + +o There's a trade-off with the cast instruction: + + it avoids having to define all the upcasts and downcasts that are + valid for the operands of each instruction (you probably have thought + of other benefits also) + - it could make the bytecode significantly larger because there could + be a lot of cast operations + + +o Making the second arg. to 'shl' a ubyte seems good enough to me. + 255 positions seems adequate for several generations of machines + and is more compact than uint. + + +o I still have some major concerns about including malloc and free in the + language (either as builtin functions or instructions). LLVM must be + able to represent code from many different languages. Languages such as + C, C++ Java and Fortran 90 would not be able to use our malloc anyway + because each of them will want to provide a library implementation of it. + + This gets even worse when code from different languages is linked + into a single executable (which is fairly common in large apps). + Having a single malloc would just not suffice, and instead would simply + complicate the picture further because it adds an extra variant in + addition to the one each language provides. + + Instead, providing a default library version of malloc and free + (and perhaps a malloc_gc with garbage collection instead of free) + would make a good implementation available to anyone who wants it. + + I don't recall all your arguments in favor so let's discuss this again, + and soon. + + +o 'alloca' on the other hand sounds like a good idea, and the + implementation seems fairly language-independent so it doesn't have the + problems with malloc listed above. + + +o About indirect call: + Your option #2 sounded good to me. I'm not sure I understand your + concern about an explicit 'icall' instruction? + + +o A pair of important synchronization instr'ns to think about: + load-linked + store-conditional + + +o Other classes of instructions that are valuable for pipeline performance: + conditional-move + predicated instructions + + +o I believe tail calls are relatively easy to identify; do you know why + .NET has a tailcall instruction? + + +o I agree that we need a static data space. Otherwise, emulating global + data gets unnecessarily complex. + + +o About explicit parallelism: + + We once talked about adding a symbolic thread-id field to each + instruction. (It could be optional so single-threaded codes are + not penalized.) This could map well to multi-threaded architectures + while providing easy ILP for single-threaded onces. But it is probably + too radical an idea to include in a base version of LLVM. Instead, it + could a great topic for a separate study. + + What is the semantics of the IA64 stop bit? + + + + +o And finally, another thought about the syntax for arrays :-) + + Although this syntax: + array of + is verbose, it will be used only in the human-readable assembly code so + size should not matter. 
I think we should consider it because I find it + to be the clearest syntax. It could even make arrays of function + pointers somewhat readable. + diff --git a/final/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt b/final/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt new file mode 100644 index 00000000000..5c87330fb7e --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt @@ -0,0 +1,245 @@ +From: Chris Lattner +To: "Vikram S. Adve" +Subject: Re: LLVM Feedback + +I've included your feedback in the /home/vadve/lattner/llvm/docs directory +so that it will live in CVS eventually with the rest of LLVM. I've +significantly updated the documentation to reflect the changes you +suggested, as specified below: + +> We should consider eliminating the type annotation in cases where it is +> essentially obvious from the instruction type: +> br bool , label , label +> I think your point was that making all types explicit improves clarity +> and readability. I agree to some extent, but it also comes at the +> cost of verbosity. And when the types are obvious from people's +> experience (e.g., in the br instruction), it doesn't seem to help as +> much. + +Very true. We should discuss this more, but my reasoning is more of a +consistency argument. There are VERY few instructions that can have all +of the types eliminated, and doing so when available unnecesarily makes +the language more difficult to handle. Especially when you see 'int +%this' and 'bool %that' all over the place, I think it would be +disorienting to see: + + br %predicate, %iftrue, %iffalse + +for branches. Even just typing that once gives me the creeps. ;) Like I +said, we should probably discuss this further in person... + +> On reflection, I really like your idea of having the two different +> switch types (even though they encode implementation techniques rather +> than semantics). It should simplify building the CFG and my guess is it +> could enable some significant optimizations, though we should think +> about which. + +Great. I added a note to the switch section commenting on how the VM +should just use the instruction type as a hint, and that the +implementation may choose altermate representations (such as predicated +branches). + +> In the lookup-indirect form of the switch, is there a reason not to +> make the val-type uint? + +No. This was something I was debating for a while, and didn't really feel +strongly about either way. It is common to switch on other types in HLL's +(for example signed int's are particually common), but in this case, all +that will be added is an additional 'cast' instruction. I removed that +from the spec. + +> I agree with your comment that we don't need 'neg' + +Removed. + +> There's a trade-off with the cast instruction: +> + it avoids having to define all the upcasts and downcasts that are +> valid for the operands of each instruction (you probably have +> thought of other benefits also) +> - it could make the bytecode significantly larger because there could +> be a lot of cast operations + + + You NEED casts to represent things like: + void foo(float); + ... + int x; + ... + foo(x); + in a language like C. Even in a Java like language, you need upcasts + and some way to implement dynamic downcasts. + + Not all forms of instructions take every type (for example you can't + shift by a floating point number of bits), thus SOME programs will need + implicit casts. 
+ +To be efficient and to avoid your '-' point above, we just have to be +careful to specify that the instructions shall operate on all common +types, therefore casting should be relatively uncommon. For example all +of the arithmetic operations work on almost all data types. + +> Making the second arg. to 'shl' a ubyte seems good enough to me. +> 255 positions seems adequate for several generations of machines + +Okay, that comment is removed. + +> and is more compact than uint. + +No, it isn't. Remember that the bytecode encoding saves value slots into +the bytecode instructions themselves, not constant values. This is +another case where we may introduce more cast instructions (but we will +also reduce the number of opcode variants that must be supported by a +virtual machine). Because most shifts are by constant values, I don't +think that we'll have to cast many shifts. :) + +> I still have some major concerns about including malloc and free in the +> language (either as builtin functions or instructions). + +Agreed. How about this proposal: + +malloc/free are either built in functions or actual opcodes. They provide +all of the type safety that the document would indicate, blah blah +blah. :) + +Now, because of all of the excellent points that you raised, an +implementation may want to override the default malloc/free behavior of +the program. To do this, they simply implement a "malloc" and +"free" function. The virtual machine will then be defined to use the user +defined malloc/free function (which return/take void*'s, not type'd +pointers like the builtin function would) if one is available, otherwise +fall back on a system malloc/free. + +Does this sound like a good compromise? It would give us all of the +typesafety/elegance in the language while still allowing the user to do +all the cool stuff they want to... + +> 'alloca' on the other hand sounds like a good idea, and the +> implementation seems fairly language-independent so it doesn't have the +> problems with malloc listed above. + +Okay, once we get the above stuff figured out, I'll put it all in the +spec. + +> About indirect call: +> Your option #2 sounded good to me. I'm not sure I understand your +> concern about an explicit 'icall' instruction? + +I worry too much. :) The other alternative has been removed. 'icall' is +now up in the instruction list next to 'call'. + +> I believe tail calls are relatively easy to identify; do you know why +> .NET has a tailcall instruction? + +Although I am just guessing, I believe it probably has to do with the fact +that they want languages like Haskell and lisp to be efficiently runnable +on their VM. Of course this means that the VM MUST implement tail calls +'correctly', or else life will suck. :) I would put this into a future +feature bin, because it could be pretty handy... + +> A pair of important synchronization instr'ns to think about: +> load-linked +> store-conditional + +What is 'load-linked'? I think that (at least for now) I should add these +to the 'possible extensions' section, because they are not immediately +needed... + +> Other classes of instructions that are valuable for pipeline +> performance: +> conditional-move +> predicated instructions + +Conditional move is effectly a special case of a predicated +instruction... and I think that all predicated instructions can possibly +be implemented later in LLVM. It would significantly change things, and +it doesn't seem to be very necessary right now. 
It would seem to +complicate flow control analysis a LOT in the virtual machine. I would +tend to prefer that a predicated architecture like IA64 convert from a +"basic block" representation to a predicated rep as part of it's dynamic +complication phase. Also, if a basic block contains ONLY a move, then +that can be trivally translated into a conditional move... + +> I agree that we need a static data space. Otherwise, emulating global +> data gets unnecessarily complex. + +Definately. Also a later item though. :) + +> We once talked about adding a symbolic thread-id field to each +> .. +> Instead, it could a great topic for a separate study. + +Agreed. :) + +> What is the semantics of the IA64 stop bit? + +Basically, the IA64 writes instructions like this: +mov ... +add ... +sub ... +op xxx +op xxx +;; +mov ... +add ... +sub ... +op xxx +op xxx +;; + +Where the ;; delimits a group of instruction with no dependencies between +them, which can all be executed concurrently (to the limits of the +available functional units). The ;; gets translated into a bit set in one +of the opcodes. + +The advantages of this representation is that you don't have to do some +kind of 'thread id scheduling' pass by having to specify ahead of time how +many threads to use, and the representation doesn't have a per instruction +overhead... + +> And finally, another thought about the syntax for arrays :-) +> Although this syntax: +> array of +> is verbose, it will be used only in the human-readable assembly code so +> size should not matter. I think we should consider it because I find it +> to be the clearest syntax. It could even make arrays of function +> pointers somewhat readable. + +My only comment will be to give you an example of why this is a bad +idea. :) + +Here is an example of using the switch statement (with my recommended +syntax): + +switch uint %val, label %otherwise, + [%3 x {uint, label}] [ { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } ] + +Here it is with the syntax you are proposing: + +switch uint %val, label %otherwise, + array %3 of {uint, label} + array of {uint, label} + { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } + +Which is ambiguous and very verbose. It would be possible to specify +constants with [] brackets as in my syntax, which would look like this: + +switch uint %val, label %otherwise, + array %3 of {uint, label} [ { uint %57, label %l1 }, + { uint %20, label %l2 }, + { uint %14, label %l3 } ] + +But then the syntax is inconsistent between type definition and constant +definition (why do []'s enclose the constants but not the types??). + +Anyways, I'm sure that there is much debate still to be had over +this... :) + +-Chris + +http://www.nondot.org/~sabre/os/ +http://www.nondot.org/MagicStats/ +http://korbit.sourceforge.net/ + + diff --git a/final/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt b/final/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt new file mode 100644 index 00000000000..2c7534d9da1 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-13-Reference-Memory.txt @@ -0,0 +1,39 @@ +Date: Tue, 13 Feb 2001 13:29:52 -0600 (CST) +From: Chris Lattner +To: Vikram S. Adve +Subject: LLVM Concerns... + + +I've updated the documentation to include load store and allocation +instructions (please take a look and let me know if I'm on the right +track): + +file:/home/vadve/lattner/llvm/docs/LangRef.html#memoryops + +I have a couple of concerns I would like to bring up: + +1. 
Reference types + Right now, I've spec'd out the language to have a pointer type, which + works fine for lots of stuff... except that Java really has + references: constrained pointers that cannot be manipulated: added and + subtracted, moved, etc... Do we want to have a type like this? It + could be very nice for analysis (pointer always points to the start of + an object, etc...) and more closely matches Java semantics. The + pointer type would be kept for C++ like semantics. Through analysis, + C++ pointers could be promoted to references in the LLVM + representation. + +2. Our "implicit" memory references in assembly language: + After thinking about it, this model has two problems: + A. If you do pointer analysis and realize that two stores are + independent and can share the same memory source object, there is + no way to represent this in either the bytecode or assembly. + B. When parsing assembly/bytecode, we effectively have to do a full + SSA generation/PHI node insertion pass to build the dependencies + when we don't want the "pinned" representation. This is not + cool. + I'm tempted to make memory references explicit in both the assembly and + bytecode to get around this... what do you think? + +-Chris + diff --git a/final/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt b/final/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt new file mode 100644 index 00000000000..505343378df --- /dev/null +++ b/final/docs/HistoricalNotes/2001-02-13-Reference-MemoryResponse.txt @@ -0,0 +1,47 @@ +Date: Tue, 13 Feb 2001 18:25:42 -0600 +From: Vikram S. Adve +To: Chris Lattner +Subject: RE: LLVM Concerns... + +> 1. Reference types +> Right now, I've spec'd out the language to have a pointer type, which +> works fine for lots of stuff... except that Java really has +> references: constrained pointers that cannot be manipulated: added and +> subtracted, moved, etc... Do we want to have a type like this? It +> could be very nice for analysis (pointer always points to the start of +> an object, etc...) and more closely matches Java semantics. The +> pointer type would be kept for C++ like semantics. Through analysis, +> C++ pointers could be promoted to references in the LLVM +> representation. + + +You're right, having references would be useful. Even for C++ the *static* +compiler could generate references instead of pointers with fairly +straightforward analysis. Let's include a reference type for now. But I'm +also really concerned that LLVM is becoming big and complex and (perhaps) +too high-level. After we get some initial performance results, we may have +a clearer idea of what our goals should be and we should revisit this +question then. + +> 2. Our "implicit" memory references in assembly language: +> After thinking about it, this model has two problems: +> A. If you do pointer analysis and realize that two stores are +> independent and can share the same memory source object, + +not sure what you meant by "share the same memory source object" + +> there is +> no way to represent this in either the bytecode or assembly. +> B. When parsing assembly/bytecode, we effectively have to do a full +> SSA generation/PHI node insertion pass to build the dependencies +> when we don't want the "pinned" representation. This is not +> cool. + +I understand the concern. But again, let's focus on the performance first +and then look at the language design issues. E.g., it would be good to know +how big the bytecode files are before expanding them further. 
I am pretty
+keen to explore the implications of LLVM for mobile devices. Both bytecode
+size and power consumption are important to consider there.
+
+--Vikram
+
diff --git a/final/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt b/final/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt
new file mode 100644
index 00000000000..5f7843ab563
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-04-16-DynamicCompilation.txt
@@ -0,0 +1,49 @@
+By Chris:
+
+LLVM has been designed with two primary goals in mind. First we strive to
+enable the best possible division of labor between static and dynamic
+compilers, and second, we need a flexible and powerful interface
+between these two complementary stages of compilation. We feel that
+providing a solution to these two goals will yield an excellent solution
+to the performance problem faced by modern architectures and programming
+languages.
+
+A key insight into current compiler and runtime systems is that a
+compiler may fall anywhere in a "continuum of compilation" to do its
+job. On one side, scripting languages statically compile nothing and
+dynamically compile (or equivalently, interpret) everything. On the far
+other side, traditional static compilers process everything statically and
+nothing dynamically. These approaches have typically been seen as a
+tradeoff between performance and portability. On a deeper level, however,
+there are two reasons that optimal system performance may be obtained by a
+system somewhere in between these two extremes: dynamic application
+behavior and social constraints.
+
+From a technical perspective, pure static compilation cannot ever give
+optimal performance in all cases, because applications have varying dynamic
+behavior that the static compiler cannot take into consideration. Even
+compilers that support profile guided optimization generate poor code in
+the real world, because using such optimization tunes the application
+to one particular usage pattern, whereas real programs (as opposed to
+benchmarks) often have several different usage patterns.
+
+On a social level, static compilation is a very shortsighted solution to
+the performance problem. Instruction set architectures (ISAs) continuously
+evolve, and each implementation of an ISA (a processor) must choose a set
+of tradeoffs that make sense in the market context that it is designed for.
+With every new processor introduced, the vendor faces two fundamental
+problems: First, there is a lag time between when a processor is introduced
+and when compilers generate quality code for the architecture. Second,
+even when compilers catch up to the new architecture there is often a large
+body of legacy code that was compiled for previous generations and will
+not or cannot be upgraded. Thus a large percentage of code running on a
+processor may be compiled quite sub-optimally for the current
+characteristics of the dynamic execution environment.
+
+For these reasons, LLVM has been designed from the beginning as a long-term
+solution to these problems. Its design allows the large body of platform
+independent, static program optimizations currently in compilers to be
+reused unchanged in their current form. It also provides important static
+type information to enable powerful dynamic and link time optimizations
+to be performed quickly and efficiently. This combination enables an
+increase in effective system performance for real world environments.
diff --git a/final/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt b/final/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt
new file mode 100644
index 00000000000..b546301d35a
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-05-18-ExceptionHandling.txt
@@ -0,0 +1,202 @@
+Meeting notes: Implementation idea: Exception Handling in C++/Java
+
+The 5/18/01 meeting discussed ideas for implementing exceptions in LLVM.
+We decided that the best solution requires a set of library calls provided by
+the VM, as well as an extension to the LLVM function invocation syntax.
+
+The LLVM function invocation instruction currently looks like this (ignoring
+types):
+
+  call func(arg1, arg2, arg3)
+
+The extension discussed today adds an optional "with" clause that
+associates a label with the call site. The new syntax looks like this:
+
+  call func(arg1, arg2, arg3) with funcCleanup
+
+This funcCleanup label always stays tightly associated with the call site
+(being encoded directly into the call opcode itself), and should be used
+whenever there is cleanup work that needs to be done for the current
+function if an exception is thrown by func (or if we are in a try block).
+
+To support this, the VM/Runtime provide the following simple library
+functions (all syntax in this document is very abstract):
+
+typedef struct { something } %frame;
+  The VM must export a "frame type", that is, an opaque structure used to
+  implement different types of stack walking that may be used by various
+  language runtime libraries. We imagine that it would be typical to
+  represent a frame with a PC and frame pointer pair, although that is not
+  required.
+
+%frame getStackCurrentFrame();
+  Get a frame object for the current function. Note that if the current
+  function was inlined into its caller, the "current" frame will belong to
+  the "caller".
+
+bool isFirstFrame(%frame f);
+  Returns true if the specified frame is the top level (first activated)
+  frame for this thread. For the main thread, this corresponds to the
+  main() function; for a spawned thread, it corresponds to the thread
+  function.
+
+%frame getNextFrame(%frame f);
+  Return the next frame up the stack (that is, the caller's frame). This
+  function is undefined if f satisfies the predicate isFirstFrame(f).
+
+Label *getFrameLabel(%frame f);
+  If a label was associated with f (as discussed below), this function
+  returns it. Otherwise, it returns a null pointer.
+
+doNonLocalBranch(Label *L);
+  At this point, it is not clear whether this should be a function or
+  intrinsic. It should probably be an intrinsic in LLVM, but we'll deal
+  with this issue later.
+
+
+Here is a motivating example that illustrates how these facilities could be
+used to implement the C++ exception model:
+
+void TestFunction(...) {
+  A a; B b;
+  foo();    // Any function call may throw
+  bar();
+  C c;
+
+  try {
+    D d;
+    baz();
+  } catch (int) {
+    ...int Stuff...
+    // execution continues after the try block: the exception is consumed
+  } catch (double) {
+    ...double stuff...
+    throw;   // Exception is propagated
+  }
+}
+
+This function would compile to approximately the following code (heavy
+pseudo code follows):
+
+Func:
+  %a = alloca A
+  A::A(%a)        // These ctors & dtors could throw, but we ignore this
+  %b = alloca B   // minor detail for this example
+  B::B(%b)
+
+  call foo() with fooCleanup  // An exception in foo is propagated to fooCleanup
+  call bar() with barCleanup  // An exception in bar is propagated to barCleanup
+
+  %c = alloca C
+  C::C(c)
+  %d = alloca D
+  D::D(d)
+  call baz() with bazCleanup  // An exception in baz is propagated to bazCleanup
+  d->~D();
+EndTry:   // This label corresponds to the end of the try block
+  c->~C()   // These could also throw, these are also ignored
+  b->~B()
+  a->~A()
+  return
+
+Note that this is a very straightforward and literal translation: exactly
+what we want for zero cost (when unused) exception handling. Especially on
+platforms with many registers (i.e., the IA64), setjmp/longjmp style
+exception handling is *very* impractical. Also, the "with" clauses describe
+the control flow paths explicitly so that analysis is not adversely
+affected.
+
+The foo/barCleanup labels are implemented as:
+
+TryCleanup:          // Executed if an exception escapes the try block
+  c->~C()
+barCleanup:          // Executed if an exception escapes from bar()
+  // fall through
+fooCleanup:          // Executed if an exception escapes from foo()
+  b->~B()
+  a->~A()
+  Exception *E = getThreadLocalException()
+  call throw(E)      // Implemented by the C++ runtime, described below
+
+Which does the work one would expect. getThreadLocalException is a function
+implemented by the C++ support library. It returns the current exception
+object for the current thread. Note that we do not attempt to recycle the
+shutdown code from before, because performance of the mainline code is
+critically important. Also, obviously fooCleanup and barCleanup may be
+merged and one of them eliminated. This just shows how the code generator
+would most likely emit code.
+
+The bazCleanup label is more interesting. Because the exception may be
+caught by the try block, we must dispatch to its handler... but it does
+not exist on the call stack (it does not have a VM Call->Label mapping
+installed), so we must dispatch statically with a goto. The bazCleanup
+code thus appears as:
+
+bazCleanup:
+  d->~D();   // destruct D as it goes out of scope when entering catch clauses
+  goto TryHandler
+
+In general, TryHandler is not the same as bazCleanup, because multiple
+function calls could be made from the try block. In this case, trivial
+optimization could merge the two basic blocks. TryHandler is the code
+that actually determines the type of exception, based on the Exception
+object itself. For this discussion, assume that the exception object
+contains *at least*:
+
+1. A pointer to the RTTI info for the contained object
+2. A pointer to the dtor for the contained object
+3. The contained object itself
+
+Note that it is necessary to maintain #1 & #2 in the exception object itself
+because objects without virtual function tables may be thrown (as in this
+example). Assuming this, TryHandler would look something like this:
+
+TryHandler:
+  Exception *E = getThreadLocalException();
+  switch (E->RTTIType) {
+  case IntRTTIInfo:
+    ...int Stuff...       // The action to perform from the catch block
+    break;
+  case DoubleRTTIInfo:
+    ...double Stuff...    // The action to perform from the catch block
+    goto TryCleanup       // This catch block rethrows the exception
+    break;                // Redundant, eliminated by the optimizer
+  default:
+    goto TryCleanup       // Exception not caught, rethrow
+  }
+
+  // Exception was consumed
+  if (E->dtor)
+    E->dtor(E->object)    // Invoke the dtor on the object if it exists
+  goto EndTry             // Continue mainline code...
+
+And that is all there is to it.
+
+The throw(E) function would then be implemented like this (which may be
+inlined into the caller through standard optimization):
+
+function throw(Exception *E) {
+  // Get the start of the stack trace...
+  %frame %f = call getStackCurrentFrame()
+
+  // Get the label information that corresponds to it
+  label * %L = call getFrameLabel(%f)
+  while (%L == 0 && !isFirstFrame(%f)) {
+    // Loop until a cleanup handler is found
+    %f = call getNextFrame(%f)
+    %L = call getFrameLabel(%f)
+  }
+
+  if (%L != 0) {
+    call setThreadLocalException(E)   // Allow handlers access to this...
+    call doNonLocalBranch(%L)
+  }
+  // No handler found!
+  call BlowUp()   // Ends up calling the terminate() method in use
+}
+
+That's a brief rundown of how C++ exception handling could be implemented
+in LLVM. Java would be very similar, except it only uses destructors to
+unlock synchronized blocks, not to destroy data. Also, it uses two stack
+walks: a nondestructive walk that builds a stack trace, then a destructive
+walk that unwinds the stack as shown here.
+
+It would be trivial to get exception interoperability between C++ and Java.
+
diff --git a/final/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt b/final/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt
new file mode 100644
index 00000000000..3375365f54c
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-05-19-ExceptionResponse.txt
@@ -0,0 +1,45 @@
+Date: Sat, 19 May 2001 19:09:13 -0500 (CDT)
+From: Chris Lattner
+To: Vikram S. Adve
+Subject: RE: Meeting writeup
+
+> I read it through and it looks great!
+
+Thanks!
+
+> The finally clause in Java may need more thought. The code for this clause
+> is like a subroutine because it needs to be entered from many points (end of
+> try block and beginning of each catch block), and then needs to *return to
+> the place from where the code was entered*. That's why JVM has the
+> jsr/jsr_w instruction.
+
+Hrm... I guess that is an implementation decision. It can either be
+modelled as a subroutine (as java bytecodes do), which is really
+gross... or it can be modelled as code duplication (emitted once inline,
+then once in the exception path). Because this could, at worst,
+slightly less than double the amount of code in a function (it is
+bounded), I don't think this is a big deal. One of the really nice things
+about the LLVM representation is that it still allows for runtime code
+generation for exception paths (exception paths are not compiled until
+needed). Obviously a static compiler couldn't do this though. :)
+
+In this case, only one copy of the code would be compiled... until the
+other one is needed on demand. Also this strategy fits with the "zero
+cost" exception model... the standard case is not burdened with extra
+branches or "call"s.
+
+> I suppose you could save the return address in a particular register
+> (specific to this finally block), jump to the finally block, and then at
+> the end of the finally block, jump back indirectly through this register.
+> It will complicate building the CFG but I suppose that can be handled. It is
+> also unsafe in terms of checking where control returns (which is I suppose
+> why the JVM doesn't use this).
+
+I think that a code duplication method would be cleaner, and would avoid
+the caveats that you mention. Also, it does not slow down the normal case
+with an indirect branch...
+
+Like everything, we can probably defer a final decision until later. :)
+
+-Chris
+
diff --git a/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt b/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt
new file mode 100644
index 00000000000..97af16a2dad
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations.txt
@@ -0,0 +1,63 @@
+Date: Fri, 1 Jun 2001 16:38:17 -0500 (CDT)
+From: Chris Lattner
+To: Vikram S. Adve
+Subject: Interesting: GCC passes
+
+
+Take a look at this document (which describes the order of optimizations
+that GCC performs):
+
+http://gcc.gnu.org/onlinedocs/gcc_17.html
+
+The rundown is that after RTL generation, the following happens:
+
+ 1. [t] jump optimization (jumps to jumps, etc)
+ 2. [t] Delete unreachable code
+ 3. Compute live ranges for CSE
+ 4. [t] Jump threading (jumps to jumps with identical or inverse conditions)
+ 5. [t] CSE
+ 6. *** Conversion to SSA
+ 7. [t] SSA Based DCE
+ 8. *** Conversion to LLVM
+ 9. UnSSA
+10. GCSE
+11. LICM
+12. Strength Reduction
+13. Loop unrolling
+14. [t] CSE
+15. [t] DCE
+16. Instruction combination, register movement, scheduling... etc.
+
+I've marked optimizations with a [t] to indicate things that I believe to
+be relatively trivial to implement in LLVM itself. The time consuming
+things to reimplement would be SSA based PRE, strength reduction & loop
+unrolling... these would be the major things we would miss out on if we
+did LLVM creation from tree code [inlining and other high level
+optimizations are done on the tree representation].
+
+Given the lack of "strong" optimizations that would take a long time to
+reimplement, I am leaning a bit more towards creating LLVM from the tree
+code. Especially given that SGI has GPL'd their compiler, including many
+SSA based optimizations that could be adapted (besides the fact that their
+code looks MUCH nicer than GCC :)
+
+Even if we choose to do LLVM code emission from RTL, we will almost
+certainly want to move LLVM emission from step 8 down until at least CSE
+has been rerun... which causes me to wonder if the SSA generation code
+will still work (due to global variable dependencies and stuff). I assume
+that it can be made to work, but might be a little more involved than we
+would like.
+
+I'm continuing to look at the Tree -> RTL code. It is pretty gross
+because they do some of the translation a statement at a time, and some
+of it a function at a time... I'm not quite clear why and how the
+distinction is drawn, but it does not appear that there is a wonderful
+place to attach extra info.
+
+Anyways, I'm proceeding with the RTL -> LLVM conversion phase for now. We
+can talk about this more on Monday.
+
+Wouldn't it be nice if there were an obvious decision to be made? :)
+
+-Chris
+
diff --git a/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt b/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
new file mode 100644
index 00000000000..6c9e0971a04
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt
@@ -0,0 +1,71 @@
+Date: Fri, 1 Jun 2001 17:08:44 -0500 (CDT)
+From: Chris Lattner
+To: Vikram S. Adve
+Subject: RE: Interesting: GCC passes
+
+> That is very interesting. I agree that some of these could be done on LLVM
+> at link-time, but it is the extra time required that concerns me. Link-time
+> optimization is severely time-constrained.
+
+If we were to reimplement any of these optimizations, I assume that we
+could do them a translation unit at a time, just as GCC does now. This
+would lead to a pipeline like this:
+
+Static optimizations, xlation unit at a time:
+.c --GCC--> .llvm --llvmopt--> .llvm
+
+Link time optimizations:
+.llvm --llvm-ld--> .llvm --llvm-link-opt--> .llvm
+
+Of course, many optimizations could be shared between llvmopt and
+llvm-link-opt, but they wouldn't need to be shared... Thus compile time
+could be faster, because we are using a "smarter" IR (SSA based).
+
+> BTW, about SGI, "borrowing" SSA-based optimizations from one compiler and
+> putting it into another is not necessarily easier than re-doing it.
+> Optimization code is usually heavily tied in to the specific IR they use.
+
+Understood. The only reason that I brought this up is because SGI's IR is
+more similar to LLVM than it is different in many respects (SSA based,
+relatively low level, etc), and could be easily adapted. Also their
+optimizations are written in C++ and are actually somewhat
+structured... of course it would be no walk in the park, but it would be
+much less time consuming to adapt, say, SSA-PRE than to rewrite it.
+
+> But your larger point is valid that adding SSA based optimizations is
+> feasible and should be fun. (Again, link time cost is the issue.)
+
+Assuming link time cost wasn't an issue, the question is:
+Does using GCC's backend buy us anything?
+
+> It also occurs to me that GCC is probably doing quite a bit of back-end
+> optimization (step 16 in your list). Do you have a breakdown of that?
+
+Not really. The irritating part of GCC is that it mixes it all up and
+doesn't have a clean separation of concerns. A lot of the "back end
+optimization" happens right along with other data optimizations (i.e., CSE
+of machine specific things).
+
+As far as REAL back end optimizations go, it looks something like this:
+
+1. Instruction combination: try to make CISCy instructions, if available
+2. Register movement: try to get registers in the right places for the
+architecture to avoid register to register moves. For example, try to get
+the first argument of a function to naturally land in %o0 for sparc.
+3. Instruction scheduling: 'nuff said :)
+4. Register class preferencing: ??
+5. Local register allocation
+6. Global register allocation
+7. Spilling
+8. Local regalloc
+9. Jump optimization
+10. Delay slot scheduling
+11. Branch shortening for CISC machines
+12. Instruction selection & peephole optimization
+13. Debug info output
+
+But none of this would be usable for LLVM anyways, unless we were using
+GCC as a static compiler.
+
+-Chris
+
diff --git a/final/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt b/final/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt
new file mode 100644
index 00000000000..1bc2eae746c
--- /dev/null
+++ b/final/docs/HistoricalNotes/2001-06-20-.NET-Differences.txt
@@ -0,0 +1,30 @@
+Date: Wed, 20 Jun 2001 12:32:22 -0500
+From: Vikram Adve
+To: Chris Lattner
+Subject: .NET vs. our VM
+
+One significant difference between .NET CLR and our VM is that the CLR
+includes full information about classes and inheritance. In fact, I just
+sat through the paper on adding templates to .NET CLR, and the speaker
+indicated that the goal seems to be to do simple static compilation (very
+little lowering or optimization).
Also, the templates implementation in CLR +"relies on dynamic class loading and JIT compilation". + +This is an important difference because I think there are some significant +advantages to have a much lower level VM layer, and do significant static +analysis and optimization. + +I also talked to the lead guy for KAI's C++ compiler (Arch Robison) and he +said that SGI and other commercial compilers have included options to export +their *IR* next to the object code (i.e., .il files) and use them for +link-time code generation. In fact, he said that the .o file was nearly +empty and was entirely generated from the .il at link-time. But he agreed +that this limited the link-time interprocedural optimization to modules +compiled by the same compiler, whereas our approach allows us to link and +optimize modules from multiple different compilers. (Also, of course, they +don't do anything for runtime optimization). + +All issues to bring up in Related Work. + +--Vikram + diff --git a/final/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt b/final/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt new file mode 100644 index 00000000000..3e10416fe67 --- /dev/null +++ b/final/docs/HistoricalNotes/2001-07-06-LoweringIRForCodeGen.txt @@ -0,0 +1,31 @@ +Date: Fri, 6 Jul 2001 16:56:56 -0500 +From: Vikram S. Adve +To: Chris Lattner +Subject: lowering the IR + +BTW, I do think that we should consider lowering the IR as you said. I +didn't get time to raise it today, but it comes up with the SPARC +move-conditional instruction. I don't think we want to put that in the core +VM -- it is a little too specialized. But without a corresponding +conditional move instruction in the VM, it is pretty difficult to maintain a +close mapping between VM and machine code. Other architectures may have +other such instructions. + +What I was going to suggest was that for a particular processor, we define +additional VM instructions that match some of the unusual opcodes on the +processor but have VM semantics otherwise, i.e., all operands are in SSA +form and typed. This means that we can re-generate core VM code from the +more specialized code any time we want (so that portability is not lost). + +Typically, a static compiler like gcc would generate just the core VM, which +is relatively portable. Anyone (an offline tool, the linker, etc., or even +the static compiler itself if it chooses) can transform that into more +specialized target-specific VM code for a particular architecture. If the +linker does it, it can do it after all machine-independent optimizations. +This would be the most convenient, but not necessary. + +The main benefit of lowering will be that we will be able to retain a close +mapping between VM and machine code. + +--Vikram + diff --git a/final/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt b/final/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt new file mode 100644 index 00000000000..9379081018d --- /dev/null +++ b/final/docs/HistoricalNotes/2001-09-18-OptimizeExceptions.txt @@ -0,0 +1,56 @@ +Date: Tue, 18 Sep 2001 00:38:37 -0500 (CDT) +From: Chris Lattner +To: Vikram S. Adve +Subject: Idea for a simple, useful link time optimization + + +In C++ programs, exceptions suck, and here's why: + +1. In virtually all function calls, you must assume that the function + throws an exception, unless it is defined as 'nothrow'. This means + that every function call has to have code to invoke dtors on objects + locally if one is thrown by the function. 
Most functions don't throw
+   exceptions, so this code is dead [with all the bad effects of dead
+   code, including icache pollution].
+2. Declaring a function nothrow causes catch blocks to be added to every
+   call that is not provably nothrow. This makes them very slow.
+3. Extra extraneous exception edges reduce the opportunity for code
+   motion.
+4. EH is typically implemented with large lookup tables. Ours is going to
+   be much smaller (than the "standard" way of doing it) to start with,
+   but eliminating it entirely would be nice. :)
+5. It is physically impossible to correctly put (accurate, correct)
+   exception specifications on generic, templated code. But it is trivial
+   to analyze instantiations of said code.
+6. Most large C++ programs throw few exceptions. Most well designed
+   programs only throw exceptions in specific planned portions of the
+   code.
+
+Given our _planned_ model of handling exceptions, all of this would be
+pretty trivial to eliminate through some pretty simplistic interprocedural
+analysis. The DCE factor alone could probably be pretty significant. The
+extra code motion opportunities could also be exploited though...
+
+Additionally, this optimization can be implemented in a straightforward
+conservative manner, allowing libraries to be optimized or individual
+files even (if there are leaf functions visible in the translation unit
+that are called).
+
+I think it's a reasonable optimization that hasn't really been addressed
+(because assembly is way too low level for this), and could have decent
+payoffs... without being an overly complex optimization.
+
+After I wrote all of that, I found this page that is talking about
+basically the same thing I just wrote, except that it is a
+translation-unit-at-a-time, tree based approach:
+http://www.ocston.org/~jls/ehopt.html
+
+but is very useful from an "expected gain" and references perspective.
+Note that their compiler is apparently unable to inline functions that use
+exceptions, so their numbers are pretty worthless... also our results
+would (hopefully) be better because it's interprocedural...
+
+What do you think?
+
+-Chris
+
diff --git a/final/docs/HistoricalNotes/2002-05-12-InstListChange.txt b/final/docs/HistoricalNotes/2002-05-12-InstListChange.txt
new file mode 100644
index 00000000000..004edb068d7
--- /dev/null
+++ b/final/docs/HistoricalNotes/2002-05-12-InstListChange.txt
@@ -0,0 +1,55 @@
+Date: Sun, 12 May 2002 17:12:53 -0500 (CDT)
+From: Chris Lattner
+To: "Vikram S. Adve"
+Subject: LLVM change
+
+There is a fairly fundamental change that I would like to make to the LLVM
+infrastructure, but I'd like to know if you see any drawbacks that I
+don't...
+
+Basically right now at the basic block level, each basic block contains an
+instruction list (returned by getInstList()) that is a ValueHolder of
+instructions. To iterate over instructions, we must actually iterate over
+the instlist, and access the instructions through the instlist.
+
+To add or remove an instruction from a basic block, we need to get an
+iterator to an instruction, which, given just an Instruction*, requires a
+linear search of the basic block the instruction is contained in... just
+to insert an instruction before another instruction, or to delete an
+instruction! This complicates algorithms that should be very simple (like
+simple constant propagation), because they aren't actually sparse anymore,
+they have to traverse basic blocks to remove constant propagated
+instructions.
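+
+To make the cost concrete, here is a minimal standalone sketch of the two
+representations (the BasicBlock/Instruction types below are illustrative
+stand-ins, not the real LLVM classes): removal through an external vector
+needs a linear scan, while intrusive links make it a constant-time pointer
+update.
+
+  #include <algorithm>
+  #include <vector>
+
+  struct Instruction;
+
+  // Stand-in for a block that owns its instructions in an external vector.
+  struct BasicBlock {
+    std::vector<Instruction*> Insts;
+  };
+
+  struct Instruction {
+    BasicBlock *Parent = nullptr;
+    Instruction *Prev = nullptr;  // intrusive alternative: the links live
+    Instruction *Next = nullptr;  // in the instruction itself
+  };
+
+  // Vector-backed removal: an O(n) search just to find the iterator.
+  void removeFromVector(Instruction *I) {
+    std::vector<Instruction*> &V = I->Parent->Insts;
+    V.erase(std::find(V.begin(), V.end(), I));
+  }
+
+  // Intrusive removal: O(1) given only the Instruction*.
+  void removeFromList(Instruction *I, Instruction *&Head) {
+    if (I->Prev) I->Prev->Next = I->Next; else Head = I->Next;
+    if (I->Next) I->Next->Prev = I->Prev;
+  }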
+
+Additionally, adding or removing instructions to a basic block
+_invalidates all iterators_ pointing into that block, which is really
+irritating.
+
+To fix these problems (and others), I would like to make the ordering of
+the instructions be represented with a doubly linked list in the
+instructions themselves, instead of an external data structure. This is
+how many other representations do it, and frankly I can't remember why I
+originally implemented it the way I did.
+
+Long term, all of the code that depends on the nasty features in the
+instruction list (which can be found by grep'ing for getInstList()) will
+be changed to do nice local transformations. In the short term, I'll
+change the representation, but preserve the interface (including
+getInstList()) so that all of the code doesn't have to change.
+
+Iteration over the instructions in a basic block remains the simple:
+for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) ...
+
+But we will also support:
+for (Instruction *I = BB->front(); I; I = I->getNext()) ...
+
+After converting instructions over, I'll convert basic blocks and
+functions to have a similar interface.
+
+The only negative aspect of this change that I see is that it increases
+the amount of memory consumed by one pointer per instruction. Given the
+benefits, I think this is a very reasonable tradeoff.
+
+What do you think?
+
+-Chris
diff --git a/final/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt b/final/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt
new file mode 100644
index 00000000000..2ca46117ca8
--- /dev/null
+++ b/final/docs/HistoricalNotes/2002-06-25-MegaPatchInfo.txt
@@ -0,0 +1,72 @@
+Changes:
+* Change the casting code to be const correct. Now, doing this is invalid:
+     const Value *V = ...;
+     Instruction *I = dyn_cast<Instruction>(V);
+  instead, the second line should be:
+     const Instruction *I = dyn_cast<Instruction>(V);
+
+* Change the casting code to allow casting a reference value thus:
+     const Value &V = ...;
+     const Instruction &I = cast<Instruction>(V);
+
+  dyn_cast does not work with references, because it must return a null
+  pointer on failure.
+
+* Fundamentally change how instructions and other values are represented.
+  Before, every llvm container was an instance of the ValueHolder template,
+  instantiated for each container type. This ValueHolder was effectively a
+  wrapper around a vector of pointers to the sub-objects.
+
+  Now, instead of having a vector of pointers to objects, the objects are
+  maintained in a doubly linked list of values (i.e., each Instruction now
+  has Next & Previous fields). The containers are now instances of ilist
+  (intrusive linked list class), which use the next and previous fields to
+  chain them together. The advantage of this implementation is that
+  iterators can be formed directly from pointers to the LLVM value, and
+  invalidation is much easier to handle.
+
+* As part of the above change, dereferencing an iterator (for example:
+  BasicBlock::iterator) now produces a reference to the underlying type
+  (same example: Instruction&) instead of a pointer to the underlying
+  object. This
+  makes it much easier to write nested loops that iterate over things,
+  changing this:
+
+    for (Function::iterator BI = Func->begin(); BI != Func->end(); ++BI)
+      for (BasicBlock::iterator II = (*BI)->begin(); II != (*BI)->end(); ++II)
+        (*II)->dump();
+
+  into:
+
+    for (Function::iterator BI = Func->begin(); BI != Func->end(); ++BI)
+      for (BasicBlock::iterator II = BI->begin(); II != BI->end(); ++II)
+        II->dump();
+
+  which is much more natural and what users expect.
+
+* Simplification of #include's: Before, it was necessary for a .cpp file to
+  include every .h file that it used. Now things are batched a little bit
+  more to make it easier to use. Specifically, the include graph now
+  includes these edges:
+    Module.h -> Function.h, GlobalVariable.h
+    Function.h -> BasicBlock.h, Argument.h
+    BasicBlock.h -> Instruction.h
+
+  Which means that #including Function.h is usually sufficient for getting
+  the lower level #includes.
+
+* Printing out a Value* has now changed: Printing a Value* will soon print
+  out the address of the value instead of the contents of the Value. To
+  print out the contents, you must convert it to a reference with (for
+  example) 'cout << *I' instead of 'cout << I;'. This conversion is not yet
+  complete, but will be eventually. In the meantime, both forms print out
+  the contents.
+
+* References are used much more throughout the code base. In general, if a
+  pointer is known to never be null, it is passed in as a reference instead
+  of a pointer. For example, the instruction visitor class uses references
+  instead of pointers, and Pass subclasses now all receive references to
+  Values instead of pointers, because they may never be null.
+
+* The Function class now has helper functions for accessing the Arguments
+  list. Instead of having to go through getArgumentList for simple things
+  like iterating over the arguments, the a*() methods can now be used to
+  access them.
+
diff --git a/final/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt b/final/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt
new file mode 100644
index 00000000000..fbe811d627f
--- /dev/null
+++ b/final/docs/HistoricalNotes/2003-01-23-CygwinNotes.txt
@@ -0,0 +1,28 @@
+Date: Mon, 20 Jan 2003 00:00:28 -0600
+From: Brian R. Gaeke
+Subject: windows vs. llvm
+
+If you're interested, here are some of the major problems compiling LLVM
+under Cygwin and/or Mingw.
+
+1. Cygwin doesn't have <inttypes.h> or <stdint.h>, so all the INT*_MAX
+   symbols and standard int*_t types are off in limbo somewhere. Mingw has
+   <stdint.h>, but Cygwin doesn't like it.
+
+2. Mingw doesn't have (because Windows doesn't have it.)
+
+3. SA_SIGINFO and friends are not around; only signal() seems to work.
+
+4. Relink, aka ld -r, doesn't work (probably an ld bug); you need
+   DONT_BUILD_RELINKED. This breaks all the tools makefiles; you just need
+   to change them to have .a's.
+
+5. There isn't a .
+
+6. There isn't a mallinfo() (or, at least, it's documented, but it doesn't
+   seem to link).
+
+7. The version of Bison that cygwin (and newer Linux versions) comes with
+   does not like = signs in rules. Burg's gram.yc source file uses them. I
+   think you can just take them out.
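+
+A typical workaround for problem 1 is a small project-local header along
+these lines; this is an illustrative sketch only (the HAVE_STDINT_H guard
+and the typedef widths are assumptions about the toolchain, not code from
+the LLVM tree):
+
+  /* Fallback fixed-width types; widths assume a 32-bit ILP32 target. */
+  #if defined(__CYGWIN__) && !defined(HAVE_STDINT_H)
+  typedef signed char        int8_t;
+  typedef unsigned char      uint8_t;
+  typedef short              int16_t;
+  typedef unsigned short     uint16_t;
+  typedef int                int32_t;
+  typedef unsigned int       uint32_t;
+  typedef long long          int64_t;
+  typedef unsigned long long uint64_t;
+  #define INT32_MAX  2147483647
+  #define UINT32_MAX 4294967295U
+  #else
+  #include <inttypes.h>
+  #endif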
+ diff --git a/final/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt b/final/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt new file mode 100644 index 00000000000..a7457846395 --- /dev/null +++ b/final/docs/HistoricalNotes/2003-06-25-Reoptimizer1.txt @@ -0,0 +1,137 @@ +Wed Jun 25 15:13:51 CDT 2003 + +First-level instrumentation +--------------------------- + +We use opt to do Bytecode-to-bytecode instrumentation. Look at +back-edges and insert llvm_first_trigger() function call which takes +no arguments and no return value. This instrumentation is designed to +be easy to remove, for instance by writing a NOP over the function +call instruction. + +Keep count of every call to llvm_first_trigger(), and maintain +counters in a map indexed by return address. If the trigger count +exceeds a threshold, we identify a hot loop and perform second-level +instrumentation on the hot loop region (the instructions between the +target of the back-edge and the branch that causes the back-edge). We +do not move code across basic-block boundaries. + + +Second-level instrumentation +--------------------------- + +We remove the first-level instrumentation by overwriting the CALL to +llvm_first_trigger() with a NOP. + +The reoptimizer maintains a map between machine-code basic blocks and +LLVM BasicBlock*s. We only keep track of paths that start at the +first machine-code basic block of the hot loop region. + +How do we keep track of which edges to instrument, and which edges are +exits from the hot region? 3 step process. + +1) Do a DFS from the first machine-code basic block of the hot loop +region and mark reachable edges. + +2) Do a DFS from the last machine-code basic block of the hot loop +region IGNORING back edges, and mark the edges which are reachable in +1) and also in 2) (i.e., must be reachable from both the start BB and +the end BB of the hot region). + +3) Mark BBs which end in edges that exit the hot region; we need to +instrument these differently. + +Assume that there is 1 free register. On SPARC we use %g1, which LLC +has agreed not to use. Shift a 1 into it at the beginning. At every +edge which corresponds to a conditional branch, we shift 0 for not +taken and 1 for taken into a register. This uniquely numbers the paths +through the hot region. Silently fail if we need more than 64 bits. + +At the end BB we call countPath and increment the counter based on %g1 +and the return address of the countPath call. We keep track of the +number of iterations and the number of paths. We only run this +version 30 or 40 times. + +Find the BBs that total 90% or more of execution, and aggregate them +together to form our trace. But we do not allow more than 5 paths; if +we have more than 5 we take the ones that are executed the most. We +verify our assumption that we picked a hot back-edge in first-level +instrumentation, by making sure that the number of times we took an +exit edge from the hot trace is less than 10% of the number of +iterations. + +LLC has been taught to recognize llvm_first_trigger() calls and NOT +generate saves and restores of caller-saved registers around these +calls. + + +Phase behavior +-------------- + +We turn off llvm_first_trigger() calls with NOPs, but this would hide +phase behavior from us (when some funcs/traces stop being hot and +others become hot.) + +We have a SIGALRM timer that counts time for us. Every time we get a +SIGALRM we look at our priority queue of locations where we have +removed llvm_first_trigger() calls. 
Each location is inserted along
+with a time when we will next turn instrumentation back on for that
+call site. If the time has arrived for a particular call site, we pop
+that off the priority queue and turn instrumentation back on for that
+call site.
+
+
+Generating traces
+-----------------
+
+When we finally generate an optimized trace we first copy the code
+into the trace cache. This leaves us with 3 copies of the code: the
+original code, the instrumented code, and the optimized trace. The
+optimized trace does not have instrumentation. The original code and
+the instrumented code are modified to have a branch to the trace
+cache, where the optimized traces are kept.
+
+We copy the code from the original to the instrumentation version
+by tracing the LLVM-to-Machine code basic block map and then copying
+each machine code basic block we think is in the hot region into the
+trace cache. Then we instrument that code. The process is similar for
+generating the final optimized trace; we copy the same basic blocks
+because we might need to put in fixup code for exit BBs.
+
+LLVM basic blocks are not typically used in the Reoptimizer except
+for the mapping information.
+
+We are restricted to using single instructions to branch between the
+original code, trace, and instrumented code. So we have to keep the
+code copies in memory near the original code (they can't be so far
+away that a single pc-relative branch cannot reach them). Malloc() or
+data region space is too far away. This impacts the design of the
+trace cache.
+
+We use a dummy function that is full of a bunch of for loops which we
+overwrite with trace-cache code. The trace manager keeps track of
+whether or not we have enough space in the trace cache, etc.
+
+The trace insertion routine takes an original start address, a vector
+of machine instructions representing the trace, an index of branches
+and their corresponding absolute targets, and an index of calls and
+their corresponding absolute targets.
+
+The trace insertion routine is responsible for inserting branches from
+the beginning of the original code to the beginning of the optimized
+trace. This is because at some point the trace cache may run out of
+space and it may have to evict a trace, at which point the branch to
+the trace would also have to be removed. It uses a round-robin
+replacement policy; we have found that this is almost as good as LRU
+and better than random (especially because of problems fitting the new
+trace in.)
+
+We cannot deal with discontiguous trace cache areas. The trace cache
+is supposed to be cache-line-aligned, but it is not page-aligned.
+
+We generate instrumentation traces and optimized traces into separate
+trace caches. We keep the instrumented code around because you don't
+want to delete a trace when you still might have to return to it
+(i.e., return from a llvm_first_trigger() or countPath() call.)
+
+
diff --git a/final/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt b/final/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt
new file mode 100644
index 00000000000..ec4b93fea0a
--- /dev/null
+++ b/final/docs/HistoricalNotes/2003-06-26-Reoptimizer2.txt
@@ -0,0 +1,110 @@
+Thu Jun 26 14:43:04 CDT 2003
+
+Information about BinInterface
+------------------------------
+
+Take in a set of instructions with some particular register
+allocation. It allows you to add, modify, or delete some instructions,
+in SSA form (kind of like LLVM's MachineInstrs.) Then re-allocate
+registers. It assumes that the transformations you are doing are safe.
+It does not update the mapping information or the LLVM representation
+for the modified trace (so it would not, for instance, support
+multiple optimization passes; passes have to be aware of the mapping
+information and update it manually.)
+
+The way you use it is you take the original code and provide it to
+BinInterface; then you do optimizations to it, then you put it in the
+trace cache.
+
+The BinInterface tries to find live-outs for traces so that it can do
+register allocation on just the trace, and stitch the trace back into
+the original code. It has to preserve the live-ins and live-outs when
+it does its register allocation. (On exits from the trace we have
+epilogues that copy live-outs back into the right registers, but
+live-ins have to be in the right registers.)
+
+
+Limitations of BinInterface
+---------------------------
+
+It does copy insertions for PHIs, which it infers from the machine
+code. The mapping info inserted by LLC is not sufficient to determine
+the PHIs.
+
+It does not handle integer or floating-point condition codes and it
+does not handle floating-point register allocation.
+
+It is not able to use lots of registers aggressively.
+
+There is a problem with alloca: we cannot find our spill space for
+spilling registers, normally allocated on the stack, if the trace
+follows an alloca(). What might be an acceptable solution would be to
+disable trace generation on functions that have variable-sized
+alloca()s. Variable-sized allocas in the trace would also probably
+screw things up.
+
+Because of the FP and alloca limitations, the BinInterface is
+completely disabled right now.
+
+
+Demo
+----
+
+This is a demo of the Ball & Larus version that does NOT use 2-level
+profiling.
+
+1. Compile program with llvm-gcc.
+2. Run opt -lowerswitch -paths -emitfuncs on the bytecode.
+   -lowerswitch  changes switch statements to branches
+   -paths        Ball & Larus path-profiling algorithm
+   -emitfuncs    emit the table of functions
+3. Run llc to generate SPARC assembly code for the result of step 2.
+4. Use g++ to link the (instrumented) assembly code.
+
+We use a script to do all this:
+------------------------------------------------------------------------------
+#!/bin/sh
+llvm-gcc $1.c -o $1
+opt -lowerswitch -paths -emitfuncs $1.bc > $1.run.bc
+llc -f $1.run.bc
+LIBS=$HOME/llvm_sparc/lib/Debug
+GXX=/usr/dcs/software/evaluation/bin/g++
+$GXX -g -L $LIBS $1.run.s -o $1.run.llc \
+$LIBS/tracecache.o \
+$LIBS/mapinfo.o \
+$LIBS/trigger.o \
+$LIBS/profpaths.o \
+$LIBS/bininterface.o \
+$LIBS/support.o \
+$LIBS/vmcore.o \
+$LIBS/transformutils.o \
+$LIBS/bcreader.o \
+-lscalaropts -lscalaropts -lanalysis \
+-lmalloc -lcpc -lm -ldl
+------------------------------------------------------------------------------
+
+5. Run the resulting binary. You will see output from BinInterface
+(described below) intermixed with the output from the program.
+
+
+Output from BinInterface
+------------------------
+
+BinInterface's debugging code prints out the following stuff in order:
+
+1. Initial code provided to BinInterface with original register
+allocation.
+
+2. Section 0 is the trace prolog, consisting mainly of live-ins and
+register saves which will be restored in epilogs.
+
+3. Section 1 is the trace itself, in SSA form used by BinInterface,
+along with the PHIs that are inserted.
+PHIs are followed by the copies that implement them.
+Each branch (i.e., out of the trace) is annotated with the +section number that represents the epilog it branches to. + +4. All the other sections starting with Section 2 are trace epilogs. +Every branch from the trace has to go to some epilog. + +5. After the last section is the register allocation output. diff --git a/final/docs/HistoricalNotes/2007-OriginalClangReadme.txt b/final/docs/HistoricalNotes/2007-OriginalClangReadme.txt new file mode 100644 index 00000000000..611dc9d2c01 --- /dev/null +++ b/final/docs/HistoricalNotes/2007-OriginalClangReadme.txt @@ -0,0 +1,178 @@ +//===----------------------------------------------------------------------===// +// C Language Family Front-end +//===----------------------------------------------------------------------===// + Chris Lattner + +I. Introduction: + + clang: noun + 1. A loud, resonant, metallic sound. + 2. The strident call of a crane or goose. + 3. C-language family front-end toolkit. + + The world needs better compiler tools, tools which are built as libraries. This + design point allows reuse of the tools in new and novel ways. However, building + the tools as libraries isn't enough: they must have clean APIs, be as + decoupled from each other as possible, and be easy to modify/extend. This + requires clean layering, decent design, and avoiding tying the libraries to a + specific use. Oh yeah, did I mention that we want the resultant libraries to + be as fast as possible? :) + + This front-end is built as a component of the LLVM toolkit that can be used + with the LLVM backend or independently of it. In this spirit, the API has been + carefully designed as the following components: + + libsupport - Basic support library, reused from LLVM. + + libsystem - System abstraction library, reused from LLVM. + + libbasic - Diagnostics, SourceLocations, SourceBuffer abstraction, + file system caching for input source files. This depends on + libsupport and libsystem. + + libast - Provides classes to represent the C AST, the C type system, + builtin functions, and various helpers for analyzing and + manipulating the AST (visitors, pretty printers, etc). This + library depends on libbasic. + + + liblex - C/C++/ObjC lexing and preprocessing, identifier hash table, + pragma handling, tokens, and macros. This depends on libbasic. + + libparse - C (for now) parsing and local semantic analysis. This library + invokes coarse-grained 'Actions' provided by the client to do + stuff (e.g. libsema builds ASTs). This depends on liblex. + + libsema - Provides a set of parser actions to build a standardized AST + for programs. AST's are 'streamed' out a top-level declaration + at a time, allowing clients to use decl-at-a-time processing, + build up entire translation units, or even build 'whole + program' ASTs depending on how they use the APIs. This depends + on libast and libparse. + + librewrite - Fast, scalable rewriting of source code. This operates on + the raw syntactic text of source code, allowing a client + to insert and delete text in very large source files using + the same source location information embedded in ASTs. This + is intended to be a low-level API that is useful for + higher-level clients and libraries such as code refactoring. + + libanalysis - Source-level dataflow analysis useful for performing analyses + such as computing live variables. It also includes a + path-sensitive "graph-reachability" engine for writing + analyses that reason about different possible paths of + execution through source code. 
This is currently being
+                 employed to write a set of checks for finding bugs in software.
+
+  libcodegen - Lower the AST to LLVM IR for optimization & codegen. Depends
+               on libast.
+
+  clang      - An example driver, client of the libraries at various levels.
+               This depends on all these libraries, and on LLVM VMCore.
+
+ This front-end has been intentionally built as a DAG of libraries, making it
+ easy to reuse individual parts or replace pieces if desired. For example, to
+ build a preprocessor, you take the Basic and Lexer libraries. If you want an
+ indexer, you take those plus the Parser library and provide some actions for
+ indexing. If you want a refactoring, static analysis, or source-to-source
+ compiler tool, it makes sense to take those plus the AST building and
+ semantic analyzer library. Finally, if you want to use this with the LLVM
+ backend, you'd take these components plus the AST to LLVM lowering code.
+
+ In the future I hope this toolkit will grow to include new and interesting
+ components, including a C++ front-end, ObjC support, and a whole lot of other
+ things.
+
+ Finally, it should be pointed out that the goal here is to build something
+ that is high-quality and industrial-strength: all the obnoxious features of
+ the C family must be correctly supported (trigraphs, preprocessor arcana,
+ K&R-style prototypes, GCC/MS extensions, etc). It cannot be used if it is
+ not 'real'.
+
+
+II. Usage of clang driver:
+
+ * Basic Command-Line Options:
+   - Help: clang --help
+   - Standard GCC options accepted: -E, -I*, -i*, -pedantic, -std=c90, etc.
+   - To make diagnostics more gcc-like: -fno-caret-diagnostics -fno-show-column
+   - Enable metric printing: -stats
+
+ * -fsyntax-only is currently the default mode.
+
+ * -E mode works the same way as GCC.
+
+ * -Eonly mode does all preprocessing, but does not print the output,
+   useful for timing the preprocessor.
+
+ * -fsyntax-only is currently partially implemented, lacking some
+   semantic analysis (some errors and warnings are not produced).
+
+ * -parse-noop parses code without building an AST. This is useful
+   for timing the cost of the parser without including AST building
+   time.
+
+ * -parse-ast builds ASTs, but doesn't print them. This is most
+   useful for timing AST building vs -parse-noop.
+
+ * -parse-ast-print pretty prints most expression and statement nodes.
+
+ * -parse-ast-check checks that diagnostic messages that are expected
+   are reported and that those which are reported are expected.
+
+ * -dump-cfg builds ASTs and then CFGs. CFGs are then pretty-printed.
+
+ * -view-cfg builds ASTs and then CFGs. CFGs are then visualized by
+   invoking Graphviz.
+
+   For more information on getting Graphviz to work with clang/LLVM,
+   see: http://llvm.org/docs/ProgrammersManual.html#ViewGraph
+
+
+III. Current advantages over GCC:
+
+ * Column numbers are fully tracked (no 256 col limit, no GCC-style pruning).
+ * All diagnostics have column numbers, include 'caret diagnostics', and they
+   highlight regions of interesting code (e.g. the LHS and RHS of a binop).
+ * Full diagnostic customization by client (can format diagnostics however
+   they like, e.g. in an IDE or refactoring tool) through DiagnosticClient
+   interface.
+ * Built as a framework, can be reused by multiple tools.
+ * All languages supported linked into same library (no cc1,cc1obj, ...).
+ * mmap's code in read-only, does not dirty the pages like GCC (mem footprint).
+ * LLVM License, can be linked into non-GPL projects.
+ * Full diagnostic control, per diagnostic.
+   Diagnostics are identified by ID.
+ * Significantly faster than GCC at semantic analysis, parsing, preprocessing
+   and lexing.
+ * Defers exposing platform-specific stuff to as late as possible, tracks use
+   of platform-specific features (e.g. #ifdef PPC) to allow 'portable
+   bytecodes'.
+ * The lexer doesn't rely on the "lexer hack": it has no notion of scope and
+   does not categorize identifiers as types or variables -- this is up to the
+   parser to decide.
+
+Potential Future Features:
+
+ * Fine grained diag control within the source (#pragma enable/disable
+   warning).
+ * Better token tracking within macros? (Token came from this line, which is
+   a macro argument instantiated here, recursively instantiated here).
+ * Fast #import with a module system.
+ * Dependency tracking: a change to a header file doesn't recompile every
+   function that textually depends on it: recompile only those functions
+   that need it. This is aka 'incremental parsing'.
+
+
+IV. Missing Functionality / Improvements
+
+Lexer:
+ * Source character mapping. GCC supports ASCII and UTF-8.
+   See GCC options: -ftarget-charset and -ftarget-wide-charset.
+ * Universal character support. Experimental in GCC, enabled with
+   -fextended-identifiers.
+ * -fpreprocessed mode.
+
+Preprocessor:
+ * #assert/#unassert
+ * MSExtension: "L#param" stringizes to a wide string literal.
+ * Add support for -M*
+
+Traditional Preprocessor:
+ * Currently, we have none. :)
+
diff --git a/final/docs/HowToReleaseLLVM.html b/final/docs/HowToReleaseLLVM.html
new file mode 100644
index 00000000000..a428ddfbe0a
--- /dev/null
+++ b/final/docs/HowToReleaseLLVM.html
@@ -0,0 +1,626 @@
+How To Release LLVM To The Public
+
+  1. Introduction
+  2. Qualification Criteria
+  3. Release Timeline
+  4. Release Process
+
+Introduction
+------------
+
+This document contains information about successfully releasing LLVM —
+including subprojects: e.g., llvm-gcc and clang — to the
+public. It is the Release Manager's responsibility to ensure that a
+high-quality build of LLVM is released.
+
LLVM is released on a time-based schedule — roughly every 6 months. We
+ do not normally have dot releases because of the nature of LLVM's incremental
+ development philosophy. That said, the only thing preventing dot releases for
+ critical bug fixes from happening is a lack of resources — testers,
+ machines, time, etc. And, because of the high quality we desire for LLVM
+ releases, we cannot allow for a truncated form of release qualification.

+ +

The release process is roughly as follows:

+ +
    +
+  • Set code freeze and branch creation date for 6 months after the last code
+    freeze date. Announce the release schedule to the LLVM community and update
+    the website.
+  • Create the release branch and begin the release process.
+  • Send out release candidate sources for the first round of testing. Testing
+    lasts 7-10 days. During the first round of testing, any regressions found
+    should be fixed. Patches are merged from mainline into the release
+    branch. Also, all features need to be completed during this time. Any
+    features not completed at the end of the first round of testing will be
+    removed or disabled for the release.
+  • Generate and send out the second release candidate sources. Only
+    critical bugs found during this testing phase will be fixed. Any
+    bugs introduced by merged patches will be fixed, and if so, a third round
+    of testing is needed.
+  • The release notes are updated.
+  • Finally, release!
+ +
+ + + + + + + + + + +
+ +

This section describes a few administrative tasks that need to be done for + the release process to begin. Specifically, it involves:

+ +
    +
+  • Creating the release branch,
+  • Setting version numbers, and
+  • Tagging release candidates for the release team to begin testing
+ +
+ + + + +
+ +

Branch the Subversion trunk using the following procedure:

+ +
    +
+  1. Remind developers that the release branching is imminent and to refrain
+     from committing patches that might break the build. E.g., new features,
+     large patches for works in progress, an overhaul of the type system, an
+     exciting new TableGen feature, etc.
+
+  2. Verify that the current Subversion trunk is in decent shape by
+     examining nightly tester and buildbot results.
+
+  3. Create the release branch for llvm, llvm-gcc-4.2,
+     clang, and the test-suite from the last known good
+     revision. The branch's name is release_XY, where X is
+     the major and Y the minor release number. The branches should be
+     created using the following commands:
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
+           https://llvm.org/svn/llvm-project/llvm/branches/release_XY
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY
+
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
+           https://llvm.org/svn/llvm-project/test-suite/branches/release_XY
+
+$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
+           https://llvm.org/svn/llvm-project/cfe/branches/release_XY
+
+  4. Advise developers that they may now check their patches into the
+     Subversion tree again.
+
+  5. The Release Manager should switch to the release branch, because all
+     changes to the release will now be done in the branch. The easiest way to
+     do this is to grab a working copy using the following commands:
+
+$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y
+
+$ svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY llvm-gcc-4.2-X.Y
+
+$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y
+
+$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y
+ +
+ + + + +
+ +

After creating the LLVM release branch, update the release branches' + autoconf and configure.ac versions from 'X.Ysvn' + to 'X.Y'. Update it on mainline as well to be the next version + ('X.Y+1svn'). Regenerate the configure scripts for both + llvm and the test-suite.
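
+ For example, the version bump and regeneration on the release branch might
+ look like the following sketch (the sed one-liner is illustrative; edit
+ configure.ac by hand if in doubt):
+
+$ cd llvm-X.Y/autoconf
+$ sed -i.bak 's/X.Ysvn/X.Y/' configure.ac   # change 'X.Ysvn' to 'X.Y'
+$ ./AutoRegen.sh                            # regenerate ./configure
+$ # repeat the same edit and regeneration in the test-suite checkout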

+ +

In addition, the version numbers of all the Bugzilla components must be + updated for the next release.

+ +
+ + + + +
+ +

Create release candidates for llvm, llvm-gcc, + clang, and the LLVM test-suite by tagging the branch with + the respective release candidate number. For instance, to create Release + Candidate 1 you would issue the following commands:

+ +
+
+$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY
+$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1
+
+$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY
+$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1
+
+
+ +

Similarly, Release Candidate 2 would be tagged as rc2 and so
+ on. This keeps a permanent copy of the release candidate around for people to
+ export and build as they wish. The final released sources will be tagged in
+ the RELEASE_XY directory as Final
+ (cf. Tag the LLVM Final Release).

+ +

The Release Manager may supply pre-packaged source tarballs for users. This + can be done with the following commands:

+ +
+
+$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1
+$ svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/rc1 llvm-gcc4.2-X.Yrc1
+$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1
+$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1
+
+$ tar -cvf - llvm-X.Yrc1        | gzip > llvm-X.Yrc1.src.tar.gz
+$ tar -cvf - llvm-test-X.Yrc1   | gzip > llvm-test-X.Yrc1.src.tar.gz
+$ tar -cvf - llvm-gcc4.2-X.Yrc1 | gzip > llvm-gcc-4.2-X.Yrc1.src.tar.gz
+$ tar -cvf - clang-X.Yrc1       | gzip > clang-X.Yrc1.src.tar.gz
+
+
+ +
+ + + + +
+ +

The builds of llvm, llvm-gcc, and clang + must be free of errors and warnings in Debug, Release+Asserts, and + Release builds. If all builds are clean, then the release passes Build + Qualification.

+ +

The make options for building the different modes:

+ + + + + + +
+   Mode              Options
+   Debug             ENABLE_OPTIMIZED=0
+   Release+Asserts   ENABLE_OPTIMIZED=1
+   Release           ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
+ +
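
+ For example, a Release build could be driven as follows (a sketch; the
+ parallelism flag and the in-tree configure are illustrative):
+
+$ cd llvm-X.Y
+$ ./configure
+$ make ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1 -j4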
+ + + + +
+ +

Build Debug, Release+Asserts, and Release versions + of llvm on all supported platforms. Directions to build + llvm are + here.

+ +
+ + + + +
+ +

Creating the llvm-gcc binary distribution (Release/Optimized) + requires performing the following steps for each supported platform:

+ +
    +
+  1. Build the llvm-gcc front-end by following the directions in
+     the README.LLVM file. The front-end must be compiled with C, C++,
+     Objective-C (Mac only), Objective-C++ (Mac only), and Fortran
+     support.
+  2. Bootstrapping must be enabled.
+  3. Be sure to build with LLVM_VERSION_INFO=X.Y, where X
+     is the major and Y is the minor release number.
+  4. Copy the installation directory to a directory named for the specific
+     target. For example on Red Hat Enterprise Linux, the directory would be
+     named llvm-gcc4.2-2.6-x86-linux-RHEL4. Archive and compress the
+     new directory.
+ +
+ + + + +
+ +

Creating the clang binary distribution + (Debug/Release+Asserts/Release) requires performing the following steps for + each supported platform:

+ +
    +
+  1. Build clang according to the directions here.
+  2. Build both a debug and release version of clang. The binary will be the
+     release build.
+  3. Package clang (details to follow).
+ +
+ + + + +
+ +

The table below specifies which compilers are used for each Arch/OS + combination when qualifying the build of llvm, llvm-gcc, + and clang.

+ + + + + + + + + + +
+   Architecture   OS            compiler
+   x86-32         Mac OS 10.5   gcc 4.0.1
+   x86-32         Linux         gcc 4.2.X, gcc 4.3.X
+   x86-32         FreeBSD       gcc 4.2.X
+   x86-32         mingw         gcc 3.4.5
+   x86-64         Mac OS 10.5   gcc 4.0.1
+   x86-64         Linux         gcc 4.2.X, gcc 4.3.X
+   x86-64         FreeBSD       gcc 4.2.X
+ +
+ + + + +
+ +

A release is qualified when it has no regressions from the previous release + (or baseline). Regressions are related to correctness first and performance + second. (We may tolerate some minor performance regressions if they are + deemed necessary for the general quality of the compiler.)

+ +

Regressions are new failures in the set of tests that are used to qualify
+ each product and only include things on the list. Every release will have
+ some bugs in it. It is the reality of developing a complex piece of
+ software. We need very concrete and definitive release criteria that
+ ensure we have monotonically improving quality on some metric. The metric we
+ use is described below. This doesn't mean that we don't care about other
+ criteria, but these are the criteria which we found to be most important and
+ which must be satisfied before a release can go out.

+ +
+ + + + +
+ +

LLVM is qualified when it has a clean test run without a front-end and no
+ regressions when using either llvm-gcc or clang with
+ the test-suite from the previous release.

+ +
+ + + + +
+ +

LLVM-GCC is qualified when front-end specific tests in the + llvm regression test suite all pass and there are no regressions in + the test-suite.

+ +

We do not use the GCC DejaGNU test suite as release criteria.

+ +
+ + + + +
+ +

Clang is qualified when front-end specific tests in the + llvm dejagnu test suite all pass, clang's own test suite passes + cleanly, and there are no regressions in the test-suite.

+ +
+ + + + +
+ + + + + + + + + +
+   Architecture  OS           llvm-gcc baseline  clang baseline  tests
+   x86-32        Linux        last release       last release    llvm dejagnu, clang tests, test-suite (including spec)
+   x86-32        FreeBSD      none               last release    llvm dejagnu, clang tests, test-suite
+   x86-32        mingw        last release       none            QT
+   x86-64        Mac OS 10.X  last release       last release    llvm dejagnu, clang tests, test-suite (including spec)
+   x86-64        Linux        last release       last release    llvm dejagnu, clang tests, test-suite (including spec)
+   x86-64        FreeBSD      none               last release    llvm dejagnu, clang tests, test-suite
+ +
+ + + +
+ +

Once all testing has been completed and appropriate bugs filed, the release
+ candidate tarballs are put on the website and the LLVM community is
+ notified. Ask that all LLVM developers test the release in two ways (a
+ combined sketch follows the list):

+ +
    +
+  1. Download llvm-X.Y, llvm-test-X.Y, and the
+     appropriate llvm-gcc and/or clang binary. Build
+     LLVM. Run make check and the full LLVM test suite (make
+     TEST=nightly report).
+  2. Download llvm-X.Y, llvm-test-X.Y, and the
+     llvm-gcc and/or clang source. Compile everything. Run
+     make check and the full LLVM test suite (make TEST=nightly
+     report).
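
+ Putting the pieces together, a source-based verification run might look like
+ this sketch (paths and parallelism are illustrative; the nightly report is
+ produced from the test-suite object directory):
+
+$ tar -zxf llvm-X.Yrc1.src.tar.gz && cd llvm-X.Yrc1
+$ ./configure && make -j4
+$ make check
+$ make TEST=nightly report     # run from the test-suite object directory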
+ +

Ask LLVM developers to submit the test suite report and make check + results to the list. Verify that there are no regressions from the previous + release. The results are not used to qualify a release, but to spot other + potential problems. For unsupported targets, verify that make check + is at least clean.

+ +

During the first round of testing, all regressions must be fixed before the + second release candidate is tagged.

+ +

If this is the second round of testing, the testing is only to ensure that
+ bug fixes previously merged in have not created new major problems. This
+ is not the time to solve additional and unrelated bugs! If no patches are
+ merged in, the release is determined to be ready and the release manager may
+ move on to the next stage.

+ +
+ + + + +
+ +

Below are the rules regarding patching the release branch:

+ +
    +
+  1. Patches applied to the release branch may only be applied by the
+     release manager.
+
+  2. During the first round of testing, patches that fix regressions or that
+     are small and relatively risk free (verified by the appropriate code
+     owner) are applied to the branch. Code owners are asked to be very
+     conservative in approving patches for the branch. We reserve the right to
+     reject any patch that does not fix a regression as previously
+     defined.
+
+  3. During the remaining rounds of testing, only patches that fix critical
+     regressions may be applied.
+ +
+ + + + +
+ +

The final stages of the release process involve tagging the "final" release
+ branch, updating documentation that refers to the release, and updating the
+ demo page.

+ +
+ + + + + +
+ +

Review the documentation and ensure that it is up to date. The "Release
+ Notes" must be updated to reflect new features, bug fixes, new known issues,
+ and changes in the list of supported platforms. The "Getting Started Guide"
+ should be updated to reflect the new release version number tag available from
+ Subversion and changes in basic system requirements. Merge both changes from
+ mainline into the release branch.

+ +
+ + + + +
+ +

Tag the final release sources using the following procedure:

+ +
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY \
+           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
+           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final
+
+$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
+           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final
+
+
+ +
+ + + + +
+ +

The LLVM demo page must be updated to use the new release. This consists of + using the new llvm-gcc binary and building LLVM.

+ +
+ + + + +
+ +

The website must be updated before the release announcement is sent out. Here + is what to do:

+ +
    +
+  1. Check out the www module from Subversion.
+  2. Create a new subdirectory X.Y in the releases directory.
+  3. Commit the llvm, test-suite, llvm-gcc source,
+     clang source, clang binaries, and llvm-gcc
+     binaries in this new directory.
+  4. Copy and commit the llvm/docs and LICENSE.txt files
+     into this new directory. The docs should be built with
+     BUILD_FOR_WEBSITE=1.
+  5. Commit an index.html to the release/X.Y directory to
+     redirect (use the one from the previous release).
+  6. Update the releases/download.html file with the new release.
+  7. Update the releases/index.html with the new release and link to
+     release documentation.
+  8. Finally, update the main page (index.html and sidebar) to point
+     to the new release and release announcement. Make sure this all gets
+     committed back into Subversion.
+ +
+ + + + +
+ +

Have Chris send out the release announcement when everything is finished.

+ +
+ + +
+
+ Valid CSS + Valid HTML 4.01 + The LLVM Compiler Infrastructure +
+ Last modified: $Date$ +
+ + diff --git a/final/docs/HowToSubmitABug.html b/final/docs/HowToSubmitABug.html new file mode 100644 index 00000000000..90efbe321b1 --- /dev/null +++ b/final/docs/HowToSubmitABug.html @@ -0,0 +1,347 @@ + + + + How to submit an LLVM bug report + + + + +
+ How to submit an LLVM bug report +
+ + + + + + +
+
    +
+  1. Introduction - Got bugs?
+  2. Crashing Bugs
+  3. Miscompilations
+  4. Incorrect code generation (JIT and LLC)
+
+

Written by Chris Lattner and + Misha Brukman

+
+
+ Debugging +
+ + + + + +
+ +

If you're working with LLVM and run into a bug, we definitely want to know +about it. This document describes what you can do to increase the odds of +getting it fixed quickly.

+ +

Basically you have to do two things at a minimum. First, decide whether the +bug crashes the compiler (or an LLVM pass), or if the +compiler is miscompiling the program (i.e., the +compiler successfully produces an executable, but it doesn't run right). Based +on +what type of bug it is, follow the instructions in the linked section to narrow +down the bug so that the person who fixes it will be able to find the problem +more easily.

+ +

Once you have a reduced test-case, go to the LLVM Bug Tracking +System and fill out the form with the necessary details (note that you don't +need to pick a category, just use the "new-bugs" category if you're not sure). +The bug description should contain the following +information:

+ +
    +
+  • All information necessary to reproduce the problem.
+  • The reduced test-case that triggers the bug.
+  • The location where you obtained LLVM (if not from our Subversion
+    repository).
+ +

Thanks for helping us make LLVM better!

+ +
+ + + + + +
+ +

More often than not, bugs in the compiler cause it to crash—often due +to an assertion failure of some sort. The most important +piece of the puzzle is to figure out if it is crashing in the GCC front-end +or if it is one of the LLVM libraries (e.g. the optimizer or code generator) +that has problems.

+ +

To figure out which component is crashing (the front-end, +optimizer or code generator), run the +llvm-gcc command line as you were when the crash occurred, but +with the following extra command line options:

+ +
    +
+  • -O0 -emit-llvm: If llvm-gcc still crashes when
+    passed these options (which disable the optimizer and code generator), then
+    the crash is in the front-end. Jump ahead to the section on front-end bugs.
+  • -emit-llvm: If llvm-gcc crashes with this option
+    (which disables the code generator), you found an optimizer bug. Jump ahead
+    to compile-time optimization bugs.
+  • Otherwise, you have a code generator crash. Jump ahead to code generator bugs.
+ +
+ + + + +
+ +

If the problem is in the front-end, you should re-run the same +llvm-gcc command that resulted in the crash, but add the +-save-temps option. The compiler will crash again, but it will leave +behind a foo.i file (containing preprocessed C source code) and +possibly foo.s for each +compiled foo.c file. Send us the foo.i file, +along with the options you passed to llvm-gcc, and a brief description of the +error it caused.
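
+ For instance, if the crash came from an optimized compile of foo.c, the
+ re-run might look like this (the flags shown are illustrative):
+
+$ llvm-gcc -O2 -save-temps -c foo.c    # crashes again, but leaves foo.i behind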

+ +

The delta tool helps to reduce the +preprocessed file down to the smallest amount of code that still replicates the +problem. You're encouraged to use delta to reduce the code to make the +developers' lives easier. This website +has instructions on the best way to use delta.

+ +
+ + + + +
+ +

If you find that a bug crashes in the optimizer, compile your test-case to a +.bc file by passing "-emit-llvm -O0 -c -o foo.bc". +Then run:

+ +
+

opt -std-compile-opts -debug-pass=Arguments foo.bc + -disable-output

+
+ +

This command should do two things: it should print out a list of passes, and
+then it should crash in the same way as llvm-gcc. If it doesn't crash, please
+follow the instructions for a front-end bug.

+ +

If this does crash, then you should be able to debug this with the following +bugpoint command:

+ +
+

bugpoint foo.bc <list of passes printed by +opt>

+
+ +

Please run this, then file a bug with the instructions and reduced .bc files +that bugpoint emits. If something goes wrong with bugpoint, please submit the +"foo.bc" file and the list of passes printed by opt.

+ +
+ + + + +
+ +

If you find a bug that crashes llvm-gcc in the code generator, compile your
+source file to a .bc file by passing "-emit-llvm -c -o foo.bc"
+to llvm-gcc (in addition to the options you already pass). Once you have
+foo.bc, one of the following commands should fail:

+ +
    +
+  1. llc foo.bc
+  2. llc foo.bc -relocation-model=pic
+  3. llc foo.bc -relocation-model=static
+ +

If none of these crash, please follow the instructions for a
+front-end bug. If one of these does crash, you should
+be able to reduce this with one of the following bugpoint command lines (use
+the one corresponding to the command above that failed):

+ +
    +
+  1. bugpoint -run-llc foo.bc
+  2. bugpoint -run-llc foo.bc --tool-args
+     -relocation-model=pic
+  3. bugpoint -run-llc foo.bc --tool-args
+     -relocation-model=static
+ +

Please run this, then file a bug with the instructions and reduced .bc file +that bugpoint emits. If something goes wrong with bugpoint, please submit the +"foo.bc" file and the option that llc crashes with.

+ +
+ + + + + +
+ +

If llvm-gcc successfully produces an executable, but that executable doesn't +run right, this is either a bug in the code or a bug in the +compiler. The first thing to check is to make sure it is not using undefined +behavior (e.g. reading a variable before it is defined). In particular, check +to see if the program valgrinds clean, +passes purify, or some other memory checker tool. Many of the "LLVM bugs" that +we have chased down ended up being bugs in the program being compiled, not + LLVM.
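
+ For example, a quick memory-error check of the natively compiled program with
+ Valgrind (an illustrative invocation; any memory checker will do):
+
+$ valgrind --quiet ./a.out [program arguments]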

+ +

Once you determine that the program itself is not buggy, you should choose +which code generator you wish to compile the program with (e.g. C backend, the +JIT, or LLC) and optionally a series of LLVM passes to run. For example:

+ +
+

+bugpoint -run-cbe [... optzn passes ...] file-to-test.bc --args -- [program arguments]

+
+ +

bugpoint will try to narrow down your list of passes to the one pass +that causes an error, and simplify the bitcode file as much as it can to assist +you. It will print a message letting you know how to reproduce the resulting +error.

+ +
+ + + + + +
+ +

Just as you can debug incorrect compilation by mis-behaving passes, you can
+debug incorrect code generation by either LLC or the JIT, using
+bugpoint. In this case, bugpoint tries to narrow the code down
+to a function that is miscompiled by one method or the other; but since, for
+correctness, the entire program must be run, bugpoint compiles the
+code it deems not to be affected with the C Backend, and then links in the
+shared object it generates.

+ +

To debug the JIT:

+ +
+
+bugpoint -run-jit -output=[correct output file] [bitcode file]  \
+         --tool-args -- [arguments to pass to lli]              \
+         --args -- [program arguments]
+
+
+ +

Similarly, to debug the LLC, one would run:

+ +
+
+bugpoint -run-llc -output=[correct output file] [bitcode file]  \
+         --tool-args -- [arguments to pass to llc]              \
+         --args -- [program arguments]
+
+
+ +

Special note: if you are debugging MultiSource or SPEC tests that +already exist in the llvm/test hierarchy, there is an easier way to +debug the JIT, LLC, and CBE, using the pre-written Makefile targets, which +will pass the program options specified in the Makefiles:

+ +
+

+cd llvm/test/../../program
+make bugpoint-jit +

+
+ +

At the end of a successful bugpoint run, you will be presented +with two bitcode files: a safe file which can be compiled with the C +backend and the test file which either LLC or the JIT +mis-codegenerates, and thus causes the error.

+ +

To reproduce the error that bugpoint found, it is sufficient to do +the following:

+ +
    + +
+  1. Regenerate the shared object from the safe bitcode file:
+
+     llc -march=c safe.bc -o safe.c
+     gcc -shared safe.c -o safe.so
+
+  2. If debugging LLC, compile test bitcode native and link with the shared
+     object:
+
+     llc test.bc -o test.s
+     gcc test.s safe.so -o test.llc
+     ./test.llc [program options]
+
+  3. If debugging the JIT, load the shared object and supply the test
+     bitcode:
+
+     lli -load=safe.so test.bc [program options]
+ +
+ + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ The LLVM Compiler Infrastructure +
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/LangRef.html b/final/docs/LangRef.html new file mode 100644 index 00000000000..893876620a4 --- /dev/null +++ b/final/docs/LangRef.html @@ -0,0 +1,7799 @@ + + + + LLVM Assembly Language Reference Manual + + + + + + + + +
LLVM Language Reference Manual
+
    +
+  1. Abstract
+  2. Introduction
+  3. Identifiers
+  4. High Level Structure
+     1. Module Structure
+     2. Linkage Types
+        1. 'private' Linkage
+        2. 'linker_private' Linkage
+        3. 'linker_private_weak' Linkage
+        4. 'linker_private_weak_def_auto' Linkage
+        5. 'internal' Linkage
+        6. 'available_externally' Linkage
+        7. 'linkonce' Linkage
+        8. 'common' Linkage
+        9. 'weak' Linkage
+        10. 'appending' Linkage
+        11. 'extern_weak' Linkage
+        12. 'linkonce_odr' Linkage
+        13. 'weak_odr' Linkage
+        14. 'externally visible' Linkage
+        15. 'dllimport' Linkage
+        16. 'dllexport' Linkage
+     3. Calling Conventions
+     4. Named Types
+     5. Global Variables
+     6. Functions
+     7. Aliases
+     8. Named Metadata
+     9. Parameter Attributes
+     10. Function Attributes
+     11. Garbage Collector Names
+     12. Module-Level Inline Assembly
+     13. Data Layout
+     14. Pointer Aliasing Rules
+     15. Volatile Memory Accesses
+  5. Type System
+     1. Type Classifications
+     2. Primitive Types
+        1. Integer Type
+        2. Floating Point Types
+        3. X86mmx Type
+        4. Void Type
+        5. Label Type
+        6. Metadata Type
+     3. Derived Types
+        1. Aggregate Types
+           1. Array Type
+           2. Structure Type
+           3. Packed Structure Type
+           4. Vector Type
+        2. Function Type
+        3. Pointer Type
+        4. Opaque Type
+     4. Type Up-references
+  6. Constants
+     1. Simple Constants
+     2. Complex Constants
+     3. Global Variable and Function Addresses
+     4. Undefined Values
+     5. Trap Values
+     6. Addresses of Basic Blocks
+     7. Constant Expressions
+  7. Other Values
+     1. Inline Assembler Expressions
+     2. Metadata Nodes and Metadata Strings
+  8. Intrinsic Global Variables
+     1. The 'llvm.used' Global Variable
+     2. The 'llvm.compiler.used' Global Variable
+     3. The 'llvm.global_ctors' Global Variable
+     4. The 'llvm.global_dtors' Global Variable
+  9. Instruction Reference
+     1. Terminator Instructions
+        1. 'ret' Instruction
+        2. 'br' Instruction
+        3. 'switch' Instruction
+        4. 'indirectbr' Instruction
+        5. 'invoke' Instruction
+        6. 'unwind' Instruction
+        7. 'unreachable' Instruction
+     2. Binary Operations
+        1. 'add' Instruction
+        2. 'fadd' Instruction
+        3. 'sub' Instruction
+        4. 'fsub' Instruction
+        5. 'mul' Instruction
+        6. 'fmul' Instruction
+        7. 'udiv' Instruction
+        8. 'sdiv' Instruction
+        9. 'fdiv' Instruction
+        10. 'urem' Instruction
+        11. 'srem' Instruction
+        12. 'frem' Instruction
+     3. Bitwise Binary Operations
+        1. 'shl' Instruction
+        2. 'lshr' Instruction
+        3. 'ashr' Instruction
+        4. 'and' Instruction
+        5. 'or' Instruction
+        6. 'xor' Instruction
+     4. Vector Operations
+        1. 'extractelement' Instruction
+        2. 'insertelement' Instruction
+        3. 'shufflevector' Instruction
+     5. Aggregate Operations
+        1. 'extractvalue' Instruction
+        2. 'insertvalue' Instruction
+     6. Memory Access and Addressing Operations
+        1. 'alloca' Instruction
+        2. 'load' Instruction
+        3. 'store' Instruction
+        4. 'getelementptr' Instruction
+     7. Conversion Operations
+        1. 'trunc .. to' Instruction
+        2. 'zext .. to' Instruction
+        3. 'sext .. to' Instruction
+        4. 'fptrunc .. to' Instruction
+        5. 'fpext .. to' Instruction
+        6. 'fptoui .. to' Instruction
+        7. 'fptosi .. to' Instruction
+        8. 'uitofp .. to' Instruction
+        9. 'sitofp .. to' Instruction
+        10. 'ptrtoint .. to' Instruction
+        11. 'inttoptr .. to' Instruction
+        12. 'bitcast .. to' Instruction
+     8. Other Operations
+        1. 'icmp' Instruction
+        2. 'fcmp' Instruction
+        3. 'phi' Instruction
+        4. 'select' Instruction
+        5. 'call' Instruction
+        6. 'va_arg' Instruction
+  10. Intrinsic Functions
+     1. Variable Argument Handling Intrinsics
+        1. 'llvm.va_start' Intrinsic
+        2. 'llvm.va_end' Intrinsic
+        3. 'llvm.va_copy' Intrinsic
+     2. Accurate Garbage Collection Intrinsics
+        1. 'llvm.gcroot' Intrinsic
+        2. 'llvm.gcread' Intrinsic
+        3. 'llvm.gcwrite' Intrinsic
+     3. Code Generator Intrinsics
+        1. 'llvm.returnaddress' Intrinsic
+        2. 'llvm.frameaddress' Intrinsic
+        3. 'llvm.stacksave' Intrinsic
+        4. 'llvm.stackrestore' Intrinsic
+        5. 'llvm.prefetch' Intrinsic
+        6. 'llvm.pcmarker' Intrinsic
+        7. 'llvm.readcyclecounter' Intrinsic
+     4. Standard C Library Intrinsics
+        1. 'llvm.memcpy.*' Intrinsic
+        2. 'llvm.memmove.*' Intrinsic
+        3. 'llvm.memset.*' Intrinsic
+        4. 'llvm.sqrt.*' Intrinsic
+        5. 'llvm.powi.*' Intrinsic
+        6. 'llvm.sin.*' Intrinsic
+        7. 'llvm.cos.*' Intrinsic
+        8. 'llvm.pow.*' Intrinsic
+     5. Bit Manipulation Intrinsics
+        1. 'llvm.bswap.*' Intrinsics
+        2. 'llvm.ctpop.*' Intrinsic
+        3. 'llvm.ctlz.*' Intrinsic
+        4. 'llvm.cttz.*' Intrinsic
+     6. Arithmetic with Overflow Intrinsics
+        1. 'llvm.sadd.with.overflow.*' Intrinsics
+        2. 'llvm.uadd.with.overflow.*' Intrinsics
+        3. 'llvm.ssub.with.overflow.*' Intrinsics
+        4. 'llvm.usub.with.overflow.*' Intrinsics
+        5. 'llvm.smul.with.overflow.*' Intrinsics
+        6. 'llvm.umul.with.overflow.*' Intrinsics
+     7. Half Precision Floating Point Intrinsics
+        1. 'llvm.convert.to.fp16' Intrinsic
+        2. 'llvm.convert.from.fp16' Intrinsic
+     8. Debugger intrinsics
+     9. Exception Handling intrinsics
+     10. Trampoline Intrinsic
+         1. 'llvm.init.trampoline' Intrinsic
+     11. Atomic intrinsics
+         1. llvm.memory_barrier
+         2. llvm.atomic.cmp.swap
+         3. llvm.atomic.swap
+         4. llvm.atomic.load.add
+         5. llvm.atomic.load.sub
+         6. llvm.atomic.load.and
+         7. llvm.atomic.load.nand
+         8. llvm.atomic.load.or
+         9. llvm.atomic.load.xor
+         10. llvm.atomic.load.max
+         11. llvm.atomic.load.min
+         12. llvm.atomic.load.umax
+         13. llvm.atomic.load.umin
+     12. Memory Use Markers
+         1. llvm.lifetime.start
+         2. llvm.lifetime.end
+         3. llvm.invariant.start
+         4. llvm.invariant.end
+     13. General intrinsics
+         1. 'llvm.var.annotation' Intrinsic
+         2. 'llvm.annotation.*' Intrinsic
+         3. 'llvm.trap' Intrinsic
+         4. 'llvm.stackprotector' Intrinsic
+         5. 'llvm.objectsize' Intrinsic
+ +
+

Written by Chris Lattner + and Vikram Adve

+
+ + + + + +
+ +

This document is a reference manual for the LLVM assembly language. LLVM is + a Static Single Assignment (SSA) based representation that provides type + safety, low-level operations, flexibility, and the capability of representing + 'all' high-level languages cleanly. It is the common code representation + used throughout all phases of the LLVM compilation strategy.

+ +
+ + + + + +
+ +

The LLVM code representation is designed to be used in three different forms: + as an in-memory compiler IR, as an on-disk bitcode representation (suitable + for fast loading by a Just-In-Time compiler), and as a human readable + assembly language representation. This allows LLVM to provide a powerful + intermediate representation for efficient compiler transformations and + analysis, while providing a natural means to debug and visualize the + transformations. The three different forms of LLVM are all equivalent. This + document describes the human readable representation and notation.

+ +

The LLVM representation aims to be light-weight and low-level while being + expressive, typed, and extensible at the same time. It aims to be a + "universal IR" of sorts, by being at a low enough level that high-level ideas + may be cleanly mapped to it (similar to how microprocessors are "universal + IR's", allowing many source languages to be mapped to them). By providing + type information, LLVM can be used as the target of optimizations: for + example, through pointer analysis, it can be proven that a C automatic + variable is never accessed outside of the current function, allowing it to + be promoted to a simple SSA value instead of a memory location.

+ +
+ + + + +
+ +

It is important to note that this document describes 'well formed' LLVM + assembly language. There is a difference between what the parser accepts and + what is considered 'well formed'. For example, the following instruction is + syntactically okay, but not well formed:

+ +
+%x = add i32 1, %x
+
+ +

because the definition of %x does not dominate all of its uses. The + LLVM infrastructure provides a verification pass that may be used to verify + that an LLVM module is well formed. This pass is automatically run by the + parser after parsing input assembly and by the optimizer before it outputs + bitcode. The violations pointed out by the verifier pass indicate bugs in + transformation passes or input to the parser.
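
+ For example, you can run just the verifier over a module by hand; this sketch
+ assembles a .ll file (the file name is hypothetical) and pipes it through opt:
+
+$ llvm-as < module.ll | opt -verify -disable-output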

+ +
+ + + + + + + +
+ +

LLVM identifiers come in two basic types: global and local. Global + identifiers (functions, global variables) begin with the '@' + character. Local identifiers (register names, types) begin with + the '%' character. Additionally, there are three different formats + for identifiers, for different purposes:

+ +
    +
+  1. Named values are represented as a string of characters with their prefix.
+     For example, %foo, @DivisionByZero,
+     %a.really.long.identifier. The actual regular expression used is
+     '[%@][a-zA-Z$._][a-zA-Z$._0-9]*'. Identifiers which require
+     other characters in their names can be surrounded with quotes. Special
+     characters may be escaped using "\xx" where xx is the
+     ASCII code for the character in hexadecimal. In this way, any character
+     can be used in a name value, even quotes themselves.
+  2. Unnamed values are represented as an unsigned numeric value with their
+     prefix. For example, %12, @2, %44.
+  3. Constants, which are described in a section about
+     constants, below.
+ +

LLVM requires that values start with a prefix for two reasons: Compilers + don't need to worry about name clashes with reserved words, and the set of + reserved words may be expanded in the future without penalty. Additionally, + unnamed identifiers allow a compiler to quickly come up with a temporary + variable without having to avoid symbol table conflicts.

+ +

Reserved words in LLVM are very similar to reserved words in other + languages. There are keywords for different opcodes + ('add', + 'bitcast', + 'ret', etc...), for primitive type names + ('void', + 'i32', etc...), and others. These + reserved words cannot conflict with variable names, because none of them + start with a prefix character ('%' or '@').

+ +

Here is an example of LLVM code to multiply the integer variable + '%X' by 8:

+ +

The easy way:

+ +
+%result = mul i32 %X, 8
+
+ +

After strength reduction:

+ +
+%result = shl i32 %X, 3
+
+ +

And the hard way:

+ +
+%0 = add i32 %X, %X           ; yields {i32}:%0
+%1 = add i32 %0, %0           ; yields {i32}:%1
+%result = add i32 %1, %1
+
+ +

This last way of multiplying %X by 8 illustrates several important + lexical features of LLVM:

+ +
    +
+  1. Comments are delimited with a ';' and go until the end of
+     line.
+  2. Unnamed temporaries are created when the result of a computation is not
+     assigned to a named value.
+  3. Unnamed temporaries are numbered sequentially.
+ +

It also shows a convention that we follow in this document. When + demonstrating instructions, we will follow an instruction with a comment that + defines the type and name of value produced. Comments are shown in italic + text.

+ +
+ + + + + + + + +
+ +

LLVM programs are composed of "Module"s, each of which is a translation unit + of the input programs. Each module consists of functions, global variables, + and symbol table entries. Modules may be combined together with the LLVM + linker, which merges function (and global variable) definitions, resolves + forward declarations, and merges symbol table entries. Here is an example of + the "hello world" module:

+ +
+; Declare the string constant as a global constant. 
+@.LC0 = internal constant [13 x i8] c"hello world\0A\00"      ; [13 x i8]* 
+
+; External declaration of the puts function 
+declare i32 @puts(i8*)                                      ; i32 (i8*)*  
+
+; Definition of main function
+define i32 @main() {   ; i32()*  
+  ; Convert [13 x i8]* to i8  *... 
+  %cast210 = getelementptr [13 x i8]* @.LC0, i64 0, i64 0   ; i8* 
+
+  ; Call puts function to write out the string to stdout. 
+  call i32 @puts(i8* %cast210)           ; i32 
+  ret i32 0 
+}
+
+; Named metadata
+!1 = metadata !{i32 41}
+!foo = !{!1, null}
+
+ +

This example is made up of a global variable named + ".LC0", an external declaration of the "puts" function, + a function definition for + "main" and named metadata + "foo".

+ +

In general, a module is made up of a list of global values, where both + functions and global variables are global values. Global values are + represented by a pointer to a memory location (in this case, a pointer to an + array of char, and a pointer to a function), and have one of the + following linkage types.

+ +
+ + + + +
+ +

All Global Variables and Functions have one of the following types of + linkage:

+ +
+
private
+
Global values with "private" linkage are only directly accessible
+ by objects in the current module. In particular, linking code into a
+ module with a private global value may cause the private to be renamed as
+ necessary to avoid collisions. Because the symbol is private to the
+ module, all references can be updated. This doesn't show up in any symbol
+ table in the object file.
+ +
linker_private
+
Similar to private, but the symbol is passed through the + assembler and evaluated by the linker. Unlike normal strong symbols, they + are removed by the linker from the final linked image (executable or + dynamic library).
+ +
linker_private_weak
+
Similar to "linker_private", but the symbol is weak. Note that + linker_private_weak symbols are subject to coalescing by the + linker. The symbols are removed by the linker from the final linked image + (executable or dynamic library).
+ +
linker_private_weak_def_auto
+
Similar to "linker_private_weak", but it's known that the address + of the object is not taken. For instance, functions that had an inline + definition, but the compiler decided not to inline it. Note, + unlike linker_private and linker_private_weak, + linker_private_weak_def_auto may have only default + visibility. The symbols are removed by the linker from the final linked + image (executable or dynamic library).
+ +
internal
+
Similar to private, but the value shows as a local symbol + (STB_LOCAL in the case of ELF) in the object file. This + corresponds to the notion of the 'static' keyword in C.
+ +
available_externally
+
Globals with "available_externally" linkage are never emitted + into the object file corresponding to the LLVM module. They exist to + allow inlining and other optimizations to take place given knowledge of + the definition of the global, which is known to be somewhere outside the + module. Globals with available_externally linkage are allowed to + be discarded at will, and are otherwise the same as linkonce_odr. + This linkage type is only allowed on definitions, not declarations.
+ +
linkonce
+
Globals with "linkonce" linkage are merged with other globals of + the same name when linkage occurs. This can be used to implement + some forms of inline functions, templates, or other code which must be + generated in each translation unit that uses it, but where the body may + be overridden with a more definitive definition later. Unreferenced + linkonce globals are allowed to be discarded. Note that + linkonce linkage does not actually allow the optimizer to + inline the body of this function into callers because it doesn't know if + this definition of the function is the definitive definition within the + program or whether it will be overridden by a stronger definition. + To enable inlining and other optimizations, use "linkonce_odr" + linkage.
+ +
weak
+
"weak" linkage has the same merging semantics as + linkonce linkage, except that unreferenced globals with + weak linkage may not be discarded. This is used for globals that + are declared "weak" in C source code.
+ +
common
+
"common" linkage is most similar to "weak" linkage, but + they are used for tentative definitions in C, such as "int X;" at + global scope. + Symbols with "common" linkage are merged in the same way as + weak symbols, and they may not be deleted if unreferenced. + common symbols may not have an explicit section, + must have a zero initializer, and may not be marked 'constant'. Functions and aliases may not + have common linkage.
+ + +
appending
+
"appending" linkage may only be applied to global variables of + pointer to array type. When two global variables with appending linkage + are linked together, the two global arrays are appended together. This is + the LLVM, typesafe, equivalent of having the system linker append together + "sections" with identical names when .o files are linked.
+ +
extern_weak
+
The semantics of this linkage follow the ELF object file model: the symbol
+ is weak until linked; if not linked, the symbol becomes null instead of
+ being an undefined reference.
+ +
linkonce_odr
+
weak_odr
+
Some languages allow differing globals to be merged, such as two functions + with different semantics. Other languages, such as C++, ensure + that only equivalent globals are ever merged (the "one definition rule" + — "ODR"). Such languages can use the linkonce_odr + and weak_odr linkage types to indicate that the global will only + be merged with equivalent globals. These linkage types are otherwise the + same as their non-odr versions.
+ +
externally visible:
+
If none of the above identifiers are used, the global is externally + visible, meaning that it participates in linkage and can be used to + resolve external symbol references.
+
+ +

The next two types of linkage are targeted for Microsoft Windows platform + only. They are designed to support importing (exporting) symbols from (to) + DLLs (Dynamic Link Libraries).

+ +
+
dllimport
+
"dllimport" linkage causes the compiler to reference a function + or variable via a global pointer to a pointer that is set up by the DLL + exporting the symbol. On Microsoft Windows targets, the pointer name is + formed by combining __imp_ and the function or variable + name.
+ +
dllexport
+
"dllexport" linkage causes the compiler to provide a global + pointer to a pointer in a DLL, so that it can be referenced with the + dllimport attribute. On Microsoft Windows targets, the pointer + name is formed by combining __imp_ and the function or + variable name.
+
+ +

For example, since the ".LC0" variable is defined to be internal, if + another module defined a ".LC0" variable and was linked with this + one, one of the two would be renamed, preventing a collision. Since + "main" and "puts" are external (i.e., lacking any linkage + declarations), they are accessible outside of the current module.

+ +

It is illegal for a function declaration to have any linkage type + other than "externally visible", dllimport + or extern_weak.

+ +

Aliases can have only external, internal, weak + or weak_odr linkages.
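
+ To make the keywords concrete, here is a small illustrative module (the
+ global names are hypothetical) declaring globals with several of the linkage
+ types described above:
+
+@g_private  = private constant i32 7        ; not visible outside this module
+@g_internal = internal global i32 0         ; like C 'static'
+@g_common   = common global i32 0           ; C tentative definition; zero initializer required
+@g_weak     = weak global i32 1             ; may be overridden; not discarded if unreferenced
+@g_odr      = linkonce_odr constant i32 42  ; mergeable under the one-definition rule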

+ +
+ + + + +
+ +

LLVM functions, calls + and invokes can all have an optional calling + convention specified for the call. The calling convention of any pair of + dynamic caller/callee must match, or the behavior of the program is + undefined. The following calling conventions are supported by LLVM, and more + may be added in the future:

+ +
+
"ccc" - The C calling convention:
+
This calling convention (the default if no other calling convention is + specified) matches the target C calling conventions. This calling + convention supports varargs function calls and tolerates some mismatch in + the declared prototype and implemented declaration of the function (as + does normal C).
+ +
"fastcc" - The fast calling convention:
+
This calling convention attempts to make calls as fast as possible + (e.g. by passing things in registers). This calling convention allows the + target to use whatever tricks it wants to produce fast code for the + target, without having to conform to an externally specified ABI + (Application Binary Interface). + Tail calls can only be optimized + when this or the GHC convention is used. This calling convention + does not support varargs and requires the prototype of all callees to + exactly match the prototype of the function definition.
+ +
"coldcc" - The cold calling convention:
+
This calling convention attempts to make code in the caller as efficient + as possible under the assumption that the call is not commonly executed. + As such, these calls often preserve all registers so that the call does + not break any live ranges in the caller side. This calling convention + does not support varargs and requires the prototype of all callees to + exactly match the prototype of the function definition.
+ +
"cc 10" - GHC convention:
+
This calling convention has been implemented specifically for use by the
+ Glasgow Haskell Compiler (GHC).
+ It passes everything in registers, going to extremes to achieve this by
+ disabling callee save registers. This calling convention should not be
+ used lightly but only for specific situations such as an alternative to
+ the register pinning performance technique often used when
+ implementing functional programming languages. At the moment only X86
+ supports this convention and it has the following limitations:
    +
+  • On X86-32 only supports up to 4 bit type parameters. No
+    floating point types are supported.
+  • On X86-64 only supports up to 10 bit type parameters and
+    6 floating point parameters.
+ This calling convention supports
+ tail call optimization but
+ requires that both the caller and callee use it.
+ +
"cc <n>" - Numbered convention:
+
Any calling convention may be specified by number, allowing + target-specific calling conventions to be used. Target specific calling + conventions start at 64.
+
+ +

More calling conventions can be added/defined on an as-needed basis, to + support Pascal conventions or any other well-known target-independent + convention.

+ +
+ + + + +
+ +

All Global Variables and Functions have one of the following visibility + styles:

+ +
+
"default" - Default style:
+
On targets that use the ELF object file format, default visibility means + that the declaration is visible to other modules and, in shared libraries, + means that the declared entity may be overridden. On Darwin, default + visibility means that the declaration is visible to other modules. Default + visibility corresponds to "external linkage" in the language.
+ +
"hidden" - Hidden style:
+
Two declarations of an object with hidden visibility refer to the same + object if they are in the same shared object. Usually, hidden visibility + indicates that the symbol will not be placed into the dynamic symbol + table, so no other module (executable or shared library) can reference it + directly.
+ +
"protected" - Protected style:
+
On ELF, protected visibility indicates that the symbol will be placed in + the dynamic symbol table, but that references within the defining module + will bind to the local symbol. That is, the symbol cannot be overridden by + another module.
+
+ +
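
+ For illustration, the three styles on hypothetical globals (writing
+ default explicitly is optional):
+
+@a = default global i32 0     ; visible and, in shared libraries, preemptible
+@b = hidden global i32 0      ; kept out of the dynamic symbol table
+@c = protected global i32 0   ; exported, but not preemptible within this module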
+ + + + +
+ +

LLVM IR allows you to specify name aliases for certain types. This can make + it easier to read the IR and make the IR more condensed (particularly when + recursive types are involved). An example of a name specification is:

+ +
+%mytype = type { %mytype*, i32 }
+
+ +

You may give a name to any type except + "void". Type name aliases may be used anywhere a type + is expected with the syntax "%mytype".

+ +

Note that type names are aliases for the structural type that they indicate, + and that you can therefore specify multiple names for the same type. This + often leads to confusing behavior when dumping out a .ll file. Since LLVM IR + uses structural typing, the name is not part of the type. When printing out + LLVM IR, the printer will pick one name to render all types of a + particular shape. This means that if you have code where two different + source types end up having the same LLVM type, that the dumper will sometimes + print the "wrong" or unexpected type. This is an important design point and + isn't going to change.
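
+ For example (hypothetical names), two names for one structural type:
+
+%pair  = type { i32, i32 }
+%point = type { i32, i32 }    ; structurally identical; the printer may use either name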

+ +
+ + + + +
+ +

Global variables define regions of memory allocated at compilation time
+ instead of run-time. Global variables may optionally be initialized, may
+ have an explicit section to be placed in, and may have an optional explicit
+ alignment specified. A variable may be defined as "thread_local", which
+ means that it will not be shared by threads (each thread will have a
+ separate copy of the variable). A variable may be defined as a global
+ "constant," which indicates that the contents of the variable
+ will never be modified (enabling better optimization, allowing the
+ global data to be placed in the read-only section of an executable, etc).
+ Note that variables that need runtime initialization cannot be marked
+ "constant" as there is a store to the variable.

+ +

LLVM explicitly allows declarations of global variables to be marked + constant, even if the final definition of the global is not. This capability + can be used to enable slightly better optimization of the program, but + requires the language definition to guarantee that optimizations based on the + 'constantness' are valid for the translation units that do not include the + definition.

+ +

As SSA values, global variables define pointer values that are in scope + (i.e. they dominate) all basic blocks in the program. Global variables + always define a pointer to their "content" type because they describe a + region of memory, and all memory objects in LLVM are accessed through + pointers.

+ +

Global variables can be marked with unnamed_addr which indicates
+ that the address is not significant, only the content. Constants marked
+ like this can be merged with other constants if they have the same
+ initializer. Note that a constant with significant address can
+ be merged with an unnamed_addr constant, the result being a
+ constant whose address is significant.

+ +

A global variable may be declared to reside in a target-specific numbered + address space. For targets that support them, address spaces may affect how + optimizations are performed and/or what target instructions are used to + access the variable. The default address space is zero. The address space + qualifier must precede any other attributes.

+ +

LLVM allows an explicit section to be specified for globals. If the target + supports it, it will emit globals to the section specified.

+ +

An explicit alignment may be specified for a global, which must be a power
+ of 2. If not present, or if the alignment is set to zero, the alignment of
+ the global is set by the target to whatever it feels convenient. If an
+ explicit alignment is specified, the global is forced to have exactly that
+ alignment. Targets and optimizers are not allowed to over-align the global
+ if the global has an assigned section. In this case, the extra alignment
+ could be observable: for example, code could assume that the globals are
+ densely packed in their section and try to iterate over them as an array;
+ alignment padding would break this iteration.

+ +

For example, the following defines a global in a numbered address space with + an initializer, section, and alignment:

+ +
+@G = addrspace(5) constant float 1.0, section "foo", align 4
+
+ +
+ + + + + +
+ +

LLVM function definitions consist of the "define" keyword, an + optional linkage type, an optional + visibility style, an optional + calling convention, + an optional unnamed_addr attribute, a return type, an optional + parameter attribute for the return type, a function + name, a (possibly empty) argument list (each with optional + parameter attributes), optional + function attributes, an optional section, an optional + alignment, an optional garbage collector name, an opening + curly brace, a list of basic blocks, and a closing curly brace.

+ +

LLVM function declarations consist of the "declare" keyword, an + optional linkage type, an optional + visibility style, an optional + calling convention, + an optional unnamed_addr attribute, a return type, an optional + parameter attribute for the return type, a function + name, a possibly empty list of arguments, an optional alignment, and an + optional garbage collector name.

+ +

A function definition contains a list of basic blocks, forming the CFG + (Control Flow Graph) for the function. Each basic block may optionally start + with a label (giving the basic block a symbol table entry), contains a list + of instructions, and ends with a terminator + instruction (such as a branch or function return).

+ +

The first basic block in a function is special in two ways: it is immediately + executed on entrance to the function, and it is not allowed to have + predecessor basic blocks (i.e. there can not be any branches to the entry + block of a function). Because the block can have no predecessors, it also + cannot have any PHI nodes.

+ +

LLVM allows an explicit section to be specified for functions. If the target + supports it, it will emit functions to the section specified.

+ +

An explicit alignment may be specified for a function. If not present, or if + the alignment is set to zero, the alignment of the function is set by the + target to whatever it feels convenient. If an explicit alignment is + specified, the function is forced to have at least that much alignment. All + alignments must be a power of 2.

+ +

If the unnamed_addr attribute is given, the address is known to not be significant and two identical functions can be merged.
Syntax:
+
+define [linkage] [visibility]
+       [cconv] [ret attrs]
+       <ResultType> @<FunctionName> ([argument list])
+       [fn Attrs] [section "name"] [align N]
+       [gc] { ... }
+
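For example, here is a small definition exercising several of the optional pieces (a sketch; the name, section, and attribute choices are hypothetical):

define private i32 @sum(i32 %a, i32 %b) nounwind section ".text.hot" align 16 {
entry:
  %tmp = add i32 %a, %b
  ret i32 %tmp
}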
+ +
Aliases

Aliases act as a "second name" for the aliasee value (which can be either a function, a global variable, another alias, or a bitcast of a global value). Aliases may have an optional linkage type and an optional visibility style.

+ +
Syntax:
+
+@<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
+
+ +
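For example, a minimal sketch with hypothetical names, giving @impl a second name:

define i32 @impl(i32 %x) {
  ret i32 %x
}

@impl_alias = alias i32 (i32)* @impl   ; a second name for @impl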
Named Metadata

Named metadata is a collection of metadata. Metadata nodes (but not metadata strings) are the only valid operands for named metadata.

+ +
Syntax:
+
+; Some unnamed metadata nodes, which are referenced by the named metadata.
+!0 = metadata !{metadata !"zero"}
+!1 = metadata !{metadata !"one"}
+!2 = metadata !{metadata !"two"}
+; A named metadata.
+!name = !{!0, !1, !2}
+
+ +
Parameter Attributes

The return type and each parameter of a function type may have a set of + parameter attributes associated with them. Parameter attributes are + used to communicate additional information about the result or parameters of + a function. Parameter attributes are considered to be part of the function, + not of the function type, so functions with different parameter attributes + can have the same function type.

+ +

Parameter attributes are simple keywords that follow the type specified. If + multiple parameter attributes are needed, they are space separated. For + example:

+ +
+declare i32 @printf(i8* noalias nocapture, ...)
+declare i32 @atoi(i8 zeroext)
+declare signext i8 @returns_signed_char()
+
+ +

Note that any attributes for the function result (nounwind, + readonly) come immediately after the argument list.

+ +

Currently, only the following parameter attributes are defined:

+ +
+
zeroext
+
This indicates to the code generator that the parameter or return value + should be zero-extended to a 32-bit value by the caller (for a parameter) + or the callee (for a return value).
+ +
signext
+
This indicates to the code generator that the parameter or return value + should be sign-extended to a 32-bit value by the caller (for a parameter) + or the callee (for a return value).
+ +
inreg
+
This indicates that this parameter or return value should be treated in a special target-dependent fashion while emitting code for a function call or return (usually, by putting it in a register as opposed to memory, though some targets use it to distinguish between two different kinds of registers). Use of this attribute is target-specific.
+ +
byval
+

This indicates that the pointer parameter should really be passed by value to the function. The attribute implies that a hidden copy of the pointee is made between the caller and the callee, so the callee is unable to modify the value in the caller. This attribute is only valid on LLVM pointer arguments. It is generally used to pass structs and arrays by value, but is also valid on pointers to scalars. The copy is considered to belong to the caller, not the callee (for example, readonly functions should not write to byval parameters). This is not a valid attribute for return values.

+ +

The byval attribute also supports specifying an alignment with the align attribute. It indicates the alignment of the stack slot to form and the known alignment of the pointer specified to the call site. If the alignment is not specified, then the code generator makes a target-specific assumption. (A declaration using byval with an explicit alignment is sketched after this list.)

+ +
sret
+
This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source program. This pointer must be guaranteed by the caller to be valid: loads and stores to the structure may be assumed by the callee not to trap. This may only be applied to the first parameter. This is not a valid attribute for return values.
+ +
noalias
+
This indicates that pointer values + based on the argument or return + value do not alias pointer values which are not based on it, + ignoring certain "irrelevant" dependencies. + For a call to the parent function, dependencies between memory + references from before or after the call and from those during the call + are "irrelevant" to the noalias keyword for the arguments and + return value used in that call. + The caller shares the responsibility with the callee for ensuring that + these requirements are met. + For further details, please see the discussion of the NoAlias response in + alias analysis.
+
+ Note that this definition of noalias is intentionally + similar to the definition of restrict in C99 for function + arguments, though it is slightly weaker. +
+ For function return values, C99's restrict is not meaningful, + while LLVM's noalias is. +
+ +
nocapture
+
This indicates that the callee does not make any copies of the pointer + that outlive the callee itself. This is not a valid attribute for return + values.
+ +
nest
+
This indicates that the pointer parameter can be excised using the + trampoline intrinsics. This is not a valid + attribute for return values.
+
+ +
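As referenced from the byval entry above, here is a sketch of a declaration that passes a structure by value with an explicit alignment (the struct and function names are hypothetical):

%struct.Point = type { double, double }

declare void @move_to(%struct.Point* byval align 8)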
Garbage Collector Names

Each function may specify a garbage collector name, which is simply a + string:

+ +
+define void @f() gc "name" { ... }
+
+ +

The compiler declares the supported values of name. Specifying a collector will cause the compiler to alter its output in order to support the named garbage collection algorithm.

+ +
Function Attributes

Function attributes are set to communicate additional information about a + function. Function attributes are considered to be part of the function, not + of the function type, so functions with different parameter attributes can + have the same function type.

+ +

Function attributes are simple keywords that follow the type specified. If + multiple attributes are needed, they are space separated. For example:

+ +
+define void @f() noinline { ... }
+define void @f() alwaysinline { ... }
+define void @f() alwaysinline optsize { ... }
+define void @f() optsize { ... }
+
+ +
+
alignstack(<n>)
+
This attribute indicates that, when emitting the prologue and epilogue, + the backend should forcibly align the stack pointer. Specify the + desired alignment, which must be a power of two, in parentheses. + +
alwaysinline
+
This attribute indicates that the inliner should attempt to inline this + function into callers whenever possible, ignoring any active inlining size + threshold for this caller.
+ +
hotpatch
+
This attribute indicates that the function should be 'hotpatchable', + meaning the function can be patched and/or hooked even while it is + loaded into memory. On x86, the function prologue will be preceded + by six bytes of padding and will begin with a two-byte instruction. + Most of the functions in the Windows system DLLs in Windows XP SP2 or + higher were compiled in this fashion.
+ +
inlinehint
+
This attribute indicates that the source code contained a hint that inlining + this function is desirable (such as the "inline" keyword in C/C++). It + is just a hint; it imposes no requirements on the inliner.
+ +
naked
+
This attribute disables prologue / epilogue emission for the function. + This can have very system-specific consequences.
+ +
noimplicitfloat
+
This attribute disables implicit floating point instructions.
+ +
noinline
+
This attribute indicates that the inliner should never inline this + function in any situation. This attribute may not be used together with + the alwaysinline attribute.
+ +
noredzone
+
This attribute indicates that the code generator should not use a red + zone, even if the target-specific ABI normally permits it.
+ +
noreturn
+
This function attribute indicates that the function never returns + normally. This produces undefined behavior at runtime if the function + ever does dynamically return.
+ +
nounwind
+
This function attribute indicates that the function never returns with an + unwind or exceptional control flow. If the function does unwind, its + runtime behavior is undefined.
+ +
optsize
+
This attribute suggests that optimization passes and code generator passes + make choices that keep the code size of this function low, and otherwise + do optimizations specifically to reduce code size.
+ +
readnone
+
This attribute indicates that the function computes its result (or decides + to unwind an exception) based strictly on its arguments, without + dereferencing any pointer arguments or otherwise accessing any mutable + state (e.g. memory, control registers, etc) visible to caller functions. + It does not write through any pointer arguments + (including byval arguments) and never + changes any state visible to callers. This means that it cannot unwind + exceptions by calling the C++ exception throwing methods, but + could use the unwind instruction.
+ +
readonly
+
This attribute indicates that the function does not write through any + pointer arguments (including byval + arguments) or otherwise modify any state (e.g. memory, control registers, + etc) visible to caller functions. It may dereference pointer arguments + and read state that may be set in the caller. A readonly function always + returns the same value (or unwinds an exception identically) when called + with the same set of arguments and global state. It cannot unwind an + exception by calling the C++ exception throwing methods, but may + use the unwind instruction.
+ +
ssp
+
This attribute indicates that the function should emit a stack smashing + protector. It is in the form of a "canary"—a random value placed on + the stack before the local variables that's checked upon return from the + function to see if it has been overwritten. A heuristic is used to + determine if a function needs stack protectors or not.
+
+ If a function that has an ssp attribute is inlined into a + function that doesn't have an ssp attribute, then the resulting + function will have an ssp attribute.
+ +
sspreq
+
This attribute indicates that the function should always emit a + stack smashing protector. This overrides + the ssp function attribute.
+
+ If a function that has an sspreq attribute is inlined into a + function that doesn't have an sspreq attribute or which has + an ssp attribute, then the resulting function will have + an sspreq attribute.
+
+ +
Module-Level Inline Assembly

Modules may contain "module-level inline asm" blocks, which correspond to the GCC "file scope inline asm" blocks. These blocks are internally concatenated by LLVM and treated as a single unit, but may be separated in the .ll file if desired. The syntax is very simple:

+ +
+module asm "inline asm code goes here"
+module asm "more can go here"
+
+ +

The strings can contain any character by escaping non-printable characters. + The escape sequence used is simply "\xx" where "xx" is the two digit hex code + for the number.

+ +

The inline asm code is simply printed to the machine code .s file when + assembly code is generated.

+ +
Data Layout

A module may specify a target specific data layout string that specifies how + data is to be laid out in memory. The syntax for the data layout is + simply:

+ +
+target datalayout = "layout specification"
+
+ +

The layout specification consists of a list of specifications + separated by the minus sign character ('-'). Each specification starts with + a letter and may include other information after the letter to define some + aspect of the data layout. The specifications accepted are as follows:

+ +
+
E
+
Specifies that the target lays out data in big-endian form. That is, the + bits with the most significance have the lowest address location.
+ +
e
+
Specifies that the target lays out data in little-endian form. That is, + the bits with the least significance have the lowest address + location.
+ +
p:size:abi:pref
+
This specifies the size of a pointer and its abi and + preferred alignments. All sizes are in bits. Specifying + the pref alignment is optional. If omitted, the + preceding : should be omitted too.
+ +
isize:abi:pref
+
This specifies the alignment for an integer type of a given bit + size. The value of size must be in the range [1,2^23).
+ +
vsize:abi:pref
+
This specifies the alignment for a vector type of a given bit + size.
+ +
fsize:abi:pref
+
This specifies the alignment for a floating point type of a given bit + size. Only values of size that are supported by the target + will work. 32 (float) and 64 (double) are supported on all targets; + 80 or 128 (different flavors of long double) are also supported on some + targets. + +
asize:abi:pref
+
This specifies the alignment for an aggregate type of a given bit + size.
+ +
ssize:abi:pref
+
This specifies the alignment for a stack object of a given bit + size.
+ +
nsize1:size2:size3...
+
This specifies a set of native integer widths for the target CPU + in bits. For example, it might contain "n32" for 32-bit PowerPC, + "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64. Elements of + this set are considered to support most general arithmetic + operations efficiently.
+
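As an illustrative sketch, a little-endian 32-bit target with 64-bit-capable integer operations might describe itself with a string like the following (the exact string is hypothetical, not any particular target's):

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n32"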
+ +

When constructing the data layout for a given target, LLVM starts with a + default set of specifications which are then (possibly) overridden by the + specifications in the datalayout keyword. The default specifications + are given in this list:

+ +
• E - big endian
• p:64:64:64 - 64-bit pointers with 64-bit alignment
• i1:8:8 - i1 is 8-bit (byte) aligned
• i8:8:8 - i8 is 8-bit (byte) aligned
• i16:16:16 - i16 is 16-bit aligned
• i32:32:32 - i32 is 32-bit aligned
• i64:32:64 - i64 has ABI alignment of 32-bits but preferred alignment of 64-bits
• f32:32:32 - float is 32-bit aligned
• f64:64:64 - double is 64-bit aligned
• v64:64:64 - 64-bit vector is 64-bit aligned
• v128:128:128 - 128-bit vector is 128-bit aligned
• a0:0:1 - aggregates are 8-bit aligned
• s0:64:64 - stack objects are 64-bit aligned
+ +

When LLVM is determining the alignment for a given type, it uses the + following rules:

+ +
1. If the type sought is an exact match for one of the specifications, that specification is used.
2. If no match is found, and the type sought is an integer type, then the smallest integer type that is larger than the bitwidth of the sought type is used. If none of the specifications are larger than the bitwidth then the largest integer type is used. For example, given the default specifications above, the i7 type will use the alignment of i8 (next largest) while both i65 and i256 will use the alignment of i64 (largest specified).
3. If no match is found, and the type sought is a vector type, then the largest vector type that is smaller than the sought vector type will be used as a fall back. This happens because <128 x double> can be implemented in terms of 64 <2 x double>, for example.
+ +
Pointer Aliasing Rules

Any memory access must be done through a pointer value associated +with an address range of the memory access, otherwise the behavior +is undefined. Pointer values are associated with address ranges +according to the following rules:

+ +
• A pointer value is associated with the addresses associated with any value it is based on.
• An address of a global variable is associated with the address range of the variable's storage.
• The result value of an allocation instruction is associated with the address range of the allocated storage.
• A null pointer in the default address-space is associated with no address.
• An integer constant other than zero or a pointer value returned from a function not defined within LLVM may be associated with address ranges allocated through mechanisms other than those provided by LLVM. Such ranges shall not overlap with any ranges of addresses allocated by mechanisms provided by LLVM.
+ +

A pointer value is based on another pointer value according + to the following rules:

+ +
• A pointer value formed from a getelementptr operation is based on the first operand of the getelementptr.
• The result value of a bitcast is based on the operand of the bitcast.
• A pointer value formed by an inttoptr is based on all pointer values that contribute (directly or indirectly) to the computation of the pointer's value.
• The "based on" relationship is transitive.
+ +

Note that this definition of "based" is intentionally + similar to the definition of "based" in C99, though it is + slightly weaker.

+ +

LLVM IR does not associate types with memory. The result type of a +load merely indicates the size and +alignment of the memory from which to load, as well as the +interpretation of the value. The first operand type of a +store similarly only indicates the size +and alignment of the store.
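For example (a minimal sketch with a hypothetical global), the same four bytes can be read through differently typed pointers; only the size, alignment, and interpretation come from the pointee type:

@g2 = global i32 0

define float @as_float() {
  %p = bitcast i32* @g2 to float*
  %v = load float* %p        ; loads the same 32 bits, interpreted as a float
  ret float %v
}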

+ +

Consequently, type-based alias analysis, aka TBAA, aka +-fstrict-aliasing, is not applicable to general unadorned +LLVM IR. Metadata may be used to encode +additional information which specialized optimization passes may use +to implement type-based alias analysis.

+ +
Volatile Memory Accesses

Certain memory accesses, such as loads, stores, and calls to llvm.memcpy, may be marked volatile. The optimizers must not change the number of volatile operations or change their order of execution relative to other volatile operations. The optimizers may change the order of volatile operations relative to non-volatile operations. This is not Java's "volatile" and has no cross-thread synchronization behavior.
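A sketch (the function and its purpose are hypothetical):

define i32 @poll_device(i32* %status) {
  %v = volatile load i32* %status      ; must not be removed or reordered
  volatile store i32 0, i32* %status   ; relative to other volatile operations
  ret i32 %v
}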

+ +
Type System

The LLVM type system is one of the most important features of the + intermediate representation. Being typed enables a number of optimizations + to be performed on the intermediate representation directly, without having + to do extra analyses on the side before the transformation. A strong type + system makes it easier to read the generated code and enables novel analyses + and transformations that are not feasible to perform on normal three address + code representations.

+ +
Type Classifications

The types fall into a few useful classifications:

Classification    Types
integer           i1, i2, i3, ... i8, ... i16, ... i32, ... i64, ...
floating point    float, double, x86_fp80, fp128, ppc_fp128
first class       integer, floating point, pointer, vector, structure, array, label, metadata
primitive         label, void, integer, floating point, x86mmx, metadata
derived           array, function, pointer, structure, packed structure, vector, opaque
+ +

The first class types are perhaps the most + important. Values of these types are the only ones which can be produced by + instructions.

+ +
Primitive Types

The primitive types are the fundamental building blocks of the LLVM + system.

+ +
Integer Type
Overview:
+

The integer type is a very simple type that simply specifies an arbitrary bit width for the integer type desired. Any bit width from 1 bit to 2^23-1 (about 8 million) can be specified.

+ +
Syntax:
+
+  iN
+
+ +

The number of bits the integer will occupy is specified by the N + value.

+ +
Examples:
+ + + + + + + + + + + + + +
i1a single-bit integer.
i32a 32-bit integer.
i1942652a really big integer of over 1 million bits.
+ +
Floating Point Types

Type         Description
float        32-bit floating point value
double       64-bit floating point value
fp128        128-bit floating point value (112-bit mantissa)
x86_fp80     80-bit floating point value (X87)
ppc_fp128    128-bit floating point value (two 64-bits)
+ +
X86mmx Type
Overview:
+

The x86mmx type represents a value held in an MMX register on an x86 machine. The operations allowed on it are quite limited: parameters and return values, load and store, and bitcast. User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type. There are no arrays, vectors or constants of this type.

+ +
Syntax:
+
+  x86mmx
+
+ +
Void Type
Overview:
+

The void type does not represent any value and has no size.

+ +
Syntax:
+
+  void
+
+ +
Label Type
Overview:
+

The label type represents code labels.

+ +
Syntax:
+
+  label
+
+ +
Metadata Type
Overview:
+

The metadata type represents embedded metadata. No derived types may be + created from metadata except for function + arguments. + +

Syntax:
+
+  metadata
+
+ +
Derived Types

The real power in LLVM comes from the derived types in the system. This is what allows a programmer to represent arrays, functions, pointers, and other useful types. Each of these types contains one or more element types which may be a primitive type, or another derived type. For example, it is possible to have a two dimensional array, using an array as the element type of another array.

+ + +
Aggregate Types

Aggregate Types are a subset of derived types that can contain multiple + member types. Arrays, + structs, and vectors are + aggregate types.

+ +
Array Type
Overview:
+

The array type is a very simple derived type that arranges elements + sequentially in memory. The array type requires a size (number of elements) + and an underlying data type.

+ +
Syntax:
+
+  [<# elements> x <elementtype>]
+
+ +

The number of elements is a constant integer value; elementtype may + be any type with a size.

+ +
Examples:
[40 x i32]    Array of 40 32-bit integer values.
[41 x i32]    Array of 41 32-bit integer values.
[4 x i8]      Array of 4 8-bit integer values.
+

Here are some examples of multidimensional arrays:

[3 x [4 x i32]]          3x4 array of 32-bit integer values.
[12 x [10 x float]]      12x10 array of single precision floating point values.
[2 x [3 x [4 x i16]]]    2x3x4 array of 16-bit integer values.
+ +

There is no restriction on indexing beyond the end of the array implied by + a static type (though there are restrictions on indexing beyond the bounds + of an allocated object in some cases). This means that single-dimension + 'variable sized array' addressing can be implemented in LLVM with a zero + length array type. An implementation of 'pascal style arrays' in LLVM could + use the type "{ i32, [0 x float]}", for example.

+ +
Function Type
Overview:
+

The function type can be thought of as a function signature. It consists of + a return type and a list of formal parameter types. The return type of a + function type is a first class type or a void type.

+ +
Syntax:
+
+  <returntype> (<parameter list>)
+
+ +

...where '<parameter list>' is a comma-separated list of type + specifiers. Optionally, the parameter list may include a type ..., + which indicates that the function takes a variable number of arguments. + Variable argument functions can access their arguments with + the variable argument handling intrinsic + functions. '<returntype>' is any type except + label.

+ +
Examples:
i32 (i32)              function taking an i32, returning an i32
float (i16, i32 *) *   Pointer to a function that takes an i16 and a pointer to i32, returning float.
i32 (i8*, ...)         A vararg function that takes at least one pointer to i8 (char in C), and returns an integer. This is the signature for printf in LLVM.
{i32, i32} (i32)       A function taking an i32, returning a structure containing two i32 values
+ +
Structure Type
Overview:
+

The structure type is used to represent a collection of data members together + in memory. The packing of the field types is defined to match the ABI of the + underlying processor. The elements of a structure may be any type that has a + size.

+ +

Structures in memory are accessed using 'load' + and 'store' by getting a pointer to a field + with the 'getelementptr' instruction. + Structures in registers are accessed using the + 'extractvalue' and + 'insertvalue' instructions.

+
Syntax:
+
+  { <type list> }
+
+ +
Examples:
{ i32, i32, i32 }        A triple of three i32 values
{ float, i32 (i32) * }   A pair, where the first element is a float and the second element is a pointer to a function that takes an i32, returning an i32.
+ +
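For example, here is a sketch of reading the second field of a hypothetical pair type, both through memory and from a register value:

%pair = type { i32, i32 }

define i32 @second_in_memory(%pair* %p) {
  %addr = getelementptr %pair* %p, i32 0, i32 1   ; address of field 1
  %v = load i32* %addr
  ret i32 %v
}

define i32 @second_in_register(%pair %val) {
  %v = extractvalue %pair %val, 1                 ; field 1 of the value
  ret i32 %v
}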
Packed Structure Type
Overview:
+

The packed structure type is used to represent a collection of data members + together in memory. There is no padding between fields. Further, the + alignment of a packed structure is 1 byte. The elements of a packed + structure may be any type that has a size.

+ +

Structures are accessed using 'load' and 'store' by getting a pointer to a field with the 'getelementptr' instruction.

+ +
Syntax:
+
+  < { <type list> } >
+
+ +
Examples:
< { i32, i32, i32 } >       A triple of three i32 values
< { float, i32 (i32)* } >   A pair, where the first element is a float and the second element is a pointer to a function that takes an i32, returning an i32.
+ +
Pointer Type
Overview:
+

The pointer type is used to specify memory locations. + Pointers are commonly used to reference objects in memory.

+ +

Pointer types may have an optional address space attribute defining the + numbered address space where the pointed-to object resides. The default + address space is number zero. The semantics of non-zero address + spaces are target-specific.

+ +

Note that LLVM does not permit pointers to void (void*) nor does it + permit pointers to labels (label*). Use i8* instead.

+ +
Syntax:
+
+  <type> *
+
+ +
Examples:
[4 x i32]*          A pointer to array of four i32 values.
i32 (i32*) *        A pointer to a function that takes an i32*, returning an i32.
i32 addrspace(5)*   A pointer to an i32 value that resides in address space #5.
+ +
Vector Type
Overview:
+

A vector type is a simple derived type that represents a vector of elements. Vector types are used when multiple primitive data items are operated on in parallel using a single instruction (SIMD). A vector type requires a size (number of elements) and an underlying primitive data type. Vector types are considered first class.

+ +
Syntax:
+
+  < <# elements> x <elementtype> >
+
+ +

The number of elements is a constant integer value larger than 0; elementtype + may be any integer or floating point type. Vectors of size zero are not + allowed, and pointers are not allowed as the element type.

+ +
Examples:
<4 x i32>     Vector of 4 32-bit integer values.
<8 x float>   Vector of 8 32-bit floating-point values.
<2 x i64>     Vector of 2 64-bit integer values.
+ +
Opaque Type
Overview:
+

Opaque types are used to represent unknown types in the system. This + corresponds (for example) to the C notion of a forward declared structure + type. In LLVM, opaque types can eventually be resolved to any type (not just + a structure type).

+ +
Syntax:
+
+  opaque
+
+ +
Examples:
opaque    An opaque type.
+ +
Type Up-references
Overview:
+

An "up reference" allows you to refer to a lexically enclosing type without requiring it to have a name. For instance, a structure declaration may contain a pointer to any of the types it is lexically a member of. Examples of up references (with their equivalents as named type declarations) include:

+ +
+   { \2 * }                %x = type { %x* }
+   { \2 }*                 %y = type { %y }*
+   \1*                     %z = type %z*
+
+ +

An up reference is needed by the asmprinter for printing out cyclic types + when there is no declared name for a type in the cycle. Because the + asmprinter does not want to print out an infinite type string, it needs a + syntax to handle recursive types that have no names (all names are optional + in llvm IR).

+ +
Syntax:
+
+   \<level>
+
+ +

The level is the count of the lexical type that is being referred to.

+ +
Examples:
\1*                     Self-referential pointer.
{ { \3*, i8 }, i32 }    Recursive structure where the upref refers to the out-most structure.
+ +
Constants

LLVM has several different basic types of constants. This section describes + them all and their syntax.

+ +
Simple Constants
+
Boolean constants
+
The two strings 'true' and 'false' are both valid + constants of the i1 type.
+ +
Integer constants
+
Standard integers (such as '4') are constants of + the integer type. Negative numbers may be used + with integer types.
+ +
Floating point constants
+
Floating point constants use standard decimal notation (e.g. 123.421), + exponential notation (e.g. 1.23421e+2), or a more precise hexadecimal + notation (see below). The assembler requires the exact decimal value of a + floating-point constant. For example, the assembler accepts 1.25 but + rejects 1.3 because 1.3 is a repeating decimal in binary. Floating point + constants must have a floating point type.
+ +
Null pointer constants
+
The identifier 'null' is recognized as a null pointer constant + and must be of pointer type.
+
+ +

The one non-intuitive notation for constants is the hexadecimal form of + floating point constants. For example, the form 'double + 0x432ff973cafa8000' is equivalent to (but harder to read than) + 'double 4.5e+15'. The only time hexadecimal floating point + constants are required (and the only time that they are generated by the + disassembler) is when a floating point constant must be emitted but it cannot + be represented as a decimal floating point number in a reasonable number of + digits. For example, NaN's, infinities, and other special values are + represented in their IEEE hexadecimal format so that assembly and disassembly + do not cause any bits to change in the constants.

+ +

When using the hexadecimal form, constants of types float and double are represented using the 16-digit form shown above (which matches the IEEE754 representation for double); float values must, however, be exactly representable as IEEE754 single precision. Hexadecimal format is always used for long double, and there are three forms of long double. The 80-bit format used by x86 is represented as 0xK followed by 20 hexadecimal digits. The 128-bit format used by PowerPC (two adjacent doubles) is represented by 0xM followed by 32 hexadecimal digits. The IEEE 128-bit format is represented by 0xL followed by 32 hexadecimal digits; no currently supported target uses this format. Long doubles will only work if they match the long double format on your target. All hexadecimal formats are big-endian (sign bit at the left).
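For example (a small sketch):

@d = global double 0x7FF0000000000000   ; +infinity, which has no decimal form
@f = global float  0x4014000000000000   ; 5.0: float constants use the 16-digit
                                        ; double form, but must be exactly
                                        ; representable as single precision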

+ +

There are no constants of type x86mmx.

+
Complex Constants

Complex constants are a (potentially recursive) combination of simple + constants and smaller complex constants.

+ +
+
Structure constants
+
Structure constants are represented with notation similar to structure + type definitions (a comma separated list of elements, surrounded by braces + ({})). For example: "{ i32 4, float 17.0, i32* @G }", + where "@G" is declared as "@G = external global i32". + Structure constants must have structure type, and + the number and types of elements must match those specified by the + type.
+ +
Array constants
+
Array constants are represented with notation similar to array type + definitions (a comma separated list of elements, surrounded by square + brackets ([])). For example: "[ i32 42, i32 11, i32 74 + ]". Array constants must have array type, and + the number and types of elements must match those specified by the + type.
+ +
Vector constants
+
Vector constants are represented with notation similar to vector type + definitions (a comma separated list of elements, surrounded by + less-than/greater-than's (<>)). For example: "< i32 + 42, i32 11, i32 74, i32 100 >". Vector constants must + have vector type, and the number and types of + elements must match those specified by the type.
+ +
Zero initialization
+
The string 'zeroinitializer' can be used to initialize a value of any type to zero, including scalar and aggregate types. This is often used to avoid having to print large zero initializers (e.g. for large arrays) and is always exactly equivalent to using explicit zero initializers.
+ +
Metadata node
+
A metadata node is a structure-like constant with + metadata type. For example: "metadata !{ + i32 0, metadata !"test" }". Unlike other constants that are meant to + be interpreted as part of the instruction stream, metadata is a place to + attach additional information such as debug info.
+
+ +
Global Variable and Function Addresses

The addresses of global variables + and functions are always implicitly valid + (link-time) constants. These constants are explicitly referenced when + the identifier for the global is used and always + have pointer type. For example, the following is a + legal LLVM file:

+ +
+@X = global i32 17
+@Y = global i32 42
+@Z = global [2 x i32*] [ i32* @X, i32* @Y ]
+
+ +
Undefined Values

The string 'undef' can be used anywhere a constant is expected, and + indicates that the user of the value may receive an unspecified bit-pattern. + Undefined values may be of any type (other than 'label' + or 'void') and be used anywhere a constant is permitted.

+ +

Undefined values are useful because they indicate to the compiler that the + program is well defined no matter what value is used. This gives the + compiler more freedom to optimize. Here are some examples of (potentially + surprising) transformations that are valid (in pseudo IR):

+ + +
+  %A = add %X, undef
+  %B = sub %X, undef
+  %C = xor %X, undef
+Safe:
+  %A = undef
+  %B = undef
+  %C = undef
+
+ +

This is safe because all of the output bits are affected by the undef bits. + Any output bit can have a zero or one depending on the input bits.

+ +
+  %A = or %X, undef
+  %B = and %X, undef
+Safe:
+  %A = -1
+  %B = 0
+Unsafe:
+  %A = undef
+  %B = undef
+
+ +

These logical operations have bits that are not always affected by the input. + For example, if %X has a zero bit, then the output of the + 'and' operation will always be a zero for that bit, no matter what + the corresponding bit from the 'undef' is. As such, it is unsafe to + optimize or assume that the result of the 'and' is 'undef'. + However, it is safe to assume that all bits of the 'undef' could be + 0, and optimize the 'and' to 0. Likewise, it is safe to assume that + all the bits of the 'undef' operand to the 'or' could be + set, allowing the 'or' to be folded to -1.

+ +
+  %A = select undef, %X, %Y
+  %B = select undef, 42, %Y
+  %C = select %X, %Y, undef
+Safe:
+  %A = %X     (or %Y)
+  %B = 42     (or %Y)
+  %C = %Y
+Unsafe:
+  %A = undef
+  %B = undef
+  %C = undef
+
+ +

This set of examples shows that undefined 'select' (and conditional + branch) conditions can go either way, but they have to come from one + of the two operands. In the %A example, if %X and + %Y were both known to have a clear low bit, then %A would + have to have a cleared low bit. However, in the %C example, the + optimizer is allowed to assume that the 'undef' operand could be the + same as %Y, allowing the whole 'select' to be + eliminated.

+ +
+  %A = xor undef, undef
+
+  %B = undef
+  %C = xor %B, %B
+
+  %D = undef
+  %E = icmp slt %D, 4
+  %F = icmp sge %D, 4
+
+Safe:
+  %A = undef
+  %B = undef
+  %C = undef
+  %D = undef
+  %E = undef
+  %F = undef
+
+ +

This example points out that two 'undef' operands are not necessarily the same. This can be surprising to people who assume that "X^X" is always zero, even if X is undefined (this also matches C semantics). This isn't true for a number of reasons, but the short answer is that an 'undef' "variable" can arbitrarily change its value over its "live range". This is true because the variable doesn't actually have a live range. Instead, the value is logically read from arbitrary registers that happen to be around when needed, so the value is not necessarily consistent over time. In fact, %A and %C need to have the same semantics or the core LLVM "replace all uses with" concept would not hold.

+ +
+  %A = fdiv undef, %X
+  %B = fdiv %X, undef
+Safe:
+  %A = undef
+b: unreachable
+
+ +

These examples show the crucial difference between an undefined + value and undefined behavior. An undefined value (like + 'undef') is allowed to have an arbitrary bit-pattern. This means that + the %A operation can be constant folded to 'undef', because + the 'undef' could be an SNaN, and fdiv is not (currently) + defined on SNaN's. However, in the second example, we can make a more + aggressive assumption: because the undef is allowed to be an + arbitrary value, we are allowed to assume that it could be zero. Since a + divide by zero has undefined behavior, we are allowed to assume that + the operation does not execute at all. This allows us to delete the divide and + all code after it. Because the undefined operation "can't happen", the + optimizer can assume that it occurs in dead code.

+ +
+a:  store undef -> %X
+b:  store %X -> undef
+Safe:
+a: <deleted>
+b: unreachable
+
+ +

These examples reiterate the fdiv example: a store of an + undefined value can be assumed to not have any effect; we can assume that the + value is overwritten with bits that happen to match what was already there. + However, a store to an undefined location could clobber arbitrary + memory, therefore, it has undefined behavior.

+ +
Trap Values

Trap values are similar to undef values, however + instead of representing an unspecified bit pattern, they represent the + fact that an instruction or constant expression which cannot evoke side + effects has nevertheless detected a condition which results in undefined + behavior.

+ +

There is currently no way of representing a trap value in the IR; they + only exist when produced by operations such as + add with the nsw flag.

+ +

Trap value behavior is defined in terms of value dependence:

+ +
• Values other than phi nodes depend on their operands.
• Phi nodes depend on the operand corresponding to their dynamic predecessor basic block.
• Function arguments depend on the corresponding actual argument values in the dynamic callers of their functions.
• Call instructions depend on the ret instructions that dynamically transfer control back to them.
• Invoke instructions depend on the ret, unwind, or exception-throwing call instructions that dynamically transfer control back to them.
• Non-volatile loads and stores depend on the most recent stores to all of the referenced memory addresses, following the order in the IR (including loads and stores implied by intrinsics such as @llvm.memcpy.)
• An instruction with externally visible side effects depends on the most recent preceding instruction with externally visible side effects, following the order in the IR. (This includes volatile operations.)
• An instruction control-depends on a terminator instruction if the terminator instruction has multiple successors and the instruction is always executed when control transfers to one of the successors, and may not be executed when control is transferred to another.
• Dependence is transitive.
+ +

Whenever a trap value is generated, all values which depend on it evaluate to trap. If they have side effects, they evoke their side effects as if each operand with a trap value were undef. If they have externally-visible side effects, the behavior is undefined.

+ +

Here are some examples:

+ +
+entry:
+  %trap = sub nuw i32 0, 1           ; Results in a trap value.
+  %still_trap = and i32 %trap, 0     ; Whereas (and i32 undef, 0) would return 0.
+  %trap_yet_again = getelementptr i32* @h, i32 %still_trap
+  store i32 0, i32* %trap_yet_again  ; undefined behavior
+
+  store i32 %trap, i32* @g           ; Trap value conceptually stored to memory.
+  %trap2 = load i32* @g              ; Returns a trap value, not just undef.
+
+  volatile store i32 %trap, i32* @g  ; External observation; undefined behavior.
+
+  %narrowaddr = bitcast i32* @g to i16*
+  %wideaddr = bitcast i32* @g to i64*
+  %trap3 = load i16* %narrowaddr     ; Returns a trap value.
+  %trap4 = load i64* %wideaddr       ; Returns a trap value.
+
+  %cmp = icmp slt i32 %trap, 0        ; Returns a trap value.
+  br i1 %cmp, label %true, label %end ; Branch to either destination.
+
+true:
+  volatile store i32 0, i32* @g      ; This is control-dependent on %cmp, so
+                                     ; it has undefined behavior.
+  br label %end
+
+end:
+  %p = phi i32 [ 0, %entry ], [ 1, %true ]
+                                     ; Both edges into this PHI are
+                                     ; control-dependent on %cmp, so this
+                                     ; always results in a trap value.
+
+  volatile store i32 0, i32* @g      ; %end is control-equivalent to %entry
+                                     ; so this is defined (ignoring earlier
+                                     ; undefined behavior in this example).
+
+ +
Addresses of Basic Blocks

blockaddress(@function, %block)

+ +

The 'blockaddress' constant computes the address of the specified + basic block in the specified function, and always has an i8* type. Taking + the address of the entry block is illegal.

+ +

This value only has defined behavior when used as an operand to the 'indirectbr' instruction, or for comparisons against null. Pointer equality tests between label addresses result in undefined behavior (though, again, comparison against null is ok, and no label is equal to the null pointer). This may be passed around as an opaque pointer sized value as long as the bits are not inspected. This allows ptrtoint and arithmetic to be performed on these values so long as the original value is reconstituted before the indirectbr instruction.

+ +

Finally, some targets may provide defined semantics when using the value as + the operand to an inline assembly, but that is target specific.
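A sketch of the intended use (the function is hypothetical):

define i32 @dispatch(i8* %dest) {
entry:
  indirectbr i8* %dest, [label %one, label %two]
one:
  ret i32 1
two:
  ret i32 2
}

; A caller selects a block with a blockaddress constant:
;   %r = call i32 @dispatch(i8* blockaddress(@dispatch, %two))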

+ +
Constant Expressions

Constant expressions are used to allow expressions involving other constants + to be used as constants. Constant expressions may be of + any first class type and may involve any LLVM + operation that does not have side effects (e.g. load and call are not + supported). The following is the syntax for constant expressions:

+ +
+
trunc (CST to TYPE)
+
Truncate a constant to another type. The bit size of CST must be larger + than the bit size of TYPE. Both types must be integers.
+ +
zext (CST to TYPE)
+
Zero extend a constant to another type. The bit size of CST must be + smaller than the bit size of TYPE. Both types must be integers.
+ +
sext (CST to TYPE)
+
Sign extend a constant to another type. The bit size of CST must be + smaller than the bit size of TYPE. Both types must be integers.
+ +
fptrunc (CST to TYPE)
+
Truncate a floating point constant to another floating point type. The + size of CST must be larger than the size of TYPE. Both types must be + floating point.
+ +
fpext (CST to TYPE)
+
Floating point extend a constant to another type. The size of CST must be smaller than or equal to the size of TYPE. Both types must be floating point.
+ +
fptoui (CST to TYPE)
+
Convert a floating point constant to the corresponding unsigned integer + constant. TYPE must be a scalar or vector integer type. CST must be of + scalar or vector floating point type. Both CST and TYPE must be scalars, + or vectors of the same number of elements. If the value won't fit in the + integer type, the results are undefined.
+ +
fptosi (CST to TYPE)
+
Convert a floating point constant to the corresponding signed integer + constant. TYPE must be a scalar or vector integer type. CST must be of + scalar or vector floating point type. Both CST and TYPE must be scalars, + or vectors of the same number of elements. If the value won't fit in the + integer type, the results are undefined.
+ +
uitofp (CST to TYPE)
+
Convert an unsigned integer constant to the corresponding floating point + constant. TYPE must be a scalar or vector floating point type. CST must be + of scalar or vector integer type. Both CST and TYPE must be scalars, or + vectors of the same number of elements. If the value won't fit in the + floating point type, the results are undefined.
+ +
sitofp (CST to TYPE)
+
Convert a signed integer constant to the corresponding floating point + constant. TYPE must be a scalar or vector floating point type. CST must be + of scalar or vector integer type. Both CST and TYPE must be scalars, or + vectors of the same number of elements. If the value won't fit in the + floating point type, the results are undefined.
+ +
ptrtoint (CST to TYPE)
+
Convert a pointer typed constant to the corresponding integer constant. TYPE must be an integer type. CST must be of pointer type. The CST value is zero extended, truncated, or unchanged to make it fit in TYPE.
+ +
inttoptr (CST to TYPE)
+
Convert an integer constant to a pointer constant. TYPE must be a pointer type. CST must be of integer type. The CST value is zero extended, truncated, or unchanged to make it fit in a pointer size. This one is really dangerous!
+ +
bitcast (CST to TYPE)
+
Convert a constant, CST, to another TYPE. The constraints of the operands + are the same as those for the bitcast + instruction.
+ +
getelementptr (CSTPTR, IDX0, IDX1, ...)
+
getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)
+
Perform the getelementptr operation on + constants. As with the getelementptr + instruction, the index list may have zero or more indexes, which are + required to make sense for the type of "CSTPTR".
+ +
select (COND, VAL1, VAL2)
+
Perform the select operation on constants.
+ +
icmp COND (VAL1, VAL2)
+
Performs the icmp operation on constants.
+ +
fcmp COND (VAL1, VAL2)
+
Performs the fcmp operation on constants.
+ +
extractelement (VAL, IDX)
+
Perform the extractelement operation on + constants.
+ +
insertelement (VAL, ELT, IDX)
+
Perform the insertelement operation on + constants.
+ +
shufflevector (VEC1, VEC2, IDXMASK)
+
Perform the shufflevector operation on + constants.
+ +
extractvalue (VAL, IDX0, IDX1, ...)
+
Perform the extractvalue operation on + constants. The index list is interpreted in a similar manner as indices in + a 'getelementptr' operation. At least one + index value must be specified.
+ +
insertvalue (VAL, ELT, IDX0, IDX1, ...)
+
Perform the insertvalue operation on + constants. The index list is interpreted in a similar manner as indices in + a 'getelementptr' operation. At least one + index value must be specified.
+ +
OPCODE (LHS, RHS)
+
Perform the specified operation of the LHS and RHS constants. OPCODE may + be any of the binary + or bitwise binary operations. The constraints + on operands are the same as those for the corresponding instruction + (e.g. no bitwise operations on floating point values are allowed).
+
+ +
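For example, here are some constant expressions in use (a sketch with hypothetical globals), including the common "sizeof"-style idiom built from getelementptr and ptrtoint:

@arr     = global [4 x i32] zeroinitializer
@third   = global i32* getelementptr ([4 x i32]* @arr, i32 0, i32 2)
@i32size = global i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64)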
Other Values

Inline Assembler Expressions

LLVM supports inline assembler expressions (as opposed + to Module-Level Inline Assembly) through the use of + a special value. This value represents the inline assembler as a string + (containing the instructions to emit), a list of operand constraints (stored + as a string), a flag that indicates whether or not the inline asm + expression has side effects, and a flag indicating whether the function + containing the asm needs to align its stack conservatively. An example + inline assembler expression is:

+ +
+i32 (i32) asm "bswap $0", "=r,r"
+
+ +

Inline assembler expressions may only be used as the callee operand of + a call instruction. Thus, typically we + have:

+ +
+%X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
+
+ +

Inline asms with side effects not visible in the constraint list must be + marked as having side effects. This is done through the use of the + 'sideeffect' keyword, like so:

+ +
+call void asm sideeffect "eieio", ""()
+
+ +

In some cases inline asms will contain code that will not work unless the + stack is aligned in some way, such as calls or SSE instructions on x86, + yet will not contain code that does that alignment within the asm. + The compiler should make conservative assumptions about what the asm might + contain and should generate its usual stack alignment code in the prologue + if the 'alignstack' keyword is present:

+ +
+call void asm alignstack "eieio", ""()
+
+ +

If both keywords appear the 'sideeffect' keyword must come + first.

+ +

TODO: The format of the asm and constraints string still needs to be documented here. Constraints on what can be done (e.g. duplication, moving, etc.) need to be documented. This is probably best done by reference to another document that covers inline asm from a holistic perspective.

+
Inline Asm Metadata

The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode attached to them that contains a list of constant integers. If present, the code generator will use the integer as the location cookie value when reporting errors through the LLVMContext error reporting mechanisms. This allows a front-end to correlate backend errors that occur with inline asm back to the source code that produced it. For example:

+ +
+call void asm sideeffect "something bad", ""(), !srcloc !42
+...
+!42 = !{ i32 1234567 }
+
+ +

It is up to the front-end to make sense of the magic numbers it places in the + IR. If the MDNode contains multiple constants, the code generator will use + the one that corresponds to the line of the asm that the error occurs on.

Metadata Nodes and Metadata Strings
+ +

LLVM IR allows metadata to be attached to instructions in the program that + can convey extra information about the code to the optimizers and code + generator. One example application of metadata is source-level debug + information. There are two metadata primitives: strings and nodes. All + metadata has the metadata type and is identified in syntax by a + preceding exclamation point ('!').

+ +

A metadata string is a string surrounded by double quotes. It can contain + any character by escaping non-printable characters with "\xx" where "xx" is + the two digit hex code. For example: "!"test\00"".

+ +

Metadata nodes are represented with notation similar to structure constants (a comma separated list of elements, surrounded by braces and preceded by an exclamation point). For example: "!{ metadata !"test\00", i32 10}". Metadata nodes can have any values as their operands.

+ +

A named metadata is a collection of metadata nodes, which can be looked up in the module symbol table. For example: "!foo = metadata !{!4, !3}".

Metadata can be used as function arguments. Here the llvm.dbg.value function is using two metadata arguments:

+ +
+
+call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
+
+
+ +

Metadata can be attached to an instruction. Here metadata !21 is attached to the add instruction using the !dbg identifier:

+ +
+
+%indvar.next = add i64 %indvar, 1, !dbg !21
+
+
+ +
Intrinsic Global Variables

LLVM has a number of "magic" global variables that contain data that affect +code generation or other IR semantics. These are documented here. All globals +of this sort should have a section specified as "llvm.metadata". This +section and all globals that start with "llvm." are reserved for use +by LLVM.

The 'llvm.used' Global Variable

The @llvm.used global is an array with i8* element type which has appending linkage. This array contains a list of +pointers to global variables and functions which may optionally have a pointer +cast formed of bitcast or getelementptr. For example, a legal use of it is:

+ +
+  @X = global i8 4
+  @Y = global i32 123
+
+  @llvm.used = appending global [2 x i8*] [
+     i8* @X,
+     i8* bitcast (i32* @Y to i8*)
+  ], section "llvm.metadata"
+
+ +

If a global variable appears in the @llvm.used list, then the compiler, assembler, and linker are required to treat the symbol as if there is a reference to the global that it cannot see. For example, if a variable has internal linkage and no references other than that from the @llvm.used list, it cannot be deleted. This is commonly used to represent references from inline asms and other things the compiler cannot "see", and corresponds to "__attribute__((used))" in GNU C.

+ +

On some targets, the code generator must emit a directive to the assembler or +object file to prevent the assembler and linker from molesting the symbol.

+ +
The 'llvm.compiler.used' Global Variable

The @llvm.compiler.used directive is the same as the +@llvm.used directive, except that it only prevents the compiler from +touching the symbol. On targets that support it, this allows an intelligent +linker to optimize references to the symbol without being impeded as it would be +by @llvm.used.

+ +

This is a rare construct that should only be used in rare circumstances, and +should not be exposed to source languages.

+ +
The 'llvm.global_ctors' Global Variable
+%0 = type { i32, void ()* }
+@llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
+
+

The @llvm.global_ctors array contains a list of constructor functions and associated priorities. The functions referenced by this array will be called in ascending order of priority (i.e. lowest first) when the module is loaded. The order of functions with the same priority is not defined. +

+ +
The 'llvm.global_dtors' Global Variable
+%0 = type { i32, void ()* }
+@llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
+
+ +

The @llvm.global_dtors array contains a list of destructor functions and associated priorities. The functions referenced by this array will be called in descending order of priority (i.e. highest first) when the module is unloaded. The order of functions with the same priority is not defined.

+ +
Instruction Reference

The LLVM instruction set consists of several different classifications of + instructions: terminator + instructions, binary instructions, + bitwise binary instructions, + memory instructions, and + other instructions.

+ +
Terminator Instructions

As mentioned previously, every basic block + in a program ends with a "Terminator" instruction, which indicates which + block should be executed after the current block is finished. These + terminator instructions typically yield a 'void' value: they produce + control flow, not values (the one exception being the + 'invoke' instruction).

+ +

There are seven different terminator instructions: the 'ret' instruction, the 'br' instruction, the 'switch' instruction, the 'indirectbr' instruction, the 'invoke' instruction, the 'unwind' instruction, and the 'unreachable' instruction.

+ +
'ret' Instruction
Syntax:
+
+  ret <type> <value>       ; Return a value from a non-void function
+  ret void                 ; Return from void function
+
+ +
Overview:
+

The 'ret' instruction is used to return control flow (and optionally + a value) from a function back to the caller.

+ +

There are two forms of the 'ret' instruction: one that returns a + value and then causes control flow, and one that just causes control flow to + occur.

+ +
Arguments:
+

The 'ret' instruction optionally accepts a single argument, the + return value. The type of the return value must be a + 'first class' type.

+ +

A function is not well formed if it has a non-void return type and contains a 'ret' instruction with no return value or a return value with a type that does not match its type, or if it has a void return type and contains a 'ret' instruction with a return value.

+ +
Semantics:
+

When the 'ret' instruction is executed, control flow returns back to + the calling function's context. If the caller is a + "call" instruction, execution continues at the + instruction after the call. If the caller was an + "invoke" instruction, execution continues at + the beginning of the "normal" destination block. If the instruction returns + a value, that value shall set the call or invoke instruction's return + value.

+ +
Example:
+
+  ret i32 5                       ; Return an integer value of 5
+  ret void                        ; Return from a void function
+  ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2
+
+ +
'br' Instruction
Syntax:
+
+  br i1 <cond>, label <iftrue>, label <iffalse>
+  br label <dest>          ; Unconditional branch
+ +
Overview:
+

The 'br' instruction is used to cause control flow to transfer to a + different basic block in the current function. There are two forms of this + instruction, corresponding to a conditional branch and an unconditional + branch.

+ +
Arguments:
+

The conditional branch form of the 'br' instruction takes a single + 'i1' value and two 'label' values. The unconditional form + of the 'br' instruction takes a single 'label' value as a + target.

+ +
Semantics:
+

Upon execution of a conditional 'br' instruction, the 'i1' argument is evaluated. If the value is true, control flows to the 'iftrue' label argument. If the value is false, control flows to the 'iffalse' label argument.

+ +
Example:
+
+Test:
+  %cond = icmp eq i32 %a, %b
+  br i1 %cond, label %IfEqual, label %IfUnequal
+IfEqual:
+  ret i32 1
+IfUnequal:
+  ret i32 0
+
'switch' Instruction
Syntax:
+
+  switch <intty> <value>, label <defaultdest> [ <intty> <val>, label <dest> ... ]
+
+ +
Overview:
+

The 'switch' instruction is used to transfer control flow to one of + several different places. It is a generalization of the 'br' + instruction, allowing a branch to occur to one of many possible + destinations.

+ +
Arguments:
+

The 'switch' instruction uses three parameters: an integer + comparison value 'value', a default 'label' destination, + and an array of pairs of comparison value constants and 'label's. + The table is not allowed to contain duplicate constant entries.

+ +
Semantics:
+

The switch instruction specifies a table of values and + destinations. When the 'switch' instruction is executed, this table + is searched for the given value. If the value is found, control flow is + transferred to the corresponding destination; otherwise, control flow is + transferred to the default destination.

+ +
Implementation:
+

Depending on properties of the target machine and the particular + switch instruction, this instruction may be code generated in + different ways. For example, it could be generated as a series of chained + conditional branches or with a lookup table.

+ +
Example:
+
+ ; Emulate a conditional br instruction
+ %Val = zext i1 %value to i32
+ switch i32 %Val, label %truedest [ i32 0, label %falsedest ]
+
+ ; Emulate an unconditional br instruction
+ switch i32 0, label %dest [ ]
+
+ ; Implement a jump table:
+ switch i32 %val, label %otherwise [ i32 0, label %onzero
+                                     i32 1, label %onone
+                                     i32 2, label %ontwo ]
+
'indirectbr' Instruction
Syntax:
+
+  indirectbr <somety>* <address>, [ label <dest1>, label <dest2>, ... ]
+
+ +
Overview:
+ +

The 'indirectbr' instruction implements an indirect branch to a label + within the current function, whose address is specified by + "address". Address must be derived from a blockaddress constant.

+ +
Arguments:
+ +

The 'address' argument is the address of the label to jump to. The + rest of the arguments indicate the full set of possible destinations that the + address may point to. Blocks are allowed to occur multiple times in the + destination list, though this isn't particularly useful.

+ +

This destination list is required so that dataflow analysis has an accurate + understanding of the CFG.

+ +
Semantics:
+ +

Control transfers to the block specified in the address argument. All + possible destination blocks must be listed in the label list, otherwise this + instruction has undefined behavior. This implies that jumps to labels + defined in other functions have undefined behavior as well.

+ +
Implementation:
+ +

This is typically implemented with a jump through a register.

+ +
Example:
+
+ indirectbr i8* %Addr, [ label %bb1, label %bb2, label %bb3 ]
+
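As the address must be derived from a blockaddress constant, a complete function using indirectbr might look like the following sketch (function and block names are hypothetical):

  define i32 @pick(i1 %c) {
  entry:
    ; Both targets must appear in the indirectbr destination list.
    %Addr = select i1 %c, i8* blockaddress(@pick, %one),
                          i8* blockaddress(@pick, %two)
    indirectbr i8* %Addr, [ label %one, label %two ]
  one:
    ret i32 1
  two:
    ret i32 2
  }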
'invoke' Instruction
Syntax:
+
+  <result> = invoke [cconv] [ret attrs] <ptr to function ty> <function ptr val>(<function args>) [fn attrs]
+                to label <normal label> unwind label <exception label>
+
+ +
Overview:
+

The 'invoke' instruction causes control to transfer to a specified + function, with the possibility of control flow transfer to either the + 'normal' label or the 'exception' label. If the callee + function returns with the "ret" instruction, + control flow will return to the "normal" label. If the callee (or any + indirect callees) returns with the "unwind" + instruction, control is interrupted and continued at the dynamically nearest + "exception" label.

+ +
Arguments:
+

This instruction requires several arguments:

+ +
  1. The optional "cconv" marker indicates which calling convention the call should use. If none is specified, the call defaults to using C calling conventions.
  2. The optional Parameter Attributes list for return values. Only 'zeroext', 'signext', and 'inreg' attributes are valid here.
  3. 'ptr to function ty': shall be the signature of the pointer to function value being invoked. In most cases, this is a direct function invocation, but indirect invokes are just as possible, branching off an arbitrary pointer to function value.
  4. 'function ptr val': An LLVM value containing a pointer to a function to be invoked.
  5. 'function args': argument list whose types match the function signature argument types and parameter attributes. All arguments must be of first class type. If the function signature indicates the function accepts a variable number of arguments, the extra arguments can be specified.
  6. 'normal label': the label reached when the called function executes a 'ret' instruction.
  7. 'exception label': the label reached when a callee returns with the unwind instruction.
  8. The optional function attributes list. Only 'noreturn', 'nounwind', 'readonly' and 'readnone' attributes are valid here.
+ +
Semantics:
+

This instruction is designed to operate as a standard + 'call' instruction in most regards. The + primary difference is that it establishes an association with a label, which + is used by the runtime library to unwind the stack.

+ +

This instruction is used in languages with destructors to ensure that proper + cleanup is performed in the case of either a longjmp or a thrown + exception. Additionally, this is important for implementation of + 'catch' clauses in high-level languages that support them.

+ +

For the purposes of the SSA form, the definition of the value returned by the + 'invoke' instruction is deemed to occur on the edge from the current + block to the "normal" label. If the callee unwinds then no return value is + available.

+ +

Note that the code generator does not yet completely support unwind, and +that the invoke/unwind semantics are likely to change in future versions.

+ +
Example:
+
+  %retval = invoke i32 @Test(i32 15) to label %Continue
+              unwind label %TestCleanup              ; {i32}:retval set
+  %retval = invoke coldcc i32 %Testfnptr(i32 15) to label %Continue
+              unwind label %TestCleanup              ; {i32}:retval set
+
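A fuller sketch showing both destination blocks in context (reusing @Test and the labels from the example above; the cleanup body is hypothetical):

  define i32 @caller() {
  entry:
    %retval = invoke i32 @Test(i32 15)
                to label %Continue unwind label %TestCleanup
  Continue:                 ; the callee returned with 'ret'
    ret i32 %retval
  TestCleanup:              ; a callee executed 'unwind'
    unwind
  }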
'unwind' Instruction
Syntax:
+
+  unwind
+
+ +
Overview:
+

The 'unwind' instruction unwinds the stack, continuing control flow + at the first callee in the dynamic call stack which used + an invoke instruction to perform the call. + This is primarily used to implement exception handling.

+ +
Semantics:
+

The 'unwind' instruction causes execution of the current function to + immediately halt. The dynamic call stack is then searched for the + first invoke instruction on the call stack. + Once found, execution continues at the "exceptional" destination block + specified by the invoke instruction. If there is no invoke + instruction in the dynamic call chain, undefined behavior results.

+ +

Note that the code generator does not yet completely support unwind, and +that the invoke/unwind semantics are likely to change in future versions.

'unreachable' Instruction
Syntax:
+
+  unreachable
+
+ +
Overview:
+

The 'unreachable' instruction has no defined semantics. This + instruction is used to inform the optimizer that a particular portion of the + code is not reachable. This can be used to indicate that the code after a + no-return function cannot be reached, and other facts.

+ +
Semantics:
+

The 'unreachable' instruction has no defined semantics.
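A typical use, sketched with a hypothetical noreturn callee:

  declare void @abort() noreturn

    ...
    call void @abort() noreturn
    unreachable        ; control can never reach this point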

Binary Operations

Binary operators are used to do most of the computation in a program. They + require two operands of the same type, execute an operation on them, and + produce a single value. The operands might represent multiple data, as is + the case with the vector data type. The result value + has the same type as its operands.

+ +

There are several different binary operators:

'add' Instruction
Syntax:
+
+  <result> = add <ty> <op1>, <op2>          ; yields {ty}:result
+  <result> = add nuw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = add nsw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = add nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+ +
Overview:
+

The 'add' instruction returns the sum of its two operands.

+ +
Arguments:
+

The two arguments to the 'add' instruction must + be integer or vector of + integer values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the integer sum of the two operands.

+ +

If the sum has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.

+ +

Because LLVM integers use a two's complement representation, this instruction + is appropriate for both signed and unsigned integers.

+ +

nuw and nsw stand for "No Unsigned Wrap" + and "No Signed Wrap", respectively. If the nuw and/or + nsw keywords are present, the result value of the add + is a trap value if unsigned and/or signed overflow, + respectively, occurs.

+ +
Example:
+
+  <result> = add i32 4, %var          ; yields {i32}:result = 4 + %var
+
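As a further illustration of the wrap flags (operand names are hypothetical):

  %sum = add nsw i32 %a, %b    ; trap value if %a + %b overflows the signed range
  %idx = add nuw i32 %i, 1     ; trap value if %i was 0xFFFFFFFF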
'fadd' Instruction
Syntax:
+
+  <result> = fadd <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'fadd' instruction returns the sum of its two operands.

+ +
Arguments:
+

The two arguments to the 'fadd' instruction must be + floating point or vector of + floating point values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the floating point sum of the two operands.

+ +
Example:
+
+  <result> = fadd float 4.0, %var          ; yields {float}:result = 4.0 + %var
+
'sub' Instruction
Syntax:
+
+  <result> = sub <ty> <op1>, <op2>          ; yields {ty}:result
+  <result> = sub nuw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = sub nsw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = sub nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+ +
Overview:
+

The 'sub' instruction returns the difference of its two + operands.

+ +

Note that the 'sub' instruction is used to represent the + 'neg' instruction present in most other intermediate + representations.

+ +
Arguments:
+

The two arguments to the 'sub' instruction must + be integer or vector of + integer values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the integer difference of the two operands.

+ +

If the difference has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.

+ +

Because LLVM integers use a two's complement representation, this instruction + is appropriate for both signed and unsigned integers.

+ +

nuw and nsw stand for "No Unsigned Wrap" + and "No Signed Wrap", respectively. If the nuw and/or + nsw keywords are present, the result value of the sub + is a trap value if unsigned and/or signed overflow, + respectively, occurs.

+ +
Example:
+
+  <result> = sub i32 4, %var          ; yields {i32}:result = 4 - %var
+  <result> = sub i32 0, %val          ; yields {i32}:result = -%val
+
'fsub' Instruction
Syntax:
+
+  <result> = fsub <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'fsub' instruction returns the difference of its two + operands.

+ +

Note that the 'fsub' instruction is used to represent the + 'fneg' instruction present in most other intermediate + representations.

+ +
Arguments:
+

The two arguments to the 'fsub' instruction must be + floating point or vector of + floating point values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the floating point difference of the two operands.

+ +
Example:
+
+  <result> = fsub float 4.0, %var           ; yields {float}:result = 4.0 - %var
+  <result> = fsub float -0.0, %val          ; yields {float}:result = -%val
+
'mul' Instruction
Syntax:
+
+  <result> = mul <ty> <op1>, <op2>          ; yields {ty}:result
+  <result> = mul nuw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = mul nsw <ty> <op1>, <op2>      ; yields {ty}:result
+  <result> = mul nuw nsw <ty> <op1>, <op2>  ; yields {ty}:result
+
+ +
Overview:
+

The 'mul' instruction returns the product of its two operands.

+ +
Arguments:
+

The two arguments to the 'mul' instruction must + be integer or vector of + integer values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the integer product of the two operands.

+ +

If the result of the multiplication has unsigned overflow, the result returned is the mathematical result modulo 2^n, where n is the bit width of the result.

+ +

Because LLVM integers use a two's complement representation, and the result + is the same width as the operands, this instruction returns the correct + result for both signed and unsigned integers. If a full product + (e.g. i32xi32->i64) is needed, the operands should + be sign-extended or zero-extended as appropriate to the width of the full + product.

+ +

nuw and nsw stand for "No Unsigned Wrap" + and "No Signed Wrap", respectively. If the nuw and/or + nsw keywords are present, the result value of the mul + is a trap value if unsigned and/or signed overflow, + respectively, occurs.

+ +
Example:
+
+  <result> = mul i32 4, %var          ; yields {i32}:result = 4 * %var
+
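A sketch of the full-product idiom described above (operand names are hypothetical):

  %a64  = zext i32 %a to i64          ; use sext instead for a signed product
  %b64  = zext i32 %b to i64
  %prod = mul i64 %a64, %b64          ; full 32x32->64-bit result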
'fmul' Instruction
Syntax:
+
+  <result> = fmul <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'fmul' instruction returns the product of its two operands.

+ +
Arguments:
+

The two arguments to the 'fmul' instruction must be + floating point or vector of + floating point values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the floating point product of the two operands.

+ +
Example:
+
+  <result> = fmul float 4.0, %var          ; yields {float}:result = 4.0 * %var
+
'udiv' Instruction
Syntax:
+
+  <result> = udiv <ty> <op1>, <op2>         ; yields {ty}:result
+  <result> = udiv exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'udiv' instruction returns the quotient of its two operands.

+ +
Arguments:
+

The two arguments to the 'udiv' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the unsigned integer quotient of the two operands.

+ +

Note that unsigned integer division and signed integer division are distinct + operations; for signed integer division, use 'sdiv'.

+ +

Division by zero leads to undefined behavior.

+ +

If the exact keyword is present, the result value of the + udiv is a trap value if %op1 is not a + multiple of %op2 (as such, "((a udiv exact b) mul b) == a").

+ + +
Example:
+
+  <result> = udiv i32 4, %var          ; yields {i32}:result = 4 / %var
+
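A sketch of the exact form (operand name hypothetical):

  <result> = udiv exact i32 %x, 16     ; trap value unless %x is a multiple of 16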
'sdiv' Instruction
Syntax:
+
+  <result> = sdiv <ty> <op1>, <op2>         ; yields {ty}:result
+  <result> = sdiv exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'sdiv' instruction returns the quotient of its two operands.

+ +
Arguments:
+

The two arguments to the 'sdiv' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the signed integer quotient of the two operands rounded + towards zero.

+ +

Note that signed integer division and unsigned integer division are distinct + operations; for unsigned integer division, use 'udiv'.

+ +

Division by zero leads to undefined behavior. Overflow also leads to + undefined behavior; this is a rare case, but can occur, for example, by doing + a 32-bit division of -2147483648 by -1.

+ +

If the exact keyword is present, the result value of the + sdiv is a trap value if the result would + be rounded.

+ +
Example:
+
+  <result> = sdiv i32 4, %var          ; yields {i32}:result = 4 / %var
+
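Two further illustrative cases (values chosen for the purpose):

  <result> = sdiv exact i32 24, 8              ; yields {i32}:result = 3
  <result> = sdiv i32 -2147483648, -1          ; overflow: undefined behavior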
'fdiv' Instruction
Syntax:
+
+  <result> = fdiv <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'fdiv' instruction returns the quotient of its two operands.

+ +
Arguments:
+

The two arguments to the 'fdiv' instruction must be + floating point or vector of + floating point values. Both arguments must have identical types.

+ +
Semantics:
+

The value produced is the floating point quotient of the two operands.

+ +
Example:
+
+  <result> = fdiv float 4.0, %var          ; yields {float}:result = 4.0 / %var
+
'urem' Instruction
Syntax:
+
+  <result> = urem <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'urem' instruction returns the remainder from the unsigned + division of its two arguments.

+ +
Arguments:
+

The two arguments to the 'urem' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

This instruction returns the unsigned integer remainder of a division. + This instruction always performs an unsigned division to get the + remainder.

+ +

Note that unsigned integer remainder and signed integer remainder are + distinct operations; for signed integer remainder, use 'srem'.

+ +

Taking the remainder of a division by zero leads to undefined behavior.

+ +
Example:
+
+  <result> = urem i32 4, %var          ; yields {i32}:result = 4 % %var
+
'srem' Instruction
Syntax:
+
+  <result> = srem <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'srem' instruction returns the remainder from the signed + division of its two operands. This instruction can also take + vector versions of the values in which case the + elements must be integers.

+ +
Arguments:
+

The two arguments to the 'srem' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

This instruction returns the remainder of a division (where the result + is either zero or has the same sign as the dividend, op1), not the + modulo operator (where the result is either zero or has the same sign + as the divisor, op2) of a value. + For more information about the difference, + see The + Math Forum. For a table of how this is implemented in various languages, + please see + Wikipedia: modulo operation.

+ +

Note that signed integer remainder and unsigned integer remainder are + distinct operations; for unsigned integer remainder, use 'urem'.

+ +

Taking the remainder of a division by zero leads to undefined behavior. + Overflow also leads to undefined behavior; this is a rare case, but can + occur, for example, by taking the remainder of a 32-bit division of + -2147483648 by -1. (The remainder doesn't actually overflow, but this rule + lets srem be implemented using instructions that return both the result of + the division and the remainder.)

+ +
Example:
+
+  <result> = srem i32 4, %var          ; yields {i32}:result = 4 % %var
+
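The sign rule is easiest to see with concrete operands (values chosen for illustration):

  <result> = srem i32 -7, 3            ; yields {i32}:result = -1 (sign of dividend)
  <result> = srem i32  7, -3           ; yields {i32}:result = 1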
'frem' Instruction
Syntax:
+
+  <result> = frem <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'frem' instruction returns the remainder from the division of + its two operands.

+ +
Arguments:
+

The two arguments to the 'frem' instruction must be + floating point or vector of + floating point values. Both arguments must have identical types.

+ +
Semantics:
+

This instruction returns the remainder of a division. The remainder + has the same sign as the dividend.

+ +
Example:
+
+  <result> = frem float 4.0, %var          ; yields {float}:result = 4.0 % %var
+
Bitwise Binary Operations

Bitwise binary operators are used to do various forms of bit-twiddling in a + program. They are generally very efficient instructions and can commonly be + strength reduced from other instructions. They require two operands of the + same type, execute an operation on them, and produce a single value. The + resulting value is the same type as its operands.

'shl' Instruction
Syntax:
+
+  <result> = shl <ty> <op1>, <op2>           ; yields {ty}:result
+  <result> = shl nuw <ty> <op1>, <op2>       ; yields {ty}:result
+  <result> = shl nsw <ty> <op1>, <op2>       ; yields {ty}:result
+  <result> = shl nuw nsw <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'shl' instruction returns the first operand shifted to the left + a specified number of bits.

+ +
Arguments:
+

Both arguments to the 'shl' instruction must be the + same integer or vector of + integer type. 'op2' is treated as an unsigned value.

+ +
Semantics:
+

The value produced is op1 * 2^op2 mod 2^n, where n is the width of the result. If op2 is (statically or dynamically) negative or equal to or larger than the number of bits in op1, the result is undefined. If the arguments are vectors, each vector element of op1 is shifted by the corresponding shift amount in op2.

+ +

If the nuw keyword is present, then the shift produces a + trap value if it shifts out any non-zero bits. If + the nsw keyword is present, then the shift produces a + trap value if it shifts out any bits that disagree + with the resultant sign bit. As such, NUW/NSW have the same semantics as + they would if the shift were expressed as a mul instruction with the same + nsw/nuw bits in (mul %op1, (shl 1, %op2)).

+ +
Example:
+
+  <result> = shl i32 4, %var   ; yields {i32}: 4 << %var
+  <result> = shl i32 4, 2      ; yields {i32}: 16
+  <result> = shl i32 1, 10     ; yields {i32}: 1024
+  <result> = shl i32 1, 32     ; undefined
+  <result> = shl <2 x i32> < i32 1, i32 1>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 2, i32 4>
+
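Illustrations of the wrap flags (the first operand is hypothetical):

  <result> = shl nuw i32 %x, 1   ; trap value if the high bit of %x was set
  <result> = shl nsw i32 1, 31   ; trap value: the shifted-out zeros disagree with the new sign bit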
'lshr' Instruction
Syntax:
+
+  <result> = lshr <ty> <op1>, <op2>         ; yields {ty}:result
+  <result> = lshr exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'lshr' instruction (logical shift right) returns the first + operand shifted to the right a specified number of bits with zero fill.

+ +
Arguments:
+

Both arguments to the 'lshr' instruction must be the same + integer or vector of integer + type. 'op2' is treated as an unsigned value.

+ +
Semantics:
+

This instruction always performs a logical shift right operation. The most + significant bits of the result will be filled with zero bits after the shift. + If op2 is (statically or dynamically) equal to or larger than the + number of bits in op1, the result is undefined. If the arguments are + vectors, each vector element of op1 is shifted by the corresponding + shift amount in op2.

+ +

If the exact keyword is present, the result value of the + lshr is a trap value if any of the bits + shifted out are non-zero.

+ + +
Example:
+
+  <result> = lshr i32 4, 1   ; yields {i32}:result = 2
+  <result> = lshr i32 4, 2   ; yields {i32}:result = 1
+  <result> = lshr i8  4, 3   ; yields {i8}:result = 0
+  <result> = lshr i8 -2, 1   ; yields {i8}:result = 0x7F
+  <result> = lshr i32 1, 32  ; undefined
+  <result> = lshr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 2>   ; yields: result=<2 x i32> < i32 0x7FFFFFFF, i32 1>
+
'ashr' Instruction
Syntax:
+
+  <result> = ashr <ty> <op1>, <op2>         ; yields {ty}:result
+  <result> = ashr exact <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'ashr' instruction (arithmetic shift right) returns the first + operand shifted to the right a specified number of bits with sign + extension.

+ +
Arguments:
+

Both arguments to the 'ashr' instruction must be the same + integer or vector of integer + type. 'op2' is treated as an unsigned value.

+ +
Semantics:
+

This instruction always performs an arithmetic shift right operation. The most significant bits of the result will be filled with the sign bit of op1. If op2 is (statically or dynamically) equal to or larger than the number of bits in op1, the result is undefined. If the arguments are vectors, each vector element of op1 is shifted by the corresponding shift amount in op2.

+ +

If the exact keyword is present, the result value of the + ashr is a trap value if any of the bits + shifted out are non-zero.

+ +
Example:
+
+  <result> = ashr i32 4, 1   ; yields {i32}:result = 2
+  <result> = ashr i32 4, 2   ; yields {i32}:result = 1
+  <result> = ashr i8  4, 3   ; yields {i8}:result = 0
+  <result> = ashr i8 -2, 1   ; yields {i8}:result = -1
+  <result> = ashr i32 1, 32  ; undefined
+  <result> = ashr <2 x i32> < i32 -2, i32 4>, < i32 1, i32 3>   ; yields: result=<2 x i32> < i32 -1, i32 0>
+
'and' Instruction
Syntax:
+
+  <result> = and <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'and' instruction returns the bitwise logical and of its two + operands.

+ +
Arguments:
+

The two arguments to the 'and' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

The truth table used for the 'and' instruction is:

  In0  In1  Out
   0    0    0
   0    1    0
   1    0    0
   1    1    1
+ +
Example:
+
+  <result> = and i32 4, %var         ; yields {i32}:result = 4 & %var
+  <result> = and i32 15, 40          ; yields {i32}:result = 8
+  <result> = and i32 4, 8            ; yields {i32}:result = 0
+
'or' Instruction
Syntax:
+
+  <result> = or <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'or' instruction returns the bitwise logical inclusive or of its + two operands.

+ +
Arguments:
+

The two arguments to the 'or' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

The truth table used for the 'or' instruction is:

  In0  In1  Out
   0    0    0
   0    1    1
   1    0    1
   1    1    1
+ +
Example:
+
+  <result> = or i32 4, %var         ; yields {i32}:result = 4 | %var
+  <result> = or i32 15, 40          ; yields {i32}:result = 47
+  <result> = or i32 4, 8            ; yields {i32}:result = 12
+
'xor' Instruction
Syntax:
+
+  <result> = xor <ty> <op1>, <op2>   ; yields {ty}:result
+
+ +
Overview:
+

The 'xor' instruction returns the bitwise logical exclusive or of + its two operands. The xor is used to implement the "one's + complement" operation, which is the "~" operator in C.

+ +
Arguments:
+

The two arguments to the 'xor' instruction must be + integer or vector of integer + values. Both arguments must have identical types.

+ +
Semantics:
+

The truth table used for the 'xor' instruction is:

  In0  In1  Out
   0    0    0
   0    1    1
   1    0    1
   1    1    0
+ +
Example:
+
+  <result> = xor i32 4, %var         ; yields {i32}:result = 4 ^ %var
+  <result> = xor i32 15, 40          ; yields {i32}:result = 39
+  <result> = xor i32 4, 8            ; yields {i32}:result = 12
+  <result> = xor i32 %V, -1          ; yields {i32}:result = ~%V
+
Vector Operations

LLVM supports several instructions to represent vector operations in a + target-independent manner. These instructions cover the element-access and + vector-specific operations needed to process vectors effectively. While LLVM + does directly support these vector operations, many sophisticated algorithms + will want to use target-specific intrinsics to take full advantage of a + specific target.

'extractelement' Instruction
Syntax:
+
+  <result> = extractelement <n x <ty>> <val>, i32 <idx>    ; yields <ty>
+
+ +
Overview:
+

The 'extractelement' instruction extracts a single scalar element + from a vector at a specified index.

+ + +
Arguments:
+

The first operand of an 'extractelement' instruction is a value + of vector type. The second operand is an index + indicating the position from which to extract the element. The index may be + a variable.

+ +
Semantics:
+

The result is a scalar of the same type as the element type of + val. Its value is the value at position idx of + val. If idx exceeds the length of val, the + results are undefined.

+ +
Example:
+
+  <result> = extractelement <4 x i32> %vec, i32 0    ; yields i32
+
'insertelement' Instruction
Syntax:
+
+  <result> = insertelement <n x <ty>> <val>, <ty> <elt>, i32 <idx>    ; yields <n x <ty>>
+
+ +
Overview:
+

The 'insertelement' instruction inserts a scalar element into a + vector at a specified index.

+ +
Arguments:
+

The first operand of an 'insertelement' instruction is a value + of vector type. The second operand is a scalar value + whose type must equal the element type of the first operand. The third + operand is an index indicating the position at which to insert the value. + The index may be a variable.

+ +
Semantics:
+

The result is a vector of the same type as val. Its element values + are those of val except at position idx, where it gets the + value elt. If idx exceeds the length of val, the + results are undefined.

+ +
Example:
+
+  <result> = insertelement <4 x i32> %vec, i32 1, i32 0    ; yields <4 x i32>
+
'shufflevector' Instruction
Syntax:
+
+  <result> = shufflevector <n x <ty>> <v1>, <n x <ty>> <v2>, <m x i32> <mask>    ; yields <m x <ty>>
+
+ +
Overview:
+

The 'shufflevector' instruction constructs a permutation of elements + from two input vectors, returning a vector with the same element type as the + input and length that is the same as the shuffle mask.

+ +
Arguments:
+

The first two operands of a 'shufflevector' instruction are vectors + with types that match each other. The third argument is a shuffle mask whose + element type is always 'i32'. The result of the instruction is a vector + whose length is the same as the shuffle mask and whose element type is the + same as the element type of the first two operands.

+ +

The shuffle mask operand is required to be a constant vector with either + constant integer or undef values.

+ +
Semantics:
+

The elements of the two input vectors are numbered from left to right across + both of the vectors. The shuffle mask operand specifies, for each element of + the result vector, which element of the two input vectors the result element + gets. The element selector may be undef (meaning "don't care") and the + second operand may be undef if performing a shuffle from only one vector.

+ +
Example:
+
+  <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                          <4 x i32> <i32 0, i32 4, i32 1, i32 5>  ; yields <4 x i32>
+  <result> = shufflevector <4 x i32> %v1, <4 x i32> undef,
+                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32> - Identity shuffle.
+  <result> = shufflevector <8 x i32> %v1, <8 x i32> undef,
+                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; yields <4 x i32>
+  <result> = shufflevector <4 x i32> %v1, <4 x i32> %v2,
+                          <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >  ; yields <8 x i32>
+
Aggregate Operations

LLVM supports several instructions for working with + aggregate values.

'extractvalue' Instruction
Syntax:
+
+  <result> = extractvalue <aggregate type> <val>, <idx>{, <idx>}*
+
+ +
Overview:
+

The 'extractvalue' instruction extracts the value of a member field + from an aggregate value.

+ +
Arguments:
+

The first operand of an 'extractvalue' instruction is a value + of struct or + array type. The operands are constant indices to + specify which value to extract in a similar manner as indices in a + 'getelementptr' instruction.

+

The major differences to getelementptr indexing are:

  • Since the value being indexed is not a pointer, the first index is omitted and assumed to be zero.
  • At least one index must be specified.
  • Not only struct indices but also array indices must be in bounds.
+ +
Semantics:
+

The result is the value at the position in the aggregate specified by the + index operands.

+ +
Example:
+
+  <result> = extractvalue {i32, float} %agg, 0    ; yields i32
+
'insertvalue' Instruction
Syntax:
+
+  <result> = insertvalue <aggregate type> <val>, <ty> <elt>, <idx>    ; yields <aggregate type>
+
+ +
Overview:
+

The 'insertvalue' instruction inserts a value into a member field + in an aggregate value.

+ +
Arguments:
+

The first operand of an 'insertvalue' instruction is a value + of struct or + array type. The second operand is a first-class + value to insert. The following operands are constant indices indicating + the position at which to insert the value in a similar manner as indices in a + 'extractvalue' instruction. The + value to insert must have the same type as the value identified by the + indices.

+ +
Semantics:
+

The result is an aggregate of the same type as val. Its value is + that of val except that the value at the position specified by the + indices is that of elt.

+ +
Example:
+
+  %agg1 = insertvalue {i32, float} undef, i32 1, 0         ; yields {i32 1, float undef}
+  %agg2 = insertvalue {i32, float} %agg1, float %val, 1    ; yields {i32 1, float %val}
+
Memory Access and Addressing Operations

A key design point of an SSA-based representation is how it represents + memory. In LLVM, no memory locations are in SSA form, which makes things + very simple. This section describes how to read, write, and allocate + memory in LLVM.

'alloca' Instruction
Syntax:
+
+  <result> = alloca <type>[, <ty> <NumElements>][, align <alignment>]     ; yields {type*}:result
+
+ +
Overview:
+

The 'alloca' instruction allocates memory on the stack frame of the + currently executing function, to be automatically released when this function + returns to its caller. The object is always allocated in the generic address + space (address space zero).

+ +
Arguments:
+

The 'alloca' instruction + allocates sizeof(<type>)*NumElements bytes of memory on the + runtime stack, returning a pointer of the appropriate type to the program. + If "NumElements" is specified, it is the number of elements allocated, + otherwise "NumElements" is defaulted to be one. If a constant alignment is + specified, the value result of the allocation is guaranteed to be aligned to + at least that boundary. If not specified, or if zero, the target can choose + to align the allocation on any convenient boundary compatible with the + type.

+ +

'type' may be any sized type.

+ +
Semantics:
+

Memory is allocated; a pointer is returned. The operation is undefined if + there is insufficient stack space for the allocation. 'alloca'd + memory is automatically released when the function returns. The + 'alloca' instruction is commonly used to represent automatic + variables that must have an address available. When the function returns + (either with the ret + or unwind instructions), the memory is + reclaimed. Allocating zero bytes is legal, but the result is undefined.

+ +
Example:
+
+  %ptr = alloca i32                             ; yields {i32*}:ptr
+  %ptr = alloca i32, i32 4                      ; yields {i32*}:ptr
+  %ptr = alloca i32, i32 4, align 1024          ; yields {i32*}:ptr
+  %ptr = alloca i32, align 1024                 ; yields {i32*}:ptr
+
'load' Instruction
Syntax:
+
+  <result> = load <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]
+  <result> = volatile load <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]
+  !<index> = !{ i32 1 }
+
+ +
Overview:
+

The 'load' instruction is used to read from memory.

+ +
Arguments:
+

The argument to the 'load' instruction specifies the memory address + from which to load. The pointer must point to + a first class type. If the load is + marked as volatile, then the optimizer is not allowed to modify the + number or order of execution of this load with other volatile operations.

+ +

The optional constant align argument specifies the alignment of the + operation (that is, the alignment of the memory address). A value of 0 or an + omitted align argument means that the operation has the preferential + alignment for the target. It is the responsibility of the code emitter to + ensure that the alignment information is correct. Overestimating the + alignment results in undefined behavior. Underestimating the alignment may + produce less efficient code. An alignment of 1 is always safe.

+ +

The optional !nontemporal metadata must reference a single metadata name <index> corresponding to a metadata node with one i32 entry of value 1. The existence of the !nontemporal metadata on the instruction tells the optimizer and code generator that this load is not expected to be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86.

+ +
Semantics:
+

The location of memory pointed to is loaded. If the value being loaded is of + scalar type then the number of bytes read does not exceed the minimum number + of bytes needed to hold all bits of the type. For example, loading an + i24 reads at most three bytes. When loading a value of a type like + i20 with a size that is not an integral number of bytes, the result + is undefined if the value was not originally written using a store of the + same type.

+ +
Examples:
+
+  %ptr = alloca i32                               ; yields {i32*}:ptr
+  store i32 3, i32* %ptr                          ; yields {void}
+  %val = load i32* %ptr                           ; yields {i32}:val = i32 3
+
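A load carrying the !nontemporal hint described above might be written as follows (the metadata index is arbitrary):

  %val = load i32* %ptr, align 4, !nontemporal !0   ; streaming load, bypasses cache reuse
  ...
  !0 = !{ i32 1 }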
'store' Instruction
Syntax:
+
+  store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]                   ; yields {void}
+  volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]          ; yields {void}
+
+ +
Overview:
+

The 'store' instruction is used to write to memory.

+ +
Arguments:
+

There are two arguments to the 'store' instruction: a value to store + and an address at which to store it. The type of the + '<pointer>' operand must be a pointer to + the first class type of the + '<value>' operand. If the store is marked as + volatile, then the optimizer is not allowed to modify the number or + order of execution of this store with other volatile operations.

+ +

The optional constant "align" argument specifies the alignment of the operation (that is, the alignment of the memory address). A value of 0 or an omitted "align" argument means that the operation has the preferential alignment for the target. It is the responsibility of the code emitter to ensure that the alignment information is correct. Overestimating the alignment results in undefined behavior. Underestimating the alignment may produce less efficient code. An alignment of 1 is always safe.

+ +

The optional !nontemporal metadata must reference a single metadata name <index> corresponding to a metadata node with one i32 entry of value 1. The existence of the !nontemporal metadata on the instruction tells the optimizer and code generator that this store is not expected to be reused in the cache. The code generator may select special instructions to save cache bandwidth, such as the MOVNT instruction on x86.

+ + +
Semantics:
+

The contents of memory are updated to contain '<value>' at the + location specified by the '<pointer>' operand. If + '<value>' is of scalar type then the number of bytes written + does not exceed the minimum number of bytes needed to hold all bits of the + type. For example, storing an i24 writes at most three bytes. When + writing a value of a type like i20 with a size that is not an + integral number of bytes, it is unspecified what happens to the extra bits + that do not belong to the type, but they will typically be overwritten.

+ +
Example:
+
+  %ptr = alloca i32                               ; yields {i32*}:ptr
+  store i32 3, i32* %ptr                          ; yields {void}
+  %val = load i32* %ptr                           ; yields {i32}:val = i32 3
+
'getelementptr' Instruction
Syntax:
+
+  <result> = getelementptr <pty>* <ptrval>{, <ty> <idx>}*
+  <result> = getelementptr inbounds <pty>* <ptrval>{, <ty> <idx>}*
+
+ +
Overview:
+

The 'getelementptr' instruction is used to get the address of a + subelement of an aggregate data structure. + It performs address calculation only and does not access memory.

+ +
Arguments:
+

The first argument is always a pointer, and forms the basis of the + calculation. The remaining arguments are indices that indicate which of the + elements of the aggregate object are indexed. The interpretation of each + index is dependent on the type being indexed into. The first index always + indexes the pointer value given as the first argument, the second index + indexes a value of the type pointed to (not necessarily the value directly + pointed to, since the first index can be non-zero), etc. The first type + indexed into must be a pointer value, subsequent types can be arrays, + vectors, and structs. Note that subsequent types being indexed into + can never be pointers, since that would require loading the pointer before + continuing calculation.

+ +

The type of each index argument depends on the type it is indexing into. + When indexing into a (optionally packed) structure, only i32 + integer constants are allowed. When indexing into an array, pointer + or vector, integers of any width are allowed, and they are not required to be + constant.

+ +

For example, let's consider a C code fragment and how it gets compiled to + LLVM:

+ +
+struct RT {
+  char A;
+  int B[10][20];
+  char C;
+};
+struct ST {
+  int X;
+  double Y;
+  struct RT Z;
+};
+
+int *foo(struct ST *s) {
+  return &s[1].Z.B[5][13];
+}
+
+ +

The LLVM code generated by the GCC frontend is:

+ +
+%RT = type { i8 , [10 x [20 x i32]], i8  }
+%ST = type { i32, double, %RT }
+
+define i32* @foo(%ST* %s) {
+entry:
+  %reg = getelementptr %ST* %s, i32 1, i32 2, i32 1, i32 5, i32 13
+  ret i32* %reg
+}
+
+ +
Semantics:
+

In the example above, the first index is indexing into the '%ST*' + type, which is a pointer, yielding a '%ST' = '{ i32, double, %RT + }' type, a structure. The second index indexes into the third element + of the structure, yielding a '%RT' = '{ i8 , [10 x [20 x i32]], + i8 }' type, another structure. The third index indexes into the second + element of the structure, yielding a '[10 x [20 x i32]]' type, an + array. The two dimensions of the array are subscripted into, yielding an + 'i32' type. The 'getelementptr' instruction returns a + pointer to this element, thus computing a value of 'i32*' type.

+ +

Note that it is perfectly legal to index partially through a structure, + returning a pointer to an inner element. Because of this, the LLVM code for + the given testcase is equivalent to:

+ +
+  define i32* @foo(%ST* %s) {
+    %t1 = getelementptr %ST* %s, i32 1                        ; yields %ST*:%t1
+    %t2 = getelementptr %ST* %t1, i32 0, i32 2                ; yields %RT*:%t2
+    %t3 = getelementptr %RT* %t2, i32 0, i32 1                ; yields [10 x [20 x i32]]*:%t3
+    %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
+    %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        ; yields i32*:%t5
+    ret i32* %t5
+  }
+
+ +

If the inbounds keyword is present, the result value of the + getelementptr is a trap value if the + base pointer is not an in bounds address of an allocated object, + or if any of the addresses that would be formed by successive addition of + the offsets implied by the indices to the base address with infinitely + precise arithmetic are not an in bounds address of that allocated + object. The in bounds addresses for an allocated object are all + the addresses that point into the object, plus the address one byte past + the end.

+ +

If the inbounds keyword is not present, the offsets are added to + the base address with silently-wrapping two's complement arithmetic, and + the result value of the getelementptr may be outside the object + pointed to by the base pointer. The result value may not necessarily be + used to access memory though, even if it happens to point into allocated + storage. See the Pointer Aliasing Rules + section for more information.

+ +

The getelementptr instruction is often confusing. For some more insight into + how it works, see the getelementptr FAQ.

+ +
Example:
+
+    ; yields [12 x i8]*:aptr
+    %aptr = getelementptr {i32, [12 x i8]}* %saptr, i64 0, i32 1
+    ; yields i8*:vptr
+    %vptr = getelementptr {i32, <2 x i8>}* %svptr, i64 0, i32 1, i32 1
+    ; yields i8*:eptr
+    %eptr = getelementptr [12 x i8]* %aptr, i64 0, i32 1
+    ; yields i32*:iptr
+    %iptr = getelementptr [10 x i32]* @arr, i16 0, i16 0
+
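An inbounds variant of the last example above (same hypothetical @arr) adds only the keyword:

    ; yields i32*:iptr2; trap value if the computed address leaves @arr
    %iptr2 = getelementptr inbounds [10 x i32]* @arr, i16 0, i16 3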
Conversion Operations

The instructions in this category are the conversion instructions (casting) + which all take a single operand and a type. They perform various bit + conversions on the operand.

'trunc .. to' Instruction
Syntax:
+
+  <result> = trunc <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'trunc' instruction truncates its operand to the + type ty2.

+ +
Arguments:
+

The 'trunc' instruction takes a value to truncate, and a type to truncate it to. Both types must be integer types, or integer vectors with the same number of elements. The bit size of the value must be larger than the bit size of the destination type, ty2. Equal sized types are not allowed.

+ +
Semantics:
+

The 'trunc' instruction truncates the high order bits + in value and converts the remaining bits to ty2. Since the + source size must be larger than the destination size, trunc cannot + be a no-op cast. It will always truncate bits.

+ +
Example:
+
+  %X = trunc i32 257 to i8                        ; yields i8:1
+  %Y = trunc i32 123 to i1                        ; yields i1:true
+  %Z = trunc i32 122 to i1                        ; yields i1:false
+  %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
+
'zext .. to' Instruction
Syntax:
+
+  <result> = zext <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'zext' instruction zero extends its operand to type + ty2.

+ + +
Arguments:
+

The 'zext' instruction takes a value to cast, and a type to cast it to. Both types must be integer types, or integer vectors with the same number of elements. The bit size of the value must be smaller than the bit size of the destination type, ty2.

+ +
Semantics:
+

The zext fills the high order bits of the value with zero + bits until it reaches the size of the destination type, ty2.

+ +

When zero extending from i1, the result will always be either 0 or 1.

+ +
Example:
+
+  %X = zext i32 257 to i64              ; yields i64:257
+  %Y = zext i1 true to i32              ; yields i32:1
+  %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
'sext .. to' Instruction
Syntax:
+
+  <result> = sext <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'sext' sign extends value to the type ty2.

+ +
Arguments:
+

The 'sext' instruction takes a value to cast, and a type to cast it to. Both types must be integer types, or integer vectors with the same number of elements. The bit size of the value must be smaller than the bit size of the destination type, ty2.

+ +
Semantics:
+

The 'sext' instruction performs a sign extension by copying the sign + bit (highest order bit) of the value until it reaches the bit size + of the type ty2.

+ +

When sign extending from i1, the extension always results in -1 or 0.

+ +
Example:
+
+  %X = sext i8  -1 to i16              ; yields i16:65535
+  %Y = sext i1 true to i32             ; yields i32:-1
+  %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
+
'fptrunc .. to' Instruction
Syntax:
+
+  <result> = fptrunc <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'fptrunc' instruction truncates value to type + ty2.

+ +
Arguments:
+

The 'fptrunc' instruction takes a floating + point value to cast and a floating point type + to cast it to. The size of value must be larger than the size of + ty2. This implies that fptrunc cannot be used to make a + no-op cast.

+ +
Semantics:
+

The 'fptrunc' instruction truncates a value from a larger + floating point type to a smaller + floating point type. If the value cannot fit + within the destination type, ty2, then the results are + undefined.

+ +
Example:
+
+  %X = fptrunc double 123.0 to float         ; yields float:123.0
+  %Y = fptrunc double 1.0E+300 to float      ; yields undefined
+
'fpext .. to' Instruction
Syntax:
+
+  <result> = fpext <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'fpext' extends a floating point value to a larger + floating point value.

+ +
Arguments:
+

The 'fpext' instruction takes a + floating point value to cast, and + a floating point type to cast it to. The source + type must be smaller than the destination type.

+ +
Semantics:
+

The 'fpext' instruction extends the value from a smaller + floating point type to a larger + floating point type. The fpext cannot be + used to make a no-op cast because it always changes bits. Use + bitcast to make a no-op cast for a floating point cast.

+ +
Example:
+
+  %X = fpext float 3.1415 to double        ; yields double:3.1415
+  %Y = fpext float 1.0 to float            ; yields float:1.0 (no-op)
+
'fptoui .. to' Instruction
Syntax:
+
+  <result> = fptoui <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'fptoui' converts a floating point value to its + unsigned integer equivalent of type ty2.

+ +
Arguments:
+

The 'fptoui' instruction takes a value to cast, which must be a scalar or vector floating point value, and a type to cast it to, ty2, which must be an integer type. If ty is a vector floating point type, ty2 must be a vector integer type with the same number of elements as ty.

+ +
Semantics:
+

The 'fptoui' instruction converts its + floating point operand into the nearest (rounding + towards zero) unsigned integer value. If the value cannot fit + in ty2, the results are undefined.

+ +
Example:
+
+  %X = fptoui double 123.0 to i32      ; yields i32:123
+  %Y = fptoui float 1.0E+300 to i1     ; yields undefined:1
+  %Z = fptoui float 1.04E+17 to i8     ; yields undefined:1
+
'fptosi .. to' Instruction
Syntax:
+
+  <result> = fptosi <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'fptosi' instruction converts a floating point value to its signed integer equivalent of type ty2.

+ +
Arguments:
+

The 'fptosi' instruction takes a value to cast, which must be a scalar or vector floating point value, and a type to cast it to, ty2, which must be an integer type. If ty is a vector floating point type, ty2 must be a vector integer type with the same number of elements as ty.

+ +
Semantics:
+

The 'fptosi' instruction converts its + floating point operand into the nearest (rounding + towards zero) signed integer value. If the value cannot fit in ty2, + the results are undefined.

+ +
Example:
+
+  %X = fptosi double -123.0 to i32      ; yields i32:-123
+  %Y = fptosi float 1.0E-247 to i1      ; yields undefined:1
+  %Z = fptosi float 1.04E+17 to i8      ; yields undefined:1
+
'uitofp .. to' Instruction
Syntax:
+
+  <result> = uitofp <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'uitofp' instruction regards value as an unsigned + integer and converts that value to the ty2 type.

+ +
Arguments:
+

The 'uitofp' instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to, ty2, which must be a floating point type. If ty is a vector integer type, ty2 must be a vector floating point type with the same number of elements as ty.

+ +
Semantics:
+

The 'uitofp' instruction interprets its operand as an unsigned + integer quantity and converts it to the corresponding floating point + value. If the value cannot fit in the floating point value, the results are + undefined.

+ +
Example:
+
+  %X = uitofp i32 257 to float         ; yields float:257.0
+  %Y = uitofp i8 -1 to double          ; yields double:255.0
+
'sitofp .. to' Instruction
Syntax:
+
+  <result> = sitofp <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'sitofp' instruction regards value as a signed integer + and converts that value to the ty2 type.

+ +
Arguments:
+

The 'sitofp' instruction takes a value to cast, which must be a scalar or vector integer value, and a type to cast it to, ty2, which must be a floating point type. If ty is a vector integer type, ty2 must be a vector floating point type with the same number of elements as ty.

+ +
Semantics:
+

The 'sitofp' instruction interprets its operand as a signed integer + quantity and converts it to the corresponding floating point value. If the + value cannot fit in the floating point value, the results are undefined.

+ +
Example:
+
+  %X = sitofp i32 257 to float         ; yields float:257.0
+  %Y = sitofp i8 -1 to double          ; yields double:-1.0
+
'ptrtoint .. to' Instruction
Syntax:
+
+  <result> = ptrtoint <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'ptrtoint' instruction converts the pointer value to + the integer type ty2.

+ +
Arguments:
+

The 'ptrtoint' instruction takes a value to cast, which + must be a pointer value, and a type to cast it to + ty2, which must be an integer type.

+ +
Semantics:
+

The 'ptrtoint' instruction converts value to integer type + ty2 by interpreting the pointer value as an integer and either + truncating or zero extending that value to the size of the integer type. If + value is smaller than ty2 then a zero extension is done. If + value is larger than ty2 then a truncation is done. If they + are the same size, then nothing is done (no-op cast) other than a type + change.

+ +
Example:
+
+  %X = ptrtoint i32* %P to i8           ; yields truncation on 32-bit architecture
+  %Y = ptrtoint i32* %P to i64          ; yields zero extension on 32-bit architecture
+
'inttoptr .. to' Instruction
Syntax:
+
+  <result> = inttoptr <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'inttoptr' instruction converts an integer value to a + pointer type, ty2.

+ +
Arguments:
+

The 'inttoptr' instruction takes an integer + value to cast, and a type to cast it to, which must be a + pointer type.

+ +
Semantics:
+

The 'inttoptr' instruction converts value to type + ty2 by applying either a zero extension or a truncation depending on + the size of the integer value. If value is larger than the + size of a pointer then a truncation is done. If value is smaller + than the size of a pointer then a zero extension is done. If they are the + same size, nothing is done (no-op cast).

+ +
Example:
+
+  %X = inttoptr i32 255 to i32*          ; yields zero extension on 64-bit architecture
+  %Y = inttoptr i32 255 to i32*          ; yields no-op on 32-bit architecture
+  %Z = inttoptr i64 0 to i32*            ; yields truncation on 32-bit architecture
+
'bitcast .. to' Instruction
Syntax:
+
+  <result> = bitcast <ty> <value> to <ty2>             ; yields ty2
+
+ +
Overview:
+

The 'bitcast' instruction converts value to type + ty2 without changing any bits.

+ +
Arguments:
+

The 'bitcast' instruction takes a value to cast, which must be a + non-aggregate first class value, and a type to cast it to, which must also be + a non-aggregate first class type. The bit sizes + of value and the destination type, ty2, must be + identical. If the source type is a pointer, the destination type must also be + a pointer. This instruction supports bitwise conversion of vectors to + integers and to vectors of other types (as long as they have the same + size).

+ +
Semantics:
+

The 'bitcast' instruction converts value to type + ty2. It is always a no-op cast because no bits change with + this conversion. The conversion is done as if the value had been + stored to memory and read back as type ty2. Pointer types may only + be converted to other pointer types with this instruction. To convert + pointers to other types, use the inttoptr or + ptrtoint instructions first.

+ +
Example:
+
+  %X = bitcast i8 255 to i8              ; yields i8 :-1
+  %Y = bitcast i32* %x to i16*           ; yields i16*:%x
+  %Z = bitcast <2 x i32> %V to i64       ; yields i64: %V
+
Other Operations

The instructions in this category are the "miscellaneous" instructions, which + defy better classification.

'icmp' Instruction
Syntax:
+
+  <result> = icmp <cond> <ty> <op1>, <op2>   ; yields {i1} or {<N x i1>}:result
+
+ +
Overview:
+

The 'icmp' instruction returns a boolean value or a vector of + boolean values based on comparison of its two integer, integer vector, or + pointer operands.

+ +
Arguments:
+

The 'icmp' instruction takes three operands. The first operand is the condition code indicating the kind of comparison to perform. It is not a value, just a keyword. The possible condition codes are:

+ +
  1. eq: equal
  2. ne: not equal
  3. ugt: unsigned greater than
  4. uge: unsigned greater or equal
  5. ult: unsigned less than
  6. ule: unsigned less or equal
  7. sgt: signed greater than
  8. sge: signed greater or equal
  9. slt: signed less than
  10. sle: signed less or equal
+ +

The remaining two arguments must be integer or + pointer or integer vector + typed. They must also be identical types.

+ +
Semantics:
+

The 'icmp' compares op1 and op2 according to the + condition code given as cond. The comparison performed always yields + either an i1 or vector of i1 + result, as follows:

+ +
  1. eq: yields true if the operands are equal, false otherwise. No sign interpretation is necessary or performed.
  2. ne: yields true if the operands are unequal, false otherwise. No sign interpretation is necessary or performed.
  3. ugt: interprets the operands as unsigned values and yields true if op1 is greater than op2.
  4. uge: interprets the operands as unsigned values and yields true if op1 is greater than or equal to op2.
  5. ult: interprets the operands as unsigned values and yields true if op1 is less than op2.
  6. ule: interprets the operands as unsigned values and yields true if op1 is less than or equal to op2.
  7. sgt: interprets the operands as signed values and yields true if op1 is greater than op2.
  8. sge: interprets the operands as signed values and yields true if op1 is greater than or equal to op2.
  9. slt: interprets the operands as signed values and yields true if op1 is less than op2.
  10. sle: interprets the operands as signed values and yields true if op1 is less than or equal to op2.
+ +

If the operands are pointer typed, the pointer + values are compared as if they were integers.

+ +

If the operands are integer vectors, then they are compared element by + element. The result is an i1 vector with the same number of elements + as the values being compared. Otherwise, the result is an i1.

+ +
Example:
+
+  <result> = icmp eq i32 4, 5          ; yields: result=false
+  <result> = icmp ne float* %X, %X     ; yields: result=false
+  <result> = icmp ult i16  4, 5        ; yields: result=true
+  <result> = icmp sgt i16  4, 5        ; yields: result=false
+  <result> = icmp ule i16 -4, 5        ; yields: result=false
+  <result> = icmp sge i16  4, 5        ; yields: result=false
+
+ +

Note that the code generator does not yet support vector types with + the icmp instruction.

+ +
+ + + + +
+ +
Syntax:
+
+  <result> = fcmp <cond> <ty> <op1>, <op2>     ; yields {i1} or {<N x i1>}:result
+
+ +
Overview:
+

The 'fcmp' instruction returns a boolean value or vector of boolean + values based on comparison of its operands.

+ +

If the operands are floating point scalars, then the result type is a boolean +(i1).

+ +

If the operands are floating point vectors, then the result type is a vector + of boolean with the same number of elements as the operands being + compared.

+ +
Arguments:
+

The 'fcmp' instruction takes three operands. The first operand is the condition code indicating the kind of comparison to perform. It is not a value, just a keyword. The possible condition codes are:

+ +
  1. false: no comparison, always returns false
  2. oeq: ordered and equal
  3. ogt: ordered and greater than
  4. oge: ordered and greater than or equal
  5. olt: ordered and less than
  6. ole: ordered and less than or equal
  7. one: ordered and not equal
  8. ord: ordered (no nans)
  9. ueq: unordered or equal
  10. ugt: unordered or greater than
  11. uge: unordered or greater than or equal
  12. ult: unordered or less than
  13. ule: unordered or less than or equal
  14. une: unordered or not equal
  15. uno: unordered (either nans)
  16. true: no comparison, always returns true
+ +

Ordered means that neither operand is a QNAN while + unordered means that either operand may be a QNAN.

+ +

Each of the op1 and op2 arguments must be either a floating point type or a vector of floating point type. They must have identical types.

+ +
Semantics:
+

The 'fcmp' instruction compares op1 and op2 + according to the condition code given as cond. If the operands are + vectors, then the vectors are compared element by element. Each comparison + performed always yields an i1 result, as + follows:

+ +
  1. false: always yields false, regardless of operands.
  2. oeq: yields true if both operands are not a QNAN and op1 is equal to op2.
  3. ogt: yields true if both operands are not a QNAN and op1 is greater than op2.
  4. oge: yields true if both operands are not a QNAN and op1 is greater than or equal to op2.
  5. olt: yields true if both operands are not a QNAN and op1 is less than op2.
  6. ole: yields true if both operands are not a QNAN and op1 is less than or equal to op2.
  7. one: yields true if both operands are not a QNAN and op1 is not equal to op2.
  8. ord: yields true if both operands are not a QNAN.
  9. ueq: yields true if either operand is a QNAN or op1 is equal to op2.
  10. ugt: yields true if either operand is a QNAN or op1 is greater than op2.
  11. uge: yields true if either operand is a QNAN or op1 is greater than or equal to op2.
  12. ult: yields true if either operand is a QNAN or op1 is less than op2.
  13. ule: yields true if either operand is a QNAN or op1 is less than or equal to op2.
  14. une: yields true if either operand is a QNAN or op1 is not equal to op2.
  15. uno: yields true if either operand is a QNAN.
  16. true: always yields true, regardless of operands.
+ +
Example:
+
+  <result> = fcmp oeq float 4.0, 5.0    ; yields: result=false
+  <result> = fcmp one float 4.0, 5.0    ; yields: result=true
+  <result> = fcmp olt float 4.0, 5.0    ; yields: result=true
+  <result> = fcmp ueq double 1.0, 2.0   ; yields: result=false
+
+ +

Note that the code generator does not yet support vector types with + the fcmp instruction.

+ +
+ + + + +
+ +
Syntax:
+
+  <result> = phi <ty> [ <val0>, <label0>], ...
+
+ +
Overview:
+

The 'phi' instruction is used to implement the φ node in the + SSA graph representing the function.

+ +
Arguments:
+

The type of the incoming values is specified with the first type field. After + this, the 'phi' instruction takes a list of pairs as arguments, with + one pair for each predecessor basic block of the current block. Only values + of first class type may be used as the value + arguments to the PHI node. Only labels may be used as the label + arguments.

+ +

There must be no non-phi instructions between the start of a basic block and + the PHI instructions: i.e. PHI instructions must be first in a basic + block.

+ +

For the purposes of the SSA form, the use of each incoming value is deemed to + occur on the edge from the corresponding predecessor block to the current + block (but after any definition of an 'invoke' instruction's return + value on the same edge).

+ +
Semantics:
+

At runtime, the 'phi' instruction logically takes on the value + specified by the pair corresponding to the predecessor basic block that + executed just prior to the current block.

+ +
Example:
+
+Loop:       ; Infinite loop that counts from 0 on up...
+  %indvar = phi i32 [ 0, %LoopHeader ], [ %nextindvar, %Loop ]
+  %nextindvar = add i32 %indvar, 1
+  br label %Loop
+
+ +
+ + + + +
+ +
Syntax:
+
+  <result> = select selty <cond>, <ty> <val1>, <ty> <val2>             ; yields ty
+
+  selty is either i1 or {<N x i1>}
+
+ +
Overview:
+

The 'select' instruction is used to choose one value based on a + condition, without branching.

+ + +
Arguments:
+

The 'select' instruction requires an 'i1' value or a vector of 'i1' values indicating the condition, and two values of the same first class type. If val1 and val2 are vectors and the condition is a scalar, then entire vectors are selected, not individual elements.

+ +
Semantics:
+

If the condition is an i1 and it evaluates to 1, the instruction returns the + first value argument; otherwise, it returns the second value argument.

+ +

If the condition is a vector of i1, then the value arguments must be vectors + of the same size, and the selection is done element by element.

+ +
Example:
+
+  %X = select i1 true, i8 17, i8 42          ; yields i8:17
+
+ +

Note that the code generator does not yet support conditions + with vector type.

+ +
+ + + + +
+ +
Syntax:
+
+  <result> = [tail] call [cconv] [ret attrs] <ty> [<fnty>*] <fnptrval>(<function args>) [fn attrs]
+
+ +
Overview:
+

The 'call' instruction represents a simple function call.

+ +
Arguments:
+

This instruction requires several arguments:

+ +
  1. The optional "tail" marker indicates that the callee function does not access any allocas or varargs in the caller. Note that calls may be marked "tail" even if they do not occur before a ret instruction. If the "tail" marker is present, the function call is eligible for tail call optimization, but might not in fact be optimized into a jump. The code generator may optimize calls marked "tail" with either 1) automatic sibling call optimization when the caller and callee have matching signatures, or 2) forced tail call optimization when the following extra requirements are met:
     • Caller and callee both have the calling convention fastcc.
     • The call is in tail position (ret immediately follows call and ret uses value of call or is void).
     • Option -tailcallopt is enabled, or llvm::GuaranteedTailCallOpt is true.
     • Platform specific constraints are met.
  2. The optional "cconv" marker indicates which calling convention the call should use. If none is specified, the call defaults to using C calling conventions. The calling convention of the call must match the calling convention of the target function, or else the behavior is undefined.
  3. The optional Parameter Attributes list for return values. Only 'zeroext', 'signext', and 'inreg' attributes are valid here.
  4. 'ty': the type of the call instruction itself which is also the type of the return value. Functions that return no value are marked void.
  5. 'fnty': shall be the signature of the pointer to function value being invoked. The argument types must match the types implied by this signature. This type can be omitted if the function is not varargs and if the function type does not return a pointer to a function.
  6. 'fnptrval': An LLVM value containing a pointer to a function to be invoked. In most cases, this is a direct function invocation, but indirect calls are just as possible, calling an arbitrary pointer to function value.
  7. 'function args': argument list whose types match the function signature argument types and parameter attributes. All arguments must be of first class type. If the function signature indicates the function accepts a variable number of arguments, the extra arguments can be specified.
  8. The optional function attributes list. Only 'noreturn', 'nounwind', 'readonly' and 'readnone' attributes are valid here.
+ +
Semantics:
+

The 'call' instruction is used to cause control flow to transfer to + a specified function, with its incoming arguments bound to the specified + values. Upon a 'ret' instruction in the called + function, control flow continues with the instruction after the function + call, and the return value of the function is bound to the result + argument.

+ +
Example:
+
+  %retval = call i32 @test(i32 %argc)
+  call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        ; yields i32
+  %X = tail call i32 @foo()                                    ; yields i32
+  %Y = tail call fastcc i32 @foo()  ; yields i32
+  call void %foo(i8 97 signext)
+
+  %struct.A = type { i32, i8 }
+  %r = call %struct.A @foo()                        ; yields { i32, i8 }
+  %gr = extractvalue %struct.A %r, 0                ; yields i32
+  %gr1 = extractvalue %struct.A %r, 1               ; yields i8
+  call void @foo() noreturn                         ; indicates that @foo never returns normally
+  %ZZ = call zeroext i32 @bar()                     ; Return value is zero extended
+
+ +

LLVM treats calls to some functions with names and arguments that match the standard C99 library as being the C99 library functions, and may perform optimizations or generate code for them under that assumption. This is something we'd like to change in the future to provide better support for freestanding environments and non-C-based languages.

+ +
+ + + + +
+ +
Syntax:
+
+  <resultval> = va_arg <va_list*> <arglist>, <argty>
+
+ +
Overview:
+

The 'va_arg' instruction is used to access arguments passed through + the "variable argument" area of a function call. It is used to implement the + va_arg macro in C.

+ +
Arguments:
+

This instruction takes a va_list* value and the type of the + argument. It returns a value of the specified argument type and increments + the va_list to point to the next argument. The actual type + of va_list is target specific.

+ +
Semantics:
+

The 'va_arg' instruction loads an argument of the specified type + from the specified va_list and causes the va_list to point + to the next argument. For more information, see the variable argument + handling Intrinsic Functions.

+ +

It is legal for this instruction to be called in a function which does not + take a variable number of arguments, for example, the vfprintf + function.

+ +

va_arg is an LLVM instruction instead of + an intrinsic function because it takes a type as an + argument.

+ +
Example:
+

See the variable argument processing section.

+ +

Note that the code generator does not yet fully support va_arg on many + targets. Also, it does not currently support va_arg with aggregate types on + any target.

+ +
+ + + + + +
+ +

LLVM supports the notion of an "intrinsic function". These functions have + well known names and semantics and are required to follow certain + restrictions. Overall, these intrinsics represent an extension mechanism for + the LLVM language that does not require changing all of the transformations + in LLVM when adding to the language (or the bitcode reader/writer, the + parser, etc...).

+ +

Intrinsic function names must all start with an "llvm." prefix. This + prefix is reserved in LLVM for intrinsic names; thus, function names may not + begin with this prefix. Intrinsic functions must always be external + functions: you cannot define the body of intrinsic functions. Intrinsic + functions may only be used in call or invoke instructions: it is illegal to + take the address of an intrinsic function. Additionally, because intrinsic + functions are part of the LLVM language, it is required if any are added that + they be documented here.

+ +

Some intrinsic functions can be overloaded, i.e., the intrinsic represents a + family of functions that perform the same operation but on different data + types. Because LLVM can represent over 8 million different integer types, + overloading is used commonly to allow an intrinsic function to operate on any + integer type. One or more of the argument types or the result type can be + overloaded to accept any integer type. Argument types may also be defined as + exactly matching a previous argument's type or the result type. This allows + an intrinsic function which accepts multiple arguments, but needs all of them + to be of the same type, to only be overloaded with respect to a single + argument or the result.

+ +

Overloaded intrinsics will have the names of their overloaded argument types encoded into their function names, each preceded by a period. Only those types which are overloaded result in a name suffix. Arguments whose type is matched against another type do not. For example, the llvm.ctpop function can take an integer of any width and returns an integer of exactly the same integer width. This leads to a family of functions such as i8 @llvm.ctpop.i8(i8 %val) and i29 @llvm.ctpop.i29(i29 %val). Only one type, the return type, is overloaded, and only one type suffix is required. Because the argument's type is matched against the return type, it does not require its own name suffix.

+ +

To learn how to add an intrinsic function, please see the + Extending LLVM Guide.

+ +
+ + + + +
+ +

Variable argument support is defined in LLVM with + the va_arg instruction and these three + intrinsic functions. These functions are related to the similarly named + macros defined in the <stdarg.h> header file.

+ +

All of these functions operate on arguments that use a target-specific value + type "va_list". The LLVM assembly language reference manual does + not define what this type is, so all transformations should be prepared to + handle these functions regardless of the type used.

+ +

This example shows how the va_arg + instruction and the variable argument handling intrinsic functions are + used.

+ +
+define i32 @test(i32 %X, ...) {
+  ; Initialize variable argument processing
+  %ap = alloca i8*
+  %ap2 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap2)
+
+  ; Read a single integer argument
+  %tmp = va_arg i8** %ap, i32
+
+  ; Demonstrate usage of llvm.va_copy and llvm.va_end
+  %aq = alloca i8*
+  %aq2 = bitcast i8** %aq to i8*
+  call void @llvm.va_copy(i8* %aq2, i8* %ap2)
+  call void @llvm.va_end(i8* %aq2)
+
+  ; Stop processing of arguments.
+  call void @llvm.va_end(i8* %ap2)
+  ret i32 %tmp
+}
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+
+ +
+ + + + + +
+ +
Syntax:
+
+  declare void @llvm.va_start(i8* <arglist>)
+
+ +
Overview:
+

The 'llvm.va_start' intrinsic initializes *<arglist> + for subsequent use by va_arg.

+ +
Arguments:
+

The argument is a pointer to a va_list element to initialize.

+ +
Semantics:
+

The 'llvm.va_start' intrinsic works just like the va_start + macro available in C. In a target-dependent way, it initializes + the va_list element to which the argument points, so that the next + call to va_arg will produce the first variable argument passed to + the function. Unlike the C va_start macro, this intrinsic does not + need to know the last argument of the function as the compiler can figure + that out.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.va_end(i8* <arglist>)
+
+ +
Overview:
+

The 'llvm.va_end' intrinsic destroys *<arglist>, + which has been initialized previously + with llvm.va_start + or llvm.va_copy.

+ +
Arguments:
+

The argument is a pointer to a va_list to destroy.

+ +
Semantics:
+

The 'llvm.va_end' intrinsic works just like the va_end + macro available in C. In a target-dependent way, it destroys + the va_list element to which the argument points. Calls + to llvm.va_start + and llvm.va_copy must be matched exactly + with calls to llvm.va_end.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.va_copy(i8* <destarglist>, i8* <srcarglist>)
+
+ +
Overview:
+

The 'llvm.va_copy' intrinsic copies the current argument position + from the source argument list to the destination argument list.

+ +
Arguments:
+

The first argument is a pointer to a va_list element to initialize. + The second argument is a pointer to a va_list element to copy + from.

+ +
Semantics:
+

The 'llvm.va_copy' intrinsic works just like the va_copy + macro available in C. In a target-dependent way, it copies the + source va_list element into the destination va_list + element. This intrinsic is necessary because + the llvm.va_start intrinsic may be + arbitrarily complex and require, for example, memory allocation.

+ +
+ + + + +
+ +

LLVM support for Accurate Garbage +Collection (GC) requires the implementation and generation of these +intrinsics. These intrinsics allow identification of GC +roots on the stack, as well as garbage collector implementations that +require read and write +barriers. Front-ends for type-safe garbage collected languages should generate +these intrinsics to make use of the LLVM garbage collectors. For more details, +see Accurate Garbage Collection with +LLVM.

+ +

The garbage collection intrinsics only operate on objects in the generic + address space (address space zero).

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
+
+ +
Overview:
+

The 'llvm.gcroot' intrinsic declares the existence of a GC root to + the code generator, and allows some metadata to be associated with it.

+ +
Arguments:
+

The first argument specifies the address of a stack object that contains the + root pointer. The second pointer (which must be either a constant or a + global value address) contains the meta-data to be associated with the + root.

+ +
Semantics:
+

At runtime, a call to this intrinsic stores a null pointer into the "ptrloc" + location. At compile-time, the code generator generates information to allow + the runtime to find the pointer at GC safe points. The 'llvm.gcroot' + intrinsic may only be used in a function which specifies a GC + algorithm.

+ +
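
For illustration, a minimal sketch of a function declaring a root follows. The collector name "shadow-stack" and the function itself are assumptions for the example, not requirements of this intrinsic:

+define void @keep(i8* %obj) gc "shadow-stack" {
+entry:
+  %root = alloca i8*                              ; stack slot that will hold the root
+  call void @llvm.gcroot(i8** %root, i8* null)    ; declare the slot to the code generator
+  store i8* %obj, i8** %root                      ; %obj is now findable at GC safe points
+  ret void
+}
+
+declare void @llvm.gcroot(i8**, i8*)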
+ + + + +
+ +
Syntax:
+
+  declare i8* @llvm.gcread(i8* %ObjPtr, i8** %Ptr)
+
+ +
Overview:
+

The 'llvm.gcread' intrinsic identifies reads of references from heap + locations, allowing garbage collector implementations that require read + barriers.

+ +
Arguments:
+

The second argument is the address to read from, which should be an address allocated from the garbage collector. The first argument is a pointer to the start of the referenced object, if needed by the language runtime (otherwise null).

+ +
Semantics:
+

The 'llvm.gcread' intrinsic has the same semantics as a load + instruction, but may be replaced with substantially more complex code by the + garbage collector runtime, as needed. The 'llvm.gcread' intrinsic + may only be used in a function which specifies a GC + algorithm.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.gcwrite(i8* %P1, i8* %Obj, i8** %P2)
+
+ +
Overview:
+

The 'llvm.gcwrite' intrinsic identifies writes of references to heap + locations, allowing garbage collector implementations that require write + barriers (such as generational or reference counting collectors).

+ +
Arguments:
+

The first argument is the reference to store, the second is the start of the + object to store it to, and the third is the address of the field of Obj to + store to. If the runtime does not require a pointer to the object, Obj may + be null.

+ +
Semantics:
+

The 'llvm.gcwrite' intrinsic has the same semantics as a store + instruction, but may be replaced with substantially more complex code by the + garbage collector runtime, as needed. The 'llvm.gcwrite' intrinsic + may only be used in a function which specifies a GC + algorithm.

+ +
+ + + + +
+ +

These intrinsics are provided by LLVM to expose special features that may + only be implemented with code generator support.

+ +
+ + + + +
+ +
Syntax:
+
+  declare i8* @llvm.returnaddress(i32 <level>)
+
+ +
Overview:
+

The 'llvm.returnaddress' intrinsic attempts to compute a + target-specific value indicating the return address of the current function + or one of its callers.

+ +
Arguments:
+

The argument to this intrinsic indicates which function to return the address + for. Zero indicates the calling function, one indicates its caller, etc. + The argument is required to be a constant integer value.

+ +
Semantics:
+

The 'llvm.returnaddress' intrinsic either returns a pointer + indicating the return address of the specified call frame, or zero if it + cannot be identified. The value returned by this intrinsic is likely to be + incorrect or 0 for arguments other than zero, so it should only be used for + debugging purposes.

+ +

Note that calling this intrinsic does not prevent function inlining or other + aggressive transformations, so the value returned may not be that of the + obvious source-language caller.

+ +
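
For example, a one-line sketch capturing the current function's return address (the result name is arbitrary):

+  %ra = call i8* @llvm.returnaddress(i32 0)    ; return address of the current frame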
+ + + + +
+ +
Syntax:
+
+  declare i8* @llvm.frameaddress(i32 <level>)
+
+ +
Overview:
+

The 'llvm.frameaddress' intrinsic attempts to return the + target-specific frame pointer value for the specified stack frame.

+ +
Arguments:
+

The argument to this intrinsic indicates which function to return the frame + pointer for. Zero indicates the calling function, one indicates its caller, + etc. The argument is required to be a constant integer value.

+ +
Semantics:
+

The 'llvm.frameaddress' intrinsic either returns a pointer + indicating the frame address of the specified call frame, or zero if it + cannot be identified. The value returned by this intrinsic is likely to be + incorrect or 0 for arguments other than zero, so it should only be used for + debugging purposes.

+ +

Note that calling this intrinsic does not prevent function inlining or other + aggressive transformations, so the value returned may not be that of the + obvious source-language caller.

+ +
+ + + + +
+ +
Syntax:
+
+  declare i8* @llvm.stacksave()
+
+ +
Overview:
+

The 'llvm.stacksave' intrinsic is used to remember the current state + of the function stack, for use + with llvm.stackrestore. This is + useful for implementing language features like scoped automatic variable + sized arrays in C99.

+ +
Semantics:
+

This intrinsic returns an opaque pointer value that can be passed to llvm.stackrestore. When an llvm.stackrestore intrinsic is executed with a value saved from llvm.stacksave, it effectively restores the state of the stack to the state it was in when the llvm.stacksave intrinsic executed. In practice, this pops any alloca blocks from the stack that were allocated after the llvm.stacksave was executed.

+ +
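
As a sketch of the intended pairing (the buffer size %n and the consumer @use are assumptions for the example, not part of this manual):

+  %sp  = call i8* @llvm.stacksave()       ; capture the current stack state
+  %buf = alloca i8, i32 %n                ; C99-style variable length allocation
+  call void @use(i8* %buf)                ; hypothetical consumer of the buffer
+  call void @llvm.stackrestore(i8* %sp)   ; pops %buf from the stack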
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.stackrestore(i8* %ptr)
+
+ +
Overview:
+

The 'llvm.stackrestore' intrinsic is used to restore the state of + the function stack to the state it was in when the + corresponding llvm.stacksave intrinsic + executed. This is useful for implementing language features like scoped + automatic variable sized arrays in C99.

+ +
Semantics:
+

See the description + for llvm.stacksave.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.prefetch(i8* <address>, i32 <rw>, i32 <locality>)
+
+ +
Overview:
+

The 'llvm.prefetch' intrinsic is a hint to the code generator to + insert a prefetch instruction if supported; otherwise, it is a noop. + Prefetches have no effect on the behavior of the program but can change its + performance characteristics.

+ +
Arguments:
+

address is the address to be prefetched, rw is the specifier determining if the fetch should be for a read (0) or write (1), and locality is a temporal locality specifier ranging from (0), no locality, to (3), extremely local (keep in cache). The rw and locality arguments must be constant integers.

+ +
Semantics:
+

This intrinsic does not modify the behavior of the program. In particular, + prefetches cannot trap and do not produce a value. On targets that support + this intrinsic, the prefetch can provide hints to the processor cache for + better performance.

+ +
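
For example, a hint to fetch a location for a read with maximal temporal locality might look like the following sketch (%ptr is an assumed i8* value):

+  call void @llvm.prefetch(i8* %ptr, i32 0, i32 3)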
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.pcmarker(i32 <id>)
+
+ +
Overview:
+

The 'llvm.pcmarker' intrinsic is a method to export a Program + Counter (PC) in a region of code to simulators and other tools. The method + is target specific, but it is expected that the marker will use exported + symbols to transmit the PC of the marker. The marker makes no guarantees + that it will remain with any specific instruction after optimizations. It is + possible that the presence of a marker will inhibit optimizations. The + intended use is to be inserted after optimizations to allow correlations of + simulation runs.

+ +
Arguments:
+

id is a numerical id identifying the marker.

+ +
Semantics:
+

This intrinsic does not modify the behavior of the program. Backends that do + not support this intrinsic may ignore it.

+ +
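
As a sketch, a call exporting the PC at this point under an arbitrary id:

+  call void @llvm.pcmarker(i32 65)   ; mark this point with id 65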
+ + + + +
+ +
Syntax:
+
+  declare i64 @llvm.readcyclecounter()
+
+ +
Overview:
+

The 'llvm.readcyclecounter' intrinsic provides access to the cycle counter register (or similar low latency, high accuracy clocks) on those targets that support it. On X86, it should map to RDTSC. On Alpha, it should map to RPCC. As the backing counters overflow quickly (on the order of 9 seconds on Alpha), this should only be used for small timings.

+ +
Semantics:
+

When directly supported, reading the cycle counter should not modify any + memory. Implementations are allowed to either return a application specific + value or a system wide value. On backends without support, this is lowered + to a constant 0.

+ +
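
A small timing sketch (the function @work is an assumption for the example):

+  %t0 = call i64 @llvm.readcyclecounter()   ; cycles before
+  call void @work()                         ; code under measurement
+  %t1 = call i64 @llvm.readcyclecounter()   ; cycles after
+  %dt = sub i64 %t1, %t0                    ; elapsed cycles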
+ + + + +
+ +

LLVM provides intrinsics for a few important standard C library functions. + These intrinsics allow source-language front-ends to pass information about + the alignment of the pointer arguments to the code generator, providing + opportunity for more efficient code generation.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memcpy on any + integer bit width and for different address spaces. Not all targets support + all bit widths however.

+ +
+  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                          i32 <len>, i32 <align>, i1 <isvolatile>)
+  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                          i64 <len>, i32 <align>, i1 <isvolatile>)
+
+ +
Overview:
+

The 'llvm.memcpy.*' intrinsics copy a block of memory from the + source location to the destination location.

+ +

Note that, unlike the standard libc function, the llvm.memcpy.* intrinsics do not return a value, take extra alignment/isvolatile arguments, and the pointers can be in specified address spaces.

+ +
Arguments:
+ +

The first argument is a pointer to the destination, the second is a pointer + to the source. The third argument is an integer argument specifying the + number of bytes to copy, the fourth argument is the alignment of the + source and destination locations, and the fifth is a boolean indicating a + volatile access.

+ +

If the call to this intrinsic has an alignment value that is not 0 or 1, + then the caller guarantees that both the source and destination pointers are + aligned to that boundary.

+ +

If the isvolatile parameter is true, the + llvm.memcpy call is a volatile operation. + The detailed access behavior is not very cleanly specified and it is unwise + to depend on it.

+ +
Semantics:
+ +

The 'llvm.memcpy.*' intrinsics copy a block of memory from the + source location to the destination location, which are not allowed to + overlap. It copies "len" bytes of memory over. If the argument is known to + be aligned to some boundary, this can be specified as the fourth argument, + otherwise it should be set to 0 or 1.

+ +
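
For example, copying 16 non-overlapping, non-volatile bytes between two i8* values known to be 4-byte aligned might look like this sketch (%dst and %src are assumed values):

+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 16, i32 4, i1 false)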
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memmove on any integer bit width and for different address spaces. Not all targets support all bit widths however.

+ +
+  declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
+                                           i32 <len>, i32 <align>, i1 <isvolatile>)
+  declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
+                                           i64 <len>, i32 <align>, i1 <isvolatile>)
+
+ +
Overview:
+

The 'llvm.memmove.*' intrinsics move a block of memory from the + source location to the destination location. It is similar to the + 'llvm.memcpy' intrinsic but allows the two memory locations to + overlap.

+ +

Note that, unlike the standard libc function, the llvm.memmove.* intrinsics do not return a value, take extra alignment/isvolatile arguments, and the pointers can be in specified address spaces.

+ +
Arguments:
+ +

The first argument is a pointer to the destination, the second is a pointer + to the source. The third argument is an integer argument specifying the + number of bytes to copy, the fourth argument is the alignment of the + source and destination locations, and the fifth is a boolean indicating a + volatile access.

+ +

If the call to this intrinsic has an alignment value that is not 0 or 1, + then the caller guarantees that the source and destination pointers are + aligned to that boundary.

+ +

If the isvolatile parameter is true, the + llvm.memmove call is a volatile operation. + The detailed access behavior is not very cleanly specified and it is unwise + to depend on it.

+ +
Semantics:
+ +

The 'llvm.memmove.*' intrinsics copy a block of memory from the + source location to the destination location, which may overlap. It copies + "len" bytes of memory over. If the argument is known to be aligned to some + boundary, this can be specified as the fourth argument, otherwise it should + be set to 0 or 1.

+ +
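
For example, moving 16 possibly-overlapping bytes with no known alignment (so the alignment argument is 1) might look like this sketch (%dst and %src are assumed values):

+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 16, i32 1, i1 false)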
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.memset on any integer bit + width and for different address spaces. However, not all targets support all + bit widths.

+ +
+  declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
+                                     i32 <len>, i32 <align>, i1 <isvolatile>)
+  declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
+                                     i64 <len>, i32 <align>, i1 <isvolatile>)
+
+ +
Overview:
+

The 'llvm.memset.*' intrinsics fill a block of memory with a + particular byte value.

+ +

Note that, unlike the standard libc function, the llvm.memset + intrinsic does not return a value and takes extra alignment/volatile + arguments. Also, the destination can be in an arbitrary address space.

+ +
Arguments:
+

The first argument is a pointer to the destination to fill, the second is the byte value with which to fill it, the third argument is an integer argument specifying the number of bytes to fill, the fourth argument is the known alignment of the destination location, and the fifth is a boolean indicating a volatile access.

+ +

If the call to this intrinsic has an alignment value that is not 0 or 1, + then the caller guarantees that the destination pointer is aligned to that + boundary.

+ +

If the isvolatile parameter is true, the + llvm.memset call is a volatile operation. + The detailed access behavior is not very cleanly specified and it is unwise + to depend on it.

+ +
Semantics:
+

The 'llvm.memset.*' intrinsics fill "len" bytes of memory starting + at the destination location. If the argument is known to be aligned to some + boundary, this can be specified as the fourth argument, otherwise it should + be set to 0 or 1.

+ +
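
For example, zeroing 64 bytes at a 4-byte aligned destination might look like this sketch (%dst is an assumed i8* value):

+  call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 64, i32 4, i1 false)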
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sqrt on any + floating point or vector of floating point type. Not all targets support all + types however.

+ +
+  declare float     @llvm.sqrt.f32(float %Val)
+  declare double    @llvm.sqrt.f64(double %Val)
+  declare x86_fp80  @llvm.sqrt.f80(x86_fp80 %Val)
+  declare fp128     @llvm.sqrt.f128(fp128 %Val)
+  declare ppc_fp128 @llvm.sqrt.ppcf128(ppc_fp128 %Val)
+
+ +
Overview:
+

The 'llvm.sqrt' intrinsics return the sqrt of the specified operand, + returning the same value as the libm 'sqrt' functions would. + Unlike sqrt in libm, however, llvm.sqrt has undefined + behavior for negative numbers other than -0.0 (which allows for better + optimization, because there is no need to worry about errno being + set). llvm.sqrt(-0.0) is defined to return -0.0 like IEEE sqrt.

+ +
Arguments:
+

The argument and return value are floating point numbers of the same + type.

+ +
Semantics:
+

This function returns the sqrt of the specified operand if it is a + nonnegative floating point number.

+ +
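
For example, a sketch of a double-precision use (%x is an assumed nonnegative double):

+  %r = call double @llvm.sqrt.f64(double %x)   ; same value as libm sqrt would return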
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.powi on any + floating point or vector of floating point type. Not all targets support all + types however.

+ +
+  declare float     @llvm.powi.f32(float  %Val, i32 %power)
+  declare double    @llvm.powi.f64(double %Val, i32 %power)
+  declare x86_fp80  @llvm.powi.f80(x86_fp80  %Val, i32 %power)
+  declare fp128     @llvm.powi.f128(fp128 %Val, i32 %power)
+  declare ppc_fp128 @llvm.powi.ppcf128(ppc_fp128  %Val, i32 %power)
+
+ +
Overview:
+

The 'llvm.powi.*' intrinsics return the first operand raised to the + specified (positive or negative) power. The order of evaluation of + multiplications is not defined. When a vector of floating point type is + used, the second argument remains a scalar integer value.

+ +
Arguments:
+

The second argument is an integer power, and the first is a value to raise to + that power.

+ +
Semantics:
+

This function returns the first value raised to the second power with an + unspecified sequence of rounding operations.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sin on any + floating point or vector of floating point type. Not all targets support all + types however.

+ +
+  declare float     @llvm.sin.f32(float  %Val)
+  declare double    @llvm.sin.f64(double %Val)
+  declare x86_fp80  @llvm.sin.f80(x86_fp80  %Val)
+  declare fp128     @llvm.sin.f128(fp128 %Val)
+  declare ppc_fp128 @llvm.sin.ppcf128(ppc_fp128  %Val)
+
+ +
Overview:
+

The 'llvm.sin.*' intrinsics return the sine of the operand.

+ +
Arguments:
+

The argument and return value are floating point numbers of the same + type.

+ +
Semantics:
+

This function returns the sine of the specified operand, returning the same + values as the libm sin functions would, and handles error conditions + in the same way.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.cos on any + floating point or vector of floating point type. Not all targets support all + types however.

+ +
+  declare float     @llvm.cos.f32(float  %Val)
+  declare double    @llvm.cos.f64(double %Val)
+  declare x86_fp80  @llvm.cos.f80(x86_fp80  %Val)
+  declare fp128     @llvm.cos.f128(fp128 %Val)
+  declare ppc_fp128 @llvm.cos.ppcf128(ppc_fp128  %Val)
+
+ +
Overview:
+

The 'llvm.cos.*' intrinsics return the cosine of the operand.

+ +
Arguments:
+

The argument and return value are floating point numbers of the same + type.

+ +
Semantics:
+

This function returns the cosine of the specified operand, returning the same + values as the libm cos functions would, and handles error conditions + in the same way.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.pow on any + floating point or vector of floating point type. Not all targets support all + types however.

+ +
+  declare float     @llvm.pow.f32(float  %Val, float %Power)
+  declare double    @llvm.pow.f64(double %Val, double %Power)
+  declare x86_fp80  @llvm.pow.f80(x86_fp80  %Val, x86_fp80 %Power)
+  declare fp128     @llvm.pow.f128(fp128 %Val, fp128 %Power)
+  declare ppc_fp128 @llvm.pow.ppcf128(ppc_fp128 %Val, ppc_fp128 %Power)
+
+ +
Overview:
+

The 'llvm.pow.*' intrinsics return the first operand raised to the + specified (positive or negative) power.

+ +
Arguments:
+

The second argument is a floating point power, and the first is a value to + raise to that power.

+ +
Semantics:
+

This function returns the first value raised to the second power, returning + the same values as the libm pow functions would, and handles error + conditions in the same way.

+ +
+ + + + +
+ +

LLVM provides intrinsics for a few important bit manipulation operations. + These allow efficient code generation for some algorithms.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic function. You can use bswap on any integer + type that is an even number of bytes (i.e. BitWidth % 16 == 0).

+ +
+  declare i16 @llvm.bswap.i16(i16 <id>)
+  declare i32 @llvm.bswap.i32(i32 <id>)
+  declare i64 @llvm.bswap.i64(i64 <id>)
+
+ +
Overview:
+

The 'llvm.bswap' family of intrinsics is used to byte swap integer + values with an even number of bytes (positive multiple of 16 bits). These + are useful for performing operations on data that is not in the target's + native byte order.

+ +
Semantics:
+

The llvm.bswap.i16 intrinsic returns an i16 value that has the high + and low byte of the input i16 swapped. Similarly, + the llvm.bswap.i32 intrinsic returns an i32 value that has the four + bytes of the input i32 swapped, so that if the input bytes are numbered 0, 1, + 2, 3 then the returned i32 will have its bytes in 3, 2, 1, 0 order. + The llvm.bswap.i48, llvm.bswap.i64 and other intrinsics + extend this concept to additional even-byte lengths (6 bytes, 8 bytes and + more, respectively).

+ +
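
For example, byte-swapping the 32-bit constant 0x12345678, as a sketch:

+  %r = call i32 @llvm.bswap.i32(i32 305419896)   ; 0x12345678 becomes 0x78563412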
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit + width. Not all targets support all bit widths however.

+ +
+  declare i8 @llvm.ctpop.i8(i8  <src>)
+  declare i16 @llvm.ctpop.i16(i16 <src>)
+  declare i32 @llvm.ctpop.i32(i32 <src>)
+  declare i64 @llvm.ctpop.i64(i64 <src>)
+  declare i256 @llvm.ctpop.i256(i256 <src>)
+
+ +
Overview:
+

The 'llvm.ctpop' family of intrinsics counts the number of bits set + in a value.

+ +
Arguments:
+

The only argument is the value to be counted. The argument may be of any + integer type. The return type must match the argument type.

+ +
Semantics:
+

The 'llvm.ctpop' intrinsic counts the 1's in a variable.

+ +
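
For example, as a sketch:

+  %r = call i32 @llvm.ctpop.i32(i32 7)   ; 7 is 0b111, so this yields i32:3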
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ctlz on any + integer bit width. Not all targets support all bit widths however.

+ +
+  declare i8 @llvm.ctlz.i8 (i8  <src>)
+  declare i16 @llvm.ctlz.i16(i16 <src>)
+  declare i32 @llvm.ctlz.i32(i32 <src>)
+  declare i64 @llvm.ctlz.i64(i64 <src>)
+  declare i256 @llvm.ctlz.i256(i256 <src>)
+
+ +
Overview:
+

The 'llvm.ctlz' family of intrinsic functions counts the number of + leading zeros in a variable.

+ +
Arguments:
+

The only argument is the value to be counted. The argument may be of any + integer type. The return type must match the argument type.

+ +
Semantics:
+

The 'llvm.ctlz' intrinsic counts the leading (most significant) + zeros in a variable. If the src == 0 then the result is the size in bits of + the type of src. For example, llvm.ctlz(i32 2) = 30.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.cttz on any + integer bit width. Not all targets support all bit widths however.

+ +
+  declare i8 @llvm.cttz.i8 (i8  <src>)
+  declare i16 @llvm.cttz.i16(i16 <src>)
+  declare i32 @llvm.cttz.i32(i32 <src>)
+  declare i64 @llvm.cttz.i64(i64 <src>)
+  declare i256 @llvm.cttz.i256(i256 <src>)
+
+ +
Overview:
+

The 'llvm.cttz' family of intrinsic functions counts the number of + trailing zeros.

+ +
Arguments:
+

The only argument is the value to be counted. The argument may be of any + integer type. The return type must match the argument type.

+ +
Semantics:
+

The 'llvm.cttz' intrinsic counts the trailing (least significant) + zeros in a variable. If the src == 0 then the result is the size in bits of + the type of src. For example, llvm.cttz(2) = 1.

+ +
+ + + + +
+ +

LLVM provides intrinsics for some arithmetic with overflow operations.

+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.sadd.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+

The 'llvm.sadd.with.overflow' family of intrinsic functions perform + a signed addition of the two arguments, and indicate whether an overflow + occurred during the signed summation.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo signed addition.

+ +
Semantics:
+

The 'llvm.sadd.with.overflow' family of intrinsic functions perform + a signed addition of the two variables. They return a structure — the + first element of which is the signed summation, and the second element of + which is a bit specifying if the signed summation resulted in an + overflow.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %overflow, label %normal
+
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.uadd.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+

The 'llvm.uadd.with.overflow' family of intrinsic functions perform + an unsigned addition of the two arguments, and indicate whether a carry + occurred during the unsigned summation.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo unsigned addition.

+ +
Semantics:
+

The 'llvm.uadd.with.overflow' family of intrinsic functions perform + an unsigned addition of the two arguments. They return a structure — + the first element of which is the sum, and the second element of which is a + bit specifying if the unsigned summation resulted in a carry.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %carry, label %normal
+
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.ssub.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+

The 'llvm.ssub.with.overflow' family of intrinsic functions perform + a signed subtraction of the two arguments, and indicate whether an overflow + occurred during the signed subtraction.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo signed subtraction.

+ +
Semantics:
+

The 'llvm.ssub.with.overflow' family of intrinsic functions perform + a signed subtraction of the two arguments. They return a structure — + the first element of which is the subtraction, and the second element of + which is a bit specifying if the signed subtraction resulted in an + overflow.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %overflow, label %normal
+
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.usub.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+

The 'llvm.usub.with.overflow' family of intrinsic functions perform + an unsigned subtraction of the two arguments, and indicate whether an + overflow occurred during the unsigned subtraction.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo unsigned subtraction.

+ +
Semantics:
+

The 'llvm.usub.with.overflow' family of intrinsic functions perform + an unsigned subtraction of the two arguments. They return a structure — + the first element of which is the subtraction, and the second element of + which is a bit specifying if the unsigned subtraction resulted in an + overflow.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %overflow, label %normal
+
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.smul.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+ +

The 'llvm.smul.with.overflow' family of intrinsic functions perform + a signed multiplication of the two arguments, and indicate whether an + overflow occurred during the signed multiplication.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo signed multiplication.

+ +
Semantics:
+

The 'llvm.smul.with.overflow' family of intrinsic functions perform + a signed multiplication of the two arguments. They return a structure — + the first element of which is the multiplication, and the second element of + which is a bit specifying if the signed multiplication resulted in an + overflow.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %overflow, label %normal
+
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.umul.with.overflow + on any integer bit width.

+ +
+  declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
+  declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+  declare {i64, i1} @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+
+ +
Overview:
+

The 'llvm.umul.with.overflow' family of intrinsic functions perform an unsigned multiplication of the two arguments, and indicate whether an overflow occurred during the unsigned multiplication.

+ +
Arguments:
+

The arguments (%a and %b) and the first element of the result structure may + be of integer types of any bit width, but they must have the same bit + width. The second element of the result structure must be of + type i1. %a and %b are the two values that will + undergo unsigned multiplication.

+ +
Semantics:
+

The 'llvm.umul.with.overflow' family of intrinsic functions perform + an unsigned multiplication of the two arguments. They return a structure + — the first element of which is the multiplication, and the second + element of which is a bit specifying if the unsigned multiplication resulted + in an overflow.

+ +
Examples:
+
+  %res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
+  %sum = extractvalue {i32, i1} %res, 0
+  %obit = extractvalue {i32, i1} %res, 1
+  br i1 %obit, label %overflow, label %normal
+
+ +
+ + + + +
+ +

Half precision floating point is a storage-only format. This means that it is + a dense encoding (in memory) but does not support computation in the + format.

+ +

This means that code must first load the half-precision floating point value as an i16, then convert it to float with llvm.convert.from.fp16. Computation can then be performed on the float value (including extending to double etc). To store the value back to memory, it is first converted to float if needed, then converted to i16 with llvm.convert.to.fp16, and then stored as an i16 value.

+
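
Written out as a sketch, using this document's f32 spelling for the single precision type and an assumed global @x, the round trip looks like:

+  %h  = load i16* @x, align 2                        ; load the storage-only value
+  %f  = call f32 @llvm.convert.from.fp16(i16 %h)     ; widen to single precision
+  %g  = fadd f32 %f, %f                              ; compute in single precision
+  %h2 = call i16 @llvm.convert.to.fp16(f32 %g)       ; narrow back to half
+  store i16 %h2, i16* @x, align 2                    ; store as i16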
+ + + + +
+ +
Syntax:
+
+  declare i16 @llvm.convert.to.fp16(f32 %a)
+
+ +
Overview:
+

The 'llvm.convert.to.fp16' intrinsic function performs + a conversion from single precision floating point format to half precision + floating point format.

+ +
Arguments:
+

The intrinsic function takes a single argument: the value to be converted.

+ +
Semantics:
+

The 'llvm.convert.to.fp16' intrinsic function performs + a conversion from single precision floating point format to half precision + floating point format. The return value is an i16 which + contains the converted number.

+ +
Examples:
+
+  %res = call i16 @llvm.convert.to.fp16(f32 %a)
+  store i16 %res, i16* @x, align 2
+
+ +
+ + + + +
+ +
Syntax:
+
+  declare f32 @llvm.convert.from.fp16(i16 %a)
+
+ +
Overview:
+

The 'llvm.convert.from.fp16' intrinsic function performs + a conversion from half precision floating point format to single precision + floating point format.

+ +
Arguments:
+

The intrinsic function takes a single argument: the value to be converted.

+ +
Semantics:
+

The 'llvm.convert.from.fp16' intrinsic function performs a conversion from half precision floating point format to single precision floating point format. The input half-float value is represented by an i16 value.

+ +
Examples:
+
+  %a = load i16* @x, align 2
+  %res = call f32 @llvm.convert.from.fp16(i16 %a)
+
+ +
+ + + + +
+ +

The LLVM debugger intrinsics (which all start with llvm.dbg. + prefix), are described in + the LLVM Source + Level Debugging document.

+ +
+ + + + +
+ +

The LLVM exception handling intrinsics (which all start with + llvm.eh. prefix), are described in + the LLVM Exception + Handling document.

+ +
+ + + + +
+ +

This intrinsic makes it possible to excise one parameter, marked with + the nest attribute, from a function. + The result is a callable + function pointer lacking the nest parameter - the caller does not need to + provide a value for it. Instead, the value to use is stored in advance in a + "trampoline", a block of memory usually allocated on the stack, which also + contains code to splice the nest value into the argument list. This is used + to implement the GCC nested function address extension.

+ +

For example, if the function is + i32 f(i8* nest %c, i32 %x, i32 %y) then the resulting function + pointer has signature i32 (i32, i32)*. It can be created as + follows:

+ +
+  %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
+  %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
+  %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8* nest, i32, i32)* @f to i8*), i8* %nval)
+  %fp = bitcast i8* %p to i32 (i32, i32)*
+
+ +

The call %val = call i32 %fp(i32 %x, i32 %y) is then equivalent to %val = call i32 @f(i8* %nval, i32 %x, i32 %y).

+ +
+ + + + +
+ +
Syntax:
+
+  declare i8* @llvm.init.trampoline(i8* <tramp>, i8* <func>, i8* <nval>)
+
+ +
Overview:
+

This fills the memory pointed to by tramp with code and returns a + function pointer suitable for executing it.

+ +
Arguments:
+

The llvm.init.trampoline intrinsic takes three arguments, all + pointers. The tramp argument must point to a sufficiently large and + sufficiently aligned block of memory; this memory is written to by the + intrinsic. Note that the size and the alignment are target-specific - LLVM + currently provides no portable way of determining them, so a front-end that + generates this intrinsic needs to have some target-specific knowledge. + The func argument must hold a function bitcast to + an i8*.

+ +
Semantics:
+

The block of memory pointed to by tramp is filled with target + dependent code, turning it into a function. A pointer to this function is + returned, but needs to be bitcast to an appropriate + function pointer type before being called. The new function's signature + is the same as that of func with any arguments marked with + the nest attribute removed. At most one such nest argument + is allowed, and it must be of pointer type. Calling the new function is + equivalent to calling func with the same argument list, but + with nval used for the missing nest argument. If, after + calling llvm.init.trampoline, the memory pointed to + by tramp is modified, then the effect of any later call to the + returned function pointer is undefined.

+ +
+ + + + +
+ +

These intrinsic functions expand the "universal IR" of LLVM to represent + hardware constructs for atomic operations and memory synchronization. This + provides an interface to the hardware, not an interface to the programmer. It + is aimed at a low enough level to allow any programming models or APIs + (Application Programming Interfaces) which need atomic behaviors to map + cleanly onto it. It is also modeled primarily on hardware behavior. Just as + hardware provides a "universal IR" for source languages, it also provides a + starting point for developing a "universal" atomic operation and + synchronization IR.

+ +

These do not form an API such as high-level threading libraries, software transactional memory systems, atomic primitives, and intrinsic functions as found in BSD, GNU libc, atomic_ops, APR, and other system and application libraries. The hardware interface provided by LLVM should allow a clean implementation of all of these APIs and parallel programming models. No one model or paradigm should be selected above others unless the hardware itself ubiquitously does so.

+ +
+ + + +
+
Syntax:
+
+  declare void @llvm.memory.barrier(i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, i1 <device>)
+
+ +
Overview:
+

The llvm.memory.barrier intrinsic guarantees ordering between + specific pairs of memory access types.

+ +
Arguments:
+

The llvm.memory.barrier intrinsic requires five boolean arguments. + Each of the first four arguments enables a specific barrier as listed below. The + fifth argument specifies whether the barrier also applies to I/O, device, or uncached + memory.

+ +
    +
  • ll: load-load barrier
  • +
  • ls: load-store barrier
  • +
  • sl: store-load barrier
  • +
  • ss: store-store barrier
  • +
  • device: barrier applies to device and uncached memory also.
  • +
+ +
Semantics:
+

This intrinsic causes the system to enforce some ordering constraints upon + the loads and stores of the program. This barrier does not + indicate when any events will occur; it only enforces + an order in which they occur. For any of the specified pairs of load + and store operations (e.g. load-load or store-load), all of the first + operations preceding the barrier will complete before any of the second + operations following the barrier begin. Specifically, the semantics for each + pairing are as follows:

+ +
    +
  • ll: All loads before the barrier must complete before any load + after the barrier begins.
  • +
  • ls: All loads before the barrier must complete before any + store after the barrier begins.
  • +
  • ss: All stores before the barrier must complete before any + store after the barrier begins.
  • +
  • sl: All stores before the barrier must complete before any + load after the barrier begins.
  • +
+ +

These semantics are applied with a logical "and" behavior when more than one + is enabled in a single memory barrier intrinsic.

+ +

Backends may implement stronger barriers than those requested when they do + not support as fine-grained a barrier as requested. Some architectures do + not need all types of barriers; on such architectures, these become + no-ops.

+ +
Example:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 4, i32* %ptr
+
+%result1  = load i32* %ptr      ; yields {i32}:result1 = 4
+            call void @llvm.memory.barrier(i1 false, i1 true, i1 false, i1 false, i1 false)
+                                ; guarantee the above finishes
+            store i32 8, %ptr   ; before this begins
+
+ +
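For a source-level intuition, a full barrier builtin such as GCC's __sync_synchronize() is the kind of construct a front end can map onto this intrinsic with all four ordering flags set; the exact lowering is front-end specific, so treat this sketch as illustrative only:

    static int data;
    static int ready;

    void publish(int v) {
      data = v;
      __sync_synchronize();   /* full barrier: the store to data completes */
      ready = 1;              /* before the store to ready becomes visible */
    }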
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.atomic.cmp.swap on + any integer bit width and for different address spaces. Not all targets + support all bit widths, however.

+ +
+  declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* <ptr>, i8 <cmp>, i8 <val>)
+  declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* <ptr>, i16 <cmp>, i16 <val>)
+  declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* <ptr>, i32 <cmp>, i32 <val>)
+  declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* <ptr>, i64 <cmp>, i64 <val>)
+
+ +
Overview:
+

This intrinsic loads a value from memory and compares it to a given value. If they are + equal, it stores a new value into the memory.

+ +
Arguments:
+

The llvm.atomic.cmp.swap intrinsic takes three arguments. The result + as well as both cmp and val must be integer values with the + same bit width. The ptr argument must be a pointer to a value of + this integer type. While any bit width integer may be used, targets may only + lower representations they support in hardware.

+ +
Semantics:
+

This entire intrinsic must be executed atomically. It first loads the value + in memory pointed to by ptr and compares it with the + value cmp. If they are equal, val is stored into the + memory. The loaded value is yielded in all cases. This provides the + equivalent of an atomic compare-and-swap operation within the SSA + framework.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 4, i32* %ptr
+
+%val1     = add i32 4, 4
+%result1  = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 4, i32 %val1)
+                                          ; yields {i32}:result1 = 4
+%stored1  = icmp eq i32 %result1, 4       ; yields {i1}:stored1 = true
+%memval1  = load i32* %ptr                ; yields {i32}:memval1 = 8
+
+%val2     = add i32 1, 1
+%result2  = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 5, %val2)
+                                          ; yields {i32}:result2 = 8
+%stored2  = icmp eq i32 %result2, 5       ; yields {i1}:stored2 = false
+
+%memval2  = load i32* %ptr                ; yields {i32}:memval2 = 8
+
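At the C level, the GCC-style builtin __sync_val_compare_and_swap has the same shape as this intrinsic (it, too, returns the value the memory held before the operation) and is one plausible source construct a front end lowers here; treat that mapping as an assumption of this minimal spinlock sketch:

    static volatile int lock = 0;             /* 0 = free, 1 = held */

    void acquire(void) {
      /* Loop until the memory held 0 before our swap, i.e. until this
         thread is the one that changed the lock from 0 to 1. */
      while (__sync_val_compare_and_swap(&lock, 0, 1) != 0)
        ;                                     /* spin */
    }

    void release(void) {
      lock = 0;   /* a real lock would pair this with a release barrier */
    }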
+ +
+ + + +
+
Syntax:
+ +

This is an overloaded intrinsic. You can use llvm.atomic.swap on any + integer bit width. Not all targets support all bit widths, however.

+ +
+  declare i8 @llvm.atomic.swap.i8.p0i8(i8* <ptr>, i8 <val>)
+  declare i16 @llvm.atomic.swap.i16.p0i16(i16* <ptr>, i16 <val>)
+  declare i32 @llvm.atomic.swap.i32.p0i32(i32* <ptr>, i32 <val>)
+  declare i64 @llvm.atomic.swap.i64.p0i64(i64* <ptr>, i64 <val>)
+
+ +
Overview:
+

This intrinsic loads the value stored in memory at ptr and yields + it. It then stores the value val into the memory + at ptr.

+ +
Arguments:
+

The llvm.atomic.swap intrinsic takes two arguments. Both + the val argument and the result must be integers of the same bit + width. The first argument, ptr, must be a pointer to a value of this + integer type. The targets may only lower integer representations they + support.

+ +
Semantics:
+

This intrinsic loads the value pointed to by ptr, yields it, and + stores val back into ptr atomically. This provides the + equivalent of an atomic swap operation within the SSA framework.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 4, i32* %ptr
+
+%val1     = add i32 4, 4
+%result1  = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val1)
+                                        ; yields {i32}:result1 = 4
+%stored1  = icmp eq i32 %result1, 4     ; yields {i1}:stored1 = true
+%memval1  = load i32* %ptr              ; yields {i32}:memval1 = 8
+
+%val2     = add i32 1, 1
+%result2  = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val2)
+                                        ; yields {i32}:result2 = 8
+
+%stored2  = icmp eq i32 %result2, 8     ; yields {i1}:stored2 = true
+%memval2  = load i32* %ptr              ; yields {i32}:memval2 = 2
+
+ +
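As a source-level analogue, GCC's __sync_lock_test_and_set builtin performs an atomic exchange and returns the previous contents; treating its lowering to this intrinsic as an assumption, a sketch:

    static volatile int current_mode = 0;

    int set_mode(int new_mode) {
      /* Atomically install new_mode and learn what was there before. */
      return __sync_lock_test_and_set(&current_mode, new_mode);
    }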
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.atomic.load.add on + any integer bit width. Not all targets support all bit widths, however.

+ +
+  declare i8 @llvm.atomic.load.add.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.add.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.add.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.add.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
Overview:
+

This intrinsic adds delta to the value stored in memory + at ptr. It yields the original value at ptr.

+ +
Arguments:
+

The intrinsic takes two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

+ +
Semantics:
+

This intrinsic does a series of operations atomically. It first loads the + value stored at ptr. It then adds delta and stores the result + to ptr. It yields the original value stored at ptr.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 4, i32* %ptr
+%result1  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 4)
+                                ; yields {i32}:result1 = 4
+%result2  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 2)
+                                ; yields {i32}:result2 = 8
+%result3  = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 5)
+                                ; yields {i32}:result3 = 10
+%memval1  = load i32* %ptr      ; yields {i32}:memval1 = 15
+
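The corresponding GCC-style builtin is __sync_fetch_and_add, which likewise returns the pre-addition value; a small sketch, assuming that builtin is available:

    static int counter = 0;

    int next_ticket(void) {
      /* Returns the pre-increment value, mirroring the intrinsic's
         "yields the original value stored at ptr" semantics. */
      return __sync_fetch_and_add(&counter, 1);
    }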
+ +
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use llvm.atomic.load.sub on + any integer bit width and for different address spaces. Not all targets + support all bit widths, however.

+ +
+  declare i8 @llvm.atomic.load.sub.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.sub.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.sub.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
Overview:
+

This intrinsic subtracts delta from the value stored in memory at + ptr. It yields the original value at ptr.

+ +
Arguments:
+

The intrinsic takes two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

+ +
Semantics:
+

This intrinsic does a series of operations atomically. It first loads the + value stored at ptr. It then subtracts delta and stores the + result to ptr. It yields the original value stored + at ptr.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 8, i32* %ptr
+%result1  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 4)
+                                ; yields {i32}:result1 = 8
+%result2  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 2)
+                                ; yields {i32}:result2 = 4
+%result3  = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 5)
+                                ; yields {i32}:result3 = 2
+%memval1  = load i32* %ptr      ; yields {i32}:memval1 = -3
+
+ +
+ + + + +
+ +
Syntax:
+

These are overloaded intrinsics. You can + use llvm.atomic.load.and, llvm.atomic.load.nand, + llvm.atomic.load.or, and llvm.atomic.load.xor on any integer + bit width and for different address spaces. Not all targets support all bit + widths, however.

+ +
+  declare i8 @llvm.atomic.load.and.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.and.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.and.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.and.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.or.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.or.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.or.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.or.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.nand.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.nand.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.nand.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.xor.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.xor.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.xor.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.xor.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
Overview:
+

These intrinsics apply a bitwise operation (and, nand, or, xor) with delta to + the value stored in memory at ptr. They yield the original value + at ptr.

+ +
Arguments:
+

These intrinsics take two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

+ +
Semantics:
+

Each of these intrinsics performs a series of operations atomically. It first loads the + value stored at ptr, then applies the bitwise + operation with delta and stores the result to ptr. It yields the + original value stored at ptr.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 0x0F0F, i32* %ptr
+%result0  = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %ptr, i32 0xFF)
+                                ; yields {i32}:result0 = 0x0F0F
+%result1  = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %ptr, i32 0xFF)
+                                ; yields {i32}:result1 = 0xFFFFFFF0
+%result2  = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %ptr, i32 0x0F)
+                                ; yields {i32}:result2 = 0xF0
+%result3  = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %ptr, i32 0x0F)
+                                ; yields {i32}:result3 = 0xFF
+%memval1  = load i32* %ptr      ; yields {i32}:memval1 = 0xF0
+
+ +
+ + + + +
+ +
Syntax:
+

These are overloaded intrinsics. You can use llvm.atomic.load.max, + llvm.atomic.load.min, llvm.atomic.load.umax, and + llvm.atomic.load.umin on any integer bit width and for different + address spaces. Not all targets support all bit widths, however.

+ +
+  declare i8 @llvm.atomic.load.max.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.max.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.max.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.max.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.min.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.min.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.min.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.min.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.umax.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.umax.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.umax.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.umax.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
+  declare i8 @llvm.atomic.load.umin.i8.p0i8(i8* <ptr>, i8 <delta>)
+  declare i16 @llvm.atomic.load.umin.i16.p0i16(i16* <ptr>, i16 <delta>)
+  declare i32 @llvm.atomic.load.umin.i32.p0i32(i32* <ptr>, i32 <delta>)
+  declare i64 @llvm.atomic.load.umin.i64.p0i64(i64* <ptr>, i64 <delta>)
+
+ +
Overview:
+

These intrinsics take the signed or unsigned minimum or maximum of + delta and the value stored in memory at ptr. They yield the + original value at ptr.

+ +
Arguments:
+

These intrinsics take two arguments, the first a pointer to an integer value + and the second an integer value. The result is also an integer value. These + integer types can have any bit width, but they must all have the same bit + width. The targets may only lower integer representations they support.

+ +
Semantics:
+

Each of these intrinsics performs a series of operations atomically. It first loads the + value stored at ptr, then takes the signed or unsigned minimum or + maximum of delta and that value and stores the result to ptr. It + yields the original value stored at ptr.

+ +
Examples:
+
+%mallocP  = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
+%ptr      = bitcast i8* %mallocP to i32*
+            store i32 7, i32* %ptr
+%result0  = call i32 @llvm.atomic.load.min.i32.p0i32(i32* %ptr, i32 -2)
+                                ; yields {i32}:result0 = 7
+%result1  = call i32 @llvm.atomic.load.max.i32.p0i32(i32* %ptr, i32 8)
+                                ; yields {i32}:result1 = -2
+%result2  = call i32 @llvm.atomic.load.umin.i32.p0i32(i32* %ptr, i32 10)
+                                ; yields {i32}:result2 = 8
+%result3  = call i32 @llvm.atomic.load.umax.i32.p0i32(i32* %ptr, i32 30)
+                                ; yields {i32}:result3 = 8
+%memval1  = load i32* %ptr      ; yields {i32}:memval1 = 30
+
+ +
+ + + + + +
+ +

This class of intrinsics exists to provide information about the lifetime of memory + objects and ranges where variables are immutable.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.lifetime.start(i64 <size>, i8* nocapture <ptr>)
+
+ +
Overview:
+

The 'llvm.lifetime.start' intrinsic specifies the start of a memory + object's lifetime.

+ +
Arguments:
+

The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

+ +
Semantics:
+

This intrinsic indicates that before this point in the code, the value of the + memory pointed to by ptr is dead. This means that it is known to + never be used and has an undefined value. A load from the pointer that + precedes this intrinsic can be replaced with + 'undef'.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.lifetime.end(i64 <size>, i8* nocapture <ptr>)
+
+ +
Overview:
+

The 'llvm.lifetime.end' intrinsic specifies the end of a memory + object's lifetime.

+ +
Arguments:
+

The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

+ +
Semantics:
+

This intrinsic indicates that after this point in the code, the value of the + memory pointed to by ptr is dead. This means that it is known to + never be used and has an undefined value. Any stores into the memory object + following this intrinsic may be removed as dead. + +
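The classic situation these markers describe is two locals whose lifetimes do not overlap; with the start/end markers emitted at the scope boundaries, the backend may assign both the same stack slot. A C sketch (the helper use() is hypothetical):

    void use(char *p);    /* hypothetical consumer */

    void f(void) {
      {
        char a[512];
        use(a);
      }                   /* a dead: a front end may emit llvm.lifetime.end here */
      {
        char b[512];      /* b live: llvm.lifetime.start */
        use(b);
      }
      /* With the markers present, a and b may share one 512-byte slot
         rather than the frame reserving 1024 bytes. */
    }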

+ + + + +
+ +
Syntax:
+
+  declare {}* @llvm.invariant.start(i64 <size>, i8* nocapture <ptr>)
+
+ +
Overview:
+

The 'llvm.invariant.start' intrinsic specifies that the contents of + a memory object will not change.

+ +
Arguments:
+

The first argument is a constant integer representing the size of the + object, or -1 if it is variable sized. The second argument is a pointer to + the object.

+ +
Semantics:
+

This intrinsic indicates that until an llvm.invariant.end that uses + the return value, the referenced memory location is constant and + unchanging.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.invariant.end({}* <start>, i64 <size>, i8* nocapture <ptr>)
+
+ +
Overview:
+

The 'llvm.invariant.end' intrinsic specifies that the contents of + a memory object are mutable.

+ +
Arguments:
+

The first argument is the matching llvm.invariant.start intrinsic. + The second argument is a constant integer representing the size of the + object, or -1 if it is variable sized, and the third argument is a pointer + to the object.

+ +
Semantics:
+

This intrinsic indicates that the memory is mutable again.

+ +
+ + + + +
+ +

This class of intrinsics is designed to be generic and has no specific + purpose.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
+
+ +
Overview:
+

The 'llvm.var.annotation' intrinsic attaches an annotation string to a + local variable.

+ +
Arguments:
+

The first argument is a pointer to a value, the second is a pointer to a + global string, the third is a pointer to a global string which is the source + file name, and the last argument is the line number.

+ +
Semantics:
+

This intrinsic allows annotation of local variables with arbitrary strings. + This can be useful for special purpose optimizations that want to look for + these annotations. These have no other defined use, they are ignored by code + generation and optimization.

+ +
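One source-level hook of this kind is clang's annotate attribute on local variables; assuming that lowering (and with a made-up annotation string), a sketch:

    void g(void) {
      /* The string travels with the variable for any pass that wants
         to look for it; codegen and optimization ignore it. */
      int hits __attribute__((annotate("mypass:count-me"))) = 0;
      (void)hits;
    }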
+ + + + +
+ +
Syntax:
+

This is an overloaded intrinsic. You can use 'llvm.annotation' on + any integer bit width.

+ +
+  declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int>)
+  declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int>)
+  declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int>)
+  declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int>)
+  declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int>)
+
+ +
Overview:
+

The 'llvm.annotation' intrinsic attaches an annotation string to an + arbitrary integer expression and returns the expression's value.

+ +
Arguments:
+

The first argument is an integer value (result of some expression), the + second is a pointer to a global string, the third is a pointer to a global + string which is the source file name, and the last argument is the line + number. It returns the value of the first argument.

+ +
Semantics:
+

This intrinsic allows annotations to be put on arbitrary expressions with + arbitrary strings. This can be useful for special purpose optimizations that + want to look for these annotations. These have no other defined use, they + are ignored by code generation and optimization.

+ +
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.trap()
+
+ +
Overview:
+

The 'llvm.trap' intrinsic raises a target-dependent trap.

+ +
Arguments:
+

None.

+ +
Semantics:
+

This intrinsic is lowered to the target-dependent trap instruction. If the + target does not have a trap instruction, this intrinsic will be lowered to + a call to the abort() function.

+ +
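In C this intrinsic is typically reached through the __builtin_trap() builtin; a sketch of the usual guard pattern:

    int checked_div(int a, int b) {
      if (b == 0)
        __builtin_trap();   /* lowered to llvm.trap, or abort() as a fallback */
      return a / b;
    }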
+ + + + +
+ +
Syntax:
+
+  declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
+
+ +
Overview:
+

The llvm.stackprotector intrinsic takes the guard and + stores it onto the stack at slot. The stack slot is adjusted to + ensure that it is placed on the stack before local variables.

+ +
Arguments:
+

The llvm.stackprotector intrinsic requires two pointer + arguments. The first argument is the value loaded from the stack + guard @__stack_chk_guard. The second argument is an alloca + that has enough space to hold the value of the guard.

+ +
Semantics:
+

This intrinsic causes the prologue/epilogue inserter to force the position of + the AllocaInst stack slot to be before local variables on the + stack. This is to ensure that if a local variable on the stack is + overwritten, it will destroy the value of the guard. When the function exits, + the guard on the stack is checked against the original guard. If they are + different, then the program aborts by calling the __stack_chk_fail() + function.

+ +
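The kind of function that receives this protection (when compiled with a stack-protector option such as -fstack-protector) looks like the following; exactly when a front end emits the guard is heuristic, so this is only illustrative:

    #include <string.h>

    void copy_name(const char *src) {
      char buf[16];
      /* An overflow of buf runs into the guard slot before the saved
         return address, so the epilogue check catches the corruption
         and calls __stack_chk_fail(). */
      strcpy(buf, src);
    }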
+ + + + +
+ +
Syntax:
+
+  declare i32 @llvm.objectsize.i32(i8* <object>, i1 <type>)
+  declare i64 @llvm.objectsize.i64(i8* <object>, i1 <type>)
+
+ +
Overview:
+

The llvm.objectsize intrinsic is designed to provide information to + the optimizers to determine at compile time whether a) an operation (like + memcpy) will overflow a buffer that corresponds to an object, or b) a + runtime check for overflow is unnecessary. An object in this context means + an allocation of a specific class, structure, array, or other object.

+ +
Arguments:
+

The llvm.objectsize intrinsic takes two arguments. The first + argument is a pointer to, or into, the object. The second argument + is a boolean 0 or 1, which determines whether the + maximum (0) or minimum (1) number of remaining bytes is returned. It must be a literal 0 or + 1; variables are not allowed.

+ +
Semantics:
+

The llvm.objectsize intrinsic is lowered to either a constant + representing the size of the object concerned, or i32/i64 -1 or 0, + depending on the type argument, if the size cannot be determined at + compile time.

+ +
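In C, the __builtin_object_size builtin is the usual way this query is expressed; its modes 0 and 1 correspond to the maximum and minimum queries described above (treat that mapping as an assumption of this sketch):

    #include <stddef.h>

    size_t tail_room(void) {
      char buf[64];
      /* Resolvable at compile time: 48 bytes remain past buf + 16. */
      return __builtin_object_size(buf + 16, 0);
    }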
+ + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/Lexicon.html b/final/docs/Lexicon.html new file mode 100644 index 00000000000..82a58aa1482 --- /dev/null +++ b/final/docs/Lexicon.html @@ -0,0 +1,277 @@ + + + + + The LLVM Lexicon + + + + + +
The LLVM Lexicon
+

NOTE: This document is a work in progress!

+ +
Table Of Contents
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
- A -
ADCE
- B -
BURS
- C -
CSE
- D -
DAG, Derived Pointer, DSA, DSE
- G -
GC
- I -
IPA, IPO, ISel
- L -
LCSSA, LICM, Load-VN, LTO
- M -
MC
- O -
Object Pointer
- P -
PRE
- R -
RAUW, Reassociation, Root
- S -
Safe Point, SCC, SCCP, SDISel, SRoA, Stack Map
+
+ + +
Definitions
+ + + +
+
+
ADCE
+
Aggressive Dead Code Elimination
+
+
+ + +
+
+
BURS
+
Bottom Up Rewriting System—A method of instruction selection for + code generation. An example is the BURG tool.
+
+
+ + +
+
+
CSE
+
Common Subexpression Elimination. An optimization that removes redundant + computation of common subexpressions. For example, (a+b)*(a+b) contains + the subexpression (a+b) twice. This optimization performs the addition only + once and then reuses the result for the multiply (but only if doing so is + computationally correct/safe). +
+
+ + +
+
+
DAG
+
Directed Acyclic Graph
+
Derived Pointer
+
A pointer to the interior of an object, such that a garbage collector + is unable to use the pointer for reachability analysis. While a derived + pointer is live, the corresponding object pointer must be kept in a root, + otherwise the collector might free the referenced object. With copying + collectors, derived pointers pose an additional hazard that they may be + invalidated at any safe point. This term is used in + opposition to object pointer.
+
DSA
+
Data Structure Analysis
+
DSE
+
Dead Store Elimination
+
+
+ + +
+
+
GC
+
Garbage Collection. The practice of using reachability analysis instead + of explicit memory management to reclaim unused memory.
+
+
+ + +
+
+
Heap
+
In garbage collection, the region of memory which is managed using + reachability analysis.
+
+
+ + +
+
+
IPA
+
Inter-Procedural Analysis. Refers to any variety of code analysis that + occurs between procedures, functions or compilation units (modules).
+
IPO
+
Inter-Procedural Optimization. Refers to any variety of code + optimization that occurs between procedures, functions or compilation units + (modules).
+
ISel
+
Instruction Selection.
+
+
+ + +
+
+
LCSSA
+
Loop-Closed Static Single Assignment Form
+
LICM
+
Loop Invariant Code Motion
+
Load-VN
+
Load Value Numbering
+
LTO
+
Link-Time Optimization
+
+
+ + +
+
+
MC
+
Machine Code
+
+
+ + +
+
+
Object Pointer
+
A pointer to an object such that the garbage collector is able to trace + references contained within the object. This term is used in opposition to + derived pointer.
+
+
+ + + +
+
+
PRE
+
Partial Redundancy Elimination
+
+
+ + + +
+
+
RAUW
An abbreviation for Replace + All Uses With. The functions User::replaceUsesOfWith(), + Value::replaceAllUsesWith(), and Constant::replaceUsesOfWithOnConstant() + implement the replacement of one Value with another by iterating over its + def/use chain and fixing up all of the pointers to point to the new value. + See also def/use chains. +
+
Reassociation
Rearranging + associative expressions to promote better redundancy elimination and other + optimization. For example, changing (A+B-A) into (B+A-A), permitting it to + be optimized into (B+0) then (B).
+
Root
In garbage collection, a + pointer variable lying outside of the heap from which + the collector begins its reachability analysis. In the context of code + generation, "root" almost always refers to a "stack root" -- a local or + temporary variable within an executing function.
+
+
+ + + +
+
+
Safe Point
+
In garbage collection, it is necessary to identify stack + roots so that reachability analysis may proceed. It may be infeasible to + provide this information for every instruction, so instead the information + is calculated only at designated safe points. With a copying collector, + derived pointers must not be retained across + safe points and object pointers must be + reloaded from stack roots.
+
SDISel
+
Selection DAG Instruction Selection.
+
SCC
+
Strongly Connected Component
+
SCCP
+
Sparse Conditional Constant Propagation
+
SRoA
+
Scalar Replacement of Aggregates
+
SSA
+
Static Single Assignment
+
Stack Map
+
In garbage collection, metadata emitted by the code generator which + identifies roots within the stack frame of an executing + function.
+
+
+ +
+
Valid CSS Valid HTML 4.01 The LLVM Team
+The LLVM Compiler Infrastructure
+Last modified: $Date$ +
+ + + diff --git a/final/docs/LinkTimeOptimization.html b/final/docs/LinkTimeOptimization.html new file mode 100644 index 00000000000..dbe8f389ce8 --- /dev/null +++ b/final/docs/LinkTimeOptimization.html @@ -0,0 +1,390 @@ + + + + LLVM Link Time Optimization: Design and Implementation + + + +
+ LLVM Link Time Optimization: Design and Implementation +
+ + + +
+

Written by Devang Patel and Nick Kledzik

+
+ + + + + +
+

+LLVM features powerful intermodular optimizations which can be used at link +time. Link Time Optimization (LTO) is another name for intermodular optimization +when performed during the link stage. This document describes the interface +and design between the LTO optimizer and the linker.

+
+ + + + + +
+

+The LLVM Link Time Optimizer provides complete transparency, while doing +intermodular optimization, in the compiler tool chain. Its main goal is to let +the developer take advantage of intermodular optimizations without making any +significant changes to the developer's makefiles or build system. This is +achieved through tight integration with the linker. In this model, the linker +treates LLVM bitcode files like native object files and allows mixing and +matching among them. The linker uses libLTO, a shared +object, to handle LLVM bitcode files. This tight integration between +the linker and LLVM optimizer helps to do optimizations that are not possible +in other models. The linker input allows the optimizer to avoid relying on +conservative escape analysis. +

+
+ + + + +
+

The following example illustrates the advantages of LTO's integrated + approach and clean interface. This example requires a system linker which + supports LTO through the interface described in this document. Here, + llvm-gcc transparently invokes the system linker.

+
    +
  • Input source file a.c is compiled into LLVM bitcode form. +
  • Input source file main.c is compiled into native object code. +
+
+--- a.h ---
+extern int foo1(void);
+extern void foo2(void);
+extern void foo4(void);
+--- a.c ---
+#include "a.h"
+
+static signed int i = 0;
+
+void foo2(void) {
+ i = -1;
+}
+
+static int foo3() {
+foo4();
+return 10;
+}
+
+int foo1(void) {
+int data = 0;
+
+if (i < 0) { data = foo3(); }
+
+data = data + 42;
+return data;
+}
+
+--- main.c ---
+#include <stdio.h>
+#include "a.h"
+
+void foo4(void) {
+ printf ("Hi\n");
+}
+
+int main() {
+ return foo1();
+}
+
+--- command lines ---
+$ llvm-gcc --emit-llvm -c a.c -o a.o  # <-- a.o is LLVM bitcode file
+$ llvm-gcc -c main.c -o main.o # <-- main.o is native object file
+$ llvm-gcc a.o main.o -o main # <-- standard link command without any modifications
+
+

In this example, the linker recognizes that foo2() is an + externally visible symbol defined in an LLVM bitcode file. The linker completes + its usual symbol resolution + pass and finds that foo2() is not used anywhere. This information + is used by the LLVM optimizer and it removes foo2(). As soon as + foo2() is removed, the optimizer recognizes that the condition + i < 0 is always false, which means foo3() is never + used. Hence, the optimizer removes foo3() as well. This, in turn, + enables the linker to remove foo4(). This example illustrates the + advantage of tight integration with the linker. Here, the optimizer cannot + remove foo3() without the linker's input. +

+
+ + + + +
+
+
Compiler driver invokes link time optimizer separately.
+
In this model the link time optimizer is not able to take advantage of + information collected during the linker's normal symbol resolution phase. + In the above example, the optimizer can not remove foo2() without + the linker's input because it is externally visible. This in turn prohibits + the optimizer from removing foo3().
+
Use separate tool to collect symbol information from all object + files.
+
In this model, a new, separate, tool or library replicates the linker's + capability to collect information for link time optimization. Not only is + this code duplication difficult to justify, but it also has several other + disadvantages. For example, the linking semantics and the features + provided by the linker vary across platforms. This means the new tool + would need to support all such features and platforms in one + super tool, or a separate tool per platform would be required. This increases + maintenance cost for the link time optimizer significantly, which is not + necessary. This approach also requires staying synchronized with linker + developments on various platforms, which is not the main focus of the link + time optimizer. Finally, this approach increases the end user's build time due + to the duplication of work done by this separate tool and the linker itself. +
+
+
+ + + + +
+

The linker collects information about symbol definitions and uses in + various link objects, which is more accurate than any information collected + by other tools during typical build cycles. The linker collects this + information by looking at the definitions and uses of symbols in native .o + files and using symbol visibility information. The linker also uses + user-supplied information, such as a list of exported symbols. The LLVM + optimizer collects control flow information and data flow information, and knows + much more about program structure from the optimizer's point of view. + Our goal is to take advantage of tight integration between the linker and + the optimizer by sharing this information during various linking phases. +

+
+ + + + +
+

The linker first reads all object files in natural order and collects + symbol information. This includes native object files as well as LLVM bitcode + files. To minimize the cost to the linker in the case that all .o files + are native object files, the linker only calls lto_module_create() + when a supplied object file is found to not be a native object file. If + lto_module_create() returns that the file is an LLVM bitcode file, + the linker + then iterates over the module using lto_module_get_symbol_name() and + lto_module_get_symbol_attribute() to get all symbols defined and + referenced. + This information is added to the linker's global symbol table. +

+

The lto* functions are all implemented in a shared object libLTO. This + allows the LLVM LTO code to be updated independently of the linker tool. + On platforms that support it, the shared object is lazily loaded. +

+
+ + + + +
+

In this stage, the linker resolves symbols using the global symbol table. + It may report undefined symbol errors, read archive members, replace + weak symbols, etc. The linker is able to do this seamlessly even though it + does not know the exact content of input LLVM bitcode files. If dead code + stripping is enabled then the linker collects the list of live symbols. +

+
+ + + +
+

After symbol resolution, the linker tells the LTO shared object which + symbols are needed by native object files. In the example above, the linker + reports that only foo1() is used by native object files using + lto_codegen_add_must_preserve_symbol(). Next, the linker invokes + the LLVM optimizer and code generators using lto_codegen_compile(), + which returns a native object file created by merging the LLVM bitcode files + and applying various optimization passes. +

+
+ + + + +
+

In this phase, the linker reads the optimized native object file and + updates the internal global symbol table to reflect any changes. The linker + also collects information about any changes in use of external symbols by + LLVM bitcode files. In the example above, the linker notes that + foo4() is no longer used. If dead code stripping is enabled then + the linker refreshes the live symbol information appropriately and performs + dead code stripping.

+

After this phase, the linker continues linking as if it never saw LLVM + bitcode files.

+
+ + +
+libLTO +
+ +
+

libLTO is a shared object that is part of the LLVM tools, and + is intended for use by a linker. libLTO provides an abstract C + interface to use the LLVM interprocedural optimizer without exposing details + of LLVM's internals. The intention is to keep the interface as stable as + possible even when the LLVM optimizer continues to evolve. It should even + be possible for a completely different compilation technology to provide + a different libLTO that works with their object files and the standard + linker tool.

+
+ + + + +
+ +

A non-native object file is handled via an lto_module_t. +The following functions allow the linker to check if a file (on disk +or in a memory buffer) is a file which libLTO can process:

+ +
+lto_module_is_object_file(const char*)
+lto_module_is_object_file_for_target(const char*, const char*)
+lto_module_is_object_file_in_memory(const void*, size_t)
+lto_module_is_object_file_in_memory_for_target(const void*, size_t, const char*)
+
+ +

If the object file can be processed by libLTO, the linker creates a +lto_module_t by using one of

+ +
+lto_module_create(const char*)
+lto_module_create_from_memory(const void*, size_t)
+
+ +

and when done, the handle is released via

+ +
+lto_module_dispose(lto_module_t)
+
+ +

The linker can introspect the non-native object file by getting the number of +symbols and getting the name and attributes of each symbol via:

+ +
+lto_module_get_num_symbols(lto_module_t)
+lto_module_get_symbol_name(lto_module_t, unsigned int)
+lto_module_get_symbol_attribute(lto_module_t, unsigned int)
+
+ +

The attributes of a symbol include the alignment, visibility, and kind.

+
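Pulling these calls together, a linker-side scan might look like the following sketch (error handling trimmed, and a printf stands in for insertion into the linker's global symbol table):

    #include <llvm-c/lto.h>
    #include <stdio.h>

    static void scan_file(const char *path) {
      if (!lto_module_is_object_file(path))
        return;                          /* native object: handled as usual */

      lto_module_t mod = lto_module_create(path);
      if (!mod)
        return;

      unsigned n = lto_module_get_num_symbols(mod);
      for (unsigned i = 0; i != n; ++i)
        printf("%s (attributes 0x%x)\n",
               lto_module_get_symbol_name(mod, i),
               (unsigned)lto_module_get_symbol_attribute(mod, i));

      /* A real linker would keep mod for the code generation phase
         rather than disposing of it here. */
      lto_module_dispose(mod);
    }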
+ + + + +
+ +

Once the linker has loaded each non-native object file into an +lto_module_t, it can request libLTO to process them all and +generate a native object file. This is done in a couple of steps. +First, a code generator is created with:

+ +
lto_codegen_create()
+ +

Then, each non-native object file is added to the code generator with:

+ +
+lto_codegen_add_module(lto_code_gen_t, lto_module_t)
+
+ +

The linker then has the option of setting some codegen options. Whether or +not to generate DWARF debug info is set with:

+ +
lto_codegen_set_debug_model(lto_code_gen_t)
+ +

Which kind of position independence is set with:

+ +
lto_codegen_set_pic_model(lto_code_gen_t) 
+ +

And each symbol that is referenced by a native object file or otherwise must +not be optimized away is set with:

+ +
+lto_codegen_add_must_preserve_symbol(lto_code_gen_t, const char*)
+
+ +

After all these settings are done, the linker requests that a native object +file be created from the modules with the settings using:

+ +
lto_codegen_compile(lto_code_gen_t, size_t*)
+ +

which returns a pointer to a buffer containing the generated native +object file. The linker then parses that and links it with the rest +of the native object files.

+ +
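Combined, the code generation calls form a sequence like this sketch (option-setting calls omitted; the leading underscore on the preserved symbol reflects the platform mangling in the example above and is an assumption):

    #include <llvm-c/lto.h>
    #include <stddef.h>

    const void *merge_and_compile(lto_module_t *mods, unsigned nmods,
                                  size_t *out_len) {
      lto_code_gen_t cg = lto_codegen_create();
      for (unsigned i = 0; i != nmods; ++i)
        lto_codegen_add_module(cg, mods[i]);

      /* Pin the one symbol the native main.o still references. */
      lto_codegen_add_must_preserve_symbol(cg, "_foo1");

      /* Returns a buffer holding the merged, optimized native object. */
      return lto_codegen_compile(cg, out_len);
    }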
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Devang Patel and Nick Kledzik
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + + diff --git a/final/docs/Makefile b/final/docs/Makefile new file mode 100644 index 00000000000..389fd90a485 --- /dev/null +++ b/final/docs/Makefile @@ -0,0 +1,130 @@ +##===- docs/Makefile ---------------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := .. +DIRS := CommandGuide tutorial + +ifdef BUILD_FOR_WEBSITE +PROJ_OBJ_DIR = . +DOXYGEN = doxygen + +$(PROJ_OBJ_DIR)/doxygen.cfg: doxygen.cfg.in + cat $< | sed \ + -e 's/@abs_top_srcdir@/../g' \ + -e 's/@DOT@/dot/g' \ + -e 's/@PACKAGE_VERSION@/mainline/' \ + -e 's/@abs_top_builddir@/../g' > $@ +endif + +include $(LEVEL)/Makefile.common + +HTML := $(wildcard $(PROJ_SRC_DIR)/*.html) \ + $(wildcard $(PROJ_SRC_DIR)/*.css) +IMAGES := $(wildcard $(PROJ_SRC_DIR)/img/*.*) +DOXYFILES := doxygen.cfg.in doxygen.css doxygen.footer doxygen.header \ + doxygen.intro +EXTRA_DIST := $(HTML) $(DOXYFILES) llvm.css CommandGuide img + +.PHONY: install-html install-doxygen doxygen install-ocamldoc ocamldoc generated + +install_targets := install-html +ifeq ($(ENABLE_DOXYGEN),1) +install_targets += install-doxygen +endif +ifdef OCAMLDOC +ifneq (,$(filter ocaml,$(BINDINGS_TO_BUILD))) +install_targets += install-ocamldoc +endif +endif +install-local:: $(install_targets) + +generated_targets := doxygen +ifdef OCAMLDOC +generated_targets += ocamldoc +endif + +# Live documentation is generated for the web site using this target: +# 'make generated BUILD_FOR_WEBSITE=1' +generated:: $(generated_targets) + +install-html: $(PROJ_OBJ_DIR)/html.tar.gz + $(Echo) Installing HTML documentation + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html/img + $(Verb) $(DataInstall) $(HTML) $(DESTDIR)$(PROJ_docsdir)/html + $(Verb) $(DataInstall) $(IMAGES) $(DESTDIR)$(PROJ_docsdir)/html/img + $(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/html.tar.gz $(DESTDIR)$(PROJ_docsdir) + +$(PROJ_OBJ_DIR)/html.tar.gz: $(HTML) + $(Echo) Packaging HTML documentation + $(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/html.tar + $(Verb) cd $(PROJ_SRC_DIR) && \ + $(TAR) cf $(PROJ_OBJ_DIR)/html.tar *.html + $(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/html.tar + +install-doxygen: doxygen + $(Echo) Installing doxygen documentation + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/html/doxygen + $(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(DESTDIR)$(PROJ_docsdir) + $(Verb) cd $(PROJ_OBJ_DIR)/doxygen && \ + $(FIND) . 
-type f -exec \ + $(DataInstall) {} $(DESTDIR)$(PROJ_docsdir)/html/doxygen \; + +doxygen: regendoc $(PROJ_OBJ_DIR)/doxygen.tar.gz + +regendoc: + $(Echo) Building doxygen documentation + $(Verb) if test -e $(PROJ_OBJ_DIR)/doxygen ; then \ + $(RM) -rf $(PROJ_OBJ_DIR)/doxygen ; \ + fi + $(Verb) $(DOXYGEN) $(PROJ_OBJ_DIR)/doxygen.cfg + +$(PROJ_OBJ_DIR)/doxygen.tar.gz: $(DOXYFILES) $(PROJ_OBJ_DIR)/doxygen.cfg + $(Echo) Packaging doxygen documentation + $(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/doxygen.tar + $(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/doxygen.tar doxygen + $(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/doxygen.tar + $(Verb) $(CP) $(PROJ_OBJ_DIR)/doxygen.tar.gz $(PROJ_OBJ_DIR)/doxygen/html/ + +userloc: $(LLVM_SRC_ROOT)/docs/userloc.html + +$(LLVM_SRC_ROOT)/docs/userloc.html: + $(Echo) Making User LOC Table + $(Verb) cd $(LLVM_SRC_ROOT) ; ./utils/userloc.pl -details -recurse \ + -html lib include tools runtime utils examples autoconf test > docs/userloc.html + +install-ocamldoc: ocamldoc + $(Echo) Installing ocamldoc documentation + $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_docsdir)/ocamldoc/html + $(Verb) $(DataInstall) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(DESTDIR)$(PROJ_docsdir) + $(Verb) cd $(PROJ_OBJ_DIR)/ocamldoc && \ + $(FIND) . -type f -exec \ + $(DataInstall) {} $(DESTDIR)$(PROJ_docsdir)/ocamldoc/html \; + +ocamldoc: regen-ocamldoc + $(Echo) Packaging ocamldoc documentation + $(Verb) $(RM) -rf $(PROJ_OBJ_DIR)/ocamldoc.tar* + $(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc + $(Verb) $(GZIPBIN) $(PROJ_OBJ_DIR)/ocamldoc.tar + $(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/ + +regen-ocamldoc: + $(Echo) Building ocamldoc documentation + $(Verb) if test -e $(PROJ_OBJ_DIR)/ocamldoc ; then \ + $(RM) -rf $(PROJ_OBJ_DIR)/ocamldoc ; \ + fi + $(Verb) $(MAKE) -C $(LEVEL)/bindings/ocaml ocamldoc + $(Verb) $(MKDIR) $(PROJ_OBJ_DIR)/ocamldoc/html + $(Verb) \ + $(OCAMLDOC) -d $(PROJ_OBJ_DIR)/ocamldoc/html -sort -colorize-code -html \ + `$(FIND) $(LEVEL)/bindings/ocaml -name "*.odoc" -exec echo -load '{}' ';'` + +uninstall-local:: + $(Echo) Uninstalling Documentation + $(Verb) $(RM) -rf $(DESTDIR)$(PROJ_docsdir) diff --git a/final/docs/MakefileGuide.html b/final/docs/MakefileGuide.html new file mode 100644 index 00000000000..2ef0954a8bb --- /dev/null +++ b/final/docs/MakefileGuide.html @@ -0,0 +1,1034 @@ + + + + + LLVM Makefile Guide + + + + +
LLVM Makefile Guide
+ +
    +
  1. Introduction
  2. +
  3. General Concepts +
      +
    1. Projects
    2. +
    3. Variable Values
    4. +
    5. Including Makefiles +
        +
      1. Makefile
      2. +
      3. Makefile.common
      4. +
      5. Makefile.config
      6. +
      7. Makefile.rules
      8. +
      +
    6. +
    7. Comments
    8. +
    +
  4. +
  5. Tutorial +
      +
    1. Libraries +
        +
      1. Bitcode Modules
      2. +
      3. Loadable Modules
      4. +
      +
    2. +
    3. Tools +
        +
      1. JIT Tools
      2. +
      +
    4. +
    5. Projects
    6. +
    +
  6. +
  7. Targets Supported +
      +
    1. all
    2. +
    3. all-local
    4. +
    5. check
    6. +
    7. check-local
    8. +
    9. clean
    10. +
    11. clean-local
    12. +
    13. dist
    14. +
    15. dist-check
    16. +
    17. dist-clean
    18. +
    19. install
    20. +
    21. preconditions
    22. +
    23. printvars
    24. +
    25. reconfigure
    26. +
    27. spotless
    28. +
    29. tags
    30. +
    31. uninstall
    32. +
    +
  8. +
  9. Using Variables +
      +
    1. Control Variables
    2. +
    3. Override Variables
    4. +
    5. Readable Variables
    6. +
    7. Internal Variables
    8. +
    +
  10. +
+ +
+

Written by Reid Spencer

+
+ + + + + +
+

This document provides usage information about the LLVM makefile + system. While loosely patterned after the BSD makefile system, LLVM has taken + a departure from BSD in order to implement additional features needed by LLVM. + Although makefile systems such as automake were attempted at one point, it + has become clear that the features needed by LLVM are too far beyond the + makefile norm to be satisfied by a more limited tool. Consequently, LLVM simply + requires GNU Make 3.79, a widely portable makefile processor. LLVM unabashedly + makes heavy use of the features of GNU Make, so the dependency on GNU Make is firm. If + you're not familiar with make, it is recommended that you read the + GNU Makefile + Manual.

+

While this document is rightly part of the + LLVM Programmer's Manual, it is treated + separately here because of the volume of content and because it is often an + early source of bewilderment for new developers.

+
+ + + + + +
+

The LLVM Makefile System is the component of LLVM that is responsible for + building the software, testing it, generating distributions, checking those + distributions, installing and uninstalling, etc. It consists of several + files throughout the source tree. These files and other general concepts are + described in this section.

+
+ + + +
+

The LLVM Makefile System is quite generous. It not only builds its own + software, but it can build yours too. Built into the system is knowledge of + the llvm/projects directory. Any directory under projects + that has both a configure script and a Makefile is assumed + to be a project that uses the LLVM Makefile system. Building software that + uses LLVM does not require the LLVM Makefile System nor even placement in the + llvm/projects directory. However, doing so will allow your project + to get up and running quickly by utilizing the built-in features that are used + to compile LLVM. LLVM compiles itself using the same features of the makefile + system as used for projects.

+

For complete details on setting up your project's configuration, simply + mimic the llvm/projects/sample project or, for further details, + consult the Projects.html page.

+
+ + + +
+

To use the makefile system, you simply create a file named + Makefile in your directory and declare values for certain variables. + The variables and values that you select determine what the makefile system + will do. These variables enable rules and processing in the makefile system + that automatically Do The Right Thing™. +

+ + + +
+

Setting variables alone is not enough. You must include into your Makefile + additional files that provide the rules of the LLVM Makefile system. The + various files involved are described in the sections that follow.

+
+ + + +
+

Each directory to participate in the build needs to have a file named + Makefile. This is the file first read by make. It has three + sections:

+
    +
  1. Settable Variables - Required that must be set + first.
  2. +
  3. include $(LEVEL)/Makefile.common + - include the LLVM Makefile system. +
  4. Override Variables - Override variables set by + the LLVM Makefile system. +
+
+ + + +
+

Every project must have a Makefile.common file at its top source + directory. This file serves three purposes:

+
    +
  1. It includes the project's configuration makefile to obtain values + determined by the configure script. This is done by including the + $(LEVEL)/Makefile.config file.
  2. +
  3. It specifies any other (static) values that are needed throughout the + project. Only values that are used in all or a large proportion of the + project's directories should be placed here.
  4. +
  5. It includes the standard rules for the LLVM Makefile system, + $(LLVM_SRC_ROOT)/Makefile.rules. + This file is the "guts" of the LLVM Makefile system.
  6. +
+
+ + + +
+

Every project must have a Makefile.config at the top of its + build directory. This file is generated by the + configure script from the pattern provided by the + Makefile.config.in file located at the top of the project's + source directory. The contents of this file depend largely on what + configuration items the project uses, however most projects can get what they + need by just relying on LLVM's configuration found in + $(LLVM_OBJ_ROOT)/Makefile.config. +

+ + + +
+

This file, located at $(LLVM_SRC_ROOT)/Makefile.rules is the heart + of the LLVM Makefile System. It provides all the logic, dependencies, and + rules for building the targets supported by the system. What it does largely + depends on the values of make variables that + have been set before Makefile.rules is included. +

+ + + +
+

User Makefiles need not have comments in them unless the construction is + unusual or it does not strictly follow the rules and patterns of the LLVM + makefile system. Makefile comments are invoked with the pound (#) character. + The # character and any text following it, to the end of the line, are ignored + by make.

+
+ + + + +
+

This section provides some examples of the different kinds of modules you + can build with the LLVM makefile system. In general, each directory you + provide will build a single object although that object may be composed of + additionally compiled components.

+
+ + + +
+

Only a few variable definitions are needed to build a regular library. + Normally, the makefile system will build all the software into a single + libname.o (pre-linked) object. This means the library is not + searchable and that the distinction between compilation units has been + dissolved. Optionally, you can ask for a shared library (.so) or archive + library (.a) to be built. Archive libraries are the default. For example:

+

+      LIBRARYNAME = mylib
+      SHARED_LIBRARY = 1
+      ARCHIVE_LIBRARY = 1
+  
+

says to build a library named "mylib" with both a shared library + (mylib.so) and an archive library (mylib.a) version. The + contents of all the + libraries produced will be the same; they are just constructed differently. + Note that you normally do not need to specify the sources involved. The LLVM + Makefile system will infer the source files from the contents of the source + directory.

+

The LOADABLE_MODULE=1 directive can be used in conjunction with + SHARED_LIBRARY=1 to indicate that the resulting shared library should + be openable with the dlopen function and searchable with the + dlsym function (or your operating system's equivalents). While this + isn't strictly necessary on Linux and a few other platforms, it is required + on systems like HP-UX and Darwin. You should use LOADABLE_MODULE for + any shared library that you intend to be loaded into a tool via the + -load option. See the + WritingAnLLVMPass.html document + for an example of why you might want to do this.

+ + + +
+

In some situations, it is desirable to build a single bitcode module from + a variety of sources, instead of an archive, shared library, or bitcode + library. Bitcode modules can be specified in addition to any of the other + types of libraries by defining the MODULE_NAME + variable. For example:

+

+      LIBRARYNAME = mylib
+      BYTECODE_LIBRARY = 1
+      MODULE_NAME = mymod
+  
+

will build a module named mymod.bc from the sources in the + directory. This module will be an aggregation of all the bitcode modules + derived from the sources. The example will also build a bitcode archive + containing a bitcode module for each compiled source file. The difference is + subtle, but important depending on how the module or library is to be linked. +

+
+ + + +
+

In some situations, you need to create a loadable module. Loadable modules + can be loaded into programs like opt or llc to specify + additional passes to run or targets to support. Loadable modules are also + useful for debugging a pass or providing a pass with another package if that + pass can't be included in LLVM.

+

LLVM provides complete support for building such a module. All you need to + do is use the LOADABLE_MODULE variable in your Makefile. For example, to + build a loadable module named MyMod that uses the LLVM libraries + LLVMSupport.a and LLVMSystem.a, you would specify:

+

+     LIBRARYNAME := MyMod
+     LOADABLE_MODULE := 1
+     LINK_COMPONENTS := support system
+  
+

Use of the LOADABLE_MODULE facility implies several things:

+
    +
  1. There will be no "lib" prefix on the module. This differentiates it from + a standard shared library of the same name.
  2. +
  3. The SHARED_LIBRARY variable is turned + on.
  4. +
  5. The LINK_LIBS_IN_SHARED variable + is turned on.
  6. +
+

A loadable module is loaded by LLVM via the facilities of libtool's libltdl + library, which is part of the lib/System implementation.

+
+ + + +
+

For building executable programs (tools), you must provide the name of the + tool and the names of the libraries you wish to link with the tool. For + example:

+

+      TOOLNAME = mytool
+      USEDLIBS = mylib
+      LINK_COMPONENTS = support system
+  
+

says that we are to build a tool named mytool and that it requires + three libraries: mylib, LLVMSupport.a and + LLVMSystem.a.

+

Note that two different variables are used to indicate which libraries are + linked: USEDLIBS and LLVMLIBS. This distinction is necessary + to support projects. LLVMLIBS refers to the LLVM libraries found in + the LLVM object directory. USEDLIBS refers to the libraries built by + your project. In the case of building LLVM tools, USEDLIBS and + LLVMLIBS can be used interchangeably since the "project" is LLVM + itself and USEDLIBS refers to the same place as LLVMLIBS. +

+

Also note that there are two different ways of specifying a library: with a + .a suffix and without. Without the suffix, the entry refers to the + re-linked (.o) file which will include all symbols of the library. + This is useful, for example, to include all passes from a library of passes. + If the .a suffix is used then the library is linked as a searchable + library (with the -l option). In this case, only the symbols that are + unresolved at that point will be resolved from the library, if they + exist. Other (unreferenced) symbols will not be included when the .a + syntax is used. Note that in order to use the .a suffix, the library + in question must have been built with the ARCHIVE_LIBRARY option set. +

+
+ + + +
+

Many tools will want to use the JIT features of LLVM. To do this, you + simply specify that you want an execution 'engine', and the makefiles will + automatically link in the appropriate JIT for the host or an interpreter + if none is available:

+

+      TOOLNAME = my_jit_tool
+      USEDLIBS = mylib
+      LINK_COMPONENTS = engine
+  
+

Of course, any additional libraries may be listed as other components. To + get a full understanding of how this changes the linker command, it is + recommended that you:

+

+      cd examples/Fibonacci
+      make VERBOSE=1
+  
+
+ + + + + +
+

This section describes each of the targets that can be built using the LLVM + Makefile system. Any target can be invoked from any directory but not all are + applicable to a given directory (e.g. "check", "dist" and "install" will + always operate as if invoked from the top level directory).

Target Name    Implied   Target Description
               Targets
-------------  --------  -----------------------------------------------------
all                      Compile the software recursively. Default target.
all-local                Compile the software in the local directory only.
check                    Change to the test directory in a project and run the
                         test suite there.
check-local              Run a local test suite. Generally this is only
                         defined in the Makefile of the project's test
                         directory.
clean                    Remove built objects recursively.
clean-local              Remove built objects from the local directory only.
dist           all       Prepare a source distribution tarball.
dist-check     all       Prepare a source distribution tarball and check that
                         it builds.
dist-clean     clean     Clean source distribution tarball temporary files.
install        all       Copy built objects to installation directory.
preconditions  all       Check to make sure configuration and makefiles are
                         up to date.
printvars      all       Prints variables defined by the makefile system (for
                         debugging).
tags                     Make C and C++ tags files for emacs and vi.
uninstall                Remove built objects from installation directory.
+
+ + + +
+

When you invoke make with no arguments, you are implicitly + instructing it to seek the "all" target (goal). This target is used for + building the software recursively and will do different things in different + directories. For example, in a lib directory, the "all" target will + compile source files and generate libraries. But, in a tools + directory, it will link libraries and generate executables.

+
+ + + +
+

This target is the same as all but it operates only on + the current directory instead of recursively.

+
+ + + +
+

This target can be invoked from anywhere within a project's directories but always invokes the check-local target in the project's test directory, if it exists and has a Makefile. A warning is produced otherwise. If TESTSUITE is defined on the make command line, it will be passed down to the invocation of make check-local in the test directory. The intended usage for this is to assist in running specific suites of tests. If TESTSUITE is not set, the implementation of check-local should run all normal tests. It is up to the project to define what different values for TESTSUITE will do. See the TestingGuide for further details.
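
For example, to run only one directory of tests (the suite name here is illustrative; consult your project's TestingGuide for the values it actually supports):

      make check TESTSUITE=Feature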

+
+ + + +
+

This target should be implemented by the Makefile in the project's test directory. It is invoked by the check target elsewhere. Each project is free to define the actions of check-local as appropriate for that project. The LLVM project itself uses dejagnu to run a suite of feature and regression tests. Other projects may choose to use dejagnu or any other testing mechanism.

+
+ + + +
+

This target cleans the build directory, recursively removing all things that the Makefile builds. The cleaning rules have been made guarded so they shouldn't go awry (via rm -f $(UNSET_VARIABLE)/*, which would attempt to erase the entire directory structure).

+
+ + + +
+

This target does the same thing as clean but only for the current + (local) directory.

+
+ + + +
+

This target builds a distribution tarball. It first builds the entire + project using the all target and then tars up the necessary files and + compresses it. The generated tarball is sufficient for a casual source + distribution, but probably not for a release (see dist-check).

+
+ + + +
+

This target does the same thing as the dist target but also checks + the distribution tarball. The check is made by unpacking the tarball to a new + directory, configuring it, building it, installing it, and then verifying that + the installation results are correct (by comparing to the original build). + This target can take a long time to run but should be done before a release + goes out to make sure that the distributed tarball can actually be built into + a working release.
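
Run from the top level directory, the pre-release sanity check is simply:

      make dist-check

Expect this to take considerably longer than a plain dist, since it also configures, builds, and installs the unpacked tarball.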

+
+ + + +
+

This is a special form of the clean target. It performs a normal clean but also removes things pertaining to building the distribution.

+
+ + + +
+

This target finalizes shared objects and executables and copies all + libraries, headers, executables and documentation to the directory given + with the --prefix option to configure. When completed, + the prefix directory will have everything needed to use LLVM.
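
For example, to configure, build, and install into a private prefix (the path is illustrative):

      ./configure --prefix=/home/me/llvm
      make
      make install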

+

The LLVM makefiles can generate complete internal documentation for all the classes by using doxygen. By default, this feature is not enabled because it takes a long time and generates a massive amount of data (>100MB). If you want this feature, you must configure LLVM with the --enable-doxygen switch and ensure that a modern version of doxygen (1.3.7 or later) is available in your PATH. You can download doxygen from the doxygen web site.
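
A minimal sketch of enabling the feature:

      # doxygen 1.3.7 or later must already be in your PATH
      ./configure --enable-doxygen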

+ + + +
+

This utility target checks to see if the Makefile in the object + directory is older than the Makefile in the source directory and + copies it if so. It also reruns the configure script if that needs to + be done and rebuilds the Makefile.config file similarly. Users may + overload this target to ensure that sanity checks are run before any + building of targets as all the targets depend on preconditions.

+
+ + + +
+

This utility target just causes the LLVM makefiles to print out some of + the makefile variables so that you can double check how things are set.

+
+ + + +
+

This utility target will force a reconfigure of LLVM or your project. It simply runs $(PROJ_OBJ_ROOT)/config.status --recheck to rerun the configuration tests and rebuild the configured files. This isn't generally useful as the makefiles will reconfigure themselves whenever it is necessary.

+
+ + + +
+

This utility target, only available when $(PROJ_OBJ_ROOT) is not + the same as $(PROJ_SRC_ROOT), will completely clean the + $(PROJ_OBJ_ROOT) directory by removing its content entirely and + reconfiguring the directory. This returns the $(PROJ_OBJ_ROOT) + directory to a completely fresh state. All content in the directory except + configured files and top-level makefiles will be lost.

+

Use with caution.

+
+ + + +
+

This target will generate a TAGS file in the top-level source + directory. It is meant for use with emacs, XEmacs, or ViM. The TAGS file + provides an index of symbol definitions so that the editor can jump you to the + definition quickly.

+
+ + + +
+

This target is the opposite of the install target. It removes the + header, library and executable files from the installation directories. Note + that the directories themselves are not removed because it is not guaranteed + that LLVM is the only thing installing there (e.g. --prefix=/usr).

+
+ + + + +
+

Variables are used to tell the LLVM Makefile System what to do and to obtain information from it. Variables are also used internally by the LLVM Makefile System. Variable names that contain only upper case alphabetic letters and underscores are intended for use by the end user. All other variables are internal to the LLVM Makefile System and should not be relied upon nor modified. The sections below describe how to use the LLVM Makefile variables.

+
+ + + +
+

Variables listed in the table below should be set before the + inclusion of $(LEVEL)/Makefile.common. + These variables provide input to the LLVM make system that tell it what to do + for the current directory.

+
+
BUILD_ARCHIVE
+
If set to any value, causes an archive (.a) library to be built.
+
BUILT_SOURCES
+
Specifies a set of source files that are generated from other source + files. These sources will be built before any other target processing to + ensure they are present.
+
BYTECODE_LIBRARY
+
If set to any value, causes a bitcode library (.bc) to be built.
+
CONFIG_FILES
+
Specifies a set of configuration files to be installed.
+
DEBUG_SYMBOLS
+
If set to any value, causes the build to include debugging + symbols even in optimized objects, libraries and executables. This + alters the flags specified to the compilers and linkers. Debugging + isn't fun in an optimized build, but it is possible.
+
DIRS
+
Specifies a set of directories, usually children of the current + directory, that should also be made using the same goal. These directories + will be built serially.
+
DISABLE_AUTO_DEPENDENCIES
+
If set to any value, causes the makefiles to not automatically + generate dependencies when running the compiler. Use of this feature is + discouraged and it may be removed at a later date.
+
ENABLE_OPTIMIZED
+
If set to 1, causes the build to generate optimized objects, + libraries and executables. This alters the flags specified to the compilers + and linkers. Generally debugging won't be a fun experience with an optimized + build.
+
ENABLE_PROFILING
+
If set to 1, causes the build to generate both optimized and + profiled objects, libraries and executables. This alters the flags specified + to the compilers and linkers to ensure that profile data can be collected + from the tools built. Use the gprof tool to analyze the output from + the profiled tools (gmon.out).
+
DISABLE_ASSERTIONS
+
If set to 1, causes the build to disable assertions, even if + building a debug or profile build. This will exclude all assertion check + code from the build. LLVM will execute faster, but with little help when + things go wrong.
+
EXPERIMENTAL_DIRS
+
Specify a set of directories that should be built, but if they fail, it + should not cause the build to fail. Note that this should only be used + temporarily while code is being written.
+
EXPORTED_SYMBOL_FILE
+
Specifies the name of a single file that contains a list of the + symbols to be exported by the linker. One symbol per line.
+
EXPORTED_SYMBOL_LIST
+
Specifies a set of symbols to be exported by the linker.
+
EXTRA_DIST
+
Specifies additional files that should be distributed with LLVM. All + source files, all built sources, all Makefiles, and most documentation files + will be automatically distributed. Use this variable to distribute any + files that are not automatically distributed.
+
KEEP_SYMBOLS
+
If set to any value, specifies that when linking executables the + makefiles should retain debug symbols in the executable. Normally, symbols + are stripped from the executable.
+
LEVEL(required)
+
Specify the level of nesting from the top level. This variable must be + set in each makefile as it is used to find the top level and thus the other + makefiles.
+
LIBRARYNAME
+
Specify the name of the library to be built. (Required For + Libraries)
+
LINK_COMPONENTS
+
When specified for building a tool, the value of this variable will be passed to the llvm-config tool to generate a link line for the tool. Unlike USEDLIBS and LLVMLIBS, not all libraries need to be specified. The llvm-config tool will figure out the library dependencies and add any libraries that are needed. The USEDLIBS variable can still be used in conjunction with LINK_COMPONENTS so that additional project-specific libraries can be linked with the LLVM libraries specified by LINK_COMPONENTS.
+
LINK_LIBS_IN_SHARED
+
By default, shared library linking will ignore any libraries specified + with the LLVMLIBS or USEDLIBS. + This prevents shared libs from including things that will be in the LLVM + tool the shared library will be loaded into. However, sometimes it is useful + to link certain libraries into your shared library and this option enables + that feature.
+
LLVMLIBS
+
Specifies the set of libraries from the LLVM $(ObjDir) that will be + linked into the tool or library.
+
LOADABLE_MODULE
+
If set to any value, causes the shared library being built to also be + a loadable module. Loadable modules can be opened with the dlopen() function + and searched with dlsym (or the operating system's equivalent). Note that + setting this variable without also setting SHARED_LIBRARY will have + no effect.
+
MODULE_NAME
+
Specifies the name of a bitcode module to be created. A bitcode + module can be specified in conjunction with other kinds of library builds + or by itself. It constructs from the sources a single linked bitcode + file.
+
NO_INSTALL
+
Specifies that the build products of the directory should not be + installed but should be built even if the install target is given. + This is handy for directories that build libraries or tools that are only + used as part of the build process, such as code generators (e.g. + tblgen).
+
OPTIONAL_DIRS
+
Specify a set of directories that may be built, if they exist, but it's not an error for them not to exist.
+
PARALLEL_DIRS
+
Specify a set of directories to build recursively and in parallel if + the -j option was used with make.
+
SHARED_LIBRARY
+
If set to any value, causes a shared library (.so) to be built in + addition to any other kinds of libraries. Note that this option will cause + all source files to be built twice: once with options for position + independent code and once without. Use it only where you really need a + shared library.
+
SOURCES(optional)
+
Specifies the list of source files in the current directory to be + built. Source files of any type may be specified (programs, documentation, + config files, etc.). If not specified, the makefile system will infer the + set of source files from the files present in the current directory.
+
SUFFIXES
+
Specifies a set of filename suffixes that occur in suffix match rules. + Only set this if your local Makefile specifies additional suffix + match rules.
+
TARGET
+
Specifies the name of the LLVM code generation target that the + current directory builds. Setting this variable enables additional rules to + build .inc files from .td files.
+
TESTSUITE
+
Specifies the directory of tests to run in llvm/test.
+
TOOLNAME
+
Specifies the name of the tool that the current directory should + build.
+
TOOL_VERBOSE
+
Implies VERBOSE and also tells each tool invoked to be verbose. This is handy when you're trying to see the sub-tools invoked by each tool invoked by the makefile. For example, this will pass -v to the GCC compilers, which causes them to print out the command lines they use to invoke sub-tools (compiler, assembler, linker).
+
USEDLIBS
+
Specifies the list of project libraries that will be linked into the + tool or library.
+
VERBOSE
+
Tells the Makefile system to produce detailed output of what it is doing + instead of just summary comments. This will generate a LOT of output.
+
+
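
To sketch how several of these variables combine (all directory and library names here are illustrative), a parent directory's Makefile and a library directory's Makefile might look like:

      # Parent directory: recurse into subdirectories, in parallel if
      # make was invoked with -j.
      LEVEL = ..
      PARALLEL_DIRS = lib tools
      include $(LEVEL)/Makefile.common

      # Library directory: build both an archive (.a) and a bitcode library.
      LEVEL = ../..
      LIBRARYNAME = MyProject
      BUILD_ARCHIVE = 1
      BYTECODE_LIBRARY = 1
      include $(LEVEL)/Makefile.common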
+ + + +
+

Override variables can be used to override the default + values provided by the LLVM makefile system. These variables can be set in + several ways:

+
  • In the environment (e.g. setenv, export) -- not recommended.
  • On the make command line -- recommended.
  • On the configure command line.
  • In the Makefile (only after the inclusion of
    $(LEVEL)/Makefile.common).
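
For example, overriding on the make command line might look like this (the particular values are illustrative):

      make ENABLE_OPTIMIZED=1 VERBOSE=1 CXXFLAGS=-Wextra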

The override variables are given below:

+
+
AR (defaulted)
+
Specifies the path to the ar tool.
+
PROJ_OBJ_DIR
+
The directory into which the products of build rules will be placed. + This might be the same as + PROJ_SRC_DIR but typically is + not.
+
PROJ_SRC_DIR
+
The directory which contains the source files to be built.
+
BUILD_EXAMPLES
+
If set to 1, build examples in examples and (if building + Clang) tools/clang/examples directories.
+
BZIP2(configured)
+
The path to the bzip2 tool.
+
CC(configured)
+
The path to the 'C' compiler.
+
CFLAGS
+
Additional flags to be passed to the 'C' compiler.
+
CXX
+
Specifies the path to the C++ compiler.
+
CXXFLAGS
+
Additional flags to be passed to the C++ compiler.
+
DATE(configured)
+
Specifies the path to the date program or any program that can + generate the current date and time on its standard output
+
DOT(configured)
+
Specifies the path to the dot tool or false if there + isn't one.
+
ECHO(configured)
+
Specifies the path to the echo tool for printing output.
+
EXEEXT(configured)
+
Provides the extension to be used on executables built by the makefiles. + The value may be empty on platforms that do not use file extensions for + executables (e.g. Unix).
+
INSTALL(configured)
+
Specifies the path to the install tool.
+
LDFLAGS(configured)
+
Allows users to specify additional flags to pass to the linker.
+
LIBS(configured)
+
The list of libraries that should be linked with each tool.
+
LIBTOOL(configured)
+
Specifies the path to the libtool tool. This tool is renamed + mklib by the configure script and always located in the +
LLVMAS(defaulted)
+
Specifies the path to the llvm-as tool.
+
LLVMCC
+
Specifies the path to the LLVM capable compiler.
+
LLVMCXX
+
Specifies the path to the LLVM C++ capable compiler.
+
LLVMGCC(defaulted)
+
Specifies the path to the LLVM version of the GCC 'C' Compiler
+
LLVMGXX(defaulted)
+
Specifies the path to the LLVM version of the GCC C++ Compiler
+
LLVMLD(defaulted)
+
Specifies the path to the LLVM bitcode linker tool
+
LLVM_OBJ_ROOT(configured) +
+
Specifies the top directory into which the output of the build is + placed.
+
LLVM_SRC_ROOT(configured) +
+
Specifies the top directory in which the sources are found.
+
LLVM_TARBALL_NAME + (configured)
+
Specifies the name of the distribution tarball to create. This is + configured from the name of the project and its version number.
+
MKDIR(defaulted)
+
Specifies the path to the mkdir tool that creates + directories.
+
ONLY_TOOLS
+
If set, specifies the list of tools to build.
+
PLATFORMSTRIPOPTS
+
The options to provide to the linker to specify that a stripped (no + symbols) executable should be built.
+
RANLIB(defaulted)
+
Specifies the path to the ranlib tool.
+
RM(defaulted)
+
Specifies the path to the rm tool.
+
SED(defaulted)
+
Specifies the path to the sed tool.
+
SHLIBEXT(configured)
+
Provides the filename extension to use for shared libraries.
+
TBLGEN(defaulted)
+
Specifies the path to the tblgen tool.
+
TAR(defaulted)
+
Specifies the path to the tar tool.
+
ZIP(defaulted)
+
Specifies the path to the zip tool.
+
+
+ + + +
+

Variables listed in the table below can be used by the user's Makefile but + should not be changed. Changing the value will generally cause the build to go + wrong, so don't do it.

+
+
bindir
+
The directory into which executables will ultimately be installed. This + value is derived from the --prefix option given to + configure.
+
BuildMode
+
The name of the type of build being performed: Debug, Release, or + Profile
+
bytecode_libdir
+
The directory into which bitcode libraries will ultimately be + installed. This value is derived from the --prefix option given to + configure.
+
ConfigureScriptFLAGS
+
Additional flags given to the configure script when + reconfiguring.
+
DistDir
+
The current directory for which a distribution copy is being + made.
+
Echo
+
The LLVM Makefile System output command. This provides the + llvm[n] prefix and starts with @ so the command itself is not + printed by make.
+
EchoCmd
+
Same as Echo but without the leading @. +
+
includedir
+
The directory into which include files will ultimately be installed. + This value is derived from the --prefix option given to + configure.
+
libdir
+
The directory into which native libraries will ultimately be installed. + This value is derived from the --prefix option given to + configure.
+
LibDir
+
The configuration specific directory into which libraries are placed + before installation.
+
MakefileConfig
+
Full path of the Makefile.config file.
+
MakefileConfigIn
+
Full path of the Makefile.config.in file.
+
ObjDir
+
The configuration and directory specific directory where build objects + (compilation results) are placed.
+
SubDirs
+
The complete list of sub-directories of the current directory as + specified by other variables.
+
Sources
+
The complete list of source files.
+
sysconfdir
+
The directory into which configuration files will ultimately be + installed. This value is derived from the --prefix option given to + configure.
+
ToolDir
+
The configuration specific directory into which executables are placed + before they are installed.
+
TopDistDir
+
The top most directory into which the distribution files are copied. +
+
Verb
+
Use this as the first thing on your build script lines to enable or + disable verbose mode. It expands to either an @ (quiet mode) or nothing + (verbose mode).
+
+
+ + + +
+

Variables listed below are used by the LLVM Makefile System + and considered internal. You should not use these variables under any + circumstances.

+

Archive, AR.Flags, BaseNameSources, BCCompile.C, BCCompile.CXX, BCLinkLib, C.Flags, Compile.C, CompileCommonOpts, Compile.CXX, ConfigStatusScript, ConfigureScript, CPP.Flags, CXX.Flags, DependFiles, DestArchiveLib, DestBitcodeLib, DestModule, DestSharedLib, DestTool, DistAlways, DistCheckDir, DistCheckTop, DistFiles, DistName, DistOther, DistSources, DistSubDirs, DistTarBZ2, DistTarGZip, DistZip, ExtraLibs, FakeSources, INCFiles, InternalTargets, LD.Flags, LibName.A, LibName.BC, LibName.LA, LibName.O, LibTool.Flags, Link, LinkModule, LLVMLibDir, LLVMLibsOptions, LLVMLibsPaths, LLVMToolDir, LLVMUsedLibs, LocalTargets, Module, ObjectsBC, ObjectsLO, ObjectsO, ObjMakefiles, ParallelTargets, PreConditions, ProjLibsOptions, ProjLibsPaths, ProjUsedLibs, Ranlib, RecursiveTargets, SrcMakefiles, Strip, StripWarnMsg, TableGen, TDFiles, ToolBuildPath, TopLevelTargets, UserTargets

+
+ + +
+
Reid Spencer
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/Packaging.html b/final/docs/Packaging.html new file mode 100644 index 00000000000..217590e6c24 --- /dev/null +++ b/final/docs/Packaging.html @@ -0,0 +1,118 @@ + + + + Advice on Packaging LLVM + + + + +
Advice on Packaging LLVM
+
  1. Overview
  2. Compile Flags
  3. C++ Features
  4. Shared Library
  5. Dependencies
+ + + + +
+ +

LLVM sets certain default configure options to make sure our developers don't +break things for constrained platforms. These settings are not optimal for most +desktop systems, and we hope that packagers (e.g., Redhat, Debian, MacPorts, +etc.) will tweak them. This document lists settings we suggest you tweak. +

+ +

LLVM's API changes with each release, so users are likely to want, for +example, both LLVM-2.6 and LLVM-2.7 installed at the same time to support apps +developed against each. +

+
+ + + + +
+ +

LLVM runs much more quickly when it's optimized and assertions are removed. +However, such a build is currently incompatible with users who build without +defining NDEBUG, and the lack of assertions makes it hard to debug problems in +user code. We recommend allowing users to install both optimized and debug +versions of LLVM in parallel. The following configure flags are relevant: +

+ +
+
--disable-assertions
Builds LLVM with NDEBUG defined. Changes the LLVM ABI. Also available by setting DISABLE_ASSERTIONS=0|1 in make's environment. Assertions default to enabled regardless of the optimization setting, but they slow things down.
+ +
--enable-debug-symbols
Builds LLVM with -g. + Also available by setting DEBUG_SYMBOLS=0|1 in make's + environment. This defaults to disabled when optimizing, so you should turn it + back on to let users debug their programs.
+ +
--enable-optimized
(For svn checkouts) Builds LLVM with + -O2 and, by default, turns off debug symbols. Also available by + setting ENABLE_OPTIMIZED=0|1 in make's environment. This + defaults to enabled when not in a checkout.
+
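
Putting these flags together, a packager producing an optimized, assertion-free build with debug symbols might configure roughly as follows (a sketch, not a prescription):

      ./configure --prefix=/usr \
                  --enable-optimized \
                  --disable-assertions \
                  --enable-debug-symbols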
+
+ + + + +
+ +
+
RTTI
LLVM disables RTTI by default. Add REQUIRES_RTTI=1 + to your environment while running make to re-enable it. This will + allow users to build with RTTI enabled and still inherit from LLVM + classes.
+
+
+ + + + +
+ +

Configure with --enable-shared to build +libLLVM-major.minor.(so|dylib) and link the tools +against it. This saves lots of binary size at the cost of some startup time. +

+
+ + + + +
+ +
+
--enable-libffi
Depend on libffi to allow the LLVM +interpreter to call external functions.
+
--with-oprofile
Depend on libopagent +(>=version 0.9.4) to let the LLVM JIT tell oprofile about function addresses and +line numbers.
+
+
+ + +
+
The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/Passes.html b/final/docs/Passes.html new file mode 100644 index 00000000000..a5349c3b85c --- /dev/null +++ b/final/docs/Passes.html @@ -0,0 +1,2249 @@ + + + + LLVM's Analysis and Transform Passes + + + + + + + +
LLVM's Analysis and Transform Passes
+ +
  1. Introduction
  2. Analysis Passes
  3. Transform Passes
  4. Utility Passes
+ +
+

Written by Reid Spencer + and Gordon Henriksen

+
+ + + +
+

This document serves as a high level summary of the optimization features that LLVM provides. Optimizations are implemented as Passes that traverse some portion of a program to either collect information or transform the program. The table below divides the passes that LLVM provides into three categories. Analysis passes compute information that other passes can use, or that serves debugging or program visualization purposes. Transform passes can use (or invalidate) the analysis passes; they all mutate the program in some way. Utility passes provide some utility but don't otherwise fit categorization. For example, passes to extract functions to bitcode or write a module to bitcode are neither analysis nor transform passes.

The table below provides a quick summary of each pass and links to the more + complete pass description later in the document.

+
+
ANALYSIS PASSES (option: name)

-aa-eval: Exhaustive Alias Analysis Precision Evaluator
-basicaa: Basic Alias Analysis (default AA impl)
-basiccg: Basic CallGraph Construction
-codegenprepare: Optimize for code generation
-count-aa: Count Alias Analysis Query Responses
-debug-aa: AA use debugger
-domfrontier: Dominance Frontier Construction
-domtree: Dominator Tree Construction
-dot-callgraph: Print Call Graph to 'dot' file
-dot-cfg: Print CFG of function to 'dot' file
-dot-cfg-only: Print CFG of function to 'dot' file (with no function bodies)
-dot-dom: Print dominator tree of function to 'dot' file
-dot-dom-only: Print dominator tree of function to 'dot' file (with no function bodies)
-dot-postdom: Print post dominator tree of function to 'dot' file
-dot-postdom-only: Print post dominator tree of function to 'dot' file (with no function bodies)
-globalsmodref-aa: Simple mod/ref analysis for globals
-instcount: Counts the various types of Instructions
-interprocedural-aa-eval: Exhaustive Interprocedural Alias Analysis Precision Evaluator
-interprocedural-basic-aa: Interprocedural Basic Alias Analysis
-intervals: Interval Partition Construction
-iv-users: Induction Variable Users
-lazy-value-info: Lazy Value Information Analysis
-lda: Loop Dependence Analysis
-libcall-aa: LibCall Alias Analysis
-lint: Check for common errors in LLVM IR
-live-values: Value Liveness Analysis
-loops: Natural Loop Information
-memdep: Memory Dependence Analysis
-module-debuginfo: Prints module debug info metadata
-no-aa: No Alias Analysis (always returns 'may' alias)
-no-profile: No Profile Information
-pointertracking: Track pointer bounds
-postdomfrontier: Post-Dominance Frontier Construction
-postdomtree: Post-Dominator Tree Construction
-print-alias-sets: Alias Set Printer
-print-callgraph: Print a call graph
-print-callgraph-sccs: Print SCCs of the Call Graph
-print-cfg-sccs: Print SCCs of each function CFG
-print-dbginfo: Print debug info in human readable form
-print-dom-info: Dominator Info Printer
-print-externalfnconstants: Print external fn callsites passed constants
-print-function: Print function to stderr
-print-module: Print module to stderr
-print-used-types: Find Used Types
-profile-estimator: Estimate profiling information
-profile-loader: Load profile information from llvmprof.out
-regions: Detect single entry single exit regions in a function
-profile-verifier: Verify profiling information
-scalar-evolution: Scalar Evolution Analysis
-scev-aa: ScalarEvolution-based Alias Analysis
-targetdata: Target Data Layout

TRANSFORM PASSES (option: name)

-abcd: Remove redundant conditional branches
-adce: Aggressive Dead Code Elimination
-always-inline: Inliner for always_inline functions
-argpromotion: Promote 'by reference' arguments to scalars
-block-placement: Profile Guided Basic Block Placement
-break-crit-edges: Break critical edges in CFG
-codegenprepare: Prepare a function for code generation
-constmerge: Merge Duplicate Global Constants
-constprop: Simple constant propagation
-dce: Dead Code Elimination
-deadargelim: Dead Argument Elimination
-deadtypeelim: Dead Type Elimination
-die: Dead Instruction Elimination
-dse: Dead Store Elimination
-functionattrs: Deduce function attributes
-globaldce: Dead Global Elimination
-globalopt: Global Variable Optimizer
-gvn: Global Value Numbering
-indvars: Canonicalize Induction Variables
-inline: Function Integration/Inlining
-insert-edge-profiling: Insert instrumentation for edge profiling
-insert-optimal-edge-profiling: Insert optimal instrumentation for edge profiling
-instcombine: Combine redundant instructions
-internalize: Internalize Global Symbols
-ipconstprop: Interprocedural constant propagation
-ipsccp: Interprocedural Sparse Conditional Constant Propagation
-jump-threading: Thread control through conditional blocks
-lcssa: Loop-Closed SSA Form Pass
-licm: Loop Invariant Code Motion
-loop-deletion: Dead Loop Deletion Pass
-loop-extract: Extract loops into new functions
-loop-extract-single: Extract at most one loop into a new function
-loop-index-split: Index Split Loops
-loop-reduce: Loop Strength Reduction
-loop-rotate: Rotate Loops
-loop-unroll: Unroll loops
-loop-unswitch: Unswitch loops
-loop-simplify: Canonicalize natural loops
-loweratomic: Lower atomic intrinsics
-lowerinvoke: Lower invoke and unwind, for unwindless code generators
-lowersetjmp: Lower Set Jump
-lowerswitch: Lower SwitchInst's to branches
-mem2reg: Promote Memory to Register
-memcpyopt: Optimize use of memcpy and friends
-mergefunc: Merge Functions
-mergereturn: Unify function exit nodes
-partial-inliner: Partial Inliner
-partialspecialization: Partial Specialization
-prune-eh: Remove unused exception handling info
-reassociate: Reassociate expressions
-reg2mem: Demote all values to stack slots
-scalarrepl: Scalar Replacement of Aggregates
-sccp: Sparse Conditional Constant Propagation
-sink: Code Sinking
-simplify-libcalls: Simplify well-known library calls
-simplify-libcalls-halfpowr: Simplify half_powr library calls
-simplifycfg: Simplify the CFG
-split-geps: Split complex GEPs into simple GEPs
-ssi: Static Single Information Construction
-ssi-everything: Static Single Information Construction (everything, intended for debugging)
-strip: Strip all symbols from a module
-strip-dead-debug-info: Strip debug info for unused symbols
-strip-dead-prototypes: Remove unused function declarations
-strip-debug-declare: Strip all llvm.dbg.declare intrinsics
-strip-nondebug: Strip all symbols, except dbg symbols, from a module
-sretpromotion: Promote sret arguments
-tailcallelim: Tail Call Elimination
-tailduplicate: Tail Duplication

UTILITY PASSES (option: name)

-deadarghaX0r: Dead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE)
-extract-blocks: Extract Basic Blocks From Module (for bugpoint use)
-instnamer: Assign names to anonymous instructions
-preverify: Preliminary module verification
-verify: Module Verifier
-view-cfg: View CFG of function
-view-cfg-only: View CFG of function (with no function bodies)
-view-dom: View dominator tree of function
-view-dom-only: View dominator tree of function (with no function bodies)
-view-postdom: View post dominator tree of function
-view-postdom-only: View post dominator tree of function (with no function bodies)
+
+ + + +
+

This section describes the LLVM Analysis Passes.

+
+ + + +
+

This is a simple N^2 alias analysis accuracy evaluator. + Basically, for each function in the program, it simply queries to see how the + alias analysis implementation answers alias queries between each pair of + pointers in the function.

+ +

This is inspired and adapted from code by: Naveen Neelakantam, Francesco + Spadini, and Wojciech Stryjewski.

+
+ + + +
+

+ This is the default implementation of the Alias Analysis interface + that simply implements a few identities (two different globals cannot alias, + etc), but otherwise does no analysis. +

+
+ + + +
+

Yet to be written.

+
+ + + +
+

This pass munges the code in the input function to better prepare it for SelectionDAG-based code generation. This works around limitations in its basic-block-at-a-time approach. It should eventually be removed.

+
+ + + +
+

+ A pass which can be used to count how many alias queries + are being made and how the alias analysis implementation being used responds. +

+
+ + + +
+

+ This simple pass checks alias analysis users to ensure that if they + create a new value, they do not query AA without informing it of the value. + It acts as a shim over any other AA pass you want. +

+ +

+ Yes keeping track of every value in the program is expensive, but this is + a debugging pass. +

+
+ + + +
+

+ This pass is a simple dominator construction algorithm for finding forward + dominator frontiers. +

+
+ + + +
+

+ This pass is a simple dominator construction algorithm for finding forward + dominators. +

+
+ + + +
+

+ This pass, only available in opt, prints the call graph into a + .dot graph. This graph can then be processed with the "dot" tool + to convert it to postscript or some other suitable format. +
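
A typical invocation, assuming the pass writes its graph to callgraph.dot in the current directory (the output file name may differ):

      opt -dot-callgraph input.bc -disable-output
      dot -Tps callgraph.dot -o callgraph.ps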

+
+ + + +
+

+ This pass, only available in opt, prints the control flow graph + into a .dot graph. This graph can then be processed with the + "dot" tool to convert it to postscript or some other suitable format. +

+
+ + + +
+

+ This pass, only available in opt, prints the control flow graph + into a .dot graph, omitting the function bodies. This graph can + then be processed with the "dot" tool to convert it to postscript or some + other suitable format. +

+
+ + + +
+

+ This pass, only available in opt, prints the dominator tree + into a .dot graph. This graph can then be processed with the + "dot" tool to convert it to postscript or some other suitable format. +

+
+ + + +
+

+ This pass, only available in opt, prints the dominator tree + into a .dot graph, omitting the function bodies. This graph can + then be processed with the "dot" tool to convert it to postscript or some + other suitable format. +

+
+ + + +
+

+ This pass, only available in opt, prints the post dominator tree + into a .dot graph. This graph can then be processed with the + "dot" tool to convert it to postscript or some other suitable format. +

+
+ + + +
+

+ This pass, only available in opt, prints the post dominator tree + into a .dot graph, omitting the function bodies. This graph can + then be processed with the "dot" tool to convert it to postscript or some + other suitable format. +

+
+ + + +
+

+ This simple pass provides alias and mod/ref information for global values + that do not have their address taken, and keeps track of whether functions + read or write memory (are "pure"). For this simple (but very common) case, + we can provide pretty accurate and useful information. +

+
+ + + +
+

This pass collects the count of all instructions and reports them.

+
+ + + +
+

This pass implements a simple N^2 alias analysis accuracy evaluator. + Basically, for each function in the program, it simply queries to see how the + alias analysis implementation answers alias queries between each pair of + pointers in the function. +

+
+ + + +
+

This pass defines the default implementation of the Alias Analysis interface + that simply implements a few identities (two different globals cannot alias, + etc), but otherwise does no analysis. +

+
+ + + +
+

+ This analysis calculates and represents the interval partition of a function, + or a preexisting interval partition. +

+ +

+ In this way, the interval partition may be used to reduce a flow graph down + to its degenerate single node interval partition (unless it is irreducible). +

+
+ + + +
+

Bookkeeping for "interesting" users of expressions computed from + induction variables.

+
+ + + +
+

Interface for lazy computation of value constraint information.

+
+ + + +
+

Loop dependence analysis framework, which is used to detect dependences in + memory accesses in loops.

+
+ + + +
+

LibCall Alias Analysis.

+
+ + + +
+

This pass statically checks for common and easily-identified constructs + which produce undefined or likely unintended behavior in LLVM IR.

+ +

It is not a guarantee of correctness, in two ways. First, it isn't + comprehensive. There are checks which could be done statically which are + not yet implemented. Some of these are indicated by TODO comments, but + those aren't comprehensive either. Second, many conditions cannot be + checked statically. This pass does no dynamic instrumentation, so it + can't check for all possible problems.

+ +

Another limitation is that it assumes all code will be executed. A store + through a null pointer in a basic block which is never reached is harmless, + but this pass will warn about it anyway.

+ +

Optimization passes may make conditions that this pass checks for more or + less obvious. If an optimization pass appears to be introducing a warning, + it may be that the optimization pass is merely exposing an existing + condition in the code.

+ +

This code may be run before instcombine. In many cases, instcombine checks + for the same kinds of things and turns instructions with undefined behavior + into unreachable (or equivalent). Because of this, this pass makes some + effort to look through bitcasts and so on. +

+
+ + + +
+

LLVM IR Value liveness analysis pass.

+
+ + + +
+

+ This analysis is used to identify natural loops and determine the loop depth + of various nodes of the CFG. Note that the loops identified may actually be + several natural loops that share the same header node... not just a single + natural loop. +

+
+ + + +
+

+ An analysis that determines, for a given memory operation, what preceding + memory operations it depends on. It builds on alias analysis information, and + tries to provide a lazy, caching interface to a common kind of alias + information query. +

+
+ + + +
+

This pass decodes the debug info metadata in a module and prints it in a (sufficiently prepared) human-readable form. For example, run this pass from opt along with the -analyze option, and it'll print to standard output.

+
+ + + +
+

+ Always returns "I don't know" for alias queries. NoAA is unlike other alias + analysis implementations, in that it does not chain to a previous analysis. As + such it doesn't follow many of the rules that other alias analyses must. +

+
+ + + +
+

+ The default "no profile" implementation of the abstract + ProfileInfo interface. +

+
+ + + +
+

Tracking of pointer bounds. +

+
+ + + +
+

+ This pass is a simple post-dominator construction algorithm for finding + post-dominator frontiers. +

+
+ + + +
+

+ This pass is a simple post-dominator construction algorithm for finding + post-dominators. +

+
+ + + +
+

Yet to be written.

+
+ + + +
+

+ This pass, only available in opt, prints the call graph to + standard error in a human-readable form. +

+
+ + + +
+

+ This pass, only available in opt, prints the SCCs of the call + graph to standard error in a human-readable form. +

+
+ + + +
+

+ This pass, only available in opt, prints the SCCs of each + function CFG to standard error in a human-readable form. +

+
+ + + +
+

Pass that prints instructions, and associated debug info:

+
  • source/line/col information
  • original variable name
  • original type name
+
+ + + +
+

Dominator Info Printer.

+
+ + + +
+

+ This pass, only available in opt, prints out call sites to + external functions that are called with constant arguments. This can be + useful when looking for standard library functions we should constant fold + or handle in alias analyses. +

+
+ + + +
+

+ The PrintFunctionPass class is designed to be pipelined with + other FunctionPasses, and prints out the functions of the module + as they are processed. +

+
+ + + +
+

+ This pass simply prints out the entire module when it is executed. +

+
+ + + +
+

+ This pass is used to seek out all of the types in use by the program. Note + that this analysis explicitly does not include types only used by the symbol + table. +

+ + + +
+

Profiling information that estimates the profiling information + in a very crude and unimaginative way. +

+
+ + + +
+

+ A concrete implementation of profiling information that loads the information + from a profile dump file. +

+
+ + + +
+

Pass that checks profiling information for plausibility.

+
+ +
+

The RegionInfo pass detects single entry single exit regions in a function, where a region is defined as any subgraph that is connected to the remaining graph at only two spots. Furthermore, a hierarchical region tree is built.

+
+ + + +
+

The ScalarEvolution analysis can be used to analyze and categorize scalar expressions in loops. It specializes in recognizing general induction variables, representing them with the abstract and opaque SCEV class. Given this analysis, trip counts of loops and other important properties can be obtained.

+ +

+ This analysis is primarily useful for induction variable substitution and + strength reduction. +
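
For instance, given the following (illustrative) function, ScalarEvolution can describe %i as the canonical induction expression {0,+,1} and compute a loop trip count of 10:

      define void @f([10 x i32]* %a) {
      entry:
        br label %loop
      loop:
        %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
        %p = getelementptr [10 x i32]* %a, i32 0, i32 %i  ; address is {%a,+,4}
        store i32 %i, i32* %p
        %i.next = add i32 %i, 1
        %exitcond = icmp eq i32 %i.next, 10
        br i1 %exitcond, label %exit, label %loop
      exit:
        ret void
      }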

+
+ + + +
+

Simple alias analysis implemented in terms of ScalarEvolution queries. + + This differs from traditional loop dependence analysis in that it tests + for dependencies within a single iteration of a loop, rather than + dependencies between different iterations. + + ScalarEvolution has a more complete understanding of pointer arithmetic + than BasicAliasAnalysis' collection of ad-hoc analyses. +

+
+ + + +
+

Performs code stripping. This transformation can delete:

+ +
  1. names for virtual registers
  2. symbols for internal globals and functions
  3. debug information
+ +

Note that this transformation makes code much less readable, so it should only be used in situations where the strip utility would be used, such as reducing code size or making it harder to reverse engineer code.

+
+ + + +
+

Provides other passes access to information on the size and alignment required by the target ABI for various data types.

+
+ + + +
+

This section describes the LLVM Transform Passes.

+
+ + + +
+

ABCD removes conditional branch instructions that can be proved redundant. + With the SSI representation, each variable has a constraint. By analyzing these + constraints we can prove that a branch is redundant. When a branch is proved + redundant it means that one direction will always be taken; thus, we can change + this branch into an unconditional jump.

+

It is advisable to run SimplifyCFG and + Aggressive Dead Code Elimination after ABCD + to clean up the code.

+
+ + + +
+

ADCE aggressively tries to eliminate code. This pass is similar to + DCE but it assumes that values are dead until proven + otherwise. This is similar to SCCP, except applied to + the liveness of values.

+
+ + + +
+

A custom inliner that handles only functions that are marked as + "always inline".

+
+ + + +
+

+ This pass promotes "by reference" arguments to be "by value" arguments. In + practice, this means looking for internal functions that have pointer + arguments. If it can prove, through the use of alias analysis, that an + argument is *only* loaded, then it can pass the value into the function + instead of the address of the value. This can cause recursive simplification + of code and lead to the elimination of allocas (especially in C++ template + code like the STL). +

+ +

+ This pass also handles aggregate arguments that are passed into a function, + scalarizing them if the elements of the aggregate are only loaded. Note that + it refuses to scalarize aggregates which would require passing in more than + three operands to the function, because passing thousands of operands for a + large array or structure is unprofitable! +

+ +

+ Note that this transformation could also be done for arguments that are only + stored to (returning the value instead), but does not currently. This case + would be best handled when and if LLVM starts supporting multiple return + values from functions. +

+
+ + + +
+

This pass is a very simple profile guided basic block placement algorithm. + The idea is to put frequently executed blocks together at the start of the + function and hopefully increase the number of fall-through conditional + branches. If there is no profile information for a particular function, this + pass basically orders blocks in depth-first order.

+
+ + + +
+

+ Break all of the critical edges in the CFG by inserting a dummy basic block. + It may be "required" by passes that cannot deal with critical edges. This + transformation obviously invalidates the CFG, but can update forward dominator + (set, immediate dominators, tree, and frontier) information. +

+
+ + + +
This pass munges the code in the input function to better prepare it for SelectionDAG-based code generation. This works around limitations in its basic-block-at-a-time approach. It should eventually be removed.
+ + + +
+

Merges duplicate global constants together into a single constant that is shared. This is useful because some passes (e.g. TraceValues) insert a lot of string constants into the program, regardless of whether or not an existing string is available.

+
+ + + +
+

This file implements constant propagation and merging. It looks for + instructions involving only constant operands and replaces them with a + constant value instead of an instruction. For example:

+
add i32 1, 2
+

becomes

+
i32 3
+

NOTE: this pass has a habit of making definitions be dead. It is a good idea to run a DIE (Dead Instruction Elimination) pass sometime after running this pass.

+
+ + + +
+

+ Dead code elimination is similar to dead instruction + elimination, but it rechecks instructions that were used by removed + instructions to see if they are newly dead. +

+
+ + + +
+

This pass deletes dead arguments from internal functions. Dead argument elimination removes arguments which are directly dead, as well as arguments only passed into function calls as dead arguments of other functions. This pass also deletes dead return values in a similar way.

+ +

+ This pass is often useful as a cleanup pass to run after aggressive + interprocedural passes, which add possibly-dead arguments. +

+
+ + + +
+

This pass is used to clean up the output of GCC. It eliminates names for types that are unused in the entire translation unit, using the find used types pass.

+
+ + + +
+

+ Dead instruction elimination performs a single pass over the function, + removing instructions that are obviously dead. +
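
An illustrative example of an instruction this pass removes:

      define i32 @g(i32 %a, i32 %b) {
        %dead = mul i32 %a, %b   ; no uses and no side effects: deleted
        ret i32 %a
      }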

+
+ + + +
+

+ A trivial dead store elimination that only considers basic-block local + redundant stores. +
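
An illustrative example of a store this pass removes:

      define void @h(i32* %p) {
        store i32 1, i32* %p   ; dead: overwritten below, no intervening load
        store i32 2, i32* %p
        ret void
      }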

+
+ + + +
+

A simple interprocedural pass which walks the call-graph, looking for + functions which do not access or only read non-local memory, and marking them + readnone/readonly. In addition, it marks function arguments (of pointer type) + 'nocapture' if a call to the function does not create any copies of the pointer + value that outlive the call. This more or less means that the pointer is only + dereferenced, and not returned from the function or stored in a global. + This pass is implemented as a bottom-up traversal of the call-graph. +

+
+ + + +
+

+ This transform is designed to eliminate unreachable internal globals from the + program. It uses an aggressive algorithm, searching out globals that are + known to be alive. After it finds all of the globals which are needed, it + deletes whatever is left over. This allows it to delete recursive chunks of + the program which are unreachable. +

+
+ + + +
+

+ This pass transforms simple global variables that never have their address + taken. If obviously true, it marks read/write globals as constant, deletes + variables only stored to, etc. +

+
+ + + +
+

+ This pass performs global value numbering to eliminate fully and partially + redundant instructions. It also performs redundant load elimination. +

+
+ + + +
+

+ This transformation analyzes and transforms the induction variables (and + computations derived from them) into simpler forms suitable for subsequent + analysis and transformation. +

+ +

+ This transformation makes the following changes to each loop with an + identifiable induction variable: +

+ +
  1. All loops are transformed to have a single canonical induction
     variable which starts at zero and steps by one.
  2. The canonical induction variable is guaranteed to be the first PHI
     node in the loop header block.
  3. Any pointer arithmetic recurrences are raised to use array
     subscripts.
+ +

+ If the trip count of a loop is computable, this pass also makes the following + changes: +

+ +
  1. The exit condition for the loop is canonicalized to compare the
     induction value against the exit value. This turns loops like:
       for (i = 7; i*i < 1000; ++i)
     into
       for (i = 0; i != 25; ++i)
  2. Any use outside of the loop of an expression derived from the indvar
     is changed to compute the derived value outside of the loop,
     eliminating the dependence on the exit value of the induction
     variable. If the only purpose of the loop is to compute the exit
     value of some derived expression, this transformation will make the
     loop dead.
+ +

+ This transformation should be followed by strength reduction after all of the + desired loop transformations have been performed. Additionally, on targets + where it is profitable, the loop could be transformed to count down to zero + (the "do loop" optimization). +

+
+ + + +
+

+ Bottom-up inlining of functions into callees. +

+
+ + + +
+

+ This pass instruments the specified program with counters for edge profiling. + Edge profiling can give a reasonable approximation of the hot paths through a + program, and is used for a wide variety of program transformations. +

+ +

+ Note that this implementation is very naïve. It inserts a counter for + every edge in the program, instead of using control flow information + to prune the number of counters inserted. +

+
+ + + +
+

This pass instruments the specified program with counters for edge profiling. + Edge profiling can give a reasonable approximation of the hot paths through a + program, and is used for a wide variety of program transformations. +

+
+ + + +
+

Combine instructions to form fewer, simple instructions. This pass does not modify the CFG. This pass is where algebraic simplification happens.

+ +

+ This pass combines things like: +

+ +
%Y = add i32 %X, 1
+%Z = add i32 %Y, 1
+ +

+ into: +

+ +
%Z = add i32 %X, 2
+ +

+ This is a simple worklist driven algorithm. +

+ +

+ This pass guarantees that the following canonicalizations are performed on + the program: +

+ +
  • If a binary operator has a constant operand, it is moved to the
    right-hand side.
  • Bitwise operators with constant operands are always grouped so that
    shifts are performed first, then ors, then ands, then xors.
  • Compare instructions are converted from <, >, ≤, or ≥ to = or ≠ if
    possible.
  • All cmp instructions on boolean values are replaced with logical
    operations.
  • add X, X is represented as mul X, 2 ⇒ shl X, 1.
  • Multiplies with a constant power-of-two argument are transformed into
    shifts.
  • … etc.
+
+ + + +
+

+ This pass loops over all of the functions in the input module, looking for a + main function. If a main function is found, all other functions and all + global variables with initializers are marked as internal. +

+
+ + + +
+

+ This pass implements an extremely simple interprocedural constant + propagation pass. It could certainly be improved in many different ways, + like using a worklist. This pass makes arguments dead, but does not remove + them. The existing dead argument elimination pass should be run after this + to clean up the mess. +

+
+ + + +
+

+ An interprocedural variant of Sparse Conditional Constant + Propagation. +

+
+ + + +
+

+ Jump threading tries to find distinct threads of control flow running through + a basic block. This pass looks at blocks that have multiple predecessors and + multiple successors. If one or more of the predecessors of the block can be + proven to always cause a jump to one of the successors, we forward the edge + from the predecessor to the successor by duplicating the contents of this + block. +

+

+ An example of when this can occur is code like this: +

+ +
if () { ...
+  X = 4;
+}
+if (X < 3) {
+ +

+ In this case, the unconditional branch at the end of the first if can be + revectored to the false side of the second if. +

+
+ + + +
+

+ This pass transforms loops by placing phi nodes at the end of the loops for + all values that are live across the loop boundary. For example, it turns + the left into the right code: +

+ +
for (...)                for (...)
+  if (c)                   if (c)
+    X1 = ...                 X1 = ...
+  else                     else
+    X2 = ...                 X2 = ...
+  X3 = phi(X1, X2)         X3 = phi(X1, X2)
+... = X3 + 4              X4 = phi(X3)
+                          ... = X4 + 4
+ +

+ This is still valid LLVM; the extra phi nodes are purely redundant, and will + be trivially eliminated by InstCombine. The major benefit of + this transformation is that it makes many other loop optimizations, such as + LoopUnswitching, simpler. +

+
+ + + +
+

+ This pass performs loop invariant code motion, attempting to remove as much + code from the body of a loop as possible. It does this by either hoisting + code into the preheader block, or by sinking code to the exit blocks if it is + safe. This pass also promotes must-aliased memory locations in the loop to + live in registers, thus hoisting and sinking "invariant" loads and stores. +

+ +

+ This pass uses alias analysis for two purposes: +

+ +
  • Moving loop invariant loads and calls out of loops. If we can
    determine that a load or call inside of a loop never aliases anything
    stored to, we can hoist it or sink it like any other instruction.
  • Scalar Promotion of Memory - If there is a store instruction inside
    of the loop, we try to move the store to happen AFTER the loop
    instead of inside of the loop. This can only happen if a few
    conditions are true:
      • The pointer stored through is loop invariant.
      • There are no stores or loads in the loop which may alias the
        pointer. There are no calls in the loop which mod/ref the
        pointer.
    If these conditions are true, we can promote the loads and stores in
    the loop of the pointer to use a temporary alloca'd variable. We then
    use the mem2reg functionality to construct the appropriate SSA form
    for the variable.
+
+ + +
+

+ This file implements the Dead Loop Deletion Pass. This pass is responsible + for eliminating loops with non-infinite computable trip counts that have no + side effects or volatile instructions, and do not contribute to the + computation of the function's return value. +

+
+ + + +
+

+ A pass wrapper around the ExtractLoop() scalar transformation to + extract each top-level loop into its own new function. If the loop is the + only loop in a given function, it is not touched. This is a pass most + useful for debugging via bugpoint. +

+
+ + + +
+

+ Similar to Extract loops into new functions, + this pass extracts one natural loop from the program into a function if it + can. This is used by bugpoint. +

+
+ + + +
+

This pass divides a loop's iteration range by splitting the loop, such that each individual loop is executed efficiently.

+
+ + + +
+

+ This pass performs a strength reduction on array references inside loops that + have as one or more of their components the loop induction variable. This is + accomplished by creating a new value to hold the initial value of the array + access for the first iteration, and then creating a new GEP instruction in + the loop to increment the value by the appropriate amount. +

+
+ + + +
+

A simple loop rotation transformation.

+
+ + + +
+

+ This pass implements a simple loop unroller. It works best when loops have + been canonicalized by the -indvars pass, + allowing it to determine the trip counts of loops easily. +

+
+ + + +
+

+ This pass transforms loops that contain branches on loop-invariant conditions + to have multiple loops. For example, it turns the left into the right code: +

+ +
for (...)                  if (lic)
+  A                          for (...)
+  if (lic)                     A; B; C
+    B                      else
+  C                          for (...)
+                               A; C
+ +

+ This can increase the size of the code exponentially (doubling it every time + a loop is unswitched) so we only unswitch if the resultant code will be + smaller than a threshold. +

+ +

+ This pass expects LICM to be run before it to hoist invariant conditions out + of the loop, to make the unswitching opportunity obvious. +

+
+ + + +
+

+ This pass performs several transformations to transform natural loops into a + simpler form, which makes subsequent analyses and transformations simpler and + more effective. +

+ +

+ Loop pre-header insertion guarantees that there is a single, non-critical + entry edge from outside of the loop to the loop header. This simplifies a + number of analyses and transformations, such as LICM. +

+ +

+ Loop exit-block insertion guarantees that all exit blocks from the loop + (blocks which are outside of the loop that have predecessors inside of the + loop) only have predecessors from inside of the loop (and are thus dominated + by the loop header). This simplifies transformations such as store-sinking + that are built into LICM. +

+ +

+ This pass also guarantees that loops will have exactly one backedge. +

+ +

+ Note that the simplifycfg pass will clean up blocks which are split out but + end up being unnecessary, so usage of this pass should not pessimize + generated code. +

+ +

+ This pass obviously modifies the CFG, but updates loop information and + dominator information. +

+
+ + + +
+

+ Turn malloc and free instructions into @malloc and + @free calls. +

+ +

+ This is a target-dependent transformation because it depends on the size of
+ data types and alignment constraints. +

+
+ + + +
+

+ This pass lowers atomic intrinsics to non-atomic form for use in a known + non-preemptible environment. +

+ +

+ The pass does not verify that the environment is non-preemptible (in + general this would require knowledge of the entire call graph of the + program including any libraries which may not be available in bitcode form); + it simply lowers every atomic intrinsic. +

+
+ + + +
+

+ This transformation is designed for use by code generators which do not yet + support stack unwinding. This pass supports two models of exception handling + lowering, the 'cheap' support and the 'expensive' support. +

+ +

+ 'Cheap' exception handling support allows any program that does not "throw
+ an exception" to execute correctly, by turning 'invoke' instructions into
+ calls and by turning 'unwind' instructions into calls to abort(). If the
+ program does dynamically use the unwind instruction, the program will print
+ a message and then abort. +

+ +

+ 'Expensive' exception handling support gives the full exception handling + support to the program at the cost of making the 'invoke' instruction + really expensive. It basically inserts setjmp/longjmp calls to emulate the + exception handling as necessary. +

+ +

+ Because the 'expensive' support slows down programs a lot, and EH is only + used for a subset of the programs, it must be specifically enabled by the + -enable-correct-eh-support option. +

+ +

+ Note that after this pass runs the CFG is not entirely accurate (exceptional + control flow edges are not correct anymore) so only very simple things should + be done after the lowerinvoke pass has run (like generation of native code). + This should not be used as a general purpose "my LLVM-to-LLVM pass doesn't + support the invoke instruction yet" lowering pass. +

+
+ + + +
+

+ Lowers setjmp and longjmp to use the LLVM invoke and unwind + instructions as necessary. +

+ +

+ Lowering of longjmp is fairly trivial. We replace the call with a
+ call to the LLVM library function __llvm_sjljeh_throw_longjmp().
+ This unwinds the stack for us, calling all of the destructors for
+ objects allocated on the stack. +

+ +

+ At a setjmp call, the basic block is split and the setjmp
+ removed. The calls in a function that have a setjmp are converted to
+ invoke where the except part checks to see if it's a longjmp
+ exception and, if so, whether it's handled in the function. If it is, then it
+ gets the value returned by the longjmp and goes to where the basic
+ block was split. invoke instructions are handled in a similar fashion:
+ the original except block is executed unless the exception is a
+ longjmp that is handled by the function. +

+
+ + + +
+

+ Rewrites switch instructions with a sequence of branches, which + allows targets to get away with not implementing the switch instruction until + it is convenient. +

+
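+ A source-level sketch of the rewrite:
+
+// Before: a single switch terminator.
+int classify(int X) {
+  switch (X) {
+  case 0:  return 10;
+  case 1:  return 20;
+  default: return 0;
+  }
+}
+
+// After lowering (conceptually): a chain of conditional branches that any
+// target can emit.
+int classify_lowered(int X) {
+  if (X == 0) return 10;
+  if (X == 1) return 20;
+  return 0;
+}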
+ + + +
+

+ This file promotes memory references to be register references. It promotes
+ alloca instructions which only have loads and
+ stores as uses. An alloca is transformed by using dominance
+ frontiers to place phi nodes, then traversing the function in
+ depth-first order to rewrite loads and stores as
+ appropriate. This is just the standard SSA construction algorithm used to
+ construct "pruned" SSA form. +

+
+ + + +
+

+ This pass performs various transformations related to eliminating memcpy + calls, or transforming sets of stores into memset's. +

+
+ + + +
+

This pass looks for equivalent functions that are mergeable and folds them. +
+ A hash is computed from the function, based on its type and number of
+ basic blocks. +
+ Once all hashes are computed, we perform an expensive equality comparison
+ on each function pair. This takes n^2/2 comparisons per bucket, so it's
+ important that the hash function be high quality. The equality comparison
+ iterates through each instruction in each basic block. +
+ When a match is found the functions are folded. If both functions are
+ overridable, we move the functionality into a new internal function and
+ leave two overridable thunks to it. +

+
+ + + +
+

+ This pass ensures that functions have at most one ret instruction in
+ them. Additionally, it keeps track of which node is the new exit node of the
+ CFG. +

+
+ + + +
+

This pass performs partial inlining, typically by inlining an if + statement that surrounds the body of the function. +

+
+ + + +
+

This pass finds function arguments that are often a common constant and + specializes a version of the called function for that constant. + + This pass simply does the cloning for functions it specializes. It depends + on IPSCCP and DAE to clean up the results. + + The initial heuristic favors constant arguments that are used in control + flow. +

+
+ + + +
+

+ This file implements a simple interprocedural pass which walks the call-graph, + turning invoke instructions into call instructions if and + only if the callee cannot throw an exception. It implements this as a + bottom-up traversal of the call-graph. +

+
+ + + +
+

+ This pass reassociates commutative expressions in an order that is designed + to promote better constant propagation, GCSE, LICM, PRE, etc. +

+ +

+ For example: 4 + (x + 5) ⇒ x + (4 + 5) +

+ +

+ In the implementation of this algorithm, constants are assigned rank = 0,
+ function arguments are rank = 1, and other values are assigned ranks
+ corresponding to the reverse post order traversal of the current function
+ (starting at 2), which effectively gives values in deep loops a higher rank
+ than values not in loops. +

+
+ + + +
+

+ This file demotes all registers to memory references. It is intended to be
+ the inverse of -mem2reg. By converting to
+ load instructions, the only values live across basic blocks are
+ alloca instructions and load instructions before
+ phi nodes. It is intended that this should make CFG hacking much
+ easier. To make later hacking easier, the entry block is split into two, such
+ that all introduced alloca instructions (and nothing else) are in the
+ entry block. +

+
+ + + +
+

+ The well-known scalar replacement of aggregates transformation. This + transform breaks up alloca instructions of aggregate type (structure + or array) into individual alloca instructions for each member if + possible. Then, if possible, it transforms the individual alloca + instructions into nice clean scalar SSA form. +

+ +

+ This combines a simple scalar replacement of aggregates algorithm with the mem2reg algorithm because the two
+ often interact, especially for C++ programs. As such, iterating between
+ scalarrepl, then mem2reg until we run
+ out of things to promote works well. +

+
+ + + +
+

+ Sparse conditional constant propagation and merging, which can be summarized + as: +

+ +
    +
  1. Assumes values are constant unless proven otherwise
  2. +
  3. Assumes BasicBlocks are dead unless proven otherwise
  4. +
  5. Proves values to be constant, and replaces them with constants
  6. +
  7. Proves conditional branches to be unconditional
  8. +
+ +

+ Note that this pass has a habit of making definitions be dead. It is a good
+ idea to run a DCE pass sometime after running this pass. +

+
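+ A source-level sketch of the four points above (illustrative only; the
+ helper g() is hypothetical):
+
+int g();               // provably never called after SCCP runs
+
+int f() {
+  int X = 4;           // assumed constant unless proven otherwise -- it is
+  if (X > 3)           // branch proven to be unconditional
+    return X;          // X proven constant; becomes "return 4"
+  return g();          // block proven dead; a later DCE pass deletes it
+}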
+ + + +
+

This pass moves instructions into successor blocks, when possible, so that + they aren't executed on paths where their results aren't needed. +

+
+ + + +
+

+ Applies a variety of small optimizations to calls to specific well-known
+ functions (e.g. runtime library functions). For example, a call to
+ exit(3) that occurs within the main() function can be
+ transformed into simply return 3. +

+
+ + + +
+

Simple pass that applies an experimental transformation on calls + to specific functions. +

+
+ + + +
+

+ Performs dead code elimination and basic block merging. Specifically: +

+ +
    +
  1. Removes basic blocks with no predecessors.
  2. +
  3. Merges a basic block into its predecessor if there is only one and the + predecessor only has one successor.
  4. +
  5. Eliminates PHI nodes for basic blocks with a single predecessor.
  6. +
  7. Eliminates a basic block that only contains an unconditional + branch.
  8. +
+
+ + + +
+

This pass breaks GEPs with more than 2 non-zero operands into smaller
+ GEPs, each with no more than 2 non-zero operands. This exposes redundancy
+ between GEPs with common initial operand sequences. +

+
+ + + +
+

This pass converts a list of variables to the Static Single Information + form. + + We are building an on-demand representation, that is, we do not convert + every single variable in the target function to SSI form. Rather, we receive + a list of target variables that must be converted. We also do not + completely convert a target variable to the SSI format. Instead, we only + change the variable in the points where new information can be attached + to its live range, that is, at branch points. +

+
+ + + +
+

A pass that runs SSI on every non-void variable, intended for debugging. +

+
+ + + +
+

+ Performs code stripping. This transformation can delete: +

+ +
    +
  1. names for virtual registers
  2. +
  3. symbols for internal globals and functions
  4. +
  5. debug information
  6. +
+ +

+ Note that this transformation makes code much less readable, so it should
+ only be used in situations where the strip utility would be used,
+ such as reducing code size or making it harder to reverse engineer code. +

+
+ + + +
+

+ This pass loops over all of the functions in the input module, looking for
+ dead declarations, and removes them. Dead declarations are declarations of
+ functions for which no implementation is available (i.e., declarations for
+ unused library functions). +

+
+ + + +
+

This pass implements code stripping. Specifically, it can delete:

+
    +
  • names for virtual registers
  • +
  • symbols for internal globals and functions
  • +
  • debug information
  • +
+

+ Note that this transformation makes code much less readable, so it should + only be used in situations where the 'strip' utility would be used, such as + reducing code size or making it harder to reverse engineer code. +

+
+ + + +
+

This pass implements code stripping. Specifically, it can delete:

+
    +
  • names for virtual registers
  • +
  • symbols for internal globals and functions
  • +
  • debug information
  • +
+

+ Note that this transformation makes code much less readable, so it should + only be used in situations where the 'strip' utility would be used, such as + reducing code size or making it harder to reverse engineer code. +

+
+ + + +
+

+ This pass finds functions that return a struct (using a pointer to the struct + as the first argument of the function, marked with the 'sret' attribute) and + replaces them with a new function that simply returns each of the elements of + that struct (using multiple return values). +

+ +

+ This pass works under a number of conditions: +

+ +
    +
  • The returned struct must not contain other structs
  • +
  • The returned struct must only be used to load values from
  • +
  • The placeholder struct passed in is the result of an alloca
  • +
+
+ + + +
+

+ This file transforms calls of the current function (self recursion) followed + by a return instruction with a branch to the entry of the function, creating + a loop. This pass also implements the following extensions to the basic + algorithm: +

+ +
    +
  • Trivial instructions between the call and return do not prevent the + transformation from taking place, though currently the analysis cannot + support moving any really useful instructions (only dead ones). +
  • This pass transforms functions that are prevented from being tail
+ recursive by an associative expression to use an accumulator variable,
+ thus compiling the typical naive factorial or fib implementation
+ into efficient code (see the sketch after this list). +
  • TRE is performed if the function returns void, if the return
+ returns the result returned by the call, or if the function returns a
+ run-time constant on all exits from the function. It is possible, though
+ unlikely, for the return to return something else (like constant 0); this
+ can still be TRE'd provided all other return instructions in the
+ function return the exact same value. +
  • If it can prove that callees do not access their caller's stack frame,
+ they are marked as eligible for tail call elimination (by the code
+ generator). +
+
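+ The accumulator extension can be sketched at the source level (illustrative
+ only):
+
+// Before: not tail recursive -- the multiply happens after the call returns.
+unsigned fact(unsigned N) {
+  return N <= 1 ? 1 : N * fact(N - 1);
+}
+
+// After (conceptually): the pass introduces an accumulator for the pending
+// multiply, and the now-tail call becomes a branch back to the entry.
+unsigned fact_tre(unsigned N) {
+  unsigned Acc = 1;    // accumulator variable introduced by the pass
+  for (; N > 1; --N)
+    Acc *= N;
+  return Acc;
+}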
+ + + +
+

+ This pass performs a limited form of tail duplication, intended to simplify + CFGs by removing some unconditional branches. This pass is necessary to + straighten out loops created by the C front-end, but also is capable of + making other code nicer. After this pass is run, the CFG simplify pass + should be run to clean up the mess. +

+
+ + + +
+

This section describes the LLVM Utility Passes.

+
+ + + +
+

+ Same as dead argument elimination, but deletes arguments to functions which + are external. This is only for use by bugpoint.

+
+ + + +
+

+ This pass is used by bugpoint to extract all blocks from the module into their + own functions.

+
+ + + +
+

This is a little utility pass that gives instructions names; this is mostly
+ useful when diffing the effect of an optimization, because deleting an
+ unnamed instruction can change all other instruction numbering, making the
+ diff very noisy. +

+
+ + + +
+

+ Ensures that the module is in the form required by the Module Verifier pass. +

+ +

+ Running the verifier runs this pass automatically, so there should be no need + to use it directly. +

+
+ + + +
+

+ Verifies LLVM IR code. This is useful to run after an optimization which is
+ undergoing testing. Note that llvm-as verifies its input before
+ emitting bitcode, and also that malformed bitcode is likely to make LLVM
+ crash. All language front-ends are therefore encouraged to verify their output
+ before performing optimizing transformations. +

+ +
    +
  • Both of a binary operator's parameters are of the same type.
  • +
  • Verify that the indices of mem access instructions match other + operands.
  • +
  • Verify that arithmetic and other things are only performed on
+ first-class types. Verify that shifts and logical operations only happen
+ on integral types, for example.
  • +
  • All of the constants in a switch statement are of the correct type.
  • +
  • The code is in valid SSA form.
  • +
  • It is illegal to put a label into any other type (like a structure) or + to return one.
  • +
  • Only phi nodes can be self referential: %x = add i32 %x, %x is + invalid.
  • +
  • PHI nodes must have an entry for each predecessor, with no extras.
  • +
  • PHI nodes must be the first thing in a basic block, all grouped + together.
  • +
  • PHI nodes must have at least one entry.
  • +
  • All basic blocks should only end with terminator insts, not contain + them.
  • +
  • The entry node to a function must not have predecessors.
  • +
  • All Instructions must be embedded into a basic block.
  • +
  • Functions cannot take a void-typed parameter.
  • +
  • Verify that a function's argument list agrees with its declared + type.
  • +
  • It is illegal to specify a name for a void value.
  • +
  • It is illegal to have an internal global value with no initializer.
  • +
  • It is illegal to have a ret instruction that returns a value that does + not agree with the function return value type.
  • +
  • Function call argument types match the function prototype.
  • +
  • All other things that are tested by asserts spread about the code.
  • +
+ +

+ Note that this does not provide full security verification (like Java), but + instead just tries to ensure that code is well-formed. +

+
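+ The verifier can also be invoked from C++. A minimal sketch, assuming this
+ release's llvm/Analysis/Verifier.h interface:
+
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Module.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+using namespace llvm;
+
+static bool checkModule(Module &M) {
+  std::string Err;
+  // ReturnStatusAction reports brokenness instead of aborting the process;
+  // verifyModule returns true if the module is broken.
+  if (verifyModule(M, ReturnStatusAction, &Err)) {
+    errs() << "Module is broken: " << Err << "\n";
+    return false;
+  }
+  return true;
+}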
+ + + +
+

+ Displays the control flow graph using the GraphViz tool. +

+
+ + + +
+

+ Displays the control flow graph using the GraphViz tool, but omitting function + bodies. +

+
+ + + +
+

+ Displays the dominator tree using the GraphViz tool. +

+
+ + + +
+

+ Displays the dominator tree using the GraphViz tool, but omitting function + bodies. +

+
+ + + +
+

+ Displays the post dominator tree using the GraphViz tool. +

+
+ + + +
+

+ Displays the post dominator tree using the GraphViz tool, but omitting + function bodies. +

+
+ + + +
+
+ Reid Spencer
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/ProgrammersManual.html b/final/docs/ProgrammersManual.html new file mode 100644 index 00000000000..bc09ca957d0 --- /dev/null +++ b/final/docs/ProgrammersManual.html @@ -0,0 +1,3965 @@ + + + + + LLVM Programmer's Manual + + + + +
+ LLVM Programmer's Manual +
+ +
    +
  1. Introduction
  2. +
  3. General Information + +
  4. +
  5. Important and useful LLVM APIs + +
  6. +
  7. Picking the Right Data Structure for a Task + +
  8. +
  9. Helpful Hints for Common Operations + +
  10. + +
  11. Threads and LLVM + +
  12. + +
  13. Advanced Topics +
  14. + +
  15. The Core LLVM Class Hierarchy Reference + +
  16. +
+ + + + + + + +
+ +

This document is meant to highlight some of the important classes and +interfaces available in the LLVM source-base. This manual is not +intended to explain what LLVM is, how it works, and what LLVM code looks +like. It assumes that you know the basics of LLVM and are interested +in writing transformations or otherwise analyzing or manipulating the +code.

+ +

This document should get you oriented so that you can find your +way in the continuously growing source code that makes up the LLVM +infrastructure. Note that this manual is not intended to serve as a +replacement for reading the source code, so if you think there should be +a method in one of these classes to do something, but it's not listed, +check the source. Links to the doxygen sources +are provided to make this as easy as possible.

+ +

The first section of this document describes general information that is +useful to know when working in the LLVM infrastructure, and the second describes +the Core LLVM classes. In the future this manual will be extended with +information describing how to use extension libraries, such as dominator +information, CFG traversal routines, and useful utilities like the InstVisitor template.

+ +
+ + + + + +
+ +

This section contains general information that is useful if you are working +in the LLVM source-base, but that isn't specific to any particular API.

+ +
+ + + + +
+ +

LLVM makes heavy use of the C++ Standard Template Library (STL), +perhaps much more than you are used to, or have seen before. Because of +this, you might want to do a little background reading in the +techniques used and capabilities of the library. There are many good +pages that discuss the STL, and several books on the subject that you +can get, so it will not be discussed in this document.

+ +

Here are some useful links:

+ +
    + +
  1. Dinkumware +C++ Library reference - an excellent reference for the STL and other parts +of the standard C++ library.
  2. + +
  3. C++ In a Nutshell - This is an +O'Reilly book in the making. It has a decent Standard Library +Reference that rivals Dinkumware's, and is unfortunately no longer free since the +book has been published.
  4. + +
  5. C++ Frequently Asked +Questions
  6. + +
  7. SGI's STL Programmer's Guide - +Contains a useful Introduction to the +STL.
  8. + +
  9. Bjarne Stroustrup's C++ +Page
  10. + +
  11. +Bruce Eckel's Thinking in C++, 2nd ed. Volume 2 Revision 4.0 (even better, get +the book).
  12. + +
+ +

You are also encouraged to take a look at the LLVM Coding Standards guide which focuses on how +to write maintainable code more than where to put your curly braces.

+ +
+ + + + + + + + + + +
+ +

Here we highlight some LLVM APIs that are generally useful and good to +know about when writing transformations.

+ +
+ + + + +
+ +

The LLVM source-base makes extensive use of a custom form of RTTI. +These templates have many similarities to the C++ dynamic_cast<> +operator, but they don't have some drawbacks (primarily stemming from +the fact that dynamic_cast<> only works on classes that +have a v-table). Because they are used so often, you must know what they +do and how they work. All of these templates are defined in the llvm/Support/Casting.h +file (note that you very rarely have to include this file directly).

+ +
+
isa<>:
+ +

The isa<> operator works exactly like the Java + "instanceof" operator. It returns true or false depending on whether + a reference or pointer points to an instance of the specified class. This can + be very useful for constraint checking of various sorts (example below).

+
+ +
cast<>:
+ +

The cast<> operator is a "checked cast" operation. It
+ converts a pointer or reference from a base class to a derived class, causing
+ an assertion failure if it is not really an instance of the right type. This
+ should be used in cases where you have some information that makes you believe
+ that something is of the right type. An example of the isa<>
+ and cast<> templates is:

+ +
+
+static bool isLoopInvariant(const Value *V, const Loop *L) {
+  if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V))
+    return true;
+
+  // Otherwise, it must be an instruction...
+  return !L->contains(cast<Instruction>(V)->getParent());
+}
+
+
+ +

Note that you should not use an isa<> test followed
+ by a cast<>; for that, use the dyn_cast<>
+ operator.

+ +
+ +
dyn_cast<>:
+ +

The dyn_cast<> operator is a "checking cast" operation. + It checks to see if the operand is of the specified type, and if so, returns a + pointer to it (this operator does not work with references). If the operand is + not of the correct type, a null pointer is returned. Thus, this works very + much like the dynamic_cast<> operator in C++, and should be + used in the same circumstances. Typically, the dyn_cast<> + operator is used in an if statement or some other flow control + statement like this:

+ +
+
+if (AllocationInst *AI = dyn_cast<AllocationInst>(Val)) {
+  // ...
+}
+
+
+ +

This form of the if statement effectively combines together a call + to isa<> and a call to cast<> into one + statement, which is very convenient.

+ +

Note that the dyn_cast<> operator, like C++'s + dynamic_cast<> or Java's instanceof operator, can be + abused. In particular, you should not use big chained if/then/else + blocks to check for lots of different variants of classes. If you find + yourself wanting to do this, it is much cleaner and more efficient to use the + InstVisitor class to dispatch over the instruction type directly.

+ +
+ +
cast_or_null<>:
+ +

The cast_or_null<> operator works just like the + cast<> operator, except that it allows for a null pointer as an + argument (which it then propagates). This can sometimes be useful, allowing + you to combine several null checks into one.

+ +
dyn_cast_or_null<>:
+ +

The dyn_cast_or_null<> operator works just like the + dyn_cast<> operator, except that it allows for a null pointer + as an argument (which it then propagates). This can sometimes be useful, + allowing you to combine several null checks into one.
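+ For example (a sketch, assuming V is a Value* that may
+ legitimately be null):
+
+// One test replaces "V && isa<Instruction>(V)" followed by a cast.
+if (Instruction *I = dyn_cast_or_null<Instruction>(V)) {
+  // V was non-null and is an Instruction; use I here.
+}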

+ +
+ +

These five templates can be used with any classes, whether they have a
+v-table or not. To add support for these templates, you simply need to add
+classof static methods to the class you are interested in casting
+to. Describing this is currently outside the scope of this document, but there
+are lots of examples in the LLVM source base.

+ +
+ + + + + +
+ +

Although LLVM generally does not do much string manipulation, we do have +several important APIs which take strings. Two important examples are the +Value class -- which has names for instructions, functions, etc. -- and the +StringMap class which is used extensively in LLVM and Clang.

+ +

These are generic classes, and they need to be able to accept strings which +may have embedded null characters. Therefore, they cannot simply take +a const char *, and taking a const std::string& requires +clients to perform a heap allocation which is usually unnecessary. Instead, +many LLVM APIs use a StringRef or a const Twine& for +passing strings efficiently.

+ +
+ + + + +
+ +

The StringRef data type represents a reference to a constant string
+(a character array and a length) and supports the common operations available
+on std::string, but does not require heap allocation.

+ +

It can be implicitly constructed using a C style null-terminated string, +an std::string, or explicitly with a character pointer and length. +For example, the StringRef find function is declared as:

+ +
+  iterator find(StringRef Key);
+
+ +

and clients can call it using any one of:

+ +
+  Map.find("foo");                 // Lookup "foo"
+  Map.find(std::string("bar"));    // Lookup "bar"
+  Map.find(StringRef("\0baz", 4)); // Lookup "\0baz"
+
+ +

Similarly, APIs which need to return a string may return a StringRef +instance, which can be used directly or converted to an std::string +using the str member function. See +"llvm/ADT/StringRef.h" +for more information.

+ +

You should rarely use the StringRef class directly; because it contains
+pointers to external memory, it is not generally safe to store an instance of
+the class (unless you know that the external storage will not be freed).
+StringRef is small and pervasive enough in LLVM that it should always be
+passed by value.

+ +
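+ A minimal sketch of an API written in this style (the function itself is
+ hypothetical):
+
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+// Take StringRef by value; callers may pass a string literal, an
+// std::string, or another StringRef with no heap allocation.
+static bool isInternalName(StringRef Name) {
+  return !Name.empty() && Name.startswith(".");
+}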
+ + + + +
+ +

The Twine class is an efficient way for APIs to accept concatenated +strings. For example, a common LLVM paradigm is to name one instruction based on +the name of another instruction with a suffix, for example:

+ +
+
+    New = CmpInst::Create(..., SO->getName() + ".cmp");
+
+
+ +

The Twine class is effectively a
+lightweight rope
+which points to temporary (stack allocated) objects. Twines can be implicitly
+constructed as the result of the plus operator applied to strings (i.e., a C
+string, an std::string, or a StringRef). The twine delays the
+actual concatenation of strings until it is required, at which point
+it can be efficiently rendered directly into a character array. This avoids
+unnecessary heap allocation involved in constructing the temporary results of
+string concatenation. See
+"llvm/ADT/Twine.h"
+for more information.

+ +

As with a StringRef, Twine objects point to external memory +and should almost never be stored or mentioned directly. They are intended +solely for use when defining a function which should be able to efficiently +accept concatenated strings.

+ +
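+ A minimal sketch of a function written in this style (the function itself
+ is hypothetical):
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Value.h"
+using namespace llvm;
+
+// Callers can write nameAfter(New, Old, ".cmp"); the concatenation is only
+// rendered if setName actually needs the string.
+static void nameAfter(Value *New, const Value *Old, const Twine &Suffix) {
+  New->setName(Old->getName() + Suffix);
+}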
+ + + + + +
+ +

Often when working on your pass you will put a bunch of debugging printouts +and other code into your pass. After you get it working, you want to remove +it, but you may need it again in the future (to work out new bugs that you run +across).

+ +

Naturally, because of this, you don't want to delete the debug printouts, +but you don't want them to always be noisy. A standard compromise is to comment +them out, allowing you to enable them if you need them in the future.

+ +

The "llvm/Support/Debug.h" +file provides a macro named DEBUG() that is a much nicer solution to +this problem. Basically, you can put arbitrary code into the argument of the +DEBUG macro, and it is only executed if 'opt' (or any other +tool) is run with the '-debug' command line argument:

+ +
+
+DEBUG(errs() << "I am here!\n");
+
+
+ +

Then you can run your pass like this:

+ +
+
+$ opt < a.bc > /dev/null -mypass
+<no output>
+$ opt < a.bc > /dev/null -mypass -debug
+I am here!
+
+
+ +

Using the DEBUG() macro instead of a home-brewed solution allows you +to not have to create "yet another" command line option for the debug output for +your pass. Note that DEBUG() macros are disabled for optimized builds, +so they do not cause a performance impact at all (for the same reason, they +should also not contain side-effects!).

+ +

One additional nice thing about the DEBUG() macro is that you can
+enable or disable it directly in gdb. Just use "set DebugFlag=0" or
+"set DebugFlag=1" from within gdb if the program is running. If the
+program hasn't been started yet, you can always just run it with
+-debug.

+ +
+ + + + +
+ +

Sometimes you may find yourself in a situation where enabling -debug
+just turns on too much information (such as when working on the code
+generator). If you want to enable debug information with more fine-grained
+control, you can define the DEBUG_TYPE macro and use the -debug-only
+option as follows:

+ +
+
+#undef  DEBUG_TYPE
+DEBUG(errs() << "No debug type\n");
+#define DEBUG_TYPE "foo"
+DEBUG(errs() << "'foo' debug type\n");
+#undef  DEBUG_TYPE
+#define DEBUG_TYPE "bar"
+DEBUG(errs() << "'bar' debug type\n");
+#undef  DEBUG_TYPE
+#define DEBUG_TYPE ""
+DEBUG(errs() << "No debug type (2)\n");
+
+
+ +

Then you can run your pass like this:

+ +
+
+$ opt < a.bc > /dev/null -mypass
+<no output>
+$ opt < a.bc > /dev/null -mypass -debug
+No debug type
+'foo' debug type
+'bar' debug type
+No debug type (2)
+$ opt < a.bc > /dev/null -mypass -debug-only=foo
+'foo' debug type
+$ opt < a.bc > /dev/null -mypass -debug-only=bar
+'bar' debug type
+
+
+ +

Of course, in practice, you should only set DEBUG_TYPE at the top of
+a file, to specify the debug type for the entire module (if you do this before
+you #include "llvm/Support/Debug.h", you don't have to insert the ugly
+#undef's). Also, you should use names more meaningful than "foo" and
+"bar", because there is no system in place to ensure that names do not
+conflict. If two different modules use the same string, they will all be turned
+on when the name is specified. This allows, for example, all debug information
+for instruction scheduling to be enabled with -debug-only=InstrSched,
+even if the source lives in multiple files.

+ +

The DEBUG_WITH_TYPE macro is also available for situations where you +would like to set DEBUG_TYPE, but only for one specific DEBUG +statement. It takes an additional first parameter, which is the type to use. For +example, the preceding example could be written as:

+ + +
+
+DEBUG_WITH_TYPE("", errs() << "No debug type\n");
+DEBUG_WITH_TYPE("foo", errs() << "'foo' debug type\n");
+DEBUG_WITH_TYPE("bar", errs() << "'bar' debug type\n"));
+DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");
+
+
+ +
+ + + + +
+ +

The "llvm/ADT/Statistic.h" file +provides a class named Statistic that is used as a unified way to +keep track of what the LLVM compiler is doing and how effective various +optimizations are. It is useful to see what optimizations are contributing to +making a particular program run faster.

+ +

Often you may run your pass on some big program, and you're interested to see +how many times it makes a certain transformation. Although you can do this with +hand inspection, or some ad-hoc method, this is a real pain and not very useful +for big programs. Using the Statistic class makes it very easy to +keep track of this information, and the calculated information is presented in a +uniform manner with the rest of the passes being executed.

+ +

There are many examples of Statistic uses, but the basics of using +it are as follows:

+ +
    +
  1. Define your statistic like this:

    + +
    +
    +#define DEBUG_TYPE "mypassname"   // This goes before any #includes.
    +STATISTIC(NumXForms, "The # of times I did stuff");
    +
    +
    + +

    The STATISTIC macro defines a static variable, whose name is + specified by the first argument. The pass name is taken from the DEBUG_TYPE + macro, and the description is taken from the second argument. The variable + defined ("NumXForms" in this case) acts like an unsigned integer.

  2. + +
  3. Whenever you make a transformation, bump the counter:

    + +
    +
    +++NumXForms;   // I did stuff!
    +
    +
    + +
  4. +
+ +

That's all you have to do. To get 'opt' to print out the + statistics gathered, use the '-stats' option:

+ +
+
+$ opt -stats -mypassname < program.bc > /dev/null
+... statistics output ...
+
+
+ +

When running opt on a C file from the SPEC benchmark +suite, it gives a report that looks like this:

+ +
+
+   7646 bitcodewriter   - Number of normal instructions
+    725 bitcodewriter   - Number of oversized instructions
+ 129996 bitcodewriter   - Number of bitcode bytes written
+   2817 raise           - Number of insts DCEd or constprop'd
+   3213 raise           - Number of cast-of-self removed
+   5046 raise           - Number of expression trees converted
+     75 raise           - Number of other getelementptr's formed
+    138 raise           - Number of load/store peepholes
+     42 deadtypeelim    - Number of unused typenames removed from symtab
+    392 funcresolve     - Number of varargs functions resolved
+     27 globaldce       - Number of global variables removed
+      2 adce            - Number of basic blocks removed
+    134 cee             - Number of branches revectored
+     49 cee             - Number of setcc instruction eliminated
+    532 gcse            - Number of loads removed
+   2919 gcse            - Number of instructions removed
+     86 indvars         - Number of canonical indvars added
+     87 indvars         - Number of aux indvars removed
+     25 instcombine     - Number of dead inst eliminate
+    434 instcombine     - Number of insts combined
+    248 licm            - Number of load insts hoisted
+   1298 licm            - Number of insts hoisted to a loop pre-header
+      3 licm            - Number of insts hoisted to multiple loop preds (bad, no loop pre-header)
+     75 mem2reg         - Number of alloca's promoted
+   1444 cfgsimplify     - Number of blocks simplified
+
+
+ +

Obviously, with so many optimizations, having a unified framework for this +stuff is very nice. Making your pass fit well into the framework makes it more +maintainable and useful.

+ +
+ + + + +
+ +

Several of the important data structures in LLVM are graphs: for example +CFGs made out of LLVM BasicBlocks, CFGs made out of +LLVM MachineBasicBlocks, and +Instruction Selection +DAGs. In many cases, while debugging various parts of the compiler, it is +nice to instantly visualize these graphs.

+ +

LLVM provides several callbacks that are available in a debug build to do +exactly that. If you call the Function::viewCFG() method, for example, +the current LLVM tool will pop up a window containing the CFG for the function +where each basic block is a node in the graph, and each node contains the +instructions in the block. Similarly, there also exists +Function::viewCFGOnly() (does not include the instructions), the +MachineFunction::viewCFG() and MachineFunction::viewCFGOnly(), +and the SelectionDAG::viewGraph() methods. Within GDB, for example, +you can usually use something like call DAG.viewGraph() to pop +up a window. Alternatively, you can sprinkle calls to these functions in your +code in places you want to debug.

+ +

Getting this to work requires a small amount of configuration. On Unix
+systems with X11, install the graphviz
+toolkit, and make sure 'dot' and 'gv' are in your path. If you are running on
+Mac OS/X, download and install the Mac OS/X Graphviz program, and add
+/Applications/Graphviz.app/Contents/MacOS/ (or wherever you install
+it) to your path. Once your system and path are set up, rerun the LLVM
+configure script and rebuild LLVM to enable this functionality.

+ +

SelectionDAG has been extended to make it easier to locate +interesting nodes in large complex graphs. From gdb, if you +call DAG.setGraphColor(node, "color"), then the +next call DAG.viewGraph() would highlight the node in the +specified color (choices of colors can be found at colors.) More +complex node attributes can be provided with call +DAG.setGraphAttrs(node, "attributes") (choices can be +found at Graph +Attributes.) If you want to restart and clear all the current graph +attributes, then you can call DAG.clearGraphAttrs().

+ +
+ + + + + +
+ +

LLVM has a plethora of data structures in the llvm/ADT/ directory, + and we commonly use STL data structures. This section describes the trade-offs + you should consider when you pick one.

+ +

+The first step is a choose your own adventure: do you want a sequential +container, a set-like container, or a map-like container? The most important +thing when choosing a container is the algorithmic properties of how you plan to +access the container. Based on that, you should use:

+ +
    +
  • a map-like container if you need efficient look-up
+ of a value based on another value. Map-like containers also support
+ efficient queries for containment (whether a key is in the map). Map-like
+ containers generally do not support efficient reverse mapping (values to
+ keys). If you need that, use two maps. Some map-like containers also
+ support efficient iteration through the keys in sorted order. Map-like
+ containers are the most expensive sort; only use them if you need one of
+ these capabilities.
  • + +
  • a set-like container if you need to put a bunch of + stuff into a container that automatically eliminates duplicates. Some + set-like containers support efficient iteration through the elements in + sorted order. Set-like containers are more expensive than sequential + containers. +
  • + +
  • a sequential container provides + the most efficient way to add elements and keeps track of the order they are + added to the collection. They permit duplicates and support efficient + iteration, but do not support efficient look-up based on a key. +
  • + +
  • a string container is a specialized sequential + container or reference structure that is used for character or byte + arrays.
  • + +
  • a bit container provides an efficient way to store and + perform set operations on sets of numeric id's, while automatically + eliminating duplicates. Bit containers require a maximum of 1 bit for each + identifier you want to store. +
  • +
+ +

+Once the proper category of container is determined, you can fine tune the +memory use, constant factors, and cache behaviors of access by intelligently +picking a member of the category. Note that constant factors and cache behavior +can be a big deal. If you have a vector that usually only contains a few +elements (but could contain many), for example, it's much better to use +SmallVector than vector +. Doing so avoids (relatively) expensive malloc/free calls, which dwarf the +cost of adding the elements to the container.

+ +
+ + + + +
+There are a variety of sequential containers available for you, based on your +needs. Pick the first in this section that will do what you want. +
+ + + + +
+

Fixed size arrays are very simple and very fast. They are good if you know +exactly how many elements you have, or you have a (low) upper bound on how many +you have.

+
+ + + + +
+

Heap allocated arrays (new[] + delete[]) are also simple. They are good if +the number of elements is variable, if you know how many elements you will need +before the array is allocated, and if the array is usually large (if not, +consider a SmallVector). The cost of a heap +allocated array is the cost of the new/delete (aka malloc/free). Also note that +if you are allocating an array of a type with a constructor, the constructor and +destructors will be run for every element in the array (re-sizable vectors only +construct those elements actually used).

+
+ + + + +
+

SmallVector<Type, N> is a simple class that looks and smells +just like vector<Type>: +it supports efficient iteration, lays out elements in memory order (so you can +do pointer arithmetic between elements), supports efficient push_back/pop_back +operations, supports efficient random access to its elements, etc.

+ +

The advantage of SmallVector is that it allocates space for +some number of elements (N) in the object itself. Because of this, if +the SmallVector is dynamically smaller than N, no malloc is performed. This can +be a big win in cases where the malloc/free call is far more expensive than the +code that fiddles around with the elements.

+ +

This is good for vectors that are "usually small" (e.g. the number of +predecessors/successors of a block is usually less than 8). On the other hand, +this makes the size of the SmallVector itself large, so you don't want to +allocate lots of them (doing so will waste a lot of space). As such, +SmallVectors are most useful when on the stack.

+ +

SmallVector also provides a nice portable and efficient replacement for +alloca.

+ +
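+ A typical use (a sketch, assuming the usual CFG helpers from
+ llvm/Support/CFG.h):
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+using namespace llvm;
+
+// Most blocks have few predecessors, so a small number of in-object slots
+// usually means no heap allocation at all.
+static void collectPreds(BasicBlock *BB,
+                         SmallVectorImpl<BasicBlock*> &Preds) {
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    Preds.push_back(*PI);
+}
+
+// Callers declare: SmallVector<BasicBlock*, 8> Preds; collectPreds(BB, Preds);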
+ + + + +
+

+std::vector is well loved and respected. It is useful when SmallVector isn't: +when the size of the vector is often large (thus the small optimization will +rarely be a benefit) or if you will be allocating many instances of the vector +itself (which would waste space for elements that aren't in the container). +vector is also useful when interfacing with code that expects vectors :). +

+ +

One worthwhile note about std::vector: avoid code like this:

+ +
+
+for ( ... ) {
+   std::vector<foo> V;
+   use V;
+}
+
+
+ +

Instead, write this as:

+ +
+
+std::vector<foo> V;
+for ( ... ) {
+   use V;
+   V.clear();
+}
+
+
+ +

Doing so will save (at least) one heap allocation and free per iteration of +the loop.

+ +
+ + +
+ <deque> +
+ +
+

std::deque is, in some senses, a generalized version of std::vector. Like
+std::vector, it provides constant time random access and other similar
+properties, but it also provides efficient access to the front of the list. It
+does not, however, guarantee that elements are contiguous in memory.

+ +

In exchange for this extra flexibility, std::deque has significantly higher +constant factor costs than std::vector. If possible, use std::vector or +something cheaper.

+
+ + +
+ <list> +
+ +
+

std::list is an extremely inefficient class that is rarely useful. +It performs a heap allocation for every element inserted into it, thus having an +extremely high constant factor, particularly for small data types. std::list +also only supports bidirectional iteration, not random access iteration.

+ +

In exchange for this high cost, std::list supports efficient access to both
+ends of the list (like std::deque, but unlike std::vector or SmallVector). In
+addition, the iterator invalidation characteristics of std::list are stronger
+than that of a vector class: inserting or removing an element into the list does
+not invalidate iterators or pointers to other elements in the list.

+
+ + + + +
+

ilist<T> implements an 'intrusive' doubly-linked list. It is +intrusive, because it requires the element to store and provide access to the +prev/next pointers for the list.

+ +

ilist has the same drawbacks as std::list, and additionally +requires an ilist_traits implementation for the element type, but it +provides some novel characteristics. In particular, it can efficiently store +polymorphic objects, the traits class is informed when an element is inserted or +removed from the list, and ilists are guaranteed to support a +constant-time splice operation.

+ +

These properties are exactly what we want for things like +Instructions and basic blocks, which is why these are implemented with +ilists.

+ +Related classes of interest are explained in the following subsections: + +
+ + + + +
+

ilist_traits<T> is ilist<T>'s customization +mechanism. iplist<T> (and consequently ilist<T>) +publicly derive from this traits class.

+
+ + +
+ iplist +
+ +
+

iplist<T> is ilist<T>'s base and as such +supports a slightly narrower interface. Notably, inserters from +T& are absent.

+ +

ilist_traits<T> is a public base of this class and can be +used for a wide variety of customizations.

+
+ + + + +
+

ilist_node<T> implements the forward and backward links
+that are expected by the ilist<T> (and analogous containers)
+in the default manner.

+ +

ilist_node<T>s are meant to be embedded in the node type
+T; usually T publicly derives from
+ilist_node<T>.

+
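+ A minimal sketch (the node type is default-constructible here, so the
+ default sentinel policy described below applies):
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+using namespace llvm;
+
+// The prev/next links live inside Entry itself; the list performs no
+// separate per-node allocation for them.
+struct Entry : public ilist_node<Entry> {
+  int Value;
+  Entry() : Value(0) {}
+};
+
+static int sumEntries(ilist<Entry> &L) {
+  int S = 0;
+  for (ilist<Entry>::iterator I = L.begin(), E = L.end(); I != E; ++I)
+    S += I->Value;
+  return S;
+}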
+ + + + +
+

ilists have another specialty that must be considered. To be good
+citizens in the C++ ecosystem, they need to support the standard container
+operations, such as begin and end iterators, etc. Also, the
+operator-- must work correctly on the end iterator in the
+case of non-empty ilists.

+ +

The only sensible solution to this problem is to allocate a so-called
+sentinel along with the intrusive list, which serves as the end
+iterator, providing the back-link to the last element. However, conforming to
+the C++ convention, it is illegal to operator++ beyond the sentinel,
+and the sentinel also must not be dereferenced.

+ +

These constraints give the ilist some implementation freedom in
+how it allocates and stores the sentinel. The corresponding policy is dictated
+by ilist_traits<T>. By default a T is heap-allocated
+whenever the need for a sentinel arises.

+ +

While the default policy is sufficient in most cases, it may break down when +T does not provide a default constructor. Also, in the case of many +instances of ilists, the memory overhead of the associated sentinels +is wasted. To alleviate the situation with numerous and voluminous +T-sentinels, sometimes a trick is employed, leading to ghostly +sentinels.

+ +

Ghostly sentinels are obtained by specially-crafted ilist_traits<T> +which superpose the sentinel with the ilist instance in memory. Pointer +arithmetic is used to obtain the sentinel, which is relative to the +ilist's this pointer. The ilist is augmented by an +extra pointer, which serves as the back-link of the sentinel. This is the only +field in the ghostly sentinel which can be legally accessed.

+
+ + + + +
+

Other STL containers are available, such as std::string.

+ +

There are also various STL adapter classes such as std::queue, +std::priority_queue, std::stack, etc. These provide simplified access to an +underlying container but don't affect the cost of the container itself.

+ +
+ + + + + +
+ +

Set-like containers are useful when you need to canonicalize multiple values +into a single representation. There are several different choices for how to do +this, providing various trade-offs.

+ +
+ + + + + +
+ +

If you intend to insert a lot of elements, then do a lot of queries, a +great approach is to use a vector (or other sequential container) with +std::sort+std::unique to remove duplicates. This approach works really well if +your usage pattern has these two distinct phases (insert then query), and can be +coupled with a good choice of sequential container. +

+ +

+This combination provides several nice properties: the result data is
+contiguous in memory (good for cache locality), has few allocations, is easy to
+address (iterators in the final vector are just indices or pointers), and can be
+efficiently queried with a standard binary or radix search.

+ +
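+ A sketch of the idiom:
+
+#include <algorithm>
+#include <vector>
+
+static bool contains7(const int *Input, unsigned Count) {
+  std::vector<int> V;
+  // Insert phase: duplicates are fine for now.
+  for (unsigned i = 0; i != Count; ++i)
+    V.push_back(Input[i]);
+  // Switch to query phase: sort once and drop duplicates.
+  std::sort(V.begin(), V.end());
+  V.erase(std::unique(V.begin(), V.end()), V.end());
+  // Membership queries are now an efficient binary search.
+  return std::binary_search(V.begin(), V.end(), 7);
+}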
+ + + + +
+ +

If you have a set-like data structure that is usually small and whose elements +are reasonably small, a SmallSet<Type, N> is a good choice. This set +has space for N elements in place (thus, if the set is dynamically smaller than +N, no malloc traffic is required) and accesses them with a simple linear search. +When the set grows beyond 'N' elements, it allocates a more expensive representation that +guarantees efficient access (for most types, it falls back to std::set, but for +pointers it uses something far better, SmallPtrSet).

+ +

The magic of this class is that it handles small sets extremely efficiently, +but gracefully handles extremely large sets without loss of efficiency. The +drawback is that the interface is quite small: it supports insertion, queries +and erasing, but does not support iteration.

+ +
+ + + + +
+ +

SmallPtrSet has all the advantages of SmallSet (and a SmallSet of pointers is +transparently implemented with a SmallPtrSet), but also supports iterators. If +more than 'N' insertions are performed, a single quadratically +probed hash table is allocated and grows as needed, providing extremely +efficient access (constant time insertion/deleting/queries with low constant +factors) and is very stingy with malloc traffic.

+ +

Note that, unlike std::set, the iterators of SmallPtrSet are invalidated +whenever an insertion occurs. Also, the values visited by the iterators are not +visited in sorted order.

+ +
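+ A sketch (note that in this release insert returns true when the
+ pointer was newly inserted):
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/BasicBlock.h"
+using namespace llvm;
+
+static void visit(BasicBlock *BB,
+                  SmallPtrSet<BasicBlock*, 16> &Visited) {
+  if (!Visited.insert(BB))
+    return;  // already visited: insert() found an existing entry
+  // ... process BB and recurse into its successors ...
+}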
+ + + + +
+ +

+DenseSet is a simple quadratically probed hash table. It excels at supporting +small values: it uses a single allocation to hold all of the pairs that +are currently inserted in the set. DenseSet is a great way to unique small +values that are not simple pointers (use SmallPtrSet for pointers). Note that DenseSet has +the same requirements for the value type that DenseMap has. +

+ +
+ + + + +
+ +

+FoldingSet is an aggregate class that is really good at uniquing +expensive-to-create or polymorphic objects. It is a combination of a chained +hash table with intrusive links (uniqued objects are required to inherit from +FoldingSetNode) that uses SmallVector as part of +its ID process.

+ +

Consider a case where you want to implement a "getOrCreateFoo" method for +a complex object (for example, a node in the code generator). The client has a +description of *what* it wants to generate (it knows the opcode and all the +operands), but we don't want to 'new' a node, then try inserting it into a set +only to find out it already exists, at which point we would have to delete it +and return the node that already exists. +

+ +

To support this style of client, FoldingSet performs a query with a
+FoldingSetNodeID (which wraps SmallVector) that can be used to describe the
+element that we want to query for. The query either returns the element
+matching the ID or it returns an opaque ID that indicates where insertion should
+take place. Construction of the ID usually does not require heap traffic.

+ +

Because FoldingSet uses intrusive links, it can support polymorphic objects +in the set (for example, you can have SDNode instances mixed with LoadSDNodes). +Because the elements are individually allocated, pointers to the elements are +stable: inserting or removing elements does not invalidate any pointers to other +elements. +

+ +
+ + +
+ <set> +
+ +
+ +

std::set is a reasonable all-around set class, which is decent at +many things but great at nothing. std::set allocates memory for each element +inserted (thus it is very malloc intensive) and typically stores three pointers +per element in the set (thus adding a large amount of per-element space +overhead). It offers guaranteed log(n) performance, which is not particularly +fast from a complexity standpoint (particularly if the elements of the set are +expensive to compare, like strings), and has extremely high constant factors for +lookup, insertion and removal.

+ +

The advantages of std::set are that its iterators are stable (deleting or +inserting an element from the set does not affect iterators or pointers to other +elements) and that iteration over the set is guaranteed to be in sorted order. +If the elements in the set are large, then the relative overhead of the pointers +and malloc traffic is not a big deal, but if the elements of the set are small, +std::set is almost never a good choice.

+ +
+ + + + +
+

LLVM's SetVector<Type> is an adapter class that combines your choice of +a set-like container along with a Sequential +Container. The important property +that this provides is efficient insertion with uniquing (duplicate elements are +ignored) with iteration support. It implements this by inserting elements into +both a set-like container and the sequential container, using the set-like +container for uniquing and the sequential container for iteration. +

+ +

The difference between SetVector and other sets is that the order of +iteration is guaranteed to match the order of insertion into the SetVector. +This property is really important for things like sets of pointers. Because +pointer values are non-deterministic (e.g. vary across runs of the program on +different machines), iterating over the pointers in the set will +not be in a well-defined order.

+ +

+The drawback of SetVector is that it requires twice as much space as a normal
+set and has the sum of constant factors from the set-like container and the
+sequential container that it uses. Use it *only* if you need to iterate over
+the elements in a deterministic order. SetVector is also expensive to delete
+elements out of (linear time), unless you use its "pop_back" method, which is
+faster. +

+ +

SetVector is an adapter class that defaults to using std::vector and std::set +for the underlying containers, so it is quite expensive. However, +"llvm/ADT/SetVector.h" also provides a SmallSetVector class, which +defaults to using a SmallVector and SmallSet of a specified size. If you use +this, and if your sets are dynamically smaller than N, you will save a lot of +heap traffic.

+ +
+ + + + +
+ +

+UniqueVector is similar to SetVector, but it
+retains a unique ID for each element inserted into the set. It internally
+contains a map and a vector, which it uses to assign these IDs as values
+are inserted.

+ +

UniqueVector is very expensive: its cost is the sum of the cost of +maintaining both the map and vector, it has high complexity, high constant +factors, and produces a lot of malloc traffic. It should be avoided.

+ +
+ + + + + +
+ +

+The STL provides several other options, such as std::multiset and the various +"hash_set" like containers (whether from C++ TR1 or from the SGI library). We +never use hash_set and unordered_set because they are generally very expensive +(each insertion requires a malloc) and very non-portable. +

+ +

std::multiset is useful if you're not interested in elimination of +duplicates, but has all the drawbacks of std::set. A sorted vector (where you +don't delete duplicate entries) or some other approach is almost always +better.

+ +
+ + + + +
+Map-like containers are useful when you want to associate data to a key. As +usual, there are a lot of different ways to do this. :) +
+ + + + +
+ +

+If your usage pattern follows a strict insert-then-query approach, you can +trivially use the same approach as sorted vectors +for set-like containers. The only difference is that your query function +(which uses std::lower_bound to get efficient log(n) lookup) should only compare +the key, not both the key and value. This yields the same advantages as sorted +vectors for sets. +

+
+ + + + +
+ +

+Strings are commonly used as keys in maps, and they are difficult to support +efficiently: they are variable length, inefficient to hash and compare when +long, expensive to copy, etc. StringMap is a specialized container designed to +cope with these issues. It supports mapping an arbitrary range of bytes to an +arbitrary other object.

+ +

The StringMap implementation uses a quadratically-probed hash table, where +the buckets store a pointer to the heap allocated entries (and some other +stuff). The entries in the map must be heap allocated because the strings are +variable length. The string data (key) and the element object (value) are +stored in the same allocation with the string data immediately after the element +object. This container guarantees the "(char*)(&Value+1)" points +to the key string for a value.

+ +

The StringMap is very fast for several reasons: quadratic probing is very
+cache efficient for lookups, the hash value of strings in buckets is not
+recomputed when looking up an element, StringMap rarely has to touch the
+memory for unrelated objects when looking up a value (even when hash collisions
+happen), hash table growth does not recompute the hash values for strings
+already in the table, and each pair in the map is stored in a single allocation
+(the string data is stored in the same allocation as the Value of a pair).

+ +

StringMap also provides query methods that take byte ranges, so it only ever +copies a string if a value is inserted into the table.
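For illustration, a sketch that counts occurrences of a string key, assuming
the 2.9-era StringMap accessors:

+
+#include "llvm/ADT/StringMap.h"
+
+StringMap<unsigned> Counts;
+
+// Insert-or-find in one step; the key bytes are copied only on insertion.
+StringMapEntry<unsigned> &E = Counts.GetOrCreateValue("hello");
+E.setValue(E.getValue() + 1);
+
+StringMap<unsigned>::iterator I = Counts.find("hello");
+if (I != Counts.end())
+  errs() << I->getKey() << " seen " << I->getValue() << " time(s)\n";
+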

+
+ + + + +
+

+IndexedMap is a specialized container for mapping small dense integers (or +values that can be mapped to small dense integers) to some other type. It is +internally implemented as a vector with a mapping function that maps the keys to +the dense integer range. +

+ +

+This is useful for cases like virtual registers in the LLVM code generator: they +have a dense mapping that is offset by a compile-time constant (the first +virtual register ID).
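A minimal sketch with the default identity index mapping (a custom mapping
functor can fold out a constant offset):

+
+#include "llvm/ADT/IndexedMap.h"
+
+IndexedMap<const char*> Names;
+Names.grow(100);           // make keys 0..100 addressable
+Names[42] = "forty-two";   // plain vector indexing underneath
+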

+ +
+ + + + +
+ +

+DenseMap is a simple quadratically probed hash table. It excels at supporting +small keys and values: it uses a single allocation to hold all of the pairs that +are currently inserted in the map. DenseMap is a great way to map pointers to +pointers, or map other small types to each other. +

+ +

+There are several aspects of DenseMap that you should be aware of, however. The
+iterators in a DenseMap are invalidated whenever an insertion occurs, unlike
+with std::map. Also, because DenseMap allocates space for a large number of
+key/value pairs (it starts with 64 by default), it will waste a lot of space if
+your keys or values are large. Finally, you must implement a partial
+specialization of DenseMapInfo for the key that you want, if it isn't already
+supported. This is required to tell DenseMap about two special marker values
+(which can never be inserted into the map) that it needs internally.
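For example, a sketch that numbers instructions (I is a placeholder):

+
+#include "llvm/ADT/DenseMap.h"
+
+Instruction *I = ...;
+
+DenseMap<Instruction*, unsigned> Numbering;
+Numbering[I] = 1;   // beware: insertion can invalidate live iterators
+
+DenseMap<Instruction*, unsigned>::iterator It = Numbering.find(I);
+if (It != Numbering.end())
+  errs() << "instruction number " << It->second << "\n";
+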

+ +
+ + + + +
+ +

+ValueMap is a wrapper around a DenseMap mapping +Value*s (or subclasses) to another type. When a Value is deleted or RAUW'ed, +ValueMap will update itself so the new version of the key is mapped to the same +value, just as if the key were a WeakVH. You can configure exactly how this +happens, and what else happens on these two events, by passing +a Config parameter to the ValueMap template.
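A brief sketch (V is a placeholder, and the default Config is assumed):

+
+#include "llvm/ADT/ValueMap.h"
+
+Value *V = ...;
+
+ValueMap<Value*, unsigned> VM;   // default ValueMapConfig
+VM[V] = 7;
+// If V is later RAUW'ed to some new value, looking up that new value
+// yields 7 with no manual fixup; if V is deleted, its entry disappears.
+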

+ +
+ + + + +
+ +

IntervalMap is a compact map for small keys and values. It maps key
+intervals instead of single keys, and it will automatically coalesce adjacent
+intervals. When the map only contains a few intervals, they are stored in the
+map object itself to avoid allocations.

+ +

The IntervalMap iterators are quite big, so they should not be passed around +as STL iterators. The heavyweight iterators allow a smaller data structure.
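A minimal sketch of the interface, assuming the default (closed-interval)
traits; note that an IntervalMap must be constructed with an external
allocator:

+
+#include "llvm/ADT/IntervalMap.h"
+
+IntervalMap<unsigned, char>::Allocator Alloc;
+IntervalMap<unsigned, char> Map(Alloc);
+
+Map.insert(10, 20, 'x');   // closed interval [10,20] maps to 'x'
+Map.insert(21, 30, 'x');   // adjacent with equal value: coalesced to [10,30]
+char C = Map.lookup(25);   // 'x'; a second argument supplies the default
+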

+ +
+ + +
+ <map> +
+ +
+ +

+std::map has similar characteristics to std::set: it uses +a single allocation per pair inserted into the map, it offers log(n) lookup with +an extremely large constant factor, imposes a space penalty of 3 pointers per +pair in the map, etc.

+ +

std::map is most useful when your keys or values are very large, if you need +to iterate over the collection in sorted order, or if you need stable iterators +into the map (i.e. they don't get invalidated if an insertion or deletion of +another element takes place).

+ +
+ + + + +
+ +

+The STL provides several other options, such as std::multimap and the various
+"hash_map" like containers (whether from C++ TR1 or from the SGI library). We
+never use hash_map and unordered_map because they are generally very expensive
+(each insertion requires a malloc) and very non-portable.

+ +

std::multimap is useful if you want to map a key to multiple values, but has +all the drawbacks of std::map. A sorted vector or some other approach is almost +always better.

+ +
+ + + + +
+ +

+TODO: const char* vs stringref vs smallstring vs std::string. Describe twine, +xref to #string_apis. +

+ +
+ + + + +
+

Unlike the other containers, there are only two bit storage containers, and +choosing when to use each is relatively straightforward.

+ +

One additional option is
+std::vector<bool>: we discourage its use for two reasons: 1) the
+implementation in many common compilers (e.g. commonly available versions of
+GCC) is extremely inefficient, and 2) the C++ standards committee is likely to
+deprecate this container and/or change it significantly somehow. In any case,
+please don't use it.

+
+ + + + +
+

The BitVector container provides a dynamic size set of bits for manipulation.
+It supports individual bit setting/testing, as well as set operations. The set
+operations take time O(size of bitvector), but operations are performed one word
+at a time, instead of one bit at a time. This makes the BitVector very fast for
+set operations compared to other containers. Use the BitVector when you expect
+the number of set bits to be high (i.e., a dense set).
+
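For example:

+
+#include "llvm/ADT/BitVector.h"
+
+BitVector A(100), B(100);   // 100 bits each, initially all zero
+A.set(3);  A.set(7);
+B.set(7);  B.set(42);
+
+A &= B;                     // intersection, computed a word at a time
+if (A.any())
+  errs() << "first common bit: " << A.find_first() << "\n";
+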

+
+ + + + +
+

The SmallBitVector container provides the same interface as BitVector, but +it is optimized for the case where only a small number of bits, less than +25 or so, are needed. It also transparently supports larger bit counts, but +slightly less efficiently than a plain BitVector, so SmallBitVector should +only be used when larger counts are rare. +

+ +

+At this time, SmallBitVector does not support set operations (and, or, xor), +and its operator[] does not provide an assignable lvalue. +

+
+ + + + +
+

The SparseBitVector container is much like BitVector, with one major
+difference: only the bits that are set are stored. This makes the
+SparseBitVector much more space efficient than BitVector when the set is
+sparse, as well as making set operations O(number of set bits) instead of
+O(size of universe). The downside to the SparseBitVector is that setting and
+testing of random bits is O(N), and on large SparseBitVectors this can be
+slower than BitVector. In our implementation, setting or testing bits in sorted
+order (either forwards or reverse) is O(1) worst case. Testing and setting bits
+within 128 bits (depends on size) of the current bit is also O(1). As a general
+statement, testing/setting bits in a SparseBitVector is O(distance away from
+last set bit).
+
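For example, a sketch of typical dataflow-style usage:

+
+#include "llvm/ADT/SparseBitVector.h"
+
+SparseBitVector<> Live;
+Live.set(5);
+Live.set(1000000);   // only set bits consume storage
+
+SparseBitVector<> Killed;
+Killed.set(1000000);
+
+Live.intersectWithComplement(Killed);   // Live &= ~Killed
+if (Live.test(5))
+  errs() << "bit 5 is still live\n";
+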

+
+ + + + + +
+ +

This section describes how to perform some very simple transformations of +LLVM code. This is meant to give examples of common idioms used, showing the +practical side of LLVM transformations.

Because this is a "how-to" section, +you should also read about the main classes that you will be working with. The +Core LLVM Class Hierarchy Reference contains details +and descriptions of the main classes that you should know about.

+ +
+ + + + + +
+ +

The LLVM compiler infrastructure has many different data structures that may
+be traversed. Following the example of the C++ standard template library, the
+techniques used to traverse these various data structures are all basically the
+same. For an enumerable sequence of values, the XXXbegin() function (or
+method) returns an iterator to the start of the sequence, the XXXend()
+function returns an iterator pointing to one past the last valid element of the
+sequence, and there is some XXXiterator data type that is common
+between the two operations.

+ +

Because the pattern for iteration is common across many different aspects of +the program representation, the standard template library algorithms may be used +on them, and it is easier to remember how to iterate. First we show a few common +examples of the data structures that need to be traversed. Other data +structures are traversed in very similar ways.

+ +
+ + + + +
+ +

It's quite common to have a Function instance that you'd like to +transform in some way; in particular, you'd like to manipulate its +BasicBlocks. To facilitate this, you'll need to iterate over all of +the BasicBlocks that constitute the Function. The following is +an example that prints the name of a BasicBlock and the number of +Instructions it contains:

+ +
+
+// func is a pointer to a Function instance
+for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
+  // Print out the name of the basic block if it has one, and then the
+  // number of instructions that it contains
+  errs() << "Basic block (name=" << i->getName() << ") has "
+             << i->size() << " instructions.\n";
+
+
+ +

Note that i can be used as if it were a pointer for the purposes of +invoking member functions of the Instruction class. This is +because the indirection operator is overloaded for the iterator +classes. In the above code, the expression i->size() is +exactly equivalent to (*i).size() just like you'd expect.

+ +
+ + + + +
+ +

Just like when dealing with BasicBlocks in Functions, it's +easy to iterate over the individual instructions that make up +BasicBlocks. Here's a code snippet that prints out each instruction in +a BasicBlock:

+ +
+
+// blk is a pointer to a BasicBlock instance
+for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
+   // The next statement works since operator<<(ostream&,...)
+   // is overloaded for Instruction&
+   errs() << *i << "\n";
+
+
+ +

However, this isn't really the best way to print out the contents of a +BasicBlock! Since the ostream operators are overloaded for virtually +anything you'll care about, you could have just invoked the print routine on the +basic block itself: errs() << *blk << "\n";.

+ +
+ + + + +
+ +

If you're finding that you commonly iterate over a Function's +BasicBlocks and then that BasicBlock's Instructions, +InstIterator should be used instead. You'll need to include llvm/Support/InstIterator.h, +and then instantiate InstIterators explicitly in your code. Here's a +small example that shows how to dump all instructions in a function to the standard error stream:

+ +

+
+#include "llvm/Support/InstIterator.h"
+
+// F is a pointer to a Function instance
+for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+  errs() << *I << "\n";
+
+
+ +

Easy, isn't it? You can also use InstIterators to fill a +work list with its initial contents. For example, if you wanted to +initialize a work list to contain all instructions in a Function +F, all you would need to do is something like:

+ +
+
+std::set<Instruction*> worklist;
+// or better yet, SmallPtrSet<Instruction*, 64> worklist;
+
+for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
+   worklist.insert(&*I);
+
+
+ +

The STL set worklist would now contain all instructions in the +Function pointed to by F.

+ +
+ + + + +
+ +

Sometimes, it'll be useful to grab a reference (or pointer) to a class +instance when all you've got at hand is an iterator. Well, extracting +a reference or a pointer from an iterator is very straight-forward. +Assuming that i is a BasicBlock::iterator and j +is a BasicBlock::const_iterator:

+ +
+
+Instruction& inst = *i;   // Grab reference to the instruction
+Instruction* pinst = &*i; // Grab pointer to the instruction
+const Instruction& inst = *j;
+
+
+ +

However, the iterators you'll be working with in the LLVM framework are +special: they will automatically convert to a ptr-to-instance type whenever they +need to. Instead of dereferencing the iterator and then taking the address of +the result, you can simply assign the iterator to the proper pointer type and +you get the dereference and address-of operation as a result of the assignment +(behind the scenes, this is a result of overloading casting mechanisms). Thus +the last line of the last example,

+ +
+
+Instruction *pinst = &*i;
+
+
+ +

is semantically equivalent to

+ +
+
+Instruction *pinst = i;
+
+
+ +

It's also possible to turn a class pointer into the corresponding iterator, +and this is a constant time operation (very efficient). The following code +snippet illustrates use of the conversion constructors provided by LLVM +iterators. By using these, you can explicitly grab the iterator of something +without actually obtaining it via iteration over some structure:

+ +
+
+void printNextInstruction(Instruction* inst) {
+  BasicBlock::iterator it(inst);
+  ++it; // After this line, it refers to the instruction after *inst
+  if (it != inst->getParent()->end()) errs() << *it << "\n";
+}
+
+
+ +

Unfortunately, these implicit conversions come at a cost; they prevent +these iterators from conforming to standard iterator conventions, and thus +from being usable with standard algorithms and containers. For example, they +prevent the following code, where B is a BasicBlock, +from compiling:

+ +
+
+  llvm::SmallVector<llvm::Instruction *, 16>(B->begin(), B->end());
+
+
+ +

Because of this, these implicit conversions may be removed some day, +and operator* changed to return a pointer instead of a reference.

+ +
+ + + + +
+ +

Say that you're writing a FunctionPass and would like to count all the +locations in the entire module (that is, across every Function) where a +certain function (i.e., some Function*) is already in scope. As you'll +learn later, you may want to use an InstVisitor to accomplish this in a +much more straight-forward manner, but this example will allow us to explore how +you'd do it if you didn't have InstVisitor around. In pseudo-code, this +is what we want to do:

+ +
+
+initialize callCounter to zero
+for each Function f in the Module
+  for each BasicBlock b in f
+    for each Instruction i in b
+      if (i is a CallInst and calls the given function)
+        increment callCounter
+
+
+ +

And the actual code is (remember, because we're writing a +FunctionPass, our FunctionPass-derived class simply has to +override the runOnFunction method):

+ +
+
+Function* targetFunc = ...;
+
+class OurFunctionPass : public FunctionPass {
+  public:
+    OurFunctionPass(): callCounter(0) { }
+
+    virtual bool runOnFunction(Function& F) {
+      for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
+        for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
+          if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
+            // We know we've encountered a call instruction, so we
+            // need to determine if it's a call to the
+            // function pointed to by targetFunc or not.
+            if (callInst->getCalledFunction() == targetFunc)
+              ++callCounter;
+          }
+        }
+      }
+      // runOnFunction must return true if it modified the function;
+      // this pass only counts calls, so it never does.
+      return false;
+    }
+
+  private:
+    unsigned callCounter;
+};
+
+
+ +
+ + + + +
+ +

You may have noticed that the previous example was a bit oversimplified in +that it did not deal with call sites generated by 'invoke' instructions. In +this, and in other situations, you may find that you want to treat +CallInsts and InvokeInsts the same way, even though their +most-specific common base class is Instruction, which includes lots of +less closely-related things. For these cases, LLVM provides a handy wrapper +class called CallSite. +It is essentially a wrapper around an Instruction pointer, with some +methods that provide functionality common to CallInsts and +InvokeInsts.

+ +

This class has "value semantics": it should be passed by value, not by +reference and it should not be dynamically allocated or deallocated using +operator new or operator delete. It is efficiently copyable, +assignable and constructable, with costs equivalents to that of a bare pointer. +If you look at its definition, it has only a single pointer member.

+ +
+ + + + +
+ +

Frequently, we might have an instance of the Value Class and we want to
+determine which Users use the Value. The list of all
+Users of a particular Value is called a def-use chain.
+For example, let's say we have a Function* named F that refers to a
+particular function foo. Finding all of the instructions that
+use foo is as simple as iterating over the def-use chain
+of F:

+ +
+
+Function *F = ...;
+
+for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i)
+  if (Instruction *Inst = dyn_cast<Instruction>(*i)) {
+    errs() << "F is used in instruction:\n";
+    errs() << *Inst << "\n";
+  }
+
+
+ +

Note that dereferencing a Value::use_iterator is not a very cheap +operation. Instead of performing *i above several times, consider +doing it only once in the loop body and reusing its result.
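For example:

+
+for (Value::use_iterator i = F->use_begin(), e = F->use_end(); i != e; ++i) {
+  User *U = *i;   // dereference once...
+  // ...and reuse U below instead of repeating *i
+}
+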

+ +

Alternatively, it's common to have an instance of the User Class and need to know what +Values are used by it. The list of all Values used by a +User is known as a use-def chain. Instances of class +Instruction are common Users, so we might want to iterate over +all of the values that a particular instruction uses (that is, the operands of +the particular Instruction):

+ +
+
+Instruction *pi = ...;
+
+for (User::op_iterator i = pi->op_begin(), e = pi->op_end(); i != e; ++i) {
+  Value *v = *i;
+  // ...
+}
+
+
+ +

Declaring objects as const is an important tool for enforcing
+mutation-free algorithms (such as analyses, etc.). For this purpose, the above
+iterators come in constant flavors as Value::const_use_iterator
+and Value::const_op_iterator. They automatically arise when
+calling use/op_begin() on const Value*s or
+const User*s respectively. Upon dereferencing, they return
+const Use*s. Otherwise the above patterns remain unchanged.

+ +
+ + + + +
+ +

Iterating over the predecessors and successors of a block is quite easy +with the routines defined in "llvm/Support/CFG.h". Just use code like +this to iterate over all predecessors of BB:

+ +
+
+#include "llvm/Support/CFG.h"
+BasicBlock *BB = ...;
+
+for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+  BasicBlock *Pred = *PI;
+  // ...
+}
+
+
+ +

Similarly, to iterate over successors use +succ_iterator/succ_begin/succ_end.
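For example:

+
+for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) {
+  BasicBlock *Succ = *SI;
+  // ...
+}
+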

+ +
+ + + + + +
+ +

There are some primitive transformation operations present in the LLVM +infrastructure that are worth knowing about. When performing +transformations, it's fairly common to manipulate the contents of basic +blocks. This section describes some of the common methods for doing so +and gives example code.

+ +
+ + + + +
+ +

Instantiating Instructions

+ +

Creation of Instructions is straight-forward: simply call the +constructor for the kind of instruction to instantiate and provide the necessary +parameters. For example, an AllocaInst only requires a +(const-ptr-to) Type. Thus:

+ +
+
+AllocaInst* ai = new AllocaInst(Type::Int32Ty);
+
+
+ +

will create an AllocaInst instance that represents the allocation of +one integer in the current stack frame, at run time. Each Instruction +subclass is likely to have varying default parameters which change the semantics +of the instruction, so refer to the doxygen documentation for the subclass of +Instruction that you're interested in instantiating.

+ +

Naming values

+ +

It is very useful to name the values of instructions when you're able to, as +this facilitates the debugging of your transformations. If you end up looking +at generated LLVM machine code, you definitely want to have logical names +associated with the results of instructions! By supplying a value for the +Name (default) parameter of the Instruction constructor, you +associate a logical name with the result of the instruction's execution at +run time. For example, say that I'm writing a transformation that dynamically +allocates space for an integer on the stack, and that integer is going to be +used as some kind of index by some other code. To accomplish this, I place an +AllocaInst at the first point in the first BasicBlock of some +Function, and I'm intending to use it within the same +Function. I might do:

+ +
+
+AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
+
+
+ +

where indexLoc is now the logical name of the instruction's +execution value, which is a pointer to an integer on the run time stack.

+ +

Inserting instructions

+ +

There are essentially two ways to insert an Instruction +into an existing sequence of instructions that form a BasicBlock:

+ +
    +
  • Insertion into an explicit instruction list + +

    Given a BasicBlock* pb, an Instruction* pi within that + BasicBlock, and a newly-created instruction we wish to insert + before *pi, we do the following:

    + +
    +
    +BasicBlock *pb = ...;
    +Instruction *pi = ...;
    +Instruction *newInst = new Instruction(...);
    +
    +pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
    +
    +
    + +

    Appending to the end of a BasicBlock is so common that + the Instruction class and Instruction-derived + classes provide constructors which take a pointer to a + BasicBlock to be appended to. For example code that + looked like:

    + +
    +
    +BasicBlock *pb = ...;
    +Instruction *newInst = new Instruction(...);
    +
    +pb->getInstList().push_back(newInst); // Appends newInst to pb
    +
    +
    + +

    becomes:

    + +
    +
    +BasicBlock *pb = ...;
    +Instruction *newInst = new Instruction(..., pb);
    +
    +
    + +

    which is much cleaner, especially if you are creating + long instruction streams.

  • + +
  • Insertion into an implicit instruction list + +

    Instruction instances that are already in BasicBlocks + are implicitly associated with an existing instruction list: the instruction + list of the enclosing basic block. Thus, we could have accomplished the same + thing as the above code without being given a BasicBlock by doing: +

    + +
    +
    +Instruction *pi = ...;
    +Instruction *newInst = new Instruction(...);
    +
    +pi->getParent()->getInstList().insert(pi, newInst);
    +
    +
    + +

    In fact, this sequence of steps occurs so frequently that the
    Instruction class and Instruction-derived classes provide
    constructors which take (as a default parameter) a pointer to an
    Instruction which the newly-created Instruction should
    precede. That is, Instruction constructors are capable of
    inserting the newly-created instance into the BasicBlock of a
    provided instruction, immediately before that instruction. Using an
    Instruction constructor with an insertBefore (default)
    parameter, the above code becomes:

    + +
    +
    +Instruction* pi = ...;
    +Instruction* newInst = new Instruction(..., pi);
    +
    +
    + +

    which is much cleaner, especially if you're creating a lot of + instructions and adding them to BasicBlocks.

  • +
+ +
+ + + + +
+ +

Deleting an instruction from an existing sequence of instructions that form a +BasicBlock is very straight-forward. First, +you must have a pointer to the instruction that you wish to delete. Second, you +need to obtain the pointer to that instruction's basic block. You use the +pointer to the basic block to get its list of instructions and then use the +erase function to remove your instruction. For example:

+ +
+
+Instruction *I = .. ;
+I->eraseFromParent();
+
+
+ +
+ + + + +
+ +

Replacing individual instructions

+ +

Including "llvm/Transforms/Utils/BasicBlockUtils.h" +permits use of two very useful replace functions: ReplaceInstWithValue +and ReplaceInstWithInst.

+ +


+ +
    +
  • ReplaceInstWithValue + +

    This function replaces all uses of a given instruction with a value, + and then removes the original instruction. The following example + illustrates the replacement of the result of a particular + AllocaInst that allocates memory for a single integer with a null + pointer to an integer.

    + +
    +
    +AllocaInst* instToReplace = ...;
    +BasicBlock::iterator ii(instToReplace);
    +
    +ReplaceInstWithValue(instToReplace->getParent()->getInstList(), ii,
    +                     Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty)));
    +
  • + +
  • ReplaceInstWithInst + +

    This function replaces a particular instruction with another + instruction, inserting the new instruction into the basic block at the + location where the old instruction was, and replacing any uses of the old + instruction with the new instruction. The following example illustrates + the replacement of one AllocaInst with another.

    + +
    +
    +AllocaInst* instToReplace = ...;
    +BasicBlock::iterator ii(instToReplace);
    +
    +ReplaceInstWithInst(instToReplace->getParent()->getInstList(), ii,
    +                    new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt"));
    +
  • +
+ +

Replacing multiple uses of Users and Values

+ +

You can use Value::replaceAllUsesWith and +User::replaceUsesOfWith to change more than one use at a time. See the +doxygen documentation for the Value Class +and User Class, respectively, for more +information.
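For example (the values shown are placeholders):

+
+Value *Old = ...;  Value *New = ...;
+User *U = ...;
+
+// Rewrite every use of Old, everywhere, to refer to New:
+Old->replaceAllUsesWith(New);
+
+// Alternatively, rewrite only the operands of the single User U:
+U->replaceUsesOfWith(Old, New);
+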

+ + + +
+ + + + +
+ +

Deleting a global variable from a module is just as easy as deleting an +Instruction. First, you must have a pointer to the global variable that you wish + to delete. You use this pointer to erase it from its parent, the module. + For example:

+ +
+
+GlobalVariable *GV = .. ;
+
+GV->eraseFromParent();
+
+
+ +
+ + + + +
+ +

In generating IR, you may need some complex types. If you know these types +statically, you can use TypeBuilder<...>::get(), defined +in llvm/Support/TypeBuilder.h, to retrieve them. TypeBuilder +has two forms depending on whether you're building types for cross-compilation +or native library use. TypeBuilder<T, true> requires +that T be independent of the host environment, meaning that it's built +out of types from +the llvm::types +namespace and pointers, functions, arrays, etc. built of +those. TypeBuilder<T, false> additionally allows native C types +whose size may depend on the host compiler. For example,

+ +
+
+FunctionType *ft = TypeBuilder<types::i<8>(types::i<32>*), true>::get();
+
+
+ +

is easier to read and write than the equivalent

+ +
+
+std::vector<const Type*> params;
+params.push_back(PointerType::getUnqual(Type::Int32Ty));
+FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
+
+
+ +

See the class +comment for more details.

+ +
+ + + + + +
+

+This section describes the interaction of the LLVM APIs with multithreading,
+both on the part of client applications and on the part of code hosted inside
+the JIT.
+

+ +

+Note that LLVM's support for multithreading is still relatively young. Up +through version 2.5, the execution of threaded hosted applications was +supported, but not threaded client access to the APIs. While this use case is +now supported, clients must adhere to the guidelines specified below to +ensure proper operation in multithreaded mode. +

+ +

+Note that, on Unix-like platforms, LLVM requires the presence of GCC's atomic
+intrinsics in order to support threaded operation. If you need a
+multithreading-capable LLVM on a platform without a suitably modern system
+compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and
+using the resultant compiler to build a copy of LLVM with multithreading
+support.
+

+
+ + + + +
+ +

+In order to properly protect its internal data structures while avoiding
+excessive locking overhead in the single-threaded case, LLVM must initialize
+certain data structures necessary to provide guards around its internals. To do
+so, the client program must invoke llvm_start_multithreaded() before
+making any concurrent LLVM API calls. To subsequently tear down these
+structures, use the llvm_stop_multithreaded() call. You can also use
+the llvm_is_multithreaded() call to check the status of multithreaded
+mode.
+

+ +

+Note that both of these calls must be made in isolation. That is to
+say that no other LLVM API calls may be executing at any time during the
+execution of llvm_start_multithreaded() or
+llvm_stop_multithreaded(). It is the client's responsibility to enforce
+this isolation.
+

+ +

+The return value of llvm_start_multithreaded() indicates the success or
+failure of the initialization. Failure typically indicates that your copy of
+LLVM was built without multithreading support, usually because GCC atomic
+intrinsics were not found in your system compiler. In this case, the LLVM API
+will not be safe for concurrent calls. However, it will be safe for
+hosting threaded applications in the JIT, though care
+must be taken to ensure that side exits and the like do not accidentally
+result in concurrent LLVM API calls.
+
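A minimal sketch of the expected call sequence (the error handling shown is
illustrative only):

+
+#include "llvm/Support/Threading.h"
+
+if (!llvm_start_multithreaded())
+  errs() << "warning: this LLVM lacks multithreading support\n";
+
+// ... concurrent LLVM API calls are now permitted ...
+
+llvm_stop_multithreaded();   // must also be called in isolation
+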

+
+ + + + +
+

+When you are done using the LLVM APIs, you should call llvm_shutdown() +to deallocate memory used for internal structures. This will also invoke +llvm_stop_multithreaded() if LLVM is operating in multithreaded mode. +As such, llvm_shutdown() requires the same isolation guarantees as +llvm_stop_multithreaded(). +

+ +

+Note that, if you use scope-based shutdown, you can use the +llvm_shutdown_obj class, which calls llvm_shutdown() in its +destructor. +
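For example:

+
+#include "llvm/Support/ManagedStatic.h"
+
+int main(int argc, char **argv) {
+  llvm_shutdown_obj Y;   // calls llvm_shutdown() on scope exit
+  // ... use LLVM APIs ...
+  return 0;
+}
+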

+ + + + +
+

+ManagedStatic is a utility class in LLVM used to implement static +initialization of static resources, such as the global type tables. Before the +invocation of llvm_shutdown(), it implements a simple lazy +initialization scheme. Once llvm_start_multithreaded() returns, +however, it uses double-checked locking to implement thread-safe lazy +initialization. +

+ +

+Note that, because no other threads are allowed to issue LLVM API calls before +llvm_start_multithreaded() returns, it is possible to have +ManagedStatics of llvm::sys::Mutexs. +

+ +

+The llvm_acquire_global_lock() and llvm_release_global_lock()
+APIs provide access to the global lock used to implement the double-checked
+locking for lazy initialization. These should only be used internally to LLVM,
+and only if you know what you're doing!
+

+
+ + + + +
+

+LLVMContext is an opaque class in the LLVM API which clients can use +to operate multiple, isolated instances of LLVM concurrently within the same +address space. For instance, in a hypothetical compile-server, the compilation +of an individual translation unit is conceptually independent from all the +others, and it would be desirable to be able to compile incoming translation +units concurrently on independent server threads. Fortunately, +LLVMContext exists to enable just this kind of scenario! +

+ +

+Conceptually, LLVMContext provides isolation. Every LLVM entity
+(Modules, Values, Types, Constants, etc.)
+in LLVM's in-memory IR belongs to an LLVMContext. Entities in
+different contexts cannot interact with each other: Modules in
+different contexts cannot be linked together, Functions cannot be added
+to Modules in different contexts, etc. What this means is that it is
+safe to compile on multiple threads simultaneously, as long as no two threads
+operate on entities within the same context.
+

+ +

+In practice, very few places in the API require the explicit specification of
+an LLVMContext, other than the Type creation/lookup APIs.
+Because every Type carries a reference to its owning context, most
+other entities can determine what context they belong to by looking at their
+own Type. If you are adding new entities to LLVM IR, please try to
+maintain this interface design.
+

+ +

+For clients that do not require the benefits of isolation, LLVM +provides a convenience API getGlobalContext(). This returns a global, +lazily initialized LLVMContext that may be used in situations where +isolation is not a concern. +
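For example (module names are arbitrary):

+
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+
+// One isolated context, e.g. per server thread...
+LLVMContext MyContext;
+Module *M = new Module("tu1", MyContext);
+
+// ...or the shared global context when isolation is not a concern.
+Module *G = new Module("tu2", getGlobalContext());
+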

+
+ + + + +
+

+LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple +threads can call ExecutionEngine::getPointerToFunction() or +ExecutionEngine::runFunction() concurrently, and multiple threads can +run code output by the JIT concurrently. The user must still ensure that only +one thread accesses IR in a given LLVMContext while another thread +might be modifying it. One way to do that is to always hold the JIT lock while +accessing IR outside the JIT (the JIT modifies the IR by adding +CallbackVHs). Another way is to only +call getPointerToFunction() from the LLVMContext's thread. +

+ +

When the JIT is configured to compile lazily (using +ExecutionEngine::DisableLazyCompilation(false)), there is currently a +race condition in +updating call sites after a function is lazily-jitted. It's still possible to +use the lazy JIT in a threaded program if you ensure that only one thread at a +time can call any particular lazy stub and that the JIT lock guards any IR +access, but we suggest using only the eager JIT in threaded programs. +

+
+ + + + + +
+

+This section describes some of the advanced or obscure APIs that most clients
+do not need to be aware of. These APIs tend to manage the inner workings of the
+LLVM system, and only need to be accessed in unusual circumstances.
+

+
+ + + + +
+ +

+The LLVM type system has a very simple goal: allow clients to compare types for +structural equality with a simple pointer comparison (aka a shallow compare). +This goal makes clients much simpler and faster, and is used throughout the LLVM +system. +

+ +

+Unfortunately achieving this goal is not a simple matter. In particular,
+recursive types and late resolution of opaque types make the situation very
+difficult to handle. Fortunately, for the most part, our implementation allows
+most clients to remain completely unaware of the nasty internal details. The
+primary case where clients are exposed to the inner workings is when building a
+recursive type. In addition to this case, the LLVM bitcode reader,
+assembly parser, and linker also have to be aware of the inner workings of this
+system.
+

+ +

+For our purposes below, we need three concepts. First, an "Opaque Type" is
+exactly as defined in the language
+reference. Second, an "Abstract Type" is any type which includes an
+opaque type as part of its type graph (for example "{ opaque, i32 }").
+Third, a concrete type is a type that is not an abstract type (e.g. "{ i32,
+float }").
+

+ +
+ + + + +
+ +

+Because the most common question is "how do I build a recursive type with LLVM", +we answer it now and explain it as we go. Here we include enough to cause this +to be emitted to an output .ll file: +

+ +
+
+%mylist = type { %mylist*, i32 }
+
+
+ +

+To build this, use the following LLVM APIs: +

+ +
+
+// Create the initial outer struct
+PATypeHolder StructTy = OpaqueType::get();
+std::vector<const Type*> Elts;
+Elts.push_back(PointerType::getUnqual(StructTy));
+Elts.push_back(Type::Int32Ty);
+StructType *NewSTy = StructType::get(Elts);
+
+// At this point, NewSTy = "{ opaque*, i32 }". Tell VMCore that
+// the struct and the opaque type are actually the same.
+cast<OpaqueType>(StructTy.get())->refineAbstractTypeTo(NewSTy);
+
+// NewSTy is potentially invalidated, but StructTy (a PATypeHolder) is
+// kept up-to-date
+NewSTy = cast<StructType>(StructTy.get());
+
+// Add a name for the type to the module symbol table (optional)
+MyModule->addTypeName("mylist", NewSTy);
+
+
+ +

+This code shows the basic approach used to build recursive types: build a +non-recursive type using 'opaque', then use type unification to close the cycle. +The type unification step is performed by the refineAbstractTypeTo method, which is +described next. After that, we describe the PATypeHolder class. +

+ +
+ + + + +
+

+The refineAbstractTypeTo method starts the type unification process. +While this method is actually a member of the DerivedType class, it is most +often used on OpaqueType instances. Type unification is actually a recursive +process. After unification, types can become structurally isomorphic to +existing types, and all duplicates are deleted (to preserve pointer equality). +

+ +

+In the example above, the OpaqueType object is definitely deleted.
+Additionally, if there is an "{ \2*, i32}" type already created in the system,
+the pointer and struct type created are also deleted. Obviously, whenever
+a type is deleted, any "Type*" pointers in the program are invalidated. As
+such, it is safest to avoid having any "Type*" pointers to abstract types
+live across a call to refineAbstractTypeTo (note that non-abstract
+types can never move or be deleted). To deal with this, the PATypeHolder class is used to maintain a stable
+reference to a possibly refined type, and the AbstractTypeUser class is used to update more
+complex data structures.
+

+ +
+ + + + +
+

+PATypeHolder is a form of a "smart pointer" for Type objects. When VMCore +happily goes about nuking types that become isomorphic to existing types, it +automatically updates all PATypeHolder objects to point to the new type. In the +example above, this allows the code to maintain a pointer to the resultant +resolved recursive type, even though the Type*'s are potentially invalidated. +

+ +

+PATypeHolder is an extremely light-weight object that uses a lazy union-find
+implementation to update pointers. For example, the pointer from a Value to its
+Type is maintained by PATypeHolder objects.
+

+ +
+ + + + +
+ +

+Some data structures need to perform more complex updates when types get
+resolved. To support this, a class can derive from the AbstractTypeUser class.
+This class
+allows it to get callbacks when certain types are resolved. To register to get
+callbacks for a particular type, the DerivedType::{add/remove}AbstractTypeUser
+methods can be called on a type. Note that these methods only work for
+ abstract types. Concrete types (those that do not include any opaque
+objects) can never be refined.
+

+
+ + + + + +
+

The +ValueSymbolTable class provides a symbol table that the Function and +Module classes use for naming value definitions. The symbol table +can provide a name for any Value. +The +TypeSymbolTable class is used by the Module class to store +names for types.

+ +

Note that the SymbolTable class should not be directly accessed
+by most clients. It should only be used when iteration over the symbol table
+names themselves is required, which is very special purpose. Note that not
+all LLVM
+Values have names, and those without names (i.e. they have
+an empty name) do not exist in the symbol table.
+

+ +

These symbol tables support iteration over the values/types in the symbol
+table with begin/end/iterator and support querying to see if a
+specific name is in the symbol table (with lookup). The
+ValueSymbolTable class exposes no public mutator methods; instead,
+simply call setName on a value, which will autoinsert it into the
+appropriate symbol table. For types, use the Module::addTypeName method to
+insert entries into the symbol table.
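For example (MyModule and SomeStructTy are placeholders):

+
+Value *V = ...;
+V->setName("counter");   // V is auto-inserted into the symbol table
+
+Module *MyModule = ...;
+const Type *SomeStructTy = ...;
+MyModule->addTypeName("mylist", SomeStructTy);   // types go via the Module
+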

+ +
+ + + + + + +
+

The +User class provides a basis for expressing the ownership of User +towards other +Values. The +Use helper class is employed to do the bookkeeping and to facilitate O(1) +addition and removal.

+ + + + +
+

+A subclass of User can choose between incorporating its Use objects
+or referring to them out-of-line by means of a pointer. A mixed variant
+(some Uses inline, others hung off) is impractical and breaks the invariant
+that the Use objects belonging to the same User form a contiguous array.
+

+
+ +

+We have 2 different layouts in the User (sub)classes: +

    +
  • Layout a) +The Use object(s) are inside (resp. at fixed offset) of the User +object and there are a fixed number of them.

    + +
  • Layout b) +The Use object(s) are referenced by a pointer to an +array from the User object and there may be a variable +number of them.

    +
+

+As of v2.4 each layout still possesses a direct pointer to the +start of the array of Uses. Though not mandatory for layout a), +we stick to this redundancy for the sake of simplicity. +The User object also stores the number of Use objects it +has. (Theoretically this information can also be calculated +given the scheme presented below.)

+

+Special forms of allocation operators (operator new) +enforce the following memory layouts:

+ +
    +
  • Layout a) is modelled by prepending the User object by the Use[] array.

    + +
    +...---.---.---.---.-------...
    +  | P | P | P | P | User
    +'''---'---'---'---'-------'''
    +
    + +
  • Layout b) is modelled by pointing at the Use[] array.

    +
    +.-------...
    +| User
    +'-------'''
    +    |
    +    v
    +    .---.---.---.---...
    +    | P | P | P | P |
    +    '---'---'---'---'''
    +
    +
+(In the above figures 'P' stands for the Use** that + is stored in each Use object in the member Use::Prev) + + + + +
+

+Since the Use objects are deprived of the direct (back)pointer to +their User objects, there must be a fast and exact method to +recover it. This is accomplished by the following scheme:

+
+
+A bit-encoding in the 2 LSBits (least significant bits) of Use::Prev makes it
+possible to find the start of the User object:
+
    +
  • 00 —> binary digit 0
  • +
  • 01 —> binary digit 1
  • +
  • 10 —> stop and calculate (s)
  • +
  • 11 —> full stop (S)
  • +
+

+Given a Use*, all we have to do is to walk till we get +a stop and we either have a User immediately behind or +we have to walk to the next stop picking up digits +and calculating the offset:

+
+.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.----------------
+| 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*)
+'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'----------------
+    |+15                |+10            |+6         |+3     |+1
+    |                   |               |           |       |__>
+    |                   |               |           |__________>
+    |                   |               |______________________>
+    |                   |______________________________________>
+    |__________________________________________________________>
+
+

+Only the significant number of bits need to be stored between the +stops, so that the worst case is 20 memory accesses when there are +1000 Use objects associated with a User.

+ + + + +
+

+The following literate Haskell fragment demonstrates the concept:

+
+ +
+
+> import Test.QuickCheck
+> 
+> digits :: Int -> [Char] -> [Char]
+> digits 0 acc = '0' : acc
+> digits 1 acc = '1' : acc
+> digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc
+> 
+> dist :: Int -> [Char] -> [Char]
+> dist 0 [] = ['S']
+> dist 0 acc = acc
+> dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r
+> dist n acc = dist (n - 1) $ dist 1 acc
+> 
+> takeLast n ss = reverse $ take n $ reverse ss
+> 
+> test = takeLast 40 $ dist 20 []
+> 
+
+
+

+Printing <test> gives: "1s100000s11010s10100s1111s1010s110s11s1S"

+

+The reverse algorithm computes the length of the string just by examining +a certain prefix:

+ +
+
+> pref :: [Char] -> Int
+> pref "S" = 1
+> pref ('s':'1':rest) = decode 2 1 rest
+> pref (_:rest) = 1 + pref rest
+> 
+> decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest
+> decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest
+> decode walk acc _ = walk + acc
+> 
+
+
+

+Now, as expected, printing <pref test> gives 40.

+

+We can quickCheck this with the following property:

+ +
+
+> testcase = dist 2000 []
+> testcaseLength = length testcase
+> 
+> identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr
+>     where arr = takeLast n testcase
+> 
+
+
+

+As expected <quickCheck identityProp> gives:

+ +
+*Main> quickCheck identityProp
+OK, passed 100 tests.
+
+

+Let's be a bit more exhaustive:

+ +
+
+> 
+> deepCheck p = check (defaultConfig { configMaxTest = 500 }) p
+> 
+
+
+

+And here is the result of <deepCheck identityProp>:

+ +
+*Main> deepCheck identityProp
+OK, passed 500 tests.
+
+ + + + +

+To maintain the invariant that the 2 LSBits of each Use** in Use +never change after being set up, setters of Use::Prev must re-tag the +new Use** on every modification. Accordingly getters must strip the +tag bits.

+

+For layout b) instead of the User we find a pointer (User* with LSBit set).
+Following this pointer brings us to the User. A portable trick ensures
+that the first word of User (if interpreted as a pointer) never has
+the LSBit set. (Portability relies on the fact that all known compilers place
+the vptr in the first word of the instances.)

+ +
+ + + + + +
+

#include "llvm/Type.h" +
doxygen info: Type Class

+ +

The Core LLVM classes are the primary means of representing the program +being inspected or transformed. The core LLVM classes are defined in +header files in the include/llvm/ directory, and implemented in +the lib/VMCore directory.

+ +
+ + + + +
+ +

Type is a superclass of all type classes. Every Value has + a Type. Type cannot be instantiated directly but only + through its subclasses. Certain primitive types (VoidType, + LabelType, FloatType and DoubleType) have hidden + subclasses. They are hidden because they offer no useful functionality beyond + what the Type class offers except to distinguish themselves from + other subclasses of Type.

+

All other types are subclasses of DerivedType. Types can be + named, but this is not a requirement. There exists exactly + one instance of a given shape at any one time. This allows type equality to + be performed with address equality of the Type Instance. That is, given two + Type* values, the types are identical if the pointers are identical. +
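For example:

+
+Value *V1 = ...;  Value *V2 = ...;
+if (V1->getType() == V2->getType()) {
+  // Structurally identical types are always the same Type instance.
+}
+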

+
+ + + + +
+ +
    +
  • bool isIntegerTy() const: Returns true for any integer type.
  • + +
  • bool isFloatingPointTy(): Return true if this is one of the five + floating point types.
  • + +
  • bool isAbstract(): Return true if the type is abstract (contains + an OpaqueType anywhere in its definition).
  • + +
  • bool isSized(): Return true if the type has known size. Things + that don't have a size are abstract types, labels and void.
  • + +
+
+ + + +
+
+
IntegerType
+
Subclass of DerivedType that represents integer types of any bit width. + Any bit width between IntegerType::MIN_INT_BITS (1) and + IntegerType::MAX_INT_BITS (~8 million) can be represented. +
    +
  • static const IntegerType* get(unsigned NumBits): get an integer + type of a specific bit width.
  • +
  • unsigned getBitWidth() const: Get the bit width of an integer + type.
  • +
+
+
SequentialType
+
This is subclassed by ArrayType, PointerType and VectorType
+
    +
  • const Type * getElementType() const: Returns the type of each + of the elements in the sequential type.
  • +
+
+
ArrayType
+
This is a subclass of SequentialType and defines the interface for array + types. +
    +
  • unsigned getNumElements() const: Returns the number of + elements in the array.
  • +
+
+
PointerType
+
Subclass of SequentialType for pointer types.
+
VectorType
+
Subclass of SequentialType for vector types. A
+ vector type is similar to an ArrayType but is distinguished because it is
+ a first class type whereas ArrayType is not. Vector types are used for
+ vector operations and are usually small vectors of an integer or floating
+ point type.
+
StructType
+
Subclass of DerivedType for struct types.
+
FunctionType
+
Subclass of DerivedType for function types.
+
    +
  • bool isVarArg() const: Returns true if it's a vararg + function
  • +
  • const Type * getReturnType() const: Returns the + return type of the function.
  • +
  • const Type * getParamType (unsigned i): Returns + the type of the ith parameter.
  • +
  • const unsigned getNumParams() const: Returns the + number of formal parameters.
  • +
+
+
OpaqueType
+
Subclass of DerivedType for abstract types. This class
+ defines no content and is used as a placeholder for some other type. Note
+ that OpaqueType is used (temporarily) during type resolution for forward
+ references of types. Once the referenced type is resolved, the OpaqueType
+ is replaced with the actual type. OpaqueType can also be used for data
+ abstraction. At link time opaque types can be resolved to actual types
+ of the same name.
+
+
+ + + + + + +
+ +

#include "llvm/Module.h"
doxygen info: +Module Class

+ +

The Module class represents the top level structure present in LLVM +programs. An LLVM module is effectively either a translation unit of the +original program or a combination of several translation units merged by the +linker. The Module class keeps track of a list of Functions, a list of GlobalVariables, and a SymbolTable. Additionally, it contains a few +helpful member functions that try to make common operations easy.

+ +
+ + + + +
+ +
    +
  • Module::Module(std::string name = "")
  • +
+ +

Constructing a Module is easy. You can optionally +provide a name for it (probably based on the name of the translation unit).

+ +
    +
  • Module::iterator - Typedef for function list iterator
    + Module::const_iterator - Typedef for const_iterator.
    + + begin(), end() + size(), empty() + +

    These are forwarding methods that make it easy to access the contents of + a Module object's Function + list.

  • + +
  • Module::FunctionListType &getFunctionList() + +

    Returns the list of Functions. This is + necessary to use when you need to update the list or perform a complex + action that doesn't have a forwarding method.

    + +

  • +
+ +
+ +
    +
  • Module::global_iterator - Typedef for global variable list iterator
    + + Module::const_global_iterator - Typedef for const_iterator.
    + + global_begin(), global_end() + global_size(), global_empty() + +

    These are forwarding methods that make it easy to access the contents of + a Module object's GlobalVariable list.

  • + +
  • Module::GlobalListType &getGlobalList() + +

    Returns the list of GlobalVariables. This is necessary to + use when you need to update the list or perform a complex action that + doesn't have a forwarding method.

    + +

  • +
+ +
+ + + +
+ +
    +
  • Function *getFunction(const std::string + &Name, const FunctionType *Ty) + +

    Look up the specified function in the Module SymbolTable. If it does not exist, return + null.

  • + +
  • Function *getOrInsertFunction(const + std::string &Name, const FunctionType *T) + +

    Look up the specified function in the Module SymbolTable. If it does not exist, add an + external declaration for the function and return it.

  • + +
  • std::string getTypeName(const Type *Ty) + +

    If there is at least one entry in the SymbolTable for the specified Type, return it. Otherwise return the empty + string.

  • + +
  • bool addTypeName(const std::string &Name, const Type *Ty) + +

    Insert an entry in the SymbolTable + mapping Name to Ty. If there is already an entry for this + name, true is returned and the SymbolTable is not modified.

  • +
+ +
+ + + + + +
+ +

#include "llvm/Value.h" +
+doxygen info: Value Class

+ +

The Value class is the most important class in the LLVM source
+base. It represents a typed value that may be used (among other things) as an
+operand to an instruction. There are many different types of Values,
+such as Constants and Arguments. Even Instructions and Functions are Values.

+ +

A particular Value may be used many times in the LLVM representation
+for a program. For example, an incoming argument to a function (represented
+with an instance of the Argument class) is "used" by
+every instruction in the function that references the argument. To keep track
+of this relationship, the Value class keeps a list of all of the Users that are using it (the User class is a base class for all nodes in the LLVM
+graph that can refer to Values). This use list is how LLVM represents
+def-use information in the program, and is accessible through the use_*
+methods, shown below.

+ +

Because LLVM is a typed representation, every LLVM Value is typed, +and this Type is available through the getType() +method. In addition, all LLVM values can be named. The "name" of the +Value is a symbolic string printed in the LLVM code:

+ +
+
+%foo = add i32 1, 2
+
+
+ +

The name of this instruction is "foo". NOTE
+that the name of any value may be missing (an empty string), so names should
+ONLY be used for debugging (making the source code easier to read,
+debugging printouts); they should not be used to keep track of values or map
+between them. For this purpose, use a std::map of pointers to the
+Value itself instead.

+ +

One important aspect of LLVM is that there is no distinction between an SSA +variable and the operation that produces it. Because of this, any reference to +the value produced by an instruction (or the value available as an incoming +argument, for example) is represented as a direct pointer to the instance of +the class that +represents this value. Although this may take some getting used to, it +simplifies the representation and makes it easier to manipulate.

+ +
+ + + + +
+ +
    +
  • Value::use_iterator - Typedef for iterator over the +use-list
    + Value::const_use_iterator - Typedef for const_iterator over +the use-list
    + unsigned use_size() - Returns the number of users of the +value.
    + bool use_empty() - Returns true if there are no users.
    + use_iterator use_begin() - Get an iterator to the start of +the use-list.
    + use_iterator use_end() - Get an iterator to the end of the +use-list.
    + User *use_back() - Returns the last +element in the list. +

    These methods are the interface to access the def-use +information in LLVM. As with all other iterators in LLVM, the naming +conventions follow the conventions defined by the STL.

    +
  • +
  • Type *getType() const +

    This method returns the Type of the Value.

    +
  • +
  • bool hasName() const
    + std::string getName() const
    + void setName(const std::string &Name) +

This family of methods is used to access and assign a name to a Value;
+be aware of the precaution above.

    +
  • +
  • void replaceAllUsesWith(Value *V) + +

    This method traverses the use list of a Value changing all Users of the current value to refer to + "V" instead. For example, if you detect that an instruction always + produces a constant value (for example through constant folding), you can + replace all uses of the instruction with the constant like this:

    + +
    +
    +Inst->replaceAllUsesWith(ConstVal);
    +
    +
    + +
+ +
+ + + + +
+ +

+#include "llvm/User.h"
+doxygen info: User Class
+Superclass: Value

+ +

The User class is the common base class of all LLVM nodes that may +refer to Values. It exposes a list of "Operands" +that are all of the Values that the User is +referring to. The User class itself is a subclass of +Value.

+ +

The operands of a User point directly to the LLVM Value that it refers to. Because LLVM uses Static +Single Assignment (SSA) form, there can only be one definition referred to, +allowing this direct connection. This connection provides the use-def +information in LLVM.

+ +
+ + + + +
+ +

The User class exposes the operand list in two ways: through +an index access interface and through an iterator based interface.

+ +
    +
  • Value *getOperand(unsigned i)
    + unsigned getNumOperands() +

    These two methods expose the operands of the User in a +convenient form for direct access.

  • + +
  • User::op_iterator - Typedef for iterator over the operand +list
    + op_iterator op_begin() - Get an iterator to the start of +the operand list.
    + op_iterator op_end() - Get an iterator to the end of the +operand list. +

    Together, these methods make up the iterator based interface to +the operands of a User.

  • +
+ +
+ + + + +
+ +

#include "llvm/Instruction.h"
+doxygen info: Instruction Class
+Superclasses: User, Value

+ +

The Instruction class is the common base class for all LLVM +instructions. It provides only a few methods, but is a very commonly used +class. The primary data tracked by the Instruction class itself is the +opcode (instruction type) and the parent BasicBlock the Instruction is embedded +into. To represent a specific type of instruction, one of many subclasses of +Instruction are used.

+ +

Because the Instruction class subclasses the User class, its operands can be accessed in the same +way as for other Users (with the +getOperand()/getNumOperands() and +op_begin()/op_end() methods).

An important file for +the Instruction class is the llvm/Instruction.def file. This +file contains some meta-data about the various different types of instructions +in LLVM. It describes the enum values that are used as opcodes (for example +Instruction::Add and Instruction::ICmp), as well as the +concrete sub-classes of Instruction that implement the instruction (for +example BinaryOperator and CmpInst). Unfortunately, the use of macros in +this file confuses doxygen, so these enum values don't show up correctly in the +doxygen output.
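For example:

+
+Instruction *I = ...;
+if (I->getOpcode() == Instruction::Add)
+  errs() << "found an add: " << *I << "\n";
+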

+ +
+ + + +
+
    +
  • BinaryOperator +

This subclass represents all two-operand instructions whose operands
+ must be the same type, except for the comparison instructions.

  • +
  • CastInst +

    This subclass is the parent of the 12 casting instructions. It provides + common operations on cast instructions.

    +
  • CmpInst +

This subclass represents the two comparison instructions,
+ ICmpInst (integer operands), and
+ FCmpInst (floating point operands).

    +
  • TerminatorInst +

    This subclass is the parent of all terminator instructions (those which + can terminate a block).

    +
+
+ + + + +
+ +
    +
  • BasicBlock *getParent() +

    Returns the BasicBlock that +this Instruction is embedded into.

  • +
  • bool mayWriteToMemory() +

Returns true if the instruction writes to memory, i.e. it is a
+ call, free, invoke, or store.

  • +
  • unsigned getOpcode() +

    Returns the opcode for the Instruction.

  • +
  • Instruction *clone() const +

    Returns another instance of the specified instruction, identical +in all ways to the original except that the instruction has no parent +(ie it's not embedded into a BasicBlock), +and it has no name

  • +
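These accessors compose naturally with the User interface that Instruction inherits. A short sketch, assuming I is an Instruction*:

    if (I->getOpcode() == Instruction::Add) {   // opcode enums come from llvm/Instruction.def
      Value *LHS = I->getOperand(0);
      Value *RHS = I->getOperand(1);
      // ...
    }

    if (!I->mayWriteToMemory()) {
      Instruction *Copy = I->clone();           // Copy has no parent and no name yet
      // ... insert Copy into a BasicBlock before relying on it
    }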
The Constant class and subclasses

Constant represents a base class for different types of constants. It is subclassed by ConstantInt, ConstantArray, etc. for representing the various types of Constants. GlobalValue is also a subclass, which represents the address of a global variable or function.
Important Subclasses of Constant
  • ConstantInt : This subclass of Constant represents an integer constant of any width.

    • const APInt& getValue() const: Returns the underlying value of this constant, an APInt value.

    • int64_t getSExtValue() const: Converts the underlying APInt value to an int64_t via sign extension. If the value (not the bit width) of the APInt is too large to fit in an int64_t, an assertion will result. For this reason, use of this method is discouraged.

    • uint64_t getZExtValue() const: Converts the underlying APInt value to a uint64_t via zero extension. If the value (not the bit width) of the APInt is too large to fit in a uint64_t, an assertion will result. For this reason, use of this method is discouraged.

    • static ConstantInt* get(const APInt& Val): Returns the ConstantInt object that represents the value provided by Val. The type is implied as the IntegerType that corresponds to the bit width of Val.

    • static ConstantInt* get(const Type *Ty, uint64_t Val): Returns the ConstantInt object that represents the value provided by Val for integer type Ty (see the sketch after this list).

  • ConstantFP : This class represents a floating point constant.

    • double getValue() const: Returns the underlying value of this constant.

  • ConstantArray : This represents a constant array.

    • const std::vector<Use> &getValues() const: Returns a vector of the component constants that make up this array.

  • ConstantStruct : This represents a constant struct.

    • const std::vector<Use> &getValues() const: Returns a vector of the component constants that make up this struct.

  • GlobalValue : This represents either a global variable or a function. In either case, the value is a constant fixed address (after linking).
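For example, a minimal sketch of creating and inspecting a ConstantInt; Context is assumed to be an existing LLVMContext (e.g. from getGlobalContext()):

    ConstantInt *CI = ConstantInt::get(Type::getInt32Ty(Context), 42);
    uint64_t Raw = CI->getZExtValue();   // 42, zero extended to 64 bits
    const APInt &V = CI->getValue();     // full-precision view of the same value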
The GlobalValue class

#include "llvm/GlobalValue.h"
doxygen info: GlobalValue Class
Superclasses: Constant, User, Value

Global values (GlobalVariables or Functions) are the only LLVM values that are visible in the bodies of all Functions. Because they are visible at global scope, they are also subject to linking with other globals defined in different translation units. To control the linking process, GlobalValues know their linkage rules. Specifically, GlobalValues know whether they have internal or external linkage, as defined by the LinkageTypes enumeration.

If a GlobalValue has internal linkage (equivalent to being static in C), it is not visible to code outside the current translation unit, and does not participate in linking. If it has external linkage, it is visible to external code, and does participate in linking. In addition to linkage information, GlobalValues keep track of which Module they are currently part of.

Because GlobalValues are memory objects, they are always referred to by their address. As such, the Type of a global is always a pointer to its contents. It is important to remember this when using the GetElementPtrInst instruction because this pointer must be dereferenced first. For example, if you have a GlobalVariable (a subclass of GlobalValue) that is an array of 24 ints, type [24 x i32], then the GlobalVariable is a pointer to that array. Although the address of the first element of this array and the value of the GlobalVariable are the same, they have different types. The GlobalVariable's type is [24 x i32]. The first element's type is i32. Because of this, accessing a global value requires you to dereference the pointer with GetElementPtrInst first, then its elements can be accessed. This is explained in the LLVM Language Reference Manual.
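A minimal sketch of this distinction, assuming GV points to the [24 x i32] global described above (the exact const-qualification of getType() may vary by release):

    const PointerType *PT = GV->getType();        // a global's type is pointer-to-contents
    const Type *Contents = PT->getElementType();  // here: [24 x i32]
    // reaching an element requires a GetElementPtrInst through this pointer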

Important Public Members of the GlobalValue class
  • bool hasInternalLinkage() const
    bool hasExternalLinkage() const
    void setInternalLinkage(bool HasInternalLinkage)

    These methods manipulate the linkage characteristics of the GlobalValue.

  • Module *getParent()

    This returns the Module that the GlobalValue is currently embedded into.
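A short sketch, assuming GV is a GlobalValue*:

    if (GV->hasInternalLinkage()) {
      // not visible outside this translation unit; does not participate in linking
    }
    Module *M = GV->getParent();   // the Module this global currently lives in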
The Function class

#include "llvm/Function.h"
doxygen info: Function Class
Superclasses: GlobalValue, Constant, User, Value

The Function class represents a single procedure in LLVM. It is actually one of the more complex classes in the LLVM hierarchy because it must keep track of a large amount of data. The Function class keeps track of a list of BasicBlocks, a list of formal Arguments, and a SymbolTable.

The list of BasicBlocks is the most commonly used part of Function objects. The list imposes an implicit ordering of the blocks in the function, which indicates how the code will be laid out by the backend. Additionally, the first BasicBlock is the implicit entry node for the Function. It is not legal in LLVM to explicitly branch to this initial block. There are no implicit exit nodes, and in fact there may be multiple exit nodes from a single Function. If the BasicBlock list is empty, this indicates that the Function is actually a function declaration: the actual body of the function hasn't been linked in yet.

In addition to a list of BasicBlocks, the Function class also keeps track of the list of formal Arguments that the function receives. This container manages the lifetime of the Argument nodes, just like the BasicBlock list does for the BasicBlocks.

The SymbolTable is a very rarely used LLVM feature that is only used when you have to look up a value by name. Aside from that, the SymbolTable is used internally to make sure that there are no conflicts between the names of Instructions, BasicBlocks, or Arguments in the function body.

Note that Function is a GlobalValue and therefore also a Constant. The value of the function is its address (after linking), which is guaranteed to be constant.

Important Public Members of the Function class
  • Function(const FunctionType *Ty, LinkageTypes Linkage, const std::string &N = "", Module* Parent = 0)

    Constructor used when you need to create new Functions to add to the program. The constructor must specify the type of the function to create and what type of linkage the function should have. The FunctionType argument specifies the formal arguments and return value for the function. The same FunctionType value can be used to create multiple functions. The Parent argument specifies the Module in which the function is defined. If this argument is provided, the function will automatically be inserted into that module's list of functions.

  • bool isDeclaration()

    Return whether or not the Function has a body defined. If the function is "external", it does not have a body, and thus must be resolved by linking with a function defined in a different translation unit.

  • Function::iterator - Typedef for basic block list iterator
    Function::const_iterator - Typedef for const_iterator.
    begin(), end(), size(), empty()

    These are forwarding methods that make it easy to access the contents of a Function object's BasicBlock list.

  • Function::BasicBlockListType &getBasicBlockList()

    Returns the list of BasicBlocks. This is necessary to use when you need to update the list or perform a complex action that doesn't have a forwarding method.

  • Function::arg_iterator - Typedef for the argument list iterator
    Function::const_arg_iterator - Typedef for const_iterator.
    arg_begin(), arg_end(), arg_size(), arg_empty()

    These are forwarding methods that make it easy to access the contents of a Function object's Argument list.

  • Function::ArgumentListType &getArgumentList()

    Returns the list of Arguments. This is necessary to use when you need to update the list or perform a complex action that doesn't have a forwarding method.

  • BasicBlock &getEntryBlock()

    Returns the entry BasicBlock for the function. Because the entry block for the function is always the first block, this returns the first block of the Function.

  • Type *getReturnType()
    FunctionType *getFunctionType()

    This traverses the Type of the Function and returns the return type of the function, or the FunctionType of the actual function.

  • SymbolTable *getSymbolTable()

    Return a pointer to the SymbolTable for this Function.
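Putting a few of these members together, a minimal sketch that walks a Function* F:

    if (!F->isDeclaration()) {                  // only definitions have a body
      BasicBlock &Entry = F->getEntryBlock();   // always the first block in the list
      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
        // *BB is each BasicBlock, in layout order
      }
      for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
           AI != AE; ++AI) {
        Argument &A = *AI;                      // each formal argument
      }
    }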
The GlobalVariable class

#include "llvm/GlobalVariable.h"
doxygen info: GlobalVariable Class
Superclasses: GlobalValue, Constant, User, Value

Global variables are represented with the (surprise surprise) GlobalVariable class. Like functions, GlobalVariables are also subclasses of GlobalValue, and as such are always referenced by their address (global values must live in memory, so their "name" refers to their constant address). See GlobalValue for more on this. Global variables may have an initial value (which must be a Constant), and if they have an initializer, they may be marked as "constant" themselves (indicating that their contents never change at runtime).

Important Public Members of the GlobalVariable class
  • GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes& Linkage, Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)

    Create a new global variable of the specified type. If isConstant is true then the global variable will be marked as unchanging for the program. The Linkage parameter specifies the type of linkage (internal, external, weak, linkonce, appending) for the variable. If the linkage is InternalLinkage, WeakAnyLinkage, WeakODRLinkage, LinkOnceAnyLinkage or LinkOnceODRLinkage, then the resultant global variable will have internal linkage. AppendingLinkage concatenates together all instances (in different translation units) of the variable into a single variable but is only applicable to arrays. See the LLVM Language Reference for further details on linkage types. Optionally an initializer, a name, and the module to put the variable into may be specified for the global variable as well.

  • bool isConstant() const

    Returns true if this is a global variable that is known not to be modified at runtime.

  • bool hasInitializer()

    Returns true if this GlobalVariable has an initializer.

  • Constant *getInitializer()

    Returns the initial value for a GlobalVariable. It is not legal to call this method if there is no initializer.
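A short sketch of inspecting a GlobalVariable* GV with these methods:

    if (GV->hasInitializer()) {
      Constant *Init = GV->getInitializer();  // only legal when an initializer exists
      if (GV->isConstant()) {
        // the contents of GV are guaranteed never to change at runtime
      }
    }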
The BasicBlock class

#include "llvm/BasicBlock.h"
doxygen info: BasicBlock Class
Superclass: Value

This class represents a single entry single exit section of the code, commonly known as a basic block by the compiler community. The BasicBlock class maintains a list of Instructions, which form the body of the block. Matching the language definition, the last element of this list of instructions is always a terminator instruction (a subclass of the TerminatorInst class).

In addition to tracking the list of instructions that make up the block, the BasicBlock class also keeps track of the Function that it is embedded into.

Note that BasicBlocks themselves are Values, because they are referenced by instructions like branches and can go in the switch tables. BasicBlocks have type label.

Important Public Members of the BasicBlock class
  • BasicBlock(const std::string &Name = "", Function *Parent = 0)

    The BasicBlock constructor is used to create new basic blocks for insertion into a function. The constructor optionally takes a name for the new block, and a Function to insert it into. If the Parent parameter is specified, the new BasicBlock is automatically inserted at the end of the specified Function; if not specified, the BasicBlock must be manually inserted into the Function.

  • BasicBlock::iterator - Typedef for instruction list iterator
    BasicBlock::const_iterator - Typedef for const_iterator.
    begin(), end(), front(), back(), size(), empty() - STL-style functions for accessing the instruction list.

    These methods and typedefs are forwarding functions that have the same semantics as the standard library methods of the same names. These methods expose the underlying instruction list of a basic block in a way that is easy to manipulate. To get the full complement of container operations (including operations to update the list), you must use the getInstList() method.

  • BasicBlock::InstListType &getInstList()

    This method is used to get access to the underlying container that actually holds the Instructions. This method must be used when there isn't a forwarding function in the BasicBlock class for the operation that you would like to perform. Because there are no forwarding functions for "updating" operations, you need to use this if you want to update the contents of a BasicBlock.

  • Function *getParent()

    Returns a pointer to the Function the block is embedded into, or a null pointer if it is homeless.

  • TerminatorInst *getTerminator()

    Returns a pointer to the terminator instruction that appears at the end of the BasicBlock. If there is no terminator instruction, or if the last instruction in the block is not a terminator, then a null pointer is returned.
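A minimal sketch that walks a BasicBlock* BB using these members:

    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
      Instruction &Inst = *I;   // instructions in order, terminator last
    }

    if (TerminatorInst *T = BB->getTerminator()) {
      unsigned NumSucc = T->getNumSuccessors();  // a well-formed block has exactly one terminator
    }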
The Argument class

This subclass of Value defines the interface for incoming formal arguments to a function. A Function maintains a list of its formal arguments. An argument has a pointer to the parent Function.

Dinakar Dhurjati and Chris Lattner
The LLVM Compiler Infrastructure
Last modified: $Date$

diff --git a/final/docs/Projects.html b/final/docs/Projects.html
new file mode 100644
index 00000000000..ada6196be2a
--- /dev/null
+++ b/final/docs/Projects.html
@@ -0,0 +1,460 @@
Creating an LLVM Project
  1. Overview
  2. Create a project from the Sample Project
  3. Source tree layout
  4. Writing LLVM-style Makefiles
       1. Required Variables
       2. Variables for Building Subdirectories
       3. Variables for Building Libraries
       4. Variables for Building Programs
       5. Miscellaneous Variables
  5. Placement of object code
  6. Further help

Written by John Criswell

Overview

The LLVM build system is designed to facilitate the building of third party projects that use LLVM header files, libraries, and tools. In order to use these facilities, a Makefile from a project must do the following things:

  1. Set make variables. There are several variables that a Makefile needs to set to use the LLVM build system:

       • PROJECT_NAME - The name by which your project is known.
       • LLVM_SRC_ROOT - The root of the LLVM source tree.
       • LLVM_OBJ_ROOT - The root of the LLVM object tree.
       • PROJ_SRC_ROOT - The root of the project's source tree.
       • PROJ_OBJ_ROOT - The root of the project's object tree.
       • PROJ_INSTALL_ROOT - The root installation directory.
       • LEVEL - The relative path from the current directory to the project's root ($PROJ_OBJ_ROOT).

  2. Include Makefile.config from $(LLVM_OBJ_ROOT).

  3. Include Makefile.rules from $(LLVM_SRC_ROOT).

There are two ways that you can set all of these variables:

  1. You can write your own Makefiles which hard-code these values.

  2. You can use the pre-made LLVM sample project. This sample project includes Makefiles, a configure script that can be used to configure the location of LLVM, and the ability to support multiple object directories from a single source directory.

This document assumes that you will base your project on the LLVM sample project found in llvm/projects/sample. If you want to devise your own build system, studying the sample project and LLVM Makefiles will probably provide enough information on how to write your own Makefiles. A hand-written Makefile following the first option is sketched below.
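A minimal sketch of such a hand-written, hard-coded Makefile; every path and the project name are placeholders for your own locations:

    # Hypothetical values; adjust to your own tree layout.
    PROJECT_NAME := myproject
    LLVM_SRC_ROOT := /home/me/llvm
    LLVM_OBJ_ROOT := /home/me/llvm-objects
    PROJ_SRC_ROOT := $(LLVM_SRC_ROOT)/projects/myproject
    PROJ_OBJ_ROOT := $(LLVM_OBJ_ROOT)/projects/myproject
    PROJ_INSTALL_ROOT := /usr/local
    LEVEL := .

    include $(LLVM_OBJ_ROOT)/Makefile.config
    include $(LLVM_SRC_ROOT)/Makefile.rules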

Create a Project from the Sample Project

Follow these simple steps to start your project:

  1. Copy the llvm/projects/sample directory to any place of your choosing. You can place it anywhere you like. Rename the directory to match the name of your project.

  2. If you downloaded LLVM using Subversion, remove all the directories named .svn (and all the files therein) from your project's new source tree. This will keep Subversion from thinking that your project is inside llvm/trunk/projects/sample.

  3. Add your source code and Makefiles to your source tree.

  4. If you want your project to be configured with the configure script then you need to edit autoconf/configure.ac as follows:

       • AC_INIT. Place the name of your project, its version number and a contact email address for your project as the arguments to this macro.
       • AC_CONFIG_AUX_DIR. If your project isn't in the llvm/projects directory then you might need to adjust this so that it specifies a relative path to the llvm/autoconf directory.
       • LLVM_CONFIG_PROJECT. Just leave this alone.
       • AC_CONFIG_SRCDIR. Specify a path to a file name that identifies your project; or just leave it at Makefile.common.in.
       • AC_CONFIG_FILES. Do not change.
       • AC_CONFIG_MAKEFILE. Use one of these macros for each Makefile that your project uses. This macro arranges for your makefiles to be copied from the source directory, unmodified, to the build directory.

  5. After updating autoconf/configure.ac, regenerate the configure script with these commands:

         % cd autoconf
         % ./AutoRegen.sh

     You must be using Autoconf version 2.59 or later and your aclocal version should be 1.9 or later.

  6. Run configure in the directory in which you want to place object code. Use the following options to tell your project where it can find LLVM:

     --with-llvmsrc=<directory>
         Tell your project where the LLVM source tree is located.

     --with-llvmobj=<directory>
         Tell your project where the LLVM object tree is located.

     --prefix=<directory>
         Tell your project where it should get installed.

That's it! Now all you have to do is type gmake (or make if you're on a GNU/Linux system) in the root of your object directory, and your project should build.

Source Tree Layout

In order to use the LLVM build system, you will want to organize your source code so that it can benefit from the build system's features. Mainly, you want your source tree layout to look similar to the LLVM source tree layout. The best way to do this is to just copy the project tree from llvm/projects/sample and modify it to meet your needs, but you can certainly add to it if you want.

Underneath your top level directory, you should have the following directories:

lib
    This subdirectory should contain all of your library source code. For each library that you build, you will have one directory in lib that will contain that library's source code.

    Libraries can be object files, archives, or dynamic libraries. The lib directory is just a convenient place for libraries as it places them all in a directory from which they can be linked later.

include
    This subdirectory should contain any header files that are global to your project. By global, we mean that they are used by more than one library or executable of your project.

    By placing your header files in include, they will be found automatically by the LLVM build system. For example, if you have a file include/jazz/note.h, then your source files can include it simply with #include "jazz/note.h".

tools
    This subdirectory should contain all of your source code for executables. For each program that you build, you will have one directory in tools that will contain that program's source code.

test
    This subdirectory should contain tests that verify that your code works correctly. Automated tests are especially useful.

    Currently, the LLVM build system provides basic support for tests. The LLVM system provides the following:

      • LLVM provides a tcl procedure that is used by Dejagnu to run tests. It can be found in llvm/lib/llvm-dg.exp. This test procedure uses RUN lines in the actual test case to determine how to run the test. See the TestingGuide for more details. You can easily write Makefile support similar to the Makefiles in llvm/test to use Dejagnu to run your project's tests.

      • LLVM contains an optional package called llvm-test which provides benchmarks and programs that are known to compile with the LLVM GCC front ends. You can use these programs to test your code, gather statistics information, and compare it to the current LLVM performance statistics.

        Currently, there is no way to hook your tests directly into the llvm/test testing harness. You will simply need to find a way to use the source provided within that directory on your own.

Typically, you will want to build your lib directory first, followed by your tools directory.

Writing LLVM-Style Makefiles

The LLVM build system provides a convenient way to build libraries and executables. Most of your project Makefiles will only need to define a few variables. Below is a list of the variables one can set and what they can do:

Required Variables
LEVEL
    This variable is the relative path from this Makefile to the top directory of your project's source code. For example, if your source code is in /tmp/src, then the Makefile in /tmp/src/jump/high would set LEVEL to "../..".
Variables for Building Subdirectories
DIRS
    This is a space separated list of subdirectories that should be built. They will be built, one at a time, in the order specified.

PARALLEL_DIRS
    This is a list of directories that can be built in parallel. These will be built after the directories in DIRS have been built.

OPTIONAL_DIRS
    This is a list of directories that can be built if they exist, but will not cause an error if they do not exist. They are built serially in the order in which they are listed.
Variables for Building Libraries
LIBRARYNAME
    This variable contains the base name of the library that will be built. For example, to build a library named libsample.a, LIBRARYNAME should be set to sample.

BUILD_ARCHIVE
    By default, a library is a .o file that is linked directly into a program. To build an archive (also known as a static library), set the BUILD_ARCHIVE variable.

SHARED_LIBRARY
    If SHARED_LIBRARY is defined in your Makefile, a shared (or dynamic) library will be built.
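For example, a sketch of a Makefile in lib/sample that builds libsample.a, following the sample project's convention of including Makefile.common (the directory and library names are illustrative):

    LEVEL = ../..
    LIBRARYNAME = sample    # base name: produces libsample.a because of BUILD_ARCHIVE
    BUILD_ARCHIVE = 1

    include $(LEVEL)/Makefile.common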
Variables for Building Programs
TOOLNAME
    This variable contains the name of the program that will be built. For example, to build an executable named sample, TOOLNAME should be set to sample.

USEDLIBS
    This variable holds a space separated list of libraries that should be linked into the program. These libraries must either be LLVM libraries or libraries that come from your lib directory. The libraries must be specified by their base name. For example, to link libsample.a, you would set USEDLIBS to sample.

    Note that this works only for statically linked libraries.

LIBS
    To link dynamic libraries, add -l<library base name> to the LIBS variable. The LLVM build system will look in the same places for dynamic libraries as it does for static libraries.

    For example, to link libsample.so, you would have the following line in your Makefile:

        LIBS += -lsample
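Similarly, a sketch of a Makefile in tools/sample that links the sample executable against the static libsample.a from the project's lib directory (names are illustrative):

    LEVEL = ../..
    TOOLNAME = sample       # name of the executable to build
    USEDLIBS = sample       # statically link libsample.a

    include $(LEVEL)/Makefile.common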
Miscellaneous Variables
ExtraSource
    This variable contains a space separated list of extra source files that need to be built. It is useful for including the output of Lex and Yacc programs.

CFLAGS
CPPFLAGS
    These variables can be used to add options to the C and C++ compiler, respectively. They are typically used to add options that tell the compiler the location of additional directories to search for header files.

    It is highly suggested that you append to CFLAGS and CPPFLAGS as opposed to overwriting them. The master Makefiles may already have useful options in them that you may not want to overwrite.

Placement of Object Code

The final location of built libraries and executables will depend upon whether you do a Debug, Release, or Profile build.

Libraries
    All libraries (static and dynamic) will be stored in PROJ_OBJ_ROOT/<type>/lib, where type is Debug, Release, or Profile for a debug, optimized, or profiled build, respectively.

Executables
    All executables will be stored in PROJ_OBJ_ROOT/<type>/bin, where type is Debug, Release, or Profile for a debug, optimized, or profiled build, respectively.
Further Help

If you have any questions or need any help creating an LLVM project, the LLVM team would be more than happy to help. You can always post your questions to the LLVM Developers Mailing List.

John Criswell
The LLVM Compiler Infrastructure
Last modified: $Date$

diff --git a/final/docs/ReleaseNotes.html b/final/docs/ReleaseNotes.html
new file mode 100644
index 00000000000..2f83b9447d1
--- /dev/null
+++ b/final/docs/ReleaseNotes.html
@@ -0,0 +1,1306 @@
LLVM 2.8 Release Notes
  1. Introduction
  2. Sub-project Status Update
  3. External Projects Using LLVM 2.8
  4. What's New in LLVM 2.8?
  5. Installation Instructions
  6. Known Problems
  7. Additional Information

Written by the LLVM Team

Introduction

This document contains the release notes for the LLVM Compiler Infrastructure, release 2.8. Here we describe the status of LLVM, including major improvements from the previous release and significant known problems. All LLVM releases may be downloaded from the LLVM releases web site.

For more information about LLVM, including information about the latest release, please check out the main LLVM web site. If you have questions or comments, the LLVM Developer's Mailing List is a good place to send them.

Note that if you are reading this file from a Subversion checkout or the main LLVM web page, this document applies to the next release, not the current one. To see the release notes for a specific release, please see the releases page.
Sub-project Status Update

The LLVM 2.8 distribution currently consists of code from the core LLVM repository (which roughly includes the LLVM optimizers, code generators and supporting tools), the Clang repository and the llvm-gcc repository. In addition to this code, the LLVM Project includes other sub-projects that are in development. Here we include updates on these subprojects.
Clang: C/C++/Objective-C Frontend Toolkit

Clang is an LLVM front end for the C, C++, and Objective-C languages. Clang aims to provide a better user experience through expressive diagnostics, a high level of conformance to language standards, fast compilation, and low memory use. Like LLVM, Clang provides a modular, library-based architecture that makes it suitable for creating or integrating with other development tools. Clang is considered a production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86 (32- and 64-bit), and for darwin-arm targets.

In the LLVM 2.8 time-frame, the Clang team has made many improvements:

  • Clang C++ is now feature-complete with respect to the ISO C++ 1998 and 2003 standards.
  • Added support for Objective-C++.
  • Clang now uses LLVM-MC to directly generate object code and to parse inline assembly (on Darwin).
  • Introduced many new warnings, including -Wmissing-field-initializers, -Wshadow, -Wno-protocol, -Wtautological-compare, -Wstrict-selector-match, -Wcast-align, -Wunused improvements, and greatly improved format-string checking.
  • Introduced the "libclang" library, a C interface to Clang intended to support IDE clients.
  • Added support for #pragma GCC visibility, #pragma align, and others.
  • Added support for SSE, AVX, ARM NEON, and AltiVec.
  • Improved support for many Microsoft extensions.
  • Implemented support for blocks in C++.
  • Implemented precompiled headers for C++.
  • Improved abstract syntax trees to retain more accurate source information.
  • Added driver support for handling LLVM IR and bitcode files directly.
  • Major improvements to compiler correctness for exception handling.
  • Improved generated code quality in some areas:
      • Good code generation for X86-32 and X86-64 ABI handling.
      • Improved code generation for bit-fields, although important work remains.
Clang Static Analyzer

The Clang Static Analyzer project is an effort to use static source code analysis techniques to automatically find bugs in C and Objective-C programs (and hopefully C++ in the future!). The tool is very good at finding bugs that occur on specific paths through code, such as on error conditions.

The LLVM 2.8 release fixes a number of bugs and slightly improves precision over 2.7, but there are no major new features in the release.
DragonEgg: llvm-gcc ported to gcc-4.5

DragonEgg is a port of llvm-gcc to gcc-4.5. Unlike llvm-gcc, dragonegg in theory does not require any gcc-4.5 modifications whatsoever (currently one small patch is needed) thanks to the new gcc plugin architecture. DragonEgg is a gcc plugin that makes gcc-4.5 use the LLVM optimizers and code generators instead of gcc's, just like with llvm-gcc.

DragonEgg is still a work in progress, but it is able to compile a lot of code, for example all of gcc, LLVM and clang. Currently Ada, C, C++ and Fortran work well, while all other languages either don't work at all or only work poorly. For the moment only the x86-32 and x86-64 targets are supported, and only on linux and darwin (darwin may need additional gcc patches).

The 2.8 release has the following notable changes:

  • The plugin loads faster due to exporting fewer symbols.
  • Additional vector operations such as addps256 are now supported.
  • Ada global variables with no initial value are no longer zero initialized, resulting in better optimization.
  • The '-fplugin-arg-dragonegg-enable-gcc-optzns' flag now runs all gcc optimizers, rather than just a handful.
  • Fortran programs using common variables now link correctly.
  • GNU OMP constructs no longer crash the compiler.
VMKit

The VMKit project is an implementation of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and just-in-time compilation. As of LLVM 2.8, VMKit now supports copying garbage collectors, and can be configured to use MMTk's copy mark-sweep garbage collector. In LLVM 2.8, the VMKit .NET VM is no longer being maintained.
compiler-rt: Compiler Runtime Library

The new LLVM compiler-rt project is a simple library that provides an implementation of the low-level target-specific hooks required by code generation and other runtime components. For example, when compiling for a 32-bit target, converting a double to a 64-bit unsigned integer is compiled into a runtime call to the "__fixunsdfdi" function. The compiler-rt library provides highly optimized implementations of this and other low-level routines (some are 3x faster than the equivalent libgcc routines).

All of the code in the compiler-rt project is available under the standard LLVM License, a "BSD-style" license. New in LLVM 2.8, compiler_rt now supports soft floating point (for targets that don't have a real floating point unit), and includes an extensive testsuite for the "blocks" language feature and the blocks runtime included in compiler_rt.
LLDB: Low Level Debugger

LLDB is a brand new member of the LLVM umbrella of projects. LLDB is a next generation, high-performance debugger. It is built as a set of reusable components which highly leverage existing libraries in the larger LLVM Project, such as the Clang expression parser, the LLVM disassembler and the LLVM JIT.

LLDB is in early development and not included as part of the LLVM 2.8 release, but is mature enough to support basic debugging scenarios on Mac OS X in C, Objective-C and C++. We'd really like help extending and expanding LLDB to support new platforms, new languages, new architectures, and new features.
libc++: C++ Standard Library

libc++ is another new member of the LLVM family. It is an implementation of the C++ standard library, written from the ground up to specifically target the forthcoming C++'0X standard and focus on delivering great performance.

As of the LLVM 2.8 release, libc++ is virtually feature complete, but would benefit from more testing and better integration with Clang++. It is also looking forward to the C++ committee finalizing the C++'0x standard.
KLEE

KLEE is a symbolic execution framework for programs in LLVM bitcode form. KLEE tries to symbolically evaluate "all" paths through the application and records state transitions that lead to fault states. This allows it to construct testcases that lead to faults and can even be used to verify some algorithms.

Although KLEE does not have any major new features as of 2.8, we have made various minor improvements, particularly to ease development:

  • Added support for LLVM 2.8. KLEE currently maintains compatibility with LLVM 2.6, 2.7, and 2.8.
  • Added a buildbot for 2.6, 2.7, and trunk. A 2.8 buildbot will be coming soon following release.
  • Fixed many C++ code issues to allow building with Clang++. Mostly complete, except for the version of MiniSAT which is inside the KLEE STP version.
  • Improved support for building with separate source and build directories.
  • Added support for "long double" on x86.
  • Initial work on KLEE support for using the 'lit' test runner instead of DejaGNU.
  • Added configure support for using an external version of STP.
External Projects Using LLVM 2.8

An exciting aspect of LLVM is that it is used as an enabling technology for a lot of other language and tools projects. This section lists some of the projects that have already been updated to work with LLVM 2.8.
TCE

TCE is a toolset for designing application-specific processors (ASP) based on the Transport triggered architecture (TTA). The toolset provides a complete co-design flow from C/C++ programs down to synthesizable VHDL and parallel program binaries. Processor customization points include the register files, function units, supported operations, and the interconnection network.

TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target independent optimizations and also for parts of code generation. It generates new LLVM-based code generators "on the fly" for the designed TTA processors and loads them into the compiler backend as runtime libraries to avoid per-target recompilation of larger parts of the compiler chain.
Horizon

Horizon is a bytecode language and compiler written on top of LLVM, intended for producing single-address-space managed code operating systems that run faster than the equivalent multiple-address-space C systems. A more in-depth blurb is available on the wiki.
Clam AntiVirus

Clam AntiVirus is an open source (GPL) anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail gateways. Since version 0.96 it has bytecode signatures that allow writing detections for complex malware. It uses LLVM's JIT to speed up the execution of bytecode on X86, X86-64, and PPC32/64, falling back to its own interpreter otherwise. The git version was updated to work with LLVM 2.8.

The ClamAV bytecode compiler uses Clang and LLVM to compile a C-like language, insert runtime checks, and generate ClamAV bytecode.
Pure

Pure is an algebraic/functional programming language based on term rewriting. Programs are collections of equations which are used to evaluate expressions in a symbolic fashion. Pure offers dynamic typing, eager and lazy evaluation, lexical closures, a hygienic macro system (also based on term rewriting), built-in list and matrix support (including list and matrix comprehensions) and an easy-to-use C interface. The interpreter uses LLVM as a backend to JIT-compile Pure programs to fast native code.

Pure versions 0.44 and later have been tested and are known to work with LLVM 2.8 (and continue to work with older LLVM releases >= 2.5).
Glasgow Haskell Compiler (GHC)

GHC is an open source, state-of-the-art programming suite for Haskell, a standard lazy functional programming language. It includes an optimizing static compiler generating good code for a variety of platforms, together with an interactive system for convenient, quick development.

In addition to the existing C and native code generators, GHC 7.0 now supports an LLVM code generator. GHC supports LLVM 2.7 and later.
Clay

Clay is a new systems programming language that is specifically designed for generic programming. It makes generic programming very concise thanks to whole program type propagation. It uses LLVM as its backend.
llvm-py

llvm-py has been updated to work with LLVM 2.8. llvm-py provides Python bindings for LLVM, allowing you to write a compiler backend or a VM in Python.
FAUST

FAUST is a compiled language for real-time audio signal processing. The name FAUST stands for Functional AUdio STream. Its programming model combines two approaches: functional programming and block diagram composition. In addition to the C, C++, and JAVA output formats, the Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7 and 2.8.
Jade

Jade (Just-in-time Adaptive Decoder Engine) is a generic video decoder engine using LLVM for just-in-time compilation of video decoder configurations. Those configurations are designed by the MPEG Reconfigurable Video Coding (RVC) committee. The MPEG RVC standard is built on a stream-based dataflow representation of decoders. It is composed of a standard library of coding tools written in the RVC-CAL language and a dataflow configuration (block diagram) of a decoder.

The Jade project is hosted as part of the Open RVC-CAL Compiler and requires it to translate the RVC-CAL standard library of video coding tools into LLVM assembly code.
Neko LLVM JIT

Neko LLVM JIT replaces the standard Neko JIT with an LLVM-based implementation. While not fully complete, it is already providing a 1.5x speedup on 64-bit systems. Neko LLVM JIT requires LLVM 2.8 or later.
Crack

Crack aims to provide the ease of development of a scripting language with the performance of a compiled language. The language derives concepts from C++, Java and Python, incorporating object-oriented programming, operator overloading and strong typing. Crack 0.2 works with LLVM 2.7, and the forthcoming Crack 0.2.1 release builds on LLVM 2.8.
DTMC

DTMC provides support for Transactional Memory, which is an easy-to-use and efficient way to synchronize accesses to shared memory. Transactions can contain normal C/C++ code (e.g., __transaction { list.remove(x); x.refCount--; }) and will be executed virtually atomically and isolated from other transactions.
Kai

Kai (Japanese 会 for meeting/gathering) is an experimental interpreter that provides a highly extensible runtime environment and explicit control over the compilation process. Programs are defined using nested symbolic expressions, which are all parsed into first-class values with minimal intrinsic semantics. Kai can generate optimised code at run-time (using LLVM) in order to exploit the nature of the underlying hardware and to integrate with external software libraries. It is a unique exploration into the world of dynamic code compilation, and the interaction between high level and low level semantics.
OSL

OSL is a shading language designed for use in physically based renderers and in particular production rendering. By using LLVM instead of the interpreter, it was able to meet its performance goals (>= C-code) while retaining the benefits of runtime specialization and a portable high-level language.
What's New in LLVM 2.8?

This release includes a huge number of bug fixes, performance tweaks and minor improvements. Some of the major improvements and new features are listed in this section.
Major New Features

LLVM 2.8 includes several major new capabilities:

  • As mentioned above, libc++ and LLDB are major new additions to the LLVM collective.
  • LLVM 2.8 now has pretty decent support for debugging optimized code. You should be able to reliably get debug info for function arguments, assuming that the value is actually available where you have stopped.
  • A new 'llvm-diff' tool is available that does a semantic diff of .ll files.
  • The MC subproject has made major progress in this release. Direct .o file writing support for darwin/x86[-64] is now reliable, and support for other targets and object file formats is in progress.
LLVM IR and Core Improvements

LLVM IR has several new features for better support of new targets and that expose new optimization opportunities:

  • The memcpy, memmove, and memset intrinsics now take address space qualified pointers and a bit to indicate whether the transfer is "volatile" or not.
  • Per-instruction debug info metadata is much faster and uses less memory by using the new DebugLoc class.
  • LLVM IR now has a more formalized concept of "trap values", which allow the optimizer to optimize more aggressively in the presence of undefined behavior, while still producing predictable results.
  • LLVM IR now supports two new linkage types (linker_private_weak and linker_private_weak_def_auto) which map onto some obscure MachO concepts.
Optimizer Improvements

In addition to a large array of minor performance tweaks and bug fixes, this release includes a few major enhancements and additions to the optimizers:

  • As mentioned above, the optimizer now has support for updating debug information as it goes. A key aspect of this is the new llvm.dbg.value intrinsic. This intrinsic represents debug info for variables that are promoted to SSA values (typically by mem2reg or the -scalarrepl passes).
  • The JumpThreading pass is now much more aggressive about implied value relations, allowing it to thread conditions like "a == 4" when a is known to be 13 in one of the predecessors of a block. It does this in conjunction with the new LazyValueInfo analysis pass.
  • The new RegionInfo analysis pass identifies single-entry single-exit regions in the CFG. You can play with it with the "opt -regions -analyze" or "opt -view-regions" commands.
  • The loop optimizer has significantly improved strength reduction and analysis capabilities. Notably it is able to build on the trap value and signed integer overflow information to optimize <= and >= loops.
  • The CallGraphSCCPassManager now has some basic support for iterating within an SCC when an optimizer devirtualizes a function call. This allows inlining through indirect call sites that are devirtualized by store-load forwarding and other optimizations.
  • The new -loweratomic pass is available to lower atomic instructions into their non-atomic form. This can be useful to optimize generic code that expects to run in a single-threaded environment.
MC Level Improvements

The LLVM Machine Code (aka MC) subsystem was created to solve a number of problems in the realm of assembly, disassembly, object file format handling, and a number of other related areas that CPU instruction-set level tools work in.

The MC subproject has made great leaps in LLVM 2.8. For example, support for directly writing .o files from LLC (and clang) now works reliably for darwin/x86[-64] (including inline assembly support) and the integrated assembler is turned on by default in Clang for these targets. This provides improved compile times among other things.

  • The entire compiler has converted over to using the MCStreamer assembler API instead of writing out a .s file textually.
  • The "assembler parser" is far more mature than in 2.7, supporting a full complement of directives, now supports assembler macros, etc.
  • The "assembler backend" has been completed, including support for relaxation, relocation processing and all the other things that an assembler does.
  • The MachO file format support is now fully functional and works.
  • The MC disassembler now fully supports ARM and Thumb. ARM assembler support is still in early development though.
  • The X86 MC assembler now supports the X86 AES and AVX instruction set.
  • Work on ELF and COFF object files and ARM target support is well underway, but isn't useful yet in LLVM 2.8. Please contact the llvmdev mailing list if you're interested in this.

For more information, please see the Intro to the LLVM MC Project Blog Post.

Target Independent Code Generator Improvements

We have put a significant amount of work into the code generator infrastructure, which allows us to implement more aggressive algorithms and make it run faster:

  • The clang/gcc -momit-leaf-frame-pointer argument is now supported.
  • The clang/gcc -ffunction-sections and -fdata-sections arguments are now supported on ELF targets (like GCC).
  • The MachineCSE pass is now tuned and on by default. It eliminates common subexpressions that are exposed when lowering to machine instructions.
  • The "local" register allocator was replaced by a new "fast" register allocator. This new allocator (which is often used at -O0) is substantially faster and produces better code than the old local register allocator.
  • A new LLC "-regalloc=default" option is available, which automatically chooses a register allocator based on the -O optimization level.
  • The common code generator code was modified to promote illegal argument and return value vectors to wider ones when possible instead of scalarizing them. For example, <3 x float> will now pass in one SSE register instead of 3 on X86. This generates substantially better code since the rest of the code generator was already expecting this.
  • The code generator uses a new "COPY" machine instruction. This speeds up the code generator and eliminates the need for targets to implement the isMoveInstr hook. Also, the copyRegToReg hook was renamed to copyPhysReg and simplified.
  • The code generator now has a "LocalStackSlotPass", which optimizes stack slot access for targets (like ARM) that have limited stack displacement addressing.
  • A new "PeepholeOptimizer" is available, which eliminates sign and zero extends, and optimizes away compare instructions when the condition result is available from a previous instruction.
  • Atomic operations now get legalized into simpler atomic operations if not natively supported, easing the implementation burden on targets.
  • We have added two new bottom-up pre-allocation register pressure aware schedulers:
      1. The hybrid scheduler schedules aggressively to minimize schedule length when registers are available and avoids overscheduling in high pressure situations.
      2. The instruction-level-parallelism scheduler schedules for maximum ILP when registers are available and avoids overscheduling in high pressure situations.
  • The tblgen type inference algorithm was rewritten to be more consistent and diagnose more target bugs. If you have an out-of-tree backend, you may find that it finds bugs in your target description. This support also allows limited support for writing patterns for instructions that return multiple results (e.g. a virtual register and a flag result). The 'parallel' modifier in tblgen was removed; you should use the new support for multiple results instead.
  • A new (experimental) "-rendermf" pass is available which renders a MachineFunction into HTML, showing live ranges and other useful details.
  • The new SubRegIndex tablegen class allows subregisters to be indexed symbolically instead of numerically. If your target uses subregisters you will need to adapt to use SubRegIndex when you upgrade to 2.8.
  • The -fast-isel instruction selection path (used at -O0 on X86) was rewritten to work bottom-up on basic blocks instead of top down. This makes it slightly faster (because the MachineDCE pass is not needed any longer) and allows it to generate better code in some cases.
X86-32 and X86-64 Target Improvements

New features and major changes in the X86 target include:

  • The X86 backend now supports holding X87 floating point stack values in registers across basic blocks, dramatically improving performance of code that uses long double, and when targeting CPUs that don't support SSE.
  • The X86 backend now uses a SSEDomainFix pass to optimize SSE operations. On Nehalem ("Core i7") and newer CPUs there is a 2 cycle latency penalty on using a register in a different domain than where it was defined. This pass optimizes away these stalls.
  • The X86 backend now promotes 16-bit integer operations to 32-bits when possible. This avoids 0x66 prefixes, which are slow on some microarchitectures and bloat the code on all of them.
  • The X86 backend now supports the Microsoft "thiscall" calling convention, and a calling convention to support ghc.
  • The X86 backend supports a new "llvm.x86.int" intrinsic, which maps onto the X86 "int $42" and "int3" instructions.
  • At the IR level, the <2 x float> datatype is now promoted and passed around as a <4 x float> instead of being passed and returned as an MMX vector. If you have a frontend that uses this, please pass and return a <2 x i32> instead (using bitcasts).
  • When printing .s files in verbose assembly mode (the default for clang -S), the X86 backend now decodes X86 shuffle instructions and prints human readable comments after the most inscrutable of them, e.g.:

        insertps $113, %xmm3, %xmm0 # xmm0 = zero,xmm0[1,2],xmm3[1]
        unpcklps %xmm1, %xmm0       # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
        pshufd   $1, %xmm1, %xmm1   # xmm1 = xmm1[1,0,0,0]
ARM Target Improvements

New features of the ARM target include:

  • The ARM backend now optimizes tail calls into jumps.
  • Scheduling is improved through the new list-hybrid scheduler as well as through better modeling of structural hazards.
  • Half float instructions are now supported.
  • NEON support has been improved to model instructions which operate onto multiple consecutive registers more aggressively. This avoids lots of extraneous register copies.
  • The ARM backend now uses a new "ARMGlobalMerge" pass, which merges several global variables into one, saving extra address computation (all the global variables can be accessed via same base address) and potentially reducing register pressure.
  • The ARM backend has received many minor improvements and tweaks which lead to substantially better performance in a wide range of different scenarios.
  • The ARM NEON intrinsics have been substantially reworked to reduce redundancy and improve code generation. Some of the major changes are:
      1. All of the NEON load and store intrinsics (llvm.arm.neon.vld* and llvm.arm.neon.vst*) take an extra parameter to specify the alignment in bytes of the memory being accessed.
      2. The llvm.arm.neon.vaba intrinsic (vector absolute difference and accumulate) has been removed. This operation is now represented using the llvm.arm.neon.vabd intrinsic (vector absolute difference) followed by a vector add.
      3. The llvm.arm.neon.vabdl and llvm.arm.neon.vabal intrinsics (lengthening vector absolute difference with and without accumulation) have been removed. They are represented using the llvm.arm.neon.vabd intrinsic (vector absolute difference) followed by a vector zero-extend operation, and for vabal, a vector add.
      4. The llvm.arm.neon.vmovn intrinsic has been removed. Calls of this intrinsic are now replaced by vector truncate operations.
      5. The llvm.arm.neon.vmovls and llvm.arm.neon.vmovlu intrinsics have been removed. They are now represented as vector sign-extend (vmovls) and zero-extend (vmovlu) operations.
      6. The llvm.arm.neon.vaddl*, llvm.arm.neon.vaddw*, llvm.arm.neon.vsubl*, and llvm.arm.neon.vsubw* intrinsics (lengthening vector add and subtract) have been removed. They are replaced by vector add and vector subtract operations where one (vaddw, vsubw) or both (vaddl, vsubl) of the operands are either sign-extended or zero-extended.
      7. The llvm.arm.neon.vmulls, llvm.arm.neon.vmullu, llvm.arm.neon.vmlal*, and llvm.arm.neon.vmlsl* intrinsics (lengthening vector multiply with and without accumulation and subtraction) have been removed. These operations are now represented as vector multiplications where the operands are either sign-extended or zero-extended, followed by a vector add for vmlal or a vector subtract for vmlsl. Note that the polynomial vector multiply intrinsic, llvm.arm.neon.vmullp, remains unchanged.
Major Changes and Removed Features

If you're already an LLVM user or developer with out-of-tree changes based on LLVM 2.7, this section lists some "gotchas" that you may run into upgrading from the previous release.

  • The build configuration machinery changed the output directory names. It wasn't clear to many people that a "Release-Asserts" build was a release build without asserts. To make this more clear, "Release" does not include assertions and "Release+Asserts" does (likewise, "Debug" and "Debug+Asserts").
  • The MSIL backend was removed; it was unsupported and broken.
  • The ABCD, SSI, and SCCVN passes were removed. These were not fully functional and their behavior has been or will be subsumed by the LazyValueInfo pass.
  • The LLVM IR 'Union' feature was removed. While this is a desirable feature for LLVM IR to support, the existing implementation was half baked and barely useful. We'd really like anyone interested to resurrect the work and finish it for a future release.
  • If you're used to reading .ll files, you'll probably notice that .ll file dumps don't produce #uses comments anymore. To get them, run a .bc file through "llvm-dis --show-annotations".
  • Target triples are now stored in a normalized form, and all inputs from humans are expected to be normalized by Triple::normalize before being stored in a module triple or passed to another library.

In addition, many APIs have changed in this release. Some of the major LLVM API changes are:

+
    +
  • LLVM 2.8 changes the internal order of operands in InvokeInst + and CallInst. + To be portable across releases, please use the CallSite class and the + high-level accessors, such as getCalledValue and + setUnwindDest. +
  • +
  • + You can no longer pass use_iterators directly to cast<> (and similar), + because these routines tend to perform costly dereference operations more + than once. You have to dereference the iterators yourself and pass them in. +
  • +
• The llvm.memcpy.*, llvm.memset.*, and llvm.memmove.* intrinsics now take an extra parameter ("i1 isVolatile"), for a total of 5 parameters, and the pointer operands are now address-space qualified (see the sketch after this list). If you were creating these intrinsic calls and prototypes yourself (as opposed to using Intrinsic::getDeclaration), you can use UpgradeIntrinsicFunction/UpgradeIntrinsicCall to be portable across releases.
  • +
• SetCurrentDebugLocation now takes a DebugLoc instead of an MDNode. Change your code to use SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)).
  • +
  • + The RegisterPass and RegisterAnalysisGroup templates are + considered deprecated, but continue to function in LLVM 2.8. Clients are + strongly advised to use the upcoming INITIALIZE_PASS() and + INITIALIZE_AG_PASS() macros instead. +
  • +
  • + The constructor for the Triple class no longer tries to understand odd triple + specifications. Frontends should ensure that they only pass valid triples to + LLVM. The Triple::normalize utility method has been added to help front-ends + deal with funky triples. +
  • +
  • + The signature of the GCMetadataPrinter::finishAssembly virtual + function changed: the raw_ostream and MCAsmInfo arguments + were dropped. GC plugins which compute stack maps must be updated to avoid + having the old definition overload the new signature. +
  • +
  • + The signature of MemoryBuffer::getMemBuffer changed. Unfortunately + calls intended for the old version still compile, but will not work correctly, + leading to a confusing error about an invalid header in the bitcode. +
  • + +
  • + Some APIs were renamed: +
      +
    • llvm_report_error -> report_fatal_error
    • +
    • llvm_install_error_handler -> install_fatal_error_handler
    • +
    • llvm::DwarfExceptionHandling -> llvm::JITExceptionHandling
    • +
    • VISIBILITY_HIDDEN -> LLVM_LIBRARY_VISIBILITY
    • +
    +
  • + +
  • + Some public headers were renamed: +
      +
    • llvm/Assembly/AsmAnnotationWriter.h was renamed + to llvm/Assembly/AssemblyAnnotationWriter.h +
    • +
    +
+ +
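As a rough sketch of the memory intrinsic change (value names are illustrative):
+; LLVM 2.7:
+;   call void @llvm.memcpy.i64(i8* %dst, i8* %src, i64 %len, i32 4)
+; LLVM 2.8: the pointer operands are address-space qualified (encoded in
+; the intrinsic name) and there is a trailing "i1 isVolatile" parameter,
+; for 5 parameters in total.
+call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %len,
+                                     i32 4, i1 false)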
+ + + + +
+ +

This section lists changes to the LLVM development infrastructure. This +mostly impacts users who actively work on LLVM or follow development on +mainline, but may also impact users who leverage the LLVM build infrastructure +or are interested in LLVM qualification.

+ +
    +
• The default for make check is now to use the lit testing tool, which is part of LLVM itself. You can use lit directly as well, or use the llvm-lit tool, which is created as part of a Makefile or CMake build (and knows how to find the appropriate tools). See the lit documentation, the blog post, and PR5217 for more information.
  • + +
  • The LLVM test-suite infrastructure has a new "simple" test format + (make TEST=simple). The new format is intended to require only a + compiler and not a full set of LLVM tools. This makes it useful for testing + released compilers, for running the test suite with other compilers (for + performance comparisons), and makes sure that we are testing the compiler as + users would see it. The new format is also designed to work using reference + outputs instead of comparison to a baseline compiler, which makes it run much + faster and makes it less system dependent.
  • + +
  • Significant progress has been made on a new interface to running the + LLVM test-suite (aka the LLVM "nightly tests") using + the LNT infrastructure. The LNT + interface to the test-suite brings significantly improved reporting + capabilities for monitoring the correctness and generated code quality + produced by LLVM over time.
  • +
+
+ + + + + +
+ +

This section contains significant known problems with the LLVM system, +listed by component. If you run into a problem, please check the LLVM bug database and submit a bug if +there isn't already one.

+ +
+ + + + +
+ +

The following components of this LLVM release are either untested, known to +be broken or unreliable, or are in early development. These components should +not be relied on, and bugs should not be filed against them, but they may be +useful to some people. In particular, if you would like to work on one of these +components, please contact us on the LLVMdev list.

+ +
    +
  • The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, SystemZ + and XCore backends are experimental.
  • +
  • llc "-filetype=obj" is experimental on all targets + other than darwin-i386 and darwin-x86_64.
  • +
+ +
+ + + + +
+ +
    +
  • The X86 backend does not yet support + all inline assembly that uses the X86 + floating point stack. It supports the 'f' and 't' constraints, but not + 'u'.
  • +
• Win64 code generation has not been widely tested. Everything should work, but minor issues are to be expected. Also, llvm-gcc currently cannot build the mingw64 runtime due to lack of support for the 'u' inline assembly constraint and for X87 floating point inline assembly.
  • +
  • The X86-64 backend does not yet support the LLVM IR instruction + va_arg. Currently, front-ends support variadic + argument constructs on X86-64 by lowering them manually.
  • +
+ +
+ + + + +
+ +
    +
  • The Linux PPC32/ABI support needs testing for the interpreter and static +compilation, and lacks support for debug information.
  • +
+ +
+ + + + +
+ +
    +
• Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6 processors, Thumb programs can crash or produce incorrect results (PR1388).
  • +
  • Compilation for ARM Linux OABI (old ABI) is supported but not fully tested. +
  • +
+ +
+ + + + +
+ +
    +
  • The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not + support the 64-bit SPARC ABI (-m64).
  • +
+ +
+ + + + +
+ +
    +
  • 64-bit MIPS targets are not supported yet.
  • +
+ +
+ + + + +
+ +
    + +
  • On 21164s, some rare FP arithmetic sequences which may trap do not have the +appropriate nops inserted to ensure restartability.
  • + +
+
+ + + + +
+ +

The C backend has numerous problems and is not being actively maintained. +Depending on it for anything serious is not advised.

+ + + +
+ + + + + +
+ +

llvm-gcc is generally very stable for the C family of languages. The only major GCC language feature not supported by llvm-gcc is the __builtin_apply family of builtins. However, some extensions are available only on certain targets; for example, trampolines (used when you take the address of a nested function) are only supported on some targets.

+ +

Fortran support generally works, but there are still several unresolved bugs + in Bugzilla. Please see the + tools/gfortran component for details. Note that llvm-gcc is missing major + Fortran performance work in the frontend and library that went into GCC after + 4.2. If you are interested in Fortran, we recommend that you consider using + dragonegg instead.

+ +

The llvm-gcc 4.2 Ada compiler has basic functionality, but is no longer being +actively maintained. If you are interested in Ada, we recommend that you +consider using dragonegg instead.

+
+ + + + + +
+ +

A wide variety of additional information is available on the LLVM web page, in particular in the documentation section. The web page also contains versions of the API documentation which are up-to-date with the Subversion version of the source code. You can access versions of these documents specific to this release by going into the "llvm/docs/" directory in the LLVM tree.

+ +

If you have any questions or comments about LLVM, please feel free to contact +us via the mailing +lists.

+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/SourceLevelDebugging.html b/final/docs/SourceLevelDebugging.html new file mode 100644 index 00000000000..3f88e2fc391 --- /dev/null +++ b/final/docs/SourceLevelDebugging.html @@ -0,0 +1,1783 @@ + + + + + Source Level Debugging with LLVM + + + + +
Source Level Debugging with LLVM
+ + + + + +
+ + +A leafy and green bug eater +
+ +
+

Written by Chris Lattner + and Jim Laskey

+
+ + + + + + +
+ +

This document is the central repository for all information pertaining to + debug information in LLVM. It describes the actual format + that the LLVM debug information takes, which is useful for those + interested in creating front-ends or dealing directly with the information. + Further, this document provides specific examples of what debug information + for C/C++ looks like.

+ +
+ + + + +
+ +

The idea of the LLVM debugging information is to capture how the important + pieces of the source-language's Abstract Syntax Tree map onto LLVM code. + Several design aspects have shaped the solution that appears here. The + important ones are:

+ +
    +
  • Debugging information should have very little impact on the rest of the + compiler. No transformations, analyses, or code generators should need to + be modified because of debugging information.
  • + +
  • LLVM optimizations should interact in well-defined and + easily described ways with the debugging information.
  • + +
  • Because LLVM is designed to support arbitrary programming languages, + LLVM-to-LLVM tools should not need to know anything about the semantics of + the source-level-language.
  • + +
  • Source-level languages are often widely different from one another. + LLVM should not put any restrictions of the flavor of the source-language, + and the debugging information should work with any language.
  • + +
  • With code generator support, it should be possible to use an LLVM compiler + to compile a program to native machine code and standard debugging + formats. This allows compatibility with traditional machine-code level + debuggers, like GDB or DBX.
  • +
+ +

The approach used by the LLVM implementation is to use a small set + of intrinsic functions to define a + mapping between LLVM program objects and the source-level objects. The + description of the source-level program is maintained in LLVM metadata + in an implementation-defined format + (the C/C++ front-end currently uses working draft 7 of + the DWARF 3 + standard).

+ +

When a program is being debugged, a debugger interacts with the user and + turns the stored debug information into source-language specific information. + As such, a debugger must be aware of the source-language, and is thus tied to + a specific language or family of languages.

+ +
+ + + + +
+ +

The role of debug information is to provide meta information normally + stripped away during the compilation process. This meta information provides + an LLVM user a relationship between generated code and the original program + source code.

+ +

Currently, debug information is consumed by DwarfDebug to produce DWARF information used by the gdb debugger. Other targets could use the same information to produce stabs or other debug forms.

+ +

It would also be reasonable to use debug information to feed profiling tools for analysis of generated code, or tools for reconstructing the original source from generated code.

+ +

TODO - expound a bit more.

+ +
+ + + + +
+ +

An extremely high priority of LLVM debugging information is to make it + interact well with optimizations and analysis. In particular, the LLVM debug + information provides the following guarantees:

+ +
    +
  • LLVM debug information always provides information to accurately read + the source-level state of the program, regardless of which LLVM + optimizations have been run, and without any modification to the + optimizations themselves. However, some optimizations may impact the + ability to modify the current state of the program with a debugger, such + as setting program variables, or calling functions that have been + deleted.
  • + +
  • LLVM optimizations gracefully interact with debugging information. If + they are not aware of debug information, they are automatically disabled + as necessary in the cases that would invalidate the debug info. This + retains the LLVM features, making it easy to write new + transformations.
  • + +
  • As desired, LLVM optimizations can be upgraded to be aware of the LLVM + debugging information, allowing them to update the debugging information + as they perform aggressive optimizations. This means that, with effort, + the LLVM optimizers could optimize debug code just as well as non-debug + code.
  • + +
  • LLVM debug information does not prevent many important optimizations from + happening (for example inlining, basic block reordering/merging/cleanup, + tail duplication, etc), further reducing the amount of the compiler that + eventually is "aware" of debugging information.
  • + +
  • LLVM debug information is automatically optimized along with the rest of + the program, using existing facilities. For example, duplicate + information is automatically merged by the linker, and unused information + is automatically removed.
  • +
+ +

Basically, the debug information allows you to compile a program with "-O0 -g" and get full debug information, allowing you to arbitrarily modify the program as it executes from a debugger. Compiling a program with "-O3 -g" gives you full debug information that is always available and accurate for reading (e.g., you get accurate stack traces despite tail call elimination and inlining), but you might lose the ability to modify the program and call functions that were optimized out of the program, or inlined away completely.

+ +

The LLVM test suite provides a framework to test the optimizer's handling of debugging information. It can be run like this:

+ +
+
+% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
+% make TEST=dbgopt
+
+
+ +

This will test the impact of debugging information on optimization passes; if debugging information influences an optimization pass, it will be reported as a failure. See TestingGuide for more information on the LLVM test infrastructure and how to run various tests.

+ +
+ + + + + +
+ +

LLVM debugging information has been carefully designed to make it possible + for the optimizer to optimize the program and debugging information without + necessarily having to know anything about debugging information. In + particular, the use of metadata avoids duplicated debugging information from + the beginning, and the global dead code elimination pass automatically + deletes debugging information for a function if it decides to delete the + function.

+ +

To do this, most of the debugging information (descriptors for types, + variables, functions, source files, etc) is inserted by the language + front-end in the form of LLVM metadata.

+ +

Debug information is designed to be agnostic about the target debugger and + debugging information representation (e.g. DWARF/Stabs/etc). It uses a + generic pass to decode the information that represents variables, types, + functions, namespaces, etc: this allows for arbitrary source-language + semantics and type-systems to be used, as long as there is a module + written for the target debugger to interpret the information.

+ +

To provide basic functionality, the LLVM debugger does have to make some assumptions about the source-level language being debugged, though it keeps these to a minimum. The only common features that the LLVM debugger assumes exist are source files and program objects. These abstract objects are used by a debugger to form stack traces, show information about local variables, etc.

+ +

This section of the documentation first describes the representation aspects + common to any source-language. The next section + describes the data layout conventions used by the C and C++ front-ends.

+ +
+ + + + +
+ +

In consideration of the complexity and volume of debug information, LLVM provides a specification for well-formed debug descriptors.

+ +

Consumers of LLVM debug information expect the descriptors for program objects to start in a canonical format, but the descriptors can include additional information appended at the end that is source-language specific. All LLVM debugging information is versioned, allowing backwards compatibility in the case that the core structures need to change in some way. Also, all debugging information objects start with a tag to indicate what type of object it is. The source-language is allowed to define its own objects by using unreserved tag numbers. We recommend using tags in the range 0x1000 through 0x2000 (there is a defined enum DW_TAG_user_base = 0x1000).

+ +

The fields of debug descriptors used internally by LLVM + are restricted to only the simple data types i32, i1, + float, double, mdstring and mdnode.

+ +
+
+!1 = metadata !{
+  i32,   ;; A tag
+  ...
+}
+
+
+ +

The first field of a descriptor is always an + i32 containing a tag value identifying the content of the + descriptor. The remaining fields are specific to the descriptor. The values + of tags are loosely bound to the tag values of DWARF information entries. + However, that does not restrict the use of the information supplied to DWARF + targets. To facilitate versioning of debug information, the tag is augmented + with the current debug version (LLVMDebugVersion = 8 << 16 or 0x80000 or + 524288.)
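As a concrete illustration, the tag field of a compile unit descriptor is computed as follows:
+;; DW_TAG_compile_unit = 17 and LLVMDebugVersion = 8 << 16 = 524288,
+;; so the first field of a compile unit descriptor is:
+;;   17 + 524288 = 524305
+!0 = metadata !{ i32 524305, ... }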

+ +

The details of the various descriptors follow.

+ +
+ + + + +
+ +
+
+!0 = metadata !{
+  i32,       ;; Tag = 17 + LLVMDebugVersion 
+             ;; (DW_TAG_compile_unit)
+  i32,       ;; Unused field. 
+  i32,       ;; DWARF language identifier (ex. DW_LANG_C89) 
+  metadata,  ;; Source file name
+  metadata,  ;; Source file directory (includes trailing slash)
+  metadata,  ;; Producer (ex. "4.0.1 LLVM (LLVM research group)")
+  i1,        ;; True if this is a main compile unit. 
+  i1,        ;; True if this is optimized.
+  metadata,  ;; Flags
+  i32        ;; Runtime version
+}
+
+
+ +

These descriptors contain a source language ID for the file (we use the DWARF + 3.0 ID numbers, such as DW_LANG_C89, DW_LANG_C_plus_plus, + DW_LANG_Cobol74, etc), three strings describing the filename, + working directory of the compiler, and an identifier string for the compiler + that produced it.

+ +

Compile unit descriptors provide the root context for objects declared in a + specific compilation unit. File descriptors are defined using this context.

+ +
+ + + + +
+ +
+
+!0 = metadata !{
+  i32,       ;; Tag = 41 + LLVMDebugVersion 
+             ;; (DW_TAG_file_type)
+  metadata,  ;; Source file name
+  metadata,  ;; Source file directory (includes trailing slash)
+  metadata   ;; Reference to compile unit where defined
+}
+
+
+ +

These descriptors contain information for a file. Global variables and top level functions would be defined using this context. File descriptors also provide context for source line correspondence.

+ +

Each input file is encoded as a separate file descriptor in LLVM debugging + information output. Each file descriptor would be defined using a + compile unit.

+ +
+ + + + +
+ +
+
+!1 = metadata !{
+  i32,      ;; Tag = 52 + LLVMDebugVersion 
+            ;; (DW_TAG_variable)
+  i32,      ;; Unused field.
+  metadata, ;; Reference to context descriptor
+  metadata, ;; Name
+  metadata, ;; Display name (fully qualified C++ name)
+  metadata, ;; MIPS linkage name (for C++)
+  metadata, ;; Reference to file where defined
+  i32,      ;; Line number where defined
+  metadata, ;; Reference to type descriptor
+  i1,       ;; True if the global is local to compile unit (static)
+  i1,       ;; True if the global is defined in the compile unit (not extern)
+  {}*       ;; Reference to the global variable
+}
+
+
+ +

These descriptors provide debug information about global variables. They provide details such as the name, the type, and where the variable is defined.

+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32,      ;; Tag = 46 + LLVMDebugVersion
+            ;; (DW_TAG_subprogram)
+  i32,      ;; Unused field.
+  metadata, ;; Reference to context descriptor
+  metadata, ;; Name
+  metadata, ;; Display name (fully qualified C++ name)
+  metadata, ;; MIPS linkage name (for C++)
+  metadata, ;; Reference to file where defined
+  i32,      ;; Line number where defined
+  metadata, ;; Reference to type descriptor
+  i1,       ;; True if the global is local to compile unit (static)
+  i1,       ;; True if the global is defined in the compile unit (not extern)
+  i32,      ;; Virtuality, e.g. dwarf::DW_VIRTUALITY_virtual
+  i32,      ;; Index into a virtual function
+  metadata, ;; Indicates which base type contains the vtable pointer for the
+            ;; derived class
+  i1,       ;; isArtificial
+  i1,       ;; isOptimized
+  Function *;; Pointer to LLVM function
+}
+
+
+ +

These descriptors provide debug information about functions, methods and + subprograms. They provide details such as name, return types and the source + location where the subprogram is defined.

+ +
+ + + + +
+ +
+
+!3 = metadata !{
+  i32,     ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block)
+  metadata,;; Reference to context descriptor
+  i32,     ;; Line number
+  i32      ;; Column number
+}
+
+
+ +

These descriptors provide debug information about nested blocks within a subprogram. The line number and column number are used to distinguish two lexical blocks at the same depth.

+ +
+ + + + +
+ +
+
+!4 = metadata !{
+  i32,      ;; Tag = 36 + LLVMDebugVersion 
+            ;; (DW_TAG_base_type)
+  metadata, ;; Reference to context (typically a compile unit)
+  metadata, ;; Name (may be "" for anonymous types)
+  metadata, ;; Reference to file where defined (may be NULL)
+  i32,      ;; Line number where defined (may be 0)
+  i64,      ;; Size in bits
+  i64,      ;; Alignment in bits
+  i64,      ;; Offset in bits
+  i32,      ;; Flags
+  i32       ;; DWARF type encoding
+}
+
+
+ +

These descriptors define primitive types used in the code, such as int, bool, and float. The context provides the scope of the type, which is usually the top level. Since basic types are not usually user defined, the compile unit and line number can be left as NULL and 0. The size, alignment, and offset are expressed in bits and can be 64-bit values. The alignment is used to round the offset when embedded in a composite type (for example, to keep doubles on 64-bit boundaries). The offset is the bit offset if embedded in a composite type.

+ +

The type encoding provides the details of the type. The values are typically + one of the following:

+ +
+
+DW_ATE_address       = 1
+DW_ATE_boolean       = 2
+DW_ATE_float         = 4
+DW_ATE_signed        = 5
+DW_ATE_signed_char   = 6
+DW_ATE_unsigned      = 7
+DW_ATE_unsigned_char = 8
+
+
+ +
+ + + + +
+ +
+
+!5 = metadata !{
+  i32,      ;; Tag (see below)
+  metadata, ;; Reference to context
+  metadata, ;; Name (may be "" for anonymous types)
+  metadata, ;; Reference to file where defined (may be NULL)
+  i32,      ;; Line number where defined (may be 0)
+  i64,      ;; Size in bits
+  i64,      ;; Alignment in bits
+  i64,      ;; Offset in bits
+  i32,      ;; Flags
+  metadata  ;; Reference to type derived from
+}
+
+
+ +

These descriptors are used to define types derived from other types. The +value of the tag varies depending on the meaning. The following are possible +tag values:

+ +
+
+DW_TAG_formal_parameter = 5
+DW_TAG_member           = 13
+DW_TAG_pointer_type     = 15
+DW_TAG_reference_type   = 16
+DW_TAG_typedef          = 22
+DW_TAG_const_type       = 38
+DW_TAG_volatile_type    = 53
+DW_TAG_restrict_type    = 55
+
+
+ +

DW_TAG_member is used to define a member of + a composite type + or subprogram. The type of the member is + the derived + type. DW_TAG_formal_parameter is used to define a member which + is a formal argument of a subprogram.

+ +

DW_TAG_typedef is used to provide a name for the derived type.

+ +

DW_TAG_pointer_type, DW_TAG_reference_type, DW_TAG_const_type, DW_TAG_volatile_type and DW_TAG_restrict_type are used to qualify the derived type.

+ +

Derived type location can be determined from the compile unit and line number. The size, alignment, and offset are expressed in bits and can be 64-bit values. The alignment is used to round the offset when embedded in a composite type (for example, to keep doubles on 64-bit boundaries). The offset is the bit offset if embedded in a composite type.

+ +

Note that the void * type is expressed as a type derived from NULL. +
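For instance (a hypothetical sketch patterned on the concrete pointer example later in this document; the metadata number and the 64-bit pointer size are assumptions), void* could be described as:
+!9 = metadata !{
+  i32 524303,   ;; Tag = 15 + LLVMDebugVersion (DW_TAG_pointer_type)
+  metadata !1,  ;; Context
+  metadata !"", ;; Name (anonymous)
+  metadata !1,  ;; File
+  i32 0,        ;; Line number
+  i64 64,       ;; Size in bits (assuming a 64-bit target)
+  i64 64,       ;; Alignment in bits
+  i64 0,        ;; Offset in bits
+  i32 0,        ;; Flags
+  null          ;; Derived from NULL, i.e. "void"
+}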

+ +
+ + + + +
+ +
+
+!6 = metadata !{
+  i32,      ;; Tag (see below)
+  metadata, ;; Reference to context
+  metadata, ;; Name (may be "" for anonymous types)
+  metadata, ;; Reference to file where defined (may be NULL)
+  i32,      ;; Line number where defined (may be 0)
+  i64,      ;; Size in bits
+  i64,      ;; Alignment in bits
+  i64,      ;; Offset in bits
+  i32,      ;; Flags
+  metadata, ;; Reference to type derived from
+  metadata, ;; Reference to array of member descriptors
+  i32       ;; Runtime languages
+}
+
+
+ +

These descriptors are used to define types that are composed of 0 or more +elements. The value of the tag varies depending on the meaning. The following +are possible tag values:

+ +
+
+DW_TAG_array_type       = 1
+DW_TAG_enumeration_type = 4
+DW_TAG_structure_type   = 19
+DW_TAG_union_type       = 23
+DW_TAG_vector_type      = 259
+DW_TAG_subroutine_type  = 21
+DW_TAG_inheritance      = 28
+
+
+ +

The vector flag indicates that an array type is a native packed vector.

+ +

The members of array types (tag = DW_TAG_array_type) or vector types + (tag = DW_TAG_vector_type) are subrange + descriptors, each representing the range of subscripts at that level of + indexing.

+ +

The members of enumeration types (tag = DW_TAG_enumeration_type) are enumerator descriptors, each representing the definition of an enumeration value for the set.

+ +

The members of structure (tag = DW_TAG_structure_type) or union (tag + = DW_TAG_union_type) types are any one of + the basic, + derived + or composite type descriptors, each + representing a field member of the structure or union.

+ +

For C++ classes (tag = DW_TAG_structure_type), member descriptors provide information about base classes, static members and member functions. If a member is a derived type descriptor and has a tag of DW_TAG_inheritance, then the type represents a base class (see the sketch below). If the member is a global variable descriptor then it represents a static member. And, if the member is a subprogram descriptor then it represents a member function. For static members and member functions, getName() returns the member's linkage or C++ mangled name, and getDisplayName() returns the simplified version of the name.
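As a hypothetical sketch (metadata numbers are arbitrary; !10 is assumed to describe the base class), the member entry marking a base class could look like this:
+;; A member entry for "class Derived : Base" with tag DW_TAG_inheritance
+;; (28 + LLVMDebugVersion = 524316), marking Base as a base class.
+!11 = metadata !{
+  i32 524316,   ;; Tag (DW_TAG_inheritance)
+  metadata !1,  ;; Context
+  metadata !"", ;; Name
+  metadata !1,  ;; File
+  i32 0,        ;; Line number
+  i64 0,        ;; Size in bits
+  i64 0,        ;; Alignment in bits
+  i64 0,        ;; Offset of Base within Derived, in bits
+  i32 0,        ;; Flags
+  metadata !10  ;; Derived From type (the base class Base)
+}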

+ +

The first element of a subroutine type (tag = DW_TAG_subroutine_type) is the return type for the subroutine. The remaining elements are the formal arguments to the subroutine.
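For instance, assuming !6 describes int and !7 describes char (hypothetical numbering), the element array for a function of type int (int, char) would be:
+;; Element 0 is the return type (int); the rest are parameter types.
+!21 = metadata !{metadata !6, metadata !6, metadata !7}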

+ +

Composite type location can be determined from the compile unit and line number. The size, alignment, and offset are expressed in bits and can be 64-bit values. The alignment is used to round the offset when embedded in a composite type (for example, to keep doubles on 64-bit boundaries). The offset is the bit offset if embedded in a composite type.

+ +
+ + + + +
+ +
+
+!42 = metadata !{
+  i32,    ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type)
+  i64,    ;; Low value
+  i64     ;; High value
+}
+
+
+ +

These descriptors are used to define ranges of array subscripts for an array composite type. The low value defines the lower bound, typically zero for C/C++. The high value is the upper bound. Values are 64-bit. High - low + 1 is the size of the array. If low == high, the array will be unbounded.
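For example, a C array declared as int X[8] would use low = 0 and high = 7, since high - low + 1 = 8 (the metadata number is arbitrary):
+!8 = metadata !{
+  i32 524321,  ;; Tag = 33 + LLVMDebugVersion (DW_TAG_subrange_type)
+  i64 0,       ;; Low value
+  i64 7        ;; High value
+}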

+ +
+ + + + +
+ +
+
+!6 = metadata !{
+  i32,      ;; Tag = 40 + LLVMDebugVersion 
+            ;; (DW_TAG_enumerator)
+  metadata, ;; Name
+  i64       ;; Value
+}
+
+
+ +

These descriptors are used to define members of an enumeration composite type; each one associates a name with a value.

+ +
+ + + + +
+ +
+
+!7 = metadata !{
+  i32,      ;; Tag (see below)
+  metadata, ;; Context
+  metadata, ;; Name
+  metadata, ;; Reference to file where defined
+  i32,      ;; Line number where defined
+  metadata  ;; Type descriptor
+}
+
+
+ +

These descriptors are used to define variables local to a subprogram. The value of the tag depends on the usage of the variable:

+ +
+
+DW_TAG_auto_variable   = 256
+DW_TAG_arg_variable    = 257
+DW_TAG_return_variable = 258
+
+
+ +

An auto variable is any variable declared in the body of the function. An + argument variable is any variable that appears as a formal argument to the + function. A return variable is used to track the result of a function and + has no source correspondent.

+ +

The context is either the subprogram or block where the variable is defined. Name is the source variable name. Compile unit and line indicate where the variable was defined. Type descriptor defines the declared type of the variable.
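For instance, a formal argument argc of type int, declared on line 1, could be described like this (a hypothetical sketch: the !8 numbering is arbitrary, and !2, !1, and !6 are assumed to be the enclosing subprogram, file, and int type descriptors):
+!8 = metadata !{
+  i32 524545,   ;; Tag = 257 + LLVMDebugVersion (DW_TAG_arg_variable)
+  metadata !2,  ;; Context (the subprogram)
+  metadata !"argc",
+  metadata !1,  ;; Reference to file where defined
+  i32 1,        ;; Line number where defined
+  metadata !6   ;; Type descriptor (int)
+}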

+ +
+ + + + +
+ +

LLVM uses several intrinsic functions (names prefixed with "llvm.dbg") to provide debug information at various points in generated code.

+ +
+ + + + +
+ + + + +
+
+  void @llvm.dbg.value(metadata, i64, metadata)
+
+ +

This intrinsic provides information when a user source variable is set to a + new value. The first argument is the new value (wrapped as metadata). The + second argument is the offset in the user source variable where the new value + is written. The third argument is metadata containing description of the + user source variable.
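For example, to record that the user source variable described by !1 now holds the value of %newval starting at offset 0 (a sketch; the names are illustrative):
+call void @llvm.dbg.value(metadata !{i32 %newval}, i64 0, metadata !1)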

+
+ + + + +
+

In many languages, the local variables in functions can have their lifetimes or scopes limited to a subset of a function. In the C family of languages, for example, variables are only live (readable and writable) within the source block that they are defined in. In functional languages, values are only readable after they have been defined. Though this is a very obvious concept, it is non-trivial to model in LLVM, because LLVM IR has no notion of scoping in this sense and should not be tied to any one language's scoping rules.

+ +

In order to handle this, the LLVM debug format uses the metadata attached to + llvm instructions to encode line number and scoping information. Consider + the following C fragment, for example:

+ +
+
+1.  void foo() {
+2.    int X = 21;
+3.    int Y = 22;
+4.    {
+5.      int Z = 23;
+6.      Z = X + Y;
+7.    }
+8.    X = Y;
+9.  }
+
+
+ +

Compiled to LLVM, this function would be represented like this:

+ +
+
+define void @foo() nounwind ssp {
+entry:
+  %X = alloca i32, align 4                        ; <i32*> [#uses=4]
+  %Y = alloca i32, align 4                        ; <i32*> [#uses=4]
+  %Z = alloca i32, align 4                        ; <i32*> [#uses=3]
+  %0 = bitcast i32* %X to {}*                     ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare(metadata !{i32 * %X}, metadata !0), !dbg !7
+  store i32 21, i32* %X, !dbg !8
+  %1 = bitcast i32* %Y to {}*                     ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare(metadata !{i32 * %Y}, metadata !9), !dbg !10
+  store i32 22, i32* %Y, !dbg !11
+  %2 = bitcast i32* %Z to {}*                     ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare(metadata !{i32 * %Z}, metadata !12), !dbg !14
+  store i32 23, i32* %Z, !dbg !15
+  %tmp = load i32* %X, !dbg !16                   ; <i32> [#uses=1]
+  %tmp1 = load i32* %Y, !dbg !16                  ; <i32> [#uses=1]
+  %add = add nsw i32 %tmp, %tmp1, !dbg !16        ; <i32> [#uses=1]
+  store i32 %add, i32* %Z, !dbg !16
+  %tmp2 = load i32* %Y, !dbg !17                  ; <i32> [#uses=1]
+  store i32 %tmp2, i32* %X, !dbg !17
+  ret void, !dbg !18
+}
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+!0 = metadata !{i32 459008, metadata !1, metadata !"X", 
+                metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", 
+               metadata !"foo", metadata !3, i32 1, metadata !4, 
+               i1 false, i1 true}; [DW_TAG_subprogram ]
+!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", 
+                metadata !"/private/tmp", metadata !"clang 1.1", i1 true, 
+                i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, 
+                i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
+!5 = metadata !{null}
+!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, 
+                i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+!7 = metadata !{i32 2, i32 7, metadata !1, null}
+!8 = metadata !{i32 2, i32 3, metadata !1, null}
+!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3, 
+                metadata !6}; [ DW_TAG_auto_variable ]
+!10 = metadata !{i32 3, i32 7, metadata !1, null}
+!11 = metadata !{i32 3, i32 3, metadata !1, null}
+!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5, 
+                 metadata !6}; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+!14 = metadata !{i32 5, i32 9, metadata !13, null}
+!15 = metadata !{i32 5, i32 5, metadata !13, null}
+!16 = metadata !{i32 6, i32 5, metadata !13, null}
+!17 = metadata !{i32 8, i32 3, metadata !1, null}
+!18 = metadata !{i32 9, i32 1, metadata !2, null}
+
+
+ +

This example illustrates a few important details about LLVM debugging + information. In particular, it shows how the llvm.dbg.declare + intrinsic and location information, which are attached to an instruction, + are applied together to allow a debugger to analyze the relationship between + statements, variable definitions, and the code used to implement the + function.

+ +
+
+call void @llvm.dbg.declare(metadata !{i32* %X}, metadata !0), !dbg !7
+
+
+ +

The first intrinsic, @llvm.dbg.declare, encodes debugging information for the variable X. The metadata !dbg !7 attached to the intrinsic provides scope information for the variable X.

+ +
+
+!7 = metadata !{i32 2, i32 7, metadata !1, null}
+!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
+!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", 
+                metadata !"foo", metadata !"foo", metadata !3, i32 1, 
+                metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]   
+
+
+ +

Here !7 is metadata providing location information. It has four fields: line number, column number, scope, and original scope. The original scope represents the inline location if this instruction is inlined inside a caller, and is null otherwise. In this example, the scope is encoded by !1. !1 represents a lexical block inside the scope !2, where !2 is a subprogram descriptor. Thus the location information attached to the intrinsic indicates that the variable X is declared at line 2, column 7, at a function-level scope in function foo.

+ +

Now let's look at another example.

+ +
+
+call void @llvm.dbg.declare(metadata !{i32* %Z}, metadata !12), !dbg !14
+
+
+ +

The second intrinsic, @llvm.dbg.declare, encodes debugging information for variable Z. The metadata !dbg !14 attached to the intrinsic provides scope information for the variable Z.

+ +
+
+!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
+!14 = metadata !{i32 5, i32 9, metadata !13, null}
+
+
+ +

Here !14 indicates that Z is declared at line number 5 and + column number 9 inside of lexical scope !13. The lexical scope + itself resides inside of lexical scope !1 described above.

+ +

The scope information attached with each instruction provides a + straightforward way to find instructions covered by a scope.

+ +
+ + + + + +
+ +

The C and C++ front-ends represent information about the program in a format that is effectively identical to DWARF 3.0 in terms of information content. This allows code generators to trivially support native debuggers by generating standard DWARF information, and contains enough information for non-DWARF targets to translate it as needed.

+ +

This section describes the forms used to represent C and C++ programs. Other + languages could pattern themselves after this (which itself is tuned to + representing programs in the same way that DWARF 3 does), or they could + choose to provide completely different forms if they don't fit into the DWARF + model. As support for debugging information gets added to the various LLVM + source-language front-ends, the information used should be documented + here.

+ +

The following sections provide examples of various C/C++ constructs and the + debug information that would best describe those constructs.

+ +
+ + + + +
+ +

Given the source files MySource.cpp and MyHeader.h located + in the directory /Users/mine/sources, the following code:

+ +
+
+#include "MyHeader.h"
+
+int main(int argc, char *argv[]) {
+  return 0;
+}
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+...
+;;
+;; Define the compile unit for the main source file "/Users/mine/sources/MySource.cpp".
+;;
+!2 = metadata !{
+  i32 524305,    ;; Tag
+  i32 0,         ;; Unused
+  i32 4,         ;; Language Id
+  metadata !"MySource.cpp", 
+  metadata !"/Users/mine/sources", 
+  metadata !"4.2.1 (Based on Apple Inc. build 5649) (LLVM build 00)", 
+  i1 true,       ;; Main Compile Unit
+  i1 false,      ;; Optimized compile unit
+  metadata !"",  ;; Compiler flags
+  i32 0}         ;; Runtime version
+
+;;
+;; Define the file for the file "/Users/mine/sources/MySource.cpp".
+;;
+!1 = metadata !{
+  i32 524329,    ;; Tag
+  metadata !"MySource.cpp", 
+  metadata !"/Users/mine/sources", 
+  metadata !2    ;; Compile unit
+}
+
+;;
+;; Define the file for the file "/Users/mine/sources/Myheader.h"
+;;
+!3 = metadata !{
+  i32 524329,    ;; Tag
+  metadata !"Myheader.h"
+  metadata !"/Users/mine/sources", 
+  metadata !2    ;; Compile unit
+}
+
+...
+
+
+ +

llvm::Instruction provides easy access to the metadata attached to an instruction. One can extract line number information encoded in LLVM IR using Instruction::getMetadata() and DILocation::getLineNumber().

+ if (MDNode *N = I->getMetadata("dbg")) {  // Here I is an LLVM instruction
+   DILocation Loc(N);                      // DILocation is in DebugInfo.h
+   unsigned Line = Loc.getLineNumber();
+   StringRef File = Loc.getFilename();
+   StringRef Dir = Loc.getDirectory();
+ }
+
+
+ + + + +
+ +

Given an integer global variable declared as follows:

+ +
+
+int MyGlobal = 100;
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+;;
+;; Define the global itself.
+;;
+%MyGlobal = global int 100
+...
+;;
+;; List of debug info of globals
+;;
+!llvm.dbg.gv = !{!0}
+
+;;
+;; Define the global variable descriptor.  Note the reference to the
+;; global variable itself at the end of the descriptor.
+;;
+!0 = metadata !{
+  i32 524340,              ;; Tag
+  i32 0,                   ;; Unused
+  metadata !1,             ;; Context
+  metadata !"MyGlobal",    ;; Name
+  metadata !"MyGlobal",    ;; Display Name
+  metadata !"MyGlobal",    ;; Linkage Name
+  metadata !3,             ;; Compile Unit
+  i32 1,                   ;; Line Number
+  metadata !4,             ;; Type
+  i1 false,                ;; Is a local variable
+  i1 true,                 ;; Is this a definition
+  i32* @MyGlobal           ;; The global variable
+}
+
+;;
+;; Define the basic type of 32 bit signed integer.  Note that since int is an
+;; intrinsic type the source file is NULL and line 0.
+;;    
+!4 = metadata !{
+  i32 524324,              ;; Tag
+  metadata !1,             ;; Context
+  metadata !"int",         ;; Name
+  metadata !1,             ;; File
+  i32 0,                   ;; Line number
+  i64 32,                  ;; Size in Bits
+  i64 32,                  ;; Align in Bits
+  i64 0,                   ;; Offset in Bits
+  i32 0,                   ;; Flags
+  i32 5                    ;; Encoding
+}
+
+
+
+ +
+ + + + +
+ +

Given a function declared as follows:

+ +
+
+int main(int argc, char *argv[]) {
+  return 0;
+}
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+;;
+;; Define the descriptor for the subprogram "main".  Note that the first
+;; field is the subprogram tag augmented with the debug version
+;; (46 = DW_TAG_subprogram, so 46 + LLVMDebugVersion = 524334.)
+;;
+!6 = metadata !{
+  i32 524334,        ;; Tag
+  i32 0,             ;; Unused
+  metadata !1,       ;; Context
+  metadata !"main",  ;; Name
+  metadata !"main",  ;; Display name
+  metadata !"main",  ;; Linkage name
+  metadata !1,       ;; File
+  i32 1,             ;; Line number
+  metadata !4,       ;; Type
+  i1 false,          ;; Is local 
+  i1 true            ;; Is definition
+}
+;;
+;; Define the subprogram itself.
+;;
+define i32 @main(i32 %argc, i8** %argv) {
+...
+}
+
+
+ +
+ + + + +
+ +

The following are the basic type descriptors for C/C++ core types:

+ +
+ + +
+ bool +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"bool",  ;; Name
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 8,             ;; Size in Bits
+  i64 8,             ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 2              ;; Encoding
+}
+
+
+ +
+ + +
+ char +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"char",  ;; Name
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 8,             ;; Size in Bits
+  i64 8,             ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 6              ;; Encoding
+}
+
+
+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"unsigned char", 
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 8,             ;; Size in Bits
+  i64 8,             ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 8              ;; Encoding
+}
+
+
+ +
+ + +
+ short +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"short int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 16,            ;; Size in Bits
+  i64 16,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 5              ;; Encoding
+}
+
+
+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"short unsigned int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 16,            ;; Size in Bits
+  i64 16,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 7              ;; Encoding
+}
+
+
+ +
+ + +
+ int +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"int",   ;; Name
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 32,            ;; Size in Bits
+  i64 32,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 5              ;; Encoding
+}
+
+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"unsigned int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 32,            ;; Size in Bits
+  i64 32,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 7              ;; Encoding
+}
+
+
+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"long long int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 64,            ;; Size in Bits
+  i64 64,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 5              ;; Encoding
+}
+
+
+ +
+ + + + +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"long long unsigned int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 64,            ;; Size in Bits
+  i64 64,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 7              ;; Encoding
+}
+
+
+ +
+ + +
+ float +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"float",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 32,            ;; Size in Bits
+  i64 32,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 4              ;; Encoding
+}
+
+
+ +
+ + +
+ double +
+ +
+ +
+
+!2 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"double",;; Name
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 64,            ;; Size in Bits
+  i64 64,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 4              ;; Encoding
+}
+
+
+ +
+ + + + +
+ +

Given the following as an example of C/C++ derived type:

+ +
+
+typedef const int *IntPtr;
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+;;
+;; Define the typedef "IntPtr".
+;;
+!2 = metadata !{
+  i32 524310,          ;; Tag
+  metadata !1,         ;; Context
+  metadata !"IntPtr",  ;; Name
+  metadata !3,         ;; File
+  i32 0,               ;; Line number
+  i64 0,               ;; Size in bits
+  i64 0,               ;; Align in bits
+  i64 0,               ;; Offset in bits
+  i32 0,               ;; Flags
+  metadata !4          ;; Derived From type
+}
+
+;;
+;; Define the pointer type.
+;;
+!4 = metadata !{
+  i32 524303,          ;; Tag
+  metadata !1,         ;; Context
+  metadata !"",        ;; Name
+  metadata !1,         ;; File
+  i32 0,               ;; Line number
+  i64 64,              ;; Size in bits
+  i64 64,              ;; Align in bits
+  i64 0,               ;; Offset in bits
+  i32 0,               ;; Flags
+  metadata !5          ;; Derived From type
+}
+;;
+;; Define the const type.
+;;
+!5 = metadata !{
+  i32 524326,          ;; Tag
+  metadata !1,         ;; Context
+  metadata !"",        ;; Name
+  metadata !1,         ;; File
+  i32 0,               ;; Line number
+  i64 32,              ;; Size in bits
+  i64 32,              ;; Align in bits
+  i64 0,               ;; Offset in bits
+  i32 0,               ;; Flags
+  metadata !6          ;; Derived From type
+}
+;;
+;; Define the int type.
+;;
+!6 = metadata !{
+  i32 524324,          ;; Tag
+  metadata !1,         ;; Context
+  metadata !"int",     ;; Name
+  metadata !1,         ;; File
+  i32 0,               ;; Line number
+  i64 32,              ;; Size in bits
+  i64 32,              ;; Align in bits
+  i64 0,               ;; Offset in bits
+  i32 0,               ;; Flags
+  i32 5                ;; Encoding
+}
+
+
+ +
+ + + + +
+ +

Given the following as an example of C/C++ struct type:

+ +
+
+struct Color {
+  unsigned Red;
+  unsigned Green;
+  unsigned Blue;
+};
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+;;
+;; Define basic type for unsigned int.
+;;
+!5 = metadata !{
+  i32 524324,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"unsigned int",
+  metadata !1,       ;; File
+  i32 0,             ;; Line number
+  i64 32,            ;; Size in Bits
+  i64 32,            ;; Align in Bits
+  i64 0,             ;; Offset in Bits
+  i32 0,             ;; Flags
+  i32 7              ;; Encoding
+}
+;;
+;; Define composite type for struct Color.
+;;
+!2 = metadata !{
+  i32 524307,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"Color", ;; Name
+  metadata !1,       ;; Compile unit
+  i32 1,             ;; Line number
+  i64 96,            ;; Size in bits
+  i64 32,            ;; Align in bits
+  i64 0,             ;; Offset in bits
+  i32 0,             ;; Flags
+  null,              ;; Derived From
+  metadata !3,       ;; Elements
+  i32 0              ;; Runtime Language
+}
+
+;;
+;; Define the Red field.
+;;
+!4 = metadata !{
+  i32 524301,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"Red",   ;; Name
+  metadata !1,       ;; File
+  i32 2,             ;; Line number
+  i64 32,            ;; Size in bits
+  i64 32,            ;; Align in bits
+  i64 0,             ;; Offset in bits
+  i32 0,             ;; Flags
+  metadata !5        ;; Derived From type
+}
+
+;;
+;; Define the Green field.
+;;
+!6 = metadata !{
+  i32 524301,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"Green", ;; Name
+  metadata !1,       ;; File
+  i32 3,             ;; Line number
+  i64 32,            ;; Size in bits
+  i64 32,            ;; Align in bits
+  i64 32,             ;; Offset in bits
+  i32 0,             ;; Flags
+  metadata !5        ;; Derived From type
+}
+
+;;
+;; Define the Blue field.
+;;
+!7 = metadata !{
+  i32 524301,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"Blue",  ;; Name
+  metadata !1,       ;; File
+  i32 4,             ;; Line number
+  i64 32,            ;; Size in bits
+  i64 32,            ;; Align in bits
+  i64 64,             ;; Offset in bits
+  i32 0,             ;; Flags
+  metadata !5        ;; Derived From type
+}
+
+;;
+;; Define the array of fields used by the composite type Color.
+;;
+!3 = metadata !{metadata !4, metadata !6, metadata !7}
+
+
+ +
+ + + + +
+ +

Given the following as an example of C/C++ enumeration type:

+ +
+
+enum Trees {
+  Spruce = 100,
+  Oak = 200,
+  Maple = 300
+};
+
+
+ +

a C/C++ front-end would generate the following descriptors:

+ +
+
+;;
+;; Define composite type for enum Trees
+;;
+!2 = metadata !{
+  i32 524292,        ;; Tag
+  metadata !1,       ;; Context
+  metadata !"Trees", ;; Name
+  metadata !1,       ;; File
+  i32 1,             ;; Line number
+  i64 32,            ;; Size in bits
+  i64 32,            ;; Align in bits
+  i64 0,             ;; Offset in bits
+  i32 0,             ;; Flags
+  null,              ;; Derived From type
+  metadata !3,       ;; Elements
+  i32 0              ;; Runtime language
+}
+
+;;
+;; Define the array of enumerators used by composite type Trees.
+;;
+!3 = metadata !{metadata !4, metadata !5, metadata !6}
+
+;;
+;; Define Spruce enumerator.
+;;
+!4 = metadata !{i32 524328, metadata !"Spruce", i64 100}
+
+;;
+;; Define Oak enumerator.
+;;
+!5 = metadata !{i32 524328, metadata !"Oak", i64 200}
+
+;;
+;; Define Maple enumerator.
+;;
+!6 = metadata !{i32 524328, metadata !"Maple", i64 300}
+
+
+
+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/SystemLibrary.html b/final/docs/SystemLibrary.html new file mode 100644 index 00000000000..0289a554108 --- /dev/null +++ b/final/docs/SystemLibrary.html @@ -0,0 +1,319 @@ + + + + System Library + + + + +
System Library
+ + +
+

Written by Reid Spencer

+
+ + + + +
+

This document provides some details on LLVM's System Library, located in + the source at lib/System and include/llvm/System. The + library's purpose is to shield LLVM from the differences between operating + systems for the few services LLVM needs from the operating system. Much of + LLVM is written using portability features of standard C++. However, in a few + areas, system dependent facilities are needed and the System Library is the + wrapper around those system calls.

+

By centralizing LLVM's use of operating system interfaces, we make it + possible for the LLVM tool chain and runtime libraries to be more easily + ported to new platforms since (theoretically) only lib/System needs + to be ported. This library also unclutters the rest of LLVM from #ifdef use + and special cases for specific operating systems. Such uses are replaced + with simple calls to the interfaces provided in include/llvm/System. +

+

Note that the System Library is not intended to be a complete operating + system wrapper (such as the Adaptive Communications Environment (ACE) or + Apache Portable Runtime (APR)), but only provides the functionality necessary + to support LLVM. +

The System Library was written by Reid Spencer who formulated the + design based on similar work originating from the eXtensible Programming + System (XPS). Several people helped with the effort; especially, + Jeff Cohen and Henrik Bach on the Win32 port.

+
+ + + +
+

In order to keep LLVM portable, LLVM developers should adhere to a set of + portability rules associated with the System Library. Adherence to these rules + should help the System Library achieve its goal of shielding LLVM from the + variations in operating system interfaces and doing so efficiently. The + following sections define the rules needed to fulfill this objective.

+
+ + + +
+

Except in lib/System, no LLVM source code should directly + #include a system header. Care has been taken to remove all such + #includes from LLVM while lib/System was being + developed. Specifically this means that header files like "unistd.h", + "windows.h", "stdio.h", and "string.h" are forbidden to be included by LLVM + source code outside the implementation of lib/System.

+

To obtain system-dependent functionality, existing interfaces to the system + found in include/llvm/System should be used. If an appropriate + interface is not available, it should be added to include/llvm/System + and implemented in lib/System for all supported platforms.

+
+ + + +
+

The System Library must shield LLVM from all system headers. To + obtain system level functionality, LLVM source must + #include "llvm/System/Thing.h" and nothing else. This means that + Thing.h cannot expose any system header files. This protects LLVM + from accidentally using system specific functionality and only allows it + via the lib/System interface.

+
+ + + +
+

The standard C headers (the ones beginning with "c") are allowed + to be exposed through the lib/System interface. These headers and + the things they declare are considered to be platform agnostic. LLVM source + files may include them directly or obtain their inclusion through + lib/System interfaces.

+
+ + + +
+

The standard C++ headers from the standard C++ library and + standard template library may be exposed through the lib/System + interface. These headers and the things they declare are considered to be + platform agnostic. LLVM source files may include them or obtain their + inclusion through lib/System interfaces.

+
+ + + +
+

The entry points specified in the interface of lib/System must be aimed at + completing some reasonably high level task needed by LLVM. We do not want to + simply wrap each operating system call. It would be preferable to wrap several + operating system calls that are always used in conjunction with one another by + LLVM.

+

For example, consider what is needed to execute a program, wait for it to complete, and return its result code. On Unix, this involves the following operating system calls: getenv, fork, execve, and wait. The correct thing for lib/System to provide is a function, say ExecuteProgramAndWait, that implements the functionality completely. What we don't want is wrappers for the operating system calls involved.

+

There must not be a one-to-one relationship between operating + system calls and the System library's interface. Any such interface function + will be suspicious.

+
+ + + +
+

There must be no functionality specified in the interface of lib/System + that isn't actually used by LLVM. We're not writing a general purpose + operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM + doesn't need much. This design goal aims to keep the lib/System interface + small and understandable which should foster its actual use and adoption.

+
+ + + +
+

The implementation of a function for a given platform must be written + exactly once. This implies that it must be possible to apply a function's + implementation to multiple operating systems if those operating systems can + share the same implementation. This rule applies to the set of operating + systems supported for a given class of operating system (e.g. Unix, Win32). +

+
+ + + +
+

The System Library interfaces can be called quite frequently by LLVM. In order to make those calls as efficient as possible, we discourage the use of virtual methods. There is no need to use inheritance for implementation differences; it just adds complexity. The #include mechanism works just fine.

+
+ + + +
+

Any functions defined by system libraries (i.e. not defined by lib/System) + must not be exposed through the lib/System interface, even if the header file + for that function is not exposed. This prevents inadvertent use of system + specific functionality.

+

For example, the stat system call is notorious for having + variations in the data it provides. lib/System must not declare + stat nor allow it to be declared. Instead it should provide its own + interface to discovering information about files and directories. Those + interfaces may be implemented in terms of stat but that is strictly + an implementation detail. The interface provided by the System Library must + be implemented on all platforms (even those without stat).
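For illustration, a neutral query of this kind might look as follows on Unix (a sketch with invented names; the point is that struct stat never appears in the interface, only in the platform implementation):

+  #include <sys/stat.h>   // Unix implementation file only, never the header
+
+  bool GetFileSize(const char *Name, unsigned long long &Size) {
+    struct stat SB;                // implementation detail
+    if (stat(Name, &SB) != 0)
+      return false;                // hard error: couldn't query the file
+    Size = SB.st_size;
+    return true;
+  }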

+
+ + + +
+

Any data defined by system libraries (i.e. not defined by lib/System) must not be exposed through the lib/System interface, even if the header file that declares it is not exposed. As with functions, this prevents inadvertent use of data that might not exist on all platforms.

+
+ + + +
+

Operating system interfaces will generally provide error results for every little thing that could go wrong. In almost all cases, you can divide these error results into two groups: normal/good/soft and abnormal/bad/hard. That is, some of the errors are simply information like "file not found" or "insufficient privileges", while other errors are much harder, like "out of space", "bad disk sector", or "system call interrupted". We'll call the first group "soft" errors and the second group "hard" errors.

+

lib/System must always attempt to minimize soft errors. This is a design requirement because the minimization of soft errors can affect the granularity and the nature of the interface. In general, if you find yourself wanting to throw soft errors, you must review the granularity of the interface because it is likely you're trying to implement something that is too low level. The rule of thumb is to provide interface functions that can't fail, except when faced with hard errors.

+

For a trivial example, suppose we wanted to add an "OpenFileForWriting" function. For many operating systems, if the file doesn't exist, attempting to open the file will produce an error. However, lib/System should not simply throw that error if it occurs because it's a soft error. The problem is that the interface function, OpenFileForWriting, is too low level. It should be OpenOrCreateFileForWriting. In the case of the soft "doesn't exist" error, this function would just create the file and then open it for writing.
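A sketch of the idea on Unix (hypothetical function; the flags and mode are illustrative): O_CREAT turns the "doesn't exist" soft error into normal operation, leaving only hard errors to report:

+  #include <fcntl.h>
+
+  int OpenOrCreateFileForWriting(const char *Name) {
+    // Creating the file on demand absorbs the soft error; only hard
+    // errors (no space, bad disk, ...) can still make this fail.
+    return open(Name, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+  }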

+

This design principle needs to be maintained in lib/System because it avoids the propagation of soft error handling throughout the rest of LLVM. Hard errors will generally just cause a termination for an LLVM tool, so don't be bashful about throwing them.

+

Rules of thumb:

+
    +
  1. Don't throw soft errors, only hard errors.
  2. If you're tempted to throw a soft error, re-think the interface.
  3. Handle internally the most common normal/good/soft error conditions
     so the rest of LLVM doesn't have to.
+
+ + + +
+

None of the lib/System interface functions may be declared with C++ + throw() specifications on them. This requirement makes sure that the + compiler does not insert additional exception handling code into the interface + functions. This is a performance consideration: lib/System functions are at + the bottom of many call chains and as such can be frequently called. We + need them to be as efficient as possible. However, no routines in the + system library should actually throw exceptions.
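Concretely, declarations stay free of exception specifications (hypothetical name shown for illustration):

+  namespace llvm {
+  namespace sys {
+    bool RemoveFile(const char *Name);          // correct: no throw() spec
+    // bool RemoveFile(const char *N) throw();  // forbidden by this rule
+  }
+  }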

+
+ + + +
+

Implementations of the System Library interface are separated by their general class of operating system. Currently only Unix and Win32 classes are defined, but more could be added for other operating system classifications. To distinguish which implementation to compile, the code in lib/System uses the LLVM_ON_UNIX and LLVM_ON_WIN32 #defines provided via configure through the llvm/Config/config.h file. Each source file in lib/System, after implementing the generic (operating system independent) functionality, needs to include the correct implementation using a set of #if defined(LLVM_ON_XYZ) directives. For example, if we had lib/System/File.cpp, we'd expect to see in that file:

+

+  #if defined(LLVM_ON_UNIX)
+  #include "Unix/File.cpp"
+  #endif
+  #if defined(LLVM_ON_WIN32)
+  #include "Win32/File.cpp"
+  #endif
+  
+

The implementation in lib/System/Unix/File.cpp should handle all Unix + variants. The implementation in lib/System/Win32/File.cpp should handle all + Win32 variants. What this does is quickly differentiate the basic class of + operating system that will provide the implementation. The specific details + for a given platform must still be determined through the use of + #ifdef.

+
+ + + +
+

The implementation of a lib/System interface can vary drastically between platforms. That's okay as long as the end result of the interface function is the same. For example, a function to create a directory is pretty straightforward on all operating systems. System V IPC, on the other hand, isn't even supported on all platforms. Instead of "supporting" System V IPC, lib/System should provide an interface to the basic concept of inter-process communication. The implementations might use System V IPC if that is available, or named pipes, or whatever gets the job done effectively for a given operating system. In all cases, the interface and the implementation must be semantically consistent.

+
+ + + +
+

See bug 351 for further details on the progress of this work.

+
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Reid Spencer
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/TableGenFundamentals.html b/final/docs/TableGenFundamentals.html new file mode 100644 index 00000000000..de46a391f36 --- /dev/null +++ b/final/docs/TableGenFundamentals.html @@ -0,0 +1,911 @@ + + + + TableGen Fundamentals + + + + +
TableGen Fundamentals
+ + + +
+

Written by Chris Lattner

+
+ + + + + +
+ +

TableGen's purpose is to help a human develop and maintain records of +domain-specific information. Because there may be a large number of these +records, it is specifically designed to allow writing flexible descriptions and +for common features of these records to be factored out. This reduces the +amount of duplication in the description, reduces the chance of error, and +makes it easier to structure domain specific information.

+ +

The core part of TableGen parses a file, instantiates +the declarations, and hands the result off to a domain-specific "TableGen backend" for processing. The current major user +of TableGen is the LLVM code generator.

+ +

Note that if you work on TableGen much and use emacs or vim, you can find an emacs "TableGen mode" and a vim language file in the llvm/utils/emacs and llvm/utils/vim directories of your LLVM distribution, respectively.

+ +
+ + + + +
+ +

TableGen files consist of two key parts: 'classes' and 'definitions', both +of which are considered 'records'.

+ +

TableGen records have a unique name, a list of values, and a list of +superclasses. The list of values is the main data that TableGen builds for each +record; it is this that holds the domain specific information for the +application. The interpretation of this data is left to a specific TableGen backend, but the structure and format rules are +taken care of and are fixed by TableGen.

+ +

TableGen definitions are the concrete form of 'records'. These +generally do not have any undefined values, and are marked with the +'def' keyword.

+ +

TableGen classes are abstract records that are used to build and +describe other records. These 'classes' allow the end-user to build +abstractions for either the domain they are targeting (such as "Register", +"RegisterClass", and "Instruction" in the LLVM code generator) or for the +implementor to help factor out common properties of records (such as "FPInst", +which is used to represent floating point instructions in the X86 backend). +TableGen keeps track of all of the classes that are used to build up a +definition, so the backend can find all definitions of a particular class, such +as "Instruction".

+ +

TableGen multiclasses are groups of abstract records that are +instantiated all at once. Each instantiation can result in multiple +TableGen definitions. If a multiclass inherits from another multiclass, +the definitions in the sub-multiclass become part of the current +multiclass, as if they were declared in the current multiclass.

+ +
+ + + + +
+ +

With no other arguments, TableGen parses the specified file and prints out all of the classes, then all of the definitions. This is a good way to see what the various definitions fully expand to. Running this on the X86.td file prints this (at the time of this writing):

+ +
+
+...
+def ADD32rr {   // Instruction X86Inst I
+  string Namespace = "X86";
+  dag OutOperandList = (outs GR32:$dst);
+  dag InOperandList = (ins GR32:$src1, GR32:$src2);
+  string AsmString = "add{l}\t{$src2, $dst|$dst, $src2}";
+  list<dag> Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))];
+  list<Register> Uses = [];
+  list<Register> Defs = [EFLAGS];
+  list<Predicate> Predicates = [];
+  int CodeSize = 3;
+  int AddedComplexity = 0;
+  bit isReturn = 0;
+  bit isBranch = 0;
+  bit isIndirectBranch = 0;
+  bit isBarrier = 0;
+  bit isCall = 0;
+  bit canFoldAsLoad = 0;
+  bit mayLoad = 0;
+  bit mayStore = 0;
+  bit isImplicitDef = 0;
+  bit isConvertibleToThreeAddress = 1;
+  bit isCommutable = 1;
+  bit isTerminator = 0;
+  bit isReMaterializable = 0;
+  bit isPredicable = 0;
+  bit hasDelaySlot = 0;
+  bit usesCustomInserter = 0;
+  bit hasCtrlDep = 0;
+  bit isNotDuplicable = 0;
+  bit hasSideEffects = 0;
+  bit neverHasSideEffects = 0;
+  InstrItinClass Itinerary = NoItinerary;
+  string Constraints = "";
+  string DisableEncoding = "";
+  bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 };
+  Format Form = MRMDestReg;
+  bits<6> FormBits = { 0, 0, 0, 0, 1, 1 };
+  ImmType ImmT = NoImm;
+  bits<3> ImmTypeBits = { 0, 0, 0 };
+  bit hasOpSizePrefix = 0;
+  bit hasAdSizePrefix = 0;
+  bits<4> Prefix = { 0, 0, 0, 0 };
+  bit hasREX_WPrefix = 0;
+  FPFormat FPForm = ?;
+  bits<3> FPFormBits = { 0, 0, 0 };
+}
+...
+
+
+ +

This definition corresponds to a 32-bit register-register add instruction in the X86. The string after the 'def' string indicates the name of the record ("ADD32rr" in this case), and the comment at the end of the line indicates the superclasses of the definition. The body of the record contains all of the data that TableGen assembled for the record, indicating that the instruction is part of the "X86" namespace, the pattern indicating how the instruction should be emitted into the assembly file, that it is a two-address instruction, has a particular encoding, etc. The contents and semantics of the information in the record are specific to the needs of the X86 backend, and are only shown as an example.

+ +

As you can see, a lot of information is needed for every instruction +supported by the code generator, and specifying it all manually would be +unmaintainable, prone to bugs, and tiring to do in the first place. Because we +are using TableGen, all of the information was derived from the following +definition:

+ +
+
+let Defs = [EFLAGS],
+    isCommutable = 1,                  // X = ADD Y,Z --> X = ADD Z,Y
+    isConvertibleToThreeAddress = 1 in // Can transform into LEA.
+def ADD32rr  : I<0x01, MRMDestReg, (outs GR32:$dst),
+                                   (ins GR32:$src1, GR32:$src2),
+                 "add{l}\t{$src2, $dst|$dst, $src2}",
+                 [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>;
+
+
+ +

This definition makes use of the custom class I (extended from the +custom class X86Inst), which is defined in the X86-specific TableGen +file, to factor out the common features that instructions of its class share. A +key feature of TableGen is that it allows the end-user to define the +abstractions they prefer to use when describing their information.

+ +
+ + + + +
+ +

TableGen runs just like any other LLVM tool. The first (optional) argument +specifies the file to read. If a filename is not specified, tblgen +reads from standard input.

+ +

To be useful, one of the TableGen backends must be +used. These backends are selectable on the command line (type 'tblgen +-help' for a list). For example, to get a list of all of the definitions +that subclass a particular type (which can be useful for building up an enum +list of these records), use the -print-enums option:

+ +
+
+$ tblgen X86.td -print-enums -class=Register
+AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX,
+ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP,
+MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D,
+R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15,
+R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI,
+RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7,
+XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5,
+XMM6, XMM7, XMM8, XMM9,
+
+$ tblgen X86.td -print-enums -class=Instruction 
+ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri,
+ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8,
+ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm,
+ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr,
+ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ...
+
+
+ +

The default backend prints out all of the records, as described above.

+ +

If you plan to use TableGen, you will most likely have to write a backend that extracts the information specific to +what you need and formats it in the appropriate way.

+ +
+ + + + + + +
+ +

TableGen doesn't care about the meaning of data (that is up to the backend to +define), but it does care about syntax, and it enforces a simple type system. +This section describes the syntax and the constructs allowed in a TableGen file. +

+ +
+ + + + + + + +
+ +

TableGen supports BCPL style "//" comments, which run to the end of +the line, and it also supports nestable "/* */" comments.

+ +
+ + + + +
+ +

TableGen files are strongly typed, in a simple (but complete) type-system. +These types are used to perform automatic conversions, check for errors, and to +help interface designers constrain the input that they allow. Every value definition is required to have an associated type. +

+ +

TableGen supports a mixture of very low-level types (such as bit) +and very high-level types (such as dag). This flexibility is what +allows it to describe a wide range of information conveniently and compactly. +The TableGen types are:

+ +
+
bit
+
A 'bit' is a boolean value that can hold either 0 or 1.
+ +
int
+
The 'int' type represents a simple 32-bit integer value, such as 5.
+ +
string
+
The 'string' type represents an ordered sequence of characters of + arbitrary length.
+ +
bits<n>
+
A 'bits' type is an arbitrary, but fixed, size integer that is broken up + into individual bits. This type is useful because it can handle some bits + being defined while others are undefined.
+ +
list<ty>
+
This type represents a list whose elements are some other type. The + contained type is arbitrary: it can even be another list type.
+ +
Class type
+
Specifying a class name in a type context means that the defined value + must be a subclass of the specified class. This is useful in conjunction with + the list type, for example, to constrain the elements of the + list to a common base class (e.g., a list<Register> can + only contain definitions derived from the "Register" class).
+ +
dag
+
This type represents a nestable directed graph of elements.
+ +
code
+
This represents a big hunk of text. This is lexically distinct from string values because it doesn't require escaping double quotes and other common characters that occur in code.
+
+ +

To date, these types have been sufficient for describing things that TableGen has been used for, but it is straightforward to extend this list if needed.

+ +
+ + + + +
+ +

TableGen allows for a pretty reasonable number of different expression forms +when building up values. These forms allow the TableGen file to be written in a +natural syntax and flavor for the application. The current expression forms +supported include:

+ +
+
?
+
uninitialized field
+
0b1001011
+
binary integer value
+
07654321
+
octal integer value (indicated by a leading 0)
+
7
+
decimal integer value
+
0x7F
+
hexadecimal integer value
+
"foo"
+
string value
+
[{ ... }]
+
code fragment
+
[ X, Y, Z ]<type>
+
list value. <type> is the type of the list element and is usually optional. In rare cases, TableGen is unable to deduce the element type, in which case the user must specify it explicitly.
+
{ a, b, c }
+
initializer for a "bits<3>" value
+
value
+
value reference
+
value{17}
+
access to one bit of a value
+
value{15-17}
+
access to multiple bits of a value
+
DEF
+
reference to a record definition
+
CLASS<val list>
+
reference to a new anonymous definition of CLASS with the specified + template arguments.
+
X.Y
+
reference to the subfield of a value
+
list[4-7,17,2-3]
+
A slice of the 'list' list, including elements 4,5,6,7,17,2, and 3 from + it. Elements may be included multiple times.
+
(DEF a, b)
+
a dag value. The first element is required to be a record definition, the + remaining elements in the list may be arbitrary other values, including nested + `dag' values.
+
!strconcat(a, b)
+
A string value that is the result of concatenating the 'a' and 'b' + strings.
+
!cast<type>(a)
+
A symbol of type type obtained by looking up the string 'a' in +the symbol table. If the type of 'a' does not match type, TableGen +aborts with an error. !cast<string> is a special case in that the argument must +be an object defined by a 'def' construct.
+
!subst(a, b, c)
+
If 'a' and 'b' are of string type or are symbol references, substitute +'b' for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.
+
!foreach(a, b, c)
+
For each member 'b' of dag or list 'a' apply operator 'c.' 'b' is a +dummy variable that should be declared as a member variable of an instantiated +class. This operation is analogous to $(foreach) in GNU make.
+
!head(a)
+
The first element of list 'a.'
+
!tail(a)
+
The 2nd-N elements of list 'a.'
+
!empty(a)
+
An integer {0,1} indicating whether list 'a' is empty.
+
!if(a,b,c)
+
'b' if the result of 'int' or 'bit' operator 'a' is nonzero, + 'c' otherwise.
+
!eq(a,b)
+
'bit 1' if string a is equal to string b, 0 otherwise. This + only operates on string, int and bit objects. Use !cast<string> to + compare other types of objects.
+
+ +

Note that all of the values have rules specifying how they convert to values +for different types. These rules allow you to assign a value like "7" +to a "bits<4>" value, for example.

+ +
+ + + + +
+ +

As mentioned in the intro, classes and definitions +(collectively known as 'records') in TableGen are the main high-level unit of +information that TableGen collects. Records are defined with a def or +class keyword, the record name, and an optional list of "template arguments". If the record has superclasses, +they are specified as a comma separated list that starts with a colon character +(":"). If value definitions or let expressions are needed for the class, they are +enclosed in curly braces ("{}"); otherwise, the record ends with a +semicolon.

+ +

Here is a simple TableGen file:

+ +
+
+class C { bit V = 1; }
+def X : C;
+def Y : C {
+  string Greeting = "hello";
+}
+
+
+ +

This example defines two definitions, X and Y, both of which derive from the C class. Because of this, they both get the V bit value. The Y definition also gets the Greeting member.

+ +

In general, classes are useful for collecting together the commonality +between a group of records and isolating it in a single place. Also, classes +permit the specification of default values for their subclasses, allowing the +subclasses to override them as they wish.

+ +
+ + + + +
+ +

Value definitions define named entries in records. A value must be defined +before it can be referred to as the operand for another value definition or +before the value is reset with a let expression. A +value is defined by specifying a TableGen type and a name. +If an initial value is available, it may be specified after the type with an +equal sign. Value definitions require terminating semicolons.

+ +
+ + + + +
+ +

A record-level let expression is used to change the value of a value +definition in a record. This is primarily useful when a superclass defines a +value that a derived class or definition wants to override. Let expressions +consist of the 'let' keyword followed by a value name, an equal sign +("="), and a new value. For example, a new class could be added to the +example above, redefining the V field for all of its subclasses:

+ +
+
+class D : C { let V = 0; }
+def Z : D;
+
+
+ +

In this case, the Z definition will have a zero value for its "V" +value, despite the fact that it derives (indirectly) from the C class, +because the D class overrode its value.

+ +
+ + + + +
+ +

TableGen permits the definition of parameterized classes as well as normal +concrete classes. Parameterized TableGen classes specify a list of variable +bindings (which may optionally have defaults) that are bound when used. Here is +a simple example:

+ +
+
+class FPFormat<bits<3> val> {
+  bits<3> Value = val;
+}
+def NotFP      : FPFormat<0>;
+def ZeroArgFP  : FPFormat<1>;
+def OneArgFP   : FPFormat<2>;
+def OneArgFPRW : FPFormat<3>;
+def TwoArgFP   : FPFormat<4>;
+def CompareFP  : FPFormat<5>;
+def CondMovFP  : FPFormat<6>;
+def SpecialFP  : FPFormat<7>;
+
+
+ +

In this case, template arguments are used as a space-efficient way to specify a list of "enumeration values", each with a "Value" field set to the specified integer.

+ +

The more esoteric forms of TableGen expressions are +useful in conjunction with template arguments. As an example:

+ +
+
+class ModRefVal<bits<2> val> {
+  bits<2> Value = val;
+}
+
+def None   : ModRefVal<0>;
+def Mod    : ModRefVal<1>;
+def Ref    : ModRefVal<2>;
+def ModRef : ModRefVal<3>;
+
+class Value<ModRefVal MR> {
+  // Decode some information into a more convenient format, while providing
+  // a nice interface to the user of the "Value" class.
+  bit isMod = MR.Value{0};
+  bit isRef = MR.Value{1};
+
+  // other stuff...
+}
+
+// Example uses
+def bork : Value<Mod>;
+def zork : Value<Ref>;
+def hork : Value<ModRef>;
+
+
+ +

This is obviously a contrived example, but it shows how template arguments +can be used to decouple the interface provided to the user of the class from the +actual internal data representation expected by the class. In this case, +running tblgen on the example prints the following definitions:

+ +
+
+def bork {      // Value
+  bit isMod = 1;
+  bit isRef = 0;
+}
+def hork {      // Value
+  bit isMod = 1;
+  bit isRef = 1;
+}
+def zork {      // Value
+  bit isMod = 0;
+  bit isRef = 1;
+}
+
+
+ +

This shows that TableGen was able to dig into the argument and extract a +piece of information that was requested by the designer of the "Value" class. +For more realistic examples, please see existing users of TableGen, such as the +X86 backend.

+ +
+ + + + +
+ +

While classes with template arguments are a good way to factor commonality between two instances of a definition, multiclasses allow a convenient notation for defining multiple definitions at once (instances of implicitly constructed classes). For example, consider a 3-address instruction set whose instructions come in two forms: "reg = reg op reg" and "reg = reg op imm" (e.g. SPARC). In this case, you'd like to specify in one place that this commonality exists, then in a separate place indicate what all the ops are.

+ +

+Here is an example TableGen fragment that shows this idea: +

+ +
+
+def ops;
+def GPR;
+def Imm;
+class inst<int opc, string asmstr, dag operandlist>;
+
+multiclass ri_inst<int opc, string asmstr> {
+  def _rr : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+                 (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+  def _ri : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+                 (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+}
+
+// Instantiations of the ri_inst multiclass.
+defm ADD : ri_inst<0b111, "add">;
+defm SUB : ri_inst<0b101, "sub">;
+defm MUL : ri_inst<0b100, "mul">;
+...
+
+
+ +

The names of the resulting definitions are formed by appending the names of the defs inside the multiclass to the defm name, so this defines ADD_rr, ADD_ri, SUB_rr, etc. A defm may inherit from multiple multiclasses, instantiating definitions from each multiclass. Using a multiclass this way is exactly equivalent to instantiating the classes multiple times yourself, e.g. by writing:

+ +
+
+def ops;
+def GPR;
+def Imm;
+class inst<int opc, string asmstr, dag operandlist>;
+
+class rrinst<int opc, string asmstr>
+  : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+         (ops GPR:$dst, GPR:$src1, GPR:$src2)>;
+
+class riinst<int opc, string asmstr>
+  : inst<opc, !strconcat(asmstr, " $dst, $src1, $src2"),
+         (ops GPR:$dst, GPR:$src1, Imm:$src2)>;
+
+// Instantiations of the ri_inst multiclass.
+def ADD_rr : rrinst<0b111, "add">;
+def ADD_ri : riinst<0b111, "add">;
+def SUB_rr : rrinst<0b101, "sub">;
+def SUB_ri : riinst<0b101, "sub">;
+def MUL_rr : rrinst<0b100, "mul">;
+def MUL_ri : riinst<0b100, "mul">;
+...
+
+
+ +

A defm can also be used inside a multiclass, providing several levels of multiclass instantiation.

+ +
+
+class Instruction<bits<4> opc, string Name> {
+  bits<4> opcode = opc;
+  string name = Name;
+}
+
+multiclass basic_r<bits<4> opc> {
+  def rr : Instruction<opc, "rr">;
+  def rm : Instruction<opc, "rm">;
+}
+
+multiclass basic_s<bits<4> opc> {
+  defm SS : basic_r<opc>;
+  defm SD : basic_r<opc>;
+  def X : Instruction<opc, "x">;
+}
+
+multiclass basic_p<bits<4> opc> {
+  defm PS : basic_r<opc>;
+  defm PD : basic_r<opc>;
+  def Y : Instruction<opc, "y">;
+}
+
+defm ADD : basic_s<0xf>, basic_p<0xf>;
+...
+
+// Results
+def ADDPDrm { ...
+def ADDPDrr { ...
+def ADDPSrm { ...
+def ADDPSrr { ...
+def ADDSDrm { ...
+def ADDSDrr { ...
+def ADDY { ...
+def ADDX { ...
+
+
+ +

defm declarations can inherit from classes too. The rule to follow is that the class list must start after the last multiclass, and there must be at least one multiclass before them.

+ +
+
+class XD { bits<4> Prefix = 11; }
+class XS { bits<4> Prefix = 12; }
+
+class I<bits<4> op> {
+  bits<4> opcode = op;
+}
+
+multiclass R {
+  def rr : I<4>;
+  def rm : I<2>;
+}
+
+multiclass Y {
+  defm SS : R, XD;
+  defm SD : R, XS;
+}
+
+defm Instr : Y;
+
+// Results
+def InstrSDrm {
+  bits<4> opcode = { 0, 0, 1, 0 };
+  bits<4> Prefix = { 1, 1, 0, 0 };
+}
+...
+def InstrSSrr {
+  bits<4> opcode = { 0, 1, 0, 0 };
+  bits<4> Prefix = { 1, 0, 1, 1 };
+}
+
+
+ +
+ + + + + + + +
+

TableGen supports the 'include' token, which textually substitutes +the specified file in place of the include directive. The filename should be +specified as a double quoted string immediately after the 'include' +keyword. Example:

+ +
+
+include "foo.td"
+
+
+ +
+ + + + +
+ +

"Let" expressions at file scope are similar to "let" +expressions within a record, except they can specify a value binding for +multiple records at a time, and may be useful in certain other cases. +File-scope let expressions are really just another way that TableGen allows the +end-user to factor out commonality from the records.

+ +

File-scope "let" expressions take a comma-separated list of bindings to +apply, and one or more records to bind the values in. Here are some +examples:

+ +
+
+let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in
+  def RET : I<0xC3, RawFrm, (outs), (ins), "ret", [(X86retflag 0)]>;
+
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers...
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, EFLAGS] in {
+    def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
+                           "call\t${dst:call}", []>;
+    def CALL32r     : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
+                        "call\t{*}$dst", [(X86call GR32:$dst)]>;
+    def CALL32m     : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
+                        "call\t{*}$dst", []>;
+  }
+
+
+ +

File-scope "let" expressions are often useful when a couple of definitions +need to be added to several records, and the records do not otherwise need to be +opened, as in the case with the CALL* instructions above.

+ +

It's also possible to use "let" expressions inside multiclasses, providing more ways to factor out commonality from the records, especially when using several levels of multiclass instantiation. This also avoids the need to use "let" expressions within subsequent records inside a multiclass.

+ +
+multiclass basic_r<bits<4> opc> {
+  let Predicates = [HasSSE2] in {
+    def rr : Instruction<opc, "rr">;
+    def rm : Instruction<opc, "rm">;
+  }
+  let Predicates = [HasSSE3] in
+    def rx : Instruction<opc, "rx">;
+}
+
+multiclass basic_ss<bits<4> opc> {
+  let IsDouble = 0 in
+    defm SS : basic_r<opc>;
+
+  let IsDouble = 1 in
+    defm SD : basic_r<opc>;
+}
+
+defm ADD : basic_ss<0xf>;
+
+
+ + + + + +
+ +

Expressions used by the code generator to describe instructions and isel patterns:

+ +
+
(implicit a)
+
an implicitly defined physical register. This tells the dag instruction selection emitter that the input pattern's extra definitions match implicit physical register definitions.
+
+
+ + + + + +
+ +

TODO: How they work, how to write one. This section should not contain +details about any particular backend, except maybe -print-enums as an example. +This should highlight the APIs in TableGen/Record.h.
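Until that section is written, here is a hedged sketch of the flavor of a backend (RecordKeeper and getAllDerivedDefinitions reflect the Record.h API of this era, but treat the details as approximate rather than a verbatim quote of the tree):

+#include "Record.h"                     // tblgen record classes
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+
+// Emit one enum entry per record deriving from class "Instruction".
+void EmitInstrNames(RecordKeeper &Records, llvm::raw_ostream &OS) {
+  std::vector<Record*> Insts =
+      Records.getAllDerivedDefinitions("Instruction");
+  for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+    OS << "  " << Insts[i]->getName() << ",\n";
+}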

+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + Chris Lattner
+ LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/TestingGuide.html b/final/docs/TestingGuide.html new file mode 100644 index 00000000000..b048b72485a --- /dev/null +++ b/final/docs/TestingGuide.html @@ -0,0 +1,1196 @@ + + + + LLVM Testing Infrastructure Guide + + + + +
+ LLVM Testing Infrastructure Guide +
+ +
    +
  1. Overview
  2. Requirements
  3. LLVM testing infrastructure organization
  4. Quick start
  5. Regression test structure
  6. Test suite structure
  7. Running the test suite
+ +
+

Written by John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner

+
+ + + + + +
+ +

This document is the reference manual for the LLVM testing infrastructure. It +documents the structure of the LLVM testing infrastructure, the tools needed to +use it, and how to add and run tests.

+ +
+ + + + + +
+ +

In order to use the LLVM testing infrastructure, you will need all of the +software required to build LLVM, as well +as Python 2.4 or later.

+ +
+ + + + + +
+ +

The LLVM testing infrastructure contains two major categories of tests: +regression tests and whole programs. The regression tests are contained inside +the LLVM repository itself under llvm/test and are expected to always +pass -- they should be run before every commit. The whole programs tests are +referred to as the "LLVM test suite" and are in the test-suite module +in subversion. +

+ +
+ + + + + +
+ +

The regression tests are small pieces of code that test a specific feature of +LLVM or trigger a specific bug in LLVM. They are usually written in LLVM +assembly language, but can be written in other languages if the test targets a +particular language front end (and the appropriate --with-llvmgcc +options were used at configure time of the llvm module). These +tests are driven by the 'lit' testing tool, which is part of LLVM.

+ +

These code fragments are not complete programs. The code generated +from them is never executed to determine correct behavior.

+ +

These code fragment tests are located in the llvm/test +directory.

+ +

Typically when a bug is found in LLVM, a regression test containing +just enough code to reproduce the problem should be written and placed +somewhere underneath this directory. In most cases, this will be a small +piece of LLVM assembly language code, often distilled from an actual +application or benchmark.

+ +
+ + + + + +
+ +

The test suite contains whole programs, which are pieces of +code which can be compiled and linked into a stand-alone program that can be +executed. These programs are generally written in high level languages such as +C or C++, but sometimes they are written straight in LLVM assembly.

+ +

These programs are compiled and then executed using several different +methods (native compiler, LLVM C backend, LLVM JIT, LLVM native code generation, +etc). The output of these programs is compared to ensure that LLVM is compiling +the program correctly.

+ +

In addition to compiling and executing programs, whole program tests serve as a way of benchmarking LLVM performance, both in terms of the efficiency of the programs generated and the speed with which LLVM compiles, optimizes, and generates code.

+ +

The test-suite is located in the test-suite Subversion module.

+ +
+ + + + + +
+ +

The test suite contains tests to check the quality of debugging information. The tests are written in C-based languages or in LLVM assembly language.

+ +

These tests are compiled and run under a debugger. The debugger output is checked to validate the debugging information. See README.txt in the test suite for more information. This test suite is located in the debuginfo-tests Subversion module.

+ +
+ + + + + +
+ +

The tests are located in two separate Subversion modules. The regression tests are in the main "llvm" module under the directory llvm/test (so you get these tests for free with the main llvm tree). The more comprehensive test suite that includes whole programs in C and C++ is in the test-suite module. This module should be checked out to the llvm/projects directory (don't use a name other than the default "test-suite", or else the test suite will be run every time you run make in the main llvm directory). When you configure the llvm module, the test-suite directory will be automatically configured. Alternatively, you can configure the test-suite module manually.

+ + + + +

To run all of the LLVM regression tests, use the master Makefile in the llvm/test directory:

+ +
+
+% gmake -C llvm/test
+
+
+ +

or

+ +
+
+% gmake check
+
+
+ +

If you have Clang checked out and built, +you can run the LLVM and Clang tests simultaneously using:

+ +


+ +
+
+% gmake check-all
+
+
+ +

To run the tests with Valgrind (Memcheck by default), just append +VG=1 to the commands above, e.g.:

+ +
+
+% gmake check VG=1
+
+
+ +

To run individual tests or subsets of tests, you can use the 'llvm-lit' +script which is built as part of LLVM. For example, to run the +'Integer/BitCast.ll' test by itself you can run:

+ +
+
+% llvm-lit ~/llvm/test/Integer/BitCast.ll 
+
+
+ +

or to run all of the ARM CodeGen tests:

+ +
+
+% llvm-lit ~/llvm/test/CodeGen/ARM
+
+
+ +

For more information on using the 'lit' tool, see 'llvm-lit --help' or the +'lit' man page.

+ + + + + +

To run the comprehensive test suite (tests that compile and execute whole +programs), first checkout and setup the test-suite module:

+ +
+
+% cd llvm/projects
+% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
+% cd ..
+% ./configure --with-llvmgccdir=$LLVM_GCC_DIR
+
+
+ +

where $LLVM_GCC_DIR is the directory where +you installed llvm-gcc, not its src or obj +dir. The --with-llvmgccdir option assumes that +the llvm-gcc-4.2 module was configured with +--program-prefix=llvm-, and therefore that the C and C++ +compiler drivers are called llvm-gcc and llvm-g++ +respectively. If this is not the case, +use --with-llvmgcc/--with-llvmgxx to specify each +executable's location.

+ +

Then, run the entire test suite by running make in the test-suite +directory:

+ +
+
+% cd projects/test-suite
+% gmake
+
+
+ +

Usually, running the "nightly" set of tests is a good idea, and you can also +let it generate a report by running:

+ +
+
+% cd projects/test-suite
+% gmake TEST=nightly report report.html
+
+
+ +

Any of the above commands can also be run in a subdirectory of +projects/test-suite to run the specified test only on the programs in +that subdirectory.

+ +
+ + + + + +

To run the debugging information tests, simply check out the tests inside the clang/test directory:

+ +
+
+% cd clang/test
+% svn co http://llvm.org/svn/llvm-project/debuginfo-tests/trunk debuginfo-tests
+
+
+ +

These tests are already set up to run as part of clang regression tests.

+ + + + + + +
+

The LLVM regression tests are driven by 'lit' and are located in + the llvm/test directory. + +

This directory contains a large array of small tests that exercise various features of LLVM and ensure that regressions do not occur. The directory is broken into several sub-directories, each focused on a particular area of LLVM. A few of the important ones are:

+ +
    +
  • Analysis: checks Analysis passes.
  • Archive: checks the Archive library.
  • Assembler: checks Assembly reader/writer functionality.
  • Bitcode: checks Bitcode reader/writer functionality.
  • CodeGen: checks code generation and each target.
  • Features: checks various features of the LLVM language.
  • Linker: tests bitcode linking.
  • Transforms: tests each of the scalar, IPO, and utility transforms to
    ensure they make the right transformations.
  • Verifier: tests the IR verifier.
+ +
+ + + + +
+

The regression test structure is very simple, but does require some + information to be set. This information is gathered via configure and + is written to a file, lit.site.cfg + in llvm/test. The llvm/test Makefile does this work for + you.

+ +

In order for the regression tests to work, each directory of tests must have a dg.exp file. Lit looks for this file to determine how to run the tests. This file is just a Tcl script and it can do anything you want, but we've standardized it for the LLVM regression tests. If you're adding a directory of tests, just copy dg.exp from another directory to get it running. The standard dg.exp simply loads a Tcl library (test/lib/llvm.exp) and calls the llvm_runtests function defined in that library with a list of file names to run. The names are obtained by using Tcl's glob command. Any directory that contains only directories does not need the dg.exp file.

+ +

The llvm-runtests function looks at each file that is passed to it and gathers together any lines that match "RUN:". These are the "RUN" lines that specify how the test is to be run. So, each test script must contain RUN lines if it is to do anything. If there are no RUN lines, the llvm-runtests function will issue an error and the test will fail.

+ +

RUN lines are specified in the comments of the test program using the + keyword RUN followed by a colon, and lastly the command (pipeline) + to execute. Together, these lines form the "script" that + llvm-runtests executes to run the test case. The syntax of the + RUN lines is similar to a shell's syntax for pipelines including I/O + redirection and variable substitution. However, even though these lines + may look like a shell script, they are not. RUN lines are interpreted + directly by the Tcl exec command. They are never executed by a + shell. Consequently the syntax differs from normal shell script syntax in a + few ways. You can specify as many RUN lines as needed.

+ +

lit performs substitution on each RUN line to replace LLVM tool + names with the full paths to the executable built for each tool (in + $(LLVM_OBJ_ROOT)/$(BuildMode)/bin). This ensures that lit does not + invoke any stray LLVM tools in the user's path during testing.

+ +

Each RUN line is executed on its own, distinct from other lines unless + its last character is \. This continuation character causes the RUN + line to be concatenated with the next one. In this way you can build up long + pipelines of commands without making huge line lengths. The lines ending in + \ are concatenated until a RUN line that doesn't end in \ is + found. This concatenated set of RUN lines then constitutes one execution. + Tcl will substitute variables and arrange for the pipeline to be executed. If + any process in the pipeline fails, the entire line (and test case) fails too. +

+ +

Below is an example of legal RUN lines in a .ll file:

+ +
+
+; RUN: llvm-as < %s | llvm-dis > %t1
+; RUN: llvm-dis < %s.bc-13 > %t2
+; RUN: diff %t1 %t2
+
+
+ +

As with a Unix shell, the RUN: lines permit pipelines and I/O redirection + to be used. However, the usage is slightly different than for Bash. To check + what's legal, see the documentation for the + Tcl exec + command and the + tutorial. + The major differences are:

+
    +
  • You can't do 2>&1. That will cause Tcl to write to a file named &1.
    Usually this is done to get stderr to go through a pipe. You can do that
    in Tcl with |&, so replace this idiom: ... 2>&1 | grep with
    ... |& grep
  • You can only redirect to a file, not to another descriptor and not from
    a here document.
  • Tcl supports redirecting to open files with the @ syntax, but you
    shouldn't use that here.
+ +

There are some quoting rules that you must pay attention to when writing + your RUN lines. In general nothing needs to be quoted. Tcl won't strip off any + quote characters so they will get passed to the invoked program. For + example:

+ +
+
+... | grep 'find this string'
+
+
+ +

This will fail because the ' characters are passed to grep. This would instruct grep to look for 'find in the files this and string'. To avoid this, use curly braces to tell Tcl that it should treat everything enclosed as one value. So our example would become:

+ +
+
+... | grep {find this string}
+
+
+ +

Additionally, the characters [ and ] are treated + specially by Tcl. They tell Tcl to interpret the content as a command to + execute. Since these characters are often used in regular expressions this can + have disastrous results and cause the entire test run in a directory to fail. + For example, a common idiom is to look for some basicblock number:

+ +
+
+... | grep bb[2-8]
+
+
+ +

This, however, will cause Tcl to fail because it's going to try to execute a program named "2-8". Instead, what you want is this:

+ +
+
+... | grep {bb\[2-8\]}
+
+
+ +

Finally, if you need to pass the \ character down to a program, + then it must be doubled. This is another Tcl special character. So, suppose + you had: + +

+
+... | grep 'i32\*'
+
+
+ +

This will fail to match what you want (a pointer to i32). First, the ' characters do not get stripped off. Second, the \ gets stripped off by Tcl, so what grep sees is: 'i32*'. That's not likely to match anything. To resolve this you must use \\ and the {}, like this:

+ +
+
+... | grep {i32\\*}
+
+
+ +

If your system includes GNU grep, make sure +that GREP_OPTIONS is not set in your environment. Otherwise, +you may get invalid results (both false positives and false +negatives).

+ +
+ + + + + +
+ +

A powerful feature of the RUN: lines is that they allow arbitrary commands to be executed as part of the test harness. While standard (portable) Unix tools like 'grep' work fine on RUN lines, as you see above, there are a lot of caveats due to interaction with Tcl syntax, and we want to make sure the RUN lines are portable to a wide range of systems. Another major problem is that grep is not very good at verifying that the output of a tool contains a series of different outputs in a specific order. The FileCheck tool was designed to help with these problems.

+ +

FileCheck (whose basic command line arguments are described in the FileCheck man page) is designed to read a file to check from standard input, and the set of things to verify from a file specified as a command line argument. A simple example of using FileCheck from a RUN line looks like this:

+ +
+
+; RUN: llvm-as < %s | llc -march=x86-64 | FileCheck %s
+
+
+ +

This syntax says to pipe the current file ("%s") into llvm-as, pipe that into llc, then pipe the output of llc into FileCheck. This means that FileCheck will be verifying its standard input (the llc output) against the filename argument specified (the original .ll file specified by "%s"). To see how this works, let's look at the rest of the .ll file (after the RUN line):

+ +
+
+define void @sub1(i32* %p, i32 %v) {
+entry:
+; CHECK: sub1:
+; CHECK: subl
+        %0 = tail call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %p, i32 %v)
+        ret void
+}
+
+define void @inc4(i64* %p) {
+entry:
+; CHECK: inc4:
+; CHECK: incq
+        %0 = tail call i64 @llvm.atomic.load.add.i64.p0i64(i64* %p, i64 1)
+        ret void
+}
+
+
+ +

Here you can see some "CHECK:" lines specified in comments. Now you can see +how the file is piped into llvm-as, then llc, and the machine code output is +what we are verifying. FileCheck checks the machine code output to verify that +it matches what the "CHECK:" lines specify.

+ +

The syntax of the CHECK: lines is very simple: they are fixed strings that must occur in order. FileCheck defaults to ignoring horizontal whitespace differences (e.g. a space is allowed to match a tab), but otherwise the contents of the CHECK: line are required to match something in the test file exactly.

+ +

One nice thing about FileCheck (compared to grep) is that it allows merging +test cases together into logical groups. For example, because the test above +is checking for the "sub1:" and "inc4:" labels, it will not match unless there +is a "subl" in between those labels. If it existed somewhere else in the file, +that would not count: "grep subl" matches if subl exists anywhere in the +file.

+ +
+ + + + +
+ +

The FileCheck -check-prefix option allows multiple test configurations to be +driven from one .ll file. This is useful in many circumstances, for example, +testing different architectural variants with llc. Here's a simple example:

+ +
+
+; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin9 -mattr=sse41 \
+; RUN:              | FileCheck %s -check-prefix=X32
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin9 -mattr=sse41 \
+; RUN:              | FileCheck %s -check-prefix=X64
+
+define <4 x i32> @pinsrd_1(i32 %s, <4 x i32> %tmp) nounwind {
+        %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 1
+        ret <4 x i32> %tmp1
+; X32: pinsrd_1:
+; X32:    pinsrd $1, 4(%esp), %xmm0
+
+; X64: pinsrd_1:
+; X64:    pinsrd $1, %edi, %xmm0
+}
+
+
+ +

In this case, we're testing that we get the expected code generation with +both 32-bit and 64-bit code generation.

+ +
+ + + + +
+ +

Sometimes you want to match lines and would like to verify that matches +happen on exactly consecutive lines with no other lines in between them. In +this case, you can use CHECK: and CHECK-NEXT: directives to specify this. If +you specified a custom check prefix, just use "<PREFIX>-NEXT:". For +example, something like this works as you'd expect:

+ +
+
+define void @t2(<2 x double>* %r, <2 x double>* %A, double %B) {
+	%tmp3 = load <2 x double>* %A, align 16
+	%tmp7 = insertelement <2 x double> undef, double %B, i32 0
+	%tmp9 = shufflevector <2 x double> %tmp3,
+                              <2 x double> %tmp7,
+                              <2 x i32> < i32 0, i32 2 >
+	store <2 x double> %tmp9, <2 x double>* %r, align 16
+	ret void
+        
+; CHECK: t2:
+; CHECK: 	movl	8(%esp), %eax
+; CHECK-NEXT: 	movapd	(%eax), %xmm0
+; CHECK-NEXT: 	movhpd	12(%esp), %xmm0
+; CHECK-NEXT: 	movl	4(%esp), %eax
+; CHECK-NEXT: 	movapd	%xmm0, (%eax)
+; CHECK-NEXT: 	ret
+}
+
+
+ +

CHECK-NEXT: directives reject the input unless there is exactly one newline between it and the previous directive. A CHECK-NEXT cannot be the first directive in a file.

+ +
+ + + + +
+ +

The CHECK-NOT: directive is used to verify that a string doesn't occur +between two matches (or the first match and the beginning of the file). For +example, to verify that a load is removed by a transformation, a test like this +can be used:

+ +
+
+define i8 @coerce_offset0(i32 %V, i32* %P) {
+  store i32 %V, i32* %P
+   
+  %P2 = bitcast i32* %P to i8*
+  %P3 = getelementptr i8* %P2, i32 2
+
+  %A = load i8* %P3
+  ret i8 %A
+; CHECK: @coerce_offset0
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+
+ +
+ + + + +
+ +

The CHECK: and CHECK-NOT: directives both take a pattern to match. For most +uses of FileCheck, fixed string matching is perfectly sufficient. For some +things, a more flexible form of matching is desired. To support this, FileCheck +allows you to specify regular expressions in matching strings, surrounded by +double braces: {{yourregex}}. Because we want to use fixed string +matching for a majority of what we do, FileCheck has been designed to support +mixing and matching fixed string matching with regular expressions. This allows +you to write things like this:

+ +
+
+; CHECK: movhpd	{{[0-9]+}}(%esp), {{%xmm[0-7]}}
+
+
+ +

In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed.

+ +

Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +{{[{][{]}} as your pattern.

+ +
+ + + + +
+ +

It is often useful to match a pattern and then verify that it occurs again +later in the file. For codegen tests, this can be useful to allow any register, +but verify that that register is used consistently later. To do this, FileCheck +allows named variables to be defined and substituted into patterns. Here is a +simple example:

+ +
+
+; CHECK: test5:
+; CHECK:    notw	[[REGISTER:%[a-z]+]]
+; CHECK:    andw	{{.*}}[[REGISTER]]
+
+
+ +

The first check line matches a regex (%[a-z]+) and captures it into the variable "REGISTER". The second line verifies that whatever is in REGISTER occurs later in the file after an "andw". FileCheck variable references are always contained in [[ ]] pairs, are named, and their names can be formed with the regex "[a-zA-Z][a-zA-Z0-9]*". If a colon follows the name, then it is a definition of the variable; if not, it is a use.

+ +

FileCheck variables can be defined multiple times, and uses always get the latest value. Note that variables are all read at the start of a "CHECK" line and are all defined at the end. This means that if you have something like "CHECK: [[XYZ:.*]]x[[XYZ]]", the check line will read the previous value of the XYZ variable and define a new one after the match is performed. If you need to do something like this you can probably take advantage of the fact that FileCheck is not actually line-oriented when it matches; this allows you to define two separate CHECK lines that match on the same line.

+ +
+ + + + +
+

With a RUN line there are a number of substitutions that are permitted. In + general, any Tcl variable that is available in the substitute + function (in test/lib/llvm.exp) can be substituted into a RUN line. + To make a substitution just write the variable's name preceded by a $. + Additionally, for compatibility reasons with previous versions of the test + library, certain names can be accessed with an alternate syntax: a % prefix. + These alternates are deprecated and may go away in a future version. +

+

Here are the available variable names. The alternate syntax is listed in + parentheses.

+ +
+
$test (%s)
+
The full path to the test case's source. This is suitable for passing + on the command line as the input to an llvm tool.
+ +
$srcdir
+
The source directory from where the "make check" was run.
+ +
objdir
+
The object directory that corresponds to the $srcdir.
+ +
subdir
+
A partial path from the test directory that contains the + sub-directory that contains the test source being executed.
+ +
srcroot
+
The root directory of the LLVM src tree.
+ +
objroot
+
The root directory of the LLVM object tree. This could be the same + as the srcroot.
+ +
path
+
The path to the directory that contains the test case source. This is + for locating any supporting files that are not generated by the test, but + used by the test.
+ +
tmp
+
The path to a temporary file name that could be used for this test case. + The file name won't conflict with other test cases. You can append to it if + you need multiple temporaries. This is useful as the destination of some + redirected output.
+ +
llvmlibsdir (%llvmlibsdir)
+
The directory where the LLVM libraries are located.
+ +
target_triplet (%target_triplet)
+
The target triplet that corresponds to the current host machine (the one + running the test cases). This should probably be called "host".
+ +
llvmgcc (%llvmgcc)
+
The full path to the llvm-gcc executable as specified in the + configured LLVM environment
+ +
llvmgxx (%llvmgxx)
+
The full path to the llvm-g++ executable as specified in the configured LLVM environment
+ +
gccpath
+
The full path to the C compiler used to build LLVM. Note that + this might not be gcc.
+ +
gxxpath
+
The full path to the C++ compiler used to build LLVM. Note that + this might not be g++.
+ +
compile_c (%compile_c)
+
The full command line used to compile LLVM C source code. This has all + the configured -I, -D and optimization options.
+ +
compile_cxx (%compile_cxx)
+
The full command used to compile LLVM C++ source code. This has + all the configured -I, -D and optimization options.
+ +
link (%link)
+
The full command used to link LLVM executables. This has all the configured -I, -L and -l options.
+ +
shlibext (%shlibext)
+
The suffix for the host platform's shared library (dll) files. This includes the period as the first character.
+
+

To add more variables, two things need to be changed. First, add a line in the test/Makefile that creates the site.exp file. This will "set" the variable as a global in the site.exp file. Second, in the test/lib/llvm.exp file, in the substitute proc, add the variable name to the list of "global" declarations at the beginning of the proc. That's it; the variable can then be used in test scripts.

+
+ + + + +
+

To make RUN line writing easier, there are several shell scripts located + in the llvm/test/Scripts directory. This directory is in the PATH + when running tests, so you can just call these scripts using their name. For + example:

+
+
ignore
+
This script runs its arguments and then always returns 0. This is useful in cases where the test needs to cause a tool to generate an error (e.g. to check the error output). However, any program in a pipeline that returns a non-zero result will cause the test to fail. This script overcomes that issue and nicely documents that the test case is purposefully ignoring the result code of the tool.
+ +
not
+
This script runs its arguments and then inverts the result code from + it. Zero result codes become 1. Non-zero result codes become 0. This is + useful to invert the result of a grep. For example "not grep X" means + succeed only if you don't find X in the input.
+
+ +

Sometimes it is necessary to mark a test case as "expected fail" or XFAIL. + You can easily mark a test as XFAIL just by including XFAIL: on a + line near the top of the file. This signals that the test case should succeed + if the test fails. Such test cases are counted separately by the testing tool. To + specify an expected fail, use the XFAIL keyword in the comments of the test + program followed by a colon and one or more regular expressions (separated by + a comma). The regular expressions allow you to XFAIL the test conditionally by + host platform. The regular expressions following the : are matched against the + target triplet for the host machine. If there is a match, the test is expected + to fail. If not, the test is expected to succeed. To XFAIL everywhere just + specify XFAIL: *. Here is an example of an XFAIL line:

+ +
+
+; XFAIL: darwin,sun
+
+
+ +

To make the output more useful, the llvm_runtest function will + scan the lines of the test case for ones that contain a pattern that matches + PR[0-9]+. This is the syntax for specifying a PR (Problem Report) number that + is related to the test case. The number after "PR" specifies the LLVM bugzilla + number. When a PR number is specified, it will be used in the pass/fail + reporting. This is useful to quickly get some context when a test fails.

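For example, a test case might carry a line like the following (the number is + illustrative):
+
+; PR1234
+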
+ +

Finally, any line that contains "END." will cause the special + interpretation of lines to terminate. This is generally done right after the + last RUN: line. This has two side effects: (a) it prevents special + interpretation of lines that are part of the test program, not the + instructions to the test case, and (b) it speeds things up for really big test + cases by avoiding interpretation of the remainder of the file.

+ +
+ + + + + +
+ +

The test-suite module contains a number of programs that can be compiled +with LLVM and executed. These programs are compiled using the native compiler +and various LLVM backends. The output from the program compiled with the +native compiler is assumed correct; the results from the other programs are +compared to the native program output and pass if they match.

+ +

When executing tests, it is usually a good idea to start out with a subset of +the available tests or programs. This makes test run times smaller at first, and +later on it is useful for investigating individual test failures. To run tests +on only a subset of programs, simply change directory to the programs you +want tested and run gmake there. Alternatively, you can run a different +test using the TEST variable to change what tests are run on the +selected programs (see below for more info).

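For example (the directory choice is illustrative):
+
+% cd $LLVM_OBJ_ROOT/projects/test-suite/SingleSource
+% gmake
+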
+ +

In addition to testing correctness, the test-suite directory also +performs timing tests of various LLVM optimizations. It also records +compilation times for the compilers and the JIT. This information can be +used to compare the effectiveness of LLVM's optimizations and code +generation.

+ +

test-suite tests are divided into three types of tests: MultiSource, +SingleSource, and External.

+ +
    +
  • test-suite/SingleSource +

    The SingleSource directory contains test programs that are only a single +source file in size. These are usually small benchmark programs or small +programs that calculate a particular value. Several such programs are grouped +together in each directory.

  • + +
  • test-suite/MultiSource +

    The MultiSource directory contains subdirectories which contain entire +programs with multiple source files. Large benchmarks and whole applications +go here.

  • + +
  • test-suite/External +

    The External directory contains Makefiles for building code that is external +to (i.e., not distributed with) LLVM. The most prominent members of this +directory are the SPEC 95 and SPEC 2000 benchmark suites. The External +directory does not contain these actual tests, but only the Makefiles that know +how to properly compile these programs from somewhere else. The presence and +location of these external programs is configured by the test-suite +configure script.

  • +
+ +

Each tree is then subdivided into several categories, including applications, +benchmarks, regression tests, code that is strange grammatically, etc. These +categories should be relatively self-explanatory.

+ +

Some tests are known to fail. Some are bugs that we have not fixed yet; +others are features that we haven't added yet (or may never add). In the +regression tests, the result for such tests will be XFAIL (eXpected FAILure). +In this way, you can tell the difference between an expected and unexpected +failure.

+ +

The tests in the test suite have no such feature at this time. If the +test passes, only warnings and other miscellaneous output will be generated. If +a test fails, a large <program> FAILED message will be displayed. This +will help you separate benign warnings from actual test failures.

+ +
+ + + + + +
+ +

First, all tests are executed within the LLVM object directory tree. They +are not executed inside of the LLVM source tree. This is because the +test suite creates temporary files during execution.

+ +

To run the test suite, you need to use the following steps:

+ +
    +
  1. cd into the llvm/projects directory in your source tree. +
  2. + +
  3. Check out the test-suite module with:

    + +
    +
    +% svn co http://llvm.org/svn/llvm-project/test-suite/trunk test-suite
    +
    +
    +

    This will get the test suite into llvm/projects/test-suite.

    +
  4. +
  5. Configure and build llvm.

  6. +
  7. Configure and build llvm-gcc.

  8. +
  9. Install llvm-gcc somewhere.

  10. +
  11. Re-configure llvm from the top level of + each build tree (LLVM object directory tree) in which you want + to run the test suite, just as you do before building LLVM.

    +

During the re-configuration, you must either: (1) + have the llvm-gcc you just built in your path, or (2) + specify the directory where your just-built llvm-gcc is + installed using --with-llvmgccdir=$LLVM_GCC_DIR.

    +

    You must also tell the configure machinery that the test suite + is available so it can be configured for your build tree:

    +
    +
    +% cd $LLVM_OBJ_ROOT ; $LLVM_SRC_ROOT/configure [--with-llvmgccdir=$LLVM_GCC_DIR]
    +
    +
    +

    [Remember that $LLVM_GCC_DIR is the directory where you + installed llvm-gcc, not its src or obj directory.]

    +
  12. + +
  13. You can now run the test suite from your build tree as follows:

    +
    +
    +% cd $LLVM_OBJ_ROOT/projects/test-suite
    +% make
    +
    +
    +
  14. +
+

Note that the second and third steps only need to be done once. After you +have the suite checked out and configured, you don't need to do it again (unless +the test code or configure script changes).

+ +
+ + + + + +
+

In order to run the External tests in the test-suite + module, you must specify --with-externals. This + must be done during the re-configuration step (see above), + and the llvm re-configuration must recognize the + previously-built llvm-gcc. If any of these is missing or + neglected, the External tests won't work.

+
+
--with-externals
+
--with-externals=<directory>
+
+ This tells LLVM where to find any external tests. They are expected to be + in specifically named subdirectories of <directory>. + If directory is left unspecified, + configure uses the default value + /home/vadve/shared/benchmarks/speccpu2000/benchspec. + Subdirectory names known to LLVM include: +
+
spec95
+
speccpu2000
+
speccpu2006
+
povray31
+
+ Others are added from time to time, and can be determined from + configure. +
+ + + + +
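For example (paths are illustrative):
+
+% $LLVM_SRC_ROOT/configure --with-llvmgccdir=$LLVM_GCC_DIR \
+    --with-externals=/path/to/my/benchmarks
+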
+

In addition to the regular "whole program" tests, the test-suite +module also provides a mechanism for compiling the programs in different ways. +If the variable TEST is defined on the gmake command line, the test system will +include a Makefile named TEST.<value of TEST variable>.Makefile. +This Makefile can modify build rules to yield different results.

+ +

For example, the LLVM nightly tester uses TEST.nightly.Makefile to +create the nightly test reports. To run the nightly tests, run gmake +TEST=nightly.

+ +

There are several TEST Makefiles available in the tree. Some of them are +designed for internal LLVM research and will not work outside of the LLVM +research group. They may still be valuable, however, as a guide to writing your +own TEST Makefile for any optimization or analysis passes that you develop with +LLVM.

+ +
+ + + + +
+

There are a number of ways to run the tests and generate output. The + simplest is to run gmake with no arguments. This will + compile and run all programs in the tree using a number of different methods + and compare results. Any failures are reported in the output, but are likely + drowned in the other output. Passes are not reported explicitly.

+ +

Somewhat better is running gmake TEST=sometest test, which runs + the specified test and usually adds per-program summaries to the output + (depending on which sometest you use). For example, the nightly test + explicitly outputs TEST-PASS or TEST-FAIL for every test after each program. + Though these lines are still drowned in the output, it's easy to grep the + output logs in the Output directories.

+ +

Even better are the report and report.format targets + (where format is one of html, csv, text or + graphs). The exact contents of the report are dependent on which + TEST you are running, but the text results are always shown at the + end of the run and the results are always stored in the + report.<type>.format file (when running with + TEST=<type>). + + The report also generates a file called + report.<type>.raw.out containing the output of the entire test + run. +

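For example, to run the nightly tests and produce an HTML report:
+
+% gmake TEST=nightly report.html
+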
+ + + + + +
+ +

Assuming you can run the test suite (e.g. "gmake TEST=nightly report" +should work), it is really easy to run optimizations or code generator +components against every program in the tree, collecting statistics or running +custom checks for correctness. At base, this is how the nightly tester works; +it's just one example of a general framework.

+ +

Let's say that you have an LLVM optimization pass, and you want to see how +many times it triggers. The first thing you should do is add an LLVM +statistic to your pass, which +will tally counts of things you care about.

+ +

Following this, you can set up a test and a report that collects these and +formats them for easy viewing. This consists of two files, a +"test-suite/TEST.XXX.Makefile" fragment (where XXX is the name of your +test) and a "test-suite/TEST.XXX.report" file that indicates how to +format the output into a table. There are many example reports of various +levels of sophistication included with the test suite, and the framework is very +general.

+ +

If you are interested in testing an optimization pass, check out the +"libcalls" test as an example. It can be run like this:

+ +

+
+% cd llvm/projects/test-suite/MultiSource/Benchmarks  # or some other level
+% make TEST=libcalls report
+
+
+ +

This will do a bunch of stuff, then eventually print a table like this:

+ +
+
+Name                                  | total | #exit |
+...
+FreeBench/analyzer/analyzer           | 51    | 6     | 
+FreeBench/fourinarow/fourinarow       | 1     | 1     | 
+FreeBench/neural/neural               | 19    | 9     | 
+FreeBench/pifft/pifft                 | 5     | 3     | 
+MallocBench/cfrac/cfrac               | 1     | *     | 
+MallocBench/espresso/espresso         | 52    | 12    | 
+MallocBench/gs/gs                     | 4     | *     | 
+Prolangs-C/TimberWolfMC/timberwolfmc  | 302   | *     | 
+Prolangs-C/agrep/agrep                | 33    | 12    | 
+Prolangs-C/allroots/allroots          | *     | *     | 
+Prolangs-C/assembler/assembler        | 47    | *     | 
+Prolangs-C/bison/mybison              | 74    | *     | 
+...
+
+
+ +

This basically greps the -stats output and displays it in a table. +You can also use the "TEST=libcalls report.html" target to get the table in HTML +form, and similarly for report.csv and report.tex.

+ +

The source for this is in test-suite/TEST.libcalls.*. The format is pretty +simple: the Makefile indicates how to run the test (in this case, +"opt -simplify-libcalls -stats"), and the report contains one line for +each column of the output. The first value is the header for the column and the +second is the regex to grep the output of the command for. There are lots of +example reports that can do fancy stuff.

+ +
+ + + +
+
+ Valid CSS + Valid HTML 4.01 + + John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/UsingLibraries.html b/final/docs/UsingLibraries.html new file mode 100644 index 00000000000..e06838b4cdb --- /dev/null +++ b/final/docs/UsingLibraries.html @@ -0,0 +1,443 @@ + + + + Using The LLVM Libraries + + + +
Using The LLVM Libraries
+
    +
  1. Abstract
  2. +
  3. Introduction
  4. +
  5. Library Descriptions
  6. +
  7. Library Dependencies
  8. +
  9. Linkage Rules Of Thumb +
      +
    1. Always link LLVMCore, LLVMSupport, LLVMSystem +
    2. Never link both archive and re-linked +
    +
  10. +
+ +
+

Written by Reid Spencer

+
+ +

Warning: This document is out of date, for more + information please + see llvm-config or, + if you use CMake, the CMake LLVM + guide.

+ + + +
+

Amongst other things, LLVM is a toolkit for building compilers, linkers, + runtime executives, virtual machines, and other program execution related + tools. In addition to the LLVM tool set, the functionality of LLVM is + available through a set of libraries. To use LLVM as a toolkit for + constructing tools, a developer needs to understand what is contained in the + various libraries, what they depend on, and how to use them. Fortunately, + there is a tool, llvm-config, to aid with this. This document + describes the contents of the libraries and how to use llvm-config + to generate command line options. +

+
+ + + +
+

If you're writing a compiler, virtual machine, or any other utility based + on LLVM, you'll need to figure out which of the many library files you will + need to link with to be successful. An understanding of the contents of these + libraries will be useful in coming up with an optimal specification for the + libraries to link with. The purpose of this document is to reduce some of + the trial and error that the author experienced in using LLVM.

+

LLVM produces two types of libraries: archives (ending in .a) and + objects (ending in .o). However, both are libraries. Libraries ending + in .o are known as re-linked libraries because they contain all the + compilation units of the library linked together as a single .o file. + Furthermore, several of the libraries have both forms of library. The + re-linked libraries are used whenever you want to include all symbols from the + library. The archive libraries are used whenever you want to only resolve + outstanding symbols at that point in the link without including everything in + the library.

+

If you're using the LLVM Makefile system to link your tools, you will use + the LLVMLIBS make variable + (see the Makefile Guide for + details). This variable specifies which LLVM libraries to link into your tool + and the order in which they will be linked. You specify re-linked libraries by + naming the library without a suffix. You specify archive libraries by naming + the library with a .a suffix but without the lib prefix. The + order in which the libraries appear in the LLVMLIBS variable + definition is the order in which they will be linked. Getting this order + correct for your tool can sometimes be challenging. +

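For example, a tool's Makefile might look like the following (the tool name, + LEVEL depth, and library selection are illustrative only):
+
+LEVEL = ../../..
+TOOLNAME = mytool
+LLVMLIBS = LLVMAsmParser.a LLVMBCWriter.a LLVMCore LLVMSupport.a LLVMSystem.a
+include $(LEVEL)/Makefile.common
+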
+ +
Library Descriptions
+
+

The table below categorizes each library:
Library               | Forms | Description
----------------------+-------+------------------------------------------------------------
Core Libraries
LLVMArchive           | .a    | LLVM archive reading and writing
LLVMAsmParser         | .a    | LLVM assembly parsing
LLVMBCReader          | .a    | LLVM bitcode reading
LLVMBCWriter          | .a    | LLVM bitcode writing
LLVMCore              | .a    | LLVM core intermediate representation
LLVMDebugger          | .a    | Source level debugging support
LLVMLinker            | .a    | Bitcode and archive linking interface
LLVMSupport           | .a    | General support utilities
LLVMSystem            | .a    | Operating system abstraction layer
LLVMbzip2             | .a    | BZip2 compression library
Analysis Libraries
LLVMAnalysis          | .a    | Various analysis passes.
LLVMDataStructure     | .o    | Data structure analysis passes.
LLVMipa               | .a    | Inter-procedural analysis passes.
Transformation Libraries
LLVMInstrumentation   | .a    | Instrumentation passes.
LLVMipo               | .a    | All inter-procedural optimization passes.
LLVMScalarOpts        | .a    | All scalar optimization passes.
LLVMTransformUtils    | .a    | Transformation utilities used by many passes.
Code Generation Libraries
LLVMCodeGen           | .o    | Native code generation infrastructure
LLVMSelectionDAG      | .o    | Aggressive instruction selector for directed acyclic graphs
Target Libraries
LLVMAlpha             | .o    | Code generation for Alpha architecture
LLVMARM               | .o    | Code generation for ARM architecture
LLVMCBackend          | .o    | 'C' language code generator.
LLVMPowerPC           | .o    | Code generation for PowerPC architecture
LLVMSparc             | .o    | Code generation for Sparc architecture
LLVMTarget            | .a    | Generic code generation utilities.
LLVMX86               | .o    | Code generation for Intel x86 architecture
Runtime Libraries
LLVMInterpreter       | .o    | Bitcode Interpreter
LLVMJIT               | .o    | Bitcode JIT Compiler
LLVMExecutionEngine   | .o    | Virtual machine engine
+

+ + +
Using llvm-config
+
+

The llvm-config tool is a Perl script that prints various kinds of + information on its output. For example, the source or object directories + used to build LLVM can be accessed by passing options to llvm-config. + For complete details on this tool, please see the + manual page.

+

To understand the relationships between libraries, the llvm-config + tool can be very useful. If all you know is that you want certain libraries to + be available, you can generate the complete set of libraries to link with + using one of four options, as below:

+
    +
  1. --ldflags. This generates the command line options necessary to + be passed to the ld tool in order to link with LLVM. Most notably, + the -L option is provided to specify a library search directory + that contains the LLVM libraries.
  2. +
  3. --libs. This generates command line options suitable for + use with a gcc-style linker. That is, libraries are given with a -l option + and object files are given with a full path.
  4. +
  5. --libnames. This generates a list of just the library file + names. If you know the directory in which these files reside (see --ldflags) + then you can find the libraries there.
  6. +
  7. --libfiles. This generates the full path names of the + LLVM library files.
  8. +
+
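For example, these invocations print the linker flags and then the libraries + needed for a JIT-capable tool (the component names given here are + illustrative):
+
+% llvm-config --ldflags
+% llvm-config --libs engine bcreader scalaropts
+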

If you wish to delve further into how llvm-config generates the + correct order (based on library dependencies), please see the tool named + GenLibDeps.pl in the utils source directory of LLVM.

+ + + + + + + +

Dependency Relationships Of Libraries

+

This graph shows the dependency of archive libraries on other archive + libraries or objects. Where a library has both archive and object forms, only + the archive form is shown.

+ Library Dependencies +

Dependency Relationships Of Object Files

+

This graph shows the dependency of object files on archive libraries or + other objects. Where a library has both object and archive forms, only the + dependency to the archive form is shown.

+ Object File Dependencies +

The following list shows the dependency relationships between libraries in + textual form. The information is the same as shown on the graphs but arranged + alphabetically.

+
+
libLLVMAnalysis.a
    +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
libLLVMArchive.a
    +
  • libLLVMBCReader.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMAsmParser.a
    +
  • libLLVMCore.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMBCReader.a
    +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMBCWriter.a
    +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMCodeGen.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMScalarOpts.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMTransformUtils.a
  • +
+
libLLVMCore.a
    +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMDebugger.a
    +
  • libLLVMBCReader.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMInstrumentation.a
    +
  • libLLVMCore.a
  • +
  • libLLVMScalarOpts.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMTransformUtils.a
  • +
+
libLLVMLinker.a
    +
  • libLLVMArchive.a
  • +
  • libLLVMBCReader.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMScalarOpts.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMTransformUtils.a
  • +
+
libLLVMSelectionDAG.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMTransformUtils.a
  • +
+
libLLVMSupport.a
    +
  • libLLVMSystem.a
  • +
  • libLLVMbzip2.a
  • +
+
libLLVMSystem.a
    +
+
libLLVMTarget.a
    +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMTransformUtils.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMipa.a
  • +
+
libLLVMbzip2.a
    +
+
libLLVMipa.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
+
libLLVMipo.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMTransformUtils.a
  • +
  • libLLVMipa.a
  • +
+
libLLVMlto.a
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMBCReader.a
  • +
  • libLLVMBCWriter.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMLinker.a
  • +
  • libLLVMScalarOpts.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMipa.a
  • +
  • libLLVMipo.a
  • +
+
LLVMARM.o
    +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSelectionDAG.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMAlpha.o
    +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSelectionDAG.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMCBackend.o
    +
  • libLLVMAnalysis.a
  • +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMScalarOpts.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
  • libLLVMTransformUtils.a
  • +
  • libLLVMipa.a
  • +
+
LLVMExecutionEngine.o
    +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMInterpreter.o
    +
  • LLVMExecutionEngine.o
  • +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMJIT.o
    +
  • LLVMExecutionEngine.o
  • +
  • libLLVMCore.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMPowerPC.o
    +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSelectionDAG.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMSparc.o
    +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSelectionDAG.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
LLVMX86.o
    +
  • libLLVMCodeGen.a
  • +
  • libLLVMCore.a
  • +
  • libLLVMSelectionDAG.a
  • +
  • libLLVMSupport.a
  • +
  • libLLVMSystem.a
  • +
  • libLLVMTarget.a
  • +
+
+
+ + + +
+

This section contains various "rules of thumb" about what files you + should link into your programs.

+
+ + +
+

No matter what you do with LLVM, the last three entries in the value of + your LLVMLIBS make variable should always be: + LLVMCore LLVMSupport.a LLVMSystem.a. There are no LLVM + programs that don't depend on these three.

+
+ + +
+

There is never any point to linking both the re-linked (.o) and + the archive (.a) versions of a library. Since the re-linked version + includes the entire library, the archive version will not resolve any symbols. + You could even end up with a link error if you place the archive version before + the re-linked version on the linker's command line.

+
+ +
+ + + + diff --git a/final/docs/WritingAnLLVMBackend.html b/final/docs/WritingAnLLVMBackend.html new file mode 100644 index 00000000000..729023240de --- /dev/null +++ b/final/docs/WritingAnLLVMBackend.html @@ -0,0 +1,2556 @@ + + + + + Writing an LLVM Compiler Backend + + + + + +
+ Writing an LLVM Compiler Backend +
+ +
    +
  1. Introduction + +
  2. Target Machine
  3. +
  4. Target Registration
  5. +
  6. Register Set and Register Classes +
  7. +
  8. Instruction Set +
  9. +
  10. Instruction Selector +
  11. +
  12. Assembly Printer
  13. +
  14. Subtarget Support
  15. +
  16. JIT Support +
  17. +
+ +
+

Written by Mason Woo and + Misha Brukman

+
+ + + + + +
+ +

+This document describes techniques for writing compiler backends that convert +the LLVM Intermediate Representation (IR) to code for a specified machine or +other languages. Code intended for a specific machine can take the form of +either assembly code or binary code (usable for a JIT compiler). +

+ +

+The backend of LLVM features a target-independent code generator that may create +output for several types of target CPUs — including X86, PowerPC, Alpha, +and SPARC. The backend may also be used to generate code targeted at SPUs of the +Cell processor or GPUs to support the execution of compute kernels. +

+ +

+The document focuses on existing examples found in subdirectories +of llvm/lib/Target in a downloaded LLVM release. In particular, this +document focuses on the example of creating a static compiler (one that emits +text assembly) for a SPARC target, because SPARC has fairly standard +characteristics, such as a RISC instruction set and straightforward calling +conventions. +

+ +
+ + + +
+ +

+The audience for this document is anyone who needs to write an LLVM backend to +generate code for a specific hardware or software target. +

+ +
+ + + +
+ +

+These essential documents must be read before reading this document: +

+ +
    +
  • LLVM Language Reference + Manual — a reference manual for the LLVM assembly language.
  • + +
  • The LLVM + Target-Independent Code Generator — a guide to the components + (classes and code generation algorithms) for translating the LLVM internal + representation into machine code for a specified target. Pay particular + attention to the descriptions of code generation stages: Instruction + Selection, Scheduling and Formation, SSA-based Optimization, Register + Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations, + and Code Emission.
  • + +
  • TableGen + Fundamentals —a document that describes the TableGen + (tblgen) application that manages domain-specific information to + support LLVM code generation. TableGen processes input from a target + description file (.td suffix) and generates C++ code that can be + used for code generation.
  • + +
  • Writing an LLVM + Pass — The assembly printer is a FunctionPass, as are + several SelectionDAG processing steps.
  • +
+ +

+To follow the SPARC examples in this document, have a copy of +The SPARC Architecture +Manual, Version 8 for reference. For details about the ARM instruction +set, refer to the ARM Architecture +Reference Manual. For more about the GNU Assembler format +(GAS), see +Using As, +especially for the assembly printer. Using As contains a list of target +machine dependent features. +

+ +
+ + + +
+ +

+To write a compiler backend for LLVM that converts the LLVM IR to code for a +specified target (machine or other language), follow these steps: +

+ +
    +
  • Create a subclass of the TargetMachine class that describes characteristics + of your target machine. Copy existing examples of specific TargetMachine + class and header files; for example, start with + SparcTargetMachine.cpp and SparcTargetMachine.h, but + change the file names for your target. Similarly, change code that + references "Sparc" to reference your target.
  • + +
  • Describe the register set of the target. Use TableGen to generate code for + register definition, register aliases, and register classes from a + target-specific RegisterInfo.td input file. You should also write + additional code for a subclass of the TargetRegisterInfo class that + represents the class register file data used for register allocation and + also describes the interactions between registers.
  • + +
  • Describe the instruction set of the target. Use TableGen to generate code + for target-specific instructions from target-specific versions of + TargetInstrFormats.td and TargetInstrInfo.td. You should + write additional code for a subclass of the TargetInstrInfo class to + represent machine instructions supported by the target machine.
  • + +
  • Describe the selection and conversion of the LLVM IR from a Directed Acyclic + Graph (DAG) representation of instructions to native target-specific + instructions. Use TableGen to generate code that matches patterns and + selects instructions based on additional information in a target-specific + version of TargetInstrInfo.td. Write code + for XXXISelDAGToDAG.cpp, where XXX identifies the specific target, + to perform pattern matching and DAG-to-DAG instruction selection. Also write + code in XXXISelLowering.cpp to replace or remove operations and + data types that are not supported natively in a SelectionDAG.
  • + +
  • Write code for an assembly printer that converts LLVM IR to a GAS format for + your target machine. You should add assembly strings to the instructions + defined in your target-specific version of TargetInstrInfo.td. You + should also write code for a subclass of AsmPrinter that performs the + LLVM-to-assembly conversion and a trivial subclass of TargetAsmInfo.
  • + +
  • Optionally, add support for subtargets (i.e., variants with different + capabilities). You should also write code for a subclass of the + TargetSubtarget class, which allows you to use the -mcpu= + and -mattr= command-line options.
  • + +
  • Optionally, add JIT support and create a machine code emitter (subclass of + TargetJITInfo) that is used to emit binary code directly into memory.
  • +
+ +

+In the .cpp and .h files, initially stub up these methods and +then implement them later. Initially, you may not know which private members +the class will need and which components will need to be subclassed.

+ +
+ + + +
+ +

+To actually create your compiler backend, you need to create and modify a few +files. The absolute minimum is discussed here. But to actually use the LLVM +target-independent code generator, you must perform the steps described in +the LLVM +Target-Independent Code Generator document. +

+ +

+First, you should create a subdirectory under lib/Target to hold all +the files related to your target. If your target is called "Dummy," create the +directory lib/Target/Dummy. +

+ +

+In this new +directory, create a Makefile. It is easiest to copy a +Makefile of another target and modify it. It should at least contain +the LEVEL, LIBRARYNAME and TARGET variables, and then +include $(LEVEL)/Makefile.common. The library can be +named LLVMDummy (for example, see the MIPS target). Alternatively, you +can split the library into LLVMDummyCodeGen +and LLVMDummyAsmPrinter, the latter of which should be implemented in a +subdirectory below lib/Target/Dummy (for example, see the PowerPC +target). +

+ +
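A minimal Makefile for the hypothetical Dummy target might look roughly like +this (a sketch; compare against the Makefile of an existing target before +relying on it):
+
+LEVEL = ../..
+LIBRARYNAME = LLVMDummy
+TARGET = Dummy
+include $(LEVEL)/Makefile.common
+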

+Note that these two naming schemes are hardcoded into llvm-config. +Using any other naming scheme will confuse llvm-config and produce a +lot of (seemingly unrelated) linker errors when linking llc. +

+ +

+To make your target actually do something, you need to implement a subclass of +TargetMachine. This implementation should typically be in the file +lib/Target/DummyTargetMachine.cpp, but any file in +the lib/Target directory will be built and should work. To use LLVM's +target independent code generator, you should do what all current machine +backends do: create a subclass of LLVMTargetMachine. (To create a +target from scratch, create a subclass of TargetMachine.) +

+ +

+To get LLVM to actually build and link your target, you need to add it to +the TARGETS_TO_BUILD variable. To do this, you modify the configure +script to know about your target when parsing the --enable-targets +option. Search the configure script for TARGETS_TO_BUILD, add your +target to the lists there (some creativity required), and then +reconfigure. Alternatively, you can change autoconf/configure.ac and +regenerate configure by running ./autoconf/AutoRegen.sh.

+ +
+ + + + + +
+ +

+LLVMTargetMachine is designed as a base class for targets implemented +with the LLVM target-independent code generator. The LLVMTargetMachine +class should be specialized by a concrete target class that implements the +various virtual methods. LLVMTargetMachine is defined as a subclass of +TargetMachine in include/llvm/Target/TargetMachine.h. The +TargetMachine class implementation (TargetMachine.cpp) also +processes numerous command-line options. +

+ +

+To create a concrete target-specific subclass of LLVMTargetMachine, +start by copying an existing TargetMachine class and header. You +should name the files that you create to reflect your specific target. For +instance, for the SPARC target, name the files SparcTargetMachine.h and +SparcTargetMachine.cpp. +

+ +

+For a target machine XXX, the implementation of +XXXTargetMachine must have access methods to obtain objects that +represent target components. These methods are named get*Info, and are +intended to obtain the instruction set (getInstrInfo), register set +(getRegisterInfo), stack frame layout (getFrameInfo), and +similar information. XXXTargetMachine must also implement the +getTargetData method to access an object with target-specific data +characteristics, such as data type size and alignment requirements. +

+ +

+For instance, for the SPARC target, the header file +SparcTargetMachine.h declares prototypes for several get*Info +and getTargetData methods that simply return a class member. +

+ +
+
+namespace llvm {
+
+class Module;
+
+class SparcTargetMachine : public LLVMTargetMachine {
+  const TargetData DataLayout;       // Calculates type size & alignment
+  SparcSubtarget Subtarget;
+  SparcInstrInfo InstrInfo;
+  TargetFrameInfo FrameInfo;
+  
+protected:
+  virtual const TargetAsmInfo *createTargetAsmInfo() const;
+  
+public:
+  SparcTargetMachine(const Module &M, const std::string &FS);
+
+  virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; }
+  virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; }
+  virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; }
+  virtual const TargetRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  virtual const TargetData *getTargetData() const { return &DataLayout; }
+  static unsigned getModuleMatchQuality(const Module &M);
+
+  // Pass Pipeline Configuration
+  virtual bool addInstSelector(PassManagerBase &PM, bool Fast);
+  virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast);
+};
+
+} // end namespace llvm
+
+
+ +
+ + +
+ +
    +
  • getInstrInfo()
  • +
  • getRegisterInfo()
  • +
  • getFrameInfo()
  • +
  • getTargetData()
  • +
  • getSubtargetImpl()
  • +
+ +

For some targets, you also need to support the following methods:

+ +
    +
  • getTargetLowering()
  • +
  • getJITInfo()
  • +
+ +

+In addition, the XXXTargetMachine constructor should specify a +TargetDescription string that determines the data layout for the target +machine, including characteristics such as pointer size, alignment, and +endianness. For example, the constructor for SparcTargetMachine contains the +following: +

+ +
+
+SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS)
+  : DataLayout("E-p:32:32-f128:128:128"),
+    Subtarget(M, FS), InstrInfo(Subtarget),
+    FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
+}
+
+
+ +
+ +
+ +

Hyphens separate portions of the TargetDescription string.

+ +
    +
  • An upper-case "E" in the string indicates a big-endian target data + model. A lower-case "e" indicates little-endian.
  • + +
  • "p:" is followed by pointer information: size, ABI alignment, and + preferred alignment. If only two figures follow "p:", then the + first value is pointer size, and the second value is both ABI and preferred + alignment.
  • + +
  • Then a letter for numeric type alignment: "i", "f", + "v", or "a" (corresponding to integer, floating point, + vector, or aggregate). "i", "v", or "a" are + followed by ABI alignment and preferred alignment. "f" is followed + by three values: the first indicates the size of a long double, then ABI + alignment, and then ABI preferred alignment.
  • +
+ +
+ + + + + +
+ +

+You must also register your target with the TargetRegistry, which is +what other LLVM tools use to look up and use your target at +runtime. The TargetRegistry can be used directly, but for most targets +there are helper templates which should take care of the work for you.

+ +

+All targets should declare a global Target object which is used to +represent the target during registration. Then, in the target's TargetInfo +library, the target should define that object and use +the RegisterTarget template to register the target. For example, the Sparc registration code looks like this: +

+ +
+
+Target llvm::TheSparcTarget;
+
+extern "C" void LLVMInitializeSparcTargetInfo() { 
+  RegisterTarget<Triple::sparc, /*HasJIT=*/false>
+    X(TheSparcTarget, "sparc", "Sparc");
+}
+
+
+ +

+This allows the TargetRegistry to look up the target by name or by +target triple. In addition, most targets will also register additional features +which are available in separate libraries. These registration steps are +separate, because some clients may wish to only link in some parts of the target +-- the JIT code generator does not require the use of the assembler printer, for +example. Here is an example of registering the Sparc assembly printer: +

+ +
+
+extern "C" void LLVMInitializeSparcAsmPrinter() { 
+  RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
+}
+
+
+ +

+For more information, see +"llvm/Target/TargetRegistry.h". +

+ +
+ + + + + +
+ +

+You should describe a concrete target-specific class that represents the +register file of a target machine. This class is called XXXRegisterInfo +(where XXX identifies the target) and represents the class register +file data that is used for register allocation. It also describes the +interactions between registers. +

+ +

+You also need to define register classes to categorize related registers. A +register class should be added for groups of registers that are all treated the +same way for some instruction. Typical examples are register classes for +integer, floating-point, or vector registers. A register allocator allows an +instruction to use any register in a specified register class to perform the +instruction in a similar manner. Register classes allocate virtual registers to +instructions from these sets, and register classes let the target-independent +register allocator automatically choose the actual registers. +

+ +

+Much of the code for registers, including register definition, register aliases, +and register classes, is generated by TableGen from XXXRegisterInfo.td +input files and placed in XXXGenRegisterInfo.h.inc and +XXXGenRegisterInfo.inc output files. Some of the code in the +implementation of XXXRegisterInfo requires hand-coding. +

+ +
+ + + + +
+ +

+The XXXRegisterInfo.td file typically starts with register definitions +for a target machine. The Register class (specified +in Target.td) is used to define an object for each register. The +specified string n becomes the Name of the register. The +basic Register object does not have any subregisters and does not +specify any aliases. +

+ +
+
+class Register<string n> {
+  string Namespace = "";
+  string AsmName = n;
+  string Name = n;
+  int SpillSize = 0;
+  int SpillAlignment = 0;
+  list<Register> Aliases = [];
+  list<Register> SubRegs = [];
+  list<int> DwarfNumbers = [];
+}
+
+
+ +

+For example, in the X86RegisterInfo.td file, there are register +definitions that utilize the Register class, such as: +

+ +
+
+def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>;
+
+
+ +

+This defines the register AL and assigns it values (with +DwarfRegNum) that are used by gcc, gdb, or a debug +information writer to identify a register. For register +AL, DwarfRegNum takes an array of 3 values representing 3 +different modes: the first element is for X86-64, the second for exception +handling (EH) on X86-32, and the third is generic. -1 is a special Dwarf number +that indicates the gcc number is undefined, and -2 indicates the register number +is invalid for this mode. +

+ +

+From the previously described line in the X86RegisterInfo.td file, +TableGen generates this code in the X86GenRegisterInfo.inc file: +

+ +
+
+static const unsigned GR8[] = { X86::AL, ... };
+
+const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 };
+
+const TargetRegisterDesc RegisterDescriptors[] = { 
+  ...
+{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ...
+
+
+ +

+From the register info file, TableGen generates a TargetRegisterDesc +object for each register. TargetRegisterDesc is defined in +include/llvm/Target/TargetRegisterInfo.h with the following fields: +

+ +
+
+struct TargetRegisterDesc {
+  const char     *AsmName;      // Assembly language name for the register
+  const char     *Name;         // Printable name for the reg (for debugging)
+  const unsigned *AliasSet;     // Register Alias Set
+  const unsigned *SubRegs;      // Sub-register set
+  const unsigned *ImmSubRegs;   // Immediate sub-register set
+  const unsigned *SuperRegs;    // Super-register set
+};
+
+ +

+TableGen uses the entire target description file (.td) to determine +text names for the register (in the AsmName and Name fields of +TargetRegisterDesc) and the relationships of other registers to the +defined register (in the other TargetRegisterDesc fields). In this +example, other definitions establish the registers "AX", +"EAX", and "RAX" as aliases for one another, so TableGen +generates a null-terminated array (AL_AliasSet) for this register alias +set. +

+ +

+The Register class is commonly used as a base class for more complex +classes. In Target.td, the Register class is the base for the +RegisterWithSubRegs class that is used to define registers that need to +specify subregisters in the SubRegs list, as shown here: +

+ +
+
+class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
+  let SubRegs = subregs;
+}
+
+
+ +

+In SparcRegisterInfo.td, additional register classes are defined for +SPARC: a Register subclass, SparcReg, and further subclasses: Ri, +Rf, and Rd. SPARC registers are identified by 5-bit ID +numbers, which is a feature common to these subclasses. Note the use of +'let' expressions to override values that are initially defined in a +superclass (such as SubRegs field in the Rd class). +

+ +
+
+class SparcReg<string n> : Register<n> {
+  field bits<5> Num;
+  let Namespace = "SP";
+}
+// Ri - 32-bit integer registers
+class Ri<bits<5> num, string n> : SparcReg<n> {
+  let Num = num;
+}
+// Rf - 32-bit floating-point registers
+class Rf<bits<5> num, string n> : SparcReg<n> {
+  let Num = num;
+}
+// Rd - Slots in the FP register file for 64-bit floating-point values.
+class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
+  let Num = num;
+  let SubRegs = subregs;
+}
+
+
+ +

+In the SparcRegisterInfo.td file, there are register definitions that +utilize these subclasses of Register, such as: +

+ +
+
+def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>;
+def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>;
+...
+def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>;
+def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>;
+...
+def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>;
+def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>;
+
+
+ +

+The last two registers shown above (D0 and D1) are +double-precision floating-point registers that are aliases for pairs of +single-precision floating-point sub-registers. In addition to aliases, the +sub-register and super-register relationships of the defined register are in +fields of a register's TargetRegisterDesc. +

+ +
+ + + + +
+ +

+The RegisterClass class (specified in Target.td) is used to +define an object that represents a group of related registers and also defines +the default allocation order of the registers. A target description file +XXXRegisterInfo.td that uses Target.td can construct register +classes using the following class: +

+ +
+
+class RegisterClass<string namespace,
+list<ValueType> regTypes, int alignment,
+                    list<Register> regList> {
+  string Namespace = namespace;
+  list<ValueType> RegTypes = regTypes;
+  int Size = 0;  // spill size, in bits; zero lets tblgen pick the size
+  int Alignment = alignment;
+
+  // CopyCost is the cost of copying a value between two registers
+  // default value 1 means a single instruction
+  // A negative value means copying is extremely expensive or impossible
+  int CopyCost = 1;  
+  list<Register> MemberList = regList;
+  
+  // for register classes that are subregisters of this class
+  list<RegisterClass> SubRegClassList = [];  
+  
+  code MethodProtos = [{}];  // to insert arbitrary code
+  code MethodBodies = [{}];
+}
+
+
+ +

To define a RegisterClass, use the following 4 arguments:

+ +
    +
  • The first argument of the definition is the name of the namespace.
  • + +
  • The second argument is a list of ValueType register type values + that are defined in include/llvm/CodeGen/ValueTypes.td. Defined + values include integer types (such as i16, i32, + and i1 for Boolean), floating-point types + (f32, f64), and vector types (for example, v8i16 + for an 8 x i16 vector). All registers in a RegisterClass + must have the same ValueType, but some registers may store vector + data in different configurations. For example a register that can process a + 128-bit vector may be able to handle 16 8-bit integer elements, 8 16-bit + integers, 4 32-bit integers, and so on.
  • + +
  • The third argument of the RegisterClass definition specifies the + alignment required of the registers when they are stored or loaded to + memory.
  • + +
  • The final argument, regList, specifies which registers are in this + class. If an allocation_order_* method is not specified, + then regList also defines the order of allocation used by the + register allocator.
  • +
+ +

+In SparcRegisterInfo.td, three RegisterClass objects are defined: +FPRegs, DFPRegs, and IntRegs. For all three register +classes, the first argument defines the namespace with the string +'SP'. FPRegs defines a group of 32 single-precision +floating-point registers (F0 to F31); DFPRegs defines +a group of 16 double-precision registers +(D0-D15). For IntRegs, the MethodProtos +and MethodBodies fields are used by TableGen to insert the specified +code into generated output.

+ +
+
+def FPRegs : RegisterClass<"SP", [f32], 32,
+  [F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,
+   F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+
+def DFPRegs : RegisterClass<"SP", [f64], 64,
+  [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]>;
+ 
+def IntRegs : RegisterClass<"SP", [i32], 32,
+    [L0, L1, L2, L3, L4, L5, L6, L7,
+     I0, I1, I2, I3, I4, I5,
+     O0, O1, O2, O3, O4, O5, O7,
+     G1,
+     // Non-allocatable regs:
+     G2, G3, G4, 
+     O6,        // stack ptr
+    I6,        // frame ptr
+     I7,        // return address
+     G0,        // constant zero
+     G5, G6, G7 // reserved for kernel
+    ]> {
+  let MethodProtos = [{
+    iterator allocation_order_end(const MachineFunction &MF) const;
+  }];
+  let MethodBodies = [{
+    IntRegsClass::iterator
+    IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
+      return end() - 10  // Don't allocate special registers
+         -1;
+    }
+  }];
+}
+
+
+ +

+Using SparcRegisterInfo.td with TableGen generates several output files +that are intended for inclusion in other source code that you write. +SparcRegisterInfo.td generates SparcGenRegisterInfo.h.inc, +which should be included in the header file for the implementation of the SPARC +register implementation that you write (SparcRegisterInfo.h). In +SparcGenRegisterInfo.h.inc a new structure is defined called +SparcGenRegisterInfo that uses TargetRegisterInfo as its +base. It also specifies types, based upon the defined register +classes: DFPRegsClass, FPRegsClass, and IntRegsClass. +

+ +

+SparcRegisterInfo.td also generates SparcGenRegisterInfo.inc, +which is included at the bottom of SparcRegisterInfo.cpp, the SPARC +register implementation. The code below shows only the generated integer +registers and associated register classes. The order of registers +in IntRegs reflects the order in the definition of IntRegs in +the target description file. Take special note of the use +of MethodBodies in SparcRegisterInfo.td to create code in +SparcGenRegisterInfo.inc. MethodProtos generates similar code +in SparcGenRegisterInfo.h.inc. +

+ +
+
  // IntRegs Register Class...
+  static const unsigned IntRegs[] = {
+    SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5,
+    SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3,
+    SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3,
+    SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3,
+    SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5,
+    SP::G6, SP::G7, 
+  };
+
+  // IntRegsVTs Register Class Value Types...
+  static const MVT::ValueType IntRegsVTs[] = {
+    MVT::i32, MVT::Other
+  };
+
+namespace SP {   // Register class instances
+  DFPRegsClass    DFPRegsRegClass;
+  FPRegsClass     FPRegsRegClass;
+  IntRegsClass    IntRegsRegClass;
+...
+  // IntRegs Sub-register Classess...
+  static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
+    NULL
+  };
+...
+  // IntRegs Super-register Classess...
+  static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
+    NULL
+  };
+...
+  // IntRegs Register Class sub-classes...
+  static const TargetRegisterClass* const IntRegsSubclasses [] = {
+    NULL
+  };
+...
+  // IntRegs Register Class super-classes...
+  static const TargetRegisterClass* const IntRegsSuperclasses [] = {
+    NULL
+  };
+...
+  IntRegsClass::iterator
+  IntRegsClass::allocation_order_end(const MachineFunction &MF) const {
+     return end()-10  // Don't allocate special registers
+         -1;
+  }
+  
+  IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, 
+    IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, 
+    IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
+}
+
+
+ +
+ + + + +
+ +

+The final step is to hand code portions of XXXRegisterInfo, which +implements the interface described in TargetRegisterInfo.h. These +functions return 0, NULL, or false, unless +overridden. Here is a list of functions that are overridden for the SPARC +implementation in SparcRegisterInfo.cpp (a sketch of one of these follows +the list):

+ +
    +
  • getCalleeSavedRegs — Returns a list of callee-saved registers + in the order of the desired callee-save stack frame offset.
  • + +
  • getReservedRegs — Returns a bitset indexed by physical + register numbers, indicating if a particular register is unavailable.
  • + +
  • hasFP — Return a Boolean indicating if a function should have + a dedicated frame pointer register.
  • + +
  • eliminateCallFramePseudoInstr — If call frame setup or + destroy pseudo instructions are used, this can be called to eliminate + them.
  • + +
  • eliminateFrameIndex — Eliminate abstract frame indices from + instructions that may use them.
  • + +
  • emitPrologue — Insert prologue code into the function.
  • + +
  • emitEpilogue — Insert epilogue code into the function.
  • +
+ +
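For instance, a rough sketch of the SPARC getCalleeSavedRegs +(treat this as illustrative rather than authoritative; SPARC's register +windows mean the callee preserves registers implicitly, so the list is empty):
+
+const unsigned *
+SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  // Null-terminated, empty list: register windows do the saving for us.
+  static const unsigned CalleeSavedRegs[] = { 0 };
+  return CalleeSavedRegs;
+}
+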
+ + + + + +
+ +

+During the early stages of code generation, the LLVM IR code is converted to a +SelectionDAG with nodes that are instances of the SDNode class +containing target instructions. An SDNode has an opcode, operands, type +requirements, and operation properties (for example, whether an operation is +commutative or loads from memory). The various operation node +types are described in the include/llvm/CodeGen/SelectionDAGNodes.h +file (values of the NodeType enum in the ISD namespace).

+ +

+TableGen uses the following target description (.td) input files to +generate much of the code for instruction definition: +

+ +
    +
  • Target.td — Where the Instruction, Operand, + InstrInfo, and other fundamental classes are defined.
  • + +
  • TargetSelectionDAG.td — Used by SelectionDAG + instruction selection generators; contains SDTC* classes (selection + DAG type constraint), definitions of SelectionDAG nodes (such as + imm, cond, bb, add, fadd, + sub), and pattern support (Pattern, Pat, + PatFrag, PatLeaf, ComplexPattern).
  • + +
  • XXXInstrFormats.td — Patterns for definitions of + target-specific instructions.
  • + +
  • XXXInstrInfo.td — Target-specific definitions of instruction + templates, condition codes, and instructions of an instruction set. For + architecture modifications, a different file name may be used. For example, + for Pentium with SSE instruction, this file is X86InstrSSE.td, and + for Pentium with MMX, this file is X86InstrMMX.td.
  • +
+ +

+There is also a target-specific XXX.td file, where XXX is the +name of the target. The XXX.td file includes the other .td +input files, but its contents are only directly important for subtargets. +

+ +

+You should describe a concrete target-specific class XXXInstrInfo that +represents machine instructions supported by a target machine. +XXXInstrInfo contains an array of XXXInstrDescriptor objects, +each of which describes one instruction. An instruction descriptor defines:

+ +
    +
  • Opcode mnemonic
  • + +
  • Number of operands
  • + +
  • List of implicit register definitions and uses
  • + +
  • Target-independent properties (such as memory access, is commutable)
  • + +
  • Target-specific flags
  • +
+ +

+The Instruction class (defined in Target.td) is mostly used as a base +for more complex instruction classes. +

+ +
+
class Instruction {
+  string Namespace = "";
+  dag OutOperandList;       // An dag containing the MI def operand list.
+  dag InOperandList;        // An dag containing the MI use operand list.
+  string AsmString = "";    // The .s format to print the instruction with.
+  list<dag> Pattern;  // Set to the DAG pattern for this instruction
+  list<Register> Uses = []; 
+  list<Register> Defs = [];
+  list<Predicate> Predicates = [];  // predicates turned into isel match code
+  ... remainder not shown for space ...
+}
+
+
+ +

+A SelectionDAG node (SDNode) should contain an object +representing a target-specific instruction that is defined +in XXXInstrInfo.td. The instruction objects should represent +instructions from the architecture manual of the target machine (such as the +SPARC Architecture Manual for the SPARC target). +

+ +

+A single instruction from the architecture manual is often modeled as multiple +target instructions, depending upon its operands. For example, a manual might +describe an add instruction that takes a register or an immediate operand. An +LLVM target could model this with two instructions named ADDri and +ADDrr. +

+ +

+You should define a class for each instruction category and define each opcode +as a subclass of the category with appropriate parameters such as the fixed +binary encoding of opcodes and extended opcodes. You should map the register +bits to the bits of the instruction in which they are encoded (for the +JIT). Also you should specify how the instruction should be printed when the +automatic assembly printer is used. +

+ +

+As is described in the SPARC Architecture Manual, Version 8, there are three +major 32-bit formats for instructions. Format 1 is only for the CALL +instruction. Format 2 is for branch on condition codes and SETHI (set +high bits of a register) instructions. Format 3 is for other instructions. +

+ +

+Each of these formats has corresponding classes in SparcInstrFormat.td. +InstSP is a base class for other instruction classes. Additional base +classes are specified for more precise formats: for example +in SparcInstrFormat.td, F2_1 is for SETHI, +and F2_2 is for branches. There are three other base +classes: F3_1 for register/register operations, F3_2 for +register/immediate operations, and F3_3 for floating-point +operations. SparcInstrInfo.td also adds the base class Pseudo for +synthetic SPARC instructions. +

+ +

+SparcInstrInfo.td largely consists of operand and instruction +definitions for the SPARC target. In SparcInstrInfo.td, the following +target description file entry, LDrr, defines the Load Integer +instruction for a Word (the LD SPARC opcode) from a memory address to a +register. The first parameter, the value 3 (11₂), is the +operation value for this category of operation. The second parameter +(000000₂) is the specific operation value +for LD/Load Word. The third parameter is the output destination, which +is a register operand and defined in the Register target description +file (IntRegs).

+ +
+
def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$dst), (ins MEMrr:$addr),
+                 "ld [$addr], $dst",
+                 [(set IntRegs:$dst, (load ADDRrr:$addr))]>;
+
+
+ +

+The fourth parameter is the input source, which uses the address +operand MEMrr that is defined earlier in SparcInstrInfo.td: +

+ +
+
def MEMrr : Operand<i32> {
+  let PrintMethod = "printMemOperand";
+  let MIOperandInfo = (ops IntRegs, IntRegs);
+}
+
+
+ +

+The fifth parameter is a string that is used by the assembly printer and can be left as an empty string until the assembly printer interface is implemented. The sixth and final parameter is the pattern used to match the instruction during the SelectionDAG Select Phase described in The LLVM Target-Independent Code Generator. This parameter is detailed in the next section, Instruction Selector.

+ +

+Instruction class definitions are not overloaded for different operand types, so +separate versions of instructions are needed for register, memory, or immediate +value operands. For example, to perform a Load Integer instruction for a Word +from an immediate operand to a register, the following instruction class is +defined: +

+ +
+
def LDri : F3_2 <3, 0b000000, (outs IntRegs:$dst), (ins MEMri:$addr),
+                 "ld [$addr], $dst",
+                 [(set IntRegs:$dst, (load ADDRri:$addr))]>;
+
+
+ +

+Writing these definitions for so many similar instructions can involve a lot of +cut and paste. In td files, the multiclass directive enables the +creation of templates to define several instruction classes at once (using +the defm directive). For example in SparcInstrInfo.td, the +multiclass pattern F3_12 is defined to create 2 instruction +classes each time F3_12 is invoked: +

+ +
+
multiclass F3_12 <string OpcStr, bits<6> Op3Val, SDNode OpNode> {
+  def rr  : F3_1 <2, Op3Val, 
+                 (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set IntRegs:$dst, (OpNode IntRegs:$b, IntRegs:$c))]>;
+  def ri  : F3_2 <2, Op3Val,
+                 (outs IntRegs:$dst), (ins IntRegs:$b, i32imm:$c),
+                 !strconcat(OpcStr, " $b, $c, $dst"),
+                 [(set IntRegs:$dst, (OpNode IntRegs:$b, simm13:$c))]>;
+}
+
+
+ +

+So when the defm directive is used for the XOR +and ADD instructions, as seen below, it creates four instruction +objects: XORrr, XORri, ADDrr, and ADDri. +

+ +
+
+defm XOR   : F3_12<"xor", 0b000011, xor>;
+defm ADD   : F3_12<"add", 0b000000, add>;
+
+
+ +

+SparcInstrInfo.td also includes definitions for condition codes that +are referenced by branch instructions. The following definitions +in SparcInstrInfo.td indicate the bit location of the SPARC condition +code. For example, the 10th bit represents the 'greater than' +condition for integers, and the 22nd bit represents the 'greater +than' condition for floats. +

+ +
+
+def ICC_NE  : ICC_VAL< 9>;  // Not Equal
+def ICC_E   : ICC_VAL< 1>;  // Equal
+def ICC_G   : ICC_VAL<10>;  // Greater
+...
+def FCC_U   : FCC_VAL<23>;  // Unordered
+def FCC_G   : FCC_VAL<22>;  // Greater
+def FCC_UG  : FCC_VAL<21>;  // Unordered or Greater
+...
+
+
+ +

+(Note that Sparc.h also defines enums that correspond to the same SPARC +condition codes. Care must be taken to ensure the values in Sparc.h +correspond to the values in SparcInstrInfo.td. I.e., +SPCC::ICC_NE = 9, SPCC::FCC_U = 23 and so on.) +

+ +
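+(As an illustration, the corresponding enum in Sparc.h might look like the
+following abbreviated sketch; the real file defines more enumerators.)
+
+namespace SPCC {
+  // These values must stay in sync with the ICC_VAL/FCC_VAL definitions
+  // in SparcInstrInfo.td.
+  enum CondCodes {
+    ICC_E  = 1,   // Equal
+    ICC_NE = 9,   // Not Equal
+    ICC_G  = 10,  // Greater
+    FCC_UG = 21,  // Unordered or Greater
+    FCC_G  = 22,  // Greater
+    FCC_U  = 23   // Unordered
+  };
+}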
+Instruction Operand Mapping

+The code generator backend maps instruction operands to fields in the +instruction. Operands are assigned to unbound fields in the instruction in the +order they are defined. Fields are bound when they are assigned a value. For +example, the Sparc target defines the XNORrr instruction as +a F3_1 format instruction having three operands. +

+ +
+
+def XNORrr  : F3_1<2, 0b000111,
+                   (outs IntRegs:$dst), (ins IntRegs:$b, IntRegs:$c),
+                   "xnor $b, $c, $dst",
+                   [(set IntRegs:$dst, (not (xor IntRegs:$b, IntRegs:$c)))]>;
+
+
+ +

+The instruction templates in SparcInstrFormats.td show the base class +for F3_1 is InstSP. +

+ +
+
+class InstSP<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction {
+  field bits<32> Inst;
+  let Namespace = "SP";
+  bits<2> op;
+  let Inst{31-30} = op;       
+  dag OutOperandList = outs;
+  dag InOperandList = ins;
+  let AsmString   = asmstr;
+  let Pattern = pattern;
+}
+
+
+ +

InstSP leaves the op field unbound.

+ +
+
+class F3<dag outs, dag ins, string asmstr, list<dag> pattern>
+    : InstSP<outs, ins, asmstr, pattern> {
+  bits<5> rd;
+  bits<6> op3;
+  bits<5> rs1;
+  let op{1} = 1;   // Op = 2 or 3
+  let Inst{29-25} = rd;
+  let Inst{24-19} = op3;
+  let Inst{18-14} = rs1;
+}
+
+
+ +

+F3 constrains the op field (its value must be 2 or 3) and defines the rd, op3, and rs1 fields. F3 format instructions bind their operands to the rd, op3, and rs1 fields.

+ +
+
+class F3_1<bits<2> opVal, bits<6> op3val, dag outs, dag ins,
+           string asmstr, list<dag> pattern> : F3<outs, ins, asmstr, pattern> {
+  bits<8> asi = 0; // asi not currently used
+  bits<5> rs2;
+  let op         = opVal;
+  let op3        = op3val;
+  let Inst{13}   = 0;     // i field = 0
+  let Inst{12-5} = asi;   // address space identifier
+  let Inst{4-0}  = rs2;
+}
+
+
+ +

+F3_1 binds the op and op3 fields and defines the rs2 field. F3_1 format instructions bind their operands to the rd, rs1, and rs2 fields. This results in the XNORrr instruction binding its $dst, $b, and $c operands to the rd, rs1, and rs2 fields respectively.

+ +
+Implement a subclass of TargetInstrInfo

+The final step is to hand-code portions of XXXInstrInfo, which implements the interface described in TargetInstrInfo.h. Unless overridden, these functions return 0, return a Boolean, or assert. Here is a list of functions that are overridden for the SPARC implementation in SparcInstrInfo.cpp:

+ +
  • isLoadFromStackSlot — If the specified machine instruction is a direct load from a stack slot, return the register number of the destination and the FrameIndex of the stack slot (a sketch follows this list).
  • isStoreToStackSlot — If the specified machine instruction is a direct store to a stack slot, return the register number of the source and the FrameIndex of the stack slot.
  • copyPhysReg — Copy values between a pair of physical registers.
  • storeRegToStackSlot — Store a register value to a stack slot.
  • loadRegFromStackSlot — Load a register value from a stack slot.
  • storeRegToAddr — Store a register value to memory.
  • loadRegFromAddr — Load a register value from memory.
  • foldMemoryOperand — Attempt to fold a load or store of the specified operand(s) into the given instruction.
+ +
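+As an illustration, here is a condensed sketch of isLoadFromStackSlot,
+modeled on the SPARC implementation (the opcode list is abbreviated, and the
+operand layout is assumed to be base register then offset):
+
+unsigned SparcInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  if (MI->getOpcode() == SP::LDri || MI->getOpcode() == SP::LDFri) {
+    // A direct stack-slot load is a frame-index base plus a zero offset.
+    if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();   // the destination register
+    }
+  }
+  return 0;   // not a direct stack-slot load
+}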
+Branch Folding and If Conversion

+Performance can be improved by combining instructions or by eliminating +instructions that are never reached. The AnalyzeBranch method +in XXXInstrInfo may be implemented to examine conditional instructions +and remove unnecessary instructions. AnalyzeBranch looks at the end of +a machine basic block (MBB) for opportunities for improvement, such as branch +folding and if conversion. The BranchFolder and IfConverter +machine function passes (see the source files BranchFolding.cpp and +IfConversion.cpp in the lib/CodeGen directory) call +AnalyzeBranch to improve the control flow graph that represents the +instructions. +

+ +

+Several implementations of AnalyzeBranch (for ARM, Alpha, and X86) can +be examined as models for your own AnalyzeBranch implementation. Since +SPARC does not implement a useful AnalyzeBranch, the ARM target +implementation is shown below. +

+ +

AnalyzeBranch returns a Boolean value and takes four parameters:

+ +
  • MachineBasicBlock &MBB — The incoming block to be examined.
  • MachineBasicBlock *&TBB — A destination block that is returned. For a conditional branch that evaluates to true, TBB is the destination.
  • MachineBasicBlock *&FBB — For a conditional branch that evaluates to false, FBB is returned as the destination.
  • std::vector<MachineOperand> &Cond — List of operands used to evaluate the condition of a conditional branch.
+ +

+In the simplest case, if a block ends without a branch, then it falls through to +the successor block. No destination blocks are specified for either TBB +or FBB, so both parameters return NULL. The start of +the AnalyzeBranch (see code below for the ARM target) shows the +function parameters and the code for the simplest case. +

+ +
+
bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+        MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
+        std::vector<MachineOperand> &Cond) const
+{
+  MachineBasicBlock::iterator I = MBB.end();
+  if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+    return false;
+
+
+ +

+If a block ends with a single unconditional branch instruction, then +AnalyzeBranch (shown below) should return the destination of that +branch in the TBB parameter. +

+ +
+
+  if (LastOpc == ARM::B || LastOpc == ARM::tB) {
+    TBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+
+ +

+If a block ends with two unconditional branches, then the second branch is never +reached. In that situation, as shown below, remove the last branch instruction +and return the penultimate branch in the TBB parameter. +

+ +
+
+  if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
+      (LastOpc == ARM::B || LastOpc == ARM::tB)) {
+    TBB = SecondLastInst->getOperand(0).getMBB();
+    I = LastInst;
+    I->eraseFromParent();
+    return false;
+  }
+
+
+ +

+A block may end with a single conditional branch instruction that falls through to the successor block if the condition evaluates to false. In that case, AnalyzeBranch (shown below) should return the destination of that conditional branch in the TBB parameter and a list of operands in the Cond parameter to evaluate the condition.

+ +
+
+  if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) {
+    // Block ends with fall-through condbranch.
+    TBB = LastInst->getOperand(0).getMBB();
+    Cond.push_back(LastInst->getOperand(1));
+    Cond.push_back(LastInst->getOperand(2));
+    return false;
+  }
+
+
+ +

+If a block ends with both a conditional branch and an ensuing unconditional +branch, then AnalyzeBranch (shown below) should return the conditional +branch destination (assuming it corresponds to a conditional evaluation of +'true') in the TBB parameter and the unconditional branch +destination in the FBB (corresponding to a conditional evaluation of +'false'). A list of operands to evaluate the condition should be +returned in the Cond parameter. +

+ +
+
+  unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+  if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
+      (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) {
+    TBB =  SecondLastInst->getOperand(0).getMBB();
+    Cond.push_back(SecondLastInst->getOperand(1));
+    Cond.push_back(SecondLastInst->getOperand(2));
+    FBB = LastInst->getOperand(0).getMBB();
+    return false;
+  }
+
+
+ +

+For the last two cases (ending with a single conditional branch or ending with +one conditional and one unconditional branch), the operands returned in +the Cond parameter can be passed to methods of other instructions to +create new branches or perform other operations. An implementation +of AnalyzeBranch requires the helper methods RemoveBranch +and InsertBranch to manage subsequent operations. +

+ +
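+A condensed sketch of what these helpers look like, loosely modeled on the
+ARM target (the opcode checks are abbreviated and the exact signatures vary
+between releases):
+
+unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+  // Delete branch instructions from the end of the block and report
+  // how many were removed; BranchFolder uses this before re-branching.
+  unsigned NumRemoved = 0;
+  MachineBasicBlock::iterator I = MBB.end();
+  while (I != MBB.begin()) {
+    --I;
+    if (I->getOpcode() != ARM::B && I->getOpcode() != ARM::Bcc)
+      break;
+    I = MBB.erase(I);
+    ++NumRemoved;
+  }
+  return NumRemoved;
+}
+
+InsertBranch performs the reverse operation: given TBB, FBB, and Cond values
+of the form produced by AnalyzeBranch, it appends the corresponding
+unconditional and/or conditional branch instructions to the end of the block.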

+AnalyzeBranch should return false, indicating success, in most circumstances. It should return true only when the method cannot determine what to do, for example, if a block has three terminating branches, or if it encounters a terminator it cannot handle, such as an indirect branch.

+ +
+Instruction Selector

+LLVM uses a SelectionDAG to represent LLVM IR instructions, and nodes of the SelectionDAG ideally represent native target instructions. During code generation, instruction selection passes are performed to convert non-native DAG instructions into native target-specific instructions. The pass described in XXXISelDAGToDAG.cpp is used to match patterns and perform DAG-to-DAG instruction selection. Optionally, a pass may be defined (in XXXBranchSelector.cpp) to perform similar DAG-to-DAG operations for branch instructions. Later, the code in XXXISelLowering.cpp replaces or removes operations and data types that are not supported natively (that is, it legalizes the SelectionDAG).

+ +

+TableGen generates code for instruction selection using the following target +description input files: +

+ +
  • XXXInstrInfo.td — Contains definitions of the instructions in a target-specific instruction set, and generates XXXGenDAGISel.inc, which is included in XXXISelDAGToDAG.cpp.
  • XXXCallingConv.td — Contains the calling and return value conventions for the target architecture, and generates XXXGenCallingConv.inc, which is included in XXXISelLowering.cpp.
+ +

+The implementation of an instruction selection pass must include a header that +declares the FunctionPass class or a subclass of FunctionPass. In +XXXTargetMachine.cpp, a Pass Manager (PM) should add each instruction +selection pass into the queue of passes to run. +

+ +
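+For example, a sketch of how the SPARC target might queue its selector pass
+(createSparcISelDag is the factory function conventionally defined alongside
+SparcISelDAGToDAG.cpp; the exact addInstSelector signature varies by release):
+
+bool SparcTargetMachine::addInstSelector(PassManagerBase &PM,
+                                         CodeGenOpt::Level OptLevel) {
+  PM.add(createSparcISelDag(*this));   // queue the DAG-to-DAG selector
+  return false;
+}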

+The LLVM static compiler (llc) is an excellent tool for visualizing the +contents of DAGs. To display the SelectionDAG before or after specific +processing phases, use the command line options for llc, described +at +SelectionDAG Instruction Selection Process. +

+ +
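+For example, assuming a debug build of llc (the graph-viewing support is
+compiled out of release builds), invocations such as the following pop up
+the DAG at different points in the pipeline:
+
+$ llc -view-isel-dags foo.bc      # DAG just before instruction selection
+$ llc -view-sched-dags foo.bc     # DAG just before scheduling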

+To describe instruction selector behavior, you should add patterns for lowering +LLVM code into a SelectionDAG as the last parameter of the instruction +definitions in XXXInstrInfo.td. For example, in +SparcInstrInfo.td, this entry defines a register store operation, and +the last parameter describes a pattern with the store DAG operator. +

+ +
+
+def STrr  : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src),
+                 "st $src, [$addr]", [(store IntRegs:$src, ADDRrr:$addr)]>;
+
+
+ +

+ADDRrr is a memory mode that is also defined in +SparcInstrInfo.td: +

+ +
+
+def ADDRrr : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
+
+
+ +

+The definition of ADDRrr refers to SelectADDRrr, which is a function defined in an implementation of the Instruction Selector (such as SparcISelDAGToDAG.cpp).

+ +
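+A condensed sketch of SelectADDRrr, modeled on the SPARC implementation
+(frame-index and constant-offset handling is elided, and the exact signature
+varies by release):
+
+bool SparcDAGToDAGISel::SelectADDRrr(SDNode *Op, SDValue Addr,
+                                     SDValue &R1, SDValue &R2) {
+  if (Addr.getOpcode() == ISD::FrameIndex)
+    return false;                  // let the reg+imm form handle this
+  if (Addr.getOpcode() == ISD::ADD) {
+    R1 = Addr.getOperand(0);       // reg+reg addressing
+    R2 = Addr.getOperand(1);
+    return true;
+  }
+  R1 = Addr;                       // bare address: encode as reg+%g0
+  R2 = CurDAG->getRegister(SP::G0, MVT::i32);
+  return true;
+}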

+In lib/Target/TargetSelectionDAG.td, the DAG operator for store is +defined below: +

+ +
+
+def store : PatFrag<(ops node:$val, node:$ptr),
+                    (st node:$val, node:$ptr), [{
+  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+    return !ST->isTruncatingStore() && 
+           ST->getAddressingMode() == ISD::UNINDEXED;
+  return false;
+}]>;
+
+
+ +

+XXXInstrInfo.td also generates (in XXXGenDAGISel.inc) the +SelectCode method that is used to call the appropriate processing +method for an instruction. In this example, SelectCode +calls Select_ISD_STORE for the ISD::STORE opcode. +

+ +
+
+SDNode *SelectCode(SDValue N) {
+  ... 
+  MVT::ValueType NVT = N.getNode()->getValueType(0);
+  switch (N.getOpcode()) {
+  case ISD::STORE: {
+    switch (NVT) {
+    default:
+      return Select_ISD_STORE(N);
+      break;
+    }
+    break;
+  }
+  ...
+
+
+ +

+The pattern for STrr is matched, so elsewhere in +XXXGenDAGISel.inc, code for STrr is created for +Select_ISD_STORE. The Emit_22 method is also generated +in XXXGenDAGISel.inc to complete the processing of this +instruction. +

+ +
+
+SDNode *Select_ISD_STORE(const SDValue &N) {
+  SDValue Chain = N.getOperand(0);
+  if (Predicate_store(N.getNode())) {
+    SDValue N1 = N.getOperand(1);
+    SDValue N2 = N.getOperand(2);
+    SDValue CPTmp0;
+    SDValue CPTmp1;
+
+    // Pattern: (st:void IntRegs:i32:$src, 
+    //           ADDRrr:i32:$addr)<<P:Predicate_store>>
+    // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src)
+    // Pattern complexity = 13  cost = 1  size = 0
+    if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) &&
+        N1.getNode()->getValueType(0) == MVT::i32 &&
+        N2.getNode()->getValueType(0) == MVT::i32) {
+      return Emit_22(N, SP::STrr, CPTmp0, CPTmp1);
+    }
+...
+
+
+ +
+The SelectionDAG Legalize Phase

+The Legalize phase converts a DAG to use types and operations that are natively +supported by the target. For natively unsupported types and operations, you need +to add code to the target-specific XXXTargetLowering implementation to convert +unsupported types and operations to supported ones. +

+ +

+In the constructor for the XXXTargetLowering class, first use the addRegisterClass method to specify which types are supported and which register classes are associated with them. The code for the register classes is generated by TableGen from XXXRegisterInfo.td and placed in XXXGenRegisterInfo.h.inc. For example, the implementation of the constructor for the SparcTargetLowering class (in SparcISelLowering.cpp) starts with the following code:

+ +
+
+addRegisterClass(MVT::i32, SP::IntRegsRegisterClass);
+addRegisterClass(MVT::f32, SP::FPRegsRegisterClass);
+addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); 
+
+
+ +

+You should examine the node types in the ISD namespace +(include/llvm/CodeGen/SelectionDAGNodes.h) and determine which +operations the target natively supports. For operations that do not have +native support, add a callback to the constructor for the XXXTargetLowering +class, so the instruction selection process knows what to do. The TargetLowering +class callback methods (declared in llvm/Target/TargetLowering.h) are: +

+ +
  • setOperationAction — General operation.
  • setLoadExtAction — Load with extension.
  • setTruncStoreAction — Truncating store.
  • setIndexedLoadAction — Indexed load.
  • setIndexedStoreAction — Indexed store.
  • setConvertAction — Type conversion.
  • setCondCodeAction — Support for a given condition code.
+ +

+Note: on older releases, setLoadXAction is used instead +of setLoadExtAction. Also, on older releases, +setCondCodeAction may not be supported. Examine your release +to see what methods are specifically supported. +

+ +

+These callbacks are used to determine whether an operation works with a specified type (or types). In all cases, the third parameter is a LegalizeAction enum value: Promote, Expand, Custom, or Legal. SparcISelLowering.cpp contains examples of all four LegalizeAction values.

+ +
+Promote

+For an operation without native support for a given type, the specified type may be promoted to a larger type that is supported. For example, SPARC does not support a sign-extending load for Boolean values (i1 type), so in SparcISelLowering.cpp the third parameter below, Promote, changes i1 type values to a larger type before loading.

+ +
+
+setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+
+ +
+Expand

+For a type without native support, a value may need to be broken down further, +rather than promoted. For an operation without native support, a combination of +other operations may be used to similar effect. In SPARC, the floating-point +sine and cosine trig operations are supported by expansion to other operations, +as indicated by the third parameter, Expand, to +setOperationAction: +

+ +
+
+setOperationAction(ISD::FSIN, MVT::f32, Expand);
+setOperationAction(ISD::FCOS, MVT::f32, Expand);
+
+
+ +
+Custom

+For some operations, simple type promotion or operation expansion may be +insufficient. In some cases, a special intrinsic function must be implemented. +

+ +

+For example, a constant value may require special treatment, or an operation may +require spilling and restoring registers in the stack and working with register +allocators. +

+ +

+As seen in SparcISelLowering.cpp code below, to perform a type +conversion from a floating point value to a signed integer, first the +setOperationAction should be called with Custom as the third +parameter: +

+ +
+
+setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+
+
+ +

+In the LowerOperation method, for each Custom operation, a +case statement should be added to indicate what function to call. In the +following code, an FP_TO_SINT opcode will call +the LowerFP_TO_SINT method: +

+ +
+
+SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+  switch (Op.getOpcode()) {
+  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
+  ...
+  }
+}
+
+
+ +

+Finally, the LowerFP_TO_SINT method is implemented, using an FP +register to convert the floating-point value to an integer. +

+ +
+
+static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  assert(Op.getValueType() == MVT::i32);
+  Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0));
+  return DAG.getNode(ISD::BITCAST, MVT::i32, Op);
+}
+
+
+ +
+Legal

+The Legal LegalizeAction enum value simply indicates that an +operation is natively supported. Legal represents the default +condition, so it is rarely used. In SparcISelLowering.cpp, the action +for CTPOP (an operation to count the bits set in an integer) is +natively supported only for SPARC v9. The following code enables +the Expand conversion technique for non-v9 SPARC implementations. +

+ +
+
+setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+...
+if (TM.getSubtarget<SparcSubtarget>().isV9())
+  setOperationAction(ISD::CTPOP, MVT::i32, Legal);
+
+
+ +
+Calling Conventions

+To support target-specific calling conventions, XXXCallingConv.td uses interfaces (such as CCIfType and CCAssignToReg) that are defined in lib/Target/TargetCallingConv.td. TableGen can take the target descriptor file XXXCallingConv.td and generate the header file XXXGenCallingConv.inc, which is typically included in XXXISelLowering.cpp. You can use the interfaces in TargetCallingConv.td to specify:

+ +
  • The order of parameter allocation.
  • Where parameters and return values are placed (that is, on the stack or in registers).
  • Which registers may be used.
  • Whether the caller or callee unwinds the stack.
+ +

+The following example demonstrates the use of the CCIfType and +CCAssignToReg interfaces. If the CCIfType predicate is true +(that is, if the current argument is of type f32 or f64), then +the action is performed. In this case, the CCAssignToReg action assigns +the argument value to the first available register: either R0 +or R1. +

+ +
+
+CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>>
+
+
+ +

+SparcCallingConv.td contains definitions for a target-specific +return-value calling convention (RetCC_Sparc32) and a basic 32-bit C calling +convention (CC_Sparc32). The definition of RetCC_Sparc32 +(shown below) indicates which registers are used for specified scalar return +types. A single-precision float is returned to register F0, and a +double-precision float goes to register D0. A 32-bit integer is +returned in register I0 or I1. +

+ +
+
+def RetCC_Sparc32 : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[I0, I1]>>,
+  CCIfType<[f32], CCAssignToReg<[F0]>>,
+  CCIfType<[f64], CCAssignToReg<[D0]>>
+]>;
+
+
+ +

+The definition of CC_Sparc32 in SparcCallingConv.td introduces CCAssignToStack, which assigns the value to a stack slot with the specified size and alignment. In the example below, the first parameter, 4, indicates the size of the slot, and the second parameter, also 4, indicates that the slot is aligned on a 4-byte boundary. (Special cases: if size is zero, then the ABI size is used; if alignment is zero, then the ABI alignment is used.)

+ +
+
+def CC_Sparc32 : CallingConv<[
+  // All arguments get passed in integer registers if there is space.
+  CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>,
+  CCAssignToStack<4, 4>
+]>;
+
+
+ +

+CCDelegateTo is another commonly used interface, which delegates to a specified sub-calling convention; if it matches, the sub-convention is invoked. In the following example (in X86CallingConv.td), the definition of RetCC_X86_32_C ends with CCDelegateTo. After the current value is assigned to the register ST0 or ST1, RetCC_X86Common is invoked.

+ +
+
+def RetCC_X86_32_C : CallingConv<[
+  CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>,
+  CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>,
+  CCDelegateTo<RetCC_X86Common>
+]>;
+
+
+ +

+CCIfCC is an interface that attempts to match the given name to the +current calling convention. If the name identifies the current calling +convention, then a specified action is invoked. In the following example (in +X86CallingConv.td), if the Fast calling convention is in use, +then RetCC_X86_32_Fast is invoked. If the SSECall calling +convention is in use, then RetCC_X86_32_SSE is invoked. +

+ +
+
+def RetCC_X86_32 : CallingConv<[
+  CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
+  CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo<RetCC_X86_32_SSE>>,
+  CCDelegateTo<RetCC_X86_32_C>
+]>;
+
+
+ +

Other calling convention interfaces include (a combined example follows this list):

+ +
  • CCIf <predicate, action> — If the predicate matches, apply the action.
  • CCIfInReg <action> — If the argument is marked with the 'inreg' attribute, then apply the action.
  • CCIfNest <action> — If the argument is marked with the 'nest' attribute, then apply the action.
  • CCIfNotVarArg <action> — If the current function does not take a variable number of arguments, apply the action.
  • CCAssignToRegWithShadow <registerList, shadowList> — Similar to CCAssignToReg, but with a shadow list of registers.
  • CCPassByVal <size, align> — Assign the value to a stack slot with the minimum specified size and alignment.
  • CCPromoteToType <type> — Promote the current value to the specified type.
  • CallingConv <[actions]> — Define each calling convention that is supported.
+ +
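+As a further illustration, here is a hypothetical calling convention that
+combines several of these interfaces (the register names and the convention
+name are invented for the example):
+
+def CC_XXX32 : CallingConv<[
+  // Promote small integer arguments to i32 before assigning them.
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+  // A 'nest' argument goes to a dedicated register.
+  CCIfNest<CCAssignToReg<[R9]>>,
+  // Remaining i32 values fill the argument registers, then the stack.
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCAssignToStack<4, 4>
+]>;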
+Assembly Printer

+During the code emission stage, the code generator may utilize an LLVM pass to produce assembly output. To do this, implement the code for a printer that converts LLVM IR to GAS-format assembly language for your target machine, using the following steps:

+ +
  • Define all the assembly strings for your target, adding them to the instructions defined in the XXXInstrInfo.td file. (See Instruction Set.) TableGen will produce an output file (XXXGenAsmWriter.inc) with an implementation of the printInstruction method for the XXXAsmPrinter class.
  • Write XXXTargetAsmInfo.h, which contains the bare-bones declaration of the XXXTargetAsmInfo class (a subclass of TargetAsmInfo).
  • Write XXXTargetAsmInfo.cpp, which contains target-specific values for TargetAsmInfo properties and sometimes new implementations for methods.
  • Write XXXAsmPrinter.cpp, which implements the AsmPrinter class that performs the LLVM-to-assembly conversion.
+ +

+The code in XXXTargetAsmInfo.h is usually a trivial declaration of the +XXXTargetAsmInfo class for use in XXXTargetAsmInfo.cpp. +Similarly, XXXTargetAsmInfo.cpp usually has a few declarations of +XXXTargetAsmInfo replacement values that override the default values +in TargetAsmInfo.cpp. For example in SparcTargetAsmInfo.cpp: +

+ +
+
+SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) {
+  Data16bitsDirective = "\t.half\t";
+  Data32bitsDirective = "\t.word\t";
+  Data64bitsDirective = 0;  // .xword is only supported by V9.
+  ZeroDirective = "\t.skip\t";
+  CommentString = "!";
+  ConstantPoolSection = "\t.section \".rodata\",#alloc\n";
+}
+
+
+ +

+The X86 assembly printer implementation (X86TargetAsmInfo) is an example where the target-specific TargetAsmInfo class uses an overridden method: ExpandInlineAsm.

+ +

+A target-specific implementation of AsmPrinter is written in XXXAsmPrinter.cpp, which implements the AsmPrinter class that converts the LLVM IR to printable assembly. The implementation must include the following headers, which have declarations for the AsmPrinter and MachineFunctionPass classes. MachineFunctionPass is a subclass of FunctionPass.

+ +
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h" 
+
+
+ +

+As a FunctionPass, AsmPrinter first +calls doInitialization to set up the AsmPrinter. In +SparcAsmPrinter, a Mangler object is instantiated to process +variable names. +

+ +
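+A minimal sketch of that setup (the exact Mangler constructor arguments and
+the base-class call differ between releases):
+
+bool SparcAsmPrinter::doInitialization(Module &M) {
+  Mang = new Mangler(M);   // used to decorate global symbol names
+  return AsmPrinter::doInitialization(M);
+}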

+In XXXAsmPrinter.cpp, the runOnMachineFunction method (declared in MachineFunctionPass) must be implemented for XXXAsmPrinter. In MachineFunctionPass, the runOnFunction method invokes runOnMachineFunction. Target-specific implementations of runOnMachineFunction differ, but generally do the following to process each machine function (a condensed sketch appears after this list):

+ +
  • Call SetupMachineFunction to perform initialization.
  • Call EmitConstantPool to print out (to the output stream) constants which have been spilled to memory.
  • Call EmitJumpTableInfo to print out jump tables used by the current function.
  • Print out the label for the current function.
  • Print out the code for the function, including basic block labels and the assembly for the instruction (using printInstruction).
+ +
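+A condensed sketch of those steps (the helper names follow the AsmPrinter
+base class, but the exact calls and output-stream details differ between
+targets and releases):
+
+bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+  SetupMachineFunction(MF);                     // per-function initialization
+  EmitConstantPool(MF.getConstantPool());       // constants spilled to memory
+  EmitJumpTableInfo(MF.getJumpTableInfo(), MF); // jump tables
+  O << CurrentFnName << ":\n";                  // the function label
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    printBasicBlockLabel(I);                    // basic block label
+    O << "\n";
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II)
+      printInstruction(II);                     // TableGen-generated printer
+  }
+  return false;                                 // nothing was modified
+}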

+The XXXAsmPrinter implementation must also include the code generated +by TableGen that is output in the XXXGenAsmWriter.inc file. The code +in XXXGenAsmWriter.inc contains an implementation of the +printInstruction method that may call these methods: +

+ +
  • printOperand
  • printMemOperand
  • printCCOperand (for conditional statements)
  • printDataDirective
  • printDeclare
  • printImplicitDef
  • printInlineAsm
+ +

+The implementations of printDeclare, printImplicitDef, +printInlineAsm, and printLabel in AsmPrinter.cpp are +generally adequate for printing assembly and do not need to be +overridden. +

+ +

+The printOperand method is implemented with a long switch/case +statement for the type of operand: register, immediate, basic block, external +symbol, global address, constant pool index, or jump table index. For an +instruction with a memory address operand, the printMemOperand method +should be implemented to generate the proper output. Similarly, +printCCOperand should be used to print a conditional operand. +

+ +
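+An abbreviated sketch of that switch (only two cases are shown;
+getRegisterName stands in for the TableGen-generated register-name accessor):
+
+void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+  const MachineOperand &MO = MI->getOperand(opNum);
+  switch (MO.getType()) {
+  case MachineOperand::MO_Register:
+    O << "%" << LowercaseString(getRegisterName(MO.getReg()));
+    break;
+  case MachineOperand::MO_Immediate:
+    O << (int)MO.getImm();
+    break;
+  // ... basic blocks, external symbols, global addresses, constant pool
+  // indices, and jump table indices are handled similarly ...
+  default:
+    llvm_unreachable("unknown operand type");
+  }
+}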

doFinalization should be overridden in XXXAsmPrinter; it is called to shut down the assembly printer. During doFinalization, global variables and constants are printed to the output.

+ +
+Subtarget Support

+Subtarget support is used to inform the code generation process of instruction +set variations for a given chip set. For example, the LLVM SPARC implementation +provided covers three major versions of the SPARC microprocessor architecture: +Version 8 (V8, which is a 32-bit architecture), Version 9 (V9, a 64-bit +architecture), and the UltraSPARC architecture. V8 has 16 double-precision +floating-point registers that are also usable as either 32 single-precision or 8 +quad-precision registers. V8 is also purely big-endian. V9 has 32 +double-precision floating-point registers that are also usable as 16 +quad-precision registers, but cannot be used as single-precision registers. The +UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set +extensions. +

+ +

+If subtarget support is needed, you should implement a target-specific +XXXSubtarget class for your architecture. This class should process the +command-line options -mcpu= and -mattr=. +

+ +
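+For example, with the SPARC features shown below, a user could select a
+processor and feature set on the llc command line like this (flag spellings
+as of this release):
+
+$ llc -march=sparc -mcpu=ultrasparc -mattr=+vis foo.bc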

+TableGen uses definitions in the Target.td and Sparc.td files +to generate code in SparcGenSubtarget.inc. In Target.td, shown +below, the SubtargetFeature interface is defined. The first 4 string +parameters of the SubtargetFeature interface are a feature name, an +attribute set by the feature, the value of the attribute, and a description of +the feature. (The fifth parameter is a list of features whose presence is +implied, and its default value is an empty array.) +

+ +
+
+class SubtargetFeature<string n, string a,  string v, string d,
+                       list<SubtargetFeature> i = []> {
+  string Name = n;
+  string Attribute = a;
+  string Value = v;
+  string Desc = d;
+  list<SubtargetFeature> Implies = i;
+}
+
+
+ +

+In the Sparc.td file, the SubtargetFeature is used to define the +following features. +

+ +
+
+def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true",
+                     "Enable SPARC-V9 instructions">;
+def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8", 
+                     "V8DeprecatedInsts", "true",
+                     "Enable deprecated V8 instructions in V9 mode">;
+def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true",
+                     "Enable UltraSPARC Visual Instruction Set extensions">;
+
+
+ +

+Elsewhere in Sparc.td, the Proc class is defined and then is used to +define particular SPARC processor subtypes that may have the previously +described features. +

+ +
+
+class Proc<string Name, list<SubtargetFeature> Features>
+  : Processor<Name, NoItineraries, Features>;
+ 
+def : Proc<"generic",         []>;
+def : Proc<"v8",              []>;
+def : Proc<"supersparc",      []>;
+def : Proc<"sparclite",       []>;
+def : Proc<"f934",            []>;
+def : Proc<"hypersparc",      []>;
+def : Proc<"sparclite86x",    []>;
+def : Proc<"sparclet",        []>;
+def : Proc<"tsc701",          []>;
+def : Proc<"v9",              [FeatureV9]>;
+def : Proc<"ultrasparc",      [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3",     [FeatureV9, FeatureV8Deprecated]>;
+def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+
+
+ +

+From the Target.td and Sparc.td files, the resulting SparcGenSubtarget.inc specifies enum values to identify the features, arrays of constants to represent the CPU features and CPU subtypes, and the ParseSubtargetFeatures method that parses the features string and sets the specified subtarget options. The generated SparcGenSubtarget.inc file should be included in SparcSubtarget.cpp. The target-specific implementation of the XXXSubtarget constructor should follow this pseudocode:

+ +
+
+XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {
+  // Set the default features
+  // Determine default and user specified characteristics of the CPU
+  // Call ParseSubtargetFeatures(FS, CPU) to parse the features string
+  // Perform any additional operations
+}
+
+
+ +
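+A concrete rendering of that pseudocode for SPARC might look like the
+following sketch (the feature fields mirror the SubtargetFeature definitions
+shown earlier; the default CPU name is an assumption):
+
+SparcSubtarget::SparcSubtarget(const Module &M, const std::string &FS)
+  : IsV9(false), V8DeprecatedInsts(false), IsVIS(false) {
+  std::string CPU = "v8";            // assumed default CPU
+  ParseSubtargetFeatures(FS, CPU);   // generated by TableGen
+}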
+JIT Support

+The implementation of a target machine optionally includes a Just-In-Time (JIT) +code generator that emits machine code and auxiliary structures as binary output +that can be written directly to memory. To do this, implement JIT code +generation by performing the following steps: +

+ +
  • Write an XXXCodeEmitter.cpp file that contains a machine function pass that transforms target-machine instructions into relocatable machine code.
  • Write an XXXJITInfo.cpp file that implements the JIT interfaces for target-specific code-generation activities, such as emitting machine code and stubs.
  • Modify XXXTargetMachine so that it provides a TargetJITInfo object through its getJITInfo method.
+ +

+There are several different approaches to writing the JIT support code. For instance, TableGen and target descriptor files may be used for creating a JIT code generator, but are not mandatory. For the Alpha and PowerPC target machines, TableGen is used to generate XXXGenCodeEmitter.inc, which contains the binary coding of machine instructions and the getBinaryCodeForInstr method to access those codes. Other JIT implementations do not use TableGen.

+ +

+Both XXXJITInfo.cpp and XXXCodeEmitter.cpp must include the +llvm/CodeGen/MachineCodeEmitter.h header file that defines the +MachineCodeEmitter class containing code for several callback functions +that write data (in bytes, words, strings, etc.) to the output stream. +

+ +
+Machine Code Emitter

+In XXXCodeEmitter.cpp, a target-specific subclass of the Emitter class is implemented as a function pass (a subclass of MachineFunctionPass). The target-specific implementation of runOnMachineFunction (invoked by runOnFunction in MachineFunctionPass) iterates through each MachineBasicBlock and calls emitInstruction to process each instruction and emit binary code. emitInstruction is largely implemented with case statements on the instruction types defined in XXXInstrInfo.h. For example, in X86CodeEmitter.cpp, the emitInstruction method is built around the following switch/case statements:

+ +
+
+switch (Desc->TSFlags & X86::FormMask) {
+case X86II::Pseudo:  // for not yet implemented instructions 
+   ...               // or pseudo-instructions
+   break;
+case X86II::RawFrm:  // for instructions with a fixed opcode value
+   ...
+   break;
+case X86II::AddRegFrm: // for instructions that have one register operand 
+   ...                 // added to their opcode
+   break;
+case X86II::MRMDestReg:// for instructions that use the Mod/RM byte
+   ...                 // to specify a destination (register)
+   break;
+case X86II::MRMDestMem:// for instructions that use the Mod/RM byte
+   ...                 // to specify a destination (memory)
+   break;
+case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte
+   ...                 // to specify a source (register)
+   break;
+case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte
+   ...                 // to specify a source (memory)
+   break;
+case X86II::MRM0r: case X86II::MRM1r:  // for instructions that operate on 
+case X86II::MRM2r: case X86II::MRM3r:  // a REGISTER r/m operand and
+case X86II::MRM4r: case X86II::MRM5r:  // use the Mod/RM byte and a field
+case X86II::MRM6r: case X86II::MRM7r:  // to hold extended opcode data
+   ...  
+   break;
+case X86II::MRM0m: case X86II::MRM1m:  // for instructions that operate on
+case X86II::MRM2m: case X86II::MRM3m:  // a MEMORY r/m operand and
+case X86II::MRM4m: case X86II::MRM5m:  // use the Mod/RM byte and a field
+case X86II::MRM6m: case X86II::MRM7m:  // to hold extended opcode data
+   ...  
+   break;
+case X86II::MRMInitReg: // for instructions whose source and
+   ...                  // destination are the same register
+   break;
+}
+
+
+ +

+The implementations of these case statements often first emit the opcode and +then get the operand(s). Then depending upon the operand, helper methods may be +called to process the operand(s). For example, in X86CodeEmitter.cpp, +for the X86II::AddRegFrm case, the first data emitted +(by emitByte) is the opcode added to the register operand. Then an +object representing the machine operand, MO1, is extracted. The helper +methods such as isImmediate, +isGlobalAddress, isExternalSymbol, isConstantPoolIndex, and +isJumpTableIndex determine the operand +type. (X86CodeEmitter.cpp also has private methods such +as emitConstant, emitGlobalAddress, +emitExternalSymbolAddress, emitConstPoolAddress, +and emitJumpTableAddress that emit the data into the output stream.) +

+ +
+
+case X86II::AddRegFrm:
+  MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg()));
+  
+  if (CurOp != NumOps) {
+    const MachineOperand &MO1 = MI.getOperand(CurOp++);
+    unsigned Size = X86InstrInfo::sizeOfImm(Desc);
+    if (MO1.isImmediate())
+      emitConstant(MO1.getImm(), Size);
+    else {
+      unsigned rt = Is64BitMode ? X86::reloc_pcrel_word
+        : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word);
+      if (Opcode == X86::MOV64ri) 
+        rt = X86::reloc_absolute_dword;  // FIXME: add X86II flag?
+      if (MO1.isGlobalAddress()) {
+        bool NeedStub = isa<Function>(MO1.getGlobal());
+        bool isLazy = gvNeedsLazyPtr(MO1.getGlobal());
+        emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
+                          NeedStub, isLazy);
+      } else if (MO1.isExternalSymbol())
+        emitExternalSymbolAddress(MO1.getSymbolName(), rt);
+      else if (MO1.isConstantPoolIndex())
+        emitConstPoolAddress(MO1.getIndex(), rt);
+      else if (MO1.isJumpTableIndex())
+        emitJumpTableAddress(MO1.getIndex(), rt);
+    }
+  }
+  break;
+
+
+ +

+In the previous example, X86CodeEmitter.cpp uses the variable rt, which is a RelocationType enum that may be used to relocate addresses (for example, a global address with a PIC base offset). The RelocationType enum for that target is defined in the short target-specific XXXRelocations.h file. The RelocationType is used by the relocate method defined in XXXJITInfo.cpp to rewrite addresses for referenced global symbols.

+ +

+For example, X86Relocations.h specifies the following relocation types +for the X86 addresses. In all four cases, the relocated value is added to the +value already in memory. For reloc_pcrel_word +and reloc_picrel_word, there is an additional initial adjustment. +

+ +
+
+enum RelocationType {
+  reloc_pcrel_word = 0,    // add reloc value after adjusting for the PC loc
+  reloc_picrel_word = 1,   // add reloc value after adjusting for the PIC base
+  reloc_absolute_word = 2, // absolute relocation; no additional adjustment 
+  reloc_absolute_dword = 3 // absolute relocation; no additional adjustment
+};
+
+
+ +
+Target JIT Info

+XXXJITInfo.cpp implements the JIT interfaces for target-specific +code-generation activities, such as emitting machine code and stubs. At minimum, +a target-specific version of XXXJITInfo implements the following: +

+ +
  • getLazyResolverFunction — Initializes the JIT, gives the target a function that is used for compilation.
  • emitFunctionStub — Returns a native function with a specified address for a callback function.
  • relocate — Changes the addresses of referenced globals, based on relocation types.
  • Callback functions that are wrappers to a function stub, used when the real target is not initially known.
+ +

+getLazyResolverFunction is generally trivial to implement. It stores the incoming parameter as the global JITCompilerFunction and returns the callback function that will be used as a function wrapper. For the Alpha target (in AlphaJITInfo.cpp), the getLazyResolverFunction implementation is simply:

+ +
+
+TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction(  
+                                            JITCompilerFn F) {
+  JITCompilerFunction = F;
+  return AlphaCompilationCallback;
+}
+
+
+ +

+For the X86 target, the getLazyResolverFunction implementation is a little more complicated, because it returns a different callback function for processors with SSE instructions and XMM registers.

+ +

+The callback function initially saves and later restores the callee register +values, incoming arguments, and frame and return address. The callback function +needs low-level access to the registers or stack, so it is typically implemented +with assembler. +

+ +
+ Mason Woo and Misha Brukman
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$

diff --git a/final/docs/WritingAnLLVMPass.html b/final/docs/WritingAnLLVMPass.html
new file mode 100644
index 00000000000..edc8631d0ab
--- /dev/null
+++ b/final/docs/WritingAnLLVMPass.html
@@ -0,0 +1,1928 @@
+ Writing an LLVM Pass +
+ +
  1. Introduction - What is a pass?
  2. Quick Start - Writing hello world
  3. Pass classes and requirements
  4. Pass Registration
  5. Specifying interactions between passes
  6. Implementing Analysis Groups
  7. Pass Statistics
  8. What PassManager does
  9. Registering dynamically loaded passes
  10. Using GDB with dynamically loaded passes
  11. Future extensions planned
+ +
+

Written by Chris Lattner and + Jim Laskey

+
+Introduction - What is a pass?

The LLVM Pass Framework is an important part of the LLVM system, because LLVM +passes are where most of the interesting parts of the compiler exist. Passes +perform the transformations and optimizations that make up the compiler, they +build the analysis results that are used by these transformations, and they are, +above all, a structuring technique for compiler code.

+ +

All LLVM passes are subclasses of the Pass class, which implement functionality by overriding virtual methods inherited from Pass. Depending on how your pass works, you should inherit from the ModulePass, CallGraphSCCPass, FunctionPass, LoopPass, RegionPass, or BasicBlockPass classes, which gives the system more information about what your pass does, and how it can be combined with other passes. One of the main features of the LLVM Pass Framework is that it schedules passes to run in an efficient way based on the constraints that your pass meets (which are indicated by which class they derive from).

+ +

We start by showing you how to construct a pass, everything from setting up +the code, to compiling, loading, and executing it. After the basics are down, +more advanced features are discussed.

+ +
+Quick Start - Writing hello world

Here we describe how to write the "hello world" of passes. The "Hello" pass +is designed to simply print out the name of non-external functions that exist in +the program being compiled. It does not modify the program at all, it just +inspects it. The source code and files for this pass are available in the LLVM +source tree in the lib/Transforms/Hello directory.

+ +
+Setting up the build environment

First, configure and build LLVM. This needs to be done directly inside the + LLVM source tree rather than in a separate objects directory. + Next, you need to create a new directory somewhere in the LLVM source + base. For this example, we'll assume that you made + lib/Transforms/Hello. Finally, you must set up a build script + (Makefile) that will compile the source code for the new pass. To do this, + copy the following into Makefile:

+
+ +
+# Makefile for hello pass
+
+# Path to top level of LLVM hierarchy
+LEVEL = ../../..
+
+# Name of the library to build
+LIBRARYNAME = Hello
+
+# Make the shared library become a loadable module so the tools can 
+# dlopen/dlsym on the resulting library.
+LOADABLE_MODULE = 1
+
+# Include the makefile implementation stuff
+include $(LEVEL)/Makefile.common
+
+ +

This makefile specifies that all of the .cpp files in the current directory are to be compiled and linked together into a shared object $(LEVEL)/Debug+Asserts/lib/Hello.so that can be dynamically loaded by the opt or bugpoint tools via their -load options. If your operating system uses a suffix other than .so (such as Windows or Mac OS X), the appropriate extension will be used.

+ +

Now that we have the build scripts set up, we just need to write the code for +the pass itself.

+ +
+Basic code required

Now that we have a way to compile our new pass, we just have to write it. +Start out with:

+ +
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/raw_ostream.h"
+
+ +

Which are needed because we are writing a Pass, +we are operating on Function's, +and we will be doing some printing.

+ +

Next we have:

+
+using namespace llvm;
+
+

... which is required because the functions from the include files +live in the llvm namespace. +

+ +

Next we have:

+ +
+namespace {
+
+ +

... which starts out an anonymous namespace. Anonymous namespaces are to C++ +what the "static" keyword is to C (at global scope). It makes the +things declared inside of the anonymous namespace only visible to the current +file. If you're not familiar with them, consult a decent C++ book for more +information.

+ +

Next, we declare our pass itself:

+ +
+  struct Hello : public FunctionPass {
+

+ +

This declares a "Hello" class that is a subclass of FunctionPass. The different builtin pass subclasses are described in detail later, but for now, know that FunctionPass's operate on a function at a time.

+ +
+     static char ID;
+     Hello() : FunctionPass(ID) {}
+

+ +

This declares the pass identifier used by LLVM to identify the pass. This allows LLVM to avoid using expensive C++ runtime information.

+ +
+    virtual bool runOnFunction(Function &F) {
+      errs() << "Hello: " << F.getName() << "\n";
+      return false;
+    }
+  };  // end of struct Hello
+
+ +

We declare a "runOnFunction" method, which overrides an abstract virtual method inherited from FunctionPass. This is where we are supposed to do our thing, so we just print out our message with the name of each function.

+ +
+  char Hello::ID = 0;
+
+ +

We initialize the pass ID here. LLVM uses the ID's address to identify the pass, so the initialization value is not important.

+ +
+  static RegisterPass<Hello> X("hello", "Hello World Pass",
+                        false /* Only looks at CFG */,
+                        false /* Analysis Pass */);
+}  // end of anonymous namespace
+
+ +

Lastly, we register our class Hello, giving it a command line argument "hello" and a name "Hello World Pass". The last two arguments describe its behavior: if a pass walks the CFG without modifying it, then the third argument is set to true; if a pass is an analysis pass, for example a dominator tree pass, then true is supplied as the fourth argument.

+ +

As a whole, the .cpp file looks like:

+ +
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+  struct Hello : public FunctionPass {
+    
+    static char ID;
+    Hello() : FunctionPass(ID) {}
+
+    virtual bool runOnFunction(Function &F) {
+      errs() << "Hello: " << F.getName() << "\n";
+      return false;
+    }
+  };
+  
+  char Hello::ID = 0;
+  static RegisterPass<Hello> X("hello", "Hello World Pass", false, false);
+}
+
+
+ +

Now that it's all together, compile the file with a simple "gmake" +command in the local directory and you should get a new file +"Debug+Asserts/lib/Hello.so" under the top level directory of the LLVM +source tree (not in the local directory). Note that everything in this file is +contained in an anonymous namespace: this reflects the fact that passes are self +contained units that do not need external interfaces (although they can have +them) to be useful.

+ +
+Running a pass with opt

Now that you have a brand new shiny shared object file, we can use the +opt command to run an LLVM program through your pass. Because you +registered your pass with RegisterPass, you will be able to +use the opt tool to access it, once loaded.

+ +

To test it, follow the example at the end of the Getting Started Guide to compile "Hello World" to LLVM. We can now run the bitcode file (hello.bc) for the program through our transformation like this (of course, any bitcode file will work):

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+
+ +

The '-load' option specifies that 'opt' should load your +pass as a shared object, which makes '-hello' a valid command line +argument (which is one reason you need to register your +pass). Because the hello pass does not modify the program in any +interesting way, we just throw away the result of opt (sending it to +/dev/null).

+ +

To see what happened to the other string you registered, try running +opt with the -help option:

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -help
+OVERVIEW: llvm .bc -> .bc modular optimizer
+
+USAGE: opt [options] <input bitcode>
+
+OPTIONS:
+  Optimizations available:
+...
+    -funcresolve    - Resolve Functions
+    -gcse           - Global Common Subexpression Elimination
+    -globaldce      - Dead Global Elimination
+    -hello          - Hello World Pass
+    -indvars        - Canonicalize Induction Variables
+    -inline         - Function Integration/Inlining
+    -instcombine    - Combine redundant instructions
+...
+
+ +

The pass name gets added as the information string for your pass, giving some documentation to users of opt. Now that you have a working pass, you would go ahead and make it do the cool transformations you want. Once you get it all working and tested, it may become useful to find out how fast your pass is. The PassManager provides a nice command line option (--time-passes) that allows you to get information about the execution time of your pass along with the other passes you queue up. For example:

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null
+Hello: __main
+Hello: puts
+Hello: main
+===============================================================================
+                      ... Pass execution timing report ...
+===============================================================================
+  Total Execution Time: 0.02 seconds (0.0479059 wall clock)
+
+   ---User Time---   --System Time--   --User+System--   ---Wall Time---  --- Pass Name ---
+   0.0100 (100.0%)   0.0000 (  0.0%)   0.0100 ( 50.0%)   0.0402 ( 84.0%)  Bitcode Writer
+   0.0000 (  0.0%)   0.0100 (100.0%)   0.0100 ( 50.0%)   0.0031 (  6.4%)  Dominator Set Construction
+   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0013 (  2.7%)  Module Verifier
+   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0000 (  0.0%)   0.0033 (  6.9%)  Hello World Pass
+   0.0100 (100.0%)   0.0100 (100.0%)   0.0200 (100.0%)   0.0479 (100.0%)  TOTAL
+
+ +

As you can see, our implementation above is pretty fast :). The additional +passes listed are automatically inserted by the 'opt' tool to verify +that the LLVM emitted by your pass is still valid and well formed LLVM, which +hasn't been broken somehow.

+ +

Now that you have seen the basics of the mechanics behind passes, we can talk +about some more details of how they work and how to use them.

+ +
+Pass classes and requirements

One of the first things that you should do when designing a new pass is to +decide what class you should subclass for your pass. The Hello World example uses the FunctionPass class for its implementation, but we +did not discuss why or when this should occur. Here we talk about the classes +available, from the most general to the most specific.

+ +

When choosing a superclass for your Pass, you should choose the most +specific class possible, while still being able to meet the requirements +listed. This gives the LLVM Pass Infrastructure information necessary to +optimize how passes are run, so that the resultant compiler isn't unnecessarily +slow.

+ +
+The ImmutablePass class

The most plain and boring type of pass is the "ImmutablePass" +class. This pass type is used for passes that do not have to be run, do not +change state, and never need to be updated. This is not a normal type of +transformation or analysis, but can provide information about the current +compiler configuration.

+ +

Although this pass class is very infrequently used, it is important for +providing information about the current target machine being compiled for, and +other static information that can affect the various transformations.

+ +

ImmutablePasses never invalidate other transformations, are never +invalidated, and are never "run".

+ +
+The ModulePass class

The "ModulePass" +class is the most general of all superclasses that you can use. Deriving from +ModulePass indicates that your pass uses the entire program as a unit, +referring to function bodies in no predictable order, or adding and removing +functions. Because nothing is known about the behavior of ModulePass +subclasses, no optimization can be done for their execution.

+ +

A module pass can use function level passes (e.g. dominators) using +the getAnalysis interface +getAnalysis<DominatorTree>(llvm::Function *) to provide the +function to retrieve analysis result for, if the function pass does not require +any module or immutable passes. Note that this can only be done for functions for which the +analysis ran, e.g. in the case of dominators you should only ask for the +DominatorTree for function definitions, not declarations.

+ +
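+A minimal sketch of that pattern (MyModulePass is hypothetical, and the pass
+must also declare its requirement on DominatorTree in getAnalysisUsage):
+
+bool MyModulePass::runOnModule(Module &M) {
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration())
+      continue;   // no dominator information for declarations
+    DominatorTree &DT = getAnalysis<DominatorTree>(*F);
+    // ... use DT for this function ...
+  }
+  return false;
+}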

To write a correct ModulePass subclass, derive from +ModulePass and overload the runOnModule method with the +following signature:

+ +
+The runOnModule method
+  virtual bool runOnModule(Module &M) = 0;
+
+ +

The runOnModule method performs the interesting work of the pass. +It should return true if the module was modified by the transformation and +false otherwise.

+ +
+The CallGraphSCCPass class

The "CallGraphSCCPass" +is used by passes that need to traverse the program bottom-up on the call graph +(callees before callers). Deriving from CallGraphSCCPass provides some +mechanics for building and traversing the CallGraph, but also allows the system +to optimize execution of CallGraphSCCPass's. If your pass meets the +requirements outlined below, and doesn't meet the requirements of a FunctionPass or BasicBlockPass, you should derive from +CallGraphSCCPass.

+ +

(Briefly: an SCC, or strongly connected component, is a maximal set of call graph nodes that are all mutually reachable; the pass manager computes SCCs with Tarjan's algorithm and visits them bottom-up (B-U), i.e. callees before callers.)

+ +

To be explicit, CallGraphSCCPass subclasses are:

+ +
  1. ... not allowed to inspect or modify any Functions other than those in the current SCC and the direct callers and direct callees of the SCC.
  2. ... required to preserve the current CallGraph object, updating it to reflect any changes made to the program.
  3. ... not allowed to add or remove SCC's from the current Module, though they may change the contents of an SCC.
  4. ... allowed to add or remove global variables from the current Module.
  5. ... allowed to maintain state across invocations of runOnSCC (including global data).
+ +

Implementing a CallGraphSCCPass is slightly tricky in some cases because it has to handle SCCs with more than one node in them. All of the virtual methods described below should return true if they modified the program, or false if they didn't.

+ +
+ + + + +
+ +
+  virtual bool doInitialization(CallGraph &CG);
+
+ +

The doInitialization method is allowed to do most of the things that CallGraphSCCPass's are not allowed to do. They can add and remove functions, get pointers to functions, etc. The doInitialization method is designed to do simple initialization that does not depend on the SCCs being processed. The doInitialization method call is not scheduled to overlap with any other pass executions (thus it should be very fast).

+ +
+ + + + +
+ +
+  virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
+
+ +

The runOnSCC method performs the interesting work of the pass, and +should return true if the module was modified by the transformation, false +otherwise.
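For illustration, a minimal runOnSCC might walk the functions in the current SCC like this (a sketch only; the null check skips the external call graph node, and declarations have no body to inspect):

+  virtual bool runOnSCC(CallGraphSCC &SCC) {
+    for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+      Function *F = (*I)->getFunction();   // may be null for the external node
+      if (F && !F->isDeclaration())
+        errs() << "in SCC: " << F->getName() << "\n";
+    }
+    return false;                          // nothing was modified
+  }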

+ +
+ + + + +
+ +
+  virtual bool doFinalization(CallGraph &CG);
+
+ +

The doFinalization method is an infrequently used method that is called when the pass framework has finished calling runOnSCC for every SCC in the program being compiled.

+ +
+ + + + +
+ +

In contrast to ModulePass subclasses, FunctionPass subclasses do have a predictable, local behavior that can be expected by the system. All FunctionPasses execute on each function in the program independently of all of the other functions in the program. FunctionPasses do not require that they are executed in a particular order, and FunctionPasses do not modify external functions.

+ +

To be explicit, FunctionPass subclasses are not allowed to:

+ +
  1. Modify a Function other than the one currently being processed.
  2. Add or remove Functions from the current Module.
  3. Add or remove global variables from the current Module.
  4. Maintain state across invocations of runOnFunction (including global data).
+ +

Implementing a FunctionPass is usually straightforward (See the Hello World pass for example). FunctionPass's +may overload three virtual methods to do their work. All of these methods +should return true if they modified the program, or false if they didn't.

+ +
+ + + + +
+ +
+  virtual bool doInitialization(Module &M);
+
+ +

The doInitialization method is allowed to do most of the things that FunctionPass's are not allowed to do. They can add and remove functions, get pointers to functions, etc. The doInitialization method is designed to do simple initialization that does not depend on the functions being processed. The doInitialization method call is not scheduled to overlap with any other pass executions (thus it should be very fast).

+ +

A good example of how this method should be used is the LowerAllocations +pass. This pass converts malloc and free instructions into +platform dependent malloc() and free() function calls. It +uses the doInitialization method to get a reference to the malloc and +free functions that it needs, adding prototypes to the module if necessary.
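A sketch of that pattern, with illustrative names (FreeFunc would be a Constant* member of the pass): it fetches, or inserts a prototype for, free in doInitialization so the run* methods can use it later.

+  virtual bool doInitialization(Module &M) {
+    // void free(i8*): reuse the existing declaration, or insert a prototype.
+    FreeFunc = M.getOrInsertFunction("free",
+                                     Type::getVoidTy(M.getContext()),
+                                     Type::getInt8PtrTy(M.getContext()),
+                                     (Type *)0);
+    return true;   // we may have added a prototype to the module
+  }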

+ +
+ + + + +
+ +
+  virtual bool runOnFunction(Function &F) = 0;
+

+ +

The runOnFunction method must be implemented by your subclass to do +the transformation or analysis work of your pass. As usual, a true value should +be returned if the function is modified.

+ +
+ + + + +
+ +
+  virtual bool doFinalization(Module &M);
+
+ +

The doFinalization method is an infrequently used method that is +called when the pass framework has finished calling runOnFunction for every function in the +program being compiled.

+ +
+ + + + +
+ +

All LoopPasses execute on each loop in the function independently of all of the other loops in the function. LoopPasses process loops in loop nest order, such that the outermost loop is processed last.

+ +

LoopPass subclasses are allowed to update the loop nest using the LPPassManager interface. Implementing a loop pass is usually straightforward. LoopPasses may overload three virtual methods to do their work. All these methods should return true if they modified the program, or false if they didn't.

+
+ + + + +
+ +
+  virtual bool doInitialization(Loop *, LPPassManager &LPM);
+
+ +

The doInitialization method is designed to do simple initialization that does not depend on the functions being processed. The doInitialization method call is not scheduled to overlap with any other pass executions (thus it should be very fast). The LPPassManager interface should be used to access Function or Module level analysis information.

+ +
+ + + + + +
+ +
+  virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0;
+

+ +

The runOnLoop method must be implemented by your subclass to do the transformation or analysis work of your pass. As usual, a true value should be returned if the function is modified. The LPPassManager interface should be used to update the loop nest.
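For example, a purely analytical runOnLoop might look like this sketch (illustrative only; it changes nothing, so it returns false):

+  virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
+    errs() << "loop at depth " << L->getLoopDepth()
+           << " with " << L->getNumBackEdges() << " back edges\n";
+    return false;   // the loop nest is unchanged
+  }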

+ +
+ + + + +
+ +
+  virtual bool doFinalization();
+
+ +

The doFinalization method is an infrequently used method that is +called when the pass framework has finished calling runOnLoop for every loop in the +program being compiled.

+ +
+ + + + +
+ +

RegionPass is similar to LoopPass, but executes on each single-entry, single-exit region in the function. RegionPasses process regions in nested order, such that the outermost region is processed last.

+ +

RegionPass subclasses are allowed to update the region tree by using the RGPassManager interface. Implementing your own region pass is usually straightforward: you may overload three virtual methods of RegionPass to do the work. All these methods should return true if they modified the program, or false if they did not.

+
+ + + + +
+ +
+  virtual bool doInitialization(Region *, RGPassManager &RGM);
+
+ +

The doInitialization method is designed to do simple initialization that does not depend on the functions being processed. The doInitialization method call is not scheduled to overlap with any other pass executions (thus it should be very fast). The RGPassManager interface should be used to access Function or Module level analysis information.

+ +
+ + + + + +
+ +
+  virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0;
+

+ +

The runOnRegion method must be implemented by your subclass to do the transformation or analysis work of your pass. As usual, a true value should be returned if the region is modified. The RGPassManager interface should be used to update the region tree.
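A corresponding sketch for a region pass (illustrative only):

+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+    // Every region has an entry block; we only read it here.
+    errs() << "region entered at: " << R->getEntry()->getName() << "\n";
+    return false;   // the region tree is unchanged
+  }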

+ +
+ + + + +
+ +
+  virtual bool doFinalization();
+
+ +

The doFinalization method is an infrequently used method that is +called when the pass framework has finished calling runOnRegion for every region in the +program being compiled.

+ +
+ + + + + + +
+ +

BasicBlockPass's are just like FunctionPass's, except that they must limit +their scope of inspection and modification to a single basic block at a time. +As such, they are not allowed to do any of the following:

+ +
  1. Modify or inspect any basic blocks outside of the current one.
  2. Maintain state across invocations of runOnBasicBlock.
  3. Modify the control flow graph (by altering terminator instructions).
  4. Any of the things forbidden for FunctionPasses.
+ +

BasicBlockPasses are useful for traditional local and "peephole" optimizations. They may override the same doInitialization(Module &) and doFinalization(Module &) methods that FunctionPasses have, and additionally have the following virtual methods that may be implemented:

+ +
+ + + + +
+ +
+  virtual bool doInitialization(Function &F);
+
+ +

The doInitialization method is allowed to do most of the things that BasicBlockPass's are not allowed to do, but that FunctionPass's can. The doInitialization method is designed to do simple initialization that does not depend on the BasicBlocks being processed. The doInitialization method call is not scheduled to overlap with any other pass executions (thus it should be very fast).

+ +
+ + + + +
+ +
+  virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
+
+ +

Override this function to do the work of the BasicBlockPass. This function is not allowed to inspect or modify basic blocks other than the parameter, and is not allowed to modify the CFG. A true value must be returned if the basic block is modified.
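For instance, a trivial peephole-style skeleton (a sketch; it only counts, so it returns false):

+  virtual bool runOnBasicBlock(BasicBlock &BB) {
+    unsigned NumInsts = 0;
+    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+      ++NumInsts;                 // look at each instruction in this block only
+    errs() << "block with " << NumInsts << " instructions\n";
+    return false;                 // the block was not modified
+  }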

+ +
+ + + + +
+ +
+  virtual bool doFinalization(Function &F);
+
+ +

The doFinalization method is an infrequently used method that is +called when the pass framework has finished calling runOnBasicBlock for every BasicBlock in the +program being compiled. This can be used to perform per-function +finalization.

+ +
+ + + + +
+ +

A MachineFunctionPass is a part of the LLVM code generator that +executes on the machine-dependent representation of each LLVM function in the +program.

+ +

Code generator passes are registered and initialized specially by +TargetMachine::addPassesToEmitFile and similar routines, so they +cannot generally be run from the opt or bugpoint +commands.

+ +

A MachineFunctionPass is also a FunctionPass, so all +the restrictions that apply to a FunctionPass also apply to it. +MachineFunctionPasses also have additional restrictions. In particular, +MachineFunctionPasses are not allowed to do any of the following:

+ +
  1. Modify or create any LLVM IR Instructions, BasicBlocks, Arguments, Functions, GlobalVariables, GlobalAliases, or Modules.
  2. Modify a MachineFunction other than the one currently being processed.
  3. Maintain state across invocations of runOnMachineFunction (including global data).
+ +
+ + + + +
+ +
+  virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
+
+ +

runOnMachineFunction can be considered the main entry point of a +MachineFunctionPass; that is, you should override this method to do the +work of your MachineFunctionPass.

+ +

The runOnMachineFunction method is called on every +MachineFunction in a Module, so that the +MachineFunctionPass may perform optimizations on the machine-dependent +representation of the function. If you want to get at the LLVM Function +for the MachineFunction you're working on, use +MachineFunction's getFunction() accessor method -- but +remember, you may not modify the LLVM Function or its contents from a +MachineFunctionPass.
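A minimal sketch honoring those restrictions (read-only use of the IR Function):

+  virtual bool runOnMachineFunction(MachineFunction &MF) {
+    const Function *F = MF.getFunction();  // read-only: may not modify the IR
+    errs() << "machine code for " << F->getName() << ": "
+           << MF.size() << " machine basic blocks\n";
+    return false;
+  }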

+ +
+ + + + + +
+ +

In the Hello World example pass we illustrated how +pass registration works, and discussed some of the reasons that it is used and +what it does. Here we discuss how and why passes are registered.

+ +

As we saw above, passes are registered with the RegisterPass template. The template parameter is the class of your pass. The first constructor argument is the name of the pass as it is used on the command line to specify that the pass should be added to a program (for example, with opt or bugpoint); the second is the descriptive name used for the -help output of programs, as well as for debug output generated by the --debug-pass option.

+ +

If you want your pass to be easily dumpable, you should +implement the virtual print method:

+ +
+ + + + +
+ +
+  virtual void print(std::ostream &O, const Module *M) const;
+
+ +

The print method must be implemented by "analyses" in order to print +a human readable version of the analysis results. This is useful for debugging +an analysis itself, as well as for other people to figure out how an analysis +works. Use the opt -analyze argument to invoke this method.

+ +

The std::ostream parameter specifies the stream to write the results to, and the Module parameter gives a pointer to the top level module of the program that has been analyzed. Note however that this pointer may be null in certain circumstances (such as calling Pass::dump() from a debugger), so it should only be used to enhance debug output; it should not be depended on.
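A sketch of a typical implementation (NumWidgets stands in for whatever result your analysis computes):

+  virtual void print(std::ostream &O, const Module *M) const {
+    O << "widget count = " << NumWidgets << "\n";
+    if (M)   // the Module may be null, e.g. when called from Pass::dump()
+      O << "  (for module: " << M->getModuleIdentifier() << ")\n";
+  }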

+ +
+ + + + + +
+ +

One of the main responsibilities of the PassManager is to make sure +that passes interact with each other correctly. Because PassManager +tries to optimize the execution of passes it must +know how the passes interact with each other and what dependencies exist between +the various passes. To track this, each pass can declare the set of passes that +are required to be executed before the current pass, and the passes which are +invalidated by the current pass.

+ +

Typically this functionality is used to require that analysis results are +computed before your pass is run. Running arbitrary transformation passes can +invalidate the computed analysis results, which is what the invalidation set +specifies. If a pass does not implement the getAnalysisUsage method, it defaults to not +having any prerequisite passes, and invalidating all other passes.

+ +
+ + + + +
+ +
+  virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+
+ +

By implementing the getAnalysisUsage method, the required and +invalidated sets may be specified for your transformation. The implementation +should fill in the AnalysisUsage +object with information about which passes are required and not invalidated. To +do this, a pass may call any of the following methods on the AnalysisUsage +object:

+
+ + + + +
+

+If your pass requires a previous pass to be executed (an analysis for example), +it can use one of these methods to arrange for it to be run before your pass. +LLVM has many different types of analyses and passes that can be required, +spanning the range from DominatorSet to BreakCriticalEdges. +Requiring BreakCriticalEdges, for example, guarantees that there will +be no critical edges in the CFG when your pass has been run. +

+ +

+Some analyses chain to other analyses to do their job. For example, an AliasAnalysis implementation is required to chain to other alias analysis passes. In +cases where analyses chain, the addRequiredTransitive method should be +used instead of the addRequired method. This informs the PassManager +that the transitively required pass should be alive as long as the requiring +pass is. +
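A sketch of the two flavors side by side (MyPass and MyAA are hypothetical names):

+  void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequired<LoopInfo>();                  // ordinary requirement
+  }
+
+  void MyAA::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.addRequiredTransitive<AliasAnalysis>();   // we chain to other AA passes
+  }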

+
+ + + + +
+

+One of the jobs of the PassManager is to optimize how and when analyses are run. +In particular, it attempts to avoid recomputing data unless it needs to. For +this reason, passes are allowed to declare that they preserve (i.e., they don't +invalidate) an existing analysis if it's available. For example, a simple +constant folding pass would not modify the CFG, so it can't possibly affect the +results of dominator analysis. By default, all passes are assumed to invalidate +all others. +

+ +

+The AnalysisUsage class provides several methods which are useful in +certain circumstances that are related to addPreserved. In particular, +the setPreservesAll method can be called to indicate that the pass does +not modify the LLVM program at all (which is true for analyses), and the +setPreservesCFG method can be used by transformations that change +instructions in the program but do not modify the CFG or terminator instructions +(note that this property is implicitly set for BasicBlockPass's). +

+ +

+addPreserved is particularly useful for transformations like +BreakCriticalEdges. This pass knows how to update a small set of loop +and dominator related analyses if they exist, so it can preserve them, despite +the fact that it hacks on the CFG. +
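In sketch form (the pass name is hypothetical; the real BreakCriticalEdges preserves a similar set):

+  void MyCFGHackingPass::getAnalysisUsage(AnalysisUsage &AU) const {
+    // We change the CFG, but we know how to update these analyses in place,
+    // so we declare that we preserve them if they happen to be alive.
+    AU.addPreserved<LoopInfo>();
+    AU.addPreserved<DominatorTree>();
+  }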

+
+ + + + +
+ +
+  // This is an example implementation from an analysis, which does not modify
+  // the program at all, yet has a prerequisite.
+  void PostDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<PostDominatorTree>();
+  }
+
+ +

and:

+ +
+  // This example modifies the program, but does not modify the CFG
+  void LICM::getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesCFG();
+    AU.addRequired<LoopInfo>();
+  }
+
+ +
+ + + + +
+ +

The Pass::getAnalysis<> method is automatically inherited by +your class, providing you with access to the passes that you declared that you +required with the getAnalysisUsage +method. It takes a single template argument that specifies which pass class you +want, and returns a reference to that pass. For example:

+ +
+   bool LICM::runOnFunction(Function &F) {
+     LoopInfo &LI = getAnalysis<LoopInfo>();
+     ...
+   }
+
+ +

This method call returns a reference to the pass desired. You may get a +runtime assertion failure if you attempt to get an analysis that you did not +declare as required in your getAnalysisUsage implementation. This +method can be called by your run* method implementation, or by any +other local method invoked by your run* method. + +A module level pass can use function level analysis info using this interface. +For example:

+ +
+   bool ModuleLevelPass::runOnModule(Module &M) {
+     ...
+     DominatorTree &DT = getAnalysis<DominatorTree>(Func);
+     ...
+   }
+
+ +

In the above example, runOnFunction for DominatorTree is called by the pass manager before returning a reference to the desired pass.

+ +

+If your pass is capable of updating analyses if they exist (e.g., +BreakCriticalEdges, as described above), you can use the +getAnalysisIfAvailable method, which returns a pointer to the analysis +if it is active. For example:

+ +
+  ...
+  if (DominatorSet *DS = getAnalysisIfAvailable<DominatorSet>()) {
+    // A DominatorSet is active.  This code will update it.
+  }
+  ...
+
+ +
+ + + + + +
+ +

Now that we understand the basics of how passes are defined, how they are +used, and how they are required from other passes, it's time to get a little bit +fancier. All of the pass relationships that we have seen so far are very +simple: one pass depends on one other specific pass to be run before it can run. +For many applications, this is great, for others, more flexibility is +required.

+ +

In particular, some analyses are defined such that there is a single simple interface to the analysis results, but multiple ways of calculating them. Consider alias analysis for example. The most trivial alias analysis returns "may alias" for any alias query. The most sophisticated analysis is a flow-sensitive, context-sensitive interprocedural analysis that can take a significant amount of time to execute (and obviously, there is a lot of room between these two extremes for other implementations). To cleanly support situations like this, the LLVM Pass Infrastructure supports the notion of Analysis Groups.

+ +
+ + + + +
+ +

An Analysis Group is a single simple interface that may be implemented by +multiple different passes. Analysis Groups can be given human readable names +just like passes, but unlike passes, they need not derive from the Pass +class. An analysis group may have one or more implementations, one of which is +the "default" implementation.

+ +

Analysis groups are used by client passes just like other passes are, via the AnalysisUsage::addRequired() and Pass::getAnalysis() methods. In order to resolve this requirement, the PassManager scans the available passes to see if any implementations of the analysis group are available. If none is available, the default implementation is created for the pass to use. All standard rules for interaction between passes still apply.

+ +

Although Pass Registration is optional for normal +passes, all analysis group implementations must be registered, and must use the +INITIALIZE_AG_PASS template to join the +implementation pool. Also, a default implementation of the interface +must be registered with RegisterAnalysisGroup.

+ +

As a concrete example of an Analysis Group in action, consider the AliasAnalysis +analysis group. The default implementation of the alias analysis interface (the +basicaa +pass) just does a few simple checks that don't require significant analysis to +compute (such as: two different globals can never alias each other, etc). +Passes that use the AliasAnalysis +interface (for example the gcse pass), do +not care which implementation of alias analysis is actually provided, they just +use the designated interface.

+ +

From the user's perspective, commands work just like normal. Issuing the +command 'opt -gcse ...' will cause the basicaa class to be +instantiated and added to the pass sequence. Issuing the command 'opt +-somefancyaa -gcse ...' will cause the gcse pass to use the +somefancyaa alias analysis (which doesn't actually exist, it's just a +hypothetical example) instead.

+ +
+ + + + +
+ +

The RegisterAnalysisGroup template is used to register the analysis +group itself, while the INITIALIZE_AG_PASS is used to add pass +implementations to the analysis group. First, +an analysis group should be registered, with a human readable name +provided for it. +Unlike registration of passes, there is no command line argument to be specified +for the Analysis Group Interface itself, because it is "abstract":

+ +
+  static RegisterAnalysisGroup<AliasAnalysis> A("Alias Analysis");
+
+ +

Once the analysis is registered, passes can declare that they are valid +implementations of the interface by using the following code:

+ +
+namespace {
+  // Declare that we implement the AliasAnalysis interface
+  INITIALIZE_AG_PASS(FancyAA, AliasAnalysis, "somefancyaa",
+                     "A more complex alias analysis implementation",
+                     false, // Is CFG Only?
+                     true,  // Is Analysis?
+                     false, // Is default Analysis Group implementation?
+                    );
+}
+
+ +

This just shows a class FancyAA that +uses the INITIALIZE_AG_PASS macro both to register and +to "join" the AliasAnalysis +analysis group. Every implementation of an analysis group should join using +this macro.

+ +
+namespace {
+  // Declare that we implement the AliasAnalysis interface
+  INITIALIZE_AG_PASS(BasicAA, AliasAnalysis, "basicaa",
+                     "Basic Alias Analysis (default AA impl)",
+                     false, // Is CFG Only?
+                     true,  // Is Analysis?
+                     true, // Is default Analysis Group implementation?
+                    );
+}
+
+ +

Here we show how the default implementation is specified (using the final argument to the INITIALIZE_AG_PASS template). There must be exactly one default implementation available at all times for an Analysis Group to be used. Only the default implementation can derive from ImmutablePass. Here we declare that the BasicAliasAnalysis pass is the default implementation for the interface.

+ +
+ + + + + +
+

The Statistic class is designed to be an easy way to expose various success metrics from passes. These statistics are printed at the end of a run, when the -stats command line option is given. See the Statistics section in the Programmer's Manual for details.
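A sketch of the usual idiom (the counter name and DEBUG_TYPE value are illustrative):

+#define DEBUG_TYPE "mypassname"        // must precede the Statistic.h include
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumErased, "Number of instructions erased");
+
+// ... later, inside the pass:
+//   ++NumErased;                      // counted and printed under -stats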

+ + + + + + +
+ +

The PassManager +class +takes a list of passes, ensures their prerequisites +are set up correctly, and then schedules passes to run efficiently. All of the +LLVM tools that run passes use the PassManager for execution of these +passes.

+ +

The PassManager does two main things to try to reduce the execution +time of a series of passes:

+ +
  1. Share analysis results - The PassManager attempts to avoid recomputing analysis results as much as possible. This means keeping track of which analyses are already available, which analyses get invalidated, and which analyses need to be run for a pass. An important part of this work is that the PassManager tracks the exact lifetime of all analysis results, allowing it to free the memory allocated to hold analysis results as soon as they are no longer needed.

  2. Pipeline the execution of passes on the program - The PassManager attempts to get better cache and memory usage behavior out of a series of passes by pipelining the passes together. This means that, given a series of consecutive FunctionPasses, it will execute all of the FunctionPasses on the first function, then all of the FunctionPasses on the second function, etc... until the entire program has been run through the passes.

     This improves the cache behavior of the compiler, because it is only touching the LLVM program representation for a single function at a time, instead of traversing the entire program. It reduces the memory consumption of the compiler because, for example, only one DominatorSet needs to be calculated at a time. This also makes it possible to implement some interesting enhancements in the future.

The effectiveness of the PassManager is influenced directly by how much information it has about the behaviors of the passes it is scheduling. For example, the "preserved" set is intentionally conservative in the face of an unimplemented getAnalysisUsage method. Not implementing getAnalysisUsage when it should be implemented has the effect of not allowing any analysis results to live across the execution of your pass.

+ +

The PassManager class exposes a --debug-pass command line option that is useful for debugging pass execution, seeing how things work, and diagnosing when you should be preserving more analyses than you currently are. (To get information about all of the variants of the --debug-pass option, just type 'opt -help-hidden'.)

+ +

By using the --debug-pass=Structure option, for example, we can see how our Hello World pass interacts with other passes. Let's try it out with the gcse and licm passes:

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null
+Module Pass Manager
+  Function Pass Manager
+    Dominator Set Construction
+    Immediate Dominators Construction
+    Global Common Subexpression Elimination
+--  Immediate Dominators Construction
+--  Global Common Subexpression Elimination
+    Natural Loop Construction
+    Loop Invariant Code Motion
+--  Natural Loop Construction
+--  Loop Invariant Code Motion
+    Module Verifier
+--  Dominator Set Construction
+--  Module Verifier
+  Bitcode Writer
+--Bitcode Writer
+
+ +

This output shows us when passes are constructed and when the analysis +results are known to be dead (prefixed with '--'). Here we see that +GCSE uses dominator and immediate dominator information to do its job. The LICM +pass uses natural loop information, which uses dominator sets, but not immediate +dominators. Because immediate dominators are no longer useful after the GCSE +pass, it is immediately destroyed. The dominator sets are then reused to +compute natural loop information, which is then used by the LICM pass.

+ +

After the LICM pass, the module verifier runs (which is automatically added +by the 'opt' tool), which uses the dominator set to check that the +resultant LLVM code is well formed. After it finishes, the dominator set +information is destroyed, after being computed once, and shared by three +passes.

+ +

Let's see how this changes when we run the Hello World pass in between the two passes:

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+Module Pass Manager
+  Function Pass Manager
+    Dominator Set Construction
+    Immediate Dominators Construction
+    Global Common Subexpression Elimination
+--  Dominator Set Construction
+--  Immediate Dominators Construction
+--  Global Common Subexpression Elimination
+    Hello World Pass
+--  Hello World Pass
+    Dominator Set Construction
+    Natural Loop Construction
+    Loop Invariant Code Motion
+--  Natural Loop Construction
+--  Loop Invariant Code Motion
+    Module Verifier
+--  Dominator Set Construction
+--  Module Verifier
+  Bitcode Writer
+--Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+
+ +

Here we see that the Hello World pass has killed the +Dominator Set pass, even though it doesn't modify the code at all! To fix this, +we need to add the following getAnalysisUsage method to our pass:

+ +
+    // We don't modify the program, so we preserve all analyses
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+ +

Now when we run our pass, we get this output:

+ +
+$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
+Pass Arguments:  -gcse -hello -licm
+Module Pass Manager
+  Function Pass Manager
+    Dominator Set Construction
+    Immediate Dominators Construction
+    Global Common Subexpression Elimination
+--  Immediate Dominators Construction
+--  Global Common Subexpression Elimination
+    Hello World Pass
+--  Hello World Pass
+    Natural Loop Construction
+    Loop Invariant Code Motion
+--  Loop Invariant Code Motion
+--  Natural Loop Construction
+    Module Verifier
+--  Dominator Set Construction
+--  Module Verifier
+  Bitcode Writer
+--Bitcode Writer
+Hello: __main
+Hello: puts
+Hello: main
+
+ +

This shows that we no longer accidentally invalidate dominator information, and therefore do not have to compute it twice.

+ +
+ + + + +
+ +
+  virtual void releaseMemory();
+
+ +

The PassManager automatically determines when to compute analysis +results, and how long to keep them around for. Because the lifetime of the pass +object itself is effectively the entire duration of the compilation process, we +need some way to free analysis results when they are no longer useful. The +releaseMemory virtual method is the way to do this.

+ +

If you are writing an analysis or any other pass that retains a significant +amount of state (for use by another pass which "requires" your pass and uses the +getAnalysis method) you should implement +releaseMemory to, well, release the memory allocated to maintain this +internal state. This method is called after the run* method for the +class, before the next call of run* in your pass.
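In sketch form (ResultsMap stands in for whatever internal state your analysis keeps):

+  virtual void releaseMemory() {
+    ResultsMap.clear();   // drop cached analysis results; recomputed on demand
+  }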

+ +
+ + + + + +
+ +

Size matters when constructing production quality tools using LLVM, both for the purposes of distribution and for regulating the resident code size when running on the target system. Therefore, it becomes desirable to selectively use some passes, while omitting others, and to maintain the flexibility to change configurations later on. You want to be able to do all this and, in addition, provide feedback to the user. This is where pass registration comes into play.

+ +

The fundamental mechanisms for pass registration are the +MachinePassRegistry class and subclasses of +MachinePassRegistryNode.

+ +

An instance of MachinePassRegistry is used to maintain a list of +MachinePassRegistryNode objects. This instance maintains the list and +communicates additions and deletions to the command line interface.

+ +

An instance of a MachinePassRegistryNode subclass is used to maintain information provided about a particular pass. This information includes the command line name, the command help string and the address of the function used to create an instance of the pass. A global static constructor of one of these instances registers with a corresponding MachinePassRegistry; the static destructor unregisters it. Thus a pass that is statically linked in the tool will be registered at start up. A dynamically loaded pass will register on load and unregister at unload.

+ +
+ + + + +
+ +

There are predefined registries to track instruction scheduling +(RegisterScheduler) and register allocation (RegisterRegAlloc) +machine passes. Here we will describe how to register a register +allocator machine pass.

+ +

Implement your register allocator machine pass. In your register allocator .cpp file add the following include:

+ +
+  #include "llvm/CodeGen/RegAllocRegistry.h"
+
+ +

Also in your register allocator .cpp file, define a creator function in the form:

+ +
+  FunctionPass *createMyRegisterAllocator() {
+    return new MyRegisterAllocator();
+  }
+
+ +

Note that the signature of this function should match the type of RegisterRegAlloc::FunctionPassCtor. In the same file add the "installing" declaration, in the form:

+ +
+  static RegisterRegAlloc myRegAlloc("myregalloc",
+    "  my register allocator help string",
+    createMyRegisterAllocator);
+
+ +

Note that the two spaces prior to the help string produce a tidy result on the -help query.

+ +
+$ llc -help
+  ...
+  -regalloc                    - Register allocator to use (default=linearscan)
+    =linearscan                -   linear scan register allocator
+    =local                     -   local register allocator
+    =simple                    -   simple register allocator
+    =myregalloc                -   my register allocator help string
+  ...
+
+ +

And that's it. The user is now free to use -regalloc=myregalloc as +an option. Registering instruction schedulers is similar except use the +RegisterScheduler class. Note that the +RegisterScheduler::FunctionPassCtor is significantly different from +RegisterRegAlloc::FunctionPassCtor.

+ +

To force the load/linking of your register allocator into the llc/lli tools, add your creator function's global declaration to "Passes.h" and add a "pseudo" call line to llvm/CodeGen/LinkAllCodegenComponents.h.

+ +
+ + + + + +
+ +

The easiest way to get started is to clone one of the existing registries; we +recommend llvm/CodeGen/RegAllocRegistry.h. The key things to modify +are the class name and the FunctionPassCtor type.

+ +

Then you need to declare the registry. Example: if your pass registry is RegisterMyPasses then define:

+ +
+MachinePassRegistry RegisterMyPasses::Registry;
+
+ +

And finally, declare the command line option for your passes. Example:

+ +
+  cl::opt<RegisterMyPasses::FunctionPassCtor, false,
+          RegisterPassParser<RegisterMyPasses> >
+  MyPassOpt("mypass",
+            cl::init(&createDefaultMyPass),
+            cl::desc("my pass option help")); 
+
+ +

Here the command option is "mypass", with createDefaultMyPass as the default +creator.

+ +
+ + + + + +
+ +

Unfortunately, using GDB with dynamically loaded passes is not as easy as it +should be. First of all, you can't set a breakpoint in a shared object that has +not been loaded yet, and second of all there are problems with inlined functions +in shared objects. Here are some suggestions to debugging your pass with +GDB.

+ +

For the sake of discussion, I'm going to assume that you are debugging a transformation invoked by opt, although nothing described here depends on that.

+ +
+ + + + +
+ +

The first thing to do is start gdb on the opt process:

+ +
+$ gdb opt
+GNU gdb 5.0
+Copyright 2000 Free Software Foundation, Inc.
+GDB is free software, covered by the GNU General Public License, and you are
+welcome to change it and/or distribute copies of it under certain conditions.
+Type "show copying" to see the conditions.
+There is absolutely no warranty for GDB.  Type "show warranty" for details.
+This GDB was configured as "sparc-sun-solaris2.6"...
+(gdb)
+
+ +

Note that opt has a lot of debugging information in it, so it takes +time to load. Be patient. Since we cannot set a breakpoint in our pass yet +(the shared object isn't loaded until runtime), we must execute the process, and +have it stop before it invokes our pass, but after it has loaded the shared +object. The most foolproof way of doing this is to set a breakpoint in +PassManager::run and then run the process with the arguments you +want:

+ +
+(gdb) break llvm::PassManager::run
+Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
+(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
+Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
+70      bool PassManager::run(Module &M) { return PM->run(M); }
+(gdb)
+
+ +

Once opt stops in the PassManager::run method, you are free to set breakpoints in your pass so that you can trace through execution or do other standard debugging stuff.

+ +
+ + + + +
+ +

Once you have the basics down, there are a couple of problems that GDB has, +some with solutions, some without.

+ +
  • Inline functions have bogus stack information. In general, GDB does a pretty good job getting stack traces and stepping through inline functions. When a pass is dynamically loaded however, it somehow completely loses this capability. The only solution I know of is to de-inline a function (move it from the body of a class to a .cpp file).

  • Restarting the program breaks breakpoints. After following the information above, you have succeeded in getting some breakpoints planted in your pass. Next thing you know, you restart the program (i.e., you type 'run' again), and you start getting errors about breakpoints being unsettable. The only way I have found to "fix" this problem is to delete the breakpoints that are already set in your pass, run the program, and re-set the breakpoints once execution stops in PassManager::run.

Hopefully these tips will help with common case debugging situations. If +you'd like to contribute some tips of your own, just contact Chris.

+ +
+ + + + + +
+ +

Although the LLVM Pass Infrastructure is very capable as it stands, and does +some nifty stuff, there are things we'd like to add in the future. Here is +where we are going:

+ +
+ + + + +
+ +

Multiple CPU machines are becoming more common and compilation can never be +fast enough: obviously we should allow for a multithreaded compiler. Because of +the semantics defined for passes above (specifically they cannot maintain state +across invocations of their run* methods), a nice clean way to +implement a multithreaded compiler would be for the PassManager class +to create multiple instances of each pass object, and allow the separate +instances to be hacking on different parts of the program at the same time.

+ +

This implementation would prevent each of the passes from having to implement +multithreaded constructs, requiring only the LLVM core to have locking in a few +places (for global resources). Although this is a simple extension, we simply +haven't had time (or multiprocessor machines, thus a reason) to implement this. +Despite that, we have kept the LLVM passes SMP ready, and you should too.

+ +
+ + +
+
+ Chris Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + + diff --git a/final/docs/doxygen.cfg.in b/final/docs/doxygen.cfg.in new file mode 100644 index 00000000000..45b8f42e5d5 --- /dev/null +++ b/final/docs/doxygen.cfg.in @@ -0,0 +1,1419 @@ +# Doxyfile 1.5.6 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = LLVM + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = @PACKAGE_VERSION@ + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = @abs_top_builddir@/docs/doxygen + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. +# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, +# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), +# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, +# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish, +# and Ukrainian. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. 
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = ../.. + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. 
This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the DETAILS_AT_TOP tag is set to YES then Doxygen +# will output the detailed description near the top, like JavaDoc. +# If set to NO, the detailed description appears after the member +# documentation. + +DETAILS_AT_TOP = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 2 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. 
Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. 
+# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = NO + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. 
+ +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command , where is the value of +# the FILE_VERSION_FILTER tag, and is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. 
+
+WARNINGS = NO
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED = NO
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR = YES
+
+# The WARN_NO_PARAMDOC option can be enabled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC = NO
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT =
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT = @abs_top_srcdir@/include \
+        @abs_top_srcdir@/lib \
+        @abs_top_srcdir@/docs/doxygen.intro
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS =
+
+# The RECURSIVE tag can be used to specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE = YES
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# be excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE =
+
+# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH = @abs_top_srcdir@/examples
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE = YES
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain images that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH = @abs_top_srcdir@/docs/img
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output. If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis. Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match. The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER = YES
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
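+# For example, with INLINE_SOURCES set to YES the body of each documented
+# function would be reproduced verbatim inside its documentation entry, in
+# addition to being reachable through the source browser enabled by
+# SOURCE_BROWSER above.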
+
+INLINE_SOURCES = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS = NO
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = YES
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION = YES
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code. Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen's
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX = YES
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX = 4
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX = llvm::
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
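+# (A default header, footer and stylesheet skeleton can be regenerated with
+# "doxygen -w html header.html footer.html doxygen.css" and edited by hand;
+# the note at the top of the LLVM overrides in doxygen.css below records that
+# the stylesheet was originally produced this way.)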
+
+HTML_HEADER = @abs_top_srcdir@/docs/doxygen.header
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER = @abs_top_srcdir@/docs/doxygen.footer
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET = @abs_top_srcdir@/docs/doxygen.css
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS = YES
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+
+GENERATE_DOCSET = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID = org.doxygen.Project
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
+# Netscape 6.0+, Internet Explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND = NO
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to FRAME, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
+# Netscape 6.0+, Internet Explorer 5.0+, or Konqueror). Windows users are
+# probably better off using the HTML help feature. Other possible values
+# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list;
+# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
+# disables this behavior completely. For backwards compatibility with previous
+# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
+# respectively.
+
+GENERATE_TREEVIEW = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH = 250
+
+# Use this tag to change the font size of LaTeX formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE = 10
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate LaTeX output.
+
+GENERATE_LATEX = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT =
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+
+LATEX_CMD_NAME = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE = letter
+
+# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS = NO
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX = NO
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT =
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages.
+
+GENERATE_MAN = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT =
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION =
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader. This is useful
+# if you want to understand what is going on. On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
+# in the INCLUDE_PATH (see below) will be searched if a #include is found.
+
+SEARCH_INCLUDES = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH = ../include
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
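+# For example, a boiler-plate line consisting solely of a (hypothetical)
+# macro invocation such as
+#   LLVM_DEFINE_ACCESSORS(BasicBlock)
+# would be removed before parsing, while "int m = MAX(a, b);" would be kept,
+# because it is part of a statement ending in a semicolon.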
+
+SKIP_FUNCTION_MACROS = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+# TAGFILES = file1 file2 ...
+# Adding a location for the tag files is done as follows:
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS = YES
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS = NO
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default).
+
+HAVE_DOT = YES
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible Unicode characters however, so when you need
+# these (or just want a different-looking font) you can specify the font name
+# using DOT_FONTNAME. You need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME = FreeSans
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies.
+
+GROUP_GRAPHS = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will show a graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif.
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH = @DOT@
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that if the number of
+# direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lie further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is enabled by default, which results in a transparent
+# background. Warning: Depending on the platform used, enabling this option
+# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
+# become hard to read).
+
+DOT_TRANSPARENT = YES
+
+# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to the search engine
+#---------------------------------------------------------------------------
+
+# The SEARCHENGINE tag specifies whether or not a search engine should be
+# used. If set to NO the values of all tags below this one will be ignored.
+
+SEARCHENGINE = NO
diff --git a/final/docs/doxygen.css b/final/docs/doxygen.css
new file mode 100644
index 00000000000..83b049b5f1b
--- /dev/null
+++ b/final/docs/doxygen.css
@@ -0,0 +1,378 @@
+BODY,H1,H2,H3,H4,H5,H6,P,CENTER,TD,TH,UL,DL,DIV {
+ font-family: Verdana,Geneva,Arial,Helvetica,sans-serif;
+}
+BODY,TD {
+ font-size: 90%;
+}
+H1 {
+ text-align: center;
+ font-size: 140%;
+ font-weight: bold;
+}
+H2 {
+ font-size: 120%;
+ font-style: italic;
+}
+H3 {
+ font-size: 100%;
+}
+CAPTION { font-weight: bold }
+DIV.qindex {
+ width: 100%;
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ padding: 2px;
+ line-height: 140%;
+}
+DIV.nav {
+ width: 100%;
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ padding: 2px;
+ line-height: 140%;
+}
+DIV.navtab {
+ background-color: #eeeeff;
+ border: 1px solid #b0b0b0;
+ text-align: center;
+ margin: 2px;
+ margin-right: 15px;
+ padding: 2px;
+}
+TD.navtab {
+ font-size: 70%;
+}
+A.qindex {
+ text-decoration: none;
+ font-weight: bold;
+ color: #1A419D;
+}
+A.qindex:visited {
+ text-decoration: none;
+ font-weight: bold;
+ color: #1A419D
+}
+A.qindex:hover {
+ text-decoration: none;
+ background-color: #ddddff;
+}
+A.qindexHL {
+ text-decoration: none;
+ font-weight: bold;
+ background-color: #6666cc;
+ color: #ffffff;
+ border: 1px double #9295C2;
+}
+A.qindexHL:hover {
+ text-decoration: none;
+ background-color: #6666cc;
+ color: #ffffff;
+}
+A.qindexHL:visited {
+ text-decoration: none; background-color: #6666cc; color: #ffffff }
+A.el { text-decoration: none; font-weight: bold }
+A.elRef { font-weight: bold }
+A.code:link { text-decoration: none; font-weight: normal; color: #0000FF}
+A.code:visited { text-decoration: none; font-weight: normal; color: #0000FF}
+A.codeRef:link { font-weight: normal; color: #0000FF}
+A.codeRef:visited { font-weight: normal; color: #0000FF}
+A:hover { text-decoration: none; background-color: #f2f2ff }
+DL.el { margin-left: -1cm }
+.fragment {
+ font-family: Fixed, monospace;
+ font-size: 95%;
+}
+PRE.fragment {
+ border: 1px solid #CCCCCC;
+ background-color: #f5f5f5;
+ margin-top: 4px;
+ margin-bottom: 4px;
+ margin-left: 2px;
+ margin-right: 8px;
+ padding-left: 6px;
+ padding-right: 6px;
+ padding-top: 4px;
+ padding-bottom: 4px;
+}
+DIV.ah { background-color: black; font-weight: bold; color: #ffffff; margin-bottom: 3px; margin-top: 3px }
+TD.md { background-color: #F4F4FB; font-weight: bold; }
+TD.mdPrefix {
+ background-color: #F4F4FB;
+ color: #606060;
+ font-size: 80%;
+}
+TD.mdname1 { background-color: #F4F4FB; font-weight: bold; color: #602020; }
+TD.mdname { background-color: #F4F4FB; font-weight: bold; color: #602020; width: 600px; }
+DIV.groupHeader {
+ margin-left: 16px;
+ margin-top: 12px;
+ margin-bottom: 6px;
+ font-weight: bold;
+}
+DIV.groupText { margin-left: 16px; font-style: italic; font-size: 90% }
+BODY {
+ background: white;
+ color: black;
+ margin-right: 20px;
+ margin-left: 20px;
+}
+TD.indexkey {
+ background-color: #eeeeff;
+ font-weight: bold;
+ padding-right : 10px;
+ padding-top : 2px;
+ padding-left : 10px;
+ padding-bottom : 2px;
+ margin-left : 0px;
+ margin-right : 0px;
+ margin-top : 2px;
+ margin-bottom : 2px;
+ border: 1px solid #CCCCCC;
+}
+TD.indexvalue {
+ background-color: #eeeeff;
+ font-style: italic;
+ padding-right : 10px;
+ padding-top : 2px;
+ padding-left : 10px;
+ padding-bottom : 2px;
+ margin-left : 0px;
+ margin-right : 0px;
+ margin-top : 2px;
+ margin-bottom : 2px;
+ border: 1px solid #CCCCCC;
+}
+TR.memlist {
+ background-color: #f0f0f0;
+}
+P.formulaDsp { text-align: center; }
+IMG.formulaDsp { }
+IMG.formulaInl { vertical-align: middle; }
+SPAN.keyword { color: #008000 }
+SPAN.keywordtype { color: #604020 }
+SPAN.keywordflow { color: #e08000 }
+SPAN.comment { color: #800000 }
+SPAN.preprocessor { color: #806020 }
+SPAN.stringliteral { color: #002080 }
+SPAN.charliteral { color: #008080 }
+.mdTable {
+ border: 1px solid #868686;
+ background-color: #F4F4FB;
+}
+.mdRow {
+ padding: 8px 10px;
+}
+.mdescLeft {
+ padding: 0px 8px 4px 8px;
+ font-size: 80%;
+ font-style: italic;
+ background-color: #FAFAFA;
+ border-top: 1px none #E0E0E0;
+ border-right: 1px none #E0E0E0;
+ border-bottom: 1px none #E0E0E0;
+ border-left: 1px none #E0E0E0;
+ margin: 0px;
+}
+.mdescRight {
+ padding: 0px 8px 4px 8px;
+ font-size: 80%;
+ font-style: italic;
+ background-color: #FAFAFA;
+ border-top: 1px none #E0E0E0;
+ border-right: 1px none #E0E0E0;
+ border-bottom: 1px none #E0E0E0;
+ border-left: 1px none #E0E0E0;
+ margin: 0px;
+}
+.memItemLeft {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memItemRight {
+ padding: 1px 8px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplItemLeft {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: none;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplItemRight {
+ padding: 1px 8px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: none;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.memTemplParams {
+ padding: 1px 0px 0px 8px;
+ margin: 4px;
+ border-top-width: 1px;
+ border-right-width: 1px;
+ border-bottom-width: 1px;
+ border-left-width: 1px;
+ border-top-color: #E0E0E0;
+ border-right-color: #E0E0E0;
+ border-bottom-color: #E0E0E0;
+ border-left-color: #E0E0E0;
+ border-top-style: solid;
+ border-right-style: none;
+ border-bottom-style: none;
+ border-left-style: none;
+ color: #606060;
+ background-color: #FAFAFA;
+ font-size: 80%;
+}
+.search { color: #003399;
+ font-weight: bold;
+}
+FORM.search {
+ margin-bottom: 0px;
+ margin-top: 0px;
+}
+INPUT.search { font-size: 75%;
+ color: #000080;
+ font-weight: normal;
+ background-color: #eeeeff;
+}
+TD.tiny { font-size: 75%;
+}
+a {
+ color: #252E78;
+}
+a:visited {
+ color: #3D2185;
+}
+.dirtab { padding: 4px;
+ border-collapse: collapse;
+ border: 1px solid #b0b0b0;
+}
+TH.dirtab { background: #eeeeff;
+ font-weight: bold;
+}
+HR { height: 1px;
+ border: none;
+ border-top: 1px solid black;
+}
+
+/*
+ * LLVM Modifications.
+ * Note: Everything above here is generated with "doxygen -w html" command. See
+ * "doxygen --help" for details. What follows are CSS overrides for LLVM
+ * specific formatting. We want to keep the above so it can be replaced with
+ * subsequent doxygen upgrades.
+ */
+
+.footer {
+ font-size: 80%;
+ font-weight: bold;
+ text-align: center;
+ vertical-align: middle;
+}
+.title {
+ font-size: 25pt;
+ color: black; background: url("../img/lines.gif");
+ font-weight: bold;
+ border-width: 1px;
+ border-style: solid none solid none;
+ text-align: center;
+ vertical-align: middle;
+ padding-left: 8pt;
+ padding-top: 1px;
+ padding-bottom: 2px
+}
+A:link {
+ cursor: pointer;
+ text-decoration: none;
+ font-weight: bolder;
+}
+A:visited {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+}
+A:hover {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+}
+A:active {
+ cursor: pointer;
+ text-decoration: underline;
+ font-weight: bolder;
+ font-style: italic;
+}
+H1 {
+ text-align: center;
+ font-size: 140%;
+ font-weight: bold;
+}
+H2 {
+ font-size: 120%;
+ font-style: italic;
+}
+H3 {
+ font-size: 100%;
+}
+A.qindex {}
+A.qindexRef {}
+A.el { text-decoration: none; font-weight: bold }
+A.elRef { font-weight: bold }
+A.code { text-decoration: none; font-weight: normal; color: #4444ee }
+A.codeRef { font-weight: normal; color: #4444ee }
diff --git a/final/docs/doxygen.footer b/final/docs/doxygen.footer
new file mode 100644
index 00000000000..d75fff5e4cc
--- /dev/null
+++ b/final/docs/doxygen.footer
@@ -0,0 +1,13 @@
+
+ + +
+ + + + diff --git a/final/docs/doxygen.header b/final/docs/doxygen.header new file mode 100644 index 00000000000..a520434d6c2 --- /dev/null +++ b/final/docs/doxygen.header @@ -0,0 +1,9 @@ + + + + + +LLVM: $title + + +

LLVM API Documentation

diff --git a/final/docs/doxygen.intro b/final/docs/doxygen.intro new file mode 100644 index 00000000000..547730cba79 --- /dev/null +++ b/final/docs/doxygen.intro @@ -0,0 +1,18 @@ +/// @mainpage Low Level Virtual Machine +/// +/// @section main_intro Introduction +/// Welcome to the Low Level Virtual Machine (LLVM). +/// +/// This documentation describes the @b internal software that makes +/// up LLVM, not the @b external use of LLVM. There are no instructions +/// here on how to use LLVM, only the APIs that make up the software. For usage +/// instructions, please see the programmer's guide or reference manual. +/// +/// @section main_caveat Caveat +/// This documentation is generated directly from the source code with doxygen. +/// Since LLVM is constantly under active development, what you're about to +/// read is out of date! However, it may still be useful since certain portions +/// of LLVM are very stable. +/// +/// @section main_changelog Change Log +/// - Original content written 12/30/2003 by Reid Spencer diff --git a/final/docs/img/Debugging.gif b/final/docs/img/Debugging.gif new file mode 100644 index 0000000000000000000000000000000000000000..662d35a6735d811a4145592e9514c7cf06e66935 GIT binary patch literal 20390 zcmb4}(c6>5XJBwN7aRtTje|Q}C@8$lD$;@YCCc$4vSXpB;<|cv zLlT0{rNoPb#zMGw8gF#>^q#Cf15Rs0!WeW|ejjIKbWAD-0D>X96p>uMba3tX((>bi z$>_my1!UkcCku;s% zZ>I;~6KUXAI~*hPp2i6nAOB;EBBJk59?@vdk;Q|qIwOla+TEA!ZL#tNS@DzxHqH*_ zsKvfdAGd0?(3B&L;}|h*3r)%9AR>Jmn06j@kbIhAYPbMv2H>eP10SlvXRI5j06H~| zaU3c`QeVEdI_O%5;2;vN!MWfX*c}<1HE@Ul=6A;gMcS!pnZ-Sj3%b5`pLLdUVT}Yv&)a^{!r*qIEe%>%rW{4j zQRzOh6&~2b5VCtcZ}Q&K_JTD5iejBf+~WX7cGi?m9C(UdS$Pm(b|A*DKm4cs*egeF95@ux6S(T=SWDeghwNTI>C^&HEGE?x-fxAuae_3-(y7G1qAz;py5P0HT1JtI$@ z1El(b;A;$UHKxB(MH)}cbTde@z5UoOLj+@6#g*@=NU-Dz% zGueGs2n|(%3`F(7+wJ+Q2ivBGM!W45U-NEtvGs7sff1ir!K<1|!N6|Rsay6?a}u6hw%3WT&gu$zjk-^+G(k&b8=nk+?s~e+cG4chkyV$v=F%x zVRA=5^a^p_#s7GP58+3^7p39#)O!W@>-sXBJ&0NI!M*@*GNF#1aE+V-cR%Y0{Z0ZZ z#blUV`ftUB;09uW*>8f#!KG?sKECCxY20B`8j)fQSg!=}AjfM7Nm(P3$+I$A!zekC z@4AzJ^Mul+#sQHf-4xe5fY!LsrVFXGxC#;bk3AIG=fTB_*tmu~^Nxd@af52r)W6U$Vw`u}tb` zV~yHQ;3B!f=b`_Lu+JSjZtnrfzPZ;vaWISbJ!GW4X@;@OOwN1H z{cFV1`cpJbx9jsr>;Cpv?&ZsXPn0JE&}l~*;T7$8`};;eteRw|`*Lt_r=g>zC8Xh* z`E)+a^`R!?&sGlEW|M|f5bN48)Lrr%^>=e~o?>18Odfc{?K6)??$soP)WS*|4c}Rg z34u#W9UwJ>I(0WMfbX`Q#XnGv{1Wqm2^*;pZgu-lEf<)e&k?cnz zFtbB$G5eeDl5uWwTSRV`c}+c96cWkIHea@5BcSJ4lV3+4s>7>aIU-afw8pIzVlYpO zo+J$k_3s%_4cOm_Tz3XQ3^zs<-g?ywLeSpK%{*vkM>a*qh5> z7^rp(ioG8~y0eF$U@>w9PS%EcRV))D@(3PC=^HKlGqr`)7>Rs!WE8T9Ix}!#F~(uy zmDjLj+ng4|AjY!uc9&D>wwO__Je&^f97tX9jDdBqZ{*{}-reAAD=IX52R}p>@GZ+_ zo_fy3(x1I-=^7HPfZmu5{WEB=UXqAB_7zdB#rLWI<`a)+yC){y#B7!djx(Yj{B?QB z`!^-mX>&8RgbrU4Uq4Pk)!+_Q0B%Q!|F3)ShHoy{UxJ>diW1(+_{UE#LKGKrG&QnEz4V_u27k`1~#w@9Rrq z3imQpFf!)HT-8$zmVQk*D*jYO4buq)SI+_a8#id()?k{q=x5_fOitH?>4nC%%=hGq z<+>DlR`j_wdpGkH^P-bgj>^Dm<;>6ze=9!ct~JlI@Rt6NZ9(PcPnAc8by&}s#P&*>5>ycKrs&-Qj%o97_i#S8v!sr=|wM0qYzdX3#>^`n##)c*s@*txKq(~bGo z^poCvB(=|dUer39YVkM?e5KkRWbA2jOe6*^m3aQh2J4*%0K|^ai4ohB0zA=pstG)RE&=qL*(2yCV}#p!p|O)!Yj=V z4a1@#AzpaMnIRu>bp@4d5MKl6(>KhhI&-T;JY>RK4RAh75H?_KSz92UH6A7=?8uIIPGO!`ZbSD4Mliz6ab?0~8v>5nh$tEIZ=gWA zD|yw4g|lS=G!$xe9lUV7|W_H+>ARF2zA zB21nu&k2zvM^j_F3U)5hZ(-rxQaqo2fKXI{$yWRhe;k3Pw6hr}2}_>CX}FG+R{-F) zzsj{S7>JR?YJg!NTUd93vSQCw>JxV(u%yBi6f%a2cuHw^w6q!#!S!;7#%pCq4v}bAwi*)~p-&j-mVThdY z6{H)oQnqV{46BCUforqTBUdk9^g)UChDIbHj+cWz7{<|l63PcHJJL#i{17QhCr}Ml za@Ns*a0*9-En;`{>N+S6wAi|8I1IapR`EyOF41$!^ckZh-TaFBYgd(zQ%?F7Ew@M> z8x!?cXM(52EP~kIb~P;9*`~>e&h}&cK;Nyc@VzY3FU?E3Y_kJdi)u#L!vTqw98kx* 
literal 0
HcmV?d00001

diff --git a/final/docs/img/libdeps.gif b/final/docs/img/libdeps.gif
new file mode 100644
index 0000000000000000000000000000000000000000..c5c0ed4c403f7c33c261fb7808c65833c63a9e18
GIT binary patch
literal 52679
[base85 binary patch data omitted]

literal 0
HcmV?d00001

diff --git a/final/docs/img/lines.gif b/final/docs/img/lines.gif
new file mode 100644
index 0000000000000000000000000000000000000000..88f491edc302684624036548ce3910f32cd4514f
GIT binary patch
literal 91
zcmZ?wbhEHb6kw2JSj4~}!yWbi|Nq;!Zv#mNB%lLggVZoEYe?+6^Uvpm=jsLv^V7@!
jMO<%uq861ZHD`s_rm4RV&HHn4>)eNv)_rVZVXy`Ob}}Q<

literal 0
HcmV?d00001

diff --git a/final/docs/img/objdeps.gif b/final/docs/img/objdeps.gif
new file mode 100644
index 0000000000000000000000000000000000000000..57c3e2e60d4edc6fec37f766f077c6ed3aae4c20
GIT binary patch
literal 16201
zpWB3Dps=()W8d9$1*~D4j%(#EQ`0wI+ceFQY`julSG8O2NNsnv5 z3eK)>a9Cc`03v!RO6IvsyEOZz&n-;J(s19jfx1|ks>$>>)f?joxHG^=87I!4ot(Sr z)6n#;`0A%8lIT#S!2#MY9aEC%=g!$_PnCN3*?blr+%ZT(_)M5TbXc8(L-5yhwPN%_ zXX*Rei2F?7DB>jX$17VW5adk%0X}I+G->pf82!2IRVo+(hwK{%@_R+kR44i&Z#=g# zk>|C;_I?QOGd18I1+(w_J{ixLJQAknhuE09*gZ@T$`SDHKt4>-5R^n#=$?=5PA$`R z0sjDsdXlv)<;-==jQho>r13PD=$IeZ4*72IL<-G9OoUVWfT62vjdt$P0HHGUMpn~# zmW#nJTAhkCU6itP2?B1Cw}M9#%pL#H#^JhRwN}-Q`&HRZU{!|>`T2Y%v6@+iX2TN9 zDupC45+ci1go+3WN3O4EkM?q7KSKPl+YI54^lRuNmR5_{8>K{4&#X!4)+Kh{s|-m2 zYG`DbpgGi0{pTYGTUBq+xkEwY!I*6kF+G>=)Me zlO}C4L&=!CILxr@Bt5U8y|ok$E%N+e4$X!Gf839?2jI%oP5GiT%A@y31L_`-X&+W6 zfH9fxH9OxNxiw;VB(u`HvYBuZ#M({B^*Rf#)bR(br|6cNjSf?$uxNm5f3zW zrc}-%?8M{sSzUy^aFyv*oWi0tcc}1owUq4Q&~{%j_Mz^C#Hrmni5NSl2eYM$;)SzG zQ>jRav*3i+z&b?LE6 zmdu%~C`1q4aoCsaMK%_Aa^l$AsD)rAA*u9}=Sm`ZAB<*%zf2iv>I~%QWi!i5B3V~6 zSp3;B#D&=>x$C9dxQDFQ@HVAO9R^?G!-#qzmZqBd$Ge%ur2SiJvodJWkuW_O@)^}N z?}jy~zZ$p1`p)0tV-CY@!8JPl`U$s?VF5G34yeFk14 z-pl^srQwcBk4Wy`slL-EjNbhdI8NT~Om*#H2-lc2MJ$QT0jdV<>};Qmau&%xVeC zY~dQbbZR1Zvt6~^Imx0@OyeGB^7GPF#(Y@sgIH*lrdM=nzIBux!ei1S zi3i-K?;ai|E>BjjOAl}0s}gOf+8v41utXOowY_*??u`W7$mV}Xj=eb1NH9L$&(p@s z)~5EzCkjmQ;wIvDb-p>oXgld{O_w#`Uc=A{Y6*)9`#DGLXPfOfHLvUf?w0aX zDxV6J;dU2R2$}V)ps5AY?5lg$F!I~`f&ptr*@-yyuCmUgtZyqX`}mNX@g37Jug54HoQLa;c8Jm{HD!zL;rJM0d-Y*8ohv%4g4y<}&4t2&6 zdV7`^;(ZavXf;MHpQCy${S@tgWGq#KPa0UuT2YX<@vg3Y*}pPIY>mydeM|hbNl2i_ zJ#!ED)o;Q3qBHg9En$OVYysSduuca{yE^V4htGzA35ma8@z^jh#j&MqzC(ypYd}Jp zO0M%at4LFC1waVl?o%W?6jB0dtB*Sz(WLBiyKHH^oIR7O545PwNY%U4i(>^D38+t% zNq1?|5X4^LZi(C-xd&JjgoTboGxfT~xn8H|D~4hU^TrS!U~QS*FL7iCjWAn9bw<1w z+s1CB9kdP^22T+@;f#ixj;}l=^#tzo7ESOZB$p%VC%~X4v0igRuJ=2(py#@QC6rq= zW;U{N8QuC-gdt3WsEavg^Ka~HHSW{SBF`+Z^nxdE-zko|5`wpd@=slsj%YDRl9Ft9 zhaw5x`nwiW{sDYzBTZTB`dRhik?%kO9C_l<^VE;r#p9%??yw5^cQLP%9y(e!5_^+w z4V_h=nU-nNDt>#YrW%2MY#ph0g>dL(ty#~ZzTb^0YYqVe=IX&X$2HCL3`m?Ymjxv7 z@GY~wD`6F_O3DoX+2kcrLS*5Wd=euCG~{Z8&^ypq<~X_9o8ef%Pb94yl_mwFubQMZ zhkxo%a}(7oHZRf)VR3=0bkvuu?)pOcRkEC{m2w(O8`}0o=0mZ606b-ag+tPBzlzaT z_neP7nqQ|Ufp1GDlZ7Ho9=-yykny)o-q^T@b%_#G!Hb3U7#tW|p>GA0ta3LypW~=^5i=QX;%&ttVJGkr7$sKZOk8xnGl!3+#^1H4W9V~l_+{QED z;hz5i$_$r#edM+VruK>G$g||iUV@Uu4cK$`Z~H+wCh~sS3{{ahzbLe}Hcm)tx6b^> z-8f?ma-Q{+#R968=S(e^PkT-DEY*D? 
zf;)I;EMr@adk=}&-e=3=KNfK+ssT|x~vK^~G zg9nJY?S9fB%$0VOX7P~{bW@z$PE`Di{7M6EmQ}R#M=M|IW_>k9q6t{-=kwL& zww{b2a!4>}wDT`4m$$Z5R#2{u`>j52ru{L5^E>hbJIXr)M28ROF|`5>$zCs->Y0dU zvUkmtI@JFVeaIo9WL=~Bk_tl1LO?382*+o0c#ccTTX;%#b`;$^`yyYYR#==R(Si?9Xs_P%F%!s7>6YF=Z)`-6OJ{5+5v(NBN=2nzD>K4#8>D~ zOH}7*!!$0G5tC^^5l7Ej3Cv?LA}IYn!a2tYeoJ~el)LLo5)VwnPx;bw z>JRwt+)U4AThVV4!6B(cml5X_vmSN0B;V*Db@e_#Wk11b)Bqe`0`u6|YIG&Qxw5`QxQW^jSu(EEtx zQMIQde9zA}CzE1=PMF(@YB6@OKl=2ZMYB=;h|tI>h2IssSv>w*l2|K0ydDCg4LHl9 z;U(waZgAw}(3F$nk15GC^`WXG4H5$Od%FYag~&WuCf$Z|sPwc=i>mL=ON&M{z9LAM z?`o;1Te+WQ1bu5AqqvJA6+hsj5kk>A2WPP1C;RL+g6`G;VWrYO70j)o)?I^7+cLD| z!H}A7eozNpT-jLxsv|ZK`cK@cDLhAlFkhX~KSxX}~Y2Tjq1Y2_iC>yzD zxGN6`9c)~>Y^#H#4jTf6A6wF(m_C;iXp3qp0Ee-ks5oVXEWc$Hz@o2Y{#--;^1^K1 z{{Td=%vq@u}B=cK=@E75DGvCL`K|KG>rhqB8m0IV2H3n9%W@WZ%9tO)W!7PpOzC z^sGd)Yv-VaT@rK*ahXgG0PvJg4h$Vm*SNGPQ)*GT@W5{H`cP2>G4 zy!?HxkdPjmM!oZF8%5pO^Y0Q<4&7pG^d=}*gny$|#ye{`K1)pf(r2UqPzm(IJgFZE z927h_O5XauMNSqMU}VyQsQf`ILTEnDcFPq}q!nc?PnDU*&vIe+B6?A{N`}WhHWtGQ zzVwC^>Lf%U$?^cFiJ*!z3MKlDNSCEb=pf5B@LDAo{(6?Slum2vxMIdC#!|fryG%ve z*0!sH4tuBCx)aDb%EQSy`ho?PPprx!i|b=SU6@IemjO?Z#uY}lSN>Ydhta6!xV(96 zAkWrdRU@m97q$BqPjly{7ttSZ+WFh0^H~jp%##K!p<&u>X`EothTuT?KR%e{JX&^9 zlG=J(#VX`Ojv!%CZKJnaHVnb~FhNEarS$jn#b%KAItEvPs;4|%(cFCnNoYo(~Yfu>wdGgk4A^_)P+y^rek;8mICv zQr3Ulq*!B!b#Z}~L^A<3ca0~vhhkC*R=FMh5ukAz5o9;ZBr_ITxyz7AB*EfbC;w8L zk*}4Bd1wq%{K7&)vi|^DN(@_HZd8aa;`-Zu!PBL0s%)D)BGR{cxxXHaW@f66^X-G{Rn(CwduS?eLs*Eevs+IWR5e2u2-?OAjLTw{17xbu(_Iws1I8~ z7L%foz_1)8e(CkCouXP&j8+#0a`8Xs+m8z5_^Tf5^+r@EH6EniO3wtR+E%vpbVQKZ z=3&mSn9m^}AoM7w_Vxz==6;TT2wUy9Zy)9NsUfK$Uk%}Y_ABjDnU)(MPH1bJJq$U*7@_7}H8J{~bPvSfpcrj#ofQ6M!Ak{?u+R;Q|8}X= zR7-vedRmZBS6(yz2OwWe2xmKp5^{Oe`U1mgyaSO!HdHi%fikg3(NvbbdiMchaL{B2 z3y8E#aS==uEBh-oP!&n6C9hd{f;MQQXNe`3hQ8*O~MKpK_T?;(`Ej05&ie zu*MSAHJXA&5q-zops-ga%N#yN*#7$6+r`6$`$5?nDLf5V=T!+^ z`SZv4f@|p7Xta~oxkTvjR?%pz0$=NUY!cfx>e--SFXq8vUaUp{;LA={OQZd7<53Ce z$=>n;4C&sDOl)4STqUwd(3g(8rS5RPQgdchp@hCrcO~i25l3wGlK=Qh&6Ryfh7ht> z*R+teKQ#kNx#iNeUzSBTRndqMA>X5m>n#4t4~t}8@dGqZ*j2jg5=W40HRB&%iNyvd z33f-bs@Urrqlb=#LcvPb+>#MLYi%SH&vYQvBt}C=N5e@OYH4}RlG*ltaep_)DwBlB z)O#x**BVQDADMH2gBes9Rw%ytXfDJn|Aq>gNu6d% zfdwmubW&!KyJMq&9f4!lBhSm7Eb-<^qhBaHa&dl$BR6mcc`Q@MX2AZ2Of`E_VC%b2dHQ_6@6CuXCvl3cx(OgGrc!nfnxe&#tehtL48W z*&2>|`&6XJCfz+6&^2jt$tMn62Knuv@(S z->dtZNOv4U$cM4B{An~;HAl-vZUuX6W$1?g01}xs9xHYE>kabLd@0?dR(}yMz6l{$ z2>5IeQ>Rl^j8Q8#HdsXwvt!|L-!cV5E_jakBqL-&pDRB&R<0=E*-A-OU%iS+a@QI0 zt}9Ohel1t8!(oBrDj0RLl=-UZ{k5&4}2irTHCE4O9bjE&mHe8)8<@p4}N)penyefJe zczXOkn9opoy++Uz7(pr46c3)XxJa48kpt)rsV z1t*(B-+`*?PItFF;RHPZLRP;hT1}}1xDrR+VV?xw51l#x)B0y0!J|J~bY9#e$;h&An=jv8Hcn2jJu+*H57w- zuy(L)l(*=?jTbBA#sy8x%_@tzRmrc9?N4FrhGTRj)+<4&A~1J=zoTG&!dWv5)v_ZS zmcrfy@E_?dlVit0u8^Erj}=UCDJCZ)SaE-xM6a<1 zLtK(NkjAyQeu~szL8lF19_!ihOYvON398FD( z#yjk?uPBj!5HCo9}8@-PmA9Uz2NMe_yUSflpSsWTWkS_tjxWNr+5ORg-ClCz`x12gDb#!8fb8 zriOm!PIZ9i;J=Dai$Ki!kA92 z`WJQ4yM$(3z;WqfByxMQU93XcOn!{f`3@_hrfs2;))jkHEwKTrobZiqZpJ^rjbLKU zMH63L?=b);KXCEIo`0u{QRYe6a5@E=tBKozV8y0U6mw)m@uIJ6{Flg9&K5FBbFuv>CaASW9O8JiTHV!wC8mgD5M^{Hg7T>H}A`7YaFp4^gj18G%MJvrt{m1hkhm(9KJe)RU zRtE>uH!S6DkhpK8x4=is&`q=d1N^XFUFBZY<99czQEs=lVwqtb!tm~@NzgG z!d$t0*EfGQB;`&Up1S>I5?`0g5!oE*`xQ>UPvPnXZZt8eus40U4!a@&1Sk^hjJoKg zmqQDY&|F;IF_K7cQg#^lTsiC>$9S7I^pXT=#g=BmbKjFl>H zw=;csSmk;4KJmXysmYjga@ldGTP}Y~m6ZeD2uR51|zQh9+G9yas=38;u8BY`>M@dX`3HstXBC zAHCRAL09+WFMy?f`7V~6l_6EovQ+HYhULvyk@0;KW8@{t2d(D7m z3I}yAAG0$Cbt81ay}tJp)eOPkWV28R=r&)8(>lyElh*IRf-zE zx3RsaWKnQFBs{XWt*i5L(t_lAbIh%A)I^TCfjColZw9+681p0TD9d@qy-XYE$Mh2j z4Tdi)*Pkdnw+kFi{cTA_==PSN11-;ZmTW2p+^B_B}+aCWYa%nH678o2tFdS`s_Zc>$I^tF2jtj 
zfu4|%g*!v=9bo8aZ;&q88vv5z+qBIb#M297=;Dw_wH3YfYxbP$Vv`pq4rzRmoNN7Z zF=?eq;U6keQ9Ul z2r*Abf9W?+)baB=OEDEj-I|&0ap@&=>IQc>4&r8D zCIw==mFnKndQM^m!k-A^FFc&QqGcA!s1wW@D|Zmpc=trfS~}{CMQcj)h_L61`p1^+ zxN}99s+v+2h(_(Teli|66Yr;L{{F^f9gfY}zF_R@jp=9vmAF@{Bmepzhh_p^&4xx_ z^tN1qvKn{QI>$=sp)P(Ob~-CTiTN%hM5$ZqJp>cPao>O^IPR_v@MbdB58cRUZJVOu z$_vXmI3hfBKdeHMdK=_lrVCEcsvzQqFnUMxY2o&Y^ly}D9C-sSI`{L# zWyjUL#a22=&j_mSj3$Y^c|2Jm_i#md00Rp5TFyON=Wi+Ny%aWBK&wkf4Oh3+bR_)Np`Q@Q-p1ajDP7eP2`w zchcS#t7%^0&*-+sAYTwQ6(!3bJJ~l=4qz+)`m=rwKL#%E_M7m!7J(L2I;SF&8WFhxfsGBH}# zsrA@5f+eOU*eapmOQA?^win2QX!X9as@ARqeodu)bPilPS@7qen^d>kT@T9a&J*cn zt^`ZpR2UW-mUV;7ldeRn-_*psL@SR?x<(5JKV56#B}X)+fDf+w<{AA9%&s!QIh~{` zC*56f_N!3dYiR0lyjRD5ik1%QbtgOA;k9C|^HiV!1Mh-Vn^~;Q62_%vCrAy=!GnxKK1s|(5 zy!m)vV-TSTyT$Puj|l^7#M3q{CraG2rKRkxk%{`S-vO6=TufUgK@C3Hnz{WJPE^IFC zMd%Mu#`uyFKmF|jPrc8Y!2bbvK#0E{NQ!X^^`02z5CSnRc-b?vk_Sqs_Jc9{&15V_-V5RI9A)vZ4Ob)B!& znf}&l(TMDC{!64@iYw!96a7>dW-Ed@EL3!pb*B*N&0pcye_dpO5FQfh{HO$zRJSTg zEQg4zP|y!Tps_!Y16PRvE6)W|JM(4)_IK?A5=sKw6(pgmn)^!~sy7?!a@E2h2KBlv zMsW@X!3VXn$oWQTZSk?I`p=V@yB1bRNk^VXDp$Gx07j|_>%?i82w4v^7{2ZD-_j%f zTxcpt2s+2L5C#P~T>TU_@#F0+p@J)RB+Uwic}oB)8)Y~xC>tnc zEC%Ql)|-kNS7HI&Z_`wrvIjL2v;cKJM-gIfW45}9C7vW5mm{D2)zRAJvHf+c6xl~yk#7}IA0}~+to|O2S*1o!t710c;S-srTPjZAAVvXi{ikbnv zHrNfdCWzMHWbL>wqjTN^iy|p(uYQj(f(2{n)wncJt4G0sDhOoaO?^hN&G17I@!P$2 zX-!wiV%$MxYjY%b0VQ>2jd&|Y3K3NF+iH(*<*bcd%q5y9aq_yeZwPB8@V-SkGC*od8}N zXyE&Y_YaNc1u zwsJ?kerORS7W_o;@Fe^^H9EbHtzk0 zDBVC~UR@L#pYI>*{B@^p+tp~o+7)+wBbrdDBV{yFY5xGXT{jD9krYP(BH6({ZdC3^ zYeuwE1BbXKv8UpjG>uPy8*7AKf93H>>H)bSY;VPLkGOl8+DRlby~DFRnu^ral1cdd zbzAYu6IX4B#gX%9Z*Uo?1j!x6#jn#^!s^T=dxjA{-l90$nXkiHx@ZfUn92PziRoe% z71??DEE~%FBPWT2WVXi*BkxLZtX%xrFB z5wW-`76*=ia4y_tV!PAuKEEAjYOEQM0cSgLL7Ysa!+J)&n)F;teQ{wVmRU)-G>sb` zfS+#y;)hyx^vwvx+E5!}o;D#EQ`E94M2mYX+ky(al?ZuIDJRp>M!f>Z-nIV#w_Z@U z{yz6`i*%Hnb4TsHwU6zA07Q1U!5>UCF0{0cJ6a5 z%{|S`a@~5%D=A=WPNtOBeYi-{F)hi`nM1h>9uhK!MEm{*~+G0~+QNT*+#ztf;fb*B9#X~lc5`k zCzAO7b1xG`VdQdStr!5<<&2`pP?OGCVt!Ik8he(5(^|hB@;3DJm9k03PuwCvZq<0) zWT%Y#edSoYs35);SJ*)972Kzd%E1Jy!fAw3gl}SR$p;6I!~vt#9+;vBuPTe}NsEpWK^ ztYdUrI|;5P9GCS@02?UlpzCa!N(5#Vz(%~s)duAxhGuSUB#Q4jecZLhW6<2>;^G_F zJ)tdRG-mHeRS_iJ3F<%^&-MnUV6hy!`0T+<1c_n2A>3Sxh*tUrS*`AiGRYAoC{FA6 zajzO3{yGP?VIv6)vX1Sz@xe{Ya*1NE_|(!}-TeXLRL14Z#H>tkc%*(@}^{sfHT{j{l7e<7Z|9vQqwuzjP5AaKZ1ZaI>qqE&9{gU~aEq1gJ3Zk;TnSf8S>F?WeE zRHyNLW$oz7`0idynA4X1jb2m z*DjtW(4oX zRU1=4EaG3%RdZ#Vtdk-$QefbDQYlA##L6nAWO2Pf*jI8t&8@3u8R@}k&QA$2c@Q0D zvD!&B(6)dXrFY|AuM)DUk{{SlhMQ$w{wvovR60i!vfZ<9KX}xtp5+v?Xr*Y!Uf!X9Hg8nJe3u(j; zRTDuFh=70ioI-zD(NgKk440hcCZ;wETZ%}?9?H@)3cGK|kws7lJ8}H9obA{ip^r1* z=+V+jRSoo3E$8#-ubSJtW8Yde#AYA~0Q{}R6@lAwhP(C9Wou0+)->z-ls!(L+4VJe zCwy(gTt>1hO%nmeTVo(E5v>A}0`gP(Tn+tbb>+2|g}D&GjmMAYi25YYLLnWhkO7u( ztG6y@yZm+CJCRy&_ZK3S0QA;|hBu`zEwDq1 zyPRNqIWO9D5L6M(6}<*Kbcn)@_h2#LS#9={pnDgea8r zv0hos>l|i`%+a>(LU*sk@gyC3Egi_*fIX;@lj5|TR&pEcJ*Gi1ws%(2FDWf@F)hLF z#YpbZ2P~=kwIFcWNcpwq^M4ZghAiELINiK=t3upKVx+eFqm{+UYY)=d<1h;%yJK-y z(OUB1R^uxoJGdv?MD6vfHy%SeCnW_Vp10L!)$uLD(j4LNW(3+PqHQ-VA-x z-o*{Q^vJJyXL6?HmR`S45R4Ce75&CPBa)voupT#89J3=eA{cXh``Pz9bHpr}#ft?E z>2@liO1KppiVspnH|eFULEnOMz+i#i%WyzEil{Xqs!c#28g=W_T2>+!ou*6IufULf z+-uALYw+5?p0%vS(~tp6j%ckQn(l4Z0L3{_3Z3@obUW`khz;mByz&qm)m&=5!~Il@ zO~c7n;7~-G9@y0X0Eno^_Q(L5 z4Yt^gdA()dDP`xigp5`t_d`B2o@11|Tghj%ihrcXiGfq}a4!f7$NCnF%Af03t#$1_ zJ4!5Aj5oD-x#Qu9pa4(=id??NUfGow5VUMjU6p}W9EC?9ek;D6Wh62)~a&VbxCxAP}l)}tbJhUpHq>Brh#V1-9 zQ%%KnQ*^|%g-#q1atuMD)_hYtYn{W#b!tP-BUi{{lAPg{$>i{$_elfsEnchZuRSHu0_DRTQ#P$T5ycm``;2kXeN1g^ z8U`jf`O53tZ~7WA9mVt{Q}NJ6mzD{upa{Nq>k5aI`7HRXy`A6k%PW+;gX-YCxNFfl 
zNxpL+7YiAEzFr2O4H7y~Vn-B>`^IbzXh9 zR{lN#^Lc__C{p2a`RklpWv;%OD|mw=UfraU$s;fKQKaA(aq_6&NB$foPe|mr01&yp z<@rok5{ymS!)(7RMQd*p04Y=LN`)ump-ba*@-Han>3_O~qGGaE>y~MnUN=zS-KQv^ zpnl2?3nN-gNFi-_J1mWti90$&kGHv-w{aSS zB(puR3eZO(_jYY+Vl34!D3#9b?(-VF!Ud0(T$r9@QMk4MQlL{VW#oh8fk|92;sbE1 zii!{q`DvwWK#1bBESew}i8W6%KnV=3wE*d{({|>`_(&!Ds7uSi8{Jx$QMEk<4z=xF z0xnZgya07x{yUk)zAn*TX`GMRRpeh{K%l2e6Qb!^&BBSJU`@iAaaNg2J9M|c#UsZO z01eHdLIBtkG-4)hl=m^xx~rSQ74sQZGA@ew1YSd7;F&Cr2E`IwiZzSMoTrux2LxQ) zM;x4=a5!=VkyFQg2jX=8_59NGMy%WL+q0t*?ab0*R!bD;F)VgIE#i@7dj*o?Av`im zCBa}l@(|0~9cfpuO~*}o`N#oM%FqOw2Uldm;VyF5GiMt98SS>Z!`r-4)zk$L!*y0( zK9oCww_Oa}E#*#s5JlR(CxWqUOAJuhdVXx^ervrlF4&ssEfnA zl;W)tf;iXvG<`+0C}oLYXnHrdG=Os>n{LE&&9X!2ZqnB8n)63{#4j1Ud1%}U*W`p$ zE|<|-F#*GJ{-4RjpbcGkCNH0u7eeeAp?-HINpq{P*-YGR|qMW zD~ZNkT;nZoL6BP5!)_doM~tdV^{p5K^L%$2s5q!e_ueUtx5>1BP-C+($WkJ~=3^R{ zSwkn(lrb>w*bso3|86 z%p_dhg%w&_GdA>9&}Ba}1W3iA7#_X47QH}^HlRYF=Kq({HiF~t_-Zs`4m7;dN4s9*kf8a^#h*%{ysb9elAv}?pq#5 z-Hb^uUPbYKTpVs~wo87<_9cjKOA z4IEvsdrrzeZqOXUo~!`TuIn=}9eta1aYEGI%orxG8M=nz;U{}*x$kokvv}4RF)-QS zY=bJ^T$VpQ%-q|Z33jVijic)HW7`x4ap=Um*ZeHZ!LpejP<-NF4TA46Yb{7upfv@M zue?x=m{+Ab>k2b4rxeYbjFqlI<`jkx{mK(sbo~DSJu?LGk{s64I}s37U<)2citX_~ zXuP^F%c@IZKqlU-$Akv@+x;2mb_Z^f{xt8w>p1Ay{*ZRA^yQ!(WwmJf{wA9$95q)U}|w~{AH z2bD-!6Nms7^rL&X{Ip4j3qr(zpv0!!ZcaPIRasNA0ale)wCa;Mjko^iDkB5&8qd86 zN6a7e+0$Zf92ZZX8TQqsG0S$ZVT@b5h(W)Mte@f0mL?*DlNwYWCbR_U9Zs@FURnkf z=8?aw5Tt5T759k!zkeHt$3K^}mXV|=i~tOhhj(fD9fY09Jw2s;y)++w;Geh&%-e1C zNIk<|FKXf%ITuU>m+e$bzyHkt|ADM9Ip~(S>%Y+ z_V(nw)Nj2PeFTu~Pnh7Hj-o&Yz-`skro=N`+}vlt7K+?p5x^NB?eGJKxk)tQIS}1- zOOygQp_1<>OPNcla(P7-!Jl=8+A&g14-rBBvC-ic43=7!de%=G@ED?oP`6W+TZU;B zdyPuVPIaw9(w_}WRX~%Ko!G+3I?Hy-mhw27ODNnbi67q`V;vWRRD3#W;aIwE@_UaO z`axxOqTm!!3*xCAG4YFD#w1prRG_@BsBS{;dl2!EOYVXjIn(cgb?T zFNzC_xQ(t-I{yGLOJb#^_OJ;Xk;W*-{^9N_O(|U^rWZ&u-ce*st1iWA)5&oc@(gOo zZL&7yTHQcX8z_#UIQ*;vvDgvQM9({ZV^FPfBB9oKd5X=ZUgGlp#LwKB=9&br8I-Xy z_MK`E8v1*etu{1lOd5o|W63pmG;_n1$J^S!ayFH3F!Hh7#UplSiOBY=F3ReCMba+YN2;hUg$~ofx zGO%PVFC>uT&GS~%lQdG>%7&>T&V>vtqz3-2MZyRmhYOf8*km`@+?rxMlqJK%YYc-w zX7UK6R%1lHiO}{W;*I;O`$X|R+LNy_tN4h%Xko+a^z3)3Ldr;RiOJ&Dw-)aM+)Evu z;-Whz6=_lA93zHMUOrP(g0XHW#4$Tisi)9LOiPBth&4czi!xq(+_>+pzMjGfbD91p zA&hr;Vz9>9(mOnJ#R8;*n;>V1NR^5>#Dm_-bgo|l0x~?l`@tAf7Gac~)!Xs60HR+% zXF!u(#DKziz(d=R7vyl%c=z`+^PTHTO=m62jZ|q#qw&)DF~w9a3<0s`+<}c&FSv z8~Im{y2+8@*_P=MT1Fs$g?nyy6DZkakKOR*DXJ1e>mID9db#$`?ft#7{{Y1vK+@Yu z2XZ(gSs;{8314j2C{m$|eAFYIITa1L!PaU8?;!MaZ~G38hKplIM<|+YCa24CA}jff&Hbu}yQ8cRoM-g5r{lMagE+1Y|Dqn8wmHcmpbM*^54 zxK&wGk!o@37QGaoPvKoY$VNz)gFuq`HJdR7AtquD!m)~`{+{Y;!BsMS$?{dYzqiD5 z<8NpLlY+6%re;vU?gI~Q#)nW8L^4piw=+v@WxiK~wxMO>If*l@#2@lxit8_0*^Z7qjxqqb!&Ab2aWC7WJ~Vo-|VO1wDXV z4o87C`H9<5R|S8|Gt*=~-eEHFg_3C==No$~8kBV+P`||*qPpQlK|+Gs`1Q6s!tb)V z2;w)lim_QmF{2`vQ_dv@tsx)KznRoJy1nDmUg9t_JgShrv`=#&;!LQM4loD)>rTz> zuS6WW6{s};6BwLH+qn|j4R31i46k*{Nt_mu&$*e78)DfxkHeM2`#0NbS#u*4PLCHko`-j z$rHIO#+#ph+i?k>1%Dh$sm69S}4T6-`$|+#kv0gn^1VGMb^{e_qUK@t+CfPcNf=oN|48J+@+tj7Qz@2 z7#7>wicK}=C#*j6e%UT!srfE%%Dh=$Gvnp=06of+fHWu^UvjY?Q<7<(*ZFQv=ySV>$y<4)P!u$5-tg=PyOH-F ziK#2=zB)i>ja#>~ZYW0GD5sj6F*_5#O^%bZydi~VF?o4qagDUd%Z&Qa5?SJr;~Yl* z?j>pmS|9Auv~LT+{U(Ukrp~#*Wc*h1BW;JWzq-kse_#tTdxwa#kdC~a)*+{BAc^!JxdQysT=V@i%rvC&mQ zENQIEP?Vxh0C%0SZpUD0JA}}z@62&Gep!sUp7KSTD|E(Db51MIW#FUq#;T!EdJSo4 zb;L->iW02?#g-vH4I<$SO-rSb;kk@~v!ijS+|+(Py7a)faF{WoVZnISt&UD>iDXOI zZO>{TRDxtGIX4qfc-L+8wyNO+EO9E(j`Zi?9yxpe03O>xfV+y!kS_8urPtB0vImT- z#nM=&6{!lS#ZSO)6_&0-)Bfs#(l+|B!CL5qVo4QS7Iys&u(gTVECB4;W69+Pemu#ezxxKG4vGe&>o|_73ctYjr z895RhaE~be0E>yGI$Zgz*>Fgi{{Sh-W^Nj5hvzJavB@b_QdKrpP`wBB>rT~}z%+0{ 
zl*v@`*w?U;Vv$uGZS5twPzs;vIx6W!ksk@n*kUqsWG>gZA%XiaRD&d@oK!Fm$9?x& zYm^4wgI-<9b$KP+jm}x5X&s8H@@6$8f$i(AP@X(z5iT-3en#ZMWpwPa+nx+&AXIz9 zHy?WX0n@Jfq|4DUY7J6~TuE)7j^`733(PGgX(A28I>zFi$fT+Fk~c%`H0jeq4;rQ= z2+0G7Vt=cI!zg6)DO&E45+?=v)d3^)g?0MoT4h%|1sfd>;1g!x?0m=j6^<4nLZ=$89Zf z&G_&D?fN3a0-?B2v6_&rL%BbP@zDa?uiZ&Jrt@X z+hEs{UGN-DwU?AE*OEg!+XXfW8vRFdc^8~*9A}RrooGhKsM}h1YaUzDt(2!@Yj7o& z*V}P0N4U6(9n3MU3FOt?hUA6lHIJ#%2%PYvAW9EVs4K+rv~sAD-cV9NIx;Zg%Ub$_ zO;Ess;*uUJf`|uz*}fypFRkupmeUN@)5m+6w}-iOwqbk5J4Rx$$35c4ErTAN#vLj( zuW-iYuulD(YLP&08k?RH*WO-Yc}jP10VmT~ z7p<1c0}NZc=ucV(LNP=!R}jgI6``Kl*`%STwTWdQ)7NpOZMH@j{uN{nV7`31#%1qr zr@cfnEUwatc_=IiBrxP_uVC>MPMdO1ID31Gj~TI%Q@bpP#zjy`07=A2O}|Qt(^iuDxH?aW3YY+16=OZW3$-p#_? zbB*d5s0}N=#E*`tJHvX=v?CDZPjBE^X#Hsa?;rN_F`R&9vXitO)E%o}gufNo2IWp45l&ro#HtyGLo*+4ayD`IEgaoS z2`_9EhYC6sJBB}pT|g?rsQ&vf4o)P&tZ`CBpLHNB3i^ek-U7guVzaTPu@-(%M@|A&qg2 z*A}?mD;=3ayktU(MP-xzk%@Eh_+HaG9r%JVs;QL1Vf@bzCDnwHMV_-&?1J9+hZGKvhcC)p2<8JIp z-H$;E@VWtILTIkLMk|O+cR=tl5&N9Z)C<3_!>*4R#MByhEx258>qvqJHt4HHA|xpy z`q2Pk>UfdquN8J)bJBcQnz+Vitw3xdk_NYu2@DEDEt{67Vk6F2)0XrGj-s3HHK}J) zDZzxi@f164zZIim?56?;s4VBsJbD?t&hS{x5?RX}cXtz(CMaHLyPdkIVoQ}KH3Fuf ze08~@zD<~tD}!_v{ZL~PO1Y26;;mqcOm(DxPUR8uH_@!K+ub-4*_oqmP_4Y|v<{=8 zJdIa!6Eo^9$Y4iJ3;nsKA!rS|6W7}A!+AB-H{7GNyvN?Xg}Tvu4q0Qovyf1441?RY z>qb3kpk07&SZW#ehze1I<-^?(N4npeEIUHKHKKcnq29W^WxNfxD{p+9MbGo`M)vDLx#mdC7R+xG9t)AL`hU>Wgrl!2lnm0+GgFcY#?2@ z&b(>t6f~eTKc4&3M6lC$R}nD@KOLNk-L4g9Yl{?7FVsq$MC+?_J|^oCBjj0da3MA@)A* zAGS~BrDb8H;9z`l`X!mzrNM=`Ho^c)#)_&KQ+_H1e+_B>046T`(=xnBW&0r47<^Jn z)o-LH>7x(D2`6D76f|D6{aWZ{xg*aX!jM=xP_#-(7G6xE+uWIZm57scvV&G}P8@gK z5&oS$Bl}zIy4Vkx$-Gv9g!4-qyQnR%ZdGNuA!4|1VxK@H3$#V6wFez%YIO3-@!YmX zqSy##rZ$@@V*3q>;BI?Z?C`e&FaAf6GKVn^ETUe0%T-q5NB8 z+WB9%MfS5Q@#Rim0j0l} zWDqk7@FVVkBUF{-o)h;^I3SE*t{WW6=JMVdq2&`+iq|dfNJwvRQg;n+IR=l}rTA!7K z1XmAul4P$h-0Pe7ouKAt2*= zk$9W}p?l|^8INCPL1eq)t8pc?ZzD*x_b+ycwEqB9P#;?A!UV1|nM!$&x%kH>JY|D4 zWR$JJR2)4xtXmCSL3MYO4)f2hW-KDd+Q$o##T+!g+HM<>0QLB4L_*|S#wEmf{fGQp zpGB7bXx)5o4IdQ}4os~)>V--|4OMHqZ%&of*-*X#b>d!8k;-LfOVEeED@<8ef+3K9 zhN=e@UvITXrlb?wT5E;HDwdaXBy7$#52UClvR$u-eLIDUUyA9 zvUq}H1CKRW-JBys8T&{Vog{-BNGTJy)n^#LNkVhu^EkmZiu8<{yHqW3iQK zEF*x%mg^)l+Fh1cwuVE_=O1?>ikP4LYe}i|kM9%2m+y!S#Pl})U~%FQj#!G0uMx~| ziB4V@J)S$sEFw_SmoANZ^zX_0Fzf9}UAFzg7uDaLH!A zXNS#C&YbTj z_H!&r;tMrs7QWT^Z`WNZl3h^55y=$3y|%?-8vg(*c&Zo%dzWCbJB8dru6aJg^n+jO z)g>I>R;sK`Ru|?TM*dBft%3;Pw&rbQ;zW}&jzo`8c#5e104|%eXh1T(W>_oUIf%B0 zF>M9)%NQ@@y0N&rTCyJbSmT$A{+hbT7*LImrn+;*I~^eb&p*d%8-&bUWAccZM30Q} zEHg_nBY=&{JGov=c#_|l*G}GHM4TjGY8JDD&syd@c6)IYGu&Cqu|sfnG9y$mUB9?~ zbJt#XUn6QjOZRQsg&|i~Jb&$sSz8`3c?1^Ms^O9uV+J>^e{Q^kjke@^9R?pLXK;f7 z5ZlRf+}+4>CL@Yj^L)gGu^R)K-<&ovjtPK*S6|@5L;APPPosgk$qq`%mWi zC0u2!%PhESBqDo)YAY?#ZU__5A~8GeI_nQp{{Uo*N;b!vaj)snff1Em$78%l2;}(Q z3ev-hafa$hg!f3tc6GR)oT~ep6(WOSuRnX_H5WT+i60a~S8e2zv{MQNYd zI7BMmz&#Y5$6rkvEqs>~JjyRFt}MIKyuS2F4AMqqjv`7a zbd49MMFOz8Q%#3S7t~ntBy%|ku zt+@lZ{Zl^M;uJRi9d&-l_}hr2w{AvxZdhA=GN}N`a9sA)YJH_yxl`k|uNk>xM6}_R zJ6e6Z@mxKWH}{wAnw9uSj|5D%Q7HE_FnUWkH1_O+;tg~gy12yU6`th0t|k8fHSzI$ z^Z5;qa?>s@BvpG=Q*U(`VzI`e0yvPPiu+ol;jXu+B(&mUd%JMk6&CI}W@PJg;=l0m z!Fb>8FVdslXD*D25w0eSlKY#Fu~YcFxYwhfRsxlA(Q zygpoV(ZbEn#rcOwz^xQxJb^sfR%@nHmcTRDUFr03A8H@Z26YFBh_we;6&fN@8b?mwq^Df?G4fH2#;p5$*l zkm2LEj?|mXd!k2(b@vxQIV^7Pza-_iGwol?Ny5>&BIl>{@#ckk(Q(00@La6fFE7Lt zkR)w!B_$Lk_(a{sQBzI<)v48mbnLX^5vL_s#AVxV3ip(xlKD;ULJ5p4@hQ-`LdSjj#;U?XfFC=BC zR#!Xn2ch`>8uOZCBXkhrwdha4lP?sJ<(2|`YGuvDl3ZUoYvRrsTH-LPBeN1KRww0O zU2of+Su3c6f_Lvs9tq|CJ^YT(2S%{SCov;&EVoj~?0NSSaCq@rFs9odO<{bW$6cM% zba|q}oEw1e=_Me<;Fwc4f|~K{EG~t`>}c&Ei6magN^jg?l_i)DZGVQ5x$*ZQ4D!K4 
zNE)c}$0wb%w!~V<@?#s-C!03* zcJREN#zgEu(McmQ`CVIjttrrJ(!exoR7|;|(tH=~MWv8}_b`cllsse>CZ-qx+;=JS z$@qM~+oZ2*Op8YZNWcn|-{wA1jPVR{d$)@7$t6zZmM|+9V_$5SDy4WtqtlQl)6|Z$ zWS)gdPOA{|Pb%_nD7BL7Es$x}37OK!Mk0v=kC`}tq>hw3f&6szVg?F~mMGNa>DIrF ztg61JW_e_3iq+iviKS1+roL}yOzpv5Hka!5%C#%#LYmipqgod1h~SuwwJ4MmUroTS z^dtPV(Le~96tJ|csuXf^b_1_aMFG?mQxz9^r0!h`_Q_~r*liM;JA8j~K~wkz(z=_X zG0Hcg5Efzg6B{Rx=b^T>wwmsH8)b~Gyl*W8jM4@62HXeOSJzvg4RBf0yvT@@jwB8V zh**YSF=bxg!)>nN{#bjMk1MxNZYumVBH;$=(f2@Vu>3Wr8}42tN*qUpJQa+<=;U9x zZsvgqFW|PivvVc2?ly68abnRQsi2`Qgw<$_yVO#LOwiR*Ky8ZBJXgNh$Dbfgt;DIy zD;JF1J>*iF%Igs=EjRX-<4;Xs>g<6$X-=+NhU|OYC8)MRWmn0(R@P;L@(9hUGWIRX zqO7oh{>ZLkT1Na`kM1V^g`1|UV_hq9F`Y&=-`B;41^SgO#xnAn9xJ%;Nn>o8za_{+ znaEr+MwoSo%83oQE)3n}2_rf51KI~u&<^&v-Dv6F97%9S?#{COlXfFH6Aj4h51#Tl zn&R?rp^Su%J2;{UUL;TuM;fXs#)zs14x3PF*7OmSvrOie*Z`Z2t9G|KGY^g8?O<6F zC6;CP87B?OWZBqHxYdym14wVan9D}NAF1U-AJ%UCu|>YCa(g1UcQvm zP`??@Blpf#=oQDuk62mfd+rL4gw&I{T=#^y{^ET8%Y) z7!fhow%d{H%v=TsZ_{cfkGHmIW9BxNj|5{Y3Qf)z3Q22tSR`xCgz@J@k*MWJQgj(y zl!znJ{{Vls99K&g=-a6(eaf2VPokF}EU#%SNL`QstjY@15NqgvXaaS(oEgi+O_Rx~fpY7jl7f@(i4c4B16mgg9Xjk%(kOp%T&Onto` zTbR<|_djhODJqRw0i=8xOL8YbwPbzu638pa zIuNKp8+T9!4N4kTnji8b3ymruUW-tqt9)!$khR7_(n$LhX|1PY6G(<+QX&S9th?{H z_|vAH%=jgJri+l6yiP)`d6Ee#SajQrprx?Xri|%dr*mc)fx0DiaLcI*q34_HvLdP3;`HSK% zGuKRBdzjUDLehJr;67EYcYn^EX`$S^E0h7FwmW^%01@d^X1s$ndo)(Q8*OtMf>P(n z8WvatP6&`%#>%Er2&jFiN8`&zwO|UAIZ4)kBzGU9whnTN!+mKAMg zxfhpm_bz5g(6Cq#2+?`d+U*$w<^+*LQy?h0!r9VQ3MfMc8+rvEGLX^hkSxJI7J{_!VCAz(k+g{3|8!JVlOw&qx zal4L@s`BT*x`Sb+E$QS-%A7XblQ(5D(Ot~8(nglnHnG{*z!o^;02w=KyjPs9?maj3 z(I;-$b09Y6iyI;pl}e|c&0xGw1rB#EFumyAY5I^8U4kIq;XYhcO>XKZb%wN z7Ov{<2*y9z;_UuBg$DkX4e1%tWd3jGzDeg7$pD5pG3z2eGZADGMT{kTY%W0V%D}Zt za#gAM2E8SX9gRE6AV+LAf1=XT+k_%(j@a$cticD8No0dR1WzrbY}Ppkol_$aM(y^` z8kqroxHjihmbTcJkoQpvc}*kWgWOtVe0RxB?VX+1dU(OOSd;6T-?#yD8?o(&>cM$2 z4f$z|@Vy3b&&9txaa}_l>Dw85k+89c$nPvxH^25Rjms^}z37Dyy?xG}?n9cqJ!stN%fAG_v@RT^F$%6X6?1iV_jkZaD4|qW3=p4CeL&^v2KsW$V;Ffw zJXV+@5n+OIwtIVMl4B(&PzP{Rm1tB{4xhk&8oZ`TT)@b-g!F&9<=FALuM?AZqQs<#T~_?-QCR4T0*sJga$xm;>{FjI&+hv@*o}RXf!xOG32dq zkuez5q)$7$x`)LrJaZUrjn|b-)8tUqrMI!i%39T4L((>ySzFZmC_Wl<85qatk=OQLnm;7t)py>yq@ZF9LGt20yK-B$gl7l9@7u$r zIa__W$Uv!Un|Co^p_8+26=aAg!5u>6e?X|OlBHOghK zt;RA3B`g|qH+{^H!I`69kVnVbK4$n0wBPa!qfAm4t%w>YKfAs;KbKnA#e%wmD+_Zh zc^iwV0#MZ&0;CUrG6|=pHER)K-8(S0Uw=%{DbAoTaofOw6r7LXuGX zN`bik%^BFya3qkr#@qOwS{!J*%u6;wV4$`i#Qu1G+qWUVR}paPl%^w7HbgM{3*B0veRGM+*9 z($@6Jo3Y5>WDGrvh~r@i3Z$V%+%Ci&#`N{m7Oe;djtNWiAkhGe%6{0`-kX~Y9q3L* zkV!G)z>s*FyqY0{;+M{MQ9I=P!yS&jxn=SEWu4-ra<$9ynn#V)cEM!8vhxrd5q=8qAMVhZ!Oh)g~tM&oMMi(Lv=;TmJmjx^C2p6dOQXPMpk zU8URq05r~B+{aE9$;wEB1715I5dpU>@0#*ldt6^mc;c)sly(vjH@TKG?va_jk)sMytU;=fIZOpf6M#lDkKs(bm-HY1!h7DvI)Q!3+(vh9q#bX(WZ#;)2-cjar(Jx0VekK)Yj~&`5<3{&I zZSMAvc|XigBVBqcd2Gko)5V+AcLH%d<;!&i0AXu($x+^Ca&pG@*vO*BV^q10+SNc* z1dd8r)z0f(&aWLMx6MC1_@Aa$W;%5;?;i@?+%;*Z&)K^A2 zh(xyC$imccbbrSh;?88oU*1D_mnh*V9?r$hh|{#iTKR zC?m(S?18AXKqc9>-kNeE+TSLS<1E`7Z($NZkVw)@!cHFMb0MgAA2;J#YbGsllOa-T zx9A528fZLx#niS}0tr~oQ#>#T9$&w^>Z^+(&QR`TL-R zW~ilE75It{y); z03BZ7h=G{h-j8G^7uh>JEK?y+Vq!?xs2i^J98E&^<(Apw z`Z?R`laKbTO5yb%3_?PT0$|n`@6^mm6@`FN9-!i@&nuA%qrgQ zwd<{$I#|NiLTkULsmI?OYO+J4xBgq=xLY+$mS)o8?z|e?9IbNb0{o@dj9YXB9rZuP ziJ-w#+aUz;{Ev_3vl3--__?A>ZVoaR5{Q~fBP+pVi5a;kUByV-b6RM6!R^{DxoB9q zArraikL0|sH7ea%*j+J@sG4rnT%1JA1kl^Lr5oDb-j%2nCrDYDAQ|9J-ATI|Yjh+b z7Cui6i$`;BJ~B0tPDGD1G0vkb0Y_sB9LNdwJ$Jc_YG zpuZ~nRU-r32_OKYWB7W%7>I<8+oo%5`)&62(+gz;*Om}mc?Tga&D_+PZACdGaEkWT z*JAHNLv%Z6Q@Oz}AenQSWVMP&VRaLb%{9X@pyCTOl06T#M&z9Y10`sH7HMss$axk@ z;^x9-gT}DO>hI=|4&==T?(+!0xxq?uRj=p1z{Cg!-cbZzr8Ns_VKKuxg}I%Ru>mTr 
zMimVsF$*1kNGI_$_3D4&bLYT6Ajs!~wCzA z&4-&9CE@=2##?Kf;3)3#ps0u|aSDsYM&6qAQ@0p3n#)GD6P6fp`7Hc`{5^wptlf^s(EcaNuUhZSlYY&E;Bj!J8heB@8rj?b|o0FSYf z?1mtUay`By=C0ZVu1Ji~%Gi3J{lBUJ4q3zW5PQ>)+xEDN$on|UIDBbrW0qW|b0{9_ z_B8F2>}Tx6jW0!5Biw$XI;*I;6BjoeoEe+yPy4rVS=zYySeGQ4ZTSm`vP$yJE14DV z7_@!OnT)N^iRerE@A(b2pY5J5*ro&k00yG(`Go5MmI}!PCyL7;5*Zk7#x*>LZo5?c z-h!Lx@V8o;?Y6}jL;A5%-p`A@WfMDL?+9l%VH#^fc#_R5Ox?&y^k~e3{XwQ}hDNuV z1;>~vu#b0x#N;xKbsgHqo!c>OH#iNgba~bJB`XXE^y|-xzj`B&J%J;dnffRrN-;ngxEZH~jxeP70SYkX! zk0qb-SIp)h!J?MDv&l30-x-ZEnHlf0L%lyf^LA3*Q`}kvq!HGk5miRFArR?F!G#uG z?FZsI>~vHWs{OdUwwA`_A+gY_G!JLojd~I7Tn2=y8QUv}>~50Z z5e(}DoNr+Yjc}nyaYSm2ua&tQ$?r(IXo1+reQ>##jbVpKnvcW5Q)%yOHIyonCxuylW*TWHGdI zS87lV_OANO*2ZQ)xr~SOsrJbLggeaf)_E(KV*Yb)WrMYjIAD>{knlttQ6dL^EmC*U zbh9x9ft1eOO2!NjJf3&LV3nq|vV7(8M;fi&;}~HABoQburw-wwjgR>>%prDOqnDz+ z2mYSB0%x1wurHXoqei+{GbYZ znYxN<(;#A^*fMaAMJJG2#WjmOFD>Lo7RAGA(loq{Xn<0+=~e0Q(A*N}pb#p{O#5Z_ z+x{7f8|#kQ&dnP=#nIf^g0wJ3eZ!pz4(s^?P0emxbep<4eU`U^BW(k~VXTte+!NYf z2z}1)w*?-g*YMEe7hGtu&nog9kC55P{JuFYuq!+LP4v#o1;-i`4H}MIVCP9fCaSOEhr7ncufyQJ*+t^wTJzYc)U(?&ckV?iszxNcc*n8O?%O_jOENXuoBneOdrRV=nkpsopSRItUIXK{l4c@MiIt8L zty3CDh8LC}Tm%iPQ)+i_*phuyrTpO4&NB7(T)K@Gn$ zts=y=9kplV$fa0@0PoYGI_dmbk$l*fG#$sKSm|RcncUNms;@uYY}~#yFeV@@fMr#v zAS169j8%E?DN54kD*&4!Re)2<9XcBezPozYMaLSCPR|zr4WSX0n+{ zCVQJJDEQ+{ARy`arOUF(i#%8^%GV)B$ zR!OcBHhA%OGCyfr0S7hYAEzmgqSW^r@jL5UA%a5mi?-ytitND@ZcPW~dTPikX7J2p zHoiHZo+yEe2tSvoAQvWBnpx!Ly-0G3qgmJF=WVxlrvYY<1h4lmD ztL^(uU&E~`4MmnYho^tv?w!$UT!>2@$8Em0uCng8BI6r(b7wSUt?neu@!QMFrHVK_ zR#9GF;ZI+{+BU;9fIQVTc1B~wVm~Nd#q`$D+X&fSY>SC*@0p86ph?&>N%F52ky@2B z+e8p@=K+FNGaxdPBWm#Z1-2{5EaINdW{qOVL>wMB3VSS)2T)X2v_B0FGI$w4*M~x+ zj$>60YsIZ`)=ex<>x;T{h~J_tu?dAw)5?+rp4L=lV9H6a<)YiVWxBjSJJe-~P4f7D z3Mkt51!Z*w^r=U&$dptprAd2vm3W>?eRlk`zw!wUF68CASxRH^oOCv}7b^DPtIQ*a zink)gLjmm^Xboz1JqG$&^ObwTv{|-LK1c3s(0Hw+mhy>O8HB*6IU*!YEYXkf)Rn0p z$L7|M4G%Iz5+t^N7scXmY8mA-WE_dXb+<&IZ+ucqzod}kSMrzs{* zVE|Dl^NFnB;-LK2s}jvDhkcPI>8~{@T7@GLW!X+x_Y|19AShd{D0zR~ENphOT4E!| zVsY`iwVanQ8;qG#R5OQ`Rz!YEXbLdtUG=G<(Au0c7>s_~-){A2TXOPPd_GSIf;(v> znWKpU0Wyh_y!T+NYM#yNyVqLY+S;@7MlAcd*X(=K{{V+s$K?2EABtt{@n^L)gE5Nk z+D2w^#F0rY=L%`PQHK&J6^gg-3qa<6rCXZFn#3_8yH!>4KPa+;BKICXZtU-7+W!E) zR1Jp_8!C=05G@B^jdUv-a?EQD-ak}GM1a%t9(!%#v2$~Ac`tVD+G#3%$NIfGX}Cf$ z4G_UK04bO_9b0S^XW`RUOvG|uxBi_(4kF$|{W=FPc4ld)xA_IYxqm${~lgAXr2K)+;4Bub%a z)_z4doZ9wrS1m7{-`l$6a(B!U(!$wKBL$F=O2pDAP|UvO+_O_{Cvw>Y35O5MVY|b5 zS%yB$oSz`Ut6phaIpWc#IvWW5gApE5CNYwijPn&@BM6^M49z3E0)3m7THTNrsHiDem*DB}=B3*M}8yM^L8aYh?_!}#kjDXAEJS8AdoaI8n}5oh!7 zB&%|!Qt^4zotAe1)c#u6homHCT&S@)kE)J2En#^jWO&wBB_ms*?nSDQ!0;coPt4ww z2U95k9HQE^Wl^!)y*PPB8tUE3qkTQbW#anT-Twe#TH&bB#>^B^c(XC6G}~%vYZ4Q$ z?HDHYQgBGbonXrq#Xi##liU!Oz2dnkNH z+hUWI;Sl%~cK60{w^(Sf`5TLO5QjxE#Ogs>H*64ONQwEPc*{rB72rtL8TlNuGltzf ze0hUVx4(-FR4d~cwrhns6F2h?mg?G2IkbQQD#sWkFg}H92{h}X#6~zOP9(@O zPVrk?JZ;WKY*m<@q;j+5n1zns6e3P9&xit)UKbi}R_6zU@76P|uCz0sW#-yg9kd~>v38``+D?U9;$bRxWs{=-i^+l? z&<{hc3wFdp2`-0bV3Wx#ZM>E#q_{9!_V2BQ6x&=OP&lIh0Px$FC8te;Xae|*{kc$_ zIsQ>&5hWfk$E|MSWR_Bdg8bRa@rD{k&TF~nEkFbKYG-te6H(IwHro_R2rpnsVz;?= zw~!d8Y92x-*wuc2m-_U)#AHE`sHo{xmXs}%JD1JdWv;Ew!zHby#IRW@w}?WD8id2% zjs2vr&Fj}*QuW!1F=ERk3M4Nn$=ck(Extg-J;ZULidC|;EgVuxc(-ymmBmV*l>U13 zp9<#LIBY)Yh(NMS$zw6OJT1l5tV#7wT*5*WkcbZBfcbd}?eW&ixu{Tmv&uFy=JG<) zn_H+Qj!w!mcBX|y(8&CK2oAr~q_x^_b_0kzZOLCx5h;LhRFYXFk)(avL(Gst-M!TZ z;l7T*Ef~m|bgK)~@$?$1ZY6v0{@+bXU=Y6mE7yIsF0Gs|IPX)}OMs>W7IN5w@#12?i-+B5_(*l=L8O?3m~ZQkZF z)*EORR9-xUVw|6TJg7Z&Xa|K=WbW%O^DhyLAC$bi$bC?f@@9C}7}7`#lCUMk$&=j? 
z=j}XeUu$oeKa9(4w zH#4k~7cSHR1!z=*@YOGA;T8-Qgfx(3RJFng*bng=ij!I$tGUuX+zXBhr%eUuySRd0 z9EfQ4Ve9VSai}_INMVW*(eDV8Us~Nm3=MEnTX=zjW>!~?pV~smPfC8DO;*+0wuhQ8 z7vi8^Ht8ip$@8~ZSZ(lM$!_C@;0d9H6OFyWaZug=0DpA?j$2pPrnNMpH^yNvxakVM z@WfRm`F-z|Wvn5(y0`}vt--DB`F3L=j|P>$P}l&?@|~zXbfIiUfd*AUaM;{|x?AfU zrL-53I}_iRgq^{-6mUaxV?oqcu8N>V9d_pM<-!gs+Ojb z{{YR*{>bI&D02A{>qDCJA#Y|L%f&Le6CojD#mRiUNgm-hZd<@OwQ1Z>aMWmRo6izJiWm#g6x;gbCpmH9U zOK4k;Sm2wscREeR`32{VU*xSOvkLPn+up|@RdkXNeWBx`kl(2OuDWD}&dMxQ`HX%| zn$qR$o>cB$qPl|N6v)hC2igEWzr#}~gF>bfF)kP7X{3!Wdm<<-9x{4RV56c-=p*P?ws4r1yY;ySB=JZcktI>cAzXZ~~t1 zd?(A%EG})VqsrJrWgJrD43YN$n3)}s7y(%WDIhUEhtpVAzGF|GQJKjnp<>5{Yu>?U z-PDQ8xhLEBq_(r%8&q_j(8ki6xY{|@rHryfus;@7NmI2W?@EVVj<$v9&MxCA;6Vqk zdYe}BA1Wi2$L1u$ThEeXWh@r5HM~S5f4F649hI#Z2#XIwy=$sK#miT3x<9bOxb5^> zM6p|3WW3)i8E)iF1lH;~Eqi#-(r^cEL`YkIT6}Z~)(kj(Rbaq};*%Q386~ad?Y*V7 z#k@r$F!!Wen`zxijnQLs&V-IzQ2t#hYEy{*Ry<{LR|<6{2+UEC7yjX`;Bb9o$-ELRfCEN8aL zOa>ARvdphVjuxl|oO`HLdeNJe0hUdZAt^bo&QUDBN+iJC&yH(LM!$kLmfN)ptc%aJ zBEc&!f4dWn<0OE-;Z3wSMCPvLdy;6lIPKi4V={6;sCT6RVe7EfF4+=VT5u&03OJeg zL|_BRgX^S@jJmh+L#dv4E;yLjtwm1Y`gQo~g4vELrrC`Zh?RKpVr%Hj)}4A&L6|8! zwnUZNOqv919E3kt1fVQvS(GcGta2ezr6fbTu>Q>{J2JL?%SS02b}Y-+W|{6yO2lo@)TL}6+dN!P zD89?XD7l2B3V^F~sAAM+AKjqVd|rc@Kd>j*<*68eD>0 zRUPsZT(!Ac^5(#pq+`o)tH<5FMygZVO+DL{{kvk}4e`950>D!xhK~M+d0JTkV8&Sa~PfDQ(eZ#LI39{@_LdSki;l7y@_FX2pEK zziLK$J!{fL^{YL?Rcj#ur6g3PkdZJ4)555ypJprGmPApSi+ z(Rj*WQg70&Z{)QAw4(jLWTTY(YYnC|d}nIKqzdJ{ug zn;4PWm74{Ue2UmxBy-MGxp68iNQ+KR?^8eq=hmK@auduQb;H1I&tjZBTfuB?=Z7Ja zpX#kmBxrjt9MY(vSa>o0xJmanyPK}t1FQ{i%nvaHJ^aYM5*0l_Q8aaA8}$SDYg<9{2GpM5G913eH*Z!(C<{Z7 z!1&LS9Mz4?k9m!iT6iUm;1e<>yid%PECNxOQ>W#xNu}4=y&xef@H9M8Wic@5B>Zu= z(R~sKZiGc`@1X%t1{5k70!on8-|^O@pe8F-5Rr*+yP%2@W}kg6j1EFvOC3 zBX(2Hs*Usi0ICSLL;6FY5!>v8vR$H#7IPs%a5d;9CFlTMF`!RV2+JwQp*Lpeg9%Xe zs#3!=FTzA*Z%{=)w?>SCs9K_6)G4hl+6j3NbwCt94OZkc1!+exWWi~Cvc>^z=|fY$ z@&1ix+_c3hJ47oEqBiEORRsJ9`~^;gMqtG&av6nD+)dGv6Kx}^+RVX>g?SyS15i-Z zRQiwW);6Z!w=pQVA!1*&ZA&m&cc1ufL&vkS%LGDPUBsX6ceb@_c6kRg9Bcxz^)yFN zz<2*mLbK;YEExZqJa*^EGsaPSERaL zit;M)y5_4no-D2=Ik4}H7Ba8~>8=kaC~CdVUfs?~MtEnbD#2|$Arzi$Lksbj5S zzsv}lV?CS)XP4L+0SYLpYB_u>)7Muj(GFAHhk;@y#^m#H194%Dx%yD z!13gx;4Aoz0_ET&BAn6LjIIGHprY1koc@56s?+68gZOIn3)cunA&ml>413f(fTFJM zO*fzf0sM7iJ_^P|aPWx+|6=Nsf zyWugRQO-&ZH8kb16&*J!YIR(pONG=8d=#OOkN~2Zl1ETYDX2E#!9J|mFRsxKV5mhabq!2J-Y@R;%X1*_zsllX{(9zQI&Fm zloGF1VOClVy)`9pQ?1R+btl}dQ`dfjYJZm8k@czHM*m zzw9>UOQYL7MMvMc8r{%MP+S+8TU$&2019~{O0im)ZSc7pkkN}+S(~|Ubdo4`xBb@Q z$8_nspKChZCV2bSs(Bh3;u-7Fk4wChl205FoQ@uASnn~{pel(Jq>4i$suxgE)UaR= zs5(y0tQi75TgNrGrq&5aNE(DD?&B+l%G=M2mPl64-UvdiNO2nSH60t9voYJL)}4b3 zk@I3nPg}8W^gQ9oDXB9aHRLvyvH4BBSj+pg?!kWmtxz5bB+-BG=K!NNQpTZRm@%%!zvld&jSt{|ma-b;B(!*Gf~kp|AJDvS?Q zw0Gwj7!CXDPKG`aFZ+Srg83xH1%gD5M)v`|3`8_?CgWy~PkPg)t9~T?=tpR$f*hcY z*@HL$P}G_ai1ntkZh2G*HVCRkAdJx*u?N@i_4LxlLDYxEQ@X-tY#|qSHsk5473YR~ zdC@914q%c_HK(q$XJv*U^cV0{bPq3SkIO;ynJ5jf>*rD#RDyqTumWo5xa#g?rti7Qmu7cu#mf*laY+A z$V+fYx4u6%7X9(L0~}IGFD}csyt0knMkDa)U-Y*bI`nY6o-Y;H#zyN<57QCJ8HW3O z(l^4pQ*9BK3#2Oxf-=O;s!?S2#H@Y61%1w|(!Z9Z1=7E{etbAQVoDy0lGETtclUmM zb!Jf;tMS6dphlewaiu-V8-2F?^$f4?DzAt!qROfog^TDQl4GQTMv#ym5M50{=D((!k_ z3Za?8;&Tx!IcpWj#)-x$Dpuk;9+A2b^xLN6@Y7mdUE2li%O!CAr>pBj%~~@fng?r- zyYZ__dq}3AJ>hY@vfN=<#+UZ}(4t4Qn6DQ#PF=D9K7y^wTJhCe)l1GjyZfX)Cy~Xd z!+sdkDI{)H<4G={wTCH>k1=S%>lHH;yN$NEf>ayFaScl$D^4xAK8IG7+3C->a+BP5 z9$@X-2pZabaS_UKRBkagnLFRBk=bB0YrY_D5()^=dR0JG+ws%z)f;ik#cM-UVGN@Z ztYAFT$nlvOewd-Nl}p`P$|+?O2so-&eU*5q_?@>WO=O6RgI*j}bsryQf(WhtGC^Z-Kr!CJkds3QMNsX7aa1U}P zqMA~iO?LcisFmFH3rpUO!0|;PkXM@$c^${0(pLkdFGbTHIye>umho8IuFfNn#>5YU 
zj)zz`Po0L|D+5wI%dECa$%Cpi*LH{xYx#mkW?iZjvJxBf1b$jPqVi}GfG0|%9PgX? zE;e3vX)U#dX!Al7nwoW9YUY$8g|<51)BHxBQS*%Q=|4iR7&;XX+zia~NOC#3*6tu~ zEbo<%2W9ssDAZGs_z_P|y<9&VW0imgKW(-{riiwAPvNM@GCXe6Ya zsbT|=itSYr>Idb@H{!2NoCCS@TFY4rMx!0J=%W&`w5VL6RoL(S;nPdNsxldWBZ832 z8#;sp0E+4rk0DdIuc_s}w6xF$pn=z2x+t8r3AH5MVL=Ve+RC$9$2GL7rDD2*>Ptxy zwA=mNi6GOXZKj8l_w_UPmhMc)E*B>$M}q_4w2AVcZ1Pxy{%F@S=1f3~tC)C?s0bE{ z$8T}z*1bBNTHdG%2X@;6Q8fCSlAc>`BiMjk_(z#w@$=V=;naH6$$xa~kAJO<}xZa7ubhOg_ zq#m?RE1Vs|v&VmKz%UWCjY1fYRem%z*bQGWLNK`<%ItWKlRMr5k?Se)KhD{p6rwRu^Uq_Lh_!X@LiBX<<;(zWy;XeQ`Hl&!)f z%=T4jUN?rs_@#~3R!^&W5x|;j3uAF{kC0Ze-q{a%h#mF^uH1dCq_qb)VTXrH(O~QK zQ@`ciwhKG^!C@!d8!TRD#@fRnk+v;qn~Ksy2Pb0BWYI%=*_V{5-Co!X!;LDyS4Ggo zTQcB$e!II8FtB%hmOFMlp9_5^Heq{-ySL!7**TuxRA*4itsC)WBpzKlbk>aGC+ZK= zR5BAUh3EXO^HxcHc{SEIl#*RTB!ir>!5c{H+;4pk9w74kwV$cb*fBmxj)2|@(6%KY z6D`hnFy2pZfX3X8W;kNIxVc)yCJ>pfX+VVW9+cauVWpw*HVj;adzN7A6|9$8EL~hme1~X26>X+;bgLsK-^60#4uJW z)IQ3UWj;+Bokz&t1N|u#6oLJgm|131siXM>#4IgDsf18EPU3t>yogUta4D0(A>g!ur;k3zSmlhm4y#Y zdc8IAcHg%#ng5e{!+lt=xC;;lb>zgk+-i(36nge$npL;8{fs_8{m#PMg2nBL zjbvb|zWE=Mvvnszw-)mn&Q&Loz;y+^r?Mj#BBf7fJk!p$(E~;_B>an0N|R4*JxMza zEq6r|QHrJg6dN`Q{CGU2%+p(0z%J*7ij}o>W^b4*iWs3N#RyiV81&Mj3~~DSKHV!W z<%Y#k>_;p?inxt;Y7(TAzfeIUx~ab?nm3P%tYs)*6zhkzx0xO*Br~B5B&so8tF*E- zgmvDZ)uBsQa$%)9ZMZUGH*Pg#Vu@O2e45&L!JS2qab9a_8SYD%!B9CkMMdA;#8tMb z)Qi@{X!4gZ0PJ#4Xj5wan?SS5XOc6M^9vljQDd@Lh>f|FHN@8J@;HSQo>F%S_XeOq ztSi2c{J9vpqjf(n&GZP`Tk?!d-Kj+mV#n=u<=#E4?ON3&ki_xMrAY0mF7r^j@jXUhSz==9}a%O5CJnhuwb5c$eENlWTE}$?#dESeiK5)+Cvf?7*+Lj^$)K zdUV@Imb8yaYHwH5e5KifNOtYTb#sXOSeIDK%PD5dSX{7E05V9GjyGvaB!DR-CnaFO z8fdfCHZM#=9k;HB$=;p05kfn0L?X!jxAE-F)xDl2vUt1LIW!VRt0GW<{1#w6$T<5y zjdsw#mAqCv`6YE4yVJBMn9!a45NW>W-@SZt-rfi>9L&}^uJgf=vEtPX#d(Hh=AlCR zei}HAXjGhVZ>T$9tqU25xIED~j~>*ikj~4H%G+cPN9wXMz>q#qxgfcAinM7a*}0Vh zp`CJ0QBe)CPJ$Aq{Y|18GxE>b>?Nr2O?V5 z-KtczvEE(SdwbIS&G1v{res>26@^OAytB)rzRO?zDh9UU9i^o8BrJB)2UkExM{1fM zj*Sa!0vx1+T1AtR{?Hv3z3|p2P%@rh zV^0M`Xe01>_l@LGM|32xlz=~RIi-F9*1LXcG>`r&^`h`1UEhxNQ4<^xZzHiu<9Y05 z7Z5VCg9`^z0PEJD*R5?d%QS-&a9ndtzi)hA*5Ae~v#mVEZ?Enmzn&YD40aLNqQ5C4 z_au@^Tv22MFrtCIbe_Ihj#CiLygR>#NH0z)t5zW(=M>9(kjLDl*Aqn)(@0gLmmU(` zR9E*a1q!kEY!!gkQPSLrc$|3CzqI0=x);~QQdX>hty+Wz$0eg11*ztE{8=Ej;gWDp=@Tu8ks*ikMEl*KT zU03-Th7}7FQxTgqa8un~6h-#{pw(3MUHVsjQdb-Vx(}A^ylpdx<3-|@CNf{KdB&wD7RQiF}LeoX&B2nQNEQ92+cQ$6qIc@}Z7f)1*;6}XE zka@!|HjR)DBy>BUf!C`3JVbEZw&7~ni3Dy!F_N%nwP{irMCJ!BYr)K%Dp!7tbxVQR z?q^z=hERAX%yK3x4>Z?U<%L4c20|8rF7Dmop?G3}SQ$_^zG)mU5$!)6DJ#I7u_H6J zXx;l@D$kRsxhYD8)mie^b7HgkTvDSPb-p_+mi?t!B{3?kTU-nHEvJsx#a0$sJM<)M0CbZeQ=T!vbqcq5LH^vx3>7wSAhAXl*5+%K08Wsr4#bTh zjbP_q6Hc||Hy>m}+E~d|T3yp=$r)vr98Uo6#v_3BNRfb1D^}WpLqC&RTjcI>439jP zsMAk#1ZAKKpfkCD?yYEAlqdLBk@%_30$}5~;+K`9E$Y?1{f*-vrgWH8hbmRLpHM(G z8dmO_<+0`rs^&4uyPC=iui9k=rS^`S*W+D9twgPt2L*jC^9ka+@%y1=75+BjM~>UJ z_c)m%RV@wlY^;P6x&!WEL$lXn9o4jM>I9%BG2is($@&!g%_kjv)RT8FB8g`qWaRS0 zBGFNq39kk~;xc+KN_6>joF|e_{!@<-{I~dYCT(Sm!TqNcCn@E)i&(Mv_OTfjx_09L zg+jEgSoSOev-0Z8uqSP7+OSA)a<<+JQ#5R|Q#S=mGhvmvy86!Sb+HBnu~?cYDbKDD8trpgxqXH5z-+9#+SGJ)>}{Q(g>%y#iXv=TBl5xaT#!xCv_$H!jyFX$mJT z&{f%uHm~Qc2wQHInNy0p;@93mn7QrRw0PWd{{V}4P)xx5BUe0Gi5`P*>eBk3YS?)P zB}&sWSbIru7@TrBxb8bT+)j3os})(;P*u4|z*K&nw$Luf$0!(|s;$WvV5)i6YSR(m zcNb7Z*3sUgE(adV%n+q13vmX7eS&G%L<-LxgnaR<7RK z8jyV{u3QaPgX8}IcKoLgo4Yyg^srshJIg-Yaq!>P+0a2Cq1_#K^kwU)5dn3{8DfWJ zaQ-*sQb8|?w2|d59HZc+1PYq5YhpPCUai2fru06Vl>sQ1y0E#H809G>Y|+SLYJyad zMMlgu#Bp4rOPht4xg3nMF{yPV8rHo=xl5IH4CY)i0dnMyoQ-QiO0hrNT~xquRu?2U ztG51Kc#I^K14z51$?bJJP>OW^p1Ps25*u=paq$kmiTg)$oXB9x=I(AT@(i)wCG68k zZVWSWPAu$s9wh*7RrJ;lpg`Q7QONX+dX{#oj6!L@w~=CIU=HS>3jTV+y%GUrxFKX7 
zaHk`D(MY9j<56zXFBNAfLx3cuOD8UC@YkvMXb)xg)j4DU6)U_Sl1Ym&O?&kAGS0i=i{5L_kcah^piU$Hrg5LC|2rDtVW!iEF_j2nri zJ>MPbHF;iyBqDJr$5A0f>x{!&USalB;{o`#RwkA3n@(5RvJ8~5j*PFSo_TiIo zJf+$6SGTR6T&UJ+EOpiLUtCXjX@3&NmiHF@xSltShwem$iK$f^2Cq<9jR#MxsbKH5 zaOuYy;-b`u-0F;3wzQgApn?lI?qf0B+`x+*moXjLXI1^0G5OIDDfQ*4o_V%XX}Yb{ z9}Ssagn3YGVhpQjITa$Y!F_MMmr(RlGOgR=2Zpd$W}u=r}x&|I9VE5DB=Phyop zpUYDEJIO8O5-qeQd&z;K#Svhfi<(sObpS0`o`CB@*TiLKRuF;-CzToOc<;%1hniU{Dlu?vhod`ffVjA79vGd7gbf5>7S(Aw;rBssLJq2}Q95F-1 z9>MuPlO?*f<~m#ZHzt=+EheBQrH4k?`hb5OWa@r8%#8?yySotOY$pp#*k)MgqX7Q^ z;rADdb)`7^F!dV3y{42295^9>#Z|PaS9Nt)N25179z>cCj-9^2gE(#rPVJf@o@s4o zZby}@%K15HT@`)M-YW!I6zr|cQc+LUTKr3Y9eB)pCZIk$ScrtTadQ#X<9Qd63*pQZKnzH6^F-ksZbuMGd@pchf8?PSl#MOr)h>xp>=m&Bt|!wdH7SBR4k?LL-UhPDvz% zA_6Ge+6x7z&2JQvU#q**(r?)pmQ}k)cw_({XBfd|tq2sS<}~0^ zF;8ja7&|F#vEC$>;`zW=xRnt{a-3Ne{k_j`%kK9v+u^9nH>nah1y26}Ar0g**k!IQ z3Z7EP43`m09M44~h3dSB;exU4uv!kCbvbw|32_N?#jY+f&vlTowwCK96}yX9i36+* z#3_`D6YWN0u%}PPoZu*o44#gP_}73X?m03!31scctt2o&M?MI|7G@!bRX)+bsE$x1}A)YLDLPF@0$kyk#YjUfxS>%lr zGSZF2{B_g#__;Bo?YD{?>}E~)55Ab|_lnobEPs?4)mk|ea)`&rk&qHguw>i-KLANO zR{sFTNP27Bg*_n{CI0}pSAH>XZT!~HGh=fV+N|RrQ68&BVLd&z&r zXwS7&@5Fp}8=ZXTReW5Zlr%Rw=!OfaG0@(yhDgJ>ei1z=!I=6s+N^IFR29*Yjteo* zWtoz8!`e4txpuEFM|A>@k}YVetfP|h2W@+eE{;)%_Ya$Zr*7S=Nv;^sBs_`D{_ zb=};o);7<(n`u%{7cy3b9_nD*GgYWd>CpnWGM= zC|F$*ENLLB?0K&;{{U^gM=6cQP7QOHHbv(+7C zP^>|JS?rpfw(e2xfNHD7X<=Z@hV0J^A8f9I7yK%PpMQ-Q{u;-Ypx%`tTeTMD=-j|p ztwAtFMw-pym=nrKOR23l>DQGyYCt6An~nvxqF&@A1_({1+)!k0(q!Oz zpp(B!e`eZ#?V~GkY+D|RFF-A4wg}pR8o0(B<`VY|qEIj64{dL3yxL3XLp7LgMzm&j zjMLGptlKxuo1l|NuSWj>q3u}rt>u!=yfI^*;y0c)WwMj8e!*LNEAlNLn*yPGPblMF zy#D~z8ueEN73={bA7`Qsl)+r%jlx_CO>cDev)88}11TF;gjCm71=Vg^F~w80vbnpu zTU#_Y6EQplYDsP&l0cKaeJQNZf)$DK&nUpy%X5pn`inUqnnvo)Ay8H(o!6Hr)X2e9nu65> zq0<@n6e&f+sy|T$!ugSAGbP$9lOg|y2p>{3oG?xBmn z4>Lt{LrEwUZcuy3AD}Vx)P=~FCnbB1zHCNrd$xiE`hTH|gzv$~DlLrMk`Or=1Fpwy zsB5lQjG0c7-&*sxx4e*dJ9A!K#VR?TQBsa2QOmd6WuhL2py(3OcMi0b*m;sg#qH*8 z035Atphh7s-H?co37SWboN^99qoTEcZGqf3UYaRZ zUQaFLGNezPW=qeGy#WEV z8%yNnR=2iti`%+DZWR=TXYQaWKy?nv%;-mw_~v5=EOJcoWpUYLd#rA55M+w(Fu|mF z17pm5HsKEt{(hf3QC_8?{Y)JnAT&A`DBTMhyVjeN9sn|OI05xPwFnK!2 zBf3j<5q83$_9KExUI4Dj%y}Qeo71f|?Z_sswWD_-;kP}CvuS@Np?s~9D9FN(V~ys- zvOAA`*mmO*B1gz6C+0kkj0#Td9=%&~L9w<_Ns?I7;M@ma{-DR*6HDAK6bkZ{eW*`L zwP{rqGVM~A$mN0~V-`N7RO5_^nqhb8nEwMSo=}oo7A!xzSj= zG^(tV%0O!JK%hGv>FG)jDXw#I&gylgWfBciefB)f+yoY89@ER#wW;`uS4Yz7gjgCa zjVz z>+7{g!+*m`&Su5NKk>@F98>~q%n5oW-1wq6$=ssL8x)!%84loy5bQ_*9f+>8HIM-E zJ_#*lq#rk9hw;q5Ya>0hOONs^UUrz&W^@owBFI$F&xh z_UDf7ZkdtK68Qa(+53|0Ns_nm;cEe4(_CAYl}L<7-K!!q5y{Z9SNp2N^3$vx(txIX zr;uZ^R_7CKc#ks9B^H%*w~|=HDI}Hck^?sutv?-SSg|W|GOk|nw`!Q$59L-?#c!f) zK51pLx=KQiC|>=ONZ?V+8vQ(pP)_`7$Zb$-DVt|JDi36-{^7!p(xww)#{{%)GFV)* z-Nh79=3u9Jn~av$Fcm7Wugs3m8Kp&b`D*5&;A3_iVtdmB5L{#35xZ}y4V1^&Ti-!F ztE=K~(93KK26&O3qcO`(mRQK;PlXRdT^89V98&iH2AC;r=eovPS>`d2_9nKuRa}EG zXYa;}?28#@Mo&}h+z_YMwq;ToKj!e5?=rzya|gXc$_|~TF3b8b~d(>2%#h~5%;Mq zso!%eFq!J#xc>k*8V~5y#?X!n1l%$tlrzd~@Hbc~V9MJpwn)^PWFScZZ*O%7{YcEO zQ|VCEU0ekt6@uJ(7Dj9pwib~koD9v$5G;~axhR!~%oLNve{QEJcSK}Y#k^ij^^EdO zJc8cP@VALgKzRozpgS)?`RP4gv{}jz?nSp4oKp{rms~ky@kTM9Z@%UxH^^{TRx36#*Cs7; z7C^}(rOXLgGImz}$z+l^o;EEJt48D4=zX@418(JLN%7u-Uj3-NYTVgeE#5XZ;*O7O zvdDyzLLOjCElmRY^xHxjk--*L0l@9jfm+*OEaNNPHmq6)h8-tWeU@*X&&wgs*Y6NpXb&FpZ>XH z85th$zlt1UO3w2hQG1-fnrzdFuBIU(x~n8`L|VL36a_nCynqW@@443fjWjIBEwPq6 zZQQO4EArdi%NkqH5AG$nyi}_JaS$9y|buIKZxXQF>TXn;lm`ge7HsGK4$2`J{W>OPd?kf&G%i)dfz&;6rWG zO#EIGyVsUo%vK6_G;a*tW zw{js1@jMp1f}URL5C~&YU10pBnA~zHAw^?oN;OVK2nV{_tpgUVxjo6C;(AZAt$!8U zUS@6$Y2bP``z50(K=M(`i-uGJN}8TZ2Ecv|LBH|Q?CVTuwCw7*A<2ih$KH#}601^L 
zMHME8sNecEsCALM1}XD;H)EDarb5>nD2FF&SZ0_7$XYhUOVgZn`H}kn0ISgYX>GFRhsjQcUpN1H^_Q6smFEk(ahwv<1FhH3kuIl90%=SnLc7z;8`HJ>-zuLluqmVk`trB!!iPmq{dx#P*n|E|meAI&tc1q}NJn zG})d}_4jrC@m4m$902ECx{(R6e`=A>>+_woLn+{^M4y} zA@Znlem-HggD^ywQG+8zZ0%VjxQxm3B#h3~Lqgl_%7TI@wy&6uYp((FTw3|R^1N%5 z@#$@3va&J-?n@*AS*|KK4>KcvJZLp4bpRNdZx7wKbFE)1ZY}LrHWfD2i`sg!QiSbI z#^YR&h)LN~W@WjyokQ4M+o#l9gTxUyp#=2>x`XljwZR-LOUyh<@5e68VEsli;e*=U zl;U=+Ba^QOp{ZKX_0@rOc3KbGY-|4j!n00jd-pOjgVlJdDgOX!y7FH?b+1N0;rgGl z9Obzy{nf_YN8}=S;b98d!t=8FIhP{5fE<%~&~%Eygd9io*Ul>l=4%S%s^*DQ+er$6BT%dKryXtGtif&$;L% zy1bXi{FJjhTS>))u0b8hWg%8@c0IXRN;)gJ)ClFlOUP3Ys-FhWwp=D{txegKO^EU& z?$MP4C`RE*ggjoZIeZ&!3PULXdP;iT$jT;C^$Zfvjb?3c{D$t>Va4>h88}5|KuK7+ zCRb{EQ;cAM=7T+KX2Eos4E@a-+j8m`5qzhwsx}QFJ-v$OwpG5W|7>uvywmB zS%YzY<;gWQJv87H31K%RZ&hqFOKo)-#lWTqG+b0BX(ePJyF8MS)jDnPttq(_?Pd&9 zRy8Ix9-bdmV=-l8p0u^gT1=~I@<(fYphmJp-OY z{QW+zhhlDIvy%HKZEEnmR^lv7C6+@Jmjt+yr=VMS*t;Ht)Y79&M1Zr)$P(1s>58Fi zh`+nFS|f`sq`0{>21jF3!;slkNE`nEwd+j|S0WEIw5fu1$6i{kyuv^CX2#4dojJ-) zP;pW_jLZ_hFQ&5f7UW16<_Wk(;;A>0-`L;Db!|IZ%7eG{An{PHl;ZyY-VMv;KA(Zn zMv@YSSqB?ni5pQgmI<%LJj&lWaF#h3mm7Na&n2r`H6xkfPEI6ATiYQO_OHWjVQc;% z+8OPf(HQ2$b?wknmo(V`klvXwv5brYOkB}lj@Wtlaz~7BUh24B9XWnoHIsJC?Us~e z4{z$D-JEd=3(b6yC1Z}Ul_af2<rm!NKj|WFEP(tJaqF#{a|FsP*-wgs?hhqSdvN8`14ddC(XvX#rbe8M`(GXXep*)hCf<344>8lEgAqbFJsu8~z@)HV8Z+c{u zS_!U5cSw7-(`|iaMpjeYiA$o9MPNlYI#*F+F>$?zK@XN(XlQ@JP^9kr#~%c{mzK)j z+aX~U$h%>E8G=}&iDGzSjzs6|288z(fHe=VL8jv6jucg#gAvVp78>f-&J2t*FVfEm zn}ocTBr!M-1CT(W+^tCEN)L4gh(Z`%W-T*$uPC{2pIMQ=BJq;lTZr$VoP`a#q+zMQ zKs5`b7_B$Qh9?&Vhm7Yk#LFx*3$T*K3rBA3Ia^!FKLl5klNB-%RRk?6nz5jCA~%Go ziNM?7yt655n#rZSI2eqx$!^Od%NsOl8o=?Etd*H62SLlF2?H^7aWcdn47H4(o8oiW zRG$$gE^Z}_-XcH<@*rw4LdvY5fWwyE4!S{-_TgHk*1Sohy10}uNg3gZ&&sK7`&8U} zy{bqWtFmL;Uz#iB$Dggaf-H0t4|jJ#Z*a0J1V&f()ASnrDm`^*#mcFe3isQ%B3})* zmMFfRZk}s|w^Ga_SCOcy&Pz~n+mWxtp1Sb=06Q{uM1M$#?tiKU)ECdZrVIu_XZkp= zFRmjEXDr~-H3Zatntj~TU%RvESEj& zyfvgTTdUj1+@`dS=eE;D9D=_qY~L%+UK5cwVtitHg(2MP(+p0TJ?wtLf$^CYkC)WP=H50$EwhcMd-60QkFU zrZCM9ySnbOcT`BIj318|*!yd~myOGeQ@_T6FO>$npZRtOVk|70*3%&jbL6Bf67KVu zXGA%sYDtAd$)E9TJ>L zs?vg@njEIZrN0Gn7a~y=8rJA49#Y@Nq8iVBJU(=3&umRl~glG#S@m^&?qWsMyk$8c1F8(;u7$ENilfK2V5w=hn%2^=@8S zCy}I|i&j#tMKneuxbrl@P<%%&l{%&N%eKs|XygvOy~@O7`ZQt49?yCtM-N3RsmtYZ ztbNk03s-Mw73jl$+j3J2l~?>80DIH37^D6H=fY>o}Os1#KqwFBY@rrLHVFj}6dy150r> zMebE)@B@7&HW_X3H`Mx&StWTdcaA9;2z~)km{Ethe;q9Wmiejk#B@>q-YXkW1WVP> z3u$7S2;L^|tNTNHd!^M7%F;xv<~0u|6=Sn{d;?@AH2Q%s#~rcYidaPsQq1Bn`L&!e zmax8y{A*(5DyehrYi53`$J^CU$An}n6b(*RZNqNOsu>K$V&aF$3|BdbXZk6*Ts%VJ zPT)l*b*~uQpisfwdQ^?K(Tp4=1lbCQ#-_QLrLFCps&|dr;UXkZkSQrv27}j9$Z4Yu zB3W7{2pe3r<=O1XXKwtL>7Vx>mB3!oab@?B$okY%R%OnZ{C_=^pD#Wa5r|xQwC%@t zG(^l~NRMg|@~{m^mWDNI?53WYq|0Ef2m|UVQ_t|Xub1ZVfpA2EOO=1#`zu~Io;;d#$U-JI|GstITb(WB-JgeD+fe3}lx<)%^ zi-_dpRgULvSquhSZ;V>aY2vt>d46c^A`Y|!0?HiF^y=Q4`OnDDHm2eK01+LO`XJoJ zX8!y)@ z61BXW1(Lw$@v{#dk{{-_*-A2ubt!i zZJoBt-uT7SHHF0FaI?CV7AY8kENE^Rs;_xSXeClniC@s=gqVU|7Tx&(V)R6=G8dN@ z?;XX(wTWhpE~b^xn4^dU9kgZ~%7A^N*ZP*Xrig8^TGwu6pfMd__H=lmS*w~{i5Dei zBJH7u5MJe|tHg658AGt8KfCGv@ejR9b?mGfqQ1YDZt~T_otr_E2wk>R06_(tu z6-O$0G-rrnr($($5GFNNy)hF|g(i;TIMYK(MKF*AY^qVkGvxeE{`N;f=Lk5h*~XZeRV??s?vt45+Wpd zS-)1^c?vp*{ux7z7gqucY;sCOGB{;XtIcRQv8_V&8Ps{QfJyM{N*y#o zV?oe=UXjJypIMxtiy0NIysBY=KAloy7jz@260|@j#zjD+)Rh|p?mDz#S2+E@U71d} zL;;lG0K`5VwjF6qPb9Oek7$iaK(!%43hiIYx-!g)yN^h&rivpc3)$Htx0Muu9Gt*c za~R}nzUrjcQ6j+-IZimOc5ArrFRVO?h0EIC1+d88Tr6QDy@hGPBa9E30HHgcwBEYM z)A=NAo!()N2cIt7R*ix~$81zyRfKtANZ?<3AKa2cMK?PIqZ6>M)Ygr;Wi?tlOYMpg z;8{ytUzNa_HU9wVmx1guQJRt2oNz2a{_9ZwTD|zZQGQ0ujk75Fqfyhi?ZperVV?<~ 
zyTeIW7Lu5vaBI#YYa0*ENYLRt(REEEgriOyc3`c+k4x_o&)e~oUIw;%BO6i!D!FG# z`iZK_;YZxzCtCA+!7W#`}u9*byT_>T^cqnAaKKOK}s{P>f&e#>rqjy z1&#|~ium2_<@b_7JhHr*4(iJsi_w`lj@bY*s`gQmHC5P71hH;JBw|4q=PJS!Wmeao&DT~t2>d%R|_)+wC+^?@OVx) z8#X)2{B|Z!7xGA!;@SYFmXopx8r`W-0S(Osf4rmQsjZok7~+Cb20rpj8|W_M{{Rx* z!>IVv)7P$w6efIjQz@VF=gHa=j<^x;BnREH_UOcsUR@8StZKL`UU@H=!nW&ue%E#Y zN86QRjt(qMWs~$=p0J!0S^Q%??p`QiGqi}e0izQ}Jw&=@K#Qb&2 ziFYxITP&MdTh5n=sS~WoMHtHx%fwKTQOk`!Blv1aQl*yCJ1a|UjjWO1+sHe23nUVL z#KjNoNz8Do3bGYrUNzd9%$J#a7)`tlB&{8)y2ToTT<}+A;$tiWbt}J9zWS`D z03_}Y*;NbaWVZ^@K&&G`fmEKJ(YOo!TKR>jo04thr6yKO_g>N;_^tk+296v^lHADL z2Q+u~fT_*w0}kP2Q|>A{ z4`J)MrnR#NZR*cfRE|?4C5q#k=2x+YlSOJ|J*6yp9w<#dZT|p0O9ypq#hK`uVe;&5 zZ^Y9D^=bbAG-==36IMM-Fw1}DFaFp04xERf zZ&|`UDEHhC{YwY_7T5m(#iX>piFBU;va}w9(=Qd?BmV$GeZcT3Si(I8?A8>#422cL3FaByBP-3YSn)o06&&xmiB%l24rmR%U1*}>KkXm#>*m&dK`k|(eaZg-)|kKi6u + + + Documentation for the LLVM System at SVN head + + + + +
Documentation for the LLVM System at SVN head

If you are using a released version of LLVM, see the download page to find your documentation.

Written by The LLVM Team

  • The LLVM Announcements List: This is a low volume list that provides
    important announcements regarding LLVM. It gets email about once a month.
  • The Developer's List: This list is for people who want to be included in
    technical discussions of LLVM. People post to this list when they have
    questions about writing code for or using the LLVM tools. It is relatively
    low volume.
  • The Bugs & Patches Archive: This list gets emailed every time a bug is
    opened and closed, and when people submit patches to be included in LLVM.
    It is higher volume than the LLVMdev list.
  • The Commits Archive: This list contains all commit messages that are made
    when LLVM developers commit code changes to the repository. It is useful
    for those who want to stay on the bleeding edge of LLVM development. This
    list is very high volume.
  • The Test Results Archive: A message is automatically sent to this list by
    every active nightly tester when it completes. As such, this list gets
    email several times each day, making it a high volume list.
LLVM Compiler Infrastructure
Last modified: $Date$
diff --git a/final/docs/llvm.css b/final/docs/llvm.css
new file mode 100644
index 00000000000..f572b5e5714
--- /dev/null
+++ b/final/docs/llvm.css
@@ -0,0 +1,100 @@
+/*
+ * LLVM documentation style sheet
+ */
+
+/* Common styles */
+.body { color: black; background: white; margin: 0 0 0 0 }
+
+/* No borders on image links */
+a:link img, a:visited img { border-style: none }
+
+address img { float: right; width: 88px; height: 31px; }
+address     { clear: right; }
+
+table { text-align: center; border: 2px solid black;
+        border-collapse: collapse; margin-top: 1em; margin-left: 1em;
+        margin-right: 1em; margin-bottom: 1em; }
+tr, td { border: 2px solid gray; padding: 4pt 4pt 2pt 2pt; }
+th { border: 2px solid gray; font-weight: bold; font-size: 105%;
+     background: url("img/lines.gif");
+     font-family: "Georgia,Palatino,Times,Roman,SanSerif";
+     text-align: center; vertical-align: middle; }
+/*
+ * Documentation
+ */
+/* Common for title and header */
+.doc_title, .doc_section, .doc_subsection, h1, h2 {
+  color: black; background: url("img/lines.gif");
+  font-family: "Georgia,Palatino,Times,Roman,SanSerif"; font-weight: bold;
+  border-width: 1px;
+  border-style: solid none solid none;
+  text-align: center;
+  vertical-align: middle;
+  padding-left: 8pt;
+  padding-top: 1px;
+  padding-bottom: 2px
+}
+
+h1, .doc_section { text-align: center; font-size: 22pt;
+                   margin: 20pt 0pt 5pt 0pt; }
+
+.doc_title, .title { text-align: left; font-size: 25pt }
+
+h2, .doc_subsection { width: 75%;
+                      text-align: left; font-size: 12pt;
+                      padding: 4pt 4pt 4pt 4pt;
+                      margin: 1.5em 0.5em 0.5em 0.5em }
+
+h3, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em;
+                         font-weight: bold; font-style: oblique;
+                         border-bottom: 1px solid #999999; font-size: 12pt;
+                         width: 75%; }
+
+.doc_author { text-align: left; font-weight: bold; padding-left: 20pt }
+.doc_text   { text-align: left; padding-left: 20pt; padding-right: 10pt }
+
+.doc_footer { text-align: left; padding: 0 0 0 0 }
+
+.doc_hilite { color: blue; font-weight: bold; }
+
+.doc_table  { text-align: center; width: 90%;
+              padding: 1px 1px 1px 1px; border: 1px; }
+
+.doc_warning { color: red; font-weight: bold }
+
+/* <div class="doc_code"> would use this class, and <pre> adds more padding */
+.doc_code, .literal-block
+  { border: solid 1px gray; background: #eeeeee;
+    margin: 0 1em 0 1em;
+    padding: 0 1em 0 1em;
+    display: table;
+  }
+
+/* It is preferable to use <pre class="doc_code"> everywhere instead of the
+ * <div class="doc_code"><pre>...</pre></div> construct.
+ *
+ * Once all docs use <pre> for code regions, this style can be merged with the
+ * one above, and we can drop the [pre] qualifier.
+ */
+pre.doc_code, .literal-block { padding: 1em 2em 1em 1em }
+
+.doc_notes      { background: #fafafa; border: 1px solid #cecece;
+                  display: table; padding: 0 1em 0 .1em }
+
+table.layout    { text-align: left; border: none; border-collapse: collapse;
+                  padding: 4px 4px 4px 4px; }
+tr.layout, td.layout, td.left, td.right
+                { border: none; padding: 4pt 4pt 2pt 2pt; vertical-align: top; }
+td.left         { text-align: left }
+td.right        { text-align: right }
+th.layout       { border: none; font-weight: bold; font-size: 105%;
+                  text-align: center; vertical-align: middle; }
+
+/* Left align table cell */
+.td_left        { border: 2px solid gray; text-align: left; }
+
+/* ReST-specific */
+.title { margin-top: 0 }
+.topic-title{ display: none }
+div.contents ul { list-style-type: decimal }
+.toc-backref    { color: black; text-decoration: none; }
diff --git a/final/docs/re_format.7 b/final/docs/re_format.7
new file mode 100644
index 00000000000..0c0928716f4
--- /dev/null
+++ b/final/docs/re_format.7
@@ -0,0 +1,756 @@
+.\"	$OpenBSD: re_format.7,v 1.14 2007/05/31 19:19:30 jmc Exp $
+.\"
+.\" Copyright (c) 1997, Phillip F Knaack. All rights reserved.
+.\"
+.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
+.\" Copyright (c) 1992, 1993, 1994
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Henry Spencer.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"	@(#)re_format.7	8.3 (Berkeley) 3/20/94
+.\"
+.Dd $Mdocdate: May 31 2007 $
+.Dt RE_FORMAT 7
+.Os
+.Sh NAME
+.Nm re_format
+.Nd POSIX regular expressions
+.Sh DESCRIPTION
+Regular expressions (REs),
+as defined in
+.St -p1003.1-2004 ,
+come in two forms:
+basic regular expressions
+(BREs)
+and extended regular expressions
+(EREs).
+Both forms of regular expressions are supported
+by the interfaces described in
+.Xr regex 3 .
+Applications dealing with regular expressions
+may use one or the other form
+(or indeed both).
+For example,
+.Xr ed 1
+uses BREs,
+whilst
+.Xr egrep 1
+talks EREs.
+Consult the manual page for the specific application to find out which
+it uses.
+.Pp
+POSIX leaves some aspects of RE syntax and semantics open;
+.Sq **
+marks decisions on these aspects that
+may not be fully portable to other POSIX implementations.
+.Pp
+This manual page first describes regular expressions in general,
+specifically extended regular expressions,
+and then discusses differences between them and basic regular expressions.
+.Sh EXTENDED REGULAR EXPRESSIONS
+An ERE is one** or more non-empty**
+.Em branches ,
+separated by
+.Sq \*(Ba .
+It matches anything that matches one of the branches.
+.Pp
+A branch is one** or more
+.Em pieces ,
+concatenated.
+It matches a match for the first, followed by a match for the second, etc.
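As a concrete illustration (a sketch, not part of the manual page), the
following C program feeds an ERE with two branches to the regex(3) interface
this page documents; regcomp(), regexec(), REG_EXTENDED, and REG_NOSUB are the
standard POSIX regex(3) API, and the pattern and inputs are arbitrary choices:

    #include <regex.h>
    #include <stdio.h>

    int main(void) {
        regex_t re;
        /* "cat|dog" is one ERE with two branches, each a concatenation of
         * three one-character pieces.  REG_EXTENDED selects ERE syntax;
         * REG_NOSUB says only a yes/no answer is needed. */
        if (regcomp(&re, "cat|dog", REG_EXTENDED | REG_NOSUB) != 0)
            return 1;
        const char *inputs[] = { "catalog", "hotdog", "bird" };
        for (int i = 0; i < 3; i++)
            printf("%-8s %s\n", inputs[i],
                   regexec(&re, inputs[i], 0, NULL, 0) == 0 ? "matches" : "no match");
        regfree(&re);
        return 0;
    }

Since regexec() looks for a match anywhere in the string, "catalog" and
"hotdog" both match (via the first and second branch respectively), while
"bird" does not.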
+.Pp
+A piece is an
+.Em atom
+possibly followed by a single**
+.Sq * ,
+.Sq + ,
+.Sq ?\& ,
+or
+.Em bound .
+An atom followed by
+.Sq *
+matches a sequence of 0 or more matches of the atom.
+An atom followed by
+.Sq +
+matches a sequence of 1 or more matches of the atom.
+An atom followed by
+.Sq ?\&
+matches a sequence of 0 or 1 matches of the atom.
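A short sketch (not part of the page) of the three operators, with the
patterns anchored by '^' and '$' so the whole input must match; the helper
function and the inputs are invented for the example:

    #include <regex.h>
    #include <stdio.h>

    /* Returns 1 if the ERE pat matches somewhere in str, else 0. */
    static int matches(const char *pat, const char *str) {
        regex_t re;
        if (regcomp(&re, pat, REG_EXTENDED | REG_NOSUB) != 0) return -1;
        int rc = regexec(&re, str, 0, NULL, 0);
        regfree(&re);
        return rc == 0;
    }

    int main(void) {
        printf("%d\n", matches("^ab*c$", "ac"));   /* 1: '*' allows zero 'b's   */
        printf("%d\n", matches("^ab+c$", "ac"));   /* 0: '+' needs at least one */
        printf("%d\n", matches("^ab?c$", "abbc")); /* 0: '?' allows at most one */
        return 0;
    }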
+.Pp
+A bound is
+.Sq {
+followed by an unsigned decimal integer,
+possibly followed by
+.Sq ,\&
+possibly followed by another unsigned decimal integer,
+always followed by
+.Sq } .
+The integers must lie between 0 and
+.Dv RE_DUP_MAX
+(255**) inclusive,
+and if there are two of them, the first may not exceed the second.
+An atom followed by a bound containing one integer
+.Ar i
+and no comma matches
+a sequence of exactly
+.Ar i
+matches of the atom.
+An atom followed by a bound
+containing one integer
+.Ar i
+and a comma matches
+a sequence of
+.Ar i
+or more matches of the atom.
+An atom followed by a bound
+containing two integers
+.Ar i
+and
+.Ar j
+matches a sequence of
+.Ar i
+through
+.Ar j
+(inclusive) matches of the atom.
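For instance (an illustrative sketch with an arbitrary pattern and inputs),
the bound {2,4} accepts between two and four repetitions:

    #include <regex.h>
    #include <stdio.h>

    int main(void) {
        regex_t re;
        /* "^a{2,4}$" matches a string of two, three, or four 'a's and
         * nothing else. */
        if (regcomp(&re, "^a{2,4}$", REG_EXTENDED | REG_NOSUB) != 0)
            return 1;
        const char *tests[] = { "a", "aa", "aaaa", "aaaaa" };
        for (int i = 0; i < 4; i++)
            printf("%-6s %s\n", tests[i],
                   regexec(&re, tests[i], 0, NULL, 0) == 0 ? "matches" : "no match");
        regfree(&re);
        return 0;
    }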
+.Pp
+An atom is a regular expression enclosed in
+.Sq ()
+(matching a part of the regular expression),
+an empty set of
+.Sq ()
+(matching the null string)**,
+a
+.Em bracket expression
+(see below),
+.Sq .\&
+(matching any single character),
+.Sq ^
+(matching the null string at the beginning of a line),
+.Sq $
+(matching the null string at the end of a line),
+a
+.Sq \e
+followed by one of the characters
+.Sq ^.[$()|*+?{\e
+(matching that character taken as an ordinary character),
+a
+.Sq \e
+followed by any other character**
+(matching that character taken as an ordinary character,
+as if the
+.Sq \e
+had not been present**),
+or a single character with no other significance (matching that character).
+A
+.Sq {
+followed by a character other than a digit is an ordinary character,
+not the beginning of a bound**.
+It is illegal to end an RE with
+.Sq \e .
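For example (a sketch, not part of the page), escaping '.' turns it from "any
single character" into an ordinary dot; note the doubled backslash needed to
get a single '\' through the C string literal:

    #include <regex.h>
    #include <stdio.h>

    int main(void) {
        regex_t re;
        /* The pattern seen by regcomp() is "^a\.b$": a literal "a.b". */
        if (regcomp(&re, "^a\\.b$", REG_EXTENDED | REG_NOSUB) != 0)
            return 1;
        printf("a.b: %s\n", regexec(&re, "a.b", 0, NULL, 0) == 0 ? "matches" : "no match");
        printf("axb: %s\n", regexec(&re, "axb", 0, NULL, 0) == 0 ? "matches" : "no match");
        regfree(&re);
        return 0;
    }

Without the backslash, "^a.b$" would match "axb" as well.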
+.Pp
+A bracket expression is a list of characters enclosed in
+.Sq [] .
+It normally matches any single character from the list (but see below).
+If the list begins with
+.Sq ^ ,
+it matches any single character
+.Em not
+from the rest of the list
+(but see below).
+If two characters in the list are separated by
+.Sq - ,
+this is shorthand for the full
+.Em range
+of characters between those two (inclusive) in the
+collating sequence, e.g.\&
+.Sq [0-9]
+in ASCII matches any decimal digit.
+It is illegal** for two ranges to share an endpoint, e.g.\&
+.Sq a-c-e .
+Ranges are very collating-sequence-dependent,
+and portable programs should avoid relying on them.
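An illustrative sketch (arbitrary pattern and inputs): two ranges combined in
one bracket expression recognize a lowercase hexadecimal digit:

    #include <regex.h>
    #include <stdio.h>

    int main(void) {
        regex_t re;
        /* One or more characters, each drawn from the range 0-9 or a-f. */
        if (regcomp(&re, "^[0-9a-f]+$", REG_EXTENDED | REG_NOSUB) != 0)
            return 1;
        printf("deadbeef: %s\n",
               regexec(&re, "deadbeef", 0, NULL, 0) == 0 ? "matches" : "no match");
        printf("0x10:     %s\n",
               regexec(&re, "0x10", 0, NULL, 0) == 0 ? "matches" : "no match");
        regfree(&re);
        return 0;
    }

"0x10" fails because 'x' falls in neither range.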
+.Pp
+To include a literal
+.Sq ]\&
+in the list, make it the first character
+(following a possible
+.Sq ^ ) .
+To include a literal
+.Sq - ,
+make it the first or last character,
+or the second endpoint of a range.
+To use a literal
+.Sq -
+as the first endpoint of a range,
+enclose it in
+.Sq [.
+and
+.Sq .]
+to make it a collating element (see below).
+With the exception of these and some combinations using
+.Sq [
+(see next paragraphs),
+all other special characters, including
+.Sq \e ,
+lose their special significance within a bracket expression.
+.Pp
+Within a bracket expression, a collating element
+(a character,
+a multi-character sequence that collates as if it were a single character,
+or a collating-sequence name for either)
+enclosed in
+.Sq [.
+and
+.Sq .]
+stands for the sequence of characters of that collating element.
+The sequence is a single element of the bracket expression's list.
+A bracket expression containing a multi-character collating element
+can thus match more than one character,
+e.g.\& if the collating sequence includes a
+.Sq ch
+collating element,
+then the RE
+.Sq [[.ch.]]*c
+matches the first five characters of
+.Sq chchcc .
+.Pp
+Within a bracket expression, a collating element enclosed in
+.Sq [=
+and
+.Sq =]
+is an equivalence class, standing for the sequences of characters
+of all collating elements equivalent to that one, including itself.
+(If there are no other equivalent collating elements,
+the treatment is as if the enclosing delimiters were
+.Sq [.
+and
+.Sq .] . )
+For example, if
+.Sq x
+and
+.Sq y
+are the members of an equivalence class,
+then
+.Sq [[=x=]] ,
+.Sq [[=y=]] ,
+and
+.Sq [xy]
+are all synonymous.
+An equivalence class may not be an endpoint of a range.
+.Pp
+Within a bracket expression, the name of a
+.Em character class
+enclosed
+in
+.Sq [:
+and
+.Sq :]
+stands for the list of all characters belonging to that class.
+Standard character class names are:
+.Bd -literal -offset indent
+alnum	digit	punct
+alpha	graph	space
+blank	lower	upper
+cntrl	print	xdigit
+.Ed
+.Pp
+These stand for the character classes defined in
+.Xr ctype 3 .
+A locale may provide others.
+A character class may not be used as an endpoint of a range.
+.Pp
+There are two special cases of bracket expressions:
+the bracket expressions
+.Sq [[:<:]]
+and
+.Sq [[:>:]]
+match the null string at the beginning and end of a word, respectively.
+A word is defined as a sequence of
+characters starting and ending with a word character
+which is neither preceded nor followed by
+word characters.
+A word character is an
+.Em alnum
+character (as defined by
+.Xr ctype 3 )
+or an underscore.
+This is an extension,
+compatible with but not specified by POSIX,
+and should be used with
+caution in software intended to be portable to other systems.
+.Pp
+In the event that an RE could match more than one substring of a given
+string,
+the RE matches the one starting earliest in the string.
+If the RE could match more than one substring starting at that point,
+it matches the longest.
+Subexpressions also match the longest possible substrings, subject to
+the constraint that the whole match be as long as possible,
+with subexpressions starting earlier in the RE taking priority over
+ones starting later.
+Note that higher-level subexpressions thus take priority over
+their lower-level component subexpressions.
+.Pp
+Match lengths are measured in characters, not collating elements.
+A null string is considered longer than no match at all.
+For example,
+.Sq bb*
+matches the three middle characters of
+.Sq abbbc ;
+.Sq (wee|week)(knights|nights)
+matches all ten characters of
+.Sq weeknights ;
+when
+.Sq (.*).*
+is matched against
+.Sq abc ,
+the parenthesized subexpression matches all three characters;
+and when
+.Sq (a*)*
+is matched against
+.Sq bc ,
+both the whole RE and the parenthesized subexpression match the null string.
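+.Pp
+The leftmost-longest rule can be observed directly through the
+.Xr regexec 3
+match-offset array; the following sketch (an editorial addition) checks the
+.Sq weeknights
+example above:
+.Bd -literal -offset indent
+#include <regex.h>
+#include <cstdio>
+
+int main() {
+  regex_t re;
+  regmatch_t m[3];
+  regcomp(&re, "(wee|week)(knights|nights)", REG_EXTENDED);
+  if (regexec(&re, "weeknights", 3, m, 0) == 0)
+    /* Whole match covers offsets 0..10: all ten characters. */
+    printf("matched bytes %d through %d\en",
+           (int)m[0].rm_so, (int)m[0].rm_eo);
+  regfree(&re);
+  return 0;
+}
+.Ed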
+.Pp
+If case-independent matching is specified,
+the effect is much as if all case distinctions had vanished from the
+alphabet.
+When an alphabetic that exists in multiple cases appears as an
+ordinary character outside a bracket expression, it is effectively
+transformed into a bracket expression containing both cases,
+e.g.\&
+.Sq x
+becomes
+.Sq [xX] .
+When it appears inside a bracket expression,
+all case counterparts of it are added to the bracket expression,
+so that, for example,
+.Sq [x]
+becomes
+.Sq [xX]
+and
+.Sq [^x]
+becomes
+.Sq [^xX] .
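+.Pp
+In the
+.Xr regcomp 3
+interface, case-independent matching is requested with the REG_ICASE flag;
+a two-line sketch (an editorial addition):
+.Bd -literal -offset indent
+regex_t re;
+/* "x" is treated as "[xX]", so "X" matches too. */
+regcomp(&re, "x", REG_EXTENDED | REG_ICASE);
+.Ed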
+.Pp
+No particular limit is imposed on the length of REs.
+Programs intended to be portable should not employ REs longer
+than 256 bytes,
+as an implementation can refuse to accept such REs and remain
+POSIX-compliant.
+.Pp
+The following is a list of extended regular expressions:
+.Bl -tag -width Ds
+.It Ar c
+Any character
+.Ar c
+not listed below matches itself.
+.It \e Ns Ar c
+Any backslash-escaped character
+.Ar c
+matches itself.
+.It \&.
+Matches any single character that is not a newline
+.Pq Sq \en .
+.It Bq Ar char-class
+Matches any single character in
+.Ar char-class .
+To include a
+.Ql \&]
+in
+.Ar char-class ,
+it must be the first character.
+A range of characters may be specified by separating the end characters
+of the range with a
+.Ql - ;
+e.g.\&
+.Ar a-z
+specifies the lower case characters.
+The following literal expressions can also be used in
+.Ar char-class
+to specify sets of characters:
+.Bd -unfilled -offset indent
+[:alnum:] [:cntrl:] [:lower:] [:space:]
+[:alpha:] [:digit:] [:print:] [:upper:]
+[:blank:] [:graph:] [:punct:] [:xdigit:]
+.Ed
+.Pp
+If
+.Ql -
+appears as the first or last character of
+.Ar char-class ,
+then it matches itself.
+All other characters in
+.Ar char-class
+match themselves.
+.Pp
+Patterns in
+.Ar char-class
+of the form
+.Eo [.
+.Ar col-elm
+.Ec .]\&
+or
+.Eo [=
+.Ar col-elm
+.Ec =]\& ,
+where
+.Ar col-elm
+is a collating element, are interpreted according to
+.Xr setlocale 3
+.Pq not currently supported .
+.It Bq ^ Ns Ar char-class
+Matches any single character, other than newline, not in
+.Ar char-class .
+.Ar char-class
+is defined as above.
+.It ^
+If
+.Sq ^
+is the first character of a regular expression, then it
+anchors the regular expression to the beginning of a line.
+Otherwise, it matches itself.
+.It $
+If
+.Sq $
+is the last character of a regular expression,
+it anchors the regular expression to the end of a line.
+Otherwise, it matches itself.
+.It [[:<:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the beginning of a word.
+.It [[:>:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the end of a word.
+.It Pq Ar re
+Defines a subexpression
+.Ar re .
+Any set of characters enclosed in parentheses
+matches whatever the set of characters without parentheses matches
+(that is a long-winded way of saying the constructs
+.Sq (re)
+and
+.Sq re
+match identically).
+.It *
+Matches the single character regular expression or subexpression
+immediately preceding it zero or more times.
+If
+.Sq *
+is the first character of a regular expression or subexpression,
+then it matches itself.
+The
+.Sq *
+operator sometimes yields unexpected results.
+For example, the regular expression
+.Ar b*
+matches the beginning of the string
+.Qq abbb
+(as opposed to the substring
+.Qq bbb ) ,
+since a null match is the only leftmost match.
+.It +
+Matches the single character regular expression
+or subexpression immediately preceding it
+one or more times.
+.It ?
+Matches the single character regular expression
+or subexpression immediately preceding it
+0 or 1 times.
+.Sm off
+.It Xo
+.Pf { Ar n , m No }\ \&
+.Pf { Ar n , No }\ \&
+.Pf { Ar n No }
+.Xc
+.Sm on
+Matches the single character regular expression or subexpression
+immediately preceding it at least
+.Ar n
+and at most
+.Ar m
+times.
+If
+.Ar m
+is omitted, then it matches at least
+.Ar n
+times.
+If the comma is also omitted, then it matches exactly
+.Ar n
+times.
+.It \*(Ba
+Used to separate patterns.
+For example,
+the pattern
+.Sq cat\*(Badog
+matches either
+.Sq cat
+or
+.Sq dog .
+.El
+.Sh BASIC REGULAR EXPRESSIONS
+Basic regular expressions differ in several respects:
+.Bl -bullet -offset 3n
+.It
+.Sq \*(Ba ,
+.Sq + ,
+and
+.Sq ?\&
+are ordinary characters and there is no equivalent
+for their functionality.
+.It
+The delimiters for bounds are
+.Sq \e{
+and
+.Sq \e} ,
+with
+.Sq {
+and
+.Sq }
+by themselves ordinary characters.
+.It
+The parentheses for nested subexpressions are
+.Sq \e(
+and
+.Sq \e) ,
+with
+.Sq (
+and
+.Sq )\&
+by themselves ordinary characters.
+.It
+.Sq ^
+is an ordinary character except at the beginning of the
+RE or the beginning of a parenthesized subexpression.
+.It
+.Sq $
+is an ordinary character except at the end of the
+RE or the end of a parenthesized subexpression.
+.It
+.Sq *
+is an ordinary character if it appears at the beginning of the
+RE or the beginning of a parenthesized subexpression
+(after a possible leading
+.Sq ^ ) .
+.It
+Finally, there is one new type of atom, a
+.Em back-reference :
+.Sq \e
+followed by a non-zero decimal digit
+.Ar d
+matches the same sequence of characters matched by the
+.Ar d Ns th
+parenthesized subexpression
+(numbering subexpressions by the positions of their opening parentheses,
+left to right),
+so that, for example,
+.Sq \e([bc]\e)\e1
+matches
+.Sq bb\&
+or
+.Sq cc
+but not
+.Sq bc
+(a sketch follows this list).
+.El
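+.Pp
+A back-reference can be exercised through
+.Xr regcomp 3
+by omitting REG_EXTENDED (BREs are the default); the following sketch is an
+editorial addition, with backslashes written using this page's
+.Sq \e
+escape:
+.Bd -literal -offset indent
+#include <regex.h>
+#include <cstdio>
+
+int main() {
+  regex_t re;
+  /* BRE \e([bc]\e)\e1 matches "bb" or "cc" but not "bc". */
+  regcomp(&re, "^\e\e([bc]\e\e)\e\e1$", 0);
+  printf("bb: %s\en", regexec(&re, "bb", 0, 0, 0) == 0 ? "match" : "no match");
+  printf("bc: %s\en", regexec(&re, "bc", 0, 0, 0) == 0 ? "match" : "no match");
+  regfree(&re);
+  return 0;
+}
+.Ed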
+.Pp
+The following is a list of basic regular expressions:
+.Bl -tag -width Ds
+.It Ar c
+Any character
+.Ar c
+not listed below matches itself.
+.It \e Ns Ar c
+Any backslash-escaped character
+.Ar c ,
+except for
+.Sq { ,
+.Sq } ,
+.Sq \&( ,
+and
+.Sq \&) ,
+matches itself.
+.It \&.
+Matches any single character that is not a newline
+.Pq Sq \en .
+.It Bq Ar char-class
+Matches any single character in
+.Ar char-class .
+To include a
+.Ql \&]
+in
+.Ar char-class ,
+it must be the first character.
+A range of characters may be specified by separating the end characters
+of the range with a
+.Ql - ;
+e.g.\&
+.Ar a-z
+specifies the lower case characters.
+The following literal expressions can also be used in
+.Ar char-class
+to specify sets of characters:
+.Bd -unfilled -offset indent
+[:alnum:] [:cntrl:] [:lower:] [:space:]
+[:alpha:] [:digit:] [:print:] [:upper:]
+[:blank:] [:graph:] [:punct:] [:xdigit:]
+.Ed
+.Pp
+If
+.Ql -
+appears as the first or last character of
+.Ar char-class ,
+then it matches itself.
+All other characters in
+.Ar char-class
+match themselves.
+.Pp
+Patterns in
+.Ar char-class
+of the form
+.Eo [.
+.Ar col-elm
+.Ec .]\&
+or
+.Eo [=
+.Ar col-elm
+.Ec =]\& ,
+where
+.Ar col-elm
+is a collating element, are interpreted according to
+.Xr setlocale 3
+.Pq not currently supported .
+.It Bq ^ Ns Ar char-class
+Matches any single character, other than newline, not in
+.Ar char-class .
+.Ar char-class
+is defined as above.
+.It ^
+If
+.Sq ^
+is the first character of a regular expression, then it
+anchors the regular expression to the beginning of a line.
+Otherwise, it matches itself.
+.It $
+If
+.Sq $
+is the last character of a regular expression,
+it anchors the regular expression to the end of a line.
+Otherwise, it matches itself.
+.It [[:<:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the beginning of a word.
+.It [[:>:]]
+Anchors the single character regular expression or subexpression
+immediately following it to the end of a word.
+.It \e( Ns Ar re Ns \e)
+Defines a subexpression
+.Ar re .
+Subexpressions may be nested.
+A subsequent backreference of the form
+.Pf \e Ns Ar n ,
+where
+.Ar n
+is a number in the range [1,9], expands to the text matched by the
+.Ar n Ns th
+subexpression.
+For example, the regular expression
+.Ar \e(.*\e)\e1
+matches any string consisting of identical adjacent substrings.
+Subexpressions are ordered relative to their left delimiter.
+.It *
+Matches the single character regular expression or subexpression
+immediately preceding it zero or more times.
+If
+.Sq *
+is the first character of a regular expression or subexpression,
+then it matches itself.
+The
+.Sq *
+operator sometimes yields unexpected results.
+For example, the regular expression
+.Ar b*
+matches the beginning of the string
+.Qq abbb
+(as opposed to the substring
+.Qq bbb ) ,
+since a null match is the only leftmost match.
+.Sm off
+.It Xo
+.Pf \e{ Ar n , m No \e}\ \&
+.Pf \e{ Ar n , No \e}\ \&
+.Pf \e{ Ar n No \e}
+.Xc
+.Sm on
+Matches the single character regular expression or subexpression
+immediately preceding it at least
+.Ar n
+and at most
+.Ar m
+times.
+If
+.Ar m
+is omitted, then it matches at least
+.Ar n
+times.
+If the comma is also omitted, then it matches exactly
+.Ar n
+times.
+.El
+.Sh SEE ALSO
+.Xr ctype 3 ,
+.Xr regex 3
+.Sh STANDARDS
+.St -p1003.1-2004 :
+Base Definitions, Chapter 9 (Regular Expressions).
+.Sh BUGS
+Having two kinds of REs is a botch.
+.Pp
+The current POSIX spec says that
+.Sq )\&
+is an ordinary character in the absence of an unmatched
+.Sq ( ;
+this was an unintentional result of a wording error,
+and change is likely.
+Avoid relying on it.
+.Pp
+Back-references are a dreadful botch,
+posing major problems for efficient implementations.
+They are also somewhat vaguely defined
+(does
+.Sq a\e(\e(b\e)*\e2\e)*d
+match
+.Sq abbbd ? ) .
+Avoid using them.
+.Pp
+POSIX's specification of case-independent matching is vague.
+The
+.Dq one case implies all cases
+definition given above
+is the current consensus among implementors as to the right interpretation.
+.Pp
+The syntax for word boundaries is incredibly ugly.
diff --git a/final/docs/tutorial/LangImpl1.html b/final/docs/tutorial/LangImpl1.html
new file mode 100644
index 00000000000..66843db5d3a
--- /dev/null
+++ b/final/docs/tutorial/LangImpl1.html
@@ -0,0 +1,348 @@
+Kaleidoscope: Tutorial Introduction and the Lexer
+
+Written by Chris Lattner
+
+Tutorial Introduction
+
+Welcome to the "Implementing a language with LLVM" tutorial. This tutorial
+runs through the implementation of a simple language, showing how fun and
+easy it can be. This tutorial will get you up and running as well as help to
+build a framework you can extend to other languages. The code in this
+tutorial can also be used as a playground to hack on other LLVM-specific
+things.
+
+The goal of this tutorial is to progressively unveil our language, describing
+how it is built up over time. This will let us cover a fairly broad range of
+language design and LLVM-specific usage issues, showing and explaining the
+code for it all along the way, without overwhelming you with tons of details
+up front.
+
+It is useful to point out ahead of time that this tutorial is really about
+teaching compiler techniques and LLVM specifically, not about teaching
+modern and sane software engineering principles. In practice, this means that
+we'll take a number of shortcuts to simplify the exposition. For example, the
+code leaks memory, uses global variables all over the place, doesn't use nice
+design patterns like visitors, etc., but it is very simple. If you dig in and
+use the code as a basis for future projects, fixing these deficiencies
+shouldn't be hard.
+
+I've tried to put this tutorial together in a way that makes chapters easy to
+skip over if you are already familiar with or are uninterested in the various
+pieces. The structure of the tutorial is:
+
+ • Chapter #1: Introduction to the Kaleidoscope language, and the definition
+   of its Lexer - This shows where we are going and the basic functionality
+   that we want it to do. In order to make this tutorial maximally
+   understandable and hackable, we choose to implement everything in C++
+   instead of using lexer and parser generators. LLVM obviously works just
+   fine with such tools; feel free to use one if you prefer.
+ • Chapter #2: Implementing a Parser and AST - With the lexer in place, we
+   can talk about parsing techniques and basic AST construction. This
+   tutorial describes recursive descent parsing and operator precedence
+   parsing. Nothing in Chapters 1 or 2 is LLVM-specific; the code doesn't
+   even link in LLVM at this point. :)
+ • Chapter #3: Code generation to LLVM IR - With the AST ready, we can show
+   off how easy generation of LLVM IR really is.
+ • Chapter #4: Adding JIT and Optimizer Support - Because a lot of people
+   are interested in using LLVM as a JIT, we'll dive right into it and show
+   you the 3 lines it takes to add JIT support. LLVM is also useful in many
+   other ways, but this is one simple and "sexy" way to show off its power. :)
+ • Chapter #5: Extending the Language: Control Flow - With the language up
+   and running, we show how to extend it with control flow operations
+   (if/then/else and a 'for' loop). This gives us a chance to talk about
+   simple SSA construction and control flow.
+ • Chapter #6: Extending the Language: User-defined Operators - This is a
+   silly but fun chapter that talks about extending the language to let the
+   user define their own arbitrary unary and binary operators (with
+   assignable precedence!). This lets us build a significant piece of the
+   "language" as library routines.
+ • Chapter #7: Extending the Language: Mutable Variables - This chapter
+   talks about adding user-defined local variables along with an assignment
+   operator. The interesting part about this is how easy and trivial it is
+   to construct SSA form in LLVM: no, LLVM does not require your front-end
+   to construct SSA form!
+ • Chapter #8: Conclusion and other useful LLVM tidbits - This chapter wraps
+   up the series by talking about potential ways to extend the language, but
+   also includes a bunch of pointers to info about "special topics" like
+   adding garbage collection support, exceptions, debugging, support for
+   "spaghetti stacks", and a bunch of other tips and tricks.
+
+By the end of the tutorial, we'll have written a bit less than 700
+non-comment, non-blank lines of code. With this small amount of code, we'll
+have built up a very reasonable compiler for a non-trivial language,
+including a hand-written lexer, parser, and AST, as well as code generation
+support with a JIT compiler. While other systems may have interesting "hello
+world" tutorials, I think the breadth of this tutorial is a great testament
+to the strengths of LLVM and why you should consider it if you're interested
+in language or compiler design.
+
+A note about this tutorial: we expect you to extend the language and play
+with it on your own. Take the code and go crazy hacking away at it; compilers
+don't need to be scary creatures - it can be a lot of fun to play with
+languages!
+
+The Basic Language
+
+This tutorial will be illustrated with a toy language that we'll call
+"Kaleidoscope" (from Greek roots meaning "beautiful, form, and view").
+Kaleidoscope is a procedural language that allows you to define functions,
+use conditionals, math, etc. Over the course of the tutorial, we'll extend
+Kaleidoscope to support the if/then/else construct, a for loop, user-defined
+operators, JIT compilation with a simple command line interface, etc.
+
+Because we want to keep things simple, the only datatype in Kaleidoscope is
+a 64-bit floating point type (aka 'double' in C parlance). As such, all
+values are implicitly double precision and the language doesn't require type
+declarations. This gives the language a very nice and simple syntax. For
+example, the following simple example computes Fibonacci numbers:
+
+# Compute the x'th fibonacci number.
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2)
+
+# This expression will compute the 40th number.
+fib(40)
+
+We also allow Kaleidoscope to call into standard library functions (the LLVM
+JIT makes this completely trivial). This means that you can use the 'extern'
+keyword to define a function before you use it (this is also useful for
+mutually recursive functions). For example:
+
+extern sin(arg);
+extern cos(arg);
+extern atan2(arg1 arg2);
+
+atan2(sin(.4), cos(42))
+
+A more interesting example is included in Chapter 6, where we write a little
+Kaleidoscope application that displays a Mandelbrot Set at various levels of
+magnification.
+
+Let's dive into the implementation of this language!
+
+The Lexer
+
+When it comes to implementing a language, the first thing needed is
+the ability to process a text file and recognize what it says. The
+traditional way to do this is to use a "lexer" (aka 'scanner') to break the
+input up into "tokens". Each token returned by the lexer includes a token
+code and potentially some metadata (e.g. the numeric value of a number).
+First, we define the possibilities:
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+Each token returned by our lexer will either be one of the Token enum values
+or it will be an 'unknown' character like '+', which is returned as its
+ASCII value. If the current token is an identifier, the IdentifierStr
+global variable holds the name of the identifier. If the current token is a
+numeric literal (like 1.0), NumVal holds its value. Note that we use global
+variables for simplicity; this is not the best choice for a real language
+implementation :).
+
+The actual implementation of the lexer is a single function named
+gettok. The gettok function is called to return the next token from
+standard input. Its definition starts as:
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+gettok works by calling the C getchar() function to read characters one at a
+time from standard input. It eats them as it recognizes them and stores the
+last character read, but not processed, in LastChar. The first thing that it
+has to do is ignore whitespace between tokens. This is accomplished with the
+loop above.
+
+The next thing gettok needs to do is recognize identifiers and specific
+keywords like "def". Kaleidoscope does this with this simple loop:
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+Note that this code sets the 'IdentifierStr' global whenever it lexes an
+identifier. Also, since language keywords are matched by the same loop, we
+handle them here inline. Numeric values are similar:
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+This is all pretty straightforward code for processing input. When reading a
+numeric value from input, we use the C strtod function to convert it to a
+numeric value that we store in NumVal. Note that this isn't doing sufficient
+error checking: it will incorrectly read "1.23.45.67" and handle it as if
+you typed in "1.23". Feel free to extend it :). Next we handle comments:
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+We handle comments by skipping to the end of the line and then returning the
+next token. Finally, if the input doesn't match one of the above cases, it
+is either an operator character like '+' or the end of the file. These are
+handled with this code:
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+  
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+With this, we have the complete lexer for the basic Kaleidoscope language
+(the full code listing for the Lexer is available in the next chapter of the
+tutorial). Next we'll build a simple parser that uses this to build an
+Abstract Syntax Tree. When we have that, we'll include a driver so that you
+can use the lexer and parser together.
+
+Next: Implementing a Parser and AST
diff --git a/final/docs/tutorial/LangImpl2.html b/final/docs/tutorial/LangImpl2.html
new file mode 100644
index 00000000000..9c13b486fa8
--- /dev/null
+++ b/final/docs/tutorial/LangImpl2.html
@@ -0,0 +1,1233 @@
+Kaleidoscope: Implementing a Parser and AST
+
+Written by Chris Lattner
+
+Chapter 2 Introduction
+
+Welcome to Chapter 2 of the "Implementing a language with LLVM" tutorial.
+This chapter shows you how to use the lexer, built in Chapter 1, to build a
+full parser for our Kaleidoscope language. Once we have a parser, we'll
+define and build an Abstract Syntax Tree (AST).
+
+The parser we will build uses a combination of Recursive Descent Parsing and
+Operator-Precedence Parsing to parse the Kaleidoscope language (the latter
+for binary expressions and the former for everything else). Before we get to
+parsing though, let's talk about the output of the parser: the Abstract
+Syntax Tree.
+
+The Abstract Syntax Tree (AST)
+
+The AST for a program captures its behavior in such a way that it is easy
+for later stages of the compiler (e.g. code generation) to interpret. We
+basically want one object for each construct in the language, and the AST
+should closely model the language. In Kaleidoscope, we have expressions, a
+prototype, and a function object. We'll start with expressions first:
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+};
+
+The code above shows the definition of the base ExprAST class and one
+subclass which we use for numeric literals. The important thing to note
+about this code is that the NumberExprAST class captures the numeric value
+of the literal as an instance variable. This allows later phases of the
+compiler to know what the stored numeric value is.
+
+Right now we only create the AST, so there are no useful accessor methods on
+them. It would be very easy to add a virtual method to pretty print the
+code, for example. Here are the other expression AST node definitions that
+we'll use in the basic form of the Kaleidoscope language:
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+};
+
+This is all (intentionally) rather straightforward: variables capture the
+variable name, binary operators capture their opcode (e.g. '+'), and calls
+capture a function name as well as a list of any argument expressions. One
+thing that is nice about our AST is that it captures the language features
+without talking about the syntax of the language. Note that there is no
+discussion about precedence of binary operators, lexical structure, etc.
+
+For our basic language, these are all of the expression nodes we'll define.
+Because it doesn't have conditional control flow, it isn't Turing-complete;
+we'll fix that in a later installment. The two things we need next are a way
+to talk about the interface to a function, and a way to talk about functions
+themselves:
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+};
+
+In Kaleidoscope, functions are typed with just a count of their arguments.
+Since all values are double precision floating point, the type of each
+argument doesn't need to be stored anywhere. In a more aggressive and
+realistic language, the "ExprAST" class would probably have a type field.
+
+With this scaffolding, we can now talk about parsing expressions and
+function bodies in Kaleidoscope.
+
+Parser Basics
+
+Now that we have an AST to build, we need to define the parser code to build
+it. The idea here is that we want to parse something like "x+y" (which is
+returned as three tokens by the lexer) into an AST that could be generated
+with calls like this:
+
+  ExprAST *X = new VariableExprAST("x");
+  ExprAST *Y = new VariableExprAST("y");
+  ExprAST *Result = new BinaryExprAST('+', X, Y);
+
+In order to do this, we'll start by defining some basic helper routines:
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+This implements a simple token buffer around the lexer. This allows us to
+look one token ahead at what the lexer is returning. Every function in our
+parser will assume that CurTok is the current token that needs to be parsed.
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+The Error routines are simple helper routines that our parser will use to
+handle errors. The error recovery in our parser will not be the best and is
+not particularly user-friendly, but it will be enough for our tutorial.
+These routines make it easier to handle errors in routines that have various
+return types: they always return null.
+
+With these basic helper functions, we can implement the first piece of our
+grammar: numeric literals.
+
+Basic Expression Parsing
+
+We start with numeric literals, because they are the simplest to process.
+For each production in our grammar, we'll define a function which parses
+that production. For numeric literals, we have:
+
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+
+This routine is very simple: it expects to be called when the current token
+is a tok_number token. It takes the current number value, creates a
+NumberExprAST node, advances the lexer to the next token, and finally
+returns.
+
+There are some interesting aspects to this. The most important one is that
+this routine eats all of the tokens that correspond to the production and
+returns the lexer buffer with the next token (which is not part of the
+grammar production) ready to go. This is a fairly standard way to go for
+recursive descent parsers. For a better example, the parenthesis operator is
+defined like this:
+
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+
+This function illustrates a number of interesting things about the parser:
+
+1) It shows how we use the Error routines. When called, this function
+expects that the current token is a '(' token, but after parsing the
+subexpression, it is possible that there is no ')' waiting. For example, if
+the user types in "(4 x" instead of "(4)", the parser should emit an error.
+Because errors can occur, the parser needs a way to indicate that they
+happened: in our parser, we return null on an error.
+
+2) Another interesting aspect of this function is that it uses recursion by
+calling ParseExpression (we will soon see that ParseExpression can call
+ParseParenExpr). This is powerful because it allows us to handle recursive
+grammars, and keeps each production very simple. Note that parentheses do
+not cause construction of AST nodes themselves. While we could do it this
+way, the most important role of parentheses is to guide the parser and
+provide grouping. Once the parser constructs the AST, parentheses are not
+needed.
+
+The next simple production is for handling variable references and function
+calls:
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+This routine follows the same style as the other routines. (It expects to
+be called if the current token is a tok_identifier token). It also has
+recursion and error handling. One interesting aspect of this is that it uses
+look-ahead to determine if the current identifier is a standalone variable
+reference or if it is a function call expression. It handles this by
+checking to see if the token after the identifier is a '(' token,
+constructing either a VariableExprAST or CallExprAST node as appropriate.
+
+Now that we have all of our simple expression-parsing logic in place, we can
+define a helper function to wrap it together into one entry point. We call
+this class of expressions "primary" expressions, for reasons that will
+become more clear later in the tutorial. In order to parse an arbitrary
+primary expression, we need to determine what sort of expression it is:
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+Now that you see the definition of this function, it is more obvious why we
+can assume the state of CurTok in the various functions. This uses
+look-ahead to determine which sort of expression is being inspected, and
+then parses it with a function call.
+
+Now that basic expressions are handled, we need to handle binary
+expressions. They are a bit more complex.
+
+Binary Expression Parsing
+
+Binary expressions are significantly harder to parse because they are often
+ambiguous. For example, when given the string "x+y*z", the parser can choose
+to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from
+mathematics, we expect the latter parse, because "*" (multiplication) has
+higher precedence than "+" (addition).
+
+There are many ways to handle this, but an elegant and efficient way is to
+use Operator-Precedence Parsing. This parsing technique uses the precedence
+of binary operators to guide recursion. To start with, we need a table of
+precedences:
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+    
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+  ...
+}
+
+For the basic form of Kaleidoscope, we will only support 4 binary operators
+(this can obviously be extended by you, our brave and intrepid reader). The
+GetTokPrecedence function returns the precedence for the current token, or
+-1 if the token is not a binary operator. Having a map makes it easy to add
+new operators and makes it clear that the algorithm doesn't depend on the
+specific operators involved, but it would be easy enough to eliminate the
+map and do the comparisons in the GetTokPrecedence function. (Or just use a
+fixed-size array).
+
+With the helper above defined, we can now start parsing binary expressions.
+The basic idea of operator precedence parsing is to break down an expression
+with potentially ambiguous binary operators into pieces. Consider, for
+example, the expression "a+b+(c+d)*e*f+g". Operator precedence parsing
+considers this as a stream of primary expressions separated by binary
+operators. As such, it will first parse the leading primary expression "a",
+then it will see the pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note
+that because parentheses are primary expressions, the binary expression
+parser doesn't need to worry about nested subexpressions like (c+d) at all.
+
+To start, an expression is a primary expression potentially followed by a
+sequence of [binop, primaryexpr] pairs:
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+ParseBinOpRHS is the function that parses the sequence of pairs for us. It
+takes a precedence and a pointer to an expression for the part that has been
+parsed so far. Note that "x" is a perfectly valid expression: as such,
+"binoprhs" is allowed to be empty, in which case it returns the expression
+that is passed into it. In our example above, the code passes the expression
+for "a" into ParseBinOpRHS and the current token is "+".
+
+The precedence value passed into ParseBinOpRHS indicates the minimal
+operator precedence that the function is allowed to eat. For example, if
+the current pair stream is [+, x] and ParseBinOpRHS is passed in a
+precedence of 40, it will not consume any tokens (because the precedence of
+'+' is only 20). With this in mind, ParseBinOpRHS starts with:
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+This code gets the precedence of the current token and checks to see if it
+is too low. Because we defined invalid tokens to have a precedence of -1,
+this check implicitly knows that the pair-stream ends when the token stream
+runs out of binary operators. If this check succeeds, we know that the token
+is a binary operator and that it will be included in this expression:
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+
+As such, this code eats (and remembers) the binary operator and then parses
+the primary expression that follows. This builds up the whole pair, the
+first of which is [+, b] for the running example.
+
+Now that we have parsed the left-hand side of an expression and one pair of
+the RHS sequence, we have to decide which way the expression associates. In
+particular, we could have "(a+b) binop unparsed" or "a + (b binop
+unparsed)". To determine this, we look ahead at "binop" to determine its
+precedence and compare it to BinOp's precedence (which is '+' in this case):
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+
+If the precedence of the binop to the right of "RHS" is lower or equal to
+the precedence of our current operator, then we know that the parentheses
+associate as "(a+b) binop ...". In our example, the current operator is "+"
+and the next operator is "+", so we know that they have the same precedence.
+In this case we'll create the AST node for "a+b", and then continue parsing:
+
+      ... if body omitted ...
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }  // loop around to the top of the while loop.
+}
+
+In our example above, this will turn "a+b+" into "(a+b)" and execute the
+next iteration of the loop, with "+" as the current token. The code above
+will eat, remember, and parse "(c+d)" as the primary expression, which makes
+the current pair equal to [+, (c+d)]. It will then evaluate the 'if'
+conditional above with "*" as the binop to the right of the primary. In this
+case, the precedence of "*" is higher than the precedence of "+", so the if
+condition will be entered.
+
+The critical question left here is "how can the if condition parse the right
+hand side in full"? In particular, to build the AST correctly for our
+example, it needs to get all of "(c+d)*e*f" as the RHS expression variable.
+The code to do this is surprisingly simple (code from the above two blocks
+duplicated for context):
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }  // loop around to the top of the while loop.
+}
+
+At this point, we know that the binary operator to the RHS of our primary
+has higher precedence than the binop we are currently parsing. As such, we
+know that any sequence of pairs whose operators are all higher precedence
+than "+" should be parsed together and returned as "RHS". To do this, we
+recursively invoke the ParseBinOpRHS function specifying "TokPrec+1" as the
+minimum precedence required for it to continue. In our example above, this
+will cause it to return the AST node for "(c+d)*e*f" as RHS, which is then
+set as the RHS of the '+' expression.
+
+Finally, on the next iteration of the while loop, the "+g" piece is parsed
+and added to the AST. With this little bit of code (14 non-trivial lines),
+we correctly handle fully general binary expression parsing in a very
+elegant way. This was a whirlwind tour of this code, and it is somewhat
+subtle. I recommend running through it with a few tough examples to see how
+it works.
+
+This wraps up handling of expressions. At this point, we can point the
+parser at an arbitrary token stream and build an expression from it,
+stopping at the first token that is not part of the expression. Next up we
+need to handle function definitions, etc.
+
+Parsing the Rest
+
+The next thing missing is handling of function prototypes. In Kaleidoscope,
+these are used both for 'extern' function declarations as well as function
+body definitions. The code to do this is straightforward and not very
+interesting (once you've survived expressions):
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  // Read the list of argument names.
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+Given this, a function definition is very simple, just a prototype plus an
+expression to implement the body:
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+In addition, we support 'extern' to declare functions like 'sin' and 'cos'
+as well as to support forward declaration of user functions. These 'extern's
+are just prototypes with no body:
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+Finally, we'll also let the user type in arbitrary top-level expressions and
+evaluate them on the fly. We will handle this by defining anonymous nullary
+(zero argument) functions for them:
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+Now that we have all the pieces, let's build a little driver that will let
+us actually execute this code we've built!
+
+The Driver
+
+The driver for this simply invokes all of the parsing pieces with a
+top-level dispatch loop. There isn't much interesting here, so I'll just
+include the top-level loop. See below for full code in the "Top-Level
+Parsing" section.
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+The most interesting part of this is that we ignore top-level semicolons.
+Why is this, you ask? The basic reason is that if you type "4 + 5" at the
+command line, the parser doesn't know whether that is the end of what you
+will type or not. For example, on the next line you could type "def foo..."
+in which case 4+5 is the end of a top-level expression. Alternatively you
+could type "* 6", which would continue the expression. Having top-level
+semicolons allows you to type "4+5;", and the parser will know you are done.
+
+Conclusions
+
+With just under 400 lines of commented code (240 lines of non-comment,
+non-blank code), we fully defined our minimal language, including a lexer,
+parser, and AST builder. With this done, the executable will validate
+Kaleidoscope code and tell us if it is grammatically invalid. For example,
+here is a sample interaction:
+
+$ ./a.out
+ready> def foo(x y) x+foo(y, 4.0);
+Parsed a function definition.
+ready> def foo(x y) x+y y;
+Parsed a function definition.
+Parsed a top-level expr
+ready> def foo(x y) x+y );
+Parsed a function definition.
+Error: unknown token when expecting an expression
+ready> extern sin(a);
+ready> Parsed an extern
+ready> ^D
+$ 
+
+There is a lot of room for extension here. You can define new AST nodes,
+extend the language in many ways, etc. In the next installment, we will
+describe how to generate LLVM Intermediate Representation (IR) from the
+AST.
+
+Full Code Listing
+
+Here is the complete code listing for this and the previous chapter. Note
+that it is fully self-contained: you don't need LLVM or any external
+libraries at all for this. (Besides the C and C++ standard libraries, of
+course.) To build this, just compile with:
+
+   # Compile
+   g++ -g -O3 toy.cpp 
+   # Run
+   ./a.out 
+
+Here is the code:
+
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include <map>
+#include <vector>
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+  
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing
+//===----------------------------------------------------------------------===//
+
+static void HandleDefinition() {
+  if (ParseDefinition()) {
+    fprintf(stderr, "Parsed a function definition.\n");
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (ParseExtern()) {
+    fprintf(stderr, "Parsed an extern\n");
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (ParseTopLevelExpr()) {
+    fprintf(stderr, "Parsed a top-level expr\n");
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  return 0;
+}
+
+
+Next: Implementing Code Generation to LLVM IR
+
+Chris Lattner
+The LLVM Compiler Infrastructure
+Last modified: $Date$
diff --git a/final/docs/tutorial/LangImpl3.html b/final/docs/tutorial/LangImpl3.html
new file mode 100644
index 00000000000..fe0917227db
--- /dev/null
+++ b/final/docs/tutorial/LangImpl3.html
@@ -0,0 +1,1269 @@
Kaleidoscope: Code generation to LLVM IR
+

Written by Chris Lattner

+
+Chapter 3 Introduction

Welcome to Chapter 3 of the "Implementing a language with LLVM" tutorial. This chapter shows you how to transform the Abstract Syntax Tree, built in Chapter 2, into LLVM IR. This will teach you a little bit about how LLVM does things, as well as demonstrate how easy it is to use. It's much more work to build a lexer and parser than it is to generate LLVM IR code. :)

+ +

Please note: the code in this chapter and later requires LLVM 2.2 or later. LLVM 2.1 and before will not work with it. Also note that you need to use a version of this tutorial that matches your LLVM release: if you are using an official LLVM release, use the version of the documentation included with your release or on the llvm.org releases page.

+ +
+Code Generation Setup

+In order to generate LLVM IR, we want some simple setup to get started. First we define virtual code generation (codegen) methods in each AST class:

+ +
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+...
+
+
+ +

The Codegen() method says to emit IR for that AST node along with all the things it depends on, and they all return an LLVM Value object. "Value" is the class used to represent a "Static Single Assignment (SSA) register" or "SSA value" in LLVM. The most distinct aspect of SSA values is that their value is computed as the related instruction executes, and it does not get a new value until (and if) the instruction re-executes. In other words, there is no way to "change" an SSA value. For more information, please read up on Static Single Assignment - the concepts are really quite natural once you grok them.

+ +

Note that instead of adding virtual methods to the ExprAST class hierarchy, it could also make sense to use a visitor pattern or some other way to model this. Again, this tutorial won't dwell on good software engineering practices: for our purposes, adding a virtual method is simplest.

+ +

The second thing we want is an "Error" method like we used for the parser, which will be used to report errors found during code generation (for example, use of an undeclared parameter):

+ +
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+
+
+ +

The static variables will be used during code generation. TheModule is the LLVM construct that contains all of the functions and global variables in a chunk of code. In many ways, it is the top-level structure that the LLVM IR uses to contain code.

+ +

The Builder object is a helper object that makes it easy to generate LLVM instructions. Instances of the IRBuilder class template keep track of the current place to insert instructions and have methods to create new instructions.
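+
+As a minimal sketch of that workflow (our illustration; the basic block and operand names here are hypothetical, not part of the tutorial code):
+
+// Tell the builder where newly created instructions should go...
+Builder.SetInsertPoint(SomeBasicBlock);
+// ...then each Create* call appends an instruction at that point.
+Value *Sum = Builder.CreateFAdd(X, Y, "addtmp");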

+ +

The NamedValues map keeps track of which values are defined in the current scope and what their LLVM representation is. (In other words, it is a symbol table for the code.) In this form of Kaleidoscope, the only things that can be referenced are function parameters. As such, function parameters will be in this map when generating code for their function body.

+ +

+With these basics in place, we can start talking about how to generate code for each expression. Note that this assumes that the Builder has been set up to generate code into something. For now, we'll assume that this has already been done, and we'll just use it to emit code.

+ +
+Expression Code Generation

Generating LLVM code for expression nodes is very straightforward: less than 45 lines of commented code for all four of our expression nodes. First we'll do numeric literals:

+ +
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+
+ +

In the LLVM IR, numeric constants are represented with the ConstantFP class, which holds the numeric value in an APFloat internally (APFloat has the capability of holding floating point constants of Arbitrary Precision). This code basically just creates and returns a ConstantFP. Note that in the LLVM IR, constants are all uniqued together and shared. For this reason, the API uses the "foo::get(...)" idiom instead of "new foo(..)" or "foo::Create(..)".
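+
+Uniquing means that two requests for the same constant hand back the very same object, as this small sketch (our illustration, reusing the globals from the listing in this chapter) shows:
+
+Value *A = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+Value *B = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+// A == B: both calls return the one shared ConstantFP object for 1.0.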

+ +
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+
+ +

References to variables are also quite simple using LLVM. In the simple version of Kaleidoscope, we assume that the variable has already been emitted somewhere and its value is available. In practice, the only values that can be in the NamedValues map are function arguments. This code simply checks to see that the specified name is in the map (if not, an unknown variable is being referenced) and returns the value for it. In future chapters, we'll add support for loop induction variables in the symbol table, and for local variables.

+ +
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+
+ +

Binary operators start to get more interesting. The basic idea here is that we recursively emit code for the left-hand side of the expression, then the right-hand side, then we compute the result of the binary expression. In this code, we do a simple switch on the opcode to create the right LLVM instruction.

+ +

In the example above, the LLVM builder class is starting to show its value. IRBuilder knows where to insert the newly created instruction; all you have to do is specify what instruction to create (e.g. with CreateFAdd), which operands to use (L and R here), and optionally provide a name for the generated instruction.

+ +

One nice thing about LLVM is that the name is just a hint. For instance, if the code above emits multiple "addtmp" variables, LLVM will automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but they make it much easier to read the IR dumps.

+ +

LLVM instructions are constrained by strict rules: for example, the left and right operands of an add instruction must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul.

+ +

On the other hand, LLVM specifies that the fcmp instruction always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be a 0.0 or 1.0 value. In order to get these semantics, we combine the fcmp instruction with a uitofp instruction. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the sitofp instruction, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value.
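+
+To make the contrast concrete, here is what the signed variant of the '<' case would look like (a hypothetical alternative, not what our compiler does):
+
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Treating the i1 as a *signed* value maps true to -1.0 instead of 1.0.
+    return Builder.CreateSIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");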

+ +
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+
+ +

Code generation for function calls is quite straightforward with LLVM. The code above initially does a function name lookup in the LLVM Module's symbol table. Recall that the LLVM Module is the container that holds all of the functions we are JIT'ing. By giving each function the same name as what the user specifies, we can use the LLVM symbol table to resolve function names for us.

+ +

Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM call instruction. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort.

+ +

This wraps up our handling of the four basic expressions that we have so far in Kaleidoscope. Feel free to go in and add some more. For example, by browsing the LLVM language reference you'll find several other interesting instructions that are really easy to plug into our basic framework.

+ +
+Function Code Generation

Code generation for prototypes and functions must handle a number of details, which make their code less beautiful than expression code generation, but allow us to illustrate some important points. First, let's talk about code generation for prototypes: they are used both for function bodies and external function declarations. The code starts with:

+ +
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+
+ +

This code packs a lot of power into a few lines. Note first that this function returns a "Function*" instead of a "Value*". Because a "prototype" really talks about the external interface for a function (not the value computed by an expression), it makes sense for it to return the LLVM Function it corresponds to when codegen'd.

+ +

The call to FunctionType::get creates the FunctionType that should be used for a given Prototype. Since all function arguments in Kaleidoscope are of type double, the first line creates a vector of "N" LLVM double types. It then uses the FunctionType::get method to create a function type that takes "N" doubles as arguments, returns one double as a result, and that is not vararg (the false parameter indicates this). Note that Types in LLVM are uniqued just like Constants are, so you don't "new" a type, you "get" it.

+ +

The final line above actually creates the function that the prototype will correspond to. This indicates the type, linkage and name to use, as well as which module to insert into. "External linkage" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. The Name passed in is the name the user specified: since "TheModule" is specified, this name is registered in "TheModule"'s symbol table, which is used by the function call code above.

+ +
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+
+ +

The Module symbol table works just like the Function symbol table when it comes to name conflicts: if a new function is created with a name that was previously added to the symbol table, the new function will get implicitly renamed when added to the Module. The code above exploits this fact to determine if there was a previous definition of this function.
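+
+A small sketch of the renaming behavior being exploited (our illustration, reusing FT from the code above):
+
+Function *F1 = Function::Create(FT, Function::ExternalLinkage, "foo", TheModule);
+Function *F2 = Function::Create(FT, Function::ExternalLinkage, "foo", TheModule);
+// F1 keeps the name "foo"; F2 is silently renamed to something like "foo1",
+// which is why the "F->getName() != Name" test above detects the collision.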

+ +

In Kaleidoscope, I choose to allow redefinitions of functions in two cases: first, we want to allow 'extern'ing a function more than once, as long as the prototypes for the externs match (since all arguments have the same type, we just have to check that the number of arguments matches). Second, we want to allow 'extern'ing a function and then defining a body for it. This is useful when defining mutually recursive functions.

+ +

In order to implement this, the code above first checks to see if there is a collision on the name of the function. If so, it deletes the function we just created (by calling eraseFromParent) and then calls getFunction to get the existing function with the specified name. Note that many APIs in LLVM have "erase" forms and "remove" forms. The "remove" form unlinks the object from its parent (e.g. a Function from a Module) and returns it. The "erase" form unlinks the object and then deletes it.
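+
+A quick sketch of that difference (our illustration; "foo" is a hypothetical function assumed to live in TheModule):
+
+Function *F = TheModule->getFunction("foo");
+// Either: unlink it but keep the object alive (we now own it and may re-insert it):
+F->removeFromParent();
+// Or: unlink *and* delete it; F must not be touched afterwards:
+// F->eraseFromParent();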

+ +
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+
+ +

In order to verify the logic above, we first check to see if the pre-existing function is "empty". In this case, empty means that it has no basic blocks in it, which means it has no body. If it has no body, it is a forward declaration. Since we don't allow anything after a full definition of the function, the code rejects this case. If the previous reference to a function was an 'extern', we simply verify that the number of arguments for that definition and this one match up. If not, we emit an error.

+ +
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+    
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+  return F;
+}
+
+
+ +

The last bit of code for prototypes loops over all of the arguments in the function, setting the name of the LLVM Argument objects to match, and registering the arguments in the NamedValues map for future use by the VariableExprAST AST node. Once this is set up, it returns the Function object to the caller. Note that we don't check for conflicting argument names here (e.g. "extern foo(a b a)"). Doing so would be very straightforward with the mechanics we have already used above.
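+
+For instance, one possible shape for that check (a sketch of ours, not part of the tutorial code; it needs #include <set> and would sit in PrototypeAST::Codegen before the naming loop):
+
+  std::set<std::string> SeenNames;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i)
+    if (!SeenNames.insert(Args[i]).second) {  // insert fails on a duplicate
+      ErrorF("conflicting argument names in prototype");
+      return 0;
+    }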

+ +
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+
+ +

Code generation for function definitions starts out simply enough: we first clear out the NamedValues map to make sure that there isn't anything in it from the last function we compiled, then codegen the prototype (Proto) and verify that it is ok. Code generation of the prototype ensures that there is an LLVM Function object that is ready to go for us.

+ +
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+
+
+ +

Now we get to the point where the Builder is set up. The first line creates a new basic block (named "entry"), which is inserted into TheFunction. The second line then tells the builder that new instructions should be inserted into the end of the new basic block. Basic blocks in LLVM are an important part of functions that define the Control Flow Graph. Since we don't have any control flow, our functions will only contain one block at this point. We'll fix this in Chapter 5 :).

+ +
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    return TheFunction;
+  }
+
+
+ +

Once the insertion point is set up, we call the Codegen() method for the root expression of the function. If no error happens, this emits code to compute the expression into the entry block and returns the value that was computed. Assuming no error, we then create an LLVM ret instruction, which completes the function. Once the function is built, we call verifyFunction, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to determine if our compiler is doing everything right. Using this is important: it can catch a lot of bugs. Once the function is finished and validated, we return it.

+ +
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+
+ +

The only piece left here is handling of the error case. For simplicity, we handle this by merely deleting the function we produced with the eraseFromParent method. This allows the user to redefine a function that they incorrectly typed in before: if we didn't delete it, it would live in the symbol table, with a body, preventing future redefinition.

+ +

This code does have a bug, though. Since the PrototypeAST::Codegen can return a previously defined forward declaration, our code can actually delete a forward declaration. There are a number of ways to fix this bug; see what you can come up with! Here is a testcase:

+ +
+
+extern foo(a b);     # ok, defines foo.
+def foo(a b) c;      # error, 'c' is invalid.
+def bar() foo(1, 2); # error, unknown function "foo"
+
+
+ +
+Driver Changes and Closing Thoughts

+For now, code generation to LLVM doesn't really get us much, except that we can look at the pretty IR calls. The sample code inserts calls to Codegen into the "HandleDefinition", "HandleExtern" etc functions, and then dumps out the LLVM IR. This gives a nice way to look at the LLVM IR for simple functions. For example:

+ +
+
+ready> 4+5;
+Read top-level expression:
+define double @""() {
+entry:
+        ret double 9.000000e+00
+}
+
+
+ +

Note how the parser turns the top-level expression into anonymous functions for us. This will be handy when we add JIT support in the next chapter. Also note that the code is very literally transcribed; no optimizations are being performed except simple constant folding done by IRBuilder. We will add optimizations explicitly in the next chapter.

+ +
+
+ready> def foo(a b) a*a + 2*a*b + b*b;
+Read function definition:
+define double @foo(double %a, double %b) {
+entry:
+        %multmp = fmul double %a, %a
+        %multmp1 = fmul double 2.000000e+00, %a
+        %multmp2 = fmul double %multmp1, %b
+        %addtmp = fadd double %multmp, %multmp2
+        %multmp3 = fmul double %b, %b
+        %addtmp4 = fadd double %addtmp, %multmp3
+        ret double %addtmp4
+}
+
+
+ +

This shows some simple arithmetic. Notice the striking similarity to the LLVM builder calls that we use to create the instructions.

+ +
+
+ready> def bar(a) foo(a, 4.0) + bar(31337);
+Read function definition:
+define double @bar(double %a) {
+entry:
+        %calltmp = call double @foo(double %a, double 4.000000e+00)
+        %calltmp1 = call double @bar(double 3.133700e+04)
+        %addtmp = fadd double %calltmp, %calltmp1
+        ret double %addtmp
+}
+
+
+ +

This shows some function calls. Note that this function will take a long time to execute if you call it. In the future we'll add conditional control flow to actually make recursion useful :).

+ +
+
+ready> extern cos(x);
+Read extern: 
+declare double @cos(double)
+
+ready> cos(1.234);
+Read top-level expression:
+define double @""() {
+entry:
+        %calltmp = call double @cos(double 1.234000e+00)
+        ret double %calltmp
+}
+
+
+ +

This shows an extern for the libm "cos" function, and a call to it.

+
+ready> ^D
+; ModuleID = 'my cool jit'
+
+define double @""() {
+entry:
+        %addtmp = fadd double 4.000000e+00, 5.000000e+00
+        ret double %addtmp
+}
+
+define double @foo(double %a, double %b) {
+entry:
+        %multmp = fmul double %a, %a
+        %multmp1 = fmul double 2.000000e+00, %a
+        %multmp2 = fmul double %multmp1, %b
+        %addtmp = fadd double %multmp, %multmp2
+        %multmp3 = fmul double %b, %b
+        %addtmp4 = fadd double %addtmp, %multmp3
+        ret double %addtmp4
+}
+
+define double @bar(double %a) {
+entry:
+        %calltmp = call double @foo(double %a, double 4.000000e+00)
+        %calltmp1 = call double @bar(double 3.133700e+04)
+        %addtmp = fadd double %calltmp, %calltmp1
+        ret double %addtmp
+}
+
+declare double @cos(double)
+
+define double @""() {
+entry:
+        %calltmp = call double @cos(double 1.234000e+00)
+        ret double %calltmp
+}
+
+
+ +

When you quit the current demo, it dumps out the IR for the entire module generated. Here you can see the big picture with all the functions referencing each other.

+ +

This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll describe how to add JIT codegen and optimizer support to this so we can actually start running code!

+ +
+Full Code Listing

+Here is the complete code listing for our running example, enhanced with the LLVM code generator. Because this uses the LLVM libraries, we need to link them in. To do this, we use the llvm-config tool to inform our makefile/command line about which options to use:

+ +
+
+   # Compile
+   g++ -g -O3 toy.cpp `llvm-config --cppflags --ldflags --libs core` -o toy
+   # Run
+   ./toy
+
+
+ +

Here is the code:

+ +
+
+// To build this:
+// See example above.
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+  
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+  
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+    
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+  
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+    
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+  
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    return TheFunction;
+  }
+  
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read top-level expression:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
+
+
+Next: Adding JIT and Optimizer Support
+
+Chris Lattner
+The LLVM Compiler Infrastructure
+Last modified: $Date$
diff --git a/final/docs/tutorial/LangImpl4.html b/final/docs/tutorial/LangImpl4.html
new file mode 100644
index 00000000000..3eb5be4d14b
--- /dev/null
+++ b/final/docs/tutorial/LangImpl4.html
@@ -0,0 +1,1137 @@
Kaleidoscope: Adding JIT and Optimizer Support
+

Written by Chris Lattner

+
+Chapter 4 Introduction

Welcome to Chapter 4 of the "Implementing a language with LLVM" tutorial. Chapters 1-3 described the implementation of a simple language and added support for generating LLVM IR. This chapter describes two new techniques: adding optimizer support to your language, and adding JIT compiler support. These additions will demonstrate how to get nice, efficient code for the Kaleidoscope language.

+ +
+Trivial Constant Folding

+Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, it does not produce wonderful code. The IRBuilder, however, does give us obvious optimizations when compiling simple code:

+ +
+
+ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        ret double %addtmp
+}
+
+
+ +

This code is not a literal transcription of the AST built by parsing the input. That would be:

+
+ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 2.000000e+00, 1.000000e+00
+        %addtmp1 = fadd double %addtmp, %x
+        ret double %addtmp1
+}
+
+
+ +

Constant folding, as seen above, is a very common and very important optimization: so much so that many language implementors implement constant folding support in their AST representation.

+ +

With LLVM, you don't need this support in the AST. Since all calls to build LLVM IR go through the LLVM IR builder, the builder itself checks to see if there is a constant folding opportunity when you call it. If so, it just does the constant fold and returns the constant instead of creating an instruction.

Well, that was easy :). In practice, we recommend always using IRBuilder when generating code like this. It has no "syntactic overhead" for its use (you don't have to uglify your compiler with constant checks everywhere) and it can dramatically reduce the amount of LLVM IR that is generated in some cases (particularly for languages with a macro preprocessor or that use a lot of constants).
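+
+Conceptually, the fold inside CreateFAdd looks something like this (our sketch of the idea, not LLVM's actual source; InsertNewFAddInstruction is a made-up placeholder for the non-folding path):
+
+if (Constant *LC = dyn_cast<Constant>(L))
+  if (Constant *RC = dyn_cast<Constant>(R))
+    return ConstantExpr::getFAdd(LC, RC); // fold: no instruction is emitted
+return InsertNewFAddInstruction(L, R);    // otherwise emit a real fadd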

+ +

On the other hand, the IRBuilder is limited by the fact that it does all of its analysis inline with the code as it is built. If you take a slightly more complex example:

+ +
+
+ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        %addtmp1 = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp1
+        ret double %multmp
+}
+
+
+ +

In this case, the LHS and RHS of the multiplication are the same value. We'd really like to see this generate "tmp = x+3; result = tmp*tmp;" instead of computing "x+3" twice.

+ +

Unfortunately, no amount of local analysis will be able to detect and correct this. This requires two transformations: reassociation of expressions (to make the adds lexically identical) and Common Subexpression Elimination (CSE) to delete the redundant add instruction. Fortunately, LLVM provides a broad range of optimizations that you can use, in the form of "passes".

+ +
+LLVM Optimization Passes

LLVM provides many optimization passes, which do many different sorts of things and have different tradeoffs. Unlike other systems, LLVM doesn't hold to the mistaken notion that one set of optimizations is right for all languages and for all situations. LLVM allows a compiler implementor to make complete decisions about what optimizations to use, in which order, and in what situation.

+ +

As a concrete example, LLVM supports both "whole module" passes, which look across as large a body of code as they can (often a whole file, but if run at link time, this can be a substantial portion of the whole program). It also supports and includes "per-function" passes which just operate on a single function at a time, without looking at other functions. For more information on passes and how they are run, see the How to Write a Pass document and the List of LLVM Passes.
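+
+For contrast with the per-function approach used below, a whole-module pipeline would look roughly like this (our sketch, not used by Kaleidoscope; createFunctionInliningPass() comes from llvm/Transforms/IPO.h):
+
+  PassManager OurPM;
+  OurPM.add(createFunctionInliningPass()); // an interprocedural, whole-module pass
+  OurPM.run(*TheModule);                   // processes the entire module at once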

+ +

For Kaleidoscope, we are currently generating functions on the fly, one at a time, as the user types them in. We aren't shooting for the ultimate optimization experience in this setting, but we also want to catch the easy and quick stuff where possible. As such, we will choose to run a few per-function optimizations as the user types the function in. If we wanted to make a "static Kaleidoscope compiler", we would use exactly the code we have now, except that we would defer running the optimizer until the entire file has been parsed.

+ +

In order to get per-function optimizations going, we need to set up a FunctionPassManager to hold and organize the LLVM optimizations that we want to run. Once we have that, we can add a set of optimizations to run. The code looks like this:

+ +
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+
+ +

This code defines a FunctionPassManager, "OurFPM". It requires a pointer to the Module to construct itself. Once it is set up, we use a series of "add" calls to add a bunch of LLVM passes. The first pass is basically boilerplate: it adds a pass so that later optimizations know how the data structures in the program are laid out. The "TheExecutionEngine" variable is related to the JIT, which we will get to in the next section.

+ +

In this case, we choose to add 4 optimization passes. The passes we chose here are a pretty standard set of "cleanup" optimizations that are useful for a wide variety of code. I won't delve into what they do but, believe me, they are a good starting place :).

+ +

Once the PassManager is set up, we need to make use of it. We do this by running it after our newly created function is constructed (in FunctionAST::Codegen), but before it is returned to the client:

+ +
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+    
+    return TheFunction;
+  }
+
+
+ +

As you can see, this is pretty straightforward. The FunctionPassManager optimizes and updates the LLVM Function* in place, improving (hopefully) its body. With this in place, we can try our test above again:

+ +
+
+ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp
+        ret double %multmp
+}
+
+
+ +

As expected, we now get our nicely optimized code, saving a floating point add instruction from every execution of this function.

+ +

LLVM provides a wide variety of optimizations that can be used in certain circumstances. Some documentation about the various passes is available, but it isn't very complete. Another good source of ideas can come from looking at the passes that llvm-gcc or llvm-ld run to get started. The "opt" tool allows you to experiment with passes from the command line, so you can see if they do anything.

+ +

Now that we have reasonable code coming out of our front-end, let's talk about executing it!

+ +
+Adding a JIT Compiler

Code that is available in LLVM IR can have a wide variety of tools applied to it. For example, you can run optimizations on it (as we did above), you can dump it out in textual or binary forms, you can compile the code to an assembly file (.s) for some target, or you can JIT compile it. The nice thing about the LLVM IR representation is that it is the "common currency" between many different parts of the compiler.

+ +

In this section, we'll add JIT compiler support to our interpreter. The basic idea that we want for Kaleidoscope is to have the user enter function bodies as they do now, but immediately evaluate the top-level expressions they type in. For example, if they type in "1 + 2;", we should evaluate and print out 3. If they define a function, they should be able to call it from the command line.

+ +

In order to do this, we first declare and initialize the JIT. This is done by adding a global variable and a call in main:

+ +
+
+static ExecutionEngine *TheExecutionEngine;
+...
+int main() {
+  ..
+  // Create the JIT.  This takes ownership of the module.
+  TheExecutionEngine = EngineBuilder(TheModule).create();
+  ..
+}
+
+
+ +

This creates an abstract "Execution Engine" which can be either a JIT compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler for you if one is available for your platform, otherwise it will fall back to the interpreter.
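+
+One detail to be aware of (our note; the full code listing at the end of this chapter does this): the JIT only counts as "available" if the native target has been registered first, which is why main() begins by calling InitializeNativeTarget() from llvm/Target/TargetSelect.h:
+
+int main() {
+  InitializeNativeTarget(); // register the host target so a JIT can be chosen
+  ...
+  TheExecutionEngine = EngineBuilder(TheModule).create();
+  ...
+}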

+ +

Once the ExecutionEngine is created, the JIT is ready to be used. There are a variety of APIs that are useful, but the simplest one is the "getPointerToFunction(F)" method. This method JIT compiles the specified LLVM Function and returns a function pointer to the generated machine code. In our case, this means that we can change the code that parses a top-level expression to look like this:

+ +
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      LF->dump();  // Dump the function for exposition purposes.
+    
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+      
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+
+
+ +

Recall that we compile top-level expressions into a self-contained LLVM function that takes no arguments and returns the computed double. Because the LLVM JIT compiler matches the native platform ABI, this means that you can just cast the result pointer to a function pointer of that type and call it directly. This means that there is no difference between JIT compiled code and native machine code that is statically linked into your application.

+ +

With just these two changes, let's see how Kaleidoscope works now!

+ +
+
+ready> 4+5;
+define double @""() {
+entry:
+        ret double 9.000000e+00
+}
+
+Evaluated to 9.000000
+
+
+ +

Well, this looks like it is basically working. The dump of the function shows the "no argument function that always returns double" that we synthesize for each top-level expression that is typed in. This demonstrates very basic functionality, but can we do more?

+ +
+
+ready> def testfunc(x y) x + y*2;  
+Read function definition:
+define double @testfunc(double %x, double %y) {
+entry:
+        %multmp = fmul double %y, 2.000000e+00
+        %addtmp = fadd double %multmp, %x
+        ret double %addtmp
+}
+
+ready> testfunc(4, 10);
+define double @""() {
+entry:
+        %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+        ret double %calltmp
+}
+
+Evaluated to 24.000000
+
+
+ +

This illustrates that we can now call user code, but there is something a bit subtle going on here. Note that we only invoke the JIT on the anonymous functions that call testfunc, but we never invoked it on testfunc itself. What actually happened here is that the JIT scanned for all non-JIT'd functions transitively called from the anonymous function and compiled all of them before returning from getPointerToFunction().

+ +

The JIT provides a number of other more advanced interfaces for things like freeing allocated machine code, rejit'ing functions to update them, etc. However, even with this simple code, we get some surprisingly powerful capabilities. Check this out (I removed the dump of the anonymous functions; you should get the idea by now :) :

+ +
+
+ready> extern sin(x);
+Read extern: 
+declare double @sin(double)
+
+ready> extern cos(x);
+Read extern: 
+declare double @cos(double)
+
+ready> sin(1.0);
+Evaluated to 0.841471
+
+ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+Read function definition:
+define double @foo(double %x) {
+entry:
+        %calltmp = call double @sin(double %x)
+        %multmp = fmul double %calltmp, %calltmp
+        %calltmp2 = call double @cos(double %x)
+        %multmp4 = fmul double %calltmp2, %calltmp2
+        %addtmp = fadd double %multmp, %multmp4
+        ret double %addtmp
+}
+
+ready> foo(4.0);
+Evaluated to 1.000000
+
+
+ +

Whoa, how does the JIT know about sin and cos? The answer is surprisingly simple: in this example, the JIT started execution of a function and got to a function call. It realized that the function was not yet JIT compiled and invoked the standard set of routines to resolve the function. In this case, there is no body defined for the function, so the JIT ended up calling "dlsym("sin")" on the Kaleidoscope process itself. Since "sin" is defined within the JIT's address space, it simply patches up calls in the module to call the libm version of sin directly.

+ +

The LLVM JIT provides a number of interfaces (look in the ExecutionEngine.h file) for controlling how unknown functions get resolved. It allows you to establish explicit mappings between IR objects and addresses (useful for LLVM global variables that you want to map to static tables, for example), allows you to dynamically decide on the fly based on the function name, and even allows you to have the JIT compile functions lazily the first time they're called.
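+
+For example, the explicit-mapping interface looks roughly like this (a sketch of ours; "mytable" is a hypothetical global, not part of Kaleidoscope):
+
+static double MyTable[256];
+...
+if (GlobalVariable *GV = TheModule->getNamedGlobal("mytable"))
+  TheExecutionEngine->addGlobalMapping(GV, &MyTable);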

+ +

One interesting application of this is that we can now extend the language +by writing arbitrary C++ code to implement operations. For example, if we add: +

+ +
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+
+ +

Now we can produce simple output to the console by using things like: +"extern putchard(x); putchard(120);", which prints a lowercase 'x' on +the console (120 is the ASCII code for 'x'). Similar code could be used to +implement file I/O, console input, and many other capabilities in +Kaleidoscope.
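
+ +

For example, a hypothetical companion function in the same style (it is not +part of this chapter's listing; it just follows the putchard pattern):

+ +
+
+/// printd - hypothetical helper that prints a double followed by a newline
+/// and returns 0, so "extern printd(x); printd(4*2);" would display 8.000000.
+extern "C"
+double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+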

+ +

This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At +this point, we can compile a non-Turing-complete programming language, optimize +and JIT compile it in a user-driven way. Next up we'll look into extending the language with control flow constructs, +tackling some interesting LLVM IR issues along the way.

+ +
+ + + + + +
+ +

+Here is the complete code listing for our running example, enhanced with the +LLVM JIT and optimizer. To build this example, use: +

+ +
+
+   # Compile
+   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+   # Run
+   ./toy
+
+
+ +

+If you are compiling this on Linux, make sure to add the "-rdynamic" option +as well. This makes sure that the external functions are resolved properly +at runtime.
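
+ +

In other words, the Linux build line looks like this (the same command as +above, with the extra flag added):

+ +
+
+   # Compile on Linux: -rdynamic exports the binary's own symbols so that
+   # the JIT's dlsym() lookups can find functions like putchard.
+   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -rdynamic -o toy
+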

+ +

Here is the code:

+ +
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+  
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+  
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+    
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+  
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+    
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+  
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+    
+    return TheFunction;
+  }
+  
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+      
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Create the JIT.  This takes ownership of the module.
+  std::string ErrStr;
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  if (!TheExecutionEngine) {
+    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+    exit(1);
+  }
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
+
+
+ +Next: Extending the language: control flow +
+ + +
+
+ Valid CSS! + Valid HTML 4.01! + + Chris Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ +

diff --git a/final/docs/tutorial/LangImpl5-cfg.png b/final/docs/tutorial/LangImpl5-cfg.png
new file mode 100644
index 0000000000000000000000000000000000000000..cdba92ff6c5c95b142bd928971bcdd560117028c
GIT binary patch
literal 38586
[binary image data for the LangImpl5 example control-flow graph omitted]

literal 0
HcmV?d00001

diff --git a/final/docs/tutorial/LangImpl5.html b/final/docs/tutorial/LangImpl5.html
new file mode 100644
index 00000000000..166b74a4dd6
--- /dev/null
+++ b/final/docs/tutorial/LangImpl5.html
@@ -0,0 +1,1780 @@
+
+Kaleidoscope: Extending the Language: Control Flow
+
Kaleidoscope: Extending the Language: Control Flow
+ + + +
+

Written by Chris Lattner

+
+ + + + + +
+ +

Welcome to Chapter 5 of the "Implementing a language +with LLVM" tutorial. Parts 1-4 described the implementation of the simple +Kaleidoscope language and included support for generating LLVM IR, followed by +optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is +mostly useless: it has no control flow other than call and return. This means +that you can't have conditional branches in the code, significantly limiting its +power. In this episode of "build that compiler", we'll extend Kaleidoscope to +have an if/then/else expression plus a simple 'for' loop.

+ +
+ + + + + +
+ +

+Extending Kaleidoscope to support if/then/else is quite straightforward. It +basically requires adding support for this "new" concept to the lexer, +parser, AST, and LLVM code emitter. This example is nice, because it shows how +easy it is to "grow" a language over time, incrementally extending it as new +ideas are discovered.

+ +

Before we get going on "how" we add this extension, let's talk about "what" we +want. The basic idea is that we want to be able to write this sort of thing: +

+ +
+
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+
+ +

In Kaleidoscope, every construct is an expression: there are no statements. +As such, the if/then/else expression needs to return a value like any other. +Since we're using a mostly functional form, we'll have it evaluate its +conditional, then return the 'then' or 'else' value based on how the condition +was resolved. This is very similar to the C "?:" expression.

+ +

The semantics of the if/then/else expression is that it evaluates the +condition to a boolean equality value: 0.0 is considered to be false and +everything else is considered to be true. +If the condition is true, the first subexpression is evaluated and returned; if +the condition is false, the second subexpression is evaluated and returned. +Since Kaleidoscope allows side-effects, this behavior is important to nail down. +
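
+ +

As a concrete (hypothetical) session illustrating these semantics once this +chapter's changes are in place (the function dumps are omitted):

+ +
+
+ready> def istrue(x) if x then 1 else 0;
+ready> istrue(0);    # 0.0 is the only false value
+Evaluated to 0.000000
+ready> istrue(2.5);  # everything else is true
+Evaluated to 1.000000
+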

+ +

Now that we know what we "want", let's break this down into its constituent +pieces.

+ +
+ + + + + + +
+ +

The lexer extensions are straightforward. First we add new enum values +for the relevant tokens:

+ +
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+
+
+ +

Once we have that, we recognize the new keywords in the lexer. This is pretty simple +stuff:

+ +
+
+    ...
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    return tok_identifier;
+
+
+ +
+ + + + + +
+ +

To represent the new expression we add a new AST node for it:

+ +
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+
+ +

The AST node just has pointers to the various subexpressions.

+ +
+ + + + + +
+ +

Now that we have the relevant tokens coming from the lexer and we have the +AST node to build, our parsing logic is relatively straightforward. First we +define a new parsing function:

+ +
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+  
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+  
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+  
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+  
+  if (CurTok != tok_else)
+    return Error("expected else");
+  
+  getNextToken();
+  
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+  
+  return new IfExprAST(Cond, Then, Else);
+}
+
+
+ +

Next we hook it up as a primary expression:

+ +
+
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  }
+}
+
+
+ +
+ + + + + +
+ +

Now that we have it parsing and building the AST, the final piece is adding +LLVM code generation support. This is the most interesting part of the +if/then/else example, because this is where it starts to introduce new concepts. +All of the code above has been thoroughly described in previous chapters. +

+ +

To motivate the code we want to produce, let's take a look at a simple +example. Consider:

+ +
+
+extern foo();
+extern bar();
+def baz(x) if x then foo() else bar();
+
+
+ +

If you disable optimizations, the code you'll (soon) get from Kaleidoscope +looks like this:

+ +
+
+declare double @foo()
+
+declare double @bar()
+
+define double @baz(double %x) {
+entry:
+	%ifcond = fcmp one double %x, 0.000000e+00
+	br i1 %ifcond, label %then, label %else
+
+then:		; preds = %entry
+	%calltmp = call double @foo()
+	br label %ifcont
+
+else:		; preds = %entry
+	%calltmp1 = call double @bar()
+	br label %ifcont
+
+ifcont:		; preds = %else, %then
+	%iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+	ret double %iftmp
+}
+
+
+ +

To visualize the control flow graph, you can use a nifty feature of the LLVM +'opt' tool. If you put this LLVM IR +into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window will pop up and you'll +see this graph:

+ +
Example CFG
+ +

Another way to get this is to call "F->viewCFG()" or +"F->viewCFGOnly()" (where F is a "Function*") either by +inserting actual calls into the code and recompiling or by calling these in the +debugger. LLVM has many nice features for visualizing various graphs.
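
+ +

For example, a minimal sketch of the direct-call approach (assuming the "baz" +function above has already been codegen'd into TheModule):

+ +
+
+// Pop up a window showing baz's control flow graph (uses Graphviz/dot).
+if (Function *F = TheModule->getFunction("baz"))
+  F->viewCFG();
+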

+ +

Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression ("x" in our case here) and compares the +result to 0.0 with the "fcmp one" +instruction ('one' is "Ordered and Not Equal"). Based on the result of this +expression, the code jumps to either the "then" or "else" blocks, which contain +the expressions for the true/false cases.

+ +

Once the then/else blocks are finished executing, they both branch back to the +'ifcont' block to execute the code that happens after the if/then/else. In this +case the only thing left to do is to return to the caller of the function. The +question then becomes: how does the code know which expression to return?

+ +

The answer to this question involves an important SSA operation: the +Phi +operation. If you're not familiar with SSA, the Wikipedia +article is a good introduction, and there are various other introductions to +it available on your favorite search engine. The short version is that +"execution" of the Phi operation requires "remembering" which block control came +from. The Phi operation takes on the value corresponding to the input control +block. In this case, if control comes in from the "then" block, it gets the +value of "calltmp". If control comes from the "else" block, it gets the value +of "calltmp1".

+ +

At this point, you are probably starting to think "Oh no! This means my +simple and elegant front-end will have to start generating SSA form in order to +use LLVM!". Fortunately, this is not the case, and we strongly advise +not implementing an SSA construction algorithm in your front-end +unless there is an amazingly good reason to do so. In practice, there are two +sorts of values that float around in code written for your average imperative +programming language that might need Phi nodes:

+ +
+
+  1. Code that involves user variables: x = 1; x = x + 1;
+  2. Values that are implicit in the structure of your AST, such as the Phi node
+in this case.
+ +

In Chapter 7 of this tutorial ("mutable +variables"), we'll talk about #1 +in depth. For now, just believe me that you don't need SSA construction to +handle this case. For #2, you have the choice of using the techniques that we will +describe for #1, or you can insert Phi nodes directly, if convenient. In this +case, it is really really easy to generate the Phi node, so we choose to do it +directly.

+ +

Okay, enough of the motivation and overview, let's generate code!

+ +
+ + + + + +
+ +

In order to generate code for this, we implement the Codegen method +for IfExprAST:

+ +
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+
+ +

This code is straightforward and similar to what we saw before. We emit the +expression for the condition, then compare that value to zero to get a truth +value as a 1-bit (bool) value.

+ +
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+
+ +

This code creates the basic blocks that are related to the if/then/else +statement, and they correspond directly to the blocks in the example above. The +first line gets the current Function object that is being built. It +gets this by asking the builder for the current BasicBlock, and asking that +block for its "parent" (the function it is currently embedded into).

+ +

Once it has that, it creates three blocks. Note that it passes "TheFunction" +into the constructor for the "then" block. This causes the constructor to +automatically insert the new block into the end of the specified function. The +other two blocks are created, but aren't yet inserted into the function.

+ +

Once the blocks are created, we can emit the conditional branch that chooses +between them. Note that creating new blocks does not implicitly affect the +IRBuilder, so it is still inserting into the block that the condition +went into. Also note that it is creating a branch to the "then" block and the +"else" block, even though the "else" block isn't inserted into the function yet. +This is all ok: it is the standard way that LLVM supports forward +references.

+ +
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+  
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+
+ +

After the conditional branch is inserted, we move the builder to start +inserting into the "then" block. Strictly speaking, this call moves the +insertion point to be at the end of the specified block. However, since the +"then" block is empty, it also starts out by inserting at the beginning of the +block. :)

+ +

Once the insertion point is set, we recursively codegen the "then" expression from the AST. To finish off the "then" block, we create an unconditional branch to the merge block. One interesting (and very important) aspect of the LLVM IR is that it requires all basic blocks to be "terminated" with a control flow instruction such as return or branch. This means that all control flow, including fall-throughs, must be made explicit in the LLVM IR. If you violate this rule, the verifier will emit an error.
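
For instance, assuming the "then" expression is a call as in this chapter's example, the finished "then" block looks something like this (illustrative IR); dropping the final "br" would make the verifier reject the function:

  then:
    %calltmp = call double @foo()
    br label %ifcont        ; the fall through to the merge block is explicit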

+ +

The final line here is quite subtle, but is very important. The basic issue is that when we create the Phi node in the merge block, we need to set up the block/value pairs that indicate how the Phi will work. Importantly, the Phi node expects to have an entry for each predecessor of the block in the CFG. Why, then, are we getting the current block when we just set it to ThenBB a few lines above? The problem is that the "Then" expression may actually itself change the block that the Builder is emitting into if, for example, it contains a nested "if/then/else" expression. Because calling Codegen recursively could arbitrarily change the notion of the current block, we are required to get an up-to-date value of the block that will feed the Phi node.
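
To make this concrete, consider a hypothetical input like the following. While emitting the outer "then" expression, codegen of the inner if/then/else leaves the builder in the inner merge block, so the outer phi must name that block, not the original "then" block:

  # Hypothetical example: the nested if/then/else changes the current block
  # while the outer 'then' expression is being emitted.
  def nested(x)
    if x then (if x < 2 then 1 else 2) else 3;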

+ +
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+  
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+
+
+ +

Code generation for the 'else' block is basically identical to codegen for +the 'then' block. The only significant difference is the first line, which adds +the 'else' block to the function. Recall previously that the 'else' block was +created, but not added to the function. Now that the 'then' and 'else' blocks +are emitted, we can finish up with the merge code:

+ +
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+  
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+
+ +

The first two lines here are now familiar: the first adds the "merge" block to the Function object (it was previously floating, like the else block above). The second line changes the insertion point so that newly created code will go into the "merge" block. Once that is done, we need to create the PHI node and set up the block/value pairs for the PHI.

+ +

Finally, the Codegen function returns the phi node as the value computed by the if/then/else expression. In our example above, this returned value will feed into the code for the top-level function, which will create the return instruction.

+ +

Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language +that can calculate a wide variety of numeric functions. Next up we'll add +another useful expression that is familiar from non-functional languages...

+ +
'for' Loop Expression
+ +

Now that we know how to add basic control flow constructs to the language, we have the tools to add more powerful things. Let's add something more aggressive: a 'for' expression:

+ +
+
+ extern putchard(char)
+ def printstar(n)
+   for i = 1, i < n, 1.0 in
+     putchard(42);  # ascii 42 = '*'
+     
+ # print 100 '*' characters
+ printstar(100);
+
+
+ +

This expression defines a new variable ("i" in this case) that iterates from a starting value while the condition ("i < n" in this case) is true, incrementing by an optional step value ("1.0" in this case). If the step value is omitted, it defaults to 1.0. While the condition is true, the loop executes its body expression. Because we don't have anything better to return, we'll just define the loop as always returning 0.0. In the future, when we have mutable variables, this will become more useful.
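
For example, here is a hypothetical variant of printstar that omits the step value; it behaves exactly as if ", 1.0" had been written:

  # Hypothetical: the step value is omitted, so it defaults to 1.0.
  def printi(n)
    for i = 1, i < n in
      putchard(73);  # ascii 73 = 'I'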

+ +

As before, let's talk about the changes that we need to make to Kaleidoscope to support this.

+ +
Lexer Extensions for the 'for' Loop
+ +

The lexer extensions are the same sort of thing as for if/then/else:

+ +
+
+  ... in enum Token ...
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10
+
+  ... in gettok ...
+  if (IdentifierStr == "def") return tok_def;
+  if (IdentifierStr == "extern") return tok_extern;
+  if (IdentifierStr == "if") return tok_if;
+  if (IdentifierStr == "then") return tok_then;
+  if (IdentifierStr == "else") return tok_else;
+  if (IdentifierStr == "for") return tok_for;
+  if (IdentifierStr == "in") return tok_in;
+  return tok_identifier;
+
+
+ +
AST Extensions for the 'for' Loop
+ +

The AST node is just as simple. It basically boils down to capturing +the variable name and the constituent expressions in the node.

+ +
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+
+ +
Parser Extensions for the 'for' Loop
+ +

The parser code is also fairly standard. The only interesting thing here is +handling of the optional step value. The parser code handles it by checking to +see if the second comma is present. If not, it sets the step value to null in +the AST node:

+ +
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+  
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+  
+  
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+  
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+  
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+  
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+
+ +
LLVM IR for the 'for' Loop
+ +

Now we get to the good part: the LLVM IR we want to generate for this thing. +With the simple example above, we get this LLVM IR (note that this dump is +generated with optimizations disabled for clarity): +

+ +
+
+declare double @putchard(double)
+
+define double @printstar(double %n) {
+entry:
+        ; initial value = 1.0 (inlined into phi)
+	br label %loop
+
+loop:		; preds = %loop, %entry
+	%i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+        ; body
+	%calltmp = call double @putchard(double 4.200000e+01)
+        ; increment
+	%nextvar = fadd double %i, 1.000000e+00
+
+        ; termination test
+	%cmptmp = fcmp ult double %i, %n
+	%booltmp = uitofp i1 %cmptmp to double
+	%loopcond = fcmp one double %booltmp, 0.000000e+00
+	br i1 %loopcond, label %loop, label %afterloop
+
+afterloop:		; preds = %loop
+        ; loop always returns 0.0
+	ret double 0.000000e+00
+}
+
+
+ +

This loop contains all the same constructs we saw before: a phi node, several expressions, and some basic blocks. Let's see how this fits together.

+ +
Code Generation for the 'for' Loop
+ +

The first part of Codegen is very simple: we just output the start expression +for the loop value:

+ +
+
+Value *ForExprAST::Codegen() {
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+
+
+ +

With this out of the way, the next step is to set up the LLVM basic block +for the start of the loop body. In the case above, the whole loop body is one +block, but remember that the body code itself could consist of multiple blocks +(e.g. if it contains an if/then/else or a for/in expression).
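
For instance, a hypothetical body containing an if/then/else makes the "loop body" span several basic blocks:

  # Hypothetical: the if/then/else in the body expands into multiple blocks.
  def plusminus(n)
    for i = 1, i < n in
      (if i < 5 then putchard(43)   # '+'
       else putchard(45));          # '-'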

+ +
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+  
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+
+ +

This code is similar to what we saw for if/then/else. Because we will need +it to create the Phi node, we remember the block that falls through into the +loop. Once we have that, we create the actual block that starts the loop and +create an unconditional branch for the fall-through between the two blocks.

+ +
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+  
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+
+ +

Now that the "preheader" for the loop is set up, we switch to emitting code +for the loop body. To begin with, we move the insertion point and create the +PHI node for the loop induction variable. Since we already know the incoming +value for the starting value, we add it to the Phi node. Note that the Phi will +eventually get a second value for the backedge, but we can't set it up yet +(because it doesn't exist!).

+ +
+
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+  
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+
+
+ +

Now the code starts to get more interesting. Our 'for' loop introduces a new +variable to the symbol table. This means that our symbol table can now contain +either function arguments or loop variables. To handle this, before we codegen +the body of the loop, we add the loop variable as the current value for its +name. Note that it is possible that there is a variable of the same name in the +outer scope. It would be easy to make this an error (emit an error and return +null if there is already an entry for VarName) but we choose to allow shadowing +of variables. In order to handle this correctly, we remember the Value that +we are potentially shadowing in OldVal (which will be null if there is +no shadowed variable).
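
Here is a hypothetical example of shadowing in action: inside the loop body, "x" refers to the induction variable, and once the loop is done the argument binding is visible again:

  # Hypothetical: the loop variable 'x' shadows the argument 'x' in the body.
  def shadow(x)
    (for x = 1, x < 3 in
       putchard(x+64))  # prints 'A' then 'B' using the loop variable
    + x;                # here 'x' is the function argument again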

+ +

Once the loop variable is set into the symbol table, the code recursively codegens the body. This allows the body to use the loop variable: any references to it will naturally find it in the symbol table.

+ +
+
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+  
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+
+ +

Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn't present. 'NextVar' +will be the value of the loop variable on the next iteration of the loop.

+ +
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+
+
+ +

Finally, we evaluate the loop's exit condition to determine whether the loop should exit. This mirrors the condition evaluation for the if/then/else statement.

+ +
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+  
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+  
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+
+
+ +

With the code for the body of the loop complete, we just need to finish up +the control flow for it. This code remembers the end block (for the phi node), then creates the block for the loop exit ("afterloop"). Based on the value of the +exit condition, it creates a conditional branch that chooses between executing +the loop again and exiting the loop. Any future code is emitted in the +"afterloop" block, so it sets the insertion position to it.

+ +
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+  
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+  
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+
+ +

The final code handles various cleanups: now that we have the "NextVar" value, we can add the incoming value to the loop PHI node. After that, we restore any shadowed variable and remove the loop variable from the symbol table, so that it isn't in scope after the for loop. Finally, code generation of the for loop always returns 0.0, so that is what we return from ForExprAST::Codegen.
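
If everything works, you should be able to take the example from the start of this section for a spin (an illustrative session; the "Read function definition:" dumps are omitted):

  ready> extern putchard(char);
  ready> def printstar(n) for i = 1, i < n, 1.0 in putchard(42);
  ready> printstar(5);
  *****
  Evaluated to 0.000000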

+ +

With this, we conclude the "adding control flow to Kaleidoscope" chapter of the tutorial. In this chapter we added two control-flow constructs, and used them to motivate a couple of aspects of the LLVM IR that are important for front-end implementors to know. In the next chapter of our saga, we will get a bit crazier and add user-defined operators to our poor innocent language.

+ +
Full Code Listing
+ +

Here is the complete code listing for our running example, enhanced with the if/then/else and for expressions. To build this example, use:

+ +
+
+   # Compile
+   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+   # Run
+   ./toy
+
+
+ +

Here is the code:

+ +
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+  
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+  : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+  
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+  
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+  
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+  
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+  
+  if (CurTok != tok_else)
+    return Error("expected else");
+  
+  getNextToken();
+  
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+  
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+  
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+  
+  
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+  
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+  
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+  
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+  
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+  
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+  
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+  
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+  
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+  
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+  
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   ...
+  //   start = startexpr
+  //   goto loop
+  // loop: 
+  //   variable = phi [start, loopheader], [nextvariable, loopend]
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   nextvariable = variable + step
+  //   endcond = endexpr
+  //   br endcond, loop, endloop
+  // outloop:
+  
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+  
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+  
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+  
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+  
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+  
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+  
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+  
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+  
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+  
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+  
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+  
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+  
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+  
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+    
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+  
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+    
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+  
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+    
+    return TheFunction;
+  }
+  
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+      
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Create the JIT.  This takes ownership of the module.
+  std::string ErrStr;
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  if (!TheExecutionEngine) {
+    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+    exit(1);
+  }
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
+
+
+ +Next: Extending the language: user-defined operators +
+ + +
+
+ Chris Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/tutorial/LangImpl6.html b/final/docs/tutorial/LangImpl6.html new file mode 100644 index 00000000000..77dd0ee2b12 --- /dev/null +++ b/final/docs/tutorial/LangImpl6.html @@ -0,0 +1,1817 @@ + + + + + Kaleidoscope: Extending the Language: User-defined Operators + + + + + + + +
Kaleidoscope: Extending the Language: User-defined Operators
+ + + +
+

Written by Chris Lattner

+
Chapter 6 Introduction
+ +

Welcome to Chapter 6 of the "Implementing a language with LLVM" tutorial. At this point in our tutorial, we have a fully functional language that is fairly minimal, but also useful. There is still one big problem with it, however: our language doesn't have many useful operators (like division, logical negation, or even any comparisons besides less-than).

+ +

This chapter of the tutorial takes a wild digression into adding user-defined operators to the simple and beautiful Kaleidoscope language. This digression gives us a language that is simple yet ugly in some ways, but also a powerful one at the same time. One of the great things about creating your own language is that you get to decide what is good or bad. In this tutorial we'll assume that it is okay to use this as a way to show some interesting parsing techniques.

+ +

At the end of this tutorial, we'll run through an example Kaleidoscope +application that renders the Mandelbrot set. This gives +an example of what you can build with Kaleidoscope and its feature set.

+ +
User-defined Operators: the Idea
+ +

+The "operator overloading" that we will add to Kaleidoscope is more general than +languages like C++. In C++, you are only allowed to redefine existing +operators: you can't programatically change the grammar, introduce new +operators, change precedence levels, etc. In this chapter, we will add this +capability to Kaleidoscope, which will let the user round out the set of +operators that are supported.

+ +

The point of going into user-defined operators in a tutorial like this is to +show the power and flexibility of using a hand-written parser. Thus far, the parser +we have been implementing uses recursive descent for most parts of the grammar and +operator precedence parsing for the expressions. See Chapter 2 for details. Without using operator +precedence parsing, it would be very difficult to allow the programmer to +introduce new operators into the grammar: the grammar is dynamically extensible +as the JIT runs.

+ +

The two specific features we'll add are programmable unary operators (right +now, Kaleidoscope has no unary operators at all) as well as binary operators. +An example of this is:

+ +
+
+# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary "logical or", (note that it does not "short circuit")
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Define = with slightly lower precedence than relationals.
+def binary= 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+
+ +

Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement significant +parts of the language in the library!

+ +

We will break down the implementation of these features into two parts: implementing support for user-defined binary operators and adding unary operators.

+ +
User-defined Binary Operators
+ +

Adding support for user-defined binary operators is pretty simple with our +current framework. We'll first add support for the unary/binary keywords:

+ +
+
+enum Token {
+  ...
+  // operators
+  tok_binary = -11, tok_unary = -12
+};
+...
+static int gettok() {
+...
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    return tok_identifier;
+
+
+ +

This just adds lexer support for the unary and binary keywords, as we did in previous chapters. One nice thing about our current AST is that we represent binary operators in full generality by using their ASCII code as the opcode. For our extended operators, we'll use this same representation, so we don't need any new AST or parser support.

+ +

On the other hand, we have to be able to represent the definitions of these +new operators, in the "def binary| 5" part of the function definition. In our +grammar so far, the "name" for the function definition is parsed as the +"prototype" production and into the PrototypeAST AST node. To +represent our new user-defined operators as prototypes, we have to extend +the PrototypeAST AST node like this:

+ +
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its argument names as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+  
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+  
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+  
+  unsigned getBinaryPrecedence() const { return Precedence; }
+  
+  Function *Codegen();
+};
+
+
+ +

Basically, in addition to knowing a name for the prototype, we now keep track of whether it was an operator, and if it was, what precedence level the operator is at. The precedence is only used for binary operators (as you'll see below, it just doesn't apply to unary operators). Now that we have a way to represent the prototype for a user-defined operator, we need to parse it:

+ +
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+  
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+  
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+    
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+  
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+
+ +

This is all fairly straightforward parsing code, and we have already seen a lot of similar code in the past. One interesting part of the code above is the couple of lines that set up FnName for binary operators. This builds names like "binary@" for a newly defined "@" operator, taking advantage of the fact that symbol names in the LLVM symbol table are allowed to have any character in them, including embedded nul characters.

+ +

The next interesting thing to add is codegen support for these binary operators. Given our current structure, this is a simple addition of a default case to our existing binary operator node:

+ +
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+  
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+  
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+
+
+ +

As you can see above, the new code is actually really simple. It just does a lookup for the appropriate operator in the symbol table and generates a function call to it. Since user-defined operators are just built as normal functions (because the "prototype" boils down to a function with the right name), everything falls into place.

+ +

The final piece of code we are missing is a bit of top-level magic:

+ +
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+    ...
+
+
+ +

Basically, before codegening a function, if it is a user-defined operator, we register it in the precedence table. This allows the binary operator parsing logic we already have in place to handle it. Since we are working on a fully general operator-precedence parser, this is all we need to do to "extend the grammar".
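
Concretely, codegening a definition like "def binary| 5 (LHS RHS) ..." has the same effect on the parser as this hand-written entry (an illustrative equivalence, not a line from the listing):

  BinopPrecedence['|'] = 5;  // '|' now parses like any built-in binary operator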

+ +

Now we have useful user-defined binary operators. This builds a lot on the previous framework we built for other operators. Adding unary operators is a bit more challenging, because we don't have any framework for them yet. Let's see what it takes.

+ +
User-defined Unary Operators
+ +

Since we don't currently support unary operators in the Kaleidoscope +language, we'll need to add everything to support them. Above, we added simple +support for the 'unary' keyword to the lexer. In addition to that, we need an +AST node:

+ +
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand) 
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+
+ +

This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, we +need to add the parsing logic. Parsing a unary operator is pretty simple: we'll +add a new function to do it:

+ +
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+  
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+
+ +

The grammar we add is pretty straightforward here. If we see a unary operator when parsing a primary expression, we eat the operator as a prefix and parse the remaining piece as another unary expression. This allows us to handle multiple unary operators (e.g. "!!x"). Note that unary operators can't have ambiguous parses like binary operators can, so there is no need for precedence information.
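
As a sketch of what the recursion produces, an input like "!!x" builds the following nested nodes (illustrative C++ using the AST classes from this tutorial):

  // "!!x" parses as: unary '!' applied to (unary '!' applied to x).
  ExprAST *E = new UnaryExprAST('!',
                 new UnaryExprAST('!', new VariableExprAST("x")));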

+ +

The problem with this function is that we need to call ParseUnary from somewhere. To do this, we change previous callers of ParsePrimary to call ParseUnary instead:

+ +
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  ...
+    // Parse the unary expression after the binary operator.
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+  ...
+}
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+
+ +

With these two simple changes, we are now able to parse unary operators and build the +AST for them. Next up, we need to add parser support for prototypes, to parse +the unary operator prototype. We extend the binary operator code above +with:

+ +
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+  
+  unsigned Kind = 0;  // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+  
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    ...
+
+
+ +

As with binary operators, we give unary operators a name that includes the operator character. This assists us at code generation time. Speaking of which, the final piece we need to add is codegen support for unary operators. It looks like this:

+ +
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+  
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+  
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+
+ +

This code is similar to, but simpler than, the code for binary operators. It +is simpler primarily because it doesn't need to handle any predefined operators. +

+ +
Kicking the Tires
+ +

It is somewhat hard to believe, but with a few simple extensions we've +covered in the last chapters, we have grown a real-ish language. With this, we +can do a lot of interesting things, including I/O, math, and a bunch of other +things. For example, we can now add a nice sequencing operator (printd is +defined to print out the specified value and a newline):

+ +
+
+ready> extern printd(x);
+Read extern: declare double @printd(double)
+ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
+..
+ready> printd(123) : printd(456) : printd(789);
+123.000000
+456.000000
+789.000000
+Evaluated to 0.000000
+
+
+ +

We can also define a bunch of other "primitive" operations, such as:

+ +
+
+# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+    
+# Unary negate.
+def unary-(v)
+  0-v;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary logical or, which does not short circuit. 
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Binary logical and, which does not short circuit. 
+def binary& 6 (LHS RHS)
+  if !LHS then
+    0
+  else
+    !!RHS;
+
+# Define = with slightly lower precedence than relationals.
+def binary= 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+
+
+ + +

Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character whose +"density" reflects the value passed in: the lower the value, the denser the +character:

+ +
+
+ready> extern putchard(char)
+def printdensity(d)
+  if d > 8 then
+    putchard(32)  # ' '
+  else if d > 4 then
+    putchard(46)  # '.'
+  else if d > 2 then
+    putchard(43)  # '+'
+  else
+    putchard(42); # '*'
+...
+ready> printdensity(1): printdensity(2): printdensity(3) : 
+          printdensity(4): printdensity(5): printdensity(9): putchard(10);
+*++.. 
+Evaluated to 0.000000
+
+
+ +

Based on these simple primitive operations, we can start to define more interesting things. For example, here's a little function that computes the number of iterations it takes for a point's orbit in the complex plane to escape:

+ +
+
+# determine whether the specific location diverges.
+# Solve for z = z^2 + c in the complex plane.
+def mandleconverger(real imag iters creal cimag)
+  if iters > 255 | (real*real + imag*imag > 4) then
+    iters
+  else
+    mandleconverger(real*real - imag*imag + creal,
+                    2*real*imag + cimag,
+                    iters+1, creal, cimag);
+
+# return the number of iterations required for the iteration to escape
+def mandleconverge(real imag)
+  mandleconverger(real, imag, 0, real, imag);
+
+
+ +

This "z = z2 + c" function is a beautiful little creature that is the basis +for computation of the Mandelbrot Set. Our +mandelconverge function returns the number of iterations that it takes +for a complex orbit to escape, saturating to 255. This is not a very useful +function by itself, but if you plot its value over a two-dimensional plane, +you can see the Mandelbrot set. Given that we are limited to using putchard +here, our amazing graphical output is limited, but we can whip together +something using the density plotter above:

+ +
+
+# Compute and plot the Mandelbrot set with the specified two-dimensional
+# range info.
+def mandelhelp(xmin xmax xstep   ymin ymax ystep)
+  for y = ymin, y < ymax, ystep in (
+    (for x = xmin, x < xmax, xstep in
+       printdensity(mandleconverge(x,y)))
+    : putchard(10)
+  )
+ 
+# mandel - This is a convenient helper function for plotting the Mandelbrot set
+# from the specified position with the specified magnification.
+def mandel(realstart imagstart realmag imagmag) 
+  mandelhelp(realstart, realstart+realmag*78, realmag,
+             imagstart, imagstart+imagmag*40, imagmag);
+
+
+ +

Given this, we can try plotting out the Mandelbrot set! Let's try it out:

+ +
+
+ready> mandel(-2.3, -1.3, 0.05, 0.07);
+*******************************+++++++++++*************************************
+*************************+++++++++++++++++++++++*******************************
+**********************+++++++++++++++++++++++++++++****************************
+*******************+++++++++++++++++++++.. ...++++++++*************************
+*****************++++++++++++++++++++++.... ...+++++++++***********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+**************+++++++++++++++++++++++....     ....+++++++++********************
+*************++++++++++++++++++++++......      .....++++++++*******************
+************+++++++++++++++++++++.......       .......+++++++******************
+***********+++++++++++++++++++....                ... .+++++++*****************
+**********+++++++++++++++++.......                     .+++++++****************
+*********++++++++++++++...........                    ...+++++++***************
+********++++++++++++............                      ...++++++++**************
+********++++++++++... ..........                        .++++++++**************
+*******+++++++++.....                                   .+++++++++*************
+*******++++++++......                                  ..+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******+++++......                                     ..+++++++++*************
+*******.... ....                                      ...+++++++++*************
+*******.... .                                         ...+++++++++*************
+*******+++++......                                    ...+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******++++++++......                                   .+++++++++*************
+*******+++++++++.....                                  ..+++++++++*************
+********++++++++++... ..........                        .++++++++**************
+********++++++++++++............                      ...++++++++**************
+*********++++++++++++++..........                     ...+++++++***************
+**********++++++++++++++++........                     .+++++++****************
+**********++++++++++++++++++++....                ... ..+++++++****************
+***********++++++++++++++++++++++.......       .......++++++++*****************
+************+++++++++++++++++++++++......      ......++++++++******************
+**************+++++++++++++++++++++++....      ....++++++++********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+*****************++++++++++++++++++++++....  ...++++++++***********************
+*******************+++++++++++++++++++++......++++++++*************************
+*********************++++++++++++++++++++++.++++++++***************************
+*************************+++++++++++++++++++++++*******************************
+******************************+++++++++++++************************************
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+Evaluated to 0.000000
+ready> mandel(-2, -1, 0.02, 0.04);
+**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+***********++++++++++++++++++++++++++++++++++++++++++++++++++........        . 
+**********++++++++++++++++++++++++++++++++++++++++++++++.............          
+********+++++++++++++++++++++++++++++++++++++++++++..................          
+*******+++++++++++++++++++++++++++++++++++++++.......................          
+******+++++++++++++++++++++++++++++++++++...........................           
+*****++++++++++++++++++++++++++++++++............................              
+*****++++++++++++++++++++++++++++...............................               
+****++++++++++++++++++++++++++......   .........................               
+***++++++++++++++++++++++++.........     ......    ...........                 
+***++++++++++++++++++++++............                                          
+**+++++++++++++++++++++..............                                          
+**+++++++++++++++++++................                                          
+*++++++++++++++++++.................                                           
+*++++++++++++++++............ ...                                              
+*++++++++++++++..............                                                  
+*+++....++++................                                                   
+*..........  ...........                                                       
+*                                                                              
+*..........  ...........                                                       
+*+++....++++................                                                   
+*++++++++++++++..............                                                  
+*++++++++++++++++............ ...                                              
+*++++++++++++++++++.................                                           
+**+++++++++++++++++++................                                          
+**+++++++++++++++++++++..............                                          
+***++++++++++++++++++++++............                                          
+***++++++++++++++++++++++++.........     ......    ...........                 
+****++++++++++++++++++++++++++......   .........................               
+*****++++++++++++++++++++++++++++...............................               
+*****++++++++++++++++++++++++++++++++............................              
+******+++++++++++++++++++++++++++++++++++...........................           
+*******+++++++++++++++++++++++++++++++++++++++.......................          
+********+++++++++++++++++++++++++++++++++++++++++++..................          
+Evaluated to 0.000000
+ready> mandel(-0.9, -1.4, 0.02, 0.03);
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+**********+++++++++++++++++++++************************************************
+*+++++++++++++++++++++++++++++++++++++++***************************************
++++++++++++++++++++++++++++++++++++++++++++++**********************************
+++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
++++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
++++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
++++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
+++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
++++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
+++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
+++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
++++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
+++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
+++++++++++++++++++++...........                .........++++++++++++++++++++++*
+++++++++++++++++++............                  ...........++++++++++++++++++++
+++++++++++++++++...............                 .............++++++++++++++++++
+++++++++++++++.................                 ...............++++++++++++++++
+++++++++++++..................                  .................++++++++++++++
++++++++++..................                      .................+++++++++++++
+++++++........        .                               .........  ..++++++++++++
+++............                                         ......    ....++++++++++
+..............                                                    ...++++++++++
+..............                                                    ....+++++++++
+..............                                                    .....++++++++
+.............                                                    ......++++++++
+...........                                                     .......++++++++
+.........                                                       ........+++++++
+.........                                                       ........+++++++
+.........                                                           ....+++++++
+........                                                             ...+++++++
+.......                                                              ...+++++++
+                                                                    ....+++++++
+                                                                   .....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+Evaluated to 0.000000
+ready> ^D
+
+
+ +

At this point, you may be starting to realize that Kaleidoscope is a real and powerful language. It may not be self-similar :), but it can be used to plot things that are!

+ +

With this, we conclude the "adding user-defined operators" chapter of the tutorial. We have successfully augmented our language, adding the ability to extend the language in the library, and we have shown how this can be used to build a simple but interesting end-user application in Kaleidoscope. At this point, Kaleidoscope can build a variety of applications that are functional and can call functions with side-effects, but it can't actually define and mutate a variable itself.

+ +

Interestingly, variable mutation is an important feature of some languages, and it is not at all obvious how to add support for mutable variables without having to add an "SSA construction" phase to your front-end. In the next chapter, we will describe how you can add variable mutation without building SSA in your front-end.

+ +
Full Code Listing

Here is the complete code listing for our running example, enhanced with support for user-defined operators. To build this example, use:

+ +
+
+   # Compile
+   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+   # Run
+   ./toy
+
+
+ +

Here is the code:

+ +
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+  
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+  
+  // operators
+  tok_binary = -11, tok_unary = -12
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand) 
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+  : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+  
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+  
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+  
+  unsigned getBinaryPrecedence() const { return Precedence; }
+  
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+  
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+  
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+  
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+  
+  if (CurTok != tok_else)
+    return Error("expected else");
+  
+  getNextToken();
+  
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+  
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+  
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+  
+  
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+  
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+  
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+  
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+  
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the unary expression after the binary operator.
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+  
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+  
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+    
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedecnce: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+  
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+  
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+  
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+  
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+  
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+  
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+  
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+  
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+  
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+  
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+  
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+  
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   ...
+  //   start = startexpr
+  //   goto loop
+  // loop: 
+  //   variable = phi [start, loopheader], [nextvariable, loopend]
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   nextvariable = variable + step
+  //   endcond = endexpr
+  //   br endcond, loop, endloop
+  // outloop:
+  
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+  
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+  
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+  
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+  
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+  
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+  
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+  
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+  
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+  
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+  
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+  
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+  
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+  
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+    
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+  
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+    
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+  
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+    
+    return TheFunction;
+  }
+  
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  if (Proto->isBinaryOp())
+    BinopPrecedence.erase(Proto->getOperatorName());
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+      
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/// printd - printf that takes a double prints it as "%f\n", returning 0.
+extern "C" 
+double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Create the JIT.  This takes ownership of the module.
+  std::string ErrStr;
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  if (!TheExecutionEngine) {
+    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+    exit(1);
+  }
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
+
+
+Next: Extending the language: mutable variables / SSA construction
+Chris Lattner
+The LLVM Compiler Infrastructure
+Last modified: $Date$
diff --git a/final/docs/tutorial/LangImpl7.html b/final/docs/tutorial/LangImpl7.html
new file mode 100644
index 00000000000..ddddc423209
--- /dev/null
+++ b/final/docs/tutorial/LangImpl7.html
@@ -0,0 +1,2167 @@
+Kaleidoscope: Extending the Language: Mutable Variables / SSA construction
Kaleidoscope: Extending the Language: Mutable Variables

Written by Chris Lattner

+
Chapter 7 Introduction

Welcome to Chapter 7 of the "Implementing a language with LLVM" tutorial. In chapters 1 through 6, we've built a very respectable, albeit simple, functional programming language. In our journey, we learned some parsing techniques, how to build and represent an AST, how to build LLVM IR, and how to optimize the resultant code as well as JIT compile it.

+ +

While Kaleidoscope is interesting as a functional language, the fact that it is functional makes it "too easy" to generate LLVM IR for it. In particular, a functional language makes it very easy to build LLVM IR directly in SSA form. Since LLVM requires that the input code be in SSA form, this is a very nice property, but it is often unclear to newcomers how to generate code for an imperative language with mutable variables.

+ +

The short (and happy) summary of this chapter is that there is no need for your front-end to build SSA form: LLVM provides highly tuned and well tested support for this, though the way it works is a bit unexpected for some.

+ +
Why is this a hard problem?

To understand why mutable variables cause complexities in SSA construction, consider this extremely simple C example:

+ +
+
+int G, H;
+int test(_Bool Condition) {
+  int X;
+  if (Condition)
+    X = G;
+  else
+    X = H;
+  return X;
+}
+
+
+ +

In this case, we have the variable "X", whose value depends on the path executed in the program. Because there are two different possible values for X before the return instruction, a PHI node is inserted to merge the two values. The LLVM IR that we want for this example looks like this:

+ +
+
+@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+	br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+	%X.0 = load i32* @G
+	br label %cond_next
+
+cond_false:
+	%X.1 = load i32* @H
+	br label %cond_next
+
+cond_next:
+	%X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+	ret i32 %X.2
+}
+
+
+ +

In this example, the loads from the G and H global variables are explicit in the LLVM IR, and they live in the then/else branches of the if statement (cond_true/cond_false). In order to merge the incoming values, the X.2 phi node in the cond_next block selects the right value to use based on where control flow is coming from: if control flow comes from the cond_false block, X.2 gets the value of X.1. Alternatively, if control flow comes from cond_true, it gets the value of X.0. The intent of this chapter is not to explain the details of SSA form. For more information, see one of the many online references.

+ +

The question for this article is "who places the phi nodes when lowering assignments to mutable variables?". The issue here is that LLVM requires that its IR be in SSA form: there is no "non-ssa" mode for it. However, SSA construction requires non-trivial algorithms and data structures, so it is inconvenient and wasteful for every front-end to have to reproduce this logic.

+ +
Memory in LLVM

The 'trick' here is that while LLVM does require all register values to be in SSA form, it does not require (or permit) memory objects to be in SSA form. In the example above, note that the loads from G and H are direct accesses to G and H: they are not renamed or versioned. This differs from some other compiler systems, which do try to version memory objects. In LLVM, instead of encoding dataflow analysis of memory into the LLVM IR, it is handled with Analysis Passes which are computed on demand.

+ +

With this in mind, the high-level idea is that we want to make a stack variable (which lives in memory, because it is on the stack) for each mutable object in a function. To take advantage of this trick, we need to talk about how LLVM represents stack variables.

+ +

In LLVM, all memory accesses are explicit with load/store instructions, and it is carefully designed not to have (or need) an "address-of" operator. Notice how the type of the @G/@H global variables is actually "i32*" even though the variable is defined as "i32". What this means is that @G defines space for an i32 in the global data area, but its name actually refers to the address for that space. Stack variables work the same way, except that instead of being declared with global variable definitions, they are declared with the LLVM alloca instruction:

+ +
+
+define i32 @example() {
+entry:
+	%X = alloca i32           ; type of %X is i32*.
+	...
+	%tmp = load i32* %X       ; load the stack value %X from the stack.
+	%tmp2 = add i32 %tmp, 1   ; increment it
+	store i32 %tmp2, i32* %X  ; store it back
+	...
+
+
+ +

This code shows an example of how you can declare and manipulate a stack variable in the LLVM IR. Stack memory allocated with the alloca instruction is fully general: you can pass the address of the stack slot to functions, you can store it in other variables, etc. In our example above, we could rewrite the example to use the alloca technique to avoid using a PHI node:

+ +
+
+@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+	%X = alloca i32           ; type of %X is i32*.
+	br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+	%X.0 = load i32* @G
+        store i32 %X.0, i32* %X   ; Update X
+	br label %cond_next
+
+cond_false:
+	%X.1 = load i32* @H
+        store i32 %X.1, i32* %X   ; Update X
+	br label %cond_next
+
+cond_next:
+	%X.2 = load i32* %X       ; Read X
+	ret i32 %X.2
+}
+
+
+ +

With this, we have discovered a way to handle arbitrary mutable variables without the need to create Phi nodes at all (a short C++ sketch of the recipe follows the list):

+ +
  1. Each mutable variable becomes a stack allocation.
  2. Each read of the variable becomes a load from the stack.
  3. Each update of the variable becomes a store to the stack.
  4. Taking the address of a variable just uses the stack address directly.
+ +
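To make the recipe concrete, here is a minimal C++ sketch (not from the tutorial) that lowers the earlier "X = G; return X;" fragment with the IRBuilder API. Builder, Ctx, and GGlobal are assumed to already be in scope, with the builder inserting into the entry block.

+// Hypothetical lowering of "X = G; return X;" using the alloca recipe.
+// Assumes: IRBuilder<> Builder inserting into the entry block, LLVMContext
+// &Ctx, and Value *GGlobal pointing at the @G global (of type i32*).
+Value *XSlot = Builder.CreateAlloca(Type::getInt32Ty(Ctx), 0, "X"); // rule 1
+Value *GVal  = Builder.CreateLoad(GGlobal, "G");
+Builder.CreateStore(GVal, XSlot);              // rule 3: each update is a store
+Value *XVal  = Builder.CreateLoad(XSlot, "X"); // rule 2: each read is a load
+Builder.CreateRet(XVal);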

While this solution has solved our immediate problem, it introduced another one: we have now apparently introduced a lot of stack traffic for very simple and common operations, a major performance problem. Fortunately for us, the LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles this case, promoting allocas like this into SSA registers and inserting Phi nodes as appropriate. If you run this example through the pass, you'll get:

+ +
+
+$ llvm-as < example.ll | opt -mem2reg | llvm-dis
+@G = weak global i32 0
+@H = weak global i32 0
+
+define i32 @test(i1 %Condition) {
+entry:
+	br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+	%X.0 = load i32* @G
+	br label %cond_next
+
+cond_false:
+	%X.1 = load i32* @H
+	br label %cond_next
+
+cond_next:
+	%X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+	ret i32 %X.01
+}
+
+
+ +

The mem2reg pass implements the standard "iterated dominance frontier" algorithm for constructing SSA form and has a number of optimizations that speed up (very common) degenerate cases. The mem2reg optimization pass is the answer to dealing with mutable variables, and we highly recommend that you depend on it. Note that mem2reg only works on variables in certain circumstances (a small checking sketch follows the list):

+ +
  1. mem2reg is alloca-driven: it looks for allocas and, if it can handle them, it promotes them. It does not apply to global variables or heap allocations.
  2. mem2reg only looks for alloca instructions in the entry block of the function. Being in the entry block guarantees that the alloca is only executed once, which makes analysis simpler.
  3. mem2reg only promotes allocas whose uses are direct loads and stores. If the address of the stack object is passed to a function, or if any funny pointer arithmetic is involved, the alloca will not be promoted.
  4. mem2reg only works on allocas of first class values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting structs or arrays to registers. Note that the "scalarrepl" pass is more powerful and can promote structs, "unions", and arrays in many cases.
+ +
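As a rough illustration of these rules, here is a small C++ sketch; the helper name is hypothetical, but the isAllocaPromotable predicate is declared in llvm/Transforms/Utils/PromoteMemToReg.h and checks the use-list side of this contract (direct, non-volatile loads and stores).

+// Collect the entry-block allocas of F that mem2reg could promote
+// (a sketch, not part of the tutorial code).
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Transforms/Utils/PromoteMemToReg.h"
+#include <vector>
+using namespace llvm;
+
+static std::vector<AllocaInst*> FindPromotableAllocas(Function &F) {
+  std::vector<AllocaInst*> Allocas;
+  BasicBlock &Entry = F.getEntryBlock();  // rule 2: entry block only
+  for (BasicBlock::iterator I = Entry.begin(), E = Entry.end(); I != E; ++I)
+    if (AllocaInst *AI = dyn_cast<AllocaInst>(&*I))
+      if (isAllocaPromotable(AI))         // direct loads and stores only
+        Allocas.push_back(AI);
+  return Allocas;
+}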

All of these properties are easy to satisfy for most imperative languages, and we'll illustrate it below with Kaleidoscope. The final question you may be asking is: should I bother with this nonsense for my front-end? Wouldn't it be better if I just did SSA construction directly, avoiding use of the mem2reg optimization pass? In short, we strongly recommend that you use this technique for building SSA form, unless there is an extremely good reason not to. Using this technique is:

+ +
  • Proven and well tested: llvm-gcc and clang both use this technique for local mutable variables. As such, the most common clients of LLVM are using this to handle a bulk of their variables. You can be sure that bugs are found fast and fixed early.
  • Extremely fast: mem2reg has a number of special cases that make it fast in common cases as well as fully general. For example, it has fast-paths for variables that are only used in a single block, variables that only have one assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
  • Needed for debug info generation: debug information in LLVM relies on having the address of the variable exposed so that debug info can be attached to it. This technique dovetails very naturally with this style of debug info.
+ +

If nothing else, this makes it much easier to get your front-end up and running, and is very simple to implement. Let's extend Kaleidoscope with mutable variables now!

+ +
Mutable Variables in Kaleidoscope

Now that we know the sort of problem we want to tackle, let's see what this looks like in the context of our little Kaleidoscope language. We're going to add two features:

+ +
  1. The ability to mutate variables with the '=' operator.
  2. The ability to define new variables.
+ +

While the first item is really what this is about, we only have variables for incoming arguments as well as for induction variables, and redefining those only goes so far :). Also, the ability to define new variables is a useful thing regardless of whether you will be mutating them. Here's a motivating example that shows how we could use these:

+ +
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+# Recursive fib, we could do this before.
+def fib(x)
+  if (x < 3) then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+# Iterative fib.
+def fibi(x)
+  var a = 1, b = 1, c in
+  (for i = 3, i < x in 
+     c = a + b :
+     a = b :
+     b = c) :
+  b;
+
+# Call it. 
+fibi(10);
+
+
+ +

In order to mutate variables, we have to change our existing variables to use the "alloca trick". Once we have that, we'll add our new operator, then extend Kaleidoscope to support new variable definitions.

+ +
Adjusting Existing Variables for Mutation

The symbol table in Kaleidoscope is managed at code generation time by the 'NamedValues' map. This map currently keeps track of the LLVM "Value*" that holds the double value for the named variable. In order to support mutation, we need to change this slightly, so that NamedValues holds the memory location of the variable in question. Note that this change is a refactoring: it changes the structure of the code, but does not (by itself) change the behavior of the compiler. All of these changes are isolated in the Kaleidoscope code generator.

+ +

At this point in Kaleidoscope's development, it only supports variables for two things: incoming arguments to functions and the induction variable of 'for' loops. For consistency, we'll allow mutation of these variables in addition to other user-defined variables. This means that these will both need memory locations.

+ +

To start our transformation of Kaleidoscope, we'll change the NamedValues map so that it maps to AllocaInst* instead of Value*. Once we do this, the C++ compiler will tell us what parts of the code we need to update:

+ +
+
+static std::map<std::string, AllocaInst*> NamedValues;
+
+
+ +

Also, since we will need to create these allocas, we'll use a helper function that ensures that the allocas are created in the entry block of the function:

+ +
+
+/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function.  This is used for mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          const std::string &VarName) {
+  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                 TheFunction->getEntryBlock().begin());
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                           VarName.c_str());
+}
+
+
+ +

This funny looking code creates an IRBuilder object that is pointing at the first instruction (.begin()) of the entry block. It then creates an alloca with the expected name and returns it. Because all values in Kaleidoscope are doubles, there is no need to pass in a type to use.

+ +
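As a quick usage example, here is a hypothetical snippet that reserves and uses a stack slot with this helper; F, StartVal, and Builder are assumed to be in scope, as in the listings that follow.

+// Hypothetical usage: give the loop variable "i" a stack slot in function F,
+// store its initial value, and read it back.
+AllocaInst *Alloca = CreateEntryBlockAlloca(F, "i");
+Builder.CreateStore(StartVal, Alloca);            // writes become stores
+Value *CurVal = Builder.CreateLoad(Alloca, "i");  // reads become loads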

With this in place, the first functionality change we want to make is to variable references. In our new scheme, variables live on the stack, so code generating a reference to them actually needs to produce a load from the stack slot:

+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  if (V == 0) return ErrorV("Unknown variable name");
+
+  // Load the value.
+  return Builder.CreateLoad(V, Name.c_str());
+}

As you can see, this is pretty straightforward. Now we need to update the things that define the variables to set up the alloca. We'll start with ForExprAST::Codegen (see the full code listing for the unabridged code):

+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create an alloca for the variable in the entry block.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+  
+  // Store the value into the alloca.
+  Builder.CreateStore(StartVal, Alloca);
+  ...
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+  
+  // Reload, increment, and restore the alloca.  This handles the case where
+  // the body of the loop mutates the variable.
+  Value *CurVar = Builder.CreateLoad(Alloca);
+  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+  Builder.CreateStore(NextVar, Alloca);
+  ...

This code is virtually identical to the code before we allowed mutable variables. The big difference is that we no longer have to construct a PHI node, and we use load/store to access the variable as needed.
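For contrast, here is a sketch of what the PHI-based version from the earlier 'for' loop chapter had to do by hand (abridged, not part of the current listing; PreheaderBB and LoopEndBB are the predecessor blocks that feed the PHI):

  // Old approach (sketch): the loop variable is a PHI node whose incoming
  // values must be wired up manually.
  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
                                        VarName.c_str());
  Variable->addIncoming(StartVal, PreheaderBB);
  ...
  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
  Variable->addIncoming(NextVar, LoopEndBB);

With allocas, none of this PHI bookkeeping is needed; mem2reg reconstructs it for us.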

To support mutable argument variables, we need to also make allocas for them. The code for this is also pretty simple:

+/// CreateArgumentAllocas - Create an alloca for each argument and register the
+/// argument in the symbol table so that references to it will succeed.
+void PrototypeAST::CreateArgumentAllocas(Function *F) {
+  Function::arg_iterator AI = F->arg_begin();
+  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+    // Store the initial value into the alloca.
+    Builder.CreateStore(AI, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = Alloca;
+  }
+}

For each argument, we make an alloca, store the input value to the function into the alloca, and register the alloca as the memory location for the argument. This method gets invoked by FunctionAST::Codegen right after it sets up the entry block for the function.
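Concretely, the call site looks like this (excerpted from FunctionAST::Codegen in the full code listing below):

  // Create a new basic block to start insertion into.
  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
  Builder.SetInsertPoint(BB);

  // Add all arguments to the symbol table and create their allocas.
  Proto->CreateArgumentAllocas(TheFunction);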

The final missing piece is adding the mem2reg pass, which allows us to get good codegen once again:

+    // Set up the optimizer pipeline.  Start with registering info about how the
+    // target lays out data structures.
+    OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+    // Promote allocas to registers.
+    OurFPM.add(createPromoteMemoryToRegisterPass());
+    // Do simple "peephole" optimizations and bit-twiddling optzns.
+    OurFPM.add(createInstructionCombiningPass());
+    // Reassociate expressions.
+    OurFPM.add(createReassociatePass());

It is interesting to see what the code looks like before and after the mem2reg optimization runs. For example, this is the before/after code for our recursive fib function. Before the optimization:

+define double @fib(double %x) {
+entry:
+	%x1 = alloca double
+	store double %x, double* %x1
+	%x2 = load double* %x1
+	%cmptmp = fcmp ult double %x2, 3.000000e+00
+	%booltmp = uitofp i1 %cmptmp to double
+	%ifcond = fcmp one double %booltmp, 0.000000e+00
+	br i1 %ifcond, label %then, label %else
+
+then:		; preds = %entry
+	br label %ifcont
+
+else:		; preds = %entry
+	%x3 = load double* %x1
+	%subtmp = fsub double %x3, 1.000000e+00
+	%calltmp = call double @fib(double %subtmp)
+	%x4 = load double* %x1
+	%subtmp5 = fsub double %x4, 2.000000e+00
+	%calltmp6 = call double @fib(double %subtmp5)
+	%addtmp = fadd double %calltmp, %calltmp6
+	br label %ifcont
+
+ifcont:		; preds = %else, %then
+	%iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+	ret double %iftmp
+}

Here there is only one variable (x, the input argument) but you can still see the extremely simple-minded code generation strategy we are using. In the entry block, an alloca is created, and the initial input value is stored into it. Each reference to the variable does a reload from the stack. Also, note that we didn't modify the if/then/else expression, so it still inserts a PHI node. While we could make an alloca for it, it is actually easier to create a PHI node for it, so we still just make the PHI.
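If we did want to avoid the PHI in the if/then/else expression, a minimal sketch of the alloca-based alternative would look like this (hypothetical; the tutorial code keeps the PHI):

  // Allocate a stack slot for the result of the if/then/else.
  AllocaInst *IfTmp = CreateEntryBlockAlloca(TheFunction, "iftmp");

  // In the 'then' block: store the then-value, then branch to the merge block.
  Builder.CreateStore(ThenV, IfTmp);
  Builder.CreateBr(MergeBB);

  // In the 'else' block: store the else-value, then branch to the merge block.
  Builder.CreateStore(ElseV, IfTmp);
  Builder.CreateBr(MergeBB);

  // In the merge block: a single load replaces the PHI node (mem2reg would
  // turn this back into the same PHI anyway).
  Builder.SetInsertPoint(MergeBB);
  return Builder.CreateLoad(IfTmp, "iftmp");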

Here is the code after the mem2reg pass runs:

+define double @fib(double %x) {
+entry:
+	%cmptmp = fcmp ult double %x, 3.000000e+00
+	%booltmp = uitofp i1 %cmptmp to double
+	%ifcond = fcmp one double %booltmp, 0.000000e+00
+	br i1 %ifcond, label %then, label %else
+
+then:
+	br label %ifcont
+
+else:
+	%subtmp = fsub double %x, 1.000000e+00
+	%calltmp = call double @fib(double %subtmp)
+	%subtmp5 = fsub double %x, 2.000000e+00
+	%calltmp6 = call double @fib(double %subtmp5)
+	%addtmp = fadd double %calltmp, %calltmp6
+	br label %ifcont
+
+ifcont:		; preds = %else, %then
+	%iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+	ret double %iftmp
+}

This is a trivial case for mem2reg, since there are no redefinitions of the variable. The point of showing this is to calm your tension about inserting such blatant inefficiencies :).

After the rest of the optimizers run, we get:

+define double @fib(double %x) {
+entry:
+	%cmptmp = fcmp ult double %x, 3.000000e+00
+	%booltmp = uitofp i1 %cmptmp to double
+	%ifcond = fcmp ueq double %booltmp, 0.000000e+00
+	br i1 %ifcond, label %else, label %ifcont
+
+else:
+	%subtmp = fsub double %x, 1.000000e+00
+	%calltmp = call double @fib(double %subtmp)
+	%subtmp5 = fsub double %x, 2.000000e+00
+	%calltmp6 = call double @fib(double %subtmp5)
+	%addtmp = fadd double %calltmp, %calltmp6
+	ret double %addtmp
+
+ifcont:
+	ret double 1.000000e+00
+}

Here we see that the simplifycfg pass decided to clone the return instruction into the end of the 'else' block. This allowed it to eliminate some branches and the PHI node.

Now that all symbol table references are updated to use stack variables, we'll add the assignment operator.

New Assignment Operator

With our current framework, adding a new assignment operator is really simple. We will parse it just like any other binary operator, but handle it internally (instead of allowing the user to define it). The first step is to set a precedence:

+ int main() {
+   // Install standard binary operators.
+   // 1 is lowest precedence.
+   BinopPrecedence['='] = 2;
+   BinopPrecedence['<'] = 10;
+   BinopPrecedence['+'] = 20;
+   BinopPrecedence['-'] = 20;

Now that the parser knows the precedence of the binary operator, it takes care of all the parsing and AST generation. We just need to implement codegen for the assignment operator. This looks like:

+Value *BinaryExprAST::Codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+    if (!LHSE)
+      return ErrorV("destination of '=' must be a variable");

Unlike the rest of the binary operators, our assignment operator doesn't follow the "emit LHS, emit RHS, do computation" model. As such, it is handled as a special case before the other binary operators are handled. The other strange thing is that it requires the LHS to be a variable. It is invalid to have "(x+1) = expr" - only things like "x = expr" are allowed.

+    // Codegen the RHS.
+    Value *Val = RHS->Codegen();
+    if (Val == 0) return 0;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (Variable == 0) return ErrorV("Unknown variable name");
+
+    Builder.CreateStore(Val, Variable);
+    return Val;
+  }
+  ...  

Once we have the variable, codegen'ing the assignment is straightforward: we emit the RHS of the assignment, create a store, and return the computed value. Returning a value allows for chained assignments like "X = (Y = Z)".

Now that we have an assignment operator, we can mutate loop variables and arguments. For example, we can now run code like this:

+# Function to print a double.
+extern printd(x);
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+def test(x)
+  printd(x) :
+  x = 4 :
+  printd(x);
+
+test(123);

When run, this example prints "123" and then "4", showing that we did actually mutate the value! Okay, we have now officially implemented our goal: getting this to work requires SSA construction in the general case. However, to be really useful, we want the ability to define our own local variables; let's add this next!

User-defined Local Variables

Adding var/in is just like any of the other extensions we made to Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. The first step for adding our new 'var/in' construct is to extend the lexer. As before, this is pretty trivial; the code looks like this:

+enum Token {
+  ...
+  // var definition
+  tok_var = -13
+...
+}
+...
+static int gettok() {
+...
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "var") return tok_var;
+    return tok_identifier;
+...

The next step is to define the AST node that we will construct. For var/in, it looks like this:

+/// VarExprAST - Expression class for var/in
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+  ExprAST *Body;
+public:
+  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+             ExprAST *body)
+  : VarNames(varnames), Body(body) {}
+  
+  virtual Value *Codegen();
+};

var/in allows a list of names to be defined all at once, and each name can optionally have an initializer value. As such, we capture this information in the VarNames vector. Also, var/in has a body; this body is allowed to access the variables defined by the var/in.
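For example, given "var a = 1, b in ...", the parser (defined below) would build something equivalent to this hand-written sketch (BodyExpr is a stand-in for the parsed body expression):

  std::vector<std::pair<std::string, ExprAST*> > Names;
  Names.push_back(std::make_pair("a", new NumberExprAST(1.0)));
  Names.push_back(std::make_pair("b", (ExprAST*)0)); // no initializer: defaults to 0.0
  ExprAST *E = new VarExprAST(Names, BodyExpr);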

With this in place, we can define the parser pieces. The first thing we do is add it as a primary expression:

+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  case tok_var:        return ParseVarExpr();
+  }
+}

Next we define ParseVarExpr:

+/// varexpr ::= 'var' identifier ('=' expression)? 
+//                    (',' identifier ('=' expression)?)* 'in' expression
+static ExprAST *ParseVarExpr() {
+  getNextToken();  // eat the var.
+
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after var");

The first part of this code parses the list of identifier/expr pairs into the local VarNames vector.

+  while (1) {
+    std::string Name = IdentifierStr;
+    getNextToken();  // eat identifier.
+
+    // Read the optional initializer.
+    ExprAST *Init = 0;
+    if (CurTok == '=') {
+      getNextToken(); // eat the '='.
+      
+      Init = ParseExpression();
+      if (Init == 0) return 0;
+    }
+    
+    VarNames.push_back(std::make_pair(Name, Init));
+    
+    // End of var list, exit loop.
+    if (CurTok != ',') break;
+    getNextToken(); // eat the ','.
+    
+    if (CurTok != tok_identifier)
+      return Error("expected identifier list after var");
+  }

Once all the variables are parsed, we then parse the body and create the AST node:

+  // At this point, we have to have 'in'.
+  if (CurTok != tok_in)
+    return Error("expected 'in' keyword after 'var'");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+  
+  return new VarExprAST(VarNames, Body);
+}

Now that we can parse and represent the code, we need to support emission of LLVM IR for it. This code starts out with:

+Value *VarExprAST::Codegen() {
+  std::vector<AllocaInst *> OldBindings;
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second;

Basically it loops over all the variables, installing them one at a time. For each variable we put into the symbol table, we remember the previous value that we replace in OldBindings.

+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->Codegen();
+      if (InitVal == 0) return 0;
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+    }
+    
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder.CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+    
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }

There are more comments here than code. The basic idea is that we emit the initializer, create the alloca, then update the symbol table to point to it. Once all the variables are installed in the symbol table, we evaluate the body of the var/in expression:

+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->Codegen();
+  if (BodyVal == 0) return 0;

Finally, before returning, we restore the previous variable bindings:

+  // Pop all our variables from scope.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+    NamedValues[VarNames[i].first] = OldBindings[i];
+
+  // Return the body computation.
+  return BodyVal;
+}

The end result of all of this is that we get properly scoped variable definitions, and we even (trivially) allow mutation of them :).

With this, we completed what we set out to do. Our nice iterative fib example from the intro compiles and runs just fine. The mem2reg pass optimizes all of our stack variables into SSA registers, inserting PHI nodes where needed, and our front-end remains simple: no "iterated dominance frontier" computation anywhere in sight.

Full Code Listing

Here is the complete code listing for our running example, enhanced with mutable variables and var/in support. To build this example, use:

+   # Compile
+   g++ -g toy.cpp `llvm-config --cppflags --ldflags --libs core jit native` -O3 -o toy
+   # Run
+   ./toy

Here is the code:

+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+  
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+  
+  // operators
+  tok_binary = -11, tok_unary = -12,
+  
+  // var definition
+  tok_var = -13
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "var") return tok_var;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+    
+    if (LastChar != EOF)
+      return gettok();
+  }
+  
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  const std::string &getName() const { return Name; }
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand) 
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) 
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+  : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// VarExprAST - Expression class for var/in
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+  ExprAST *Body;
+public:
+  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+             ExprAST *body)
+  : VarNames(varnames), Body(body) {}
+  
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+  : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+  
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+  
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+  
+  unsigned getBinaryPrecedence() const { return Precedence; }
+  
+  Function *Codegen();
+  
+  void CreateArgumentAllocas(Function *F);
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+  
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+  
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+  
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+  
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+  
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+  
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+  
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+  
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+  
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+  
+  if (CurTok != tok_else)
+    return Error("expected else");
+  
+  getNextToken();
+  
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+  
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+  
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+  
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+  
+  
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+  
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+  
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+  
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)? 
+//                    (',' identifier ('=' expression)?)* 'in' expression
+static ExprAST *ParseVarExpr() {
+  getNextToken();  // eat the var.
+
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after var");
+  
+  while (1) {
+    std::string Name = IdentifierStr;
+    getNextToken();  // eat identifier.
+
+    // Read the optional initializer.
+    ExprAST *Init = 0;
+    if (CurTok == '=') {
+      getNextToken(); // eat the '='.
+      
+      Init = ParseExpression();
+      if (Init == 0) return 0;
+    }
+    
+    VarNames.push_back(std::make_pair(Name, Init));
+    
+    // End of var list, exit loop.
+    if (CurTok != ',') break;
+    getNextToken(); // eat the ','.
+    
+    if (CurTok != tok_identifier)
+      return Error("expected identifier list after var");
+  }
+  
+  // At this point, we have to have 'in'.
+  if (CurTok != tok_in)
+    return Error("expected 'in' keyword after 'var'");
+  getNextToken();  // eat 'in'.
+  
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+  
+  return new VarExprAST(VarNames, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  case tok_if:         return ParseIfExpr();
+  case tok_for:        return ParseForExpr();
+  case tok_var:        return ParseVarExpr();
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+  
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+    
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+    
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+    
+    // Parse the unary expression after the binary operator.
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+    
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+    
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+  
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+  
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+  
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+    
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+  
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+  
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+  
+  // success.
+  getNextToken();  // eat ')'.
+  
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+  
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, AllocaInst*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function.  This is used for mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          const std::string &VarName) {
+  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                 TheFunction->getEntryBlock().begin());
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                           VarName.c_str());
+}
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  if (V == 0) return ErrorV("Unknown variable name");
+
+  // Load the value.
+  return Builder.CreateLoad(V, Name.c_str());
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+  
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+  
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+    if (!LHSE)
+      return ErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->Codegen();
+    if (Val == 0) return 0;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (Variable == 0) return ErrorV("Unknown variable name");
+
+    Builder.CreateStore(Val, Variable);
+    return Val;
+  }
+  
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+  
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+  
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+  
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+  
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+  
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  
+  // Create blocks for the then and else cases.  Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+  
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+  
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+  
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+  
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+  
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+  
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+  
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+  
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   var = alloca double
+  //   ...
+  //   start = startexpr
+  //   store start -> var
+  //   goto loop
+  // loop: 
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   endcond = endexpr
+  //
+  //   curvar = load var
+  //   nextvar = curvar + step
+  //   store nextvar -> var
+  //   br endcond, loop, endloop
+  // outloop:
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create an alloca for the variable in the entry block.
+  AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+  
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+  
+  // Store the value into the alloca.
+  Builder.CreateStore(StartVal, Alloca);
+  
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+  
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+  
+  // Within the loop, the variable is defined equal to the PHI node.  If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  AllocaInst *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Alloca;
+  
+  // Emit the body of the loop.  This, like any other expr, can change the
+  // current BB.  Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+  
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+  
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+  
+  // Reload, increment, and restore the alloca.  This handles the case where
+  // the body of the loop mutates the variable.
+  Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
+  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
+  Builder.CreateStore(NextVar, Alloca);
+  
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond, 
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+  
+  // Create the "after loop" block and insert it.
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+  
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+  
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+  
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+  
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Value *VarExprAST::Codegen() {
+  std::vector<AllocaInst *> OldBindings;
+  
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second;
+    
+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //  var a = 1 in
+    //    var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->Codegen();
+      if (InitVal == 0) return 0;
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+    }
+    
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder.CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+    
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }
+  
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->Codegen();
+  if (BodyVal == 0) return 0;
+  
+  // Pop all our variables from scope.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+    NamedValues[VarNames[i].first] = OldBindings[i];
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+  
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+  
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+    
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+    
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+  
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx)
+    AI->setName(Args[Idx]);
+    
+  return F;
+}
+
+/// CreateArgumentAllocas - Create an alloca for each argument and register the
+/// argument in the symbol table so that references to it will succeed.
+void PrototypeAST::CreateArgumentAllocas(Function *F) {
+  Function::arg_iterator AI = F->arg_begin();
+  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+    // Store the initial value into the alloca.
+    Builder.CreateStore(AI, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = Alloca;
+  }
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+  
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+  
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+  
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+  
+  // Add all arguments to the symbol table and create their allocas.
+  Proto->CreateArgumentAllocas(TheFunction);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+    
+    return TheFunction;
+  }
+  
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  if (Proto->isBinaryOp())
+    BinopPrecedence.erase(Proto->getOperatorName());
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+      
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof:    return;
+    case ';':        getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def:    HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default:         HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C" 
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/// printd - printf that takes a double prints it as "%f\n", returning 0.
+extern "C" 
+double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['='] = 2;
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+  TheModule = new Module("my cool jit", Context);
+
+  // Create the JIT.  This takes ownership of the module.
+  std::string ErrStr;
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  if (!TheExecutionEngine) {
+    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+    exit(1);
+  }
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Promote allocas to registers.
+  OurFPM.add(createPromoteMemoryToRegisterPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
+
+
Next: Conclusion and other useful LLVM tidbits
Chris Lattner
The LLVM Compiler Infrastructure
Last modified: $Date$
diff --git a/final/docs/tutorial/LangImpl8.html b/final/docs/tutorial/LangImpl8.html
new file mode 100644
index 00000000000..64a62002c4c
--- /dev/null
+++ b/final/docs/tutorial/LangImpl8.html
@@ -0,0 +1,365 @@

Kaleidoscope: Conclusion and other useful LLVM tidbits

Written by Chris Lattner

Tutorial Conclusion

Welcome to the final chapter of the "Implementing a language with LLVM" tutorial. In the course of this tutorial, we have grown our little Kaleidoscope language from being a useless toy, to being a semi-interesting (but probably still useless) toy. :)

It is interesting to see how far we've come, and how little code it has taken. We built the entire lexer, parser, AST, code generator, and an interactive run-loop (with a JIT!) by hand in under 700 lines of (non-comment/non-blank) code.

Our little language supports a couple of interesting features: it supports user-defined binary and unary operators, it uses JIT compilation for immediate evaluation, and it supports a few control flow constructs with SSA construction.

Part of the idea of this tutorial was to show you how easy and fun it can be to define, build, and play with languages. Building a compiler need not be a scary or mystical process! Now that you've seen some of the basics, I strongly encourage you to take the code and hack on it. For example, try adding:

  • global variables - While global variables have questionable value in modern software engineering, they are often useful when putting together quick little hacks like the Kaleidoscope compiler itself. Fortunately, our current setup makes it very easy to add global variables: just have value lookup check to see if an unresolved variable is in the global variable symbol table before rejecting it. To create a new global variable, make an instance of the LLVM GlobalVariable class (see the sketch after this list).

  • typed variables - Kaleidoscope currently only supports variables of type double. This gives the language a very nice elegance, because only supporting one type means that you never have to specify types. Different languages have different ways of handling this. The easiest way is to require the user to specify types for every variable definition, and record the type of the variable in the symbol table along with its Value*.

  • arrays, structs, vectors, etc - Once you add types, you can start extending the type system in all sorts of interesting ways. Simple arrays are very easy and are quite useful for many different applications. Adding them is mostly an exercise in learning how the LLVM getelementptr instruction works: it is so nifty/unconventional, it has its own FAQ! If you add support for recursive types (e.g. linked lists), make sure to read the section in the LLVM Programmer's Manual that describes how to construct them.

  • standard runtime - Our current language allows the user to access arbitrary external functions, and we use it for things like "printd" and "putchard". As you extend the language to add higher-level constructs, often these constructs make the most sense if they are lowered to calls into a language-supplied runtime. For example, if you add hash tables to the language, it would probably make sense to add the routines to a runtime, instead of inlining them all the way.

  • memory management - Currently we can only access the stack in Kaleidoscope. It would also be useful to be able to allocate heap memory, either with calls to the standard libc malloc/free interface or with a garbage collector. If you would like to use garbage collection, note that LLVM fully supports Accurate Garbage Collection including algorithms that move objects and need to scan/update the stack.

  • debugger support - LLVM supports generation of DWARF Debug info which is understood by common debuggers like GDB. Adding support for debug info is fairly straightforward. The best way to understand it is to compile some C/C++ code with "llvm-gcc -g -O0" and take a look at what it produces.

  • exception handling support - LLVM supports generation of zero cost exceptions which interoperate with code compiled in other languages. You could also generate code by implicitly making every function return an error value and checking it. You could also make explicit use of setjmp/longjmp. There are many different ways to go here.

  • object orientation, generics, database access, complex numbers, geometric programming, ... - Really, there is no end of crazy features that you can add to the language.

  • unusual domains - We've been talking about applying LLVM to a domain that many people are interested in: building a compiler for a specific language. However, there are many other domains that can use compiler technology that are not typically considered. For example, LLVM has been used to implement OpenGL graphics acceleration, translate C++ code to ActionScript, and many other cute and clever things. Maybe you will be the first to JIT compile a regular expression interpreter into native code with LLVM?
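Picking up the global variables idea from the list above, here is a minimal, untested sketch of what creating and registering a Kaleidoscope global might look like (GlobalValues and CreateGlobal are hypothetical names, not part of the tutorial code; the constructor shown is the LLVM 2.9-era GlobalVariable API):

  #include "llvm/GlobalVariable.h"

  // Hypothetical global symbol table, consulted when NamedValues lookup fails.
  static std::map<std::string, GlobalVariable*> GlobalValues;

  static GlobalVariable *CreateGlobal(const std::string &Name) {
    // All Kaleidoscope values are doubles; default-initialize globals to 0.0.
    GlobalVariable *GV =
      new GlobalVariable(*TheModule, Type::getDoubleTy(getGlobalContext()),
                         /*isConstant=*/false, GlobalValue::ExternalLinkage,
                         ConstantFP::get(getGlobalContext(), APFloat(0.0)), Name);
    GlobalValues[Name] = GV;
    return GV;
  }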

Have fun - try doing something crazy and unusual. Building a language like everyone else always has is much less fun than trying something a little crazy or off the wall and seeing how it turns out. If you get stuck or want to talk about it, feel free to email the llvmdev mailing list: it has lots of people who are interested in languages and are often willing to help out.

Before we end this tutorial, I want to talk about some "tips and tricks" for generating LLVM IR. These are some of the more subtle things that may not be obvious, but are very useful if you want to take advantage of LLVM's capabilities.

Properties of the LLVM IR

We have a couple of common questions about code in the LLVM IR form - let's just get these out of the way right now, shall we?

Target Independence

Kaleidoscope is an example of a "portable language": any program written in Kaleidoscope will work the same way on any target that it runs on. Many other languages have this property, e.g. lisp, java, haskell, javascript, python, etc (note that while these languages are portable, not all their libraries are).

One nice aspect of LLVM is that it is often capable of preserving target independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled program and run it on any target that LLVM supports, even emitting C code and compiling that on targets that LLVM doesn't support natively. You can trivially tell that the Kaleidoscope compiler generates target-independent code because it never queries for any target-specific information when generating code.

+ +

The fact that LLVM provides a compact, target-independent representation for +code gets a lot of people excited. Unfortunately, these people are usually +thinking about C or a language from the C family when they are asking questions +about language portability. I say "unfortunately", because there is really no +way to make (fully general) C code portable, other than shipping the source code +around (and of course, C source code is not actually portable in general +either - ever port a really old application from 32- to 64-bits?).

+ +

The problem with C (again, in its full generality) is that it is heavily +laden with target-specific assumptions. As one simple example, the preprocessor +often destructively removes target-independence from the code when it processes +the input text:

+ +
+
+#ifdef __i386__
+  int X = 1;
+#else
+  int X = 42;
+#endif
+
+
+ +

While it is possible to engineer more and more complex solutions to problems +like this, it cannot be solved in full generality in a way that is better than shipping +the actual source code.

+ +

That said, there are interesting subsets of C that can be made portable. If +you are willing to fix primitive types to a fixed size (say int = 32-bits, +and long = 64-bits), don't care about ABI compatibility with existing binaries, +and are willing to give up some other minor features, you can have portable +code. This can make sense for specialized domains such as an +in-kernel language.

+ +
+ + + + + +
+ +

Many of the languages above are also "safe" languages: it is impossible for +a program written in Java to corrupt its address space and crash the process +(assuming the JVM has no bugs). +Safety is an interesting property that requires a combination of language +design, runtime support, and often operating system support.

+ +

It is certainly possible to implement a safe language in LLVM, but LLVM IR +does not itself guarantee safety. The LLVM IR allows unsafe pointer casts, +use-after-free bugs, buffer overruns, and a variety of other problems. Safety +needs to be implemented as a layer on top of LLVM and, conveniently, several +groups have investigated this. Ask on the llvmdev mailing +list if you are interested in more details.

+ +
+ + + + + +
+ +

One thing about LLVM that turns off many people is that it does not solve all +the world's problems in one system (sorry 'world hunger', someone else will have +to solve you some other day). One specific complaint is that people perceive +LLVM as being incapable of performing high-level language-specific optimization: +LLVM "loses too much information".

+ +

Unfortunately, this is really not the place to give you a full and unified +version of "Chris Lattner's theory of compiler design". Instead, I'll make a +few observations:

+ +

First, you're right that LLVM does lose information. For example, as of this +writing, there is no way to distinguish in the LLVM IR whether an SSA-value came +from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both +get compiled down to an 'i32' value and the information about what it came from +is lost. The more general issue here is that the LLVM type system uses +"structural equivalence" instead of "name equivalence". Another place this +surprises people is if you have two types in a high-level language that have the +same structure (e.g. two different structs that have a single int field): these +types will compile down into a single LLVM type and it will be impossible to +tell which one a given value came from.

+ +
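To make the structural equivalence point concrete, here is a small sketch using +the LLVM OCaml bindings (the names "celsius" and "fahrenheit" stand for +hypothetical source-level types; they are my illustration, not tutorial code): +two conceptually different types with the same layout map to the same uniqued +LLVM type.
+ +
+
+(* Sketch: structural equivalence in LLVM 2.x's type system. Two "different"
+ * source types with identical layout become indistinguishable in the IR. *)
+let ctx = Llvm.global_context ()
+let celsius = Llvm.struct_type ctx [| Llvm.i32_type ctx |]
+let fahrenheit = Llvm.struct_type ctx [| Llvm.i32_type ctx |]
+(* celsius and fahrenheit are the same lltype; the source names are gone. *)
+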

Second, while LLVM does lose information, LLVM is not a fixed target: we +continue to enhance and improve it in many different ways. In addition to +adding new features (LLVM did not always support exceptions or debug info), we +also extend the IR to capture important information for optimization (e.g. +whether an argument is sign or zero extended, information about pointer +aliasing, etc). Many of the enhancements are user-driven: people want LLVM to +include some specific feature, so they go ahead and extend it.

+ +

Third, it is possible and easy to add language-specific +optimizations, and you have a number of choices in how to do it. As one trivial +example, it is easy to add language-specific optimization passes that +"know" things about code compiled for a language. In the case of the C family, +there is an optimization pass that "knows" about the standard C library +functions. If you call "exit(0)" in main(), it knows that it is safe to +optimize that into "return 0;" because C specifies what the 'exit' +function does.

+ +

In addition to simple library knowledge, it is possible to embed a variety of +other language-specific information into the LLVM IR. If you have a specific +need and run into a wall, please bring the topic up on the llvmdev list. At the +very worst, you can always treat LLVM as if it were a "dumb code generator" and +implement the high-level optimizations you desire in your front-end, on the +language-specific AST. +

+ +
+ + + + + +
+ +

There are a variety of useful tips and tricks that you come to know after +working on/with LLVM that aren't obvious at first glance. Instead of letting +everyone rediscover them, this section talks about some of these issues.

+ +
+ + + + + +
+ +

One interesting thing that comes up, if you are trying to keep the code +generated by your compiler "target independent", is that you often need to know +the size of some LLVM type or the offset of some field in an LLVM structure. +For example, you might need to pass the size of a type into a function that +allocates memory.

+ +

Unfortunately, this can vary widely across targets: for example the width of +a pointer is trivially target-specific. However, there is a clever +way to use the getelementptr instruction that allows you to compute this +in a portable way.
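For example, here is a sketch of the trick with the OCaml bindings +(size_in_bytes is my name, not an LLVM API): index one element past a null +pointer and convert the resulting address to an integer, yielding sizeof(ty) as +a constant expression that the target lowers for you.
+ +
+
+(* Sketch: portable sizeof as a constant expression. "gep null, 1" computes
+ * the address one element past address zero, i.e. the size of one element. *)
+let size_in_bytes ctx ty =
+  let one = Llvm.const_int (Llvm.i32_type ctx) 1 in
+  let gep = Llvm.const_gep (Llvm.const_null (Llvm.pointer_type ty)) [| one |] in
+  Llvm.const_ptrtoint gep (Llvm.i64_type ctx)
+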

+ +
+ + + + + +
+ +

Some languages want to explicitly manage their stack frames, often so that +they can be garbage collected or so that closures are easy to implement. There +are often better ways to implement these features than explicit stack frames, +but LLVM +does support them, if you want. It requires your front-end to convert the +code into Continuation +Passing Style and the use of tail calls (which LLVM also supports), as the +sketch below illustrates.

+ +
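As a taste of what that front-end transformation looks like, here is the +tutorial's fib written in plain OCaml in continuation-passing style (an +illustration only, not compiler code): the continuation k carries what would +otherwise live in a stack frame, so frames can become heap-allocated closures.
+ +
+
+(* CPS illustration: the continuation 'k' replaces the implicit call stack. *)
+let rec fib_cps n k =
+  if n < 3 then k 1
+  else fib_cps (n - 1) (fun a -> fib_cps (n - 2) (fun b -> k (a + b)))
+
+let () = fib_cps 10 (fun result -> Printf.printf "fib 10 = %d\n" result)
+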
+ + +
+
+ Chris Lattner
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/tutorial/Makefile b/final/docs/tutorial/Makefile new file mode 100644 index 00000000000..9082ad4d857 --- /dev/null +++ b/final/docs/tutorial/Makefile @@ -0,0 +1,28 @@ +##===- docs/tutorial/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../.. +include $(LEVEL)/Makefile.common + +HTML := $(wildcard $(PROJ_SRC_DIR)/*.html) +EXTRA_DIST := $(HTML) index.html +HTML_DIR := $(DESTDIR)$(PROJ_docsdir)/html/tutorial + +install-local:: $(HTML) + $(Echo) Installing HTML Tutorial Documentation + $(Verb) $(MKDIR) $(HTML_DIR) + $(Verb) $(DataInstall) $(HTML) $(HTML_DIR) + $(Verb) $(DataInstall) $(PROJ_SRC_DIR)/index.html $(HTML_DIR) + +uninstall-local:: + $(Echo) Uninstalling Tutorial Documentation + $(Verb) $(RM) -rf $(HTML_DIR) + +printvars:: + $(Echo) "HTML : " '$(HTML)' diff --git a/final/docs/tutorial/OCamlLangImpl1.html b/final/docs/tutorial/OCamlLangImpl1.html new file mode 100644 index 00000000000..98c1124cc12 --- /dev/null +++ b/final/docs/tutorial/OCamlLangImpl1.html @@ -0,0 +1,365 @@ + + + + + Kaleidoscope: Tutorial Introduction and the Lexer + + + + + + + + +
Kaleidoscope: Tutorial Introduction and the Lexer
+ + + +
+

+ Written by Chris Lattner + and Erick Tryzelaar +

+
+ + + + + +
+ +

Welcome to the "Implementing a language with LLVM" tutorial. This tutorial +runs through the implementation of a simple language, showing how fun and +easy it can be. This tutorial will get you up and running as well as help to +build a framework you can extend to other languages. The code in this tutorial +can also be used as a playground to hack on other LLVM-specific things. +

+ +

+The goal of this tutorial is to progressively unveil our language, describing +how it is built up over time. This will let us cover a fairly broad range of +language design and LLVM-specific usage issues, showing and explaining the code +for it all along the way, without overwhelming you with tons of details up +front.

+ +

It is useful to point out ahead of time that this tutorial is really about +teaching compiler techniques and LLVM specifically, not about teaching +modern and sane software engineering principles. In practice, this means that +we'll take a number of shortcuts to simplify the exposition. For example, the +code leaks memory, uses global variables all over the place, doesn't use nice +design patterns like visitors, etc... but it +is very simple. If you dig in and use the code as a basis for future projects, +fixing these deficiencies shouldn't be hard.

+ +

I've tried to put this tutorial together in a way that makes chapters easy to +skip over if you are already familiar with or are uninterested in the various +pieces. The structure of the tutorial is: +

+ +
    +
  • Chapter #1: Introduction to the Kaleidoscope +language, and the definition of its Lexer - This shows where we are going +and the basic functionality that we want it to do. In order to make this +tutorial maximally understandable and hackable, we choose to implement +everything in Objective Caml instead of using lexer and parser generators. +LLVM obviously works just fine with such tools; feel free to use one if you +prefer.
  • Chapter #2: Implementing a Parser and +AST - With the lexer in place, we can talk about parsing techniques and +basic AST construction. This tutorial describes recursive descent parsing and +operator precedence parsing. Nothing in Chapters 1 or 2 is LLVM-specific, +the code doesn't even link in LLVM at this point. :)
  • Chapter #3: Code generation to LLVM +IR - With the AST ready, we can show off how easy generation of LLVM IR +really is.
  • Chapter #4: Adding JIT and Optimizer +Support - Because a lot of people are interested in using LLVM as a JIT, +we'll dive right into it and show you the 3 lines it takes to add JIT support. +LLVM is also useful in many other ways, but this is one simple and "sexy" way +to show off its power. :)
  • Chapter #5: Extending the Language: +Control Flow - With the language up and running, we show how to extend it +with control flow operations (if/then/else and a 'for' loop). This gives us a +chance to talk about simple SSA construction and control flow.
  • Chapter #6: Extending the Language: +User-defined Operators - This is a silly but fun chapter that talks about +extending the language to let the user define their own arbitrary +unary and binary operators (with assignable precedence!). This lets us build a +significant piece of the "language" as library routines.
  • Chapter #7: Extending the Language: +Mutable Variables - This chapter talks about adding user-defined local +variables along with an assignment operator. The interesting part about this +is how easy and trivial it is to construct SSA form in LLVM: no, LLVM does +not require your front-end to construct SSA form!
  • Chapter #8: Conclusion and other +useful LLVM tidbits - This chapter wraps up the series by talking about +potential ways to extend the language, but also includes a bunch of pointers to +info about "special topics" like adding garbage collection support, exceptions, +debugging, support for "spaghetti stacks", and a bunch of other tips and +tricks.
+ +

By the end of the tutorial, we'll have written a bit less than 700 +non-comment, non-blank lines of code. With this small amount of code, we'll +have built up a very reasonable compiler for a non-trivial language including +a hand-written lexer, parser, AST, as well as code generation support with a JIT +compiler. While other systems may have interesting "hello world" tutorials, +I think the breadth of this tutorial is a great testament to the strengths of +LLVM and why you should consider it if you're interested in language or compiler +design.

+ +

A note about this tutorial: we expect you to extend the language and play +with it on your own. Take the code and go crazy hacking away at it; compilers +don't need to be scary creatures - it can be a lot of fun to play with +languages!

+ +
+ + + + + +
+ +

This tutorial will be illustrated with a toy language that we'll call +"Kaleidoscope" (derived +from Greek roots meaning "beautiful, form, and view"). +Kaleidoscope is a procedural language that allows you to define functions, use +conditionals, math, etc. Over the course of the tutorial, we'll extend +Kaleidoscope to support the if/then/else construct, a for loop, user defined +operators, JIT compilation with a simple command line interface, etc.

+ +

Because we want to keep things simple, the only datatype in Kaleidoscope is a +64-bit floating point type (aka 'float' in O'Caml parlance). As such, all +values are implicitly double precision and the language doesn't require type +declarations. This gives the language a very nice and simple syntax. For +example, the following simple example computes Fibonacci numbers:

+ +
+
+# Compute the x'th fibonacci number.
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2)
+
+# This expression will compute the 40th number.
+fib(40)
+
+
+ +

We also allow Kaleidoscope to call into standard library functions (the LLVM +JIT makes this completely trivial). This means that you can use the 'extern' +keyword to define a function before you use it (this is also useful for mutually +recursive functions). For example:

+ +
+
+extern sin(arg);
+extern cos(arg);
+extern atan2(arg1 arg2);
+
+atan2(sin(.4), cos(42))
+
+
+ +

A more interesting example is included in Chapter 6 where we write a little +Kaleidoscope application that displays +a Mandelbrot Set at various levels of magnification.

+ +

Let's dive into the implementation of this language!

+ +
+ + + + + +
+ +

When it comes to implementing a language, the first thing needed is +the ability to process a text file and recognize what it says. The traditional +way to do this is to use a "lexer" (aka 'scanner') +to break the input up into "tokens". Each token returned by the lexer includes +a token code and potentially some metadata (e.g. the numeric value of a number). +First, we define the possibilities: +

+ +
+
+(* The lexer returns a 'Kwd' token for an unknown character, otherwise one of
+ * these other tokens for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+
+ +

Each token returned by our lexer will be one of the token variant values. +An unknown character like '+' will be returned as Token.Kwd '+'. If +the current token is an identifier, the value will be Token.Ident s. If +the current token is a numeric literal (like 1.0), the value will be +Token.Number 1.0. +

+ +
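As a quick illustration of how client code consumes these variants, here is a +hypothetical debugging helper (string_of_token is my addition, not part of the +tutorial sources):
+ +
+
+(* Hypothetical helper: render a Token.token for debugging. *)
+let string_of_token = function
+  | Token.Def -> "Def"
+  | Token.Extern -> "Extern"
+  | Token.Ident s -> Printf.sprintf "Ident %S" s
+  | Token.Number n -> Printf.sprintf "Number %g" n
+  | Token.Kwd c -> Printf.sprintf "Kwd %C" c
+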

The actual implementation of the lexer is a collection of functions driven +by a function named Lexer.lex. The Lexer.lex function is +called to return the next token from standard input. We will use +Camlp4 +to simplify the tokenization of the standard input. Its definition starts +as:

+ +
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+
+ +

+Lexer.lex works by recursing over a char Stream.t to read +characters one at a time from the standard input. It eats them as it recognizes +them and stores them in a Token.token variant. The first thing that +it has to do is ignore whitespace between tokens. This is accomplished with the +recursive call above.

+ +

The next thing Lexer.lex needs to do is recognize identifiers and +specific keywords like "def". Kaleidoscope does this with a pattern match +and a helper function.

+ +

+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+...
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+
+ +

Numeric values are similar:

+ +
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+...
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+
+ +

This is all pretty straight-forward code for processing input. When reading +a numeric value from input, we use the OCaml float_of_string function +to convert it to a numeric value that we store in Token.Number. Note +that this isn't doing sufficient error checking: it will raise Failure +if given the string "1.23.45.67". Feel free to extend it :). +
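A minimal sketch of one possible hardening (number_of_buffer is my name, not +tutorial code): wrap float_of_string so a malformed literal surfaces as a +readable message instead of a bare Failure "float_of_string".
+ +
+
+(* Sketch: turn float_of_string's generic failure into a readable error. *)
+let number_of_buffer buffer =
+  let s = Buffer.contents buffer in
+  try Token.Number (float_of_string s)
+  with Failure _ -> failwith ("malformed number: " ^ s)
+
+ +
Next we handle +comments: +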

+ +
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+...
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +

We handle comments by skipping to the end of the line and then returning the +next token. Finally, if the input doesn't match one of the above cases, it is +either an operator character like '+' or the end of the file. These are handled +with this code:

+ +
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+
+ +

With this, we have the complete lexer for the basic Kaleidoscope language +(the full code listing for the Lexer is +available in the next chapter of the +tutorial). Next we'll build a simple parser that +uses this to build an Abstract Syntax Tree. When we have that, we'll +include a driver so that you can use the lexer and parser together. +

+ +Next: Implementing a Parser and AST +
+ + +
+
+ Chris Lattner
+ Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/tutorial/OCamlLangImpl2.html b/final/docs/tutorial/OCamlLangImpl2.html new file mode 100644 index 00000000000..666510979fe --- /dev/null +++ b/final/docs/tutorial/OCamlLangImpl2.html @@ -0,0 +1,1045 @@ + + + + + Kaleidoscope: Implementing a Parser and AST + + + + + + + + +
Kaleidoscope: Implementing a Parser and AST
+ + + +
+

+ Written by Chris Lattner + and Erick Tryzelaar +

+
+ + + + + +
+ +

Welcome to Chapter 2 of the "Implementing a language +with LLVM in Objective Caml" tutorial. This chapter shows you how to use +the lexer, built in Chapter 1, to build a +full parser for our +Kaleidoscope language. Once we have a parser, we'll define and build an Abstract Syntax +Tree (AST).

+ +

The parser we will build uses a combination of Recursive Descent +Parsing and Operator-Precedence +Parsing to parse the Kaleidoscope language (the latter for +binary expressions and the former for everything else). Before we get to +parsing though, let's talk about the output of the parser: the Abstract Syntax +Tree.

+ +
+ + + + + +
+ +

The AST for a program captures its behavior in such a way that it is easy for +later stages of the compiler (e.g. code generation) to interpret. We basically +want one object for each construct in the language, and the AST should closely +model the language. In Kaleidoscope, we have expressions, a prototype, and a +function object. We'll start with expressions first:

+ +
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+
+ +

The code above shows the definition of the expr type and the first +variant, which we use for numeric literals. The important thing to note about +this code is that the Number variant captures the numeric value of the +literal directly. This allows later phases of the compiler to +know what the stored numeric value is.

+ +

Right now we only create the AST, so there are no useful functions on +them. It would be very easy to add a function to pretty print the code, +for example. Here are the other expression AST node definitions that we'll use +in the basic form of the Kaleidoscope language: +

+ +
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+
+ +

This is all (intentionally) rather straight-forward: variables capture the +variable name, binary operators capture their opcode (e.g. '+'), and calls +capture a function name as well as a list of any argument expressions. One thing +that is nice about our AST is that it captures the language features without +talking about the syntax of the language. Note that there is no discussion about +precedence of binary operators, lexical structure, etc.

+ +

For our basic language, these are all of the expression nodes we'll define. +Because it doesn't have conditional control flow, it isn't Turing-complete; +we'll fix that in a later installment. The two things we need next are a way +to talk about the interface to a function, and a way to talk about functions +themselves:

+ +
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +

In Kaleidoscope, functions are typed with just a count of their arguments. +Since all values are double precision floating point, the type of each argument +doesn't need to be stored anywhere. In a more aggressive and realistic +language, the "expr" variants would probably have a type field.

+ +
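For example, writing the value out by hand (my illustration, not part of the +tutorial sources), the prototype for "def foo(x y)" is just:
+ +
+
+(* The prototype for "def foo(x y)": a name plus the argument names. *)
+let foo_proto = Ast.Prototype ("foo", [| "x"; "y" |])
+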

With this scaffolding, we can now talk about parsing expressions and function +bodies in Kaleidoscope.

+ +
+ + + + + +
+ +

Now that we have an AST to build, we need to define the parser code to build +it. The idea here is that we want to parse something like "x+y" (which is +returned as three tokens by the lexer) into an AST that could be generated with +calls like this:

+ +
+
+  let x = Variable "x" in
+  let y = Variable "y" in
+  let result = Binary ('+', x, y) in
+  ...
+
+
+ +

+The error handling routines make use of the builtin Stream.Failure and +Stream.Error exceptions. Stream.Failure is raised when the parser is +unable to find any matching token in the first position of a pattern. +Stream.Error is raised when the first token matches, but the rest do +not. The error recovery in our parser will not be the best and is not +particularly user-friendly, but it will be enough for our tutorial. These +exceptions make it easier to handle errors in routines that have various return +types.

+ +
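A small sketch of the distinction (my example, not tutorial code): a driver can +tell "nothing matched at all" apart from "matched, then went wrong".
+ +
+
+(* Sketch: the two stream-parser exceptions call for different reactions. *)
+let try_parse stream =
+  try ignore (Parser.parse_expr stream) with
+  | Stream.Failure -> print_endline "no expression found here"
+  | Stream.Error msg -> print_endline ("malformed expression: " ^ msg)
+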

With these basic types and exceptions, we can implement the first +piece of our grammar: numeric literals.

+ +
+ + + + + +
+ +

We start with numeric literals, because they are the simplest to process. +For each production in our grammar, we'll define a function which parses that +production. We call this class of expressions "primary" expressions, for +reasons that will become more clear +later in the tutorial. In order to parse an arbitrary primary expression, +we need to determine what sort of expression it is. For numeric literals, we +have:

+ +
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr *)
+parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+
+ +

This routine is very simple: it expects to be called when the current token +is a Token.Number token. It takes the current number value, creates +an Ast.Number node, advances the lexer to the next token, and finally +returns.

+ +

There are some interesting aspects to this. The most important one is that +this routine eats all of the tokens that correspond to the production and +returns the lexer buffer with the next token (which is not part of the grammar +production) ready to go. This is a fairly standard way to go for recursive +descent parsers. For a better example, the parenthesis operator is defined like +this:

+ +
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+
+ +

This function illustrates a number of interesting things about the +parser:

+ +

+1) It shows how we use the Stream.Error exception. When called, this +function expects that the current token is a '(' token, but after parsing the +subexpression, it is possible that there is no ')' waiting. For example, if +the user types in "(4 x" instead of "(4)", the parser should emit an error. +Because errors can occur, the parser needs a way to indicate that they +happened. In our parser, we use the camlp4 shortcut syntax token ?? "parse +error", where if the token before the ?? does not match, then +Stream.Error "parse error" will be raised.

+ +

2) Another interesting aspect of this function is that it uses recursion by +calling Parser.parse_expr (we will soon see that +Parser.parse_expr can call Parser.parse_primary). This is +powerful because it allows us to handle recursive grammars, and keeps each +production very simple. Note that parentheses do not cause construction of AST +nodes themselves. While we could do it this way, the most important role of +parentheses is to guide the parser and provide grouping. Once the parser +constructs the AST, parentheses are not needed.

+ +

The next simple production is for handling variable references and function +calls:

+ +
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+
+ +

This routine follows the same style as the other routines. (It expects to be +called if the current token is a Token.Ident token.) It also has +recursion and error handling. One interesting aspect of this is that it uses +look-ahead to determine if the current identifier is a standalone +variable reference or if it is a function call expression. It handles this by +checking to see if the token after the identifier is a '(' token, constructing +either an Ast.Variable or an Ast.Call node as appropriate. +

+ +

We finish up by raising an exception if we received a token we didn't +expect:

+ +
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+
+ +

Now that basic expressions are handled, we need to handle binary expressions. +They are a bit more complex.

+ +
+ + + + + +
+ +

Binary expressions are significantly harder to parse because they are often +ambiguous. For example, when given the string "x+y*z", the parser can choose +to parse it as either "(x+y)*z" or "x+(y*z)". With common definitions from +mathematics, we expect the latter parse, because "*" (multiplication) has +higher precedence than "+" (addition).

+ +

There are many ways to handle this, but an elegant and efficient way is to +use Operator-Precedence +Parsing. This parsing technique uses the precedence of binary operators to +guide recursion. To start with, we need a table of precedences:

+ +
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+...
+
+let main () =
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+  ...
+
+
+ +

For the basic form of Kaleidoscope, we will only support 4 binary operators +(this can obviously be extended by you, our brave and intrepid reader). The +Parser.precedence function returns the precedence for the current +token, or -1 if the token is not a binary operator. Having a Hashtbl.t +makes it easy to add new operators and makes it clear that the algorithm doesn't +depend on the specific operators involved, but it would be easy enough to +eliminate the Hashtbl.t and do the comparisons in the +Parser.precedence function (or just use a fixed-size array); a sketch of +that variant appears below.

+ +
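Here is what that Hashtbl-free variant might look like (a sketch; the tutorial +itself keeps the Hashtbl so that Chapter 6 can install user-defined operators +at runtime):
+ +
+
+(* Sketch: a fixed precedence function instead of the mutable Hashtbl. *)
+let precedence = function
+  | '<' -> 10
+  | '+' | '-' -> 20
+  | '*' -> 40
+  | _ -> -1   (* not a binary operator *)
+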

With the helper above defined, we can now start parsing binary expressions. +The basic idea of operator precedence parsing is to break down an expression +with potentially ambiguous binary operators into pieces. Consider, for example, +the expression "a+b+(c+d)*e*f+g". Operator precedence parsing considers this +as a stream of primary expressions separated by binary operators. As such, +it will first parse the leading primary expression "a", then it will see the +pairs [+, b] [+, (c+d)] [*, e] [*, f] and [+, g]. Note that because parentheses +are primary expressions, the binary expression parser doesn't need to worry +about nested subexpressions like (c+d) at all. +

+ +

+To start, an expression is a primary expression potentially followed by a +sequence of [binop,primaryexpr] pairs:

+ +
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+
+ +

Parser.parse_bin_rhs is the function that parses the sequence of +pairs for us. It takes a precedence and the expression for the part +that has been parsed so far. Note that "x" is a perfectly valid expression: as +such, "binoprhs" is allowed to be empty, in which case it returns the expression +that is passed into it. In our example above, the code passes the expression for +"a" into Parser.parse_bin_rhs and the current token is "+".

+ +

The precedence value passed into Parser.parse_bin_rhs indicates the +minimal operator precedence that the function is allowed to eat. For +example, if the current pair stream is [+, x] and Parser.parse_bin_rhs +is passed in a precedence of 40, it will not consume any tokens (because the +precedence of '+' is only 20). With this in mind, Parser.parse_bin_rhs +starts with:

+ +
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+
+
+ +

This code gets the precedence of the current token and checks to see if it is +too low. Because we defined invalid tokens to have a precedence of -1, this +check implicitly knows that the pair-stream ends when the token stream runs out +of binary operators. If this check succeeds, we know that the token is a binary +operator and that it will be included in this expression:

+ +
+
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+
+
+ +

As such, this code eats (and remembers) the binary operator and then parses +the primary expression that follows. This builds up the whole pair, the first of +which is [+, b] for the running example.

+ +

Now that we parsed the left-hand side of an expression and one pair of the +RHS sequence, we have to decide which way the expression associates. In +particular, we could have "(a+b) binop unparsed" or "a + (b binop unparsed)". +To determine this, we look ahead at "binop" to determine its precedence and +compare it to BinOp's precedence (which is '+' in this case):

+ +
+
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+
+
+ +

If the precedence of the binop to the right of "RHS" is lower or equal to the +precedence of our current operator, then we know that the parentheses associate +as "(a+b) binop ...". In our example, since the current operator is "+" and the next +operator is "+", we know that they have the same precedence. In this case we'll +create the AST node for "a+b", and then continue parsing:

+ +
+
+          ... if body omitted ...
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+
+
+ +

In our example above, this will turn "a+b+" into "(a+b)" and execute the next +iteration of the loop, with "+" as the current token. The code above will eat, +remember, and parse "(c+d)" as the primary expression, which makes the +current pair equal to [+, (c+d)]. It will then evaluate the 'if' conditional above with +"*" as the binop to the right of the primary. In this case, the precedence of "*" is +higher than the precedence of "+" so the if condition will be entered.

+ +

The critical question left here is "how can the if condition parse the right +hand side in full"? In particular, to build the AST correctly for our example, +it needs to get all of "(c+d)*e*f" as the RHS expression variable. The code to +do this is surprisingly simple (code from the above two blocks duplicated for +context):

+ +
+
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              if token_prec < precedence c2
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+
+
+ +

At this point, we know that the binary operator to the RHS of our primary +has higher precedence than the binop we are currently parsing. As such, we know +that any sequence of pairs whose operators are all higher precedence than "+" +should be parsed together and returned as "RHS". To do this, we recursively +invoke the Parser.parse_bin_rhs function specifying "token_prec+1" as +the minimum precedence required for it to continue. In our example above, this +will cause it to return the AST node for "(c+d)*e*f" as RHS, which is then set +as the RHS of the '+' expression.

+ +

Finally, on the next iteration of the loop (the next recursive call to +Parser.parse_bin_rhs), the "+g" piece is parsed +and added to the AST. With this little bit of code (14 non-trivial lines), we +correctly handle fully general binary expression parsing in a very elegant way. +This was a whirlwind tour of this code, and it is somewhat subtle. I recommend +running through it with a few tough examples to see how it works; one such +example is worked below. +

+ +
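Worked example: for "a+b+(c+d)*e*f+g" with the precedences installed in the +driver, the parser builds the tree below (written out by hand, to check your +understanding against):
+ +
+
+(* "a+b+(c+d)*e*f+g" parses as (((a+b) + (((c+d)*e)*f)) + g): *)
+let tree =
+  Ast.Binary ('+',
+    Ast.Binary ('+',
+      Ast.Binary ('+', Ast.Variable "a", Ast.Variable "b"),
+      Ast.Binary ('*',
+        Ast.Binary ('*',
+          Ast.Binary ('+', Ast.Variable "c", Ast.Variable "d"),
+          Ast.Variable "e"),
+        Ast.Variable "f")),
+    Ast.Variable "g")
+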

This wraps up handling of expressions. At this point, we can point the +parser at an arbitrary token stream and build an expression from it, stopping +at the first token that is not part of the expression. Next up we need to +handle function definitions, etc.

+ +
+ + + + + +
+ +

+The next thing missing is handling of function prototypes. In Kaleidoscope, +these are used both for 'extern' function declarations as well as function body +definitions. The code to do this is straight-forward and not very interesting +(once you've survived expressions): +

+ +
+
+(* prototype
+ *   ::= id '(' id* ')' *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+
+ +

Given this, a function definition is very simple, just a prototype plus +an expression to implement the body:

+ +
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+
+ +

In addition, we support 'extern' to declare functions like 'sin' and 'cos' as +well as to support forward declaration of user functions. These 'extern's are just +prototypes with no body:

+ +
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +

Finally, we'll also let the user type in arbitrary top-level expressions and +evaluate them on the fly. We will handle this by defining anonymous nullary +(zero argument) functions for them:

+ +
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+
+ +

Now that we have all the pieces, let's build a little driver that will let us +actually execute this code we've built!

+ +
+ + + + + +
+ +

The driver for this simply invokes all of the parsing pieces with a top-level +dispatch loop. There isn't much interesting here, so I'll just include the +top-level loop. See below for full code in the "Top-Level +Parsing" section.

+ +
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            ignore(Parser.parse_definition stream);
+            print_endline "parsed a function definition.";
+        | Token.Extern ->
+            ignore(Parser.parse_extern stream);
+            print_endline "parsed an extern.";
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            ignore(Parser.parse_toplevel stream);
+            print_endline "parsed a top-level expr";
+        with Stream.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop stream
+
+
+ +

The most interesting part of this is that we ignore top-level semicolons. +Why is this, you ask? The basic reason is that if you type "4 + 5" at the +command line, the parser doesn't know whether that is the end of what you will type +or not. For example, on the next line you could type "def foo..." in which case +4+5 is the end of a top-level expression. Alternatively you could type "* 6", +which would continue the expression. Having top-level semicolons allows you to +type "4+5;", and the parser will know you are done.

+ +
+ + + + + +
+ +

With just under 300 lines of commented code (240 lines of non-comment, +non-blank code), we fully defined our minimal language, including a lexer, +parser, and AST builder. With this done, the executable will validate +Kaleidoscope code and tell us if it is grammatically invalid. For +example, here is a sample interaction:

+ +
+
+$ ./toy.byte
+ready> def foo(x y) x+foo(y, 4.0);
+Parsed a function definition.
+ready> def foo(x y) x+y y;
+Parsed a function definition.
+Parsed a top-level expr
+ready> def foo(x y) x+y );
+Parsed a function definition.
+Error: unknown token when expecting an expression
+ready> extern sin(a);
+ready> Parsed an extern
+ready> ^D
+$
+
+
+ +

There is a lot of room for extension here. You can define new AST nodes, +extend the language in many ways, etc. In the +next installment, we will describe how to generate LLVM Intermediate +Representation (IR) from the AST.

+ +
+ + + + + +
+ +

+Here is the complete code listing for this and the previous chapter. +Note that it is fully self-contained: you don't need LLVM or any external +libraries at all for this. (Besides the OCaml standard libraries, of +course.) To build this, just compile with:

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+ +
+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+
+
+ +
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns a 'Kwd' token for an unknown character, otherwise one of
+ * these other tokens for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+
+ +
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the primary expression after the binary operator. *)
+        let rhs = parse_primary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')' *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            ignore(Parser.parse_definition stream);
+            print_endline "parsed a function definition.";
+        | Token.Extern ->
+            ignore(Parser.parse_extern stream);
+            print_endline "parsed an extern.";
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            ignore(Parser.parse_toplevel stream);
+            print_endline "parsed a top-level expr";
+        with Stream.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop stream
+
+
+ +
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+let main () =
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop stream;
+;;
+
+main ()
+
+
+
+ +Next: Implementing Code Generation to LLVM IR +
+ + +
+
+ Chris Lattner + Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+ + diff --git a/final/docs/tutorial/OCamlLangImpl3.html b/final/docs/tutorial/OCamlLangImpl3.html new file mode 100644 index 00000000000..d55fd0fd0cb --- /dev/null +++ b/final/docs/tutorial/OCamlLangImpl3.html @@ -0,0 +1,1093 @@ + + + + + Kaleidoscope: Implementing code generation to LLVM IR + + + + + + + + +
Kaleidoscope: Code generation to LLVM IR
+ + + +
+

+ Written by Chris Lattner + and Erick Tryzelaar +

+
+ + + + + +
+ +

Welcome to Chapter 3 of the "Implementing a language +with LLVM" tutorial. This chapter shows you how to transform the Abstract Syntax Tree, built in Chapter 2, into +LLVM IR. This will teach you a little bit about how LLVM does things, as well +as demonstrate how easy it is to use. It's much more work to build a lexer and +parser than it is to generate LLVM IR code. :) +

+ +

Please note: the code in this chapter and later requires LLVM 2.3 or +LLVM SVN to work. LLVM 2.2 and before will not work with it.

+ +
+ + + + + +
+ +

+In order to generate LLVM IR, we want some simple setup to get started. First +we define a code generation (codegen) function with a case for each kind of AST +node:

+ +
+
+let rec codegen_expr = function
+  | Ast.Number n -> ...
+  | Ast.Variable name -> ...
+
+
+ +

The Codegen.codegen_expr function emits IR for an AST node +along with all the things it depends on, returning an LLVM Value +object. "Value" is the class used to represent a "Static Single +Assignment (SSA) register" or "SSA value" in LLVM. The most distinctive aspect +of SSA values is that their value is computed as the related instruction +executes, and it does not get a new value until (and if) the instruction +re-executes. In other words, there is no way to "change" an SSA value. For +more information, please read up on Static Single +Assignment - the concepts are really quite natural once you grok them.

+ +
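As a tiny concrete illustration (my example, using constant expressions so no +function body is needed): "adding" two SSA values never mutates either operand; +it produces a brand-new value.
+ +
+
+(* Sketch: SSA values are immutable. const_fadd yields a new constant value;
+ * the operands 1.0 and 2.0 are untouched. *)
+let ctx = Llvm.global_context ()
+let dbl = Llvm.double_type ctx
+let three = Llvm.const_fadd (Llvm.const_float dbl 1.0) (Llvm.const_float dbl 2.0)
+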

The +second thing we want is an "Error" exception like we used for the parser, which +will be used to report errors found during code generation (for example, use of +an undeclared parameter):

+ +
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+
+ +

The static variables will be used during code generation. +Codegen.the_module is the LLVM construct that contains all of the +functions and global variables in a chunk of code. In many ways, it is the +top-level structure that the LLVM IR uses to contain code.

+ +

The Codegen.builder object is a helper object that makes it easy to +generate LLVM instructions. Instances of the IRBuilder +class keep track of the current place to insert instructions and have methods to +create new instructions.

+ +

The Codegen.named_values map keeps track of which values are defined +in the current scope and what their LLVM representation is. (In other words, it +is a symbol table for the code). In this form of Kaleidoscope, the only things +that can be referenced are function parameters. As such, function parameters +will be in this map when generating code for their function body.

+ +
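For orientation, this is roughly how the map gets populated when a function is +being generated (a condensed fragment in the shape of the prototype codegen +later in this chapter; here args is the prototype's argument-name array and +f the LLVM function):
+ +
+
+(* Name each LLVM parameter and record it in the symbol table, so that
+ * codegen_expr can resolve Ast.Variable references to arguments. *)
+Array.iteri (fun i a ->
+  let name = args.(i) in
+  set_value_name name a;
+  Hashtbl.add named_values name a
+) (params f)
+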

+With these basics in place, we can start talking about how to generate code for +each expression. Note that this assumes that the Codegen.builder has +been set up to generate code into something. For now, we'll assume +that this has already been done, and we'll just use it to emit code.

+ +
+ + + + + +
+ +

Generating LLVM code for expression nodes is very straightforward: less +than 30 lines of commented code for all four of our expression nodes. First +we'll do numeric literals:

+ +
+
+  | Ast.Number n -> const_float double_type n
+
+
+ +

In the LLVM IR, numeric constants are represented with the +ConstantFP class, which holds the numeric value in an APFloat +internally (APFloat has the capability of holding floating point +constants of Arbitrary Precision). This code basically just +creates and returns a ConstantFP. Note that in the LLVM IR, +constants are all uniqued together and shared. For this reason, the API +uses the "foo::get(..)" idiom instead of "new foo(..)" or "foo::Create(..)".

+ +
+
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+
+
+ +

References to variables are also quite simple using LLVM. In the simple +version of Kaleidoscope, we assume that the variable has already been emitted +somewhere and its value is available. In practice, the only values that can be +in the Codegen.named_values map are function arguments. This code +simply checks to see that the specified name is in the map (if not, an unknown +variable is being referenced) and returns the value for it. In future chapters, +we'll add support for loop induction variables +in the symbol table, and for local +variables.

+ +
+
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_add lhs_val rhs_val "addtmp" builder
+        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+
+
+ +

Binary operators start to get more interesting. The basic idea here is that we recursively emit code for the left-hand side of the expression, then the right-hand side, then we compute the result of the binary expression. In this code, we do a simple pattern match on the operator to create the right LLVM instruction.

+ +

In the example above, the LLVM builder class is starting to show its value. IRBuilder knows where to insert the newly created instruction; all you have to do is specify what instruction to create (e.g. with Llvm.build_fadd), which operands to use (lhs_val and rhs_val here) and optionally provide a name for the generated instruction.

+ +

One nice thing about LLVM is that the name is just a hint. For instance, if the code above emits multiple "addtmp" variables, LLVM will automatically provide each one with an increasing, unique numeric suffix. Local value names for instructions are purely optional, but they make it much easier to read the IR dumps.

+ +

LLVM instructions are constrained by strict rules: for example, the left and right operands of an add instruction must have the same type, and the result type of the add must match the operand types. Because all values in Kaleidoscope are doubles, this makes for very simple code for add, sub and mul.

+ +

On the other hand, LLVM specifies that the fcmp instruction always returns an 'i1' value (a one bit integer). The problem with this is that Kaleidoscope wants the value to be 0.0 or 1.0. In order to get these semantics, we combine the fcmp instruction with a uitofp instruction. This instruction converts its input integer into a floating point value by treating the input as an unsigned value. In contrast, if we used the sitofp instruction, the Kaleidoscope '<' operator would return 0.0 and -1.0, depending on the input value.

+ +
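
To see why the choice matters, here is a sketch (not tutorial code) of the signed variant: because sitofp sign-extends the i1, a true comparison would come out as -1.0 rather than 1.0.

+ +
+
+(* A sketch: the signed conversion maps an i1 "true" to -1.0, not 1.0. *)
+let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+build_sitofp i double_type "booltmp" builder
+
+
+ +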
+
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* Check for an argument count mismatch. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+
+
+ +

Code generation for function calls is quite straightforward with LLVM. The code above initially does a function name lookup in the LLVM Module's symbol table. Recall that the LLVM Module is the container that holds all of the functions we are JIT'ing. By giving each function the same name as what the user specifies, we can use the LLVM symbol table to resolve function names for us.

+ +

Once we have the function to call, we recursively codegen each argument that is to be passed in, and create an LLVM call instruction. Note that LLVM uses the native C calling conventions by default, allowing these calls to also call into standard library functions like "sin" and "cos", with no additional effort.

+ +

This wraps up our handling of the four basic expressions that we have so far in Kaleidoscope. Feel free to go in and add some more; one possibility is sketched below. For example, by browsing the LLVM language reference you'll find several other interesting instructions that are really easy to plug into our basic framework.

+ +
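
As an example of such an extension, a division operator could be added with one more arm in the Ast.Binary match (a sketch; '/' would also need a precedence entry in toy.ml, shown here at the same level as '*'):

+ +
+
+(* A sketch: an extra match arm for division, using build_fdiv. *)
+        | '/' -> build_fdiv lhs_val rhs_val "divtmp" builder
+
+(* ...and in toy.ml, register its precedence: *)
+  Hashtbl.add Parser.binop_precedence '/' 40;
+
+
+ +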
+Function Code Generation
+ +

Code generation for prototypes and functions must handle a number of details, which make their code less beautiful than expression code generation, but allow us to illustrate some important points. First, let's talk about code generation for prototypes: they are used both for function bodies and external function declarations. The code starts with:

+ +
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+
+
+ +

This code packs a lot of power into a few lines. Note first that this function returns a "Function*" instead of a "Value*" (although at the moment they are both modeled by llvalue in OCaml). Because a "prototype" really talks about the external interface for a function (not the value computed by an expression), it makes sense for it to return the LLVM Function it corresponds to when codegen'd.

+ +

The call to Llvm.function_type creates the Llvm.lltype that should be used for a given Prototype. Since all function arguments in Kaleidoscope are of type double, the first line creates an array of "N" LLVM double types. It then uses the Llvm.function_type function to create a function type that takes "N" doubles as arguments, returns one double as a result, and that is not vararg (a vararg signature would use Llvm.var_arg_function_type instead; see the sketch below). Note that Types in LLVM are uniqued just like Constants are, so you don't "new" a type, you "get" it.

+ +
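
As an aside, if we did want a vararg signature, a minimal sketch using that binding might look like:

+ +
+
+(* A sketch, not tutorial code: a vararg "double (double, ...)" type. *)
+let vararg_ft = var_arg_function_type double_type [| double_type |]
+
+
+ +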

The final line above checks if the function has already been defined in Codegen.the_module. If not, we will create it.

+ +
+
+        | None -> declare_function name ft the_module
+
+
+ +

This indicates the type and name to use, as well as which module to insert into. By default we assume a function has Llvm.Linkage.ExternalLinkage. "External linkage" means that the function may be defined outside the current module and/or that it is callable by functions outside the module. The "name" passed in is the name the user specified: this name is registered in Codegen.the_module's symbol table, which is used by the function call code above.

+ +
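
External linkage is already what declare_function gives us, but it could also be set explicitly; a one-line sketch (not needed by the tutorial code):

+ +
+
+(* A sketch: explicitly setting the linkage of a function value f. *)
+set_linkage Linkage.External f
+
+
+ +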

In Kaleidoscope, I choose to allow redefinitions of functions in two cases: first, we want to allow 'extern'ing a function more than once, as long as the prototypes for the externs match (since all arguments have the same type, we just have to check that the number of arguments match). Second, we want to allow 'extern'ing a function and then defining a body for it. This is useful when defining mutually recursive functions.

+ +
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if Array.length (basic_blocks f) == 0 then () else
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if Array.length (params f) == Array.length args then () else
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+
+ +

In order to verify the logic above, we first check to see if the pre-existing function is "empty". In this case, empty means that it has no basic blocks in it, which means it has no body. If it has no body, it is a forward declaration. Since we don't allow redefinition after a full definition of the function, the code rejects this case. If the previous reference to the function was an 'extern', we simply verify that the number of arguments for that definition and this one match up. If not, we emit an error.

+ +
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+
+ +

The last bit of code for prototypes loops over all of the arguments in the function, setting the name of the LLVM Argument objects to match, and registering the arguments in the Codegen.named_values map for future use by the Ast.Variable variant. Once this is set up, it returns the Function object to the caller. Note that we don't check for conflicting argument names here (e.g. "extern foo(a b a)"). Doing so would be very straightforward with the mechanics we have already used above; a possible check is sketched below.

+ +
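
Here is what the promised duplicate-argument check might look like; a sketch layered on the loop above, not part of the tutorial code:

+ +
+
+(* A sketch: reject duplicate parameter names such as "extern foo(a b a)"
+ * before registering each argument in named_values. *)
+Array.iteri (fun i a ->
+  let n = args.(i) in
+  if Hashtbl.mem named_values n then
+    raise (Error "duplicate argument name");
+  set_value_name n a;
+  Hashtbl.add named_values n a;
+) (params f);
+
+
+ +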
+
+let codegen_func = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+
+ +

Code generation for function definitions starts out simply enough: we first clear out the Codegen.named_values map to make sure that there isn't anything in it from the last function we compiled, then we codegen the prototype (Proto). Code generation of the prototype ensures that there is an LLVM Function object that is ready to go for us.

+ +
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        let ret_val = codegen_expr body in
+
+
+ +

Now we get to the point where the Codegen.builder is set up. The first line creates a new basic block (named "entry"), which is inserted into the_function. The second line then tells the builder that new instructions should be inserted into the end of the new basic block. Basic blocks in LLVM are an important part of functions that define the Control Flow Graph. Since we don't have any control flow, our functions will only contain one block at this point. We'll fix this in Chapter 5 :).

+ +
+
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        the_function
+
+
+ +

Once the insertion point is set up, we call Codegen.codegen_expr for the root expression of the function. If no error happens, this emits code to compute the expression into the entry block and returns the value that was computed. Assuming no error, we then create an LLVM ret instruction, which completes the function. Once the function is built, we call Llvm_analysis.assert_valid_function, which is provided by LLVM. This function does a variety of consistency checks on the generated code, to determine if our compiler is doing everything right. Using this is important: it can catch a lot of bugs. Once the function is finished and validated, we return it.

+ +
+
+      with e ->
+        delete_function the_function;
+        raise e
+
+
+ +

The only piece left here is handling of the error case. For simplicity, we handle this by merely deleting the function we produced with the Llvm.delete_function method. This allows the user to redefine a function that they incorrectly typed in before: if we didn't delete it, it would live in the symbol table, with a body, preventing future redefinition.

+ +

This code does have a bug, though. Since Codegen.codegen_proto can return a previously defined forward declaration, our code can actually delete a forward declaration. There are a number of ways to fix this bug; see what you can come up with (one possibility is sketched after the testcase). Here is a testcase:

+ +
+
+extern foo(a b);     # ok, defines foo.
+def foo(a b) c;      # error, 'c' is invalid.
+def bar() foo(1, 2); # error, unknown function "foo"
+
+
+ +
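
One possible fix is sketched here, under the assumption that we only want to delete functions we actually created: remember whether the name already existed before codegen'ing the prototype, and skip the deletion for pre-existing declarations.

+ +
+
+(* A sketch of one fix, not the tutorial's code: only delete the function
+ * on error if codegen_proto created it during this call. *)
+let codegen_func = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let Ast.Prototype (name, _) = proto in
+      let existed = lookup_function name the_module <> None in
+      let the_function = codegen_proto proto in
+      ...
+      with e ->
+        if not existed then delete_function the_function;
+        raise e
+
+
+ +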
+Driver Changes and Closing Thoughts
+ +

For now, code generation to LLVM doesn't really get us much, except that we can look at the pretty IR. The sample code inserts calls to Codegen into "Toplevel.main_loop", and then dumps out the LLVM IR. This gives a nice way to look at the LLVM IR for simple functions. For example:

+ +
+
+ready> 4+5;
+Read top-level expression:
+define double @""() {
+entry:
+        %addtmp = fadd double 4.000000e+00, 5.000000e+00
+        ret double %addtmp
+}
+
+
+ +

Note how the parser turns the top-level expression into anonymous functions for us. This will be handy when we add JIT support in the next chapter. Also note that the code is very literally transcribed, no optimizations are being performed. We will add optimizations explicitly in the next chapter.

+ +
+
+ready> def foo(a b) a*a + 2*a*b + b*b;
+Read function definition:
+define double @foo(double %a, double %b) {
+entry:
+        %multmp = fmul double %a, %a
+        %multmp1 = fmul double 2.000000e+00, %a
+        %multmp2 = fmul double %multmp1, %b
+        %addtmp = fadd double %multmp, %multmp2
+        %multmp3 = fmul double %b, %b
+        %addtmp4 = fadd double %addtmp, %multmp3
+        ret double %addtmp4
+}
+
+
+ +

This shows some simple arithmetic. Notice the striking similarity to the LLVM builder calls that we use to create the instructions.

+ +
+
+ready> def bar(a) foo(a, 4.0) + bar(31337);
+Read function definition:
+define double @bar(double %a) {
+entry:
+        %calltmp = call double @foo(double %a, double 4.000000e+00)
+        %calltmp1 = call double @bar(double 3.133700e+04)
+        %addtmp = fadd double %calltmp, %calltmp1
+        ret double %addtmp
+}
+
+
+ +

This shows some function calls. Note that this function will take a long time to execute if you call it. In the future we'll add conditional control flow to actually make recursion useful :).

+ +
+
+ready> extern cos(x);
+Read extern:
+declare double @cos(double)
+
+ready> cos(1.234);
+Read top-level expression:
+define double @""() {
+entry:
+        %calltmp = call double @cos(double 1.234000e+00)
+        ret double %calltmp
+}
+
+
+ +

This shows an extern for the libm "cos" function, and a call to it.

+ + +
+
+ready> ^D
+; ModuleID = 'my cool jit'
+
+define double @""() {
+entry:
+        %addtmp = fadd double 4.000000e+00, 5.000000e+00
+        ret double %addtmp
+}
+
+define double @foo(double %a, double %b) {
+entry:
+        %multmp = fmul double %a, %a
+        %multmp1 = fmul double 2.000000e+00, %a
+        %multmp2 = fmul double %multmp1, %b
+        %addtmp = fadd double %multmp, %multmp2
+        %multmp3 = fmul double %b, %b
+        %addtmp4 = fadd double %addtmp, %multmp3
+        ret double %addtmp4
+}
+
+define double @bar(double %a) {
+entry:
+        %calltmp = call double @foo(double %a, double 4.000000e+00)
+        %calltmp1 = call double @bar(double 3.133700e+04)
+        %addtmp = fadd double %calltmp, %calltmp1
+        ret double %addtmp
+}
+
+declare double @cos(double)
+
+define double @""() {
+entry:
+        %calltmp = call double @cos(double 1.234000e+00)
+        ret double %calltmp
+}
+
+
+ +

When you quit the current demo, it dumps out the IR for the entire module generated. Here you can see the big picture with all the functions referencing each other.

+ +

This wraps up the third chapter of the Kaleidoscope tutorial. Up next, we'll describe how to add JIT codegen and optimizer support to this so we can actually start running code!

+ +
+Full Code Listing
+ +

Here is the complete code listing for our running example, enhanced with the LLVM code generator. Because this uses the LLVM libraries, we need to link them in. For the OCaml build, the _tags and myocamlbuild.ml files below tell ocamlbuild which LLVM bindings to link against:

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+ +
+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+
+
+ +
myocamlbuild.ml:
+
+
+open Ocamlbuild_plugin;;
+
+ocaml_lib ~extern:true "llvm";;
+ocaml_lib ~extern:true "llvm_analysis";;
+
+flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+
+
+ +
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns Kwd for an unknown character, otherwise one of these
+ * other tokens for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+
+ +
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the primary expression after the binary operator. *)
+        let rhs = parse_primary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')' *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +
codegen.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* Check for an argument count mismatch. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+let codegen_func = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        the_function
+      with e ->
+        delete_function the_function;
+        raise e
+
+
+ +
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            let e = Parser.parse_definition stream in
+            print_endline "parsed a function definition.";
+            dump_value (Codegen.codegen_func e);
+        | Token.Extern ->
+            let e = Parser.parse_extern stream in
+            print_endline "parsed an extern.";
+            dump_value (Codegen.codegen_proto e);
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            dump_value (Codegen.codegen_func e);
+        with Stream.Error s | Codegen.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop stream
+
+
+ +
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+let main () =
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop stream;
+
+  (* Print out all the generated code. *)
+  dump_module Codegen.the_module
+;;
+
+main ()
+
+
+
+Next: Adding JIT and Optimizer Support
+ + +
+
+ Chris Lattner
+ Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$
+ +

diff --git a/final/docs/tutorial/OCamlLangImpl4.html b/final/docs/tutorial/OCamlLangImpl4.html
new file mode 100644
index 00000000000..979119afbc1
--- /dev/null
+++ b/final/docs/tutorial/OCamlLangImpl4.html
@@ -0,0 +1,1029 @@
Kaleidoscope: Adding JIT and Optimizer Support
+ + + +
+

+ Written by Chris Lattner and Erick Tryzelaar

+
+Chapter 4 Introduction
+ +

Welcome to Chapter 4 of the "Implementing a language with LLVM" tutorial. Chapters 1-3 described the implementation of a simple language and added support for generating LLVM IR. This chapter describes two new techniques: adding optimizer support to your language, and adding JIT compiler support. These additions will demonstrate how to get nice, efficient code for the Kaleidoscope language.

+ +
+Trivial Constant Folding
+ +

Note: the default IRBuilder now always includes the constant folding optimizations below.

+ +

Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, it does not produce wonderful code. For example, when compiling simple code, we don't get obvious optimizations:

+ +
+
+ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 1.000000e+00, 2.000000e+00
+        %addtmp1 = fadd double %addtmp, %x
+        ret double %addtmp1
+}
+
+
+ +

This code is a very, very literal transcription of the AST built by parsing the input. As such, this transcription lacks optimizations like constant folding (we'd like to get "fadd x, 3.0" in the example above) as well as other more important optimizations. Constant folding, in particular, is a very common and very important optimization: so much so that many language implementors implement constant folding support in their AST representation.

+ +

With LLVM, you don't need this support in the AST. Since all calls to build LLVM IR go through the LLVM builder, it would be nice if the builder itself checked to see if there was a constant folding opportunity when you call it. If so, it could just do the constant fold and return the constant instead of creating an instruction. This is exactly what the LLVMFoldingBuilder class does.

All we did was switch from LLVMBuilder to LLVMFoldingBuilder. Though we changed no other code, we now have all of our instructions implicitly constant folded without us having to do anything about it. For example, the input above now compiles to:

+ +
+
+ready> def test(x) 1+2+x;
+Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        ret double %addtmp
+}
+
+
+ +

Well, that was easy :). In practice, we recommend always using LLVMFoldingBuilder when generating code like this. It has no "syntactic overhead" for its use (you don't have to uglify your compiler with constant checks everywhere) and it can dramatically reduce the amount of LLVM IR that is generated in some cases (particularly for languages with a macro preprocessor or that use a lot of constants).

+ +

On the other hand, the LLVMFoldingBuilder is limited by the fact that it does all of its analysis inline with the code as it is built. If you take a slightly more complex example:

+ +
+
+ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double 3.000000e+00, %x
+        %addtmp1 = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp1
+        ret double %multmp
+}
+
+
+ +

In this case, the LHS and RHS of the multiplication are the same value. We'd really like to see this generate "tmp = x+3; result = tmp*tmp;" instead of computing "x+3" twice.

+ +

Unfortunately, no amount of local analysis will be able to detect and correct this. This requires two transformations: reassociation of expressions (to make the adds lexically identical) and Common Subexpression Elimination (CSE) to delete the redundant add instruction. Fortunately, LLVM provides a broad range of optimizations that you can use, in the form of "passes".

+ +
+LLVM Optimization Passes
+ +

LLVM provides many optimization passes, which do many different sorts of things and have different tradeoffs. Unlike other systems, LLVM doesn't hold to the mistaken notion that one set of optimizations is right for all languages and for all situations. LLVM allows a compiler implementor to make complete decisions about what optimizations to use, in which order, and in what situation.

+ +

As a concrete example, LLVM supports both "whole module" passes, which look across as large a body of code as they can (often a whole file, but if run at link time, this can be a substantial portion of the whole program). It also supports and includes "per-function" passes which just operate on a single function at a time, without looking at other functions. For more information on passes and how they are run, see the How to Write a Pass document and the List of LLVM Passes.

+ +

For Kaleidoscope, we are currently generating functions on the fly, one at a time, as the user types them in. We aren't shooting for the ultimate optimization experience in this setting, but we also want to catch the easy and quick stuff where possible. As such, we will choose to run a few per-function optimizations as the user types the function in. If we wanted to make a "static Kaleidoscope compiler", we would use exactly the code we have now, except that we would defer running the optimizer until the entire file has been parsed.

+ +

In order to get per-function optimizations going, we need to set up a Llvm.PassManager to hold and organize the LLVM optimizations that we want to run. Once we have that, we can add a set of optimizations to run. The code looks like this:

+ +
+
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combination the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+  (* Eliminate Common SubExpressions. *)
+  add_gvn the_fpm;
+
+  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+  add_cfg_simplification the_fpm;
+
+  ignore (PassManager.initialize the_fpm);
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop the_fpm the_execution_engine stream;
+
+
+ +

The meat of the matter here is the definition of "the_fpm". It requires the_module to construct itself. Once it is set up, we use a series of "add" calls to add a bunch of LLVM passes. The first pass is basically boilerplate: it adds a pass so that later optimizations know how the data structures in the program are laid out. The "the_execution_engine" variable is related to the JIT, which we will get to in the next section.

+ +

In this case, we choose to add 4 optimization passes. The passes we chose here are a pretty standard set of "cleanup" optimizations that are useful for a wide variety of code. I won't delve into what they do but, believe me, they are a good starting place :).

+ +

Once the Llvm.PassManager is set up, we need to make use of it. We do this by running it after our newly created function is constructed (in Codegen.codegen_func), but before it is returned to the client:

+ +
+
+let codegen_func the_fpm = function
+      ...
+      try
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Optimize the function. *)
+        let _ = PassManager.run_function the_function the_fpm in
+
+        the_function
+
+
+ +

As you can see, this is pretty straightforward. the_fpm optimizes and updates the LLVM Function* in place, improving (hopefully) its body. With this in place, we can try our test above again:

+ +
+
+ready> def test(x) (1+2+x)*(x+(1+2));
+ready> Read function definition:
+define double @test(double %x) {
+entry:
+        %addtmp = fadd double %x, 3.000000e+00
+        %multmp = fmul double %addtmp, %addtmp
+        ret double %multmp
+}
+
+
+ +

As expected, we now get our nicely optimized code, saving a floating point add instruction from every execution of this function.

+ +

LLVM provides a wide variety of optimizations that can be used in certain circumstances. Some documentation about the various passes is available, but it isn't very complete. Another good source of ideas is to look at the passes that llvm-gcc or llvm-ld run to get started. The "opt" tool allows you to experiment with passes from the command line, so you can see if they do anything. Adding another pass from the OCaml bindings is just as easy; see the sketch below.

+ +
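
For instance, one more pass from the bindings can be dropped into the same pipeline. A sketch (add_memory_to_register_promotion comes from Llvm_scalar_opts and appears in later chapters of this tutorial):

+ +
+
+(* A sketch: one more cleanup pass added to the same function pass manager. *)
+  add_memory_to_register_promotion the_fpm;
+
+
+ +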

Now that we have reasonable code coming out of our front-end, let's talk about executing it!

+ +
+Adding a JIT Compiler
+ +

Code that is available in LLVM IR can have a wide variety of tools applied to it. For example, you can run optimizations on it (as we did above), you can dump it out in textual or binary forms, you can compile the code to an assembly file (.s) for some target, or you can JIT compile it. The nice thing about the LLVM IR representation is that it is the "common currency" between many different parts of the compiler.

+ +

In this section, we'll add JIT compiler support to our interpreter. The basic idea that we want for Kaleidoscope is to have the user enter function bodies as they do now, but immediately evaluate the top-level expressions they type in. For example, if they type in "1 + 2;", we should evaluate and print out 3. If they define a function, they should be able to call it from the command line.

+ +

In order to do this, we first declare and initialize the JIT. This is done by adding a global variable and a call in main:

+ +
+
+...
+let main () =
+  ...
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  ...
+
+
+ +

This creates an abstract "Execution Engine" which can be either a JIT compiler or the LLVM interpreter. LLVM will automatically pick a JIT compiler for you if one is available for your platform, otherwise it will fall back to the interpreter.

+ +

Once the Llvm_executionengine.ExecutionEngine.t is created, the JIT is ready to be used. There are a variety of APIs that are useful, but the simplest one is the "Llvm_executionengine.ExecutionEngine.run_function" function. This JIT compiles the specified LLVM Function if needed, runs it with the given arguments, and returns the result as a GenericValue. In our case, this means that we can change the code that parses a top-level expression to look like this:

+ +
+
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            let the_function = Codegen.codegen_func the_fpm e in
+            dump_value the_function;
+
+            (* JIT the function, returning a function pointer. *)
+            let result = ExecutionEngine.run_function the_function [||]
+              the_execution_engine in
+
+            print_string "Evaluated to ";
+            print_float (GenericValue.as_float Codegen.double_type result);
+            print_newline ();
+
+
+ +

Recall that we compile top-level expressions into a self-contained LLVM function that takes no arguments and returns the computed double. Because the LLVM JIT compiler matches the native platform ABI, the generated code can be invoked just like a native function of that type. This means there is no difference between JIT compiled code and native machine code that is statically linked into your application.

+ +

With just these two changes, let's see how Kaleidoscope works now!

+ +
+
+ready> 4+5;
+define double @""() {
+entry:
+        ret double 9.000000e+00
+}
+
+Evaluated to 9.000000
+
+
+ +

Well this looks like it is basically working. The dump of the function shows the "no argument function that always returns double" that we synthesize for each top-level expression that is typed in. This demonstrates very basic functionality, but can we do more?

+ +
+
+ready> def testfunc(x y) x + y*2; 
+Read function definition:
+define double @testfunc(double %x, double %y) {
+entry:
+        %multmp = fmul double %y, 2.000000e+00
+        %addtmp = fadd double %multmp, %x
+        ret double %addtmp
+}
+
+ready> testfunc(4, 10);
+define double @""() {
+entry:
+        %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
+        ret double %calltmp
+}
+
+Evaluated to 24.000000
+
+
+ +

This illustrates that we can now call user code, but there is something a bit subtle going on here. Note that we only invoke the JIT on the anonymous functions that call testfunc, but we never invoked it on testfunc itself. What actually happened here is that the JIT scanned for all non-JIT'd functions transitively called from the anonymous function and compiled all of them before returning from run_function.

+ +

The JIT provides a number of other more advanced interfaces for things like freeing allocated machine code, rejit'ing functions to update them, etc. However, even with this simple code, we get some surprisingly powerful capabilities - check this out (I removed the dump of the anonymous functions, you should get the idea by now :) :

+ +
+
+ready> extern sin(x);
+Read extern:
+declare double @sin(double)
+
+ready> extern cos(x);
+Read extern:
+declare double @cos(double)
+
+ready> sin(1.0);
+Evaluated to 0.841471
+
+ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
+Read function definition:
+define double @foo(double %x) {
+entry:
+        %calltmp = call double @sin(double %x)
+        %multmp = fmul double %calltmp, %calltmp
+        %calltmp2 = call double @cos(double %x)
+        %multmp4 = fmul double %calltmp2, %calltmp2
+        %addtmp = fadd double %multmp, %multmp4
+        ret double %addtmp
+}
+
+ready> foo(4.0);
+Evaluated to 1.000000
+
+
+ +

Whoa, how does the JIT know about sin and cos? The answer is surprisingly simple: in this example, the JIT started execution of a function and got to a function call. It realized that the function was not yet JIT compiled and invoked the standard set of routines to resolve the function. In this case, there is no body defined for the function, so the JIT ended up calling "dlsym("sin")" on the Kaleidoscope process itself. Since "sin" is defined within the JIT's address space, it simply patches up calls in the module to call the libm version of sin directly.

+ +

The LLVM JIT provides a number of interfaces (look in the llvm_executionengine.mli file) for controlling how unknown functions get resolved. It allows you to establish explicit mappings between IR objects and addresses (useful for LLVM global variables that you want to map to static tables, for example), allows you to dynamically decide on the fly based on the function name, and even allows you to have the JIT compile functions lazily the first time they're called.

+ +

One interesting application of this is that we can now extend the language by writing arbitrary C code to implement operations. For example, if we add:

+ +
+
+/* putchard - putchar that takes a double and returns 0. */
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+
+ +

Now we can produce simple output to the console by using things like: "extern putchard(x); putchard(120);", which prints a lowercase 'x' on the console (120 is the ASCII code for 'x'). Similar code could be used to implement file I/O, console input, and many other capabilities in Kaleidoscope.

+ +

This completes the JIT and optimizer chapter of the Kaleidoscope tutorial. At this point, we can compile a non-Turing-complete programming language, optimize and JIT compile it in a user-driven way. Next up we'll look into extending the language with control flow constructs, tackling some interesting LLVM IR issues along the way.

+ +
+Full Code Listing
+ +

Here is the complete code listing for our running example, enhanced with the LLVM JIT and optimizer. To build this example, use:

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+ +
+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+
+ +
myocamlbuild.ml:
+
+
+open Ocamlbuild_plugin;;
+
+ocaml_lib ~extern:true "llvm";;
+ocaml_lib ~extern:true "llvm_analysis";;
+ocaml_lib ~extern:true "llvm_executionengine";;
+ocaml_lib ~extern:true "llvm_target";;
+ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+
+ +
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns Kwd for an unknown character, otherwise one of these
+ * other tokens for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+
+ +
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the primary expression after the binary operator. *)
+        let rhs = parse_primary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')' *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +
codegen.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* Check for an argument count mismatch. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+let codegen_func the_fpm = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Optimize the function. *)
+        let _ = PassManager.run_function the_function the_fpm in
+
+        the_function
+      with e ->
+        delete_function the_function;
+        raise e
+
+
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop the_fpm the_execution_engine stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop the_fpm the_execution_engine stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            let e = Parser.parse_definition stream in
+            print_endline "parsed a function definition.";
+            dump_value (Codegen.codegen_func the_fpm e);
+        | Token.Extern ->
+            let e = Parser.parse_extern stream in
+            print_endline "parsed an extern.";
+            dump_value (Codegen.codegen_proto e);
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            let the_function = Codegen.codegen_func the_fpm e in
+            dump_value the_function;
+
+            (* JIT the function, returning a function pointer. *)
+            let result = ExecutionEngine.run_function the_function [||]
+              the_execution_engine in
+
+            print_string "Evaluated to ";
+            print_float (GenericValue.as_float Codegen.double_type result);
+            print_newline ();
+        with Stream.Error s | Codegen.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop the_fpm the_execution_engine stream
+
+
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+open Llvm_target
+open Llvm_scalar_opts
+
+let main () =
+  ignore (initialize_native_target ());
+
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combination the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+  (* Eliminate Common SubExpressions. *)
+  add_gvn the_fpm;
+
+  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+  add_cfg_simplification the_fpm;
+
+  ignore (PassManager.initialize the_fpm);
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop the_fpm the_execution_engine stream;
+
+  (* Print out all the generated code. *)
+  dump_module Codegen.the_module
+;;
+
+main ()
+
+
bindings.c
+
+
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+
+
+Next: Extending the language: control flow
+
+ Chris Lattner
+ Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$
diff --git a/final/docs/tutorial/OCamlLangImpl5.html b/final/docs/tutorial/OCamlLangImpl5.html
new file mode 100644
index 00000000000..7a213952574
--- /dev/null
+++ b/final/docs/tutorial/OCamlLangImpl5.html
@@ -0,0 +1,1569 @@
Kaleidoscope: Extending the Language: Control Flow
+
+Written by Chris Lattner and Erick Tryzelaar
+

Welcome to Chapter 5 of the "Implementing a language +with LLVM" tutorial. Parts 1-4 described the implementation of the simple +Kaleidoscope language and included support for generating LLVM IR, followed by +optimizations and a JIT compiler. Unfortunately, as presented, Kaleidoscope is +mostly useless: it has no control flow other than call and return. This means +that you can't have conditional branches in the code, significantly limiting its +power. In this episode of "build that compiler", we'll extend Kaleidoscope to +have an if/then/else expression plus a simple 'for' loop.

+

+Extending Kaleidoscope to support if/then/else is quite straightforward. It
+basically requires adding support for this "new" concept to the lexer, parser,
+AST, and LLVM code emitter. This example is nice, because it shows how easy it
+is to "grow" a language over time, incrementally extending it as new ideas are
+discovered.

+ +

Before we get going on "how" we add this extension, let's talk about "what" we
+want. The basic idea is that we want to be able to write this sort of thing:

+ +
+
+def fib(x)
+  if x < 3 then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+
+ +

In Kaleidoscope, every construct is an expression: there are no statements. +As such, the if/then/else expression needs to return a value like any other. +Since we're using a mostly functional form, we'll have it evaluate its +conditional, then return the 'then' or 'else' value based on how the condition +was resolved. This is very similar to the C "?:" expression.

+ +

The semantics of the if/then/else expression is that it evaluates the
+condition to a boolean equality value: 0.0 is considered to be false and
+everything else is considered to be true. If the condition is true, the first
+subexpression is evaluated and returned; if the condition is false, the second
+subexpression is evaluated and returned. Since Kaleidoscope allows
+side-effects, this behavior is important to nail down.
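+
+As a point of reference, the intended semantics can be written down as a
+single clause of a tiny OCaml interpreter over our AST (an illustrative
+sketch only; an 'eval' function is not part of this tutorial's code):
+
+let rec eval = function
+  ...
+  | Ast.If (cond, then_, else_) ->
+      (* 0.0 is false and anything else is true; only the chosen branch
+       * is evaluated, which matters once expressions have side-effects. *)
+      if eval cond <> 0.0 then eval then_ else eval else_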

+ +

Now that we know what we "want", let's break this down into its constituent
+pieces.

+

The lexer extensions are straightforward. First we add new variants +for the relevant tokens:

+ +
+
+  (* control *)
+  | If | Then | Else | For | In
+
+
+ +

Once we have that, we recognize the new keywords in the lexer. This is pretty simple +stuff:

+ +
+
+      ...
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | "if" -> [< 'Token.If; stream >]
+      | "then" -> [< 'Token.Then; stream >]
+      | "else" -> [< 'Token.Else; stream >]
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+
+

To represent the new expression we add a new AST variant for it:

+ +
+
+type expr =
+  ...
+  (* variant for if/then/else. *)
+  | If of expr * expr * expr
+
+
+ +

The AST variant just has pointers to the various subexpressions.
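+
+For instance, the conditional in the fib example above is represented by a
+value shaped like this (written out by hand for illustration):
+
+Ast.If (Ast.Binary ('<', Ast.Variable "x", Ast.Number 3.0),
+        Ast.Number 1.0,
+        Ast.Binary ('+',
+          Ast.Call ("fib", [| Ast.Binary ('-', Ast.Variable "x", Ast.Number 1.0) |]),
+          Ast.Call ("fib", [| Ast.Binary ('-', Ast.Variable "x", Ast.Number 2.0) |])))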

+

Now that we have the relevant tokens coming from the lexer and we have the +AST node to build, our parsing logic is relatively straightforward. First we +define a new parsing function:

+ +
+
+let rec parse_primary = parser
+  ...
+  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+  | [< 'Token.If; c=parse_expr;
+       'Token.Then ?? "expected 'then'"; t=parse_expr;
+       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+      Ast.If (c, t, e)
+
+
+ +

Next we hook it up as a primary expression:

+ +
+
+let rec parse_primary = parser
+  ...
+  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+  | [< 'Token.If; c=parse_expr;
+       'Token.Then ?? "expected 'then'"; t=parse_expr;
+       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+      Ast.If (c, t, e)
+
+
+

Now that we have it parsing and building the AST, the final piece is adding +LLVM code generation support. This is the most interesting part of the +if/then/else example, because this is where it starts to introduce new concepts. +All of the code above has been thoroughly described in previous chapters. +

+ +

To motivate the code we want to produce, let's take a look at a simple
+example. Consider:

+ +
+
+extern foo();
+extern bar();
+def baz(x) if x then foo() else bar();
+
+
+ +

If you disable optimizations, the code you'll (soon) get from Kaleidoscope +looks like this:

+ +
+
+declare double @foo()
+
+declare double @bar()
+
+define double @baz(double %x) {
+entry:
+  %ifcond = fcmp one double %x, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:    ; preds = %entry
+  %calltmp = call double @foo()
+  br label %ifcont
+
+else:    ; preds = %entry
+  %calltmp1 = call double @bar()
+  br label %ifcont
+
+ifcont:    ; preds = %else, %then
+  %iftmp = phi double [ %calltmp, %then ], [ %calltmp1, %else ]
+  ret double %iftmp
+}
+
+
+ +

To visualize the control flow graph, you can use a nifty feature of the LLVM +'opt' tool. If you put this LLVM IR +into "t.ll" and run "llvm-as < t.ll | opt -analyze -view-cfg", a window will pop up and you'll +see this graph:

+ +
Example CFG
+ +

Another way to get this is to call "Llvm_analysis.view_function_cfg +f" or "Llvm_analysis.view_function_cfg_only f" (where f +is a "Function") either by inserting actual calls into the code and +recompiling or by calling these in the debugger. LLVM has many nice features +for visualizing various graphs.
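+
+For example, one quick way to experiment is to temporarily pop up the CFG
+viewer right after a function has been validated in Codegen.codegen_func (a
+debugging sketch, not part of the final listing):
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Temporarily: display this function's CFG in a window. *)
+        Llvm_analysis.view_function_cfg the_function;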

+ +

Getting back to the generated code, it is fairly simple: the entry block +evaluates the conditional expression ("x" in our case here) and compares the +result to 0.0 with the "fcmp one" +instruction ('one' is "Ordered and Not Equal"). Based on the result of this +expression, the code jumps to either the "then" or "else" blocks, which contain +the expressions for the true/false cases.

+ +

Once the then/else blocks are finished executing, they both branch back to the +'ifcont' block to execute the code that happens after the if/then/else. In this +case the only thing left to do is to return to the caller of the function. The +question then becomes: how does the code know which expression to return?

+ +

The answer to this question involves an important SSA operation: the +Phi +operation. If you're not familiar with SSA, the wikipedia +article is a good introduction and there are various other introductions to +it available on your favorite search engine. The short version is that +"execution" of the Phi operation requires "remembering" which block control came +from. The Phi operation takes on the value corresponding to the input control +block. In this case, if control comes in from the "then" block, it gets the +value of "calltmp". If control comes from the "else" block, it gets the value +of "calltmp1".

+ +

At this point, you are probably starting to think "Oh no! This means my +simple and elegant front-end will have to start generating SSA form in order to +use LLVM!". Fortunately, this is not the case, and we strongly advise +not implementing an SSA construction algorithm in your front-end +unless there is an amazingly good reason to do so. In practice, there are two +sorts of values that float around in code written for your average imperative +programming language that might need Phi nodes:

+ +
+
+  1. Code that involves user variables: x = 1; x = x + 1;
+  2. Values that are implicit in the structure of your AST, such as the Phi
+     node in this case.
+

In Chapter 7 of this tutorial ("mutable variables"), we'll talk about case #1
+in depth. For now, just believe me that you don't need SSA construction to
+handle this case. For case #2, you have the choice of using the techniques that
+we will describe for #1, or you can insert Phi nodes directly, if convenient.
+In this case, it is really easy to generate the Phi node, so we choose to do it
+directly.

+ +

Okay, enough of the motivation and overview: let's generate code!

+

In order to generate code for this, we handle the Ast.If case in our
+codegen_expr function:

+ +
+
+let rec codegen_expr = function
+  ...
+  | Ast.If (cond, then_, else_) ->
+      let cond = codegen_expr cond in
+
+      (* Convert condition to a bool by comparing equal to 0.0 *)
+      let zero = const_float double_type 0.0 in
+      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+
+ +

This code is straightforward and similar to what we saw before. We emit the +expression for the condition, then compare that value to zero to get a truth +value as a 1-bit (bool) value.

+ +
+
+      (* Grab the first block so that we might later add the conditional branch
+       * to it at the end of the function. *)
+      let start_bb = insertion_block builder in
+      let the_function = block_parent start_bb in
+
+      let then_bb = append_block context "then" the_function in
+      position_at_end then_bb builder;
+
+
+ +

+As opposed to the C++ tutorial, we have to build our basic blocks bottom up
+since we can't have dangling BasicBlocks. We start off by saving a pointer to
+the first block (which might not be the entry block), which we'll need in order
+to build a conditional branch later. We do this by asking the builder for the
+current BasicBlock. The fourth line gets the current Function object that is
+being built. It gets this by asking the start_bb for its "parent" (the
+function it is currently embedded into).

+ +

Once it has that, it creates one block. It is automatically appended into +the function's list of blocks.

+ +
+
+      (* Emit 'then' value. *)
+      position_at_end then_bb builder;
+      let then_val = codegen_expr then_ in
+
+      (* Codegen of 'then' can change the current block, update then_bb for the
+       * phi. We create a new name because one is used for the phi node, and the
+       * other is used for the conditional branch. *)
+      let new_then_bb = insertion_block builder in
+
+
+ +

We move the builder to start inserting into the "then" block. Strictly +speaking, this call moves the insertion point to be at the end of the specified +block. However, since the "then" block is empty, it also starts out by +inserting at the beginning of the block. :)

+ +

Once the insertion point is set, we recursively codegen the "then" expression +from the AST.

+ +

The final line here is quite subtle, but is very important. The basic issue
+is that when we create the Phi node in the merge block, we need to set up the
+block/value pairs that indicate how the Phi will work. Importantly, the Phi
+node expects to have an entry for each predecessor of the block in the CFG. Why
+then, are we getting the current block when we just set it to then_bb five
+lines above? The problem is that the "then" expression may actually itself
+change the block that the builder is emitting into if, for example, it contains
+a nested "if/then/else" expression. Because calling codegen_expr recursively
+could arbitrarily change the notion of the current block, we are required to
+get an up-to-date value for code that will set up the Phi node.

+ +
+
+      (* Emit 'else' value. *)
+      let else_bb = append_block context "else" the_function in
+      position_at_end else_bb builder;
+      let else_val = codegen_expr else_ in
+
+      (* Codegen of 'else' can change the current block, update else_bb for the
+       * phi. *)
+      let new_else_bb = insertion_block builder in
+
+
+ +

Code generation for the 'else' block is basically identical to codegen for +the 'then' block.

+ +
+
+      (* Emit merge block. *)
+      let merge_bb = append_block context "ifcont" the_function in
+      position_at_end merge_bb builder;
+      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+      let phi = build_phi incoming "iftmp" builder in
+
+
+ +

The first two lines here are now familiar: the first adds the "merge" block
+to the Function object. The second changes the insertion point so that newly
+created code will go into the "merge" block. Once that is done, we need to
+create the PHI node and set up the block/value pairs for the PHI.

+ +
+
+      (* Return to the start block to add the conditional branch. *)
+      position_at_end start_bb builder;
+      ignore (build_cond_br cond_val then_bb else_bb builder);
+
+
+ +

Once the blocks are created, we can emit the conditional branch that chooses +between them. Note that creating new blocks does not implicitly affect the +IRBuilder, so it is still inserting into the block that the condition +went into. This is why we needed to save the "start" block.

+ +
+
+      (* Set an unconditional branch at the end of the 'then' block and the
+       * 'else' block to the 'merge' block. *)
+      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+      (* Finally, set the builder to the end of the merge block. *)
+      position_at_end merge_bb builder;
+
+      phi
+
+
+ +

To finish off the blocks, we create an unconditional branch to the merge
+block. One interesting (and very important) aspect of the LLVM IR is that it
+requires all basic blocks to be "terminated" with a control flow instruction
+such as return or branch. This means that all control flow, including
+fall-throughs, must be made explicit in the LLVM IR. If you violate this rule,
+the verifier will emit an error.
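+
+If you want to see the verifier catch a violation of this rule, a small,
+intentionally broken sketch (with illustrative names) suffices: create a
+function whose entry block never gets a terminator, then validate it.
+
+let bad = declare_function "bad" (function_type double_type [||]) the_module in
+let _bb = append_block context "entry" bad in
+(* The entry block has no ret/br, so this validation fails. *)
+Llvm_analysis.assert_valid_function bad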

Finally, the CodeGen function returns the phi node as the value computed by +the if/then/else expression. In our example above, this returned value will +feed into the code for the top-level function, which will create the return +instruction.

+ +

Overall, we now have the ability to execute conditional code in +Kaleidoscope. With this extension, Kaleidoscope is a fairly complete language +that can calculate a wide variety of numeric functions. Next up we'll add +another useful expression that is familiar from non-functional languages...

+

Now that we know how to add basic control flow constructs to the language,
+we have the tools to add more powerful things. Let's add something more
+aggressive: a 'for' expression:

+ +
+
+ extern putchard(char);
+ def printstar(n)
+   for i = 1, i < n, 1.0 in
+     putchard(42);  # ascii 42 = '*'
+
+ # print 100 '*' characters
+ printstar(100);
+
+
+ +

This expression defines a new variable ("i" in this case) which iterates from
+a starting value, while the condition ("i < n" in this case) is true,
+incrementing by an optional step value ("1.0" in this case). If the step value
+is omitted, it defaults to 1.0. While the loop condition is true, it executes
+its body expression. Because we don't have anything better to return, we'll
+just define the loop as always returning 0.0. In the future when we have
+mutable variables, it will get more useful.

+ +

As before, let's talk about the changes that we need to make to Kaleidoscope
+to support this.

+

The lexer extensions are the same sort of thing as for if/then/else:

+ +
+
+  ... in Token.token ...
+  (* control *)
+  | If | Then | Else
+  | For | In
+
+  ... in Lexer.lex_ident...
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | "if" -> [< 'Token.If; stream >]
+      | "then" -> [< 'Token.Then; stream >]
+      | "else" -> [< 'Token.Else; stream >]
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+
+

The AST variant is just as simple. It basically boils down to capturing +the variable name and the constituent expressions in the node.

+ +
+
+type expr =
+  ...
+  (* variant for for/in. *)
+  | For of string * expr * expr * expr option * expr
+
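+
+With this variant, the printstar loop above corresponds to a value like the
+following (written out by hand for illustration). Note that the step is an
+expr option, so omitting ", 1.0" in the source would simply make that field
+None:
+
+(* for i = 1, i < n, 1.0 in putchard(42) *)
+Ast.For ("i", Ast.Number 1.0,
+         Ast.Binary ('<', Ast.Variable "i", Ast.Variable "n"),
+         Some (Ast.Number 1.0),
+         Ast.Call ("putchard", [| Ast.Number 42.0 |]))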
+
+

The parser code is also fairly standard. The only interesting thing here is
+the handling of the optional step value. The parser code handles it by checking
+to see if the second comma is present. If not, it sets the step value to None
+in the AST node:

+ +
+
+let rec parse_primary = parser
+  ...
+  (* forexpr
+        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+  | [< 'Token.For;
+       'Token.Ident id ?? "expected identifier after for";
+       'Token.Kwd '=' ?? "expected '=' after for";
+       stream >] ->
+      begin parser
+        | [<
+             start=parse_expr;
+             'Token.Kwd ',' ?? "expected ',' after for";
+             end_=parse_expr;
+             stream >] ->
+            let step =
+              begin parser
+              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+              | [< >] -> None
+              end stream
+            in
+            begin parser
+            | [< 'Token.In; body=parse_expr >] ->
+                Ast.For (id, start, end_, step, body)
+            | [< >] ->
+                raise (Stream.Error "expected 'in' after for")
+            end stream
+        | [< >] ->
+            raise (Stream.Error "expected '=' after for")
+      end stream
+
+
+

Now we get to the good part: the LLVM IR we want to generate for this thing. +With the simple example above, we get this LLVM IR (note that this dump is +generated with optimizations disabled for clarity): +

+ +
+
+declare double @putchard(double)
+
+define double @printstar(double %n) {
+entry:
+        ; initial value = 1.0 (inlined into phi)
+  br label %loop
+
+loop:    ; preds = %loop, %entry
+  %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
+        ; body
+  %calltmp = call double @putchard(double 4.200000e+01)
+        ; increment
+  %nextvar = fadd double %i, 1.000000e+00
+
+        ; termination test
+  %cmptmp = fcmp ult double %i, %n
+  %booltmp = uitofp i1 %cmptmp to double
+  %loopcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %loopcond, label %loop, label %afterloop
+
+afterloop:    ; preds = %loop
+        ; loop always returns 0.0
+  ret double 0.000000e+00
+}
+
+
+ +

This loop contains all the same constructs we saw before: a phi node, several
+expressions, and some basic blocks. Let's see how this fits together.

+

The first part of Codegen is very simple: we just output the start expression +for the loop value:

+ +
+
+let rec codegen_expr = function
+  ...
+  | Ast.For (var_name, start, end_, step, body) ->
+      (* Emit the start code first, without 'variable' in scope. *)
+      let start_val = codegen_expr start in
+
+
+ +

With this out of the way, the next step is to set up the LLVM basic block +for the start of the loop body. In the case above, the whole loop body is one +block, but remember that the body code itself could consist of multiple blocks +(e.g. if it contains an if/then/else or a for/in expression).

+ +
+
+      (* Make the new basic block for the loop header, inserting after current
+       * block. *)
+      let preheader_bb = insertion_block builder in
+      let the_function = block_parent preheader_bb in
+      let loop_bb = append_block context "loop" the_function in
+
+      (* Insert an explicit fall through from the current block to the
+       * loop_bb. *)
+      ignore (build_br loop_bb builder);
+
+
+ +

This code is similar to what we saw for if/then/else. Because we will need +it to create the Phi node, we remember the block that falls through into the +loop. Once we have that, we create the actual block that starts the loop and +create an unconditional branch for the fall-through between the two blocks.

+ +
+
+      (* Start insertion in loop_bb. *)
+      position_at_end loop_bb builder;
+
+      (* Start the PHI node with an entry for start. *)
+      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+
+ +

Now that the "preheader" for the loop is set up, we switch to emitting code +for the loop body. To begin with, we move the insertion point and create the +PHI node for the loop induction variable. Since we already know the incoming +value for the starting value, we add it to the Phi node. Note that the Phi will +eventually get a second value for the backedge, but we can't set it up yet +(because it doesn't exist!).

+ +
+
+      (* Within the loop, the variable is defined equal to the PHI node. If it
+       * shadows an existing variable, we have to restore it, so save it
+       * now. *)
+      let old_val =
+        try Some (Hashtbl.find named_values var_name) with Not_found -> None
+      in
+      Hashtbl.add named_values var_name variable;
+
+      (* Emit the body of the loop.  This, like any other expr, can change the
+       * current BB.  Note that we ignore the value computed by the body, but
+       * don't allow an error *)
+      ignore (codegen_expr body);
+
+
+ +

Now the code starts to get more interesting. Our 'for' loop introduces a new
+variable to the symbol table. This means that our symbol table can now contain
+either function arguments or loop variables. To handle this, before we codegen
+the body of the loop, we add the loop variable as the current value for its
+name. Note that it is possible that there is a variable of the same name in the
+outer scope. It would be easy to make this an error (raise an error if there is
+already an entry for var_name) but we choose to allow shadowing of variables.
+In order to handle this correctly, we remember the Value that we are
+potentially shadowing in old_val (which will be None if there is no shadowed
+variable).

+ +

Once the loop variable is set into the symbol table, the code recursively
+codegens the body. This allows the body to use the loop variable: any
+references to it will naturally find it in the symbol table.

+ +
+
+      (* Emit the step value. *)
+      let step_val =
+        match step with
+        | Some step -> codegen_expr step
+        (* If not specified, use 1.0. *)
+        | None -> const_float double_type 1.0
+      in
+
+      let next_var = build_add variable step_val "nextvar" builder in
+
+
+ +

Now that the body is emitted, we compute the next value of the iteration +variable by adding the step value, or 1.0 if it isn't present. +'next_var' will be the value of the loop variable on the next iteration +of the loop.

+ +
+
+      (* Compute the end condition. *)
+      let end_cond = codegen_expr end_ in
+
+      (* Convert condition to a bool by comparing equal to 0.0. *)
+      let zero = const_float double_type 0.0 in
+      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+
+ +

Finally, we evaluate the exit value of the loop, to determine whether the
+loop should exit. This mirrors the condition evaluation for the if/then/else
+expression.

+ +
+
+      (* Create the "after loop" block and insert it. *)
+      let loop_end_bb = insertion_block builder in
+      let after_bb = append_block context "afterloop" the_function in
+
+      (* Insert the conditional branch into the end of loop_end_bb. *)
+      ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+      (* Any new code will be inserted in after_bb. *)
+      position_at_end after_bb builder;
+
+
+ +

With the code for the body of the loop complete, we just need to finish up +the control flow for it. This code remembers the end block (for the phi node), then creates the block for the loop exit ("afterloop"). Based on the value of the +exit condition, it creates a conditional branch that chooses between executing +the loop again and exiting the loop. Any future code is emitted in the +"afterloop" block, so it sets the insertion position to it.

+ +
+
+      (* Add a new entry to the PHI node for the backedge. *)
+      add_incoming (next_var, loop_end_bb) variable;
+
+      (* Restore the unshadowed variable. *)
+      begin match old_val with
+      | Some old_val -> Hashtbl.add named_values var_name old_val
+      | None -> ()
+      end;
+
+      (* for expr always returns 0.0. *)
+      const_null double_type
+
+
+ +

The final code handles various cleanups: now that we have the "next_var"
+value, we can add the incoming value to the loop PHI node. After that, we
+restore the symbol table entry for any variable that the loop variable
+shadowed (named_values is cleared for each function in codegen_func, so the
+loop variable does not leak beyond this function). Finally, code generation of
+the for loop always returns 0.0, so that is what we return from
+Codegen.codegen_expr.

+ +

With this, we conclude the "adding control flow to Kaleidoscope" chapter of +the tutorial. In this chapter we added two control flow constructs, and used +them to motivate a couple of aspects of the LLVM IR that are important for +front-end implementors to know. In the next chapter of our saga, we will get +a bit crazier and add user-defined operators +to our poor innocent language.

+

+Here is the complete code listing for our running example, enhanced with the
+if/then/else and for expressions. To build this example, use:

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+
myocamlbuild.ml:
+
+
+open Ocamlbuild_plugin;;
+
+ocaml_lib ~extern:true "llvm";;
+ocaml_lib ~extern:true "llvm_analysis";;
+ocaml_lib ~extern:true "llvm_executionengine";;
+ocaml_lib ~extern:true "llvm_target";;
+ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
+dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+ * these others for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+  (* control *)
+  | If | Then | Else
+  | For | In
+
+
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | "if" -> [< 'Token.If; stream >]
+      | "then" -> [< 'Token.Then; stream >]
+      | "else" -> [< 'Token.Else; stream >]
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+  (* variant for if/then/else. *)
+  | If of expr * expr * expr
+
+  (* variant for for/in. *)
+  | For of string * expr * expr * expr option * expr
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr
+ *   ::= ifexpr
+ *   ::= forexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+  | [< 'Token.If; c=parse_expr;
+       'Token.Then ?? "expected 'then'"; t=parse_expr;
+       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+      Ast.If (c, t, e)
+
+  (* forexpr
+        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+  | [< 'Token.For;
+       'Token.Ident id ?? "expected identifier after for";
+       'Token.Kwd '=' ?? "expected '=' after for";
+       stream >] ->
+      begin parser
+        | [<
+             start=parse_expr;
+             'Token.Kwd ',' ?? "expected ',' after for";
+             end_=parse_expr;
+             stream >] ->
+            let step =
+              begin parser
+              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+              | [< >] -> None
+              end stream
+            in
+            begin parser
+            | [< 'Token.In; body=parse_expr >] ->
+                Ast.For (id, start, end_, step, body)
+            | [< >] ->
+                raise (Stream.Error "expected 'in' after for")
+            end stream
+        | [< >] ->
+            raise (Stream.Error "expected '=' after for")
+      end stream
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the primary expression after the binary operator. *)
+        let rhs = parse_primary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')' *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
codegen.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_add lhs_val rhs_val "addtmp" builder
+        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+  | Ast.If (cond, then_, else_) ->
+      let cond = codegen_expr cond in
+
+      (* Convert condition to a bool by comparing equal to 0.0 *)
+      let zero = const_float double_type 0.0 in
+      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+      (* Grab the first block so that we might later add the conditional branch
+       * to it at the end of the function. *)
+      let start_bb = insertion_block builder in
+      let the_function = block_parent start_bb in
+
+      let then_bb = append_block context "then" the_function in
+
+      (* Emit 'then' value. *)
+      position_at_end then_bb builder;
+      let then_val = codegen_expr then_ in
+
+      (* Codegen of 'then' can change the current block, update then_bb for the
+       * phi. We create a new name because one is used for the phi node, and the
+       * other is used for the conditional branch. *)
+      let new_then_bb = insertion_block builder in
+
+      (* Emit 'else' value. *)
+      let else_bb = append_block context "else" the_function in
+      position_at_end else_bb builder;
+      let else_val = codegen_expr else_ in
+
+      (* Codegen of 'else' can change the current block, update else_bb for the
+       * phi. *)
+      let new_else_bb = insertion_block builder in
+
+      (* Emit merge block. *)
+      let merge_bb = append_block context "ifcont" the_function in
+      position_at_end merge_bb builder;
+      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+      let phi = build_phi incoming "iftmp" builder in
+
+      (* Return to the start block to add the conditional branch. *)
+      position_at_end start_bb builder;
+      ignore (build_cond_br cond_val then_bb else_bb builder);
+
+      (* Set an unconditional branch at the end of the 'then' block and the
+       * 'else' block to the 'merge' block. *)
+      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+      (* Finally, set the builder to the end of the merge block. *)
+      position_at_end merge_bb builder;
+
+      phi
+  | Ast.For (var_name, start, end_, step, body) ->
+      (* Emit the start code first, without 'variable' in scope. *)
+      let start_val = codegen_expr start in
+
+      (* Make the new basic block for the loop header, inserting after current
+       * block. *)
+      let preheader_bb = insertion_block builder in
+      let the_function = block_parent preheader_bb in
+      let loop_bb = append_block context "loop" the_function in
+
+      (* Insert an explicit fall through from the current block to the
+       * loop_bb. *)
+      ignore (build_br loop_bb builder);
+
+      (* Start insertion in loop_bb. *)
+      position_at_end loop_bb builder;
+
+      (* Start the PHI node with an entry for start. *)
+      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+      (* Within the loop, the variable is defined equal to the PHI node. If it
+       * shadows an existing variable, we have to restore it, so save it
+       * now. *)
+      let old_val =
+        try Some (Hashtbl.find named_values var_name) with Not_found -> None
+      in
+      Hashtbl.add named_values var_name variable;
+
+      (* Emit the body of the loop.  This, like any other expr, can change the
+       * current BB.  Note that we ignore the value computed by the body, but
+       * don't allow an error *)
+      ignore (codegen_expr body);
+
+      (* Emit the step value. *)
+      let step_val =
+        match step with
+        | Some step -> codegen_expr step
+        (* If not specified, use 1.0. *)
+        | None -> const_float double_type 1.0
+      in
+
+      let next_var = build_add variable step_val "nextvar" builder in
+
+      (* Compute the end condition. *)
+      let end_cond = codegen_expr end_ in
+
+      (* Convert condition to a bool by comparing equal to 0.0. *)
+      let zero = const_float double_type 0.0 in
+      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+      (* Create the "after loop" block and insert it. *)
+      let loop_end_bb = insertion_block builder in
+      let after_bb = append_block context "afterloop" the_function in
+
+      (* Insert the conditional branch into the end of loop_end_bb. *)
+      ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+      (* Any new code will be inserted in after_bb. *)
+      position_at_end after_bb builder;
+
+      (* Add a new entry to the PHI node for the backedge. *)
+      add_incoming (next_var, loop_end_bb) variable;
+
+      (* Restore the unshadowed variable. *)
+      begin match old_val with
+      | Some old_val -> Hashtbl.add named_values var_name old_val
+      | None -> ()
+      end;
+
+      (* for expr always returns 0.0. *)
+      const_null double_type
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+let codegen_func the_fpm = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Optimize the function. *)
+        let _ = PassManager.run_function the_function the_fpm in
+
+        the_function
+      with e ->
+        delete_function the_function;
+        raise e
+
+
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop the_fpm the_execution_engine stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop the_fpm the_execution_engine stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            let e = Parser.parse_definition stream in
+            print_endline "parsed a function definition.";
+            dump_value (Codegen.codegen_func the_fpm e);
+        | Token.Extern ->
+            let e = Parser.parse_extern stream in
+            print_endline "parsed an extern.";
+            dump_value (Codegen.codegen_proto e);
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            let the_function = Codegen.codegen_func the_fpm e in
+            dump_value the_function;
+
+            (* JIT the function, returning a function pointer. *)
+            let result = ExecutionEngine.run_function the_function [||]
+              the_execution_engine in
+
+            print_string "Evaluated to ";
+            print_float (GenericValue.as_float Codegen.double_type result);
+            print_newline ();
+        with Stream.Error s | Codegen.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop the_fpm the_execution_engine stream
+
+
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+open Llvm_target
+open Llvm_scalar_opts
+
+let main () =
+  ignore (initialize_native_target ());
+
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combination the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+  (* Eliminate Common SubExpressions. *)
+  add_gvn the_fpm;
+
+  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+  add_cfg_simplification the_fpm;
+
+  ignore (PassManager.initialize the_fpm);
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop the_fpm the_execution_engine stream;
+
+  (* Print out all the generated code. *)
+  dump_module Codegen.the_module
+;;
+
+main ()
+
+
bindings.c
+
+
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+
+
+Next: Extending the language: user-defined operators
+
+ Chris Lattner
+ Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$
diff --git a/final/docs/tutorial/OCamlLangImpl6.html b/final/docs/tutorial/OCamlLangImpl6.html
new file mode 100644
index 00000000000..86210febca2
--- /dev/null
+++ b/final/docs/tutorial/OCamlLangImpl6.html
@@ -0,0 +1,1574 @@
Kaleidoscope: Extending the Language: User-defined Operators
+
+Written by Chris Lattner and Erick Tryzelaar
+

Welcome to Chapter 6 of the "Implementing a language +with LLVM" tutorial. At this point in our tutorial, we now have a fully +functional language that is fairly minimal, but also useful. There +is still one big problem with it, however. Our language doesn't have many +useful operators (like division, logical negation, or even any comparisons +besides less-than).

+ +

This chapter of the tutorial takes a wild digression into adding user-defined +operators to the simple and beautiful Kaleidoscope language. This digression now +gives us a simple and ugly language in some ways, but also a powerful one at the +same time. One of the great things about creating your own language is that you +get to decide what is good or bad. In this tutorial we'll assume that it is +okay to use this as a way to show some interesting parsing techniques.

+ +

At the end of this tutorial, we'll run through an example Kaleidoscope +application that renders the Mandelbrot set. This gives +an example of what you can build with Kaleidoscope and its feature set.

+

+The "operator overloading" that we will add to Kaleidoscope is more general than +languages like C++. In C++, you are only allowed to redefine existing +operators: you can't programatically change the grammar, introduce new +operators, change precedence levels, etc. In this chapter, we will add this +capability to Kaleidoscope, which will let the user round out the set of +operators that are supported.

+ +

The point of going into user-defined operators in a tutorial like this is to +show the power and flexibility of using a hand-written parser. Thus far, the parser +we have been implementing uses recursive descent for most parts of the grammar and +operator precedence parsing for the expressions. See Chapter 2 for details. Without using operator +precedence parsing, it would be very difficult to allow the programmer to +introduce new operators into the grammar: the grammar is dynamically extensible +as the JIT runs.
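+
+To make the "dynamically extensible" point concrete: parse_bin_rhs decides
+what counts as a binary operator by consulting the mutable binop_precedence
+table from Chapter 2, so installing a new operator at runtime is just a table
+update. A minimal sketch:
+
+(* After this entry exists, "a | b" parses as a binary expression; before it,
+ * 'precedence' returned -1 for '|' and an expression simply ended at "a". *)
+Hashtbl.add Parser.binop_precedence '|' 5;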

+ +

The two specific features we'll add are programmable unary operators (right +now, Kaleidoscope has no unary operators at all) as well as binary operators. +An example of this is:

+ +
+
+# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary "logical or", (note that it does not "short circuit")
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Define = with slightly lower precedence than relationals.
+def binary= 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+
+ +

Many languages aspire to being able to implement their standard runtime +library in the language itself. In Kaleidoscope, we can implement significant +parts of the language in the library!

+ +

We will break down implementation of these features into two parts: +implementing support for user-defined binary operators and adding unary +operators.

+

Adding support for user-defined binary operators is pretty simple with our +current framework. We'll first add support for the unary/binary keywords:

+ +
+
+type token =
+  ...
+  (* operators *)
+  | Binary | Unary
+
+...
+
+and lex_ident buffer = parser
+  ...
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | "binary" -> [< 'Token.Binary; stream >]
+      | "unary" -> [< 'Token.Unary; stream >]
+
+
+ +

This just adds lexer support for the unary and binary keywords, like we
+did in previous chapters. One nice thing about our current AST is that we
+represent binary operators with full generalisation by using their ASCII code
+as the opcode. For our extended operators, we'll use this same representation,
+so we don't need any new AST or parser support.
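+
+Concretely, since Ast.Binary carries the operator as a plain char, a
+user-defined "a | b" builds exactly the same shape of node as the builtin
+"a + b" (illustrative values):
+
+Ast.Binary ('|', Ast.Variable "a", Ast.Variable "b")   (* user-defined *)
+Ast.Binary ('+', Ast.Variable "a", Ast.Variable "b")   (* builtin *)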

+ +

On the other hand, we have to be able to represent the definitions of these +new operators, in the "def binary| 5" part of the function definition. In our +grammar so far, the "name" for the function definition is parsed as the +"prototype" production and into the Ast.Prototype AST node. To +represent our new user-defined operators as prototypes, we have to extend +the Ast.Prototype AST node like this:

+ +
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto =
+  | Prototype of string * string array
+  | BinOpPrototype of string * string array * int
+
+
+ +

Basically, in addition to knowing a name for the prototype, we now keep track +of whether it was an operator, and if it was, what precedence level the operator +is at. The precedence is only used for binary operators (as you'll see below, +it just doesn't apply for unary operators). Now that we have a way to represent +the prototype for a user-defined operator, we need to parse it:

+ +
+
+(* prototype
+ *   ::= id '(' id* ')'
+ *   ::= binary LETTER number? (id, id)
+ *   ::= unary LETTER number? (id) *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+  let parse_operator = parser
+    | [< 'Token.Unary >] -> "unary", 1
+    | [< 'Token.Binary >] -> "binary", 2
+  in
+  let parse_binary_precedence = parser
+    | [< 'Token.Number n >] -> int_of_float n
+    | [< >] -> 30
+  in
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+  | [< (prefix, kind)=parse_operator;
+       'Token.Kwd op ?? "expected an operator";
+       (* Read the precedence if present. *)
+       binary_precedence=parse_binary_precedence;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+        args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      let name = prefix ^ (String.make 1 op) in
+      let args = Array.of_list (List.rev args) in
+
+      (* Verify right number of arguments for operator. *)
+      if Array.length args != kind
+      then raise (Stream.Error "invalid number of operands for operator")
+      else
+        if kind == 1 then
+          Ast.Prototype (name, args)
+        else
+          Ast.BinOpPrototype (name, args, binary_precedence)
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+
+ +

This is all fairly straightforward parsing code, and we have already seen +a lot of similar code in the past. One interesting part of the code above is +the couple of lines that set up the name for binary operators. This builds +names like "binary@" for a newly defined "@" operator. It then takes +advantage of the fact that symbol names in the LLVM symbol table are allowed to +have any character in them, including embedded nul characters.
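+As a quick standalone sketch of that mangling (the mangle helper below is +hypothetical, not part of the tutorial code):
+
+(* Build the symbol name from the keyword prefix and the operator char. *)
+let mangle prefix op = prefix ^ (String.make 1 op)
+
+let () =
+  assert (mangle "binary" '@' = "binary@");
+  assert (mangle "unary" '!' = "unary!")
+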

+ +

The next interesting thing to add is codegen support for these binary +operators. Given our current structure, this is a simple addition of a default +case for our existing binary operator node:

+ +
+
+let codegen_expr = function
+  ...
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_add lhs_val rhs_val "addtmp" builder
+        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ ->
+            (* If it wasn't a builtin binary operator, it must be a user defined
+             * one. Emit a call to it. *)
+            let callee = "binary" ^ (String.make 1 op) in
+            let callee =
+              match lookup_function callee the_module with
+              | Some callee -> callee
+              | None -> raise (Error "binary operator not found!")
+            in
+            build_call callee [|lhs_val; rhs_val|] "binop" builder
+      end
+
+
+ +

As you can see above, the new code is actually really simple. It just does +a lookup for the appropriate operator in the symbol table and generates a +function call to it. Since user-defined operators are just built as normal +functions (because the "prototype" boils down to a function with the right +name), everything falls into place.

+ +

The final piece of code we are missing is a bit of top-level magic:

+ +
+
+let codegen_func the_fpm = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* If this is an operator, install it. *)
+      begin match proto with
+      | Ast.BinOpPrototype (name, args, prec) ->
+          let op = name.[String.length name - 1] in
+          Hashtbl.add Parser.binop_precedence op prec;
+      | _ -> ()
+      end;
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+      ...
+
+
+ +

Basically, before codegening a function, if it is a user-defined operator, we +register it in the precedence table. This allows the binary operator parsing +logic we already have in place to handle it. Since we are working on a +fully-general operator precedence parser, this is all we need to do to "extend +the grammar".
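+As a hedged sketch of the sequence of events (install_operator below is a +hypothetical restatement of the BinOpPrototype case above, not tutorial code): +once codegen sees "def binary@ 15 ...", the parser's table gains an entry for +'@', and parse_bin_rhs treats it exactly like the built-ins installed in toy.ml.
+
+(* Extract the operator char (the last char of "binary@") and register it. *)
+let install_operator name prec table =
+  let op = name.[String.length name - 1] in
+  Hashtbl.add table op prec
+
+let () =
+  let table : (char, int) Hashtbl.t = Hashtbl.create 10 in
+  install_operator "binary@" 15 table;
+  assert (Hashtbl.find table '@' = 15)
+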

+ +

Now we have useful user-defined binary operators. This builds a lot +on the previous framework we built for other operators. Adding unary operators +is a bit more challenging, because we don't have any framework for it yet; let's +see what it takes.

+ +
+User-defined Unary Operators
+ +

Since we don't currently support unary operators in the Kaleidoscope +language, we'll need to add everything to support them. Above, we added simple +support for the 'unary' keyword to the lexer. In addition to that, we need an +AST node:

+ +
+
+type expr =
+  ...
+  (* variant for a unary operator. *)
+  | Unary of char * expr
+  ...
+
+
+ +

This AST node is very simple and obvious by now. It directly mirrors the +binary operator AST node, except that it only has one child. With this, we +need to add the parsing logic. Parsing a unary operator is pretty simple: we'll +add a new function to do it:

+ +
+
+(* unary
+ *   ::= primary
+ *   ::= '!' unary *)
+and parse_unary = parser
+  (* If this is a unary operator, read it. *)
+  | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+      Ast.Unary (op, operand)
+
+  (* If the current token is not an operator, it must be a primary expr. *)
+  | [< stream >] -> parse_primary stream
+
+
+ +

The grammar we add is pretty straightforward here. If we see a unary +operator when parsing a primary expression, we eat the operator as a prefix and +parse the remaining piece as another unary operator. This allows us to handle +multiple unary operators (e.g. "!!x"). Note that unary operators can't have +ambiguous parses like binary operators can, so there is no need for precedence +information.
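+For example (an illustration of the resulting tree, not code from the +listing), "!!x" parses into two nested Ast.Unary nodes:
+
+(* The outer '!' takes the result of parsing the inner "!x" as its operand. *)
+let double_not = Ast.Unary ('!', Ast.Unary ('!', Ast.Variable "x"))
+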

+ +

The problem with this function is that we need to call parse_unary from +somewhere. To do this, we change previous callers of parse_primary to call +parse_unary instead:

+ +
+
+(* binoprhs
+ *   ::= ('+' unary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+        ...
+        (* Parse the unary expression after the binary operator. *)
+        let rhs = parse_unary stream in
+        ...
+
+...
+
+(* expression
+ *   ::= unary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+
+ +

With these two simple changes, we are now able to parse unary operators and build the +AST for them. Next up, we need to add parser support for prototypes, to parse +the unary operator prototype. We extend the binary operator code above +with:

+ +
+
+(* prototype
+ *   ::= id '(' id* ')'
+ *   ::= binary LETTER number? (id, id)
+ *   ::= unary LETTER number? (id) *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+  let parse_operator = parser
+    | [< 'Token.Unary >] -> "unary", 1
+    | [< 'Token.Binary >] -> "binary", 2
+  in
+  let parse_binary_precedence = parser
+    | [< 'Token.Number n >] -> int_of_float n
+    | [< >] -> 30
+  in
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+  | [< (prefix, kind)=parse_operator;
+       'Token.Kwd op ?? "expected an operator";
+       (* Read the precedence if present. *)
+       binary_precedence=parse_binary_precedence;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+        args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      let name = prefix ^ (String.make 1 op) in
+      let args = Array.of_list (List.rev args) in
+
+      (* Verify right number of arguments for operator. *)
+      if Array.length args != kind
+      then raise (Stream.Error "invalid number of operands for operator")
+      else
+        if kind == 1 then
+          Ast.Prototype (name, args)
+        else
+          Ast.BinOpPrototype (name, args, binary_precedence)
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+
+ +

As with binary operators, we name unary operators with a name that includes +the operator character. This assists us at code generation time. Speaking of +which, the final piece we need to add is codegen support for unary operators. It looks +like this:

+ +
+
+let rec codegen_expr = function
+  ...
+  | Ast.Unary (op, operand) ->
+      let operand = codegen_expr operand in
+      let callee = "unary" ^ (String.make 1 op) in
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown unary operator")
+      in
+      build_call callee [|operand|] "unop" builder
+
+
+ +

This code is similar to, but simpler than, the code for binary operators. It +is simpler primarily because it doesn't need to handle any predefined operators. +

+ +
+Kicking the Tires
+ +

It is somewhat hard to believe, but with a few simple extensions we've +covered in the last chapters, we have grown a real-ish language. With this, we +can do a lot of interesting things, including I/O, math, and a bunch of other +things. For example, we can now add a nice sequencing operator (printd is +defined to print out the specified value and a newline):

+ +
+
+ready> extern printd(x);
+Read extern: declare double @printd(double)
+ready> def binary : 1 (x y) 0;  # Low-precedence operator that ignores operands.
+..
+ready> printd(123) : printd(456) : printd(789);
+123.000000
+456.000000
+789.000000
+Evaluated to 0.000000
+
+
+ +

We can also define a bunch of other "primitive" operations, such as:

+ +
+
+# Logical unary not.
+def unary!(v)
+  if v then
+    0
+  else
+    1;
+
+# Unary negate.
+def unary-(v)
+  0-v;
+
+# Define > with the same precedence as <.
+def binary> 10 (LHS RHS)
+  RHS < LHS;
+
+# Binary logical or, which does not short circuit.
+def binary| 5 (LHS RHS)
+  if LHS then
+    1
+  else if RHS then
+    1
+  else
+    0;
+
+# Binary logical and, which does not short circuit.
+def binary& 6 (LHS RHS)
+  if !LHS then
+    0
+  else
+    !!RHS;
+
+# Define = with slightly lower precedence than relationals.
+def binary= 9 (LHS RHS)
+  !(LHS < RHS | LHS > RHS);
+
+
+
+ + +

Given the previous if/then/else support, we can also define interesting +functions for I/O. For example, the following prints out a character whose +"density" reflects the value passed in: the lower the value, the denser the +character:

+ +
+
+ready>
+
+extern putchard(char)
+def printdensity(d)
+  if d > 8 then
+    putchard(32)  # ' '
+  else if d > 4 then
+    putchard(46)  # '.'
+  else if d > 2 then
+    putchard(43)  # '+'
+  else
+    putchard(42); # '*'
+...
+ready> printdensity(1): printdensity(2): printdensity(3) :
+          printdensity(4): printdensity(5): printdensity(9): putchard(10);
+*++..
+Evaluated to 0.000000
+
+
+ +

Based on these simple primitive operations, we can start to define more +interesting things. For example, here's a little function that solves for the +number of iterations it takes for a function in the complex plane to +converge:

+ +
+
+# determine whether the specific location diverges.
+# Solve for z = z^2 + c in the complex plane.
+def mandleconverger(real imag iters creal cimag)
+  if iters > 255 | (real*real + imag*imag > 4) then
+    iters
+  else
+    mandleconverger(real*real - imag*imag + creal,
+                    2*real*imag + cimag,
+                    iters+1, creal, cimag);
+
+# return the number of iterations required for the iteration to escape
+def mandleconverge(real imag)
+  mandleconverger(real, imag, 0, real, imag);
+
+
+ +

This "z = z2 + c" function is a beautiful little creature that is the basis +for computation of the Mandelbrot Set. Our +mandelconverge function returns the number of iterations that it takes +for a complex orbit to escape, saturating to 255. This is not a very useful +function by itself, but if you plot its value over a two-dimensional plane, +you can see the Mandelbrot set. Given that we are limited to using putchard +here, our amazing graphical output is limited, but we can whip together +something using the density plotter above:

+ +
+
+# compute and plot the Mandelbrot set with the specified 2-dimensional range
+# info.
+def mandelhelp(xmin xmax xstep   ymin ymax ystep)
+  for y = ymin, y < ymax, ystep in (
+    (for x = xmin, x < xmax, xstep in
+       printdensity(mandleconverge(x,y)))
+    : putchard(10)
+  )
+
+# mandel - This is a convenient helper function for plotting the Mandelbrot set
+# from the specified position with the specified magnification.
+def mandel(realstart imagstart realmag imagmag)
+  mandelhelp(realstart, realstart+realmag*78, realmag,
+             imagstart, imagstart+imagmag*40, imagmag);
+
+
+ +

Given this, we can try plotting out the Mandelbrot set! Let's try it out:

+ +
+
+ready> mandel(-2.3, -1.3, 0.05, 0.07);
+*******************************+++++++++++*************************************
+*************************+++++++++++++++++++++++*******************************
+**********************+++++++++++++++++++++++++++++****************************
+*******************+++++++++++++++++++++.. ...++++++++*************************
+*****************++++++++++++++++++++++.... ...+++++++++***********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+**************+++++++++++++++++++++++....     ....+++++++++********************
+*************++++++++++++++++++++++......      .....++++++++*******************
+************+++++++++++++++++++++.......       .......+++++++******************
+***********+++++++++++++++++++....                ... .+++++++*****************
+**********+++++++++++++++++.......                     .+++++++****************
+*********++++++++++++++...........                    ...+++++++***************
+********++++++++++++............                      ...++++++++**************
+********++++++++++... ..........                        .++++++++**************
+*******+++++++++.....                                   .+++++++++*************
+*******++++++++......                                  ..+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******+++++......                                     ..+++++++++*************
+*******.... ....                                      ...+++++++++*************
+*******.... .                                         ...+++++++++*************
+*******+++++......                                    ...+++++++++*************
+*******++++++.......                                   ..+++++++++*************
+*******++++++++......                                   .+++++++++*************
+*******+++++++++.....                                  ..+++++++++*************
+********++++++++++... ..........                        .++++++++**************
+********++++++++++++............                      ...++++++++**************
+*********++++++++++++++..........                     ...+++++++***************
+**********++++++++++++++++........                     .+++++++****************
+**********++++++++++++++++++++....                ... ..+++++++****************
+***********++++++++++++++++++++++.......       .......++++++++*****************
+************+++++++++++++++++++++++......      ......++++++++******************
+**************+++++++++++++++++++++++....      ....++++++++********************
+***************+++++++++++++++++++++++.....   ...+++++++++*********************
+*****************++++++++++++++++++++++....  ...++++++++***********************
+*******************+++++++++++++++++++++......++++++++*************************
+*********************++++++++++++++++++++++.++++++++***************************
+*************************+++++++++++++++++++++++*******************************
+******************************+++++++++++++************************************
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+Evaluated to 0.000000
+ready> mandel(-2, -1, 0.02, 0.04);
+**************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
+***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
+*******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
+*****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
+***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
+**************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
+************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
+***********++++++++++++++++++++++++++++++++++++++++++++++++++........        .
+**********++++++++++++++++++++++++++++++++++++++++++++++.............
+********+++++++++++++++++++++++++++++++++++++++++++..................
+*******+++++++++++++++++++++++++++++++++++++++.......................
+******+++++++++++++++++++++++++++++++++++...........................
+*****++++++++++++++++++++++++++++++++............................
+*****++++++++++++++++++++++++++++...............................
+****++++++++++++++++++++++++++......   .........................
+***++++++++++++++++++++++++.........     ......    ...........
+***++++++++++++++++++++++............
+**+++++++++++++++++++++..............
+**+++++++++++++++++++................
+*++++++++++++++++++.................
+*++++++++++++++++............ ...
+*++++++++++++++..............
+*+++....++++................
+*..........  ...........
+*
+*..........  ...........
+*+++....++++................
+*++++++++++++++..............
+*++++++++++++++++............ ...
+*++++++++++++++++++.................
+**+++++++++++++++++++................
+**+++++++++++++++++++++..............
+***++++++++++++++++++++++............
+***++++++++++++++++++++++++.........     ......    ...........
+****++++++++++++++++++++++++++......   .........................
+*****++++++++++++++++++++++++++++...............................
+*****++++++++++++++++++++++++++++++++............................
+******+++++++++++++++++++++++++++++++++++...........................
+*******+++++++++++++++++++++++++++++++++++++++.......................
+********+++++++++++++++++++++++++++++++++++++++++++..................
+Evaluated to 0.000000
+ready> mandel(-0.9, -1.4, 0.02, 0.03);
+*******************************************************************************
+*******************************************************************************
+*******************************************************************************
+**********+++++++++++++++++++++************************************************
+*+++++++++++++++++++++++++++++++++++++++***************************************
++++++++++++++++++++++++++++++++++++++++++++++**********************************
+++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
+++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
++++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
++++++++++++++++++++++++++++++++....   ......+++++++++++++++++++****************
++++++++++++++++++++++++++++++.......  ........+++++++++++++++++++**************
+++++++++++++++++++++++++++++........   ........++++++++++++++++++++************
++++++++++++++++++++++++++++.........     ..  ...+++++++++++++++++++++**********
+++++++++++++++++++++++++++...........        ....++++++++++++++++++++++********
+++++++++++++++++++++++++.............       .......++++++++++++++++++++++******
++++++++++++++++++++++++.............        ........+++++++++++++++++++++++****
+++++++++++++++++++++++...........           ..........++++++++++++++++++++++***
+++++++++++++++++++++...........                .........++++++++++++++++++++++*
+++++++++++++++++++............                  ...........++++++++++++++++++++
+++++++++++++++++...............                 .............++++++++++++++++++
+++++++++++++++.................                 ...............++++++++++++++++
+++++++++++++..................                  .................++++++++++++++
++++++++++..................                      .................+++++++++++++
+++++++........        .                               .........  ..++++++++++++
+++............                                         ......    ....++++++++++
+..............                                                    ...++++++++++
+..............                                                    ....+++++++++
+..............                                                    .....++++++++
+.............                                                    ......++++++++
+...........                                                     .......++++++++
+.........                                                       ........+++++++
+.........                                                       ........+++++++
+.........                                                           ....+++++++
+........                                                             ...+++++++
+.......                                                              ...+++++++
+                                                                    ....+++++++
+                                                                   .....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+                                                                    ....+++++++
+Evaluated to 0.000000
+ready> ^D
+
+
+ +

At this point, you may be starting to realize that Kaleidoscope is a real +and powerful language. It may not be self-similar :), but it can be used to +plot things that are!

+ +

With this, we conclude the "adding user-defined operators" chapter of the +tutorial. We have successfully augmented our language, adding the ability to +extend the language in the library, and we have shown how this can be used to +build a simple but interesting end-user application in Kaleidoscope. At this +point, Kaleidoscope can build a variety of applications that are functional and +can call functions with side-effects, but it can't actually define and mutate a +variable itself.

+ +

Strikingly, variable mutation is an important feature of some +languages, and it is not at all obvious how to add +support for mutable variables without having to add an "SSA construction" +phase to your front-end. In the next chapter, we will describe how you can +add variable mutation without building SSA in your front-end.

+ +
+Full Code Listing
+ +

+Here is the complete code listing for our running example, enhanced with +support for user-defined operators. To build this example, use: +

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+ +
+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+
+ +
myocamlbuild.ml:
+
+
+open Ocamlbuild_plugin;;
+
+ocaml_lib ~extern:true "llvm";;
+ocaml_lib ~extern:true "llvm_analysis";;
+ocaml_lib ~extern:true "llvm_executionengine";;
+ocaml_lib ~extern:true "llvm_target";;
+ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+
+ +
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
+ * these others for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+  (* control *)
+  | If | Then | Else
+  | For | In
+
+  (* operators *)
+  | Binary | Unary
+
+
+ +
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9]* *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | "if" -> [< 'Token.If; stream >]
+      | "then" -> [< 'Token.Then; stream >]
+      | "else" -> [< 'Token.Else; stream >]
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | "binary" -> [< 'Token.Binary; stream >]
+      | "unary" -> [< 'Token.Unary; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a unary operator. *)
+  | Unary of char * expr
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+  (* variant for if/then/else. *)
+  | If of expr * expr * expr
+
+  (* variant for for/in. *)
+  | For of string * expr * expr * expr option * expr
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto =
+  | Prototype of string * string array
+  | BinOpPrototype of string * string array * int
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr
+ *   ::= ifexpr
+ *   ::= forexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+  | [< 'Token.If; c=parse_expr;
+       'Token.Then ?? "expected 'then'"; t=parse_expr;
+       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+      Ast.If (c, t, e)
+
+  (* forexpr
+        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+  | [< 'Token.For;
+       'Token.Ident id ?? "expected identifier after for";
+       'Token.Kwd '=' ?? "expected '=' after for";
+       stream >] ->
+      begin parser
+        | [<
+             start=parse_expr;
+             'Token.Kwd ',' ?? "expected ',' after for";
+             end_=parse_expr;
+             stream >] ->
+            let step =
+              begin parser
+              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+              | [< >] -> None
+              end stream
+            in
+            begin parser
+            | [< 'Token.In; body=parse_expr >] ->
+                Ast.For (id, start, end_, step, body)
+            | [< >] ->
+                raise (Stream.Error "expected 'in' after for")
+            end stream
+        | [< >] ->
+            raise (Stream.Error "expected '=' after for")
+      end stream
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* unary
+ *   ::= primary
+ *   ::= '!' unary *)
+and parse_unary = parser
+  (* If this is a unary operator, read it. *)
+  | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+      Ast.Unary (op, operand)
+
+  (* If the current token is not an operator, it must be a primary expr. *)
+  | [< stream >] -> parse_primary stream
+
+(* binoprhs
+ *   ::= ('+' unary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the unary expression after the binary operator. *)
+        let rhs = parse_unary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+(* expression
+ *   ::= unary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')'
+ *   ::= binary LETTER number? (id, id)
+ *   ::= unary LETTER number? (id) *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+  let parse_operator = parser
+    | [< 'Token.Unary >] -> "unary", 1
+    | [< 'Token.Binary >] -> "binary", 2
+  in
+  let parse_binary_precedence = parser
+    | [< 'Token.Number n >] -> int_of_float n
+    | [< >] -> 30
+  in
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+  | [< (prefix, kind)=parse_operator;
+       'Token.Kwd op ?? "expected an operator";
+       (* Read the precedence if present. *)
+       binary_precedence=parse_binary_precedence;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+        args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      let name = prefix ^ (String.make 1 op) in
+      let args = Array.of_list (List.rev args) in
+
+      (* Verify right number of arguments for operator. *)
+      if Array.length args != kind
+      then raise (Stream.Error "invalid number of operands for operator")
+      else
+        if kind == 1 then
+          Ast.Prototype (name, args)
+        else
+          Ast.BinOpPrototype (name, args, binary_precedence)
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +
codegen.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Unary (op, operand) ->
+      let operand = codegen_expr operand in
+      let callee = "unary" ^ (String.make 1 op) in
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown unary operator")
+      in
+      build_call callee [|operand|] "unop" builder
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_add lhs_val rhs_val "addtmp" builder
+        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ ->
+            (* If it wasn't a builtin binary operator, it must be a user defined
+             * one. Emit a call to it. *)
+            let callee = "binary" ^ (String.make 1 op) in
+            let callee =
+              match lookup_function callee the_module with
+              | Some callee -> callee
+              | None -> raise (Error "binary operator not found!")
+            in
+            build_call callee [|lhs_val; rhs_val|] "binop" builder
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+  | Ast.If (cond, then_, else_) ->
+      let cond = codegen_expr cond in
+
+      (* Convert condition to a bool by comparing equal to 0.0 *)
+      let zero = const_float double_type 0.0 in
+      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+      (* Grab the first block so that we might later add the conditional branch
+       * to it at the end of the function. *)
+      let start_bb = insertion_block builder in
+      let the_function = block_parent start_bb in
+
+      let then_bb = append_block context "then" the_function in
+
+      (* Emit 'then' value. *)
+      position_at_end then_bb builder;
+      let then_val = codegen_expr then_ in
+
+      (* Codegen of 'then' can change the current block, update then_bb for the
+       * phi. We create a new name because one is used for the phi node, and the
+       * other is used for the conditional branch. *)
+      let new_then_bb = insertion_block builder in
+
+      (* Emit 'else' value. *)
+      let else_bb = append_block context "else" the_function in
+      position_at_end else_bb builder;
+      let else_val = codegen_expr else_ in
+
+      (* Codegen of 'else' can change the current block, update else_bb for the
+       * phi. *)
+      let new_else_bb = insertion_block builder in
+
+      (* Emit merge block. *)
+      let merge_bb = append_block context "ifcont" the_function in
+      position_at_end merge_bb builder;
+      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+      let phi = build_phi incoming "iftmp" builder in
+
+      (* Return to the start block to add the conditional branch. *)
+      position_at_end start_bb builder;
+      ignore (build_cond_br cond_val then_bb else_bb builder);
+
+      (* Set a unconditional branch at the end of the 'then' block and the
+       * 'else' block to the 'merge' block. *)
+      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+      (* Finally, set the builder to the end of the merge block. *)
+      position_at_end merge_bb builder;
+
+      phi
+  | Ast.For (var_name, start, end_, step, body) ->
+      (* Emit the start code first, without 'variable' in scope. *)
+      let start_val = codegen_expr start in
+
+      (* Make the new basic block for the loop header, inserting after current
+       * block. *)
+      let preheader_bb = insertion_block builder in
+      let the_function = block_parent preheader_bb in
+      let loop_bb = append_block context "loop" the_function in
+
+      (* Insert an explicit fall through from the current block to the
+       * loop_bb. *)
+      ignore (build_br loop_bb builder);
+
+      (* Start insertion in loop_bb. *)
+      position_at_end loop_bb builder;
+
+      (* Start the PHI node with an entry for start. *)
+      let variable = build_phi [(start_val, preheader_bb)] var_name builder in
+
+      (* Within the loop, the variable is defined equal to the PHI node. If it
+       * shadows an existing variable, we have to restore it, so save it
+       * now. *)
+      let old_val =
+        try Some (Hashtbl.find named_values var_name) with Not_found -> None
+      in
+      Hashtbl.add named_values var_name variable;
+
+      (* Emit the body of the loop.  This, like any other expr, can change the
+       * current BB.  Note that we ignore the value computed by the body, but
+       * don't allow an error *)
+      ignore (codegen_expr body);
+
+      (* Emit the step value. *)
+      let step_val =
+        match step with
+        | Some step -> codegen_expr step
+        (* If not specified, use 1.0. *)
+        | None -> const_float double_type 1.0
+      in
+
+      let next_var = build_add variable step_val "nextvar" builder in
+
+      (* Compute the end condition. *)
+      let end_cond = codegen_expr end_ in
+
+      (* Convert condition to a bool by comparing equal to 0.0. *)
+      let zero = const_float double_type 0.0 in
+      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+      (* Create the "after loop" block and insert it. *)
+      let loop_end_bb = insertion_block builder in
+      let after_bb = append_block context "afterloop" the_function in
+
+      (* Insert the conditional branch into the end of loop_end_bb. *)
+      ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+      (* Any new code will be inserted in after_bb. *)
+      position_at_end after_bb builder;
+
+      (* Add a new entry to the PHI node for the backedge. *)
+      add_incoming (next_var, loop_end_bb) variable;
+
+      (* Restore the unshadowed variable. *)
+      begin match old_val with
+      | Some old_val -> Hashtbl.add named_values var_name old_val
+      | None -> ()
+      end;
+
+      (* for expr always returns 0.0. *)
+      const_null double_type
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+let codegen_func the_fpm = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* If this is an operator, install it. *)
+      begin match proto with
+      | Ast.BinOpPrototype (name, args, prec) ->
+          let op = name.[String.length name - 1] in
+          Hashtbl.add Parser.binop_precedence op prec;
+      | _ -> ()
+      end;
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Optimize the function. *)
+        let _ = PassManager.run_function the_function the_fpm in
+
+        the_function
+      with e ->
+        delete_function the_function;
+        raise e
+
+
+ +
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop the_fpm the_execution_engine stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop the_fpm the_execution_engine stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            let e = Parser.parse_definition stream in
+            print_endline "parsed a function definition.";
+            dump_value (Codegen.codegen_func the_fpm e);
+        | Token.Extern ->
+            let e = Parser.parse_extern stream in
+            print_endline "parsed an extern.";
+            dump_value (Codegen.codegen_proto e);
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            let the_function = Codegen.codegen_func the_fpm e in
+            dump_value the_function;
+
+            (* JIT the function, returning a function pointer. *)
+            let result = ExecutionEngine.run_function the_function [||]
+              the_execution_engine in
+
+            print_string "Evaluated to ";
+            print_float (GenericValue.as_float Codegen.double_type result);
+            print_newline ();
+        with Stream.Error s | Codegen.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop the_fpm the_execution_engine stream
+
+
+ +
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+open Llvm_target
+open Llvm_scalar_opts
+
+let main () =
+  ignore (initialize_native_target ());
+
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combination the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+  (* Eliminate Common SubExpressions. *)
+  add_gvn the_fpm;
+
+  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+  add_cfg_simplification the_fpm;
+
+  ignore (PassManager.initialize the_fpm);
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop the_fpm the_execution_engine stream;
+
+  (* Print out all the generated code. *)
+  dump_module Codegen.the_module
+;;
+
+main ()
+
+
+ +
bindings.c
+
+
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/* printd - printf that takes a double prints it as "%f\n", returning 0. */
+extern double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+
+
+
+Next: Extending the language: mutable variables / SSA construction
+ + +
+
+  Chris Lattner
+ Erick Tryzelaar
+ The LLVM Compiler Infrastructure
+ Last modified: $Date$ +
+
diff --git a/final/docs/tutorial/OCamlLangImpl7.html b/final/docs/tutorial/OCamlLangImpl7.html
new file mode 100644
index 00000000000..7146a5cacde
--- /dev/null
+++ b/final/docs/tutorial/OCamlLangImpl7.html
@@ -0,0 +1,1907 @@
+Kaleidoscope: Extending the Language: Mutable Variables / SSA construction
+ + + +
+

+ Written by Chris Lattner + and Erick Tryzelaar +

+
+Chapter 7 Introduction
+ +

Welcome to Chapter 7 of the "Implementing a language +with LLVM" tutorial. In chapters 1 through 6, we've built a very +respectable, albeit simple, functional +programming language. In our journey, we learned some parsing techniques, +how to build and represent an AST, how to build LLVM IR, and how to optimize +the resultant code as well as JIT compile it.

+ +

While Kaleidoscope is interesting as a functional language, the fact that it +is functional makes it "too easy" to generate LLVM IR for it. In particular, a +functional language makes it very easy to build LLVM IR directly in SSA form. +Since LLVM requires that the input code be in SSA form, this is a very nice +property and it is often unclear to newcomers how to generate code for an +imperative language with mutable variables.

+ +

The short (and happy) summary of this chapter is that there is no need for +your front-end to build SSA form: LLVM provides highly tuned and well tested +support for this, though the way it works is a bit unexpected for some.

+ +
+Why is this a hard problem?
+ +

+To understand why mutable variables cause complexities in SSA construction, +consider this extremely simple C example: +

+ +
+
+int G, H;
+int test(_Bool Condition) {
+  int X;
+  if (Condition)
+    X = G;
+  else
+    X = H;
+  return X;
+}
+
+
+ +

In this case, we have the variable "X", whose value depends on the path +executed in the program. Because there are two different possible values for X +before the return instruction, a PHI node is inserted to merge the two values. +The LLVM IR that we want for this example looks like this:

+ +
+
+@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32* @G
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32* @H
+  br label %cond_next
+
+cond_next:
+  %X.2 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+  ret i32 %X.2
+}
+
+
+ +

In this example, the loads from the G and H global variables are explicit in +the LLVM IR, and they live in the then/else branches of the if statement +(cond_true/cond_false). In order to merge the incoming values, the X.2 phi node +in the cond_next block selects the right value to use based on where control +flow is coming from: if control flow comes from the cond_false block, X.2 gets +the value of X.1. Alternatively, if control flow comes from cond_true, it gets +the value of X.0. The intent of this chapter is not to explain the details of +SSA form. For more information, see one of the many online +references.

+ +

The question for this article is "who places the phi nodes when lowering +assignments to mutable variables?". The issue here is that LLVM +requires that its IR be in SSA form: there is no "non-ssa" mode for it. +However, SSA construction requires non-trivial algorithms and data structures, +so it is inconvenient and wasteful for every front-end to have to reproduce this +logic.

+ +
+Memory in LLVM
+ +

The 'trick' here is that while LLVM does require all register values to be +in SSA form, it does not require (or permit) memory objects to be in SSA form. +In the example above, note that the loads from G and H are direct accesses to +G and H: they are not renamed or versioned. This differs from some other +compiler systems, which do try to version memory objects. In LLVM, instead of +encoding dataflow analysis of memory into the LLVM IR, it is handled with Analysis Passes which are computed on +demand.

+ +

+With this in mind, the high-level idea is that we want to make a stack variable +(which lives in memory, because it is on the stack) for each mutable object in +a function. To take advantage of this trick, we need to talk about how LLVM +represents stack variables. +

+ +

In LLVM, all memory accesses are explicit with load/store instructions, and +it is carefully designed not to have (or need) an "address-of" operator. Notice +how the type of the @G/@H global variables is actually "i32*" even though the +variable is defined as "i32". What this means is that @G defines space +for an i32 in the global data area, but its name actually refers to the +address for that space. Stack variables work the same way, except that instead of +being declared with global variable definitions, they are declared with the +LLVM alloca instruction:

+ +
+
+define i32 @example() {
+entry:
+  %X = alloca i32           ; type of %X is i32*.
+  ...
+  %tmp = load i32* %X       ; load the stack value %X from the stack.
+  %tmp2 = add i32 %tmp, 1   ; increment it
+  store i32 %tmp2, i32* %X  ; store it back
+  ...
+
+
+ +

This code shows an example of how you can declare and manipulate a stack +variable in the LLVM IR. Stack memory allocated with the alloca instruction is +fully general: you can pass the address of the stack slot to functions, you can +store it in other variables, etc. In our example above, we could rewrite the +example to use the alloca technique to avoid using a PHI node:

+ +
+
+@G = weak global i32 0   ; type of @G is i32*
+@H = weak global i32 0   ; type of @H is i32*
+
+define i32 @test(i1 %Condition) {
+entry:
+  %X = alloca i32           ; type of %X is i32*.
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32* @G
+        store i32 %X.0, i32* %X   ; Update X
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32* @H
+        store i32 %X.1, i32* %X   ; Update X
+  br label %cond_next
+
+cond_next:
+  %X.2 = load i32* %X       ; Read X
+  ret i32 %X.2
+}
+
+
+ +

With this, we have discovered a way to handle arbitrary mutable variables +without the need to create Phi nodes at all:

+ +
  1. Each mutable variable becomes a stack allocation.
  2. Each read of the variable becomes a load from the stack.
  3. Each update of the variable becomes a store to the stack.
  4. Taking the address of a variable just uses the stack address directly.
+ +

While this solution has solved our immediate problem, it introduced another +one: we have now apparently introduced a lot of stack traffic for very simple +and common operations, a major performance problem. Fortunately for us, the +LLVM optimizer has a highly-tuned optimization pass named "mem2reg" that handles +this case, promoting allocas like this into SSA registers, inserting Phi nodes +as appropriate. If you run this example through the pass, for example, you'll +get:

+ +
+
+$ llvm-as < example.ll | opt -mem2reg | llvm-dis
+@G = weak global i32 0
+@H = weak global i32 0
+
+define i32 @test(i1 %Condition) {
+entry:
+  br i1 %Condition, label %cond_true, label %cond_false
+
+cond_true:
+  %X.0 = load i32* @G
+  br label %cond_next
+
+cond_false:
+  %X.1 = load i32* @H
+  br label %cond_next
+
+cond_next:
+  %X.01 = phi i32 [ %X.1, %cond_false ], [ %X.0, %cond_true ]
+  ret i32 %X.01
+}
+
+
+ +

The mem2reg pass implements the standard "iterated dominance frontier" +algorithm for constructing SSA form and has a number of optimizations that speed +up (very common) degenerate cases. The mem2reg optimization pass is the answer +to dealing with mutable variables, and we highly recommend that you depend on +it. Note that mem2reg only works on variables in certain circumstances:

+ +
  1. mem2reg is alloca-driven: it looks for allocas and if it can handle them, it promotes them. It does not apply to global variables or heap allocations.
  2. mem2reg only looks for alloca instructions in the entry block of the function. Being in the entry block guarantees that the alloca is only executed once, which makes analysis simpler (the sketch after this list shows the promotable pattern).
  3. mem2reg only promotes allocas whose uses are direct loads and stores. If the address of the stack object is passed to a function, or if any funny pointer arithmetic is involved, the alloca will not be promoted.
  4. mem2reg only works on allocas of first class values (such as pointers, scalars and vectors), and only if the array size of the allocation is 1 (or missing in the .ll file). mem2reg is not capable of promoting structs or arrays to registers. Note that the "scalarrepl" pass is more powerful and can promote structs, "unions", and arrays in many cases.
+ +
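As a quick experiment, the following self-contained sketch builds a function whose lone entry-block alloca satisfies all four conditions, runs mem2reg over it, and dumps the result. This is illustrative code, not part of the tutorial's compiler: it assumes the same Llvm and Llvm_scalar_opts packages used throughout, and the function name "demo" is made up:

+
+open Llvm
+open Llvm_scalar_opts
+
+let () =
+  let context = global_context () in
+  let m = create_module context "mem2reg_demo" in
+  let dbl = double_type context in
+  (* define_function also creates the entry block for us. *)
+  let f = define_function "demo" (function_type dbl [| dbl |]) m in
+  let builder = builder_at_end context (entry_block f) in
+  (* An entry-block alloca whose only uses are direct loads/stores. *)
+  let slot = build_alloca dbl "x" builder in
+  ignore (build_store (params f).(0) slot builder);
+  let v = build_load slot "x" builder in
+  ignore (build_ret v builder);
+  let fpm = PassManager.create_function m in
+  add_memory_to_register_promotion fpm;
+  ignore (PassManager.initialize fpm);
+  ignore (PassManager.run_function f fpm);
+  dump_module m   (* the alloca, load, and store should be gone *)
+

+ +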

All of these properties are easy to satisfy for most imperative languages, and we'll illustrate this below with Kaleidoscope. The final question you may be asking is: should I bother with this nonsense for my front-end? Wouldn't it be better if I just did SSA construction directly, avoiding use of the mem2reg optimization pass? In short, we strongly recommend that you use this technique for building SSA form, unless there is an extremely good reason not to. Using this technique is:

+ +
  • Proven and well tested: llvm-gcc and clang both use this technique for local mutable variables. As such, the most common clients of LLVM are using this to handle the bulk of their variables. You can be sure that bugs are found fast and fixed early.
  • Extremely fast: mem2reg has a number of special cases that make it fast in common cases as well as fully general. For example, it has fast-paths for variables that are only used in a single block, variables that only have one assignment point, good heuristics to avoid insertion of unneeded phi nodes, etc.
  • Needed for debug info generation: debug information in LLVM relies on having the address of the variable exposed so that debug info can be attached to it. This technique dovetails very naturally with this style of debug info.
+ +

If nothing else, this makes it much easier to get your front-end up and running, and is very simple to implement. Let's extend Kaleidoscope with mutable variables now!

+ +
Mutable Variables in Kaleidoscope
+ +

Now that we know the sort of problem we want to tackle, let's see what this looks like in the context of our little Kaleidoscope language. We're going to add two features:

+ +
  1. The ability to mutate variables with the '=' operator.
  2. The ability to define new variables.
+ +

While the first item is really what this is about, we only have variables for incoming arguments and for loop induction variables, and redefining those only goes so far :). Also, the ability to define new variables is a useful thing regardless of whether you will be mutating them. Here's a motivating example that shows how we could use these:

+ +
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+# Recursive fib, we could do this before.
+def fib(x)
+  if (x < 3) then
+    1
+  else
+    fib(x-1)+fib(x-2);
+
+# Iterative fib.
+def fibi(x)
+  var a = 1, b = 1, c in
+  (for i = 3, i < x in
+     c = a + b :
+     a = b :
+     b = c) :
+  b;
+
+# Call it.
+fibi(10);
+
+
+ +

+In order to mutate variables, we have to change our existing variables to use +the "alloca trick". Once we have that, we'll add our new operator, then extend +Kaleidoscope to support new variable definitions. +

+ +
Adjusting Existing Variables for Mutation
+ +

The symbol table in Kaleidoscope is managed at code generation time by the 'named_values' map. This map currently keeps track of the LLVM "Value*" that holds the double value for the named variable. In order to support mutation, we need to change this slightly, so that named_values holds the memory location of the variable in question. Note that this change is a refactoring: it changes the structure of the code, but does not (by itself) change the behavior of the compiler. All of these changes are isolated in the Kaleidoscope code generator.

+ +

+At this point in Kaleidoscope's development, it only supports variables for two +things: incoming arguments to functions and the induction variable of 'for' +loops. For consistency, we'll allow mutation of these variables in addition to +other user-defined variables. This means that these will both need memory +locations. +

+ +

To start our transformation of Kaleidoscope, we'll change the +named_values map so that it maps to AllocaInst* instead of Value*. +Once we do this, the C++ compiler will tell us what parts of the code we need to +update:

+ +

Note: the OCaml bindings currently model both Value*s and AllocaInst*s as Llvm.llvalues, but this may change in the future to be more type safe.

+ +
+
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+
+
+ +

Also, since we will need to create these alloca's, we'll use a helper +function that ensures that the allocas are created in the entry block of the +function:

+ +
+
+(* Create an alloca instruction in the entry block of the function. This
+ * is used for mutable variables etc. *)
+let create_entry_block_alloca the_function var_name =
+  let builder = builder_at context (instr_begin (entry_block the_function)) in
+  build_alloca double_type var_name builder
+
+
+ +

This funny looking code creates an Llvm.llbuilder object that is +pointing at the first instruction of the entry block. It then creates an alloca +with the expected name and returns it. Because all values in Kaleidoscope are +doubles, there is no need to pass in a type to use.

+ +
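For example (a hypothetical usage; the variable name "i" is made up), even if builder is currently positioned deep inside a loop body, the following still creates the alloca up in the function's entry block, which is exactly where mem2reg wants it:

+
+let the_function = block_parent (insertion_block builder) in
+let alloca = create_entry_block_alloca the_function "i" in
+(* The store below is emitted at the current insertion point, but the
+ * alloca itself sits in the entry block. *)
+ignore (build_store (const_float double_type 0.0) alloca builder)
+

+ +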

With this in place, the first functionality change we want to make is to +variable references. In our new scheme, variables live on the stack, so code +generating a reference to them actually needs to produce a load from the stack +slot:

+ +
+
+let rec codegen_expr = function
+  ...
+  | Ast.Variable name ->
+      let v = try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name")
+      in
+      (* Load the value. *)
+      build_load v name builder
+
+
+ +

As you can see, this is pretty straightforward. Now we need to update the +things that define the variables to set up the alloca. We'll start with +codegen_expr Ast.For ... (see the full code listing +for the unabridged code):

+ +
+
+  | Ast.For (var_name, start, end_, step, body) ->
+      let the_function = block_parent (insertion_block builder) in
+
+      (* Create an alloca for the variable in the entry block. *)
+      let alloca = create_entry_block_alloca the_function var_name in
+
+      (* Emit the start code first, without 'variable' in scope. *)
+      let start_val = codegen_expr start in
+
+      (* Store the value into the alloca. *)
+      ignore(build_store start_val alloca builder);
+
+      ...
+
+      (* Within the loop, the variable now lives in its alloca. If it
+       * shadows an existing variable, we have to restore it, so save it
+       * now. *)
+      let old_val =
+        try Some (Hashtbl.find named_values var_name) with Not_found -> None
+      in
+      Hashtbl.add named_values var_name alloca;
+
+      ...
+
+      (* Compute the end condition. *)
+      let end_cond = codegen_expr end_ in
+
+      (* Reload, increment, and restore the alloca. This handles the case where
+       * the body of the loop mutates the variable. *)
+      let cur_var = build_load alloca var_name builder in
+      let next_var = build_add cur_var step_val "nextvar" builder in
+      ignore(build_store next_var alloca builder);
+      ...
+
+
+ +

This code is virtually identical to the code before we allowed mutable variables. +The big difference is that we no longer have to construct a PHI node, and we use +load/store to access the variable as needed.

+ +

To support mutable argument variables, we need to also make allocas for them. +The code for this is also pretty simple:

+ +
+
+(* Create an alloca for each argument and register the argument in the symbol
+ * table so that references to it will succeed. *)
+let create_argument_allocas the_function proto =
+  let args = match proto with
+    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+  in
+  Array.iteri (fun i ai ->
+    let var_name = args.(i) in
+    (* Create an alloca for this variable. *)
+    let alloca = create_entry_block_alloca the_function var_name in
+
+    (* Store the initial value into the alloca. *)
+    ignore(build_store ai alloca builder);
+
+    (* Add arguments to variable symbol table. *)
+    Hashtbl.add named_values var_name alloca;
+  ) (params the_function)
+
+
+ +

For each argument, we make an alloca, store the input value to the function +into the alloca, and register the alloca as the memory location for the +argument. This method gets invoked by Codegen.codegen_func right after +it sets up the entry block for the function.

+ +
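For reference, here is the relevant shape of Codegen.codegen_func, abridged from the full listing at the end of this chapter:

+
+(* Create a new basic block to start insertion into. *)
+let bb = append_block context "entry" the_function in
+position_at_end bb builder;
+
+(* Add all arguments to the symbol table and create their allocas. *)
+create_argument_allocas the_function proto;
+
+let ret_val = codegen_expr body in
+ignore (build_ret ret_val builder)
+

+ +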

The final missing piece is adding the mem2reg pass, which allows us to get +good codegen once again:

+ +
+
+let main () =
+  ...
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Promote allocas to registers. *)
+  add_memory_to_register_promotion the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combining the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+
+ +

It is interesting to see what the code looks like before and after the +mem2reg optimization runs. For example, this is the before/after code for our +recursive fib function. Before the optimization:

+ +
+
+define double @fib(double %x) {
+entry:
+  %x1 = alloca double
+  store double %x, double* %x1
+  %x2 = load double* %x1
+  %cmptmp = fcmp ult double %x2, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:    ; preds = %entry
+  br label %ifcont
+
+else:    ; preds = %entry
+  %x3 = load double* %x1
+  %subtmp = fsub double %x3, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %x4 = load double* %x1
+  %subtmp5 = fsub double %x4, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  br label %ifcont
+
+ifcont:    ; preds = %else, %then
+  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+  ret double %iftmp
+}
+
+
+ +

Here there is only one variable (x, the input argument) but you can still +see the extremely simple-minded code generation strategy we are using. In the +entry block, an alloca is created, and the initial input value is stored into +it. Each reference to the variable does a reload from the stack. Also, note +that we didn't modify the if/then/else expression, so it still inserts a PHI +node. While we could make an alloca for it, it is actually easier to create a +PHI node for it, so we still just make the PHI.

+ +

Here is the code after the mem2reg pass runs:

+ +
+
+define double @fib(double %x) {
+entry:
+  %cmptmp = fcmp ult double %x, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp one double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %then, label %else
+
+then:
+  br label %ifcont
+
+else:
+  %subtmp = fsub double %x, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %subtmp5 = fsub double %x, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  br label %ifcont
+
+ifcont:    ; preds = %else, %then
+  %iftmp = phi double [ 1.000000e+00, %then ], [ %addtmp, %else ]
+  ret double %iftmp
+}
+
+
+ +

This is a trivial case for mem2reg, since there are no redefinitions of the variable. The point of showing this is to calm your tension about inserting such blatant inefficiencies :).

+ +

After the rest of the optimizers run, we get:

+ +
+
+define double @fib(double %x) {
+entry:
+  %cmptmp = fcmp ult double %x, 3.000000e+00
+  %booltmp = uitofp i1 %cmptmp to double
+  %ifcond = fcmp ueq double %booltmp, 0.000000e+00
+  br i1 %ifcond, label %else, label %ifcont
+
+else:
+  %subtmp = fsub double %x, 1.000000e+00
+  %calltmp = call double @fib(double %subtmp)
+  %subtmp5 = fsub double %x, 2.000000e+00
+  %calltmp6 = call double @fib(double %subtmp5)
+  %addtmp = fadd double %calltmp, %calltmp6
+  ret double %addtmp
+
+ifcont:
+  ret double 1.000000e+00
+}
+
+
+ +

Here we see that the simplifycfg pass decided to clone the return instruction +into the end of the 'else' block. This allowed it to eliminate some branches +and the PHI node.

+ +

Now that all symbol table references are updated to use stack variables, +we'll add the assignment operator.

+ +
New Assignment Operator
+ +

With our current framework, adding a new assignment operator is really +simple. We will parse it just like any other binary operator, but handle it +internally (instead of allowing the user to define it). The first step is to +set a precedence:

+ +
+
+let main () =
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '=' 2;
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  ...
+
+
+ +

Now that the parser knows the precedence of the binary operator, it takes +care of all the parsing and AST generation. We just need to implement codegen +for the assignment operator. This looks like:

+ +
+
+let rec codegen_expr = function
+      begin match op with
+      | '=' ->
+          (* Special case '=' because we don't want to emit the LHS as an
+           * expression. *)
+          let name =
+            match lhs with
+            | Ast.Variable name -> name
+            | _ -> raise (Error "destination of '=' must be a variable")
+          in
+
+
+ +

Unlike the rest of the binary operators, our assignment operator doesn't +follow the "emit LHS, emit RHS, do computation" model. As such, it is handled +as a special case before the other binary operators are handled. The other +strange thing is that it requires the LHS to be a variable. It is invalid to +have "(x+1) = expr" - only things like "x = expr" are allowed. +

+ + +
+
+          (* Codegen the rhs. *)
+          let val_ = codegen_expr rhs in
+
+          (* Lookup the name. *)
+          let variable = try Hashtbl.find named_values name with
+          | Not_found -> raise (Error "unknown variable name")
+          in
+          ignore(build_store val_ variable builder);
+          val_
+      | _ ->
+          ...
+
+
+ +

Once we have the variable, codegen'ing the assignment is straightforward: +we emit the RHS of the assignment, create a store, and return the computed +value. Returning a value allows for chained assignments like "X = (Y = Z)".

+ +

Now that we have an assignment operator, we can mutate loop variables and +arguments. For example, we can now run code like this:

+ +
+
+# Function to print a double.
+extern printd(x);
+
+# Define ':' for sequencing: as a low-precedence operator that ignores operands
+# and just returns the RHS.
+def binary : 1 (x y) y;
+
+def test(x)
+  printd(x) :
+  x = 4 :
+  printd(x);
+
+test(123);
+
+
+ +

When run, this example prints "123" and then "4", showing that we did actually mutate the value! Okay, we have now officially implemented our goal: getting this to work requires SSA construction in the general case. However, to be really useful, we want the ability to define our own local variables. Let's add this next!

+ +
User-defined Local Variables
+ +

Adding var/in is just like the other extensions we made to Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. The first step for adding our new 'var/in' construct is to extend the lexer. As before, this is pretty trivial; the code looks like this:

+ +
+
+type token =
+  ...
+  (* var definition *)
+  | Var
+
+...
+
+and lex_ident buffer = parser
+      ...
+      | "in" -> [< 'Token.In; stream >]
+      | "binary" -> [< 'Token.Binary; stream >]
+      | "unary" -> [< 'Token.Unary; stream >]
+      | "var" -> [< 'Token.Var; stream >]
+      ...
+
+
+ +

The next step is to define the AST node that we will construct. For var/in, +it looks like this:

+ +
+
+type expr =
+  ...
+  (* variant for var/in. *)
+  | Var of (string * expr option) array * expr
+  ...
+
+
+ +

var/in allows a list of names to be defined all at once, and each name can optionally have an initializer value. As such, we capture this information in the var_names array. Also, var/in has a body; this body is allowed to access the variables defined by the var/in.

+ +
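For instance (an illustrative value, not code from the compiler), the Kaleidoscope expression "var a = 1, b in a" is captured as:

+
+Ast.Var ([| ("a", Some (Ast.Number 1.0)); ("b", None) |],
+         Ast.Variable "a")
+

+ +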

With this in place, we can define the parser pieces. The first thing we do +is add it as a primary expression:

+ +
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr
+ *   ::= ifexpr
+ *   ::= forexpr
+ *   ::= varexpr *)
+let rec parse_primary = parser
+  ...
+  (* varexpr
+   *   ::= 'var' identifier ('=' expression)?
+   *             (',' identifier ('=' expression)?)* 'in' expression *)
+  | [< 'Token.Var;
+       (* At least one variable name is required. *)
+       'Token.Ident id ?? "expected identifier after var";
+       init=parse_var_init;
+       var_names=parse_var_names [(id, init)];
+       (* At this point, we have to have 'in'. *)
+       'Token.In ?? "expected 'in' keyword after 'var'";
+       body=parse_expr >] ->
+      Ast.Var (Array.of_list (List.rev var_names), body)
+
+...
+
+and parse_var_init = parser
+  (* read in the optional initializer. *)
+  | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+  | [< >] -> None
+
+and parse_var_names accumulator = parser
+  | [< 'Token.Kwd ',';
+       'Token.Ident id ?? "expected identifier list after var";
+       init=parse_var_init;
+       e=parse_var_names ((id, init) :: accumulator) >] -> e
+  | [< >] -> accumulator
+
+
+ +

Now that we can parse and represent the code, we need to support emission of +LLVM IR for it. This code starts out with:

+ +
+
+let rec codegen_expr = function
+  ...
+  | Ast.Var (var_names, body)
+      let old_bindings = ref [] in
+
+      let the_function = block_parent (insertion_block builder) in
+
+      (* Register all variables and emit their initializer. *)
+      Array.iter (fun (var_name, init) ->
+
+
+ +

Basically it loops over all the variables, installing them one at a time. For each variable we put into the symbol table, we remember the previous value that we replace in old_bindings.

+ +
+
+        (* Emit the initializer before adding the variable to scope, this
+         * prevents the initializer from referencing the variable itself, and
+         * permits stuff like this:
+         *   var a = 1 in
+         *     var a = a in ...   # refers to outer 'a'. *)
+        let init_val =
+          match init with
+          | Some init -> codegen_expr init
+          (* If not specified, use 0.0. *)
+          | None -> const_float double_type 0.0
+        in
+
+        let alloca = create_entry_block_alloca the_function var_name in
+        ignore(build_store init_val alloca builder);
+
+        (* Remember the old variable binding so that we can restore the binding
+         * when we unrecurse. *)
+
+        begin
+          try
+            let old_value = Hashtbl.find named_values var_name in
+            old_bindings := (var_name, old_value) :: !old_bindings;
+          with Not_found -> ()
+        end;
+
+        (* Remember this binding. *)
+        Hashtbl.add named_values var_name alloca;
+      ) var_names;
+
+
+ +

There are more comments here than code. The basic idea is that we emit the +initializer, create the alloca, then update the symbol table to point to it. +Once all the variables are installed in the symbol table, we evaluate the body +of the var/in expression:

+ +
+
+      (* Codegen the body, now that all vars are in scope. *)
+      let body_val = codegen_expr body in
+
+
+ +

Finally, before returning, we restore the previous variable bindings:

+ +
+
+      (* Pop all our variables from scope. *)
+      List.iter (fun (var_name, old_value) ->
+        Hashtbl.add named_values var_name old_value
+      ) !old_bindings;
+
+      (* Return the body computation. *)
+      body_val
+
+
+ +

The end result of all of this is that we get properly scoped variable +definitions, and we even (trivially) allow mutation of them :).

+ +

With this, we completed what we set out to do. Our nice iterative fib +example from the intro compiles and runs just fine. The mem2reg pass optimizes +all of our stack variables into SSA registers, inserting PHI nodes where needed, +and our front-end remains simple: no "iterated dominance frontier" computation +anywhere in sight.

+ +
Full Code Listing
+ +

+Here is the complete code listing for our running example, enhanced with mutable +variables and var/in support. To build this example, use: +

+ +
+
+# Compile
+ocamlbuild toy.byte
+# Run
+./toy.byte
+
+
+ +

Here is the code:

+ +
+
_tags:
+
+
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
+
+
+ +
myocamlbuild.ml:
+
+
+open Ocamlbuild_plugin;;
+
+ocaml_lib ~extern:true "llvm";;
+ocaml_lib ~extern:true "llvm_analysis";;
+ocaml_lib ~extern:true "llvm_executionengine";;
+ocaml_lib ~extern:true "llvm_target";;
+ocaml_lib ~extern:true "llvm_scalar_opts";;
+
+flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
+dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
+
+
+ +
token.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer Tokens
+ *===----------------------------------------------------------------------===*)
+
+(* The lexer returns Kwd for an unknown character, and one of the other
+ * tokens below for known things. *)
+type token =
+  (* commands *)
+  | Def | Extern
+
+  (* primary *)
+  | Ident of string | Number of float
+
+  (* unknown *)
+  | Kwd of char
+
+  (* control *)
+  | If | Then | Else
+  | For | In
+
+  (* operators *)
+  | Binary | Unary
+
+  (* var definition *)
+  | Var
+
+
+ +
lexer.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Lexer
+ *===----------------------------------------------------------------------===*)
+
+let rec lex = parser
+  (* Skip any whitespace. *)
+  | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
+
+  (* identifier: [a-zA-Z][a-zA-Z0-9] *)
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+
+  (* number: [0-9.]+ *)
+  | [< ' ('0' .. '9' as c); stream >] ->
+      let buffer = Buffer.create 1 in
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+
+  (* Comment until end of line. *)
+  | [< ' ('#'); stream >] ->
+      lex_comment stream
+
+  (* Otherwise, just return the character as its ascii value. *)
+  | [< 'c; stream >] ->
+      [< 'Token.Kwd c; lex stream >]
+
+  (* end of stream. *)
+  | [< >] -> [< >]
+
+and lex_number buffer = parser
+  | [< ' ('0' .. '9' | '.' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_number buffer stream
+  | [< stream=lex >] ->
+      [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
+
+and lex_ident buffer = parser
+  | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
+      Buffer.add_char buffer c;
+      lex_ident buffer stream
+  | [< stream=lex >] ->
+      match Buffer.contents buffer with
+      | "def" -> [< 'Token.Def; stream >]
+      | "extern" -> [< 'Token.Extern; stream >]
+      | "if" -> [< 'Token.If; stream >]
+      | "then" -> [< 'Token.Then; stream >]
+      | "else" -> [< 'Token.Else; stream >]
+      | "for" -> [< 'Token.For; stream >]
+      | "in" -> [< 'Token.In; stream >]
+      | "binary" -> [< 'Token.Binary; stream >]
+      | "unary" -> [< 'Token.Unary; stream >]
+      | "var" -> [< 'Token.Var; stream >]
+      | id -> [< 'Token.Ident id; stream >]
+
+and lex_comment = parser
+  | [< ' ('\n'); stream=lex >] -> stream
+  | [< 'c; e=lex_comment >] -> e
+  | [< >] -> [< >]
+
+
+ +
ast.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a unary operator. *)
+  | Unary of char * expr
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+  (* variant for if/then/else. *)
+  | If of expr * expr * expr
+
+  (* variant for for/in. *)
+  | For of string * expr * expr * expr option * expr
+
+  (* variant for var/in. *)
+  | Var of (string * expr option) array * expr
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto =
+  | Prototype of string * string array
+  | BinOpPrototype of string * string array * int
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
+
+
+ +
parser.ml:
+
+
+(*===---------------------------------------------------------------------===
+ * Parser
+ *===---------------------------------------------------------------------===*)
+
+(* binop_precedence - This holds the precedence for each binary operator that is
+ * defined *)
+let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
+
+(* precedence - Get the precedence of the pending binary operator token. *)
+let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
+
+(* primary
+ *   ::= identifier
+ *   ::= numberexpr
+ *   ::= parenexpr
+ *   ::= ifexpr
+ *   ::= forexpr
+ *   ::= varexpr *)
+let rec parse_primary = parser
+  (* numberexpr ::= number *)
+  | [< 'Token.Number n >] -> Ast.Number n
+
+  (* parenexpr ::= '(' expression ')' *)
+  | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
+
+  (* identifierexpr
+   *   ::= identifier
+   *   ::= identifier '(' argumentexpr ')' *)
+  | [< 'Token.Ident id; stream >] ->
+      let rec parse_args accumulator = parser
+        | [< e=parse_expr; stream >] ->
+            begin parser
+              | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
+              | [< >] -> e :: accumulator
+            end stream
+        | [< >] -> accumulator
+      in
+      let rec parse_ident id = parser
+        (* Call. *)
+        | [< 'Token.Kwd '(';
+             args=parse_args [];
+             'Token.Kwd ')' ?? "expected ')'">] ->
+            Ast.Call (id, Array.of_list (List.rev args))
+
+        (* Simple variable ref. *)
+        | [< >] -> Ast.Variable id
+      in
+      parse_ident id stream
+
+  (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
+  | [< 'Token.If; c=parse_expr;
+       'Token.Then ?? "expected 'then'"; t=parse_expr;
+       'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
+      Ast.If (c, t, e)
+
+  (* forexpr
+        ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
+  | [< 'Token.For;
+       'Token.Ident id ?? "expected identifier after for";
+       'Token.Kwd '=' ?? "expected '=' after for";
+       stream >] ->
+      begin parser
+        | [<
+             start=parse_expr;
+             'Token.Kwd ',' ?? "expected ',' after for";
+             end_=parse_expr;
+             stream >] ->
+            let step =
+              begin parser
+              | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
+              | [< >] -> None
+              end stream
+            in
+            begin parser
+            | [< 'Token.In; body=parse_expr >] ->
+                Ast.For (id, start, end_, step, body)
+            | [< >] ->
+                raise (Stream.Error "expected 'in' after for")
+            end stream
+        | [< >] ->
+            raise (Stream.Error "expected '=' after for")
+      end stream
+
+  (* varexpr
+   *   ::= 'var' identifier ('=' expression)?
+   *             (',' identifier ('=' expression)?)* 'in' expression *)
+  | [< 'Token.Var;
+       (* At least one variable name is required. *)
+       'Token.Ident id ?? "expected identifier after var";
+       init=parse_var_init;
+       var_names=parse_var_names [(id, init)];
+       (* At this point, we have to have 'in'. *)
+       'Token.In ?? "expected 'in' keyword after 'var'";
+       body=parse_expr >] ->
+      Ast.Var (Array.of_list (List.rev var_names), body)
+
+  | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
+
+(* unary
+ *   ::= primary
+ *   ::= '!' unary *)
+and parse_unary = parser
+  (* If this is a unary operator, read it. *)
+  | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
+      Ast.Unary (op, operand)
+
+  (* If the current token is not an operator, it must be a primary expr. *)
+  | [< stream >] -> parse_primary stream
+
+(* binoprhs
+ *   ::= ('+' primary)* *)
+and parse_bin_rhs expr_prec lhs stream =
+  match Stream.peek stream with
+  (* If this is a binop, find its precedence. *)
+  | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
+      let token_prec = precedence c in
+
+      (* If this is a binop that binds at least as tightly as the current binop,
+       * consume it, otherwise we are done. *)
+      if token_prec < expr_prec then lhs else begin
+        (* Eat the binop. *)
+        Stream.junk stream;
+
+        (* Parse the primary expression after the binary operator. *)
+        let rhs = parse_unary stream in
+
+        (* Okay, we know this is a binop. *)
+        let rhs =
+          match Stream.peek stream with
+          | Some (Token.Kwd c2) ->
+              (* If BinOp binds less tightly with rhs than the operator after
+               * rhs, let the pending operator take rhs as its lhs. *)
+              let next_prec = precedence c2 in
+              if token_prec < next_prec
+              then parse_bin_rhs (token_prec + 1) rhs stream
+              else rhs
+          | _ -> rhs
+        in
+
+        (* Merge lhs/rhs. *)
+        let lhs = Ast.Binary (c, lhs, rhs) in
+        parse_bin_rhs expr_prec lhs stream
+      end
+  | _ -> lhs
+
+and parse_var_init = parser
+  (* read in the optional initializer. *)
+  | [< 'Token.Kwd '='; e=parse_expr >] -> Some e
+  | [< >] -> None
+
+and parse_var_names accumulator = parser
+  | [< 'Token.Kwd ',';
+       'Token.Ident id ?? "expected identifier list after var";
+       init=parse_var_init;
+       e=parse_var_names ((id, init) :: accumulator) >] -> e
+  | [< >] -> accumulator
+
+(* expression
+ *   ::= primary binoprhs *)
+and parse_expr = parser
+  | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
+
+(* prototype
+ *   ::= id '(' id* ')'
+ *   ::= binary LETTER number? (id, id)
+ *   ::= unary LETTER number? (id) *)
+let parse_prototype =
+  let rec parse_args accumulator = parser
+    | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
+    | [< >] -> accumulator
+  in
+  let parse_operator = parser
+    | [< 'Token.Unary >] -> "unary", 1
+    | [< 'Token.Binary >] -> "binary", 2
+  in
+  let parse_binary_precedence = parser
+    | [< 'Token.Number n >] -> int_of_float n
+    | [< >] -> 30
+  in
+  parser
+  | [< 'Token.Ident id;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+       args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      (* success. *)
+      Ast.Prototype (id, Array.of_list (List.rev args))
+  | [< (prefix, kind)=parse_operator;
+       'Token.Kwd op ?? "expected an operator";
+       (* Read the precedence if present. *)
+       binary_precedence=parse_binary_precedence;
+       'Token.Kwd '(' ?? "expected '(' in prototype";
+        args=parse_args [];
+       'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
+      let name = prefix ^ (String.make 1 op) in
+      let args = Array.of_list (List.rev args) in
+
+      (* Verify right number of arguments for operator. *)
+      if Array.length args != kind
+      then raise (Stream.Error "invalid number of operands for operator")
+      else
+        if kind == 1 then
+          Ast.Prototype (name, args)
+        else
+          Ast.BinOpPrototype (name, args, binary_precedence)
+  | [< >] ->
+      raise (Stream.Error "expected function name in prototype")
+
+(* definition ::= 'def' prototype expression *)
+let parse_definition = parser
+  | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
+      Ast.Function (p, e)
+
+(* toplevelexpr ::= expression *)
+let parse_toplevel = parser
+  | [< e=parse_expr >] ->
+      (* Make an anonymous proto. *)
+      Ast.Function (Ast.Prototype ("", [||]), e)
+
+(*  external ::= 'extern' prototype *)
+let parse_extern = parser
+  | [< 'Token.Extern; e=parse_prototype >] -> e
+
+
+ +
codegen.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+(* Create an alloca instruction in the entry block of the function. This
+ * is used for mutable variables etc. *)
+let create_entry_block_alloca the_function var_name =
+  let builder = builder_at context (instr_begin (entry_block the_function)) in
+  build_alloca double_type var_name builder
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      let v = try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name")
+      in
+      (* Load the value. *)
+      build_load v name builder
+  | Ast.Unary (op, operand) ->
+      let operand = codegen_expr operand in
+      let callee = "unary" ^ (String.make 1 op) in
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown unary operator")
+      in
+      build_call callee [|operand|] "unop" builder
+  | Ast.Binary (op, lhs, rhs) ->
+      begin match op with
+      | '=' ->
+          (* Special case '=' because we don't want to emit the LHS as an
+           * expression. *)
+          let name =
+            match lhs with
+            | Ast.Variable name -> name
+            | _ -> raise (Error "destination of '=' must be a variable")
+          in
+
+          (* Codegen the rhs. *)
+          let val_ = codegen_expr rhs in
+
+          (* Lookup the name. *)
+          let variable = try Hashtbl.find named_values name with
+          | Not_found -> raise (Error "unknown variable name")
+          in
+          ignore(build_store val_ variable builder);
+          val_
+      | _ ->
+          let lhs_val = codegen_expr lhs in
+          let rhs_val = codegen_expr rhs in
+          begin
+            match op with
+            | '+' -> build_add lhs_val rhs_val "addtmp" builder
+            | '-' -> build_sub lhs_val rhs_val "subtmp" builder
+            | '*' -> build_mul lhs_val rhs_val "multmp" builder
+            | '<' ->
+                (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                build_uitofp i double_type "booltmp" builder
+            | _ ->
+                (* If it wasn't a builtin binary operator, it must be a user defined
+                 * one. Emit a call to it. *)
+                let callee = "binary" ^ (String.make 1 op) in
+                let callee =
+                  match lookup_function callee the_module with
+                  | Some callee -> callee
+                  | None -> raise (Error "binary operator not found!")
+                in
+                build_call callee [|lhs_val; rhs_val|] "binop" builder
+          end
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+  | Ast.If (cond, then_, else_) ->
+      let cond = codegen_expr cond in
+
+      (* Convert condition to a bool by comparing equal to 0.0 *)
+      let zero = const_float double_type 0.0 in
+      let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
+
+      (* Grab the first block so that we might later add the conditional branch
+       * to it at the end of the function. *)
+      let start_bb = insertion_block builder in
+      let the_function = block_parent start_bb in
+
+      let then_bb = append_block context "then" the_function in
+
+      (* Emit 'then' value. *)
+      position_at_end then_bb builder;
+      let then_val = codegen_expr then_ in
+
+      (* Codegen of 'then' can change the current block, update then_bb for the
+       * phi. We create a new name because one is used for the phi node, and the
+       * other is used for the conditional branch. *)
+      let new_then_bb = insertion_block builder in
+
+      (* Emit 'else' value. *)
+      let else_bb = append_block context "else" the_function in
+      position_at_end else_bb builder;
+      let else_val = codegen_expr else_ in
+
+      (* Codegen of 'else' can change the current block, update else_bb for the
+       * phi. *)
+      let new_else_bb = insertion_block builder in
+
+      (* Emit merge block. *)
+      let merge_bb = append_block context "ifcont" the_function in
+      position_at_end merge_bb builder;
+      let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
+      let phi = build_phi incoming "iftmp" builder in
+
+      (* Return to the start block to add the conditional branch. *)
+      position_at_end start_bb builder;
+      ignore (build_cond_br cond_val then_bb else_bb builder);
+
+      (* Set an unconditional branch at the end of the 'then' block and the
+       * 'else' block to the 'merge' block. *)
+      position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
+      position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
+
+      (* Finally, set the builder to the end of the merge block. *)
+      position_at_end merge_bb builder;
+
+      phi
+  | Ast.For (var_name, start, end_, step, body) ->
+      (* Output this as:
+       *   var = alloca double
+       *   ...
+       *   start = startexpr
+       *   store start -> var
+       *   goto loop
+       * loop:
+       *   ...
+       *   bodyexpr
+       *   ...
+       * loopend:
+       *   step = stepexpr
+       *   endcond = endexpr
+       *
+       *   curvar = load var
+       *   nextvar = curvar + step
+       *   store nextvar -> var
+       *   br endcond, loop, endloop
+       * outloop: *)
+
+      let the_function = block_parent (insertion_block builder) in
+
+      (* Create an alloca for the variable in the entry block. *)
+      let alloca = create_entry_block_alloca the_function var_name in
+
+      (* Emit the start code first, without 'variable' in scope. *)
+      let start_val = codegen_expr start in
+
+      (* Store the value into the alloca. *)
+      ignore(build_store start_val alloca builder);
+
+      (* Make the new basic block for the loop header, inserting after current
+       * block. *)
+      let loop_bb = append_block context "loop" the_function in
+
+      (* Insert an explicit fall through from the current block to the
+       * loop_bb. *)
+      ignore (build_br loop_bb builder);
+
+      (* Start insertion in loop_bb. *)
+      position_at_end loop_bb builder;
+
+      (* Within the loop, the variable now lives in its alloca. If it
+       * shadows an existing variable, we have to restore it, so save it
+       * now. *)
+      let old_val =
+        try Some (Hashtbl.find named_values var_name) with Not_found -> None
+      in
+      Hashtbl.add named_values var_name alloca;
+
+      (* Emit the body of the loop.  This, like any other expr, can change the
+       * current BB.  Note that we ignore the value computed by the body, but
+       * don't allow an error *)
+      ignore (codegen_expr body);
+
+      (* Emit the step value. *)
+      let step_val =
+        match step with
+        | Some step -> codegen_expr step
+        (* If not specified, use 1.0. *)
+        | None -> const_float double_type 1.0
+      in
+
+      (* Compute the end condition. *)
+      let end_cond = codegen_expr end_ in
+
+      (* Reload, increment, and restore the alloca. This handles the case where
+       * the body of the loop mutates the variable. *)
+      let cur_var = build_load alloca var_name builder in
+      let next_var = build_add cur_var step_val "nextvar" builder in
+      ignore(build_store next_var alloca builder);
+
+      (* Convert condition to a bool by comparing equal to 0.0. *)
+      let zero = const_float double_type 0.0 in
+      let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
+
+      (* Create the "after loop" block and insert it. *)
+      let after_bb = append_block context "afterloop" the_function in
+
+      (* Insert the conditional branch into the end of loop_end_bb. *)
+      ignore (build_cond_br end_cond loop_bb after_bb builder);
+
+      (* Any new code will be inserted in after_bb. *)
+      position_at_end after_bb builder;
+
+      (* Restore the unshadowed variable. *)
+      begin match old_val with
+      | Some old_val -> Hashtbl.add named_values var_name old_val
+      | None -> ()
+      end;
+
+      (* for expr always returns 0.0. *)
+      const_null double_type
+  | Ast.Var (var_names, body) ->
+      let old_bindings = ref [] in
+
+      let the_function = block_parent (insertion_block builder) in
+
+      (* Register all variables and emit their initializer. *)
+      Array.iter (fun (var_name, init) ->
+        (* Emit the initializer before adding the variable to scope, this
+         * prevents the initializer from referencing the variable itself, and
+         * permits stuff like this:
+         *   var a = 1 in
+         *     var a = a in ...   # refers to outer 'a'. *)
+        let init_val =
+          match init with
+          | Some init -> codegen_expr init
+          (* If not specified, use 0.0. *)
+          | None -> const_float double_type 0.0
+        in
+
+        let alloca = create_entry_block_alloca the_function var_name in
+        ignore(build_store init_val alloca builder);
+
+        (* Remember the old variable binding so that we can restore the binding
+         * when we unrecurse. *)
+        begin
+          try
+            let old_value = Hashtbl.find named_values var_name in
+            old_bindings := (var_name, old_value) :: !old_bindings;
+          with Not_found -> ()
+        end;
+
+        (* Remember this binding. *)
+        Hashtbl.add named_values var_name alloca;
+      ) var_names;
+
+      (* Codegen the body, now that all vars are in scope. *)
+      let body_val = codegen_expr body in
+
+      (* Pop all our variables from scope. *)
+      List.iter (fun (var_name, old_value) ->
+        Hashtbl.add named_values var_name old_value
+      ) !old_bindings;
+
+      (* Return the body computation. *)
+      body_val
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments. *)
+      Array.iteri (fun i a ->
+        let n = args.(i) in
+        set_value_name n a;
+        Hashtbl.add named_values n a;
+      ) (params f);
+      f
+
+(* Create an alloca for each argument and register the argument in the symbol
+ * table so that references to it will succeed. *)
+let create_argument_allocas the_function proto =
+  let args = match proto with
+    | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args
+  in
+  Array.iteri (fun i ai ->
+    let var_name = args.(i) in
+    (* Create an alloca for this variable. *)
+    let alloca = create_entry_block_alloca the_function var_name in
+
+    (* Store the initial value into the alloca. *)
+    ignore(build_store ai alloca builder);
+
+    (* Add arguments to variable symbol table. *)
+    Hashtbl.add named_values var_name alloca;
+  ) (params the_function)
+
+let codegen_func the_fpm = function
+  | Ast.Function (proto, body) ->
+      Hashtbl.clear named_values;
+      let the_function = codegen_proto proto in
+
+      (* If this is an operator, install it. *)
+      begin match proto with
+      | Ast.BinOpPrototype (name, args, prec) ->
+          let op = name.[String.length name - 1] in
+          Hashtbl.add Parser.binop_precedence op prec;
+      | _ -> ()
+      end;
+
+      (* Create a new basic block to start insertion into. *)
+      let bb = append_block context "entry" the_function in
+      position_at_end bb builder;
+
+      try
+        (* Add all arguments to the symbol table and create their allocas. *)
+        create_argument_allocas the_function proto;
+
+        let ret_val = codegen_expr body in
+
+        (* Finish off the function. *)
+        let _ = build_ret ret_val builder in
+
+        (* Validate the generated code, checking for consistency. *)
+        Llvm_analysis.assert_valid_function the_function;
+
+        (* Optimize the function. *)
+        let _ = PassManager.run_function the_function the_fpm in
+
+        the_function
+      with e ->
+        delete_function the_function;
+        raise e
+
+
+ +
toplevel.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Top-Level parsing and JIT Driver
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+
+(* top ::= definition | external | expression | ';' *)
+let rec main_loop the_fpm the_execution_engine stream =
+  match Stream.peek stream with
+  | None -> ()
+
+  (* ignore top-level semicolons. *)
+  | Some (Token.Kwd ';') ->
+      Stream.junk stream;
+      main_loop the_fpm the_execution_engine stream
+
+  | Some token ->
+      begin
+        try match token with
+        | Token.Def ->
+            let e = Parser.parse_definition stream in
+            print_endline "parsed a function definition.";
+            dump_value (Codegen.codegen_func the_fpm e);
+        | Token.Extern ->
+            let e = Parser.parse_extern stream in
+            print_endline "parsed an extern.";
+            dump_value (Codegen.codegen_proto e);
+        | _ ->
+            (* Evaluate a top-level expression into an anonymous function. *)
+            let e = Parser.parse_toplevel stream in
+            print_endline "parsed a top-level expr";
+            let the_function = Codegen.codegen_func the_fpm e in
+            dump_value the_function;
+
+            (* JIT the function, returning a function pointer. *)
+            let result = ExecutionEngine.run_function the_function [||]
+              the_execution_engine in
+
+            print_string "Evaluated to ";
+            print_float (GenericValue.as_float Codegen.double_type result);
+            print_newline ();
+        with Stream.Error s | Codegen.Error s ->
+          (* Skip token for error recovery. *)
+          Stream.junk stream;
+          print_endline s;
+      end;
+      print_string "ready> "; flush stdout;
+      main_loop the_fpm the_execution_engine stream
+
+
+ +
toy.ml:
+
+
+(*===----------------------------------------------------------------------===
+ * Main driver code.
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+open Llvm_executionengine
+open Llvm_target
+open Llvm_scalar_opts
+
+let main () =
+  ignore (initialize_native_target ());
+
+  (* Install standard binary operators.
+   * 1 is the lowest precedence. *)
+  Hashtbl.add Parser.binop_precedence '=' 2;
+  Hashtbl.add Parser.binop_precedence '<' 10;
+  Hashtbl.add Parser.binop_precedence '+' 20;
+  Hashtbl.add Parser.binop_precedence '-' 20;
+  Hashtbl.add Parser.binop_precedence '*' 40;    (* highest. *)
+
+  (* Prime the first token. *)
+  print_string "ready> "; flush stdout;
+  let stream = Lexer.lex (Stream.of_channel stdin) in
+
+  (* Create the JIT. *)
+  let the_execution_engine = ExecutionEngine.create Codegen.the_module in
+  let the_fpm = PassManager.create_function Codegen.the_module in
+
+  (* Set up the optimizer pipeline.  Start with registering info about how the
+   * target lays out data structures. *)
+  TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
+
+  (* Promote allocas to registers. *)
+  add_memory_to_register_promotion the_fpm;
+
+  (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
+  add_instruction_combination the_fpm;
+
+  (* reassociate expressions. *)
+  add_reassociation the_fpm;
+
+  (* Eliminate Common SubExpressions. *)
+  add_gvn the_fpm;
+
+  (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
+  add_cfg_simplification the_fpm;
+
+  ignore (PassManager.initialize the_fpm);
+
+  (* Run the main "interpreter loop" now. *)
+  Toplevel.main_loop the_fpm the_execution_engine stream;
+
+  (* Print out all the generated code. *)
+  dump_module Codegen.the_module
+;;
+
+main ()
+
+
+ +
bindings.c:
+
+
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/* printd - printf that takes a double prints it as "%f\n", returning 0. */
+extern double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+
+
+
+ +Next: Conclusion and other useful LLVM tidbits +
+ + +
+
+ Chris Lattner
+ The LLVM Compiler Infrastructure
+ Erick Tryzelaar
+ Last modified: $Date$ +
+ + diff --git a/final/docs/tutorial/OCamlLangImpl8.html b/final/docs/tutorial/OCamlLangImpl8.html new file mode 100644 index 00000000000..64a62002c4c --- /dev/null +++ b/final/docs/tutorial/OCamlLangImpl8.html @@ -0,0 +1,365 @@ + + + + + Kaleidoscope: Conclusion and other useful LLVM tidbits + + + + + + + +
Kaleidoscope: Conclusion and other useful LLVM + tidbits
+ + + + +
+

Written by Chris Lattner

+
Tutorial Conclusion
+ +

Welcome to the final chapter of the "Implementing a language with LLVM" tutorial. In the course of this tutorial, we have grown our little Kaleidoscope language from being a useless toy, to being a semi-interesting (but probably still useless) toy. :)

+ +

It is interesting to see how far we've come, and how little code it has taken. We built the entire lexer, parser, AST, code generator, and an interactive run-loop (with a JIT!) by hand in under 700 lines of (non-comment/non-blank) code.

+ +

Our little language supports a couple of interesting features: it supports +user defined binary and unary operators, it uses JIT compilation for immediate +evaluation, and it supports a few control flow constructs with SSA construction. +

+ +

Part of the idea of this tutorial was to show you how easy and fun it can be +to define, build, and play with languages. Building a compiler need not be a +scary or mystical process! Now that you've seen some of the basics, I strongly +encourage you to take the code and hack on it. For example, try adding:

+ +
    +
  • global variables - While global variables have questional value in +modern software engineering, they are often useful when putting together quick +little hacks like the Kaleidoscope compiler itself. Fortunately, our current +setup makes it very easy to add global variables: just have value lookup check +to see if an unresolved variable is in the global variable symbol table before +rejecting it. To create a new global variable, make an instance of the LLVM +GlobalVariable class.
  • + +
  • typed variables - Kaleidoscope currently only supports variables of +type double. This gives the language a very nice elegance, because only +supporting one type means that you never have to specify types. Different +languages have different ways of handling this. The easiest way is to require +the user to specify types for every variable definition, and record the type +of the variable in the symbol table along with its Value*.
  • + +
  • arrays, structs, vectors, etc - Once you add types, you can start +extending the type system in all sorts of interesting ways. Simple arrays are +very easy and are quite useful for many different applications. Adding them is +mostly an exercise in learning how the LLVM getelementptr instruction works: it +is so nifty/unconventional, it has its own FAQ! If you add support +for recursive types (e.g. linked lists), make sure to read the section in the LLVM +Programmer's Manual that describes how to construct them.
  • + +
  • standard runtime - Our current language allows the user to access +arbitrary external functions, and we use it for things like "printd" and +"putchard". As you extend the language to add higher-level constructs, often +these constructs make the most sense if they are lowered to calls into a +language-supplied runtime. For example, if you add hash tables to the language, +it would probably make sense to add the routines to a runtime, instead of +inlining them all the way.
  • + +
  • memory management - Currently we can only access the stack in +Kaleidoscope. It would also be useful to be able to allocate heap memory, +either with calls to the standard libc malloc/free interface or with a garbage +collector. If you would like to use garbage collection, note that LLVM fully +supports Accurate Garbage Collection +including algorithms that move objects and need to scan/update the stack.
  • + +
  • debugger support - LLVM supports generation of DWARF Debug info which is understood by +common debuggers like GDB. Adding support for debug info is fairly +straightforward. The best way to understand it is to compile some C/C++ code +with "llvm-gcc -g -O0" and taking a look at what it produces.
  • + +
  • exception handling support - LLVM supports generation of zero cost exceptions which interoperate +with code compiled in other languages. You could also generate code by +implicitly making every function return an error value and checking it. You +could also make explicit use of setjmp/longjmp. There are many different ways +to go here.
  • object orientation, generics, database access, complex numbers, geometric programming, ... - Really, there is no end of crazy features that you can add to the language.
  • unusual domains - We've been talking about applying LLVM to a domain that many people are interested in: building a compiler for a specific language. However, there are many other domains that can use compiler technology that are not typically considered. For example, LLVM has been used to implement OpenGL graphics acceleration, translate C++ code to ActionScript, and many other cute and clever things. Maybe you will be the first to JIT compile a regular expression interpreter into native code with LLVM?
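
To make the global-variable item above concrete, here is a minimal sketch against the LLVM 2.9 C++ API that this tutorial already uses; the helper name and the policy of zero-initializing new globals are assumptions for illustration, not part of Kaleidoscope:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Module.h"
#include <string>
using namespace llvm;

// Hypothetical helper: look up the named global, creating a
// zero-initialized double on first use. Codegen for a variable
// reference would try this before rejecting an unresolved name.
static GlobalVariable *getOrCreateGlobalDouble(Module &M,
                                               const std::string &Name) {
  if (GlobalVariable *GV = M.getNamedGlobal(Name))
    return GV;
  const Type *DblTy = Type::getDoubleTy(M.getContext());
  return new GlobalVariable(M, DblTy, /*isConstant=*/false,
                            GlobalValue::InternalLinkage,
                            ConstantFP::get(DblTy, 0.0), Name);
}

A load of the returned pointer (e.g. via the IRBuilder's CreateLoad) then yields the variable's current value, just like the tutorial's stack slots.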

Have fun - try doing something crazy and unusual. Building a language the way everyone else always has is much less fun than trying something a little crazy or off the wall and seeing how it turns out. If you get stuck or want to talk about it, feel free to email the llvmdev mailing list: it has lots of people who are interested in languages and are often willing to help out.


Before we end this tutorial, I want to talk about some "tips and tricks" for generating LLVM IR. These are some of the more subtle things that may not be obvious, but are very useful if you want to take advantage of LLVM's capabilities.

Properties of the LLVM IR

We get a couple of common questions about code in LLVM IR form - let's just get these out of the way right now, shall we?

Target Independence

Kaleidoscope is an example of a "portable language": any program written in Kaleidoscope will work the same way on any target that it runs on. Many other languages have this property, e.g. Lisp, Java, Haskell, JavaScript, and Python (note that while these languages are portable, not all their libraries are).


One nice aspect of LLVM is that it is often capable of preserving target independence in the IR: you can take the LLVM IR for a Kaleidoscope-compiled program and run it on any target that LLVM supports, even emitting C code and compiling that on targets that LLVM doesn't support natively. You can trivially tell that the Kaleidoscope compiler generates target-independent code because it never queries for any target-specific information when generating code.


The fact that LLVM provides a compact, target-independent representation for code gets a lot of people excited. Unfortunately, these people are usually thinking about C or a language from the C family when they are asking questions about language portability. I say "unfortunately", because there is really no way to make (fully general) C code portable, other than shipping the source code around (and of course, C source code is not actually portable in general either - ever port a really old application from 32- to 64-bits?).


The problem with C (again, in its full generality) is that it is heavily laden with target-specific assumptions. As one simple example, the preprocessor often destructively removes target-independence from the code when it processes the input text:

#ifdef __i386__
  int X = 1;
#else
  int X = 42;
#endif

While it is possible to engineer more and more complex solutions to problems like this, it cannot be solved in full generality in a way that is better than shipping the actual source code.


That said, there are interesting subsets of C that can be made portable. If you are willing to pin primitive types to fixed sizes (say int = 32 bits and long = 64 bits), don't care about ABI compatibility with existing binaries, and are willing to give up some other minor features, you can have portable code. This can make sense for specialized domains such as an in-kernel language.

Safety Guarantees

Many of the languages above are also "safe" languages: it is impossible for a program written in Java to corrupt its address space and crash the process (assuming the JVM has no bugs). Safety is an interesting property that requires a combination of language design, runtime support, and often operating system support.


It is certainly possible to implement a safe language in LLVM, but LLVM IR does not itself guarantee safety. The LLVM IR allows unsafe pointer casts, use-after-free bugs, buffer overruns, and a variety of other problems. Safety needs to be implemented as a layer on top of LLVM and, conveniently, several groups have investigated this. Ask on the llvmdev mailing list if you are interested in more details.

Language-Specific Optimizations

One thing about LLVM that turns off many people is that it does not solve all the world's problems in one system (sorry 'world hunger', someone else will have to solve you some other day). One specific complaint is that people perceive LLVM as being incapable of performing high-level language-specific optimization: LLVM "loses too much information".


Unfortunately, this is really not the place to give you a full and unified version of "Chris Lattner's theory of compiler design". Instead, I'll make a few observations:


First, you're right that LLVM does lose information. For example, as of this writing, there is no way to distinguish in the LLVM IR whether an SSA value came from a C "int" or a C "long" on an ILP32 machine (other than debug info). Both get compiled down to an 'i32' value, and the information about what it came from is lost. The more general issue here is that the LLVM type system uses "structural equivalence" instead of "name equivalence". Another place this surprises people is if you have two types in a high-level language that have the same structure (e.g. two different structs that have a single int field): these types will compile down into a single LLVM type, and it will be impossible to tell which one a given value came from.
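
To see structural equivalence concretely, here is a small sketch (assuming the LLVM 2.9 C++ API, in which types are uniqued by their LLVMContext): two struct types built from the same field list come back as the very same type object, so the source-level distinction is gone.

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
#include <vector>
using namespace llvm;

int main() {
  LLVMContext Context;
  std::vector<const Type*> Fields(1, Type::getInt32Ty(Context));

  // Stand-ins for two distinct source structs with identical layout,
  // e.g. "struct A { int x; }" and "struct B { int y; }".
  const StructType *A = StructType::get(Context, Fields);
  const StructType *B = StructType::get(Context, Fields);

  // Structural equivalence: both requests yield the same uniqued type.
  assert(A == B && "LLVM types are uniqued by structure, not by name");
  return 0;
}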


Second, while LLVM does lose information, LLVM is not a fixed target: we continue to enhance and improve it in many different ways. In addition to adding new features (LLVM did not always support exceptions or debug info), we also extend the IR to capture important information for optimization (e.g. whether an argument is sign- or zero-extended, information about pointer aliasing, etc). Many of the enhancements are user-driven: people want LLVM to include some specific feature, so they go ahead and extend it.


Third, it is possible and easy to add language-specific optimizations, and you have a number of choices in how to do it. As one trivial example, it is easy to add language-specific optimization passes that "know" things about code compiled for a language. In the case of the C family, there is an optimization pass that "knows" about the standard C library functions. If you call "exit(0)" in main(), it knows that it is safe to optimize that into "return 0;" because C specifies what the 'exit' function does.


In addition to simple library knowledge, it is possible to embed a variety of other language-specific information into the LLVM IR. If you have a specific need and run into a wall, please bring the topic up on the llvmdev list. At the very worst, you can always treat LLVM as if it were a "dumb code generator" and implement the high-level optimizations you desire in your front-end, on the language-specific AST.

Tips and Tricks

There are a variety of useful tips and tricks that you come to know after working on/with LLVM that aren't obvious at first glance. Instead of letting everyone rediscover them, this section talks about some of these issues.

Implementing portable offsetof/sizeof

One interesting thing that comes up, if you are trying to keep the code generated by your compiler "target independent", is that you often need to know the size of some LLVM type or the offset of some field in an LLVM structure. For example, you might need to pass the size of a type into a function that allocates memory.


Unfortunately, this can vary widely across targets: for example, the width of a pointer is trivially target-specific. However, there is a clever way to use the getelementptr instruction that allows you to compute this in a portable way.
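
As an illustration, here is a minimal sketch of the trick against the LLVM 2.9 C++ API (the helper name is an assumption for this example): indexing one element past a null pointer yields sizeof(Ty) as a constant that the target lowers for you, so the front-end never hardcodes a size. LLVM packages this same idiom as ConstantExpr::getSizeOf.

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

// Hypothetical helper: sizeof(Ty) as a target-lowered constant, i.e.
// ptrtoint(getelementptr Ty* null, i32 1) to i64.
static Constant *portableSizeOf(const Type *Ty) {
  LLVMContext &C = Ty->getContext();
  Constant *NullPtr = Constant::getNullValue(PointerType::getUnqual(Ty));
  Constant *One = ConstantInt::get(Type::getInt32Ty(C), 1);
  Constant *Size = ConstantExpr::getGetElementPtr(NullPtr, &One, 1);
  return ConstantExpr::getPtrToInt(Size, Type::getInt64Ty(C));
}

The offsetof analogue uses a {0, FieldIndex} index list on a pointer to the struct type instead of the single index.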

Garbage Collected Stack Frames

Some languages want to explicitly manage their stack frames, often so that they are garbage collected or to allow easy implementation of closures. There are often better ways to implement these features than explicit stack frames, but LLVM does support them if you want. It requires your front-end to convert the code into Continuation Passing Style and to use tail calls (which LLVM also supports), as the sketch below illustrates.
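
As a small illustration of the tail-call half of that recipe, here is a sketch (all names hypothetical, LLVM 2.9 C++ API) that defines a function which hands an explicit frame pointer to a continuation in tail position rather than returning through a growing machine stack:

#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include <vector>
using namespace llvm;

// Hypothetical helper: define "void step(i8* frame)" whose body
// tail-calls the given continuation with the same explicit frame.
static Function *emitStep(Module &M, Function *Continuation) {
  LLVMContext &C = M.getContext();
  std::vector<const Type*> Args(1, Type::getInt8PtrTy(C));
  FunctionType *FT = FunctionType::get(Type::getVoidTy(C), Args, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "step", &M);

  IRBuilder<> Builder(BasicBlock::Create(C, "entry", F));
  Value *Frame = F->arg_begin();
  CallInst *Call = Builder.CreateCall(Continuation, Frame);
  Call->setTailCall(true);  // mark for tail-call lowering
  Builder.CreateRetVoid();
  return F;
}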

Chris Lattner
The LLVM Compiler Infrastructure
Last modified: $Date$
diff --git a/final/docs/tutorial/index.html b/final/docs/tutorial/index.html
new file mode 100644
index 00000000000..11dd5e2d732
--- /dev/null
+++ b/final/docs/tutorial/index.html
@@ -0,0 +1,48 @@
LLVM Tutorial: Table of Contents
  1. Kaleidoscope: Implementing a Language with LLVM
       1. Tutorial Introduction and the Lexer
       2. Implementing a Parser and AST
       3. Implementing Code Generation to LLVM IR
       4. Adding JIT and Optimizer Support
       5. Extending the language: control flow
       6. Extending the language: user-defined operators
       7. Extending the language: mutable variables / SSA construction
       8. Conclusion and other useful LLVM tidbits
  2. Kaleidoscope: Implementing a Language with LLVM in Objective Caml
       1. Tutorial Introduction and the Lexer
       2. Implementing a Parser and AST
       3. Implementing Code Generation to LLVM IR
       4. Adding JIT and Optimizer Support
       5. Extending the language: control flow
       6. Extending the language: user-defined operators
       7. Extending the language: mutable variables / SSA construction
       8. Conclusion and other useful LLVM tidbits
  3. Advanced Topics
       1. Writing an Optimization for LLVM
+ + + diff --git a/final/examples/BrainF/BrainF.cpp b/final/examples/BrainF/BrainF.cpp new file mode 100644 index 00000000000..8536915993e --- /dev/null +++ b/final/examples/BrainF/BrainF.cpp @@ -0,0 +1,468 @@ +//===-- BrainF.cpp - BrainF compiler example ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class compiles the BrainF language into LLVM assembly. +// +// The BrainF language has 8 commands: +// Command Equivalent C Action +// ------- ------------ ------ +// , *h=getchar(); Read a character from stdin, 255 on EOF +// . putchar(*h); Write a character to stdout +// - --*h; Decrement tape +// + ++*h; Increment tape +// < --h; Move head left +// > ++h; Move head right +// [ while(*h) { Start loop +// ] } End loop +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/ADT/STLExtras.h" +#include +using namespace llvm; + +//Set the constants for naming +const char *BrainF::tapereg = "tape"; +const char *BrainF::headreg = "head"; +const char *BrainF::label = "brainf"; +const char *BrainF::testreg = "test"; + +Module *BrainF::parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& Context) { + in = in1; + memtotal = mem; + comflag = cf; + + header(Context); + readloop(0, 0, 0, Context); + delete builder; + return module; +} + +void BrainF::header(LLVMContext& C) { + module = new Module("BrainF", C); + + //Function prototypes + + //declare void @llvm.memset.p0i8.i32(i8 *, i8, i32, i32, i1) + const Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) }; + Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset, + Tys, 2); + + //declare i32 @getchar() + getchar_func = cast(module-> + getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL)); + + //declare i32 @putchar(i32) + putchar_func = cast(module-> + getOrInsertFunction("putchar", IntegerType::getInt32Ty(C), + IntegerType::getInt32Ty(C), NULL)); + + + //Function header + + //define void @brainf() + brainf_func = cast(module-> + getOrInsertFunction("brainf", Type::getVoidTy(C), NULL)); + + builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func)); + + //%arr = malloc i8, i32 %d + ConstantInt *val_mem = ConstantInt::get(C, APInt(32, memtotal)); + BasicBlock* BB = builder->GetInsertBlock(); + const Type* IntPtrTy = IntegerType::getInt32Ty(C); + const Type* Int8Ty = IntegerType::getInt8Ty(C); + Constant* allocsize = ConstantExpr::getSizeOf(Int8Ty); + allocsize = ConstantExpr::getTruncOrBitCast(allocsize, IntPtrTy); + ptr_arr = CallInst::CreateMalloc(BB, IntPtrTy, Int8Ty, allocsize, val_mem, + NULL, "arr"); + BB->getInstList().push_back(cast(ptr_arr)); + + //call void @llvm.memset.p0i8.i32(i8 *%arr, i8 0, i32 %d, i32 1, i1 0) + { + Value *memset_params[] = { + ptr_arr, + ConstantInt::get(C, APInt(8, 0)), + val_mem, + ConstantInt::get(C, APInt(32, 1)), + ConstantInt::get(C, APInt(1, 0)) + }; + + CallInst *memset_call = builder-> + CreateCall(memset_func, memset_params, array_endof(memset_params)); + memset_call->setTailCall(false); + } + + //%arrmax = getelementptr i8 *%arr, i32 %d + if (comflag & flag_arraybounds) { + ptr_arrmax = builder-> + CreateGEP(ptr_arr, ConstantInt::get(C, APInt(32, 
memtotal)), "arrmax"); + } + + //%head.%d = getelementptr i8 *%arr, i32 %d + curhead = builder->CreateGEP(ptr_arr, + ConstantInt::get(C, APInt(32, memtotal/2)), + headreg); + + + + //Function footer + + //brainf.end: + endbb = BasicBlock::Create(C, label, brainf_func); + + //call free(i8 *%arr) + endbb->getInstList().push_back(CallInst::CreateFree(ptr_arr, endbb)); + + //ret void + ReturnInst::Create(C, endbb); + + + + //Error block for array out of bounds + if (comflag & flag_arraybounds) + { + //@aberrormsg = internal constant [%d x i8] c"\00" + Constant *msg_0 = + ConstantArray::get(C, "Error: The head has left the tape.", true); + + GlobalVariable *aberrormsg = new GlobalVariable( + *module, + msg_0->getType(), + true, + GlobalValue::InternalLinkage, + msg_0, + "aberrormsg"); + + //declare i32 @puts(i8 *) + Function *puts_func = cast(module-> + getOrInsertFunction("puts", IntegerType::getInt32Ty(C), + PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL)); + + //brainf.aberror: + aberrorbb = BasicBlock::Create(C, label, brainf_func); + + //call i32 @puts(i8 *getelementptr([%d x i8] *@aberrormsg, i32 0, i32 0)) + { + Constant *zero_32 = Constant::getNullValue(IntegerType::getInt32Ty(C)); + + Constant *gep_params[] = { + zero_32, + zero_32 + }; + + Constant *msgptr = ConstantExpr:: + getGetElementPtr(aberrormsg, gep_params, + array_lengthof(gep_params)); + + Value *puts_params[] = { + msgptr + }; + + CallInst *puts_call = + CallInst::Create(puts_func, + puts_params, array_endof(puts_params), + "", aberrorbb); + puts_call->setTailCall(false); + } + + //br label %brainf.end + BranchInst::Create(endbb, aberrorbb); + } +} + +void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb, + LLVMContext &C) { + Symbol cursym = SYM_NONE; + int curvalue = 0; + Symbol nextsym = SYM_NONE; + int nextvalue = 0; + char c; + int loop; + int direction; + + while(cursym != SYM_EOF && cursym != SYM_ENDLOOP) { + // Write out commands + switch(cursym) { + case SYM_NONE: + // Do nothing + break; + + case SYM_READ: + { + //%tape.%d = call i32 @getchar() + CallInst *getchar_call = builder->CreateCall(getchar_func, tapereg); + getchar_call->setTailCall(false); + Value *tape_0 = getchar_call; + + //%tape.%d = trunc i32 %tape.%d to i8 + Value *tape_1 = builder-> + CreateTrunc(tape_0, IntegerType::getInt8Ty(C), tapereg); + + //store i8 %tape.%d, i8 *%head.%d + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_WRITE: + { + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = sext i8 %tape.%d to i32 + Value *tape_1 = builder-> + CreateSExt(tape_0, IntegerType::getInt32Ty(C), tapereg); + + //call i32 @putchar(i32 %tape.%d) + Value *putchar_params[] = { + tape_1 + }; + CallInst *putchar_call = builder-> + CreateCall(putchar_func, + putchar_params, array_endof(putchar_params)); + putchar_call->setTailCall(false); + } + break; + + case SYM_MOVE: + { + //%head.%d = getelementptr i8 *%head.%d, i32 %d + curhead = builder-> + CreateGEP(curhead, ConstantInt::get(C, APInt(32, curvalue)), + headreg); + + //Error block for array out of bounds + if (comflag & flag_arraybounds) + { + //%test.%d = icmp uge i8 *%head.%d, %arrmax + Value *test_0 = builder-> + CreateICmpUGE(curhead, ptr_arrmax, testreg); + + //%test.%d = icmp ult i8 *%head.%d, %arr + Value *test_1 = builder-> + CreateICmpULT(curhead, ptr_arr, testreg); + + //%test.%d = or i1 %test.%d, %test.%d + Value *test_2 = builder-> + CreateOr(test_0, test_1, testreg); + + //br i1 %test.%d, 
label %main.%d, label %main.%d + BasicBlock *nextbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateCondBr(test_2, aberrorbb, nextbb); + + //main.%d: + builder->SetInsertPoint(nextbb); + } + } + break; + + case SYM_CHANGE: + { + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = builder->CreateLoad(curhead, tapereg); + + //%tape.%d = add i8 %tape.%d, %d + Value *tape_1 = builder-> + CreateAdd(tape_0, ConstantInt::get(C, APInt(8, curvalue)), tapereg); + + //store i8 %tape.%d, i8 *%head.%d\n" + builder->CreateStore(tape_1, curhead); + } + break; + + case SYM_LOOP: + { + //br label %main.%d + BasicBlock *testbb = BasicBlock::Create(C, label, brainf_func); + builder->CreateBr(testbb); + + //main.%d: + BasicBlock *bb_0 = builder->GetInsertBlock(); + BasicBlock *bb_1 = BasicBlock::Create(C, label, brainf_func); + builder->SetInsertPoint(bb_1); + + // Make part of PHI instruction now, wait until end of loop to finish + PHINode *phi_0 = + PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)), + headreg, testbb); + phi_0->reserveOperandSpace(2); + phi_0->addIncoming(curhead, bb_0); + curhead = phi_0; + + readloop(phi_0, bb_1, testbb, C); + } + break; + + default: + std::cerr << "Error: Unknown symbol.\n"; + abort(); + break; + } + + cursym = nextsym; + curvalue = nextvalue; + nextsym = SYM_NONE; + + // Reading stdin loop + loop = (cursym == SYM_NONE) + || (cursym == SYM_MOVE) + || (cursym == SYM_CHANGE); + while(loop) { + *in>>c; + if (in->eof()) { + if (cursym == SYM_NONE) { + cursym = SYM_EOF; + } else { + nextsym = SYM_EOF; + } + loop = 0; + } else { + direction = 1; + switch(c) { + case '-': + direction = -1; + // Fall through + + case '+': + if (cursym == SYM_CHANGE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_CHANGE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_CHANGE; + nextvalue = direction; + loop = 0; + } + } + break; + + case '<': + direction = -1; + // Fall through + + case '>': + if (cursym == SYM_MOVE) { + curvalue += direction; + // loop = 1 + } else { + if (cursym == SYM_NONE) { + cursym = SYM_MOVE; + curvalue = direction; + // loop = 1 + } else { + nextsym = SYM_MOVE; + nextvalue = direction; + loop = 0; + } + } + break; + + case ',': + if (cursym == SYM_NONE) { + cursym = SYM_READ; + } else { + nextsym = SYM_READ; + } + loop = 0; + break; + + case '.': + if (cursym == SYM_NONE) { + cursym = SYM_WRITE; + } else { + nextsym = SYM_WRITE; + } + loop = 0; + break; + + case '[': + if (cursym == SYM_NONE) { + cursym = SYM_LOOP; + } else { + nextsym = SYM_LOOP; + } + loop = 0; + break; + + case ']': + if (cursym == SYM_NONE) { + cursym = SYM_ENDLOOP; + } else { + nextsym = SYM_ENDLOOP; + } + loop = 0; + break; + + // Ignore other characters + default: + break; + } + } + } + } + + if (cursym == SYM_ENDLOOP) { + if (!phi) { + std::cerr << "Error: Extra ']'\n"; + abort(); + } + + // Write loop test + { + //br label %main.%d + builder->CreateBr(testbb); + + //main.%d: + + //%head.%d = phi i8 *[%head.%d, %main.%d], [%head.%d, %main.%d] + //Finish phi made at beginning of loop + phi->addIncoming(curhead, builder->GetInsertBlock()); + Value *head_0 = phi; + + //%tape.%d = load i8 *%head.%d + LoadInst *tape_0 = new LoadInst(head_0, tapereg, testbb); + + //%test.%d = icmp eq i8 %tape.%d, 0 + ICmpInst *test_0 = new ICmpInst(*testbb, ICmpInst::ICMP_EQ, tape_0, + ConstantInt::get(C, APInt(8, 0)), testreg); + + //br i1 %test.%d, label %main.%d, label %main.%d + BasicBlock *bb_0 = BasicBlock::Create(C, 
label, brainf_func); + BranchInst::Create(bb_0, oldbb, test_0, testbb); + + //main.%d: + builder->SetInsertPoint(bb_0); + + //%head.%d = phi i8 *[%head.%d, %main.%d] + PHINode *phi_1 = builder-> + CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg); + phi_1->reserveOperandSpace(1); + phi_1->addIncoming(head_0, testbb); + curhead = phi_1; + } + + return; + } + + //End of the program, so go to return block + builder->CreateBr(endbb); + + if (phi) { + std::cerr << "Error: Missing ']'\n"; + abort(); + } +} diff --git a/final/examples/BrainF/BrainF.h b/final/examples/BrainF/BrainF.h new file mode 100644 index 00000000000..add0687d54a --- /dev/null +++ b/final/examples/BrainF/BrainF.h @@ -0,0 +1,94 @@ +//===-- BrainF.h - BrainF compiler class ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This class stores the data for the BrainF compiler so it doesn't have +// to pass all of it around. The main method is parse. +// +//===--------------------------------------------------------------------===// + +#ifndef BRAINF_H +#define BRAINF_H + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" + +using namespace llvm; + +/// This class provides a parser for the BrainF language. +/// The class itself is made to store values during +/// parsing so they don't have to be passed around +/// as much. +class BrainF { + public: + /// Options for how BrainF should compile + enum CompileFlags { + flag_off = 0, + flag_arraybounds = 1 + }; + + /// This is the main method. It parses BrainF from in1 + /// and returns the module with a function + /// void brainf() + /// containing the resulting code. + /// On error, it calls abort. + /// The caller must delete the returned module. + Module *parse(std::istream *in1, int mem, CompileFlags cf, + LLVMContext& C); + + protected: + /// The different symbols in the BrainF language + enum Symbol { + SYM_NONE, + SYM_READ, + SYM_WRITE, + SYM_MOVE, + SYM_CHANGE, + SYM_LOOP, + SYM_ENDLOOP, + SYM_EOF + }; + + /// Names of the different parts of the language. + /// Tape is used for reading and writing the tape. + /// headreg is used for the position of the head. + /// label is used for the labels for the BasicBlocks. + /// testreg is used for testing the loop exit condition. + static const char *tapereg; + static const char *headreg; + static const char *label; + static const char *testreg; + + /// Put the brainf function preamble and other fixed pieces of code + void header(LLVMContext& C); + + /// The main loop for parsing. It calls itself recursively + /// to handle the depth of nesting of "[]". 
+ void readloop(PHINode *phi, BasicBlock *oldbb, + BasicBlock *testbb, LLVMContext &Context); + + /// Constants during parsing + int memtotal; + CompileFlags comflag; + std::istream *in; + Module *module; + Function *brainf_func; + Function *getchar_func; + Function *putchar_func; + Value *ptr_arr; + Value *ptr_arrmax; + BasicBlock *endbb; + BasicBlock *aberrorbb; + + /// Variables + IRBuilder<> *builder; + Value *curhead; +}; + +#endif diff --git a/final/examples/BrainF/BrainFDriver.cpp b/final/examples/BrainF/BrainFDriver.cpp new file mode 100644 index 00000000000..c11a58067e8 --- /dev/null +++ b/final/examples/BrainF/BrainFDriver.cpp @@ -0,0 +1,160 @@ +//===-- BrainFDriver.cpp - BrainF compiler driver -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// This program converts the BrainF language into LLVM assembly, +// which it can then run using the JIT or output as BitCode. +// +// This implementation has a tape of 65536 bytes, +// with the head starting in the middle. +// Range checking is off by default, so be careful. +// It can be enabled with -abc. +// +// Use: +// ./BrainF -jit prog.bf #Run program now +// ./BrainF -jit -abc prog.bf #Run program now safely +// ./BrainF prog.bf #Write as BitCode +// +// lli prog.bf.bc #Run generated BitCode +// llvm-ld -native -o=prog prog.bf.bc #Compile BitCode into native executable +// +//===--------------------------------------------------------------------===// + +#include "BrainF.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +using namespace llvm; + +//Command line options + +static cl::opt +InputFilename(cl::Positional, cl::desc("")); + +static cl::opt +OutputFilename("o", cl::desc("Output filename"), cl::value_desc("filename")); + +static cl::opt +ArrayBoundsChecking("abc", cl::desc("Enable array bounds checking")); + +static cl::opt +JIT("jit", cl::desc("Run program Just-In-Time")); + + +//Add main function so can be fully compiled +void addMainFunction(Module *mod) { + //define i32 @main(i32 %argc, i8 **%argv) + Function *main_func = cast(mod-> + getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()), + IntegerType::getInt32Ty(mod->getContext()), + PointerType::getUnqual(PointerType::getUnqual( + IntegerType::getInt8Ty(mod->getContext()))), NULL)); + { + Function::arg_iterator args = main_func->arg_begin(); + Value *arg_0 = args++; + arg_0->setName("argc"); + Value *arg_1 = args++; + arg_1->setName("argv"); + } + + //main.0: + BasicBlock *bb = BasicBlock::Create(mod->getContext(), "main.0", main_func); + + //call void @brainf() + { + CallInst *brainf_call = CallInst::Create(mod->getFunction("brainf"), + "", bb); + brainf_call->setTailCall(false); + } + + //ret i32 0 + ReturnInst::Create(mod->getContext(), + ConstantInt::get(mod->getContext(), APInt(32, 0)), bb); +} + +int main(int argc, char **argv) { + cl::ParseCommandLineOptions(argc, argv, " BrainF compiler\n"); + + LLVMContext &Context = getGlobalContext(); + + if (InputFilename == "") { + errs() << "Error: You must 
specify the filename of the program to " + "be compiled. Use --help to see the options.\n"; + abort(); + } + + //Get the output stream + raw_ostream *out = &outs(); + if (!JIT) { + if (OutputFilename == "") { + std::string base = InputFilename; + if (InputFilename == "-") { base = "a"; } + + // Use default filename. + OutputFilename = base+".bc"; + } + if (OutputFilename != "-") { + std::string ErrInfo; + out = new raw_fd_ostream(OutputFilename.c_str(), ErrInfo, + raw_fd_ostream::F_Binary); + } + } + + //Get the input stream + std::istream *in = &std::cin; + if (InputFilename != "-") + in = new std::ifstream(InputFilename.c_str()); + + //Gather the compile flags + BrainF::CompileFlags cf = BrainF::flag_off; + if (ArrayBoundsChecking) + cf = BrainF::CompileFlags(cf | BrainF::flag_arraybounds); + + //Read the BrainF program + BrainF bf; + Module *mod = bf.parse(in, 65536, cf, Context); //64 KiB + if (in != &std::cin) + delete in; + addMainFunction(mod); + + //Verify generated code + if (verifyModule(*mod)) { + errs() << "Error: module failed verification. This shouldn't happen.\n"; + abort(); + } + + //Write it out + if (JIT) { + InitializeNativeTarget(); + + outs() << "------- Running JIT -------\n"; + ExecutionEngine *ee = EngineBuilder(mod).create(); + std::vector args; + Function *brainf_func = mod->getFunction("brainf"); + GenericValue gv = ee->runFunction(brainf_func, args); + } else { + WriteBitcodeToFile(mod, *out); + } + + //Clean up + if (out != &outs()) + delete out; + delete mod; + + llvm_shutdown(); + + return 0; +} diff --git a/final/examples/BrainF/CMakeLists.txt b/final/examples/BrainF/CMakeLists.txt new file mode 100644 index 00000000000..7bec105cdc8 --- /dev/null +++ b/final/examples/BrainF/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS jit bitwriter nativecodegen interpreter) + +add_llvm_example(BrainF + BrainF.cpp + BrainFDriver.cpp + ) diff --git a/final/examples/BrainF/Makefile b/final/examples/BrainF/Makefile new file mode 100644 index 00000000000..2c3e0662523 --- /dev/null +++ b/final/examples/BrainF/Makefile @@ -0,0 +1,15 @@ +##===- examples/BrainF/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. 
+TOOLNAME = BrainF +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit bitwriter nativecodegen interpreter + +include $(LEVEL)/Makefile.common diff --git a/final/examples/CMakeLists.txt b/final/examples/CMakeLists.txt new file mode 100644 index 00000000000..54ee6cc3a3a --- /dev/null +++ b/final/examples/CMakeLists.txt @@ -0,0 +1,13 @@ +add_subdirectory(BrainF) +add_subdirectory(Fibonacci) +add_subdirectory(HowToUseJIT) +add_subdirectory(Kaleidoscope) +add_subdirectory(ModuleMaker) + +if( NOT WIN32 ) + add_subdirectory(ExceptionDemo) +endif() + +if( HAVE_PTHREAD_H ) + add_subdirectory(ParallelJIT) +endif( HAVE_PTHREAD_H ) diff --git a/final/examples/ExceptionDemo/CMakeLists.txt b/final/examples/ExceptionDemo/CMakeLists.txt new file mode 100644 index 00000000000..88c9ab7c181 --- /dev/null +++ b/final/examples/ExceptionDemo/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS jit nativecodegen) +set(LLVM_REQUIRES_EH 1) + +add_llvm_example(ExceptionDemo + ExceptionDemo.cpp + ) diff --git a/final/examples/ExceptionDemo/ExceptionDemo.cpp b/final/examples/ExceptionDemo/ExceptionDemo.cpp new file mode 100644 index 00000000000..95ccd24a689 --- /dev/null +++ b/final/examples/ExceptionDemo/ExceptionDemo.cpp @@ -0,0 +1,2030 @@ +//===-- examples/ExceptionDemo/ExceptionDemo.cpp - +// An example use of the llvm Exception mechanism --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===--------------------------------------------------------------------===// +// +// Demo program which implements an example LLVM exception implementation, and +// shows several test cases including the handling of foreign exceptions. +// It is run with type info types arguments to throw. A test will +// be run for each given type info type. While type info types with the value +// of -1 will trigger a foreign C++ exception to be thrown; type info types +// <= 6 and >= 1 will cause the associated generated exceptions to be thrown +// and caught by generated test functions; and type info types > 6 +// will result in exceptions which pass through to the test harness. All other +// type info types are not supported and could cause a crash. In all cases, +// the "finally" blocks of every generated test functions will executed +// regardless of whether or not that test function ignores or catches the +// thrown exception. +// +// examples: +// +// ExceptionDemo +// +// causes a usage to be printed to stderr +// +// ExceptionDemo 2 3 7 -1 +// +// results in the following cases: +// - Value 2 causes an exception with a type info type of 2 to be +// thrown and caught by an inner generated test function. +// - Value 3 causes an exception with a type info type of 3 to be +// thrown and caught by an outer generated test function. +// - Value 7 causes an exception with a type info type of 7 to be +// thrown and NOT be caught by any generated function. +// - Value -1 causes a foreign C++ exception to be thrown and not be +// caught by any generated function +// +// Cases -1 and 7 are caught by a C++ test harness where the validity of +// of a C++ catch(...) clause catching a generated exception with a +// type info type of 7 is questionable. +// +// This code uses code from the llvm compiler-rt project and the llvm +// Kaleidoscope project. 
+// +//===--------------------------------------------------------------------===// + + +#include "llvm/LLVMContext.h" +#include "llvm/DerivedTypes.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JIT.h" +#include "llvm/Module.h" +#include "llvm/PassManager.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetSelect.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/Dwarf.h" + +#include +#include +#include +#include +#include +#include + + +#ifndef USE_GLOBAL_STR_CONSTS +#define USE_GLOBAL_STR_CONSTS true +#endif + +// System C++ ABI unwind types from: +// http://refspecs.freestandards.org/abi-eh-1.21.html + +extern "C" { + +typedef enum { + _URC_NO_REASON = 0, + _URC_FOREIGN_EXCEPTION_CAUGHT = 1, + _URC_FATAL_PHASE2_ERROR = 2, + _URC_FATAL_PHASE1_ERROR = 3, + _URC_NORMAL_STOP = 4, + _URC_END_OF_STACK = 5, + _URC_HANDLER_FOUND = 6, + _URC_INSTALL_CONTEXT = 7, + _URC_CONTINUE_UNWIND = 8 +} _Unwind_Reason_Code; + +typedef enum { + _UA_SEARCH_PHASE = 1, + _UA_CLEANUP_PHASE = 2, + _UA_HANDLER_FRAME = 4, + _UA_FORCE_UNWIND = 8, + _UA_END_OF_STACK = 16 +} _Unwind_Action; + +struct _Unwind_Exception; + +typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, + struct _Unwind_Exception *); + +struct _Unwind_Exception { + uint64_t exception_class; + _Unwind_Exception_Cleanup_Fn exception_cleanup; + + uintptr_t private_1; + uintptr_t private_2; + + // @@@ The IA-64 ABI says that this structure must be double-word aligned. + // Taking that literally does not make much sense generically. Instead + // we provide the maximum alignment required by any type for the machine. +} __attribute__((__aligned__)); + +struct _Unwind_Context; +typedef struct _Unwind_Context* _Unwind_Context_t; + +extern const uint8_t* _Unwind_GetLanguageSpecificData (_Unwind_Context_t c); +extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i); +extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n); +extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value); +extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context); +extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); + +} // extern "C" + +// +// Example types +// + +/// This is our simplistic type info +struct OurExceptionType_t { + /// type info type + int type; +}; + + +/// This is our Exception class which relies on a negative offset to calculate +/// pointers to its instances from pointers to its unwindException member. +/// +/// Note: The above unwind.h defines struct _Unwind_Exception to be aligned +/// on a double word boundary. 
This is necessary to match the standard: +/// http://refspecs.freestandards.org/abi-eh-1.21.html +struct OurBaseException_t { + struct OurExceptionType_t type; + + // Note: This is properly aligned in unwind.h + struct _Unwind_Exception unwindException; +}; + + +// Note: Not needed since we are C++ +typedef struct OurBaseException_t OurException; +typedef struct _Unwind_Exception OurUnwindException; + +// +// Various globals used to support typeinfo and generatted exceptions in +// general +// + +static std::map namedValues; + +int64_t ourBaseFromUnwindOffset; + +const unsigned char ourBaseExcpClassChars[] = + {'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'}; + + +static uint64_t ourBaseExceptionClass = 0; + +static std::vector ourTypeInfoNames; +static std::map ourTypeInfoNamesIndex; + +static llvm::StructType* ourTypeInfoType; +static llvm::StructType* ourExceptionType; +static llvm::StructType* ourUnwindExceptionType; + +static llvm::ConstantInt* ourExceptionNotThrownState; +static llvm::ConstantInt* ourExceptionThrownState; +static llvm::ConstantInt* ourExceptionCaughtState; + +typedef std::vector ArgNames; +typedef std::vector ArgTypes; + +// +// Code Generation Utilities +// + +/// Utility used to create a function, both declarations and definitions +/// @param module for module instance +/// @param retType function return type +/// @param theArgTypes function's ordered argument types +/// @param theArgNames function's ordered arguments needed if use of this +/// function corresponds to a function definition. Use empty +/// aggregate for function declarations. +/// @param functName function name +/// @param linkage function linkage +/// @param declarationOnly for function declarations +/// @param isVarArg function uses vararg arguments +/// @returns function instance +llvm::Function *createFunction(llvm::Module& module, + const llvm::Type* retType, + const ArgTypes& theArgTypes, + const ArgNames& theArgNames, + const std::string& functName, + llvm::GlobalValue::LinkageTypes linkage, + bool declarationOnly, + bool isVarArg) { + llvm::FunctionType* functType = llvm::FunctionType::get(retType, + theArgTypes, + isVarArg); + llvm::Function* ret = llvm::Function::Create(functType, + linkage, + functName, + &module); + if (!ret || declarationOnly) + return(ret); + + namedValues.clear(); + unsigned i = 0; + for (llvm::Function::arg_iterator argIndex = ret->arg_begin(); + i != theArgNames.size(); + ++argIndex, ++i) { + + argIndex->setName(theArgNames[i]); + namedValues[theArgNames[i]] = argIndex; + } + + return(ret); +} + + +/// Create an alloca instruction in the entry block of +/// the parent function. This is used for mutable variables etc. 
+/// @param function parent instance +/// @param varName stack variable name +/// @param type stack variable type +/// @param initWith optional constant initialization value +/// @returns AllocaInst instance +static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function& function, + const std::string &varName, + const llvm::Type* type, + llvm::Constant* initWith = NULL) { + llvm::BasicBlock& block = function.getEntryBlock(); + llvm::IRBuilder<> tmp(&block, block.begin()); + llvm::AllocaInst* ret = tmp.CreateAlloca(type, 0, varName.c_str()); + + if (initWith) + tmp.CreateStore(initWith, ret); + + return(ret); +} + + +// +// Code Generation Utilities End +// + +// +// Runtime C Library functions +// + +// Note: using an extern "C" block so that static functions can be used +extern "C" { + +// Note: Better ways to decide on bit width +// +/// Prints a 32 bit number, according to the format, to stderr. +/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print32Int(int intToPrint, const char* format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print32Int(...):NULL arg.\n"); + } +} + + +// Note: Better ways to decide on bit width +// +/// Prints a 64 bit number, according to the format, to stderr. +/// @param intToPrint integer to print +/// @param format printf like format to use when printing +void print64Int(long int intToPrint, const char* format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print64Int(...):NULL arg.\n"); + } +} + + +/// Prints a C string to stderr +/// @param toPrint string to print +void printStr(char* toPrint) { + if (toPrint) { + fprintf(stderr, "%s", toPrint); + } + else { + fprintf(stderr, "::printStr(...):NULL arg.\n"); + } +} + + +/// Deletes the true previosly allocated exception whose address +/// is calculated from the supplied OurBaseException_t::unwindException +/// member address. Handles (ignores), NULL pointers. +/// @param expToDelete exception to delete +void deleteOurException(OurUnwindException* expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteOurException(...).\n"); +#endif + + if (expToDelete && + (expToDelete->exception_class == ourBaseExceptionClass)) { + + free(((char*) expToDelete) + ourBaseFromUnwindOffset); + } +} + + +/// This function is the struct _Unwind_Exception API mandated delete function +/// used by foreign exception handlers when deleting our exception +/// (OurException), instances. +/// @param reason @link http://refspecs.freestandards.org/abi-eh-1.21.html +/// @unlink +/// @param expToDelete exception instance to delete +void deleteFromUnwindOurException(_Unwind_Reason_Code reason, + OurUnwindException* expToDelete) { +#ifdef DEBUG + fprintf(stderr, + "deleteFromUnwindOurException(...).\n"); +#endif + + deleteOurException(expToDelete); +} + + +/// Creates (allocates on the heap), an exception (OurException instance), +/// of the supplied type info type. 
+/// @param type type info type +OurUnwindException* createOurException(int type) { + size_t size = sizeof(OurException); + OurException* ret = (OurException*) memset(malloc(size), 0, size); + (ret->type).type = type; + (ret->unwindException).exception_class = ourBaseExceptionClass; + (ret->unwindException).exception_cleanup = deleteFromUnwindOurException; + + return(&(ret->unwindException)); +} + + +/// Read a uleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readULEB128(const uint8_t** data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + return result; +} + + +/// Read a sleb128 encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @returns decoded value +static uintptr_t readSLEB128(const uint8_t** data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t* p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + if ((byte & 0x40) && (shift < (sizeof(result) << 3))) { + result |= (~0 << shift); + } + + return result; +} + + +/// Read a pointer encoded value and advance pointer +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// @param data reference variable holding memory pointer to decode from +/// @param encoding dwarf encoding type +/// @returns decoded value +static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) { + uintptr_t result = 0; + const uint8_t* p = *data; + + if (encoding == llvm::dwarf::DW_EH_PE_omit) + return(result); + + // first get value + switch (encoding & 0x0F) { + case llvm::dwarf::DW_EH_PE_absptr: + result = *((uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case llvm::dwarf::DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + // Note: This case has not been tested + case llvm::dwarf::DW_EH_PE_sleb128: + result = readSLEB128(&p); + break; + case llvm::dwarf::DW_EH_PE_udata2: + result = *((uint16_t*)p); + p += sizeof(uint16_t); + break; + case llvm::dwarf::DW_EH_PE_udata4: + result = *((uint32_t*)p); + p += sizeof(uint32_t); + break; + case llvm::dwarf::DW_EH_PE_udata8: + result = *((uint64_t*)p); + p += sizeof(uint64_t); + break; + case llvm::dwarf::DW_EH_PE_sdata2: + result = *((int16_t*)p); + p += sizeof(int16_t); + break; + case llvm::dwarf::DW_EH_PE_sdata4: + result = *((int32_t*)p); + p += sizeof(int32_t); + break; + case llvm::dwarf::DW_EH_PE_sdata8: + result = *((int64_t*)p); + p += sizeof(int64_t); + break; + default: + // not supported + abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case llvm::dwarf::DW_EH_PE_absptr: + // do nothing + break; + case llvm::dwarf::DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case llvm::dwarf::DW_EH_PE_textrel: + case llvm::dwarf::DW_EH_PE_datarel: + case llvm::dwarf::DW_EH_PE_funcrel: + case llvm::dwarf::DW_EH_PE_aligned: + default: + // not supported + abort(); + break; + } + + // then apply indirection + if (encoding & llvm::dwarf::DW_EH_PE_indirect) { + result = *((uintptr_t*)result); + } + + 
*data = p; + + return result; +} + + +/// Deals with Dwarf actions matching our type infos +/// (OurExceptionType_t instances). Returns whether or not a dwarf emitted +/// action matches the supplied exception type. If such a match succeeds, +/// the resultAction argument will be set with > 0 index value. Only +/// corresponding llvm.eh.selector type info arguments, cleanup arguments +/// are supported. Filters are not supported. +/// See Variable Length Data in: +/// @link http://dwarfstd.org/Dwarf3.pdf @unlink +/// Also see @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param resultAction reference variable which will be set with result +/// @param classInfo our array of type info pointers (to globals) +/// @param actionEntry index into above type info array or 0 (clean up). +/// We do not support filters. +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @returns whether or not a type info was found. False is returned if only +/// a cleanup was found +static bool handleActionValue(int64_t *resultAction, + struct OurExceptionType_t **classInfo, + uintptr_t actionEntry, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject) { + bool ret = false; + + if (!resultAction || + !exceptionObject || + (exceptionClass != ourBaseExceptionClass)) + return(ret); + + struct OurBaseException_t* excp = (struct OurBaseException_t*) + (((char*) exceptionObject) + ourBaseFromUnwindOffset); + struct OurExceptionType_t *excpType = &(excp->type); + int type = excpType->type; + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...): exceptionObject = <%p>, " + "excp = <%p>.\n", + exceptionObject, + excp); +#endif + + const uint8_t *actionPos = (uint8_t*) actionEntry, + *tempActionPos; + int64_t typeOffset = 0, + actionOffset; + + for (int i = 0; true; ++i) { + // Each emitted dwarf action corresponds to a 2 tuple of + // type info address offset, and action offset to the next + // emitted action. + typeOffset = readSLEB128(&actionPos); + tempActionPos = actionPos; + actionOffset = readSLEB128(&tempActionPos); + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):typeOffset: <%lld>, " + "actionOffset: <%lld>.\n", + typeOffset, + actionOffset); +#endif + assert((typeOffset >= 0) && + "handleActionValue(...):filters are not supported."); + + // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector + // argument has been matched. + if ((typeOffset > 0) && + (type == (classInfo[-typeOffset])->type)) { +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue <%d> found.\n", + i); +#endif + *resultAction = i + 1; + ret = true; + break; + } + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue not found.\n"); +#endif + if (!actionOffset) + break; + + actionPos += actionOffset; + } + + return(ret); +} + + +/// Deals with the Language specific data portion of the emitted dwarf code. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param lsda language specific data area +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. 
+/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +static _Unwind_Reason_Code handleLsda(int version, + const uint8_t* lsda, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context) { + _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND; + + if (!lsda) + return(ret); + +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):lsda is non-zero.\n"); +#endif + + // Get the current instruction pointer and offset it before next + // instruction in the current frame which threw the exception. + uintptr_t pc = _Unwind_GetIP(context)-1; + + // Get beginning current frame's code (as defined by the + // emitted dwarf code) + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + struct OurExceptionType_t** classInfo = NULL; + + // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding + // dwarf emission + + // Parse LSDA header. + uint8_t lpStartEncoding = *lsda++; + + if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + + uint8_t ttypeEncoding = *lsda++; + uintptr_t classInfoOffset; + + if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) { + // Calculate type info locations in emitted dwarf code which + // were flagged by type info arguments to llvm.eh.selector + // intrinsic + classInfoOffset = readULEB128(&lsda); + classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset); + } + + // Walk call-site table looking for range that + // includes current PC. + + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t* callSiteTableStart = lsda; + const uint8_t* callSiteTableEnd = callSiteTableStart + + callSiteTableLength; + const uint8_t* actionTableStart = callSiteTableEnd; + const uint8_t* callSitePtr = callSiteTableStart; + + bool foreignException = false; + + while (callSitePtr < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t length = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&callSitePtr, + callSiteEncoding); + + // Note: Action value + uintptr_t actionEntry = readULEB128(&callSitePtr); + + if (exceptionClass != ourBaseExceptionClass) { + // We have been notified of a foreign exception being thrown, + // and we therefore need to execute cleanup landing pads + actionEntry = 0; + foreignException = true; + } + + if (landingPad == 0) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): No landing pad found.\n"); +#endif + + continue; // no landing pad for this entry + } + + if (actionEntry) { + actionEntry += ((uintptr_t) actionTableStart) - 1; + } + else { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):No action table found.\n"); +#endif + } + + bool exceptionMatched = false; + + if ((start <= pcOffset) && (pcOffset < (start + length))) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): Landing pad found.\n"); +#endif + int64_t actionValue = 0; + + if (actionEntry) { + exceptionMatched = handleActionValue + ( + &actionValue, + classInfo, + actionEntry, + exceptionClass, + exceptionObject + ); + } + + if (!(actions & _UA_SEARCH_PHASE)) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): installed landing pad " + "context.\n"); +#endif + + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. 
The landing pad is created by the + // compiler to take two parameters in registers. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + + // Note: this virtual register directly corresponds + // to the return of the llvm.eh.selector intrinsic + if (!actionEntry || !exceptionMatched) { + // We indicate cleanup only + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + 0); + } + else { + // Matched type info index of llvm.eh.selector intrinsic + // passed here. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + actionValue); + } + + // To execute landing pad set here + _Unwind_SetIP(context, funcStart + landingPad); + ret = _URC_INSTALL_CONTEXT; + } + else if (exceptionMatched) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): setting handler found.\n"); +#endif + ret = _URC_HANDLER_FOUND; + } + else { + // Note: Only non-clean up handlers are marked as + // found. Otherwise the clean up handlers will be + // re-found and executed during the clean up + // phase. +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): cleanup handler found.\n"); +#endif + } + + break; + } + } + + return(ret); +} + + +/// This is the personality function which is embedded (dwarf emitted), in the +/// dwarf unwind info block. Again see: JITDwarfEmitter.cpp. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param version unsupported (ignored), unwind version +/// @param _Unwind_Action actions minimally supported unwind stage +/// (forced specifically not supported) +/// @param exceptionClass exception class (_Unwind_Exception::exception_class) +/// of thrown exception. +/// @param exceptionObject thrown _Unwind_Exception instance. +/// @param context unwind system context +/// @returns minimally supported unwinding control indicator +_Unwind_Reason_Code ourPersonality(int version, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context) { +#ifdef DEBUG + fprintf(stderr, + "We are in ourPersonality(...):actions is <%d>.\n", + actions); + + if (actions & _UA_SEARCH_PHASE) { + fprintf(stderr, "ourPersonality(...):In search phase.\n"); + } + else { + fprintf(stderr, "ourPersonality(...):In non-search phase.\n"); + } +#endif + + const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context); + +#ifdef DEBUG + fprintf(stderr, + "ourPersonality(...):lsda = <%p>.\n", + lsda); +#endif + + // The real work of the personality function is captured here + return(handleLsda(version, + lsda, + actions, + exceptionClass, + exceptionObject, + context)); +} + + +/// Generates our _Unwind_Exception class from a given character array. +/// thereby handling arbitrary lengths (not in standard), and handling +/// embedded \0s. +/// See @link http://refspecs.freestandards.org/abi-eh-1.21.html @unlink +/// @param classChars char array to encode. NULL values not checkedf +/// @param classCharsSize number of chars in classChars. Value is not checked. 
+/// @returns class value +uint64_t genClass(const unsigned char classChars[], size_t classCharsSize) +{ + uint64_t ret = classChars[0]; + + for (unsigned i = 1; i < classCharsSize; ++i) { + ret <<= 8; + ret += classChars[i]; + } + + return(ret); +} + +} // extern "C" + +// +// Runtime C Library functions End +// + +// +// Code generation functions +// + +/// Generates code to print given constant string +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toPrint string to print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. A value of +/// false indicates that the constant string will be stored on the +/// stack. +void generateStringPrint(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + std::string toPrint, + bool useGlobal = true) { + llvm::Function *printFunct = module.getFunction("printStr"); + + llvm::Value *stringVar; + llvm::Constant* stringConstant = + llvm::ConstantArray::get(context, toPrint); + + if (useGlobal) { + // Note: Does not work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value* cast = + builder.CreatePointerCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall(printFunct, cast); +} + + +/// Generates code to print given runtime integer according to constant +/// string format, and a given print function. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param printFunct function used to "print" integer +/// @param toPrint string to print +/// @param format printf like formating string for print +/// @param useGlobal A value of true (default) indicates a GlobalValue is +/// generated, and is used to hold the constant string. A value of +/// false indicates that the constant string will be stored on the +/// stack. +void generateIntegerPrint(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::Function& printFunct, + llvm::Value& toPrint, + std::string format, + bool useGlobal = true) { + llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format); + llvm::Value *stringVar; + + if (useGlobal) { + // Note: Does not seem to work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value* cast = + builder.CreateBitCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall2(&printFunct, &toPrint, cast); +} + + +/// Generates code to handle finally block type semantics: always runs +/// regardless of whether a thrown exception is passing through or the +/// parent function is simply exiting. In addition to printing some state +/// to stderr, this code will resume the exception handling--runs the +/// unwind resume block, if the exception has not been previously caught +/// by a catch clause, and will otherwise execute the end block (terminator +/// block). 
In addition this function creates the corresponding function's +/// stack storage for the exception pointer and catch flag status. +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "finally" block. +/// @param functionId output id used for printing +/// @param terminatorBlock terminator "end" block +/// @param unwindResumeBlock unwind resume block +/// @param exceptionCaughtFlag reference exception caught/thrown status storage +/// @param exceptionStorage reference to exception pointer storage +/// @returns newly created block +static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context, + llvm::Module& module, + llvm::IRBuilder<>& builder, + llvm::Function& toAddTo, + std::string& blockName, + std::string& functionId, + llvm::BasicBlock& terminatorBlock, + llvm::BasicBlock& unwindResumeBlock, + llvm::Value** exceptionCaughtFlag, + llvm::Value** exceptionStorage) { + assert(exceptionCaughtFlag && + "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag " + "is NULL"); + assert(exceptionStorage && + "ExceptionDemo::createFinallyBlock(...):exceptionStorage " + "is NULL"); + + *exceptionCaughtFlag = + createEntryBlockAlloca(toAddTo, + "exceptionCaught", + ourExceptionNotThrownState->getType(), + ourExceptionNotThrownState); + + const llvm::PointerType* exceptionStorageType = + builder.getInt8Ty()->getPointerTo(); + *exceptionStorage = + createEntryBlockAlloca(toAddTo, + "exceptionStorage", + exceptionStorageType, + llvm::ConstantPointerNull::get( + exceptionStorageType)); + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing finally block " + << blockName + << " in " + << functionId + << std::endl; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + + llvm::SwitchInst* theSwitch = + builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), + &terminatorBlock, + 2); + theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock); + theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock); + + return(ret); +} + + +/// Generates catch block semantics which print a string to indicate type of +/// catch executed, sets an exception caught flag, and executes passed in +/// end block (terminator block). +/// @param context llvm context +/// @param module code for module instance +/// @param builder builder instance +/// @param toAddTo parent function to add block to +/// @param blockName block name of new "catch" block. 
+/// @param functionId output id used for printing
+/// @param terminatorBlock terminator "end" block
+/// @param exceptionCaughtFlag exception caught/thrown status
+/// @returns newly created block
+static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context,
+ llvm::Module& module,
+ llvm::IRBuilder<>& builder,
+ llvm::Function& toAddTo,
+ std::string& blockName,
+ std::string& functionId,
+ llvm::BasicBlock& terminatorBlock,
+ llvm::Value& exceptionCaughtFlag) {
+
+ llvm::BasicBlock *ret = llvm::BasicBlock::Create(context,
+ blockName,
+ &toAddTo);
+
+ builder.SetInsertPoint(ret);
+
+ std::ostringstream bufferToPrint;
+ bufferToPrint << "Gen: Executing catch block "
+ << blockName
+ << " in "
+ << functionId
+ << std::endl;
+ generateStringPrint(context,
+ module,
+ builder,
+ bufferToPrint.str(),
+ USE_GLOBAL_STR_CONSTS);
+ builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag);
+ builder.CreateBr(&terminatorBlock);
+
+ return(ret);
+}
+
+
+/// Generates a function which invokes a function (toInvoke) and whose
+/// unwind block will "catch" the type info types correspondingly held in the
+/// exceptionTypesToCatch argument. If the toInvoke function throws an
+/// exception which does not match any type info types contained in
+/// exceptionTypesToCatch, the generated code will call _Unwind_Resume
+/// with the raised exception. On the other hand the generated code will
+/// normally exit if the toInvoke function does not throw an exception.
+/// The generated "finally" block is always run regardless of the cause of
+/// the generated function exit.
+/// The generated function is returned after being verified.
+/// @param module code for module instance
+/// @param builder builder instance
+/// @param fpm a function pass manager holding optional IR to IR
+/// transformations
+/// @param toInvoke inner function to invoke
+/// @param ourId id used for printing purposes
+/// @param numExceptionsToCatch length of exceptionTypesToCatch array
+/// @param exceptionTypesToCatch array of type info types to "catch"
+/// @returns generated function
+static
+llvm::Function* createCatchWrappedInvokeFunction(llvm::Module& module,
+ llvm::IRBuilder<>& builder,
+ llvm::FunctionPassManager& fpm,
+ llvm::Function& toInvoke,
+ std::string ourId,
+ unsigned numExceptionsToCatch,
+ unsigned exceptionTypesToCatch[]) {
+
+ llvm::LLVMContext& context = module.getContext();
+ llvm::Function *toPrint32Int = module.getFunction("print32Int");
+
+ ArgTypes argTypes;
+ argTypes.push_back(builder.getInt32Ty());
+
+ ArgNames argNames;
+ argNames.push_back("exceptTypeToThrow");
+
+ llvm::Function* ret = createFunction(module,
+ builder.getVoidTy(),
+ argTypes,
+ argNames,
+ ourId,
+ llvm::Function::ExternalLinkage,
+ false,
+ false);
+
+ // Block which calls invoke
+ llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
+ "entry",
+ ret);
+ // Normal block for invoke
+ llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context,
+ "normal",
+ ret);
+ // Unwind block for invoke
+ llvm::BasicBlock *exceptionBlock =
+ llvm::BasicBlock::Create(context, "exception", ret);
+
+ // Block which routes exception to correct catch handler block
+ llvm::BasicBlock *exceptionRouteBlock =
+ llvm::BasicBlock::Create(context, "exceptionRoute", ret);
+
+ // Foreign exception handler
+ llvm::BasicBlock *externalExceptionBlock =
+ llvm::BasicBlock::Create(context, "externalException", ret);
+
+ // Block which calls _Unwind_Resume
+ llvm::BasicBlock *unwindResumeBlock =
llvm::BasicBlock::Create(context, "unwindResume", ret);
+
+ // Clean up block which deletes the exception if needed
+ llvm::BasicBlock *endBlock =
+ llvm::BasicBlock::Create(context, "end", ret);
+
+ std::string nextName;
+ std::vector<llvm::BasicBlock*> catchBlocks(numExceptionsToCatch);
+ llvm::Value* exceptionCaughtFlag = NULL;
+ llvm::Value* exceptionStorage = NULL;
+
+ // Finally block which will branch to unwindResumeBlock if
+ // exception is not caught. Initializes/allocates stack locations.
+ llvm::BasicBlock* finallyBlock = createFinallyBlock(context,
+ module,
+ builder,
+ *ret,
+ nextName = "finally",
+ ourId,
+ *endBlock,
+ *unwindResumeBlock,
+ &exceptionCaughtFlag,
+ &exceptionStorage);
+
+ for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
+ nextName = ourTypeInfoNames[exceptionTypesToCatch[i]];
+
+ // One catch block per type info to be caught
+ catchBlocks[i] = createCatchBlock(context,
+ module,
+ builder,
+ *ret,
+ nextName,
+ ourId,
+ *finallyBlock,
+ *exceptionCaughtFlag);
+ }
+
+ // Entry Block
+
+ builder.SetInsertPoint(entryBlock);
+
+ std::vector<llvm::Value*> args;
+ args.push_back(namedValues["exceptTypeToThrow"]);
+ builder.CreateInvoke(&toInvoke,
+ normalBlock,
+ exceptionBlock,
+ args.begin(),
+ args.end());
+
+ // End Block
+
+ builder.SetInsertPoint(endBlock);
+
+ generateStringPrint(context,
+ module,
+ builder,
+ "Gen: In end block: exiting in " + ourId + ".\n",
+ USE_GLOBAL_STR_CONSTS);
+ llvm::Function *deleteOurException =
+ module.getFunction("deleteOurException");
+
+ // Note: function handles NULL exceptions
+ builder.CreateCall(deleteOurException,
+ builder.CreateLoad(exceptionStorage));
+ builder.CreateRetVoid();
+
+ // Normal Block
+
+ builder.SetInsertPoint(normalBlock);
+
+ generateStringPrint(context,
+ module,
+ builder,
+ "Gen: No exception in " + ourId + "!\n",
+ USE_GLOBAL_STR_CONSTS);
+
+ // Finally block is always called
+ builder.CreateBr(finallyBlock);
+
+ // Unwind Resume Block
+
+ builder.SetInsertPoint(unwindResumeBlock);
+
+ llvm::Function *resumeOurException =
+ module.getFunction("_Unwind_Resume");
+ builder.CreateCall(resumeOurException,
+ builder.CreateLoad(exceptionStorage));
+ builder.CreateUnreachable();
+
+ // Exception Block
+
+ builder.SetInsertPoint(exceptionBlock);
+
+ llvm::Function *ehException = module.getFunction("llvm.eh.exception");
+
+ // Retrieve thrown exception
+ llvm::Value* unwindException = builder.CreateCall(ehException);
+
+ // Store exception and flag
+ builder.CreateStore(unwindException, exceptionStorage);
+ builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag);
+ llvm::Function *personality = module.getFunction("ourPersonality");
+ llvm::Value* functPtr =
+ builder.CreatePointerCast(personality,
+ builder.getInt8Ty()->getPointerTo());
+
+ args.clear();
+ args.push_back(unwindException);
+ args.push_back(functPtr);
+
+ // Note: Skipping index 0
+ for (unsigned i = 0; i < numExceptionsToCatch; ++i) {
+ // Set up type infos to be caught
+ args.push_back(
+ module.getGlobalVariable(
+ ourTypeInfoNames[exceptionTypesToCatch[i]]));
+ }
+
+ args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0));
+
+ llvm::Function *ehSelector = module.getFunction("llvm.eh.selector");
+
+ // Set up this exception block as the landing pad which will handle
+ // given type infos. See case Intrinsic::eh_selector in
+ // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...)
+ // implemented in FunctionLoweringInfo.cpp to see how the implementation
+ // handles this call.
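+ // For two caught type infos the selector call built below is
+ // equivalent to IR of roughly this shape (illustrative sketch only;
+ // the global names are assumed):
+ //   %sel = call i32 (i8*, i8*, ...)* @llvm.eh.selector(
+ //              i8* %unwindException, i8* %personality,
+ //              %typeInfoType* @typeInfo2, %typeInfoType* @typeInfo4,
+ //              i32 0)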
This landing pad (this exception block) will be
+ // called either because it needs to clean up (call finally) or because
+ // a type info was found which matched the thrown exception.
+ llvm::Value* retTypeInfoIndex = builder.CreateCall(ehSelector,
+ args.begin(),
+ args.end());
+
+ // Retrieve exception_class member from thrown exception
+ // (_Unwind_Exception instance). This member tells us whether or not
+ // the exception is foreign.
+ llvm::Value* unwindExceptionClass =
+ builder.CreateLoad(
+ builder.CreateStructGEP(
+ builder.CreatePointerCast(
+ unwindException,
+ ourUnwindExceptionType->getPointerTo()),
+ 0));
+
+ // Branch to the externalExceptionBlock if the exception is foreign or
+ // to a catch router if not. Either way the finally block will be run.
+ builder.CreateCondBr(
+ builder.CreateICmpEQ(unwindExceptionClass,
+ llvm::ConstantInt::get(builder.getInt64Ty(),
+ ourBaseExceptionClass)),
+ exceptionRouteBlock,
+ externalExceptionBlock);
+
+ // External Exception Block
+
+ builder.SetInsertPoint(externalExceptionBlock);
+
+ generateStringPrint(context,
+ module,
+ builder,
+ "Gen: Foreign exception received.\n",
+ USE_GLOBAL_STR_CONSTS);
+
+ // Branch to the finally block
+ builder.CreateBr(finallyBlock);
+
+ // Exception Route Block
+
+ builder.SetInsertPoint(exceptionRouteBlock);
+
+ // Casts exception pointer (_Unwind_Exception instance) to parent
+ // (OurException instance).
+ //
+ // Note: ourBaseFromUnwindOffset is usually negative
+ llvm::Value* typeInfoThrown =
+ builder.CreatePointerCast(
+ builder.CreateConstGEP1_64(unwindException,
+ ourBaseFromUnwindOffset),
+ ourExceptionType->getPointerTo());
+
+ // Retrieve thrown exception type info type
+ //
+ // Note: Index is not relative to pointer but instead to structure
+ // unlike a true getelementptr (GEP) instruction
+ typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0);
+
+ llvm::Value* typeInfoThrownType =
+ builder.CreateStructGEP(typeInfoThrown, 0);
+
+ generateIntegerPrint(context,
+ module,
+ builder,
+ *toPrint32Int,
+ *(builder.CreateLoad(typeInfoThrownType)),
+ "Gen: Exception type <%d> received (stack unwound) "
+ " in "
+ + ourId
+ + ".\n",
+ USE_GLOBAL_STR_CONSTS);
+
+ // Route to matched type info catch block or run cleanup finally block
+ llvm::SwitchInst* switchToCatchBlock =
+ builder.CreateSwitch(retTypeInfoIndex,
+ finallyBlock,
+ numExceptionsToCatch);
+
+ unsigned nextTypeToCatch;
+
+ for (unsigned i = 1; i <= numExceptionsToCatch; ++i) {
+ nextTypeToCatch = i - 1;
+ switchToCatchBlock->addCase(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(context),
+ i),
+ catchBlocks[nextTypeToCatch]);
+ }
+
+ llvm::verifyFunction(*ret);
+ fpm.run(*ret);
+
+ return(ret);
+}
+
+
+/// Generates a function which throws either an exception matched to a runtime
+/// determined type info type (argument to generated function), or if this
+/// runtime value matches nativeThrowType, throws a foreign exception by
+/// calling nativeThrowFunct.
+/// @param module code for module instance
+/// @param builder builder instance
+/// @param fpm a function pass manager holding optional IR to IR
+/// transformations
+/// @param ourId id used for printing purposes
+/// @param nativeThrowType a runtime argument of this value results in
+/// nativeThrowFunct being called to generate/throw exception.
+/// @param nativeThrowFunct function which will throw a foreign exception
+/// if the above nativeThrowType matches generated function's arg.
+/// @returns generated function
+static
+llvm::Function* createThrowExceptionFunction(llvm::Module& module,
+ llvm::IRBuilder<>& builder,
+ llvm::FunctionPassManager& fpm,
+ std::string ourId,
+ int32_t nativeThrowType,
+ llvm::Function& nativeThrowFunct) {
+ llvm::LLVMContext& context = module.getContext();
+ namedValues.clear();
+ ArgTypes unwindArgTypes;
+ unwindArgTypes.push_back(builder.getInt32Ty());
+ ArgNames unwindArgNames;
+ unwindArgNames.push_back("exceptTypeToThrow");
+
+ llvm::Function *ret = createFunction(module,
+ builder.getVoidTy(),
+ unwindArgTypes,
+ unwindArgNames,
+ ourId,
+ llvm::Function::ExternalLinkage,
+ false,
+ false);
+
+ // Throws either one of our exceptions or a native C++ exception depending
+ // on a runtime argument value containing a type info type.
+ llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context,
+ "entry",
+ ret);
+ // Throws a foreign exception
+ llvm::BasicBlock *nativeThrowBlock =
+ llvm::BasicBlock::Create(context,
+ "nativeThrow",
+ ret);
+ // Throws one of our Exceptions
+ llvm::BasicBlock *generatedThrowBlock =
+ llvm::BasicBlock::Create(context,
+ "generatedThrow",
+ ret);
+ // Retrieve the runtime type info type to throw
+ llvm::Value* exceptionType = namedValues["exceptTypeToThrow"];
+
+ // nativeThrowBlock block
+
+ builder.SetInsertPoint(nativeThrowBlock);
+
+ // Throws foreign exception
+ builder.CreateCall(&nativeThrowFunct, exceptionType);
+ builder.CreateUnreachable();
+
+ // entry block
+
+ builder.SetInsertPoint(entryBlock);
+
+ llvm::Function *toPrint32Int = module.getFunction("print32Int");
+ generateIntegerPrint(context,
+ module,
+ builder,
+ *toPrint32Int,
+ *exceptionType,
+ "\nGen: About to throw exception type <%d> in "
+ + ourId
+ + ".\n",
+ USE_GLOBAL_STR_CONSTS);
+
+ // Switches on the runtime type info type value to determine whether or not
+ // a foreign exception is thrown. Defaults to throwing one of our
+ // generated exceptions.
+ llvm::SwitchInst* theSwitch = builder.CreateSwitch(exceptionType,
+ generatedThrowBlock,
+ 1);
+
+ theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context),
+ nativeThrowType),
+ nativeThrowBlock);
+
+ // generatedThrow block
+
+ builder.SetInsertPoint(generatedThrowBlock);
+
+ llvm::Function *createOurException =
+ module.getFunction("createOurException");
+ llvm::Function *raiseOurException =
+ module.getFunction("_Unwind_RaiseException");
+
+ // Creates exception to throw with runtime type info type.
+ llvm::Value* exception =
+ builder.CreateCall(createOurException,
+ namedValues["exceptTypeToThrow"]);
+
+ // Throw generated Exception
+ builder.CreateCall(raiseOurException, exception);
+ builder.CreateUnreachable();
+
+ llvm::verifyFunction(*ret);
+ fpm.run(*ret);
+
+ return(ret);
+}
+
+static void createStandardUtilityFunctions(unsigned numTypeInfos,
+ llvm::Module& module,
+ llvm::IRBuilder<>& builder);
+
+/// Creates test code by generating and organizing these functions into the
+/// test case. The test case consists of an outer function set up to invoke
+/// an inner function within an environment having multiple catch and single
+/// finally blocks. This inner function is also set up to invoke a throw
+/// function within an environment similar in nature to the outer function's
+/// catch and finally blocks. Each of these two functions catches a mutually
+/// exclusive subset (even or odd) of the type info types configured
+/// for this demo.
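+/// The resulting call chain is (illustrative):
+///   outerCatchFunct(t) -> innerCatchFunct(t) -> throwFunct(t)
+/// where the outer function catches the odd type info types and the inner
+/// function catches the even ones.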
All generated functions have a runtime argument which
+/// holds the type info type to throw; each function passes it on to the
+/// inner one, when such an inner function exists. This type info type is
+/// looked at by the generated throw function to see whether or not it should
+/// throw a generated exception with the same type info type, or instead call
+/// a supplied function which in turn will throw a foreign exception.
+/// @param module code for module instance
+/// @param builder builder instance
+/// @param fpm a function pass manager holding optional IR to IR
+/// transformations
+/// @param nativeThrowFunctName name of external function which will throw
+/// a foreign exception
+/// @returns outermost generated test function.
+llvm::Function* createUnwindExceptionTest(llvm::Module& module,
+ llvm::IRBuilder<>& builder,
+ llvm::FunctionPassManager& fpm,
+ std::string nativeThrowFunctName) {
+ // Number of type infos to generate
+ unsigned numTypeInfos = 6;
+
+ // Initialize intrinsics and external functions to use along with exception
+ // and type info globals.
+ createStandardUtilityFunctions(numTypeInfos,
+ module,
+ builder);
+ llvm::Function *nativeThrowFunct =
+ module.getFunction(nativeThrowFunctName);
+
+ // Create exception throw function using the value ~0 to cause
+ // foreign exceptions to be thrown.
+ llvm::Function* throwFunct =
+ createThrowExceptionFunction(module,
+ builder,
+ fpm,
+ "throwFunct",
+ ~0,
+ *nativeThrowFunct);
+ // Inner function will catch even type infos
+ unsigned innerExceptionTypesToCatch[] = {6, 2, 4};
+ size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) /
+ sizeof(unsigned);
+
+ // Generate inner function.
+ llvm::Function* innerCatchFunct =
+ createCatchWrappedInvokeFunction(module,
+ builder,
+ fpm,
+ *throwFunct,
+ "innerCatchFunct",
+ numExceptionTypesToCatch,
+ innerExceptionTypesToCatch);
+
+ // Outer function will catch odd type infos
+ unsigned outerExceptionTypesToCatch[] = {3, 1, 5};
+ numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) /
+ sizeof(unsigned);
+
+ // Generate outer function
+ llvm::Function* outerCatchFunct =
+ createCatchWrappedInvokeFunction(module,
+ builder,
+ fpm,
+ *innerCatchFunct,
+ "outerCatchFunct",
+ numExceptionTypesToCatch,
+ outerExceptionTypesToCatch);
+
+ // Return outer function to run
+ return(outerCatchFunct);
+}
+
+
+/// Represents our foreign exceptions
+class OurCppRunException : public std::runtime_error {
+public:
+ OurCppRunException(const std::string reason) :
+ std::runtime_error(reason) {}
+
+ OurCppRunException (const OurCppRunException& toCopy) :
+ std::runtime_error(toCopy) {}
+
+ OurCppRunException& operator = (const OurCppRunException& toCopy) {
+ return(reinterpret_cast<OurCppRunException&>(
+ std::runtime_error::operator = (toCopy)
+ ));
+ }
+
+ ~OurCppRunException (void) throw () {}
+};
+
+
+/// Throws foreign C++ exception.
+/// @param ignoreIt unused parameter that allows function to match implied
+/// generated function contract.
+extern "C"
+void throwCppException (int32_t ignoreIt) {
+ throw(OurCppRunException("thrown by throwCppException(...)"));
+}
+
+typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow);
+
+/// This is a test harness which runs tests by executing the generated
+/// function with a type info type to throw. The harness wraps the execution
+/// of the generated function in a C++ try/catch clause.
+/// @param engine execution engine to use for executing generated function.
+/// This demo program expects this to be a JIT instance for demo
+/// purposes.
+/// @param function generated test function to run
+/// @param typeToThrow type info type of generated exception to throw, or
+/// indicator to cause foreign exception to be thrown.
+static
+void runExceptionThrow(llvm::ExecutionEngine* engine,
+ llvm::Function* function,
+ int32_t typeToThrow) {
+
+ // Find test's function pointer
+ OurExceptionThrowFunctType functPtr =
+ reinterpret_cast<OurExceptionThrowFunctType>(
+ reinterpret_cast<uintptr_t>(
+ engine->getPointerToFunction(function)
+ )
+ );
+
+ try {
+ // Run test
+ (*functPtr)(typeToThrow);
+ }
+ catch (const OurCppRunException& exc) {
+ // Catch foreign C++ exception
+ fprintf(stderr,
+ "\nrunExceptionThrow(...):In C++ catch OurCppRunException "
+ "with reason: %s.\n",
+ exc.what());
+ }
+ catch (...) {
+ // Catch all exceptions including our generated ones. I'm not sure
+ // why this latter functionality should work, as it seems that
+ // our exceptions should be foreign to C++ (the _Unwind_Exception::
+ // exception_class should be different from the one used by C++), and
+ // therefore C++ should ignore the generated exceptions.
+
+ fprintf(stderr,
+ "\nrunExceptionThrow(...):In C++ catch all.\n");
+ }
+}
+
+//
+// End test functions
+//
+
+/// This initialization routine creates type info globals and
+/// adds external function declarations to module.
+/// @param numTypeInfos number of type infos to create as GlobalVariable
+/// instances, numbered consecutively starting with the value 1.
+/// @param module code for module instance
+/// @param builder builder instance
+static void createStandardUtilityFunctions(unsigned numTypeInfos,
+ llvm::Module& module,
+ llvm::IRBuilder<>& builder) {
+
+ llvm::LLVMContext& context = module.getContext();
+
+ // Exception initializations
+
+ // Setup exception catch state
+ ourExceptionNotThrownState =
+ llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0);
+ ourExceptionThrownState =
+ llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1);
+ ourExceptionCaughtState =
+ llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2);
+
+
+ // Create our type info type
+ ourTypeInfoType = llvm::StructType::get(context,
+ builder.getInt32Ty(),
+ NULL);
+
+ // Create OurException type
+ ourExceptionType = llvm::StructType::get(context,
+ ourTypeInfoType,
+ NULL);
+
+ // Create portion of _Unwind_Exception type
+ //
+ // Note: Declaring only a portion of the _Unwind_Exception struct.
+ // Does this cause problems?
+ ourUnwindExceptionType = llvm::StructType::get(context,
+ builder.getInt64Ty(),
+ NULL);
+ struct OurBaseException_t dummyException;
+
+ // Calculate offset of OurException::unwindException member.
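+ //
+ // This is the negated offset of the member within the struct; a
+ // minimal equivalent sketch (assuming <cstddef> were included and the
+ // variable is an int64_t) is:
+ //   ourBaseFromUnwindOffset =
+ //     -(int64_t) offsetof(struct OurBaseException_t, unwindException);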
+ ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - + ((uintptr_t) &(dummyException.unwindException)); + +#ifdef DEBUG + fprintf(stderr, + "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " + "= %lld, sizeof(struct OurBaseException_t) - " + "sizeof(struct _Unwind_Exception) = %lu.\n", + ourBaseFromUnwindOffset, + sizeof(struct OurBaseException_t) - + sizeof(struct _Unwind_Exception)); +#endif + + size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char); + + // Create our _Unwind_Exception::exception_class value + ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars); + + // Type infos + + std::string baseStr = "typeInfo", typeInfoName; + std::ostringstream typeInfoNameBuilder; + std::vector structVals; + + llvm::Constant *nextStruct; + llvm::GlobalVariable* nextGlobal = NULL; + + // Generate each type info + // + // Note: First type info is not used. + for (unsigned i = 0; i <= numTypeInfos; ++i) { + structVals.clear(); + structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i)); + nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals); + + typeInfoNameBuilder.str(""); + typeInfoNameBuilder << baseStr << i; + typeInfoName = typeInfoNameBuilder.str(); + + // Note: Does not seem to work without allocation + nextGlobal = + new llvm::GlobalVariable(module, + ourTypeInfoType, + true, + llvm::GlobalValue::ExternalLinkage, + nextStruct, + typeInfoName); + + ourTypeInfoNames.push_back(typeInfoName); + ourTypeInfoNamesIndex[i] = typeInfoName; + } + + ArgNames argNames; + ArgTypes argTypes; + llvm::Function* funct = NULL; + + // print32Int + + const llvm::Type* retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print32Int", + llvm::Function::ExternalLinkage, + true, + false); + + // print64Int + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print64Int", + llvm::Function::ExternalLinkage, + true, + false); + + // printStr + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "printStr", + llvm::Function::ExternalLinkage, + true, + false); + + // throwCppException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "throwCppException", + llvm::Function::ExternalLinkage, + true, + false); + + // deleteOurException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "deleteOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // createOurException + + retType = builder.getInt8Ty()->getPointerTo(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "createOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // _Unwind_RaiseException + + retType = builder.getInt32Ty(); + + argTypes.clear(); + 
argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+
+ argNames.clear();
+
+ funct = createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "_Unwind_RaiseException",
+ llvm::Function::ExternalLinkage,
+ true,
+ false);
+
+ funct->addFnAttr(llvm::Attribute::NoReturn);
+
+ // _Unwind_Resume
+
+ retType = builder.getInt32Ty();
+
+ argTypes.clear();
+ argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+
+ argNames.clear();
+
+ funct = createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "_Unwind_Resume",
+ llvm::Function::ExternalLinkage,
+ true,
+ false);
+
+ funct->addFnAttr(llvm::Attribute::NoReturn);
+
+ // ourPersonality
+
+ retType = builder.getInt32Ty();
+
+ argTypes.clear();
+ argTypes.push_back(builder.getInt32Ty());
+ argTypes.push_back(builder.getInt32Ty());
+ argTypes.push_back(builder.getInt64Ty());
+ argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+
+ argNames.clear();
+
+ createFunction(module,
+ retType,
+ argTypes,
+ argNames,
+ "ourPersonality",
+ llvm::Function::ExternalLinkage,
+ true,
+ false);
+
+ // llvm.eh.selector intrinsic
+
+ getDeclaration(&module, llvm::Intrinsic::eh_selector);
+
+ // llvm.eh.exception intrinsic
+
+ getDeclaration(&module, llvm::Intrinsic::eh_exception);
+
+ // llvm.eh.typeid.for intrinsic
+
+ getDeclaration(&module, llvm::Intrinsic::eh_typeid_for);
+}
+
+
+//===---------------------------------------------------------------------===//
+// Main test driver code.
+//===---------------------------------------------------------------------===//
+
+/// Demo main routine which takes the type info types to throw. A test will
+/// be run for each given type info type. Type info types with the value
+/// of -1 trigger a foreign C++ exception to be thrown; type info types
+/// >= 1 and <= 6 will be caught by the test functions; and type info types
+/// > 6 will result in exceptions which pass through to the test harness. All
+/// other type info types are not supported and could cause a crash.
+int main(int argc, char* argv[]) {
+ if (argc == 1) {
+ fprintf(stderr,
+ "\nUsage: ExceptionDemo <exception type to throw> "
+ "[<type 2>...<type n>].\n"
+ " Each type must have the value of 1 - 6 for "
+ "generated exceptions to be caught;\n"
+ " the value -1 for foreign C++ exceptions to be "
+ "generated and thrown;\n"
+ " or the values > 6 for exceptions to be ignored.\n"
+ "\nTry: ExceptionDemo 2 3 7 -1\n"
+ " for a full test.\n\n");
+ return(0);
+ }
+
+ // If not set, exception handling will not be turned on
+ llvm::JITExceptionHandling = true;
+
+ llvm::InitializeNativeTarget();
+ llvm::LLVMContext& context = llvm::getGlobalContext();
+ llvm::IRBuilder<> theBuilder(context);
+
+ // Make the module, which holds all the code.
+ llvm::Module* module = new llvm::Module("my cool jit", context);
+
+ // Build engine with JIT
+ llvm::EngineBuilder factory(module);
+ factory.setEngineKind(llvm::EngineKind::JIT);
+ factory.setAllocateGVsWithCode(false);
+ llvm::ExecutionEngine* executionEngine = factory.create();
+
+ {
+ llvm::FunctionPassManager fpm(module);
+
+ // Set up the optimizer pipeline.
+ // Start with registering info about how the
+ // target lays out data structures.
+ fpm.add(new llvm::TargetData(*executionEngine->getTargetData()));
+
+ // Optimizations turned on
+#ifdef ADD_OPT_PASSES
+
+ // Basic AliasAnalysis support for GVN.
+ fpm.add(llvm::createBasicAliasAnalysisPass());
+
+ // Promote allocas to registers.
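+ // (mem2reg: rewrites stack slots created by alloca into SSA values,
+ // which lets the scalar passes added below see through them.)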
+ fpm.add(llvm::createPromoteMemoryToRegisterPass()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + fpm.add(llvm::createInstructionCombiningPass()); + + // Reassociate expressions. + fpm.add(llvm::createReassociatePass()); + + // Eliminate Common SubExpressions. + fpm.add(llvm::createGVNPass()); + + // Simplify the control flow graph (deleting unreachable + // blocks, etc). + fpm.add(llvm::createCFGSimplificationPass()); +#endif // ADD_OPT_PASSES + + fpm.doInitialization(); + + // Generate test code using function throwCppException(...) as + // the function which throws foreign exceptions. + llvm::Function* toRun = + createUnwindExceptionTest(*module, + theBuilder, + fpm, + "throwCppException"); + + fprintf(stderr, "\nBegin module dump:\n\n"); + + module->dump(); + + fprintf(stderr, "\nEnd module dump:\n"); + + fprintf(stderr, "\n\nBegin Test:\n"); + + for (int i = 1; i < argc; ++i) { + // Run test for each argument whose value is the exception + // type to throw. + runExceptionThrow(executionEngine, + toRun, + (unsigned) strtoul(argv[i], NULL, 10)); + } + + fprintf(stderr, "\nEnd Test:\n\n"); + } + + delete executionEngine; + + return 0; +} + diff --git a/final/examples/ExceptionDemo/Makefile b/final/examples/ExceptionDemo/Makefile new file mode 100644 index 00000000000..480744730eb --- /dev/null +++ b/final/examples/ExceptionDemo/Makefile @@ -0,0 +1,16 @@ +##===- examples/ExceptionDemo/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===---------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = ExceptionDemo +EXAMPLE_TOOL = 1 +REQUIRES_EH = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/final/examples/Fibonacci/CMakeLists.txt b/final/examples/Fibonacci/CMakeLists.txt new file mode 100644 index 00000000000..693761241fc --- /dev/null +++ b/final/examples/Fibonacci/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(Fibonacci + fibonacci.cpp + ) diff --git a/final/examples/Fibonacci/Makefile b/final/examples/Fibonacci/Makefile new file mode 100644 index 00000000000..71f6ba0ef52 --- /dev/null +++ b/final/examples/Fibonacci/Makefile @@ -0,0 +1,17 @@ +##===- examples/Fibonacci/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = Fibonacci +EXAMPLE_TOOL = 1 + +# Link in JIT support +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common diff --git a/final/examples/Fibonacci/fibonacci.cpp b/final/examples/Fibonacci/fibonacci.cpp new file mode 100644 index 00000000000..a7bbf8c7268 --- /dev/null +++ b/final/examples/Fibonacci/fibonacci.cpp @@ -0,0 +1,137 @@ +//===--- examples/Fibonacci/fibonacci.cpp - An example use of the JIT -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This small program provides an example of how to quickly build a small
+// module with a Fibonacci function and execute it with the JIT.
+//
+// The goal of this snippet is to create in memory the LLVM module
+// consisting of one function as follows:
+//
+// int fib(int x) {
+// if(x<=2) return 1;
+// return fib(x-1)+fib(x-2);
+// }
+//
+// Once we have this, we compile the module via the JIT, then execute the
+// `fib' function and return the result to a driver, i.e. to a "host program".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetSelect.h"
+using namespace llvm;
+
+static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
+ // Create the fib function and insert it into module M. This function is said
+ // to return an int and take an int parameter.
+ Function *FibF =
+ cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ (Type *)0));
+
+ // Add a basic block to the function.
+ BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF);
+
+ // Get pointers to the constants.
+ Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
+ Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
+
+ // Get pointer to the integer argument of the fib function...
+ Argument *ArgX = FibF->arg_begin(); // Get the arg.
+ ArgX->setName("AnArg"); // Give it a nice symbolic name for fun.
+
+ // Create the return block.
+ BasicBlock *RetBB = BasicBlock::Create(Context, "return", FibF);
+ // Create the recurse block.
+ BasicBlock* RecurseBB = BasicBlock::Create(Context, "recurse", FibF);
+
+ // Create the "if (arg <= 2) goto return" comparison and branch.
+ Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond");
+ BranchInst::Create(RetBB, RecurseBB, CondInst, BB);
+
+ // Create: ret int 1
+ ReturnInst::Create(Context, One, RetBB);
+
+ // create fib(x-1)
+ Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB);
+ CallInst *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB);
+ CallFibX1->setTailCall();
+
+ // create fib(x-2)
+ Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB);
+ CallInst *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB);
+ CallFibX2->setTailCall();
+
+
+ // fib(x-1)+fib(x-2)
+ Value *Sum = BinaryOperator::CreateAdd(CallFibX1, CallFibX2,
+ "addresult", RecurseBB);
+
+ // Create the return instruction and add it to the basic block
+ ReturnInst::Create(Context, Sum, RecurseBB);
+
+ return FibF;
+}
+
+
+int main(int argc, char **argv) {
+ int n = argc > 1 ? atol(argv[1]) : 24;
+
+ InitializeNativeTarget();
+ LLVMContext Context;
+
+ // Create some module to put our function into it.
+ OwningPtr<Module> M(new Module("test", Context));
+
+ // We are about to create the "fib" function:
+ Function *FibF = CreateFibFunction(M.get(), Context);
+
+ // Now we are going to create the JIT.
+ std::string errStr;
+ ExecutionEngine *EE =
+ EngineBuilder(M.get())
+ .setErrorStr(&errStr)
+ .setEngineKind(EngineKind::JIT)
+ .create();
+
+ if (!EE) {
+ errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr
+ << "\n";
+ return 1;
+ }
+
+ errs() << "verifying... ";
+ if (verifyModule(*M)) {
+ errs() << argv[0] << ": Error constructing function!\n";
+ return 1;
+ }
+
+ errs() << "OK\n";
+ errs() << "We just constructed this LLVM module:\n\n---------\n" << *M;
+ errs() << "---------\nstarting fibonacci(" << n << ") with JIT...\n";
+
+ // Call the Fibonacci function with argument n:
+ std::vector<GenericValue> Args(1);
+ Args[0].IntVal = APInt(32, n);
+ GenericValue GV = EE->runFunction(FibF, Args);
+
+ // Import the result of execution
+ outs() << "Result: " << GV.IntVal << "\n";
+
+ return 0;
+}
diff --git a/final/examples/HowToUseJIT/CMakeLists.txt b/final/examples/HowToUseJIT/CMakeLists.txt
new file mode 100644
index 00000000000..428b53ffb9b
--- /dev/null
+++ b/final/examples/HowToUseJIT/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen)
+
+add_llvm_example(HowToUseJIT
+ HowToUseJIT.cpp
+ )
diff --git a/final/examples/HowToUseJIT/HowToUseJIT.cpp b/final/examples/HowToUseJIT/HowToUseJIT.cpp
new file mode 100644
index 00000000000..8e3b6dc4a27
--- /dev/null
+++ b/final/examples/HowToUseJIT/HowToUseJIT.cpp
@@ -0,0 +1,124 @@
+//===-- examples/HowToUseJIT/HowToUseJIT.cpp - An example use of the JIT --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This small program provides an example of how to quickly build a small
+// module with two functions and execute it with the JIT.
+//
+// Goal:
+// The goal of this snippet is to create in memory
+// the LLVM module consisting of two functions as follows:
+//
+// int add1(int x) {
+// return x+1;
+// }
+//
+// int foo() {
+// return add1(10);
+// }
+//
+// then compile the module via JIT, then execute the `foo'
+// function and return the result to a driver, i.e. to a "host program".
+//
+// Some remarks and questions:
+//
+// - could we invoke some code using anonymous functions too?
+// e.g. evaluate "foo()+foo()" without fear of introducing a
+// conflict between a temporary function name and some real
+// existing function name?
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+int main() {
+
+ InitializeNativeTarget();
+
+ LLVMContext Context;
+
+ // Create some module to put our function into it.
+ Module *M = new Module("test", Context);
+
+ // Create the add1 function entry and insert this entry into module M. The
+ // function will have a return type of "int" and take an argument of "int".
+ // The '0' terminates the list of argument types.
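+ // An equivalent explicit construction (illustrative sketch, using the
+ // LLVM 2.9 era API) would be:
+ //   std::vector<const Type*> Params(1, Type::getInt32Ty(Context));
+ //   FunctionType *AddTy =
+ //     FunctionType::get(Type::getInt32Ty(Context), Params, false);
+ //   Function *F = cast<Function>(M->getOrInsertFunction("add1", AddTy));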
+ Function *Add1F =
+ cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context),
+ Type::getInt32Ty(Context),
+ (Type *)0));
+
+ // Add a basic block to the function. As before, it automatically inserts
+ // because of the last argument.
+ BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", Add1F);
+
+ // Get pointer to the constant `1'.
+ Value *One = ConstantInt::get(Type::getInt32Ty(Context), 1);
+
+ // Get pointer to the integer argument of the add1 function...
+ assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
+ Argument *ArgX = Add1F->arg_begin(); // Get the arg
+ ArgX->setName("AnArg"); // Give it a nice symbolic name for fun.
+
+ // Create the add instruction, inserting it into the end of BB.
+ Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
+
+ // Create the return instruction and add it to the basic block
+ ReturnInst::Create(Context, Add, BB);
+
+ // Now, function add1 is ready.
+
+
+ // Now we are going to create the function `foo', which returns an int and
+ // takes no arguments.
+ Function *FooF =
+ cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context),
+ (Type *)0));
+
+ // Add a basic block to the FooF function.
+ BB = BasicBlock::Create(Context, "EntryBlock", FooF);
+
+ // Get pointer to the constant `10'.
+ Value *Ten = ConstantInt::get(Type::getInt32Ty(Context), 10);
+
+ // Pass Ten to the call:
+ CallInst *Add1CallRes = CallInst::Create(Add1F, Ten, "add1", BB);
+ Add1CallRes->setTailCall(true);
+
+ // Create the return instruction and add it to the basic block.
+ ReturnInst::Create(Context, Add1CallRes, BB);
+
+ // Now we create the JIT.
+ ExecutionEngine* EE = EngineBuilder(M).create();
+
+ outs() << "We just constructed this LLVM module:\n\n" << *M;
+ outs() << "\n\nRunning foo: ";
+ outs().flush();
+
+ // Call the `foo' function with no arguments:
+ std::vector<GenericValue> noargs;
+ GenericValue gv = EE->runFunction(FooF, noargs);
+
+ // Import result of execution:
+ outs() << "Result: " << gv.IntVal << "\n";
+ EE->freeMachineCodeForFunction(FooF);
+ delete EE;
+ llvm_shutdown();
+ return 0;
+}
diff --git a/final/examples/HowToUseJIT/Makefile b/final/examples/HowToUseJIT/Makefile
new file mode 100644
index 00000000000..c8919db90cc
--- /dev/null
+++ b/final/examples/HowToUseJIT/Makefile
@@ -0,0 +1,15 @@
+##===- examples/HowToUseJIT/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../..
+TOOLNAME = HowToUseJIT
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := jit interpreter nativecodegen
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/CMakeLists.txt b/final/examples/Kaleidoscope/CMakeLists.txt
new file mode 100644
index 00000000000..8c87ac50b7a
--- /dev/null
+++ b/final/examples/Kaleidoscope/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_subdirectory(Chapter2)
+add_subdirectory(Chapter3)
+add_subdirectory(Chapter4)
+add_subdirectory(Chapter5)
+add_subdirectory(Chapter6)
+add_subdirectory(Chapter7)
diff --git a/final/examples/Kaleidoscope/Chapter2/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter2/CMakeLists.txt
new file mode 100644
index 00000000000..79f2b172d0d
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter2/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_example(Kaleidoscope-Ch2
+ toy.cpp
+ )
diff --git a/final/examples/Kaleidoscope/Chapter2/Makefile b/final/examples/Kaleidoscope/Chapter2/Makefile
new file mode 100644
index 00000000000..1a9b94ce541
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter2/Makefile
@@ -0,0 +1,13 @@
+##===- examples/Kaleidoscope/Chapter2/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch2
+EXAMPLE_TOOL = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/Chapter2/toy.cpp b/final/examples/Kaleidoscope/Chapter2/toy.cpp
new file mode 100644
index 00000000000..f4f09d0b351
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter2/toy.cpp
@@ -0,0 +1,398 @@
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+#include <string>
+#include <vector>
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+ tok_eof = -1,
+
+ // commands
+ tok_def = -2, tok_extern = -3,
+
+ // primary
+ tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr; // Filled in if tok_identifier
+static double NumVal; // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+ static int LastChar = ' ';
+
+ // Skip any whitespace.
+ while (isspace(LastChar))
+ LastChar = getchar();
+
+ if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+ IdentifierStr = LastChar;
+ while (isalnum((LastChar = getchar())))
+ IdentifierStr += LastChar;
+
+ if (IdentifierStr == "def") return tok_def;
+ if (IdentifierStr == "extern") return tok_extern;
+ return tok_identifier;
+ }
+
+ if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+ std::string NumStr;
+ do {
+ NumStr += LastChar;
+ LastChar = getchar();
+ } while (isdigit(LastChar) || LastChar == '.');
+
+ NumVal = strtod(NumStr.c_str(), 0);
+ return tok_number;
+ }
+
+ if (LastChar == '#') {
+ // Comment until end of line.
+ do LastChar = getchar();
+ while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+ if (LastChar != EOF)
+ return gettok();
+ }
+
+ // Check for end of file. Don't eat the EOF.
+ if (LastChar == EOF)
+ return tok_eof;
+
+ // Otherwise, just return the character as its ASCII value.
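+ // (For example, the input "def foo(x) x+1" tokenizes as: tok_def,
+ // tok_identifier "foo", '(', tok_identifier "x", ')',
+ // tok_identifier "x", '+', tok_number 1.0; the '(', ')' and '+'
+ // come back as plain ASCII character values.)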
+ int ThisChar = LastChar;
+ LastChar = getchar();
+ return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+ virtual ~ExprAST() {}
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+ double Val;
+public:
+ NumberExprAST(double val) : Val(val) {}
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+ std::string Name;
+public:
+ VariableExprAST(const std::string &name) : Name(name) {}
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+ char Op;
+ ExprAST *LHS, *RHS;
+public:
+ BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+ : Op(op), LHS(lhs), RHS(rhs) {}
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+ std::string Callee;
+ std::vector<ExprAST*> Args;
+public:
+ CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+ : Callee(callee), Args(args) {}
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+ std::string Name;
+ std::vector<std::string> Args;
+public:
+ PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+ : Name(name), Args(args) {}
+
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+ PrototypeAST *Proto;
+ ExprAST *Body;
+public:
+ FunctionAST(PrototypeAST *proto, ExprAST *body)
+ : Proto(proto), Body(body) {}
+
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+ return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+ if (!isascii(CurTok))
+ return -1;
+
+ // Make sure it's a declared binop.
+ int TokPrec = BinopPrecedence[CurTok];
+ if (TokPrec <= 0) return -1;
+ return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+/// ::= identifier
+/// ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+ std::string IdName = IdentifierStr;
+
+ getNextToken(); // eat identifier.
+
+ if (CurTok != '(') // Simple variable ref.
+ return new VariableExprAST(IdName);
+
+ // Call.
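+ // (e.g. for "foo(a, b+1)" we arrive here with CurTok == '('; the loop
+ // below collects each comma-separated argument expression until the
+ // closing ')'.)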
+ getNextToken(); // eat (
+ std::vector<ExprAST*> Args;
+ if (CurTok != ')') {
+ while (1) {
+ ExprAST *Arg = ParseExpression();
+ if (!Arg) return 0;
+ Args.push_back(Arg);
+
+ if (CurTok == ')') break;
+
+ if (CurTok != ',')
+ return Error("Expected ')' or ',' in argument list");
+ getNextToken();
+ }
+ }
+
+ // Eat the ')'.
+ getNextToken();
+
+ return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+ ExprAST *Result = new NumberExprAST(NumVal);
+ getNextToken(); // consume the number
+ return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+ getNextToken(); // eat (.
+ ExprAST *V = ParseExpression();
+ if (!V) return 0;
+
+ if (CurTok != ')')
+ return Error("expected ')'");
+ getNextToken(); // eat ).
+ return V;
+}
+
+/// primary
+/// ::= identifierexpr
+/// ::= numberexpr
+/// ::= parenexpr
+static ExprAST *ParsePrimary() {
+ switch (CurTok) {
+ default: return Error("unknown token when expecting an expression");
+ case tok_identifier: return ParseIdentifierExpr();
+ case tok_number: return ParseNumberExpr();
+ case '(': return ParseParenExpr();
+ }
+}
+
+/// binoprhs
+/// ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+ // If this is a binop, find its precedence.
+ while (1) {
+ int TokPrec = GetTokPrecedence();
+
+ // If this is a binop that binds at least as tightly as the current binop,
+ // consume it, otherwise we are done.
+ if (TokPrec < ExprPrec)
+ return LHS;
+
+ // Okay, we know this is a binop.
+ int BinOp = CurTok;
+ getNextToken(); // eat binop
+
+ // Parse the primary expression after the binary operator.
+ ExprAST *RHS = ParsePrimary();
+ if (!RHS) return 0;
+
+ // If BinOp binds less tightly with RHS than the operator after RHS, let
+ // the pending operator take RHS as its LHS.
+ int NextPrec = GetTokPrecedence();
+ if (TokPrec < NextPrec) {
+ RHS = ParseBinOpRHS(TokPrec+1, RHS);
+ if (RHS == 0) return 0;
+ }
+
+ // Merge LHS/RHS.
+ LHS = new BinaryExprAST(BinOp, LHS, RHS);
+ }
+}
+
+/// expression
+/// ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+ ExprAST *LHS = ParsePrimary();
+ if (!LHS) return 0;
+
+ return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+/// ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+ if (CurTok != tok_identifier)
+ return ErrorP("Expected function name in prototype");
+
+ std::string FnName = IdentifierStr;
+ getNextToken();
+
+ if (CurTok != '(')
+ return ErrorP("Expected '(' in prototype");
+
+ std::vector<std::string> ArgNames;
+ while (getNextToken() == tok_identifier)
+ ArgNames.push_back(IdentifierStr);
+ if (CurTok != ')')
+ return ErrorP("Expected ')' in prototype");
+
+ // success.
+ getNextToken(); // eat ')'.
+
+ return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+ getNextToken(); // eat def.
+ PrototypeAST *Proto = ParsePrototype();
+ if (Proto == 0) return 0;
+
+ if (ExprAST *E = ParseExpression())
+ return new FunctionAST(Proto, E);
+ return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+ if (ExprAST *E = ParseExpression()) {
+ // Make an anonymous proto.
+ PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+ return new FunctionAST(Proto, E);
+ }
+ return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+ getNextToken(); // eat extern.
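+ // (e.g. "extern sin(x);" yields a PrototypeAST named "sin" with one
+ // argument; externs have no FunctionAST body.)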
+ return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing +//===----------------------------------------------------------------------===// + +static void HandleDefinition() { + if (ParseDefinition()) { + fprintf(stderr, "Parsed a function definition.\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (ParseExtern()) { + fprintf(stderr, "Parsed an extern\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (ParseTopLevelExpr()) { + fprintf(stderr, "Parsed a top-level expr\n"); + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/final/examples/Kaleidoscope/Chapter3/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter3/CMakeLists.txt new file mode 100644 index 00000000000..1af8db00a17 --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter3/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core) + +add_llvm_example(Kaleidoscope-Ch3 + toy.cpp + ) diff --git a/final/examples/Kaleidoscope/Chapter3/Makefile b/final/examples/Kaleidoscope/Chapter3/Makefile new file mode 100644 index 00000000000..4cc6948d803 --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter3/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch3 +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := core + +include $(LEVEL)/Makefile.common diff --git a/final/examples/Kaleidoscope/Chapter3/toy.cpp b/final/examples/Kaleidoscope/Chapter3/toy.cpp new file mode 100644 index 00000000000..80d691dd536 --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter3/toy.cpp @@ -0,0 +1,563 @@ +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/IRBuilder.h" +#include +#include +#include +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. +enum Token { + tok_eof = -1, + + // commands + tok_def = -2, tok_extern = -3, + + // primary + tok_identifier = -4, tok_number = -5 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") return tok_def; + if (IdentifierStr == "extern") return tok_extern; + return tok_identifier; + } + + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + std::string NumStr; + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), 0); + return tok_number; + } + + if (LastChar == '#') { + // Comment until end of line. + do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. 
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes).
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args)
+    : Name(name), Args(args) {}
+
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer.  CurTok is the current
+/// token the parser is looking at.  getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number:     return ParseNumberExpr();
+  case '(':            return ParseParenExpr();
+  }
+}
+
+/// binoprhs
+///   ::= ('+' primary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the primary expression after the binary operator.
+    ExprAST *RHS = ParsePrimary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= primary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParsePrimary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+static PrototypeAST *ParsePrototype() {
+  if (CurTok != tok_identifier)
+    return ErrorP("Expected function name in prototype");
+
+  std::string FnName = IdentifierStr;
+  getNextToken();
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  return new PrototypeAST(FnName, ArgNames);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: return ErrorV("invalid binary operator");
+  }
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type:  double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'.  If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
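+    // (Otherwise MainLoop would see the same offending token again and spin
+    // forever on it.)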
+ getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read top-level expression:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Run the main "interpreter loop" now. + MainLoop(); + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/final/examples/Kaleidoscope/Chapter4/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter4/CMakeLists.txt new file mode 100644 index 00000000000..0d1ac533f02 --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter4/CMakeLists.txt @@ -0,0 +1,5 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) + +add_llvm_example(Kaleidoscope-Ch4 + toy.cpp + ) diff --git a/final/examples/Kaleidoscope/Chapter4/Makefile b/final/examples/Kaleidoscope/Chapter4/Makefile new file mode 100644 index 00000000000..30162d94bce --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter4/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Chapter4/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch4
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit native
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/Chapter4/toy.cpp b/final/examples/Kaleidoscope/Chapter4/toy.cpp
new file mode 100644
index 00000000000..a50d2a43dd2
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -0,0 +1,613 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file.  Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
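+/// The operator is stored as its single source character, e.g. '+' or '<'.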
+class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; +public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector Args; +public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). 
+ return V; +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? 
V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. + NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. 
+ getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. 
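+  // (dump() prints the textual LLVM IR for everything generated during the
+  // session to standard error.)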
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/final/examples/Kaleidoscope/Chapter5/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter5/CMakeLists.txt
new file mode 100644
index 00000000000..2d75ad35923
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter5/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch5
+  toy.cpp
+  )
diff --git a/final/examples/Kaleidoscope/Chapter5/Makefile b/final/examples/Kaleidoscope/Chapter5/Makefile
new file mode 100644
index 00000000000..d1f5e2035b4
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter5/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter5/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch5
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit native
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/Chapter5/toy.cpp b/final/examples/Kaleidoscope/Chapter5/toy.cpp
new file mode 100644
index 00000000000..26b3db66202
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -0,0 +1,858 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') {   // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+ do LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() {} + virtual Value *Codegen() = 0; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; +public: + NumberExprAST(double val) : Val(val) {} + virtual Value *Codegen(); +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; +public: + VariableExprAST(const std::string &name) : Name(name) {} + virtual Value *Codegen(); +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + ExprAST *LHS, *RHS; +public: + BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) + : Op(op), LHS(lhs), RHS(rhs) {} + virtual Value *Codegen(); +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector Args; +public: + CallExprAST(const std::string &callee, std::vector &args) + : Callee(callee), Args(args) {} + virtual Value *Codegen(); +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + ExprAST *Cond, *Then, *Else; +public: + IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else) + : Cond(cond), Then(then), Else(_else) {} + virtual Value *Codegen(); +}; + +/// ForExprAST - Expression class for for/in. +class ForExprAST : public ExprAST { + std::string VarName; + ExprAST *Start, *End, *Step, *Body; +public: + ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end, + ExprAST *step, ExprAST *body) + : VarName(varname), Start(start), End(end), Step(step), Body(body) {} + virtual Value *Codegen(); +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes). +class PrototypeAST { + std::string Name; + std::vector Args; +public: + PrototypeAST(const std::string &name, const std::vector &args) + : Name(name), Args(args) {} + + Function *Codegen(); +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + PrototypeAST *Proto; + ExprAST *Body; +public: + FunctionAST(PrototypeAST *proto, ExprAST *body) + : Proto(proto), Body(body) {} + + Function *Codegen(); +}; + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { + return CurTok = gettok(); +} + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. 
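+/// Higher numbers bind more tightly; with the precedences installed in main()
+/// ('<' = 10, '+' = 20, '-' = 20, '*' = 40), an input such as
+///   a + b * c < d
+/// parses as ((a + (b * c)) < d).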
+static std::map BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) return -1; + return TokPrec; +} + +/// Error* - These are little helper functions for error handling. +ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;} +PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; } +FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; } + +static ExprAST *ParseExpression(); + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static ExprAST *ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return new VariableExprAST(IdName); + + // Call. + getNextToken(); // eat ( + std::vector Args; + if (CurTok != ')') { + while (1) { + ExprAST *Arg = ParseExpression(); + if (!Arg) return 0; + Args.push_back(Arg); + + if (CurTok == ')') break; + + if (CurTok != ',') + return Error("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. 
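+  // (e.g. in "for i = 1, i < 10, 2.0 in putchard(42)", everything up to 'in'
+  // has now been consumed; the loop body expression follows.)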
+ + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// binoprhs +/// ::= ('+' primary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the primary expression after the binary operator. + ExprAST *RHS = ParsePrimary(); + if (!RHS) return 0; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec+1, RHS); + if (RHS == 0) return 0; + } + + // Merge LHS/RHS. + LHS = new BinaryExprAST(BinOp, LHS, RHS); + } +} + +/// expression +/// ::= primary binoprhs +/// +static ExprAST *ParseExpression() { + ExprAST *LHS = ParsePrimary(); + if (!LHS) return 0; + + return ParseBinOpRHS(0, LHS); +} + +/// prototype +/// ::= id '(' id* ')' +static PrototypeAST *ParsePrototype() { + if (CurTok != tok_identifier) + return ErrorP("Expected function name in prototype"); + + std::string FnName = IdentifierStr; + getNextToken(); + + if (CurTok != '(') + return ErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return ErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + return new PrototypeAST(FnName, ArgNames); +} + +/// definition ::= 'def' prototype expression +static FunctionAST *ParseDefinition() { + getNextToken(); // eat def. + PrototypeAST *Proto = ParsePrototype(); + if (Proto == 0) return 0; + + if (ExprAST *E = ParseExpression()) + return new FunctionAST(Proto, E); + return 0; +} + +/// toplevelexpr ::= expression +static FunctionAST *ParseTopLevelExpr() { + if (ExprAST *E = ParseExpression()) { + // Make an anonymous proto. + PrototypeAST *Proto = new PrototypeAST("", std::vector()); + return new FunctionAST(Proto, E); + } + return 0; +} + +/// external ::= 'extern' prototype +static PrototypeAST *ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static Module *TheModule; +static IRBuilder<> Builder(getGlobalContext()); +static std::map NamedValues; +static FunctionPassManager *TheFPM; + +Value *ErrorV(const char *Str) { Error(Str); return 0; } + +Value *NumberExprAST::Codegen() { + return ConstantFP::get(getGlobalContext(), APFloat(Val)); +} + +Value *VariableExprAST::Codegen() { + // Look this variable up in the function. + Value *V = NamedValues[Name]; + return V ? 
V : ErrorV("Unknown variable name"); +} + +Value *BinaryExprAST::Codegen() { + Value *L = LHS->Codegen(); + Value *R = RHS->Codegen(); + if (L == 0 || R == 0) return 0; + + switch (Op) { + case '+': return Builder.CreateFAdd(L, R, "addtmp"); + case '-': return Builder.CreateFSub(L, R, "subtmp"); + case '*': return Builder.CreateFMul(L, R, "multmp"); + case '<': + L = Builder.CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()), + "booltmp"); + default: return ErrorV("invalid binary operator"); + } +} + +Value *CallExprAST::Codegen() { + // Look up the name in the global module table. + Function *CalleeF = TheModule->getFunction(Callee); + if (CalleeF == 0) + return ErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return ErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + ArgsV.push_back(Args[i]->Codegen()); + if (ArgsV.back() == 0) return 0; + } + + return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp"); +} + +Value *IfExprAST::Codegen() { + Value *CondV = Cond->Codegen(); + if (CondV == 0) return 0; + + // Convert condition to a bool by comparing equal to 0.0. + CondV = Builder.CreateFCmpONE(CondV, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "ifcond"); + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else"); + BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont"); + + Builder.CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder.SetInsertPoint(ThenBB); + + Value *ThenV = Then->Codegen(); + if (ThenV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder.GetInsertBlock(); + + // Emit else block. + TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // ... + // start = startexpr + // goto loop + // loop: + // variable = phi [start, loopheader], [nextvariable, loopend] + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // nextvariable = variable + step + // endcond = endexpr + // br endcond, loop, endloop + // outloop: + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Make the new basic block for the loop header, inserting after current + // block. 
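+  // The block the builder is currently in becomes the loop preheader: it
+  // feeds the PHI node its incoming start value, while the latch block later
+  // adds the incremented value for the backedge.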
+ Function *TheFunction = Builder.GetInsertBlock()->getParent(); + BasicBlock *PreheaderBB = Builder.GetInsertBlock(); + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Start the PHI node with an entry for Start. + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Variable; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar"); + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *LoopEndBB = Builder.GetInsertBlock(); + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, LoopEndBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(getGlobalContext())); +} + +Function *PrototypeAST::Codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), + Type::getDoubleTy(getGlobalContext())); + FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()), + Doubles, false); + + Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule); + + // If F conflicted, there was already something named 'Name'. If it has a + // body, don't allow redefinition or reextern. + if (F->getName() != Name) { + // Delete the one we just made and get the existing one. + F->eraseFromParent(); + F = TheModule->getFunction(Name); + + // If F already has a body, reject this. + if (!F->empty()) { + ErrorF("redefinition of function"); + return 0; + } + + // If F took a different number of args, reject. + if (F->arg_size() != Args.size()) { + ErrorF("redefinition of function with different # args"); + return 0; + } + } + + // Set names for all arguments. + unsigned Idx = 0; + for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size(); + ++AI, ++Idx) { + AI->setName(Args[Idx]); + + // Add arguments to variable symbol table. 
+ NamedValues[Args[Idx]] = AI; + } + + return F; +} + +Function *FunctionAST::Codegen() { + NamedValues.clear(); + + Function *TheFunction = Proto->Codegen(); + if (TheFunction == 0) + return 0; + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction); + Builder.SetInsertPoint(BB); + + if (Value *RetVal = Body->Codegen()) { + // Finish off the function. + Builder.CreateRet(RetVal); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Optimize the function. + TheFPM->run(*TheFunction); + + return TheFunction; + } + + // Error reading body, remove function. + TheFunction->eraseFromParent(); + return 0; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static ExecutionEngine *TheExecutionEngine; + +static void HandleDefinition() { + if (FunctionAST *F = ParseDefinition()) { + if (Function *LF = F->Codegen()) { + fprintf(stderr, "Read function definition:"); + LF->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (PrototypeAST *P = ParseExtern()) { + if (Function *F = P->Codegen()) { + fprintf(stderr, "Read extern: "); + F->dump(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (FunctionAST *F = ParseTopLevelExpr()) { + if (Function *LF = F->Codegen()) { + // JIT the function, returning a function pointer. + void *FPtr = TheExecutionEngine->getPointerToFunction(LF); + + // Cast it to the right type (takes no arguments, returns a double) so we + // can call it as a native function. + double (*FP)() = (double (*)())(intptr_t)FPtr; + fprintf(stderr, "Evaluated to %f\n", FP()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (1) { + fprintf(stderr, "ready> "); + switch (CurTok) { + case tok_eof: return; + case ';': getNextToken(); break; // ignore top-level semicolons. + case tok_def: HandleDefinition(); break; + case tok_extern: HandleExtern(); break; + default: HandleTopLevelExpression(); break; + } + } +} + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +/// putchard - putchar that takes a double and returns 0. +extern "C" +double putchard(double X) { + putchar((char)X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. 
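+  // (If no JIT is available for this target, EngineBuilder::create() returns
+  // null and leaves the reason in ErrStr, which is checked just below.)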
+  std::string ErrStr;
+  TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create();
+  if (!TheExecutionEngine) {
+    fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str());
+    exit(1);
+  }
+
+  FunctionPassManager OurFPM(TheModule);
+
+  // Set up the optimizer pipeline.  Start with registering info about how the
+  // target lays out data structures.
+  OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData()));
+  // Provide basic AliasAnalysis support for GVN.
+  OurFPM.add(createBasicAliasAnalysisPass());
+  // Do simple "peephole" optimizations and bit-twiddling optzns.
+  OurFPM.add(createInstructionCombiningPass());
+  // Reassociate expressions.
+  OurFPM.add(createReassociatePass());
+  // Eliminate Common SubExpressions.
+  OurFPM.add(createGVNPass());
+  // Simplify the control flow graph (deleting unreachable blocks, etc).
+  OurFPM.add(createCFGSimplificationPass());
+
+  OurFPM.doInitialization();
+
+  // Set the global so the code gen can use this.
+  TheFPM = &OurFPM;
+
+  // Run the main "interpreter loop" now.
+  MainLoop();
+
+  TheFPM = 0;
+
+  // Print out all of the generated code.
+  TheModule->dump();
+
+  return 0;
+}
diff --git a/final/examples/Kaleidoscope/Chapter6/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter6/CMakeLists.txt
new file mode 100644
index 00000000000..2e15a5f7dfc
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter6/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS core jit interpreter native)
+
+add_llvm_example(Kaleidoscope-Ch6
+  toy.cpp
+  )
diff --git a/final/examples/Kaleidoscope/Chapter6/Makefile b/final/examples/Kaleidoscope/Chapter6/Makefile
new file mode 100644
index 00000000000..a5fbcbdf9b2
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter6/Makefile
@@ -0,0 +1,15 @@
+##===- examples/Kaleidoscope/Chapter6/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+TOOLNAME = Kaleidoscope-Ch6
+EXAMPLE_TOOL = 1
+
+LINK_COMPONENTS := core jit native
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/Chapter6/toy.cpp b/final/examples/Kaleidoscope/Chapter6/toy.cpp
new file mode 100644
index 00000000000..838125ae77d
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -0,0 +1,976 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <map>
+#include <string>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
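+// For example, '(' comes back as the character itself, while the keyword
+// "def" comes back as tok_def and "binary" as tok_binary.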
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+
+  // operators
+  tok_binary = -11, tok_unary = -12
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file. Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand)
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
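+/// For example: "if x < 3 then 1 else 2".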
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its name, and its argument names (thus implicitly the number
+/// of arguments the function takes), as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+    : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  Function *Codegen();
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+ getNextToken(); + + return new CallExprAST(IdName, Args); +} + +/// numberexpr ::= number +static ExprAST *ParseNumberExpr() { + ExprAST *Result = new NumberExprAST(NumVal); + getNextToken(); // consume the number + return Result; +} + +/// parenexpr ::= '(' expression ')' +static ExprAST *ParseParenExpr() { + getNextToken(); // eat (. + ExprAST *V = ParseExpression(); + if (!V) return 0; + + if (CurTok != ')') + return Error("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static ExprAST *ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. + ExprAST *Cond = ParseExpression(); + if (!Cond) return 0; + + if (CurTok != tok_then) + return Error("expected then"); + getNextToken(); // eat the then + + ExprAST *Then = ParseExpression(); + if (Then == 0) return 0; + + if (CurTok != tok_else) + return Error("expected else"); + + getNextToken(); + + ExprAST *Else = ParseExpression(); + if (!Else) return 0; + + return new IfExprAST(Cond, Then, Else); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static ExprAST *ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return Error("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return Error("expected '=' after for"); + getNextToken(); // eat '='. + + + ExprAST *Start = ParseExpression(); + if (Start == 0) return 0; + if (CurTok != ',') + return Error("expected ',' after for start value"); + getNextToken(); + + ExprAST *End = ParseExpression(); + if (End == 0) return 0; + + // The step value is optional. + ExprAST *Step = 0; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (Step == 0) return 0; + } + + if (CurTok != tok_in) + return Error("expected 'in' after for"); + getNextToken(); // eat 'in'. + + ExprAST *Body = ParseExpression(); + if (Body == 0) return 0; + + return new ForExprAST(IdName, Start, End, Step, Body); +} + +/// primary +/// ::= identifierexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +static ExprAST *ParsePrimary() { + switch (CurTok) { + default: return Error("unknown token when expecting an expression"); + case tok_identifier: return ParseIdentifierExpr(); + case tok_number: return ParseNumberExpr(); + case '(': return ParseParenExpr(); + case tok_if: return ParseIfExpr(); + case tok_for: return ParseForExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static ExprAST *ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (ExprAST *Operand = ParseUnary()) + return new UnaryExprAST(Opc, Operand); + return 0; +} + +/// binoprhs +/// ::= ('+' unary)* +static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) { + // If this is a binop, find its precedence. + while (1) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. 
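+    // For example, with the default table ('+' is 20, '*' is 40), parsing
+    // "a+b*c" reaches here with LHS 'a' and BinOp '+'; after the unary parse
+    // below yields 'b', the pending '*' binds tighter (40 > 20), so the
+    // recursive call folds "b*c" into RHS before the merge builds a+(b*c).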
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, Value*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  return V ? V : ErrorV("Unknown variable name");
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases. Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+  // Emit else block.
+  TheFunction->getBasicBlockList().push_back(ElseBB);
+  Builder.SetInsertPoint(ElseBB);
+
+  Value *ElseV = Else->Codegen();
+  if (ElseV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Else' can change the current block, update ElseBB for the PHI.
+  ElseBB = Builder.GetInsertBlock();
+
+  // Emit merge block.
+  TheFunction->getBasicBlockList().push_back(MergeBB);
+  Builder.SetInsertPoint(MergeBB);
+  PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()),
+                                  "iftmp");
+
+  PN->addIncoming(ThenV, ThenBB);
+  PN->addIncoming(ElseV, ElseBB);
+  return PN;
+}
+
+Value *ForExprAST::Codegen() {
+  // Output this as:
+  //   ...
+  //   start = startexpr
+  //   goto loop
+  // loop:
+  //   variable = phi [start, loopheader], [nextvariable, loopend]
+  //   ...
+  //   bodyexpr
+  //   ...
+  // loopend:
+  //   step = stepexpr
+  //   nextvariable = variable + step
+  //   endcond = endexpr
+  //   br endcond, loop, endloop
+  // outloop:
+
+  // Emit the start code first, without 'variable' in scope.
+  Value *StartVal = Start->Codegen();
+  if (StartVal == 0) return 0;
+
+  // Make the new basic block for the loop header, inserting after current
+  // block.
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+  BasicBlock *PreheaderBB = Builder.GetInsertBlock();
+  BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction);
+
+  // Insert an explicit fall through from the current block to the LoopBB.
+  Builder.CreateBr(LoopBB);
+
+  // Start insertion in LoopBB.
+  Builder.SetInsertPoint(LoopBB);
+
+  // Start the PHI node with an entry for Start.
+  PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str());
+  Variable->addIncoming(StartVal, PreheaderBB);
+
+  // Within the loop, the variable is defined equal to the PHI node. If it
+  // shadows an existing variable, we have to restore it, so save it now.
+  Value *OldVal = NamedValues[VarName];
+  NamedValues[VarName] = Variable;
+
+  // Emit the body of the loop. This, like any other expr, can change the
+  // current BB. Note that we ignore the value computed by the body, but don't
+  // allow an error.
+  if (Body->Codegen() == 0)
+    return 0;
+
+  // Emit the step value.
+  Value *StepVal;
+  if (Step) {
+    StepVal = Step->Codegen();
+    if (StepVal == 0) return 0;
+  } else {
+    // If not specified, use 1.0.
+    StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
+  }
+
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
+
+  // Compute the end condition.
+  Value *EndCond = End->Codegen();
+  if (EndCond == 0) return EndCond;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  EndCond = Builder.CreateFCmpONE(EndCond,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                  "loopcond");
+
+  // Create the "after loop" block and insert it.
+  BasicBlock *LoopEndBB = Builder.GetInsertBlock();
+  BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction);
+
+  // Insert the conditional branch into the end of LoopEndBB.
+  Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
+
+  // Any new code will be inserted in AfterBB.
+  Builder.SetInsertPoint(AfterBB);
+
+  // Add a new entry to the PHI node for the backedge.
+  Variable->addIncoming(NextVar, LoopEndBB);
+
+  // Restore the unshadowed variable.
+  if (OldVal)
+    NamedValues[VarName] = OldVal;
+  else
+    NamedValues.erase(VarName);
+
+
+  // for expr always returns 0.0.
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type: double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'. If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
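+    // For example, "extern foo(a b);" followed by "def foo(a) a;" is rejected
+    // here: the earlier declaration took two arguments, the redefinition one.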
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx) {
+    AI->setName(Args[Idx]);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = AI;
+  }
+
+  return F;
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  if (Proto->isBinaryOp())
+    BinopPrecedence.erase(Proto->getOperatorName());
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof: return;
+    case ';': getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def: HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default: HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/// printd - printf that takes a double and prints it as "%f\n", returning 0.
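+/// For example, after "extern printd(x);", evaluating "printd(123);" prints
+/// "123.000000" and the call itself evaluates to 0.000000.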
+extern "C" +double printd(double X) { + printf("%f\n", X); + return 0; +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +int main() { + InitializeNativeTarget(); + LLVMContext &Context = getGlobalContext(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + // Make the module, which holds all the code. + TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/final/examples/Kaleidoscope/Chapter7/CMakeLists.txt b/final/examples/Kaleidoscope/Chapter7/CMakeLists.txt new file mode 100644 index 00000000000..da3839843bd --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter7/CMakeLists.txt @@ -0,0 +1,6 @@ +set(LLVM_LINK_COMPONENTS core jit interpreter native) +set(LLVM_REQUIRES_RTTI 1) + +add_llvm_example(Kaleidoscope-Ch7 + toy.cpp + ) diff --git a/final/examples/Kaleidoscope/Chapter7/Makefile b/final/examples/Kaleidoscope/Chapter7/Makefile new file mode 100644 index 00000000000..6cec323efd4 --- /dev/null +++ b/final/examples/Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,16 @@ +##===- examples/Kaleidoscope/Chapter7/Makefile -------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../../.. 
+TOOLNAME = Kaleidoscope-Ch7
+EXAMPLE_TOOL = 1
+REQUIRES_RTTI := 1
+
+LINK_COMPONENTS := core jit native
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/Kaleidoscope/Chapter7/toy.cpp b/final/examples/Kaleidoscope/Chapter7/toy.cpp
new file mode 100644
index 00000000000..e63578f57e6
--- /dev/null
+++ b/final/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -0,0 +1,1142 @@
+#include "llvm/DerivedTypes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetSelect.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/IRBuilder.h"
+#include <cstdio>
+#include <string>
+#include <map>
+#include <vector>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Lexer
+//===----------------------------------------------------------------------===//
+
+// The lexer returns tokens [0-255] if it is an unknown character, otherwise one
+// of these for known things.
+enum Token {
+  tok_eof = -1,
+
+  // commands
+  tok_def = -2, tok_extern = -3,
+
+  // primary
+  tok_identifier = -4, tok_number = -5,
+
+  // control
+  tok_if = -6, tok_then = -7, tok_else = -8,
+  tok_for = -9, tok_in = -10,
+
+  // operators
+  tok_binary = -11, tok_unary = -12,
+
+  // var definition
+  tok_var = -13
+};
+
+static std::string IdentifierStr;  // Filled in if tok_identifier
+static double NumVal;              // Filled in if tok_number
+
+/// gettok - Return the next token from standard input.
+static int gettok() {
+  static int LastChar = ' ';
+
+  // Skip any whitespace.
+  while (isspace(LastChar))
+    LastChar = getchar();
+
+  if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
+    IdentifierStr = LastChar;
+    while (isalnum((LastChar = getchar())))
+      IdentifierStr += LastChar;
+
+    if (IdentifierStr == "def") return tok_def;
+    if (IdentifierStr == "extern") return tok_extern;
+    if (IdentifierStr == "if") return tok_if;
+    if (IdentifierStr == "then") return tok_then;
+    if (IdentifierStr == "else") return tok_else;
+    if (IdentifierStr == "for") return tok_for;
+    if (IdentifierStr == "in") return tok_in;
+    if (IdentifierStr == "binary") return tok_binary;
+    if (IdentifierStr == "unary") return tok_unary;
+    if (IdentifierStr == "var") return tok_var;
+    return tok_identifier;
+  }
+
+  if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
+    std::string NumStr;
+    do {
+      NumStr += LastChar;
+      LastChar = getchar();
+    } while (isdigit(LastChar) || LastChar == '.');
+
+    NumVal = strtod(NumStr.c_str(), 0);
+    return tok_number;
+  }
+
+  if (LastChar == '#') {
+    // Comment until end of line.
+    do LastChar = getchar();
+    while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
+
+    if (LastChar != EOF)
+      return gettok();
+  }
+
+  // Check for end of file. Don't eat the EOF.
+  if (LastChar == EOF)
+    return tok_eof;
+
+  // Otherwise, just return the character as its ascii value.
+  int ThisChar = LastChar;
+  LastChar = getchar();
+  return ThisChar;
+}
+
+//===----------------------------------------------------------------------===//
+// Abstract Syntax Tree (aka Parse Tree)
+//===----------------------------------------------------------------------===//
+
+/// ExprAST - Base class for all expression nodes.
+class ExprAST {
+public:
+  virtual ~ExprAST() {}
+  virtual Value *Codegen() = 0;
+};
+
+/// NumberExprAST - Expression class for numeric literals like "1.0".
+class NumberExprAST : public ExprAST {
+  double Val;
+public:
+  NumberExprAST(double val) : Val(val) {}
+  virtual Value *Codegen();
+};
+
+/// VariableExprAST - Expression class for referencing a variable, like "a".
+class VariableExprAST : public ExprAST {
+  std::string Name;
+public:
+  VariableExprAST(const std::string &name) : Name(name) {}
+  const std::string &getName() const { return Name; }
+  virtual Value *Codegen();
+};
+
+/// UnaryExprAST - Expression class for a unary operator.
+class UnaryExprAST : public ExprAST {
+  char Opcode;
+  ExprAST *Operand;
+public:
+  UnaryExprAST(char opcode, ExprAST *operand)
+    : Opcode(opcode), Operand(operand) {}
+  virtual Value *Codegen();
+};
+
+/// BinaryExprAST - Expression class for a binary operator.
+class BinaryExprAST : public ExprAST {
+  char Op;
+  ExprAST *LHS, *RHS;
+public:
+  BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs)
+    : Op(op), LHS(lhs), RHS(rhs) {}
+  virtual Value *Codegen();
+};
+
+/// CallExprAST - Expression class for function calls.
+class CallExprAST : public ExprAST {
+  std::string Callee;
+  std::vector<ExprAST*> Args;
+public:
+  CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
+    : Callee(callee), Args(args) {}
+  virtual Value *Codegen();
+};
+
+/// IfExprAST - Expression class for if/then/else.
+class IfExprAST : public ExprAST {
+  ExprAST *Cond, *Then, *Else;
+public:
+  IfExprAST(ExprAST *cond, ExprAST *then, ExprAST *_else)
+    : Cond(cond), Then(then), Else(_else) {}
+  virtual Value *Codegen();
+};
+
+/// ForExprAST - Expression class for for/in.
+class ForExprAST : public ExprAST {
+  std::string VarName;
+  ExprAST *Start, *End, *Step, *Body;
+public:
+  ForExprAST(const std::string &varname, ExprAST *start, ExprAST *end,
+             ExprAST *step, ExprAST *body)
+    : VarName(varname), Start(start), End(end), Step(step), Body(body) {}
+  virtual Value *Codegen();
+};
+
+/// VarExprAST - Expression class for var/in
+class VarExprAST : public ExprAST {
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+  ExprAST *Body;
+public:
+  VarExprAST(const std::vector<std::pair<std::string, ExprAST*> > &varnames,
+             ExprAST *body)
+    : VarNames(varnames), Body(body) {}
+
+  virtual Value *Codegen();
+};
+
+/// PrototypeAST - This class represents the "prototype" for a function,
+/// which captures its argument names as well as if it is an operator.
+class PrototypeAST {
+  std::string Name;
+  std::vector<std::string> Args;
+  bool isOperator;
+  unsigned Precedence;  // Precedence if a binary op.
+public:
+  PrototypeAST(const std::string &name, const std::vector<std::string> &args,
+               bool isoperator = false, unsigned prec = 0)
+    : Name(name), Args(args), isOperator(isoperator), Precedence(prec) {}
+
+  bool isUnaryOp() const { return isOperator && Args.size() == 1; }
+  bool isBinaryOp() const { return isOperator && Args.size() == 2; }
+
+  char getOperatorName() const {
+    assert(isUnaryOp() || isBinaryOp());
+    return Name[Name.size()-1];
+  }
+
+  unsigned getBinaryPrecedence() const { return Precedence; }
+
+  Function *Codegen();
+
+  void CreateArgumentAllocas(Function *F);
+};
+
+/// FunctionAST - This class represents a function definition itself.
+class FunctionAST {
+  PrototypeAST *Proto;
+  ExprAST *Body;
+public:
+  FunctionAST(PrototypeAST *proto, ExprAST *body)
+    : Proto(proto), Body(body) {}
+
+  Function *Codegen();
+};
+
+//===----------------------------------------------------------------------===//
+// Parser
+//===----------------------------------------------------------------------===//
+
+/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
+/// token the parser is looking at. getNextToken reads another token from the
+/// lexer and updates CurTok with its results.
+static int CurTok;
+static int getNextToken() {
+  return CurTok = gettok();
+}
+
+/// BinopPrecedence - This holds the precedence for each binary operator that is
+/// defined.
+static std::map<char, int> BinopPrecedence;
+
+/// GetTokPrecedence - Get the precedence of the pending binary operator token.
+static int GetTokPrecedence() {
+  if (!isascii(CurTok))
+    return -1;
+
+  // Make sure it's a declared binop.
+  int TokPrec = BinopPrecedence[CurTok];
+  if (TokPrec <= 0) return -1;
+  return TokPrec;
+}
+
+/// Error* - These are little helper functions for error handling.
+ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
+PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
+FunctionAST *ErrorF(const char *Str) { Error(Str); return 0; }
+
+static ExprAST *ParseExpression();
+
+/// identifierexpr
+///   ::= identifier
+///   ::= identifier '(' expression* ')'
+static ExprAST *ParseIdentifierExpr() {
+  std::string IdName = IdentifierStr;
+
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '(') // Simple variable ref.
+    return new VariableExprAST(IdName);
+
+  // Call.
+  getNextToken();  // eat (
+  std::vector<ExprAST*> Args;
+  if (CurTok != ')') {
+    while (1) {
+      ExprAST *Arg = ParseExpression();
+      if (!Arg) return 0;
+      Args.push_back(Arg);
+
+      if (CurTok == ')') break;
+
+      if (CurTok != ',')
+        return Error("Expected ')' or ',' in argument list");
+      getNextToken();
+    }
+  }
+
+  // Eat the ')'.
+  getNextToken();
+
+  return new CallExprAST(IdName, Args);
+}
+
+/// numberexpr ::= number
+static ExprAST *ParseNumberExpr() {
+  ExprAST *Result = new NumberExprAST(NumVal);
+  getNextToken(); // consume the number
+  return Result;
+}
+
+/// parenexpr ::= '(' expression ')'
+static ExprAST *ParseParenExpr() {
+  getNextToken();  // eat (.
+  ExprAST *V = ParseExpression();
+  if (!V) return 0;
+
+  if (CurTok != ')')
+    return Error("expected ')'");
+  getNextToken();  // eat ).
+  return V;
+}
+
+/// ifexpr ::= 'if' expression 'then' expression 'else' expression
+static ExprAST *ParseIfExpr() {
+  getNextToken();  // eat the if.
+
+  // condition.
+  ExprAST *Cond = ParseExpression();
+  if (!Cond) return 0;
+
+  if (CurTok != tok_then)
+    return Error("expected then");
+  getNextToken();  // eat the then
+
+  ExprAST *Then = ParseExpression();
+  if (Then == 0) return 0;
+
+  if (CurTok != tok_else)
+    return Error("expected else");
+
+  getNextToken();
+
+  ExprAST *Else = ParseExpression();
+  if (!Else) return 0;
+
+  return new IfExprAST(Cond, Then, Else);
+}
+
+/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression
+static ExprAST *ParseForExpr() {
+  getNextToken();  // eat the for.
+
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after for");
+
+  std::string IdName = IdentifierStr;
+  getNextToken();  // eat identifier.
+
+  if (CurTok != '=')
+    return Error("expected '=' after for");
+  getNextToken();  // eat '='.
+
+
+  ExprAST *Start = ParseExpression();
+  if (Start == 0) return 0;
+  if (CurTok != ',')
+    return Error("expected ',' after for start value");
+  getNextToken();
+
+  ExprAST *End = ParseExpression();
+  if (End == 0) return 0;
+
+  // The step value is optional.
+  ExprAST *Step = 0;
+  if (CurTok == ',') {
+    getNextToken();
+    Step = ParseExpression();
+    if (Step == 0) return 0;
+  }
+
+  if (CurTok != tok_in)
+    return Error("expected 'in' after for");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new ForExprAST(IdName, Start, End, Step, Body);
+}
+
+/// varexpr ::= 'var' identifier ('=' expression)?
+//                    (',' identifier ('=' expression)?)* 'in' expression
+static ExprAST *ParseVarExpr() {
+  getNextToken();  // eat the var.
+
+  std::vector<std::pair<std::string, ExprAST*> > VarNames;
+
+  // At least one variable name is required.
+  if (CurTok != tok_identifier)
+    return Error("expected identifier after var");
+
+  while (1) {
+    std::string Name = IdentifierStr;
+    getNextToken();  // eat identifier.
+
+    // Read the optional initializer.
+    ExprAST *Init = 0;
+    if (CurTok == '=') {
+      getNextToken(); // eat the '='.
+
+      Init = ParseExpression();
+      if (Init == 0) return 0;
+    }
+
+    VarNames.push_back(std::make_pair(Name, Init));
+
+    // End of var list, exit loop.
+    if (CurTok != ',') break;
+    getNextToken(); // eat the ','.
+
+    if (CurTok != tok_identifier)
+      return Error("expected identifier list after var");
+  }
+
+  // At this point, we have to have 'in'.
+  if (CurTok != tok_in)
+    return Error("expected 'in' keyword after 'var'");
+  getNextToken();  // eat 'in'.
+
+  ExprAST *Body = ParseExpression();
+  if (Body == 0) return 0;
+
+  return new VarExprAST(VarNames, Body);
+}
+
+/// primary
+///   ::= identifierexpr
+///   ::= numberexpr
+///   ::= parenexpr
+///   ::= ifexpr
+///   ::= forexpr
+///   ::= varexpr
+static ExprAST *ParsePrimary() {
+  switch (CurTok) {
+  default: return Error("unknown token when expecting an expression");
+  case tok_identifier: return ParseIdentifierExpr();
+  case tok_number: return ParseNumberExpr();
+  case '(': return ParseParenExpr();
+  case tok_if: return ParseIfExpr();
+  case tok_for: return ParseForExpr();
+  case tok_var: return ParseVarExpr();
+  }
+}
+
+/// unary
+///   ::= primary
+///   ::= '!' unary
+static ExprAST *ParseUnary() {
+  // If the current token is not an operator, it must be a primary expr.
+  if (!isascii(CurTok) || CurTok == '(' || CurTok == ',')
+    return ParsePrimary();
+
+  // If this is a unary operator, read it.
+  int Opc = CurTok;
+  getNextToken();
+  if (ExprAST *Operand = ParseUnary())
+    return new UnaryExprAST(Opc, Operand);
+  return 0;
+}
+
+/// binoprhs
+///   ::= ('+' unary)*
+static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
+  // If this is a binop, find its precedence.
+  while (1) {
+    int TokPrec = GetTokPrecedence();
+
+    // If this is a binop that binds at least as tightly as the current binop,
+    // consume it, otherwise we are done.
+    if (TokPrec < ExprPrec)
+      return LHS;
+
+    // Okay, we know this is a binop.
+    int BinOp = CurTok;
+    getNextToken();  // eat binop
+
+    // Parse the unary expression after the binary operator.
+    ExprAST *RHS = ParseUnary();
+    if (!RHS) return 0;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    int NextPrec = GetTokPrecedence();
+    if (TokPrec < NextPrec) {
+      RHS = ParseBinOpRHS(TokPrec+1, RHS);
+      if (RHS == 0) return 0;
+    }
+
+    // Merge LHS/RHS.
+    LHS = new BinaryExprAST(BinOp, LHS, RHS);
+  }
+}
+
+/// expression
+///   ::= unary binoprhs
+///
+static ExprAST *ParseExpression() {
+  ExprAST *LHS = ParseUnary();
+  if (!LHS) return 0;
+
+  return ParseBinOpRHS(0, LHS);
+}
+
+/// prototype
+///   ::= id '(' id* ')'
+///   ::= binary LETTER number? (id, id)
+///   ::= unary LETTER (id)
+static PrototypeAST *ParsePrototype() {
+  std::string FnName;
+
+  unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary.
+  unsigned BinaryPrecedence = 30;
+
+  switch (CurTok) {
+  default:
+    return ErrorP("Expected function name in prototype");
+  case tok_identifier:
+    FnName = IdentifierStr;
+    Kind = 0;
+    getNextToken();
+    break;
+  case tok_unary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected unary operator");
+    FnName = "unary";
+    FnName += (char)CurTok;
+    Kind = 1;
+    getNextToken();
+    break;
+  case tok_binary:
+    getNextToken();
+    if (!isascii(CurTok))
+      return ErrorP("Expected binary operator");
+    FnName = "binary";
+    FnName += (char)CurTok;
+    Kind = 2;
+    getNextToken();
+
+    // Read the precedence if present.
+    if (CurTok == tok_number) {
+      if (NumVal < 1 || NumVal > 100)
+        return ErrorP("Invalid precedence: must be 1..100");
+      BinaryPrecedence = (unsigned)NumVal;
+      getNextToken();
+    }
+    break;
+  }
+
+  if (CurTok != '(')
+    return ErrorP("Expected '(' in prototype");
+
+  std::vector<std::string> ArgNames;
+  while (getNextToken() == tok_identifier)
+    ArgNames.push_back(IdentifierStr);
+  if (CurTok != ')')
+    return ErrorP("Expected ')' in prototype");
+
+  // success.
+  getNextToken();  // eat ')'.
+
+  // Verify right number of names for operator.
+  if (Kind && ArgNames.size() != Kind)
+    return ErrorP("Invalid number of operands for operator");
+
+  return new PrototypeAST(FnName, ArgNames, Kind != 0, BinaryPrecedence);
+}
+
+/// definition ::= 'def' prototype expression
+static FunctionAST *ParseDefinition() {
+  getNextToken();  // eat def.
+  PrototypeAST *Proto = ParsePrototype();
+  if (Proto == 0) return 0;
+
+  if (ExprAST *E = ParseExpression())
+    return new FunctionAST(Proto, E);
+  return 0;
+}
+
+/// toplevelexpr ::= expression
+static FunctionAST *ParseTopLevelExpr() {
+  if (ExprAST *E = ParseExpression()) {
+    // Make an anonymous proto.
+    PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
+    return new FunctionAST(Proto, E);
+  }
+  return 0;
+}
+
+/// external ::= 'extern' prototype
+static PrototypeAST *ParseExtern() {
+  getNextToken();  // eat extern.
+  return ParsePrototype();
+}
+
+//===----------------------------------------------------------------------===//
+// Code Generation
+//===----------------------------------------------------------------------===//
+
+static Module *TheModule;
+static IRBuilder<> Builder(getGlobalContext());
+static std::map<std::string, AllocaInst*> NamedValues;
+static FunctionPassManager *TheFPM;
+
+Value *ErrorV(const char *Str) { Error(Str); return 0; }
+
+/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of
+/// the function. This is used for mutable variables etc.
+static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction,
+                                          const std::string &VarName) {
+  IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
+                   TheFunction->getEntryBlock().begin());
+  return TmpB.CreateAlloca(Type::getDoubleTy(getGlobalContext()), 0,
+                           VarName.c_str());
+}
+
+Value *NumberExprAST::Codegen() {
+  return ConstantFP::get(getGlobalContext(), APFloat(Val));
+}
+
+Value *VariableExprAST::Codegen() {
+  // Look this variable up in the function.
+  Value *V = NamedValues[Name];
+  if (V == 0) return ErrorV("Unknown variable name");
+
+  // Load the value.
+  return Builder.CreateLoad(V, Name.c_str());
+}
+
+Value *UnaryExprAST::Codegen() {
+  Value *OperandV = Operand->Codegen();
+  if (OperandV == 0) return 0;
+
+  Function *F = TheModule->getFunction(std::string("unary")+Opcode);
+  if (F == 0)
+    return ErrorV("Unknown unary operator");
+
+  return Builder.CreateCall(F, OperandV, "unop");
+}
+
+Value *BinaryExprAST::Codegen() {
+  // Special case '=' because we don't want to emit the LHS as an expression.
+  if (Op == '=') {
+    // Assignment requires the LHS to be an identifier.
+    VariableExprAST *LHSE = dynamic_cast<VariableExprAST*>(LHS);
+    if (!LHSE)
+      return ErrorV("destination of '=' must be a variable");
+    // Codegen the RHS.
+    Value *Val = RHS->Codegen();
+    if (Val == 0) return 0;
+
+    // Look up the name.
+    Value *Variable = NamedValues[LHSE->getName()];
+    if (Variable == 0) return ErrorV("Unknown variable name");
+
+    Builder.CreateStore(Val, Variable);
+    return Val;
+  }
+
+  Value *L = LHS->Codegen();
+  Value *R = RHS->Codegen();
+  if (L == 0 || R == 0) return 0;
+
+  switch (Op) {
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
+  case '<':
+    L = Builder.CreateFCmpULT(L, R, "cmptmp");
+    // Convert bool 0/1 to double 0.0 or 1.0
+    return Builder.CreateUIToFP(L, Type::getDoubleTy(getGlobalContext()),
+                                "booltmp");
+  default: break;
+  }
+
+  // If it wasn't a builtin binary operator, it must be a user defined one. Emit
+  // a call to it.
+  Function *F = TheModule->getFunction(std::string("binary")+Op);
+  assert(F && "binary operator not found!");
+
+  Value *Ops[] = { L, R };
+  return Builder.CreateCall(F, Ops, Ops+2, "binop");
+}
+
+Value *CallExprAST::Codegen() {
+  // Look up the name in the global module table.
+  Function *CalleeF = TheModule->getFunction(Callee);
+  if (CalleeF == 0)
+    return ErrorV("Unknown function referenced");
+
+  // If argument mismatch error.
+  if (CalleeF->arg_size() != Args.size())
+    return ErrorV("Incorrect # arguments passed");
+
+  std::vector<Value*> ArgsV;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    ArgsV.push_back(Args[i]->Codegen());
+    if (ArgsV.back() == 0) return 0;
+  }
+
+  return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+}
+
+Value *IfExprAST::Codegen() {
+  Value *CondV = Cond->Codegen();
+  if (CondV == 0) return 0;
+
+  // Convert condition to a bool by comparing equal to 0.0.
+  CondV = Builder.CreateFCmpONE(CondV,
+                              ConstantFP::get(getGlobalContext(), APFloat(0.0)),
+                                "ifcond");
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Create blocks for the then and else cases. Insert the 'then' block at the
+  // end of the function.
+  BasicBlock *ThenBB = BasicBlock::Create(getGlobalContext(), "then", TheFunction);
+  BasicBlock *ElseBB = BasicBlock::Create(getGlobalContext(), "else");
+  BasicBlock *MergeBB = BasicBlock::Create(getGlobalContext(), "ifcont");
+
+  Builder.CreateCondBr(CondV, ThenBB, ElseBB);
+
+  // Emit then value.
+  Builder.SetInsertPoint(ThenBB);
+
+  Value *ThenV = Then->Codegen();
+  if (ThenV == 0) return 0;
+
+  Builder.CreateBr(MergeBB);
+  // Codegen of 'Then' can change the current block, update ThenBB for the PHI.
+  ThenBB = Builder.GetInsertBlock();
+
+  // Emit else block.
+ TheFunction->getBasicBlockList().push_back(ElseBB); + Builder.SetInsertPoint(ElseBB); + + Value *ElseV = Else->Codegen(); + if (ElseV == 0) return 0; + + Builder.CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder.GetInsertBlock(); + + // Emit merge block. + TheFunction->getBasicBlockList().push_back(MergeBB); + Builder.SetInsertPoint(MergeBB); + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + "iftmp"); + + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +Value *ForExprAST::Codegen() { + // Output this as: + // var = alloca double + // ... + // start = startexpr + // store start -> var + // goto loop + // loop: + // ... + // bodyexpr + // ... + // loopend: + // step = stepexpr + // endcond = endexpr + // + // curvar = load var + // nextvar = curvar + step + // store nextvar -> var + // br endcond, loop, endloop + // outloop: + + Function *TheFunction = Builder.GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->Codegen(); + if (StartVal == 0) return 0; + + // Store the value into the alloca. + Builder.CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *LoopBB = BasicBlock::Create(getGlobalContext(), "loop", TheFunction); + + // Insert an explicit fall through from the current block to the LoopBB. + Builder.CreateBr(LoopBB); + + // Start insertion in LoopBB. + Builder.SetInsertPoint(LoopBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + AllocaInst *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (Body->Codegen() == 0) + return 0; + + // Emit the step value. + Value *StepVal; + if (Step) { + StepVal = Step->Codegen(); + if (StepVal == 0) return 0; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0)); + } + + // Compute the end condition. + Value *EndCond = End->Codegen(); + if (EndCond == 0) return EndCond; + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str()); + Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar"); + Builder.CreateStore(NextVar, Alloca); + + // Convert condition to a bool by comparing equal to 0.0. + EndCond = Builder.CreateFCmpONE(EndCond, + ConstantFP::get(getGlobalContext(), APFloat(0.0)), + "loopcond"); + + // Create the "after loop" block and insert it. + BasicBlock *AfterBB = BasicBlock::Create(getGlobalContext(), "afterloop", TheFunction); + + // Insert the conditional branch into the end of LoopEndBB. + Builder.CreateCondBr(EndCond, LoopBB, AfterBB); + + // Any new code will be inserted in AfterBB. + Builder.SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + + // for expr always returns 0.0. 
+  return Constant::getNullValue(Type::getDoubleTy(getGlobalContext()));
+}
+
+Value *VarExprAST::Codegen() {
+  std::vector<AllocaInst *> OldBindings;
+
+  Function *TheFunction = Builder.GetInsertBlock()->getParent();
+
+  // Register all variables and emit their initializer.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i) {
+    const std::string &VarName = VarNames[i].first;
+    ExprAST *Init = VarNames[i].second;
+
+    // Emit the initializer before adding the variable to scope, this prevents
+    // the initializer from referencing the variable itself, and permits stuff
+    // like this:
+    //   var a = 1 in
+    //     var a = a in ...   # refers to outer 'a'.
+    Value *InitVal;
+    if (Init) {
+      InitVal = Init->Codegen();
+      if (InitVal == 0) return 0;
+    } else { // If not specified, use 0.0.
+      InitVal = ConstantFP::get(getGlobalContext(), APFloat(0.0));
+    }
+
+    AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
+    Builder.CreateStore(InitVal, Alloca);
+
+    // Remember the old variable binding so that we can restore the binding when
+    // we unrecurse.
+    OldBindings.push_back(NamedValues[VarName]);
+
+    // Remember this binding.
+    NamedValues[VarName] = Alloca;
+  }
+
+  // Codegen the body, now that all vars are in scope.
+  Value *BodyVal = Body->Codegen();
+  if (BodyVal == 0) return 0;
+
+  // Pop all our variables from scope.
+  for (unsigned i = 0, e = VarNames.size(); i != e; ++i)
+    NamedValues[VarNames[i].first] = OldBindings[i];
+
+  // Return the body computation.
+  return BodyVal;
+}
+
+Function *PrototypeAST::Codegen() {
+  // Make the function type: double(double,double) etc.
+  std::vector<const Type*> Doubles(Args.size(),
+                                   Type::getDoubleTy(getGlobalContext()));
+  FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
+                                       Doubles, false);
+
+  Function *F = Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
+
+  // If F conflicted, there was already something named 'Name'. If it has a
+  // body, don't allow redefinition or reextern.
+  if (F->getName() != Name) {
+    // Delete the one we just made and get the existing one.
+    F->eraseFromParent();
+    F = TheModule->getFunction(Name);
+
+    // If F already has a body, reject this.
+    if (!F->empty()) {
+      ErrorF("redefinition of function");
+      return 0;
+    }
+
+    // If F took a different number of args, reject.
+    if (F->arg_size() != Args.size()) {
+      ErrorF("redefinition of function with different # args");
+      return 0;
+    }
+  }
+
+  // Set names for all arguments.
+  unsigned Idx = 0;
+  for (Function::arg_iterator AI = F->arg_begin(); Idx != Args.size();
+       ++AI, ++Idx)
+    AI->setName(Args[Idx]);
+
+  return F;
+}
+
+/// CreateArgumentAllocas - Create an alloca for each argument and register the
+/// argument in the symbol table so that references to it will succeed.
+void PrototypeAST::CreateArgumentAllocas(Function *F) {
+  Function::arg_iterator AI = F->arg_begin();
+  for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
+    // Create an alloca for this variable.
+    AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
+
+    // Store the initial value into the alloca.
+    Builder.CreateStore(AI, Alloca);
+
+    // Add arguments to variable symbol table.
+    NamedValues[Args[Idx]] = Alloca;
+  }
+}
+
+Function *FunctionAST::Codegen() {
+  NamedValues.clear();
+
+  Function *TheFunction = Proto->Codegen();
+  if (TheFunction == 0)
+    return 0;
+
+  // If this is an operator, install it.
+  if (Proto->isBinaryOp())
+    BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
+
+  // Create a new basic block to start insertion into.
+  BasicBlock *BB = BasicBlock::Create(getGlobalContext(), "entry", TheFunction);
+  Builder.SetInsertPoint(BB);
+
+  // Add all arguments to the symbol table and create their allocas.
+  Proto->CreateArgumentAllocas(TheFunction);
+
+  if (Value *RetVal = Body->Codegen()) {
+    // Finish off the function.
+    Builder.CreateRet(RetVal);
+
+    // Validate the generated code, checking for consistency.
+    verifyFunction(*TheFunction);
+
+    // Optimize the function.
+    TheFPM->run(*TheFunction);
+
+    return TheFunction;
+  }
+
+  // Error reading body, remove function.
+  TheFunction->eraseFromParent();
+
+  if (Proto->isBinaryOp())
+    BinopPrecedence.erase(Proto->getOperatorName());
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Top-Level parsing and JIT Driver
+//===----------------------------------------------------------------------===//
+
+static ExecutionEngine *TheExecutionEngine;
+
+static void HandleDefinition() {
+  if (FunctionAST *F = ParseDefinition()) {
+    if (Function *LF = F->Codegen()) {
+      fprintf(stderr, "Read function definition:");
+      LF->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleExtern() {
+  if (PrototypeAST *P = ParseExtern()) {
+    if (Function *F = P->Codegen()) {
+      fprintf(stderr, "Read extern: ");
+      F->dump();
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+static void HandleTopLevelExpression() {
+  // Evaluate a top-level expression into an anonymous function.
+  if (FunctionAST *F = ParseTopLevelExpr()) {
+    if (Function *LF = F->Codegen()) {
+      // JIT the function, returning a function pointer.
+      void *FPtr = TheExecutionEngine->getPointerToFunction(LF);
+
+      // Cast it to the right type (takes no arguments, returns a double) so we
+      // can call it as a native function.
+      double (*FP)() = (double (*)())(intptr_t)FPtr;
+      fprintf(stderr, "Evaluated to %f\n", FP());
+    }
+  } else {
+    // Skip token for error recovery.
+    getNextToken();
+  }
+}
+
+/// top ::= definition | external | expression | ';'
+static void MainLoop() {
+  while (1) {
+    fprintf(stderr, "ready> ");
+    switch (CurTok) {
+    case tok_eof: return;
+    case ';': getNextToken(); break;  // ignore top-level semicolons.
+    case tok_def: HandleDefinition(); break;
+    case tok_extern: HandleExtern(); break;
+    default: HandleTopLevelExpression(); break;
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// "Library" functions that can be "extern'd" from user code.
+//===----------------------------------------------------------------------===//
+
+/// putchard - putchar that takes a double and returns 0.
+extern "C"
+double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/// printd - printf that takes a double and prints it as "%f\n", returning 0.
+extern "C"
+double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Main driver code.
+//===----------------------------------------------------------------------===//
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext &Context = getGlobalContext();
+
+  // Install standard binary operators.
+  // 1 is lowest precedence.
+  BinopPrecedence['='] = 2;
+  BinopPrecedence['<'] = 10;
+  BinopPrecedence['+'] = 20;
+  BinopPrecedence['-'] = 20;
+  BinopPrecedence['*'] = 40;  // highest.
+
+  // Prime the first token.
+  fprintf(stderr, "ready> ");
+  getNextToken();
+
+  // Make the module, which holds all the code.
+ TheModule = new Module("my cool jit", Context); + + // Create the JIT. This takes ownership of the module. + std::string ErrStr; + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + if (!TheExecutionEngine) { + fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); + exit(1); + } + + FunctionPassManager OurFPM(TheModule); + + // Set up the optimizer pipeline. Start with registering info about how the + // target lays out data structures. + OurFPM.add(new TargetData(*TheExecutionEngine->getTargetData())); + // Provide basic AliasAnalysis support for GVN. + OurFPM.add(createBasicAliasAnalysisPass()); + // Promote allocas to registers. + OurFPM.add(createPromoteMemoryToRegisterPass()); + // Do simple "peephole" optimizations and bit-twiddling optzns. + OurFPM.add(createInstructionCombiningPass()); + // Reassociate expressions. + OurFPM.add(createReassociatePass()); + // Eliminate Common SubExpressions. + OurFPM.add(createGVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + OurFPM.add(createCFGSimplificationPass()); + + OurFPM.doInitialization(); + + // Set the global so the code gen can use this. + TheFPM = &OurFPM; + + // Run the main "interpreter loop" now. + MainLoop(); + + TheFPM = 0; + + // Print out all of the generated code. + TheModule->dump(); + + return 0; +} diff --git a/final/examples/Kaleidoscope/Makefile b/final/examples/Kaleidoscope/Makefile new file mode 100644 index 00000000000..bd0c252c2c0 --- /dev/null +++ b/final/examples/Kaleidoscope/Makefile @@ -0,0 +1,15 @@ +##===- examples/Kaleidoscope/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7 + +include $(LEVEL)/Makefile.common diff --git a/final/examples/Makefile b/final/examples/Makefile new file mode 100644 index 00000000000..50a6db76aa2 --- /dev/null +++ b/final/examples/Makefile @@ -0,0 +1,32 @@ +##===- examples/Makefile -----------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=.. 
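# (Editor's note: Makefile.common recurses into every directory listed in
#  PARALLEL_DIRS below, and those subdirectories may be built concurrently.
#  The $(filter ...) test used further down is the usual make idiom for list
#  membership; e.g.
#    ifeq ($(filter $(BINDINGS_TO_BUILD),ocaml),ocaml)
#  is true exactly when "ocaml" is one of the words in BINDINGS_TO_BUILD.)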
+
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS:= BrainF Fibonacci HowToUseJIT Kaleidoscope ModuleMaker
+
+ifeq ($(HAVE_PTHREAD),1)
+PARALLEL_DIRS += ParallelJIT
+endif
+
+ifeq ($(LLVM_ON_UNIX),1)
+  ifeq ($(ARCH),x86)
+    PARALLEL_DIRS += ExceptionDemo
+  endif
+  ifeq ($(ARCH),x86_64)
+    PARALLEL_DIRS += ExceptionDemo
+  endif
+endif
+
+ifeq ($(filter $(BINDINGS_TO_BUILD),ocaml),ocaml)
+  PARALLEL_DIRS += OCaml-Kaleidoscope
+endif
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/ModuleMaker/CMakeLists.txt b/final/examples/ModuleMaker/CMakeLists.txt
new file mode 100644
index 00000000000..81e911560bd
--- /dev/null
+++ b/final/examples/ModuleMaker/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(LLVM_LINK_COMPONENTS bitwriter)
+
+add_llvm_example(ModuleMaker
+  ModuleMaker.cpp
+  )
diff --git a/final/examples/ModuleMaker/Makefile b/final/examples/ModuleMaker/Makefile
new file mode 100644
index 00000000000..9454cf514dc
--- /dev/null
+++ b/final/examples/ModuleMaker/Makefile
@@ -0,0 +1,14 @@
+##===- examples/ModuleMaker/Makefile -----------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL=../..
+TOOLNAME=ModuleMaker
+EXAMPLE_TOOL = 1
+LINK_COMPONENTS := bitwriter
+
+include $(LEVEL)/Makefile.common
diff --git a/final/examples/ModuleMaker/ModuleMaker.cpp b/final/examples/ModuleMaker/ModuleMaker.cpp
new file mode 100644
index 00000000000..6bc52c12a03
--- /dev/null
+++ b/final/examples/ModuleMaker/ModuleMaker.cpp
@@ -0,0 +1,64 @@
+//===- examples/ModuleMaker/ModuleMaker.cpp - Example project ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is a simple example that creates an LLVM module "from scratch",
+// emitting it as a bitcode file to standard out. This is just to show how
+// LLVM projects work and to demonstrate some of the LLVM APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+int main() {
+  LLVMContext Context;
+
+  // Create the "module" or "program" or "translation unit" to hold the
+  // function
+  Module *M = new Module("test", Context);
+
+  // Create the main function: first create the type 'int ()'
+  FunctionType *FT =
+    FunctionType::get(Type::getInt32Ty(Context), /*not vararg*/false);
+
+  // By passing a module as the last parameter to the Function constructor,
+  // it automatically gets appended to the Module.
+  Function *F = Function::Create(FT, Function::ExternalLinkage, "main", M);
+
+  // Add a basic block to the function... again, it automatically inserts
+  // because of the last argument.
+  BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", F);
+
+  // Get pointers to the constant integers...
+  Value *Two = ConstantInt::get(Type::getInt32Ty(Context), 2);
+  Value *Three = ConstantInt::get(Type::getInt32Ty(Context), 3);
+
+  // Create the add instruction... does not insert...
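  // (Editor's note: created this way, the instruction is detached and must be
  //  wired into a block explicitly, which push_back does below. LLVM also
  //  provides a self-inserting overload taking a trailing BasicBlock*, e.g.
  //
  //    BinaryOperator::Create(Instruction::Add, Two, Three, "addresult", BB);
  //
  //  which appends to BB as it creates; shown here only as an aside.)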
+ Instruction *Add = BinaryOperator::Create(Instruction::Add, Two, Three, + "addresult"); + + // explicitly insert it into the basic block... + BB->getInstList().push_back(Add); + + // Create the return instruction and add it to the basic block + BB->getInstList().push_back(ReturnInst::Create(Context, Add)); + + // Output the bitcode file to stdout + WriteBitcodeToFile(M, outs()); + + // Delete the module and all of its contents. + delete M; + return 0; +} diff --git a/final/examples/ModuleMaker/README.txt b/final/examples/ModuleMaker/README.txt new file mode 100644 index 00000000000..66a5d3fe0b1 --- /dev/null +++ b/final/examples/ModuleMaker/README.txt @@ -0,0 +1,8 @@ +//===----------------------------------------------------------------------===// +// ModuleMaker Sample project +//===----------------------------------------------------------------------===// + +This project is an extremely simple example of using some simple pieces of the +LLVM API. The actual executable generated by this project simply emits an +LLVM bitcode file to standard output. It is designed to show some basic +usage of LLVM APIs, and how to link to LLVM libraries. diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter2/Makefile new file mode 100644 index 00000000000..8fc03da0fbd --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/Makefile @@ -0,0 +1,22 @@ +##===- examples/OCaml-Kaleidoscope/Chapter2/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 2. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch2 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm + +OCAMLCFLAGS += -pp camlp4of + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/_tags b/final/examples/OCaml-Kaleidoscope/Chapter2/_tags new file mode 100644 index 00000000000..7b9b80b1e94 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/_tags @@ -0,0 +1 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/ast.ml new file mode 100644 index 00000000000..4cc2dea86b7 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/ast.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = Prototype of string * string array + +(* func - This type represents a function definition itself. 
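   For example, "def foo(x) x+1" parses to
     Function (Prototype ("foo", [|"x"|]),
               Binary ('+', Variable "x", Number 1.))
   (an editor's illustration of how the constructors above compose).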
*) +type func = Function of proto * expr diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. 
*) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. 
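 * For example, the input "def foo(x) x+1" lexes to the stream
 *   Def; Ident "foo"; Kwd '('; Ident "x"; Kwd ')'; Ident "x"; Kwd '+';
 *   Number 1.
 * (an editor's illustration of the variants that follow).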
*) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml new file mode 100644 index 00000000000..01c85bd70d2 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/toplevel.ml @@ -0,0 +1,34 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +(* top ::= definition | external | expression | ';' *) +let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + ignore(Parser.parse_definition stream); + print_endline "parsed a function definition."; + | Token.Extern -> + ignore(Parser.parse_extern stream); + print_endline "parsed an extern."; + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + ignore(Parser.parse_toplevel stream); + print_endline "parsed a top-level expr"; + with Stream.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter2/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter2/toy.ml new file mode 100644 index 00000000000..42b19fec001 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter2/toy.ml @@ -0,0 +1,21 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter3/Makefile new file mode 100644 index 00000000000..fdbcd5191f4 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/Makefile @@ -0,0 +1,24 @@ +##===- examples/OCaml-Kaleidoscope/Chapter3/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 3. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. 
+TOOLNAME := OCaml-Kaleidoscope-Ch3
+EXAMPLE_TOOL := 1
+UsedComponents := core
+UsedOcamLibs := llvm llvm_analysis
+
+OCAMLCFLAGS += -pp camlp4of
+
+ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml
+
+include $(LEVEL)/bindings/ocaml/Makefile.ocaml
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/_tags b/final/examples/OCaml-Kaleidoscope/Chapter3/_tags
new file mode 100644
index 00000000000..990490a5db2
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter3/_tags
@@ -0,0 +1,2 @@
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/ast.ml
new file mode 100644
index 00000000000..4cc2dea86b7
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter3/ast.ml
@@ -0,0 +1,25 @@
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml
new file mode 100644
index 00000000000..275cd0bee04
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter3/codegen.ml
@@ -0,0 +1,100 @@
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error.
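   That is, the callee must be invoked with exactly as many arguments as its
   declaration lists; the physical equality (==) is safe here because both
   sides are ints. An equivalent standalone check, as an editor's sketch with
   a hypothetical helper name:

     let check_arity params args =
       if Array.length params <> Array.length args then
         raise (Error "incorrect # arguments passed")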
*) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + +let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + the_function + with e -> + delete_function the_function; + raise e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml new file mode 100644 index 00000000000..b71f5d717ef --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/myocamlbuild.ml @@ -0,0 +1,6 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. 
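   A worked example of the precedence climbing above, as an editor's note:
   parsing "a+b*c" with the precedences installed in toy.ml starts with
   lhs = a, eats '+' (precedence 20), parses the primary b, then peeks at '*'
   (precedence 40). Since 40 > 20 it recurses, folding b*c into the rhs
   first, so the merge below builds
     Binary ('+', Variable "a", Binary ('*', Variable "b", Variable "c"))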
*) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml new file mode 100644 index 00000000000..d1bf5d4c0c6 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/toplevel.ml @@ -0,0 +1,39 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm + +(* top ::= definition | external | expression | ';' *) +let rec main_loop stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + dump_value (Codegen.codegen_func e); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. 
*) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter3/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter3/toy.ml new file mode 100644 index 00000000000..73c1a1ec62a --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter3/toy.ml @@ -0,0 +1,26 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm + +let main () = + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter4/Makefile new file mode 100644 index 00000000000..d9c3f49bea6 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/Makefile @@ -0,0 +1,25 @@ +##===- examples/OCaml-Kaleidoscope/Chapter4/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 4. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch4 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/_tags b/final/examples/OCaml-Kaleidoscope/Chapter4/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/ast.ml new file mode 100644 index 00000000000..4cc2dea86b7 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/ast.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. 
*)
+  | Call of string * expr array
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/bindings.c b/final/examples/OCaml-Kaleidoscope/Chapter4/bindings.c
new file mode 100644
index 00000000000..053513bf0c0
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter4/bindings.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
new file mode 100644
index 00000000000..8957f4c610f
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
@@ -0,0 +1,103 @@
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ -> raise (Error "invalid binary operator")
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error. *)
+      if Array.length params == Array.length args then () else
+        raise (Error "incorrect # arguments passed");
+      let args = Array.map codegen_expr args in
+      build_call callee args "calltmp" builder
+
+let codegen_proto = function
+  | Ast.Prototype (name, args) ->
+      (* Make the function type: double(double,double) etc. *)
+      let doubles = Array.make (Array.length args) double_type in
+      let ft = function_type double_type doubles in
+      let f =
+        match lookup_function name the_module with
+        | None -> declare_function name ft the_module
+
+        (* If 'f' conflicted, there was already something named 'name'. If it
+         * has a body, don't allow redefinition or reextern. *)
+        | Some f ->
+            (* If 'f' already has a body, reject this. *)
+            if block_begin f <> At_end f then
+              raise (Error "redefinition of function");
+
+            (* If 'f' took a different number of arguments, reject. *)
+            if element_type (type_of f) <> ft then
+              raise (Error "redefinition of function with different # args");
+            f
+      in
+
+      (* Set names for all arguments.
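   Naming the parameters both makes the IR readable and doubles as the symbol
   table registration: each argument's llvalue is added to named_values under
   its source name. For a prototype foo(x y), the resulting declaration is,
   schematically,
     declare double @foo(double %x, double %y)
   (an editor's illustration; the iteration follows).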
*) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml new file mode 100644 index 00000000000..22a915552f0 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/lexer.ml @@ -0,0 +1,52 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml new file mode 100644 index 00000000000..ff42664c43b --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/parser.ml new file mode 100644 index 00000000000..83d9874a4ab --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/parser.ml @@ -0,0 +1,122 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. 
*) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/token.ml new file mode 100644 index 00000000000..2ca782e1499 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/token.ml @@ -0,0 +1,15 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. 
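   For example, entering "4+5;" parses to Function (Prototype ("", [||]), body);
   codegen turns that into a nullary function returning double, and the JIT
   call below amounts to this editor's sketch, using the same calls:

     let gv = ExecutionEngine.run_function the_function [||]
                the_execution_engine in
     Printf.printf "Evaluated to %f\n" (GenericValue.as_float
                                          Codegen.double_type gv)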
*) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter4/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter4/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter4/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter5/Makefile new file mode 100644 index 00000000000..f31c10d3c2f --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/Makefile @@ -0,0 +1,25 @@ +##===- examples/OCaml-Kaleidoscope/Chapter5/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 5. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. 
+TOOLNAME := OCaml-Kaleidoscope-Ch5
+EXAMPLE_TOOL := 1
+UsedComponents := core
+UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \
+	llvm_scalar_opts
+
+OCAMLCFLAGS += -pp camlp4of
+
+ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml
+
+include $(LEVEL)/bindings/ocaml/Makefile.ocaml
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/_tags b/final/examples/OCaml-Kaleidoscope/Chapter5/_tags
new file mode 100644
index 00000000000..7a03dba6672
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter5/_tags
@@ -0,0 +1,4 @@
+<{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
+<*.{byte,native}>: g++, use_llvm, use_llvm_analysis
+<*.{byte,native}>: use_llvm_executionengine, use_llvm_target
+<*.{byte,native}>: use_llvm_scalar_opts, use_bindings
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/ast.ml
new file mode 100644
index 00000000000..8a6703db837
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter5/ast.ml
@@ -0,0 +1,31 @@
+(*===----------------------------------------------------------------------===
+ * Abstract Syntax Tree (aka Parse Tree)
+ *===----------------------------------------------------------------------===*)
+
+(* expr - Base type for all expression nodes. *)
+type expr =
+  (* variant for numeric literals like "1.0". *)
+  | Number of float
+
+  (* variant for referencing a variable, like "a". *)
+  | Variable of string
+
+  (* variant for a binary operator. *)
+  | Binary of char * expr * expr
+
+  (* variant for function calls. *)
+  | Call of string * expr array
+
+  (* variant for if/then/else. *)
+  | If of expr * expr * expr
+
+  (* variant for for/in. *)
+  | For of string * expr * expr * expr option * expr
+
+(* proto - This type represents the "prototype" for a function, which captures
+ * its name, and its argument names (thus implicitly the number of arguments the
+ * function takes). *)
+type proto = Prototype of string * string array
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/bindings.c b/final/examples/OCaml-Kaleidoscope/Chapter5/bindings.c
new file mode 100644
index 00000000000..053513bf0c0
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter5/bindings.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0.
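   Because the symbol is linked into the example binary, the JIT resolves it
   for Kaleidoscope externs. An editor's illustration of its use from the
   language side:
     extern putchard(x);
     putchard(65);   # prints 'A'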
*/ +extern double putchard(double X) { + putchar((char)X); + return 0; +} diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml new file mode 100644 index 00000000000..e4570a65759 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml @@ -0,0 +1,225 @@ +(*===----------------------------------------------------------------------=== + * Code Generation + *===----------------------------------------------------------------------===*) + +open Llvm + +exception Error of string + +let context = global_context () +let the_module = create_module context "my cool jit" +let builder = builder context +let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10 +let double_type = double_type context + +let rec codegen_expr = function + | Ast.Number n -> const_float double_type n + | Ast.Variable name -> + (try Hashtbl.find named_values name with + | Not_found -> raise (Error "unknown variable name")) + | Ast.Binary (op, lhs, rhs) -> + let lhs_val = codegen_expr lhs in + let rhs_val = codegen_expr rhs in + begin + match op with + | '+' -> build_fadd lhs_val rhs_val "addtmp" builder + | '-' -> build_fsub lhs_val rhs_val "subtmp" builder + | '*' -> build_fmul lhs_val rhs_val "multmp" builder + | '<' -> + (* Convert bool 0/1 to double 0.0 or 1.0 *) + let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in + build_uitofp i double_type "booltmp" builder + | _ -> raise (Error "invalid binary operator") + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. 
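   The branches are wired up only now because generating the 'then' and 'else'
   code may itself have moved the builder. For "if x < 3 then 1 else 2" the
   emitted skeleton is, as an editor's sketch of the IR shape:
     entry:   %ifcond = fcmp one double %cond, 0.0
              br i1 %ifcond, label %then, label %else
     then:    br label %ifcont
     else:    br label %ifcont
     ifcont:  %iftmp = phi double [ 1.0, %then ], [ 2.0, %else ]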
*) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + +let codegen_proto = function + | Ast.Prototype (name, args) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. 
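The loop variable starts life as a one-armed phi: build_phi records only the preheader edge, and add_incoming patches in the backedge arm (nextvar, from the block that ends the loop body) once that block exists. A small sketch exercising the whole Ast.For path (demo_for is a hypothetical driver; it assumes putchard was declared via an extern first):

    (* Lower "for i = 1, i < 4 in putchard(42)"; the phi named "i" ends up
     * with two arms: [start, preheader] and [nextvar, loop-end]. *)
    let demo_for the_fpm =
      let body = Ast.Call ("putchard", [| Ast.Number 42.0 |]) in
      let loop = Ast.For ("i", Ast.Number 1.0,
                          Ast.Binary ('<', Ast.Variable "i", Ast.Number 4.0),
                          None, body) in
      Llvm.dump_value
        (Codegen.codegen_func the_fpm
           (Ast.Function (Ast.Prototype ("", [||]), loop)))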
*) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml new file mode 100644 index 00000000000..d8c1d563010 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/lexer.ml @@ -0,0 +1,57 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. 
'9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml new file mode 100644 index 00000000000..ff42664c43b --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/parser.ml new file mode 100644 index 00000000000..bfb4f168458 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/parser.ml @@ -0,0 +1,158 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? 
"expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_primary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/token.ml new file mode 100644 index 00000000000..5eb502fb894 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/token.ml @@ -0,0 +1,19 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. 
*) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter5/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter5/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter5/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. 
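The function pass manager set up here follows a fixed lifecycle: create it against the module, register the target's data layout, add the per-function passes (the four add_* calls just below), then run PassManager.initialize once; after that, codegen_func invokes PassManager.run_function on every function it finishes. A condensed sketch of that lifecycle (setup_fpm is a hypothetical helper assuming toy.ml's opens of Llvm, Llvm_executionengine, and Llvm_target):

    let setup_fpm the_execution_engine =
      let the_fpm = PassManager.create_function Codegen.the_module in
      (* Register how the target lays out data structures. *)
      TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
      (* ... add_* pass registrations, as in the lines below ... *)
      ignore (PassManager.initialize the_fpm);
      the_fpm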
*) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter6/Makefile new file mode 100644 index 00000000000..21f0c53df4b --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/Makefile @@ -0,0 +1,34 @@ +##===- examples/OCaml-Kaleidoscope/Chapter6/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 6. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch6 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +OcamlSources1 = \ + $(PROJ_SRC_DIR)/ast.ml \ + $(PROJ_SRC_DIR)/parser.ml \ + $(PROJ_SRC_DIR)/codegen.ml \ + $(PROJ_SRC_DIR)/lexer.ml \ + $(PROJ_SRC_DIR)/token.ml \ + $(PROJ_SRC_DIR)/toplevel.ml \ + $(PROJ_SRC_DIR)/toy.ml + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/_tags b/final/examples/OCaml-Kaleidoscope/Chapter6/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/ast.ml new file mode 100644 index 00000000000..99088cfd2f5 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/ast.ml @@ -0,0 +1,36 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). 
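Chapter 6's AST adds two things: the Unary node above, and (just below) a BinOpPrototype that carries the precedence a user-defined binary operator was declared with. Concretely, using the operator definitions the tutorial text works through (the source strings in comments are hypothetical inputs):

    let _not_x = Ast.Unary ('!', Ast.Variable "x")   (* source: !x *)
    let _pipe_proto =   (* source: def binary| 5 (LHS RHS) ... *)
      Ast.BinOpPrototype ("binary|", [| "LHS"; "RHS" |], 5)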
*)
+type proto =
+  | Prototype of string * string array
+  | BinOpPrototype of string * string array * int
+
+(* func - This type represents a function definition itself. *)
+type func = Function of proto * expr
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/bindings.c b/final/examples/OCaml-Kaleidoscope/Chapter6/bindings.c
new file mode 100644
index 00000000000..90faed15813
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter6/bindings.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/* printd - printf that takes a double prints it as "%f\n", returning 0. */
+extern double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
new file mode 100644
index 00000000000..96674359440
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
@@ -0,0 +1,251 @@
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      (try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name"))
+  | Ast.Unary (op, operand) ->
+      let operand = codegen_expr operand in
+      let callee = "unary" ^ (String.make 1 op) in
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown unary operator")
+      in
+      build_call callee [|operand|] "unop" builder
+  | Ast.Binary (op, lhs, rhs) ->
+      let lhs_val = codegen_expr lhs in
+      let rhs_val = codegen_expr rhs in
+      begin
+        match op with
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+        | '<' ->
+            (* Convert bool 0/1 to double 0.0 or 1.0 *)
+            let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+            build_uitofp i double_type "booltmp" builder
+        | _ ->
+            (* If it wasn't a builtin binary operator, it must be a user defined
+             * one. Emit a call to it. *)
+            let callee = "binary" ^ (String.make 1 op) in
+            let callee =
+              match lookup_function callee the_module with
+              | Some callee -> callee
+              | None -> raise (Error "binary operator not found!")
+            in
+            build_call callee [|lhs_val; rhs_val|] "binop" builder
+      end
+  | Ast.Call (callee, args) ->
+      (* Look up the name in the module table. *)
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown function referenced")
+      in
+      let params = params callee in
+
+      (* If argument mismatch error.
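The fall-through case in the Binary match above is the heart of user-defined operators: an unknown operator char is not an error, it is lowered as a call to the function named "binary" followed by the char (and "unary" likewise, in the Unary case). A sketch of that dispatch in isolation (lower_user_binop is an illustrative name; it reuses only Codegen's module and builder):

    (* Once "def binary| ..." has been compiled, '|' lowers to a plain call. *)
    let lower_user_binop lhs_val rhs_val =
      match Llvm.lookup_function "binary|" Codegen.the_module with
      | Some callee ->
          Llvm.build_call callee [| lhs_val; rhs_val |] "binop" Codegen.builder
      | None -> raise (Codegen.Error "binary operator not found!")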
*) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let preheader_bb = insertion_block builder in + let the_function = block_parent preheader_bb in + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Start the PHI node with an entry for start. *) + let variable = build_phi [(start_val, preheader_bb)] var_name builder in + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name variable; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. 
*) + | None -> const_float double_type 1.0 + in + + let next_var = build_add variable step_val "nextvar" builder in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let loop_end_bb = insertion_block builder in + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Add a new entry to the PHI node for the backedge. *) + add_incoming (next_var, loop_end_bb) variable; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + +let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml new file mode 100644 index 00000000000..5fe9da4f16c --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/lexer.ml @@ -0,0 +1,59 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. 
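Note when codegen_func installs a user-defined operator: the precedence entry is added before the body is compiled, by peeling the operator char off the end of the mangled name ("binary|" yields '|'). Installing first is what allows an operator's own body to use that operator recursively. The same step in isolation (install_binop is an illustrative helper):

    let install_binop name prec =
      (* The operator char is the last character of e.g. "binary|". *)
      let op = name.[String.length name - 1] in
      Hashtbl.add Parser.binop_precedence op prec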
*) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. '9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml new file mode 100644 index 00000000000..54d3fd97709 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/parser.ml new file mode 100644 index 00000000000..da443c5bb68 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/parser.ml @@ -0,0 +1,195 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? 
"expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. *) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* unary + * ::= primary + * ::= '!' unary *) +and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the unary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? 
(id) *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/token.ml new file mode 100644 index 00000000000..c7a5f95092d --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/token.ml @@ -0,0 +1,22 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. *) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. 
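The three prototype forms accepted above map out as follows: a plain identifier gives an ordinary Prototype; "unary" or "binary" followed by the operator char gives a mangled name, with the operand count (1 for unary, 2 for binary) checked before construction and an omitted precedence defaulting to 30. Illustrative results (the source strings in comments are hypothetical inputs):

    let _unary_not =    (* def unary!(v) ...        *)
      Ast.Prototype ("unary!", [| "v" |])
    let _binary_gt =    (* def binary> 10 (LHS RHS) *)
      Ast.BinOpPrototype ("binary>", [| "LHS"; "RHS" |], 10)
    let _binary_amp =   (* def binary& (LHS RHS), precedence omitted *)
      Ast.BinOpPrototype ("binary&", [| "LHS"; "RHS" |], 30)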
*) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter6/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter6/toy.ml new file mode 100644 index 00000000000..5f9d912499c --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter6/toy.ml @@ -0,0 +1,53 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. *) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. 
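For a top-level expression, the driver above hands the freshly compiled anonymous function straight to the JIT: run_function executes it with no arguments and returns a GenericValue, which as_float unboxes using the known double return type. The same call sequence, isolated (eval_anonymous is a hypothetical helper assuming the toplevel's opens):

    let eval_anonymous the_execution_engine the_function =
      let result =
        ExecutionEngine.run_function the_function [||] the_execution_engine
      in
      GenericValue.as_float Codegen.double_type result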
*) + dump_module Codegen.the_module +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/Makefile b/final/examples/OCaml-Kaleidoscope/Chapter7/Makefile new file mode 100644 index 00000000000..99686e17ea8 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/Makefile @@ -0,0 +1,34 @@ +##===- examples/OCaml-Kaleidoscope/Chapter7/Makefile -------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +# +# This is the makefile for the Objective Caml kaleidoscope tutorial, chapter 7. +# +##===----------------------------------------------------------------------===## + +LEVEL := ../../.. +TOOLNAME := OCaml-Kaleidoscope-Ch7 +EXAMPLE_TOOL := 1 +UsedComponents := core +UsedOcamLibs := llvm llvm_analysis llvm_executionengine llvm_target \ + llvm_scalar_opts + +OCAMLCFLAGS += -pp camlp4of + +OcamlSources1 = \ + $(PROJ_SRC_DIR)/ast.ml \ + $(PROJ_SRC_DIR)/parser.ml \ + $(PROJ_SRC_DIR)/codegen.ml \ + $(PROJ_SRC_DIR)/lexer.ml \ + $(PROJ_SRC_DIR)/token.ml \ + $(PROJ_SRC_DIR)/toplevel.ml \ + $(PROJ_SRC_DIR)/toy.ml + +ExcludeSources = $(PROJ_SRC_DIR)/myocamlbuild.ml + +include $(LEVEL)/bindings/ocaml/Makefile.ocaml diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/_tags b/final/examples/OCaml-Kaleidoscope/Chapter7/_tags new file mode 100644 index 00000000000..7a03dba6672 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/_tags @@ -0,0 +1,4 @@ +<{lexer,parser}.ml>: use_camlp4, pp(camlp4of) +<*.{byte,native}>: g++, use_llvm, use_llvm_analysis +<*.{byte,native}>: use_llvm_executionengine, use_llvm_target +<*.{byte,native}>: use_llvm_scalar_opts, use_bindings diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/ast.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/ast.ml new file mode 100644 index 00000000000..c699e8074a4 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/ast.ml @@ -0,0 +1,39 @@ +(*===----------------------------------------------------------------------=== + * Abstract Syntax Tree (aka Parse Tree) + *===----------------------------------------------------------------------===*) + +(* expr - Base type for all expression nodes. *) +type expr = + (* variant for numeric literals like "1.0". *) + | Number of float + + (* variant for referencing a variable, like "a". *) + | Variable of string + + (* variant for a unary operator. *) + | Unary of char * expr + + (* variant for a binary operator. *) + | Binary of char * expr * expr + + (* variant for function calls. *) + | Call of string * expr array + + (* variant for if/then/else. *) + | If of expr * expr * expr + + (* variant for for/in. *) + | For of string * expr * expr * expr option * expr + + (* variant for var/in. *) + | Var of (string * expr option) array * expr + +(* proto - This type represents the "prototype" for a function, which captures + * its name, and its argument names (thus implicitly the number of arguments the + * function takes). *) +type proto = + | Prototype of string * string array + | BinOpPrototype of string * string array * int + +(* func - This type represents a function definition itself. 
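The new Var variant carries an array of (name, optional initializer) pairs plus the body they scope over; codegen later defaults a missing initializer to 0.0. For example, "var a = 1, b = a in b" builds (the binding name is illustrative):

    let _var_example =
      Ast.Var ([| ("a", Some (Ast.Number 1.0));
                  ("b", Some (Ast.Variable "a")) |],
               Ast.Variable "b")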
*)
+type func = Function of proto * expr
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/bindings.c b/final/examples/OCaml-Kaleidoscope/Chapter7/bindings.c
new file mode 100644
index 00000000000..90faed15813
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter7/bindings.c
@@ -0,0 +1,13 @@
+#include <stdio.h>
+
+/* putchard - putchar that takes a double and returns 0. */
+extern double putchard(double X) {
+  putchar((char)X);
+  return 0;
+}
+
+/* printd - printf that takes a double prints it as "%f\n", returning 0. */
+extern double printd(double X) {
+  printf("%f\n", X);
+  return 0;
+}
diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
new file mode 100644
index 00000000000..e66396eb6ac
--- /dev/null
+++ b/final/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
@@ -0,0 +1,370 @@
+(*===----------------------------------------------------------------------===
+ * Code Generation
+ *===----------------------------------------------------------------------===*)
+
+open Llvm
+
+exception Error of string
+
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
+let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
+let double_type = double_type context
+
+(* Create an alloca instruction in the entry block of the function. This
+ * is used for mutable variables etc. *)
+let create_entry_block_alloca the_function var_name =
+  let builder = builder_at context (instr_begin (entry_block the_function)) in
+  build_alloca double_type var_name builder
+
+let rec codegen_expr = function
+  | Ast.Number n -> const_float double_type n
+  | Ast.Variable name ->
+      let v = try Hashtbl.find named_values name with
+        | Not_found -> raise (Error "unknown variable name")
+      in
+      (* Load the value. *)
+      build_load v name builder
+  | Ast.Unary (op, operand) ->
+      let operand = codegen_expr operand in
+      let callee = "unary" ^ (String.make 1 op) in
+      let callee =
+        match lookup_function callee the_module with
+        | Some callee -> callee
+        | None -> raise (Error "unknown unary operator")
+      in
+      build_call callee [|operand|] "unop" builder
+  | Ast.Binary (op, lhs, rhs) ->
+      begin match op with
+      | '=' ->
+          (* Special case '=' because we don't want to emit the LHS as an
+           * expression. *)
+          let name =
+            match lhs with
+            | Ast.Variable name -> name
+            | _ -> raise (Error "destination of '=' must be a variable")
+          in
+
+          (* Codegen the rhs. *)
+          let val_ = codegen_expr rhs in
+
+          (* Lookup the name. *)
+          let variable = try Hashtbl.find named_values name with
+            | Not_found -> raise (Error "unknown variable name")
+          in
+          ignore(build_store val_ variable builder);
+          val_
+      | _ ->
+          let lhs_val = codegen_expr lhs in
+          let rhs_val = codegen_expr rhs in
+          begin
+            match op with
+            | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+            | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+            | '*' -> build_fmul lhs_val rhs_val "multmp" builder
+            | '<' ->
+                (* Convert bool 0/1 to double 0.0 or 1.0 *)
+                let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
+                build_uitofp i double_type "booltmp" builder
+            | _ ->
+                (* If it wasn't a builtin binary operator, it must be a user defined
+                 * one. Emit a call to it.
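Because '=' must not evaluate its left-hand side, the special case above pattern-matches the LHS for a bare variable name and emits a store into that variable's alloca; every ordinary read of a variable now goes through build_load instead. The net effect for "x = x + 1", in isolation (assign_increment is illustrative, and it assumes the driver registered '=' in binop_precedence at a low precedence, as the tutorial's Chapter 7 does):

    let assign_increment x_alloca =
      let cur = Llvm.build_load x_alloca "x" Codegen.builder in
      let one = Llvm.const_float Codegen.double_type 1.0 in
      let sum = Llvm.build_fadd cur one "addtmp" Codegen.builder in
      ignore (Llvm.build_store sum x_alloca Codegen.builder);
      sum   (* '=' yields the stored value, like val_ above *)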
*) + let callee = "binary" ^ (String.make 1 op) in + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "binary operator not found!") + in + build_call callee [|lhs_val; rhs_val|] "binop" builder + end + end + | Ast.Call (callee, args) -> + (* Look up the name in the module table. *) + let callee = + match lookup_function callee the_module with + | Some callee -> callee + | None -> raise (Error "unknown function referenced") + in + let params = params callee in + + (* If argument mismatch error. *) + if Array.length params == Array.length args then () else + raise (Error "incorrect # arguments passed"); + let args = Array.map codegen_expr args in + build_call callee args "calltmp" builder + | Ast.If (cond, then_, else_) -> + let cond = codegen_expr cond in + + (* Convert condition to a bool by comparing equal to 0.0 *) + let zero = const_float double_type 0.0 in + let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in + + (* Grab the first block so that we might later add the conditional branch + * to it at the end of the function. *) + let start_bb = insertion_block builder in + let the_function = block_parent start_bb in + + let then_bb = append_block context "then" the_function in + + (* Emit 'then' value. *) + position_at_end then_bb builder; + let then_val = codegen_expr then_ in + + (* Codegen of 'then' can change the current block, update then_bb for the + * phi. We create a new name because one is used for the phi node, and the + * other is used for the conditional branch. *) + let new_then_bb = insertion_block builder in + + (* Emit 'else' value. *) + let else_bb = append_block context "else" the_function in + position_at_end else_bb builder; + let else_val = codegen_expr else_ in + + (* Codegen of 'else' can change the current block, update else_bb for the + * phi. *) + let new_else_bb = insertion_block builder in + + (* Emit merge block. *) + let merge_bb = append_block context "ifcont" the_function in + position_at_end merge_bb builder; + let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in + let phi = build_phi incoming "iftmp" builder in + + (* Return to the start block to add the conditional branch. *) + position_at_end start_bb builder; + ignore (build_cond_br cond_val then_bb else_bb builder); + + (* Set a unconditional branch at the end of the 'then' block and the + * 'else' block to the 'merge' block. *) + position_at_end new_then_bb builder; ignore (build_br merge_bb builder); + position_at_end new_else_bb builder; ignore (build_br merge_bb builder); + + (* Finally, set the builder to the end of the merge block. *) + position_at_end merge_bb builder; + + phi + | Ast.For (var_name, start, end_, step, body) -> + (* Output this as: + * var = alloca double + * ... + * start = startexpr + * store start -> var + * goto loop + * loop: + * ... + * bodyexpr + * ... + * loopend: + * step = stepexpr + * endcond = endexpr + * + * curvar = load var + * nextvar = curvar + step + * store nextvar -> var + * br endcond, loop, endloop + * outloop: *) + + let the_function = block_parent (insertion_block builder) in + + (* Create an alloca for the variable in the entry block. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Emit the start code first, without 'variable' in scope. *) + let start_val = codegen_expr start in + + (* Store the value into the alloca. 
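Compared with Chapter 5, the induction variable here lives in stack memory rather than in a phi: each iteration reloads it, adds the step, and stores it back, which is what lets the loop body itself assign to the variable. The allocas stay cheap because the mem2reg pass can promote them back into SSA registers; Llvm_scalar_opts exposes that pass, and the chapter's driver would register it alongside the other passes (a sketch; whether this toy.ml includes the call is assumed here, not shown in this hunk):

    let add_promotion the_fpm =
      (* Promote allocas to registers (mem2reg). *)
      add_memory_to_register_promotion the_fpm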
*) + ignore(build_store start_val alloca builder); + + (* Make the new basic block for the loop header, inserting after current + * block. *) + let loop_bb = append_block context "loop" the_function in + + (* Insert an explicit fall through from the current block to the + * loop_bb. *) + ignore (build_br loop_bb builder); + + (* Start insertion in loop_bb. *) + position_at_end loop_bb builder; + + (* Within the loop, the variable is defined equal to the PHI node. If it + * shadows an existing variable, we have to restore it, so save it + * now. *) + let old_val = + try Some (Hashtbl.find named_values var_name) with Not_found -> None + in + Hashtbl.add named_values var_name alloca; + + (* Emit the body of the loop. This, like any other expr, can change the + * current BB. Note that we ignore the value computed by the body, but + * don't allow an error *) + ignore (codegen_expr body); + + (* Emit the step value. *) + let step_val = + match step with + | Some step -> codegen_expr step + (* If not specified, use 1.0. *) + | None -> const_float double_type 1.0 + in + + (* Compute the end condition. *) + let end_cond = codegen_expr end_ in + + (* Reload, increment, and restore the alloca. This handles the case where + * the body of the loop mutates the variable. *) + let cur_var = build_load alloca var_name builder in + let next_var = build_add cur_var step_val "nextvar" builder in + ignore(build_store next_var alloca builder); + + (* Convert condition to a bool by comparing equal to 0.0. *) + let zero = const_float double_type 0.0 in + let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in + + (* Create the "after loop" block and insert it. *) + let after_bb = append_block context "afterloop" the_function in + + (* Insert the conditional branch into the end of loop_end_bb. *) + ignore (build_cond_br end_cond loop_bb after_bb builder); + + (* Any new code will be inserted in after_bb. *) + position_at_end after_bb builder; + + (* Restore the unshadowed variable. *) + begin match old_val with + | Some old_val -> Hashtbl.add named_values var_name old_val + | None -> () + end; + + (* for expr always returns 0.0. *) + const_null double_type + | Ast.Var (var_names, body) -> + let old_bindings = ref [] in + + let the_function = block_parent (insertion_block builder) in + + (* Register all variables and emit their initializer. *) + Array.iter (fun (var_name, init) -> + (* Emit the initializer before adding the variable to scope, this + * prevents the initializer from referencing the variable itself, and + * permits stuff like this: + * var a = 1 in + * var a = a in ... # refers to outer 'a'. *) + let init_val = + match init with + | Some init -> codegen_expr init + (* If not specified, use 0.0. *) + | None -> const_float double_type 0.0 + in + + let alloca = create_entry_block_alloca the_function var_name in + ignore(build_store init_val alloca builder); + + (* Remember the old variable binding so that we can restore the binding + * when we unrecurse. *) + begin + try + let old_value = Hashtbl.find named_values var_name in + old_bindings := (var_name, old_value) :: !old_bindings; + with Not_found -> () + end; + + (* Remember this binding. *) + Hashtbl.add named_values var_name alloca; + ) var_names; + + (* Codegen the body, now that all vars are in scope. *) + let body_val = codegen_expr body in + + (* Pop all our variables from scope. *) + List.iter (fun (var_name, old_value) -> + Hashtbl.add named_values var_name old_value + ) !old_bindings; + + (* Return the body computation. 
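Two scoping details above are worth spelling out: the initializer is emitted before the new name is added to named_values, so an inner binding's initializer still sees the outer one, and old_bindings re-adds whatever was shadowed once the body is done. The comment's own example, written out as an AST (the binding name is illustrative):

    (* "var a = 1 in (var a = a in a)": the inner a is initialized from the
     * outer a, then shadows it for the inner body only. *)
    let _shadowing =
      Ast.Var ([| ("a", Some (Ast.Number 1.0)) |],
               Ast.Var ([| ("a", Some (Ast.Variable "a")) |],
                        Ast.Variable "a"))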
*) + body_val + +let codegen_proto = function + | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) -> + (* Make the function type: double(double,double) etc. *) + let doubles = Array.make (Array.length args) double_type in + let ft = function_type double_type doubles in + let f = + match lookup_function name the_module with + | None -> declare_function name ft the_module + + (* If 'f' conflicted, there was already something named 'name'. If it + * has a body, don't allow redefinition or reextern. *) + | Some f -> + (* If 'f' already has a body, reject this. *) + if block_begin f <> At_end f then + raise (Error "redefinition of function"); + + (* If 'f' took a different number of arguments, reject. *) + if element_type (type_of f) <> ft then + raise (Error "redefinition of function with different # args"); + f + in + + (* Set names for all arguments. *) + Array.iteri (fun i a -> + let n = args.(i) in + set_value_name n a; + Hashtbl.add named_values n a; + ) (params f); + f + +(* Create an alloca for each argument and register the argument in the symbol + * table so that references to it will succeed. *) +let create_argument_allocas the_function proto = + let args = match proto with + | Ast.Prototype (_, args) | Ast.BinOpPrototype (_, args, _) -> args + in + Array.iteri (fun i ai -> + let var_name = args.(i) in + (* Create an alloca for this variable. *) + let alloca = create_entry_block_alloca the_function var_name in + + (* Store the initial value into the alloca. *) + ignore(build_store ai alloca builder); + + (* Add arguments to variable symbol table. *) + Hashtbl.add named_values var_name alloca; + ) (params the_function) + +let codegen_func the_fpm = function + | Ast.Function (proto, body) -> + Hashtbl.clear named_values; + let the_function = codegen_proto proto in + + (* If this is an operator, install it. *) + begin match proto with + | Ast.BinOpPrototype (name, args, prec) -> + let op = name.[String.length name - 1] in + Hashtbl.add Parser.binop_precedence op prec; + | _ -> () + end; + + (* Create a new basic block to start insertion into. *) + let bb = append_block context "entry" the_function in + position_at_end bb builder; + + try + (* Add all arguments to the symbol table and create their allocas. *) + create_argument_allocas the_function proto; + + let ret_val = codegen_expr body in + + (* Finish off the function. *) + let _ = build_ret ret_val builder in + + (* Validate the generated code, checking for consistency. *) + Llvm_analysis.assert_valid_function the_function; + + (* Optimize the function. *) + let _ = PassManager.run_function the_function the_fpm in + + the_function + with e -> + delete_function the_function; + raise e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml new file mode 100644 index 00000000000..922cabf0428 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/lexer.ml @@ -0,0 +1,60 @@ +(*===----------------------------------------------------------------------=== + * Lexer + *===----------------------------------------------------------------------===*) + +let rec lex = parser + (* Skip any whitespace. *) + | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream + + (* identifier: [a-zA-Z][a-zA-Z0-9] *) + | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_ident buffer stream + + (* number: [0-9.]+ *) + | [< ' ('0' .. 
'9' as c); stream >] -> + let buffer = Buffer.create 1 in + Buffer.add_char buffer c; + lex_number buffer stream + + (* Comment until end of line. *) + | [< ' ('#'); stream >] -> + lex_comment stream + + (* Otherwise, just return the character as its ascii value. *) + | [< 'c; stream >] -> + [< 'Token.Kwd c; lex stream >] + + (* end of stream. *) + | [< >] -> [< >] + +and lex_number buffer = parser + | [< ' ('0' .. '9' | '.' as c); stream >] -> + Buffer.add_char buffer c; + lex_number buffer stream + | [< stream=lex >] -> + [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >] + +and lex_ident buffer = parser + | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] -> + Buffer.add_char buffer c; + lex_ident buffer stream + | [< stream=lex >] -> + match Buffer.contents buffer with + | "def" -> [< 'Token.Def; stream >] + | "extern" -> [< 'Token.Extern; stream >] + | "if" -> [< 'Token.If; stream >] + | "then" -> [< 'Token.Then; stream >] + | "else" -> [< 'Token.Else; stream >] + | "for" -> [< 'Token.For; stream >] + | "in" -> [< 'Token.In; stream >] + | "binary" -> [< 'Token.Binary; stream >] + | "unary" -> [< 'Token.Unary; stream >] + | "var" -> [< 'Token.Var; stream >] + | id -> [< 'Token.Ident id; stream >] + +and lex_comment = parser + | [< ' ('\n'); stream=lex >] -> stream + | [< 'c; e=lex_comment >] -> e + | [< >] -> [< >] diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml new file mode 100644 index 00000000000..54d3fd97709 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/myocamlbuild.ml @@ -0,0 +1,10 @@ +open Ocamlbuild_plugin;; + +ocaml_lib ~extern:true "llvm";; +ocaml_lib ~extern:true "llvm_analysis";; +ocaml_lib ~extern:true "llvm_executionengine";; +ocaml_lib ~extern:true "llvm_target";; +ocaml_lib ~extern:true "llvm_scalar_opts";; + +flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);; +dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];; diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/parser.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/parser.ml new file mode 100644 index 00000000000..c0e7db8349a --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/parser.ml @@ -0,0 +1,221 @@ +(*===---------------------------------------------------------------------=== + * Parser + *===---------------------------------------------------------------------===*) + +(* binop_precedence - This holds the precedence for each binary operator that is + * defined *) +let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10 + +(* precedence - Get the precedence of the pending binary operator token. *) +let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1 + +(* primary + * ::= identifier + * ::= numberexpr + * ::= parenexpr + * ::= ifexpr + * ::= forexpr + * ::= varexpr *) +let rec parse_primary = parser + (* numberexpr ::= number *) + | [< 'Token.Number n >] -> Ast.Number n + + (* parenexpr ::= '(' expression ')' *) + | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e + + (* identifierexpr + * ::= identifier + * ::= identifier '(' argumentexpr ')' *) + | [< 'Token.Ident id; stream >] -> + let rec parse_args accumulator = parser + | [< e=parse_expr; stream >] -> + begin parser + | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e + | [< >] -> e :: accumulator + end stream + | [< >] -> accumulator + in + let rec parse_ident id = parser + (* Call. 
*) + | [< 'Token.Kwd '('; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')'">] -> + Ast.Call (id, Array.of_list (List.rev args)) + + (* Simple variable ref. *) + | [< >] -> Ast.Variable id + in + parse_ident id stream + + (* ifexpr ::= 'if' expr 'then' expr 'else' expr *) + | [< 'Token.If; c=parse_expr; + 'Token.Then ?? "expected 'then'"; t=parse_expr; + 'Token.Else ?? "expected 'else'"; e=parse_expr >] -> + Ast.If (c, t, e) + + (* forexpr + ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *) + | [< 'Token.For; + 'Token.Ident id ?? "expected identifier after for"; + 'Token.Kwd '=' ?? "expected '=' after for"; + stream >] -> + begin parser + | [< + start=parse_expr; + 'Token.Kwd ',' ?? "expected ',' after for"; + end_=parse_expr; + stream >] -> + let step = + begin parser + | [< 'Token.Kwd ','; step=parse_expr >] -> Some step + | [< >] -> None + end stream + in + begin parser + | [< 'Token.In; body=parse_expr >] -> + Ast.For (id, start, end_, step, body) + | [< >] -> + raise (Stream.Error "expected 'in' after for") + end stream + | [< >] -> + raise (Stream.Error "expected '=' after for") + end stream + + (* varexpr + * ::= 'var' identifier ('=' expression? + * (',' identifier ('=' expression)?)* 'in' expression *) + | [< 'Token.Var; + (* At least one variable name is required. *) + 'Token.Ident id ?? "expected identifier after var"; + init=parse_var_init; + var_names=parse_var_names [(id, init)]; + (* At this point, we have to have 'in'. *) + 'Token.In ?? "expected 'in' keyword after 'var'"; + body=parse_expr >] -> + Ast.Var (Array.of_list (List.rev var_names), body) + + | [< >] -> raise (Stream.Error "unknown token when expecting an expression.") + +(* unary + * ::= primary + * ::= '!' unary *) +and parse_unary = parser + (* If this is a unary operator, read it. *) + | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] -> + Ast.Unary (op, operand) + + (* If the current token is not an operator, it must be a primary expr. *) + | [< stream >] -> parse_primary stream + +(* binoprhs + * ::= ('+' primary)* *) +and parse_bin_rhs expr_prec lhs stream = + match Stream.peek stream with + (* If this is a binop, find its precedence. *) + | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c -> + let token_prec = precedence c in + + (* If this is a binop that binds at least as tightly as the current binop, + * consume it, otherwise we are done. *) + if token_prec < expr_prec then lhs else begin + (* Eat the binop. *) + Stream.junk stream; + + (* Parse the primary expression after the binary operator. *) + let rhs = parse_unary stream in + + (* Okay, we know this is a binop. *) + let rhs = + match Stream.peek stream with + | Some (Token.Kwd c2) -> + (* If BinOp binds less tightly with rhs than the operator after + * rhs, let the pending operator take rhs as its lhs. *) + let next_prec = precedence c2 in + if token_prec < next_prec + then parse_bin_rhs (token_prec + 1) rhs stream + else rhs + | _ -> rhs + in + + (* Merge lhs/rhs. *) + let lhs = Ast.Binary (c, lhs, rhs) in + parse_bin_rhs expr_prec lhs stream + end + | _ -> lhs + +and parse_var_init = parser + (* read in the optional initializer. *) + | [< 'Token.Kwd '='; e=parse_expr >] -> Some e + | [< >] -> None + +and parse_var_names accumulator = parser + | [< 'Token.Kwd ','; + 'Token.Ident id ?? 
"expected identifier list after var"; + init=parse_var_init; + e=parse_var_names ((id, init) :: accumulator) >] -> e + | [< >] -> accumulator + +(* expression + * ::= primary binoprhs *) +and parse_expr = parser + | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream + +(* prototype + * ::= id '(' id* ')' + * ::= binary LETTER number? (id, id) + * ::= unary LETTER number? (id) *) +let parse_prototype = + let rec parse_args accumulator = parser + | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e + | [< >] -> accumulator + in + let parse_operator = parser + | [< 'Token.Unary >] -> "unary", 1 + | [< 'Token.Binary >] -> "binary", 2 + in + let parse_binary_precedence = parser + | [< 'Token.Number n >] -> int_of_float n + | [< >] -> 30 + in + parser + | [< 'Token.Ident id; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + (* success. *) + Ast.Prototype (id, Array.of_list (List.rev args)) + | [< (prefix, kind)=parse_operator; + 'Token.Kwd op ?? "expected an operator"; + (* Read the precedence if present. *) + binary_precedence=parse_binary_precedence; + 'Token.Kwd '(' ?? "expected '(' in prototype"; + args=parse_args []; + 'Token.Kwd ')' ?? "expected ')' in prototype" >] -> + let name = prefix ^ (String.make 1 op) in + let args = Array.of_list (List.rev args) in + + (* Verify right number of arguments for operator. *) + if Array.length args != kind + then raise (Stream.Error "invalid number of operands for operator") + else + if kind == 1 then + Ast.Prototype (name, args) + else + Ast.BinOpPrototype (name, args, binary_precedence) + | [< >] -> + raise (Stream.Error "expected function name in prototype") + +(* definition ::= 'def' prototype expression *) +let parse_definition = parser + | [< 'Token.Def; p=parse_prototype; e=parse_expr >] -> + Ast.Function (p, e) + +(* toplevelexpr ::= expression *) +let parse_toplevel = parser + | [< e=parse_expr >] -> + (* Make an anonymous proto. *) + Ast.Function (Ast.Prototype ("", [||]), e) + +(* external ::= 'extern' prototype *) +let parse_extern = parser + | [< 'Token.Extern; e=parse_prototype >] -> e diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/token.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/token.ml new file mode 100644 index 00000000000..1489f0b9e79 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/token.ml @@ -0,0 +1,25 @@ +(*===----------------------------------------------------------------------=== + * Lexer Tokens + *===----------------------------------------------------------------------===*) + +(* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of + * these others for known things. 
*) +type token = + (* commands *) + | Def | Extern + + (* primary *) + | Ident of string | Number of float + + (* unknown *) + | Kwd of char + + (* control *) + | If | Then | Else + | For | In + + (* operators *) + | Binary | Unary + + (* var definition *) + | Var diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml new file mode 100644 index 00000000000..01d24ede149 --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/toplevel.ml @@ -0,0 +1,49 @@ +(*===----------------------------------------------------------------------=== + * Top-Level parsing and JIT Driver + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine + +(* top ::= definition | external | expression | ';' *) +let rec main_loop the_fpm the_execution_engine stream = + match Stream.peek stream with + | None -> () + + (* ignore top-level semicolons. *) + | Some (Token.Kwd ';') -> + Stream.junk stream; + main_loop the_fpm the_execution_engine stream + + | Some token -> + begin + try match token with + | Token.Def -> + let e = Parser.parse_definition stream in + print_endline "parsed a function definition."; + dump_value (Codegen.codegen_func the_fpm e); + | Token.Extern -> + let e = Parser.parse_extern stream in + print_endline "parsed an extern."; + dump_value (Codegen.codegen_proto e); + | _ -> + (* Evaluate a top-level expression into an anonymous function. *) + let e = Parser.parse_toplevel stream in + print_endline "parsed a top-level expr"; + let the_function = Codegen.codegen_func the_fpm e in + dump_value the_function; + + (* JIT the function, returning a function pointer. *) + let result = ExecutionEngine.run_function the_function [||] + the_execution_engine in + + print_string "Evaluated to "; + print_float (GenericValue.as_float Codegen.double_type result); + print_newline (); + with Stream.Error s | Codegen.Error s -> + (* Skip token for error recovery. *) + Stream.junk stream; + print_endline s; + end; + print_string "ready> "; flush stdout; + main_loop the_fpm the_execution_engine stream diff --git a/final/examples/OCaml-Kaleidoscope/Chapter7/toy.ml b/final/examples/OCaml-Kaleidoscope/Chapter7/toy.ml new file mode 100644 index 00000000000..babab28601d --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Chapter7/toy.ml @@ -0,0 +1,57 @@ +(*===----------------------------------------------------------------------=== + * Main driver code. + *===----------------------------------------------------------------------===*) + +open Llvm +open Llvm_executionengine +open Llvm_target +open Llvm_scalar_opts + +let main () = + ignore (initialize_native_target ()); + + (* Install standard binary operators. + * 1 is the lowest precedence. *) + Hashtbl.add Parser.binop_precedence '=' 2; + Hashtbl.add Parser.binop_precedence '<' 10; + Hashtbl.add Parser.binop_precedence '+' 20; + Hashtbl.add Parser.binop_precedence '-' 20; + Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *) + + (* Prime the first token. *) + print_string "ready> "; flush stdout; + let stream = Lexer.lex (Stream.of_channel stdin) in + + (* Create the JIT. *) + let the_execution_engine = ExecutionEngine.create Codegen.the_module in + let the_fpm = PassManager.create_function Codegen.the_module in + + (* Set up the optimizer pipeline. Start with registering info about how the + * target lays out data structures. 
*) + TargetData.add (ExecutionEngine.target_data the_execution_engine) the_fpm; + + (* Promote allocas to registers. *) + add_memory_to_register_promotion the_fpm; + + (* Do simple "peephole" optimizations and bit-twiddling optzn. *) + add_instruction_combination the_fpm; + + (* reassociate expressions. *) + add_reassociation the_fpm; + + (* Eliminate Common SubExpressions. *) + add_gvn the_fpm; + + (* Simplify the control flow graph (deleting unreachable blocks, etc). *) + add_cfg_simplification the_fpm; + + ignore (PassManager.initialize the_fpm); + + (* Run the main "interpreter loop" now. *) + Toplevel.main_loop the_fpm the_execution_engine stream; + + (* Print out all the generated code. *) + dump_module Codegen.the_module +;; + +main () diff --git a/final/examples/OCaml-Kaleidoscope/Makefile b/final/examples/OCaml-Kaleidoscope/Makefile new file mode 100644 index 00000000000..5342b94022a --- /dev/null +++ b/final/examples/OCaml-Kaleidoscope/Makefile @@ -0,0 +1,15 @@ +##===- examples/OCaml-Kaleidoscope/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL=../.. + +include $(LEVEL)/Makefile.config + +PARALLEL_DIRS:= Chapter2 Chapter3 Chapter4 Chapter5 Chapter6 Chapter7 + +include $(LEVEL)/Makefile.common diff --git a/final/examples/ParallelJIT/CMakeLists.txt b/final/examples/ParallelJIT/CMakeLists.txt new file mode 100644 index 00000000000..fbdc6e5fc10 --- /dev/null +++ b/final/examples/ParallelJIT/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS jit interpreter nativecodegen) + +add_llvm_example(ParallelJIT + ParallelJIT.cpp + ) + +if(HAVE_LIBPTHREAD) + target_link_libraries(ParallelJIT pthread) +endif(HAVE_LIBPTHREAD) diff --git a/final/examples/ParallelJIT/Makefile b/final/examples/ParallelJIT/Makefile new file mode 100644 index 00000000000..8a49d427313 --- /dev/null +++ b/final/examples/ParallelJIT/Makefile @@ -0,0 +1,17 @@ +##===- examples/ParallelJIT/Makefile -----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +TOOLNAME = ParallelJIT +EXAMPLE_TOOL = 1 + +LINK_COMPONENTS := jit interpreter nativecodegen + +include $(LEVEL)/Makefile.common + +LIBS += -lpthread diff --git a/final/examples/ParallelJIT/ParallelJIT.cpp b/final/examples/ParallelJIT/ParallelJIT.cpp new file mode 100644 index 00000000000..9231abf6e31 --- /dev/null +++ b/final/examples/ParallelJIT/ParallelJIT.cpp @@ -0,0 +1,304 @@ +//===-- examples/ParallelJIT/ParallelJIT.cpp - Exercise threaded-safe JIT -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Parallel JIT +// +// This test program creates two LLVM functions then calls them from three +// separate threads. It requires the pthreads library. +// The three threads are created and then block waiting on a condition variable. +// Once all threads are blocked on the conditional variable, the main thread +// wakes them up. 
This complicated work is performed so that all three threads
+// call into the JIT at the same time (or the best possible approximation of the
+// same time). This test had assertion errors until I got the locking right.
+
+#include <pthread.h>
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetSelect.h"
+#include <iostream>
+using namespace llvm;
+
+static Function* createAdd1(Module *M) {
+  // Create the add1 function entry and insert this entry into module M. The
+  // function will have a return type of "int" and take an argument of "int".
+  // The '0' terminates the list of argument types.
+  Function *Add1F =
+    cast<Function>(M->getOrInsertFunction("add1",
+                                          Type::getInt32Ty(M->getContext()),
+                                          Type::getInt32Ty(M->getContext()),
+                                          (Type *)0));
+
+  // Add a basic block to the function. As before, it automatically inserts
+  // because of the last argument.
+  BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", Add1F);
+
+  // Get pointers to the constant `1'.
+  Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
+
+  // Get pointers to the integer argument of the add1 function...
+  assert(Add1F->arg_begin() != Add1F->arg_end()); // Make sure there's an arg
+  Argument *ArgX = Add1F->arg_begin();  // Get the arg
+  ArgX->setName("AnArg");  // Give it a nice symbolic name for fun.
+
+  // Create the add instruction, inserting it into the end of BB.
+  Instruction *Add = BinaryOperator::CreateAdd(One, ArgX, "addresult", BB);
+
+  // Create the return instruction and add it to the basic block
+  ReturnInst::Create(M->getContext(), Add, BB);
+
+  // Now, function add1 is ready.
+  return Add1F;
+}
+
+static Function *CreateFibFunction(Module *M) {
+  // Create the fib function and insert it into module M. This function is said
+  // to return an int and take an int parameter.
+  Function *FibF =
+    cast<Function>(M->getOrInsertFunction("fib",
+                                          Type::getInt32Ty(M->getContext()),
+                                          Type::getInt32Ty(M->getContext()),
+                                          (Type *)0));
+
+  // Add a basic block to the function.
+  BasicBlock *BB = BasicBlock::Create(M->getContext(), "EntryBlock", FibF);
+
+  // Get pointers to the constants.
+  Value *One = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
+  Value *Two = ConstantInt::get(Type::getInt32Ty(M->getContext()), 2);
+
+  // Get pointer to the integer argument of the fib function...
+  Argument *ArgX = FibF->arg_begin();   // Get the arg.
+  ArgX->setName("AnArg");  // Give it a nice symbolic name for fun.
+
+  // Create the block that returns the base-case result.
+  BasicBlock *RetBB = BasicBlock::Create(M->getContext(), "return", FibF);
+  // Create the block for the recursive case.
+ BasicBlock* RecurseBB = BasicBlock::Create(M->getContext(), "recurse", FibF); + + // Create the "if (arg < 2) goto exitbb" + Value *CondInst = new ICmpInst(*BB, ICmpInst::ICMP_SLE, ArgX, Two, "cond"); + BranchInst::Create(RetBB, RecurseBB, CondInst, BB); + + // Create: ret int 1 + ReturnInst::Create(M->getContext(), One, RetBB); + + // create fib(x-1) + Value *Sub = BinaryOperator::CreateSub(ArgX, One, "arg", RecurseBB); + Value *CallFibX1 = CallInst::Create(FibF, Sub, "fibx1", RecurseBB); + + // create fib(x-2) + Sub = BinaryOperator::CreateSub(ArgX, Two, "arg", RecurseBB); + Value *CallFibX2 = CallInst::Create(FibF, Sub, "fibx2", RecurseBB); + + // fib(x-1)+fib(x-2) + Value *Sum = + BinaryOperator::CreateAdd(CallFibX1, CallFibX2, "addresult", RecurseBB); + + // Create the return instruction and add it to the basic block + ReturnInst::Create(M->getContext(), Sum, RecurseBB); + + return FibF; +} + +struct threadParams { + ExecutionEngine* EE; + Function* F; + int value; +}; + +// We block the subthreads just before they begin to execute: +// we want all of them to call into the JIT at the same time, +// to verify that the locking is working correctly. +class WaitForThreads +{ +public: + WaitForThreads() + { + n = 0; + waitFor = 0; + + int result = pthread_cond_init( &condition, NULL ); + assert( result == 0 ); + + result = pthread_mutex_init( &mutex, NULL ); + assert( result == 0 ); + } + + ~WaitForThreads() + { + int result = pthread_cond_destroy( &condition ); + assert( result == 0 ); + + result = pthread_mutex_destroy( &mutex ); + assert( result == 0 ); + } + + // All threads will stop here until another thread calls releaseThreads + void block() + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + n ++; + //~ std::cout << "block() n " << n << " waitFor " << waitFor << std::endl; + + assert( waitFor == 0 || n <= waitFor ); + if ( waitFor > 0 && n == waitFor ) + { + // There are enough threads blocked that we can release all of them + std::cout << "Unblocking threads from block()" << std::endl; + unblockThreads(); + } + else + { + // We just need to wait until someone unblocks us + result = pthread_cond_wait( &condition, &mutex ); + assert( result == 0 ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + + // If there are num or more threads blocked, it will signal them all + // Otherwise, this thread blocks until there are enough OTHER threads + // blocked + void releaseThreads( size_t num ) + { + int result = pthread_mutex_lock( &mutex ); + assert( result == 0 ); + + if ( n >= num ) { + std::cout << "Unblocking threads from releaseThreads()" << std::endl; + unblockThreads(); + } + else + { + waitFor = num; + pthread_cond_wait( &condition, &mutex ); + } + + // unlock the mutex before returning + result = pthread_mutex_unlock( &mutex ); + assert( result == 0 ); + } + +private: + void unblockThreads() + { + // Reset the counters to zero: this way, if any new threads + // enter while threads are exiting, they will block instead + // of triggering a new release of threads + n = 0; + + // Reset waitFor to zero: this way, if waitFor threads enter + // while threads are exiting, they will block instead of + // triggering a new release of threads + waitFor = 0; + + int result = pthread_cond_broadcast( &condition ); + assert(result == 0); result=result; + } + + size_t n; + size_t waitFor; + pthread_cond_t condition; + pthread_mutex_t mutex; +}; + +static WaitForThreads synchronize; + +void* callFunc( 
void* param )
+{
+  struct threadParams* p = (struct threadParams*) param;
+
+  // Call the requested function with its single integer argument:
+  std::vector<GenericValue> Args(1);
+  Args[0].IntVal = APInt(32, p->value);
+
+  synchronize.block(); // wait until other threads are at this point
+  GenericValue gv = p->EE->runFunction(p->F, Args);
+
+  return (void*)(intptr_t)gv.IntVal.getZExtValue();
+}
+
+int main() {
+  InitializeNativeTarget();
+  LLVMContext Context;
+
+  // Create a module to put our functions into.
+  Module *M = new Module("test", Context);
+
+  Function* add1F = createAdd1( M );
+  Function* fibF = CreateFibFunction( M );
+
+  // Now we create the JIT.
+  ExecutionEngine* EE = EngineBuilder(M).create();
+
+  //~ std::cout << "We just constructed this LLVM module:\n\n" << *M;
+  //~ std::cout << "\n\nRunning foo: " << std::flush;
+
+  // Create one thread for add1 and two threads for fib
+  struct threadParams add1 = { EE, add1F, 1000 };
+  struct threadParams fib1 = { EE, fibF, 39 };
+  struct threadParams fib2 = { EE, fibF, 42 };
+
+  pthread_t add1Thread;
+  int result = pthread_create( &add1Thread, NULL, callFunc, &add1 );
+  if ( result != 0 ) {
+    std::cerr << "Could not create thread" << std::endl;
+    return 1;
+  }
+
+  pthread_t fibThread1;
+  result = pthread_create( &fibThread1, NULL, callFunc, &fib1 );
+  if ( result != 0 ) {
+    std::cerr << "Could not create thread" << std::endl;
+    return 1;
+  }
+
+  pthread_t fibThread2;
+  result = pthread_create( &fibThread2, NULL, callFunc, &fib2 );
+  if ( result != 0 ) {
+    std::cerr << "Could not create thread" << std::endl;
+    return 1;
+  }
+
+  synchronize.releaseThreads(3); // wait until other threads are at this point
+
+  void* returnValue;
+  result = pthread_join( add1Thread, &returnValue );
+  if ( result != 0 ) {
+    std::cerr << "Could not join thread" << std::endl;
+    return 1;
+  }
+  std::cout << "Add1 returned " << intptr_t(returnValue) << std::endl;
+
+  result = pthread_join( fibThread1, &returnValue );
+  if ( result != 0 ) {
+    std::cerr << "Could not join thread" << std::endl;
+    return 1;
+  }
+  std::cout << "Fib1 returned " << intptr_t(returnValue) << std::endl;
+
+  result = pthread_join( fibThread2, &returnValue );
+  if ( result != 0 ) {
+    std::cerr << "Could not join thread" << std::endl;
+    return 1;
+  }
+  std::cout << "Fib2 returned " << intptr_t(returnValue) << std::endl;
+
+  return 0;
+}
diff --git a/final/include/llvm-c/Analysis.h b/final/include/llvm-c/Analysis.h
new file mode 100644
index 00000000000..e1e44872b16
--- /dev/null
+++ b/final/include/llvm-c/Analysis.h
@@ -0,0 +1,55 @@
+/*===-- llvm-c/Analysis.h - Analysis Library C Interface --------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMAnalysis.a, which *|
+|* implements various analyses of the LLVM IR. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. 
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_ANALYSIS_H +#define LLVM_C_ANALYSIS_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef enum { + LLVMAbortProcessAction, /* verifier will print to stderr and abort() */ + LLVMPrintMessageAction, /* verifier will print to stderr and return 1 */ + LLVMReturnStatusAction /* verifier will just return 1 */ +} LLVMVerifierFailureAction; + + +/* Verifies that a module is valid, taking the specified action if not. + Optionally returns a human-readable description of any invalid constructs. + OutMessage must be disposed with LLVMDisposeMessage. */ +LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, + char **OutMessage); + +/* Verifies that a single function is valid, taking the specified action. Useful + for debugging. */ +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action); + +/* Open up a ghostview window that displays the CFG of the current function. + Useful for debugging. */ +void LLVMViewFunctionCFG(LLVMValueRef Fn); +void LLVMViewFunctionCFGOnly(LLVMValueRef Fn); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/final/include/llvm-c/BitReader.h b/final/include/llvm-c/BitReader.h new file mode 100644 index 00000000000..6db66074b31 --- /dev/null +++ b/final/include/llvm-c/BitReader.h @@ -0,0 +1,66 @@ +/*===-- llvm-c/BitReader.h - BitReader Library C Interface ------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMBitReader.a, which *| +|* implements input of the LLVM bitcode format. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_BITCODEREADER_H +#define LLVM_C_BITCODEREADER_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Builds a module from the bitcode in the specified memory buffer, returning a + reference to the module via the OutModule parameter. Returns 0 on success. + Optionally returns a human-readable error message via OutMessage. */ +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage); + +LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage); + +/** Reads a module from the specified path, returning via the OutMP parameter + a module provider which performs lazy deserialization. Returns 0 on success. + Optionally returns a human-readable error message via OutMessage. */ +LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM, + char **OutMessage); + +LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, + char **OutMessage); + + +/** Deprecated: Use LLVMGetBitcodeModuleInContext instead. 
*/
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+                                               LLVMMemoryBufferRef MemBuf,
+                                               LLVMModuleProviderRef *OutMP,
+                                               char **OutMessage);
+
+/** Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+                                      LLVMModuleProviderRef *OutMP,
+                                      char **OutMessage);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/final/include/llvm-c/BitWriter.h b/final/include/llvm-c/BitWriter.h
new file mode 100644
index 00000000000..bcbfb111492
--- /dev/null
+++ b/final/include/llvm-c/BitWriter.h
@@ -0,0 +1,46 @@
+/*===-- llvm-c/BitWriter.h - BitWriter Library C Interface ------*- C++ -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMBitWriter.a, which *|
+|* implements output of the LLVM bitcode format. *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages. *|
+|* *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_BITCODEWRITER_H
+#define LLVM_C_BITCODEWRITER_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+/** Writes a module to the specified path. Returns 0 on success. */
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path);
+
+/** Writes a module to an open file descriptor. Returns 0 on success. */
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+                         int Unbuffered);
+
+/** Deprecated for LLVMWriteBitcodeToFD. Writes a module to an open file
+    descriptor. Returns 0 on success. Closes the Handle. */
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int Handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/final/include/llvm-c/Core.h b/final/include/llvm-c/Core.h
new file mode 100644
index 00000000000..39c3cb40117
--- /dev/null
+++ b/final/include/llvm-c/Core.h
@@ -0,0 +1,1177 @@
+/*===-- llvm-c/Core.h - Core Library C Interface ------------------*- C -*-===*\
+|* *|
+|* The LLVM Compiler Infrastructure *|
+|* *|
+|* This file is distributed under the University of Illinois Open Source *|
+|* License. See LICENSE.TXT for details. *|
+|* *|
+|*===----------------------------------------------------------------------===*|
+|* *|
+|* This header declares the C interface to libLLVMCore.a, which implements *|
+|* the LLVM intermediate representation. *|
+|* *|
+|* LLVM uses a polymorphic type hierarchy which C cannot represent, therefore *|
+|* parameters must be passed as base types. Despite the declared types, most *|
+|* of the functions provided operate only on branches of the type hierarchy. *|
+|* The declared parameter names are descriptive and specify which type is *|
+|* required. Additionally, each type hierarchy is documented along with the *|
+|* functions that operate upon it. For more detail, refer to LLVM's C++ code. *|
+|* If in doubt, refer to Core.cpp, which performs parameter downcasts in the *|
+|* form unwrap(Param). *|
+|* *|
+|* Many exotic languages can interoperate with C code but have a harder time *|
+|* with C++ due to name mangling. 
So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +|* When included into a C++ source file, also declares 'wrap' and 'unwrap' *| +|* helpers to perform opaque reference<-->pointer conversions. These helpers *| +|* are shorter and more tightly typed than writing the casts by hand when *| +|* authoring bindings. In assert builds, they will do runtime type checking. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_CORE_H +#define LLVM_C_CORE_H + +#include "llvm/Support/DataTypes.h" + +#ifdef __cplusplus + +/* Need these includes to support the LLVM 'cast' template for the C++ 'wrap' + and 'unwrap' conversion functions. */ +#include "llvm/Module.h" +#include "llvm/PassRegistry.h" +#include "llvm/Support/IRBuilder.h" + +extern "C" { +#endif + + +typedef int LLVMBool; + +/* Opaque types. */ + +/** + * The top-level container for all LLVM global data. See the LLVMContext class. + */ +typedef struct LLVMOpaqueContext *LLVMContextRef; + +/** + * The top-level container for all other LLVM Intermediate Representation (IR) + * objects. See the llvm::Module class. + */ +typedef struct LLVMOpaqueModule *LLVMModuleRef; + +/** + * Each value in the LLVM IR has a type, an LLVMTypeRef. See the llvm::Type + * class. + */ +typedef struct LLVMOpaqueType *LLVMTypeRef; + +/** + * When building recursive types using LLVMRefineType, LLVMTypeRef values may + * become invalid; use LLVMTypeHandleRef to resolve this problem. See the + * llvm::AbstractTypeHolder class. + */ +typedef struct LLVMOpaqueTypeHandle *LLVMTypeHandleRef; + +typedef struct LLVMOpaqueValue *LLVMValueRef; +typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef; +typedef struct LLVMOpaqueBuilder *LLVMBuilderRef; + +/* Interface used to provide a module to JIT or interpreter. This is now just a + * synonym for llvm::Module, but we have to keep using the different type to + * keep binary compatibility. + */ +typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef; + +/* Used to provide a module to JIT or interpreter. + * See the llvm::MemoryBuffer class. + */ +typedef struct LLVMOpaqueMemoryBuffer *LLVMMemoryBufferRef; + +/** See the llvm::PassManagerBase class. */ +typedef struct LLVMOpaquePassManager *LLVMPassManagerRef; + +/** See the llvm::PassRegistry class. */ +typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef; + +/** Used to get the users and usees of a Value. See the llvm::Use class. 
*/ +typedef struct LLVMOpaqueUse *LLVMUseRef; + +typedef enum { + LLVMZExtAttribute = 1<<0, + LLVMSExtAttribute = 1<<1, + LLVMNoReturnAttribute = 1<<2, + LLVMInRegAttribute = 1<<3, + LLVMStructRetAttribute = 1<<4, + LLVMNoUnwindAttribute = 1<<5, + LLVMNoAliasAttribute = 1<<6, + LLVMByValAttribute = 1<<7, + LLVMNestAttribute = 1<<8, + LLVMReadNoneAttribute = 1<<9, + LLVMReadOnlyAttribute = 1<<10, + LLVMNoInlineAttribute = 1<<11, + LLVMAlwaysInlineAttribute = 1<<12, + LLVMOptimizeForSizeAttribute = 1<<13, + LLVMStackProtectAttribute = 1<<14, + LLVMStackProtectReqAttribute = 1<<15, + LLVMAlignment = 31<<16, + LLVMNoCaptureAttribute = 1<<21, + LLVMNoRedZoneAttribute = 1<<22, + LLVMNoImplicitFloatAttribute = 1<<23, + LLVMNakedAttribute = 1<<24, + LLVMInlineHintAttribute = 1<<25, + LLVMStackAlignment = 7<<26 +} LLVMAttribute; + +typedef enum { + /* Terminator Instructions */ + LLVMRet = 1, + LLVMBr = 2, + LLVMSwitch = 3, + LLVMIndirectBr = 4, + LLVMInvoke = 5, + LLVMUnwind = 6, + LLVMUnreachable = 7, + + /* Standard Binary Operators */ + LLVMAdd = 8, + LLVMFAdd = 9, + LLVMSub = 10, + LLVMFSub = 11, + LLVMMul = 12, + LLVMFMul = 13, + LLVMUDiv = 14, + LLVMSDiv = 15, + LLVMFDiv = 16, + LLVMURem = 17, + LLVMSRem = 18, + LLVMFRem = 19, + + /* Logical Operators */ + LLVMShl = 20, + LLVMLShr = 21, + LLVMAShr = 22, + LLVMAnd = 23, + LLVMOr = 24, + LLVMXor = 25, + + /* Memory Operators */ + LLVMAlloca = 26, + LLVMLoad = 27, + LLVMStore = 28, + LLVMGetElementPtr = 29, + + /* Cast Operators */ + LLVMTrunc = 30, + LLVMZExt = 31, + LLVMSExt = 32, + LLVMFPToUI = 33, + LLVMFPToSI = 34, + LLVMUIToFP = 35, + LLVMSIToFP = 36, + LLVMFPTrunc = 37, + LLVMFPExt = 38, + LLVMPtrToInt = 39, + LLVMIntToPtr = 40, + LLVMBitCast = 41, + + /* Other Operators */ + LLVMICmp = 42, + LLVMFCmp = 43, + LLVMPHI = 44, + LLVMCall = 45, + LLVMSelect = 46, + /* UserOp1 */ + /* UserOp2 */ + LLVMVAArg = 49, + LLVMExtractElement = 50, + LLVMInsertElement = 51, + LLVMShuffleVector = 52, + LLVMExtractValue = 53, + LLVMInsertValue = 54 +} LLVMOpcode; + +typedef enum { + LLVMVoidTypeKind, /**< type with no size */ + LLVMFloatTypeKind, /**< 32 bit floating point type */ + LLVMDoubleTypeKind, /**< 64 bit floating point type */ + LLVMX86_FP80TypeKind, /**< 80 bit floating point type (X87) */ + LLVMFP128TypeKind, /**< 128 bit floating point type (112-bit mantissa)*/ + LLVMPPC_FP128TypeKind, /**< 128 bit floating point type (two 64-bits) */ + LLVMLabelTypeKind, /**< Labels */ + LLVMIntegerTypeKind, /**< Arbitrary bit width integers */ + LLVMFunctionTypeKind, /**< Functions */ + LLVMStructTypeKind, /**< Structures */ + LLVMArrayTypeKind, /**< Arrays */ + LLVMPointerTypeKind, /**< Pointers */ + LLVMOpaqueTypeKind, /**< Opaque: type with unknown structure */ + LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */ + LLVMMetadataTypeKind, /**< Metadata */ + LLVMX86_MMXTypeKind /**< X86 MMX */ +} LLVMTypeKind; + +typedef enum { + LLVMExternalLinkage, /**< Externally visible function */ + LLVMAvailableExternallyLinkage, + LLVMLinkOnceAnyLinkage, /**< Keep one copy of function when linking (inline)*/ + LLVMLinkOnceODRLinkage, /**< Same, but only replaced by something + equivalent. */ + LLVMWeakAnyLinkage, /**< Keep one copy of function when linking (weak) */ + LLVMWeakODRLinkage, /**< Same, but only replaced by something + equivalent. 
*/ + LLVMAppendingLinkage, /**< Special purpose, only applies to global arrays */ + LLVMInternalLinkage, /**< Rename collisions when linking (static + functions) */ + LLVMPrivateLinkage, /**< Like Internal, but omit from symbol table */ + LLVMDLLImportLinkage, /**< Function to be imported from DLL */ + LLVMDLLExportLinkage, /**< Function to be accessible from DLL */ + LLVMExternalWeakLinkage,/**< ExternalWeak linkage description */ + LLVMGhostLinkage, /**< Obsolete */ + LLVMCommonLinkage, /**< Tentative definitions */ + LLVMLinkerPrivateLinkage, /**< Like Private, but linker removes. */ + LLVMLinkerPrivateWeakLinkage, /**< Like LinkerPrivate, but is weak. */ + LLVMLinkerPrivateWeakDefAutoLinkage /**< Like LinkerPrivateWeak, but possibly + hidden. */ +} LLVMLinkage; + +typedef enum { + LLVMDefaultVisibility, /**< The GV is visible */ + LLVMHiddenVisibility, /**< The GV is hidden */ + LLVMProtectedVisibility /**< The GV is protected */ +} LLVMVisibility; + +typedef enum { + LLVMCCallConv = 0, + LLVMFastCallConv = 8, + LLVMColdCallConv = 9, + LLVMX86StdcallCallConv = 64, + LLVMX86FastcallCallConv = 65 +} LLVMCallConv; + +typedef enum { + LLVMIntEQ = 32, /**< equal */ + LLVMIntNE, /**< not equal */ + LLVMIntUGT, /**< unsigned greater than */ + LLVMIntUGE, /**< unsigned greater or equal */ + LLVMIntULT, /**< unsigned less than */ + LLVMIntULE, /**< unsigned less or equal */ + LLVMIntSGT, /**< signed greater than */ + LLVMIntSGE, /**< signed greater or equal */ + LLVMIntSLT, /**< signed less than */ + LLVMIntSLE /**< signed less or equal */ +} LLVMIntPredicate; + +typedef enum { + LLVMRealPredicateFalse, /**< Always false (always folded) */ + LLVMRealOEQ, /**< True if ordered and equal */ + LLVMRealOGT, /**< True if ordered and greater than */ + LLVMRealOGE, /**< True if ordered and greater than or equal */ + LLVMRealOLT, /**< True if ordered and less than */ + LLVMRealOLE, /**< True if ordered and less than or equal */ + LLVMRealONE, /**< True if ordered and operands are unequal */ + LLVMRealORD, /**< True if ordered (no nans) */ + LLVMRealUNO, /**< True if unordered: isnan(X) | isnan(Y) */ + LLVMRealUEQ, /**< True if unordered or equal */ + LLVMRealUGT, /**< True if unordered or greater than */ + LLVMRealUGE, /**< True if unordered, greater than, or equal */ + LLVMRealULT, /**< True if unordered or less than */ + LLVMRealULE, /**< True if unordered, less than, or equal */ + LLVMRealUNE, /**< True if unordered or not equal */ + LLVMRealPredicateTrue /**< Always true (always folded) */ +} LLVMRealPredicate; + + +/*===-- Error handling ----------------------------------------------------===*/ + +void LLVMDisposeMessage(char *Message); + + +/*===-- Contexts ----------------------------------------------------------===*/ + +/* Create and destroy contexts. */ +LLVMContextRef LLVMContextCreate(void); +LLVMContextRef LLVMGetGlobalContext(void); +void LLVMContextDispose(LLVMContextRef C); + +unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name, + unsigned SLen); +unsigned LLVMGetMDKindID(const char* Name, unsigned SLen); + +/*===-- Modules -----------------------------------------------------------===*/ + +/* Create and destroy modules. */ +/** See llvm::Module::Module. */ +LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID); +LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, + LLVMContextRef C); + +/** See llvm::Module::~Module. */ +void LLVMDisposeModule(LLVMModuleRef M); + +/** Data layout. See Module::getDataLayout. 
*/ +const char *LLVMGetDataLayout(LLVMModuleRef M); +void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple); + +/** Target triple. See Module::getTargetTriple. */ +const char *LLVMGetTarget(LLVMModuleRef M); +void LLVMSetTarget(LLVMModuleRef M, const char *Triple); + +/** See Module::addTypeName. */ +LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty); +void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name); +LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name); +const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty); + +/** See Module::dump. */ +void LLVMDumpModule(LLVMModuleRef M); + +/** See Module::setModuleInlineAsm. */ +void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm); + +/** See Module::getContext. */ +LLVMContextRef LLVMGetModuleContext(LLVMModuleRef M); + +/*===-- Types -------------------------------------------------------------===*/ + +/* LLVM types conform to the following hierarchy: + * + * types: + * integer type + * real type + * function type + * sequence types: + * array type + * pointer type + * vector type + * void type + * label type + * opaque type + */ + +/** See llvm::LLVMTypeKind::getTypeID. */ +LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty); + +/** See llvm::LLVMType::getContext. */ +LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty); + +/* Operations on integer types */ +LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt8TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt16TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt32TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMInt64TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMIntTypeInContext(LLVMContextRef C, unsigned NumBits); + +LLVMTypeRef LLVMInt1Type(void); +LLVMTypeRef LLVMInt8Type(void); +LLVMTypeRef LLVMInt16Type(void); +LLVMTypeRef LLVMInt32Type(void); +LLVMTypeRef LLVMInt64Type(void); +LLVMTypeRef LLVMIntType(unsigned NumBits); +unsigned LLVMGetIntTypeWidth(LLVMTypeRef IntegerTy); + +/* Operations on real types */ +LLVMTypeRef LLVMFloatTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMDoubleTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMX86FP80TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMFP128TypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMPPCFP128TypeInContext(LLVMContextRef C); + +LLVMTypeRef LLVMFloatType(void); +LLVMTypeRef LLVMDoubleType(void); +LLVMTypeRef LLVMX86FP80Type(void); +LLVMTypeRef LLVMFP128Type(void); +LLVMTypeRef LLVMPPCFP128Type(void); + +/* Operations on function types */ +LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType, + LLVMTypeRef *ParamTypes, unsigned ParamCount, + LLVMBool IsVarArg); +LLVMBool LLVMIsFunctionVarArg(LLVMTypeRef FunctionTy); +LLVMTypeRef LLVMGetReturnType(LLVMTypeRef FunctionTy); +unsigned LLVMCountParamTypes(LLVMTypeRef FunctionTy); +void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest); + +/* Operations on struct types */ +LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes, + unsigned ElementCount, LLVMBool Packed); +LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount, + LLVMBool Packed); +unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy); +void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest); +LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy); + +/* Operations on array, pointer, and vector types (sequence types) */ +LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount); +LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace); 
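+
+/* A minimal usage sketch, not part of the C API itself: the sequence-type
+   constructors above (plus LLVMVectorType, declared just below) nest freely,
+   and because types are uniqued per context the accessors hand back the very
+   refs that went in. Assuming the global context:
+
+     LLVMTypeRef i32  = LLVMInt32Type();
+     LLVMTypeRef vec4 = LLVMVectorType(i32, 4);     // <4 x i32>
+     LLVMTypeRef arr8 = LLVMArrayType(vec4, 8);     // [8 x <4 x i32>]
+     LLVMTypeRef ptr  = LLVMPointerType(arr8, 0);   // pointer in addrspace 0
+
+     // LLVMGetElementType(ptr)  == arr8
+     // LLVMGetElementType(arr8) == vec4
+     // LLVMGetVectorSize(vec4)  == 4
+*/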
+LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount); + +LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty); +unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy); +unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy); +unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy); + +/* Operations on other types */ +LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C); +LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C); + +LLVMTypeRef LLVMVoidType(void); +LLVMTypeRef LLVMLabelType(void); +LLVMTypeRef LLVMOpaqueType(void); +LLVMTypeRef LLVMX86MMXType(void); + +/* Operations on type handles */ +LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy); +void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy); +LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle); +void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle); + + +/*===-- Values ------------------------------------------------------------===*/ + +/* The bulk of LLVM's object model consists of values, which comprise a very + * rich type hierarchy. + */ + +#define LLVM_FOR_EACH_VALUE_SUBCLASS(macro) \ + macro(Argument) \ + macro(BasicBlock) \ + macro(InlineAsm) \ + macro(User) \ + macro(Constant) \ + macro(ConstantAggregateZero) \ + macro(ConstantArray) \ + macro(ConstantExpr) \ + macro(ConstantFP) \ + macro(ConstantInt) \ + macro(ConstantPointerNull) \ + macro(ConstantStruct) \ + macro(ConstantVector) \ + macro(GlobalValue) \ + macro(Function) \ + macro(GlobalAlias) \ + macro(GlobalVariable) \ + macro(UndefValue) \ + macro(Instruction) \ + macro(BinaryOperator) \ + macro(CallInst) \ + macro(IntrinsicInst) \ + macro(DbgInfoIntrinsic) \ + macro(DbgDeclareInst) \ + macro(EHSelectorInst) \ + macro(MemIntrinsic) \ + macro(MemCpyInst) \ + macro(MemMoveInst) \ + macro(MemSetInst) \ + macro(CmpInst) \ + macro(FCmpInst) \ + macro(ICmpInst) \ + macro(ExtractElementInst) \ + macro(GetElementPtrInst) \ + macro(InsertElementInst) \ + macro(InsertValueInst) \ + macro(PHINode) \ + macro(SelectInst) \ + macro(ShuffleVectorInst) \ + macro(StoreInst) \ + macro(TerminatorInst) \ + macro(BranchInst) \ + macro(InvokeInst) \ + macro(ReturnInst) \ + macro(SwitchInst) \ + macro(UnreachableInst) \ + macro(UnwindInst) \ + macro(UnaryInstruction) \ + macro(AllocaInst) \ + macro(CastInst) \ + macro(BitCastInst) \ + macro(FPExtInst) \ + macro(FPToSIInst) \ + macro(FPToUIInst) \ + macro(FPTruncInst) \ + macro(IntToPtrInst) \ + macro(PtrToIntInst) \ + macro(SExtInst) \ + macro(SIToFPInst) \ + macro(TruncInst) \ + macro(UIToFPInst) \ + macro(ZExtInst) \ + macro(ExtractValueInst) \ + macro(LoadInst) \ + macro(VAArgInst) + +/* Operations on all values */ +LLVMTypeRef LLVMTypeOf(LLVMValueRef Val); +const char *LLVMGetValueName(LLVMValueRef Val); +void LLVMSetValueName(LLVMValueRef Val, const char *Name); +void LLVMDumpValue(LLVMValueRef Val); +void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal); +int LLVMHasMetadata(LLVMValueRef Val); +LLVMValueRef LLVMGetMetadata(LLVMValueRef Val, unsigned KindID); +void LLVMSetMetadata(LLVMValueRef Val, unsigned KindID, LLVMValueRef Node); + +/* Conversion functions. Return the input value if it is an instance of the + specified class, otherwise NULL. See llvm::dyn_cast_or_null<>. 
*/ +#define LLVM_DECLARE_VALUE_CAST(name) \ + LLVMValueRef LLVMIsA##name(LLVMValueRef Val); +LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST) + +/* Operations on Uses */ +LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val); +LLVMUseRef LLVMGetNextUse(LLVMUseRef U); +LLVMValueRef LLVMGetUser(LLVMUseRef U); +LLVMValueRef LLVMGetUsedValue(LLVMUseRef U); + +/* Operations on Users */ +LLVMValueRef LLVMGetOperand(LLVMValueRef Val, unsigned Index); +void LLVMSetOperand(LLVMValueRef User, unsigned Index, LLVMValueRef Val); +int LLVMGetNumOperands(LLVMValueRef Val); + +/* Operations on constants of any type */ +LLVMValueRef LLVMConstNull(LLVMTypeRef Ty); /* all zeroes */ +LLVMValueRef LLVMConstAllOnes(LLVMTypeRef Ty); /* only for int/vector */ +LLVMValueRef LLVMGetUndef(LLVMTypeRef Ty); +LLVMBool LLVMIsConstant(LLVMValueRef Val); +LLVMBool LLVMIsNull(LLVMValueRef Val); +LLVMBool LLVMIsUndef(LLVMValueRef Val); +LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty); + +/* Operations on metadata */ +LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str, + unsigned SLen); +LLVMValueRef LLVMMDString(const char *Str, unsigned SLen); +LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, + unsigned Count); +LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count); + +/* Operations on scalar constants */ +LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N, + LLVMBool SignExtend); +LLVMValueRef LLVMConstIntOfArbitraryPrecision(LLVMTypeRef IntTy, + unsigned NumWords, + const uint64_t Words[]); +LLVMValueRef LLVMConstIntOfString(LLVMTypeRef IntTy, const char *Text, + uint8_t Radix); +LLVMValueRef LLVMConstIntOfStringAndSize(LLVMTypeRef IntTy, const char *Text, + unsigned SLen, uint8_t Radix); +LLVMValueRef LLVMConstReal(LLVMTypeRef RealTy, double N); +LLVMValueRef LLVMConstRealOfString(LLVMTypeRef RealTy, const char *Text); +LLVMValueRef LLVMConstRealOfStringAndSize(LLVMTypeRef RealTy, const char *Text, + unsigned SLen); +unsigned long long LLVMConstIntGetZExtValue(LLVMValueRef ConstantVal); +long long LLVMConstIntGetSExtValue(LLVMValueRef ConstantVal); + + +/* Operations on composite constants */ +LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, + unsigned Length, LLVMBool DontNullTerminate); +LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, + LLVMValueRef *ConstantVals, + unsigned Count, LLVMBool Packed); + +LLVMValueRef LLVMConstString(const char *Str, unsigned Length, + LLVMBool DontNullTerminate); +LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy, + LLVMValueRef *ConstantVals, unsigned Length); +LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count, + LLVMBool Packed); +LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size); + +/* Constant expressions */ +LLVMOpcode LLVMGetConstOpcode(LLVMValueRef ConstantVal); +LLVMValueRef LLVMAlignOf(LLVMTypeRef Ty); +LLVMValueRef LLVMSizeOf(LLVMTypeRef Ty); +LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstNot(LLVMValueRef ConstantVal); +LLVMValueRef LLVMConstAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); 
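+
+/* A minimal sketch, not part of the C API itself: when every operand is
+   already a constant, these routines fold at construction time instead of
+   deferring to an instruction. Assuming the global context and a module
+   reference M (hypothetical, created elsewhere with LLVMModuleCreateWithName;
+   LLVMAddGlobal and LLVMSetInitializer are declared later in this header):
+
+     LLVMTypeRef  i32 = LLVMInt32Type();
+     LLVMValueRef a   = LLVMConstInt(i32, 7, 0);    // 0 = no sign extension
+     LLVMValueRef b   = LLVMConstInt(i32, 35, 0);
+     LLVMValueRef sum = LLVMConstAdd(a, b);         // folds to i32 42
+
+     LLVMValueRef g = LLVMAddGlobal(M, i32, "answer");
+     LLVMSetInitializer(g, sum);    // folded constants are valid initializers
+*/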
+LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstURem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstSRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFRem(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstAnd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstOr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstXor(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstICmp(LLVMIntPredicate Predicate, + LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstFCmp(LLVMRealPredicate Predicate, + LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstShl(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstLShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstAShr(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant); +LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal, + LLVMValueRef *ConstantIndices, unsigned NumIndices); +LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal, + LLVMValueRef *ConstantIndices, + unsigned NumIndices); +LLVMValueRef LLVMConstTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstSExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstZExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstFPTrunc(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstFPExt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstUIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstSIToFP(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstFPToUI(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstFPToSI(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstPtrToInt(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstIntToPtr(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstSExtOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstTruncOrBitCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType); +LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType, + LLVMBool isSigned); +LLVMValueRef 
LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType); +LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition, + LLVMValueRef ConstantIfTrue, + LLVMValueRef ConstantIfFalse); +LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant, + LLVMValueRef IndexConstant); +LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant, + LLVMValueRef ElementValueConstant, + LLVMValueRef IndexConstant); +LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant, + LLVMValueRef VectorBConstant, + LLVMValueRef MaskConstant); +LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList, + unsigned NumIdx); +LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant, + LLVMValueRef ElementValueConstant, + unsigned *IdxList, unsigned NumIdx); +LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, + const char *AsmString, const char *Constraints, + LLVMBool HasSideEffects, LLVMBool IsAlignStack); +LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB); + +/* Operations on global variables, functions, and aliases (globals) */ +LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global); +LLVMBool LLVMIsDeclaration(LLVMValueRef Global); +LLVMLinkage LLVMGetLinkage(LLVMValueRef Global); +void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage); +const char *LLVMGetSection(LLVMValueRef Global); +void LLVMSetSection(LLVMValueRef Global, const char *Section); +LLVMVisibility LLVMGetVisibility(LLVMValueRef Global); +void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz); +unsigned LLVMGetAlignment(LLVMValueRef Global); +void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes); + +/* Operations on global variables */ +LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name); +LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty, + const char *Name, + unsigned AddressSpace); +LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name); +LLVMValueRef LLVMGetFirstGlobal(LLVMModuleRef M); +LLVMValueRef LLVMGetLastGlobal(LLVMModuleRef M); +LLVMValueRef LLVMGetNextGlobal(LLVMValueRef GlobalVar); +LLVMValueRef LLVMGetPreviousGlobal(LLVMValueRef GlobalVar); +void LLVMDeleteGlobal(LLVMValueRef GlobalVar); +LLVMValueRef LLVMGetInitializer(LLVMValueRef GlobalVar); +void LLVMSetInitializer(LLVMValueRef GlobalVar, LLVMValueRef ConstantVal); +LLVMBool LLVMIsThreadLocal(LLVMValueRef GlobalVar); +void LLVMSetThreadLocal(LLVMValueRef GlobalVar, LLVMBool IsThreadLocal); +LLVMBool LLVMIsGlobalConstant(LLVMValueRef GlobalVar); +void LLVMSetGlobalConstant(LLVMValueRef GlobalVar, LLVMBool IsConstant); + +/* Operations on aliases */ +LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee, + const char *Name); + +/* Operations on functions */ +LLVMValueRef LLVMAddFunction(LLVMModuleRef M, const char *Name, + LLVMTypeRef FunctionTy); +LLVMValueRef LLVMGetNamedFunction(LLVMModuleRef M, const char *Name); +LLVMValueRef LLVMGetFirstFunction(LLVMModuleRef M); +LLVMValueRef LLVMGetLastFunction(LLVMModuleRef M); +LLVMValueRef LLVMGetNextFunction(LLVMValueRef Fn); +LLVMValueRef LLVMGetPreviousFunction(LLVMValueRef Fn); +void LLVMDeleteFunction(LLVMValueRef Fn); +unsigned LLVMGetIntrinsicID(LLVMValueRef Fn); +unsigned LLVMGetFunctionCallConv(LLVMValueRef Fn); +void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC); +const char *LLVMGetGC(LLVMValueRef Fn); +void LLVMSetGC(LLVMValueRef Fn, const char *Name); +void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); +LLVMAttribute 
LLVMGetFunctionAttr(LLVMValueRef Fn); +void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA); + +/* Operations on parameters */ +unsigned LLVMCountParams(LLVMValueRef Fn); +void LLVMGetParams(LLVMValueRef Fn, LLVMValueRef *Params); +LLVMValueRef LLVMGetParam(LLVMValueRef Fn, unsigned Index); +LLVMValueRef LLVMGetParamParent(LLVMValueRef Inst); +LLVMValueRef LLVMGetFirstParam(LLVMValueRef Fn); +LLVMValueRef LLVMGetLastParam(LLVMValueRef Fn); +LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg); +LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg); +void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA); +void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA); +LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg); +void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align); + +/* Operations on basic blocks */ +LLVMValueRef LLVMBasicBlockAsValue(LLVMBasicBlockRef BB); +LLVMBool LLVMValueIsBasicBlock(LLVMValueRef Val); +LLVMBasicBlockRef LLVMValueAsBasicBlock(LLVMValueRef Val); +LLVMValueRef LLVMGetBasicBlockParent(LLVMBasicBlockRef BB); +unsigned LLVMCountBasicBlocks(LLVMValueRef Fn); +void LLVMGetBasicBlocks(LLVMValueRef Fn, LLVMBasicBlockRef *BasicBlocks); +LLVMBasicBlockRef LLVMGetFirstBasicBlock(LLVMValueRef Fn); +LLVMBasicBlockRef LLVMGetLastBasicBlock(LLVMValueRef Fn); +LLVMBasicBlockRef LLVMGetNextBasicBlock(LLVMBasicBlockRef BB); +LLVMBasicBlockRef LLVMGetPreviousBasicBlock(LLVMBasicBlockRef BB); +LLVMBasicBlockRef LLVMGetEntryBasicBlock(LLVMValueRef Fn); + +LLVMBasicBlockRef LLVMAppendBasicBlockInContext(LLVMContextRef C, + LLVMValueRef Fn, + const char *Name); +LLVMBasicBlockRef LLVMInsertBasicBlockInContext(LLVMContextRef C, + LLVMBasicBlockRef BB, + const char *Name); + +LLVMBasicBlockRef LLVMAppendBasicBlock(LLVMValueRef Fn, const char *Name); +LLVMBasicBlockRef LLVMInsertBasicBlock(LLVMBasicBlockRef InsertBeforeBB, + const char *Name); +void LLVMDeleteBasicBlock(LLVMBasicBlockRef BB); + +void LLVMMoveBasicBlockBefore(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); +void LLVMMoveBasicBlockAfter(LLVMBasicBlockRef BB, LLVMBasicBlockRef MovePos); + +/* Operations on instructions */ +LLVMBasicBlockRef LLVMGetInstructionParent(LLVMValueRef Inst); +LLVMValueRef LLVMGetFirstInstruction(LLVMBasicBlockRef BB); +LLVMValueRef LLVMGetLastInstruction(LLVMBasicBlockRef BB); +LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst); +LLVMValueRef LLVMGetPreviousInstruction(LLVMValueRef Inst); + +/* Operations on call sites */ +void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC); +unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr); +void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute); +void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, + LLVMAttribute); +void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, + unsigned align); + +/* Operations on call instructions (only) */ +LLVMBool LLVMIsTailCall(LLVMValueRef CallInst); +void LLVMSetTailCall(LLVMValueRef CallInst, LLVMBool IsTailCall); + +/* Operations on phi nodes */ +void LLVMAddIncoming(LLVMValueRef PhiNode, LLVMValueRef *IncomingValues, + LLVMBasicBlockRef *IncomingBlocks, unsigned Count); +unsigned LLVMCountIncoming(LLVMValueRef PhiNode); +LLVMValueRef LLVMGetIncomingValue(LLVMValueRef PhiNode, unsigned Index); +LLVMBasicBlockRef LLVMGetIncomingBlock(LLVMValueRef PhiNode, unsigned Index); + +/*===-- Instruction builders ----------------------------------------------===*/ + +/* An instruction builder represents a point within a basic block, and is the + * 
exclusive means of building instructions using the C interface. + */ + +LLVMBuilderRef LLVMCreateBuilderInContext(LLVMContextRef C); +LLVMBuilderRef LLVMCreateBuilder(void); +void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block, + LLVMValueRef Instr); +void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr); +void LLVMPositionBuilderAtEnd(LLVMBuilderRef Builder, LLVMBasicBlockRef Block); +LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder); +void LLVMClearInsertionPosition(LLVMBuilderRef Builder); +void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr); +void LLVMInsertIntoBuilderWithName(LLVMBuilderRef Builder, LLVMValueRef Instr, + const char *Name); +void LLVMDisposeBuilder(LLVMBuilderRef Builder); + +/* Metadata */ +void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L); +LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder); +void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst); + +/* Terminators */ +LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef); +LLVMValueRef LLVMBuildRet(LLVMBuilderRef, LLVMValueRef V); +LLVMValueRef LLVMBuildAggregateRet(LLVMBuilderRef, LLVMValueRef *RetVals, + unsigned N); +LLVMValueRef LLVMBuildBr(LLVMBuilderRef, LLVMBasicBlockRef Dest); +LLVMValueRef LLVMBuildCondBr(LLVMBuilderRef, LLVMValueRef If, + LLVMBasicBlockRef Then, LLVMBasicBlockRef Else); +LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef, LLVMValueRef V, + LLVMBasicBlockRef Else, unsigned NumCases); +LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr, + unsigned NumDests); +LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch, + const char *Name); +LLVMValueRef LLVMBuildUnwind(LLVMBuilderRef); +LLVMValueRef LLVMBuildUnreachable(LLVMBuilderRef); + +/* Add a case to the switch instruction */ +void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, + LLVMBasicBlockRef Dest); + +/* Add a destination to the indirectbr instruction */ +void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest); + +/* Arithmetic */ +LLVMValueRef LLVMBuildAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFSub(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef, LLVMValueRef LHS, 
LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildURem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildSRem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFRem(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildShl(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildLShr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildAShr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildAnd(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildOr(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildXor(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op, + LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); +LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name); +LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V, + const char *Name); +LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name); +LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name); + +/* Memory */ +LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); +LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty, + LLVMValueRef Val, const char *Name); +LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); +LLVMValueRef LLVMBuildArrayAlloca(LLVMBuilderRef, LLVMTypeRef Ty, + LLVMValueRef Val, const char *Name); +LLVMValueRef LLVMBuildFree(LLVMBuilderRef, LLVMValueRef PointerVal); +LLVMValueRef LLVMBuildLoad(LLVMBuilderRef, LLVMValueRef PointerVal, + const char *Name); +LLVMValueRef LLVMBuildStore(LLVMBuilderRef, LLVMValueRef Val, LLVMValueRef Ptr); +LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer, + LLVMValueRef *Indices, unsigned NumIndices, + const char *Name); +LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer, + LLVMValueRef *Indices, unsigned NumIndices, + const char *Name); +LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer, + unsigned Idx, const char *Name); +LLVMValueRef LLVMBuildGlobalString(LLVMBuilderRef B, const char *Str, + const char *Name); +LLVMValueRef LLVMBuildGlobalStringPtr(LLVMBuilderRef B, const char *Str, + const char *Name); + +/* Casts */ +LLVMValueRef LLVMBuildTrunc(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildZExt(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildSExt(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPToUI(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPToSI(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildUIToFP(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef 
LLVMBuildSIToFP(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPTrunc(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPExt(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildPtrToInt(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildIntToPtr(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildSExtOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/ + LLVMTypeRef DestTy, const char *Name); +LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name); + +/* Comparisons */ +LLVMValueRef LLVMBuildICmp(LLVMBuilderRef, LLVMIntPredicate Op, + LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); +LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef, LLVMRealPredicate Op, + LLVMValueRef LHS, LLVMValueRef RHS, + const char *Name); + +/* Miscellaneous instructions */ +LLVMValueRef LLVMBuildPhi(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name); +LLVMValueRef LLVMBuildCall(LLVMBuilderRef, LLVMValueRef Fn, + LLVMValueRef *Args, unsigned NumArgs, + const char *Name); +LLVMValueRef LLVMBuildSelect(LLVMBuilderRef, LLVMValueRef If, + LLVMValueRef Then, LLVMValueRef Else, + const char *Name); +LLVMValueRef LLVMBuildVAArg(LLVMBuilderRef, LLVMValueRef List, LLVMTypeRef Ty, + const char *Name); +LLVMValueRef LLVMBuildExtractElement(LLVMBuilderRef, LLVMValueRef VecVal, + LLVMValueRef Index, const char *Name); +LLVMValueRef LLVMBuildInsertElement(LLVMBuilderRef, LLVMValueRef VecVal, + LLVMValueRef EltVal, LLVMValueRef Index, + const char *Name); +LLVMValueRef LLVMBuildShuffleVector(LLVMBuilderRef, LLVMValueRef V1, + LLVMValueRef V2, LLVMValueRef Mask, + const char *Name); +LLVMValueRef LLVMBuildExtractValue(LLVMBuilderRef, LLVMValueRef AggVal, + unsigned Index, const char *Name); +LLVMValueRef LLVMBuildInsertValue(LLVMBuilderRef, LLVMValueRef AggVal, + LLVMValueRef EltVal, unsigned Index, + const char *Name); + +LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val, + const char *Name); +LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val, + const char *Name); +LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS, + LLVMValueRef RHS, const char *Name); + + +/*===-- Module providers --------------------------------------------------===*/ + +/* Changes the type of M so it can be passed to FunctionPassManagers and the + * JIT. They take ModuleProviders for historical reasons. + */ +LLVMModuleProviderRef +LLVMCreateModuleProviderForExistingModule(LLVMModuleRef M); + +/* Destroys the module M. 
+ */
+void LLVMDisposeModuleProvider(LLVMModuleProviderRef M);
+
+
+/*===-- Memory buffers ----------------------------------------------------===*/
+
+LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
+                                                  LLVMMemoryBufferRef *OutMemBuf,
+                                                  char **OutMessage);
+LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
+                                         char **OutMessage);
+void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
+
+/*===-- Pass Registry -----------------------------------------------------===*/
+
+/** Return the global pass registry, for use with initialization functions.
+    See llvm::PassRegistry::getPassRegistry. */
+LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
+
+/*===-- Pass Managers -----------------------------------------------------===*/
+
+/** Constructs a new whole-module pass pipeline. This type of pipeline is
+    suitable for link-time optimization and whole-module transformations.
+    See llvm::PassManager::PassManager. */
+LLVMPassManagerRef LLVMCreatePassManager(void);
+
+/** Constructs a new function-by-function pass pipeline over the module
+    provider. It does not take ownership of the module provider. This type of
+    pipeline is suitable for code generation and JIT compilation tasks.
+    See llvm::FunctionPassManager::FunctionPassManager. */
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M);
+
+/** Deprecated: Use LLVMCreateFunctionPassManagerForModule instead. */
+LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef MP);
+
+/** Initializes, executes on the provided module, and finalizes all of the
+    passes scheduled in the pass manager. Returns 1 if any of the passes
+    modified the module, 0 otherwise. See llvm::PassManager::run(Module&). */
+LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M);
+
+/** Initializes all of the function passes scheduled in the function pass
+    manager. Returns 1 if any of the passes modified the module, 0 otherwise.
+    See llvm::FunctionPassManager::doInitialization. */
+LLVMBool LLVMInitializeFunctionPassManager(LLVMPassManagerRef FPM);
+
+/** Executes all of the function passes scheduled in the function pass manager
+    on the provided function. Returns 1 if any of the passes modified the
+    function, 0 otherwise.
+    See llvm::FunctionPassManager::run(Function&). */
+LLVMBool LLVMRunFunctionPassManager(LLVMPassManagerRef FPM, LLVMValueRef F);
+
+/** Finalizes all of the function passes scheduled in the function pass
+    manager. Returns 1 if any of the passes modified the module, 0 otherwise.
+    See llvm::FunctionPassManager::doFinalization. */
+LLVMBool LLVMFinalizeFunctionPassManager(LLVMPassManagerRef FPM);
+
+/** Frees the memory of a pass pipeline. For function pipelines, does not free
+    the module provider.
+    See llvm::PassManagerBase::~PassManagerBase. */
+void LLVMDisposePassManager(LLVMPassManagerRef PM);
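The comments above describe a three-phase protocol for function pipelines: initialize once, run per function, finalize once. A minimal sketch of that flow, assuming `Mod` is an existing LLVMModuleRef; the two passes come from llvm-c/Transforms/Scalar.h later in this patch, and the pass selection is illustrative only:

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/Scalar.h"

    static void runFunctionPasses(LLVMModuleRef Mod) {
      LLVMPassManagerRef FPM = LLVMCreateFunctionPassManagerForModule(Mod);
      LLVMAddPromoteMemoryToRegisterPass(FPM);  /* mem2reg */
      LLVMAddInstructionCombiningPass(FPM);     /* instcombine */

      LLVMInitializeFunctionPassManager(FPM);
      for (LLVMValueRef F = LLVMGetFirstFunction(Mod); F;
           F = LLVMGetNextFunction(F))
        LLVMRunFunctionPassManager(FPM, F);
      LLVMFinalizeFunctionPassManager(FPM);

      LLVMDisposePassManager(FPM);  /* does not free the module itself */
    }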
+
+
+#ifdef __cplusplus
+}
+
+namespace llvm {
+  class MemoryBuffer;
+  class PassManagerBase;
+
+  #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)     \
+    inline ty *unwrap(ref P) {                            \
+      return reinterpret_cast<ty*>(P);                    \
+    }                                                     \
+                                                          \
+    inline ref wrap(const ty *P) {                        \
+      return reinterpret_cast<ref>(const_cast<ty*>(P));   \
+    }
+
+  #define DEFINE_ISA_CONVERSION_FUNCTIONS(ty, ref)  \
+    DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)     \
+                                                    \
+    template<typename T>                            \
+    inline T *unwrap(ref P) {                       \
+      return cast<T>(unwrap(P));                    \
+    }
+
+  #define DEFINE_STDCXX_CONVERSION_FUNCTIONS(ty, ref) \
+    DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)       \
+                                                      \
+    template<typename T>                              \
+    inline T *unwrap(ref P) {                         \
+      T *Q = (T*)unwrap(P);                           \
+      assert(Q && "Invalid cast!");                   \
+      return Q;                                       \
+    }
+
+  DEFINE_ISA_CONVERSION_FUNCTIONS   (Type,            LLVMTypeRef         )
+  DEFINE_ISA_CONVERSION_FUNCTIONS   (Value,           LLVMValueRef        )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module,          LLVMModuleRef       )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock,      LLVMBasicBlockRef   )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>,     LLVMBuilderRef      )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(PATypeHolder,    LLVMTypeHandleRef   )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer,    LLVMMemoryBufferRef )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext,     LLVMContextRef      )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use,             LLVMUseRef          )
+  DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassManagerBase, LLVMPassManagerRef  )
+  DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry,    LLVMPassRegistryRef )
+  /* LLVMModuleProviderRef exists for historical reasons, but now just holds a
+   * Module.
+   */
+  inline Module *unwrap(LLVMModuleProviderRef MP) {
+    return reinterpret_cast<Module*>(MP);
+  }
+
+  #undef DEFINE_STDCXX_CONVERSION_FUNCTIONS
+  #undef DEFINE_ISA_CONVERSION_FUNCTIONS
+  #undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS
+
+  /* Specialized opaque context conversions.
+   */
+  inline LLVMContext **unwrap(LLVMContextRef* Tys) {
+    return reinterpret_cast<LLVMContext**>(Tys);
+  }
+
+  inline LLVMContextRef *wrap(const LLVMContext **Tys) {
+    return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
+  }
+
+  /* Specialized opaque type conversions.
+   */
+  inline Type **unwrap(LLVMTypeRef* Tys) {
+    return reinterpret_cast<Type**>(Tys);
+  }
+
+  inline LLVMTypeRef *wrap(const Type **Tys) {
+    return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
+  }
+
+  /* Specialized opaque value conversions.
+   */
+  inline Value **unwrap(LLVMValueRef *Vals) {
+    return reinterpret_cast<Value**>(Vals);
+  }
+
+  template<typename T>
+  inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
+  #if DEBUG
+    for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
+      cast<T>(*I);
+  #endif
+    return reinterpret_cast<T**>(Vals);
+  }
+
+  inline LLVMValueRef *wrap(const Value **Vals) {
+    return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
+  }
+}
+
+#endif /* !defined(__cplusplus) */
+
+#endif /* !defined(LLVM_C_CORE_H) */
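Before the next header, a short sketch of the instruction-builder workflow the Core.h section above exposes: create a builder, position it at the end of a block, and emit instructions through it. The function and value names here are illustrative only, and LLVMInt32Type/LLVMFunctionType come from earlier in Core.h:

    #include "llvm-c/Core.h"

    /* Builds the equivalent of: define i32 @sum(i32 %a, i32 %b) { ret i32 %a+%b } */
    static LLVMValueRef buildSum(LLVMModuleRef Mod) {
      LLVMTypeRef Params[2] = { LLVMInt32Type(), LLVMInt32Type() };
      LLVMTypeRef FnTy = LLVMFunctionType(LLVMInt32Type(), Params, 2, 0);
      LLVMValueRef Fn = LLVMAddFunction(Mod, "sum", FnTy);

      LLVMBasicBlockRef Entry = LLVMAppendBasicBlock(Fn, "entry");
      LLVMBuilderRef B = LLVMCreateBuilder();
      LLVMPositionBuilderAtEnd(B, Entry);

      LLVMValueRef Add = LLVMBuildAdd(B, LLVMGetParam(Fn, 0),
                                      LLVMGetParam(Fn, 1), "addtmp");
      LLVMBuildRet(B, Add);

      LLVMDisposeBuilder(B);  /* the built instructions remain in the module */
      return Fn;
    }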
diff --git a/final/include/llvm-c/EnhancedDisassembly.h b/final/include/llvm-c/EnhancedDisassembly.h
new file mode 100644
index 00000000000..28ac0ed2ab3
--- /dev/null
+++ b/final/include/llvm-c/EnhancedDisassembly.h
@@ -0,0 +1,513 @@
+/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to EnhancedDisassembly.so, which      *|
+|* implements a disassembler with the ability to extract operand values and   *|
+|* individual tokens from assembly instructions.                              *|
+|*                                                                            *|
+|* The header declares additional interfaces if the host compiler supports    *|
+|* the blocks API.                                                            *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
+#define LLVM_C_ENHANCEDDISASSEMBLY_H
+
+#include "llvm/Support/DataTypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
+ @typedef EDByteReaderCallback
+ Interface to memory from which instructions may be read.
+ @param byte A pointer whose target should be filled in with the data returned.
+ @param address The address of the byte to be read.
+ @param arg An anonymous argument for client use.
+ @result 0 on success; -1 otherwise.
+ */
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/*!
+ @typedef EDRegisterReaderCallback
+ Interface to the register state, from which register values may be read.
+ @param value A pointer whose target should be filled in with the value of the
+   register.
+ @param regID The LLVM register identifier for the register to read.
+ @param arg An anonymous argument for client use.
+ @result 0 if the register could be read; -1 otherwise.
+ */
+typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
+                                        void* arg);
+
+/*!
+ @typedef EDAssemblySyntax_t
+ An assembly syntax for use in tokenizing instructions.
+ */
+enum {
+/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+  kEDAssemblySyntaxX86Intel  = 0,
+/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+  kEDAssemblySyntaxX86ATT    = 1,
+  kEDAssemblySyntaxARMUAL    = 2
+};
+typedef unsigned EDAssemblySyntax_t;
+
+/*!
+ @typedef EDDisassemblerRef
+ Encapsulates a disassembler for a single CPU architecture.
+ */
+typedef void *EDDisassemblerRef;
+
+/*!
+ @typedef EDInstRef
+ Encapsulates a single disassembled instruction in one assembly syntax.
+ */
+typedef void *EDInstRef;
+
+/*!
+ @typedef EDTokenRef
+ Encapsulates a token from the disassembly of an instruction.
+ */
+typedef void *EDTokenRef;
+
+/*!
+ @typedef EDOperandRef
+ Encapsulates an operand of an instruction.
+ */
+typedef void *EDOperandRef;
+
+/*!
+ @functiongroup Getting a disassembler
+ */
+
+/*!
+ @function EDGetDisassembler
+ Gets the disassembler for a given target.
+ @param disassembler A pointer whose target will be filled in with the
+   disassembler.
+ @param triple Identifies the target. Example: "x86_64-apple-darwin10"
+ @param syntax The assembly syntax to use when decoding instructions.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetDisassembler(EDDisassemblerRef *disassembler,
+                      const char *triple,
+                      EDAssemblySyntax_t syntax);
+
+/*!
+ @functiongroup Generic architectural queries
+ */
+
+/*!
+ @function EDGetRegisterName
+ Gets the human-readable name for a given register.
+ @param regName A pointer whose target will be pointed at the name of the
+   register. The name does not need to be deallocated and will remain valid
+   for the lifetime of the disassembler.
+ @param disassembler The disassembler to query for the name.
+ @param regID The register identifier, as returned by EDRegisterTokenValue.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetRegisterName(const char** regName,
+                      EDDisassemblerRef disassembler,
+                      unsigned regID);
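A minimal sketch of obtaining a disassembler through the entry point above; the triple follows the documented example, and error handling is reduced to a NULL return:

    #include "llvm-c/EnhancedDisassembly.h"

    static EDDisassemblerRef getX86_64Disassembler(void) {
      EDDisassemblerRef disassembler;
      /* 0 on success; -1 otherwise, per the contract above */
      if (EDGetDisassembler(&disassembler, "x86_64-apple-darwin10",
                            kEDAssemblySyntaxX86ATT) != 0)
        return NULL;
      return disassembler;
    }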
+
+/*!
+ @function EDRegisterIsStackPointer
+ Determines if a register is one of the platform's stack-pointer registers.
+ @param disassembler The disassembler to query.
+ @param regID The register identifier, as returned by EDRegisterTokenValue.
+ @result 1 if true; 0 otherwise.
+ */
+int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
+                             unsigned regID);
+
+/*!
+ @function EDRegisterIsProgramCounter
+ Determines if a register is one of the platform's program-counter registers.
+ @param disassembler The disassembler to query.
+ @param regID The register identifier, as returned by EDRegisterTokenValue.
+ @result 1 if true; 0 otherwise.
+ */
+int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
+                               unsigned regID);
+
+/*!
+ @functiongroup Creating and querying instructions
+ */
+
+/*!
+ @function EDCreateInsts
+ Gets a set of contiguous instructions from a disassembler.
+ @param insts A pointer to an array that will be filled in with the
+   instructions. Must have at least count entries. Entries not filled in will
+   be set to NULL.
+ @param count The maximum number of instructions to fill in.
+ @param disassembler The disassembler to use when decoding the instructions.
+ @param byteReader The function to use when reading the instruction's machine
+   code.
+ @param address The address of the first byte of the instruction.
+ @param arg An anonymous argument to be passed to byteReader.
+ @result The number of instructions read on success; 0 otherwise.
+ */
+unsigned int EDCreateInsts(EDInstRef *insts,
+                           unsigned int count,
+                           EDDisassemblerRef disassembler,
+                           EDByteReaderCallback byteReader,
+                           uint64_t address,
+                           void *arg);
+
+/*!
+ @function EDReleaseInst
+ Frees the memory for an instruction. The instruction can no longer be accessed
+ after this call.
+ @param inst The instruction to be freed.
+ */
+void EDReleaseInst(EDInstRef inst);
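Putting EDCreateInsts together with a byte-reader callback, as a sketch: the CodeBuffer type is a hypothetical stand-in for a client's code buffer, and each returned instruction must eventually be released with EDReleaseInst. EDGetInstString is declared just below:

    #include <stdio.h>
    #include "llvm-c/EnhancedDisassembly.h"

    struct CodeBuffer { const uint8_t *bytes; uint64_t size; }; /* hypothetical */

    static int readByte(uint8_t *byte, uint64_t address, void *arg) {
      struct CodeBuffer *code = (struct CodeBuffer *)arg;
      if (address >= code->size)
        return -1;                 /* out of range: report failure */
      *byte = code->bytes[address];
      return 0;
    }

    static void dumpInsts(EDDisassemblerRef disassembler,
                          struct CodeBuffer *code) {
      EDInstRef insts[16];
      unsigned n = EDCreateInsts(insts, 16, disassembler, readByte, 0, code);
      for (unsigned i = 0; i != n; ++i) {
        const char *str;
        if (EDGetInstString(&str, insts[i]) == 0)
          printf("%s\n", str);
        EDReleaseInst(insts[i]);   /* the string dies with the instruction */
      }
    }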
+
+/*!
+ @function EDInstByteSize
+ @param inst The instruction to be queried.
+ @result The number of bytes in the instruction's machine-code representation.
+ */
+int EDInstByteSize(EDInstRef inst);
+
+/*!
+ @function EDGetInstString
+ Gets the disassembled text equivalent of the instruction.
+ @param buf A pointer whose target will be filled in with a pointer to the
+   string. (The string becomes invalid when the instruction is released.)
+ @param inst The instruction to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetInstString(const char **buf,
+                    EDInstRef inst);
+
+/*!
+ @function EDInstID
+ @param instID A pointer whose target will be filled in with the LLVM identifier
+   for the instruction.
+ @param inst The instruction to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDInstID(unsigned *instID, EDInstRef inst);
+
+/*!
+ @function EDInstIsBranch
+ @param inst The instruction to be queried.
+ @result 1 if the instruction is a branch instruction; 0 if it is some other
+   type of instruction; -1 if there was an error.
+ */
+int EDInstIsBranch(EDInstRef inst);
+
+/*!
+ @function EDInstIsMove
+ @param inst The instruction to be queried.
+ @result 1 if the instruction is a move instruction; 0 if it is some other
+   type of instruction; -1 if there was an error.
+ */
+int EDInstIsMove(EDInstRef inst);
+
+/*!
+ @function EDBranchTargetID
+ @param inst The instruction to be queried.
+ @result The ID of the branch target operand, suitable for use with
+   EDGetOperand. -1 if no such operand exists.
+ */
+int EDBranchTargetID(EDInstRef inst);
+
+/*!
+ @function EDMoveSourceID
+ @param inst The instruction to be queried.
+ @result The ID of the move source operand, suitable for use with
+   EDGetOperand. -1 if no such operand exists.
+ */
+int EDMoveSourceID(EDInstRef inst);
+
+/*!
+ @function EDMoveTargetID
+ @param inst The instruction to be queried.
+ @result The ID of the move target operand, suitable for use with
+   EDGetOperand. -1 if no such operand exists.
+ */
+int EDMoveTargetID(EDInstRef inst);
+
+/*!
+ @functiongroup Creating and querying tokens
+ */
+
+/*!
+ @function EDNumTokens
+ @param inst The instruction to be queried.
+ @result The number of tokens in the instruction, or -1 on error.
+ */
+int EDNumTokens(EDInstRef inst);
+
+/*!
+ @function EDGetToken
+ Retrieves a token from an instruction. The token is valid until the
+ instruction is released.
+ @param token A pointer to be filled in with the token.
+ @param inst The instruction to be queried.
+ @param index The index of the token in the instruction.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetToken(EDTokenRef *token,
+               EDInstRef inst,
+               int index);
+
+/*!
+ @function EDGetTokenString
+ Gets the disassembled text for a token.
+ @param buf A pointer whose target will be filled in with a pointer to the
+   string. (The string becomes invalid when the token is released.)
+ @param token The token to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetTokenString(const char **buf,
+                     EDTokenRef token);
+
+/*!
+ @function EDOperandIndexForToken
+ Returns the index of the operand to which a token belongs.
+ @param token The token to be queried.
+ @result The operand index on success; -1 otherwise.
+ */
+int EDOperandIndexForToken(EDTokenRef token);
+
+/*!
+ @function EDTokenIsWhitespace
+ @param token The token to be queried.
+ @result 1 if the token is whitespace; 0 if not; -1 on error.
+ */
+int EDTokenIsWhitespace(EDTokenRef token);
+
+/*!
+ @function EDTokenIsPunctuation
+ @param token The token to be queried.
+ @result 1 if the token is punctuation; 0 if not; -1 on error.
+ */
+int EDTokenIsPunctuation(EDTokenRef token);
+
+/*!
+ @function EDTokenIsOpcode
+ @param token The token to be queried.
+ @result 1 if the token is an opcode; 0 if not; -1 on error.
+ */
+int EDTokenIsOpcode(EDTokenRef token);
+
+/*!
+ @function EDTokenIsLiteral
+ @param token The token to be queried.
+ @result 1 if the token is a numeric literal; 0 if not; -1 on error.
+ */
+int EDTokenIsLiteral(EDTokenRef token);
+
+/*!
+ @function EDTokenIsRegister
+ @param token The token to be queried.
+ @result 1 if the token identifies a register; 0 if not; -1 on error.
+ */
+int EDTokenIsRegister(EDTokenRef token);
+
+/*!
+ @function EDTokenIsNegativeLiteral
+ @param token The token to be queried.
+ @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
+ */
+int EDTokenIsNegativeLiteral(EDTokenRef token);
+
+/*!
+ @function EDLiteralTokenAbsoluteValue
+ @param value A pointer whose target will be filled in with the absolute value
+   of the literal.
+ @param token The token to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDLiteralTokenAbsoluteValue(uint64_t *value,
+                                EDTokenRef token);
+
+/*!
+ @function EDRegisterTokenValue
+ @param registerID A pointer whose target will be filled in with the LLVM
+   register identifier for the token.
+ @param token The token to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDRegisterTokenValue(unsigned *registerID,
+                         EDTokenRef token);
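As a sketch of the token API above, assuming `inst` is a valid EDInstRef: walk the tokens, print each one, and report the LLVM register ID for register tokens:

    #include <stdio.h>
    #include "llvm-c/EnhancedDisassembly.h"

    static void dumpTokens(EDInstRef inst) {
      int ntokens = EDNumTokens(inst);
      for (int i = 0; i < ntokens; ++i) {
        EDTokenRef token;
        const char *text;
        if (EDGetToken(&token, inst, i) != 0 ||
            EDGetTokenString(&text, token) != 0)
          continue;                       /* skip tokens we cannot read */
        if (EDTokenIsRegister(token) == 1) {
          unsigned regID;
          if (EDRegisterTokenValue(&regID, token) == 0)
            printf("%s -> LLVM reg %u\n", text, regID);
        } else {
          printf("%s\n", text);
        }
      }
    }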
+
+/*!
+ @functiongroup Creating and querying operands
+ */
+
+/*!
+ @function EDNumOperands
+ @param inst The instruction to be queried.
+ @result The number of operands in the instruction, or -1 on error.
+ */
+int EDNumOperands(EDInstRef inst);
+
+/*!
+ @function EDGetOperand
+ Retrieves an operand from an instruction. The operand is valid until the
+ instruction is released.
+ @param operand A pointer to be filled in with the operand.
+ @param inst The instruction to be queried.
+ @param index The index of the operand in the instruction.
+ @result 0 on success; -1 otherwise.
+ */
+int EDGetOperand(EDOperandRef *operand,
+                 EDInstRef inst,
+                 int index);
+
+/*!
+ @function EDOperandIsRegister
+ @param operand The operand to be queried.
+ @result 1 if the operand names a register; 0 if not; -1 on error.
+ */
+int EDOperandIsRegister(EDOperandRef operand);
+
+/*!
+ @function EDOperandIsImmediate
+ @param operand The operand to be queried.
+ @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
+ */
+int EDOperandIsImmediate(EDOperandRef operand);
+
+/*!
+ @function EDOperandIsMemory
+ @param operand The operand to be queried.
+ @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
+ */
+int EDOperandIsMemory(EDOperandRef operand);
+
+/*!
+ @function EDRegisterOperandValue
+ @param value A pointer whose target will be filled in with the LLVM register ID
+   of the register named by the operand.
+ @param operand The operand to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDRegisterOperandValue(unsigned *value,
+                           EDOperandRef operand);
+
+/*!
+ @function EDImmediateOperandValue
+ @param value A pointer whose target will be filled in with the value of the
+   immediate.
+ @param operand The operand to be queried.
+ @result 0 on success; -1 otherwise.
+ */
+int EDImmediateOperandValue(uint64_t *value,
+                            EDOperandRef operand);
+
+/*!
+ @function EDEvaluateOperand
+ Evaluates an operand using a client-supplied register state accessor. Register
+ operands are evaluated by reading the value of the register; immediate operands
+ are evaluated by reporting the immediate value; memory operands are evaluated
+ by computing the target address (with only those relocations applied that were
+ already applied to the original bytes).
+ @param result A pointer whose target is to be filled with the result of
+   evaluating the operand.
+ @param operand The operand to be evaluated.
+ @param regReader The function to use when reading registers from the register
+   state.
+ @param arg An anonymous argument for client use.
+ @result 0 if the operand could be evaluated; -1 otherwise.
+ */
+int EDEvaluateOperand(uint64_t *result,
+                      EDOperandRef operand,
+                      EDRegisterReaderCallback regReader,
+                      void *arg);
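A sketch of operand evaluation with a register-reader callback; the RegState type and lookupRegister helper are hypothetical stand-ins for a client's register file:

    #include "llvm-c/EnhancedDisassembly.h"

    struct RegState;                                          /* hypothetical */
    extern int lookupRegister(struct RegState *state,
                              unsigned regID, uint64_t *value); /* hypothetical */

    static int readRegister(uint64_t *value, unsigned regID, void *arg) {
      /* 0 if the register could be read; -1 otherwise, per the contract above */
      return lookupRegister((struct RegState *)arg, regID, value);
    }

    static int operandTarget(EDOperandRef operand, struct RegState *state,
                             uint64_t *result) {
      return EDEvaluateOperand(result, operand, readRegister, state);
    }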
+
+#ifdef __BLOCKS__
+
+/*!
+ @typedef EDByteBlock_t
+ Block-based interface to memory from which instructions may be read.
+ @param byte A pointer whose target should be filled in with the data returned.
+ @param address The address of the byte to be read.
+ @result 0 on success; -1 otherwise.
+ */
+typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);
+
+/*!
+ @typedef EDRegisterBlock_t
+ Block-based interface to the register state, from which register values may
+ be read.
+ @param value A pointer whose target should be filled in with the value of the
+   register.
+ @param regID The LLVM register identifier for the register to read.
+ @result 0 if the register could be read; -1 otherwise.
+ */
+typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);
+
+/*!
+ @typedef EDTokenVisitor_t
+ Block-based handler for individual tokens.
+ @param token The current token being read.
+ @result 0 to continue; 1 to stop normally; -1 on error.
+ */
+typedef int (^EDTokenVisitor_t)(EDTokenRef token);
+
+/*! @functiongroup Block-based interfaces */
+
+/*!
+ @function EDBlockCreateInsts
+ Gets a set of contiguous instructions from a disassembler, using a block to
+ read memory.
+ @param insts A pointer to an array that will be filled in with the
+   instructions. Must have at least count entries. Entries not filled in will
+   be set to NULL.
+ @param count The maximum number of instructions to fill in.
+ @param disassembler The disassembler to use when decoding the instructions.
+ @param byteBlock The block to use when reading the instruction's machine
+   code.
+ @param address The address of the first byte of the instruction.
+ @result The number of instructions read on success; 0 otherwise.
+ */
+unsigned int EDBlockCreateInsts(EDInstRef *insts,
+                                int count,
+                                EDDisassemblerRef disassembler,
+                                EDByteBlock_t byteBlock,
+                                uint64_t address);
+
+/*!
+ @function EDBlockEvaluateOperand
+ Evaluates an operand using a block to read registers.
+ @param result A pointer whose target is to be filled with the result of
+   evaluating the operand.
+ @param operand The operand to be evaluated.
+ @param regBlock The block to use when reading registers from the register
+   state.
+ @result 0 if the operand could be evaluated; -1 otherwise.
+ */
+int EDBlockEvaluateOperand(uint64_t *result,
+                           EDOperandRef operand,
+                           EDRegisterBlock_t regBlock);
+
+/*!
+ @function EDBlockVisitTokens
+ Visits every token with a visitor.
+ @param inst The instruction with the tokens to be visited.
+ @param visitor The visitor.
+ @result 0 if the visit ended normally; -1 if the visitor encountered an error
+   or there was some other error.
+ */
+int EDBlockVisitTokens(EDInstRef inst,
+                       EDTokenVisitor_t visitor);
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/final/include/llvm-c/ExecutionEngine.h b/final/include/llvm-c/ExecutionEngine.h
new file mode 100644
index 00000000000..f5f40619ef0
--- /dev/null
+++ b/final/include/llvm-c/ExecutionEngine.h
@@ -0,0 +1,152 @@
+/*===-- llvm-c/ExecutionEngine.h - ExecutionEngine Lib C Iface --*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to libLLVMExecutionEngine.o, which    *|
+|* implements JIT compilation and interpretation of LLVM IR.                  *|
+|*                                                                            *|
+|* Many exotic languages can interoperate with C code but have a harder time  *|
+|* with C++ due to name mangling. So in addition to C, this interface enables *|
+|* tools written in such languages.
*| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_EXECUTIONENGINE_H +#define LLVM_C_EXECUTIONENGINE_H + +#include "llvm-c/Core.h" +#include "llvm-c/Target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void LLVMLinkInJIT(void); +void LLVMLinkInInterpreter(void); + +typedef struct LLVMOpaqueGenericValue *LLVMGenericValueRef; +typedef struct LLVMOpaqueExecutionEngine *LLVMExecutionEngineRef; + +/*===-- Operations on generic values --------------------------------------===*/ + +LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, + unsigned long long N, + LLVMBool IsSigned); + +LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P); + +LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef Ty, double N); + +unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef); + +unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenVal, + LLVMBool IsSigned); + +void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal); + +double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal); + +void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal); + +/*===-- Operations on execution engines -----------------------------------===*/ + +LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE, + LLVMModuleRef M, + char **OutError); + +LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp, + LLVMModuleRef M, + char **OutError); + +LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, + LLVMModuleRef M, + unsigned OptLevel, + char **OutError); + +/** Deprecated: Use LLVMCreateExecutionEngineForModule instead. */ +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + LLVMModuleProviderRef MP, + char **OutError); + +/** Deprecated: Use LLVMCreateInterpreterForModule instead. */ +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError); + +/** Deprecated: Use LLVMCreateJITCompilerForModule instead. */ +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError); + +void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE); + +void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE); + +void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE); + +int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned ArgC, const char * const *ArgV, + const char * const *EnvP); + +LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned NumArgs, + LLVMGenericValueRef *Args); + +void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F); + +void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M); + +/** Deprecated: Use LLVMAddModule instead. */ +void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP); + +LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, + LLVMModuleRef *OutMod, char **OutError); + +/** Deprecated: Use LLVMRemoveModule instead. 
 */
+LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
+                                  LLVMModuleProviderRef MP,
+                                  LLVMModuleRef *OutMod, char **OutError);
+
+LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
+                          LLVMValueRef *OutFn);
+
+void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn);
+
+LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE);
+
+void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global,
+                          void* Addr);
+
+void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global);
+
+#ifdef __cplusplus
+}
+
+namespace llvm {
+  struct GenericValue;
+  class ExecutionEngine;
+
+  #define DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ty, ref)     \
+    inline ty *unwrap(ref P) {                            \
+      return reinterpret_cast<ty*>(P);                    \
+    }                                                     \
+                                                          \
+    inline ref wrap(const ty *P) {                        \
+      return reinterpret_cast<ref>(const_cast<ty*>(P));   \
+    }
+
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue,    LLVMGenericValueRef    )
+  DEFINE_SIMPLE_CONVERSION_FUNCTIONS(ExecutionEngine, LLVMExecutionEngineRef )
+
+  #undef DEFINE_SIMPLE_CONVERSION_FUNCTIONS
+}
+
+#endif /* defined(__cplusplus) */
+
+#endif
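A sketch of the JIT workflow these declarations support, assuming `Mod` already contains a function i32 sum(i32, i32) (for instance, the builder sketch earlier); LLVMInitializeNativeTarget comes from llvm-c/Target.h later in this patch, and LLVMDisposeMessage from llvm-c/Core.h:

    #include <stdio.h>
    #include "llvm-c/Core.h"
    #include "llvm-c/ExecutionEngine.h"
    #include "llvm-c/Target.h"

    static void jitAndRun(LLVMModuleRef Mod) {
      LLVMLinkInJIT();
      LLVMInitializeNativeTarget();

      LLVMExecutionEngineRef EE;
      char *Error = NULL;
      if (LLVMCreateJITCompilerForModule(&EE, Mod, 2, &Error)) {
        fprintf(stderr, "EE creation failed: %s\n", Error);
        LLVMDisposeMessage(Error);
        return;
      }

      LLVMValueRef Fn;
      if (!LLVMFindFunction(EE, "sum", &Fn)) {  /* 0 means the lookup succeeded */
        LLVMGenericValueRef Args[2] = {
          LLVMCreateGenericValueOfInt(LLVMInt32Type(), 2, 0),
          LLVMCreateGenericValueOfInt(LLVMInt32Type(), 3, 0)
        };
        LLVMGenericValueRef Res = LLVMRunFunction(EE, Fn, 2, Args);
        printf("sum(2, 3) = %llu\n", LLVMGenericValueToInt(Res, 0));
      }
      LLVMDisposeExecutionEngine(EE);  /* the engine owns the module it was given */
    }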
diff --git a/final/include/llvm-c/Initialization.h b/final/include/llvm-c/Initialization.h
new file mode 100644
index 00000000000..3b59abbec03
--- /dev/null
+++ b/final/include/llvm-c/Initialization.h
@@ -0,0 +1,40 @@
+/*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to LLVM initialization routines,      *|
+|* which must be called before you can use the functionality provided by      *|
+|* the corresponding LLVM library.                                            *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_INITIALIZEPASSES_H
+#define LLVM_C_INITIALIZEPASSES_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void LLVMInitializeCore(LLVMPassRegistryRef R);
+void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
+void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
+void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
+void LLVMInitializeIPO(LLVMPassRegistryRef R);
+void LLVMInitializeInstrumentation(LLVMPassRegistryRef R);
+void LLVMInitializeAnalysis(LLVMPassRegistryRef R);
+void LLVMInitializeIPA(LLVMPassRegistryRef R);
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R);
+void LLVMInitializeTarget(LLVMPassRegistryRef R);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/final/include/llvm-c/LinkTimeOptimizer.h b/final/include/llvm-c/LinkTimeOptimizer.h
new file mode 100644
index 00000000000..fca394681c7
--- /dev/null
+++ b/final/include/llvm-c/LinkTimeOptimizer.h
@@ -0,0 +1,58 @@
+//===-- llvm/LinkTimeOptimizer.h - LTO Public C Interface -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header provides a C API to use the LLVM link time optimization
+// library. This is intended to be used by linkers which are C-only in
+// their implementation for performing LTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LTO_CAPI_H__
+#define __LTO_CAPI_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+  /// This provides a dummy type for pointers to the LTO object.
+  typedef void* llvm_lto_t;
+
+  /// This provides a C-visible enumerator to manage status codes.
+  /// This should map exactly onto the C++ enumerator LTOStatus.
+  typedef enum llvm_lto_status {
+    LLVM_LTO_UNKNOWN,
+    LLVM_LTO_OPT_SUCCESS,
+    LLVM_LTO_READ_SUCCESS,
+    LLVM_LTO_READ_FAILURE,
+    LLVM_LTO_WRITE_FAILURE,
+    LLVM_LTO_NO_TARGET,
+    LLVM_LTO_NO_WORK,
+    LLVM_LTO_MODULE_MERGE_FAILURE,
+    LLVM_LTO_ASM_FAILURE,
+
+    // Added C-specific error codes
+    LLVM_LTO_NULL_OBJECT
+  } llvm_lto_status_t;
+
+  /// This provides a C interface to initialize the link time optimizer. This
+  /// allows the linker to use dlopen() to dynamically load LinkTimeOptimizer.
+  /// extern "C" helps because the dlopen() interface looks symbols up by name.
+  extern llvm_lto_t llvm_create_optimizer(void);
+  extern void llvm_destroy_optimizer(llvm_lto_t lto);
+
+  extern llvm_lto_status_t llvm_read_object_file
+    (llvm_lto_t lto, const char* input_filename);
+  extern llvm_lto_status_t llvm_optimize_modules
+    (llvm_lto_t lto, const char* output_filename);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
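A sketch of the dlopen-friendly workflow this header describes, with illustrative file names; the success codes assumed here (LLVM_LTO_READ_SUCCESS, LLVM_LTO_OPT_SUCCESS) are read off the enumerator above:

    #include "llvm-c/LinkTimeOptimizer.h"

    static int runLTO(const char *input, const char *output) {
      llvm_lto_t lto = llvm_create_optimizer();
      if (!lto)
        return 1;

      llvm_lto_status_t status = llvm_read_object_file(lto, input);
      if (status == LLVM_LTO_READ_SUCCESS)
        status = llvm_optimize_modules(lto, output);

      llvm_destroy_optimizer(lto);
      return status != LLVM_LTO_OPT_SUCCESS;
    }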
diff --git a/final/include/llvm-c/Target.h b/final/include/llvm-c/Target.h
new file mode 100644
index 00000000000..2cd15c3fa30
--- /dev/null
+++ b/final/include/llvm-c/Target.h
@@ -0,0 +1,172 @@
+/*===-- llvm-c/Target.h - Target Lib C Iface --------------------*- C++ -*-===*/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source      */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+/*                                                                            */
+/* This header declares the C interface to libLLVMTarget.a, which             */
+/* implements target information.                                             */
+/*                                                                            */
+/* Many exotic languages can interoperate with C code but have a harder time  */
+/* with C++ due to name mangling. So in addition to C, this interface enables */
+/* tools written in such languages.                                           */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TARGET_H
+#define LLVM_C_TARGET_H
+
+#include "llvm-c/Core.h"
+#include "llvm/Config/llvm-config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum LLVMByteOrdering { LLVMBigEndian, LLVMLittleEndian };
+
+typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
+typedef struct LLVMStructLayout *LLVMStructLayoutRef;
+
+/* Declare all of the target-initialization functions that are available. */
+#define LLVM_TARGET(TargetName) \
+  void LLVMInitialize##TargetName##TargetInfo(void);
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET  /* Explicit undef to make SWIG happier */
+
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(void);
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET  /* Explicit undef to make SWIG happier */
+
+/** LLVMInitializeAllTargetInfos - The main program should call this function if
+    it wants access to all available targets that LLVM is configured to
+    support. */
+static inline void LLVMInitializeAllTargetInfos(void) {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo();
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET  /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeAllTargets - The main program should call this function if it
+    wants to link in all available targets that LLVM is configured to
+    support. */
+static inline void LLVMInitializeAllTargets(void) {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET  /* Explicit undef to make SWIG happier */
+}
+
+/** LLVMInitializeNativeTarget - The main program should call this function to
+    initialize the native target corresponding to the host. This is useful
+    for JIT applications to ensure that the target gets linked in correctly. */
+static inline LLVMBool LLVMInitializeNativeTarget(void) {
+  /* If we have a native target, initialize it to ensure it is linked in. */
+#ifdef LLVM_NATIVE_TARGET
+  LLVM_NATIVE_TARGETINFO();
+  LLVM_NATIVE_TARGET();
+  return 0;
+#else
+  return 1;
+#endif
+}
+
+/*===-- Target Data -------------------------------------------------------===*/
+
+/** Creates target data from a target layout string.
+    See the constructor llvm::TargetData::TargetData. */
+LLVMTargetDataRef LLVMCreateTargetData(const char *StringRep);
+
+/** Adds target data information to a pass manager. This does not take ownership
+    of the target data.
+    See the method llvm::PassManagerBase::add. */
+void LLVMAddTargetData(LLVMTargetDataRef, LLVMPassManagerRef);
+
+/** Converts target data to a target layout string. The string must be disposed
+    with LLVMDisposeMessage.
+    See the constructor llvm::TargetData::TargetData. */
+char *LLVMCopyStringRepOfTargetData(LLVMTargetDataRef);
+
+/** Returns the byte order of a target, either LLVMBigEndian or
+    LLVMLittleEndian.
+    See the method llvm::TargetData::isLittleEndian. */
+enum LLVMByteOrdering LLVMByteOrder(LLVMTargetDataRef);
+
+/** Returns the pointer size in bytes for a target.
+    See the method llvm::TargetData::getPointerSize. */
+unsigned LLVMPointerSize(LLVMTargetDataRef);
+
+/** Returns the integer type that is the same size as a pointer on a target.
+    See the method llvm::TargetData::getIntPtrType. */
+LLVMTypeRef LLVMIntPtrType(LLVMTargetDataRef);
+
+/** Computes the size of a type in bits for a target.
+    See the method llvm::TargetData::getTypeSizeInBits. */
+unsigned long long LLVMSizeOfTypeInBits(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the storage size of a type in bytes for a target.
+    See the method llvm::TargetData::getTypeStoreSize. */
+unsigned long long LLVMStoreSizeOfType(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the ABI size of a type in bytes for a target.
+    See the method llvm::TargetData::getTypeAllocSize. */
+unsigned long long LLVMABISizeOfType(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the ABI alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getABITypeAlignment. */
+unsigned LLVMABIAlignmentOfType(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the call frame alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getCallFrameTypeAlignment. */
+unsigned LLVMCallFrameAlignmentOfType(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the preferred alignment of a type in bytes for a target.
+    See the method llvm::TargetData::getPrefTypeAlignment. */
+unsigned LLVMPreferredAlignmentOfType(LLVMTargetDataRef, LLVMTypeRef);
+
+/** Computes the preferred alignment of a global variable in bytes for a target.
+    See the method llvm::TargetData::getPreferredAlignment. */
+unsigned LLVMPreferredAlignmentOfGlobal(LLVMTargetDataRef,
+                                        LLVMValueRef GlobalVar);
+
+/** Computes the structure element that contains the byte offset for a target.
+    See the method llvm::StructLayout::getElementContainingOffset. */
+unsigned LLVMElementAtOffset(LLVMTargetDataRef, LLVMTypeRef StructTy,
+                             unsigned long long Offset);
+
+/** Computes the byte offset of the indexed struct element for a target.
+    See the method llvm::StructLayout::getElementOffset. */
+unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef, LLVMTypeRef StructTy,
+                                       unsigned Element);
+
+/** Struct layouts are speculatively cached. If a TargetDataRef is alive when
+    types are being refined and removed, this method must be called whenever a
+    struct type is removed to avoid a dangling pointer in this cache.
+    See the method llvm::TargetData::InvalidateStructLayoutInfo. */
+void LLVMInvalidateStructLayout(LLVMTargetDataRef, LLVMTypeRef StructTy);
+
+/** Deallocates a TargetData.
+    See the destructor llvm::TargetData::~TargetData. */
+void LLVMDisposeTargetData(LLVMTargetDataRef);
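A sketch of querying type layout through the Target Data section above; the target layout string is illustrative (a little-endian target with 64-bit pointers), and %llu matches the unsigned long long results:

    #include <stdio.h>
    #include "llvm-c/Core.h"
    #include "llvm-c/Target.h"

    static void dumpI32Layout(void) {
      LLVMTargetDataRef TD = LLVMCreateTargetData("e-p:64:64:64"); /* illustrative */
      LLVMTypeRef I32 = LLVMInt32Type();

      printf("i32: %llu bits, store size %llu bytes, ABI align %u bytes\n",
             LLVMSizeOfTypeInBits(TD, I32),
             LLVMStoreSizeOfType(TD, I32),
             LLVMABIAlignmentOfType(TD, I32));
      printf("pointer size: %u bytes\n", LLVMPointerSize(TD));

      LLVMDisposeTargetData(TD);
    }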
+
+
+#ifdef __cplusplus
+}
+
+namespace llvm {
+  class TargetData;
+
+  inline TargetData *unwrap(LLVMTargetDataRef P) {
+    return reinterpret_cast<TargetData*>(P);
+  }
+
+  inline LLVMTargetDataRef wrap(const TargetData *P) {
+    return reinterpret_cast<LLVMTargetDataRef>(const_cast<TargetData*>(P));
+  }
+}
+
+#endif /* defined(__cplusplus) */
+
+#endif
diff --git a/final/include/llvm-c/Transforms/IPO.h b/final/include/llvm-c/Transforms/IPO.h
new file mode 100644
index 00000000000..d16e858bca3
--- /dev/null
+++ b/final/include/llvm-c/Transforms/IPO.h
@@ -0,0 +1,76 @@
+/*===-- IPO.h - Interprocedural Transformations C Interface -----*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to libLLVMIPO.a, which implements     *|
+|* various interprocedural transformations of the LLVM IR.                    *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_TRANSFORMS_IPO_H
+#define LLVM_C_TRANSFORMS_IPO_H
+
+#include "llvm-c/Core.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** See llvm::createArgumentPromotionPass function. */
+void LLVMAddArgumentPromotionPass(LLVMPassManagerRef PM);
+
+/** See llvm::createConstantMergePass function. */
+void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
+
+/** See llvm::createDeadArgEliminationPass function. */
+void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM);
+
+/** See llvm::createDeadTypeEliminationPass function. */
+void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM);
+
+/** See llvm::createFunctionAttrsPass function. */
+void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM);
+
+/** See llvm::createFunctionInliningPass function. */
+void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM);
+
+/** See llvm::createGlobalDCEPass function. */
+void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM);
+
+/** See llvm::createGlobalOptimizerPass function. */
+void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM);
+
+/** See llvm::createIPConstantPropagationPass function.
*/ +void LLVMAddIPConstantPropagationPass(LLVMPassManagerRef PM); + +/** See llvm::createLowerSetJmpPass function. */ +void LLVMAddLowerSetJmpPass(LLVMPassManagerRef PM); + +/** See llvm::createPruneEHPass function. */ +void LLVMAddPruneEHPass(LLVMPassManagerRef PM); + +/** See llvm::createIPSCCPPass function. */ +void LLVMAddIPSCCPPass(LLVMPassManagerRef PM); + +/** See llvm::createInternalizePass function. */ +void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain); + +// FIXME: Remove in LLVM 3.0. +void LLVMAddRaiseAllocationsPass(LLVMPassManagerRef PM); + +/** See llvm::createStripDeadPrototypesPass function. */ +void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM); + +/** See llvm::createStripSymbolsPass function. */ +void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM); + +#ifdef __cplusplus +} +#endif /* defined(__cplusplus) */ + +#endif diff --git a/final/include/llvm-c/Transforms/Scalar.h b/final/include/llvm-c/Transforms/Scalar.h new file mode 100644 index 00000000000..2ddfb38171c --- /dev/null +++ b/final/include/llvm-c/Transforms/Scalar.h @@ -0,0 +1,102 @@ +/*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMScalarOpts.a, which *| +|* implements various scalar transformations of the LLVM IR. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TRANSFORMS_SCALAR_H +#define LLVM_C_TRANSFORMS_SCALAR_H + +#include "llvm-c/Core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** See llvm::createAggressiveDCEPass function. */ +void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM); + +/** See llvm::createCFGSimplificationPass function. */ +void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM); + +/** See llvm::createDeadStoreEliminationPass function. */ +void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM); + +/** See llvm::createGVNPass function. */ +void LLVMAddGVNPass(LLVMPassManagerRef PM); + +/** See llvm::createIndVarSimplifyPass function. */ +void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM); + +/** See llvm::createInstructionCombiningPass function. */ +void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM); + +/** See llvm::createJumpThreadingPass function. */ +void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM); + +/** See llvm::createLICMPass function. */ +void LLVMAddLICMPass(LLVMPassManagerRef PM); + +/** See llvm::createLoopDeletionPass function. */ +void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM); + +/** See llvm::createLoopRotatePass function. */ +void LLVMAddLoopRotatePass(LLVMPassManagerRef PM); + +/** See llvm::createLoopUnrollPass function. */ +void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM); + +/** See llvm::createLoopUnswitchPass function. */ +void LLVMAddLoopUnswitchPass(LLVMPassManagerRef PM); + +/** See llvm::createMemCpyOptPass function. */ +void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM); + +/** See llvm::createPromoteMemoryToRegisterPass function. 
+
+/** See llvm::createReassociatePass function. */
+void LLVMAddReassociatePass(LLVMPassManagerRef PM);
+
+/** See llvm::createSCCPPass function. */
+void LLVMAddSCCPPass(LLVMPassManagerRef PM);
+
+/** See llvm::createScalarReplAggregatesPass function. */
+void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM);
+
+/** See llvm::createScalarReplAggregatesPass function. */
+void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
+                                                  int Threshold);
+
+/** See llvm::createSimplifyLibCallsPass function. */
+void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM);
+
+/** See llvm::createTailCallEliminationPass function. */
+void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM);
+
+/** See llvm::createConstantPropagationPass function. */
+void LLVMAddConstantPropagationPass(LLVMPassManagerRef PM);
+
+/** See llvm::demotePromoteMemoryToRegisterPass function. */
+void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM);
+
+/** See llvm::createVerifierPass function. */
+void LLVMAddVerifierPass(LLVMPassManagerRef PM);
+
+#ifdef __cplusplus
+}
+#endif /* defined(__cplusplus) */
+
+#endif
diff --git a/final/include/llvm-c/lto.h b/final/include/llvm-c/lto.h
new file mode 100644
index 00000000000..1c42ce0cec7
--- /dev/null
+++ b/final/include/llvm-c/lto.h
@@ -0,0 +1,278 @@
+/*===-- llvm-c/lto.h - LTO Public C Interface ---------------------*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header provides a public interface to an abstract link-time           *|
+|* optimization library. LLVM provides an implementation of this interface    *|
+|* for use with llvm bitcode files.                                           *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LTO_H
+#define LTO_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <unistd.h>
+
+#define LTO_API_VERSION 4
+
+typedef enum {
+    LTO_SYMBOL_ALIGNMENT_MASK              = 0x0000001F, /* log2 of alignment */
+    LTO_SYMBOL_PERMISSIONS_MASK            = 0x000000E0,
+    LTO_SYMBOL_PERMISSIONS_CODE            = 0x000000A0,
+    LTO_SYMBOL_PERMISSIONS_DATA            = 0x000000C0,
+    LTO_SYMBOL_PERMISSIONS_RODATA          = 0x00000080,
+    LTO_SYMBOL_DEFINITION_MASK             = 0x00000700,
+    LTO_SYMBOL_DEFINITION_REGULAR          = 0x00000100,
+    LTO_SYMBOL_DEFINITION_TENTATIVE        = 0x00000200,
+    LTO_SYMBOL_DEFINITION_WEAK             = 0x00000300,
+    LTO_SYMBOL_DEFINITION_UNDEFINED        = 0x00000400,
+    LTO_SYMBOL_DEFINITION_WEAKUNDEF        = 0x00000500,
+    LTO_SYMBOL_SCOPE_MASK                  = 0x00003800,
+    LTO_SYMBOL_SCOPE_INTERNAL              = 0x00000800,
+    LTO_SYMBOL_SCOPE_HIDDEN                = 0x00001000,
+    LTO_SYMBOL_SCOPE_PROTECTED             = 0x00002000,
+    LTO_SYMBOL_SCOPE_DEFAULT               = 0x00001800,
+    LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN = 0x00002800
+} lto_symbol_attributes;
+
+typedef enum {
+    LTO_DEBUG_MODEL_NONE         = 0,
+    LTO_DEBUG_MODEL_DWARF        = 1
+} lto_debug_model;
+
+typedef enum {
+    LTO_CODEGEN_PIC_MODEL_STATIC         = 0,
+    LTO_CODEGEN_PIC_MODEL_DYNAMIC        = 1,
+    LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC = 2
+} lto_codegen_model;
+
+
+/** opaque reference to a loaded object module */
+typedef struct LTOModule*         lto_module_t;
+
+/** opaque reference to a code generator */
+typedef struct LTOCodeGenerator*  lto_code_gen_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Returns a printable string.
+ */
+extern const char*
+lto_get_version(void);
+
+
+/**
+ * Returns the last error string or NULL if the last operation was successful.
+ */
+extern const char*
+lto_get_error_message(void);
+
+/**
+ * Checks if a file is a loadable object file.
+ */
+extern bool
+lto_module_is_object_file(const char* path);
+
+
+/**
+ * Checks if a file is a loadable object compiled for the requested target.
+ */
+extern bool
+lto_module_is_object_file_for_target(const char* path,
+                                     const char* target_triple_prefix);
+
+
+/**
+ * Checks if a buffer is a loadable object file.
+ */
+extern bool
+lto_module_is_object_file_in_memory(const void* mem, size_t length);
+
+
+/**
+ * Checks if a buffer is a loadable object compiled for the requested target.
+ */
+extern bool
+lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length,
+                                               const char* target_triple_prefix);
+
+
+/**
+ * Loads an object file from disk.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_module_t
+lto_module_create(const char* path);
+
+
+/**
+ * Loads an object file from memory.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_module_t
+lto_module_create_from_memory(const void* mem, size_t length);
+
+/**
+ * Loads an object file from disk. The seek point of fd is not preserved.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_module_t
+lto_module_create_from_fd(int fd, const char *path, off_t size);
+
+
+/**
+ * Frees all memory internally allocated by the module.
+ * Upon return the lto_module_t is no longer valid.
+ */
+extern void
+lto_module_dispose(lto_module_t mod);
+
+
+/**
+ * Returns the triple string with which the object module was compiled.
+ */
+extern const char*
+lto_module_get_target_triple(lto_module_t mod);
+
+/**
+ * Sets the triple string with which the object will be codegened.
+ */
+extern void
+lto_module_set_target_triple(lto_module_t mod, const char *triple);
+
+
+/**
+ * Returns the number of symbols in the object module.
+ */
+extern unsigned int
+lto_module_get_num_symbols(lto_module_t mod);
+
+
+/**
+ * Returns the name of the ith symbol in the object module.
+ */
+extern const char*
+lto_module_get_symbol_name(lto_module_t mod, unsigned int index);
+
+
+/**
+ * Returns the attributes of the ith symbol in the object module.
+ */
+extern lto_symbol_attributes
+lto_module_get_symbol_attribute(lto_module_t mod, unsigned int index);
+
+
+/**
+ * Instantiates a code generator.
+ * Returns NULL on error (check lto_get_error_message() for details).
+ */
+extern lto_code_gen_t
+lto_codegen_create(void);
+
+
+/**
+ * Frees the code generator and all memory it internally allocated.
+ * Upon return the lto_code_gen_t is no longer valid.
+ */
+extern void
+lto_codegen_dispose(lto_code_gen_t);
+
+
+
+/**
+ * Add an object module to the set of modules for which code will be generated.
+ * Returns true on error (check lto_get_error_message() for details).
+ */
+extern bool
+lto_codegen_add_module(lto_code_gen_t cg, lto_module_t mod);
+
+
+
+/**
+ * Sets if debug info should be generated.
+ * Returns true on error (check lto_get_error_message() for details).
+ */
+extern bool
+lto_codegen_set_debug_model(lto_code_gen_t cg, lto_debug_model);
+
+
+/**
+ * Sets which PIC code model to generate.
+ * Returns true on error (check lto_get_error_message() for details).
+ */
+extern bool
+lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model);
+
+
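The declarations above, together with the codegen entry points that follow, add up to a complete load, inspect, and compile workflow. A minimal sketch of that sequence, assuming a hypothetical bitcode input named "input.o" and a client linked against this release's libLTO; cleanup and most error handling are omitted:

  #include <stdio.h>
  #include "llvm-c/lto.h"

  int main(void) {
    if (!lto_module_is_object_file("input.o"))
      return 1;
    lto_module_t mod = lto_module_create("input.o");
    if (!mod) {
      fprintf(stderr, "%s\n", lto_get_error_message());
      return 1;
    }

    // Inspect the symbol table before merging.
    unsigned n = lto_module_get_num_symbols(mod);
    for (unsigned i = 0; i != n; ++i)
      printf("%s\n", lto_module_get_symbol_name(mod, i));

    lto_code_gen_t cg = lto_codegen_create();
    lto_codegen_add_module(cg, mod);  // returns true on error
    lto_codegen_set_pic_model(cg, LTO_CODEGEN_PIC_MODEL_DYNAMIC);
    lto_codegen_add_must_preserve_symbol(cg, "main");

    size_t len;
    const void *obj = lto_codegen_compile(cg, &len);
    if (obj)
      fwrite(obj, 1, len, stdout);  // buffer is owned by cg; use before dispose
    lto_codegen_dispose(cg);
    return obj ? 0 : 1;
  }

Note that the buffer returned by lto_codegen_compile() belongs to the code generator, so it must be written out or copied before lto_codegen_dispose() is called.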
+/**
+ * Sets the cpu to generate code for.
+ */
+extern void
+lto_codegen_set_cpu(lto_code_gen_t cg, const char *cpu);
+
+
+/**
+ * Sets the location of the assembler tool to run. If not set, libLTO
+ * will use gcc to invoke the assembler.
+ */
+extern void
+lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path);
+
+/**
+ * Sets extra arguments that libLTO should pass to the assembler.
+ */
+extern void
+lto_codegen_set_assembler_args(lto_code_gen_t cg, const char **args,
+                               int nargs);
+
+/**
+ * Adds to a list of all global symbols that must exist in the final
+ * generated code. If a function is not listed, it might be
+ * inlined into every usage and optimized away.
+ */
+extern void
+lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol);
+
+
+/**
+ * Writes a new object file at the specified path that contains the
+ * merged contents of all modules added so far.
+ * Returns true on error (check lto_get_error_message() for details).
+ */
+extern bool
+lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path);
+
+
+/**
+ * Generates code for all added modules into one native object file.
+ * On success, returns a pointer to a generated mach-o/ELF buffer and
+ * length set to the buffer size. The buffer is owned by the
+ * lto_code_gen_t and will be freed when lto_codegen_dispose()
+ * is called, or lto_codegen_compile() is called again.
+ * On failure, returns NULL (check lto_get_error_message() for details).
+ */
+extern const void*
+lto_codegen_compile(lto_code_gen_t cg, size_t* length);
+
+
+/**
+ * Sets options to help debug codegen bugs.
+ */
+extern void
+lto_codegen_debug_options(lto_code_gen_t cg, const char *);
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif
diff --git a/final/include/llvm/ADT/APFloat.h b/final/include/llvm/ADT/APFloat.h
new file mode 100644
index 00000000000..ca4138b825a
--- /dev/null
+++ b/final/include/llvm/ADT/APFloat.h
@@ -0,0 +1,452 @@
+//== llvm/Support/APFloat.h - Arbitrary Precision Floating Point -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a class to represent arbitrary precision floating
+// point values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+/*  A self-contained host- and target-independent arbitrary-precision
+    floating-point software implementation. It uses bignum integer
+    arithmetic as provided by static functions in the APInt class.
+    The library will work with bignum integers whose parts are any
+    unsigned type at least 16 bits wide, but 64 bits is recommended.
+
+    Written for clarity rather than speed, in particular with a view
+    to use in the front-end of a cross compiler so that target
+    arithmetic can be correctly performed on the host. Performance
+    should nonetheless be reasonable, particularly for its intended
+    use. It may be useful as a base implementation for a run-time
+    library during development of a faster target-specific one.
+
+    All 5 rounding modes in the IEEE-754R draft are handled correctly
+    for all implemented operations. Currently implemented operations
+    are add, subtract, multiply, divide, fused-multiply-add,
+    conversion-to-float, conversion-to-integer and
+    conversion-from-integer. New rounding modes (e.g. away from zero)
+    can be added with three or four lines of code.
+
+    Four formats are built-in: IEEE single precision, double
+    precision, quadruple precision, and x87 80-bit extended double
+    (when operating with full extended precision). Adding a new
+    format that obeys IEEE semantics only requires adding two lines of
+    code: a declaration and definition of the format.
+
+    All operations return the status of that operation as an exception
+    bit-mask, so multiple operations can be done consecutively with
+    their results or-ed together. The returned status can be useful
+    for compiler diagnostics; e.g., inexact, underflow and overflow
+    can be easily diagnosed on constant folding, and compiler
+    optimizers can determine what exceptions would be raised by
+    folding operations and optimize, or perhaps not optimize,
+    accordingly.
+
+    At present, underflow tininess is detected after rounding; it
+    should be straightforward to add support for the before-rounding
+    case too.
+
+    The library reads hexadecimal floating point numbers as per C99,
+    and correctly rounds if necessary according to the specified
+    rounding mode. Syntax is required to have been validated by the
+    caller. It also converts floating point numbers to hexadecimal
+    text as per the C99 %a and %A conversions. The output precision
+    (or alternatively the natural minimal precision) can be specified;
+    if the requested precision is less than the natural precision the
+    output is correctly rounded for the specified rounding mode.
+
+    It also reads decimal floating point numbers and correctly rounds
+    according to the specified rounding mode.
+
+    Conversion to decimal text is not currently implemented.
+
+    Non-zero finite numbers are represented internally as a sign bit,
+    a 16-bit signed exponent, and the significand as an array of
+    integer parts. After normalization of a number of precision P the
+    exponent is within the range of the format, and if the number is
+    not denormal the P-th bit of the significand is set as an explicit
+    integer bit. For denormals the most significant bit is shifted
+    right so that the exponent is maintained at the format's minimum,
+    so that the smallest denormal has just the least significant bit
+    of the significand set. The sign of zeroes and infinities is
+    significant; the exponent and significand of such numbers are not
+    stored, but have a known implicit (deterministic) value: 0 for the
+    significands, 0 for zero exponent, all 1 bits for infinity
+    exponent. For NaNs the sign and significand are deterministic,
+    although not really meaningful, and preserved in non-conversion
+    operations. The exponent is implicitly all 1 bits.
+
+    TODO
+    ====
+
+    Some features that may or may not be worth adding:
+
+    Binary to decimal conversion (hard).
+
+    Optional ability to detect underflow tininess before rounding.
+
+    New formats: x87 in single and double precision mode (IEEE apart
+    from extended exponent range) (hard).
+
+    New operations: sqrt, IEEE remainder, C90 fmod, nextafter,
+    nexttoward.
+*/
+
+#ifndef LLVM_FLOAT_H
+#define LLVM_FLOAT_H
+
+// APInt contains static functions implementing bignum arithmetic.
+#include "llvm/ADT/APInt.h"
+
+namespace llvm {
+
+  /* Exponents are stored as signed numbers. */
+  typedef signed short exponent_t;
+
+  struct fltSemantics;
+  class StringRef;
+
+  /* When bits of a floating point number are truncated, this enum is
+     used to indicate what fraction of the LSB those bits represented.
+     It essentially combines the roles of guard and sticky bits.
+  */
+  enum lostFraction {    // Example of truncated bits:
+    lfExactlyZero,       // 000000
+    lfLessThanHalf,      // 0xxxxx  x's not all zero
+    lfExactlyHalf,       // 100000
+    lfMoreThanHalf       // 1xxxxx  x's not all zero
+  };
+
+  class APFloat {
+  public:
+
+    /* We support the following floating point semantics. */
+    static const fltSemantics IEEEhalf;
+    static const fltSemantics IEEEsingle;
+    static const fltSemantics IEEEdouble;
+    static const fltSemantics IEEEquad;
+    static const fltSemantics PPCDoubleDouble;
+    static const fltSemantics x87DoubleExtended;
+    /* And this pseudo, used to construct APFloats that cannot
+       conflict with anything real. */
+    static const fltSemantics Bogus;
+
+    static unsigned int semanticsPrecision(const fltSemantics &);
+
+    /* Floating point numbers have a four-state comparison relation. */
+    enum cmpResult {
+      cmpLessThan,
+      cmpEqual,
+      cmpGreaterThan,
+      cmpUnordered
+    };
+
+    /* IEEE-754R gives five rounding modes. */
+    enum roundingMode {
+      rmNearestTiesToEven,
+      rmTowardPositive,
+      rmTowardNegative,
+      rmTowardZero,
+      rmNearestTiesToAway
+    };
+
+    // Operation status.  opUnderflow or opOverflow are always returned
+    // or-ed with opInexact.
+    enum opStatus {
+      opOK          = 0x00,
+      opInvalidOp   = 0x01,
+      opDivByZero   = 0x02,
+      opOverflow    = 0x04,
+      opUnderflow   = 0x08,
+      opInexact     = 0x10
+    };
+
+    // Category of internally-represented number.
+    enum fltCategory {
+      fcInfinity,
+      fcNaN,
+      fcNormal,
+      fcZero
+    };
+
+    enum uninitializedTag {
+      uninitialized
+    };
+
+    // Constructors.
+    APFloat(const fltSemantics &); // Default construct to 0.0
+    APFloat(const fltSemantics &, StringRef);
+    APFloat(const fltSemantics &, integerPart);
+    APFloat(const fltSemantics &, fltCategory, bool negative);
+    APFloat(const fltSemantics &, uninitializedTag);
+    explicit APFloat(double d);
+    explicit APFloat(float f);
+    explicit APFloat(const APInt &, bool isIEEE = false);
+    APFloat(const APFloat &);
+    ~APFloat();
+
+    // Convenience "constructors"
+    static APFloat getZero(const fltSemantics &Sem, bool Negative = false) {
+      return APFloat(Sem, fcZero, Negative);
+    }
+    static APFloat getInf(const fltSemantics &Sem, bool Negative = false) {
+      return APFloat(Sem, fcInfinity, Negative);
+    }
+
+    /// getNaN - Factory for QNaN values.
+    ///
+    /// \param Negative - True iff the NaN generated should be negative.
+    /// \param type - The unspecified fill bits for creating the NaN, 0 by
+    /// default.  The value is truncated as necessary.
+    static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
+                          unsigned type = 0) {
+      if (type) {
+        APInt fill(64, type);
+        return getQNaN(Sem, Negative, &fill);
+      } else {
+        return getQNaN(Sem, Negative, 0);
+      }
+    }
+
+    /// getQNaN - Factory for QNaN values.
+    static APFloat getQNaN(const fltSemantics &Sem,
+                           bool Negative = false,
+                           const APInt *payload = 0) {
+      return makeNaN(Sem, false, Negative, payload);
+    }
+
+    /// getSNaN - Factory for SNaN values.
+    static APFloat getSNaN(const fltSemantics &Sem,
+                           bool Negative = false,
+                           const APInt *payload = 0) {
+      return makeNaN(Sem, true, Negative, payload);
+    }
+
+    /// getLargest - Returns the largest finite number in the given
+    /// semantics.
+    ///
+    /// \param Negative - True iff the number should be negative
+    static APFloat getLargest(const fltSemantics &Sem, bool Negative = false);
+
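The factory helpers above build the special values directly from a category, with no parsing or arithmetic involved. A short sketch of their use, assuming only this header; the results noted in comments follow from the "Simple queries" predicates declared further down:

  #include "llvm/ADT/APFloat.h"
  using namespace llvm;

  void specials() {
    APFloat NegZero = APFloat::getZero(APFloat::IEEEdouble, /*Negative=*/true);
    APFloat Inf     = APFloat::getInf(APFloat::IEEEsingle);
    APFloat NaN     = APFloat::getNaN(APFloat::IEEEdouble);

    (void)NegZero.isNegZero();  // true: the sign of zero is significant
    (void)Inf.isInfinity();     // true
    (void)NaN.isNaN();          // true
  }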
+    /// getSmallest - Returns the smallest (by magnitude) finite number
+    /// in the given semantics.  Might be denormalized, which implies a
+    /// relative loss of precision.
+    ///
+    /// \param Negative - True iff the number should be negative
+    static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false);
+
+    /// getSmallestNormalized - Returns the smallest (by magnitude)
+    /// normalized finite number in the given semantics.
+    ///
+    /// \param Negative - True iff the number should be negative
+    static APFloat getSmallestNormalized(const fltSemantics &Sem,
+                                         bool Negative = false);
+
+    /// getAllOnesValue - Returns a float which is bitcasted from
+    /// an all one value int.
+    ///
+    /// \param BitWidth - Select float type
+    /// \param isIEEE   - If 128 bit number, select between PPC and IEEE
+    static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false);
+
+    /// Profile - Used to insert APFloat objects, or objects that contain
+    /// APFloat objects, into FoldingSets.
+    void Profile(FoldingSetNodeID& NID) const;
+
+    /// @brief Used by the Bitcode serializer to emit APFloats to Bitcode.
+    void Emit(Serializer& S) const;
+
+    /// @brief Used by the Bitcode deserializer to deserialize APFloats.
+    static APFloat ReadVal(Deserializer& D);
+
+    /* Arithmetic. */
+    opStatus add(const APFloat &, roundingMode);
+    opStatus subtract(const APFloat &, roundingMode);
+    opStatus multiply(const APFloat &, roundingMode);
+    opStatus divide(const APFloat &, roundingMode);
+    /* IEEE remainder. */
+    opStatus remainder(const APFloat &);
+    /* C fmod, or llvm frem. */
+    opStatus mod(const APFloat &, roundingMode);
+    opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
+
+    /* Sign operations. */
+    void changeSign();
+    void clearSign();
+    void copySign(const APFloat &);
+
+    /* Conversions. */
+    opStatus convert(const fltSemantics &, roundingMode, bool *);
+    opStatus convertToInteger(integerPart *, unsigned int, bool,
+                              roundingMode, bool *) const;
+    opStatus convertFromAPInt(const APInt &,
+                              bool, roundingMode);
+    opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int,
+                                            bool, roundingMode);
+    opStatus convertFromZeroExtendedInteger(const integerPart *, unsigned int,
+                                            bool, roundingMode);
+    opStatus convertFromString(StringRef, roundingMode);
+    APInt bitcastToAPInt() const;
+    double convertToDouble() const;
+    float convertToFloat() const;
+
+    /* The definition of equality is not straightforward for floating point,
+       so we won't use operator==.  Use one of the following, or write
+       whatever it is you really mean. */
+    // bool operator==(const APFloat &) const;     // DO NOT IMPLEMENT
+
+    /* IEEE comparison with another floating point number (NaNs
+       compare unordered, 0==-0). */
+    cmpResult compare(const APFloat &) const;
+
+    /* Bitwise comparison for equality (QNaNs compare equal, 0!=-0). */
+    bool bitwiseIsEqual(const APFloat &) const;
+
+    /* Write out a hexadecimal representation of the floating point
+       value to DST, which must be of sufficient size, in the C99 form
+       [-]0xh.hhhhp[+-]d.  Return the number of characters written,
+       excluding the terminating NUL. */
+    unsigned int convertToHexString(char *dst, unsigned int hexDigits,
+                                    bool upperCase, roundingMode) const;
+
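Because each arithmetic and conversion routine above returns an opStatus, and the file comment notes that statuses may be or-ed together, a caller can fold the exception state of a whole sequence into one mask and inspect it once. A sketch of that usage, assuming this header:

  #include "llvm/ADT/APFloat.h"
  using namespace llvm;

  unsigned foldAndCheck() {
    APFloat X(1.0), Y(3.0);
    unsigned Status = APFloat::opOK;
    Status |= X.divide(Y, APFloat::rmNearestTiesToEven);    // 1/3 is inexact
    Status |= X.multiply(Y, APFloat::rmNearestTiesToEven);
    if (Status & APFloat::opInexact) {
      // e.g. a constant folder could record that precision was lost
    }
    return Status;
  }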
+    /* Simple queries. */
+    fltCategory getCategory() const { return category; }
+    const fltSemantics &getSemantics() const { return *semantics; }
+    bool isZero() const { return category == fcZero; }
+    bool isNonZero() const { return category != fcZero; }
+    bool isNaN() const { return category == fcNaN; }
+    bool isInfinity() const { return category == fcInfinity; }
+    bool isNegative() const { return sign; }
+    bool isPosZero() const { return isZero() && !isNegative(); }
+    bool isNegZero() const { return isZero() && isNegative(); }
+
+    APFloat& operator=(const APFloat &);
+
+    /* Return an arbitrary integer value usable for hashing. */
+    uint32_t getHashValue() const;
+
+    /// Converts this value into a decimal string.
+    ///
+    /// \param FormatPrecision The maximum number of digits of
+    ///   precision to output.  If there are fewer digits available,
+    ///   zero padding will not be used unless the value is
+    ///   integral and small enough to be expressed in
+    ///   FormatPrecision digits.  0 means to use the natural
+    ///   precision of the number.
+    /// \param FormatMaxPadding The maximum number of zeros to
+    ///   consider inserting before falling back to scientific
+    ///   notation.  0 means to always use scientific notation.
+    ///
+    /// Number       Precision    MaxPadding      Result
+    /// ------       ---------    ----------      ------
+    /// 1.01E+4              5             2       10100
+    /// 1.01E+4              4             2       1.01E+4
+    /// 1.01E+4              5             1       1.01E+4
+    /// 1.01E-2              5             2       0.0101
+    /// 1.01E-2              4             2       0.0101
+    /// 1.01E-2              4             1       1.01E-2
+    void toString(SmallVectorImpl<char> &Str,
+                  unsigned FormatPrecision = 0,
+                  unsigned FormatMaxPadding = 3) const;
+
+  private:
+
+    /* Trivial queries. */
+    integerPart *significandParts();
+    const integerPart *significandParts() const;
+    unsigned int partCount() const;
+
+    /* Significand operations. */
+    integerPart addSignificand(const APFloat &);
+    integerPart subtractSignificand(const APFloat &, integerPart);
+    lostFraction addOrSubtractSignificand(const APFloat &, bool subtract);
+    lostFraction multiplySignificand(const APFloat &, const APFloat *);
+    lostFraction divideSignificand(const APFloat &);
+    void incrementSignificand();
+    void initialize(const fltSemantics *);
+    void shiftSignificandLeft(unsigned int);
+    lostFraction shiftSignificandRight(unsigned int);
+    unsigned int significandLSB() const;
+    unsigned int significandMSB() const;
+    void zeroSignificand();
+
+    /* Arithmetic on special values. */
+    opStatus addOrSubtractSpecials(const APFloat &, bool subtract);
+    opStatus divideSpecials(const APFloat &);
+    opStatus multiplySpecials(const APFloat &);
+    opStatus modSpecials(const APFloat &);
+
+    /* Miscellany.
*/ + static APFloat makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, + const APInt *fill); + void makeNaN(bool SNaN = false, bool Neg = false, const APInt *fill = 0); + opStatus normalize(roundingMode, lostFraction); + opStatus addOrSubtract(const APFloat &, roundingMode, bool subtract); + cmpResult compareAbsoluteValue(const APFloat &) const; + opStatus handleOverflow(roundingMode); + bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const; + opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool, + roundingMode, bool *) const; + opStatus convertFromUnsignedParts(const integerPart *, unsigned int, + roundingMode); + opStatus convertFromHexadecimalString(StringRef, roundingMode); + opStatus convertFromDecimalString(StringRef, roundingMode); + char *convertNormalToHexString(char *, unsigned int, bool, + roundingMode) const; + opStatus roundSignificandWithExponent(const integerPart *, unsigned int, + int, roundingMode); + + APInt convertHalfAPFloatToAPInt() const; + APInt convertFloatAPFloatToAPInt() const; + APInt convertDoubleAPFloatToAPInt() const; + APInt convertQuadrupleAPFloatToAPInt() const; + APInt convertF80LongDoubleAPFloatToAPInt() const; + APInt convertPPCDoubleDoubleAPFloatToAPInt() const; + void initFromAPInt(const APInt& api, bool isIEEE = false); + void initFromHalfAPInt(const APInt& api); + void initFromFloatAPInt(const APInt& api); + void initFromDoubleAPInt(const APInt& api); + void initFromQuadrupleAPInt(const APInt &api); + void initFromF80LongDoubleAPInt(const APInt& api); + void initFromPPCDoubleDoubleAPInt(const APInt& api); + + void assign(const APFloat &); + void copySignificand(const APFloat &); + void freeSignificand(); + + /* What kind of semantics does this value obey? */ + const fltSemantics *semantics; + + /* Significand - the fraction with an explicit integer bit. Must be + at least one bit wider than the target precision. */ + union Significand + { + integerPart part; + integerPart *parts; + } significand; + + /* The exponent - a signed number. */ + exponent_t exponent; + + /* What kind of floating point number this is. */ + /* Only 2 bits are required, but VisualStudio incorrectly sign extends + it. Using the extra bit keeps it from failing under VisualStudio */ + fltCategory category: 3; + + /* The sign bit of this number. */ + unsigned int sign: 1; + + /* For PPCDoubleDouble, we have a second exponent and sign (the second + significand is appended to the first one, although it would be wrong to + regard these as a single number for arithmetic purposes). These fields + are not meaningful for any other type. */ + exponent_t exponent2 : 11; + unsigned int sign2: 1; + }; +} /* namespace llvm */ + +#endif /* LLVM_FLOAT_H */ diff --git a/final/include/llvm/ADT/APInt.h b/final/include/llvm/ADT/APInt.h new file mode 100644 index 00000000000..d1fd3e5034b --- /dev/null +++ b/final/include/llvm/ADT/APInt.h @@ -0,0 +1,1720 @@ +//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a class to represent arbitrary precision integral +// constant values and operations on them. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_APINT_H
+#define LLVM_APINT_H
+
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <climits>
+#include <cstring>
+#include <string>
+
+namespace llvm {
+  class Serializer;
+  class Deserializer;
+  class FoldingSetNodeID;
+  class raw_ostream;
+  class StringRef;
+
+  template <typename T>
+  class SmallVectorImpl;
+
+  // An unsigned host type used as a single part of a multi-part
+  // bignum.
+  typedef uint64_t integerPart;
+
+  const unsigned int host_char_bit = 8;
+  const unsigned int integerPartWidth = host_char_bit *
+    static_cast<unsigned int>(sizeof(integerPart));
+
+//===----------------------------------------------------------------------===//
+//                              APInt Class
+//===----------------------------------------------------------------------===//
+
+/// APInt - This class represents arbitrary precision constant integral values.
+/// It is a functional replacement for common case unsigned integer type like
+/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
+/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
+/// than 64-bits of precision. APInt provides a variety of arithmetic operators
+/// and methods to manipulate integer values of any bit-width. It supports both
+/// the typical integer arithmetic and comparison operations as well as bitwise
+/// manipulation.
+///
+/// The class has several invariants worth noting:
+///   * All bit, byte, and word positions are zero-based.
+///   * Once the bit width is set, it doesn't change except by the Truncate,
+///     SignExtend, or ZeroExtend operations.
+///   * All binary operators must be on APInt instances of the same bit width.
+///     Attempting to use these operators on instances with different bit
+///     widths will yield an assertion.
+///   * The value is stored canonically as an unsigned value. For operations
+///     where it makes a difference, there are both signed and unsigned variants
+///     of the operation. For example, sdiv and udiv. However, because the bit
+///     widths must be the same, operations such as Mul and Add produce the same
+///     results regardless of whether the values are interpreted as signed or
+///     not.
+///   * In general, the class tries to follow the style of computation that LLVM
+///     uses in its IR. This simplifies its use for LLVM.
+///
+/// A short usage sketch of these invariants appears after the first few
+/// internal helpers below.
+///
+/// @brief Class for arbitrary precision integers.
+class APInt {
+  unsigned BitWidth;      ///< The number of bits in this APInt.
+
+  /// This union is used to store the integer value. When the
+  /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
+  union {
+    uint64_t VAL;    ///< Used to store the <= 64 bits integer value.
+    uint64_t *pVal;  ///< Used to store the >64 bits integer value.
+  };
+
+  /// This enum is used to hold the constants we needed for APInt.
+  enum {
+    /// Bits in a word
+    APINT_BITS_PER_WORD = static_cast<unsigned int>(sizeof(uint64_t)) *
+                          CHAR_BIT,
+    /// Byte size of a word
+    APINT_WORD_SIZE = static_cast<unsigned int>(sizeof(uint64_t))
+  };
+
+  /// This constructor is used only internally for speed of construction of
+  /// temporaries. It is unsafe for general use so it is not public.
+  /// @brief Fast internal constructor
+  APInt(uint64_t* val, unsigned bits) : BitWidth(bits), pVal(val) { }
+
+  /// @returns true if the number of bits <= 64, false otherwise.
+  /// @brief Determine if this APInt just has one word to store value.
+  bool isSingleWord() const {
+    return BitWidth <= APINT_BITS_PER_WORD;
+  }
+
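As promised in the class comment, a brief sketch of the invariants above, assuming this header; the expected results are noted in comments (8-bit values wrap, and signedness lives in the operation, not in the type):

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void invariants() {
    APInt A(8, 0xF0);      // bit pattern 0xF0: unsigned 240, signed -16
    APInt B(8, 8);         // operands must share a bit width

    APInt Sum = A + B;     // 0xF8: add is identical for signed and unsigned
    APInt UQ  = A.udiv(B); // 30, the bits read as unsigned
    APInt SQ  = A.sdiv(B); // -2 (0xFE), the same bits read as signed
    (void)Sum; (void)UQ; (void)SQ;
  }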
+  /// @returns the word position for the specified bit position.
+  /// @brief Determine which word a bit is in.
+  static unsigned whichWord(unsigned bitPosition) {
+    return bitPosition / APINT_BITS_PER_WORD;
+  }
+
+  /// @returns the bit position in a word for the specified bit position
+  /// in the APInt.
+  /// @brief Determine which bit in a word a bit is in.
+  static unsigned whichBit(unsigned bitPosition) {
+    return bitPosition % APINT_BITS_PER_WORD;
+  }
+
+  /// This method generates and returns a uint64_t (word) mask for a single
+  /// bit at a specific bit position. This is used to mask the bit in the
+  /// corresponding word.
+  /// @returns a uint64_t with only bit at "whichBit(bitPosition)" set
+  /// @brief Get a single bit mask.
+  static uint64_t maskBit(unsigned bitPosition) {
+    return 1ULL << whichBit(bitPosition);
+  }
+
+  /// This method is used internally to clear the top "N" bits in the high order
+  /// word that are not used by the APInt. This is needed after the most
+  /// significant word is assigned a value to ensure that those bits are
+  /// zero'd out.
+  /// @brief Clear unused high order bits
+  APInt& clearUnusedBits() {
+    // Compute how many bits are used in the final word
+    unsigned wordBits = BitWidth % APINT_BITS_PER_WORD;
+    if (wordBits == 0)
+      // If all bits are used, we want to leave the value alone. This also
+      // avoids the undefined behavior of >> when the shift is the same size as
+      // the word size (64).
+      return *this;
+
+    // Mask out the high bits.
+    uint64_t mask = ~uint64_t(0ULL) >> (APINT_BITS_PER_WORD - wordBits);
+    if (isSingleWord())
+      VAL &= mask;
+    else
+      pVal[getNumWords() - 1] &= mask;
+    return *this;
+  }
+
+  /// @returns the corresponding word for the specified bit position.
+  /// @brief Get the word corresponding to a bit position
+  uint64_t getWord(unsigned bitPosition) const {
+    return isSingleWord() ? VAL : pVal[whichWord(bitPosition)];
+  }
+
+  /// Converts a string into a number. The string must be non-empty
+  /// and well-formed as a number of the given base. The bit-width
+  /// must be sufficient to hold the result.
+  ///
+  /// This is used by the constructors that take string arguments.
+  ///
+  /// StringRef::getAsInteger is superficially similar but (1) does
+  /// not assume that the string is well-formed and (2) grows the
+  /// result to hold the input.
+  ///
+  /// @param radix 2, 8, 10, or 16
+  /// @brief Convert a char array into an APInt
+  void fromString(unsigned numBits, StringRef str, uint8_t radix);
+
+  /// This is used by the toString method to divide by the radix. It simply
+  /// provides a more convenient form of divide for internal use since KnuthDiv
+  /// has specific constraints on its inputs. If those constraints are not met
+  /// then it provides a simpler form of divide.
+  /// @brief An internal division function for dividing APInts.
+ static void divide(const APInt LHS, unsigned lhsWords, + const APInt &RHS, unsigned rhsWords, + APInt *Quotient, APInt *Remainder); + + /// out-of-line slow case for inline constructor + void initSlowCase(unsigned numBits, uint64_t val, bool isSigned); + + /// out-of-line slow case for inline copy constructor + void initSlowCase(const APInt& that); + + /// out-of-line slow case for shl + APInt shlSlowCase(unsigned shiftAmt) const; + + /// out-of-line slow case for operator& + APInt AndSlowCase(const APInt& RHS) const; + + /// out-of-line slow case for operator| + APInt OrSlowCase(const APInt& RHS) const; + + /// out-of-line slow case for operator^ + APInt XorSlowCase(const APInt& RHS) const; + + /// out-of-line slow case for operator= + APInt& AssignSlowCase(const APInt& RHS); + + /// out-of-line slow case for operator== + bool EqualSlowCase(const APInt& RHS) const; + + /// out-of-line slow case for operator== + bool EqualSlowCase(uint64_t Val) const; + + /// out-of-line slow case for countLeadingZeros + unsigned countLeadingZerosSlowCase() const; + + /// out-of-line slow case for countTrailingOnes + unsigned countTrailingOnesSlowCase() const; + + /// out-of-line slow case for countPopulation + unsigned countPopulationSlowCase() const; + +public: + /// @name Constructors + /// @{ + /// If isSigned is true then val is treated as if it were a signed value + /// (i.e. as an int64_t) and the appropriate sign extension to the bit width + /// will be done. Otherwise, no sign extension occurs (high order bits beyond + /// the range of val are zero filled). + /// @param numBits the bit width of the constructed APInt + /// @param val the initial value of the APInt + /// @param isSigned how to treat signedness of val + /// @brief Create a new APInt of numBits width, initialized as val. + APInt(unsigned numBits, uint64_t val, bool isSigned = false) + : BitWidth(numBits), VAL(0) { + assert(BitWidth && "bitwidth too small"); + if (isSingleWord()) + VAL = val; + else + initSlowCase(numBits, val, isSigned); + clearUnusedBits(); + } + + /// Note that numWords can be smaller or larger than the corresponding bit + /// width but any extraneous bits will be dropped. + /// @param numBits the bit width of the constructed APInt + /// @param numWords the number of words in bigVal + /// @param bigVal a sequence of words to form the initial value of the APInt + /// @brief Construct an APInt of numBits width, initialized as bigVal[]. + APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); + + /// This constructor interprets the string \arg str in the given radix. The + /// interpretation stops when the first character that is not suitable for the + /// radix is encountered, or the end of the string. Acceptable radix values + /// are 2, 8, 10 and 16. It is an error for the value implied by the string to + /// require more bits than numBits. + /// + /// @param numBits the bit width of the constructed APInt + /// @param str the string to be interpreted + /// @param radix the radix to use for the conversion + /// @brief Construct an APInt from a string representation. + APInt(unsigned numBits, StringRef str, uint8_t radix); + + /// Simply makes *this a copy of that. + /// @brief Copy Constructor. + APInt(const APInt& that) + : BitWidth(that.BitWidth), VAL(0) { + assert(BitWidth && "bitwidth too small"); + if (isSingleWord()) + VAL = that.VAL; + else + initSlowCase(that); + } + + /// @brief Destructor. 
+  ~APInt() {
+    if (!isSingleWord())
+      delete [] pVal;
+  }
+
+  /// Default constructor that creates an uninitialized APInt.  This is useful
+  /// for object deserialization (pair this with the static method Read).
+  explicit APInt() : BitWidth(1) {}
+
+  /// Profile - Used to insert APInt objects, or objects that contain APInt
+  /// objects, into FoldingSets.
+  void Profile(FoldingSetNodeID& id) const;
+
+  /// @}
+  /// @name Value Tests
+  /// @{
+  /// This tests the high bit of this APInt to determine if it is set.
+  /// @returns true if this APInt is negative, false otherwise
+  /// @brief Determine sign of this APInt.
+  bool isNegative() const {
+    return (*this)[BitWidth - 1];
+  }
+
+  /// This tests the high bit of the APInt to determine if it is unset.
+  /// @brief Determine if this APInt Value is non-negative (>= 0)
+  bool isNonNegative() const {
+    return !isNegative();
+  }
+
+  /// This tests if the value of this APInt is positive (> 0). Note
+  /// that 0 is not a positive value.
+  /// @returns true if this APInt is positive.
+  /// @brief Determine if this APInt Value is positive.
+  bool isStrictlyPositive() const {
+    return isNonNegative() && !!*this;
+  }
+
+  /// This checks to see if all bits of the APInt are set or not.
+  /// @brief Determine if all bits are set
+  bool isAllOnesValue() const {
+    return countPopulation() == BitWidth;
+  }
+
+  /// This checks to see if the value of this APInt is the maximum unsigned
+  /// value for the APInt's bit width.
+  /// @brief Determine if this is the largest unsigned value.
+  bool isMaxValue() const {
+    return countPopulation() == BitWidth;
+  }
+
+  /// This checks to see if the value of this APInt is the maximum signed
+  /// value for the APInt's bit width.
+  /// @brief Determine if this is the largest signed value.
+  bool isMaxSignedValue() const {
+    return BitWidth == 1 ? VAL == 0 :
+      !isNegative() && countPopulation() == BitWidth - 1;
+  }
+
+  /// This checks to see if the value of this APInt is the minimum unsigned
+  /// value for the APInt's bit width.
+  /// @brief Determine if this is the smallest unsigned value.
+  bool isMinValue() const {
+    return !*this;
+  }
+
+  /// This checks to see if the value of this APInt is the minimum signed
+  /// value for the APInt's bit width.
+  /// @brief Determine if this is the smallest signed value.
+  bool isMinSignedValue() const {
+    return BitWidth == 1 ? VAL == 1 : isNegative() && isPowerOf2();
+  }
+
+  /// @brief Check if this APInt has an N-bit unsigned integer value.
+  bool isIntN(unsigned N) const {
+    assert(N && "N == 0 ???");
+    if (N >= getBitWidth())
+      return true;
+
+    if (isSingleWord())
+      return isUIntN(N, VAL);
+    return APInt(N, getNumWords(), pVal).zext(getBitWidth()) == (*this);
+  }
+
+  /// @brief Check if this APInt has an N-bit signed integer value.
+  bool isSignedIntN(unsigned N) const {
+    assert(N && "N == 0 ???");
+    return getMinSignedBits() <= N;
+  }
+
+  /// @returns true if this APInt's value is a power of two > 0.
+  bool isPowerOf2() const {
+    if (isSingleWord())
+      return isPowerOf2_64(VAL);
+    return countPopulationSlowCase() == 1;
+  }
+
+  /// isSignBit - Return true if this is the value returned by getSignBit.
+  bool isSignBit() const { return isMinSignedValue(); }
+
+  /// This converts the APInt to a boolean value as a test against zero.
+  /// @brief Boolean conversion function.
+  bool getBoolValue() const {
+    return !!*this;
+  }
+
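A quick sketch of these value tests, assuming this header; each predicate reads the canonical unsigned bit pattern and interprets it as documented:

  #include "llvm/ADT/APInt.h"
  using namespace llvm;

  void valueTests() {
    APInt V(16, 0x8000);
    (void)V.isNegative();        // true: the high (sign) bit is set
    (void)V.isMinSignedValue();  // true: -32768 for a 16-bit value
    (void)V.isPowerOf2();        // true: exactly one bit set

    APInt W(16, 0xFFFF);
    (void)W.isAllOnesValue();    // true
    (void)W.isMaxValue();        // true: the same test, unsigned view
  }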
+  /// getLimitedValue - If this value is smaller than the specified limit,
+  /// return it, otherwise return the limit value.  This causes the value
+  /// to saturate to the limit.
+  uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const {
+    return (getActiveBits() > 64 || getZExtValue() > Limit) ?
+      Limit : getZExtValue();
+  }
+
+  /// @}
+  /// @name Value Generators
+  /// @{
+  /// @brief Gets maximum unsigned value of APInt for specific bit width.
+  static APInt getMaxValue(unsigned numBits) {
+    return getAllOnesValue(numBits);
+  }
+
+  /// @brief Gets maximum signed value of APInt for a specific bit width.
+  static APInt getSignedMaxValue(unsigned numBits) {
+    APInt API = getAllOnesValue(numBits);
+    API.clearBit(numBits - 1);
+    return API;
+  }
+
+  /// @brief Gets minimum unsigned value of APInt for a specific bit width.
+  static APInt getMinValue(unsigned numBits) {
+    return APInt(numBits, 0);
+  }
+
+  /// @brief Gets minimum signed value of APInt for a specific bit width.
+  static APInt getSignedMinValue(unsigned numBits) {
+    APInt API(numBits, 0);
+    API.setBit(numBits - 1);
+    return API;
+  }
+
+  /// getSignBit - This is just a wrapper function of getSignedMinValue(), and
+  /// it helps code readability when we want to get a SignBit.
+  /// @brief Get the SignBit for a specific bit width.
+  static APInt getSignBit(unsigned BitWidth) {
+    return getSignedMinValue(BitWidth);
+  }
+
+  /// @returns the all-ones value for an APInt of the specified bit-width.
+  /// @brief Get the all-ones value.
+  static APInt getAllOnesValue(unsigned numBits) {
+    return APInt(numBits, -1ULL, true);
+  }
+
+  /// @returns the '0' value for an APInt of the specified bit-width.
+  /// @brief Get the '0' value.
+  static APInt getNullValue(unsigned numBits) {
+    return APInt(numBits, 0);
+  }
+
+  /// Get an APInt with the same BitWidth as this APInt, just zero mask
+  /// the low bits and right shift to the least significant bit.
+  /// @returns the high "numBits" bits of this APInt.
+  APInt getHiBits(unsigned numBits) const;
+
+  /// Get an APInt with the same BitWidth as this APInt, just zero mask
+  /// the high bits.
+  /// @returns the low "numBits" bits of this APInt.
+  APInt getLoBits(unsigned numBits) const;
+
+  /// getOneBitSet - Return an APInt with exactly one bit set in the result.
+  static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
+    APInt Res(numBits, 0);
+    Res.setBit(BitNo);
+    return Res;
+  }
+
+  /// Constructs an APInt value that has a contiguous range of bits set. The
+  /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
+  /// bits will be zero. For example, with parameters (32, 0, 16) you would get
+  /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
+  /// example, with parameters (32, 28, 4), you would get 0xF000000F.
+  /// @param numBits the intended bit width of the result
+  /// @param loBit the index of the lowest bit set.
+  /// @param hiBit the index of the highest bit set.
+  /// @returns An APInt value with the requested bits set.
+  /// @brief Get a value with a block of bits set.
+  static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
+    assert(hiBit <= numBits && "hiBit out of range");
+    assert(loBit < numBits && "loBit out of range");
+    if (hiBit < loBit)
+      return getLowBitsSet(numBits, hiBit) |
+             getHighBitsSet(numBits, numBits-loBit);
+    return getLowBitsSet(numBits, hiBit-loBit).shl(loBit);
+  }
+
+  /// Constructs an APInt value that has the top hiBitsSet bits set.
+  /// @param numBits the bitwidth of the result
+  /// @param hiBitsSet the number of high-order bits set in the result.
+ /// @brief Get a value with high bits set + static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { + assert(hiBitsSet <= numBits && "Too many bits to set!"); + // Handle a degenerate case, to avoid shifting by word size + if (hiBitsSet == 0) + return APInt(numBits, 0); + unsigned shiftAmt = numBits - hiBitsSet; + // For small values, return quickly + if (numBits <= APINT_BITS_PER_WORD) + return APInt(numBits, ~0ULL << shiftAmt); + return getAllOnesValue(numBits).shl(shiftAmt); + } + + /// Constructs an APInt value that has the bottom loBitsSet bits set. + /// @param numBits the bitwidth of the result + /// @param loBitsSet the number of low-order bits set in the result. + /// @brief Get a value with low bits set + static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { + assert(loBitsSet <= numBits && "Too many bits to set!"); + // Handle a degenerate case, to avoid shifting by word size + if (loBitsSet == 0) + return APInt(numBits, 0); + if (loBitsSet == APINT_BITS_PER_WORD) + return APInt(numBits, -1ULL); + // For small values, return quickly. + if (numBits < APINT_BITS_PER_WORD) + return APInt(numBits, (1ULL << loBitsSet) - 1); + return getAllOnesValue(numBits).lshr(numBits - loBitsSet); + } + + /// The hash value is computed as the sum of the words and the bit width. + /// @returns A hash value computed from the sum of the APInt words. + /// @brief Get a hash value based on this APInt + uint64_t getHashValue() const; + + /// This function returns a pointer to the internal storage of the APInt. + /// This is useful for writing out the APInt in binary form without any + /// conversions. + const uint64_t* getRawData() const { + if (isSingleWord()) + return &VAL; + return &pVal[0]; + } + + /// @} + /// @name Unary Operators + /// @{ + /// @returns a new APInt value representing *this incremented by one + /// @brief Postfix increment operator. + const APInt operator++(int) { + APInt API(*this); + ++(*this); + return API; + } + + /// @returns *this incremented by one + /// @brief Prefix increment operator. + APInt& operator++(); + + /// @returns a new APInt representing *this decremented by one. + /// @brief Postfix decrement operator. + const APInt operator--(int) { + APInt API(*this); + --(*this); + return API; + } + + /// @returns *this decremented by one. + /// @brief Prefix decrement operator. + APInt& operator--(); + + /// Performs a bitwise complement operation on this APInt. + /// @returns an APInt that is the bitwise complement of *this + /// @brief Unary bitwise complement operator. + APInt operator~() const { + APInt Result(*this); + Result.flipAllBits(); + return Result; + } + + /// Negates *this using two's complement logic. + /// @returns An APInt value representing the negation of *this. + /// @brief Unary negation operator + APInt operator-() const { + return APInt(BitWidth, 0) - (*this); + } + + /// Performs logical negation operation on this APInt. + /// @returns true if *this is zero, false otherwise. + /// @brief Logical negation operator. + bool operator!() const; + + /// @} + /// @name Assignment Operators + /// @{ + /// @returns *this after assignment of RHS. + /// @brief Copy assignment operator. + APInt& operator=(const APInt& RHS) { + // If the bitwidths are the same, we can avoid mucking with memory + if (isSingleWord() && RHS.isSingleWord()) { + VAL = RHS.VAL; + BitWidth = RHS.BitWidth; + return clearUnusedBits(); + } + + return AssignSlowCase(RHS); + } + + /// The RHS value is assigned to *this. 
If the significant bits in RHS exceed
+  /// the bit width, the excess bits are truncated. If the bit width is larger
+  /// than 64, the value is zero filled in the unspecified high order bits.
+  /// @returns *this after assignment of RHS value.
+  /// @brief Assignment operator.
+  APInt& operator=(uint64_t RHS);
+
+  /// Performs a bitwise AND operation on this APInt and RHS. The result is
+  /// assigned to *this.
+  /// @returns *this after ANDing with RHS.
+  /// @brief Bitwise AND assignment operator.
+  APInt& operator&=(const APInt& RHS);
+
+  /// Performs a bitwise OR operation on this APInt and RHS. The result is
+  /// assigned to *this.
+  /// @returns *this after ORing with RHS.
+  /// @brief Bitwise OR assignment operator.
+  APInt& operator|=(const APInt& RHS);
+
+  /// Performs a bitwise OR operation on this APInt and RHS. RHS is
+  /// logically zero-extended or truncated to match the bit-width of
+  /// the LHS.
+  ///
+  /// @brief Bitwise OR assignment operator.
+  APInt& operator|=(uint64_t RHS) {
+    if (isSingleWord()) {
+      VAL |= RHS;
+      clearUnusedBits();
+    } else {
+      pVal[0] |= RHS;
+    }
+    return *this;
+  }
+
+  /// Performs a bitwise XOR operation on this APInt and RHS. The result is
+  /// assigned to *this.
+  /// @returns *this after XORing with RHS.
+  /// @brief Bitwise XOR assignment operator.
+  APInt& operator^=(const APInt& RHS);
+
+  /// Multiplies this APInt by RHS and assigns the result to *this.
+  /// @returns *this
+  /// @brief Multiplication assignment operator.
+  APInt& operator*=(const APInt& RHS);
+
+  /// Adds RHS to *this and assigns the result to *this.
+  /// @returns *this
+  /// @brief Addition assignment operator.
+  APInt& operator+=(const APInt& RHS);
+
+  /// Subtracts RHS from *this and assigns the result to *this.
+  /// @returns *this
+  /// @brief Subtraction assignment operator.
+  APInt& operator-=(const APInt& RHS);
+
+  /// Shifts *this left by shiftAmt and assigns the result to *this.
+  /// @returns *this after shifting left by shiftAmt
+  /// @brief Left-shift assignment function.
+  APInt& operator<<=(unsigned shiftAmt) {
+    *this = shl(shiftAmt);
+    return *this;
+  }
+
+  /// @}
+  /// @name Binary Operators
+  /// @{
+  /// Performs a bitwise AND operation on *this and RHS.
+  /// @returns An APInt value representing the bitwise AND of *this and RHS.
+  /// @brief Bitwise AND operator.
+  APInt operator&(const APInt& RHS) const {
+    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+    if (isSingleWord())
+      return APInt(getBitWidth(), VAL & RHS.VAL);
+    return AndSlowCase(RHS);
+  }
+  APInt And(const APInt& RHS) const {
+    return this->operator&(RHS);
+  }
+
+  /// Performs a bitwise OR operation on *this and RHS.
+  /// @returns An APInt value representing the bitwise OR of *this and RHS.
+  /// @brief Bitwise OR operator.
+  APInt operator|(const APInt& RHS) const {
+    assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+    if (isSingleWord())
+      return APInt(getBitWidth(), VAL | RHS.VAL);
+    return OrSlowCase(RHS);
+  }
+  APInt Or(const APInt& RHS) const {
+    return this->operator|(RHS);
+  }
+
+  /// Performs a bitwise XOR operation on *this and RHS.
+  /// @returns An APInt value representing the bitwise XOR of *this and RHS.
+  /// @brief Bitwise XOR operator.
+ APInt operator^(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) + return APInt(BitWidth, VAL ^ RHS.VAL); + return XorSlowCase(RHS); + } + APInt Xor(const APInt& RHS) const { + return this->operator^(RHS); + } + + /// Multiplies this APInt by RHS and returns the result. + /// @brief Multiplication operator. + APInt operator*(const APInt& RHS) const; + + /// Adds RHS to this APInt and returns the result. + /// @brief Addition operator. + APInt operator+(const APInt& RHS) const; + APInt operator+(uint64_t RHS) const { + return (*this) + APInt(BitWidth, RHS); + } + + /// Subtracts RHS from this APInt and returns the result. + /// @brief Subtraction operator. + APInt operator-(const APInt& RHS) const; + APInt operator-(uint64_t RHS) const { + return (*this) - APInt(BitWidth, RHS); + } + + APInt operator<<(unsigned Bits) const { + return shl(Bits); + } + + APInt operator<<(const APInt &Bits) const { + return shl(Bits); + } + + /// Arithmetic right-shift this APInt by shiftAmt. + /// @brief Arithmetic right-shift function. + APInt ashr(unsigned shiftAmt) const; + + /// Logical right-shift this APInt by shiftAmt. + /// @brief Logical right-shift function. + APInt lshr(unsigned shiftAmt) const; + + /// Left-shift this APInt by shiftAmt. + /// @brief Left-shift function. + APInt shl(unsigned shiftAmt) const { + assert(shiftAmt <= BitWidth && "Invalid shift amount"); + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); // avoid undefined shift results + return APInt(BitWidth, VAL << shiftAmt); + } + return shlSlowCase(shiftAmt); + } + + /// @brief Rotate left by rotateAmt. + APInt rotl(unsigned rotateAmt) const; + + /// @brief Rotate right by rotateAmt. + APInt rotr(unsigned rotateAmt) const; + + /// Arithmetic right-shift this APInt by shiftAmt. + /// @brief Arithmetic right-shift function. + APInt ashr(const APInt &shiftAmt) const; + + /// Logical right-shift this APInt by shiftAmt. + /// @brief Logical right-shift function. + APInt lshr(const APInt &shiftAmt) const; + + /// Left-shift this APInt by shiftAmt. + /// @brief Left-shift function. + APInt shl(const APInt &shiftAmt) const; + + /// @brief Rotate left by rotateAmt. + APInt rotl(const APInt &rotateAmt) const; + + /// @brief Rotate right by rotateAmt. + APInt rotr(const APInt &rotateAmt) const; + + /// Perform an unsigned divide operation on this APInt by RHS. Both this and + /// RHS are treated as unsigned quantities for purposes of this division. + /// @returns a new APInt value containing the division result + /// @brief Unsigned division operation. + APInt udiv(const APInt &RHS) const; + + /// Signed divide this APInt by APInt RHS. + /// @brief Signed division function for APInt. + APInt sdiv(const APInt &RHS) const { + if (isNegative()) + if (RHS.isNegative()) + return (-(*this)).udiv(-RHS); + else + return -((-(*this)).udiv(RHS)); + else if (RHS.isNegative()) + return -(this->udiv(-RHS)); + return this->udiv(RHS); + } + + /// Perform an unsigned remainder operation on this APInt with RHS being the + /// divisor. Both this and RHS are treated as unsigned quantities for purposes + /// of this operation. Note that this is a true remainder operation and not + /// a modulo operation because the sign follows the sign of the dividend + /// which is *this. + /// @returns a new APInt value containing the remainder result + /// @brief Unsigned remainder operation. + APInt urem(const APInt &RHS) const; + + /// Signed remainder operation on APInt. 
+ /// @brief Function for signed remainder operation. + APInt srem(const APInt &RHS) const { + if (isNegative()) + if (RHS.isNegative()) + return -((-(*this)).urem(-RHS)); + else + return -((-(*this)).urem(RHS)); + else if (RHS.isNegative()) + return this->urem(-RHS); + return this->urem(RHS); + } + + /// Sometimes it is convenient to divide two APInt values and obtain both the + /// quotient and remainder. This function does both operations in the same + /// computation making it a little more efficient. The pair of input arguments + /// may overlap with the pair of output arguments. It is safe to call + /// udivrem(X, Y, X, Y), for example. + /// @brief Dual division/remainder interface. + static void udivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder); + + static void sdivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + if (LHS.isNegative()) { + if (RHS.isNegative()) + APInt::udivrem(-LHS, -RHS, Quotient, Remainder); + else + APInt::udivrem(-LHS, RHS, Quotient, Remainder); + Quotient = -Quotient; + Remainder = -Remainder; + } else if (RHS.isNegative()) { + APInt::udivrem(LHS, -RHS, Quotient, Remainder); + Quotient = -Quotient; + } else { + APInt::udivrem(LHS, RHS, Quotient, Remainder); + } + } + + + // Operations that return overflow indicators. + APInt sadd_ov(const APInt &RHS, bool &Overflow) const; + APInt uadd_ov(const APInt &RHS, bool &Overflow) const; + APInt ssub_ov(const APInt &RHS, bool &Overflow) const; + APInt usub_ov(const APInt &RHS, bool &Overflow) const; + APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; + APInt smul_ov(const APInt &RHS, bool &Overflow) const; + APInt sshl_ov(unsigned Amt, bool &Overflow) const; + + /// @returns the bit value at bitPosition + /// @brief Array-indexing support. + bool operator[](unsigned bitPosition) const; + + /// @} + /// @name Comparison Operators + /// @{ + /// Compares this APInt with RHS for the validity of the equality + /// relationship. + /// @brief Equality operator. + bool operator==(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"); + if (isSingleWord()) + return VAL == RHS.VAL; + return EqualSlowCase(RHS); + } + + /// Compares this APInt with a uint64_t for the validity of the equality + /// relationship. + /// @returns true if *this == Val + /// @brief Equality operator. + bool operator==(uint64_t Val) const { + if (isSingleWord()) + return VAL == Val; + return EqualSlowCase(Val); + } + + /// Compares this APInt with RHS for the validity of the equality + /// relationship. + /// @returns true if *this == Val + /// @brief Equality comparison. + bool eq(const APInt &RHS) const { + return (*this) == RHS; + } + + /// Compares this APInt with RHS for the validity of the inequality + /// relationship. + /// @returns true if *this != Val + /// @brief Inequality operator. + bool operator!=(const APInt& RHS) const { + return !((*this) == RHS); + } + + /// Compares this APInt with a uint64_t for the validity of the inequality + /// relationship. + /// @returns true if *this != Val + /// @brief Inequality operator. + bool operator!=(uint64_t Val) const { + return !((*this) == Val); + } + + /// Compares this APInt with RHS for the validity of the inequality + /// relationship. 
+  /// Compares this APInt with RHS for the validity of the inequality
+  /// relationship.
+  /// @returns true if *this != RHS
+  /// @brief Inequality comparison
+  bool ne(const APInt &RHS) const {
+    return !((*this) == RHS);
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// the validity of the less-than relationship.
+  /// @returns true if *this < RHS when both are considered unsigned.
+  /// @brief Unsigned less than comparison
+  bool ult(const APInt &RHS) const;
+
+  /// Regards *this as an unsigned quantity and compares it with RHS for
+  /// the validity of the less-than relationship.
+  /// @returns true if *this < RHS when considered unsigned.
+  /// @brief Unsigned less than comparison
+  bool ult(uint64_t RHS) const {
+    return ult(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// validity of the less-than relationship.
+  /// @returns true if *this < RHS when both are considered signed.
+  /// @brief Signed less than comparison
+  bool slt(const APInt& RHS) const;
+
+  /// Regards *this as a signed quantity and compares it with RHS for
+  /// the validity of the less-than relationship.
+  /// @returns true if *this < RHS when considered signed.
+  /// @brief Signed less than comparison
+  bool slt(uint64_t RHS) const {
+    return slt(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when both are considered unsigned.
+  /// @brief Unsigned less or equal comparison
+  bool ule(const APInt& RHS) const {
+    return ult(RHS) || eq(RHS);
+  }
+
+  /// Regards *this as an unsigned quantity and compares it with RHS for
+  /// the validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when considered unsigned.
+  /// @brief Unsigned less or equal comparison
+  bool ule(uint64_t RHS) const {
+    return ule(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when both are considered signed.
+  /// @brief Signed less or equal comparison
+  bool sle(const APInt& RHS) const {
+    return slt(RHS) || eq(RHS);
+  }
+
+  /// Regards *this as a signed quantity and compares it with RHS for
+  /// the validity of the less-or-equal relationship.
+  /// @returns true if *this <= RHS when considered signed.
+  /// @brief Signed less or equal comparison
+  bool sle(uint64_t RHS) const {
+    return sle(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when both are considered unsigned.
+  /// @brief Unsigned greater than comparison
+  bool ugt(const APInt& RHS) const {
+    return !ult(RHS) && !eq(RHS);
+  }
+
+  /// Regards *this as an unsigned quantity and compares it with RHS for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when considered unsigned.
+  /// @brief Unsigned greater than comparison
+  bool ugt(uint64_t RHS) const {
+    return ugt(APInt(getBitWidth(), RHS));
+  }
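+
+  // Usage sketch (illustrative): the same bit pattern compares differently
+  // under the unsigned and signed predicates, e.g. for the 4-bit value 1111:
+  //   APInt X(4, 0xF);
+  //   X.ult(APInt(4, 1));  // false: 15 < 1 is false when unsigned
+  //   X.slt(APInt(4, 1));  // true:  -1 < 1 when signed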
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when both are considered signed.
+  /// @brief Signed greater than comparison
+  bool sgt(const APInt& RHS) const {
+    return !slt(RHS) && !eq(RHS);
+  }
+
+  /// Regards *this as a signed quantity and compares it with RHS for
+  /// the validity of the greater-than relationship.
+  /// @returns true if *this > RHS when considered signed.
+  /// @brief Signed greater than comparison
+  bool sgt(uint64_t RHS) const {
+    return sgt(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as unsigned quantities and compares them for
+  /// validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when both are considered unsigned.
+  /// @brief Unsigned greater or equal comparison
+  bool uge(const APInt& RHS) const {
+    return !ult(RHS);
+  }
+
+  /// Regards *this as an unsigned quantity and compares it with RHS for
+  /// the validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when considered unsigned.
+  /// @brief Unsigned greater or equal comparison
+  bool uge(uint64_t RHS) const {
+    return uge(APInt(getBitWidth(), RHS));
+  }
+
+  /// Regards both *this and RHS as signed quantities and compares them for
+  /// validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when both are considered signed.
+  /// @brief Signed greater or equal comparison
+  bool sge(const APInt& RHS) const {
+    return !slt(RHS);
+  }
+
+  /// Regards *this as a signed quantity and compares it with RHS for
+  /// the validity of the greater-or-equal relationship.
+  /// @returns true if *this >= RHS when considered signed.
+  /// @brief Signed greater or equal comparison
+  bool sge(uint64_t RHS) const {
+    return sge(APInt(getBitWidth(), RHS));
+  }
+
+  /// This operation tests if there are any pairs of corresponding bits
+  /// between this APInt and RHS that are both set.
+  bool intersects(const APInt &RHS) const {
+    return (*this & RHS) != 0;
+  }
+
+  /// @}
+  /// @name Resizing Operators
+  /// @{
+  /// Truncate the APInt to a specified width. It is an error to specify a
+  /// width that is greater than or equal to the current width.
+  /// @brief Truncate to new width.
+  APInt trunc(unsigned width) const;
+
+  /// This operation sign extends the APInt to a new width. If the high order
+  /// bit is set, the fill on the left will be done with 1 bits, otherwise
+  /// zero. It is an error to specify a width that is less than or equal to
+  /// the current width.
+  /// @brief Sign extend to a new width.
+  APInt sext(unsigned width) const;
+
+  /// This operation zero extends the APInt to a new width. The high order
+  /// bits are filled with 0 bits. It is an error to specify a width that is
+  /// less than or equal to the current width.
+  /// @brief Zero extend to a new width.
+  APInt zext(unsigned width) const;
+
+  /// Make this APInt have the bit width given by \p width. The value is sign
+  /// extended, truncated, or left alone to make it that width.
+  /// @brief Sign extend or truncate to width
+  APInt sextOrTrunc(unsigned width) const;
+
+  /// Make this APInt have the bit width given by \p width. The value is zero
+  /// extended, truncated, or left alone to make it that width.
+  /// @brief Zero extend or truncate to width
+  APInt zextOrTrunc(unsigned width) const;
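+
+  // Usage sketch (illustrative) for the resizing operations above:
+  //   APInt V(8, 0xF0);    // 240 unsigned, -16 signed
+  //   V.zext(16);          // 0x00F0: high bits filled with zeros
+  //   V.sext(16);          // 0xFFF0: high bits copy the sign bit
+  //   V.trunc(4);          // 0x0: keeps only the low four bits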
+
+  /// @}
+  /// @name Bit Manipulation Operators
+  /// @{
+  /// @brief Set every bit to 1.
+  void setAllBits() {
+    if (isSingleWord())
+      VAL = -1ULL;
+    else {
+      // Set all the bits in all the words.
+      for (unsigned i = 0; i < getNumWords(); ++i)
+        pVal[i] = -1ULL;
+    }
+    // Clear the unused ones
+    clearUnusedBits();
+  }
+
+  /// Set the bit whose position is given by "bitPosition" to 1.
+  /// @brief Set a given bit to 1.
+  void setBit(unsigned bitPosition);
+
+  /// @brief Set every bit to 0.
+  void clearAllBits() {
+    if (isSingleWord())
+      VAL = 0;
+    else
+      memset(pVal, 0, getNumWords() * APINT_WORD_SIZE);
+  }
+
+  /// Set the bit whose position is given by "bitPosition" to 0.
+  /// @brief Set a given bit to 0.
+  void clearBit(unsigned bitPosition);
+
+  /// @brief Toggle every bit to its opposite value.
+  void flipAllBits() {
+    if (isSingleWord())
+      VAL ^= -1ULL;
+    else {
+      for (unsigned i = 0; i < getNumWords(); ++i)
+        pVal[i] ^= -1ULL;
+    }
+    clearUnusedBits();
+  }
+
+  /// Toggle the bit whose position is given by "bitPosition" to its opposite
+  /// value.
+  /// @brief Toggles a given bit to its opposite value.
+  void flipBit(unsigned bitPosition);
+
+  /// @}
+  /// @name Value Characterization Functions
+  /// @{
+
+  /// @returns the total number of bits.
+  unsigned getBitWidth() const {
+    return BitWidth;
+  }
+
+  /// Here one word's bit width equals that of uint64_t.
+  /// @returns the number of words needed to hold the integer value of this
+  /// APInt.
+  /// @brief Get the number of words.
+  unsigned getNumWords() const {
+    return getNumWords(BitWidth);
+  }
+
+  /// Here one word's bit width equals that of uint64_t.
+  /// @returns the number of words needed to hold the integer value with a
+  /// given bit width.
+  /// @brief Get the number of words.
+  static unsigned getNumWords(unsigned BitWidth) {
+    return (BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
+  }
+
+  /// This function returns the number of active bits, which is defined as the
+  /// bit width minus the number of leading zeros. This is used in several
+  /// computations to see how "wide" the value is.
+  /// @brief Compute the number of active bits in the value
+  unsigned getActiveBits() const {
+    return BitWidth - countLeadingZeros();
+  }
+
+  /// This function returns the number of active words in the value of this
+  /// APInt. This is used in conjunction with getActiveData to extract the raw
+  /// value of the APInt.
+  unsigned getActiveWords() const {
+    return whichWord(getActiveBits()-1) + 1;
+  }
+
+  /// Computes the minimum bit width for this APInt while considering it to be
+  /// a signed (and probably negative) value. If the value is not negative,
+  /// this function returns the same value as getActiveBits()+1. Otherwise, it
+  /// returns the smallest bit width that will retain the negative value. For
+  /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
+  /// for -1, this function will always return 1.
+  /// @brief Get the minimum bit size for this signed APInt
+  unsigned getMinSignedBits() const {
+    if (isNegative())
+      return BitWidth - countLeadingOnes() + 1;
+    return getActiveBits()+1;
+  }
+
+  /// This method attempts to return the value of this APInt as a zero
+  /// extended uint64_t. The bit width must be <= 64 or the value must fit
+  /// within a uint64_t. Otherwise an assertion will result.
+  /// @brief Get zero extended value
+  uint64_t getZExtValue() const {
+    if (isSingleWord())
+      return VAL;
+    assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
+    return pVal[0];
+  }
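+
+  // Worked example (illustrative) for getNumWords' rounding-up division:
+  // with APINT_BITS_PER_WORD == 64, widths 1..64 need one word, 65..128 need
+  // two, and so on; e.g. getNumWords(65) == (65 + 63) / 64 == 2.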
+
+  /// This method attempts to return the value of this APInt as a sign
+  /// extended int64_t. The bit width must be <= 64 or the value must fit
+  /// within an int64_t. Otherwise an assertion will result.
+  /// @brief Get sign extended value
+  int64_t getSExtValue() const {
+    if (isSingleWord())
+      return int64_t(VAL << (APINT_BITS_PER_WORD - BitWidth)) >>
+                     (APINT_BITS_PER_WORD - BitWidth);
+    assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
+    return int64_t(pVal[0]);
+  }
+
+  /// This method determines how many bits are required to hold the APInt
+  /// equivalent of the string given by \arg str.
+  /// @brief Get bits required for string value.
+  static unsigned getBitsNeeded(StringRef str, uint8_t radix);
+
+  /// countLeadingZeros - This function is an APInt version of the
+  /// countLeadingZeros_{32,64} functions in MathExtras.h. It counts the
+  /// number of zeros from the most significant bit to the first one bit.
+  /// @returns BitWidth if the value is zero.
+  /// @returns the number of zeros from the most significant bit to the first
+  /// one bit.
+  unsigned countLeadingZeros() const {
+    if (isSingleWord()) {
+      unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
+      return CountLeadingZeros_64(VAL) - unusedBits;
+    }
+    return countLeadingZerosSlowCase();
+  }
+
+  /// countLeadingOnes - This function is an APInt version of the
+  /// countLeadingOnes_{32,64} functions in MathExtras.h. It counts the number
+  /// of ones from the most significant bit to the first zero bit.
+  /// @returns 0 if the high order bit is not set
+  /// @returns the number of 1 bits from the most significant to the least
+  /// @brief Count the number of leading one bits.
+  unsigned countLeadingOnes() const;
+
+  /// Computes the number of leading bits of this APInt that are equal to its
+  /// sign bit.
+  unsigned getNumSignBits() const {
+    return isNegative() ? countLeadingOnes() : countLeadingZeros();
+  }
+
+  /// countTrailingZeros - This function is an APInt version of the
+  /// countTrailingZeros_{32,64} functions in MathExtras.h. It counts
+  /// the number of zeros from the least significant bit to the first set bit.
+  /// @returns BitWidth if the value is zero.
+  /// @returns the number of zeros from the least significant bit to the first
+  /// one bit.
+  /// @brief Count the number of trailing zero bits.
+  unsigned countTrailingZeros() const;
+
+  /// countTrailingOnes - This function is an APInt version of the
+  /// countTrailingOnes_{32,64} functions in MathExtras.h. It counts
+  /// the number of ones from the least significant bit to the first zero bit.
+  /// @returns BitWidth if the value is all ones.
+  /// @returns the number of ones from the least significant bit to the first
+  /// zero bit.
+  /// @brief Count the number of trailing one bits.
+  unsigned countTrailingOnes() const {
+    if (isSingleWord())
+      return CountTrailingOnes_64(VAL);
+    return countTrailingOnesSlowCase();
+  }
+
+  /// countPopulation - This function is an APInt version of the
+  /// countPopulation_{32,64} functions in MathExtras.h. It counts the number
+  /// of 1 bits in the APInt value.
+  /// @returns 0 if the value is zero.
+  /// @returns the number of set bits.
+  /// @brief Count the number of bits set.
+  unsigned countPopulation() const {
+    if (isSingleWord())
+      return CountPopulation_64(VAL);
+    return countPopulationSlowCase();
+  }
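+
+  // Usage sketch (illustrative) for the counting functions above, with the
+  // 16-bit value 0x00F0 (0000 0000 1111 0000):
+  //   countLeadingZeros()  == 8
+  //   countTrailingZeros() == 4
+  //   countPopulation()    == 4
+  //   getActiveBits()      == 8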
+
+  /// @}
+  /// @name Conversion Functions
+  /// @{
+  void print(raw_ostream &OS, bool isSigned) const;
+
+  /// toString - Converts an APInt to a string and appends it to Str. Str is
+  /// commonly a SmallString.
+  void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed) const;
+
+  /// Considers the APInt to be unsigned and converts it into a string in the
+  /// radix given. The radix can be 2, 8, 10 or 16.
+  void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
+    toString(Str, Radix, false);
+  }
+
+  /// Considers the APInt to be signed and converts it into a string in the
+  /// radix given. The radix can be 2, 8, 10 or 16.
+  void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
+    toString(Str, Radix, true);
+  }
+
+  /// toString - This returns the APInt as a std::string. Note that this is an
+  /// inefficient method. It is better to pass in a SmallVector/SmallString
+  /// to the methods above to avoid thrashing the heap for the string.
+  std::string toString(unsigned Radix, bool Signed) const;
+
+  /// @returns a byte-swapped representation of this APInt Value.
+  APInt byteSwap() const;
+
+  /// @brief Converts this APInt to a double value.
+  double roundToDouble(bool isSigned) const;
+
+  /// @brief Converts this unsigned APInt to a double value.
+  double roundToDouble() const {
+    return roundToDouble(false);
+  }
+
+  /// @brief Converts this signed APInt to a double value.
+  double signedRoundToDouble() const {
+    return roundToDouble(true);
+  }
+
+  /// The conversion does not do a translation from integer to double, it just
+  /// re-interprets the bits as a double. Note that it is valid to do this on
+  /// any bit width. Exactly 64 bits will be translated.
+  /// @brief Converts APInt bits to a double
+  double bitsToDouble() const {
+    union {
+      uint64_t I;
+      double D;
+    } T;
+    T.I = (isSingleWord() ? VAL : pVal[0]);
+    return T.D;
+  }
+
+  /// The conversion does not do a translation from integer to float, it just
+  /// re-interprets the bits as a float. Note that it is valid to do this on
+  /// any bit width. Exactly 32 bits will be translated.
+  /// @brief Converts APInt bits to a float
+  float bitsToFloat() const {
+    union {
+      unsigned I;
+      float F;
+    } T;
+    T.I = unsigned((isSingleWord() ? VAL : pVal[0]));
+    return T.F;
+  }
+
+  /// The conversion does not do a translation from double to integer, it just
+  /// re-interprets the bits of the double.
+  /// @brief Converts a double to APInt bits.
+  static APInt doubleToBits(double V) {
+    union {
+      uint64_t I;
+      double D;
+    } T;
+    T.D = V;
+    return APInt(sizeof T * CHAR_BIT, T.I);
+  }
+
+  /// The conversion does not do a translation from float to integer, it just
+  /// re-interprets the bits of the float.
+  /// @brief Converts a float to APInt bits.
+  static APInt floatToBits(float V) {
+    union {
+      unsigned I;
+      float F;
+    } T;
+    T.F = V;
+    return APInt(sizeof T * CHAR_BIT, T.I);
+  }
+
+  /// @}
+  /// @name Mathematics Operations
+  /// @{
+
+  /// @returns the floor log base 2 of this APInt.
+  unsigned logBase2() const {
+    return BitWidth - 1 - countLeadingZeros();
+  }
+
+  /// @returns the ceil log base 2 of this APInt.
+  unsigned ceilLogBase2() const {
+    return BitWidth - (*this - 1).countLeadingZeros();
+  }
+
+  /// @returns the log base 2 of this APInt if it is an exact power of two, -1
+  /// otherwise
+  int32_t exactLogBase2() const {
+    if (!isPowerOf2())
+      return -1;
+    return logBase2();
+  }
+
+  /// @brief Compute the square root
+  APInt sqrt() const;
+
+  /// If *this is < 0 then return -(*this), otherwise *this.
+  /// @brief Get the absolute value.
+  APInt abs() const {
+    if (isNegative())
+      return -(*this);
+    return *this;
+  }
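+
+  // Usage sketch (illustrative) for the bit-cast conversions above: they
+  // round-trip exactly, since no numeric translation is performed:
+  //   APInt Bits = APInt::doubleToBits(1.0); // IEEE pattern 0x3FF0000000000000
+  //   double D = Bits.bitsToDouble();        // exactly 1.0 again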
+
+  /// @returns the multiplicative inverse for a given modulo.
+  APInt multiplicativeInverse(const APInt& modulo) const;
+
+  /// @}
+  /// @name Support for division by constant
+  /// @{
+
+  /// Calculate the magic number for signed division by a constant.
+  struct ms;
+  ms magic() const;
+
+  /// Calculate the magic number for unsigned division by a constant.
+  struct mu;
+  mu magicu() const;
+
+  /// @}
+  /// @name Building-block Operations for APInt and APFloat
+  /// @{
+
+  // These building block operations operate on a representation of
+  // arbitrary precision, two's-complement, bignum integer values.
+  // They should be sufficient to implement APInt and APFloat bignum
+  // requirements. Inputs are generally a pointer to the base of an
+  // array of integer parts, representing an unsigned bignum, and a
+  // count of how many parts there are.
+
+  /// Sets the least significant part of a bignum to the input value,
+  /// and zeroes out higher parts.
+  static void tcSet(integerPart *, integerPart, unsigned int);
+
+  /// Assign one bignum to another.
+  static void tcAssign(integerPart *, const integerPart *, unsigned int);
+
+  /// Returns true if a bignum is zero, false otherwise.
+  static bool tcIsZero(const integerPart *, unsigned int);
+
+  /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
+  static int tcExtractBit(const integerPart *, unsigned int bit);
+
+  /// Copy the bit vector of width srcBITS from SRC, starting at bit
+  /// srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB
+  /// becomes the least significant bit of DST. All high bits above
+  /// srcBITS in DST are zero-filled.
+  static void tcExtract(integerPart *, unsigned int dstCount,
+                        const integerPart *,
+                        unsigned int srcBits, unsigned int srcLSB);
+
+  /// Set the given bit of a bignum. Zero-based.
+  static void tcSetBit(integerPart *, unsigned int bit);
+
+  /// Clear the given bit of a bignum. Zero-based.
+  static void tcClearBit(integerPart *, unsigned int bit);
+
+  /// Returns the bit number of the least or most significant set bit
+  /// of a number. If the input number has no bits set -1U is
+  /// returned.
+  static unsigned int tcLSB(const integerPart *, unsigned int);
+  static unsigned int tcMSB(const integerPart *parts, unsigned int n);
+
+  /// Negate a bignum in-place.
+  static void tcNegate(integerPart *, unsigned int);
+
+  /// DST += RHS + CARRY where CARRY is zero or one. Returns the
+  /// carry flag.
+  static integerPart tcAdd(integerPart *, const integerPart *,
+                           integerPart carry, unsigned);
+
+  /// DST -= RHS + CARRY where CARRY is zero or one. Returns the
+  /// carry flag.
+  static integerPart tcSubtract(integerPart *, const integerPart *,
+                                integerPart carry, unsigned);
+
+  /// DST += SRC * MULTIPLIER + PART   if add is true
+  /// DST  = SRC * MULTIPLIER + PART   if add is false
+  ///
+  /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC
+  /// they must start at the same point, i.e. DST == SRC.
+  ///
+  /// If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is
+  /// returned. Otherwise DST is filled with the least significant
+  /// DSTPARTS parts of the result, and if all of the omitted higher
+  /// parts were zero return zero, otherwise overflow occurred and
+  /// return one.
+  static int tcMultiplyPart(integerPart *dst, const integerPart *src,
+                            integerPart multiplier, integerPart carry,
+                            unsigned int srcParts, unsigned int dstParts,
+                            bool add);
+
+  /// DST = LHS * RHS, where DST has the same width as the operands
+  /// and is filled with the least significant parts of the result.
+  /// Returns one if overflow occurred, otherwise zero. DST must be
+  /// disjoint from both operands.
+  static int tcMultiply(integerPart *, const integerPart *,
+                        const integerPart *, unsigned);
+
+  /// DST = LHS * RHS, where DST has width the sum of the widths of
+  /// the operands. No overflow occurs. DST must be disjoint from
+  /// both operands. Returns the number of parts required to hold the
+  /// result.
+  static unsigned int tcFullMultiply(integerPart *, const integerPart *,
+                                     const integerPart *, unsigned, unsigned);
+
+  /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
+  /// Otherwise set LHS to LHS / RHS with the fractional part
+  /// discarded, set REMAINDER to the remainder, return zero. i.e.
+  ///
+  ///   OLD_LHS = RHS * LHS + REMAINDER
+  ///
+  /// SCRATCH is a bignum of the same size as the operands and result
+  /// for use by the routine; its contents need not be initialized
+  /// and are destroyed. LHS, REMAINDER and SCRATCH must be
+  /// distinct.
+  static int tcDivide(integerPart *lhs, const integerPart *rhs,
+                      integerPart *remainder, integerPart *scratch,
+                      unsigned int parts);
+
+  /// Shift a bignum left COUNT bits. Shifted in bits are zero.
+  /// There are no restrictions on COUNT.
+  static void tcShiftLeft(integerPart *, unsigned int parts,
+                          unsigned int count);
+
+  /// Shift a bignum right COUNT bits. Shifted in bits are zero.
+  /// There are no restrictions on COUNT.
+  static void tcShiftRight(integerPart *, unsigned int parts,
+                           unsigned int count);
+
+  /// The obvious AND, OR and XOR and complement operations.
+  static void tcAnd(integerPart *, const integerPart *, unsigned int);
+  static void tcOr(integerPart *, const integerPart *, unsigned int);
+  static void tcXor(integerPart *, const integerPart *, unsigned int);
+  static void tcComplement(integerPart *, unsigned int);
+
+  /// Comparison (unsigned) of two bignums.
+  static int tcCompare(const integerPart *, const integerPart *,
+                       unsigned int);
+
+  /// Increment a bignum in-place. Return the carry flag.
+  static integerPart tcIncrement(integerPart *, unsigned int);
+
+  /// Set the least significant BITS and clear the rest.
+  static void tcSetLeastSignificantBits(integerPart *, unsigned int,
+                                        unsigned int bits);
+
+  /// @brief debug method
+  void dump() const;
+
+  /// @}
+};
+
+/// Magic data for optimising signed division by a constant.
+struct APInt::ms {
+  APInt m;    ///< magic number
+  unsigned s; ///< shift amount
+};
+
+/// Magic data for optimising unsigned division by a constant.
+struct APInt::mu {
+  APInt m;    ///< magic number
+  bool a;     ///< add indicator
+  unsigned s; ///< shift amount
+};
+
+inline bool operator==(uint64_t V1, const APInt& V2) {
+  return V2 == V1;
+}
+
+inline bool operator!=(uint64_t V1, const APInt& V2) {
+  return V2 != V1;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
+  I.print(OS, true);
+  return OS;
+}
+
+namespace APIntOps {
+
+/// @brief Determine the smaller of two APInts considered to be signed.
+inline APInt smin(const APInt &A, const APInt &B) {
+  return A.slt(B) ? A : B;
+}
+
+/// @brief Determine the larger of two APInts considered to be signed.
+inline APInt smax(const APInt &A, const APInt &B) {
+  return A.sgt(B) ? A : B;
+}
+
+/// @brief Determine the smaller of two APInts considered to be unsigned.
+inline APInt umin(const APInt &A, const APInt &B) {
+  return A.ult(B) ? A : B;
+}
+
+/// @brief Determine the larger of two APInts considered to be unsigned.
+inline APInt umax(const APInt &A, const APInt &B) {
+  return A.ugt(B) ? A : B;
+}
+
+/// @brief Check if the specified APInt has an N-bit unsigned integer value.
+inline bool isIntN(unsigned N, const APInt& APIVal) {
+  return APIVal.isIntN(N);
+}
+
+/// @brief Check if the specified APInt has an N-bit signed integer value.
+inline bool isSignedIntN(unsigned N, const APInt& APIVal) {
+  return APIVal.isSignedIntN(N);
+}
+
+/// @returns true if the argument APInt value is a sequence of ones
+/// starting at the least significant bit with the remainder zero.
+inline bool isMask(unsigned numBits, const APInt& APIVal) {
+  return numBits <= APIVal.getBitWidth() &&
+         APIVal == APInt::getLowBitsSet(APIVal.getBitWidth(), numBits);
+}
+
+/// @returns true if the argument APInt value contains a sequence of ones
+/// with the remainder zero.
+inline bool isShiftedMask(unsigned numBits, const APInt& APIVal) {
+  return isMask(numBits, (APIVal - APInt(numBits, 1)) | APIVal);
+}
+
+/// @returns a byte-swapped representation of the specified APInt Value.
+inline APInt byteSwap(const APInt& APIVal) {
+  return APIVal.byteSwap();
+}
+
+/// @returns the floor log base 2 of the specified APInt value.
+inline unsigned logBase2(const APInt& APIVal) {
+  return APIVal.logBase2();
+}
+
+/// GreatestCommonDivisor - This function returns the greatest common
+/// divisor of the two APInt values using Euclid's algorithm.
+/// @returns the greatest common divisor of Val1 and Val2
+/// @brief Compute GCD of two APInt values.
+APInt GreatestCommonDivisor(const APInt& Val1, const APInt& Val2);
+
+/// Treats the APInt as an unsigned value for conversion purposes.
+/// @brief Converts the given APInt to a double value.
+inline double RoundAPIntToDouble(const APInt& APIVal) {
+  return APIVal.roundToDouble();
+}
+
+/// Treats the APInt as a signed value for conversion purposes.
+/// @brief Converts the given APInt to a double value.
+inline double RoundSignedAPIntToDouble(const APInt& APIVal) {
+  return APIVal.signedRoundToDouble();
+}
+
+/// @brief Converts the given APInt to a float value.
+inline float RoundAPIntToFloat(const APInt& APIVal) {
+  return float(RoundAPIntToDouble(APIVal));
+}
+
+/// Treats the APInt as a signed value for conversion purposes.
+/// @brief Converts the given APInt to a float value.
+inline float RoundSignedAPIntToFloat(const APInt& APIVal) {
+  return float(APIVal.signedRoundToDouble());
+}
+
+/// RoundDoubleToAPInt - This function converts a double value to an APInt
+/// value.
+/// @brief Converts the given double value into an APInt.
+APInt RoundDoubleToAPInt(double Double, unsigned width);
+
+/// RoundFloatToAPInt - Converts a float value into an APInt value.
+/// @brief Converts a float value into an APInt.
+inline APInt RoundFloatToAPInt(float Float, unsigned width) {
+  return RoundDoubleToAPInt(double(Float), width);
+}
+
+/// Arithmetic right-shift the APInt by shiftAmt.
+/// @brief Arithmetic right-shift function.
+inline APInt ashr(const APInt& LHS, unsigned shiftAmt) {
+  return LHS.ashr(shiftAmt);
+}
+
+/// Logical right-shift the APInt by shiftAmt.
+/// @brief Logical right-shift function.
+inline APInt lshr(const APInt& LHS, unsigned shiftAmt) {
+  return LHS.lshr(shiftAmt);
+}
+
+/// Left-shift the APInt by shiftAmt.
+/// @brief Left-shift function.
+inline APInt shl(const APInt& LHS, unsigned shiftAmt) {
+  return LHS.shl(shiftAmt);
+}
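+
+// Note (illustrative): isShiftedMask relies on the standard trick that for
+// any value V, (V - 1) | V fills in the zeros below the lowest set bit, so a
+// shifted run of ones becomes a plain low-bit mask. For isMask itself:
+//   isMask(3, APInt(8, 0x07));  // true: 0x07 is the low-3-bit mask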
+
+/// Signed divide APInt LHS by APInt RHS.
+/// @brief Signed division function for APInt.
+inline APInt sdiv(const APInt& LHS, const APInt& RHS) {
+  return LHS.sdiv(RHS);
+}
+
+/// Unsigned divide APInt LHS by APInt RHS.
+/// @brief Unsigned division function for APInt.
+inline APInt udiv(const APInt& LHS, const APInt& RHS) {
+  return LHS.udiv(RHS);
+}
+
+/// Signed remainder operation on APInt.
+/// @brief Function for signed remainder operation.
+inline APInt srem(const APInt& LHS, const APInt& RHS) {
+  return LHS.srem(RHS);
+}
+
+/// Unsigned remainder operation on APInt.
+/// @brief Function for unsigned remainder operation.
+inline APInt urem(const APInt& LHS, const APInt& RHS) {
+  return LHS.urem(RHS);
+}
+
+/// Performs multiplication on APInt values.
+/// @brief Function for multiplication operation.
+inline APInt mul(const APInt& LHS, const APInt& RHS) {
+  return LHS * RHS;
+}
+
+/// Performs addition on APInt values.
+/// @brief Function for addition operation.
+inline APInt add(const APInt& LHS, const APInt& RHS) {
+  return LHS + RHS;
+}
+
+/// Performs subtraction on APInt values.
+/// @brief Function for subtraction operation.
+inline APInt sub(const APInt& LHS, const APInt& RHS) {
+  return LHS - RHS;
+}
+
+/// Performs bitwise AND operation on APInt LHS and
+/// APInt RHS.
+/// @brief Bitwise AND function for APInt.
+inline APInt And(const APInt& LHS, const APInt& RHS) {
+  return LHS & RHS;
+}
+
+/// Performs bitwise OR operation on APInt LHS and APInt RHS.
+/// @brief Bitwise OR function for APInt.
+inline APInt Or(const APInt& LHS, const APInt& RHS) {
+  return LHS | RHS;
+}
+
+/// Performs bitwise XOR operation on APInt.
+/// @brief Bitwise XOR function for APInt.
+inline APInt Xor(const APInt& LHS, const APInt& RHS) {
+  return LHS ^ RHS;
+}
+
+/// Performs a bitwise complement operation on APInt.
+/// @brief Bitwise complement function.
+inline APInt Not(const APInt& APIVal) {
+  return ~APIVal;
+}
+
+} // End of APIntOps namespace
+
+} // End of llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/APSInt.h b/final/include/llvm/ADT/APSInt.h
new file mode 100644
index 00000000000..54a7b601d1f
--- /dev/null
+++ b/final/include/llvm/ADT/APSInt.h
@@ -0,0 +1,266 @@
+//===-- llvm/ADT/APSInt.h - Arbitrary Precision Signed Int -----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the APSInt class, which is a simple class that
+// represents an arbitrary sized integer that knows its signedness.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_APSINT_H
+#define LLVM_APSINT_H
+
+#include "llvm/ADT/APInt.h"
+
+namespace llvm {
+
+class APSInt : public APInt {
+  bool IsUnsigned;
+public:
+  /// Default constructor that creates an uninitialized APSInt.
+  explicit APSInt() {}
+
+  /// APSInt ctor - Create an APSInt with the specified width, default to
+  /// unsigned.
+  explicit APSInt(uint32_t BitWidth, bool isUnsigned = true)
+   : APInt(BitWidth, 0), IsUnsigned(isUnsigned) {}
+
+  explicit APSInt(const APInt &I, bool isUnsigned = true)
+   : APInt(I), IsUnsigned(isUnsigned) {}
+
+  APSInt &operator=(const APSInt &RHS) {
+    APInt::operator=(RHS);
+    IsUnsigned = RHS.IsUnsigned;
+    return *this;
+  }
+
+  APSInt &operator=(const APInt &RHS) {
+    // Retain our current sign.
+    APInt::operator=(RHS);
+    return *this;
+  }
+
+  APSInt &operator=(uint64_t RHS) {
+    // Retain our current sign.
+    APInt::operator=(RHS);
+    return *this;
+  }
+
+  // Query sign information.
+  bool isSigned() const { return !IsUnsigned; }
+  bool isUnsigned() const { return IsUnsigned; }
+  void setIsUnsigned(bool Val) { IsUnsigned = Val; }
+  void setIsSigned(bool Val) { IsUnsigned = !Val; }
+
+  /// toString - Append this APSInt to the specified SmallString.
+  void toString(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
+    APInt::toString(Str, Radix, isSigned());
+  }
+  /// toString - Converts an APSInt to a std::string. This is an inefficient
+  /// method; you should prefer passing in a SmallString instead.
+  std::string toString(unsigned Radix) const {
+    return APInt::toString(Radix, isSigned());
+  }
+  using APInt::toString;
+
+  APSInt trunc(uint32_t width) const {
+    return APSInt(APInt::trunc(width), IsUnsigned);
+  }
+
+  APSInt extend(uint32_t width) const {
+    if (IsUnsigned)
+      return APSInt(zext(width), IsUnsigned);
+    else
+      return APSInt(sext(width), IsUnsigned);
+  }
+
+  APSInt extOrTrunc(uint32_t width) const {
+    if (IsUnsigned)
+      return APSInt(zextOrTrunc(width), IsUnsigned);
+    else
+      return APSInt(sextOrTrunc(width), IsUnsigned);
+  }
+
+  const APSInt &operator%=(const APSInt &RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    if (IsUnsigned)
+      *this = urem(RHS);
+    else
+      *this = srem(RHS);
+    return *this;
+  }
+  const APSInt &operator/=(const APSInt &RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    if (IsUnsigned)
+      *this = udiv(RHS);
+    else
+      *this = sdiv(RHS);
+    return *this;
+  }
+  APSInt operator%(const APSInt &RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? APSInt(urem(RHS), true) : APSInt(srem(RHS), false);
+  }
+  APSInt operator/(const APSInt &RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? APSInt(udiv(RHS), true) : APSInt(sdiv(RHS), false);
+  }
+
+  APSInt operator>>(unsigned Amt) const {
+    return IsUnsigned ? APSInt(lshr(Amt), true) : APSInt(ashr(Amt), false);
+  }
+  APSInt& operator>>=(unsigned Amt) {
+    *this = *this >> Amt;
+    return *this;
+  }
+
+  inline bool operator<(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? ult(RHS) : slt(RHS);
+  }
+  inline bool operator>(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? ugt(RHS) : sgt(RHS);
+  }
+  inline bool operator<=(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? ule(RHS) : sle(RHS);
+  }
+  inline bool operator>=(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return IsUnsigned ? uge(RHS) : sge(RHS);
+  }
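+
+  // Usage sketch (illustrative): the same 8-bit pattern orders differently
+  // depending on the carried signedness, and mixing signedness asserts:
+  //   APSInt U(APInt(8, 0xFF), true);   // unsigned 255
+  //   APSInt S(APInt(8, 0xFF), false);  // signed -1
+  //   U > APSInt(APInt(8, 1), true);    // true: 255 > 1
+  //   S < APSInt(APInt(8, 1), false);   // true: -1 < 1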
+
+  // The remaining operators just wrap the logic of APInt, but retain the
+  // signedness information.
+
+  APSInt operator<<(unsigned Bits) const {
+    return APSInt(static_cast<const APInt&>(*this) << Bits, IsUnsigned);
+  }
+  APSInt& operator<<=(unsigned Amt) {
+    *this = *this << Amt;
+    return *this;
+  }
+
+  APSInt& operator++() {
+    static_cast<APInt&>(*this)++;
+    return *this;
+  }
+  APSInt& operator--() {
+    static_cast<APInt&>(*this)--;
+    return *this;
+  }
+  APSInt operator++(int) {
+    return APSInt(++static_cast<APInt&>(*this), IsUnsigned);
+  }
+  APSInt operator--(int) {
+    return APSInt(--static_cast<APInt&>(*this), IsUnsigned);
+  }
+  APSInt operator-() const {
+    return APSInt(-static_cast<const APInt&>(*this), IsUnsigned);
+  }
+  APSInt& operator+=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) += RHS;
+    return *this;
+  }
+  APSInt& operator-=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) -= RHS;
+    return *this;
+  }
+  APSInt& operator*=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) *= RHS;
+    return *this;
+  }
+  APSInt& operator&=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) &= RHS;
+    return *this;
+  }
+  APSInt& operator|=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) |= RHS;
+    return *this;
+  }
+  APSInt& operator^=(const APSInt& RHS) {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    static_cast<APInt&>(*this) ^= RHS;
+    return *this;
+  }
+
+  APSInt operator&(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) & RHS, IsUnsigned);
+  }
+  APSInt And(const APSInt& RHS) const {
+    return this->operator&(RHS);
+  }
+
+  APSInt operator|(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) | RHS, IsUnsigned);
+  }
+  APSInt Or(const APSInt& RHS) const {
+    return this->operator|(RHS);
+  }
+
+  APSInt operator^(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) ^ RHS, IsUnsigned);
+  }
+  APSInt Xor(const APSInt& RHS) const {
+    return this->operator^(RHS);
+  }
+
+  APSInt operator*(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) * RHS, IsUnsigned);
+  }
+  APSInt operator+(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) + RHS, IsUnsigned);
+  }
+  APSInt operator-(const APSInt& RHS) const {
+    assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
+    return APSInt(static_cast<const APInt&>(*this) - RHS, IsUnsigned);
+  }
+  APSInt operator~() const {
+    return APSInt(~static_cast<const APInt&>(*this), IsUnsigned);
+  }
+
+  /// getMaxValue - Return the APSInt representing the maximum integer value
+  /// with the given bit width and signedness.
+  static APSInt getMaxValue(uint32_t numBits, bool Unsigned) {
+    return APSInt(Unsigned ? APInt::getMaxValue(numBits)
+                           : APInt::getSignedMaxValue(numBits), Unsigned);
+  }
+
+  /// getMinValue - Return the APSInt representing the minimum integer value
+  /// with the given bit width and signedness.
+  static APSInt getMinValue(uint32_t numBits, bool Unsigned) {
+    return APSInt(Unsigned ? APInt::getMinValue(numBits)
+                           : APInt::getSignedMinValue(numBits), Unsigned);
+  }
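+
+  // Usage sketch (illustrative):
+  //   APSInt::getMaxValue(8, true);   // 255 (unsigned)
+  //   APSInt::getMaxValue(8, false);  // 127 (signed)
+  //   APSInt::getMinValue(8, false);  // -128 (signed)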
+
+  /// Profile - Used to insert APSInt objects, or objects that contain APSInt
+  /// objects, into FoldingSets.
+  void Profile(FoldingSetNodeID& ID) const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const APSInt &I) {
+  I.print(OS, I.isSigned());
+  return OS;
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/ArrayRef.h b/final/include/llvm/ADT/ArrayRef.h
new file mode 100644
index 00000000000..ebddb1287eb
--- /dev/null
+++ b/final/include/llvm/ADT/ArrayRef.h
@@ -0,0 +1,137 @@
+//===--- ArrayRef.h - Array Reference Wrapper -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ARRAYREF_H
+#define LLVM_ADT_ARRAYREF_H
+
+#include "llvm/ADT/SmallVector.h"
+#include <vector>
+
+namespace llvm {
+  class APInt;
+
+  /// ArrayRef - Represent a constant reference to an array (0 or more elements
+  /// consecutively in memory), i.e. a start pointer and a length. It allows
+  /// various APIs to take consecutive elements easily and conveniently.
+  ///
+  /// This class does not own the underlying data; it is expected to be used in
+  /// situations where the data resides in some other buffer, whose lifetime
+  /// extends past that of the ArrayRef. For this reason, it is not in general
+  /// safe to store an ArrayRef.
+  ///
+  /// This is intended to be trivially copyable, so it should be passed by
+  /// value.
+  template<typename T>
+  class ArrayRef {
+  public:
+    typedef const T *iterator;
+    typedef const T *const_iterator;
+    typedef size_t size_type;
+
+  private:
+    /// The start of the array, in an external buffer.
+    const T *Data;
+
+    /// The number of elements.
+    size_t Length;
+
+  public:
+    /// @name Constructors
+    /// @{
+
+    /// Construct an empty ArrayRef.
+    /*implicit*/ ArrayRef() : Data(0), Length(0) {}
+
+    /// Construct an ArrayRef from a single element.
+    /*implicit*/ ArrayRef(const T &OneElt)
+      : Data(&OneElt), Length(1) {}
+
+    /// Construct an ArrayRef from a pointer and length.
+    /*implicit*/ ArrayRef(const T *data, size_t length)
+      : Data(data), Length(length) {}
+
+    /// Construct an ArrayRef from a SmallVector.
+    /*implicit*/ ArrayRef(const SmallVectorImpl<T> &Vec)
+      : Data(Vec.data()), Length(Vec.size()) {}
+
+    /// Construct an ArrayRef from a std::vector.
+    /*implicit*/ ArrayRef(const std::vector<T> &Vec)
+      : Data(Vec.empty() ? (T*)0 : &Vec[0]), Length(Vec.size()) {}
+
+    /// Construct an ArrayRef from a C array.
+    template <size_t N>
+    /*implicit*/ ArrayRef(const T (&Arr)[N])
+      : Data(Arr), Length(N) {}
+
+    /// @}
+    /// @name Simple Operations
+    /// @{
+
+    iterator begin() const { return Data; }
+    iterator end() const { return Data + Length; }
+
+    /// empty - Check if the array is empty.
+    bool empty() const { return Length == 0; }
+
+    const T *data() const { return Data; }
+
+    /// size - Get the array size.
+    size_t size() const { return Length; }
+
+    /// front - Get the first element.
+    const T &front() const {
+      assert(!empty());
+      return Data[0];
+    }
+
+    /// back - Get the last element.
+    const T &back() const {
+      assert(!empty());
+      return Data[Length-1];
+    }
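+
+    // Usage sketch (illustrative): the implicit constructors above let an
+    // API take many container shapes through one parameter type, e.g. given
+    // a hypothetical 'void takeValues(ArrayRef<int> Vals);':
+    //   int Arr[] = {1, 2, 3};
+    //   takeValues(Arr);             // from a C array
+    //   takeValues(42);              // from a single element
+    //   std::vector<int> V(3, 7);
+    //   takeValues(V);               // from a std::vector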
+
+    /// slice(n) - Chop off the first N elements of the array.
+    ArrayRef<T> slice(unsigned N) {
+      assert(N <= size() && "Invalid specifier");
+      return ArrayRef<T>(data()+N, size()-N);
+    }
+
+    /// slice(n, m) - Chop off the first N elements of the array, and keep M
+    /// elements in the array.
+    ArrayRef<T> slice(unsigned N, unsigned M) {
+      assert(N+M <= size() && "Invalid specifier");
+      return ArrayRef<T>(data()+N, M);
+    }
+
+    /// @}
+    /// @name Operator Overloads
+    /// @{
+    const T &operator[](size_t Index) const {
+      assert(Index < Length && "Invalid index!");
+      return Data[Index];
+    }
+
+    /// @}
+    /// @name Expensive Operations
+    /// @{
+    std::vector<T> vec() const {
+      return std::vector<T>(Data, Data+Length);
+    }
+
+    /// @}
+  };
+
+  // ArrayRefs can be treated like a POD type.
+  template <typename T> struct isPodLike;
+  template <typename T> struct isPodLike<ArrayRef<T> > {
+    static const bool value = true;
+  };
+}
+
+#endif
diff --git a/final/include/llvm/ADT/BitVector.h b/final/include/llvm/ADT/BitVector.h
new file mode 100644
index 00000000000..ac1cf0c79a8
--- /dev/null
+++ b/final/include/llvm/ADT/BitVector.h
@@ -0,0 +1,433 @@
+//===- llvm/ADT/BitVector.h - Bit vectors -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the BitVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_BITVECTOR_H
+#define LLVM_ADT_BITVECTOR_H
+
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdlib>
+#include <cstring>
+
+namespace llvm {
+
+class BitVector {
+  typedef unsigned long BitWord;
+
+  enum { BITWORD_SIZE = (unsigned)sizeof(BitWord) * CHAR_BIT };
+
+  BitWord  *Bits;    // Actual bits.
+  unsigned Size;     // Size of bitvector in bits.
+  unsigned Capacity; // Size of allocated memory in BitWord.
+
+public:
+  // Encapsulation of a single bit.
+  class reference {
+    friend class BitVector;
+
+    BitWord *WordRef;
+    unsigned BitPos;
+
+    reference();  // Undefined
+
+  public:
+    reference(BitVector &b, unsigned Idx) {
+      WordRef = &b.Bits[Idx / BITWORD_SIZE];
+      BitPos = Idx % BITWORD_SIZE;
+    }
+
+    ~reference() {}
+
+    reference &operator=(reference t) {
+      *this = bool(t);
+      return *this;
+    }
+
+    reference& operator=(bool t) {
+      if (t)
+        *WordRef |= 1L << BitPos;
+      else
+        *WordRef &= ~(1L << BitPos);
+      return *this;
+    }
+
+    operator bool() const {
+      return ((*WordRef) & (1L << BitPos)) ? true : false;
+    }
+  };
+
+  /// BitVector default ctor - Creates an empty bitvector.
+  BitVector() : Size(0), Capacity(0) {
+    Bits = 0;
+  }
+
+  /// BitVector ctor - Creates a bitvector of specified number of bits. All
+  /// bits are initialized to the specified value.
+  explicit BitVector(unsigned s, bool t = false) : Size(s) {
+    Capacity = NumBitWords(s);
+    Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
+    init_words(Bits, Capacity, t);
+    if (t)
+      clear_unused_bits();
+  }
+
+  /// BitVector copy ctor.
+  BitVector(const BitVector &RHS) : Size(RHS.size()) {
+    if (Size == 0) {
+      Bits = 0;
+      Capacity = 0;
+      return;
+    }
+
+    Capacity = NumBitWords(RHS.size());
+    Bits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
+    std::memcpy(Bits, RHS.Bits, Capacity * sizeof(BitWord));
+  }
+
+  ~BitVector() {
+    std::free(Bits);
+  }
+
+  /// empty - Tests whether there are no bits in this bitvector.
+  bool empty() const { return Size == 0; }
+
+  /// size - Returns the number of bits in this bitvector.
+  unsigned size() const { return Size; }
+
+  /// count - Returns the number of bits which are set.
+  unsigned count() const {
+    unsigned NumBits = 0;
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      if (sizeof(BitWord) == 4)
+        NumBits += CountPopulation_32((uint32_t)Bits[i]);
+      else if (sizeof(BitWord) == 8)
+        NumBits += CountPopulation_64(Bits[i]);
+      else
+        assert(0 && "Unsupported!");
+    return NumBits;
+  }
+
+  /// any - Returns true if any bit is set.
+  bool any() const {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0)
+        return true;
+    return false;
+  }
+
+  /// all - Returns true if all bits are set.
+  bool all() const {
+    // TODO: Optimize this.
+    return count() == size();
+  }
+
+  /// none - Returns true if none of the bits are set.
+  bool none() const {
+    return !any();
+  }
+
+  /// find_first - Returns the index of the first set bit, -1 if none
+  /// of the bits are set.
+  int find_first() const {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0) {
+        if (sizeof(BitWord) == 4)
+          return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]);
+        else if (sizeof(BitWord) == 8)
+          return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
+        else
+          assert(0 && "Unsupported!");
+      }
+    return -1;
+  }
+
+  /// find_next - Returns the index of the next set bit following the
+  /// "Prev" bit. Returns -1 if the next set bit is not found.
+  int find_next(unsigned Prev) const {
+    ++Prev;
+    if (Prev >= Size)
+      return -1;
+
+    unsigned WordPos = Prev / BITWORD_SIZE;
+    unsigned BitPos = Prev % BITWORD_SIZE;
+    BitWord Copy = Bits[WordPos];
+    // Mask off previous bits.
+    Copy &= ~0L << BitPos;
+
+    if (Copy != 0) {
+      if (sizeof(BitWord) == 4)
+        return WordPos * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Copy);
+      else if (sizeof(BitWord) == 8)
+        return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy);
+      else
+        assert(0 && "Unsupported!");
+    }
+
+    // Check subsequent words.
+    for (unsigned i = WordPos+1; i < NumBitWords(size()); ++i)
+      if (Bits[i] != 0) {
+        if (sizeof(BitWord) == 4)
+          return i * BITWORD_SIZE + CountTrailingZeros_32((uint32_t)Bits[i]);
+        else if (sizeof(BitWord) == 8)
+          return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]);
+        else
+          assert(0 && "Unsupported!");
+      }
+    return -1;
+  }
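+
+  // Usage sketch (illustrative): find_first/find_next support the usual
+  // set-bit iteration idiom:
+  //   for (int i = BV.find_first(); i != -1; i = BV.find_next(i))
+  //     visit(i);  // 'BV' and 'visit' are hypothetical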
+
+  /// clear - Clear all bits.
+  void clear() {
+    Size = 0;
+  }
+
+  /// resize - Grow or shrink the bitvector.
+  void resize(unsigned N, bool t = false) {
+    if (N > Capacity * BITWORD_SIZE) {
+      unsigned OldCapacity = Capacity;
+      grow(N);
+      init_words(&Bits[OldCapacity], (Capacity-OldCapacity), t);
+    }
+
+    // Set any old unused bits that are now included in the BitVector. This
+    // may set bits that are not included in the new vector, but we will clear
+    // them back out below.
+    if (N > Size)
+      set_unused_bits(t);
+
+    // Update the size, and clear out any bits that are now unused.
+    unsigned OldSize = Size;
+    Size = N;
+    if (t || N < OldSize)
+      clear_unused_bits();
+  }
+
+  void reserve(unsigned N) {
+    if (N > Capacity * BITWORD_SIZE)
+      grow(N);
+  }
+
+  // Set, reset, flip
+  BitVector &set() {
+    init_words(Bits, Capacity, true);
+    clear_unused_bits();
+    return *this;
+  }
+
+  BitVector &set(unsigned Idx) {
+    Bits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE);
+    return *this;
+  }
+
+  BitVector &reset() {
+    init_words(Bits, Capacity, false);
+    return *this;
+  }
+
+  BitVector &reset(unsigned Idx) {
+    Bits[Idx / BITWORD_SIZE] &= ~(1L << (Idx % BITWORD_SIZE));
+    return *this;
+  }
+
+  BitVector &flip() {
+    for (unsigned i = 0; i < NumBitWords(size()); ++i)
+      Bits[i] = ~Bits[i];
+    clear_unused_bits();
+    return *this;
+  }
+
+  BitVector &flip(unsigned Idx) {
+    Bits[Idx / BITWORD_SIZE] ^= 1L << (Idx % BITWORD_SIZE);
+    return *this;
+  }
+
+  // No argument flip.
+  BitVector operator~() const {
+    return BitVector(*this).flip();
+  }
+
+  // Indexing.
+  reference operator[](unsigned Idx) {
+    assert (Idx < Size && "Out-of-bounds Bit access.");
+    return reference(*this, Idx);
+  }
+
+  bool operator[](unsigned Idx) const {
+    assert (Idx < Size && "Out-of-bounds Bit access.");
+    BitWord Mask = 1L << (Idx % BITWORD_SIZE);
+    return (Bits[Idx / BITWORD_SIZE] & Mask) != 0;
+  }
+
+  bool test(unsigned Idx) const {
+    return (*this)[Idx];
+  }
+
+  // Comparison operators.
+  bool operator==(const BitVector &RHS) const {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords  = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      if (Bits[i] != RHS.Bits[i])
+        return false;
+
+    // Verify that any extra words are all zeros.
+    if (i != ThisWords) {
+      for (; i != ThisWords; ++i)
+        if (Bits[i])
+          return false;
+    } else if (i != RHSWords) {
+      for (; i != RHSWords; ++i)
+        if (RHS.Bits[i])
+          return false;
+    }
+    return true;
+  }
+
+  bool operator!=(const BitVector &RHS) const {
+    return !(*this == RHS);
+  }
+
+  // Intersection, union, disjoint union.
+  BitVector &operator&=(const BitVector &RHS) {
+    unsigned ThisWords = NumBitWords(size());
+    unsigned RHSWords  = NumBitWords(RHS.size());
+    unsigned i;
+    for (i = 0; i != std::min(ThisWords, RHSWords); ++i)
+      Bits[i] &= RHS.Bits[i];
+
+    // Any bits that are just in this bitvector become zero, because they
+    // aren't in the RHS bit vector. Any words only in RHS are ignored because
+    // they are already zero in the LHS.
+    for (; i != ThisWords; ++i)
+      Bits[i] = 0;
+
+    return *this;
+  }
+
+  BitVector &operator|=(const BitVector &RHS) {
+    if (size() < RHS.size())
+      resize(RHS.size());
+    for (size_t i = 0, e = NumBitWords(RHS.size()); i != e; ++i)
+      Bits[i] |= RHS.Bits[i];
+    return *this;
+  }
+
+  BitVector &operator^=(const BitVector &RHS) {
+    if (size() < RHS.size())
+      resize(RHS.size());
+    for (size_t i = 0, e = NumBitWords(RHS.size()); i != e; ++i)
+      Bits[i] ^= RHS.Bits[i];
+    return *this;
+  }
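+
+  // Usage sketch (illustrative): operator|= and operator^= grow the left
+  // operand if it is shorter, while operator&= does not need to:
+  //   BitVector A(4), B(8);
+  //   B.set(6);
+  //   A |= B;  // A now has size() == 8 and A.test(6) is true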
+
+  // Assignment operator.
+  const BitVector &operator=(const BitVector &RHS) {
+    if (this == &RHS) return *this;
+
+    Size = RHS.size();
+    unsigned RHSWords = NumBitWords(Size);
+    if (Size <= Capacity * BITWORD_SIZE) {
+      if (Size)
+        std::memcpy(Bits, RHS.Bits, RHSWords * sizeof(BitWord));
+      clear_unused_bits();
+      return *this;
+    }
+
+    // Grow the bitvector to have enough elements.
+    Capacity = RHSWords;
+    BitWord *NewBits = (BitWord *)std::malloc(Capacity * sizeof(BitWord));
+    std::memcpy(NewBits, RHS.Bits, Capacity * sizeof(BitWord));
+
+    // Destroy the old bits.
+    std::free(Bits);
+    Bits = NewBits;
+
+    return *this;
+  }
+
+  void swap(BitVector &RHS) {
+    std::swap(Bits, RHS.Bits);
+    std::swap(Size, RHS.Size);
+    std::swap(Capacity, RHS.Capacity);
+  }
+
+private:
+  unsigned NumBitWords(unsigned S) const {
+    return (S + BITWORD_SIZE-1) / BITWORD_SIZE;
+  }
+
+  // Set the unused bits in the high words.
+  void set_unused_bits(bool t = true) {
+    // Set high words first.
+    unsigned UsedWords = NumBitWords(Size);
+    if (Capacity > UsedWords)
+      init_words(&Bits[UsedWords], (Capacity-UsedWords), t);
+
+    // Then set any stray high bits of the last used word.
+    unsigned ExtraBits = Size % BITWORD_SIZE;
+    if (ExtraBits) {
+      Bits[UsedWords-1] &= ~(~0L << ExtraBits);
+      Bits[UsedWords-1] |= (0 - (BitWord)t) << ExtraBits;
+    }
+  }
+
+  // Clear the unused bits in the high words.
+  void clear_unused_bits() {
+    set_unused_bits(false);
+  }
+
+  void grow(unsigned NewSize) {
+    Capacity = std::max(NumBitWords(NewSize), Capacity * 2);
+    Bits = (BitWord *)std::realloc(Bits, Capacity * sizeof(BitWord));
+
+    clear_unused_bits();
+  }
+
+  void init_words(BitWord *B, unsigned NumWords, bool t) {
+    memset(B, 0 - (int)t, NumWords*sizeof(BitWord));
+  }
+};
+
+inline BitVector operator&(const BitVector &LHS, const BitVector &RHS) {
+  BitVector Result(LHS);
+  Result &= RHS;
+  return Result;
+}
+
+inline BitVector operator|(const BitVector &LHS, const BitVector &RHS) {
+  BitVector Result(LHS);
+  Result |= RHS;
+  return Result;
+}
+
+inline BitVector operator^(const BitVector &LHS, const BitVector &RHS) {
+  BitVector Result(LHS);
+  Result ^= RHS;
+  return Result;
+}
+
+} // End llvm namespace
+
+namespace std {
+  /// Implement std::swap in terms of BitVector swap.
+  inline void
+  swap(llvm::BitVector &LHS, llvm::BitVector &RHS) {
+    LHS.swap(RHS);
+  }
+}
+
+#endif
diff --git a/final/include/llvm/ADT/DAGDeltaAlgorithm.h b/final/include/llvm/ADT/DAGDeltaAlgorithm.h
new file mode 100644
index 00000000000..99ed15c0d60
--- /dev/null
+++ b/final/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -0,0 +1,75 @@
+//===--- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DAGDELTAALGORITHM_H
+#define LLVM_ADT_DAGDELTAALGORITHM_H
+
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+/// DAGDeltaAlgorithm - Implements a "delta debugging" algorithm for minimizing
+/// directed acyclic graphs using a predicate function.
+///
+/// The result of the algorithm is a subset of the input change set which is
+/// guaranteed to satisfy the predicate, assuming that the input set did. For
+/// well formed predicates, the result set is guaranteed to be such that
+/// removing any single element not required by the dependencies on the other
+/// elements would falsify the predicate.
+///
+/// The DAG should be used to represent dependencies in the changes which are
+/// likely to hold across the predicate function. That is, for a particular
+/// changeset S and predicate P:
+///
+///   P(S) => P(S union pred(S))
+///
+/// The minimization algorithm uses this dependency information to attempt to
+/// eagerly prune large subsets of changes.
+/// As with \see DeltaAlgorithm, the DAG is not required to satisfy this
+/// property, but the algorithm will run substantially fewer tests with
+/// appropriate dependencies. \see DeltaAlgorithm for more information on the
+/// properties which the predicate function itself should satisfy.
+class DAGDeltaAlgorithm {
+public:
+  typedef unsigned change_ty;
+  typedef std::pair<change_ty, change_ty> edge_ty;
+
+  // FIXME: Use a decent data structure.
+  typedef std::set<change_ty> changeset_ty;
+  typedef std::vector<changeset_ty> changesetlist_ty;
+
+public:
+  virtual ~DAGDeltaAlgorithm() {}
+
+  /// Run - Minimize the DAG formed by the \arg Changes vertices and the \arg
+  /// Dependencies edges by executing \see ExecuteOneTest() on subsets of
+  /// changes and returning the smallest set which still satisfies the test
+  /// predicate and the input \arg Dependencies.
+  ///
+  /// \param Changes The list of changes.
+  ///
+  /// \param Dependencies The list of dependencies amongst changes. For each
+  /// (x,y) in \arg Dependencies, both x and y must be in \arg Changes. The
+  /// minimization algorithm guarantees that for each tested changed set S,
+  /// x \in S implies y \in S. It is an error to have cyclic dependencies.
+  changeset_ty Run(const changeset_ty &Changes,
+                   const std::vector<edge_ty> &Dependencies);
+
+  /// UpdatedSearchState - Callback used when the search state changes.
+  virtual void UpdatedSearchState(const changeset_ty &Changes,
+                                  const changesetlist_ty &Sets,
+                                  const changeset_ty &Required) {}
+
+  /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+  virtual bool ExecuteOneTest(const changeset_ty &S) = 0;
+};
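+
+// Usage sketch (illustrative): a client derives from DAGDeltaAlgorithm and
+// supplies the predicate; the names below are hypothetical:
+//   struct MyMinimizer : public DAGDeltaAlgorithm {
+//     virtual bool ExecuteOneTest(const changeset_ty &S) {
+//       return stillFailsWith(S);  // hypothetical test driver
+//     }
+//   };
+// MyMinimizer().Run(Changes, Dependencies) then returns a minimal set.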
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/DenseMap.h b/final/include/llvm/ADT/DenseMap.h
new file mode 100644
index 00000000000..9d2b11d6b2b
--- /dev/null
+++ b/final/include/llvm/ADT/DenseMap.h
@@ -0,0 +1,533 @@
+//===- llvm/ADT/DenseMap.h - Dense probed hash table ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DenseMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DENSEMAP_H
+#define LLVM_ADT_DENSEMAP_H
+
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include "llvm/Support/type_traits.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include <iterator>
+#include <new>
+#include <utility>
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <climits>
+
+namespace llvm {
+
+template<typename KeyT, typename ValueT,
+         typename KeyInfoT = DenseMapInfo<KeyT>,
+         typename ValueInfoT = DenseMapInfo<ValueT>, bool IsConst = false>
+class DenseMapIterator;
+
+template<typename KeyT, typename ValueT,
+         typename KeyInfoT = DenseMapInfo<KeyT>,
+         typename ValueInfoT = DenseMapInfo<ValueT> >
+class DenseMap {
+  typedef std::pair<KeyT, ValueT> BucketT;
+  unsigned NumBuckets;
+  BucketT *Buckets;
+
+  unsigned NumEntries;
+  unsigned NumTombstones;
+public:
+  typedef KeyT key_type;
+  typedef ValueT mapped_type;
+  typedef BucketT value_type;
+
+  DenseMap(const DenseMap &other) {
+    NumBuckets = 0;
+    CopyFrom(other);
+  }
+
+  explicit DenseMap(unsigned NumInitBuckets = 0) {
+    init(NumInitBuckets);
+  }
+
+  template<typename InputIt>
+  DenseMap(const InputIt &I, const InputIt &E) {
+    init(NextPowerOf2(std::distance(I, E)));
+    insert(I, E);
+  }
+
+  ~DenseMap() {
+    const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+    for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) {
+      if (!KeyInfoT::isEqual(P->first, EmptyKey) &&
+          !KeyInfoT::isEqual(P->first, TombstoneKey))
+        P->second.~ValueT();
+      P->first.~KeyT();
+    }
+#ifndef NDEBUG
+    if (NumBuckets)
+      memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets);
+#endif
+    operator delete(Buckets);
+  }
+
+  typedef DenseMapIterator<KeyT, ValueT, KeyInfoT> iterator;
+  typedef DenseMapIterator<KeyT, ValueT,
+                           KeyInfoT, ValueInfoT, true> const_iterator;
+  inline iterator begin() {
+    // When the map is empty, avoid the overhead of AdvancePastEmptyBuckets().
+    return empty() ? end() : iterator(Buckets, Buckets+NumBuckets);
+  }
+  inline iterator end() {
+    return iterator(Buckets+NumBuckets, Buckets+NumBuckets);
+  }
+  inline const_iterator begin() const {
+    return empty() ? end() : const_iterator(Buckets, Buckets+NumBuckets);
+  }
+  inline const_iterator end() const {
+    return const_iterator(Buckets+NumBuckets, Buckets+NumBuckets);
+  }
+
+  bool empty() const { return NumEntries == 0; }
+  unsigned size() const { return NumEntries; }
+
+  /// Grow the densemap so that it has at least Size buckets. Does not shrink.
+  void resize(size_t Size) {
+    if (Size > NumBuckets)
+      grow(Size);
+  }
+
+  void clear() {
+    if (NumEntries == 0 && NumTombstones == 0) return;
+
+    // If the capacity of the array is huge, and the # elements used is small,
+    // shrink the array.
+    if (NumEntries * 4 < NumBuckets && NumBuckets > 64) {
+      shrink_and_clear();
+      return;
+    }
+
+    const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
+    for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) {
+      if (!KeyInfoT::isEqual(P->first, EmptyKey)) {
+        if (!KeyInfoT::isEqual(P->first, TombstoneKey)) {
+          P->second.~ValueT();
+          --NumEntries;
+        }
+        P->first = EmptyKey;
+      }
+    }
+    assert(NumEntries == 0 && "Node count imbalance!");
+    NumTombstones = 0;
+  }
+
+  /// count - Return true if the specified key is in the map.
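+  /// For example (an illustrative sketch, not part of the original header):
+  ///   DenseMap<unsigned, const char *> M;
+  ///   M.insert(std::make_pair(1u, "one"));
+  ///   bool Present = M.count(1u); // true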
+ bool count(const KeyT &Val) const { + BucketT *TheBucket; + return LookupBucketFor(Val, TheBucket); + } + + iterator find(const KeyT &Val) { + BucketT *TheBucket; + if (LookupBucketFor(Val, TheBucket)) + return iterator(TheBucket, Buckets+NumBuckets); + return end(); + } + const_iterator find(const KeyT &Val) const { + BucketT *TheBucket; + if (LookupBucketFor(Val, TheBucket)) + return const_iterator(TheBucket, Buckets+NumBuckets); + return end(); + } + + /// lookup - Return the entry for the specified key, or a default + /// constructed value if no such entry exists. + ValueT lookup(const KeyT &Val) const { + BucketT *TheBucket; + if (LookupBucketFor(Val, TheBucket)) + return TheBucket->second; + return ValueT(); + } + + // Inserts key,value pair into the map if the key isn't already in the map. + // If the key is already in the map, it returns false and doesn't update the + // value. + std::pair insert(const std::pair &KV) { + BucketT *TheBucket; + if (LookupBucketFor(KV.first, TheBucket)) + return std::make_pair(iterator(TheBucket, Buckets+NumBuckets), + false); // Already in map. + + // Otherwise, insert the new element. + TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket); + return std::make_pair(iterator(TheBucket, Buckets+NumBuckets), + true); + } + + /// insert - Range insertion of pairs. + template + void insert(InputIt I, InputIt E) { + for (; I != E; ++I) + insert(*I); + } + + + bool erase(const KeyT &Val) { + BucketT *TheBucket; + if (!LookupBucketFor(Val, TheBucket)) + return false; // not in map. + + TheBucket->second.~ValueT(); + TheBucket->first = getTombstoneKey(); + --NumEntries; + ++NumTombstones; + return true; + } + void erase(iterator I) { + BucketT *TheBucket = &*I; + TheBucket->second.~ValueT(); + TheBucket->first = getTombstoneKey(); + --NumEntries; + ++NumTombstones; + } + + void swap(DenseMap& RHS) { + std::swap(NumBuckets, RHS.NumBuckets); + std::swap(Buckets, RHS.Buckets); + std::swap(NumEntries, RHS.NumEntries); + std::swap(NumTombstones, RHS.NumTombstones); + } + + value_type& FindAndConstruct(const KeyT &Key) { + BucketT *TheBucket; + if (LookupBucketFor(Key, TheBucket)) + return *TheBucket; + + return *InsertIntoBucket(Key, ValueT(), TheBucket); + } + + ValueT &operator[](const KeyT &Key) { + return FindAndConstruct(Key).second; + } + + DenseMap& operator=(const DenseMap& other) { + CopyFrom(other); + return *this; + } + + /// isPointerIntoBucketsArray - Return true if the specified pointer points + /// somewhere into the DenseMap's array of buckets (i.e. either to a key or + /// value in the DenseMap). + bool isPointerIntoBucketsArray(const void *Ptr) const { + return Ptr >= Buckets && Ptr < Buckets+NumBuckets; + } + + /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets + /// array. In conjunction with the previous method, this can be used to + /// determine whether an insertion caused the DenseMap to reallocate. 
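+  /// For example (an illustrative sketch; M, K, and V are hypothetical):
+  ///   const void *OldBuckets = M.getPointerIntoBucketsArray();
+  ///   M.insert(std::make_pair(K, V));
+  ///   bool Reallocated = OldBuckets != M.getPointerIntoBucketsArray();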
+ const void *getPointerIntoBucketsArray() const { return Buckets; } + +private: + void CopyFrom(const DenseMap& other) { + if (NumBuckets != 0 && + (!isPodLike::value || !isPodLike::value)) { + const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey(); + for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) { + if (!KeyInfoT::isEqual(P->first, EmptyKey) && + !KeyInfoT::isEqual(P->first, TombstoneKey)) + P->second.~ValueT(); + P->first.~KeyT(); + } + } + + NumEntries = other.NumEntries; + NumTombstones = other.NumTombstones; + + if (NumBuckets) { +#ifndef NDEBUG + memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets); +#endif + operator delete(Buckets); + } + + NumBuckets = other.NumBuckets; + + if (NumBuckets == 0) { + Buckets = 0; + return; + } + + Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets)); + + if (isPodLike::value && isPodLike::value) + memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT)); + else + for (size_t i = 0; i < NumBuckets; ++i) { + new (&Buckets[i].first) KeyT(other.Buckets[i].first); + if (!KeyInfoT::isEqual(Buckets[i].first, getEmptyKey()) && + !KeyInfoT::isEqual(Buckets[i].first, getTombstoneKey())) + new (&Buckets[i].second) ValueT(other.Buckets[i].second); + } + } + + BucketT *InsertIntoBucket(const KeyT &Key, const ValueT &Value, + BucketT *TheBucket) { + // If the load of the hash table is more than 3/4, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow the table. + // + // The later case is tricky. For example, if we had one empty bucket with + // tons of tombstones, failing lookups (e.g. for insertion) would have to + // probe almost the entire table until it found the empty bucket. If the + // table completely filled with tombstones, no lookup would ever succeed, + // causing infinite loops in lookup. + ++NumEntries; + if (NumEntries*4 >= NumBuckets*3 || + NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) { + this->grow(NumBuckets * 2); + LookupBucketFor(Key, TheBucket); + } + + // If we are writing over a tombstone, remember this. + if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey())) + --NumTombstones; + + TheBucket->first = Key; + new (&TheBucket->second) ValueT(Value); + return TheBucket; + } + + static unsigned getHashValue(const KeyT &Val) { + return KeyInfoT::getHashValue(Val); + } + static const KeyT getEmptyKey() { + return KeyInfoT::getEmptyKey(); + } + static const KeyT getTombstoneKey() { + return KeyInfoT::getTombstoneKey(); + } + + /// LookupBucketFor - Lookup the appropriate bucket for Val, returning it in + /// FoundBucket. If the bucket contains the key and a value, this returns + /// true, otherwise it returns a bucket with an empty marker or tombstone and + /// returns false. + bool LookupBucketFor(const KeyT &Val, BucketT *&FoundBucket) const { + unsigned BucketNo = getHashValue(Val); + unsigned ProbeAmt = 1; + BucketT *BucketsPtr = Buckets; + + if (NumBuckets == 0) { + FoundBucket = 0; + return false; + } + + // FoundTombstone - Keep track of whether we find a tombstone while probing. + BucketT *FoundTombstone = 0; + const KeyT EmptyKey = getEmptyKey(); + const KeyT TombstoneKey = getTombstoneKey(); + assert(!KeyInfoT::isEqual(Val, EmptyKey) && + !KeyInfoT::isEqual(Val, TombstoneKey) && + "Empty/Tombstone value shouldn't be inserted into map!"); + + while (1) { + BucketT *ThisBucket = BucketsPtr + (BucketNo & (NumBuckets-1)); + // Found Val's bucket? If so, return it. 
+ if (KeyInfoT::isEqual(ThisBucket->first, Val)) { + FoundBucket = ThisBucket; + return true; + } + + // If we found an empty bucket, the key doesn't exist in the set. + // Insert it and return the default value. + if (KeyInfoT::isEqual(ThisBucket->first, EmptyKey)) { + // If we've already seen a tombstone while probing, fill it in instead + // of the empty bucket we eventually probed to. + if (FoundTombstone) ThisBucket = FoundTombstone; + FoundBucket = FoundTombstone ? FoundTombstone : ThisBucket; + return false; + } + + // If this is a tombstone, remember it. If Val ends up not in the map, we + // prefer to return it than something that would require more probing. + if (KeyInfoT::isEqual(ThisBucket->first, TombstoneKey) && !FoundTombstone) + FoundTombstone = ThisBucket; // Remember the first tombstone found. + + // Otherwise, it's a hash collision or a tombstone, continue quadratic + // probing. + BucketNo += ProbeAmt++; + } + } + + void init(unsigned InitBuckets) { + NumEntries = 0; + NumTombstones = 0; + NumBuckets = InitBuckets; + + if (InitBuckets == 0) { + Buckets = 0; + return; + } + + assert(InitBuckets && (InitBuckets & (InitBuckets-1)) == 0 && + "# initial buckets must be a power of two!"); + Buckets = static_cast(operator new(sizeof(BucketT)*InitBuckets)); + // Initialize all the keys to EmptyKey. + const KeyT EmptyKey = getEmptyKey(); + for (unsigned i = 0; i != InitBuckets; ++i) + new (&Buckets[i].first) KeyT(EmptyKey); + } + + void grow(unsigned AtLeast) { + unsigned OldNumBuckets = NumBuckets; + BucketT *OldBuckets = Buckets; + + if (NumBuckets < 64) + NumBuckets = 64; + + // Double the number of buckets. + while (NumBuckets < AtLeast) + NumBuckets <<= 1; + NumTombstones = 0; + Buckets = static_cast(operator new(sizeof(BucketT)*NumBuckets)); + + // Initialize all the keys to EmptyKey. + const KeyT EmptyKey = getEmptyKey(); + for (unsigned i = 0, e = NumBuckets; i != e; ++i) + new (&Buckets[i].first) KeyT(EmptyKey); + + // Insert all the old elements. + const KeyT TombstoneKey = getTombstoneKey(); + for (BucketT *B = OldBuckets, *E = OldBuckets+OldNumBuckets; B != E; ++B) { + if (!KeyInfoT::isEqual(B->first, EmptyKey) && + !KeyInfoT::isEqual(B->first, TombstoneKey)) { + // Insert the key/value into the new table. + BucketT *DestBucket; + bool FoundVal = LookupBucketFor(B->first, DestBucket); + (void)FoundVal; // silence warning. + assert(!FoundVal && "Key already in new map?"); + DestBucket->first = B->first; + new (&DestBucket->second) ValueT(B->second); + + // Free the value. + B->second.~ValueT(); + } + B->first.~KeyT(); + } + +#ifndef NDEBUG + memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); +#endif + // Free the old table. + operator delete(OldBuckets); + } + + void shrink_and_clear() { + unsigned OldNumBuckets = NumBuckets; + BucketT *OldBuckets = Buckets; + + // Reduce the number of buckets. + NumBuckets = NumEntries > 32 ? 1 << (Log2_32_Ceil(NumEntries) + 1) + : 64; + NumTombstones = 0; + Buckets = static_cast(operator new(sizeof(BucketT)*NumBuckets)); + + // Initialize all the keys to EmptyKey. + const KeyT EmptyKey = getEmptyKey(); + for (unsigned i = 0, e = NumBuckets; i != e; ++i) + new (&Buckets[i].first) KeyT(EmptyKey); + + // Free the old buckets. + const KeyT TombstoneKey = getTombstoneKey(); + for (BucketT *B = OldBuckets, *E = OldBuckets+OldNumBuckets; B != E; ++B) { + if (!KeyInfoT::isEqual(B->first, EmptyKey) && + !KeyInfoT::isEqual(B->first, TombstoneKey)) { + // Free the value. 
+ B->second.~ValueT(); + } + B->first.~KeyT(); + } + +#ifndef NDEBUG + memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); +#endif + // Free the old table. + operator delete(OldBuckets); + + NumEntries = 0; + } +}; + +template +class DenseMapIterator { + typedef std::pair Bucket; + typedef DenseMapIterator ConstIterator; + friend class DenseMapIterator; +public: + typedef ptrdiff_t difference_type; + typedef typename conditional::type value_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef std::forward_iterator_tag iterator_category; +private: + pointer Ptr, End; +public: + DenseMapIterator() : Ptr(0), End(0) {} + + DenseMapIterator(pointer Pos, pointer E) : Ptr(Pos), End(E) { + AdvancePastEmptyBuckets(); + } + + // If IsConst is true this is a converting constructor from iterator to + // const_iterator and the default copy constructor is used. + // Otherwise this is a copy constructor for iterator. + DenseMapIterator(const DenseMapIterator& I) + : Ptr(I.Ptr), End(I.End) {} + + reference operator*() const { + return *Ptr; + } + pointer operator->() const { + return Ptr; + } + + bool operator==(const ConstIterator &RHS) const { + return Ptr == RHS.operator->(); + } + bool operator!=(const ConstIterator &RHS) const { + return Ptr != RHS.operator->(); + } + + inline DenseMapIterator& operator++() { // Preincrement + ++Ptr; + AdvancePastEmptyBuckets(); + return *this; + } + DenseMapIterator operator++(int) { // Postincrement + DenseMapIterator tmp = *this; ++*this; return tmp; + } + +private: + void AdvancePastEmptyBuckets() { + const KeyT Empty = KeyInfoT::getEmptyKey(); + const KeyT Tombstone = KeyInfoT::getTombstoneKey(); + + while (Ptr != End && + (KeyInfoT::isEqual(Ptr->first, Empty) || + KeyInfoT::isEqual(Ptr->first, Tombstone))) + ++Ptr; + } +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/ADT/DenseMapInfo.h b/final/include/llvm/ADT/DenseMapInfo.h new file mode 100644 index 00000000000..25e341bf4fd --- /dev/null +++ b/final/include/llvm/ADT/DenseMapInfo.h @@ -0,0 +1,165 @@ +//===- llvm/ADT/DenseMapInfo.h - Type traits for DenseMap -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines DenseMapInfo traits for DenseMap. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DENSEMAPINFO_H +#define LLVM_ADT_DENSEMAPINFO_H + +#include "llvm/Support/PointerLikeTypeTraits.h" +#include "llvm/Support/type_traits.h" + +namespace llvm { + +template +struct DenseMapInfo { + //static inline T getEmptyKey(); + //static inline T getTombstoneKey(); + //static unsigned getHashValue(const T &Val); + //static bool isEqual(const T &LHS, const T &RHS); +}; + +// Provide DenseMapInfo for all pointers. 
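+// For example (an illustrative note, not part of the original header), this
+// lets DenseMap<Instruction *, unsigned> work directly: the reserved empty
+// and tombstone keys are the -1 and -2 bit patterns shifted above the
+// pointer type's known-zero low bits, so neither can collide with a real,
+// sufficiently aligned pointer value.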
+template +struct DenseMapInfo { + static inline T* getEmptyKey() { + intptr_t Val = -1; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static inline T* getTombstoneKey() { + intptr_t Val = -2; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return reinterpret_cast(Val); + } + static unsigned getHashValue(const T *PtrVal) { + return (unsigned((uintptr_t)PtrVal) >> 4) ^ + (unsigned((uintptr_t)PtrVal) >> 9); + } + static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; } +}; + +// Provide DenseMapInfo for chars. +template<> struct DenseMapInfo { + static inline char getEmptyKey() { return ~0; } + static inline char getTombstoneKey() { return ~0 - 1; } + static unsigned getHashValue(const char& Val) { return Val * 37; } + static bool isEqual(const char &LHS, const char &RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned ints. +template<> struct DenseMapInfo { + static inline unsigned getEmptyKey() { return ~0; } + static inline unsigned getTombstoneKey() { return ~0U - 1; } + static unsigned getHashValue(const unsigned& Val) { return Val * 37; } + static bool isEqual(const unsigned& LHS, const unsigned& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned longs. +template<> struct DenseMapInfo { + static inline unsigned long getEmptyKey() { return ~0UL; } + static inline unsigned long getTombstoneKey() { return ~0UL - 1L; } + static unsigned getHashValue(const unsigned long& Val) { + return (unsigned)(Val * 37UL); + } + static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for unsigned long longs. +template<> struct DenseMapInfo { + static inline unsigned long long getEmptyKey() { return ~0ULL; } + static inline unsigned long long getTombstoneKey() { return ~0ULL - 1ULL; } + static unsigned getHashValue(const unsigned long long& Val) { + return (unsigned)(Val * 37ULL); + } + static bool isEqual(const unsigned long long& LHS, + const unsigned long long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for ints. +template<> struct DenseMapInfo { + static inline int getEmptyKey() { return 0x7fffffff; } + static inline int getTombstoneKey() { return -0x7fffffff - 1; } + static unsigned getHashValue(const int& Val) { return (unsigned)(Val * 37); } + static bool isEqual(const int& LHS, const int& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for longs. +template<> struct DenseMapInfo { + static inline long getEmptyKey() { + return (1UL << (sizeof(long) * 8 - 1)) - 1L; + } + static inline long getTombstoneKey() { return getEmptyKey() - 1L; } + static unsigned getHashValue(const long& Val) { + return (unsigned)(Val * 37L); + } + static bool isEqual(const long& LHS, const long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for long longs. +template<> struct DenseMapInfo { + static inline long long getEmptyKey() { return 0x7fffffffffffffffLL; } + static inline long long getTombstoneKey() { return -0x7fffffffffffffffLL-1; } + static unsigned getHashValue(const long long& Val) { + return (unsigned)(Val * 37LL); + } + static bool isEqual(const long long& LHS, + const long long& RHS) { + return LHS == RHS; + } +}; + +// Provide DenseMapInfo for all pairs whose members have info. 
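+// For instance (an illustrative note, not part of the original header), the
+// specialization below makes DenseMap<std::pair<unsigned, unsigned>, const
+// char *> work out of the box: the empty and tombstone keys are built
+// memberwise, and the two members' hash values are mixed into a single
+// 32-bit result.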
+template<typename T, typename U>
+struct DenseMapInfo<std::pair<T, U> > {
+  typedef std::pair<T, U> Pair;
+  typedef DenseMapInfo<T> FirstInfo;
+  typedef DenseMapInfo<U> SecondInfo;
+
+  static inline Pair getEmptyKey() {
+    return std::make_pair(FirstInfo::getEmptyKey(),
+                          SecondInfo::getEmptyKey());
+  }
+  static inline Pair getTombstoneKey() {
+    return std::make_pair(FirstInfo::getTombstoneKey(),
+                          SecondInfo::getEmptyKey());
+  }
+  static unsigned getHashValue(const Pair& PairVal) {
+    uint64_t key = (uint64_t)FirstInfo::getHashValue(PairVal.first) << 32
+          | (uint64_t)SecondInfo::getHashValue(PairVal.second);
+    key += ~(key << 32);
+    key ^= (key >> 22);
+    key += ~(key << 13);
+    key ^= (key >> 8);
+    key += (key << 3);
+    key ^= (key >> 15);
+    key += ~(key << 27);
+    key ^= (key >> 31);
+    return (unsigned)key;
+  }
+  static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/DenseSet.h b/final/include/llvm/ADT/DenseSet.h
new file mode 100644
index 00000000000..67321f53984
--- /dev/null
+++ b/final/include/llvm/ADT/DenseSet.h
@@ -0,0 +1,129 @@
+//===- llvm/ADT/DenseSet.h - Dense probed hash table ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DenseSet class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DENSESET_H
+#define LLVM_ADT_DENSESET_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+/// DenseSet - This implements a dense probed hash-table based set.
+///
+/// FIXME: This is currently implemented directly in terms of DenseMap, this
+/// should be optimized later if there is a need.
+template<typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT> >
+class DenseSet {
+  typedef DenseMap<ValueT, char, ValueInfoT> MapTy;
+  MapTy TheMap;
+public:
+  DenseSet(const DenseSet &Other) : TheMap(Other.TheMap) {}
+  explicit DenseSet(unsigned NumInitBuckets = 64) : TheMap(NumInitBuckets) {}
+
+  bool empty() const { return TheMap.empty(); }
+  unsigned size() const { return TheMap.size(); }
+
+  /// Grow the denseset so that it has at least Size buckets. Does not shrink.
+  void resize(size_t Size) { TheMap.resize(Size); }
+
+  void clear() {
+    TheMap.clear();
+  }
+
+  bool count(const ValueT &V) const {
+    return TheMap.count(V);
+  }
+
+  bool erase(const ValueT &V) {
+    return TheMap.erase(V);
+  }
+
+  void swap(DenseSet& RHS) {
+    TheMap.swap(RHS.TheMap);
+  }
+
+  DenseSet &operator=(const DenseSet &RHS) {
+    TheMap = RHS.TheMap;
+    return *this;
+  }
+
+  // Iterators.
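+
+  // An illustrative usage sketch (not part of the original header):
+  //
+  //   DenseSet<unsigned> S;
+  //   S.insert(3);
+  //   S.insert(7);
+  //   for (DenseSet<unsigned>::iterator I = S.begin(), E = S.end(); I != E;
+  //        ++I)
+  //     unsigned V = *I; // The iterator yields the values themselves.
+  //
+  // Note (illustrative): values equal to DenseMapInfo<unsigned>'s reserved
+  // empty or tombstone keys (~0U and ~0U - 1) must not be inserted.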
+ + class Iterator { + typename MapTy::iterator I; + friend class DenseSet; + public: + typedef typename MapTy::iterator::difference_type difference_type; + typedef ValueT value_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef std::forward_iterator_tag iterator_category; + + Iterator(const typename MapTy::iterator &i) : I(i) {} + + ValueT& operator*() { return I->first; } + ValueT* operator->() { return &I->first; } + + Iterator& operator++() { ++I; return *this; } + bool operator==(const Iterator& X) const { return I == X.I; } + bool operator!=(const Iterator& X) const { return I != X.I; } + }; + + class ConstIterator { + typename MapTy::const_iterator I; + friend class DenseSet; + public: + typedef typename MapTy::const_iterator::difference_type difference_type; + typedef ValueT value_type; + typedef value_type *pointer; + typedef value_type &reference; + typedef std::forward_iterator_tag iterator_category; + + ConstIterator(const typename MapTy::const_iterator &i) : I(i) {} + + const ValueT& operator*() { return I->first; } + const ValueT* operator->() { return &I->first; } + + ConstIterator& operator++() { ++I; return *this; } + bool operator==(const ConstIterator& X) const { return I == X.I; } + bool operator!=(const ConstIterator& X) const { return I != X.I; } + }; + + typedef Iterator iterator; + typedef ConstIterator const_iterator; + + iterator begin() { return Iterator(TheMap.begin()); } + iterator end() { return Iterator(TheMap.end()); } + + const_iterator begin() const { return ConstIterator(TheMap.begin()); } + const_iterator end() const { return ConstIterator(TheMap.end()); } + + iterator find(const ValueT &V) { return Iterator(TheMap.find(V)); } + void erase(Iterator I) { return TheMap.erase(I.I); } + void erase(ConstIterator CI) { return TheMap.erase(CI.I); } + + std::pair insert(const ValueT &V) { + return TheMap.insert(std::make_pair(V, 0)); + } + + // Range insertion of values. + template + void insert(InputIt I, InputIt E) { + for (; I != E; ++I) + insert(*I); + } +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/ADT/DepthFirstIterator.h b/final/include/llvm/ADT/DepthFirstIterator.h new file mode 100644 index 00000000000..dd13a2c0205 --- /dev/null +++ b/final/include/llvm/ADT/DepthFirstIterator.h @@ -0,0 +1,268 @@ +//===- llvm/ADT/DepthFirstIterator.h - Depth First iterator -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file builds on the ADT/GraphTraits.h file to build generic depth +// first graph iterator. This file exposes the following functions/types: +// +// df_begin/df_end/df_iterator +// * Normal depth-first iteration - visit a node and then all of its children. +// +// idf_begin/idf_end/idf_iterator +// * Depth-first iteration on the 'inverse' graph. +// +// df_ext_begin/df_ext_end/df_ext_iterator +// * Normal depth-first iteration - visit a node and then all of its children. +// This iterator stores the 'visited' set in an external set, which allows +// it to be more efficient, and allows external clients to use the set for +// other purposes. +// +// idf_ext_begin/idf_ext_end/idf_ext_iterator +// * Depth-first iteration on the 'inverse' graph. 
+// This iterator stores the 'visited' set in an external set, which allows +// it to be more efficient, and allows external clients to use the set for +// other purposes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DEPTHFIRSTITERATOR_H +#define LLVM_ADT_DEPTHFIRSTITERATOR_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/PointerIntPair.h" +#include +#include + +namespace llvm { + +// df_iterator_storage - A private class which is used to figure out where to +// store the visited set. +template // Non-external set +class df_iterator_storage { +public: + SetType Visited; +}; + +template +class df_iterator_storage { +public: + df_iterator_storage(SetType &VSet) : Visited(VSet) {} + df_iterator_storage(const df_iterator_storage &S) : Visited(S.Visited) {} + SetType &Visited; +}; + + +// Generic Depth First Iterator +template::NodeType*, 8>, + bool ExtStorage = false, class GT = GraphTraits > +class df_iterator : public std::iterator, + public df_iterator_storage { + typedef std::iterator super; + + typedef typename GT::NodeType NodeType; + typedef typename GT::ChildIteratorType ChildItTy; + typedef PointerIntPair PointerIntTy; + + // VisitStack - Used to maintain the ordering. Top = current block + // First element is node pointer, second is the 'next child' to visit + // if the int in PointerIntTy is 0, the 'next child' to visit is invalid + std::vector > VisitStack; +private: + inline df_iterator(NodeType *Node) { + this->Visited.insert(Node); + VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), + GT::child_begin(Node))); + } + inline df_iterator() { + // End is when stack is empty + } + inline df_iterator(NodeType *Node, SetType &S) + : df_iterator_storage(S) { + if (!S.count(Node)) { + VisitStack.push_back(std::make_pair(PointerIntTy(Node, 0), + GT::child_begin(Node))); + this->Visited.insert(Node); + } + } + inline df_iterator(SetType &S) + : df_iterator_storage(S) { + // End is when stack is empty + } + + inline void toNext() { + do { + std::pair &Top = VisitStack.back(); + NodeType *Node = Top.first.getPointer(); + ChildItTy &It = Top.second; + if (!Top.first.getInt()) { + // now retrieve the real begin of the children before we dive in + It = GT::child_begin(Node); + Top.first.setInt(1); + } + + while (It != GT::child_end(Node)) { + NodeType *Next = *It++; + // Has our next sibling been visited? + if (Next && !this->Visited.count(Next)) { + // No, do it now. + this->Visited.insert(Next); + VisitStack.push_back(std::make_pair(PointerIntTy(Next, 0), + GT::child_begin(Next))); + return; + } + } + + // Oops, ran out of successors... go up a level on the stack. 
+ VisitStack.pop_back(); + } while (!VisitStack.empty()); + } + +public: + typedef typename super::pointer pointer; + typedef df_iterator _Self; + + // Provide static begin and end methods as our public "constructors" + static inline _Self begin(const GraphT& G) { + return _Self(GT::getEntryNode(G)); + } + static inline _Self end(const GraphT& G) { return _Self(); } + + // Static begin and end methods as our public ctors for external iterators + static inline _Self begin(const GraphT& G, SetType &S) { + return _Self(GT::getEntryNode(G), S); + } + static inline _Self end(const GraphT& G, SetType &S) { return _Self(S); } + + inline bool operator==(const _Self& x) const { + return VisitStack == x.VisitStack; + } + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + inline pointer operator*() const { + return VisitStack.back().first.getPointer(); + } + + // This is a nonstandard operator-> that dereferences the pointer an extra + // time... so that you can actually call methods ON the Node, because + // the contained type is a pointer. This allows BBIt->getTerminator() f.e. + // + inline NodeType *operator->() const { return operator*(); } + + inline _Self& operator++() { // Preincrement + toNext(); + return *this; + } + + // skips all children of the current node and traverses to next node + // + inline _Self& skipChildren() { + VisitStack.pop_back(); + if (!VisitStack.empty()) + toNext(); + return *this; + } + + inline _Self operator++(int) { // Postincrement + _Self tmp = *this; ++*this; return tmp; + } + + // nodeVisited - return true if this iterator has already visited the + // specified node. This is public, and will probably be used to iterate over + // nodes that a depth first iteration did not find: ie unreachable nodes. + // + inline bool nodeVisited(NodeType *Node) const { + return this->Visited.count(Node) != 0; + } + + /// getPathLength - Return the length of the path from the entry node to the + /// current node, counting both nodes. + unsigned getPathLength() const { return VisitStack.size(); } + + /// getPath - Return the n'th node in the path from the the entry node to the + /// current node. + NodeType *getPath(unsigned n) const { + return VisitStack[n].first.getPointer(); + } +}; + + +// Provide global constructors that automatically figure out correct types... +// +template +df_iterator df_begin(const T& G) { + return df_iterator::begin(G); +} + +template +df_iterator df_end(const T& G) { + return df_iterator::end(G); +} + +// Provide global definitions of external depth first iterators... +template ::NodeType*> > +struct df_ext_iterator : public df_iterator { + df_ext_iterator(const df_iterator &V) + : df_iterator(V) {} +}; + +template +df_ext_iterator df_ext_begin(const T& G, SetTy &S) { + return df_ext_iterator::begin(G, S); +} + +template +df_ext_iterator df_ext_end(const T& G, SetTy &S) { + return df_ext_iterator::end(G, S); +} + + +// Provide global definitions of inverse depth first iterators... +template ::NodeType*, 8>, + bool External = false> +struct idf_iterator : public df_iterator, SetTy, External> { + idf_iterator(const df_iterator, SetTy, External> &V) + : df_iterator, SetTy, External>(V) {} +}; + +template +idf_iterator idf_begin(const T& G) { + return idf_iterator::begin(Inverse(G)); +} + +template +idf_iterator idf_end(const T& G){ + return idf_iterator::end(Inverse(G)); +} + +// Provide global definitions of external inverse depth first iterators... 
+template ::NodeType*> > +struct idf_ext_iterator : public idf_iterator { + idf_ext_iterator(const idf_iterator &V) + : idf_iterator(V) {} + idf_ext_iterator(const df_iterator, SetTy, true> &V) + : idf_iterator(V) {} +}; + +template +idf_ext_iterator idf_ext_begin(const T& G, SetTy &S) { + return idf_ext_iterator::begin(Inverse(G), S); +} + +template +idf_ext_iterator idf_ext_end(const T& G, SetTy &S) { + return idf_ext_iterator::end(Inverse(G), S); +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/ADT/EquivalenceClasses.h b/final/include/llvm/ADT/EquivalenceClasses.h new file mode 100644 index 00000000000..771476c3036 --- /dev/null +++ b/final/include/llvm/ADT/EquivalenceClasses.h @@ -0,0 +1,281 @@ +//===-- llvm/ADT/EquivalenceClasses.h - Generic Equiv. Classes --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generic implementation of equivalence classes through the use Tarjan's +// efficient union-find algorithm. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_EQUIVALENCECLASSES_H +#define LLVM_ADT_EQUIVALENCECLASSES_H + +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { + +/// EquivalenceClasses - This represents a collection of equivalence classes and +/// supports three efficient operations: insert an element into a class of its +/// own, union two classes, and find the class for a given element. In +/// addition to these modification methods, it is possible to iterate over all +/// of the equivalence classes and all of the elements in a class. +/// +/// This implementation is an efficient implementation that only stores one copy +/// of the element being indexed per entry in the set, and allows any arbitrary +/// type to be indexed (as long as it can be ordered with operator<). +/// +/// Here is a simple example using integers: +/// +/// EquivalenceClasses EC; +/// EC.unionSets(1, 2); // insert 1, 2 into the same set +/// EC.insert(4); EC.insert(5); // insert 4, 5 into own sets +/// EC.unionSets(5, 1); // merge the set for 1 with 5's set. +/// +/// for (EquivalenceClasses::iterator I = EC.begin(), E = EC.end(); +/// I != E; ++I) { // Iterate over all of the equivalence sets. +/// if (!I->isLeader()) continue; // Ignore non-leader sets. +/// for (EquivalenceClasses::member_iterator MI = EC.member_begin(I); +/// MI != EC.member_end(); ++MI) // Loop over members in this set. +/// cerr << *MI << " "; // Print member. +/// cerr << "\n"; // Finish set. +/// } +/// +/// This example prints: +/// 4 +/// 5 1 2 +/// +template +class EquivalenceClasses { + /// ECValue - The EquivalenceClasses data structure is just a set of these. + /// Each of these represents a relation for a value. First it stores the + /// value itself, which provides the ordering that the set queries. Next, it + /// provides a "next pointer", which is used to enumerate all of the elements + /// in the unioned set. Finally, it defines either a "end of list pointer" or + /// "leader pointer" depending on whether the value itself is a leader. A + /// "leader pointer" points to the node that is the leader for this element, + /// if the node is not a leader. A "end of list pointer" points to the last + /// node in the list of members of this list. 
Whether or not a node is a + /// leader is determined by a bit stolen from one of the pointers. + class ECValue { + friend class EquivalenceClasses; + mutable const ECValue *Leader, *Next; + ElemTy Data; + // ECValue ctor - Start out with EndOfList pointing to this node, Next is + // Null, isLeader = true. + ECValue(const ElemTy &Elt) + : Leader(this), Next((ECValue*)(intptr_t)1), Data(Elt) {} + + const ECValue *getLeader() const { + if (isLeader()) return this; + if (Leader->isLeader()) return Leader; + // Path compression. + return Leader = Leader->getLeader(); + } + const ECValue *getEndOfList() const { + assert(isLeader() && "Cannot get the end of a list for a non-leader!"); + return Leader; + } + + void setNext(const ECValue *NewNext) const { + assert(getNext() == 0 && "Already has a next pointer!"); + Next = (const ECValue*)((intptr_t)NewNext | (intptr_t)isLeader()); + } + public: + ECValue(const ECValue &RHS) : Leader(this), Next((ECValue*)(intptr_t)1), + Data(RHS.Data) { + // Only support copying of singleton nodes. + assert(RHS.isLeader() && RHS.getNext() == 0 && "Not a singleton!"); + } + + bool operator<(const ECValue &UFN) const { return Data < UFN.Data; } + + bool isLeader() const { return (intptr_t)Next & 1; } + const ElemTy &getData() const { return Data; } + + const ECValue *getNext() const { + return (ECValue*)((intptr_t)Next & ~(intptr_t)1); + } + + template + bool operator<(const T &Val) const { return Data < Val; } + }; + + /// TheMapping - This implicitly provides a mapping from ElemTy values to the + /// ECValues, it just keeps the key as part of the value. + std::set TheMapping; + +public: + EquivalenceClasses() {} + EquivalenceClasses(const EquivalenceClasses &RHS) { + operator=(RHS); + } + + const EquivalenceClasses &operator=(const EquivalenceClasses &RHS) { + TheMapping.clear(); + for (iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) + if (I->isLeader()) { + member_iterator MI = RHS.member_begin(I); + member_iterator LeaderIt = member_begin(insert(*MI)); + for (++MI; MI != member_end(); ++MI) + unionSets(LeaderIt, member_begin(insert(*MI))); + } + return *this; + } + + //===--------------------------------------------------------------------===// + // Inspection methods + // + + /// iterator* - Provides a way to iterate over all values in the set. + typedef typename std::set::const_iterator iterator; + iterator begin() const { return TheMapping.begin(); } + iterator end() const { return TheMapping.end(); } + + bool empty() const { return TheMapping.empty(); } + + /// member_* Iterate over the members of an equivalence class. + /// + class member_iterator; + member_iterator member_begin(iterator I) const { + // Only leaders provide anything to iterate over. + return member_iterator(I->isLeader() ? &*I : 0); + } + member_iterator member_end() const { + return member_iterator(0); + } + + /// findValue - Return an iterator to the specified value. If it does not + /// exist, end() is returned. + iterator findValue(const ElemTy &V) const { + return TheMapping.find(V); + } + + /// getLeaderValue - Return the leader for the specified value that is in the + /// set. It is an error to call this method for a value that is not yet in + /// the set. For that, call getOrInsertLeaderValue(V). + const ElemTy &getLeaderValue(const ElemTy &V) const { + member_iterator MI = findLeader(V); + assert(MI != member_end() && "Value is not in the set!"); + return *MI; + } + + /// getOrInsertLeaderValue - Return the leader for the specified value that is + /// in the set. 
If the member is not in the set, it is inserted, then + /// returned. + const ElemTy &getOrInsertLeaderValue(const ElemTy &V) { + member_iterator MI = findLeader(insert(V)); + assert(MI != member_end() && "Value is not in the set!"); + return *MI; + } + + /// getNumClasses - Return the number of equivalence classes in this set. + /// Note that this is a linear time operation. + unsigned getNumClasses() const { + unsigned NC = 0; + for (iterator I = begin(), E = end(); I != E; ++I) + if (I->isLeader()) ++NC; + return NC; + } + + + //===--------------------------------------------------------------------===// + // Mutation methods + + /// insert - Insert a new value into the union/find set, ignoring the request + /// if the value already exists. + iterator insert(const ElemTy &Data) { + return TheMapping.insert(ECValue(Data)).first; + } + + /// findLeader - Given a value in the set, return a member iterator for the + /// equivalence class it is in. This does the path-compression part that + /// makes union-find "union findy". This returns an end iterator if the value + /// is not in the equivalence class. + /// + member_iterator findLeader(iterator I) const { + if (I == TheMapping.end()) return member_end(); + return member_iterator(I->getLeader()); + } + member_iterator findLeader(const ElemTy &V) const { + return findLeader(TheMapping.find(V)); + } + + + /// union - Merge the two equivalence sets for the specified values, inserting + /// them if they do not already exist in the equivalence set. + member_iterator unionSets(const ElemTy &V1, const ElemTy &V2) { + iterator V1I = insert(V1), V2I = insert(V2); + return unionSets(findLeader(V1I), findLeader(V2I)); + } + member_iterator unionSets(member_iterator L1, member_iterator L2) { + assert(L1 != member_end() && L2 != member_end() && "Illegal inputs!"); + if (L1 == L2) return L1; // Unifying the same two sets, noop. + + // Otherwise, this is a real union operation. Set the end of the L1 list to + // point to the L2 leader node. + const ECValue &L1LV = *L1.Node, &L2LV = *L2.Node; + L1LV.getEndOfList()->setNext(&L2LV); + + // Update L1LV's end of list pointer. + L1LV.Leader = L2LV.getEndOfList(); + + // Clear L2's leader flag: + L2LV.Next = L2LV.getNext(); + + // L2's leader is now L1. + L2LV.Leader = &L1LV; + return L1; + } + + class member_iterator : public std::iterator { + typedef std::iterator super; + const ECValue *Node; + friend class EquivalenceClasses; + public: + typedef size_t size_type; + typedef typename super::pointer pointer; + typedef typename super::reference reference; + + explicit member_iterator() {} + explicit member_iterator(const ECValue *N) : Node(N) {} + member_iterator(const member_iterator &I) : Node(I.Node) {} + + reference operator*() const { + assert(Node != 0 && "Dereferencing end()!"); + return Node->getData(); + } + reference operator->() const { return operator*(); } + + member_iterator &operator++() { + assert(Node != 0 && "++'d off the end of the list!"); + Node = Node->getNext(); + return *this; + } + + member_iterator operator++(int) { // postincrement operators. 
+ member_iterator tmp = *this; + ++*this; + return tmp; + } + + bool operator==(const member_iterator &RHS) const { + return Node == RHS.Node; + } + bool operator!=(const member_iterator &RHS) const { + return Node != RHS.Node; + } + }; +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/ADT/FoldingSet.h b/final/include/llvm/ADT/FoldingSet.h new file mode 100644 index 00000000000..879dbd05e17 --- /dev/null +++ b/final/include/llvm/ADT/FoldingSet.h @@ -0,0 +1,684 @@ +//===-- llvm/ADT/FoldingSet.h - Uniquing Hash Set ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a hash set that can be used to remove duplication of nodes +// in a graph. This code was originally created by Chris Lattner for use with +// SelectionDAGCSEMap, but was isolated to provide use across the llvm code set. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_FOLDINGSET_H +#define LLVM_ADT_FOLDINGSET_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + class APFloat; + class APInt; + class BumpPtrAllocator; + +/// This folding set used for two purposes: +/// 1. Given information about a node we want to create, look up the unique +/// instance of the node in the set. If the node already exists, return +/// it, otherwise return the bucket it should be inserted into. +/// 2. Given a node that has already been created, remove it from the set. +/// +/// This class is implemented as a single-link chained hash table, where the +/// "buckets" are actually the nodes themselves (the next pointer is in the +/// node). The last node points back to the bucket to simplify node removal. +/// +/// Any node that is to be included in the folding set must be a subclass of +/// FoldingSetNode. The node class must also define a Profile method used to +/// establish the unique bits of data for the node. The Profile method is +/// passed a FoldingSetNodeID object which is used to gather the bits. Just +/// call one of the Add* functions defined in the FoldingSetImpl::NodeID class. +/// NOTE: That the folding set does not own the nodes and it is the +/// responsibility of the user to dispose of the nodes. +/// +/// Eg. +/// class MyNode : public FoldingSetNode { +/// private: +/// std::string Name; +/// unsigned Value; +/// public: +/// MyNode(const char *N, unsigned V) : Name(N), Value(V) {} +/// ... +/// void Profile(FoldingSetNodeID &ID) const { +/// ID.AddString(Name); +/// ID.AddInteger(Value); +/// } +/// ... +/// }; +/// +/// To define the folding set itself use the FoldingSet template; +/// +/// Eg. +/// FoldingSet MyFoldingSet; +/// +/// Four public methods are available to manipulate the folding set; +/// +/// 1) If you have an existing node that you want add to the set but unsure +/// that the node might already exist then call; +/// +/// MyNode *M = MyFoldingSet.GetOrInsertNode(N); +/// +/// If The result is equal to the input then the node has been inserted. +/// Otherwise, the result is the node existing in the folding set, and the +/// input can be discarded (use the result instead.) 
+/// +/// 2) If you are ready to construct a node but want to check if it already +/// exists, then call FindNodeOrInsertPos with a FoldingSetNodeID of the bits to +/// check; +/// +/// FoldingSetNodeID ID; +/// ID.AddString(Name); +/// ID.AddInteger(Value); +/// void *InsertPoint; +/// +/// MyNode *M = MyFoldingSet.FindNodeOrInsertPos(ID, InsertPoint); +/// +/// If found then M with be non-NULL, else InsertPoint will point to where it +/// should be inserted using InsertNode. +/// +/// 3) If you get a NULL result from FindNodeOrInsertPos then you can as a new +/// node with FindNodeOrInsertPos; +/// +/// InsertNode(N, InsertPoint); +/// +/// 4) Finally, if you want to remove a node from the folding set call; +/// +/// bool WasRemoved = RemoveNode(N); +/// +/// The result indicates whether the node existed in the folding set. + +class FoldingSetNodeID; + +//===----------------------------------------------------------------------===// +/// FoldingSetImpl - Implements the folding set functionality. The main +/// structure is an array of buckets. Each bucket is indexed by the hash of +/// the nodes it contains. The bucket itself points to the nodes contained +/// in the bucket via a singly linked list. The last node in the list points +/// back to the bucket to facilitate node removal. +/// +class FoldingSetImpl { +protected: + /// Buckets - Array of bucket chains. + /// + void **Buckets; + + /// NumBuckets - Length of the Buckets array. Always a power of 2. + /// + unsigned NumBuckets; + + /// NumNodes - Number of nodes in the folding set. Growth occurs when NumNodes + /// is greater than twice the number of buckets. + unsigned NumNodes; + +public: + explicit FoldingSetImpl(unsigned Log2InitSize = 6); + virtual ~FoldingSetImpl(); + + //===--------------------------------------------------------------------===// + /// Node - This class is used to maintain the singly linked bucket list in + /// a folding set. + /// + class Node { + private: + // NextInFoldingSetBucket - next link in the bucket list. + void *NextInFoldingSetBucket; + + public: + + Node() : NextInFoldingSetBucket(0) {} + + // Accessors + void *getNextInBucket() const { return NextInFoldingSetBucket; } + void SetNextInBucket(void *N) { NextInFoldingSetBucket = N; } + }; + + /// clear - Remove all nodes from the folding set. + void clear(); + + /// RemoveNode - Remove a node from the folding set, returning true if one + /// was removed or false if the node was not in the folding set. + bool RemoveNode(Node *N); + + /// GetOrInsertNode - If there is an existing simple Node exactly + /// equal to the specified node, return it. Otherwise, insert 'N' and return + /// it instead. + Node *GetOrInsertNode(Node *N); + + /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, + /// return it. If not, return the insertion token that will make insertion + /// faster. + Node *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos); + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. InsertPos must be obtained from + /// FindNodeOrInsertPos. + void InsertNode(Node *N, void *InsertPos); + + /// InsertNode - Insert the specified node into the folding set, knowing that + /// it is not already in the folding set. + void InsertNode(Node *N) { + Node *Inserted = GetOrInsertNode(N); + (void)Inserted; + assert(Inserted == N && "Node already inserted!"); + } + + /// size - Returns the number of nodes in the folding set. 
+ unsigned size() const { return NumNodes; } + + /// empty - Returns true if there are no nodes in the folding set. + bool empty() const { return NumNodes == 0; } + +private: + + /// GrowHashTable - Double the size of the hash table and rehash everything. + /// + void GrowHashTable(); + +protected: + + /// GetNodeProfile - Instantiations of the FoldingSet template implement + /// this function to gather data bits for the given node. + virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0; + /// NodeEquals - Instantiations of the FoldingSet template implement + /// this function to compare the given node with the given ID. + virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, + FoldingSetNodeID &TempID) const=0; + /// NodeEquals - Instantiations of the FoldingSet template implement + /// this function to compute a hash value for the given node. + virtual unsigned ComputeNodeHash(Node *N, + FoldingSetNodeID &TempID) const = 0; +}; + +//===----------------------------------------------------------------------===// + +template struct FoldingSetTrait; + +/// DefaultFoldingSetTrait - This class provides default implementations +/// for FoldingSetTrait implementations. +/// +template struct DefaultFoldingSetTrait { + static void Profile(const T& X, FoldingSetNodeID& ID) { + X.Profile(ID); + } + static void Profile(T& X, FoldingSetNodeID& ID) { + X.Profile(ID); + } + + // Equals - Test if the profile for X would match ID, using TempID + // to compute a temporary ID if necessary. The default implementation + // just calls Profile and does a regular comparison. Implementations + // can override this to provide more efficient implementations. + static inline bool Equals(T &X, const FoldingSetNodeID &ID, + FoldingSetNodeID &TempID); + + // ComputeHash - Compute a hash value for X, using TempID to + // compute a temporary ID if necessary. The default implementation + // just calls Profile and does a regular hash computation. + // Implementations can override this to provide more efficient + // implementations. + static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID); +}; + +/// FoldingSetTrait - This trait class is used to define behavior of how +/// to "profile" (in the FoldingSet parlance) an object of a given type. +/// The default behavior is to invoke a 'Profile' method on an object, but +/// through template specialization the behavior can be tailored for specific +/// types. Combined with the FoldingSetNodeWrapper class, one can add objects +/// to FoldingSets that were not originally designed to have that behavior. +template struct FoldingSetTrait + : public DefaultFoldingSetTrait {}; + +template struct ContextualFoldingSetTrait; + +/// DefaultContextualFoldingSetTrait - Like DefaultFoldingSetTrait, but +/// for ContextualFoldingSets. +template +struct DefaultContextualFoldingSetTrait { + static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) { + X.Profile(ID, Context); + } + static inline bool Equals(T &X, const FoldingSetNodeID &ID, + FoldingSetNodeID &TempID, Ctx Context); + static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID, + Ctx Context); +}; + +/// ContextualFoldingSetTrait - Like FoldingSetTrait, but for +/// ContextualFoldingSets. 
+template struct ContextualFoldingSetTrait + : public DefaultContextualFoldingSetTrait {}; + +//===--------------------------------------------------------------------===// +/// FoldingSetNodeIDRef - This class describes a reference to an interned +/// FoldingSetNodeID, which can be a useful to store node id data rather +/// than using plain FoldingSetNodeIDs, since the 32-element SmallVector +/// is often much larger than necessary, and the possibility of heap +/// allocation means it requires a non-trivial destructor call. +class FoldingSetNodeIDRef { + const unsigned* Data; + size_t Size; +public: + FoldingSetNodeIDRef() : Data(0), Size(0) {} + FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {} + + /// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef, + /// used to lookup the node in the FoldingSetImpl. + unsigned ComputeHash() const; + + bool operator==(FoldingSetNodeIDRef) const; + + const unsigned *getData() const { return Data; } + size_t getSize() const { return Size; } +}; + +//===--------------------------------------------------------------------===// +/// FoldingSetNodeID - This class is used to gather all the unique data bits of +/// a node. When all the bits are gathered this class is used to produce a +/// hash value for the node. +/// +class FoldingSetNodeID { + /// Bits - Vector of all the data bits that make the node unique. + /// Use a SmallVector to avoid a heap allocation in the common case. + SmallVector Bits; + +public: + FoldingSetNodeID() {} + + FoldingSetNodeID(FoldingSetNodeIDRef Ref) + : Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {} + + /// Add* - Add various data types to Bit data. + /// + void AddPointer(const void *Ptr); + void AddInteger(signed I); + void AddInteger(unsigned I); + void AddInteger(long I); + void AddInteger(unsigned long I); + void AddInteger(long long I); + void AddInteger(unsigned long long I); + void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); } + void AddString(StringRef String); + + template + inline void Add(const T& x) { FoldingSetTrait::Profile(x, *this); } + + /// clear - Clear the accumulated profile, allowing this FoldingSetNodeID + /// object to be used to compute a new profile. + inline void clear() { Bits.clear(); } + + /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used + /// to lookup the node in the FoldingSetImpl. + unsigned ComputeHash() const; + + /// operator== - Used to compare two nodes to each other. + /// + bool operator==(const FoldingSetNodeID &RHS) const; + bool operator==(const FoldingSetNodeIDRef RHS) const; + + /// Intern - Copy this node's data to a memory region allocated from the + /// given allocator and return a FoldingSetNodeIDRef describing the + /// interned data. + FoldingSetNodeIDRef Intern(BumpPtrAllocator &Allocator) const; +}; + +// Convenience type to hide the implementation of the folding set. +typedef FoldingSetImpl::Node FoldingSetNode; +template class FoldingSetIterator; +template class FoldingSetBucketIterator; + +// Definitions of FoldingSetTrait and ContextualFoldingSetTrait functions, which +// require the definition of FoldingSetNodeID. 
+template<typename T>
+inline bool
+DefaultFoldingSetTrait<T>::Equals(T &X, const FoldingSetNodeID &ID,
+                                  FoldingSetNodeID &TempID) {
+  FoldingSetTrait<T>::Profile(X, TempID);
+  return TempID == ID;
+}
+template<typename T>
+inline unsigned
+DefaultFoldingSetTrait<T>::ComputeHash(T &X, FoldingSetNodeID &TempID) {
+  FoldingSetTrait<T>::Profile(X, TempID);
+  return TempID.ComputeHash();
+}
+template<typename T, typename Ctx>
+inline bool
+DefaultContextualFoldingSetTrait<T, Ctx>::Equals(T &X,
+                                                 const FoldingSetNodeID &ID,
+                                                 FoldingSetNodeID &TempID,
+                                                 Ctx Context) {
+  ContextualFoldingSetTrait<T, Ctx>::Profile(X, TempID, Context);
+  return TempID == ID;
+}
+template<typename T, typename Ctx>
+inline unsigned
+DefaultContextualFoldingSetTrait<T, Ctx>::ComputeHash(T &X,
+                                                      FoldingSetNodeID &TempID,
+                                                      Ctx Context) {
+  ContextualFoldingSetTrait<T, Ctx>::Profile(X, TempID, Context);
+  return TempID.ComputeHash();
+}
+
+//===----------------------------------------------------------------------===//
+/// FoldingSet - This template class is used to instantiate a specialized
+/// implementation of the folding set to the node class T.  T must be a
+/// subclass of FoldingSetNode and implement a Profile function.
+///
+template<class T> class FoldingSet : public FoldingSetImpl {
+private:
+  /// GetNodeProfile - Each instantiation of the FoldingSet needs to provide a
+  /// way to convert nodes into a unique specifier.
+  virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const {
+    T *TN = static_cast<T *>(N);
+    FoldingSetTrait<T>::Profile(*TN, ID);
+  }
+  /// NodeEquals - Instantiations may optionally provide a way to compare a
+  /// node with a specified ID.
+  virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID,
+                          FoldingSetNodeID &TempID) const {
+    T *TN = static_cast<T *>(N);
+    return FoldingSetTrait<T>::Equals(*TN, ID, TempID);
+  }
+  /// ComputeNodeHash - Instantiations may optionally provide a way to compute
+  /// a hash value directly from a node.
+  virtual unsigned ComputeNodeHash(Node *N,
+                                   FoldingSetNodeID &TempID) const {
+    T *TN = static_cast<T *>(N);
+    return FoldingSetTrait<T>::ComputeHash(*TN, TempID);
+  }
+
+public:
+  explicit FoldingSet(unsigned Log2InitSize = 6)
+  : FoldingSetImpl(Log2InitSize)
+  {}
+
+  typedef FoldingSetIterator<T> iterator;
+  iterator begin() { return iterator(Buckets); }
+  iterator end() { return iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetIterator<const T> const_iterator;
+  const_iterator begin() const { return const_iterator(Buckets); }
+  const_iterator end() const { return const_iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetBucketIterator<T> bucket_iterator;
+
+  bucket_iterator bucket_begin(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)));
+  }
+
+  bucket_iterator bucket_end(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true);
+  }
+
+  /// GetOrInsertNode - If there is an existing simple Node exactly
+  /// equal to the specified node, return it.  Otherwise, insert 'N' and
+  /// return it instead.
+  T *GetOrInsertNode(Node *N) {
+    return static_cast<T *>(FoldingSetImpl::GetOrInsertNode(N));
+  }
+
+  /// FindNodeOrInsertPos - Look up the node specified by ID.  If it exists,
+  /// return it.  If not, return the insertion token that will make insertion
+  /// faster.
+  T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) {
+    return static_cast<T *>(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// ContextualFoldingSet - This template class is a further refinement
+/// of FoldingSet which provides a context argument when calling
+/// Profile on its nodes.  Currently, that argument is fixed at
+/// initialization time.
+///
+/// T must be a subclass of FoldingSetNode and implement a Profile
+/// function with signature
+///   void Profile(llvm::FoldingSetNodeID &, Ctx);
+template <class T, class Ctx>
+class ContextualFoldingSet : public FoldingSetImpl {
+  // Unfortunately, this can't derive from FoldingSet<T> because the
+  // construction vtable for FoldingSet<T> requires
+  // FoldingSet<T>::GetNodeProfile to be instantiated, which in turn
+  // requires a single-argument T::Profile().
+
+private:
+  Ctx Context;
+
+  /// GetNodeProfile - Each instantiation of the FoldingSet needs to provide a
+  /// way to convert nodes into a unique specifier.
+  virtual void GetNodeProfile(FoldingSetImpl::Node *N,
+                              FoldingSetNodeID &ID) const {
+    T *TN = static_cast<T *>(N);
+    ContextualFoldingSetTrait<T, Ctx>::Profile(*TN, ID, Context);
+  }
+  virtual bool NodeEquals(FoldingSetImpl::Node *N,
+                          const FoldingSetNodeID &ID,
+                          FoldingSetNodeID &TempID) const {
+    T *TN = static_cast<T *>(N);
+    return ContextualFoldingSetTrait<T, Ctx>::Equals(*TN, ID, TempID, Context);
+  }
+  virtual unsigned ComputeNodeHash(FoldingSetImpl::Node *N,
+                                   FoldingSetNodeID &TempID) const {
+    T *TN = static_cast<T *>(N);
+    return ContextualFoldingSetTrait<T, Ctx>::ComputeHash(*TN, TempID, Context);
+  }
+
+public:
+  explicit ContextualFoldingSet(Ctx Context, unsigned Log2InitSize = 6)
+  : FoldingSetImpl(Log2InitSize), Context(Context)
+  {}
+
+  Ctx getContext() const { return Context; }
+
+  typedef FoldingSetIterator<T> iterator;
+  iterator begin() { return iterator(Buckets); }
+  iterator end() { return iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetIterator<const T> const_iterator;
+  const_iterator begin() const { return const_iterator(Buckets); }
+  const_iterator end() const { return const_iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetBucketIterator<T> bucket_iterator;
+
+  bucket_iterator bucket_begin(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)));
+  }
+
+  bucket_iterator bucket_end(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true);
+  }
+
+  /// GetOrInsertNode - If there is an existing simple Node exactly
+  /// equal to the specified node, return it.  Otherwise, insert 'N'
+  /// and return it instead.
+  T *GetOrInsertNode(Node *N) {
+    return static_cast<T *>(FoldingSetImpl::GetOrInsertNode(N));
+  }
+
+  /// FindNodeOrInsertPos - Look up the node specified by ID.  If it
+  /// exists, return it.  If not, return the insertion token that will
+  /// make insertion faster.
+  T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) {
+    return static_cast<T *>(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// FoldingSetIteratorImpl - This is the common iterator support shared by all
+/// folding sets, which knows how to walk the folding set hash table.
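+//===----------------------------------------------------------------------===//
+// Example: typical FoldingSet usage, shown only as an illustrative sketch
+// ('MyNode' and its field are hypothetical):
+//
+//   class MyNode : public FoldingSetNode {
+//     unsigned Value;
+//   public:
+//     MyNode(unsigned V) : Value(V) {}
+//     void Profile(FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
+//   };
+//
+//   FoldingSet<MyNode> Set;
+//   FoldingSetNodeID ID;
+//   ID.AddInteger(42);           // Must mirror MyNode::Profile.
+//   void *InsertPos;
+//   MyNode *N = Set.FindNodeOrInsertPos(ID, InsertPos);
+//   if (!N)
+//     Set.InsertNode(N = new MyNode(42), InsertPos);
+//===----------------------------------------------------------------------===//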
+class FoldingSetIteratorImpl {
+protected:
+  FoldingSetNode *NodePtr;
+  FoldingSetIteratorImpl(void **Bucket);
+  void advance();
+
+public:
+  bool operator==(const FoldingSetIteratorImpl &RHS) const {
+    return NodePtr == RHS.NodePtr;
+  }
+  bool operator!=(const FoldingSetIteratorImpl &RHS) const {
+    return NodePtr != RHS.NodePtr;
+  }
+};
+
+
+template<class T>
+class FoldingSetIterator : public FoldingSetIteratorImpl {
+public:
+  explicit FoldingSetIterator(void **Bucket) : FoldingSetIteratorImpl(Bucket) {}
+
+  T &operator*() const {
+    return *static_cast<T*>(NodePtr);
+  }
+
+  T *operator->() const {
+    return static_cast<T*>(NodePtr);
+  }
+
+  inline FoldingSetIterator &operator++() {          // Preincrement
+    advance();
+    return *this;
+  }
+  FoldingSetIterator operator++(int) {        // Postincrement
+    FoldingSetIterator tmp = *this; ++*this; return tmp;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// FoldingSetBucketIteratorImpl - This is the common bucket iterator support
+/// shared by all folding sets, which knows how to walk a particular bucket
+/// of a folding set hash table.
+
+class FoldingSetBucketIteratorImpl {
+protected:
+  void *Ptr;
+
+  explicit FoldingSetBucketIteratorImpl(void **Bucket);
+
+  FoldingSetBucketIteratorImpl(void **Bucket, bool)
+    : Ptr(Bucket) {}
+
+  void advance() {
+    void *Probe = static_cast<FoldingSetNode*>(Ptr)->getNextInBucket();
+    uintptr_t x = reinterpret_cast<uintptr_t>(Probe) & ~0x1;
+    Ptr = reinterpret_cast<void*>(x);
+  }
+
+public:
+  bool operator==(const FoldingSetBucketIteratorImpl &RHS) const {
+    return Ptr == RHS.Ptr;
+  }
+  bool operator!=(const FoldingSetBucketIteratorImpl &RHS) const {
+    return Ptr != RHS.Ptr;
+  }
+};
+
+
+template<class T>
+class FoldingSetBucketIterator : public FoldingSetBucketIteratorImpl {
+public:
+  explicit FoldingSetBucketIterator(void **Bucket) :
+    FoldingSetBucketIteratorImpl(Bucket) {}
+
+  FoldingSetBucketIterator(void **Bucket, bool) :
+    FoldingSetBucketIteratorImpl(Bucket, true) {}
+
+  T &operator*() const { return *static_cast<T*>(Ptr); }
+  T *operator->() const { return static_cast<T*>(Ptr); }
+
+  inline FoldingSetBucketIterator &operator++() { // Preincrement
+    advance();
+    return *this;
+  }
+  FoldingSetBucketIterator operator++(int) {      // Postincrement
+    FoldingSetBucketIterator tmp = *this; ++*this; return tmp;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// FoldingSetNodeWrapper - This template class is used to "wrap" arbitrary
+/// types in an enclosing object so that they can be inserted into FoldingSets.
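+/// For example (illustrative only), a plain value type with a
+/// FoldingSetTrait specialization could be stored as
+///   FoldingSet<FoldingSetNodeWrapper<MyValue> >
+/// where 'MyValue' is a hypothetical type that need not derive from
+/// FoldingSetNode; the wrapper forwards Profile to
+/// FoldingSetTrait<MyValue>::Profile.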
+template <typename T>
+class FoldingSetNodeWrapper : public FoldingSetNode {
+  T data;
+public:
+  explicit FoldingSetNodeWrapper(const T &x) : data(x) {}
+  virtual ~FoldingSetNodeWrapper() {}
+
+  template <typename A1>
+  explicit FoldingSetNodeWrapper(const A1 &a1)
+    : data(a1) {}
+
+  template <typename A1, typename A2>
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2)
+    : data(a1,a2) {}
+
+  template <typename A1, typename A2, typename A3>
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3)
+    : data(a1,a2,a3) {}
+
+  template <typename A1, typename A2, typename A3, typename A4>
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3,
+                                 const A4 &a4)
+    : data(a1,a2,a3,a4) {}
+
+  template <typename A1, typename A2, typename A3, typename A4, typename A5>
+  explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3,
+                                 const A4 &a4, const A5 &a5)
+  : data(a1,a2,a3,a4,a5) {}
+
+
+  void Profile(FoldingSetNodeID &ID) { FoldingSetTrait<T>::Profile(data, ID); }
+
+  T &getValue() { return data; }
+  const T &getValue() const { return data; }
+
+  operator T&() { return data; }
+  operator const T&() const { return data; }
+};
+
+//===----------------------------------------------------------------------===//
+/// FastFoldingSetNode - This is a subclass of FoldingSetNode which stores
+/// a FoldingSetNodeID value rather than requiring the node to recompute it
+/// each time it is needed. This trades space for speed (which can be
+/// significant if the ID is long), and it also permits nodes to drop
+/// information that would otherwise only be required for recomputing an ID.
+class FastFoldingSetNode : public FoldingSetNode {
+  FoldingSetNodeID FastID;
+protected:
+  explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {}
+public:
+  void Profile(FoldingSetNodeID &ID) const { ID = FastID; }
+};
+
+//===----------------------------------------------------------------------===//
+// Partial specializations of FoldingSetTrait.
+
+template<typename T> struct FoldingSetTrait<T*> {
+  static inline void Profile(const T *X, FoldingSetNodeID &ID) {
+    ID.AddPointer(X);
+  }
+};
+
+template<typename T> struct FoldingSetTrait<const T*> {
+  static inline void Profile(const T *X, FoldingSetNodeID &ID) {
+    ID.AddPointer(X);
+  }
+};
+
+} // End of namespace llvm.
+
+#endif
diff --git a/final/include/llvm/ADT/GraphTraits.h b/final/include/llvm/ADT/GraphTraits.h
new file mode 100644
index 00000000000..0fd1f5022af
--- /dev/null
+++ b/final/include/llvm/ADT/GraphTraits.h
@@ -0,0 +1,103 @@
+//===-- llvm/ADT/GraphTraits.h - Graph traits template ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the little GraphTraits<X> template class that should be
+// specialized by classes that want to be iterable by generic graph iterators.
+//
+// This file also defines the marker class Inverse that is used to iterate over
+// graphs in a graph defined, inverse ordering...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_GRAPHTRAITS_H
+#define LLVM_ADT_GRAPHTRAITS_H
+
+namespace llvm {
+
+// GraphTraits - This class should be specialized by different graph types...
+// which is why the default version is empty.
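+//
+// A sketch of what a specialization can look like ('MyGraph' and
+// 'MyGraphNode' are hypothetical types, shown for illustration only):
+//
+//   template <> struct GraphTraits<MyGraph*> {
+//     typedef MyGraphNode NodeType;
+//     typedef MyGraphNode::iterator ChildIteratorType;
+//     static NodeType *getEntryNode(MyGraph *G) { return G->getRoot(); }
+//     static ChildIteratorType child_begin(NodeType *N) { return N->begin(); }
+//     static ChildIteratorType child_end(NodeType *N) { return N->end(); }
+//   };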
+//
+template<class GraphType>
+struct GraphTraits {
+  // Elements to provide:
+
+  // typedef NodeType          - Type of Node in the graph
+  // typedef ChildIteratorType - Type used to iterate over children in graph
+
+  // static NodeType *getEntryNode(const GraphType &)
+  //    Return the entry node of the graph
+
+  // static ChildIteratorType child_begin(NodeType *)
+  // static ChildIteratorType child_end  (NodeType *)
+  //    Return iterators that point to the beginning and ending of the child
+  //    node list for the specified node.
+  //
+
+
+  // typedef  ...iterator nodes_iterator;
+  // static nodes_iterator nodes_begin(GraphType *G)
+  // static nodes_iterator nodes_end  (GraphType *G)
+  //
+  //    nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+
+
+  // If anyone tries to use this class without having an appropriate
+  // specialization, make an error.  If you get this error, it's because you
+  // need to include the appropriate specialization of GraphTraits<> for your
+  // graph, or you need to define it for a new graph type. Either that or
+  // your argument to XXX_begin(...) is unknown or needs to have the proper .h
+  // file #include'd.
+  //
+  typedef typename GraphType::UnknownGraphTypeError NodeType;
+};
+
+
+// Inverse - This class is used as a little marker class to tell the graph
+// iterator to iterate over the graph in a graph defined "Inverse" ordering.
+// Not all graphs define an inverse ordering, and if they do, it depends on
+// the graph exactly what that is. Here's an example of usage with the
+// df_iterator:
+//
+// idf_iterator<Method*> I = idf_begin(M), E = idf_end(M);
+// for (; I != E; ++I) { ... }
+//
+// Which is equivalent to:
+// df_iterator<Inverse<Method*> > I = idf_begin(M), E = idf_end(M);
+// for (; I != E; ++I) { ... }
+//
+template <class GraphType>
+struct Inverse {
+  const GraphType &Graph;
+
+  inline Inverse(const GraphType &G) : Graph(G) {}
+};
+
+// Provide a partial specialization of GraphTraits so that the inverse of an
+// inverse falls back to the original graph.
+template<class T>
+struct GraphTraits<Inverse<Inverse<T> > > {
+  typedef typename GraphTraits<T>::NodeType NodeType;
+  typedef typename GraphTraits<T>::ChildIteratorType ChildIteratorType;
+
+  static NodeType *getEntryNode(Inverse<Inverse<T> > *G) {
+    return GraphTraits<T>::getEntryNode(G->Graph.Graph);
+  }
+
+  static ChildIteratorType child_begin(NodeType* N) {
+    return GraphTraits<T>::child_begin(N);
+  }
+
+  static ChildIteratorType child_end(NodeType* N) {
+    return GraphTraits<T>::child_end(N);
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/ImmutableIntervalMap.h b/final/include/llvm/ADT/ImmutableIntervalMap.h
new file mode 100644
index 00000000000..0d8fcf34338
--- /dev/null
+++ b/final/include/llvm/ADT/ImmutableIntervalMap.h
@@ -0,0 +1,242 @@
+//===--- ImmutableIntervalMap.h - Immutable (functional) map ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ImmutableIntervalMap class.
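+//
+// Typical use, shown only as an illustrative sketch (the allocator and value
+// names are hypothetical):
+//
+//   BumpPtrAllocator Alloc;
+//   ImmutableIntervalMap<int>::Factory F(Alloc);
+//   ImmutableIntervalMap<int> M = F.getEmptyMap();
+//   M = F.add(M, Interval(0, 9), 1);  // [0,9] -> 1
+//   M = F.add(M, Interval(5, 7), 2);  // splits: [0,4] -> 1, [5,7] -> 2, [8,9] -> 1
+//   const int *V = F.lookup(M, Interval(6, 6));
+//
+// Because overlapping entries are split on insertion, the intervals stored
+// in the map stay pairwise disjoint.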
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ImmutableMap.h"
+
+namespace llvm {
+
+class Interval {
+private:
+  int64_t Start;
+  int64_t End;
+
+public:
+  Interval(int64_t S, int64_t E) : Start(S), End(E) {}
+
+  int64_t getStart() const { return Start; }
+  int64_t getEnd() const { return End; }
+};
+
+template <typename T>
+struct ImutIntervalInfo {
+  typedef const std::pair<Interval, T> value_type;
+  typedef const value_type &value_type_ref;
+  typedef const Interval key_type;
+  typedef const Interval &key_type_ref;
+  typedef const T data_type;
+  typedef const T &data_type_ref;
+
+  static key_type_ref KeyOfValue(value_type_ref V) {
+    return V.first;
+  }
+
+  static data_type_ref DataOfValue(value_type_ref V) {
+    return V.second;
+  }
+
+  static bool isEqual(key_type_ref L, key_type_ref R) {
+    return L.getStart() == R.getStart() && L.getEnd() == R.getEnd();
+  }
+
+  static bool isDataEqual(data_type_ref L, data_type_ref R) {
+    return ImutContainerInfo<T>::isEqual(L,R);
+  }
+
+  static bool isLess(key_type_ref L, key_type_ref R) {
+    // Assume L and R do not overlap.
+    if (L.getStart() < R.getStart()) {
+      assert(L.getEnd() < R.getStart());
+      return true;
+    } else if (L.getStart() == R.getStart()) {
+      assert(L.getEnd() == R.getEnd());
+      return false;
+    } else {
+      assert(L.getStart() > R.getEnd());
+      return false;
+    }
+  }
+
+  static bool isContainedIn(key_type_ref K, key_type_ref L) {
+    if (K.getStart() >= L.getStart() && K.getEnd() <= L.getEnd())
+      return true;
+    else
+      return false;
+  }
+
+  static void Profile(FoldingSetNodeID &ID, value_type_ref V) {
+    ID.AddInteger(V.first.getStart());
+    ID.AddInteger(V.first.getEnd());
+    ImutProfileInfo<T>::Profile(ID, V.second);
+  }
+};
+
+template <typename ImutInfo>
+class ImutIntervalAVLFactory : public ImutAVLFactory<ImutInfo> {
+  typedef ImutAVLTree<ImutInfo> TreeTy;
+  typedef typename ImutInfo::value_type     value_type;
+  typedef typename ImutInfo::value_type_ref value_type_ref;
+  typedef typename ImutInfo::key_type       key_type;
+  typedef typename ImutInfo::key_type_ref   key_type_ref;
+  typedef typename ImutInfo::data_type      data_type;
+  typedef typename ImutInfo::data_type_ref  data_type_ref;
+
+public:
+  ImutIntervalAVLFactory(BumpPtrAllocator &Alloc)
+    : ImutAVLFactory<ImutInfo>(Alloc) {}
+
+  TreeTy *Add(TreeTy *T, value_type_ref V) {
+    T = add_internal(V,T);
+    this->markImmutable(T);
+    return T;
+  }
+
+  TreeTy *Find(TreeTy *T, key_type_ref K) {
+    if (!T)
+      return NULL;
+
+    key_type_ref CurrentKey = ImutInfo::KeyOfValue(this->getValue(T));
+
+    if (ImutInfo::isContainedIn(K, CurrentKey))
+      return T;
+    else if (ImutInfo::isLess(K, CurrentKey))
+      return Find(this->getLeft(T), K);
+    else
+      return Find(this->getRight(T), K);
+  }
+
+private:
+  TreeTy *add_internal(value_type_ref V, TreeTy *T) {
+    key_type_ref K = ImutInfo::KeyOfValue(V);
+    T = removeAllOverlaps(T, K);
+    if (this->isEmpty(T))
+      return this->createNode(NULL, V, NULL);
+
+    assert(!T->isMutable());
+
+    key_type_ref KCurrent = ImutInfo::KeyOfValue(this->getValue(T));
+
+    if (ImutInfo::isLess(K, KCurrent))
+      return this->balanceTree(add_internal(V, this->getLeft(T)),
+                               this->getValue(T), this->getRight(T));
+    else
+      return this->balanceTree(this->getLeft(T), this->getValue(T),
+                               add_internal(V, this->getRight(T)));
+  }
+
+  // Remove all overlaps from T.
+  TreeTy *removeAllOverlaps(TreeTy *T, key_type_ref K) {
+    bool Changed;
+    do {
+      Changed = false;
+      T = removeOverlap(T, K, Changed);
+      this->markImmutable(T);
+    } while (Changed);
+
+    return T;
+  }
+
+  // Remove one overlap from T.
+  TreeTy *removeOverlap(TreeTy *T, key_type_ref K, bool &Changed) {
+    if (!T)
+      return NULL;
+    Interval CurrentK = ImutInfo::KeyOfValue(this->getValue(T));
+
+    // If the current key does not overlap the inserted key.
+    if (CurrentK.getStart() > K.getEnd())
+      return this->balanceTree(removeOverlap(this->getLeft(T), K, Changed),
+                               this->getValue(T), this->getRight(T));
+    else if (CurrentK.getEnd() < K.getStart())
+      return this->balanceTree(this->getLeft(T), this->getValue(T),
+                               removeOverlap(this->getRight(T), K, Changed));
+
+    // The current key overlaps with the inserted key.
+    // Remove the current key.
+    Changed = true;
+    data_type_ref OldData = ImutInfo::DataOfValue(this->getValue(T));
+    T = this->remove_internal(CurrentK, T);
+    // Add back the non-overlapping parts of the current key.
+    if (CurrentK.getStart() < K.getStart()) {
+      if (CurrentK.getEnd() <= K.getEnd()) {
+        Interval NewK(CurrentK.getStart(), K.getStart()-1);
+        return add_internal(std::make_pair(NewK, OldData), T);
+      } else {
+        Interval NewK1(CurrentK.getStart(), K.getStart()-1);
+        T = add_internal(std::make_pair(NewK1, OldData), T);
+
+        Interval NewK2(K.getEnd()+1, CurrentK.getEnd());
+        return add_internal(std::make_pair(NewK2, OldData), T);
+      }
+    } else {
+      if (CurrentK.getEnd() > K.getEnd()) {
+        Interval NewK(K.getEnd()+1, CurrentK.getEnd());
+        return add_internal(std::make_pair(NewK, OldData), T);
+      } else
+        return T;
+    }
+  }
+};
+
+/// ImmutableIntervalMap maps an interval [start, end] to a value. The
+/// intervals in the map are guaranteed to be disjoint.
+template <typename ValT>
+class ImmutableIntervalMap
+  : public ImmutableMap<Interval, ValT, ImutIntervalInfo<ValT> > {
+
+  typedef typename ImutIntervalInfo<ValT>::value_type      value_type;
+  typedef typename ImutIntervalInfo<ValT>::value_type_ref  value_type_ref;
+  typedef typename ImutIntervalInfo<ValT>::key_type        key_type;
+  typedef typename ImutIntervalInfo<ValT>::key_type_ref    key_type_ref;
+  typedef typename ImutIntervalInfo<ValT>::data_type       data_type;
+  typedef typename ImutIntervalInfo<ValT>::data_type_ref   data_type_ref;
+  typedef ImutAVLTree<ImutIntervalInfo<ValT> > TreeTy;
+
+public:
+  explicit ImmutableIntervalMap(TreeTy *R)
+    : ImmutableMap<Interval, ValT, ImutIntervalInfo<ValT> >(R) {}
+
+  class Factory {
+    ImutIntervalAVLFactory<ImutIntervalInfo<ValT> > F;
+
+  public:
+    Factory(BumpPtrAllocator& Alloc) : F(Alloc) {}
+
+    ImmutableIntervalMap getEmptyMap() {
+      return ImmutableIntervalMap(F.getEmptyTree());
+    }
+
+    ImmutableIntervalMap add(ImmutableIntervalMap Old,
+                             key_type_ref K, data_type_ref D) {
+      TreeTy *T = F.Add(Old.Root, std::pair<key_type, data_type>(K, D));
+      return ImmutableIntervalMap(F.getCanonicalTree(T));
+    }
+
+    ImmutableIntervalMap remove(ImmutableIntervalMap Old, key_type_ref K) {
+      TreeTy *T = F.remove(Old.Root, K);
+      return ImmutableIntervalMap(F.getCanonicalTree(T));
+    }
+
+    data_type *lookup(ImmutableIntervalMap M, key_type_ref K) {
+      TreeTy *T = F.Find(M.getRoot(), K);
+      if (T)
+        return &T->getValue().second;
+      else
+        return 0;
+    }
+  };
+
+private:
+  // For ImmutableIntervalMap, the lookup operation has to be done by the
+  // factory.
+  data_type *lookup(key_type_ref K) const;
+};
+
+} // end namespace llvm
diff --git a/final/include/llvm/ADT/ImmutableList.h b/final/include/llvm/ADT/ImmutableList.h
new file mode 100644
index 00000000000..714355b9513
--- /dev/null
+++ b/final/include/llvm/ADT/ImmutableList.h
@@ -0,0 +1,222 @@
+//==--- ImmutableList.h - Immutable (functional) list interface --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ImmutableList class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_IMLIST_H
+#define LLVM_ADT_IMLIST_H
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+template <typename T> class ImmutableListFactory;
+
+template <typename T>
+class ImmutableListImpl : public FoldingSetNode {
+  T Head;
+  const ImmutableListImpl* Tail;
+
+  ImmutableListImpl(const T& head, const ImmutableListImpl* tail = 0)
+    : Head(head), Tail(tail) {}
+
+  friend class ImmutableListFactory<T>;
+
+  // Do not implement.
+  void operator=(const ImmutableListImpl&);
+  ImmutableListImpl(const ImmutableListImpl&);
+
+public:
+  const T& getHead() const { return Head; }
+  const ImmutableListImpl* getTail() const { return Tail; }
+
+  static inline void Profile(FoldingSetNodeID& ID, const T& H,
+                             const ImmutableListImpl* L){
+    ID.AddPointer(L);
+    ID.Add(H);
+  }
+
+  void Profile(FoldingSetNodeID& ID) {
+    Profile(ID, Head, Tail);
+  }
+};
+
+/// ImmutableList - This class represents an immutable (functional) list.
+/// It is implemented as a smart pointer (wraps ImmutableListImpl), so it
+/// is intended to always be copied by value as if it were a pointer.
+/// This interface matches ImmutableSet and ImmutableMap. ImmutableList
+/// objects should almost never be created directly, and instead should
+/// be created by ImmutableListFactory objects that manage the lifetime
+/// of a group of lists. When the factory object is reclaimed, all lists
+/// created by that factory are released as well.
+template <typename T>
+class ImmutableList {
+public:
+  typedef T value_type;
+  typedef ImmutableListFactory<T> Factory;
+
+private:
+  const ImmutableListImpl<T>* X;
+
+public:
+  // This constructor should normally only be called by ImmutableListFactory<T>.
+  // There may be cases, however, when one needs to extract the internal pointer
+  // and reconstruct a list object from that pointer.
+  ImmutableList(const ImmutableListImpl<T>* x = 0) : X(x) {}
+
+  const ImmutableListImpl<T>* getInternalPointer() const {
+    return X;
+  }
+
+  class iterator {
+    const ImmutableListImpl<T>* L;
+  public:
+    iterator() : L(0) {}
+    iterator(ImmutableList l) : L(l.getInternalPointer()) {}
+
+    iterator& operator++() { L = L->getTail(); return *this; }
+    bool operator==(const iterator& I) const { return L == I.L; }
+    bool operator!=(const iterator& I) const { return L != I.L; }
+    const value_type& operator*() const { return L->getHead(); }
+    ImmutableList getList() const { return L; }
+  };
+
+  /// begin - Returns an iterator referring to the head of the list, or
+  /// an iterator denoting the end of the list if the list is empty.
+  iterator begin() const { return iterator(X); }
+
+  /// end - Returns an iterator denoting the end of the list.  This iterator
+  /// does not refer to a valid list element.
+  iterator end() const { return iterator(); }
+
+  /// isEmpty - Returns true if the list is empty.
+  bool isEmpty() const { return !X; }
+
+  /// isEqual - Returns true if two lists are equal.  Because all lists created
+  /// from the same ImmutableListFactory are uniqued, this has O(1) complexity
+  /// because the contents of the lists do not need to be compared.  Note
+  /// that you should only compare two lists created from the same
+  /// ImmutableListFactory.
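+  //
+  // Typical usage, shown only as an illustrative sketch:
+  //
+  //   ImmutableList<int>::Factory F;
+  //   ImmutableList<int> L = F.getEmptyList();
+  //   L = F.add(3, L);   // yields [3]
+  //   L = F.add(2, L);   // yields [2, 3]
+  //   for (ImmutableList<int>::iterator I = L.begin(), E = L.end(); I != E; ++I)
+  //     /* *I visits 2, then 3 */;
+  //
+  // Because the factory uniques lists, adding the same elements again would
+  // return the identical internal pointer.
+  //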
+  bool isEqual(const ImmutableList& L) const { return X == L.X; }
+
+  bool operator==(const ImmutableList& L) const { return isEqual(L); }
+
+  /// getHead - Returns the head of the list.
+  const T& getHead() {
+    assert (!isEmpty() && "Cannot get the head of an empty list.");
+    return X->getHead();
+  }
+
+  /// getTail - Returns the tail of the list, which is another (possibly empty)
+  /// ImmutableList.
+  ImmutableList getTail() {
+    return X ? X->getTail() : 0;
+  }
+
+  void Profile(FoldingSetNodeID& ID) const {
+    ID.AddPointer(X);
+  }
+};
+
+template <typename T>
+class ImmutableListFactory {
+  typedef ImmutableListImpl<T> ListTy;
+  typedef FoldingSet<ListTy>   CacheTy;
+
+  CacheTy Cache;
+  uintptr_t Allocator;
+
+  bool ownsAllocator() const {
+    return Allocator & 0x1 ? false : true;
+  }
+
+  BumpPtrAllocator& getAllocator() const {
+    return *reinterpret_cast<BumpPtrAllocator*>(Allocator & ~0x1);
+  }
+
+public:
+  ImmutableListFactory()
+    : Allocator(reinterpret_cast<uintptr_t>(new BumpPtrAllocator())) {}
+
+  ImmutableListFactory(BumpPtrAllocator& Alloc)
+  : Allocator(reinterpret_cast<uintptr_t>(&Alloc) | 0x1) {}
+
+  ~ImmutableListFactory() {
+    if (ownsAllocator()) delete &getAllocator();
+  }
+
+  ImmutableList<T> concat(const T& Head, ImmutableList<T> Tail) {
+    // Profile the new list to see if it already exists in our cache.
+    FoldingSetNodeID ID;
+    void* InsertPos;
+
+    const ListTy* TailImpl = Tail.getInternalPointer();
+    ListTy::Profile(ID, Head, TailImpl);
+    ListTy* L = Cache.FindNodeOrInsertPos(ID, InsertPos);
+
+    if (!L) {
+      // The list does not exist in our cache.  Create it.
+      BumpPtrAllocator& A = getAllocator();
+      L = (ListTy*) A.Allocate<ListTy>();
+      new (L) ListTy(Head, TailImpl);
+
+      // Insert the new list into the cache.
+      Cache.InsertNode(L, InsertPos);
+    }
+
+    return L;
+  }
+
+  ImmutableList<T> add(const T& D, ImmutableList<T> L) {
+    return concat(D, L);
+  }
+
+  ImmutableList<T> getEmptyList() const {
+    return ImmutableList<T>(0);
+  }
+
+  ImmutableList<T> create(const T& X) {
+    return concat(X, getEmptyList());
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Partially-specialized Traits.
+//===----------------------------------------------------------------------===//
+
+template<typename T> struct DenseMapInfo;
+template<typename T> struct DenseMapInfo<ImmutableList<T> > {
+  static inline ImmutableList<T> getEmptyKey() {
+    return reinterpret_cast<ImmutableListImpl<T>*>(-1);
+  }
+  static inline ImmutableList<T> getTombstoneKey() {
+    return reinterpret_cast<ImmutableListImpl<T>*>(-2);
+  }
+  static unsigned getHashValue(ImmutableList<T> X) {
+    uintptr_t PtrVal = reinterpret_cast<uintptr_t>(X.getInternalPointer());
+    return (unsigned((uintptr_t)PtrVal) >> 4) ^
+           (unsigned((uintptr_t)PtrVal) >> 9);
+  }
+  static bool isEqual(ImmutableList<T> X1, ImmutableList<T> X2) {
+    return X1 == X2;
+  }
+};
+
+template <typename T> struct isPodLike;
+template <typename T>
+struct isPodLike<ImmutableList<T> > { static const bool value = true; };
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/ImmutableMap.h b/final/include/llvm/ADT/ImmutableMap.h
new file mode 100644
index 00000000000..d6cce7ccfa0
--- /dev/null
+++ b/final/include/llvm/ADT/ImmutableMap.h
@@ -0,0 +1,261 @@
+//===--- ImmutableMap.h - Immutable (functional) map interface --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ImmutableMap class.
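+//
+// A typical use, for illustration only (the key/value choices are
+// hypothetical):
+//
+//   ImmutableMap<unsigned, unsigned>::Factory F;
+//   ImmutableMap<unsigned, unsigned> M = F.getEmptyMap();
+//   M = F.add(M, 1, 10);
+//   M = F.add(M, 2, 20);
+//   if (const unsigned *V = M.lookup(2)) { /* *V == 20 */ }
+//
+// Each add/remove returns a new map value; maps obtained earlier are
+// unchanged.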
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_IMMAP_H
+#define LLVM_ADT_IMMAP_H
+
+#include "llvm/ADT/ImmutableSet.h"
+
+namespace llvm {
+
+/// ImutKeyValueInfo - Traits class used by ImmutableMap.  While both the first
+/// and second elements in a pair are used to generate profile information,
+/// only the first element (the key) is used by isEqual and isLess.
+template <typename T, typename S>
+struct ImutKeyValueInfo {
+  typedef const std::pair<T,S> value_type;
+  typedef const value_type& value_type_ref;
+  typedef const T   key_type;
+  typedef const T&  key_type_ref;
+  typedef const S   data_type;
+  typedef const S&  data_type_ref;
+
+  static inline key_type_ref KeyOfValue(value_type_ref V) {
+    return V.first;
+  }
+
+  static inline data_type_ref DataOfValue(value_type_ref V) {
+    return V.second;
+  }
+
+  static inline bool isEqual(key_type_ref L, key_type_ref R) {
+    return ImutContainerInfo<T>::isEqual(L,R);
+  }
+  static inline bool isLess(key_type_ref L, key_type_ref R) {
+    return ImutContainerInfo<T>::isLess(L,R);
+  }
+
+  static inline bool isDataEqual(data_type_ref L, data_type_ref R) {
+    return ImutContainerInfo<S>::isEqual(L,R);
+  }
+
+  static inline void Profile(FoldingSetNodeID& ID, value_type_ref V) {
+    ImutContainerInfo<T>::Profile(ID, V.first);
+    ImutContainerInfo<S>::Profile(ID, V.second);
+  }
+};
+
+
+template <typename KeyT, typename ValT,
+          typename ValInfo = ImutKeyValueInfo<KeyT,ValT> >
+class ImmutableMap {
+public:
+  typedef typename ValInfo::value_type      value_type;
+  typedef typename ValInfo::value_type_ref  value_type_ref;
+  typedef typename ValInfo::key_type        key_type;
+  typedef typename ValInfo::key_type_ref    key_type_ref;
+  typedef typename ValInfo::data_type       data_type;
+  typedef typename ValInfo::data_type_ref   data_type_ref;
+  typedef ImutAVLTree<ValInfo> TreeTy;
+
+protected:
+  TreeTy* Root;
+
+public:
+  /// Constructs a map from a pointer to a tree root.  In general one
+  /// should use a Factory object to create maps instead of directly
+  /// invoking the constructor, but there are cases where making this
+  /// constructor public is useful.
+  explicit ImmutableMap(const TreeTy* R) : Root(const_cast<TreeTy*>(R)) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableMap(const ImmutableMap &X) : Root(X.Root) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableMap &operator=(const ImmutableMap &X) {
+    if (Root != X.Root) {
+      if (X.Root) { X.Root->retain(); }
+      if (Root) { Root->release(); }
+      Root = X.Root;
+    }
+    return *this;
+  }
+  ~ImmutableMap() {
+    if (Root) { Root->release(); }
+  }
+
+  class Factory {
+    typename TreeTy::Factory F;
+    const bool Canonicalize;
+
+  public:
+    Factory(bool canonicalize = true)
+      : Canonicalize(canonicalize) {}
+
+    Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
+      : F(Alloc), Canonicalize(canonicalize) {}
+
+    ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); }
+
+    ImmutableMap add(ImmutableMap Old, key_type_ref K, data_type_ref D) {
+      TreeTy *T = F.add(Old.Root, std::pair<key_type,data_type>(K,D));
+      return ImmutableMap(Canonicalize ? F.getCanonicalTree(T): T);
+    }
+
+    ImmutableMap remove(ImmutableMap Old, key_type_ref K) {
+      TreeTy *T = F.remove(Old.Root,K);
+      return ImmutableMap(Canonicalize ? F.getCanonicalTree(T): T);
+    }
+
+  private:
+    Factory(const Factory& RHS); // DO NOT IMPLEMENT
+    void operator=(const Factory& RHS); // DO NOT IMPLEMENT
+  };
+
+  bool contains(key_type_ref K) const {
+    return Root ? Root->contains(K) : false;
+  }
+
+  bool operator==(const ImmutableMap &RHS) const {
+    return Root && RHS.Root ?
Root->isEqual(*RHS.Root) : Root == RHS.Root; + } + + bool operator!=(const ImmutableMap &RHS) const { + return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root; + } + + TreeTy *getRoot() const { + if (Root) { Root->retain(); } + return Root; + } + + TreeTy *getRootWithoutRetain() const { + return Root; + } + + void manualRetain() { + if (Root) Root->retain(); + } + + void manualRelease() { + if (Root) Root->release(); + } + + bool isEmpty() const { return !Root; } + + //===--------------------------------------------------===// + // Foreach - A limited form of map iteration. + //===--------------------------------------------------===// + +private: + template + struct CBWrapper { + Callback C; + void operator()(value_type_ref V) { C(V.first,V.second); } + }; + + template + struct CBWrapperRef { + Callback &C; + CBWrapperRef(Callback& c) : C(c) {} + + void operator()(value_type_ref V) { C(V.first,V.second); } + }; + +public: + template + void foreach(Callback& C) { + if (Root) { + CBWrapperRef CB(C); + Root->foreach(CB); + } + } + + template + void foreach() { + if (Root) { + CBWrapper CB; + Root->foreach(CB); + } + } + + //===--------------------------------------------------===// + // For testing. + //===--------------------------------------------------===// + + void verify() const { if (Root) Root->verify(); } + + //===--------------------------------------------------===// + // Iterators. + //===--------------------------------------------------===// + + class iterator { + typename TreeTy::iterator itr; + + iterator() {} + iterator(TreeTy* t) : itr(t) {} + friend class ImmutableMap; + + public: + value_type_ref operator*() const { return itr->getValue(); } + value_type* operator->() const { return &itr->getValue(); } + + key_type_ref getKey() const { return itr->getValue().first; } + data_type_ref getData() const { return itr->getValue().second; } + + + iterator& operator++() { ++itr; return *this; } + iterator operator++(int) { iterator tmp(*this); ++itr; return tmp; } + iterator& operator--() { --itr; return *this; } + iterator operator--(int) { iterator tmp(*this); --itr; return tmp; } + bool operator==(const iterator& RHS) const { return RHS.itr == itr; } + bool operator!=(const iterator& RHS) const { return RHS.itr != itr; } + }; + + iterator begin() const { return iterator(Root); } + iterator end() const { return iterator(); } + + data_type* lookup(key_type_ref K) const { + if (Root) { + TreeTy* T = Root->find(K); + if (T) return &T->getValue().second; + } + + return 0; + } + + /// getMaxElement - Returns the pair in the ImmutableMap for + /// which key is the highest in the ordering of keys in the map. This + /// method returns NULL if the map is empty. + value_type* getMaxElement() const { + return Root ? &(Root->getMaxElement()->getValue()) : 0; + } + + //===--------------------------------------------------===// + // Utility methods. + //===--------------------------------------------------===// + + unsigned getHeight() const { return Root ? 
Root->getHeight() : 0;
+  }
+
+  static inline void Profile(FoldingSetNodeID& ID, const ImmutableMap& M) {
+    ID.AddPointer(M.Root);
+  }
+
+  inline void Profile(FoldingSetNodeID& ID) const {
+    return Profile(ID,*this);
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/ImmutableSet.h b/final/include/llvm/ADT/ImmutableSet.h
new file mode 100644
index 00000000000..3ca910ce944
--- /dev/null
+++ b/final/include/llvm/ADT/ImmutableSet.h
@@ -0,0 +1,1084 @@
+//===--- ImmutableSet.h - Immutable (functional) set interface --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ImutAVLTree and ImmutableSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_IMSET_H
+#define LLVM_ADT_IMSET_H
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <functional>
+#include <vector>
+#include <stdio.h>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Immutable AVL-Tree Definition.
+//===----------------------------------------------------------------------===//
+
+template <typename ImutInfo> class ImutAVLFactory;
+template <typename ImutInfo> class ImutIntervalAVLFactory;
+template <typename ImutInfo> class ImutAVLTreeInOrderIterator;
+template <typename ImutInfo> class ImutAVLTreeGenericIterator;
+
+template <typename ImutInfo>
+class ImutAVLTree {
+public:
+  typedef typename ImutInfo::key_type_ref   key_type_ref;
+  typedef typename ImutInfo::value_type     value_type;
+  typedef typename ImutInfo::value_type_ref value_type_ref;
+
+  typedef ImutAVLFactory<ImutInfo>          Factory;
+  friend class ImutAVLFactory<ImutInfo>;
+  friend class ImutIntervalAVLFactory<ImutInfo>;
+
+  friend class ImutAVLTreeGenericIterator<ImutInfo>;
+
+  typedef ImutAVLTreeInOrderIterator<ImutInfo>  iterator;
+
+  //===----------------------------------------------------===//
+  // Public Interface.
+  //===----------------------------------------------------===//
+
+  /// Return a pointer to the left subtree.  This value
+  ///  is NULL if there is no left subtree.
+  ImutAVLTree *getLeft() const { return left; }
+
+  /// Return a pointer to the right subtree.  This value is
+  ///  NULL if there is no right subtree.
+  ImutAVLTree *getRight() const { return right; }
+
+  /// getHeight - Returns the height of the tree.  A tree with no subtrees
+  ///  has a height of 1.
+  unsigned getHeight() const { return height; }
+
+  /// getValue - Returns the data value associated with the tree node.
+  const value_type& getValue() const { return value; }
+
+  /// find - Finds the subtree associated with the specified key value.
+  ///  This method returns NULL if no matching subtree is found.
+  ImutAVLTree* find(key_type_ref K) {
+    ImutAVLTree *T = this;
+    while (T) {
+      key_type_ref CurrentKey = ImutInfo::KeyOfValue(T->getValue());
+      if (ImutInfo::isEqual(K,CurrentKey))
+        return T;
+      else if (ImutInfo::isLess(K,CurrentKey))
+        T = T->getLeft();
+      else
+        T = T->getRight();
+    }
+    return NULL;
+  }
+
+  /// getMaxElement - Find the subtree associated with the highest ranged
+  ///  key value.
+  ImutAVLTree* getMaxElement() {
+    ImutAVLTree *T = this;
+    ImutAVLTree *Right = T->getRight();
+    while (Right) { T = Right; Right = T->getRight(); }
+    return T;
+  }
+
+  /// size - Returns the number of nodes in the tree, which includes
+  ///  both leaves and non-leaf nodes.
+  unsigned size() const {
+    unsigned n = 1;
+    if (const ImutAVLTree* L = getLeft())
+      n += L->size();
+    if (const ImutAVLTree* R = getRight())
+      n += R->size();
+    return n;
+  }
+
+  /// begin - Returns an iterator that iterates over the nodes of the tree
+  ///  in an inorder traversal.  The returned iterator thus refers to the
+  ///  tree node with the minimum data element.
+  iterator begin() const { return iterator(this); }
+
+  /// end - Returns an iterator for the tree that denotes the end of an
+  ///  inorder traversal.
+  iterator end() const { return iterator(); }
+
+  bool isElementEqual(value_type_ref V) const {
+    // Compare the keys.
+    if (!ImutInfo::isEqual(ImutInfo::KeyOfValue(getValue()),
+                           ImutInfo::KeyOfValue(V)))
+      return false;
+
+    // Also compare the data values.
+    if (!ImutInfo::isDataEqual(ImutInfo::DataOfValue(getValue()),
+                               ImutInfo::DataOfValue(V)))
+      return false;
+
+    return true;
+  }
+
+  bool isElementEqual(const ImutAVLTree* RHS) const {
+    return isElementEqual(RHS->getValue());
+  }
+
+  /// isEqual - Compares two trees for structural equality and returns true
+  ///   if they are equal.  The worst-case performance of this operation is
+  ///   linear in the sizes of the trees.
+  bool isEqual(const ImutAVLTree& RHS) const {
+    if (&RHS == this)
+      return true;
+
+    iterator LItr = begin(), LEnd = end();
+    iterator RItr = RHS.begin(), REnd = RHS.end();
+
+    while (LItr != LEnd && RItr != REnd) {
+      if (*LItr == *RItr) {
+        LItr.skipSubTree();
+        RItr.skipSubTree();
+        continue;
+      }
+
+      if (!LItr->isElementEqual(*RItr))
+        return false;
+
+      ++LItr;
+      ++RItr;
+    }
+
+    return LItr == LEnd && RItr == REnd;
+  }
+
+  /// isNotEqual - Compares two trees for structural inequality.  Performance
+  ///  is the same as isEqual.
+  bool isNotEqual(const ImutAVLTree& RHS) const { return !isEqual(RHS); }
+
+  /// contains - Returns true if this tree contains a subtree (node) that
+  ///  has a data element that matches the specified key.  Complexity
+  ///  is logarithmic in the size of the tree.
+  bool contains(key_type_ref K) { return (bool) find(K); }
+
+  /// foreach - A member template that invokes operator() on a functor
+  ///  object (specified by Callback) for every node/subtree in the tree.
+  ///  Nodes are visited using an inorder traversal.
+  template <typename Callback>
+  void foreach(Callback& C) {
+    if (ImutAVLTree* L = getLeft())
+      L->foreach(C);
+
+    C(value);
+
+    if (ImutAVLTree* R = getRight())
+      R->foreach(C);
+  }
+
+  /// validateTree - A utility method that checks that the balancing and
+  ///  ordering invariants of the tree are satisfied.  It is a recursive
+  ///  method that returns the height of the tree, which is then consumed
+  ///  by the enclosing validateTree call.  External callers should ignore the
+  ///  return value.  An invalid tree will cause an assertion to fire in
+  ///  a debug build.
+  unsigned validateTree() const {
+    unsigned HL = getLeft() ? getLeft()->validateTree() : 0;
+    unsigned HR = getRight() ? getRight()->validateTree() : 0;
+    (void) HL;
+    (void) HR;
+
+    assert(getHeight() == ( HL > HR ? HL : HR ) + 1
+            && "Height calculation wrong");
+
+    assert((HL > HR ? HL-HR : HR-HL) <= 2
+           && "Balancing invariant violated");
+
+    assert((!getLeft() ||
+            ImutInfo::isLess(ImutInfo::KeyOfValue(getLeft()->getValue()),
+                             ImutInfo::KeyOfValue(getValue()))) &&
+           "Value in left child is not less than current value");
+
+    assert((!getRight() ||
+            ImutInfo::isLess(ImutInfo::KeyOfValue(getValue()),
+                             ImutInfo::KeyOfValue(getRight()->getValue()))) &&
+           "Current value is not less than value of right child");
+
+    return getHeight();
+  }
+
+  //===----------------------------------------------------===//
+  // Internal values.
+  //===----------------------------------------------------===//
+
+private:
+  Factory *factory;
+  ImutAVLTree *left;
+  ImutAVLTree *right;
+  ImutAVLTree *prev;
+  ImutAVLTree *next;
+
+  unsigned height         : 28;
+  unsigned IsMutable      : 1;
+  unsigned IsDigestCached : 1;
+  unsigned IsCanonicalized : 1;
+
+  value_type value;
+  uint32_t digest;
+  uint32_t refCount;
+
+  //===----------------------------------------------------===//
+  // Internal methods (node manipulation; used by Factory).
+  //===----------------------------------------------------===//
+
+private:
+  /// ImutAVLTree - Internal constructor that is only called by
+  ///   ImutAVLFactory.
+  ImutAVLTree(Factory *f, ImutAVLTree* l, ImutAVLTree* r, value_type_ref v,
+              unsigned height)
+    : factory(f), left(l), right(r), prev(0), next(0), height(height),
+      IsMutable(true), IsDigestCached(false), IsCanonicalized(0),
+      value(v), digest(0), refCount(0)
+  {
+    if (left) left->retain();
+    if (right) right->retain();
+  }
+
+  /// isMutable - Returns true if the left and right subtree references
+  ///  (as well as height) can be changed.  If this method returns false,
+  ///  the tree is truly immutable.  Trees returned from an ImutAVLFactory
+  ///  object should always have this method return true.  Further, if this
+  ///  method returns false for an instance of ImutAVLTree, all subtrees
+  ///  will also have this method return false.  The converse is not true.
+  bool isMutable() const { return IsMutable; }
+
+  /// hasCachedDigest - Returns true if the digest for this tree is cached.
+  ///  This can only be true if the tree is immutable.
+  bool hasCachedDigest() const { return IsDigestCached; }
+
+  //===----------------------------------------------------===//
+  // Mutating operations.  A tree root can be manipulated as
+  // long as its reference has not "escaped" from internal
+  // methods of a factory object (see below).  When a tree
+  // pointer is externally viewable by client code, the
+  // internal "mutable bit" is cleared to mark the tree
+  // immutable.  Note that a tree that still has its mutable
+  // bit set may have children (subtrees) that are themselves
+  // immutable.
+  //===----------------------------------------------------===//
+
+  /// markImmutable - Clears the mutable flag for a tree.  After this happens,
+  ///   it is an error to call setLeft(), setRight(), and setHeight().
+  void markImmutable() {
+    assert(isMutable() && "Mutable flag already removed.");
+    IsMutable = false;
+  }
+
+  /// markedCachedDigest - Clears the NoCachedDigest flag for a tree.
+  void markedCachedDigest() {
+    assert(!hasCachedDigest() && "NoCachedDigest flag already removed.");
+    IsDigestCached = true;
+  }
+
+  /// setHeight - Changes the height of the tree.  Used internally by
+  ///  ImutAVLFactory.
+ void setHeight(unsigned h) { + assert(isMutable() && "Only a mutable tree can have its height changed."); + height = h; + } + + static inline + uint32_t computeDigest(ImutAVLTree* L, ImutAVLTree* R, value_type_ref V) { + uint32_t digest = 0; + + if (L) + digest += L->computeDigest(); + + // Compute digest of stored data. + FoldingSetNodeID ID; + ImutInfo::Profile(ID,V); + digest += ID.ComputeHash(); + + if (R) + digest += R->computeDigest(); + + return digest; + } + + inline uint32_t computeDigest() { + // Check the lowest bit to determine if digest has actually been + // pre-computed. + if (hasCachedDigest()) + return digest; + + uint32_t X = computeDigest(getLeft(), getRight(), getValue()); + digest = X; + markedCachedDigest(); + return X; + } + + //===----------------------------------------------------===// + // Reference count operations. + //===----------------------------------------------------===// + +public: + void retain() { ++refCount; } + void release() { + assert(refCount > 0); + if (--refCount == 0) + destroy(); + } + void destroy() { + if (left) + left->release(); + if (right) + right->release(); + if (IsCanonicalized) { + if (next) + next->prev = prev; + + if (prev) + prev->next = next; + else + factory->Cache[computeDigest()] = next; + } + + // We need to clear the mutability bit in case we are + // destroying the node as part of a sweep in ImutAVLFactory::recoverNodes(). + IsMutable = false; + factory->freeNodes.push_back(this); + } +}; + +//===----------------------------------------------------------------------===// +// Immutable AVL-Tree Factory class. +//===----------------------------------------------------------------------===// + +template +class ImutAVLFactory { + friend class ImutAVLTree; + typedef ImutAVLTree TreeTy; + typedef typename TreeTy::value_type_ref value_type_ref; + typedef typename TreeTy::key_type_ref key_type_ref; + + typedef DenseMap CacheTy; + + CacheTy Cache; + uintptr_t Allocator; + std::vector createdNodes; + std::vector freeNodes; + + bool ownsAllocator() const { + return Allocator & 0x1 ? false : true; + } + + BumpPtrAllocator& getAllocator() const { + return *reinterpret_cast(Allocator & ~0x1); + } + + //===--------------------------------------------------===// + // Public interface. + //===--------------------------------------------------===// + +public: + ImutAVLFactory() + : Allocator(reinterpret_cast(new BumpPtrAllocator())) {} + + ImutAVLFactory(BumpPtrAllocator& Alloc) + : Allocator(reinterpret_cast(&Alloc) | 0x1) {} + + ~ImutAVLFactory() { + if (ownsAllocator()) delete &getAllocator(); + } + + TreeTy* add(TreeTy* T, value_type_ref V) { + T = add_internal(V,T); + markImmutable(T); + recoverNodes(); + return T; + } + + TreeTy* remove(TreeTy* T, key_type_ref V) { + T = remove_internal(V,T); + markImmutable(T); + recoverNodes(); + return T; + } + + TreeTy* getEmptyTree() const { return NULL; } + +protected: + + //===--------------------------------------------------===// + // A bunch of quick helper functions used for reasoning + // about the properties of trees and their children. + // These have succinct names so that the balancing code + // is as terse (and readable) as possible. + //===--------------------------------------------------===// + + bool isEmpty(TreeTy* T) const { return !T; } + unsigned getHeight(TreeTy* T) const { return T ? 
T->getHeight() : 0; } + TreeTy* getLeft(TreeTy* T) const { return T->getLeft(); } + TreeTy* getRight(TreeTy* T) const { return T->getRight(); } + value_type_ref getValue(TreeTy* T) const { return T->value; } + + unsigned incrementHeight(TreeTy* L, TreeTy* R) const { + unsigned hl = getHeight(L); + unsigned hr = getHeight(R); + return (hl > hr ? hl : hr) + 1; + } + + static bool compareTreeWithSection(TreeTy* T, + typename TreeTy::iterator& TI, + typename TreeTy::iterator& TE) { + typename TreeTy::iterator I = T->begin(), E = T->end(); + for ( ; I!=E ; ++I, ++TI) { + if (TI == TE || !I->isElementEqual(*TI)) + return false; + } + return true; + } + + //===--------------------------------------------------===// + // "createNode" is used to generate new tree roots that link + // to other trees. The functon may also simply move links + // in an existing root if that root is still marked mutable. + // This is necessary because otherwise our balancing code + // would leak memory as it would create nodes that are + // then discarded later before the finished tree is + // returned to the caller. + //===--------------------------------------------------===// + + TreeTy* createNode(TreeTy* L, value_type_ref V, TreeTy* R) { + BumpPtrAllocator& A = getAllocator(); + TreeTy* T; + if (!freeNodes.empty()) { + T = freeNodes.back(); + freeNodes.pop_back(); + assert(T != L); + assert(T != R); + } + else { + T = (TreeTy*) A.Allocate(); + } + new (T) TreeTy(this, L, R, V, incrementHeight(L,R)); + createdNodes.push_back(T); + return T; + } + + TreeTy* createNode(TreeTy* newLeft, TreeTy* oldTree, TreeTy* newRight) { + return createNode(newLeft, getValue(oldTree), newRight); + } + + void recoverNodes() { + for (unsigned i = 0, n = createdNodes.size(); i < n; ++i) { + TreeTy *N = createdNodes[i]; + if (N->isMutable() && N->refCount == 0) + N->destroy(); + } + createdNodes.clear(); + } + + /// balanceTree - Used by add_internal and remove_internal to + /// balance a newly created tree. + TreeTy* balanceTree(TreeTy* L, value_type_ref V, TreeTy* R) { + unsigned hl = getHeight(L); + unsigned hr = getHeight(R); + + if (hl > hr + 2) { + assert(!isEmpty(L) && "Left tree cannot be empty to have a height >= 2"); + + TreeTy *LL = getLeft(L); + TreeTy *LR = getRight(L); + + if (getHeight(LL) >= getHeight(LR)) + return createNode(LL, L, createNode(LR,V,R)); + + assert(!isEmpty(LR) && "LR cannot be empty because it has a height >= 1"); + + TreeTy *LRL = getLeft(LR); + TreeTy *LRR = getRight(LR); + + return createNode(createNode(LL,L,LRL), LR, createNode(LRR,V,R)); + } + else if (hr > hl + 2) { + assert(!isEmpty(R) && "Right tree cannot be empty to have a height >= 2"); + + TreeTy *RL = getLeft(R); + TreeTy *RR = getRight(R); + + if (getHeight(RR) >= getHeight(RL)) + return createNode(createNode(L,V,RL), R, RR); + + assert(!isEmpty(RL) && "RL cannot be empty because it has a height >= 1"); + + TreeTy *RLL = getLeft(RL); + TreeTy *RLR = getRight(RL); + + return createNode(createNode(L,V,RLL), RL, createNode(RLR,R,RR)); + } + else + return createNode(L,V,R); + } + + /// add_internal - Creates a new tree that includes the specified + /// data and the data from the original tree. If the original tree + /// already contained the data item, the original tree is returned. 
+ TreeTy* add_internal(value_type_ref V, TreeTy* T) { + if (isEmpty(T)) + return createNode(T, V, T); + assert(!T->isMutable()); + + key_type_ref K = ImutInfo::KeyOfValue(V); + key_type_ref KCurrent = ImutInfo::KeyOfValue(getValue(T)); + + if (ImutInfo::isEqual(K,KCurrent)) + return createNode(getLeft(T), V, getRight(T)); + else if (ImutInfo::isLess(K,KCurrent)) + return balanceTree(add_internal(V, getLeft(T)), getValue(T), getRight(T)); + else + return balanceTree(getLeft(T), getValue(T), add_internal(V, getRight(T))); + } + + /// remove_internal - Creates a new tree that includes all the data + /// from the original tree except the specified data. If the + /// specified data did not exist in the original tree, the original + /// tree is returned. + TreeTy* remove_internal(key_type_ref K, TreeTy* T) { + if (isEmpty(T)) + return T; + + assert(!T->isMutable()); + + key_type_ref KCurrent = ImutInfo::KeyOfValue(getValue(T)); + + if (ImutInfo::isEqual(K,KCurrent)) { + return combineTrees(getLeft(T), getRight(T)); + } else if (ImutInfo::isLess(K,KCurrent)) { + return balanceTree(remove_internal(K, getLeft(T)), + getValue(T), getRight(T)); + } else { + return balanceTree(getLeft(T), getValue(T), + remove_internal(K, getRight(T))); + } + } + + TreeTy* combineTrees(TreeTy* L, TreeTy* R) { + if (isEmpty(L)) + return R; + if (isEmpty(R)) + return L; + TreeTy* OldNode; + TreeTy* newRight = removeMinBinding(R,OldNode); + return balanceTree(L, getValue(OldNode), newRight); + } + + TreeTy* removeMinBinding(TreeTy* T, TreeTy*& Noderemoved) { + assert(!isEmpty(T)); + if (isEmpty(getLeft(T))) { + Noderemoved = T; + return getRight(T); + } + return balanceTree(removeMinBinding(getLeft(T), Noderemoved), + getValue(T), getRight(T)); + } + + /// markImmutable - Clears the mutable bits of a root and all of its + /// descendants. + void markImmutable(TreeTy* T) { + if (!T || !T->isMutable()) + return; + T->markImmutable(); + markImmutable(getLeft(T)); + markImmutable(getRight(T)); + } + +public: + TreeTy *getCanonicalTree(TreeTy *TNew) { + if (!TNew) + return 0; + + if (TNew->IsCanonicalized) + return TNew; + + // Search the hashtable for another tree with the same digest, and + // if find a collision compare those trees by their contents. + unsigned digest = TNew->computeDigest(); + TreeTy *&entry = Cache[digest]; + do { + if (!entry) + break; + for (TreeTy *T = entry ; T != 0; T = T->next) { + // Compare the Contents('T') with Contents('TNew') + typename TreeTy::iterator TI = T->begin(), TE = T->end(); + if (!compareTreeWithSection(TNew, TI, TE)) + continue; + if (TI != TE) + continue; // T has more contents than TNew. + // Trees did match! Return 'T'. + if (TNew->refCount == 0) + TNew->destroy(); + return T; + } + entry->prev = TNew; + TNew->next = entry; + } + while (false); + + entry = TNew; + TNew->IsCanonicalized = true; + return TNew; + } +}; + +//===----------------------------------------------------------------------===// +// Immutable AVL-Tree Iterators. 
+//===----------------------------------------------------------------------===// + +template +class ImutAVLTreeGenericIterator { + SmallVector stack; +public: + enum VisitFlag { VisitedNone=0x0, VisitedLeft=0x1, VisitedRight=0x3, + Flags=0x3 }; + + typedef ImutAVLTree TreeTy; + typedef ImutAVLTreeGenericIterator _Self; + + inline ImutAVLTreeGenericIterator() {} + inline ImutAVLTreeGenericIterator(const TreeTy* Root) { + if (Root) stack.push_back(reinterpret_cast(Root)); + } + + TreeTy* operator*() const { + assert(!stack.empty()); + return reinterpret_cast(stack.back() & ~Flags); + } + + uintptr_t getVisitState() { + assert(!stack.empty()); + return stack.back() & Flags; + } + + + bool atEnd() const { return stack.empty(); } + + bool atBeginning() const { + return stack.size() == 1 && getVisitState() == VisitedNone; + } + + void skipToParent() { + assert(!stack.empty()); + stack.pop_back(); + if (stack.empty()) + return; + switch (getVisitState()) { + case VisitedNone: + stack.back() |= VisitedLeft; + break; + case VisitedLeft: + stack.back() |= VisitedRight; + break; + default: + assert(false && "Unreachable."); + } + } + + inline bool operator==(const _Self& x) const { + if (stack.size() != x.stack.size()) + return false; + for (unsigned i = 0 ; i < stack.size(); i++) + if (stack[i] != x.stack[i]) + return false; + return true; + } + + inline bool operator!=(const _Self& x) const { return !operator==(x); } + + _Self& operator++() { + assert(!stack.empty()); + TreeTy* Current = reinterpret_cast(stack.back() & ~Flags); + assert(Current); + switch (getVisitState()) { + case VisitedNone: + if (TreeTy* L = Current->getLeft()) + stack.push_back(reinterpret_cast(L)); + else + stack.back() |= VisitedLeft; + break; + case VisitedLeft: + if (TreeTy* R = Current->getRight()) + stack.push_back(reinterpret_cast(R)); + else + stack.back() |= VisitedRight; + break; + case VisitedRight: + skipToParent(); + break; + default: + assert(false && "Unreachable."); + } + return *this; + } + + _Self& operator--() { + assert(!stack.empty()); + TreeTy* Current = reinterpret_cast(stack.back() & ~Flags); + assert(Current); + switch (getVisitState()) { + case VisitedNone: + stack.pop_back(); + break; + case VisitedLeft: + stack.back() &= ~Flags; // Set state to "VisitedNone." + if (TreeTy* L = Current->getLeft()) + stack.push_back(reinterpret_cast(L) | VisitedRight); + break; + case VisitedRight: + stack.back() &= ~Flags; + stack.back() |= VisitedLeft; + if (TreeTy* R = Current->getRight()) + stack.push_back(reinterpret_cast(R) | VisitedRight); + break; + default: + assert(false && "Unreachable."); + } + return *this; + } +}; + +template +class ImutAVLTreeInOrderIterator { + typedef ImutAVLTreeGenericIterator InternalIteratorTy; + InternalIteratorTy InternalItr; + +public: + typedef ImutAVLTree TreeTy; + typedef ImutAVLTreeInOrderIterator _Self; + + ImutAVLTreeInOrderIterator(const TreeTy* Root) : InternalItr(Root) { + if (Root) operator++(); // Advance to first element. 
+  }
+
+  ImutAVLTreeInOrderIterator() : InternalItr() {}
+
+  inline bool operator==(const _Self& x) const {
+    return InternalItr == x.InternalItr;
+  }
+
+  inline bool operator!=(const _Self& x) const { return !operator==(x); }
+
+  inline TreeTy* operator*() const { return *InternalItr; }
+  inline TreeTy* operator->() const { return *InternalItr; }
+
+  inline _Self& operator++() {
+    do ++InternalItr;
+    while (!InternalItr.atEnd() &&
+           InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft);
+
+    return *this;
+  }
+
+  inline _Self& operator--() {
+    do --InternalItr;
+    while (!InternalItr.atBeginning() &&
+           InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft);
+
+    return *this;
+  }
+
+  inline void skipSubTree() {
+    InternalItr.skipToParent();
+
+    while (!InternalItr.atEnd() &&
+           InternalItr.getVisitState() != InternalIteratorTy::VisitedLeft)
+      ++InternalItr;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Trait classes for Profile information.
+//===----------------------------------------------------------------------===//
+
+/// Generic profile template. The default behavior is to invoke the
+/// profile method of an object. Specializations for primitive integers
+/// and generic handling of pointers is done below.
+template <typename T>
+struct ImutProfileInfo {
+  typedef const T  value_type;
+  typedef const T& value_type_ref;
+
+  static inline void Profile(FoldingSetNodeID& ID, value_type_ref X) {
+    FoldingSetTrait<T>::Profile(X,ID);
+  }
+};
+
+/// Profile traits for integers.
+template <typename T>
+struct ImutProfileInteger {
+  typedef const T  value_type;
+  typedef const T& value_type_ref;
+
+  static inline void Profile(FoldingSetNodeID& ID, value_type_ref X) {
+    ID.AddInteger(X);
+  }
+};
+
+#define PROFILE_INTEGER_INFO(X)\
+template<> struct ImutProfileInfo<X> : ImutProfileInteger<X> {};
+
+PROFILE_INTEGER_INFO(char)
+PROFILE_INTEGER_INFO(unsigned char)
+PROFILE_INTEGER_INFO(short)
+PROFILE_INTEGER_INFO(unsigned short)
+PROFILE_INTEGER_INFO(unsigned)
+PROFILE_INTEGER_INFO(signed)
+PROFILE_INTEGER_INFO(long)
+PROFILE_INTEGER_INFO(unsigned long)
+PROFILE_INTEGER_INFO(long long)
+PROFILE_INTEGER_INFO(unsigned long long)
+
+#undef PROFILE_INTEGER_INFO
+
+/// Generic profile trait for pointer types. We treat pointers as
+/// references to unique objects.
+template <typename T>
+struct ImutProfileInfo<T*> {
+  typedef const T*   value_type;
+  typedef value_type value_type_ref;
+
+  static inline void Profile(FoldingSetNodeID &ID, value_type_ref X) {
+    ID.AddPointer(X);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Trait classes that contain element comparison operators and type
+// definitions used by ImutAVLTree, ImmutableSet, and ImmutableMap. These
+// inherit from the profile traits (ImutProfileInfo) to include operations
+// for element profiling.
+//===----------------------------------------------------------------------===//
+
+/// ImutContainerInfo - Generic definition of comparison operations for
+/// elements of immutable containers that defaults to using
+/// std::equal_to<> and std::less<> to perform comparison of elements.
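+///
+/// As an illustrative sketch (not part of the original header), a client can
+/// reuse everything here but the ordering by hiding isLess in a derived
+/// traits struct, e.g. to keep an ImmutableSet<int> in descending order:
+///
+///   struct ReverseIntInfo : ImutContainerInfo<int> {
+///     static bool isLess(key_type_ref LHS, key_type_ref RHS) {
+///       return RHS < LHS; // Reverse of the default std::less ordering.
+///     }
+///   };
+///
+/// and then instantiate ImmutableSet<int, ReverseIntInfo>.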
+template <typename T>
+struct ImutContainerInfo : public ImutProfileInfo<T> {
+  typedef typename ImutProfileInfo<T>::value_type      value_type;
+  typedef typename ImutProfileInfo<T>::value_type_ref  value_type_ref;
+  typedef value_type      key_type;
+  typedef value_type_ref  key_type_ref;
+  typedef bool            data_type;
+  typedef bool            data_type_ref;
+
+  static inline key_type_ref KeyOfValue(value_type_ref D) { return D; }
+  static inline data_type_ref DataOfValue(value_type_ref) { return true; }
+
+  static inline bool isEqual(key_type_ref LHS, key_type_ref RHS) {
+    return std::equal_to<key_type>()(LHS,RHS);
+  }
+
+  static inline bool isLess(key_type_ref LHS, key_type_ref RHS) {
+    return std::less<key_type>()(LHS,RHS);
+  }
+
+  static inline bool isDataEqual(data_type_ref,data_type_ref) { return true; }
+};
+
+/// ImutContainerInfo - Specialization for pointer values to treat pointers
+/// as references to unique objects. Pointers are thus compared by
+/// their addresses.
+template <typename T>
+struct ImutContainerInfo<T*> : public ImutProfileInfo<T*> {
+  typedef typename ImutProfileInfo<T*>::value_type      value_type;
+  typedef typename ImutProfileInfo<T*>::value_type_ref  value_type_ref;
+  typedef value_type      key_type;
+  typedef value_type_ref  key_type_ref;
+  typedef bool            data_type;
+  typedef bool            data_type_ref;
+
+  static inline key_type_ref KeyOfValue(value_type_ref D) { return D; }
+  static inline data_type_ref DataOfValue(value_type_ref) { return true; }
+
+  static inline bool isEqual(key_type_ref LHS, key_type_ref RHS) {
+    return LHS == RHS;
+  }
+
+  static inline bool isLess(key_type_ref LHS, key_type_ref RHS) {
+    return LHS < RHS;
+  }
+
+  static inline bool isDataEqual(data_type_ref,data_type_ref) { return true; }
+};
+
+//===----------------------------------------------------------------------===//
+// Immutable Set
+//===----------------------------------------------------------------------===//
+
+template <typename ValT, typename ValInfo = ImutContainerInfo<ValT> >
+class ImmutableSet {
+public:
+  typedef typename ValInfo::value_type      value_type;
+  typedef typename ValInfo::value_type_ref  value_type_ref;
+  typedef ImutAVLTree<ValInfo> TreeTy;
+
+private:
+  TreeTy *Root;
+
+public:
+  /// Constructs a set from a pointer to a tree root. In general one
+  /// should use a Factory object to create sets instead of directly
+  /// invoking the constructor, but there are cases where making this
+  /// constructor public is useful.
+  explicit ImmutableSet(TreeTy* R) : Root(R) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableSet(const ImmutableSet &X) : Root(X.Root) {
+    if (Root) { Root->retain(); }
+  }
+  ImmutableSet &operator=(const ImmutableSet &X) {
+    if (Root != X.Root) {
+      if (X.Root) { X.Root->retain(); }
+      if (Root) { Root->release(); }
+      Root = X.Root;
+    }
+    return *this;
+  }
+  ~ImmutableSet() {
+    if (Root) { Root->release(); }
+  }
+
+  class Factory {
+    typename TreeTy::Factory F;
+    const bool Canonicalize;
+
+  public:
+    Factory(bool canonicalize = true)
+      : Canonicalize(canonicalize) {}
+
+    Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
+      : F(Alloc), Canonicalize(canonicalize) {}
+
+    /// getEmptySet - Returns an immutable set that contains no elements.
+    ImmutableSet getEmptySet() {
+      return ImmutableSet(F.getEmptyTree());
+    }
+
+    /// add - Creates a new immutable set that contains all of the values
+    /// of the original set with the addition of the specified value. If
+    /// the original set already included the value, then the original set is
+    /// returned and no memory is allocated. The time and space complexity
+    /// of this operation is logarithmic in the size of the original set.
+    /// The memory allocated to represent the set is released when the
+    /// factory object that created the set is destroyed.
+    ImmutableSet add(ImmutableSet Old, value_type_ref V) {
+      TreeTy *NewT = F.add(Old.Root, V);
+      return ImmutableSet(Canonicalize ? F.getCanonicalTree(NewT) : NewT);
+    }
+
+    /// remove - Creates a new immutable set that contains all of the values
+    /// of the original set with the exception of the specified value. If
+    /// the original set did not contain the value, the original set is
+    /// returned and no memory is allocated. The time and space complexity
+    /// of this operation is logarithmic in the size of the original set.
+    /// The memory allocated to represent the set is released when the
+    /// factory object that created the set is destroyed.
+    ImmutableSet remove(ImmutableSet Old, value_type_ref V) {
+      TreeTy *NewT = F.remove(Old.Root, V);
+      return ImmutableSet(Canonicalize ? F.getCanonicalTree(NewT) : NewT);
+    }
+
+    BumpPtrAllocator& getAllocator() { return F.getAllocator(); }
+
+  private:
+    Factory(const Factory& RHS); // DO NOT IMPLEMENT
+    void operator=(const Factory& RHS); // DO NOT IMPLEMENT
+  };
+
+  friend class Factory;
+
+  /// Returns true if the set contains the specified value.
+  bool contains(value_type_ref V) const {
+    return Root ? Root->contains(V) : false;
+  }
+
+  bool operator==(const ImmutableSet &RHS) const {
+    return Root && RHS.Root ? Root->isEqual(*RHS.Root) : Root == RHS.Root;
+  }
+
+  bool operator!=(const ImmutableSet &RHS) const {
+    return Root && RHS.Root ? Root->isNotEqual(*RHS.Root) : Root != RHS.Root;
+  }
+
+  TreeTy *getRoot() {
+    if (Root) { Root->retain(); }
+    return Root;
+  }
+
+  /// isEmpty - Return true if the set contains no elements.
+  bool isEmpty() const { return !Root; }
+
+  /// isSingleton - Return true if the set contains exactly one element.
+  /// This method runs in constant time.
+  bool isSingleton() const { return getHeight() == 1; }
+
+  template <typename Callback>
+  void foreach(Callback& C) { if (Root) Root->foreach(C); }
+
+  template <typename Callback>
+  void foreach() { if (Root) { Callback C; Root->foreach(C); } }
+
+  //===--------------------------------------------------===//
+  // Iterators.
+  //===--------------------------------------------------===//
+
+  class iterator {
+    typename TreeTy::iterator itr;
+    iterator(TreeTy* t) : itr(t) {}
+    friend class ImmutableSet<ValT,ValInfo>;
+  public:
+    iterator() {}
+    inline value_type_ref operator*() const { return itr->getValue(); }
+    inline iterator& operator++() { ++itr; return *this; }
+    inline iterator  operator++(int) { iterator tmp(*this); ++itr; return tmp; }
+    inline iterator& operator--() { --itr; return *this; }
+    inline iterator  operator--(int) { iterator tmp(*this); --itr; return tmp; }
+    inline bool operator==(const iterator& RHS) const { return RHS.itr == itr; }
+    inline bool operator!=(const iterator& RHS) const { return RHS.itr != itr; }
+    inline value_type *operator->() const { return &(operator*()); }
+  };
+
+  iterator begin() const { return iterator(Root); }
+  iterator end() const { return iterator(); }
+
+  //===--------------------------------------------------===//
+  // Utility methods.
+  //===--------------------------------------------------===//
+
+  unsigned getHeight() const { return Root ? Root->getHeight() : 0; }
+
+  static inline void Profile(FoldingSetNodeID& ID, const ImmutableSet& S) {
+    ID.AddPointer(S.Root);
+  }
+
+  inline void Profile(FoldingSetNodeID& ID) const {
+    return Profile(ID,*this);
+  }
+
+  //===--------------------------------------------------===//
+  // For testing.
+  //===--------------------------------------------------===//
+
+  void validateTree() const { if (Root) Root->validateTree(); }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/InMemoryStruct.h b/final/include/llvm/ADT/InMemoryStruct.h
new file mode 100644
index 00000000000..a56084501a6
--- /dev/null
+++ b/final/include/llvm/ADT/InMemoryStruct.h
@@ -0,0 +1,77 @@
+//===- InMemoryStruct.h - Indirect Struct Access Smart Pointer --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INMEMORYSTRUCT_H
+#define LLVM_ADT_INMEMORYSTRUCT_H
+
+#include <cassert>
+
+namespace llvm {
+
+/// \brief Helper object for abstracting access to an in-memory structure which
+/// may require some kind of temporary storage.
+///
+/// This class is designed to be used for accessing file data structures which
+/// in the common case can be accessed from a direct pointer to a memory mapped
+/// object, but which in some cases may require indirect access to a temporary
+/// structure (which, for example, may have undergone endianness translation).
+template <typename T>
+class InMemoryStruct {
+  typedef T value_type;
+  typedef value_type &reference;
+  typedef value_type *pointer;
+  typedef const value_type &const_reference;
+  typedef const value_type *const_pointer;
+
+  /// \brief The smart pointer target.
+  value_type *Target;
+
+  /// \brief A temporary object which can be used as a target of the smart
+  /// pointer.
+  value_type Contents;
+
+public:
+  InMemoryStruct() : Target(0) {}
+  InMemoryStruct(reference Value) : Target(&Contents), Contents(Value) {}
+  InMemoryStruct(pointer Value) : Target(Value) {}
+  InMemoryStruct(const InMemoryStruct<T> &Value) { *this = Value; }
+
+  void operator=(const InMemoryStruct<T> &Value) {
+    if (Value.Target != &Value.Contents) {
+      Target = Value.Target;
+    } else {
+      Target = &Contents;
+      Contents = Value.Contents;
+    }
+  }
+
+  const_reference operator*() const {
+    assert(Target && "Cannot dereference null pointer");
+    return *Target;
+  }
+  reference operator*() {
+    assert(Target && "Cannot dereference null pointer");
+    return *Target;
+  }
+
+  const_pointer operator->() const {
+    return Target;
+  }
+  pointer operator->() {
+    return Target;
+  }
+
+  operator bool() const { return Target != 0; }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/ADT/IndexedMap.h b/final/include/llvm/ADT/IndexedMap.h
new file mode 100644
index 00000000000..87126ea4918
--- /dev/null
+++ b/final/include/llvm/ADT/IndexedMap.h
@@ -0,0 +1,87 @@
+//===- llvm/ADT/IndexedMap.h - An index map implementation ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an indexed map. The index map template takes two
+// types. The first is the mapped type and the second is a functor
+// that maps its argument to a size_t. On instantiation a "null" value
+// can be provided to be used as a "does not exist" indicator in the
+// map. A member function grow() is provided that given the value of
+// the maximally indexed key (the argument of the functor) makes sure
+// the map has enough space for it.
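+//
+// A minimal usage sketch (illustrative, not part of the original header),
+// using the default IdentityFunctor and -1 as the "does not exist" value:
+//
+//   IndexedMap<int> Map(-1);
+//   Map.grow(41);                   // Make indices 0..41 addressable.
+//   Map[41] = 7;                    // Asserts if the index is out of bounds.
+//   bool Known = Map.inBounds(99);  // false until grow(99) or beyond.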
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INDEXEDMAP_H
+#define LLVM_ADT_INDEXEDMAP_H
+
+#include <cassert>
+#include <functional>
+#include <vector>
+
+namespace llvm {
+
+  struct IdentityFunctor : public std::unary_function<unsigned, unsigned> {
+    unsigned operator()(unsigned Index) const {
+      return Index;
+    }
+  };
+
+  template <typename T, typename ToIndexT = IdentityFunctor>
+  class IndexedMap {
+    typedef typename ToIndexT::argument_type IndexT;
+    typedef std::vector<T> StorageT;
+    StorageT storage_;
+    T nullVal_;
+    ToIndexT toIndex_;
+
+  public:
+    IndexedMap() : nullVal_(T()) { }
+
+    explicit IndexedMap(const T& val) : nullVal_(val) { }
+
+    typename StorageT::reference operator[](IndexT n) {
+      assert(toIndex_(n) < storage_.size() && "index out of bounds!");
+      return storage_[toIndex_(n)];
+    }
+
+    typename StorageT::const_reference operator[](IndexT n) const {
+      assert(toIndex_(n) < storage_.size() && "index out of bounds!");
+      return storage_[toIndex_(n)];
+    }
+
+    void reserve(typename StorageT::size_type s) {
+      storage_.reserve(s);
+    }
+
+    void resize(typename StorageT::size_type s) {
+      storage_.resize(s, nullVal_);
+    }
+
+    void clear() {
+      storage_.clear();
+    }
+
+    void grow(IndexT n) {
+      unsigned NewSize = toIndex_(n) + 1;
+      if (NewSize > storage_.size())
+        resize(NewSize);
+    }
+
+    bool inBounds(IndexT n) const {
+      return toIndex_(n) < storage_.size();
+    }
+
+    typename StorageT::size_type size() const {
+      return storage_.size();
+    }
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/IntEqClasses.h b/final/include/llvm/ADT/IntEqClasses.h
new file mode 100644
index 00000000000..8e75c48e376
--- /dev/null
+++ b/final/include/llvm/ADT/IntEqClasses.h
@@ -0,0 +1,88 @@
+//===-- llvm/ADT/IntEqClasses.h - Equiv. Classes of Integers ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Equivalence classes for small integers. This is a mapping of the integers
+// 0 .. N-1 into M equivalence classes numbered 0 .. M-1.
+//
+// Initially each integer has its own equivalence class. Classes are joined by
+// passing a representative member of each class to join().
+//
+// Once the classes are built, compress() will number them 0 .. M-1 and prevent
+// further changes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INTEQCLASSES_H
+#define LLVM_ADT_INTEQCLASSES_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class IntEqClasses {
+  /// EC - When uncompressed, map each integer to a smaller member of its
+  /// equivalence class. The class leader is the smallest member and maps to
+  /// itself.
+  ///
+  /// When compressed, EC[i] is the equivalence class of i.
+  SmallVector<unsigned, 8> EC;
+
+  /// NumClasses - The number of equivalence classes when compressed, or 0 when
+  /// uncompressed.
+  unsigned NumClasses;
+
+public:
+  /// IntEqClasses - Create an equivalence class mapping for 0 .. N-1.
+  IntEqClasses(unsigned N = 0) : NumClasses(0) { grow(N); }
+
+  /// grow - Increase capacity to hold 0 .. N-1, putting new integers in unique
+  /// equivalence classes.
+  /// This requires an uncompressed map.
+  void grow(unsigned N);
+
+  /// clear - Clear all classes so that grow() will assign a unique class to
+  /// every integer.
+  void clear() {
+    EC.clear();
+    NumClasses = 0;
+  }
+
+  /// join - Join the equivalence classes of a and b. After joining classes,
+  /// findLeader(a) == findLeader(b).
+  /// This requires an uncompressed map.
+  void join(unsigned a, unsigned b);
+
+  /// findLeader - Compute the leader of a's equivalence class. This is the
+  /// smallest member of the class.
+  /// This requires an uncompressed map.
+  unsigned findLeader(unsigned a) const;
+
+  /// compress - Compress equivalence classes by numbering them 0 .. M-1.
+  /// This makes the equivalence class map immutable.
+  void compress();
+
+  /// getNumClasses - Return the number of equivalence classes after compress()
+  /// was called.
+  unsigned getNumClasses() const { return NumClasses; }
+
+  /// operator[] - Return a's equivalence class number, 0 .. getNumClasses()-1.
+  /// This requires a compressed map.
+  unsigned operator[](unsigned a) const {
+    assert(NumClasses && "operator[] called before compress()");
+    return EC[a];
+  }
+
+  /// uncompress - Change back to the uncompressed representation that allows
+  /// editing.
+  void uncompress();
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/IntervalMap.h b/final/include/llvm/ADT/IntervalMap.h
new file mode 100644
index 00000000000..79f24d31c06
--- /dev/null
+++ b/final/include/llvm/ADT/IntervalMap.h
@@ -0,0 +1,2139 @@
+//===- llvm/ADT/IntervalMap.h - A sorted interval map -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a coalescing interval map for small objects.
+//
+// KeyT objects are mapped to ValT objects. Intervals of keys that map to the
+// same value are represented in a compressed form.
+//
+// Iterators provide ordered access to the compressed intervals rather than the
+// individual keys, and insert and erase operations use key intervals as well.
+//
+// Like SmallVector, IntervalMap will store the first N intervals in the map
+// object itself without any allocations. When space is exhausted it switches
+// to a B+-tree representation with very small overhead for small key and
+// value objects.
+//
+// A Traits class specifies how keys are compared. It also allows IntervalMap
+// to work with both closed and half-open intervals.
+//
+// Keys and values are not stored next to each other in a std::pair, so we
+// don't provide such a value_type. Dereferencing iterators only returns the
+// mapped value. The interval bounds are accessible through the start() and
+// stop() iterator methods.
+//
+// IntervalMap is optimized for small key and value objects, 4 or 8 bytes
+// each being the optimal size. For large objects use std::map instead.
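+//
+// A brief usage sketch (illustrative only; see the synopsis below for the
+// full interface):
+//
+//   IntervalMap<unsigned, char>::Allocator MapAllocator;
+//   IntervalMap<unsigned, char> M(MapAllocator);
+//   M.insert(10, 19, 'a');   // Map the closed interval [10;19] to 'a'.
+//   M.insert(20, 29, 'a');   // Coalesces with the above into [10;29].
+//   char V = M.lookup(25);   // Yields 'a'; unmapped keys yield char().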
+//
+//===----------------------------------------------------------------------===//
+//
+// Synopsis:
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap {
+// public:
+//   typedef KeyT key_type;
+//   typedef ValT mapped_type;
+//   typedef RecyclingAllocator<...> Allocator;
+//   class iterator;
+//   class const_iterator;
+//
+//   explicit IntervalMap(Allocator&);
+//   ~IntervalMap();
+//
+//   bool empty() const;
+//   KeyT start() const;
+//   KeyT stop() const;
+//   ValT lookup(KeyT x, ValT NotFound = ValT()) const;
+//
+//   const_iterator begin() const;
+//   const_iterator end() const;
+//   iterator begin();
+//   iterator end();
+//   const_iterator find(KeyT x) const;
+//   iterator find(KeyT x);
+//
+//   void insert(KeyT a, KeyT b, ValT y);
+//   void clear();
+// };
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap::const_iterator :
+//   public std::iterator<std::bidirectional_iterator_tag, ValT> {
+// public:
+//   bool operator==(const const_iterator &) const;
+//   bool operator!=(const const_iterator &) const;
+//   bool valid() const;
+//
+//   const KeyT &start() const;
+//   const KeyT &stop() const;
+//   const ValT &value() const;
+//   const ValT &operator*() const;
+//   const ValT *operator->() const;
+//
+//   const_iterator &operator++();
+//   const_iterator operator++(int);
+//   const_iterator &operator--();
+//   const_iterator operator--(int);
+//   void goToBegin();
+//   void goToEnd();
+//   void find(KeyT x);
+//   void advanceTo(KeyT x);
+// };
+//
+// template <typename KeyT, typename ValT, unsigned N, typename Traits>
+// class IntervalMap::iterator : public const_iterator {
+// public:
+//   void insert(KeyT a, KeyT b, ValT y);
+//   void erase();
+// };
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_INTERVALMAP_H
+#define LLVM_ADT_INTERVALMAP_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include <iterator>
+
+namespace llvm {
+
+
+//===----------------------------------------------------------------------===//
+//---                            Key traits                               ---//
+//===----------------------------------------------------------------------===//
+//
+// The IntervalMap works with closed or half-open intervals.
+// Adjacent intervals that map to the same value are coalesced.
+//
+// The IntervalMapInfo traits class is used to determine if a key is contained
+// in an interval, and if two intervals are adjacent so they can be coalesced.
+// The provided implementation works for closed integer intervals, other keys
+// probably need a specialized version.
+//
+// The point x is contained in [a;b] when !startLess(x, a) && !stopLess(b, x).
+//
+// It is assumed that (a;b] half-open intervals are not used, only [a;b) is
+// allowed. This is so that stopLess(a, b) can be used to determine if two
+// intervals overlap.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename T>
+struct IntervalMapInfo {
+
+  /// startLess - Return true if x is not in [a;b].
+  /// This is x < a both for closed intervals and for [a;b) half-open
+  /// intervals.
+  static inline bool startLess(const T &x, const T &a) {
+    return x < a;
+  }
+
+  /// stopLess - Return true if x is not in [a;b].
+  /// This is b < x for a closed interval, b <= x for [a;b) half-open
+  /// intervals.
+  static inline bool stopLess(const T &b, const T &x) {
+    return b < x;
+  }
+
+  /// adjacent - Return true when the intervals [x;a] and [b;y] can coalesce.
+  /// This is a+1 == b for closed intervals, a == b for half-open intervals.
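+  ///
+  /// As a sketch of what a half-open [a;b) traits class might look like (an
+  /// assumption for illustration; this header only ships the closed form),
+  /// a user-provided class would keep startLess as-is and flip the other two
+  /// predicates:
+  ///
+  ///   struct HalfOpenIntervalMapInfo {
+  ///     static bool startLess(const T &x, const T &a) { return x < a; }
+  ///     static bool stopLess(const T &b, const T &x) { return b <= x; }
+  ///     static bool adjacent(const T &a, const T &b) { return a == b; }
+  ///   };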
+  static inline bool adjacent(const T &a, const T &b) {
+    return a+1 == b;
+  }
+
+};
+
+/// IntervalMapImpl - Namespace used for IntervalMap implementation details.
+/// It should be considered private to the implementation.
+namespace IntervalMapImpl {
+
+// Forward declarations.
+template <typename, typename, unsigned, typename> class LeafNode;
+template <typename, typename, unsigned, typename> class BranchNode;
+
+typedef std::pair<unsigned,unsigned> IdxPair;
+
+
+//===----------------------------------------------------------------------===//
+//---                    IntervalMapImpl::NodeBase                        ---//
+//===----------------------------------------------------------------------===//
+//
+// Both leaf and branch nodes store vectors of pairs.
+// Leaves store ((KeyT, KeyT), ValT) pairs, branches use (NodeRef, KeyT).
+//
+// Keys and values are stored in separate arrays to avoid padding caused by
+// different object alignments. This also helps improve locality of reference
+// when searching the keys.
+//
+// The nodes don't know how many elements they contain - that information is
+// stored elsewhere. Omitting the size field prevents padding and allows a node
+// to fill the allocated cache lines completely.
+//
+// These are typical key and value sizes, the node branching factor (N), and
+// wasted space when nodes are sized to fit in three cache lines (192 bytes):
+//
+//   T1  T2   N Waste  Used by
+//    4   4  24     0  Branch<4> (32-bit pointers)
+//    8   4  16     0  Leaf<4,4>, Branch<4>
+//    8   8  12     0  Leaf<4,8>, Branch<8>
+//   16   4   9    12  Leaf<8,4>
+//   16   8   8     0  Leaf<8,8>
+//
+//===----------------------------------------------------------------------===//
+
+template <typename T1, typename T2, unsigned N>
+class NodeBase {
+public:
+  enum { Capacity = N };
+
+  T1 first[N];
+  T2 second[N];
+
+  /// copy - Copy elements from another node.
+  /// @param Other Node elements are copied from.
+  /// @param i     Beginning of the source range in other.
+  /// @param j     Beginning of the destination range in this.
+  /// @param Count Number of elements to copy.
+  template <unsigned M>
+  void copy(const NodeBase<T1, T2, M> &Other, unsigned i,
+            unsigned j, unsigned Count) {
+    assert(i + Count <= M && "Invalid source range");
+    assert(j + Count <= N && "Invalid dest range");
+    for (unsigned e = i + Count; i != e; ++i, ++j) {
+      first[j]  = Other.first[i];
+      second[j] = Other.second[i];
+    }
+  }
+
+  /// moveLeft - Move elements to the left.
+  /// @param i     Beginning of the source range.
+  /// @param j     Beginning of the destination range.
+  /// @param Count Number of elements to copy.
+  void moveLeft(unsigned i, unsigned j, unsigned Count) {
+    assert(j <= i && "Use moveRight to shift elements right");
+    copy(*this, i, j, Count);
+  }
+
+  /// moveRight - Move elements to the right.
+  /// @param i     Beginning of the source range.
+  /// @param j     Beginning of the destination range.
+  /// @param Count Number of elements to copy.
+  void moveRight(unsigned i, unsigned j, unsigned Count) {
+    assert(i <= j && "Use moveLeft to shift elements left");
+    assert(j + Count <= N && "Invalid range");
+    while (Count--) {
+      first[j + Count]  = first[i + Count];
+      second[j + Count] = second[i + Count];
+    }
+  }
+
+  /// erase - Erase elements [i;j).
+  /// @param i    Beginning of the range to erase.
+  /// @param j    End of the range. (Exclusive).
+  /// @param Size Number of elements in node.
+  void erase(unsigned i, unsigned j, unsigned Size) {
+    moveLeft(j, i, Size - j);
+  }
+
+  /// erase - Erase element at i.
+  /// @param i    Index of element to erase.
+  /// @param Size Number of elements in node.
+  void erase(unsigned i, unsigned Size) {
+    erase(i, i+1, Size);
+  }
+
+  /// shift - Shift elements [i;size) 1 position to the right.
+  /// @param i    Beginning of the range to move.
+  /// @param Size Number of elements in node.
+  void shift(unsigned i, unsigned Size) {
+    moveRight(i, i + 1, Size - i);
+  }
+
+  /// transferToLeftSib - Transfer elements to a left sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Left sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Count Number of elements to transfer.
+  void transferToLeftSib(unsigned Size, NodeBase &Sib, unsigned SSize,
+                         unsigned Count) {
+    Sib.copy(*this, 0, SSize, Count);
+    erase(0, Count, Size);
+  }
+
+  /// transferToRightSib - Transfer elements to a right sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Right sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Count Number of elements to transfer.
+  void transferToRightSib(unsigned Size, NodeBase &Sib, unsigned SSize,
+                          unsigned Count) {
+    Sib.moveRight(0, Count, SSize);
+    Sib.copy(*this, Size-Count, 0, Count);
+  }
+
+  /// adjustFromLeftSib - Adjust the number of elements in this node by moving
+  /// elements to or from a left sibling node.
+  /// @param Size  Number of elements in this.
+  /// @param Sib   Right sibling node.
+  /// @param SSize Number of elements in sib.
+  /// @param Add   The number of elements to add to this node, possibly < 0.
+  /// @return      Number of elements added to this node, possibly negative.
+  int adjustFromLeftSib(unsigned Size, NodeBase &Sib, unsigned SSize, int Add) {
+    if (Add > 0) {
+      // We want to grow, copy from sib.
+      unsigned Count = std::min(std::min(unsigned(Add), SSize), N - Size);
+      Sib.transferToRightSib(SSize, *this, Size, Count);
+      return Count;
+    } else {
+      // We want to shrink, copy to sib.
+      unsigned Count = std::min(std::min(unsigned(-Add), Size), N - SSize);
+      transferToLeftSib(Size, Sib, SSize, Count);
+      return -Count;
+    }
+  }
+};
+
+/// IntervalMapImpl::adjustSiblingSizes - Move elements between sibling nodes.
+/// @param Node    Array of pointers to sibling nodes.
+/// @param Nodes   Number of nodes.
+/// @param CurSize Array of current node sizes, will be overwritten.
+/// @param NewSize Array of desired node sizes.
+template <typename NodeT>
+void adjustSiblingSizes(NodeT *Node[], unsigned Nodes,
+                        unsigned CurSize[], const unsigned NewSize[]) {
+  // Move elements right.
+  for (int n = Nodes - 1; n; --n) {
+    if (CurSize[n] == NewSize[n])
+      continue;
+    for (int m = n - 1; m != -1; --m) {
+      int d = Node[n]->adjustFromLeftSib(CurSize[n], *Node[m], CurSize[m],
+                                         NewSize[n] - CurSize[n]);
+      CurSize[m] -= d;
+      CurSize[n] += d;
+      // Keep going if the current node was exhausted.
+      if (CurSize[n] >= NewSize[n])
+        break;
+    }
+  }
+
+  if (Nodes == 0)
+    return;
+
+  // Move elements left.
+  for (unsigned n = 0; n != Nodes - 1; ++n) {
+    if (CurSize[n] == NewSize[n])
+      continue;
+    for (unsigned m = n + 1; m != Nodes; ++m) {
+      int d = Node[m]->adjustFromLeftSib(CurSize[m], *Node[n], CurSize[n],
+                                         CurSize[n] - NewSize[n]);
+      CurSize[m] += d;
+      CurSize[n] -= d;
+      // Keep going if the current node was exhausted.
+      if (CurSize[n] >= NewSize[n])
+        break;
+    }
+  }
+
+#ifndef NDEBUG
+  for (unsigned n = 0; n != Nodes; n++)
+    assert(CurSize[n] == NewSize[n] && "Insufficient element shuffle");
+#endif
+}
+
+/// IntervalMapImpl::distribute - Compute a new distribution of node elements
+/// after an overflow or underflow. Reserve space for a new element at Position,
+/// and compute the node that will hold Position after redistributing node
+/// elements.
+///
+/// It is required that
+///
+///   Elements == sum(CurSize), and
+///   Elements + Grow <= Nodes * Capacity.
+///
+/// NewSize[] will be filled in such that:
+///
+///   sum(NewSize) == Elements, and
+///   NewSize[i] <= Capacity.
+///
+/// The returned index is the node where Position will go, so:
+///
+///   sum(NewSize[0..idx-1]) <= Position
+///   sum(NewSize[0..idx])   >= Position
+///
+/// The last equality, sum(NewSize[0..idx]) == Position, can only happen when
+/// Grow is set and NewSize[idx] == Capacity-1. The index points to the node
+/// before the one holding the Position'th element where there is room for an
+/// insertion.
+///
+/// @param Nodes    The number of nodes.
+/// @param Elements Total elements in all nodes.
+/// @param Capacity The capacity of each node.
+/// @param CurSize  Array[Nodes] of current node sizes, or NULL.
+/// @param NewSize  Array[Nodes] to receive the new node sizes.
+/// @param Position Insert position.
+/// @param Grow     Reserve space for a new element at Position.
+/// @return         (node, offset) for Position.
+IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
+                   const unsigned *CurSize, unsigned NewSize[],
+                   unsigned Position, bool Grow);
+
+
+//===----------------------------------------------------------------------===//
+//---                   IntervalMapImpl::NodeSizer                        ---//
+//===----------------------------------------------------------------------===//
+//
+// Compute node sizes from key and value types.
+//
+// The branching factors are chosen to make nodes fit in three cache lines.
+// This may not be possible if keys or values are very large. Such large
+// objects are handled correctly, but a std::map would probably give better
+// performance.
+//
+//===----------------------------------------------------------------------===//
+
+enum {
+  // Cache line size. Most architectures have 32 or 64 byte cache lines.
+  // We use 64 bytes here because it provides good branching factors.
+  Log2CacheLine = 6,
+  CacheLineBytes = 1 << Log2CacheLine,
+  DesiredNodeBytes = 3 * CacheLineBytes
+};
+
+template <typename KeyT, typename ValT>
+struct NodeSizer {
+  enum {
+    // Compute the leaf node branching factor that makes a node fit in three
+    // cache lines. The branching factor must be at least 3, or some B+-tree
+    // balancing algorithms won't work.
+    // LeafSize can't be larger than CacheLineBytes. This is required by the
+    // PointerIntPair used by NodeRef.
+    DesiredLeafSize = DesiredNodeBytes /
+      static_cast<unsigned>(2*sizeof(KeyT)+sizeof(ValT)),
+    MinLeafSize = 3,
+    LeafSize = DesiredLeafSize > MinLeafSize ? DesiredLeafSize : MinLeafSize
+  };
+
+  typedef NodeBase<std::pair<KeyT, KeyT>, ValT, LeafSize> LeafBase;
+
+  enum {
+    // Now that we have the leaf branching factor, compute the actual
+    // allocation unit size by rounding up to a whole number of cache lines.
+    AllocBytes = (sizeof(LeafBase) + CacheLineBytes-1) & ~(CacheLineBytes-1),
+
+    // Determine the branching factor for branch nodes.
+    BranchSize = AllocBytes /
+      static_cast<unsigned>(sizeof(KeyT) + sizeof(void*))
+  };
+
+  /// Allocator - The recycling allocator used for both branch and leaf nodes.
+  /// This typedef is very likely to be identical for all IntervalMaps with
+  /// reasonably sized entries, so the same allocator can be shared among
+  /// different kinds of maps.
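+  ///
+  /// For example (an illustrative sketch, not from the original header),
+  /// maps with different value types can typically draw from one pool:
+  ///
+  ///   IntervalMap<unsigned, unsigned>::Allocator Pool;
+  ///   IntervalMap<unsigned, unsigned> A(Pool);
+  ///   IntervalMap<unsigned, char> B(Pool); // Same Allocator type in practice.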
+  typedef RecyclingAllocator<BumpPtrAllocator, char,
+                             AllocBytes, CacheLineBytes> Allocator;
+
+};
+
+
+//===----------------------------------------------------------------------===//
+//---                     IntervalMapImpl::NodeRef                        ---//
+//===----------------------------------------------------------------------===//
+//
+// B+-tree nodes can be leaves or branches, so we need a polymorphic node
+// pointer that can point to both kinds.
+//
+// All nodes are cache line aligned and the low 6 bits of a node pointer are
+// always 0. These bits are used to store the number of elements in the
+// referenced node. Besides saving space, placing node sizes in the parents
+// allows tree balancing algorithms to run without faulting cache lines for
+// nodes that may not need to be modified.
+//
+// A NodeRef doesn't know whether it references a leaf node or a branch node.
+// It is the responsibility of the caller to use the correct types.
+//
+// Nodes are never supposed to be empty, and it is invalid to store a node
+// size of 0 in a NodeRef. The valid range of sizes is 1-64.
+//
+//===----------------------------------------------------------------------===//
+
+class NodeRef {
+  struct CacheAlignedPointerTraits {
+    static inline void *getAsVoidPointer(void *P) { return P; }
+    static inline void *getFromVoidPointer(void *P) { return P; }
+    enum { NumLowBitsAvailable = Log2CacheLine };
+  };
+  PointerIntPair<void*, Log2CacheLine, unsigned, CacheAlignedPointerTraits> pip;
+
+public:
+  /// NodeRef - Create a null ref.
+  NodeRef() {}
+
+  /// operator bool - Detect a null ref.
+  operator bool() const { return pip.getOpaqueValue(); }
+
+  /// NodeRef - Create a reference to the node p with n elements.
+  template <typename NodeT>
+  NodeRef(NodeT *p, unsigned n) : pip(p, n - 1) {
+    assert(n <= NodeT::Capacity && "Size too big for node");
+  }
+
+  /// size - Return the number of elements in the referenced node.
+  unsigned size() const { return pip.getInt() + 1; }
+
+  /// setSize - Update the node size.
+  void setSize(unsigned n) { pip.setInt(n - 1); }
+
+  /// subtree - Access the i'th subtree reference in a branch node.
+  /// This depends on branch nodes storing the NodeRef array as their first
+  /// member.
+  NodeRef &subtree(unsigned i) const {
+    return reinterpret_cast<NodeRef*>(pip.getPointer())[i];
+  }
+
+  /// get - Dereference as a NodeT reference.
+  template <typename NodeT>
+  NodeT &get() const {
+    return *reinterpret_cast<NodeT*>(pip.getPointer());
+  }
+
+  bool operator==(const NodeRef &RHS) const {
+    if (pip == RHS.pip)
+      return true;
+    assert(pip.getPointer() != RHS.pip.getPointer() && "Inconsistent NodeRefs");
+    return false;
+  }
+
+  bool operator!=(const NodeRef &RHS) const {
+    return !operator==(RHS);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//---                     IntervalMapImpl::LeafNode                       ---//
+//===----------------------------------------------------------------------===//
+//
+// Leaf nodes store up to N disjoint intervals with corresponding values.
+//
+// The intervals are kept sorted and fully coalesced so there are no adjacent
+// intervals mapping to the same value.
+//
+// These constraints are always satisfied:
+//
+// - Traits::stopLess(start(i), stop(i))     - Non-empty, sane intervals.
+//
+// - Traits::stopLess(stop(i), start(i + 1)) - Sorted.
+//
+// - value(i) != value(i + 1) || !Traits::adjacent(stop(i), start(i + 1))
+//                                           - Fully coalesced.
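+//
+// For instance, with the default closed integer intervals the layout
+//
+//   [1;5] -> A, [6;10] -> B
+//
+// is only legal when A != B: if the two values were equal, the entries would
+// be adjacent and would have to be coalesced into a single [1;10] -> A entry.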
+//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class LeafNode : public NodeBase<std::pair<KeyT, KeyT>, ValT, N> {
+public:
+  const KeyT &start(unsigned i) const { return this->first[i].first; }
+  const KeyT &stop(unsigned i) const { return this->first[i].second; }
+  const ValT &value(unsigned i) const { return this->second[i]; }
+
+  KeyT &start(unsigned i) { return this->first[i].first; }
+  KeyT &stop(unsigned i) { return this->first[i].second; }
+  ValT &value(unsigned i) { return this->second[i]; }
+
+  /// findFrom - Find the first interval after i that may contain x.
+  /// @param i    Starting index for the search.
+  /// @param Size Number of elements in node.
+  /// @param x    Key to search for.
+  /// @return     First index with !stopLess(key[i].stop, x), or size.
+  ///             This is the first interval that can possibly contain x.
+  unsigned findFrom(unsigned i, unsigned Size, KeyT x) const {
+    assert(i <= Size && Size <= N && "Bad indices");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (i != Size && Traits::stopLess(stop(i), x)) ++i;
+    return i;
+  }
+
+  /// safeFind - Find an interval that is known to exist. This is the same as
+  /// findFrom except it is assumed that x is at least within range of the
+  /// last interval.
+  /// @param i Starting index for the search.
+  /// @param x Key to search for.
+  /// @return  First index with !stopLess(key[i].stop, x), never size.
+  ///          This is the first interval that can possibly contain x.
+  unsigned safeFind(unsigned i, KeyT x) const {
+    assert(i < N && "Bad index");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (Traits::stopLess(stop(i), x)) ++i;
+    assert(i < N && "Unsafe intervals");
+    return i;
+  }
+
+  /// safeLookup - Lookup mapped value for a safe key.
+  /// It is assumed that x is within range of the last entry.
+  /// @param x        Key to search for.
+  /// @param NotFound Value to return if x is not in any interval.
+  /// @return         The mapped value at x or NotFound.
+  ValT safeLookup(KeyT x, ValT NotFound) const {
+    unsigned i = safeFind(0, x);
+    return Traits::startLess(x, start(i)) ? NotFound : value(i);
+  }
+
+  unsigned insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y);
+};
+
+/// insertFrom - Add mapping of [a;b] to y if possible, coalescing as much as
+/// possible. This may cause the node to grow by 1, or it may cause the node
+/// to shrink because of coalescing.
+/// @param Pos  Starting index = findFrom(0, Size, a).
+/// @param Size Number of elements in node.
+/// @param a    Interval start.
+/// @param b    Interval stop.
+/// @param y    Value to be mapped.
+/// @return     The new number of elements, or Capacity+1 on overflow; the
+///             insert position is returned through Pos.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+unsigned LeafNode<KeyT, ValT, N, Traits>::
+insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y) {
+  unsigned i = Pos;
+  assert(i <= Size && Size <= N && "Invalid index");
+  assert(!Traits::stopLess(b, a) && "Invalid interval");
+
+  // Verify the findFrom invariant.
+  assert((i == 0 || Traits::stopLess(stop(i - 1), a)));
+  assert((i == Size || !Traits::stopLess(stop(i), a)));
+  assert((i == Size || Traits::stopLess(b, start(i))) && "Overlapping insert");
+
+  // Coalesce with previous interval.
+  if (i && value(i - 1) == y && Traits::adjacent(stop(i - 1), a)) {
+    Pos = i - 1;
+    // Also coalesce with next interval?
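+    // If it does, entry i-1 absorbs both [a;b] and entry i, and the node
+    // shrinks by one element; otherwise entry i-1 simply stretches to cover
+    // b and the size is unchanged.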
+    if (i != Size && value(i) == y && Traits::adjacent(b, start(i))) {
+      stop(i - 1) = stop(i);
+      this->erase(i, Size);
+      return Size - 1;
+    }
+    stop(i - 1) = b;
+    return Size;
+  }
+
+  // Detect overflow.
+  if (i == N)
+    return N + 1;
+
+  // Add new interval at end.
+  if (i == Size) {
+    start(i) = a;
+    stop(i) = b;
+    value(i) = y;
+    return Size + 1;
+  }
+
+  // Try to coalesce with following interval.
+  if (value(i) == y && Traits::adjacent(b, start(i))) {
+    start(i) = a;
+    return Size;
+  }
+
+  // We must insert before i. Detect overflow.
+  if (Size == N)
+    return N + 1;
+
+  // Insert before i.
+  this->shift(i, Size);
+  start(i) = a;
+  stop(i) = b;
+  value(i) = y;
+  return Size + 1;
+}
+
+
+//===----------------------------------------------------------------------===//
+//---                   IntervalMapImpl::BranchNode                       ---//
+//===----------------------------------------------------------------------===//
+//
+// A branch node stores references to 1--N subtrees all of the same height.
+//
+// The key array in a branch node holds the rightmost stop key of each subtree.
+// It is redundant to store the last stop key since it can be found in the
+// parent node, but doing so makes tree balancing a lot simpler.
+//
+// It is unusual for a branch node to only have one subtree, but it can happen
+// in the root node if it is smaller than the normal nodes.
+//
+// When all of the leaf nodes from all the subtrees are concatenated, they must
+// satisfy the same constraints as a single leaf node. They must be sorted,
+// sane, and fully coalesced.
+//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class BranchNode : public NodeBase<NodeRef, KeyT, N> {
+public:
+  const KeyT &stop(unsigned i) const { return this->second[i]; }
+  const NodeRef &subtree(unsigned i) const { return this->first[i]; }
+
+  KeyT &stop(unsigned i) { return this->second[i]; }
+  NodeRef &subtree(unsigned i) { return this->first[i]; }
+
+  /// findFrom - Find the first subtree after i that may contain x.
+  /// @param i    Starting index for the search.
+  /// @param Size Number of elements in node.
+  /// @param x    Key to search for.
+  /// @return     First index with !stopLess(key[i], x), or size.
+  ///             This is the first subtree that can possibly contain x.
+  unsigned findFrom(unsigned i, unsigned Size, KeyT x) const {
+    assert(i <= Size && Size <= N && "Bad indices");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index to findFrom is past the needed point");
+    while (i != Size && Traits::stopLess(stop(i), x)) ++i;
+    return i;
+  }
+
+  /// safeFind - Find a subtree that is known to exist. This is the same as
+  /// findFrom except it is assumed that x is in range.
+  /// @param i Starting index for the search.
+  /// @param x Key to search for.
+  /// @return  First index with !stopLess(key[i], x), never size.
+  ///          This is the first subtree that can possibly contain x.
+  unsigned safeFind(unsigned i, KeyT x) const {
+    assert(i < N && "Bad index");
+    assert((i == 0 || Traits::stopLess(stop(i - 1), x)) &&
+           "Index is past the needed point");
+    while (Traits::stopLess(stop(i), x)) ++i;
+    assert(i < N && "Unsafe intervals");
+    return i;
+  }
+
+  /// safeLookup - Get the subtree containing x, assuming that x is in range.
+  /// @param x Key to search for.
+  /// @return  Subtree containing x.
+  NodeRef safeLookup(KeyT x) const {
+    return subtree(safeFind(0, x));
+  }
+
+  /// insert - Insert a new (subtree, stop) pair.
+  /// @param i    Insert position, following entries will be shifted.
+  /// @param Size Number of elements in node.
+  /// @param Node Subtree to insert.
+  /// @param Stop Last key in subtree.
+  void insert(unsigned i, unsigned Size, NodeRef Node, KeyT Stop) {
+    assert(Size < N && "branch node overflow");
+    assert(i <= Size && "Bad insert position");
+    this->shift(i, Size);
+    subtree(i) = Node;
+    stop(i) = Stop;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+//---                      IntervalMapImpl::Path                          ---//
+//===----------------------------------------------------------------------===//
+//
+// A Path is used by iterators to represent a position in a B+-tree, and the
+// path to get there from the root.
+//
+// The Path class also contains the tree navigation code that doesn't have to
+// be templatized.
+//
+//===----------------------------------------------------------------------===//
+
+class Path {
+  /// Entry - Each step in the path is a node pointer and an offset into that
+  /// node.
+  struct Entry {
+    void *node;
+    unsigned size;
+    unsigned offset;
+
+    Entry(void *Node, unsigned Size, unsigned Offset)
+      : node(Node), size(Size), offset(Offset) {}
+
+    Entry(NodeRef Node, unsigned Offset)
+      : node(&Node.subtree(0)), size(Node.size()), offset(Offset) {}
+
+    NodeRef &subtree(unsigned i) const {
+      return reinterpret_cast<NodeRef*>(node)[i];
+    }
+  };
+
+  /// path - The path entries, path[0] is the root node, path.back() is a leaf.
+  SmallVector<Entry, 4> path;
+
+public:
+  // Node accessors.
+  template <typename NodeT> NodeT &node(unsigned Level) const {
+    return *reinterpret_cast<NodeT*>(path[Level].node);
+  }
+  unsigned size(unsigned Level) const { return path[Level].size; }
+  unsigned offset(unsigned Level) const { return path[Level].offset; }
+  unsigned &offset(unsigned Level) { return path[Level].offset; }
+
+  // Leaf accessors.
+  template <typename NodeT> NodeT &leaf() const {
+    return *reinterpret_cast<NodeT*>(path.back().node);
+  }
+  unsigned leafSize() const { return path.back().size; }
+  unsigned leafOffset() const { return path.back().offset; }
+  unsigned &leafOffset() { return path.back().offset; }
+
+  /// valid - Return true if path is at a valid node, not at end().
+  bool valid() const {
+    return !path.empty() && path.front().offset < path.front().size;
+  }
+
+  /// height - Return the height of the tree corresponding to this path.
+  /// This matches map->height in a full path.
+  unsigned height() const { return path.size() - 1; }
+
+  /// subtree - Get the subtree referenced from Level. When the path is
+  /// consistent, node(Level + 1) == subtree(Level).
+  /// @param Level 0..height-1. The leaves have no subtrees.
+  NodeRef &subtree(unsigned Level) const {
+    return path[Level].subtree(path[Level].offset);
+  }
+
+  /// reset - Reset cached information about node(Level) from subtree(Level-1).
+  /// @param Level 1..height. The node to update after parent node changed.
+  void reset(unsigned Level) {
+    path[Level] = Entry(subtree(Level - 1), offset(Level));
+  }
+
+  /// push - Add entry to path.
+  /// @param Node   Node to add, should be subtree(path.size()-1).
+  /// @param Offset Offset into Node.
+  void push(NodeRef Node, unsigned Offset) {
+    path.push_back(Entry(Node, Offset));
+  }
+
+  /// pop - Remove the last path entry.
+  void pop() {
+    path.pop_back();
+  }
+
+  /// setSize - Set the size of a node both in the path and in the tree.
+  /// @param Level 0..height. Note that setting the root size won't change
+  ///              map->rootSize.
+  /// @param Size  New node size.
+  void setSize(unsigned Level, unsigned Size) {
+    path[Level].size = Size;
+    if (Level)
+      subtree(Level - 1).setSize(Size);
+  }
+
+  /// setRoot - Clear the path and set a new root node.
+  /// @param Node   New root node.
+  /// @param Size   New root size.
+  /// @param Offset Offset into root node.
+  void setRoot(void *Node, unsigned Size, unsigned Offset) {
+    path.clear();
+    path.push_back(Entry(Node, Size, Offset));
+  }
+
+  /// replaceRoot - Replace the current root node with two new entries after
+  /// the tree height has increased.
+  /// @param Root    The new root node.
+  /// @param Size    Number of entries in the new root.
+  /// @param Offsets Offsets into the root and first branch nodes.
+  void replaceRoot(void *Root, unsigned Size, IdxPair Offsets);
+
+  /// getLeftSibling - Get the left sibling node at Level, or a null NodeRef.
+  /// @param Level Get the sibling to node(Level).
+  /// @return      Left sibling, or NodeRef().
+  NodeRef getLeftSibling(unsigned Level) const;
+
+  /// moveLeft - Move path to the left sibling at Level. Leave nodes below
+  /// Level unaltered.
+  /// @param Level Move node(Level).
+  void moveLeft(unsigned Level);
+
+  /// fillLeft - Grow path to Height by taking leftmost branches.
+  /// @param Height The target height.
+  void fillLeft(unsigned Height) {
+    while (height() < Height)
+      push(subtree(height()), 0);
+  }
+
+  /// getRightSibling - Get the right sibling node at Level, or a null NodeRef.
+  /// @param Level Get the sibling to node(Level).
+  /// @return      Right sibling, or NodeRef().
+  NodeRef getRightSibling(unsigned Level) const;
+
+  /// moveRight - Move path to the right sibling at Level. Leave nodes below
+  /// Level unaltered.
+  /// @param Level Move node(Level).
+  void moveRight(unsigned Level);
+
+  /// atBegin - Return true if path is at begin().
+  bool atBegin() const {
+    for (unsigned i = 0, e = path.size(); i != e; ++i)
+      if (path[i].offset != 0)
+        return false;
+    return true;
+  }
+
+  /// atLastEntry - Return true if the path is at the last entry of the node at
+  /// Level.
+  /// @param Level Node to examine.
+  bool atLastEntry(unsigned Level) const {
+    return path[Level].offset == path[Level].size - 1;
+  }
+
+  /// legalizeForInsert - Prepare the path for an insertion at Level. When the
+  /// path is at end(), node(Level) may not be a legal node. legalizeForInsert
+  /// ensures that node(Level) is real by moving back to the last node at
+  /// Level, and setting offset(Level) to size(Level) if required.
+  /// @param Level The level where an insertion is about to take place.
+  void legalizeForInsert(unsigned Level) {
+    if (valid())
+      return;
+    moveLeft(Level);
+    ++path[Level].offset;
+  }
+};
+
+} // namespace IntervalMapImpl
+
+
+//===----------------------------------------------------------------------===//
+//---                          IntervalMap                               ----//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT,
+          unsigned N = IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
+          typename Traits = IntervalMapInfo<KeyT> >
+class IntervalMap {
+  typedef IntervalMapImpl::NodeSizer<KeyT, ValT> Sizer;
+  typedef IntervalMapImpl::LeafNode<KeyT, ValT, Sizer::LeafSize, Traits> Leaf;
+  typedef IntervalMapImpl::BranchNode<KeyT, ValT, Sizer::BranchSize, Traits>
+    Branch;
+  typedef IntervalMapImpl::LeafNode<KeyT, ValT, N, Traits> RootLeaf;
+  typedef IntervalMapImpl::IdxPair IdxPair;
+
+  // The RootLeaf capacity is given as a template parameter. We must compute
+  // the corresponding RootBranch capacity.
+  enum {
+    DesiredRootBranchCap = (sizeof(RootLeaf) - sizeof(KeyT)) /
+      (sizeof(KeyT) + sizeof(IntervalMapImpl::NodeRef)),
+    RootBranchCap = DesiredRootBranchCap ? DesiredRootBranchCap : 1
+  };
+
+  typedef IntervalMapImpl::BranchNode<KeyT, ValT, RootBranchCap, Traits>
+    RootBranch;
+
+  // When branched, we store a global start key as well as the branch node.
+  struct RootBranchData {
+    KeyT start;
+    RootBranch node;
+  };
+
+  enum {
+    RootDataSize = sizeof(RootBranchData) > sizeof(RootLeaf) ?
+                   sizeof(RootBranchData) : sizeof(RootLeaf)
+  };
+
+public:
+  typedef typename Sizer::Allocator Allocator;
+  typedef KeyT KeyType;
+  typedef ValT ValueType;
+  typedef Traits KeyTraits;
+
+private:
+  // The root data is either a RootLeaf or a RootBranchData instance.
+  // We can't put them in a union since C++03 doesn't allow non-trivial
+  // constructors in unions.
+  // Instead, we use a char array with pointer alignment. The alignment is
+  // ensured by the allocator member in the class, but still verified in the
+  // constructor. We don't support keys or values that are more aligned than a
+  // pointer.
+  char data[RootDataSize];
+
+  // Tree height.
+  // 0: Leaves in root.
+  // 1: Root points to leaf.
+  // 2: root->branch->leaf ...
+  unsigned height;
+
+  // Number of entries in the root node.
+  unsigned rootSize;
+
+  // Allocator used for creating external nodes.
+  Allocator &allocator;
+
+  /// dataAs - Represent data as a node type without breaking aliasing rules.
+  template <typename T>
+  T &dataAs() const {
+    union {
+      const char *d;
+      T *t;
+    } u;
+    u.d = data;
+    return *u.t;
+  }
+
+  const RootLeaf &rootLeaf() const {
+    assert(!branched() && "Cannot access leaf data in branched root");
+    return dataAs<RootLeaf>();
+  }
+  RootLeaf &rootLeaf() {
+    assert(!branched() && "Cannot access leaf data in branched root");
+    return dataAs<RootLeaf>();
+  }
+  const RootBranchData &rootBranchData() const {
+    assert(branched() && "Cannot access branch data in non-branched root");
+    return dataAs<RootBranchData>();
+  }
+  RootBranchData &rootBranchData() {
+    assert(branched() && "Cannot access branch data in non-branched root");
+    return dataAs<RootBranchData>();
+  }
+  const RootBranch &rootBranch() const { return rootBranchData().node; }
+  RootBranch &rootBranch() { return rootBranchData().node; }
+  KeyT rootBranchStart() const { return rootBranchData().start; }
+  KeyT &rootBranchStart() { return rootBranchData().start; }
+
+  template <typename NodeT> NodeT *newNode() {
+    return new(allocator.template Allocate<NodeT>()) NodeT();
+  }
+
+  template <typename NodeT> void deleteNode(NodeT *P) {
+    P->~NodeT();
+    allocator.Deallocate(P);
+  }
+
+  IdxPair branchRoot(unsigned Position);
+  IdxPair splitRoot(unsigned Position);
+
+  void switchRootToBranch() {
+    rootLeaf().~RootLeaf();
+    height = 1;
+    new (&rootBranchData()) RootBranchData();
+  }
+
+  void switchRootToLeaf() {
+    rootBranchData().~RootBranchData();
+    height = 0;
+    new(&rootLeaf()) RootLeaf();
+  }
+
+  bool branched() const { return height > 0; }
+
+  ValT treeSafeLookup(KeyT x, ValT NotFound) const;
+  void visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef,
+                                         unsigned Level));
+  void deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level);
+
+public:
+  explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) {
+    assert((uintptr_t(data) & (alignOf<RootLeaf>() - 1)) == 0 &&
+           "Insufficient alignment");
+    new(&rootLeaf()) RootLeaf();
+  }
+
+  ~IntervalMap() {
+    clear();
+    rootLeaf().~RootLeaf();
+  }
+
+  /// empty - Return true when no intervals are mapped.
+  bool empty() const {
+    return rootSize == 0;
+  }
+
+  /// start - Return the smallest mapped key in a non-empty map.
+  KeyT start() const {
+    assert(!empty() && "Empty IntervalMap has no start");
+    return !branched() ? rootLeaf().start(0) : rootBranchStart();
+  }
+
+  /// stop - Return the largest mapped key in a non-empty map.
+  KeyT stop() const {
+    assert(!empty() && "Empty IntervalMap has no stop");
+    return !branched() ? rootLeaf().stop(rootSize - 1) :
+                         rootBranch().stop(rootSize - 1);
+  }
+
+  /// lookup - Return the mapped value at x or NotFound.
+  ValT lookup(KeyT x, ValT NotFound = ValT()) const {
+    if (empty() || Traits::startLess(x, start()) || Traits::stopLess(stop(), x))
+      return NotFound;
+    return branched() ? treeSafeLookup(x, NotFound) :
+                        rootLeaf().safeLookup(x, NotFound);
+  }
+
+  /// insert - Add a mapping of [a;b] to y, coalesce with adjacent intervals.
+  /// It is assumed that no key in the interval is mapped to another value, but
+  /// overlapping intervals already mapped to y will be coalesced.
+  void insert(KeyT a, KeyT b, ValT y) {
+    if (branched() || rootSize == RootLeaf::Capacity)
+      return find(a).insert(a, b, y);
+
+    // Easy insert into root leaf.
+    unsigned p = rootLeaf().findFrom(0, rootSize, a);
+    rootSize = rootLeaf().insertFrom(p, rootSize, a, b, y);
+  }
+
+  /// clear - Remove all entries.
+  void clear();
+
+  class const_iterator;
+  class iterator;
+  friend class const_iterator;
+  friend class iterator;
+
+  const_iterator begin() const {
+    const_iterator I(*this);
+    I.goToBegin();
+    return I;
+  }
+
+  iterator begin() {
+    iterator I(*this);
+    I.goToBegin();
+    return I;
+  }
+
+  const_iterator end() const {
+    const_iterator I(*this);
+    I.goToEnd();
+    return I;
+  }
+
+  iterator end() {
+    iterator I(*this);
+    I.goToEnd();
+    return I;
+  }
+
+  /// find - Return an iterator pointing to the first interval ending at or
+  /// after x, or end().
+  const_iterator find(KeyT x) const {
+    const_iterator I(*this);
+    I.find(x);
+    return I;
+  }
+
+  iterator find(KeyT x) {
+    iterator I(*this);
+    I.find(x);
+    return I;
+  }
+};
+
+/// treeSafeLookup - Return the mapped value at x or NotFound, assuming a
+/// branched root.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+ValT IntervalMap<KeyT, ValT, N, Traits>::
+treeSafeLookup(KeyT x, ValT NotFound) const {
+  assert(branched() && "treeLookup assumes a branched root");
+
+  IntervalMapImpl::NodeRef NR = rootBranch().safeLookup(x);
+  for (unsigned h = height-1; h; --h)
+    NR = NR.get<Branch>().safeLookup(x);
+  return NR.get<Leaf>().safeLookup(x, NotFound);
+}
+
+
+// branchRoot - Switch from a leaf root to a branched root.
+// Return the new (root offset, node offset) corresponding to Position.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>::
+branchRoot(unsigned Position) {
+  using namespace IntervalMapImpl;
+  // How many external leaf nodes to hold RootLeaf+1?
+  const unsigned Nodes = RootLeaf::Capacity / Leaf::Capacity + 1;
+
+  // Compute element distribution among new nodes.
+  unsigned size[Nodes];
+  IdxPair NewOffset(0, Position);
+
+  // It is very common for the root node to be smaller than external nodes.
+  if (Nodes == 1)
+    size[0] = rootSize;
+  else
+    NewOffset = distribute(Nodes, rootSize, Leaf::Capacity, NULL, size,
+                           Position, true);
+
+  // Allocate new nodes.
+  unsigned pos = 0;
+  NodeRef node[Nodes];
+  for (unsigned n = 0; n != Nodes; ++n) {
+    Leaf *L = newNode<Leaf>();
+    L->copy(rootLeaf(), pos, 0, size[n]);
+    node[n] = NodeRef(L, size[n]);
+    pos += size[n];
+  }
+
+  // Destroy the old leaf node, construct branch node instead.
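+  // (switchRootToBranch() runs the RootLeaf destructor and constructs a
+  // RootBranchData in its place; the elements already copied into the new
+  // external leaves above are unaffected by the switch.)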
+  switchRootToBranch();
+  for (unsigned n = 0; n != Nodes; ++n) {
+    rootBranch().stop(n) = node[n].template get<Leaf>().stop(size[n]-1);
+    rootBranch().subtree(n) = node[n];
+  }
+  rootBranchStart() = node[0].template get<Leaf>().start(0);
+  rootSize = Nodes;
+  return NewOffset;
+}
+
+// splitRoot - Split the current BranchRoot into multiple Branch nodes.
+// Return the new (root offset, node offset) corresponding to Position.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+IntervalMapImpl::IdxPair IntervalMap<KeyT, ValT, N, Traits>::
+splitRoot(unsigned Position) {
+  using namespace IntervalMapImpl;
+  // How many external branch nodes to hold RootBranch+1?
+  const unsigned Nodes = RootBranch::Capacity / Branch::Capacity + 1;
+
+  // Compute element distribution among new nodes.
+  unsigned Size[Nodes];
+  IdxPair NewOffset(0, Position);
+
+  // It is very common for the root node to be smaller than external nodes.
+  if (Nodes == 1)
+    Size[0] = rootSize;
+  else
+    NewOffset = distribute(Nodes, rootSize, Leaf::Capacity, NULL, Size,
+                           Position, true);
+
+  // Allocate new nodes.
+  unsigned Pos = 0;
+  NodeRef Node[Nodes];
+  for (unsigned n = 0; n != Nodes; ++n) {
+    Branch *B = newNode<Branch>();
+    B->copy(rootBranch(), Pos, 0, Size[n]);
+    Node[n] = NodeRef(B, Size[n]);
+    Pos += Size[n];
+  }
+
+  for (unsigned n = 0; n != Nodes; ++n) {
+    rootBranch().stop(n) = Node[n].template get<Branch>().stop(Size[n]-1);
+    rootBranch().subtree(n) = Node[n];
+  }
+  rootSize = Nodes;
+  ++height;
+  return NewOffset;
+}
+
+/// visitNodes - Visit each external node.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+visitNodes(void (IntervalMap::*f)(IntervalMapImpl::NodeRef, unsigned Height)) {
+  if (!branched())
+    return;
+  SmallVector<IntervalMapImpl::NodeRef, 4> Refs, NextRefs;
+
+  // Collect level 0 nodes from the root.
+  for (unsigned i = 0; i != rootSize; ++i)
+    Refs.push_back(rootBranch().subtree(i));
+
+  // Visit all branch nodes.
+  for (unsigned h = height - 1; h; --h) {
+    for (unsigned i = 0, e = Refs.size(); i != e; ++i) {
+      for (unsigned j = 0, s = Refs[i].size(); j != s; ++j)
+        NextRefs.push_back(Refs[i].subtree(j));
+      (this->*f)(Refs[i], h);
+    }
+    Refs.clear();
+    Refs.swap(NextRefs);
+  }
+
+  // Visit all leaf nodes.
+  for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+    (this->*f)(Refs[i], 0);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+deleteNode(IntervalMapImpl::NodeRef Node, unsigned Level) {
+  if (Level)
+    deleteNode(&Node.get<Branch>());
+  else
+    deleteNode(&Node.get<Leaf>());
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+clear() {
+  if (branched()) {
+    visitNodes(&IntervalMap::deleteNode);
+    switchRootToLeaf();
+  }
+  rootSize = 0;
+}
+
+//===----------------------------------------------------------------------===//
+//---                  IntervalMap::const_iterator                       ----//
+//===----------------------------------------------------------------------===//
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+class IntervalMap<KeyT, ValT, N, Traits>::const_iterator :
+  public std::iterator<std::bidirectional_iterator_tag, ValT> {
+protected:
+  friend class IntervalMap;
+
+  // The map referred to.
+  IntervalMap *map;
+
+  // We store a full path from the root to the current position.
+  // The path may be partially filled, but never between iterator calls.
+  IntervalMapImpl::Path path;
+
+  explicit const_iterator(const IntervalMap &map) :
+    map(const_cast<IntervalMap*>(&map)) {}
+
+  bool branched() const {
+    assert(map && "Invalid iterator");
+    return map->branched();
+  }
+
+  void setRoot(unsigned Offset) {
+    if (branched())
+      path.setRoot(&map->rootBranch(), map->rootSize, Offset);
+    else
+      path.setRoot(&map->rootLeaf(), map->rootSize, Offset);
+  }
+
+  void pathFillFind(KeyT x);
+  void treeFind(KeyT x);
+  void treeAdvanceTo(KeyT x);
+
+  /// unsafeStart - Writable access to start() for iterator.
+  KeyT &unsafeStart() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().start(path.leafOffset()) :
+                        path.leaf<RootLeaf>().start(path.leafOffset());
+  }
+
+  /// unsafeStop - Writable access to stop() for iterator.
+  KeyT &unsafeStop() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().stop(path.leafOffset()) :
+                        path.leaf<RootLeaf>().stop(path.leafOffset());
+  }
+
+  /// unsafeValue - Writable access to value() for iterator.
+  ValT &unsafeValue() const {
+    assert(valid() && "Cannot access invalid iterator");
+    return branched() ? path.leaf<Leaf>().value(path.leafOffset()) :
+                        path.leaf<RootLeaf>().value(path.leafOffset());
+  }
+
+public:
+  /// const_iterator - Create an iterator that isn't pointing anywhere.
+  const_iterator() : map(0) {}
+
+  /// valid - Return true if the current position is valid, false for end().
+  bool valid() const { return path.valid(); }
+
+  /// start - Return the beginning of the current interval.
+  const KeyT &start() const { return unsafeStart(); }
+
+  /// stop - Return the end of the current interval.
+  const KeyT &stop() const { return unsafeStop(); }
+
+  /// value - Return the mapped value at the current interval.
+  const ValT &value() const { return unsafeValue(); }
+
+  const ValT &operator*() const { return value(); }
+
+  bool operator==(const const_iterator &RHS) const {
+    assert(map == RHS.map && "Cannot compare iterators from different maps");
+    if (!valid())
+      return !RHS.valid();
+    if (path.leafOffset() != RHS.path.leafOffset())
+      return false;
+    return &path.template leaf<Leaf>() == &RHS.path.template leaf<Leaf>();
+  }
+
+  bool operator!=(const const_iterator &RHS) const {
+    return !operator==(RHS);
+  }
+
+  /// goToBegin - Move to the first interval in map.
+  void goToBegin() {
+    setRoot(0);
+    if (branched())
+      path.fillLeft(map->height);
+  }
+
+  /// goToEnd - Move beyond the last interval in map.
+  void goToEnd() {
+    setRoot(map->rootSize);
+  }
+
+  /// preincrement - move to the next interval.
+  const_iterator &operator++() {
+    assert(valid() && "Cannot increment end()");
+    if (++path.leafOffset() == path.leafSize() && branched())
+      path.moveRight(map->height);
+    return *this;
+  }
+
+  /// postincrement - Don't do that!
+  const_iterator operator++(int) {
+    const_iterator tmp = *this;
+    operator++();
+    return tmp;
+  }
+
+  /// predecrement - move to the previous interval.
+  const_iterator &operator--() {
+    if (path.leafOffset() && (valid() || !branched()))
+      --path.leafOffset();
+    else
+      path.moveLeft(map->height);
+    return *this;
+  }
+
+  /// postdecrement - Don't do that!
+  const_iterator operator--(int) {
+    const_iterator tmp = *this;
+    operator--();
+    return tmp;
+  }
+
+  /// find - Move to the first interval with stop >= x, or end().
+  /// This is a full search from the root, the current position is ignored.
+  void find(KeyT x) {
+    if (branched())
+      treeFind(x);
+    else
+      setRoot(map->rootLeaf().findFrom(0, map->rootSize, x));
+  }
+
+  /// advanceTo - Move to the first interval with stop >= x, or end().
+  /// The search is started from the current position, and no earlier positions
+  /// can be found. This is much faster than find() for small moves.
+  void advanceTo(KeyT x) {
+    if (!valid())
+      return;
+    if (branched())
+      treeAdvanceTo(x);
+    else
+      path.leafOffset() =
+        map->rootLeaf().findFrom(path.leafOffset(), map->rootSize, x);
+  }
+
+};
+
+/// pathFillFind - Complete path by searching for x.
+/// @param x Key to search for.
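+/// The path is assumed valid down to its current height; each level below it
+/// is completed with safeFind(), which locates the first subtree whose stop
+/// key is not less than x, until a leaf offset is reached.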
+template +void IntervalMap:: +const_iterator::pathFillFind(KeyT x) { + IntervalMapImpl::NodeRef NR = path.subtree(path.height()); + for (unsigned i = map->height - path.height() - 1; i; --i) { + unsigned p = NR.get().safeFind(0, x); + path.push(NR, p); + NR = NR.subtree(p); + } + path.push(NR, NR.get().safeFind(0, x)); +} + +/// treeFind - Find in a branched tree. +/// @param x Key to search for. +template +void IntervalMap:: +const_iterator::treeFind(KeyT x) { + setRoot(map->rootBranch().findFrom(0, map->rootSize, x)); + if (valid()) + pathFillFind(x); +} + +/// treeAdvanceTo - Find position after the current one. +/// @param x Key to search for. +template +void IntervalMap:: +const_iterator::treeAdvanceTo(KeyT x) { + // Can we stay on the same leaf node? + if (!Traits::stopLess(path.leaf().stop(path.leafSize() - 1), x)) { + path.leafOffset() = path.leaf().safeFind(path.leafOffset(), x); + return; + } + + // Drop the current leaf. + path.pop(); + + // Search towards the root for a usable subtree. + if (path.height()) { + for (unsigned l = path.height() - 1; l; --l) { + if (!Traits::stopLess(path.node(l).stop(path.offset(l)), x)) { + // The branch node at l+1 is usable + path.offset(l + 1) = + path.node(l + 1).safeFind(path.offset(l + 1), x); + return pathFillFind(x); + } + path.pop(); + } + // Is the level-1 Branch usable? + if (!Traits::stopLess(map->rootBranch().stop(path.offset(0)), x)) { + path.offset(1) = path.node(1).safeFind(path.offset(1), x); + return pathFillFind(x); + } + } + + // We reached the root. + setRoot(map->rootBranch().findFrom(path.offset(0), map->rootSize, x)); + if (valid()) + pathFillFind(x); +} + +//===----------------------------------------------------------------------===// +//--- IntervalMap::iterator ----// +//===----------------------------------------------------------------------===// + +template +class IntervalMap::iterator : public const_iterator { + friend class IntervalMap; + typedef IntervalMapImpl::IdxPair IdxPair; + + explicit iterator(IntervalMap &map) : const_iterator(map) {} + + void setNodeStop(unsigned Level, KeyT Stop); + bool insertNode(unsigned Level, IntervalMapImpl::NodeRef Node, KeyT Stop); + template bool overflow(unsigned Level); + void treeInsert(KeyT a, KeyT b, ValT y); + void eraseNode(unsigned Level); + void treeErase(bool UpdateRoot = true); + bool canCoalesceLeft(KeyT Start, ValT x); + bool canCoalesceRight(KeyT Stop, ValT x); + +public: + /// iterator - Create null iterator. + iterator() {} + + /// setStart - Move the start of the current interval. + /// This may cause coalescing with the previous interval. + /// @param a New start key, must not overlap the previous interval. + void setStart(KeyT a); + + /// setStop - Move the end of the current interval. + /// This may cause coalescing with the following interval. + /// @param b New stop key, must not overlap the following interval. + void setStop(KeyT b); + + /// setValue - Change the mapped value of the current interval. + /// This may cause coalescing with the previous and following intervals. + /// @param x New value. + void setValue(ValT x); + + /// setStartUnchecked - Move the start of the current interval without + /// checking for coalescing or overlaps. + /// This should only be used when it is known that coalescing is not required. + /// @param a New start key. + void setStartUnchecked(KeyT a) { this->unsafeStart() = a; } + + /// setStopUnchecked - Move the end of the current interval without checking + /// for coalescing or overlaps. 
+ /// This should only be used when it is known that coalescing is not required. + /// @param b New stop key. + void setStopUnchecked(KeyT b) { + this->unsafeStop() = b; + // Update keys in branch nodes as well. + if (this->path.atLastEntry(this->path.height())) + setNodeStop(this->path.height(), b); + } + + /// setValueUnchecked - Change the mapped value of the current interval + /// without checking for coalescing. + /// @param x New value. + void setValueUnchecked(ValT x) { this->unsafeValue() = x; } + + /// insert - Insert mapping [a;b] -> y before the current position. + void insert(KeyT a, KeyT b, ValT y); + + /// erase - Erase the current interval. + void erase(); + + iterator &operator++() { + const_iterator::operator++(); + return *this; + } + + iterator operator++(int) { + iterator tmp = *this; + operator++(); + return tmp; + } + + iterator &operator--() { + const_iterator::operator--(); + return *this; + } + + iterator operator--(int) { + iterator tmp = *this; + operator--(); + return tmp; + } + +}; + +/// canCoalesceLeft - Can the current interval coalesce to the left after +/// changing start or value? +/// @param Start New start of current interval. +/// @param Value New value for current interval. +/// @return True when updating the current interval would enable coalescing. +template +bool IntervalMap:: +iterator::canCoalesceLeft(KeyT Start, ValT Value) { + using namespace IntervalMapImpl; + Path &P = this->path; + if (!this->branched()) { + unsigned i = P.leafOffset(); + RootLeaf &Node = P.leaf(); + return i && Node.value(i-1) == Value && + Traits::adjacent(Node.stop(i-1), Start); + } + // Branched. + if (unsigned i = P.leafOffset()) { + Leaf &Node = P.leaf(); + return Node.value(i-1) == Value && Traits::adjacent(Node.stop(i-1), Start); + } else if (NodeRef NR = P.getLeftSibling(P.height())) { + unsigned i = NR.size() - 1; + Leaf &Node = NR.get(); + return Node.value(i) == Value && Traits::adjacent(Node.stop(i), Start); + } + return false; +} + +/// canCoalesceRight - Can the current interval coalesce to the right after +/// changing stop or value? +/// @param Stop New stop of current interval. +/// @param Value New value for current interval. +/// @return True when updating the current interval would enable coalescing. +template +bool IntervalMap:: +iterator::canCoalesceRight(KeyT Stop, ValT Value) { + using namespace IntervalMapImpl; + Path &P = this->path; + unsigned i = P.leafOffset() + 1; + if (!this->branched()) { + if (i >= P.leafSize()) + return false; + RootLeaf &Node = P.leaf(); + return Node.value(i) == Value && Traits::adjacent(Stop, Node.start(i)); + } + // Branched. + if (i < P.leafSize()) { + Leaf &Node = P.leaf(); + return Node.value(i) == Value && Traits::adjacent(Stop, Node.start(i)); + } else if (NodeRef NR = P.getRightSibling(P.height())) { + Leaf &Node = NR.get(); + return Node.value(0) == Value && Traits::adjacent(Stop, Node.start(0)); + } + return false; +} + +/// setNodeStop - Update the stop key of the current node at level and above. +template +void IntervalMap:: +iterator::setNodeStop(unsigned Level, KeyT Stop) { + // There are no references to the root node, so nothing to update. + if (!Level) + return; + IntervalMapImpl::Path &P = this->path; + // Update nodes pointing to the current node. + while (--Level) { + P.node(Level).stop(P.offset(Level)) = Stop; + if (!P.atLastEntry(Level)) + return; + } + // Update root separately since it has a different layout. 
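+  // (The loop above exits with Level == 0, which is the root branch. Its
+  // capacity, RootBranchCap, differs from the external Branch capacity, so
+  // it is a distinct node type with its own layout.)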
+  P.node<RootBranch>(Level).stop(P.offset(Level)) = Stop;
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setStart(KeyT a) {
+  assert(Traits::stopLess(a, this->stop()) && "Cannot move start beyond stop");
+  KeyT &CurStart = this->unsafeStart();
+  if (!Traits::startLess(a, CurStart) || !canCoalesceLeft(a, this->value())) {
+    CurStart = a;
+    return;
+  }
+  // Coalesce with the interval to the left.
+  --*this;
+  a = this->start();
+  erase();
+  setStartUnchecked(a);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setStop(KeyT b) {
+  assert(Traits::stopLess(this->start(), b) && "Cannot move stop beyond start");
+  if (Traits::startLess(b, this->stop()) ||
+      !canCoalesceRight(b, this->value())) {
+    setStopUnchecked(b);
+    return;
+  }
+  // Coalesce with interval to the right.
+  KeyT a = this->start();
+  erase();
+  setStartUnchecked(a);
+}
+
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::setValue(ValT x) {
+  setValueUnchecked(x);
+  if (canCoalesceRight(this->stop(), x)) {
+    KeyT a = this->start();
+    erase();
+    setStartUnchecked(a);
+  }
+  if (canCoalesceLeft(this->start(), x)) {
+    --*this;
+    KeyT a = this->start();
+    erase();
+    setStartUnchecked(a);
+  }
+}
+
+/// insertNode - Insert a node before the current path at Level.
+/// Leave the current path pointing at the new node.
+/// @param Level path index of the node to be inserted.
+/// @param Node The node to be inserted.
+/// @param Stop The last index in the new node.
+/// @return True if the tree height was increased.
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+bool IntervalMap<KeyT, ValT, N, Traits>::
+iterator::insertNode(unsigned Level, IntervalMapImpl::NodeRef Node, KeyT Stop) {
+  assert(Level && "Cannot insert next to the root");
+  bool SplitRoot = false;
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+
+  if (Level == 1) {
+    // Insert into the root branch node.
+    if (IM.rootSize < RootBranch::Capacity) {
+      IM.rootBranch().insert(P.offset(0), IM.rootSize, Node, Stop);
+      P.setSize(0, ++IM.rootSize);
+      P.reset(Level);
+      return SplitRoot;
+    }
+
+    // We need to split the root while keeping our position.
+    SplitRoot = true;
+    IdxPair Offset = IM.splitRoot(P.offset(0));
+    P.replaceRoot(&IM.rootBranch(), IM.rootSize, Offset);
+
+    // Fall through to insert at the new higher level.
+    ++Level;
+  }
+
+  // When inserting before end(), make sure we have a valid path.
+  P.legalizeForInsert(--Level);
+
+  // Insert into the branch node at Level-1.
+  if (P.size(Level) == Branch::Capacity) {
+    // Branch node is full, handle the overflow.
+    assert(!SplitRoot && "Cannot overflow after splitting the root");
+    SplitRoot = overflow<Branch>(Level);
+    Level += SplitRoot;
+  }
+  P.node<Branch>(Level).insert(P.offset(Level), P.size(Level), Node, Stop);
+  P.setSize(Level, P.size(Level) + 1);
+  if (P.atLastEntry(Level))
+    setNodeStop(Level, Stop);
+  P.reset(Level + 1);
+  return SplitRoot;
+}
+
+// insert
+template <typename KeyT, typename ValT, unsigned N, typename Traits>
+void IntervalMap<KeyT, ValT, N, Traits>::
+iterator::insert(KeyT a, KeyT b, ValT y) {
+  if (this->branched())
+    return treeInsert(a, b, y);
+  IntervalMap &IM = *this->map;
+  IntervalMapImpl::Path &P = this->path;
+
+  // Try simple root leaf insert.
+  unsigned Size = IM.rootLeaf().insertFrom(P.leafOffset(), IM.rootSize, a, b, y);
+
+  // Was the root node insert successful?
+  if (Size <= RootLeaf::Capacity) {
+    P.setSize(0, IM.rootSize = Size);
+    return;
+  }
+
+  // Root leaf node is full, we must branch.
+  IdxPair Offset = IM.branchRoot(P.leafOffset());
+  P.replaceRoot(&IM.rootBranch(), IM.rootSize, Offset);
+
+  // Now it fits in the new leaf.
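+  // (branchRoot() left the path pointing at the corresponding position in
+  // one of the new leaf nodes, so the same insertion can be retried there.)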
+ treeInsert(a, b, y); +} + + +template +void IntervalMap:: +iterator::treeInsert(KeyT a, KeyT b, ValT y) { + using namespace IntervalMapImpl; + Path &P = this->path; + + if (!P.valid()) + P.legalizeForInsert(this->map->height); + + // Check if this insertion will extend the node to the left. + if (P.leafOffset() == 0 && Traits::startLess(a, P.leaf().start(0))) { + // Node is growing to the left, will it affect a left sibling node? + if (NodeRef Sib = P.getLeftSibling(P.height())) { + Leaf &SibLeaf = Sib.get(); + unsigned SibOfs = Sib.size() - 1; + if (SibLeaf.value(SibOfs) == y && + Traits::adjacent(SibLeaf.stop(SibOfs), a)) { + // This insertion will coalesce with the last entry in SibLeaf. We can + // handle it in two ways: + // 1. Extend SibLeaf.stop to b and be done, or + // 2. Extend a to SibLeaf, erase the SibLeaf entry and continue. + // We prefer 1., but need 2 when coalescing to the right as well. + Leaf &CurLeaf = P.leaf(); + P.moveLeft(P.height()); + if (Traits::stopLess(b, CurLeaf.start(0)) && + (y != CurLeaf.value(0) || !Traits::adjacent(b, CurLeaf.start(0)))) { + // Easy, just extend SibLeaf and we're done. + setNodeStop(P.height(), SibLeaf.stop(SibOfs) = b); + return; + } else { + // We have both left and right coalescing. Erase the old SibLeaf entry + // and continue inserting the larger interval. + a = SibLeaf.start(SibOfs); + treeErase(/* UpdateRoot= */false); + } + } + } else { + // No left sibling means we are at begin(). Update cached bound. + this->map->rootBranchStart() = a; + } + } + + // When we are inserting at the end of a leaf node, we must update stops. + unsigned Size = P.leafSize(); + bool Grow = P.leafOffset() == Size; + Size = P.leaf().insertFrom(P.leafOffset(), Size, a, b, y); + + // Leaf insertion unsuccessful? Overflow and try again. + if (Size > Leaf::Capacity) { + overflow(P.height()); + Grow = P.leafOffset() == P.leafSize(); + Size = P.leaf().insertFrom(P.leafOffset(), P.leafSize(), a, b, y); + assert(Size <= Leaf::Capacity && "overflow() didn't make room"); + } + + // Inserted, update offset and leaf size. + P.setSize(P.height(), Size); + + // Insert was the last node entry, update stops. + if (Grow) + setNodeStop(P.height(), b); +} + +/// erase - erase the current interval and move to the next position. +template +void IntervalMap:: +iterator::erase() { + IntervalMap &IM = *this->map; + IntervalMapImpl::Path &P = this->path; + assert(P.valid() && "Cannot erase end()"); + if (this->branched()) + return treeErase(); + IM.rootLeaf().erase(P.leafOffset(), IM.rootSize); + P.setSize(0, --IM.rootSize); +} + +/// treeErase - erase() for a branched tree. +template +void IntervalMap:: +iterator::treeErase(bool UpdateRoot) { + IntervalMap &IM = *this->map; + IntervalMapImpl::Path &P = this->path; + Leaf &Node = P.leaf(); + + // Nodes are not allowed to become empty. + if (P.leafSize() == 1) { + IM.deleteNode(&Node); + eraseNode(IM.height); + // Update rootBranchStart if we erased begin(). + if (UpdateRoot && IM.branched() && P.valid() && P.atBegin()) + IM.rootBranchStart() = P.leaf().start(0); + return; + } + + // Erase current entry. + Node.erase(P.leafOffset(), P.leafSize()); + unsigned NewSize = P.leafSize() - 1; + P.setSize(IM.height, NewSize); + // When we erase the last entry, update stop and move to a legal position. 
+ if (P.leafOffset() == NewSize) { + setNodeStop(IM.height, Node.stop(NewSize - 1)); + P.moveRight(IM.height); + } else if (UpdateRoot && P.atBegin()) + IM.rootBranchStart() = P.leaf().start(0); +} + +/// eraseNode - Erase the current node at Level from its parent and move path to +/// the first entry of the next sibling node. +/// The node must be deallocated by the caller. +/// @param Level 1..height, the root node cannot be erased. +template +void IntervalMap:: +iterator::eraseNode(unsigned Level) { + assert(Level && "Cannot erase root node"); + IntervalMap &IM = *this->map; + IntervalMapImpl::Path &P = this->path; + + if (--Level == 0) { + IM.rootBranch().erase(P.offset(0), IM.rootSize); + P.setSize(0, --IM.rootSize); + // If this cleared the root, switch to height=0. + if (IM.empty()) { + IM.switchRootToLeaf(); + this->setRoot(0); + return; + } + } else { + // Remove node ref from branch node at Level. + Branch &Parent = P.node(Level); + if (P.size(Level) == 1) { + // Branch node became empty, remove it recursively. + IM.deleteNode(&Parent); + eraseNode(Level); + } else { + // Branch node won't become empty. + Parent.erase(P.offset(Level), P.size(Level)); + unsigned NewSize = P.size(Level) - 1; + P.setSize(Level, NewSize); + // If we removed the last branch, update stop and move to a legal pos. + if (P.offset(Level) == NewSize) { + setNodeStop(Level, Parent.stop(NewSize - 1)); + P.moveRight(Level); + } + } + } + // Update path cache for the new right sibling position. + if (P.valid()) { + P.reset(Level + 1); + P.offset(Level + 1) = 0; + } +} + +/// overflow - Distribute entries of the current node evenly among +/// its siblings and ensure that the current node is not full. +/// This may require allocating a new node. +/// @param NodeT The type of node at Level (Leaf or Branch). +/// @param Level path index of the overflowing node. +/// @return True when the tree height was changed. +template +template +bool IntervalMap:: +iterator::overflow(unsigned Level) { + using namespace IntervalMapImpl; + Path &P = this->path; + unsigned CurSize[4]; + NodeT *Node[4]; + unsigned Nodes = 0; + unsigned Elements = 0; + unsigned Offset = P.offset(Level); + + // Do we have a left sibling? + NodeRef LeftSib = P.getLeftSibling(Level); + if (LeftSib) { + Offset += Elements = CurSize[Nodes] = LeftSib.size(); + Node[Nodes++] = &LeftSib.get(); + } + + // Current node. + Elements += CurSize[Nodes] = P.size(Level); + Node[Nodes++] = &P.node(Level); + + // Do we have a right sibling? + NodeRef RightSib = P.getRightSibling(Level); + if (RightSib) { + Elements += CurSize[Nodes] = RightSib.size(); + Node[Nodes++] = &RightSib.get(); + } + + // Do we need to allocate a new node? + unsigned NewNode = 0; + if (Elements + 1 > Nodes * NodeT::Capacity) { + // Insert NewNode at the penultimate position, or after a single node. + NewNode = Nodes == 1 ? 1 : Nodes - 1; + CurSize[Nodes] = CurSize[NewNode]; + Node[Nodes] = Node[NewNode]; + CurSize[NewNode] = 0; + Node[NewNode] = this->map->newNode(); + ++Nodes; + } + + // Compute the new element distribution. + unsigned NewSize[4]; + IdxPair NewOffset = distribute(Nodes, Elements, NodeT::Capacity, + CurSize, NewSize, Offset, true); + adjustSiblingSizes(Node, Nodes, CurSize, NewSize); + + // Move current location to the leftmost node. + if (LeftSib) + P.moveLeft(Level); + + // Elements have been rearranged, now update node sizes and stops. 
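+  // (The loop below visits the participating siblings from left to right.
+  // A newly allocated node is linked into its parent with insertNode(),
+  // which may split the root and bump Level.)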
+ bool SplitRoot = false; + unsigned Pos = 0; + for (;;) { + KeyT Stop = Node[Pos]->stop(NewSize[Pos]-1); + if (NewNode && Pos == NewNode) { + SplitRoot = insertNode(Level, NodeRef(Node[Pos], NewSize[Pos]), Stop); + Level += SplitRoot; + } else { + P.setSize(Level, NewSize[Pos]); + setNodeStop(Level, Stop); + } + if (Pos + 1 == Nodes) + break; + P.moveRight(Level); + ++Pos; + } + + // Where was I? Find NewOffset. + while(Pos != NewOffset.first) { + P.moveLeft(Level); + --Pos; + } + P.offset(Level) = NewOffset.second; + return SplitRoot; +} + +//===----------------------------------------------------------------------===// +//--- IntervalMapOverlaps ----// +//===----------------------------------------------------------------------===// + +/// IntervalMapOverlaps - Iterate over the overlaps of mapped intervals in two +/// IntervalMaps. The maps may be different, but the KeyT and Traits types +/// should be the same. +/// +/// Typical uses: +/// +/// 1. Test for overlap: +/// bool overlap = IntervalMapOverlaps(a, b).valid(); +/// +/// 2. Enumerate overlaps: +/// for (IntervalMapOverlaps I(a, b); I.valid() ; ++I) { ... } +/// +template +class IntervalMapOverlaps { + typedef typename MapA::KeyType KeyType; + typedef typename MapA::KeyTraits Traits; + typename MapA::const_iterator posA; + typename MapB::const_iterator posB; + + /// advance - Move posA and posB forward until reaching an overlap, or until + /// either meets end. + /// Don't move the iterators if they are already overlapping. + void advance() { + if (!valid()) + return; + + if (Traits::stopLess(posA.stop(), posB.start())) { + // A ends before B begins. Catch up. + posA.advanceTo(posB.start()); + if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start())) + return; + } else if (Traits::stopLess(posB.stop(), posA.start())) { + // B ends before A begins. Catch up. + posB.advanceTo(posA.start()); + if (!posB.valid() || !Traits::stopLess(posA.stop(), posB.start())) + return; + } else + // Already overlapping. + return; + + for (;;) { + // Make a.end > b.start. + posA.advanceTo(posB.start()); + if (!posA.valid() || !Traits::stopLess(posB.stop(), posA.start())) + return; + // Make b.end > a.start. + posB.advanceTo(posA.start()); + if (!posB.valid() || !Traits::stopLess(posA.stop(), posB.start())) + return; + } + } + +public: + /// IntervalMapOverlaps - Create an iterator for the overlaps of a and b. + IntervalMapOverlaps(const MapA &a, const MapB &b) + : posA(b.empty() ? a.end() : a.find(b.start())), + posB(posA.valid() ? b.find(posA.start()) : b.end()) { advance(); } + + /// valid - Return true if iterator is at an overlap. + bool valid() const { + return posA.valid() && posB.valid(); + } + + /// a - access the left hand side in the overlap. + const typename MapA::const_iterator &a() const { return posA; } + + /// b - access the right hand side in the overlap. + const typename MapB::const_iterator &b() const { return posB; } + + /// start - Beginning of the overlapping interval. + KeyType start() const { + KeyType ak = a().start(); + KeyType bk = b().start(); + return Traits::startLess(ak, bk) ? bk : ak; + } + + /// stop - End of the overlapping interval. + KeyType stop() const { + KeyType ak = a().stop(); + KeyType bk = b().stop(); + return Traits::startLess(ak, bk) ? ak : bk; + } + + /// skipA - Move to the next overlap that doesn't involve a(). + void skipA() { + ++posA; + advance(); + } + + /// skipB - Move to the next overlap that doesn't involve b(). 
+ void skipB() { + ++posB; + advance(); + } + + /// Preincrement - Move to the next overlap. + IntervalMapOverlaps &operator++() { + // Bump the iterator that ends first. The other one may have more overlaps. + if (Traits::startLess(posB.stop(), posA.stop())) + skipB(); + else + skipA(); + return *this; + } + + /// advanceTo - Move to the first overlapping interval with + /// stopLess(x, stop()). + void advanceTo(KeyType x) { + if (!valid()) + return; + // Make sure advanceTo sees monotonic keys. + if (Traits::stopLess(posA.stop(), x)) + posA.advanceTo(x); + if (Traits::stopLess(posB.stop(), x)) + posB.advanceTo(x); + advance(); + } +}; + +} // namespace llvm + +#endif diff --git a/final/include/llvm/ADT/IntrusiveRefCntPtr.h b/final/include/llvm/ADT/IntrusiveRefCntPtr.h new file mode 100644 index 00000000000..37d4ac9d29d --- /dev/null +++ b/final/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -0,0 +1,230 @@ +//== llvm/ADT/IntrusiveRefCntPtr.h - Smart Refcounting Pointer ---*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines IntrusiveRefCntPtr, a template class that +// implements a "smart" pointer for objects that maintain their own +// internal reference count, and RefCountedBase/RefCountedBaseVPTR, two +// generic base classes for objects that wish to have their lifetimes +// managed using reference counting. +// +// IntrusiveRefCntPtr is similar to Boost's intrusive_ptr with added +// LLVM-style casting. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_INTRUSIVE_REF_CNT_PTR +#define LLVM_ADT_INTRUSIVE_REF_CNT_PTR + +#include + +#include "llvm/Support/Casting.h" + +namespace llvm { + + template + class IntrusiveRefCntPtr; + +//===----------------------------------------------------------------------===// +/// RefCountedBase - A generic base class for objects that wish to +/// have their lifetimes managed using reference counts. Classes +/// subclass RefCountedBase to obtain such functionality, and are +/// typically handled with IntrusivePtr "smart pointers" (see below) +/// which automatically handle the management of reference counts. +/// Objects that subclass RefCountedBase should not be allocated on +/// the stack, as invoking "delete" (which is called when the +/// reference count hits 0) on such objects is an error. +//===----------------------------------------------------------------------===// + template + class RefCountedBase { + unsigned ref_cnt; + + protected: + RefCountedBase() : ref_cnt(0) {} + + void Retain() { ++ref_cnt; } + void Release() { + assert (ref_cnt > 0 && "Reference count is already zero."); + if (--ref_cnt == 0) delete static_cast(this); + } + + friend class IntrusiveRefCntPtr; + }; + +//===----------------------------------------------------------------------===// +/// RefCountedBaseVPTR - A class that has the same function as +/// RefCountedBase, but with a virtual destructor. Should be used +/// instead of RefCountedBase for classes that already have virtual +/// methods to enforce dynamic allocation via 'new'. Classes that +/// inherit from RefCountedBaseVPTR can't be allocated on stack - +/// attempting to do this will produce a compile error. 
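+///
+/// A minimal sketch of the intended use ('Shape' is a hypothetical class,
+/// not part of LLVM):
+///
+///   class Shape : public RefCountedBaseVPTR<Shape> {
+///   public:
+///     virtual void draw() {}
+///   };
+///   IntrusiveRefCntPtr<Shape> S(new Shape); // Retain/Release run automatically.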
+//===----------------------------------------------------------------------===// + template + class RefCountedBaseVPTR { + unsigned ref_cnt; + + protected: + RefCountedBaseVPTR() : ref_cnt(0) {} + virtual ~RefCountedBaseVPTR() {} + + void Retain() { ++ref_cnt; } + void Release() { + assert (ref_cnt > 0 && "Reference count is already zero."); + if (--ref_cnt == 0) delete this; + } + + friend class IntrusiveRefCntPtr; + }; + +//===----------------------------------------------------------------------===// +/// IntrusiveRefCntPtr - A template class that implements a "smart pointer" +/// that assumes the wrapped object has a reference count associated +/// with it that can be managed via calls to +/// IntrusivePtrAddRef/IntrusivePtrRelease. The smart pointers +/// manage reference counts via the RAII idiom: upon creation of +/// smart pointer the reference count of the wrapped object is +/// incremented and upon destruction of the smart pointer the +/// reference count is decremented. This class also safely handles +/// wrapping NULL pointers. +/// +/// Reference counting is implemented via calls to +/// Obj->Retain()/Obj->Release(). Release() is required to destroy +/// the object when the reference count reaches zero. Inheriting from +/// RefCountedBase/RefCountedBaseVPTR takes care of this +/// automatically. +//===----------------------------------------------------------------------===// + template + class IntrusiveRefCntPtr { + T* Obj; + typedef IntrusiveRefCntPtr this_type; + public: + typedef T element_type; + + explicit IntrusiveRefCntPtr() : Obj(0) {} + + explicit IntrusiveRefCntPtr(T* obj) : Obj(obj) { + retain(); + } + + IntrusiveRefCntPtr(const IntrusiveRefCntPtr& S) : Obj(S.Obj) { + retain(); + } + + template + IntrusiveRefCntPtr(const IntrusiveRefCntPtr& S) + : Obj(S.getPtr()) { + retain(); + } + + IntrusiveRefCntPtr& operator=(const IntrusiveRefCntPtr& S) { + replace(S.getPtr()); + return *this; + } + + template + IntrusiveRefCntPtr& operator=(const IntrusiveRefCntPtr& S) { + replace(S.getPtr()); + return *this; + } + + IntrusiveRefCntPtr& operator=(T * S) { + replace(S); + return *this; + } + + ~IntrusiveRefCntPtr() { release(); } + + T& operator*() const { return *Obj; } + + T* operator->() const { return Obj; } + + T* getPtr() const { return Obj; } + + typedef T* (IntrusiveRefCntPtr::*unspecified_bool_type) () const; + operator unspecified_bool_type() const { + return Obj == 0 ? 
0 : &IntrusiveRefCntPtr::getPtr; + } + + void swap(IntrusiveRefCntPtr& other) { + T* tmp = other.Obj; + other.Obj = Obj; + Obj = tmp; + } + + private: + void retain() { if (Obj) Obj->Retain(); } + void release() { if (Obj) Obj->Release(); } + + void replace(T* S) { + this_type(S).swap(*this); + } + }; + + template + inline bool operator==(const IntrusiveRefCntPtr& A, + const IntrusiveRefCntPtr& B) + { + return A.getPtr() == B.getPtr(); + } + + template + inline bool operator!=(const IntrusiveRefCntPtr& A, + const IntrusiveRefCntPtr& B) + { + return A.getPtr() != B.getPtr(); + } + + template + inline bool operator==(const IntrusiveRefCntPtr& A, + U* B) + { + return A.getPtr() == B; + } + + template + inline bool operator!=(const IntrusiveRefCntPtr& A, + U* B) + { + return A.getPtr() != B; + } + + template + inline bool operator==(T* A, + const IntrusiveRefCntPtr& B) + { + return A == B.getPtr(); + } + + template + inline bool operator!=(T* A, + const IntrusiveRefCntPtr& B) + { + return A != B.getPtr(); + } + +//===----------------------------------------------------------------------===// +// LLVM-style downcasting support for IntrusiveRefCntPtr objects +//===----------------------------------------------------------------------===// + + template struct simplify_type > { + typedef T* SimpleType; + static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr& Val) { + return Val.getPtr(); + } + }; + + template struct simplify_type > { + typedef T* SimpleType; + static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr& Val) { + return Val.getPtr(); + } + }; + +} // end namespace llvm + +#endif // LLVM_ADT_INTRUSIVE_REF_CNT_PTR diff --git a/final/include/llvm/ADT/NullablePtr.h b/final/include/llvm/ADT/NullablePtr.h new file mode 100644 index 00000000000..a9c47a138ec --- /dev/null +++ b/final/include/llvm/ADT/NullablePtr.h @@ -0,0 +1,52 @@ +//===- llvm/ADT/NullablePtr.h - A pointer that allows null ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines and implements the NullablePtr class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_NULLABLE_PTR_H +#define LLVM_ADT_NULLABLE_PTR_H + +#include +#include + +namespace llvm { +/// NullablePtr pointer wrapper - NullablePtr is used for APIs where a +/// potentially-null pointer gets passed around that must be explicitly handled +/// in lots of places. By putting a wrapper around the null pointer, it makes +/// it more likely that the null pointer case will be handled correctly. +template +class NullablePtr { + T *Ptr; +public: + NullablePtr(T *P = 0) : Ptr(P) {} + + bool isNull() const { return Ptr == 0; } + bool isNonNull() const { return Ptr != 0; } + + /// get - Return the pointer if it is non-null. + const T *get() const { + assert(Ptr && "Pointer wasn't checked for null!"); + return Ptr; + } + + /// get - Return the pointer if it is non-null. 
+  T *get() {
+    assert(Ptr && "Pointer wasn't checked for null!");
+    return Ptr;
+  }
+
+  T *getPtrOrNull() { return Ptr; }
+  const T *getPtrOrNull() const { return Ptr; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/Optional.h b/final/include/llvm/ADT/Optional.h
new file mode 100644
index 00000000000..ee8b69f3d12
--- /dev/null
+++ b/final/include/llvm/ADT/Optional.h
@@ -0,0 +1,120 @@
+//===-- Optional.h - Simple variant for passing optional values ---*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Optional, a template class modeled in the spirit of
+// OCaml's 'opt' variant. The idea is to strongly type whether or not
+// a value can be optional.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_OPTIONAL
+#define LLVM_ADT_OPTIONAL
+
+#include <cassert>
+
+namespace llvm {
+
+template <typename T>
+class Optional {
+  T x;
+  unsigned hasVal : 1;
+public:
+  explicit Optional() : x(), hasVal(false) {}
+  Optional(const T &y) : x(y), hasVal(true) {}
+
+  static inline Optional create(const T* y) {
+    return y ? Optional(*y) : Optional();
+  }
+
+  Optional &operator=(const T &y) {
+    x = y;
+    hasVal = true;
+    return *this;
+  }
+
+  const T* getPointer() const { assert(hasVal); return &x; }
+  const T& getValue() const { assert(hasVal); return x; }
+
+  operator bool() const { return hasVal; }
+  bool hasValue() const { return hasVal; }
+  const T* operator->() const { return getPointer(); }
+  const T& operator*() const { assert(hasVal); return x; }
+};
+
+template <typename T> struct simplify_type;
+
+template <typename T>
+struct simplify_type<const Optional<T> > {
+  typedef const T* SimpleType;
+  static SimpleType getSimplifiedValue(const Optional<T> &Val) {
+    return Val.getPointer();
+  }
+};
+
+template <typename T>
+struct simplify_type<Optional<T> >
+  : public simplify_type<const Optional<T> > {};
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator==(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator!=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator<(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator<=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator>=(const Optional<T> &X, const Optional<U> &Y);
+
+/// \brief Poison comparison between two \c Optional objects. Clients need to
+/// explicitly compare the underlying values and account for empty \c Optional
+/// objects.
+///
+/// This routine will never be defined. It returns \c void to help diagnose
+/// errors at compile time.
+template <typename T, typename U>
+void operator>(const Optional<T> &X, const Optional<U> &Y);
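+
+// A minimal usage sketch (illustrative only; safeDiv and consume are
+// hypothetical helpers, not part of this header):
+//
+//   Optional<int> safeDiv(int a, int b) {
+//     if (b == 0)
+//       return Optional<int>();   // empty: no result to report
+//     return Optional<int>(a / b);
+//   }
+//
+//   if (Optional<int> R = safeDiv(10, 2))
+//     consume(*R);                // hasValue() was true; *R is 5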
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/OwningPtr.h b/final/include/llvm/ADT/OwningPtr.h
new file mode 100644
index 00000000000..6d9c3059778
--- /dev/null
+++ b/final/include/llvm/ADT/OwningPtr.h
@@ -0,0 +1,133 @@
+//===- llvm/ADT/OwningPtr.h - Smart ptr that owns the pointee ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines and implements the OwningPtr class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_OWNING_PTR_H
+#define LLVM_ADT_OWNING_PTR_H
+
+#include <cassert>
+#include <cstddef>
+
+namespace llvm {
+
+/// OwningPtr smart pointer - OwningPtr mimics a built-in pointer except that it
+/// guarantees deletion of the object pointed to, either on destruction of the
+/// OwningPtr or via an explicit reset(). Once created, ownership of the
+/// pointee object can be taken away from OwningPtr by using the take method.
+template <class T>
+class OwningPtr {
+  OwningPtr(OwningPtr const &);             // DO NOT IMPLEMENT
+  OwningPtr &operator=(OwningPtr const &);  // DO NOT IMPLEMENT
+  T *Ptr;
+public:
+  explicit OwningPtr(T *P = 0) : Ptr(P) {}
+
+  ~OwningPtr() {
+    delete Ptr;
+  }
+
+  /// reset - Change the current pointee to the specified pointer. Note that
+  /// calling this with any pointer (including a null pointer) deletes the
+  /// current pointer.
+  void reset(T *P = 0) {
+    if (P == Ptr) return;
+    T *Tmp = Ptr;
+    Ptr = P;
+    delete Tmp;
+  }
+
+  /// take - Reset the owning pointer to null and return its pointer. This does
+  /// not delete the pointer before returning it.
+  T *take() {
+    T *Tmp = Ptr;
+    Ptr = 0;
+    return Tmp;
+  }
+
+  T &operator*() const {
+    assert(Ptr && "Cannot dereference null pointer");
+    return *Ptr;
+  }
+
+  T *operator->() const { return Ptr; }
+  T *get() const { return Ptr; }
+  operator bool() const { return Ptr != 0; }
+  bool operator!() const { return Ptr == 0; }
+
+  void swap(OwningPtr &RHS) {
+    T *Tmp = RHS.Ptr;
+    RHS.Ptr = Ptr;
+    Ptr = Tmp;
+  }
+};
+
+template <class T>
+inline void swap(OwningPtr<T> &a, OwningPtr<T> &b) {
+  a.swap(b);
+}
+
+/// OwningArrayPtr smart pointer - OwningArrayPtr provides the same
+/// functionality as OwningPtr, except that it works for array types.
+template <class T>
+class OwningArrayPtr {
+  OwningArrayPtr(OwningArrayPtr const &);            // DO NOT IMPLEMENT
+  OwningArrayPtr &operator=(OwningArrayPtr const &); // DO NOT IMPLEMENT
+  T *Ptr;
+public:
+  explicit OwningArrayPtr(T *P = 0) : Ptr(P) {}
+
+  ~OwningArrayPtr() {
+    delete [] Ptr;
+  }
+
+  /// reset - Change the current pointee to the specified pointer. Note that
+  /// calling this with any pointer (including a null pointer) deletes the
+  /// current pointer.
+ void reset(T *P = 0) { + if (P == Ptr) return; + T *Tmp = Ptr; + Ptr = P; + delete [] Tmp; + } + + /// take - Reset the owning pointer to null and return its pointer. This does + /// not delete the pointer before returning it. + T *take() { + T *Tmp = Ptr; + Ptr = 0; + return Tmp; + } + + T &operator[](std::ptrdiff_t i) const { + assert(Ptr && "Cannot dereference null pointer"); + return Ptr[i]; + } + + T *get() const { return Ptr; } + operator bool() const { return Ptr != 0; } + bool operator!() const { return Ptr == 0; } + + void swap(OwningArrayPtr &RHS) { + T *Tmp = RHS.Ptr; + RHS.Ptr = Ptr; + Ptr = Tmp; + } +}; + +template +inline void swap(OwningArrayPtr &a, OwningArrayPtr &b) { + a.swap(b); +} + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/ADT/PointerIntPair.h b/final/include/llvm/ADT/PointerIntPair.h new file mode 100644 index 00000000000..85dbba2b4a4 --- /dev/null +++ b/final/include/llvm/ADT/PointerIntPair.h @@ -0,0 +1,163 @@ +//===- llvm/ADT/PointerIntPair.h - Pair for pointer and int -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PointerIntPair class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTERINTPAIR_H +#define LLVM_ADT_POINTERINTPAIR_H + +#include "llvm/Support/PointerLikeTypeTraits.h" +#include + +namespace llvm { + +template +struct DenseMapInfo; + +/// PointerIntPair - This class implements a pair of a pointer and small +/// integer. It is designed to represent this in the space required by one +/// pointer by bitmangling the integer into the low part of the pointer. This +/// can only be done for small integers: typically up to 3 bits, but it depends +/// on the number of bits available according to PointerLikeTypeTraits for the +/// type. +/// +/// Note that PointerIntPair always puts the Int part in the highest bits +/// possible. For example, PointerIntPair will put the bit for +/// the bool into bit #2, not bit #0, which allows the low two bits to be used +/// for something else. For example, this allows: +/// PointerIntPair, 1, bool> +/// ... and the two bools will land in different bits. +/// +template > +class PointerIntPair { + intptr_t Value; + enum { + /// PointerBitMask - The bits that come from the pointer. + PointerBitMask = + ~(uintptr_t)(((intptr_t)1 << PtrTraits::NumLowBitsAvailable)-1), + + /// IntShift - The number of low bits that we reserve for other uses, and + /// keep zero. + IntShift = (uintptr_t)PtrTraits::NumLowBitsAvailable-IntBits, + + /// IntMask - This is the unshifted mask for valid bits of the int type. + IntMask = (uintptr_t)(((intptr_t)1 << IntBits)-1), + + // ShiftedIntMask - This is the bits for the integer shifted in place. 
+ ShiftedIntMask = (uintptr_t)(IntMask << IntShift) + }; +public: + PointerIntPair() : Value(0) {} + PointerIntPair(PointerTy Ptr, IntType Int) : Value(0) { + assert(IntBits <= PtrTraits::NumLowBitsAvailable && + "PointerIntPair formed with integer size too large for pointer"); + setPointer(Ptr); + setInt(Int); + } + + PointerTy getPointer() const { + return PtrTraits::getFromVoidPointer( + reinterpret_cast(Value & PointerBitMask)); + } + + IntType getInt() const { + return (IntType)((Value >> IntShift) & IntMask); + } + + void setPointer(PointerTy Ptr) { + intptr_t PtrVal + = reinterpret_cast(PtrTraits::getAsVoidPointer(Ptr)); + assert((PtrVal & ((1 << PtrTraits::NumLowBitsAvailable)-1)) == 0 && + "Pointer is not sufficiently aligned"); + // Preserve all low bits, just update the pointer. + Value = PtrVal | (Value & ~PointerBitMask); + } + + void setInt(IntType Int) { + intptr_t IntVal = Int; + assert(IntVal < (1 << IntBits) && "Integer too large for field"); + + // Preserve all bits other than the ones we are updating. + Value &= ~ShiftedIntMask; // Remove integer field. + Value |= IntVal << IntShift; // Set new integer. + } + + PointerTy const *getAddrOfPointer() const { + assert(Value == reinterpret_cast(getPointer()) && + "Can only return the address if IntBits is cleared and " + "PtrTraits doesn't change the pointer"); + return reinterpret_cast(&Value); + } + + void *getOpaqueValue() const { return reinterpret_cast(Value); } + void setFromOpaqueValue(void *Val) { Value = reinterpret_cast(Val);} + + static PointerIntPair getFromOpaqueValue(void *V) { + PointerIntPair P; P.setFromOpaqueValue(V); return P; + } + + bool operator==(const PointerIntPair &RHS) const {return Value == RHS.Value;} + bool operator!=(const PointerIntPair &RHS) const {return Value != RHS.Value;} + bool operator<(const PointerIntPair &RHS) const {return Value < RHS.Value;} + bool operator>(const PointerIntPair &RHS) const {return Value > RHS.Value;} + bool operator<=(const PointerIntPair &RHS) const {return Value <= RHS.Value;} + bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;} +}; + +template struct isPodLike; +template +struct isPodLike > { + static const bool value = true; +}; + +// Provide specialization of DenseMapInfo for PointerIntPair. +template +struct DenseMapInfo > { + typedef PointerIntPair Ty; + static Ty getEmptyKey() { + intptr_t Val = -1; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return Ty(reinterpret_cast(Val), IntType((1 << IntBits)-1)); + } + static Ty getTombstoneKey() { + intptr_t Val = -2; + Val <<= PointerLikeTypeTraits::NumLowBitsAvailable; + return Ty(reinterpret_cast(Val), IntType(0)); + } + static unsigned getHashValue(Ty V) { + uintptr_t IV = reinterpret_cast(V.getOpaqueValue()); + return unsigned(IV) ^ unsigned(IV >> 9); + } + static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; } +}; + +// Teach SmallPtrSet that PointerIntPair is "basically a pointer". 
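+// (The pair still round-trips through an opaque void*, but IntBits of the
+// low bits now hold the integer, so the traits below expose
+// PtrTraits::NumLowBitsAvailable - IntBits to users such as PointerUnion.)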
+template +class PointerLikeTypeTraits > { +public: + static inline void * + getAsVoidPointer(const PointerIntPair &P) { + return P.getOpaqueValue(); + } + static inline PointerIntPair + getFromVoidPointer(void *P) { + return PointerIntPair::getFromOpaqueValue(P); + } + enum { + NumLowBitsAvailable = PtrTraits::NumLowBitsAvailable - IntBits + }; +}; + +} // end namespace llvm +#endif diff --git a/final/include/llvm/ADT/PointerUnion.h b/final/include/llvm/ADT/PointerUnion.h new file mode 100644 index 00000000000..13b98cef07a --- /dev/null +++ b/final/include/llvm/ADT/PointerUnion.h @@ -0,0 +1,445 @@ +//===- llvm/ADT/PointerUnion.h - Discriminated Union of 2 Ptrs --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PointerUnion class, which is a discriminated union of +// pointer types. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_POINTERUNION_H +#define LLVM_ADT_POINTERUNION_H + +#include "llvm/ADT/PointerIntPair.h" + +namespace llvm { + + template + struct PointerUnionTypeSelectorReturn { + typedef T Return; + }; + + /// \brief Get a type based on whether two types are the same or not. For: + /// @code + /// typedef typename PointerUnionTypeSelector::Return Ret; + /// @endcode + /// Ret will be EQ type if T1 is same as T2 or NE type otherwise. + template + struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; + }; + + template + struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; + }; + + template + struct PointerUnionTypeSelectorReturn< + PointerUnionTypeSelector > { + typedef typename PointerUnionTypeSelector::Return + Return; + }; + + /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion + /// for the two template arguments. + template + class PointerUnionUIntTraits { + public: + static inline void *getAsVoidPointer(void *P) { return P; } + static inline void *getFromVoidPointer(void *P) { return P; } + enum { + PT1BitsAv = PointerLikeTypeTraits::NumLowBitsAvailable, + PT2BitsAv = PointerLikeTypeTraits::NumLowBitsAvailable, + NumLowBitsAvailable = PT1BitsAv < PT2BitsAv ? PT1BitsAv : PT2BitsAv + }; + }; + + /// PointerUnion - This implements a discriminated union of two pointer types, + /// and keeps the discriminator bit-mangled into the low bits of the pointer. + /// This allows the implementation to be extremely efficient in space, but + /// permits a very natural and type-safe API. + /// + /// Common use patterns would be something like this: + /// PointerUnion P; + /// P = (int*)0; + /// printf("%d %d", P.is(), P.is()); // prints "1 0" + /// X = P.get(); // ok. + /// Y = P.get(); // runtime assertion failure. + /// Z = P.get(); // runtime assertion failure (regardless of tag) + /// P = (float*)0; + /// Y = P.get(); // ok. + /// X = P.get(); // runtime assertion failure. 
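+/// The checked accessors avoid the assertions (sketch; use() is a
+/// hypothetical function):
+///    if (int *I = P.dyn_cast<int*>())  // null instead of asserting.
+///      use(I);
+///    if (P.isNull()) ...               // true for a null pointer of either type.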
+ template + class PointerUnion { + public: + typedef PointerIntPair > ValTy; + private: + ValTy Val; + + struct IsPT1 { + static const int Num = 0; + }; + struct IsPT2 { + static const int Num = 1; + }; + template + struct UNION_DOESNT_CONTAIN_TYPE { }; + + public: + PointerUnion() {} + + PointerUnion(PT1 V) { + Val.setPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))); + Val.setInt(0); + } + PointerUnion(PT2 V) { + Val.setPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(V))); + Val.setInt(1); + } + + /// isNull - Return true if the pointer held in the union is null, + /// regardless of which type it is. + bool isNull() const { return Val.getPointer() == 0; } + operator bool() const { return !isNull(); } + + /// is() return true if the Union currently holds the type matching T. + template + int is() const { + typedef typename + ::llvm::PointerUnionTypeSelector > >::Return Ty; + int TyNo = Ty::Num; + return static_cast(Val.getInt()) == TyNo; + } + + /// get() - Return the value of the specified pointer type. If the + /// specified pointer type is incorrect, assert. + template + T get() const { + assert(is() && "Invalid accessor called"); + return PointerLikeTypeTraits::getFromVoidPointer(Val.getPointer()); + } + + /// dyn_cast() - If the current value is of the specified pointer type, + /// return it, otherwise return null. + template + T dyn_cast() const { + if (is()) return get(); + return T(); + } + + /// \brief If the union is set to the first pointer type we can get an + /// address pointing to it. + template + PT1 const *getAddrOf() const { + assert(is() && "Val is not the first pointer"); + assert(get() == Val.getPointer() && + "Can't get the address because PointerLikeTypeTraits changes the ptr"); + T const *can_only_get_address_of_first_pointer_type + = reinterpret_cast(Val.getAddrOfPointer()); + return can_only_get_address_of_first_pointer_type; + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion &operator=(const PT1 &RHS) { + Val.setPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); + Val.setInt(0); + return *this; + } + const PointerUnion &operator=(const PT2 &RHS) { + Val.setPointer( + const_cast(PointerLikeTypeTraits::getAsVoidPointer(RHS))); + Val.setInt(1); + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion getFromOpaqueValue(void *VP) { + PointerUnion V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } + }; + + // Teach SmallPtrSet that PointerUnion is "basically a pointer", that has + // # low bits available = min(PT1bits,PT2bits)-1. + template + class PointerLikeTypeTraits > { + public: + static inline void * + getAsVoidPointer(const PointerUnion &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion + getFromVoidPointer(void *P) { + return PointerUnion::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = + PointerLikeTypeTraits::ValTy> + ::NumLowBitsAvailable + }; + }; + + + /// PointerUnion3 - This is a pointer union of three pointer types. See + /// documentation for PointerUnion for usage. 
+ template + class PointerUnion3 { + public: + typedef PointerUnion InnerUnion; + typedef PointerUnion ValTy; + private: + ValTy Val; + + struct IsInnerUnion { + ValTy Val; + IsInnerUnion(ValTy val) : Val(val) { } + template + int is() const { + return Val.template is() && + Val.template get().template is(); + } + template + T get() const { + return Val.template get().template get(); + } + }; + + struct IsPT3 { + ValTy Val; + IsPT3(ValTy val) : Val(val) { } + template + int is() const { + return Val.template is(); + } + template + T get() const { + return Val.template get(); + } + }; + + public: + PointerUnion3() {} + + PointerUnion3(PT1 V) { + Val = InnerUnion(V); + } + PointerUnion3(PT2 V) { + Val = InnerUnion(V); + } + PointerUnion3(PT3 V) { + Val = V; + } + + /// isNull - Return true if the pointer held in the union is null, + /// regardless of which type it is. + bool isNull() const { return Val.isNull(); } + operator bool() const { return !isNull(); } + + /// is() return true if the Union currently holds the type matching T. + template + int is() const { + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Ty(Val).is(); + } + + /// get() - Return the value of the specified pointer type. If the + /// specified pointer type is incorrect, assert. + template + T get() const { + assert(is() && "Invalid accessor called"); + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Ty(Val).get(); + } + + /// dyn_cast() - If the current value is of the specified pointer type, + /// return it, otherwise return null. + template + T dyn_cast() const { + if (is()) return get(); + return T(); + } + + /// Assignment operators - Allow assigning into this union from either + /// pointer type, setting the discriminator to remember what it came from. + const PointerUnion3 &operator=(const PT1 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT2 &RHS) { + Val = InnerUnion(RHS); + return *this; + } + const PointerUnion3 &operator=(const PT3 &RHS) { + Val = RHS; + return *this; + } + + void *getOpaqueValue() const { return Val.getOpaqueValue(); } + static inline PointerUnion3 getFromOpaqueValue(void *VP) { + PointerUnion3 V; + V.Val = ValTy::getFromOpaqueValue(VP); + return V; + } + }; + + // Teach SmallPtrSet that PointerUnion3 is "basically a pointer", that has + // # low bits available = min(PT1bits,PT2bits,PT2bits)-2. + template + class PointerLikeTypeTraits > { + public: + static inline void * + getAsVoidPointer(const PointerUnion3 &P) { + return P.getOpaqueValue(); + } + static inline PointerUnion3 + getFromVoidPointer(void *P) { + return PointerUnion3::getFromOpaqueValue(P); + } + + // The number of bits available are the min of the two pointer types. + enum { + NumLowBitsAvailable = + PointerLikeTypeTraits::ValTy> + ::NumLowBitsAvailable + }; + }; + + /// PointerUnion4 - This is a pointer union of four pointer types. See + /// documentation for PointerUnion for usage. 
+
+  /// PointerUnion4 - This is a pointer union of four pointer types. See
+  /// documentation for PointerUnion for usage.
+  template <typename PT1, typename PT2, typename PT3, typename PT4>
+  class PointerUnion4 {
+  public:
+    typedef PointerUnion<PT1, PT2> InnerUnion1;
+    typedef PointerUnion<PT3, PT4> InnerUnion2;
+    typedef PointerUnion<InnerUnion1, InnerUnion2> ValTy;
+  private:
+    ValTy Val;
+  public:
+    PointerUnion4() {}
+
+    PointerUnion4(PT1 V) {
+      Val = InnerUnion1(V);
+    }
+    PointerUnion4(PT2 V) {
+      Val = InnerUnion1(V);
+    }
+    PointerUnion4(PT3 V) {
+      Val = InnerUnion2(V);
+    }
+    PointerUnion4(PT4 V) {
+      Val = InnerUnion2(V);
+    }
+
+    /// isNull - Return true if the pointer held in the union is null,
+    /// regardless of which type it is.
+    bool isNull() const { return Val.isNull(); }
+    operator bool() const { return !isNull(); }
+
+    /// is<T>() return true if the Union currently holds the type matching T.
+    template<typename T>
+    int is() const {
+      // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
+          ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
+                                                                   >::Return Ty;
+      return Val.template is<Ty>() &&
+             Val.template get<Ty>().template is<T>();
+    }
+
+    /// get<T>() - Return the value of the specified pointer type. If the
+    /// specified pointer type is incorrect, assert.
+    template<typename T>
+    T get() const {
+      assert(is<T>() && "Invalid accessor called");
+      // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2.
+      typedef typename
+        ::llvm::PointerUnionTypeSelector<PT1, T, InnerUnion1,
+          ::llvm::PointerUnionTypeSelector<PT2, T, InnerUnion1, InnerUnion2 >
+                                                                   >::Return Ty;
+      return Val.template get<Ty>().template get<T>();
+    }
+
+    /// dyn_cast<T>() - If the current value is of the specified pointer type,
+    /// return it, otherwise return null.
+    template<typename T>
+    T dyn_cast() const {
+      if (is<T>()) return get<T>();
+      return T();
+    }
+
+    /// Assignment operators - Allow assigning into this union from either
+    /// pointer type, setting the discriminator to remember what it came from.
+    const PointerUnion4 &operator=(const PT1 &RHS) {
+      Val = InnerUnion1(RHS);
+      return *this;
+    }
+    const PointerUnion4 &operator=(const PT2 &RHS) {
+      Val = InnerUnion1(RHS);
+      return *this;
+    }
+    const PointerUnion4 &operator=(const PT3 &RHS) {
+      Val = InnerUnion2(RHS);
+      return *this;
+    }
+    const PointerUnion4 &operator=(const PT4 &RHS) {
+      Val = InnerUnion2(RHS);
+      return *this;
+    }
+
+    void *getOpaqueValue() const { return Val.getOpaqueValue(); }
+    static inline PointerUnion4 getFromOpaqueValue(void *VP) {
+      PointerUnion4 V;
+      V.Val = ValTy::getFromOpaqueValue(VP);
+      return V;
+    }
+  };
+
+  // Teach SmallPtrSet that PointerUnion4 is "basically a pointer", that has
+  // # low bits available = min(PT1bits,PT2bits,PT3bits,PT4bits)-2.
+  template<typename PT1, typename PT2, typename PT3, typename PT4>
+  class PointerLikeTypeTraits<PointerUnion4<PT1, PT2, PT3, PT4> > {
+  public:
+    static inline void *
+    getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
+      return P.getOpaqueValue();
+    }
+    static inline PointerUnion4<PT1, PT2, PT3, PT4>
+    getFromVoidPointer(void *P) {
+      return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
+    }
+
+    // The number of bits available are the min of the four pointer types.
+    enum {
+      NumLowBitsAvailable =
+        PointerLikeTypeTraits<typename PointerUnion4<PT1, PT2, PT3, PT4>::ValTy>
+          ::NumLowBitsAvailable
+    };
+  };
+}
+
+#endif
diff --git a/final/include/llvm/ADT/PostOrderIterator.h b/final/include/llvm/ADT/PostOrderIterator.h
new file mode 100644
index 00000000000..e3b499488d0
--- /dev/null
+++ b/final/include/llvm/ADT/PostOrderIterator.h
@@ -0,0 +1,230 @@
+//===- llvm/ADT/PostOrderIterator.h - PostOrder iterator --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file builds on the ADT/GraphTraits.h file to build a generic graph
+// post order iterator.  This should work over any graph type that has a
+// GraphTraits specialization.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_POSTORDERITERATOR_H
+#define LLVM_ADT_POSTORDERITERATOR_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+template<class SetType, bool External>   // Non-external set
+class po_iterator_storage {
+public:
+  SetType Visited;
+};
+
+template<class SetType>
+class po_iterator_storage<SetType, true> {
+public:
+  po_iterator_storage(SetType &VSet) : Visited(VSet) {}
+  po_iterator_storage(const po_iterator_storage &S) : Visited(S.Visited) {}
+  SetType &Visited;
+};
+
+template<class GraphT,
+  class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
+  bool ExtStorage = false,
+  class GT = GraphTraits<GraphT> >
+class po_iterator : public std::iterator<std::forward_iterator_tag,
+                                         typename GT::NodeType, ptrdiff_t>,
+                    public po_iterator_storage<SetType, ExtStorage> {
+  typedef std::iterator<std::forward_iterator_tag,
+                        typename GT::NodeType, ptrdiff_t> super;
+  typedef typename GT::NodeType          NodeType;
+  typedef typename GT::ChildIteratorType ChildItTy;
+
+  // VisitStack - Used to maintain the ordering.  Top = current block
+  // First element is basic block pointer, second is the 'next child' to visit
+  std::vector<std::pair<NodeType *, ChildItTy> > VisitStack;
+
+  void traverseChild() {
+    while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
+      NodeType *BB = *VisitStack.back().second++;
+      if (this->Visited.insert(BB)) {  // If the block is not visited...
+        VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+      }
+    }
+  }
+
+  inline po_iterator(NodeType *BB) {
+    this->Visited.insert(BB);
+    VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+    traverseChild();
+  }
+  inline po_iterator() {} // End is when stack is empty.
+
+  inline po_iterator(NodeType *BB, SetType &S) :
+    po_iterator_storage<SetType, ExtStorage>(S) {
+    if (this->Visited.insert(BB)) {
+      VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+      traverseChild();
+    }
+  }
+
+  inline po_iterator(SetType &S) :
+      po_iterator_storage<SetType, ExtStorage>(S) {
+  } // End is when stack is empty.
+public:
+  typedef typename super::pointer pointer;
+  typedef po_iterator<GraphT, SetType, ExtStorage, GT> _Self;
+
+  // Provide static "constructors"...
+  static inline _Self begin(GraphT G) { return _Self(GT::getEntryNode(G)); }
+  static inline _Self end  (GraphT G) { return _Self(); }
+
+  static inline _Self begin(GraphT G, SetType &S) {
+    return _Self(GT::getEntryNode(G), S);
+  }
+  static inline _Self end  (GraphT G, SetType &S) { return _Self(S); }
+
+  inline bool operator==(const _Self& x) const {
+    return VisitStack == x.VisitStack;
+  }
+  inline bool operator!=(const _Self& x) const { return !operator==(x); }
+
+  inline pointer operator*() const {
+    return VisitStack.back().first;
+  }
+
+  // This is a nonstandard operator-> that dereferences the pointer an extra
+  // time... so that you can actually call methods ON the BasicBlock, because
+  // the contained type is a pointer.  This allows BBIt->getTerminator() f.e.
+  //
+  inline NodeType *operator->() const { return operator*(); }
+
+  inline _Self& operator++() {   // Preincrement
+    VisitStack.pop_back();
+    if (!VisitStack.empty())
+      traverseChild();
+    return *this;
+  }
+
+  inline _Self operator++(int) { // Postincrement
+    _Self tmp = *this; ++*this; return tmp;
+  }
+};
+
+// Provide global constructors that automatically figure out correct types...
+//
+template <class T>
+po_iterator<T> po_begin(T G) { return po_iterator<T>::begin(G); }
+template <class T>
+po_iterator<T> po_end  (T G) { return po_iterator<T>::end(G); }
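
// --- Editor's note: illustrative sketch, not part of the original patch.
// Walking a function's CFG in post-order with the iterators above; assumes
// the usual GraphTraits<Function*> specialization from llvm/Support/CFG.h.
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Function.h"
#include "llvm/Support/CFG.h"

void visitPostOrder(llvm::Function *F) {
  for (llvm::po_iterator<llvm::Function*> I = llvm::po_begin(F),
         E = llvm::po_end(F); I != E; ++I) {
    llvm::BasicBlock *BB = *I;   // all of BB's successors were visited first
    (void)BB;
  }
}
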
+
+// Provide global definitions of external postorder iterators...
+template<class T, class SetType = std::set<typename GraphTraits<T>::NodeType*> >
+struct po_ext_iterator : public po_iterator<T, SetType, true> {
+  po_ext_iterator(const po_iterator<T, SetType, true> &V) :
+  po_iterator<T, SetType, true>(V) {}
+};
+
+template<class T, class SetType>
+po_ext_iterator<T, SetType> po_ext_begin(T G, SetType &S) {
+  return po_ext_iterator<T, SetType>::begin(G, S);
+}
+
+template<class T, class SetType>
+po_ext_iterator<T, SetType> po_ext_end(T G, SetType &S) {
+  return po_ext_iterator<T, SetType>::end(G, S);
+}
+
+// Provide global definitions of inverse post order iterators...
+template <class T,
+          class SetType = std::set<typename GraphTraits<T>::NodeType*>,
+          bool External = false>
+struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External> {
+  ipo_iterator(const po_iterator<Inverse<T>, SetType, External> &V) :
+     po_iterator<Inverse<T>, SetType, External> (V) {}
+};
+
+template <class T>
+ipo_iterator<T> ipo_begin(T G, bool Reverse = false) {
+  return ipo_iterator<T>::begin(G, Reverse);
+}
+
+template <class T>
+ipo_iterator<T> ipo_end(T G){
+  return ipo_iterator<T>::end(G);
+}
+
+// Provide global definitions of external inverse postorder iterators...
+template <class T,
+          class SetType = std::set<typename GraphTraits<T>::NodeType*> >
+struct ipo_ext_iterator : public ipo_iterator<T, SetType, true> {
+  ipo_ext_iterator(const ipo_iterator<T, SetType, true> &V) :
+    ipo_iterator<T, SetType, true>(V) {}
+  ipo_ext_iterator(const po_iterator<Inverse<T>, SetType, true> &V) :
+    ipo_iterator<T, SetType, true>(V) {}
+};
+
+template <class T, class SetType>
+ipo_ext_iterator<T, SetType> ipo_ext_begin(T G, SetType &S) {
+  return ipo_ext_iterator<T, SetType>::begin(G, S);
+}
+
+template <class T, class SetType>
+ipo_ext_iterator<T, SetType> ipo_ext_end(T G, SetType &S) {
+  return ipo_ext_iterator<T, SetType>::end(G, S);
+}
+
+//===--------------------------------------------------------------------===//
+// Reverse Post Order CFG iterator code
+//===--------------------------------------------------------------------===//
+//
+// This is used to visit basic blocks in a method in reverse post order.  This
+// class is awkward to use because I don't know a good incremental algorithm to
+// compute RPO from a graph.  Because of this, the construction of the
+// ReversePostOrderTraversal object is expensive (it must walk the entire graph
+// with a postorder iterator to build the data structures).  The moral of this
+// story is: Don't create more ReversePostOrderTraversal classes than necessary.
+//
+// This class should be used like this:
+// {
+//   ReversePostOrderTraversal<Function*> RPOT(FuncPtr); // Expensive to create
+//   for (rpo_iterator I = RPOT.begin(); I != RPOT.end(); ++I) {
+//      ...
+//   }
+//   for (rpo_iterator I = RPOT.begin(); I != RPOT.end(); ++I) {
+//      ...
+//   }
+// }
+//
+
+template<class GraphT, class GT = GraphTraits<GraphT> >
+class ReversePostOrderTraversal {
+  typedef typename GT::NodeType NodeType;
+  std::vector<NodeType*> Blocks;       // Block list in normal PO order
+  inline void Initialize(NodeType *BB) {
+    copy(po_begin(BB), po_end(BB), back_inserter(Blocks));
+  }
+public:
+  typedef typename std::vector<NodeType*>::reverse_iterator rpo_iterator;
+
+  inline ReversePostOrderTraversal(GraphT G) {
+    Initialize(GT::getEntryNode(G));
+  }
+
+  // Because we want a reverse post order, use reverse iterators from the vector
+  inline rpo_iterator begin() { return Blocks.rbegin(); }
+  inline rpo_iterator end()   { return Blocks.rend(); }
+};
+
+} // End llvm namespace
+
+#endif
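
// --- Editor's note: illustrative sketch, not part of the original patch.
// RPO visits each block before any of its successors (back edges aside),
// the natural order for forward dataflow. Construction walks the whole graph
// once, so build the traversal object a single time and reuse it.
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Function.h"
#include "llvm/Support/CFG.h"

void visitReversePostOrder(llvm::Function *F) {
  llvm::ReversePostOrderTraversal<llvm::Function*> RPOT(F);  // expensive
  typedef llvm::ReversePostOrderTraversal<llvm::Function*>::rpo_iterator It;
  for (It I = RPOT.begin(), E = RPOT.end(); I != E; ++I)
    (void)*I;  // *I is a BasicBlock*
}
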
diff --git a/final/include/llvm/ADT/PriorityQueue.h b/final/include/llvm/ADT/PriorityQueue.h
new file mode 100644
index 00000000000..bf8a6870816
--- /dev/null
+++ b/final/include/llvm/ADT/PriorityQueue.h
@@ -0,0 +1,84 @@
+//===- llvm/ADT/PriorityQueue.h - Priority queues ---------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PriorityQueue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_PRIORITY_QUEUE_H
+#define LLVM_ADT_PRIORITY_QUEUE_H
+
+#include <algorithm>
+#include <queue>
+
+namespace llvm {
+
+/// PriorityQueue - This class behaves like std::priority_queue and
+/// provides a few additional convenience functions.
+///
+template<class T,
+         class Sequence = std::vector<T>,
+         class Compare = std::less<typename Sequence::value_type> >
+class PriorityQueue : public std::priority_queue<T, Sequence, Compare> {
+public:
+  explicit PriorityQueue(const Compare &compare = Compare(),
+                         const Sequence &sequence = Sequence())
+    : std::priority_queue<T, Sequence, Compare>(compare, sequence)
+  {}
+
+  template<class Iterator>
+  PriorityQueue(Iterator begin, Iterator end,
+                const Compare &compare = Compare(),
+                const Sequence &sequence = Sequence())
+    : std::priority_queue<T, Sequence, Compare>(begin, end, compare, sequence)
+  {}
+
+  /// erase_one - Erase one element from the queue, regardless of its
+  /// position. This operation performs a linear search to find an element
+  /// equal to t, but then uses all logarithmic-time algorithms to do
+  /// the erase operation.
+  ///
+  void erase_one(const T &t) {
+    // Linear-search to find the element.
+    typename Sequence::size_type i =
+      std::find(this->c.begin(), this->c.end(), t) - this->c.begin();
+
+    // Logarithmic-time heap bubble-up.
+    while (i != 0) {
+      typename Sequence::size_type parent = (i - 1) / 2;
+      this->c[i] = this->c[parent];
+      i = parent;
+    }
+
+    // The element we want to remove is now at the root, so we can use
+    // priority_queue's plain pop to remove it.
+    this->pop();
+  }
+
+  /// reheapify - If an element in the queue has changed in a way that
+  /// affects its standing in the comparison function, the queue's
+  /// internal state becomes invalid. Calling reheapify() resets the
+  /// queue's state, making it valid again. This operation has time
+  /// complexity proportional to the number of elements in the queue,
+  /// so don't plan to use it a lot.
+  ///
+  void reheapify() {
+    std::make_heap(this->c.begin(), this->c.end(), this->comp);
+  }
+
+  /// clear - Erase all elements from the queue.
+  ///
+  void clear() {
+    this->c.clear();
+  }
+};
+
+} // End llvm namespace
+
+#endif
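
// --- Editor's note: illustrative sketch, not part of the original patch.
// erase_one and reheapify are the two additions over std::priority_queue.
#include "llvm/ADT/PriorityQueue.h"

void priorityQueueExample() {
  llvm::PriorityQueue<int> Q;
  Q.push(3); Q.push(1); Q.push(4);
  Q.erase_one(1);      // linear find, then logarithmic bubble-up and pop
  int Top = Q.top();   // 4
  (void)Top;
  Q.clear();
}
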
diff --git a/final/include/llvm/ADT/SCCIterator.h b/final/include/llvm/ADT/SCCIterator.h
new file mode 100644
index 00000000000..3e93cfe914f
--- /dev/null
+++ b/final/include/llvm/ADT/SCCIterator.h
@@ -0,0 +1,220 @@
+//===-- Support/SCCIterator.h - Strongly Connected Comp. Iter. --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected
+// components (SCCs) of a graph in O(N+E) time using Tarjan's DFS algorithm.
+//
+// The SCC iterator has the important property that if a node in SCC S1 has an
+// edge to a node in SCC S2, then it visits S1 *after* S2.
+//
+// To visit S1 *before* S2, use the scc_iterator on the Inverse graph.
+// (NOTE: This requires some simple wrappers and is not supported yet.)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SCCITERATOR_H
+#define LLVM_ADT_SCCITERATOR_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+///
+/// scc_iterator - Enumerate the SCCs of a directed graph, in
+/// reverse topological order of the SCC DAG.
+///
+template<class GraphT, class GT = GraphTraits<GraphT> >
+class scc_iterator
+  : public std::iterator<std::forward_iterator_tag,
+                         std::vector<typename GT::NodeType>, ptrdiff_t> {
+  typedef typename GT::NodeType          NodeType;
+  typedef typename GT::ChildIteratorType ChildItTy;
+  typedef std::vector<NodeType*> SccTy;
+  typedef std::iterator<std::forward_iterator_tag,
+                        std::vector<typename GT::NodeType>, ptrdiff_t> super;
+  typedef typename super::reference reference;
+  typedef typename super::pointer pointer;
+
+  // The visit counters used to detect when a complete SCC is on the stack.
+  // visitNum is the global counter.
+  // nodeVisitNumbers are per-node visit numbers, also used as DFS flags.
+  unsigned visitNum;
+  DenseMap<NodeType *, unsigned> nodeVisitNumbers;
+
+  // SCCNodeStack - Stack holding nodes of the SCC.
+  std::vector<NodeType *> SCCNodeStack;
+
+  // CurrentSCC - The current SCC, retrieved using operator*().
+  SccTy CurrentSCC;
+
+  // VisitStack - Used to maintain the ordering.  Top = current block
+  // First element is basic block pointer, second is the 'next child' to visit
+  std::vector<std::pair<NodeType *, ChildItTy> > VisitStack;
+
+  // MinVisitNumStack - Stack holding the "min" values for each node in the DFS.
+  // This is used to track the minimum uplink values for all children of
+  // the corresponding node on the VisitStack.
+  std::vector<unsigned> MinVisitNumStack;
+
+  // A single "visit" within the non-recursive DFS traversal.
+  void DFSVisitOne(NodeType *N) {
+    ++visitNum;                         // Global counter for the visit order
+    nodeVisitNumbers[N] = visitNum;
+    SCCNodeStack.push_back(N);
+    MinVisitNumStack.push_back(visitNum);
+    VisitStack.push_back(std::make_pair(N, GT::child_begin(N)));
+    //dbgs() << "TarjanSCC: Node " << N <<
+    //      " : visitNum = " << visitNum << "\n";
+  }
+
+  // The stack-based DFS traversal; defined below.
+  void DFSVisitChildren() {
+    assert(!VisitStack.empty());
+    while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
+      // TOS has at least one more child so continue DFS
+      NodeType *childN = *VisitStack.back().second++;
+      if (!nodeVisitNumbers.count(childN)) {
+        // this node has never been seen.
+        DFSVisitOne(childN);
+        continue;
+      }
+
+      unsigned childNum = nodeVisitNumbers[childN];
+      if (MinVisitNumStack.back() > childNum)
+        MinVisitNumStack.back() = childNum;
+    }
+  }
+
+  // Compute the next SCC using the DFS traversal.
+  void GetNextSCC() {
+    assert(VisitStack.size() == MinVisitNumStack.size());
+    CurrentSCC.clear();                 // Prepare to compute the next SCC
+    while (!VisitStack.empty()) {
+      DFSVisitChildren();
+      assert(VisitStack.back().second == GT::child_end(VisitStack.back().first));
+      NodeType *visitingN = VisitStack.back().first;
+      unsigned minVisitNum = MinVisitNumStack.back();
+      VisitStack.pop_back();
+      MinVisitNumStack.pop_back();
+      if (!MinVisitNumStack.empty() && MinVisitNumStack.back() > minVisitNum)
+        MinVisitNumStack.back() = minVisitNum;
+
+      //dbgs() << "TarjanSCC: Popped node " << visitingN <<
+      //      " : minVisitNum = " << minVisitNum << "; Node visit num = " <<
+      //      nodeVisitNumbers[visitingN] << "\n";
+
+      if (minVisitNum != nodeVisitNumbers[visitingN])
+        continue;
+
+      // A full SCC is on the SCCNodeStack!  It includes all nodes below
+      // visitingN on the stack.  Copy those nodes to CurrentSCC,
+      // reset their minVisit values, and return (this suspends
+      // the DFS traversal till the next ++).
+      do {
+        CurrentSCC.push_back(SCCNodeStack.back());
+        SCCNodeStack.pop_back();
+        nodeVisitNumbers[CurrentSCC.back()] = ~0U;
+      } while (CurrentSCC.back() != visitingN);
+      return;
+    }
+  }
+
+  inline scc_iterator(NodeType *entryN) : visitNum(0) {
+    DFSVisitOne(entryN);
+    GetNextSCC();
+  }
+  inline scc_iterator() { /* End is when DFS stack is empty */ }
+
+public:
+  typedef scc_iterator<GraphT, GT> _Self;
+
+  // Provide static "constructors"...
+  static inline _Self begin(const GraphT &G){return _Self(GT::getEntryNode(G));}
+  static inline _Self end  (const GraphT &G) { return _Self(); }
+
+  // Direct loop termination test: I.isAtEnd() is more efficient than I == end()
+  inline bool isAtEnd() const {
+    assert(!CurrentSCC.empty() || VisitStack.empty());
+    return CurrentSCC.empty();
+  }
+
+  inline bool operator==(const _Self& x) const {
+    return VisitStack == x.VisitStack && CurrentSCC == x.CurrentSCC;
+  }
+  inline bool operator!=(const _Self& x) const { return !operator==(x); }
+
+  // Iterator traversal: forward iteration only
+  inline _Self& operator++() {          // Preincrement
+    GetNextSCC();
+    return *this;
+  }
+  inline _Self operator++(int) {        // Postincrement
+    _Self tmp = *this; ++*this; return tmp;
+  }
+
+  // Retrieve a reference to the current SCC
+  inline const SccTy &operator*() const {
+    assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
+    return CurrentSCC;
+  }
+  inline SccTy &operator*() {
+    assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
+    return CurrentSCC;
+  }
+
+  // hasLoop() -- Test if the current SCC has a loop.  If it has more than one
+  // node, this is trivially true.  If not, it may still contain a loop if the
+  // node has an edge back to itself.
+  bool hasLoop() const {
+    assert(!CurrentSCC.empty() && "Dereferencing END SCC iterator!");
+    if (CurrentSCC.size() > 1) return true;
+    NodeType *N = CurrentSCC.front();
+    for (ChildItTy CI = GT::child_begin(N), CE=GT::child_end(N); CI != CE; ++CI)
+      if (*CI == N)
+        return true;
+    return false;
+  }
+
+  /// ReplaceNode - This informs the scc_iterator that the specified Old node
+  /// has been deleted, and New is to be used in its place.
+  void ReplaceNode(NodeType *Old, NodeType *New) {
+    assert(nodeVisitNumbers.count(Old) && "Old not in scc_iterator?");
+    nodeVisitNumbers[New] = nodeVisitNumbers[Old];
+    nodeVisitNumbers.erase(Old);
+  }
+};
+
+
+// Global constructor for the SCC iterator.
+template <class T>
+scc_iterator<T> scc_begin(const T &G) {
+  return scc_iterator<T>::begin(G);
+}
+
+template <class T>
+scc_iterator<T> scc_end(const T &G) {
+  return scc_iterator<T>::end(G);
+}
+
+template <class T>
+scc_iterator<Inverse<T> > scc_begin(const Inverse<T> &G) {
+  return scc_iterator<Inverse<T> >::begin(G);
+}
+
+template <class T>
+scc_iterator<Inverse<T> > scc_end(const Inverse<T> &G) {
+  return scc_iterator<Inverse<T> >::end(G);
+}
+
+} // End llvm namespace
+
+#endif
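
// --- Editor's note: illustrative sketch, not part of the original patch.
// Enumerating call-graph SCCs; because SCCs come out in reverse topological
// order, callees are seen before their callers. Assumes the GraphTraits
// specialization that llvm/Analysis/CallGraph.h provides.
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include <vector>

void visitSCCs(llvm::CallGraph &CG) {
  for (llvm::scc_iterator<llvm::CallGraph*> I = llvm::scc_begin(&CG);
       !I.isAtEnd(); ++I) {
    const std::vector<llvm::CallGraphNode*> &SCC = *I;
    bool Recursive = I.hasLoop();  // true for multi-node or self-edge SCCs
    (void)SCC; (void)Recursive;
  }
}
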
diff --git a/final/include/llvm/ADT/STLExtras.h b/final/include/llvm/ADT/STLExtras.h
new file mode 100644
index 00000000000..0b0346be2cc
--- /dev/null
+++ b/final/include/llvm/ADT/STLExtras.h
@@ -0,0 +1,306 @@
+//===- llvm/ADT/STLExtras.h - Useful STL related functions ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some templates that are useful if you are working with the
+// STL at all.
+//
+// No library is required when using these functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STLEXTRAS_H
+#define LLVM_ADT_STLEXTRAS_H
+
+#include <cstddef> // for std::size_t
+#include <cstdlib> // for qsort
+#include <functional>
+#include <iterator>
+#include <utility> // for std::pair
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//     Extra additions to <functional>
+//===----------------------------------------------------------------------===//
+
+template<class Ty>
+struct less_ptr : public std::binary_function<Ty, Ty, bool> {
+  bool operator()(const Ty* left, const Ty* right) const {
+    return *left < *right;
+  }
+};
+
+template<class Ty>
+struct greater_ptr : public std::binary_function<Ty, Ty, bool> {
+  bool operator()(const Ty* left, const Ty* right) const {
+    return *right < *left;
+  }
+};
+
+// deleter - Very very very simple method that is used to invoke operator
+// delete on something.  It is used like this:
+//
+//   for_each(V.begin(), V.end(), deleter<Interval>);
+//
+template <class T>
+static inline void deleter(T *Ptr) {
+  delete Ptr;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//     Extra additions to <iterator>
+//===----------------------------------------------------------------------===//
+
+// mapped_iterator - This is a simple iterator adapter that causes a function to
+// be dereferenced whenever operator* is invoked on the iterator.
+//
+template <class RootIt, class UnaryFunc>
+class mapped_iterator {
+  RootIt current;
+  UnaryFunc Fn;
+public:
+  typedef typename std::iterator_traits<RootIt>::iterator_category
+          iterator_category;
+  typedef typename std::iterator_traits<RootIt>::difference_type
+          difference_type;
+  typedef typename UnaryFunc::result_type value_type;
+
+  typedef void pointer;
+  //typedef typename UnaryFunc::result_type *pointer;
+  typedef void reference;        // Can't modify value returned by fn
+
+  typedef RootIt iterator_type;
+  typedef mapped_iterator<RootIt, UnaryFunc> _Self;
+
+  inline const RootIt &getCurrent() const { return current; }
+  inline const UnaryFunc &getFunc() const { return Fn; }
+
+  inline explicit mapped_iterator(const RootIt &I, UnaryFunc F)
+    : current(I), Fn(F) {}
+  inline mapped_iterator(const mapped_iterator &It)
+    : current(It.current), Fn(It.Fn) {}
+
+  inline value_type operator*() const {   // All this work to do this
+    return Fn(*current);         // little change
+  }
+
+  _Self& operator++() { ++current; return *this; }
+  _Self& operator--() { --current; return *this; }
+  _Self  operator++(int) { _Self __tmp = *this; ++current; return __tmp; }
+  _Self  operator--(int) { _Self __tmp = *this; --current; return __tmp; }
+  _Self  operator+    (difference_type n) const {
+    return _Self(current + n, Fn);
+  }
+  _Self& operator+=   (difference_type n) { current += n; return *this; }
+  _Self  operator-    (difference_type n) const {
+    return _Self(current - n, Fn);
+  }
+  _Self& operator-=   (difference_type n) { current -= n; return *this; }
+  reference operator[](difference_type n) const { return *(*this + n); }
+
+  inline bool operator!=(const _Self &X) const { return !operator==(X); }
+  inline bool operator==(const _Self &X) const { return current == X.current; }
+  inline bool operator< (const _Self &X) const { return current <  X.current; }
+
+  inline difference_type operator-(const _Self &X) const {
+    return current - X.current;
+  }
+};
+
+template <class _Iterator, class Func>
+inline mapped_iterator<_Iterator, Func>
+operator+(typename mapped_iterator<_Iterator, Func>::difference_type N,
+          const mapped_iterator<_Iterator, Func>& X) {
+  return mapped_iterator<_Iterator, Func>(X.getCurrent() - N, X.getFunc());
+}
+
+
+// map_iterator - Provide a convenient way to create mapped_iterators, just like
+// make_pair is useful for creating pairs...
+//
+template <class ItTy, class FuncTy>
+inline mapped_iterator<ItTy, FuncTy> map_iterator(const ItTy &I, FuncTy F) {
+  return mapped_iterator<ItTy, FuncTy>(I, F);
+}
+
+
+// next/prior - These functions unlike std::advance do not modify the
+// passed iterator but return a copy.
+//
+// next(myIt) returns copy of myIt incremented once
+// next(myIt, n) returns copy of myIt incremented n times
+// prior(myIt) returns copy of myIt decremented once
+// prior(myIt, n) returns copy of myIt decremented n times
+
+template <typename ItTy, typename Dist>
+inline ItTy next(ItTy it, Dist n)
+{
+  std::advance(it, n);
+  return it;
+}
+
+template <typename ItTy>
+inline ItTy next(ItTy it)
+{
+  return ++it;
+}
+
+template <typename ItTy, typename Dist>
+inline ItTy prior(ItTy it, Dist n)
+{
+  std::advance(it, -n);
+  return it;
+}
+
+template <typename ItTy>
+inline ItTy prior(ItTy it)
+{
+  return --it;
+}
+
+//===----------------------------------------------------------------------===//
+//     Extra additions to <utility>
+//===----------------------------------------------------------------------===//
+
+// tie - this function ties two objects and returns a temporary object
+// that is assignable from a std::pair. This can be used to make code
+// more readable when using values returned from functions bundled in
+// a std::pair. Since an example is worth 1000 words:
+//
+// typedef std::map<int, int> Int2IntMap;
+//
+// Int2IntMap myMap;
+// Int2IntMap::iterator where;
+// bool inserted;
+// tie(where, inserted) = myMap.insert(std::make_pair(123,456));
+//
+// if (inserted)
+//   // do stuff
+// else
+//   // do other stuff
+
+namespace
+{
+  template <typename T1, typename T2>
+  struct tier {
+    typedef T1 &first_type;
+    typedef T2 &second_type;
+
+    first_type first;
+    second_type second;
+
+    tier(first_type f, second_type s) : first(f), second(s) { }
+    tier& operator=(const std::pair<T1, T2>& p) {
+      first = p.first;
+      second = p.second;
+      return *this;
+    }
+  };
+}
+
+template <typename T1, typename T2>
+inline tier<T1, T2> tie(T1& f, T2& s) {
+  return tier<T1, T2>(f, s);
+}
+
+//===----------------------------------------------------------------------===//
+//     Extra additions for arrays
+//===----------------------------------------------------------------------===//
+
+/// Find where an array ends (for ending iterators)
+/// This returns a pointer to the byte immediately
+/// after the end of an array.
+template<class T, std::size_t N>
+inline T *array_endof(T (&x)[N]) {
+  return x+N;
+}
+
+/// Find the length of an array.
+template<class T, std::size_t N>
+inline size_t array_lengthof(T (&)[N]) {
+  return N;
+}
+
+/// array_pod_sort_comparator - This is a helper function for array_pod_sort,
+/// which just uses operator< on T.
+template<typename T>
+static inline int array_pod_sort_comparator(const void *P1, const void *P2) {
+  if (*reinterpret_cast<const T*>(P1) < *reinterpret_cast<const T*>(P2))
+    return -1;
+  if (*reinterpret_cast<const T*>(P2) < *reinterpret_cast<const T*>(P1))
+    return 1;
+  return 0;
+}
+
+/// get_array_pad_sort_comparator - This is an internal helper function used to
+/// get type deduction of T right.
+template<typename T>
+static int (*get_array_pad_sort_comparator(const T &))
+             (const void*, const void*) {
+  return array_pod_sort_comparator<T>;
+}
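
// --- Editor's note: illustrative sketch, not part of the original patch.
// tie() unpacks a std::pair into existing locals, and next()/prior() return
// an advanced copy of an iterator instead of modifying it in place.
#include "llvm/ADT/STLExtras.h"
#include <map>

void tieAndNextExample(std::map<int, int> &M) {
  std::map<int, int>::iterator Where;
  bool Inserted;
  llvm::tie(Where, Inserted) = M.insert(std::make_pair(1, 10));
  if (Inserted && llvm::next(Where) != M.end())
    ++llvm::next(Where)->second;  // peek at the following entry; Where intact
}
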
+
+
+/// array_pod_sort - This sorts an array with the specified start and end
+/// extent.  This is just like std::sort, except that it calls qsort instead of
+/// using an inlined template.  qsort is slightly slower than std::sort, but
+/// most sorts are not performance critical in LLVM and std::sort has to be
+/// template instantiated for each type, leading to significant measured code
+/// bloat.  This function should generally be used instead of std::sort where
+/// possible.
+///
+/// This function assumes that you have simple POD-like types that can be
+/// compared with operator< and can be moved with memcpy.  If this isn't true,
+/// you should use std::sort.
+///
+/// NOTE: If qsort_r were portable, we could allow a custom comparator and
+/// default to std::less.
+template<class IteratorTy>
+static inline void array_pod_sort(IteratorTy Start, IteratorTy End) {
+  // Don't dereference start iterator of empty sequence.
+  if (Start == End) return;
+  qsort(&*Start, End-Start, sizeof(*Start),
+        get_array_pad_sort_comparator(*Start));
+}
+
+template<class IteratorTy>
+static inline void array_pod_sort(IteratorTy Start, IteratorTy End,
+                                  int (*Compare)(const void*, const void*)) {
+  // Don't dereference start iterator of empty sequence.
+  if (Start == End) return;
+  qsort(&*Start, End-Start, sizeof(*Start), Compare);
+}
+
+//===----------------------------------------------------------------------===//
+//     Extra additions to <algorithm>
+//===----------------------------------------------------------------------===//
+
+/// For a container of pointers, deletes the pointers and then clears the
+/// container.
+template<typename Container>
+void DeleteContainerPointers(Container &C) {
+  for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
+    delete *I;
+  C.clear();
+}
+
+/// In a container of pairs (usually a map) whose second element is a pointer,
+/// deletes the second elements and then clears the container.
+template<typename Container>
+void DeleteContainerSeconds(Container &C) {
+  for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
+    delete I->second;
+  C.clear();
+}
+
+} // End llvm namespace
+
+#endif
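
// --- Editor's note: illustrative sketch, not part of the original patch.
// array_pod_sort on a plain array of PODs; qsort itself is shared across
// element types, so only the tiny comparator is instantiated per type.
#include "llvm/ADT/STLExtras.h"

void podSortExample() {
  unsigned Vals[] = { 3, 1, 4, 1, 5 };
  llvm::array_pod_sort(Vals, llvm::array_endof(Vals));
  // Vals is now { 1, 1, 3, 4, 5 }.
}
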
diff --git a/final/include/llvm/ADT/ScopedHashTable.h b/final/include/llvm/ADT/ScopedHashTable.h
new file mode 100644
index 00000000000..a6803ee0edd
--- /dev/null
+++ b/final/include/llvm/ADT/ScopedHashTable.h
@@ -0,0 +1,256 @@
+//===- ScopedHashTable.h - A simple scoped hash table ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an efficient scoped hash table, which is useful for
+// things like dominator-based optimizations.  This allows clients to do things
+// like this:
+//
+//  ScopedHashTable<int, int> HT;
+//  {
+//    ScopedHashTableScope<int, int> Scope1(HT);
+//    HT.insert(0, 0);
+//    HT.insert(1, 1);
+//    {
+//      ScopedHashTableScope<int, int> Scope2(HT);
+//      HT.insert(0, 42);
+//    }
+//  }
+//
+// Looking up the value for "0" in the Scope2 block will return 42.  Looking
+// up the value for 0 before 42 is inserted or after Scope2 is popped will
+// return 0.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SCOPEDHASHTABLE_H
+#define LLVM_ADT_SCOPEDHASHTABLE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+template <typename K, typename V, typename KInfo = DenseMapInfo<K>,
+          typename AllocatorTy = MallocAllocator>
+class ScopedHashTable;
+
+template <typename K, typename V>
+class ScopedHashTableVal {
+  ScopedHashTableVal *NextInScope;
+  ScopedHashTableVal *NextForKey;
+  K Key;
+  V Val;
+  ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {}
+public:
+
+  const K &getKey() const { return Key; }
+  const V &getValue() const { return Val; }
+  V &getValue() { return Val; }
+
+  ScopedHashTableVal *getNextForKey() { return NextForKey; }
+  const ScopedHashTableVal *getNextForKey() const { return NextForKey; }
+  ScopedHashTableVal *getNextInScope() { return NextInScope; }
+
+  template <typename AllocatorTy>
+  static ScopedHashTableVal *Create(ScopedHashTableVal *nextInScope,
+                                    ScopedHashTableVal *nextForKey,
+                                    const K &key, const V &val,
+                                    AllocatorTy &Allocator) {
+    ScopedHashTableVal *New = Allocator.template Allocate<ScopedHashTableVal>();
+    // Set up the value.
+    new (New) ScopedHashTableVal(key, val);
+    New->NextInScope = nextInScope;
+    New->NextForKey = nextForKey;
+    return New;
+  }
+
+  template <typename AllocatorTy>
+  void Destroy(AllocatorTy &Allocator) {
+    // Free memory referenced by the item.
+    this->~ScopedHashTableVal();
+    Allocator.Deallocate(this);
+  }
+};
+
+template <typename K, typename V, typename KInfo = DenseMapInfo<K>,
+          typename AllocatorTy = MallocAllocator>
+class ScopedHashTableScope {
+  /// HT - The hashtable that we are active for.
+  ScopedHashTable<K, V, KInfo, AllocatorTy> &HT;
+
+  /// PrevScope - This is the scope that we are shadowing in HT.
+  ScopedHashTableScope *PrevScope;
+
+  /// LastValInScope - This is the last value that was inserted for this scope
+  /// or null if none have been inserted yet.
+  ScopedHashTableVal<K, V> *LastValInScope;
+  void operator=(ScopedHashTableScope&);       // DO NOT IMPLEMENT
+  ScopedHashTableScope(ScopedHashTableScope&); // DO NOT IMPLEMENT
+public:
+  ScopedHashTableScope(ScopedHashTable<K, V, KInfo, AllocatorTy> &HT);
+  ~ScopedHashTableScope();
+
+  ScopedHashTableScope *getParentScope() { return PrevScope; }
+  const ScopedHashTableScope *getParentScope() const { return PrevScope; }
+
+private:
+  friend class ScopedHashTable<K, V, KInfo, AllocatorTy>;
+  ScopedHashTableVal<K, V> *getLastValInScope() {
+    return LastValInScope;
+  }
+  void setLastValInScope(ScopedHashTableVal<K, V> *Val) {
+    LastValInScope = Val;
+  }
+};
+
+
+template <typename K, typename V, typename KInfo = DenseMapInfo<K> >
+class ScopedHashTableIterator {
+  ScopedHashTableVal<K, V> *Node;
+public:
+  ScopedHashTableIterator(ScopedHashTableVal<K, V> *node) : Node(node) {}
+
+  V &operator*() const {
+    assert(Node && "Dereference end()");
+    return Node->getValue();
+  }
+  V *operator->() const {
+    return &Node->getValue();
+  }
+
+  bool operator==(const ScopedHashTableIterator &RHS) const {
+    return Node == RHS.Node;
+  }
+  bool operator!=(const ScopedHashTableIterator &RHS) const {
+    return Node != RHS.Node;
+  }
+
+  inline ScopedHashTableIterator& operator++() {          // Preincrement
+    assert(Node && "incrementing past end()");
+    Node = Node->getNextForKey();
+    return *this;
+  }
+  ScopedHashTableIterator operator++(int) {        // Postincrement
+    ScopedHashTableIterator tmp = *this; ++*this; return tmp;
+  }
+};
+
+
+template <typename K, typename V, typename KInfo, typename AllocatorTy>
+class ScopedHashTable {
+public:
+  /// ScopeTy - This is a helpful typedef that allows clients to get easy access
+  /// to the name of the scope for this hash table.
+  typedef ScopedHashTableScope<K, V, KInfo, AllocatorTy> ScopeTy;
+private:
+  typedef ScopedHashTableVal<K, V> ValTy;
+  DenseMap<K, ValTy*, KInfo> TopLevelMap;
+  ScopeTy *CurScope;
+
+  AllocatorTy Allocator;
+
+  ScopedHashTable(const ScopedHashTable&); // NOT YET IMPLEMENTED
+  void operator=(const ScopedHashTable&);  // NOT YET IMPLEMENTED
+  friend class ScopedHashTableScope<K, V, KInfo, AllocatorTy>;
+public:
+  ScopedHashTable() : CurScope(0) {}
+  ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {}
+  ~ScopedHashTable() {
+    assert(CurScope == 0 && TopLevelMap.empty() && "Scope imbalance!");
+  }
+
+
+  /// Access to the allocator.
+  typedef typename ReferenceAdder<AllocatorTy>::result AllocatorRefTy;
+  typedef typename ReferenceAdder<const AllocatorTy>::result AllocatorCRefTy;
+  AllocatorRefTy getAllocator() { return Allocator; }
+  AllocatorCRefTy getAllocator() const { return Allocator; }
+
+  bool count(const K &Key) const {
+    return TopLevelMap.count(Key);
+  }
+
+  V lookup(const K &Key) {
+    typename DenseMap<K, ValTy*, KInfo>::iterator I = TopLevelMap.find(Key);
+    if (I != TopLevelMap.end())
+      return I->second->getValue();
+
+    return V();
+  }
+
+  void insert(const K &Key, const V &Val) {
+    insertIntoScope(CurScope, Key, Val);
+  }
+
+  typedef ScopedHashTableIterator<K, V, KInfo> iterator;
+
+  iterator end() { return iterator(0); }
+
+  iterator begin(const K &Key) {
+    typename DenseMap<K, ValTy*, KInfo>::iterator I =
+      TopLevelMap.find(Key);
+    if (I == TopLevelMap.end()) return end();
+    return iterator(I->second);
+  }
+
+  ScopeTy *getCurScope() { return CurScope; }
+  const ScopeTy *getCurScope() const { return CurScope; }
+
+  /// insertIntoScope - This inserts the specified key/value at the specified
+  /// (possibly not the current) scope.  While it is ok to insert into a scope
+  /// that isn't the current one, it isn't ok to insert *underneath* an existing
+  /// value of the specified key.
+  void insertIntoScope(ScopeTy *S, const K &Key, const V &Val) {
+    assert(S && "No scope active!");
+    ScopedHashTableVal<K, V> *&KeyEntry = TopLevelMap[Key];
+    KeyEntry = ValTy::Create(S->getLastValInScope(), KeyEntry, Key, Val,
+                             Allocator);
+    S->setLastValInScope(KeyEntry);
+  }
+};
+
+/// ScopedHashTableScope ctor - Install this as the current scope for the hash
+/// table.
+template <typename K, typename V, typename KInfo, typename Allocator>
+ScopedHashTableScope<K, V, KInfo, Allocator>::
+  ScopedHashTableScope(ScopedHashTable<K, V, KInfo, Allocator> &ht) : HT(ht) {
+  PrevScope = HT.CurScope;
+  HT.CurScope = this;
+  LastValInScope = 0;
+}
+
+template <typename K, typename V, typename KInfo, typename Allocator>
+ScopedHashTableScope<K, V, KInfo, Allocator>::~ScopedHashTableScope() {
+  assert(HT.CurScope == this && "Scope imbalance!");
+  HT.CurScope = PrevScope;
+
+  // Pop and delete all values corresponding to this scope.
+  while (ScopedHashTableVal<K, V> *ThisEntry = LastValInScope) {
+    // Pop this value out of the TopLevelMap.
+    if (ThisEntry->getNextForKey() == 0) {
+      assert(HT.TopLevelMap[ThisEntry->getKey()] == ThisEntry &&
+             "Scope imbalance!");
+      HT.TopLevelMap.erase(ThisEntry->getKey());
+    } else {
+      ScopedHashTableVal<K, V> *&KeyEntry = HT.TopLevelMap[ThisEntry->getKey()];
+      assert(KeyEntry == ThisEntry && "Scope imbalance!");
+      KeyEntry = ThisEntry->getNextForKey();
+    }
+
+    // Pop this value out of the scope.
+    LastValInScope = ThisEntry->getNextInScope();
+
+    // Delete this entry.
+    ThisEntry->Destroy(HT.getAllocator());
+  }
+}
+
+} // end namespace llvm
+
+#endif
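
// --- Editor's note: illustrative sketch, not part of the original patch.
// Scopes nest the way blocks do in a dominator-based optimization: popping
// a scope undoes exactly the insertions made while it was active.
#include "llvm/ADT/ScopedHashTable.h"

void scopedHashTableExample() {
  llvm::ScopedHashTable<int, int> HT;
  llvm::ScopedHashTableScope<int, int> Outer(HT);
  HT.insert(0, 7);
  {
    llvm::ScopedHashTableScope<int, int> Inner(HT);
    HT.insert(0, 42);
    int V = HT.lookup(0);  // 42: the inner binding shadows the outer one
    (void)V;
  }                        // Inner pops; the 0 -> 42 binding disappears
  int V = HT.lookup(0);    // back to 7
  (void)V;
}
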
diff --git a/final/include/llvm/ADT/SetOperations.h b/final/include/llvm/ADT/SetOperations.h
new file mode 100644
index 00000000000..71f5db380f6
--- /dev/null
+++ b/final/include/llvm/ADT/SetOperations.h
@@ -0,0 +1,71 @@
+//===-- llvm/ADT/SetOperations.h - Generic Set Operations -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines generic set operations that may be used on sets of
+// different types, and different element types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SETOPERATIONS_H
+#define LLVM_ADT_SETOPERATIONS_H
+
+namespace llvm {
+
+/// set_union(A, B) - Compute A := A u B, return whether A changed.
+///
+template <class S1Ty, class S2Ty>
+bool set_union(S1Ty &S1, const S2Ty &S2) {
+  bool Changed = false;
+
+  for (typename S2Ty::const_iterator SI = S2.begin(), SE = S2.end();
+       SI != SE; ++SI)
+    if (S1.insert(*SI).second)
+      Changed = true;
+
+  return Changed;
+}
+
+/// set_intersect(A, B) - Compute A := A ^ B
+/// Identical to set_intersection, except that it works on set<>'s and
+/// is nicer to use.  Functionally, this iterates through S1, removing
+/// elements that are not contained in S2.
+///
+template <class S1Ty, class S2Ty>
+void set_intersect(S1Ty &S1, const S2Ty &S2) {
+   for (typename S1Ty::iterator I = S1.begin(); I != S1.end();) {
+     const typename S1Ty::key_type &E = *I;
+     ++I;
+     if (!S2.count(E)) S1.erase(E);   // Erase element if not in S2
+   }
+}
+
+/// set_difference(A, B) - Return A - B
+///
+template <class S1Ty, class S2Ty>
+S1Ty set_difference(const S1Ty &S1, const S2Ty &S2) {
+  S1Ty Result;
+  for (typename S1Ty::const_iterator SI = S1.begin(), SE = S1.end();
+       SI != SE; ++SI)
+    if (!S2.count(*SI))       // if the element is not in set2
+      Result.insert(*SI);
+  return Result;
+}
+
+/// set_subtract(A, B) - Compute A := A - B
+///
+template <class S1Ty, class S2Ty>
+void set_subtract(S1Ty &S1, const S2Ty &S2) {
+  for (typename S2Ty::const_iterator SI = S2.begin(), SE = S2.end();
+       SI != SE; ++SI)
+    S1.erase(*SI);
+}
+
+} // End llvm namespace
+
+#endif
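
// --- Editor's note: illustrative sketch, not part of the original patch.
// The helpers work across heterogeneous set types as long as the usual
// insert/count/erase members are available.
#include "llvm/ADT/SetOperations.h"
#include <set>

void setOpsExample(std::set<int> &A, const std::set<int> &B) {
  bool Grew = llvm::set_union(A, B);             // A := A u B
  llvm::set_intersect(A, B);                     // A := A ^ B (in place)
  std::set<int> D = llvm::set_difference(A, B);  // A - B, as a new set
  llvm::set_subtract(A, B);                      // A := A - B (in place)
  (void)Grew; (void)D;
}
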
diff --git a/final/include/llvm/ADT/SetVector.h b/final/include/llvm/ADT/SetVector.h
new file mode 100644
index 00000000000..abe20676d54
--- /dev/null
+++ b/final/include/llvm/ADT/SetVector.h
@@ -0,0 +1,178 @@
+//===- llvm/ADT/SetVector.h - Set with insert order iteration ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a set that has insertion order iteration
+// characteristics. This is useful for keeping a set of things that need to be
+// visited later but in a deterministic order (insertion order). The interface
+// is purposefully minimal.
+//
+// This file defines SetVector and SmallSetVector; the latter performs no
+// allocations while it holds fewer than a certain number of elements.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SETVECTOR_H
+#define LLVM_ADT_SETVECTOR_H
+
+#include "llvm/ADT/SmallSet.h"
+#include <algorithm>
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+
+/// This adapter class provides a way to keep a set of things that also has the
+/// property of a deterministic iteration order. The order of iteration is the
+/// order of insertion.
+/// @brief A vector that has set insertion semantics.
+template <typename T, typename Vector = std::vector<T>,
+          typename Set = SmallSet<T, 16> >
+class SetVector {
+public:
+  typedef T value_type;
+  typedef T key_type;
+  typedef T& reference;
+  typedef const T& const_reference;
+  typedef Set set_type;
+  typedef Vector vector_type;
+  typedef typename vector_type::const_iterator iterator;
+  typedef typename vector_type::const_iterator const_iterator;
+  typedef typename vector_type::size_type size_type;
+
+  /// @brief Construct an empty SetVector
+  SetVector() {}
+
+  /// @brief Initialize a SetVector with a range of elements
+  template<typename It>
+  SetVector(It Start, It End) {
+    insert(Start, End);
+  }
+
+  /// @brief Determine if the SetVector is empty or not.
+  bool empty() const {
+    return vector_.empty();
+  }
+
+  /// @brief Determine the number of elements in the SetVector.
+  size_type size() const {
+    return vector_.size();
+  }
+
+  /// @brief Get an iterator to the beginning of the SetVector.
+  iterator begin() {
+    return vector_.begin();
+  }
+
+  /// @brief Get a const_iterator to the beginning of the SetVector.
+  const_iterator begin() const {
+    return vector_.begin();
+  }
+
+  /// @brief Get an iterator to the end of the SetVector.
+  iterator end() {
+    return vector_.end();
+  }
+
+  /// @brief Get a const_iterator to the end of the SetVector.
+  const_iterator end() const {
+    return vector_.end();
+  }
+
+  /// @brief Return the last element of the SetVector.
+  const T &back() const {
+    assert(!empty() && "Cannot call back() on empty SetVector!");
+    return vector_.back();
+  }
+
+  /// @brief Index into the SetVector.
+  const_reference operator[](size_type n) const {
+    assert(n < vector_.size() && "SetVector access out of range!");
+    return vector_[n];
+  }
+
+  /// @returns true iff the element was inserted into the SetVector.
+  /// @brief Insert a new element into the SetVector.
+  bool insert(const value_type &X) {
+    bool result = set_.insert(X);
+    if (result)
+      vector_.push_back(X);
+    return result;
+  }
+
+  /// @brief Insert a range of elements into the SetVector.
+  template<typename It>
+  void insert(It Start, It End) {
+    for (; Start != End; ++Start)
+      if (set_.insert(*Start))
+        vector_.push_back(*Start);
+  }
+
+  /// @brief Remove an item from the set vector.
+  bool remove(const value_type& X) {
+    if (set_.erase(X)) {
+      typename vector_type::iterator I =
+        std::find(vector_.begin(), vector_.end(), X);
+      assert(I != vector_.end() && "Corrupted SetVector instances!");
+      vector_.erase(I);
+      return true;
+    }
+    return false;
+  }
+
+
+  /// @returns 0 if the element is not in the SetVector, 1 if it is.
+  /// @brief Count the number of elements of a given key in the SetVector.
+  size_type count(const key_type &key) const {
+    return set_.count(key);
+  }
+
+  /// @brief Completely clear the SetVector
+  void clear() {
+    set_.clear();
+    vector_.clear();
+  }
+
+  /// @brief Remove the last element of the SetVector.
+  void pop_back() {
+    assert(!empty() && "Cannot remove an element from an empty SetVector!");
+    set_.erase(back());
+    vector_.pop_back();
+  }
+
+  bool operator==(const SetVector &that) const {
+    return vector_ == that.vector_;
+  }
+
+  bool operator!=(const SetVector &that) const {
+    return vector_ != that.vector_;
+  }
+
+private:
+  set_type set_;         ///< The set.
+  vector_type vector_;   ///< The vector.
+};
+
+/// SmallSetVector - A SetVector that performs no allocations if smaller than
+/// a certain size.
+template <typename T, unsigned N>
+class SmallSetVector : public SetVector<T, SmallVector<T, N>, SmallSet<T, N> > {
+public:
+  SmallSetVector() {}
+
+  /// @brief Initialize a SmallSetVector with a range of elements
+  template<typename It>
+  SmallSetVector(It Start, It End) {
+    this->insert(Start, End);
+  }
+};
+
+} // End llvm namespace
+
+// vim: sw=2 ai
+#endif
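
// --- Editor's note: illustrative sketch, not part of the original patch.
// Duplicate inserts are rejected and iteration replays insertion order,
// which keeps worklist-style passes deterministic.
#include "llvm/ADT/SetVector.h"

void setVectorExample() {
  llvm::SmallSetVector<int, 8> Work;   // no heap traffic while small
  Work.insert(3);
  Work.insert(1);
  Work.insert(3);                      // duplicate: returns false, no effect
  for (llvm::SmallSetVector<int, 8>::iterator I = Work.begin(),
         E = Work.end(); I != E; ++I)
    (void)*I;                          // visits 3 then 1: insertion order
}
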
diff --git a/final/include/llvm/ADT/SmallBitVector.h b/final/include/llvm/ADT/SmallBitVector.h
new file mode 100644
index 00000000000..b15b3ee0418
--- /dev/null
+++ b/final/include/llvm/ADT/SmallBitVector.h
@@ -0,0 +1,461 @@
+//===- llvm/ADT/SmallBitVector.h - 'Normally small' bit vectors -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallBitVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLBITVECTOR_H
+#define LLVM_ADT_SMALLBITVECTOR_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/MathExtras.h"
+#include <climits>
+
+namespace llvm {
+
+/// SmallBitVector - This is a 'bitvector' (really, a variable-sized bit array),
+/// optimized for the case when the array is small.  It contains one
+/// pointer-sized field, which is directly used as a plain collection of bits
+/// when possible, or as a pointer to a larger heap-allocated array when
+/// necessary.  This allows normal "small" cases to be fast without losing
+/// generality for large inputs.
+///
+class SmallBitVector {
+  // TODO: In "large" mode, a pointer to a BitVector is used, leading to an
+  // unnecessary level of indirection. It would be more efficient to use a
+  // pointer to memory containing size, allocation size, and the array of bits.
+  uintptr_t X;
+
+  enum {
+    // The number of bits in this class.
+    NumBaseBits = sizeof(uintptr_t) * CHAR_BIT,
+
+    // One bit is used to discriminate between small and large mode. The
+    // remaining bits are used for the small-mode representation.
+    SmallNumRawBits = NumBaseBits - 1,
+
+    // A few more bits are used to store the size of the bit set in small mode.
+    // Theoretically this is a ceil-log2. These bits are encoded in the most
+    // significant bits of the raw bits.
+    SmallNumSizeBits = (NumBaseBits == 32 ? 5 :
+                        NumBaseBits == 64 ? 6 :
+                        SmallNumRawBits),
+
+    // The remaining bits are used to store the actual set in small mode.
+    SmallNumDataBits = SmallNumRawBits - SmallNumSizeBits
+  };
+
+public:
+  // Encapsulation of a single bit.
+  class reference {
+    SmallBitVector &TheVector;
+    unsigned BitPos;
+
+  public:
+    reference(SmallBitVector &b, unsigned Idx) : TheVector(b), BitPos(Idx) {}
+
+    reference& operator=(reference t) {
+      *this = bool(t);
+      return *this;
+    }
+
+    reference& operator=(bool t) {
+      if (t)
+        TheVector.set(BitPos);
+      else
+        TheVector.reset(BitPos);
+      return *this;
+    }
+
+    operator bool() const {
+      return const_cast<const SmallBitVector &>(TheVector).operator[](BitPos);
+    }
+  };
+
+private:
+  bool isSmall() const {
+    return X & uintptr_t(1);
+  }
+
+  BitVector *getPointer() const {
+    assert(!isSmall());
+    return reinterpret_cast<BitVector *>(X);
+  }
+
+  void switchToSmall(uintptr_t NewSmallBits, size_t NewSize) {
+    X = 1;
+    setSmallSize(NewSize);
+    setSmallBits(NewSmallBits);
+  }
+
+  void switchToLarge(BitVector *BV) {
+    X = reinterpret_cast<uintptr_t>(BV);
+    assert(!isSmall() && "Tried to use an unaligned pointer");
+  }
+
+  // Return all the bits used for the "small" representation; this includes
+  // bits for the size as well as the element bits.
+  uintptr_t getSmallRawBits() const {
+    assert(isSmall());
+    return X >> 1;
+  }
+
+  void setSmallRawBits(uintptr_t NewRawBits) {
+    assert(isSmall());
+    X = (NewRawBits << 1) | uintptr_t(1);
+  }
+
+  // Return the size.
+  size_t getSmallSize() const {
+    return getSmallRawBits() >> SmallNumDataBits;
+  }
+
+  void setSmallSize(size_t Size) {
+    setSmallRawBits(getSmallBits() | (Size << SmallNumDataBits));
+  }
+
+  // Return the element bits.
+  uintptr_t getSmallBits() const {
+    return getSmallRawBits() & ~(~uintptr_t(0) << getSmallSize());
+  }
+
+  void setSmallBits(uintptr_t NewBits) {
+    setSmallRawBits((NewBits & ~(~uintptr_t(0) << getSmallSize())) |
+                    (getSmallSize() << SmallNumDataBits));
+  }
+
+public:
+  /// SmallBitVector default ctor - Creates an empty bitvector.
+  SmallBitVector() : X(1) {}
+
+  /// SmallBitVector ctor - Creates a bitvector of specified number of bits. All
+  /// bits are initialized to the specified value.
+  explicit SmallBitVector(unsigned s, bool t = false) {
+    if (s <= SmallNumDataBits)
+      switchToSmall(t ? ~uintptr_t(0) : 0, s);
+    else
+      switchToLarge(new BitVector(s, t));
+  }
+
+  /// SmallBitVector copy ctor.
+  SmallBitVector(const SmallBitVector &RHS) {
+    if (RHS.isSmall())
+      X = RHS.X;
+    else
+      switchToLarge(new BitVector(*RHS.getPointer()));
+  }
+
+  ~SmallBitVector() {
+    if (!isSmall())
+      delete getPointer();
+  }
+
+  /// empty - Tests whether there are no bits in this bitvector.
+  bool empty() const {
+    return isSmall() ? getSmallSize() == 0 : getPointer()->empty();
+  }
+
+  /// size - Returns the number of bits in this bitvector.
+  size_t size() const {
+    return isSmall() ? getSmallSize() : getPointer()->size();
+  }
+
+  /// count - Returns the number of bits which are set.
+  unsigned count() const {
+    if (isSmall()) {
+      uintptr_t Bits = getSmallBits();
+      if (sizeof(uintptr_t) * CHAR_BIT == 32)
+        return CountPopulation_32(Bits);
+      if (sizeof(uintptr_t) * CHAR_BIT == 64)
+        return CountPopulation_64(Bits);
+      assert(0 && "Unsupported!");
+    }
+    return getPointer()->count();
+  }
+
+  /// any - Returns true if any bit is set.
+  bool any() const {
+    if (isSmall())
+      return getSmallBits() != 0;
+    return getPointer()->any();
+  }
+
+  /// all - Returns true if all bits are set.
+  bool all() const {
+    if (isSmall())
+      return getSmallBits() == (uintptr_t(1) << getSmallSize()) - 1;
+    return getPointer()->all();
+  }
+
+  /// none - Returns true if none of the bits are set.
+  bool none() const {
+    if (isSmall())
+      return getSmallBits() == 0;
+    return getPointer()->none();
+  }
+
+  /// find_first - Returns the index of the first set bit, -1 if none
+  /// of the bits are set.
+  int find_first() const {
+    if (isSmall()) {
+      uintptr_t Bits = getSmallBits();
+      if (Bits == 0)
+        return -1;
+      if (sizeof(uintptr_t) * CHAR_BIT == 32)
+        return CountTrailingZeros_32(Bits);
+      if (sizeof(uintptr_t) * CHAR_BIT == 64)
+        return CountTrailingZeros_64(Bits);
+      assert(0 && "Unsupported!");
+    }
+    return getPointer()->find_first();
+  }
+
+  /// find_next - Returns the index of the next set bit following the
+  /// "Prev" bit. Returns -1 if the next set bit is not found.
+  int find_next(unsigned Prev) const {
+    if (isSmall()) {
+      uintptr_t Bits = getSmallBits();
+      // Mask off previous bits.
+      Bits &= ~uintptr_t(0) << (Prev + 1);
+      if (Bits == 0 || Prev + 1 >= getSmallSize())
+        return -1;
+      if (sizeof(uintptr_t) * CHAR_BIT == 32)
+        return CountTrailingZeros_32(Bits);
+      if (sizeof(uintptr_t) * CHAR_BIT == 64)
+        return CountTrailingZeros_64(Bits);
+      assert(0 && "Unsupported!");
+    }
+    return getPointer()->find_next(Prev);
+  }
+
+  /// clear - Clear all bits.
+  void clear() {
+    if (!isSmall())
+      delete getPointer();
+    switchToSmall(0, 0);
+  }
+
+  /// resize - Grow or shrink the bitvector.
+  void resize(unsigned N, bool t = false) {
+    if (!isSmall()) {
+      getPointer()->resize(N, t);
+    } else if (SmallNumDataBits >= N) {
+      uintptr_t NewBits = t ? ~uintptr_t(0) << getSmallSize() : 0;
+      setSmallSize(N);
+      setSmallBits(NewBits | getSmallBits());
+    } else {
+      BitVector *BV = new BitVector(N, t);
+      uintptr_t OldBits = getSmallBits();
+      for (size_t i = 0, e = getSmallSize(); i != e; ++i)
+        (*BV)[i] = (OldBits >> i) & 1;
+      switchToLarge(BV);
+    }
+  }
+
+  void reserve(unsigned N) {
+    if (isSmall()) {
+      if (N > SmallNumDataBits) {
+        uintptr_t OldBits = getSmallRawBits();
+        size_t SmallSize = getSmallSize();
+        BitVector *BV = new BitVector(SmallSize);
+        for (size_t i = 0; i < SmallSize; ++i)
+          if ((OldBits >> i) & 1)
+            BV->set(i);
+        BV->reserve(N);
+        switchToLarge(BV);
+      }
+    } else {
+      getPointer()->reserve(N);
+    }
+  }
+
+  // Set, reset, flip
+  SmallBitVector &set() {
+    if (isSmall())
+      setSmallBits(~uintptr_t(0));
+    else
+      getPointer()->set();
+    return *this;
+  }
+
+  SmallBitVector &set(unsigned Idx) {
+    if (isSmall())
+      setSmallBits(getSmallBits() | (uintptr_t(1) << Idx));
+    else
+      getPointer()->set(Idx);
+    return *this;
+  }
+
+  SmallBitVector &reset() {
+    if (isSmall())
+      setSmallBits(0);
+    else
+      getPointer()->reset();
+    return *this;
+  }
+
+  SmallBitVector &reset(unsigned Idx) {
+    if (isSmall())
+      setSmallBits(getSmallBits() & ~(uintptr_t(1) << Idx));
+    else
+      getPointer()->reset(Idx);
+    return *this;
+  }
+
+  SmallBitVector &flip() {
+    if (isSmall())
+      setSmallBits(~getSmallBits());
+    else
+      getPointer()->flip();
+    return *this;
+  }
+
+  SmallBitVector &flip(unsigned Idx) {
+    if (isSmall())
+      setSmallBits(getSmallBits() ^ (uintptr_t(1) << Idx));
+    else
+      getPointer()->flip(Idx);
+    return *this;
+  }
+
+  // No argument flip.
+  SmallBitVector operator~() const {
+    return SmallBitVector(*this).flip();
+  }
+
+  // Indexing.
+  reference operator[](unsigned Idx) {
+    assert(Idx < size() && "Out-of-bounds Bit access.");
+    return reference(*this, Idx);
+  }
+
+  bool operator[](unsigned Idx) const {
+    assert(Idx < size() && "Out-of-bounds Bit access.");
+    if (isSmall())
+      return ((getSmallBits() >> Idx) & 1) != 0;
+    return getPointer()->operator[](Idx);
+  }
+
+  bool test(unsigned Idx) const {
+    return (*this)[Idx];
+  }
+
+  // Comparison operators.
+  bool operator==(const SmallBitVector &RHS) const {
+    if (size() != RHS.size())
+      return false;
+    if (isSmall())
+      return getSmallBits() == RHS.getSmallBits();
+    else
+      return *getPointer() == *RHS.getPointer();
+  }
+
+  bool operator!=(const SmallBitVector &RHS) const {
+    return !(*this == RHS);
+  }
+
+  // Intersection, union, disjoint union.
+  SmallBitVector &operator&=(const SmallBitVector &RHS) {
+    resize(std::max(size(), RHS.size()));
+    if (isSmall())
+      setSmallBits(getSmallBits() & RHS.getSmallBits());
+    else if (!RHS.isSmall())
+      getPointer()->operator&=(*RHS.getPointer());
+    else {
+      SmallBitVector Copy = RHS;
+      Copy.resize(size());
+      getPointer()->operator&=(*Copy.getPointer());
+    }
+    return *this;
+  }
+
+  SmallBitVector &operator|=(const SmallBitVector &RHS) {
+    resize(std::max(size(), RHS.size()));
+    if (isSmall())
+      setSmallBits(getSmallBits() | RHS.getSmallBits());
+    else if (!RHS.isSmall())
+      getPointer()->operator|=(*RHS.getPointer());
+    else {
+      SmallBitVector Copy = RHS;
+      Copy.resize(size());
+      getPointer()->operator|=(*Copy.getPointer());
+    }
+    return *this;
+  }
+
+  SmallBitVector &operator^=(const SmallBitVector &RHS) {
+    resize(std::max(size(), RHS.size()));
+    if (isSmall())
+      setSmallBits(getSmallBits() ^ RHS.getSmallBits());
+    else if (!RHS.isSmall())
+      getPointer()->operator^=(*RHS.getPointer());
+    else {
+      SmallBitVector Copy = RHS;
+      Copy.resize(size());
+      getPointer()->operator^=(*Copy.getPointer());
+    }
+    return *this;
+  }
+
+  // Assignment operator.
+  const SmallBitVector &operator=(const SmallBitVector &RHS) {
+    if (isSmall()) {
+      if (RHS.isSmall())
+        X = RHS.X;
+      else
+        switchToLarge(new BitVector(*RHS.getPointer()));
+    } else {
+      if (!RHS.isSmall())
+        *getPointer() = *RHS.getPointer();
+      else {
+        delete getPointer();
+        X = RHS.X;
+      }
+    }
+    return *this;
+  }
+
+  void swap(SmallBitVector &RHS) {
+    std::swap(X, RHS.X);
+  }
+};
+
+inline SmallBitVector
+operator&(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+  SmallBitVector Result(LHS);
+  Result &= RHS;
+  return Result;
+}
+
+inline SmallBitVector
+operator|(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+  SmallBitVector Result(LHS);
+  Result |= RHS;
+  return Result;
+}
+
+inline SmallBitVector
+operator^(const SmallBitVector &LHS, const SmallBitVector &RHS) {
+  SmallBitVector Result(LHS);
+  Result ^= RHS;
+  return Result;
+}
+
+} // End llvm namespace
+
+namespace std {
+  /// Implement std::swap in terms of BitVector swap.
+  inline void
+  swap(llvm::SmallBitVector &LHS, llvm::SmallBitVector &RHS) {
+    LHS.swap(RHS);
+  }
+}
+
+#endif
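
// --- Editor's note: illustrative sketch, not part of the original patch.
// Up to roughly word-size-minus-a-few bits live inline; growing past that
// transparently switches to a heap-allocated BitVector.
#include "llvm/ADT/SmallBitVector.h"

void smallBitVectorExample() {
  llvm::SmallBitVector BV(16);   // 16 bits fit in the inline word
  BV.set(3);
  BV.set(10);
  int First = BV.find_first();   // 3
  int Next  = BV.find_next(3);   // 10
  BV.resize(1000);               // now spills to a heap BitVector
  (void)First; (void)Next;
}
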
diff --git a/final/include/llvm/ADT/SmallPtrSet.h b/final/include/llvm/ADT/SmallPtrSet.h
new file mode 100644
index 00000000000..ff32ba87a26
--- /dev/null
+++ b/final/include/llvm/ADT/SmallPtrSet.h
@@ -0,0 +1,294 @@
+//===- llvm/ADT/SmallPtrSet.h - 'Normally small' pointer set ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SmallPtrSet class.  See the doxygen comment for
+// SmallPtrSetImpl for more details on the algorithm used.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLPTRSET_H
+#define LLVM_ADT_SMALLPTRSET_H
+
+#include <cassert>
+#include <cstddef>
+#include <cstring>
+#include <iterator>
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+
+namespace llvm {
+
+class SmallPtrSetIteratorImpl;
+
+/// SmallPtrSetImpl - This is the common code shared among all the
+/// SmallPtrSet<>'s, which is almost everything.  SmallPtrSet has two modes, one
+/// for small and one for large sets.
+///
+/// Small sets use an array of pointers allocated in the SmallPtrSet object,
+/// which is treated as a simple array of pointers.  When a pointer is added to
+/// the set, the array is scanned to see if the element already exists, if not
+/// the element is 'pushed back' onto the array.  If we run out of space in the
+/// array, we grow into the 'large set' case.  SmallPtrSet should be used when
+/// the sets are often small.  In this case, no memory allocation is used, and
+/// only light-weight and cache-efficient scanning is used.
+///
+/// Large sets use a classic exponentially-probed hash table.  Empty buckets are
+/// represented with an illegal pointer value (-1) to allow null pointers to be
+/// inserted.  Tombstones are represented with another illegal pointer value
+/// (-2), to allow deletion.  The hash table is resized when the table is 3/4 or
+/// more full.  When this happens, the table is doubled in size.
+///
+class SmallPtrSetImpl {
+  friend class SmallPtrSetIteratorImpl;
+protected:
+  /// SmallArray - Points to a fixed size set of buckets, used in 'small mode'.
+  const void **SmallArray;
+  /// CurArray - This is the current set of buckets.  If equal to SmallArray,
+  /// then the set is in 'small mode'.
+  const void **CurArray;
+  /// CurArraySize - The allocated size of CurArray, always a power of two.
+  /// Note that CurArray points to an array that has CurArraySize+1 elements in
+  /// it, so that the end iterator actually points to valid memory.
+  unsigned CurArraySize;
+
+  // If small, this is # elts allocated consecutively
+  unsigned NumElements;
+  unsigned NumTombstones;
+
+  // Helper to copy construct a SmallPtrSet.
+  SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl& that);
+  explicit SmallPtrSetImpl(const void **SmallStorage, unsigned SmallSize) :
+    SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) {
+    assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
+           "Initial size must be a power of two!");
+    // The end pointer, always valid, is set to a valid element to help the
+    // iterator.
+    CurArray[SmallSize] = 0;
+    clear();
+  }
+  ~SmallPtrSetImpl();
+
+public:
+  bool empty() const { return size() == 0; }
+  unsigned size() const { return NumElements; }
+
+  void clear() {
+    // If the capacity of the array is huge, and the # elements used is small,
+    // shrink the array.
+    if (!isSmall() && NumElements*4 < CurArraySize && CurArraySize > 32)
+      return shrink_and_clear();
+
+    // Fill the array with empty markers.
+    memset(CurArray, -1, CurArraySize*sizeof(void*));
+    NumElements = 0;
+    NumTombstones = 0;
+  }
+ memset(CurArray, -1, CurArraySize*sizeof(void*)); + NumElements = 0; + NumTombstones = 0; + } + +protected: + static void *getTombstoneMarker() { return reinterpret_cast(-2); } + static void *getEmptyMarker() { + // Note that -1 is chosen to make clear() efficiently implementable with + // memset and because it's not a valid pointer value. + return reinterpret_cast(-1); + } + + /// insert_imp - This returns true if the pointer was new to the set, false if + /// it was already in the set. This is hidden from the client so that the + /// derived class can check that the right type of pointer is passed in. + bool insert_imp(const void * Ptr); + + /// erase_imp - If the set contains the specified pointer, remove it and + /// return true, otherwise return false. This is hidden from the client so + /// that the derived class can check that the right type of pointer is passed + /// in. + bool erase_imp(const void * Ptr); + + bool count_imp(const void * Ptr) const { + if (isSmall()) { + // Linear search for the item. + for (const void *const *APtr = SmallArray, + *const *E = SmallArray+NumElements; APtr != E; ++APtr) + if (*APtr == Ptr) + return true; + return false; + } + + // Big set case. + return *FindBucketFor(Ptr) == Ptr; + } + +private: + bool isSmall() const { return CurArray == SmallArray; } + + unsigned Hash(const void *Ptr) const { + return static_cast(((uintptr_t)Ptr >> 4) & (CurArraySize-1)); + } + const void * const *FindBucketFor(const void *Ptr) const; + void shrink_and_clear(); + + /// Grow - Allocate a larger backing store for the buckets and move it over. + void Grow(); + + void operator=(const SmallPtrSetImpl &RHS); // DO NOT IMPLEMENT. +protected: + void CopyFrom(const SmallPtrSetImpl &RHS); +}; + +/// SmallPtrSetIteratorImpl - This is the common base class shared between all +/// instances of SmallPtrSetIterator. +class SmallPtrSetIteratorImpl { +protected: + const void *const *Bucket; +public: + explicit SmallPtrSetIteratorImpl(const void *const *BP) : Bucket(BP) { + AdvanceIfNotValid(); + } + + bool operator==(const SmallPtrSetIteratorImpl &RHS) const { + return Bucket == RHS.Bucket; + } + bool operator!=(const SmallPtrSetIteratorImpl &RHS) const { + return Bucket != RHS.Bucket; + } + +protected: + /// AdvanceIfNotValid - If the current bucket isn't valid, advance to a bucket + /// that is. This is guaranteed to stop because the end() bucket is marked + /// valid. + void AdvanceIfNotValid() { + while (*Bucket == SmallPtrSetImpl::getEmptyMarker() || + *Bucket == SmallPtrSetImpl::getTombstoneMarker()) + ++Bucket; + } +}; + +/// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet. +template +class SmallPtrSetIterator : public SmallPtrSetIteratorImpl { + typedef PointerLikeTypeTraits PtrTraits; + +public: + typedef PtrTy value_type; + typedef PtrTy reference; + typedef PtrTy pointer; + typedef std::ptrdiff_t difference_type; + typedef std::forward_iterator_tag iterator_category; + + explicit SmallPtrSetIterator(const void *const *BP) + : SmallPtrSetIteratorImpl(BP) {} + + // Most methods provided by baseclass. 
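+  // Illustrative use (assuming some SmallPtrSet<Value*, 8> S; visit() is a
+  // placeholder): dereferencing yields the stored pointer type, e.g.
+  //   for (SmallPtrSet<Value*, 8>::iterator I = S.begin(), E = S.end();
+  //        I != E; ++I)
+  //     visit(*I);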
+ + const PtrTy operator*() const { + return PtrTraits::getFromVoidPointer(const_cast(*Bucket)); + } + + inline SmallPtrSetIterator& operator++() { // Preincrement + ++Bucket; + AdvanceIfNotValid(); + return *this; + } + + SmallPtrSetIterator operator++(int) { // Postincrement + SmallPtrSetIterator tmp = *this; ++*this; return tmp; + } +}; + +/// RoundUpToPowerOfTwo - This is a helper template that rounds N up to the next +/// power of two (which means N itself if N is already a power of two). +template +struct RoundUpToPowerOfTwo; + +/// RoundUpToPowerOfTwoH - If N is not a power of two, increase it. This is a +/// helper template used to implement RoundUpToPowerOfTwo. +template +struct RoundUpToPowerOfTwoH { + enum { Val = N }; +}; +template +struct RoundUpToPowerOfTwoH { + enum { + // We could just use NextVal = N+1, but this converges faster. N|(N-1) sets + // the right-most zero bits to one all at once, e.g. 0b0011000 -> 0b0011111. + Val = RoundUpToPowerOfTwo<(N|(N-1)) + 1>::Val + }; +}; + +template +struct RoundUpToPowerOfTwo { + enum { Val = RoundUpToPowerOfTwoH::Val }; +}; + + +/// SmallPtrSet - This class implements a set which is optimized for holding +/// SmallSize or less elements. This internally rounds up SmallSize to the next +/// power of two if it is not already a power of two. See the comments above +/// SmallPtrSetImpl for details of the algorithm. +template +class SmallPtrSet : public SmallPtrSetImpl { + // Make sure that SmallSize is a power of two, round up if not. + enum { SmallSizePowTwo = RoundUpToPowerOfTwo::Val }; + /// SmallStorage - Fixed size storage used in 'small mode'. The extra element + /// ensures that the end iterator actually points to valid memory. + const void *SmallStorage[SmallSizePowTwo+1]; + typedef PointerLikeTypeTraits PtrTraits; +public: + SmallPtrSet() : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) {} + SmallPtrSet(const SmallPtrSet &that) : SmallPtrSetImpl(SmallStorage, that) {} + + template + SmallPtrSet(It I, It E) : SmallPtrSetImpl(SmallStorage, SmallSizePowTwo) { + insert(I, E); + } + + /// insert - This returns true if the pointer was new to the set, false if it + /// was already in the set. + bool insert(PtrType Ptr) { + return insert_imp(PtrTraits::getAsVoidPointer(Ptr)); + } + + /// erase - If the set contains the specified pointer, remove it and return + /// true, otherwise return false. + bool erase(PtrType Ptr) { + return erase_imp(PtrTraits::getAsVoidPointer(Ptr)); + } + + /// count - Return true if the specified pointer is in the set. + bool count(PtrType Ptr) const { + return count_imp(PtrTraits::getAsVoidPointer(Ptr)); + } + + template + void insert(IterT I, IterT E) { + for (; I != E; ++I) + insert(*I); + } + + typedef SmallPtrSetIterator iterator; + typedef SmallPtrSetIterator const_iterator; + inline iterator begin() const { + return iterator(CurArray); + } + inline iterator end() const { + return iterator(CurArray+CurArraySize); + } + + // Allow assignment from any smallptrset with the same element type even if it + // doesn't have the same smallsize. 
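+  // (CopyFrom is defined out of line and handles the small and large
+  // representations on both sides.)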
+  const SmallPtrSet<PtrType, SmallSize>&
+  operator=(const SmallPtrSet<PtrType, SmallSize> &RHS) {
+    CopyFrom(RHS);
+    return *this;
+  }
+
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/ADT/SmallSet.h b/final/include/llvm/ADT/SmallSet.h
new file mode 100644
index 00000000000..d03f1bef15b
--- /dev/null
+++ b/final/include/llvm/ADT/SmallSet.h
@@ -0,0 +1,118 @@
+//===- llvm/ADT/SmallSet.h - 'Normally small' sets --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SmallSet class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLSET_H
+#define LLVM_ADT_SMALLSET_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <set>
+
+namespace llvm {
+
+/// SmallSet - This maintains a set of unique values, optimizing for the case
+/// when the set is small (less than N). In this case, the set can be
+/// maintained with no mallocs. If the set gets large, we expand to using an
+/// std::set to maintain reasonable lookup times.
+///
+/// Note that this set does not provide a way to iterate over members in the
+/// set.
+template <typename T, unsigned N>
+class SmallSet {
+  /// Use a SmallVector to hold the elements here (even though it will never
+  /// reach its 'large' stage) to avoid calling the default ctors of elements
+  /// we will never use.
+  SmallVector<T, N> Vector;
+  std::set<T> Set;
+  typedef typename SmallVector<T, N>::const_iterator VIterator;
+  typedef typename SmallVector<T, N>::iterator mutable_iterator;
+public:
+  SmallSet() {}
+
+  bool empty() const { return Vector.empty() && Set.empty(); }
+  unsigned size() const {
+    return isSmall() ? Vector.size() : Set.size();
+  }
+
+  /// count - Return true if the element is in the set.
+  bool count(const T &V) const {
+    if (isSmall()) {
+      // Since the collection is small, just do a linear search.
+      return vfind(V) != Vector.end();
+    } else {
+      return Set.count(V);
+    }
+  }
+
+  /// insert - Insert an element into the set if it isn't already there.
+  bool insert(const T &V) {
+    if (!isSmall())
+      return Set.insert(V).second;
+
+    VIterator I = vfind(V);
+    if (I != Vector.end())    // Don't reinsert if it already exists.
+      return false;
+    if (Vector.size() < N) {
+      Vector.push_back(V);
+      return true;
+    }
+
+    // Otherwise, grow from vector to set.
+    while (!Vector.empty()) {
+      Set.insert(Vector.back());
+      Vector.pop_back();
+    }
+    Set.insert(V);
+    return true;
+  }
+
+  template <typename IterT>
+  void insert(IterT I, IterT E) {
+    for (; I != E; ++I)
+      insert(*I);
+  }
+
+  bool erase(const T &V) {
+    if (!isSmall())
+      return Set.erase(V);
+    for (mutable_iterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
+      if (*I == V) {
+        Vector.erase(I);
+        return true;
+      }
+    return false;
+  }
+
+  void clear() {
+    Vector.clear();
+    Set.clear();
+  }
+private:
+  bool isSmall() const { return Set.empty(); }
+
+  VIterator vfind(const T &V) const {
+    for (VIterator I = Vector.begin(), E = Vector.end(); I != E; ++I)
+      if (*I == V)
+        return I;
+    return Vector.end();
+  }
+};
+
+/// If this set is of pointer values, transparently switch over to using
+/// SmallPtrSet for performance.
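+/// For example, SmallSet<Instruction*, 16> inherits SmallPtrSet's probing
+/// scheme instead of the SmallVector/std::set scheme above.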
+template <typename PointeeType, unsigned N>
+class SmallSet<PointeeType*, N> : public SmallPtrSet<PointeeType*, N> {};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/ADT/SmallString.h b/final/include/llvm/ADT/SmallString.h
new file mode 100644
index 00000000000..da264164821
--- /dev/null
+++ b/final/include/llvm/ADT/SmallString.h
@@ -0,0 +1,72 @@
+//===- llvm/ADT/SmallString.h - 'Normally small' strings --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SmallString class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLSTRING_H
+#define LLVM_ADT_SMALLSTRING_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+/// SmallString - A SmallString is just a SmallVector with methods and accessors
+/// that make it work better as a string (e.g. operator+ etc).
+template<unsigned InternalLen>
+class SmallString : public SmallVector<char, InternalLen> {
+public:
+  // Default ctor - Initialize to empty.
+  SmallString() {}
+
+  // Initialize from a StringRef.
+  SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
+
+  // Initialize with a range.
+  template<typename ItTy>
+  SmallString(ItTy S, ItTy E) : SmallVector<char, InternalLen>(S, E) {}
+
+  // Copy ctor.
+  SmallString(const SmallString &RHS) : SmallVector<char, InternalLen>(RHS) {}
+
+
+  // Extra methods.
+  StringRef str() const { return StringRef(this->begin(), this->size()); }
+
+  // TODO: Make this const, if it's safe...
+  const char* c_str() {
+    this->push_back(0);
+    this->pop_back();
+    return this->data();
+  }
+
+  // Implicit conversion to StringRef.
+  operator StringRef() const { return str(); }
+
+  // Extra operators.
+  const SmallString &operator=(StringRef RHS) {
+    this->clear();
+    return *this += RHS;
+  }
+
+  SmallString &operator+=(StringRef RHS) {
+    this->append(RHS.begin(), RHS.end());
+    return *this;
+  }
+  SmallString &operator+=(char C) {
+    this->push_back(C);
+    return *this;
+  }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/ADT/SmallVector.h b/final/include/llvm/ADT/SmallVector.h
new file mode 100644
index 00000000000..8b0a13d6ed7
--- /dev/null
+++ b/final/include/llvm/ADT/SmallVector.h
@@ -0,0 +1,752 @@
+//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SmallVector class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLVECTOR_H
+#define LLVM_ADT_SMALLVECTOR_H
+
+#include "llvm/Support/type_traits.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>
+#include <iterator>
+#include <memory>
+
+#ifdef _MSC_VER
+namespace std {
+#if _MSC_VER <= 1310
+  // Work around flawed VC++ implementation of std::uninitialized_copy. Define
+  // additional overloads so that elements with pointer types are recognized as
+  // scalars and not objects, causing bizarre type conversion errors.
+ template + inline _Scalar_ptr_iterator_tag _Ptr_cat(T1 **, T2 **) { + _Scalar_ptr_iterator_tag _Cat; + return _Cat; + } + + template + inline _Scalar_ptr_iterator_tag _Ptr_cat(T1* const *, T2 **) { + _Scalar_ptr_iterator_tag _Cat; + return _Cat; + } +#else +// FIXME: It is not clear if the problem is fixed in VS 2005. What is clear +// is that the above hack won't work if it wasn't fixed. +#endif +} +#endif + +namespace llvm { + +/// SmallVectorBase - This is all the non-templated stuff common to all +/// SmallVectors. +class SmallVectorBase { +protected: + void *BeginX, *EndX, *CapacityX; + + // Allocate raw space for N elements of type T. If T has a ctor or dtor, we + // don't want it to be automatically run, so we need to represent the space as + // something else. An array of char would work great, but might not be + // aligned sufficiently. Instead we use some number of union instances for + // the space, which guarantee maximal alignment. + union U { + double D; + long double LD; + long long L; + void *P; + } FirstEl; + // Space after 'FirstEl' is clobbered, do not add any instance vars after it. + +protected: + SmallVectorBase(size_t Size) + : BeginX(&FirstEl), EndX(&FirstEl), CapacityX((char*)&FirstEl+Size) {} + + /// isSmall - Return true if this is a smallvector which has not had dynamic + /// memory allocated for it. + bool isSmall() const { + return BeginX == static_cast(&FirstEl); + } + + /// size_in_bytes - This returns size()*sizeof(T). + size_t size_in_bytes() const { + return size_t((char*)EndX - (char*)BeginX); + } + + /// capacity_in_bytes - This returns capacity()*sizeof(T). + size_t capacity_in_bytes() const { + return size_t((char*)CapacityX - (char*)BeginX); + } + + /// grow_pod - This is an implementation of the grow() method which only works + /// on POD-like data types and is out of line to reduce code duplication. + void grow_pod(size_t MinSizeInBytes, size_t TSize); + +public: + bool empty() const { return BeginX == EndX; } +}; + + +template +class SmallVectorTemplateCommon : public SmallVectorBase { +protected: + void setEnd(T *P) { this->EndX = P; } +public: + SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {} + + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef T value_type; + typedef T *iterator; + typedef const T *const_iterator; + + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + typedef T &reference; + typedef const T &const_reference; + typedef T *pointer; + typedef const T *const_pointer; + + // forward iterator creation methods. + iterator begin() { return (iterator)this->BeginX; } + const_iterator begin() const { return (const_iterator)this->BeginX; } + iterator end() { return (iterator)this->EndX; } + const_iterator end() const { return (const_iterator)this->EndX; } +protected: + iterator capacity_ptr() { return (iterator)this->CapacityX; } + const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;} +public: + + // reverse iterator creation methods. 
+ reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin());} + + size_type size() const { return end()-begin(); } + size_type max_size() const { return size_type(-1) / sizeof(T); } + + /// capacity - Return the total number of elements in the currently allocated + /// buffer. + size_t capacity() const { return capacity_ptr() - begin(); } + + /// data - Return a pointer to the vector's buffer, even if empty(). + pointer data() { return pointer(begin()); } + /// data - Return a pointer to the vector's buffer, even if empty(). + const_pointer data() const { return const_pointer(begin()); } + + reference operator[](unsigned idx) { + assert(begin() + idx < end()); + return begin()[idx]; + } + const_reference operator[](unsigned idx) const { + assert(begin() + idx < end()); + return begin()[idx]; + } + + reference front() { + return begin()[0]; + } + const_reference front() const { + return begin()[0]; + } + + reference back() { + return end()[-1]; + } + const_reference back() const { + return end()[-1]; + } +}; + +/// SmallVectorTemplateBase - This is where we put method +/// implementations that are designed to work with non-POD-like T's. +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + + static void destroy_range(T *S, T *E) { + while (S != E) { + --E; + E->~T(); + } + } + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + std::uninitialized_copy(I, E, Dest); + } + + /// grow - double the size of the allocated memory, guaranteeing space for at + /// least one more element or MinSize if specified. + void grow(size_t MinSize = 0); +}; + +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +void SmallVectorTemplateBase::grow(size_t MinSize) { + size_t CurCapacity = this->capacity(); + size_t CurSize = this->size(); + size_t NewCapacity = 2*CurCapacity + 1; // Always grow, even from zero. + if (NewCapacity < MinSize) + NewCapacity = MinSize; + T *NewElts = static_cast(malloc(NewCapacity*sizeof(T))); + + // Copy the elements over. + this->uninitialized_copy(this->begin(), this->end(), NewElts); + + // Destroy the original elements. + destroy_range(this->begin(), this->end()); + + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + free(this->begin()); + + this->setEnd(NewElts+CurSize); + this->BeginX = NewElts; + this->CapacityX = this->begin()+NewCapacity; +} + + +/// SmallVectorTemplateBase - This is where we put method +/// implementations that are designed to work with POD-like T's. +template +class SmallVectorTemplateBase : public SmallVectorTemplateCommon { +public: + SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} + + // No need to do a destroy loop for POD's. + static void destroy_range(T *, T *) {} + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(It1 I, It1 E, It2 Dest) { + // Arbitrary iterator types; just use the basic implementation. 
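+    // (The pointer-pair overload below provides the memcpy fast path.)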
+ std::uninitialized_copy(I, E, Dest); + } + + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory + /// starting with "Dest", constructing elements into it as needed. + template + static void uninitialized_copy(T1 *I, T1 *E, T2 *Dest) { + // Use memcpy for PODs iterated by pointers (which includes SmallVector + // iterators): std::uninitialized_copy optimizes to memmove, but we can + // use memcpy here. + memcpy(Dest, I, (E-I)*sizeof(T)); + } + + /// grow - double the size of the allocated memory, guaranteeing space for at + /// least one more element or MinSize if specified. + void grow(size_t MinSize = 0) { + this->grow_pod(MinSize*sizeof(T), sizeof(T)); + } +}; + + +/// SmallVectorImpl - This class consists of common code factored out of the +/// SmallVector class to reduce code duplication based on the SmallVector 'N' +/// template parameter. +template +class SmallVectorImpl : public SmallVectorTemplateBase::value> { + typedef SmallVectorTemplateBase::value > SuperClass; + + SmallVectorImpl(const SmallVectorImpl&); // DISABLED. +public: + typedef typename SuperClass::iterator iterator; + typedef typename SuperClass::size_type size_type; + + // Default ctor - Initialize to empty. + explicit SmallVectorImpl(unsigned N) + : SmallVectorTemplateBase::value>(N*sizeof(T)) { + } + + ~SmallVectorImpl() { + // Destroy the constructed elements in the vector. + this->destroy_range(this->begin(), this->end()); + + // If this wasn't grown from the inline copy, deallocate the old space. + if (!this->isSmall()) + free(this->begin()); + } + + + void clear() { + this->destroy_range(this->begin(), this->end()); + this->EndX = this->BeginX; + } + + void resize(unsigned N) { + if (N < this->size()) { + this->destroy_range(this->begin()+N, this->end()); + this->setEnd(this->begin()+N); + } else if (N > this->size()) { + if (this->capacity() < N) + this->grow(N); + this->construct_range(this->end(), this->begin()+N, T()); + this->setEnd(this->begin()+N); + } + } + + void resize(unsigned N, const T &NV) { + if (N < this->size()) { + this->destroy_range(this->begin()+N, this->end()); + this->setEnd(this->begin()+N); + } else if (N > this->size()) { + if (this->capacity() < N) + this->grow(N); + construct_range(this->end(), this->begin()+N, NV); + this->setEnd(this->begin()+N); + } + } + + void reserve(unsigned N) { + if (this->capacity() < N) + this->grow(N); + } + + void push_back(const T &Elt) { + if (this->EndX < this->CapacityX) { + Retry: + new (this->end()) T(Elt); + this->setEnd(this->end()+1); + return; + } + this->grow(); + goto Retry; + } + + void pop_back() { + this->setEnd(this->end()-1); + this->end()->~T(); + } + + T pop_back_val() { + T Result = this->back(); + pop_back(); + return Result; + } + + void swap(SmallVectorImpl &RHS); + + /// append - Add the specified range to the end of the SmallVector. + /// + template + void append(in_iter in_start, in_iter in_end) { + size_type NumInputs = std::distance(in_start, in_end); + // Grow allocated space if needed. + if (NumInputs > size_type(this->capacity_ptr()-this->end())) + this->grow(this->size()+NumInputs); + + // Copy the new elements over. + // TODO: NEED To compile time dispatch on whether in_iter is a random access + // iterator to use the fast uninitialized_copy. + std::uninitialized_copy(in_start, in_end, this->end()); + this->setEnd(this->end() + NumInputs); + } + + /// append - Add the specified range to the end of the SmallVector. 
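+  /// (This overload appends NumInputs copies of Elt, e.g. V.append(3, T())
+  /// adds three value-constructed elements.)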
+ /// + void append(size_type NumInputs, const T &Elt) { + // Grow allocated space if needed. + if (NumInputs > size_type(this->capacity_ptr()-this->end())) + this->grow(this->size()+NumInputs); + + // Copy the new elements over. + std::uninitialized_fill_n(this->end(), NumInputs, Elt); + this->setEnd(this->end() + NumInputs); + } + + void assign(unsigned NumElts, const T &Elt) { + clear(); + if (this->capacity() < NumElts) + this->grow(NumElts); + this->setEnd(this->begin()+NumElts); + construct_range(this->begin(), this->end(), Elt); + } + + iterator erase(iterator I) { + iterator N = I; + // Shift all elts down one. + std::copy(I+1, this->end(), I); + // Drop the last elt. + pop_back(); + return(N); + } + + iterator erase(iterator S, iterator E) { + iterator N = S; + // Shift all elts down. + iterator I = std::copy(E, this->end(), S); + // Drop the last elts. + this->destroy_range(I, this->end()); + this->setEnd(I); + return(N); + } + + iterator insert(iterator I, const T &Elt) { + if (I == this->end()) { // Important special case for empty vector. + push_back(Elt); + return this->end()-1; + } + + if (this->EndX < this->CapacityX) { + Retry: + new (this->end()) T(this->back()); + this->setEnd(this->end()+1); + // Push everything else over. + std::copy_backward(I, this->end()-1, this->end()); + *I = Elt; + return I; + } + size_t EltNo = I-this->begin(); + this->grow(); + I = this->begin()+EltNo; + goto Retry; + } + + iterator insert(iterator I, size_type NumToInsert, const T &Elt) { + if (I == this->end()) { // Important special case for empty vector. + append(NumToInsert, Elt); + return this->end()-1; + } + + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + + // Ensure there is enough space. + reserve(static_cast(this->size() + NumToInsert)); + + // Uninvalidate the iterator. + I = this->begin()+InsertElt; + + // If there are more elements between the insertion point and the end of the + // range than there are being inserted, we can use a simple approach to + // insertion. Since we already reserved space, we know that this won't + // reallocate the vector. + if (size_t(this->end()-I) >= NumToInsert) { + T *OldEnd = this->end(); + append(this->end()-NumToInsert, this->end()); + + // Copy the existing elements that get replaced. + std::copy_backward(I, OldEnd-NumToInsert, OldEnd); + + std::fill_n(I, NumToInsert, Elt); + return I; + } + + // Otherwise, we're inserting more elements than exist already, and we're + // not inserting at the end. + + // Copy over the elements that we're about to overwrite. + T *OldEnd = this->end(); + this->setEnd(this->end() + NumToInsert); + size_t NumOverwritten = OldEnd-I; + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + + // Replace the overwritten part. + std::fill_n(I, NumOverwritten, Elt); + + // Insert the non-overwritten middle part. + std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); + return I; + } + + template + iterator insert(iterator I, ItTy From, ItTy To) { + if (I == this->end()) { // Important special case for empty vector. + append(From, To); + return this->end()-1; + } + + size_t NumToInsert = std::distance(From, To); + // Convert iterator to elt# to avoid invalidating iterator when we reserve() + size_t InsertElt = I - this->begin(); + + // Ensure there is enough space. + reserve(static_cast(this->size() + NumToInsert)); + + // Uninvalidate the iterator. 
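+    // (reserve() may have reallocated the buffer, so recompute I from the
+    // saved element index.)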
+ I = this->begin()+InsertElt; + + // If there are more elements between the insertion point and the end of the + // range than there are being inserted, we can use a simple approach to + // insertion. Since we already reserved space, we know that this won't + // reallocate the vector. + if (size_t(this->end()-I) >= NumToInsert) { + T *OldEnd = this->end(); + append(this->end()-NumToInsert, this->end()); + + // Copy the existing elements that get replaced. + std::copy_backward(I, OldEnd-NumToInsert, OldEnd); + + std::copy(From, To, I); + return I; + } + + // Otherwise, we're inserting more elements than exist already, and we're + // not inserting at the end. + + // Copy over the elements that we're about to overwrite. + T *OldEnd = this->end(); + this->setEnd(this->end() + NumToInsert); + size_t NumOverwritten = OldEnd-I; + this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); + + // Replace the overwritten part. + for (; NumOverwritten > 0; --NumOverwritten) { + *I = *From; + ++I; ++From; + } + + // Insert the non-overwritten middle part. + this->uninitialized_copy(From, To, OldEnd); + return I; + } + + const SmallVectorImpl + &operator=(const SmallVectorImpl &RHS); + + bool operator==(const SmallVectorImpl &RHS) const { + if (this->size() != RHS.size()) return false; + return std::equal(this->begin(), this->end(), RHS.begin()); + } + bool operator!=(const SmallVectorImpl &RHS) const { + return !(*this == RHS); + } + + bool operator<(const SmallVectorImpl &RHS) const { + return std::lexicographical_compare(this->begin(), this->end(), + RHS.begin(), RHS.end()); + } + + /// set_size - Set the array size to \arg N, which the current array must have + /// enough capacity for. + /// + /// This does not construct or destroy any elements in the vector. + /// + /// Clients can use this in conjunction with capacity() to write past the end + /// of the buffer when they know that more elements are available, and only + /// update the size later. This avoids the cost of value initializing elements + /// which will only be overwritten. + void set_size(unsigned N) { + assert(N <= this->capacity()); + this->setEnd(this->begin() + N); + } + +private: + static void construct_range(T *S, T *E, const T &Elt) { + for (; S != E; ++S) + new (S) T(Elt); + } +}; + + +template +void SmallVectorImpl::swap(SmallVectorImpl &RHS) { + if (this == &RHS) return; + + // We can only avoid copying elements if neither vector is small. + if (!this->isSmall() && !RHS.isSmall()) { + std::swap(this->BeginX, RHS.BeginX); + std::swap(this->EndX, RHS.EndX); + std::swap(this->CapacityX, RHS.CapacityX); + return; + } + if (RHS.size() > this->capacity()) + this->grow(RHS.size()); + if (this->size() > RHS.capacity()) + RHS.grow(this->size()); + + // Swap the shared elements. + size_t NumShared = this->size(); + if (NumShared > RHS.size()) NumShared = RHS.size(); + for (unsigned i = 0; i != static_cast(NumShared); ++i) + std::swap((*this)[i], RHS[i]); + + // Copy over the extra elts. 
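+  // (The longer side copy-constructs its tail into the shorter side, then
+  // destroys the originals, so the two sizes end up exchanged.)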
+ if (this->size() > RHS.size()) { + size_t EltDiff = this->size() - RHS.size(); + this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); + RHS.setEnd(RHS.end()+EltDiff); + this->destroy_range(this->begin()+NumShared, this->end()); + this->setEnd(this->begin()+NumShared); + } else if (RHS.size() > this->size()) { + size_t EltDiff = RHS.size() - this->size(); + this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); + this->setEnd(this->end() + EltDiff); + this->destroy_range(RHS.begin()+NumShared, RHS.end()); + RHS.setEnd(RHS.begin()+NumShared); + } +} + +template +const SmallVectorImpl &SmallVectorImpl:: + operator=(const SmallVectorImpl &RHS) { + // Avoid self-assignment. + if (this == &RHS) return *this; + + // If we already have sufficient space, assign the common elements, then + // destroy any excess. + size_t RHSSize = RHS.size(); + size_t CurSize = this->size(); + if (CurSize >= RHSSize) { + // Assign common elements. + iterator NewEnd; + if (RHSSize) + NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); + else + NewEnd = this->begin(); + + // Destroy excess elements. + this->destroy_range(NewEnd, this->end()); + + // Trim. + this->setEnd(NewEnd); + return *this; + } + + // If we have to grow to have enough elements, destroy the current elements. + // This allows us to avoid copying them during the grow. + if (this->capacity() < RHSSize) { + // Destroy current elements. + this->destroy_range(this->begin(), this->end()); + this->setEnd(this->begin()); + CurSize = 0; + this->grow(RHSSize); + } else if (CurSize) { + // Otherwise, use assignment for the already-constructed elements. + std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); + } + + // Copy construct the new elements in place. + this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), + this->begin()+CurSize); + + // Set end. + this->setEnd(this->begin()+RHSSize); + return *this; +} + + +/// SmallVector - This is a 'vector' (really, a variable-sized array), optimized +/// for the case when the array is small. It contains some number of elements +/// in-place, which allows it to avoid heap allocation when the actual number of +/// elements is below that threshold. This allows normal "small" cases to be +/// fast without losing generality for large inputs. +/// +/// Note that this does not attempt to be exception safe. +/// +template +class SmallVector : public SmallVectorImpl { + /// InlineElts - These are 'N-1' elements that are stored inline in the body + /// of the vector. The extra '1' element is stored in SmallVectorImpl. + typedef typename SmallVectorImpl::U U; + enum { + // MinUs - The number of U's require to cover N T's. + MinUs = (static_cast(sizeof(T))*N + + static_cast(sizeof(U)) - 1) / + static_cast(sizeof(U)), + + // NumInlineEltsElts - The number of elements actually in this array. There + // is already one in the parent class, and we have to round up to avoid + // having a zero-element array. + NumInlineEltsElts = MinUs > 1 ? (MinUs - 1) : 1, + + // NumTsAvailable - The number of T's we actually have space for, which may + // be more than N due to rounding. 
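+    // (Worked example, assuming sizeof(U) == 8: for SmallVector<char, 5>,
+    // MinUs = (5+7)/8 = 1, NumInlineEltsElts = 1, and NumTsAvailable =
+    // (1+1)*8/1 = 16, so sixteen chars actually fit inline.)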
+ NumTsAvailable = (NumInlineEltsElts+1)*static_cast(sizeof(U))/ + static_cast(sizeof(T)) + }; + U InlineElts[NumInlineEltsElts]; +public: + SmallVector() : SmallVectorImpl(NumTsAvailable) { + } + + explicit SmallVector(unsigned Size, const T &Value = T()) + : SmallVectorImpl(NumTsAvailable) { + this->reserve(Size); + while (Size--) + this->push_back(Value); + } + + template + SmallVector(ItTy S, ItTy E) : SmallVectorImpl(NumTsAvailable) { + this->append(S, E); + } + + SmallVector(const SmallVector &RHS) : SmallVectorImpl(NumTsAvailable) { + if (!RHS.empty()) + SmallVectorImpl::operator=(RHS); + } + + const SmallVector &operator=(const SmallVector &RHS) { + SmallVectorImpl::operator=(RHS); + return *this; + } + +}; + +/// Specialize SmallVector at N=0. This specialization guarantees +/// that it can be instantiated at an incomplete T if none of its +/// members are required. +template +class SmallVector : public SmallVectorImpl { +public: + SmallVector() : SmallVectorImpl(0) {} + + explicit SmallVector(unsigned Size, const T &Value = T()) + : SmallVectorImpl(0) { + this->reserve(Size); + while (Size--) + this->push_back(Value); + } + + template + SmallVector(ItTy S, ItTy E) : SmallVectorImpl(0) { + this->append(S, E); + } + + SmallVector(const SmallVector &RHS) : SmallVectorImpl(0) { + SmallVectorImpl::operator=(RHS); + } + + SmallVector &operator=(const SmallVectorImpl &RHS) { + return SmallVectorImpl::operator=(RHS); + } + +}; + +} // End llvm namespace + +namespace std { + /// Implement std::swap in terms of SmallVector swap. + template + inline void + swap(llvm::SmallVectorImpl &LHS, llvm::SmallVectorImpl &RHS) { + LHS.swap(RHS); + } + + /// Implement std::swap in terms of SmallVector swap. + template + inline void + swap(llvm::SmallVector &LHS, llvm::SmallVector &RHS) { + LHS.swap(RHS); + } +} + +#endif diff --git a/final/include/llvm/ADT/SparseBitVector.h b/final/include/llvm/ADT/SparseBitVector.h new file mode 100644 index 00000000000..d977136b2fc --- /dev/null +++ b/final/include/llvm/ADT/SparseBitVector.h @@ -0,0 +1,906 @@ +//===- llvm/ADT/SparseBitVector.h - Efficient Sparse BitVector -*- C++ -*- ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the SparseBitVector class. See the doxygen comment for +// SparseBitVector for more details on the algorithm used. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_SPARSEBITVECTOR_H +#define LLVM_ADT_SPARSEBITVECTOR_H + +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + +namespace llvm { + +/// SparseBitVector is an implementation of a bitvector that is sparse by only +/// storing the elements that have non-zero bits set. In order to make this +/// fast for the most common cases, SparseBitVector is implemented as a linked +/// list of SparseBitVectorElements. We maintain a pointer to the last +/// SparseBitVectorElement accessed (in the form of a list iterator), in order +/// to make multiple in-order test/set constant time after the first one is +/// executed. 
Note that using vectors to store SparseBitVectorElement's does +/// not work out very well because it causes insertion in the middle to take +/// enormous amounts of time with a large amount of bits. Other structures that +/// have better worst cases for insertion in the middle (various balanced trees, +/// etc) do not perform as well in practice as a linked list with this iterator +/// kept up to date. They are also significantly more memory intensive. + + +template +struct SparseBitVectorElement + : public ilist_node > { +public: + typedef unsigned long BitWord; + enum { + BITWORD_SIZE = sizeof(BitWord) * CHAR_BIT, + BITWORDS_PER_ELEMENT = (ElementSize + BITWORD_SIZE - 1) / BITWORD_SIZE, + BITS_PER_ELEMENT = ElementSize + }; + +private: + // Index of Element in terms of where first bit starts. + unsigned ElementIndex; + BitWord Bits[BITWORDS_PER_ELEMENT]; + // Needed for sentinels + friend struct ilist_sentinel_traits; + SparseBitVectorElement() { + ElementIndex = ~0U; + memset(&Bits[0], 0, sizeof (BitWord) * BITWORDS_PER_ELEMENT); + } + +public: + explicit SparseBitVectorElement(unsigned Idx) { + ElementIndex = Idx; + memset(&Bits[0], 0, sizeof (BitWord) * BITWORDS_PER_ELEMENT); + } + + // Comparison. + bool operator==(const SparseBitVectorElement &RHS) const { + if (ElementIndex != RHS.ElementIndex) + return false; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) + if (Bits[i] != RHS.Bits[i]) + return false; + return true; + } + + bool operator!=(const SparseBitVectorElement &RHS) const { + return !(*this == RHS); + } + + // Return the bits that make up word Idx in our element. + BitWord word(unsigned Idx) const { + assert (Idx < BITWORDS_PER_ELEMENT); + return Bits[Idx]; + } + + unsigned index() const { + return ElementIndex; + } + + bool empty() const { + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) + if (Bits[i]) + return false; + return true; + } + + void set(unsigned Idx) { + Bits[Idx / BITWORD_SIZE] |= 1L << (Idx % BITWORD_SIZE); + } + + bool test_and_set (unsigned Idx) { + bool old = test(Idx); + if (!old) { + set(Idx); + return true; + } + return false; + } + + void reset(unsigned Idx) { + Bits[Idx / BITWORD_SIZE] &= ~(1L << (Idx % BITWORD_SIZE)); + } + + bool test(unsigned Idx) const { + return Bits[Idx / BITWORD_SIZE] & (1L << (Idx % BITWORD_SIZE)); + } + + unsigned count() const { + unsigned NumBits = 0; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) + if (sizeof(BitWord) == 4) + NumBits += CountPopulation_32(Bits[i]); + else if (sizeof(BitWord) == 8) + NumBits += CountPopulation_64(Bits[i]); + else + assert(0 && "Unsupported!"); + return NumBits; + } + + /// find_first - Returns the index of the first set bit. + int find_first() const { + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) + if (Bits[i] != 0) { + if (sizeof(BitWord) == 4) + return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]); + else if (sizeof(BitWord) == 8) + return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); + else + assert(0 && "Unsupported!"); + } + assert(0 && "Illegal empty element"); + return 0; // Not reached + } + + /// find_next - Returns the index of the next set bit starting from the + /// "Curr" bit. Returns -1 if the next set bit is not found. + int find_next(unsigned Curr) const { + if (Curr >= BITS_PER_ELEMENT) + return -1; + + unsigned WordPos = Curr / BITWORD_SIZE; + unsigned BitPos = Curr % BITWORD_SIZE; + BitWord Copy = Bits[WordPos]; + assert (WordPos <= BITWORDS_PER_ELEMENT + && "Word Position outside of element"); + + // Mask off previous bits. 
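+    // (e.g. BitPos == 3: ~0L << 3 keeps bits 3 and up, clearing bits 0-2.)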
+ Copy &= ~0L << BitPos; + + if (Copy != 0) { + if (sizeof(BitWord) == 4) + return WordPos * BITWORD_SIZE + CountTrailingZeros_32(Copy); + else if (sizeof(BitWord) == 8) + return WordPos * BITWORD_SIZE + CountTrailingZeros_64(Copy); + else + assert(0 && "Unsupported!"); + } + + // Check subsequent words. + for (unsigned i = WordPos+1; i < BITWORDS_PER_ELEMENT; ++i) + if (Bits[i] != 0) { + if (sizeof(BitWord) == 4) + return i * BITWORD_SIZE + CountTrailingZeros_32(Bits[i]); + else if (sizeof(BitWord) == 8) + return i * BITWORD_SIZE + CountTrailingZeros_64(Bits[i]); + else + assert(0 && "Unsupported!"); + } + return -1; + } + + // Union this element with RHS and return true if this one changed. + bool unionWith(const SparseBitVectorElement &RHS) { + bool changed = false; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + BitWord old = changed ? 0 : Bits[i]; + + Bits[i] |= RHS.Bits[i]; + if (!changed && old != Bits[i]) + changed = true; + } + return changed; + } + + // Return true if we have any bits in common with RHS + bool intersects(const SparseBitVectorElement &RHS) const { + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + if (RHS.Bits[i] & Bits[i]) + return true; + } + return false; + } + + // Intersect this Element with RHS and return true if this one changed. + // BecameZero is set to true if this element became all-zero bits. + bool intersectWith(const SparseBitVectorElement &RHS, + bool &BecameZero) { + bool changed = false; + bool allzero = true; + + BecameZero = false; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + BitWord old = changed ? 0 : Bits[i]; + + Bits[i] &= RHS.Bits[i]; + if (Bits[i] != 0) + allzero = false; + + if (!changed && old != Bits[i]) + changed = true; + } + BecameZero = allzero; + return changed; + } + // Intersect this Element with the complement of RHS and return true if this + // one changed. BecameZero is set to true if this element became all-zero + // bits. + bool intersectWithComplement(const SparseBitVectorElement &RHS, + bool &BecameZero) { + bool changed = false; + bool allzero = true; + + BecameZero = false; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + BitWord old = changed ? 0 : Bits[i]; + + Bits[i] &= ~RHS.Bits[i]; + if (Bits[i] != 0) + allzero = false; + + if (!changed && old != Bits[i]) + changed = true; + } + BecameZero = allzero; + return changed; + } + // Three argument version of intersectWithComplement that intersects + // RHS1 & ~RHS2 into this element + void intersectWithComplement(const SparseBitVectorElement &RHS1, + const SparseBitVectorElement &RHS2, + bool &BecameZero) { + bool allzero = true; + + BecameZero = false; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + Bits[i] = RHS1.Bits[i] & ~RHS2.Bits[i]; + if (Bits[i] != 0) + allzero = false; + } + BecameZero = allzero; + } + + // Get a hash value for this element; + uint64_t getHashValue() const { + uint64_t HashVal = 0; + for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i) { + HashVal ^= Bits[i]; + } + return HashVal; + } +}; + +template +class SparseBitVector { + typedef ilist > ElementList; + typedef typename ElementList::iterator ElementListIter; + typedef typename ElementList::const_iterator ElementListConstIter; + enum { + BITWORD_SIZE = SparseBitVectorElement::BITWORD_SIZE + }; + + // Pointer to our current Element. + ElementListIter CurrElementIter; + ElementList Elements; + + // This is like std::lower_bound, except we do linear searching from the + // current position. 
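+  // (CurrElementIter remembers the last element touched, which is what makes
+  // repeated in-order test/set calls amortized constant time.)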
+ ElementListIter FindLowerBound(unsigned ElementIndex) { + + if (Elements.empty()) { + CurrElementIter = Elements.begin(); + return Elements.begin(); + } + + // Make sure our current iterator is valid. + if (CurrElementIter == Elements.end()) + --CurrElementIter; + + // Search from our current iterator, either backwards or forwards, + // depending on what element we are looking for. + ElementListIter ElementIter = CurrElementIter; + if (CurrElementIter->index() == ElementIndex) { + return ElementIter; + } else if (CurrElementIter->index() > ElementIndex) { + while (ElementIter != Elements.begin() + && ElementIter->index() > ElementIndex) + --ElementIter; + } else { + while (ElementIter != Elements.end() && + ElementIter->index() < ElementIndex) + ++ElementIter; + } + CurrElementIter = ElementIter; + return ElementIter; + } + + // Iterator to walk set bits in the bitmap. This iterator is a lot uglier + // than it would be, in order to be efficient. + class SparseBitVectorIterator { + private: + bool AtEnd; + + const SparseBitVector *BitVector; + + // Current element inside of bitmap. + ElementListConstIter Iter; + + // Current bit number inside of our bitmap. + unsigned BitNumber; + + // Current word number inside of our element. + unsigned WordNumber; + + // Current bits from the element. + typename SparseBitVectorElement::BitWord Bits; + + // Move our iterator to the first non-zero bit in the bitmap. + void AdvanceToFirstNonZero() { + if (AtEnd) + return; + if (BitVector->Elements.empty()) { + AtEnd = true; + return; + } + Iter = BitVector->Elements.begin(); + BitNumber = Iter->index() * ElementSize; + unsigned BitPos = Iter->find_first(); + BitNumber += BitPos; + WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE; + Bits = Iter->word(WordNumber); + Bits >>= BitPos % BITWORD_SIZE; + } + + // Move our iterator to the next non-zero bit. + void AdvanceToNextNonZero() { + if (AtEnd) + return; + + while (Bits && !(Bits & 1)) { + Bits >>= 1; + BitNumber += 1; + } + + // See if we ran out of Bits in this word. + if (!Bits) { + int NextSetBitNumber = Iter->find_next(BitNumber % ElementSize) ; + // If we ran out of set bits in this element, move to next element. + if (NextSetBitNumber == -1 || (BitNumber % ElementSize == 0)) { + ++Iter; + WordNumber = 0; + + // We may run out of elements in the bitmap. + if (Iter == BitVector->Elements.end()) { + AtEnd = true; + return; + } + // Set up for next non zero word in bitmap. + BitNumber = Iter->index() * ElementSize; + NextSetBitNumber = Iter->find_first(); + BitNumber += NextSetBitNumber; + WordNumber = (BitNumber % ElementSize) / BITWORD_SIZE; + Bits = Iter->word(WordNumber); + Bits >>= NextSetBitNumber % BITWORD_SIZE; + } else { + WordNumber = (NextSetBitNumber % ElementSize) / BITWORD_SIZE; + Bits = Iter->word(WordNumber); + Bits >>= NextSetBitNumber % BITWORD_SIZE; + BitNumber = Iter->index() * ElementSize; + BitNumber += NextSetBitNumber; + } + } + } + public: + // Preincrement. + inline SparseBitVectorIterator& operator++() { + ++BitNumber; + Bits >>= 1; + AdvanceToNextNonZero(); + return *this; + } + + // Postincrement. + inline SparseBitVectorIterator operator++(int) { + SparseBitVectorIterator tmp = *this; + ++*this; + return tmp; + } + + // Return the current set bit number. + unsigned operator*() const { + return BitNumber; + } + + bool operator==(const SparseBitVectorIterator &RHS) const { + // If they are both at the end, ignore the rest of the fields. 
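+      // (Note the owning bitmap is not compared; iterators should only be
+      // compared within one SparseBitVector.)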
+ if (AtEnd && RHS.AtEnd) + return true; + // Otherwise they are the same if they have the same bit number and + // bitmap. + return AtEnd == RHS.AtEnd && RHS.BitNumber == BitNumber; + } + bool operator!=(const SparseBitVectorIterator &RHS) const { + return !(*this == RHS); + } + SparseBitVectorIterator(): BitVector(NULL) { + } + + + SparseBitVectorIterator(const SparseBitVector *RHS, + bool end = false):BitVector(RHS) { + Iter = BitVector->Elements.begin(); + BitNumber = 0; + Bits = 0; + WordNumber = ~0; + AtEnd = end; + AdvanceToFirstNonZero(); + } + }; +public: + typedef SparseBitVectorIterator iterator; + + SparseBitVector () { + CurrElementIter = Elements.begin (); + } + + ~SparseBitVector() { + } + + // SparseBitVector copy ctor. + SparseBitVector(const SparseBitVector &RHS) { + ElementListConstIter ElementIter = RHS.Elements.begin(); + while (ElementIter != RHS.Elements.end()) { + Elements.push_back(SparseBitVectorElement(*ElementIter)); + ++ElementIter; + } + + CurrElementIter = Elements.begin (); + } + + // Clear. + void clear() { + Elements.clear(); + } + + // Assignment + SparseBitVector& operator=(const SparseBitVector& RHS) { + Elements.clear(); + + ElementListConstIter ElementIter = RHS.Elements.begin(); + while (ElementIter != RHS.Elements.end()) { + Elements.push_back(SparseBitVectorElement(*ElementIter)); + ++ElementIter; + } + + CurrElementIter = Elements.begin (); + + return *this; + } + + // Test, Reset, and Set a bit in the bitmap. + bool test(unsigned Idx) { + if (Elements.empty()) + return false; + + unsigned ElementIndex = Idx / ElementSize; + ElementListIter ElementIter = FindLowerBound(ElementIndex); + + // If we can't find an element that is supposed to contain this bit, there + // is nothing more to do. + if (ElementIter == Elements.end() || + ElementIter->index() != ElementIndex) + return false; + return ElementIter->test(Idx % ElementSize); + } + + void reset(unsigned Idx) { + if (Elements.empty()) + return; + + unsigned ElementIndex = Idx / ElementSize; + ElementListIter ElementIter = FindLowerBound(ElementIndex); + + // If we can't find an element that is supposed to contain this bit, there + // is nothing more to do. + if (ElementIter == Elements.end() || + ElementIter->index() != ElementIndex) + return; + ElementIter->reset(Idx % ElementSize); + + // When the element is zeroed out, delete it. + if (ElementIter->empty()) { + ++CurrElementIter; + Elements.erase(ElementIter); + } + } + + void set(unsigned Idx) { + unsigned ElementIndex = Idx / ElementSize; + SparseBitVectorElement *Element; + ElementListIter ElementIter; + if (Elements.empty()) { + Element = new SparseBitVectorElement(ElementIndex); + ElementIter = Elements.insert(Elements.end(), Element); + + } else { + ElementIter = FindLowerBound(ElementIndex); + + if (ElementIter == Elements.end() || + ElementIter->index() != ElementIndex) { + Element = new SparseBitVectorElement(ElementIndex); + // We may have hit the beginning of our SparseBitVector, in which case, + // we may need to insert right after this element, which requires moving + // the current iterator forward one, because insert does insert before. 
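+        // (ilist::insert, like std::list::insert, inserts before the given
+        // iterator.)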
+ if (ElementIter != Elements.end() && + ElementIter->index() < ElementIndex) + ElementIter = Elements.insert(++ElementIter, Element); + else + ElementIter = Elements.insert(ElementIter, Element); + } + } + CurrElementIter = ElementIter; + + ElementIter->set(Idx % ElementSize); + } + + bool test_and_set (unsigned Idx) { + bool old = test(Idx); + if (!old) { + set(Idx); + return true; + } + return false; + } + + bool operator!=(const SparseBitVector &RHS) const { + return !(*this == RHS); + } + + bool operator==(const SparseBitVector &RHS) const { + ElementListConstIter Iter1 = Elements.begin(); + ElementListConstIter Iter2 = RHS.Elements.begin(); + + for (; Iter1 != Elements.end() && Iter2 != RHS.Elements.end(); + ++Iter1, ++Iter2) { + if (*Iter1 != *Iter2) + return false; + } + return Iter1 == Elements.end() && Iter2 == RHS.Elements.end(); + } + + // Union our bitmap with the RHS and return true if we changed. + bool operator|=(const SparseBitVector &RHS) { + bool changed = false; + ElementListIter Iter1 = Elements.begin(); + ElementListConstIter Iter2 = RHS.Elements.begin(); + + // If RHS is empty, we are done + if (RHS.Elements.empty()) + return false; + + while (Iter2 != RHS.Elements.end()) { + if (Iter1 == Elements.end() || Iter1->index() > Iter2->index()) { + Elements.insert(Iter1, + new SparseBitVectorElement(*Iter2)); + ++Iter2; + changed = true; + } else if (Iter1->index() == Iter2->index()) { + changed |= Iter1->unionWith(*Iter2); + ++Iter1; + ++Iter2; + } else { + ++Iter1; + } + } + CurrElementIter = Elements.begin(); + return changed; + } + + // Intersect our bitmap with the RHS and return true if ours changed. + bool operator&=(const SparseBitVector &RHS) { + bool changed = false; + ElementListIter Iter1 = Elements.begin(); + ElementListConstIter Iter2 = RHS.Elements.begin(); + + // Check if both bitmaps are empty. + if (Elements.empty() && RHS.Elements.empty()) + return false; + + // Loop through, intersecting as we go, erasing elements when necessary. + while (Iter2 != RHS.Elements.end()) { + if (Iter1 == Elements.end()) { + CurrElementIter = Elements.begin(); + return changed; + } + + if (Iter1->index() > Iter2->index()) { + ++Iter2; + } else if (Iter1->index() == Iter2->index()) { + bool BecameZero; + changed |= Iter1->intersectWith(*Iter2, BecameZero); + if (BecameZero) { + ElementListIter IterTmp = Iter1; + ++Iter1; + Elements.erase(IterTmp); + } else { + ++Iter1; + } + ++Iter2; + } else { + ElementListIter IterTmp = Iter1; + ++Iter1; + Elements.erase(IterTmp); + } + } + Elements.erase(Iter1, Elements.end()); + CurrElementIter = Elements.begin(); + return changed; + } + + // Intersect our bitmap with the complement of the RHS and return true + // if ours changed. + bool intersectWithComplement(const SparseBitVector &RHS) { + bool changed = false; + ElementListIter Iter1 = Elements.begin(); + ElementListConstIter Iter2 = RHS.Elements.begin(); + + // If either our bitmap or RHS is empty, we are done + if (Elements.empty() || RHS.Elements.empty()) + return false; + + // Loop through, intersecting as we go, erasing elements when necessary. 
+ while (Iter2 != RHS.Elements.end()) { + if (Iter1 == Elements.end()) { + CurrElementIter = Elements.begin(); + return changed; + } + + if (Iter1->index() > Iter2->index()) { + ++Iter2; + } else if (Iter1->index() == Iter2->index()) { + bool BecameZero; + changed |= Iter1->intersectWithComplement(*Iter2, BecameZero); + if (BecameZero) { + ElementListIter IterTmp = Iter1; + ++Iter1; + Elements.erase(IterTmp); + } else { + ++Iter1; + } + ++Iter2; + } else { + ++Iter1; + } + } + CurrElementIter = Elements.begin(); + return changed; + } + + bool intersectWithComplement(const SparseBitVector *RHS) const { + return intersectWithComplement(*RHS); + } + + + // Three argument version of intersectWithComplement. + // Result of RHS1 & ~RHS2 is stored into this bitmap. + void intersectWithComplement(const SparseBitVector &RHS1, + const SparseBitVector &RHS2) + { + Elements.clear(); + CurrElementIter = Elements.begin(); + ElementListConstIter Iter1 = RHS1.Elements.begin(); + ElementListConstIter Iter2 = RHS2.Elements.begin(); + + // If RHS1 is empty, we are done + // If RHS2 is empty, we still have to copy RHS1 + if (RHS1.Elements.empty()) + return; + + // Loop through, intersecting as we go, erasing elements when necessary. + while (Iter2 != RHS2.Elements.end()) { + if (Iter1 == RHS1.Elements.end()) + return; + + if (Iter1->index() > Iter2->index()) { + ++Iter2; + } else if (Iter1->index() == Iter2->index()) { + bool BecameZero = false; + SparseBitVectorElement *NewElement = + new SparseBitVectorElement(Iter1->index()); + NewElement->intersectWithComplement(*Iter1, *Iter2, BecameZero); + if (!BecameZero) { + Elements.push_back(NewElement); + } + else + delete NewElement; + ++Iter1; + ++Iter2; + } else { + SparseBitVectorElement *NewElement = + new SparseBitVectorElement(*Iter1); + Elements.push_back(NewElement); + ++Iter1; + } + } + + // copy the remaining elements + while (Iter1 != RHS1.Elements.end()) { + SparseBitVectorElement *NewElement = + new SparseBitVectorElement(*Iter1); + Elements.push_back(NewElement); + ++Iter1; + } + + return; + } + + void intersectWithComplement(const SparseBitVector *RHS1, + const SparseBitVector *RHS2) { + intersectWithComplement(*RHS1, *RHS2); + } + + bool intersects(const SparseBitVector *RHS) const { + return intersects(*RHS); + } + + // Return true if we share any bits in common with RHS + bool intersects(const SparseBitVector &RHS) const { + ElementListConstIter Iter1 = Elements.begin(); + ElementListConstIter Iter2 = RHS.Elements.begin(); + + // Check if both bitmaps are empty. + if (Elements.empty() && RHS.Elements.empty()) + return false; + + // Loop through, intersecting stopping when we hit bits in common. + while (Iter2 != RHS.Elements.end()) { + if (Iter1 == Elements.end()) + return false; + + if (Iter1->index() > Iter2->index()) { + ++Iter2; + } else if (Iter1->index() == Iter2->index()) { + if (Iter1->intersects(*Iter2)) + return true; + ++Iter1; + ++Iter2; + } else { + ++Iter1; + } + } + return false; + } + + // Return true iff all bits set in this SparseBitVector are + // also set in RHS. + bool contains(const SparseBitVector &RHS) const { + SparseBitVector Result(*this); + Result &= RHS; + return (Result == RHS); + } + + // Return the first set bit in the bitmap. Return -1 if no bits are set. 
+ int find_first() const { + if (Elements.empty()) + return -1; + const SparseBitVectorElement &First = *(Elements.begin()); + return (First.index() * ElementSize) + First.find_first(); + } + + // Return true if the SparseBitVector is empty + bool empty() const { + return Elements.empty(); + } + + unsigned count() const { + unsigned BitCount = 0; + for (ElementListConstIter Iter = Elements.begin(); + Iter != Elements.end(); + ++Iter) + BitCount += Iter->count(); + + return BitCount; + } + iterator begin() const { + return iterator(this); + } + + iterator end() const { + return iterator(this, true); + } + + // Get a hash value for this bitmap. + uint64_t getHashValue() const { + uint64_t HashVal = 0; + for (ElementListConstIter Iter = Elements.begin(); + Iter != Elements.end(); + ++Iter) { + HashVal ^= Iter->index(); + HashVal ^= Iter->getHashValue(); + } + return HashVal; + } +}; + +// Convenience functions to allow Or and And without dereferencing in the user +// code. + +template +inline bool operator |=(SparseBitVector &LHS, + const SparseBitVector *RHS) { + return LHS |= *RHS; +} + +template +inline bool operator |=(SparseBitVector *LHS, + const SparseBitVector &RHS) { + return LHS->operator|=(RHS); +} + +template +inline bool operator &=(SparseBitVector *LHS, + const SparseBitVector &RHS) { + return LHS->operator&=(RHS); +} + +template +inline bool operator &=(SparseBitVector &LHS, + const SparseBitVector *RHS) { + return LHS &= *RHS; +} + +// Convenience functions for infix union, intersection, difference operators. + +template +inline SparseBitVector +operator|(const SparseBitVector &LHS, + const SparseBitVector &RHS) { + SparseBitVector Result(LHS); + Result |= RHS; + return Result; +} + +template +inline SparseBitVector +operator&(const SparseBitVector &LHS, + const SparseBitVector &RHS) { + SparseBitVector Result(LHS); + Result &= RHS; + return Result; +} + +template +inline SparseBitVector +operator-(const SparseBitVector &LHS, + const SparseBitVector &RHS) { + SparseBitVector Result; + Result.intersectWithComplement(LHS, RHS); + return Result; +} + + + + +// Dump a SparseBitVector to a stream +template +void dump(const SparseBitVector &LHS, raw_ostream &out) { + out << "["; + + typename SparseBitVector::iterator bi = LHS.begin(), + be = LHS.end(); + if (bi != be) { + out << *bi; + for (++bi; bi != be; ++bi) { + out << " " << *bi; + } + } + out << "]\n"; +} +} // end namespace llvm + +#endif diff --git a/final/include/llvm/ADT/Statistic.h b/final/include/llvm/ADT/Statistic.h new file mode 100644 index 00000000000..fda99c6edbc --- /dev/null +++ b/final/include/llvm/ADT/Statistic.h @@ -0,0 +1,135 @@ +//===-- llvm/ADT/Statistic.h - Easy way to expose stats ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the 'Statistic' class, which is designed to be an easy way +// to expose various metrics from passes. These statistics are printed at the +// end of a run (from llvm_shutdown), when the -stats command line option is +// passed on the command line. 
+// +// This is useful for reporting information like the number of instructions +// simplified, optimized or removed by various transformations, like this: +// +// static Statistic NumInstsKilled("gcse", "Number of instructions killed"); +// +// Later, in the code: ++NumInstsKilled; +// +// NOTE: Statistics *must* be declared as global variables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STATISTIC_H +#define LLVM_ADT_STATISTIC_H + +#include "llvm/Support/Atomic.h" + +namespace llvm { +class raw_ostream; + +class Statistic { +public: + const char *Name; + const char *Desc; + volatile llvm::sys::cas_flag Value; + bool Initialized; + + llvm::sys::cas_flag getValue() const { return Value; } + const char *getName() const { return Name; } + const char *getDesc() const { return Desc; } + + /// construct - This should only be called for non-global statistics. + void construct(const char *name, const char *desc) { + Name = name; Desc = desc; + Value = 0; Initialized = 0; + } + + // Allow use of this class as the value itself. + operator unsigned() const { return Value; } + const Statistic &operator=(unsigned Val) { + Value = Val; + return init(); + } + + const Statistic &operator++() { + // FIXME: This function and all those that follow carefully use an + // atomic operation to update the value safely in the presence of + // concurrent accesses, but not to read the return value, so the + // return value is not thread safe. + sys::AtomicIncrement(&Value); + return init(); + } + + unsigned operator++(int) { + init(); + unsigned OldValue = Value; + sys::AtomicIncrement(&Value); + return OldValue; + } + + const Statistic &operator--() { + sys::AtomicDecrement(&Value); + return init(); + } + + unsigned operator--(int) { + init(); + unsigned OldValue = Value; + sys::AtomicDecrement(&Value); + return OldValue; + } + + const Statistic &operator+=(const unsigned &V) { + sys::AtomicAdd(&Value, V); + return init(); + } + + const Statistic &operator-=(const unsigned &V) { + sys::AtomicAdd(&Value, -V); + return init(); + } + + const Statistic &operator*=(const unsigned &V) { + sys::AtomicMul(&Value, V); + return init(); + } + + const Statistic &operator/=(const unsigned &V) { + sys::AtomicDiv(&Value, V); + return init(); + } + +protected: + Statistic &init() { + bool tmp = Initialized; + sys::MemoryFence(); + if (!tmp) RegisterStatistic(); + return *this; + } + void RegisterStatistic(); +}; + +// STATISTIC - A macro to make definition of statistics really simple. This +// automatically passes the DEBUG_TYPE of the file into the statistic. +#define STATISTIC(VARNAME, DESC) \ + static llvm::Statistic VARNAME = { DEBUG_TYPE, DESC, 0, 0 } + +/// \brief Enable the collection and printing of statistics. +void EnableStatistics(); + +/// \brief Check if statistics are enabled. +bool AreStatisticsEnabled(); + +/// \brief Print statistics to the file returned by CreateInfoOutputFile(). +void PrintStatistics(); + +/// \brief Print statistics to the given output stream. 
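A sketch of how the STATISTIC macro above is typically used inside a pass (illustrative, not part of the patch; the group name "mypass" is hypothetical):

    #define DEBUG_TYPE "mypass"   // names the stat group; define before STATISTIC
    #include "llvm/ADT/Statistic.h"

    STATISTIC(NumInstsKilled, "Number of instructions killed");

    static void noteKilled(unsigned N) {
      ++NumInstsKilled;      // atomic update, printed at shutdown with -stats
      NumInstsKilled += N;   // per the FIXME above, reads of the returned
                             // value are not thread safe
    }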
+void PrintStatistics(raw_ostream &OS); + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/ADT/StringExtras.h b/final/include/llvm/ADT/StringExtras.h new file mode 100644 index 00000000000..acbed66ef40 --- /dev/null +++ b/final/include/llvm/ADT/StringExtras.h @@ -0,0 +1,171 @@ +//===-- llvm/ADT/StringExtras.h - Useful string functions -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains some functions that are useful when dealing with strings. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRINGEXTRAS_H +#define LLVM_ADT_STRINGEXTRAS_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/StringRef.h" +#include +#include +#include +#include + +namespace llvm { +template class SmallVectorImpl; + +/// hexdigit - Return the hexadecimal character for the +/// given number \arg X (which should be less than 16). +static inline char hexdigit(unsigned X, bool LowerCase = false) { + const char HexChar = LowerCase ? 'a' : 'A'; + return X < 10 ? '0' + X : HexChar + X - 10; +} + +/// utohex_buffer - Emit the specified number into the buffer specified by +/// BufferEnd, returning a pointer to the start of the string. This can be used +/// like this: (note that the buffer must be large enough to handle any number): +/// char Buffer[40]; +/// printf("0x%s", utohex_buffer(X, Buffer+40)); +/// +/// This should only be used with unsigned types. +/// +template +static inline char *utohex_buffer(IntTy X, char *BufferEnd) { + char *BufPtr = BufferEnd; + *--BufPtr = 0; // Null terminate buffer. + if (X == 0) { + *--BufPtr = '0'; // Handle special case. + return BufPtr; + } + + while (X) { + unsigned char Mod = static_cast(X) & 15; + *--BufPtr = hexdigit(Mod); + X >>= 4; + } + return BufPtr; +} + +static inline std::string utohexstr(uint64_t X) { + char Buffer[17]; + return utohex_buffer(X, Buffer+17); +} + +static inline std::string utostr_32(uint32_t X, bool isNeg = false) { + char Buffer[11]; + char *BufPtr = Buffer+11; + + if (X == 0) *--BufPtr = '0'; // Handle special case... + + while (X) { + *--BufPtr = '0' + char(X % 10); + X /= 10; + } + + if (isNeg) *--BufPtr = '-'; // Add negative sign... + + return std::string(BufPtr, Buffer+11); +} + +static inline std::string utostr(uint64_t X, bool isNeg = false) { + char Buffer[21]; + char *BufPtr = Buffer+21; + + if (X == 0) *--BufPtr = '0'; // Handle special case... + + while (X) { + *--BufPtr = '0' + char(X % 10); + X /= 10; + } + + if (isNeg) *--BufPtr = '-'; // Add negative sign... 
+  return std::string(BufPtr, Buffer+21);
+}
+
+
+static inline std::string itostr(int64_t X) {
+  if (X < 0)
+    return utostr(static_cast<uint64_t>(-X), true);
+  else
+    return utostr(static_cast<uint64_t>(X));
+}
+
+static inline std::string ftostr(double V) {
+  char Buffer[200];
+  sprintf(Buffer, "%20.6e", V);
+  char *B = Buffer;
+  while (*B == ' ') ++B;
+  return B;
+}
+
+static inline std::string ftostr(const APFloat& V) {
+  if (&V.getSemantics() == &APFloat::IEEEdouble)
+    return ftostr(V.convertToDouble());
+  else if (&V.getSemantics() == &APFloat::IEEEsingle)
+    return ftostr((double)V.convertToFloat());
+  return "<unknown format in ftostr>"; // error
+}
+
+static inline std::string LowercaseString(const std::string &S) {
+  std::string result(S);
+  for (unsigned i = 0; i < S.length(); ++i)
+    if (isupper(result[i]))
+      result[i] = char(tolower(result[i]));
+  return result;
+}
+
+static inline std::string UppercaseString(const std::string &S) {
+  std::string result(S);
+  for (unsigned i = 0; i < S.length(); ++i)
+    if (islower(result[i]))
+      result[i] = char(toupper(result[i]));
+  return result;
+}
+
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's1' in string 's2', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type StrInStrNoCase(StringRef s1, StringRef s2);
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> getToken(StringRef Source,
+                                         StringRef Delimiters = " \t\n\v\f\r");
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
+void SplitString(StringRef Source,
+                 SmallVectorImpl<StringRef> &OutFragments,
+                 StringRef Delimiters = " \t\n\v\f\r");
+
+/// HashString - Hash function for strings.
+///
+/// This is the Bernstein hash function.
+//
+// FIXME: Investigate whether a modified bernstein hash function performs
+// better: http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
+//   X*33+c -> X*33^c
+static inline unsigned HashString(StringRef Str, unsigned Result = 0) {
+  for (unsigned i = 0, e = Str.size(); i != e; ++i)
+    Result = Result * 33 + Str[i];
+  return Result;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/StringExtras.h b/final/include/llvm/ADT/StringMap.h
new file mode 100644
index 00000000000..bad0e6f5136
--- /dev/null
+++ b/final/include/llvm/ADT/StringMap.h
@@ -0,0 +1,493 @@
+//===--- StringMap.h - String Hash table map interface ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the StringMap class.
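An illustrative sketch (not part of the patch) exercising the string helpers just defined:

    #include "llvm/ADT/StringExtras.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    void stringextras_demo() {
      std::string Hex = utohexstr(48879);   // "BEEF"
      std::string Dec = itostr(-42);        // "-42"
      std::pair<StringRef, StringRef> P = getToken("  alpha beta");
      // P.first == "alpha", P.second == " beta"
      SmallVector<StringRef, 4> Parts;
      SplitString("a b c", Parts);          // Parts == {"a", "b", "c"}
      unsigned H = HashString("a b c");     // Bernstein hash: Result*33 + c
      (void)Hex; (void)Dec; (void)H;
    }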
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRINGMAP_H +#define LLVM_ADT_STRINGMAP_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include +#include + +namespace llvm { + template + class StringMapConstIterator; + template + class StringMapIterator; + template + class StringMapEntry; + +/// StringMapEntryInitializer - This datatype can be partially specialized for +/// various datatypes in a stringmap to allow them to be initialized when an +/// entry is default constructed for the map. +template +class StringMapEntryInitializer { +public: + template + static void Initialize(StringMapEntry &T, InitTy InitVal) { + T.second = InitVal; + } +}; + + +/// StringMapEntryBase - Shared base class of StringMapEntry instances. +class StringMapEntryBase { + unsigned StrLen; +public: + explicit StringMapEntryBase(unsigned Len) : StrLen(Len) {} + + unsigned getKeyLength() const { return StrLen; } +}; + +/// StringMapImpl - This is the base class of StringMap that is shared among +/// all of its instantiations. +class StringMapImpl { +public: + /// ItemBucket - The hash table consists of an array of these. If Item is + /// non-null, this is an extant entry, otherwise, it is a hole. + struct ItemBucket { + /// FullHashValue - This remembers the full hash value of the key for + /// easy scanning. + unsigned FullHashValue; + + /// Item - This is a pointer to the actual item object. + StringMapEntryBase *Item; + }; + +protected: + ItemBucket *TheTable; + unsigned NumBuckets; + unsigned NumItems; + unsigned NumTombstones; + unsigned ItemSize; +protected: + explicit StringMapImpl(unsigned itemSize) : ItemSize(itemSize) { + // Initialize the map with zero buckets to allocation. + TheTable = 0; + NumBuckets = 0; + NumItems = 0; + NumTombstones = 0; + } + StringMapImpl(unsigned InitSize, unsigned ItemSize); + void RehashTable(); + + /// ShouldRehash - Return true if the table should be rehashed after a new + /// element was recently inserted. + bool ShouldRehash() const { + // If the hash table is now more than 3/4 full, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow the table. + return NumItems*4 > NumBuckets*3 || + NumBuckets-(NumItems+NumTombstones) < NumBuckets/8; + } + + /// LookupBucketFor - Look up the bucket that the specified string should end + /// up in. If it already exists as a key in the map, the Item pointer for the + /// specified bucket will be non-null. Otherwise, it will be null. In either + /// case, the FullHashValue field of the bucket will be set to the hash value + /// of the string. + unsigned LookupBucketFor(StringRef Key); + + /// FindKey - Look up the bucket that contains the specified key. If it exists + /// in the map, return the bucket number of the key. Otherwise return -1. + /// This does not modify the map. + int FindKey(StringRef Key) const; + + /// RemoveKey - Remove the specified StringMapEntry from the table, but do not + /// delete it. This aborts if the value isn't in the table. + void RemoveKey(StringMapEntryBase *V); + + /// RemoveKey - Remove the StringMapEntry for the specified key from the + /// table, returning it. If the key is not in the table, this returns null. 
+ StringMapEntryBase *RemoveKey(StringRef Key); +private: + void init(unsigned Size); +public: + static StringMapEntryBase *getTombstoneVal() { + return (StringMapEntryBase*)-1; + } + + unsigned getNumBuckets() const { return NumBuckets; } + unsigned getNumItems() const { return NumItems; } + + bool empty() const { return NumItems == 0; } + unsigned size() const { return NumItems; } +}; + +/// StringMapEntry - This is used to represent one value that is inserted into +/// a StringMap. It contains the Value itself and the key: the string length +/// and data. +template +class StringMapEntry : public StringMapEntryBase { +public: + ValueTy second; + + explicit StringMapEntry(unsigned strLen) + : StringMapEntryBase(strLen), second() {} + StringMapEntry(unsigned strLen, const ValueTy &V) + : StringMapEntryBase(strLen), second(V) {} + + StringRef getKey() const { + return StringRef(getKeyData(), getKeyLength()); + } + + const ValueTy &getValue() const { return second; } + ValueTy &getValue() { return second; } + + void setValue(const ValueTy &V) { second = V; } + + /// getKeyData - Return the start of the string data that is the key for this + /// value. The string data is always stored immediately after the + /// StringMapEntry object. + const char *getKeyData() const {return reinterpret_cast(this+1);} + + const char *first() const { return getKeyData(); } + + /// Create - Create a StringMapEntry for the specified key and default + /// construct the value. + template + static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, + AllocatorTy &Allocator, + InitType InitVal) { + unsigned KeyLength = static_cast(KeyEnd-KeyStart); + + // Okay, the item doesn't already exist, and 'Bucket' is the bucket to fill + // in. Allocate a new item with space for the string at the end and a null + // terminator. + + unsigned AllocSize = static_cast(sizeof(StringMapEntry))+ + KeyLength+1; + unsigned Alignment = alignOf(); + + StringMapEntry *NewItem = + static_cast(Allocator.Allocate(AllocSize,Alignment)); + + // Default construct the value. + new (NewItem) StringMapEntry(KeyLength); + + // Copy the string information. + char *StrBuffer = const_cast(NewItem->getKeyData()); + memcpy(StrBuffer, KeyStart, KeyLength); + StrBuffer[KeyLength] = 0; // Null terminate for convenience of clients. + + // Initialize the value if the client wants to. + StringMapEntryInitializer::Initialize(*NewItem, InitVal); + return NewItem; + } + + template + static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, + AllocatorTy &Allocator) { + return Create(KeyStart, KeyEnd, Allocator, 0); + } + + + /// Create - Create a StringMapEntry with normal malloc/free. + template + static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd, + InitType InitVal) { + MallocAllocator A; + return Create(KeyStart, KeyEnd, A, InitVal); + } + + static StringMapEntry *Create(const char *KeyStart, const char *KeyEnd) { + return Create(KeyStart, KeyEnd, ValueTy()); + } + + /// GetStringMapEntryFromValue - Given a value that is known to be embedded + /// into a StringMapEntry, return the StringMapEntry itself. 
+ static StringMapEntry &GetStringMapEntryFromValue(ValueTy &V) { + StringMapEntry *EPtr = 0; + char *Ptr = reinterpret_cast(&V) - + (reinterpret_cast(&EPtr->second) - + reinterpret_cast(EPtr)); + return *reinterpret_cast(Ptr); + } + static const StringMapEntry &GetStringMapEntryFromValue(const ValueTy &V) { + return GetStringMapEntryFromValue(const_cast(V)); + } + + /// GetStringMapEntryFromKeyData - Given key data that is known to be embedded + /// into a StringMapEntry, return the StringMapEntry itself. + static StringMapEntry &GetStringMapEntryFromKeyData(const char *KeyData) { + char *Ptr = const_cast(KeyData) - sizeof(StringMapEntry); + return *reinterpret_cast(Ptr); + } + + + /// Destroy - Destroy this StringMapEntry, releasing memory back to the + /// specified allocator. + template + void Destroy(AllocatorTy &Allocator) { + // Free memory referenced by the item. + this->~StringMapEntry(); + Allocator.Deallocate(this); + } + + /// Destroy this object, releasing memory back to the malloc allocator. + void Destroy() { + MallocAllocator A; + Destroy(A); + } +}; + + +/// StringMap - This is an unconventional map that is specialized for handling +/// keys that are "strings", which are basically ranges of bytes. This does some +/// funky memory allocation and hashing things to make it extremely efficient, +/// storing the string data *after* the value in the map. +template +class StringMap : public StringMapImpl { + AllocatorTy Allocator; + typedef StringMapEntry MapEntryTy; +public: + StringMap() : StringMapImpl(static_cast(sizeof(MapEntryTy))) {} + explicit StringMap(unsigned InitialSize) + : StringMapImpl(InitialSize, static_cast(sizeof(MapEntryTy))) {} + + explicit StringMap(AllocatorTy A) + : StringMapImpl(static_cast(sizeof(MapEntryTy))), Allocator(A) {} + + explicit StringMap(const StringMap &RHS) + : StringMapImpl(static_cast(sizeof(MapEntryTy))) { + assert(RHS.empty() && + "Copy ctor from non-empty stringmap not implemented yet!"); + (void)RHS; + } + void operator=(const StringMap &RHS) { + assert(RHS.empty() && + "assignment from non-empty stringmap not implemented yet!"); + (void)RHS; + clear(); + } + + typedef typename ReferenceAdder::result AllocatorRefTy; + typedef typename ReferenceAdder::result AllocatorCRefTy; + AllocatorRefTy getAllocator() { return Allocator; } + AllocatorCRefTy getAllocator() const { return Allocator; } + + typedef const char* key_type; + typedef ValueTy mapped_type; + typedef StringMapEntry value_type; + typedef size_t size_type; + + typedef StringMapConstIterator const_iterator; + typedef StringMapIterator iterator; + + iterator begin() { + return iterator(TheTable, NumBuckets == 0); + } + iterator end() { + return iterator(TheTable+NumBuckets, true); + } + const_iterator begin() const { + return const_iterator(TheTable, NumBuckets == 0); + } + const_iterator end() const { + return const_iterator(TheTable+NumBuckets, true); + } + + iterator find(StringRef Key) { + int Bucket = FindKey(Key); + if (Bucket == -1) return end(); + return iterator(TheTable+Bucket); + } + + const_iterator find(StringRef Key) const { + int Bucket = FindKey(Key); + if (Bucket == -1) return end(); + return const_iterator(TheTable+Bucket); + } + + /// lookup - Return the entry for the specified key, or a default + /// constructed value if no such entry exists. 
+ ValueTy lookup(StringRef Key) const { + const_iterator it = find(Key); + if (it != end()) + return it->second; + return ValueTy(); + } + + ValueTy& operator[](StringRef Key) { + return GetOrCreateValue(Key).getValue(); + } + + size_type count(StringRef Key) const { + return find(Key) == end() ? 0 : 1; + } + + /// insert - Insert the specified key/value pair into the map. If the key + /// already exists in the map, return false and ignore the request, otherwise + /// insert it and return true. + bool insert(MapEntryTy *KeyValue) { + unsigned BucketNo = LookupBucketFor(KeyValue->getKey()); + ItemBucket &Bucket = TheTable[BucketNo]; + if (Bucket.Item && Bucket.Item != getTombstoneVal()) + return false; // Already exists in map. + + if (Bucket.Item == getTombstoneVal()) + --NumTombstones; + Bucket.Item = KeyValue; + ++NumItems; + + if (ShouldRehash()) + RehashTable(); + return true; + } + + // clear - Empties out the StringMap + void clear() { + if (empty()) return; + + // Zap all values, resetting the keys back to non-present (not tombstone), + // which is safe because we're removing all elements. + for (ItemBucket *I = TheTable, *E = TheTable+NumBuckets; I != E; ++I) { + if (I->Item && I->Item != getTombstoneVal()) { + static_cast(I->Item)->Destroy(Allocator); + I->Item = 0; + } + } + + NumItems = 0; + } + + /// GetOrCreateValue - Look up the specified key in the table. If a value + /// exists, return it. Otherwise, default construct a value, insert it, and + /// return. + template + StringMapEntry &GetOrCreateValue(StringRef Key, + InitTy Val) { + unsigned BucketNo = LookupBucketFor(Key); + ItemBucket &Bucket = TheTable[BucketNo]; + if (Bucket.Item && Bucket.Item != getTombstoneVal()) + return *static_cast(Bucket.Item); + + MapEntryTy *NewItem = + MapEntryTy::Create(Key.begin(), Key.end(), Allocator, Val); + + if (Bucket.Item == getTombstoneVal()) + --NumTombstones; + ++NumItems; + + // Fill in the bucket for the hash table. The FullHashValue was already + // filled in by LookupBucketFor. + Bucket.Item = NewItem; + + if (ShouldRehash()) + RehashTable(); + return *NewItem; + } + + StringMapEntry &GetOrCreateValue(StringRef Key) { + return GetOrCreateValue(Key, ValueTy()); + } + + template + StringMapEntry &GetOrCreateValue(const char *KeyStart, + const char *KeyEnd, + InitTy Val) { + return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart), Val); + } + + StringMapEntry &GetOrCreateValue(const char *KeyStart, + const char *KeyEnd) { + return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart)); + } + + /// remove - Remove the specified key/value pair from the map, but do not + /// erase it. This aborts if the key is not in the map. 
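An illustrative sketch of the StringMap interface defined above (not part of the patch):

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void stringmap_demo() {
      StringMap<int> Counts;
      Counts["hello"] = 1;                  // creates the entry on first use
      Counts.GetOrCreateValue("world", 2);  // insert with an explicit value
      int Missing = Counts.lookup("nope");  // 0: a default-constructed copy
      for (StringMap<int>::iterator I = Counts.begin(), E = Counts.end();
           I != E; ++I)
        errs() << I->getKey() << " -> " << I->second << "\n";
      (void)Missing;
    }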
+ void remove(MapEntryTy *KeyValue) { + RemoveKey(KeyValue); + } + + void erase(iterator I) { + MapEntryTy &V = *I; + remove(&V); + V.Destroy(Allocator); + } + + bool erase(StringRef Key) { + iterator I = find(Key); + if (I == end()) return false; + erase(I); + return true; + } + + ~StringMap() { + clear(); + free(TheTable); + } +}; + + +template +class StringMapConstIterator { +protected: + StringMapImpl::ItemBucket *Ptr; +public: + typedef StringMapEntry value_type; + + explicit StringMapConstIterator(StringMapImpl::ItemBucket *Bucket, + bool NoAdvance = false) + : Ptr(Bucket) { + if (!NoAdvance) AdvancePastEmptyBuckets(); + } + + const value_type &operator*() const { + return *static_cast*>(Ptr->Item); + } + const value_type *operator->() const { + return static_cast*>(Ptr->Item); + } + + bool operator==(const StringMapConstIterator &RHS) const { + return Ptr == RHS.Ptr; + } + bool operator!=(const StringMapConstIterator &RHS) const { + return Ptr != RHS.Ptr; + } + + inline StringMapConstIterator& operator++() { // Preincrement + ++Ptr; + AdvancePastEmptyBuckets(); + return *this; + } + StringMapConstIterator operator++(int) { // Postincrement + StringMapConstIterator tmp = *this; ++*this; return tmp; + } + +private: + void AdvancePastEmptyBuckets() { + while (Ptr->Item == 0 || Ptr->Item == StringMapImpl::getTombstoneVal()) + ++Ptr; + } +}; + +template +class StringMapIterator : public StringMapConstIterator { +public: + explicit StringMapIterator(StringMapImpl::ItemBucket *Bucket, + bool NoAdvance = false) + : StringMapConstIterator(Bucket, NoAdvance) { + } + StringMapEntry &operator*() const { + return *static_cast*>(this->Ptr->Item); + } + StringMapEntry *operator->() const { + return static_cast*>(this->Ptr->Item); + } +}; + +} + +#endif diff --git a/final/include/llvm/ADT/StringRef.h b/final/include/llvm/ADT/StringRef.h new file mode 100644 index 00000000000..1766d2b9f2d --- /dev/null +++ b/final/include/llvm/ADT/StringRef.h @@ -0,0 +1,458 @@ +//===--- StringRef.h - Constant String Reference Wrapper --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_STRINGREF_H +#define LLVM_ADT_STRINGREF_H + +#include +#include +#include +#include + +namespace llvm { + template + class SmallVectorImpl; + class APInt; + + /// StringRef - Represent a constant reference to a string, i.e. a character + /// array and a length, which need not be null terminated. + /// + /// This class does not own the string data, it is expected to be used in + /// situations where the character data resides in some other buffer, whose + /// lifetime extends past that of the StringRef. For this reason, it is not in + /// general safe to store a StringRef. + class StringRef { + public: + typedef const char *iterator; + typedef const char *const_iterator; + static const size_t npos = ~size_t(0); + typedef size_t size_type; + + private: + /// The start of the string, in an external buffer. + const char *Data; + + /// The length of the string. + size_t Length; + + // Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min() + // Changing the arg of min to be an integer, instead of a reference to an + // integer works around this bug. + static size_t min(size_t a, size_t b) { return a < b ? a : b; } + static size_t max(size_t a, size_t b) { return a > b ? 
a : b; } + + public: + /// @name Constructors + /// @{ + + /// Construct an empty string ref. + /*implicit*/ StringRef() : Data(0), Length(0) {} + + /// Construct a string ref from a cstring. + /*implicit*/ StringRef(const char *Str) + : Data(Str), Length(::strlen(Str)) {} + + /// Construct a string ref from a pointer and length. + /*implicit*/ StringRef(const char *data, size_t length) + : Data(data), Length(length) {} + + /// Construct a string ref from an std::string. + /*implicit*/ StringRef(const std::string &Str) + : Data(Str.data()), Length(Str.length()) {} + + /// @} + /// @name Iterators + /// @{ + + iterator begin() const { return Data; } + + iterator end() const { return Data + Length; } + + /// @} + /// @name String Operations + /// @{ + + /// data - Get a pointer to the start of the string (which may not be null + /// terminated). + const char *data() const { return Data; } + + /// empty - Check if the string is empty. + bool empty() const { return Length == 0; } + + /// size - Get the string size. + size_t size() const { return Length; } + + /// front - Get the first character in the string. + char front() const { + assert(!empty()); + return Data[0]; + } + + /// back - Get the last character in the string. + char back() const { + assert(!empty()); + return Data[Length-1]; + } + + /// equals - Check for string equality, this is more efficient than + /// compare() when the relative ordering of inequal strings isn't needed. + bool equals(StringRef RHS) const { + return (Length == RHS.Length && + memcmp(Data, RHS.Data, RHS.Length) == 0); + } + + /// equals_lower - Check for string equality, ignoring case. + bool equals_lower(StringRef RHS) const { + return Length == RHS.Length && compare_lower(RHS) == 0; + } + + /// compare - Compare two strings; the result is -1, 0, or 1 if this string + /// is lexicographically less than, equal to, or greater than the \arg RHS. + int compare(StringRef RHS) const { + // Check the prefix for a mismatch. + if (int Res = memcmp(Data, RHS.Data, min(Length, RHS.Length))) + return Res < 0 ? -1 : 1; + + // Otherwise the prefixes match, so we only need to check the lengths. + if (Length == RHS.Length) + return 0; + return Length < RHS.Length ? -1 : 1; + } + + /// compare_lower - Compare two strings, ignoring case. + int compare_lower(StringRef RHS) const; + + /// compare_numeric - Compare two strings, treating sequences of digits as + /// numbers. + int compare_numeric(StringRef RHS) const; + + /// \brief Determine the edit distance between this string and another + /// string. + /// + /// \param Other the string to compare this string against. + /// + /// \param AllowReplacements whether to allow character + /// replacements (change one character into another) as a single + /// operation, rather than as two operations (an insertion and a + /// removal). + /// + /// \param MaxEditDistance If non-zero, the maximum edit distance that + /// this routine is allowed to compute. If the edit distance will exceed + /// that maximum, returns \c MaxEditDistance+1. + /// + /// \returns the minimum number of character insertions, removals, + /// or (if \p AllowReplacements is \c true) replacements needed to + /// transform one of the given strings into the other. If zero, + /// the strings are identical. + unsigned edit_distance(StringRef Other, bool AllowReplacements = true, + unsigned MaxEditDistance = 0); + + /// str - Get the contents as an std::string. 
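A short sketch of the comparison facilities above (illustrative, not part of the patch):

    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    void stringref_compare_demo() {
      StringRef A("file10"), B("file9");    // no copies: references only
      bool Eq  = A.equals(B);               // false
      bool IEq = A.equals_lower("FILE10");  // true, case-insensitive
      int Lex  = A.compare(B);              // -1: '1' sorts before '9'
      int Num  = A.compare_numeric(B);      // 1: 10 > 9 as numbers
      (void)Eq; (void)IEq; (void)Lex; (void)Num;
    }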
+ std::string str() const { + if (Data == 0) return std::string(); + return std::string(Data, Length); + } + + /// @} + /// @name Operator Overloads + /// @{ + + char operator[](size_t Index) const { + assert(Index < Length && "Invalid index!"); + return Data[Index]; + } + + /// @} + /// @name Type Conversions + /// @{ + + operator std::string() const { + return str(); + } + + /// @} + /// @name String Predicates + /// @{ + + /// startswith - Check if this string starts with the given \arg Prefix. + bool startswith(StringRef Prefix) const { + return Length >= Prefix.Length && + memcmp(Data, Prefix.Data, Prefix.Length) == 0; + } + + /// endswith - Check if this string ends with the given \arg Suffix. + bool endswith(StringRef Suffix) const { + return Length >= Suffix.Length && + memcmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; + } + + /// @} + /// @name String Searching + /// @{ + + /// find - Search for the first character \arg C in the string. + /// + /// \return - The index of the first occurrence of \arg C, or npos if not + /// found. + size_t find(char C, size_t From = 0) const { + for (size_t i = min(From, Length), e = Length; i != e; ++i) + if (Data[i] == C) + return i; + return npos; + } + + /// find - Search for the first string \arg Str in the string. + /// + /// \return - The index of the first occurrence of \arg Str, or npos if not + /// found. + size_t find(StringRef Str, size_t From = 0) const; + + /// rfind - Search for the last character \arg C in the string. + /// + /// \return - The index of the last occurrence of \arg C, or npos if not + /// found. + size_t rfind(char C, size_t From = npos) const { + From = min(From, Length); + size_t i = From; + while (i != 0) { + --i; + if (Data[i] == C) + return i; + } + return npos; + } + + /// rfind - Search for the last string \arg Str in the string. + /// + /// \return - The index of the last occurrence of \arg Str, or npos if not + /// found. + size_t rfind(StringRef Str) const; + + /// find_first_of - Find the first character in the string that is \arg C, + /// or npos if not found. Same as find. + size_type find_first_of(char C, size_t From = 0) const { + return find(C, From); + } + + /// find_first_of - Find the first character in the string that is in \arg + /// Chars, or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_type find_first_of(StringRef Chars, size_t From = 0) const; + + /// find_first_not_of - Find the first character in the string that is not + /// \arg C or npos if not found. + size_type find_first_not_of(char C, size_t From = 0) const; + + /// find_first_not_of - Find the first character in the string that is not + /// in the string \arg Chars, or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_type find_first_not_of(StringRef Chars, size_t From = 0) const; + + /// find_last_of - Find the last character in the string that is \arg C, or + /// npos if not found. + size_type find_last_of(char C, size_t From = npos) const { + return rfind(C, From); + } + + /// find_last_of - Find the last character in the string that is in \arg C, + /// or npos if not found. + /// + /// Note: O(size() + Chars.size()) + size_type find_last_of(StringRef Chars, size_t From = npos) const; + + /// @} + /// @name Helpful Algorithms + /// @{ + + /// count - Return the number of occurrences of \arg C in the string. 
+ size_t count(char C) const { + size_t Count = 0; + for (size_t i = 0, e = Length; i != e; ++i) + if (Data[i] == C) + ++Count; + return Count; + } + + /// count - Return the number of non-overlapped occurrences of \arg Str in + /// the string. + size_t count(StringRef Str) const; + + /// getAsInteger - Parse the current string as an integer of the specified + /// radix. If Radix is specified as zero, this does radix autosensing using + /// extended C rules: 0 is octal, 0x is hex, 0b is binary. + /// + /// If the string is invalid or if only a subset of the string is valid, + /// this returns true to signify the error. The string is considered + /// erroneous if empty. + /// + bool getAsInteger(unsigned Radix, long long &Result) const; + bool getAsInteger(unsigned Radix, unsigned long long &Result) const; + bool getAsInteger(unsigned Radix, int &Result) const; + bool getAsInteger(unsigned Radix, unsigned &Result) const; + + // TODO: Provide overloads for int/unsigned that check for overflow. + + /// getAsInteger - Parse the current string as an integer of the + /// specified radix, or of an autosensed radix if the radix given + /// is 0. The current value in Result is discarded, and the + /// storage is changed to be wide enough to store the parsed + /// integer. + /// + /// Returns true if the string does not solely consist of a valid + /// non-empty number in the appropriate base. + /// + /// APInt::fromString is superficially similar but assumes the + /// string is well-formed in the given radix. + bool getAsInteger(unsigned Radix, APInt &Result) const; + + /// @} + /// @name Substring Operations + /// @{ + + /// substr - Return a reference to the substring from [Start, Start + N). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param N - The number of characters to included in the substring. If N + /// exceeds the number of characters remaining in the string, the string + /// suffix (starting with \arg Start) will be returned. + StringRef substr(size_t Start, size_t N = npos) const { + Start = min(Start, Length); + return StringRef(Data + Start, min(N, Length - Start)); + } + + /// slice - Return a reference to the substring from [Start, End). + /// + /// \param Start - The index of the starting character in the substring; if + /// the index is npos or greater than the length of the string then the + /// empty substring will be returned. + /// + /// \param End - The index following the last character to include in the + /// substring. If this is npos, or less than \arg Start, or exceeds the + /// number of characters remaining in the string, the string suffix + /// (starting with \arg Start) will be returned. + StringRef slice(size_t Start, size_t End) const { + Start = min(Start, Length); + End = min(max(Start, End), Length); + return StringRef(Data + Start, End - Start); + } + + /// split - Split into two substrings around the first occurrence of a + /// separator character. + /// + /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) + /// such that (*this == LHS + Separator + RHS) is true and RHS is + /// maximal. If \arg Separator is not in the string, then the result is a + /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). + /// + /// \param Separator - The character to split on. + /// \return - The split substrings. 
+ std::pair split(char Separator) const { + size_t Idx = find(Separator); + if (Idx == npos) + return std::make_pair(*this, StringRef()); + return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); + } + + /// split - Split into two substrings around the first occurrence of a + /// separator string. + /// + /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) + /// such that (*this == LHS + Separator + RHS) is true and RHS is + /// maximal. If \arg Separator is not in the string, then the result is a + /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). + /// + /// \param Separator - The string to split on. + /// \return - The split substrings. + std::pair split(StringRef Separator) const { + size_t Idx = find(Separator); + if (Idx == npos) + return std::make_pair(*this, StringRef()); + return std::make_pair(slice(0, Idx), slice(Idx + Separator.size(), npos)); + } + + /// split - Split into substrings around the occurrences of a separator + /// string. + /// + /// Each substring is stored in \arg A. If \arg MaxSplit is >= 0, at most + /// \arg MaxSplit splits are done and consequently <= \arg MaxSplit + /// elements are added to A. + /// If \arg KeepEmpty is false, empty strings are not added to \arg A. They + /// still count when considering \arg MaxSplit + /// An useful invariant is that + /// Separator.join(A) == *this if MaxSplit == -1 and KeepEmpty == true + /// + /// \param A - Where to put the substrings. + /// \param Separator - The string to split on. + /// \param MaxSplit - The maximum number of times the string is split. + /// \param KeepEmpty - True if empty substring should be added. + void split(SmallVectorImpl &A, + StringRef Separator, int MaxSplit = -1, + bool KeepEmpty = true) const; + + /// rsplit - Split into two substrings around the last occurrence of a + /// separator character. + /// + /// If \arg Separator is in the string, then the result is a pair (LHS, RHS) + /// such that (*this == LHS + Separator + RHS) is true and RHS is + /// minimal. If \arg Separator is not in the string, then the result is a + /// pair (LHS, RHS) where (*this == LHS) and (RHS == ""). + /// + /// \param Separator - The character to split on. + /// \return - The split substrings. + std::pair rsplit(char Separator) const { + size_t Idx = rfind(Separator); + if (Idx == npos) + return std::make_pair(*this, StringRef()); + return std::make_pair(slice(0, Idx), slice(Idx+1, npos)); + } + + /// @} + }; + + /// @name StringRef Comparison Operators + /// @{ + + inline bool operator==(StringRef LHS, StringRef RHS) { + return LHS.equals(RHS); + } + + inline bool operator!=(StringRef LHS, StringRef RHS) { + return !(LHS == RHS); + } + + inline bool operator<(StringRef LHS, StringRef RHS) { + return LHS.compare(RHS) == -1; + } + + inline bool operator<=(StringRef LHS, StringRef RHS) { + return LHS.compare(RHS) != 1; + } + + inline bool operator>(StringRef LHS, StringRef RHS) { + return LHS.compare(RHS) == 1; + } + + inline bool operator>=(StringRef LHS, StringRef RHS) { + return LHS.compare(RHS) != -1; + } + + /// @} + + // StringRefs can be treated like a POD type. 
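An illustrative sketch (not part of the patch) of the parsing, substring, and splitting operations above:

    #include "llvm/ADT/StringRef.h"
    #include <utility>
    using namespace llvm;

    void stringref_ops_demo() {
      unsigned Val = 0;
      bool Err = StringRef("0x1F").getAsInteger(0, Val);  // radix 0 autosenses
      // Err == false, Val == 31
      StringRef Mid = StringRef("abcdef").substr(2, 3);   // "cde"
      StringRef Cut = StringRef("abcdef").slice(1, 4);    // "bcd"
      std::pair<StringRef, StringRef> P = StringRef("key=value=x").split('=');
      // P.first == "key",       P.second == "value=x"
      std::pair<StringRef, StringRef> R = StringRef("key=value=x").rsplit('=');
      // R.first == "key=value", R.second == "x"
      (void)Err; (void)Mid; (void)Cut;
    }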
+  template <typename T> struct isPodLike;
+  template <> struct isPodLike<StringRef> { static const bool value = true; };
+
+}
+
+#endif
diff --git a/final/include/llvm/ADT/StringSet.h b/final/include/llvm/ADT/StringSet.h
new file mode 100644
index 00000000000..9c55f6b70e3
--- /dev/null
+++ b/final/include/llvm/ADT/StringSet.h
@@ -0,0 +1,38 @@
+//===--- StringSet.h - The LLVM Compiler Driver -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// StringSet - A set-like wrapper for the StringMap.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STRINGSET_H
+#define LLVM_ADT_STRINGSET_H
+
+#include "llvm/ADT/StringMap.h"
+
+namespace llvm {
+
+  /// StringSet - A wrapper for StringMap that provides set-like
+  /// functionality.  Only insert() and count() methods are used by my
+  /// code.
+  template <class AllocatorTy = llvm::MallocAllocator>
+  class StringSet : public llvm::StringMap<char, AllocatorTy> {
+    typedef llvm::StringMap<char, AllocatorTy> base;
+  public:
+    bool insert(StringRef InLang) {
+      assert(!InLang.empty());
+      const char *KeyStart = InLang.data();
+      const char *KeyEnd = KeyStart + InLang.size();
+      return base::insert(llvm::StringMapEntry<char>::
+                          Create(KeyStart, KeyEnd, base::getAllocator(), '+'));
+    }
+  };
+}
+
+#endif // LLVM_ADT_STRINGSET_H
diff --git a/final/include/llvm/ADT/StringSwitch.h b/final/include/llvm/ADT/StringSwitch.h
new file mode 100644
index 00000000000..74805830d85
--- /dev/null
+++ b/final/include/llvm/ADT/StringSwitch.h
@@ -0,0 +1,126 @@
+//===--- StringSwitch.h - Switch-on-literal-string Construct --------------===/
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===/
+//
+// This file implements the StringSwitch template, which mimics a switch()
+// statement whose cases are string literals.
+//
+//===----------------------------------------------------------------------===/
+#ifndef LLVM_ADT_STRINGSWITCH_H
+#define LLVM_ADT_STRINGSWITCH_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <cstring>
+
+namespace llvm {
+
+/// \brief A switch()-like statement whose cases are string literals.
+///
+/// The StringSwitch class is a simple form of a switch() statement that
+/// determines whether the given string matches one of the given string
+/// literals. The template type parameter \p T is the type of the value that
+/// will be returned from the string-switch expression. For example,
+/// the following code switches on the name of a color in \c argv[i]:
+///
+/// \code
+/// Color color = StringSwitch<Color>(argv[i])
+///   .Case("red", Red)
+///   .Case("orange", Orange)
+///   .Case("yellow", Yellow)
+///   .Case("green", Green)
+///   .Case("blue", Blue)
+///   .Case("indigo", Indigo)
+///   .Cases("violet", "purple", Violet)
+///   .Default(UnknownColor);
+/// \endcode
+template<typename T, typename R = T>
+class StringSwitch {
+  /// \brief The string we are matching.
+  StringRef Str;
+
+  /// \brief The pointer to the result of this switch statement, once known,
+  /// null before that.
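A sketch of the StringSet wrapper defined above (illustrative, not part of the patch):

    #include "llvm/ADT/StringSet.h"
    using namespace llvm;

    void stringset_demo() {
      StringSet<> Seen;                // default MallocAllocator
      bool New = Seen.insert("foo");   // true: newly inserted
      bool Dup = !Seen.insert("foo");  // insert returns false on repeats
      unsigned N = Seen.count("foo");  // 1, via the underlying StringMap
      (void)New; (void)Dup; (void)N;
    }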
+ const T *Result; + +public: + explicit StringSwitch(StringRef Str) + : Str(Str), Result(0) { } + + template + StringSwitch& Case(const char (&S)[N], const T& Value) { + if (!Result && N-1 == Str.size() && + (std::memcmp(S, Str.data(), N-1) == 0)) { + Result = &Value; + } + + return *this; + } + + template + StringSwitch& EndsWith(const char (&S)[N], const T &Value) { + if (!Result && Str.size() >= N-1 && + std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) { + Result = &Value; + } + + return *this; + } + + template + StringSwitch& StartsWith(const char (&S)[N], const T &Value) { + if (!Result && Str.size() >= N-1 && + std::memcmp(S, Str.data(), N-1) == 0) { + Result = &Value; + } + + return *this; + } + + template + StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], + const T& Value) { + return Case(S0, Value).Case(S1, Value); + } + + template + StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], + const char (&S2)[N2], const T& Value) { + return Case(S0, Value).Case(S1, Value).Case(S2, Value); + } + + template + StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], + const char (&S2)[N2], const char (&S3)[N3], + const T& Value) { + return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value); + } + + template + StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1], + const char (&S2)[N2], const char (&S3)[N3], + const char (&S4)[N4], const T& Value) { + return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value) + .Case(S4, Value); + } + + R Default(const T& Value) const { + if (Result) + return *Result; + + return Value; + } + + operator R() const { + assert(Result && "Fell off the end of a string-switch"); + return *Result; + } +}; + +} // end namespace llvm + +#endif // LLVM_ADT_STRINGSWITCH_H diff --git a/final/include/llvm/ADT/Trie.h b/final/include/llvm/ADT/Trie.h new file mode 100644 index 00000000000..6b150c8fffa --- /dev/null +++ b/final/include/llvm/ADT/Trie.h @@ -0,0 +1,336 @@ +//===- llvm/ADT/Trie.h ---- Generic trie structure --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class defines a generic trie structure. The trie structure +// is immutable after creation, but the payload contained within it is not. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TRIE_H +#define LLVM_ADT_TRIE_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Support/DOTGraphTraits.h" + +#include +#include + +namespace llvm { + +// FIXME: +// - Labels are usually small, maybe it's better to use SmallString +// - Should we use char* during construction? +// - Should we templatize Empty with traits-like interface? 
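Beyond the Case/Cases chain shown in the doc comment above, the prefix and suffix matchers compose the same way; a brief illustrative sketch (not part of the patch):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    unsigned classifyName(StringRef Name) {
      return StringSwitch<unsigned>(Name)
        .StartsWith("llvm.", 1)   // e.g. intrinsic names
        .EndsWith(".ll", 2)       // e.g. IR assembly files
        .Default(0);
    }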
+ +template +class Trie { + friend class GraphTraits >; + friend class DOTGraphTraits >; +public: + class Node { + friend class Trie; + + public: + typedef std::vector NodeVectorType; + typedef typename NodeVectorType::iterator iterator; + typedef typename NodeVectorType::const_iterator const_iterator; + + private: + enum QueryResult { + Same = -3, + StringIsPrefix = -2, + LabelIsPrefix = -1, + DontMatch = 0, + HaveCommonPart + }; + + struct NodeCmp { + bool operator() (Node* N1, Node* N2) { + return (N1->Label[0] < N2->Label[0]); + } + bool operator() (Node* N, char Id) { + return (N->Label[0] < Id); + } + }; + + std::string Label; + Payload Data; + NodeVectorType Children; + + // Do not implement + Node(const Node&); + Node& operator=(const Node&); + + inline void addEdge(Node* N) { + if (Children.empty()) + Children.push_back(N); + else { + iterator I = std::lower_bound(Children.begin(), Children.end(), + N, NodeCmp()); + // FIXME: no dups are allowed + Children.insert(I, N); + } + } + + inline void setEdge(Node* N) { + char Id = N->Label[0]; + iterator I = std::lower_bound(Children.begin(), Children.end(), + Id, NodeCmp()); + assert(I != Children.end() && "Node does not exists!"); + *I = N; + } + + QueryResult query(const std::string& s) const { + unsigned i, l; + unsigned l1 = s.length(); + unsigned l2 = Label.length(); + + // Find the length of common part + l = std::min(l1, l2); + i = 0; + while ((i < l) && (s[i] == Label[i])) + ++i; + + if (i == l) { // One is prefix of another, find who is who + if (l1 == l2) + return Same; + else if (i == l1) + return StringIsPrefix; + else + return LabelIsPrefix; + } else // s and Label have common (possible empty) part, return its length + return (QueryResult)i; + } + + public: + inline explicit Node(const Payload& data, const std::string& label = ""): + Label(label), Data(data) { } + + inline const Payload& data() const { return Data; } + inline void setData(const Payload& data) { Data = data; } + + inline const std::string& label() const { return Label; } + +#if 0 + inline void dump() { + llvm::cerr << "Node: " << this << "\n" + << "Label: " << Label << "\n" + << "Children:\n"; + + for (iterator I = Children.begin(), E = Children.end(); I != E; ++I) + llvm::cerr << (*I)->Label << "\n"; + } +#endif + + inline Node* getEdge(char Id) { + Node* fNode = NULL; + iterator I = std::lower_bound(Children.begin(), Children.end(), + Id, NodeCmp()); + if (I != Children.end() && (*I)->Label[0] == Id) + fNode = *I; + + return fNode; + } + + inline iterator begin() { return Children.begin(); } + inline const_iterator begin() const { return Children.begin(); } + inline iterator end () { return Children.end(); } + inline const_iterator end () const { return Children.end(); } + + inline size_t size () const { return Children.size(); } + inline bool empty() const { return Children.empty(); } + inline const Node* &front() const { return Children.front(); } + inline Node* &front() { return Children.front(); } + inline const Node* &back() const { return Children.back(); } + inline Node* &back() { return Children.back(); } + + }; + +private: + std::vector Nodes; + Payload Empty; + + inline Node* addNode(const Payload& data, const std::string label = "") { + Node* N = new Node(data, label); + Nodes.push_back(N); + return N; + } + + inline Node* splitEdge(Node* N, char Id, size_t index) { + Node* eNode = N->getEdge(Id); + assert(eNode && "Node doesn't exist"); + + const std::string &l = eNode->Label; + assert(index > 0 && index < l.length() && "Trying to split too 
far!"); + std::string l1 = l.substr(0, index); + std::string l2 = l.substr(index); + + Node* nNode = addNode(Empty, l1); + N->setEdge(nNode); + + eNode->Label = l2; + nNode->addEdge(eNode); + + return nNode; + } + + // Do not implement + Trie(const Trie&); + Trie& operator=(const Trie&); + +public: + inline explicit Trie(const Payload& empty):Empty(empty) { + addNode(Empty); + } + inline ~Trie() { + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) + delete Nodes[i]; + } + + inline Node* getRoot() const { return Nodes[0]; } + + bool addString(const std::string& s, const Payload& data); + const Payload& lookup(const std::string& s) const; + +}; + +// Define this out-of-line to dissuade the C++ compiler from inlining it. +template +bool Trie::addString(const std::string& s, const Payload& data) { + Node* cNode = getRoot(); + Node* tNode = NULL; + std::string s1(s); + + while (tNode == NULL) { + char Id = s1[0]; + if (Node* nNode = cNode->getEdge(Id)) { + typename Node::QueryResult r = nNode->query(s1); + + switch (r) { + case Node::Same: + case Node::StringIsPrefix: + // Currently we don't allow to have two strings in the trie one + // being a prefix of another. This should be fixed. + assert(0 && "FIXME!"); + return false; + case Node::DontMatch: + assert(0 && "Impossible!"); + return false; + case Node::LabelIsPrefix: + s1 = s1.substr(nNode->label().length()); + cNode = nNode; + break; + default: + nNode = splitEdge(cNode, Id, r); + tNode = addNode(data, s1.substr(r)); + nNode->addEdge(tNode); + } + } else { + tNode = addNode(data, s1); + cNode->addEdge(tNode); + } + } + + return true; +} + +template +const Payload& Trie::lookup(const std::string& s) const { + Node* cNode = getRoot(); + Node* tNode = NULL; + std::string s1(s); + + while (tNode == NULL) { + char Id = s1[0]; + if (Node* nNode = cNode->getEdge(Id)) { + typename Node::QueryResult r = nNode->query(s1); + + switch (r) { + case Node::Same: + tNode = nNode; + break; + case Node::StringIsPrefix: + return Empty; + case Node::DontMatch: + assert(0 && "Impossible!"); + return Empty; + case Node::LabelIsPrefix: + s1 = s1.substr(nNode->label().length()); + cNode = nNode; + break; + default: + return Empty; + } + } else + return Empty; + } + + return tNode->data(); +} + +template +struct GraphTraits > { + typedef Trie TrieType; + typedef typename TrieType::Node NodeType; + typedef typename NodeType::iterator ChildIteratorType; + + static inline NodeType *getEntryNode(const TrieType& T) { + return T.getRoot(); + } + + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { return N->end(); } + + typedef typename std::vector::const_iterator nodes_iterator; + + static inline nodes_iterator nodes_begin(const TrieType& G) { + return G.Nodes.begin(); + } + static inline nodes_iterator nodes_end(const TrieType& G) { + return G.Nodes.end(); + } + +}; + +template +struct DOTGraphTraits > : public DefaultDOTGraphTraits { + typedef typename Trie::Node NodeType; + typedef typename GraphTraits >::ChildIteratorType EdgeIter; + + static std::string getGraphName(const Trie& T) { + return "Trie"; + } + + static std::string getNodeLabel(NodeType* Node, const Trie& T) { + if (T.getRoot() == Node) + return ""; + else + return Node->label(); + } + + static std::string getEdgeSourceLabel(NodeType* Node, EdgeIter I) { + NodeType* N = *I; + return N->label().substr(0, 1); + } + + static std::string getNodeAttributes(const NodeType* Node, + const Trie& T) { + if 
(Node->data() != T.Empty) + return "color=blue"; + + return ""; + } + +}; + +} // end of llvm namespace + +#endif // LLVM_ADT_TRIE_H diff --git a/final/include/llvm/ADT/Triple.h b/final/include/llvm/ADT/Triple.h new file mode 100644 index 00000000000..e6dcc23258f --- /dev/null +++ b/final/include/llvm/ADT/Triple.h @@ -0,0 +1,333 @@ +//===-- llvm/ADT/Triple.h - Target triple helper class ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_TRIPLE_H +#define LLVM_ADT_TRIPLE_H + +#include "llvm/ADT/StringRef.h" +#include + +// Some system headers or GCC predefined macros conflict with identifiers in +// this file. Undefine them here. +#undef mips +#undef sparc + +namespace llvm { +class StringRef; +class Twine; + +/// Triple - Helper class for working with target triples. +/// +/// Target triples are strings in the canonical form: +/// ARCHITECTURE-VENDOR-OPERATING_SYSTEM +/// or +/// ARCHITECTURE-VENDOR-OPERATING_SYSTEM-ENVIRONMENT +/// +/// This class is used for clients which want to support arbitrary +/// target triples, but also want to implement certain special +/// behavior for particular targets. This class isolates the mapping +/// from the components of the target triple to well known IDs. +/// +/// At its core the Triple class is designed to be a wrapper for a triple +/// string; the constructor does not change or normalize the triple string. +/// Clients that need to handle the non-canonical triples that users often +/// specify should use the normalize method. +/// +/// See autoconf/config.guess for a glimpse into what triples look like in +/// practice. +class Triple { +public: + enum ArchType { + UnknownArch, + + alpha, // Alpha: alpha + arm, // ARM; arm, armv.*, xscale + bfin, // Blackfin: bfin + cellspu, // CellSPU: spu, cellspu + mips, // MIPS: mips, mipsallegrex + mipsel, // MIPSEL: mipsel, mipsallegrexel, psp + msp430, // MSP430: msp430 + ppc, // PPC: powerpc + ppc64, // PPC64: powerpc64, ppu + sparc, // Sparc: sparc + sparcv9, // Sparcv9: Sparcv9 + systemz, // SystemZ: s390x + tce, // TCE (http://tce.cs.tut.fi/): tce + thumb, // Thumb: thumb, thumbv.* + x86, // X86: i[3-9]86 + x86_64, // X86-64: amd64, x86_64 + xcore, // XCore: xcore + mblaze, // MBlaze: mblaze + ptx, // PTX: ptx + + InvalidArch + }; + enum VendorType { + UnknownVendor, + + Apple, + PC + }; + enum OSType { + UnknownOS, + + AuroraUX, + Cygwin, + Darwin, + DragonFly, + FreeBSD, + Linux, + Lv2, // PS3 + MinGW32, // i*86-pc-mingw32, *-w64-mingw32 + NetBSD, + OpenBSD, + Psp, + Solaris, + Win32, + Haiku, + Minix + }; + enum EnvironmentType { + UnknownEnvironment, + + GNU, + GNUEABI, + EABI, + MachO + }; + +private: + std::string Data; + + /// The parsed arch type (or InvalidArch if uninitialized). + mutable ArchType Arch; + + /// The parsed vendor type. + mutable VendorType Vendor; + + /// The parsed OS type. + mutable OSType OS; + + /// The parsed Environment type. 
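Returning to the Trie defined above, a minimal sketch of its two public operations (illustrative, not part of the patch; note that addString currently asserts if one inserted string is a prefix of another, per the FIXME in its Same/StringIsPrefix cases):

    #include "llvm/ADT/Trie.h"
    using namespace llvm;

    void trie_demo() {
      Trie<int> T(-1);             // -1 serves as the "empty" payload
      T.addString("hello", 1);
      T.addString("help", 2);      // shares the "hel" edge internally
      int V    = T.lookup("help"); // 2
      int Miss = T.lookup("nope"); // -1: the empty payload
      (void)V; (void)Miss;
    }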
+ mutable EnvironmentType Environment; + + bool isInitialized() const { return Arch != InvalidArch; } + static ArchType ParseArch(StringRef ArchName); + static VendorType ParseVendor(StringRef VendorName); + static OSType ParseOS(StringRef OSName); + static EnvironmentType ParseEnvironment(StringRef EnvironmentName); + void Parse() const; + +public: + /// @name Constructors + /// @{ + + Triple() : Data(), Arch(InvalidArch) {} + explicit Triple(StringRef Str) : Data(Str), Arch(InvalidArch) {} + explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr) + : Data(ArchStr), Arch(InvalidArch) { + Data += '-'; + Data += VendorStr; + Data += '-'; + Data += OSStr; + } + + explicit Triple(StringRef ArchStr, StringRef VendorStr, StringRef OSStr, + StringRef EnvironmentStr) + : Data(ArchStr), Arch(InvalidArch) { + Data += '-'; + Data += VendorStr; + Data += '-'; + Data += OSStr; + Data += '-'; + Data += EnvironmentStr; + } + + /// @} + /// @name Normalization + /// @{ + + /// normalize - Turn an arbitrary machine specification into the canonical + /// triple form (or something sensible that the Triple class understands if + /// nothing better can reasonably be done). In particular, it handles the + /// common case in which otherwise valid components are in the wrong order. + static std::string normalize(StringRef Str); + + /// @} + /// @name Typed Component Access + /// @{ + + /// getArch - Get the parsed architecture type of this triple. + ArchType getArch() const { + if (!isInitialized()) Parse(); + return Arch; + } + + /// getVendor - Get the parsed vendor type of this triple. + VendorType getVendor() const { + if (!isInitialized()) Parse(); + return Vendor; + } + + /// getOS - Get the parsed operating system type of this triple. + OSType getOS() const { + if (!isInitialized()) Parse(); + return OS; + } + + /// hasEnvironment - Does this triple have the optional environment + /// (fourth) component? + bool hasEnvironment() const { + return getEnvironmentName() != ""; + } + + /// getEnvironment - Get the parsed environment type of this triple. + EnvironmentType getEnvironment() const { + if (!isInitialized()) Parse(); + return Environment; + } + + /// @} + /// @name Direct Component Access + /// @{ + + const std::string &str() const { return Data; } + + const std::string &getTriple() const { return Data; } + + /// getArchName - Get the architecture (first) component of the + /// triple. + StringRef getArchName() const; + + /// getVendorName - Get the vendor (second) component of the triple. + StringRef getVendorName() const; + + /// getOSName - Get the operating system (third) component of the + /// triple. + StringRef getOSName() const; + + /// getEnvironmentName - Get the optional environment (fourth) + /// component of the triple, or "" if empty. + StringRef getEnvironmentName() const; + + /// getOSAndEnvironmentName - Get the operating system and optional + /// environment components as a single string (separated by a '-' + /// if the environment component is present). + StringRef getOSAndEnvironmentName() const; + + + /// getDarwinNumber - Parse the 'darwin number' out of the specific target + /// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is + /// not defined, return 0's. This requires that the triple have an OSType of + /// darwin before it is called. + void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const; + + /// getDarwinMajorNumber - Return just the major version number, this is + /// specialized because it is a common query. 
+ unsigned getDarwinMajorNumber() const { + unsigned Maj, Min, Rev; + getDarwinNumber(Maj, Min, Rev); + return Maj; + } + + /// @} + /// @name Mutators + /// @{ + + /// setArch - Set the architecture (first) component of the triple + /// to a known type. + void setArch(ArchType Kind); + + /// setVendor - Set the vendor (second) component of the triple to a + /// known type. + void setVendor(VendorType Kind); + + /// setOS - Set the operating system (third) component of the triple + /// to a known type. + void setOS(OSType Kind); + + /// setEnvironment - Set the environment (fourth) component of the triple + /// to a known type. + void setEnvironment(EnvironmentType Kind); + + /// setTriple - Set all components to the new triple \arg Str. + void setTriple(const Twine &Str); + + /// setArchName - Set the architecture (first) component of the + /// triple by name. + void setArchName(StringRef Str); + + /// setVendorName - Set the vendor (second) component of the triple + /// by name. + void setVendorName(StringRef Str); + + /// setOSName - Set the operating system (third) component of the + /// triple by name. + void setOSName(StringRef Str); + + /// setEnvironmentName - Set the optional environment (fourth) + /// component of the triple by name. + void setEnvironmentName(StringRef Str); + + /// setOSAndEnvironmentName - Set the operating system and optional + /// environment components with a single string. + void setOSAndEnvironmentName(StringRef Str); + + /// getArchNameForAssembler - Get an architecture name that is understood by + /// the target assembler. + const char *getArchNameForAssembler(); + + /// @} + /// @name Static helpers for IDs. + /// @{ + + /// getArchTypeName - Get the canonical name for the \arg Kind + /// architecture. + static const char *getArchTypeName(ArchType Kind); + + /// getArchTypePrefix - Get the "prefix" canonical name for the \arg Kind + /// architecture. This is the prefix used by the architecture specific + /// builtins, and is suitable for passing to \see + /// Intrinsic::getIntrinsicForGCCBuiltin(). + /// + /// \return - The architecture prefix, or 0 if none is defined. + static const char *getArchTypePrefix(ArchType Kind); + + /// getVendorTypeName - Get the canonical name for the \arg Kind + /// vendor. + static const char *getVendorTypeName(VendorType Kind); + + /// getOSTypeName - Get the canonical name for the \arg Kind operating + /// system. + static const char *getOSTypeName(OSType Kind); + + /// getEnvironmentTypeName - Get the canonical name for the \arg Kind + /// environment. + static const char *getEnvironmentTypeName(EnvironmentType Kind); + + /// @} + /// @name Static helpers for converting alternate architecture names. + /// @{ + + /// getArchTypeForLLVMName - The canonical type for the given LLVM + /// architecture name (e.g., "x86"). + static ArchType getArchTypeForLLVMName(StringRef Str); + + /// getArchTypeForDarwinArchName - Get the architecture type for a "Darwin" + /// architecture name, for example as accepted by "gcc -arch" (see also + /// arch(3)). 
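A brief usage sketch (editorial, not part of the patch) of the accessors and mutators declared above; the triple string is an arbitrary example, and the Darwin enumerator is assumed from the OSType enum earlier in this header:

#include "llvm/ADT/Triple.h"
#include <string>

void describeTriple() {
  llvm::Triple T("x86_64-apple-darwin10");
  if (T.getOS() == llvm::Triple::Darwin) {
    unsigned Maj = T.getDarwinMajorNumber();   // 10 for "darwin10"
    (void)Maj;
  }
  T.setVendorName("pc");                       // rewrite one component by name
  // normalize() reorders/repairs odd specifications into canonical form.
  std::string Canon = llvm::Triple::normalize("i386-mingw32");
  (void)Canon;
}
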
+  static ArchType getArchTypeForDarwinArchName(StringRef Str);
+
+  /// @}
+};
+
+} // End llvm namespace
+
+
+#endif
diff --git a/final/include/llvm/ADT/Twine.h b/final/include/llvm/ADT/Twine.h
new file mode 100644
index 00000000000..ab8d3653e33
--- /dev/null
+++ b/final/include/llvm/ADT/Twine.h
@@ -0,0 +1,465 @@
+//===-- Twine.h - Fast Temporary String Concatenation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_TWINE_H
+#define LLVM_ADT_TWINE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <string>
+
+namespace llvm {
+  template <typename T>
+  class SmallVectorImpl;
+  class StringRef;
+  class raw_ostream;
+
+  /// Twine - A lightweight data structure for efficiently representing the
+  /// concatenation of temporary values as strings.
+  ///
+  /// A Twine is a kind of rope: it represents a concatenated string using a
+  /// binary tree, where the string is the preorder of the nodes. Since the
+  /// Twine can be efficiently rendered into a buffer when its result is used,
+  /// it avoids the cost of generating temporary values for intermediate string
+  /// results -- particularly in cases when the Twine result is never
+  /// required. By explicitly tracking the type of leaf nodes, we can also avoid
+  /// the creation of temporary strings for conversion operations (such as
+  /// appending an integer to a string).
+  ///
+  /// A Twine is not intended for use directly and should not be stored; its
+  /// implementation relies on the ability to store pointers to temporary stack
+  /// objects which may be deallocated at the end of a statement. Twines should
+  /// only be accepted as const reference arguments, when an API wishes to
+  /// accept possibly-concatenated strings.
+  ///
+  /// Twines support a special 'null' value, which always concatenates to form
+  /// itself, and renders as an empty string. This can be returned from APIs to
+  /// effectively nullify any concatenations performed on the result.
+  ///
+  /// \b Implementation \n
+  ///
+  /// Given the nature of a Twine, it is not possible for the Twine's
+  /// concatenation method to construct interior nodes; the result must be
+  /// represented inside the returned value. For this reason a Twine object
+  /// actually holds two values, the left- and right-hand sides of a
+  /// concatenation. We also have nullary Twine objects, which are effectively
+  /// sentinel values that represent empty strings.
+  ///
+  /// Thus, a Twine can effectively have zero, one, or two children. The \see
+  /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
+  /// testing the number of children.
+  ///
+  /// We maintain a number of invariants on Twine objects (FIXME: Why):
+  ///  - Nullary twines are always represented with their Kind on the left-hand
+  ///    side, and the Empty kind on the right-hand side.
+  ///  - Unary twines are always represented with the value on the left-hand
+  ///    side, and the Empty kind on the right-hand side.
+  ///  - If a Twine has another Twine as a child, that child should always be
+  ///    binary (otherwise it could have been folded into the parent).
+  ///
+  /// These invariants are checked by \see isValid().
+  ///
+  /// \b Efficiency Considerations \n
+  ///
+  /// The Twine is designed to yield efficient and small code for common
+  /// situations.
For this reason, the concat() method is inlined so that + /// concatenations of leaf nodes can be optimized into stores directly into a + /// single stack allocated object. + /// + /// In practice, not all compilers can be trusted to optimize concat() fully, + /// so we provide two additional methods (and accompanying operator+ + /// overloads) to guarantee that particularly important cases (cstring plus + /// StringRef) codegen as desired. + class Twine { + /// NodeKind - Represent the type of an argument. + enum NodeKind { + /// An empty string; the result of concatenating anything with it is also + /// empty. + NullKind, + + /// The empty string. + EmptyKind, + + /// A pointer to a Twine instance. + TwineKind, + + /// A pointer to a C string instance. + CStringKind, + + /// A pointer to an std::string instance. + StdStringKind, + + /// A pointer to a StringRef instance. + StringRefKind, + + /// An unsigned int value reinterpreted as a pointer, to render as an + /// unsigned decimal integer. + DecUIKind, + + /// An int value reinterpreted as a pointer, to render as a signed + /// decimal integer. + DecIKind, + + /// A pointer to an unsigned long value, to render as an unsigned decimal + /// integer. + DecULKind, + + /// A pointer to a long value, to render as a signed decimal integer. + DecLKind, + + /// A pointer to an unsigned long long value, to render as an unsigned + /// decimal integer. + DecULLKind, + + /// A pointer to a long long value, to render as a signed decimal integer. + DecLLKind, + + /// A pointer to a uint64_t value, to render as an unsigned hexadecimal + /// integer. + UHexKind + }; + + private: + /// LHS - The prefix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *LHS; + /// RHS - The suffix in the concatenation, which may be uninitialized for + /// Null or Empty kinds. + const void *RHS; + /// LHSKind - The NodeKind of the left hand side, \see getLHSKind(). + unsigned char LHSKind; + /// RHSKind - The NodeKind of the left hand side, \see getLHSKind(). + unsigned char RHSKind; + + private: + /// Construct a nullary twine; the kind must be NullKind or EmptyKind. + explicit Twine(NodeKind Kind) + : LHSKind(Kind), RHSKind(EmptyKind) { + assert(isNullary() && "Invalid kind!"); + } + + /// Construct a binary twine. + explicit Twine(const Twine &_LHS, const Twine &_RHS) + : LHS(&_LHS), RHS(&_RHS), LHSKind(TwineKind), RHSKind(TwineKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct a twine from explicit values. + explicit Twine(const void *_LHS, NodeKind _LHSKind, + const void *_RHS, NodeKind _RHSKind) + : LHS(_LHS), RHS(_RHS), LHSKind(_LHSKind), RHSKind(_RHSKind) { + assert(isValid() && "Invalid twine!"); + } + + /// isNull - Check for the null twine. + bool isNull() const { + return getLHSKind() == NullKind; + } + + /// isEmpty - Check for the empty twine. + bool isEmpty() const { + return getLHSKind() == EmptyKind; + } + + /// isNullary - Check if this is a nullary twine (null or empty). + bool isNullary() const { + return isNull() || isEmpty(); + } + + /// isUnary - Check if this is a unary twine. + bool isUnary() const { + return getRHSKind() == EmptyKind && !isNullary(); + } + + /// isBinary - Check if this is a binary twine. + bool isBinary() const { + return getLHSKind() != NullKind && getRHSKind() != EmptyKind; + } + + /// isValid - Check if this is a valid twine (satisfying the invariants on + /// order and number of arguments). 
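To make the invariant list concrete before isValid() checks it, an editorial illustration (descriptive only, since the predicates are private):

// Editorial illustration of the invariants above (not part of the patch):
//   Twine()                  -> nullary: EmptyKind   / EmptyKind
//   Twine("hi")              -> unary:   CStringKind / EmptyKind
//   Twine("a") + Twine("b")  -> binary:  two leaf children
// concat() folds unary operands directly into the new node, which is why any
// child that is itself a Twine must be binary.
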
+ bool isValid() const { + // Nullary twines always have Empty on the RHS. + if (isNullary() && getRHSKind() != EmptyKind) + return false; + + // Null should never appear on the RHS. + if (getRHSKind() == NullKind) + return false; + + // The RHS cannot be non-empty if the LHS is empty. + if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind) + return false; + + // A twine child should always be binary. + if (getLHSKind() == TwineKind && + !static_cast(LHS)->isBinary()) + return false; + if (getRHSKind() == TwineKind && + !static_cast(RHS)->isBinary()) + return false; + + return true; + } + + /// getLHSKind - Get the NodeKind of the left-hand side. + NodeKind getLHSKind() const { return (NodeKind) LHSKind; } + + /// getRHSKind - Get the NodeKind of the left-hand side. + NodeKind getRHSKind() const { return (NodeKind) RHSKind; } + + /// printOneChild - Print one child from a twine. + void printOneChild(raw_ostream &OS, const void *Ptr, NodeKind Kind) const; + + /// printOneChildRepr - Print the representation of one child from a twine. + void printOneChildRepr(raw_ostream &OS, const void *Ptr, + NodeKind Kind) const; + + public: + /// @name Constructors + /// @{ + + /// Construct from an empty string. + /*implicit*/ Twine() : LHSKind(EmptyKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a C string. + /// + /// We take care here to optimize "" into the empty twine -- this will be + /// optimized out for string constants. This allows Twine arguments have + /// default "" values, without introducing unnecessary string constants. + /*implicit*/ Twine(const char *Str) + : RHSKind(EmptyKind) { + if (Str[0] != '\0') { + LHS = Str; + LHSKind = CStringKind; + } else + LHSKind = EmptyKind; + + assert(isValid() && "Invalid twine!"); + } + + /// Construct from an std::string. + /*implicit*/ Twine(const std::string &Str) + : LHS(&Str), LHSKind(StdStringKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct from a StringRef. + /*implicit*/ Twine(const StringRef &Str) + : LHS(&Str), LHSKind(StringRefKind), RHSKind(EmptyKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(unsigned Val) + : LHS((void*)(intptr_t)Val), LHSKind(DecUIKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(int Val) + : LHS((void*)(intptr_t)Val), LHSKind(DecIKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(const unsigned long &Val) + : LHS(&Val), LHSKind(DecULKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(const long &Val) + : LHS(&Val), LHSKind(DecLKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as an unsigned decimal integer. + explicit Twine(const unsigned long long &Val) + : LHS(&Val), LHSKind(DecULLKind), RHSKind(EmptyKind) { + } + + /// Construct a twine to print \arg Val as a signed decimal integer. + explicit Twine(const long long &Val) + : LHS(&Val), LHSKind(DecLLKind), RHSKind(EmptyKind) { + } + + // FIXME: Unfortunately, to make sure this is as efficient as possible we + // need extra binary constructors from particular types. We can't rely on + // the compiler to be smart enough to fold operator+()/concat() down to the + // right thing. Yet. 
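A usage sketch (editorial, not part of the patch) of the intended calling convention: a Twine is built implicitly from temporaries inside a call expression and consumed by const reference. emitLabel is a hypothetical consumer.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"

void emitLabel(const llvm::Twine &Name);   // hypothetical API taking a Twine

void test(llvm::StringRef Base, unsigned Counter) {
  // Builds a small tree of Twine nodes on the stack; no std::string
  // temporaries are created unless the callee actually renders the result.
  emitLabel("label." + Base + "." + llvm::Twine(Counter));
}
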
+ + /// Construct as the concatenation of a C string and a StringRef. + /*implicit*/ Twine(const char *_LHS, const StringRef &_RHS) + : LHS(_LHS), RHS(&_RHS), LHSKind(CStringKind), RHSKind(StringRefKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Construct as the concatenation of a StringRef and a C string. + /*implicit*/ Twine(const StringRef &_LHS, const char *_RHS) + : LHS(&_LHS), RHS(_RHS), LHSKind(StringRefKind), RHSKind(CStringKind) { + assert(isValid() && "Invalid twine!"); + } + + /// Create a 'null' string, which is an empty string that always + /// concatenates to form another empty string. + static Twine createNull() { + return Twine(NullKind); + } + + /// @} + /// @name Numeric Conversions + /// @{ + + // Construct a twine to print \arg Val as an unsigned hexadecimal integer. + static Twine utohexstr(const uint64_t &Val) { + return Twine(&Val, UHexKind, 0, EmptyKind); + } + + /// @} + /// @name Predicate Operations + /// @{ + + /// isTriviallyEmpty - Check if this twine is trivially empty; a false + /// return value does not necessarily mean the twine is empty. + bool isTriviallyEmpty() const { + return isNullary(); + } + + /// isSingleStringRef - Return true if this twine can be dynamically + /// accessed as a single StringRef value with getSingleStringRef(). + bool isSingleStringRef() const { + if (getRHSKind() != EmptyKind) return false; + + switch (getLHSKind()) { + case EmptyKind: + case CStringKind: + case StdStringKind: + case StringRefKind: + return true; + default: + return false; + } + } + + /// @} + /// @name String Operations + /// @{ + + Twine concat(const Twine &Suffix) const; + + /// @} + /// @name Output & Conversion. + /// @{ + + /// str - Return the twine contents as a std::string. + std::string str() const; + + /// toVector - Write the concatenated string into the given SmallString or + /// SmallVector. + void toVector(SmallVectorImpl &Out) const; + + /// getSingleStringRef - This returns the twine as a single StringRef. This + /// method is only valid if isSingleStringRef() is true. + StringRef getSingleStringRef() const { + assert(isSingleStringRef() &&"This cannot be had as a single stringref!"); + switch (getLHSKind()) { + default: assert(0 && "Out of sync with isSingleStringRef"); + case EmptyKind: return StringRef(); + case CStringKind: return StringRef((const char*)LHS); + case StdStringKind: return StringRef(*(const std::string*)LHS); + case StringRefKind: return *(const StringRef*)LHS; + } + } + + /// toStringRef - This returns the twine as a single StringRef if it can be + /// represented as such. Otherwise the twine is written into the given + /// SmallVector and a StringRef to the SmallVector's data is returned. + StringRef toStringRef(SmallVectorImpl &Out) const; + + /// toNullTerminatedStringRef - This returns the twine as a single null + /// terminated StringRef if it can be represented as such. Otherwise the + /// twine is written into the given SmallVector and a StringRef to the + /// SmallVector's data is returned. + /// + /// The returned StringRef's size does not include the null terminator. + StringRef toNullTerminatedStringRef(SmallVectorImpl &Out) const; + + /// print - Write the concatenated string represented by this twine to the + /// stream \arg OS. + void print(raw_ostream &OS) const; + + /// dump - Dump the concatenated string represented by this twine to stderr. + void dump() const; + + /// print - Write the representation of this twine to the stream \arg OS. 
+  void printRepr(raw_ostream &OS) const;
+
+  /// dumpRepr - Dump the representation of this twine to stderr.
+  void dumpRepr() const;
+
+  /// @}
+  };
+
+  /// @name Twine Inline Implementations
+  /// @{
+
+  inline Twine Twine::concat(const Twine &Suffix) const {
+    // Concatenation with null is null.
+    if (isNull() || Suffix.isNull())
+      return Twine(NullKind);
+
+    // Concatenation with empty yields the other side.
+    if (isEmpty())
+      return Suffix;
+    if (Suffix.isEmpty())
+      return *this;
+
+    // Otherwise we need to create a new node, taking care to fold in unary
+    // twines.
+    const void *NewLHS = this, *NewRHS = &Suffix;
+    NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
+    if (isUnary()) {
+      NewLHS = LHS;
+      NewLHSKind = getLHSKind();
+    }
+    if (Suffix.isUnary()) {
+      NewRHS = Suffix.LHS;
+      NewRHSKind = Suffix.getLHSKind();
+    }
+
+    return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
+  }
+
+  inline Twine operator+(const Twine &LHS, const Twine &RHS) {
+    return LHS.concat(RHS);
+  }
+
+  /// Additional overload to guarantee simplified codegen; this is equivalent to
+  /// concat().
+  inline Twine operator+(const char *LHS, const StringRef &RHS) {
+    return Twine(LHS, RHS);
+  }
+
+  /// Additional overload to guarantee simplified codegen; this is equivalent to
+  /// concat().
+  inline Twine operator+(const StringRef &LHS, const char *RHS) {
+    return Twine(LHS, RHS);
+  }
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
+    RHS.print(OS);
+    return OS;
+  }
+
+  /// @}
+}
+
+#endif
diff --git a/final/include/llvm/ADT/UniqueVector.h b/final/include/llvm/ADT/UniqueVector.h
new file mode 100644
index 00000000000..2d02d1ce166
--- /dev/null
+++ b/final/include/llvm/ADT/UniqueVector.h
@@ -0,0 +1,89 @@
+//===-- llvm/ADT/UniqueVector.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_UNIQUEVECTOR_H
+#define LLVM_ADT_UNIQUEVECTOR_H
+
+#include <cassert>
+#include <map>
+#include <vector>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// UniqueVector - This class produces a sequential ID number (base 1) for each
+/// unique entry that is added. T is the type of entries in the vector; it
+/// should have implementations of operator== and operator<. Entries can be
+/// fetched using operator[] with the entry ID.
+template <class T> class UniqueVector {
+private:
+  // Map - Used to handle the correspondence of entry to ID.
+  std::map<T, unsigned> Map;
+
+  // Vector - ID ordered vector of entries. Entries can be indexed by ID - 1.
+  //
+  std::vector<T> Vector;
+
+public:
+  /// insert - Append entry to the vector if it doesn't already exist. Returns
+  /// the entry's index + 1 to be used as a unique ID.
+  unsigned insert(const T &Entry) {
+    // Check if the entry is already in the map.
+    unsigned &Val = Map[Entry];
+
+    // See if entry exists, if so return prior ID.
+    if (Val) return Val;
+
+    // Compute ID for entry.
+    Val = static_cast<unsigned>(Vector.size()) + 1;
+
+    // Insert in vector.
+    Vector.push_back(Entry);
+    return Val;
+  }
+
+  /// idFor - return the ID for an existing entry. Returns 0 if the entry is
+  /// not found.
+  unsigned idFor(const T &Entry) const {
+    // Search for entry in the map.
+    typename std::map<T, unsigned>::const_iterator MI = Map.find(Entry);
+
+    // See if entry exists, if so return ID.
+ if (MI != Map.end()) return MI->second; + + // No luck. + return 0; + } + + /// operator[] - Returns a reference to the entry with the specified ID. + /// + const T &operator[](unsigned ID) const { + assert(ID-1 < size() && "ID is 0 or out of range!"); + return Vector[ID - 1]; + } + + /// size - Returns the number of entries in the vector. + /// + size_t size() const { return Vector.size(); } + + /// empty - Returns true if the vector is empty. + /// + bool empty() const { return Vector.empty(); } + + /// reset - Clears all the entries. + /// + void reset() { + Map.clear(); + Vector.resize(0, 0); + } +}; + +} // End of namespace llvm + +#endif // LLVM_ADT_UNIQUEVECTOR_H diff --git a/final/include/llvm/ADT/ValueMap.h b/final/include/llvm/ADT/ValueMap.h new file mode 100644 index 00000000000..d1f4e5a0dac --- /dev/null +++ b/final/include/llvm/ADT/ValueMap.h @@ -0,0 +1,368 @@ +//===- llvm/ADT/ValueMap.h - Safe map from Values to data -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ValueMap class. ValueMap maps Value* or any subclass +// to an arbitrary other type. It provides the DenseMap interface but updates +// itself to remain safe when keys are RAUWed or deleted. By default, when a +// key is RAUWed from V1 to V2, the old mapping V1->target is removed, and a new +// mapping V2->target is added. If V2 already existed, its old target is +// overwritten. When a key is deleted, its mapping is removed. +// +// You can override a ValueMap's Config parameter to control exactly what +// happens on RAUW and destruction and to get called back on each event. It's +// legal to call back into the ValueMap from a Config's callbacks. Config +// parameters should inherit from ValueMapConfig to get default +// implementations of all the methods ValueMap uses. See ValueMapConfig for +// documentation of the functions you can override. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_VALUEMAP_H +#define LLVM_ADT_VALUEMAP_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/type_traits.h" +#include "llvm/Support/Mutex.h" + +#include + +namespace llvm { + +template +class ValueMapCallbackVH; + +template +class ValueMapIterator; +template +class ValueMapConstIterator; + +/// This class defines the default behavior for configurable aspects of +/// ValueMap<>. User Configs should inherit from this class to be as compatible +/// as possible with future versions of ValueMap. +template +struct ValueMapConfig { + /// If FollowRAUW is true, the ValueMap will update mappings on RAUW. If it's + /// false, the ValueMap will leave the original mapping in place. + enum { FollowRAUW = true }; + + // All methods will be called with a first argument of type ExtraData. The + // default implementations in this class take a templated first argument so + // that users' subclasses can use any type they want without having to + // override all the defaults. + struct ExtraData {}; + + template + static void onRAUW(const ExtraDataT & /*Data*/, KeyT /*Old*/, KeyT /*New*/) {} + template + static void onDelete(const ExtraDataT &/*Data*/, KeyT /*Old*/) {} + + /// Returns a mutex that should be acquired around any changes to the map. 
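Looking back at UniqueVector, completed just above, a usage sketch (editorial, not part of the patch) of its 1-based ID scheme:

#include "llvm/ADT/UniqueVector.h"
#include <string>

void demoUniqueVector() {
  llvm::UniqueVector<std::string> Names;
  unsigned A = Names.insert("foo");        // returns 1
  unsigned B = Names.insert("bar");        // returns 2
  unsigned C = Names.insert("foo");        // returns 1 again: already present
  const std::string &S = Names[A];         // "foo"; IDs index Vector[ID - 1]
  (void)B; (void)C; (void)S;
  // idFor() is the non-inserting lookup: 0 means "not present".
  unsigned Missing = Names.idFor("baz");   // 0
  (void)Missing;
}
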
+ /// This is only acquired from the CallbackVH (and held around calls to onRAUW + /// and onDelete) and not inside other ValueMap methods. NULL means that no + /// mutex is necessary. + template + static sys::Mutex *getMutex(const ExtraDataT &/*Data*/) { return NULL; } +}; + +/// See the file comment. +template, + typename ValueInfoT = DenseMapInfo > +class ValueMap { + friend class ValueMapCallbackVH; + typedef ValueMapCallbackVH ValueMapCVH; + typedef DenseMap, + ValueInfoT> MapT; + typedef typename Config::ExtraData ExtraData; + MapT Map; + ExtraData Data; + ValueMap(const ValueMap&); // DO NOT IMPLEMENT + ValueMap& operator=(const ValueMap&); // DO NOT IMPLEMENT +public: + typedef KeyT key_type; + typedef ValueT mapped_type; + typedef std::pair value_type; + + explicit ValueMap(unsigned NumInitBuckets = 64) + : Map(NumInitBuckets), Data() {} + explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64) + : Map(NumInitBuckets), Data(Data) {} + + ~ValueMap() {} + + typedef ValueMapIterator iterator; + typedef ValueMapConstIterator const_iterator; + inline iterator begin() { return iterator(Map.begin()); } + inline iterator end() { return iterator(Map.end()); } + inline const_iterator begin() const { return const_iterator(Map.begin()); } + inline const_iterator end() const { return const_iterator(Map.end()); } + + bool empty() const { return Map.empty(); } + unsigned size() const { return Map.size(); } + + /// Grow the map so that it has at least Size buckets. Does not shrink + void resize(size_t Size) { Map.resize(Size); } + + void clear() { Map.clear(); } + + /// count - Return true if the specified key is in the map. + bool count(const KeyT &Val) const { + return Map.count(Wrap(Val)); + } + + iterator find(const KeyT &Val) { + return iterator(Map.find(Wrap(Val))); + } + const_iterator find(const KeyT &Val) const { + return const_iterator(Map.find(Wrap(Val))); + } + + /// lookup - Return the entry for the specified key, or a default + /// constructed value if no such entry exists. + ValueT lookup(const KeyT &Val) const { + return Map.lookup(Wrap(Val)); + } + + // Inserts key,value pair into the map if the key isn't already in the map. + // If the key is already in the map, it returns false and doesn't update the + // value. + std::pair insert(const std::pair &KV) { + std::pair map_result= + Map.insert(std::make_pair(Wrap(KV.first), KV.second)); + return std::make_pair(iterator(map_result.first), map_result.second); + } + + /// insert - Range insertion of pairs. + template + void insert(InputIt I, InputIt E) { + for (; I != E; ++I) + insert(*I); + } + + + bool erase(const KeyT &Val) { + return Map.erase(Wrap(Val)); + } + void erase(iterator I) { + return Map.erase(I.base()); + } + + value_type& FindAndConstruct(const KeyT &Key) { + return Map.FindAndConstruct(Wrap(Key)); + } + + ValueT &operator[](const KeyT &Key) { + return Map[Wrap(Key)]; + } + + /// isPointerIntoBucketsArray - Return true if the specified pointer points + /// somewhere into the ValueMap's array of buckets (i.e. either to a key or + /// value in the ValueMap). + bool isPointerIntoBucketsArray(const void *Ptr) const { + return Map.isPointerIntoBucketsArray(Ptr); + } + + /// getPointerIntoBucketsArray() - Return an opaque pointer into the buckets + /// array. In conjunction with the previous method, this can be used to + /// determine whether an insertion caused the ValueMap to reallocate. 
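A usage sketch (editorial, not part of the patch) of the default RAUW behavior described in the file comment; Instruction is used here only as a representative Value subclass, and the 2.9-era header location is assumed:

#include "llvm/ADT/ValueMap.h"
#include "llvm/Instruction.h"   // 2.9-era header layout

void tagDepth(llvm::Instruction *I, llvm::Instruction *J) {
  llvm::ValueMap<llvm::Instruction*, unsigned> Depths;
  Depths[I] = 1;
  I->replaceAllUsesWith(J);  // the CallbackVH rewrites the key: I -> J
  // With the default ValueMapConfig (FollowRAUW = true), Depths now maps J.
}
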
+ const void *getPointerIntoBucketsArray() const { + return Map.getPointerIntoBucketsArray(); + } + +private: + // Takes a key being looked up in the map and wraps it into a + // ValueMapCallbackVH, the actual key type of the map. We use a helper + // function because ValueMapCVH is constructed with a second parameter. + ValueMapCVH Wrap(KeyT key) const { + // The only way the resulting CallbackVH could try to modify *this (making + // the const_cast incorrect) is if it gets inserted into the map. But then + // this function must have been called from a non-const method, making the + // const_cast ok. + return ValueMapCVH(key, const_cast(this)); + } +}; + +// This CallbackVH updates its ValueMap when the contained Value changes, +// according to the user's preferences expressed through the Config object. +template +class ValueMapCallbackVH : public CallbackVH { + friend class ValueMap; + friend struct DenseMapInfo; + typedef ValueMap ValueMapT; + typedef typename llvm::remove_pointer::type KeySansPointerT; + + ValueMapT *Map; + + ValueMapCallbackVH(KeyT Key, ValueMapT *Map) + : CallbackVH(const_cast(static_cast(Key))), + Map(Map) {} + +public: + KeyT Unwrap() const { return cast_or_null(getValPtr()); } + + virtual void deleted() { + // Make a copy that won't get changed even when *this is destroyed. + ValueMapCallbackVH Copy(*this); + sys::Mutex *M = Config::getMutex(Copy.Map->Data); + if (M) + M->acquire(); + Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this. + Copy.Map->Map.erase(Copy); // Definitely destroys *this. + if (M) + M->release(); + } + virtual void allUsesReplacedWith(Value *new_key) { + assert(isa(new_key) && + "Invalid RAUW on key of ValueMap<>"); + // Make a copy that won't get changed even when *this is destroyed. + ValueMapCallbackVH Copy(*this); + sys::Mutex *M = Config::getMutex(Copy.Map->Data); + if (M) + M->acquire(); + + KeyT typed_new_key = cast(new_key); + // Can destroy *this: + Config::onRAUW(Copy.Map->Data, Copy.Unwrap(), typed_new_key); + if (Config::FollowRAUW) { + typename ValueMapT::MapT::iterator I = Copy.Map->Map.find(Copy); + // I could == Copy.Map->Map.end() if the onRAUW callback already + // removed the old mapping. + if (I != Copy.Map->Map.end()) { + ValueT Target(I->second); + Copy.Map->Map.erase(I); // Definitely destroys *this. 
+ Copy.Map->insert(std::make_pair(typed_new_key, Target)); + } + } + if (M) + M->release(); + } +}; + +template +struct DenseMapInfo > { + typedef ValueMapCallbackVH VH; + typedef DenseMapInfo PointerInfo; + + static inline VH getEmptyKey() { + return VH(PointerInfo::getEmptyKey(), NULL); + } + static inline VH getTombstoneKey() { + return VH(PointerInfo::getTombstoneKey(), NULL); + } + static unsigned getHashValue(const VH &Val) { + return PointerInfo::getHashValue(Val.Unwrap()); + } + static bool isEqual(const VH &LHS, const VH &RHS) { + return LHS == RHS; + } +}; + + +template +class ValueMapIterator : + public std::iterator, + ptrdiff_t> { + typedef typename DenseMapT::iterator BaseT; + typedef typename DenseMapT::mapped_type ValueT; + BaseT I; +public: + ValueMapIterator() : I() {} + + ValueMapIterator(BaseT I) : I(I) {} + + BaseT base() const { return I; } + + struct ValueTypeProxy { + const KeyT first; + ValueT& second; + ValueTypeProxy *operator->() { return this; } + operator std::pair() const { + return std::make_pair(first, second); + } + }; + + ValueTypeProxy operator*() const { + ValueTypeProxy Result = {I->first.Unwrap(), I->second}; + return Result; + } + + ValueTypeProxy operator->() const { + return operator*(); + } + + bool operator==(const ValueMapIterator &RHS) const { + return I == RHS.I; + } + bool operator!=(const ValueMapIterator &RHS) const { + return I != RHS.I; + } + + inline ValueMapIterator& operator++() { // Preincrement + ++I; + return *this; + } + ValueMapIterator operator++(int) { // Postincrement + ValueMapIterator tmp = *this; ++*this; return tmp; + } +}; + +template +class ValueMapConstIterator : + public std::iterator, + ptrdiff_t> { + typedef typename DenseMapT::const_iterator BaseT; + typedef typename DenseMapT::mapped_type ValueT; + BaseT I; +public: + ValueMapConstIterator() : I() {} + ValueMapConstIterator(BaseT I) : I(I) {} + ValueMapConstIterator(ValueMapIterator Other) + : I(Other.base()) {} + + BaseT base() const { return I; } + + struct ValueTypeProxy { + const KeyT first; + const ValueT& second; + ValueTypeProxy *operator->() { return this; } + operator std::pair() const { + return std::make_pair(first, second); + } + }; + + ValueTypeProxy operator*() const { + ValueTypeProxy Result = {I->first.Unwrap(), I->second}; + return Result; + } + + ValueTypeProxy operator->() const { + return operator*(); + } + + bool operator==(const ValueMapConstIterator &RHS) const { + return I == RHS.I; + } + bool operator!=(const ValueMapConstIterator &RHS) const { + return I != RHS.I; + } + + inline ValueMapConstIterator& operator++() { // Preincrement + ++I; + return *this; + } + ValueMapConstIterator operator++(int) { // Postincrement + ValueMapConstIterator tmp = *this; ++*this; return tmp; + } +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/ADT/VectorExtras.h b/final/include/llvm/ADT/VectorExtras.h new file mode 100644 index 00000000000..e05f585996f --- /dev/null +++ b/final/include/llvm/ADT/VectorExtras.h @@ -0,0 +1,41 @@ +//===-- llvm/ADT/VectorExtras.h - Helpers for std::vector -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains helper functions which are useful for working with the +// std::vector class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_VECTOREXTRAS_H
+#define LLVM_ADT_VECTOREXTRAS_H
+
+#include <cstdarg>
+#include <vector>
+
+namespace llvm {
+
+/// make_vector - Helper function which is useful for building temporary vectors
+/// to pass into type construction of CallInst ctors. This turns a null
+/// terminated list of pointers (or other value types) into a real live vector.
+///
+template <class T>
+inline std::vector<T> make_vector(T A, ...) {
+  va_list Args;
+  va_start(Args, A);
+  std::vector<T> Result;
+  Result.push_back(A);
+  while (T Val = va_arg(Args, T))
+    Result.push_back(Val);
+  va_end(Args);
+  return Result;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ADT/ilist.h b/final/include/llvm/ADT/ilist.h
new file mode 100644
index 00000000000..865fcb3d8aa
--- /dev/null
+++ b/final/include/llvm/ADT/ilist.h
@@ -0,0 +1,708 @@
+//==-- llvm/ADT/ilist.h - Intrusive Linked List Template ---------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes to implement an intrusive doubly linked list class
+// (i.e. each node of the list must contain a next and previous field for the
+// list).
+//
+// The ilist_traits trait class is used to gain access to the next and previous
+// fields of the node type that the list is instantiated with. If it is not
+// specialized, the list defaults to using the getPrev(), getNext() method calls
+// to get the next and previous pointers.
+//
+// The ilist class itself should be a plug-in replacement for std::list,
+// assuming that the nodes contain next/prev pointers. This list replacement
+// does not provide a constant time size() method, so be careful to use empty()
+// when you really want to know if it's empty.
+//
+// The ilist class is implemented by allocating a 'tail' node when the list is
+// created (using ilist_traits<>::createSentinel()). This tail node is
+// absolutely required because the user must be able to compute end()-1. Because
+// of this, users of the direct next/prev links will see an extra link on the
+// end of the list, which should be ignored.
+//
+// Requirements for a user of this list:
+//
+//   1. The user must provide {g|s}et{Next|Prev} methods, or specialize
+//      ilist_traits to provide an alternate way of getting and setting next and
+//      prev links.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_H
+#define LLVM_ADT_ILIST_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+
+namespace llvm {
+
+template<typename NodeTy, typename Traits> class iplist;
+template<typename NodeTy> class ilist_iterator;
+
+/// ilist_nextprev_traits - A fragment for template traits for intrusive list
+/// that provides default next/prev implementations for common operations.
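An editorial sketch (not part of the patch) of requirement (1) above: a hypothetical node type exposing the accessors the default traits call. In practice one would derive from ilist_node, defined later in this patch, which supplies these automatically.

struct MyNode {
  MyNode *Prev, *Next;
  MyNode() : Prev(0), Next(0) {}
  MyNode *getPrev()             { return Prev; }
  MyNode *getNext()             { return Next; }
  const MyNode *getPrev() const { return Prev; }
  const MyNode *getNext() const { return Next; }
  void setPrev(MyNode *P)       { Prev = P; }
  void setNext(MyNode *N)       { Next = N; }
};
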
+/// +template +struct ilist_nextprev_traits { + static NodeTy *getPrev(NodeTy *N) { return N->getPrev(); } + static NodeTy *getNext(NodeTy *N) { return N->getNext(); } + static const NodeTy *getPrev(const NodeTy *N) { return N->getPrev(); } + static const NodeTy *getNext(const NodeTy *N) { return N->getNext(); } + + static void setPrev(NodeTy *N, NodeTy *Prev) { N->setPrev(Prev); } + static void setNext(NodeTy *N, NodeTy *Next) { N->setNext(Next); } +}; + +template +struct ilist_traits; + +/// ilist_sentinel_traits - A fragment for template traits for intrusive list +/// that provides default sentinel implementations for common operations. +/// +/// ilist_sentinel_traits implements a lazy dynamic sentinel allocation +/// strategy. The sentinel is stored in the prev field of ilist's Head. +/// +template +struct ilist_sentinel_traits { + /// createSentinel - create the dynamic sentinel + static NodeTy *createSentinel() { return new NodeTy(); } + + /// destroySentinel - deallocate the dynamic sentinel + static void destroySentinel(NodeTy *N) { delete N; } + + /// provideInitialHead - when constructing an ilist, provide a starting + /// value for its Head + /// @return null node to indicate that it needs to be allocated later + static NodeTy *provideInitialHead() { return 0; } + + /// ensureHead - make sure that Head is either already + /// initialized or assigned a fresh sentinel + /// @return the sentinel + static NodeTy *ensureHead(NodeTy *&Head) { + if (!Head) { + Head = ilist_traits::createSentinel(); + ilist_traits::noteHead(Head, Head); + ilist_traits::setNext(Head, 0); + return Head; + } + return ilist_traits::getPrev(Head); + } + + /// noteHead - stash the sentinel into its default location + static void noteHead(NodeTy *NewHead, NodeTy *Sentinel) { + ilist_traits::setPrev(NewHead, Sentinel); + } +}; + +/// ilist_node_traits - A fragment for template traits for intrusive list +/// that provides default node related operations. +/// +template +struct ilist_node_traits { + static NodeTy *createNode(const NodeTy &V) { return new NodeTy(V); } + static void deleteNode(NodeTy *V) { delete V; } + + void addNodeToList(NodeTy *) {} + void removeNodeFromList(NodeTy *) {} + void transferNodesFromList(ilist_node_traits & /*SrcTraits*/, + ilist_iterator /*first*/, + ilist_iterator /*last*/) {} +}; + +/// ilist_default_traits - Default template traits for intrusive list. +/// By inheriting from this, you can easily use default implementations +/// for all common operations. +/// +template +struct ilist_default_traits : public ilist_nextprev_traits, + public ilist_sentinel_traits, + public ilist_node_traits { +}; + +// Template traits for intrusive list. By specializing this template class, you +// can change what next/prev fields are used to store the links... +template +struct ilist_traits : public ilist_default_traits {}; + +// Const traits are the same as nonconst traits... +template +struct ilist_traits : public ilist_traits {}; + +//===----------------------------------------------------------------------===// +// ilist_iterator - Iterator for intrusive list. 
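An editorial sketch (not part of the patch) of the specialization point just described, reusing the hypothetical MyNode from above: override the node callbacks while inheriting the remaining defaults.

namespace llvm {
template<>
struct ilist_traits<MyNode> : public ilist_default_traits<MyNode> {
  void addNodeToList(MyNode *N)      { /* e.g. record an owner pointer */ }
  void removeNodeFromList(MyNode *N) { /* undo it */ }
};
} // End llvm namespace
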
+// +template +class ilist_iterator + : public std::iterator { + +public: + typedef ilist_traits Traits; + typedef std::iterator super; + + typedef typename super::value_type value_type; + typedef typename super::difference_type difference_type; + typedef typename super::pointer pointer; + typedef typename super::reference reference; +private: + pointer NodePtr; + + // ilist_iterator is not a random-access iterator, but it has an + // implicit conversion to pointer-type, which is. Declare (but + // don't define) these functions as private to help catch + // accidental misuse. + void operator[](difference_type) const; + void operator+(difference_type) const; + void operator-(difference_type) const; + void operator+=(difference_type) const; + void operator-=(difference_type) const; + template void operator<(T) const; + template void operator<=(T) const; + template void operator>(T) const; + template void operator>=(T) const; + template void operator-(T) const; +public: + + ilist_iterator(pointer NP) : NodePtr(NP) {} + ilist_iterator(reference NR) : NodePtr(&NR) {} + ilist_iterator() : NodePtr(0) {} + + // This is templated so that we can allow constructing a const iterator from + // a nonconst iterator... + template + ilist_iterator(const ilist_iterator &RHS) + : NodePtr(RHS.getNodePtrUnchecked()) {} + + // This is templated so that we can allow assigning to a const iterator from + // a nonconst iterator... + template + const ilist_iterator &operator=(const ilist_iterator &RHS) { + NodePtr = RHS.getNodePtrUnchecked(); + return *this; + } + + // Accessors... + operator pointer() const { + return NodePtr; + } + + reference operator*() const { + return *NodePtr; + } + pointer operator->() const { return &operator*(); } + + // Comparison operators + bool operator==(const ilist_iterator &RHS) const { + return NodePtr == RHS.NodePtr; + } + bool operator!=(const ilist_iterator &RHS) const { + return NodePtr != RHS.NodePtr; + } + + // Increment and decrement operators... + ilist_iterator &operator--() { // predecrement - Back up + NodePtr = Traits::getPrev(NodePtr); + assert(NodePtr && "--'d off the beginning of an ilist!"); + return *this; + } + ilist_iterator &operator++() { // preincrement - Advance + NodePtr = Traits::getNext(NodePtr); + return *this; + } + ilist_iterator operator--(int) { // postdecrement operators... + ilist_iterator tmp = *this; + --*this; + return tmp; + } + ilist_iterator operator++(int) { // postincrement operators... + ilist_iterator tmp = *this; + ++*this; + return tmp; + } + + // Internal interface, do not use... + pointer getNodePtrUnchecked() const { return NodePtr; } +}; + +// do not implement. this is to catch errors when people try to use +// them as random access iterators +template +void operator-(int, ilist_iterator); +template +void operator-(ilist_iterator,int); + +template +void operator+(int, ilist_iterator); +template +void operator+(ilist_iterator,int); + +// operator!=/operator== - Allow mixed comparisons without dereferencing +// the iterator, which could very likely be pointing to end(). 
+template +bool operator!=(const T* LHS, const ilist_iterator &RHS) { + return LHS != RHS.getNodePtrUnchecked(); +} +template +bool operator==(const T* LHS, const ilist_iterator &RHS) { + return LHS == RHS.getNodePtrUnchecked(); +} +template +bool operator!=(T* LHS, const ilist_iterator &RHS) { + return LHS != RHS.getNodePtrUnchecked(); +} +template +bool operator==(T* LHS, const ilist_iterator &RHS) { + return LHS == RHS.getNodePtrUnchecked(); +} + + +// Allow ilist_iterators to convert into pointers to a node automatically when +// used by the dyn_cast, cast, isa mechanisms... + +template struct simplify_type; + +template struct simplify_type > { + typedef NodeTy* SimpleType; + + static SimpleType getSimplifiedValue(const ilist_iterator &Node) { + return &*Node; + } +}; +template struct simplify_type > { + typedef NodeTy* SimpleType; + + static SimpleType getSimplifiedValue(const ilist_iterator &Node) { + return &*Node; + } +}; + + +//===----------------------------------------------------------------------===// +// +/// iplist - The subset of list functionality that can safely be used on nodes +/// of polymorphic types, i.e. a heterogenous list with a common base class that +/// holds the next/prev pointers. The only state of the list itself is a single +/// pointer to the head of the list. +/// +/// This list can be in one of three interesting states: +/// 1. The list may be completely unconstructed. In this case, the head +/// pointer is null. When in this form, any query for an iterator (e.g. +/// begin() or end()) causes the list to transparently change to state #2. +/// 2. The list may be empty, but contain a sentinel for the end iterator. This +/// sentinel is created by the Traits::createSentinel method and is a link +/// in the list. When the list is empty, the pointer in the iplist points +/// to the sentinel. Once the sentinel is constructed, it +/// is not destroyed until the list is. +/// 3. The list may contain actual objects in it, which are stored as a doubly +/// linked list of nodes. One invariant of the list is that the predecessor +/// of the first node in the list always points to the last node in the list, +/// and the successor pointer for the sentinel (which always stays at the +/// end of the list) is always null. +/// +template > +class iplist : public Traits { + mutable NodeTy *Head; + + // Use the prev node pointer of 'head' as the tail pointer. This is really a + // circularly linked list where we snip the 'next' link from the sentinel node + // back to the first node in the list (to preserve assertions about going off + // the end of the list). + NodeTy *getTail() { return this->ensureHead(Head); } + const NodeTy *getTail() const { return this->ensureHead(Head); } + void setTail(NodeTy *N) const { this->noteHead(Head, N); } + + /// CreateLazySentinel - This method verifies whether the sentinel for the + /// list has been created and lazily makes it if not. + void CreateLazySentinel() const { + this->ensureHead(Head); + } + + static bool op_less(NodeTy &L, NodeTy &R) { return L < R; } + static bool op_equal(NodeTy &L, NodeTy &R) { return L == R; } + + // No fundamental reason why iplist can't be copyable, but the default + // copy/copy-assign won't do. 
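A usage sketch (editorial, not part of the patch) of the ownership convention: iplist links heap-allocated nodes in place and frees them through the traits' deleteNode. MyNode is the hypothetical type sketched earlier.

void fillList(llvm::iplist<MyNode> &L) {
  L.push_back(new MyNode());   // the list takes ownership of the node
  L.push_back(new MyNode());
  // Iteration visits the nodes in place; no copies are made.
  for (llvm::iplist<MyNode>::iterator I = L.begin(), E = L.end(); I != E; ++I)
    (void)*I;
  L.clear();                   // erase() calls deleteNode(), freeing each node
}
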
+ iplist(const iplist &); // do not implement + void operator=(const iplist &); // do not implement + +public: + typedef NodeTy *pointer; + typedef const NodeTy *const_pointer; + typedef NodeTy &reference; + typedef const NodeTy &const_reference; + typedef NodeTy value_type; + typedef ilist_iterator iterator; + typedef ilist_iterator const_iterator; + typedef size_t size_type; + typedef ptrdiff_t difference_type; + typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + + iplist() : Head(this->provideInitialHead()) {} + ~iplist() { + if (!Head) return; + clear(); + Traits::destroySentinel(getTail()); + } + + // Iterator creation methods. + iterator begin() { + CreateLazySentinel(); + return iterator(Head); + } + const_iterator begin() const { + CreateLazySentinel(); + return const_iterator(Head); + } + iterator end() { + CreateLazySentinel(); + return iterator(getTail()); + } + const_iterator end() const { + CreateLazySentinel(); + return const_iterator(getTail()); + } + + // reverse iterator creation methods. + reverse_iterator rbegin() { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } + reverse_iterator rend() { return reverse_iterator(begin()); } + const_reverse_iterator rend() const { return const_reverse_iterator(begin());} + + + // Miscellaneous inspection routines. + size_type max_size() const { return size_type(-1); } + bool empty() const { return Head == 0 || Head == getTail(); } + + // Front and back accessor functions... + reference front() { + assert(!empty() && "Called front() on empty list!"); + return *Head; + } + const_reference front() const { + assert(!empty() && "Called front() on empty list!"); + return *Head; + } + reference back() { + assert(!empty() && "Called back() on empty list!"); + return *this->getPrev(getTail()); + } + const_reference back() const { + assert(!empty() && "Called back() on empty list!"); + return *this->getPrev(getTail()); + } + + void swap(iplist &RHS) { + assert(0 && "Swap does not use list traits callback correctly yet!"); + std::swap(Head, RHS.Head); + } + + iterator insert(iterator where, NodeTy *New) { + NodeTy *CurNode = where.getNodePtrUnchecked(); + NodeTy *PrevNode = this->getPrev(CurNode); + this->setNext(New, CurNode); + this->setPrev(New, PrevNode); + + if (CurNode != Head) // Is PrevNode off the beginning of the list? + this->setNext(PrevNode, New); + else + Head = New; + this->setPrev(CurNode, New); + + this->addNodeToList(New); // Notify traits that we added a node... + return New; + } + + iterator insertAfter(iterator where, NodeTy *New) { + if (empty()) + return insert(begin(), New); + else + return insert(++where, New); + } + + NodeTy *remove(iterator &IT) { + assert(IT != end() && "Cannot remove end of list!"); + NodeTy *Node = &*IT; + NodeTy *NextNode = this->getNext(Node); + NodeTy *PrevNode = this->getPrev(Node); + + if (Node != Head) // Is PrevNode off the beginning of the list? + this->setNext(PrevNode, NextNode); + else + Head = NextNode; + this->setPrev(NextNode, PrevNode); + IT = NextNode; + this->removeNodeFromList(Node); // Notify traits that we removed a node... + + // Set the next/prev pointers of the current node to null. This isn't + // strictly required, but this catches errors where a node is removed from + // an ilist (and potentially deleted) with iterators still pointing at it. 
+ // When those iterators are incremented or decremented, they will assert on + // the null next/prev pointer instead of "usually working". + this->setNext(Node, 0); + this->setPrev(Node, 0); + return Node; + } + + NodeTy *remove(const iterator &IT) { + iterator MutIt = IT; + return remove(MutIt); + } + + // erase - remove a node from the controlled sequence... and delete it. + iterator erase(iterator where) { + this->deleteNode(remove(where)); + return where; + } + + +private: + // transfer - The heart of the splice function. Move linked list nodes from + // [first, last) into position. + // + void transfer(iterator position, iplist &L2, iterator first, iterator last) { + assert(first != last && "Should be checked by callers"); + + if (position != last) { + // Note: we have to be careful about the case when we move the first node + // in the list. This node is the list sentinel node and we can't move it. + NodeTy *ThisSentinel = getTail(); + setTail(0); + NodeTy *L2Sentinel = L2.getTail(); + L2.setTail(0); + + // Remove [first, last) from its old position. + NodeTy *First = &*first, *Prev = this->getPrev(First); + NodeTy *Next = last.getNodePtrUnchecked(), *Last = this->getPrev(Next); + if (Prev) + this->setNext(Prev, Next); + else + L2.Head = Next; + this->setPrev(Next, Prev); + + // Splice [first, last) into its new position. + NodeTy *PosNext = position.getNodePtrUnchecked(); + NodeTy *PosPrev = this->getPrev(PosNext); + + // Fix head of list... + if (PosPrev) + this->setNext(PosPrev, First); + else + Head = First; + this->setPrev(First, PosPrev); + + // Fix end of list... + this->setNext(Last, PosNext); + this->setPrev(PosNext, Last); + + this->transferNodesFromList(L2, First, PosNext); + + // Now that everything is set, restore the pointers to the list sentinels. + L2.setTail(L2Sentinel); + setTail(ThisSentinel); + } + } + +public: + + //===----------------------------------------------------------------------=== + // Functionality derived from other functions defined above... + // + + size_type size() const { + if (Head == 0) return 0; // Don't require construction of sentinel if empty. + return std::distance(begin(), end()); + } + + iterator erase(iterator first, iterator last) { + while (first != last) + first = erase(first); + return last; + } + + void clear() { if (Head) erase(begin(), end()); } + + // Front and back inserters... + void push_front(NodeTy *val) { insert(begin(), val); } + void push_back(NodeTy *val) { insert(end(), val); } + void pop_front() { + assert(!empty() && "pop_front() on empty list!"); + erase(begin()); + } + void pop_back() { + assert(!empty() && "pop_back() on empty list!"); + iterator t = end(); erase(--t); + } + + // Special forms of insert... + template void insert(iterator where, InIt first, InIt last) { + for (; first != last; ++first) insert(where, *first); + } + + // Splice members - defined in terms of transfer... 
+ void splice(iterator where, iplist &L2) { + if (!L2.empty()) + transfer(where, L2, L2.begin(), L2.end()); + } + void splice(iterator where, iplist &L2, iterator first) { + iterator last = first; ++last; + if (where == first || where == last) return; // No change + transfer(where, L2, first, last); + } + void splice(iterator where, iplist &L2, iterator first, iterator last) { + if (first != last) transfer(where, L2, first, last); + } + + + + //===----------------------------------------------------------------------=== + // High-Level Functionality that shouldn't really be here, but is part of list + // + + // These two functions are actually called remove/remove_if in list<>, but + // they actually do the job of erase, rename them accordingly. + // + void erase(const NodeTy &val) { + for (iterator I = begin(), E = end(); I != E; ) { + iterator next = I; ++next; + if (*I == val) erase(I); + I = next; + } + } + template void erase_if(Pr1 pred) { + for (iterator I = begin(), E = end(); I != E; ) { + iterator next = I; ++next; + if (pred(*I)) erase(I); + I = next; + } + } + + template void unique(Pr2 pred) { + if (empty()) return; + for (iterator I = begin(), E = end(), Next = begin(); ++Next != E;) { + if (pred(*I)) + erase(Next); + else + I = Next; + Next = I; + } + } + void unique() { unique(op_equal); } + + template void merge(iplist &right, Pr3 pred) { + iterator first1 = begin(), last1 = end(); + iterator first2 = right.begin(), last2 = right.end(); + while (first1 != last1 && first2 != last2) + if (pred(*first2, *first1)) { + iterator next = first2; + transfer(first1, right, first2, ++next); + first2 = next; + } else { + ++first1; + } + if (first2 != last2) transfer(last1, right, first2, last2); + } + void merge(iplist &right) { return merge(right, op_less); } + + template void sort(Pr3 pred); + void sort() { sort(op_less); } +}; + + +template +struct ilist : public iplist { + typedef typename iplist::size_type size_type; + typedef typename iplist::iterator iterator; + + ilist() {} + ilist(const ilist &right) { + insert(this->begin(), right.begin(), right.end()); + } + explicit ilist(size_type count) { + insert(this->begin(), count, NodeTy()); + } + ilist(size_type count, const NodeTy &val) { + insert(this->begin(), count, val); + } + template ilist(InIt first, InIt last) { + insert(this->begin(), first, last); + } + + // bring hidden functions into scope + using iplist::insert; + using iplist::push_front; + using iplist::push_back; + + // Main implementation here - Insert for a node passed by value... + iterator insert(iterator where, const NodeTy &val) { + return insert(where, this->createNode(val)); + } + + + // Front and back inserters... + void push_front(const NodeTy &val) { insert(this->begin(), val); } + void push_back(const NodeTy &val) { insert(this->end(), val); } + + // Special forms of insert... + template void insert(iterator where, InIt first, InIt last) { + for (; first != last; ++first) insert(where, *first); + } + void insert(iterator where, size_type count, const NodeTy &val) { + for (; count != 0; --count) insert(where, val); + } + + // Assign special forms... 
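A sketch (editorial, not part of the patch) of the splice members above: nodes move between lists by pointer surgery, so their addresses stay stable.

void moveAll(llvm::iplist<MyNode> &To, llvm::iplist<MyNode> &From) {
  To.splice(To.end(), From);   // relink every node of From onto To
  // From is now empty; no node was copied, destroyed, or reallocated.
}
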
+ void assign(size_type count, const NodeTy &val) { + iterator I = this->begin(); + for (; I != this->end() && count != 0; ++I, --count) + *I = val; + if (count != 0) + insert(this->end(), val, val); + else + erase(I, this->end()); + } + template void assign(InIt first1, InIt last1) { + iterator first2 = this->begin(), last2 = this->end(); + for ( ; first1 != last1 && first2 != last2; ++first1, ++first2) + *first1 = *first2; + if (first2 == last2) + erase(first1, last1); + else + insert(last1, first2, last2); + } + + + // Resize members... + void resize(size_type newsize, NodeTy val) { + iterator i = this->begin(); + size_type len = 0; + for ( ; i != this->end() && len < newsize; ++i, ++len) /* empty*/ ; + + if (len == newsize) + erase(i, this->end()); + else // i == end() + insert(this->end(), newsize - len, val); + } + void resize(size_type newsize) { resize(newsize, NodeTy()); } +}; + +} // End llvm namespace + +namespace std { + // Ensure that swap uses the fast list swap... + template + void swap(llvm::iplist &Left, llvm::iplist &Right) { + Left.swap(Right); + } +} // End 'std' extensions... + +#endif // LLVM_ADT_ILIST_H diff --git a/final/include/llvm/ADT/ilist_node.h b/final/include/llvm/ADT/ilist_node.h new file mode 100644 index 00000000000..f0080035cb8 --- /dev/null +++ b/final/include/llvm/ADT/ilist_node.h @@ -0,0 +1,106 @@ +//==-- llvm/ADT/ilist_node.h - Intrusive Linked List Helper ------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ilist_node class template, which is a convenient +// base class for creating classes that can be used with ilists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_ILIST_NODE_H +#define LLVM_ADT_ILIST_NODE_H + +namespace llvm { + +template +struct ilist_traits; + +/// ilist_half_node - Base class that provides prev services for sentinels. +/// +template +class ilist_half_node { + friend struct ilist_traits; + NodeTy *Prev; +protected: + NodeTy *getPrev() { return Prev; } + const NodeTy *getPrev() const { return Prev; } + void setPrev(NodeTy *P) { Prev = P; } + ilist_half_node() : Prev(0) {} +}; + +template +struct ilist_nextprev_traits; + +/// ilist_node - Base class that provides next/prev services for nodes +/// that use ilist_nextprev_traits or ilist_default_traits. +/// +template +class ilist_node : private ilist_half_node { + friend struct ilist_nextprev_traits; + friend struct ilist_traits; + NodeTy *Next; + NodeTy *getNext() { return Next; } + const NodeTy *getNext() const { return Next; } + void setNext(NodeTy *N) { Next = N; } +protected: + ilist_node() : Next(0) {} + +public: + /// @name Adjacent Node Accessors + /// @{ + + /// \brief Get the previous node, or 0 for the list head. + NodeTy *getPrevNode() { + NodeTy *Prev = this->getPrev(); + + // Check for sentinel. + if (!Prev->getNext()) + return 0; + + return Prev; + } + + /// \brief Get the previous node, or 0 for the list head. + const NodeTy *getPrevNode() const { + const NodeTy *Prev = this->getPrev(); + + // Check for sentinel. + if (!Prev->getNext()) + return 0; + + return Prev; + } + + /// \brief Get the next node, or 0 for the list tail. + NodeTy *getNextNode() { + NodeTy *Next = getNext(); + + // Check for sentinel. 
diff --git a/final/include/llvm/ADT/ilist_node.h b/final/include/llvm/ADT/ilist_node.h
new file mode 100644
index 00000000000..f0080035cb8
--- /dev/null
+++ b/final/include/llvm/ADT/ilist_node.h
@@ -0,0 +1,106 @@
+//==-- llvm/ADT/ilist_node.h - Intrusive Linked List Helper ------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ilist_node class template, which is a convenient
+// base class for creating classes that can be used with ilists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_NODE_H
+#define LLVM_ADT_ILIST_NODE_H
+
+namespace llvm {
+
+template<typename NodeTy>
+struct ilist_traits;
+
+/// ilist_half_node - Base class that provides prev services for sentinels.
+///
+template<typename NodeTy>
+class ilist_half_node {
+  friend struct ilist_traits<NodeTy>;
+  NodeTy *Prev;
+protected:
+  NodeTy *getPrev() { return Prev; }
+  const NodeTy *getPrev() const { return Prev; }
+  void setPrev(NodeTy *P) { Prev = P; }
+  ilist_half_node() : Prev(0) {}
+};
+
+template<typename NodeTy>
+struct ilist_nextprev_traits;
+
+/// ilist_node - Base class that provides next/prev services for nodes
+/// that use ilist_nextprev_traits or ilist_default_traits.
+///
+template<typename NodeTy>
+class ilist_node : private ilist_half_node<NodeTy> {
+  friend struct ilist_nextprev_traits<NodeTy>;
+  friend struct ilist_traits<NodeTy>;
+  NodeTy *Next;
+  NodeTy *getNext() { return Next; }
+  const NodeTy *getNext() const { return Next; }
+  void setNext(NodeTy *N) { Next = N; }
+protected:
+  ilist_node() : Next(0) {}
+
+public:
+  /// @name Adjacent Node Accessors
+  /// @{
+
+  /// \brief Get the previous node, or 0 for the list head.
+  NodeTy *getPrevNode() {
+    NodeTy *Prev = this->getPrev();
+
+    // Check for sentinel.
+    if (!Prev->getNext())
+      return 0;
+
+    return Prev;
+  }
+
+  /// \brief Get the previous node, or 0 for the list head.
+  const NodeTy *getPrevNode() const {
+    const NodeTy *Prev = this->getPrev();
+
+    // Check for sentinel.
+    if (!Prev->getNext())
+      return 0;
+
+    return Prev;
+  }
+
+  /// \brief Get the next node, or 0 for the list tail.
+  NodeTy *getNextNode() {
+    NodeTy *Next = getNext();
+
+    // Check for sentinel.
+    if (!Next->getNext())
+      return 0;
+
+    return Next;
+  }
+
+  /// \brief Get the next node, or 0 for the list tail.
+  const NodeTy *getNextNode() const {
+    const NodeTy *Next = getNext();
+
+    // Check for sentinel.
+    if (!Next->getNext())
+      return 0;
+
+    return Next;
+  }
+
+  /// @}
+};
+
+} // End llvm namespace
+
+#endif
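For orientation, here is a small hedged sketch of how these accessors are typically used to walk a list without iterators. Node is hypothetical: it is assumed to derive from ilist_node<Node> and to live in an ilist<Node>.

    #include "llvm/ADT/ilist.h"
    #include "llvm/ADT/ilist_node.h"

    struct Node : public llvm::ilist_node<Node> {}; // hypothetical element type

    void walk(llvm::ilist<Node> &List) {
      for (Node *N = List.empty() ? 0 : &List.front(); N; N = N->getNextNode()) {
        // Visit N; getNextNode() returns 0 once the next link hits the sentinel.
      }
    }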
diff --git a/final/include/llvm/AbstractTypeUser.h b/final/include/llvm/AbstractTypeUser.h
new file mode 100644
index 00000000000..81f5c5c7680
--- /dev/null
+++ b/final/include/llvm/AbstractTypeUser.h
@@ -0,0 +1,205 @@
+//===-- llvm/AbstractTypeUser.h - AbstractTypeUser Interface ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the AbstractTypeUser class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ABSTRACT_TYPE_USER_H
+#define LLVM_ABSTRACT_TYPE_USER_H
+
+#if !defined(LLVM_TYPE_H) && !defined(LLVM_VALUE_H)
+#error Do not include this file directly. Include Type.h instead.
+#error Some versions of GCC (e.g. 3.4 and 4.1) can not handle the inlined method
+#error PATypeHolder::dropRef() correctly otherwise.
+#endif
+
+// This is the "master" include for <cassert>. Whether this file needs it or
+// not, it must always include <cassert> for the files which include
+// llvm/AbstractTypeUser.h
+//
+// In this way, most every LLVM source file will have access to the assert()
+// macro without having to #include <cassert> directly.
+//
+#include <cassert>
+
+namespace llvm {
+
+class Value;
+class Type;
+class DerivedType;
+template<typename T> struct simplify_type;
+
+/// The AbstractTypeUser class is an interface to be implemented by classes who
+/// could possibly use an abstract type. Abstract types are denoted by the
+/// isAbstract flag set to true in the Type class. These are classes that
+/// contain an Opaque type in their structure somewhere.
+///
+/// Classes must implement this interface so that they may be notified when an
+/// abstract type is resolved. Abstract types may be resolved into more
+/// concrete types through: linking, parsing, and bitcode reading. When this
+/// happens, all of the users of the type must be updated to reference the new,
+/// more concrete type. They are notified through the AbstractTypeUser
+/// interface.
+///
+/// In addition to this, AbstractTypeUsers must keep the use list of the
+/// potentially abstract type that they reference up-to-date. To do this in a
+/// nice, transparent way, the PATypeHandle class is used to hold "Potentially
+/// Abstract Types", and keep the use list of the abstract types up-to-date.
+/// @brief LLVM Abstract Type User Representation
+class AbstractTypeUser {
+protected:
+  virtual ~AbstractTypeUser();                        // Derive from me
+
+  /// setType - It's normally not possible to change a Value's type in place,
+  /// but an AbstractTypeUser subclass that knows what it's doing can be
+  /// permitted to do so with care.
+  void setType(Value *V, const Type *NewTy);
+
+public:
+
+  /// refineAbstractType - The callback method invoked when an abstract type is
+  /// resolved to another type. An object must override this method to update
+  /// its internal state to reference NewType instead of OldType.
+  ///
+  virtual void refineAbstractType(const DerivedType *OldTy,
+                                  const Type *NewTy) = 0;
+
+  /// The other case which AbstractTypeUsers must be aware of is when a type
+  /// makes the transition from being abstract (where it has clients on its
+  /// AbstractTypeUsers list) to concrete (where it does not). This method
+  /// notifies ATUs when this occurs for a type.
+  ///
+  virtual void typeBecameConcrete(const DerivedType *AbsTy) = 0;
+
+  // for debugging...
+  virtual void dump() const = 0;
+};
+
+
+/// PATypeHandle - Handle to a Type subclass. This class is used to keep the
+/// use list of abstract types up-to-date.
+///
+class PATypeHandle {
+  const Type *Ty;
+  AbstractTypeUser * const User;
+
+  // These functions are defined at the bottom of Type.h. See the comment there
+  // for justification.
+  void addUser();
+  void removeUser();
+public:
+  // ctor - Add use to type if abstract. Note that Ty must not be null.
+  inline PATypeHandle(const Type *ty, AbstractTypeUser *user)
+    : Ty(ty), User(user) {
+    addUser();
+  }
+
+  // ctor - Add use to type if abstract.
+  inline PATypeHandle(const PATypeHandle &T) : Ty(T.Ty), User(T.User) {
+    addUser();
+  }
+
+  // dtor - Remove reference to type...
+  inline ~PATypeHandle() { removeUser(); }
+
+  // Automatic casting operator so that the handle may be used naturally
+  inline operator Type *() const { return const_cast<Type*>(Ty); }
+  inline Type *get() const { return const_cast<Type*>(Ty); }
+
+  // operator= - Allow assignment to handle
+  inline Type *operator=(const Type *ty) {
+    if (Ty != ty) {   // Ensure we don't accidentally drop last ref to Ty
+      removeUser();
+      Ty = ty;
+      addUser();
+    }
+    return get();
+  }
+
+  // operator= - Allow assignment to handle
+  inline const Type *operator=(const PATypeHandle &T) {
+    return operator=(T.Ty);
+  }
+
+  inline bool operator==(const Type *ty) {
+    return Ty == ty;
+  }
+
+  // operator-> - Allow user to dereference handle naturally...
+  inline const Type *operator->() const { return Ty; }
+};
+
+
+/// PATypeHolder - Holder class for a potentially abstract type. This uses
+/// efficient union-find techniques to handle dynamic type resolution. Unless
+/// you need to do custom processing when types are resolved, you should always
+/// use PATypeHolders in preference to PATypeHandles.
+///
+class PATypeHolder {
+  mutable const Type *Ty;
+  void destroy();
+public:
+  PATypeHolder() : Ty(0) {}
+  PATypeHolder(const Type *ty) : Ty(ty) {
+    addRef();
+  }
+  PATypeHolder(const PATypeHolder &T) : Ty(T.Ty) {
+    addRef();
+  }
+
+  ~PATypeHolder() { dropRef(); }
+
+  operator Type *() const { return get(); }
+  Type *get() const;
+
+  // operator-> - Allow user to dereference handle naturally...
+  Type *operator->() const { return get(); }
+
+  // operator= - Allow assignment to handle
+  Type *operator=(const Type *ty) {
+    if (Ty != ty) {   // Don't accidentally drop last ref to Ty.
+      dropRef();
+      Ty = ty;
+      addRef();
+    }
+    return get();
+  }
+  Type *operator=(const PATypeHolder &H) {
+    return operator=(H.Ty);
+  }
+
+  /// getRawType - This should only be used to implement the vmcore library.
+  ///
+  const Type *getRawType() const { return Ty; }
+
+private:
+  void addRef();
+  void dropRef();
+  friend class TypeMapBase;
+};
+
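To make the holder mechanics concrete, a brief hedged sketch under stated assumptions (LLVM 2.x type system; C is an LLVMContext, OpaqueType::get creates a fresh abstract type, and llvm/DerivedTypes.h is included):

    #include "llvm/DerivedTypes.h"

    void holdType(llvm::LLVMContext &C) {
      llvm::PATypeHolder H(llvm::OpaqueType::get(C)); // joins the type's use list
      // ... the opaque type is later refined, e.g. by the linker or bitcode
      // reader; the holder follows the resolution transparently ...
      llvm::Type *Resolved = H.get();
      (void)Resolved;
    }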
+// simplify_type - Allow clients to treat uses just like values when using
+// casting operators.
+template<> struct simplify_type<PATypeHolder> {
+  typedef const Type* SimpleType;
+  static SimpleType getSimplifiedValue(const PATypeHolder &Val) {
+    return static_cast<SimpleType>(Val.get());
+  }
+};
+template<> struct simplify_type<const PATypeHolder> {
+  typedef const Type* SimpleType;
+  static SimpleType getSimplifiedValue(const PATypeHolder &Val) {
+    return static_cast<SimpleType>(Val.get());
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/AliasAnalysis.h b/final/include/llvm/Analysis/AliasAnalysis.h
new file mode 100644
index 00000000000..71a5982c7d3
--- /dev/null
+++ b/final/include/llvm/Analysis/AliasAnalysis.h
@@ -0,0 +1,507 @@
+//===- llvm/Analysis/AliasAnalysis.h - Alias Analysis Interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the generic AliasAnalysis interface, which is the common
+// interface used by all clients of alias analysis information and implemented
+// by all alias analysis implementations. Mod/Ref information is also captured
+// by this interface.
+//
+// Implementations of this interface must implement the various virtual
+// methods; doing so automatically provides functionality for the entire suite
+// of client APIs.
+//
+// This API identifies memory regions with the Location class. The pointer
+// component specifies the base memory address of the region. The Size
+// specifies the maximum size (in address units) of the memory region, or
+// UnknownSize if the size is not known. The TBAA tag identifies the "type" of
+// the memory reference; see the TypeBasedAliasAnalysis class for details.
+//
+// Some non-obvious details include:
+//  - Pointers that point to two completely different objects in memory never
+//    alias, regardless of the value of the Size component.
+//  - NoAlias doesn't imply inequal pointers. The most obvious example of this
+//    is two pointers to constant memory. Even if they are equal, constant
+//    memory is never stored to, so there will never be any dependencies.
+//    In this and other situations, the pointers may be both NoAlias and
+//    MustAlias at the same time. The current API can only return one result,
+//    though this is rarely a problem in practice.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_ALIAS_ANALYSIS_H
+#define LLVM_ANALYSIS_ALIAS_ANALYSIS_H
+
+#include "llvm/Support/CallSite.h"
+#include <vector>
+
+namespace llvm {
+
+class LoadInst;
+class StoreInst;
+class VAArgInst;
+class TargetData;
+class Pass;
+class AnalysisUsage;
+class MemTransferInst;
+class MemIntrinsic;
+
+class AliasAnalysis {
+protected:
+  const TargetData *TD;
+
+private:
+  AliasAnalysis *AA;       // Previous Alias Analysis to chain to.
+
+protected:
+  /// InitializeAliasAnalysis - Subclasses must call this method to initialize
+  /// the AliasAnalysis interface before any other methods are called. This is
+  /// typically called by the run* methods of these subclasses. This may be
+  /// called multiple times.
+  ///
+  void InitializeAliasAnalysis(Pass *P);
+
+  /// getAnalysisUsage - All alias analysis implementations should invoke this
+  /// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  AliasAnalysis() : TD(0), AA(0) {}
+  virtual ~AliasAnalysis();  // We want to be subclassed
+
+  /// UnknownSize - This is a special value which can be used with the
+  /// size arguments in alias queries to indicate that the caller does not
+  /// know the sizes of the potential memory references.
+  static uint64_t const UnknownSize = ~UINT64_C(0);
+
+  /// getTargetData - Return a pointer to the current TargetData object, or
+  /// null if no TargetData object is available.
+  ///
+  const TargetData *getTargetData() const { return TD; }
+
+  /// getTypeStoreSize - Return the TargetData store size for the given type,
+  /// if known, or a conservative value otherwise.
+  ///
+  uint64_t getTypeStoreSize(const Type *Ty);
+
+  //===--------------------------------------------------------------------===//
+  /// Alias Queries...
+  ///
+
+  /// Location - A description of a memory location.
+  struct Location {
+    /// Ptr - The address of the start of the location.
+    const Value *Ptr;
+    /// Size - The maximum size of the location, in address-units, or
+    /// UnknownSize if the size is not known. Note that an unknown size does
+    /// not mean the pointer aliases the entire virtual address space, because
+    /// there are restrictions on stepping out of one object and into another.
+    /// See http://llvm.org/docs/LangRef.html#pointeraliasing
+    uint64_t Size;
+    /// TBAATag - The metadata node which describes the TBAA type of
+    /// the location, or null if there is no known unique tag.
+    const MDNode *TBAATag;
+
+    explicit Location(const Value *P = 0, uint64_t S = UnknownSize,
+                      const MDNode *N = 0)
+      : Ptr(P), Size(S), TBAATag(N) {}
+
+    Location getWithNewPtr(const Value *NewPtr) const {
+      Location Copy(*this);
+      Copy.Ptr = NewPtr;
+      return Copy;
+    }
+
+    Location getWithNewSize(uint64_t NewSize) const {
+      Location Copy(*this);
+      Copy.Size = NewSize;
+      return Copy;
+    }
+
+    Location getWithoutTBAATag() const {
+      Location Copy(*this);
+      Copy.TBAATag = 0;
+      return Copy;
+    }
+  };
+
+  /// getLocation - Return a Location describing the memory referenced by
+  /// the given instruction.
+  Location getLocation(const LoadInst *LI);
+  Location getLocation(const StoreInst *SI);
+  Location getLocation(const VAArgInst *VI);
+  static Location getLocationForSource(const MemTransferInst *MTI);
+  static Location getLocationForDest(const MemIntrinsic *MI);
+
+  /// Alias analysis result - Either we know for sure that it does not alias, we
+  /// know for sure it must alias, or we don't know anything: The two pointers
+  /// _might_ alias. This enum is designed so you can do things like:
+  ///   if (AA.alias(P1, P2)) { ... }
+  /// to check to see if two pointers might alias.
+  ///
+  /// See docs/AliasAnalysis.html for more information on the specific meanings
+  /// of these values.
+  ///
+  enum AliasResult {
+    NoAlias = 0,      ///< No dependencies.
+    MayAlias,         ///< Anything goes.
+    PartialAlias,     ///< Pointers differ, but pointees overlap.
+    MustAlias         ///< Pointers are equal.
+  };
+
+  /// alias - The main low-level interface to the alias analysis implementation.
+  /// Returns an AliasResult indicating whether the two pointers are aliased to
+  /// each other. This is the interface that must be implemented by specific
+  /// alias analysis implementations.
+  virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+  /// alias - A convenience wrapper.
+  AliasResult alias(const Value *V1, uint64_t V1Size,
+                    const Value *V2, uint64_t V2Size) {
+    return alias(Location(V1, V1Size), Location(V2, V2Size));
+  }
+
+  /// alias - A convenience wrapper.
+  AliasResult alias(const Value *V1, const Value *V2) {
+    return alias(V1, UnknownSize, V2, UnknownSize);
+  }
+
+  /// isNoAlias - A trivial helper function to check to see if the specified
+  /// pointers are no-alias.
+  bool isNoAlias(const Location &LocA, const Location &LocB) {
+    return alias(LocA, LocB) == NoAlias;
+  }
+
+  /// isNoAlias - A convenience wrapper.
+  bool isNoAlias(const Value *V1, uint64_t V1Size,
+                 const Value *V2, uint64_t V2Size) {
+    return isNoAlias(Location(V1, V1Size), Location(V2, V2Size));
+  }
+
+  /// isMustAlias - A convenience wrapper.
+  bool isMustAlias(const Location &LocA, const Location &LocB) {
+    return alias(LocA, LocB) == MustAlias;
+  }
+
+  /// isMustAlias - A convenience wrapper.
+  bool isMustAlias(const Value *V1, const Value *V2) {
+    return alias(V1, 1, V2, 1) == MustAlias;
+  }
+
+  /// pointsToConstantMemory - If the specified memory location is
+  /// known to be constant, return true. If OrLocal is true and the
+  /// specified memory location is known to be "local" (derived from
+  /// an alloca), return true. Otherwise return false.
+  virtual bool pointsToConstantMemory(const Location &Loc,
+                                      bool OrLocal = false);
+
+  /// pointsToConstantMemory - A convenience wrapper.
+  bool pointsToConstantMemory(const Value *P, bool OrLocal = false) {
+    return pointsToConstantMemory(Location(P), OrLocal);
+  }
+
+  //===--------------------------------------------------------------------===//
+  /// Simple mod/ref information...
+  ///
+
+  /// ModRefResult - Represent the result of a mod/ref query. Mod and Ref are
+  /// bits which may be or'd together.
+  ///
+  enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };
+
+  /// These values define additional bits used to define the
+  /// ModRefBehavior values.
+  enum { Nowhere = 0, ArgumentPointees = 4, Anywhere = 8 | ArgumentPointees };
+
+  /// ModRefBehavior - Summary of how a function affects memory in the program.
+  /// Loads from constant globals are not considered memory accesses for this
+  /// interface. Also, functions may freely modify stack space local to their
+  /// invocation without having to report it through these interfaces.
+  enum ModRefBehavior {
+    /// DoesNotAccessMemory - This function does not perform any non-local loads
+    /// or stores to memory.
+    ///
+    /// This property corresponds to the GCC 'const' attribute.
+    /// This property corresponds to the LLVM IR 'readnone' attribute.
+    /// This property corresponds to the IntrNoMem LLVM intrinsic flag.
+    DoesNotAccessMemory = Nowhere | NoModRef,
+
+    /// OnlyReadsArgumentPointees - The only memory references in this function
+    /// (if it has any) are non-volatile loads from objects pointed to by its
+    /// pointer-typed arguments, with arbitrary offsets.
+    ///
+    /// This property corresponds to the IntrReadArgMem LLVM intrinsic flag.
+    OnlyReadsArgumentPointees = ArgumentPointees | Ref,
+
+    /// OnlyAccessesArgumentPointees - The only memory references in this
+    /// function (if it has any) are non-volatile loads and stores from objects
+    /// pointed to by its pointer-typed arguments, with arbitrary offsets.
+    ///
+    /// This property corresponds to the IntrReadWriteArgMem LLVM intrinsic flag.
+    OnlyAccessesArgumentPointees = ArgumentPointees | ModRef,
+
+    /// OnlyReadsMemory - This function does not perform any non-local stores or
+    /// volatile loads, but may read from any memory location.
+    ///
+    /// This property corresponds to the GCC 'pure' attribute.
+    /// This property corresponds to the LLVM IR 'readonly' attribute.
+    /// This property corresponds to the IntrReadMem LLVM intrinsic flag.
+    OnlyReadsMemory = Anywhere | Ref,
+
+    /// UnknownModRefBehavior - This indicates that the function could not be
+    /// classified into one of the behaviors above.
+    UnknownModRefBehavior = Anywhere | ModRef
+  };
+
+  /// getModRefBehavior - Return the behavior when calling the given call site.
+  virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+
+  /// getModRefBehavior - Return the behavior when calling the given function.
+  /// For use when the call site is not known.
+  virtual ModRefBehavior getModRefBehavior(const Function *F);
+
+  /// doesNotAccessMemory - If the specified call is known to never read or
+  /// write memory, return true. If the call only reads from known-constant
+  /// memory, it is also legal to return true. Calls that unwind the stack
+  /// are legal for this predicate.
+  ///
+  /// Many optimizations (such as CSE and LICM) can be performed on such calls
+  /// without worrying about aliasing properties, and many calls have this
+  /// property (e.g. calls to 'sin' and 'cos').
+  ///
+  /// This property corresponds to the GCC 'const' attribute.
+  ///
+  bool doesNotAccessMemory(ImmutableCallSite CS) {
+    return getModRefBehavior(CS) == DoesNotAccessMemory;
+  }
+
+  /// doesNotAccessMemory - If the specified function is known to never read or
+  /// write memory, return true. For use when the call site is not known.
+  ///
+  bool doesNotAccessMemory(const Function *F) {
+    return getModRefBehavior(F) == DoesNotAccessMemory;
+  }
+
+  /// onlyReadsMemory - If the specified call is known to only read from
+  /// non-volatile memory (or not access memory at all), return true. Calls
+  /// that unwind the stack are legal for this predicate.
+  ///
+  /// This property allows many common optimizations to be performed in the
+  /// absence of interfering store instructions, such as CSE of strlen calls.
+  ///
+  /// This property corresponds to the GCC 'pure' attribute.
+  ///
+  bool onlyReadsMemory(ImmutableCallSite CS) {
+    return onlyReadsMemory(getModRefBehavior(CS));
+  }
+
+  /// onlyReadsMemory - If the specified function is known to only read from
+  /// non-volatile memory (or not access memory at all), return true. For use
+  /// when the call site is not known.
+  ///
+  bool onlyReadsMemory(const Function *F) {
+    return onlyReadsMemory(getModRefBehavior(F));
+  }
+
+  /// onlyReadsMemory - Return true if functions with the specified behavior are
+  /// known to only read from non-volatile memory (or not access memory at all).
+  ///
+  static bool onlyReadsMemory(ModRefBehavior MRB) {
+    return !(MRB & Mod);
+  }
+
+  /// onlyAccessesArgPointees - Return true if functions with the specified
+  /// behavior are known to read and write at most from objects pointed to by
+  /// their pointer-typed arguments (with arbitrary offsets).
+  ///
+  static bool onlyAccessesArgPointees(ModRefBehavior MRB) {
+    return !(MRB & Anywhere & ~ArgumentPointees);
+  }
+
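As a quick illustration of how these predicates compose, a sketch (AA is assumed to be an initialized AliasAnalysis and F a Function in the module):

    void classify(llvm::AliasAnalysis &AA, const llvm::Function *F) {
      llvm::AliasAnalysis::ModRefBehavior MRB = AA.getModRefBehavior(F);
      if (llvm::AliasAnalysis::onlyReadsMemory(MRB)) {
        // No Mod bit set: a call to F cannot invalidate previously loaded values.
      }
      if (llvm::AliasAnalysis::onlyAccessesArgPointees(MRB)) {
        // Any memory F touches is reachable through its pointer arguments.
      }
    }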
+  /// doesAccessArgPointees - Return true if functions with the specified
+  /// behavior are known to potentially read or write from objects pointed
+  /// to by their pointer-typed arguments (with arbitrary offsets).
+  ///
+  static bool doesAccessArgPointees(ModRefBehavior MRB) {
+    return (MRB & ModRef) && (MRB & ArgumentPointees);
+  }
+
+  /// getModRefInfo - Return information about whether or not an instruction may
+  /// read or write the specified memory location. An instruction
+  /// that doesn't read or write memory may be trivially LICM'd for example.
+  ModRefResult getModRefInfo(const Instruction *I,
+                             const Location &Loc) {
+    switch (I->getOpcode()) {
+    case Instruction::VAArg:  return getModRefInfo((const VAArgInst*)I, Loc);
+    case Instruction::Load:   return getModRefInfo((const LoadInst*)I,  Loc);
+    case Instruction::Store:  return getModRefInfo((const StoreInst*)I, Loc);
+    case Instruction::Call:   return getModRefInfo((const CallInst*)I,  Loc);
+    case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
+    default:                  return NoModRef;
+    }
+  }
+
+  /// getModRefInfo - A convenience wrapper.
+  ModRefResult getModRefInfo(const Instruction *I,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(I, Location(P, Size));
+  }
+
+  /// getModRefInfo (for call sites) - Return information about whether
+  /// a particular call site modifies or reads the specified memory location.
+  virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                     const Location &Loc);
+
+  /// getModRefInfo (for call sites) - A convenience wrapper.
+  ModRefResult getModRefInfo(ImmutableCallSite CS,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(CS, Location(P, Size));
+  }
+
+  /// getModRefInfo (for calls) - Return information about whether
+  /// a particular call modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const CallInst *C, const Location &Loc) {
+    return getModRefInfo(ImmutableCallSite(C), Loc);
+  }
+
+  /// getModRefInfo (for calls) - A convenience wrapper.
+  ModRefResult getModRefInfo(const CallInst *C, const Value *P, uint64_t Size) {
+    return getModRefInfo(C, Location(P, Size));
+  }
+
+  /// getModRefInfo (for invokes) - Return information about whether
+  /// a particular invoke modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const InvokeInst *I,
+                             const Location &Loc) {
+    return getModRefInfo(ImmutableCallSite(I), Loc);
+  }
+
+  /// getModRefInfo (for invokes) - A convenience wrapper.
+  ModRefResult getModRefInfo(const InvokeInst *I,
+                             const Value *P, uint64_t Size) {
+    return getModRefInfo(I, Location(P, Size));
+  }
+
+  /// getModRefInfo (for loads) - Return information about whether
+  /// a particular load modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const LoadInst *L, const Location &Loc);
+
+  /// getModRefInfo (for loads) - A convenience wrapper.
+  ModRefResult getModRefInfo(const LoadInst *L, const Value *P, uint64_t Size) {
+    return getModRefInfo(L, Location(P, Size));
+  }
+
+  /// getModRefInfo (for stores) - Return information about whether
+  /// a particular store modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const StoreInst *S, const Location &Loc);
+
+  /// getModRefInfo (for stores) - A convenience wrapper.
+  ModRefResult getModRefInfo(const StoreInst *S, const Value *P, uint64_t Size){
+    return getModRefInfo(S, Location(P, Size));
+  }
+
+  /// getModRefInfo (for va_args) - Return information about whether
+  /// a particular va_arg modifies or reads the specified memory location.
+  ModRefResult getModRefInfo(const VAArgInst* I, const Location &Loc);
+
+  /// getModRefInfo (for va_args) - A convenience wrapper.
+  ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){
+    return getModRefInfo(I, Location(P, Size));
+  }
+
+  /// getModRefInfo - Return information about whether two call sites may refer
+  /// to the same set of memory locations. See
+  ///   http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
+  /// for details.
+  virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                     ImmutableCallSite CS2);
+
+  //===--------------------------------------------------------------------===//
+  /// Higher level methods for querying mod/ref information.
+  ///
+
+  /// canBasicBlockModify - Return true if it is possible for execution of the
+  /// specified basic block to modify the value pointed to by Ptr.
+  bool canBasicBlockModify(const BasicBlock &BB, const Location &Loc);
+
+  /// canBasicBlockModify - A convenience wrapper.
+  bool canBasicBlockModify(const BasicBlock &BB, const Value *P, uint64_t Size){
+    return canBasicBlockModify(BB, Location(P, Size));
+  }
+
+  /// canInstructionRangeModify - Return true if it is possible for the
+  /// execution of the specified instructions to modify the value pointed to by
+  /// Ptr. The instructions to consider are all of the instructions in the
+  /// range of [I1,I2] INCLUSIVE. I1 and I2 must be in the same basic block.
+  bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2,
+                                 const Location &Loc);
+
+  /// canInstructionRangeModify - A convenience wrapper.
+  bool canInstructionRangeModify(const Instruction &I1, const Instruction &I2,
+                                 const Value *Ptr, uint64_t Size) {
+    return canInstructionRangeModify(I1, I2, Location(Ptr, Size));
+  }
+
+  //===--------------------------------------------------------------------===//
+  /// Methods that clients should call when they transform the program to allow
+  /// alias analyses to update their internal data structures. Note that these
+  /// methods may be called on any instruction, regardless of whether or not
+  /// they have pointer-analysis implications.
+  ///
+
+  /// deleteValue - This method should be called whenever an LLVM Value is
+  /// deleted from the program, for example when an instruction is found to be
+  /// redundant and is eliminated.
+  ///
+  virtual void deleteValue(Value *V);
+
+  /// copyValue - This method should be used whenever a preexisting value in the
+  /// program is copied or cloned, introducing a new value. Note that analysis
+  /// implementations should tolerate clients that use this method to introduce
+  /// the same value multiple times: if the analysis already knows about a
+  /// value, it should ignore the request.
+  ///
+  virtual void copyValue(Value *From, Value *To);
+
+  /// addEscapingUse - This method should be used whenever an escaping use is
+  /// added to a pointer value. Analysis implementations may either return
+  /// conservative responses for that value in the future, or may recompute
+  /// some or all internal state to continue providing precise responses.
+  ///
+  /// Any use is considered escaping _except_ the following:
+  ///  - GEPs or bitcasts of the pointer
+  ///  - Loads through the pointer
+  ///  - Stores through (but not of) the pointer
+  virtual void addEscapingUse(Use &U);
+
+  /// replaceWithNewValue - This method is the obvious combination of the two
+  /// above, and it is provided as a helper to simplify client code.
+  ///
+  void replaceWithNewValue(Value *Old, Value *New) {
+    copyValue(Old, New);
+    deleteValue(Old);
+  }
+};
+
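A hedged example of the basic query pattern a client pass might use; AA is an initialized AliasAnalysis, P1 and P2 are pointer Values accessed with 4-byte width, and CS is an ImmutableCallSite (all of these are assumptions):

    void query(llvm::AliasAnalysis &AA, const llvm::Value *P1,
               const llvm::Value *P2, llvm::ImmutableCallSite CS) {
      llvm::AliasAnalysis::Location LocA(P1, 4), LocB(P2, 4); // 4-byte accesses
      switch (AA.alias(LocA, LocB)) {
      case llvm::AliasAnalysis::NoAlias:   /* independent accesses */    break;
      case llvm::AliasAnalysis::MustAlias: /* provably equal pointers */ break;
      default:                             /* be conservative */         break;
      }
      if (AA.getModRefInfo(CS, LocA) == llvm::AliasAnalysis::NoModRef) {
        // The call neither reads nor writes LocA; reordering past it is safe.
      }
    }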
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool isNoAliasCall(const Value *V);
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object. This returns true for:
+///    Global Variables and Functions (but not Global Aliases)
+///    Allocas and Mallocs
+///    ByVal and NoAlias Arguments
+///    NoAlias returns
+///
+bool isIdentifiedObject(const Value *V);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/AliasSetTracker.h b/final/include/llvm/Analysis/AliasSetTracker.h
new file mode 100644
index 00000000000..e844d10dda0
--- /dev/null
+++ b/final/include/llvm/Analysis/AliasSetTracker.h
@@ -0,0 +1,440 @@
+//===- llvm/Analysis/AliasSetTracker.h - Build Alias Sets -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines two classes: AliasSetTracker and AliasSet. These
+// interfaces are used to classify a collection of pointer references into a
+// maximal number of disjoint sets. Each AliasSet object constructed by the
+// AliasSetTracker object refers to memory disjoint from the other sets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_ALIASSETTRACKER_H
+#define LLVM_ANALYSIS_ALIASSETTRACKER_H
+
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include <vector>
+
+namespace llvm {
+
+class AliasAnalysis;
+class LoadInst;
+class StoreInst;
+class VAArgInst;
+class AliasSetTracker;
+class AliasSet;
+
+class AliasSet : public ilist_node<AliasSet> {
+  friend class AliasSetTracker;
+
+  class PointerRec {
+    Value *Val;  // The pointer this record corresponds to.
+    PointerRec **PrevInList, *NextInList;
+    AliasSet *AS;
+    uint64_t Size;
+    const MDNode *TBAAInfo;
+  public:
+    PointerRec(Value *V)
+      : Val(V), PrevInList(0), NextInList(0), AS(0), Size(0),
+        TBAAInfo(DenseMapInfo<const MDNode *>::getEmptyKey()) {}
+
+    Value *getValue() const { return Val; }
+
+    PointerRec *getNext() const { return NextInList; }
+    bool hasAliasSet() const { return AS != 0; }
+
+    PointerRec** setPrevInList(PointerRec **PIL) {
+      PrevInList = PIL;
+      return &NextInList;
+    }
+
+    void updateSizeAndTBAAInfo(uint64_t NewSize, const MDNode *NewTBAAInfo) {
+      if (NewSize > Size) Size = NewSize;
+
+      if (TBAAInfo == DenseMapInfo<const MDNode *>::getEmptyKey())
+        // We don't have a TBAAInfo yet. Set it to NewTBAAInfo.
+        TBAAInfo = NewTBAAInfo;
+      else if (TBAAInfo != NewTBAAInfo)
+        // NewTBAAInfo conflicts with TBAAInfo.
+        TBAAInfo = DenseMapInfo<const MDNode *>::getTombstoneKey();
+    }
+
+    uint64_t getSize() const { return Size; }
+
+    /// getTBAAInfo - Return the TBAAInfo, or null if there is no
+    /// information or conflicting information.
+    const MDNode *getTBAAInfo() const {
+      // If we have missing or conflicting TBAAInfo, return null.
+      if (TBAAInfo == DenseMapInfo<const MDNode *>::getEmptyKey() ||
+          TBAAInfo == DenseMapInfo<const MDNode *>::getTombstoneKey())
+        return 0;
+      return TBAAInfo;
+    }
+
+    AliasSet *getAliasSet(AliasSetTracker &AST) {
+      assert(AS && "No AliasSet yet!");
+      if (AS->Forward) {
+        AliasSet *OldAS = AS;
+        AS = OldAS->getForwardedTarget(AST);
+        AS->addRef();
+        OldAS->dropRef(AST);
+      }
+      return AS;
+    }
+
+    void setAliasSet(AliasSet *as) {
+      assert(AS == 0 && "Already have an alias set!");
+      AS = as;
+    }
+
+    void eraseFromList() {
+      if (NextInList) NextInList->PrevInList = PrevInList;
+      *PrevInList = NextInList;
+      if (AS->PtrListEnd == &NextInList) {
+        AS->PtrListEnd = PrevInList;
+        assert(*AS->PtrListEnd == 0 && "List not terminated right!");
+      }
+      delete this;
+    }
+  };
+
+  PointerRec *PtrList, **PtrListEnd;  // Doubly linked list of nodes.
+  AliasSet *Forward;             // Forwarding pointer.
+  AliasSet *Next, *Prev;         // Doubly linked list of AliasSets.
+
+  // All calls & invokes in this alias set.
+  std::vector<AssertingVH<Instruction> > CallSites;
+
+  // RefCount - Number of nodes pointing to this AliasSet plus the number of
+  // AliasSets forwarding to it.
+  unsigned RefCount : 28;
+
+  /// AccessType - Keep track of whether this alias set merely refers to the
+  /// locations of memory, whether it modifies the memory, or whether it does
+  /// both. The lattice goes from "NoModRef" to either Refs or Mods, then to
+  /// ModRef as necessary.
+  ///
+  enum AccessType {
+    NoModRef = 0, Refs = 1,         // Ref = bit 1
+    Mods = 2, ModRef = 3            // Mod = bit 2
+  };
+  unsigned AccessTy : 2;
+
+  /// AliasType - Keep track of the relationships between the pointers in the
+  /// set. The lattice goes from MustAlias to MayAlias.
+  ///
+  enum AliasType {
+    MustAlias = 0, MayAlias = 1
+  };
+  unsigned AliasTy : 1;
+
+  // Volatile - True if this alias set contains volatile loads or stores.
+  bool Volatile : 1;
+
+  void addRef() { ++RefCount; }
+  void dropRef(AliasSetTracker &AST) {
+    assert(RefCount >= 1 && "Invalid reference count detected!");
+    if (--RefCount == 0)
+      removeFromTracker(AST);
+  }
+
+  CallSite getCallSite(unsigned i) const {
+    assert(i < CallSites.size());
+    return CallSite(CallSites[i]);
+  }
+
+public:
+  /// Accessors...
+  bool isRef() const { return AccessTy & Refs; }
+  bool isMod() const { return AccessTy & Mods; }
+  bool isMustAlias() const { return AliasTy == MustAlias; }
+  bool isMayAlias() const { return AliasTy == MayAlias; }
+
+  // isVolatile - Return true if this alias set contains volatile loads or
+  // stores.
+  bool isVolatile() const { return Volatile; }
+
+  /// isForwardingAliasSet - Return true if this alias set should be ignored as
+  /// part of the AliasSetTracker object.
+  bool isForwardingAliasSet() const { return Forward; }
+
+  /// mergeSetIn - Merge the specified alias set into this alias set...
+  ///
+  void mergeSetIn(AliasSet &AS, AliasSetTracker &AST);
+
+  // Alias Set iteration - Allow access to all of the pointers which are part
+  // of this alias set...
+  class iterator;
+  iterator begin() const { return iterator(PtrList); }
+  iterator end()   const { return iterator(); }
+  bool empty() const { return PtrList == 0; }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
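Given the accessors above, a typical read-only walk over one set looks like this (a sketch; AS is assumed to be an AliasSet built by a tracker):

    void inspect(const llvm::AliasSet &AS) {
      for (llvm::AliasSet::iterator I = AS.begin(), E = AS.end(); I != E; ++I) {
        llvm::Value *Ptr = I.getPointer();
        uint64_t Size = I.getSize();
        (void)Ptr; (void)Size; // inspect the (pointer, size) pair; no mutation
      }
    }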
+  /// Define an iterator for alias sets... this is just a forward iterator.
+  class iterator : public std::iterator<std::forward_iterator_tag,
+                                        PointerRec> {
+    PointerRec *CurNode;
+  public:
+    explicit iterator(PointerRec *CN = 0) : CurNode(CN) {}
+
+    bool operator==(const iterator& x) const {
+      return CurNode == x.CurNode;
+    }
+    bool operator!=(const iterator& x) const { return !operator==(x); }
+
+    const iterator &operator=(const iterator &I) {
+      CurNode = I.CurNode;
+      return *this;
+    }
+
+    value_type &operator*() const {
+      assert(CurNode && "Dereferencing AliasSet.end()!");
+      return *CurNode;
+    }
+    value_type *operator->() const { return &operator*(); }
+
+    Value *getPointer() const { return CurNode->getValue(); }
+    uint64_t getSize() const { return CurNode->getSize(); }
+    const MDNode *getTBAAInfo() const { return CurNode->getTBAAInfo(); }
+
+    iterator& operator++() {                // Preincrement
+      assert(CurNode && "Advancing past AliasSet.end()!");
+      CurNode = CurNode->getNext();
+      return *this;
+    }
+    iterator operator++(int) {              // Postincrement
+      iterator tmp = *this; ++*this; return tmp;
+    }
+  };
+
+private:
+  // Can only be created by AliasSetTracker. Also, ilist creates one
+  // to serve as a sentinel.
+  friend struct ilist_sentinel_traits<AliasSet>;
+  AliasSet() : PtrList(0), PtrListEnd(&PtrList), Forward(0), RefCount(0),
+               AccessTy(NoModRef), AliasTy(MustAlias), Volatile(false) {
+  }
+
+  AliasSet(const AliasSet &AS);       // do not implement
+  void operator=(const AliasSet &AS); // do not implement
+
+  PointerRec *getSomePointer() const {
+    return PtrList;
+  }
+
+  /// getForwardedTarget - Return the real alias set this represents. If this
+  /// has been merged with another set and is forwarding, return the ultimate
+  /// destination set. This also implements the union-find collapsing as well.
+  AliasSet *getForwardedTarget(AliasSetTracker &AST) {
+    if (!Forward) return this;
+
+    AliasSet *Dest = Forward->getForwardedTarget(AST);
+    if (Dest != Forward) {
+      Dest->addRef();
+      Forward->dropRef(AST);
+      Forward = Dest;
+    }
+    return Dest;
+  }
+
+  void removeFromTracker(AliasSetTracker &AST);
+
+  void addPointer(AliasSetTracker &AST, PointerRec &Entry, uint64_t Size,
+                  const MDNode *TBAAInfo,
+                  bool KnownMustAlias = false);
+  void addCallSite(CallSite CS, AliasAnalysis &AA);
+  void removeCallSite(CallSite CS) {
+    for (size_t i = 0, e = CallSites.size(); i != e; ++i)
+      if (CallSites[i] == CS.getInstruction()) {
+        CallSites[i] = CallSites.back();
+        CallSites.pop_back();
+      }
+  }
+  void setVolatile() { Volatile = true; }
+
+  /// aliasesPointer - Return true if the specified pointer "may" (or must)
+  /// alias one of the members in the set.
+  ///
+  bool aliasesPointer(const Value *Ptr, uint64_t Size, const MDNode *TBAAInfo,
+                      AliasAnalysis &AA) const;
+  bool aliasesCallSite(CallSite CS, AliasAnalysis &AA) const;
+};
+
+inline raw_ostream& operator<<(raw_ostream &OS, const AliasSet &AS) {
+  AS.print(OS);
+  return OS;
+}
+
+
+class AliasSetTracker {
+  /// CallbackVH - A CallbackVH to arrange for AliasSetTracker to be
+  /// notified whenever a Value is deleted.
+  class ASTCallbackVH : public CallbackVH {
+    AliasSetTracker *AST;
+    virtual void deleted();
+  public:
+    ASTCallbackVH(Value *V, AliasSetTracker *AST = 0);
+    ASTCallbackVH &operator=(Value *V);
+  };
+  /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap how to compare and
+  /// hash the value handle.
+  struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
+
+  AliasAnalysis &AA;
+  ilist<AliasSet> AliasSets;
+
+  typedef DenseMap<ASTCallbackVH, AliasSet::PointerRec*,
+                   ASTCallbackVHDenseMapInfo>
+    PointerMapType;
+
+  // Map from pointers to their node
+  PointerMapType PointerMap;
+
+public:
+  /// AliasSetTracker ctor - Create an empty collection of AliasSets, and use
+  /// the specified alias analysis object to disambiguate load and store
+  /// addresses.
+  explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
+  ~AliasSetTracker() { clear(); }
+
+  /// add methods - These methods are used to add different types of
+  /// instructions to the alias sets. Adding a new instruction can result in
+  /// one of three actions happening:
+  ///
+  ///   1. If the instruction doesn't alias any other sets, create a new set.
+  ///   2. If the instruction aliases exactly one set, add it to the set.
+  ///   3. If the instruction aliases multiple sets, merge the sets, and add
+  ///      the instruction to the result.
+  ///
+  /// These methods return true if inserting the instruction resulted in the
+  /// addition of a new alias set (i.e., the pointer did not alias anything).
+  ///
+  bool add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo); // Add a location
+  bool add(LoadInst *LI);
+  bool add(StoreInst *SI);
+  bool add(VAArgInst *VAAI);
+  bool add(CallSite CS);          // Call/Invoke instructions
+  bool add(CallInst *CI)   { return add(CallSite(CI)); }
+  bool add(InvokeInst *II) { return add(CallSite(II)); }
+  bool add(Instruction *I);       // Dispatch to one of the other add methods...
+  void add(BasicBlock &BB);       // Add all instructions in basic block
+  void add(const AliasSetTracker &AST); // Add alias relations from another AST
+
+  /// remove methods - These methods are used to remove all entries that might
+  /// be aliased by the specified instruction. These methods return true if any
+  /// alias sets were eliminated.
+  // Remove a location
+  bool remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo);
+  bool remove(LoadInst *LI);
+  bool remove(StoreInst *SI);
+  bool remove(VAArgInst *VAAI);
+  bool remove(CallSite CS);
+  bool remove(CallInst *CI)   { return remove(CallSite(CI)); }
+  bool remove(InvokeInst *II) { return remove(CallSite(II)); }
+  bool remove(Instruction *I);
+  void remove(AliasSet &AS);
+
+  void clear();
+
+  /// getAliasSets - Return the alias sets that are active.
+  ///
+  const ilist<AliasSet> &getAliasSets() const { return AliasSets; }
+
+  /// getAliasSetForPointer - Return the alias set that the specified pointer
+  /// lives in. If the New argument is non-null, this method sets the value to
+  /// true if a new alias set is created to contain the pointer (because the
+  /// pointer didn't alias anything).
+  AliasSet &getAliasSetForPointer(Value *P, uint64_t Size,
+                                  const MDNode *TBAAInfo,
+                                  bool *New = 0);
+
+  /// getAliasSetForPointerIfExists - Return the alias set containing the
+  /// location specified if one exists, otherwise return null.
+  AliasSet *getAliasSetForPointerIfExists(Value *P, uint64_t Size,
+                                          const MDNode *TBAAInfo) {
+    return findAliasSetForPointer(P, Size, TBAAInfo);
+  }
+
+  /// containsPointer - Return true if the specified location is represented by
+  /// this alias set, false otherwise. This does not modify the AST object or
+  /// alias sets.
+  bool containsPointer(Value *P, uint64_t Size, const MDNode *TBAAInfo) const;
+
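Putting the add methods together, a pass might build and inspect the sets for a function roughly like this (a sketch; AA is an initialized AliasAnalysis and F a Function, both assumptions):

    void buildSets(llvm::AliasAnalysis &AA, llvm::Function &F) {
      llvm::AliasSetTracker AST(AA);
      for (llvm::Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        AST.add(*BB);                 // dispatches to per-instruction add()
      for (llvm::AliasSetTracker::const_iterator I = AST.begin(),
             E = AST.end(); I != E; ++I)
        if (!I->isForwardingAliasSet() && I->isMod()) {
          // This set contains at least one modifying access.
        }
    }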
+  /// getAliasAnalysis - Return the underlying alias analysis object used by
+  /// this tracker.
+  AliasAnalysis &getAliasAnalysis() const { return AA; }
+
+  /// deleteValue method - This method is used to remove a pointer value from
+  /// the AliasSetTracker entirely. It should be used when an instruction is
+  /// deleted from the program to update the AST. If you don't use this, you
+  /// would have dangling pointers to deleted instructions.
+  ///
+  void deleteValue(Value *PtrVal);
+
+  /// copyValue - This method should be used whenever a preexisting value in the
+  /// program is copied or cloned, introducing a new value. Note that it is ok
+  /// for clients that use this method to introduce the same value multiple
+  /// times: if the tracker already knows about a value, it will ignore the
+  /// request.
+  ///
+  void copyValue(Value *From, Value *To);
+
+
+  typedef ilist<AliasSet>::iterator iterator;
+  typedef ilist<AliasSet>::const_iterator const_iterator;
+
+  const_iterator begin() const { return AliasSets.begin(); }
+  const_iterator end()   const { return AliasSets.end(); }
+
+  iterator begin() { return AliasSets.begin(); }
+  iterator end()   { return AliasSets.end(); }
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
+private:
+  friend class AliasSet;
+  void removeAliasSet(AliasSet *AS);
+
+  // getEntryFor - Just like operator[] on the map, except that it creates an
+  // entry for the pointer if it doesn't already exist.
+  AliasSet::PointerRec &getEntryFor(Value *V) {
+    AliasSet::PointerRec *&Entry = PointerMap[ASTCallbackVH(V, this)];
+    if (Entry == 0)
+      Entry = new AliasSet::PointerRec(V);
+    return *Entry;
+  }
+
+  AliasSet &addPointer(Value *P, uint64_t Size, const MDNode *TBAAInfo,
+                       AliasSet::AccessType E,
+                       bool &NewSet) {
+    NewSet = false;
+    AliasSet &AS = getAliasSetForPointer(P, Size, TBAAInfo, &NewSet);
+    AS.AccessTy |= E;
+    return AS;
+  }
+  AliasSet *findAliasSetForPointer(const Value *Ptr, uint64_t Size,
+                                   const MDNode *TBAAInfo);
+
+  AliasSet *findAliasSetForCallSite(CallSite CS);
+};
+
+inline raw_ostream& operator<<(raw_ostream &OS, const AliasSetTracker &AST) {
+  AST.print(OS);
+  return OS;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/CFGPrinter.h b/final/include/llvm/Analysis/CFGPrinter.h
new file mode 100644
index 00000000000..ac8f59602da
--- /dev/null
+++ b/final/include/llvm/Analysis/CFGPrinter.h
@@ -0,0 +1,111 @@
+//===-- CFGPrinter.h - CFG printer external interface -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines external functions that can be called to explicitly
+// instantiate the CFG printer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CFGPRINTER_H
+#define LLVM_ANALYSIS_CFGPRINTER_H
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/GraphWriter.h"
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(const Function *F) {
+    return "CFG for '" + F->getNameStr() + "' function";
+  }
+
+  static std::string getSimpleNodeLabel(const BasicBlock *Node,
+                                        const Function *Graph) {
+    if (!Node->getName().empty())
+      return Node->getNameStr();
+
+    std::string Str;
+    raw_string_ostream OS(Str);
+
+    WriteAsOperand(OS, Node, false);
+    return OS.str();
+  }
+
+  static std::string getCompleteNodeLabel(const BasicBlock *Node,
+                                          const Function *Graph) {
+    std::string Str;
+    raw_string_ostream OS(Str);
+
+    if (Node->getName().empty()) {
+      WriteAsOperand(OS, Node, false);
+      OS << ":";
+    }
+
+    OS << *Node;
+    std::string OutStr = OS.str();
+    if (OutStr[0] == '\n') OutStr.erase(OutStr.begin());
+
+    // Process string output to make it nicer...
+    for (unsigned i = 0; i != OutStr.length(); ++i)
+      if (OutStr[i] == '\n') {                          // Left justify
+        OutStr[i] = '\\';
+        OutStr.insert(OutStr.begin()+i+1, 'l');
+      } else if (OutStr[i] == ';') {                    // Delete comments!
+        unsigned Idx = OutStr.find('\n', i+1);          // Find end of line
+        OutStr.erase(OutStr.begin()+i, OutStr.begin()+Idx);
+        --i;
+      }
+
+    return OutStr;
+  }
+
+  std::string getNodeLabel(const BasicBlock *Node,
+                           const Function *Graph) {
+    if (isSimple())
+      return getSimpleNodeLabel(Node, Graph);
+    else
+      return getCompleteNodeLabel(Node, Graph);
+  }
+
+  static std::string getEdgeSourceLabel(const BasicBlock *Node,
+                                        succ_const_iterator I) {
+    // Label source of conditional branches with "T" or "F"
+    if (const BranchInst *BI = dyn_cast<BranchInst>(Node->getTerminator()))
+      if (BI->isConditional())
+        return (I == succ_begin(Node)) ? "T" : "F";
+
+    // Label source of switch edges with the associated value.
+    if (const SwitchInst *SI = dyn_cast<SwitchInst>(Node->getTerminator())) {
+      unsigned SuccNo = I.getSuccessorIndex();
+
+      if (SuccNo == 0) return "def";
+
+      std::string Str;
+      raw_string_ostream OS(Str);
+      OS << SI->getCaseValue(SuccNo)->getValue();
+      return OS.str();
+    }
+    return "";
+  }
+};
+} // End llvm namespace
+
+namespace llvm {
+  class FunctionPass;
+  FunctionPass *createCFGPrinterPass ();
+  FunctionPass *createCFGOnlyPrinterPass ();
+} // End llvm namespace
+
+#endif
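With this DOTGraphTraits specialization in scope, the generic GraphWriter entry points can render a function's CFG. A sketch (this mirrors what the printer passes above do; Graphviz must be available for actual display):

    #include "llvm/Analysis/CFGPrinter.h"

    void viewFunctionCFG(const llvm::Function *F) {
      // Writes a temporary .dot file and opens it when a viewer is configured.
      llvm::ViewGraph(F, "cfg." + F->getNameStr());
    }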
diff --git a/final/include/llvm/Analysis/CallGraph.h b/final/include/llvm/Analysis/CallGraph.h
new file mode 100644
index 00000000000..089f322e4a8
--- /dev/null
+++ b/final/include/llvm/Analysis/CallGraph.h
@@ -0,0 +1,375 @@
+//===- CallGraph.h - Build a Module's call graph ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This interface is used to build and manipulate a call graph, which is a very
+// useful tool for interprocedural optimization.
+//
+// Every function in a module is represented as a node in the call graph. The
+// callgraph node keeps track of which functions are called by the function
+// corresponding to the node.
+//
+// A call graph may contain nodes where the function that they correspond to is
+// null. These 'external' nodes are used to represent control flow that is not
+// represented (or analyzable) in the module. In particular, this analysis
+// builds one external node such that:
+//   1. All functions in the module without internal linkage will have edges
+//      from this external node, indicating that they could be called by
+//      functions outside of the module.
+//   2. All functions whose address is used for something more than a direct
+//      call, for example being stored into a memory location, will also have
+//      an edge from this external node. Since they may be called by an
+//      unknown caller later, they must be tracked as such.
+//
+// There is a second external node added for calls that leave this module.
+// Functions have a call edge to the external node iff:
+//   1. The function is external, reflecting the fact that it could call
+//      anything without internal linkage or that has its address taken.
+//   2. The function contains an indirect function call.
+//
+// As an extension in the future, there may be multiple nodes with a null
+// function. These will be used when we can prove (through pointer analysis)
+// that an indirect call site can call only a specific set of functions.
+//
+// Because of these properties, the CallGraph captures a conservative superset
+// of all of the caller-callee relationships, which is useful for
+// transformations.
+//
+// The CallGraph class also attempts to figure out what the root of the
+// CallGraph is, which it currently does by looking for a function named 'main'.
+// If no function named 'main' is found, the external node is used as the entry
+// node, reflecting the fact that any function without internal linkage could
+// be called into (which is common for libraries).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CALLGRAPH_H
+#define LLVM_ANALYSIS_CALLGRAPH_H
+
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/IncludeFile.h"
+#include <map>
+
+namespace llvm {
+
+class Function;
+class Module;
+class CallGraphNode;
+
+//===----------------------------------------------------------------------===//
+// CallGraph class definition
+//
+class CallGraph {
+protected:
+  Module *Mod;              // The module this call graph represents
+
+  typedef std::map<const Function *, CallGraphNode *> FunctionMapTy;
+  FunctionMapTy FunctionMap;    // Map from a function to its node
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  //===---------------------------------------------------------------------
+  // Accessors.
+  //
+  typedef FunctionMapTy::iterator iterator;
+  typedef FunctionMapTy::const_iterator const_iterator;
+
+  /// getModule - Return the module the call graph corresponds to.
+  ///
+  Module &getModule() const { return *Mod; }
+
+  inline       iterator begin()       { return FunctionMap.begin(); }
+  inline       iterator end()         { return FunctionMap.end();   }
+  inline const_iterator begin() const { return FunctionMap.begin(); }
+  inline const_iterator end()   const { return FunctionMap.end();   }
+
+  // Subscripting operators, return the call graph node for the provided
+  // function
+  inline const CallGraphNode *operator[](const Function *F) const {
+    const_iterator I = FunctionMap.find(F);
+    assert(I != FunctionMap.end() && "Function not in callgraph!");
+    return I->second;
+  }
+  inline CallGraphNode *operator[](const Function *F) {
+    const_iterator I = FunctionMap.find(F);
+    assert(I != FunctionMap.end() && "Function not in callgraph!");
+    return I->second;
+  }
+
+  /// Returns the CallGraphNode which is used to represent undetermined calls
+  /// into the callgraph. Override this if you want behavioral inheritance.
+  virtual CallGraphNode* getExternalCallingNode() const { return 0; }
+  virtual CallGraphNode* getCallsExternalNode()   const { return 0; }
+
+  /// Return the root/main method in the module, or some other root node, such
+  /// as the externalcallingnode. Override these if you want behavioral
+  /// inheritance.
+  virtual CallGraphNode* getRoot() { return 0; }
+  virtual const CallGraphNode* getRoot() const { return 0; }
+
+  //===---------------------------------------------------------------------
+  // Functions to keep a call graph up to date with a function that has been
+  // modified.
+  //
+
+  /// removeFunctionFromModule - Unlink the function from this module, returning
+  /// it. Because this removes the function from the module, the call graph
+  /// node is destroyed. This is only valid if the function does not call any
+  /// other functions (i.e., there are no edges in its CGN). The easiest way to
+  /// do this is to dropAllReferences before calling this.
+  ///
+  Function *removeFunctionFromModule(CallGraphNode *CGN);
+  Function *removeFunctionFromModule(Function *F) {
+    return removeFunctionFromModule((*this)[F]);
+  }
+
+  /// getOrInsertFunction - This method is identical to calling operator[], but
+  /// it will insert a new CallGraphNode for the specified function if one does
+  /// not already exist.
+  CallGraphNode *getOrInsertFunction(const Function *F);
+
+  /// spliceFunction - Replace the function represented by this node by another.
+  /// This does not rescan the body of the function, so it is suitable when
+  /// splicing the body of one function to another while also updating all
+  /// callers from the old function to the new.
+  ///
+  void spliceFunction(const Function *From, const Function *To);
+
+  //===---------------------------------------------------------------------
+  // Pass infrastructure interface glue code.
+  //
+protected:
+  CallGraph() {}
+
+public:
+  virtual ~CallGraph() { destroy(); }
+
+  /// initialize - Call this method before calling other methods,
+  /// re/initializes the state of the CallGraph.
+  ///
+  void initialize(Module &M);
+
+  void print(raw_ostream &o, Module *) const;
+  void dump() const;
+protected:
+  // destroy - Release memory for the call graph
+  virtual void destroy();
+};
+
+//===----------------------------------------------------------------------===//
+// CallGraphNode class definition.
+//
+class CallGraphNode {
+  friend class CallGraph;
+
+  AssertingVH<Function> F;
+
+  // CallRecord - This is a pair of the calling instruction (a call or invoke)
+  // and the callgraph node being called.
+public:
+  typedef std::pair<WeakVH, CallGraphNode*> CallRecord;
+private:
+  std::vector<CallRecord> CalledFunctions;
+
+  /// NumReferences - This is the number of times that this CallGraphNode occurs
+  /// in the CalledFunctions array of this or other CallGraphNodes.
+  unsigned NumReferences;
+
+  CallGraphNode(const CallGraphNode &);           // DO NOT IMPLEMENT
+  void operator=(const CallGraphNode &);          // DO NOT IMPLEMENT
+
+  void DropRef() { --NumReferences; }
+  void AddRef() { ++NumReferences; }
+public:
+  typedef std::vector<CallRecord> CalledFunctionsVector;
+
+
+  // CallGraphNode ctor - Create a node for the specified function.
+  inline CallGraphNode(Function *f) : F(f), NumReferences(0) {}
+  ~CallGraphNode() {
+    assert(NumReferences == 0 && "Node deleted while references remain");
+  }
+
+  //===---------------------------------------------------------------------
+  // Accessor methods.
+  //
+
+  typedef std::vector<CallRecord>::iterator iterator;
+  typedef std::vector<CallRecord>::const_iterator const_iterator;
+
+  // getFunction - Return the function that this call graph node represents.
+  Function *getFunction() const { return F; }
+
+  inline iterator begin() { return CalledFunctions.begin(); }
+  inline iterator end()   { return CalledFunctions.end();   }
+  inline const_iterator begin() const { return CalledFunctions.begin(); }
+  inline const_iterator end()   const { return CalledFunctions.end();   }
+  inline bool empty() const { return CalledFunctions.empty(); }
+  inline unsigned size() const { return (unsigned)CalledFunctions.size(); }
+
+  /// getNumReferences - Return the number of other CallGraphNodes in this
+  /// CallGraph that reference this node in their callee list.
+  unsigned getNumReferences() const { return NumReferences; }
+
+  // Subscripting operator - Return the i'th called function.
+  //
+  CallGraphNode *operator[](unsigned i) const {
+    assert(i < CalledFunctions.size() && "Invalid index");
+    return CalledFunctions[i].second;
+  }
+
+  /// dump - Print out this call graph node.
+  ///
+  void dump() const;
+  void print(raw_ostream &OS) const;
+
+  //===---------------------------------------------------------------------
+  // Methods to keep a call graph up to date with a function that has been
+  // modified
+  //
+
+  /// removeAllCalledFunctions - As the name implies, this removes all edges
+  /// from this CallGraphNode to any functions it calls.
+  void removeAllCalledFunctions() {
+    while (!CalledFunctions.empty()) {
+      CalledFunctions.back().second->DropRef();
+      CalledFunctions.pop_back();
+    }
+  }
+
+  /// stealCalledFunctionsFrom - Move all the callee information from N to this
+  /// node.
+  void stealCalledFunctionsFrom(CallGraphNode *N) {
+    assert(CalledFunctions.empty() &&
+           "Cannot steal callsite information if I already have some");
+    std::swap(CalledFunctions, N->CalledFunctions);
+  }
+
+
+  /// addCalledFunction - Add a function to the list of functions called by this
+  /// one.
+  void addCalledFunction(CallSite CS, CallGraphNode *M) {
+    CalledFunctions.push_back(std::make_pair(CS.getInstruction(), M));
+    M->AddRef();
+  }
+
+  void removeCallEdge(iterator I) {
+    I->second->DropRef();
+    *I = CalledFunctions.back();
+    CalledFunctions.pop_back();
+  }
+
+
+  /// removeCallEdgeFor - This method removes the edge in the node for the
+  /// specified call site. Note that this method takes linear time, so it
+  /// should be used sparingly.
+  void removeCallEdgeFor(CallSite CS);
+
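For orientation, a sketch of walking one node's outgoing edges with the types above (CGN is assumed to be a valid CallGraphNode*):

    void visitCallees(llvm::CallGraphNode *CGN) {
      for (llvm::CallGraphNode::iterator I = CGN->begin(), E = CGN->end();
           I != E; ++I) {
        // I->first is the calling instruction (a WeakVH); I->second the callee.
        llvm::CallGraphNode *CalleeNode = I->second;
        if (llvm::Function *Callee = CalleeNode->getFunction()) {
          (void)Callee; // direct edge; a null function denotes an external node
        }
      }
    }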
+  /// removeAnyCallEdgeTo - This method removes all call edges from this node
+  /// to the specified callee function. This takes more time to execute than
+  /// removeCallEdgeFor, so it should not be used unless necessary.
+  void removeAnyCallEdgeTo(CallGraphNode *Callee);
+
+  /// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+  /// from this node to the specified callee function.
+  void removeOneAbstractEdgeTo(CallGraphNode *Callee);
+
+  /// replaceCallEdge - This method replaces the edge in the node for the
+  /// specified call site with a new one. Note that this method takes linear
+  /// time, so it should be used sparingly.
+  void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+
+  /// allReferencesDropped - This is a special function that should only be
+  /// used by the CallGraph class.
+  void allReferencesDropped() {
+    NumReferences = 0;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// GraphTraits specializations for call graphs so that they can be treated as
+// graphs by the generic graph algorithms.
+//
+
+// Provide graph traits for traversing call graphs using standard graph
+// traversals.
+template <> struct GraphTraits<CallGraphNode*> {
+  typedef CallGraphNode NodeType;
+
+  typedef CallGraphNode::CallRecord CGNPairTy;
+  typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode*> CGNDerefFun;
+
+  static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; }
+
+  typedef mapped_iterator<NodeType::iterator, CGNDerefFun> ChildIteratorType;
+
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return map_iterator(N->begin(), CGNDerefFun(CGNDeref));
+  }
+  static inline ChildIteratorType child_end  (NodeType *N) {
+    return map_iterator(N->end(), CGNDerefFun(CGNDeref));
+  }
+
+  static CallGraphNode *CGNDeref(CGNPairTy P) {
+    return P.second;
+  }
+
+};
+
+template <> struct GraphTraits<const CallGraphNode*> {
+  typedef const CallGraphNode NodeType;
+  typedef NodeType::const_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; }
+  static inline ChildIteratorType child_begin(NodeType *N) { return N->begin();}
+  static inline ChildIteratorType child_end  (NodeType *N) { return N->end(); }
+};
+
+template<> struct GraphTraits<CallGraph*> : public GraphTraits<CallGraphNode*> {
+  static NodeType *getEntryNode(CallGraph *CGN) {
+    return CGN->getExternalCallingNode();  // Start at the external node!
diff --git a/final/include/llvm/Analysis/CaptureTracking.h b/final/include/llvm/Analysis/CaptureTracking.h
new file mode 100644
index 00000000000..b3390f47d2f
--- /dev/null
+++ b/final/include/llvm/Analysis/CaptureTracking.h
@@ -0,0 +1,33 @@
+//===----- llvm/Analysis/CaptureTracking.h - Pointer capture ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help determine which pointers are captured.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CAPTURETRACKING_H
+#define LLVM_ANALYSIS_CAPTURETRACKING_H
+
+namespace llvm {
+  class Value;
+
+  /// PointerMayBeCaptured - Return true if this pointer value may be captured
+  /// by the enclosing function (which is required to exist). This routine can
+  /// be expensive, so consider caching the results. The boolean ReturnCaptures
+  /// specifies whether returning the value (or part of it) from the function
+  /// counts as capturing it or not. The boolean StoreCaptures specifies
+  /// whether storing the value (or part of it) into memory anywhere
+  /// automatically counts as capturing it or not.
+  bool PointerMayBeCaptured(const Value *V,
+                            bool ReturnCaptures,
+                            bool StoreCaptures);
+
+} // end namespace llvm
+
+#endif
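PointerMayBeCaptured is a free function, so a caller only needs a Value. A short sketch, assuming the declaration above; isNonEscapingLocal is an invented helper name:

#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Instructions.h"

using namespace llvm;

// Conservative escape check: treat both returning the pointer and storing it
// anywhere as a capture.
static bool isNonEscapingLocal(const AllocaInst *AI) {
  return !PointerMayBeCaptured(AI, /*ReturnCaptures=*/true,
                               /*StoreCaptures=*/true);
}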
diff --git a/final/include/llvm/Analysis/CodeMetrics.h b/final/include/llvm/Analysis/CodeMetrics.h
new file mode 100644
index 00000000000..75edfbbed2e
--- /dev/null
+++ b/final/include/llvm/Analysis/CodeMetrics.h
@@ -0,0 +1,95 @@
+//===- CodeMetrics.h - Measures the weight of a function---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements various weight measurements for code, helping
+// the Inliner and other passes decide whether to duplicate a function's
+// contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CODEMETRICS_H
+#define LLVM_ANALYSIS_CODEMETRICS_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+  // CodeMetrics - Calculate size and a few similar metrics for a set of
+  // basic blocks.
+  struct CodeMetrics {
+    /// NeverInline - True if this callee should never be inlined into a
+    /// caller.
+    // bool NeverInline;
+
+    // True if this function contains a call to setjmp or _setjmp
+    bool callsSetJmp;
+
+    // True if this function calls itself
+    bool isRecursive;
+
+    // True if this function contains one or more indirect branches
+    bool containsIndirectBr;
+
+    /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+    bool usesDynamicAlloca;
+
+    /// NumInsts, NumBlocks - Keep track of how large each function is, which
+    /// is used to estimate the code size cost of inlining it.
+    unsigned NumInsts, NumBlocks;
+
+    /// NumBBInsts - Keeps track of basic block code size estimates.
+    DenseMap<const BasicBlock *, unsigned> NumBBInsts;
+
+    /// NumCalls - Keep track of the number of calls to 'big' functions.
+    unsigned NumCalls;
+
+    /// NumInlineCandidates - Keep track of the number of calls to internal
+    /// functions with only a single caller. These are likely targets for
+    /// future inlining, likely exposed by interleaved devirtualization.
+    unsigned NumInlineCandidates;
+
+    /// NumVectorInsts - Keep track of how many instructions produce vector
+    /// values. The inliner is being more aggressive with inlining vector
+    /// kernels.
+    unsigned NumVectorInsts;
+
+    /// NumRets - Keep track of how many Ret instructions the block contains.
+    unsigned NumRets;
+
+    CodeMetrics() : callsSetJmp(false), isRecursive(false),
+                    containsIndirectBr(false), usesDynamicAlloca(false),
+                    NumInsts(0), NumBlocks(0), NumCalls(0),
+                    NumInlineCandidates(0), NumVectorInsts(0),
+                    NumRets(0) {}
+
+    /// analyzeBasicBlock - Add information about the specified basic block
+    /// to the current structure.
+    void analyzeBasicBlock(const BasicBlock *BB);
+
+    /// analyzeFunction - Add information about the specified function
+    /// to the current structure.
+    void analyzeFunction(Function *F);
+
+    /// CountCodeReductionForConstant - Figure out an approximation for how
+    /// many instructions will be constant folded if the specified value is
+    /// constant.
+    unsigned CountCodeReductionForConstant(Value *V);
+
+    /// CountBonusForConstant - Figure out an approximation for how much
+    /// per-call performance boost we can expect if the specified value is
+    /// constant.
+    unsigned CountBonusForConstant(Value *V);
+
+    /// CountCodeReductionForAlloca - Figure out an approximation of how much
+    /// smaller the function will be if it is inlined into a context where an
+    /// argument becomes an alloca.
+    ///
+    unsigned CountCodeReductionForAlloca(Value *V);
+  };
+}
+
+#endif
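A sketch of the intended call pattern, assuming the structure above; the helper name and the instruction threshold are invented:

#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Function.h"

using namespace llvm;

static bool isCheapEnoughToClone(Function *F) {
  CodeMetrics Metrics;
  Metrics.analyzeFunction(F);    // Accumulates NumInsts, NumBlocks, flags...
  if (Metrics.callsSetJmp || Metrics.containsIndirectBr)
    return false;                // Duplicating these is risky.
  return Metrics.NumInsts < 200; // 200 is an arbitrary budget.
}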
diff --git a/final/include/llvm/Analysis/ConstantFolding.h b/final/include/llvm/Analysis/ConstantFolding.h
new file mode 100644
index 00000000000..f6b1f5ab991
--- /dev/null
+++ b/final/include/llvm/Analysis/ConstantFolding.h
@@ -0,0 +1,82 @@
+//===-- ConstantFolding.h - Fold instructions into constants --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for folding instructions into constants when all
+// operands are constants, for example "sub i32 1, 0" -> "1".
+//
+// Also, to supplement the basic VMCore ConstantExpr simplifications,
+// this file declares some additional folding routines that can make use of
+// TargetData information. These functions cannot go in VMCore due to library
+// dependency issues.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CONSTANTFOLDING_H
+#define LLVM_ANALYSIS_CONSTANTFOLDING_H
+
+namespace llvm {
+  class Constant;
+  class ConstantExpr;
+  class Instruction;
+  class TargetData;
+  class Function;
+  class Type;
+
+/// ConstantFoldInstruction - Try to constant fold the specified instruction.
+/// If successful, the constant result is returned, if not, null is returned.
+/// Note that this fails if not all of the operands are constant. Otherwise,
+/// this function can only fail when attempting to fold instructions like loads
+/// and stores, which have no constant expression form.
+Constant *ConstantFoldInstruction(Instruction *I, const TargetData *TD = 0);
+
+/// ConstantFoldConstantExpression - Attempt to fold the constant expression
+/// using the specified TargetData. If successful, the constant result is
+/// returned, if not, null is returned.
+Constant *ConstantFoldConstantExpression(const ConstantExpr *CE,
+                                         const TargetData *TD = 0);
+
+/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
+/// specified operands. If successful, the constant result is returned, if not,
+/// null is returned. Note that this function can fail when attempting to
+/// fold instructions like loads and stores, which have no constant expression
+/// form.
+///
+Constant *ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
+                                   Constant *const *Ops, unsigned NumOps,
+                                   const TargetData *TD = 0);
+
+/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
+/// instruction (icmp/fcmp) with the specified operands. If it fails, it
+/// returns a constant expression of the specified operands.
+///
+Constant *ConstantFoldCompareInstOperands(unsigned Predicate,
+                                          Constant *LHS, Constant *RHS,
+                                          const TargetData *TD = 0);
+
+/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would
+/// produce if it is constant and determinable. If this is not determinable,
+/// return null.
+Constant *ConstantFoldLoadFromConstPtr(Constant *C, const TargetData *TD = 0);
+
+/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a
+/// getelementptr constantexpr, return the constant value being addressed by the
+/// constant expression, or null if something is funny and we can't decide.
+Constant *ConstantFoldLoadThroughGEPConstantExpr(Constant *C, ConstantExpr *CE);
+
+/// canConstantFoldCallTo - Return true if it's even possible to fold a call to
+/// the specified function.
+bool canConstantFoldCallTo(const Function *F);
+
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+ConstantFoldCall(Function *F, Constant *const *Operands, unsigned NumOperands);
+}
+
+#endif
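A sketch of the usual driver loop for these routines, assuming the declarations above; foldWhatWeCan is an invented helper:

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Constant.h"
#include "llvm/Function.h"
#include "llvm/Instruction.h"
#include "llvm/Support/InstIterator.h"

using namespace llvm;

static unsigned foldWhatWeCan(Function &F, const TargetData *TD) {
  unsigned NumFolded = 0;
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ) {
    Instruction *Inst = &*I++;   // Advance first so erasing Inst is safe.
    if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
      Inst->replaceAllUsesWith(C);
      Inst->eraseFromParent();
      ++NumFolded;
    }
  }
  return NumFolded;
}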
diff --git a/final/include/llvm/Analysis/ConstantsScanner.h b/final/include/llvm/Analysis/ConstantsScanner.h
new file mode 100644
index 00000000000..cdaf68d75a6
--- /dev/null
+++ b/final/include/llvm/Analysis/ConstantsScanner.h
@@ -0,0 +1,93 @@
+//==- llvm/Analysis/ConstantsScanner.h - Iterate over constants -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements an iterator to walk through the constants referenced
+// by a method. This is used by the Bitcode & Assembly writers to build
+// constant pools.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CONSTANTSSCANNER_H
+#define LLVM_ANALYSIS_CONSTANTSSCANNER_H
+
+#include "llvm/Support/InstIterator.h"
+
+namespace llvm {
+
+class Constant;
+
+class constant_iterator : public std::iterator<std::forward_iterator_tag,
+                                               const Constant, ptrdiff_t> {
+  const_inst_iterator InstI;                // Method instruction iterator
+  unsigned OpIdx;                           // Operand index
+
+  typedef constant_iterator _Self;
+
+  inline bool isAtConstant() const {
+    assert(!InstI.atEnd() && OpIdx < InstI->getNumOperands() &&
+           "isAtConstant called with invalid arguments!");
+    return isa<Constant>(InstI->getOperand(OpIdx));
+  }
+
+public:
+  inline constant_iterator(const Function *F) : InstI(inst_begin(F)), OpIdx(0) {
+    // Advance to first constant... if we are not already at constant or end
+    if (InstI != inst_end(F) &&                            // InstI is valid?
+        (InstI->getNumOperands() == 0 || !isAtConstant())) // Not at constant?
+      operator++();
+  }
+
+  inline constant_iterator(const Function *F, bool)        // end ctor
+    : InstI(inst_end(F)), OpIdx(0) {
+  }
+
+  inline bool operator==(const _Self& x) const { return OpIdx == x.OpIdx &&
+                                                        InstI == x.InstI; }
+  inline bool operator!=(const _Self& x) const { return !operator==(x); }
+
+  inline pointer operator*() const {
+    assert(isAtConstant() && "Dereferenced an iterator at the end!");
+    return cast<Constant>(InstI->getOperand(OpIdx));
+  }
+  inline pointer operator->() const { return operator*(); }
+
+  inline _Self& operator++() {   // Preincrement implementation
+    ++OpIdx;
+    do {
+      unsigned NumOperands = InstI->getNumOperands();
+      while (OpIdx < NumOperands && !isAtConstant()) {
+        ++OpIdx;
+      }
+
+      if (OpIdx < NumOperands) return *this;  // Found a constant!
+      ++InstI;
+      OpIdx = 0;
+    } while (!InstI.atEnd());
+
+    return *this;  // At the end of the method
+  }
+
+  inline _Self operator++(int) { // Postincrement
+    _Self tmp = *this; ++*this; return tmp;
+  }
+
+  inline bool atEnd() const { return InstI.atEnd(); }
+};
+
+inline constant_iterator constant_begin(const Function *F) {
+  return constant_iterator(F);
+}
+
+inline constant_iterator constant_end(const Function *F) {
+  return constant_iterator(F, true);
+}
+
+} // End llvm namespace
+
+#endif
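The iterator composes with standard idioms; a sketch assuming the header above, with an invented helper that deduplicates via SmallPtrSet:

#include "llvm/Analysis/ConstantsScanner.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Constant.h"
#include "llvm/Function.h"

using namespace llvm;

static unsigned countReferencedConstants(const Function *F) {
  SmallPtrSet<const Constant *, 16> Seen;
  for (constant_iterator I = constant_begin(F), E = constant_end(F);
       I != E; ++I)
    Seen.insert(*I);             // operator* yields a const Constant*.
  return Seen.size();
}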
diff --git a/final/include/llvm/Analysis/DIBuilder.h b/final/include/llvm/Analysis/DIBuilder.h
new file mode 100644
index 00000000000..32a17e4f096
--- /dev/null
+++ b/final/include/llvm/Analysis/DIBuilder.h
@@ -0,0 +1,464 @@
+//===--- llvm/Analysis/DIBuilder.h - Debug Information Builder --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a DIBuilder that is useful for creating debugging
+// information entries in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DIBUILDER_H
+#define LLVM_ANALYSIS_DIBUILDER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class BasicBlock;
+  class Instruction;
+  class Function;
+  class Module;
+  class Value;
+  class LLVMContext;
+  class MDNode;
+  class StringRef;
+  class DIDescriptor;
+  class DIFile;
+  class DIEnumerator;
+  class DIType;
+  class DIArray;
+  class DIGlobalVariable;
+  class DINameSpace;
+  class DIVariable;
+  class DISubrange;
+  class DILexicalBlock;
+  class DISubprogram;
+  class DITemplateTypeParameter;
+  class DITemplateValueParameter;
+
+  class DIBuilder {
+  private:
+    Module &M;
+    LLVMContext &VMContext;
+    MDNode *TheCU;
+
+    Function *DeclareFn;     // llvm.dbg.declare
+    Function *ValueFn;       // llvm.dbg.value
+
+    DIBuilder(const DIBuilder &);       // DO NOT IMPLEMENT
+    void operator=(const DIBuilder &);  // DO NOT IMPLEMENT
+
+  public:
+    explicit DIBuilder(Module &M);
+    const MDNode *getCU() { return TheCU; }
+    enum ComplexAddrKind { OpPlus=1, OpDeref };
+
+    /// createCompileUnit - A CompileUnit provides an anchor for all debugging
+    /// information generated during this instance of compilation.
+    /// @param Lang     Source programming language, eg. dwarf::DW_LANG_C99
+    /// @param File     File name
+    /// @param Dir      Directory
+    /// @param Producer String identifying the producer of debugging
+    ///                 information. Usually this is a compiler version string.
+    /// @param isOptimized A boolean flag which indicates whether optimization
+    ///                    is ON or not.
+    /// @param Flags    This string lists command line options. This string is
+    ///                 directly embedded in debug info output which may be used
+    ///                 by a tool analyzing generated debugging information.
+    /// @param RV       This indicates runtime version for languages like
+    ///                 Objective-C.
+    void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir,
+                           StringRef Producer,
+                           bool isOptimized, StringRef Flags, unsigned RV);
+
+    /// createFile - Create a file descriptor to hold debugging information
+    /// for a file.
+    DIFile createFile(StringRef Filename, StringRef Directory);
+
+    /// createEnumerator - Create a single enumerator value.
+    DIEnumerator createEnumerator(StringRef Name, uint64_t Val);
+
+    /// createBasicType - Create debugging information entry for a basic
+    /// type.
+    /// @param Name        Type name.
+    /// @param SizeInBits  Size of the type.
+    /// @param AlignInBits Type alignment.
+    /// @param Encoding    DWARF encoding code, e.g. dwarf::DW_ATE_float.
+    DIType createBasicType(StringRef Name, uint64_t SizeInBits,
+                           uint64_t AlignInBits, unsigned Encoding);
+
+    /// createQualifiedType - Create debugging information entry for a qualified
+    /// type, e.g. 'const int'.
+    /// @param Tag         Tag identifying type, e.g. dwarf::TAG_volatile_type
+    /// @param FromTy      Base Type.
+    DIType createQualifiedType(unsigned Tag, DIType FromTy);
+
+    /// createPointerType - Create debugging information entry for a pointer.
+    /// @param PointeeTy   Type pointed by this pointer.
+    /// @param SizeInBits  Size.
+    /// @param AlignInBits Alignment. (optional)
+    /// @param Name        Pointer type name.
(optional) + DIType createPointerType(DIType PointeeTy, uint64_t SizeInBits, + uint64_t AlignInBits = 0, + StringRef Name = StringRef()); + + /// createReferenceType - Create debugging information entry for a c++ + /// style reference. + DIType createReferenceType(DIType RTy); + + /// createTypedef - Create debugging information entry for a typedef. + /// @param Ty Original type. + /// @param Name Typedef name. + /// @param File File where this type is defined. + /// @param LineNo Line number. + DIType createTypedef(DIType Ty, StringRef Name, DIFile File, + unsigned LineNo); + + /// createFriend - Create debugging information entry for a 'friend'. + DIType createFriend(DIType Ty, DIType FriendTy); + + /// createInheritance - Create debugging information entry to establish + /// inheritance relationship between two types. + /// @param Ty Original type. + /// @param BaseTy Base type. Ty is inherits from base. + /// @param BaseOffset Base offset. + /// @param Flags Flags to describe inheritance attribute, + /// e.g. private + DIType createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, + unsigned Flags); + + /// createMemberType - Create debugging information entry for a member. + /// @param Name Member name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param OffsetInBits Member offset. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Ty Parent type. + DIType createMemberType(StringRef Name, DIFile File, + unsigned LineNo, uint64_t SizeInBits, + uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, DIType Ty); + + /// createClassType - Create debugging information entry for a class. + /// @param Scope Scope in which this class is defined. + /// @param Name class name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param OffsetInBits Member offset. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Elements class members. + /// @param VTableHolder Debug info of the base class that contains vtable + /// for this type. This is used in + /// DW_AT_containing_type. See DWARF documentation + /// for more info. + /// @param TemplateParms Template type parameters. + DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, DIType DerivedFrom, + DIArray Elements, MDNode *VTableHolder = 0, + MDNode *TemplateParms = 0); + + /// createStructType - Create debugging information entry for a struct. + /// @param Scope Scope in which this struct is defined. + /// @param Name Struct name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Elements Struct elements. + /// @param RunTimeLang Optional parameter, Objective-C runtime version. + DIType createStructType(DIDescriptor Scope, StringRef Name, DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang = 0); + + /// createUnionType - Create debugging information entry for an union. + /// @param Scope Scope in which this union is defined. 
+ /// @param Name Union name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Elements Union elements. + /// @param RunTimeLang Optional parameter, Objective-C runtime version. + DIType createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, + unsigned LineNumber, uint64_t SizeInBits, + uint64_t AlignInBits, unsigned Flags, + DIArray Elements, unsigned RunTimeLang = 0); + + /// createTemplateTypeParameter - Create debugging information for template + /// type parameter. + /// @param Scope Scope in which this type is defined. + /// @param Name Type parameter name. + /// @param Ty Parameter type. + /// @param File File where this type parameter is defined. + /// @param LineNo Line number. + /// @param ColumnNo Column Number. + DITemplateTypeParameter + createTemplateTypeParameter(DIDescriptor Scope, StringRef Name, DIType Ty, + MDNode *File = 0, unsigned LineNo = 0, + unsigned ColumnNo = 0); + + /// createTemplateValueParameter - Create debugging information for template + /// value parameter. + /// @param Scope Scope in which this type is defined. + /// @param Name Value parameter name. + /// @param Ty Parameter type. + /// @param Value Constant parameter value. + /// @param File File where this type parameter is defined. + /// @param LineNo Line number. + /// @param ColumnNo Column Number. + DITemplateValueParameter + createTemplateValueParameter(DIDescriptor Scope, StringRef Name, DIType Ty, + uint64_t Value, + MDNode *File = 0, unsigned LineNo = 0, + unsigned ColumnNo = 0); + + /// createArrayType - Create debugging information entry for an array. + /// @param Size Array size. + /// @param AlignInBits Alignment. + /// @param Ty Element type. + /// @param Subscripts Subscripts. + DIType createArrayType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts); + + /// createVectorType - Create debugging information entry for a vector type. + /// @param Size Array size. + /// @param AlignInBits Alignment. + /// @param Ty Element type. + /// @param Subscripts Subscripts. + DIType createVectorType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts); + + /// createEnumerationType - Create debugging information entry for an + /// enumeration. + /// @param Scope Scope in which this enumeration is defined. + /// @param Name Union name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param Elements Enumeration elements. + DIType createEnumerationType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, DIArray Elements); + + /// createSubroutineType - Create subroutine type. + /// @param File File in which this subroutine is defined. + /// @param ParamterTypes An array of subroutine parameter types. This + /// includes return type at 0th index. + DIType createSubroutineType(DIFile File, DIArray ParameterTypes); + + /// createArtificialType - Create a new DIType with "artificial" flag set. + DIType createArtificialType(DIType Ty); + + /// createTemporaryType - Create a temporary forward-declared type. 
+    DIType createTemporaryType();
+    DIType createTemporaryType(DIFile F);
+
+    /// retainType - Retain DIType in a module even if it is not referenced
+    /// through debug info anchors.
+    void retainType(DIType T);
+
+    /// createUnspecifiedParameter - Create unspecified type descriptor
+    /// for a subroutine type.
+    DIDescriptor createUnspecifiedParameter();
+
+    /// getOrCreateArray - Get a DIArray, create one if required.
+    DIArray getOrCreateArray(Value *const *Elements, unsigned NumElements);
+
+    /// getOrCreateSubrange - Create a descriptor for a value range. This
+    /// implicitly uniques the values returned.
+    DISubrange getOrCreateSubrange(int64_t Lo, int64_t Hi);
+
+    /// createGlobalVariable - Create a new descriptor for the specified global.
+    /// @param Name        Name of the variable.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type.
+    /// @param isLocalToUnit Boolean flag indicating whether this variable is
+    ///                      externally visible or not.
+    /// @param Val         llvm::Value of the variable.
+    DIGlobalVariable
+    createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo,
+                         DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+
+
+    /// createStaticVariable - Create a new descriptor for the specified
+    /// variable.
+    /// @param Context     Variable scope.
+    /// @param Name        Name of the variable.
+    /// @param LinkageName Mangled name of the variable.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type.
+    /// @param isLocalToUnit Boolean flag indicating whether this variable is
+    ///                      externally visible or not.
+    /// @param Val         llvm::Value of the variable.
+    DIGlobalVariable
+    createStaticVariable(DIDescriptor Context, StringRef Name,
+                         StringRef LinkageName, DIFile File, unsigned LineNo,
+                         DIType Ty, bool isLocalToUnit, llvm::Value *Val);
+
+
+    /// createLocalVariable - Create a new descriptor for the specified
+    /// local variable.
+    /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
+    ///                    DW_TAG_arg_variable.
+    /// @param Scope       Variable scope.
+    /// @param Name        Variable name.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type
+    /// @param AlwaysPreserve Boolean. Set to true if debug info for this
+    ///                       variable should be preserved in optimized build.
+    /// @param Flags       Flags, e.g. artificial variable.
+    /// @param ArgNo       If this variable is an argument then this argument's
+    ///                    number. 1 indicates 1st argument.
+    DIVariable createLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                   StringRef Name,
+                                   DIFile File, unsigned LineNo,
+                                   DIType Ty, bool AlwaysPreserve = false,
+                                   unsigned Flags = 0,
+                                   unsigned ArgNo = 0);
+
+
+    /// createComplexVariable - Create a new descriptor for the specified
+    /// variable which has a complex address expression for its address.
+    /// @param Tag         Dwarf TAG. Usually DW_TAG_auto_variable or
+    ///                    DW_TAG_arg_variable.
+    /// @param Scope       Variable scope.
+    /// @param Name        Variable name.
+    /// @param File        File where this variable is defined.
+    /// @param LineNo      Line number.
+    /// @param Ty          Variable Type
+    /// @param Addr        A pointer to a vector of complex address operations.
+    /// @param NumAddr     Num of address operations in the vector.
+    /// @param ArgNo       If this variable is an argument then this argument's
+    ///                    number. 1 indicates 1st argument.
+    DIVariable createComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                     StringRef Name, DIFile F, unsigned LineNo,
+                                     DIType Ty, Value *const *Addr,
+                                     unsigned NumAddr, unsigned ArgNo = 0);
+
+    /// createFunction - Create a new descriptor for the specified subprogram.
+    /// See comments in DISubprogram for descriptions of these fields.
+    /// @param Scope         Function scope.
+    /// @param Name          Function name.
+    /// @param LinkageName   Mangled function name.
+    /// @param File          File where this variable is defined.
+    /// @param LineNo        Line number.
+    /// @param Ty            Function type.
+    /// @param isLocalToUnit True if this function is not externally visible.
+    /// @param isDefinition  True if this is a function definition.
+    /// @param Flags         e.g. is this function prototyped or not.
+    ///                      These flags are used to emit dwarf attributes.
+    /// @param isOptimized   True if optimization is ON.
+    /// @param Fn            llvm::Function pointer.
+    DISubprogram createFunction(DIDescriptor Scope, StringRef Name,
+                                StringRef LinkageName,
+                                DIFile File, unsigned LineNo,
+                                DIType Ty, bool isLocalToUnit,
+                                bool isDefinition,
+                                unsigned Flags = 0,
+                                bool isOptimized = false,
+                                Function *Fn = 0);
+
+    /// createMethod - Create a new descriptor for the specified C++ method.
+    /// See comments in DISubprogram for descriptions of these fields.
+    /// @param Scope         Function scope.
+    /// @param Name          Function name.
+    /// @param LinkageName   Mangled function name.
+    /// @param File          File where this variable is defined.
+    /// @param LineNo        Line number.
+    /// @param Ty            Function type.
+    /// @param isLocalToUnit True if this function is not externally visible.
+    /// @param isDefinition  True if this is a function definition.
+    /// @param Virtuality    Attributes describing virtuality, e.g. pure
+    ///                      virtual function.
+    /// @param VTableIndex   Index of this method in the virtual table.
+    /// @param VTableHolder  Type that holds vtable.
+    /// @param Flags         e.g. is this function prototyped or not.
+    ///                      These flags are used to emit dwarf attributes.
+    /// @param isOptimized   True if optimization is ON.
+    /// @param Fn            llvm::Function pointer.
+    DISubprogram createMethod(DIDescriptor Scope, StringRef Name,
+                              StringRef LinkageName,
+                              DIFile File, unsigned LineNo,
+                              DIType Ty, bool isLocalToUnit,
+                              bool isDefinition,
+                              unsigned Virtuality = 0, unsigned VTableIndex = 0,
+                              MDNode *VTableHolder = 0,
+                              unsigned Flags = 0,
+                              bool isOptimized = false,
+                              Function *Fn = 0);
+
+    /// createNameSpace - This creates a new descriptor for a namespace
+    /// with the specified parent scope.
+    /// @param Scope Namespace scope
+    /// @param Name  Name of this namespace
+    /// @param File  Source file
+    /// @param LineNo Line number
+    DINameSpace createNameSpace(DIDescriptor Scope, StringRef Name,
+                                DIFile File, unsigned LineNo);
+
+
+    /// createLexicalBlock - This creates a descriptor for a lexical block
+    /// with the specified parent context.
+    /// @param Scope Parent lexical scope.
+    /// @param File  Source file
+    /// @param Line  Line number
+    /// @param Col   Column number
+    DILexicalBlock createLexicalBlock(DIDescriptor Scope, DIFile File,
+                                      unsigned Line, unsigned Col);
+
+    /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+    /// @param Storage     llvm::Value of the variable
+    /// @param VarInfo     Variable's debug info descriptor.
+    /// @param InsertAtEnd Location for the new intrinsic.
+    Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo,
+                               BasicBlock *InsertAtEnd);
+
+    /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+    /// @param Storage      llvm::Value of the variable
+    /// @param VarInfo      Variable's debug info descriptor.
+    /// @param InsertBefore Location for the new intrinsic.
+    Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo,
+                               Instruction *InsertBefore);
+
+
+    /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    /// @param Val         llvm::Value of the variable
+    /// @param Offset      Offset
+    /// @param VarInfo     Variable's debug info descriptor.
+    /// @param InsertAtEnd Location for the new intrinsic.
+    Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
+                                         DIVariable VarInfo,
+                                         BasicBlock *InsertAtEnd);
+
+    /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    /// @param Val          llvm::Value of the variable
+    /// @param Offset       Offset
+    /// @param VarInfo      Variable's debug info descriptor.
+    /// @param InsertBefore Location for the new intrinsic.
+    Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset,
+                                         DIVariable VarInfo,
+                                         Instruction *InsertBefore);
+
+  };
+} // end namespace llvm
+
+#endif
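A sketch of the creation order DIBuilder expects (the compile unit first, then files and types), assuming the interface above; the file names, producer string and helper name are invented:

#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Support/Dwarf.h"

using namespace llvm;

static void emitMinimalDebugInfo(Module &M) {
  DIBuilder DIB(M);
  // The compile unit anchors everything created afterwards.
  DIB.createCompileUnit(dwarf::DW_LANG_C99, "t.c", "/tmp", "my-compiler",
                        /*isOptimized=*/false, /*Flags=*/"", /*RV=*/0);
  DIFile File = DIB.createFile("t.c", "/tmp");
  DIType IntTy = DIB.createBasicType("int", 32, 32, dwarf::DW_ATE_signed);
  (void)File;
  (void)IntTy; // Descriptors like these feed the later create* calls.
}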
diff --git a/final/include/llvm/Analysis/DOTGraphTraitsPass.h b/final/include/llvm/Analysis/DOTGraphTraitsPass.h
new file mode 100644
index 00000000000..30741c4970a
--- /dev/null
+++ b/final/include/llvm/Analysis/DOTGraphTraitsPass.h
@@ -0,0 +1,83 @@
+//===-- DOTGraphTraitsPass.h - Print/View dotty graphs-----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Templates to create dotty viewer and printer passes for GraphTraits graphs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
+#define LLVM_ANALYSIS_DOT_GRAPHTRAITS_PASS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CFGPrinter.h"
+
+namespace llvm {
+template <class Analysis, bool Simple>
+struct DOTGraphTraitsViewer : public FunctionPass {
+  std::string Name;
+
+  DOTGraphTraitsViewer(std::string GraphName, char &ID) : FunctionPass(ID) {
+    Name = GraphName;
+  }
+
+  virtual bool runOnFunction(Function &F) {
+    Analysis *Graph;
+    std::string Title, GraphName;
+    Graph = &getAnalysis<Analysis>();
+    GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+    Title = GraphName + " for '" + F.getNameStr() + "' function";
+    ViewGraph(Graph, Name, Simple, Title);
+
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<Analysis>();
+  }
+};
+
+template <class Analysis, bool Simple>
+struct DOTGraphTraitsPrinter : public FunctionPass {
+
+  std::string Name;
+
+  DOTGraphTraitsPrinter(std::string GraphName, char &ID)
+    : FunctionPass(ID) {
+    Name = GraphName;
+  }
+
+  virtual bool runOnFunction(Function &F) {
+    Analysis *Graph;
+    std::string Filename = Name + "." + F.getNameStr() + ".dot";
+    errs() << "Writing '" << Filename << "'...";
+
+    std::string ErrorInfo;
+    raw_fd_ostream File(Filename.c_str(), ErrorInfo);
+    Graph = &getAnalysis<Analysis>();
+
+    std::string Title, GraphName;
+    GraphName = DOTGraphTraits<Analysis*>::getGraphName(Graph);
+    Title = GraphName + " for '" + F.getNameStr() + "' function";
+
+    if (ErrorInfo.empty())
+      WriteGraph(File, Graph, Simple, Title);
+    else
+      errs() << "  error opening file for writing!";
+    errs() << "\n";
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<Analysis>();
+  }
+};
+}
+#endif
diff --git a/final/include/llvm/Analysis/DebugInfo.h b/final/include/llvm/Analysis/DebugInfo.h
new file mode 100644
index 00000000000..951fd3cf1f6
--- /dev/null
+++ b/final/include/llvm/Analysis/DebugInfo.h
@@ -0,0 +1,742 @@
+//===--- llvm/Analysis/DebugInfo.h - Debug Information Helpers --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a bunch of datatypes that are useful for creating and
+// walking debug info in LLVM IR form. They essentially provide wrappers around
+// the information in the global variables that's needed when constructing the
+// DWARF information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEBUGINFO_H
+#define LLVM_ANALYSIS_DEBUGINFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Dwarf.h"
+
+namespace llvm {
+  class BasicBlock;
+  class Constant;
+  class Function;
+  class GlobalVariable;
+  class Module;
+  class Type;
+  class Value;
+  class DbgDeclareInst;
+  class Instruction;
+  class MDNode;
+  class NamedMDNode;
+  class LLVMContext;
+  class raw_ostream;
+
+  class DIFile;
+  class DISubprogram;
+  class DILexicalBlock;
+  class DIVariable;
+  class DIType;
+
+  /// DIDescriptor - A thin wrapper around MDNode to access encoded debug info.
+  /// This should not be stored in a container, because the underlying MDNode
+  /// may change in certain situations.
+ class DIDescriptor { + public: + enum { + FlagPrivate = 1 << 0, + FlagProtected = 1 << 1, + FlagFwdDecl = 1 << 2, + FlagAppleBlock = 1 << 3, + FlagBlockByrefStruct = 1 << 4, + FlagVirtual = 1 << 5, + FlagArtificial = 1 << 6, + FlagExplicit = 1 << 7, + FlagPrototyped = 1 << 8 + }; + protected: + const MDNode *DbgNode; + + StringRef getStringField(unsigned Elt) const; + unsigned getUnsignedField(unsigned Elt) const { + return (unsigned)getUInt64Field(Elt); + } + uint64_t getUInt64Field(unsigned Elt) const; + DIDescriptor getDescriptorField(unsigned Elt) const; + + template + DescTy getFieldAs(unsigned Elt) const { + return DescTy(getDescriptorField(Elt)); + } + + GlobalVariable *getGlobalVariableField(unsigned Elt) const; + Constant *getConstantField(unsigned Elt) const; + Function *getFunctionField(unsigned Elt) const; + + public: + explicit DIDescriptor() : DbgNode(0) {} + explicit DIDescriptor(const MDNode *N) : DbgNode(N) {} + explicit DIDescriptor(const DIFile F); + explicit DIDescriptor(const DISubprogram F); + explicit DIDescriptor(const DILexicalBlock F); + explicit DIDescriptor(const DIVariable F); + explicit DIDescriptor(const DIType F); + + bool Verify() const { return DbgNode != 0; } + + operator MDNode *() const { return const_cast(DbgNode); } + MDNode *operator ->() const { return const_cast(DbgNode); } + + unsigned getVersion() const { + return getUnsignedField(0) & LLVMDebugVersionMask; + } + + unsigned getTag() const { + return getUnsignedField(0) & ~LLVMDebugVersionMask; + } + + /// print - print descriptor. + void print(raw_ostream &OS) const; + + /// dump - print descriptor to dbgs() with a newline. + void dump() const; + + bool isDerivedType() const; + bool isCompositeType() const; + bool isBasicType() const; + bool isVariable() const; + bool isSubprogram() const; + bool isGlobalVariable() const; + bool isScope() const; + bool isFile() const; + bool isCompileUnit() const; + bool isNameSpace() const; + bool isLexicalBlock() const; + bool isSubrange() const; + bool isEnumerator() const; + bool isType() const; + bool isGlobal() const; + bool isUnspecifiedParameter() const; + bool isTemplateTypeParameter() const; + bool isTemplateValueParameter() const; + }; + + /// DISubrange - This is used to represent ranges, for array bounds. + class DISubrange : public DIDescriptor { + public: + explicit DISubrange(const MDNode *N = 0) : DIDescriptor(N) {} + + int64_t getLo() const { return (int64_t)getUInt64Field(1); } + int64_t getHi() const { return (int64_t)getUInt64Field(2); } + }; + + /// DIArray - This descriptor holds an array of descriptors. + class DIArray : public DIDescriptor { + public: + explicit DIArray(const MDNode *N = 0) + : DIDescriptor(N) {} + + unsigned getNumElements() const; + DIDescriptor getElement(unsigned Idx) const { + return getDescriptorField(Idx); + } + }; + + /// DIScope - A base class for various scopes. + class DIScope : public DIDescriptor { + public: + explicit DIScope(const MDNode *N = 0) : DIDescriptor (N) {} + virtual ~DIScope() {} + + StringRef getFilename() const; + StringRef getDirectory() const; + }; + + /// DICompileUnit - A wrapper for a compile unit. 
+ class DICompileUnit : public DIScope { + public: + explicit DICompileUnit(const MDNode *N = 0) : DIScope(N) {} + + unsigned getLanguage() const { return getUnsignedField(2); } + StringRef getFilename() const { return getStringField(3); } + StringRef getDirectory() const { return getStringField(4); } + StringRef getProducer() const { return getStringField(5); } + + /// isMain - Each input file is encoded as a separate compile unit in LLVM + /// debugging information output. However, many target specific tool chains + /// prefer to encode only one compile unit in an object file. In this + /// situation, the LLVM code generator will include debugging information + /// entities in the compile unit that is marked as main compile unit. The + /// code generator accepts maximum one main compile unit per module. If a + /// module does not contain any main compile unit then the code generator + /// will emit multiple compile units in the output object file. + + bool isMain() const { return getUnsignedField(6) != 0; } + bool isOptimized() const { return getUnsignedField(7) != 0; } + StringRef getFlags() const { return getStringField(8); } + unsigned getRunTimeVersion() const { return getUnsignedField(9); } + + /// Verify - Verify that a compile unit is well formed. + bool Verify() const; + + /// print - print compile unit. + void print(raw_ostream &OS) const; + + /// dump - print compile unit to dbgs() with a newline. + void dump() const; + }; + + /// DIFile - This is a wrapper for a file. + class DIFile : public DIScope { + public: + explicit DIFile(const MDNode *N = 0) : DIScope(N) { + if (DbgNode && !isFile()) + DbgNode = 0; + } + StringRef getFilename() const { return getStringField(1); } + StringRef getDirectory() const { return getStringField(2); } + DICompileUnit getCompileUnit() const{ return getFieldAs(3); } + }; + + /// DIEnumerator - A wrapper for an enumerator (e.g. X and Y in 'enum {X,Y}'). + /// FIXME: it seems strange that this doesn't have either a reference to the + /// type/precision or a file/line pair for location info. + class DIEnumerator : public DIDescriptor { + public: + explicit DIEnumerator(const MDNode *N = 0) : DIDescriptor(N) {} + + StringRef getName() const { return getStringField(1); } + uint64_t getEnumValue() const { return getUInt64Field(2); } + }; + + /// DIType - This is a wrapper for a type. + /// FIXME: Types should be factored much better so that CV qualifiers and + /// others do not require a huge and empty descriptor full of zeros. + class DIType : public DIScope { + public: + protected: + // This ctor is used when the Tag has already been validated by a derived + // ctor. + DIType(const MDNode *N, bool, bool) : DIScope(N) {} + + public: + + /// Verify - Verify that a type descriptor is well formed. + bool Verify() const; + public: + explicit DIType(const MDNode *N); + explicit DIType() {} + virtual ~DIType() {} + + DIScope getContext() const { return getFieldAs(1); } + StringRef getName() const { return getStringField(2); } + DICompileUnit getCompileUnit() const{ + if (getVersion() == llvm::LLVMDebugVersion7) + return getFieldAs(3); + + return getFieldAs(3).getCompileUnit(); + } + DIFile getFile() const { return getFieldAs(3); } + unsigned getLineNumber() const { return getUnsignedField(4); } + uint64_t getSizeInBits() const { return getUInt64Field(5); } + uint64_t getAlignInBits() const { return getUInt64Field(6); } + // FIXME: Offset is only used for DW_TAG_member nodes. Making every type + // carry this is just plain insane. 
+ uint64_t getOffsetInBits() const { return getUInt64Field(7); } + unsigned getFlags() const { return getUnsignedField(8); } + bool isPrivate() const { + return (getFlags() & FlagPrivate) != 0; + } + bool isProtected() const { + return (getFlags() & FlagProtected) != 0; + } + bool isForwardDecl() const { + return (getFlags() & FlagFwdDecl) != 0; + } + // isAppleBlock - Return true if this is the Apple Blocks extension. + bool isAppleBlockExtension() const { + return (getFlags() & FlagAppleBlock) != 0; + } + bool isBlockByrefStruct() const { + return (getFlags() & FlagBlockByrefStruct) != 0; + } + bool isVirtual() const { + return (getFlags() & FlagVirtual) != 0; + } + bool isArtificial() const { + return (getFlags() & FlagArtificial) != 0; + } + bool isValid() const { + return DbgNode && (isBasicType() || isDerivedType() || isCompositeType()); + } + StringRef getDirectory() const { + if (getVersion() == llvm::LLVMDebugVersion7) + return getCompileUnit().getDirectory(); + + return getFieldAs(3).getDirectory(); + } + StringRef getFilename() const { + if (getVersion() == llvm::LLVMDebugVersion7) + return getCompileUnit().getFilename(); + + return getFieldAs(3).getFilename(); + } + + /// replaceAllUsesWith - Replace all uses of debug info referenced by + /// this descriptor. + void replaceAllUsesWith(DIDescriptor &D); + void replaceAllUsesWith(MDNode *D); + + /// print - print type. + void print(raw_ostream &OS) const; + + /// dump - print type to dbgs() with a newline. + void dump() const; + }; + + /// DIBasicType - A basic type, like 'int' or 'float'. + class DIBasicType : public DIType { + public: + explicit DIBasicType(const MDNode *N = 0) : DIType(N) {} + + unsigned getEncoding() const { return getUnsignedField(9); } + + /// Verify - Verify that a basic type descriptor is well formed. + bool Verify() const; + + /// print - print basic type. + void print(raw_ostream &OS) const; + + /// dump - print basic type to dbgs() with a newline. + void dump() const; + }; + + /// DIDerivedType - A simple derived type, like a const qualified type, + /// a typedef, a pointer or reference, etc. + class DIDerivedType : public DIType { + protected: + explicit DIDerivedType(const MDNode *N, bool, bool) + : DIType(N, true, true) {} + public: + explicit DIDerivedType(const MDNode *N = 0) + : DIType(N, true, true) {} + + DIType getTypeDerivedFrom() const { return getFieldAs(9); } + + /// getOriginalTypeSize - If this type is derived from a base type then + /// return base type size. + uint64_t getOriginalTypeSize() const; + + /// Verify - Verify that a derived type descriptor is well formed. + bool Verify() const; + + /// print - print derived type. + void print(raw_ostream &OS) const; + + /// dump - print derived type to dbgs() with a newline. + void dump() const; + }; + + /// DICompositeType - This descriptor holds a type that can refer to multiple + /// other types, like a function or struct. + /// FIXME: Why is this a DIDerivedType?? + class DICompositeType : public DIDerivedType { + public: + explicit DICompositeType(const MDNode *N = 0) + : DIDerivedType(N, true, true) { + if (N && !isCompositeType()) + DbgNode = 0; + } + + DIArray getTypeArray() const { return getFieldAs(10); } + unsigned getRunTimeLang() const { return getUnsignedField(11); } + DICompositeType getContainingType() const { + return getFieldAs(12); + } + DIArray getTemplateParams() const { return getFieldAs(13); } + + /// Verify - Verify that a composite type descriptor is well formed. 
+ bool Verify() const; + + /// print - print composite type. + void print(raw_ostream &OS) const; + + /// dump - print composite type to dbgs() with a newline. + void dump() const; + }; + + /// DITemplateTypeParameter - This is a wrapper for template type parameter. + class DITemplateTypeParameter : public DIDescriptor { + public: + explicit DITemplateTypeParameter(const MDNode *N = 0) : DIDescriptor(N) {} + + DIScope getContext() const { return getFieldAs(1); } + StringRef getName() const { return getStringField(2); } + DIType getType() const { return getFieldAs(3); } + StringRef getFilename() const { + return getFieldAs(4).getFilename(); + } + StringRef getDirectory() const { + return getFieldAs(4).getDirectory(); + } + unsigned getLineNumber() const { return getUnsignedField(5); } + unsigned getColumnNumber() const { return getUnsignedField(6); } + }; + + /// DITemplateValueParameter - This is a wrapper for template value parameter. + class DITemplateValueParameter : public DIDescriptor { + public: + explicit DITemplateValueParameter(const MDNode *N = 0) : DIDescriptor(N) {} + + DIScope getContext() const { return getFieldAs(1); } + StringRef getName() const { return getStringField(2); } + DIType getType() const { return getFieldAs(3); } + uint64_t getValue() const { return getUInt64Field(4); } + StringRef getFilename() const { + return getFieldAs(5).getFilename(); + } + StringRef getDirectory() const { + return getFieldAs(5).getDirectory(); + } + unsigned getLineNumber() const { return getUnsignedField(6); } + unsigned getColumnNumber() const { return getUnsignedField(7); } + }; + + /// DISubprogram - This is a wrapper for a subprogram (e.g. a function). + class DISubprogram : public DIScope { + public: + explicit DISubprogram(const MDNode *N = 0) : DIScope(N) {} + + DIScope getContext() const { return getFieldAs(2); } + StringRef getName() const { return getStringField(3); } + StringRef getDisplayName() const { return getStringField(4); } + StringRef getLinkageName() const { return getStringField(5); } + DICompileUnit getCompileUnit() const{ + if (getVersion() == llvm::LLVMDebugVersion7) + return getFieldAs(6); + + return getFieldAs(6).getCompileUnit(); + } + unsigned getLineNumber() const { return getUnsignedField(7); } + DICompositeType getType() const { return getFieldAs(8); } + + /// getReturnTypeName - Subprogram return types are encoded either as + /// DIType or as DICompositeType. + StringRef getReturnTypeName() const { + DICompositeType DCT(getFieldAs(8)); + if (DCT.Verify()) { + DIArray A = DCT.getTypeArray(); + DIType T(A.getElement(0)); + return T.getName(); + } + DIType T(getFieldAs(8)); + return T.getName(); + } + + /// isLocalToUnit - Return true if this subprogram is local to the current + /// compile unit, like 'static' in C. + unsigned isLocalToUnit() const { return getUnsignedField(9); } + unsigned isDefinition() const { return getUnsignedField(10); } + + unsigned getVirtuality() const { return getUnsignedField(11); } + unsigned getVirtualIndex() const { return getUnsignedField(12); } + + DICompositeType getContainingType() const { + return getFieldAs(13); + } + unsigned isArtificial() const { + if (getVersion() <= llvm::LLVMDebugVersion8) + return getUnsignedField(14); + return (getUnsignedField(14) & FlagArtificial) != 0; + } + /// isPrivate - Return true if this subprogram has "private" + /// access specifier. 
+    bool isPrivate() const {
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagPrivate) != 0;
+    }
+    /// isProtected - Return true if this subprogram has "protected"
+    /// access specifier.
+    bool isProtected() const {
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagProtected) != 0;
+    }
+    /// isExplicit - Return true if this subprogram is marked as explicit.
+    bool isExplicit() const {
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagExplicit) != 0;
+    }
+    /// isPrototyped - Return true if this subprogram is prototyped.
+    bool isPrototyped() const {
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(14) & FlagPrototyped) != 0;
+    }
+
+    unsigned isOptimized() const;
+
+    StringRef getFilename() const {
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getCompileUnit().getFilename();
+
+      return getFieldAs<DIFile>(6).getFilename();
+    }
+
+    StringRef getDirectory() const {
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getCompileUnit().getDirectory();
+
+      return getFieldAs<DIFile>(6).getDirectory();
+    }
+
+    /// Verify - Verify that a subprogram descriptor is well formed.
+    bool Verify() const;
+
+    /// print - print subprogram.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print subprogram to dbgs() with a newline.
+    void dump() const;
+
+    /// describes - Return true if this subprogram provides debugging
+    /// information for the function F.
+    bool describes(const Function *F);
+
+    Function *getFunction() const { return getFunctionField(16); }
+  };
+
+  /// DIGlobalVariable - This is a wrapper for a global variable.
+  class DIGlobalVariable : public DIDescriptor {
+  public:
+    explicit DIGlobalVariable(const MDNode *N = 0) : DIDescriptor(N) {}
+
+    DIScope getContext() const       { return getFieldAs<DIScope>(2); }
+    StringRef getName() const        { return getStringField(3); }
+    StringRef getDisplayName() const { return getStringField(4); }
+    StringRef getLinkageName() const { return getStringField(5); }
+    DICompileUnit getCompileUnit() const {
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getFieldAs<DICompileUnit>(6);
+
+      DIFile F = getFieldAs<DIFile>(6);
+      return F.getCompileUnit();
+    }
+
+    unsigned getLineNumber() const { return getUnsignedField(7); }
+    DIType getType() const         { return getFieldAs<DIType>(8); }
+    unsigned isLocalToUnit() const { return getUnsignedField(9); }
+    unsigned isDefinition() const  { return getUnsignedField(10); }
+
+    GlobalVariable *getGlobal() const { return getGlobalVariableField(11); }
+    Constant *getConstant() const     { return getConstantField(11); }
+
+    /// Verify - Verify that a global variable descriptor is well formed.
+    bool Verify() const;
+
+    /// print - print global variable.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print global variable to dbgs() with a newline.
+    void dump() const;
+  };
+
+  /// DIVariable - This is a wrapper for a variable (e.g. parameter, local,
+  /// global etc).
+  class DIVariable : public DIDescriptor {
+  public:
+    explicit DIVariable(const MDNode *N = 0)
+      : DIDescriptor(N) {}
+
+    DIScope getContext() const { return getFieldAs<DIScope>(1); }
+    StringRef getName() const  { return getStringField(2); }
+    DICompileUnit getCompileUnit() const {
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getFieldAs<DICompileUnit>(3);
+
+      DIFile F = getFieldAs<DIFile>(3);
+      return F.getCompileUnit();
+    }
+    unsigned getLineNumber() const {
+      return (getUnsignedField(4) << 8) >> 8;
+    }
+    unsigned getArgNumber() const {
+      unsigned L = getUnsignedField(4);
+      return L >> 24;
+    }
+    DIType getType() const { return getFieldAs<DIType>(5); }
+
+    /// isArtificial - Return true if this variable is marked as "artificial".
+    bool isArtificial() const {
+      if (getVersion() <= llvm::LLVMDebugVersion8)
+        return false;
+      return (getUnsignedField(6) & FlagArtificial) != 0;
+    }
+
+
+    /// Verify - Verify that a variable descriptor is well formed.
+    bool Verify() const;
+
+    /// hasComplexAddress - Return true if the variable has a complex address.
+    bool hasComplexAddress() const {
+      return getNumAddrElements() > 0;
+    }
+
+    unsigned getNumAddrElements() const;
+
+    uint64_t getAddrElement(unsigned Idx) const {
+      return getUInt64Field(Idx+6);
+    }
+
+    /// isBlockByrefVariable - Return true if the variable was declared as
+    /// a "__block" variable (Apple Blocks).
+    bool isBlockByrefVariable() const {
+      return getType().isBlockByrefStruct();
+    }
+
+    /// isInlinedFnArgument - Return true if this variable provides debugging
+    /// information for an inlined function argument.
+    bool isInlinedFnArgument(const Function *CurFn);
+
+    /// print - print variable.
+    void print(raw_ostream &OS) const;
+
+    /// dump - print variable to dbgs() with a newline.
+    void dump() const;
+  };
+
+  /// DILexicalBlock - This is a wrapper for a lexical block.
+  class DILexicalBlock : public DIScope {
+  public:
+    explicit DILexicalBlock(const MDNode *N = 0) : DIScope(N) {}
+    DIScope getContext() const       { return getFieldAs<DIScope>(1); }
+    unsigned getLineNumber() const   { return getUnsignedField(2); }
+    unsigned getColumnNumber() const { return getUnsignedField(3); }
+    StringRef getDirectory() const {
+      StringRef dir = getFieldAs<DIFile>(4).getDirectory();
+      return !dir.empty() ? dir : getContext().getDirectory();
+    }
+    StringRef getFilename() const {
+      StringRef filename = getFieldAs<DIFile>(4).getFilename();
+      return !filename.empty() ? filename : getContext().getFilename();
+    }
+  };
+
+  /// DINameSpace - A wrapper for a C++ style name space.
+  class DINameSpace : public DIScope {
+  public:
+    explicit DINameSpace(const MDNode *N = 0) : DIScope(N) {}
+    DIScope getContext() const { return getFieldAs<DIScope>(1); }
+    StringRef getName() const  { return getStringField(2); }
+    StringRef getDirectory() const {
+      return getFieldAs<DIFile>(3).getDirectory();
+    }
+    StringRef getFilename() const {
+      return getFieldAs<DIFile>(3).getFilename();
+    }
+    DICompileUnit getCompileUnit() const {
+      if (getVersion() == llvm::LLVMDebugVersion7)
+        return getFieldAs<DICompileUnit>(3);
+
+      return getFieldAs<DIFile>(3).getCompileUnit();
+    }
+    unsigned getLineNumber() const { return getUnsignedField(4); }
+    bool Verify() const;
+  };
+
+  /// DILocation - This object holds location information. This object
+  /// is not associated with any DWARF tag.
+  class DILocation : public DIDescriptor {
+  public:
+    explicit DILocation(const MDNode *N) : DIDescriptor(N) { }
+
+    unsigned getLineNumber() const   { return getUnsignedField(0); }
+    unsigned getColumnNumber() const { return getUnsignedField(1); }
+    DIScope  getScope() const        { return getFieldAs<DIScope>(2); }
+    DILocation getOrigLocation() const {
+      return getFieldAs<DILocation>(3);
+    }
+    StringRef getFilename() const  { return getScope().getFilename(); }
+    StringRef getDirectory() const { return getScope().getDirectory(); }
+    bool Verify() const;
+  };
+
+  /// getDISubprogram - Find subprogram that is enclosing this scope.
+  DISubprogram getDISubprogram(const MDNode *Scope);
+
+  /// getDICompositeType - Find underlying composite type.
+  DICompositeType getDICompositeType(DIType T);
+
+  /// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+  /// to hold function specific information.
+  NamedMDNode *getOrInsertFnSpecificMDNode(Module &M, StringRef Name);
+
+  /// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+  /// suitable to hold function specific information.
+  NamedMDNode *getFnSpecificMDNode(const Module &M, StringRef Name);
+
+  class DebugInfoFinder {
+  public:
+    /// processModule - Process entire module and collect debug info
+    /// anchors.
+    void processModule(Module &M);
+
+  private:
+    /// processType - Process DIType.
+    void processType(DIType DT);
+
+    /// processLexicalBlock - Process DILexicalBlock.
+    void processLexicalBlock(DILexicalBlock LB);
+
+    /// processSubprogram - Process DISubprogram.
+    void processSubprogram(DISubprogram SP);
+
+    /// processDeclare - Process DbgDeclareInst.
+    void processDeclare(DbgDeclareInst *DDI);
+
+    /// processLocation - Process DILocation.
+    void processLocation(DILocation Loc);
+
+    /// addCompileUnit - Add compile unit into CUs.
+    bool addCompileUnit(DICompileUnit CU);
+
+    /// addGlobalVariable - Add global variable into GVs.
+    bool addGlobalVariable(DIGlobalVariable DIG);
+
+    // addSubprogram - Add subprogram into SPs.
+    bool addSubprogram(DISubprogram SP);
+
+    /// addType - Add type into Tys.
+    bool addType(DIType DT);
+
+  public:
+    typedef SmallVector<MDNode *, 8>::const_iterator iterator;
+    iterator compile_unit_begin()    const { return CUs.begin(); }
+    iterator compile_unit_end()      const { return CUs.end(); }
+    iterator subprogram_begin()      const { return SPs.begin(); }
+    iterator subprogram_end()        const { return SPs.end(); }
+    iterator global_variable_begin() const { return GVs.begin(); }
+    iterator global_variable_end()   const { return GVs.end(); }
+    iterator type_begin()            const { return TYs.begin(); }
+    iterator type_end()              const { return TYs.end(); }
+
+    unsigned compile_unit_count()    const { return CUs.size(); }
+    unsigned global_variable_count() const { return GVs.size(); }
+    unsigned subprogram_count()      const { return SPs.size(); }
+    unsigned type_count()            const { return TYs.size(); }
+
+  private:
+    SmallVector<MDNode *, 8> CUs;  // Compile Units
+    SmallVector<MDNode *, 8> SPs;  // Subprograms
+    SmallVector<MDNode *, 8> GVs;  // Global Variables
+    SmallVector<MDNode *, 8> TYs;  // Types
+    SmallPtrSet<MDNode *, 64> NodesSeen;
+  };
+} // end namespace llvm
+
+#endif
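DebugInfoFinder is the usual entry point for read-only consumers; a sketch assuming the class above, with an invented helper that lists the compile units:

#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void listCompileUnits(Module &M) {
  DebugInfoFinder Finder;
  Finder.processModule(M);       // Collects CUs, SPs, GVs and types.
  for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
         E = Finder.compile_unit_end(); I != E; ++I) {
    DICompileUnit CU(*I);        // Rewrap the collected MDNode.
    errs() << CU.getDirectory() << "/" << CU.getFilename() << "\n";
  }
}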
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines external functions that can be called to explicitly
+// instantiate the dominance tree printer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOMPRINTER_H
+#define LLVM_ANALYSIS_DOMPRINTER_H
+
+namespace llvm {
+  class FunctionPass;
+  FunctionPass *createDomPrinterPass();
+  FunctionPass *createDomOnlyPrinterPass();
+  FunctionPass *createDomViewerPass();
+  FunctionPass *createDomOnlyViewerPass();
+  FunctionPass *createPostDomPrinterPass();
+  FunctionPass *createPostDomOnlyPrinterPass();
+  FunctionPass *createPostDomViewerPass();
+  FunctionPass *createPostDomOnlyViewerPass();
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/DominanceFrontier.h b/final/include/llvm/Analysis/DominanceFrontier.h
new file mode 100644
index 00000000000..d7f74af1c65
--- /dev/null
+++ b/final/include/llvm/Analysis/DominanceFrontier.h
@@ -0,0 +1,189 @@
+//===- llvm/Analysis/DominanceFrontier.h - Dominator Frontiers --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DominanceFrontier class, which calculates and holds
+// the dominance frontier for a function.
+//
+// This should be considered deprecated; don't add any more uses of this data
+// structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOMINANCEFRONTIER_H
+#define LLVM_ANALYSIS_DOMINANCEFRONTIER_H
+
+#include "llvm/Analysis/Dominators.h"
+#include <map>
+#include <set>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// DominanceFrontierBase - Common base class for computing forward and inverse
+/// dominance frontiers for a function.
+///
+class DominanceFrontierBase : public FunctionPass {
+public:
+  typedef std::set<BasicBlock*>             DomSetType;    // Dom set for a bb
+  typedef std::map<BasicBlock*, DomSetType> DomSetMapType; // Dom set map
+protected:
+  DomSetMapType Frontiers;
+  std::vector<BasicBlock*> Roots;
+  const bool IsPostDominators;
+
+public:
+  DominanceFrontierBase(char &ID, bool isPostDom)
+    : FunctionPass(ID), IsPostDominators(isPostDom) {}
+
+  /// getRoots - Return the root blocks of the current CFG. This may include
+  /// multiple blocks if we are computing post dominators. For forward
+  /// dominators, this will always be a single block (the entry node).
+  ///
+  inline const std::vector<BasicBlock*> &getRoots() const { return Roots; }
+
+  /// isPostDominator - Returns true if this analysis is based on
+  /// postdominators.
+  bool isPostDominator() const { return IsPostDominators; }
+
+  virtual void releaseMemory() { Frontiers.clear(); }
+
+  // Accessor interface:
+  typedef DomSetMapType::iterator iterator;
+  typedef DomSetMapType::const_iterator const_iterator;
+  iterator       begin()       { return Frontiers.begin(); }
+  const_iterator begin() const { return Frontiers.begin(); }
+  iterator       end()         { return Frontiers.end(); }
+  const_iterator end()   const { return Frontiers.end(); }
+  iterator       find(BasicBlock *B)       { return Frontiers.find(B); }
+  const_iterator find(BasicBlock *B) const { return Frontiers.find(B); }
+
+  iterator addBasicBlock(BasicBlock *BB, const DomSetType &frontier) {
+    assert(find(BB) == end() && "Block already in DominanceFrontier!");
+    return Frontiers.insert(std::make_pair(BB, frontier)).first;
+  }
+
+  /// removeBlock - Remove basic block BB's frontier.
+  void removeBlock(BasicBlock *BB) {
+    assert(find(BB) != end() && "Block is not in DominanceFrontier!");
+    for (iterator I = begin(), E = end(); I != E; ++I)
+      I->second.erase(BB);
+    Frontiers.erase(BB);
+  }
+
+  void addToFrontier(iterator I, BasicBlock *Node) {
+    assert(I != end() && "BB is not in DominanceFrontier!");
+    I->second.insert(Node);
+  }
+
+  void removeFromFrontier(iterator I, BasicBlock *Node) {
+    assert(I != end() && "BB is not in DominanceFrontier!");
+    assert(I->second.count(Node) && "Node is not in DominanceFrontier of BB");
+    I->second.erase(Node);
+  }
+
+  /// compareDomSet - Return false if the two domsets match. Otherwise
+  /// return true.
+  bool compareDomSet(DomSetType &DS1, const DomSetType &DS2) const {
+    std::set<BasicBlock *> tmpSet;
+    for (DomSetType::const_iterator I = DS2.begin(),
+           E = DS2.end(); I != E; ++I)
+      tmpSet.insert(*I);
+
+    for (DomSetType::const_iterator I = DS1.begin(),
+           E = DS1.end(); I != E; ) {
+      BasicBlock *Node = *I++;
+
+      if (tmpSet.erase(Node) == 0)
+        // Node is in DS1 but not in DS2.
+        return true;
+    }
+
+    if (!tmpSet.empty())
+      // There are nodes that are in DS2 but not in DS1.
+      return true;
+
+    // DS1 and DS2 match.
+    return false;
+  }
+
+  /// compare - Return true if the other dominance frontier base differs from
+  /// this dominance frontier base. Otherwise return false.
+  bool compare(DominanceFrontierBase &Other) const {
+    DomSetMapType tmpFrontiers;
+    for (DomSetMapType::const_iterator I = Other.begin(),
+           E = Other.end(); I != E; ++I)
+      tmpFrontiers.insert(std::make_pair(I->first, I->second));
+
+    for (DomSetMapType::iterator I = tmpFrontiers.begin(),
+           E = tmpFrontiers.end(); I != E; ) {
+      BasicBlock *Node = I->first;
+      const_iterator DFI = find(Node);
+      if (DFI == end())
+        return true;
+
+      if (compareDomSet(I->second, DFI->second))
+        return true;
+
+      ++I;
+      tmpFrontiers.erase(Node);
+    }
+
+    if (!tmpFrontiers.empty())
+      return true;
+
+    return false;
+  }
+
+  /// print - Convert to human readable form
+  ///
+  virtual void print(raw_ostream &OS, const Module* = 0) const;
+
+  /// dump - Dump the dominance frontier to dbgs().
+  void dump() const;
+};
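+
+// Editorial note: a minimal sketch of querying a computed dominance frontier
+// through the accessor interface above (DF is a hypothetical, already
+// populated DominanceFrontierBase; BB is a block of the same function):
+//
+//   DominanceFrontierBase::iterator It = DF.find(BB);
+//   if (It != DF.end())
+//     for (DominanceFrontierBase::DomSetType::const_iterator
+//          I = It->second.begin(), E = It->second.end(); I != E; ++I)
+//       errs() << BB->getName() << " has " << (*I)->getName()
+//              << " in its frontier\n";
+//
+// Note the inverted convention above: compare() and compareDomSet() return
+// true when the two frontiers differ, and false when they match.
+
+//===-------------------------------------
+/// DominanceFrontier Class - Concrete subclass of DominanceFrontierBase that
+/// is used to compute forward dominator frontiers.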
+///
+class DominanceFrontier : public DominanceFrontierBase {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  DominanceFrontier() :
+    DominanceFrontierBase(ID, false) {
+      initializeDominanceFrontierPass(*PassRegistry::getPassRegistry());
+    }
+
+  BasicBlock *getRoot() const {
+    assert(Roots.size() == 1 && "Should always have entry node!");
+    return Roots[0];
+  }
+
+  virtual bool runOnFunction(Function &) {
+    Frontiers.clear();
+    DominatorTree &DT = getAnalysis<DominatorTree>();
+    Roots = DT.getRoots();
+    assert(Roots.size() == 1 && "Only one entry block for forward domfronts!");
+    calculate(DT, DT[Roots[0]]);
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<DominatorTree>();
+  }
+
+  const DomSetType &calculate(const DominatorTree &DT,
+                              const DomTreeNode *Node);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/DominatorInternals.h b/final/include/llvm/Analysis/DominatorInternals.h
new file mode 100644
index 00000000000..ae552b05abf
--- /dev/null
+++ b/final/include/llvm/Analysis/DominatorInternals.h
@@ -0,0 +1,289 @@
+//=== llvm/Analysis/DominatorInternals.h - Dominator Calculation -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
+#define LLVM_ANALYSIS_DOMINATOR_INTERNALS_H
+
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+//===----------------------------------------------------------------------===//
+//
+// DominatorTree construction - This pass constructs immediate dominator
+// information for a flow-graph based on the algorithm described in this
+// document:
+//
+//   A Fast Algorithm for Finding Dominators in a Flowgraph
+//   T. Lengauer & R. Tarjan, ACM TOPLAS July 1979, pgs 121-141.
+//
+// This implements the O(n*log(n)) versions of EVAL and LINK, because it turns
+// out that the theoretically slower O(n*log(n)) implementation is actually
+// faster than the almost-linear O(n*alpha(n)) version, even for large CFGs.
+//
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+template<class GraphT>
+unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
+                 typename GraphT::NodeType* V, unsigned N) {
+  // This is more understandable as a recursive algorithm, but we can't use the
+  // recursive algorithm due to stack depth issues. Keep it here for
+  // documentation purposes.
+#if 0
+  InfoRec &VInfo = DT.Info[DT.Roots[i]];
+  VInfo.DFSNum = VInfo.Semi = ++N;
+  VInfo.Label = V;
+
+  Vertex.push_back(V);        // Vertex[n] = V;
+
+  for (succ_iterator SI = succ_begin(V), E = succ_end(V); SI != E; ++SI) {
+    InfoRec &SuccVInfo = DT.Info[*SI];
+    if (SuccVInfo.Semi == 0) {
+      SuccVInfo.Parent = V;
+      N = DTDFSPass(DT, *SI, N);
+    }
+  }
+#else
+  bool IsChildOfArtificialExit = (N != 0);
+
+  SmallVector<std::pair<typename GraphT::NodeType*,
+                        typename GraphT::ChildIteratorType>, 32> Worklist;
+  Worklist.push_back(std::make_pair(V, GraphT::child_begin(V)));
+  while (!Worklist.empty()) {
+    typename GraphT::NodeType* BB = Worklist.back().first;
+    typename GraphT::ChildIteratorType NextSucc = Worklist.back().second;
+
+    typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &BBInfo =
+      DT.Info[BB];
+
+    // First time we visited this BB?
+ if (NextSucc == GraphT::child_begin(BB)) { + BBInfo.DFSNum = BBInfo.Semi = ++N; + BBInfo.Label = BB; + + DT.Vertex.push_back(BB); // Vertex[n] = V; + + if (IsChildOfArtificialExit) + BBInfo.Parent = 1; + + IsChildOfArtificialExit = false; + } + + // store the DFS number of the current BB - the reference to BBInfo might + // get invalidated when processing the successors. + unsigned BBDFSNum = BBInfo.DFSNum; + + // If we are done with this block, remove it from the worklist. + if (NextSucc == GraphT::child_end(BB)) { + Worklist.pop_back(); + continue; + } + + // Increment the successor number for the next time we get to it. + ++Worklist.back().second; + + // Visit the successor next, if it isn't already visited. + typename GraphT::NodeType* Succ = *NextSucc; + + typename DominatorTreeBase::InfoRec &SuccVInfo = + DT.Info[Succ]; + if (SuccVInfo.Semi == 0) { + SuccVInfo.Parent = BBDFSNum; + Worklist.push_back(std::make_pair(Succ, GraphT::child_begin(Succ))); + } + } +#endif + return N; +} + +template +typename GraphT::NodeType* +Eval(DominatorTreeBase& DT, + typename GraphT::NodeType *VIn, unsigned LastLinked) { + typename DominatorTreeBase::InfoRec &VInInfo = + DT.Info[VIn]; + if (VInInfo.DFSNum < LastLinked) + return VIn; + + SmallVector Work; + SmallPtrSet Visited; + + if (VInInfo.Parent >= LastLinked) + Work.push_back(VIn); + + while (!Work.empty()) { + typename GraphT::NodeType* V = Work.back(); + typename DominatorTreeBase::InfoRec &VInfo = + DT.Info[V]; + typename GraphT::NodeType* VAncestor = DT.Vertex[VInfo.Parent]; + + // Process Ancestor first + if (Visited.insert(VAncestor) && VInfo.Parent >= LastLinked) { + Work.push_back(VAncestor); + continue; + } + Work.pop_back(); + + // Update VInfo based on Ancestor info + if (VInfo.Parent < LastLinked) + continue; + + typename DominatorTreeBase::InfoRec &VAInfo = + DT.Info[VAncestor]; + typename GraphT::NodeType* VAncestorLabel = VAInfo.Label; + typename GraphT::NodeType* VLabel = VInfo.Label; + if (DT.Info[VAncestorLabel].Semi < DT.Info[VLabel].Semi) + VInfo.Label = VAncestorLabel; + VInfo.Parent = VAInfo.Parent; + } + + return VInInfo.Label; +} + +template +void Calculate(DominatorTreeBase::NodeType>& DT, + FuncT& F) { + typedef GraphTraits GraphT; + + unsigned N = 0; + bool MultipleRoots = (DT.Roots.size() > 1); + if (MultipleRoots) { + typename DominatorTreeBase::InfoRec &BBInfo = + DT.Info[NULL]; + BBInfo.DFSNum = BBInfo.Semi = ++N; + BBInfo.Label = NULL; + + DT.Vertex.push_back(NULL); // Vertex[n] = V; + } + + // Step #1: Number blocks in depth-first order and initialize variables used + // in later stages of the algorithm. + for (unsigned i = 0, e = static_cast(DT.Roots.size()); + i != e; ++i) + N = DFSPass(DT, DT.Roots[i], N); + + // it might be that some blocks did not get a DFS number (e.g., blocks of + // infinite loops). In these cases an artificial exit node is required. + MultipleRoots |= (DT.isPostDominator() && N != F.size()); + + // When naively implemented, the Lengauer-Tarjan algorithm requires a separate + // bucket for each vertex. However, this is unnecessary, because each vertex + // is only placed into a single bucket (that of its semidominator), and each + // vertex's bucket is processed before it is added to any bucket itself. + // + // Instead of using a bucket per vertex, we use a single array Buckets that + // has two purposes. Before the vertex V with preorder number i is processed, + // Buckets[i] stores the index of the first element in V's bucket. 
After V's + // bucket is processed, Buckets[i] stores the index of the next element in the + // bucket containing V, if any. + SmallVector Buckets; + Buckets.resize(N + 1); + for (unsigned i = 1; i <= N; ++i) + Buckets[i] = i; + + for (unsigned i = N; i >= 2; --i) { + typename GraphT::NodeType* W = DT.Vertex[i]; + typename DominatorTreeBase::InfoRec &WInfo = + DT.Info[W]; + + // Step #2: Implicitly define the immediate dominator of vertices + for (unsigned j = i; Buckets[j] != i; j = Buckets[j]) { + typename GraphT::NodeType* V = DT.Vertex[Buckets[j]]; + typename GraphT::NodeType* U = Eval(DT, V, i + 1); + DT.IDoms[V] = DT.Info[U].Semi < i ? U : W; + } + + // Step #3: Calculate the semidominators of all vertices + + // initialize the semi dominator to point to the parent node + WInfo.Semi = WInfo.Parent; + typedef GraphTraits > InvTraits; + for (typename InvTraits::ChildIteratorType CI = + InvTraits::child_begin(W), + E = InvTraits::child_end(W); CI != E; ++CI) { + typename InvTraits::NodeType *N = *CI; + if (DT.Info.count(N)) { // Only if this predecessor is reachable! + unsigned SemiU = DT.Info[Eval(DT, N, i + 1)].Semi; + if (SemiU < WInfo.Semi) + WInfo.Semi = SemiU; + } + } + + // If V is a non-root vertex and sdom(V) = parent(V), then idom(V) is + // necessarily parent(V). In this case, set idom(V) here and avoid placing + // V into a bucket. + if (WInfo.Semi == WInfo.Parent) { + DT.IDoms[W] = DT.Vertex[WInfo.Parent]; + } else { + Buckets[i] = Buckets[WInfo.Semi]; + Buckets[WInfo.Semi] = i; + } + } + + if (N >= 1) { + typename GraphT::NodeType* Root = DT.Vertex[1]; + for (unsigned j = 1; Buckets[j] != 1; j = Buckets[j]) { + typename GraphT::NodeType* V = DT.Vertex[Buckets[j]]; + DT.IDoms[V] = Root; + } + } + + // Step #4: Explicitly define the immediate dominator of each vertex + for (unsigned i = 2; i <= N; ++i) { + typename GraphT::NodeType* W = DT.Vertex[i]; + typename GraphT::NodeType*& WIDom = DT.IDoms[W]; + if (WIDom != DT.Vertex[DT.Info[W].Semi]) + WIDom = DT.IDoms[WIDom]; + } + + if (DT.Roots.empty()) return; + + // Add a node for the root. This node might be the actual root, if there is + // one exit block, or it may be the virtual exit (denoted by (BasicBlock *)0) + // which postdominates all real exits if there are multiple exit blocks, or + // an infinite loop. + typename GraphT::NodeType* Root = !MultipleRoots ? DT.Roots[0] : 0; + + DT.DomTreeNodes[Root] = DT.RootNode = + new DomTreeNodeBase(Root, 0); + + // Loop over all of the reachable blocks in the function... + for (unsigned i = 2; i <= N; ++i) { + typename GraphT::NodeType* W = DT.Vertex[i]; + + DomTreeNodeBase *BBNode = DT.DomTreeNodes[W]; + if (BBNode) continue; // Haven't calculated this node yet? 
+ + typename GraphT::NodeType* ImmDom = DT.getIDom(W); + + assert(ImmDom || DT.DomTreeNodes[NULL]); + + // Get or calculate the node for the immediate dominator + DomTreeNodeBase *IDomNode = + DT.getNodeForBlock(ImmDom); + + // Add a new tree node for this BasicBlock, and link it as a child of + // IDomNode + DomTreeNodeBase *C = + new DomTreeNodeBase(W, IDomNode); + DT.DomTreeNodes[W] = IDomNode->addChild(C); + } + + // Free temporary memory used to construct idom's + DT.IDoms.clear(); + DT.Info.clear(); + std::vector().swap(DT.Vertex); + + DT.updateDFSNumbers(); +} + +} + +#endif diff --git a/final/include/llvm/Analysis/Dominators.h b/final/include/llvm/Analysis/Dominators.h new file mode 100644 index 00000000000..230e83d3012 --- /dev/null +++ b/final/include/llvm/Analysis/Dominators.h @@ -0,0 +1,876 @@ +//===- llvm/Analysis/Dominators.h - Dominator Info Calculation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DominatorTree class, which provides fast and efficient +// dominance queries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DOMINATORS_H +#define LLVM_ANALYSIS_DOMINATORS_H + +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DominatorBase - Base class that other, more interesting dominator analyses +/// inherit from. +/// +template +class DominatorBase { +protected: + std::vector Roots; + const bool IsPostDominators; + inline explicit DominatorBase(bool isPostDom) : + Roots(), IsPostDominators(isPostDom) {} +public: + + /// getRoots - Return the root blocks of the current CFG. This may include + /// multiple blocks if we are computing post dominators. For forward + /// dominators, this will always be a single block (the entry node). 
+ /// + inline const std::vector &getRoots() const { return Roots; } + + /// isPostDominator - Returns true if analysis based of postdoms + /// + bool isPostDominator() const { return IsPostDominators; } +}; + + +//===----------------------------------------------------------------------===// +// DomTreeNode - Dominator Tree Node +template class DominatorTreeBase; +struct PostDominatorTree; +class MachineBasicBlock; + +template +class DomTreeNodeBase { + NodeT *TheBB; + DomTreeNodeBase *IDom; + std::vector *> Children; + int DFSNumIn, DFSNumOut; + + template friend class DominatorTreeBase; + friend struct PostDominatorTree; +public: + typedef typename std::vector *>::iterator iterator; + typedef typename std::vector *>::const_iterator + const_iterator; + + iterator begin() { return Children.begin(); } + iterator end() { return Children.end(); } + const_iterator begin() const { return Children.begin(); } + const_iterator end() const { return Children.end(); } + + NodeT *getBlock() const { return TheBB; } + DomTreeNodeBase *getIDom() const { return IDom; } + const std::vector*> &getChildren() const { + return Children; + } + + DomTreeNodeBase(NodeT *BB, DomTreeNodeBase *iDom) + : TheBB(BB), IDom(iDom), DFSNumIn(-1), DFSNumOut(-1) { } + + DomTreeNodeBase *addChild(DomTreeNodeBase *C) { + Children.push_back(C); + return C; + } + + size_t getNumChildren() const { + return Children.size(); + } + + void clearAllChildren() { + Children.clear(); + } + + bool compare(DomTreeNodeBase *Other) { + if (getNumChildren() != Other->getNumChildren()) + return true; + + SmallPtrSet OtherChildren; + for (iterator I = Other->begin(), E = Other->end(); I != E; ++I) { + NodeT *Nd = (*I)->getBlock(); + OtherChildren.insert(Nd); + } + + for (iterator I = begin(), E = end(); I != E; ++I) { + NodeT *N = (*I)->getBlock(); + if (OtherChildren.count(N) == 0) + return true; + } + return false; + } + + void setIDom(DomTreeNodeBase *NewIDom) { + assert(IDom && "No immediate dominator?"); + if (IDom != NewIDom) { + typename std::vector*>::iterator I = + std::find(IDom->Children.begin(), IDom->Children.end(), this); + assert(I != IDom->Children.end() && + "Not in immediate dominator children set!"); + // I am no longer your child... + IDom->Children.erase(I); + + // Switch to new dominator + IDom = NewIDom; + IDom->Children.push_back(this); + } + } + + /// getDFSNumIn/getDFSNumOut - These are an internal implementation detail, do + /// not call them. + unsigned getDFSNumIn() const { return DFSNumIn; } + unsigned getDFSNumOut() const { return DFSNumOut; } +private: + // Return true if this node is dominated by other. Use this only if DFS info + // is valid. 
+ bool DominatedBy(const DomTreeNodeBase *other) const { + return this->DFSNumIn >= other->DFSNumIn && + this->DFSNumOut <= other->DFSNumOut; + } +}; + +EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase); +EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase); + +template +static raw_ostream &operator<<(raw_ostream &o, + const DomTreeNodeBase *Node) { + if (Node->getBlock()) + WriteAsOperand(o, Node->getBlock(), false); + else + o << " <>"; + + o << " {" << Node->getDFSNumIn() << "," << Node->getDFSNumOut() << "}"; + + return o << "\n"; +} + +template +static void PrintDomTree(const DomTreeNodeBase *N, raw_ostream &o, + unsigned Lev) { + o.indent(2*Lev) << "[" << Lev << "] " << N; + for (typename DomTreeNodeBase::const_iterator I = N->begin(), + E = N->end(); I != E; ++I) + PrintDomTree(*I, o, Lev+1); +} + +typedef DomTreeNodeBase DomTreeNode; + +//===----------------------------------------------------------------------===// +/// DominatorTree - Calculate the immediate dominator tree for a function. +/// + +template +void Calculate(DominatorTreeBase::NodeType>& DT, + FuncT& F); + +template +class DominatorTreeBase : public DominatorBase { +protected: + typedef DenseMap*> DomTreeNodeMapType; + DomTreeNodeMapType DomTreeNodes; + DomTreeNodeBase *RootNode; + + bool DFSInfoValid; + unsigned int SlowQueries; + // Information record used during immediate dominators computation. + struct InfoRec { + unsigned DFSNum; + unsigned Parent; + unsigned Semi; + NodeT *Label; + + InfoRec() : DFSNum(0), Parent(0), Semi(0), Label(0) {} + }; + + DenseMap IDoms; + + // Vertex - Map the DFS number to the BasicBlock* + std::vector Vertex; + + // Info - Collection of information used during the computation of idoms. + DenseMap Info; + + void reset() { + for (typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.begin(), + E = DomTreeNodes.end(); I != E; ++I) + delete I->second; + DomTreeNodes.clear(); + IDoms.clear(); + this->Roots.clear(); + Vertex.clear(); + RootNode = 0; + } + + // NewBB is split and now it has one successor. Update dominator tree to + // reflect this change. + template + void Split(DominatorTreeBase& DT, + typename GraphT::NodeType* NewBB) { + assert(std::distance(GraphT::child_begin(NewBB), + GraphT::child_end(NewBB)) == 1 && + "NewBB should have a single successor!"); + typename GraphT::NodeType* NewBBSucc = *GraphT::child_begin(NewBB); + + std::vector PredBlocks; + typedef GraphTraits > InvTraits; + for (typename InvTraits::ChildIteratorType PI = + InvTraits::child_begin(NewBB), + PE = InvTraits::child_end(NewBB); PI != PE; ++PI) + PredBlocks.push_back(*PI); + + assert(!PredBlocks.empty() && "No predblocks?"); + + bool NewBBDominatesNewBBSucc = true; + for (typename InvTraits::ChildIteratorType PI = + InvTraits::child_begin(NewBBSucc), + E = InvTraits::child_end(NewBBSucc); PI != E; ++PI) { + typename InvTraits::NodeType *ND = *PI; + if (ND != NewBB && !DT.dominates(NewBBSucc, ND) && + DT.isReachableFromEntry(ND)) { + NewBBDominatesNewBBSucc = false; + break; + } + } + + // Find NewBB's immediate dominator and create new dominator tree node for + // NewBB. + NodeT *NewBBIDom = 0; + unsigned i = 0; + for (i = 0; i < PredBlocks.size(); ++i) + if (DT.isReachableFromEntry(PredBlocks[i])) { + NewBBIDom = PredBlocks[i]; + break; + } + + // It's possible that none of the predecessors of NewBB are reachable; + // in that case, NewBB itself is unreachable, so nothing needs to be + // changed. 
+ if (!NewBBIDom) + return; + + for (i = i + 1; i < PredBlocks.size(); ++i) { + if (DT.isReachableFromEntry(PredBlocks[i])) + NewBBIDom = DT.findNearestCommonDominator(NewBBIDom, PredBlocks[i]); + } + + // Create the new dominator tree node... and set the idom of NewBB. + DomTreeNodeBase *NewBBNode = DT.addNewBlock(NewBB, NewBBIDom); + + // If NewBB strictly dominates other blocks, then it is now the immediate + // dominator of NewBBSucc. Update the dominator tree as appropriate. + if (NewBBDominatesNewBBSucc) { + DomTreeNodeBase *NewBBSuccNode = DT.getNode(NewBBSucc); + DT.changeImmediateDominator(NewBBSuccNode, NewBBNode); + } + } + +public: + explicit DominatorTreeBase(bool isPostDom) + : DominatorBase(isPostDom), DFSInfoValid(false), SlowQueries(0) {} + virtual ~DominatorTreeBase() { reset(); } + + /// compare - Return false if the other dominator tree base matches this + /// dominator tree base. Otherwise return true. + bool compare(DominatorTreeBase &Other) const { + + const DomTreeNodeMapType &OtherDomTreeNodes = Other.DomTreeNodes; + if (DomTreeNodes.size() != OtherDomTreeNodes.size()) + return true; + + for (typename DomTreeNodeMapType::const_iterator + I = this->DomTreeNodes.begin(), + E = this->DomTreeNodes.end(); I != E; ++I) { + NodeT *BB = I->first; + typename DomTreeNodeMapType::const_iterator OI = OtherDomTreeNodes.find(BB); + if (OI == OtherDomTreeNodes.end()) + return true; + + DomTreeNodeBase* MyNd = I->second; + DomTreeNodeBase* OtherNd = OI->second; + + if (MyNd->compare(OtherNd)) + return true; + } + + return false; + } + + virtual void releaseMemory() { reset(); } + + /// getNode - return the (Post)DominatorTree node for the specified basic + /// block. This is the same as using operator[] on this class. + /// + inline DomTreeNodeBase *getNode(NodeT *BB) const { + typename DomTreeNodeMapType::const_iterator I = DomTreeNodes.find(BB); + return I != DomTreeNodes.end() ? I->second : 0; + } + + /// getRootNode - This returns the entry node for the CFG of the function. If + /// this tree represents the post-dominance relations for a function, however, + /// this root may be a node with the block == NULL. This is the case when + /// there are multiple exit nodes from a particular function. Consumers of + /// post-dominance information must be capable of dealing with this + /// possibility. + /// + DomTreeNodeBase *getRootNode() { return RootNode; } + const DomTreeNodeBase *getRootNode() const { return RootNode; } + + /// properlyDominates - Returns true iff this dominates N and this != N. + /// Note that this is not a constant time operation! + /// + bool properlyDominates(const DomTreeNodeBase *A, + const DomTreeNodeBase *B) const { + if (A == 0 || B == 0) return false; + return dominatedBySlowTreeWalk(A, B); + } + + inline bool properlyDominates(const NodeT *A, const NodeT *B) { + if (A == B) + return false; + + // Cast away the const qualifiers here. This is ok since + // this function doesn't actually return the values returned + // from getNode. + return properlyDominates(getNode(const_cast(A)), + getNode(const_cast(B))); + } + + bool dominatedBySlowTreeWalk(const DomTreeNodeBase *A, + const DomTreeNodeBase *B) const { + const DomTreeNodeBase *IDom; + if (A == 0 || B == 0) return false; + while ((IDom = B->getIDom()) != 0 && IDom != A && IDom != B) + B = IDom; // Walk up the tree + return IDom != 0; + } + + + /// isReachableFromEntry - Return true if A is dominated by the entry + /// block of the function containing it. 
+ bool isReachableFromEntry(const NodeT* A) { + assert(!this->isPostDominator() && + "This is not implemented for post dominators"); + return dominates(&A->getParent()->front(), A); + } + + /// dominates - Returns true iff A dominates B. Note that this is not a + /// constant time operation! + /// + inline bool dominates(const DomTreeNodeBase *A, + const DomTreeNodeBase *B) { + if (B == A) + return true; // A node trivially dominates itself. + + if (A == 0 || B == 0) + return false; + + // Compare the result of the tree walk and the dfs numbers, if expensive + // checks are enabled. +#ifdef XDEBUG + assert((!DFSInfoValid || + (dominatedBySlowTreeWalk(A, B) == B->DominatedBy(A))) && + "Tree walk disagrees with dfs numbers!"); +#endif + + if (DFSInfoValid) + return B->DominatedBy(A); + + // If we end up with too many slow queries, just update the + // DFS numbers on the theory that we are going to keep querying. + SlowQueries++; + if (SlowQueries > 32) { + updateDFSNumbers(); + return B->DominatedBy(A); + } + + return dominatedBySlowTreeWalk(A, B); + } + + inline bool dominates(const NodeT *A, const NodeT *B) { + if (A == B) + return true; + + // Cast away the const qualifiers here. This is ok since + // this function doesn't actually return the values returned + // from getNode. + return dominates(getNode(const_cast(A)), + getNode(const_cast(B))); + } + + NodeT *getRoot() const { + assert(this->Roots.size() == 1 && "Should always have entry node!"); + return this->Roots[0]; + } + + /// findNearestCommonDominator - Find nearest common dominator basic block + /// for basic block A and B. If there is no such block then return NULL. + NodeT *findNearestCommonDominator(NodeT *A, NodeT *B) { + assert(A->getParent() == B->getParent() && + "Two blocks are not in same function"); + + // If either A or B is a entry block then it is nearest common dominator + // (for forward-dominators). + if (!this->isPostDominator()) { + NodeT &Entry = A->getParent()->front(); + if (A == &Entry || B == &Entry) + return &Entry; + } + + // If B dominates A then B is nearest common dominator. + if (dominates(B, A)) + return B; + + // If A dominates B then A is nearest common dominator. + if (dominates(A, B)) + return A; + + DomTreeNodeBase *NodeA = getNode(A); + DomTreeNodeBase *NodeB = getNode(B); + + // Collect NodeA dominators set. + SmallPtrSet*, 16> NodeADoms; + NodeADoms.insert(NodeA); + DomTreeNodeBase *IDomA = NodeA->getIDom(); + while (IDomA) { + NodeADoms.insert(IDomA); + IDomA = IDomA->getIDom(); + } + + // Walk NodeB immediate dominators chain and find common dominator node. + DomTreeNodeBase *IDomB = NodeB->getIDom(); + while (IDomB) { + if (NodeADoms.count(IDomB) != 0) + return IDomB->getBlock(); + + IDomB = IDomB->getIDom(); + } + + return NULL; + } + + const NodeT *findNearestCommonDominator(const NodeT *A, const NodeT *B) { + // Cast away the const qualifiers here. This is ok since + // const is re-introduced on the return type. + return findNearestCommonDominator(const_cast(A), + const_cast(B)); + } + + //===--------------------------------------------------------------------===// + // API to update (Post)DominatorTree information based on modifications to + // the CFG... + + /// addNewBlock - Add a new node to the dominator tree information. This + /// creates a new node as a child of DomBB dominator node,linking it into + /// the children list of the immediate dominator. 
+ DomTreeNodeBase *addNewBlock(NodeT *BB, NodeT *DomBB) { + assert(getNode(BB) == 0 && "Block already in dominator tree!"); + DomTreeNodeBase *IDomNode = getNode(DomBB); + assert(IDomNode && "Not immediate dominator specified for block!"); + DFSInfoValid = false; + return DomTreeNodes[BB] = + IDomNode->addChild(new DomTreeNodeBase(BB, IDomNode)); + } + + /// changeImmediateDominator - This method is used to update the dominator + /// tree information when a node's immediate dominator changes. + /// + void changeImmediateDominator(DomTreeNodeBase *N, + DomTreeNodeBase *NewIDom) { + assert(N && NewIDom && "Cannot change null node pointers!"); + DFSInfoValid = false; + N->setIDom(NewIDom); + } + + void changeImmediateDominator(NodeT *BB, NodeT *NewBB) { + changeImmediateDominator(getNode(BB), getNode(NewBB)); + } + + /// eraseNode - Removes a node from the dominator tree. Block must not + /// dominate any other blocks. Removes node from its immediate dominator's + /// children list. Deletes dominator node associated with basic block BB. + void eraseNode(NodeT *BB) { + DomTreeNodeBase *Node = getNode(BB); + assert(Node && "Removing node that isn't in dominator tree."); + assert(Node->getChildren().empty() && "Node is not a leaf node."); + + // Remove node from immediate dominator's children list. + DomTreeNodeBase *IDom = Node->getIDom(); + if (IDom) { + typename std::vector*>::iterator I = + std::find(IDom->Children.begin(), IDom->Children.end(), Node); + assert(I != IDom->Children.end() && + "Not in immediate dominator children set!"); + // I am no longer your child... + IDom->Children.erase(I); + } + + DomTreeNodes.erase(BB); + delete Node; + } + + /// removeNode - Removes a node from the dominator tree. Block must not + /// dominate any other blocks. Invalidates any node pointing to removed + /// block. + void removeNode(NodeT *BB) { + assert(getNode(BB) && "Removing node that isn't in dominator tree."); + DomTreeNodes.erase(BB); + } + + /// splitBlock - BB is split and now it has one successor. Update dominator + /// tree to reflect this change. + void splitBlock(NodeT* NewBB) { + if (this->IsPostDominators) + this->Split, GraphTraits > >(*this, NewBB); + else + this->Split >(*this, NewBB); + } + + /// print - Convert to human readable form + /// + void print(raw_ostream &o) const { + o << "=============================--------------------------------\n"; + if (this->isPostDominator()) + o << "Inorder PostDominator Tree: "; + else + o << "Inorder Dominator Tree: "; + if (!this->DFSInfoValid) + o << "DFSNumbers invalid: " << SlowQueries << " slow queries."; + o << "\n"; + + // The postdom tree can have a null root if there are no returns. + if (getRootNode()) + PrintDomTree(getRootNode(), o, 1); + } + +protected: + template + friend typename GraphT::NodeType* Eval( + DominatorTreeBase& DT, + typename GraphT::NodeType* V, + unsigned LastLinked); + + template + friend unsigned DFSPass(DominatorTreeBase& DT, + typename GraphT::NodeType* V, + unsigned N); + + template + friend void Calculate(DominatorTreeBase::NodeType>& DT, + FuncT& F); + + /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking + /// dominator tree in dfs order. 
+ void updateDFSNumbers() { + unsigned DFSNum = 0; + + SmallVector*, + typename DomTreeNodeBase::iterator>, 32> WorkStack; + + DomTreeNodeBase *ThisRoot = getRootNode(); + + if (!ThisRoot) + return; + + // Even in the case of multiple exits that form the post dominator root + // nodes, do not iterate over all exits, but start from the virtual root + // node. Otherwise bbs, that are not post dominated by any exit but by the + // virtual root node, will never be assigned a DFS number. + WorkStack.push_back(std::make_pair(ThisRoot, ThisRoot->begin())); + ThisRoot->DFSNumIn = DFSNum++; + + while (!WorkStack.empty()) { + DomTreeNodeBase *Node = WorkStack.back().first; + typename DomTreeNodeBase::iterator ChildIt = + WorkStack.back().second; + + // If we visited all of the children of this node, "recurse" back up the + // stack setting the DFOutNum. + if (ChildIt == Node->end()) { + Node->DFSNumOut = DFSNum++; + WorkStack.pop_back(); + } else { + // Otherwise, recursively visit this child. + DomTreeNodeBase *Child = *ChildIt; + ++WorkStack.back().second; + + WorkStack.push_back(std::make_pair(Child, Child->begin())); + Child->DFSNumIn = DFSNum++; + } + } + + SlowQueries = 0; + DFSInfoValid = true; + } + + DomTreeNodeBase *getNodeForBlock(NodeT *BB) { + typename DomTreeNodeMapType::iterator I = this->DomTreeNodes.find(BB); + if (I != this->DomTreeNodes.end() && I->second) + return I->second; + + // Haven't calculated this node yet? Get or calculate the node for the + // immediate dominator. + NodeT *IDom = getIDom(BB); + + assert(IDom || this->DomTreeNodes[NULL]); + DomTreeNodeBase *IDomNode = getNodeForBlock(IDom); + + // Add a new tree node for this BasicBlock, and link it as a child of + // IDomNode + DomTreeNodeBase *C = new DomTreeNodeBase(BB, IDomNode); + return this->DomTreeNodes[BB] = IDomNode->addChild(C); + } + + inline NodeT *getIDom(NodeT *BB) const { + typename DenseMap::const_iterator I = IDoms.find(BB); + return I != IDoms.end() ? I->second : 0; + } + + inline void addRoot(NodeT* BB) { + this->Roots.push_back(BB); + } + +public: + /// recalculate - compute a dominator tree for the given function + template + void recalculate(FT& F) { + reset(); + this->Vertex.push_back(0); + + if (!this->IsPostDominators) { + // Initialize root + this->Roots.push_back(&F.front()); + this->IDoms[&F.front()] = 0; + this->DomTreeNodes[&F.front()] = 0; + + Calculate(*this, F); + } else { + // Initialize the roots list + for (typename FT::iterator I = F.begin(), E = F.end(); I != E; ++I) { + if (std::distance(GraphTraits::child_begin(I), + GraphTraits::child_end(I)) == 0) + addRoot(I); + + // Prepopulate maps so that we don't get iterator invalidation issues later. + this->IDoms[I] = 0; + this->DomTreeNodes[I] = 0; + } + + Calculate >(*this, F); + } + } +}; + +EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase); + +//===------------------------------------- +/// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to +/// compute a normal dominator tree. +/// +class DominatorTree : public FunctionPass { +public: + static char ID; // Pass ID, replacement for typeid + DominatorTreeBase* DT; + + DominatorTree() : FunctionPass(ID) { + initializeDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase(false); + } + + ~DominatorTree() { + delete DT; + } + + DominatorTreeBase& getBase() { return *DT; } + + /// getRoots - Return the root blocks of the current CFG. This may include + /// multiple blocks if we are computing post dominators. 
For forward + /// dominators, this will always be a single block (the entry node). + /// + inline const std::vector &getRoots() const { + return DT->getRoots(); + } + + inline BasicBlock *getRoot() const { + return DT->getRoot(); + } + + inline DomTreeNode *getRootNode() const { + return DT->getRootNode(); + } + + /// compare - Return false if the other dominator tree matches this + /// dominator tree. Otherwise return true. + inline bool compare(DominatorTree &Other) const { + DomTreeNode *R = getRootNode(); + DomTreeNode *OtherR = Other.getRootNode(); + + if (!R || !OtherR || R->getBlock() != OtherR->getBlock()) + return true; + + if (DT->compare(Other.getBase())) + return true; + + return false; + } + + virtual bool runOnFunction(Function &F); + + virtual void verifyAnalysis() const; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + inline bool dominates(const DomTreeNode* A, const DomTreeNode* B) const { + return DT->dominates(A, B); + } + + inline bool dominates(const BasicBlock* A, const BasicBlock* B) const { + return DT->dominates(A, B); + } + + // dominates - Return true if A dominates B. This performs the + // special checks necessary if A and B are in the same basic block. + bool dominates(const Instruction *A, const Instruction *B) const; + + bool properlyDominates(const DomTreeNode *A, const DomTreeNode *B) const { + return DT->properlyDominates(A, B); + } + + bool properlyDominates(const BasicBlock *A, const BasicBlock *B) const { + return DT->properlyDominates(A, B); + } + + /// findNearestCommonDominator - Find nearest common dominator basic block + /// for basic block A and B. If there is no such block then return NULL. + inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) { + return DT->findNearestCommonDominator(A, B); + } + + inline const BasicBlock *findNearestCommonDominator(const BasicBlock *A, + const BasicBlock *B) { + return DT->findNearestCommonDominator(A, B); + } + + inline DomTreeNode *operator[](BasicBlock *BB) const { + return DT->getNode(BB); + } + + /// getNode - return the (Post)DominatorTree node for the specified basic + /// block. This is the same as using operator[] on this class. + /// + inline DomTreeNode *getNode(BasicBlock *BB) const { + return DT->getNode(BB); + } + + /// addNewBlock - Add a new node to the dominator tree information. This + /// creates a new node as a child of DomBB dominator node,linking it into + /// the children list of the immediate dominator. + inline DomTreeNode *addNewBlock(BasicBlock *BB, BasicBlock *DomBB) { + return DT->addNewBlock(BB, DomBB); + } + + /// changeImmediateDominator - This method is used to update the dominator + /// tree information when a node's immediate dominator changes. + /// + inline void changeImmediateDominator(BasicBlock *N, BasicBlock* NewIDom) { + DT->changeImmediateDominator(N, NewIDom); + } + + inline void changeImmediateDominator(DomTreeNode *N, DomTreeNode* NewIDom) { + DT->changeImmediateDominator(N, NewIDom); + } + + /// eraseNode - Removes a node from the dominator tree. Block must not + /// dominate any other blocks. Removes node from its immediate dominator's + /// children list. Deletes dominator node associated with basic block BB. + inline void eraseNode(BasicBlock *BB) { + DT->eraseNode(BB); + } + + /// splitBlock - BB is split and now it has one successor. Update dominator + /// tree to reflect this change. 
+ inline void splitBlock(BasicBlock* NewBB) { + DT->splitBlock(NewBB); + } + + bool isReachableFromEntry(const BasicBlock* A) { + return DT->isReachableFromEntry(A); + } + + + virtual void releaseMemory() { + DT->releaseMemory(); + } + + virtual void print(raw_ostream &OS, const Module* M= 0) const; +}; + +//===------------------------------------- +/// DominatorTree GraphTraits specialization so the DominatorTree can be +/// iterable by generic graph iterators. +/// +template <> struct GraphTraits { + typedef DomTreeNode NodeType; + typedef NodeType::iterator ChildIteratorType; + + static NodeType *getEntryNode(NodeType *N) { + return N; + } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->end(); + } + + typedef df_iterator nodes_iterator; + + static nodes_iterator nodes_begin(DomTreeNode *N) { + return df_begin(getEntryNode(N)); + } + + static nodes_iterator nodes_end(DomTreeNode *N) { + return df_end(getEntryNode(N)); + } +}; + +template <> struct GraphTraits + : public GraphTraits { + static NodeType *getEntryNode(DominatorTree *DT) { + return DT->getRootNode(); + } + + static nodes_iterator nodes_begin(DominatorTree *N) { + return df_begin(getEntryNode(N)); + } + + static nodes_iterator nodes_end(DominatorTree *N) { + return df_end(getEntryNode(N)); + } +}; + + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/FindUsedTypes.h b/final/include/llvm/Analysis/FindUsedTypes.h new file mode 100644 index 00000000000..fc57e1a0469 --- /dev/null +++ b/final/include/llvm/Analysis/FindUsedTypes.h @@ -0,0 +1,66 @@ +//===- llvm/Analysis/FindUsedTypes.h - Find all Types in use ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to seek out all of the types in use by the program. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_FINDUSEDTYPES_H +#define LLVM_ANALYSIS_FINDUSEDTYPES_H + +#include "llvm/Pass.h" +#include + +namespace llvm { + +class Type; +class Value; + +class FindUsedTypes : public ModulePass { + std::set UsedTypes; +public: + static char ID; // Pass identification, replacement for typeid + FindUsedTypes() : ModulePass(ID) { + initializeFindUsedTypesPass(*PassRegistry::getPassRegistry()); + } + + /// getTypes - After the pass has been run, return the set containing all of + /// the types used in the module. + /// + const std::set &getTypes() const { return UsedTypes; } + + /// Print the types found in the module. If the optional Module parameter is + /// passed in, then the types are printed symbolically if possible, using the + /// symbol table from the module. + /// + void print(raw_ostream &o, const Module *M) const; + +private: + /// IncorporateType - Incorporate one type and all of its subtypes into the + /// collection of used types. + /// + void IncorporateType(const Type *Ty); + + /// IncorporateValue - Incorporate all of the types used by this value. + /// + void IncorporateValue(const Value *V); + +public: + /// run - This incorporates all types used by the specified module + bool runOnModule(Module &M); + + /// getAnalysisUsage - We do not modify anything. 
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/IVUsers.h b/final/include/llvm/Analysis/IVUsers.h
new file mode 100644
index 00000000000..578e6aba833
--- /dev/null
+++ b/final/include/llvm/Analysis/IVUsers.h
@@ -0,0 +1,175 @@
+//===- llvm/Analysis/IVUsers.h - Induction Variable Users -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements bookkeeping for "interesting" users of expressions
+// computed from induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_IVUSERS_H
+#define LLVM_ANALYSIS_IVUSERS_H
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+class DominatorTree;
+class Instruction;
+class Value;
+class ScalarEvolution;
+class SCEV;
+class IVUsers;
+
+/// IVStrideUse - Keep track of one use of a strided induction variable.
+/// The Expr member keeps track of the expression, User is the actual user
+/// instruction of the operand, and 'OperandValToReplace' is the operand of
+/// the User that is the use.
+class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
+  friend class IVUsers;
+public:
+  IVStrideUse(IVUsers *P, Instruction* U, Value *O)
+    : CallbackVH(U), Parent(P), OperandValToReplace(O) {
+  }
+
+  /// getUser - Return the user instruction for this use.
+  Instruction *getUser() const {
+    return cast<Instruction>(getValPtr());
+  }
+
+  /// setUser - Assign a new user instruction for this use.
+  void setUser(Instruction *NewUser) {
+    setValPtr(NewUser);
+  }
+
+  /// getOperandValToReplace - Return the Value of the operand in the user
+  /// instruction that this IVStrideUse is representing.
+  Value *getOperandValToReplace() const {
+    return OperandValToReplace;
+  }
+
+  /// setOperandValToReplace - Assign a new Value as the operand value
+  /// to replace.
+  void setOperandValToReplace(Value *Op) {
+    OperandValToReplace = Op;
+  }
+
+  /// getPostIncLoops - Return the set of loops for which the expression has
+  /// been adjusted to use post-inc mode.
+  const PostIncLoopSet &getPostIncLoops() const {
+    return PostIncLoops;
+  }
+
+  /// transformToPostInc - Transform the expression to post-inc form for the
+  /// given loop.
+  void transformToPostInc(const Loop *L);
+
+private:
+  /// Parent - a pointer to the IVUsers that owns this IVStrideUse.
+  IVUsers *Parent;
+
+  /// OperandValToReplace - The Value of the operand in the user instruction
+  /// that this IVStrideUse is representing.
+  WeakVH OperandValToReplace;
+
+  /// PostIncLoops - The set of loops for which Expr has been adjusted to
+  /// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
+  PostIncLoopSet PostIncLoops;
+
+  /// Deleted - Implementation of CallbackVH virtual function to
+  /// receive notification when the User is deleted.
+  virtual void deleted();
+};
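+
+// Editorial note: a minimal sketch of walking the use list this analysis
+// collects, from inside a loop pass (IU is a hypothetical IVUsers instance,
+// e.g. obtained with getAnalysis<IVUsers>()):
+//
+//   for (IVUsers::iterator I = IU.begin(), E = IU.end(); I != E; ++I) {
+//     Instruction *User = I->getUser();         // user of the IV expression
+//     Value *Op = I->getOperandValToReplace();  // the strided operand
+//     (void)User; (void)Op;
+//   }
+
+template<> struct ilist_traits<IVStrideUse>
+  : public ilist_default_traits<IVStrideUse> {
+  // createSentinel is used to get hold of a node that marks the end of
+  // the list...
+  // The sentinel is relative to this instance, so we use a non-static
+  // method.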
+  IVStrideUse *createSentinel() const {
+    // since i(p)lists always publicly derive from the corresponding
+    // traits, placing a data member in this class will augment i(p)list.
+    // But since the NodeTy is expected to publicly derive from
+    // ilist_node<NodeTy>, there is a legal viable downcast from it
+    // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
+    // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
+    // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
+    // the superposition.
+    return static_cast<IVStrideUse*>(&Sentinel);
+  }
+  static void destroySentinel(IVStrideUse*) {}
+
+  IVStrideUse *provideInitialHead() const { return createSentinel(); }
+  IVStrideUse *ensureHead(IVStrideUse*) const { return createSentinel(); }
+  static void noteHead(IVStrideUse*, IVStrideUse*) {}
+
+private:
+  mutable ilist_node<IVStrideUse> Sentinel;
+};
+
+class IVUsers : public LoopPass {
+  friend class IVStrideUse;
+  Loop *L;
+  LoopInfo *LI;
+  DominatorTree *DT;
+  ScalarEvolution *SE;
+  SmallPtrSet<Instruction*, 16> Processed;
+
+  /// IVUses - A list of all tracked IV uses of induction variable expressions
+  /// we are interested in.
+  ilist<IVStrideUse> IVUses;
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual bool runOnLoop(Loop *L, LPPassManager &LPM);
+
+  virtual void releaseMemory();
+
+public:
+  static char ID; // Pass ID, replacement for typeid
+  IVUsers();
+
+  /// AddUsersIfInteresting - Inspect the specified Instruction. If it is a
+  /// reducible SCEV, recursively add its users to the IVUsesByStride set and
+  /// return true. Otherwise, return false.
+  bool AddUsersIfInteresting(Instruction *I);
+
+  IVStrideUse &AddUser(Instruction *User, Value *Operand);
+
+  /// getReplacementExpr - Return a SCEV expression which computes the
+  /// value of the OperandValToReplace of the given IVStrideUse.
+  const SCEV *getReplacementExpr(const IVStrideUse &IU) const;
+
+  /// getExpr - Return the expression for the use.
+  const SCEV *getExpr(const IVStrideUse &IU) const;
+
+  const SCEV *getStride(const IVStrideUse &IU, const Loop *L) const;
+
+  typedef ilist<IVStrideUse>::iterator iterator;
+  typedef ilist<IVStrideUse>::const_iterator const_iterator;
+  iterator begin() { return IVUses.begin(); }
+  iterator end()   { return IVUses.end(); }
+  const_iterator begin() const { return IVUses.begin(); }
+  const_iterator end() const   { return IVUses.end(); }
+  bool empty() const { return IVUses.empty(); }
+
+  void print(raw_ostream &OS, const Module* = 0) const;
+
+  /// dump - This method is used for debugging.
+  void dump() const;
+};
+
+Pass *createIVUsersPass();
+
+}
+
+#endif
diff --git a/final/include/llvm/Analysis/InlineCost.h b/final/include/llvm/Analysis/InlineCost.h
new file mode 100644
index 00000000000..b08bf57ace9
--- /dev/null
+++ b/final/include/llvm/Analysis/InlineCost.h
@@ -0,0 +1,182 @@
+//===- InlineCost.h - Cost analysis for inliner -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements heuristics for inlining decisions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INLINECOST_H
+#define LLVM_ANALYSIS_INLINECOST_H
+
+#include <cassert>
+#include <climits>
+#include <vector>
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/Analysis/CodeMetrics.h"
+
+namespace llvm {
+
+  class Value;
+  class Function;
+  class BasicBlock;
+  class CallSite;
+  template<class PtrType, unsigned SmallSize>
+  class SmallPtrSet;
+
+  namespace InlineConstants {
+    // Various magic constants used to adjust heuristics.
+    const int InstrCost = 5;
+    const int IndirectCallBonus = -100;
+    const int CallPenalty = 25;
+    const int LastCallToStaticBonus = -15000;
+    const int ColdccPenalty = 2000;
+    const int NoreturnPenalty = 10000;
+  }
+
+  /// InlineCost - Represent the cost of inlining a function. This
+  /// supports special values for functions which should "always" or
+  /// "never" be inlined. Otherwise, the cost represents a unitless
+  /// amount; smaller values increase the likelihood of the function
+  /// being inlined.
+  class InlineCost {
+    enum Kind {
+      Value,
+      Always,
+      Never
+    };
+
+    // This is a do-it-yourself implementation of
+    //   int Cost : 30;
+    //   unsigned Type : 2;
+    // We used to use bitfields, but they were sometimes miscompiled (PR3822).
+    enum { TYPE_BITS = 2 };
+    enum { COST_BITS = unsigned(sizeof(unsigned)) * CHAR_BIT - TYPE_BITS };
+    unsigned TypedCost; // int Cost : COST_BITS; unsigned Type : TYPE_BITS;
+
+    Kind getType() const {
+      return Kind(TypedCost >> COST_BITS);
+    }
+
+    int getCost() const {
+      // Sign-extend the bottom COST_BITS bits.
+      return (int(TypedCost << TYPE_BITS)) >> TYPE_BITS;
+    }
+
+    InlineCost(int C, int T) {
+      TypedCost = (unsigned(C << TYPE_BITS) >> TYPE_BITS) | (T << COST_BITS);
+      assert(getCost() == C && "Cost exceeds InlineCost precision");
+    }
+  public:
+    static InlineCost get(int Cost) { return InlineCost(Cost, Value); }
+    static InlineCost getAlways() { return InlineCost(0, Always); }
+    static InlineCost getNever() { return InlineCost(0, Never); }
+
+    bool isVariable() const { return getType() == Value; }
+    bool isAlways() const { return getType() == Always; }
+    bool isNever() const { return getType() == Never; }
+
+    /// getValue() - Return a "variable" inline cost's amount. It is
+    /// an error to call this on an "always" or "never" InlineCost.
+    int getValue() const {
+      assert(getType() == Value && "Invalid access of InlineCost");
+      return getCost();
+    }
+  };
+
+  /// InlineCostAnalyzer - Cost analyzer used by inliner.
+  class InlineCostAnalyzer {
+    struct ArgInfo {
+    public:
+      unsigned ConstantWeight;
+      unsigned AllocaWeight;
+
+      ArgInfo(unsigned CWeight, unsigned AWeight)
+        : ConstantWeight(CWeight), AllocaWeight(AWeight)
+      {}
+    };
+
+    struct FunctionInfo {
+      CodeMetrics Metrics;
+
+      /// ArgumentWeights - Each formal argument of the function is inspected to
+      /// see if it is used in any contexts where making it a constant or alloca
+      /// would reduce the code size. If so, we add some value to the argument
+      /// entry here.
+      std::vector<ArgInfo> ArgumentWeights;
+
+      /// analyzeFunction - Add information about the specified function
+      /// to the current structure.
+      void analyzeFunction(Function *F);
+
+      /// NeverInline - Returns true if the function should never be
+      /// inlined into any caller.
+      bool NeverInline();
+    };
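+
+    // Editorial note: a minimal sketch of the TypedCost encoding implemented
+    // above, using hypothetical values. The cost occupies the low 30 bits
+    // (sign-extended on read) and the Kind tag the top 2 bits:
+    //
+    //   InlineCost IC = InlineCost::get(-42);  // "variable" cost of -42
+    //   assert(IC.isVariable() && IC.getValue() == -42);
+    //   assert(InlineCost::getAlways().isAlways());
+    //   assert(InlineCost::getNever().isNever());
+
+    // The Function* for a function can be changed (by ArgumentPromotion);
+    // the ValueMap will update itself when this happens.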
+    ValueMap<const Function *, FunctionInfo> CachedFunctionInfo;
+
+    int CountBonusForConstant(Value *V, Constant *C = NULL);
+    int ConstantFunctionBonus(CallSite CS, Constant *C);
+    int getInlineSize(CallSite CS, Function *Callee);
+    int getInlineBonuses(CallSite CS, Function *Callee);
+  public:
+
+    /// getInlineCost - The heuristic used to determine if we should inline the
+    /// function call or not.
+    ///
+    InlineCost getInlineCost(CallSite CS,
+                             SmallPtrSet<const Function *, 16> &NeverInline);
+    /// getInlineCost - The heuristic used to determine if we should inline
+    /// the function call or not. The callee is explicitly specified, to allow
+    /// you to calculate the cost of inlining a function via a pointer. The
+    /// result assumes that the inlined version will always be used. You should
+    /// weight it yourself in cases where this callee will not always be called.
+    InlineCost getInlineCost(CallSite CS,
+                             Function *Callee,
+                             SmallPtrSet<const Function *, 16> &NeverInline);
+
+    /// getSpecializationBonus - The heuristic used to determine the per-call
+    /// performance boost for using a specialization of Callee with argument
+    /// SpecializedArgNos replaced by a constant.
+    int getSpecializationBonus(Function *Callee,
+                               SmallVectorImpl<unsigned> &SpecializedArgNo);
+
+    /// getSpecializationCost - The heuristic used to determine the code-size
+    /// impact of creating a specialized version of Callee with argument
+    /// SpecializedArgNo replaced by a constant.
+    InlineCost getSpecializationCost(Function *Callee,
+                                     SmallVectorImpl<unsigned> &SpecializedArgNo);
+
+    /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+    /// higher threshold to determine if the function call should be inlined.
+    float getInlineFudgeFactor(CallSite CS);
+
+    /// resetCachedCostInfo - erase any cached cost info for this function.
+    void resetCachedCostInfo(Function* Caller) {
+      CachedFunctionInfo[Caller] = FunctionInfo();
+    }
+
+    /// growCachedCostInfo - update the cached cost info for Caller after Callee
+    /// has been inlined. If Callee is NULL it means a dead call has been
+    /// eliminated.
+    void growCachedCostInfo(Function* Caller, Function* Callee);
+
+    /// clear - empty the cache of inline costs
+    void clear();
+  };
+
+  /// callIsSmall - If a call is likely to lower to a single target instruction,
+  /// or is otherwise deemed small, return true.
+  bool callIsSmall(const Function *Callee);
+}
+
+#endif
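// Editorial note: a minimal sketch of how an inliner-style client might
// consult InlineCostAnalyzer (CS, NeverInline and Threshold are hypothetical
// surroundings, not part of the header above):
//
//   InlineCostAnalyzer ICA;
//   InlineCost IC = ICA.getInlineCost(CS, NeverInline);
//   if (IC.isAlways() || (IC.isVariable() && IC.getValue() < Threshold))
//     /* profitable: inline the call site */;
diff --git a/final/include/llvm/Analysis/InstructionSimplify.h b/final/include/llvm/Analysis/InstructionSimplify.h
new file mode 100644
index 00000000000..dff1ba2f7be
--- /dev/null
+++ b/final/include/llvm/Analysis/InstructionSimplify.h
@@ -0,0 +1,143 @@
+//===-- InstructionSimplify.h - Fold instructions into simpler forms ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares routines for folding instructions into simpler forms
+// that do not require creating new instructions. This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x"). If the simplification is also an instruction
+// then it dominates the original instruction.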
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+#define LLVM_ANALYSIS_INSTRUCTIONSIMPLIFY_H
+
+namespace llvm {
+  class DominatorTree;
+  class Instruction;
+  class Value;
+  class TargetData;
+
+  /// SimplifyAddInst - Given operands for an Add, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifySubInst - Given operands for a Sub, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyMulInst - Given operands for a Mul, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyMulInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
+
+  /// SimplifySDivInst - Given operands for an SDiv, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifySDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyUDivInst - Given operands for a UDiv, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyUDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyFDivInst - Given operands for an FDiv, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyShlInst - Given operands for a Shl, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyLShrInst - Given operands for a LShr, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                          const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyAShrInst - Given operands for an AShr, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyAndInst - Given operands for an And, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyAndInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
+
+  /// SimplifyOrInst - Given operands for an Or, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyOrInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                        const DominatorTree *DT = 0);
+
+  /// SimplifyXorInst - Given operands for a Xor, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyXorInst(Value *LHS, Value *RHS, const TargetData *TD = 0,
+                         const DominatorTree *DT = 0);
+
+  /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+  /// fold the result. If not, this returns null.
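+  /// (Pred is a CmpInst predicate value, e.g. FCmpInst::FCMP_OEQ, just as in
+  /// SimplifyICmpInst above.)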
+  Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                          const TargetData *TD = 0,
+                          const DominatorTree *DT = 0);
+
+  /// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+  /// the result. If not, this returns null.
+  Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
+                            const TargetData *TD = 0,
+                            const DominatorTree *DT = 0);
+
+  /// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyGEPInst(Value * const *Ops, unsigned NumOps,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  //=== Helper functions for higher up the class hierarchy.
+
+
+  /// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                         const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+  /// fold the result. If not, this returns null.
+  Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                       const TargetData *TD = 0, const DominatorTree *DT = 0);
+
+  /// SimplifyInstruction - See if we can compute a simplified version of this
+  /// instruction. If not, this returns null.
+  Value *SimplifyInstruction(Instruction *I, const TargetData *TD = 0,
+                             const DominatorTree *DT = 0);
+
+
+  /// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+  /// delete the From instruction. In addition to a basic RAUW, this does a
+  /// recursive simplification of the updated instructions. This catches
+  /// things where one simplification exposes other opportunities. This only
+  /// simplifies and deletes scalar operations, it does not change the CFG.
+  ///
+  void ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+                                 const TargetData *TD = 0,
+                                 const DominatorTree *DT = 0);
+} // end namespace llvm
+
+#endif
+
diff --git a/final/include/llvm/Analysis/Interval.h b/final/include/llvm/Analysis/Interval.h
new file mode 100644
index 00000000000..ca8ad73131a
--- /dev/null
+++ b/final/include/llvm/Analysis/Interval.h
@@ -0,0 +1,153 @@
+//===- llvm/Analysis/Interval.h - Interval Class Declaration ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Interval class, which
+// represents a set of CFG nodes and is a portion of an interval partition.
+//
+// Intervals have some interesting and useful properties, including the
+// following:
+//    1. The header node of an interval dominates all of the elements of the
+//       interval
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INTERVAL_H
+#define LLVM_INTERVAL_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include <vector>
+
+namespace llvm {
+
+class BasicBlock;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+//
+/// Interval Class - An Interval is a set of nodes defined such that every node
+/// in the interval has all of its predecessors in the interval (except for the
+/// header)
+///
+class Interval {
+  /// HeaderNode - The header BasicBlock, which dominates all BasicBlocks in
+  /// this interval.
+  /// Also, any loops in this interval must go through the HeaderNode.
+  ///
+  BasicBlock *HeaderNode;
+public:
+  typedef std::vector<BasicBlock*>::iterator succ_iterator;
+  typedef std::vector<BasicBlock*>::iterator pred_iterator;
+  typedef std::vector<BasicBlock*>::iterator node_iterator;
+
+  inline Interval(BasicBlock *Header) : HeaderNode(Header) {
+    Nodes.push_back(Header);
+  }
+
+  inline Interval(const Interval &I) // copy ctor
+    : HeaderNode(I.HeaderNode), Nodes(I.Nodes), Successors(I.Successors) {}
+
+  inline BasicBlock *getHeaderNode() const { return HeaderNode; }
+
+  /// Nodes - The basic blocks in this interval.
+  ///
+  std::vector<BasicBlock*> Nodes;
+
+  /// Successors - List of BasicBlocks that are reachable directly from nodes
+  /// in this interval, but are not in the interval themselves.
+  /// These nodes necessarily must be header nodes for other intervals.
+  ///
+  std::vector<BasicBlock*> Successors;
+
+  /// Predecessors - List of BasicBlocks that have this Interval's header block
+  /// as one of their successors.
+  ///
+  std::vector<BasicBlock*> Predecessors;
+
+  /// contains - Find out if a basic block is in this interval
+  inline bool contains(BasicBlock *BB) const {
+    for (unsigned i = 0; i < Nodes.size(); ++i)
+      if (Nodes[i] == BB) return true;
+    return false;
+    // I don't want the dependency on <algorithm>
+    //return find(Nodes.begin(), Nodes.end(), BB) != Nodes.end();
+  }
+
+  /// isSuccessor - find out if a basic block is a successor of this Interval
+  inline bool isSuccessor(BasicBlock *BB) const {
+    for (unsigned i = 0; i < Successors.size(); ++i)
+      if (Successors[i] == BB) return true;
+    return false;
+    // I don't want the dependency on <algorithm>
+    //return find(Successors.begin(), Successors.end(), BB) != Successors.end();
+  }
+
+  /// Equality operator. It is only valid to compare two intervals from the
+  /// same partition; because of this, all we have to check is the header node
+  /// for equality.
+  ///
+  inline bool operator==(const Interval &I) const {
+    return HeaderNode == I.HeaderNode;
+  }
+
+  /// isLoop - Find out if there is a back edge in this interval...
+  bool isLoop() const;
+
+  /// print - Show contents in human readable format...
+  void print(raw_ostream &O) const;
+};
+
+/// succ_begin/succ_end - define methods so that Intervals may be used
+/// just like BasicBlocks can be with the succ_* functions, and
+/// *::succ_iterator.
+///
+inline Interval::succ_iterator succ_begin(Interval *I) {
+  return I->Successors.begin();
+}
+inline Interval::succ_iterator succ_end(Interval *I) {
+  return I->Successors.end();
+}
+
+/// pred_begin/pred_end - define methods so that Intervals may be used
+/// just like BasicBlocks can be with the pred_* functions, and
+/// *::pred_iterator.
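+/// For example (sketch), visiting every block that can branch into an
+/// Interval *I:
+///
+///   for (Interval::pred_iterator PI = pred_begin(I), E = pred_end(I);
+///        PI != E; ++PI)
+///     ;  // *PI is a BasicBlock* that has I's header among its successors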
+///
+inline Interval::pred_iterator pred_begin(Interval *I) {
+  return I->Predecessors.begin();
+}
+inline Interval::pred_iterator pred_end(Interval *I) {
+  return I->Predecessors.end();
+}
+
+template <> struct GraphTraits<Interval*> {
+  typedef Interval NodeType;
+  typedef Interval::succ_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(Interval *I) { return I; }
+
+  /// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return succ_begin(N);
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return succ_end(N);
+  }
+};
+
+template <> struct GraphTraits<Inverse<Interval*> > {
+  typedef Interval NodeType;
+  typedef Interval::pred_iterator ChildIteratorType;
+  static NodeType *getEntryNode(Inverse<Interval*> G) { return G.Graph; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return pred_begin(N);
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return pred_end(N);
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/IntervalIterator.h b/final/include/llvm/Analysis/IntervalIterator.h
new file mode 100644
index 00000000000..82b3294cc50
--- /dev/null
+++ b/final/include/llvm/Analysis/IntervalIterator.h
@@ -0,0 +1,259 @@
+//===- IntervalIterator.h - Interval Iterator Declaration -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an iterator that enumerates the intervals in a control flow
+// graph of some sort. This iterator is parametric, allowing iteration over the
+// following types of graphs:
+//
+//    1. A Function* object, composed of BasicBlock nodes.
+//    2. An IntervalPartition& object, composed of Interval nodes.
+//
+// This iterator is defined to walk the control flow graph, returning intervals
+// in depth first order. These intervals are completely filled in except for
+// the predecessor fields (the successor information is filled in however).
+//
+// By default, the intervals created by this iterator are deleted after they
+// are no longer of any use to the iterator. This behavior can be changed by
+// passing a false value into the intervals_begin() function. This causes the
+// IOwnMem member to be set, and the intervals to not be deleted.
+//
+// It is only safe to use this if all of the intervals are deleted by the caller
+// and all of the intervals are processed. However, the user of the iterator is
+// not allowed to modify or delete the intervals until after the iterator has
+// been used completely. The IntervalPartition class uses this functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INTERVAL_ITERATOR_H
+#define LLVM_INTERVAL_ITERATOR_H
+
+#include "llvm/Analysis/IntervalPartition.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CFG.h"
+#include <algorithm>
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+// getNodeHeader - Given a source graph node and the source graph, return the
+// BasicBlock that is the header node. This is the opposite of
+// getSourceGraphNode.
+//
+inline BasicBlock *getNodeHeader(BasicBlock *BB) { return BB; }
+inline BasicBlock *getNodeHeader(Interval *I) { return I->getHeaderNode(); }
+
+// getSourceGraphNode - Given a BasicBlock and the source graph, return the
+// source graph node that corresponds to the BasicBlock. This is the opposite
+// of getNodeHeader.
+//
+inline BasicBlock *getSourceGraphNode(Function *, BasicBlock *BB) {
+  return BB;
+}
+inline Interval *getSourceGraphNode(IntervalPartition *IP, BasicBlock *BB) {
+  return IP->getBlockInterval(BB);
+}
+
+// addNodeToInterval - This method exists to assist the generic ProcessNode
+// with the task of adding a node to the new interval, depending on the
+// type of the source node. In the case of a CFG source graph (BasicBlock
+// case), the BasicBlock itself is added to the interval.
+//
+inline void addNodeToInterval(Interval *Int, BasicBlock *BB) {
+  Int->Nodes.push_back(BB);
+}
+
+// addNodeToInterval - This method exists to assist the generic ProcessNode
+// with the task of adding a node to the new interval, depending on the
+// type of the source node. In the case of a CFG source graph (BasicBlock
+// case), the BasicBlock itself is added to the interval. In the case of
+// an IntervalPartition source graph (Interval case), all of the member
+// BasicBlocks are added to the interval.
+//
+inline void addNodeToInterval(Interval *Int, Interval *I) {
+  // Add all of the nodes in I as new nodes in Int.
+  std::copy(I->Nodes.begin(), I->Nodes.end(), std::back_inserter(Int->Nodes));
+}
+
+template<class NodeTy, class OrigContainer_t, class GT = GraphTraits<NodeTy*>,
+         class IGT = GraphTraits<Inverse<NodeTy*> > >
+class IntervalIterator {
+  std::vector<std::pair<Interval*, typename Interval::succ_iterator> > IntStack;
+  std::set<BasicBlock*> Visited;
+  OrigContainer_t *OrigContainer;
+  bool IOwnMem;     // If True, delete intervals when done with them
+                    // See file header for conditions of use
+public:
+  typedef IntervalIterator<NodeTy, OrigContainer_t, GT, IGT> _Self;
+  typedef std::forward_iterator_tag iterator_category;
+
+  IntervalIterator() {} // End iterator, empty stack
+  IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) {
+    OrigContainer = M;
+    if (!ProcessInterval(&M->front())) {
+      assert(0 && "ProcessInterval should never fail for first interval!");
+    }
+  }
+
+  IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) {
+    OrigContainer = &IP;
+    if (!ProcessInterval(IP.getRootInterval())) {
+      assert(0 && "ProcessInterval should never fail for first interval!");
+    }
+  }
+
+  inline ~IntervalIterator() {
+    if (IOwnMem)
+      while (!IntStack.empty()) {
+        delete operator*();
+        IntStack.pop_back();
+      }
+  }
+
+  inline bool operator==(const _Self& x) const { return IntStack == x.IntStack;}
+  inline bool operator!=(const _Self& x) const { return !operator==(x); }
+
+  inline const Interval *operator*() const { return IntStack.back().first; }
+  inline       Interval *operator*()       { return IntStack.back().first; }
+  inline const Interval *operator->() const { return operator*(); }
+  inline       Interval *operator->()       { return operator*(); }
+
+  _Self& operator++() { // Preincrement
+    assert(!IntStack.empty() && "Attempting to use interval iterator at end!");
+    do {
+      // All of the intervals on the stack have been visited. Try visiting
+      // their successors now.
+      Interval::succ_iterator &SuccIt = IntStack.back().second,
+                                EndIt = succ_end(IntStack.back().first);
+      while (SuccIt != EndIt) {                 // Loop over all interval succs
+        bool Done = ProcessInterval(getSourceGraphNode(OrigContainer, *SuccIt));
+        ++SuccIt;                               // Increment iterator
+        if (Done) return *this;                 // Found a new interval! Use it!
+      }
+
+      // Free interval memory... if necessary
+      if (IOwnMem) delete IntStack.back().first;
+
+      // We ran out of successors for this interval... pop off the stack
+      IntStack.pop_back();
+    } while (!IntStack.empty());
+
+    return *this;
+  }
+  inline _Self operator++(int) { // Postincrement
+    _Self tmp = *this; ++*this; return tmp;
+  }
+
+private:
+  // ProcessInterval - This method is used during the construction of the
+  // interval graph. It walks through the source graph, recursively creating
+  // an interval per invocation until the entire graph is covered. This uses
+  // the ProcessNode method to add all of the nodes to the interval.
+  //
+  // This method is templated because it may operate on two different source
+  // graphs: a basic block graph, or a preexisting interval graph.
+  //
+  bool ProcessInterval(NodeTy *Node) {
+    BasicBlock *Header = getNodeHeader(Node);
+    if (Visited.count(Header)) return false;
+
+    Interval *Int = new Interval(Header);
+    Visited.insert(Header);   // The header has now been visited!
+
+    // Check all of our successors to see if they are in the interval...
+    for (typename GT::ChildIteratorType I = GT::child_begin(Node),
+           E = GT::child_end(Node); I != E; ++I)
+      ProcessNode(Int, getSourceGraphNode(OrigContainer, *I));
+
+    IntStack.push_back(std::make_pair(Int, succ_begin(Int)));
+    return true;
+  }
+
+  // ProcessNode - This method is called by ProcessInterval to add nodes to the
+  // interval being constructed, and it is also called recursively as it walks
+  // the source graph. A node is added to the current interval only if all of
+  // its predecessors are already in the graph. This also takes care of keeping
+  // the successor set of an interval up to date.
+  //
+  // This method is templated because it may operate on two different source
+  // graphs: a basic block graph, or a preexisting interval graph.
+  //
+  void ProcessNode(Interval *Int, NodeTy *Node) {
+    assert(Int && "Null interval == bad!");
+    assert(Node && "Null Node == bad!");
+
+    BasicBlock *NodeHeader = getNodeHeader(Node);
+
+    if (Visited.count(NodeHeader)) {   // Has the node already been visited?
+      if (Int->contains(NodeHeader)) { // Already in this interval...
+        return;
+      } else {                         // In another interval, add as successor
+        if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set
+          Int->Successors.push_back(NodeHeader);
+      }
+    } else {                           // Otherwise, not in interval yet
+      for (typename IGT::ChildIteratorType I = IGT::child_begin(Node),
+             E = IGT::child_end(Node); I != E; ++I) {
+        if (!Int->contains(*I)) {      // If a pred is not in the interval,
+                                       // this node cannot be either...
+          if (!Int->isSuccessor(NodeHeader)) // Add only if not already in set
+            Int->Successors.push_back(NodeHeader);
+          return;                      // See you later
+        }
+      }
+
+      // If we get here, then all of the predecessors of Node are in the
+      // interval already. In this case, we must add Node to the interval!
+      addNodeToInterval(Int, Node);
+      Visited.insert(NodeHeader);      // The node has now been visited!
+
+      if (Int->isSuccessor(NodeHeader)) {
+        // If we were in the successor list from before... remove from succ list
+        Int->Successors.erase(std::remove(Int->Successors.begin(),
+                                          Int->Successors.end(), NodeHeader),
+                              Int->Successors.end());
+      }
+
+      // Now that we have discovered that Node is in the interval, perhaps some
+      // of its successors are as well?
+      for (typename GT::ChildIteratorType It = GT::child_begin(Node),
+             End = GT::child_end(Node); It != End; ++It)
+        ProcessNode(Int, getSourceGraphNode(OrigContainer, *It));
+    }
+  }
+};
+
+typedef IntervalIterator<BasicBlock, Function> function_interval_iterator;
+typedef IntervalIterator<Interval, IntervalPartition>
+                                          interval_part_interval_iterator;
+
+
+inline function_interval_iterator intervals_begin(Function *F,
+                                                  bool DeleteInts = true) {
+  return function_interval_iterator(F, DeleteInts);
+}
+inline function_interval_iterator intervals_end(Function *) {
+  return function_interval_iterator();
+}
+
+inline interval_part_interval_iterator
+   intervals_begin(IntervalPartition &IP, bool DeleteIntervals = true) {
+  return interval_part_interval_iterator(IP, DeleteIntervals);
+}
+
+inline interval_part_interval_iterator intervals_end(IntervalPartition &IP) {
+  return interval_part_interval_iterator();
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/IntervalPartition.h b/final/include/llvm/Analysis/IntervalPartition.h
new file mode 100644
index 00000000000..df7313f18f3
--- /dev/null
+++ b/final/include/llvm/Analysis/IntervalPartition.h
@@ -0,0 +1,111 @@
+//===- IntervalPartition.h - Interval partition Calculation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the IntervalPartition class, which
+// calculates and represents the interval partition of a function, or a
+// preexisting interval partition.
+//
+// In this way, the interval partition may be used to reduce a flow graph down
+// to its degenerate single node interval partition (unless it is irreducible).
+//
+// TODO: The IntervalPartition class should take a bool parameter that tells
+// whether it should add the "tails" of an interval to an interval itself or if
+// they should be represented as distinct intervals.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INTERVAL_PARTITION_H
+#define LLVM_INTERVAL_PARTITION_H
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/Pass.h"
+#include <map>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//
+// IntervalPartition - This class builds and holds an "interval partition" for
+// a function. This partition divides the control flow graph into a set of
+// maximal intervals, as defined with the properties above. Intuitively, an
+// interval is a (possibly nonexistent) loop with a "tail" of non-looping
+// nodes following it.
+//
+class IntervalPartition : public FunctionPass {
+  typedef std::map<BasicBlock*, Interval*> IntervalMapTy;
+  IntervalMapTy IntervalMap;
+
+  typedef std::vector<Interval*> IntervalListTy;
+  Interval *RootInterval;
+  std::vector<Interval*> Intervals;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  IntervalPartition() : FunctionPass(ID), RootInterval(0) {
+    initializeIntervalPartitionPass(*PassRegistry::getPassRegistry());
+  }
+
+  // run - Calculate the interval partition for this function
+  virtual bool runOnFunction(Function &F);
+
+  // IntervalPartition ctor - Build a reduced interval partition from an
+  // existing interval graph. This takes an additional boolean parameter to
+  // distinguish it from a copy constructor. Always pass in false for now.
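+  //
+  // For illustration (sketch), a second-order partition can be built from a
+  // first-order one, and the process repeated until the partition becomes
+  // degenerate (for reducible CFGs):
+  //
+  //   IntervalPartition &IP = getAnalysis<IntervalPartition>();
+  //   IntervalPartition IP2(IP, false); // partition of IP's interval graph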
+  //
+  IntervalPartition(IntervalPartition &I, bool);
+
+  // print - Show contents in human readable format...
+  virtual void print(raw_ostream &O, const Module* = 0) const;
+
+  // getRootInterval() - Return the root interval that contains the starting
+  // block of the function.
+  inline Interval *getRootInterval() { return RootInterval; }
+
+  // isDegeneratePartition() - Returns true if the interval partition contains
+  // a single interval, and thus cannot be simplified anymore.
+  bool isDegeneratePartition() { return Intervals.size() == 1; }
+
+  // TODO: isIrreducible - look for triangle graph.
+
+  // getBlockInterval - Return the interval that a basic block exists in.
+  inline Interval *getBlockInterval(BasicBlock *BB) {
+    IntervalMapTy::iterator I = IntervalMap.find(BB);
+    return I != IntervalMap.end() ? I->second : 0;
+  }
+
+  // getAnalysisUsage - Implement the Pass API
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  // Interface to Intervals vector...
+  const std::vector<Interval*> &getIntervals() const { return Intervals; }
+
+  // releaseMemory - Reset state back to before function was analyzed
+  void releaseMemory();
+
+private:
+  // addIntervalToPartition - Add an interval to the internal list of intervals,
+  // and then add mappings from all of the basic blocks in the interval to the
+  // interval itself (in the IntervalMap).
+  //
+  void addIntervalToPartition(Interval *I);
+
+  // updatePredecessors - Interval generation only sets the successor fields of
+  // the interval data structures. After interval generation is complete,
+  // run through all of the intervals and propagate successor info as
+  // predecessor info.
+  //
+  void updatePredecessors(Interval *Int);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/LazyValueInfo.h b/final/include/llvm/Analysis/LazyValueInfo.h
new file mode 100644
index 00000000000..fc4d0af920e
--- /dev/null
+++ b/final/include/llvm/Analysis/LazyValueInfo.h
@@ -0,0 +1,81 @@
+//===- LazyValueInfo.h - Value constraint analysis --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LAZYVALUEINFO_H
+#define LLVM_ANALYSIS_LAZYVALUEINFO_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+  class Constant;
+  class TargetData;
+  class Value;
+
+/// LazyValueInfo - This pass computes, caches, and vends lazy value constraint
+/// information.
+class LazyValueInfo : public FunctionPass {
+  class TargetData *TD;
+  void *PImpl;
+  LazyValueInfo(const LazyValueInfo&);  // DO NOT IMPLEMENT.
+  void operator=(const LazyValueInfo&); // DO NOT IMPLEMENT.
+public:
+  static char ID;
+  LazyValueInfo() : FunctionPass(ID), PImpl(0) {
+    initializeLazyValueInfoPass(*PassRegistry::getPassRegistry());
+  }
+  ~LazyValueInfo() { assert(PImpl == 0 && "releaseMemory not called"); }
+
+  /// Tristate - This is used to return true/false/dunno results.
+  enum Tristate {
+    Unknown = -1, False = 0, True = 1
+  };
+
+  // Public query interface.
+
+  /// getPredicateOnEdge - Determine whether the specified value comparison
+  /// with a constant is known to be true or false on the specified CFG edge.
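+  /// For instance (sketch), getPredicateOnEdge(ICmpInst::ICMP_EQ, V, C, BB,
+  /// Succ) asks whether "V == C" must hold on the BB -> Succ edge, which is
+  /// often decidable when BB ends in a conditional branch on that comparison.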
+  /// Pred is a CmpInst predicate.
+  Tristate getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+                              BasicBlock *FromBB, BasicBlock *ToBB);
+
+  /// getConstant - Determine whether the specified value is known to be a
+  /// constant at the end of the specified block. Return null if not.
+  Constant *getConstant(Value *V, BasicBlock *BB);
+
+  /// getConstantOnEdge - Determine whether the specified value is known to be
+  /// a constant on the specified edge. Return null if not.
+  Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB);
+
+  /// threadEdge - Inform the analysis cache that we have threaded an edge from
+  /// PredBB to OldSucc to be from PredBB to NewSucc instead.
+  void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc);
+
+  /// eraseBlock - Inform the analysis cache that we have erased a block.
+  void eraseBlock(BasicBlock *BB);
+
+  // Implementation boilerplate.
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+  virtual void releaseMemory();
+  virtual bool runOnFunction(Function &F);
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/final/include/llvm/Analysis/LibCallAliasAnalysis.h b/final/include/llvm/Analysis/LibCallAliasAnalysis.h
new file mode 100644
index 00000000000..243234b7563
--- /dev/null
+++ b/final/include/llvm/Analysis/LibCallAliasAnalysis.h
@@ -0,0 +1,73 @@
+//===- LibCallAliasAnalysis.h - Implement AliasAnalysis for libcalls ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LIBCALL_AA_H
+#define LLVM_ANALYSIS_LIBCALL_AA_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+  class LibCallInfo;
+  struct LibCallFunctionInfo;
+
+  /// LibCallAliasAnalysis - Alias analysis driven from LibCallInfo.
+  struct LibCallAliasAnalysis : public FunctionPass, public AliasAnalysis {
+    static char ID; // Class identification
+
+    LibCallInfo *LCI;
+
+    explicit LibCallAliasAnalysis(LibCallInfo *LC = 0)
+        : FunctionPass(ID), LCI(LC) {
+      initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+    explicit LibCallAliasAnalysis(char &ID, LibCallInfo *LC)
+        : FunctionPass(ID), LCI(LC) {
+      initializeLibCallAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+    ~LibCallAliasAnalysis();
+
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc);
+
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) {
+      // TODO: Could compare two direct calls against each other if we cared to.
+      return AliasAnalysis::getModRefInfo(CS1, CS2);
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    virtual bool runOnFunction(Function &F) {
+      InitializeAliasAnalysis(this); // set up super class
+      return false;
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
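+    /// For example (sketch), a query for &AliasAnalysis::ID must yield the
+    /// AliasAnalysis subobject of this pass, which is what the override below
+    /// does.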
+    virtual void *getAdjustedAnalysisPointer(const void *PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+  private:
+    ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+                                       ImmutableCallSite CS,
+                                       const Location &Loc);
+  };
+} // End of llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/LibCallSemantics.h b/final/include/llvm/Analysis/LibCallSemantics.h
new file mode 100644
index 00000000000..f5a9e96cbdd
--- /dev/null
+++ b/final/include/llvm/Analysis/LibCallSemantics.h
@@ -0,0 +1,167 @@
+//===- LibCallSemantics.h - Describe library semantics --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces that can be used to describe language specific
+// runtime library interfaces (e.g. libc, libm, etc) to LLVM optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LIBCALLSEMANTICS_H
+#define LLVM_ANALYSIS_LIBCALLSEMANTICS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+
+  /// LibCallLocationInfo - This struct describes a set of memory locations
+  /// that are accessed by libcalls. Identification of a location is done with
+  /// a simple callback function.
+  ///
+  /// For example, the LibCallInfo may be set up to model the behavior of
+  /// standard libm functions. The location that they may be interested in is
+  /// an abstract location that represents errno for the current target. In
+  /// this case, a location for errno is anything such that the predicate
+  /// returns true. On Mac OS X, this predicate would return true if the
+  /// pointer is the result of a call to "__error()".
+  ///
+  /// Locations can also be defined in a constant-sensitive way. For example,
+  /// it is possible to define a location that returns true iff it is passed
+  /// into the call as a specific argument. This is useful for modeling things
+  /// like "printf", which can store to memory, but only through pointers passed
+  /// with a '%n' constraint.
+  ///
+  struct LibCallLocationInfo {
+    // TODO: Flags: isContextSensitive etc.
+
+    /// isLocation - Return a LocResult if the specified pointer refers to this
+    /// location for the specified call site. This returns "Yes" if we can tell
+    /// that the pointer *does definitely* refer to the location, "No" if we
+    /// can tell that the pointer *definitely does not* refer to the location,
+    /// and returns "Unknown" if we cannot tell for certain.
+    enum LocResult {
+      Yes, No, Unknown
+    };
+    LocResult (*isLocation)(ImmutableCallSite CS,
+                            const AliasAnalysis::Location &Loc);
+  };
+
+  /// LibCallFunctionInfo - Each record in the array of FunctionInfo structs
+  /// records the behavior of one libcall that is known by the optimizer. This
+  /// captures things like the side effects of the call. Side effects are
+  /// modeled both universally (in the readnone/readonly sense) and also
+  /// potentially against a set of abstract locations defined by the optimizer.
+  /// This allows an optimizer to define that some libcall (e.g. sqrt) is
+  /// side-effect free except that it might modify errno (thus, the call is
+  /// *not* universally readonly). Or it might say that the side effects
+  /// are unknown other than to say that errno is not modified.
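+  /// For example (an illustrative entry, with a hypothetical ErrnoLocID): a
+  /// "sqrt" record could set UniversalBehavior to ModRef and point
+  /// LocationDetails at a DoesOnly array of { ErrnoLocID, Mod }, encoding
+  /// "no side effects other than possibly writing errno".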
+  ///
+  struct LibCallFunctionInfo {
+    /// Name - This is the name of the libcall this describes.
+    const char *Name;
+
+    /// TODO: Constant folding function: Constant* vector -> Constant*.
+
+    /// UniversalBehavior - This captures the absolute mod/ref behavior without
+    /// any specific context knowledge. For example, if the function is known
+    /// to be readonly, this would be set to 'ref'. If known to be readnone,
+    /// this is set to NoModRef.
+    AliasAnalysis::ModRefResult UniversalBehavior;
+
+    /// LocationMRInfo - This pair captures info about whether a specific
+    /// location is modified or referenced by a libcall.
+    struct LocationMRInfo {
+      /// LocationID - ID # of the accessed location or ~0U for array end.
+      unsigned LocationID;
+      /// MRInfo - Mod/Ref info for this location.
+      AliasAnalysis::ModRefResult MRInfo;
+    };
+
+    /// DetailsType - Indicate the sense of the LocationDetails array. This
+    /// controls how the LocationDetails array is interpreted.
+    enum {
+      /// DoesOnly - If DetailsType is set to DoesOnly, then we know that the
+      /// *only* mod/ref behavior of this function is captured by the
+      /// LocationDetails array. If we are trying to say that 'sqrt' can only
+      /// modify errno, we'd have the {errnoloc,mod} in the LocationDetails
+      /// array and have DetailsType set to DoesOnly.
+      DoesOnly,
+
+      /// DoesNot - If DetailsType is set to DoesNot, then the sense of the
+      /// LocationDetails array is completely inverted. This means that we *do
+      /// not* know everything about the side effects of this libcall, but we do
+      /// know things that the libcall cannot do. This is useful for complex
+      /// functions like 'ctime' which have crazy mod/ref behavior, but are
+      /// known to never read or write errno. In this case, we'd have
+      /// {errnoloc,modref} in the LocationDetails array and DetailsType would
+      /// be set to DoesNot, indicating that ctime does not read or write the
+      /// errno location.
+      DoesNot
+    } DetailsType;
+
+    /// LocationDetails - This is a pointer to an array of LocationMRInfo
+    /// structs which indicates the behavior of the libcall w.r.t. specific
+    /// locations. For example, if this libcall is known to only modify
+    /// 'errno', it would have a LocationDetails array with the errno ID and
+    /// 'mod' in it. See the DetailsType field for how this is interpreted.
+    ///
+    /// In the "DoesOnly" case, this information is 'may' information: there
+    /// is no guarantee that the specified side effect actually does happen,
+    /// just that it could. In the "DoesNot" case, this is 'must not' info.
+    ///
+    /// If this pointer is null, no details are known.
+    ///
+    const LocationMRInfo *LocationDetails;
+  };
+
+
+  /// LibCallInfo - Abstract interface to query about library call information.
+  /// Instances of this class return known information about some set of
+  /// libcalls.
+  ///
+  class LibCallInfo {
+    // Implementation details of this object, private.
+    mutable void *Impl;
+    mutable const LibCallLocationInfo *Locations;
+    mutable unsigned NumLocations;
+  public:
+    LibCallInfo() : Impl(0), Locations(0), NumLocations(0) {}
+    virtual ~LibCallInfo();
+
+    //===------------------------------------------------------------------===//
+    //  Accessor Methods: Efficient access to contained data.
+    //===------------------------------------------------------------------===//
+
+    /// getLocationInfo - Return information about the specified LocationID.
+    const LibCallLocationInfo &getLocationInfo(unsigned LocID) const;
+
+    /// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+    /// the specified function if we have it. If not, return null.
+    const LibCallFunctionInfo *getFunctionInfo(const Function *F) const;
+
+
+    //===------------------------------------------------------------------===//
+    //  Implementation Methods: Subclasses should implement these.
+    //===------------------------------------------------------------------===//
+
+    /// getLocationInfo - Return descriptors for the locations referenced by
+    /// this set of libcalls.
+    virtual unsigned getLocationInfo(const LibCallLocationInfo *&Array) const {
+      return 0;
+    }
+
+    /// getFunctionInfoArray - Return an array of descriptors that describe the
+    /// set of libcalls represented by this LibCallInfo object. This array is
+    /// terminated by an entry with a NULL name.
+    virtual const LibCallFunctionInfo *getFunctionInfoArray() const = 0;
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Analysis/Lint.h b/final/include/llvm/Analysis/Lint.h
new file mode 100644
index 00000000000..eb65d223644
--- /dev/null
+++ b/final/include/llvm/Analysis/Lint.h
@@ -0,0 +1,51 @@
+//===-- llvm/Analysis/Lint.h - LLVM IR Lint ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines lint interfaces that can be used for some sanity checking
+// of input to the system, and for checking that transformations
+// haven't done something bad. In contrast to the Verifier, the Lint checker
+// checks for undefined behavior or constructions with likely unintended
+// behavior.
+//
+// To see what specifically is checked, look at Lint.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LINT_H
+#define LLVM_ANALYSIS_LINT_H
+
+#include <string>
+
+namespace llvm {
+
+class FunctionPass;
+class Module;
+class Function;
+
+/// @brief Create a lint pass.
+///
+/// Check a module or function.
+FunctionPass *createLintPass();
+
+/// @brief Check a module.
+///
+/// This should only be used for debugging, because it plays games with
+/// PassManagers and stuff.
+void lintModule(
+  const Module &M  ///< The module to be checked
+);
+
+// lintFunction - Check a function.
+void lintFunction(
+  const Function &F  ///< The function to be checked
+);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/Loads.h b/final/include/llvm/Analysis/Loads.h
new file mode 100644
index 00000000000..1574262dd6d
--- /dev/null
+++ b/final/include/llvm/Analysis/Loads.h
@@ -0,0 +1,51 @@
+//===- Loads.h - Local load analysis --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares simple local analyses for load instructions.
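+//
+// For example (sketch), a transform that wants to speculate a load might ask:
+//
+//   if (isSafeToLoadUnconditionally(Ptr, InsertPt, LoadAlign, TD))
+//     /* hoist the load to InsertPt */;
+//
+// where Ptr, InsertPt, LoadAlign and TD are whatever the caller has at hand.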
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOADS_H
+#define LLVM_ANALYSIS_LOADS_H
+
+#include "llvm/BasicBlock.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class TargetData;
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+                                 unsigned Align, const TargetData *TD = 0);
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
+/// the instruction before ScanFrom) checking to see if we have the value at
+/// the memory address *Ptr locally available within a small number of
+/// instructions. If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't
+/// find something that invalidates *Ptr or provides it, ScanFrom would be
+/// left at begin() and this returns null.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block.
+/// If it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+                                BasicBlock::iterator &ScanFrom,
+                                unsigned MaxInstsToScan = 6,
+                                AliasAnalysis *AA = 0);
+
+}
+
+#endif
diff --git a/final/include/llvm/Analysis/LoopDependenceAnalysis.h b/final/include/llvm/Analysis/LoopDependenceAnalysis.h
new file mode 100644
index 00000000000..f195d278241
--- /dev/null
+++ b/final/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -0,0 +1,124 @@
+//===- llvm/Analysis/LoopDependenceAnalysis.h --------------- -*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LoopDependenceAnalysis is an LLVM pass that analyses dependences in memory
+// accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as interface progresses
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class AnalysisUsage;
+class ScalarEvolution;
+class SCEV;
+class Value;
+class raw_ostream;
+
+class LoopDependenceAnalysis : public LoopPass {
+  AliasAnalysis *AA;
+  ScalarEvolution *SE;
+
+  /// L - The loop we are currently analysing.
+  Loop *L;
+
+  /// TODO: doc
+  enum DependenceResult { Independent = 0, Dependent = 1, Unknown = 2 };
+
+  /// TODO: doc
+  struct Subscript {
+    /// TODO: Add distance, direction, breaking conditions, ...
+  };
+
+  /// DependencePair - Represents a data dependence relation between two memory
+  /// reference instructions.
+  struct DependencePair : public FastFoldingSetNode {
+    Value *A;
+    Value *B;
+    DependenceResult Result;
+    SmallVector<Subscript, 4> Subscripts;
+
+    DependencePair(const FoldingSetNodeID &ID, Value *a, Value *b) :
+        FastFoldingSetNode(ID), A(a), B(b), Result(Unknown), Subscripts() {}
+  };
+
+  /// findOrInsertDependencePair - Return true if a DependencePair for the
+  /// given Values already exists, false if a new DependencePair had to be
+  /// created. The third argument is set to the pair found or created.
+  bool findOrInsertDependencePair(Value*, Value*, DependencePair*&);
+
+  /// getLoops - Collect all loops of the loop nest L in which
+  /// a given SCEV is variant.
+  void getLoops(const SCEV*, DenseSet<const Loop*>*) const;
+
+  /// isLoopInvariant - True if a given SCEV is invariant in all loops of the
+  /// loop nest starting at the innermost loop L.
+  bool isLoopInvariant(const SCEV*) const;
+
+  /// isAffine - An SCEV is affine with respect to the loop nest starting at
+  /// the innermost loop L if it is of the form A+B*X where A, B are invariant
+  /// in the loop nest and X is an induction variable in the loop nest.
+  bool isAffine(const SCEV*) const;
+
+  /// TODO: doc
+  bool isZIVPair(const SCEV*, const SCEV*) const;
+  bool isSIVPair(const SCEV*, const SCEV*) const;
+  DependenceResult analyseZIV(const SCEV*, const SCEV*, Subscript*) const;
+  DependenceResult analyseSIV(const SCEV*, const SCEV*, Subscript*) const;
+  DependenceResult analyseMIV(const SCEV*, const SCEV*, Subscript*) const;
+  DependenceResult analyseSubscript(const SCEV*, const SCEV*, Subscript*) const;
+  DependenceResult analysePair(DependencePair*) const;
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  LoopDependenceAnalysis() : LoopPass(ID) {
+    initializeLoopDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// isDependencePair - Check whether two values can possibly give rise to
+  /// a data dependence: that is the case if both are instructions accessing
+  /// memory and at least one of those accesses is a write.
+  bool isDependencePair(const Value*, const Value*) const;
+
+  /// depends - Return a boolean indicating if there is a data dependence
+  /// between two instructions.
+  bool depends(Value*, Value*);
+
+  bool runOnLoop(Loop*, LPPassManager&);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+  void print(raw_ostream&, const Module* = 0) const;
+
+private:
+  FoldingSet<DependencePair> Pairs;
+  BumpPtrAllocator PairAllocator;
+}; // class LoopDependenceAnalysis
+
+// createLoopDependenceAnalysisPass - This creates an instance of the
+// LoopDependenceAnalysis pass.
+//
+LoopPass *createLoopDependenceAnalysisPass();
+
+} // namespace llvm
+
+#endif /* LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H */
diff --git a/final/include/llvm/Analysis/LoopInfo.h b/final/include/llvm/Analysis/LoopInfo.h
new file mode 100644
index 00000000000..392bdad5ab0
--- /dev/null
+++ b/final/include/llvm/Analysis/LoopInfo.h
@@ -0,0 +1,1100 @@
+//===- llvm/Analysis/LoopInfo.h - Natural Loop Calculator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. A natural loop
+// has exactly one entry-point, which is called the header.
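+// (For example, a simple "while" loop forms one natural loop, and the block
+// containing the loop test is its header.)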
+// Note that natural loops may actually be several loops that share the same
+// header node.
+//
+// This analysis calculates the nesting structure of loops in a function. For
+// each natural loop identified, this analysis identifies natural loops
+// contained entirely within the loop and the basic blocks that make up the
+// loop.
+//
+// It can calculate on the fly various bits of information, for example:
+//
+//  * whether there is a preheader for the loop
+//  * the number of back edges to the header
+//  * whether or not a particular block branches out of the loop
+//  * the successor blocks of the loop
+//  * the loop depth
+//  * the trip count
+//  * etc...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOP_INFO_H
+#define LLVM_ANALYSIS_LOOP_INFO_H
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <map>
+
+namespace llvm {
+
+template<typename T>
+static void RemoveFromVector(std::vector<T*> &V, T *N) {
+  typename std::vector<T*>::iterator I = std::find(V.begin(), V.end(), N);
+  assert(I != V.end() && "N is not in this list!");
+  V.erase(I);
+}
+
+class DominatorTree;
+class LoopInfo;
+class Loop;
+class PHINode;
+template<class N, class M> class LoopInfoBase;
+template<class N, class M> class LoopBase;
+
+//===----------------------------------------------------------------------===//
+/// LoopBase class - Instances of this class are used to represent loops that
+/// are detected in the flow graph
+///
+template<class BlockT, class LoopT>
+class LoopBase {
+  LoopT *ParentLoop;
+  // SubLoops - Loops contained entirely within this one.
+  std::vector<LoopT *> SubLoops;
+
+  // Blocks - The list of blocks in this loop. First entry is the header node.
+  std::vector<BlockT*> Blocks;
+
+  // DO NOT IMPLEMENT
+  LoopBase(const LoopBase<BlockT, LoopT> &);
+  // DO NOT IMPLEMENT
+  const LoopBase<BlockT, LoopT> &
+    operator=(const LoopBase<BlockT, LoopT> &);
+public:
+  /// Loop ctor - This creates an empty loop.
+  LoopBase() : ParentLoop(0) {}
+  ~LoopBase() {
+    for (size_t i = 0, e = SubLoops.size(); i != e; ++i)
+      delete SubLoops[i];
+  }
+
+  /// getLoopDepth - Return the nesting level of this loop. An outer-most
+  /// loop has depth 1, for consistency with loop depth values used for basic
+  /// blocks, where depth 0 is used for blocks not inside any loops.
+  unsigned getLoopDepth() const {
+    unsigned D = 1;
+    for (const LoopT *CurLoop = ParentLoop; CurLoop;
+         CurLoop = CurLoop->ParentLoop)
+      ++D;
+    return D;
+  }
+  BlockT *getHeader() const { return Blocks.front(); }
+  LoopT *getParentLoop() const { return ParentLoop; }
+
+  /// contains - Return true if the specified loop is contained within
+  /// this loop.
+  ///
+  bool contains(const LoopT *L) const {
+    if (L == this) return true;
+    if (L == 0)    return false;
+    return contains(L->getParentLoop());
+  }
+
+  /// contains - Return true if the specified basic block is in this loop.
+  ///
+  bool contains(const BlockT *BB) const {
+    return std::find(block_begin(), block_end(), BB) != block_end();
+  }
+
+  /// contains - Return true if the specified instruction is in this loop.
+  ///
+  template<class InstT>
+  bool contains(const InstT *Inst) const {
+    return contains(Inst->getParent());
+  }
+
+  /// iterator/begin/end - Return the loops contained entirely within this
+  /// loop.
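+  /// For example (sketch), visiting the immediate subloops of a Loop *L:
+  ///
+  ///   for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+  ///     visitSubloop(*I); // visitSubloop is a placeholder; *I is a Loop*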
+  ///
+  const std::vector<LoopT*> &getSubLoops() const { return SubLoops; }
+  typedef typename std::vector<LoopT*>::const_iterator iterator;
+  iterator begin() const { return SubLoops.begin(); }
+  iterator end() const { return SubLoops.end(); }
+  bool empty() const { return SubLoops.empty(); }
+
+  /// getBlocks - Get a list of the basic blocks which make up this loop.
+  ///
+  const std::vector<BlockT*> &getBlocks() const { return Blocks; }
+  typedef typename std::vector<BlockT*>::const_iterator block_iterator;
+  block_iterator block_begin() const { return Blocks.begin(); }
+  block_iterator block_end() const { return Blocks.end(); }
+
+  /// isLoopExiting - True if the terminator in the block can branch to another
+  /// block that is outside of the current loop.
+  ///
+  bool isLoopExiting(const BlockT *BB) const {
+    typedef GraphTraits<BlockT*> BlockTraits;
+    for (typename BlockTraits::ChildIteratorType SI =
+           BlockTraits::child_begin(const_cast<BlockT*>(BB)),
+           SE = BlockTraits::child_end(const_cast<BlockT*>(BB)); SI != SE; ++SI) {
+      if (!contains(*SI))
+        return true;
+    }
+    return false;
+  }
+
+  /// getNumBackEdges - Calculate the number of back edges to the loop header
+  ///
+  unsigned getNumBackEdges() const {
+    unsigned NumBackEdges = 0;
+    BlockT *H = getHeader();
+
+    typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+    for (typename InvBlockTraits::ChildIteratorType I =
+           InvBlockTraits::child_begin(const_cast<BlockT*>(H)),
+           E = InvBlockTraits::child_end(const_cast<BlockT*>(H)); I != E; ++I)
+      if (contains(*I))
+        ++NumBackEdges;
+
+    return NumBackEdges;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // APIs for simple analysis of the loop.
+  //
+  // Note that all of these methods can fail on general loops (i.e., there may
+  // not be a preheader, etc). For best success, the loop simplification and
+  // induction variable canonicalization pass should be used to normalize loops
+  // for easy analysis. These methods assume canonical loops.
+
+  /// getExitingBlocks - Return all blocks inside the loop that have successors
+  /// outside of the loop. These are the blocks _inside of the current loop_
+  /// which branch out. The returned list is always unique.
+  ///
+  void getExitingBlocks(SmallVectorImpl<BlockT*> &ExitingBlocks) const {
+    // Sort the blocks vector so that we can use binary search to do quick
+    // lookups.
+    SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
+    std::sort(LoopBBs.begin(), LoopBBs.end());
+
+    typedef GraphTraits<BlockT*> BlockTraits;
+    for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
+      for (typename BlockTraits::ChildIteratorType I =
+             BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+           I != E; ++I)
+        if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) {
+          // Not in current loop? It must be an exit block.
+          ExitingBlocks.push_back(*BI);
+          break;
+        }
+  }
+
+  /// getExitingBlock - If getExitingBlocks would return exactly one block,
+  /// return that block. Otherwise return null.
+  BlockT *getExitingBlock() const {
+    SmallVector<BlockT*, 8> ExitingBlocks;
+    getExitingBlocks(ExitingBlocks);
+    if (ExitingBlocks.size() == 1)
+      return ExitingBlocks[0];
+    return 0;
+  }
+
+  /// getExitBlocks - Return all of the successor blocks of this loop. These
+  /// are the blocks _outside of the current loop_ which are branched to.
+  ///
+  void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const {
+    // Sort the blocks vector so that we can use binary search to do quick
+    // lookups.
+    SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
+    std::sort(LoopBBs.begin(), LoopBBs.end());
+
+    typedef GraphTraits<BlockT*> BlockTraits;
+    for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
+      for (typename BlockTraits::ChildIteratorType I =
+             BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+           I != E; ++I)
+        if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+          // Not in current loop? It must be an exit block.
+          ExitBlocks.push_back(*I);
+  }
+
+  /// getExitBlock - If getExitBlocks would return exactly one block,
+  /// return that block. Otherwise return null.
+  BlockT *getExitBlock() const {
+    SmallVector<BlockT*, 8> ExitBlocks;
+    getExitBlocks(ExitBlocks);
+    if (ExitBlocks.size() == 1)
+      return ExitBlocks[0];
+    return 0;
+  }
+
+  /// Edge type.
+  typedef std::pair<const BlockT*, const BlockT*> Edge;
+
+  /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
+  template <typename EdgeT>
+  void getExitEdges(SmallVectorImpl<EdgeT> &ExitEdges) const {
+    // Sort the blocks vector so that we can use binary search to do quick
+    // lookups.
+    SmallVector<BlockT*, 128> LoopBBs(block_begin(), block_end());
+    array_pod_sort(LoopBBs.begin(), LoopBBs.end());
+
+    typedef GraphTraits<BlockT*> BlockTraits;
+    for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI)
+      for (typename BlockTraits::ChildIteratorType I =
+             BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI);
+           I != E; ++I)
+        if (!std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+          // Not in current loop? It must be an exit block.
+          ExitEdges.push_back(EdgeT(*BI, *I));
+  }
+
+  /// getLoopPreheader - If there is a preheader for this loop, return it. A
+  /// loop has a preheader if there is only one edge to the header of the loop
+  /// from outside of the loop. If this is the case, the block branching to the
+  /// header of the loop is the preheader node.
+  ///
+  /// This method returns null if there is no preheader for the loop.
+  ///
+  BlockT *getLoopPreheader() const {
+    // Keep track of nodes outside the loop branching to the header...
+    BlockT *Out = getLoopPredecessor();
+    if (!Out) return 0;
+
+    // Make sure there is only one exit out of the preheader.
+    typedef GraphTraits<BlockT*> BlockTraits;
+    typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out);
+    ++SI;
+    if (SI != BlockTraits::child_end(Out))
+      return 0; // Multiple exits from the block, must not be a preheader.
+
+    // The predecessor has exactly one successor, so it is a preheader.
+    return Out;
+  }
+
+  /// getLoopPredecessor - If the given loop's header has exactly one unique
+  /// predecessor outside the loop, return it. Otherwise return null.
+  /// This is less strict than the loop "preheader" concept, which requires
+  /// the predecessor to have exactly one successor.
+  ///
+  BlockT *getLoopPredecessor() const {
+    // Keep track of nodes outside the loop branching to the header...
+    BlockT *Out = 0;
+
+    // Loop over the predecessors of the header node...
+    BlockT *Header = getHeader();
+    typedef GraphTraits<BlockT*> BlockTraits;
+    typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+    for (typename InvBlockTraits::ChildIteratorType PI =
+           InvBlockTraits::child_begin(Header),
+           PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
+      typename InvBlockTraits::NodeType *N = *PI;
+      if (!contains(N)) {   // If the block is not in the loop...
+        if (Out && Out != N)
+          return 0;         // Multiple predecessors outside the loop
+        Out = N;
+      }
+    }
+
+    // Make sure we found a predecessor from outside the loop.
+ assert(Out && "Header of loop has no predecessors from outside loop?"); + return Out; + } + + /// getLoopLatch - If there is a single latch block for this loop, return it. + /// A latch block is a block that contains a branch back to the header. + BlockT *getLoopLatch() const { + BlockT *Header = getHeader(); + typedef GraphTraits > InvBlockTraits; + typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(Header); + typename InvBlockTraits::ChildIteratorType PE = + InvBlockTraits::child_end(Header); + BlockT *Latch = 0; + for (; PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (contains(N)) { + if (Latch) return 0; + Latch = N; + } + } + + return Latch; + } + + //===--------------------------------------------------------------------===// + // APIs for updating loop information after changing the CFG + // + + /// addBasicBlockToLoop - This method is used by other analyses to update loop + /// information. NewBB is set to be a new member of the current loop. + /// Because of this, it is added as a member of all parent loops, and is added + /// to the specified LoopInfo object as being in the current basic block. It + /// is not valid to replace the loop header with this method. + /// + void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase &LI); + + /// replaceChildLoopWith - This is used when splitting loops up. It replaces + /// the OldChild entry in our children list with NewChild, and updates the + /// parent pointer of OldChild to be null and the NewChild to be this loop. + /// This updates the loop depth of the new child. + void replaceChildLoopWith(LoopT *OldChild, + LoopT *NewChild) { + assert(OldChild->ParentLoop == this && "This loop is already broken!"); + assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); + typename std::vector::iterator I = + std::find(SubLoops.begin(), SubLoops.end(), OldChild); + assert(I != SubLoops.end() && "OldChild not in loop!"); + *I = NewChild; + OldChild->ParentLoop = 0; + NewChild->ParentLoop = static_cast(this); + } + + /// addChildLoop - Add the specified loop to be a child of this loop. This + /// updates the loop depth of the new child. + /// + void addChildLoop(LoopT *NewChild) { + assert(NewChild->ParentLoop == 0 && "NewChild already has a parent!"); + NewChild->ParentLoop = static_cast(this); + SubLoops.push_back(NewChild); + } + + /// removeChildLoop - This removes the specified child from being a subloop of + /// this loop. The loop is not deleted, as it will presumably be inserted + /// into another loop. + LoopT *removeChildLoop(iterator I) { + assert(I != SubLoops.end() && "Cannot remove end iterator!"); + LoopT *Child = *I; + assert(Child->ParentLoop == this && "Child is not a child of this loop!"); + SubLoops.erase(SubLoops.begin()+(I-begin())); + Child->ParentLoop = 0; + return Child; + } + + /// addBlockEntry - This adds a basic block directly to the basic block list. + /// This should only be used by transformations that create new loops. Other + /// transformations should use addBasicBlockToLoop. + void addBlockEntry(BlockT *BB) { + Blocks.push_back(BB); + } + + /// moveToHeader - This method is used to move BB (which must be part of this + /// loop) to be the loop header of the loop (the block that dominates all + /// others). 
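+  /// For example, with Blocks = {A, B, C}, moveToHeader(C) swaps C into
+  /// slot 0 and yields {C, B, A}: the old header A takes C's former slot
+  /// and every other block keeps its position.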
+ void moveToHeader(BlockT *BB) { + if (Blocks[0] == BB) return; + for (unsigned i = 0; ; ++i) { + assert(i != Blocks.size() && "Loop does not contain BB!"); + if (Blocks[i] == BB) { + Blocks[i] = Blocks[0]; + Blocks[0] = BB; + return; + } + } + } + + /// removeBlockFromLoop - This removes the specified basic block from the + /// current loop, updating the Blocks as appropriate. This does not update + /// the mapping in the LoopInfo class. + void removeBlockFromLoop(BlockT *BB) { + RemoveFromVector(Blocks, BB); + } + + /// verifyLoop - Verify loop structure + void verifyLoop() const { +#ifndef NDEBUG + assert(!Blocks.empty() && "Loop header is missing"); + + // Sort the blocks vector so that we can use binary search to do quick + // lookups. + SmallVector LoopBBs(block_begin(), block_end()); + std::sort(LoopBBs.begin(), LoopBBs.end()); + + // Check the individual blocks. + for (block_iterator I = block_begin(), E = block_end(); I != E; ++I) { + BlockT *BB = *I; + bool HasInsideLoopSuccs = false; + bool HasInsideLoopPreds = false; + SmallVector OutsideLoopPreds; + + typedef GraphTraits BlockTraits; + for (typename BlockTraits::ChildIteratorType SI = + BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB); + SI != SE; ++SI) + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *SI)) { + HasInsideLoopSuccs = true; + break; + } + typedef GraphTraits > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); + PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N)) + HasInsideLoopPreds = true; + else + OutsideLoopPreds.push_back(N); + } + + if (BB == getHeader()) { + assert(!OutsideLoopPreds.empty() && "Loop is unreachable!"); + } else if (!OutsideLoopPreds.empty()) { + // A non-header loop shouldn't be reachable from outside the loop, + // though it is permitted if the predecessor is not itself actually + // reachable. + BlockT *EntryBB = BB->getParent()->begin(); + for (df_iterator NI = df_begin(EntryBB), + NE = df_end(EntryBB); NI != NE; ++NI) + for (unsigned i = 0, e = OutsideLoopPreds.size(); i != e; ++i) + assert(*NI != OutsideLoopPreds[i] && + "Loop has multiple entry points!"); + } + assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!"); + assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!"); + assert(BB != getHeader()->getParent()->begin() && + "Loop contains function entry block!"); + } + + // Check the subloops. + for (iterator I = begin(), E = end(); I != E; ++I) + // Each block in each subloop should be contained within this loop. + for (block_iterator BI = (*I)->block_begin(), BE = (*I)->block_end(); + BI != BE; ++BI) { + assert(std::binary_search(LoopBBs.begin(), LoopBBs.end(), *BI) && + "Loop does not contain all the blocks of a subloop!"); + } + + // Check the parent loop pointer. + if (ParentLoop) { + assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) != + ParentLoop->end() && + "Loop is not a subloop of its parent!"); + } +#endif + } + + /// verifyLoop - Verify loop structure of this loop and all nested loops. + void verifyLoopNest() const { + // Verify this loop. + verifyLoop(); + // Verify the subloops. 
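+    // (Typical client usage, for reference: after restructuring a loop nest,
+    // a transformation would call verifyLoopNest() on the outermost modified
+    // loop. Since verifyLoop() above compiles to an empty body when NDEBUG is
+    // defined, the whole check is essentially free in release builds.)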
+    for (iterator I = begin(), E = end(); I != E; ++I)
+      (*I)->verifyLoopNest();
+  }
+
+  void print(raw_ostream &OS, unsigned Depth = 0) const {
+    OS.indent(Depth*2) << "Loop at depth " << getLoopDepth()
+       << " containing: ";
+
+    for (unsigned i = 0; i < getBlocks().size(); ++i) {
+      if (i) OS << ",";
+      BlockT *BB = getBlocks()[i];
+      WriteAsOperand(OS, BB, false);
+      if (BB == getHeader())    OS << "<header>";
+      if (BB == getLoopLatch()) OS << "<latch>";
+      if (isLoopExiting(BB))    OS << "<exiting>";
+    }
+    OS << "\n";
+
+    for (iterator I = begin(), E = end(); I != E; ++I)
+      (*I)->print(OS, Depth+2);
+  }
+
+protected:
+  friend class LoopInfoBase<BlockT, LoopT>;
+  explicit LoopBase(BlockT *BB) : ParentLoop(0) {
+    Blocks.push_back(BB);
+  }
+};
+
+template<class BlockT, class LoopT>
+raw_ostream& operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
+  Loop.print(OS);
+  return OS;
+}
+
+class Loop : public LoopBase<BasicBlock, Loop> {
+public:
+  Loop() {}
+
+  /// isLoopInvariant - Return true if the specified value is loop invariant.
+  ///
+  bool isLoopInvariant(Value *V) const;
+
+  /// hasLoopInvariantOperands - Return true if all the operands of the
+  /// specified instruction are loop invariant.
+  bool hasLoopInvariantOperands(Instruction *I) const;
+
+  /// makeLoopInvariant - If the given value is an instruction inside of the
+  /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+  /// Return true if the value after any hoisting is loop invariant. This
+  /// function can be used as a slightly more aggressive replacement for
+  /// isLoopInvariant.
+  ///
+  /// If InsertPt is specified, it is the point to hoist instructions to.
+  /// If null, the terminator of the loop preheader is used.
+  ///
+  bool makeLoopInvariant(Value *V, bool &Changed,
+                         Instruction *InsertPt = 0) const;
+
+  /// makeLoopInvariant - If the given instruction is inside of the
+  /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+  /// Return true if the instruction after any hoisting is loop invariant. This
+  /// function can be used as a slightly more aggressive replacement for
+  /// isLoopInvariant.
+  ///
+  /// If InsertPt is specified, it is the point to hoist instructions to.
+  /// If null, the terminator of the loop preheader is used.
+  ///
+  bool makeLoopInvariant(Instruction *I, bool &Changed,
+                         Instruction *InsertPt = 0) const;
+
+  /// getCanonicalInductionVariable - Check to see if the loop has a canonical
+  /// induction variable: an integer recurrence that starts at 0 and increments
+  /// by one each time through the loop. If so, return the phi node that
+  /// corresponds to it.
+  ///
+  /// The IndVarSimplify pass transforms loops to have a canonical induction
+  /// variable.
+  ///
+  PHINode *getCanonicalInductionVariable() const;
+
+  /// getTripCount - Return a loop-invariant LLVM value indicating the number
+  /// of times the loop will be executed. Note that this means that the
+  /// backedge of the loop executes N-1 times. If the trip-count cannot be
+  /// determined, this returns null.
+  ///
+  /// The IndVarSimplify pass transforms loops to have a form that this
+  /// function easily understands.
+  ///
+  Value *getTripCount() const;
+
+  /// getSmallConstantTripCount - Returns the trip count of this loop as a
+  /// normal unsigned value, if possible. Returns 0 if the trip count is
+  /// unknown or not constant. Will also return 0 if the trip count is very
+  /// large (>= 2^32).
+  ///
+  /// The IndVarSimplify pass transforms loops to have a form that this
+  /// function easily understands.
+  ///
+  unsigned getSmallConstantTripCount() const;
+
+  /// getSmallConstantTripMultiple - Returns the largest constant divisor of
+  /// the trip count of this loop as a normal unsigned value, if possible.
+  /// This means that the actual trip count is always a multiple of the
+  /// returned value (don't forget the trip count could very well be zero as
+  /// well!).
+ /// + /// Returns 1 if the trip count is unknown or not guaranteed to be the + /// multiple of a constant (which is also the case if the trip count is simply + /// constant, use getSmallConstantTripCount for that case), Will also return 1 + /// if the trip count is very large (>= 2^32). + unsigned getSmallConstantTripMultiple() const; + + /// isLCSSAForm - Return true if the Loop is in LCSSA form + bool isLCSSAForm(DominatorTree &DT) const; + + /// isLoopSimplifyForm - Return true if the Loop is in the form that + /// the LoopSimplify form transforms loops to, which is sometimes called + /// normal form. + bool isLoopSimplifyForm() const; + + /// hasDedicatedExits - Return true if no exit block for the loop + /// has a predecessor that is outside the loop. + bool hasDedicatedExits() const; + + /// getUniqueExitBlocks - Return all unique successor blocks of this loop. + /// These are the blocks _outside of the current loop_ which are branched to. + /// This assumes that loop exits are in canonical form. + /// + void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const; + + /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one + /// block, return that block. Otherwise return null. + BasicBlock *getUniqueExitBlock() const; + + void dump() const; + +private: + friend class LoopInfoBase; + explicit Loop(BasicBlock *BB) : LoopBase(BB) {} +}; + +//===----------------------------------------------------------------------===// +/// LoopInfo - This class builds and contains all of the top level loop +/// structures in the specified function. +/// + +template +class LoopInfoBase { + // BBMap - Mapping of basic blocks to the inner most loop they occur in + DenseMap BBMap; + std::vector TopLevelLoops; + friend class LoopBase; + + void operator=(const LoopInfoBase &); // do not implement + LoopInfoBase(const LoopInfo &); // do not implement +public: + LoopInfoBase() { } + ~LoopInfoBase() { releaseMemory(); } + + void releaseMemory() { + for (typename std::vector::iterator I = + TopLevelLoops.begin(), E = TopLevelLoops.end(); I != E; ++I) + delete *I; // Delete all of the loops... + + BBMap.clear(); // Reset internal state of analysis + TopLevelLoops.clear(); + } + + /// iterator/begin/end - The interface to the top-level loops in the current + /// function. + /// + typedef typename std::vector::const_iterator iterator; + iterator begin() const { return TopLevelLoops.begin(); } + iterator end() const { return TopLevelLoops.end(); } + bool empty() const { return TopLevelLoops.empty(); } + + /// getLoopFor - Return the inner most loop that BB lives in. If a basic + /// block is in no loop (for example the entry node), null is returned. + /// + LoopT *getLoopFor(const BlockT *BB) const { + typename DenseMap::const_iterator I= + BBMap.find(const_cast(BB)); + return I != BBMap.end() ? I->second : 0; + } + + /// operator[] - same as getLoopFor... + /// + const LoopT *operator[](const BlockT *BB) const { + return getLoopFor(BB); + } + + /// getLoopDepth - Return the loop nesting level of the specified block. A + /// depth of 0 means the block is not inside any loop. + /// + unsigned getLoopDepth(const BlockT *BB) const { + const LoopT *L = getLoopFor(BB); + return L ? L->getLoopDepth() : 0; + } + + // isLoopHeader - True if the block is a loop header node + bool isLoopHeader(BlockT *BB) const { + const LoopT *L = getLoopFor(BB); + return L && L->getHeader() == BB; + } + + /// removeLoop - This removes the specified top-level loop from this loop info + /// object. 
The loop is not deleted, as it will presumably be inserted into + /// another loop. + LoopT *removeLoop(iterator I) { + assert(I != end() && "Cannot remove end iterator!"); + LoopT *L = *I; + assert(L->getParentLoop() == 0 && "Not a top-level loop!"); + TopLevelLoops.erase(TopLevelLoops.begin() + (I-begin())); + return L; + } + + /// changeLoopFor - Change the top-level loop that contains BB to the + /// specified loop. This should be used by transformations that restructure + /// the loop hierarchy tree. + void changeLoopFor(BlockT *BB, LoopT *L) { + LoopT *&OldLoop = BBMap[BB]; + assert(OldLoop && "Block not in a loop yet!"); + OldLoop = L; + } + + /// changeTopLevelLoop - Replace the specified loop in the top-level loops + /// list with the indicated loop. + void changeTopLevelLoop(LoopT *OldLoop, + LoopT *NewLoop) { + typename std::vector::iterator I = + std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop); + assert(I != TopLevelLoops.end() && "Old loop not at top level!"); + *I = NewLoop; + assert(NewLoop->ParentLoop == 0 && OldLoop->ParentLoop == 0 && + "Loops already embedded into a subloop!"); + } + + /// addTopLevelLoop - This adds the specified loop to the collection of + /// top-level loops. + void addTopLevelLoop(LoopT *New) { + assert(New->getParentLoop() == 0 && "Loop already in subloop!"); + TopLevelLoops.push_back(New); + } + + /// removeBlock - This method completely removes BB from all data structures, + /// including all of the Loop objects it is nested in and our mapping from + /// BasicBlocks to loops. + void removeBlock(BlockT *BB) { + typename DenseMap::iterator I = BBMap.find(BB); + if (I != BBMap.end()) { + for (LoopT *L = I->second; L; L = L->getParentLoop()) + L->removeBlockFromLoop(BB); + + BBMap.erase(I); + } + } + + // Internals + + static bool isNotAlreadyContainedIn(const LoopT *SubLoop, + const LoopT *ParentLoop) { + if (SubLoop == 0) return true; + if (SubLoop == ParentLoop) return false; + return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop); + } + + void Calculate(DominatorTreeBase &DT) { + BlockT *RootNode = DT.getRootNode()->getBlock(); + + for (df_iterator NI = df_begin(RootNode), + NE = df_end(RootNode); NI != NE; ++NI) + if (LoopT *L = ConsiderForLoop(*NI, DT)) + TopLevelLoops.push_back(L); + } + + LoopT *ConsiderForLoop(BlockT *BB, DominatorTreeBase &DT) { + if (BBMap.find(BB) != BBMap.end()) return 0;// Haven't processed this node? + + std::vector TodoStack; + + // Scan the predecessors of BB, checking to see if BB dominates any of + // them. This identifies backedges which target this node... + typedef GraphTraits > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType I = + InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB); + I != E; ++I) { + typename InvBlockTraits::NodeType *N = *I; + if (DT.dominates(BB, N)) // If BB dominates its predecessor... + TodoStack.push_back(N); + } + + if (TodoStack.empty()) return 0; // No backedges to this block... + + // Create a new loop to represent this basic block... + LoopT *L = new LoopT(BB); + BBMap[BB] = L; + + BlockT *EntryBlock = BB->getParent()->begin(); + + while (!TodoStack.empty()) { // Process all the nodes in the loop + BlockT *X = TodoStack.back(); + TodoStack.pop_back(); + + if (!L->contains(X) && // As of yet unprocessed?? + DT.dominates(EntryBlock, X)) { // X is reachable from entry block? + // Check to see if this block already belongs to a loop. 
If this occurs
+        // then we have a case where a loop that is supposed to be a child of
+        // the current loop was processed before the current loop. When this
+        // occurs, this child loop gets added to a part of the current loop,
+        // making it a sibling to the current loop. We have to reparent this
+        // loop.
+        if (LoopT *SubLoop =
+            const_cast<LoopT *>(getLoopFor(X)))
+          if (SubLoop->getHeader() == X && isNotAlreadyContainedIn(SubLoop, L)){
+            // Remove the subloop from its current parent...
+            assert(SubLoop->ParentLoop && SubLoop->ParentLoop != L);
+            LoopT *SLP = SubLoop->ParentLoop;  // SubLoopParent
+            typename std::vector<LoopT *>::iterator I =
+              std::find(SLP->SubLoops.begin(), SLP->SubLoops.end(), SubLoop);
+            assert(I != SLP->SubLoops.end() && "SubLoop not a child of parent?");
+            SLP->SubLoops.erase(I);   // Remove from parent...
+
+            // Add the subloop to THIS loop...
+            SubLoop->ParentLoop = L;
+            L->SubLoops.push_back(SubLoop);
+          }
+
+        // Normal case, add the block to our loop...
+        L->Blocks.push_back(X);
+
+        typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
+
+        // Add all of the predecessors of X to the end of the work stack...
+        TodoStack.insert(TodoStack.end(), InvBlockTraits::child_begin(X),
+                         InvBlockTraits::child_end(X));
+      }
+    }
+
+    // If there are any loops nested within this loop, create them now!
+    for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
+         E = L->Blocks.end(); I != E; ++I)
+      if (LoopT *NewLoop = ConsiderForLoop(*I, DT)) {
+        L->SubLoops.push_back(NewLoop);
+        NewLoop->ParentLoop = L;
+      }
+
+    // Add the basic blocks that comprise this loop to the BBMap so that this
+    // loop can be found for them.
+    //
+    for (typename std::vector<BlockT*>::iterator I = L->Blocks.begin(),
+         E = L->Blocks.end(); I != E; ++I)
+      BBMap.insert(std::make_pair(*I, L));
+
+    // Now that we have a list of all of the child loops of this loop, check
+    // to see if any of them should actually be nested inside of each other.
+    // We can accidentally pull loops out of their parents, so we must make
+    // sure to organize the loop nests correctly now.
+    {
+      std::map<BlockT *, LoopT *> ContainingLoops;
+      for (unsigned i = 0; i != L->SubLoops.size(); ++i) {
+        LoopT *Child = L->SubLoops[i];
+        assert(Child->getParentLoop() == L && "Not proper child loop?");
+
+        if (LoopT *ContainingLoop = ContainingLoops[Child->getHeader()]) {
+          // If there is already a loop which contains this loop, move this
+          // loop into the containing loop.
+          MoveSiblingLoopInto(Child, ContainingLoop);
+          --i;  // The loop got removed from the SubLoops list.
+        } else {
+          // This is currently considered to be a top-level loop. Check to see
+          // if any of the contained blocks are loop headers for subloops we
+          // have already processed.
+          for (unsigned b = 0, e = Child->Blocks.size(); b != e; ++b) {
+            LoopT *&BlockLoop = ContainingLoops[Child->Blocks[b]];
+            if (BlockLoop == 0) {   // Child block not processed yet...
+              BlockLoop = Child;
+            } else if (BlockLoop != Child) {
+              LoopT *SubLoop = BlockLoop;
+              // Reparent all of the blocks which used to belong to BlockLoop.
+              for (unsigned j = 0, f = SubLoop->Blocks.size(); j != f; ++j)
+                ContainingLoops[SubLoop->Blocks[j]] = Child;
+
+              // There is already a loop which contains this block; reparent
+              // the loop that the block currently belongs to so that it
+              // becomes a child of this loop.
+              MoveSiblingLoopInto(SubLoop, Child);
+              --i;  // We just shrunk the SubLoops list.
+ } + } + } + } + } + + return L; + } + + /// MoveSiblingLoopInto - This method moves the NewChild loop to live inside + /// of the NewParent Loop, instead of being a sibling of it. + void MoveSiblingLoopInto(LoopT *NewChild, + LoopT *NewParent) { + LoopT *OldParent = NewChild->getParentLoop(); + assert(OldParent && OldParent == NewParent->getParentLoop() && + NewChild != NewParent && "Not sibling loops!"); + + // Remove NewChild from being a child of OldParent + typename std::vector::iterator I = + std::find(OldParent->SubLoops.begin(), OldParent->SubLoops.end(), + NewChild); + assert(I != OldParent->SubLoops.end() && "Parent fields incorrect??"); + OldParent->SubLoops.erase(I); // Remove from parent's subloops list + NewChild->ParentLoop = 0; + + InsertLoopInto(NewChild, NewParent); + } + + /// InsertLoopInto - This inserts loop L into the specified parent loop. If + /// the parent loop contains a loop which should contain L, the loop gets + /// inserted into L instead. + void InsertLoopInto(LoopT *L, LoopT *Parent) { + BlockT *LHeader = L->getHeader(); + assert(Parent->contains(LHeader) && + "This loop should not be inserted here!"); + + // Check to see if it belongs in a child loop... + for (unsigned i = 0, e = static_cast(Parent->SubLoops.size()); + i != e; ++i) + if (Parent->SubLoops[i]->contains(LHeader)) { + InsertLoopInto(L, Parent->SubLoops[i]); + return; + } + + // If not, insert it here! + Parent->SubLoops.push_back(L); + L->ParentLoop = Parent; + } + + // Debugging + + void print(raw_ostream &OS) const { + for (unsigned i = 0; i < TopLevelLoops.size(); ++i) + TopLevelLoops[i]->print(OS); + #if 0 + for (DenseMap::const_iterator I = BBMap.begin(), + E = BBMap.end(); I != E; ++I) + OS << "BB '" << I->first->getName() << "' level = " + << I->second->getLoopDepth() << "\n"; + #endif + } +}; + +class LoopInfo : public FunctionPass { + LoopInfoBase LI; + friend class LoopBase; + + void operator=(const LoopInfo &); // do not implement + LoopInfo(const LoopInfo &); // do not implement +public: + static char ID; // Pass identification, replacement for typeid + + LoopInfo() : FunctionPass(ID) { + initializeLoopInfoPass(*PassRegistry::getPassRegistry()); + } + + LoopInfoBase& getBase() { return LI; } + + /// iterator/begin/end - The interface to the top-level loops in the current + /// function. + /// + typedef LoopInfoBase::iterator iterator; + inline iterator begin() const { return LI.begin(); } + inline iterator end() const { return LI.end(); } + bool empty() const { return LI.empty(); } + + /// getLoopFor - Return the inner most loop that BB lives in. If a basic + /// block is in no loop (for example the entry node), null is returned. + /// + inline Loop *getLoopFor(const BasicBlock *BB) const { + return LI.getLoopFor(BB); + } + + /// operator[] - same as getLoopFor... + /// + inline const Loop *operator[](const BasicBlock *BB) const { + return LI.getLoopFor(BB); + } + + /// getLoopDepth - Return the loop nesting level of the specified block. A + /// depth of 0 means the block is not inside any loop. + /// + inline unsigned getLoopDepth(const BasicBlock *BB) const { + return LI.getLoopDepth(BB); + } + + // isLoopHeader - True if the block is a loop header node + inline bool isLoopHeader(BasicBlock *BB) const { + return LI.isLoopHeader(BB); + } + + /// runOnFunction - Calculate the natural loop information. 
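+  // A minimal client sketch (MyPass is hypothetical); this is the standard
+  // getAnalysis<> idiom for FunctionPasses rather than anything specific to
+  // LoopInfo:
+  //
+  //   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  //     AU.addRequired<LoopInfo>();
+  //     AU.setPreservesAll();
+  //   }
+  //   bool MyPass::runOnFunction(Function &F) {
+  //     LoopInfo &LI = getAnalysis<LoopInfo>();
+  //     ...
+  //     return false;  // analysis-only client; IR unchanged
+  //   }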
+ /// + virtual bool runOnFunction(Function &F); + + virtual void verifyAnalysis() const; + + virtual void releaseMemory() { LI.releaseMemory(); } + + virtual void print(raw_ostream &O, const Module* M = 0) const; + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// removeLoop - This removes the specified top-level loop from this loop info + /// object. The loop is not deleted, as it will presumably be inserted into + /// another loop. + inline Loop *removeLoop(iterator I) { return LI.removeLoop(I); } + + /// changeLoopFor - Change the top-level loop that contains BB to the + /// specified loop. This should be used by transformations that restructure + /// the loop hierarchy tree. + inline void changeLoopFor(BasicBlock *BB, Loop *L) { + LI.changeLoopFor(BB, L); + } + + /// changeTopLevelLoop - Replace the specified loop in the top-level loops + /// list with the indicated loop. + inline void changeTopLevelLoop(Loop *OldLoop, Loop *NewLoop) { + LI.changeTopLevelLoop(OldLoop, NewLoop); + } + + /// addTopLevelLoop - This adds the specified loop to the collection of + /// top-level loops. + inline void addTopLevelLoop(Loop *New) { + LI.addTopLevelLoop(New); + } + + /// removeBlock - This method completely removes BB from all data structures, + /// including all of the Loop objects it is nested in and our mapping from + /// BasicBlocks to loops. + void removeBlock(BasicBlock *BB) { + LI.removeBlock(BB); + } + + /// replacementPreservesLCSSAForm - Returns true if replacing From with To + /// everywhere is guaranteed to preserve LCSSA form. + bool replacementPreservesLCSSAForm(Instruction *From, Value *To) { + // Preserving LCSSA form is only problematic if the replacing value is an + // instruction. + Instruction *I = dyn_cast(To); + if (!I) return true; + // If both instructions are defined in the same basic block then replacement + // cannot break LCSSA form. + if (I->getParent() == From->getParent()) + return true; + // If the instruction is not defined in a loop then it can safely replace + // anything. + Loop *ToLoop = getLoopFor(I->getParent()); + if (!ToLoop) return true; + // If the replacing instruction is defined in the same loop as the original + // instruction, or in a loop that contains it as an inner loop, then using + // it as a replacement will not break LCSSA form. + return ToLoop->contains(getLoopFor(From->getParent())); + } +}; + + +// Allow clients to walk the list of nested loops... +template <> struct GraphTraits { + typedef const Loop NodeType; + typedef LoopInfo::iterator ChildIteratorType; + + static NodeType *getEntryNode(const Loop *L) { return L; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->end(); + } +}; + +template <> struct GraphTraits { + typedef Loop NodeType; + typedef LoopInfo::iterator ChildIteratorType; + + static NodeType *getEntryNode(Loop *L) { return L; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->end(); + } +}; + +template +void +LoopBase::addBasicBlockToLoop(BlockT *NewBB, + LoopInfoBase &LIB) { + assert((Blocks.empty() || LIB[getHeader()] == this) && + "Incorrect LI specified for this loop!"); + assert(NewBB && "Cannot add a null basic block to the loop!"); + assert(LIB[NewBB] == 0 && "BasicBlock already in the loop!"); + + LoopT *L = static_cast(this); + + // Add the loop mapping to the LoopInfo object... 
+ LIB.BBMap[NewBB] = L; + + // Add the basic block to this loop and all parent loops... + while (L) { + L->Blocks.push_back(NewBB); + L = L->getParentLoop(); + } +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/LoopPass.h b/final/include/llvm/Analysis/LoopPass.h new file mode 100644 index 00000000000..1603d2ea7a4 --- /dev/null +++ b/final/include/llvm/Analysis/LoopPass.h @@ -0,0 +1,158 @@ +//===- LoopPass.h - LoopPass class ----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines LoopPass class. All loop optimization +// and transformation passes are derived from LoopPass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LOOP_PASS_H +#define LLVM_LOOP_PASS_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Pass.h" +#include "llvm/PassManagers.h" +#include "llvm/Function.h" +#include + +namespace llvm { + +class LPPassManager; +class Function; +class PMStack; + +class LoopPass : public Pass { +public: + explicit LoopPass(char &pid) : Pass(PT_Loop, pid) {} + + /// getPrinterPass - Get a pass to print the function corresponding + /// to a Loop. + Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; + + // runOnLoop - This method should be implemented by the subclass to perform + // whatever action is necessary for the specified Loop. + virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0; + + // Initialization and finalization hooks. + virtual bool doInitialization(Loop *L, LPPassManager &LPM) { + return false; + } + + // Finalization hook does not supply Loop because at this time + // loop nest is completely different. + virtual bool doFinalization() { return false; } + + // Check if this pass is suitable for the current LPPassManager, if + // available. This pass P is not suitable for a LPPassManager if P + // is not preserving higher level analysis info used by other + // LPPassManager passes. In such case, pop LPPassManager from the + // stack. This will force assignPassManager() to create new + // LPPassManger as expected. + void preparePassManager(PMStack &PMS); + + /// Assign pass manager to manage this pass + virtual void assignPassManager(PMStack &PMS, + PassManagerType PMT); + + /// Return what kind of Pass Manager can manage this pass. + virtual PassManagerType getPotentialPassManagerType() const { + return PMT_LoopPassManager; + } + + //===--------------------------------------------------------------------===// + /// SimpleAnalysis - Provides simple interface to update analysis info + /// maintained by various passes. Note, if required this interface can + /// be extracted into a separate abstract class but it would require + /// additional use of multiple inheritance in Pass class hierarchy, something + /// we are trying to avoid. + + /// Each loop pass can override these simple analysis hooks to update + /// desired analysis information. + /// cloneBasicBlockAnalysis - Clone analysis info associated with basic block. + virtual void cloneBasicBlockAnalysis(BasicBlock *F, BasicBlock *T, Loop *L) {} + + /// deleteAnalysisValue - Delete analysis info associated with value V. 
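+  // A minimal subclass sketch (CountBlocks is hypothetical; pass registration
+  // boilerplate is omitted, and errs() needs llvm/Support/raw_ostream.h):
+  //
+  //   namespace {
+  //     struct CountBlocks : public LoopPass {
+  //       static char ID;
+  //       CountBlocks() : LoopPass(ID) {}
+  //       virtual bool runOnLoop(Loop *L, LPPassManager &) {
+  //         errs() << "loop at depth " << L->getLoopDepth()
+  //                << " has " << L->getBlocks().size() << " blocks\n";
+  //         return false;  // the IR was not modified
+  //       }
+  //     };
+  //     char CountBlocks::ID = 0;
+  //   }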
+ virtual void deleteAnalysisValue(Value *V, Loop *L) {} +}; + +class LPPassManager : public FunctionPass, public PMDataManager { +public: + static char ID; + explicit LPPassManager(int Depth); + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool runOnFunction(Function &F); + + /// Pass Manager itself does not invalidate any analysis info. + // LPPassManager needs LoopInfo. + void getAnalysisUsage(AnalysisUsage &Info) const; + + virtual const char *getPassName() const { + return "Loop Pass Manager"; + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + /// Print passes managed by this manager + void dumpPassStructure(unsigned Offset); + + LoopPass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + LoopPass *LP = static_cast(PassVector[N]); + return LP; + } + + virtual PassManagerType getPassManagerType() const { + return PMT_LoopPassManager; + } + +public: + // Delete loop from the loop queue and loop nest (LoopInfo). + void deleteLoopFromQueue(Loop *L); + + // Insert loop into the loop queue and add it as a child of the + // given parent. + void insertLoop(Loop *L, Loop *ParentLoop); + + // Insert a loop into the loop queue. + void insertLoopIntoQueue(Loop *L); + + // Reoptimize this loop. LPPassManager will re-insert this loop into the + // queue. This allows LoopPass to change loop nest for the loop. This + // utility may send LPPassManager into infinite loops so use caution. + void redoLoop(Loop *L); + + //===--------------------------------------------------------------------===// + /// SimpleAnalysis - Provides simple interface to update analysis info + /// maintained by various passes. Note, if required this interface can + /// be extracted into a separate abstract class but it would require + /// additional use of multiple inheritance in Pass class hierarchy, something + /// we are trying to avoid. + + /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for + /// all passes that implement simple analysis interface. + void cloneBasicBlockSimpleAnalysis(BasicBlock *From, BasicBlock *To, Loop *L); + + /// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes + /// that implement simple analysis interface. + void deleteSimpleAnalysisValue(Value *V, Loop *L); + +private: + std::deque LQ; + bool skipThisLoop; + bool redoThisLoop; + LoopInfo *LI; + Loop *CurrentLoop; +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/MemoryBuiltins.h b/final/include/llvm/Analysis/MemoryBuiltins.h new file mode 100644 index 00000000000..22493f6f8b9 --- /dev/null +++ b/final/include/llvm/Analysis/MemoryBuiltins.h @@ -0,0 +1,84 @@ +//===- llvm/Analysis/MemoryBuiltins.h- Calls to memory builtins -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions identifies calls to builtin functions that allocate +// or free memory. 
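+// A minimal usage sketch of the helpers declared below; `V` is a hypothetical
+// const Value* being classified:
+//
+//   if (isMalloc(V)) {
+//     if (const CallInst *CI = extractMallocCall(V)) {
+//       // V is the malloc call itself.
+//     } else if (const CallInst *CI = extractMallocCallFromBitCast(V)) {
+//       // V is a bitcast of the malloc call's result.
+//     }
+//   }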
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORYBUILTINS_H +#define LLVM_ANALYSIS_MEMORYBUILTINS_H + +namespace llvm { +class CallInst; +class PointerType; +class TargetData; +class Type; +class Value; + +//===----------------------------------------------------------------------===// +// malloc Call Utility Functions. +// + +/// isMalloc - Returns true if the value is either a malloc call or a bitcast of +/// the result of a malloc call +bool isMalloc(const Value *I); + +/// extractMallocCall - Returns the corresponding CallInst if the instruction +/// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we +/// ignore InvokeInst here. +const CallInst *extractMallocCall(const Value *I); +CallInst *extractMallocCall(Value *I); + +/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the +/// instruction is a bitcast of the result of a malloc call. +const CallInst *extractMallocCallFromBitCast(const Value *I); +CallInst *extractMallocCallFromBitCast(Value *I); + +/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// is a call to malloc whose array size can be determined and the array size +/// is not constant 1. Otherwise, return NULL. +const CallInst *isArrayMalloc(const Value *I, const TargetData *TD); + +/// getMallocType - Returns the PointerType resulting from the malloc call. +/// The PointerType depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const PointerType *getMallocType(const CallInst *CI); + +/// getMallocAllocatedType - Returns the Type allocated by malloc call. +/// The Type depends on the number of bitcast uses of the malloc call: +/// 0: PointerType is the malloc calls' return type. +/// 1: PointerType is the bitcast's result type. +/// >1: Unique PointerType cannot be determined, return NULL. +const Type *getMallocAllocatedType(const CallInst *CI); + +/// getMallocArraySize - Returns the array size of a malloc call. If the +/// argument passed to malloc is a multiple of the size of the malloced type, +/// then return that multiple. For non-array mallocs, the multiple is +/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be +/// determined. +Value *getMallocArraySize(CallInst *CI, const TargetData *TD, + bool LookThroughSExt = false); + +//===----------------------------------------------------------------------===// +// free Call Utility Functions. +// + +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *isFreeCall(const Value *I); + +static inline CallInst *isFreeCall(Value *I) { + return const_cast(isFreeCall((const Value*)I)); +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/MemoryDependenceAnalysis.h b/final/include/llvm/Analysis/MemoryDependenceAnalysis.h new file mode 100644 index 00000000000..4d5dd1987f2 --- /dev/null +++ b/final/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -0,0 +1,378 @@ +//===- llvm/Analysis/MemoryDependenceAnalysis.h - Memory Deps --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the MemoryDependenceAnalysis analysis pass. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_MEMORY_DEPENDENCE_H +#define LLVM_ANALYSIS_MEMORY_DEPENDENCE_H + +#include "llvm/BasicBlock.h" +#include "llvm/Pass.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PointerIntPair.h" + +namespace llvm { + class Function; + class FunctionPass; + class Instruction; + class CallSite; + class AliasAnalysis; + class TargetData; + class MemoryDependenceAnalysis; + class PredIteratorCache; + class DominatorTree; + class PHITransAddr; + + /// MemDepResult - A memory dependence query can return one of three different + /// answers, described below. + class MemDepResult { + enum DepType { + /// Invalid - Clients of MemDep never see this. + Invalid = 0, + + /// Clobber - This is a dependence on the specified instruction which + /// clobbers the desired value. The pointer member of the MemDepResult + /// pair holds the instruction that clobbers the memory. For example, + /// this occurs when we see a may-aliased store to the memory location we + /// care about. + /// + /// A dependence query on the first instruction of the entry block will + /// return a clobber(self) result. + Clobber, + + /// Def - This is a dependence on the specified instruction which + /// defines/produces the desired memory location. The pointer member of + /// the MemDepResult pair holds the instruction that defines the memory. + /// Cases of interest: + /// 1. This could be a load or store for dependence queries on + /// load/store. The value loaded or stored is the produced value. + /// Note that the pointer operand may be different than that of the + /// queried pointer due to must aliases and phi translation. Note + /// that the def may not be the same type as the query, the pointers + /// may just be must aliases. + /// 2. For loads and stores, this could be an allocation instruction. In + /// this case, the load is loading an undef value or a store is the + /// first store to (that part of) the allocation. + /// 3. Dependence queries on calls return Def only when they are + /// readonly calls or memory use intrinsics with identical callees + /// and no intervening clobbers. No validation is done that the + /// operands to the calls are the same. + Def, + + /// NonLocal - This marker indicates that the query has no dependency in + /// the specified block. To find out more, the client should query other + /// predecessor blocks. + NonLocal + }; + typedef PointerIntPair PairTy; + PairTy Value; + explicit MemDepResult(PairTy V) : Value(V) {} + public: + MemDepResult() : Value(0, Invalid) {} + + /// get methods: These are static ctor methods for creating various + /// MemDepResult kinds. + static MemDepResult getDef(Instruction *Inst) { + return MemDepResult(PairTy(Inst, Def)); + } + static MemDepResult getClobber(Instruction *Inst) { + return MemDepResult(PairTy(Inst, Clobber)); + } + static MemDepResult getNonLocal() { + return MemDepResult(PairTy(0, NonLocal)); + } + + /// isClobber - Return true if this MemDepResult represents a query that is + /// a instruction clobber dependency. 
+    bool isClobber() const { return Value.getInt() == Clobber; }
+
+    /// isDef - Return true if this MemDepResult represents a query that is
+    /// an instruction definition dependency.
+    bool isDef() const { return Value.getInt() == Def; }
+
+    /// isNonLocal - Return true if this MemDepResult represents a query that
+    /// is transparent to the start of the block, but where a non-local query
+    /// has not been done.
+    bool isNonLocal() const { return Value.getInt() == NonLocal; }
+
+    /// getInst() - If this is a normal dependency, return the instruction that
+    /// is depended on. Otherwise, return null.
+    Instruction *getInst() const { return Value.getPointer(); }
+
+    bool operator==(const MemDepResult &M) const { return Value == M.Value; }
+    bool operator!=(const MemDepResult &M) const { return Value != M.Value; }
+    bool operator<(const MemDepResult &M) const { return Value < M.Value; }
+    bool operator>(const MemDepResult &M) const { return Value > M.Value; }
+  private:
+    friend class MemoryDependenceAnalysis;
+    /// Dirty - Entries with this marker occur in a LocalDeps map or
+    /// NonLocalDeps map when the instruction they previously referenced was
+    /// removed from MemDep. In either case, the entry may include an
+    /// instruction pointer. If so, the pointer is an instruction in the
+    /// block where scanning can start from, saving some work.
+    ///
+    /// In a default-constructed MemDepResult object, the type will be Dirty
+    /// and the instruction pointer will be null.
+
+    /// isDirty - Return true if this is a MemDepResult in its dirty/invalid
+    /// state.
+    bool isDirty() const { return Value.getInt() == Invalid; }
+
+    static MemDepResult getDirty(Instruction *Inst) {
+      return MemDepResult(PairTy(Inst, Invalid));
+    }
+  };
+
+  /// NonLocalDepEntry - This is an entry in the NonLocalDepInfo cache. For
+  /// each BasicBlock (the BB entry) it keeps a MemDepResult.
+  class NonLocalDepEntry {
+    BasicBlock *BB;
+    MemDepResult Result;
+  public:
+    NonLocalDepEntry(BasicBlock *bb, MemDepResult result)
+      : BB(bb), Result(result) {}
+
+    // This is used for searches.
+    NonLocalDepEntry(BasicBlock *bb) : BB(bb) {}
+
+    // BB is the sort key, it can't be changed.
+    BasicBlock *getBB() const { return BB; }
+
+    void setResult(const MemDepResult &R) { Result = R; }
+
+    const MemDepResult &getResult() const { return Result; }
+
+    bool operator<(const NonLocalDepEntry &RHS) const {
+      return BB < RHS.BB;
+    }
+  };
+
+  /// NonLocalDepResult - This is a result from a NonLocal dependence query.
+  /// For each BasicBlock (the BB entry) it keeps a MemDepResult and the
+  /// (potentially phi translated) address that was live in the block.
+  class NonLocalDepResult {
+    NonLocalDepEntry Entry;
+    Value *Address;
+  public:
+    NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address)
+      : Entry(bb, result), Address(address) {}
+
+    // BB is the sort key, it can't be changed.
+    BasicBlock *getBB() const { return Entry.getBB(); }
+
+    void setResult(const MemDepResult &R, Value *Addr) {
+      Entry.setResult(R);
+      Address = Addr;
+    }
+
+    const MemDepResult &getResult() const { return Entry.getResult(); }
+
+    /// getAddress - Return the address of this pointer in this block. This can
+    /// be different from the address queried for the non-local result because
+    /// of phi translation. This returns null if the address was not available
+    /// in a block (i.e. because phi translation failed) or if this is a cached
+    /// result and that address was deleted.
+ /// + /// The address is always null for a non-local 'call' dependence. + Value *getAddress() const { return Address; } + }; + + /// MemoryDependenceAnalysis - This is an analysis that determines, for a + /// given memory operation, what preceding memory operations it depends on. + /// It builds on alias analysis information, and tries to provide a lazy, + /// caching interface to a common kind of alias information query. + /// + /// The dependency information returned is somewhat unusual, but is pragmatic. + /// If queried about a store or call that might modify memory, the analysis + /// will return the instruction[s] that may either load from that memory or + /// store to it. If queried with a load or call that can never modify memory, + /// the analysis will return calls and stores that might modify the pointer, + /// but generally does not return loads unless a) they are volatile, or + /// b) they load from *must-aliased* pointers. Returning a dependence on + /// must-alias'd pointers instead of all pointers interacts well with the + /// internal caching mechanism. + /// + class MemoryDependenceAnalysis : public FunctionPass { + // A map from instructions to their dependency. + typedef DenseMap LocalDepMapType; + LocalDepMapType LocalDeps; + + public: + typedef std::vector NonLocalDepInfo; + private: + /// ValueIsLoadPair - This is a pair where the bool is true if + /// the dependence is a read only dependence, false if read/write. + typedef PointerIntPair ValueIsLoadPair; + + /// BBSkipFirstBlockPair - This pair is used when caching information for a + /// block. If the pointer is null, the cache value is not a full query that + /// starts at the specified block. If non-null, the bool indicates whether + /// or not the contents of the block was skipped. + typedef PointerIntPair BBSkipFirstBlockPair; + + /// NonLocalPointerInfo - This record is the information kept for each + /// (value, is load) pair. + struct NonLocalPointerInfo { + /// Pair - The pair of the block and the skip-first-block flag. + BBSkipFirstBlockPair Pair; + /// NonLocalDeps - The results of the query for each relevant block. + NonLocalDepInfo NonLocalDeps; + /// Size - The maximum size of the dereferences of the + /// pointer. May be UnknownSize if the sizes are unknown. + uint64_t Size; + /// TBAATag - The TBAA tag associated with dereferences of the + /// pointer. May be null if there are no tags or conflicting tags. + const MDNode *TBAATag; + + NonLocalPointerInfo() : Size(AliasAnalysis::UnknownSize), TBAATag(0) {} + }; + + /// CachedNonLocalPointerInfo - This map stores the cached results of doing + /// a pointer lookup at the bottom of a block. The key of this map is the + /// pointer+isload bit, the value is a list of result> mappings. + typedef DenseMap CachedNonLocalPointerInfo; + CachedNonLocalPointerInfo NonLocalPointerDeps; + + // A map from instructions to their non-local pointer dependencies. + typedef DenseMap > ReverseNonLocalPtrDepTy; + ReverseNonLocalPtrDepTy ReverseNonLocalPtrDeps; + + + /// PerInstNLInfo - This is the instruction we keep for each cached access + /// that we have for an instruction. The pointer is an owning pointer and + /// the bool indicates whether we have any dirty bits in the set. + typedef std::pair PerInstNLInfo; + + // A map from instructions to their non-local dependencies. + typedef DenseMap NonLocalDepMapType; + + NonLocalDepMapType NonLocalDeps; + + // A reverse mapping from dependencies to the dependees. 
This is + // used when removing instructions to keep the cache coherent. + typedef DenseMap > ReverseDepMapType; + ReverseDepMapType ReverseLocalDeps; + + // A reverse mapping from dependencies to the non-local dependees. + ReverseDepMapType ReverseNonLocalDeps; + + /// Current AA implementation, just a cache. + AliasAnalysis *AA; + TargetData *TD; + OwningPtr PredCache; + public: + MemoryDependenceAnalysis(); + ~MemoryDependenceAnalysis(); + static char ID; + + /// Pass Implementation stuff. This doesn't do any analysis eagerly. + bool runOnFunction(Function &); + + /// Clean up memory in between runs + void releaseMemory(); + + /// getAnalysisUsage - Does not modify anything. It uses Value Numbering + /// and Alias Analysis. + /// + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// getDependency - Return the instruction on which a memory operation + /// depends. See the class comment for more details. It is illegal to call + /// this on non-memory instructions. + MemDepResult getDependency(Instruction *QueryInst); + + /// getNonLocalCallDependency - Perform a full dependency query for the + /// specified call, returning the set of blocks that the value is + /// potentially live across. The returned set of results will include a + /// "NonLocal" result for all blocks where the value is live across. + /// + /// This method assumes the instruction returns a "NonLocal" dependency + /// within its own block. + /// + /// This returns a reference to an internal data structure that may be + /// invalidated on the next non-local query or when an instruction is + /// removed. Clients must copy this data if they want it around longer than + /// that. + const NonLocalDepInfo &getNonLocalCallDependency(CallSite QueryCS); + + + /// getNonLocalPointerDependency - Perform a full dependency query for an + /// access to the specified (non-volatile) memory location, returning the + /// set of instructions that either define or clobber the value. + /// + /// This method assumes the pointer has a "NonLocal" dependency within BB. + void getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *BB, + SmallVectorImpl &Result); + + /// removeInstruction - Remove an instruction from the dependence analysis, + /// updating the dependence of instructions that previously depended on it. + void removeInstruction(Instruction *InstToRemove); + + /// invalidateCachedPointerInfo - This method is used to invalidate cached + /// information about the specified pointer, because it may be too + /// conservative in memdep. This is an optional call that can be used when + /// the client detects an equivalence between the pointer and some other + /// value and replaces the other value with ptr. This can make Ptr available + /// in more places that cached info does not necessarily keep. + void invalidateCachedPointerInfo(Value *Ptr); + + /// invalidateCachedPredecessors - Clear the PredIteratorCache info. + /// This needs to be done when the CFG changes, e.g., due to splitting + /// critical edges. + void invalidateCachedPredecessors(); + + /// getPointerDependencyFrom - Return the instruction on which a memory + /// location depends. If isLoad is true, this routine ignores may-aliases + /// with read-only operations. If isLoad is false, this routine ignores + /// may-aliases with reads from read-only locations. + /// + /// Note that this is an uncached query, and thus may be inefficient. 
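+    // A minimal sketch of a full non-local pointer query (Ptr and LoadBB are
+    // hypothetical; Location's size and TBAA arguments are left defaulted):
+    //
+    //   SmallVector<NonLocalDepResult, 16> Deps;
+    //   MDA.getNonLocalPointerDependency(AliasAnalysis::Location(Ptr),
+    //                                    true /*isLoad*/, LoadBB, Deps);
+    //   for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
+    //     // Deps[i].getBB(), Deps[i].getResult(), Deps[i].getAddress()
+    //   }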
+ /// + MemDepResult getPointerDependencyFrom(const AliasAnalysis::Location &Loc, + bool isLoad, + BasicBlock::iterator ScanIt, + BasicBlock *BB); + + private: + MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, + BasicBlock::iterator ScanIt, + BasicBlock *BB); + bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, + const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *BB, + SmallVectorImpl &Result, + DenseMap &Visited, + bool SkipFirstBlock = false); + MemDepResult GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, + bool isLoad, BasicBlock *BB, + NonLocalDepInfo *Cache, + unsigned NumSortedEntries); + + void RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P); + + /// verifyRemoved - Verify that the specified instruction does not occur + /// in our internal data structures. + void verifyRemoved(Instruction *Inst) const; + + }; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/PHITransAddr.h b/final/include/llvm/Analysis/PHITransAddr.h new file mode 100644 index 00000000000..033efba3e74 --- /dev/null +++ b/final/include/llvm/Analysis/PHITransAddr.h @@ -0,0 +1,117 @@ +//===- PHITransAddr.h - PHI Translation for Addresses -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the PHITransAddr class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PHITRANSADDR_H +#define LLVM_ANALYSIS_PHITRANSADDR_H + +#include "llvm/Instruction.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + class DominatorTree; + class TargetData; + +/// PHITransAddr - An address value which tracks and handles phi translation. +/// As we walk "up" the CFG through predecessors, we need to ensure that the +/// address we're tracking is kept up to date. For example, if we're analyzing +/// an address of "&A[i]" and walk through the definition of 'i' which is a PHI +/// node, we *must* phi translate i to get "&A[j]" or else we will analyze an +/// incorrect pointer in the predecessor block. +/// +/// This is designed to be a relatively small object that lives on the stack and +/// is copyable. +/// +class PHITransAddr { + /// Addr - The actual address we're analyzing. + Value *Addr; + + /// TD - The target data we are playing with if known, otherwise null. + const TargetData *TD; + + /// InstInputs - The inputs for our symbolic address. + SmallVector InstInputs; +public: + PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td) { + // If the address is an instruction, the whole thing is considered an input. + if (Instruction *I = dyn_cast(Addr)) + InstInputs.push_back(I); + } + + Value *getAddr() const { return Addr; } + + /// NeedsPHITranslationFromBlock - Return true if moving from the specified + /// BasicBlock to its predecessors requires PHI translation. + bool NeedsPHITranslationFromBlock(BasicBlock *BB) const { + // We do need translation if one of our input instructions is defined in + // this block. + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + if (InstInputs[i]->getParent() == BB) + return true; + return false; + } + + /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true + /// if we have some hope of doing it. 
This should be used as a filter to + /// avoid calling PHITranslateValue in hopeless situations. + bool IsPotentiallyPHITranslatable() const; + + /// PHITranslateValue - PHI translate the current address up the CFG from + /// CurBB to Pred, updating our state to reflect any needed changes. If the + /// dominator tree DT is non-null, the translated value must dominate + /// PredBB. This returns true on failure and sets Addr to null. + bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT); + + /// PHITranslateWithInsertion - PHI translate this value into the specified + /// predecessor block, inserting a computation of the value if it is + /// unavailable. + /// + /// All newly created instructions are added to the NewInsts list. This + /// returns null on failure. + /// + Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl &NewInsts); + + void dump() const; + + /// Verify - Check internal consistency of this data structure. If the + /// structure is valid, it returns true. If invalid, it prints errors and + /// returns false. + bool Verify() const; +private: + Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT); + + /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated + /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB + /// block. All newly created instructions are added to the NewInsts list. + /// This returns null on failure. + /// + Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl &NewInsts); + + /// AddAsInput - If the specified value is an instruction, add it as an input. + Value *AddAsInput(Value *V) { + // If V is an instruction, it is now an input. + if (Instruction *VI = dyn_cast(V)) + InstInputs.push_back(VI); + return V; + } + +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/Analysis/Passes.h b/final/include/llvm/Analysis/Passes.h new file mode 100644 index 00000000000..0eff75fe2f8 --- /dev/null +++ b/final/include/llvm/Analysis/Passes.h @@ -0,0 +1,198 @@ +//===-- llvm/Analysis/Passes.h - Constructors for analyses ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes for accessor functions that expose passes +// in the analysis libraries. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PASSES_H +#define LLVM_ANALYSIS_PASSES_H + +namespace llvm { + class FunctionPass; + class ImmutablePass; + class LoopPass; + class ModulePass; + class Pass; + class PassInfo; + class LibCallInfo; + + //===--------------------------------------------------------------------===// + // + // createGlobalsModRefPass - This pass provides alias and mod/ref info for + // global values that do not have their addresses taken. 
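+  // A minimal wiring sketch for these factory functions (M is a hypothetical
+  // Module*; PassManager comes from llvm/PassManager.h):
+  //
+  //   PassManager PM;
+  //   PM.add(createBasicAliasAnalysisPass());
+  //   PM.add(createAAEvalPass());  // evaluates alias-analysis precision
+  //   PM.run(*M);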
+ // + Pass *createGlobalsModRefPass(); + + //===--------------------------------------------------------------------===// + // + // createAliasDebugger - This pass helps debug clients of AA + // + Pass *createAliasDebugger(); + + //===--------------------------------------------------------------------===// + // + // createAliasAnalysisCounterPass - This pass counts alias queries and how the + // alias analysis implementation responds. + // + ModulePass *createAliasAnalysisCounterPass(); + + //===--------------------------------------------------------------------===// + // + // createAAEvalPass - This pass implements a simple N^2 alias analysis + // accuracy evaluator. + // + FunctionPass *createAAEvalPass(); + + //===--------------------------------------------------------------------===// + // + // createNoAAPass - This pass implements a "I don't know" alias analysis. + // + ImmutablePass *createNoAAPass(); + + //===--------------------------------------------------------------------===// + // + // createBasicAliasAnalysisPass - This pass implements the stateless alias + // analysis. + // + ImmutablePass *createBasicAliasAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + /// createLibCallAliasAnalysisPass - Create an alias analysis pass that knows + /// about the semantics of a set of libcalls specified by LCI. The newly + /// constructed pass takes ownership of the pointer that is provided. + /// + FunctionPass *createLibCallAliasAnalysisPass(LibCallInfo *LCI); + + //===--------------------------------------------------------------------===// + // + // createScalarEvolutionAliasAnalysisPass - This pass implements a simple + // alias analysis using ScalarEvolution queries. + // + FunctionPass *createScalarEvolutionAliasAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + // createTypeBasedAliasAnalysisPass - This pass implements metadata-based + // type-based alias analysis. + // + ImmutablePass *createTypeBasedAliasAnalysisPass(); + + //===--------------------------------------------------------------------===// + // + // createProfileLoaderPass - This pass loads information from a profile dump + // file. + // + ModulePass *createProfileLoaderPass(); + extern char &ProfileLoaderPassID; + + //===--------------------------------------------------------------------===// + // + // createNoProfileInfoPass - This pass implements the default "no profile". + // + ImmutablePass *createNoProfileInfoPass(); + + //===--------------------------------------------------------------------===// + // + // createProfileEstimatorPass - This pass estimates profiling information + // instead of loading it from a previous run. + // + FunctionPass *createProfileEstimatorPass(); + extern char &ProfileEstimatorPassID; + + //===--------------------------------------------------------------------===// + // + // createProfileVerifierPass - This pass verifies profiling information. + // + FunctionPass *createProfileVerifierPass(); + + //===--------------------------------------------------------------------===// + // + // createPathProfileLoaderPass - This pass loads information from a path + // profile dump file. + // + ModulePass *createPathProfileLoaderPass(); + extern char &PathProfileLoaderPassID; + + //===--------------------------------------------------------------------===// + // + // createNoPathProfileInfoPass - This pass implements the default + // "no path profile". 
+ // + ImmutablePass *createNoPathProfileInfoPass(); + + //===--------------------------------------------------------------------===// + // + // createPathProfileVerifierPass - This pass verifies path profiling + // information. + // + ModulePass *createPathProfileVerifierPass(); + + //===--------------------------------------------------------------------===// + // + // createDSAAPass - This pass implements simple context sensitive alias + // analysis. + // + ModulePass *createDSAAPass(); + + //===--------------------------------------------------------------------===// + // + // createDSOptPass - This pass uses DSA to do a series of simple + // optimizations. + // + ModulePass *createDSOptPass(); + + //===--------------------------------------------------------------------===// + // + // createSteensgaardPass - This pass uses the data structure graphs to do a + // simple context insensitive alias analysis. + // + ModulePass *createSteensgaardPass(); + + //===--------------------------------------------------------------------===// + // + /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo + /// pass. + FunctionPass *createLazyValueInfoPass(); + + //===--------------------------------------------------------------------===// + // + // createLoopDependenceAnalysisPass - This creates an instance of the + // LoopDependenceAnalysis pass. + // + LoopPass *createLoopDependenceAnalysisPass(); + + // Minor pass prototypes, allowing us to expose them through bugpoint and + // analyze. + FunctionPass *createInstCountPass(); + + // print debug info intrinsics in human readable form + FunctionPass *createDbgInfoPrinterPass(); + + //===--------------------------------------------------------------------===// + // + // createRegionInfoPass - This pass finds all single entry single exit regions + // in a function and builds the region hierarchy. + // + FunctionPass *createRegionInfoPass(); + + // Print module-level debug info metadata in human-readable form. + ModulePass *createModuleDebugInfoPrinterPass(); + + //===--------------------------------------------------------------------===// + // + // createMemDepPrinter - This pass exhaustively collects all memdep + // information and prints it with -analyze. + // + FunctionPass *createMemDepPrinter(); +} + +#endif diff --git a/final/include/llvm/Analysis/PathNumbering.h b/final/include/llvm/Analysis/PathNumbering.h new file mode 100644 index 00000000000..7025e28484c --- /dev/null +++ b/final/include/llvm/Analysis/PathNumbering.h @@ -0,0 +1,304 @@ +//===- PathNumbering.h ----------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Ball-Larus path numbers uniquely identify paths through a directed acyclic +// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony +// edges to obtain a DAG, and thus the unique path numbers [Ball96]. +// +// The purpose of this analysis is to enumerate the edges in a CFG in order +// to obtain paths from path numbers in a convenient manner. As described in +// [Ball96] edges can be enumerated such that given a path number by following +// the CFG and updating the path number, the path is obtained. +// +// [Ball96] +// T. Ball and J. R. Larus. "Efficient Path Profiling." 
+// International Symposium on Microarchitecture, pages 46-57, 1996.
+// http://portal.acm.org/citation.cfm?id=243857
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PATH_NUMBERING_H
+#define LLVM_PATH_NUMBERING_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include <map>
+#include <stack>
+#include <vector>
+
+namespace llvm {
+class BallLarusNode;
+class BallLarusEdge;
+class BallLarusDag;
+
+// typedefs for storage/iterators of various DAG components
+typedef std::vector<BallLarusNode*> BLNodeVector;
+typedef std::vector<BallLarusNode*>::iterator BLNodeIterator;
+typedef std::vector<BallLarusEdge*> BLEdgeVector;
+typedef std::vector<BallLarusEdge*>::iterator BLEdgeIterator;
+typedef std::map<BasicBlock*, BallLarusNode*> BLBlockNodeMap;
+typedef std::stack<BallLarusNode*> BLNodeStack;
+
+// Represents a basic block with information necessary for the BallLarus
+// algorithms.
+class BallLarusNode {
+public:
+  enum NodeColor { WHITE, GRAY, BLACK };
+
+  // Constructor: Initializes a new Node for the given BasicBlock
+  BallLarusNode(BasicBlock* BB) :
+    _basicBlock(BB), _numberPaths(0), _color(WHITE) {
+    static unsigned nextUID = 0;
+    _uid = nextUID++;
+  }
+
+  // Returns the basic block for the BallLarusNode
+  BasicBlock* getBlock();
+
+  // Get/set the number of paths to the exit starting at the node.
+  unsigned getNumberPaths();
+  void setNumberPaths(unsigned numberPaths);
+
+  // Get/set the NodeColor used in graph algorithms.
+  NodeColor getColor();
+  void setColor(NodeColor color);
+
+  // Iterator information for predecessor edges. Includes phony and
+  // backedges.
+  BLEdgeIterator predBegin();
+  BLEdgeIterator predEnd();
+  unsigned getNumberPredEdges();
+
+  // Iterator information for successor edges. Includes phony and
+  // backedges.
+  BLEdgeIterator succBegin();
+  BLEdgeIterator succEnd();
+  unsigned getNumberSuccEdges();
+
+  // Add an edge to the predecessor list.
+  void addPredEdge(BallLarusEdge* edge);
+
+  // Remove an edge from the predecessor list.
+  void removePredEdge(BallLarusEdge* edge);
+
+  // Add an edge to the successor list.
+  void addSuccEdge(BallLarusEdge* edge);
+
+  // Remove an edge from the successor list.
+  void removeSuccEdge(BallLarusEdge* edge);
+
+  // Returns the name of the BasicBlock being represented.  If BasicBlock
+  // is null then returns "<null>".  If BasicBlock has no name, then
+  // "<unnamed>" is returned.  Intended for use with debug output.
+  std::string getName();
+
+private:
+  // The corresponding underlying BB.
+  BasicBlock* _basicBlock;
+
+  // Holds the predecessor edges of this node.
+  BLEdgeVector _predEdges;
+
+  // Holds the successor edges of this node.
+  BLEdgeVector _succEdges;
+
+  // The number of paths from the node to the exit.
+  unsigned _numberPaths;
+
+  // 'Color' used by graph algorithms to mark the node.
+  NodeColor _color;
+
+  // Unique ID to ensure naming difference with dotgraphs
+  unsigned _uid;
+
+  // Removes an edge from an edgeVector.  Used by removePredEdge and
+  // removeSuccEdge.
+  void removeEdge(BLEdgeVector& v, BallLarusEdge* e);
+};
+
+// Represents an edge in the DAG.  For an edge, v -> w, v is the source, and
+// w is the target.
+class BallLarusEdge {
+public:
+  enum EdgeType { NORMAL, BACKEDGE, SPLITEDGE,
+                  BACKEDGE_PHONY, SPLITEDGE_PHONY, CALLEDGE_PHONY };
+
+  // Constructor: Initializes a BallLarusEdge with a source and target.
+  BallLarusEdge(BallLarusNode* source, BallLarusNode* target,
+                unsigned duplicateNumber)
+    : _source(source), _target(target), _weight(0), _edgeType(NORMAL),
+      _realEdge(NULL), _duplicateNumber(duplicateNumber) {}
+
+  // Returns the source/target node of this edge.
+  BallLarusNode* getSource() const;
+  BallLarusNode* getTarget() const;
+
+  // Gets the type of the edge.
+  EdgeType getType() const;
+
+  // Sets the type of the edge.
+  void setType(EdgeType type);
+
+  // Returns the weight of this edge.  Used to decode path numbers to
+  // sequences of basic blocks.
+  unsigned getWeight();
+
+  // Sets the weight of the edge.  Used during path numbering.
+  void setWeight(unsigned weight);
+
+  // Gets/sets the phony edge originating at the root.
+  BallLarusEdge* getPhonyRoot();
+  void setPhonyRoot(BallLarusEdge* phonyRoot);
+
+  // Gets/sets the phony edge terminating at the exit.
+  BallLarusEdge* getPhonyExit();
+  void setPhonyExit(BallLarusEdge* phonyExit);
+
+  // Gets/sets the associated real edge if this is a phony edge.
+  BallLarusEdge* getRealEdge();
+  void setRealEdge(BallLarusEdge* realEdge);
+
+  // Returns the duplicate number of the edge.
+  unsigned getDuplicateNumber();
+
+protected:
+  // Source node for this edge.
+  BallLarusNode* _source;
+
+  // Target node for this edge.
+  BallLarusNode* _target;
+
+private:
+  // Edge weight corresponding to path number increments before removing
+  // increments along a spanning tree.  The sum over the edge weights gives
+  // the path number.
+  unsigned _weight;
+
+  // The type of edge this represents.
+  EdgeType _edgeType;
+
+  // For backedges and split-edges, the phony edge which is linked to the
+  // root node of the DAG.  This contains a path number initialization.
+  BallLarusEdge* _phonyRoot;
+
+  // For backedges and split-edges, the phony edge which is linked to the
+  // exit node of the DAG.  This contains a path counter increment, and
+  // potentially a path number increment.
+  BallLarusEdge* _phonyExit;
+
+  // If this is a phony edge, _realEdge is a link to the back or split
+  // edge.  Otherwise, this is null.
+  BallLarusEdge* _realEdge;
+
+  // An ID to differentiate between those edges which have the same source
+  // and destination blocks.
+  unsigned _duplicateNumber;
+};
+
+// Represents the Ball Larus DAG for a given Function.  Can calculate
+// various properties required for instrumentation or analysis.  E.g. the
+// edge weights that determine the path number.
+class BallLarusDag {
+public:
+  // Initializes a BallLarusDag from the CFG of a given function.  Must
+  // call init() after creation, since some initialization requires
+  // virtual functions.
+  BallLarusDag(Function &F)
+    : _root(NULL), _exit(NULL), _function(F) {}
+
+  // Initialization that requires virtual functions which are not fully
+  // functional in the constructor.
+  void init();
+
+  // Frees all memory associated with the DAG.
+  virtual ~BallLarusDag();
+
+  // Calculate the path numbers by assigning edge increments as prescribed
+  // in Ball-Larus path profiling.
+  void calculatePathNumbers();
+
+  // Returns the number of paths for the DAG.
+  unsigned getNumberOfPaths();
+
+  // Returns the root (i.e. entry) node for the DAG.
+  BallLarusNode* getRoot();
+
+  // Returns the exit node for the DAG.
+  BallLarusNode* getExit();
+
+  // Returns the function for the DAG.
+  Function& getFunction();
+
+  // Clears the node colors.
+  void clearColors(BallLarusNode::NodeColor color);
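+
+  // Editorial note: a minimal sketch of the intended call sequence, not part
+  // of the original header.  `F' is a hypothetical Function reference:
+  //
+  //   BallLarusDag Dag(F);
+  //   Dag.init();                  // finish construction (uses virtuals)
+  //   Dag.calculatePathNumbers();  // assign Ball-Larus edge increments
+  //   unsigned NumPaths = Dag.getNumberOfPaths();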
+
+protected:
+  // All nodes in the DAG.
+  BLNodeVector _nodes;
+
+  // All edges in the DAG.
+  BLEdgeVector _edges;
+
+  // All backedges in the DAG.
+  BLEdgeVector _backEdges;
+
+  // Allows subclasses to determine which type of Node is created.
+  // Override this method to produce subclasses of BallLarusNode if
+  // necessary.  The destructor of BallLarusDag will call free on each
+  // pointer created.
+  virtual BallLarusNode* createNode(BasicBlock* BB);
+
+  // Allows subclasses to determine which type of Edge is created.
+  // Override this method to produce subclasses of BallLarusEdge if
+  // necessary.  Parameters source and target will have been created by
+  // createNode and can be cast to the subclass of BallLarusNode*
+  // returned by createNode.  The destructor of BallLarusDag will call free
+  // on each pointer created.
+  virtual BallLarusEdge* createEdge(BallLarusNode* source,
+                                    BallLarusNode* target,
+                                    unsigned duplicateNumber);
+
+  // Proxy to node's constructor.  Updates the DAG state.
+  BallLarusNode* addNode(BasicBlock* BB);
+
+  // Proxy to edge's constructor.  Updates the DAG state.
+  BallLarusEdge* addEdge(BallLarusNode* source, BallLarusNode* target,
+                         unsigned duplicateNumber);
+
+private:
+  // The root (i.e. entry) node for this DAG.
+  BallLarusNode* _root;
+
+  // The exit node for this DAG.
+  BallLarusNode* _exit;
+
+  // The function represented by this DAG.
+  Function& _function;
+
+  // Processes one node and its immediate edges for building the DAG.
+  void buildNode(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& dfsStack);
+
+  // Process an edge in the CFG for DAG building.
+  void buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>& dfsStack,
+                 BallLarusNode* currentNode, BasicBlock* succBB,
+                 unsigned duplicateNumber);
+
+  // The weight on each edge is the increment required along any path that
+  // contains that edge.
+  void calculatePathNumbersFrom(BallLarusNode* node);
+
+  // Adds a backedge with its phony edges.  Updates the DAG state.
+  void addBackedge(BallLarusNode* source, BallLarusNode* target,
+                   unsigned duplicateCount);
+};
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Analysis/PathProfileInfo.h b/final/include/llvm/Analysis/PathProfileInfo.h
new file mode 100644
index 00000000000..263763f7a8d
--- /dev/null
+++ b/final/include/llvm/Analysis/PathProfileInfo.h
@@ -0,0 +1,113 @@
+//===- PathProfileInfo.h --------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file outlines the interface used by optimizers to load path profiles.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PATHPROFILEINFO_H
+#define LLVM_PATHPROFILEINFO_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Analysis/PathNumbering.h"
+#include <map>
+
+namespace llvm {
+
+class ProfilePath;
+class ProfilePathEdge;
+class PathProfileInfo;
+
+typedef std::vector<ProfilePathEdge> ProfilePathEdgeVector;
+typedef std::vector<ProfilePathEdge>::iterator ProfilePathEdgeIterator;
+
+typedef std::vector<BasicBlock*> ProfilePathBlockVector;
+typedef std::vector<BasicBlock*>::iterator ProfilePathBlockIterator;
+
+typedef std::map<unsigned, ProfilePath*> ProfilePathMap;
+typedef std::map<unsigned, ProfilePath*>::iterator ProfilePathIterator;
+
+typedef std::map<Function*, unsigned> FunctionPathCountMap;
+typedef std::map<Function*, ProfilePathMap> FunctionPathMap;
+typedef std::map<Function*, ProfilePathMap>::iterator FunctionPathIterator;
+
+class ProfilePathEdge {
+public:
+  ProfilePathEdge(BasicBlock* source, BasicBlock* target,
+                  unsigned duplicateNumber);
+
+  inline unsigned getDuplicateNumber() { return _duplicateNumber; }
+  inline BasicBlock* getSource() { return _source; }
+  inline BasicBlock* getTarget() { return _target; }
+
+protected:
+  BasicBlock* _source;
+  BasicBlock* _target;
+  unsigned _duplicateNumber;
+};
+
+class ProfilePath {
+public:
+  ProfilePath(unsigned int number, unsigned int count,
+              double countStdDev, PathProfileInfo* ppi);
+
+  double getFrequency() const;
+
+  inline unsigned int getNumber() const { return _number; }
+  inline unsigned int getCount() const { return _count; }
+  inline double getCountStdDev() const { return _countStdDev; }
+
+  ProfilePathEdgeVector* getPathEdges() const;
+  ProfilePathBlockVector* getPathBlocks() const;
+
+  BasicBlock* getFirstBlockInPath() const;
+
+private:
+  unsigned int _number;
+  unsigned int _count;
+  double _countStdDev;
+
+  // Back pointer to the profiling info that owns this path.
+  PathProfileInfo* _ppi;
+};
+
+// TODO: overload [] operator for getting path
+// Add: getFunctionCallCount()
+class PathProfileInfo {
+public:
+  PathProfileInfo();
+  ~PathProfileInfo();
+
+  void setCurrentFunction(Function* F);
+  Function* getCurrentFunction() const;
+  BasicBlock* getCurrentFunctionEntry();
+
+  ProfilePath* getPath(unsigned int number);
+  unsigned int getPotentialPathCount();
+
+  ProfilePathIterator pathBegin();
+  ProfilePathIterator pathEnd();
+  unsigned int pathsRun();
+
+  static char ID; // Pass identification
+  std::string argList;
+
+protected:
+  FunctionPathMap _functionPaths;
+  FunctionPathCountMap _functionPathCounts;
+
+private:
+  BallLarusDag* _currentDag;
+  Function* _currentFunction;
+
+  friend class ProfilePath;
+};
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Analysis/PostDominators.h b/final/include/llvm/Analysis/PostDominators.h
new file mode 100644
index 00000000000..2cd6ae346ee
--- /dev/null
+++ b/final/include/llvm/Analysis/PostDominators.h
@@ -0,0 +1,137 @@
+//=- llvm/Analysis/PostDominators.h - Post Dominator Calculation-*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes interfaces to post dominance information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_POST_DOMINATORS_H
+#define LLVM_ANALYSIS_POST_DOMINATORS_H
+
+#include "llvm/Analysis/DominanceFrontier.h"
+
+namespace llvm {
+
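+// Editorial note: a minimal usage sketch, not part of the original header.
+// Inside a pass that declared AU.addRequired<PostDominatorTree>(), one can
+// query post-dominance like this (`A' and `B' are hypothetical blocks):
+//
+//   PostDominatorTree &PDT = getAnalysis<PostDominatorTree>();
+//   if (PDT.dominates(PDT.getNode(A), PDT.getNode(B)))
+//     ; // A post-dominates B: every path from B to an exit passes through A
+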
+/// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
+/// to compute the post-dominator tree.
+///
+struct PostDominatorTree : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  DominatorTreeBase<BasicBlock>* DT;
+
+  PostDominatorTree() : FunctionPass(ID) {
+    initializePostDominatorTreePass(*PassRegistry::getPassRegistry());
+    DT = new DominatorTreeBase<BasicBlock>(true);
+  }
+
+  ~PostDominatorTree();
+
+  virtual bool runOnFunction(Function &F);
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  inline const std::vector<BasicBlock*> &getRoots() const {
+    return DT->getRoots();
+  }
+
+  inline DomTreeNode *getRootNode() const {
+    return DT->getRootNode();
+  }
+
+  inline DomTreeNode *operator[](BasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  inline DomTreeNode *getNode(BasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  inline bool dominates(DomTreeNode* A, DomTreeNode* B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool dominates(const BasicBlock* A, const BasicBlock* B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) {
+    return DT->findNearestCommonDominator(A, B);
+  }
+
+  virtual void releaseMemory() {
+    DT->releaseMemory();
+  }
+
+  virtual void print(raw_ostream &OS, const Module*) const;
+};
+
+FunctionPass* createPostDomTree();
+
+template <> struct GraphTraits<PostDominatorTree*>
+  : public GraphTraits<DomTreeNode*> {
+  static NodeType *getEntryNode(PostDominatorTree *DT) {
+    return DT->getRootNode();
+  }
+
+  static nodes_iterator nodes_begin(PostDominatorTree *N) {
+    if (getEntryNode(N))
+      return df_begin(getEntryNode(N));
+    else
+      return df_end(getEntryNode(N));
+  }
+
+  static nodes_iterator nodes_end(PostDominatorTree *N) {
+    return df_end(getEntryNode(N));
+  }
+};
+
+/// PostDominanceFrontier Class - Concrete subclass of DominanceFrontier that
+/// is used to compute the post-dominance frontier.
+///
+struct PostDominanceFrontier : public DominanceFrontierBase {
+  static char ID;
+  PostDominanceFrontier()
+    : DominanceFrontierBase(ID, true) {
+    initializePostDominanceFrontierPass(*PassRegistry::getPassRegistry());
+  }
+
+  virtual bool runOnFunction(Function &) {
+    Frontiers.clear();
+    PostDominatorTree &DT = getAnalysis<PostDominatorTree>();
+    Roots = DT.getRoots();
+    if (const DomTreeNode *Root = DT.getRootNode())
+      calculate(DT, Root);
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    AU.addRequired<PostDominatorTree>();
+  }
+
+private:
+  const DomSetType &calculate(const PostDominatorTree &DT,
+                              const DomTreeNode *Node);
+};
+
+FunctionPass* createPostDomFrontier();
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/ProfileInfo.h b/final/include/llvm/Analysis/ProfileInfo.h
new file mode 100644
index 00000000000..300a0279042
--- /dev/null
+++ b/final/include/llvm/Analysis/ProfileInfo.h
@@ -0,0 +1,248 @@
+//===- llvm/Analysis/ProfileInfo.h - Profile Info Interface -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the generic ProfileInfo interface, which is used as the
+// common interface used by all clients of profiling information, and
+// implemented either by making static guestimations, or by actually reading
+// in profiling information gathered by running the program.
+//
+// Note that to be useful, all profile-based optimizations should preserve
+// ProfileInfo, which requires that they notify it when changes to the CFG are
+// made. (This is not implemented yet.)
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PROFILEINFO_H
+#define LLVM_ANALYSIS_PROFILEINFO_H
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <map>
+#include <set>
+#include <string>
+
+namespace llvm {
+  class Pass;
+  class raw_ostream;
+
+  class BasicBlock;
+  class Function;
+  class MachineBasicBlock;
+  class MachineFunction;
+
+  // Helper for dumping edges to dbgs().
+  raw_ostream& operator<<(raw_ostream &O,
+                          std::pair<const BasicBlock *, const BasicBlock *> E);
+  raw_ostream& operator<<(raw_ostream &O,
+                          std::pair<const MachineBasicBlock *,
+                                    const MachineBasicBlock *> E);
+
+  raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB);
+  raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB);
+
+  raw_ostream& operator<<(raw_ostream &O, const Function *F);
+  raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF);
+
+  /// ProfileInfo Class - This class holds and maintains profiling
+  /// information for some unit of code.
+  template <class FType, class BType>
+  class ProfileInfoT {
+  public:
+    // Types for handling profiling information.
+    typedef std::pair<const BType*, const BType*> Edge;
+    typedef std::pair<Edge, double> EdgeWeight;
+    typedef std::map<Edge, double> EdgeWeights;
+    typedef std::map<const BType*, double> BlockCounts;
+    typedef std::map<const BType*, const BType*> Path;
+
+  protected:
+    // EdgeInformation - Count the number of times a transition between two
+    // blocks is executed.  As a special case, we also hold an edge from the
+    // null BasicBlock to the entry block to indicate how many times the
+    // function was entered.
+    std::map<const FType*, EdgeWeights> EdgeInformation;
+
+    // BlockInformation - Count the number of times a block is executed.
+    std::map<const FType*, BlockCounts> BlockInformation;
+
+    // FunctionInformation - Count the number of times a function is executed.
+    std::map<const FType*, double> FunctionInformation;
+
+    ProfileInfoT<MachineFunction, MachineBasicBlock> *MachineProfile;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    ProfileInfoT();
+    ~ProfileInfoT();  // We want to be subclassed
+
+    // MissingValue - The value that is returned for execution counts in case
+    // no value is available.
+    static const double MissingValue;
+
+    // getFunction() - Returns the Function for an Edge, checking for validity.
+    static const FType* getFunction(Edge e) {
+      if (e.first) {
+        return e.first->getParent();
+      } else if (e.second) {
+        return e.second->getParent();
+      }
+      assert(0 && "Invalid ProfileInfo::Edge");
+      return (const FType*)0;
+    }
+
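+    // Editorial note: a minimal sketch of how a client pass might query this
+    // interface; it is not part of the original header.  `PI', `BB' and
+    // `Succ' are hypothetical names:
+    //
+    //   ProfileInfo &PI = getAnalysis<ProfileInfo>();
+    //   double Count = PI.getExecutionCount(BB);
+    //   double Weight = PI.getEdgeWeight(ProfileInfo::getEdge(BB, Succ));
+    //   if (Count == ProfileInfo::MissingValue)
+    //     ; // no profile data was recorded for BB
+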
+    // getEdge() - Creates an Edge from two BasicBlocks.
+    static Edge getEdge(const BType *Src, const BType *Dest) {
+      return std::make_pair(Src, Dest);
+    }
+
+    //===------------------------------------------------------------------===//
+    /// Profile Information Queries
+    ///
+    double getExecutionCount(const FType *F);
+
+    double getExecutionCount(const BType *BB);
+
+    void setExecutionCount(const BType *BB, double w);
+
+    void addExecutionCount(const BType *BB, double w);
+
+    double getEdgeWeight(Edge e) const {
+      typename std::map<const FType*, EdgeWeights>::const_iterator J =
+        EdgeInformation.find(getFunction(e));
+      if (J == EdgeInformation.end()) return MissingValue;
+
+      typename EdgeWeights::const_iterator I = J->second.find(e);
+      if (I == J->second.end()) return MissingValue;
+
+      return I->second;
+    }
+
+    void setEdgeWeight(Edge e, double w) {
+      DEBUG_WITH_TYPE("profile-info",
+            dbgs() << "Creating Edge " << e
+                   << " (weight: " << format("%.20g",w) << ")\n");
+      EdgeInformation[getFunction(e)][e] = w;
+    }
+
+    void addEdgeWeight(Edge e, double w);
+
+    EdgeWeights &getEdgeWeights(const FType *F) {
+      return EdgeInformation[F];
+    }
+
+    //===------------------------------------------------------------------===//
+    /// Analysis Update Methods
+    ///
+    void removeBlock(const BType *BB);
+
+    void removeEdge(Edge e);
+
+    void replaceEdge(const Edge &, const Edge &);
+
+    enum GetPathMode {
+      GetPathToExit = 1,
+      GetPathToValue = 2,
+      GetPathToDest = 4,
+      GetPathWithNewEdges = 8
+    };
+
+    const BType *GetPath(const BType *Src, const BType *Dest,
+                         Path &P, unsigned Mode);
+
+    void divertFlow(const Edge &, const Edge &);
+
+    void splitEdge(const BType *FirstBB, const BType *SecondBB,
+                   const BType *NewBB, bool MergeIdenticalEdges = false);
+
+    void splitBlock(const BType *Old, const BType* New);
+
+    void splitBlock(const BType *BB, const BType* NewBB,
+                    BType *const *Preds, unsigned NumPreds);
+
+    void replaceAllUses(const BType *RmBB, const BType *DestBB);
+
+    void transfer(const FType *Old, const FType *New);
+
+    void repair(const FType *F);
+
+    void dump(FType *F = 0, bool real = true) {
+      dbgs() << "**** This is ProfileInfo " << this << " speaking:\n";
+      if (!real) {
+        typename std::set<const FType*> Functions;
+
+        dbgs() << "Functions: \n";
+        if (F) {
+          dbgs() << F << "@" << format("%p", F) << ": "
+                 << format("%.20g", getExecutionCount(F)) << "\n";
+          Functions.insert(F);
+        } else {
+          for (typename std::map<const FType*, double>::iterator
+               fi = FunctionInformation.begin(),
+               fe = FunctionInformation.end(); fi != fe; ++fi) {
+            dbgs() << fi->first << "@" << format("%p", fi->first) << ": "
+                   << format("%.20g", fi->second) << "\n";
+            Functions.insert(fi->first);
+          }
+        }
+
+        for (typename std::set<const FType*>::iterator FI = Functions.begin(),
+             FE = Functions.end(); FI != FE; ++FI) {
+          const FType *F = *FI;
+          typename std::map<const FType*, BlockCounts>::iterator
+            bwi = BlockInformation.find(F);
+          dbgs() << "BasicBlocks for Function " << F << ":\n";
+          for (typename BlockCounts::const_iterator bi = bwi->second.begin(),
+               be = bwi->second.end(); bi != be; ++bi) {
+            dbgs() << bi->first << "@" << format("%p", bi->first) << ": "
+                   << format("%.20g", bi->second) << "\n";
+          }
+        }
+
+        for (typename std::set<const FType*>::iterator FI = Functions.begin(),
+             FE = Functions.end(); FI != FE; ++FI) {
+          typename std::map<const FType*, EdgeWeights>::iterator
+            ei = EdgeInformation.find(*FI);
+          dbgs() << "Edges for Function " << ei->first << ":\n";
+          for (typename EdgeWeights::iterator ewi = ei->second.begin(),
+               ewe = ei->second.end(); ewi != ewe; ++ewi) {
+            dbgs() << ewi->first << ": "
+                   << format("%.20g", ewi->second) << "\n";
+          }
+        }
+      } else {
+        assert(F && "No function given, this is not supported!");
+        dbgs() << "Functions: \n";
"Functions: \n"; + dbgs() << F << "@" << format("%p", F) << ": " << format("%.20g",getExecutionCount(F)) << "\n"; + + dbgs() << "BasicBlocks for Function " << F << ":\n"; + for (typename FType::const_iterator BI = F->begin(), BE = F->end(); + BI != BE; ++BI) { + const BType *BB = &(*BI); + dbgs() << BB << "@" << format("%p", BB) << ": " << format("%.20g",getExecutionCount(BB)) << "\n"; + } + } + dbgs() << "**** ProfileInfo " << this << ", over and out.\n"; + } + + bool CalculateMissingEdge(const BType *BB, Edge &removed, bool assumeEmptyExit = false); + + bool EstimateMissingEdges(const BType *BB); + + ProfileInfoT *MI() { + if (MachineProfile == 0) + MachineProfile = new ProfileInfoT(); + return MachineProfile; + } + + bool hasMI() const { + return (MachineProfile != 0); + } + }; + + typedef ProfileInfoT ProfileInfo; + typedef ProfileInfoT MachineProfileInfo; + + /// createProfileLoaderPass - This function returns a Pass that loads the + /// profiling information for the module from the specified filename, making + /// it available to the optimizers. + Pass *createProfileLoaderPass(const std::string &Filename); + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Analysis/ProfileInfoLoader.h b/final/include/llvm/Analysis/ProfileInfoLoader.h new file mode 100644 index 00000000000..9e0c393c428 --- /dev/null +++ b/final/include/llvm/Analysis/ProfileInfoLoader.h @@ -0,0 +1,84 @@ +//===- ProfileInfoLoader.h - Load & convert profile information -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ProfileInfoLoader class is used to load and represent profiling +// information read in from the dump file. If conversions between formats are +// needed, it can also do this. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_PROFILEINFOLOADER_H +#define LLVM_ANALYSIS_PROFILEINFOLOADER_H + +#include +#include +#include + +namespace llvm { + +class Module; +class Function; +class BasicBlock; + +class ProfileInfoLoader { + const std::string &Filename; + Module &M; + std::vector CommandLines; + std::vector FunctionCounts; + std::vector BlockCounts; + std::vector EdgeCounts; + std::vector OptimalEdgeCounts; + std::vector BBTrace; + bool Warned; +public: + // ProfileInfoLoader ctor - Read the specified profiling data file, exiting + // the program if the file is invalid or broken. + ProfileInfoLoader(const char *ToolName, const std::string &Filename, + Module &M); + + static const unsigned Uncounted; + + unsigned getNumExecutions() const { return CommandLines.size(); } + const std::string &getExecution(unsigned i) const { return CommandLines[i]; } + + const std::string &getFileName() const { return Filename; } + + // getRawFunctionCounts - This method is used by consumers of function + // counting information. + // + const std::vector &getRawFunctionCounts() const { + return FunctionCounts; + } + + // getRawBlockCounts - This method is used by consumers of block counting + // information. + // + const std::vector &getRawBlockCounts() const { + return BlockCounts; + } + + // getEdgeCounts - This method is used by consumers of edge counting + // information. 
+  // getEdgeCounts - This method is used by consumers of edge counting
+  // information.
+  //
+  const std::vector<unsigned> &getRawEdgeCounts() const {
+    return EdgeCounts;
+  }
+
+  // getEdgeOptimalCounts - This method is used by consumers of optimal edge
+  // counting information.
+  //
+  const std::vector<unsigned> &getRawOptimalEdgeCounts() const {
+    return OptimalEdgeCounts;
+  }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/ProfileInfoTypes.h b/final/include/llvm/Analysis/ProfileInfoTypes.h
new file mode 100644
index 00000000000..6b4ac85082b
--- /dev/null
+++ b/final/include/llvm/Analysis/ProfileInfoTypes.h
@@ -0,0 +1,60 @@
+/*===-- ProfileInfoTypes.h - Profiling info shared constants --------------===*\
+|*
+|*                     The LLVM Compiler Infrastructure
+|*
+|* This file is distributed under the University of Illinois Open Source
+|* License. See LICENSE.TXT for details.
+|*
+|*===----------------------------------------------------------------------===*|
+|*
+|* This file defines constants shared by the various different profiling
+|* runtime libraries and the LLVM C++ profile info loader. It must be a
+|* C header because, at present, the profiling runtimes are written in C.
+|*
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_ANALYSIS_PROFILEINFOTYPES_H
+#define LLVM_ANALYSIS_PROFILEINFOTYPES_H
+
+/* Included by libprofile. */
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* IDs to distinguish between those path counters stored in hashes vs arrays */
+enum ProfilingStorageType {
+  ProfilingArray = 1,
+  ProfilingHash = 2
+};
+
+enum ProfilingType {
+  ArgumentInfo = 1,   /* The command line argument block */
+  FunctionInfo = 2,   /* Function profiling information  */
+  BlockInfo    = 3,   /* Block profiling information     */
+  EdgeInfo     = 4,   /* Edge profiling information      */
+  PathInfo     = 5,   /* Path profiling information      */
+  BBTraceInfo  = 6,   /* Basic block trace information   */
+  OptEdgeInfo  = 7    /* Edge profiling information, optimal version */
+};
+
+/*
+ * The header for tables that map path numbers to path counters.
+ */
+typedef struct {
+  unsigned fnNumber;   /* function number for these counters */
+  unsigned numEntries; /* number of entries stored */
+} PathProfileHeader;
+
+/*
+ * Describes an entry in a tagged table for path counters.
+ */
+typedef struct {
+  unsigned pathNumber;
+  unsigned pathCounter;
+} PathProfileTableEntry;
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* LLVM_ANALYSIS_PROFILEINFOTYPES_H */
diff --git a/final/include/llvm/Analysis/RegionInfo.h b/final/include/llvm/Analysis/RegionInfo.h
new file mode 100644
index 00000000000..a36ca110d8c
--- /dev/null
+++ b/final/include/llvm/Analysis/RegionInfo.h
@@ -0,0 +1,683 @@
+//===- RegionInfo.h - SESE region analysis ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Calculate a program structure tree built out of single entry single exit
+// regions.
+// The basic ideas are taken from "The Program Structure Tree - Richard
+// Johnson, David Pearson, Keshav Pingali - 1994", however enriched with ideas
+// from "The Refined Process Structure Tree - Jussi Vanhatalo, Hagen Voelzer,
+// Jana Koehler - 2009".
+// The algorithm to calculate these data structures however is completely
+// different, as it takes advantage of existing information already available
+// in (Post)dominance tree and dominance frontier passes.
+// This leads to a simpler and, in practice, hopefully better performing
+// algorithm.  The runtime of the algorithms described in the papers above is
+// linear in graph size, O(V+E), whereas this algorithm is not, as the
+// dominance frontier information itself is not; in practice, however, the
+// runtime appears to be on the order of magnitude of dominance tree
+// calculation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_REGION_INFO_H
+#define LLVM_ANALYSIS_REGION_INFO_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+class Region;
+class RegionInfo;
+class raw_ostream;
+class Loop;
+class LoopInfo;
+
+/// @brief Marker class to iterate over the elements of a Region in flat mode.
+///
+/// The class is used to either iterate in Flat mode or by not using it to not
+/// iterate in Flat mode.  During a Flat mode iteration all Regions are entered
+/// and the iteration returns every BasicBlock.  If the Flat mode is not
+/// selected for SubRegions just one RegionNode containing the subregion is
+/// returned.
+template <class GraphType>
+class FlatIt {};
+
+/// @brief A RegionNode represents a subregion or a BasicBlock that is part of
+/// a Region.
+class RegionNode {
+  // DO NOT IMPLEMENT
+  RegionNode(const RegionNode &);
+  // DO NOT IMPLEMENT
+  const RegionNode &operator=(const RegionNode &);
+
+protected:
+  /// This is the entry basic block that starts this region node.  If this is
+  /// a BasicBlock RegionNode, then entry is just the basic block that this
+  /// RegionNode represents.  Otherwise it is the entry of this (Sub)RegionNode.
+  ///
+  /// In the BBtoRegionNode map of the parent of this node, BB will always map
+  /// to this node no matter which kind of node this one is.
+  ///
+  /// The node can hold either a Region or a BasicBlock.
+  /// Use one bit to save, if this RegionNode is a subregion or BasicBlock
+  /// RegionNode.
+  PointerIntPair<BasicBlock*, 1, bool> entry;
+
+  /// @brief The parent Region of this RegionNode.
+  /// @see getParent()
+  Region* parent;
+
+public:
+  /// @brief Create a RegionNode.
+  ///
+  /// @param Parent      The parent of this RegionNode.
+  /// @param Entry       The entry BasicBlock of the RegionNode.  If this
+  ///                    RegionNode represents a BasicBlock, this is the
+  ///                    BasicBlock itself.  If it represents a subregion, this
+  ///                    is the entry BasicBlock of the subregion.
+  /// @param isSubRegion If this RegionNode represents a SubRegion.
+  inline RegionNode(Region* Parent, BasicBlock* Entry, bool isSubRegion = 0)
+    : entry(Entry, isSubRegion), parent(Parent) {}
+
+  /// @brief Get the parent Region of this RegionNode.
+  ///
+  /// The parent Region is the Region this RegionNode belongs to.  If for
+  /// example a BasicBlock is element of two Regions, there exist two
+  /// RegionNodes for this BasicBlock.  Each with the getParent() function
+  /// pointing to the Region this RegionNode belongs to.
+  ///
+  /// @return Get the parent Region of this RegionNode.
+  inline Region* getParent() const { return parent; }
+
+  /// @brief Get the entry BasicBlock of this RegionNode.
+  ///
+  /// If this RegionNode represents a BasicBlock this is just the BasicBlock
+  /// itself, otherwise we return the entry BasicBlock of the Subregion.
+  ///
+  /// @return The entry BasicBlock of this RegionNode.
+  inline BasicBlock* getEntry() const { return entry.getPointer(); }
+
+  /// @brief Get the content of this RegionNode.
+  ///
+  /// This can be either a BasicBlock or a subregion.  Before calling
+  /// getNodeAs() check the type of the content with the isSubRegion()
+  /// function call.
+  ///
+  /// @return The content of this RegionNode.
+  template <class T>
+  inline T* getNodeAs() const;
+
+  /// @brief Is this RegionNode a subregion?
+  ///
+  /// @return True if it contains a subregion.  False if it contains a
+  ///         BasicBlock.
+  inline bool isSubRegion() const {
+    return entry.getInt();
+  }
+};
+
+/// Print a RegionNode.
+inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node);
+
+template<>
+inline BasicBlock* RegionNode::getNodeAs<BasicBlock>() const {
+  assert(!isSubRegion() && "This is not a BasicBlock RegionNode!");
+  return getEntry();
+}
+
+template<>
+inline Region* RegionNode::getNodeAs<Region>() const {
+  assert(isSubRegion() && "This is not a subregion RegionNode!");
+  return reinterpret_cast<Region*>(const_cast<RegionNode*>(this));
+}
+
+//===----------------------------------------------------------------------===//
+/// @brief A single entry single exit Region.
+///
+/// A Region is a connected subgraph of a control flow graph that has exactly
+/// two connections to the remaining graph.  It can be used to analyze or
+/// optimize parts of the control flow graph.
+///
+/// A simple Region is connected to the remaining graph by just two
+/// edges.  One edge entering the Region and another one leaving the Region.
+///
+/// An extended Region (or just Region) is a subgraph that can be
+/// transformed into a simple Region.  The transformation is done by adding
+/// BasicBlocks that merge several entry or exit edges so that after the merge
+/// just one entry and one exit edge exists.
+///
+/// The \e Entry of a Region is the first BasicBlock that is passed after
+/// entering the Region.  It is an element of the Region.  The entry BasicBlock
+/// dominates all BasicBlocks in the Region.
+///
+/// The \e Exit of a Region is the first BasicBlock that is passed after
+/// leaving the Region.  It is not an element of the Region.  The exit
+/// BasicBlock postdominates all BasicBlocks in the Region.
+///
+/// A canonical Region cannot be constructed by combining smaller
+/// Regions.
+///
+/// Region A is the \e parent of Region B, if B is completely contained in A.
+///
+/// Two canonical Regions either do not intersect at all or one is
+/// the parent of the other.
+///
+/// The Program Structure Tree is a graph (V, E) where V is the set of
+/// Regions in the control flow graph and E is the \e parent relation of these
+/// Regions.
+///
+/// Example:
+///
+/// \verbatim
+/// A simple control flow graph, that contains two regions.
+///
+///        1
+///       / |
+///      2   |
+///     / \   3
+///    4   5  |
+///    |   |  |
+///    6   7  8
+///     \  | /
+///      \ |/       Region A: 1 -> 9 {1,2,3,4,5,6,7,8}
+///        9        Region B: 2 -> 9 {2,4,5,6,7}
+/// \endverbatim
+///
+/// You can obtain more examples by either calling
+///
+/// "opt -regions -analyze anyprogram.ll"
+/// or
+/// "opt -view-regions-only anyprogram.ll"
+///
+/// on any LLVM file you are interested in.
+///
+/// The first call returns a textual representation of the program structure
+/// tree, the second one creates a graphical representation using graphviz.
+class Region : public RegionNode {
+  friend class RegionInfo;
+  // DO NOT IMPLEMENT
+  Region(const Region &);
+  // DO NOT IMPLEMENT
+  const Region &operator=(const Region &);
+
+  // Information necessary to manage this Region.
+  RegionInfo* RI;
+  DominatorTree *DT;
+
+  // The exit BasicBlock of this region.
+  // (The entry BasicBlock is part of RegionNode)
+  BasicBlock *exit;
+
+  typedef std::vector<Region*> RegionSet;
+
+  // The subregions of this region.
+  RegionSet children;
+
+  typedef std::map<BasicBlock*, RegionNode*> BBNodeMapT;
+
+  // Save the BasicBlock RegionNodes that are element of this Region.
+  mutable BBNodeMapT BBNodeMap;
+
+  /// verifyBBInRegion - Check if a BB is in this Region.  This check also
+  /// works if the region is incorrectly built. (EXPENSIVE!)
+  void verifyBBInRegion(BasicBlock* BB) const;
+
+  /// verifyWalk - Walk over all the BBs of the region starting from BB and
+  /// verify that all reachable basic blocks are elements of the region.
+  /// (EXPENSIVE!)
+  void verifyWalk(BasicBlock* BB, std::set<BasicBlock*>* visitedBB) const;
+
+  /// verifyRegionNest - Verify if the region and its children are valid
+  /// regions (EXPENSIVE!)
+  void verifyRegionNest() const;
+
+public:
+  /// @brief Create a new region.
+  ///
+  /// @param Entry  The entry basic block of the region.
+  /// @param Exit   The exit basic block of the region.
+  /// @param RI     The region info object that is managing this region.
+  /// @param DT     The dominator tree of the current function.
+  /// @param Parent The surrounding region or NULL if this is a top level
+  ///               region.
+  Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RI,
+         DominatorTree *DT, Region *Parent = 0);
+
+  /// Delete the Region and all its subregions.
+  ~Region();
+
+  /// @brief Get the entry BasicBlock of the Region.
+  /// @return The entry BasicBlock of the region.
+  BasicBlock *getEntry() const { return RegionNode::getEntry(); }
+
+  /// @brief Replace the entry basic block of the region with the new basic
+  ///        block.
+  ///
+  /// @param BB The new entry basic block of the region.
+  void replaceEntry(BasicBlock *BB);
+
+  /// @brief Replace the exit basic block of the region with the new basic
+  ///        block.
+  ///
+  /// @param BB The new exit basic block of the region.
+  void replaceExit(BasicBlock *BB);
+
+  /// @brief Get the exit BasicBlock of the Region.
+  /// @return The exit BasicBlock of the Region, NULL if this is the TopLevel
+  ///         Region.
+  BasicBlock *getExit() const { return exit; }
+
+  /// @brief Get the parent of the Region.
+  /// @return The parent of the Region or NULL if this is a top level
+  ///         Region.
+  Region *getParent() const { return RegionNode::getParent(); }
+
+  /// @brief Get the RegionNode representing the current Region.
+  /// @return The RegionNode representing the current Region.
+  RegionNode* getNode() const {
+    return const_cast<RegionNode*>(reinterpret_cast<const RegionNode*>(this));
+  }
+
+  /// @brief Get the nesting level of this Region.
+  ///
+  /// A toplevel Region has depth 0.
+  ///
+  /// @return The depth of the region.
+  unsigned getDepth() const;
+
+  /// @brief Check if a Region is the TopLevel region.
+  ///
+  /// The toplevel region represents the whole function.
+  bool isTopLevelRegion() const { return exit == NULL; }
+
+  /// @brief Return a new (non canonical) region, that is obtained by joining
+  ///        this region with its predecessors.
+  ///
+  /// @return A region also starting at getEntry(), but reaching to the next
+  ///         basic block that forms with getEntry() a (non canonical) region.
+  ///         NULL if such a basic block does not exist.
+  Region *getExpandedRegion() const;
+
+  /// @brief Return the first block of this region's single entry edge,
+  ///        if existing.
+  ///
+  /// @return The BasicBlock starting this region's single entry edge,
+  ///         else NULL.
+ BasicBlock *getEnteringBlock() const; + + /// @brief Return the first block of this region's single exit edge, + /// if existing. + /// + /// @return The BasicBlock starting this region's single exit edge, + /// else NULL. + BasicBlock *getExitingBlock() const; + + /// @brief Is this a simple region? + /// + /// A region is simple if it has exactly one exit and one entry edge. + /// + /// @return True if the Region is simple. + bool isSimple() const; + + /// @brief Returns the name of the Region. + /// @return The Name of the Region. + std::string getNameStr() const; + + /// @brief Return the RegionInfo object, that belongs to this Region. + RegionInfo *getRegionInfo() const { + return RI; + } + + /// @brief Print the region. + /// + /// @param OS The output stream the Region is printed to. + /// @param printTree Print also the tree of subregions. + /// @param level The indentation level used for printing. + void print(raw_ostream& OS, bool printTree = true, unsigned level = 0) const; + + /// @brief Print the region to stderr. + void dump() const; + + /// @brief Check if the region contains a BasicBlock. + /// + /// @param BB The BasicBlock that might be contained in this Region. + /// @return True if the block is contained in the region otherwise false. + bool contains(const BasicBlock *BB) const; + + /// @brief Check if the region contains another region. + /// + /// @param SubRegion The region that might be contained in this Region. + /// @return True if SubRegion is contained in the region otherwise false. + bool contains(const Region *SubRegion) const { + // Toplevel Region. + if (!getExit()) + return true; + + return contains(SubRegion->getEntry()) + && (contains(SubRegion->getExit()) || SubRegion->getExit() == getExit()); + } + + /// @brief Check if the region contains an Instruction. + /// + /// @param Inst The Instruction that might be contained in this region. + /// @return True if the Instruction is contained in the region otherwise false. + bool contains(const Instruction *Inst) const { + return contains(Inst->getParent()); + } + + /// @brief Check if the region contains a loop. + /// + /// @param L The loop that might be contained in this region. + /// @return True if the loop is contained in the region otherwise false. + /// In case a NULL pointer is passed to this function the result + /// is false, except for the region that describes the whole function. + /// In that case true is returned. + bool contains(const Loop *L) const; + + /// @brief Get the outermost loop in the region that contains a loop. + /// + /// Find for a Loop L the outermost loop OuterL that is a parent loop of L + /// and is itself contained in the region. + /// + /// @param L The loop the lookup is started. + /// @return The outermost loop in the region, NULL if such a loop does not + /// exist or if the region describes the whole function. + Loop *outermostLoopInRegion(Loop *L) const; + + /// @brief Get the outermost loop in the region that contains a basic block. + /// + /// Find for a basic block BB the outermost loop L that contains BB and is + /// itself contained in the region. + /// + /// @param LI A pointer to a LoopInfo analysis. + /// @param BB The basic block surrounded by the loop. + /// @return The outermost loop in the region, NULL if such a loop does not + /// exist or if the region describes the whole function. 
+  Loop *outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const;
+
+  /// @brief Get the subregion that starts at a BasicBlock
+  ///
+  /// @param BB The BasicBlock the subregion should start at.
+  /// @return The Subregion if available, otherwise NULL.
+  Region* getSubRegionNode(BasicBlock *BB) const;
+
+  /// @brief Get the RegionNode for a BasicBlock
+  ///
+  /// @param BB The BasicBlock at which the RegionNode should start.
+  /// @return If available, the RegionNode that represents the subregion
+  ///         starting at BB.  If no subregion starts at BB, the RegionNode
+  ///         representing BB.
+  RegionNode* getNode(BasicBlock *BB) const;
+
+  /// @brief Get the BasicBlock RegionNode for a BasicBlock
+  ///
+  /// @param BB The BasicBlock for which the RegionNode is requested.
+  /// @return The RegionNode representing the BB.
+  RegionNode* getBBNode(BasicBlock *BB) const;
+
+  /// @brief Add a new subregion to this Region.
+  ///
+  /// @param SubRegion    The new subregion that will be added.
+  /// @param moveChildren Move the children of this region, that are also
+  ///                     contained in SubRegion, into SubRegion.
+  void addSubRegion(Region *SubRegion, bool moveChildren = false);
+
+  /// @brief Remove a subregion from this Region.
+  ///
+  /// The subregion is not deleted, as it will probably be inserted into
+  /// another region.
+  /// @param SubRegion The SubRegion that will be removed.
+  Region *removeSubRegion(Region *SubRegion);
+
+  /// @brief Move all direct child nodes of this Region to another Region.
+  ///
+  /// @param To The Region the child nodes will be transferred to.
+  void transferChildrenTo(Region *To);
+
+  /// @brief Verify if the region is a correct region.
+  ///
+  /// Check if this is a correctly built Region.  This is an expensive check,
+  /// as the complete CFG of the Region will be walked.
+  void verifyRegion() const;
+
+  /// @brief Clear the cache for BB RegionNodes.
+  ///
+  /// After calling this function the BasicBlock RegionNodes will be stored at
+  /// different memory locations.  RegionNodes obtained before this function
+  /// was called are therefore not comparable to RegionNodes obtained
+  /// afterwards.
+  void clearNodeCache();
+
+  /// @name Subregion Iterators
+  ///
+  /// These iterators iterate over all subregions of this Region.
+  //@{
+  typedef RegionSet::iterator iterator;
+  typedef RegionSet::const_iterator const_iterator;
+
+  iterator begin() { return children.begin(); }
+  iterator end() { return children.end(); }
+
+  const_iterator begin() const { return children.begin(); }
+  const_iterator end() const { return children.end(); }
+  //@}
+
+  /// @name BasicBlock Iterators
+  ///
+  /// These iterators iterate over all BasicBlock RegionNodes that are
+  /// contained in this Region.  The iterator also iterates over BasicBlocks
+  /// that are elements of a subregion of this Region.  It is therefore called
+  /// a flat iterator.
+  //@{
+  typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
+                      GraphTraits<FlatIt<RegionNode*> > > block_iterator;
+
+  typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
+                      false, GraphTraits<FlatIt<const RegionNode*> > >
+    const_block_iterator;
+
+  block_iterator block_begin();
+  block_iterator block_end();
+
+  const_block_iterator block_begin() const;
+  const_block_iterator block_end() const;
+  //@}
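+
+  // Editorial note: a minimal sketch of flat iteration over a Region's
+  // blocks, not part of the original header.  `R' is a hypothetical Region*:
+  //
+  //   for (Region::block_iterator I = R->block_begin(), E = R->block_end();
+  //        I != E; ++I) {
+  //     BasicBlock *BB = (*I)->getNodeAs<BasicBlock>();
+  //     // BB is visited even when it lives in a subregion of R
+  //   }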
+
+  /// @name Element Iterators
+  ///
+  /// These iterators iterate over all BasicBlock and subregion RegionNodes
+  /// that are direct children of this Region.  They do not iterate over any
+  /// RegionNodes that are also element of a subregion of this Region.
+  //@{
+  typedef df_iterator<RegionNode*, SmallPtrSet<RegionNode*, 8>, false,
+                      GraphTraits<RegionNode*> > element_iterator;
+
+  typedef df_iterator<const RegionNode*, SmallPtrSet<const RegionNode*, 8>,
+                      false, GraphTraits<const RegionNode*> >
+    const_element_iterator;
+
+  element_iterator element_begin();
+  element_iterator element_end();
+
+  const_element_iterator element_begin() const;
+  const_element_iterator element_end() const;
+  //@}
+};
+
+//===----------------------------------------------------------------------===//
+/// @brief Analysis that detects all canonical Regions.
+///
+/// The RegionInfo pass detects all canonical regions in a function.  The
+/// Regions are connected using the parent relation.  This builds a Program
+/// Structure Tree.
+class RegionInfo : public FunctionPass {
+  typedef DenseMap<BasicBlock*, BasicBlock*> BBtoBBMap;
+  typedef DenseMap<BasicBlock*, Region*> BBtoRegionMap;
+  typedef SmallPtrSet<Region*, 4> RegionSet;
+
+  // DO NOT IMPLEMENT
+  RegionInfo(const RegionInfo &);
+  // DO NOT IMPLEMENT
+  const RegionInfo &operator=(const RegionInfo &);
+
+  DominatorTree *DT;
+  PostDominatorTree *PDT;
+  DominanceFrontier *DF;
+
+  /// The top level region.
+  Region *TopLevelRegion;
+
+  /// Map every BB to the smallest region, that contains BB.
+  BBtoRegionMap BBtoRegion;
+
+  // isCommonDomFrontier - Returns true if BB is in the dominance frontier of
+  // entry, because it was inherited from exit.  In the other case there is an
+  // edge going from entry to BB without passing exit.
+  bool isCommonDomFrontier(BasicBlock* BB, BasicBlock* entry,
+                           BasicBlock* exit) const;
+
+  // isRegion - Check if entry and exit surround a valid region, based on
+  // dominance tree and dominance frontier.
+  bool isRegion(BasicBlock* entry, BasicBlock* exit) const;
+
+  // insertShortCut - Saves a shortcut pointing from entry to exit.
+  // This function may extend this shortcut if possible.
+  void insertShortCut(BasicBlock* entry, BasicBlock* exit,
+                      BBtoBBMap* ShortCut) const;
+
+  // getNextPostDom - Returns the next BB that postdominates N, while skipping
+  // all post dominators that cannot finish a canonical region.
+  DomTreeNode *getNextPostDom(DomTreeNode* N, BBtoBBMap *ShortCut) const;
+
+  // isTrivialRegion - A region is trivial, if it contains only one BB.
+  bool isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const;
+
+  // createRegion - Creates a single entry single exit region.
+  Region *createRegion(BasicBlock *entry, BasicBlock *exit);
+
+  // findRegionsWithEntry - Detect all regions starting with bb 'entry'.
+  void findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut);
+
+  // scanForRegions - Detects regions in F.
+  void scanForRegions(Function &F, BBtoBBMap *ShortCut);
+
+  // getTopMostParent - Get the top most parent with the same entry block.
+  Region *getTopMostParent(Region *region);
+
+  // buildRegionsTree - Build the region hierarchy after all regions have been
+  // detected.
+  void buildRegionsTree(DomTreeNode *N, Region *region);
+
+  // Calculate - Detect all regions in a function and build the region tree.
+  void Calculate(Function& F);
+
+  void releaseMemory();
+
+  // updateStatistics - Update statistic about created regions.
+  void updateStatistics(Region *R);
+
+  // isSimple - Check if a region is a simple region with exactly one entry
+  // edge and exactly one exit edge.
+
+public:
+  static char ID;
+  explicit RegionInfo();
+
+  ~RegionInfo();
+
+  /// @name FunctionPass interface
+  //@{
+  virtual bool runOnFunction(Function &F);
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual void print(raw_ostream &OS, const Module *) const;
+  virtual void verifyAnalysis() const;
+  //@}
+
+  /// @brief Get the smallest region that contains a BasicBlock.
+  ///
+  /// @param BB The basic block.
+  /// @return The smallest region that contains BB, or NULL if there is no
+  ///         region containing BB.
+  Region *getRegionFor(BasicBlock *BB) const;
+
+  /// @brief Set the smallest region that surrounds a basic block.
+  ///
+  /// @param BB The basic block surrounded by a region.
+  /// @param R The smallest region that surrounds BB.
+  void setRegionFor(BasicBlock *BB, Region *R);
+
+  /// @brief A shortcut for getRegionFor().
+  ///
+  /// @param BB The basic block.
+  /// @return The smallest region that contains BB, or NULL if there is no
+  ///         region containing BB.
+  Region *operator[](BasicBlock *BB) const;
+
+  /// @brief Return the exit of the maximal refined region that starts at a
+  /// BasicBlock.
+  ///
+  /// @param BB The BasicBlock at which the refined region starts.
+  BasicBlock *getMaxRegionExit(BasicBlock *BB) const;
+
+  /// @brief Find the smallest region that contains two regions.
+  ///
+  /// @param A The first region.
+  /// @param B The second region.
+  /// @return The smallest region containing A and B.
+  Region *getCommonRegion(Region* A, Region *B) const;
+
+  /// @brief Find the smallest region that contains two basic blocks.
+  ///
+  /// @param A The first basic block.
+  /// @param B The second basic block.
+  /// @return The smallest region that contains A and B.
+  Region* getCommonRegion(BasicBlock* A, BasicBlock *B) const {
+    return getCommonRegion(getRegionFor(A), getRegionFor(B));
+  }
+
+  /// @brief Find the smallest region that contains a set of regions.
+  ///
+  /// @param Regions A vector of regions.
+  /// @return The smallest region that contains all regions in Regions.
+  Region* getCommonRegion(SmallVectorImpl<Region*> &Regions) const;
+
+  /// @brief Find the smallest region that contains a set of basic blocks.
+  ///
+  /// @param BBs A vector of basic blocks.
+  /// @return The smallest region that contains all basic blocks in BBs.
+  Region* getCommonRegion(SmallVectorImpl<BasicBlock*> &BBs) const;
+
+  Region *getTopLevelRegion() const {
+    return TopLevelRegion;
+  }
+
+  /// @brief Update RegionInfo after a basic block was split.
+  ///
+  /// @param NewBB The basic block that was created before OldBB.
+  /// @param OldBB The old basic block.
+  void splitBlock(BasicBlock* NewBB, BasicBlock *OldBB);
+
+  /// @brief Clear the Node Cache for all Regions.
+  ///
+  /// @see Region::clearNodeCache()
+  void clearNodeCache() {
+    if (TopLevelRegion)
+      TopLevelRegion->clearNodeCache();
+  }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const RegionNode &Node) {
+  if (Node.isSubRegion())
+    return OS << Node.getNodeAs<Region>()->getNameStr();
+  else
+    return OS << Node.getNodeAs<BasicBlock>()->getNameStr();
+}
+} // End llvm namespace
+#endif
+
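As a usage sketch (the pass below is hypothetical; getAnalysis, addRequired, and errs() are standard LLVM APIs of this era), a FunctionPass would request RegionInfo through the pass manager and then query it per block:

    struct RegionDumper : public FunctionPass {
      static char ID;
      RegionDumper() : FunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<RegionInfo>();   // run RegionInfo before this pass
        AU.setPreservesAll();
      }

      virtual bool runOnFunction(Function &F) {
        RegionInfo &RI = getAnalysis<RegionInfo>();
        for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
          BasicBlock *BB = I;
          if (Region *R = RI[BB])       // smallest region containing BB
            errs() << BB->getName() << " -> " << R->getNameStr() << "\n";
        }
        return false;                   // analysis only, IR unchanged
      }
    };
    char RegionDumper::ID = 0;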
diff --git a/final/include/llvm/Analysis/RegionIterator.h b/final/include/llvm/Analysis/RegionIterator.h
new file mode 100644
index 00000000000..ced5b528cbb
--- /dev/null
+++ b/final/include/llvm/Analysis/RegionIterator.h
@@ -0,0 +1,342 @@
+//===- RegionIterator.h - Iterators to iterate over Regions ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines the iterators to iterate over the elements of a Region.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_ANALYSIS_REGION_ITERATOR_H
+#define LLVM_ANALYSIS_REGION_ITERATOR_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+//===----------------------------------------------------------------------===//
+/// @brief Hierarchical RegionNode successor iterator.
+///
+/// This iterator iterates over all successors of a RegionNode.
+///
+/// For a BasicBlock RegionNode it skips all BasicBlocks that are not part of
+/// the parent Region. Furthermore, for BasicBlocks that start a subregion, a
+/// RegionNode representing the subregion is returned.
+///
+/// For a subregion RegionNode there is just one successor: the RegionNode
+/// representing the exit of the subregion.
+template<class NodeType>
+class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
+                                            NodeType, ptrdiff_t>
+{
+  typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+  // The iterator works in two modes, bb mode or region mode.
+  enum ItMode {
+    // In BB mode it returns all successors of this BasicBlock as its
+    // successors.
+    ItBB,
+    // In region mode there is only one successor: the regionnode mapping to
+    // the exit block of the regionnode.
+    ItRgBegin, // At the beginning of the regionnode successor.
+    ItRgEnd    // At the end of the regionnode successor.
+  };
+
+  // Use two bits to represent the iterator mode.
+  PointerIntPair<NodeType*, 2, enum ItMode> Node;
+
+  // The block successor iterator.
+  succ_iterator BItor;
+
+  // advanceRegionSucc - A region node has only one successor. It reaches end
+  // once we advance it.
+  void advanceRegionSucc() {
+    assert(Node.getInt() == ItRgBegin && "Cannot advance region successor!");
+    Node.setInt(ItRgEnd);
+  }
+
+  NodeType* getNode() const { return Node.getPointer(); }
+
+  // isRegionMode - Is the current iterator in region mode?
+  bool isRegionMode() const { return Node.getInt() != ItBB; }
+
+  // Get the immediate successor. This function may return a Basic Block
+  // RegionNode or a subregion RegionNode.
+  RegionNode* getISucc(BasicBlock* BB) const {
+    RegionNode *succ;
+    succ = getNode()->getParent()->getNode(BB);
+    assert(succ && "BB not in Region or entered subregion!");
+    return succ;
+  }
+
+  // getRegionSucc - Return the successor basic block of a SubRegion
+  // RegionNode.
+  inline BasicBlock* getRegionSucc() const {
+    assert(Node.getInt() == ItRgBegin && "Cannot get the region successor!");
+    return getNode()->template getNodeAs<Region>()->getExit();
+  }
+
+  // isExit - Is this the exit BB of the Region?
+  inline bool isExit(BasicBlock* BB) const {
+    return getNode()->getParent()->getExit() == BB;
+  }
+public:
+  typedef RNSuccIterator<NodeType> Self;
+
+  typedef typename super::pointer pointer;
+
+  /// @brief Create begin iterator of a RegionNode.
+  inline RNSuccIterator(NodeType* node)
+    : Node(node, node->isSubRegion() ? ItRgBegin : ItBB),
+      BItor(succ_begin(node->getEntry())) {
+
+    // Skip the exit block
+    if (!isRegionMode())
+      while (succ_end(node->getEntry()) != BItor && isExit(*BItor))
+        ++BItor;
+
+    if (isRegionMode() && isExit(getRegionSucc()))
+      advanceRegionSucc();
+  }
+
+  /// @brief Create an end iterator.
+  inline RNSuccIterator(NodeType* node, bool)
+    : Node(node, node->isSubRegion() ? ItRgEnd : ItBB),
+      BItor(succ_end(node->getEntry())) {}
+
+  inline bool operator==(const Self& x) const {
+    assert(isRegionMode() == x.isRegionMode() && "Broken iterator!");
+    if (isRegionMode())
+      return Node.getInt() == x.Node.getInt();
+    else
+      return BItor == x.BItor;
+  }
+
+  inline bool operator!=(const Self& x) const { return !operator==(x); }
+
+  inline pointer operator*() const {
+    BasicBlock* BB = isRegionMode() ? getRegionSucc() : *BItor;
+    assert(!isExit(BB) && "Iterator out of range!");
+    return getISucc(BB);
+  }
+
+  inline Self& operator++() {
+    if (isRegionMode()) {
+      // The Region only has 1 successor.
+      advanceRegionSucc();
+    } else {
+      // Skip the exit.
+      do
+        ++BItor;
+      while (BItor != succ_end(getNode()->getEntry())
+             && isExit(*BItor));
+    }
+    return *this;
+  }
+
+  inline Self operator++(int) {
+    Self tmp = *this;
+    ++*this;
+    return tmp;
+  }
+
+  inline const Self &operator=(const Self &I) {
+    if (this != &I) {
+      assert(getNode()->getParent() == I.getNode()->getParent()
+             && "Cannot assign iterators of two different regions!");
+      Node = I.Node;
+      BItor = I.BItor;
+    }
+    return *this;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// @brief Flat RegionNode iterator.
+///
+/// The Flat Region iterator will iterate over all BasicBlock RegionNodes that
+/// are contained in the Region and its subregions. This is close to a virtual
+/// control flow graph of the Region.
+template<class NodeType>
+class RNSuccIterator<FlatIt<NodeType> >
+  : public std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t>
+{
+  typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+  NodeType* Node;
+  succ_iterator Itor;
+
+public:
+  typedef RNSuccIterator<FlatIt<NodeType> > Self;
+  typedef typename super::pointer pointer;
+
+  /// @brief Create the iterator from a RegionNode.
+  ///
+  /// Note that the incoming node must be a bb node, otherwise it will trigger
+  /// an assertion when we try to get a BasicBlock.
+  inline RNSuccIterator(NodeType* node) : Node(node),
+    Itor(succ_begin(node->getEntry())) {
+    assert(!Node->isSubRegion()
+           && "Subregion node not allowed in flat iterating mode!");
+    assert(Node->getParent() && "A BB node must have a parent!");
+
+    // Skip the exit block of the iterating region.
+    while (succ_end(Node->getEntry()) != Itor
+           && Node->getParent()->getExit() == *Itor)
+      ++Itor;
+  }
+  /// @brief Create an end iterator
+  inline RNSuccIterator(NodeType* node, bool) : Node(node),
+    Itor(succ_end(node->getEntry())) {
+    assert(!Node->isSubRegion()
+           && "Subregion node not allowed in flat iterating mode!");
+  }
+
+  inline bool operator==(const Self& x) const {
+    assert(Node->getParent() == x.Node->getParent()
+           && "Cannot compare iterators of different regions!");
+
+    return Itor == x.Itor && Node == x.Node;
+  }
+
+  inline bool operator!=(const Self& x) const { return !operator==(x); }
+
+  inline pointer operator*() const {
+    BasicBlock* BB = *Itor;
+
+    // Get the iterating region.
+    Region* Parent = Node->getParent();
+
+    // The only case in which the successor reaches out of the region is when
+    // it reaches the exit of the region.
+    assert(Parent->getExit() != BB && "iterator out of range!");
+
+    return Parent->getBBNode(BB);
+  }
+
+  inline Self& operator++() {
+    // Skip the exit block of the iterating region.
+    do
+      ++Itor;
+    while (Itor != succ_end(Node->getEntry())
+           && Node->getParent()->getExit() == *Itor);
+
+    return *this;
+  }
+
+  inline Self operator++(int) {
+    Self tmp = *this;
+    ++*this;
+    return tmp;
+  }
+
+  inline const Self &operator=(const Self &I) {
+    if (this != &I) {
+      assert(Node->getParent() == I.Node->getParent()
+             && "Cannot assign iterators to two different regions!");
+      Node = I.Node;
+      Itor = I.Itor;
+    }
+    return *this;
+  }
+};
+
+template<class NodeType>
+inline RNSuccIterator<NodeType> succ_begin(NodeType* Node) {
+  return RNSuccIterator<NodeType>(Node);
+}
+
+template<class NodeType>
+inline RNSuccIterator<NodeType> succ_end(NodeType* Node) {
+  return RNSuccIterator<NodeType>(Node, true);
+}
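To illustrate the two overloads just defined, here is a sketch (dumpRegionEdges is a hypothetical helper; the iterator and Region calls are taken from these headers) that prints every edge in a region's node graph:

    void dumpRegionEdges(Region *R, raw_ostream &OS) {
      for (Region::element_iterator I = R->element_begin(),
           E = R->element_end(); I != E; ++I) {
        RegionNode *N = *I;
        for (RNSuccIterator<RegionNode> SI = succ_begin(N), SE = succ_end(N);
             SI != SE; ++SI)
          OS << *N << " -> " << **SI << "\n";  // operator<< for RegionNode
      }
    }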
+
+//===--------------------------------------------------------------------===//
+// RegionNode GraphTraits specialization so the bbs in the region can be
+// iterated by generic graph iterators.
+//
+// NodeT can either be a region node or a const region node, otherwise
+// child_begin and child_end fail.
+
+#define RegionNodeGraphTraits(NodeT) \
+  template<> struct GraphTraits<NodeT*> { \
+  typedef NodeT NodeType; \
+  typedef RNSuccIterator<NodeType> ChildIteratorType; \
+  static NodeType *getEntryNode(NodeType* N) { return N; } \
+  static inline ChildIteratorType child_begin(NodeType *N) { \
+    return RNSuccIterator<NodeType>(N); \
+  } \
+  static inline ChildIteratorType child_end(NodeType *N) { \
+    return RNSuccIterator<NodeType>(N, true); \
+  } \
+}; \
+template<> struct GraphTraits<FlatIt<NodeT*> > { \
+  typedef NodeT NodeType; \
+  typedef RNSuccIterator<FlatIt<NodeT> > ChildIteratorType; \
+  static NodeType *getEntryNode(NodeType* N) { return N; } \
+  static inline ChildIteratorType child_begin(NodeType *N) { \
+    return RNSuccIterator<FlatIt<NodeType> >(N); \
+  } \
+  static inline ChildIteratorType child_end(NodeType *N) { \
+    return RNSuccIterator<FlatIt<NodeType> >(N, true); \
+  } \
+}
+
+#define RegionGraphTraits(RegionT, NodeT) \
+template<> struct GraphTraits<RegionT*> \
+  : public GraphTraits<NodeT*> { \
+  typedef df_iterator<NodeType*> nodes_iterator; \
+  static NodeType *getEntryNode(RegionT* R) { \
+    return R->getNode(R->getEntry()); \
+  } \
+  static nodes_iterator nodes_begin(RegionT* R) { \
+    return nodes_iterator::begin(getEntryNode(R)); \
+  } \
+  static nodes_iterator nodes_end(RegionT* R) { \
+    return nodes_iterator::end(getEntryNode(R)); \
+  } \
+}; \
+template<> struct GraphTraits<FlatIt<RegionT*> > \
+  : public GraphTraits<FlatIt<NodeT*> > { \
+  typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false, \
+                      GraphTraits<FlatIt<NodeType*> > > nodes_iterator; \
+  static NodeType *getEntryNode(RegionT* R) { \
+    return R->getBBNode(R->getEntry()); \
+  } \
+  static nodes_iterator nodes_begin(RegionT* R) { \
+    return nodes_iterator::begin(getEntryNode(R)); \
+  } \
+  static nodes_iterator nodes_end(RegionT* R) { \
+    return nodes_iterator::end(getEntryNode(R)); \
+  } \
+}
+
+RegionNodeGraphTraits(RegionNode);
+RegionNodeGraphTraits(const RegionNode);
+
+RegionGraphTraits(Region, RegionNode);
+RegionGraphTraits(const Region, const RegionNode);
+
+template <> struct GraphTraits<RegionInfo*>
+  : public GraphTraits<FlatIt<RegionNode*> > {
+  typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
+                      GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+
+  static NodeType *getEntryNode(RegionInfo *RI) {
+    return GraphTraits<FlatIt<Region*> >::getEntryNode(RI->getTopLevelRegion());
+  }
+  static nodes_iterator nodes_begin(RegionInfo* RI) {
+    return nodes_iterator::begin(getEntryNode(RI));
+  }
+  static nodes_iterator nodes_end(RegionInfo *RI) {
+    return nodes_iterator::end(getEntryNode(RI));
+  }
+};
+
+} // End namespace llvm
+
+#endif
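With these GraphTraits specializations in place, a Region can be handed to LLVM's generic graph algorithms. A small sketch follows (the function is hypothetical, and it assumes llvm/ADT/DepthFirstIterator.h is available for df_begin/df_end, as it already is for the df_iterator typedefs above):

    void visitRegionDepthFirst(Region *R, raw_ostream &OS) {
      for (df_iterator<Region*> I = df_begin(R), E = df_end(R); I != E; ++I)
        OS << **I << "\n";   // *I is a RegionNode*
    }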
diff --git a/final/include/llvm/Analysis/RegionPass.h b/final/include/llvm/Analysis/RegionPass.h
new file mode 100644
index 00000000000..aedc06aa6cf
--- /dev/null
+++ b/final/include/llvm/Analysis/RegionPass.h
@@ -0,0 +1,126 @@
+//===- RegionPass.h - RegionPass class ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegionPass class. All region based analysis,
+// optimization and transformation passes are derived from RegionPass.
+// This class is implemented following some of the ideas of the LoopPass
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_REGION_PASS_H
+#define LLVM_REGION_PASS_H
+
+#include "llvm/Analysis/RegionInfo.h"
+
+#include "llvm/Pass.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Function.h"
+
+#include <deque>
+
+namespace llvm {
+
+class RGPassManager;
+class Function;
+
+//===----------------------------------------------------------------------===//
+/// @brief A pass that runs on each Region in a function.
+///
+/// RegionPass is managed by RGPassManager.
+class RegionPass : public Pass {
+public:
+  explicit RegionPass(char &pid) : Pass(PT_Region, pid) {}
+
+  //===--------------------------------------------------------------------===//
+  /// @name To be implemented by every RegionPass
+  ///
+  //@{
+  /// @brief Run the pass on a specific Region
+  ///
+  /// Accessing regions not contained in the current region is not allowed.
+  ///
+  /// @param R The region this pass is run on.
+  /// @param RGM The RegionPassManager that manages this Pass.
+  ///
+  /// @return True if the pass modifies this Region.
+  virtual bool runOnRegion(Region *R, RGPassManager &RGM) = 0;
+
+  /// @brief Get a pass to print the LLVM IR in the region.
+  ///
+  /// @param O The output stream to print the Region.
+  /// @param Banner The banner to separate different printed passes.
+  ///
+  /// @return The pass to print the LLVM IR in the region.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  virtual bool doInitialization(Region *R, RGPassManager &RGM) { return false; }
+  virtual bool doFinalization() { return false; }
+  //@}
+
+  //===--------------------------------------------------------------------===//
+  /// @name PassManager API
+  ///
+  //@{
+  void preparePassManager(PMStack &PMS);
+
+  virtual void assignPassManager(PMStack &PMS,
+                                 PassManagerType PMT = PMT_RegionPassManager);
+
+  virtual PassManagerType getPotentialPassManagerType() const {
+    return PMT_RegionPassManager;
+  }
+  //@}
+};
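A concrete RegionPass is mostly boilerplate around runOnRegion. A minimal sketch, assuming the Region iterator API from RegionInfo.h and errs() from Support/raw_ostream.h (CountBlocks itself is hypothetical):

    class CountBlocks : public RegionPass {
    public:
      static char ID;
      CountBlocks() : RegionPass(ID) {}

      virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
        unsigned N = 0;
        for (Region::block_iterator I = R->block_begin(),
             E = R->block_end(); I != E; ++I)
          ++N;
        errs() << R->getNameStr() << ": " << N << " blocks\n";
        return false;   // the region was not modified
      }
    };
    char CountBlocks::ID = 0;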
+
+/// @brief The pass manager to schedule RegionPasses.
+class RGPassManager : public FunctionPass, public PMDataManager {
+  std::deque<Region*> RQ;
+  bool skipThisRegion;
+  bool redoThisRegion;
+  RegionInfo *RI;
+  Region *CurrentRegion;
+
+public:
+  static char ID;
+  explicit RGPassManager(int Depth);
+
+  /// @brief Execute all of the passes scheduled for execution.
+  ///
+  /// @return True if any of the passes modifies the function.
+  bool runOnFunction(Function &F);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  /// RGPassManager needs RegionInfo.
+  void getAnalysisUsage(AnalysisUsage &Info) const;
+
+  virtual const char *getPassName() const {
+    return "Region Pass Manager";
+  }
+
+  virtual PMDataManager *getAsPMDataManager() { return this; }
+  virtual Pass *getAsPass() { return this; }
+
+  /// @brief Print passes managed by this manager.
+  void dumpPassStructure(unsigned Offset);
+
+  /// @brief Get a pass contained by this manager.
+  Pass *getContainedPass(unsigned N) {
+    assert(N < PassVector.size() && "Pass number out of range!");
+    Pass *FP = static_cast<Pass *>(PassVector[N]);
+    return FP;
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_RegionPassManager;
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Analysis/RegionPrinter.h b/final/include/llvm/Analysis/RegionPrinter.h
new file mode 100644
index 00000000000..758748aad9e
--- /dev/null
+++ b/final/include/llvm/Analysis/RegionPrinter.h
@@ -0,0 +1,26 @@
+//===-- RegionPrinter.h - Region printer external interface -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines external functions that can be called to explicitly
+// instantiate the region printer.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_REGIONPRINTER_H
+#define LLVM_ANALYSIS_REGIONPRINTER_H
+
+namespace llvm {
+  class FunctionPass;
+  FunctionPass *createRegionViewerPass();
+  FunctionPass *createRegionOnlyViewerPass();
+  FunctionPass *createRegionPrinterPass();
+  FunctionPass *createRegionOnlyPrinterPass();
+} // End llvm namespace
+
+#endif
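These factory functions plug into an ordinary pass pipeline. A sketch under stated assumptions: printRegionsOf is hypothetical, PassManager is the standard llvm/PassManager.h API, and the printer pass is expected to emit GraphViz output per function.

    #include "llvm/Analysis/RegionPrinter.h"
    #include "llvm/PassManager.h"
    using namespace llvm;

    void printRegionsOf(Module &M) {
      PassManager PM;
      PM.add(createRegionOnlyPrinterPass());  // dot output, regions only
      PM.run(M);
    }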
diff --git a/final/include/llvm/Analysis/ScalarEvolution.h b/final/include/llvm/Analysis/ScalarEvolution.h
new file mode 100644
index 00000000000..d1938061bef
--- /dev/null
+++ b/final/include/llvm/Analysis/ScalarEvolution.h
@@ -0,0 +1,748 @@
+//===- llvm/Analysis/ScalarEvolution.h - Scalar Evolution -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ScalarEvolution class is an LLVM pass which can be used to analyze and
+// categorize scalar expressions in loops. It specializes in recognizing
+// general induction variables, representing them with the abstract and opaque
+// SCEV class. Given this analysis, trip counts of loops and other important
+// properties can be obtained.
+//
+// This analysis is primarily useful for induction variable substitution and
+// strength reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_H
+
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+
+namespace llvm {
+  class APInt;
+  class Constant;
+  class ConstantInt;
+  class DominatorTree;
+  class Type;
+  class ScalarEvolution;
+  class TargetData;
+  class LLVMContext;
+  class Loop;
+  class LoopInfo;
+  class Operator;
+  class SCEVUnknown;
+  class SCEV;
+  template<> struct FoldingSetTrait<SCEV>;
+
+  /// SCEV - This class represents an analyzed expression in the program.
+  /// These are opaque objects that the client is not allowed to do much with
+  /// directly.
+  ///
+  class SCEV : public FoldingSetNode {
+    friend struct FoldingSetTrait<SCEV>;
+
+    /// FastID - A reference to an Interned FoldingSetNodeID for this node.
+    /// The ScalarEvolution's BumpPtrAllocator holds the data.
+    FoldingSetNodeIDRef FastID;
+
+    // The SCEV baseclass this node corresponds to
+    const unsigned short SCEVType;
+
+  protected:
+    /// SubclassData - This field is initialized to zero and may be used in
+    /// subclasses to store miscellaneous information.
+    unsigned short SubclassData;
+
+  private:
+    SCEV(const SCEV &);            // DO NOT IMPLEMENT
+    void operator=(const SCEV &);  // DO NOT IMPLEMENT
+
+  public:
+    explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
+      FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
+
+    unsigned getSCEVType() const { return SCEVType; }
+
+    /// getType - Return the LLVM type of this SCEV expression.
+    ///
+    const Type *getType() const;
+
+    /// isZero - Return true if the expression is a constant zero.
+    ///
+    bool isZero() const;
+
+    /// isOne - Return true if the expression is a constant one.
+    ///
+    bool isOne() const;
+
+    /// isAllOnesValue - Return true if the expression is a constant
+    /// all-ones value.
+    ///
+    bool isAllOnesValue() const;
+
+    /// print - Print out the internal representation of this scalar to the
+    /// specified stream. This should really only be used for debugging
+    /// purposes.
+    void print(raw_ostream &OS) const;
+
+    /// dump - This method is used for debugging.
+    ///
+    void dump() const;
+  };
+
+  // Specialize FoldingSetTrait for SCEV to avoid needing to compute
+  // temporary FoldingSetNodeID values.
+  template<> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> {
+    static void Profile(const SCEV &X, FoldingSetNodeID& ID) {
+      ID = X.FastID;
+    }
+    static bool Equals(const SCEV &X, const FoldingSetNodeID &ID,
+                       FoldingSetNodeID &TempID) {
+      return ID == X.FastID;
+    }
+    static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) {
+      return X.FastID.ComputeHash();
+    }
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) {
+    S.print(OS);
+    return OS;
+  }
+
+  /// SCEVCouldNotCompute - An object of this class is returned by queries
+  /// that could not be answered. For example, if you ask for the number of
+  /// iterations of a linked-list traversal loop, you will get one of these.
+  /// None of the standard SCEV operations are valid on this class; it is
+  /// just a marker.
+  struct SCEVCouldNotCompute : public SCEV {
+    SCEVCouldNotCompute();
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVCouldNotCompute *S) { return true; }
+    static bool classof(const SCEV *S);
+  };
+
+  /// ScalarEvolution - This class is the main scalar evolution driver.
+  /// Because client code (intentionally) can't do much with the SCEV objects
+  /// directly, they must ask this class for services.
+  ///
+  class ScalarEvolution : public FunctionPass {
+  public:
+    /// LoopDisposition - An enum describing the relationship between a
+    /// SCEV and a loop.
+    enum LoopDisposition {
+      LoopVariant,    ///< The SCEV is loop-variant (unknown).
+      LoopInvariant,  ///< The SCEV is loop-invariant.
+      LoopComputable  ///< The SCEV varies predictably with the loop.
+    };
+
+    /// BlockDisposition - An enum describing the relationship between a
+    /// SCEV and a basic block.
+    enum BlockDisposition {
+      DoesNotDominateBlock,  ///< The SCEV does not dominate the block.
+      DominatesBlock,        ///< The SCEV dominates the block.
+      ProperlyDominatesBlock ///< The SCEV properly dominates the block.
+    };
+
+  private:
+    /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be
+    /// notified whenever a Value is deleted.
+    class SCEVCallbackVH : public CallbackVH {
+      ScalarEvolution *SE;
+      virtual void deleted();
+      virtual void allUsesReplacedWith(Value *New);
+    public:
+      SCEVCallbackVH(Value *V, ScalarEvolution *SE = 0);
+    };
+
+    friend class SCEVCallbackVH;
+    friend class SCEVExpander;
+    friend class SCEVUnknown;
+
+    /// F - The function we are analyzing.
+    ///
+    Function *F;
+
+    /// LI - The loop information for the function we are currently analyzing.
+    ///
+    LoopInfo *LI;
+
+    /// TD - The target data information for the target we are targeting.
+    ///
+    TargetData *TD;
+
+    /// DT - The dominator tree.
+    ///
+    DominatorTree *DT;
+
+    /// CouldNotCompute - This SCEV is used to represent unknown trip
+    /// counts and things.
+    SCEVCouldNotCompute CouldNotCompute;
+
+    /// ValueExprMapType - The typedef for ValueExprMap.
+    ///
+    typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
+      ValueExprMapType;
+
+    /// ValueExprMap - This is a cache of the values we have analyzed so far.
+    ///
+    ValueExprMapType ValueExprMap;
+
+    /// BackedgeTakenInfo - Information about the backedge-taken count
+    /// of a loop. This currently includes an exact count and a maximum count.
+    ///
+    struct BackedgeTakenInfo {
+      /// Exact - An expression indicating the exact backedge-taken count of
+      /// the loop if it is known, or a SCEVCouldNotCompute otherwise.
+      const SCEV *Exact;
+
+      /// Max - An expression indicating the least maximum backedge-taken
+      /// count of the loop that is known, or a SCEVCouldNotCompute.
+      const SCEV *Max;
+
+      /*implicit*/ BackedgeTakenInfo(const SCEV *exact) :
+        Exact(exact), Max(exact) {}
+
+      BackedgeTakenInfo(const SCEV *exact, const SCEV *max) :
+        Exact(exact), Max(max) {}
+
+      /// hasAnyInfo - Test whether this BackedgeTakenInfo contains any
+      /// computed information, or whether it's all SCEVCouldNotCompute
+      /// values.
+      bool hasAnyInfo() const {
+        return !isa<SCEVCouldNotCompute>(Exact) ||
+               !isa<SCEVCouldNotCompute>(Max);
+      }
+    };
+
+    /// BackedgeTakenCounts - Cache the backedge-taken count of the loops for
+    /// this function as they are computed.
+    std::map<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts;
+
+    /// ConstantEvolutionLoopExitValue - This map contains entries for all of
+    /// the PHI instructions that we attempt to compute constant evolutions
+    /// for. This allows us to avoid potentially expensive recomputation of
+    /// these properties. An instruction maps to null if we are unable to
+    /// compute its exit value.
+    std::map<PHINode *, Constant *> ConstantEvolutionLoopExitValue;
+
+    /// ValuesAtScopes - This map contains entries for all the expressions
+    /// that we attempt to compute getSCEVAtScope information for, which can
+    /// be expensive in extreme cases.
+    std::map<const SCEV *,
+             std::map<const Loop *, const SCEV *> > ValuesAtScopes;
+
+    /// LoopDispositions - Memoized computeLoopDisposition results.
+    std::map<const SCEV *,
+             std::map<const Loop *, LoopDisposition> > LoopDispositions;
+
+    /// computeLoopDisposition - Compute a LoopDisposition value.
+    LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
+
+    /// BlockDispositions - Memoized computeBlockDisposition results.
+    std::map<const SCEV *,
+             std::map<const BasicBlock *, BlockDisposition> >
+      BlockDispositions;
+
+    /// computeBlockDisposition - Compute a BlockDisposition value.
+    BlockDisposition computeBlockDisposition(const SCEV *S,
+                                             const BasicBlock *BB);
+
+    /// UnsignedRanges - Memoized results from getUnsignedRange
+    DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
+
+    /// SignedRanges - Memoized results from getSignedRange
+    DenseMap<const SCEV *, ConstantRange> SignedRanges;
+
+    /// setUnsignedRange - Set the memoized unsigned range for the given SCEV.
+    const ConstantRange &setUnsignedRange(const SCEV *S,
+                                          const ConstantRange &CR) {
+      std::pair<DenseMap<const SCEV *, ConstantRange>::iterator, bool> Pair =
+        UnsignedRanges.insert(std::make_pair(S, CR));
+      if (!Pair.second)
+        Pair.first->second = CR;
+      return Pair.first->second;
+    }
+
+    /// setSignedRange - Set the memoized signed range for the given SCEV.
+    const ConstantRange &setSignedRange(const SCEV *S,
+                                        const ConstantRange &CR) {
+      std::pair<DenseMap<const SCEV *, ConstantRange>::iterator, bool> Pair =
+        SignedRanges.insert(std::make_pair(S, CR));
+      if (!Pair.second)
+        Pair.first->second = CR;
+      return Pair.first->second;
+    }
+
+    /// createSCEV - We know that there is no SCEV for the specified value.
+    /// Analyze the expression.
+    const SCEV *createSCEV(Value *V);
+
+    /// createNodeForPHI - Provide the special handling we need to analyze
+    /// PHI SCEVs.
+    const SCEV *createNodeForPHI(PHINode *PN);
+
+    /// createNodeForGEP - Provide the special handling we need to analyze
+    /// GEP SCEVs.
+    const SCEV *createNodeForGEP(GEPOperator *GEP);
+
+    /// computeSCEVAtScope - Implementation code for getSCEVAtScope; called
+    /// at most once for each SCEV+Loop pair.
+    ///
+    const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
+
+    /// ForgetSymbolicName - This looks up computed SCEV values for all
+    /// instructions that depend on the given instruction and removes them
+    /// from the ValueExprMap map if they reference SymName. This is used
+    /// during PHI resolution.
+    void ForgetSymbolicName(Instruction *I, const SCEV *SymName);
+
+    /// getBECount - Subtract the end and start values and divide by the
+    /// step, rounding up, to get the number of times the backedge is
+    /// executed. Return CouldNotCompute if an intermediate computation
+    /// overflows.
+    const SCEV *getBECount(const SCEV *Start,
+                           const SCEV *End,
+                           const SCEV *Step,
+                           bool NoWrap);
+
+    /// getBackedgeTakenInfo - Return the BackedgeTakenInfo for the given
+    /// loop, lazily computing new values if the loop hasn't been analyzed
+    /// yet.
+    const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
+
+    /// ComputeBackedgeTakenCount - Compute the number of times the specified
+    /// loop will iterate.
+    BackedgeTakenInfo ComputeBackedgeTakenCount(const Loop *L);
+
+    /// ComputeBackedgeTakenCountFromExit - Compute the number of times the
+    /// backedge of the specified loop will execute if it exits via the
+    /// specified block.
+    BackedgeTakenInfo ComputeBackedgeTakenCountFromExit(const Loop *L,
+                                                        BasicBlock *ExitingBlock);
+
+    /// ComputeBackedgeTakenCountFromExitCond - Compute the number of times
+    /// the backedge of the specified loop will execute if its exit condition
+    /// were a conditional branch of ExitCond, TBB, and FBB.
+    BackedgeTakenInfo
+      ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+                                            Value *ExitCond,
+                                            BasicBlock *TBB,
+                                            BasicBlock *FBB);
+
+    /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of
+    /// times the backedge of the specified loop will execute if its exit
+    /// condition were a conditional branch of the ICmpInst ExitCond, TBB,
+    /// and FBB.
+    BackedgeTakenInfo
+      ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+                                                ICmpInst *ExitCond,
+                                                BasicBlock *TBB,
+                                                BasicBlock *FBB);
+
+    /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit
+    /// condition of 'icmp op load X, cst', try to see if we can compute the
+    /// backedge-taken count.
+    BackedgeTakenInfo
+      ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI,
+                                                   Constant *RHS,
+                                                   const Loop *L,
+                                                   ICmpInst::Predicate p);
+
+    /// ComputeBackedgeTakenCountExhaustively - If the loop is known to
+    /// execute a constant number of times (the condition evolves only from
+    /// constants), try to evaluate a few iterations of the loop until the
+    /// exit condition gets a value of ExitWhen (true or false). If we cannot
+    /// evaluate the backedge-taken count of the loop, return CouldNotCompute.
+    const SCEV *ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                      Value *Cond,
+                                                      bool ExitWhen);
+
+    /// HowFarToZero - Return the number of times a backedge comparing the
+    /// specified value to zero will execute. If not computable, return
+    /// CouldNotCompute.
+    BackedgeTakenInfo HowFarToZero(const SCEV *V, const Loop *L);
+
+    /// HowFarToNonZero - Return the number of times a backedge checking the
+    /// specified value for nonzero will execute. If not computable, return
+    /// CouldNotCompute.
+    BackedgeTakenInfo HowFarToNonZero(const SCEV *V, const Loop *L);
+
+    /// HowManyLessThans - Return the number of times a backedge containing
+    /// the specified less-than comparison will execute. If not computable,
+    /// return CouldNotCompute. isSigned specifies whether the less-than is
+    /// signed.
+    BackedgeTakenInfo HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+                                       const Loop *L, bool isSigned);
+
+    /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
+    /// (which may not be an immediate predecessor) which has exactly one
+    /// successor from which BB is reachable, or null if no such block is
+    /// found.
+    std::pair<BasicBlock *, BasicBlock *>
+      getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
+
+    /// isImpliedCond - Test whether the condition described by Pred, LHS,
+    /// and RHS is true whenever the given FoundCondValue value evaluates to
+    /// true.
+    bool isImpliedCond(ICmpInst::Predicate Pred,
+                       const SCEV *LHS, const SCEV *RHS,
+                       Value *FoundCondValue,
+                       bool Inverse);
+
+    /// isImpliedCondOperands - Test whether the condition described by Pred,
+    /// LHS, and RHS is true whenever the condition described by Pred,
+    /// FoundLHS, and FoundRHS is true.
+    bool isImpliedCondOperands(ICmpInst::Predicate Pred,
+                               const SCEV *LHS, const SCEV *RHS,
+                               const SCEV *FoundLHS, const SCEV *FoundRHS);
+
+    /// isImpliedCondOperandsHelper - Test whether the condition described by
+    /// Pred, LHS, and RHS is true whenever the condition described by Pred,
+    /// FoundLHS, and FoundRHS is true.
+    bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
+                                     const SCEV *LHS, const SCEV *RHS,
+                                     const SCEV *FoundLHS,
+                                     const SCEV *FoundRHS);
+
+    /// getConstantEvolutionLoopExitValue - If we know that the specified Phi
+    /// is in the header of its containing loop, we know the loop executes a
+    /// constant number of times, and the PHI node is just a recurrence
+    /// involving constants, fold it.
+    Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
+                                                const Loop *L);
+
+    /// isKnownPredicateWithRanges - Test if the given expression is known to
+    /// satisfy the condition described by Pred and the known constant ranges
+    /// of LHS and RHS.
+    ///
+    bool isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+                                    const SCEV *LHS, const SCEV *RHS);
+
+    /// forgetMemoizedResults - Drop memoized information computed for S.
+    void forgetMemoizedResults(const SCEV *S);
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    ScalarEvolution();
+
+    LLVMContext &getContext() const { return F->getContext(); }
+
+    /// isSCEVable - Test if values of the given type are analyzable within
+    /// the SCEV framework. This primarily includes integer types, and it
+    /// can optionally include pointer types if the ScalarEvolution class
+    /// has access to target-specific information.
+    bool isSCEVable(const Type *Ty) const;
+
+    /// getTypeSizeInBits - Return the size in bits of the specified type,
+    /// for which isSCEVable must return true.
+    uint64_t getTypeSizeInBits(const Type *Ty) const;
+
+    /// getEffectiveSCEVType - Return a type with the same bitwidth as
+    /// the given type and which represents how SCEV will treat the given
+    /// type, for which isSCEVable must return true. For pointer types,
+    /// this is the pointer-sized integer type.
+    const Type *getEffectiveSCEVType(const Type *Ty) const;
+
+    /// getSCEV - Return a SCEV expression for the full generality of the
+    /// specified expression.
+    const SCEV *getSCEV(Value *V);
+
+    const SCEV *getConstant(ConstantInt *V);
+    const SCEV *getConstant(const APInt& Val);
+    const SCEV *getConstant(const Type *Ty, uint64_t V, bool isSigned = false);
+    const SCEV *getTruncateExpr(const SCEV *Op, const Type *Ty);
+    const SCEV *getZeroExtendExpr(const SCEV *Op, const Type *Ty);
+    const SCEV *getSignExtendExpr(const SCEV *Op, const Type *Ty);
+    const SCEV *getAnyExtendExpr(const SCEV *Op, const Type *Ty);
+    const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+                           bool HasNUW = false, bool HasNSW = false);
+    const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
+                           bool HasNUW = false, bool HasNSW = false) {
+      SmallVector<const SCEV *, 2> Ops;
+      Ops.push_back(LHS);
+      Ops.push_back(RHS);
+      return getAddExpr(Ops, HasNUW, HasNSW);
+    }
+    const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1,
+                           const SCEV *Op2,
+                           bool HasNUW = false, bool HasNSW = false) {
+      SmallVector<const SCEV *, 3> Ops;
+      Ops.push_back(Op0);
+      Ops.push_back(Op1);
+      Ops.push_back(Op2);
+      return getAddExpr(Ops, HasNUW, HasNSW);
+    }
+    const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+                           bool HasNUW = false, bool HasNSW = false);
+    const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
+                           bool HasNUW = false, bool HasNSW = false) {
+      SmallVector<const SCEV *, 2> Ops;
+      Ops.push_back(LHS);
+      Ops.push_back(RHS);
+      return getMulExpr(Ops, HasNUW, HasNSW);
+    }
+    const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
+    const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step,
+                              const Loop *L,
+                              bool HasNUW = false, bool HasNSW = false);
+    const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+                              const Loop *L,
+                              bool HasNUW = false, bool HasNSW = false);
+    const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
+                              const Loop *L,
+                              bool HasNUW = false, bool HasNSW = false) {
+      SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
+      return getAddRecExpr(NewOp, L, HasNUW, HasNSW);
+    }
+    const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
+    const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+    const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
+    const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+    const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS);
+    const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS);
+    const SCEV *getUnknown(Value *V);
+    const SCEV *getCouldNotCompute();
+
+    /// getSizeOfExpr - Return an expression for sizeof on the given type.
+    ///
+    const SCEV *getSizeOfExpr(const Type *AllocTy);
+
+    /// getAlignOfExpr - Return an expression for alignof on the given type.
+    ///
+    const SCEV *getAlignOfExpr(const Type *AllocTy);
+
+    /// getOffsetOfExpr - Return an expression for offsetof on the given
+    /// field.
+    ///
+    const SCEV *getOffsetOfExpr(const StructType *STy, unsigned FieldNo);
+
+    /// getOffsetOfExpr - Return an expression for offsetof on the given
+    /// field.
+    ///
+    const SCEV *getOffsetOfExpr(const Type *CTy, Constant *FieldNo);
+
+    /// getNegativeSCEV - Return the SCEV object corresponding to -V.
+    ///
+    const SCEV *getNegativeSCEV(const SCEV *V);
+
+    /// getNotSCEV - Return the SCEV object corresponding to ~V.
+    ///
+    const SCEV *getNotSCEV(const SCEV *V);
+
+    /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as
+    /// A+B*-1, and thus the HasNUW and HasNSW bits apply to the resultant
+    /// add, not whether the sub would have overflowed.
+    const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+                             bool HasNUW = false, bool HasNSW = false);
+
+    /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion
+    /// of the input value to the specified type. If the type must be
+    /// extended, it is zero extended.
+    const SCEV *getTruncateOrZeroExtend(const SCEV *V, const Type *Ty);
+
+    /// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion
+    /// of the input value to the specified type. If the type must be
+    /// extended, it is sign extended.
+    const SCEV *getTruncateOrSignExtend(const SCEV *V, const Type *Ty);
+
+    /// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of
+    /// the input value to the specified type. If the type must be extended,
+    /// it is zero extended. The conversion must not be narrowing.
+    const SCEV *getNoopOrZeroExtend(const SCEV *V, const Type *Ty);
+
+    /// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of
+    /// the input value to the specified type. If the type must be extended,
+    /// it is sign extended. The conversion must not be narrowing.
+    const SCEV *getNoopOrSignExtend(const SCEV *V, const Type *Ty);
+
+    /// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of
+    /// the input value to the specified type. If the type must be extended,
+    /// it is extended with unspecified bits. The conversion must not be
+    /// narrowing.
+    const SCEV *getNoopOrAnyExtend(const SCEV *V, const Type *Ty);
+
+    /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of
+    /// the input value to the specified type. The conversion must not be
+    /// widening.
+    const SCEV *getTruncateOrNoop(const SCEV *V, const Type *Ty);
+
+    /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+    /// the types using zero-extension, and then perform a umax operation
+    /// with them.
+    const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS,
+                                           const SCEV *RHS);
+
+    /// getUMinFromMismatchedTypes - Promote the operands to the wider of
+    /// the types using zero-extension, and then perform a umin operation
+    /// with them.
+    const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
+                                           const SCEV *RHS);
+
+    /// getSCEVAtScope - Return a SCEV expression for the specified value
+    /// at the specified scope in the program. The L value specifies a loop
+    /// nest to evaluate the expression at; null means the top level, and a
+    /// specified loop means immediately inside of that loop.
+    ///
+    /// This method can be used to compute the exit value for a variable
+    /// defined in a loop by querying what the value will hold in the parent
+    /// loop.
+    ///
+    /// In the case that a relevant loop exit value cannot be computed, the
+    /// original value V is returned.
+    const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);
+
+    /// getSCEVAtScope - This is a convenience function which does
+    /// getSCEVAtScope(getSCEV(V), L).
+    const SCEV *getSCEVAtScope(Value *V, const Loop *L);
+
+    /// isLoopEntryGuardedByCond - Test whether entry to the loop is
+    /// protected by a conditional between LHS and RHS. This is used to help
+    /// avoid max expressions in loop trip counts, and to eliminate casts.
+    bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                                  const SCEV *LHS, const SCEV *RHS);
+
+    /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop
+    /// is protected by a conditional between LHS and RHS. This is used to
+    /// eliminate casts.
+    bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+                                     const SCEV *LHS, const SCEV *RHS);
+
+    /// getBackedgeTakenCount - If the specified loop has a predictable
+    /// backedge-taken count, return it, otherwise return a
+    /// SCEVCouldNotCompute object. The backedge-taken count is the number of
+    /// times the loop header will be branched to from within the loop. This
+    /// is one less than the trip count of the loop, since it doesn't count
+    /// the first iteration, when the header is branched to from outside the
+    /// loop.
+    ///
+    /// Note that it is not valid to call this method on a loop without a
+    /// loop-invariant backedge-taken count (see
+    /// hasLoopInvariantBackedgeTakenCount).
+    ///
+    const SCEV *getBackedgeTakenCount(const Loop *L);
+
+    /// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+    /// return the least SCEV value that is known never to be less than the
+    /// actual backedge taken count.
+    const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+
+    /// hasLoopInvariantBackedgeTakenCount - Return true if the specified
+    /// loop has an analyzable loop-invariant backedge-taken count.
+    bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
+
+    /// forgetLoop - This method should be called by the client when it has
+    /// changed a loop in a way that may affect ScalarEvolution's ability to
+    /// compute a trip count, or if the loop is deleted.
+    void forgetLoop(const Loop *L);
+
+    /// forgetValue - This method should be called by the client when it has
+    /// changed a value in a way that may affect its value, or which may
+    /// disconnect it from a def-use chain linking it to a loop.
+    void forgetValue(Value *V);
+
+    /// GetMinTrailingZeros - Determine the minimum number of zero bits that
+    /// S is guaranteed to end in (at every loop iteration). It is, at the
+    /// same time, the minimum number of times S is divisible by 2. For
+    /// example, given {4,+,8} it returns 2. If S is guaranteed to be 0, it
+    /// returns the bitwidth of S.
+    uint32_t GetMinTrailingZeros(const SCEV *S);
+
+    /// getUnsignedRange - Determine the unsigned range for a particular
+    /// SCEV.
+    ///
+    ConstantRange getUnsignedRange(const SCEV *S);
+
+    /// getSignedRange - Determine the signed range for a particular SCEV.
+    ///
+    ConstantRange getSignedRange(const SCEV *S);
+
+    /// isKnownNegative - Test if the given expression is known to be
+    /// negative.
+    ///
+    bool isKnownNegative(const SCEV *S);
+
+    /// isKnownPositive - Test if the given expression is known to be
+    /// positive.
+    ///
+    bool isKnownPositive(const SCEV *S);
+
+    /// isKnownNonNegative - Test if the given expression is known to be
+    /// non-negative.
+    ///
+    bool isKnownNonNegative(const SCEV *S);
+
+    /// isKnownNonPositive - Test if the given expression is known to be
+    /// non-positive.
+    ///
+    bool isKnownNonPositive(const SCEV *S);
+
+    /// isKnownNonZero - Test if the given expression is known to be
+    /// non-zero.
+    ///
+    bool isKnownNonZero(const SCEV *S);
+
+    /// isKnownPredicate - Test if the given expression is known to satisfy
+    /// the condition described by Pred, LHS, and RHS.
+    ///
+    bool isKnownPredicate(ICmpInst::Predicate Pred,
+                          const SCEV *LHS, const SCEV *RHS);
+
+    /// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with
+    /// predicate Pred. Return true iff any changes were made. If the
+    /// operands are provably equal or inequal, LHS and RHS are set to
+    /// the same value and Pred is set to either ICMP_EQ or ICMP_NE.
+    ///
+    bool SimplifyICmpOperands(ICmpInst::Predicate &Pred,
+                              const SCEV *&LHS,
+                              const SCEV *&RHS);
+
+    /// getLoopDisposition - Return the "disposition" of the given SCEV with
+    /// respect to the given loop.
+    LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
+
+    /// isLoopInvariant - Return true if the value of the given SCEV is
+    /// unchanging in the specified loop.
+    bool isLoopInvariant(const SCEV *S, const Loop *L);
+
+    /// hasComputableLoopEvolution - Return true if the given SCEV changes
+    /// value in a known way in the specified loop. This property being true
+    /// implies that the value is variant in the loop AND that we can emit an
+    /// expression to compute the value of the expression at any particular
+    /// loop iteration.
+    bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
+
+    /// getBlockDisposition - Return the "disposition" of the given SCEV with
+    /// respect to the given block.
+    BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
+
+    /// dominates - Return true if elements that make up the given SCEV
+    /// dominate the specified basic block.
+    bool dominates(const SCEV *S, const BasicBlock *BB);
+
+    /// properlyDominates - Return true if elements that make up the given
+    /// SCEV properly dominate the specified basic block.
+    bool properlyDominates(const SCEV *S, const BasicBlock *BB);
+
+    /// hasOperand - Test whether the given SCEV has Op as a direct or
+    /// indirect operand.
+    bool hasOperand(const SCEV *S, const SCEV *Op) const;
+
+    virtual bool runOnFunction(Function &F);
+    virtual void releaseMemory();
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void print(raw_ostream &OS, const Module* = 0) const;
+
+  private:
+    FoldingSet<SCEV> UniqueSCEVs;
+    BumpPtrAllocator SCEVAllocator;
+
+    /// FirstUnknown - The head of a linked list of all SCEVUnknown
+    /// values that have been allocated. This is used by releaseMemory
+    /// to locate them all and call their destructors.
+    SCEVUnknown *FirstUnknown;
+  };
+}
+
+#endif
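To make the query API above concrete, here is a sketch of a client computing a loop's trip count as a SCEV. reportTripCount and its parameters are hypothetical; every call on SE is declared in this header.

    void reportTripCount(ScalarEvolution &SE, const Loop *L, raw_ostream &OS) {
      if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
        OS << "backedge-taken count not computable\n";
        return;
      }
      const SCEV *BTC = SE.getBackedgeTakenCount(L);
      // The trip count is one more than the backedge-taken count.
      const SCEV *Trip = SE.getAddExpr(BTC, SE.getConstant(BTC->getType(), 1));
      OS << "trip count: " << *Trip << "\n";
    }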
diff --git a/final/include/llvm/Analysis/ScalarEvolutionExpander.h b/final/include/llvm/Analysis/ScalarEvolutionExpander.h
new file mode 100644
index 00000000000..39d378ed9be
--- /dev/null
+++ b/final/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -0,0 +1,215 @@
+//===---- llvm/Analysis/ScalarEvolutionExpander.h - SCEV Exprs --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the classes used to generate code from scalar
+// expressions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPANDER_H
+
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/TargetFolder.h"
+#include "llvm/Support/ValueHandle.h"
+#include <set>
+
+namespace llvm {
+  /// SCEVExpander - This class uses information about analyzed scalars to
+  /// rewrite expressions in canonical form.
+  ///
+  /// Clients should create an instance of this class when rewriting is
+  /// needed, and destroy it when finished to allow the release of the
+  /// associated memory.
+  class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
+    ScalarEvolution &SE;
+    std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
+      InsertedExpressions;
+    std::set<AssertingVH<Value> > InsertedValues;
+    std::set<AssertingVH<Value> > InsertedPostIncValues;
+
+    /// RelevantLoops - A memoization of the "relevant" loop for a given
+    /// SCEV.
+    DenseMap<const SCEV *, const Loop *> RelevantLoops;
+
+    /// PostIncLoops - Addrecs referring to any of the given loops are
+    /// expanded in post-inc mode. For example, expanding {1,+,1}<L> in
+    /// post-inc mode returns the add instruction that adds one to the phi
+    /// for {0,+,1}<L>, as opposed to a new phi starting at 1. This is only
+    /// supported in non-canonical mode.
+    PostIncLoopSet PostIncLoops;
+
+    /// IVIncInsertLoop - When this is non-null, addrecs expanded in the
+    /// loop it indicates should be inserted with increments at
+    /// IVIncInsertPos.
+    const Loop *IVIncInsertLoop;
+
+    /// IVIncInsertPos - When expanding addrecs in the IVIncInsertLoop loop,
+    /// insert the IV increment at this position.
+    Instruction *IVIncInsertPos;
+
+    /// CanonicalMode - When true, expressions are expanded in "canonical"
+    /// form. In particular, addrecs are expanded as arithmetic based on
+    /// a canonical induction variable. When false, expressions are expanded
+    /// in a more literal form.
+    bool CanonicalMode;
+
+    typedef IRBuilder<true, TargetFolder> BuilderType;
+    BuilderType Builder;
+
+    friend struct SCEVVisitor<SCEVExpander, Value*>;
+
+  public:
+    /// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
+    explicit SCEVExpander(ScalarEvolution &se)
+      : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
+        Builder(se.getContext(), TargetFolder(se.TD)) {}
+
+    /// clear - Erase the contents of the InsertedExpressions map so that
+    /// users trying to expand the same expression into multiple BasicBlocks
+    /// or different places within the same BasicBlock can do so.
+    void clear() {
+      InsertedExpressions.clear();
+      InsertedValues.clear();
+      InsertedPostIncValues.clear();
+    }
+
+    /// getOrInsertCanonicalInductionVariable - This method returns the
+    /// canonical induction variable of the specified type for the specified
+    /// loop (inserting one if there is none). A canonical induction variable
+    /// starts at zero and steps by one on each iteration.
+    PHINode *getOrInsertCanonicalInductionVariable(const Loop *L,
+                                                   const Type *Ty);
+
+    /// expandCodeFor - Insert code to directly compute the specified SCEV
+    /// expression into the program. The inserted code is inserted into the
+    /// specified block.
+    Value *expandCodeFor(const SCEV *SH, const Type *Ty, Instruction *I);
+
+    /// setIVIncInsertPos - Set the current IV increment loop and position.
+    void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
+      assert(!CanonicalMode &&
+             "IV increment positions are not supported in CanonicalMode");
+      IVIncInsertLoop = L;
+      IVIncInsertPos = Pos;
+    }
+
+    /// setPostInc - Enable post-inc expansion for addrecs referring to the
+    /// given loops. Post-inc expansion is only supported in non-canonical
+    /// mode.
+    void setPostInc(const PostIncLoopSet &L) {
+      assert(!CanonicalMode &&
+             "Post-inc expansion is not supported in CanonicalMode");
+      PostIncLoops = L;
+    }
+
+    /// clearPostInc - Disable all post-inc expansion.
+    void clearPostInc() {
+      PostIncLoops.clear();
+
+      // When we change the post-inc loop set, cached expansions may no
+      // longer be valid.
+      InsertedPostIncValues.clear();
+    }
+
+    /// disableCanonicalMode - Disable the behavior of expanding expressions
+    /// in canonical form rather than in a more literal form. Non-canonical
+    /// mode is useful for late optimization passes.
+    void disableCanonicalMode() { CanonicalMode = false; }
+
+    /// clearInsertPoint - Clear the current insertion point. This is useful
+    /// if the instruction that had been serving as the insertion point may
+    /// have been deleted.
+    void clearInsertPoint() {
+      Builder.ClearInsertionPoint();
+    }
+
+  private:
+    LLVMContext &getContext() const { return SE.getContext(); }
+
+    /// InsertBinop - Insert the specified binary operator, doing a small
+    /// amount of work to avoid inserting an obviously redundant operation.
+    Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
+
+    /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+    /// reusing an existing cast if a suitable one exists, moving an existing
+    /// cast if a suitable one exists but isn't in the right place, or
+    /// creating a new one.
+    Value *ReuseOrCreateCast(Value *V, const Type *Ty,
+                             Instruction::CastOps Op,
+                             BasicBlock::iterator IP);
+
+    /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+    /// which must be possible with a noop cast, doing what we can to
+    /// share the casts.
+    Value *InsertNoopCastOfTo(Value *V, const Type *Ty);
+
+    /// expandAddToGEP - Expand a SCEVAddExpr with a pointer type into a GEP
+    /// instead of using ptrtoint+arithmetic+inttoptr.
+    Value *expandAddToGEP(const SCEV *const *op_begin,
+                          const SCEV *const *op_end,
+                          const PointerType *PTy, const Type *Ty, Value *V);
+
+    Value *expand(const SCEV *S);
+
+    /// expandCodeFor - Insert code to directly compute the specified SCEV
+    /// expression into the program. The inserted code is inserted into the
+    /// SCEVExpander's current insertion point. If a type is specified, the
+    /// result will be expanded to have that type, with a cast if necessary.
+    Value *expandCodeFor(const SCEV *SH, const Type *Ty = 0);
+
+    /// isInsertedInstruction - Return true if the specified instruction was
+    /// inserted by the code rewriter. If so, the client should not modify
+    /// the instruction.
+    bool isInsertedInstruction(Instruction *I) const {
+      return InsertedValues.count(I) || InsertedPostIncValues.count(I);
+    }
+
+    /// getRelevantLoop - Determine the most "relevant" loop for the given SCEV.
+ const Loop *getRelevantLoop(const SCEV *); + + Value *visitConstant(const SCEVConstant *S) { + return S->getValue(); + } + + Value *visitTruncateExpr(const SCEVTruncateExpr *S); + + Value *visitZeroExtendExpr(const SCEVZeroExtendExpr *S); + + Value *visitSignExtendExpr(const SCEVSignExtendExpr *S); + + Value *visitAddExpr(const SCEVAddExpr *S); + + Value *visitMulExpr(const SCEVMulExpr *S); + + Value *visitUDivExpr(const SCEVUDivExpr *S); + + Value *visitAddRecExpr(const SCEVAddRecExpr *S); + + Value *visitSMaxExpr(const SCEVSMaxExpr *S); + + Value *visitUMaxExpr(const SCEVUMaxExpr *S); + + Value *visitUnknown(const SCEVUnknown *S) { + return S->getValue(); + } + + void rememberInstruction(Value *I); + + void restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I); + + Value *expandAddRecExprLiterally(const SCEVAddRecExpr *); + PHINode *getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + const Type *ExpandTy, + const Type *IntTy); + }; +} + +#endif diff --git a/final/include/llvm/Analysis/ScalarEvolutionExpressions.h b/final/include/llvm/Analysis/ScalarEvolutionExpressions.h new file mode 100644 index 00000000000..db432c8173d --- /dev/null +++ b/final/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -0,0 +1,491 @@ +//===- llvm/Analysis/ScalarEvolutionExpressions.h - SCEV Exprs --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the classes used to represent and build scalar expressions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H +#define LLVM_ANALYSIS_SCALAREVOLUTION_EXPRESSIONS_H + +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Support/ErrorHandling.h" + +namespace llvm { + class ConstantInt; + class ConstantRange; + class DominatorTree; + + enum SCEVTypes { + // These should be ordered in terms of increasing complexity to make the + // folders simpler. + scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, + scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, + scUnknown, scCouldNotCompute + }; + + //===--------------------------------------------------------------------===// + /// SCEVConstant - This class represents a constant integer value. + /// + class SCEVConstant : public SCEV { + friend class ScalarEvolution; + + ConstantInt *V; + SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) : + SCEV(ID, scConstant), V(v) {} + public: + ConstantInt *getValue() const { return V; } + + const Type *getType() const { return V->getType(); } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVConstant *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scConstant; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVCastExpr - This is the base class for unary cast operator classes. 
+ /// + class SCEVCastExpr : public SCEV { + protected: + const SCEV *Op; + const Type *Ty; + + SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, const Type *ty); + + public: + const SCEV *getOperand() const { return Op; } + const Type *getType() const { return Ty; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVCastExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scTruncate || + S->getSCEVType() == scZeroExtend || + S->getSCEVType() == scSignExtend; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVTruncateExpr - This class represents a truncation of an integer value + /// to a smaller integer value. + /// + class SCEVTruncateExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVTruncateExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scTruncate; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVZeroExtendExpr - This class represents a zero extension of a small + /// integer value to a larger integer value. + /// + class SCEVZeroExtendExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVZeroExtendExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scZeroExtend; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVSignExtendExpr - This class represents a sign extension of a small + /// integer value to a larger integer value. + /// + class SCEVSignExtendExpr : public SCEVCastExpr { + friend class ScalarEvolution; + + SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty); + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVSignExtendExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scSignExtend; + } + }; + + + //===--------------------------------------------------------------------===// + /// SCEVNAryExpr - This node is a base class providing common + /// functionality for n'ary operators. + /// + class SCEVNAryExpr : public SCEV { + protected: + // Since SCEVs are immutable, ScalarEvolution allocates operand + // arrays with its SCEVAllocator, so this class just needs a simple + // pointer rather than a more elaborate vector-like data structure. + // This also avoids the need for a non-trivial destructor. 
+ const SCEV *const *Operands; + size_t NumOperands; + + SCEVNAryExpr(const FoldingSetNodeIDRef ID, + enum SCEVTypes T, const SCEV *const *O, size_t N) + : SCEV(ID, T), Operands(O), NumOperands(N) {} + + public: + size_t getNumOperands() const { return NumOperands; } + const SCEV *getOperand(unsigned i) const { + assert(i < NumOperands && "Operand index out of range!"); + return Operands[i]; + } + + typedef const SCEV *const *op_iterator; + op_iterator op_begin() const { return Operands; } + op_iterator op_end() const { return Operands + NumOperands; } + + const Type *getType() const { return getOperand(0)->getType(); } + + bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); } + void setHasNoUnsignedWrap(bool B) { + SubclassData = (SubclassData & ~(1 << 0)) | (B << 0); + } + bool hasNoSignedWrap() const { return SubclassData & (1 << 1); } + void setHasNoSignedWrap(bool B) { + SubclassData = (SubclassData & ~(1 << 1)) | (B << 1); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVNAryExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr || + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || + S->getSCEVType() == scUMaxExpr || + S->getSCEVType() == scAddRecExpr; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVCommutativeExpr - This node is the base class for n'ary commutative + /// operators. + /// + class SCEVCommutativeExpr : public SCEVNAryExpr { + protected: + SCEVCommutativeExpr(const FoldingSetNodeIDRef ID, + enum SCEVTypes T, const SCEV *const *O, size_t N) + : SCEVNAryExpr(ID, T, O, N) {} + + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVCommutativeExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr || + S->getSCEVType() == scMulExpr || + S->getSCEVType() == scSMaxExpr || + S->getSCEVType() == scUMaxExpr; + } + }; + + + //===--------------------------------------------------------------------===// + /// SCEVAddExpr - This node represents an addition of some number of SCEVs. + /// + class SCEVAddExpr : public SCEVCommutativeExpr { + friend class ScalarEvolution; + + SCEVAddExpr(const FoldingSetNodeIDRef ID, + const SCEV *const *O, size_t N) + : SCEVCommutativeExpr(ID, scAddExpr, O, N) { + } + + public: + const Type *getType() const { + // Use the type of the last operand, which is likely to be a pointer + // type, if there is one. This doesn't usually matter, but it can help + // reduce casts when the expressions are expanded. + return getOperand(getNumOperands() - 1)->getType(); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SCEVAddExpr *S) { return true; } + static inline bool classof(const SCEV *S) { + return S->getSCEVType() == scAddExpr; + } + }; + + //===--------------------------------------------------------------------===// + /// SCEVMulExpr - This node represents multiplication of some number of SCEVs. 
+  ///
+  class SCEVMulExpr : public SCEVCommutativeExpr {
+    friend class ScalarEvolution;
+
+    SCEVMulExpr(const FoldingSetNodeIDRef ID,
+                const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scMulExpr, O, N) {
+    }
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVMulExpr *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scMulExpr;
+    }
+  };
+
+
+  //===--------------------------------------------------------------------===//
+  /// SCEVUDivExpr - This class represents a binary unsigned division operation.
+  ///
+  class SCEVUDivExpr : public SCEV {
+    friend class ScalarEvolution;
+
+    const SCEV *LHS;
+    const SCEV *RHS;
+    SCEVUDivExpr(const FoldingSetNodeIDRef ID, const SCEV *lhs, const SCEV *rhs)
+      : SCEV(ID, scUDivExpr), LHS(lhs), RHS(rhs) {}
+
+  public:
+    const SCEV *getLHS() const { return LHS; }
+    const SCEV *getRHS() const { return RHS; }
+
+    const Type *getType() const {
+      // In most cases the types of LHS and RHS will be the same, but in some
+      // crazy cases one or the other may be a pointer. ScalarEvolution doesn't
+      // depend on the type for correctness, but handling types carefully can
+      // avoid extra casts in the SCEVExpander. The LHS is more likely to be
+      // a pointer type than the RHS, so use the RHS' type here.
+      return getRHS()->getType();
+    }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVUDivExpr *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scUDivExpr;
+    }
+  };
+
+
+  //===--------------------------------------------------------------------===//
+  /// SCEVAddRecExpr - This node represents a polynomial recurrence on the trip
+  /// count of the specified loop. This is the primary focus of the
+  /// ScalarEvolution framework; all the other SCEV subclasses are mostly just
+  /// supporting infrastructure to allow SCEVAddRecExpr expressions to be
+  /// created and analyzed.
+  ///
+  /// All operands of an AddRec are required to be loop invariant.
+  ///
+  class SCEVAddRecExpr : public SCEVNAryExpr {
+    friend class ScalarEvolution;
+
+    const Loop *L;
+
+    SCEVAddRecExpr(const FoldingSetNodeIDRef ID,
+                   const SCEV *const *O, size_t N, const Loop *l)
+      : SCEVNAryExpr(ID, scAddRecExpr, O, N), L(l) {}
+
+  public:
+    const SCEV *getStart() const { return Operands[0]; }
+    const Loop *getLoop() const { return L; }
+
+    /// getStepRecurrence - This method constructs and returns the recurrence
+    /// indicating how much this expression steps by. If this is a polynomial
+    /// of degree N, it returns a chrec of degree N-1.
+    const SCEV *getStepRecurrence(ScalarEvolution &SE) const {
+      if (isAffine()) return getOperand(1);
+      return SE.getAddRecExpr(SmallVector<const SCEV *, 3>(op_begin()+1,
+                                                           op_end()),
+                              getLoop());
+    }
+
+    /// isAffine - Return true if this is an affine AddRec (i.e., it represents
+    /// an expression A+B*x where A and B are loop invariant values).
+    bool isAffine() const {
+      // We know that the start value is invariant. This expression is thus
+      // affine iff the step is also invariant.
+      return getNumOperands() == 2;
+    }
+
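+    // Editorial usage sketch (hypothetical; assumes a ScalarEvolution &SE
+    // and an affine addrec AR equal to {0,+,2}<%L>): evaluateAtIteration,
+    // declared below, folds the recurrence at a fixed iteration number.
+    //
+    //   if (AR->isAffine()) {
+    //     const SCEV *It = SE.getConstant(AR->getType(), 5);
+    //     const SCEV *V = AR->evaluateAtIteration(It, SE); // {0,+,2} at i=5 is 10
+    //   }
+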
+    /// isQuadratic - Return true if this is a quadratic AddRec (i.e., it
+    /// represents an expression A+B*x+C*x^2 where A, B and C are loop
+    /// invariant values). This corresponds to an addrec of the form
+    /// {L,+,M,+,N}.
+    bool isQuadratic() const {
+      return getNumOperands() == 3;
+    }
+
+    /// evaluateAtIteration - Return the value of this chain of recurrences at
+    /// the specified iteration number.
+    const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const;
+
+    /// getNumIterationsInRange - Return the number of iterations of this loop
+    /// that produce values in the specified constant range. Another way of
+    /// looking at this is that it returns the first iteration number where the
+    /// value is no longer in the range, thus computing the exit count. If the
+    /// iteration count can't be computed, an instance of SCEVCouldNotCompute is
+    /// returned.
+    const SCEV *getNumIterationsInRange(ConstantRange Range,
+                                        ScalarEvolution &SE) const;
+
+    /// getPostIncExpr - Return an expression representing the value of
+    /// this expression one iteration of the loop ahead.
+    const SCEVAddRecExpr *getPostIncExpr(ScalarEvolution &SE) const {
+      return cast<SCEVAddRecExpr>(SE.getAddExpr(this, getStepRecurrence(SE)));
+    }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVAddRecExpr *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scAddRecExpr;
+    }
+  };
+
+
+  //===--------------------------------------------------------------------===//
+  /// SCEVSMaxExpr - This class represents a signed maximum selection.
+  ///
+  class SCEVSMaxExpr : public SCEVCommutativeExpr {
+    friend class ScalarEvolution;
+
+    SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
+      // Max never overflows.
+      setHasNoUnsignedWrap(true);
+      setHasNoSignedWrap(true);
+    }
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVSMaxExpr *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scSMaxExpr;
+    }
+  };
+
+
+  //===--------------------------------------------------------------------===//
+  /// SCEVUMaxExpr - This class represents an unsigned maximum selection.
+  ///
+  class SCEVUMaxExpr : public SCEVCommutativeExpr {
+    friend class ScalarEvolution;
+
+    SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
+                 const SCEV *const *O, size_t N)
+      : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
+      // Max never overflows.
+      setHasNoUnsignedWrap(true);
+      setHasNoSignedWrap(true);
+    }
+
+  public:
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVUMaxExpr *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scUMaxExpr;
+    }
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// SCEVUnknown - This means that we are dealing with an entirely unknown SCEV
+  /// value, and only represent it as its LLVM Value. This is the "bottom"
+  /// value for the analysis.
+  ///
+  class SCEVUnknown : public SCEV, private CallbackVH {
+    friend class ScalarEvolution;
+
+    // Implement CallbackVH.
+    virtual void deleted();
+    virtual void allUsesReplacedWith(Value *New);
+
+    /// SE - The parent ScalarEvolution value. This is used to update
+    /// the parent's maps when the value associated with a SCEVUnknown
+    /// is deleted or RAUW'd.
+    ScalarEvolution *SE;
+
+    /// Next - The next pointer in the linked list of all
+    /// SCEVUnknown instances owned by a ScalarEvolution.
+    SCEVUnknown *Next;
+
+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
+                ScalarEvolution *se, SCEVUnknown *next) :
+      SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
+
+  public:
+    Value *getValue() const { return getValPtr(); }
+
+    /// isSizeOf, isAlignOf, isOffsetOf - Test whether this is a special
+    /// constant representing a type size, alignment, or field offset in
+    /// a target-independent manner, and hasn't happened to have been
+    /// folded with other operations into something unrecognizable. This
+    /// is mainly only useful for pretty-printing and other situations
+    /// where it isn't absolutely required for these to succeed.
+    bool isSizeOf(const Type *&AllocTy) const;
+    bool isAlignOf(const Type *&AllocTy) const;
+    bool isOffsetOf(const Type *&STy, Constant *&FieldNo) const;
+
+    const Type *getType() const { return getValPtr()->getType(); }
+
+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const SCEVUnknown *S) { return true; }
+    static inline bool classof(const SCEV *S) {
+      return S->getSCEVType() == scUnknown;
+    }
+  };
+
+  /// SCEVVisitor - This class defines a simple visitor class that may be used
+  /// for various SCEV analysis purposes.
+  template<typename SC, typename RetVal=void>
+  struct SCEVVisitor {
+    RetVal visit(const SCEV *S) {
+      switch (S->getSCEVType()) {
+      case scConstant:
+        return ((SC*)this)->visitConstant((const SCEVConstant*)S);
+      case scTruncate:
+        return ((SC*)this)->visitTruncateExpr((const SCEVTruncateExpr*)S);
+      case scZeroExtend:
+        return ((SC*)this)->visitZeroExtendExpr((const SCEVZeroExtendExpr*)S);
+      case scSignExtend:
+        return ((SC*)this)->visitSignExtendExpr((const SCEVSignExtendExpr*)S);
+      case scAddExpr:
+        return ((SC*)this)->visitAddExpr((const SCEVAddExpr*)S);
+      case scMulExpr:
+        return ((SC*)this)->visitMulExpr((const SCEVMulExpr*)S);
+      case scUDivExpr:
+        return ((SC*)this)->visitUDivExpr((const SCEVUDivExpr*)S);
+      case scAddRecExpr:
+        return ((SC*)this)->visitAddRecExpr((const SCEVAddRecExpr*)S);
+      case scSMaxExpr:
+        return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
+      case scUMaxExpr:
+        return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
+      case scUnknown:
+        return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
+      case scCouldNotCompute:
+        return ((SC*)this)->visitCouldNotCompute((const SCEVCouldNotCompute*)S);
+      default:
+        llvm_unreachable("Unknown SCEV type!");
+      }
+    }
+
+    RetVal visitCouldNotCompute(const SCEVCouldNotCompute *S) {
+      llvm_unreachable("Invalid use of SCEVCouldNotCompute!");
+      return RetVal();
+    }
+  };
+}
+
+#endif
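A usage sketch for the SCEVVisitor template above (editorial; UnknownCounter is a hypothetical client class, not part of this patch). A concrete visitor supplies one visit method per node kind; the CRTP base dispatches on getSCEVType():

    struct UnknownCounter : public SCEVVisitor<UnknownCounter, void> {
      unsigned Count;
      UnknownCounter() : Count(0) {}
      void visitOperands(const SCEVNAryExpr *S) {
        for (SCEVNAryExpr::op_iterator I = S->op_begin(), E = S->op_end();
             I != E; ++I)
          visit(*I);
      }
      void visitConstant(const SCEVConstant *) {}
      void visitTruncateExpr(const SCEVTruncateExpr *S) { visit(S->getOperand()); }
      void visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { visit(S->getOperand()); }
      void visitSignExtendExpr(const SCEVSignExtendExpr *S) { visit(S->getOperand()); }
      void visitAddExpr(const SCEVAddExpr *S) { visitOperands(S); }
      void visitMulExpr(const SCEVMulExpr *S) { visitOperands(S); }
      void visitUDivExpr(const SCEVUDivExpr *S) { visit(S->getLHS()); visit(S->getRHS()); }
      void visitAddRecExpr(const SCEVAddRecExpr *S) { visitOperands(S); }
      void visitSMaxExpr(const SCEVSMaxExpr *S) { visitOperands(S); }
      void visitUMaxExpr(const SCEVUMaxExpr *S) { visitOperands(S); }
      void visitUnknown(const SCEVUnknown *) { ++Count; }
    };

Given a const SCEV *S, "UnknownCounter C; C.visit(S);" leaves the number of SCEVUnknown leaves in C.Count.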
diff --git a/final/include/llvm/Analysis/ScalarEvolutionNormalization.h b/final/include/llvm/Analysis/ScalarEvolutionNormalization.h
new file mode 100644
index 00000000000..342e5937891
--- /dev/null
+++ b/final/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -0,0 +1,78 @@
+//===- llvm/Analysis/ScalarEvolutionNormalization.h - See below -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utilities for working with "normalized" ScalarEvolution
+// expressions.
+//
+// The following example illustrates post-increment uses and how normalized
+// expressions help.
+//
+// for (i=0; i!=n; ++i) {
+//   ...
+// }
+// use(i);
+//
+// While the expression for most uses of i inside the loop is {0,+,1}<%L>, the
+// expression for the use of i outside the loop is {1,+,1}<%L>, since i is
+// incremented at the end of the loop body. This is inconvenient, since it
+// suggests that we need two different induction variables, one that starts
+// at 0 and one that starts at 1. We'd prefer to be able to think of these as
+// the same induction variable, with uses inside the loop using the
+// "pre-incremented" value, and uses after the loop using the
+// "post-incremented" value.
+//
+// Expressions for post-incremented uses are represented as an expression
+// paired with a set of loops for which the expression is in "post-increment"
+// mode (there may be multiple loops).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+#define LLVM_ANALYSIS_SCALAREVOLUTION_NORMALIZATION_H
+
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class Instruction;
+class DominatorTree;
+class Loop;
+class ScalarEvolution;
+class SCEV;
+class Value;
+
+/// TransformKind - Different types of transformations that
+/// TransformForPostIncUse can do.
+enum TransformKind {
+  /// Normalize - Normalize according to the given loops.
+  Normalize,
+  /// NormalizeAutodetect - Detect post-inc opportunities on new expressions,
+  /// update the given loop set, and normalize.
+  NormalizeAutodetect,
+  /// Denormalize - Perform the inverse transform on the expression with the
+  /// given loop set.
+  Denormalize
+};
+
+/// PostIncLoopSet - A set of loops.
+typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
+
+/// TransformForPostIncUse - Transform the given expression according to the
+/// given transformation kind.
+const SCEV *TransformForPostIncUse(TransformKind Kind,
+                                   const SCEV *S,
+                                   Instruction *User,
+                                   Value *OperandValToReplace,
+                                   PostIncLoopSet &Loops,
+                                   ScalarEvolution &SE,
+                                   DominatorTree &DT);
+
+}
+
+#endif
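A sketch of the intended round-trip through TransformForPostIncUse (editorial; S, User, OperandValToReplace, SE, and DT are assumed to be in scope):

    PostIncLoopSet Loops;
    const SCEV *N = TransformForPostIncUse(NormalizeAutodetect, S, User,
                                           OperandValToReplace, Loops, SE, DT);
    // ... reason about N in post-increment form ...
    const SCEV *D = TransformForPostIncUse(Denormalize, N, User,
                                           OperandValToReplace, Loops, SE, DT);
    // Denormalizing with the same loop set inverts the normalization,
    // so D should equal S again.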
diff --git a/final/include/llvm/Analysis/SparsePropagation.h b/final/include/llvm/Analysis/SparsePropagation.h
new file mode 100644
index 00000000000..c3c2f4b0668
--- /dev/null
+++ b/final/include/llvm/Analysis/SparsePropagation.h
@@ -0,0 +1,206 @@
+//===- SparsePropagation.h - Sparse Conditional Property Propagation ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_SPARSE_PROPAGATION_H
+#define LLVM_ANALYSIS_SPARSE_PROPAGATION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <set>
+#include <vector>
+
+namespace llvm {
+  class Value;
+  class Constant;
+  class Argument;
+  class Instruction;
+  class PHINode;
+  class TerminatorInst;
+  class BasicBlock;
+  class Function;
+  class SparseSolver;
+  class raw_ostream;
+
+  template <typename T> class SmallVectorImpl;
+
+/// AbstractLatticeFunction - This class is implemented by the dataflow instance
+/// to specify what the lattice values are and how they handle merges etc. This
+/// gives the client the power to compute lattice values from instructions,
+/// constants, etc. The requirement is that lattice values must all fit into
+/// a void*. If a void* is not sufficient, the implementation should use this
+/// pointer as a pointer into a uniquing set or something similar.
+///
+class AbstractLatticeFunction {
+public:
+  typedef void *LatticeVal;
+private:
+  LatticeVal UndefVal, OverdefinedVal, UntrackedVal;
+public:
+  AbstractLatticeFunction(LatticeVal undefVal, LatticeVal overdefinedVal,
+                          LatticeVal untrackedVal) {
+    UndefVal = undefVal;
+    OverdefinedVal = overdefinedVal;
+    UntrackedVal = untrackedVal;
+  }
+  virtual ~AbstractLatticeFunction();
+
+  LatticeVal getUndefVal() const { return UndefVal; }
+  LatticeVal getOverdefinedVal() const { return OverdefinedVal; }
+  LatticeVal getUntrackedVal() const { return UntrackedVal; }
+
+  /// IsUntrackedValue - If the specified Value is something that is obviously
+  /// uninteresting to the analysis (and would always return UntrackedVal),
+  /// this function can return true to avoid pointless work.
+  virtual bool IsUntrackedValue(Value *V) {
+    return false;
+  }
+
+  /// ComputeConstant - Given a constant value, compute and return a lattice
+  /// value corresponding to the specified constant.
+  virtual LatticeVal ComputeConstant(Constant *C) {
+    return getOverdefinedVal(); // always safe
+  }
+
+  /// IsSpecialCasedPHI - Given a PHI node, determine whether this PHI node is
+  /// one that we want to handle through ComputeInstructionState.
+  virtual bool IsSpecialCasedPHI(PHINode *PN) {
+    return false;
+  }
+
+  /// GetConstant - If the specified lattice value is representable as an LLVM
+  /// constant value, return it. Otherwise return null. The returned value
+  /// must be in the same LLVM type as Val.
+  virtual Constant *GetConstant(LatticeVal LV, Value *Val, SparseSolver &SS) {
+    return 0;
+  }
+
+  /// ComputeArgument - Given a formal argument value, compute and return a
+  /// lattice value corresponding to the specified argument.
+  virtual LatticeVal ComputeArgument(Argument *I) {
+    return getOverdefinedVal(); // always safe
+  }
+
+  /// MergeValues - Compute and return the merge of the two specified lattice
+  /// values. Merging should only move one direction down the lattice to
+  /// guarantee convergence (toward overdefined).
+  virtual LatticeVal MergeValues(LatticeVal X, LatticeVal Y) {
+    return getOverdefinedVal(); // always safe, never useful.
+  }
+
+  /// ComputeInstructionState - Given an instruction and the lattice state of
+  /// its operands, compute the result value of the instruction.
+  virtual LatticeVal ComputeInstructionState(Instruction &I, SparseSolver &SS) {
+    return getOverdefinedVal(); // always safe, never useful.
+  }
+
+  /// PrintValue - Render the specified lattice value to the specified stream.
+  virtual void PrintValue(LatticeVal V, raw_ostream &OS);
+};
+
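+// Editorial sketch of a minimal lattice function (hypothetical client code,
+// not part of this header): three opaque states and every transfer function
+// left at its safe default. A real client would override ComputeConstant,
+// MergeValues, etc.
+//
+//   class TrivialLattice : public AbstractLatticeFunction {
+//   public:
+//     TrivialLattice()
+//       : AbstractLatticeFunction((LatticeVal)1,    // undef
+//                                 (LatticeVal)2,    // overdefined
+//                                 (LatticeVal)3) {} // untracked
+//   };
+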
+/// SparseSolver - This class is a general purpose solver for Sparse Conditional
+/// Propagation with a programmable lattice function.
+///
+class SparseSolver {
+  typedef AbstractLatticeFunction::LatticeVal LatticeVal;
+
+  /// LatticeFunc - This is the object that knows the lattice and how to
+  /// compute transfer functions.
+  AbstractLatticeFunction *LatticeFunc;
+
+  DenseMap<Value*, LatticeVal> ValueState;   // The state each value is in.
+  SmallPtrSet<BasicBlock*, 16> BBExecutable; // The bbs that are executable.
+
+  std::vector<Instruction*> InstWorkList;    // Worklist of insts to process.
+
+  std::vector<BasicBlock*> BBWorkList;       // The BasicBlock work list
+
+  /// KnownFeasibleEdges - Entries in this set are edges which have already had
+  /// PHI nodes retriggered.
+  typedef std::pair<BasicBlock*, BasicBlock*> Edge;
+  std::set<Edge> KnownFeasibleEdges;
+
+  SparseSolver(const SparseSolver&);    // DO NOT IMPLEMENT
+  void operator=(const SparseSolver&);  // DO NOT IMPLEMENT
+public:
+  explicit SparseSolver(AbstractLatticeFunction *Lattice)
+    : LatticeFunc(Lattice) {}
+  ~SparseSolver() {
+    delete LatticeFunc;
+  }
+
+  /// Solve - Solve for constants and executable blocks.
+  ///
+  void Solve(Function &F);
+
+  void Print(Function &F, raw_ostream &OS) const;
+
+  /// getLatticeState - Return the LatticeVal object that corresponds to the
+  /// value. If a value is not in the map, it is returned as untracked,
+  /// unlike the getOrInitValueState method.
+  LatticeVal getLatticeState(Value *V) const {
+    DenseMap<Value*, LatticeVal>::const_iterator I = ValueState.find(V);
+    return I != ValueState.end() ? I->second : LatticeFunc->getUntrackedVal();
+  }
+
+  /// getOrInitValueState - Return the LatticeVal object that corresponds to the
+  /// value, initializing the value's state if it hasn't been entered into the
+  /// map yet. This function is necessary because not all values should start
+  /// out in the underdefined state... Arguments should be overdefined, and
+  /// constants should be marked as constants.
+  ///
+  LatticeVal getOrInitValueState(Value *V);
+
+  /// isEdgeFeasible - Return true if the control flow edge from the 'From'
+  /// basic block to the 'To' basic block is currently feasible. If
+  /// AggressiveUndef is true, then this treats values with unknown lattice
+  /// values as undefined. This is generally only useful when solving the
+  /// lattice, not when querying it.
+  bool isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+                      bool AggressiveUndef = false);
+
+  /// isBlockExecutable - Return true if there are any known feasible
+  /// edges into the basic block. This is generally only useful when
+  /// querying the lattice.
+  bool isBlockExecutable(BasicBlock *BB) const {
+    return BBExecutable.count(BB);
+  }
+
+private:
+  /// UpdateState - When the state for some instruction is potentially updated,
+  /// this function notices and adds I to the worklist if needed.
+  void UpdateState(Instruction &Inst, LatticeVal V);
+
+  /// MarkBlockExecutable - This method can be used by clients to mark all of
+  /// the blocks that are known to be intrinsically live in the processed unit.
+  void MarkBlockExecutable(BasicBlock *BB);
+
+  /// markEdgeExecutable - Mark an edge as executable, marking the destination
+  /// basic block executable and adding it to the BB work list if it is not
+  /// already executable.
+  void markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest);
+
+  /// getFeasibleSuccessors - Return a vector of booleans to indicate which
+  /// successors are reachable from a given terminator instruction.
+  void getFeasibleSuccessors(TerminatorInst &TI, SmallVectorImpl<bool> &Succs,
+                             bool AggressiveUndef);
+
+  void visitInst(Instruction &I);
+  void visitPHINode(PHINode &I);
+  void visitTerminatorInst(TerminatorInst &TI);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ANALYSIS_SPARSE_PROPAGATION_H
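A hypothetical end-to-end use of the solver (editorial sketch; TrivialLattice is the example lattice sketched above, F a Function; the solver takes ownership of the lattice and deletes it in its destructor):

    SparseSolver Solver(new TrivialLattice());
    Solver.Solve(F);
    bool EntryLive = Solver.isBlockExecutable(&F.getEntryBlock());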
diff --git a/final/include/llvm/Analysis/Trace.h b/final/include/llvm/Analysis/Trace.h
new file mode 100644
index 00000000000..99651e192d3
--- /dev/null
+++ b/final/include/llvm/Analysis/Trace.h
@@ -0,0 +1,119 @@
+//===- llvm/Analysis/Trace.h - Represent one trace of LLVM code -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange, basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_TRACE_H
+#define LLVM_ANALYSIS_TRACE_H
+
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+  class BasicBlock;
+  class Function;
+  class Module;
+  class raw_ostream;
+
+class Trace {
+  typedef std::vector<BasicBlock *> BasicBlockListType;
+  BasicBlockListType BasicBlocks;
+
+public:
+  /// Trace ctor - Make a new trace from a vector of basic blocks,
+  /// residing in the function which is the parent of the first
+  /// basic block in the vector.
+  ///
+  Trace(const std::vector<BasicBlock *> &vBB) : BasicBlocks (vBB) {}
+
+  /// getEntryBasicBlock - Return the entry basic block (first block)
+  /// of the trace.
+  ///
+  BasicBlock *getEntryBasicBlock () const { return BasicBlocks[0]; }
+
+  /// operator[]/getBlock - Return basic block N in the trace.
+  ///
+  BasicBlock *operator[](unsigned i) const { return BasicBlocks[i]; }
+  BasicBlock *getBlock(unsigned i)   const { return BasicBlocks[i]; }
+
+  /// getFunction - Return this trace's parent function.
+  ///
+  Function *getFunction () const;
+
+  /// getModule - Return the Module that contains this trace's parent
+  /// function.
+  ///
+  Module *getModule () const;
+
+  /// getBlockIndex - Return the index of the specified basic block in the
+  /// trace, or -1 if it is not in the trace.
+  int getBlockIndex(const BasicBlock *X) const {
+    for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i)
+      if (BasicBlocks[i] == X)
+        return i;
+    return -1;
+  }
+
+  /// contains - Returns true if this trace contains the given basic
+  /// block.
+  ///
+  bool contains(const BasicBlock *X) const {
+    return getBlockIndex(X) != -1;
+  }
+
+  /// Returns true if B1 occurs before B2 in the trace, or if it is the same
+  /// block as B2. Both blocks must be in the trace.
+  ///
+  bool dominates(const BasicBlock *B1, const BasicBlock *B2) const {
+    int B1Idx = getBlockIndex(B1), B2Idx = getBlockIndex(B2);
+    assert(B1Idx != -1 && B2Idx != -1 && "Block is not in the trace!");
+    return B1Idx <= B2Idx;
+  }
+
+  // BasicBlock iterators...
+  typedef BasicBlockListType::iterator iterator;
+  typedef BasicBlockListType::const_iterator const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
+
+  iterator begin() { return BasicBlocks.begin(); }
+  const_iterator begin() const { return BasicBlocks.begin(); }
+  iterator end () { return BasicBlocks.end(); }
+  const_iterator end () const { return BasicBlocks.end(); }
+
+  reverse_iterator rbegin() { return BasicBlocks.rbegin(); }
+  const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
+  reverse_iterator rend () { return BasicBlocks.rend(); }
+  const_reverse_iterator rend () const { return BasicBlocks.rend(); }
+
+  unsigned size() const { return BasicBlocks.size(); }
+  bool empty() const { return BasicBlocks.empty(); }
+
+  iterator erase(iterator q) { return BasicBlocks.erase (q); }
+  iterator erase(iterator q1, iterator q2) { return BasicBlocks.erase (q1, q2); }
+
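+  // Editorial usage sketch (hypothetical client code):
+  //
+  //   std::vector<BasicBlock *> Blocks = ...; // hot path, entry block first
+  //   Trace T(Blocks);
+  //   if (T.contains(BB) && T.dominates(T.getEntryBasicBlock(), BB))
+  //     ...; // BB lies on the trace, at or after the entry
+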
+  /// print - Write trace to output stream.
+  ///
+  void print(raw_ostream &O) const;
+
+  /// dump - Debugger convenience method; writes trace to standard error
+  /// output stream.
+  ///
+  void dump() const;
+};
+
+} // end namespace llvm
+
+#endif // TRACE_H
diff --git a/final/include/llvm/Analysis/ValueTracking.h b/final/include/llvm/Analysis/ValueTracking.h
new file mode 100644
index 00000000000..6df1693c78e
--- /dev/null
+++ b/final/include/llvm/Analysis/ValueTracking.h
@@ -0,0 +1,163 @@
+//===- llvm/Analysis/ValueTracking.h - Walk computations --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_VALUETRACKING_H
+#define LLVM_ANALYSIS_VALUETRACKING_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+  template <typename T> class SmallVectorImpl;
+  class Value;
+  class Instruction;
+  class APInt;
+  class TargetData;
+
+  /// ComputeMaskedBits - Determine which of the bits specified in Mask are
+  /// known to be either zero or one and return them in the KnownZero/KnownOne
+  /// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+  /// processing.
+  ///
+  /// This function is defined on values with integer type, values with pointer
+  /// type (but only if TD is non-null), and vectors of integers. In the case
+  /// where V is a vector, the mask, known zero, and known one values are the
+  /// same width as the vector element, and the bit is set only if it is true
+  /// for all of the elements in the vector.
+  void ComputeMaskedBits(Value *V, const APInt &Mask, APInt &KnownZero,
+                         APInt &KnownOne, const TargetData *TD = 0,
+                         unsigned Depth = 0);
+
+  /// ComputeSignBit - Determine whether the sign bit is known to be zero or
+  /// one. Convenience wrapper around ComputeMaskedBits.
+  void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                      const TargetData *TD = 0, unsigned Depth = 0);
+
+  /// isPowerOfTwo - Return true if the given value is known to have exactly one
+  /// bit set when defined. For vectors return true if every element is known to
+  /// be a power of two when defined. Supports values with integer or pointer
+  /// type and vectors of integers.
+  bool isPowerOfTwo(Value *V, const TargetData *TD = 0, unsigned Depth = 0);
+
+  /// isKnownNonZero - Return true if the given value is known to be non-zero
+  /// when defined. For vectors return true if every element is known to be
+  /// non-zero when defined. Supports values with integer or pointer type and
+  /// vectors of integers.
+  bool isKnownNonZero(Value *V, const TargetData *TD = 0, unsigned Depth = 0);
+
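+  // Editorial usage sketch (hypothetical; V an i32 Value*, TD a TargetData*):
+  // prove that the low bit of V is zero, i.e. that V is even.
+  //
+  //   APInt Mask = APInt::getLowBitsSet(32, 1);  // only examine bit 0
+  //   APInt KnownZero(32, 0), KnownOne(32, 0);
+  //   ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  //   bool IsEven = KnownZero[0];
+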
+  /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
+  /// this predicate to simplify operations downstream. Mask is known to be
+  /// zero for bits that V cannot have.
+  ///
+  /// This function is defined on values with integer type, values with pointer
+  /// type (but only if TD is non-null), and vectors of integers. In the case
+  /// where V is a vector, the mask, known zero, and known one values are the
+  /// same width as the vector element, and the bit is set only if it is true
+  /// for all of the elements in the vector.
+  bool MaskedValueIsZero(Value *V, const APInt &Mask,
+                         const TargetData *TD = 0, unsigned Depth = 0);
+
+
+  /// ComputeNumSignBits - Return the number of times the sign bit of the
+  /// register is replicated into the other bits. We know that at least 1 bit
+  /// is always equal to the sign bit (itself), but other cases can give us
+  /// information. For example, immediately after an "ashr X, 2", we know that
+  /// the top 3 bits are all equal to each other, so we return 3.
+  ///
+  /// 'Op' must have a scalar integer type.
+  ///
+  unsigned ComputeNumSignBits(Value *Op, const TargetData *TD = 0,
+                              unsigned Depth = 0);
+
+  /// ComputeMultiple - This function computes the integer multiple of Base that
+  /// equals V. If successful, it returns true and returns the multiple in
+  /// Multiple. If unsuccessful, it returns false. Also, if V can be
+  /// simplified to an integer, then the simplified V is returned in Val. Look
+  /// through sext only if LookThroughSExt=true.
+  bool ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+                       bool LookThroughSExt = false,
+                       unsigned Depth = 0);
+
+  /// CannotBeNegativeZero - Return true if we can prove that the specified FP
+  /// value is never equal to -0.0.
+  ///
+  bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0);
+
+  /// isBytewiseValue - If the specified value can be set by repeating the same
+  /// byte in memory, return the i8 value that it is represented with. This is
+  /// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+  /// i16 0xF0F0, double 0.0 etc. If the value can't be handled with a repeated
+  /// byte store (e.g. i16 0x1234), return null.
+  Value *isBytewiseValue(Value *V);
+
+  /// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+  /// the scalar value indexed is already around as a register, for example if
+  /// it were inserted directly into the aggregate.
+  ///
+  /// If InsertBefore is not null, this function will duplicate (modified)
+  /// insertvalues when a part of a nested struct is extracted.
+  Value *FindInsertedValue(Value *V,
+                           const unsigned *idx_begin,
+                           const unsigned *idx_end,
+                           Instruction *InsertBefore = 0);
+
+  /// This is a convenience wrapper for finding values indexed by a single index
+  /// only.
+  inline Value *FindInsertedValue(Value *V, const unsigned Idx,
+                                  Instruction *InsertBefore = 0) {
+    const unsigned Idxs[1] = { Idx };
+    return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore);
+  }
+
+  /// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+  /// it can be expressed as a base pointer plus a constant offset. Return the
+  /// base and offset to the caller.
+  Value *GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                          const TargetData &TD);
+  static inline const Value *
+  GetPointerBaseWithConstantOffset(const Value *Ptr, int64_t &Offset,
+                                   const TargetData &TD) {
+    return GetPointerBaseWithConstantOffset(const_cast<Value *>(Ptr), Offset,
+                                            TD);
+  }
+
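+  // Editorial usage sketch (hypothetical): decompose a pointer into a base
+  // plus a byte offset. For a GEP such as "getelementptr i32* %p, i64 4"
+  // (with 4-byte i32), Base is %p and Offset becomes 16.
+  //
+  //   int64_t Offset = 0;
+  //   Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD);
+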
+  /// GetConstantStringInfo - This function computes the length of a
+  /// null-terminated C string pointed to by V. If successful, it returns true
+  /// and returns the string in Str. If unsuccessful, it returns false. If
+  /// StopAtNul is set to true (the default), the returned string is truncated
+  /// by a nul character in the global. If StopAtNul is false, the nul
+  /// character is included in the result string.
+  bool GetConstantStringInfo(const Value *V, std::string &Str,
+                             uint64_t Offset = 0,
+                             bool StopAtNul = true);
+
+  /// GetStringLength - If we can compute the length of the string pointed to by
+  /// the specified pointer, return 'len+1'. If we can't, return 0.
+  uint64_t GetStringLength(Value *V);
+
+  /// GetUnderlyingObject - This method strips off any GEP address adjustments
+  /// and pointer casts from the specified value, returning the original object
+  /// being addressed. Note that the returned value has pointer type if the
+  /// specified value does. If the MaxLookup value is non-zero, it limits the
+  /// number of instructions to be stripped off.
+  Value *GetUnderlyingObject(Value *V, const TargetData *TD = 0,
+                             unsigned MaxLookup = 6);
+  static inline const Value *
+  GetUnderlyingObject(const Value *V, const TargetData *TD = 0,
+                      unsigned MaxLookup = 6) {
+    return GetUnderlyingObject(const_cast<Value *>(V), TD, MaxLookup);
+  }
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Analysis/Verifier.h b/final/include/llvm/Analysis/Verifier.h
new file mode 100644
index 00000000000..ce8aeef0764
--- /dev/null
+++ b/final/include/llvm/Analysis/Verifier.h
@@ -0,0 +1,75 @@
+//===-- llvm/Analysis/Verifier.h - LLVM IR Verifier -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the function verifier interface, which can be used for
+// sanity checking of input to the system, and for checking that
+// transformations haven't done something bad.
+//
+// Note that this does not provide full 'java style' security and verification;
+// instead it just tries to ensure that code is well formed.
+//
+// To see what specifically is checked, look at the top of Verifier.cpp
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_VERIFIER_H
+#define LLVM_ANALYSIS_VERIFIER_H
+
+#include <string>
+
+namespace llvm {
+
+class FunctionPass;
+class Module;
+class Function;
+
+/// @brief An enumeration to specify the action to be taken if errors found.
+///
+/// This enumeration is used in the functions below to indicate what should
+/// happen if the verifier finds errors. Each of the functions that uses
+/// this enumeration as an argument provides a default value for it. The
+/// actions are listed below.
+enum VerifierFailureAction {
+  AbortProcessAction,   ///< verifyModule will print to stderr and abort()
+  PrintMessageAction,   ///< verifyModule will print to stderr and return true
+  ReturnStatusAction    ///< verifyModule will just return true
+};
+
+/// @brief Create a verifier pass.
+///
+/// Check a module or function for validity. When the pass is used, the
+/// action indicated by the \p action argument will be used if errors are
+/// found.
+FunctionPass *createVerifierPass(
+  VerifierFailureAction action = AbortProcessAction ///< Action to take
+);
+
+/// @brief Check a module for errors.
+///
+/// If there are no errors, the function returns false. If an error is found,
+/// the action taken depends on the \p action parameter.
+/// This should only be used for debugging, because it plays games with
+/// PassManagers and stuff.
+
+bool verifyModule(
+  const Module &M,  ///< The module to be verified
+  VerifierFailureAction action = AbortProcessAction, ///< Action to take
+  std::string *ErrorInfo = 0      ///< Information about failures.
+);
+
+// verifyFunction - Check a function for errors, useful when debugging a
+// pass.
+bool verifyFunction(
+  const Function &F,  ///< The function to be verified
+  VerifierFailureAction action = AbortProcessAction ///< Action to take
+);
+
+} // End llvm namespace
+
+#endif
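A minimal driver for the verifier (editorial sketch; assumes a Module &M in scope and that the caller reports the error text itself):

    std::string ErrInfo;
    if (verifyModule(M, ReturnStatusAction, &ErrInfo)) {
      // Broken module: ErrInfo holds the verifier's description.
    }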
diff --git a/final/include/llvm/Argument.h b/final/include/llvm/Argument.h
new file mode 100644
index 00000000000..71c001fbe18
--- /dev/null
+++ b/final/include/llvm/Argument.h
@@ -0,0 +1,88 @@
+//===-- llvm/Argument.h - Definition of the Argument class ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Argument class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ARGUMENT_H
+#define LLVM_ARGUMENT_H
+
+#include "llvm/Value.h"
+#include "llvm/Attributes.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/Twine.h"
+
+namespace llvm {
+
+template<typename ValueSubClass, typename ItemParentClass>
+  class SymbolTableListTraits;
+
+/// A class to represent an incoming formal argument to a Function. An argument
+/// is a very simple Value. It is essentially a typed value with an optional
+/// name. When used in the body of a function, it represents the value of the
+/// actual argument the function was called with.
+/// @brief LLVM Argument representation
+class Argument : public Value, public ilist_node<Argument> {
+  Function *Parent;
+
+  friend class SymbolTableListTraits<Argument, Function>;
+  void setParent(Function *parent);
+
+public:
+  /// Argument ctor - If Function argument is specified, this argument is
+  /// inserted at the end of the argument list for the function.
+  ///
+  explicit Argument(const Type *Ty, const Twine &Name = "", Function *F = 0);
+
+  inline const Function *getParent() const { return Parent; }
+  inline       Function *getParent()       { return Parent; }
+
+  /// getArgNo - Return the index of this formal argument in its containing
+  /// function. For example in "void foo(int a, float b)" a is 0 and b is 1.
+  unsigned getArgNo() const;
+
+  /// hasByValAttr - Return true if this argument has the byval attribute on it
+  /// in its containing function.
+  bool hasByValAttr() const;
+
+  /// hasNestAttr - Return true if this argument has the nest attribute on
+  /// it in its containing function.
+  bool hasNestAttr() const;
+
+  /// hasNoAliasAttr - Return true if this argument has the noalias attribute on
+  /// it in its containing function.
+  bool hasNoAliasAttr() const;
+
+  /// hasNoCaptureAttr - Return true if this argument has the nocapture
+  /// attribute on it in its containing function.
+  bool hasNoCaptureAttr() const;
+
+  /// hasStructRetAttr - Return true if this argument has the sret attribute on
+  /// it in its containing function.
+  bool hasStructRetAttr() const;
+
+  /// addAttr - Add an Attribute to an argument
+  void addAttr(Attributes);
+
+  /// removeAttr - Remove an Attribute from an argument
+  void removeAttr(Attributes);
+
+  /// classof - Methods for support type inquiry through isa, cast, and
+  /// dyn_cast:
+  ///
+  static inline bool classof(const Argument *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == ArgumentVal;
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Assembly/AssemblyAnnotationWriter.h b/final/include/llvm/Assembly/AssemblyAnnotationWriter.h
new file mode 100644
index 00000000000..3a65f97a5b5
--- /dev/null
+++ b/final/include/llvm/Assembly/AssemblyAnnotationWriter.h
@@ -0,0 +1,63 @@
+//===-- AssemblyAnnotationWriter.h - Annotation .ll files -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Clients of the assembly writer can use this interface to add their own
+// special-purpose annotations to LLVM assembly language printouts. Note that
+// the assembly parser won't be able to parse these, in general, so
+// implementations are advised to print stuff as LLVM comments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASSEMBLY_ASMANNOTATIONWRITER_H
+#define LLVM_ASSEMBLY_ASMANNOTATIONWRITER_H
+
+namespace llvm {
+
+class Function;
+class BasicBlock;
+class Instruction;
+class raw_ostream;
+class formatted_raw_ostream;
+
+class AssemblyAnnotationWriter {
+public:
+
+  virtual ~AssemblyAnnotationWriter();
+
+  /// emitFunctionAnnot - This may be implemented to emit a string right before
+  /// the start of a function.
+  virtual void emitFunctionAnnot(const Function *F,
+                                 formatted_raw_ostream &OS) {}
+
+  /// emitBasicBlockStartAnnot - This may be implemented to emit a string right
+  /// after the basic block label, but before the first instruction in the
+  /// block.
+  virtual void emitBasicBlockStartAnnot(const BasicBlock *BB,
+                                        formatted_raw_ostream &OS) {
+  }
+
+  /// emitBasicBlockEndAnnot - This may be implemented to emit a string right
+  /// after the basic block.
+  virtual void emitBasicBlockEndAnnot(const BasicBlock *BB,
+                                      formatted_raw_ostream &OS) {
+  }
+
+  /// emitInstructionAnnot - This may be implemented to emit a string right
+  /// before an instruction is emitted.
+  virtual void emitInstructionAnnot(const Instruction *I,
+                                    formatted_raw_ostream &OS) {}
+
+  /// printInfoComment - This may be implemented to emit a comment to the
+  /// right of an instruction or global value.
+  virtual void printInfoComment(const Value &V, formatted_raw_ostream &OS) {}
+};
+
+} // End llvm namespace
+
+#endif
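A sketch of a client annotator (editorial; CommentWriter is a hypothetical class). Subclasses override only the hooks they need, and output should stay in LLVM comment syntax so the printed module remains parsable:

    class CommentWriter : public AssemblyAnnotationWriter {
    public:
      virtual void emitInstructionAnnot(const Instruction *I,
                                        formatted_raw_ostream &OS) {
        OS << "; about to print an instruction\n";  // an LLVM comment
      }
    };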
diff --git a/final/include/llvm/Assembly/Parser.h b/final/include/llvm/Assembly/Parser.h
new file mode 100644
index 00000000000..82ec6d81367
--- /dev/null
+++ b/final/include/llvm/Assembly/Parser.h
@@ -0,0 +1,65 @@
+//===-- llvm/Assembly/Parser.h - Parser for VM assembly files ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These classes are implemented by the lib/AsmParser library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASSEMBLY_PARSER_H
+#define LLVM_ASSEMBLY_PARSER_H
+
+#include <string>
+
+namespace llvm {
+
+class Module;
+class MemoryBuffer;
+class SMDiagnostic;
+class raw_ostream;
+class LLVMContext;
+
+/// This function is the main interface to the LLVM Assembly Parser. It parses
+/// an ASCII file that (presumably) contains LLVM Assembly code. It returns a
+/// Module (intermediate representation) with the corresponding features. Note
+/// that this does not verify that the generated Module is valid, so you should
+/// run the verifier after parsing the file to check that it is okay.
+/// @brief Parse LLVM Assembly from a file
+Module *ParseAssemblyFile(
+  const std::string &Filename, ///< The name of the file to parse
+  SMDiagnostic &Error,         ///< Error result info.
+  LLVMContext &Context         ///< Context in which to allocate globals info.
+);
+
+/// This function is a secondary interface to the LLVM Assembly Parser. It
+/// parses an ASCII string that (presumably) contains LLVM Assembly code. It
+/// returns a Module (intermediate representation) with the corresponding
+/// features. Note that this does not verify that the generated Module is
+/// valid, so you should run the verifier after parsing the file to check that
+/// it is okay.
+/// @brief Parse LLVM Assembly from a string
+Module *ParseAssemblyString(
+  const char *AsmString, ///< The string containing assembly
+  Module *M,             ///< A module to add the assembly to.
+  SMDiagnostic &Error,   ///< Error result info.
+  LLVMContext &Context
+);
+
+/// This function is the low-level interface to the LLVM Assembly Parser.
+/// ParseAssemblyFile and ParseAssemblyString are wrappers around this function.
+/// @brief Parse LLVM Assembly from a MemoryBuffer. This function *always*
+/// takes ownership of the MemoryBuffer.
+Module *ParseAssembly(
+  MemoryBuffer *F,     ///< The MemoryBuffer containing assembly
+  Module *M,           ///< A module to add the assembly to.
+  SMDiagnostic &Err,   ///< Error result info.
+  LLVMContext &Context
+);
+
+} // End llvm namespace
+
+#endif
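A typical parse-and-report sequence (editorial sketch; assumes errs() from llvm/Support/raw_ostream.h, and "example" is a hypothetical program name):

    LLVMContext Context;
    SMDiagnostic Err;
    Module *M = ParseAssemblyString("define void @f() { ret void }",
                                    0, Err, Context);
    if (M == 0)
      Err.Print("example", errs());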
diff --git a/final/include/llvm/Assembly/PrintModulePass.h b/final/include/llvm/Assembly/PrintModulePass.h
new file mode 100644
index 00000000000..239fbcc0c8c
--- /dev/null
+++ b/final/include/llvm/Assembly/PrintModulePass.h
@@ -0,0 +1,42 @@
+//===- llvm/Assembly/PrintModulePass.h - Printing Pass ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines two passes to print out a module. The PrintModulePass pass
+// simply prints out the entire module when it is executed. The
+// PrintFunctionPass class is designed to be pipelined with other
+// FunctionPass's, and prints out the functions of the module as they are
+// processed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASSEMBLY_PRINTMODULEPASS_H
+#define LLVM_ASSEMBLY_PRINTMODULEPASS_H
+
+#include <string>
+
+namespace llvm {
+  class FunctionPass;
+  class ModulePass;
+  class raw_ostream;
+
+  /// createPrintModulePass - Create and return a pass that writes the
+  /// module to the specified raw_ostream.
+  ModulePass *createPrintModulePass(raw_ostream *OS,
+                                    bool DeleteStream=false,
+                                    const std::string &Banner = "");
+
+  /// createPrintFunctionPass - Create and return a pass that prints
+  /// functions to the specified raw_ostream as they are processed.
+  FunctionPass *createPrintFunctionPass(const std::string &Banner,
+                                        raw_ostream *OS,
+                                        bool DeleteStream=false);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Assembly/Writer.h b/final/include/llvm/Assembly/Writer.h
new file mode 100644
index 00000000000..c5b239079a0
--- /dev/null
+++ b/final/include/llvm/Assembly/Writer.h
@@ -0,0 +1,78 @@
+//===-- llvm/Assembly/Writer.h - Printer for LLVM assembly files --*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This functionality is implemented by lib/VMCore/AsmWriter.cpp.
+// This library is used to print LLVM assembly language files to an output
+// stream. It can print LLVM code at a variety of granularities, including
+// Modules, BasicBlocks, and Instructions. This makes it useful for debugging.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASSEMBLY_WRITER_H
+#define LLVM_ASSEMBLY_WRITER_H
+
+#include <string>
+
+namespace llvm {
+
+class Type;
+class Module;
+class Value;
+class raw_ostream;
+template <typename T> class SmallVectorImpl;
+
+/// TypePrinting - Type printing machinery.
+class TypePrinting {
+  void *TypeNames; // A map to remember type names.
+  TypePrinting(const TypePrinting &);   // DO NOT IMPLEMENT
+  void operator=(const TypePrinting&);  // DO NOT IMPLEMENT
+public:
+  TypePrinting();
+  ~TypePrinting();
+
+  void clear();
+
+  void print(const Type *Ty, raw_ostream &OS, bool IgnoreTopLevelName = false);
+
+  void printAtLeastOneLevel(const Type *Ty, raw_ostream &OS) {
+    print(Ty, OS, true);
+  }
+
+  /// hasTypeName - Return true if the type has a name in TypeNames, false
+  /// otherwise.
+  bool hasTypeName(const Type *Ty) const;
+
+  /// addTypeName - Add a name for the specified type if it doesn't already have
+  /// one. This name will be printed instead of the structural version of the
+  /// type in order to make the output more concise.
+  void addTypeName(const Type *Ty, const std::string &N);
+
+private:
+  void CalcTypeName(const Type *Ty, SmallVectorImpl<const Type *> &TypeStack,
+                    raw_ostream &OS, bool IgnoreTopLevelName = false);
+};
+
+// WriteTypeSymbolic - This attempts to write the specified type as a symbolic
+// type, if there is an entry in the Module's symbol table for the specified
+// type or one of its component types.
+//
+void WriteTypeSymbolic(raw_ostream &, const Type *, const Module *M);
+
+// WriteAsOperand - Write the name of the specified value out to the specified
+// ostream. This can be useful when you just want to print int %reg126, not the
+// whole instruction that generated it. If you specify a Module for context,
+// then even constants get pretty-printed; for example, the type of a null
+// pointer is printed symbolically.
+//
+void WriteAsOperand(raw_ostream &, const Value *, bool PrintTy = true,
+                    const Module *Context = 0);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Attributes.h b/final/include/llvm/Attributes.h
new file mode 100644
index 00000000000..da6188b1a8e
--- /dev/null
+++ b/final/include/llvm/Attributes.h
@@ -0,0 +1,290 @@
+//===-- llvm/Attributes.h - Container for Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the simple types necessary to represent the
+// attributes associated with functions and their calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ATTRIBUTES_H
+#define LLVM_ATTRIBUTES_H
+
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <string>
+
+namespace llvm {
+class Type;
+
+/// Attributes - A bitset of attributes.
+typedef unsigned Attributes;
+
+namespace Attribute {
+
+/// Function parameters and results can have attributes to indicate how they
+/// should be treated by optimizations and code generation. This enumeration
+/// lists the attributes that can be associated with parameters, function
+/// results or the function itself.
+/// @brief Function attributes.
+
+const Attributes None      = 0;     ///< No attributes have been set
+const Attributes ZExt      = 1<<0;  ///< Zero extended before/after call
+const Attributes SExt      = 1<<1;  ///< Sign extended before/after call
+const Attributes NoReturn  = 1<<2;  ///< Mark the function as not returning
+const Attributes InReg     = 1<<3;  ///< Force argument to be passed in register
+const Attributes StructRet = 1<<4;  ///< Hidden pointer to structure to return
+const Attributes NoUnwind  = 1<<5;  ///< Function doesn't unwind stack
+const Attributes NoAlias   = 1<<6;  ///< Considered to not alias after call
+const Attributes ByVal     = 1<<7;  ///< Pass structure by value
+const Attributes Nest      = 1<<8;  ///< Nested function static chain
+const Attributes ReadNone  = 1<<9;  ///< Function does not access memory
+const Attributes ReadOnly  = 1<<10; ///< Function only reads from memory
+const Attributes NoInline        = 1<<11; ///< inline=never
+const Attributes AlwaysInline    = 1<<12; ///< inline=always
+const Attributes OptimizeForSize = 1<<13; ///< opt_size
+const Attributes StackProtect    = 1<<14; ///< Stack protection.
+const Attributes StackProtectReq = 1<<15; ///< Stack protection required.
+const Attributes Alignment = 31<<16; ///< Alignment of parameter (5 bits)
+                                     ///< stored as log2 of alignment with +1 bias
+                                     ///< 0 means unaligned (different from align 1)
+const Attributes NoCapture = 1<<21; ///< Function creates no aliases of pointer
+const Attributes NoRedZone = 1<<22; ///< Disable redzone
+const Attributes NoImplicitFloat = 1<<23; ///< Disable implicit floating point
+                                          ///< instructions.
+const Attributes Naked           = 1<<24; ///< Naked function
+const Attributes InlineHint      = 1<<25; ///< Source said inlining was
+                                          ///< desirable
+const Attributes StackAlignment  = 7<<26; ///< Alignment of stack for
+                                          ///< function (3 bits) stored as log2
+                                          ///< of alignment with +1 bias
+                                          ///< 0 means unaligned (different from
+                                          ///< alignstack(1))
+const Attributes Hotpatch  = 1<<29; ///< Function should have special
+                                    ///< 'hotpatch' sequence in prologue
+
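+// Editorial usage sketch (hypothetical): Attributes is a plain bitset, so
+// individual attributes combine with bitwise-or and are tested with
+// bitwise-and:
+//
+//   Attributes A = Attribute::SExt | Attribute::NoAlias;
+//   bool SignExtended = (A & Attribute::SExt) != 0;  // true
+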
+
+/// @brief Attributes that only apply to function parameters.
+const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
+
+/// @brief Attributes that may be applied to the function itself. These cannot
+/// be used on return values or function parameters.
+const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
+  NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
+  NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
+  Hotpatch;
+
+/// @brief Parameter attributes that do not apply to vararg call arguments.
+const Attributes VarArgsIncompatible = StructRet;
+
+/// @brief Attributes that are mutually incompatible.
+const Attributes MutuallyIncompatible[4] = {
+  ByVal | InReg | Nest | StructRet,
+  ZExt | SExt,
+  ReadNone | ReadOnly,
+  NoInline | AlwaysInline
+};
+
+/// @brief Which attributes cannot be applied to a type.
+Attributes typeIncompatible(const Type *Ty);
+
+/// This turns an int alignment (a power of 2, normally) into the
+/// form used internally in Attributes.
+inline Attributes constructAlignmentFromInt(unsigned i) {
+  // Default alignment, allow the target to define how to align it.
+  if (i == 0)
+    return 0;
+
+  assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+  assert(i <= 0x40000000 && "Alignment too large.");
+  return (Log2_32(i)+1) << 16;
+}
+
+/// This returns the alignment field of an attribute as a byte alignment value.
+inline unsigned getAlignmentFromAttrs(Attributes A) {
+  Attributes Align = A & Attribute::Alignment;
+  if (Align == 0)
+    return 0;
+
+  return 1U << ((Align >> 16) - 1);
+}
+
+/// This turns an int stack alignment (which must be a power of 2) into
+/// the form used internally in Attributes.
+inline Attributes constructStackAlignmentFromInt(unsigned i) {
+  // Default alignment, allow the target to define how to align it.
+  if (i == 0)
+    return 0;
+
+  assert(isPowerOf2_32(i) && "Alignment must be a power of two.");
+  assert(i <= 0x100 && "Alignment too large.");
+  return (Log2_32(i)+1) << 26;
+}
+
+/// This returns the stack alignment field of an attribute as a byte alignment
+/// value.
+inline unsigned getStackAlignmentFromAttrs(Attributes A) {
+  Attributes StackAlign = A & Attribute::StackAlignment;
+  if (StackAlign == 0)
+    return 0;
+
+  return 1U << ((StackAlign >> 26) - 1);
+}
+
+
+/// The set of Attributes set in Attributes is converted to a
+/// string of equivalent mnemonics. This is, presumably, for writing out
+/// the mnemonics for the assembly writer.
+/// @brief Convert attribute bits to text
+std::string getAsString(Attributes Attrs);
+} // end namespace Attribute
+
+/// This is just a pair of values to associate a set of attributes
+/// with an index.
+struct AttributeWithIndex {
+  Attributes Attrs; ///< The attributes that are set, or'd together.
+  unsigned Index;   ///< Index of the parameter for which the attributes apply.
+                    ///< Index 0 is used for return value attributes.
+                    ///< Index ~0U is used for function attributes.
+
+  static AttributeWithIndex get(unsigned Idx, Attributes Attrs) {
+    AttributeWithIndex P;
+    P.Index = Idx;
+    P.Attrs = Attrs;
+    return P;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// AttrListPtr Smart Pointer
+//===----------------------------------------------------------------------===//
+
+class AttributeListImpl;
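+
+// Editor's note: illustrative sketch, not part of the original header. The
+// alignment helpers above store log2(align)+1 in the 5-bit Alignment field,
+// so encoding and decoding round-trip:
+//
+//   Attributes A = Attribute::constructAlignmentFromInt(16); // (4+1) << 16
+//   unsigned Align = Attribute::getAlignmentFromAttrs(A);    // == 16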
+
+/// AttrListPtr - This class manages the ref count for the opaque
+/// AttributeListImpl object and provides accessors for it.
+class AttrListPtr {
+  /// AttrList - The attributes that we are managing. This can be null
+  /// to represent the empty attributes list.
+  AttributeListImpl *AttrList;
+public:
+  AttrListPtr() : AttrList(0) {}
+  AttrListPtr(const AttrListPtr &P);
+  const AttrListPtr &operator=(const AttrListPtr &RHS);
+  ~AttrListPtr();
+
+  //===--------------------------------------------------------------------===//
+  // Attribute List Construction and Mutation
+  //===--------------------------------------------------------------------===//
+
+  /// get - Return an Attributes list with the specified parameter in it.
+  static AttrListPtr get(const AttributeWithIndex *Attr, unsigned NumAttrs);
+
+  /// get - Return an Attribute list with the parameters specified by the
+  /// consecutive random access iterator range.
+  template <typename Iter>
+  static AttrListPtr get(const Iter &I, const Iter &E) {
+    if (I == E) return AttrListPtr();  // Empty list.
+    return get(&*I, static_cast<unsigned>(E-I));
+  }
+
+  /// addAttr - Add the specified attribute at the specified index to this
+  /// attribute list.  Since attribute lists are immutable, this
+  /// returns the new list.
+  AttrListPtr addAttr(unsigned Idx, Attributes Attrs) const;
+
+  /// removeAttr - Remove the specified attribute at the specified index from
+  /// this attribute list.  Since attribute lists are immutable, this
+  /// returns the new list.
+  AttrListPtr removeAttr(unsigned Idx, Attributes Attrs) const;
+
+  //===--------------------------------------------------------------------===//
+  // Attribute List Accessors
+  //===--------------------------------------------------------------------===//
+  /// getParamAttributes - The attributes for the specified index are
+  /// returned.
+  Attributes getParamAttributes(unsigned Idx) const {
+    assert (Idx && Idx != ~0U && "Invalid parameter index!");
+    return getAttributes(Idx);
+  }
+
+  /// getRetAttributes - The attributes for the ret value are
+  /// returned.
+  Attributes getRetAttributes() const {
+    return getAttributes(0);
+  }
+
+  /// getFnAttributes - The function attributes are returned.
+  Attributes getFnAttributes() const {
+    return getAttributes(~0U);
+  }
+
+  /// paramHasAttr - Return true if the specified parameter index has the
+  /// specified attribute set.
+  bool paramHasAttr(unsigned Idx, Attributes Attr) const {
+    return (getAttributes(Idx) & Attr) != 0;
+  }
+
+  /// getParamAlignment - Return the alignment for the specified function
+  /// parameter.
+  unsigned getParamAlignment(unsigned Idx) const {
+    return Attribute::getAlignmentFromAttrs(getAttributes(Idx));
+  }
+
+  /// hasAttrSomewhere - Return true if the specified attribute is set for at
+  /// least one parameter or for the return value.
+  bool hasAttrSomewhere(Attributes Attr) const;
+
+  /// operator==/!= - Provide equality predicates.
+  bool operator==(const AttrListPtr &RHS) const
+  { return AttrList == RHS.AttrList; }
+  bool operator!=(const AttrListPtr &RHS) const
+  { return AttrList != RHS.AttrList; }
+
+  void dump() const;
+
+  //===--------------------------------------------------------------------===//
+  // Attribute List Introspection
+  //===--------------------------------------------------------------------===//
+
+  /// getRawPointer - Return a raw pointer that uniquely identifies this
+  /// attribute list.
+  void *getRawPointer() const {
+    return AttrList;
+  }
+
+  // Attributes are stored as a dense set of slots, where there is one
+  // slot for each argument that has an attribute.  This allows walking over the
+  // dense set instead of walking the sparse list of attributes.
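+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // Building a list that marks parameter 1 'byval' and querying it:
+  //
+  //   AttributeWithIndex AWI = AttributeWithIndex::get(1, Attribute::ByVal);
+  //   AttrListPtr PAL = AttrListPtr::get(&AWI, 1);
+  //   bool IsByVal = PAL.paramHasAttr(1, Attribute::ByVal);  // true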
+
+  /// isEmpty - Return true if there are no attributes.
+  ///
+  bool isEmpty() const {
+    return AttrList == 0;
+  }
+
+  /// getNumSlots - Return the number of slots used in this attribute list.
+  /// This is the number of arguments that have an attribute set on them
+  /// (including the function itself).
+  unsigned getNumSlots() const;
+
+  /// getSlot - Return the AttributeWithIndex at the specified slot.  This
+  /// holds an index number plus a set of attributes.
+  const AttributeWithIndex &getSlot(unsigned Slot) const;
+
+private:
+  explicit AttrListPtr(AttributeListImpl *L);
+
+  /// getAttributes - The attributes for the specified index are
+  /// returned.  Attributes for the result are denoted with Idx = 0.
+  Attributes getAttributes(unsigned Idx) const;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/AutoUpgrade.h b/final/include/llvm/AutoUpgrade.h
new file mode 100644
index 00000000000..5ce20b69e2f
--- /dev/null
+++ b/final/include/llvm/AutoUpgrade.h
@@ -0,0 +1,48 @@
+//===-- llvm/AutoUpgrade.h - AutoUpgrade Helpers ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// These functions are implemented by lib/VMCore/AutoUpgrade.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AUTOUPGRADE_H
+#define LLVM_AUTOUPGRADE_H
+
+namespace llvm {
+  class Module;
+  class GlobalVariable;
+  class Function;
+  class CallInst;
+
+  /// This is a more granular function that simply checks an intrinsic function
+  /// for upgrading, and returns true if it requires upgrading. It may return
+  /// null in NewFn if all calls to the original intrinsic function
+  /// should be transformed to non-function-call instructions.
+  bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn);
+
+  /// This is the complement to the above, replacing a specific call to an
+  /// intrinsic function with a call to the specified new function.
+  void UpgradeIntrinsicCall(CallInst *CI, Function *NewFn);
+
+  /// This is an auto-upgrade hook for any old intrinsic function syntaxes
+  /// which need to have both the function updated as well as all calls updated
+  /// to the new function. This should only be run in a post-processing fashion
+  /// so that it can update all calls to the old function.
+  void UpgradeCallsToIntrinsic(Function* F);
+
+  /// This checks for global variables which should be upgraded. It returns true
+  /// if it requires upgrading.
+  bool UpgradeGlobalVariable(GlobalVariable *GV);
+
+  /// This function checks debug info intrinsics. If an intrinsic is invalid
+  /// then this function simply removes the intrinsic.
+  void CheckDebugInfoIntrinsics(Module *M);
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/BasicBlock.h b/final/include/llvm/BasicBlock.h
new file mode 100644
index 00000000000..7e7c9e76943
--- /dev/null
+++ b/final/include/llvm/BasicBlock.h
@@ -0,0 +1,269 @@
+//===-- llvm/BasicBlock.h - Represent a basic block in the VM ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the BasicBlock class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BASICBLOCK_H
+#define LLVM_BASICBLOCK_H
+
+#include "llvm/Instruction.h"
+#include "llvm/SymbolTableListTraits.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class TerminatorInst;
+class LLVMContext;
+class BlockAddress;
+
+template<> struct ilist_traits<Instruction>
+  : public SymbolTableListTraits<Instruction, BasicBlock> {
+  // createSentinel is used to get hold of a node that marks the end of
+  // the list...
+  // The sentinel is relative to this instance, so we use a non-static
+  // method.
+  Instruction *createSentinel() const {
+    // since i(p)lists always publicly derive from the corresponding
+    // traits, placing a data member in this class will augment i(p)list.
+    // But since the NodeTy is expected to publicly derive from
+    // ilist_node<NodeTy>, there is a legal viable downcast from it
+    // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
+    // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
+    // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
+    // the superposition.
+    return static_cast<Instruction*>(&Sentinel);
+  }
+  static void destroySentinel(Instruction*) {}
+
+  Instruction *provideInitialHead() const { return createSentinel(); }
+  Instruction *ensureHead(Instruction*) const { return createSentinel(); }
+  static void noteHead(Instruction*, Instruction*) {}
+private:
+  mutable ilist_half_node<Instruction> Sentinel;
+};
+
+/// This represents a single basic block in LLVM. A basic block is simply a
+/// container of instructions that execute sequentially. Basic blocks are Values
+/// because they are referenced by instructions such as branches and switch
+/// tables. The type of a BasicBlock is "Type::LabelTy" because the basic block
+/// represents a label to which a branch can jump.
+///
+/// A well formed basic block is formed of a list of non-terminating
+/// instructions followed by a single TerminatorInst instruction.
+/// TerminatorInst's may not occur in the middle of basic blocks, and must
+/// terminate the blocks. The BasicBlock class allows malformed basic blocks to
+/// occur because it may be useful in the intermediate stage of constructing or
+/// modifying a program. However, the verifier will ensure that basic blocks
+/// are "well formed".
+/// @brief LLVM Basic Block Representation
+class BasicBlock : public Value, // Basic blocks are data objects also
+                   public ilist_node<BasicBlock> {
+  friend class BlockAddress;
+public:
+  typedef iplist<Instruction> InstListType;
+private:
+  InstListType InstList;
+  Function *Parent;
+
+  void setParent(Function *parent);
+  friend class SymbolTableListTraits<BasicBlock, Function>;
+
+  BasicBlock(const BasicBlock &);     // Do not implement
+  void operator=(const BasicBlock &); // Do not implement
+
+  /// BasicBlock ctor - If the function parameter is specified, the basic block
+  /// is automatically inserted at either the end of the function (if
+  /// InsertBefore is null), or before the specified basic block.
+  ///
+  explicit BasicBlock(LLVMContext &C, const Twine &Name = "",
+                      Function *Parent = 0, BasicBlock *InsertBefore = 0);
+public:
+  /// getContext - Get the context in which this basic block lives.
+  LLVMContext &getContext() const;
+
+  /// Instruction iterators...
+  typedef InstListType::iterator       iterator;
+  typedef InstListType::const_iterator const_iterator;
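+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // Walking the instructions of a block "BB" with these iterator typedefs:
+  //
+  //   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+  //     I->dump();  // *I is an Instruction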
+
+  /// Create - Creates a new BasicBlock.  If the Parent parameter is specified,
+  /// the basic block is automatically inserted at either the end of the
+  /// function (if InsertBefore is 0), or before the specified basic block.
+  static BasicBlock *Create(LLVMContext &Context, const Twine &Name = "",
+                            Function *Parent = 0,
+                            BasicBlock *InsertBefore = 0) {
+    return new BasicBlock(Context, Name, Parent, InsertBefore);
+  }
+  ~BasicBlock();
+
+  /// getParent - Return the enclosing method, or null if none
+  ///
+  const Function *getParent() const { return Parent; }
+        Function *getParent()       { return Parent; }
+
+  /// use_back - Specialize the methods defined in Value, as we know that a
+  /// BasicBlock can only be used by Users (specifically PHI nodes, terminators,
+  /// and BlockAddress's).
+  User       *use_back()       { return cast<User>(*use_begin());}
+  const User *use_back() const { return cast<User>(*use_begin());}
+
+  /// getTerminator() - If this is a well formed basic block, then this returns
+  /// a pointer to the terminator instruction.  If it is not, then you get a
+  /// null pointer back.
+  ///
+  TerminatorInst *getTerminator();
+  const TerminatorInst *getTerminator() const;
+
+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode instruction.  When adding instructions to the beginning of the
+  /// basic block, they should be added before the returned value, not before
+  /// the first instruction, which might be PHI.
+  /// Returns 0 if there's no non-PHI instruction.
+  Instruction* getFirstNonPHI();
+  const Instruction* getFirstNonPHI() const {
+    return const_cast<BasicBlock*>(this)->getFirstNonPHI();
+  }
+
+  // Same as above, but also skip debug intrinsics.
+  Instruction* getFirstNonPHIOrDbg();
+  const Instruction* getFirstNonPHIOrDbg() const {
+    return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg();
+  }
+
+  /// removeFromParent - This method unlinks 'this' from the containing
+  /// function, but does not delete it.
+  ///
+  void removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing function
+  /// and deletes it.
+  ///
+  void eraseFromParent();
+
+  /// moveBefore - Unlink this basic block from its current function and
+  /// insert it into the function that MovePos lives in, right before MovePos.
+  void moveBefore(BasicBlock *MovePos);
+
+  /// moveAfter - Unlink this basic block from its current function and
+  /// insert it into the function that MovePos lives in, right after MovePos.
+  void moveAfter(BasicBlock *MovePos);
+
+
+  /// getSinglePredecessor - If this basic block has a single predecessor block,
+  /// return the block, otherwise return a null pointer.
+  BasicBlock *getSinglePredecessor();
+  const BasicBlock *getSinglePredecessor() const {
+    return const_cast<BasicBlock*>(this)->getSinglePredecessor();
+  }
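+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // A common simplification pattern guarded by getSinglePredecessor():
+  //
+  //   if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+  //     // Every path into BB comes from Pred, via exactly one edge.
+  //   }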
+
+  /// getUniquePredecessor - If this basic block has a unique predecessor block,
+  /// return the block, otherwise return a null pointer.
+  /// Note that unique predecessor doesn't mean single edge, there can be
+  /// multiple edges from the unique predecessor to this block (for example
+  /// a switch statement with multiple cases having the same destination).
+  BasicBlock *getUniquePredecessor();
+  const BasicBlock *getUniquePredecessor() const {
+    return const_cast<BasicBlock*>(this)->getUniquePredecessor();
+  }
+
+  //===--------------------------------------------------------------------===//
+  /// Instruction iterator methods
+  ///
+  inline iterator                begin()       { return InstList.begin(); }
+  inline const_iterator          begin() const { return InstList.begin(); }
+  inline iterator                end  ()       { return InstList.end();   }
+  inline const_iterator          end  () const { return InstList.end();   }
+
+  inline size_t                   size() const { return InstList.size();  }
+  inline bool                    empty() const { return InstList.empty(); }
+  inline const Instruction      &front() const { return InstList.front(); }
+  inline       Instruction      &front()       { return InstList.front(); }
+  inline const Instruction       &back() const { return InstList.back();  }
+  inline       Instruction       &back()       { return InstList.back();  }
+
+  /// getInstList() - Return the underlying instruction list container.  You
+  /// need to access it directly if you want to modify it currently.
+  ///
+  const InstListType &getInstList() const { return InstList; }
+        InstListType &getInstList()       { return InstList; }
+
+  /// getSublistAccess() - returns pointer to member of instruction list
+  static iplist<Instruction> BasicBlock::*getSublistAccess(Instruction*) {
+    return &BasicBlock::InstList;
+  }
+
+  /// getValueSymbolTable() - returns pointer to symbol table (if any)
+  ValueSymbolTable *getValueSymbolTable();
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const BasicBlock *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == Value::BasicBlockVal;
+  }
+
+  /// dropAllReferences() - This function causes all the subinstructions to "let
+  /// go" of all references that they are maintaining.  This allows one to
+  /// 'delete' a whole class at a time, even though there may be circular
+  /// references... first all references are dropped, and all use counts go to
+  /// zero.  Then everything is delete'd for real.  Note that no operations are
+  /// valid on an object that has "dropped all references", except operator
+  /// delete.
+  ///
+  void dropAllReferences();
+
+  /// removePredecessor - This method is used to notify a BasicBlock that the
+  /// specified Predecessor of the block is no longer able to reach it.  This is
+  /// actually not used to update the Predecessor list, but is actually used to
+  /// update the PHI nodes that reside in the block.  Note that this should be
+  /// called while the predecessor still refers to this block.
+  ///
+  void removePredecessor(BasicBlock *Pred, bool DontDeleteUselessPHIs = false);
+
+  /// splitBasicBlock - This splits a basic block into two at the specified
+  /// instruction.  Note that all instructions BEFORE the specified iterator
+  /// stay as part of the original basic block, an unconditional branch is added
+  /// to the original BB, and the rest of the instructions in the BB are moved
+  /// to the new BB, including the old terminator.  The newly formed BasicBlock
+  /// is returned.  This function invalidates the specified iterator.
+  ///
+  /// Note that this only works on well formed basic blocks (must have a
+  /// terminator), and 'I' must not be the end of instruction list (which would
+  /// cause a degenerate basic block to be formed, having a terminator inside of
+  /// the basic block).
+  ///
+  /// Also note that this doesn't preserve any passes. To split blocks while
+  /// keeping loop information consistent, use the SplitBlock utility function.
+  ///
+  BasicBlock *splitBasicBlock(iterator I, const Twine &BBName = "");
+
+  /// hasAddressTaken - returns true if there are any uses of this basic block
+  /// other than direct branches, switches, etc. to it.
+  bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
+
+private:
+  /// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
+  /// objects using it.  This is almost always 0, sometimes one, possibly but
+  /// almost never 2, and inconceivably 3 or more.
+  void AdjustBlockAddressRefCount(int Amt) {
+    setValueSubclassData(getSubclassDataFromValue()+Amt);
+    assert((int)(signed char)getSubclassDataFromValue() >= 0 &&
+           "Refcount wrap-around");
+  }
+  // Shadow Value::setValueSubclassData with a private forwarding method so that
+  // any future subclasses cannot accidentally use it.
+  void setValueSubclassData(unsigned short D) {
+    Value::setValueSubclassData(D);
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Bitcode/Archive.h b/final/include/llvm/Bitcode/Archive.h
new file mode 100644
index 00000000000..4abfa6e8442
--- /dev/null
+++ b/final/include/llvm/Bitcode/Archive.h
@@ -0,0 +1,546 @@
+//===-- llvm/Bitcode/Archive.h - LLVM Bitcode Archive -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file declares the Archive and ArchiveMember classes that provide
+// manipulation of LLVM Archive files.  The implementation is provided by the
+// lib/Bitcode/Archive library.  This library is used to read and write
+// archive (*.a) files that contain LLVM bitcode files (or others).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_ARCHIVE_H
+#define LLVM_BITCODE_ARCHIVE_H
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/Path.h"
+#include <map>
+#include <set>
+
+namespace llvm {
+  class MemoryBuffer;
+
+// Forward declare classes
+class Module;              // From VMCore
+class Archive;             // Declared below
+class ArchiveMemberHeader; // Internal implementation class
+class LLVMContext;         // Global data
+
+/// This class is the main class manipulated by users of the Archive class. It
+/// holds information about one member of the Archive. It is also the element
+/// stored by the Archive's ilist, the Archive's main abstraction. Because of
+/// the special requirements of archive files, users are not permitted to
+/// construct ArchiveMember instances. You should obtain them from the methods
+/// of the Archive class instead.
+/// @brief This class represents a single archive member.
+class ArchiveMember : public ilist_node<ArchiveMember> {
+  /// @name Types
+  /// @{
+  public:
+    /// These flags are used internally by the archive member to specify various
+    /// characteristics of the member. The various "is" methods below provide
+    /// access to the flags. The flags are not user settable.
+    enum Flags {
+      CompressedFlag = 1,          ///< Member is a normal compressed file
+      SVR4SymbolTableFlag = 2,     ///< Member is a SVR4 symbol table
+      BSD4SymbolTableFlag = 4,     ///< Member is a BSD4 symbol table
+      LLVMSymbolTableFlag = 8,     ///< Member is an LLVM symbol table
+      BitcodeFlag = 16,            ///< Member is bitcode
+      HasPathFlag = 64,            ///< Member has a full or partial path
+      HasLongFilenameFlag = 128,   ///< Member uses the long filename syntax
+      StringTableFlag = 256        ///< Member is an ar(1) format string table
+    };
+
+  /// @}
+  /// @name Accessors
+  /// @{
+  public:
+    /// @returns the parent Archive instance
+    /// @brief Get the archive associated with this member
+    Archive* getArchive() const { return parent; }
+
+    /// @returns the path to the Archive's file
+    /// @brief Get the path to the archive member
+    const sys::Path& getPath() const { return path; }
+
+    /// The "user" is the owner of the file per Unix security. This may not
+    /// have any applicability on non-Unix systems but is a required component
+    /// of the "ar" file format.
+    /// @brief Get the user associated with this archive member.
+    unsigned getUser() const { return info.getUser(); }
+
+    /// The "group" is the owning group of the file per Unix security. This
+    /// may not have any applicability on non-Unix systems but is a required
+    /// component of the "ar" file format.
+    /// @brief Get the group associated with this archive member.
+    unsigned getGroup() const { return info.getGroup(); }
+
+    /// The "mode" specifies the access permissions for the file per Unix
+    /// security. This may not have any applicability on non-Unix systems but is
+    /// a required component of the "ar" file format.
+    /// @brief Get the permission mode associated with this archive member.
+    unsigned getMode() const { return info.getMode(); }
+
+    /// This method returns the time at which the archive member was last
+    /// modified when it was not in the archive.
+    /// @brief Get the time of last modification of the archive member.
+    sys::TimeValue getModTime() const { return info.getTimestamp(); }
+
+    /// @returns the size of the archive member in bytes.
+    /// @brief Get the size of the archive member.
+    uint64_t getSize() const { return info.getSize(); }
+
+    /// This method returns the total size of the archive member as it
+    /// appears on disk. This includes the file content, the header, the
+    /// long file name if any, and the padding.
+    /// @brief Get total on-disk member size.
+    unsigned getMemberSize() const;
+
+    /// This method will return a pointer to the in-memory content of the
+    /// archive member, if it is available. If the data has not been loaded
+    /// into memory, the return value will be null.
+    /// @returns a pointer to the member's data.
+    /// @brief Get the data content of the archive member
+    const char* getData() const { return data; }
+
+    /// This method determines if the member is a regular compressed file.
+    /// @returns true iff the archive member is a compressed regular file.
+    /// @brief Determine if the member is a compressed regular file.
+    bool isCompressed() const { return flags&CompressedFlag; }
+
+    /// @returns true iff the member is a SVR4 (non-LLVM) symbol table
+    /// @brief Determine if this member is a SVR4 symbol table.
+    bool isSVR4SymbolTable() const { return flags&SVR4SymbolTableFlag; }
+
+    /// @returns true iff the member is a BSD4.4 (non-LLVM) symbol table
+    /// @brief Determine if this member is a BSD4.4 symbol table.
+    bool isBSD4SymbolTable() const { return flags&BSD4SymbolTableFlag; }
+
+    /// @returns true iff the archive member is the LLVM symbol table
+    /// @brief Determine if this member is the LLVM symbol table.
+    bool isLLVMSymbolTable() const { return flags&LLVMSymbolTableFlag; }
+
+    /// @returns true iff the archive member is the ar(1) string table
+    /// @brief Determine if this member is the ar(1) string table.
+    bool isStringTable() const { return flags&StringTableFlag; }
+
+    /// @returns true iff the archive member is a bitcode file.
+    /// @brief Determine if this member is a bitcode file.
+    bool isBitcode() const { return flags&BitcodeFlag; }
+
+    /// @returns true iff the file name contains a path (directory) component.
+    /// @brief Determine if the member has a path
+    bool hasPath() const { return flags&HasPathFlag; }
+
+    /// Long filenames are an artifact of the ar(1) file format which allows
+    /// up to sixteen characters in its header and doesn't allow a path
+    /// separator character (/). To avoid this, a "long format" member name is
+    /// allowed that doesn't have this restriction. This method determines if
+    /// that "long format" is used for this member.
+    /// @returns true iff the file name uses the long form
+    /// @brief Determine if the member has a long file name
+    bool hasLongFilename() const { return flags&HasLongFilenameFlag; }
+
+    /// This method returns the status info (like Unix stat(2)) for the archive
+    /// member. The status info provides the file's size, permissions, and
+    /// modification time. The contents of the Path::StatusInfo structure, other
+    /// than the size and modification time, may not have utility on non-Unix
+    /// systems.
+    /// @returns the status info for the archive member
+    /// @brief Obtain the status info for the archive member
+    const sys::FileStatus &getFileStatus() const { return info; }
+
+    /// This method causes the archive member to be replaced with the contents
+    /// of the file specified by \p File. The contents of \p this will be
+    /// updated to reflect the new data from \p File. The \p File must exist and
+    /// be readable on entry to this method.
+    /// @returns true if an error occurred, false otherwise
+    /// @brief Replace contents of archive member with a new file.
+    bool replaceWith(const sys::Path &aFile, std::string* ErrMsg);
+
+  /// @}
+  /// @name Data
+  /// @{
+  private:
+    Archive*            parent;   ///< Pointer to parent archive
+    sys::PathWithStatus path;     ///< Path of file containing the member
+    sys::FileStatus     info;     ///< Status info (size,mode,date)
+    unsigned            flags;    ///< Flags about the archive member
+    const char*         data;     ///< Data for the member
+
+  /// @}
+  /// @name Constructors
+  /// @{
+  public:
+    /// The default constructor is only used by the Archive's iplist when it
+    /// constructs the list's sentry node.
+    ArchiveMember();
+
+  private:
+    /// Used internally by the Archive class to construct an ArchiveMember.
+    /// The contents of the ArchiveMember are filled out by the Archive class.
+    explicit ArchiveMember(Archive *PAR);
+
+    // So Archive can construct an ArchiveMember
+    friend class llvm::Archive;
+  /// @}
+};
+
+/// This class defines the interface to LLVM Archive files. The Archive class
+/// presents the archive file as an ilist of ArchiveMember objects. The members
+/// can be rearranged in any fashion either by directly editing the ilist or by
+/// using editing methods on the Archive class (recommended). The Archive
+/// class also provides several ways of accessing the archive file for various
+/// purposes such as editing and linking. Full symbol table support is provided
+/// for loading only those files that resolve symbols. Note that read
+/// performance of this library is _crucial_ for performance of JIT type
+/// applications and the linkers. Consequently, the implementation of the class
+/// is optimized for reading.
+class Archive {
+
+  /// @name Types
+  /// @{
+  public:
+    /// This is the ilist type over which users may iterate to examine
+    /// the contents of the archive
+    /// @brief The ilist type of ArchiveMembers that Archive contains.
+    typedef iplist<ArchiveMember> MembersList;
+
+    /// @brief Forward mutable iterator over ArchiveMember
+    typedef MembersList::iterator iterator;
+
+    /// @brief Forward immutable iterator over ArchiveMember
+    typedef MembersList::const_iterator const_iterator;
+
+    /// @brief Reverse mutable iterator over ArchiveMember
+    typedef std::reverse_iterator<iterator> reverse_iterator;
+
+    /// @brief Reverse immutable iterator over ArchiveMember
+    typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+
+    /// @brief The in-memory version of the symbol table
+    typedef std::map<std::string,unsigned> SymTabType;
+
+  /// @}
+  /// @name ilist accessor methods
+  /// @{
+  public:
+    inline iterator               begin()        { return members.begin();  }
+    inline const_iterator         begin()  const { return members.begin();  }
+    inline iterator               end  ()        { return members.end();    }
+    inline const_iterator         end  ()  const { return members.end();    }
+
+    inline reverse_iterator       rbegin()       { return members.rbegin(); }
+    inline const_reverse_iterator rbegin() const { return members.rbegin(); }
+    inline reverse_iterator       rend  ()       { return members.rend();   }
+    inline const_reverse_iterator rend  () const { return members.rend();   }
+
+    inline size_t                 size()   const { return members.size();   }
+    inline bool                   empty()  const { return members.empty();  }
+    inline const ArchiveMember&   front()  const { return members.front();  }
+    inline       ArchiveMember&   front()        { return members.front();  }
+    inline const ArchiveMember&   back()   const { return members.back();   }
+    inline       ArchiveMember&   back()         { return members.back();   }
+
+  /// @}
+  /// @name ilist mutator methods
+  /// @{
+  public:
+    /// This method splices a \p src member from an archive (possibly \p this),
+    /// to a position just before the member given by \p dest in \p this. When
+    /// the archive is written, \p src will be written in its new location.
+    /// @brief Move a member to a new location
+    inline void splice(iterator dest, Archive& arch, iterator src)
+      { return members.splice(dest,arch.members,src); }
+
+    /// This method erases a \p target member from the archive. When the
+    /// archive is written, it will no longer contain \p target. The associated
+    /// ArchiveMember is deleted.
+    /// @brief Erase a member.
+    inline iterator erase(iterator target) { return members.erase(target); }
+
+  /// @}
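+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // Dropping every non-bitcode member of an archive "A" with the accessor
+  // and mutator methods above:
+  //
+  //   for (Archive::iterator I = A->begin(), E = A->end(); I != E; )
+  //     if (I->isBitcode())
+  //       ++I;
+  //     else
+  //       I = A->erase(I);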
+  /// @name Constructors
+  /// @{
+  public:
+    /// Create an empty archive file and associate it with the \p Filename. This
+    /// method does not actually create the archive disk file. It creates an
+    /// empty Archive object. If the writeToDisk method is called, the archive
+    /// file \p Filename will be created at that point, with whatever content
+    /// the returned Archive object has at that time.
+    /// @returns An Archive* that represents the new archive file.
+    /// @brief Create an empty Archive.
+    static Archive* CreateEmpty(
+      const sys::Path& Filename,///< Name of the archive to (eventually) create.
+      LLVMContext& C            ///< Context to use for global information
+    );
+
+    /// Open an existing archive and load its contents in preparation for
+    /// editing. After this call, the member ilist is completely populated based
+    /// on the contents of the archive file. You should use this form of open if
+    /// you intend to modify the archive or traverse its contents (e.g. for
+    /// printing).
+    /// @brief Open and load an archive file
+    static Archive* OpenAndLoad(
+      const sys::Path& filePath,  ///< The file path to open and load
+      LLVMContext& C,             ///< The context to use for global information
+      std::string* ErrorMessage   ///< An optional error string
+    );
+
+    /// This method opens an existing archive file from \p Filename and reads in
+    /// its symbol table without reading in any of the archive's members. This
+    /// reduces both I/O and CPU time in opening the archive if it is to be used
+    /// solely for symbol lookup (e.g. during linking). The \p Filename must
+    /// exist and be an archive file or an error will be returned. This form
+    /// of opening the archive is intended for read-only operations that need to
+    /// locate members via the symbol table for link editing. Since the archive
+    /// members are not read by this method, the archive will appear empty upon
+    /// return. If editing operations are performed on the archive, they will
+    /// completely replace the contents of the archive! It is recommended that
+    /// if this form of opening the archive is used that only the symbol table
+    /// lookup methods (getSymbolTable, findModuleDefiningSymbol, and
+    /// findModulesDefiningSymbols) be used.
+    /// @returns an Archive* that represents the archive file, or null on error.
+    /// @brief Open an existing archive and load its symbols.
+    static Archive* OpenAndLoadSymbols(
+      const sys::Path& Filename,   ///< Name of the archive file to open
+      LLVMContext& C,              ///< The context to use for global info
+      std::string* ErrorMessage=0  ///< An optional error string
+    );
+
+    /// This destructor cleans up the Archive object, releases all memory, and
+    /// closes files. It does nothing with the archive file on disk. If you
+    /// haven't used the writeToDisk method by the time the destructor is
+    /// called, all changes to the archive will be lost.
+    /// @brief Destruct in-memory archive
+    ~Archive();
+
+  /// @}
+  /// @name Accessors
+  /// @{
+  public:
+    /// @returns the path to the archive file.
+    /// @brief Get the archive path.
+    const sys::Path& getPath() { return archPath; }
+
+    /// This method is provided so that editing methods can be invoked directly
+    /// on the Archive's iplist of ArchiveMember. However, it is recommended
+    /// that the usual STL style iterator interface be used instead.
+    /// @returns the iplist of ArchiveMember
+    /// @brief Get the iplist of the members
+    MembersList& getMembers() { return members; }
+
+    /// This method allows direct query of the Archive's symbol table. The
+    /// symbol table is a std::map of std::string (the symbol) to unsigned (the
+    /// file offset). Note that for efficiency reasons, the offset stored in
+    /// the symbol table is not the actual offset. It is the offset from the
+    /// beginning of the first "real" file member (after the symbol table). Use
+    /// the getFirstFileOffset() to obtain that offset and add this value to the
+    /// offset in the symbol table to obtain the real file offset. Note that
+    /// there is purposefully no interface provided by Archive to look up
+    /// members by their offset. Use the findModulesDefiningSymbols and
+    /// findModuleDefiningSymbol methods instead.
+    /// @returns the Archive's symbol table.
+    /// @brief Get the archive's symbol table
+    const SymTabType& getSymbolTable() { return symTab; }
+
+    /// This method returns the offset in the archive file to the first "real"
+    /// file member. Archive files, on disk, have a signature and might have a
+    /// symbol table that precedes the first actual file member. This method
+    /// allows you to determine what the size of those fields are.
+    /// @returns the offset to the first "real" file member in the archive.
+    /// @brief Get the offset to the first "real" file member in the archive.
+    unsigned getFirstFileOffset() { return firstFileOffset; }
+
+    /// This method will scan the archive for bitcode modules, interpret them
+    /// and return a vector of the instantiated modules in \p Modules. If an
+    /// error occurs, this method will return true. If \p ErrMessage is not null
+    /// and an error occurs, \p *ErrMessage will be set to a string explaining
+    /// the error that occurred.
+    /// @returns true if an error occurred
+    /// @brief Instantiate all the bitcode modules located in the archive
+    bool getAllModules(std::vector<Module*>& Modules, std::string* ErrMessage);
+
+    /// This accessor looks up the \p symbol in the archive's symbol table and
+    /// returns the associated module that defines that symbol. This method can
+    /// be called as many times as necessary. This is handy for linking the
+    /// archive into another module based on unresolved symbols. Note that the
+    /// Module returned by this accessor should not be deleted by the caller. It
+    /// is managed internally by the Archive class. It is possible that multiple
+    /// calls to this accessor will return the same Module instance because the
+    /// associated module defines multiple symbols.
+    /// @returns The Module* found or null if the archive does not contain a
+    /// module that defines the \p symbol.
+    /// @brief Look up a module by symbol name.
+    Module* findModuleDefiningSymbol(
+      const std::string& symbol,  ///< Symbol to be sought
+      std::string* ErrMessage     ///< Error message storage, if non-zero
+    );
+
+    /// This method is similar to findModuleDefiningSymbol but allows lookup of
+    /// more than one symbol at a time. If \p symbols contains a list of
+    /// undefined symbols in some module, then calling this method is like
+    /// making one complete pass through the archive to resolve symbols but is
+    /// more efficient than looking at the individual members. Note that on
+    /// exit, the symbols resolved by this method will be removed from \p
+    /// symbols to ensure they are not re-searched on a subsequent call. If
+    /// you need to retain the list of symbols, make a copy.
+    /// @brief Look up multiple symbols in the archive.
+    bool findModulesDefiningSymbols(
+      std::set<std::string>& symbols, ///< Symbols to be sought
+      std::set<Module*>& modules,     ///< The modules matching \p symbols
+      std::string* ErrMessage         ///< Error msg storage, if non-zero
+    );
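+
+    // Editor's note: illustrative sketch, not part of the original header.
+    // Resolving one undefined symbol against an archive opened with
+    // OpenAndLoadSymbols:
+    //
+    //   std::string Err;
+    //   if (Module *M = A->findModuleDefiningSymbol("main", &Err)) {
+    //     // M defines "main"; it is owned by the Archive, do not delete it.
+    //   }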
+
+    /// This method determines whether the archive is a properly formed llvm
+    /// bitcode archive. It first makes sure the symbol table has been loaded
+    /// and has a non-zero size. If it does, then it is an archive. If not,
+    /// then it tries to load all the bitcode modules of the archive. Finally,
+    /// it returns whether it was successful.
+    /// @returns true if the archive is a proper llvm bitcode archive
+    /// @brief Determine whether the archive is a proper llvm bitcode archive.
+    bool isBitcodeArchive();
+
+  /// @}
+  /// @name Mutators
+  /// @{
+  public:
+    /// This method is the only way to get the archive written to disk. It
+    /// creates or overwrites the file specified when \p this was created
+    /// or opened. The arguments provide options for writing the archive. If
+    /// \p CreateSymbolTable is true, the archive is scanned for bitcode files
+    /// and a symbol table of the externally visible function and global
+    /// variable names is created. If \p TruncateNames is true, the names of the
+    /// archive members will have their path component stripped and the file
+    /// name will be truncated at 15 characters. If \p Compress is specified,
+    /// all archive members will be compressed before being written. If
+    /// \p PrintSymTab is true, the symbol table will be printed to std::cout.
+    /// @returns true if an error occurred, \p error set to error message
+    /// @returns false if the writing succeeded.
+    /// @brief Write (possibly modified) archive contents to disk
+    bool writeToDisk(
+      bool CreateSymbolTable=false,   ///< Create Symbol table
+      bool TruncateNames=false,       ///< Truncate the filename to 15 chars
+      bool Compress=false,            ///< Compress files
+      std::string* ErrMessage=0       ///< If non-null, where error msg is set
+    );
+
+    /// This method adds a new file to the archive. The \p filename is examined
+    /// to determine just enough information to create an ArchiveMember object
+    /// which is then inserted into the Archive object's ilist at the location
+    /// given by \p where.
+    /// @returns true if an error occurred, false otherwise
+    /// @brief Add a file to the archive.
+    bool addFileBefore(
+      const sys::Path& filename, ///< The file to be added
+      iterator where,            ///< Insertion point
+      std::string* ErrMsg        ///< Optional error message location
+    );
+
+  /// @}
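+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // A typical edit session with the mutators above adds a file and then
+  // commits the change to disk with a fresh symbol table; the error handling
+  // here is hypothetical:
+  //
+  //   std::string Err;
+  //   if (A->addFileBefore(sys::Path("extra.bc"), A->end(), &Err) ||
+  //       A->writeToDisk(/*CreateSymbolTable=*/true, false, false, &Err))
+  //     report_fatal_error(Err);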
+  /// @name Implementation
+  /// @{
+  protected:
+    /// @brief Construct an Archive for \p filename and optionally map it
+    /// into memory.
+    explicit Archive(const sys::Path& filename, LLVMContext& C);
+
+    /// @param data The symbol table data to be parsed
+    /// @param len The length of the symbol table data
+    /// @param error Set to address of a std::string to get error messages
+    /// @returns false on error
+    /// @brief Parse the symbol table at \p data.
+    bool parseSymbolTable(const void* data,unsigned len,std::string* error);
+
+    /// @returns A fully populated ArchiveMember or 0 if an error occurred.
+    /// @brief Parse the header of a member starting at \p At
+    ArchiveMember* parseMemberHeader(
+      const char*&At,    ///< The pointer to the location we're parsing
+      const char*End,    ///< The pointer to the end of the archive
+      std::string* error ///< Optional error message catcher
+    );
+
+    /// @param ErrMessage Set to address of a std::string to get error messages
+    /// @returns false on error
+    /// @brief Check that the archive signature is correct
+    bool checkSignature(std::string* ErrMessage);
+
+    /// @param ErrMessage Set to address of a std::string to get error messages
+    /// @returns false on error
+    /// @brief Load the entire archive.
+    bool loadArchive(std::string* ErrMessage);
+
+    /// @param ErrMessage Set to address of a std::string to get error messages
+    /// @returns false on error
+    /// @brief Load just the symbol table.
+    bool loadSymbolTable(std::string* ErrMessage);
+
+    /// @brief Write the symbol table to an ofstream.
+    void writeSymbolTable(std::ofstream& ARFile);
+
+    /// Writes one ArchiveMember to an ofstream. If an error occurs, returns
+    /// false, otherwise true. If an error occurs and error is non-null then
+    /// it will be set to an error message.
+    /// @returns false Writing member succeeded
+    /// @returns true Writing member failed, \p error set to error message
+    bool writeMember(
+      const ArchiveMember& member, ///< The member to be written
+      std::ofstream& ARFile,       ///< The file to write member onto
+      bool CreateSymbolTable,      ///< Should symbol table be created?
+      bool TruncateNames,          ///< Should names be truncated to 15 chars?
+      bool ShouldCompress,         ///< Should the member be compressed?
+      std::string* ErrMessage      ///< If non-null, place where error msg is set
+    );
+
+    /// @brief Fill in an ArchiveMemberHeader from ArchiveMember.
+    bool fillHeader(const ArchiveMember&mbr,
+                    ArchiveMemberHeader& hdr,int sz, bool TruncateNames) const;
+
+    /// @brief Maps archive into memory
+    bool mapToMemory(std::string* ErrMsg);
+
+    /// @brief Frees all the members and unmaps the archive file.
+    void cleanUpMemory();
+
+    /// This type is used to keep track of bitcode modules loaded from the
+    /// symbol table. It maps the file offset to a pair that consists of the
+    /// associated ArchiveMember and the Module.
+    /// @brief Module mapping type
+    typedef std::map<unsigned,std::pair<Module*,ArchiveMember*> >
+      ModuleMap;
+
+
+  /// @}
+  /// @name Data
+  /// @{
+  protected:
+    sys::Path archPath;       ///< Path to the archive file we read/write
+    MembersList members;      ///< The ilist of ArchiveMember
+    MemoryBuffer *mapfile;    ///< Raw Archive contents mapped into memory
+    const char* base;         ///< Base of the memory mapped file data
+    SymTabType symTab;        ///< The symbol table
+    std::string strtab;       ///< The string table for long file names
+    unsigned symTabSize;      ///< Size in bytes of symbol table
+    unsigned firstFileOffset; ///< Offset to first normal file.
+    ModuleMap modules;        ///< The modules loaded via symbol lookup.
+    ArchiveMember* foreignST; ///< This holds the foreign symbol table.
+    LLVMContext& Context;     ///< This holds global data.
+  /// @}
+  /// @name Hidden
+  /// @{
+  private:
+    Archive();                          ///< Do not implement
+    Archive(const Archive&);            ///< Do not implement
+    Archive& operator=(const Archive&); ///< Do not implement
+  /// @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Bitcode/BitCodes.h b/final/include/llvm/Bitcode/BitCodes.h
new file mode 100644
index 00000000000..449dc35d7de
--- /dev/null
+++ b/final/include/llvm/Bitcode/BitCodes.h
@@ -0,0 +1,185 @@
+//===- BitCodes.h - Enum values for the bitcode format ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines Bitcode enum values.
+//
+// The enum values defined in this file should be considered permanent. If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODES_H
+#define LLVM_BITCODE_BITCODES_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+namespace bitc {
+  enum StandardWidths {
+    BlockIDWidth = 8,    // We use VBR-8 for block IDs.
+    CodeLenWidth = 4,    // Codelen are VBR-4.
+    BlockSizeWidth = 32  // BlockSize up to 2^32 32-bit words = 16GB per block.
+  };
+
+  // The standard abbrev namespace always has a way to exit a block, enter a
+  // nested block, define abbrevs, and define an unabbreviated record.
+  enum FixedAbbrevIDs {
+    END_BLOCK = 0,  // Must be zero to guarantee termination for broken bitcode.
+    ENTER_SUBBLOCK = 1,
+
+    /// DEFINE_ABBREV - Defines an abbrev for the current block.  It consists
+    /// of a vbr5 for # operand infos.  Each operand info is emitted with a
+    /// single bit to indicate if it is a literal encoding.  If so, the value is
+    /// emitted with a vbr8.  If not, the encoding is emitted as 3 bits followed
+    /// by the info value as a vbr5 if needed.
+    DEFINE_ABBREV = 2,
+
+    // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by
+    // a vbr6 for the # operands, followed by vbr6's for each operand.
+    UNABBREV_RECORD = 3,
+
+    // This is not a code, this is a marker for the first abbrev assignment.
+    FIRST_APPLICATION_ABBREV = 4
+  };
+
+  /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO
+  /// block, which contains metadata about other blocks in the file.
+  enum StandardBlockIDs {
+    /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example,
+    /// standard abbrevs that should be available to all blocks of a specified
+    /// ID.
+    BLOCKINFO_BLOCK_ID = 0,
+
+    // Block IDs 1-7 are reserved for future expansion.
+    FIRST_APPLICATION_BLOCKID = 8
+  };
+
+  /// BlockInfoCodes - The blockinfo block contains metadata about user-defined
+  /// blocks.
+  enum BlockInfoCodes {
+    // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd
+    // block, instead of the BlockInfo block.
+
+    BLOCKINFO_CODE_SETBID = 1,       // SETBID: [blockid#]
+    BLOCKINFO_CODE_BLOCKNAME = 2,    // BLOCKNAME: [name]
+    BLOCKINFO_CODE_SETRECORDNAME = 3 // BLOCKINFO_CODE_SETRECORDNAME: [id, name]
+  };
+
+} // End bitc namespace
+
+/// BitCodeAbbrevOp - This describes one or more operands in an abbreviation.
+/// This is actually a union of two different things:
+///   1. It could be a literal integer value ("the operand is always 17").
+///   2. It could be an encoding specification ("this operand encoded like so").
+///
+class BitCodeAbbrevOp {
+  uint64_t Val;        // A literal value or data for an encoding.
+  bool IsLiteral : 1;  // Indicate whether this is a literal value or not.
+  unsigned Enc   : 3;  // The encoding to use.
+public:
+  enum Encoding {
+    Fixed = 1,  // A fixed width field, Val specifies number of bits.
+    VBR   = 2,  // A VBR field where Val specifies the width of each chunk.
+    Array = 3,  // A sequence of fields, next field specifies elt encoding.
+    Char6 = 4,  // A 6-bit fixed field which maps to [a-zA-Z0-9._].
+    Blob  = 5   // 32-bit aligned array of 8-bit characters.
+  };
+
+  explicit BitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {}
+  explicit BitCodeAbbrevOp(Encoding E, uint64_t Data = 0)
+    : Val(Data), IsLiteral(false), Enc(E) {}
+
+  bool isLiteral() const { return IsLiteral; }
+  bool isEncoding() const { return !IsLiteral; }
+
+  // Accessors for literals.
+  uint64_t getLiteralValue() const { assert(isLiteral()); return Val; }
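+
+  // Editor's note: illustrative sketch, not part of the original header.
+  // An abbreviation operand list for a record "[4, char6 array]" would be
+  // built from these pieces (BitCodeAbbrev is declared below):
+  //
+  //   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  //   Abbv->Add(BitCodeAbbrevOp(4));                       // literal code 4
+  //   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));  // variable count
+  //   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));  // element type
+  //
+  // As a VBR refresher: VBR-4 stores 3 payload bits plus a continuation bit
+  // per chunk, so 27 (binary 11011) is emitted as chunks 0b1011 then 0b0011.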
+
+  // Accessors for encoding info.
+  Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; }
+  uint64_t getEncodingData() const {
+    assert(isEncoding() && hasEncodingData());
+    return Val;
+  }
+
+  bool hasEncodingData() const { return hasEncodingData(getEncoding()); }
+  static bool hasEncodingData(Encoding E) {
+    switch (E) {
+    default: assert(0 && "Unknown encoding");
+    case Fixed:
+    case VBR:
+      return true;
+    case Array:
+    case Char6:
+    case Blob:
+      return false;
+    }
+  }
+
+  /// isChar6 - Return true if this character is legal in the Char6 encoding.
+  static bool isChar6(char C) {
+    if (C >= 'a' && C <= 'z') return true;
+    if (C >= 'A' && C <= 'Z') return true;
+    if (C >= '0' && C <= '9') return true;
+    if (C == '.' || C == '_') return true;
+    return false;
+  }
+  static unsigned EncodeChar6(char C) {
+    if (C >= 'a' && C <= 'z') return C-'a';
+    if (C >= 'A' && C <= 'Z') return C-'A'+26;
+    if (C >= '0' && C <= '9') return C-'0'+26+26;
+    if (C == '.')             return 62;
+    if (C == '_')             return 63;
+    assert(0 && "Not a valid Char6 character!");
+    return 0;
+  }
+
+  static char DecodeChar6(unsigned V) {
+    assert((V & ~63) == 0 && "Not a Char6 encoded character!");
+    if (V < 26)       return V+'a';
+    if (V < 26+26)    return V-26+'A';
+    if (V < 26+26+10) return V-26-26+'0';
+    if (V == 62)      return '.';
+    if (V == 63)      return '_';
+    assert(0 && "Not a valid Char6 character!");
+    return ' ';
+  }
+
+};
+
+/// BitCodeAbbrev - This class represents an abbreviation record.  An
+/// abbreviation allows a complex record that has redundancy to be stored in a
+/// specialized format instead of the fully-general, fully-vbr, format.
+class BitCodeAbbrev {
+  SmallVector<BitCodeAbbrevOp, 8> OperandList;
+  unsigned char RefCount; // Number of things using this.
+  ~BitCodeAbbrev() {}
+public:
+  BitCodeAbbrev() : RefCount(1) {}
+
+  void addRef() { ++RefCount; }
+  void dropRef() { if (--RefCount == 0) delete this; }
+
+  unsigned getNumOperandInfos() const {
+    return static_cast<unsigned>(OperandList.size());
+  }
+  const BitCodeAbbrevOp &getOperandInfo(unsigned N) const {
+    return OperandList[N];
+  }
+
+  void Add(const BitCodeAbbrevOp &OpInfo) {
+    OperandList.push_back(OpInfo);
+  }
+};
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Bitcode/BitstreamReader.h b/final/include/llvm/Bitcode/BitstreamReader.h
new file mode 100644
index 00000000000..779ef5fa2d8
--- /dev/null
+++ b/final/include/llvm/Bitcode/BitstreamReader.h
@@ -0,0 +1,642 @@
+//===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamReader class.  This class can be used to
+// read an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_READER_H
+#define BITSTREAM_READER_H
+
+#include "llvm/Bitcode/BitCodes.h"
+#include <climits>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+  class Deserializer;
+
+class BitstreamReader {
+public:
+  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
+  /// These describe abbreviations that all blocks of the specified ID inherit.
+  struct BlockInfo {
+    unsigned BlockID;
+    std::vector<BitCodeAbbrev*> Abbrevs;
+    std::string Name;
+
+    std::vector<std::pair<unsigned, std::string> > RecordNames;
+  };
+private:
+  /// FirstChar/LastChar - This remembers the first and last bytes of the
+  /// stream.
+  const unsigned char *FirstChar, *LastChar;
+
+  std::vector<BlockInfo> BlockInfoRecords;
+
+  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
+  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
+  /// uses this.
+  bool IgnoreBlockInfoNames;
+
+  BitstreamReader(const BitstreamReader&);  // NOT IMPLEMENTED
+  void operator=(const BitstreamReader&);   // NOT IMPLEMENTED
+public:
+  BitstreamReader() : FirstChar(0), LastChar(0), IgnoreBlockInfoNames(true) {
+  }
+
+  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
+    IgnoreBlockInfoNames = true;
+    init(Start, End);
+  }
+
+  void init(const unsigned char *Start, const unsigned char *End) {
+    FirstChar = Start;
+    LastChar = End;
+    assert(((End-Start) & 3) == 0 && "Bitcode stream not a multiple of 4 bytes");
+  }
+
+  ~BitstreamReader() {
+    // Free the BlockInfoRecords.
+    while (!BlockInfoRecords.empty()) {
+      BlockInfo &Info = BlockInfoRecords.back();
+      // Free blockinfo abbrev info.
+      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
+           i != e; ++i)
+        Info.Abbrevs[i]->dropRef();
+      BlockInfoRecords.pop_back();
+    }
+  }
+
+  const unsigned char *getFirstChar() const { return FirstChar; }
+  const unsigned char *getLastChar() const { return LastChar; }
+
+  /// CollectBlockInfoNames - This is called by clients that want block/record
+  /// name information.
+  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
+  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  /// hasBlockInfoRecords - Return true if we've already read and processed the
+  /// block info block for this Bitstream.  We only process it for the first
+  /// cursor that walks over it.
+  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
+
+  /// getBlockInfo - If there is block info for the specified ID, return it,
+  /// otherwise return null.
+  const BlockInfo *getBlockInfo(unsigned BlockID) const {
+    // Common case, the most recent entry matches BlockID.
+    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
+      return &BlockInfoRecords.back();
+
+    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
+         i != e; ++i)
+      if (BlockInfoRecords[i].BlockID == BlockID)
+        return &BlockInfoRecords[i];
+    return 0;
+  }
+
+  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+    if (const BlockInfo *BI = getBlockInfo(BlockID))
+      return *const_cast<BlockInfo*>(BI);
+
+    // Otherwise, add a new record.
+    BlockInfoRecords.push_back(BlockInfo());
+    BlockInfoRecords.back().BlockID = BlockID;
+    return BlockInfoRecords.back();
+  }
+
+};
+
+class BitstreamCursor {
+  friend class Deserializer;
+  BitstreamReader *BitStream;
+  const unsigned char *NextChar;
+
+  /// CurWord - This is the current data we have pulled from the stream but have
+  /// not returned to the client.
+  uint32_t CurWord;
+
+  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
+  /// is always from [0...31] inclusive.
+  unsigned BitsInCurWord;
+
+  // CurCodeSize - This is the declared size of code values used for the current
+  // block, in bits.
+  unsigned CurCodeSize;
+
+  /// CurAbbrevs - Abbrevs installed in this block.
+
+class BitstreamCursor {
+  friend class Deserializer;
+  BitstreamReader *BitStream;
+  const unsigned char *NextChar;
+
+  /// CurWord - This is the current data we have pulled from the stream but
+  /// have not returned to the client.
+  uint32_t CurWord;
+
+  /// BitsInCurWord - This is the number of bits in CurWord that are valid.
+  /// This is always from [0...31] inclusive.
+  unsigned BitsInCurWord;
+
+  // CurCodeSize - This is the declared size of code values used for the
+  // current block, in bits.
+  unsigned CurCodeSize;
+
+  /// CurAbbrevs - Abbrevs installed in this block.
+  std::vector<BitCodeAbbrev*> CurAbbrevs;
+
+  struct Block {
+    unsigned PrevCodeSize;
+    std::vector<BitCodeAbbrev*> PrevAbbrevs;
+    explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
+  };
+
+  /// BlockScope - This tracks the codesize of parent blocks.
+  SmallVector<Block, 8> BlockScope;
+
+public:
+  BitstreamCursor() : BitStream(0), NextChar(0) {
+  }
+  BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
+    operator=(RHS);
+  }
+
+  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
+    NextChar = R.getFirstChar();
+    assert(NextChar && "Bitstream not initialized yet");
+    CurWord = 0;
+    BitsInCurWord = 0;
+    CurCodeSize = 2;
+  }
+
+  void init(BitstreamReader &R) {
+    freeState();
+
+    BitStream = &R;
+    NextChar = R.getFirstChar();
+    assert(NextChar && "Bitstream not initialized yet");
+    CurWord = 0;
+    BitsInCurWord = 0;
+    CurCodeSize = 2;
+  }
+
+  ~BitstreamCursor() {
+    freeState();
+  }
+
+  void operator=(const BitstreamCursor &RHS) {
+    freeState();
+
+    BitStream = RHS.BitStream;
+    NextChar = RHS.NextChar;
+    CurWord = RHS.CurWord;
+    BitsInCurWord = RHS.BitsInCurWord;
+    CurCodeSize = RHS.CurCodeSize;
+
+    // Copy abbreviations, and bump ref counts.
+    CurAbbrevs = RHS.CurAbbrevs;
+    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+         i != e; ++i)
+      CurAbbrevs[i]->addRef();
+
+    // Copy block scope and bump ref counts.
+    for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
+         S != e; ++S) {
+      std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+      for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
+           i != e; ++i)
+        Abbrevs[i]->addRef();
+    }
+  }
+
+  void freeState() {
+    // Free all the Abbrevs.
+    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+         i != e; ++i)
+      CurAbbrevs[i]->dropRef();
+    CurAbbrevs.clear();
+
+    // Free all the Abbrevs in the block scope.
+    for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
+         S != e; ++S) {
+      std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
+      for (unsigned i = 0, e = static_cast<unsigned>(Abbrevs.size());
+           i != e; ++i)
+        Abbrevs[i]->dropRef();
+    }
+    BlockScope.clear();
+  }
+
+  /// GetAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
+  unsigned GetAbbrevIDWidth() const { return CurCodeSize; }
+
+  bool AtEndOfStream() const {
+    return NextChar == BitStream->getLastChar() && BitsInCurWord == 0;
+  }
+
+  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
+  uint64_t GetCurrentBitNo() const {
+    return (NextChar-BitStream->getFirstChar())*CHAR_BIT - BitsInCurWord;
+  }
+
+  BitstreamReader *getBitStreamReader() {
+    return BitStream;
+  }
+  const BitstreamReader *getBitStreamReader() const {
+    return BitStream;
+  }
+
+
+  /// JumpToBit - Reset the stream to the specified bit number.
+  void JumpToBit(uint64_t BitNo) {
+    uintptr_t ByteNo = uintptr_t(BitNo/8) & ~3;
+    uintptr_t WordBitNo = uintptr_t(BitNo) & 31;
+    assert(ByteNo <= (uintptr_t)(BitStream->getLastChar()-
+                                 BitStream->getFirstChar()) &&
+           "Invalid location");
+
+    // Move the cursor to the right word.
+    NextChar = BitStream->getFirstChar()+ByteNo;
+    BitsInCurWord = 0;
+    CurWord = 0;
+
+    // Skip over any bits that are already consumed.
+    if (WordBitNo)
+      Read(static_cast<unsigned>(WordBitNo));
+  }
+
+  uint32_t Read(unsigned NumBits) {
+    assert(NumBits <= 32 && "Cannot return more than 32 bits!");
+    // If the field is fully contained by CurWord, return it quickly.
+    if (BitsInCurWord >= NumBits) {
+      uint32_t R = CurWord & ((1U << NumBits)-1);
+      CurWord >>= NumBits;
+      BitsInCurWord -= NumBits;
+      return R;
+    }
+
+    // If we run out of data, stop at the end of the stream.
+    if (NextChar == BitStream->getLastChar()) {
+      CurWord = 0;
+      BitsInCurWord = 0;
+      return 0;
+    }
+
+    unsigned R = CurWord;
+
+    // Read the next word from the stream.
+    CurWord = (NextChar[0] <<  0) | (NextChar[1] <<  8) |
+              (NextChar[2] << 16) | (NextChar[3] << 24);
+    NextChar += 4;
+
+    // Extract NumBits-BitsInCurWord from what we just read.
+    unsigned BitsLeft = NumBits-BitsInCurWord;
+
+    // Be careful here, BitsLeft is in the range [1..32] inclusive.
+    R |= (CurWord & (~0U >> (32-BitsLeft))) << BitsInCurWord;
+
+    // BitsLeft bits have just been used up from CurWord.
+    if (BitsLeft != 32)
+      CurWord >>= BitsLeft;
+    else
+      CurWord = 0;
+    BitsInCurWord = 32-BitsLeft;
+    return R;
+  }
+
+  uint64_t Read64(unsigned NumBits) {
+    if (NumBits <= 32) return Read(NumBits);
+
+    uint64_t V = Read(32);
+    return V | (uint64_t)Read(NumBits-32) << 32;
+  }
+
+  uint32_t ReadVBR(unsigned NumBits) {
+    uint32_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return Piece;
+
+    uint32_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
+
+  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The
+  // chunk size of the VBR must still be <= 32 bits though.
+  uint64_t ReadVBR64(unsigned NumBits) {
+    uint32_t Piece = Read(NumBits);
+    if ((Piece & (1U << (NumBits-1))) == 0)
+      return uint64_t(Piece);
+
+    uint64_t Result = 0;
+    unsigned NextBit = 0;
+    while (1) {
+      Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
+
+      if ((Piece & (1U << (NumBits-1))) == 0)
+        return Result;
+
+      NextBit += NumBits-1;
+      Piece = Read(NumBits);
+    }
+  }
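+
+  // Worked example (illustrative): the value 300 (0b100101100) as VBR6.
+  // Each 6-bit chunk carries 5 payload bits plus a high continuation bit:
+  //   low 5 bits  01100, more to come -> chunk 0b101100
+  //   next 5 bits 01001, done         -> chunk 0b001001
+  // so ReadVBR(6) consumes 0b101100 then 0b001001 and returns
+  // (0b01100) | (0b01001 << 5) == 300.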
+
+  void SkipToWord() {
+    BitsInCurWord = 0;
+    CurWord = 0;
+  }
+
+  unsigned ReadCode() {
+    return Read(CurCodeSize);
+  }
+
+
+  // Block header:
+  //    [ENTER_SUBBLOCK, blockid, newcodelen, <align32bits>, blocklen]
+
+  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID
+  /// for the block.
+  unsigned ReadSubBlockID() {
+    return ReadVBR(bitc::BlockIDWidth);
+  }
+
+  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
+  /// over the body of this block.  If the block record is malformed, return
+  /// true.
+  bool SkipBlock() {
+    // Read and ignore the codelen value.  Since we are skipping this block, we
+    // don't care what code widths are used inside of it.
+    ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+
+    // Check that the block wasn't partially defined, and that the offset isn't
+    // bogus.
+    if (AtEndOfStream() || NextChar+NumWords*4 > BitStream->getLastChar())
+      return true;
+
+    NextChar += NumWords*4;
+    return false;
+  }
+
+  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
+  /// the block, and return true if the block has an error.
+  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0) {
+    // Save the current block's state on BlockScope.
+    BlockScope.push_back(Block(CurCodeSize));
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+    // Add the abbrevs specific to this block to the CurAbbrevs list.
+    if (const BitstreamReader::BlockInfo *Info =
+          BitStream->getBlockInfo(BlockID)) {
+      for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
+           i != e; ++i) {
+        CurAbbrevs.push_back(Info->Abbrevs[i]);
+        CurAbbrevs.back()->addRef();
+      }
+    }
+
+    // Get the codesize of this block.
+    CurCodeSize = ReadVBR(bitc::CodeLenWidth);
+    SkipToWord();
+    unsigned NumWords = Read(bitc::BlockSizeWidth);
+    if (NumWordsP) *NumWordsP = NumWords;
+
+    // Validate that this block is sane.
+    if (CurCodeSize == 0 || AtEndOfStream() ||
+        NextChar+NumWords*4 > BitStream->getLastChar())
+      return true;
+
+    return false;
+  }
+
+  bool ReadBlockEnd() {
+    if (BlockScope.empty()) return true;
+
+    // Block tail:
+    //    [END_BLOCK, <align32bits>]
+    SkipToWord();
+
+    PopBlockScope();
+    return false;
+  }
+
+private:
+  void PopBlockScope() {
+    CurCodeSize = BlockScope.back().PrevCodeSize;
+
+    // Delete abbrevs from popped scope.
+    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+         i != e; ++i)
+      CurAbbrevs[i]->dropRef();
+
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+    BlockScope.pop_back();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Processing
+  //===--------------------------------------------------------------------===//
+
+private:
+  void ReadAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
+                              SmallVectorImpl<uint64_t> &Vals) {
+    assert(Op.isLiteral() && "Not a literal");
+    // If the abbrev specifies the literal value to use, use it.
+    Vals.push_back(Op.getLiteralValue());
+  }
+
+  void ReadAbbreviatedField(const BitCodeAbbrevOp &Op,
+                            SmallVectorImpl<uint64_t> &Vals) {
+    assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!");
+
+    // Decode the value as we are commanded.
+    switch (Op.getEncoding()) {
+    default: assert(0 && "Unknown encoding!");
+    case BitCodeAbbrevOp::Fixed:
+      Vals.push_back(Read((unsigned)Op.getEncodingData()));
+      break;
+    case BitCodeAbbrevOp::VBR:
+      Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData()));
+      break;
+    case BitCodeAbbrevOp::Char6:
+      Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6)));
+      break;
+    }
+  }
+public:
+
+  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
+  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
+    unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
+    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
+    return CurAbbrevs[AbbrevNo];
+  }
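+
+  // Worked example (illustrative): an abbreviation defined as
+  //   [literal 4, VBR8, Array of Char6]
+  // stores a record like [4, 27, 'a', 'b'] as: nothing for the literal,
+  // VBR8 chunks for 27, a VBR6 element count of 2, then two 6-bit Char6
+  // values. ReadRecord below reverses exactly those steps and returns the
+  // code (4) after erasing it from Vals.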
+
+  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
+                      const char **BlobStart = 0, unsigned *BlobLen = 0) {
+    if (AbbrevID == bitc::UNABBREV_RECORD) {
+      unsigned Code = ReadVBR(6);
+      unsigned NumElts = ReadVBR(6);
+      for (unsigned i = 0; i != NumElts; ++i)
+        Vals.push_back(ReadVBR64(6));
+      return Code;
+    }
+
+    const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
+
+    for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) {
+      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+      if (Op.isLiteral()) {
+        ReadAbbreviatedLiteral(Op, Vals);
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+        // Array case.  Read the number of elements as a vbr6.
+        unsigned NumElts = ReadVBR(6);
+
+        // Get the element encoding.
+        assert(i+2 == e && "array op not second to last?");
+        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+        // Read all the elements.
+        for (; NumElts; --NumElts)
+          ReadAbbreviatedField(EltEnc, Vals);
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
+        // Blob case.  Read the number of bytes as a vbr6.
+        unsigned NumElts = ReadVBR(6);
+        SkipToWord();  // 32-bit alignment
+
+        // Figure out where the end of this blob will be including tail
+        // padding.
+        const unsigned char *NewEnd = NextChar+((NumElts+3)&~3);
+
+        // If this would read off the end of the bitcode file, just set the
+        // record to empty and return.
+        if (NewEnd > BitStream->getLastChar()) {
+          Vals.append(NumElts, 0);
+          NextChar = BitStream->getLastChar();
+          break;
+        }
+
+        // Otherwise, read the number of bytes.  If we can return a reference
+        // to the data, do so to avoid copying it.
+        if (BlobStart) {
+          *BlobStart = (const char*)NextChar;
+          *BlobLen = NumElts;
+        } else {
+          for (; NumElts; ++NextChar, --NumElts)
+            Vals.push_back(*NextChar);
+        }
+        // Skip over tail padding.
+        NextChar = NewEnd;
+      } else {
+        ReadAbbreviatedField(Op, Vals);
+      }
+    }
+
+    unsigned Code = (unsigned)Vals[0];
+    Vals.erase(Vals.begin());
+    return Code;
+  }
+
+  unsigned ReadRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
+                      const char *&BlobStart, unsigned &BlobLen) {
+    return ReadRecord(AbbrevID, Vals, &BlobStart, &BlobLen);
+  }
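+
+  // A typical read loop over this API looks like the following sketch
+  // (illustrative only; Cursor and Vals are assumed to be a BitstreamCursor
+  // and a SmallVector<uint64_t, 64> owned by the caller):
+  //
+  //   while (!Cursor.AtEndOfStream()) {
+  //     unsigned Code = Cursor.ReadCode();
+  //     if (Code == bitc::ENTER_SUBBLOCK)     { /* enter or SkipBlock() */ }
+  //     else if (Code == bitc::END_BLOCK)     { Cursor.ReadBlockEnd(); }
+  //     else if (Code == bitc::DEFINE_ABBREV) { Cursor.ReadAbbrevRecord(); }
+  //     else { Vals.clear(); unsigned RecCode = Cursor.ReadRecord(Code, Vals); }
+  //   }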
+
+  //===--------------------------------------------------------------------===//
+  // Abbrev Processing
+  //===--------------------------------------------------------------------===//
+
+  void ReadAbbrevRecord() {
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    unsigned NumOpInfo = ReadVBR(5);
+    for (unsigned i = 0; i != NumOpInfo; ++i) {
+      bool IsLiteral = Read(1) ? true : false;
+      if (IsLiteral) {
+        Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8)));
+        continue;
+      }
+
+      BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3);
+      if (BitCodeAbbrevOp::hasEncodingData(E))
+        Abbv->Add(BitCodeAbbrevOp(E, ReadVBR64(5)));
+      else
+        Abbv->Add(BitCodeAbbrevOp(E));
+    }
+    CurAbbrevs.push_back(Abbv);
+  }
+
+public:
+
+  bool ReadBlockInfoBlock() {
+    // If this is the second stream to get to the block info block, skip it.
+    if (BitStream->hasBlockInfoRecords())
+      return SkipBlock();
+
+    if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
+
+    SmallVector<uint64_t, 64> Record;
+    BitstreamReader::BlockInfo *CurBlockInfo = 0;
+
+    // Read all the records for this module.
+    while (1) {
+      unsigned Code = ReadCode();
+      if (Code == bitc::END_BLOCK)
+        return ReadBlockEnd();
+      if (Code == bitc::ENTER_SUBBLOCK) {
+        ReadSubBlockID();
+        if (SkipBlock()) return true;
+        continue;
+      }
+
+      // Read abbrev records, associate them with CurBID.
+      if (Code == bitc::DEFINE_ABBREV) {
+        if (!CurBlockInfo) return true;
+        ReadAbbrevRecord();
+
+        // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
+        // appropriate BlockInfo.
+        BitCodeAbbrev *Abbv = CurAbbrevs.back();
+        CurAbbrevs.pop_back();
+        CurBlockInfo->Abbrevs.push_back(Abbv);
+        continue;
+      }
+
+      // Read a record.
+      Record.clear();
+      switch (ReadRecord(Code, Record)) {
+      default: break;  // Default behavior, ignore unknown content.
+      case bitc::BLOCKINFO_CODE_SETBID:
+        if (Record.size() < 1) return true;
+        CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]);
+        break;
+      case bitc::BLOCKINFO_CODE_BLOCKNAME: {
+        if (!CurBlockInfo) return true;
+        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
+        std::string Name;
+        for (unsigned i = 0, e = Record.size(); i != e; ++i)
+          Name += (char)Record[i];
+        CurBlockInfo->Name = Name;
+        break;
+      }
+      case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
+        if (!CurBlockInfo) return true;
+        if (BitStream->isIgnoringBlockInfoNames()) break;  // Ignore name.
+        std::string Name;
+        for (unsigned i = 1, e = Record.size(); i != e; ++i)
+          Name += (char)Record[i];
+        CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0],
+                                                           Name));
+        break;
+      }
+      }
+    }
+  }
+};
+
+} // End llvm namespace
+
+#endif
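The blob path in ReadRecord above rounds byte counts up to a 32-bit boundary
with ((NumElts+3)&~3), mirroring the tail padding the writer emits. A worked
example (illustrative): a 5-byte blob occupies ((5+3)&~3) == 8 bytes, so the
cursor consumes 5 data bytes plus 3 pad bytes and the next field starts
word-aligned. The same arithmetic as a self-contained helper:

    // Round N up to the next multiple of 4, as both reader and writer do.
    static inline unsigned PaddedBlobSize(unsigned N) { return (N + 3) & ~3u; }
    // PaddedBlobSize(5) == 8, PaddedBlobSize(8) == 8, PaddedBlobSize(0) == 0.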
diff --git a/final/include/llvm/Bitcode/BitstreamWriter.h b/final/include/llvm/Bitcode/BitstreamWriter.h
new file mode 100644
index 00000000000..bfb3a4e49c5
--- /dev/null
+++ b/final/include/llvm/Bitcode/BitstreamWriter.h
@@ -0,0 +1,533 @@
+//===- BitstreamWriter.h - Low-level bitstream writer interface -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitstreamWriter class.  This class can be used to
+// write an arbitrary bitstream, regardless of its contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITSTREAM_WRITER_H
+#define BITSTREAM_WRITER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Bitcode/BitCodes.h"
+#include <vector>
+
+namespace llvm {
+
+class BitstreamWriter {
+  std::vector<unsigned char> &Out;
+
+  /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to
+  /// use.
+  unsigned CurBit;
+
+  /// CurValue - The current value.  Only bits < CurBit are valid.
+  uint32_t CurValue;
+
+  /// CurCodeSize - This is the declared size of code values used for the
+  /// current block, in bits.
+  unsigned CurCodeSize;
+
+  /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently
+  /// selected BLOCK ID.
+  unsigned BlockInfoCurBID;
+
+  /// CurAbbrevs - Abbrevs installed in this block.
+  std::vector<BitCodeAbbrev*> CurAbbrevs;
+
+  struct Block {
+    unsigned PrevCodeSize;
+    unsigned StartSizeWord;
+    std::vector<BitCodeAbbrev*> PrevAbbrevs;
+    Block(unsigned PCS, unsigned SSW) : PrevCodeSize(PCS), StartSizeWord(SSW) {}
+  };
+
+  /// BlockScope - This tracks the current blocks that we have entered.
+  std::vector<Block> BlockScope;
+
+  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
+  /// These describe abbreviations that all blocks of the specified ID inherit.
+  struct BlockInfo {
+    unsigned BlockID;
+    std::vector<BitCodeAbbrev*> Abbrevs;
+  };
+  std::vector<BlockInfo> BlockInfoRecords;
+
+public:
+  explicit BitstreamWriter(std::vector<unsigned char> &O)
+    : Out(O), CurBit(0), CurValue(0), CurCodeSize(2) {}
+
+  ~BitstreamWriter() {
+    assert(CurBit == 0 && "Unflushed data remaining");
+    assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance");
+
+    // Free the BlockInfoRecords.
+    while (!BlockInfoRecords.empty()) {
+      BlockInfo &Info = BlockInfoRecords.back();
+      // Free blockinfo abbrev info.
+      for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
+           i != e; ++i)
+        Info.Abbrevs[i]->dropRef();
+      BlockInfoRecords.pop_back();
+    }
+  }
+
+  std::vector<unsigned char> &getBuffer() { return Out; }
+
+  /// \brief Retrieve the current position in the stream, in bits.
+  uint64_t GetCurrentBitNo() const { return Out.size() * 8 + CurBit; }
+
+  //===--------------------------------------------------------------------===//
+  // Basic Primitives for emitting bits to the stream.
+  //===--------------------------------------------------------------------===//
+
+  void Emit(uint32_t Val, unsigned NumBits) {
+    assert(NumBits && NumBits <= 32 && "Invalid value size!");
+    assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
+    CurValue |= Val << CurBit;
+    if (CurBit + NumBits < 32) {
+      CurBit += NumBits;
+      return;
+    }
+
+    // Add the current word.
+    unsigned V = CurValue;
+    Out.push_back((unsigned char)(V >>  0));
+    Out.push_back((unsigned char)(V >>  8));
+    Out.push_back((unsigned char)(V >> 16));
+    Out.push_back((unsigned char)(V >> 24));
+
+    if (CurBit)
+      CurValue = Val >> (32-CurBit);
+    else
+      CurValue = 0;
+    CurBit = (CurBit+NumBits) & 31;
+  }
+
+  void Emit64(uint64_t Val, unsigned NumBits) {
+    if (NumBits <= 32)
+      Emit((uint32_t)Val, NumBits);
+    else {
+      Emit((uint32_t)Val, 32);
+      Emit((uint32_t)(Val >> 32), NumBits-32);
+    }
+  }
+
+  void FlushToWord() {
+    if (CurBit) {
+      unsigned V = CurValue;
+      Out.push_back((unsigned char)(V >>  0));
+      Out.push_back((unsigned char)(V >>  8));
+      Out.push_back((unsigned char)(V >> 16));
+      Out.push_back((unsigned char)(V >> 24));
+      CurBit = 0;
+      CurValue = 0;
+    }
+  }
+
+  void EmitVBR(uint32_t Val, unsigned NumBits) {
+    uint32_t Threshold = 1U << (NumBits-1);
+
+    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+    while (Val >= Threshold) {
+      Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits);
+      Val >>= NumBits-1;
+    }
+
+    Emit(Val, NumBits);
+  }
+
+  void EmitVBR64(uint64_t Val, unsigned NumBits) {
+    if ((uint32_t)Val == Val)
+      return EmitVBR((uint32_t)Val, NumBits);
+
+    uint64_t Threshold = 1U << (NumBits-1);
+
+    // Emit the bits with VBR encoding, NumBits-1 bits at a time.
+    while (Val >= Threshold) {
+      Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) |
+           (1 << (NumBits-1)), NumBits);
+      Val >>= NumBits-1;
+    }
+
+    Emit((uint32_t)Val, NumBits);
+  }
+
+  /// EmitCode - Emit the specified code.
+  void EmitCode(unsigned Val) {
+    Emit(Val, CurCodeSize);
+  }
+
+  // BackpatchWord - Backpatch a 32-bit word in the output with the specified
+  // value.
+  void BackpatchWord(unsigned ByteNo, unsigned NewWord) {
+    Out[ByteNo++] = (unsigned char)(NewWord >>  0);
+    Out[ByteNo++] = (unsigned char)(NewWord >>  8);
+    Out[ByteNo++] = (unsigned char)(NewWord >> 16);
+    Out[ByteNo  ] = (unsigned char)(NewWord >> 24);
+  }
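+
+  // Worked example (illustrative): after Emit(0x3, 2) and Emit(0x1F, 5),
+  // CurValue holds 0b1111111 (0x3 in bits 0-1, 0x1F in bits 2-6) and
+  // CurBit == 7. Nothing reaches Out until 32 bits accumulate or
+  // FlushToWord() pads the word and appends it little-endian.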
+
+  //===--------------------------------------------------------------------===//
+  // Block Manipulation
+  //===--------------------------------------------------------------------===//
+
+  /// getBlockInfo - If there is block info for the specified ID, return it,
+  /// otherwise return null.
+  BlockInfo *getBlockInfo(unsigned BlockID) {
+    // Common case, the most recent entry matches BlockID.
+    if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
+      return &BlockInfoRecords.back();
+
+    for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
+         i != e; ++i)
+      if (BlockInfoRecords[i].BlockID == BlockID)
+        return &BlockInfoRecords[i];
+    return 0;
+  }
+
+  void EnterSubblock(unsigned BlockID, unsigned CodeLen) {
+    // Block header:
+    //    [ENTER_SUBBLOCK, blockid, newcodelen, <align32bits>, blocklen]
+    EmitCode(bitc::ENTER_SUBBLOCK);
+    EmitVBR(BlockID, bitc::BlockIDWidth);
+    EmitVBR(CodeLen, bitc::CodeLenWidth);
+    FlushToWord();
+
+    unsigned BlockSizeWordLoc = static_cast<unsigned>(Out.size());
+    unsigned OldCodeSize = CurCodeSize;
+
+    // Emit a placeholder, which will be replaced when the block is popped.
+    Emit(0, bitc::BlockSizeWidth);
+
+    CurCodeSize = CodeLen;
+
+    // Push the outer block's abbrev set onto the stack, start out with an
+    // empty abbrev set.
+    BlockScope.push_back(Block(OldCodeSize, BlockSizeWordLoc/4));
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+
+    // If there is a blockinfo for this BlockID, add all the predefined abbrevs
+    // to the abbrev list.
+    if (BlockInfo *Info = getBlockInfo(BlockID)) {
+      for (unsigned i = 0, e = static_cast<unsigned>(Info->Abbrevs.size());
+           i != e; ++i) {
+        CurAbbrevs.push_back(Info->Abbrevs[i]);
+        Info->Abbrevs[i]->addRef();
+      }
+    }
+  }
+
+  void ExitBlock() {
+    assert(!BlockScope.empty() && "Block scope imbalance!");
+
+    // Delete all abbrevs.
+    for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
+         i != e; ++i)
+      CurAbbrevs[i]->dropRef();
+
+    const Block &B = BlockScope.back();
+
+    // Block tail:
+    //    [END_BLOCK, <align32bits>]
+    EmitCode(bitc::END_BLOCK);
+    FlushToWord();
+
+    // Compute the size of the block, in words, not counting the size field.
+    unsigned SizeInWords =
+      static_cast<unsigned>(Out.size())/4-B.StartSizeWord-1;
+    unsigned ByteNo = B.StartSizeWord*4;
+
+    // Update the block size field in the header of this sub-block.
+    BackpatchWord(ByteNo, SizeInWords);
+
+    // Restore the outer block's code size and abbrev table.
+    CurCodeSize = B.PrevCodeSize;
+    BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
+    BlockScope.pop_back();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Record Emission
+  //===--------------------------------------------------------------------===//
+
+private:
+  /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev
+  /// record.  This is a no-op, since the abbrev specifies the literal to use.
+  template<typename uintty>
+  void EmitAbbreviatedLiteral(const BitCodeAbbrevOp &Op, uintty V) {
+    assert(Op.isLiteral() && "Not a literal");
+    // If the abbrev specifies the literal value to use, don't emit
+    // anything.
+    assert(V == Op.getLiteralValue() &&
+           "Invalid abbrev for record!");
+  }
+
+  /// EmitAbbreviatedField - Emit a single scalar field value with the
+  /// specified encoding.
+  template<typename uintty>
+  void EmitAbbreviatedField(const BitCodeAbbrevOp &Op, uintty V) {
+    assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!");
+
+    // Encode the value as we are commanded.
+    switch (Op.getEncoding()) {
+    default: assert(0 && "Unknown encoding!");
+    case BitCodeAbbrevOp::Fixed:
+      if (Op.getEncodingData())
+        Emit((unsigned)V, (unsigned)Op.getEncodingData());
+      break;
+    case BitCodeAbbrevOp::VBR:
+      if (Op.getEncodingData())
+        EmitVBR64(V, (unsigned)Op.getEncodingData());
+      break;
+    case BitCodeAbbrevOp::Char6:
+      Emit(BitCodeAbbrevOp::EncodeChar6((char)V), 6);
+      break;
+    }
+  }
+
+  /// EmitRecordWithAbbrevImpl - This is the core implementation of the record
+  /// emission code.  If BlobData is non-null, then it specifies an array of
+  /// data that should be emitted as part of the Blob or Array operand that is
+  /// known to exist at the end of the record.
+  template<typename uintty>
+  void EmitRecordWithAbbrevImpl(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+                                StringRef Blob) {
+    const char *BlobData = Blob.data();
+    unsigned BlobLen = (unsigned) Blob.size();
+    unsigned AbbrevNo = Abbrev-bitc::FIRST_APPLICATION_ABBREV;
+    assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
+    BitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo];
+
+    EmitCode(Abbrev);
+
+    unsigned RecordIdx = 0;
+    for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+         i != e; ++i) {
+      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+      if (Op.isLiteral()) {
+        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
+        EmitAbbreviatedLiteral(Op, Vals[RecordIdx]);
+        ++RecordIdx;
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
+        // Array case.
+        assert(i+2 == e && "array op not second to last?");
+        const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
+
+        // If this record has blob data, emit it, otherwise we must have record
+        // entries to encode this way.
+        if (BlobData) {
+          assert(RecordIdx == Vals.size() &&
+                 "Blob data and record entries specified for array!");
+          // Emit a vbr6 to indicate the number of elements present.
+          EmitVBR(static_cast<uint32_t>(BlobLen), 6);
+
+          // Emit each field.
+          for (unsigned i = 0; i != BlobLen; ++i)
+            EmitAbbreviatedField(EltEnc, (unsigned char)BlobData[i]);
+
+          // Know that blob data is consumed for assertion below.
+          BlobData = 0;
+        } else {
+          // Emit a vbr6 to indicate the number of elements present.
+          EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
+
+          // Emit each field.
+          for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx)
+            EmitAbbreviatedField(EltEnc, Vals[RecordIdx]);
+        }
+      } else if (Op.getEncoding() == BitCodeAbbrevOp::Blob) {
+        // If this record has blob data, emit it, otherwise we must have record
+        // entries to encode this way.
+
+        // Emit a vbr6 to indicate the number of elements present.
+        if (BlobData) {
+          EmitVBR(static_cast<uint32_t>(BlobLen), 6);
+          assert(RecordIdx == Vals.size() &&
+                 "Blob data and record entries specified for blob operand!");
+        } else {
+          EmitVBR(static_cast<uint32_t>(Vals.size()-RecordIdx), 6);
+        }
+
+        // Flush to a 32-bit alignment boundary.
+        FlushToWord();
+        assert((Out.size() & 3) == 0 && "Not 32-bit aligned");
+
+        // Emit each field as a literal byte.
+        if (BlobData) {
+          for (unsigned i = 0; i != BlobLen; ++i)
+            Out.push_back((unsigned char)BlobData[i]);
+
+          // Know that blob data is consumed for assertion below.
+          BlobData = 0;
+        } else {
+          for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) {
+            assert(Vals[RecordIdx] < 256 && "Value too large to emit as blob");
+            Out.push_back((unsigned char)Vals[RecordIdx]);
+          }
+        }
+        // Align end to 32-bits.
+        while (Out.size() & 3)
+          Out.push_back(0);
+
+      } else {  // Single scalar field.
+        assert(RecordIdx < Vals.size() && "Invalid abbrev/record");
+        EmitAbbreviatedField(Op, Vals[RecordIdx]);
+        ++RecordIdx;
+      }
+    }
+    assert(RecordIdx == Vals.size() && "Not all record operands emitted!");
+    assert(BlobData == 0 &&
+           "Blob data specified for record that doesn't use it!");
+  }
+
+public:
+
+  /// EmitRecord - Emit the specified record to the stream, using an abbrev if
+  /// we have one to compress the output.
+  template<typename uintty>
+  void EmitRecord(unsigned Code, SmallVectorImpl<uintty> &Vals,
+                  unsigned Abbrev = 0) {
+    if (!Abbrev) {
+      // If we don't have an abbrev to use, emit this in its fully
+      // unabbreviated form.
+      EmitCode(bitc::UNABBREV_RECORD);
+      EmitVBR(Code, 6);
+      EmitVBR(static_cast<uint32_t>(Vals.size()), 6);
+      for (unsigned i = 0, e = static_cast<unsigned>(Vals.size()); i != e; ++i)
+        EmitVBR64(Vals[i], 6);
+      return;
+    }
+
+    // Insert the code into Vals to treat it uniformly.
+    Vals.insert(Vals.begin(), Code);
+
+    EmitRecordWithAbbrev(Abbrev, Vals);
+  }
+
+  /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation.
+  /// Unlike EmitRecord, the code for the record should be included in Vals as
+  /// the first entry.
+  template<typename uintty>
+  void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl<uintty> &Vals) {
+    EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef());
+  }
+
+  /// EmitRecordWithBlob - Emit the specified record to the stream, using an
+  /// abbrev that includes a blob at the end.  The blob data to emit is
+  /// specified by the pointer and length specified at the end.  In contrast to
+  /// EmitRecord, this routine expects that the first entry in Vals is the code
+  /// of the record.
+  template<typename uintty>
+  void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+                          StringRef Blob) {
+    EmitRecordWithAbbrevImpl(Abbrev, Vals, Blob);
+  }
+  template<typename uintty>
+  void EmitRecordWithBlob(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+                          const char *BlobData, unsigned BlobLen) {
+    return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(BlobData, BlobLen));
+  }
+
+  /// EmitRecordWithArray - Just like EmitRecordWithBlob, works with records
+  /// that end with an array.
+  template<typename uintty>
+  void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+                           StringRef Array) {
+    EmitRecordWithAbbrevImpl(Abbrev, Vals, Array);
+  }
+  template<typename uintty>
+  void EmitRecordWithArray(unsigned Abbrev, SmallVectorImpl<uintty> &Vals,
+                           const char *ArrayData, unsigned ArrayLen) {
+    return EmitRecordWithAbbrevImpl(Abbrev, Vals, StringRef(ArrayData,
+                                                            ArrayLen));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Abbrev Emission
+  //===--------------------------------------------------------------------===//
+
+private:
+  // Emit the abbreviation as a DEFINE_ABBREV record.
+  void EncodeAbbrev(BitCodeAbbrev *Abbv) {
+    EmitCode(bitc::DEFINE_ABBREV);
+    EmitVBR(Abbv->getNumOperandInfos(), 5);
+    for (unsigned i = 0, e = static_cast<unsigned>(Abbv->getNumOperandInfos());
+         i != e; ++i) {
+      const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
+      Emit(Op.isLiteral(), 1);
+      if (Op.isLiteral()) {
+        EmitVBR64(Op.getLiteralValue(), 8);
+      } else {
+        Emit(Op.getEncoding(), 3);
+        if (Op.hasEncodingData())
+          EmitVBR64(Op.getEncodingData(), 5);
+      }
+    }
+  }
+public:
+
+  /// EmitAbbrev - This emits an abbreviation to the stream.  Note that this
+  /// method takes ownership of the specified abbrev.
+  unsigned EmitAbbrev(BitCodeAbbrev *Abbv) {
+    // Emit the abbreviation as a record.
+    EncodeAbbrev(Abbv);
+    CurAbbrevs.push_back(Abbv);
+    return static_cast<unsigned>(CurAbbrevs.size())-1 +
+           bitc::FIRST_APPLICATION_ABBREV;
+  }
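+
+  // A minimal usage sketch (illustrative, not part of this header): define an
+  // abbreviation for a record of [code, VBR8 operand, Char6 array], then emit
+  // a record through it. MY_CODE stands in for a real record code.
+  //
+  //   BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  //   Abbv->Add(BitCodeAbbrevOp(MY_CODE));                   // literal code
+  //   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));   // one operand
+  //   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  //   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+  //   unsigned AbbrevID = Writer.EmitAbbrev(Abbv);           // takes ownership
+  //   SmallVector<uint64_t, 8> Vals;
+  //   Vals.push_back(MY_CODE);
+  //   Vals.push_back(42);
+  //   Writer.EmitRecordWithArray(AbbrevID, Vals, "name");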
+
+  //===--------------------------------------------------------------------===//
+  // BlockInfo Block Emission
+  //===--------------------------------------------------------------------===//
+
+  /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
+  void EnterBlockInfoBlock(unsigned CodeWidth) {
+    EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
+    BlockInfoCurBID = -1U;
+  }
+private:
+  /// SwitchToBlockID - If we aren't already talking about the specified block
+  /// ID, emit a BLOCKINFO_CODE_SETBID record.
+  void SwitchToBlockID(unsigned BlockID) {
+    if (BlockInfoCurBID == BlockID) return;
+    SmallVector<unsigned, 2> V;
+    V.push_back(BlockID);
+    EmitRecord(bitc::BLOCKINFO_CODE_SETBID, V);
+    BlockInfoCurBID = BlockID;
+  }
+
+  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
+    if (BlockInfo *BI = getBlockInfo(BlockID))
+      return *BI;
+
+    // Otherwise, add a new record.
+    BlockInfoRecords.push_back(BlockInfo());
+    BlockInfoRecords.back().BlockID = BlockID;
+    return BlockInfoRecords.back();
+  }
+
+public:
+
+  /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified
+  /// BlockID.
+  unsigned EmitBlockInfoAbbrev(unsigned BlockID, BitCodeAbbrev *Abbv) {
+    SwitchToBlockID(BlockID);
+    EncodeAbbrev(Abbv);
+
+    // Add the abbrev to the specified block record.
+    BlockInfo &Info = getOrCreateBlockInfo(BlockID);
+    Info.Abbrevs.push_back(Abbv);
+
+    return Info.Abbrevs.size()-1+bitc::FIRST_APPLICATION_ABBREV;
+  }
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Bitcode/LLVMBitCodes.h b/final/include/llvm/Bitcode/LLVMBitCodes.h
new file mode 100644
index 00000000000..7692bd28720
--- /dev/null
+++ b/final/include/llvm/Bitcode/LLVMBitCodes.h
@@ -0,0 +1,266 @@
+//===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines Bitcode enum values for LLVM IR bitcode files.
+//
+// The enum values defined in this file should be considered permanent.  If
+// new features are added, they should have values added at the end of the
+// respective lists.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_LLVMBITCODES_H
+#define LLVM_BITCODE_LLVMBITCODES_H
+
+#include "llvm/Bitcode/BitCodes.h"
+
+namespace llvm {
+namespace bitc {
+  // The only top-level block type defined is for a module.
+  enum BlockIDs {
+    // Blocks
+    MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID,
+
+    // Module sub-block id's.
+    PARAMATTR_BLOCK_ID,
+    TYPE_BLOCK_ID,
+    CONSTANTS_BLOCK_ID,
+    FUNCTION_BLOCK_ID,
+    TYPE_SYMTAB_BLOCK_ID,
+    VALUE_SYMTAB_BLOCK_ID,
+    METADATA_BLOCK_ID,
+    METADATA_ATTACHMENT_ID
+  };
+
+
+  /// MODULE blocks have a number of optional fields and subblocks.
+  enum ModuleCodes {
+    MODULE_CODE_VERSION     = 1,    // VERSION:     [version#]
+    MODULE_CODE_TRIPLE      = 2,    // TRIPLE:      [strchr x N]
+    MODULE_CODE_DATALAYOUT  = 3,    // DATALAYOUT:  [strchr x N]
+    MODULE_CODE_ASM         = 4,    // ASM:         [strchr x N]
+    MODULE_CODE_SECTIONNAME = 5,    // SECTIONNAME: [strchr x N]
+    MODULE_CODE_DEPLIB      = 6,    // DEPLIB:      [strchr x N]
+
+    // GLOBALVAR: [pointer type, isconst, initid,
+    //             linkage, alignment, section, visibility, threadlocal]
+    MODULE_CODE_GLOBALVAR   = 7,
+
+    // FUNCTION:  [type, callingconv, isproto, linkage, paramattrs, alignment,
+    //             section, visibility]
+    MODULE_CODE_FUNCTION    = 8,
+
+    // ALIAS: [alias type, aliasee val#, linkage]
+    MODULE_CODE_ALIAS       = 9,
+
+    /// MODULE_CODE_PURGEVALS: [numvals]
+    MODULE_CODE_PURGEVALS   = 10,
+
+    MODULE_CODE_GCNAME      = 11    // GCNAME: [strchr x N]
+  };
+
+  /// PARAMATTR blocks have code for defining a parameter attribute set.
+  enum AttributeCodes {
+    PARAMATTR_CODE_ENTRY = 1   // ENTRY: [paramidx0, attr0, paramidx1, attr1...]
+  };
+
+  /// TYPE blocks have codes for each type primitive they use.
+ enum TypeCodes { + TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] + + // Type Codes + TYPE_CODE_VOID = 2, // VOID + TYPE_CODE_FLOAT = 3, // FLOAT + TYPE_CODE_DOUBLE = 4, // DOUBLE + TYPE_CODE_LABEL = 5, // LABEL + TYPE_CODE_OPAQUE = 6, // OPAQUE + TYPE_CODE_INTEGER = 7, // INTEGER: [width] + TYPE_CODE_POINTER = 8, // POINTER: [pointee type] + TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N] + TYPE_CODE_STRUCT = 10, // STRUCT: [ispacked, eltty x N] + TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] + TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] + + // These are not with the other floating point types because they're + // a late addition, and putting them in the right place breaks + // binary compatibility. + TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE + TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa) + TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles) + + TYPE_CODE_METADATA = 16, // METADATA + + TYPE_CODE_X86_MMX = 17 // X86 MMX + }; + + // The type symbol table only has one code (TST_ENTRY_CODE). + enum TypeSymtabCodes { + TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] + }; + + // The value symbol table only has one code (VST_ENTRY_CODE). + enum ValueSymtabCodes { + VST_CODE_ENTRY = 1, // VST_ENTRY: [valid, namechar x N] + VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N] + }; + + enum MetadataCodes { + METADATA_STRING = 1, // MDSTRING: [values] + // FIXME: Remove NODE in favor of NODE2 in LLVM 3.0 + METADATA_NODE = 2, // NODE with potentially invalid metadata + // FIXME: Remove FN_NODE in favor of FN_NODE2 in LLVM 3.0 + METADATA_FN_NODE = 3, // FN_NODE with potentially invalid metadata + METADATA_NAME = 4, // STRING: [values] + // FIXME: Remove NAMED_NODE in favor of NAMED_NODE2 in LLVM 3.0 + METADATA_NAMED_NODE = 5, // NAMED_NODE with potentially invalid metadata + METADATA_KIND = 6, // [n x [id, name]] + // FIXME: Remove ATTACHMENT in favor of ATTACHMENT2 in LLVM 3.0 + METADATA_ATTACHMENT = 7, // ATTACHMENT with potentially invalid metadata + METADATA_NODE2 = 8, // NODE2: [n x (type num, value num)] + METADATA_FN_NODE2 = 9, // FN_NODE2: [n x (type num, value num)] + METADATA_NAMED_NODE2 = 10, // NAMED_NODE2: [n x mdnodes] + METADATA_ATTACHMENT2 = 11 // [m x [value, [n x [id, mdnode]]] + }; + // The constants block (CONSTANTS_BLOCK_ID) describes emission for each + // constant and maintains an implicit current type value. 
+ enum ConstantsCodes { + CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] + CST_CODE_NULL = 2, // NULL + CST_CODE_UNDEF = 3, // UNDEF + CST_CODE_INTEGER = 4, // INTEGER: [intval] + CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] + CST_CODE_FLOAT = 6, // FLOAT: [fpval] + CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] + CST_CODE_STRING = 8, // STRING: [values] + CST_CODE_CSTRING = 9, // CSTRING: [values] + CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] + CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] + CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] + CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] + CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] + CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] + CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] + CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] + CST_CODE_INLINEASM = 18, // INLINEASM: [sideeffect,asmstr,conststr] + CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] + CST_CODE_CE_INBOUNDS_GEP = 20,// INBOUNDS_GEP: [n x operands] + CST_CODE_BLOCKADDRESS = 21 // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] + }; + + /// CastOpcodes - These are values used in the bitcode files to encode which + /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum CastOpcodes { + CAST_TRUNC = 0, + CAST_ZEXT = 1, + CAST_SEXT = 2, + CAST_FPTOUI = 3, + CAST_FPTOSI = 4, + CAST_UITOFP = 5, + CAST_SITOFP = 6, + CAST_FPTRUNC = 7, + CAST_FPEXT = 8, + CAST_PTRTOINT = 9, + CAST_INTTOPTR = 10, + CAST_BITCAST = 11 + }; + + /// BinaryOpcodes - These are values used in the bitcode files to encode which + /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum BinaryOpcodes { + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_MUL = 2, + BINOP_UDIV = 3, + BINOP_SDIV = 4, // overloaded for FP + BINOP_UREM = 5, + BINOP_SREM = 6, // overloaded for FP + BINOP_SHL = 7, + BINOP_LSHR = 8, + BINOP_ASHR = 9, + BINOP_AND = 10, + BINOP_OR = 11, + BINOP_XOR = 12 + }; + + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing + /// OverflowingBinaryOperator's SubclassOptionalData contents. + enum OverflowingBinaryOperatorOptionalFlags { + OBO_NO_UNSIGNED_WRAP = 0, + OBO_NO_SIGNED_WRAP = 1 + }; + + /// PossiblyExactOperatorOptionalFlags - Flags for serializing + /// PossiblyExactOperator's SubclassOptionalData contents. + enum PossiblyExactOperatorOptionalFlags { + PEO_EXACT = 0 + }; + + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It + // can contain a constant block (CONSTANTS_BLOCK_ID). 
+ enum FunctionCodes { + FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] + + FUNC_CODE_INST_BINOP = 2, // BINOP: [opcode, ty, opval, opval] + FUNC_CODE_INST_CAST = 3, // CAST: [opcode, ty, opty, opval] + FUNC_CODE_INST_GEP = 4, // GEP: [n x operands] + FUNC_CODE_INST_SELECT = 5, // SELECT: [ty, opval, opval, opval] + FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opty, opval, opval] + FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [ty, opval, opval, opval] + FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [ty, opval, opval, opval] + FUNC_CODE_INST_CMP = 9, // CMP: [opty, opval, opval, pred] + + FUNC_CODE_INST_RET = 10, // RET: [opty,opval] + FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] + FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] + FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] + FUNC_CODE_INST_UNWIND = 14, // UNWIND + FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE + + FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] + FUNC_CODE_INST_MALLOC = 17, // MALLOC: [instty, op, align] + FUNC_CODE_INST_FREE = 18, // FREE: [opty, op] + FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, op, align] + FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol] + // FIXME: Remove STORE in favor of STORE2 in LLVM 3.0 + FUNC_CODE_INST_STORE = 21, // STORE: [valty,val,ptr, align, vol] + // FIXME: Remove CALL in favor of CALL2 in LLVM 3.0 + FUNC_CODE_INST_CALL = 22, // CALL with potentially invalid metadata + FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty] + // This store code encodes the pointer type, rather than the value type + // this is so information only available in the pointer type (e.g. address + // spaces) is retained. + FUNC_CODE_INST_STORE2 = 24, // STORE: [ptrty,ptr,val, align, vol] + // FIXME: Remove GETRESULT in favor of EXTRACTVAL in LLVM 3.0 + FUNC_CODE_INST_GETRESULT = 25, // GETRESULT: [ty, opval, n] + FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands] + FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands] + // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to + // support legacy vicmp/vfcmp instructions. + FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred] + // new select on i1 or [N x i1] + FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred] + FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands] + FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...] + + // FIXME: Remove DEBUG_LOC in favor of DEBUG_LOC2 in LLVM 3.0 + FUNC_CODE_DEBUG_LOC = 32, // DEBUG_LOC with potentially invalid metadata + FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN + + FUNC_CODE_INST_CALL2 = 34, // CALL2: [attr, fnty, fnid, args...] + + FUNC_CODE_DEBUG_LOC2 = 35 // DEBUG_LOC2: [Line,Col,ScopeVal, IAVal] + }; +} // End bitc namespace +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Bitcode/ReaderWriter.h b/final/include/llvm/Bitcode/ReaderWriter.h new file mode 100644 index 00000000000..fa754c01462 --- /dev/null +++ b/final/include/llvm/Bitcode/ReaderWriter.h @@ -0,0 +1,145 @@ +//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write LLVM bitcode files/streams. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_H +#define LLVM_BITCODE_H + +#include + +namespace llvm { + class Module; + class MemoryBuffer; + class ModulePass; + class BitstreamWriter; + class LLVMContext; + class raw_ostream; + + /// getLazyBitcodeModule - Read the header of the specified bitcode buffer + /// and prepare for lazy deserialization of function bodies. If successful, + /// this takes ownership of 'buffer' and returns a non-null pointer. On + /// error, this returns null, *does not* take ownership of Buffer, and fills + /// in *ErrMsg with an error description if ErrMsg is non-null. + Module *getLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg = 0); + + /// getBitcodeTargetTriple - Read the header of the specified bitcode + /// buffer and extract just the triple information. If successful, + /// this returns a string and *does not* take ownership + /// of 'buffer'. On error, this returns "", and fills in *ErrMsg + /// if ErrMsg is non-null. + std::string getBitcodeTargetTriple(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg = 0); + + /// ParseBitcodeFile - Read the specified bitcode file, returning the module. + /// If an error occurs, this returns null and fills in *ErrMsg if it is + /// non-null. This method *never* takes ownership of Buffer. + Module *ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + std::string *ErrMsg = 0); + + /// WriteBitcodeToFile - Write the specified module to the specified + /// raw output stream. For streams where it matters, the given stream + /// should be in "binary" mode. + void WriteBitcodeToFile(const Module *M, raw_ostream &Out); + + /// WriteBitcodeToStream - Write the specified module to the specified + /// raw output stream. + void WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream); + + /// createBitcodeWriterPass - Create and return a pass that writes the module + /// to the specified ostream. + ModulePass *createBitcodeWriterPass(raw_ostream &Str); + + + /// isBitcodeWrapper - Return true if the given bytes are the magic bytes + /// for an LLVM IR bitcode wrapper. + /// + static inline bool isBitcodeWrapper(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + // See if you can find the hidden message in the magic bytes :-). + // (Hint: it's a little-endian encoding.) + return BufPtr != BufEnd && + BufPtr[0] == 0xDE && + BufPtr[1] == 0xC0 && + BufPtr[2] == 0x17 && + BufPtr[3] == 0x0B; + } + + /// isRawBitcode - Return true if the given bytes are the magic bytes for + /// raw LLVM IR bitcode (without a wrapper). + /// + static inline bool isRawBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + // These bytes sort of have a hidden message, but it's not in + // little-endian this time, and it's a little redundant. + return BufPtr != BufEnd && + BufPtr[0] == 'B' && + BufPtr[1] == 'C' && + BufPtr[2] == 0xc0 && + BufPtr[3] == 0xde; + } + + /// isBitcode - Return true if the given bytes are the magic bytes for + /// LLVM IR bitcode, either with or without a wrapper. + /// + static bool inline isBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + return isBitcodeWrapper(BufPtr, BufEnd) || + isRawBitcode(BufPtr, BufEnd); + } + + /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special + /// header for padding or other reasons. 
The format of this header is: + /// + /// struct bc_header { + /// uint32_t Magic; // 0x0B17C0DE + /// uint32_t Version; // Version, currently always 0. + /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. + /// uint32_t BitcodeSize; // Size of traditional bitcode file. + /// ... potentially other gunk ... + /// }; + /// + /// This function is called when we find a file with a matching magic number. + /// In this case, skip down to the subsection of the file that is actually a + /// BC file. + static inline bool SkipBitcodeWrapperHeader(unsigned char *&BufPtr, + unsigned char *&BufEnd) { + enum { + KnownHeaderSize = 4*4, // Size of header we read. + OffsetField = 2*4, // Offset in bytes to Offset field. + SizeField = 3*4 // Offset in bytes to Size field. + }; + + // Must contain the header! + if (BufEnd-BufPtr < KnownHeaderSize) return true; + + unsigned Offset = ( BufPtr[OffsetField ] | + (BufPtr[OffsetField+1] << 8) | + (BufPtr[OffsetField+2] << 16) | + (BufPtr[OffsetField+3] << 24)); + unsigned Size = ( BufPtr[SizeField ] | + (BufPtr[SizeField +1] << 8) | + (BufPtr[SizeField +2] << 16) | + (BufPtr[SizeField +3] << 24)); + + // Verify that Offset+Size fits in the file. + if (Offset+Size > unsigned(BufEnd-BufPtr)) + return true; + BufPtr += Offset; + BufEnd = BufPtr+Size; + return false; + } +} // End llvm namespace + +#endif diff --git a/final/include/llvm/CMakeLists.txt b/final/include/llvm/CMakeLists.txt new file mode 100644 index 00000000000..0c3ca1cd0c5 --- /dev/null +++ b/final/include/llvm/CMakeLists.txt @@ -0,0 +1,21 @@ +set(LLVM_TARGET_DEFINITIONS Intrinsics.td) + +tablegen(Intrinsics.gen -gen-intrinsic) + +add_custom_target(intrinsics_gen ALL + DEPENDS ${llvm_builded_incs_dir}/Intrinsics.gen) +set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning") + +set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} intrinsics_gen PARENT_SCOPE) + +if( MSVC_IDE OR XCODE ) + # Creates a dummy target containing all headers for the benefit of + # Visual Studio users. + file(GLOB_RECURSE headers *.h) + add_td_sources(headers) + add_library(llvm_headers_do_not_build EXCLUDE_FROM_ALL + # We need at least one source file: + ${LLVM_MAIN_SRC_DIR}/lib/Transforms/Hello/Hello.cpp + ${headers}) + set_target_properties(llvm_headers_do_not_build PROPERTIES FOLDER "Misc") +endif() diff --git a/final/include/llvm/CallGraphSCCPass.h b/final/include/llvm/CallGraphSCCPass.h new file mode 100644 index 00000000000..7154aa3259d --- /dev/null +++ b/final/include/llvm/CallGraphSCCPass.h @@ -0,0 +1,104 @@ +//===- CallGraphSCCPass.h - Pass that operates BU on call graph -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CallGraphSCCPass class, which is used for passes which +// are implemented as bottom-up traversals on the call graph. Because there may +// be cycles in the call graph, passes of this type operate on the call-graph in +// SCC order: that is, they process function bottom-up, except for recursive +// functions, which they process all at once. +// +// These passes are inherently interprocedural, and are required to keep the +// call graph up-to-date if they do anything which could modify it. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CALL_GRAPH_SCC_PASS_H
+#define LLVM_CALL_GRAPH_SCC_PASS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/CallGraph.h"
+
+namespace llvm {
+
+class CallGraphNode;
+class CallGraph;
+class PMStack;
+class CallGraphSCC;
+
+class CallGraphSCCPass : public Pass {
+public:
+  explicit CallGraphSCCPass(char &pid) : Pass(PT_CallGraphSCC, pid) {}
+
+  /// createPrinterPass - Get a pass that prints the Module
+  /// corresponding to a CallGraph.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  /// doInitialization - This method is called before the SCCs of the program
+  /// have been processed, allowing the pass to do initialization as necessary.
+  virtual bool doInitialization(CallGraph &CG) {
+    return false;
+  }
+
+  /// runOnSCC - This method should be implemented by the subclass to perform
+  /// whatever action is necessary for the specified SCC.  Note that
+  /// non-recursive (or only self-recursive) functions will have an SCC size of
+  /// 1, where recursive portions of the call graph will have SCC size > 1.
+  ///
+  /// SCC passes that add or delete functions to the SCC are required to update
+  /// the SCC list, otherwise stale pointers may be dereferenced.
+  ///
+  virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
+
+  /// doFinalization - This method is called after the SCCs of the program
+  /// have been processed, allowing the pass to do final cleanup as necessary.
+  virtual bool doFinalization(CallGraph &CG) {
+    return false;
+  }
+
+  /// Assign pass manager to manage this pass.
+  virtual void assignPassManager(PMStack &PMS,
+                                 PassManagerType PMT);
+
+  /// Return what kind of Pass Manager can manage this pass.
+  virtual PassManagerType getPotentialPassManagerType() const {
+    return PMT_CallGraphPassManager;
+  }
+
+  /// getAnalysisUsage - For this class, we declare that we require and
+  /// preserve the call graph.  If the derived class implements this method,
+  /// it should always explicitly call the implementation here.
+  virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+};
+
+/// CallGraphSCC - This is a single SCC that a CallGraphSCCPass is run on.
+class CallGraphSCC {
+  void *Context;  // The CGPassManager object that is vending this.
+  std::vector<CallGraphNode*> Nodes;
+public:
+  CallGraphSCC(void *context) : Context(context) {}
+
+  void initialize(CallGraphNode *const *I, CallGraphNode *const *E) {
+    Nodes.assign(I, E);
+  }
+
+  bool isSingular() const { return Nodes.size() == 1; }
+  unsigned size() const { return Nodes.size(); }
+
+  /// ReplaceNode - This informs the SCC and the pass manager that the
+  /// specified Old node has been deleted, and New is to be used in its place.
+  void ReplaceNode(CallGraphNode *Old, CallGraphNode *New);
+
+  typedef std::vector<CallGraphNode*>::const_iterator iterator;
+  iterator begin() const { return Nodes.begin(); }
+  iterator end() const { return Nodes.end(); }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CallingConv.h b/final/include/llvm/CallingConv.h
new file mode 100644
index 00000000000..4c5ee626709
--- /dev/null
+++ b/final/include/llvm/CallingConv.h
@@ -0,0 +1,103 @@
+//===-- llvm/CallingConv.h - LLVM Calling Conventions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines LLVM's set of calling conventions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CALLINGCONV_H +#define LLVM_CALLINGCONV_H + +namespace llvm { + +/// CallingConv Namespace - This namespace contains an enum with a value for +/// the well-known calling conventions. +/// +namespace CallingConv { + /// A set of enums which specify the assigned numeric values for known llvm + /// calling conventions. + /// @brief LLVM Calling Convention Representation + enum ID { + /// C - The default llvm calling convention, compatible with C. This + /// convention is the only calling convention that supports varargs calls. + /// As with typical C calling conventions, the callee/caller have to + /// tolerate certain amounts of prototype mismatch. + C = 0, + + // Generic LLVM calling conventions. None of these calling conventions + // support varargs calls, and all assume that the caller and callee + // prototype exactly match. + + /// Fast - This calling convention attempts to make calls as fast as + /// possible (e.g. by passing things in registers). + Fast = 8, + + // Cold - This calling convention attempts to make code in the caller as + // efficient as possible under the assumption that the call is not commonly + // executed. As such, these calls often preserve all registers so that the + // call does not break any live ranges in the caller side. + Cold = 9, + + // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC). + GHC = 10, + + // Target - This is the start of the target-specific calling conventions, + // e.g. fastcall and thiscall on X86. + FirstTargetCC = 64, + + /// X86_StdCall - stdcall is the calling conventions mostly used by the + /// Win32 API. It is basically the same as the C convention with the + /// difference in that the callee is responsible for popping the arguments + /// from the stack. + X86_StdCall = 64, + + /// X86_FastCall - 'fast' analog of X86_StdCall. Passes first two arguments + /// in ECX:EDX registers, others - via stack. Callee is responsible for + /// stack cleaning. + X86_FastCall = 65, + + /// ARM_APCS - ARM Procedure Calling Standard calling convention (obsolete, + /// but still used on some targets). + ARM_APCS = 66, + + /// ARM_AAPCS - ARM Architecture Procedure Calling Standard calling + /// convention (aka EABI). Soft float variant. + ARM_AAPCS = 67, + + /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI. + ARM_AAPCS_VFP = 68, + + /// MSP430_INTR - Calling convention used for MSP430 interrupt routines. + MSP430_INTR = 69, + + /// X86_ThisCall - Similar to X86_StdCall. Passes first argument in ECX, + /// others via stack. Callee is responsible for stack cleaning. MSVC uses + /// this by default for methods in its ABI. + X86_ThisCall = 70, + + /// PTX_Kernel - Call to a PTX kernel. + /// Passes all arguments in parameter space. + PTX_Kernel = 71, + + /// PTX_Device - Call to a PTX device function. + /// Passes all arguments in register or parameter space. + PTX_Device = 72, + + /// MBLAZE_INTR - Calling convention used for MBlaze interrupt routines. + MBLAZE_INTR = 73, + + /// MBLAZE_INTR - Calling convention used for MBlaze interrupt support + /// routines (i.e. GCC's save_volatiles attribute). 
+    MBLAZE_SVOL = 74
+  };
+} // End CallingConv namespace
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/Analysis.h b/final/include/llvm/CodeGen/Analysis.h
new file mode 100644
index 00000000000..78bf9fc11aa
--- /dev/null
+++ b/final/include/llvm/CodeGen/Analysis.h
@@ -0,0 +1,85 @@
+//===- CodeGen/Analysis.h - CodeGen LLVM IR Analysis Utilities --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANALYSIS_H
+#define LLVM_CODEGEN_ANALYSIS_H
+
+#include "llvm/Instructions.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/Support/CallSite.h"
+
+namespace llvm {
+
+class GlobalVariable;
+class TargetLowering;
+class SDNode;
+class SelectionDAG;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned ComputeLinearIndex(const Type *Ty,
+                            const unsigned *Indices,
+                            const unsigned *IndicesEnd,
+                            unsigned CurIndex = 0);
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                     SmallVectorImpl<EVT> &ValueVTs,
+                     SmallVectorImpl<uint64_t> *Offsets = 0,
+                     uint64_t StartingOffset = 0);
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *ExtractTypeInfo(Value *V);
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+                               const TargetLowering &TLI);
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code.  This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred);
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call.  This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
diff --git a/final/include/llvm/CodeGen/AsmPrinter.h b/final/include/llvm/CodeGen/AsmPrinter.h
new file mode 100644
index 00000000000..a071febb102
--- /dev/null
+++ b/final/include/llvm/CodeGen/AsmPrinter.h
@@ -0,0 +1,462 @@
+//===-- llvm/CodeGen/AsmPrinter.h - AsmPrinter Framework --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a class to be used as the base class for target specific
+// asm writers.  This class primarily handles common functionality used by
+// all asm writers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_H
+#define LLVM_CODEGEN_ASMPRINTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  class BlockAddress;
+  class GCStrategy;
+  class Constant;
+  class ConstantArray;
+  class ConstantFP;
+  class ConstantInt;
+  class ConstantStruct;
+  class ConstantVector;
+  class GCMetadataPrinter;
+  class GlobalValue;
+  class GlobalVariable;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MachineLocation;
+  class MachineLoopInfo;
+  class MachineLoop;
+  class MachineConstantPool;
+  class MachineConstantPoolEntry;
+  class MachineConstantPoolValue;
+  class MachineJumpTableInfo;
+  class MachineModuleInfo;
+  class MachineMove;
+  class MCAsmInfo;
+  class MCInst;
+  class MCContext;
+  class MCSection;
+  class MCStreamer;
+  class MCSymbol;
+  class MDNode;
+  class DwarfDebug;
+  class DwarfException;
+  class Mangler;
+  class TargetLoweringObjectFile;
+  class TargetData;
+  class TargetMachine;
+  class Twine;
+  class Type;
+
+  /// AsmPrinter - This class is intended to be used as a driving class for all
+  /// asm writers.
+  class AsmPrinter : public MachineFunctionPass {
+  public:
+    /// Target machine description.
+    ///
+    TargetMachine &TM;
+
+    /// Target Asm Printer information.
+    ///
+    const MCAsmInfo *MAI;
+
+    /// OutContext - This is the context for the output file that we are
+    /// streaming.  This owns all of the global MC-related objects for the
+    /// generated translation unit.
+    MCContext &OutContext;
+
+    /// OutStreamer - This is the MCStreamer object for the file we are
+    /// generating.  This contains the transient state for the current
+    /// translation unit that we are generating (such as the current section
+    /// etc).
+    MCStreamer &OutStreamer;
+
+    /// The current machine function.
+    const MachineFunction *MF;
+
+    /// MMI - This is a pointer to the current MachineModuleInfo.
+    MachineModuleInfo *MMI;
+
+    /// Name-mangler for global names.
+    ///
+    Mangler *Mang;
+
+    /// The symbol for the current function.  This is recalculated at the
+    /// beginning of each call to runOnMachineFunction().
+    ///
+    MCSymbol *CurrentFnSym;
+
+  private:
+    // GCMetadataPrinters - The garbage collection metadata printer table.
+    void *GCMetadataPrinters;  // Really a DenseMap<GCStrategy*,GCMetadataPrinter*>.
+
+    /// VerboseAsm - Emit comments in assembly output if this is true.
+    ///
+    bool VerboseAsm;
+    static char ID;
+
+    /// If VerboseAsm is set, a pointer to the loop info for this
+    /// function.
+ MachineLoopInfo *LI; + + /// DD - If the target supports dwarf debug info, this pointer is non-null. + DwarfDebug *DD; + + /// DE - If the target supports dwarf exception info, this pointer is + /// non-null. + DwarfException *DE; + + protected: + explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); + + public: + virtual ~AsmPrinter(); + + /// isVerbose - Return true if assembly output should contain comments. + /// + bool isVerbose() const { return VerboseAsm; } + + /// getFunctionNumber - Return a unique ID for the current function. + /// + unsigned getFunctionNumber() const; + + /// getObjFileLowering - Return information about object file lowering. + const TargetLoweringObjectFile &getObjFileLowering() const; + + /// getTargetData - Return information about data layout. + const TargetData &getTargetData() const; + + /// getCurrentSection() - Return the current section we are emitting to. + const MCSection *getCurrentSection() const; + + + //===------------------------------------------------------------------===// + // MachineFunctionPass Implementation. + //===------------------------------------------------------------------===// + + /// getAnalysisUsage - Record analysis usage. + /// + void getAnalysisUsage(AnalysisUsage &AU) const; + + /// doInitialization - Set up the AsmPrinter when we are working on a new + /// module. If your pass overrides this, it must make sure to explicitly + /// call this implementation. + bool doInitialization(Module &M); + + /// doFinalization - Shut down the asmprinter. If you override this in your + /// pass, you must make sure to call it explicitly. + bool doFinalization(Module &M); + + /// runOnMachineFunction - Emit the specified function out to the + /// OutStreamer. + virtual bool runOnMachineFunction(MachineFunction &MF) { + SetupMachineFunction(MF); + EmitFunctionHeader(); + EmitFunctionBody(); + return false; + } + + //===------------------------------------------------------------------===// + // Coarse grained IR lowering routines. + //===------------------------------------------------------------------===// + + /// SetupMachineFunction - This should be called when a new MachineFunction + /// is being processed from runOnMachineFunction. + void SetupMachineFunction(MachineFunction &MF); + + /// EmitFunctionHeader - This method emits the header for the current + /// function. + void EmitFunctionHeader(); + + /// EmitFunctionBody - This method emits the body and trailer for a + /// function. + void EmitFunctionBody(); + + /// EmitConstantPool - Print to the current output stream assembly + /// representations of the constants in the constant pool MCP. This is + /// used to print out constants which have been "spilled to memory" by + /// the code generator. + /// + virtual void EmitConstantPool(); + + /// EmitJumpTableInfo - Print assembly representations of the jump tables + /// used by the current function to the current output stream. + /// + void EmitJumpTableInfo(); + + /// EmitGlobalVariable - Emit the specified global variable to the .s file. + virtual void EmitGlobalVariable(const GlobalVariable *GV); + + /// EmitSpecialLLVMGlobal - Check to see if the specified global is a + /// special global used by LLVM. If so, emit it and return true, otherwise + /// do nothing and return false. + bool EmitSpecialLLVMGlobal(const GlobalVariable *GV); + + /// EmitAlignment - Emit an alignment directive to the specified power of + /// two boundary. For example, if you pass in 3 here, you will get an 8 + /// byte alignment. 
If a global value is specified, and if that global has
+    /// an explicit alignment requested, it will override the alignment
+    /// request if required for correctness.
+    ///
+    void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const;
+
+    /// EmitBasicBlockStart - This method prints the label for the specified
+    /// MachineBasicBlock, an alignment (if present) and a comment describing
+    /// it if appropriate.
+    void EmitBasicBlockStart(const MachineBasicBlock *MBB) const;
+
+    /// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+    void EmitGlobalConstant(const Constant *CV, unsigned AddrSpace = 0);
+
+
+    //===------------------------------------------------------------------===//
+    // Overridable Hooks
+    //===------------------------------------------------------------------===//
+
+    // Targets can, or in the case of EmitInstruction, must implement these to
+    // customize output.
+
+    /// EmitStartOfAsmFile - This virtual method can be overridden by targets
+    /// that want to emit something at the start of their file.
+    virtual void EmitStartOfAsmFile(Module &) {}
+
+    /// EmitEndOfAsmFile - This virtual method can be overridden by targets
+    /// that want to emit something at the end of their file.
+    virtual void EmitEndOfAsmFile(Module &) {}
+
+    /// EmitFunctionBodyStart - Targets can override this to emit stuff before
+    /// the first basic block in the function.
+    virtual void EmitFunctionBodyStart() {}
+
+    /// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+    /// the last basic block in the function.
+    virtual void EmitFunctionBodyEnd() {}
+
+    /// EmitInstruction - Targets should implement this to emit instructions.
+    virtual void EmitInstruction(const MachineInstr *) {
+      assert(0 && "EmitInstruction not implemented");
+    }
+
+    virtual void EmitFunctionEntryLabel();
+
+    virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+
+    /// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+    /// exactly one predecessor and the control transfer mechanism between
+    /// the predecessor and this block is a fall-through.
+    virtual bool
+    isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const;
+
+    //===------------------------------------------------------------------===//
+    // Symbol Lowering Routines.
+    //===------------------------------------------------------------------===//
+  public:
+
+    /// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+    /// temporary label with the specified stem and unique ID.
+    MCSymbol *GetTempSymbol(StringRef Name, unsigned ID) const;
+
+    /// GetTempSymbol - Return an assembler temporary label with the specified
+    /// stem.
+    MCSymbol *GetTempSymbol(StringRef Name) const;
+
+
+    /// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+    /// the global value name as its base, with the specified suffix, and where
+    /// the symbol is forced to have private linkage if ForcePrivate is true.
+    MCSymbol *GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+                                           StringRef Suffix,
+                                           bool ForcePrivate = true) const;
+
+    /// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+    /// ExternalSymbol.
+    MCSymbol *GetExternalSymbolSymbol(StringRef Sym) const;
+
+    /// GetCPISymbol - Return the symbol for the specified constant pool entry.
+    MCSymbol *GetCPISymbol(unsigned CPID) const;
+
+    /// GetJTISymbol - Return the symbol for the specified jump table entry.
+    MCSymbol *GetJTISymbol(unsigned JTID, bool isLinkerPrivate = false) const;
+
+    /// GetJTSetSymbol - Return the symbol for the specified jump table .set.
+    /// FIXME: privatize to AsmPrinter.
+    MCSymbol *GetJTSetSymbol(unsigned UID, unsigned MBBID) const;
+
+    /// GetBlockAddressSymbol - Return the MCSymbol used to satisfy
+    /// BlockAddress uses of the specified basic block.
+    MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA) const;
+    MCSymbol *GetBlockAddressSymbol(const BasicBlock *BB) const;
+
+    //===------------------------------------------------------------------===//
+    // Emission Helper Routines.
+    //===------------------------------------------------------------------===//
+  public:
+    /// printOffset - This is just a convenient handler for printing offsets.
+    void printOffset(int64_t Offset, raw_ostream &OS) const;
+
+    /// EmitInt8 - Emit a byte directive and value.
+    ///
+    void EmitInt8(int Value) const;
+
+    /// EmitInt16 - Emit a short directive and value.
+    ///
+    void EmitInt16(int Value) const;
+
+    /// EmitInt32 - Emit a long directive and value.
+    ///
+    void EmitInt32(int Value) const;
+
+    /// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+    /// in bytes of the directive is specified by Size and Hi/Lo specify the
+    /// labels.  This implicitly uses .set if it is available.
+    void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+                             unsigned Size) const;
+
+    /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+    /// where the size in bytes of the directive is specified by Size and Hi/Lo
+    /// specify the labels.  This implicitly uses .set if it is available.
+    void EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+                                   const MCSymbol *Lo, unsigned Size) const;
+
+    /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+    /// where the size in bytes of the directive is specified by Size and Label
+    /// specifies the label.  This implicitly uses .set if it is available.
+    void EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+                             unsigned Size) const;
+
+    //===------------------------------------------------------------------===//
+    // Dwarf Emission Helper Routines
+    //===------------------------------------------------------------------===//
+
+    /// EmitSLEB128 - Emit the specified signed leb128 value.
+    void EmitSLEB128(int Value, const char *Desc = 0) const;
+
+    /// EmitULEB128 - Emit the specified unsigned leb128 value.
+    void EmitULEB128(unsigned Value, const char *Desc = 0,
+                     unsigned PadTo = 0) const;
+
+    /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+    void EmitCFAByte(unsigned Val) const;
+
+    /// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+    /// encoding.  If verbose assembly output is enabled, we output comments
+    /// describing the encoding.  Desc is a string saying what the encoding is
+    /// specifying (e.g. "LSDA").
+    void EmitEncodingByte(unsigned Val, const char *Desc = 0) const;
+
+    /// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
+    unsigned GetSizeOfEncodedValue(unsigned Encoding) const;
+
+    /// EmitReference - Emit a reference to a label with a specified encoding.
+    ///
+    void EmitReference(const MCSymbol *Sym, unsigned Encoding) const;
+    void EmitReference(const GlobalValue *GV, unsigned Encoding) const;
+
+    /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of
+    /// its section.  This can be done with a special directive if the target
+    /// supports it (e.g. cygwin) or by emitting it as an offset from a label
+    /// at the start of the section.
+    ///
+    /// SectionLabel is a temporary label emitted at the start of the section
+    /// that Label lives in.
+    void EmitSectionOffset(const MCSymbol *Label,
+                           const MCSymbol *SectionLabel) const;
+
+    /// getDebugValueLocation - Get location information encoded by DBG_VALUE
+    /// operands.
+    virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+    /// getISAEncoding - Get the value for DW_AT_APPLE_isa.  Zero if no isa
+    /// encoding specified.
+    virtual unsigned getISAEncoding() { return 0; }
+
+    //===------------------------------------------------------------------===//
+    // Dwarf Lowering Routines
+    //===------------------------------------------------------------------===//
+
+    /// EmitFrameMoves - Emit frame instructions to describe the layout of the
+    /// frame.
+    void EmitFrameMoves(const std::vector<MachineMove> &Moves,
+                        MCSymbol *BaseLabel, bool isEH) const;
+    void EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const;
+
+    //===------------------------------------------------------------------===//
+    // Inline Asm Support
+    //===------------------------------------------------------------------===//
+  public:
+    // These are hooks that targets can override to implement inline asm
+    // support.  These should probably be moved out of AsmPrinter someday.
+
+    /// PrintSpecial - Print information related to the specified machine instr
+    /// that is independent of the operand, and may be independent of the instr
+    /// itself.  This can be useful for portably encoding the comment character
+    /// or other bits of target-specific knowledge into the asmstrings.  The
+    /// syntax used is ${:comment}.  Targets can override this to add support
+    /// for their own strange codes.
+    virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+                              const char *Code) const;
+
+    /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+    /// instruction, using the specified assembler variant.  Targets should
+    /// override this to format as appropriate.  This method can return true if
+    /// the operand is erroneous.
+    virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode,
+                                 raw_ostream &OS);
+
+    /// PrintAsmMemoryOperand - Print the specified operand of MI, an INLINEASM
+    /// instruction, using the specified assembler variant as an address.
+    /// Targets should override this to format as appropriate.  This method can
+    /// return true if the operand is erroneous.
+    virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode,
+                                       raw_ostream &OS);
+
+  private:
+    /// Private state for PrintSpecial()
+    // Assign a unique ID to this machine instruction.
+    mutable const MachineInstr *LastMI;
+    mutable unsigned LastFn;
+    mutable unsigned Counter;
+    mutable unsigned SetCounter;
+
+    /// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+    void EmitInlineAsm(StringRef Str, const MDNode *LocMDNode = 0) const;
+
+    /// EmitInlineAsm - This method formats and emits the specified machine
+    /// instruction that is an inline asm.
+    void EmitInlineAsm(const MachineInstr *MI) const;
+
+    //===------------------------------------------------------------------===//
+    // Internal Implementation Details
+    //===------------------------------------------------------------------===//
+
+    /// EmitVisibility - This emits visibility information about the symbol, if
+    /// this is supported by the target.
+    void EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+                        bool IsDefinition = true) const;
+
+    void EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const;
+
+    void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                            const MachineBasicBlock *MBB,
+                            unsigned uid) const;
+    void EmitLLVMUsedList(Constant *List);
+    void EmitXXStructorList(Constant *List);
+    GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C);
+  };
+}
+
+#endif
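As an illustrative sketch (not part of this patch) of how a backend plugs in:
a target derives from AsmPrinter and overrides EmitInstruction, lowering each
MachineInstr to an MCInst and handing it to OutStreamer.  FooAsmPrinter and
LowerFooMachineInstrToMCInst are hypothetical names; real targets also
override hooks such as EmitFunctionBodyStart as needed.

    class FooAsmPrinter : public AsmPrinter {
    public:
      FooAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
        : AsmPrinter(TM, Streamer) {}

      virtual void EmitInstruction(const MachineInstr *MI) {
        MCInst TmpInst;
        LowerFooMachineInstrToMCInst(MI, TmpInst); // target-specific lowering
        OutStreamer.EmitInstruction(TmpInst);
      }
    };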
diff --git a/final/include/llvm/CodeGen/BinaryObject.h b/final/include/llvm/CodeGen/BinaryObject.h
new file mode 100644
index 00000000000..8c1431ffbee
--- /dev/null
+++ b/final/include/llvm/CodeGen/BinaryObject.h
@@ -0,0 +1,353 @@
+//===-- llvm/CodeGen/BinaryObject.h - Binary Object. -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a binary object, aka "blob", for holding data from code
+// generators, ready to be handed to the object-module code writers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BINARYOBJECT_H
+#define LLVM_CODEGEN_BINARYOBJECT_H
+
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/DataTypes.h"
+
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+typedef std::vector<uint8_t> BinaryData;
+
+class BinaryObject {
+protected:
+  std::string Name;
+  bool IsLittleEndian;
+  bool Is64Bit;
+  BinaryData Data;
+  std::vector<MachineRelocation> Relocations;
+
+public:
+  /// Constructors and destructor
+  BinaryObject() {}
+
+  BinaryObject(bool isLittleEndian, bool is64Bit)
+    : IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
+
+  BinaryObject(const std::string &name, bool isLittleEndian, bool is64Bit)
+    : Name(name), IsLittleEndian(isLittleEndian), Is64Bit(is64Bit) {}
+
+  ~BinaryObject() {}
+
+  /// getName - get name of BinaryObject
+  inline std::string getName() const { return Name; }
+
+  /// size - get size of binary data
+  size_t size() const {
+    return Data.size();
+  }
+
+  /// getData - get binary data
+  BinaryData& getData() {
+    return Data;
+  }
+
+  /// getRelocations - get machine relocations
+  const std::vector<MachineRelocation>& getRelocations() const {
+    return Relocations;
+  }
+
+  /// hasRelocations - Return true if 'Relocations' is not empty
+  bool hasRelocations() const {
+    return !Relocations.empty();
+  }
+
+  /// emitZeros - This callback is invoked to emit an arbitrary number
+  /// of zero bytes to the data stream.
+  inline void emitZeros(unsigned Size) {
+    for (unsigned i=0; i < Size; ++i)
+      emitByte(0);
+  }
+
+  /// emitByte - This callback is invoked when a byte needs to be
+  /// written to the data stream.
+  inline void emitByte(uint8_t B) {
+    Data.push_back(B);
+  }
+
+  /// emitWord16 - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in correct endian format and correct size.
+  inline void emitWord16(uint16_t W) {
+    if (IsLittleEndian)
+      emitWord16LE(W);
+    else
+      emitWord16BE(W);
+  }
+
+  /// emitWord16LE - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitWord16LE(uint16_t W) {
+    Data.push_back((uint8_t)(W >> 0));
+    Data.push_back((uint8_t)(W >> 8));
+  }
+
+  /// emitWord16BE - This callback is invoked when a 16-bit word needs to be
+  /// written to the data stream in big-endian format.
+  inline void emitWord16BE(uint16_t W) {
+    Data.push_back((uint8_t)(W >> 8));
+    Data.push_back((uint8_t)(W >> 0));
+  }
+
+  /// emitWord - This callback is invoked when a word needs to be
+  /// written to the data stream in correct endian format and correct size.
+  inline void emitWord(uint64_t W) {
+    if (!Is64Bit)
+      emitWord32(W);
+    else
+      emitWord64(W);
+  }
+
+  /// emitWord32 - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in correct endian format.
+  inline void emitWord32(uint32_t W) {
+    if (IsLittleEndian)
+      emitWordLE(W);
+    else
+      emitWordBE(W);
+  }
+
+  /// emitWord64 - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in correct endian format.
+  inline void emitWord64(uint64_t W) {
+    if (IsLittleEndian)
+      emitDWordLE(W);
+    else
+      emitDWordBE(W);
+  }
+
+  /// emitWordFP80 - This callback is invoked when an x86_fp80 needs to be
+  /// written to the data stream in correct endian format.
+  inline void emitWordFP80(const uint64_t *W, unsigned PadSize) {
+    if (IsLittleEndian) {
+      emitWord64(W[0]);
+      emitWord16(W[1]);
+    } else {
+      emitWord16(W[1]);
+      emitWord64(W[0]);
+    }
+    emitZeros(PadSize);
+  }
+
+  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitWordLE(uint32_t W) {
+    Data.push_back((uint8_t)(W >> 0));
+    Data.push_back((uint8_t)(W >> 8));
+    Data.push_back((uint8_t)(W >> 16));
+    Data.push_back((uint8_t)(W >> 24));
+  }
+
+  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in big-endian format.
+  ///
+  inline void emitWordBE(uint32_t W) {
+    Data.push_back((uint8_t)(W >> 24));
+    Data.push_back((uint8_t)(W >> 16));
+    Data.push_back((uint8_t)(W >> 8));
+    Data.push_back((uint8_t)(W >> 0));
+  }
+
+  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in little-endian format.
+  inline void emitDWordLE(uint64_t W) {
+    Data.push_back((uint8_t)(W >> 0));
+    Data.push_back((uint8_t)(W >> 8));
+    Data.push_back((uint8_t)(W >> 16));
+    Data.push_back((uint8_t)(W >> 24));
+    Data.push_back((uint8_t)(W >> 32));
+    Data.push_back((uint8_t)(W >> 40));
+    Data.push_back((uint8_t)(W >> 48));
+    Data.push_back((uint8_t)(W >> 56));
+  }
+
+  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in big-endian format.
+  inline void emitDWordBE(uint64_t W) {
+    Data.push_back((uint8_t)(W >> 56));
+    Data.push_back((uint8_t)(W >> 48));
+    Data.push_back((uint8_t)(W >> 40));
+    Data.push_back((uint8_t)(W >> 32));
+    Data.push_back((uint8_t)(W >> 24));
+    Data.push_back((uint8_t)(W >> 16));
+    Data.push_back((uint8_t)(W >> 8));
+    Data.push_back((uint8_t)(W >> 0));
+  }
+
+  /// fixByte - This callback is invoked when a byte needs to be
+  /// fixed up in the buffer.
+  inline void fixByte(uint8_t B, uint32_t offset) {
+    Data[offset] = B;
+  }
+
+  /// fixWord16 - This callback is invoked when a 16-bit word needs to be
+  /// fixed up in the data stream in correct endian format.
+  inline void fixWord16(uint16_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord16LE(W, offset);
+    else
+      fixWord16BE(W, offset);
+  }
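+  // For example, on a little-endian stream emitWord16(0x1234) appends the
+  // bytes {0x34, 0x12}, and a later fixWord16(0xBEEF, offset) rewrites the
+  // same two bytes in place without changing the stream's size.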
+  /// fixWord16LE - This callback is invoked when a 16-bit word needs to be
+  /// fixed up in the data stream in little-endian format.
+  inline void fixWord16LE(uint16_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 0);
+    Data[++offset] = (uint8_t)(W >> 8);
+  }
+
+  /// fixWord16BE - This callback is invoked when a 16-bit word needs to be
+  /// fixed up in the data stream in big-endian format.
+  inline void fixWord16BE(uint16_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 8);
+    Data[++offset] = (uint8_t)(W >> 0);
+  }
+
+  /// fixWord - This callback is invoked when a word needs to be
+  /// fixed up in the data in correct endian format and correct size.
+  inline void fixWord(uint64_t W, uint32_t offset) {
+    if (!Is64Bit)
+      fixWord32(W, offset);
+    else
+      fixWord64(W, offset);
+  }
+
+  /// fixWord32 - This callback is invoked when a 32-bit word needs to be
+  /// fixed up in the data in correct endian format.
+  inline void fixWord32(uint32_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord32LE(W, offset);
+    else
+      fixWord32BE(W, offset);
+  }
+
+  /// fixWord32LE - This callback is invoked when a 32-bit word needs to be
+  /// fixed up in the data in little-endian format.
+  inline void fixWord32LE(uint32_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 0);
+    Data[++offset] = (uint8_t)(W >> 8);
+    Data[++offset] = (uint8_t)(W >> 16);
+    Data[++offset] = (uint8_t)(W >> 24);
+  }
+
+  /// fixWord32BE - This callback is invoked when a 32-bit word needs to be
+  /// fixed up in the data in big-endian format.
+  inline void fixWord32BE(uint32_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 24);
+    Data[++offset] = (uint8_t)(W >> 16);
+    Data[++offset] = (uint8_t)(W >> 8);
+    Data[++offset] = (uint8_t)(W >> 0);
+  }
+
+  /// fixWord64 - This callback is invoked when a 64-bit word needs to be
+  /// fixed up in the data in correct endian format.
+  inline void fixWord64(uint64_t W, uint32_t offset) {
+    if (IsLittleEndian)
+      fixWord64LE(W, offset);
+    else
+      fixWord64BE(W, offset);
+  }
+
+  /// fixWord64LE - This callback is invoked when a 64-bit word needs to be
+  /// fixed up in the data in little-endian format.
+  inline void fixWord64LE(uint64_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 0);
+    Data[++offset] = (uint8_t)(W >> 8);
+    Data[++offset] = (uint8_t)(W >> 16);
+    Data[++offset] = (uint8_t)(W >> 24);
+    Data[++offset] = (uint8_t)(W >> 32);
+    Data[++offset] = (uint8_t)(W >> 40);
+    Data[++offset] = (uint8_t)(W >> 48);
+    Data[++offset] = (uint8_t)(W >> 56);
+  }
+
+  /// fixWord64BE - This callback is invoked when a 64-bit word needs to be
+  /// fixed up in the data in big-endian format.
+  inline void fixWord64BE(uint64_t W, uint32_t offset) {
+    Data[offset]   = (uint8_t)(W >> 56);
+    Data[++offset] = (uint8_t)(W >> 48);
+    Data[++offset] = (uint8_t)(W >> 40);
+    Data[++offset] = (uint8_t)(W >> 32);
+    Data[++offset] = (uint8_t)(W >> 24);
+    Data[++offset] = (uint8_t)(W >> 16);
+    Data[++offset] = (uint8_t)(W >> 8);
+    Data[++offset] = (uint8_t)(W >> 0);
+  }
+
+  /// emitAlignment - Pad the data to the specified alignment.
+  void emitAlignment(unsigned Alignment, uint8_t fill = 0) {
+    if (Alignment <= 1) return;
+    unsigned PadSize = -Data.size() & (Alignment-1);
+    for (unsigned i = 0; i < PadSize; ++i)
+      Data.push_back(fill);
+  }
+
+  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+  /// written to the data stream.
+  void emitULEB128Bytes(uint64_t Value) {
+    do {
+      uint8_t Byte = (uint8_t)(Value & 0x7f);
+      Value >>= 7;
+      if (Value) Byte |= 0x80;
+      emitByte(Byte);
+    } while (Value);
+  }
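+  // For example, emitULEB128Bytes(624485) appends {0xE5, 0x8E, 0x26}: the
+  // value splits into the 7-bit groups 0x26 0x0E 0x65 (high to low), each
+  // group is emitted low-first, and the 0x80 bit marks that more follow.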
+  /// emitSLEB128Bytes - This callback is invoked when an SLEB128 needs to be
+  /// written to the data stream.
+  void emitSLEB128Bytes(int64_t Value) {
+    int Sign = Value >> (8 * sizeof(Value) - 1);
+    bool IsMore;
+
+    do {
+      uint8_t Byte = (uint8_t)(Value & 0x7f);
+      Value >>= 7;
+      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+      if (IsMore) Byte |= 0x80;
+      emitByte(Byte);
+    } while (IsMore);
+  }
+
+  /// emitString - This callback is invoked when a String needs to be
+  /// written to the data stream.
+  void emitString(const std::string &String) {
+    for (unsigned i = 0, N = static_cast<unsigned>(String.size());
+         i < N; ++i)
+      emitByte(String[i]);
+    emitByte(0);
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/CalcSpillWeights.h b/final/include/llvm/CodeGen/CalcSpillWeights.h
new file mode 100644
--- /dev/null
+++ b/final/include/llvm/CodeGen/CalcSpillWeights.h
+//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+  class LiveInterval;
+  class LiveIntervals;
+  class MachineLoopInfo;
+
+  /// VirtRegAuxInfo - Calculate auxiliary information for a virtual
+  /// register such as its spill weight and allocation hint.
+  class VirtRegAuxInfo {
+    MachineFunction &mf_;
+    LiveIntervals &lis_;
+    const MachineLoopInfo &loops_;
+    DenseMap<unsigned, float> hint_;
+  public:
+    VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis,
+                   const MachineLoopInfo &loops) :
+      mf_(mf), lis_(lis), loops_(loops) {}
+
+    /// CalculateRegClass - recompute the register class for reg from its uses.
+    /// Since the register class can affect the allocation hint, this function
+    /// should be called before CalculateWeightAndHint if both are called.
+    void CalculateRegClass(unsigned reg);
+
+    /// CalculateWeightAndHint - (re)compute li's spill weight and allocation
+    /// hint.
+    void CalculateWeightAndHint(LiveInterval &li);
+  };
+
+  /// CalculateSpillWeights - Compute spill weights for all virtual register
+  /// live intervals.
+  class CalculateSpillWeights : public MachineFunctionPass {
+  public:
+    static char ID;
+
+    CalculateSpillWeights() : MachineFunctionPass(ID) {
+      initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+    virtual bool runOnMachineFunction(MachineFunction &fn);
+
+  private:
+    /// Returns true if the given live interval is zero length.
+    bool isZeroLengthInterval(LiveInterval *li) const;
+  };
+
+}
+
+#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
diff --git a/final/include/llvm/CodeGen/CallingConvLower.h b/final/include/llvm/CodeGen/CallingConvLower.h
new file mode 100644
index 00000000000..2a9bbdfb7ce
--- /dev/null
+++ b/final/include/llvm/CodeGen/CallingConvLower.h
@@ -0,0 +1,300 @@
+//===-- llvm/CallingConvLower.h - Calling Conventions -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the CCState and CCValAssign classes, used for lowering
+// and implementing calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_CALLINGCONVLOWER_H
+#define LLVM_CODEGEN_CALLINGCONVLOWER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/CallingConv.h"
+
+namespace llvm {
+  class TargetRegisterInfo;
+  class TargetMachine;
+  class CCState;
+
+/// CCValAssign - Represent assignment of one arg/retval to a location.
+class CCValAssign {
+public:
+  enum LocInfo {
+    Full,     // The value fills the full location.
+    SExt,     // The value is sign extended in the location.
+    ZExt,     // The value is zero extended in the location.
+    AExt,     // The value is extended with undefined upper bits.
+    BCvt,     // The value is bit-converted in the location.
+    VExt,     // The value is vector-widened in the location.
+              // FIXME: Not implemented yet. Code that uses AExt to mean
+              // vector-widen should be fixed to use VExt instead.
+    Indirect  // The location contains pointer to the value.
+    // TODO: a subset of the value is in the location.
+  };
+private:
+  /// ValNo - This is the value number being assigned (e.g.
an argument number). + unsigned ValNo; + + /// Loc is either a stack offset or a register number. + unsigned Loc; + + /// isMem - True if this is a memory loc, false if it is a register loc. + bool isMem : 1; + + /// isCustom - True if this arg/retval requires special handling. + bool isCustom : 1; + + /// Information about how the value is assigned. + LocInfo HTP : 6; + + /// ValVT - The type of the value being assigned. + MVT ValVT; + + /// LocVT - The type of the location being assigned to. + MVT LocVT; +public: + + static CCValAssign getReg(unsigned ValNo, MVT ValVT, + unsigned RegNo, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret.ValNo = ValNo; + Ret.Loc = RegNo; + Ret.isMem = false; + Ret.isCustom = false; + Ret.HTP = HTP; + Ret.ValVT = ValVT; + Ret.LocVT = LocVT; + return Ret; + } + + static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, + unsigned RegNo, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret = getReg(ValNo, ValVT, RegNo, LocVT, HTP); + Ret.isCustom = true; + return Ret; + } + + static CCValAssign getMem(unsigned ValNo, MVT ValVT, + unsigned Offset, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret.ValNo = ValNo; + Ret.Loc = Offset; + Ret.isMem = true; + Ret.isCustom = false; + Ret.HTP = HTP; + Ret.ValVT = ValVT; + Ret.LocVT = LocVT; + return Ret; + } + + static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, + unsigned Offset, MVT LocVT, + LocInfo HTP) { + CCValAssign Ret; + Ret = getMem(ValNo, ValVT, Offset, LocVT, HTP); + Ret.isCustom = true; + return Ret; + } + + unsigned getValNo() const { return ValNo; } + MVT getValVT() const { return ValVT; } + + bool isRegLoc() const { return !isMem; } + bool isMemLoc() const { return isMem; } + + bool needsCustom() const { return isCustom; } + + unsigned getLocReg() const { assert(isRegLoc()); return Loc; } + unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; } + MVT getLocVT() const { return LocVT; } + + LocInfo getLocInfo() const { return HTP; } + bool isExtInLoc() const { + return (HTP == AExt || HTP == SExt || HTP == ZExt); + } + +}; + +/// CCAssignFn - This function assigns a location for Val, updating State to +/// reflect the change. It returns 'true' if it failed to handle Val. +typedef bool CCAssignFn(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); + +/// CCCustomFn - This function assigns a location for Val, possibly updating +/// all args to reflect changes and indicates if it handled it. It must set +/// isCustom if it handles the arg and returns true. +typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT, + MVT &LocVT, CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State); + +/// CCState - This class holds information needed while lowering arguments and +/// return values. It captures which registers are already assigned and which +/// stack slots are used. It provides accessors to allocate these values. 
+class CCState {
+  CallingConv::ID CallingConv;
+  bool IsVarArg;
+  const TargetMachine &TM;
+  const TargetRegisterInfo &TRI;
+  SmallVector<CCValAssign, 16> &Locs;
+  LLVMContext &Context;
+
+  unsigned StackOffset;
+  SmallVector<uint32_t, 16> UsedRegs;
+public:
+  CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM,
+          SmallVector<CCValAssign, 16> &locs, LLVMContext &C);
+
+  void addLoc(const CCValAssign &V) {
+    Locs.push_back(V);
+  }
+
+  LLVMContext &getContext() const { return Context; }
+  const TargetMachine &getTarget() const { return TM; }
+  CallingConv::ID getCallingConv() const { return CallingConv; }
+  bool isVarArg() const { return IsVarArg; }
+
+  unsigned getNextStackOffset() const { return StackOffset; }
+
+  /// isAllocated - Return true if the specified register (or an alias) is
+  /// allocated.
+  bool isAllocated(unsigned Reg) const {
+    return UsedRegs[Reg/32] & (1 << (Reg&31));
+  }
+
+  /// AnalyzeFormalArguments - Analyze an array of argument values,
+  /// incorporating info about the formals into this state.
+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+                              CCAssignFn Fn);
+
+  /// AnalyzeReturn - Analyze the returned values of a return,
+  /// incorporating info about the result values into this state.
+  void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                     CCAssignFn Fn);
+
+  /// CheckReturn - Analyze the return values of a function, returning
+  /// true if the return can be performed without sret-demotion, and
+  /// false otherwise.
+  bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+                   CCAssignFn Fn);
+
+  /// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
+  /// incorporating info about the passed values into this state.
+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
+                           CCAssignFn Fn);
+
+  /// AnalyzeCallOperands - Same as above except it takes vectors of types
+  /// and argument flags.
+  void AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
+                           CCAssignFn Fn);
+
+  /// AnalyzeCallResult - Analyze the return values of a call,
+  /// incorporating info about the passed values into this state.
+  void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
+                         CCAssignFn Fn);
+
+  /// AnalyzeCallResult - Same as above except it's specialized for calls
+  /// which produce a single value.
+  void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
+
+  /// getFirstUnallocated - Return the first unallocated register in the set,
+  /// or NumRegs if they are all allocated.
+  unsigned getFirstUnallocated(const unsigned *Regs, unsigned NumRegs) const {
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (!isAllocated(Regs[i]))
+        return i;
+    return NumRegs;
+  }
+
+  /// AllocateReg - Attempt to allocate one register.  If it is not available,
+  /// return zero.  Otherwise, return the register, marking it and any aliases
+  /// as allocated.
+  unsigned AllocateReg(unsigned Reg) {
+    if (isAllocated(Reg)) return 0;
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// Version of AllocateReg with extra register to be shadowed.
+  unsigned AllocateReg(unsigned Reg, unsigned ShadowReg) {
+    if (isAllocated(Reg)) return 0;
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateReg - Attempt to allocate one of the specified registers.  If
+  /// none are available, return zero.  Otherwise, return the first one
+  /// available, marking it and any aliases as allocated.
+  unsigned AllocateReg(const unsigned *Regs, unsigned NumRegs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+    if (FirstUnalloc == NumRegs)
+      return 0;    // Didn't find the reg.
+
+    // Mark the register and any aliases as allocated.
+    unsigned Reg = Regs[FirstUnalloc];
+    MarkAllocated(Reg);
+    return Reg;
+  }
+
+  /// Version of AllocateReg with list of registers to be shadowed.
+  unsigned AllocateReg(const unsigned *Regs, const unsigned *ShadowRegs,
+                       unsigned NumRegs) {
+    unsigned FirstUnalloc = getFirstUnallocated(Regs, NumRegs);
+    if (FirstUnalloc == NumRegs)
+      return 0;    // Didn't find the reg.
+
+    // Mark the register and any aliases as allocated.
+    unsigned Reg = Regs[FirstUnalloc], ShadowReg = ShadowRegs[FirstUnalloc];
+    MarkAllocated(Reg);
+    MarkAllocated(ShadowReg);
+    return Reg;
+  }
+
+  /// AllocateStack - Allocate a chunk of stack space with the specified size
+  /// and alignment.
+  unsigned AllocateStack(unsigned Size, unsigned Align) {
+    assert(Align && ((Align-1) & Align) == 0); // Align is power of 2.
+    StackOffset = ((StackOffset + Align-1) & ~(Align-1));
+    unsigned Result = StackOffset;
+    StackOffset += Size;
+    return Result;
+  }
+
+  /// Version of AllocateStack with extra register to be shadowed.
+  unsigned AllocateStack(unsigned Size, unsigned Align, unsigned ShadowReg) {
+    MarkAllocated(ShadowReg);
+    return AllocateStack(Size, Align);
+  }
+
+  // HandleByVal - Allocate a stack slot large enough to pass an argument by
+  // value.  The size and alignment information of the argument is encoded in
+  // its parameter attribute.
+  void HandleByVal(unsigned ValNo, MVT ValVT,
+                   MVT LocVT, CCValAssign::LocInfo LocInfo,
+                   int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags);
+
+private:
+  /// MarkAllocated - Mark a register and all of its aliases as allocated.
+  void MarkAllocated(unsigned Reg);
+};
+
+
+
+} // end namespace llvm
+
+#endif
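A worked example of the AllocateStack arithmetic above (illustrative; State
is assumed to be a CCState whose running StackOffset is currently 4): the
call rounds the offset up to the requested alignment, hands out that slot,
and bumps the running offset by the size.

    unsigned Slot = State.AllocateStack(8, 8); // offset 4 rounds up to 8
    // Slot == 8, and State.getNextStackOffset() now returns 16.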
diff --git a/final/include/llvm/CodeGen/EdgeBundles.h b/final/include/llvm/CodeGen/EdgeBundles.h
new file mode 100644
index 00000000000..2c5215a7927
--- /dev/null
+++ b/final/include/llvm/CodeGen/EdgeBundles.h
@@ -0,0 +1,61 @@
+//===-------- EdgeBundles.h - Bundles of CFG edges --------------*- c++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The EdgeBundles analysis forms equivalence classes of CFG edges such that
+// all edges entering a machine basic block are in the same bundle, and all
+// edges leaving a basic block are in the same bundle.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EDGEBUNDLES_H
+#define LLVM_CODEGEN_EDGEBUNDLES_H
+
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class EdgeBundles : public MachineFunctionPass {
+  const MachineFunction *MF;
+
+  /// EC - Each edge bundle is an equivalence class. The keys are:
+  ///   2*BB->getNumber()   -> Ingoing bundle.
+  ///   2*BB->getNumber()+1 -> Outgoing bundle.
+  IntEqClasses EC;
+
+public:
+  static char ID;
+  EdgeBundles() : MachineFunctionPass(ID) {}
+
+  /// getBundle - Return the ingoing (Out = false) or outgoing (Out = true)
+  /// bundle number for basic block #N.
+  unsigned getBundle(unsigned N, bool Out) const { return EC[2 * N + Out]; }
+
+  /// getNumBundles - Return the total number of bundles in the CFG.
+  unsigned getNumBundles() const { return EC.getNumClasses(); }
+
+  /// getMachineFunction - Return the last machine function computed.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+  /// view - Visualize the annotated bipartite CFG with Graphviz.
+  void view() const;
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+};
+
+/// Specialize WriteGraph, the standard implementation won't work.
+raw_ostream &WriteGraph(raw_ostream &O, const EdgeBundles &G,
+                        bool ShortNames = false,
+                        const std::string &Title = "");
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/FastISel.h b/final/include/llvm/CodeGen/FastISel.h
new file mode 100644
index 00000000000..fbb12005444
--- /dev/null
+++ b/final/include/llvm/CodeGen/FastISel.h
@@ -0,0 +1,348 @@
+//===-- FastISel.h - Definition of the FastISel class ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FastISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_FASTISEL_H
+#define LLVM_CODEGEN_FASTISEL_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+
+class AllocaInst;
+class ConstantFP;
+class FunctionLoweringInfo;
+class Instruction;
+class MachineBasicBlock;
+class MachineConstantPool;
+class MachineFunction;
+class MachineInstr;
+class MachineFrameInfo;
+class MachineRegisterInfo;
+class TargetData;
+class TargetInstrInfo;
+class TargetLowering;
+class TargetMachine;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+class LoadInst;
+
+/// FastISel - This is a fast-path instruction selection class that
+/// generates poor code and doesn't support illegal types or non-trivial
+/// lowering, but runs quickly.
+class FastISel {
+protected:
+  DenseMap<const Value *, unsigned> LocalValueMap;
+  FunctionLoweringInfo &FuncInfo;
+  MachineRegisterInfo &MRI;
+  MachineFrameInfo &MFI;
+  MachineConstantPool &MCP;
+  DebugLoc DL;
+  const TargetMachine &TM;
+  const TargetData &TD;
+  const TargetInstrInfo &TII;
+  const TargetLowering &TLI;
+  const TargetRegisterInfo &TRI;
+  MachineInstr *LastLocalValue;
+
+public:
+  /// getLastLocalValue - Return the position of the last instruction
+  /// emitted for materializing constants for use in the current block.
+  MachineInstr *getLastLocalValue() { return LastLocalValue; }
+
+  /// setLastLocalValue - Update the position of the last instruction
+  /// emitted for materializing constants for use in the current block.
+  void setLastLocalValue(MachineInstr *I) { LastLocalValue = I; }
+
+  /// startNewBlock - Set the current block to which generated machine
+  /// instructions will be appended, and clear the local CSE map.
+  ///
+  void startNewBlock();
+
+  /// getCurDebugLoc() - Return current debug location information.
+  DebugLoc getCurDebugLoc() const { return DL; }
+
+  /// SelectInstruction - Do "fast" instruction selection for the given
+  /// LLVM IR instruction, and append generated machine instructions to
+  /// the current block.  Return true if selection was successful.
+  ///
+  bool SelectInstruction(const Instruction *I);
+
+  /// SelectOperator - Do "fast" instruction selection for the given
+  /// LLVM IR operator (Instruction or ConstantExpr), and append
+  /// generated machine instructions to the current block.
+  /// Return true if selection was successful.
+  ///
+  bool SelectOperator(const User *I, unsigned Opcode);
+
+  /// getRegForValue - Create a virtual register and arrange for it to
+  /// be assigned the value for the given LLVM value.
+  unsigned getRegForValue(const Value *V);
+
+  /// lookUpRegForValue - Look up the value to see if its value is already
+  /// cached in a register.  It may be defined by instructions across blocks
+  /// or defined locally.
+  unsigned lookUpRegForValue(const Value *V);
+
+  /// getRegForGEPIndex - This is a wrapper around getRegForValue that also
+  /// takes care of truncating or sign-extending the given getelementptr
+  /// index value.
+  std::pair<unsigned, bool> getRegForGEPIndex(const Value *V);
+
+  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
+  /// vreg is being provided by the specified load instruction.  If possible,
+  /// try to fold the load as an operand to the instruction, returning true if
+  /// possible.
+  virtual bool TryToFoldLoad(MachineInstr * /*MI*/, unsigned /*OpNo*/,
+                             const LoadInst * /*LI*/) {
+    return false;
+  }
+
+  /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions
+  /// into the current block.
+  void recomputeInsertPt();
+
+  struct SavePoint {
+    MachineBasicBlock::iterator InsertPt;
+    DebugLoc DL;
+  };
+
+  /// enterLocalValueArea - Prepare InsertPt to begin inserting instructions
+  /// into the local value area and return the old insert position.
+  SavePoint enterLocalValueArea();
+
+  /// leaveLocalValueArea - Reset InsertPt to the given old insert position.
+  void leaveLocalValueArea(SavePoint Old);
+
+  virtual ~FastISel();
+
+protected:
+  explicit FastISel(FunctionLoweringInfo &funcInfo);
+
+  /// TargetSelectInstruction - This method is called by target-independent
+  /// code when the normal FastISel process fails to select an instruction.
+  /// This gives targets a chance to emit code for anything that doesn't
+  /// fit into FastISel's framework.  It returns true if it was successful.
+  ///
+  virtual bool
+  TargetSelectInstruction(const Instruction *I) = 0;
+
+  /// FastEmit_ - This method is called by target-independent code
+  /// to request that an instruction with the given type and opcode
+  /// be emitted.
+  virtual unsigned FastEmit_(MVT VT,
+                             MVT RetVT,
+                             unsigned Opcode);
+
+  /// FastEmit_r - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// register operand be emitted.
+  ///
+  virtual unsigned FastEmit_r(MVT VT,
+                              MVT RetVT,
+                              unsigned Opcode,
+                              unsigned Op0, bool Op0IsKill);
+
+  /// FastEmit_rr - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// register operands be emitted.
+  ///
+  virtual unsigned FastEmit_rr(MVT VT,
+                               MVT RetVT,
+                               unsigned Opcode,
+                               unsigned Op0, bool Op0IsKill,
+                               unsigned Op1, bool Op1IsKill);
+
+  /// FastEmit_ri - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// register and immediate operands be emitted.
+  ///
+  virtual unsigned FastEmit_ri(MVT VT,
+                               MVT RetVT,
+                               unsigned Opcode,
+                               unsigned Op0, bool Op0IsKill,
+                               uint64_t Imm);
+
+  /// FastEmit_rf - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// register and floating-point immediate operands be emitted.
+  ///
+  virtual unsigned FastEmit_rf(MVT VT,
+                               MVT RetVT,
+                               unsigned Opcode,
+                               unsigned Op0, bool Op0IsKill,
+                               const ConstantFP *FPImm);
+
+  /// FastEmit_rri - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// register and immediate operands be emitted.
+  ///
+  virtual unsigned FastEmit_rri(MVT VT,
+                                MVT RetVT,
+                                unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill,
+                                unsigned Op1, bool Op1IsKill,
+                                uint64_t Imm);
+
+  /// FastEmit_ri_ - This method is a wrapper of FastEmit_ri.  It first tries
+  /// to emit an instruction with an immediate operand using FastEmit_ri.
+  /// If that fails, it materializes the immediate into a register and tries
+  /// FastEmit_rr instead.
+  unsigned FastEmit_ri_(MVT VT,
+                        unsigned Opcode,
+                        unsigned Op0, bool Op0IsKill,
+                        uint64_t Imm, MVT ImmType);
+
+  /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf.  It first tries
+  /// to emit an instruction with an immediate operand using FastEmit_rf.
+  /// If that fails, it materializes the immediate into a register and tries
+  /// FastEmit_rr instead.
+  unsigned FastEmit_rf_(MVT VT,
+                        unsigned Opcode,
+                        unsigned Op0, bool Op0IsKill,
+                        const ConstantFP *FPImm, MVT ImmType);
+
+  /// FastEmit_i - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// immediate operand be emitted.
+  virtual unsigned FastEmit_i(MVT VT,
+                              MVT RetVT,
+                              unsigned Opcode,
+                              uint64_t Imm);
+
+  /// FastEmit_f - This method is called by target-independent code
+  /// to request that an instruction with the given type, opcode, and
+  /// floating-point immediate operand be emitted.
+  virtual unsigned FastEmit_f(MVT VT,
+                              MVT RetVT,
+                              unsigned Opcode,
+                              const ConstantFP *FPImm);
+
+  /// FastEmitInst_ - Emit a MachineInstr with no operands and a
+  /// result register in the given register class.
+  ///
+  unsigned FastEmitInst_(unsigned MachineInstOpcode,
+                         const TargetRegisterClass *RC);
+
+  /// FastEmitInst_r - Emit a MachineInstr with one register operand
+  /// and a result register in the given register class.
+  ///
+  unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+                          const TargetRegisterClass *RC,
+                          unsigned Op0, bool Op0IsKill);
+
+  /// FastEmitInst_rr - Emit a MachineInstr with two register operands
+  /// and a result register in the given register class.
+  ///
+  unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC,
+                           unsigned Op0, bool Op0IsKill,
+                           unsigned Op1, bool Op1IsKill);
+
+  /// FastEmitInst_ri - Emit a MachineInstr with one register operand,
+  /// an immediate operand, and a result register in the given register
+  /// class.
+  ///
+  unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC,
+                           unsigned Op0, bool Op0IsKill,
+                           uint64_t Imm);
+
+  /// FastEmitInst_rf - Emit a MachineInstr with one register operand,
+  /// a floating-point immediate operand, and a result register in the
+  /// given register class.
+  ///
+  unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+                           const TargetRegisterClass *RC,
+                           unsigned Op0, bool Op0IsKill,
+                           const ConstantFP *FPImm);
+
+  /// FastEmitInst_rri - Emit a MachineInstr with two register operands,
+  /// an immediate, and a result register in the given register class.
+  ///
+  unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+                            const TargetRegisterClass *RC,
+                            unsigned Op0, bool Op0IsKill,
+                            unsigned Op1, bool Op1IsKill,
+                            uint64_t Imm);
+
+  /// FastEmitInst_i - Emit a MachineInstr with a single immediate
+  /// operand, and a result register in the given register class.
+  unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                          const TargetRegisterClass *RC,
+                          uint64_t Imm);
+
+  /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg
+  /// from a specified index of a superregister to a specified type.
+  unsigned FastEmitInst_extractsubreg(MVT RetVT,
+                                      unsigned Op0, bool Op0IsKill,
+                                      uint32_t Idx);
+
+  /// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
+  /// with all but the least significant bit set to zero.
+  unsigned FastEmitZExtFromI1(MVT VT,
+                              unsigned Op0, bool Op0IsKill);
+
+  /// FastEmitBranch - Emit an unconditional branch to the given block,
+  /// unless it is the immediate (fall-through) successor, and update
+  /// the CFG.
+  void FastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL);
+
+  unsigned UpdateValueMap(const Value* I, unsigned Reg);
+
+  unsigned createResultReg(const TargetRegisterClass *RC);
+
+  /// TargetMaterializeConstant - Emit a constant in a register using
+  /// target-specific logic, such as constant pool loads.
+  virtual unsigned TargetMaterializeConstant(const Constant* C) {
+    return 0;
+  }
+
+  /// TargetMaterializeAlloca - Emit an alloca address in a register using
+  /// target-specific logic.
+  virtual unsigned TargetMaterializeAlloca(const AllocaInst* C) {
+    return 0;
+  }
+
+private:
+  bool SelectBinaryOp(const User *I, unsigned ISDOpcode);
+
+  bool SelectFNeg(const User *I);
+
+  bool SelectGetElementPtr(const User *I);
+
+  bool SelectCall(const User *I);
+
+  bool SelectBitCast(const User *I);
+
+  bool SelectCast(const User *I, unsigned Opcode);
+
+  /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
+  /// Emit code to ensure constants are copied into registers when needed.
+  /// Remember the virtual registers that need to be added to the Machine PHI
+  /// nodes as input.  We cannot just directly add them, because expansion
+  /// might result in multiple MBB's for one BB.  As such, the start of the
+  /// BB might correspond to a different MBB than the end.
+  bool HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+  /// materializeRegForValue - Helper for getRegForValue.  This function is
+  /// called when the value isn't already available in a register and must
+  /// be materialized with new instructions.
+  unsigned materializeRegForValue(const Value *V, MVT VT);
+
+  /// hasTrivialKill - Test whether the given value has exactly one use.
+  bool hasTrivialKill(const Value *V) const;
+};
+
+}
+
+#endif
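An illustrative sketch (not from this patch) of the TargetMaterializeConstant
hook: a target returns a fresh vreg holding the constant, or 0 to fall back
to the default handling.  FooFastISel, Foo::MOVi32, and Foo::GR32RegisterClass
are hypothetical target names.

    unsigned FooFastISel::TargetMaterializeConstant(const Constant *C) {
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
        // Move-immediate into a fresh 32-bit GPR via the FastEmitInst_i
        // helper declared above.
        return FastEmitInst_i(Foo::MOVi32, Foo::GR32RegisterClass,
                              CI->getZExtValue());
      return 0; // not handled here; let the caller materialize it
    }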
diff --git a/final/include/llvm/CodeGen/FunctionLoweringInfo.h b/final/include/llvm/CodeGen/FunctionLoweringInfo.h
new file mode 100644
index 00000000000..4421cc02d1c
--- /dev/null
+++ b/final/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -0,0 +1,224 @@
+//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
+#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H
+
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SmallSet.h"
+#endif
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class BasicBlock;
+class CallInst;
+class Function;
+class GlobalVariable;
+class Instruction;
+class MachineInstr;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRegisterInfo;
+class TargetLowering;
+class Value;
+
+//===--------------------------------------------------------------------===//
+/// FunctionLoweringInfo - This contains information that is global to a
+/// function that is used when lowering a region of the function.
+///
+class FunctionLoweringInfo {
+public:
+  const TargetLowering &TLI;
+  const Function *Fn;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+
+  /// CanLowerReturn - true iff the function's return value can be lowered to
+  /// registers.
+  bool CanLowerReturn;
+
+  /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+  /// allocated to hold a pointer to the hidden sret parameter.
+  unsigned DemoteRegister;
+
+  /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
+  DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
+
+  /// ValueMap - Since we emit code for the function a basic block at a time,
+  /// we must remember which virtual registers hold the values for
+  /// cross-basic-block values.
+  DenseMap<const Value*, unsigned> ValueMap;
+
+  /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
+  /// the entry block.  This allows the allocas to be efficiently referenced
+  /// anywhere in the function.
+  DenseMap<const AllocaInst*, int> StaticAllocaMap;
+
+  /// ByValArgFrameIndexMap - Keep track of frame indices for byval arguments.
+  DenseMap<const Argument*, int> ByValArgFrameIndexMap;
+
+  /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for
+  /// function arguments that are inserted after scheduling is completed.
+  SmallVector<MachineInstr*, 8> ArgDbgValues;
+
+  /// RegFixups - Registers which need to be replaced after isel is done.
+  DenseMap<unsigned, unsigned> RegFixups;
+
+  /// MBB - The current block.
+  MachineBasicBlock *MBB;
+
+  /// InsertPt - The current insert position inside the current block.
+  MachineBasicBlock::iterator InsertPt;
+
+#ifndef NDEBUG
+  SmallSet<const Instruction *, 8> CatchInfoLost;
+  SmallSet<const Instruction *, 8> CatchInfoFound;
+#endif
+
+  struct LiveOutInfo {
+    unsigned NumSignBits : 31;
+    bool IsValid : 1;
+    APInt KnownOne, KnownZero;
+    LiveOutInfo() : NumSignBits(0), IsValid(true), KnownOne(1, 0),
+                    KnownZero(1, 0) {}
+  };
+
+  /// VisitedBBs - The set of basic blocks visited thus far by instruction
+  /// selection.
+  DenseSet<const BasicBlock*> VisitedBBs;
+
+  /// PHINodesToUpdate - A list of phi instructions whose operand list will
+  /// be updated after processing the current basic block.
+  /// TODO: This isn't per-function state, it's per-basic-block state. But
+  /// there's no other convenient place for it to live right now.
+ std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate; + + explicit FunctionLoweringInfo(const TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(const Function &Fn, MachineFunction &MF); + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear(); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateReg(EVT VT); + + unsigned CreateRegs(const Type *Ty); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegs(V->getType()); + } + + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the + /// register is a PHI destination and the PHI's LiveOutInfo is not valid. + const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg) { + if (!LiveOutRegInfo.inBounds(Reg)) + return NULL; + + const LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return NULL; + + return LOI; + } + + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the + /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If + /// the register's LiveOutInfo is for a smaller bit width, it is extended to + /// the larger bit width by zero extension. The bit width must be no smaller + /// than the LiveOutInfo's existing bit width. + const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth); + + /// AddLiveOutRegInfo - Adds LiveOutInfo for a register. + void AddLiveOutRegInfo(unsigned Reg, unsigned NumSignBits, + const APInt &KnownZero, const APInt &KnownOne) { + // Only install this information if it tells us something. + if (NumSignBits == 1 && KnownZero == 0 && KnownOne == 0) + return; + + LiveOutRegInfo.grow(Reg); + LiveOutInfo &LOI = LiveOutRegInfo[Reg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + + /// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination + /// register based on the LiveOutInfo of its operands. + void ComputePHILiveOutRegInfo(const PHINode*); + + /// InvalidatePHILiveOutRegInfo - Invalidates a PHI's LiveOutInfo, to be + /// called when a block is visited before all of its predecessors. + void InvalidatePHILiveOutRegInfo(const PHINode *PN) { + // PHIs with no uses have no ValueMap entry. + DenseMap<const Value *, unsigned>::const_iterator It = ValueMap.find(PN); + if (It == ValueMap.end()) + return; + + unsigned Reg = It->second; + LiveOutRegInfo.grow(Reg); + LiveOutRegInfo[Reg].IsValid = false; + } + + /// setByValArgumentFrameIndex - Record frame index for the byval + /// argument. + void setByValArgumentFrameIndex(const Argument *A, int FI); + + /// getByValArgumentFrameIndex - Get frame index for the byval argument. + int getByValArgumentFrameIndex(const Argument *A); + +private: + /// LiveOutRegInfo - Information about live out vregs. + IndexedMap<LiveOutInfo, VirtReg2IndexFunctor> LiveOutRegInfo; +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(const CallInst &I, + MachineModuleInfo *MMI, MachineBasicBlock *MBB); + +/// CopyCatchInfo - Copy catch information from SuccBB (or one of its +/// successors) to LPad.
+void CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/CodeGen/GCMetadata.h b/final/include/llvm/CodeGen/GCMetadata.h new file mode 100644 index 00000000000..45469ed7de8 --- /dev/null +++ b/final/include/llvm/CodeGen/GCMetadata.h @@ -0,0 +1,193 @@ +//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the GCFunctionInfo and GCModuleInfo classes, which are +// used as a communication channel from the target code generator to the target +// garbage collectors. This interface allows code generators and garbage +// collectors to be developed independently. +// +// The GCFunctionInfo class logs the data necessary to build a type accurate +// stack map. The code generator outputs: +// +// - Safe points as specified by the GCStrategy's NeededSafePoints. +// - Stack offsets for GC roots, as specified by calls to llvm.gcroot +// +// As a refinement, liveness analysis calculates the set of live roots at each +// safe point. Liveness analysis is not presently performed by the code +// generator, so all roots are assumed live. +// +// GCModuleInfo simply collects GCFunctionInfo instances for each Function as +// they are compiled. This accretion is necessary for collectors which must emit +// a stack map for the compilation unit as a whole. Therefore, GCFunctionInfo +// outlives the MachineFunction from which it is derived and must not refer to +// any code generator data structures. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GCMETADATA_H +#define LLVM_CODEGEN_GCMETADATA_H + +#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/DebugLoc.h" + +namespace llvm { + class AsmPrinter; + class GCStrategy; + class Constant; + class MCSymbol; + + namespace GC { + /// PointKind - The type of a collector-safe point. + /// + enum PointKind { + Loop, //< Instr is a loop (backwards branch). + Return, //< Instr is a return instruction. + PreCall, //< Instr is a call instruction. + PostCall //< Instr is the return address of a call. + }; + } + + /// GCPoint - Metadata for a collector-safe point in machine code. + /// + struct GCPoint { + GC::PointKind Kind; //< The kind of the safe point. + MCSymbol *Label; //< A label. + DebugLoc Loc; + + GCPoint(GC::PointKind K, MCSymbol *L, DebugLoc DL) + : Kind(K), Label(L), Loc(DL) {} + }; + + /// GCRoot - Metadata for a pointer to an object managed by the garbage + /// collector. + struct GCRoot { + int Num; //< Usually a frame index. + int StackOffset; //< Offset from the stack pointer. + const Constant *Metadata;//< Metadata straight from the call to llvm.gcroot. + + GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} + }; + + + /// GCFunctionInfo - Garbage collection metadata for a single function. 
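+ /// For illustration, a code generator fills one in roughly as follows
+ /// (FI, Label, DL and MFI are illustrative names):
+ ///   FI.addSafePoint(GC::PostCall, Label, DL);  // label after a call
+ ///   FI.setFrameSize(MFI->getStackSize());      // final frame size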
+ /// + class GCFunctionInfo { + public: + typedef std::vector<GCPoint>::iterator iterator; + typedef std::vector<GCRoot>::iterator roots_iterator; + typedef std::vector<GCRoot>::const_iterator live_iterator; + + private: + const Function &F; + GCStrategy &S; + uint64_t FrameSize; + std::vector<GCRoot> Roots; + std::vector<GCPoint> SafePoints; + + // FIXME: Liveness. A 2D BitVector, perhaps? + // + // BitVector Liveness; + // + // bool islive(int point, int root) = + // Liveness[point * SafePoints.size() + root] + // + // The bit vector is the more compact representation where >3.2% of roots + // are live per safe point (1.5% on 64-bit hosts). + + public: + GCFunctionInfo(const Function &F, GCStrategy &S); + ~GCFunctionInfo(); + + /// getFunction - Return the function to which this metadata applies. + /// + const Function &getFunction() const { return F; } + + /// getStrategy - Return the GC strategy for the function. + /// + GCStrategy &getStrategy() { return S; } + + /// addStackRoot - Registers a root that lives on the stack. Num is the + /// stack object ID for the alloca (if the code generator is + /// using MachineFrameInfo). + void addStackRoot(int Num, const Constant *Metadata) { + Roots.push_back(GCRoot(Num, Metadata)); + } + + /// addSafePoint - Notes the existence of a safe point. Num is the ID of the + /// label just prior to the safe point (if the code generator is using + /// MachineModuleInfo). + void addSafePoint(GC::PointKind Kind, MCSymbol *Label, DebugLoc DL) { + SafePoints.push_back(GCPoint(Kind, Label, DL)); + } + + /// getFrameSize/setFrameSize - Records the function's frame size. + /// + uint64_t getFrameSize() const { return FrameSize; } + void setFrameSize(uint64_t S) { FrameSize = S; } + + /// begin/end - Iterators for safe points. + /// + iterator begin() { return SafePoints.begin(); } + iterator end() { return SafePoints.end(); } + size_t size() const { return SafePoints.size(); } + + /// roots_begin/roots_end - Iterators for all roots in the function. + /// + roots_iterator roots_begin() { return Roots.begin(); } + roots_iterator roots_end () { return Roots.end(); } + size_t roots_size() const { return Roots.size(); } + + /// live_begin/live_end - Iterators for live roots at a given safe point. + /// + live_iterator live_begin(const iterator &p) { return roots_begin(); } + live_iterator live_end (const iterator &p) { return roots_end(); } + size_t live_size(const iterator &p) const { return roots_size(); } + }; + + + /// GCModuleInfo - Garbage collection metadata for a whole module. + /// + class GCModuleInfo : public ImmutablePass { + typedef StringMap<GCStrategy*> strategy_map_type; + typedef std::vector<GCStrategy*> list_type; + typedef DenseMap<const Function*,GCFunctionInfo*> finfo_map_type; + + strategy_map_type StrategyMap; + list_type StrategyList; + finfo_map_type FInfoMap; + + GCStrategy *getOrCreateStrategy(const Module *M, const std::string &Name); + + public: + typedef list_type::const_iterator iterator; + + static char ID; + + GCModuleInfo(); + ~GCModuleInfo(); + + /// clear - Resets the pass. The metadata deleter pass calls this. + /// + void clear(); + + /// begin/end - Iterators for used strategies. + /// + iterator begin() const { return StrategyList.begin(); } + iterator end() const { return StrategyList.end(); } + + /// get - Look up function metadata.
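+ /// For illustration, a client pass might iterate the recorded safe points
+ /// as follows (GMI, FI, F and PI are illustrative names):
+ ///   GCFunctionInfo &FI = GMI.getFunctionInfo(F);
+ ///   for (GCFunctionInfo::iterator PI = FI.begin(); PI != FI.end(); ++PI)
+ ///     ... emit a stack-map entry for PI->Label ...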
+ /// + GCFunctionInfo &getFunctionInfo(const Function &F); + }; + +} + +#endif diff --git a/final/include/llvm/CodeGen/GCMetadataPrinter.h b/final/include/llvm/CodeGen/GCMetadataPrinter.h new file mode 100644 index 00000000000..17a26530000 --- /dev/null +++ b/final/include/llvm/CodeGen/GCMetadataPrinter.h @@ -0,0 +1,73 @@ +//===-- llvm/CodeGen/GCMetadataPrinter.h - Prints asm GC tables -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The abstract base class GCMetadataPrinter supports writing GC metadata tables +// as assembly code. This is a separate class from GCStrategy in order to allow +// users of the LLVM JIT to avoid linking with the AsmWriter. +// +// Subclasses of GCMetadataPrinter must be registered using the +// GCMetadataPrinterRegistry. This is separate from the GCStrategy itself +// because these subclasses are logically plugins for the AsmWriter. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GCMETADATAPRINTER_H +#define LLVM_CODEGEN_GCMETADATAPRINTER_H + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/Support/Registry.h" + +namespace llvm { + + class GCMetadataPrinter; + + /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the + /// defaults from Registry. + typedef Registry<GCMetadataPrinter> GCMetadataPrinterRegistry; + + /// GCMetadataPrinter - Emits GC metadata as assembly code. + /// + class GCMetadataPrinter { + public: + typedef GCStrategy::list_type list_type; + typedef GCStrategy::iterator iterator; + + private: + GCStrategy *S; + + friend class AsmPrinter; + + protected: + // May only be subclassed. + GCMetadataPrinter(); + + // Do not implement. + GCMetadataPrinter(const GCMetadataPrinter &); + GCMetadataPrinter &operator=(const GCMetadataPrinter &); + + public: + GCStrategy &getStrategy() { return *S; } + const Module &getModule() const { return S->getModule(); } + + /// begin/end - Iterate over the collected function metadata. + iterator begin() { return S->begin(); } + iterator end() { return S->end(); } + + /// beginAssembly/finishAssembly - Emit module metadata as assembly code. + virtual void beginAssembly(AsmPrinter &AP); + + virtual void finishAssembly(AsmPrinter &AP); + + virtual ~GCMetadataPrinter(); + }; + +} + +#endif diff --git a/final/include/llvm/CodeGen/GCStrategy.h b/final/include/llvm/CodeGen/GCStrategy.h new file mode 100644 index 00000000000..cd760dba92a --- /dev/null +++ b/final/include/llvm/CodeGen/GCStrategy.h @@ -0,0 +1,142 @@ +//===-- llvm/CodeGen/GCStrategy.h - Garbage collection ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// GCStrategy coordinates code generation algorithms and implements some itself +// in order to generate code compatible with a target code generator as +// specified in a function's 'gc' attribute. Algorithms are enabled by setting +// flags in a subclass's constructor, and some virtual methods can be +// overridden. +// +// When requested, the GCStrategy will be populated with data about each +// function which uses it.
Specifically: +// +// - Safe points +// Garbage collection is generally only possible at certain points in code. +// GCStrategy can request that the collector insert such points: +// +// - At and after any call to a subroutine +// - Before returning from the current function +// - Before backwards branches (loops) +// +// - Roots +// When a reference to a GC-allocated object exists on the stack, it must be +// stored in an alloca registered with llvm.gcroot. +// +// This information can be used to emit the metadata tables which are required +// by the target garbage collector runtime. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GCSTRATEGY_H +#define LLVM_CODEGEN_GCSTRATEGY_H + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/Support/Registry.h" +#include <string> + +namespace llvm { + + class GCStrategy; + + /// The GC strategy registry uses all the defaults from Registry. + /// + typedef Registry<GCStrategy> GCRegistry; + + /// GCStrategy describes a garbage collector algorithm's code generation + /// requirements, and provides overridable hooks for those needs which cannot + /// be abstractly described. + class GCStrategy { + public: + typedef std::vector<GCFunctionInfo*> list_type; + typedef list_type::iterator iterator; + + private: + friend class GCModuleInfo; + const Module *M; + std::string Name; + + list_type Functions; + + protected: + unsigned NeededSafePoints; //< Bitmask of required safe points. + bool CustomReadBarriers; //< Default is to insert loads. + bool CustomWriteBarriers; //< Default is to insert stores. + bool CustomRoots; //< Default is to pass through to backend. + bool InitRoots; //< If set, roots are nulled during lowering. + bool UsesMetadata; //< If set, backend must emit metadata tables. + + public: + GCStrategy(); + + virtual ~GCStrategy(); + + + /// getName - The name of the GC strategy, for debugging. + /// + const std::string &getName() const { return Name; } + + /// getModule - The module within which the GC strategy is operating. + /// + const Module &getModule() const { return *M; } + + /// needsSafePoints - True if safe points of any kind are required. By + /// default, none are recorded. + bool needsSafePoints() const { return NeededSafePoints != 0; } + + /// needsSafePoint(Kind) - True if the given kind of safe point is + /// required. By default, none are recorded. + bool needsSafePoint(GC::PointKind Kind) const { + return (NeededSafePoints & 1 << Kind) != 0; + } + + /// customWriteBarrier - By default, write barriers are replaced with simple + /// store instructions. If true, then + /// performCustomLowering must instead lower them. + bool customWriteBarrier() const { return CustomWriteBarriers; } + + /// customReadBarrier - By default, read barriers are replaced with simple + /// load instructions. If true, then + /// performCustomLowering must instead lower them. + bool customReadBarrier() const { return CustomReadBarriers; } + + /// customRoots - By default, roots are left for the code generator so it + /// can generate a stack map. If true, then + /// performCustomLowering must delete them. + bool customRoots() const { return CustomRoots; } + + /// initializeRoots - If set, gcroot intrinsics should initialize their + /// allocas to null before the first use. This is + /// necessary for most GCs and is enabled by default. + bool initializeRoots() const { return InitRoots; } + + /// usesMetadata - If set, appropriate metadata tables must be emitted by + /// the back-end (assembler, JIT, or otherwise).
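+ /// For illustration, a hypothetical strategy subclass might configure the
+ /// flags above like so:
+ ///   class MyGC : public GCStrategy {
+ ///   public:
+ ///     MyGC() {
+ ///       InitRoots = true;                      // null roots on entry
+ ///       UsesMetadata = true;                   // emit stack-map tables
+ ///       NeededSafePoints = 1 << GC::PostCall;  // record return addresses
+ ///     }
+ ///   };
+ ///   static GCRegistry::Add<MyGC> X("mygc", "an illustrative collector");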
+ bool usesMetadata() const { return UsesMetadata; } + + /// begin/end - Iterators for function metadata. + /// + iterator begin() { return Functions.begin(); } + iterator end() { return Functions.end(); } + + /// insertFunctionInfo - Creates metadata for a function. + /// + GCFunctionInfo *insertFunctionInfo(const Function &F); + + /// initializeCustomLowering/performCustomLowering - If any of the actions + /// are set to custom, performCustomLowering must be overridden to transform + /// the corresponding actions to LLVM IR. initializeCustomLowering is + /// optional to override. These are the only GCStrategy methods through + /// which the LLVM IR can be modified. + virtual bool initializeCustomLowering(Module &F); + virtual bool performCustomLowering(Function &F); + }; + +} + +#endif diff --git a/final/include/llvm/CodeGen/GCs.h b/final/include/llvm/CodeGen/GCs.h new file mode 100644 index 00000000000..c407b616748 --- /dev/null +++ b/final/include/llvm/CodeGen/GCs.h @@ -0,0 +1,35 @@ +//===-- GCs.h - Garbage collector linkage hacks ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains hack functions to force linking in the GC components. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GCS_H +#define LLVM_CODEGEN_GCS_H + +namespace llvm { + class GCStrategy; + class GCMetadataPrinter; + + /// FIXME: Collector instances are not useful on their own. These no longer + /// serve any purpose except to link in the plugins. + + /// Creates an ocaml-compatible garbage collector. + void linkOcamlGC(); + + /// Creates an ocaml-compatible metadata printer. + void linkOcamlGCPrinter(); + + /// Creates a shadow stack garbage collector. This collector requires no code + /// generator support. + void linkShadowStackGC(); +} + +#endif diff --git a/final/include/llvm/CodeGen/ISDOpcodes.h b/final/include/llvm/CodeGen/ISDOpcodes.h new file mode 100644 index 00000000000..3da11c4a0e0 --- /dev/null +++ b/final/include/llvm/CodeGen/ISDOpcodes.h @@ -0,0 +1,799 @@ +//===-- llvm/CodeGen/ISDOpcodes.h - CodeGen opcodes -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares codegen opcodes and related utilities. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ISDOPCODES_H +#define LLVM_CODEGEN_ISDOPCODES_H + +namespace llvm { + +/// ISD namespace - This namespace contains an enum which represents all of the +/// SelectionDAG node types and value types. +/// +namespace ISD { + + //===--------------------------------------------------------------------===// + /// ISD::NodeType enum - This enum defines the target-independent operators + /// for a SelectionDAG. + /// + /// Targets may also define target-dependent operator codes for SDNodes. For + /// example, on x86, these are the enum values in the X86ISD namespace.
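+ /// For illustration, such a target namespace conventionally begins its
+ /// opcode numbering after the last target-independent opcode:
+ ///   namespace X86ISD {
+ ///     enum NodeType {
+ ///       FIRST_NUMBER = ISD::BUILTIN_OP_END,
+ ///       // ... target-specific opcodes ...
+ ///     };
+ ///   }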
+ /// Targets should aim to use target-independent operators to model their + /// instruction sets as much as possible, and only use target-dependent + /// operators when they have special requirements. + /// + /// Finally, during and after selection proper, SDNodes may use special + /// operator codes that correspond directly with MachineInstr opcodes. These + /// are used to represent selected instructions. See the isMachineOpcode() + /// and getMachineOpcode() member functions of SDNode. + /// + enum NodeType { + // DELETED_NODE - This is an illegal value that is used to catch + // errors. This opcode is not a legal opcode for any node. + DELETED_NODE, + + // EntryToken - This is the marker used to indicate the start of the region. + EntryToken, + + // TokenFactor - This node takes multiple tokens as input and produces a + // single token result. This is used to represent the fact that the operand + // operators are independent of each other. + TokenFactor, + + // AssertSext, AssertZext - These nodes record if a register contains a + // value that has already been zero or sign extended from a narrower type. + // These nodes take two operands. The first is the node that has already + // been extended, and the second is a value type node indicating the width + // of the extension. + AssertSext, AssertZext, + + // Various leaf nodes. + BasicBlock, VALUETYPE, CONDCODE, Register, + Constant, ConstantFP, + GlobalAddress, GlobalTLSAddress, FrameIndex, + JumpTable, ConstantPool, ExternalSymbol, BlockAddress, + + // The address of the GOT + GLOBAL_OFFSET_TABLE, + + // FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and + // llvm.returnaddress on the DAG. These nodes take one operand, the index + // of the frame or return address to return. An index of zero corresponds + // to the current function's frame or return address, an index of one to the + // parent's frame or return address, and so on. + FRAMEADDR, RETURNADDR, + + // FRAME_TO_ARGS_OFFSET - This node represents offset from frame pointer to + // first (possible) on-stack argument. This is needed for correct stack + // adjustment during unwind. + FRAME_TO_ARGS_OFFSET, + + // RESULT, OUTCHAIN = EXCEPTIONADDR(INCHAIN) - This node represents the + // address of the exception block on entry to a landing pad block. + EXCEPTIONADDR, + + // RESULT, OUTCHAIN = LSDAADDR(INCHAIN) - This node represents the + // address of the Language Specific Data Area for the enclosing function. + LSDAADDR, + + // RESULT, OUTCHAIN = EHSELECTION(INCHAIN, EXCEPTION) - This node represents + // the selection index of the exception thrown. + EHSELECTION, + + // OUTCHAIN = EH_RETURN(INCHAIN, OFFSET, HANDLER) - This node represents + // 'eh_return' gcc dwarf builtin, which is used to return from + // exception. The general meaning is: adjust stack by OFFSET and pass + // execution to HANDLER. Many platform-related details also :) + EH_RETURN, + + // OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) + // This corresponds to the eh.sjlj.setjmp intrinsic. + // It takes an input chain and a pointer to the jump buffer as inputs + // and returns an outchain. + EH_SJLJ_SETJMP, + + // OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) + // This corresponds to the eh.sjlj.longjmp intrinsic. + // It takes an input chain and a pointer to the jump buffer as inputs + // and returns an outchain. + EH_SJLJ_LONGJMP, + + // OUTCHAIN = EH_SJLJ_DISPATCHSETUP(INCHAIN, context) + // This corresponds to the eh.sjlj.dispatchsetup intrinsic.
It takes an + // input chain and a pointer to the sjlj function context as inputs and + // returns an outchain. By default, this does nothing. Targets can lower + // this to unwind setup code if needed. + EH_SJLJ_DISPATCHSETUP, + + // TargetConstant* - Like Constant*, but the DAG does not do any folding, + // simplification, or lowering of the constant. They are used for constants + // which are known to fit in the immediate fields of their users, or for + // carrying magic numbers which are not values which need to be materialized + // in registers. + TargetConstant, + TargetConstantFP, + + // TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or + // anything else with this node, and this is valid in the target-specific + // dag, turning into a GlobalAddress operand. + TargetGlobalAddress, + TargetGlobalTLSAddress, + TargetFrameIndex, + TargetJumpTable, + TargetConstantPool, + TargetExternalSymbol, + TargetBlockAddress, + + /// RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) + /// This node represents a target intrinsic function with no side effects. + /// The first operand is the ID number of the intrinsic from the + /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The + /// node returns the result of the intrinsic. + INTRINSIC_WO_CHAIN, + + /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) + /// This node represents a target intrinsic function with side effects that + /// returns a result. The first operand is a chain pointer. The second is + /// the ID number of the intrinsic from the llvm::Intrinsic namespace. The + /// operands to the intrinsic follow. The node has two results, the result + /// of the intrinsic and an output chain. + INTRINSIC_W_CHAIN, + + /// OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) + /// This node represents a target intrinsic function with side effects that + /// does not return a result. The first operand is a chain pointer. The + /// second is the ID number of the intrinsic from the llvm::Intrinsic + /// namespace. The operands to the intrinsic follow. + INTRINSIC_VOID, + + // CopyToReg - This node has three operands: a chain, a register number to + // set to this value, and a value. + CopyToReg, + + // CopyFromReg - This node indicates that the input value is a virtual or + // physical register that is defined outside of the scope of this + // SelectionDAG. The register is available from the RegisterSDNode object. + CopyFromReg, + + // UNDEF - An undefined node + UNDEF, + + // EXTRACT_ELEMENT - This is used to get the lower or upper (determined by + // a Constant, which is required to be operand #1) half of the integer or + // float value specified as operand #0. This is only for use before + // legalization, for values that will be broken into multiple registers. + EXTRACT_ELEMENT, + + // BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways. Given + // two values of the same integer value type, this produces a value twice as + // big. Like EXTRACT_ELEMENT, this can only be used before legalization. + BUILD_PAIR, + + // MERGE_VALUES - This node takes multiple discrete operands and returns + // them all as its individual results. This node has exactly the same + // number of inputs and outputs. This node is useful for some pieces of the + // code generator that want to think about a single node with multiple + // results, not multiple nodes. + MERGE_VALUES, + + // Simple integer binary arithmetic operators.
+ ADD, SUB, MUL, SDIV, UDIV, SREM, UREM, + + // SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing + // a signed/unsigned value of type i[2*N], and return the full value as + // two results, each of type iN. + SMUL_LOHI, UMUL_LOHI, + + // SDIVREM/UDIVREM - Divide two integers and produce both a quotient and + // remainder result. + SDIVREM, UDIVREM, + + // CARRY_FALSE - This node is used when folding other nodes, + // like ADDC/SUBC, which indicate the carry result is always false. + CARRY_FALSE, + + // Carry-setting nodes for multiple precision addition and subtraction. + // These nodes take two operands of the same value type, and produce two + // results. The first result is the normal add or sub result, the second + // result is the carry flag result. + ADDC, SUBC, + + // Carry-using nodes for multiple precision addition and subtraction. These + // nodes take three operands: The first two are the normal lhs and rhs to + // the add or sub, and the third is the input carry flag. These nodes + // produce two results; the normal result of the add or sub, and the output + // carry flag. These nodes both read and write a carry flag to allow them + // to be chained together for add and sub of arbitrarily large + // values. + ADDE, SUBE, + + // RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. + // These nodes take two operands: the normal LHS and RHS to the add. They + // produce two results: the normal result of the add, and a boolean that + // indicates if an overflow occurred (*not* a flag, because it may be stored + // to memory, etc.). If the type of the boolean is not i1 then the high + // bits conform to getBooleanContents. + // These nodes are generated from the llvm.[su]add.with.overflow intrinsics. + SADDO, UADDO, + + // Same for subtraction + SSUBO, USUBO, + + // Same for multiplication + SMULO, UMULO, + + // Simple binary floating point operators. + FADD, FSUB, FMUL, FDIV, FREM, + + // FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This + // DAG node does not require that X and Y have the same type, just that they + // are both floating point. X and the result must have the same type. + // FCOPYSIGN(f32, f64) is allowed. + FCOPYSIGN, + + // INT = FGETSIGN(FP) - Return the sign bit of the specified floating point + // value as an integer 0/1 value. + FGETSIGN, + + /// BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the + /// specified, possibly variable, elements. The number of elements is + /// required to be a power of two. The types of the operands must all be + /// the same and must match the vector element type, except that integer + /// types are allowed to be larger than the element type, in which case + /// the operands are implicitly truncated. + BUILD_VECTOR, + + /// INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element + /// at IDX replaced with VAL. If the type of VAL is larger than the vector + /// element type then VAL is truncated before replacement. + INSERT_VECTOR_ELT, + + /// EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR + /// identified by the (potentially variable) element number IDX. If the + /// return type is an integer type larger than the element type of the + /// vector, the result is extended to the width of the return type. + EXTRACT_VECTOR_ELT, + + /// CONCAT_VECTORS(VECTOR0, VECTOR1, ...)
- Given a number of values of + /// vector type with the same length and element type, this produces a + /// concatenated vector result value, with length equal to the sum of the + /// lengths of the input vectors. + CONCAT_VECTORS, + + /// INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector + /// with VECTOR2 inserted into VECTOR1 at the (potentially + /// variable) element number IDX, which must be a multiple of the + /// VECTOR2 vector length. The elements of VECTOR1 starting at + /// IDX are overwritten with VECTOR2. Elements IDX through + /// vector_length(VECTOR2) must be valid VECTOR1 indices. + INSERT_SUBVECTOR, + + /// EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a + /// vector value) starting with the element number IDX, which must be a + /// constant multiple of the result vector length. + EXTRACT_SUBVECTOR, + + /// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as + /// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int + /// values that indicate which value (or undef) each result element will + /// get. These constant ints are accessible through the + /// ShuffleVectorSDNode class. This is quite similar to the Altivec + /// 'vperm' instruction, except that the indices must be constants and are + /// in terms of the element size of VEC1/VEC2, not in terms of bytes. + VECTOR_SHUFFLE, + + /// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a + /// scalar value into element 0 of the resultant vector type. The top + /// elements 1 to N-1 of the N-element vector are undefined. The type + /// of the operand must match the vector element type, except when they + /// are integer types. In this case the operand is allowed to be wider + /// than the vector element type, and is implicitly truncated to it. + SCALAR_TO_VECTOR, + + // MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing + // an unsigned/signed value of type i[2*N], then return the top part. + MULHU, MULHS, + + /// Bitwise operators - logical and, logical or, logical xor. + AND, OR, XOR, + + /// Shift and rotation operations. After legalization, the type of the + /// shift amount is known to be TLI.getShiftAmountTy(). Before legalization + /// the shift amount can be any type, but care must be taken to ensure it is + /// large enough. TLI.getShiftAmountTy() is i8 on some targets, but before + /// legalization, types like i1024 can occur and i8 doesn't have enough bits + /// to represent the shift amount. By convention, DAGCombine and + /// SelectionDAGBuilder force these shift amounts to i32 for simplicity. + /// + SHL, SRA, SRL, ROTL, ROTR, + + /// Byte Swap and Counting operators. + BSWAP, CTTZ, CTLZ, CTPOP, + + // Select(COND, TRUEVAL, FALSEVAL). If the type of the boolean COND is not + // i1 then the high bits must conform to getBooleanContents. + SELECT, + + // Select with condition operator - This selects between a true value and + // a false value (ops #2 and #3) based on the boolean result of comparing + // the lhs and rhs (ops #0 and #1) of a conditional expression with the + // condition code in op #4, a CondCodeSDNode. + SELECT_CC, + + // SetCC operator - This evaluates to a true value iff the condition is + // true. If the result value type is not i1 then the high bits conform + // to getBooleanContents. The operands to this are the left and right + // operands to compare (ops #0, and #1) and the condition code to compare + // them with (op #2) as a CondCodeSDNode.
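+ // For illustration, a combine or a target typically creates one via
+ // (DAG, dl, LHS and RHS are illustrative names):
+ //   SDValue C = DAG.getSetCC(dl, MVT::i1, LHS, RHS, ISD::SETLT);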
+ SETCC, + + // RESULT = VSETCC(LHS, RHS, COND) operator - This evaluates to a vector of + // integer elements with all bits of the result elements set to true if the + // comparison is true or all cleared if the comparison is false. The + // operands to this are the left and right operands to compare (LHS/RHS) and + // the condition code to compare them with (COND) as a CondCodeSDNode. + VSETCC, + + // SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded + // integer shift operations, just like ADD/SUB_PARTS. The operation + // ordering is: + // [Lo,Hi] = op [LoLHS,HiLHS], Amt + SHL_PARTS, SRA_PARTS, SRL_PARTS, + + // Conversion operators. These are all single input single output + // operations. For all of these, the result type must be strictly + // wider or narrower (depending on the operation) than the source + // type. + + // SIGN_EXTEND - Used for integer types, replicating the sign bit + // into new bits. + SIGN_EXTEND, + + // ZERO_EXTEND - Used for integer types, zeroing the new bits. + ZERO_EXTEND, + + // ANY_EXTEND - Used for integer types. The high bits are undefined. + ANY_EXTEND, + + // TRUNCATE - Completely drop the high bits. + TRUNCATE, + + // [SU]INT_TO_FP - These operators convert integers (whose interpreted sign + // depends on the first letter) to floating point. + SINT_TO_FP, + UINT_TO_FP, + + // SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to + // sign extend a small value in a large integer register (e.g. sign + // extending the low 8 bits of a 32-bit register to fill the top 24 bits + // with the 7th bit). The size of the smaller type is indicated by + // operand #1, a ValueType node. + SIGN_EXTEND_INREG, + + /// FP_TO_[US]INT - Convert a floating point value to a signed or unsigned + /// integer. + FP_TO_SINT, + FP_TO_UINT, + + /// X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type + /// down to the precision of the destination VT. TRUNC is a flag, which is + /// always an integer that is zero or one. If TRUNC is 0, this is a + /// normal rounding, if it is 1, this FP_ROUND is known to not change the + /// value of Y. + /// + /// The TRUNC = 1 case is used in cases where we know that the value will + /// not be modified by the node, because Y is not using any of the extra + /// precision of source type. This allows certain transformations like + /// FP_EXTEND(FP_ROUND(X,1)) -> X which are not safe for + /// FP_EXTEND(FP_ROUND(X,0)) because the extra bits aren't removed. + FP_ROUND, + + // FLT_ROUNDS_ - Returns current rounding mode: + // -1 Undefined + // 0 Round to 0 + // 1 Round to nearest + // 2 Round to +inf + // 3 Round to -inf + FLT_ROUNDS_, + + /// X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and + /// rounds it to a floating point value. It then promotes it and returns it + /// in a register of the same size. This operation effectively just + /// discards excess precision. The type to round down to is specified by + /// the VT operand, a VTSDNode. + FP_ROUND_INREG, + + /// X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type. + FP_EXTEND, + + // BITCAST - This operator converts between integer, vector and FP + // values, as if the value was stored to memory with one type and loaded + // from the same address with the other type (or equivalently for vector + // format conversions, etc). The source and result are required to have + // the same bit size (e.g. f32 <-> i32).
This can also be used for + // int-to-int or fp-to-fp conversions, but that is a noop, deleted by + // getNode(). + BITCAST, + + // CONVERT_RNDSAT - This operator is used to support various conversions + // between various types (float, signed, unsigned and vectors of those + // types) with rounding and saturation. NOTE: Avoid using this operator as + // most targets don't support it and the operator might be removed in the + // future. It takes the following arguments: + // 0) value + // 1) dest type (type to convert to) + // 2) src type (type to convert from) + // 3) rounding imm + // 4) saturation imm + // 5) ISD::CvtCode indicating the type of conversion to do + CONVERT_RNDSAT, + + // FP16_TO_FP32, FP32_TO_FP16 - These operators are used to perform + // promotions and truncation for half-precision (16 bit) floating point + // numbers. We need special nodes since FP16 is a storage-only type with + // special semantics of operations. + FP16_TO_FP32, FP32_TO_FP16, + + // FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, + // FLOG, FLOG2, FLOG10, FEXP, FEXP2, + // FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR - Perform various unary floating + // point operations. These are inspired by libm. + FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, + FLOG, FLOG2, FLOG10, FEXP, FEXP2, + FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR, + + // LOAD and STORE have token chains as their first operand, then the same + // operands as an LLVM load/store instruction, then an offset node that + // is added / subtracted from the base pointer to form the address (for + // indexed memory ops). + LOAD, STORE, + + // DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned + // to a specified boundary. This node always has two return values: a new + // stack pointer value and a chain. The first operand is the token chain, + // the second is the number of bytes to allocate, and the third is the + // alignment boundary. The size is guaranteed to be a multiple of the stack + // alignment, and the alignment is guaranteed to be bigger than the stack + // alignment (if required) or 0 to get standard stack alignment. + DYNAMIC_STACKALLOC, + + // Control flow instructions. These all have token chains. + + // BR - Unconditional branch. The first operand is the chain + // operand, the second is the MBB to branch to. + BR, + + // BRIND - Indirect branch. The first operand is the chain, the second + // is the value to branch to, which must be of the same type as the target's + // pointer type. + BRIND, + + // BR_JT - Jumptable branch. The first operand is the chain, the second + // is the jumptable index, the last one is the jumptable entry index. + BR_JT, + + // BRCOND - Conditional branch. The first operand is the chain, the + // second is the condition, the third is the block to branch to if the + // condition is true. If the type of the condition is not i1, then the + // high bits must conform to getBooleanContents. + BRCOND, + + // BR_CC - Conditional branch. The behavior is like that of SELECT_CC, in + // that the condition is represented as condition code, and two nodes to + // compare, rather than as a combined SetCC node. The operands in order are + // chain, cc, lhs, rhs, block to branch to if condition is true. + BR_CC, + + // INLINEASM - Represents an inline asm block. This node always has two + // return values: a chain and a flag result. The inputs are as follows: + // Operand #0 : Input chain. + // Operand #1 : an ExternalSymbolSDNode with a pointer to the asm string.
+ // Operand #2 : an MDNodeSDNode with the !srcloc metadata. + // Operand #3 : HasSideEffect, IsAlignStack bits. + // After this, it is followed by a list of operands with this format: + // ConstantSDNode: Flags that encode whether it is a mem or not, the + // number of operands that follow, etc. See InlineAsm.h. + // ... however many operands ... + // Operand #last: Optional, an incoming flag. + // + // The variable width operands are required to represent target addressing + // modes as a single "operand", even though they may have multiple + // SDOperands. + INLINEASM, + + // EH_LABEL - Represents a label in mid basic block used to track + // locations needed for debug and exception handling tables. These nodes + // take a chain as input and return a chain. + EH_LABEL, + + // STACKSAVE - STACKSAVE has one operand, an input chain. It produces a + // value, the same type as the pointer type for the system, and an output + // chain. + STACKSAVE, + + // STACKRESTORE has two operands, an input chain and a pointer to restore to; + // it returns an output chain. + STACKRESTORE, + + // CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of + // a call sequence, and carry arbitrary information that target might want + // to know. The first operand is a chain, the rest are specified by the + // target and not touched by the DAG optimizers. + // CALLSEQ_START..CALLSEQ_END pairs may not be nested. + CALLSEQ_START, // Beginning of a call sequence + CALLSEQ_END, // End of a call sequence + + // VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, + // and the alignment. It returns a pair of values: the vaarg value and a + // new chain. + VAARG, + + // VACOPY - VACOPY has five operands: an input chain, a destination pointer, + // a source pointer, a SRCVALUE for the destination, and a SRCVALUE for the + // source. + VACOPY, + + // VAEND, VASTART - VAEND and VASTART have three operands: an input chain, a + // pointer, and a SRCVALUE. + VAEND, VASTART, + + // SRCVALUE - This is a node type that holds a Value* that is used to + // make reference to a value in the LLVM IR. + SRCVALUE, + + // MDNODE_SDNODE - This is a node that holds an MDNode*, which is used to + // reference metadata in the IR. + MDNODE_SDNODE, + + // PCMARKER - This corresponds to the pcmarker intrinsic. + PCMARKER, + + // READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic. + // The only operand is a chain; a value and a chain are produced. The + // value is the contents of the architecture specific cycle counter like + // register (or other high accuracy low latency clock source) + READCYCLECOUNTER, + + // HANDLENODE node - Used as a handle for various purposes. + HANDLENODE, + + // TRAMPOLINE - This corresponds to the init_trampoline intrinsic. + // It takes as input a token chain, the pointer to the trampoline, + // the pointer to the nested function, the pointer to pass for the + // 'nest' parameter, a SRCVALUE for the trampoline and another for + // the nested function (allowing targets to access the original + // Function*). It produces the result of the intrinsic and a token + // chain as output. + TRAMPOLINE, + + // TRAP - Trapping instruction + TRAP, + + // PREFETCH - This corresponds to a prefetch intrinsic. It takes a chain as + // its first operand. The other operands are the address to prefetch, + // read / write specifier, and locality specifier.
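+ // For illustration, SelectionDAGBuilder creates it roughly as
+ // (variable names are illustrative):
+ //   DAG.getNode(ISD::PREFETCH, dl, MVT::Other, Chain, Addr, RW, Locality);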
+ PREFETCH, + + // OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load, + // store-store, device) + // This corresponds to the memory.barrier intrinsic. + // It takes an input chain, 4 operands to specify the type of barrier, an + // operand specifying if the barrier applies to device and uncached memory + // and produces an output chain. + MEMBARRIER, + + // Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) + // This corresponds to the atomic.lcs intrinsic. + // cmp is compared to *ptr, and if equal, swap is stored in *ptr. + // The return is always the original value in *ptr. + ATOMIC_CMP_SWAP, + + // Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) + // This corresponds to the atomic.swap intrinsic. + // amt is stored to *ptr atomically. + // The return is always the original value in *ptr. + ATOMIC_SWAP, + + // Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN, ptr, amt) + // This corresponds to the atomic.load.[OpName] intrinsic. + // op(*ptr, amt) is stored to *ptr atomically. + // The return is always the original value in *ptr. + ATOMIC_LOAD_ADD, + ATOMIC_LOAD_SUB, + ATOMIC_LOAD_AND, + ATOMIC_LOAD_OR, + ATOMIC_LOAD_XOR, + ATOMIC_LOAD_NAND, + ATOMIC_LOAD_MIN, + ATOMIC_LOAD_MAX, + ATOMIC_LOAD_UMIN, + ATOMIC_LOAD_UMAX, + + /// BUILTIN_OP_END - This must be the last enum value in this list. + /// The target-specific pre-isel opcode values start here. + BUILTIN_OP_END + }; + + /// FIRST_TARGET_MEMORY_OPCODE - Target-specific pre-isel operations + /// which do not reference a specific memory location should be less than + /// this value. Those that do must not be less than this value, and can + /// be used with SelectionDAG::getMemIntrinsicNode. + static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+150; + + //===--------------------------------------------------------------------===// + /// MemIndexedMode enum - This enum defines the load / store indexed + /// addressing modes. + /// + /// UNINDEXED "Normal" load / store. The effective address is already + /// computed and is available in the base pointer. The offset + /// operand is always undefined. In addition to producing a + /// chain, an unindexed load produces one value (result of the + /// load); an unindexed store does not produce a value. + /// + /// PRE_INC Similar to the unindexed mode where the effective address is + /// PRE_DEC the value of the base pointer add / subtract the offset. + /// It considers the computation as being folded into the load / + /// store operation (i.e. the load / store does the address + /// computation as well as performing the memory transaction). + /// The base operand is always undefined. In addition to + /// producing a chain, pre-indexed load produces two values + /// (result of the load and the result of the address + /// computation); a pre-indexed store produces one value (result + /// of the address computation). + /// + /// POST_INC The effective address is the value of the base pointer. The + /// POST_DEC value of the offset operand is then added to / subtracted + /// from the base after memory transaction. In addition to + /// producing a chain, post-indexed load produces two values + /// (the result of the load and the result of the base +/- offset + /// computation); a post-indexed store produces one value (the + /// result of the base +/- offset computation).
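+ /// For illustration, a target's DAG combine can turn a normal load into a
+ /// pre-indexed one via (names are illustrative):
+ ///   SDValue NewLd = DAG.getIndexedLoad(Ld, dl, Base, Offset, ISD::PRE_INC);
+ /// which yields the loaded value, the updated base address, and a chain.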
+ enum MemIndexedMode { + UNINDEXED = 0, + PRE_INC, + PRE_DEC, + POST_INC, + POST_DEC, + LAST_INDEXED_MODE + }; + + //===--------------------------------------------------------------------===// + /// LoadExtType enum - This enum defines the three variants of LOADEXT + /// (load with extension). + /// + /// SEXTLOAD loads the integer operand and sign extends it to a larger + /// integer result type. + /// ZEXTLOAD loads the integer operand and zero extends it to a larger + /// integer result type. + /// EXTLOAD is used for two things: floating point extending loads and + /// integer extending loads [the top bits are undefined]. + enum LoadExtType { + NON_EXTLOAD = 0, + EXTLOAD, + SEXTLOAD, + ZEXTLOAD, + LAST_LOADEXT_TYPE + }; + + //===--------------------------------------------------------------------===// + /// ISD::CondCode enum - These are ordered carefully to make the bitfields + /// below work out, when considering SETFALSE (something that never exists + /// dynamically) as 0. "U" -> Unsigned (for integer operands) or Unordered + /// (for floating point), "L" -> Less than, "G" -> Greater than, "E" -> Equal + /// to. If the "N" column is 1, the result of the comparison is undefined if + /// the input is a NAN. + /// + /// All of these (except for the 'always folded ops') should be handled for + /// floating point. For integer, only the SETEQ,SETNE,SETLT,SETLE,SETGT, + /// SETGE,SETULT,SETULE,SETUGT, and SETUGE opcodes are used. + /// + /// Note that these are laid out in a specific order to allow bit-twiddling + /// to transform conditions. + enum CondCode { + // Opcode N U L G E Intuitive operation + SETFALSE, // 0 0 0 0 Always false (always folded) + SETOEQ, // 0 0 0 1 True if ordered and equal + SETOGT, // 0 0 1 0 True if ordered and greater than + SETOGE, // 0 0 1 1 True if ordered and greater than or equal + SETOLT, // 0 1 0 0 True if ordered and less than + SETOLE, // 0 1 0 1 True if ordered and less than or equal + SETONE, // 0 1 1 0 True if ordered and operands are unequal + SETO, // 0 1 1 1 True if ordered (no nans) + SETUO, // 1 0 0 0 True if unordered: isnan(X) | isnan(Y) + SETUEQ, // 1 0 0 1 True if unordered or equal + SETUGT, // 1 0 1 0 True if unordered or greater than + SETUGE, // 1 0 1 1 True if unordered, greater than, or equal + SETULT, // 1 1 0 0 True if unordered or less than + SETULE, // 1 1 0 1 True if unordered, less than, or equal + SETUNE, // 1 1 1 0 True if unordered or not equal + SETTRUE, // 1 1 1 1 Always true (always folded) + // Don't care operations: undefined if the input is a nan. + SETFALSE2, // 1 X 0 0 0 Always false (always folded) + SETEQ, // 1 X 0 0 1 True if equal + SETGT, // 1 X 0 1 0 True if greater than + SETGE, // 1 X 0 1 1 True if greater than or equal + SETLT, // 1 X 1 0 0 True if less than + SETLE, // 1 X 1 0 1 True if less than or equal + SETNE, // 1 X 1 1 0 True if not equal + SETTRUE2, // 1 X 1 1 1 Always true (always folded) + + SETCC_INVALID // Marker value. + }; + + /// isSignedIntSetCC - Return true if this is a setcc instruction that + /// performs a signed comparison when used with integer operands. + inline bool isSignedIntSetCC(CondCode Code) { + return Code == SETGT || Code == SETGE || Code == SETLT || Code == SETLE; + } + + /// isUnsignedIntSetCC - Return true if this is a setcc instruction that + /// performs an unsigned comparison when used with integer operands. 
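+ /// For example, isUnsignedIntSetCC(ISD::SETULT) is true, while
+ /// isUnsignedIntSetCC(ISD::SETLT) and isUnsignedIntSetCC(ISD::SETOLT)
+ /// are false.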
+ inline bool isUnsignedIntSetCC(CondCode Code) { + return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE; + } + + /// isTrueWhenEqual - Return true if the specified condition returns true if + /// the two operands to the condition are equal. Note that if one of the two + /// operands is a NaN, this value is meaningless. + inline bool isTrueWhenEqual(CondCode Cond) { + return ((int)Cond & 1) != 0; + } + + /// getUnorderedFlavor - This function returns 0 if the condition is always + /// false if an operand is a NaN, 1 if the condition is always true if the + /// operand is a NaN, and 2 if the condition is undefined if the operand is a + /// NaN. + inline unsigned getUnorderedFlavor(CondCode Cond) { + return ((int)Cond >> 3) & 3; + } + + /// getSetCCInverse - Return the operation corresponding to !(X op Y), where + /// 'op' is a valid SetCC operation. + CondCode getSetCCInverse(CondCode Operation, bool isInteger); + + /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) + /// when given the operation for (X op Y). + CondCode getSetCCSwappedOperands(CondCode Operation); + + /// getSetCCOrOperation - Return the result of a logical OR between different + /// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This + /// function returns SETCC_INVALID if it is not possible to represent the + /// resultant comparison. + CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger); + + /// getSetCCAndOperation - Return the result of a logical AND between + /// different comparisons of identical values: ((X op1 Y) & (X op2 Y)). This + /// function returns SETCC_INVALID if it is not possible to represent the + /// resultant comparison. + CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger); + + //===--------------------------------------------------------------------===// + /// CvtCode enum - This enum defines the various conversions CONVERT_RNDSAT + /// supports. + enum CvtCode { + CVT_FF, // Float from Float + CVT_FS, // Float from Signed + CVT_FU, // Float from Unsigned + CVT_SF, // Signed from Float + CVT_UF, // Unsigned from Float + CVT_SS, // Signed from Signed + CVT_SU, // Signed from Unsigned + CVT_US, // Unsigned from Signed + CVT_UU, // Unsigned from Unsigned + CVT_INVALID // Marker - Invalid opcode + }; + +} // end llvm::ISD namespace + +} // end llvm namespace + +#endif diff --git a/final/include/llvm/CodeGen/IntrinsicLowering.h b/final/include/llvm/CodeGen/IntrinsicLowering.h new file mode 100644 index 00000000000..767b6662254 --- /dev/null +++ b/final/include/llvm/CodeGen/IntrinsicLowering.h @@ -0,0 +1,59 @@ +//===-- IntrinsicLowering.h - Intrinsic Function Lowering -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the IntrinsicLowering interface. This interface allows +// addition of domain-specific or front-end specific intrinsics to LLVM without +// having to modify all of the C backend or interpreter.
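+//
+// A minimal usage sketch (TD, M and CI are illustrative names for a
+// TargetData, the Module being processed, and a CallInst to an intrinsic):
+//
+//   IntrinsicLowering IL(TD);
+//   IL.AddPrototypes(M);       // declare any helper functions needed
+//   IL.LowerIntrinsicCall(CI); // replace the call with lowered code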
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INTRINSICLOWERING_H +#define LLVM_CODEGEN_INTRINSICLOWERING_H + +#include "llvm/Intrinsics.h" + +namespace llvm { + class CallInst; + class Module; + class TargetData; + + class IntrinsicLowering { + const TargetData& TD; + + bool Warned; + public: + explicit IntrinsicLowering(const TargetData &td) : + TD(td), Warned(false) {} + + /// AddPrototypes - This method, if called, causes all of the prototypes + /// that might be needed by an intrinsic lowering implementation to be + /// inserted into the module specified. + void AddPrototypes(Module &M); + + /// LowerIntrinsicCall - This method replaces a call with the LLVM function + /// which should be used to implement the specified intrinsic function call. + /// If an intrinsic function must be implemented by the code generator + /// (such as va_start), this function should print a message and abort. + /// + /// Otherwise, if an intrinsic function call can be lowered, the code to + /// implement it (often a call to a non-intrinsic function) is inserted + /// _after_ the call instruction and the call is deleted. The caller must + /// be capable of handling this kind of change. + /// + void LowerIntrinsicCall(CallInst *CI); + + /// LowerToByteSwap - Replace a call instruction with a call to the bswap + /// intrinsic. Return false if it has determined the call is not a + /// simple integer bswap. + static bool LowerToByteSwap(CallInst *CI); + }; +} + +#endif diff --git a/final/include/llvm/CodeGen/JITCodeEmitter.h b/final/include/llvm/CodeGen/JITCodeEmitter.h new file mode 100644 index 00000000000..fea85230515 --- /dev/null +++ b/final/include/llvm/CodeGen/JITCodeEmitter.h @@ -0,0 +1,343 @@ +//===-- llvm/CodeGen/JITCodeEmitter.h - Code emission ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an abstract interface that is used by the machine code +// emission framework to output the code. This allows machine code emission to +// be separated from concerns such as resolution of call targets, and where the +// machine code will be written (memory or disk, e.g.). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_JITCODEEMITTER_H +#define LLVM_CODEGEN_JITCODEEMITTER_H + +#include <string> +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/CodeGen/MachineCodeEmitter.h" +#include "llvm/ADT/DenseMap.h" + +using namespace std; + +namespace llvm { + +class MachineBasicBlock; +class MachineConstantPool; +class MachineJumpTableInfo; +class MachineFunction; +class MachineModuleInfo; +class MachineRelocation; +class Value; +class GlobalValue; +class Function; + +/// JITCodeEmitter - This class defines two sorts of methods: those for +/// emitting the actual bytes of machine code, and those for emitting auxiliary +/// structures, such as jump tables, relocations, etc. +/// +/// Emission of machine code is complicated by the fact that we don't (in +/// general) know the size of the machine code that we're about to emit before +/// we emit it. As such, we preallocate a certain amount of memory, and set the +/// BufferBegin/BufferEnd pointers to the start and end of the buffer.
As we +/// emit machine instructions, we advance the CurBufferPtr to indicate the +/// location of the next byte to emit. In the case of a buffer overflow (we +/// need to emit more machine code than we have allocated space for), the +/// CurBufferPtr will saturate to BufferEnd and ignore stores. Once the entire +/// function has been emitted, the overflow condition is checked, and if it has +/// occurred, more memory is allocated, and we reemit the code into it. +/// +class JITCodeEmitter : public MachineCodeEmitter { +public: + virtual ~JITCodeEmitter() {} + + /// startFunction - This callback is invoked when the specified function is + /// about to be code generated. This initializes the BufferBegin/End/Ptr + /// fields. + /// + virtual void startFunction(MachineFunction &F) = 0; + + /// finishFunction - This callback is invoked when the specified function has + /// finished code generation. If a buffer overflow has occurred, this method + /// returns true (the callee is required to try again), otherwise it returns + /// false. + /// + virtual bool finishFunction(MachineFunction &F) = 0; + + /// allocIndirectGV - Allocates and fills storage for an indirect + /// GlobalValue, and returns the address. + virtual void *allocIndirectGV(const GlobalValue *GV, + const uint8_t *Buffer, size_t Size, + unsigned Alignment) = 0; + + /// emitByte - This callback is invoked when a byte needs to be written to the + /// output stream. + /// + void emitByte(uint8_t B) { + if (CurBufferPtr != BufferEnd) + *CurBufferPtr++ = B; + } + + /// emitWordLE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in little-endian format. + /// + void emitWordLE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitWordBE - This callback is invoked when a 32-bit word needs to be + /// written to the output stream in big-endian format. + /// + void emitWordBE(uint32_t W) { + if (4 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 0); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordLE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in little-endian format. + /// + void emitDWordLE(uint64_t W) { + if (8 <= BufferEnd-CurBufferPtr) { + *CurBufferPtr++ = (uint8_t)(W >> 0); + *CurBufferPtr++ = (uint8_t)(W >> 8); + *CurBufferPtr++ = (uint8_t)(W >> 16); + *CurBufferPtr++ = (uint8_t)(W >> 24); + *CurBufferPtr++ = (uint8_t)(W >> 32); + *CurBufferPtr++ = (uint8_t)(W >> 40); + *CurBufferPtr++ = (uint8_t)(W >> 48); + *CurBufferPtr++ = (uint8_t)(W >> 56); + } else { + CurBufferPtr = BufferEnd; + } + } + + /// emitDWordBE - This callback is invoked when a 64-bit word needs to be + /// written to the output stream in big-endian format. 
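The emit methods above (and emitDWordBE just below) never fail mid-function; they saturate CurBufferPtr at BufferEnd and let finishFunction report the overflow afterwards. A hedged sketch of the retry loop a driver would run on top of this contract; emitFunctionBody is an illustrative name, not part of this interface:

// Sketch only: JCE is any concrete JITCodeEmitter whose startFunction()
// allocates a larger buffer after each overflow, per the contract above.
void emitFunctionBody(llvm::JITCodeEmitter&, llvm::MachineFunction&); // hypothetical

void emitUntilItFits(llvm::JITCodeEmitter &JCE, llvm::MachineFunction &MF) {
  do {
    JCE.startFunction(MF);           // (re)initializes BufferBegin/End/Ptr
    emitFunctionBody(JCE, MF);       // target-specific byte emission
  } while (JCE.finishFunction(MF));  // true means overflow: grow and retry
}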
+  ///
+  void emitDWordBE(uint64_t W) {
+    if (8 <= BufferEnd-CurBufferPtr) {
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 8);
+      *CurBufferPtr++ = (uint8_t)(W >> 0);
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitAlignment - Move the CurBufferPtr pointer up to the specified
+  /// alignment (saturated to BufferEnd of course).
+  void emitAlignment(unsigned Alignment) {
+    if (Alignment == 0) Alignment = 1;
+    uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
+                                                   Alignment);
+    CurBufferPtr = std::min(NewPtr, BufferEnd);
+  }
+
+  /// emitAlignmentWithFill - Similar to emitAlignment, except that the
+  /// extra bytes are filled with the provided byte.
+  void emitAlignmentWithFill(unsigned Alignment, uint8_t Fill) {
+    if (Alignment == 0) Alignment = 1;
+    uint8_t *NewPtr = (uint8_t*)RoundUpToAlignment((uintptr_t)CurBufferPtr,
+                                                   Alignment);
+    // Fail if we don't have room.
+    if (NewPtr > BufferEnd) {
+      CurBufferPtr = BufferEnd;
+      return;
+    }
+    while (CurBufferPtr < NewPtr) {
+      *CurBufferPtr++ = Fill;
+    }
+  }
+
+  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+  /// written to the output stream.
+  void emitULEB128Bytes(uint64_t Value, unsigned PadTo = 0) {
+    do {
+      uint8_t Byte = Value & 0x7f;
+      Value >>= 7;
+      if (Value || PadTo != 0) Byte |= 0x80;
+      emitByte(Byte);
+    } while (Value);
+
+    if (PadTo) {
+      do {
+        uint8_t Byte = (PadTo > 1) ? 0x80 : 0x0;
+        emitByte(Byte);
+      } while (--PadTo);
+    }
+  }
+
+  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+  /// written to the output stream.
+  void emitSLEB128Bytes(int64_t Value) {
+    int32_t Sign = Value >> (8 * sizeof(Value) - 1);
+    bool IsMore;
+
+    do {
+      uint8_t Byte = Value & 0x7f;
+      Value >>= 7;
+      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+      if (IsMore) Byte |= 0x80;
+      emitByte(Byte);
+    } while (IsMore);
+  }
+
+  /// emitString - This callback is invoked when a String needs to be
+  /// written to the output stream.
+  void emitString(const std::string &String) {
+    for (unsigned i = 0, N = static_cast<unsigned>(String.size());
+         i < N; ++i) {
+      uint8_t C = String[i];
+      emitByte(C);
+    }
+    emitByte(0);
+  }
+
+  /// emitInt32 - Emit an int32 directive.
+  void emitInt32(uint32_t Value) {
+    if (4 <= BufferEnd-CurBufferPtr) {
+      *((uint32_t*)CurBufferPtr) = Value;
+      CurBufferPtr += 4;
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitInt64 - Emit an int64 directive.
+  void emitInt64(uint64_t Value) {
+    if (8 <= BufferEnd-CurBufferPtr) {
+      *((uint64_t*)CurBufferPtr) = Value;
+      CurBufferPtr += 8;
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitInt32At - Emit the Int32 Value at Addr.
+  void emitInt32At(uintptr_t *Addr, uintptr_t Value) {
+    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
+      (*(uint32_t*)Addr) = (uint32_t)Value;
+  }
+
+  /// emitInt64At - Emit the Int64 Value at Addr.
+  void emitInt64At(uintptr_t *Addr, uintptr_t Value) {
+    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
+      (*(uint64_t*)Addr) = (uint64_t)Value;
+  }
+
+
+  /// emitLabel - Emits a label.
+  virtual void emitLabel(MCSymbol *Label) = 0;
+
+  /// allocateSpace - Allocate a block of space in the current output buffer,
+  /// returning null (and setting conditions to indicate buffer overflow) on
+  /// failure.
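The ULEB128 loop above is standard LEB128: seven payload bits per byte, low bits first, high bit set while more bytes follow. A self-contained check of the unsigned encoding, using the classic test value 624485, which encodes as E5 8E 26:

#include <cstdint>
#include <cstdio>
#include <vector>

// Mirrors the emitULEB128Bytes loop above, without the PadTo handling.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value) Byte |= 0x80;    // more bytes follow
    Out.push_back(Byte);
  } while (Value);
  return Out;
}

int main() {
  std::vector<uint8_t> Bytes = encodeULEB128(624485);
  for (size_t i = 0; i < Bytes.size(); ++i)
    std::printf("%02X ", Bytes[i]);   // prints: E5 8E 26
  std::printf("\n");
  return 0;
}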
Alignment is the alignment in bytes of the buffer desired. + virtual void *allocateSpace(uintptr_t Size, unsigned Alignment) { + emitAlignment(Alignment); + void *Result; + + // Check for buffer overflow. + if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) { + CurBufferPtr = BufferEnd; + Result = 0; + } else { + // Allocate the space. + Result = CurBufferPtr; + CurBufferPtr += Size; + } + + return Result; + } + + /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace, + /// this method does not allocate memory in the current output buffer, + /// because a global may live longer than the current function. + virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0; + + /// StartMachineBasicBlock - This should be called by the target when a new + /// basic block is about to be emitted. This way the MCE knows where the + /// start of the block is, and can implement getMachineBasicBlockAddress. + virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) = 0; + + /// getCurrentPCValue - This returns the address that the next emitted byte + /// will be output to. + /// + virtual uintptr_t getCurrentPCValue() const { + return (uintptr_t)CurBufferPtr; + } + + /// getCurrentPCOffset - Return the offset from the start of the emitted + /// buffer that we are currently writing to. + uintptr_t getCurrentPCOffset() const { + return CurBufferPtr-BufferBegin; + } + + /// earlyResolveAddresses - True if the code emitter can use symbol addresses + /// during code emission time. The JIT is capable of doing this because it + /// creates jump tables or constant pools in memory on the fly while the + /// object code emitters rely on a linker to have real addresses and should + /// use relocations instead. + bool earlyResolveAddresses() const { return true; } + + /// addRelocation - Whenever a relocatable address is needed, it should be + /// noted with this interface. + virtual void addRelocation(const MachineRelocation &MR) = 0; + + /// FIXME: These should all be handled with relocations! + + /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in + /// the constant pool that was last emitted with the emitConstantPool method. + /// + virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const = 0; + + /// getJumpTableEntryAddress - Return the address of the jump table with index + /// 'Index' in the function that last called initJumpTableInfo. + /// + virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const = 0; + + /// getMachineBasicBlockAddress - Return the address of the specified + /// MachineBasicBlock, only usable after the label for the MBB has been + /// emitted. + /// + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const= 0; + + /// getLabelAddress - Return the address of the specified Label, only usable + /// after the Label has been emitted. + /// + virtual uintptr_t getLabelAddress(MCSymbol *Label) const = 0; + + /// Specifies the MachineModuleInfo object. This is used for exception handling + /// purposes. + virtual void setModuleInfo(MachineModuleInfo* Info) = 0; + + /// getLabelLocations - Return the label locations map of the label IDs to + /// their address. 
+  virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() { return 0; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/LatencyPriorityQueue.h b/final/include/llvm/CodeGen/LatencyPriorityQueue.h
new file mode 100644
index 00000000000..1ed2547ca6c
--- /dev/null
+++ b/final/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -0,0 +1,100 @@
+//===---- LatencyPriorityQueue.h - A latency-oriented priority queue ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LatencyPriorityQueue class, which is a
+// SchedulingPriorityQueue that schedules using latency information to
+// reduce the length of the critical path through the basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LATENCY_PRIORITY_QUEUE_H
+#define LATENCY_PRIORITY_QUEUE_H
+
+#include "llvm/CodeGen/ScheduleDAG.h"
+
+namespace llvm {
+  class LatencyPriorityQueue;
+
+  /// Sorting functions for the Available queue.
+  struct latency_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+    LatencyPriorityQueue *PQ;
+    explicit latency_sort(LatencyPriorityQueue *pq) : PQ(pq) {}
+
+    bool operator()(const SUnit* left, const SUnit* right) const;
+  };
+
+  class LatencyPriorityQueue : public SchedulingPriorityQueue {
+    // SUnits - The SUnits for the current graph.
+    std::vector<SUnit> *SUnits;
+
+    /// NumNodesSolelyBlocking - This vector contains, for every node in the
+    /// Queue, the number of nodes that the node is the sole unscheduled
+    /// predecessor for. This is used as a tie-breaker heuristic for better
+    /// mobility.
+    std::vector<unsigned> NumNodesSolelyBlocking;
+
+    /// Queue - The queue.
+    std::vector<SUnit*> Queue;
+    latency_sort Picker;
+
+  public:
+    LatencyPriorityQueue() : Picker(this) {
+    }
+
+    bool isBottomUp() const { return false; }
+
+    void initNodes(std::vector<SUnit> &sunits) {
+      SUnits = &sunits;
+      NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+    }
+
+    void addNode(const SUnit *SU) {
+      NumNodesSolelyBlocking.resize(SUnits->size(), 0);
+    }
+
+    void updateNode(const SUnit *SU) {
+    }
+
+    void releaseState() {
+      SUnits = 0;
+    }
+
+    unsigned getLatency(unsigned NodeNum) const {
+      assert(NodeNum < (*SUnits).size());
+      return (*SUnits)[NodeNum].getHeight();
+    }
+
+    unsigned getNumSolelyBlockNodes(unsigned NodeNum) const {
+      assert(NodeNum < NumNodesSolelyBlocking.size());
+      return NumNodesSolelyBlocking[NodeNum];
+    }
+
+    bool empty() const { return Queue.empty(); }
+
+    virtual void push(SUnit *U);
+
+    virtual SUnit *pop();
+
+    virtual void remove(SUnit *SU);
+
+    virtual void dump(ScheduleDAG* DAG) const;
+
+    // ScheduledNode - As nodes are scheduled, we look to see if there are any
+    // successor nodes that have a single unscheduled predecessor. If so, that
+    // single predecessor has a higher priority, since scheduling it will make
+    // the node available.
+    void ScheduledNode(SUnit *Node);
+
+private:
+    void AdjustPriorityOfUnscheduledPreds(SUnit *SU);
+    SUnit *getSingleUnscheduledPred(SUnit *SU);
+  };
+}
+
+#endif
diff --git a/final/include/llvm/CodeGen/LinkAllAsmWriterComponents.h b/final/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
new file mode 100644
index 00000000000..7d1b1fe477a
--- /dev/null
+++ b/final/include/llvm/CodeGen/LinkAllAsmWriterComponents.h
@@ -0,0 +1,37 @@
+//===- llvm/Codegen/LinkAllAsmWriterComponents.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file pulls in all assembler writer related passes for tools like
+// llc that need this functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
+#define LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
+
+#include "llvm/CodeGen/GCs.h"
+#include <cstdlib>
+
+namespace {
+  struct ForceAsmWriterLinking {
+    ForceAsmWriterLinking() {
+      // We must reference the plug-ins in such a way that compilers will not
+      // delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP. As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      llvm::linkOcamlGCPrinter();
+
+    }
+  } ForceAsmWriterLinking; // Force link by creating a global definition.
+}
+
+#endif // LLVM_CODEGEN_LINKALLASMWRITERCOMPONENTS_H
diff --git a/final/include/llvm/CodeGen/LinkAllCodegenComponents.h b/final/include/llvm/CodeGen/LinkAllCodegenComponents.h
new file mode 100644
index 00000000000..c931261f633
--- /dev/null
+++ b/final/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -0,0 +1,59 @@
+//===- llvm/Codegen/LinkAllCodegenComponents.h ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file pulls in all codegen related passes for tools like lli and
+// llc that need this functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
+#define LLVM_CODEGEN_LINKALLCODEGENCOMPONENTS_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cstdlib>
+
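Both LinkAll headers use the same anti-dead-code-elimination idiom, shown again in the constructor below: hide the references behind a comparison the optimizer must treat as opaque, even though getenv() can never return (char*)-1 at run time. A minimal standalone rendering of the idiom; createSomePass is a placeholder for the creators referenced below, not a real LLVM entry point:

#include <cstdlib>

void createSomePass();   // stand-in for a pass creator we must keep linked

namespace {
  struct ForceLinking {
    ForceLinking() {
      // Opaque to the compiler, never true when actually run.
      if (std::getenv("bar") != (char*) -1)
        return;
      createSomePass();  // unreachable, but keeps the symbol referenced
    }
  } ForceLinking;        // global instance: constructor runs at load time
}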
+namespace {
+  struct ForceCodegenLinking {
+    ForceCodegenLinking() {
+      // We must reference the passes in such a way that compilers will not
+      // delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP. As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      (void) llvm::createDeadMachineInstructionElimPass();
+
+      (void) llvm::createFastRegisterAllocator();
+      (void) llvm::createBasicRegisterAllocator();
+      (void) llvm::createLinearScanRegisterAllocator();
+      (void) llvm::createGreedyRegisterAllocator();
+      (void) llvm::createDefaultPBQPRegisterAllocator();
+
+      (void) llvm::createSimpleRegisterCoalescer();
+
+      llvm::linkOcamlGC();
+      llvm::linkShadowStackGC();
+
+      (void) llvm::createBURRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
+      (void) llvm::createTDRRListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
+      (void) llvm::createSourceListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
+      (void) llvm::createHybridListDAGScheduler(NULL,llvm::CodeGenOpt::Default);
+      (void) llvm::createTDListDAGScheduler(NULL, llvm::CodeGenOpt::Default);
+      (void) llvm::createFastDAGScheduler(NULL, llvm::CodeGenOpt::Default);
+      (void) llvm::createDefaultScheduler(NULL, llvm::CodeGenOpt::Default);
+
+    }
+  } ForceCodegenLinking; // Force link by creating a global definition.
+}
+
+#endif
diff --git a/final/include/llvm/CodeGen/LiveInterval.h b/final/include/llvm/CodeGen/LiveInterval.h
new file mode 100644
index 00000000000..427af879609
--- /dev/null
+++ b/final/include/llvm/CodeGen/LiveInterval.h
@@ -0,0 +1,578 @@
+//===-- llvm/CodeGen/LiveInterval.h - Interval representation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with number
+// j' >= j such that v is live at j' and there is no instruction with number
+// i' < i such that v is live at i'. In this implementation intervals can have
+// holes, i.e. an interval might look like [1,20), [50,65), [1000,1001). Each
+// individual range is represented as an instance of LiveRange, and the whole
+// interval is represented as an instance of LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVAL_H
+#define LLVM_CODEGEN_LIVEINTERVAL_H
+
+#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include <cassert>
+#include <climits>
+
+namespace llvm {
+  class LiveIntervals;
+  class MachineInstr;
+  class MachineRegisterInfo;
+  class TargetRegisterInfo;
+  class raw_ostream;
+
+  /// VNInfo - Value Number Information.
+  /// This class holds information about a machine level value, including
+  /// definition and use points.
+  ///
+  class VNInfo {
+  private:
+    enum {
+      HAS_PHI_KILL = 1,
+      REDEF_BY_EC  = 1 << 1,
+      IS_PHI_DEF   = 1 << 2,
+      IS_UNUSED    = 1 << 3
+    };
+
+    MachineInstr *copy;
+    unsigned char flags;
+
+  public:
+    typedef BumpPtrAllocator Allocator;
+
+    /// The ID number of this value.
+    unsigned id;
+
+    /// The index of the defining instruction (if isDefAccurate() returns
+    /// true).
+    SlotIndex def;
+
+    /// VNInfo constructor.
+    VNInfo(unsigned i, SlotIndex d, MachineInstr *c)
+      : copy(c), flags(0), id(i), def(d)
+    { }
+
+    /// VNInfo constructor, copies values from orig, except for the value
+    /// number.
+    VNInfo(unsigned i, const VNInfo &orig)
+      : copy(orig.copy), flags(orig.flags), id(i), def(orig.def)
+    { }
+
+    /// Copy from the parameter into this VNInfo.
+    void copyFrom(VNInfo &src) {
+      flags = src.flags;
+      copy = src.copy;
+      def = src.def;
+    }
+
+    /// Used for copying value number info.
+    unsigned getFlags() const { return flags; }
+    void setFlags(unsigned flags) { this->flags = flags; }
+
+    /// Merge flags from another VNInfo.
+    void mergeFlags(const VNInfo *VNI) {
+      flags = (flags | VNI->flags) & ~IS_UNUSED;
+    }
+
+    /// For a register interval, if this VN was defined by a copy instr
+    /// getCopy() returns a pointer to it, otherwise returns 0.
+    /// For a stack interval the behaviour of this method is undefined.
+    MachineInstr* getCopy() const { return copy; }
+    /// For a register interval, set the copy member.
+    /// This method should not be called on stack intervals as it may lead to
+    /// undefined behavior.
+    void setCopy(MachineInstr *c) { copy = c; }
+
+    /// isDefByCopy - Return true when this value was defined by a copy-like
+    /// instruction as determined by MachineInstr::isCopyLike.
+    bool isDefByCopy() const { return copy != 0; }
+
+    /// Returns true if one or more kills are PHI nodes.
+    bool hasPHIKill() const { return flags & HAS_PHI_KILL; }
+    /// Set the PHI kill flag on this value.
+    void setHasPHIKill(bool hasKill) {
+      if (hasKill)
+        flags |= HAS_PHI_KILL;
+      else
+        flags &= ~HAS_PHI_KILL;
+    }
+
+    /// Returns true if this value is re-defined by an early clobber somewhere
+    /// during the live range.
+    bool hasRedefByEC() const { return flags & REDEF_BY_EC; }
+    /// Set the "redef by early clobber" flag on this value.
+    void setHasRedefByEC(bool hasRedef) {
+      if (hasRedef)
+        flags |= REDEF_BY_EC;
+      else
+        flags &= ~REDEF_BY_EC;
+    }
+
+    /// Returns true if this value is defined by a PHI instruction (or was,
+    /// PHI instructions may have been eliminated).
+    bool isPHIDef() const { return flags & IS_PHI_DEF; }
+    /// Set the "phi def" flag on this value.
+    void setIsPHIDef(bool phiDef) {
+      if (phiDef)
+        flags |= IS_PHI_DEF;
+      else
+        flags &= ~IS_PHI_DEF;
+    }
+
+    /// Returns true if this value is unused.
+    bool isUnused() const { return flags & IS_UNUSED; }
+    /// Set the "is unused" flag on this value.
+    void setIsUnused(bool unused) {
+      if (unused)
+        flags |= IS_UNUSED;
+      else
+        flags &= ~IS_UNUSED;
+    }
+  };
+
+  /// LiveRange structure - This represents a simple register range in the
+  /// program, with an inclusive start point and an exclusive end point.
+  /// These ranges are rendered as [start,end).
+  struct LiveRange {
+    SlotIndex start;  // Start point of the interval (inclusive)
+    SlotIndex end;    // End point of the interval (exclusive)
+    VNInfo *valno;    // identifier for the value contained in this interval.
+
+    LiveRange(SlotIndex S, SlotIndex E, VNInfo *V)
+      : start(S), end(E), valno(V) {
+
+      assert(S < E && "Cannot create empty or backwards range");
+    }
+
+    /// contains - Return true if the index is covered by this range.
+    ///
+    bool contains(SlotIndex I) const {
+      return start <= I && I < end;
+    }
+
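Because LiveRange is half-open, end itself is not covered, which is what lets adjacent ranges such as [4,8) and [8,12) abut without overlapping. A standalone sketch of the boundary behavior, mirroring contains() above and the containsRange() check below, with plain unsigned stand-ins for SlotIndex (the values are hypothetical):

#include <cassert>

struct Range {
  unsigned start, end;   // half-open: [start, end)
  bool contains(unsigned I) const { return start <= I && I < end; }
  bool containsRange(unsigned S, unsigned E) const {
    return (start <= S && S < end) && (start < E && E <= end);
  }
};

int main() {
  Range R = {4, 12};
  assert(R.contains(4));           // start is covered
  assert(!R.contains(12));         // end is excluded
  assert(R.containsRange(4, 12));  // may extend exactly to end
  assert(!R.containsRange(8, 13)); // but not past it
  return 0;
}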
+    /// containsRange - Return true if the given range, [S, E), is covered by
+    /// this range.
+    bool containsRange(SlotIndex S, SlotIndex E) const {
+      assert((S < E) && "Backwards interval?");
+      return (start <= S && S < end) && (start < E && E <= end);
+    }
+
+    bool operator<(const LiveRange &LR) const {
+      return start < LR.start || (start == LR.start && end < LR.end);
+    }
+    bool operator==(const LiveRange &LR) const {
+      return start == LR.start && end == LR.end;
+    }
+
+    void dump() const;
+    void print(raw_ostream &os) const;
+
+  private:
+    LiveRange(); // DO NOT IMPLEMENT
+  };
+
+  template <> struct isPodLike<LiveRange> { static const bool value = true; };
+
+  raw_ostream& operator<<(raw_ostream& os, const LiveRange &LR);
+
+
+  inline bool operator<(SlotIndex V, const LiveRange &LR) {
+    return V < LR.start;
+  }
+
+  inline bool operator<(const LiveRange &LR, SlotIndex V) {
+    return LR.start < V;
+  }
+
+  /// LiveInterval - This class represents some number of live ranges for a
+  /// register or value. This class also contains a bit of register allocator
+  /// state.
+  class LiveInterval {
+  public:
+
+    typedef SmallVector<LiveRange,4> Ranges;
+    typedef SmallVector<VNInfo*,4> VNInfoList;
+
+    const unsigned reg;  // the register or stack slot of this interval.
+    float weight;        // weight of this interval
+    Ranges ranges;       // the ranges in which this register is live
+    VNInfoList valnos;   // value#'s
+
+    struct InstrSlots {
+      enum {
+        LOAD  = 0,
+        USE   = 1,
+        DEF   = 2,
+        STORE = 3,
+        NUM   = 4
+      };
+
+    };
+
+    LiveInterval(unsigned Reg, float Weight)
+      : reg(Reg), weight(Weight) {}
+
+    typedef Ranges::iterator iterator;
+    iterator begin() { return ranges.begin(); }
+    iterator end() { return ranges.end(); }
+
+    typedef Ranges::const_iterator const_iterator;
+    const_iterator begin() const { return ranges.begin(); }
+    const_iterator end() const { return ranges.end(); }
+
+    typedef VNInfoList::iterator vni_iterator;
+    vni_iterator vni_begin() { return valnos.begin(); }
+    vni_iterator vni_end() { return valnos.end(); }
+
+    typedef VNInfoList::const_iterator const_vni_iterator;
+    const_vni_iterator vni_begin() const { return valnos.begin(); }
+    const_vni_iterator vni_end() const { return valnos.end(); }
+
+    /// advanceTo - Advance the specified iterator to point to the LiveRange
+    /// containing the specified position, or end() if the position is past the
+    /// end of the interval. If no LiveRange contains this position, but the
+    /// position is in a hole, this method returns an iterator pointing to the
+    /// LiveRange immediately after the hole.
+    iterator advanceTo(iterator I, SlotIndex Pos) {
+      assert(I != end());
+      if (Pos >= endIndex())
+        return end();
+      while (I->end <= Pos) ++I;
+      return I;
+    }
+
+    /// find - Return an iterator pointing to the first range that ends after
+    /// Pos, or end(). This is the same as advanceTo(begin(), Pos), but faster
+    /// when searching large intervals.
+    ///
+    /// If Pos is contained in a LiveRange, that range is returned.
+    /// If Pos is in a hole, the following LiveRange is returned.
+    /// If Pos is beyond endIndex, end() is returned.
+    iterator find(SlotIndex Pos);
+
+    const_iterator find(SlotIndex Pos) const {
+      return const_cast<LiveInterval*>(this)->find(Pos);
+    }
+
+    void clear() {
+      valnos.clear();
+      ranges.clear();
+    }
+
+    bool hasAtLeastOneValue() const { return !valnos.empty(); }
+
+    bool containsOneValue() const { return valnos.size() == 1; }
+
+    unsigned getNumValNums() const { return (unsigned)valnos.size(); }
+
+    /// getValNumInfo - Returns pointer to the specified val#.
+    ///
+    inline VNInfo *getValNumInfo(unsigned ValNo) {
+      return valnos[ValNo];
+    }
+    inline const VNInfo *getValNumInfo(unsigned ValNo) const {
+      return valnos[ValNo];
+    }
+
+    /// getNextValue - Create a new value number and return it. MIIdx specifies
+    /// the instruction that defines the value number.
+    VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
+                         VNInfo::Allocator &VNInfoAllocator) {
+      VNInfo *VNI =
+        new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def, CopyMI);
+      valnos.push_back(VNI);
+      return VNI;
+    }
+
+    /// Create a copy of the given value. The new value will be identical
+    /// except for the value number.
+    VNInfo *createValueCopy(const VNInfo *orig,
+                            VNInfo::Allocator &VNInfoAllocator) {
+      VNInfo *VNI =
+        new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), *orig);
+      valnos.push_back(VNI);
+      return VNI;
+    }
+
+    /// RenumberValues - Renumber all values in order of appearance and remove
+    /// unused values.
+    /// Recalculate phi-kill flags in case any phi-def values were removed.
+    void RenumberValues(LiveIntervals &lis);
+
+    /// isOnlyLROfValNo - Return true if the specified live range is the only
+    /// one defined by its val#.
+    bool isOnlyLROfValNo(const LiveRange *LR) {
+      for (const_iterator I = begin(), E = end(); I != E; ++I) {
+        const LiveRange *Tmp = I;
+        if (Tmp != LR && Tmp->valno == LR->valno)
+          return false;
+      }
+      return true;
+    }
+
+    /// MergeValueNumberInto - This method is called when two value numbers
+    /// are found to be equivalent. This eliminates V1, replacing all
+    /// LiveRanges with the V1 value number with the V2 value number. This can
+    /// cause merging of V1/V2 value numbers and compaction of the value space.
+    VNInfo* MergeValueNumberInto(VNInfo *V1, VNInfo *V2);
+
+    /// MergeRangesInAsValue - Merge all of the live ranges of a specific val#
+    /// in RHS into this live interval as the specified value number.
+    /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+    /// current interval; it will replace the value numbers of the overlapped
+    /// live ranges with the specified value number.
+    void MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo);
+
+    /// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+    /// in RHS into this live interval as the specified value number.
+    /// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+    /// current interval, but only if the overlapping LiveRanges have the
+    /// specified value number.
+    void MergeValueInAsValue(const LiveInterval &RHS,
+                             const VNInfo *RHSValNo, VNInfo *LHSValNo);
+
+    /// Copy - Copy the specified live interval. This copies all the fields
+    /// except for the register of the interval.
+    void Copy(const LiveInterval &RHS, MachineRegisterInfo *MRI,
+              VNInfo::Allocator &VNInfoAllocator);
+
+    bool empty() const { return ranges.empty(); }
+
+    /// beginIndex - Return the lowest numbered slot covered by interval.
+    SlotIndex beginIndex() const {
+      assert(!empty() && "Call to beginIndex() on empty interval.");
+      return ranges.front().start;
+    }
+
+    /// endIndex - Return the maximum point of the interval of the whole,
+    /// exclusive.
+    SlotIndex endIndex() const {
+      assert(!empty() && "Call to endIndex() on empty interval.");
+      return ranges.back().end;
+    }
+
+    bool expiredAt(SlotIndex index) const {
+      return index >= endIndex();
+    }
+
+    bool liveAt(SlotIndex index) const {
+      const_iterator r = find(index);
+      return r != end() && r->start <= index;
+    }
+
+    /// killedAt - Return true if a live range ends at index. Note that the
+    /// kill point is not contained in the half-open live range. It is usually
+    /// the getDefIndex() slot following its last use.
+    bool killedAt(SlotIndex index) const {
+      const_iterator r = find(index.getUseIndex());
+      return r != end() && r->end == index;
+    }
+
+    /// killedInRange - Return true if the interval has kills in [Start,End).
+    /// Note that the kill point is considered the end of a live range, so it is
+    /// not contained in the live range. If a live range ends at End, it won't
+    /// be counted as a kill by this method.
+    bool killedInRange(SlotIndex Start, SlotIndex End) const;
+
+    /// getLiveRangeContaining - Return the live range that contains the
+    /// specified index, or null if there is none.
+    const LiveRange *getLiveRangeContaining(SlotIndex Idx) const {
+      const_iterator I = FindLiveRangeContaining(Idx);
+      return I == end() ? 0 : &*I;
+    }
+
+    /// getLiveRangeContaining - Return the live range that contains the
+    /// specified index, or null if there is none.
+    LiveRange *getLiveRangeContaining(SlotIndex Idx) {
+      iterator I = FindLiveRangeContaining(Idx);
+      return I == end() ? 0 : &*I;
+    }
+
+    /// getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
+    VNInfo *getVNInfoAt(SlotIndex Idx) const {
+      const_iterator I = FindLiveRangeContaining(Idx);
+      return I == end() ? 0 : I->valno;
+    }
+
+    /// FindLiveRangeContaining - Return an iterator to the live range that
+    /// contains the specified index, or end() if there is none.
+    iterator FindLiveRangeContaining(SlotIndex Idx) {
+      iterator I = find(Idx);
+      return I != end() && I->start <= Idx ? I : end();
+    }
+
+    const_iterator FindLiveRangeContaining(SlotIndex Idx) const {
+      const_iterator I = find(Idx);
+      return I != end() && I->start <= Idx ? I : end();
+    }
+
+    /// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+    /// index (register interval).
+    VNInfo *findDefinedVNInfoForRegInt(SlotIndex Idx) const;
+
+
+    /// overlaps - Return true if the intersection of the two live intervals is
+    /// not empty.
+    bool overlaps(const LiveInterval& other) const {
+      if (other.empty())
+        return false;
+      return overlapsFrom(other, other.begin());
+    }
+
+    /// overlaps - Return true if the live interval overlaps a range specified
+    /// by [Start, End).
+    bool overlaps(SlotIndex Start, SlotIndex End) const;
+
+    /// overlapsFrom - Return true if the intersection of the two live intervals
+    /// is not empty. The specified iterator is a hint that we can begin
+    /// scanning the Other interval starting at I.
+    bool overlapsFrom(const LiveInterval& other, const_iterator I) const;
+
+    /// addRange - Add the specified LiveRange to this interval, merging
+    /// intervals as appropriate. The inserted range may grow as it is merged
+    /// with adjacent ranges.
+    void addRange(LiveRange LR) {
+      addRangeFrom(LR, ranges.begin());
+    }
+
+    /// extendInBlock - If this interval is live before UseIdx in the basic
+    /// block that starts at StartIdx, extend it to be live at UseIdx and return
+    /// the value. If there is no live range before UseIdx, return NULL.
+    VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx);
+
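The queries above all funnel through find(), which locates the first range ending after a position; whether that range actually covers the position distinguishes a live index from a hole. A hedged sketch of the usual caller-side pattern, assuming LI and Idx come from the enclosing pass:

// Sketch only: shows the intended query pattern, not new API.
void classifyIndex(const llvm::LiveInterval &LI, llvm::SlotIndex Idx) {
  if (const llvm::LiveRange *LR = LI.getLiveRangeContaining(Idx)) {
    // Covered: LR->valno says which definition is live here.
    unsigned DefID = LR->valno->id;
    (void)DefID;
  } else if (!LI.empty() && Idx >= LI.beginIndex() && Idx < LI.endIndex()) {
    // Within the interval's span but inside a hole between ranges.
  }
}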
+    /// join - Join two live intervals (this, and other) together. This applies
+    /// mappings to the value numbers in the LHS/RHS intervals as specified. If
+    /// the intervals are not joinable, this aborts.
+    void join(LiveInterval &Other,
+              const int *ValNoAssignments,
+              const int *RHSValNoAssignments,
+              SmallVector<VNInfo*, 16> &NewVNInfo,
+              MachineRegisterInfo *MRI);
+
+    /// isInOneLiveRange - Return true if the range specified is entirely in
+    /// a single LiveRange of the live interval.
+    bool isInOneLiveRange(SlotIndex Start, SlotIndex End) const {
+      const_iterator r = find(Start);
+      return r != end() && r->containsRange(Start, End);
+    }
+
+    /// removeRange - Remove the specified range from this interval. Note that
+    /// the range must be a single LiveRange in its entirety.
+    void removeRange(SlotIndex Start, SlotIndex End,
+                     bool RemoveDeadValNo = false);
+
+    void removeRange(LiveRange LR, bool RemoveDeadValNo = false) {
+      removeRange(LR.start, LR.end, RemoveDeadValNo);
+    }
+
+    /// removeValNo - Remove all the ranges defined by the specified value#.
+    /// Also remove the value# from value# list.
+    void removeValNo(VNInfo *ValNo);
+
+    /// getSize - Returns the sum of sizes of all the LiveRange's.
+    ///
+    unsigned getSize() const;
+
+    /// Returns true if the live interval is zero length, i.e. no live ranges
+    /// span instructions. It doesn't pay to spill such an interval.
+    bool isZeroLength() const {
+      for (const_iterator i = begin(), e = end(); i != e; ++i)
+        if (i->end.getPrevIndex() > i->start)
+          return false;
+      return true;
+    }
+
+    /// isSpillable - Can this interval be spilled?
+    bool isSpillable() const {
+      return weight != HUGE_VALF;
+    }
+
+    /// markNotSpillable - Mark interval as not spillable.
+    void markNotSpillable() {
+      weight = HUGE_VALF;
+    }
+
+    /// ComputeJoinedWeight - Set the weight of a live interval after
+    /// Other has been merged into it.
+    void ComputeJoinedWeight(const LiveInterval &Other);
+
+    bool operator<(const LiveInterval& other) const {
+      const SlotIndex &thisIndex = beginIndex();
+      const SlotIndex &otherIndex = other.beginIndex();
+      return (thisIndex < otherIndex ||
+              (thisIndex == otherIndex && reg < other.reg));
+    }
+
+    void print(raw_ostream &OS, const TargetRegisterInfo *TRI = 0) const;
+    void dump() const;
+
+  private:
+
+    Ranges::iterator addRangeFrom(LiveRange LR, Ranges::iterator From);
+    void extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd);
+    Ranges::iterator extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStr);
+    void markValNoForDeletion(VNInfo *V);
+
+    LiveInterval& operator=(const LiveInterval& rhs); // DO NOT IMPLEMENT
+
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const LiveInterval &LI) {
+    LI.print(OS);
+    return OS;
+  }
+
+  /// ConnectedVNInfoEqClasses - Helper class that can divide VNInfos in a
+  /// LiveInterval into equivalence classes of connected components. A
+  /// LiveInterval that has multiple connected components can be broken into
+  /// multiple LiveIntervals.
+  ///
+  /// Given a LiveInterval that may have multiple connected components, run:
+  ///
+  ///   unsigned numComps = ConEQ.Classify(LI);
+  ///   if (numComps > 1) {
+  ///     // allocate numComps-1 new LiveIntervals into LIS[1..]
+  ///     ConEQ.Distribute(LIS);
+  ///   }
+
+  class ConnectedVNInfoEqClasses {
+    LiveIntervals &lis_;
+    IntEqClasses eqClass_;
+
+    // Note that values a and b are connected.
+    void Connect(unsigned a, unsigned b);
+
+    unsigned Renumber();
+
+  public:
+    explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : lis_(lis) {}
+
+    /// Classify - Classify the values in LI into connected components.
+    /// Return the number of connected components.
+    unsigned Classify(const LiveInterval *LI);
+
+    /// getEqClass - Classify creates equivalence classes numbered 0..N. Return
+    /// the equivalence class assigned to VNI.
+    unsigned getEqClass(const VNInfo *VNI) const { return eqClass_[VNI->id]; }
+
+    /// Distribute - Distribute values in LIV[0] into a separate LiveInterval
+    /// for each connected component. LIV must have a LiveInterval for each
+    /// connected component. The LiveIntervals in LIV[1..] must be empty.
+    void Distribute(LiveInterval *LIV[]);
+
+  };
+
+}
+#endif
diff --git a/final/include/llvm/CodeGen/LiveIntervalAnalysis.h b/final/include/llvm/CodeGen/LiveIntervalAnalysis.h
new file mode 100644
index 00000000000..1391f0f95be
--- /dev/null
+++ b/final/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -0,0 +1,465 @@
+//===-- LiveIntervalAnalysis.h - Live Interval Analysis ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass. Given some numbering
+// of each of the machine instructions (in this implementation depth-first
+// order) an interval [i, j) is said to be a live interval for register v if
+// there is no instruction with number j' > j such that v is live at j' and
+// there is no instruction with number i' < i such that v is live at i'. In
+// this implementation intervals can have holes, i.e. an interval might look
+// like [1,20), [50,65), [1000,1001).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
+#define LLVM_CODEGEN_LIVEINTERVAL_ANALYSIS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include <cmath>
+#include <iterator>
+
+namespace llvm {
+
+  class AliasAnalysis;
+  class LiveVariables;
+  class MachineLoopInfo;
+  class TargetRegisterInfo;
+  class MachineRegisterInfo;
+  class TargetInstrInfo;
+  class TargetRegisterClass;
+  class VirtRegMap;
+
+  class LiveIntervals : public MachineFunctionPass {
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    AliasAnalysis *aa_;
+    LiveVariables* lv_;
+    SlotIndexes* indexes_;
+
+    /// Special pool allocator for VNInfo's (LiveInterval val#).
+    ///
+    VNInfo::Allocator VNInfoAllocator;
+
+    typedef DenseMap<unsigned, LiveInterval*> Reg2IntervalMap;
+    Reg2IntervalMap r2iMap_;
+
+    /// allocatableRegs_ - A bit vector of allocatable registers.
+    BitVector allocatableRegs_;
+
+    /// CloneMIs - A list of clones as result of re-materialization.
+    std::vector<MachineInstr*> CloneMIs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LiveIntervals() : MachineFunctionPass(ID) {
+      initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+    }
+
+    // Calculate the spill weight to assign to a single instruction.
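A sketch of the heuristic behind the getSpillWeight declaration that follows, assuming the traditional shape in which each def or use counts once and is scaled by a factor of ten per loop level; the real coefficients live in the implementation file, not in this header:

#include <cmath>

// Assumed shape only; not a quote of LiveIntervals::getSpillWeight.
static float spillWeightSketch(bool isDef, bool isUse, unsigned loopDepth) {
  float Count = (isDef ? 1.0f : 0.0f) + (isUse ? 1.0f : 0.0f);
  return Count * std::pow(10.0f, (float)loopDepth);
}
// Example: a def+use operand at loop depth 2 weighs 2 * 100 = 200.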
+ static float getSpillWeight(bool isDef, bool isUse, unsigned loopDepth); + + typedef Reg2IntervalMap::iterator iterator; + typedef Reg2IntervalMap::const_iterator const_iterator; + const_iterator begin() const { return r2iMap_.begin(); } + const_iterator end() const { return r2iMap_.end(); } + iterator begin() { return r2iMap_.begin(); } + iterator end() { return r2iMap_.end(); } + unsigned getNumIntervals() const { return (unsigned)r2iMap_.size(); } + + LiveInterval &getInterval(unsigned reg) { + Reg2IntervalMap::iterator I = r2iMap_.find(reg); + assert(I != r2iMap_.end() && "Interval does not exist for register"); + return *I->second; + } + + const LiveInterval &getInterval(unsigned reg) const { + Reg2IntervalMap::const_iterator I = r2iMap_.find(reg); + assert(I != r2iMap_.end() && "Interval does not exist for register"); + return *I->second; + } + + bool hasInterval(unsigned reg) const { + return r2iMap_.count(reg); + } + + /// isAllocatable - is the physical register reg allocatable in the current + /// function? + bool isAllocatable(unsigned reg) const { + return allocatableRegs_.test(reg); + } + + /// getScaledIntervalSize - get the size of an interval in "units," + /// where every function is composed of one thousand units. This + /// measure scales properly with empty index slots in the function. + double getScaledIntervalSize(LiveInterval& I) { + return (1000.0 * I.getSize()) / indexes_->getIndexesLength(); + } + + /// getFuncInstructionCount - Return the number of instructions in the + /// current function. + unsigned getFuncInstructionCount() { + return indexes_->getFunctionSize(); + } + + /// getApproximateInstructionCount - computes an estimate of the number + /// of instructions in a given LiveInterval. + unsigned getApproximateInstructionCount(LiveInterval& I) { + double IntervalPercentage = getScaledIntervalSize(I) / 1000.0; + return (unsigned)(IntervalPercentage * indexes_->getFunctionSize()); + } + + /// conflictsWithPhysReg - Returns true if the specified register is used or + /// defined during the duration of the specified interval. Copies to and + /// from li.reg are allowed. This method is only able to analyze simple + /// ranges that stay within a single basic block. Anything else is + /// considered a conflict. + bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm, + unsigned reg); + + /// conflictsWithAliasRef - Similar to conflictsWithPhysRegRef except + /// it checks for alias uses and defs. + bool conflictsWithAliasRef(LiveInterval &li, unsigned Reg, + SmallPtrSet &JoinedCopies); + + // Interval creation + LiveInterval &getOrCreateInterval(unsigned reg) { + Reg2IntervalMap::iterator I = r2iMap_.find(reg); + if (I == r2iMap_.end()) + I = r2iMap_.insert(std::make_pair(reg, createInterval(reg))).first; + return *I->second; + } + + /// dupInterval - Duplicate a live interval. The caller is responsible for + /// managing the allocated memory. + LiveInterval *dupInterval(LiveInterval *li); + + /// addLiveRangeToEndOfBlock - Given a register and an instruction, + /// adds a live range from that instruction to the end of its MBB. + LiveRange addLiveRangeToEndOfBlock(unsigned reg, + MachineInstr* startInst); + + /// shrinkToUses - After removing some uses of a register, shrink its live + /// range to just the remaining uses. This method does not compute reaching + /// defs for new uses, and it doesn't remove dead defs. + /// Dead PHIDef values are marked as unused. + /// New dead machine instructions are added to the dead vector. 
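shrinkToUses, declared just below, reports the instructions it proved dead rather than deleting them; cleanup is the caller's job. A hedged usage sketch, assuming the dead-vector parameter type is SmallVectorImpl<MachineInstr*> and that the caller also owns the slot-index bookkeeping:

// Illustrative caller-side cleanup after shrinking a live interval.
void shrinkAndSweep(llvm::LiveIntervals &LIS, llvm::LiveInterval &LI) {
  llvm::SmallVector<llvm::MachineInstr*, 8> Dead;
  LIS.shrinkToUses(&LI, &Dead);
  for (unsigned i = 0, e = Dead.size(); i != e; ++i) {
    LIS.RemoveMachineInstrFromMaps(Dead[i]);  // drop index bookkeeping first
    Dead[i]->eraseFromParent();               // then delete the instruction
  }
}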
+ void shrinkToUses(LiveInterval *li, + SmallVectorImpl *dead = 0); + + // Interval removal + + void removeInterval(unsigned Reg) { + DenseMap::iterator I = r2iMap_.find(Reg); + delete I->second; + r2iMap_.erase(I); + } + + SlotIndexes *getSlotIndexes() const { + return indexes_; + } + + SlotIndex getZeroIndex() const { + return indexes_->getZeroIndex(); + } + + SlotIndex getInvalidIndex() const { + return indexes_->getInvalidIndex(); + } + + /// isNotInMIMap - returns true if the specified machine instr has been + /// removed or was never entered in the map. + bool isNotInMIMap(const MachineInstr* Instr) const { + return !indexes_->hasIndex(Instr); + } + + /// Returns the base index of the given instruction. + SlotIndex getInstructionIndex(const MachineInstr *instr) const { + return indexes_->getInstructionIndex(instr); + } + + /// Returns the instruction associated with the given index. + MachineInstr* getInstructionFromIndex(SlotIndex index) const { + return indexes_->getInstructionFromIndex(index); + } + + /// Return the first index in the given basic block. + SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const { + return indexes_->getMBBStartIdx(mbb); + } + + /// Return the last index in the given basic block. + SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const { + return indexes_->getMBBEndIdx(mbb); + } + + bool isLiveInToMBB(const LiveInterval &li, + const MachineBasicBlock *mbb) const { + return li.liveAt(getMBBStartIdx(mbb)); + } + + LiveRange* findEnteringRange(LiveInterval &li, + const MachineBasicBlock *mbb) { + return li.getLiveRangeContaining(getMBBStartIdx(mbb)); + } + + bool isLiveOutOfMBB(const LiveInterval &li, + const MachineBasicBlock *mbb) const { + return li.liveAt(getMBBEndIdx(mbb).getPrevSlot()); + } + + LiveRange* findExitingRange(LiveInterval &li, + const MachineBasicBlock *mbb) { + return li.getLiveRangeContaining(getMBBEndIdx(mbb).getPrevSlot()); + } + + MachineBasicBlock* getMBBFromIndex(SlotIndex index) const { + return indexes_->getMBBFromIndex(index); + } + + SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) { + return indexes_->insertMachineInstrInMaps(MI); + } + + void RemoveMachineInstrFromMaps(MachineInstr *MI) { + indexes_->removeMachineInstrFromMaps(MI); + } + + void ReplaceMachineInstrInMaps(MachineInstr *MI, MachineInstr *NewMI) { + indexes_->replaceMachineInstrInMaps(MI, NewMI); + } + + void InsertMBBInMaps(MachineBasicBlock *MBB) { + indexes_->insertMBBInMaps(MBB); + } + + bool findLiveInMBBs(SlotIndex Start, SlotIndex End, + SmallVectorImpl &MBBs) const { + return indexes_->findLiveInMBBs(Start, End, MBBs); + } + + void renumber() { + indexes_->renumberIndexes(); + } + + VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void releaseMemory(); + + /// runOnMachineFunction - pass entry point + virtual bool runOnMachineFunction(MachineFunction&); + + /// print - Implement the dump method. + virtual void print(raw_ostream &O, const Module* = 0) const; + + /// addIntervalsForSpills - Create new intervals for spilled defs / uses of + /// the given interval. FIXME: It also returns the weight of the spill slot + /// (if any is created) by reference. This is temporary. + std::vector + addIntervalsForSpills(const LiveInterval& i, + const SmallVectorImpl &SpillIs, + const MachineLoopInfo *loopInfo, VirtRegMap& vrm); + + /// spillPhysRegAroundRegDefsUses - Spill the specified physical register + /// around all defs and uses of the specified interval. 
Return true if it + /// was able to cut its interval. + bool spillPhysRegAroundRegDefsUses(const LiveInterval &li, + unsigned PhysReg, VirtRegMap &vrm); + + /// isReMaterializable - Returns true if every definition of MI of every + /// val# of the specified interval is re-materializable. Also returns true + /// by reference if all of the defs are load instructions. + bool isReMaterializable(const LiveInterval &li, + const SmallVectorImpl &SpillIs, + bool &isLoad); + + /// isReMaterializable - Returns true if the definition MI of the specified + /// val# of the specified interval is re-materializable. + bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, + MachineInstr *MI); + + /// getRepresentativeReg - Find the largest super register of the specified + /// physical register. + unsigned getRepresentativeReg(unsigned Reg) const; + + /// getNumConflictsWithPhysReg - Return the number of uses and defs of the + /// specified interval that conflicts with the specified physical register. + unsigned getNumConflictsWithPhysReg(const LiveInterval &li, + unsigned PhysReg) const; + + /// intervalIsInOneMBB - Returns true if the specified interval is entirely + /// within a single basic block. + bool intervalIsInOneMBB(const LiveInterval &li) const; + + /// getLastSplitPoint - Return the last possible insertion point in mbb for + /// spilling and splitting code. This is the first terminator, or the call + /// instruction if li is live into a landing pad successor. + MachineBasicBlock::iterator getLastSplitPoint(const LiveInterval &li, + MachineBasicBlock *mbb) const; + + /// addKillFlags - Add kill flags to any instruction that kills a virtual + /// register. + void addKillFlags(); + + private: + /// computeIntervals - Compute live intervals. + void computeIntervals(); + + /// handleRegisterDef - update intervals for a register def + /// (calls handlePhysicalRegisterDef and + /// handleVirtualRegisterDef) + void handleRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI, + SlotIndex MIIdx, + MachineOperand& MO, unsigned MOIdx); + + /// isPartialRedef - Return true if the specified def at the specific index + /// is partially re-defining the specified live interval. A common case of + /// this is a definition of the sub-register. + bool isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, + LiveInterval &interval); + + /// handleVirtualRegisterDef - update intervals for a virtual + /// register def + void handleVirtualRegisterDef(MachineBasicBlock *MBB, + MachineBasicBlock::iterator MI, + SlotIndex MIIdx, MachineOperand& MO, + unsigned MOIdx, + LiveInterval& interval); + + /// handlePhysicalRegisterDef - update intervals for a physical register + /// def. + void handlePhysicalRegisterDef(MachineBasicBlock* mbb, + MachineBasicBlock::iterator mi, + SlotIndex MIIdx, MachineOperand& MO, + LiveInterval &interval, + MachineInstr *CopyMI); + + /// handleLiveInRegister - Create interval for a livein register. + void handleLiveInRegister(MachineBasicBlock* mbb, + SlotIndex MIIdx, + LiveInterval &interval, bool isAlias = false); + + /// getReMatImplicitUse - If the remat definition MI has one (for now, we + /// only allow one) virtual register operand, then its uses are implicitly + /// using the register. Returns the virtual register. + unsigned getReMatImplicitUse(const LiveInterval &li, + MachineInstr *MI) const; + + /// isValNoAvailableAt - Return true if the val# of the specified interval + /// which reaches the given instruction also reaches the specified use + /// index. 
+ bool isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, + SlotIndex UseIdx) const; + + /// isReMaterializable - Returns true if the definition MI of the specified + /// val# of the specified interval is re-materializable. Also returns true + /// by reference if the def is a load. + bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, + MachineInstr *MI, + const SmallVectorImpl &SpillIs, + bool &isLoad); + + /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from + /// slot / to reg or any rematerialized load into ith operand of specified + /// MI. If it is successul, MI is updated with the newly created MI and + /// returns true. + bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm, + MachineInstr *DefMI, SlotIndex InstrIdx, + SmallVector &Ops, + bool isSS, int FrameIndex, unsigned Reg); + + /// canFoldMemoryOperand - Return true if the specified load / store + /// folding is possible. + bool canFoldMemoryOperand(MachineInstr *MI, + SmallVector &Ops, + bool ReMatLoadSS) const; + + /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified + /// VNInfo that's after the specified index but is within the basic block. + bool anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, + MachineBasicBlock *MBB, + SlotIndex Idx) const; + + /// hasAllocatableSuperReg - Return true if the specified physical register + /// has any super register that's allocatable. + bool hasAllocatableSuperReg(unsigned Reg) const; + + /// SRInfo - Spill / restore info. + struct SRInfo { + SlotIndex index; + unsigned vreg; + bool canFold; + SRInfo(SlotIndex i, unsigned vr, bool f) + : index(i), vreg(vr), canFold(f) {} + }; + + bool alsoFoldARestore(int Id, SlotIndex index, unsigned vr, + BitVector &RestoreMBBs, + DenseMap >&RestoreIdxes); + void eraseRestoreInfo(int Id, SlotIndex index, unsigned vr, + BitVector &RestoreMBBs, + DenseMap >&RestoreIdxes); + + /// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being + /// spilled and create empty intervals for their uses. + void handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, + const TargetRegisterClass* rc, + std::vector &NewLIs); + + /// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of + /// interval on to-be re-materialized operands of MI) with new register. + void rewriteImplicitOps(const LiveInterval &li, + MachineInstr *MI, unsigned NewVReg, VirtRegMap &vrm); + + /// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper + /// functions for addIntervalsForSpills to rewrite uses / defs for the given + /// live range. 
+ bool rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, + bool TrySplit, SlotIndex index, SlotIndex end, + MachineInstr *MI, MachineInstr *OrigDefMI, MachineInstr *DefMI, + unsigned Slot, int LdSlot, + bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, + VirtRegMap &vrm, const TargetRegisterClass* rc, + SmallVector &ReMatIds, const MachineLoopInfo *loopInfo, + unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, + DenseMap &MBBVRegsMap, + std::vector &NewLIs); + void rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, + LiveInterval::Ranges::const_iterator &I, + MachineInstr *OrigDefMI, MachineInstr *DefMI, unsigned Slot, int LdSlot, + bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, + VirtRegMap &vrm, const TargetRegisterClass* rc, + SmallVector &ReMatIds, const MachineLoopInfo *loopInfo, + BitVector &SpillMBBs, + DenseMap > &SpillIdxes, + BitVector &RestoreMBBs, + DenseMap > &RestoreIdxes, + DenseMap &MBBVRegsMap, + std::vector &NewLIs); + + static LiveInterval* createInterval(unsigned Reg); + + void printInstrs(raw_ostream &O) const; + void dumpInstrs() const; + }; +} // End llvm namespace + +#endif diff --git a/final/include/llvm/CodeGen/LiveStackAnalysis.h b/final/include/llvm/CodeGen/LiveStackAnalysis.h new file mode 100644 index 00000000000..8a8dcaf5728 --- /dev/null +++ b/final/include/llvm/CodeGen/LiveStackAnalysis.h @@ -0,0 +1,97 @@ +//===-- LiveStackAnalysis.h - Live Stack Slot Analysis ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the live stack slot analysis pass. It is analogous to +// live interval analysis except it's analyzing liveness of stack slots rather +// than registers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_LIVESTACK_ANALYSIS_H +#define LLVM_CODEGEN_LIVESTACK_ANALYSIS_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Allocator.h" +#include + +namespace llvm { + + class LiveStacks : public MachineFunctionPass { + /// Special pool allocator for VNInfo's (LiveInterval val#). + /// + VNInfo::Allocator VNInfoAllocator; + + /// S2IMap - Stack slot indices to live interval mapping. + /// + typedef std::map SS2IntervalMap; + SS2IntervalMap S2IMap; + + /// S2RCMap - Stack slot indices to register class mapping. 
+    std::map<int, const TargetRegisterClass*> S2RCMap;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    LiveStacks() : MachineFunctionPass(ID) {
+      initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+    }
+
+    typedef SS2IntervalMap::iterator iterator;
+    typedef SS2IntervalMap::const_iterator const_iterator;
+    const_iterator begin() const { return S2IMap.begin(); }
+    const_iterator end() const { return S2IMap.end(); }
+    iterator begin() { return S2IMap.begin(); }
+    iterator end() { return S2IMap.end(); }
+
+    unsigned getNumIntervals() const { return (unsigned)S2IMap.size(); }
+
+    LiveInterval &getOrCreateInterval(int Slot, const TargetRegisterClass *RC);
+
+    LiveInterval &getInterval(int Slot) {
+      assert(Slot >= 0 && "Spill slot index must be >= 0");
+      SS2IntervalMap::iterator I = S2IMap.find(Slot);
+      assert(I != S2IMap.end() && "Interval does not exist for stack slot");
+      return I->second;
+    }
+
+    const LiveInterval &getInterval(int Slot) const {
+      assert(Slot >= 0 && "Spill slot index must be >= 0");
+      SS2IntervalMap::const_iterator I = S2IMap.find(Slot);
+      assert(I != S2IMap.end() && "Interval does not exist for stack slot");
+      return I->second;
+    }
+
+    bool hasInterval(int Slot) const {
+      return S2IMap.count(Slot);
+    }
+
+    const TargetRegisterClass *getIntervalRegClass(int Slot) const {
+      assert(Slot >= 0 && "Spill slot index must be >= 0");
+      std::map<int, const TargetRegisterClass*>::const_iterator
+        I = S2RCMap.find(Slot);
+      assert(I != S2RCMap.end() &&
+             "Register class info does not exist for stack slot");
+      return I->second;
+    }
+
+    VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
+  };
+}
+
+#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */
diff --git a/final/include/llvm/CodeGen/LiveVariables.h b/final/include/llvm/CodeGen/LiveVariables.h
new file mode 100644
index 00000000000..f9b81b1ea7d
--- /dev/null
+++ b/final/include/llvm/CodeGen/LiveVariables.h
@@ -0,0 +1,316 @@
+//===-- llvm/CodeGen/LiveVariables.h - Live Variable Analysis ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariables analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are
+// only live within a single basic block (allowing it to do a single local
+// analysis to resolve physical register lifetimes in each basic block).
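LiveStacks above keys everything by frame index rather than by register. A hedged sketch of the usual guarded lookup, with the pass reference LS and slot FI assumed to come from the surrounding code:

// Sketch: query pattern for the stack-slot liveness analysis above.
void inspectSlot(llvm::LiveStacks &LS, int FI) {
  if (!LS.hasInterval(FI))
    return;                         // nothing live in this slot
  llvm::LiveInterval &SI = LS.getInterval(FI);
  const llvm::TargetRegisterClass *RC = LS.getIntervalRegClass(FI);
  (void)SI; (void)RC;               // e.g. inputs to stack-slot coloring
}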
diff --git a/final/include/llvm/CodeGen/LiveVariables.h b/final/include/llvm/CodeGen/LiveVariables.h
new file mode 100644
index 00000000000..f9b81b1ea7d
--- /dev/null
+++ b/final/include/llvm/CodeGen/LiveVariables.h
@@ -0,0 +1,316 @@
+//===-- llvm/CodeGen/LiveVariables.h - Live Variable Analysis ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariables analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information
+// for each virtual and _register allocatable_ physical register in a
+// function. It uses the dominance properties of SSA form to efficiently
+// compute live variables for virtual registers, and assumes that physical
+// registers are only live within a single basic block (allowing it to do a
+// single local analysis to resolve physical register lifetimes in each basic
+// block). If a physical register is not register allocatable, it is not
+// tracked. This is useful for things like the stack pointer and condition
+// codes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEVARIABLES_H
+#define LLVM_CODEGEN_LIVEVARIABLES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+
+class LiveVariables : public MachineFunctionPass {
+public:
+  static char ID; // Pass identification, replacement for typeid
+  LiveVariables() : MachineFunctionPass(ID) {
+    initializeLiveVariablesPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// VarInfo - This represents the regions where a virtual register is live in
+  /// the program. We represent this with three different pieces of
+  /// information: the set of blocks in which the instruction is live
+  /// throughout, the set of blocks in which the instruction is actually used,
+  /// and the set of non-phi instructions that are the last users of the value.
+  ///
+  /// In the common case where a value is defined and killed in the same block,
+  /// there is one killing instruction, and AliveBlocks is empty.
+  ///
+  /// Otherwise, the value is live out of the block. If the value is live
+  /// throughout any blocks, these blocks are listed in AliveBlocks. Blocks
+  /// where the liveness range ends are not included in AliveBlocks, instead
+  /// being captured by the Kills set. In these blocks, the value is live into
+  /// the block (unless the value is defined and killed in the same block) and
+  /// lives until the specified instruction. Note that there cannot ever be a
+  /// value whose Kills set contains two instructions from the same basic
+  /// block.
+  ///
+  /// PHI nodes complicate things a bit. If a PHI node is the last user of a
+  /// value in one of its predecessor blocks, it is not listed in the kills
+  /// set, but does include the predecessor block in the AliveBlocks set
+  /// (unless that block also defines the value). This leads to the (perfectly
+  /// sensible) situation where a value is defined in a block, and the last
+  /// use is a phi node in the successor. In this case, AliveBlocks is empty
+  /// (the value is not live across any blocks) and Kills is empty (phi nodes
+  /// are not included). This is sensible because the value must be live to
+  /// the end of the block, but is not live in any successor blocks.
+  struct VarInfo {
+    /// AliveBlocks - Set of blocks in which this value is alive completely
+    /// through. This is a bit set which uses the basic block number as an
+    /// index.
+    ///
+    SparseBitVector<> AliveBlocks;
+
+    /// NumUses - Number of uses of this register across the entire function.
+    ///
+    unsigned NumUses;
+
+    /// Kills - List of MachineInstruction's which are the last use of this
+    /// virtual register (kill it) in their basic block.
+    ///
+    std::vector<MachineInstr*> Kills;
+
+    VarInfo() : NumUses(0) {}
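+
+    // To make the rules above concrete, a small hypothetical example:
+    // suppose a virtual register %v is defined in BB0, is live through BB1,
+    // and its last (non-PHI) use is in BB2. Then AliveBlocks = { BB1 } and
+    // Kills = { the use in BB2 }. If instead the only later use of %v is a
+    // PHI operand flowing in from BB1, then BB1 is added to AliveBlocks and
+    // Kills stays empty; and if the last use is a PHI in BB0's successor,
+    // both AliveBlocks and Kills are empty.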
+
+    /// removeKill - Delete a kill corresponding to the specified
+    /// machine instruction. Returns true if there was a kill
+    /// corresponding to this instruction, false otherwise.
+    bool removeKill(MachineInstr *MI) {
+      std::vector<MachineInstr*>::iterator
+        I = std::find(Kills.begin(), Kills.end(), MI);
+      if (I == Kills.end())
+        return false;
+      Kills.erase(I);
+      return true;
+    }
+
+    /// findKill - Find a kill instruction in MBB. Return NULL if none is
+    /// found.
+    MachineInstr *findKill(const MachineBasicBlock *MBB) const;
+
+    /// isLiveIn - Is Reg live in to MBB? This means that Reg is live through
+    /// MBB, or it is killed in MBB. If Reg is only used by PHI instructions in
+    /// MBB, it is not considered live in.
+    bool isLiveIn(const MachineBasicBlock &MBB,
+                  unsigned Reg,
+                  MachineRegisterInfo &MRI);
+
+    void dump() const;
+  };
+
+private:
+  /// VirtRegInfo - This list is a mapping from virtual register number to
+  /// variable information.
+  ///
+  IndexedMap<VarInfo, VirtReg2IndexFunctor> VirtRegInfo;
+
+  /// PHIJoins - list of virtual registers that are PHI joins. These registers
+  /// may have multiple definitions, and they require special handling when
+  /// building live intervals.
+  SparseBitVector<> PHIJoins;
+
+  /// ReservedRegisters - This vector keeps track of which registers
+  /// are reserved registers, which are not allocatable by the target machine.
+  /// We cannot track liveness for values that are in this set.
+  ///
+  BitVector ReservedRegisters;
+
+private:   // Intermediate data structures
+  MachineFunction *MF;
+
+  MachineRegisterInfo* MRI;
+
+  const TargetRegisterInfo *TRI;
+
+  // PhysRegDef - Keep track of which instruction was the last def of a
+  // physical register. This is a purely local property, because all physical
+  // register references are presumed dead across basic blocks.
+  MachineInstr **PhysRegDef;
+
+  // PhysRegUse - Keep track of which instruction was the last use of a
+  // physical register. This is a purely local property, because all physical
+  // register references are presumed dead across basic blocks.
+  MachineInstr **PhysRegUse;
+
+  SmallVector<unsigned, 4> *PHIVarInfo;
+
+  // DistanceMap - Keep track of the distance of a MI from the start of the
+  // current basic block.
+  DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+  /// HandlePhysRegKill - Add kills of Reg and its sub-registers to the
+  /// uses. Pay special attention to the sub-register uses which may come below
+  /// the last use of the whole register.
+  bool HandlePhysRegKill(unsigned Reg, MachineInstr *MI);
+
+  void HandlePhysRegUse(unsigned Reg, MachineInstr *MI);
+  void HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+                        SmallVector<unsigned, 4> &Defs);
+  void UpdatePhysRegDefs(MachineInstr *MI, SmallVector<unsigned, 4> &Defs);
+
+  /// FindLastRefOrPartRef - Return the last reference or partial reference of
+  /// the specified register.
+  MachineInstr *FindLastRefOrPartRef(unsigned Reg);
+
+  /// FindLastPartialDef - Return the last partial def of the specified
+  /// register. Also returns the sub-registers that are defined by the
+  /// instruction.
+  MachineInstr *FindLastPartialDef(unsigned Reg,
+                                   SmallSet<unsigned, 4> &PartDefRegs);
+
+  /// analyzePHINodes - Gather information about the PHI nodes in here. In
+  /// particular, we want to map the variable information of a virtual
+  /// register which is used in a PHI node. We map that to the BB the vreg
+  /// is coming from.
+  void analyzePHINodes(const MachineFunction& Fn);
+public:
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  /// RegisterDefIsDead - Return true if the specified instruction defines the
+  /// specified register, but that definition is dead.
+  bool RegisterDefIsDead(MachineInstr *MI, unsigned Reg) const;
+
+  //===--------------------------------------------------------------------===//
+  //  API to update live variable information
+
+  /// replaceKillInstruction - Update register kill info by replacing a kill
+  /// instruction with a new one.
+  void replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+                              MachineInstr *NewMI);
+
+  /// addVirtualRegisterKilled - Add information about the fact that the
+  /// specified register is killed after being used by the specified
+  /// instruction. If AddIfNotFound is true, add an implicit operand if it's
+  /// not found.
+  void addVirtualRegisterKilled(unsigned IncomingReg, MachineInstr *MI,
+                                bool AddIfNotFound = false) {
+    if (MI->addRegisterKilled(IncomingReg, TRI, AddIfNotFound))
+      getVarInfo(IncomingReg).Kills.push_back(MI);
+  }
+
+  /// removeVirtualRegisterKilled - Remove the specified kill of the virtual
+  /// register from the live variable information. Returns true if the
+  /// variable was marked as killed by the specified instruction,
+  /// false otherwise.
+  bool removeVirtualRegisterKilled(unsigned reg, MachineInstr *MI) {
+    if (!getVarInfo(reg).removeKill(MI))
+      return false;
+
+    bool Removed = false;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isKill() && MO.getReg() == reg) {
+        MO.setIsKill(false);
+        Removed = true;
+        break;
+      }
+    }
+
+    assert(Removed && "Register is not used by this instruction!");
+    return true;
+  }
+
+  /// removeVirtualRegistersKilled - Remove all killed info for the specified
+  /// instruction.
+  void removeVirtualRegistersKilled(MachineInstr *MI);
+
+  /// addVirtualRegisterDead - Add information about the fact that the
+  /// specified register is dead after being used by the specified
+  /// instruction. If AddIfNotFound is true, add an implicit operand if it's
+  /// not found.
+  void addVirtualRegisterDead(unsigned IncomingReg, MachineInstr *MI,
+                              bool AddIfNotFound = false) {
+    if (MI->addRegisterDead(IncomingReg, TRI, AddIfNotFound))
+      getVarInfo(IncomingReg).Kills.push_back(MI);
+  }
+
+  /// removeVirtualRegisterDead - Remove the specified dead def of the virtual
+  /// register from the live variable information. Returns true if the
+  /// variable was marked dead at the specified instruction, false
+  /// otherwise.
+  bool removeVirtualRegisterDead(unsigned reg, MachineInstr *MI) {
+    if (!getVarInfo(reg).removeKill(MI))
+      return false;
+
+    bool Removed = false;
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && MO.getReg() == reg) {
+        MO.setIsDead(false);
+        Removed = true;
+        break;
+      }
+    }
+    assert(Removed && "Register is not defined by this instruction!");
+    return true;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual void releaseMemory() {
+    VirtRegInfo.clear();
+  }
+
+  /// getVarInfo - Return the VarInfo structure for the specified VIRTUAL
+  /// register.
+  VarInfo &getVarInfo(unsigned RegIdx);
+
+  void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
+                               MachineBasicBlock *BB);
+  void MarkVirtRegAliveInBlock(VarInfo& VRInfo, MachineBasicBlock* DefBlock,
+                               MachineBasicBlock *BB,
+                               std::vector<MachineBasicBlock*> &WorkList);
+  void HandleVirtRegDef(unsigned reg, MachineInstr *MI);
+  void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+                        MachineInstr *MI);
+
+  bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) {
+    return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI);
+  }
+
+  /// isLiveOut - Determine if Reg is live out from MBB, when not considering
+  /// PHI nodes. This means that Reg is either killed by a successor block or
+  /// passed through one.
+  bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB);
+
+  /// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All
+  /// variables that are live out of DomBB and live into SuccBB will be marked
+  /// as passing live through BB. This method assumes that the machine code is
+  /// still in SSA form.
+  void addNewBlock(MachineBasicBlock *BB,
+                   MachineBasicBlock *DomBB,
+                   MachineBasicBlock *SuccBB);
+
+  /// isPHIJoin - Return true if Reg is a phi join register.
+  bool isPHIJoin(unsigned Reg) { return PHIJoins.test(Reg); }
+
+  /// setPHIJoin - Mark Reg as a phi join register.
+  void setPHIJoin(unsigned Reg) { PHIJoins.set(Reg); }
+};
+
+} // End llvm namespace
+
+#endif
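
The kill-list maintenance above is easiest to see in a small, hedged sketch; the helper below is hypothetical, and in-tree code would typically reach for LiveVariables::replaceKillInstruction, which performs the same update:

    #include "llvm/CodeGen/LiveVariables.h"

    // If OldMI killed Reg, move both the operand kill flag and the
    // VarInfo::Kills entry over to NewMI; otherwise nothing to do.
    static void moveKill(llvm::LiveVariables &LV, unsigned Reg,
                         llvm::MachineInstr *OldMI,
                         llvm::MachineInstr *NewMI) {
      if (LV.removeVirtualRegisterKilled(Reg, OldMI))
        LV.addVirtualRegisterKilled(Reg, NewMI);
    }
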
diff --git a/final/include/llvm/CodeGen/MachORelocation.h b/final/include/llvm/CodeGen/MachORelocation.h
new file mode 100644
index 00000000000..21fe74f8e1c
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachORelocation.h
@@ -0,0 +1,56 @@
+//=== MachORelocation.h - Mach-O Relocation Info ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachORelocation class.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_MACHO_RELOCATION_H
+#define LLVM_CODEGEN_MACHO_RELOCATION_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+  /// MachORelocation - This struct contains information about each relocation
+  /// that needs to be emitted to the file.
+  /// see <mach-o/reloc.h>
+  class MachORelocation {
+    uint32_t r_address;   // offset in the section to what is being relocated
+    uint32_t r_symbolnum; // symbol index if r_extern == 1 else section index
+    bool     r_pcrel;     // was relocated pc-relative already
+    uint8_t  r_length;    // length = 2 ^ r_length
+    bool     r_extern;    // r_symbolnum is a symbol index (1) or section (0)
+    uint8_t  r_type;      // if not 0, machine-specific relocation type.
+    bool     r_scattered; // 1 = scattered, 0 = non-scattered
+    int32_t  r_value;     // the value the item to be relocated is referring
+                          // to.
+  public:
+    uint32_t getPackedFields() const {
+      if (r_scattered)
+        return (1 << 31) | (r_pcrel << 30) | ((r_length & 3) << 28) |
+          ((r_type & 15) << 24) | (r_address & 0x00FFFFFF);
+      else
+        return (r_symbolnum << 8) | (r_pcrel << 7) | ((r_length & 3) << 5) |
+          (r_extern << 4) | (r_type & 15);
+    }
+    uint32_t getAddress() const { return r_scattered ? r_value : r_address; }
+    uint32_t getRawAddress() const { return r_address; }
+
+    MachORelocation(uint32_t addr, uint32_t index, bool pcrel, uint8_t len,
+                    bool ext, uint8_t type, bool scattered = false,
+                    int32_t value = 0) :
+      r_address(addr), r_symbolnum(index), r_pcrel(pcrel), r_length(len),
+      r_extern(ext), r_type(type), r_scattered(scattered), r_value(value) {}
+  };
+
+}  // end llvm namespace
+
+#endif // LLVM_CODEGEN_MACHO_RELOCATION_H
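
As a quick numeric check of the non-scattered packing above (the operand values are made up): a relocation for symbol index 7 that is pc-relative, four bytes wide (r_length = 2), and external, with type 0, packs to (7 << 8) | (1 << 7) | (2 << 5) | (1 << 4) = 0x7d0:

    #include <cassert>
    #include "llvm/CodeGen/MachORelocation.h"

    int main() {
      llvm::MachORelocation R(/*addr=*/0, /*index=*/7, /*pcrel=*/true,
                              /*len=*/2, /*ext=*/true, /*type=*/0);
      assert(R.getPackedFields() == 0x7d0);
      return 0;
    }
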
diff --git a/final/include/llvm/CodeGen/MachineBasicBlock.h b/final/include/llvm/CodeGen/MachineBasicBlock.h
new file mode 100644
index 00000000000..19060933888
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineBasicBlock.h
@@ -0,0 +1,493 @@
+//===-- llvm/CodeGen/MachineBasicBlock.h ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect the sequence of machine instructions for a basic block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEBASICBLOCK_H
+#define LLVM_CODEGEN_MACHINEBASICBLOCK_H
+
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/ADT/GraphTraits.h"
+#include <functional>
+
+namespace llvm {
+
+class Pass;
+class BasicBlock;
+class MachineFunction;
+class MCSymbol;
+class SlotIndexes;
+class StringRef;
+class raw_ostream;
+
+template <>
+struct ilist_traits<MachineInstr> : public ilist_default_traits<MachineInstr> {
+private:
+  mutable ilist_half_node<MachineInstr> Sentinel;
+
+  // this is only set by the MachineBasicBlock owning the LiveList
+  friend class MachineBasicBlock;
+  MachineBasicBlock* Parent;
+
+public:
+  MachineInstr *createSentinel() const {
+    return static_cast<MachineInstr*>(&Sentinel);
+  }
+  void destroySentinel(MachineInstr *) const {}
+
+  MachineInstr *provideInitialHead() const { return createSentinel(); }
+  MachineInstr *ensureHead(MachineInstr*) const { return createSentinel(); }
+  static void noteHead(MachineInstr*, MachineInstr*) {}
+
+  void addNodeToList(MachineInstr* N);
+  void removeNodeFromList(MachineInstr* N);
+  void transferNodesFromList(ilist_traits &SrcTraits,
+                             ilist_iterator<MachineInstr> first,
+                             ilist_iterator<MachineInstr> last);
+  void deleteNode(MachineInstr *N);
+private:
+  void createNode(const MachineInstr &);
+};
+
+class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
+  typedef ilist<MachineInstr> Instructions;
+  Instructions Insts;
+  const BasicBlock *BB;
+  int Number;
+  MachineFunction *xParent;
+
+  /// Predecessors/Successors - Keep track of the predecessor / successor
+  /// basic blocks.
+  std::vector<MachineBasicBlock *> Predecessors;
+  std::vector<MachineBasicBlock *> Successors;
+
+  /// LiveIns - Keep track of the physical registers that are livein of
+  /// the basic block.
+  std::vector<unsigned> LiveIns;
+
+  /// Alignment - Alignment of the basic block. Zero if the basic block does
+  /// not need to be aligned.
+  unsigned Alignment;
+
+  /// IsLandingPad - Indicate that this basic block is entered via an
+  /// exception handler.
+  bool IsLandingPad;
+
+  /// AddressTaken - Indicate that this basic block is potentially the
+  /// target of an indirect branch.
+  bool AddressTaken;
+
+  // Intrusive list support
+  MachineBasicBlock() {}
+
+  explicit MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb);
+
+  ~MachineBasicBlock();
+
+  // MachineBasicBlocks are allocated and owned by MachineFunction.
+  friend class MachineFunction;
+
+public:
+  /// getBasicBlock - Return the LLVM basic block that this instance
+  /// corresponded to originally. Note that this may be NULL if this instance
+  /// does not correspond directly to an LLVM basic block.
+  ///
+  const BasicBlock *getBasicBlock() const { return BB; }
+
+  /// getName - Return the name of the corresponding LLVM basic block, or
+  /// "(null)".
+  StringRef getName() const;
+
+  /// hasAddressTaken - Test whether this block is potentially the target
+  /// of an indirect branch.
+  bool hasAddressTaken() const { return AddressTaken; }
+
+  /// setHasAddressTaken - Set this block to reflect that it potentially
+  /// is the target of an indirect branch.
+  void setHasAddressTaken() { AddressTaken = true; }
+
+  /// getParent - Return the MachineFunction containing this basic block.
+  ///
+  const MachineFunction *getParent() const { return xParent; }
+  MachineFunction *getParent() { return xParent; }
+
+  typedef Instructions::iterator                      iterator;
+  typedef Instructions::const_iterator                const_iterator;
+  typedef std::reverse_iterator<const_iterator>       const_reverse_iterator;
+  typedef std::reverse_iterator<iterator>             reverse_iterator;
+
+  unsigned size() const { return (unsigned)Insts.size(); }
+  bool empty() const { return Insts.empty(); }
+
+  MachineInstr& front() { return Insts.front(); }
+  MachineInstr& back()  { return Insts.back(); }
+  const MachineInstr& front() const { return Insts.front(); }
+  const MachineInstr& back()  const { return Insts.back(); }
+
+  iterator                begin()       { return Insts.begin();  }
+  const_iterator          begin() const { return Insts.begin();  }
+  iterator                end()         { return Insts.end();    }
+  const_iterator          end()   const { return Insts.end();    }
+  reverse_iterator        rbegin()      { return Insts.rbegin(); }
+  const_reverse_iterator  rbegin() const { return Insts.rbegin(); }
+  reverse_iterator        rend()        { return Insts.rend();   }
+  const_reverse_iterator  rend()  const { return Insts.rend();   }
+
+  // Machine-CFG iterators
+  typedef std::vector<MachineBasicBlock *>::iterator       pred_iterator;
+  typedef std::vector<MachineBasicBlock *>::const_iterator const_pred_iterator;
+  typedef std::vector<MachineBasicBlock *>::iterator       succ_iterator;
+  typedef std::vector<MachineBasicBlock *>::const_iterator const_succ_iterator;
+  typedef std::vector<MachineBasicBlock *>::reverse_iterator
+                                                       pred_reverse_iterator;
+  typedef std::vector<MachineBasicBlock *>::const_reverse_iterator
+                                                 const_pred_reverse_iterator;
+  typedef std::vector<MachineBasicBlock *>::reverse_iterator
+                                                       succ_reverse_iterator;
+  typedef std::vector<MachineBasicBlock *>::const_reverse_iterator
+                                                 const_succ_reverse_iterator;
+
+  pred_iterator        pred_begin()       { return Predecessors.begin(); }
+  const_pred_iterator  pred_begin() const { return Predecessors.begin(); }
+  pred_iterator        pred_end()         { return Predecessors.end();   }
+  const_pred_iterator  pred_end()   const { return Predecessors.end();   }
+  pred_reverse_iterator       pred_rbegin()       { return Predecessors.rbegin(); }
+  const_pred_reverse_iterator pred_rbegin() const { return Predecessors.rbegin(); }
+  pred_reverse_iterator       pred_rend()         { return Predecessors.rend();   }
+  const_pred_reverse_iterator pred_rend()   const { return Predecessors.rend();   }
+  unsigned             pred_size()  const { return (unsigned)Predecessors.size(); }
+  bool                 pred_empty() const { return Predecessors.empty(); }
+  succ_iterator        succ_begin()       { return Successors.begin();   }
+  const_succ_iterator  succ_begin() const { return Successors.begin();   }
+  succ_iterator        succ_end()         { return Successors.end();     }
+  const_succ_iterator  succ_end()   const { return Successors.end();     }
+  succ_reverse_iterator       succ_rbegin()       { return Successors.rbegin(); }
+  const_succ_reverse_iterator succ_rbegin() const { return Successors.rbegin(); }
+  succ_reverse_iterator       succ_rend()         { return Successors.rend();   }
+  const_succ_reverse_iterator succ_rend()   const { return Successors.rend();   }
+  unsigned             succ_size()  const { return (unsigned)Successors.size(); }
+  bool                 succ_empty() const { return Successors.empty();   }
+
+  // LiveIn management methods.
+
+  /// addLiveIn - Add the specified register as a live in. Note that it
+  /// is an error to add the same register to the same set more than once.
+  void addLiveIn(unsigned Reg) { LiveIns.push_back(Reg); }
+
+  /// removeLiveIn - Remove the specified register from the live in set.
+  ///
+  void removeLiveIn(unsigned Reg);
+
+  /// isLiveIn - Return true if the specified register is in the live in set.
+  ///
+  bool isLiveIn(unsigned Reg) const;
+
+  // Iteration support for live in sets. These sets are kept in sorted
+  // order by their register number.
+  typedef std::vector<unsigned>::const_iterator livein_iterator;
+  livein_iterator livein_begin() const { return LiveIns.begin(); }
+  livein_iterator livein_end()   const { return LiveIns.end(); }
+  bool            livein_empty() const { return LiveIns.empty(); }
+
+  /// getAlignment - Return alignment of the basic block.
+  ///
+  unsigned getAlignment() const { return Alignment; }
+
+  /// setAlignment - Set alignment of the basic block.
+  ///
+  void setAlignment(unsigned Align) { Alignment = Align; }
+
+  /// isLandingPad - Returns true if the block is a landing pad. That is,
+  /// this basic block is entered via an exception handler.
+  bool isLandingPad() const { return IsLandingPad; }
+
+  /// setIsLandingPad - Indicates the block is a landing pad. That is,
+  /// this basic block is entered via an exception handler.
+  void setIsLandingPad() { IsLandingPad = true; }
+
+  /// getLandingPadSuccessor - If this block has a successor that is a landing
+  /// pad, return it. Otherwise return NULL.
+  const MachineBasicBlock *getLandingPadSuccessor() const;
+
+  // Code Layout methods.
+
+  /// moveBefore/moveAfter - move 'this' block before or after the specified
+  /// block. This only moves the block, it does not modify the CFG or adjust
+  /// potential fall-throughs at the end of the block.
+  void moveBefore(MachineBasicBlock *NewAfter);
+  void moveAfter(MachineBasicBlock *NewBefore);
+
+  /// updateTerminator - Update the terminator instructions in block to account
+  /// for changes to the layout. If the block previously used a fallthrough,
+  /// it may now need a branch, and if it previously used branching it may now
+  /// be able to use a fallthrough.
+  void updateTerminator();
+
+  // Machine-CFG mutators
+
+  /// addSuccessor - Add succ as a successor of this MachineBasicBlock.
+  /// The Predecessors list of succ is automatically updated.
+  ///
+  void addSuccessor(MachineBasicBlock *succ);
+
+  /// removeSuccessor - Remove successor from the successors list of this
+  /// MachineBasicBlock. The Predecessors list of succ is automatically
+  /// updated.
+  ///
+  void removeSuccessor(MachineBasicBlock *succ);
+
+  /// removeSuccessor - Remove specified successor from the successors list of
+  /// this MachineBasicBlock. The Predecessors list of succ is automatically
+  /// updated. Return the iterator to the element after the one removed.
+  ///
+  succ_iterator removeSuccessor(succ_iterator I);
+
+  /// transferSuccessors - Transfers all the successors from MBB to this
+  /// machine basic block (i.e., copies all the successors from fromMBB and
+  /// removes all the successors from fromMBB).
+  void transferSuccessors(MachineBasicBlock *fromMBB);
+
+  /// transferSuccessorsAndUpdatePHIs - Transfers all the successors, as
+  /// in transferSuccessors, and update PHI operands in the successor blocks
+  /// which refer to fromMBB to refer to this.
+  void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB);
+
+  /// isSuccessor - Return true if the specified MBB is a successor of this
+  /// block.
+  bool isSuccessor(const MachineBasicBlock *MBB) const;
+
+  /// isLayoutSuccessor - Return true if the specified MBB will be emitted
+  /// immediately after this block, such that if this block exits by
+  /// falling through, control will transfer to the specified MBB. Note
+  /// that MBB need not be a successor at all, for example if this block
+  /// ends with an unconditional branch to some other block.
+  bool isLayoutSuccessor(const MachineBasicBlock *MBB) const;
+
+  /// canFallThrough - Return true if the block can implicitly transfer
+  /// control to the block after it by falling off the end of it. This should
+  /// return false if it can reach the block after it, but it uses an explicit
+  /// branch to do so (e.g., a table jump). True is a conservative answer.
+  bool canFallThrough();
+
+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode instruction. When adding instructions to the beginning of the
+  /// basic block, they should be added before the returned value, not before
+  /// the first instruction, which might be a PHI.
+  /// Returns end() if there's no non-PHI instruction.
+  iterator getFirstNonPHI();
+
+  /// SkipPHIsAndLabels - Return the first instruction in MBB after I that is
+  /// not a PHI or a label. This is the correct point to insert copies at the
+  /// beginning of a basic block.
+  iterator SkipPHIsAndLabels(iterator I);
+
+  /// getFirstTerminator - returns an iterator to the first terminator
+  /// instruction of this basic block. If a terminator does not exist,
+  /// it returns end().
+  iterator getFirstTerminator();
+
+  /// getLastNonDebugInstr - returns an iterator to the last non-debug
+  /// instruction in the basic block, or end().
+  iterator getLastNonDebugInstr();
+
+  /// SplitCriticalEdge - Split the critical edge from this block to the
+  /// given successor block, and return the newly created block, or null
+  /// if splitting is not possible.
+  ///
+  /// This function updates LiveVariables, MachineDominatorTree, and
+  /// MachineLoopInfo, as applicable.
+  MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P);
+
+  void pop_front() { Insts.pop_front(); }
+  void pop_back()  { Insts.pop_back(); }
+  void push_back(MachineInstr *MI) { Insts.push_back(MI); }
+  template<typename IT>
+  void insert(iterator I, IT S, IT E) { Insts.insert(I, S, E); }
+  iterator insert(iterator I, MachineInstr *M) { return Insts.insert(I, M); }
+  iterator insertAfter(iterator I, MachineInstr *M) {
+    return Insts.insertAfter(I, M);
+  }
+
+  // erase - Remove the specified element or range from the instruction list.
+  // These functions delete any instructions removed.
+  //
+  iterator erase(iterator I)             { return Insts.erase(I); }
+  iterator erase(iterator I, iterator E) { return Insts.erase(I, E); }
+  MachineInstr *remove(MachineInstr *I)  { return Insts.remove(I); }
+  void clear()                           { Insts.clear(); }
+
+  /// splice - Take an instruction from MBB 'Other' at the position From,
+  /// and insert it into this MBB right before 'where'.
+  void splice(iterator where, MachineBasicBlock *Other, iterator From) {
+    Insts.splice(where, Other->Insts, From);
+  }
+
+  /// splice - Take a block of instructions from MBB 'Other' in the range
+  /// [From, To), and insert them into this MBB right before 'where'.
+  void splice(iterator where, MachineBasicBlock *Other, iterator From,
+              iterator To) {
+    Insts.splice(where, Other->Insts, From, To);
+  }
+
+  /// removeFromParent - This method unlinks 'this' from the containing
+  /// function, and returns it, but does not delete it.
+  MachineBasicBlock *removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing
+  /// function and deletes it.
+  void eraseFromParent();
+
+  /// ReplaceUsesOfBlockWith - Given a machine basic block that branched to
+  /// 'Old', change the code and CFG so that it branches to 'New' instead.
+  void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New);
+
+  /// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in
+  /// the CFG to be inserted. If we have proven that MBB can only branch to
+  /// DestA and DestB, remove any other MBB successors from the CFG. DestA and
+  /// DestB can be null. Besides DestA and DestB, retain other edges leading
+  /// to LandingPads (currently there can be only one; we don't check or
+  /// require that here). Note it is possible that DestA and/or DestB are
+  /// LandingPads.
+  bool CorrectExtraCFGEdges(MachineBasicBlock *DestA,
+                            MachineBasicBlock *DestB,
+                            bool isCond);
+
+  /// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
+  /// any DBG_VALUE instructions. Return UnknownLoc if there is none.
+  DebugLoc findDebugLoc(MachineBasicBlock::iterator &MBBI);
+
+  // Debugging methods.
+  void dump() const;
+  void print(raw_ostream &OS, SlotIndexes* = 0) const;
+
+  /// getNumber - MachineBasicBlocks are uniquely numbered at the function
+  /// level, unless they're not in a MachineFunction yet, in which case this
+  /// will return -1.
+  ///
+  int getNumber() const { return Number; }
+  void setNumber(int N) { Number = N; }
+
+  /// getSymbol - Return the MCSymbol for this basic block.
+  ///
+  MCSymbol *getSymbol() const;
+
+private:   // Methods used to maintain doubly linked list of blocks...
+  friend struct ilist_traits<MachineBasicBlock>;
+
+  // Machine-CFG mutators
+
+  /// addPredecessor - Add pred as a predecessor of this MachineBasicBlock.
+  /// Don't do this unless you know what you're doing, because it doesn't
+  /// update pred's successors list. Use pred->addSuccessor instead.
+  ///
+  void addPredecessor(MachineBasicBlock *pred);
+
+  /// removePredecessor - Remove pred as a predecessor of this
+  /// MachineBasicBlock. Don't do this unless you know what you're
+  /// doing, because it doesn't update pred's successors list. Use
+  /// pred->removeSuccessor instead.
+  ///
+  void removePredecessor(MachineBasicBlock *pred);
+};
+
+raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB);
+
+void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t);
+
+// This is useful when building IndexedMaps keyed on basic block pointers.
+struct MBB2NumberFunctor :
+  public std::unary_function<const MachineBasicBlock*, unsigned> {
+  unsigned operator()(const MachineBasicBlock *MBB) const {
+    return MBB->getNumber();
+  }
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for machine basic block graphs (machine-CFGs)
+//===--------------------------------------------------------------------===//
+
+// Provide specializations of GraphTraits to be able to treat a
+// MachineFunction as a graph of MachineBasicBlocks...
+//
+
+template <> struct GraphTraits<MachineBasicBlock *> {
+  typedef MachineBasicBlock NodeType;
+  typedef MachineBasicBlock::succ_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(MachineBasicBlock *BB) { return BB; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->succ_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->succ_end();
+  }
+};
+
+template <> struct GraphTraits<const MachineBasicBlock *> {
+  typedef const MachineBasicBlock NodeType;
+  typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(const MachineBasicBlock *BB) { return BB; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->succ_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->succ_end();
+  }
+};
+
+// Provide specializations of GraphTraits to be able to treat a
+// MachineFunction as a graph of MachineBasicBlocks... and to walk it
+// in inverse order. Inverse order for a function is considered
+// to be when traversing the predecessor edges of a MBB
+// instead of the successor edges.
+//
+template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
+  typedef MachineBasicBlock NodeType;
+  typedef MachineBasicBlock::pred_iterator ChildIteratorType;
+  static NodeType *getEntryNode(Inverse<MachineBasicBlock *> G) {
+    return G.Graph;
+  }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->pred_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->pred_end();
+  }
+};
+
+template <> struct GraphTraits<Inverse<const MachineBasicBlock*> > {
+  typedef const MachineBasicBlock NodeType;
+  typedef MachineBasicBlock::const_pred_iterator ChildIteratorType;
+  static NodeType *getEntryNode(Inverse<const MachineBasicBlock*> G) {
+    return G.Graph;
+  }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->pred_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->pred_end();
+  }
+};
+
+} // End llvm namespace
+
+#endif
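
A small sketch of what these GraphTraits specializations buy you: the generic graph algorithms in llvm/ADT work on the machine CFG unchanged. EntryMBB is assumed to be the function's entry block:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void visitReachable(MachineBasicBlock *EntryMBB) {
      // df_iterator walks successor edges via
      // GraphTraits<MachineBasicBlock*>::child_begin/child_end.
      for (df_iterator<MachineBasicBlock*> I = df_begin(EntryMBB),
           E = df_end(EntryMBB); I != E; ++I)
        errs() << "visiting BB#" << (*I)->getNumber() << '\n';
    }
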
diff --git a/final/include/llvm/CodeGen/MachineCodeEmitter.h b/final/include/llvm/CodeGen/MachineCodeEmitter.h
new file mode 100644
index 00000000000..8fc80adf7fb
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineCodeEmitter.h
@@ -0,0 +1,332 @@
+//===-- llvm/CodeGen/MachineCodeEmitter.h - Code emission -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an abstract interface that is used by the machine code
+// emission framework to output the code. This allows machine code emission to
+// be separated from concerns such as resolution of call targets, and where
+// the machine code will be written (memory or disk, for example).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECODEEMITTER_H
+#define LLVM_CODEGEN_MACHINECODEEMITTER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class MachineBasicBlock;
+class MachineConstantPool;
+class MachineJumpTableInfo;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineRelocation;
+class Value;
+class GlobalValue;
+class Function;
+class MCSymbol;
+
+/// MachineCodeEmitter - This class defines two sorts of methods: those for
+/// emitting the actual bytes of machine code, and those for emitting
+/// auxiliary structures, such as jump tables, relocations, etc.
+///
+/// Emission of machine code is complicated by the fact that we don't (in
+/// general) know the size of the machine code that we're about to emit before
+/// we emit it. As such, we preallocate a certain amount of memory, and set
+/// the BufferBegin/BufferEnd pointers to the start and end of the buffer. As
+/// we emit machine instructions, we advance the CurBufferPtr to indicate the
+/// location of the next byte to emit. In the case of a buffer overflow (we
+/// need to emit more machine code than we have allocated space for), the
+/// CurBufferPtr will saturate to BufferEnd and ignore stores. Once the entire
+/// function has been emitted, the overflow condition is checked, and if it
+/// has occurred, more memory is allocated, and we reemit the code into it.
+///
+class MachineCodeEmitter {
+protected:
+  /// BufferBegin/BufferEnd - Pointers to the start and end of the memory
+  /// allocated for this code buffer.
+  uint8_t *BufferBegin, *BufferEnd;
+  /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting
+  /// code. This is guaranteed to be in the range [BufferBegin,BufferEnd]. If
+  /// this pointer is at BufferEnd, it will never move due to code emission,
+  /// and all code emission requests will be ignored (this is the buffer
+  /// overflow condition).
+  uint8_t *CurBufferPtr;
+
+public:
+  virtual ~MachineCodeEmitter() {}
+
+  /// startFunction - This callback is invoked when the specified function is
+  /// about to be code generated. This initializes the BufferBegin/End/Ptr
+  /// fields.
+  ///
+  virtual void startFunction(MachineFunction &F) = 0;
+
+  /// finishFunction - This callback is invoked when the specified function
+  /// has finished code generation. If a buffer overflow has occurred, this
+  /// method returns true (the callee is required to try again), otherwise it
+  /// returns false.
+  ///
+  virtual bool finishFunction(MachineFunction &F) = 0;
+
+  /// emitByte - This callback is invoked when a byte needs to be written to
+  /// the output stream.
+  ///
+  void emitByte(uint8_t B) {
+    if (CurBufferPtr != BufferEnd)
+      *CurBufferPtr++ = B;
+  }
+
+  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
+  /// written to the output stream in little-endian format.
+  ///
+  void emitWordLE(uint32_t W) {
+    if (4 <= BufferEnd-CurBufferPtr) {
+      emitWordLEInto(CurBufferPtr, W);
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitWordLEInto - This callback is invoked when a 32-bit word needs to be
+  /// written to an arbitrary buffer in little-endian format. Buf must have at
+  /// least 4 bytes of available space.
+  ///
+  static void emitWordLEInto(uint8_t *&Buf, uint32_t W) {
+    *Buf++ = (uint8_t)(W >>  0);
+    *Buf++ = (uint8_t)(W >>  8);
+    *Buf++ = (uint8_t)(W >> 16);
+    *Buf++ = (uint8_t)(W >> 24);
+  }
+
+  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
+  /// written to the output stream in big-endian format.
+  ///
+  void emitWordBE(uint32_t W) {
+    if (4 <= BufferEnd-CurBufferPtr) {
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+  /// written to the output stream in little-endian format.
+  ///
+  void emitDWordLE(uint64_t W) {
+    if (8 <= BufferEnd-CurBufferPtr) {
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+  /// written to the output stream in big-endian format.
+  ///
+  void emitDWordBE(uint64_t W) {
+    if (8 <= BufferEnd-CurBufferPtr) {
+      *CurBufferPtr++ = (uint8_t)(W >> 56);
+      *CurBufferPtr++ = (uint8_t)(W >> 48);
+      *CurBufferPtr++ = (uint8_t)(W >> 40);
+      *CurBufferPtr++ = (uint8_t)(W >> 32);
+      *CurBufferPtr++ = (uint8_t)(W >> 24);
+      *CurBufferPtr++ = (uint8_t)(W >> 16);
+      *CurBufferPtr++ = (uint8_t)(W >>  8);
+      *CurBufferPtr++ = (uint8_t)(W >>  0);
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitAlignment - Move the CurBufferPtr pointer up to the specified
+  /// alignment (saturated to BufferEnd of course).
+  void emitAlignment(unsigned Alignment) {
+    if (Alignment == 0) Alignment = 1;
+
+    if (Alignment <= (uintptr_t)(BufferEnd-CurBufferPtr)) {
+      // Move the current buffer ptr up to the specified alignment.
+      CurBufferPtr =
+        (uint8_t*)(((uintptr_t)CurBufferPtr+Alignment-1) &
+                   ~(uintptr_t)(Alignment-1));
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+
+  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+  /// written to the output stream.
+  void emitULEB128Bytes(uint64_t Value) {
+    do {
+      uint8_t Byte = Value & 0x7f;
+      Value >>= 7;
+      if (Value) Byte |= 0x80;
+      emitByte(Byte);
+    } while (Value);
+  }
+
+  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+  /// written to the output stream. Value and Sign are signed so that the
+  /// shifts below are arithmetic; with unsigned operands the loop would not
+  /// terminate for negative values.
+  void emitSLEB128Bytes(int64_t Value) {
+    int64_t Sign = Value >> (8 * sizeof(Value) - 1);
+    bool IsMore;
+
+    do {
+      uint8_t Byte = Value & 0x7f;
+      Value >>= 7;
+      IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+      if (IsMore) Byte |= 0x80;
+      emitByte(Byte);
+    } while (IsMore);
+  }
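+
+  // Worked example for the two encoders above (illustrative only):
+  // emitULEB128Bytes(300) writes 0xAC 0x02: 300 = 0x12C, so the low seven
+  // bits 0x2C go out first with the continuation bit set (0xAC), then the
+  // remaining 0x02. emitSLEB128Bytes(-2) writes the single byte 0x7E: the
+  // low seven bits are 1111110 and the remaining bits all match the sign,
+  // so IsMore is false after the first byte.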
+
+  /// emitString - This callback is invoked when a String needs to be
+  /// written to the output stream.
+  void emitString(const std::string &String) {
+    for (unsigned i = 0, N = static_cast<unsigned>(String.size());
+         i < N; ++i) {
+      uint8_t C = String[i];
+      emitByte(C);
+    }
+    emitByte(0);
+  }
+
+  /// emitInt32 - Emit an int32 directive.
+  void emitInt32(int32_t Value) {
+    if (4 <= BufferEnd-CurBufferPtr) {
+      *((uint32_t*)CurBufferPtr) = Value;
+      CurBufferPtr += 4;
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitInt64 - Emit an int64 directive.
+  void emitInt64(uint64_t Value) {
+    if (8 <= BufferEnd-CurBufferPtr) {
+      *((uint64_t*)CurBufferPtr) = Value;
+      CurBufferPtr += 8;
+    } else {
+      CurBufferPtr = BufferEnd;
+    }
+  }
+
+  /// emitInt32At - Emit the int32 Value at address Addr, if Addr is within
+  /// the buffer.
+  void emitInt32At(uintptr_t *Addr, uintptr_t Value) {
+    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
+      (*(uint32_t*)Addr) = (uint32_t)Value;
+  }
+
+  /// emitInt64At - Emit the int64 Value at address Addr, if Addr is within
+  /// the buffer.
+  void emitInt64At(uintptr_t *Addr, uintptr_t Value) {
+    if (Addr >= (uintptr_t*)BufferBegin && Addr < (uintptr_t*)BufferEnd)
+      (*(uint64_t*)Addr) = (uint64_t)Value;
+  }
+
+  /// processDebugLoc - Records debug location information about a
+  /// MachineInstruction. This is called before emitting any bytes associated
+  /// with the instruction. Even if successive instructions have the same
+  /// debug location, this method will be called for each one.
+  virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {}
+
+  /// emitLabel - Emits a label.
+  virtual void emitLabel(MCSymbol *Label) = 0;
+
+  /// allocateSpace - Allocate a block of space in the current output buffer,
+  /// returning null (and setting conditions to indicate buffer overflow) on
+  /// failure. Alignment is the alignment in bytes of the buffer desired.
+  virtual void *allocateSpace(uintptr_t Size, unsigned Alignment) {
+    emitAlignment(Alignment);
+    void *Result;
+
+    // Check for buffer overflow.
+    if (Size >= (uintptr_t)(BufferEnd-CurBufferPtr)) {
+      CurBufferPtr = BufferEnd;
+      Result = 0;
+    } else {
+      // Allocate the space.
+      Result = CurBufferPtr;
+      CurBufferPtr += Size;
+    }
+
+    return Result;
+  }
+
+  /// StartMachineBasicBlock - This should be called by the target when a new
+  /// basic block is about to be emitted. This way the MCE knows where the
+  /// start of the block is, and can implement getMachineBasicBlockAddress.
+  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) = 0;
+
+  /// getCurrentPCValue - This returns the address that the next emitted byte
+  /// will be output to.
+  ///
+  virtual uintptr_t getCurrentPCValue() const {
+    return (uintptr_t)CurBufferPtr;
+  }
+
+  /// getCurrentPCOffset - Return the offset from the start of the emitted
+  /// buffer that we are currently writing to.
+  virtual uintptr_t getCurrentPCOffset() const {
+    return CurBufferPtr-BufferBegin;
+  }
+
+  /// earlyResolveAddresses - True if the code emitter can use symbol addresses
+  /// during code emission time. The JIT is capable of doing this because it
+  /// creates jump tables or constant pools in memory on the fly while the
+  /// object code emitters rely on a linker to have real addresses and should
+  /// use relocations instead.
+  virtual bool earlyResolveAddresses() const = 0;
+
+  /// addRelocation - Whenever a relocatable address is needed, it should be
+  /// noted with this interface.
+  virtual void addRelocation(const MachineRelocation &MR) = 0;
+
+  /// FIXME: These should all be handled with relocations!
+
+  /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+  /// the constant pool that was last emitted with the emitConstantPool method.
+  ///
+  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const = 0;
+
+  /// getJumpTableEntryAddress - Return the address of the jump table with
+  /// index 'Index' in the function that last called initJumpTableInfo.
+  ///
+  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const = 0;
+
+  /// getMachineBasicBlockAddress - Return the address of the specified
+  /// MachineBasicBlock, only usable after the label for the MBB has been
+  /// emitted.
+  ///
+  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const = 0;
+
+  /// getLabelAddress - Return the address of the specified Label, only usable
+  /// after the label has been emitted.
+  ///
+  virtual uintptr_t getLabelAddress(MCSymbol *Label) const = 0;
+
+  /// setModuleInfo - Specifies the MachineModuleInfo object. This is used for
+  /// exception handling purposes.
+  virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineCodeInfo.h b/final/include/llvm/CodeGen/MachineCodeInfo.h
new file mode 100644
index 00000000000..c5c0c445045
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineCodeInfo.h
@@ -0,0 +1,53 @@
+//===-- MachineCodeInfo.h - Class used to report JIT info -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines MachineCodeInfo, a class used by the JIT ExecutionEngine
+// to report information about the generated machine code.
+//
+// See JIT::runJITOnFunction for usage.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EE_MACHINE_CODE_INFO_H
+#define EE_MACHINE_CODE_INFO_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MachineCodeInfo {
+private:
+  size_t Size;   // Number of bytes in memory used
+  void *Address; // The address of the function in memory
+
+public:
+  MachineCodeInfo() : Size(0), Address(0) {}
+
+  void setSize(size_t s) {
+    Size = s;
+  }
+
+  void setAddress(void *a) {
+    Address = a;
+  }
+
+  size_t size() const {
+    return Size;
+  }
+
+  void *address() const {
+    return Address;
+  }
+
+};
+
+}
+
+#endif
+
diff --git a/final/include/llvm/CodeGen/MachineConstantPool.h b/final/include/llvm/CodeGen/MachineConstantPool.h
new file mode 100644
index 00000000000..5727321a0da
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineConstantPool.h
@@ -0,0 +1,170 @@
+//===-- CodeGen/MachineConstantPool.h - Abstract Constant Pool --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file declares the MachineConstantPool class which is an abstract
+/// constant pool to keep track of constants referenced by a function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINECONSTANTPOOL_H
+#define LLVM_CODEGEN_MACHINECONSTANTPOOL_H
+
+#include "llvm/ADT/DenseSet.h"
+#include <cassert>
+#include <climits>
+#include <vector>
+
+namespace llvm {
+
+class Constant;
+class FoldingSetNodeID;
+class TargetData;
+class TargetMachine;
+class Type;
+class MachineConstantPool;
+class raw_ostream;
+
+/// Abstract base class for all machine specific constant pool value
+/// subclasses.
+///
+class MachineConstantPoolValue {
+  const Type *Ty;
+
+public:
+  explicit MachineConstantPoolValue(const Type *ty) : Ty(ty) {}
+  virtual ~MachineConstantPoolValue() {}
+
+  /// getType - get type of this MachineConstantPoolValue.
+  ///
+  const Type *getType() const { return Ty; }
+
+
+  /// getRelocationInfo - This method classifies the entry according to
+  /// whether or not it may generate a relocation entry. This must be
+  /// conservative, so if it might codegen to a relocatable entry, it should
+  /// say so. The return values are the same as Constant::getRelocationInfo().
+  virtual unsigned getRelocationInfo() const = 0;
+
+  virtual int getExistingMachineCPValue(MachineConstantPool *CP,
+                                        unsigned Alignment) = 0;
+
+  virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID) = 0;
+
+  /// print - Implement operator<<
+  virtual void print(raw_ostream &O) const = 0;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS,
+                               const MachineConstantPoolValue &V) {
+  V.print(OS);
+  return OS;
+}
+
+
+/// This class is a data container for one entry in a MachineConstantPool.
+/// It contains a pointer to the value and an offset from the start of
+/// the constant pool.
+/// @brief An entry in a MachineConstantPool
+class MachineConstantPoolEntry {
+public:
+  /// The constant itself.
+  union {
+    const Constant *ConstVal;
+    MachineConstantPoolValue *MachineCPVal;
+  } Val;
+
+  /// The required alignment for this entry. The top bit is set when Val is
+  /// a MachineConstantPoolValue.
+  unsigned Alignment;
+
+  MachineConstantPoolEntry(const Constant *V, unsigned A)
+    : Alignment(A) {
+    Val.ConstVal = V;
+  }
+  MachineConstantPoolEntry(MachineConstantPoolValue *V, unsigned A)
+    : Alignment(A) {
+    Val.MachineCPVal = V;
+    Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1);
+  }
+
+  bool isMachineConstantPoolEntry() const {
+    return (int)Alignment < 0;
+  }
+
+  int getAlignment() const {
+    return Alignment & ~(1 << (sizeof(unsigned)*CHAR_BIT-1));
+  }
+
+  const Type *getType() const;
+
+  /// getRelocationInfo - This method classifies the entry according to
+  /// whether or not it may generate a relocation entry. This must be
+  /// conservative, so if it might codegen to a relocatable entry, it should
+  /// say so. The return values are:
+  ///
+  ///  0: This constant pool entry is guaranteed to never have a relocation
+  ///     applied to it (because it holds a simple constant like '4').
+  ///  1: This entry has relocations, but the entries are guaranteed to be
+  ///     resolvable by the static linker, so the dynamic linker will never
+  ///     see them.
+  ///  2: This entry may have arbitrary relocations.
+  unsigned getRelocationInfo() const;
+};
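+
+// A worked example of the tag bit above (illustrative, assuming 32-bit
+// unsigned): an entry wrapping a MachineConstantPoolValue with alignment 8
+// stores Alignment = 8 | 0x80000000 = 0x80000008. isMachineConstantPoolEntry()
+// then sees (int)0x80000008 < 0, and getAlignment() masks the tag back off:
+// 0x80000008 & 0x7fffffff == 8.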
+
+/// The MachineConstantPool class keeps track of constants referenced by a
+/// function which must be spilled to memory. This is used for constants which
+/// are unable to be used directly as operands to instructions, which typically
+/// include floating point and large integer constants.
+///
+/// Instructions reference the address of these constant pool constants through
+/// the use of MO_ConstantPoolIndex values. When emitting assembly or machine
+/// code, these virtual address references are converted to refer to the
+/// address of the function constant pool values.
+/// @brief The machine constant pool.
+class MachineConstantPool {
+  const TargetData *TD;   ///< The machine's TargetData.
+  unsigned PoolAlignment; ///< The alignment for the pool.
+  std::vector<MachineConstantPoolEntry> Constants; ///< The pool of constants.
+  /// MachineConstantPoolValues that use an existing MachineConstantPoolEntry.
+  DenseSet<MachineConstantPoolValue*> MachineCPVsSharingEntries;
+public:
+  /// @brief The only constructor.
+  explicit MachineConstantPool(const TargetData *td)
+    : TD(td), PoolAlignment(1) {}
+  ~MachineConstantPool();
+
+  /// getConstantPoolAlignment - Return the alignment required by
+  /// the whole constant pool, of which the first element must be aligned.
+  unsigned getConstantPoolAlignment() const { return PoolAlignment; }
+
+  /// getConstantPoolIndex - Create a new entry in the constant pool or return
+  /// an existing one. User must specify the minimum required alignment for
+  /// the object.
+  unsigned getConstantPoolIndex(const Constant *C, unsigned Alignment);
+  unsigned getConstantPoolIndex(MachineConstantPoolValue *V,
+                                unsigned Alignment);
+
+  /// isEmpty - Return true if this constant pool contains no constants.
+  bool isEmpty() const { return Constants.empty(); }
+
+  const std::vector<MachineConstantPoolEntry> &getConstants() const {
+    return Constants;
+  }
+
+  /// print - Used by the MachineFunction printer to print information about
+  /// constant pool objects. Implemented in MachineFunction.cpp.
+  ///
+  void print(raw_ostream &OS) const;
+
+  /// dump - Calls print(); intended to be invoked from a debugger.
+  void dump() const;
+};
+
+} // End llvm namespace
+
+#endif
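
A hedged sketch of typical client code (the surrounding function is hypothetical; MF is assumed to be the current MachineFunction and C a constant that cannot be encoded directly as an operand):

    #include "llvm/CodeGen/MachineConstantPool.h"
    #include "llvm/CodeGen/MachineFunction.h"

    unsigned spillConstant(llvm::MachineFunction &MF,
                           const llvm::Constant *C, unsigned Align) {
      llvm::MachineConstantPool *MCP = MF.getConstantPool();
      // Reuses an existing, sufficiently aligned entry when one exists;
      // otherwise appends a new one. The returned index is later referenced
      // through an MO_ConstantPoolIndex operand.
      return MCP->getConstantPoolIndex(C, Align);
    }
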
diff --git a/final/include/llvm/CodeGen/MachineDominators.h b/final/include/llvm/CodeGen/MachineDominators.h
new file mode 100644
index 00000000000..ab944a2335f
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineDominators.h
@@ -0,0 +1,202 @@
+//=- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes mirroring those in llvm/Analysis/Dominators.h,
+// but for target-specific code rather than target-independent IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEDOMINATORS_H
+#define LLVM_CODEGEN_MACHINEDOMINATORS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/DominatorInternals.h"
+
+namespace llvm {
+
+template<>
+inline void DominatorTreeBase<MachineBasicBlock>::addRoot(MachineBasicBlock* MBB) {
+  this->Roots.push_back(MBB);
+}
+
+EXTERN_TEMPLATE_INSTANTIATION(class DomTreeNodeBase<MachineBasicBlock>);
+EXTERN_TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
+
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+//===-------------------------------------
+/// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used
+/// to compute a normal dominator tree.
+///
+class MachineDominatorTree : public MachineFunctionPass {
+public:
+  static char ID; // Pass ID, replacement for typeid
+  DominatorTreeBase<MachineBasicBlock>* DT;
+
+  MachineDominatorTree();
+
+  ~MachineDominatorTree();
+
+  DominatorTreeBase<MachineBasicBlock>& getBase() { return *DT; }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  /// getRoots - Return the root blocks of the current CFG. This may include
+  /// multiple blocks if we are computing post dominators. For forward
+  /// dominators, this will always be a single block (the entry node).
+  ///
+  inline const std::vector<MachineBasicBlock*> &getRoots() const {
+    return DT->getRoots();
+  }
+
+  inline MachineBasicBlock *getRoot() const {
+    return DT->getRoot();
+  }
+
+  inline MachineDomTreeNode *getRootNode() const {
+    return DT->getRootNode();
+  }
+
+  virtual bool runOnMachineFunction(MachineFunction &F);
+
+  inline bool dominates(MachineDomTreeNode* A, MachineDomTreeNode* B) const {
+    return DT->dominates(A, B);
+  }
+
+  inline bool dominates(MachineBasicBlock* A, MachineBasicBlock* B) const {
+    return DT->dominates(A, B);
+  }
+
+  // dominates - Return true if A dominates B. This performs the
+  // special checks necessary if A and B are in the same basic block.
+  bool dominates(MachineInstr *A, MachineInstr *B) const {
+    MachineBasicBlock *BBA = A->getParent(), *BBB = B->getParent();
+    if (BBA != BBB) return DT->dominates(BBA, BBB);
+
+    // Loop through the basic block until we find A or B.
+    MachineBasicBlock::iterator I = BBA->begin();
+    for (; &*I != A && &*I != B; ++I) /*empty*/;
+
+    //if(!DT.IsPostDominators) {
+      // A dominates B if it is found first in the basic block.
+      return &*I == A;
+    //} else {
+    //  // A post-dominates B if B is found first in the basic block.
+    //  return &*I == B;
+    //}
+  }
+
+  inline bool properlyDominates(const MachineDomTreeNode* A,
+                                MachineDomTreeNode* B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  inline bool properlyDominates(MachineBasicBlock* A,
+                                MachineBasicBlock* B) const {
+    return DT->properlyDominates(A, B);
+  }
+
+  /// findNearestCommonDominator - Find nearest common dominator basic block
+  /// for basic block A and B. If there is no such block then return NULL.
+  inline MachineBasicBlock *findNearestCommonDominator(MachineBasicBlock *A,
+                                                       MachineBasicBlock *B) {
+    return DT->findNearestCommonDominator(A, B);
+  }
+
+  inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  /// getNode - return the (Post)DominatorTree node for the specified basic
+  /// block. This is the same as using operator[] on this class.
+  ///
+  inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
+    return DT->getNode(BB);
+  }
+
+  /// addNewBlock - Add a new node to the dominator tree information. This
+  /// creates a new node as a child of DomBB dominator node, linking it into
+  /// the children list of the immediate dominator.
+  inline MachineDomTreeNode *addNewBlock(MachineBasicBlock *BB,
+                                         MachineBasicBlock *DomBB) {
+    return DT->addNewBlock(BB, DomBB);
+  }
+
+  /// changeImmediateDominator - This method is used to update the dominator
+  /// tree information when a node's immediate dominator changes.
+  ///
+  inline void changeImmediateDominator(MachineBasicBlock *N,
+                                       MachineBasicBlock* NewIDom) {
+    DT->changeImmediateDominator(N, NewIDom);
+  }
+
+  inline void changeImmediateDominator(MachineDomTreeNode *N,
+                                       MachineDomTreeNode* NewIDom) {
+    DT->changeImmediateDominator(N, NewIDom);
+  }
+
+  /// eraseNode - Removes a node from the dominator tree. Block must not
+  /// dominate any other blocks. Removes node from its immediate dominator's
+  /// children list. Deletes dominator node associated with basic block BB.
+  inline void eraseNode(MachineBasicBlock *BB) {
+    DT->eraseNode(BB);
+  }
+
+  /// splitBlock - BB is split and now it has one successor. Update dominator
+  /// tree to reflect this change.
+  inline void splitBlock(MachineBasicBlock* NewBB) {
+    DT->splitBlock(NewBB);
+  }
+
+  /// isReachableFromEntry - Return true if A is dominated by the entry
+  /// block of the function containing it.
+  bool isReachableFromEntry(MachineBasicBlock *A) {
+    return DT->isReachableFromEntry(A);
+  }
+
+  virtual void releaseMemory();
+
+  virtual void print(raw_ostream &OS, const Module*) const;
+};
+
+//===-------------------------------------
+/// DominatorTree GraphTraits specialization so the DominatorTree can be
+/// iterable by generic graph iterators.
+///
+
+template<class T> struct GraphTraits;
+
+template <> struct GraphTraits<MachineDomTreeNode *> {
+  typedef MachineDomTreeNode NodeType;
+  typedef NodeType::iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(NodeType *N) {
+    return N;
+  }
+  static inline ChildIteratorType child_begin(NodeType* N) {
+    return N->begin();
+  }
+  static inline ChildIteratorType child_end(NodeType* N) {
+    return N->end();
+  }
+};
+
+template <> struct GraphTraits<MachineDominatorTree*>
+  : public GraphTraits<MachineDomTreeNode *> {
+  static NodeType *getEntryNode(MachineDominatorTree *DT) {
+    return DT->getRootNode();
+  }
+};
+
+}
+
+#endif
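A short query sketch for the dominator interface above (illustrative only, not part of this patch; assumes the caller is a pass that requested the analysis via AU.addRequired<MachineDominatorTree>() and that the helper name is hypothetical):

#include "llvm/CodeGen/MachineDominators.h"
using namespace llvm;

// Hypothetical helper: true when A dominates B and B is reachable from the
// entry block; both queries forward to the underlying DominatorTreeBase.
static bool dominatesReachable(MachineDominatorTree &MDT,
                               MachineBasicBlock *A, MachineBasicBlock *B) {
  return MDT.isReachableFromEntry(B) && MDT.dominates(A, B);
}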
diff --git a/final/include/llvm/CodeGen/MachineFrameInfo.h b/final/include/llvm/CodeGen/MachineFrameInfo.h
new file mode 100644
index 00000000000..4ea6aa3396a
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineFrameInfo.h
@@ -0,0 +1,551 @@
+//===-- CodeGen/MachineFrameInfo.h - Abstract Stack Frame Rep. --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The file defines the MachineFrameInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEFRAMEINFO_H
+#define LLVM_CODEGEN_MACHINEFRAMEINFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+class TargetData;
+class TargetRegisterClass;
+class Type;
+class MachineFunction;
+class MachineBasicBlock;
+class TargetFrameLowering;
+class BitVector;
+
+/// The CalleeSavedInfo class tracks the information needed to locate where a
+/// callee saved register is in the current frame.
+class CalleeSavedInfo {
+  unsigned Reg;
+  int FrameIdx;
+
+public:
+  explicit CalleeSavedInfo(unsigned R, int FI = 0)
+    : Reg(R), FrameIdx(FI) {}
+
+  // Accessors.
+  unsigned getReg()       const { return Reg; }
+  int getFrameIdx()       const { return FrameIdx; }
+  void setFrameIdx(int FI)      { FrameIdx = FI; }
+};
+
+/// The MachineFrameInfo class represents an abstract stack frame until
+/// prolog/epilog code is inserted. This class is key to allowing stack frame
+/// representation optimizations, such as frame pointer elimination. It also
+/// allows more mundane (but still important) optimizations, such as reordering
+/// of abstract objects on the stack frame.
+///
+/// To support this, the class assigns unique integer identifiers to stack
+/// objects requested by clients. These identifiers are negative integers for
+/// fixed stack objects (such as arguments passed on the stack) or nonnegative
+/// for objects that may be reordered. Instructions which refer to stack
+/// objects use a special MO_FrameIndex operand to represent these frame
+/// indexes.
+///
+/// Because this class keeps track of all references to the stack frame, it
+/// knows when a variable sized object is allocated on the stack. This is the
+/// sole condition which prevents frame pointer elimination, which is an
+/// important optimization on register-poor architectures. Because original
+/// variable sized alloca's in the source program are the only source of
+/// variable sized stack objects, it is safe to decide whether there will be
+/// any variable sized objects before all stack objects are known (for
+/// example, register allocator spill code never needs variable sized
+/// objects).
+///
+/// When prolog/epilog code emission is performed, the final stack frame is
+/// built and the machine instructions are modified to refer to the actual
+/// stack offsets of the object, eliminating all MO_FrameIndex operands from
+/// the program.
+///
+/// @brief Abstract Stack Frame Information
+class MachineFrameInfo {
+
+  // StackObject - Represent a single object allocated on the stack.
+  struct StackObject {
+    // SPOffset - The offset of this object from the stack pointer on entry to
+    // the function. This field has no meaning for a variable sized element.
+    int64_t SPOffset;
+
+    // The size of this object on the stack. 0 means a variable sized object,
+    // ~0ULL means a dead object.
+    uint64_t Size;
+
+    // Alignment - The required alignment of this stack slot.
+    unsigned Alignment;
+
+    // isImmutable - If true, the value of the stack object is set before
+    // entering the function and is not modified inside the function. By
+    // default, fixed objects are immutable unless marked otherwise.
+    bool isImmutable;
+
+    // isSpillSlot - If true the stack object is used as spill slot. It
+    // cannot alias any other memory objects.
+    bool isSpillSlot;
+
+    // MayNeedSP - If true the stack object triggered the creation of the stack
+    // protector. We should allocate this object right after the stack
+    // protector.
+    bool MayNeedSP;
+
+    // PreAllocated - If true, the object was mapped into the local frame
+    // block and doesn't need additional handling for allocation beyond that.
+    bool PreAllocated;
+
+    StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM,
+                bool isSS, bool NSP)
+      : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM),
+        isSpillSlot(isSS), MayNeedSP(NSP), PreAllocated(false) {}
+  };
+
+  /// Objects - The list of stack objects allocated...
+  ///
+  std::vector<StackObject> Objects;
+
+  /// NumFixedObjects - This contains the number of fixed objects contained on
+  /// the stack. Because fixed objects are stored at a negative index in the
+  /// Objects list, this is also the index to the 0th object in the list.
+  ///
+  unsigned NumFixedObjects;
+
+  /// HasVarSizedObjects - This boolean keeps track of whether any variable
+  /// sized objects have been allocated yet.
+  ///
+  bool HasVarSizedObjects;
+
+  /// FrameAddressTaken - This boolean keeps track of whether there is a call
+  /// to builtin \@llvm.frameaddress.
+  bool FrameAddressTaken;
+
+  /// ReturnAddressTaken - This boolean keeps track of whether there is a call
+  /// to builtin \@llvm.returnaddress.
+  bool ReturnAddressTaken;
+
+  /// StackSize - The prolog/epilog code inserter calculates the final stack
+  /// offsets for all of the fixed size objects, updating the Objects list
+  /// above. It then updates StackSize to contain the number of bytes that need
+  /// to be allocated on entry to the function.
+  ///
+  uint64_t StackSize;
+
+  /// OffsetAdjustment - The amount that a frame offset needs to be adjusted to
+  /// have the actual offset from the stack/frame pointer. The exact usage of
+  /// this is target-dependent, but it is typically used to adjust between
+  /// SP-relative and FP-relative offsets.
+  /// E.g., if objects are accessed via
+  /// SP then OffsetAdjustment is zero; if FP is used, OffsetAdjustment is set
+  /// to the distance between the initial SP and the value in FP. For many
+  /// targets, this value is only used when generating debug info (via
+  /// TargetRegisterInfo::getFrameIndexOffset); when generating code, the
+  /// corresponding adjustments are performed directly.
+  int OffsetAdjustment;
+
+  /// MaxAlignment - The prolog/epilog code inserter may process objects
+  /// that require greater alignment than the default alignment the target
+  /// provides. To handle this, MaxAlignment is set to the maximum alignment
+  /// needed by the objects on the current frame. If this is greater than the
+  /// native alignment maintained by the compiler, dynamic alignment code will
+  /// be needed.
+  ///
+  unsigned MaxAlignment;
+
+  /// AdjustsStack - Set to true if this function adjusts the stack -- e.g.,
+  /// when calling another function. This is only valid during and after
+  /// prolog/epilog code insertion.
+  bool AdjustsStack;
+
+  /// HasCalls - Set to true if this function has any function calls.
+  bool HasCalls;
+
+  /// StackProtectorIdx - The frame index for the stack protector.
+  int StackProtectorIdx;
+
+  /// MaxCallFrameSize - This contains the size of the largest call frame if the
+  /// target uses frame setup/destroy pseudo instructions (as defined in the
+  /// TargetFrameInfo class). This information is important for frame pointer
+  /// elimination. It is only valid during and after prolog/epilog code
+  /// insertion.
+  ///
+  unsigned MaxCallFrameSize;
+
+  /// CSInfo - The prolog/epilog code inserter fills in this vector with each
+  /// callee saved register saved in the frame. Beyond its use by the prolog/
+  /// epilog code inserter, this data is used for debug info and exception
+  /// handling.
+  std::vector<CalleeSavedInfo> CSInfo;
+
+  /// CSIValid - Has CSInfo been set yet?
+  bool CSIValid;
+
+  /// TargetFrameLowering - Target information about frame layout.
+  ///
+  const TargetFrameLowering &TFI;
+
+  /// LocalFrameObjects - References to frame indices which are mapped
+  /// into the local frame allocation block.
+  SmallVector<std::pair<int, int64_t>, 32> LocalFrameObjects;
+
+  /// LocalFrameSize - Size of the pre-allocated local frame block.
+  int64_t LocalFrameSize;
+
+  /// Required alignment of the local object blob, which is the strictest
+  /// alignment of any object in it.
+  unsigned LocalFrameMaxAlign;
+
+  /// Whether the local object blob needs to be allocated together. If not,
+  /// PEI should ignore the isPreAllocated flags on the stack objects and
+  /// just allocate them normally.
+  bool UseLocalStackAllocationBlock;
+
+public:
+  explicit MachineFrameInfo(const TargetFrameLowering &tfi) : TFI(tfi) {
+    StackSize = NumFixedObjects = OffsetAdjustment = MaxAlignment = 0;
+    HasVarSizedObjects = false;
+    FrameAddressTaken = false;
+    ReturnAddressTaken = false;
+    AdjustsStack = false;
+    HasCalls = false;
+    StackProtectorIdx = -1;
+    MaxCallFrameSize = 0;
+    CSIValid = false;
+    LocalFrameSize = 0;
+    LocalFrameMaxAlign = 0;
+    UseLocalStackAllocationBlock = false;
+  }
+
+  /// hasStackObjects - Return true if there are any stack objects in this
+  /// function.
+  ///
+  bool hasStackObjects() const { return !Objects.empty(); }
+
+  /// hasVarSizedObjects - This method may be called any time after instruction
+  /// selection is complete to determine if the stack frame for this function
+  /// contains any variable sized objects.
+  ///
+  bool hasVarSizedObjects() const { return HasVarSizedObjects; }
+
+  /// getStackProtectorIndex/setStackProtectorIndex - Return the index for the
+  /// stack protector object.
+  ///
+  int getStackProtectorIndex() const { return StackProtectorIdx; }
+  void setStackProtectorIndex(int I) { StackProtectorIdx = I; }
+
+  /// isFrameAddressTaken - This method may be called any time after instruction
+  /// selection is complete to determine if there is a call to
+  /// \@llvm.frameaddress in this function.
+  bool isFrameAddressTaken() const { return FrameAddressTaken; }
+  void setFrameAddressIsTaken(bool T) { FrameAddressTaken = T; }
+
+  /// isReturnAddressTaken - This method may be called any time after
+  /// instruction selection is complete to determine if there is a call to
+  /// \@llvm.returnaddress in this function.
+  bool isReturnAddressTaken() const { return ReturnAddressTaken; }
+  void setReturnAddressIsTaken(bool s) { ReturnAddressTaken = s; }
+
+  /// getObjectIndexBegin - Return the minimum frame object index.
+  ///
+  int getObjectIndexBegin() const { return -NumFixedObjects; }
+
+  /// getObjectIndexEnd - Return one past the maximum frame object index.
+  ///
+  int getObjectIndexEnd() const { return (int)Objects.size()-NumFixedObjects; }
+
+  /// getNumFixedObjects - Return the number of fixed objects.
+  unsigned getNumFixedObjects() const { return NumFixedObjects; }
+
+  /// getNumObjects - Return the number of objects.
+  ///
+  unsigned getNumObjects() const { return Objects.size(); }
+
+  /// mapLocalFrameObject - Map a frame index into the local object block
+  void mapLocalFrameObject(int ObjectIndex, int64_t Offset) {
+    LocalFrameObjects.push_back(std::pair<int, int64_t>(ObjectIndex, Offset));
+    Objects[ObjectIndex + NumFixedObjects].PreAllocated = true;
+  }
+
+  /// getLocalFrameObjectMap - Get the local offset mapping for an object.
+  std::pair<int, int64_t> getLocalFrameObjectMap(int i) {
+    assert(i >= 0 && (unsigned)i < LocalFrameObjects.size() &&
+           "Invalid local object reference!");
+    return LocalFrameObjects[i];
+  }
+
+  /// getLocalFrameObjectCount - Return the number of objects allocated into
+  /// the local object block.
+  int64_t getLocalFrameObjectCount() { return LocalFrameObjects.size(); }
+
+  /// setLocalFrameSize - Set the size of the local object blob.
+  void setLocalFrameSize(int64_t sz) { LocalFrameSize = sz; }
+
+  /// getLocalFrameSize - Get the size of the local object blob.
+  int64_t getLocalFrameSize() const { return LocalFrameSize; }
+
+  /// setLocalFrameMaxAlign - Required alignment of the local object blob,
+  /// which is the strictest alignment of any object in it.
+  void setLocalFrameMaxAlign(unsigned Align) { LocalFrameMaxAlign = Align; }
+
+  /// getLocalFrameMaxAlign - Return the required alignment of the local
+  /// object blob.
+  unsigned getLocalFrameMaxAlign() const { return LocalFrameMaxAlign; }
+
+  /// getUseLocalStackAllocationBlock - Get whether the local allocation blob
+  /// should be allocated together or let PEI allocate the locals in it
+  /// directly.
+  bool getUseLocalStackAllocationBlock() {return UseLocalStackAllocationBlock;}
+
+  /// setUseLocalStackAllocationBlock - Set whether the local allocation blob
+  /// should be allocated together or let PEI allocate the locals in it
+  /// directly.
+  void setUseLocalStackAllocationBlock(bool v) {
+    UseLocalStackAllocationBlock = v;
+  }
+
+  /// isObjectPreAllocated - Return true if the object was pre-allocated into
+  /// the local block.
+  bool isObjectPreAllocated(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].PreAllocated;
+  }
+
+  /// getObjectSize - Return the size of the specified object.
+  ///
+  int64_t getObjectSize(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].Size;
+  }
+
+  /// setObjectSize - Change the size of the specified stack object.
+  void setObjectSize(int ObjectIdx, int64_t Size) {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    Objects[ObjectIdx+NumFixedObjects].Size = Size;
+  }
+
+  /// getObjectAlignment - Return the alignment of the specified stack object.
+  unsigned getObjectAlignment(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].Alignment;
+  }
+
+  /// setObjectAlignment - Change the alignment of the specified stack object.
+  void setObjectAlignment(int ObjectIdx, unsigned Align) {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    Objects[ObjectIdx+NumFixedObjects].Alignment = Align;
+    MaxAlignment = std::max(MaxAlignment, Align);
+  }
+
+  /// MayNeedStackProtector - Returns true if the object may need stack
+  /// protectors.
+  bool MayNeedStackProtector(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].MayNeedSP;
+  }
+
+  /// getObjectOffset - Return the assigned stack offset of the specified object
+  /// from the incoming stack pointer.
+  ///
+  int64_t getObjectOffset(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    assert(!isDeadObjectIndex(ObjectIdx) &&
+           "Getting frame offset for a dead object?");
+    return Objects[ObjectIdx+NumFixedObjects].SPOffset;
+  }
+
+  /// setObjectOffset - Set the stack frame offset of the specified object. The
+  /// offset is relative to the stack pointer on entry to the function.
+  ///
+  void setObjectOffset(int ObjectIdx, int64_t SPOffset) {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    assert(!isDeadObjectIndex(ObjectIdx) &&
+           "Setting frame offset for a dead object?");
+    Objects[ObjectIdx+NumFixedObjects].SPOffset = SPOffset;
+  }
+
+  /// getStackSize - Return the number of bytes that must be allocated to hold
+  /// all of the fixed size frame objects. This is only valid after
+  /// Prolog/Epilog code insertion has finalized the stack frame layout.
+  ///
+  uint64_t getStackSize() const { return StackSize; }
+
+  /// setStackSize - Set the size of the stack...
+  ///
+  void setStackSize(uint64_t Size) { StackSize = Size; }
+
+  /// getOffsetAdjustment - Return the correction for frame offsets.
+  ///
+  int getOffsetAdjustment() const { return OffsetAdjustment; }
+
+  /// setOffsetAdjustment - Set the correction for frame offsets.
+  ///
+  void setOffsetAdjustment(int Adj) { OffsetAdjustment = Adj; }
+
+  /// getMaxAlignment - Return the alignment in bytes that this function must be
+  /// aligned to, which is greater than the default stack alignment provided by
+  /// the target.
+  ///
+  unsigned getMaxAlignment() const { return MaxAlignment; }
+
+  /// setMaxAlignment - Set the preferred alignment.
+  ///
+  void setMaxAlignment(unsigned Align) { MaxAlignment = Align; }
+
+  /// adjustsStack - Return true if this function adjusts the stack -- e.g.,
+  /// when calling another function. This is only valid during and after
+  /// prolog/epilog code insertion.
+  bool adjustsStack() const { return AdjustsStack; }
+  void setAdjustsStack(bool V) { AdjustsStack = V; }
+
+  /// hasCalls - Return true if the current function has any function calls.
+  bool hasCalls() const { return HasCalls; }
+  void setHasCalls(bool V) { HasCalls = V; }
+
+  /// getMaxCallFrameSize - Return the maximum size of a call frame that must be
+  /// allocated for an outgoing function call. This is only available if
+  /// CallFrameSetup/Destroy pseudo instructions are used by the target, and
+  /// then only during or after prolog/epilog code insertion.
+  ///
+  unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; }
+  void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; }
+
+  /// CreateFixedObject - Create a new object at a fixed location on the stack.
+  /// All fixed objects should be created before other objects are created for
+  /// efficiency. By default, fixed objects are immutable. This returns an
+  /// index with a negative value.
+  ///
+  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
+
+
+  /// isFixedObjectIndex - Returns true if the specified index corresponds to a
+  /// fixed stack object.
+  bool isFixedObjectIndex(int ObjectIdx) const {
+    return ObjectIdx < 0 && (ObjectIdx >= -(int)NumFixedObjects);
+  }
+
+  /// isImmutableObjectIndex - Returns true if the specified index corresponds
+  /// to an immutable object.
+  bool isImmutableObjectIndex(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].isImmutable;
+  }
+
+  /// isSpillSlotObjectIndex - Returns true if the specified index corresponds
+  /// to a spill slot.
+  bool isSpillSlotObjectIndex(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].isSpillSlot;
+  }
+
+  /// isDeadObjectIndex - Returns true if the specified index corresponds to
+  /// a dead object.
+  bool isDeadObjectIndex(int ObjectIdx) const {
+    assert(unsigned(ObjectIdx+NumFixedObjects) < Objects.size() &&
+           "Invalid Object Idx!");
+    return Objects[ObjectIdx+NumFixedObjects].Size == ~0ULL;
+  }
+
+  /// CreateStackObject - Create a new statically sized stack object, returning
+  /// a nonnegative identifier to represent it.
+  ///
+  int CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS,
+                        bool MayNeedSP = false) {
+    assert(Size != 0 && "Cannot allocate zero size stack objects!");
+    Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP));
+    int Index = (int)Objects.size() - NumFixedObjects - 1;
+    assert(Index >= 0 && "Bad frame index!");
+    MaxAlignment = std::max(MaxAlignment, Alignment);
+    return Index;
+  }
+
+  /// CreateSpillStackObject - Create a new statically sized stack object that
+  /// represents a spill slot, returning a nonnegative identifier to represent
+  /// it.
+  ///
+  int CreateSpillStackObject(uint64_t Size, unsigned Alignment) {
+    CreateStackObject(Size, Alignment, true, false);
+    int Index = (int)Objects.size() - NumFixedObjects - 1;
+    MaxAlignment = std::max(MaxAlignment, Alignment);
+    return Index;
+  }
+
+  /// RemoveStackObject - Remove or mark dead a statically sized stack object.
+  ///
+  void RemoveStackObject(int ObjectIdx) {
+    // Mark it dead.
+    Objects[ObjectIdx+NumFixedObjects].Size = ~0ULL;
+  }
+
+  /// CreateVariableSizedObject - Notify the MachineFrameInfo object that a
+  /// variable sized object has been created. This must be created whenever a
+  /// variable sized object is created, whether or not the index returned is
+  /// actually used.
+  ///
+  int CreateVariableSizedObject(unsigned Alignment) {
+    HasVarSizedObjects = true;
+    Objects.push_back(StackObject(0, Alignment, 0, false, false, true));
+    MaxAlignment = std::max(MaxAlignment, Alignment);
+    return (int)Objects.size()-NumFixedObjects-1;
+  }
+
+  /// getCalleeSavedInfo - Returns a reference to the callee saved info vector
+  /// for the current function.
+  const std::vector<CalleeSavedInfo> &getCalleeSavedInfo() const {
+    return CSInfo;
+  }
+
+  /// setCalleeSavedInfo - Used by prolog/epilog inserter to set the function's
+  /// callee saved information.
+  void setCalleeSavedInfo(const std::vector<CalleeSavedInfo> &CSI) {
+    CSInfo = CSI;
+  }
+
+  /// isCalleeSavedInfoValid - Has the callee saved info been calculated yet?
+  bool isCalleeSavedInfoValid() const { return CSIValid; }
+
+  void setCalleeSavedInfoValid(bool v) { CSIValid = v; }
+
+  /// getPristineRegs - Return a set of physical registers that are pristine on
+  /// entry to the MBB.
+  ///
+  /// Pristine registers hold a value that is useless to the current function,
+  /// but that must be preserved - they are callee saved registers that have not
+  /// been saved yet.
+  ///
+  /// Before the PrologueEpilogueInserter has placed the CSR spill code, this
+  /// method always returns an empty set.
+  BitVector getPristineRegs(const MachineBasicBlock *MBB) const;
+
+  /// print - Used by the MachineFunction printer to print information about
+  /// stack objects. Implemented in MachineFunction.cpp
+  ///
+  void print(const MachineFunction &MF, raw_ostream &OS) const;
+
+  /// dump - Print the function to stderr.
+  void dump(const MachineFunction &MF) const;
+};
+
+} // End llvm namespace
+
+#endif
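A small allocation sketch against the MachineFrameInfo interface above (illustrative only, not part of this patch; sizes, offsets, and the helper name are assumptions):

#include "llvm/CodeGen/MachineFrameInfo.h"
using namespace llvm;

// Hypothetical lowering fragment: one reorderable 8-byte spill slot and one
// fixed incoming-argument object at SP+16. Per the class comment above, the
// spill slot gets a nonnegative index and the fixed object a negative one.
static void createExampleSlots(MachineFrameInfo *MFI) {
  int SpillFI = MFI->CreateSpillStackObject(/*Size=*/8, /*Alignment=*/8);
  int ArgFI   = MFI->CreateFixedObject(/*Size=*/8, /*SPOffset=*/16,
                                       /*Immutable=*/true);
  (void)SpillFI; (void)ArgFI; // indices would be stored in frame-index operands
}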
diff --git a/final/include/llvm/CodeGen/MachineFunction.h b/final/include/llvm/CodeGen/MachineFunction.h
new file mode 100644
index 00000000000..f56c053e470
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineFunction.h
@@ -0,0 +1,478 @@
+//===-- llvm/CodeGen/MachineFunction.h --------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect native machine code for a function. This class contains a list of
+// MachineBasicBlock instances that make up the current compiled function.
+//
+// This class also contains pointers to various classes which hold
+// target-specific information about the generated code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEFUNCTION_H
+#define LLVM_CODEGEN_MACHINEFUNCTION_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Recycler.h"
+
+namespace llvm {
+
+class Value;
+class Function;
+class GCModuleInfo;
+class MachineRegisterInfo;
+class MachineFrameInfo;
+class MachineConstantPool;
+class MachineJumpTableInfo;
+class MachineModuleInfo;
+class MCContext;
+class Pass;
+class TargetMachine;
+class TargetRegisterClass;
+struct MachinePointerInfo;
+
+template <>
+struct ilist_traits<MachineBasicBlock>
+    : public ilist_default_traits<MachineBasicBlock> {
+  mutable ilist_half_node<MachineBasicBlock> Sentinel;
+public:
+  MachineBasicBlock *createSentinel() const {
+    return static_cast<MachineBasicBlock*>(&Sentinel);
+  }
+  void destroySentinel(MachineBasicBlock *) const {}
+
+  MachineBasicBlock *provideInitialHead() const { return createSentinel(); }
+  MachineBasicBlock *ensureHead(MachineBasicBlock*) const {
+    return createSentinel();
+  }
+  static void noteHead(MachineBasicBlock*, MachineBasicBlock*) {}
+
+  void addNodeToList(MachineBasicBlock* MBB);
+  void removeNodeFromList(MachineBasicBlock* MBB);
+  void deleteNode(MachineBasicBlock *MBB);
+private:
+  void createNode(const MachineBasicBlock &);
+};
+
+/// MachineFunctionInfo - This class can be derived from and used by targets to
+/// hold private target-specific information for each MachineFunction. Objects
+/// of this type are accessed/created with MF::getInfo and destroyed when the
+/// MachineFunction is destroyed.
+struct MachineFunctionInfo {
+  virtual ~MachineFunctionInfo();
+};
+
+class MachineFunction {
+  const Function *Fn;
+  const TargetMachine &Target;
+  MCContext &Ctx;
+  MachineModuleInfo &MMI;
+  GCModuleInfo *GMI;
+
+  // RegInfo - Information about each register in use in the function.
+  MachineRegisterInfo *RegInfo;
+
+  // Used to keep track of target-specific per-machine function information for
+  // the target implementation.
+  MachineFunctionInfo *MFInfo;
+
+  // Keep track of objects allocated on the stack.
+  MachineFrameInfo *FrameInfo;
+
+  // Keep track of constants which are spilled to memory
+  MachineConstantPool *ConstantPool;
+
+  // Keep track of jump tables for switch instructions
+  MachineJumpTableInfo *JumpTableInfo;
+
+  // Function-level unique numbering for MachineBasicBlocks. When a
+  // MachineBasicBlock is inserted into a MachineFunction, it is automatically
+  // numbered and this vector keeps track of the mapping from IDs to MBBs.
+  std::vector<MachineBasicBlock*> MBBNumbering;
+
+  // Pool-allocate MachineFunction-lifetime and IR objects.
+  BumpPtrAllocator Allocator;
+
+  // Allocation management for instructions in function.
+  Recycler<MachineInstr> InstructionRecycler;
+
+  // Allocation management for basic blocks in function.
+  Recycler<MachineBasicBlock> BasicBlockRecycler;
+
+  // List of machine basic blocks in function
+  typedef ilist<MachineBasicBlock> BasicBlockListType;
+  BasicBlockListType BasicBlocks;
+
+  /// FunctionNumber - This provides a unique ID for each function emitted in
+  /// this translation unit.
+  ///
+  unsigned FunctionNumber;
+
+  /// Alignment - The alignment of the function.
+  unsigned Alignment;
+
+  /// CallsSetJmp - True if the function calls setjmp or sigsetjmp. This is used
+  /// to limit optimizations which cannot reason about the control flow of
+  /// setjmp.
+  bool CallsSetJmp;
+
+  MachineFunction(const MachineFunction &); // DO NOT IMPLEMENT
+  void operator=(const MachineFunction&);   // DO NOT IMPLEMENT
+public:
+  MachineFunction(const Function *Fn, const TargetMachine &TM,
+                  unsigned FunctionNum, MachineModuleInfo &MMI,
+                  GCModuleInfo* GMI);
+  ~MachineFunction();
+
+  MachineModuleInfo &getMMI() const { return MMI; }
+  GCModuleInfo *getGMI() const { return GMI; }
+  MCContext &getContext() const { return Ctx; }
+
+  /// getFunction - Return the LLVM function that this machine code represents.
+  ///
+  const Function *getFunction() const { return Fn; }
+
+  /// getFunctionNumber - Return a unique ID for the current function.
+  ///
+  unsigned getFunctionNumber() const { return FunctionNumber; }
+
+  /// getTarget - Return the target machine this machine code is compiled with.
+  ///
+  const TargetMachine &getTarget() const { return Target; }
+
+  /// getRegInfo - Return information about the registers currently in use.
+  ///
+  MachineRegisterInfo &getRegInfo() { return *RegInfo; }
+  const MachineRegisterInfo &getRegInfo() const { return *RegInfo; }
+
+  /// getFrameInfo - Return the frame info object for the current function.
+  /// This object contains information about objects allocated on the stack
+  /// frame of the current function in an abstract way.
+  ///
+  MachineFrameInfo *getFrameInfo() { return FrameInfo; }
+  const MachineFrameInfo *getFrameInfo() const { return FrameInfo; }
+
+  /// getJumpTableInfo - Return the jump table info object for the current
+  /// function. This object contains information about jump tables in the
+  /// current function. If the current function has no jump tables, this will
+  /// return null.
+  const MachineJumpTableInfo *getJumpTableInfo() const { return JumpTableInfo; }
+  MachineJumpTableInfo *getJumpTableInfo() { return JumpTableInfo; }
+
+  /// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function; if it
+  /// does not already exist, allocate one.
+  MachineJumpTableInfo *getOrCreateJumpTableInfo(unsigned JTEntryKind);
+
+
+  /// getConstantPool - Return the constant pool object for the current
+  /// function.
+  ///
+  MachineConstantPool *getConstantPool() { return ConstantPool; }
+  const MachineConstantPool *getConstantPool() const { return ConstantPool; }
+
+  /// getAlignment - Return the alignment (log2, not bytes) of the function.
+  ///
+  unsigned getAlignment() const { return Alignment; }
+
+  /// setAlignment - Set the alignment (log2, not bytes) of the function.
+  ///
+  void setAlignment(unsigned A) { Alignment = A; }
+
+  /// EnsureAlignment - Make sure the function is at least 1 << A bytes aligned.
+  void EnsureAlignment(unsigned A) {
+    if (Alignment < A) Alignment = A;
+  }
+
+  /// callsSetJmp - Returns true if the function calls setjmp or sigsetjmp.
+  bool callsSetJmp() const {
+    return CallsSetJmp;
+  }
+
+  /// setCallsSetJmp - Set a flag that indicates if there's a call to setjmp or
+  /// sigsetjmp.
+  void setCallsSetJmp(bool B) {
+    CallsSetJmp = B;
+  }
+
+  /// getInfo - Keep track of various per-function pieces of information for
+  /// backends that would like to do so.
+  ///
+  template<typename Ty>
+  Ty *getInfo() {
+    if (!MFInfo) {
+      // This should be just `new (Allocator.Allocate<Ty>()) Ty(*this)', but
+      // that apparently breaks GCC 3.3.
+      Ty *Loc = static_cast<Ty*>(Allocator.Allocate(sizeof(Ty),
+                                                    AlignOf<Ty>::Alignment));
+      MFInfo = new (Loc) Ty(*this);
+    }
+    return static_cast<Ty*>(MFInfo);
+  }
+
+  template<typename Ty>
+  const Ty *getInfo() const {
+    return const_cast<MachineFunction*>(this)->getInfo<Ty>();
+  }
+
+  /// getBlockNumbered - MachineBasicBlocks are automatically numbered when they
+  /// are inserted into the machine function. The block number for a machine
+  /// basic block can be found by using the MBB::getBlockNumber method; this
+  /// method provides the inverse mapping.
+  ///
+  MachineBasicBlock *getBlockNumbered(unsigned N) const {
+    assert(N < MBBNumbering.size() && "Illegal block number");
+    assert(MBBNumbering[N] && "Block was removed from the machine function!");
+    return MBBNumbering[N];
+  }
+
+  /// getNumBlockIDs - Return the number of MBB ID's allocated.
+  ///
+  unsigned getNumBlockIDs() const { return (unsigned)MBBNumbering.size(); }
+
+  /// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+  /// recomputes them. This guarantees that the MBB numbers are sequential,
+  /// dense, and match the ordering of the blocks within the function. If a
+  /// specific MachineBasicBlock is specified, only that block and those after
+  /// it are renumbered.
+  void RenumberBlocks(MachineBasicBlock *MBBFrom = 0);
+
+  /// print - Print out the MachineFunction in a format suitable for debugging
+  /// to the specified stream.
+  ///
+  void print(raw_ostream &OS, SlotIndexes* = 0) const;
+
+  /// viewCFG - This function is meant for use from the debugger. You can just
+  /// say 'call F->viewCFG()' and a ghostview window should pop up from the
+  /// program, displaying the CFG of the current function with the code for each
+  /// basic block inside. This depends on there being a 'dot' and 'gv' program
+  /// in your path.
+  ///
+  void viewCFG() const;
+
+  /// viewCFGOnly - This function is meant for use from the debugger. It works
+  /// just like viewCFG, but it does not include the contents of basic blocks
+  /// into the nodes, just the label. If you are only interested in the CFG
+  /// this can make the graph smaller.
+  ///
+  void viewCFGOnly() const;
+
+  /// dump - Print the current MachineFunction to cerr, useful for debugger use.
+  ///
+  void dump() const;
+
+  /// verify - Run the current MachineFunction through the machine code
+  /// verifier, useful for debugger use.
+  void verify(Pass *p = NULL, const char *Banner = NULL) const;
+
+  // Provide accessors for the MachineBasicBlock list...
+  typedef BasicBlockListType::iterator iterator;
+  typedef BasicBlockListType::const_iterator const_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator>       reverse_iterator;
+
+  /// addLiveIn - Add the specified physical register as a live-in value and
+  /// create a corresponding virtual register for it.
+  unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC);
+
+  //===--------------------------------------------------------------------===//
+  // BasicBlock accessor functions.
+  //
+  iterator       begin()       { return BasicBlocks.begin(); }
+  const_iterator begin() const { return BasicBlocks.begin(); }
+  iterator       end()         { return BasicBlocks.end(); }
+  const_iterator end() const   { return BasicBlocks.end(); }
+
+  reverse_iterator       rbegin()       { return BasicBlocks.rbegin(); }
+  const_reverse_iterator rbegin() const { return BasicBlocks.rbegin(); }
+  reverse_iterator       rend()         { return BasicBlocks.rend(); }
+  const_reverse_iterator rend() const   { return BasicBlocks.rend(); }
+
+  unsigned size() const { return (unsigned)BasicBlocks.size();}
+  bool empty() const { return BasicBlocks.empty(); }
+  const MachineBasicBlock &front() const { return BasicBlocks.front(); }
+        MachineBasicBlock &front()       { return BasicBlocks.front(); }
+  const MachineBasicBlock &back() const  { return BasicBlocks.back(); }
+        MachineBasicBlock &back()        { return BasicBlocks.back(); }
+
+  void push_back (MachineBasicBlock *MBB) { BasicBlocks.push_back (MBB); }
+  void push_front(MachineBasicBlock *MBB) { BasicBlocks.push_front(MBB); }
+  void insert(iterator MBBI, MachineBasicBlock *MBB) {
+    BasicBlocks.insert(MBBI, MBB);
+  }
+  void splice(iterator InsertPt, iterator MBBI) {
+    BasicBlocks.splice(InsertPt, BasicBlocks, MBBI);
+  }
+  void splice(iterator InsertPt, iterator MBBI, iterator MBBE) {
+    BasicBlocks.splice(InsertPt, BasicBlocks, MBBI, MBBE);
+  }
+
+  void remove(iterator MBBI) {
+    BasicBlocks.remove(MBBI);
+  }
+  void erase(iterator MBBI) {
+    BasicBlocks.erase(MBBI);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Internal functions used to automatically number MachineBasicBlocks
+  //
+
+  /// addToMBBNumbering - Add the specified MBB to this function's numbering,
+  /// returning the unique number assigned to it.
+  ///
+  unsigned addToMBBNumbering(MachineBasicBlock *MBB) {
+    MBBNumbering.push_back(MBB);
+    return (unsigned)MBBNumbering.size()-1;
+  }
+
+  /// removeFromMBBNumbering - Remove the specific machine basic block from our
+  /// tracker; this is only really to be used by the MachineBasicBlock
+  /// implementation.
+  void removeFromMBBNumbering(unsigned N) {
+    assert(N < MBBNumbering.size() && "Illegal basic block #");
+    MBBNumbering[N] = 0;
+  }
+
+  /// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+  /// of `new MachineInstr'.
+  ///
+  MachineInstr *CreateMachineInstr(const TargetInstrDesc &TID,
+                                   DebugLoc DL,
+                                   bool NoImp = false);
+
+  /// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+  /// 'Orig' instruction, identical in all ways except the instruction
+  /// has no parent, prev, or next.
+  ///
+  /// See also TargetInstrInfo::duplicate() for target-specific fixes to cloned
+  /// instructions.
+  MachineInstr *CloneMachineInstr(const MachineInstr *Orig);
+
+  /// DeleteMachineInstr - Delete the given MachineInstr.
+  ///
+  void DeleteMachineInstr(MachineInstr *MI);
+
+  /// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+  /// instead of `new MachineBasicBlock'.
+  ///
+  MachineBasicBlock *CreateMachineBasicBlock(const BasicBlock *bb = 0);
+
+  /// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+  ///
+  void DeleteMachineBasicBlock(MachineBasicBlock *MBB);
+
+  /// getMachineMemOperand - Allocate a new MachineMemOperand.
+  /// MachineMemOperands are owned by the MachineFunction and need not be
+  /// explicitly deallocated.
+  MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo,
+                                          unsigned f, uint64_t s,
+                                          unsigned base_alignment,
+                                          const MDNode *TBAAInfo = 0);
+
+  /// getMachineMemOperand - Allocate a new MachineMemOperand by copying
+  /// an existing one, adjusting by an offset and using the given size.
+  /// MachineMemOperands are owned by the MachineFunction and need not be
+  /// explicitly deallocated.
+  MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
+                                          int64_t Offset, uint64_t Size);
+
+  /// allocateMemRefsArray - Allocate an array to hold MachineMemOperand
+  /// pointers. This array is owned by the MachineFunction.
+  MachineInstr::mmo_iterator allocateMemRefsArray(unsigned long Num);
+
+  /// extractLoadMemRefs - Allocate an array and populate it with just the
+  /// load information from the given MachineMemOperand sequence.
+  std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+    extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+                       MachineInstr::mmo_iterator End);
+
+  /// extractStoreMemRefs - Allocate an array and populate it with just the
+  /// store information from the given MachineMemOperand sequence.
+  std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+    extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+                        MachineInstr::mmo_iterator End);
+
+  //===--------------------------------------------------------------------===//
+  // Label Manipulation.
+  //
+
+  /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
+  /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
+  /// normal 'L' label is returned.
+  MCSymbol *getJTISymbol(unsigned JTI, MCContext &Ctx,
+                         bool isLinkerPrivate = false) const;
+
+  /// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+  /// base.
+  MCSymbol *getPICBaseSymbol() const;
+};
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for function basic block graphs (CFGs)
+//===--------------------------------------------------------------------===//
+
+// Provide specializations of GraphTraits to be able to treat a
+// machine function as a graph of machine basic blocks... these are
+// the same as the machine basic block iterators, except that the root
+// node is implicitly the first node of the function.
+//
+template <> struct GraphTraits<MachineFunction*> :
+  public GraphTraits<MachineBasicBlock*> {
+  static NodeType *getEntryNode(MachineFunction *F) {
+    return &F->front();
+  }
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  typedef MachineFunction::iterator nodes_iterator;
+  static nodes_iterator nodes_begin(MachineFunction *F) { return F->begin(); }
+  static nodes_iterator nodes_end  (MachineFunction *F) { return F->end(); }
+};
+template <> struct GraphTraits<const MachineFunction*> :
+  public GraphTraits<const MachineBasicBlock*> {
+  static NodeType *getEntryNode(const MachineFunction *F) {
+    return &F->front();
+  }
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  typedef MachineFunction::const_iterator nodes_iterator;
+  static nodes_iterator nodes_begin(const MachineFunction *F) {
+    return F->begin();
+  }
+  static nodes_iterator nodes_end  (const MachineFunction *F) {
+    return F->end();
+  }
+};
+
+
+// Provide specializations of GraphTraits to be able to treat a function as a
+// graph of basic blocks... and to walk it in inverse order. Inverse order for
+// a function is considered to be when traversing the predecessor edges of a BB
+// instead of the successor edges.
+//
+template <> struct GraphTraits<Inverse<MachineFunction*> > :
+  public GraphTraits<Inverse<MachineBasicBlock*> > {
+  static NodeType *getEntryNode(Inverse<MachineFunction *> G) {
+    return &G.Graph->front();
+  }
+};
+template <> struct GraphTraits<Inverse<const MachineFunction*> > :
+  public GraphTraits<Inverse<const MachineBasicBlock*> > {
+  static NodeType *getEntryNode(Inverse<const MachineFunction *> G) {
+    return &G.Graph->front();
+  }
+};
+
+} // End llvm namespace
+
+#endif
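A brief traversal sketch over the container interface above (illustrative only, not part of this patch; the helper name is hypothetical):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

// Hypothetical debugging helper: count instructions via the basic-block list
// accessors. The GraphTraits specializations above let generic graph
// algorithms (e.g. depth-first iterators) walk the same CFG.
static unsigned countInstructions(const MachineFunction &MF) {
  unsigned N = 0;
  for (MachineFunction::const_iterator BB = MF.begin(), E = MF.end();
       BB != E; ++BB)
    for (MachineBasicBlock::const_iterator MI = BB->begin(), ME = BB->end();
         MI != ME; ++MI)
      ++N;
  return N;
}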
diff --git a/final/include/llvm/CodeGen/MachineFunctionAnalysis.h b/final/include/llvm/CodeGen/MachineFunctionAnalysis.h
new file mode 100644
index 00000000000..50676ad4ad4
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineFunctionAnalysis.h
@@ -0,0 +1,54 @@
+//===-- MachineFunctionAnalysis.h - Owner of MachineFunctions ----*-C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineFunctionAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
+#define LLVM_CODEGEN_MACHINE_FUNCTION_ANALYSIS_H
+
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class MachineFunction;
+
+/// MachineFunctionAnalysis - This class is a Pass that manages a
+/// MachineFunction object.
+class MachineFunctionAnalysis : public FunctionPass {
+private:
+  const TargetMachine &TM;
+  CodeGenOpt::Level OptLevel;
+  MachineFunction *MF;
+  unsigned NextFnNum;
+public:
+  static char ID;
+  explicit MachineFunctionAnalysis(const TargetMachine &tm,
+                                   CodeGenOpt::Level OL = CodeGenOpt::Default);
+  ~MachineFunctionAnalysis();
+
+  MachineFunction &getMF() const { return *MF; }
+  CodeGenOpt::Level getOptLevel() const { return OptLevel; }
+
+  virtual const char* getPassName() const {
+    return "Machine Function Analysis";
+  }
+
+private:
+  virtual bool doInitialization(Module &M);
+  virtual bool runOnFunction(Function &F);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineFunctionPass.h b/final/include/llvm/CodeGen/MachineFunctionPass.h
new file mode 100644
index 00000000000..b7bf0a36c44
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineFunctionPass.h
@@ -0,0 +1,59 @@
+//===-- MachineFunctionPass.h - Pass for MachineFunctions --------*-C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineFunctionPass class. MachineFunctionPass's are
+// just FunctionPass's, except they operate on machine code as part of a code
+// generator. Because they operate on machine code, not the LLVM
+// representation, MachineFunctionPass's are not allowed to modify the LLVM
+// representation. Due to this limitation, the MachineFunctionPass class takes
+// care of declaring that no LLVM passes are invalidated.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H
+#define LLVM_CODEGEN_MACHINE_FUNCTION_PASS_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class MachineFunction;
+
+/// MachineFunctionPass - This class adapts the FunctionPass interface to
+/// allow convenient creation of passes that operate on the MachineFunction
+/// representation. Instead of overriding runOnFunction, subclasses
+/// override runOnMachineFunction.
+class MachineFunctionPass : public FunctionPass {
+protected:
+  explicit MachineFunctionPass(char &ID) : FunctionPass(ID) {}
+
+  /// runOnMachineFunction - This method must be overloaded to perform the
+  /// desired machine code transformation or analysis.
+  ///
+  virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
+
+  /// getAnalysisUsage - Subclasses that override getAnalysisUsage
+  /// must call this.
+  ///
+  /// For MachineFunctionPasses, calling AU.setPreservesCFG() indicates that
+  /// the pass does not modify the MachineBasicBlock CFG.
+  ///
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+private:
+  /// createPrinterPass - Get a machine function printer pass.
+  virtual Pass *createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const;
+
+  virtual bool runOnFunction(Function &F);
+};
+
+} // End llvm namespace
+
+#endif
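A skeleton subclass for the interface above (illustrative only, not part of this patch; the pass name is hypothetical and pass registration is omitted):

#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
// Hypothetical no-op pass: subclasses override runOnMachineFunction rather
// than runOnFunction, and chain getAnalysisUsage to the base class.
struct ExampleMachinePass : public MachineFunctionPass {
  static char ID;
  ExampleMachinePass() : MachineFunctionPass(ID) {}

  virtual bool runOnMachineFunction(MachineFunction &MF) {
    return false; // made no changes
  }

  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    AU.setPreservesCFG(); // we leave the MachineBasicBlock CFG intact
    MachineFunctionPass::getAnalysisUsage(AU); // required by the contract above
  }
};
char ExampleMachinePass::ID = 0;
} // anonymous namespace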
diff --git a/final/include/llvm/CodeGen/MachineInstr.h b/final/include/llvm/CodeGen/MachineInstr.h
new file mode 100644
index 00000000000..0f69a7789cf
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineInstr.h
@@ -0,0 +1,578 @@
+//===-- llvm/CodeGen/MachineInstr.h - MachineInstr class --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MachineInstr class, which is the
+// basic representation for all target dependent machine instructions used by
+// the back end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEINSTR_H
+#define LLVM_CODEGEN_MACHINEINSTR_H
+
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetOpcodes.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/DebugLoc.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+class AliasAnalysis;
+class TargetInstrDesc;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class MachineFunction;
+class MachineMemOperand;
+
+//===----------------------------------------------------------------------===//
+/// MachineInstr - Representation of each machine instruction.
+///
+class MachineInstr : public ilist_node<MachineInstr> {
+public:
+  typedef MachineMemOperand **mmo_iterator;
+
+  /// Flags to specify different kinds of comments to output in
+  /// assembly code. These flags carry semantic information not
+  /// otherwise easily derivable from the IR text.
+  ///
+  enum CommentFlag {
+    ReloadReuse = 0x1
+  };
+
+  enum MIFlag {
+    NoFlags = 0,
+    FrameSetup = 1 << 0            // Instruction is used as a part of
+                                   // function frame setup code.
+  };
+private:
+  const TargetInstrDesc *TID;      // Instruction descriptor.
+  uint16_t NumImplicitOps;         // Number of implicit operands (which
+                                   // are determined at construction time).
+
+  uint8_t Flags;                   // Various bits of additional
+                                   // information about machine
+                                   // instruction.
+
+  uint8_t AsmPrinterFlags;         // Various bits of information used by
+                                   // the AsmPrinter to emit helpful
+                                   // comments. This is *not* semantic
+                                   // information. Do not use this for
+                                   // anything other than to convey comment
+                                   // information to AsmPrinter.
+
+  std::vector<MachineOperand> Operands; // the operands
+  mmo_iterator MemRefs;            // information on memory references
+  mmo_iterator MemRefsEnd;
+  MachineBasicBlock *Parent;       // Pointer to the owning basic block.
+  DebugLoc debugLoc;               // Source line information.
+
+  // OperandsComplete - Return true if it's illegal to add a new operand.
+  bool OperandsComplete() const;
+
+  MachineInstr(const MachineInstr&);   // DO NOT IMPLEMENT
+  void operator=(const MachineInstr&); // DO NOT IMPLEMENT
+
+  // Intrusive list support
+  friend struct ilist_traits<MachineInstr>;
+  friend struct ilist_traits<MachineBasicBlock>;
+  void setParent(MachineBasicBlock *P) { Parent = P; }
+
+  /// MachineInstr ctor - This constructor creates a copy of the given
+  /// MachineInstr in the given MachineFunction.
+  MachineInstr(MachineFunction &, const MachineInstr &);
+
+  /// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+  /// TID NULL and no operands.
+  MachineInstr();
+
+  // The next two constructors have DebugLoc and non-DebugLoc versions;
+  // over time, the non-DebugLoc versions should be phased out and eventually
+  // removed.
+
+  /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+  /// implicit operands. It reserves space for the number of operands specified
+  /// by the TargetInstrDesc. The version with a DebugLoc should be preferred.
+  explicit MachineInstr(const TargetInstrDesc &TID, bool NoImp = false);
+
+  /// MachineInstr ctor - Work exactly the same as the ctor above, except that
+  /// the MachineInstr is created and added to the end of the specified basic
+  /// block. The version with a DebugLoc should be preferred.
+  MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &TID);
+
+  /// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+  /// implicit operands. It reserves space for the number of operands specified
+  /// by the TargetInstrDesc. An explicit DebugLoc is supplied.
+  explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl,
+                        bool NoImp = false);
+
+  /// MachineInstr ctor - Work exactly the same as the ctor above, except that
+  /// the MachineInstr is created and added to the end of the specified basic
+  /// block.
+  MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
+               const TargetInstrDesc &TID);
+
+  ~MachineInstr();
+
+  // MachineInstrs are pool-allocated and owned by MachineFunction.
+  friend class MachineFunction;
+
+public:
+  const MachineBasicBlock* getParent() const { return Parent; }
+  MachineBasicBlock* getParent() { return Parent; }
+
+  /// getAsmPrinterFlags - Return the asm printer flags bitvector.
+  ///
+  uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; }
+
+  /// clearAsmPrinterFlags - clear the AsmPrinter bitvector
+  ///
+  void clearAsmPrinterFlags() { AsmPrinterFlags = 0; }
+
+  /// getAsmPrinterFlag - Return whether an AsmPrinter flag is set.
+  ///
+  bool getAsmPrinterFlag(CommentFlag Flag) const {
+    return AsmPrinterFlags & Flag;
+  }
+
+  /// setAsmPrinterFlag - Set a flag for the AsmPrinter.
+  ///
+  void setAsmPrinterFlag(CommentFlag Flag) {
+    AsmPrinterFlags |= (uint8_t)Flag;
+  }
+
+  /// getFlags - Return the MI flags bitvector.
+  uint8_t getFlags() const {
+    return Flags;
+  }
+
+  /// getFlag - Return whether an MI flag is set.
+  bool getFlag(MIFlag Flag) const {
+    return Flags & Flag;
+  }
+
+  /// setFlag - Set a MI flag.
+  void setFlag(MIFlag Flag) {
+    Flags |= (uint8_t)Flag;
+  }
+
+  void setFlags(unsigned flags) {
+    Flags = flags;
+  }
+
+  /// clearAsmPrinterFlag - clear specific AsmPrinter flags
+  ///
+  void clearAsmPrinterFlag(CommentFlag Flag) {
+    AsmPrinterFlags &= ~Flag;
+  }
+
+  /// getDebugLoc - Returns the debug location id of this MachineInstr.
+  ///
+  DebugLoc getDebugLoc() const { return debugLoc; }
+
+  /// getDesc - Returns the target instruction descriptor of this
+  /// MachineInstr.
+  const TargetInstrDesc &getDesc() const { return *TID; }
+
+  /// getOpcode - Returns the opcode of this MachineInstr.
+  ///
+  int getOpcode() const { return TID->Opcode; }
+
+  /// Access to explicit operands of the instruction.
+  ///
+  unsigned getNumOperands() const { return (unsigned)Operands.size(); }
+
+  const MachineOperand& getOperand(unsigned i) const {
+    assert(i < getNumOperands() && "getOperand() out of range!");
+    return Operands[i];
+  }
+  MachineOperand& getOperand(unsigned i) {
+    assert(i < getNumOperands() && "getOperand() out of range!");
+    return Operands[i];
+  }
+
+  /// getNumExplicitOperands - Returns the number of non-implicit operands.
+  ///
+  unsigned getNumExplicitOperands() const;
+
+  /// iterator/begin/end - Iterate over all operands of a machine instruction.
+  typedef std::vector<MachineOperand>::iterator mop_iterator;
+  typedef std::vector<MachineOperand>::const_iterator const_mop_iterator;
+
+  mop_iterator operands_begin() { return Operands.begin(); }
+  mop_iterator operands_end() { return Operands.end(); }
+
+  const_mop_iterator operands_begin() const { return Operands.begin(); }
+  const_mop_iterator operands_end() const { return Operands.end(); }
+
+  /// Access to memory operands of the instruction
+  mmo_iterator memoperands_begin() const { return MemRefs; }
+  mmo_iterator memoperands_end() const { return MemRefsEnd; }
+  bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+
+  /// hasOneMemOperand - Return true if this instruction has exactly one
+  /// MachineMemOperand.
+  bool hasOneMemOperand() const {
+    return MemRefsEnd - MemRefs == 1;
+  }
+
+  enum MICheckType {
+    CheckDefs,      // Check all operands for equality
+    IgnoreDefs,     // Ignore all definitions
+    IgnoreVRegDefs  // Ignore virtual register definitions
+  };
+
+  /// isIdenticalTo - Return true if this instruction is identical to (same
+  /// opcode and same operands as) the specified instruction.
+  bool isIdenticalTo(const MachineInstr *Other,
+                     MICheckType Check = CheckDefs) const;
+
+  /// removeFromParent - This method unlinks 'this' from the containing basic
+  /// block, and returns it, but does not delete it.
+  MachineInstr *removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing basic
+  /// block and deletes it.
+  void eraseFromParent();
+
+  /// isLabel - Returns true if the MachineInstr represents a label.
+  ///
+  bool isLabel() const {
+    return getOpcode() == TargetOpcode::PROLOG_LABEL ||
+           getOpcode() == TargetOpcode::EH_LABEL ||
+           getOpcode() == TargetOpcode::GC_LABEL;
+  }
+
+  bool isPrologLabel() const {
+    return getOpcode() == TargetOpcode::PROLOG_LABEL;
+  }
+  bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; }
+  bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; }
+  bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; }
+
+  bool isPHI() const { return getOpcode() == TargetOpcode::PHI; }
+  bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
+  bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
+  bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
+  bool isStackAligningInlineAsm() const;
+  bool isInsertSubreg() const {
+    return getOpcode() == TargetOpcode::INSERT_SUBREG;
+  }
+  bool isSubregToReg() const {
+    return getOpcode() == TargetOpcode::SUBREG_TO_REG;
+  }
+  bool isRegSequence() const {
+    return getOpcode() == TargetOpcode::REG_SEQUENCE;
+  }
+  bool isCopy() const {
+    return getOpcode() == TargetOpcode::COPY;
+  }
+
+  /// isCopyLike - Return true if the instruction behaves like a copy.
+  /// This does not include native copy instructions.
+  bool isCopyLike() const {
+    return isCopy() || isSubregToReg();
+  }
+
+  /// isIdentityCopy - Return true if the instruction is an identity copy.
+  bool isIdentityCopy() const {
+    return isCopy() && getOperand(0).getReg() == getOperand(1).getReg() &&
+           getOperand(0).getSubReg() == getOperand(1).getSubReg();
+  }
+
+  /// readsRegister - Return true if the MachineInstr reads the specified
+  /// register. If TargetRegisterInfo is passed, then it also checks if there
+  /// is a read of a super-register.
+  /// This does not count partial redefines of virtual registers as reads:
+  /// %reg1024:6 = OP.
+  bool readsRegister(unsigned Reg, const TargetRegisterInfo *TRI = NULL) const {
+    return findRegisterUseOperandIdx(Reg, false, TRI) != -1;
+  }
+
+  /// readsVirtualRegister - Return true if the MachineInstr reads the specified
+  /// virtual register. Take into account that a partial define is a
+  /// read-modify-write operation.
+  bool readsVirtualRegister(unsigned Reg) const {
+    return readsWritesVirtualRegister(Reg).first;
+  }
+
+  /// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+  /// indicating if this instruction reads or writes Reg. This also considers
+  /// partial defines.
+  /// If Ops is not null, all operand indices for Reg are added.
+  std::pair<bool,bool> readsWritesVirtualRegister(unsigned Reg,
+                                SmallVectorImpl<unsigned> *Ops = 0) const;
+
+  /// killsRegister - Return true if the MachineInstr kills the specified
+  /// register. If TargetRegisterInfo is passed, then it also checks if there is
+  /// a kill of a super-register.
+  bool killsRegister(unsigned Reg, const TargetRegisterInfo *TRI = NULL) const {
+    return findRegisterUseOperandIdx(Reg, true, TRI) != -1;
+  }
+
+  /// definesRegister - Return true if the MachineInstr fully defines the
+  /// specified register. If TargetRegisterInfo is passed, then it also checks
+  /// if there is a def of a super-register.
+  /// NOTE: It's ignoring subreg indices on virtual registers.
+  bool definesRegister(unsigned Reg, const TargetRegisterInfo *TRI=NULL) const {
+    return findRegisterDefOperandIdx(Reg, false, false, TRI) != -1;
+  }
+
+  /// modifiesRegister - Return true if the MachineInstr modifies (fully define
+  /// or partially define) the specified register.
+  /// NOTE: It's ignoring subreg indices on virtual registers.
+  bool modifiesRegister(unsigned Reg, const TargetRegisterInfo *TRI) const {
+    return findRegisterDefOperandIdx(Reg, false, true, TRI) != -1;
+  }
+
+  /// registerDefIsDead - Returns true if the register is dead in this machine
+  /// instruction. If TargetRegisterInfo is passed, then it also checks
+  /// if there is a dead def of a super-register.
+  bool registerDefIsDead(unsigned Reg,
+                         const TargetRegisterInfo *TRI = NULL) const {
+    return findRegisterDefOperandIdx(Reg, true, false, TRI) != -1;
+  }
+
+  /// findRegisterUseOperandIdx() - Returns the operand index that is a use of
+  /// the specific register or -1 if it is not found. It further tightens
+  /// the search criteria to a use that kills the register if isKill is true.
+  int findRegisterUseOperandIdx(unsigned Reg, bool isKill = false,
+                                const TargetRegisterInfo *TRI = NULL) const;
+
+  /// findRegisterUseOperand - Wrapper for findRegisterUseOperandIdx, it returns
+  /// a pointer to the MachineOperand rather than an index.
+  MachineOperand *findRegisterUseOperand(unsigned Reg, bool isKill = false,
+                                         const TargetRegisterInfo *TRI = NULL) {
+    int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI);
+    return (Idx == -1) ? NULL : &getOperand(Idx);
+  }
+
+  /// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+  /// the specified register or -1 if it is not found. If isDead is true, defs
+  /// that are not dead are skipped. If Overlap is true, then it also looks for
+  /// defs that merely overlap the specified register. If TargetRegisterInfo is
+  /// non-null, then it also checks if there is a def of a super-register.
+  int findRegisterDefOperandIdx(unsigned Reg,
+                                bool isDead = false, bool Overlap = false,
+                                const TargetRegisterInfo *TRI = NULL) const;
+
+  /// findRegisterDefOperand - Wrapper for findRegisterDefOperandIdx, it returns
+  /// a pointer to the MachineOperand rather than an index.
+  MachineOperand *findRegisterDefOperand(unsigned Reg, bool isDead = false,
+                                         const TargetRegisterInfo *TRI = NULL) {
+    int Idx = findRegisterDefOperandIdx(Reg, isDead, false, TRI);
+    return (Idx == -1) ? NULL : &getOperand(Idx);
+  }
+
+  /// findFirstPredOperandIdx() - Find the index of the first operand in the
+  /// operand list that is used to represent the predicate. It returns -1 if
+  /// none is found.
+  int findFirstPredOperandIdx() const;
+
+  /// isRegTiedToUseOperand - Given the index of a register def operand,
+  /// check if the register def is tied to a source operand, due to either
+  /// two-address elimination or inline assembly constraints. Returns the
+  /// first tied use operand index by reference if UseOpIdx is not null.
+  bool isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx = 0) const;
+
+  /// isRegTiedToDefOperand - Return true if the use operand of the specified
+  /// index is tied to a def operand. It also returns the def operand index by
+  /// reference if DefOpIdx is not null.
+  bool isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx = 0) const;
+
+  /// clearKillInfo - Clears kill flags on all operands.
+  ///
+  void clearKillInfo();
+
+  /// copyKillDeadInfo - Copies kill / dead operand properties from MI.
+  ///
+  void copyKillDeadInfo(const MachineInstr *MI);
+
+  /// copyPredicates - Copies predicate operand(s) from MI.
+  void copyPredicates(const MachineInstr *MI);
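A hedged sketch of how these register queries compose (not from the patch; `MI`, `Reg`, and `TRI` are placeholders, and setIsKill() is a MachineOperand mutator declared in MachineOperand.h):

    // If MI reads Reg but no operand carries a kill flag yet, find the
    // use operand and mark it as the last use on this path.
    if (MI->readsRegister(Reg, TRI) && !MI->killsRegister(Reg, TRI))
      if (MachineOperand *MO = MI->findRegisterUseOperand(Reg, false, TRI))
        MO->setIsKill(true);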
+  /// substituteRegister - Replace all occurrences of FromReg with ToReg:SubIdx,
+  /// properly composing subreg indices where necessary.
+  void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx,
+                          const TargetRegisterInfo &RegInfo);
+
+  /// addRegisterKilled - We have determined MI kills a register. Look for the
+  /// operand that uses it and mark it as IsKill. If AddIfNotFound is true,
+  /// add an implicit operand if it's not found. Returns true if the operand
+  /// exists / is added.
+  bool addRegisterKilled(unsigned IncomingReg,
+                         const TargetRegisterInfo *RegInfo,
+                         bool AddIfNotFound = false);
+
+  /// addRegisterDead - We have determined MI defined a register without a use.
+  /// Look for the operand that defines it and mark it as IsDead. If
+  /// AddIfNotFound is true, add an implicit operand if it's not found. Returns
+  /// true if the operand exists / is added.
+  bool addRegisterDead(unsigned IncomingReg, const TargetRegisterInfo *RegInfo,
+                       bool AddIfNotFound = false);
+
+  /// addRegisterDefined - We have determined MI defines a register. Make sure
+  /// there is an operand defining Reg.
+  void addRegisterDefined(unsigned IncomingReg,
+                          const TargetRegisterInfo *RegInfo = 0);
+
+  /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as dead
+  /// except those in the UsedRegs list.
+  void setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+                             const TargetRegisterInfo &TRI);
+
+  /// isSafeToMove - Return true if it is safe to move this instruction. If
+  /// SawStore is set to true, it means that there is a store (or call) between
+  /// the instruction's location and its intended destination.
+  bool isSafeToMove(const TargetInstrInfo *TII, AliasAnalysis *AA,
+                    bool &SawStore) const;
+
+  /// isSafeToReMat - Return true if it's safe to rematerialize the specified
+  /// instruction which defined the specified register instead of copying it.
+  bool isSafeToReMat(const TargetInstrInfo *TII, AliasAnalysis *AA,
+                     unsigned DstReg) const;
+
+  /// hasVolatileMemoryRef - Return true if this instruction may have a
+  /// volatile memory reference, or if the information describing the
+  /// memory reference is not available. Return false if it is known to
+  /// have no volatile memory references.
+  bool hasVolatileMemoryRef() const;
+
+  /// isInvariantLoad - Return true if this instruction is loading from a
+  /// location whose value is invariant across the function. For example,
+  /// loading a value from the constant pool or from the argument area of
+  /// a function if it does not change. This should only return true if *all*
+  /// loads the instruction does are invariant (if it does multiple loads).
+  bool isInvariantLoad(AliasAnalysis *AA) const;
+
+  /// isConstantValuePHI - If the specified instruction is a PHI that always
+  /// merges together the same virtual register, return the register, otherwise
+  /// return 0.
+  unsigned isConstantValuePHI() const;
+
+  /// hasUnmodeledSideEffects - Return true if this instruction has side
+  /// effects that are not modeled by mayLoad / mayStore, etc.
+  /// For all instructions, the property is encoded in TargetInstrDesc::Flags
+  /// (see TargetInstrDesc::hasUnmodeledSideEffects()). The only exception is
+  /// the INLINEASM instruction, in which case the side effect property is
+  /// encoded in one of its operands (see InlineAsm::Extra_HasSideEffect).
+  ///
+  bool hasUnmodeledSideEffects() const;
+
+  /// allDefsAreDead - Return true if all the defs of this instruction are dead.
+  ///
+  bool allDefsAreDead() const;
+
+  /// copyImplicitOps - Copy implicit register operands from specified
+  /// instruction to this instruction.
+  void copyImplicitOps(const MachineInstr *MI);
+
+  //
+  // Debugging support
+  //
+  void print(raw_ostream &OS, const TargetMachine *TM = 0) const;
+  void dump() const;
+
+  //===--------------------------------------------------------------------===//
+  // Accessors used to build up machine instructions.
+
+  /// addOperand - Add the specified operand to the instruction. If it is an
+  /// implicit operand, it is added to the end of the operand list. If it is
+  /// an explicit operand it is added at the end of the explicit operand list
+  /// (before the first implicit operand).
+  void addOperand(const MachineOperand &Op);
+
+  /// setDesc - Replace the instruction descriptor (thus opcode) of
+  /// the current instruction with a new one.
+  ///
+  void setDesc(const TargetInstrDesc &tid) { TID = &tid; }
+
+  /// setDebugLoc - Replace current source information with new such.
+  /// Avoid using this, the constructor argument is preferable.
+  ///
+  void setDebugLoc(const DebugLoc dl) { debugLoc = dl; }
+
+  /// RemoveOperand - Erase an operand from an instruction, leaving it with one
+  /// fewer operand than it started with.
+  ///
+  void RemoveOperand(unsigned i);
+
+  /// addMemOperand - Add a MachineMemOperand to the machine instruction.
+  /// This function should be used only occasionally. The setMemRefs function
+  /// is the primary method for setting up a MachineInstr's MemRefs list.
+  void addMemOperand(MachineFunction &MF, MachineMemOperand *MO);
+
+  /// setMemRefs - Assign this MachineInstr's memory reference descriptor
+  /// list. This does not transfer ownership.
+  void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
+    MemRefs = NewMemRefs;
+    MemRefsEnd = NewMemRefsEnd;
+  }
+
+private:
+  /// getRegInfo - If this instruction is embedded into a MachineFunction,
+  /// return the MachineRegisterInfo object for the current function, otherwise
+  /// return null.
+  MachineRegisterInfo *getRegInfo();
+
+  /// addImplicitDefUseOperands - Add all implicit def and use operands to
+  /// this instruction.
+  void addImplicitDefUseOperands();
+
+  /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
+  /// this instruction from their respective use lists. This requires that the
+  /// operands already be on their use lists.
+  void RemoveRegOperandsFromUseLists();
+
+  /// AddRegOperandsToUseLists - Add all of the register operands in
+  /// this instruction to their respective use lists. This requires that the
+  /// operands not be on their use lists yet.
+  void AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo);
+};
+
+/// MachineInstrExpressionTrait - Special DenseMapInfo traits to compare
+/// MachineInstr* by *value* of the instruction rather than by pointer value.
+/// The hashing and equality testing functions ignore definitions so this is
+/// useful for CSE, etc.
+struct MachineInstrExpressionTrait : DenseMapInfo<MachineInstr*> {
+  static inline MachineInstr *getEmptyKey() {
+    return 0;
+  }
+
+  static inline MachineInstr *getTombstoneKey() {
+    return reinterpret_cast<MachineInstr*>(-1);
+  }
+
+  static unsigned getHashValue(const MachineInstr* const &MI);
+
+  static bool isEqual(const MachineInstr* const &LHS,
+                      const MachineInstr* const &RHS) {
+    if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
+        LHS == getEmptyKey() || LHS == getTombstoneKey())
+      return LHS == RHS;
+    return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// Debugging Support
+
+inline raw_ostream& operator<<(raw_ostream &OS, const MachineInstr &MI) {
+  MI.print(OS);
+  return OS;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineInstrBuilder.h b/final/include/llvm/CodeGen/MachineInstrBuilder.h
new file mode 100644
index 00000000000..f04dee2b4b0
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -0,0 +1,258 @@
+//===-- CodeGen/MachineInstrBuilder.h - Simplify creation of MIs -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes a function named BuildMI, which is useful for dramatically
+// simplifying how MachineInstr's are created. It allows use of code like this:
+//
+//   M = BuildMI(X86::ADDrr8, 2).addReg(argVal1).addReg(argVal2);
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEINSTRBUILDER_H
+#define LLVM_CODEGEN_MACHINEINSTRBUILDER_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+class TargetInstrDesc;
+class MDNode;
+
+namespace RegState {
+  enum {
+    Define         = 0x2,
+    Implicit       = 0x4,
+    Kill           = 0x8,
+    Dead           = 0x10,
+    Undef          = 0x20,
+    EarlyClobber   = 0x40,
+    Debug          = 0x80,
+    ImplicitDefine = Implicit | Define,
+    ImplicitKill   = Implicit | Kill
+  };
+}
+
+class MachineInstrBuilder {
+  MachineInstr *MI;
+public:
+  MachineInstrBuilder() : MI(0) {}
+  explicit MachineInstrBuilder(MachineInstr *mi) : MI(mi) {}
+
+  /// Allow automatic conversion to the machine instruction we are working on.
+  ///
+  operator MachineInstr*() const { return MI; }
+  operator MachineBasicBlock::iterator() const { return MI; }
+
+  /// addReg - Add a new virtual register operand...
+  ///
+  const
+  MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
+                              unsigned SubReg = 0) const {
+    assert((flags & 0x1) == 0 &&
+           "Passing in 'true' to addReg is forbidden! Use enums instead.");
+    MI->addOperand(MachineOperand::CreateReg(RegNo,
+                                             flags & RegState::Define,
+                                             flags & RegState::Implicit,
+                                             flags & RegState::Kill,
+                                             flags & RegState::Dead,
+                                             flags & RegState::Undef,
+                                             flags & RegState::EarlyClobber,
+                                             SubReg,
+                                             flags & RegState::Debug));
+    return *this;
+  }
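A hedged sketch of the builder idiom this header enables (not part of the patch; `TII`, `MBB`, `InsertPt`, `DL`, `DestReg`, `SrcReg`, and `IsLastUse` are placeholders, and the BuildMI overload and getKillRegState() helper used here are declared later in this file):

    // Chain operand appends off BuildMI; RegState flags replace bare
    // booleans, and getKillRegState() maps a computed bool to the flag.
    BuildMI(MBB, InsertPt, DL, TII.get(TargetOpcode::COPY), DestReg)
        .addReg(SrcReg, getKillRegState(IsLastUse));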
+  /// addImm - Add a new immediate operand.
+  ///
+  const MachineInstrBuilder &addImm(int64_t Val) const {
+    MI->addOperand(MachineOperand::CreateImm(Val));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addFPImm(const ConstantFP *Val) const {
+    MI->addOperand(MachineOperand::CreateFPImm(Val));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB,
+                                    unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateMBB(MBB, TargetFlags));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addFrameIndex(unsigned Idx) const {
+    MI->addOperand(MachineOperand::CreateFI(Idx));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx,
+                                                  int Offset = 0,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateJTI(Idx, TargetFlags));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addGlobalAddress(const GlobalValue *GV,
+                                              int64_t Offset = 0,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateGA(GV, Offset, TargetFlags));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addExternalSymbol(const char *FnName,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateES(FnName, TargetFlags));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addMemOperand(MachineMemOperand *MMO) const {
+    MI->addMemOperand(*MI->getParent()->getParent(), MMO);
+    return *this;
+  }
+
+  const MachineInstrBuilder &setMemRefs(MachineInstr::mmo_iterator b,
+                                        MachineInstr::mmo_iterator e) const {
+    MI->setMemRefs(b, e);
+    return *this;
+  }
+
+
+  const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
+    MI->addOperand(MO);
+    return *this;
+  }
+
+  const MachineInstrBuilder &addMetadata(const MDNode *MD) const {
+    MI->addOperand(MachineOperand::CreateMetadata(MD));
+    return *this;
+  }
+
+  const MachineInstrBuilder &addSym(MCSymbol *Sym) const {
+    MI->addOperand(MachineOperand::CreateMCSymbol(Sym));
+    return *this;
+  }
+
+  const MachineInstrBuilder &setMIFlags(unsigned Flags) const {
+    MI->setFlags(Flags);
+    return *this;
+  }
+
+  const MachineInstrBuilder &setMIFlag(MachineInstr::MIFlag Flag) const {
+    MI->setFlag(Flag);
+    return *this;
+  }
+
+  // Add a displacement from an existing MachineOperand with an added offset.
+  const MachineInstrBuilder &addDisp(const MachineOperand &Disp,
+                                     int64_t off) const {
+    switch (Disp.getType()) {
+    default:
+      llvm_unreachable("Unhandled operand type in addDisp()");
+    case MachineOperand::MO_Immediate:
+      return addImm(Disp.getImm() + off);
+    case MachineOperand::MO_GlobalAddress:
+      return addGlobalAddress(Disp.getGlobal(), Disp.getOffset() + off);
+    }
+  }
+};
+
+/// BuildMI - Builder interface. Specify how to create the initial instruction
+/// itself.
+///
+inline MachineInstrBuilder BuildMI(MachineFunction &MF,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID) {
+  return MachineInstrBuilder(MF.CreateMachineInstr(TID, DL));
+}
+
+/// BuildMI - This version of the builder sets up the first operand as a
+/// destination virtual register.
+///
+inline MachineInstrBuilder BuildMI(MachineFunction &MF,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID,
+                                   unsigned DestReg) {
+  return MachineInstrBuilder(MF.CreateMachineInstr(TID, DL))
+           .addReg(DestReg, RegState::Define);
+}
+
+/// BuildMI - This version of the builder inserts the newly-built
+/// instruction before the given position in the given MachineBasicBlock, and
+/// sets up the first operand as a destination virtual register.
+///
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+                                   MachineBasicBlock::iterator I,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID,
+                                   unsigned DestReg) {
+  MachineInstr *MI = BB.getParent()->CreateMachineInstr(TID, DL);
+  BB.insert(I, MI);
+  return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
+}
+
+/// BuildMI - This version of the builder inserts the newly-built
+/// instruction before the given position in the given MachineBasicBlock, and
+/// does NOT take a destination register.
+///
+inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
+                                   MachineBasicBlock::iterator I,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID) {
+  MachineInstr *MI = BB.getParent()->CreateMachineInstr(TID, DL);
+  BB.insert(I, MI);
+  return MachineInstrBuilder(MI);
+}
+
+/// BuildMI - This version of the builder inserts the newly-built
+/// instruction at the end of the given MachineBasicBlock, and does NOT take a
+/// destination register.
+///
+inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID) {
+  return BuildMI(*BB, BB->end(), DL, TID);
+}
+
+/// BuildMI - This version of the builder inserts the newly-built
+/// instruction at the end of the given MachineBasicBlock, and sets up the first
+/// operand as a destination virtual register.
+///
+inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
+                                   DebugLoc DL,
+                                   const TargetInstrDesc &TID,
+                                   unsigned DestReg) {
+  return BuildMI(*BB, BB->end(), DL, TID, DestReg);
+}
+
+inline unsigned getDefRegState(bool B) {
+  return B ? RegState::Define : 0;
+}
+inline unsigned getImplRegState(bool B) {
+  return B ? RegState::Implicit : 0;
+}
+inline unsigned getKillRegState(bool B) {
+  return B ? RegState::Kill : 0;
+}
+inline unsigned getDeadRegState(bool B) {
+  return B ? RegState::Dead : 0;
+}
+inline unsigned getUndefRegState(bool B) {
+  return B ? RegState::Undef : 0;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineJumpTableInfo.h b/final/include/llvm/CodeGen/MachineJumpTableInfo.h
new file mode 100644
index 00000000000..62643497655
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineJumpTableInfo.h
@@ -0,0 +1,125 @@
+//===-- CodeGen/MachineJumpTableInfo.h - Abstract Jump Tables --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The MachineJumpTableInfo class keeps track of jump tables referenced by
+// lowered switch instructions in the MachineFunction.
+//
+// Instructions reference the address of these jump tables through the use of
+// MO_JumpTableIndex values. When emitting assembly or machine code, these
+// virtual address references are converted to refer to the address of the
+// function jump tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
+#define LLVM_CODEGEN_MACHINEJUMPTABLEINFO_H
+
+#include <vector>
+#include <cassert>
+
+namespace llvm {
+
+class MachineBasicBlock;
+class TargetData;
+class raw_ostream;
+
+/// MachineJumpTableEntry - One jump table in the jump table info.
+///
+struct MachineJumpTableEntry {
+  /// MBBs - The vector of basic blocks from which to create the jump table.
+  std::vector<MachineBasicBlock*> MBBs;
+
+  explicit MachineJumpTableEntry(const std::vector<MachineBasicBlock*> &M)
+    : MBBs(M) {}
+};
+
+class MachineJumpTableInfo {
+public:
+  /// JTEntryKind - This enum indicates how each entry of the jump table is
+  /// represented and emitted.
+  enum JTEntryKind {
+    /// EK_BlockAddress - Each entry is a plain address of block, e.g.:
+    ///     .word LBB123
+    EK_BlockAddress,
+
+    /// EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+    /// with a relocation as gp-relative, e.g.:
+    ///     .gprel32 LBB123
+    EK_GPRel32BlockAddress,
+
+    /// EK_LabelDifference32 - Each entry is the address of the block minus
+    /// the address of the jump table. This is used for PIC jump tables where
+    /// gprel32 is not supported. e.g.:
+    ///     .word LBB123 - LJTI1_2
+    /// If the .set directive is supported, this is emitted as:
+    ///     .set L4_5_set_123, LBB123 - LJTI1_2
+    ///     .word L4_5_set_123
+    EK_LabelDifference32,
+
+    /// EK_Inline - Jump table entries are emitted inline at their point of
+    /// use. It is the responsibility of the target to emit the entries.
+    EK_Inline,
+
+    /// EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the
+    /// TargetLowering::LowerCustomJumpTableEntry hook.
+    EK_Custom32
+  };
+private:
+  JTEntryKind EntryKind;
+  std::vector<MachineJumpTableEntry> JumpTables;
+public:
+  explicit MachineJumpTableInfo(JTEntryKind Kind): EntryKind(Kind) {}
+
+  JTEntryKind getEntryKind() const { return EntryKind; }
+
+  /// getEntrySize - Return the size of each entry in the jump table.
+  unsigned getEntrySize(const TargetData &TD) const;
+  /// getEntryAlignment - Return the alignment of each entry in the jump table.
+  unsigned getEntryAlignment(const TargetData &TD) const;
+
+  /// createJumpTableIndex - Create a new jump table.
+  ///
+  unsigned createJumpTableIndex(const std::vector<MachineBasicBlock*> &DestBBs);
+
+  /// isEmpty - Return true if there are no jump tables.
+  ///
+  bool isEmpty() const { return JumpTables.empty(); }
+
+  const std::vector<MachineJumpTableEntry> &getJumpTables() const {
+    return JumpTables;
+  }
+
+  /// RemoveJumpTable - Mark the specific index as being dead. This will
+  /// prevent it from being emitted.
+  void RemoveJumpTable(unsigned Idx) {
+    JumpTables[Idx].MBBs.clear();
+  }
+
+  /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+  /// the jump tables to branch to New instead.
+  bool ReplaceMBBInJumpTables(MachineBasicBlock *Old, MachineBasicBlock *New);
+
+  /// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+  /// the jump table to branch to New instead.
+  bool ReplaceMBBInJumpTable(unsigned Idx, MachineBasicBlock *Old,
+                             MachineBasicBlock *New);
+
+  /// print - Used by the MachineFunction printer to print information about
+  /// jump tables. Implemented in MachineFunction.cpp
+  ///
+  void print(raw_ostream &OS) const;
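A short sketch of the intended use (assumptions: `MJTI` is a MachineJumpTableInfo pointer obtained from the current MachineFunction, and the destination blocks come from switch lowering):

    // Register the destinations of one lowered switch as a new table.
    std::vector<MachineBasicBlock*> DestBBs; // filled by switch lowering
    unsigned JTI = MJTI->createJumpTableIndex(DestBBs);
    // Instructions then reference the table via an MO_JumpTableIndex operand.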
+  /// dump - Call to stderr.
+  ///
+  void dump() const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineLocation.h b/final/include/llvm/CodeGen/MachineLocation.h
new file mode 100644
index 00000000000..21951b6680b
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineLocation.h
@@ -0,0 +1,98 @@
+//===-- llvm/CodeGen/MachineLocation.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// The MachineLocation class is used to represent a simple location in a machine
+// frame. Locations will be one of two forms: a register or an address formed
+// from a base address plus an offset. Register indirection can be specified by
+// using an offset of zero.
+//
+// The MachineMove class is used to represent abstract move operations in the
+// prolog/epilog of a compiled function. A collection of these objects can be
+// used by a debug consumer to track the location of values when unwinding stack
+// frames.
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_MACHINELOCATION_H
+#define LLVM_CODEGEN_MACHINELOCATION_H
+
+namespace llvm {
+  class MCSymbol;
+
+class MachineLocation {
+private:
+  bool IsRegister;    // True if location is a register.
+  unsigned Register;  // gcc/gdb register number.
+  int Offset;         // Displacement if not register.
+public:
+  enum {
+    // The target register number for an abstract frame pointer. The value is
+    // an arbitrary value that doesn't collide with any real target register.
+    VirtualFP = ~0U
+  };
+  MachineLocation()
+    : IsRegister(false), Register(0), Offset(0) {}
+  explicit MachineLocation(unsigned R)
+    : IsRegister(true), Register(R), Offset(0) {}
+  MachineLocation(unsigned R, int O)
+    : IsRegister(false), Register(R), Offset(O) {}
+
+  bool operator==(const MachineLocation &Other) const {
+    return IsRegister == Other.IsRegister && Register == Other.Register &&
+           Offset == Other.Offset;
+  }
+
+  // Accessors
+  bool isReg()           const { return IsRegister; }
+  unsigned getReg()      const { return Register; }
+  int getOffset()        const { return Offset; }
+  void setIsRegister(bool Is)  { IsRegister = Is; }
+  void setRegister(unsigned R) { Register = R; }
+  void setOffset(int O)        { Offset = O; }
+  void set(unsigned R) {
+    IsRegister = true;
+    Register = R;
+    Offset = 0;
+  }
+  void set(unsigned R, int O) {
+    IsRegister = false;
+    Register = R;
+    Offset = O;
+  }
+
+#ifndef NDEBUG
+  void dump();
+#endif
+};
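A small sketch of the two location forms, together with the MachineMove class declared just below (`FPReg` and `SpillLabel` are placeholder values):

    MachineLocation InReg(FPReg);                 // register form
    MachineLocation Spilled(FPReg, -8);           // base register + offset form
    MachineMove Save(SpillLabel, Spilled, InReg); // destination, then source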
+/// MachineMove - This class represents the save or restore of a callee saved
+/// register that exception or debug info needs to know about.
+class MachineMove {
+private:
+  /// Label - Symbol for post-instruction address when result of move takes
+  /// effect.
+  MCSymbol *Label;
+
+  // Move to & from location.
+  MachineLocation Destination, Source;
+public:
+  MachineMove() : Label(0) {}
+
+  MachineMove(MCSymbol *label, const MachineLocation &D,
+              const MachineLocation &S)
+    : Label(label), Destination(D), Source(S) {}
+
+  // Accessors
+  MCSymbol *getLabel()                    const { return Label; }
+  const MachineLocation &getDestination() const { return Destination; }
+  const MachineLocation &getSource()      const { return Source; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineLoopInfo.h b/final/include/llvm/CodeGen/MachineLoopInfo.h
new file mode 100644
index 00000000000..6dd9440500b
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineLoopInfo.h
@@ -0,0 +1,178 @@
+//===- llvm/CodeGen/MachineLoopInfo.h - Natural Loop Calculator -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// natural loops may actually be several loops that share the same header node.
+//
+// This analysis calculates the nesting structure of loops in a function. For
+// each natural loop identified, this analysis identifies natural loops
+// contained entirely within the loop and the basic blocks that make up the loop.
+//
+// It can calculate on the fly various bits of information, for example:
+//
+//  * whether there is a preheader for the loop
+//  * the number of back edges to the header
+//  * whether or not a particular block branches out of the loop
+//  * the successor blocks of the loop
+//  * the loop depth
+//  * the trip count
+//  * etc...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINE_LOOP_INFO_H
+#define LLVM_CODEGEN_MACHINE_LOOP_INFO_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+namespace llvm {
+
+class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
+public:
+  MachineLoop();
+
+  /// getTopBlock - Return the "top" block in the loop, which is the first
+  /// block in the linear layout, ignoring any parts of the loop not
+  /// contiguous with the part that contains the header.
+  MachineBasicBlock *getTopBlock();
+
+  /// getBottomBlock - Return the "bottom" block in the loop, which is the last
+  /// block in the linear layout, ignoring any parts of the loop not
+  /// contiguous with the part that contains the header.
+  MachineBasicBlock *getBottomBlock();
+
+  void dump() const;
+
+private:
+  friend class LoopInfoBase<MachineBasicBlock, MachineLoop>;
+  explicit MachineLoop(MachineBasicBlock *MBB)
+    : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {}
+};
+
+class MachineLoopInfo : public MachineFunctionPass {
+  LoopInfoBase<MachineBasicBlock, MachineLoop> LI;
+  friend class LoopBase<MachineBasicBlock, MachineLoop>;
+
+  void operator=(const MachineLoopInfo &);  // do not implement
+  MachineLoopInfo(const MachineLoopInfo &); // do not implement
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  MachineLoopInfo() : MachineFunctionPass(ID) {
+    initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  }
+
+  LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; }
+
+  /// iterator/begin/end - The interface to the top-level loops in the current
+  /// function.
+  ///
+  typedef LoopInfoBase<MachineBasicBlock, MachineLoop>::iterator iterator;
+  inline iterator begin() const { return LI.begin(); }
+  inline iterator end() const { return LI.end(); }
+  bool empty() const { return LI.empty(); }
+
+  /// getLoopFor - Return the innermost loop that BB lives in. If a basic
+  /// block is in no loop (for example the entry node), null is returned.
+  ///
+  inline MachineLoop *getLoopFor(const MachineBasicBlock *BB) const {
+    return LI.getLoopFor(BB);
+  }
+
+  /// operator[] - same as getLoopFor...
+  ///
+  inline const MachineLoop *operator[](const MachineBasicBlock *BB) const {
+    return LI.getLoopFor(BB);
+  }
+
+  /// getLoopDepth - Return the loop nesting level of the specified block...
+  ///
+  inline unsigned getLoopDepth(const MachineBasicBlock *BB) const {
+    return LI.getLoopDepth(BB);
+  }
+
+  // isLoopHeader - True if the block is a loop header node
+  inline bool isLoopHeader(MachineBasicBlock *BB) const {
+    return LI.isLoopHeader(BB);
+  }
+
+  /// runOnMachineFunction - Calculate the natural loop information.
+  ///
+  virtual bool runOnMachineFunction(MachineFunction &F);
+
+  virtual void releaseMemory() { LI.releaseMemory(); }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  /// removeLoop - This removes the specified top-level loop from this loop info
+  /// object. The loop is not deleted, as it will presumably be inserted into
+  /// another loop.
+  inline MachineLoop *removeLoop(iterator I) { return LI.removeLoop(I); }
+
+  /// changeLoopFor - Change the top-level loop that contains BB to the
+  /// specified loop. This should be used by transformations that restructure
+  /// the loop hierarchy tree.
+  inline void changeLoopFor(MachineBasicBlock *BB, MachineLoop *L) {
+    LI.changeLoopFor(BB, L);
+  }
+
+  /// changeTopLevelLoop - Replace the specified loop in the top-level loops
+  /// list with the indicated loop.
+  inline void changeTopLevelLoop(MachineLoop *OldLoop, MachineLoop *NewLoop) {
+    LI.changeTopLevelLoop(OldLoop, NewLoop);
+  }
+
+  /// addTopLevelLoop - This adds the specified loop to the collection of
+  /// top-level loops.
+  inline void addTopLevelLoop(MachineLoop *New) {
+    LI.addTopLevelLoop(New);
+  }
+
+  /// removeBlock - This method completely removes BB from all data structures,
+  /// including all of the Loop objects it is nested in and our mapping from
+  /// MachineBasicBlocks to loops.
+  void removeBlock(MachineBasicBlock *BB) {
+    LI.removeBlock(BB);
+  }
+};
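Typical queries against this analysis might look as follows (a sketch; `MLI` would come from getAnalysis<MachineLoopInfo>() in a machine function pass, and `MBB` is a placeholder block):

    MachineLoop *L = MLI.getLoopFor(MBB);   // null if MBB is in no loop
    unsigned Depth = MLI.getLoopDepth(MBB); // 0 when MBB is outside all loops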
+
+
+// Allow clients to walk the list of nested loops...
+template <> struct GraphTraits<const MachineLoop*> {
+  typedef const MachineLoop NodeType;
+  typedef MachineLoopInfo::iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(const MachineLoop *L) { return L; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->end();
+  }
+};
+
+template <> struct GraphTraits<MachineLoop*> {
+  typedef MachineLoop NodeType;
+  typedef MachineLoopInfo::iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(MachineLoop *L) { return L; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->end();
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineLoopRanges.h b/final/include/llvm/CodeGen/MachineLoopRanges.h
new file mode 100644
index 00000000000..6a30e8b53c0
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineLoopRanges.h
@@ -0,0 +1,112 @@
+//===- MachineLoopRanges.h - Ranges of machine loops -----------*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the MachineLoopRanges analysis.
+//
+// Provide on-demand information about the ranges of machine instructions
+// covered by a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINELOOPRANGES_H
+#define LLVM_CODEGEN_MACHINELOOPRANGES_H
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class MachineLoop;
+class MachineLoopInfo;
+class raw_ostream;
+
+/// MachineLoopRange - Range information for a single loop.
+class MachineLoopRange {
+  friend class MachineLoopRanges;
+
+public:
+  typedef IntervalMap<SlotIndex, unsigned, 4> Map;
+  typedef Map::Allocator Allocator;
+
+private:
+  /// The mapped loop.
+  const MachineLoop *const Loop;
+
+  /// Map intervals to a bit mask.
+  /// Bit 0 = inside loop block.
+  Map Intervals;
+
+  /// Loop area as measured by SlotIndex::distance.
+  unsigned Area;
+
+  /// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+  MachineLoopRange(const MachineLoop*, Allocator&, SlotIndexes&);
+
+public:
+  /// getLoop - Return the mapped machine loop.
+  const MachineLoop *getLoop() const { return Loop; }
+
+  /// overlaps - Return true if this loop overlaps the given range of machine
+  /// instructions.
+  bool overlaps(SlotIndex Start, SlotIndex Stop);
+
+  /// getNumber - Return the loop number. This is the same as the number of the
+  /// header block.
+  unsigned getNumber() const;
+
+  /// getArea - Return the loop area. This number is approximately proportional
+  /// to the number of instructions in the loop.
+  unsigned getArea() const { return Area; }
+
+  /// getMap - Allow public read-only access for IntervalMapOverlaps.
+  const Map &getMap() { return Intervals; }
+
+  /// print - Print loop ranges on OS.
+  void print(raw_ostream&) const;
+
+  /// byNumber - Comparator for array_pod_sort that sorts a list of
+  /// MachineLoopRange pointers by number.
+  static int byNumber(const void*, const void*);
+
+  /// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+  /// MachineLoopRange pointers by descending area, then by number.
+  static int byAreaDesc(const void*, const void*);
+};
+
+raw_ostream &operator<<(raw_ostream&, const MachineLoopRange&);
+
+/// MachineLoopRanges - Analysis pass that provides on-demand per-loop range
+/// information.
+class MachineLoopRanges : public MachineFunctionPass {
+  typedef DenseMap<const MachineLoop*, MachineLoopRange*> CacheMap;
+  typedef MachineLoopRange::Allocator MapAllocator;
+
+  MapAllocator Allocator;
+  SlotIndexes *Indexes;
+  CacheMap Cache;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  MachineLoopRanges() : MachineFunctionPass(ID), Indexes(0) {}
+  ~MachineLoopRanges() { releaseMemory(); }
+
+  /// getLoopRange - Return the range of the given loop.
+  MachineLoopRange *getLoopRange(const MachineLoop *Loop);
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINELOOPRANGES_H
diff --git a/final/include/llvm/CodeGen/MachineMemOperand.h b/final/include/llvm/CodeGen/MachineMemOperand.h
new file mode 100644
index 00000000000..768ce47f8b3
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineMemOperand.h
@@ -0,0 +1,165 @@
+//==- llvm/CodeGen/MachineMemOperand.h - MachineMemOperand class -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MachineMemOperand class, which is a
+// description of a memory reference. It is used to help track dependencies
+// in the backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEMEMOPERAND_H
+#define LLVM_CODEGEN_MACHINEMEMOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class Value;
+class FoldingSetNodeID;
+class MDNode;
+class raw_ostream;
+
+/// MachinePointerInfo - This class contains a discriminated union of
+/// information about pointers in memory operands, relating them back to LLVM IR
+/// or to virtual locations (such as frame indices) that are exposed during
+/// codegen.
+struct MachinePointerInfo {
+  /// V - This is the IR pointer value for the access, or it is null if unknown.
+  /// If this is null, then the access is to a pointer in the default address
+  /// space.
+  const Value *V;
+
+  /// Offset - This is an offset from the base Value*.
+  int64_t Offset;
+
+  explicit MachinePointerInfo(const Value *v = 0, int64_t offset = 0)
+    : V(v), Offset(offset) {}
+
+  MachinePointerInfo getWithOffset(int64_t O) const {
+    if (V == 0) return MachinePointerInfo(0, 0);
+    return MachinePointerInfo(V, Offset+O);
+  }
+
+  /// getAddrSpace - Return the LLVM IR address space number that this pointer
+  /// points into.
+  unsigned getAddrSpace() const;
+
+  /// getConstantPool - Return a MachinePointerInfo record that refers to the
+  /// constant pool.
+  static MachinePointerInfo getConstantPool();
+
+  /// getFixedStack - Return a MachinePointerInfo record that refers to the
+  /// specified FrameIndex.
+  static MachinePointerInfo getFixedStack(int FI, int64_t offset = 0);
+
+  /// getJumpTable - Return a MachinePointerInfo record that refers to a
+  /// jump table entry.
+  static MachinePointerInfo getJumpTable();
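A brief sketch of how offsets compose (`V` is a placeholder const Value*):

    MachinePointerInfo Base(V);                      // access at V itself
    MachinePointerInfo Elt = Base.getWithOffset(8);  // same base, Offset == 8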
+
+  /// getGOT - Return a MachinePointerInfo record that refers to a
+  /// GOT entry.
+  static MachinePointerInfo getGOT();
+
+  /// getStack - stack pointer relative access.
+  static MachinePointerInfo getStack(int64_t Offset);
+};
+
+
+//===----------------------------------------------------------------------===//
+/// MachineMemOperand - A description of a memory reference used in the backend.
+/// Instead of holding a StoreInst or LoadInst, this class holds the address
+/// Value of the reference along with a byte size and offset. This allows it
+/// to describe lowered loads and stores. Also, the special PseudoSourceValue
+/// objects can be used to represent loads and stores to memory locations
+/// that aren't explicit in the regular LLVM IR.
+///
+class MachineMemOperand {
+  MachinePointerInfo PtrInfo;
+  uint64_t Size;
+  unsigned Flags;
+  const MDNode *TBAAInfo;
+
+public:
+  /// Flags values. These may be or'd together.
+  enum MemOperandFlags {
+    /// The memory access reads data.
+    MOLoad = 1,
+    /// The memory access writes data.
+    MOStore = 2,
+    /// The memory access is volatile.
+    MOVolatile = 4,
+    /// The memory access is non-temporal.
+    MONonTemporal = 8,
+    // This is the number of bits we need to represent flags.
+    MOMaxBits = 4
+  };
+
+  /// MachineMemOperand - Construct a MachineMemOperand object with the
+  /// specified PtrInfo, flags, size, and base alignment.
+  MachineMemOperand(MachinePointerInfo PtrInfo, unsigned flags, uint64_t s,
+                    unsigned base_alignment, const MDNode *TBAAInfo = 0);
+
+  const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
+
+  /// getValue - Return the base address of the memory access. This may either
+  /// be a normal LLVM IR Value, or one of the special values used in CodeGen.
+  /// Special values are those obtained via
+  /// PseudoSourceValue::getFixedStack(int), PseudoSourceValue::getStack, and
+  /// other PseudoSourceValue member functions which return objects which stand
+  /// for frame/stack pointer relative references and other special references
+  /// which are not representable in the high-level IR.
+  const Value *getValue() const { return PtrInfo.V; }
+
+  /// getFlags - Return the raw flags of the source value, \see MemOperandFlags.
+  unsigned int getFlags() const { return Flags & ((1 << MOMaxBits) - 1); }
+
+  /// getOffset - For normal values, this is a byte offset added to the base
+  /// address. For PseudoSourceValue::FPRel values, this is the FrameIndex
+  /// number.
+  int64_t getOffset() const { return PtrInfo.Offset; }
+
+  /// getSize - Return the size in bytes of the memory reference.
+  uint64_t getSize() const { return Size; }
+
+  /// getAlignment - Return the minimum known alignment in bytes of the
+  /// actual memory reference.
+  uint64_t getAlignment() const;
+
+  /// getBaseAlignment - Return the minimum known alignment in bytes of the
+  /// base address, without the offset.
+  uint64_t getBaseAlignment() const { return (1u << (Flags >> MOMaxBits)) >> 1; }
+
+  /// getTBAAInfo - Return the TBAA tag for the memory reference.
+  const MDNode *getTBAAInfo() const { return TBAAInfo; }
+
+  bool isLoad() const { return Flags & MOLoad; }
+  bool isStore() const { return Flags & MOStore; }
+  bool isVolatile() const { return Flags & MOVolatile; }
+  bool isNonTemporal() const { return Flags & MONonTemporal; }
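A hedged construction sketch (it assumes the MachineFunction::getMachineMemOperand factory from MachineFunction.h; `MF` and `FI` are placeholders):

    // Describe a 4-byte load, 4-byte aligned, from fixed stack slot FI.
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                                MachineMemOperand::MOLoad, 4, 4);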
+
+  /// refineAlignment - Update this MachineMemOperand to reflect the alignment
+  /// of MMO, if it has a greater alignment. This must only be used when the
+  /// new alignment applies to all users of this MachineMemOperand.
+  void refineAlignment(const MachineMemOperand *MMO);
+
+  /// setValue - Change the SourceValue for this MachineMemOperand. This
+  /// should only be used when an object is being relocated and all references
+  /// to it are being updated.
+  void setValue(const Value *NewSV) { PtrInfo.V = NewSV; }
+  void setOffset(int64_t NewOffset) { PtrInfo.Offset = NewOffset; }
+
+  /// Profile - Gather unique data for the object.
+  ///
+  void Profile(FoldingSetNodeID &ID) const;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const MachineMemOperand &MRO);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineModuleInfo.h b/final/include/llvm/CodeGen/MachineModuleInfo.h
new file mode 100644
index 00000000000..6bc80b099fd
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineModuleInfo.h
@@ -0,0 +1,366 @@
+//===-- llvm/CodeGen/MachineModuleInfo.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Collect meta information for a module. This information should be in a
+// neutral form that can be used by different debugging and exception handling
+// schemes.
+//
+// The organization of information is primarily clustered around the source
+// compile units. The main exception is source line correspondence where
+// inlining may interleave code from various compile units.
+//
+// The following information can be retrieved from the MachineModuleInfo.
+//
+//  -- Source directories - Directories are uniqued based on their canonical
+//     string and assigned a sequential numeric ID (base 1.)
+//  -- Source files - Files are also uniqued based on their name and directory
+//     ID. A file ID is a sequential number (base 1.)
+//  -- Source line correspondence - A vector of file ID, line#, column# triples.
+//     A DEBUG_LOCATION instruction is generated by the DAG Legalizer
+//     corresponding to each entry in the source line list. This allows a debug
+//     emitter to generate labels referenced by debug information tables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEMODULEINFO_H
+#define LLVM_CODEGEN_MACHINEMODULEINFO_H
+
+#include "llvm/Pass.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Metadata.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Forward declarations.
+class Constant;
+class GlobalVariable;
+class MDNode;
+class MachineBasicBlock;
+class MachineFunction;
+class Module;
+class PointerType;
+class StructType;
+
+/// MachineModuleInfoImpl - This class can be derived from and used by targets
+/// to hold private target-specific information for each Module. Objects of
+/// this type are accessed/created with MMI::getInfo and destroyed when the
+/// MachineModuleInfo is destroyed.
+class MachineModuleInfoImpl {
+public:
+  typedef PointerIntPair<MCSymbol*, 1, bool> StubValueTy;
+  virtual ~MachineModuleInfoImpl();
+  typedef std::vector<std::pair<MCSymbol*, StubValueTy> > SymbolListTy;
+protected:
+  static SymbolListTy GetSortedStubs(const DenseMap<MCSymbol*, StubValueTy>&);
+};
+
+
+
+//===----------------------------------------------------------------------===//
+/// LandingPadInfo - This structure is used to retain landing pad info for
+/// the current function.
+///
+struct LandingPadInfo {
+  MachineBasicBlock *LandingPadBlock;     // Landing pad block.
+  SmallVector<MCSymbol*, 1> BeginLabels;  // Labels prior to invoke.
+  SmallVector<MCSymbol*, 1> EndLabels;    // Labels after invoke.
+  MCSymbol *LandingPadLabel;              // Label at beginning of landing pad.
+  const Function *Personality;            // Personality function.
+  std::vector<int> TypeIds;               // List of type ids (filters negative)
+
+  explicit LandingPadInfo(MachineBasicBlock *MBB)
+    : LandingPadBlock(MBB), LandingPadLabel(0), Personality(0) {}
+};
+
+class MMIAddrLabelMap;
+
+//===----------------------------------------------------------------------===//
+/// MachineModuleInfo - This class contains meta information specific to a
+/// module. Queries can be made by different debugging and exception handling
+/// schemes and reformatted for specific use.
+///
+class MachineModuleInfo : public ImmutablePass {
+  /// Context - This is the MCContext used for the entire code generator.
+  MCContext Context;
+
+  /// TheModule - This is the LLVM Module being worked on.
+  const Module *TheModule;
+
+  /// ObjFileMMI - This is the object-file-format-specific implementation of
+  /// MachineModuleInfoImpl, which lets targets accumulate whatever info they
+  /// want.
+  MachineModuleInfoImpl *ObjFileMMI;
+
+  // FrameMoves - List of moves done by a function's prolog. Used to construct
+  // frame maps by debug and exception handling consumers.
+  std::vector<MachineMove> FrameMoves;
+
+  // LandingPads - List of LandingPadInfo describing the landing pad information
+  // in the current function.
+  std::vector<LandingPadInfo> LandingPads;
+
+  // Map of invoke call site index values to associated begin EH_LABEL for
+  // the current function.
+  DenseMap<MCSymbol*, unsigned> CallSiteMap;
+
+  // The current call site index being processed, if any. 0 if none.
+  unsigned CurCallSite;
+
+  // TypeInfos - List of C++ TypeInfo used in the current function.
+  //
+  std::vector<const GlobalVariable *> TypeInfos;
+
+  // FilterIds - List of typeids encoding filters used in the current function.
+  //
+  std::vector<unsigned> FilterIds;
+
+  // FilterEnds - List of the indices in FilterIds corresponding to filter
+  // terminators.
+  //
+  std::vector<unsigned> FilterEnds;
+
+  // Personalities - Vector of all personality functions ever seen. Used to emit
+  // common EH frames.
+  std::vector<const Function *> Personalities;
+
+  /// UsedFunctions - The functions in the @llvm.used list in a more easily
+  /// searchable format. This does not include the functions in
+  /// llvm.compiler.used.
+  SmallPtrSet<const Function *, 32> UsedFunctions;
+
+
+  /// AddrLabelSymbols - This map keeps track of which symbol is being used for
+  /// the specified basic block's address of label.
+  MMIAddrLabelMap *AddrLabelSymbols;
+
+  bool CallsEHReturn;
+  bool CallsUnwindInit;
+
+  /// DbgInfoAvailable - True if debugging information is available
+  /// in this module.
+  bool DbgInfoAvailable;
+
+  /// True if this module calls a VarArg function with floating-point arguments.
+  /// This is used to emit an undefined reference to fltused on Windows targets.
+  bool CallsExternalVAFunctionWithFloatingPointArguments;
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  typedef std::pair<unsigned, DebugLoc> UnsignedDebugLocPair;
+  typedef SmallVector<std::pair<TrackingVH<MDNode>, UnsignedDebugLocPair>, 4>
+    VariableDbgInfoMapTy;
+  VariableDbgInfoMapTy VariableDbgInfo;
+
+  MachineModuleInfo(); // DUMMY CONSTRUCTOR, DO NOT CALL.
+  // Real constructor.
+  MachineModuleInfo(const MCAsmInfo &MAI, const TargetAsmInfo *TAI);
+  ~MachineModuleInfo();
+
+  bool doInitialization();
+  bool doFinalization();
+
+  /// EndFunction - Discard function meta information.
+  ///
+  void EndFunction();
+
+  const MCContext &getContext() const { return Context; }
+  MCContext &getContext() { return Context; }
+
+  void setModule(const Module *M) { TheModule = M; }
+  const Module *getModule() const { return TheModule; }
+
+  /// getInfo - Keep track of various per-function pieces of information for
+  /// backends that would like to do so.
+  ///
+  template<typename Ty>
+  Ty &getObjFileInfo() {
+    if (ObjFileMMI == 0)
+      ObjFileMMI = new Ty(*this);
+    return *static_cast<Ty*>(ObjFileMMI);
+  }
+
+  template<typename Ty>
+  const Ty &getObjFileInfo() const {
+    return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
+  }
+
+  /// AnalyzeModule - Scan the module for global debug information.
+  ///
+  void AnalyzeModule(const Module &M);
+
+  /// hasDebugInfo - Returns true if valid debug info is present.
+  ///
+  bool hasDebugInfo() const { return DbgInfoAvailable; }
+  void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
+
+  bool callsEHReturn() const { return CallsEHReturn; }
+  void setCallsEHReturn(bool b) { CallsEHReturn = b; }
+
+  bool callsUnwindInit() const { return CallsUnwindInit; }
+  void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
+
+  bool callsExternalVAFunctionWithFloatingPointArguments() const {
+    return CallsExternalVAFunctionWithFloatingPointArguments;
+  }
+
+  void setCallsExternalVAFunctionWithFloatingPointArguments(bool b) {
+    CallsExternalVAFunctionWithFloatingPointArguments = b;
+  }
+
+  /// getFrameMoves - Returns a reference to a list of moves done in the current
+  /// function's prologue. Used to construct frame maps for debug and exception
+  /// handling consumers.
+  std::vector<MachineMove> &getFrameMoves() { return FrameMoves; }
+
+  /// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+  /// block when its address is taken. This cannot be its normal LBB label
+  /// because the block may be accessed outside its containing function.
+  MCSymbol *getAddrLabelSymbol(const BasicBlock *BB);
+
+  /// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+  /// basic block when its address is taken. If other blocks were RAUW'd to
+  /// this one, we may have to emit them as well, return the whole set.
+  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(const BasicBlock *BB);
+
+  /// takeDeletedSymbolsForFunction - If the specified function has had any
+  /// references to address-taken blocks generated, but the block got deleted,
+  /// return the symbol now so we can emit it. This prevents emitting a
+  /// reference to a symbol that has no definition.
+  void takeDeletedSymbolsForFunction(const Function *F,
+                                     std::vector<MCSymbol*> &Result);
+
+
+  //===- EH ---------------------------------------------------------------===//
+
+  /// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+  /// specified MachineBasicBlock.
+  LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
+
+  /// addInvoke - Provide the begin and end labels of an invoke style call and
+  /// associate it with a try landing pad block.
+  void addInvoke(MachineBasicBlock *LandingPad,
+                 MCSymbol *BeginLabel, MCSymbol *EndLabel);
+
+  /// addLandingPad - Add a new landing pad. Returns the label ID for the
+  /// landing pad entry.
+  MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
+
+  /// addPersonality - Provide the personality function for the exception
+  /// information.
+  void addPersonality(MachineBasicBlock *LandingPad,
+                      const Function *Personality);
+
+  /// getPersonalityIndex - Get index of the current personality function inside
+  /// the Personalities array.
+  unsigned getPersonalityIndex() const;
+
+  /// getPersonalities - Return array of personality functions ever seen.
+  const std::vector<const Function *>& getPersonalities() const {
+    return Personalities;
+  }
+
+  /// isUsedFunction - Return true if the function is in the llvm.used list.
+  /// This does not return true for things in llvm.compiler.used unless they
+  /// are also in llvm.used.
+  bool isUsedFunction(const Function *F) {
+    return UsedFunctions.count(F);
+  }
+
+  /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+  ///
+  void addCatchTypeInfo(MachineBasicBlock *LandingPad,
+                        std::vector<const GlobalVariable *> &TyInfo);
+
+  /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+  ///
+  void addFilterTypeInfo(MachineBasicBlock *LandingPad,
+                         std::vector<const GlobalVariable *> &TyInfo);
+
+  /// addCleanup - Add a cleanup action for a landing pad.
+  ///
+  void addCleanup(MachineBasicBlock *LandingPad);
+
+  /// getTypeIDFor - Return the type id for the specified typeinfo. This is
+  /// function wide.
+  unsigned getTypeIDFor(const GlobalVariable *TI);
+
+  /// getFilterIDFor - Return the id of the filter encoded by TyIds. This is
+  /// function wide.
+  int getFilterIDFor(std::vector<unsigned> &TyIds);
+
+  /// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+  /// pads.
+  void TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = 0);
+
+  /// getLandingPads - Return a reference to the landing pad info for the
+  /// current function.
+  const std::vector<LandingPadInfo> &getLandingPads() const {
+    return LandingPads;
+  }
+
+  /// setCallSiteBeginLabel - Map the begin label for a call site
+  void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
+    CallSiteMap[BeginLabel] = Site;
+  }
+
+  /// getCallSiteBeginLabel - Get the call site number for a begin label
+  unsigned getCallSiteBeginLabel(MCSymbol *BeginLabel) {
+    assert(CallSiteMap.count(BeginLabel) &&
+           "Missing call site number for EH_LABEL!");
+    return CallSiteMap[BeginLabel];
+  }
+
+  /// setCurrentCallSite - Set the call site currently being processed.
+  void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
+
+  /// getCurrentCallSite - Get the call site currently being processed, if any.
+  /// return zero if none.
+  unsigned getCurrentCallSite(void) { return CurCallSite; }
+
+  /// getTypeInfos - Return a reference to the C++ typeinfo for the current
+  /// function.
+  const std::vector<const GlobalVariable *> &getTypeInfos() const {
+    return TypeInfos;
+  }
+
+  /// getFilterIds - Return a reference to the typeids encoding filters used in
+  /// the current function.
+  const std::vector<unsigned> &getFilterIds() const {
+    return FilterIds;
+  }
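A sketch of the call-site bookkeeping these accessors support (`MMI`, `BeginLabel`, and `Site` are placeholders for values an EH lowering pass would hold):

    MMI.setCurrentCallSite(Site);                 // while lowering the invoke
    MMI.setCallSiteBeginLabel(BeginLabel, Site);  // label -> call site index
    unsigned S = MMI.getCallSiteBeginLabel(BeginLabel); // S == Site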
+
+  /// getPersonality - Return a personality function if available. The presence
+  /// of one is required to emit exception handling info.
+  const Function *getPersonality() const;
+
+  /// setVariableDbgInfo - Collect information used to emit debugging
+  /// information of a variable.
+  void setVariableDbgInfo(MDNode *N, unsigned Slot, DebugLoc Loc) {
+    VariableDbgInfo.push_back(std::make_pair(N, std::make_pair(Slot, Loc)));
+  }
+
+  VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfo; }
+
+}; // End class MachineModuleInfo
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/MachineModuleInfoImpls.h b/final/include/llvm/CodeGen/MachineModuleInfoImpls.h
new file mode 100644
index 00000000000..9401ffd199d
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -0,0 +1,97 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.h -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
+#define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+namespace llvm {
+  class MCSymbol;
+
+  /// MachineModuleInfoMachO - This is a MachineModuleInfoImpl implementation
+  /// for MachO targets.
+  class MachineModuleInfoMachO : public MachineModuleInfoImpl {
+    /// FnStubs - Darwin '$stub' stubs. The key is something like "Lfoo$stub",
+    /// the value is something like "_foo".
+    DenseMap<MCSymbol*, StubValueTy> FnStubs;
+
+    /// GVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+    /// "Lfoo$non_lazy_ptr", the value is something like "_foo". The extra bit
+    /// is true if this GV is external.
+    DenseMap<MCSymbol*, StubValueTy> GVStubs;
+
+    /// HiddenGVStubs - Darwin '$non_lazy_ptr' stubs. The key is something like
+    /// "Lfoo$non_lazy_ptr", the value is something like "_foo". Unlike GVStubs
+    /// these are for things with hidden visibility. The extra bit is true if
+    /// this GV is external.
+    DenseMap<MCSymbol*, StubValueTy> HiddenGVStubs;
+
+    virtual void Anchor(); // Out of line virtual method.
+  public:
+    MachineModuleInfoMachO(const MachineModuleInfo &) {}
+
+    StubValueTy &getFnStubEntry(MCSymbol *Sym) {
+      assert(Sym && "Key cannot be null");
+      return FnStubs[Sym];
+    }
+
+    StubValueTy &getGVStubEntry(MCSymbol *Sym) {
+      assert(Sym && "Key cannot be null");
+      return GVStubs[Sym];
+    }
+
+    StubValueTy &getHiddenGVStubEntry(MCSymbol *Sym) {
+      assert(Sym && "Key cannot be null");
+      return HiddenGVStubs[Sym];
+    }
+
+    /// Accessor methods to return the set of stubs in sorted order.
+    SymbolListTy GetFnStubList() const {
+      return GetSortedStubs(FnStubs);
+    }
+    SymbolListTy GetGVStubList() const {
+      return GetSortedStubs(GVStubs);
+    }
+    SymbolListTy GetHiddenGVStubList() const {
+      return GetSortedStubs(HiddenGVStubs);
+    }
+  };
+
+  /// MachineModuleInfoELF - This is a MachineModuleInfoImpl implementation
+  /// for ELF targets.
+  class MachineModuleInfoELF : public MachineModuleInfoImpl {
+    /// GVStubs - These stubs are used to materialize global addresses in PIC
+    /// mode.
+    DenseMap<MCSymbol*, StubValueTy> GVStubs;
+
+    virtual void Anchor(); // Out of line virtual method.
diff --git a/final/include/llvm/CodeGen/MachineOperand.h b/final/include/llvm/CodeGen/MachineOperand.h
new file mode 100644
index 00000000000..8acc9490d8d
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineOperand.h
@@ -0,0 +1,564 @@
+//===-- llvm/CodeGen/MachineOperand.h - MachineOperand class ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MachineOperand class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEOPERAND_H
+#define LLVM_CODEGEN_MACHINEOPERAND_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+
+class BlockAddress;
+class ConstantFP;
+class GlobalValue;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class TargetMachine;
+class TargetRegisterInfo;
+class raw_ostream;
+class MCSymbol;
+
+/// MachineOperand class - Representation of each machine instruction operand.
+///
+class MachineOperand {
+public:
+  enum MachineOperandType {
+    MO_Register,           ///< Register operand.
+    MO_Immediate,          ///< Immediate operand
+    MO_FPImmediate,        ///< Floating-point immediate operand
+    MO_MachineBasicBlock,  ///< MachineBasicBlock reference
+    MO_FrameIndex,         ///< Abstract Stack Frame Index
+    MO_ConstantPoolIndex,  ///< Address of indexed Constant in Constant Pool
+    MO_JumpTableIndex,     ///< Address of indexed Jump Table for switch
+    MO_ExternalSymbol,     ///< Name of external global symbol
+    MO_GlobalAddress,      ///< Address of a global value
+    MO_BlockAddress,       ///< Address of a basic block
+    MO_Metadata,           ///< Metadata reference (for debug info)
+    MO_MCSymbol            ///< MCSymbol reference (for debug/eh info)
+  };
+
+private:
+  /// OpKind - Specify what kind of operand this is. This discriminates the
+  /// union.
+  unsigned char OpKind;   // MachineOperandType
+
+  /// SubReg - Subregister number, only valid for MO_Register. A value of 0
+  /// indicates the MO_Register has no subReg.
+  unsigned char SubReg;
+
+  /// TargetFlags - This is a set of target-specific operand flags.
+  unsigned char TargetFlags;
+
+  /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register
+  /// operands.
+
+  /// IsDef - True if this is a def, false if this is a use of the register.
+  ///
+  bool IsDef : 1;
+
+  /// IsImp - True if this is an implicit def or use, false if it is explicit.
+  ///
+  bool IsImp : 1;
+
+  /// IsKill - True if this instruction is the last use of the register on this
+  /// path through the function. This is only valid on uses of registers.
+  bool IsKill : 1;
+
+  /// IsDead - True if this register is never used by a subsequent instruction.
+  /// This is only valid on definitions of registers.
+  bool IsDead : 1;
+
+  /// IsUndef - True if this is a register def / use of "undef", i.e. register
+  /// defined by an IMPLICIT_DEF. This is only valid on registers.
+  bool IsUndef : 1;
+
+  /// IsEarlyClobber - True if this MO_Register 'def' operand is written to
+  /// by the MachineInstr before all input registers are read. This is used to
+  /// model the GCC inline asm '&' constraint modifier.
+  bool IsEarlyClobber : 1;
+
+  /// IsDebug - True if this MO_Register 'use' operand is in a debug pseudo,
+  /// not a real instruction. Such uses should be ignored during codegen.
+  bool IsDebug : 1;
+
+  /// SmallContents - This really should be part of the Contents union, but
+  /// lives out here so we can get a better packed struct.
+  /// MO_Register: Register number.
+  /// OffsetedInfo: Low bits of offset.
+  union {
+    unsigned RegNo;           // For MO_Register.
+    unsigned OffsetLo;        // Matches Contents.OffsetedInfo.OffsetHi.
+  } SmallContents;
+
+  /// ParentMI - This is the instruction that this operand is embedded into.
+  /// This is valid for all operand types, when the operand is in an instr.
+  MachineInstr *ParentMI;
+
+  /// Contents union - This contains the payload for the various operand types.
+  union {
+    MachineBasicBlock *MBB;   // For MO_MachineBasicBlock.
+    const ConstantFP *CFP;    // For MO_FPImmediate.
+    int64_t ImmVal;           // For MO_Immediate.
+    const MDNode *MD;         // For MO_Metadata.
+    MCSymbol *Sym;            // For MO_MCSymbol
+
+    struct {                  // For MO_Register.
+      // Register number is in SmallContents.RegNo.
+      MachineOperand **Prev;  // Access list for register.
+      MachineOperand *Next;
+    } Reg;
+
+    /// OffsetedInfo - This struct contains the offset and an object identifier.
+    /// It represents the object with an optional offset from it.
+    struct {
+      union {
+        int Index;                // For MO_*Index - The index itself.
+        const char *SymbolName;   // For MO_ExternalSymbol.
+        const GlobalValue *GV;    // For MO_GlobalAddress.
+        const BlockAddress *BA;   // For MO_BlockAddress.
+      } Val;
+      // Low bits of offset are in SmallContents.OffsetLo.
+      int OffsetHi;               // An offset from the object, high 32 bits.
+    } OffsetedInfo;
+  } Contents;
+
+  explicit MachineOperand(MachineOperandType K) : OpKind(K), ParentMI(0) {
+    TargetFlags = 0;
+  }
+public:
+  /// getType - Returns the MachineOperandType for this operand.
+  ///
+  MachineOperandType getType() const { return (MachineOperandType)OpKind; }
+
+  unsigned char getTargetFlags() const { return TargetFlags; }
+  void setTargetFlags(unsigned char F) { TargetFlags = F; }
+  void addTargetFlag(unsigned char F) { TargetFlags |= F; }
+
+
+  /// getParent - Return the instruction that this operand belongs to.
+  ///
+  MachineInstr *getParent() { return ParentMI; }
+  const MachineInstr *getParent() const { return ParentMI; }
+
+  /// clearParent - Reset the parent pointer.
+  ///
+  /// The MachineOperand copy constructor also copies ParentMI, expecting the
+  /// original to be deleted. If a MachineOperand is ever stored outside a
+  /// MachineInstr, the parent pointer must be cleared.
+  ///
+  /// Never call clearParent() on an operand in a MachineInstr.
+  ///
+  void clearParent() { ParentMI = 0; }
+
+  void print(raw_ostream &os, const TargetMachine *TM = 0) const;
+
+  //===--------------------------------------------------------------------===//
+  // Accessors that tell you what kind of MachineOperand you're looking at.
+  //===--------------------------------------------------------------------===//
+
+  /// isReg - Tests if this is a MO_Register operand.
+  bool isReg() const { return OpKind == MO_Register; }
+  /// isImm - Tests if this is a MO_Immediate operand.
+  bool isImm() const { return OpKind == MO_Immediate; }
+  /// isFPImm - Tests if this is a MO_FPImmediate operand.
+  bool isFPImm() const { return OpKind == MO_FPImmediate; }
+  /// isMBB - Tests if this is a MO_MachineBasicBlock operand.
+  bool isMBB() const { return OpKind == MO_MachineBasicBlock; }
+  /// isFI - Tests if this is a MO_FrameIndex operand.
+  bool isFI() const { return OpKind == MO_FrameIndex; }
+  /// isCPI - Tests if this is a MO_ConstantPoolIndex operand.
+  bool isCPI() const { return OpKind == MO_ConstantPoolIndex; }
+  /// isJTI - Tests if this is a MO_JumpTableIndex operand.
+  bool isJTI() const { return OpKind == MO_JumpTableIndex; }
+  /// isGlobal - Tests if this is a MO_GlobalAddress operand.
+  bool isGlobal() const { return OpKind == MO_GlobalAddress; }
+  /// isSymbol - Tests if this is a MO_ExternalSymbol operand.
+  bool isSymbol() const { return OpKind == MO_ExternalSymbol; }
+  /// isBlockAddress - Tests if this is a MO_BlockAddress operand.
+  bool isBlockAddress() const { return OpKind == MO_BlockAddress; }
+  /// isMetadata - Tests if this is a MO_Metadata operand.
+  bool isMetadata() const { return OpKind == MO_Metadata; }
+  bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
+
+  //===--------------------------------------------------------------------===//
+  // Accessors for Register Operands
+  //===--------------------------------------------------------------------===//
+
+  /// getReg - Returns the register number.
+  unsigned getReg() const {
+    assert(isReg() && "This is not a register operand!");
+    return SmallContents.RegNo;
+  }
+
+  unsigned getSubReg() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return (unsigned)SubReg;
+  }
+
+  bool isUse() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return !IsDef;
+  }
+
+  bool isDef() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsDef;
+  }
+
+  bool isImplicit() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsImp;
+  }
+
+  bool isDead() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsDead;
+  }
+
+  bool isKill() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsKill;
+  }
+
+  bool isUndef() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsUndef;
+  }
+
+  bool isEarlyClobber() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsEarlyClobber;
+  }
+
+  bool isDebug() const {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    return IsDebug;
+  }
+
+  /// getNextOperandForReg - Return the next MachineOperand in the function that
+  /// uses or defines this register.
+  MachineOperand *getNextOperandForReg() const {
+    assert(isReg() && "This is not a register operand!");
+    return Contents.Reg.Next;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Mutators for Register Operands
+  //===--------------------------------------------------------------------===//
+
+  /// Change the register this operand corresponds to.
+  ///
+  void setReg(unsigned Reg);
+
+  void setSubReg(unsigned subReg) {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    SubReg = (unsigned char)subReg;
+  }
+
+  /// substVirtReg - Substitute the current register with the virtual
+  /// subregister Reg:SubReg. Take any existing SubReg index into account,
+  /// using TargetRegisterInfo to compose the subreg indices if necessary.
+  /// Reg must be a virtual register, SubIdx can be 0.
+  ///
+  void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&);
+
+  /// substPhysReg - Substitute the current register with the physical register
+  /// Reg, taking any existing SubReg into account. For instance,
+  /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL.
+  ///
+  void substPhysReg(unsigned Reg, const TargetRegisterInfo&);
+
+  void setIsUse(bool Val = true) {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    assert((Val || !isDebug()) && "Marking a debug operation as def");
+    IsDef = !Val;
+  }
+
+  void setIsDef(bool Val = true) {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    assert((!Val || !isDebug()) && "Marking a debug operation as def");
+    IsDef = Val;
+  }
+
+  void setImplicit(bool Val = true) {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    IsImp = Val;
+  }
+
+  void setIsKill(bool Val = true) {
+    assert(isReg() && !IsDef && "Wrong MachineOperand accessor");
+    assert((!Val || !isDebug()) && "Marking a debug operation as kill");
+    IsKill = Val;
+  }
+
+  void setIsDead(bool Val = true) {
+    assert(isReg() && IsDef && "Wrong MachineOperand accessor");
+    IsDead = Val;
+  }
+
+  void setIsUndef(bool Val = true) {
+    assert(isReg() && "Wrong MachineOperand accessor");
+    IsUndef = Val;
+  }
+
+  void setIsEarlyClobber(bool Val = true) {
+    assert(isReg() && IsDef && "Wrong MachineOperand accessor");
+    IsEarlyClobber = Val;
+  }
+
+  void setIsDebug(bool Val = true) {
+    assert(isReg() && !IsDef && "Wrong MachineOperand accessor");
+    IsDebug = Val;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Accessors for various operand types.
+  //===--------------------------------------------------------------------===//
+
+  int64_t getImm() const {
+    assert(isImm() && "Wrong MachineOperand accessor");
+    return Contents.ImmVal;
+  }
+
+  const ConstantFP *getFPImm() const {
+    assert(isFPImm() && "Wrong MachineOperand accessor");
+    return Contents.CFP;
+  }
+
+  MachineBasicBlock *getMBB() const {
+    assert(isMBB() && "Wrong MachineOperand accessor");
+    return Contents.MBB;
+  }
+
+  int getIndex() const {
+    assert((isFI() || isCPI() || isJTI()) &&
+           "Wrong MachineOperand accessor");
+    return Contents.OffsetedInfo.Val.Index;
+  }
+
+  const GlobalValue *getGlobal() const {
+    assert(isGlobal() && "Wrong MachineOperand accessor");
+    return Contents.OffsetedInfo.Val.GV;
+  }
+
+  const BlockAddress *getBlockAddress() const {
+    assert(isBlockAddress() && "Wrong MachineOperand accessor");
+    return Contents.OffsetedInfo.Val.BA;
+  }
+
+  MCSymbol *getMCSymbol() const {
+    assert(isMCSymbol() && "Wrong MachineOperand accessor");
+    return Contents.Sym;
+  }
+
+  /// getOffset - Return the offset from the symbol in this operand. This always
+  /// returns 0 for ExternalSymbol operands.
+  int64_t getOffset() const {
+    assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
+           "Wrong MachineOperand accessor");
+    return (int64_t(Contents.OffsetedInfo.OffsetHi) << 32) |
+           SmallContents.OffsetLo;
+  }
+
+  const char *getSymbolName() const {
+    assert(isSymbol() && "Wrong MachineOperand accessor");
+    return Contents.OffsetedInfo.Val.SymbolName;
+  }
+
+  const MDNode *getMetadata() const {
+    assert(isMetadata() && "Wrong MachineOperand accessor");
+    return Contents.MD;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Mutators for various operand types.
+  //===--------------------------------------------------------------------===//
+
+  void setImm(int64_t immVal) {
+    assert(isImm() && "Wrong MachineOperand mutator");
+    Contents.ImmVal = immVal;
+  }
+
+  void setOffset(int64_t Offset) {
+    assert((isGlobal() || isSymbol() || isCPI() || isBlockAddress()) &&
+           "Wrong MachineOperand accessor");
+    SmallContents.OffsetLo = unsigned(Offset);
+    Contents.OffsetedInfo.OffsetHi = int(Offset >> 32);
+  }
+
+  void setIndex(int Idx) {
+    assert((isFI() || isCPI() || isJTI()) &&
+           "Wrong MachineOperand accessor");
+    Contents.OffsetedInfo.Val.Index = Idx;
+  }
+
+  void setMBB(MachineBasicBlock *MBB) {
+    assert(isMBB() && "Wrong MachineOperand accessor");
+    Contents.MBB = MBB;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Other methods.
+  //===--------------------------------------------------------------------===//
+
+  /// isIdenticalTo - Return true if this operand is identical to the specified
+  /// operand. Note: This method ignores isKill and isDead properties.
+  bool isIdenticalTo(const MachineOperand &Other) const;
+
+  /// ChangeToImmediate - Replace this operand with a new immediate operand of
+  /// the specified value. If an operand is known to be an immediate already,
+  /// the setImm method should be used.
+  void ChangeToImmediate(int64_t ImmVal);
+
+  /// ChangeToRegister - Replace this operand with a new register operand of
+  /// the specified value. If an operand is known to be a register already,
+  /// the setReg method should be used.
+  void ChangeToRegister(unsigned Reg, bool isDef, bool isImp = false,
+                        bool isKill = false, bool isDead = false,
+                        bool isUndef = false, bool isDebug = false);
+
+  //===--------------------------------------------------------------------===//
+  // Construction methods.
+  //===--------------------------------------------------------------------===//
+
+  static MachineOperand CreateImm(int64_t Val) {
+    MachineOperand Op(MachineOperand::MO_Immediate);
+    Op.setImm(Val);
+    return Op;
+  }
+
+  static MachineOperand CreateFPImm(const ConstantFP *CFP) {
+    MachineOperand Op(MachineOperand::MO_FPImmediate);
+    Op.Contents.CFP = CFP;
+    return Op;
+  }
+
+  static MachineOperand CreateReg(unsigned Reg, bool isDef, bool isImp = false,
+                                  bool isKill = false, bool isDead = false,
+                                  bool isUndef = false,
+                                  bool isEarlyClobber = false,
+                                  unsigned SubReg = 0,
+                                  bool isDebug = false) {
+    MachineOperand Op(MachineOperand::MO_Register);
+    Op.IsDef = isDef;
+    Op.IsImp = isImp;
+    Op.IsKill = isKill;
+    Op.IsDead = isDead;
+    Op.IsUndef = isUndef;
+    Op.IsEarlyClobber = isEarlyClobber;
+    Op.IsDebug = isDebug;
+    Op.SmallContents.RegNo = Reg;
+    Op.Contents.Reg.Prev = 0;
+    Op.Contents.Reg.Next = 0;
+    Op.SubReg = SubReg;
+    return Op;
+  }
+  static MachineOperand CreateMBB(MachineBasicBlock *MBB,
+                                  unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_MachineBasicBlock);
+    Op.setMBB(MBB);
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateFI(unsigned Idx) {
+    MachineOperand Op(MachineOperand::MO_FrameIndex);
+    Op.setIndex(Idx);
+    return Op;
+  }
+  static MachineOperand CreateCPI(unsigned Idx, int Offset,
+                                  unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_ConstantPoolIndex);
+    Op.setIndex(Idx);
+    Op.setOffset(Offset);
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateJTI(unsigned Idx,
+                                  unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_JumpTableIndex);
+    Op.setIndex(Idx);
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateGA(const GlobalValue *GV, int64_t Offset,
+                                 unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_GlobalAddress);
+    Op.Contents.OffsetedInfo.Val.GV = GV;
+    Op.setOffset(Offset);
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateES(const char *SymName,
+                                 unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_ExternalSymbol);
+    Op.Contents.OffsetedInfo.Val.SymbolName = SymName;
+    Op.setOffset(0); // Offset is always 0.
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateBA(const BlockAddress *BA,
+                                 unsigned char TargetFlags = 0) {
+    MachineOperand Op(MachineOperand::MO_BlockAddress);
+    Op.Contents.OffsetedInfo.Val.BA = BA;
+    Op.setOffset(0); // Offset is always 0.
+    Op.setTargetFlags(TargetFlags);
+    return Op;
+  }
+  static MachineOperand CreateMetadata(const MDNode *Meta) {
+    MachineOperand Op(MachineOperand::MO_Metadata);
+    Op.Contents.MD = Meta;
+    return Op;
+  }
+
+  static MachineOperand CreateMCSymbol(MCSymbol *Sym) {
+    MachineOperand Op(MachineOperand::MO_MCSymbol);
+    Op.Contents.Sym = Sym;
+    return Op;
+  }
+
+  friend class MachineInstr;
+  friend class MachineRegisterInfo;
+private:
+  //===--------------------------------------------------------------------===//
+  // Methods for handling register use/def lists.
+  //===--------------------------------------------------------------------===//
+
+  /// isOnRegUseList - Return true if this operand is on a register use/def list
+  /// or false if not. This can only be called for register operands that are
+  /// part of a machine instruction.
+  bool isOnRegUseList() const {
+    assert(isReg() && "Can only add reg operand to use lists");
+    return Contents.Reg.Prev != 0;
+  }
+
+  /// AddRegOperandToRegInfo - Add this register operand to the specified
+  /// MachineRegisterInfo. If it is null, then the next/prev fields should be
+  /// explicitly nulled out.
+  void AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo);
+
+  /// RemoveRegOperandFromRegInfo - Remove this register operand from the
+  /// MachineRegisterInfo it is linked with.
+  void RemoveRegOperandFromRegInfo();
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const MachineOperand& MO) {
+  MO.print(OS, 0);
+  return OS;
+}
+
+} // End llvm namespace
+
+#endif
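A short sketch of constructing and querying operands with the static Create* helpers declared above; the register number 1024 is arbitrary and the function name is hypothetical:

    #include "llvm/CodeGen/MachineOperand.h"
    using namespace llvm;

    void operandBasics() {
      MachineOperand Imm = MachineOperand::CreateImm(42);
      MachineOperand Def = MachineOperand::CreateReg(1024, /*isDef=*/true);
      MachineOperand Use = MachineOperand::CreateReg(1024, /*isDef=*/false,
                                                     /*isImp=*/false,
                                                     /*isKill=*/true);
      if (Imm.isImm() && Imm.getImm() == 42 && Def.isDef() && Use.isKill())
        Use.setIsKill(false); // kill flags may be cleared conservatively
    }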
diff --git a/final/include/llvm/CodeGen/MachinePassRegistry.h b/final/include/llvm/CodeGen/MachinePassRegistry.h
new file mode 100644
index 00000000000..6ee2e90a9f5
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachinePassRegistry.h
@@ -0,0 +1,156 @@
+//===-- llvm/CodeGen/MachinePassRegistry.h ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the mechanics for machine function pass registries. A
+// function pass registry (MachinePassRegistry) is auto filled by the static
+// constructors of MachinePassRegistryNode. Further there is a command line
+// parser (RegisterPassParser) which listens to each registry for additions
+// and deletions, so that the appropriate command option is updated.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEPASSREGISTRY_H
+#define LLVM_CODEGEN_MACHINEPASSREGISTRY_H
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+
+typedef void *(*MachinePassCtor)();
+
+
+//===----------------------------------------------------------------------===//
+///
+/// MachinePassRegistryListener - Listener for additions and removals of nodes
+/// in the registration list.
+///
+//===----------------------------------------------------------------------===//
+class MachinePassRegistryListener {
+public:
+  MachinePassRegistryListener() {}
+  virtual ~MachinePassRegistryListener() {}
+  virtual void NotifyAdd(const char *N, MachinePassCtor C, const char *D) = 0;
+  virtual void NotifyRemove(const char *N) = 0;
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// MachinePassRegistryNode - Machine pass node stored in registration list.
+///
+//===----------------------------------------------------------------------===//
+class MachinePassRegistryNode {
+
+private:
+
+  MachinePassRegistryNode *Next;        // Next function pass in list.
+  const char *Name;                     // Name of function pass.
+  const char *Description;              // Description string.
+  MachinePassCtor Ctor;                 // Function pass creator.
+
+public:
+
+  MachinePassRegistryNode(const char *N, const char *D, MachinePassCtor C)
+  : Next(NULL)
+  , Name(N)
+  , Description(D)
+  , Ctor(C)
+  {}
+
+  // Accessors
+  MachinePassRegistryNode *getNext()      const { return Next; }
+  MachinePassRegistryNode **getNextAddress()    { return &Next; }
+  const char *getName()                   const { return Name; }
+  const char *getDescription()            const { return Description; }
+  MachinePassCtor getCtor()               const { return Ctor; }
+  void setNext(MachinePassRegistryNode *N)      { Next = N; }
+
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// MachinePassRegistry - Track the registration of machine passes.
+///
+//===----------------------------------------------------------------------===//
+class MachinePassRegistry {
+
+private:
+
+  MachinePassRegistryNode *List;        // List of registry nodes.
+  MachinePassCtor Default;              // Default function pass creator.
+  MachinePassRegistryListener *Listener; // Listener for list adds and removes.
+
+public:
+
+  // NO CONSTRUCTOR - we don't want static constructor ordering to mess
+  // with the registry.
+
+  // Accessors.
+  //
+  MachinePassRegistryNode *getList()                    { return List; }
+  MachinePassCtor getDefault()                          { return Default; }
+  void setDefault(MachinePassCtor C)                    { Default = C; }
+  void setListener(MachinePassRegistryListener *L)      { Listener = L; }
+
+  /// Add - Adds a function pass to the registration list.
+  ///
+  void Add(MachinePassRegistryNode *Node);
+
+  /// Remove - Removes a function pass from the registration list.
+  ///
+  void Remove(MachinePassRegistryNode *Node);
+
+};
+
+
+//===----------------------------------------------------------------------===//
+///
+/// RegisterPassParser class - Handle the addition of new machine passes.
+///
+//===----------------------------------------------------------------------===//
+template<class RegistryClass>
+class RegisterPassParser : public MachinePassRegistryListener,
+                   public cl::parser<typename RegistryClass::FunctionPassCtor> {
+public:
+  RegisterPassParser() {}
+  ~RegisterPassParser() { RegistryClass::setListener(NULL); }
+
+  void initialize(cl::Option &O) {
+    cl::parser<typename RegistryClass::FunctionPassCtor>::initialize(O);
+
+    // Add existing passes to option.
+    for (RegistryClass *Node = RegistryClass::getList();
+         Node; Node = Node->getNext()) {
+      this->addLiteralOption(Node->getName(),
+                      (typename RegistryClass::FunctionPassCtor)Node->getCtor(),
+                             Node->getDescription());
+    }
+
+    // Make sure we listen for list changes.
+    RegistryClass::setListener(this);
+  }
+
+  // Implement the MachinePassRegistryListener callbacks.
+  //
+  virtual void NotifyAdd(const char *N,
+                         MachinePassCtor C,
+                         const char *D) {
+    this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D);
+  }
+  virtual void NotifyRemove(const char *N) {
+    this->removeLiteralOption(N);
+  }
+};
+
+
+} // end namespace llvm
+
+#endif
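A RegistryClass for RegisterPassParser is any registry exposing getList/setListener and a FunctionPassCtor typedef. A sketch of how such a registry is typically wired to a command-line option, assuming the RegisterRegAlloc registry from llvm/CodeGen/RegAllocRegistry.h; the option name and null default here are illustrative only:

    #include "llvm/CodeGen/RegAllocRegistry.h"
    #include "llvm/Support/CommandLine.h"
    using namespace llvm;

    // Every allocator registered via RegisterRegAlloc automatically becomes a
    // literal value of this option, and later registrations are picked up
    // through the listener callbacks.
    static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
                   RegisterPassParser<RegisterRegAlloc> >
    ExampleRegAlloc("example-regalloc",
                    cl::init(0), // a real client would default to an allocator
                    cl::desc("Register allocator to use"));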
diff --git a/final/include/llvm/CodeGen/MachineRegisterInfo.h b/final/include/llvm/CodeGen/MachineRegisterInfo.h
new file mode 100644
index 00000000000..74df8da20ed
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -0,0 +1,404 @@
+//===-- llvm/CodeGen/MachineRegisterInfo.h ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H
+#define LLVM_CODEGEN_MACHINEREGISTERINFO_H
+
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/IndexedMap.h"
+#include <vector>
+
+namespace llvm {
+
+/// MachineRegisterInfo - Keep track of information for virtual and physical
+/// registers, including vreg register classes, use/def chains for registers,
+/// etc.
+class MachineRegisterInfo {
+  /// VRegInfo - Information we keep for each virtual register.
+  ///
+  /// Each element in this list contains the register class of the vreg and the
+  /// start of the use/def list for the register.
+  IndexedMap<std::pair<const TargetRegisterClass*, MachineOperand*>,
+             VirtReg2IndexFunctor> VRegInfo;
+
+  /// RegClassVRegMap - This vector acts as a map from TargetRegisterClass to
+  /// virtual registers. For each target register class, it keeps a list of
+  /// virtual registers belonging to the class.
+  std::vector<unsigned> *RegClass2VRegMap;
+
+  /// RegAllocHints - This vector records register allocation hints for virtual
+  /// registers. For each virtual register, it keeps a register and hint type
+  /// pair making up the allocation hint. Hint type is target specific except
+  /// for the value 0 which means the second value of the pair is the preferred
+  /// register for allocation. For example, if the hint is <0, 1024>, it means
+  /// the allocator should prefer the physical register allocated to the virtual
+  /// register of the hint.
+  IndexedMap<std::pair<unsigned, unsigned>, VirtReg2IndexFunctor> RegAllocHints;
+
+  /// PhysRegUseDefLists - This is an array of the head of the use/def list for
+  /// physical registers.
+  MachineOperand **PhysRegUseDefLists;
+
+  /// UsedPhysRegs - This is a bit vector that is computed and set by the
+  /// register allocator, and must be kept up to date by passes that run after
+  /// register allocation (though most don't modify this). This is used
+  /// so that the code generator knows which callee save registers to save and
+  /// for other target specific uses.
+  BitVector UsedPhysRegs;
+
+  /// LiveIns/LiveOuts - Keep track of the physical registers that are
+  /// livein/liveout of the function. Live in values are typically arguments in
+  /// registers, live out values are typically return values in registers.
+  /// LiveIn values are allowed to have virtual registers associated with them,
+  /// stored in the second element.
+  std::vector<std::pair<unsigned, unsigned> > LiveIns;
+  std::vector<unsigned> LiveOuts;
+
+  MachineRegisterInfo(const MachineRegisterInfo&); // DO NOT IMPLEMENT
+  void operator=(const MachineRegisterInfo&);      // DO NOT IMPLEMENT
+public:
+  explicit MachineRegisterInfo(const TargetRegisterInfo &TRI);
+  ~MachineRegisterInfo();
+
+  //===--------------------------------------------------------------------===//
+  // Register Info
+  //===--------------------------------------------------------------------===//
+
+  /// reg_begin/reg_end - Provide iteration support to walk over all definitions
+  /// and uses of a register within the MachineFunction that corresponds to this
+  /// MachineRegisterInfo object.
+  template<bool ReturnUses, bool ReturnDefs, bool SkipDebug>
+  class defusechain_iterator;
+
+  /// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified
+  /// register.
+  typedef defusechain_iterator<true,true,false> reg_iterator;
+  reg_iterator reg_begin(unsigned RegNo) const {
+    return reg_iterator(getRegUseDefListHead(RegNo));
+  }
+  static reg_iterator reg_end() { return reg_iterator(0); }
+
+  /// reg_empty - Return true if there are no instructions using or defining the
+  /// specified register (it may be live-in).
+  bool reg_empty(unsigned RegNo) const { return reg_begin(RegNo) == reg_end(); }
+
+  /// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses
+  /// of the specified register, skipping those marked as Debug.
+  typedef defusechain_iterator<true,true,true> reg_nodbg_iterator;
+  reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
+    return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
+  }
+  static reg_nodbg_iterator reg_nodbg_end() { return reg_nodbg_iterator(0); }
+
+  /// reg_nodbg_empty - Return true if the only instructions using or defining
+  /// Reg are Debug instructions.
+  bool reg_nodbg_empty(unsigned RegNo) const {
+    return reg_nodbg_begin(RegNo) == reg_nodbg_end();
+  }
+
+  /// def_iterator/def_begin/def_end - Walk all defs of the specified register.
+  typedef defusechain_iterator<false,true,false> def_iterator;
+  def_iterator def_begin(unsigned RegNo) const {
+    return def_iterator(getRegUseDefListHead(RegNo));
+  }
+  static def_iterator def_end() { return def_iterator(0); }
+
+  /// def_empty - Return true if there are no instructions defining the
+  /// specified register (it may be live-in).
+  bool def_empty(unsigned RegNo) const { return def_begin(RegNo) == def_end(); }
+
+  /// use_iterator/use_begin/use_end - Walk all uses of the specified register.
+  typedef defusechain_iterator<true,false,false> use_iterator;
+  use_iterator use_begin(unsigned RegNo) const {
+    return use_iterator(getRegUseDefListHead(RegNo));
+  }
+  static use_iterator use_end() { return use_iterator(0); }
+
+  /// use_empty - Return true if there are no instructions using the specified
+  /// register.
+  bool use_empty(unsigned RegNo) const { return use_begin(RegNo) == use_end(); }
+
+  /// hasOneUse - Return true if there is exactly one instruction using the
+  /// specified register.
+  bool hasOneUse(unsigned RegNo) const;
+
+  /// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
+  /// specified register, skipping those marked as Debug.
+  typedef defusechain_iterator<true,false,true> use_nodbg_iterator;
+  use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const {
+    return use_nodbg_iterator(getRegUseDefListHead(RegNo));
+  }
+  static use_nodbg_iterator use_nodbg_end() { return use_nodbg_iterator(0); }
+
+  /// use_nodbg_empty - Return true if there are no non-Debug instructions
+  /// using the specified register.
+  bool use_nodbg_empty(unsigned RegNo) const {
+    return use_nodbg_begin(RegNo) == use_nodbg_end();
+  }
+
+  /// hasOneNonDBGUse - Return true if there is exactly one non-Debug
+  /// instruction using the specified register.
+  bool hasOneNonDBGUse(unsigned RegNo) const;
+
+  /// replaceRegWith - Replace all instances of FromReg with ToReg in the
+  /// machine function. This is like llvm-level X->replaceAllUsesWith(Y),
+  /// except that it also changes any definitions of the register as well.
+  void replaceRegWith(unsigned FromReg, unsigned ToReg);
+
+  /// getRegUseDefListHead - Return the head pointer for the register use/def
+  /// list for the specified virtual or physical register.
+  MachineOperand *&getRegUseDefListHead(unsigned RegNo) {
+    if (TargetRegisterInfo::isVirtualRegister(RegNo))
+      return VRegInfo[RegNo].second;
+    return PhysRegUseDefLists[RegNo];
+  }
+
+  MachineOperand *getRegUseDefListHead(unsigned RegNo) const {
+    if (TargetRegisterInfo::isVirtualRegister(RegNo))
+      return VRegInfo[RegNo].second;
+    return PhysRegUseDefLists[RegNo];
+  }
+
+  /// getVRegDef - Return the machine instr that defines the specified virtual
+  /// register or null if none is found. This assumes that the code is in SSA
+  /// form, so there should only be one definition.
+  MachineInstr *getVRegDef(unsigned Reg) const;
+
+  /// clearKillFlags - Iterate over all the uses of the given register and
+  /// clear the kill flag from the MachineOperand. This function is used by
+  /// optimization passes which extend register lifetimes and need only
+  /// preserve conservative kill flag information.
+  void clearKillFlags(unsigned Reg) const;
+
+#ifndef NDEBUG
+  void dumpUses(unsigned RegNo) const;
+#endif
+
+  //===--------------------------------------------------------------------===//
+  // Virtual Register Info
+  //===--------------------------------------------------------------------===//
+
+  /// getRegClass - Return the register class of the specified virtual register.
+  ///
+  const TargetRegisterClass *getRegClass(unsigned Reg) const {
+    return VRegInfo[Reg].first;
+  }
+
+  /// setRegClass - Set the register class of the specified virtual register.
+  ///
+  void setRegClass(unsigned Reg, const TargetRegisterClass *RC);
+
+  /// constrainRegClass - Constrain the register class of the specified virtual
+  /// register to be a common subclass of RC and the current register class.
+  /// Return the new register class, or NULL if no such class exists.
+  /// This should only be used when the constraint is known to be trivial, like
+  /// GR32 -> GR32_NOSP. Beware of increasing register pressure.
+  const TargetRegisterClass *constrainRegClass(unsigned Reg,
+                                               const TargetRegisterClass *RC);
+
+  /// createVirtualRegister - Create and return a new virtual register in the
+  /// function with the specified register class.
+  ///
+  unsigned createVirtualRegister(const TargetRegisterClass *RegClass);
+
+  /// getNumVirtRegs - Return the number of virtual registers created.
+  ///
+  unsigned getNumVirtRegs() const { return VRegInfo.size(); }
+
+  /// getRegClassVirtRegs - Return the list of virtual registers of the given
+  /// target register class.
+  const std::vector<unsigned> &
+  getRegClassVirtRegs(const TargetRegisterClass *RC) const {
+    return RegClass2VRegMap[RC->getID()];
+  }
+
+  /// setRegAllocationHint - Specify a register allocation hint for the
+  /// specified virtual register.
+  void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
+    RegAllocHints[Reg].first  = Type;
+    RegAllocHints[Reg].second = PrefReg;
+  }
+
+  /// getRegAllocationHint - Return the register allocation hint for the
+  /// specified virtual register.
+  std::pair<unsigned, unsigned>
+  getRegAllocationHint(unsigned Reg) const {
+    return RegAllocHints[Reg];
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Physical Register Use Info
+  //===--------------------------------------------------------------------===//
+
+  /// isPhysRegUsed - Return true if the specified register is used in this
+  /// function. This only works after register allocation.
+  bool isPhysRegUsed(unsigned Reg) const { return UsedPhysRegs[Reg]; }
+
+  /// setPhysRegUsed - Mark the specified register used in this function.
+  /// This should only be called during and after register allocation.
+  void setPhysRegUsed(unsigned Reg) { UsedPhysRegs[Reg] = true; }
+
+  /// addPhysRegsUsed - Mark the specified registers used in this function.
+  /// This should only be called during and after register allocation.
+  void addPhysRegsUsed(const BitVector &Regs) { UsedPhysRegs |= Regs; }
+
+  /// setPhysRegUnused - Mark the specified register unused in this function.
+  /// This should only be called during and after register allocation.
+  void setPhysRegUnused(unsigned Reg) { UsedPhysRegs[Reg] = false; }
+
+  /// closePhysRegsUsed - Expand UsedPhysRegs to its transitive closure over
+  /// subregisters. That means that if R is used, so are all subregisters.
+  void closePhysRegsUsed(const TargetRegisterInfo&);
+
+  //===--------------------------------------------------------------------===//
+  // LiveIn/LiveOut Management
+  //===--------------------------------------------------------------------===//
+
+  /// addLiveIn/Out - Add the specified register as a live in/out. Note that it
+  /// is an error to add the same register to the same set more than once.
+  void addLiveIn(unsigned Reg, unsigned vreg = 0) {
+    LiveIns.push_back(std::make_pair(Reg, vreg));
+  }
+  void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); }
+
+  // Iteration support for live in/out sets. These sets are kept in sorted
+  // order by their register number.
+  typedef std::vector<std::pair<unsigned,unsigned> >::const_iterator
+  livein_iterator;
+  typedef std::vector<unsigned>::const_iterator liveout_iterator;
+  livein_iterator livein_begin() const { return LiveIns.begin(); }
+  livein_iterator livein_end()   const { return LiveIns.end(); }
+  bool            livein_empty() const { return LiveIns.empty(); }
+  liveout_iterator liveout_begin() const { return LiveOuts.begin(); }
+  liveout_iterator liveout_end()   const { return LiveOuts.end(); }
+  bool             liveout_empty() const { return LiveOuts.empty(); }
+
+  bool isLiveIn(unsigned Reg) const;
+  bool isLiveOut(unsigned Reg) const;
+
+  /// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+  /// corresponding live-in physical register.
+  unsigned getLiveInPhysReg(unsigned VReg) const;
+
+  /// getLiveInVirtReg - If PReg is a live-in physical register, return the
+  /// corresponding live-in virtual register.
+  unsigned getLiveInVirtReg(unsigned PReg) const;
+
+  /// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+  /// into the given entry block.
+  void EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                        const TargetRegisterInfo &TRI,
+                        const TargetInstrInfo &TII);
+
+private:
+  void HandleVRegListReallocation();
+
+public:
+  /// defusechain_iterator - This class provides iterator support for machine
+  /// operands in the function that use or define a specific register. If
+  /// ReturnUses is true it returns uses of registers, if ReturnDefs is true it
+  /// returns defs. If neither are true then you are silly and it always
+  /// returns end(). If SkipDebug is true it skips uses marked Debug
+  /// when incrementing.
+  template<bool ReturnUses, bool ReturnDefs, bool SkipDebug>
+  class defusechain_iterator
+    : public std::iterator<std::forward_iterator_tag, MachineInstr, ptrdiff_t> {
+    MachineOperand *Op;
+    explicit defusechain_iterator(MachineOperand *op) : Op(op) {
+      // If the first node isn't one we're interested in, advance to one that
+      // we are interested in.
+      if (op) {
+        if ((!ReturnUses && op->isUse()) ||
+            (!ReturnDefs && op->isDef()) ||
+            (SkipDebug && op->isDebug()))
+          ++*this;
+      }
+    }
+    friend class MachineRegisterInfo;
+  public:
+    typedef std::iterator<std::forward_iterator_tag,
+                          MachineInstr, ptrdiff_t>::reference reference;
+    typedef std::iterator<std::forward_iterator_tag,
+                          MachineInstr, ptrdiff_t>::pointer pointer;
+
+    defusechain_iterator(const defusechain_iterator &I) : Op(I.Op) {}
+    defusechain_iterator() : Op(0) {}
+
+    bool operator==(const defusechain_iterator &x) const {
+      return Op == x.Op;
+    }
+    bool operator!=(const defusechain_iterator &x) const {
+      return !operator==(x);
+    }
+
+    /// atEnd - return true if this iterator is equal to reg_end() on the value.
+    bool atEnd() const { return Op == 0; }
+
+    // Iterator traversal: forward iteration only
+    defusechain_iterator &operator++() {          // Preincrement
+      assert(Op && "Cannot increment end iterator!");
+      Op = Op->getNextOperandForReg();
+
+      // If this is an operand we don't care about, skip it.
+      while (Op && ((!ReturnUses && Op->isUse()) ||
+                    (!ReturnDefs && Op->isDef()) ||
+                    (SkipDebug && Op->isDebug())))
+        Op = Op->getNextOperandForReg();
+
+      return *this;
+    }
+    defusechain_iterator operator++(int) {        // Postincrement
+      defusechain_iterator tmp = *this; ++*this; return tmp;
+    }
+
+    /// skipInstruction - move forward until reaching a different instruction.
+    /// Return the skipped instruction that is no longer pointed to, or NULL if
+    /// already pointing to end().
+    MachineInstr *skipInstruction() {
+      if (!Op) return 0;
+      MachineInstr *MI = Op->getParent();
+      do ++*this;
+      while (Op && Op->getParent() == MI);
+      return MI;
+    }
+
+    MachineOperand &getOperand() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return *Op;
+    }
+
+    /// getOperandNo - Return the operand # of this MachineOperand in its
+    /// MachineInstr.
+    unsigned getOperandNo() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return Op - &Op->getParent()->getOperand(0);
+    }
+
+    // Retrieve a reference to the instruction containing the current operand.
+    MachineInstr &operator*() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return *Op->getParent();
+    }
+
+    MachineInstr *operator->() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return Op->getParent();
+    }
+  };
+
+};
+
+} // End llvm namespace
+
+#endif
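A sketch of walking a register's use list with the iterators declared above, here clearing kill flags on all non-debug uses by hand; MRI and Reg are hypothetical stand-ins for a pass's state:

    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/MachineOperand.h"
    using namespace llvm;

    void clearKillsManually(MachineRegisterInfo &MRI, unsigned Reg) {
      for (MachineRegisterInfo::use_nodbg_iterator
             I = MRI.use_nodbg_begin(Reg), E = MRI.use_nodbg_end();
           I != E; ++I) {
        MachineOperand &MO = I.getOperand(); // the use itself, not its instr
        if (MO.isKill())
          MO.setIsKill(false);
      }
    }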
diff --git a/final/include/llvm/CodeGen/MachineRelocation.h b/final/include/llvm/CodeGen/MachineRelocation.h
new file mode 100644
index 00000000000..244b466e172
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineRelocation.h
@@ -0,0 +1,342 @@
+//===-- llvm/CodeGen/MachineRelocation.h - Target Relocation ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineRelocation class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINERELOCATION_H
+#define LLVM_CODEGEN_MACHINERELOCATION_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+class GlobalValue;
+class MachineBasicBlock;
+
+/// MachineRelocation - This represents a target-specific relocation value,
+/// produced by the code emitter. This relocation is resolved after the code
+/// has been emitted, either to an object file or to memory, when the target
+/// of the relocation can be resolved.
+///
+/// A relocation is made up of the following logical portions:
+///   1. An offset in the machine code buffer, the location to modify.
+///   2. A target specific relocation type (a number from 0 to 63).
+///   3. A symbol being referenced, either as a GlobalValue* or as a string.
+///   4. An optional constant value to be added to the reference.
+///   5. A bit, CanRewrite, which indicates to the JIT that a function stub is
+///      not needed for the relocation.
+///   6. An index into the GOT, if the target uses a GOT.
+///
+class MachineRelocation {
+  enum AddressType {
+    isResult,         // Relocation has been transformed into its result pointer.
+    isGV,             // The Target.GV field is valid.
+    isIndirectSym,    // Relocation of an indirect symbol.
+    isBB,             // Relocation of BB address.
+    isExtSym,         // The Target.ExtSym field is valid.
+    isConstPool,      // Relocation of constant pool address.
+    isJumpTable,      // Relocation of jump table address.
+    isGOTIndex        // The Target.GOTIndex field is valid.
+  };
+
+  /// Offset - This is the offset from the start of the code buffer of the
+  /// relocation to perform.
+  uintptr_t Offset;
+
+  /// ConstantVal - A field that may be used by the target relocation type.
+  intptr_t ConstantVal;
+
+  union {
+    void *Result;           // If this has been resolved to a resolved pointer
+    GlobalValue *GV;        // If this is a pointer to a GV or an indirect ref.
+    MachineBasicBlock *MBB; // If this is a pointer to an LLVM BB
+    const char *ExtSym;     // If this is a pointer to a named symbol
+    unsigned Index;         // Constant pool / jump table index
+    unsigned GOTIndex;      // Index in the GOT of this symbol/global
+  } Target;
+
+  unsigned TargetReloType : 6; // The target relocation ID
+  AddressType AddrType    : 4; // The field of Target to use
+  bool MayNeedFarStub     : 1; // True if this relocation may require a far-stub
+  bool GOTRelative        : 1; // Should this relocation be relative to the GOT?
+  bool TargetResolve      : 1; // True if target should resolve the address
+
+public:
+  // Relocation types used in a generic implementation. Currently, relocation
+  // entries for all things use the generic VANILLA type until they are refined
+  // into target relocation types.
+  enum RelocationType {
+    VANILLA
+  };
+
+  /// MachineRelocation::getGV - Return a relocation entry for a GlobalValue.
+  ///
+  static MachineRelocation getGV(uintptr_t offset, unsigned RelocationType,
+                                 GlobalValue *GV, intptr_t cst = 0,
+                                 bool MayNeedFarStub = 0,
+                                 bool GOTrelative = 0) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isGV;
+    Result.MayNeedFarStub = MayNeedFarStub;
+    Result.GOTRelative = GOTrelative;
+    Result.TargetResolve = false;
+    Result.Target.GV = GV;
+    return Result;
+  }
+
+  /// MachineRelocation::getIndirectSymbol - Return a relocation entry for an
+  /// indirect symbol.
+  static MachineRelocation getIndirectSymbol(uintptr_t offset,
+                                             unsigned RelocationType,
+                                             GlobalValue *GV, intptr_t cst = 0,
+                                             bool MayNeedFarStub = 0,
+                                             bool GOTrelative = 0) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isIndirectSym;
+    Result.MayNeedFarStub = MayNeedFarStub;
+    Result.GOTRelative = GOTrelative;
+    Result.TargetResolve = false;
+    Result.Target.GV = GV;
+    return Result;
+  }
+
+  /// MachineRelocation::getBB - Return a relocation entry for a BB.
+  ///
+  static MachineRelocation getBB(uintptr_t offset, unsigned RelocationType,
+                                 MachineBasicBlock *MBB, intptr_t cst = 0) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isBB;
+    Result.MayNeedFarStub = false;
+    Result.GOTRelative = false;
+    Result.TargetResolve = false;
+    Result.Target.MBB = MBB;
+    return Result;
+  }
+
+  /// MachineRelocation::getExtSym - Return a relocation entry for an external
+  /// symbol, like "free".
+  ///
+  static MachineRelocation getExtSym(uintptr_t offset, unsigned RelocationType,
+                                     const char *ES, intptr_t cst = 0,
+                                     bool GOTrelative = 0,
+                                     bool NeedStub = true) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isExtSym;
+    Result.MayNeedFarStub = NeedStub;
+    Result.GOTRelative = GOTrelative;
+    Result.TargetResolve = false;
+    Result.Target.ExtSym = ES;
+    return Result;
+  }
+
+  /// MachineRelocation::getConstPool - Return a relocation entry for a constant
+  /// pool entry.
+  ///
+  static MachineRelocation getConstPool(uintptr_t offset, unsigned RelocationType,
+                                        unsigned CPI, intptr_t cst = 0,
+                                        bool letTargetResolve = false) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isConstPool;
+    Result.MayNeedFarStub = false;
+    Result.GOTRelative = false;
+    Result.TargetResolve = letTargetResolve;
+    Result.Target.Index = CPI;
+    return Result;
+  }
+
+  /// MachineRelocation::getJumpTable - Return a relocation entry for a jump
+  /// table entry.
+  ///
+  static MachineRelocation getJumpTable(uintptr_t offset, unsigned RelocationType,
+                                        unsigned JTI, intptr_t cst = 0,
+                                        bool letTargetResolve = false) {
+    assert((RelocationType & ~63) == 0 && "Relocation type too large!");
+    MachineRelocation Result;
+    Result.Offset = offset;
+    Result.ConstantVal = cst;
+    Result.TargetReloType = RelocationType;
+    Result.AddrType = isJumpTable;
+    Result.MayNeedFarStub = false;
+    Result.GOTRelative = false;
+    Result.TargetResolve = letTargetResolve;
+    Result.Target.Index = JTI;
+    return Result;
+  }
+
+  /// getMachineCodeOffset - Return the offset into the code buffer at which
+  /// the relocation should be performed.
+  intptr_t getMachineCodeOffset() const {
+    return Offset;
+  }
+
+  /// getRelocationType - Return the target-specific relocation ID for this
+  /// relocation.
+  unsigned getRelocationType() const {
+    return TargetReloType;
+  }
+
+  /// getConstantVal - Get the constant value associated with this relocation.
+  /// This is often an offset from the symbol.
+  ///
+  intptr_t getConstantVal() const {
+    return ConstantVal;
+  }
+
+  /// setConstantVal - Set the constant value associated with this relocation.
+  /// This is often an offset from the symbol.
+  ///
+  void setConstantVal(intptr_t val) {
+    ConstantVal = val;
+  }
+
+  /// isGlobalValue - Return true if this relocation is a GlobalValue, as
+  /// opposed to a constant string.
+  bool isGlobalValue() const {
+    return AddrType == isGV;
+  }
+
+  /// isIndirectSymbol - Return true if this relocation is the address of an
+  /// indirect symbol.
+  bool isIndirectSymbol() const {
+    return AddrType == isIndirectSym;
+  }
+
+  /// isBasicBlock - Return true if this relocation is a basic block reference.
+  ///
+  bool isBasicBlock() const {
+    return AddrType == isBB;
+  }
+
+  /// isExternalSymbol - Return true if this is a constant string.
+  ///
+  bool isExternalSymbol() const {
+    return AddrType == isExtSym;
+  }
+
+  /// isConstantPoolIndex - Return true if this is a constant pool reference.
+  ///
+  bool isConstantPoolIndex() const {
+    return AddrType == isConstPool;
+  }
+
+  /// isJumpTableIndex - Return true if this is a jump table reference.
+  ///
+  bool isJumpTableIndex() const {
+    return AddrType == isJumpTable;
+  }
+
+  /// isGOTRelative - Return true if the target wants the index into the GOT of
+  /// the symbol rather than the address of the symbol.
+  bool isGOTRelative() const {
+    return GOTRelative;
+  }
+
+  /// mayNeedFarStub - This function returns true if the JIT for this target may
+  /// need either a stub function or an indirect global-variable load to handle
+  /// the relocated GlobalValue reference. For example, the x86-64 call
+  /// instruction can only call functions within +/-2GB of the call site.
+  /// Anything farther away needs a longer mov+call sequence, which can't just
+  /// be written on top of the existing call.
+  bool mayNeedFarStub() const {
+    return MayNeedFarStub;
+  }
+
+  /// letTargetResolve - Return true if the target JITInfo is usually
+  /// responsible for resolving the address of this relocation.
+  bool letTargetResolve() const {
+    return TargetResolve;
+  }
+
+  /// getGlobalValue - If this is a global value reference, return the
+  /// referenced global.
+  GlobalValue *getGlobalValue() const {
+    assert((isGlobalValue() || isIndirectSymbol()) &&
+           "This is not a global value reference!");
+    return Target.GV;
+  }
+
+  MachineBasicBlock *getBasicBlock() const {
+    assert(isBasicBlock() && "This is not a basic block reference!");
+    return Target.MBB;
+  }
+
+  /// getExternalSymbol - If this is an external symbol reference, return the
+  /// symbol name string.
+  const char *getExternalSymbol() const {
+    assert(isExternalSymbol() && "This is not an external symbol reference!");
+    return Target.ExtSym;
+  }
+
+  /// getConstantPoolIndex - If this is a const pool reference, return
+  /// the index into the constant pool.
+  unsigned getConstantPoolIndex() const {
+    assert(isConstantPoolIndex() && "This is not a constant pool reference!");
+    return Target.Index;
+  }
+
+  /// getJumpTableIndex - If this is a jump table reference, return
+  /// the index into the jump table.
+  unsigned getJumpTableIndex() const {
+    assert(isJumpTableIndex() && "This is not a jump table reference!");
+    return Target.Index;
+  }
+
+  /// getResultPointer - Once this has been resolved to point to an actual
+  /// address, this returns the pointer.
+  void *getResultPointer() const {
+    assert(AddrType == isResult && "Result pointer isn't set yet!");
+    return Target.Result;
+  }
+
+  /// setResultPointer - Set the result to the specified pointer value.
+  ///
+  void setResultPointer(void *Ptr) {
+    Target.Result = Ptr;
+    AddrType = isResult;
+  }
+
+  /// setGOTIndex - Set the GOT index to a specific value.
+  void setGOTIndex(unsigned idx) {
+    AddrType = isGOTIndex;
+    Target.GOTIndex = idx;
+  }
+
+  /// getGOTIndex - Once this has been resolved to an entry in the GOT, this
+  /// returns that index. The index is from the lowest address entry in the
+  /// GOT.
+  unsigned getGOTIndex() const {
+    assert(AddrType == isGOTIndex);
+    return Target.GOTIndex;
+  }
+};
+}
+
+#endif
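A sketch of the relocation life cycle this class supports: a JIT-style client records a relocation while emitting code and later resolves it to a concrete address. The offset, global, and resolved address below are hypothetical:

    #include "llvm/CodeGen/MachineRelocation.h"
    using namespace llvm;

    void emitAndResolve(GlobalValue *GV, void *ResolvedAddr) {
      // Record a reference to GV at offset 16 of the code buffer.
      MachineRelocation MR =
        MachineRelocation::getGV(/*offset=*/16, MachineRelocation::VANILLA, GV);
      // ... later, once the symbol's address is known:
      if (MR.isGlobalValue() && !MR.letTargetResolve())
        MR.setResultPointer(ResolvedAddr);
    }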
diff --git a/final/include/llvm/CodeGen/MachineSSAUpdater.h b/final/include/llvm/CodeGen/MachineSSAUpdater.h
new file mode 100644
index 00000000000..cbb45a71275
--- /dev/null
+++ b/final/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -0,0 +1,115 @@
+//===-- MachineSSAUpdater.h - Unstructured SSA Update Tool ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineSSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H
+#define LLVM_CODEGEN_MACHINESSAUPDATER_H
+
+namespace llvm {
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MachineOperand;
+  class MachineRegisterInfo;
+  class TargetInstrInfo;
+  class TargetRegisterClass;
+  template<typename T> class SmallVectorImpl;
+  template<typename T> class SSAUpdaterTraits;
+  class BumpPtrAllocator;
+
+/// MachineSSAUpdater - This class updates SSA form for a set of virtual
+/// registers defined in multiple blocks. This is used when code duplication
+/// or another unstructured transformation wants to rewrite a set of uses of one
+/// vreg with uses of a set of vregs.
+class MachineSSAUpdater {
+  friend class SSAUpdaterTraits<MachineSSAUpdater>;
+
+private:
+  /// AvailableVals - This keeps track of which value to use on a per-block
+  /// basis. When we insert PHI nodes, we keep track of them here.
+  //typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+  void *AV;
+
+  /// VR - Current virtual register whose uses are being updated.
+  unsigned VR;
+
+  /// VRC - Register class of the current virtual register.
+  const TargetRegisterClass *VRC;
+
+  /// InsertedPHIs - If this is non-null, the MachineSSAUpdater adds all PHI
+  /// nodes that it creates to the vector.
+  SmallVectorImpl<MachineInstr*> *InsertedPHIs;
+
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+public:
+  /// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be
+  /// filled in with all PHI Nodes created by rewriting.
+  explicit MachineSSAUpdater(MachineFunction &MF,
+                        SmallVectorImpl<MachineInstr*> *InsertedPHIs = 0);
+  ~MachineSSAUpdater();
+
+  /// Initialize - Reset this object to get ready for a new set of SSA
+  /// updates.
+  void Initialize(unsigned V);
+
+  /// AddAvailableValue - Indicate that a rewritten value is available at the
+  /// end of the specified block with the specified value.
+  void AddAvailableValue(MachineBasicBlock *BB, unsigned V);
+
+  /// HasValueForBlock - Return true if the MachineSSAUpdater already has a
+  /// value for the specified block.
+  bool HasValueForBlock(MachineBasicBlock *BB) const;
+
+  /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+  /// live at the end of the specified block.
+  unsigned GetValueAtEndOfBlock(MachineBasicBlock *BB);
+
+  /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+  /// is live in the middle of the specified block.
+  ///
+  /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+  /// important case: if there is a definition of the rewritten value after the
+  /// 'use' in BB. Consider code like this:
+  ///
+  ///      X1 = ...
+  ///   SomeBB:
+  ///      use(X)
+  ///      X2 = ...
+  ///      br Cond, SomeBB, OutBB
+  ///
+  /// In this case, there are two values (X1 and X2) added to the AvailableVals
+  /// set by the client of the rewriter, and those values are both live out of
+  /// their respective blocks. However, the use of X happens in the *middle* of
+  /// a block. Because of this, we need to insert a new PHI node in SomeBB to
+  /// merge the appropriate values, and this value isn't live out of the block.
+  ///
+  unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB);
+
+  /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+  /// which use their value in the corresponding predecessor. Note that this
+  /// will not work if the use is supposed to be rewritten to a value defined in
+  /// the same block as the use, but above it. Any 'AddAvailableValue's added
+  /// for the use's block will be considered to be below it.
+  void RewriteUse(MachineOperand &U);
+
+private:
+  void ReplaceRegWith(unsigned OldReg, unsigned NewReg);
+  unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
+
+  void operator=(const MachineSSAUpdater&);     // DO NOT IMPLEMENT
+  MachineSSAUpdater(const MachineSSAUpdater&);  // DO NOT IMPLEMENT
+};
+
+} // End llvm namespace
+
+#endif
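The usual protocol for the class above is Initialize, one AddAvailableValue per defining block, then RewriteUse on each dangling operand. A minimal sketch; MF, OldReg, DefMBB, NewVReg, and UseMO are hypothetical stand-ins for a real transformation's state:

    #include "llvm/CodeGen/MachineSSAUpdater.h"
    using namespace llvm;

    void rewriteOneUse(MachineFunction &MF, unsigned OldReg,
                       MachineBasicBlock *DefMBB, unsigned NewVReg,
                       MachineOperand &UseMO) {
      MachineSSAUpdater SSAUpdate(MF);
      SSAUpdate.Initialize(OldReg);                 // register being rewritten
      SSAUpdate.AddAvailableValue(DefMBB, NewVReg); // value live-out of DefMBB
      SSAUpdate.RewriteUse(UseMO);                  // inserts PHIs as needed
    }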
diff --git a/final/include/llvm/CodeGen/ObjectCodeEmitter.h b/final/include/llvm/CodeGen/ObjectCodeEmitter.h
new file mode 100644
index 00000000000..d46628caae7
--- /dev/null
+++ b/final/include/llvm/CodeGen/ObjectCodeEmitter.h
@@ -0,0 +1,171 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.h - Object Code Emitter -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Generalized Object Code Emitter, works with ObjectModule and BinaryObject.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_OBJECTCODEEMITTER_H
+#define LLVM_CODEGEN_OBJECTCODEEMITTER_H
+
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+
+namespace llvm {
+
+class BinaryObject;
+class MachineBasicBlock;
+class MachineCodeEmitter;
+class MachineFunction;
+class MachineConstantPool;
+class MachineJumpTableInfo;
+class MachineModuleInfo;
+
+class ObjectCodeEmitter : public MachineCodeEmitter {
+protected:
+
+  /// Binary Object (Section or Segment) we are emitting to.
+  BinaryObject *BO;
+
+  /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+  /// It is filled in by the StartMachineBasicBlock callback and queried by
+  /// the getMachineBasicBlockAddress callback.
+  std::vector<uintptr_t> MBBLocations;
+
+  /// LabelLocations - This vector is a mapping from Label ID's to their
+  /// address.
+  std::vector<uintptr_t> LabelLocations;
+
+  /// CPLocations - This is a map of constant pool indices to offsets from the
+  /// start of the section for that constant pool index.
+  std::vector<uintptr_t> CPLocations;
+
+  /// CPSections - This is a map of constant pool indices to the Section
+  /// containing the constant pool entry for that index.
+  std::vector<unsigned> CPSections;
+
+  /// JTLocations - This is a map of jump table indices to offsets from the
+  /// start of the section for that jump table index.
+  std::vector<uintptr_t> JTLocations;
+
+public:
+  ObjectCodeEmitter();
+  ObjectCodeEmitter(BinaryObject *bo);
+  virtual ~ObjectCodeEmitter();
+
+  /// setBinaryObject - set the BinaryObject we are writing to
+  void setBinaryObject(BinaryObject *bo);
+
+  /// emitByte - This callback is invoked when a byte needs to be
+  /// written to the data stream, without buffer overflow testing.
+  void emitByte(uint8_t B);
+
+  /// emitWordLE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in little-endian format.
+  void emitWordLE(uint32_t W);
+
+  /// emitWordBE - This callback is invoked when a 32-bit word needs to be
+  /// written to the data stream in big-endian format.
+  void emitWordBE(uint32_t W);
+
+  /// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in little-endian format.
+  void emitDWordLE(uint64_t W);
+
+  /// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+  /// written to the data stream in big-endian format.
+  void emitDWordBE(uint64_t W);
+
+  /// emitAlignment - Move the CurBufferPtr pointer up to the specified
+  /// alignment (saturated to BufferEnd of course).
+  void emitAlignment(unsigned Alignment = 0, uint8_t fill = 0);
+
+  /// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+  /// written to the data stream.
+  void emitULEB128Bytes(uint64_t Value);
+
+  /// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+  /// written to the data stream.
+  void emitSLEB128Bytes(uint64_t Value);
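+  // For reference: ULEB128 is the DWARF-style base-128 varint. The value is
+  // emitted seven bits at a time, least-significant bits first, with bit 7
+  // set on every byte except the last, e.g. 624485 -> 0xE5 0x8E 0x26:
+  //
+  //   do {
+  //     uint8_t Byte = Value & 0x7f;
+  //     Value >>= 7;
+  //     if (Value != 0)
+  //       Byte |= 0x80;  // mark "more bytes follow"
+  //     emitByte(Byte);
+  //   } while (Value != 0);
+  //
+  // SLEB128 is the signed variant: the same scheme, with the final byte
+  // chosen so the value sign-extends correctly.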
+  /// emitString - This callback is invoked when a String needs to be
+  /// written to the data stream.
+  void emitString(const std::string &String);
+
+  /// getCurrentPCValue - This returns the address that the next emitted byte
+  /// will be output to.
+  uintptr_t getCurrentPCValue() const;
+
+  /// getCurrentPCOffset - Return the offset from the start of the emitted
+  /// buffer that we are currently writing to.
+  uintptr_t getCurrentPCOffset() const;
+
+  /// addRelocation - Whenever a relocatable address is needed, it should be
+  /// noted with this interface.
+  void addRelocation(const MachineRelocation& relocation);
+
+  /// earlyResolveAddresses - True if the code emitter can use symbol addresses
+  /// during code emission time. The JIT is capable of doing this because it
+  /// creates jump tables or constant pools in memory on the fly while the
+  /// object code emitters rely on a linker to have real addresses and should
+  /// use relocations instead.
+  bool earlyResolveAddresses() const { return false; }
+
+  /// startFunction - This callback is invoked when the specified function is
+  /// about to be code generated. This initializes the BufferBegin/End/Ptr
+  /// fields.
+  virtual void startFunction(MachineFunction &F) = 0;
+
+  /// finishFunction - This callback is invoked when the specified function has
+  /// finished code generation. If a buffer overflow has occurred, this method
+  /// returns true (the callee is required to try again), otherwise it returns
+  /// false.
+  virtual bool finishFunction(MachineFunction &F) = 0;
+
+  /// StartMachineBasicBlock - This should be called by the target when a new
+  /// basic block is about to be emitted. This way the MCE knows where the
+  /// start of the block is, and can implement getMachineBasicBlockAddress.
+  virtual void StartMachineBasicBlock(MachineBasicBlock *MBB);
+
+  /// getMachineBasicBlockAddress - Return the address of the specified
+  /// MachineBasicBlock, only usable after the label for the MBB has been
+  /// emitted.
+  virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const;
+
+  /// emitJumpTables - Emit all the jump tables for a given jump table info
+  /// record to the appropriate section.
+  virtual void emitJumpTables(MachineJumpTableInfo *MJTI) = 0;
+
+  /// getJumpTableEntryAddress - Return the address of the jump table with index
+  /// 'Index' in the function that last called initJumpTableInfo.
+  virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const;
+
+  /// emitConstantPool - For each constant pool entry, figure out which section
+  /// the constant should live in, allocate space for it, and emit it to the
+  /// Section data buffer.
+  virtual void emitConstantPool(MachineConstantPool *MCP) = 0;
+
+  /// getConstantPoolEntryAddress - Return the address of the 'Index' entry in
+  /// the constant pool that was last emitted with the emitConstantPool method.
+  virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const;
+
+  /// getConstantPoolEntrySection - Return the section of the 'Index' entry in
+  /// the constant pool that was last emitted with the emitConstantPool method.
+  virtual uintptr_t getConstantPoolEntrySection(unsigned Index) const;
+
+  /// Specifies the MachineModuleInfo object. This is used for exception
+  /// handling purposes.
+  virtual void setModuleInfo(MachineModuleInfo* Info) = 0;
+  // to be implemented or deprecated with MachineModuleInfo
+
+}; // end class ObjectCodeEmitter
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/PBQP/Graph.h b/final/include/llvm/CodeGen/PBQP/Graph.h
new file mode 100644
index 00000000000..b2224cb051d
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/Graph.h
@@ -0,0 +1,425 @@
+//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Graph class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
+#define LLVM_CODEGEN_PBQP_GRAPH_H
+
+#include "Math.h"
+
+#include <list>
+#include <map>
+#include <vector>
+
+namespace PBQP {
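+// A PBQP problem asks for one selection per node that minimises the sum of
+// the chosen node-cost entries plus, for every edge, the matrix entry picked
+// out by the selections at the edge's two ends:
+//
+//   min over s:  sum_n c_n[s_n]  +  sum_{(u,v) in E} C_uv[s_u][s_v]
+//
+// The Graph class below stores exactly these cost vectors and matrices.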
+  /// PBQP Graph class.
+  /// Instances of this class describe PBQP problems.
+  class Graph {
+  private:
+
+    // ----- TYPEDEFS -----
+    class NodeEntry;
+    class EdgeEntry;
+
+    typedef std::list<NodeEntry> NodeList;
+    typedef std::list<EdgeEntry> EdgeList;
+
+  public:
+
+    typedef NodeList::iterator NodeItr;
+    typedef NodeList::const_iterator ConstNodeItr;
+
+    typedef EdgeList::iterator EdgeItr;
+    typedef EdgeList::const_iterator ConstEdgeItr;
+
+  private:
+
+    typedef std::list<EdgeItr> AdjEdgeList;
+
+  public:
+
+    typedef AdjEdgeList::iterator AdjEdgeItr;
+
+  private:
+
+    class NodeEntry {
+    private:
+      Vector costs;
+      AdjEdgeList adjEdges;
+      unsigned degree;
+      void *data;
+    public:
+      NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
+      Vector& getCosts() { return costs; }
+      const Vector& getCosts() const { return costs; }
+      unsigned getDegree() const { return degree; }
+      AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
+      AdjEdgeItr edgesEnd() { return adjEdges.end(); }
+      AdjEdgeItr addEdge(EdgeItr e) {
+        ++degree;
+        return adjEdges.insert(adjEdges.end(), e);
+      }
+      void removeEdge(AdjEdgeItr ae) {
+        --degree;
+        adjEdges.erase(ae);
+      }
+      void setData(void *data) { this->data = data; }
+      void* getData() { return data; }
+    };
+
+    class EdgeEntry {
+    private:
+      NodeItr node1, node2;
+      Matrix costs;
+      AdjEdgeItr node1AEItr, node2AEItr;
+      void *data;
+    public:
+      EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
+        : node1(node1), node2(node2), costs(costs) {}
+      NodeItr getNode1() const { return node1; }
+      NodeItr getNode2() const { return node2; }
+      Matrix& getCosts() { return costs; }
+      const Matrix& getCosts() const { return costs; }
+      void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
+      AdjEdgeItr getNode1AEItr() { return node1AEItr; }
+      void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
+      AdjEdgeItr getNode2AEItr() { return node2AEItr; }
+      void setData(void *data) { this->data = data; }
+      void *getData() { return data; }
+    };
+
+    // ----- MEMBERS -----
+
+    NodeList nodes;
+    unsigned numNodes;
+
+    EdgeList edges;
+    unsigned numEdges;
+
+    // ----- INTERNAL METHODS -----
+
+    NodeEntry& getNode(NodeItr nItr) { return *nItr; }
+    const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
+
+    EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
+    const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
+
+    NodeItr addConstructedNode(const NodeEntry &n) {
+      ++numNodes;
+      return nodes.insert(nodes.end(), n);
+    }
+
+    EdgeItr addConstructedEdge(const EdgeEntry &e) {
+      assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
+             "Attempt to add duplicate edge.");
+      ++numEdges;
+      EdgeItr edgeItr = edges.insert(edges.end(), e);
+      EdgeEntry &ne = getEdge(edgeItr);
+      NodeEntry &n1 = getNode(ne.getNode1());
+      NodeEntry &n2 = getNode(ne.getNode2());
+      // Sanity check on matrix dimensions:
+      assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
+             (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
+             "Edge cost dimensions do not match node costs dimensions.");
+      ne.setNode1AEItr(n1.addEdge(edgeItr));
+      ne.setNode2AEItr(n2.addEdge(edgeItr));
+      return edgeItr;
+    }
+
+    inline void copyFrom(const Graph &other);
+  public:
+
+    /// \brief Construct an empty PBQP graph.
+    Graph() : numNodes(0), numEdges(0) {}
+
+    /// \brief Copy construct this graph from "other". Note: Does not copy node
+    ///        and edge data, only graph structure and costs.
+    /// @param other Source graph to copy from.
+    Graph(const Graph &other) : numNodes(0), numEdges(0) {
+      copyFrom(other);
+    }
+
+    /// \brief Make this graph a copy of "other".
Note: Does not copy node and + /// edge data, only graph structure and costs. + /// @param other The graph to copy from. + /// @return A reference to this graph. + /// + /// This will clear the current graph, erasing any nodes and edges added, + /// before copying from other. + Graph& operator=(const Graph &other) { + clear(); + copyFrom(other); + return *this; + } + + /// \brief Add a node with the given costs. + /// @param costs Cost vector for the new node. + /// @return Node iterator for the added node. + NodeItr addNode(const Vector &costs) { + return addConstructedNode(NodeEntry(costs)); + } + + /// \brief Add an edge between the given nodes with the given costs. + /// @param n1Itr First node. + /// @param n2Itr Second node. + /// @return Edge iterator for the added edge. + EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr, + const Matrix &costs) { + assert(getNodeCosts(n1Itr).getLength() == costs.getRows() && + getNodeCosts(n2Itr).getLength() == costs.getCols() && + "Matrix dimensions mismatch."); + return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs)); + } + + /// \brief Get the number of nodes in the graph. + /// @return Number of nodes in the graph. + unsigned getNumNodes() const { return numNodes; } + + /// \brief Get the number of edges in the graph. + /// @return Number of edges in the graph. + unsigned getNumEdges() const { return numEdges; } + + /// \brief Get a node's cost vector. + /// @param nItr Node iterator. + /// @return Node cost vector. + Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); } + + /// \brief Get a node's cost vector (const version). + /// @param nItr Node iterator. + /// @return Node cost vector. + const Vector& getNodeCosts(ConstNodeItr nItr) const { + return getNode(nItr).getCosts(); + } + + /// \brief Set a node's data pointer. + /// @param nItr Node iterator. + /// @param data Pointer to node data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); } + + /// \brief Get the node's data pointer. + /// @param nItr Node iterator. + /// @return Pointer to node data. + void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); } + + /// \brief Get an edge's cost matrix. + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); } + + /// \brief Get an edge's cost matrix (const version). + /// @param eItr Edge iterator. + /// @return Edge cost matrix. + const Matrix& getEdgeCosts(ConstEdgeItr eItr) const { + return getEdge(eItr).getCosts(); + } + + /// \brief Set an edge's data pointer. + /// @param eItr Edge iterator. + /// @param data Pointer to edge data. + /// + /// Typically used by a PBQP solver to attach data to aid in solution. + void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); } + + /// \brief Get an edge's data pointer. + /// @param eItr Edge iterator. + /// @return Pointer to edge data. + void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); } + + /// \brief Get a node's degree. + /// @param nItr Node iterator. + /// @return The degree of the node. + unsigned getNodeDegree(NodeItr nItr) const { + return getNode(nItr).getDegree(); + } + + /// \brief Begin iterator for node set. + NodeItr nodesBegin() { return nodes.begin(); } + + /// \brief Begin const iterator for node set. + ConstNodeItr nodesBegin() const { return nodes.begin(); } + + /// \brief End iterator for node set. 
+ NodeItr nodesEnd() { return nodes.end(); } + + /// \brief End const iterator for node set. + ConstNodeItr nodesEnd() const { return nodes.end(); } + + /// \brief Begin iterator for edge set. + EdgeItr edgesBegin() { return edges.begin(); } + + /// \brief End iterator for edge set. + EdgeItr edgesEnd() { return edges.end(); } + + /// \brief Get begin iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return Begin iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesBegin(NodeItr nItr) { + return getNode(nItr).edgesBegin(); + } + + /// \brief Get end iterator for adjacent edge set. + /// @param nItr Node iterator. + /// @return End iterator for the set of edges connected to the given node. + AdjEdgeItr adjEdgesEnd(NodeItr nItr) { + return getNode(nItr).edgesEnd(); + } + + /// \brief Get the first node connected to this edge. + /// @param eItr Edge iterator. + /// @return The first node connected to the given edge. + NodeItr getEdgeNode1(EdgeItr eItr) { + return getEdge(eItr).getNode1(); + } + + /// \brief Get the second node connected to this edge. + /// @param eItr Edge iterator. + /// @return The second node connected to the given edge. + NodeItr getEdgeNode2(EdgeItr eItr) { + return getEdge(eItr).getNode2(); + } + + /// \brief Get the "other" node connected to this edge. + /// @param eItr Edge iterator. + /// @param nItr Node iterator for the "given" node. + /// @return The iterator for the "other" node connected to this edge. + NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) { + EdgeEntry &e = getEdge(eItr); + if (e.getNode1() == nItr) { + return e.getNode2(); + } // else + return e.getNode1(); + } + + /// \brief Get the edge connecting two nodes. + /// @param n1Itr First node iterator. + /// @param n2Itr Second node iterator. + /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists, + /// otherwise returns edgesEnd(). + EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) { + for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr); + aeItr != aeEnd; ++aeItr) { + if ((getEdgeNode1(*aeItr) == n2Itr) || + (getEdgeNode2(*aeItr) == n2Itr)) { + return *aeItr; + } + } + return edges.end(); + } + + /// \brief Remove a node from the graph. + /// @param nItr Node iterator. + void removeNode(NodeItr nItr) { + NodeEntry &n = getNode(nItr); + for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) { + EdgeItr eItr = *itr; + ++itr; + removeEdge(eItr); + } + nodes.erase(nItr); + --numNodes; + } + + /// \brief Remove an edge from the graph. + /// @param eItr Edge iterator. + void removeEdge(EdgeItr eItr) { + EdgeEntry &e = getEdge(eItr); + NodeEntry &n1 = getNode(e.getNode1()); + NodeEntry &n2 = getNode(e.getNode2()); + n1.removeEdge(e.getNode1AEItr()); + n2.removeEdge(e.getNode2AEItr()); + edges.erase(eItr); + --numEdges; + } + + /// \brief Remove all nodes and edges from the graph. + void clear() { + nodes.clear(); + edges.clear(); + numNodes = numEdges = 0; + } + + /// \brief Print a representation of this graph in DOT format. + /// @param os Output stream to print on. 
+    template <typename OStream>
+    void printDot(OStream &os) {
+
+      os << "graph {\n";
+
+      for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
+           nodeItr != nodeEnd; ++nodeItr) {
+
+        os << "  node" << nodeItr << " [ label=\""
+           << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
+      }
+
+      os << "  edge [ len=" << getNumNodes() << " ]\n";
+
+      for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
+           edgeItr != edgeEnd; ++edgeItr) {
+
+        os << "  node" << getEdgeNode1(edgeItr)
+           << " -- node" << getEdgeNode2(edgeItr)
+           << " [ label=\"";
+
+        const Matrix &edgeCosts = getEdgeCosts(edgeItr);
+
+        for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
+          os << edgeCosts.getRowAsVector(i) << "\\n";
+        }
+        os << "\" ]\n";
+      }
+      os << "}\n";
+    }
+
+  };
+
+  class NodeItrComparator {
+  public:
+    bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+
+    bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
+      return &*n1 < &*n2;
+    }
+  };
+
+  class EdgeItrComparator {
+  public:
+    bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+
+    bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
+      return &*e1 < &*e2;
+    }
+  };
+
+  void Graph::copyFrom(const Graph &other) {
+    std::map<Graph::ConstNodeItr, Graph::NodeItr,
+             NodeItrComparator> nodeMap;
+
+    for (Graph::ConstNodeItr nItr = other.nodesBegin(),
+         nEnd = other.nodesEnd();
+         nItr != nEnd; ++nItr) {
+      nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
+    }
+
+  }
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_GRAPH_H
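A quick usage sketch for the interface above (the costs are made-up values for illustration; Vector and Matrix are the PBQP classes declared in Math.h later in this patch):

    PBQP::Graph g;
    PBQP::Vector c1(2, 0), c2(2, 0);          // two options per node
    c1[0] = 1; c1[1] = 2;                     // option costs for n1
    c2[0] = 3; c2[1] = 0;                     // option costs for n2
    PBQP::Graph::NodeItr n1 = g.addNode(c1);
    PBQP::Graph::NodeItr n2 = g.addNode(c2);
    PBQP::Matrix m(2, 2, 0);                  // rows index n1's options,
    m[0][0] = 10;                             // columns n2's; penalise (0,0)
    g.addEdge(n1, n2, m);                     // dimensions are assert-checked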
diff --git a/final/include/llvm/CodeGen/PBQP/HeuristicBase.h b/final/include/llvm/CodeGen/PBQP/HeuristicBase.h
new file mode 100644
index 00000000000..791c227f0d0
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/HeuristicBase.h
@@ -0,0 +1,246 @@
+//===-- HeuristicBase.h --- Heuristic base class for PBQP -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
+
+#include "HeuristicSolver.h"
+
+namespace PBQP {
+
+  /// \brief Abstract base class for heuristic implementations.
+  ///
+  /// This class provides a handy base for heuristic implementations with common
+  /// solver behaviour implemented for a number of methods.
+  ///
+  /// To implement your own heuristic using this class as a base you'll have to
+  /// implement, as a minimum, the following methods:
+  /// <ul>
+  ///   <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
+  ///        heuristic reduction list.
+  ///   <li> void heuristicReduce() : Perform a single heuristic reduction.
+  ///   <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
+  ///        change to the cost matrix on the given edge (by R2).
+  ///   <li> void postUpdateEdgeCosts(Graph::EdgeItr) : Handle the new
+  ///        costs on the given edge.
+  ///   <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
+  ///        edge into the PBQP graph (by R2).
+  ///   <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
+  ///        disconnection of the given edge from the given node.
+  ///   <li> A constructor for your derived class : to pass back a reference to
+  ///        the solver which is using this heuristic.
+  /// </ul>
+  ///
+  /// These methods are implemented in this class for documentation purposes,
+  /// but will assert if called.
+  ///
+  /// Note that this class uses the curiously recursive template idiom to
+  /// forward calls to the derived class. These methods need not be made
+  /// virtual, and indeed probably shouldn't for performance reasons. (A short
+  /// sketch of the idiom accompanies the impl() helper below.)
+  ///
+  /// You'll also need to provide NodeData and EdgeData structs in your class.
+  /// These can be used to attach data relevant to your heuristic to each
+  /// node/edge in the PBQP graph.
+
+  template <typename HImpl>
+  class HeuristicBase {
+  private:
+
+    typedef std::list<Graph::NodeItr> OptimalList;
+
+    HeuristicSolverImpl<HImpl> &s;
+    Graph &g;
+    OptimalList optimalList;
+
+    // Return a reference to the derived heuristic.
+    HImpl& impl() { return static_cast<HImpl&>(*this); }
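+    // The idiom in miniature (illustrative names only, not from this file):
+    //
+    //   template <typename Impl>
+    //   struct Base {
+    //     void run() { static_cast<Impl&>(*this).step(); } // static dispatch
+    //   };
+    //   struct MyHeuristic : Base<MyHeuristic> {
+    //     void step() { /* heuristic-specific work */ }
+    //   };
+    //
+    // impl() above plays the role of that static_cast.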
+    // Add the given node to the optimal reductions list. Keep an iterator to
+    // its location for fast removal.
+    void addToOptimalReductionList(Graph::NodeItr nItr) {
+      optimalList.insert(optimalList.end(), nItr);
+    }
+
+  public:
+
+    /// \brief Construct an instance with a reference to the given solver.
+    /// @param solver The solver which is using this heuristic instance.
+    HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
+      : s(solver), g(s.getGraph()) { }
+
+    /// \brief Get the solver which is using this heuristic instance.
+    /// @return The solver which is using this heuristic instance.
+    ///
+    /// You can use this method to get access to the solver in your derived
+    /// heuristic implementation.
+    HeuristicSolverImpl<HImpl>& getSolver() { return s; }
+
+    /// \brief Get the graph representing the problem to be solved.
+    /// @return The graph representing the problem to be solved.
+    Graph& getGraph() { return g; }
+
+    /// \brief Tell the solver to simplify the graph before the reduction phase.
+    /// @return Whether or not the solver should run a simplification phase
+    ///         prior to the main setup and reduction.
+    ///
+    /// HeuristicBase returns true from this method as it's a sensible default,
+    /// however you can over-ride it in your derived class if you want different
+    /// behaviour.
+    bool solverRunSimplify() const { return true; }
+
+    /// \brief Decide whether a node should be optimally or heuristically
+    ///        reduced.
+    /// @return Whether or not the given node should be listed for optimal
+    ///         reduction (via R0, R1 or R2).
+    ///
+    /// HeuristicBase returns true for any node with degree less than 3. This is
+    /// sane and sensible for many situations, but not all. You can over-ride
+    /// this method in your derived class if you want a different selection
+    /// criterion. Note however that your criteria for selecting optimal nodes
+    /// should be at least as strong as this. I.e. Nodes of degree 3 or
+    /// higher should not be selected under any circumstances.
+    bool shouldOptimallyReduce(Graph::NodeItr nItr) {
+      if (g.getNodeDegree(nItr) < 3)
+        return true;
+      // else
+      return false;
+    }
+
+    /// \brief Add the given node to the list of nodes to be optimally reduced.
+    /// @param nItr Node iterator to be added.
+    ///
+    /// You probably don't want to over-ride this, except perhaps to record
+    /// statistics before calling this implementation. HeuristicBase relies on
+    /// its behaviour.
+    void addToOptimalReduceList(Graph::NodeItr nItr) {
+      optimalList.push_back(nItr);
+    }
+
+    /// \brief Initialise the heuristic.
+    ///
+    /// HeuristicBase iterates over all nodes in the problem and adds them to
+    /// the appropriate list using addToOptimalReduceList or
+    /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
+    ///
+    /// This behaviour should be fine for most situations.
+    void setup() {
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        if (impl().shouldOptimallyReduce(nItr)) {
+          addToOptimalReduceList(nItr);
+        } else {
+          impl().addToHeuristicReduceList(nItr);
+        }
+      }
+    }
+
+    /// \brief Optimally reduce one of the nodes in the optimal reduce list.
+    /// @return True if a reduction takes place, false if the optimal reduce
+    ///         list is empty.
+    ///
+    /// Selects a node from the optimal reduce list and removes it, applying
+    /// R0, R1 or R2 as appropriate based on the selected node's degree.
+    bool optimalReduce() {
+      if (optimalList.empty())
+        return false;
+
+      Graph::NodeItr nItr = optimalList.front();
+      optimalList.pop_front();
+
+      switch (s.getSolverDegree(nItr)) {
+        case 0: s.applyR0(nItr); break;
+        case 1: s.applyR1(nItr); break;
+        case 2: s.applyR2(nItr); break;
+        default: assert(false &&
+                        "Optimal reductions of degree > 2 nodes is invalid.");
+      }
+
+      return true;
+    }
+
+    /// \brief Perform the PBQP reduction process.
+    ///
+    /// Reduces the problem to the empty graph by repeated application of the
+    /// reduction rules R0, R1, R2 and RN.
+    /// R0, R1 or R2 are always applied if possible before RN is used.
+    void reduce() {
+      bool finished = false;
+
+      while (!finished) {
+        if (!optimalReduce()) {
+          if (impl().heuristicReduce()) {
+            getSolver().recordRN();
+          } else {
+            finished = true;
+          }
+        }
+      }
+    }
+
+    /// \brief Add a node to the heuristic reduce list.
+    /// @param nItr Node iterator to add to the heuristic reduce list.
+    void addToHeuristicList(Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Heuristically reduce one of the nodes in the heuristic
+    ///        reduce list.
+    /// @return True if a reduction takes place, false if the heuristic reduce
+    ///         list is empty.
+    bool heuristicReduce() {
+      assert(false && "Must be implemented in derived class.");
+      return false;
+    }
+
+    /// \brief Prepare a change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the change in the costs on the given edge.
+    /// @param eItr Edge iterator.
+    void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle the addition of a new edge into the PBQP graph.
+    /// @param eItr Edge iterator for the added edge.
+    void handleAddEdge(Graph::EdgeItr eItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Handle disconnection of an edge from a node.
+    /// @param eItr Edge iterator for edge being disconnected.
+    /// @param nItr Node iterator for the node being disconnected from.
+    ///
+    /// Edges are frequently removed due to the removal of a node. This
+    /// method allows for the effect to be computed only for the remaining
+    /// node in the graph.
+    void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+      assert(false && "Must be implemented in derived class.");
+    }
+
+    /// \brief Clean up any structures used by HeuristicBase.
+    ///
+    /// At present this just performs a sanity check: that the optimal reduce
+    /// list is empty now that reduction has completed.
+    ///
+    /// If your derived class has more complex structures which need tearing
+    /// down you should over-ride this method but include a call back to this
+    /// implementation.
+    void cleanup() {
+      assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
+    }
+
+  };
+
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
diff --git a/final/include/llvm/CodeGen/PBQP/HeuristicSolver.h b/final/include/llvm/CodeGen/PBQP/HeuristicSolver.h
new file mode 100644
index 00000000000..35514f96747
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/HeuristicSolver.h
@@ -0,0 +1,616 @@
+//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Heuristic PBQP solver. This solver is able to perform optimal reductions for
+// nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable heuristic is
+// used to select a node for reduction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
+
+#include "Graph.h"
+#include "Solution.h"
+#include <vector>
+#include <limits>
+
+namespace PBQP {
+
+  /// \brief Heuristic PBQP solver implementation.
+  ///
+  /// This class should usually be created (and destroyed) indirectly via a call
+  /// to HeuristicSolver<HImpl>::solve(Graph&).
+  /// See the comments for HeuristicSolver.
+  ///
+  /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
+  /// backpropagation phase, and maintains the internal copy of the graph on
+  /// which the reduction is carried out (the original being kept to facilitate
+  /// backpropagation).
+  template <typename HImpl>
+  class HeuristicSolverImpl {
+  private:
+
+    typedef typename HImpl::NodeData HeuristicNodeData;
+    typedef typename HImpl::EdgeData HeuristicEdgeData;
+
+    typedef std::list<Graph::EdgeItr> SolverEdges;
+
+  public:
+
+    /// \brief Iterator type for edges in the solver graph.
+    typedef SolverEdges::iterator SolverEdgeItr;
+
+  private:
+
+    class NodeData {
+    public:
+      NodeData() : solverDegree(0) {}
+
+      HeuristicNodeData& getHeuristicData() { return hData; }
+
+      SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
+        ++solverDegree;
+        return solverEdges.insert(solverEdges.end(), eItr);
+      }
+
+      void removeSolverEdge(SolverEdgeItr seItr) {
+        --solverDegree;
+        solverEdges.erase(seItr);
+      }
+
+      SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
+      SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
+      unsigned getSolverDegree() const { return solverDegree; }
+      void clearSolverEdges() {
+        solverDegree = 0;
+        solverEdges.clear();
+      }
+
+    private:
+      HeuristicNodeData hData;
+      unsigned solverDegree;
+      SolverEdges solverEdges;
+    };
+
+    class EdgeData {
+    public:
+      HeuristicEdgeData& getHeuristicData() { return hData; }
+
+      void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
+        this->n1SolverEdgeItr = n1SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
+
+      void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr) {
+        this->n2SolverEdgeItr = n2SolverEdgeItr;
+      }
+
+      SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
+
+    private:
+
+      HeuristicEdgeData hData;
+      SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
+    };
+
+    Graph &g;
+    HImpl h;
+    Solution s;
+    std::vector<Graph::NodeItr> stack;
+
+    typedef std::list<NodeData> NodeDataList;
+    NodeDataList nodeDataList;
+
+    typedef std::list<EdgeData> EdgeDataList;
+    EdgeDataList edgeDataList;
+
+  public:
+
+    /// \brief Construct a heuristic solver implementation to solve the given
+    ///        graph.
+    /// @param g The graph representing the problem instance to be solved.
+    HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
+
+    /// \brief Get the graph being solved by this solver.
+    /// @return The graph representing the problem instance being solved by this
+    ///         solver.
+    Graph& getGraph() { return g; }
+
+    /// \brief Get the heuristic data attached to the given node.
+    /// @param nItr Node iterator.
+    /// @return The heuristic data attached to the given node.
+    HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).getHeuristicData();
+    }
+
+    /// \brief Get the heuristic data attached to the given edge.
+    /// @param eItr Edge iterator.
+    /// @return The heuristic data attached to the given edge.
+    HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+      return getSolverEdgeData(eItr).getHeuristicData();
+    }
+
+    /// \brief Begin iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return Begin iterator for the set of edges adjacent to the given node
+    ///         in the solver graph.
+    SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesBegin();
+    }
+
+    /// \brief End iterator for the set of edges adjacent to the given node in
+    ///        the solver graph.
+    /// @param nItr Node iterator.
+    /// @return End iterator for the set of edges adjacent to the given node in
+    ///         the solver graph.
+    SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
+      return getSolverNodeData(nItr).solverEdgesEnd();
+    }
+
+    /// \brief Remove an edge from the solver graph.
+    /// @param eItr Edge iterator for edge to be removed.
+    ///
+    /// Does not notify the heuristic of the removal. That should be
+    /// done manually if necessary.
+ void removeSolverEdge(Graph::EdgeItr eItr) { + EdgeData &eData = getSolverEdgeData(eItr); + NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)), + &n2Data = getSolverNodeData(g.getEdgeNode2(eItr)); + + n1Data.removeSolverEdge(eData.getN1SolverEdgeItr()); + n2Data.removeSolverEdge(eData.getN2SolverEdgeItr()); + } + + /// \brief Compute a solution to the PBQP problem instance with which this + /// heuristic solver was constructed. + /// @return A solution to the PBQP problem. + /// + /// Performs the full PBQP heuristic solver algorithm, including setup, + /// calls to the heuristic (which will call back to the reduction rules in + /// this class), and cleanup. + Solution computeSolution() { + setup(); + h.setup(); + h.reduce(); + backpropagate(); + h.cleanup(); + cleanup(); + return s; + } + + /// \brief Add to the end of the stack. + /// @param nItr Node iterator to add to the reduction stack. + void pushToStack(Graph::NodeItr nItr) { + getSolverNodeData(nItr).clearSolverEdges(); + stack.push_back(nItr); + } + + /// \brief Returns the solver degree of the given node. + /// @param nItr Node iterator for which degree is requested. + /// @return Node degree in the solver graph (not the original graph). + unsigned getSolverDegree(Graph::NodeItr nItr) { + return getSolverNodeData(nItr).getSolverDegree(); + } + + /// \brief Set the solution of the given node. + /// @param nItr Node iterator to set solution for. + /// @param selection Selection for node. + void setSolution(const Graph::NodeItr &nItr, unsigned selection) { + s.setSelection(nItr, selection); + + for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr), + aeEnd = g.adjEdgesEnd(nItr); + aeItr != aeEnd; ++aeItr) { + Graph::EdgeItr eItr(*aeItr); + Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr)); + getSolverNodeData(anItr).addSolverEdge(eItr); + } + } + + /// \brief Apply rule R0. + /// @param nItr Node iterator for node to apply R0 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR0(Graph::NodeItr nItr) { + assert(getSolverNodeData(nItr).getSolverDegree() == 0 && + "R0 applied to node with degree != 0."); + + // Nothing to do. Just push the node onto the reduction stack. + pushToStack(nItr); + + s.recordR0(); + } + + /// \brief Apply rule R1. + /// @param xnItr Node iterator for node to apply R1 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR1(Graph::NodeItr xnItr) { + NodeData &nd = getSolverNodeData(xnItr); + assert(nd.getSolverDegree() == 1 && + "R1 applied to node with degree != 1."); + + Graph::EdgeItr eItr = *nd.solverEdgesBegin(); + + const Matrix &eCosts = g.getEdgeCosts(eItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + // Duplicate a little to avoid transposing matrices. 
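+      // Both branches below apply the same R1 folding: for each option j of
+      // the surviving node y, add min over i of (eCosts(i, j) + xCosts[i])
+      // to y's cost vector, then drop the edge. E.g. with xCosts = [0, 2]
+      // and edge column [3, 0], y's entry grows by min(3 + 0, 0 + 2) = 2.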
+ if (xnItr == g.getEdgeNode1(eItr)) { + Graph::NodeItr ynItr = g.getEdgeNode2(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned j = 0; j < yCosts.getLength(); ++j) { + PBQPNum min = eCosts[0][j] + xCosts[0]; + for (unsigned i = 1; i < xCosts.getLength(); ++i) { + PBQPNum c = eCosts[i][j] + xCosts[i]; + if (c < min) + min = c; + } + yCosts[j] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } else { + Graph::NodeItr ynItr = g.getEdgeNode1(eItr); + Vector &yCosts = g.getNodeCosts(ynItr); + for (unsigned i = 0; i < yCosts.getLength(); ++i) { + PBQPNum min = eCosts[i][0] + xCosts[0]; + for (unsigned j = 1; j < xCosts.getLength(); ++j) { + PBQPNum c = eCosts[i][j] + xCosts[j]; + if (c < min) + min = c; + } + yCosts[i] += min; + } + h.handleRemoveEdge(eItr, ynItr); + } + removeSolverEdge(eItr); + assert(nd.getSolverDegree() == 0 && + "Degree 1 with edge removed should be 0."); + pushToStack(xnItr); + s.recordR1(); + } + + /// \brief Apply rule R2. + /// @param xnItr Node iterator for node to apply R2 to. + /// + /// Node will be automatically pushed to the solver stack. + void applyR2(Graph::NodeItr xnItr) { + assert(getSolverNodeData(xnItr).getSolverDegree() == 2 && + "R2 applied to node with degree != 2."); + + NodeData &nd = getSolverNodeData(xnItr); + const Vector &xCosts = g.getNodeCosts(xnItr); + + SolverEdgeItr aeItr = nd.solverEdgesBegin(); + Graph::EdgeItr yxeItr = *aeItr, + zxeItr = *(++aeItr); + + Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr), + znItr = g.getEdgeOtherNode(zxeItr, xnItr); + + bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr), + flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr); + + const Matrix *yxeCosts = flipEdge1 ? + new Matrix(g.getEdgeCosts(yxeItr).transpose()) : + &g.getEdgeCosts(yxeItr); + + const Matrix *zxeCosts = flipEdge2 ? + new Matrix(g.getEdgeCosts(zxeItr).transpose()) : + &g.getEdgeCosts(zxeItr); + + unsigned xLen = xCosts.getLength(), + yLen = yxeCosts->getRows(), + zLen = zxeCosts->getRows(); + + Matrix delta(yLen, zLen); + + for (unsigned i = 0; i < yLen; ++i) { + for (unsigned j = 0; j < zLen; ++j) { + PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0]; + for (unsigned k = 1; k < xLen; ++k) { + PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k]; + if (c < min) { + min = c; + } + } + delta[i][j] = min; + } + } + + if (flipEdge1) + delete yxeCosts; + + if (flipEdge2) + delete zxeCosts; + + Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr); + bool addedEdge = false; + + if (yzeItr == g.edgesEnd()) { + yzeItr = g.addEdge(ynItr, znItr, delta); + addedEdge = true; + } else { + Matrix &yzeCosts = g.getEdgeCosts(yzeItr); + h.preUpdateEdgeCosts(yzeItr); + if (ynItr == g.getEdgeNode1(yzeItr)) { + yzeCosts += delta; + } else { + yzeCosts += delta.transpose(); + } + } + + bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr); + + if (!addedEdge) { + // If we modified the edge costs let the heuristic know. + h.postUpdateEdgeCosts(yzeItr); + } + + if (nullCostEdge) { + // If this edge ended up null remove it. + if (!addedEdge) { + // We didn't just add it, so we need to notify the heuristic + // and remove it from the solver. + h.handleRemoveEdge(yzeItr, ynItr); + h.handleRemoveEdge(yzeItr, znItr); + removeSolverEdge(yzeItr); + } + g.removeEdge(yzeItr); + } else if (addedEdge) { + // If the edge was added, and non-null, finish setting it up, add it to + // the solver & notify heuristic. 
+        edgeDataList.push_back(EdgeData());
+        g.setEdgeData(yzeItr, &edgeDataList.back());
+        addSolverEdge(yzeItr);
+        h.handleAddEdge(yzeItr);
+      }
+
+      h.handleRemoveEdge(yxeItr, ynItr);
+      removeSolverEdge(yxeItr);
+      h.handleRemoveEdge(zxeItr, znItr);
+      removeSolverEdge(zxeItr);
+
+      pushToStack(xnItr);
+      s.recordR2();
+    }
+
+    /// \brief Record an application of the RN rule.
+    ///
+    /// For use by the HeuristicBase.
+    void recordRN() { s.recordRN(); }
+
+  private:
+
+    NodeData& getSolverNodeData(Graph::NodeItr nItr) {
+      return *static_cast<NodeData*>(g.getNodeData(nItr));
+    }
+
+    EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
+      return *static_cast<EdgeData*>(g.getEdgeData(eItr));
+    }
+
+    void addSolverEdge(Graph::EdgeItr eItr) {
+      EdgeData &eData = getSolverEdgeData(eItr);
+      NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
+               &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
+
+      eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
+      eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
+    }
+
+    void setup() {
+      if (h.solverRunSimplify()) {
+        simplify();
+      }
+
+      // Create node data objects.
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+        nodeDataList.push_back(NodeData());
+        g.setNodeData(nItr, &nodeDataList.back());
+      }
+
+      // Create edge data objects.
+      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+           eItr != eEnd; ++eItr) {
+        edgeDataList.push_back(EdgeData());
+        g.setEdgeData(eItr, &edgeDataList.back());
+        addSolverEdge(eItr);
+      }
+    }
+
+    void simplify() {
+      disconnectTrivialNodes();
+      eliminateIndependentEdges();
+    }
+
+    // Eliminate trivial nodes.
+    void disconnectTrivialNodes() {
+      unsigned numDisconnected = 0;
+
+      for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
+           nItr != nEnd; ++nItr) {
+
+        if (g.getNodeCosts(nItr).getLength() == 1) {
+
+          std::vector<Graph::EdgeItr> edgesToRemove;
+
+          for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
+               aeEnd = g.adjEdgesEnd(nItr);
+               aeItr != aeEnd; ++aeItr) {
+
+            Graph::EdgeItr eItr = *aeItr;
+
+            if (g.getEdgeNode1(eItr) == nItr) {
+              Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
+              g.getNodeCosts(otherNodeItr) +=
+                g.getEdgeCosts(eItr).getRowAsVector(0);
+            }
+            else {
+              Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
+              g.getNodeCosts(otherNodeItr) +=
+                g.getEdgeCosts(eItr).getColAsVector(0);
+            }
+
+            edgesToRemove.push_back(eItr);
+          }
+
+          if (!edgesToRemove.empty())
+            ++numDisconnected;
+
+          while (!edgesToRemove.empty()) {
+            g.removeEdge(edgesToRemove.back());
+            edgesToRemove.pop_back();
+          }
+        }
+      }
+    }
+
+    void eliminateIndependentEdges() {
+      std::vector<Graph::EdgeItr> edgesToProcess;
+      unsigned numEliminated = 0;
+
+      for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
+           eItr != eEnd; ++eItr) {
+        edgesToProcess.push_back(eItr);
+      }
+
+      while (!edgesToProcess.empty()) {
+        if (tryToEliminateEdge(edgesToProcess.back()))
+          ++numEliminated;
+        edgesToProcess.pop_back();
+      }
+    }
+
+    bool tryToEliminateEdge(Graph::EdgeItr eItr) {
+      if (tryNormaliseEdgeMatrix(eItr)) {
+        g.removeEdge(eItr);
+        return true;
+      }
+      return false;
+    }
+
+    bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
+
+      const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
+
+      Matrix &edgeCosts = g.getEdgeCosts(eItr);
+      Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
+             &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
+
+      for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+        PBQPNum rowMin = infinity;
+
+        for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+          if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
+            rowMin = edgeCosts[r][c];
+        }
+
+        uCosts[r] += rowMin;
+
+        if (rowMin != infinity) {
+          edgeCosts.subFromRow(r, rowMin);
+        }
+        else {
+          edgeCosts.setRow(r, 0);
+        }
+      }
+
+      for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
+        PBQPNum colMin = infinity;
+
+        for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
+          if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
+            colMin = edgeCosts[r][c];
+        }
+
+        vCosts[c] += colMin;
+
+        if (colMin != infinity) {
+          edgeCosts.subFromCol(c, colMin);
+        }
+        else {
+          edgeCosts.setCol(c, 0);
+        }
+      }
+
+      return edgeCosts.isZero();
+    }
+
+    void backpropagate() {
+      while (!stack.empty()) {
+        computeSolution(stack.back());
+        stack.pop_back();
+      }
+    }
+
+    void computeSolution(Graph::NodeItr nItr) {
+
+      NodeData &nodeData = getSolverNodeData(nItr);
+
+      Vector v(g.getNodeCosts(nItr));
+
+      // Solve based on existing solved edges.
+      for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
+           solvedEdgeEnd = nodeData.solverEdgesEnd();
+           solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
+
+        Graph::EdgeItr eItr(*solvedEdgeItr);
+        Matrix &edgeCosts = g.getEdgeCosts(eItr);
+
+        if (nItr == g.getEdgeNode1(eItr)) {
+          Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
+          unsigned adjSolution = s.getSelection(adjNode);
+          v += edgeCosts.getColAsVector(adjSolution);
+        }
+        else {
+          Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
+          unsigned adjSolution = s.getSelection(adjNode);
+          v += edgeCosts.getRowAsVector(adjSolution);
+        }
+
+      }
+
+      setSolution(nItr, v.minIndex());
+    }
+
+    void cleanup() {
+      h.cleanup();
+      nodeDataList.clear();
+      edgeDataList.clear();
+    }
+  };
+
+  /// \brief PBQP heuristic solver class.
+  ///
+  /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
+  /// by calling
+  /// Solution s = HeuristicSolver<HImpl>::solve(g);
+  ///
+  /// The choice of heuristic for the HImpl parameter will affect both the
+  /// solver speed and solution quality. The heuristic should be chosen based
+  /// on the nature of the problem being solved.
+  /// Currently the only heuristic included with LLVM is the Briggs heuristic
+  /// for register allocation.
+  template <typename HImpl>
+  class HeuristicSolver {
+  public:
+    static Solution solve(Graph &g) {
+      HeuristicSolverImpl<HImpl> hs(g);
+      return hs.computeSolution();
+    }
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
diff --git a/final/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/final/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
new file mode 100644
index 00000000000..47a287ccf2f
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h
@@ -0,0 +1,464 @@
+//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the Briggs test for "allocability" of nodes in a
+// PBQP graph representing a register allocation problem. Nodes which can be
+// proven allocable (by a safe and relatively accurate test) are removed from
+// the PBQP graph first. If no provably allocable node is present in the graph
+// then the node with the minimal spill-cost to degree ratio is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
+
+#include "../HeuristicSolver.h"
+#include "../HeuristicBase.h"
+
+#include <set>
+#include <limits>
+
+namespace PBQP {
+  namespace Heuristics {
+
+    /// \brief PBQP Heuristic which applies an allocability test based on
+    ///        Briggs.
+    ///
+    /// This heuristic assumes that the elements of cost vectors in the PBQP
+    /// problem represent storage options, with the first being the spill
+    /// option and subsequent elements representing legal registers for the
+    /// corresponding node. Edge cost matrices are likewise assumed to represent
+    /// register constraints.
+    /// If one or more nodes can be proven allocable by this heuristic (by
+    /// inspection of their constraint matrices) then the allocable node of
+    /// highest degree is selected for the next reduction and pushed to the
+    /// solver stack. If no nodes can be proven allocable then the node with
+    /// the lowest estimated spill cost is selected and pushed to the solver
+    /// stack instead.
+    ///
+    /// This implementation is built on top of HeuristicBase.
+    class Briggs : public HeuristicBase<Briggs> {
+    private:
+
+      class LinkDegreeComparator {
+      public:
+        LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
+        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+          if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
+            return true;
+          return false;
+        }
+      private:
+        HeuristicSolverImpl<Briggs> *s;
+      };
+
+      class SpillCostComparator {
+      public:
+        SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
+          : s(&s), g(&s.getGraph()) {}
+        bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
+          const PBQP::Vector &cv1 = g->getNodeCosts(n1Itr);
+          const PBQP::Vector &cv2 = g->getNodeCosts(n2Itr);
+
+          PBQPNum cost1 = cv1[0] / s->getSolverDegree(n1Itr);
+          PBQPNum cost2 = cv2[0] / s->getSolverDegree(n2Itr);
+
+          if (cost1 < cost2)
+            return true;
+          return false;
+        }
+
+      private:
+        HeuristicSolverImpl<Briggs> *s;
+        Graph *g;
+      };
+
+      typedef std::list<Graph::NodeItr> RNAllocableList;
+      typedef RNAllocableList::iterator RNAllocableListItr;
+
+      typedef std::list<Graph::NodeItr> RNUnallocableList;
+      typedef RNUnallocableList::iterator RNUnallocableListItr;
+
+    public:
+
+      struct NodeData {
+        typedef std::vector<unsigned> UnsafeDegreesArray;
+        bool isHeuristic, isAllocable, isInitialized;
+        unsigned numDenied, numSafe;
+        UnsafeDegreesArray unsafeDegrees;
+        RNAllocableListItr rnaItr;
+        RNUnallocableListItr rnuItr;
+
+        NodeData()
+          : isHeuristic(false), isAllocable(false), isInitialized(false),
+            numDenied(0), numSafe(0) { }
+      };
+
+      struct EdgeData {
+        typedef std::vector<unsigned> UnsafeArray;
+        unsigned worst, reverseWorst;
+        UnsafeArray unsafe, reverseUnsafe;
+        bool isUpToDate;
+
+        EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
+      };
+
+      /// \brief Construct an instance of the Briggs heuristic.
+      /// @param solver A reference to the solver which is using this heuristic.
+      Briggs(HeuristicSolverImpl<Briggs> &solver) :
+        HeuristicBase<Briggs>(solver) {}
+
+      /// \brief Determine whether a node should be reduced using optimal
+      ///        reduction.
+      /// @param nItr Node iterator to be considered.
+      /// @return True if the given node should be optimally reduced, false
+      ///         otherwise.
+      ///
+      /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
+      /// exception. Nodes whose spill cost (element 0 of their cost vector) is
+      /// infinite are checked for allocability first.
Allocable nodes may be + /// optimally reduced, but nodes whose allocability cannot be proven are + /// selected for heuristic reduction instead. + bool shouldOptimallyReduce(Graph::NodeItr nItr) { + if (getSolver().getSolverDegree(nItr) < 3) { + return true; + } + // else + return false; + } + + /// \brief Add a node to the heuristic reduce list. + /// @param nItr Node iterator to add to the heuristic reduce list. + void addToHeuristicReduceList(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + initializeNode(nItr); + nd.isHeuristic = true; + if (nd.isAllocable) { + nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr); + } else { + nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr); + } + } + + /// \brief Heuristically reduce one of the nodes in the heuristic + /// reduce list. + /// @return True if a reduction takes place, false if the heuristic reduce + /// list is empty. + /// + /// If the list of allocable nodes is non-empty a node is selected + /// from it and pushed to the stack. Otherwise if the non-allocable list + /// is non-empty a node is selected from it and pushed to the stack. + /// If both lists are empty the method simply returns false with no action + /// taken. + bool heuristicReduce() { + if (!rnAllocableList.empty()) { + RNAllocableListItr rnaItr = + min_element(rnAllocableList.begin(), rnAllocableList.end(), + LinkDegreeComparator(getSolver())); + Graph::NodeItr nItr = *rnaItr; + rnAllocableList.erase(rnaItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } else if (!rnUnallocableList.empty()) { + RNUnallocableListItr rnuItr = + min_element(rnUnallocableList.begin(), rnUnallocableList.end(), + SpillCostComparator(getSolver())); + Graph::NodeItr nItr = *rnuItr; + rnUnallocableList.erase(rnuItr); + handleRemoveNode(nItr); + getSolver().pushToStack(nItr); + return true; + } + // else + return false; + } + + /// \brief Prepare a change in the costs on the given edge. + /// @param eItr Edge iterator. + void preUpdateEdgeCosts(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + if (n1.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr)); + if (n2.isHeuristic) + subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr)); + + EdgeData &ed = getHeuristicEdgeData(eItr); + ed.isUpToDate = false; + } + + /// \brief Handle the change in the costs on the given edge. + /// @param eItr Edge iterator. + void postUpdateEdgeCosts(Graph::EdgeItr eItr) { + // This is effectively the same as adding a new edge now, since + // we've factored out the costs of the old one. + handleAddEdge(eItr); + } + + /// \brief Handle the addition of a new edge into the PBQP graph. + /// @param eItr Edge iterator for the added edge. + /// + /// Updates allocability of any nodes connected by this edge which are + /// being managed by the heuristic. If allocability changes they are + /// moved to the appropriate list. + void handleAddEdge(Graph::EdgeItr eItr) { + Graph &g = getGraph(); + Graph::NodeItr n1Itr = g.getEdgeNode1(eItr), + n2Itr = g.getEdgeNode2(eItr); + NodeData &n1 = getHeuristicNodeData(n1Itr), + &n2 = getHeuristicNodeData(n2Itr); + + // If neither node is managed by the heuristic there's nothing to be + // done. + if (!n1.isHeuristic && !n2.isHeuristic) + return; + + // Ok - we need to update at least one node. 
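+        // computeEdgeContributions() scans the edge matrix for infinite
+        // entries: 'worst' ('reverseWorst' for the other direction) is the
+        // largest number of this node's registers that any single choice at
+        // the neighbour can deny, and 'unsafe' flags each register that at
+        // least one neighbour choice would block.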
+        computeEdgeContributions(eItr);
+
+        // Update node 1 if it's managed by the heuristic.
+        if (n1.isHeuristic) {
+          bool n1WasAllocable = n1.isAllocable;
+          addEdgeContributions(eItr, n1Itr);
+          updateAllocability(n1Itr);
+          if (n1WasAllocable && !n1.isAllocable) {
+            rnAllocableList.erase(n1.rnaItr);
+            n1.rnuItr =
+              rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
+          }
+        }
+
+        // Likewise for node 2.
+        if (n2.isHeuristic) {
+          bool n2WasAllocable = n2.isAllocable;
+          addEdgeContributions(eItr, n2Itr);
+          updateAllocability(n2Itr);
+          if (n2WasAllocable && !n2.isAllocable) {
+            rnAllocableList.erase(n2.rnaItr);
+            n2.rnuItr =
+              rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
+          }
+        }
+      }
+
+      /// \brief Handle disconnection of an edge from a node.
+      /// @param eItr Edge iterator for edge being disconnected.
+      /// @param nItr Node iterator for the node being disconnected from.
+      ///
+      /// Updates allocability of the given node and, if appropriate, moves the
+      /// node to a new list.
+      void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
+        NodeData &nd = getHeuristicNodeData(nItr);
+
+        // If the node is not managed by the heuristic there's nothing to be
+        // done.
+        if (!nd.isHeuristic)
+          return;
+
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+        (void)ed;
+        assert(ed.isUpToDate && "Edge data is not up to date.");
+
+        // Update node.
+        bool ndWasAllocable = nd.isAllocable;
+        subtractEdgeContributions(eItr, nItr);
+        updateAllocability(nItr);
+
+        // If the node has gone optimal...
+        if (shouldOptimallyReduce(nItr)) {
+          nd.isHeuristic = false;
+          addToOptimalReduceList(nItr);
+          if (ndWasAllocable) {
+            rnAllocableList.erase(nd.rnaItr);
+          } else {
+            rnUnallocableList.erase(nd.rnuItr);
+          }
+        } else {
+          // Node didn't go optimal, but we might have to move it
+          // from "unallocable" to "allocable".
+          if (!ndWasAllocable && nd.isAllocable) {
+            rnUnallocableList.erase(nd.rnuItr);
+            nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
+          }
+        }
+      }
+
+    private:
+
+      NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
+        return getSolver().getHeuristicNodeData(nItr);
+      }
+
+      EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
+        return getSolver().getHeuristicEdgeData(eItr);
+      }
+
+      // Work out what this edge will contribute to the allocability of the
+      // nodes connected to it.
+      void computeEdgeContributions(Graph::EdgeItr eItr) {
+        EdgeData &ed = getHeuristicEdgeData(eItr);
+
+        if (ed.isUpToDate)
+          return; // Edge data is already up to date.
+
+        Matrix &eCosts = getGraph().getEdgeCosts(eItr);
+
+        unsigned numRegs = eCosts.getRows() - 1,
+                 numReverseRegs = eCosts.getCols() - 1;
+
+        std::vector<unsigned> rowInfCounts(numRegs, 0),
+                              colInfCounts(numReverseRegs, 0);
+
+        ed.worst = 0;
+        ed.reverseWorst = 0;
+        ed.unsafe.clear();
+        ed.unsafe.resize(numRegs, 0);
+        ed.reverseUnsafe.clear();
+        ed.reverseUnsafe.resize(numReverseRegs, 0);
+
+        for (unsigned i = 0; i < numRegs; ++i) {
+          for (unsigned j = 0; j < numReverseRegs; ++j) {
+            if (eCosts[i + 1][j + 1] ==
+                  std::numeric_limits<PBQPNum>::infinity()) {
+              ed.unsafe[i] = 1;
+              ed.reverseUnsafe[j] = 1;
+              ++rowInfCounts[i];
+              ++colInfCounts[j];
+
+              if (colInfCounts[j] > ed.worst) {
+                ed.worst = colInfCounts[j];
+              }
+
+              if (rowInfCounts[i] > ed.reverseWorst) {
+                ed.reverseWorst = rowInfCounts[i];
+              }
+            }
+          }
+        }
+
+        ed.isUpToDate = true;
+      }
+
+      // Add the contributions of the given edge to the given node's
+      // numDenied and safe members. No action is taken other than to update
+      // these member values.
Once updated these numbers can be used by clients + // to update the node's allocability. + void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r]==0) { + --nd.numSafe; + } + ++nd.unsafeDegrees[r]; + } + } + } + + // Subtract the contributions of the given edge to the given node's + // numDenied and safe members. No action is taken other than to update + // these member values. Once updated these numbers can be used by clients + // to update the node's allocability. + void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) { + EdgeData &ed = getHeuristicEdgeData(eItr); + + assert(ed.isUpToDate && "Using out-of-date edge numbers."); + + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + + bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr); + EdgeData::UnsafeArray &unsafe = + nIsNode1 ? ed.unsafe : ed.reverseUnsafe; + nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst; + + for (unsigned r = 0; r < numRegs; ++r) { + if (unsafe[r]) { + if (nd.unsafeDegrees[r] == 1) { + ++nd.numSafe; + } + --nd.unsafeDegrees[r]; + } + } + } + + void updateAllocability(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1; + nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0; + } + + void initializeNode(Graph::NodeItr nItr) { + NodeData &nd = getHeuristicNodeData(nItr); + + if (nd.isInitialized) + return; // Node data is already up to date. 
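+        // First-time setup for the allocability test used above: a node is
+        // allocable while numDenied < numRegs, or while some register is never
+        // blocked on any edge (numSafe > 0). E.g. with numRegs = 3, even
+        // numDenied == 3 leaves the node allocable if one register stays safe.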
+
+      unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
+
+      nd.numDenied = 0;
+      nd.numSafe = numRegs;
+      nd.unsafeDegrees.resize(numRegs, 0);
+
+      typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+
+      for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
+                         aeEnd = getSolver().solverEdgesEnd(nItr);
+           aeItr != aeEnd; ++aeItr) {
+
+        Graph::EdgeItr eItr = *aeItr;
+        computeEdgeContributions(eItr);
+        addEdgeContributions(eItr, nItr);
+      }
+
+      updateAllocability(nItr);
+      nd.isInitialized = true;
+    }
+
+    void handleRemoveNode(Graph::NodeItr xnItr) {
+      typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
+      std::vector<Graph::EdgeItr> edgesToRemove;
+      for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
+                         aeEnd = getSolver().solverEdgesEnd(xnItr);
+           aeItr != aeEnd; ++aeItr) {
+        Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
+        handleRemoveEdge(*aeItr, ynItr);
+        edgesToRemove.push_back(*aeItr);
+      }
+      while (!edgesToRemove.empty()) {
+        getSolver().removeSolverEdge(edgesToRemove.back());
+        edgesToRemove.pop_back();
+      }
+    }
+
+    RNAllocableList rnAllocableList;
+    RNUnallocableList rnUnallocableList;
+  };
+
+  }
+}
+
+
+#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
diff --git a/final/include/llvm/CodeGen/PBQP/Math.h b/final/include/llvm/CodeGen/PBQP/Math.h
new file mode 100644
index 00000000000..e7598bf3e3f
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/Math.h
@@ -0,0 +1,288 @@
+//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_MATH_H
+#define LLVM_CODEGEN_PBQP_MATH_H
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+
+namespace PBQP {
+
+typedef float PBQPNum;
+
+/// \brief PBQP Vector class.
+class Vector {
+  public:
+
+    /// \brief Construct a PBQP vector of the given size.
+    explicit Vector(unsigned length) :
+      length(length), data(new PBQPNum[length]) {
+    }
+
+    /// \brief Construct a PBQP vector with initializer.
+    Vector(unsigned length, PBQPNum initVal) :
+      length(length), data(new PBQPNum[length]) {
+      std::fill(data, data + length, initVal);
+    }
+
+    /// \brief Copy construct a PBQP vector.
+    Vector(const Vector &v) :
+      length(v.length), data(new PBQPNum[length]) {
+      std::copy(v.data, v.data + length, data);
+    }
+
+    /// \brief Destroy this vector, return its memory.
+    ~Vector() { delete[] data; }
+
+    /// \brief Assignment operator.
+    Vector& operator=(const Vector &v) {
+      if (this == &v) // Guard against self-assignment freeing live data.
+        return *this;
+      delete[] data;
+      length = v.length;
+      data = new PBQPNum[length];
+      std::copy(v.data, v.data + length, data);
+      return *this;
+    }
+
+    /// \brief Return the length of the vector.
+    unsigned getLength() const {
+      return length;
+    }
+
+    /// \brief Element access.
+    PBQPNum& operator[](unsigned index) {
+      assert(index < length && "Vector element access out of bounds.");
+      return data[index];
+    }
+
+    /// \brief Const element access.
+    const PBQPNum& operator[](unsigned index) const {
+      assert(index < length && "Vector element access out of bounds.");
+      return data[index];
+    }
+
+    /// \brief Add another vector to this one.
+    Vector& operator+=(const Vector &v) {
+      assert(length == v.length && "Vector length mismatch.");
+      std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
+      return *this;
+    }
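+    // A minimal usage sketch (illustrative only, not part of the original
+    // header):
+    //
+    //   Vector a(2, 1.0);          // [ 1, 1 ]
+    //   Vector b(2, 2.0);          // [ 2, 2 ]
+    //   a += b;                    // a is now [ 3, 3 ]
+    //   unsigned m = a.minIndex(); // 0 - the first minimal element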
+    /// \brief Subtract another vector from this one.
+    Vector& operator-=(const Vector &v) {
+      assert(length == v.length && "Vector length mismatch.");
+      std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
+      return *this;
+    }
+
+    /// \brief Returns the index of the minimum value in this vector.
+    unsigned minIndex() const {
+      return std::min_element(data, data + length) - data;
+    }
+
+  private:
+    unsigned length;
+    PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given vector on the given
+///        output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Vector &v) {
+  assert((v.getLength() != 0) && "Zero-length vector badness.");
+
+  os << "[ " << v[0];
+  for (unsigned i = 1; i < v.getLength(); ++i) {
+    os << ", " << v[i];
+  }
+  os << " ]";
+
+  return os;
+}
+
+
+/// \brief PBQP Matrix class.
+class Matrix {
+  public:
+
+    /// \brief Construct a PBQP Matrix with the given dimensions.
+    Matrix(unsigned rows, unsigned cols) :
+      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+    }
+
+    /// \brief Construct a PBQP Matrix with the given dimensions and initial
+    ///        value.
+    Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
+      rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
+      std::fill(data, data + (rows * cols), initVal);
+    }
+
+    /// \brief Copy construct a PBQP matrix.
+    Matrix(const Matrix &m) :
+      rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
+      std::copy(m.data, m.data + (rows * cols), data);
+    }
+
+    /// \brief Destroy this matrix, return its memory.
+    ~Matrix() { delete[] data; }
+
+    /// \brief Assignment operator.
+    Matrix& operator=(const Matrix &m) {
+      if (this == &m) // Guard against self-assignment freeing live data.
+        return *this;
+      delete[] data;
+      rows = m.rows; cols = m.cols;
+      data = new PBQPNum[rows * cols];
+      std::copy(m.data, m.data + (rows * cols), data);
+      return *this;
+    }
+
+    /// \brief Return the number of rows in this matrix.
+    unsigned getRows() const { return rows; }
+
+    /// \brief Return the number of cols in this matrix.
+    unsigned getCols() const { return cols; }
+
+    /// \brief Matrix element access.
+    PBQPNum* operator[](unsigned r) {
+      assert(r < rows && "Row out of bounds.");
+      return data + (r * cols);
+    }
+
+    /// \brief Matrix element access.
+    const PBQPNum* operator[](unsigned r) const {
+      assert(r < rows && "Row out of bounds.");
+      return data + (r * cols);
+    }
+
+    /// \brief Returns the given row as a vector.
+    Vector getRowAsVector(unsigned r) const {
+      Vector v(cols);
+      for (unsigned c = 0; c < cols; ++c)
+        v[c] = (*this)[r][c];
+      return v;
+    }
+
+    /// \brief Returns the given column as a vector.
+    Vector getColAsVector(unsigned c) const {
+      Vector v(rows);
+      for (unsigned r = 0; r < rows; ++r)
+        v[r] = (*this)[r][c];
+      return v;
+    }
+
+    /// \brief Reset the matrix to the given value.
+    Matrix& reset(PBQPNum val = 0) {
+      std::fill(data, data + (rows * cols), val);
+      return *this;
+    }
+
+    /// \brief Set a single row of this matrix to the given value.
+    Matrix& setRow(unsigned r, PBQPNum val) {
+      assert(r < rows && "Row out of bounds.");
+      std::fill(data + (r * cols), data + ((r + 1) * cols), val);
+      return *this;
+    }
+
+    /// \brief Set a single column of this matrix to the given value.
+    Matrix& setCol(unsigned c, PBQPNum val) {
+      assert(c < cols && "Column out of bounds.");
+      for (unsigned r = 0; r < rows; ++r)
+        (*this)[r][c] = val;
+      return *this;
+    }
+
+    /// \brief Matrix transpose.
+    Matrix transpose() const {
+      Matrix m(cols, rows);
+      for (unsigned r = 0; r < rows; ++r)
+        for (unsigned c = 0; c < cols; ++c)
+          m[c][r] = (*this)[r][c];
+      return m;
+    }
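+    // A minimal usage sketch (illustrative only, not part of the original
+    // header):
+    //
+    //   Matrix m(2, 3, 0);        // 2x3 matrix, zero-filled
+    //   m[0][2] = 5;              // rows are exposed as raw PBQPNum*
+    //   Matrix t = m.transpose(); // t is 3x2 and t[2][0] == 5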
+    /// \brief Returns the diagonal of the matrix as a vector.
+    ///
+    /// Matrix must be square.
+    Vector diagonalize() const {
+      assert(rows == cols && "Attempt to diagonalize non-square matrix.");
+
+      Vector v(rows);
+      for (unsigned r = 0; r < rows; ++r)
+        v[r] = (*this)[r][r];
+      return v;
+    }
+
+    /// \brief Add the given matrix to this one.
+    Matrix& operator+=(const Matrix &m) {
+      assert(rows == m.rows && cols == m.cols &&
+             "Matrix dimensions mismatch.");
+      std::transform(data, data + (rows * cols), m.data, data,
+                     std::plus<PBQPNum>());
+      return *this;
+    }
+
+    /// \brief Returns the minimum of the given row.
+    PBQPNum getRowMin(unsigned r) const {
+      assert(r < rows && "Row out of bounds");
+      return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
+    }
+
+    /// \brief Returns the minimum of the given column.
+    PBQPNum getColMin(unsigned c) const {
+      PBQPNum minElem = (*this)[0][c];
+      for (unsigned r = 1; r < rows; ++r)
+        if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
+      return minElem;
+    }
+
+    /// \brief Subtracts the given scalar from the elements of the given row.
+    Matrix& subFromRow(unsigned r, PBQPNum val) {
+      assert(r < rows && "Row out of bounds");
+      std::transform(data + (r * cols), data + ((r + 1) * cols),
+                     data + (r * cols),
+                     std::bind2nd(std::minus<PBQPNum>(), val));
+      return *this;
+    }
+
+    /// \brief Subtracts the given scalar from the elements of the given
+    ///        column.
+    Matrix& subFromCol(unsigned c, PBQPNum val) {
+      for (unsigned r = 0; r < rows; ++r)
+        (*this)[r][c] -= val;
+      return *this;
+    }
+
+    /// \brief Returns true if this is a zero matrix.
+    bool isZero() const {
+      return std::find_if(data, data + (rows * cols),
+                          std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
+        data + (rows * cols);
+    }
+
+  private:
+    unsigned rows, cols;
+    PBQPNum *data;
+};
+
+/// \brief Output a textual representation of the given matrix on the given
+///        output stream.
+template <typename OStream>
+OStream& operator<<(OStream &os, const Matrix &m) {
+
+  assert((m.getRows() != 0) && "Zero-row matrix badness.");
+
+  for (unsigned i = 0; i < m.getRows(); ++i) {
+    os << m.getRowAsVector(i);
+  }
+
+  return os;
+}
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_MATH_H
diff --git a/final/include/llvm/CodeGen/PBQP/Solution.h b/final/include/llvm/CodeGen/PBQP/Solution.h
new file mode 100644
index 00000000000..57d9b95fc3b
--- /dev/null
+++ b/final/include/llvm/CodeGen/PBQP/Solution.h
@@ -0,0 +1,94 @@
+//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// PBQP Solution class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
+#define LLVM_CODEGEN_PBQP_SOLUTION_H
+
+#include "Math.h"
+#include "Graph.h"
+
+#include <map>
+
+namespace PBQP {
+
+  /// \brief Represents a solution to a PBQP problem.
+  ///
+  /// To get the selection for each node in the problem use the getSelection
+  /// method.
+  class Solution {
+  private:
+
+    typedef std::map<Graph::ConstNodeItr, unsigned,
+                     NodeItrComparator> SelectionsMap;
+    SelectionsMap selections;
+
+    unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
+
+  public:
+
+    /// \brief Initialise an empty solution.
+    Solution()
+      : r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
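+    // A minimal usage sketch (illustrative only; 'solver' and 'nItr' are
+    // hypothetical names, not part of this header):
+    //
+    //   Solution solution = solver.solve();
+    //   unsigned selection = solution.getSelection(nItr);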
+    /// \brief Number of nodes for which selections have been made.
+    /// @return Number of nodes for which selections have been made.
+    unsigned numNodes() const { return selections.size(); }
+
+    /// \brief Records a reduction via the R0 rule. Should be called from the
+    ///        solver only.
+    void recordR0() { ++r0Reductions; }
+
+    /// \brief Returns the number of R0 reductions applied to solve the
+    ///        problem.
+    unsigned numR0Reductions() const { return r0Reductions; }
+
+    /// \brief Records a reduction via the R1 rule. Should be called from the
+    ///        solver only.
+    void recordR1() { ++r1Reductions; }
+
+    /// \brief Returns the number of R1 reductions applied to solve the
+    ///        problem.
+    unsigned numR1Reductions() const { return r1Reductions; }
+
+    /// \brief Records a reduction via the R2 rule. Should be called from the
+    ///        solver only.
+    void recordR2() { ++r2Reductions; }
+
+    /// \brief Returns the number of R2 reductions applied to solve the
+    ///        problem.
+    unsigned numR2Reductions() const { return r2Reductions; }
+
+    /// \brief Records a reduction via the RN rule. Should be called from the
+    ///        solver only.
+    void recordRN() { ++rNReductions; }
+
+    /// \brief Returns the number of RN reductions applied to solve the
+    ///        problem.
+    unsigned numRNReductions() const { return rNReductions; }
+
+    /// \brief Set the selection for a given node.
+    /// @param nItr Node iterator.
+    /// @param selection Selection for nItr.
+    void setSelection(Graph::NodeItr nItr, unsigned selection) {
+      selections[nItr] = selection;
+    }
+
+    /// \brief Get a node's selection.
+    /// @param nItr Node iterator.
+    /// @return The selection for nItr.
+    unsigned getSelection(Graph::ConstNodeItr nItr) const {
+      SelectionsMap::const_iterator sItr = selections.find(nItr);
+      assert(sItr != selections.end() && "No selection for node.");
+      return sItr->second;
+    }
+
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
diff --git a/final/include/llvm/CodeGen/Passes.h b/final/include/llvm/CodeGen/Passes.h
new file mode 100644
index 00000000000..53aee7a9c9f
--- /dev/null
+++ b/final/include/llvm/CodeGen/Passes.h
@@ -0,0 +1,241 @@
+//===-- Passes.h - Target independent code generation passes ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PASSES_H
+#define LLVM_CODEGEN_PASSES_H
+
+#include "llvm/Target/TargetMachine.h"
+#include <string>
+
+namespace llvm {
+
+  class FunctionPass;
+  class MachineFunctionPass;
+  class PassInfo;
+  class TargetLowering;
+  class RegisterCoalescer;
+  class raw_ostream;
+
+  /// createUnreachableBlockEliminationPass - The LLVM code generator does
+  /// not work well with unreachable basic blocks (what live ranges make
+  /// sense for a block that cannot be reached?). As such, a code generator
+  /// should either not instruction select unreachable blocks, or run this
+  /// pass as its last LLVM modifying pass to clean up blocks that are not
+  /// reachable from the entry block.
+  FunctionPass *createUnreachableBlockEliminationPass();
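+  // A minimal usage sketch (illustrative only; 'PM' is a hypothetical pass
+  // manager, not part of this header):
+  //
+  //   PM.add(createUnreachableBlockEliminationPass());
+  //   PM.add(createMachineFunctionPrinterPass(errs(), "# After ISel:"));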
+  /// MachineFunctionPrinter pass - This pass prints out the machine
+  /// function to the given stream as a debugging tool.
+  MachineFunctionPass *
+  createMachineFunctionPrinterPass(raw_ostream &OS,
+                                   const std::string &Banner = "");
+
+  /// MachineLoopInfo pass - This pass is a loop analysis pass.
+  ///
+  extern char &MachineLoopInfoID;
+
+  /// MachineLoopRanges pass - This pass is an on-demand loop coverage
+  /// analysis pass.
+  ///
+  extern char &MachineLoopRangesID;
+
+  /// MachineDominators pass - This pass is a machine dominators analysis
+  /// pass.
+  ///
+  extern char &MachineDominatorsID;
+
+  /// EdgeBundles analysis - Bundle machine CFG edges.
+  ///
+  extern char &EdgeBundlesID;
+
+  /// PHIElimination pass - This pass eliminates machine instruction PHI
+  /// nodes by inserting copy instructions. This destroys SSA information,
+  /// but is the desired input for some register allocators. This pass is
+  /// "required" by these register allocators like this:
+  /// AU.addRequiredID(PHIEliminationID);
+  ///
+  extern char &PHIEliminationID;
+
+  /// StrongPHIElimination pass - This pass eliminates machine instruction
+  /// PHI nodes by inserting copy instructions. This destroys SSA
+  /// information, but is the desired input for some register allocators.
+  /// This pass is "required" by these register allocators like this:
+  /// AU.addRequiredID(PHIEliminationID);
+  /// This pass is still in development.
+  extern char &StrongPHIEliminationID;
+
+  extern char &PreAllocSplittingID;
+
+  /// LiveStacks pass. An analysis keeping track of the liveness of stack
+  /// slots.
+  extern char &LiveStacksID;
+
+  /// SimpleRegisterCoalescing pass. Aggressively coalesces every register
+  /// copy it can.
+  ///
+  extern char &SimpleRegisterCoalescingID;
+
+  /// TwoAddressInstruction pass - This pass reduces two-address instructions
+  /// to use two operands. This destroys SSA information but it is desired by
+  /// register allocators.
+  extern char &TwoAddressInstructionPassID;
+
+  /// SpillPlacement analysis. Suggest optimal placement of spill code
+  /// between basic blocks.
+  ///
+  extern char &SpillPlacementID;
+
+  /// UnreachableMachineBlockElimination pass - This pass removes unreachable
+  /// machine basic blocks.
+  extern char &UnreachableMachineBlockElimID;
+
+  /// DeadMachineInstructionElim pass - This pass removes dead machine
+  /// instructions.
+  ///
+  FunctionPass *createDeadMachineInstructionElimPass();
+
+  /// Creates a register allocator as the user specified on the command
+  /// line, or picks one that matches OptLevel.
+  ///
+  FunctionPass *createRegisterAllocator(CodeGenOpt::Level OptLevel);
+
+  /// FastRegisterAllocation Pass - This pass register allocates as fast as
+  /// possible. It is best suited for debug code where live ranges are short.
+  ///
+  FunctionPass *createFastRegisterAllocator();
+
+  /// BasicRegisterAllocation Pass - This pass implements a degenerate global
+  /// register allocator using the basic regalloc framework.
+  ///
+  FunctionPass *createBasicRegisterAllocator();
+
+  /// Greedy register allocation pass - This pass implements a global
+  /// register allocator for optimized builds.
+  ///
+  FunctionPass *createGreedyRegisterAllocator();
+
+  /// LinearScanRegisterAllocation Pass - This pass implements the linear
+  /// scan register allocation algorithm, a global register allocator.
+  ///
+  FunctionPass *createLinearScanRegisterAllocator();
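+  // Illustrative only (hypothetical driver code, not part of this header):
+  //
+  //   FunctionPass *RA = createRegisterAllocator(CodeGenOpt::Default);
+  //
+  // At CodeGenOpt::None this would typically pick the fast allocator; at
+  // higher optimization levels, one of the global allocators declared here.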
+
+  /// PBQPRegisterAllocation Pass - This pass implements the Partitioned
+  /// Boolean Quadratic Programming (PBQP) based register allocator.
+  ///
+  FunctionPass *createDefaultPBQPRegisterAllocator();
+
+  /// SimpleRegisterCoalescing Pass - Coalesce all copies possible. Can run
+  /// independently of the register allocator.
+  ///
+  RegisterCoalescer *createSimpleRegisterCoalescer();
+
+  /// PrologEpilogCodeInserter Pass - This pass inserts prolog and epilog
+  /// code, and eliminates abstract frame references.
+  ///
+  FunctionPass *createPrologEpilogCodeInserter();
+
+  /// LowerSubregs Pass - This pass lowers subregs to register-register
+  /// copies which yields suboptimal, but correct code if the register
+  /// allocator cannot coalesce all subreg operations during allocation.
+  ///
+  FunctionPass *createLowerSubregsPass();
+
+  /// createPostRAScheduler - This pass performs post register allocation
+  /// scheduling.
+  FunctionPass *createPostRAScheduler(CodeGenOpt::Level OptLevel);
+
+  /// BranchFolding Pass - This pass performs machine code CFG based
+  /// optimizations to delete branches to branches, eliminate branches to
+  /// successor blocks (creating fall throughs), and eliminate branches over
+  /// branches.
+  FunctionPass *createBranchFoldingPass(bool DefaultEnableTailMerge);
+
+  /// TailDuplicate Pass - Duplicate blocks with unconditional branches
+  /// into tails of their predecessors.
+  FunctionPass *createTailDuplicatePass(bool PreRegAlloc = false);
+
+  /// IfConverter Pass - This pass performs machine code if conversion.
+  FunctionPass *createIfConverterPass();
+
+  /// Code Placement Pass - This pass optimizes code placement and aligns
+  /// loop headers to a target specific alignment boundary.
+  FunctionPass *createCodePlacementOptPass();
+
+  /// IntrinsicLowering Pass - Performs target-independent LLVM IR
+  /// transformations for highly portable GC strategies.
+  FunctionPass *createGCLoweringPass();
+
+  /// MachineCodeAnalysis Pass - Target-independent pass to mark safe points
+  /// in machine code. Must be added very late during code generation, just
+  /// prior to output, and importantly after all CFG transformations (such
+  /// as branch folding).
+  FunctionPass *createGCMachineCodeAnalysisPass();
+
+  /// Deleter Pass - Releases GC metadata.
+  ///
+  FunctionPass *createGCInfoDeleter();
+
+  /// Creates a pass to print GC metadata.
+  ///
+  FunctionPass *createGCInfoPrinter(raw_ostream &OS);
+
+  /// createMachineCSEPass - This pass performs global CSE on machine
+  /// instructions.
+  FunctionPass *createMachineCSEPass();
+
+  /// createMachineLICMPass - This pass performs LICM on machine
+  /// instructions.
+  ///
+  FunctionPass *createMachineLICMPass(bool PreRegAlloc = true);
+
+  /// createMachineSinkingPass - This pass performs sinking on machine
+  /// instructions.
+  FunctionPass *createMachineSinkingPass();
+
+  /// createPeepholeOptimizerPass - This pass performs peephole
+  /// optimizations, like extension and comparison eliminations.
+  FunctionPass *createPeepholeOptimizerPass();
+
+  /// createOptimizePHIsPass - This pass optimizes machine instruction PHIs
+  /// to take advantage of opportunities created during DAG legalization.
+  FunctionPass *createOptimizePHIsPass();
+
+  /// createStackSlotColoringPass - This pass performs stack slot coloring.
+  FunctionPass *createStackSlotColoringPass(bool);
+
+  /// createStackProtectorPass - This pass adds stack protectors to
+  /// functions.
+  FunctionPass *createStackProtectorPass(const TargetLowering *tli);
+  /// createMachineVerifierPass - This pass verifies generated machine code
+  /// instructions for correctness.
+  FunctionPass *createMachineVerifierPass(const char *Banner = 0);
+
+  /// createDwarfEHPass - This pass mulches exception handling code into a
+  /// form adapted to code generation. Required if using dwarf exception
+  /// handling.
+  FunctionPass *createDwarfEHPass(const TargetMachine *tm);
+
+  /// createSjLjEHPass - This pass adapts exception handling code to use
+  /// the GCC-style builtin setjmp/longjmp (sjlj) to handle EH control flow.
+  FunctionPass *createSjLjEHPass(const TargetLowering *tli);
+
+  /// createLocalStackSlotAllocationPass - This pass assigns local frame
+  /// indices to stack slots relative to one another and allocates
+  /// base registers to access them when it is estimated by the target to
+  /// be out of range of normal frame pointer or stack pointer index
+  /// addressing.
+  FunctionPass *createLocalStackSlotAllocationPass();
+
+  /// createExpandISelPseudosPass - This pass expands pseudo-instructions.
+  ///
+  FunctionPass *createExpandISelPseudosPass();
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/ProcessImplicitDefs.h b/final/include/llvm/CodeGen/ProcessImplicitDefs.h
new file mode 100644
index 00000000000..e2ab899f183
--- /dev/null
+++ b/final/include/llvm/CodeGen/ProcessImplicitDefs.h
@@ -0,0 +1,45 @@
+//===-------------- llvm/CodeGen/ProcessImplicitDefs.h ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
+#define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
+
+namespace llvm {
+
+  class MachineInstr;
+  class TargetInstrInfo;
+
+  /// Process IMPLICIT_DEF instructions and make sure there is one
+  /// implicit_def for each use. Add isUndef markers to implicit_def defs
+  /// and their uses.
+  class ProcessImplicitDefs : public MachineFunctionPass {
+  private:
+
+    bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg,
+                                unsigned OpIdx, const TargetInstrInfo *tii_,
+                                SmallSet<unsigned, 8> &ImpDefRegs);
+
+  public:
+    static char ID;
+
+    ProcessImplicitDefs() : MachineFunctionPass(ID) {
+      initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+    virtual bool runOnMachineFunction(MachineFunction &fn);
+  };
+
+}
+
+#endif // LLVM_CODEGEN_PROCESSIMPLICITDEFS_H
diff --git a/final/include/llvm/CodeGen/PseudoSourceValue.h b/final/include/llvm/CodeGen/PseudoSourceValue.h
new file mode 100644
index 00000000000..bace631ab95
--- /dev/null
+++ b/final/include/llvm/CodeGen/PseudoSourceValue.h
@@ -0,0 +1,112 @@
+//===-- llvm/CodeGen/PseudoSourceValue.h ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
+#define LLVM_CODEGEN_PSEUDOSOURCEVALUE_H
+
+#include "llvm/Value.h"
+
+namespace llvm {
+  class MachineFrameInfo;
+  class raw_ostream;
+
+  /// PseudoSourceValue - Special value supplied for machine level alias
+  /// analysis. It indicates that a memory access references the function's
+  /// stack frame (e.g., a spill slot), below the stack frame (e.g., argument
+  /// space), or the constant pool.
+  class PseudoSourceValue : public Value {
+  private:
+    /// printCustom - Implement printing for PseudoSourceValue. This is
+    /// called from Value::print or Value's operator<<.
+    ///
+    virtual void printCustom(raw_ostream &O) const;
+
+  public:
+    explicit PseudoSourceValue(enum ValueTy Subclass = PseudoSourceValueVal);
+
+    /// isConstant - Test whether the memory pointed to by this
+    /// PseudoSourceValue has a constant value.
+    ///
+    virtual bool isConstant(const MachineFrameInfo *) const;
+
+    /// isAliased - Test whether the memory pointed to by this
+    /// PseudoSourceValue may also be pointed to by an LLVM IR Value.
+    virtual bool isAliased(const MachineFrameInfo *) const;
+
+    /// mayAlias - Return true if the memory pointed to by this
+    /// PseudoSourceValue can ever alias an LLVM IR Value.
+    virtual bool mayAlias(const MachineFrameInfo *) const;
+
+    /// classof - Methods for support type inquiry through isa, cast, and
+    /// dyn_cast:
+    ///
+    static inline bool classof(const PseudoSourceValue *) { return true; }
+    static inline bool classof(const Value *V) {
+      return V->getValueID() == PseudoSourceValueVal ||
+             V->getValueID() == FixedStackPseudoSourceValueVal;
+    }
+
+    /// A pseudo source value referencing a fixed stack frame entry,
+    /// e.g., a spill slot.
+    static const PseudoSourceValue *getFixedStack(int FI);
+
+    /// A pseudo source value referencing the area below the stack frame of
+    /// a function, e.g., the argument space.
+    static const PseudoSourceValue *getStack();
+
+    /// A pseudo source value referencing the global offset table
+    /// (or something like it).
+    static const PseudoSourceValue *getGOT();
+
+    /// A pseudo source value referencing the constant pool. Since constant
+    /// pools are constant, this doesn't need to identify a specific constant
+    /// pool entry.
+    static const PseudoSourceValue *getConstantPool();
+
+    /// A pseudo source value referencing a jump table. Since jump tables are
+    /// constant, this doesn't need to identify a specific jump table.
+    static const PseudoSourceValue *getJumpTable();
+  };
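+  // A minimal usage sketch (illustrative only; 'MF', 'FI', 'Size' and
+  // 'Align' are hypothetical, not part of this header). Spill-slot
+  // accesses are typically tagged for alias analysis like so:
+  //
+  //   MachineMemOperand *MMO =
+  //     MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+  //                             MachineMemOperand::MOLoad, 0, Size, Align);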
+  /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
+  /// for holding FixedStack values, which must include a frame
+  /// index.
+  class FixedStackPseudoSourceValue : public PseudoSourceValue {
+    const int FI;
+  public:
+    explicit FixedStackPseudoSourceValue(int fi) :
+        PseudoSourceValue(FixedStackPseudoSourceValueVal), FI(fi) {}
+
+    /// classof - Methods for support type inquiry through isa, cast, and
+    /// dyn_cast:
+    ///
+    static inline bool classof(const FixedStackPseudoSourceValue *) {
+      return true;
+    }
+    static inline bool classof(const Value *V) {
+      return V->getValueID() == FixedStackPseudoSourceValueVal;
+    }
+
+    virtual bool isConstant(const MachineFrameInfo *MFI) const;
+
+    virtual bool isAliased(const MachineFrameInfo *MFI) const;
+
+    virtual bool mayAlias(const MachineFrameInfo *) const;
+
+    virtual void printCustom(raw_ostream &OS) const;
+
+    int getFrameIndex() const { return FI; }
+  };
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/RegAllocPBQP.h b/final/include/llvm/CodeGen/RegAllocPBQP.h
new file mode 100644
index 00000000000..7e8745eddef
--- /dev/null
+++ b/final/include/llvm/CodeGen/RegAllocPBQP.h
@@ -0,0 +1,167 @@
+//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PBQPBuilder interface, for classes which build PBQP
+// instances to represent register allocation problems, and the RegAllocPBQP
+// interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCPBQP_H
+#define LLVM_CODEGEN_REGALLOCPBQP_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Solution.h"
+
+#include <map>
+#include <set>
+
+namespace llvm {
+
+  class LiveIntervals;
+  class MachineFunction;
+  class MachineLoopInfo;
+
+  /// This class wraps up a PBQP instance representing a register allocation
+  /// problem, plus the structures necessary to map back from the PBQP
+  /// solution to a register allocation solution. (i.e. The PBQP-node <-->
+  /// vreg map, and the PBQP option <--> storage location map).
+  class PBQPRAProblem {
+  public:
+
+    typedef SmallVector<unsigned, 16> AllowedSet;
+
+    PBQP::Graph& getGraph() { return graph; }
+
+    const PBQP::Graph& getGraph() const { return graph; }
+
+    /// Record the mapping between the given virtual register and PBQP node,
+    /// and the set of allowed pregs for the vreg.
+    ///
+    /// If you are extending PBQPBuilder you are unlikely to need this: Nodes
+    /// and options for all vregs will already have been set up for you by
+    /// the base class.
+    template <typename AllowedRegsItr>
+    void recordVReg(unsigned vreg, PBQP::Graph::NodeItr node,
+                    AllowedRegsItr arBegin, AllowedRegsItr arEnd) {
+      assert(node2VReg.find(node) == node2VReg.end() && "Re-mapping node.");
+      assert(vreg2Node.find(vreg) == vreg2Node.end() && "Re-mapping vreg.");
+      assert(allowedSets[vreg].empty() && "vreg already has pregs.");
+
+      node2VReg[node] = vreg;
+      vreg2Node[vreg] = node;
+      std::copy(arBegin, arEnd, std::back_inserter(allowedSets[vreg]));
+    }
+
+    /// Get the virtual register corresponding to the given PBQP node.
+    unsigned getVRegForNode(PBQP::Graph::ConstNodeItr node) const;
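+    // A minimal usage sketch (illustrative only; 'problem', 'solution' and
+    // 'node' are hypothetical, not part of this header). After solving,
+    // each node's selection maps back to a preg or a spill:
+    //
+    //   unsigned vreg = problem.getVRegForNode(node);
+    //   unsigned option = solution.getSelection(node);
+    //   if (problem.isPRegOption(vreg, option)) {
+    //     unsigned preg = problem.getPRegForOption(vreg, option);
+    //     // assign vreg to preg...
+    //   }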
+    /// Get the PBQP node corresponding to the given virtual register.
+    PBQP::Graph::NodeItr getNodeForVReg(unsigned vreg) const;
+
+    /// Returns true if the given PBQP option represents a physical register,
+    /// false otherwise.
+    bool isPRegOption(unsigned vreg, unsigned option) const {
+      // At present we only have spills or pregs, so anything that's not a
+      // spill is a preg. (This might be extended one day to support remat).
+      return !isSpillOption(vreg, option);
+    }
+
+    /// Returns true if the given PBQP option represents spilling, false
+    /// otherwise.
+    bool isSpillOption(unsigned vreg, unsigned option) const {
+      // We hardcode option zero as the spill option.
+      return option == 0;
+    }
+
+    /// Returns the allowed set for the given virtual register.
+    const AllowedSet& getAllowedSet(unsigned vreg) const;
+
+    /// Get PReg for option.
+    unsigned getPRegForOption(unsigned vreg, unsigned option) const;
+
+  private:
+
+    typedef std::map<PBQP::Graph::ConstNodeItr, unsigned,
+                     PBQP::NodeItrComparator> Node2VReg;
+    typedef DenseMap<unsigned, PBQP::Graph::NodeItr> VReg2Node;
+    typedef std::map<unsigned, AllowedSet> AllowedSetMap;
+
+    PBQP::Graph graph;
+    Node2VReg node2VReg;
+    VReg2Node vreg2Node;
+
+    AllowedSetMap allowedSets;
+
+  };
+
+  /// Builds PBQP instances to represent register allocation problems.
+  /// Includes spill, interference and coalescing costs by default. You can
+  /// extend this class to support additional constraints for your
+  /// architecture.
+  class PBQPBuilder {
+  private:
+    PBQPBuilder(const PBQPBuilder&) {}
+    void operator=(const PBQPBuilder&) {}
+  public:
+
+    typedef std::set<unsigned> RegSet;
+
+    /// Default constructor.
+    PBQPBuilder() {}
+
+    /// Clean up a PBQPBuilder.
+    virtual ~PBQPBuilder() {}
+
+    /// Build a PBQP instance to represent the register allocation problem
+    /// for the given MachineFunction.
+    virtual std::auto_ptr<PBQPRAProblem> build(
+                                             MachineFunction *mf,
+                                             const LiveIntervals *lis,
+                                             const MachineLoopInfo *loopInfo,
+                                             const RegSet &vregs);
+  private:
+
+    void addSpillCosts(PBQP::Vector &costVec, PBQP::PBQPNum spillCost);
+
+    void addInterferenceCosts(PBQP::Matrix &costMat,
+                              const PBQPRAProblem::AllowedSet &vr1Allowed,
+                              const PBQPRAProblem::AllowedSet &vr2Allowed,
+                              const TargetRegisterInfo *tri);
+  };
+
+  /// Extended builder which adds coalescing constraints to a problem.
+  class PBQPBuilderWithCoalescing : public PBQPBuilder {
+  public:
+
+    /// Build a PBQP instance to represent the register allocation problem
+    /// for the given MachineFunction.
+    virtual std::auto_ptr<PBQPRAProblem> build(
+                                             MachineFunction *mf,
+                                             const LiveIntervals *lis,
+                                             const MachineLoopInfo *loopInfo,
+                                             const RegSet &vregs);
+
+  private:
+
+    void addPhysRegCoalesce(PBQP::Vector &costVec, unsigned pregOption,
+                            PBQP::PBQPNum benefit);
+
+    void addVirtRegCoalesce(PBQP::Matrix &costMat,
+                            const PBQPRAProblem::AllowedSet &vr1Allowed,
+                            const PBQPRAProblem::AllowedSet &vr2Allowed,
+                            PBQP::PBQPNum benefit);
+  };
+
+  FunctionPass *
+  createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder);
+}
+
+#endif /* LLVM_CODEGEN_REGALLOCPBQP_H */
diff --git a/final/include/llvm/CodeGen/RegAllocRegistry.h b/final/include/llvm/CodeGen/RegAllocRegistry.h
new file mode 100644
index 00000000000..100e357654f
--- /dev/null
+++ b/final/include/llvm/CodeGen/RegAllocRegistry.h
@@ -0,0 +1,66 @@
+//===-- llvm/CodeGen/RegAllocRegistry.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation for register allocator function +// pass registry (RegisterRegAlloc). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGENREGALLOCREGISTRY_H +#define LLVM_CODEGENREGALLOCREGISTRY_H + +#include "llvm/CodeGen/MachinePassRegistry.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// +/// RegisterRegAlloc class - Track the registration of register allocators. +/// +//===----------------------------------------------------------------------===// +class RegisterRegAlloc : public MachinePassRegistryNode { + +public: + + typedef FunctionPass *(*FunctionPassCtor)(); + + static MachinePassRegistry Registry; + + RegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C) + : MachinePassRegistryNode(N, D, (MachinePassCtor)C) + { + Registry.Add(this); + } + ~RegisterRegAlloc() { Registry.Remove(this); } + + + // Accessors. + // + RegisterRegAlloc *getNext() const { + return (RegisterRegAlloc *)MachinePassRegistryNode::getNext(); + } + static RegisterRegAlloc *getList() { + return (RegisterRegAlloc *)Registry.getList(); + } + static FunctionPassCtor getDefault() { + return (FunctionPassCtor)Registry.getDefault(); + } + static void setDefault(FunctionPassCtor C) { + Registry.setDefault((MachinePassCtor)C); + } + static void setListener(MachinePassRegistryListener *L) { + Registry.setListener(L); + } + +}; + +} // end namespace llvm + + +#endif diff --git a/final/include/llvm/CodeGen/RegisterCoalescer.h b/final/include/llvm/CodeGen/RegisterCoalescer.h new file mode 100644 index 00000000000..af0b3946912 --- /dev/null +++ b/final/include/llvm/CodeGen/RegisterCoalescer.h @@ -0,0 +1,244 @@ +//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/IncludeFile.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/ADT/SmallPtrSet.h" + +#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H +#define LLVM_CODEGEN_REGISTER_COALESCER_H + +namespace llvm { + + class MachineFunction; + class RegallocQuery; + class AnalysisUsage; + class MachineInstr; + class TargetRegisterInfo; + class TargetRegisterClass; + class TargetInstrInfo; + + /// An abstract interface for register coalescers. Coalescers must + /// implement this interface to be part of the coalescer analysis + /// group. + class RegisterCoalescer { + public: + static char ID; // Class identification, replacement for typeinfo + RegisterCoalescer() {} + virtual ~RegisterCoalescer(); // We want to be subclassed + + /// Run the coalescer on this function, providing interference + /// data to query. Return whether we removed any copies. + virtual bool coalesceFunction(MachineFunction &mf, + RegallocQuery &ifd) = 0; + + /// Reset state. Can be used to allow a coalescer run by + /// PassManager to be run again by the register allocator. 
+    virtual void reset(MachineFunction &mf) {}
+
+    /// Register allocators must call this from their own
+    /// getAnalysisUsage to cover the case where the coalescer is not
+    /// a Pass in the proper sense and isn't managed by PassManager.
+    /// PassManager needs to know which analyses to make available and
+    /// which to invalidate when running the register allocator or any
+    /// pass that might call coalescing. The long-term solution is to
+    /// allow hierarchies of PassManagers.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
+  };
+
+  /// An abstract interface for register allocators to interact with
+  /// coalescers.
+  ///
+  /// Example:
+  ///
+  /// This is simply an example of how to use the RegallocQuery
+  /// interface. It is not meant to be used in production.
+  ///
+  ///   class LinearScanRegallocQuery : public RegallocQuery {
+  ///   private:
+  ///     const LiveIntervals &li;
+  ///
+  ///   public:
+  ///     LinearScanRegallocQuery(LiveIntervals &intervals)
+  ///       : li(intervals) {}
+  ///
+  ///     /// This is pretty slow and conservative, but since linear scan
+  ///     /// allocation doesn't pre-compute interference information it's
+  ///     /// the best we can do. Coalescers are always free to ignore this
+  ///     /// and implement their own discovery strategy. See
+  ///     /// SimpleRegisterCoalescing for an example.
+  ///     void getInterferences(IntervalSet &interferences,
+  ///                           const LiveInterval &a) const {
+  ///       for(LiveIntervals::const_iterator iv = li.begin(),
+  ///             ivend = li.end();
+  ///           iv != ivend;
+  ///           ++iv) {
+  ///         if (interfere(a, iv->second)) {
+  ///           interferences.insert(&iv->second);
+  ///         }
+  ///       }
+  ///     }
+  ///
+  ///     /// This is *really* slow and stupid. See above.
+  ///     int getNumberOfInterferences(const LiveInterval &a) const {
+  ///       IntervalSet intervals;
+  ///       getInterferences(intervals, a);
+  ///       return intervals.size();
+  ///     }
+  ///   };
+  ///
+  /// In the allocator:
+  ///
+  ///   RegisterCoalescer &coalescer = getAnalysis<RegisterCoalescer>();
+  ///
+  ///   // We don't reset the coalescer so if it's already been run this
+  ///   // takes almost no time.
+  ///   LinearScanRegallocQuery ifd(*li_);
+  ///   coalescer.coalesceFunction(fn, ifd);
+  ///
+  class RegallocQuery {
+  public:
+    typedef SmallPtrSet<const LiveInterval*, 8> IntervalSet;
+
+    virtual ~RegallocQuery() {}
+
+    /// Return whether two live ranges interfere.
+    virtual bool interfere(const LiveInterval &a,
+                           const LiveInterval &b) const {
+      // A naive test
+      return a.overlaps(b);
+    }
+
+    /// Return the set of intervals that interfere with this one.
+    virtual void getInterferences(IntervalSet &interferences,
+                                  const LiveInterval &a) const = 0;
+
+    /// This can often be cheaper than actually returning the
+    /// interferences.
+    virtual int getNumberOfInterferences(const LiveInterval &a) const = 0;
+
+    /// Make any data structure updates necessary to reflect
+    /// coalescing or other modifications.
+    virtual void updateDataForMerge(const LiveInterval &a,
+                                    const LiveInterval &b,
+                                    const MachineInstr &copy) {}
+
+    /// Allow the register allocator to communicate when it doesn't
+    /// want a copy coalesced. This may be due to assumptions made by
+    /// the allocator about various invariants and so this question is
+    /// a matter of legality, not performance. Performance decisions
+    /// about which copies to coalesce should be made by the
+    /// coalescer.
+    virtual bool isLegalToCoalesce(const MachineInstr &inst) const {
+      return true;
+    }
+  };
+
+
+  /// CoalescerPair - A helper class for register coalescers. When deciding
+  /// if two registers can be coalesced, CoalescerPair can determine if a
+  /// copy instruction would become an identity copy after coalescing.
+  class CoalescerPair {
+    const TargetInstrInfo &tii_;
+    const TargetRegisterInfo &tri_;
+
+    /// dstReg_ - The register that will be left after coalescing. It can be
+    /// a virtual or physical register.
+    unsigned dstReg_;
+
+    /// srcReg_ - the virtual register that will be coalesced into dstReg.
+    unsigned srcReg_;
+
+    /// subIdx_ - The subregister index of srcReg in dstReg_. It is possible
+    /// to coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_
+    /// is a virtual register.
+    unsigned subIdx_;
+
+    /// partial_ - True when the original copy was a partial subregister
+    /// copy.
+    bool partial_;
+
+    /// crossClass_ - True when both regs are virtual, and newRC is
+    /// constrained.
+    bool crossClass_;
+
+    /// flipped_ - True when DstReg and SrcReg are reversed from the original
+    /// copy instruction.
+    bool flipped_;
+
+    /// newRC_ - The register class of the coalesced register, or NULL if
+    /// dstReg_ is a physreg.
+    const TargetRegisterClass *newRC_;
+
+    /// compose - Compose subreg indices a and b, either may be 0.
+    unsigned compose(unsigned, unsigned) const;
+
+    /// isMoveInstr - Return true if MI is a move or subreg instruction.
+    bool isMoveInstr(const MachineInstr *MI, unsigned &Src, unsigned &Dst,
+                     unsigned &SrcSub, unsigned &DstSub) const;
+
+  public:
+    CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+      : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+        partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+    /// setRegisters - Set registers to match the copy instruction MI. Return
+    /// false if MI is not a coalescable copy instruction.
+    bool setRegisters(const MachineInstr*);
+
+    /// flip - Swap srcReg_ and dstReg_. Return false if swapping is
+    /// impossible because dstReg_ is a physical register, or subIdx_ is set.
+    bool flip();
+
+    /// isCoalescable - Return true if MI is a copy instruction that will
+    /// become an identity copy after coalescing.
+    bool isCoalescable(const MachineInstr*) const;
+
+    /// isPhys - Return true if DstReg is a physical register.
+    bool isPhys() const { return !newRC_; }
+
+    /// isPartial - Return true if the original copy instruction did not copy
+    /// the full register, but was a subreg operation.
+    bool isPartial() const { return partial_; }
+
+    /// isCrossClass - Return true if DstReg is virtual and NewRC is a
+    /// smaller register class than DstReg's.
+    bool isCrossClass() const { return crossClass_; }
+
+    /// isFlipped - Return true when getSrcReg is the register being defined
+    /// by the original copy instruction.
+    bool isFlipped() const { return flipped_; }
+
+    /// getDstReg - Return the register (virtual or physical) that will
+    /// remain after coalescing.
+    unsigned getDstReg() const { return dstReg_; }
+
+    /// getSrcReg - Return the virtual register that will be coalesced away.
+    unsigned getSrcReg() const { return srcReg_; }
+
+    /// getSubIdx - Return the subregister index in DstReg that SrcReg will
+    /// be coalesced into, or 0.
+    unsigned getSubIdx() const { return subIdx_; }
+
+    /// getNewRC - Return the register class of the coalesced register.
+    const TargetRegisterClass *getNewRC() const { return newRC_; }
+  };
+}
+
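+// A minimal usage sketch for CoalescerPair (illustrative only; 'MI', 'TII'
+// and 'TRI' are hypothetical, not part of this header):
+//
+//   CoalescerPair CP(TII, TRI);
+//   if (CP.setRegisters(MI) && !CP.isPhys()) {
+//     // MI is a coalescable copy; CP.getSrcReg() would be merged into
+//     // CP.getDstReg(), constrained to CP.getNewRC().
+//   }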
+// Because of the way .a files work, we must force the SimpleRC
+// implementation to be pulled in if the RegisterCoalescing header is
+// included. Otherwise we run the risk of RegisterCoalescing being
+// used, but the default implementation not being linked into the tool
+// that uses it.
+FORCE_DEFINING_FILE_TO_BE_LINKED(RegisterCoalescer)
+FORCE_DEFINING_FILE_TO_BE_LINKED(SimpleRegisterCoalescing)
+
+#endif
diff --git a/final/include/llvm/CodeGen/RegisterScavenging.h b/final/include/llvm/CodeGen/RegisterScavenging.h
new file mode 100644
index 00000000000..26b6773c053
--- /dev/null
+++ b/final/include/llvm/CodeGen/RegisterScavenging.h
@@ -0,0 +1,168 @@
+//===-- RegisterScavenging.h - Machine register scavenging ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the machine register scavenger class. It can provide
+// information such as unused registers at any point in a machine basic
+// block. It also provides a mechanism to make registers available by
+// evicting them to spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGISTER_SCAVENGING_H
+#define LLVM_CODEGEN_REGISTER_SCAVENGING_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterClass;
+
+class RegScavenger {
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo* MRI;
+  MachineBasicBlock *MBB;
+  MachineBasicBlock::iterator MBBI;
+  unsigned NumPhysRegs;
+
+  /// Tracking - True if RegScavenger is currently tracking the liveness of
+  /// registers.
+  bool Tracking;
+
+  /// ScavengingFrameIndex - Special spill slot used for scavenging a
+  /// register post register allocation.
+  int ScavengingFrameIndex;
+
+  /// ScavengedReg - If non-zero, the specific register is currently being
+  /// scavenged. That is, it is spilled to the special scavenging stack slot.
+  unsigned ScavengedReg;
+
+  /// ScavengedRC - Register class of the scavenged register.
+  ///
+  const TargetRegisterClass *ScavengedRC;
+
+  /// ScavengeRestore - Instruction that restores the scavenged register from
+  /// stack.
+  const MachineInstr *ScavengeRestore;
+
+  /// CalleeSavedRegs - A bitvector of callee saved registers for the target.
+  ///
+  BitVector CalleeSavedRegs;
+
+  /// ReservedRegs - A bitvector of reserved registers.
+  ///
+  BitVector ReservedRegs;
+
+  /// RegsAvailable - The current state of all the physical registers
+  /// immediately before MBBI. One bit per physical register. If a bit is
+  /// set that means it's available, unset means the register is currently
+  /// being used.
+  BitVector RegsAvailable;
+
+public:
+  RegScavenger()
+    : MBB(NULL), NumPhysRegs(0), Tracking(false),
+      ScavengingFrameIndex(-1), ScavengedReg(0), ScavengedRC(NULL) {}
+
+  /// enterBasicBlock - Start tracking liveness from the begin of the
+  /// specific basic block.
+  void enterBasicBlock(MachineBasicBlock *mbb);
+
+  /// initRegState - allow resetting register state info for multiple
+  /// passes over/within the same function.
+  void initRegState();
+
+  /// forward - Move the internal MBB iterator and update register states.
+  void forward();
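+  // A minimal usage sketch (illustrative only; 'MBB', 'MI' and 'RC' are
+  // hypothetical, not part of this header):
+  //
+  //   RegScavenger RS;
+  //   RS.enterBasicBlock(&MBB);
+  //   RS.forward(MI); // advance liveness state to MI
+  //   unsigned Reg = RS.scavengeRegister(RC, MI, /*SPAdj=*/0);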
+  /// forward - Move the internal MBB iterator and update register states
+  /// until it has processed the specific iterator.
+  void forward(MachineBasicBlock::iterator I) {
+    if (!Tracking && MBB->begin() != I) forward();
+    while (MBBI != I) forward();
+  }
+
+  /// skipTo - Move the internal MBB iterator but do not update register
+  /// states.
+  ///
+  void skipTo(MachineBasicBlock::iterator I) { MBBI = I; }
+
+  /// getRegsUsed - Return all registers currently in use in the bitvector
+  /// used.
+  void getRegsUsed(BitVector &used, bool includeReserved);
+
+  /// getRegsAvailable - Return all available registers in the register class
+  /// in Mask.
+  BitVector getRegsAvailable(const TargetRegisterClass *RC);
+
+  /// FindUnusedReg - Find an unused register of the specified register
+  /// class. Return 0 if none is found.
+  unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const;
+
+  /// setScavengingFrameIndex / getScavengingFrameIndex - accessor and setter
+  /// of ScavengingFrameIndex.
+  void setScavengingFrameIndex(int FI) { ScavengingFrameIndex = FI; }
+  int getScavengingFrameIndex() const { return ScavengingFrameIndex; }
+
+  /// scavengeRegister - Make a register of the specific register class
+  /// available and do the appropriate bookkeeping. SPAdj is the stack
+  /// adjustment due to call frame, it's passed along to
+  /// eliminateFrameIndex(). Returns the scavenged register.
+  unsigned scavengeRegister(const TargetRegisterClass *RegClass,
+                            MachineBasicBlock::iterator I, int SPAdj);
+  unsigned scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj) {
+    return scavengeRegister(RegClass, MBBI, SPAdj);
+  }
+
+  /// setUsed - Tell the scavenger a register is used.
+  ///
+  void setUsed(unsigned Reg);
+private:
+  /// isReserved - Returns true if a register is reserved. It is never
+  /// "unused".
+  bool isReserved(unsigned Reg) const { return ReservedRegs.test(Reg); }
+
+  /// isUsed / isUnused - Test if a register is currently being used.
+  ///
+  bool isUsed(unsigned Reg) const { return !RegsAvailable.test(Reg); }
+  bool isUnused(unsigned Reg) const { return RegsAvailable.test(Reg); }
+
+  /// isAliasUsed - Is Reg or an alias currently in use?
+  bool isAliasUsed(unsigned Reg) const;
+
+  /// setUsed / setUnused - Mark the state of one or a number of registers.
+  ///
+  void setUsed(BitVector &Regs) {
+    RegsAvailable &= ~Regs;
+  }
+  void setUnused(BitVector &Regs) {
+    RegsAvailable |= Regs;
+  }
+
+  /// Add Reg and all its sub-registers to BV.
+  void addRegWithSubRegs(BitVector &BV, unsigned Reg);
+
+  /// Add Reg and its aliases to BV.
+  void addRegWithAliases(BitVector &BV, unsigned Reg);
+
+  /// findSurvivorReg - Return the candidate register that is unused for the
+  /// longest after StartMI. UseMI is set to the instruction where the search
+  /// stopped.
+  ///
+  /// No more than InstrLimit instructions are inspected.
+  unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI,
+                           BitVector &Candidates,
+                           unsigned InstrLimit,
+                           MachineBasicBlock::iterator &UseMI);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/RuntimeLibcalls.h b/final/include/llvm/CodeGen/RuntimeLibcalls.h
new file mode 100644
index 00000000000..a51e82a6404
--- /dev/null
+++ b/final/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -0,0 +1,313 @@
+//===-- CodeGen/RuntimeLibcall.h - Runtime Library Calls --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines the enum representing the list of runtime library calls +// the backend may emit during code generation, and also some helper functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_RUNTIMELIBCALLS_H +#define LLVM_CODEGEN_RUNTIMELIBCALLS_H + +#include "llvm/CodeGen/ValueTypes.h" + +namespace llvm { +namespace RTLIB { + /// RTLIB::Libcall enum - This enum defines all of the runtime library calls + /// the backend can emit. The various long double types cannot be merged, + /// because 80-bit library functions use "xf" and 128-bit use "tf". + /// + /// When adding PPCF128 functions here, note that their names generally need + /// to be overridden for Darwin with the xxx$LDBL128 form. See + /// PPCISelLowering.cpp. + /// + enum Libcall { + // Integer + SHL_I16, + SHL_I32, + SHL_I64, + SHL_I128, + SRL_I16, + SRL_I32, + SRL_I64, + SRL_I128, + SRA_I16, + SRA_I32, + SRA_I64, + SRA_I128, + MUL_I8, + MUL_I16, + MUL_I32, + MUL_I64, + MUL_I128, + SDIV_I8, + SDIV_I16, + SDIV_I32, + SDIV_I64, + SDIV_I128, + UDIV_I8, + UDIV_I16, + UDIV_I32, + UDIV_I64, + UDIV_I128, + SREM_I8, + SREM_I16, + SREM_I32, + SREM_I64, + SREM_I128, + UREM_I8, + UREM_I16, + UREM_I32, + UREM_I64, + UREM_I128, + NEG_I32, + NEG_I64, + + // FLOATING POINT + ADD_F32, + ADD_F64, + ADD_F80, + ADD_PPCF128, + SUB_F32, + SUB_F64, + SUB_F80, + SUB_PPCF128, + MUL_F32, + MUL_F64, + MUL_F80, + MUL_PPCF128, + DIV_F32, + DIV_F64, + DIV_F80, + DIV_PPCF128, + REM_F32, + REM_F64, + REM_F80, + REM_PPCF128, + POWI_F32, + POWI_F64, + POWI_F80, + POWI_PPCF128, + SQRT_F32, + SQRT_F64, + SQRT_F80, + SQRT_PPCF128, + LOG_F32, + LOG_F64, + LOG_F80, + LOG_PPCF128, + LOG2_F32, + LOG2_F64, + LOG2_F80, + LOG2_PPCF128, + LOG10_F32, + LOG10_F64, + LOG10_F80, + LOG10_PPCF128, + EXP_F32, + EXP_F64, + EXP_F80, + EXP_PPCF128, + EXP2_F32, + EXP2_F64, + EXP2_F80, + EXP2_PPCF128, + SIN_F32, + SIN_F64, + SIN_F80, + SIN_PPCF128, + COS_F32, + COS_F64, + COS_F80, + COS_PPCF128, + POW_F32, + POW_F64, + POW_F80, + POW_PPCF128, + CEIL_F32, + CEIL_F64, + CEIL_F80, + CEIL_PPCF128, + TRUNC_F32, + TRUNC_F64, + TRUNC_F80, + TRUNC_PPCF128, + RINT_F32, + RINT_F64, + RINT_F80, + RINT_PPCF128, + NEARBYINT_F32, + NEARBYINT_F64, + NEARBYINT_F80, + NEARBYINT_PPCF128, + FLOOR_F32, + FLOOR_F64, + FLOOR_F80, + FLOOR_PPCF128, + COPYSIGN_F32, + COPYSIGN_F64, + COPYSIGN_F80, + COPYSIGN_PPCF128, + + // CONVERSION + FPEXT_F32_F64, + FPEXT_F16_F32, + FPROUND_F32_F16, + FPROUND_F64_F32, + FPROUND_F80_F32, + FPROUND_PPCF128_F32, + FPROUND_F80_F64, + FPROUND_PPCF128_F64, + FPTOSINT_F32_I8, + FPTOSINT_F32_I16, + FPTOSINT_F32_I32, + FPTOSINT_F32_I64, + FPTOSINT_F32_I128, + FPTOSINT_F64_I8, + FPTOSINT_F64_I16, + FPTOSINT_F64_I32, + FPTOSINT_F64_I64, + FPTOSINT_F64_I128, + FPTOSINT_F80_I32, + FPTOSINT_F80_I64, + FPTOSINT_F80_I128, + FPTOSINT_PPCF128_I32, + FPTOSINT_PPCF128_I64, + FPTOSINT_PPCF128_I128, + FPTOUINT_F32_I8, + FPTOUINT_F32_I16, + FPTOUINT_F32_I32, + FPTOUINT_F32_I64, + FPTOUINT_F32_I128, + FPTOUINT_F64_I8, + FPTOUINT_F64_I16, + FPTOUINT_F64_I32, + FPTOUINT_F64_I64, + FPTOUINT_F64_I128, + FPTOUINT_F80_I32, + FPTOUINT_F80_I64, + FPTOUINT_F80_I128, + FPTOUINT_PPCF128_I32, + FPTOUINT_PPCF128_I64, + FPTOUINT_PPCF128_I128, + SINTTOFP_I32_F32, + SINTTOFP_I32_F64, + SINTTOFP_I32_F80, + SINTTOFP_I32_PPCF128, + SINTTOFP_I64_F32, + SINTTOFP_I64_F64, + SINTTOFP_I64_F80, + SINTTOFP_I64_PPCF128, + SINTTOFP_I128_F32, + 
SINTTOFP_I128_F64, + SINTTOFP_I128_F80, + SINTTOFP_I128_PPCF128, + UINTTOFP_I32_F32, + UINTTOFP_I32_F64, + UINTTOFP_I32_F80, + UINTTOFP_I32_PPCF128, + UINTTOFP_I64_F32, + UINTTOFP_I64_F64, + UINTTOFP_I64_F80, + UINTTOFP_I64_PPCF128, + UINTTOFP_I128_F32, + UINTTOFP_I128_F64, + UINTTOFP_I128_F80, + UINTTOFP_I128_PPCF128, + + // COMPARISON + OEQ_F32, + OEQ_F64, + UNE_F32, + UNE_F64, + OGE_F32, + OGE_F64, + OLT_F32, + OLT_F64, + OLE_F32, + OLE_F64, + OGT_F32, + OGT_F64, + UO_F32, + UO_F64, + O_F32, + O_F64, + + // MEMORY + MEMCPY, + MEMSET, + MEMMOVE, + + // EXCEPTION HANDLING + UNWIND_RESUME, + + // Family ATOMICs + SYNC_VAL_COMPARE_AND_SWAP_1, + SYNC_VAL_COMPARE_AND_SWAP_2, + SYNC_VAL_COMPARE_AND_SWAP_4, + SYNC_VAL_COMPARE_AND_SWAP_8, + SYNC_LOCK_TEST_AND_SET_1, + SYNC_LOCK_TEST_AND_SET_2, + SYNC_LOCK_TEST_AND_SET_4, + SYNC_LOCK_TEST_AND_SET_8, + SYNC_FETCH_AND_ADD_1, + SYNC_FETCH_AND_ADD_2, + SYNC_FETCH_AND_ADD_4, + SYNC_FETCH_AND_ADD_8, + SYNC_FETCH_AND_SUB_1, + SYNC_FETCH_AND_SUB_2, + SYNC_FETCH_AND_SUB_4, + SYNC_FETCH_AND_SUB_8, + SYNC_FETCH_AND_AND_1, + SYNC_FETCH_AND_AND_2, + SYNC_FETCH_AND_AND_4, + SYNC_FETCH_AND_AND_8, + SYNC_FETCH_AND_OR_1, + SYNC_FETCH_AND_OR_2, + SYNC_FETCH_AND_OR_4, + SYNC_FETCH_AND_OR_8, + SYNC_FETCH_AND_XOR_1, + SYNC_FETCH_AND_XOR_2, + SYNC_FETCH_AND_XOR_4, + SYNC_FETCH_AND_XOR_8, + SYNC_FETCH_AND_NAND_1, + SYNC_FETCH_AND_NAND_2, + SYNC_FETCH_AND_NAND_4, + SYNC_FETCH_AND_NAND_8, + + UNKNOWN_LIBCALL + }; + + /// getFPEXT - Return the FPEXT_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getFPEXT(EVT OpVT, EVT RetVT); + + /// getFPROUND - Return the FPROUND_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getFPROUND(EVT OpVT, EVT RetVT); + + /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getFPTOSINT(EVT OpVT, EVT RetVT); + + /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getFPTOUINT(EVT OpVT, EVT RetVT); + + /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getSINTTOFP(EVT OpVT, EVT RetVT); + + /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getUINTTOFP(EVT OpVT, EVT RetVT); +} +} + +#endif diff --git a/final/include/llvm/CodeGen/ScheduleDAG.h b/final/include/llvm/CodeGen/ScheduleDAG.h new file mode 100644 index 00000000000..a303dbbec0f --- /dev/null +++ b/final/include/llvm/CodeGen/ScheduleDAG.h @@ -0,0 +1,718 @@ +//===------- llvm/CodeGen/ScheduleDAG.h - Common Base Class------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAG class, which is used as the common +// base class for instruction schedulers. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEDAG_H
+#define LLVM_CODEGEN_SCHEDULEDAG_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/PointerIntPair.h"
+
+namespace llvm {
+  class AliasAnalysis;
+  class SUnit;
+  class MachineConstantPool;
+  class MachineFunction;
+  class MachineRegisterInfo;
+  class MachineInstr;
+  class TargetRegisterInfo;
+  class ScheduleDAG;
+  class SDNode;
+  class TargetInstrInfo;
+  class TargetInstrDesc;
+  class TargetMachine;
+  class TargetRegisterClass;
+  template<class Graph> class GraphWriter;
+
+  /// SDep - Scheduling dependency. This represents one direction of an
+  /// edge in the scheduling DAG.
+  class SDep {
+  public:
+    /// Kind - These are the different kinds of scheduling dependencies.
+    enum Kind {
+      Data,        ///< Regular data dependence (aka true-dependence).
+      Anti,        ///< A register anti-dependence (aka WAR).
+      Output,      ///< A register output-dependence (aka WAW).
+      Order        ///< Any other ordering dependency.
+    };
+
+  private:
+    /// Dep - A pointer to the depending/depended-on SUnit, and an enum
+    /// indicating the kind of the dependency.
+    PointerIntPair<SUnit *, 2, Kind> Dep;
+
+    /// Contents - A union discriminated by the dependence kind.
+    union {
+      /// Reg - For Data, Anti, and Output dependencies, the associated
+      /// register. For Data dependencies that don't currently have a register
+      /// assigned, this is set to zero.
+      unsigned Reg;
+
+      /// Order - Additional information about Order dependencies.
+      struct {
+        /// isNormalMemory - True if both sides of the dependence
+        /// access memory in non-volatile and fully modeled ways.
+        bool isNormalMemory : 1;
+
+        /// isMustAlias - True if both sides of the dependence are known to
+        /// access the same memory.
+        bool isMustAlias : 1;
+
+        /// isArtificial - True if this is an artificial dependency, meaning
+        /// it is not necessary for program correctness, and may be safely
+        /// deleted if necessary.
+        bool isArtificial : 1;
+      } Order;
+    } Contents;
+
+    /// Latency - The time associated with this edge. Often this is just
+    /// the value of the Latency field of the predecessor, however advanced
+    /// models may provide additional information about specific edges.
+    unsigned Latency;
+
+  public:
+    /// SDep - Construct a null SDep. This is only for use by container
+    /// classes which require default constructors. SUnits may not
+    /// have null SDep edges.
+    SDep() : Dep(0, Data) {}
+
+    /// SDep - Construct an SDep with the specified values.
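+    /// A hypothetical illustration of the three constructor shapes (editor's
+    /// sketch; PredSU and SomePhysReg are placeholder names):
+    ///   SDep DataDep(PredSU, SDep::Data, /*latency=*/1, /*Reg=*/0);
+    ///   SDep AntiDep(PredSU, SDep::Anti, /*latency=*/0, SomePhysReg);
+    ///   SDep MemDep (PredSU, SDep::Order, /*latency=*/0, /*Reg=*/0,
+    ///                /*isNormalMemory=*/true);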
+    SDep(SUnit *S, Kind kind, unsigned latency = 1, unsigned Reg = 0,
+         bool isNormalMemory = false, bool isMustAlias = false,
+         bool isArtificial = false)
+      : Dep(S, kind), Contents(), Latency(latency) {
+      switch (kind) {
+      case Anti:
+      case Output:
+        assert(Reg != 0 &&
+               "SDep::Anti and SDep::Output must use a non-zero Reg!");
+        // fall through
+      case Data:
+        assert(!isMustAlias && "isMustAlias only applies with SDep::Order!");
+        assert(!isArtificial && "isArtificial only applies with SDep::Order!");
+        Contents.Reg = Reg;
+        break;
+      case Order:
+        assert(Reg == 0 && "Reg given for non-register dependence!");
+        Contents.Order.isNormalMemory = isNormalMemory;
+        Contents.Order.isMustAlias = isMustAlias;
+        Contents.Order.isArtificial = isArtificial;
+        break;
+      }
+    }
+
+    bool operator==(const SDep &Other) const {
+      if (Dep != Other.Dep || Latency != Other.Latency) return false;
+      switch (Dep.getInt()) {
+      case Data:
+      case Anti:
+      case Output:
+        return Contents.Reg == Other.Contents.Reg;
+      case Order:
+        return Contents.Order.isNormalMemory ==
+                 Other.Contents.Order.isNormalMemory &&
+               Contents.Order.isMustAlias == Other.Contents.Order.isMustAlias &&
+               Contents.Order.isArtificial == Other.Contents.Order.isArtificial;
+      }
+      assert(0 && "Invalid dependency kind!");
+      return false;
+    }
+
+    bool operator!=(const SDep &Other) const {
+      return !operator==(Other);
+    }
+
+    /// getLatency - Return the latency value for this edge, which roughly
+    /// means the minimum number of cycles that must elapse between the
+    /// predecessor and the successor, given that they have this edge
+    /// between them.
+    unsigned getLatency() const {
+      return Latency;
+    }
+
+    /// setLatency - Set the latency for this edge.
+    void setLatency(unsigned Lat) {
+      Latency = Lat;
+    }
+
+    /// getSUnit - Return the SUnit to which this edge points.
+    SUnit *getSUnit() const {
+      return Dep.getPointer();
+    }
+
+    /// setSUnit - Assign the SUnit to which this edge points.
+    void setSUnit(SUnit *SU) {
+      Dep.setPointer(SU);
+    }
+
+    /// getKind - Return an enum value representing the kind of the dependence.
+    Kind getKind() const {
+      return Dep.getInt();
+    }
+
+    /// isCtrl - Shorthand for getKind() != SDep::Data.
+    bool isCtrl() const {
+      return getKind() != Data;
+    }
+
+    /// isNormalMemory - Test if this is an Order dependence between two
+    /// memory accesses where both sides of the dependence access memory
+    /// in non-volatile and fully modeled ways.
+    bool isNormalMemory() const {
+      return getKind() == Order && Contents.Order.isNormalMemory;
+    }
+
+    /// isMustAlias - Test if this is an Order dependence that is marked
+    /// as "must alias", meaning that the SUnits at either end of the edge
+    /// have a memory dependence on a known memory location.
+    bool isMustAlias() const {
+      return getKind() == Order && Contents.Order.isMustAlias;
+    }
+
+    /// isArtificial - Test if this is an Order dependence that is marked
+    /// as "artificial", meaning it isn't necessary for correctness.
+    bool isArtificial() const {
+      return getKind() == Order && Contents.Order.isArtificial;
+    }
+
+    /// isAssignedRegDep - Test if this is a Data dependence that is
+    /// associated with a register.
+    bool isAssignedRegDep() const {
+      return getKind() == Data && Contents.Reg != 0;
+    }
+
+    /// getReg - Return the register associated with this edge. This is
+    /// only valid on Data, Anti, and Output edges. On Data edges, this
+    /// value may be zero, meaning there is no associated register.
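+    /// A guarded use (editor's sketch):
+    ///   unsigned R = D.isAssignedRegDep() ? D.getReg() : 0;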
+    unsigned getReg() const {
+      assert((getKind() == Data || getKind() == Anti || getKind() == Output) &&
+             "getReg called on non-register dependence edge!");
+      return Contents.Reg;
+    }
+
+    /// setReg - Assign the associated register for this edge. This is
+    /// only valid on Data, Anti, and Output edges. On Anti and Output
+    /// edges, this value must not be zero. On Data edges, the value may
+    /// be zero, which would mean that no specific register is associated
+    /// with this edge.
+    void setReg(unsigned Reg) {
+      assert((getKind() == Data || getKind() == Anti || getKind() == Output) &&
+             "setReg called on non-register dependence edge!");
+      assert((getKind() != Anti || Reg != 0) &&
+             "SDep::Anti edge cannot use the zero register!");
+      assert((getKind() != Output || Reg != 0) &&
+             "SDep::Output edge cannot use the zero register!");
+      Contents.Reg = Reg;
+    }
+  };
+
+  template <>
+  struct isPodLike<SDep> { static const bool value = true; };
+
+  /// SUnit - Scheduling unit. This is a node in the scheduling DAG.
+  class SUnit {
+  private:
+    SDNode *Node;                       // Representative node.
+    MachineInstr *Instr;                // Alternatively, a MachineInstr.
+  public:
+    SUnit *OrigNode;                    // If not this, the node from which
+                                        // this node was cloned.
+
+    // Preds/Succs - The SUnits before/after us in the graph.
+    SmallVector<SDep, 4> Preds;  // All sunit predecessors.
+    SmallVector<SDep, 4> Succs;  // All sunit successors.
+
+    typedef SmallVector<SDep, 4>::iterator pred_iterator;
+    typedef SmallVector<SDep, 4>::iterator succ_iterator;
+    typedef SmallVector<SDep, 4>::const_iterator const_pred_iterator;
+    typedef SmallVector<SDep, 4>::const_iterator const_succ_iterator;
+
+    unsigned NodeNum;                   // Entry # of node in the node vector.
+    unsigned NodeQueueId;               // Queue id of node.
+    unsigned NumPreds;                  // # of SDep::Data preds.
+    unsigned NumSuccs;                  // # of SDep::Data succs.
+    unsigned NumPredsLeft;              // # of preds not scheduled.
+    unsigned NumSuccsLeft;              // # of succs not scheduled.
+    unsigned short NumRegDefsLeft;      // # of reg defs with no scheduled use.
+    unsigned short Latency;             // Node latency.
+    bool isCall             : 1;        // Is a function call.
+    bool isTwoAddress       : 1;        // Is a two-address instruction.
+    bool isCommutable       : 1;        // Is a commutable instruction.
+    bool hasPhysRegDefs     : 1;        // Has physreg defs that are being used.
+    bool hasPhysRegClobbers : 1;        // Has any physreg defs, used or not.
+    bool isPending          : 1;        // True once pending.
+    bool isAvailable        : 1;        // True once available.
+    bool isScheduled        : 1;        // True once scheduled.
+    bool isScheduleHigh     : 1;        // True if preferable to schedule high.
+    bool isCloned           : 1;        // True if this node has been cloned.
+    Sched::Preference SchedulingPref;   // Scheduling preference.
+
+    SmallVector<MachineInstr*, 4> DbgInstrList; // dbg_values referencing this.
+  private:
+    bool isDepthCurrent     : 1;        // True if Depth is current.
+    bool isHeightCurrent    : 1;        // True if Height is current.
+    unsigned Depth;                     // Node depth.
+    unsigned Height;                    // Node height.
+  public:
+    const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
+    const TargetRegisterClass *CopySrcRC;
+
+    /// SUnit - Construct an SUnit for pre-regalloc scheduling to represent
+    /// an SDNode and any nodes flagged to it.
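+    /// (Editor's note) A pre-regalloc scheduler typically numbers units as it
+    /// creates them, roughly:
+    ///   SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));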
+ SUnit(SDNode *node, unsigned nodenum) + : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum), + NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), + isCall(false), isTwoAddress(false), isCommutable(false), + hasPhysRegDefs(false), hasPhysRegClobbers(false), + isPending(false), isAvailable(false), isScheduled(false), + isScheduleHigh(false), isCloned(false), + SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + CopyDstRC(NULL), CopySrcRC(NULL) {} + + /// SUnit - Construct an SUnit for post-regalloc scheduling to represent + /// a MachineInstr. + SUnit(MachineInstr *instr, unsigned nodenum) + : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum), + NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), + isCall(false), isTwoAddress(false), isCommutable(false), + hasPhysRegDefs(false), hasPhysRegClobbers(false), + isPending(false), isAvailable(false), isScheduled(false), + isScheduleHigh(false), isCloned(false), + SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + CopyDstRC(NULL), CopySrcRC(NULL) {} + + /// SUnit - Construct a placeholder SUnit. + SUnit() + : Node(0), Instr(0), OrigNode(0), NodeNum(~0u), + NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), + NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), + isCall(false), isTwoAddress(false), isCommutable(false), + hasPhysRegDefs(false), hasPhysRegClobbers(false), + isPending(false), isAvailable(false), isScheduled(false), + isScheduleHigh(false), isCloned(false), + SchedulingPref(Sched::None), + isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), + CopyDstRC(NULL), CopySrcRC(NULL) {} + + /// setNode - Assign the representative SDNode for this SUnit. + /// This may be used during pre-regalloc scheduling. + void setNode(SDNode *N) { + assert(!Instr && "Setting SDNode of SUnit with MachineInstr!"); + Node = N; + } + + /// getNode - Return the representative SDNode for this SUnit. + /// This may be used during pre-regalloc scheduling. + SDNode *getNode() const { + assert(!Instr && "Reading SDNode of SUnit with MachineInstr!"); + return Node; + } + + /// isInstr - Return true if this SUnit refers to a machine instruction as + /// opposed to an SDNode. + bool isInstr() const { return Instr; } + + /// setInstr - Assign the instruction for the SUnit. + /// This may be used during post-regalloc scheduling. + void setInstr(MachineInstr *MI) { + assert(!Node && "Setting MachineInstr of SUnit with SDNode!"); + Instr = MI; + } + + /// getInstr - Return the representative MachineInstr for this SUnit. + /// This may be used during post-regalloc scheduling. + MachineInstr *getInstr() const { + assert(!Node && "Reading MachineInstr of SUnit with SDNode!"); + return Instr; + } + + /// addPred - This adds the specified edge as a pred of the current node if + /// not already. It also adds the current node as a successor of the + /// specified node. + bool addPred(const SDep &D); + + /// removePred - This removes the specified edge as a pred of the current + /// node if it exists. It also removes the current node as a successor of + /// the specified node. + void removePred(const SDep &D); + + /// getDepth - Return the depth of this node, which is the length of the + /// maximum path up to any node which has no predecessors. 
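+    /// (Editor's sketch) Critical-path heuristics typically combine these
+    /// lazily computed values, e.g.:
+    ///   if (A->getDepth() + A->getHeight() > B->getDepth() + B->getHeight())
+    ///     preferToSchedule(A);  // hypothetical helper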
+    unsigned getDepth() const {
+      if (!isDepthCurrent)
+        const_cast<SUnit *>(this)->ComputeDepth();
+      return Depth;
+    }
+
+    /// getHeight - Return the height of this node, which is the length of the
+    /// maximum path down to any node which has no successors.
+    unsigned getHeight() const {
+      if (!isHeightCurrent)
+        const_cast<SUnit *>(this)->ComputeHeight();
+      return Height;
+    }
+
+    /// setDepthToAtLeast - If NewDepth is greater than this node's
+    /// depth value, set it to be the new depth value. This also
+    /// recursively marks successor nodes dirty.
+    void setDepthToAtLeast(unsigned NewDepth);
+
+    /// setHeightToAtLeast - If NewHeight is greater than this node's
+    /// height value, set it to be the new height value. This also
+    /// recursively marks predecessor nodes dirty.
+    void setHeightToAtLeast(unsigned NewHeight);
+
+    /// setDepthDirty - Set a flag in this node to indicate that its
+    /// stored Depth value will require recomputation the next time
+    /// getDepth() is called.
+    void setDepthDirty();
+
+    /// setHeightDirty - Set a flag in this node to indicate that its
+    /// stored Height value will require recomputation the next time
+    /// getHeight() is called.
+    void setHeightDirty();
+
+    /// isPred - Test if node N is a predecessor of this node.
+    bool isPred(SUnit *N) {
+      for (unsigned i = 0, e = (unsigned)Preds.size(); i != e; ++i)
+        if (Preds[i].getSUnit() == N)
+          return true;
+      return false;
+    }
+
+    /// isSucc - Test if node N is a successor of this node.
+    bool isSucc(SUnit *N) {
+      for (unsigned i = 0, e = (unsigned)Succs.size(); i != e; ++i)
+        if (Succs[i].getSUnit() == N)
+          return true;
+      return false;
+    }
+
+    void dump(const ScheduleDAG *G) const;
+    void dumpAll(const ScheduleDAG *G) const;
+    void print(raw_ostream &O, const ScheduleDAG *G) const;
+
+  private:
+    void ComputeDepth();
+    void ComputeHeight();
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// SchedulingPriorityQueue - This interface is used to plug different
+  /// priorities computation algorithms into the list scheduler. It implements
+  /// the interface of a standard priority queue, where nodes are inserted in
+  /// arbitrary order and returned in priority order. The computation of the
+  /// priority and the representation of the queue are totally up to the
+  /// implementation to decide.
+  ///
+  class SchedulingPriorityQueue {
+    unsigned CurCycle;
+    bool HasReadyFilter;
+  public:
+    SchedulingPriorityQueue(bool rf = false):
+      CurCycle(0), HasReadyFilter(rf) {}
+    virtual ~SchedulingPriorityQueue() {}
+
+    virtual bool isBottomUp() const = 0;
+
+    virtual void initNodes(std::vector<SUnit> &SUnits) = 0;
+    virtual void addNode(const SUnit *SU) = 0;
+    virtual void updateNode(const SUnit *SU) = 0;
+    virtual void releaseState() = 0;
+
+    virtual bool empty() const = 0;
+
+    bool hasReadyFilter() const { return HasReadyFilter; }
+
+    virtual bool tracksRegPressure() const { return false; }
+
+    virtual bool isReady(SUnit *) const {
+      assert(!HasReadyFilter && "The ready filter must override isReady()");
+      return true;
+    }
+    virtual void push(SUnit *U) = 0;
+
+    void push_all(const std::vector<SUnit *> &Nodes) {
+      for (std::vector<SUnit *>::const_iterator I = Nodes.begin(),
+           E = Nodes.end(); I != E; ++I)
+        push(*I);
+    }
+
+    virtual SUnit *pop() = 0;
+
+    virtual void remove(SUnit *SU) = 0;
+
+    virtual void dump(ScheduleDAG *) const {}
+
+    /// ScheduledNode - As each node is scheduled, this method is invoked. This
+    /// allows the priority function to adjust the priority of related
+    /// unscheduled nodes, for example.
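+    /// A hypothetical override (editor's sketch):
+    ///   void ScheduledNode(SUnit *SU) {
+    ///     // re-prioritize successors that just moved closer to ready
+    ///     for (SUnit::succ_iterator I = SU->Succs.begin(),
+    ///          E = SU->Succs.end(); I != E; ++I)
+    ///       updateNode(I->getSUnit());
+    ///   }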
+    ///
+    virtual void ScheduledNode(SUnit *) {}
+
+    virtual void UnscheduledNode(SUnit *) {}
+
+    void setCurCycle(unsigned Cycle) {
+      CurCycle = Cycle;
+    }
+
+    unsigned getCurCycle() const {
+      return CurCycle;
+    }
+  };
+
+  class ScheduleDAG {
+  public:
+    MachineBasicBlock *BB;          // The block in which to insert instructions
+    MachineBasicBlock::iterator InsertPos;// The position to insert instructions
+    const TargetMachine &TM;              // Target processor
+    const TargetInstrInfo *TII;           // Target instruction information
+    const TargetRegisterInfo *TRI;        // Target processor register info
+    MachineFunction &MF;                  // Machine function
+    MachineRegisterInfo &MRI;             // Virtual/real register map
+    std::vector<SUnit*> Sequence;         // The schedule. Null SUnit*'s
+                                          // represent noop instructions.
+    std::vector<SUnit> SUnits;            // The scheduling units.
+    SUnit EntrySU;                        // Special node for the region entry.
+    SUnit ExitSU;                         // Special node for the region exit.
+
+    explicit ScheduleDAG(MachineFunction &mf);
+
+    virtual ~ScheduleDAG();
+
+    /// getInstrDesc - Return the TargetInstrDesc of this SUnit.
+    /// Return NULL for SDNodes without a machine opcode.
+    const TargetInstrDesc *getInstrDesc(const SUnit *SU) const {
+      if (SU->isInstr()) return &SU->getInstr()->getDesc();
+      return getNodeDesc(SU->getNode());
+    }
+
+    /// viewGraph - Pop up a GraphViz/gv window with the ScheduleDAG rendered
+    /// using 'dot'.
+    ///
+    void viewGraph();
+
+    /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock
+    /// according to the order specified in Sequence.
+    ///
+    virtual MachineBasicBlock *EmitSchedule() = 0;
+
+    void dumpSchedule() const;
+
+    virtual void dumpNode(const SUnit *SU) const = 0;
+
+    /// getGraphNodeLabel - Return a label for an SUnit node in a visualization
+    /// of the ScheduleDAG.
+    virtual std::string getGraphNodeLabel(const SUnit *SU) const = 0;
+
+    /// addCustomGraphFeatures - Add custom features for a visualization of
+    /// the ScheduleDAG.
+    virtual void addCustomGraphFeatures(GraphWriter<ScheduleDAG*> &) const {}
+
+#ifndef NDEBUG
+    /// VerifySchedule - Verify that all SUnits were scheduled and that
+    /// their state is consistent.
+    void VerifySchedule(bool isBottomUp);
+#endif
+
+  protected:
+    /// Run - perform scheduling.
+    ///
+    void Run(MachineBasicBlock *bb, MachineBasicBlock::iterator insertPos);
+
+    /// BuildSchedGraph - Build SUnits and set up their Preds and Succs
+    /// to form the scheduling dependency graph.
+    ///
+    virtual void BuildSchedGraph(AliasAnalysis *AA) = 0;
+
+    /// ComputeLatency - Compute node latency.
+    ///
+    virtual void ComputeLatency(SUnit *SU) = 0;
+
+    /// ComputeOperandLatency - Override dependence edge latency using
+    /// operand use/def information
+    ///
+    virtual void ComputeOperandLatency(SUnit *, SUnit *,
+                                       SDep&) const { }
+
+    /// Schedule - Order nodes according to selected style, filling
+    /// in the Sequence member.
+    ///
+    virtual void Schedule() = 0;
+
+    /// ForceUnitLatencies - Return true if all scheduling edges should be given
+    /// a latency value of one. The default is to return false; schedulers may
+    /// override this as needed.
+    virtual bool ForceUnitLatencies() const { return false; }
+
+    /// EmitNoop - Emit a noop instruction.
+    ///
+    void EmitNoop();
+
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+
+  private:
+    // Return the TargetInstrDesc of this SDNode or NULL.
+    const TargetInstrDesc *getNodeDesc(const SDNode *Node) const;
+  };
+
+  class SUnitIterator : public std::iterator<std::forward_iterator_tag,
+                                             SUnit, ptrdiff_t> {
+    SUnit *Node;
+    unsigned Operand;
+
+    SUnitIterator(SUnit *N, unsigned Op) : Node(N), Operand(Op) {}
+  public:
+    bool operator==(const SUnitIterator& x) const {
+      return Operand == x.Operand;
+    }
+    bool operator!=(const SUnitIterator& x) const { return !operator==(x); }
+
+    const SUnitIterator &operator=(const SUnitIterator &I) {
+      assert(I.Node==Node && "Cannot assign iterators to two different nodes!");
+      Operand = I.Operand;
+      return *this;
+    }
+
+    pointer operator*() const {
+      return Node->Preds[Operand].getSUnit();
+    }
+    pointer operator->() const { return operator*(); }
+
+    SUnitIterator& operator++() {                // Preincrement
+      ++Operand;
+      return *this;
+    }
+    SUnitIterator operator++(int) { // Postincrement
+      SUnitIterator tmp = *this; ++*this; return tmp;
+    }
+
+    static SUnitIterator begin(SUnit *N) { return SUnitIterator(N, 0); }
+    static SUnitIterator end  (SUnit *N) {
+      return SUnitIterator(N, (unsigned)N->Preds.size());
+    }
+
+    unsigned getOperand() const { return Operand; }
+    const SUnit *getNode() const { return Node; }
+    /// isCtrlDep - Test if this is not an SDep::Data dependence.
+    bool isCtrlDep() const {
+      return getSDep().isCtrl();
+    }
+    bool isArtificialDep() const {
+      return getSDep().isArtificial();
+    }
+    const SDep &getSDep() const {
+      return Node->Preds[Operand];
+    }
+  };
+
+  template <> struct GraphTraits<SUnit*> {
+    typedef SUnit NodeType;
+    typedef SUnitIterator ChildIteratorType;
+    static inline NodeType *getEntryNode(SUnit *N) { return N; }
+    static inline ChildIteratorType child_begin(NodeType *N) {
+      return SUnitIterator::begin(N);
+    }
+    static inline ChildIteratorType child_end(NodeType *N) {
+      return SUnitIterator::end(N);
+    }
+  };
+
+  template <> struct GraphTraits<ScheduleDAG*> : public GraphTraits<SUnit*> {
+    typedef std::vector<SUnit>::iterator nodes_iterator;
+    static nodes_iterator nodes_begin(ScheduleDAG *G) {
+      return G->SUnits.begin();
+    }
+    static nodes_iterator nodes_end(ScheduleDAG *G) {
+      return G->SUnits.end();
+    }
+  };
+
+  /// ScheduleDAGTopologicalSort is a class that computes a topological
+  /// ordering for SUnits and provides methods for dynamically updating
+  /// the ordering as new edges are added.
+  ///
+  /// This allows a very fast implementation of IsReachable, for example.
+  ///
+  class ScheduleDAGTopologicalSort {
+    /// SUnits - A reference to the ScheduleDAG's SUnits.
+    std::vector<SUnit> &SUnits;
+
+    /// Index2Node - Maps topological index to the node number.
+    std::vector<int> Index2Node;
+    /// Node2Index - Maps the node number to its topological index.
+    std::vector<int> Node2Index;
+    /// Visited - a set of nodes visited during a DFS traversal.
+    BitVector Visited;
+
+    /// DFS - make a DFS traversal and mark all nodes affected by the
+    /// edge insertion. These nodes will later get new topological indexes
+    /// by means of the Shift method.
+    void DFS(const SUnit *SU, int UpperBound, bool& HasLoop);
+
+    /// Shift - reassign topological indexes for the nodes in the DAG
+    /// to preserve the topological ordering.
+    void Shift(BitVector& Visited, int LowerBound, int UpperBound);
+
+    /// Allocate - assign the topological index to the node n.
+    void Allocate(int n, int index);
+
+  public:
+    explicit ScheduleDAGTopologicalSort(std::vector<SUnit> &SUnits);
+
+    /// InitDAGTopologicalSorting - create the initial topological
+    /// ordering from the DAG to be scheduled.
+    void InitDAGTopologicalSorting();
+
+    /// IsReachable - Checks if SU is reachable from TargetSU.
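+    /// (Editor's sketch) A mutating scheduler typically consults this before
+    /// adding an artificial edge, e.g.:
+    ///   if (!Topo.IsReachable(SU, TargetSU))
+    ///     ;  // safe to add the new dependence without creating a cycle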
+    bool IsReachable(const SUnit *SU, const SUnit *TargetSU);
+
+    /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU
+    /// will create a cycle.
+    bool WillCreateCycle(SUnit *SU, SUnit *TargetSU);
+
+    /// AddPred - Updates the topological ordering to accommodate an edge
+    /// to be added from SUnit X to SUnit Y.
+    void AddPred(SUnit *Y, SUnit *X);
+
+    /// RemovePred - Updates the topological ordering to accommodate an
+    /// edge to be removed from the specified node N, a predecessor of
+    /// the current node M.
+    void RemovePred(SUnit *M, SUnit *N);
+
+    typedef std::vector<int>::iterator iterator;
+    typedef std::vector<int>::const_iterator const_iterator;
+    iterator begin() { return Index2Node.begin(); }
+    const_iterator begin() const { return Index2Node.begin(); }
+    iterator end() { return Index2Node.end(); }
+    const_iterator end() const { return Index2Node.end(); }
+
+    typedef std::vector<int>::reverse_iterator reverse_iterator;
+    typedef std::vector<int>::const_reverse_iterator const_reverse_iterator;
+    reverse_iterator rbegin() { return Index2Node.rbegin(); }
+    const_reverse_iterator rbegin() const { return Index2Node.rbegin(); }
+    reverse_iterator rend() { return Index2Node.rend(); }
+    const_reverse_iterator rend() const { return Index2Node.rend(); }
+  };
+}
+
+#endif
diff --git a/final/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/final/include/llvm/CodeGen/ScheduleHazardRecognizer.h
new file mode 100644
index 00000000000..2f53baa1c7e
--- /dev/null
+++ b/final/include/llvm/CodeGen/ScheduleHazardRecognizer.h
@@ -0,0 +1,93 @@
+//=- llvm/CodeGen/ScheduleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ScheduleHazardRecognizer class, which implements
+// hazard-avoidance heuristics for scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULEHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_SCHEDULEHAZARDRECOGNIZER_H
+
+namespace llvm {
+
+class SUnit;
+
+/// ScheduleHazardRecognizer - This determines whether or not an instruction
+/// can be issued this cycle, and whether or not a noop needs to be inserted
+/// to handle the hazard.
+class ScheduleHazardRecognizer {
+protected:
+  /// MaxLookAhead - Indicate the number of cycles in the scoreboard
+  /// state. Important to restore the state after backtracking. Additionally,
+  /// MaxLookAhead=0 identifies a fake recognizer, allowing the client to
+  /// bypass virtual calls. Currently the PostRA scheduler ignores it.
+  unsigned MaxLookAhead;
+
+public:
+  ScheduleHazardRecognizer(): MaxLookAhead(0) {}
+  virtual ~ScheduleHazardRecognizer();
+
+  enum HazardType {
+    NoHazard,   // This instruction can be emitted at this cycle.
+    Hazard,     // This instruction can't be emitted at this cycle.
+    NoopHazard  // This instruction can't be emitted, and needs noops.
+  };
+
+  unsigned getMaxLookAhead() const { return MaxLookAhead; }
+
+  bool isEnabled() const { return MaxLookAhead != 0; }
+
+  /// atIssueLimit - Return true if no more instructions may be issued in this
+  /// cycle.
+  virtual bool atIssueLimit() const { return false; }
+
+  /// getHazardType - Return the hazard type of emitting this node. There are
+  /// three possible results. Either:
+  ///  * NoHazard: it is legal to issue this instruction on this cycle.
+  ///  * Hazard: issuing this instruction would stall the machine. If some
+  ///     other instruction is available, issue it first.
+  ///  * NoopHazard: issuing this instruction would break the program. If
+  ///     some other instruction can be issued, do so, otherwise issue a noop.
+  virtual HazardType getHazardType(SUnit *m, int Stalls) {
+    return NoHazard;
+  }
+
+  /// Reset - This callback is invoked when a new block of
+  /// instructions is about to be scheduled. The hazard state should be
+  /// set to an initialized state.
+  virtual void Reset() {}
+
+  /// EmitInstruction - This callback is invoked when an instruction is
+  /// emitted, to advance the hazard state.
+  virtual void EmitInstruction(SUnit *) {}
+
+  /// AdvanceCycle - This callback is invoked whenever the next top-down
+  /// instruction to be scheduled cannot issue in the current cycle, either
+  /// because of latency or resource conflicts. This should increment the
+  /// internal state of the hazard recognizer so that previously "Hazard"
+  /// instructions will now not be hazards.
+  virtual void AdvanceCycle() {}
+
+  /// RecedeCycle - This callback is invoked whenever the next bottom-up
+  /// instruction to be scheduled cannot issue in the current cycle, either
+  /// because of latency or resource conflicts.
+  virtual void RecedeCycle() {}
+
+  /// EmitNoop - This callback is invoked when a noop was added to the
+  /// instruction stream.
+  virtual void EmitNoop() {
+    // Default implementation: count it as a cycle.
+    AdvanceCycle();
+  }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/CodeGen/SchedulerRegistry.h b/final/include/llvm/CodeGen/SchedulerRegistry.h
new file mode 100644
index 00000000000..96573dd5d8b
--- /dev/null
+++ b/final/include/llvm/CodeGen/SchedulerRegistry.h
@@ -0,0 +1,111 @@
+//===-- llvm/CodeGen/SchedulerRegistry.h ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation for the instruction scheduler function
+// pass registry (RegisterScheduler).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCHEDULERREGISTRY_H
+#define LLVM_CODEGEN_SCHEDULERREGISTRY_H
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+///
+/// RegisterScheduler class - Track the registration of instruction schedulers.
+///
+//===----------------------------------------------------------------------===//
+
+class SelectionDAGISel;
+class ScheduleDAGSDNodes;
+class SelectionDAG;
+class MachineBasicBlock;
+
+class RegisterScheduler : public MachinePassRegistryNode {
+public:
+  typedef ScheduleDAGSDNodes *(*FunctionPassCtor)(SelectionDAGISel*,
+                                                  CodeGenOpt::Level);
+
+  static MachinePassRegistry Registry;
+
+  RegisterScheduler(const char *N, const char *D, FunctionPassCtor C)
+  : MachinePassRegistryNode(N, D, (MachinePassCtor)C)
+  { Registry.Add(this); }
+  ~RegisterScheduler() { Registry.Remove(this); }
+
+
+  // Accessors.
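+  //
+  // (Editor's note) The registry is populated by static instances of this
+  // class, and the accessors below walk that registry. A typical in-tree
+  // registration looks roughly like:
+  //   static RegisterScheduler
+  //     burrListDAGScheduler("list-burr",
+  //                          "Bottom-up register reduction list scheduling",
+  //                          createBURRListDAGScheduler);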
+  //
+  RegisterScheduler *getNext() const {
+    return (RegisterScheduler *)MachinePassRegistryNode::getNext();
+  }
+  static RegisterScheduler *getList() {
+    return (RegisterScheduler *)Registry.getList();
+  }
+  static FunctionPassCtor getDefault() {
+    return (FunctionPassCtor)Registry.getDefault();
+  }
+  static void setDefault(FunctionPassCtor C) {
+    Registry.setDefault((MachinePassCtor)C);
+  }
+  static void setListener(MachinePassRegistryListener *L) {
+    Registry.setListener(L);
+  }
+};
+
+/// createBURRListDAGScheduler - This creates a bottom up register usage
+/// reduction list scheduler.
+ScheduleDAGSDNodes *createBURRListDAGScheduler(SelectionDAGISel *IS,
+                                               CodeGenOpt::Level OptLevel);
+
+/// createTDRRListDAGScheduler - This creates a top down register usage
+/// reduction list scheduler.
+ScheduleDAGSDNodes *createTDRRListDAGScheduler(SelectionDAGISel *IS,
+                                               CodeGenOpt::Level OptLevel);
+
+/// createSourceListDAGScheduler - This creates a bottom up list scheduler that
+/// schedules nodes in source code order when possible.
+ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
+                                                 CodeGenOpt::Level OptLevel);
+
+/// createHybridListDAGScheduler - This creates a bottom up register pressure
+/// aware list scheduler that makes use of latency information to avoid stalls
+/// for long latency instructions in low register pressure mode. In high
+/// register pressure mode it schedules to reduce register pressure.
+ScheduleDAGSDNodes *createHybridListDAGScheduler(SelectionDAGISel *IS,
+                                                 CodeGenOpt::Level);
+
+/// createILPListDAGScheduler - This creates a bottom up register pressure
+/// aware list scheduler that tries to increase instruction level parallelism
+/// in low register pressure mode. In high register pressure mode it schedules
+/// to reduce register pressure.
+ScheduleDAGSDNodes *createILPListDAGScheduler(SelectionDAGISel *IS,
+                                              CodeGenOpt::Level);
+
+/// createTDListDAGScheduler - This creates a top-down list scheduler with
+/// a hazard recognizer.
+ScheduleDAGSDNodes *createTDListDAGScheduler(SelectionDAGISel *IS,
+                                             CodeGenOpt::Level OptLevel);
+
+/// createFastDAGScheduler - This creates a "fast" scheduler.
+///
+ScheduleDAGSDNodes *createFastDAGScheduler(SelectionDAGISel *IS,
+                                           CodeGenOpt::Level OptLevel);
+
+/// createDefaultScheduler - This creates an instruction scheduler appropriate
+/// for the target.
+ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS,
+                                           CodeGenOpt::Level OptLevel);
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/final/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
new file mode 100644
index 00000000000..8850006df84
--- /dev/null
+++ b/final/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -0,0 +1,129 @@
+//=- llvm/CodeGen/ScoreboardHazardRecognizer.h - Schedule Support -*- C++ -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
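+//
+// (Editor's note) A recognizer is normally constructed from the target's
+// itineraries, roughly:
+//   new ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched");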
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Support/DataTypes.h"
+
+#include <cassert>
+#include <cstring>
+#include <cstddef>
+
+namespace llvm {
+
+class InstrItineraryData;
+class TargetInstrDesc;
+class ScheduleDAG;
+class SUnit;
+
+class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer {
+  // Scoreboard to track function unit usage. Scoreboard[0] is a
+  // mask of the FUs in use in the cycle currently being
+  // scheduled. Scoreboard[1] is a mask for the next cycle. The
+  // Scoreboard is used as a circular buffer with the current cycle
+  // indicated by Head.
+  //
+  // Scoreboard always counts cycles in forward execution order. If used by a
+  // bottom-up scheduler, then the scoreboard cycles are the inverse of the
+  // scheduler's cycles.
+  class Scoreboard {
+    unsigned *Data;
+
+    // The maximum number of cycles monitored by the Scoreboard. This
+    // value is determined based on the target itineraries to ensure
+    // that all hazards can be tracked.
+    size_t Depth;
+    // Index into the Scoreboard that represents the current cycle.
+    size_t Head;
+  public:
+    Scoreboard():Data(NULL), Depth(0), Head(0) { }
+    ~Scoreboard() {
+      delete[] Data;
+    }
+
+    size_t getDepth() const { return Depth; }
+    unsigned& operator[](size_t idx) const {
+      // Depth is expected to be a power-of-2.
+      assert(Depth && !(Depth & (Depth - 1)) &&
+             "Scoreboard was not initialized properly!");
+
+      return Data[(Head + idx) & (Depth-1)];
+    }
+
+    void reset(size_t d = 1) {
+      if (Data == NULL) {
+        Depth = d;
+        Data = new unsigned[Depth];
+      }
+
+      memset(Data, 0, Depth * sizeof(Data[0]));
+      Head = 0;
+    }
+
+    void advance() {
+      Head = (Head + 1) & (Depth-1);
+    }
+
+    void recede() {
+      Head = (Head - 1) & (Depth-1);
+    }
+
+    // Print the scoreboard.
+    void dump() const;
+  };
+
+#ifndef NDEBUG
+  // Support for tracing ScoreboardHazardRecognizer as a component within
+  // another module. Follows the current thread-unsafe model of tracing.
+  static const char *DebugType;
+#endif
+
+  // Itinerary data for the target.
+  const InstrItineraryData *ItinData;
+
+  const ScheduleDAG *DAG;
+
+  /// IssueWidth - Max issue per cycle. 0=Unknown.
+  unsigned IssueWidth;
+
+  /// IssueCount - Count instructions issued in this cycle.
+  unsigned IssueCount;
+
+  Scoreboard ReservedScoreboard;
+  Scoreboard RequiredScoreboard;
+
+public:
+  ScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
+                             const ScheduleDAG *DAG,
+                             const char *ParentDebugType = "");
+
+  /// atIssueLimit - Return true if no more instructions may be issued in this
+  /// cycle.
+  virtual bool atIssueLimit() const;
+
+  // Stalls provides a cycle offset at which SU will be scheduled. It will be
+  // negative for bottom-up scheduling.
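+  //
+  // For example (editor's note), Stalls == -2 from a bottom-up scheduler asks
+  // whether SU could issue two cycles below the current cycle:
+  //   HazardType HT = HR.getHazardType(SU, -2);  // hypothetical query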
+  virtual HazardType getHazardType(SUnit *SU, int Stalls);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+  virtual void AdvanceCycle();
+  virtual void RecedeCycle();
+};
+
+}
+
+#endif //!LLVM_CODEGEN_SCOREBOARDHAZARDRECOGNIZER_H
diff --git a/final/include/llvm/CodeGen/SelectionDAG.h b/final/include/llvm/CodeGen/SelectionDAG.h
new file mode 100644
index 00000000000..b537a77a76a
--- /dev/null
+++ b/final/include/llvm/CodeGen/SelectionDAG.h
@@ -0,0 +1,1051 @@
+//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SelectionDAG class, and transitively defines the
+// SDNode class and subclasses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SELECTIONDAG_H
+#define LLVM_CODEGEN_SELECTIONDAG_H
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <vector>
+#include <map>
+#include <string>
+
+namespace llvm {
+
+class AliasAnalysis;
+class MachineConstantPoolValue;
+class MachineFunction;
+class MDNode;
+class SDNodeOrdering;
+class SDDbgValue;
+class TargetLowering;
+class TargetSelectionDAGInfo;
+
+template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> {
+private:
+  mutable ilist_half_node<SDNode> Sentinel;
+public:
+  SDNode *createSentinel() const {
+    return static_cast<SDNode*>(&Sentinel);
+  }
+  static void destroySentinel(SDNode *) {}
+
+  SDNode *provideInitialHead() const { return createSentinel(); }
+  SDNode *ensureHead(SDNode*) const { return createSentinel(); }
+  static void noteHead(SDNode*, SDNode*) {}
+
+  static void deleteNode(SDNode *) {
+    assert(0 && "ilist_traits<SDNode> shouldn't see a deleteNode call!");
+  }
+private:
+  static void createNode(const SDNode &);
+};
+
+/// SDDbgInfo - Keeps track of dbg_value information through SDISel. We do
+/// not build SDNodes for these so as not to perturb the generated code;
+/// instead the info is kept off to the side in this structure. Each SDNode may
+/// have one or more associated dbg_value entries. This information is kept in
+/// DbgValMap.
+/// Byval parameters are handled separately because they don't use alloca's,
+/// which busts the normal mechanism. There is good reason for handling all
+/// parameters separately: they may not have code generated for them, they
+/// should always go at the beginning of the function regardless of other code
+/// motion, and debug info for them is potentially useful even if the parameter
+/// is unused. Right now only byval parameters are handled separately.
+class SDDbgInfo {
+  SmallVector<SDDbgValue*, 32> DbgValues;
+  SmallVector<SDDbgValue*, 32> ByvalParmDbgValues;
+  DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> > DbgValMap;
+
+  void operator=(const SDDbgInfo&);   // Do not implement.
+  SDDbgInfo(const SDDbgInfo&);   // Do not implement.
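+  // (Editor's sketch) SDISel attaches an entry roughly as:
+  //   DbgInfo->add(DbgVal, Node, /*isParameter=*/IsByvalParm);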
+public:
+  SDDbgInfo() {}
+
+  void add(SDDbgValue *V, const SDNode *Node, bool isParameter) {
+    if (isParameter) {
+      ByvalParmDbgValues.push_back(V);
+    } else DbgValues.push_back(V);
+    if (Node)
+      DbgValMap[Node].push_back(V);
+  }
+
+  void clear() {
+    DbgValMap.clear();
+    DbgValues.clear();
+    ByvalParmDbgValues.clear();
+  }
+
+  bool empty() const {
+    return DbgValues.empty() && ByvalParmDbgValues.empty();
+  }
+
+  SmallVector<SDDbgValue*, 2> &getSDDbgValues(const SDNode *Node) {
+    return DbgValMap[Node];
+  }
+
+  typedef SmallVector<SDDbgValue*, 32>::iterator DbgIterator;
+  DbgIterator DbgBegin() { return DbgValues.begin(); }
+  DbgIterator DbgEnd()   { return DbgValues.end(); }
+  DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); }
+  DbgIterator ByvalParmDbgEnd()   { return ByvalParmDbgValues.end(); }
+};
+
+enum CombineLevel {
+  Unrestricted,   // Combine may create illegal operations and illegal types.
+  NoIllegalTypes, // Combine may create illegal operations but no illegal types.
+  NoIllegalOperations // Combine may only create legal operations and types.
+};
+
+class SelectionDAG;
+void checkForCycles(const SDNode *N);
+void checkForCycles(const SelectionDAG *DAG);
+
+/// SelectionDAG class - This is used to represent a portion of an LLVM function
+/// in a low-level Data Dependence DAG representation suitable for instruction
+/// selection. This DAG is constructed as the first step of instruction
+/// selection in order to allow implementation of machine specific optimizations
+/// and code simplifications.
+///
+/// The representation used by the SelectionDAG is a target-independent
+/// representation, which has some similarities to the GCC RTL representation,
+/// but is significantly simpler and more powerful, and is a graph form instead
+/// of a linear form.
+///
+class SelectionDAG {
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  const TargetSelectionDAGInfo &TSI;
+  MachineFunction *MF;
+  LLVMContext *Context;
+
+  /// EntryNode - The starting token.
+  SDNode EntryNode;
+
+  /// Root - The root of the entire DAG.
+  SDValue Root;
+
+  /// AllNodes - A linked list of nodes in the current DAG.
+  ilist<SDNode> AllNodes;
+
+  /// NodeAllocatorType - The AllocatorType for allocating SDNodes. We use
+  /// pool allocation with recycling.
+  typedef RecyclingAllocator<BumpPtrAllocator, SDNode, sizeof(LargestSDNode),
+                             AlignOf<MostAlignedSDNode>::Alignment>
+    NodeAllocatorType;
+
+  /// NodeAllocator - Pool allocation for nodes.
+  NodeAllocatorType NodeAllocator;
+
+  /// CSEMap - This structure is used to memoize nodes, automatically performing
+  /// CSE with existing nodes when a duplicate is requested.
+  FoldingSet<SDNode> CSEMap;
+
+  /// OperandAllocator - Pool allocation for machine-opcode SDNode operands.
+  BumpPtrAllocator OperandAllocator;
+
+  /// Allocator - Pool allocation for misc. objects that are created once per
+  /// SelectionDAG.
+  BumpPtrAllocator Allocator;
+
+  /// SDNodeOrdering - The ordering of the SDNodes. It roughly corresponds to
+  /// the ordering of the original LLVM instructions.
+  SDNodeOrdering *Ordering;
+
+  /// DbgInfo - Tracks dbg_value information through SDISel.
+  SDDbgInfo *DbgInfo;
+
+  /// setSubgraphColorHelper - Implementation of setSubgraphColor.
+  /// Return whether we had to truncate the search.
+  ///
+  bool setSubgraphColorHelper(SDNode *N, const char *Color,
+                              DenseSet<SDNode *> &visited,
+                              int level, bool &printed);
+
+  void operator=(const SelectionDAG&); // Do not implement.
+  SelectionDAG(const SelectionDAG&);   // Do not implement.
+
+public:
+  explicit SelectionDAG(const TargetMachine &TM);
+  ~SelectionDAG();
+
+  /// init - Prepare this SelectionDAG to process code in the given
+  /// MachineFunction.
+  ///
+  void init(MachineFunction &mf);
+
+  /// clear - Clear state and free memory necessary to make this
+  /// SelectionDAG ready to process a new block.
+  ///
+  void clear();
+
+  MachineFunction &getMachineFunction() const { return *MF; }
+  const TargetMachine &getTarget() const { return TM; }
+  const TargetLowering &getTargetLoweringInfo() const { return TLI; }
+  const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; }
+  LLVMContext *getContext() const {return Context; }
+
+  /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'.
+  ///
+  void viewGraph(const std::string &Title);
+  void viewGraph();
+
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string> NodeGraphAttrs;
+#endif
+
+  /// clearGraphAttrs - Clear all previously defined node graph attributes.
+  /// Intended to be used from a debugging tool (eg. gdb).
+  void clearGraphAttrs();
+
+  /// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+  ///
+  void setGraphAttrs(const SDNode *N, const char *Attrs);
+
+  /// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+  /// Used from getNodeAttributes.
+  const std::string getGraphAttrs(const SDNode *N) const;
+
+  /// setGraphColor - Convenience for setting node color attribute.
+  ///
+  void setGraphColor(const SDNode *N, const char *Color);
+
+  /// setSubgraphColor - Convenience for setting subgraph color attribute.
+  ///
+  void setSubgraphColor(SDNode *N, const char *Color);
+
+  typedef ilist<SDNode>::const_iterator allnodes_const_iterator;
+  allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); }
+  allnodes_const_iterator allnodes_end() const { return AllNodes.end(); }
+  typedef ilist<SDNode>::iterator allnodes_iterator;
+  allnodes_iterator allnodes_begin() { return AllNodes.begin(); }
+  allnodes_iterator allnodes_end() { return AllNodes.end(); }
+  ilist<SDNode>::size_type allnodes_size() const {
+    return AllNodes.size();
+  }
+
+  /// getRoot - Return the root tag of the SelectionDAG.
+  ///
+  const SDValue &getRoot() const { return Root; }
+
+  /// getEntryNode - Return the token chain corresponding to the entry of the
+  /// function.
+  SDValue getEntryNode() const {
+    return SDValue(const_cast<SDNode *>(&EntryNode), 0);
+  }
+
+  /// setRoot - Set the current root tag of the SelectionDAG.
+  ///
+  const SDValue &setRoot(SDValue N) {
+    assert((!N.getNode() || N.getValueType() == MVT::Other) &&
+           "DAG root value is not a chain!");
+    if (N.getNode())
+      checkForCycles(N.getNode());
+    Root = N;
+    if (N.getNode())
+      checkForCycles(this);
+    return Root;
+  }
+
+  /// Combine - This iterates over the nodes in the SelectionDAG, folding
+  /// certain types of nodes together, or eliminating superfluous nodes. The
+  /// Level argument controls whether Combine is allowed to produce nodes and
+  /// types that are illegal on the target.
+  void Combine(CombineLevel Level, AliasAnalysis &AA,
+               CodeGenOpt::Level OptLevel);
+
+  /// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that
+  /// only uses types natively supported by the target. Returns "true" if it
+  /// made any changes.
+  ///
+  /// Note that this is an involved process that may invalidate pointers into
+  /// the graph.
+  bool LegalizeTypes();
+
+  /// Legalize - This transforms the SelectionDAG into a SelectionDAG that is
+  /// compatible with the target instruction selector, as indicated by the
+  /// TargetLowering object.
+ /// + /// Note that this is an involved process that may invalidate pointers into + /// the graph. + void Legalize(CodeGenOpt::Level OptLevel); + + /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG + /// that only uses vector math operations supported by the target. This is + /// necessary as a separate step from Legalize because unrolling a vector + /// operation can introduce illegal types, which requires running + /// LegalizeTypes again. + /// + /// This returns true if it made any changes; in that case, LegalizeTypes + /// is called again before Legalize. + /// + /// Note that this is an involved process that may invalidate pointers into + /// the graph. + bool LegalizeVectors(); + + /// RemoveDeadNodes - This method deletes all unreachable nodes in the + /// SelectionDAG. + void RemoveDeadNodes(); + + /// DeleteNode - Remove the specified node from the system. This node must + /// have no referrers. + void DeleteNode(SDNode *N); + + /// getVTList - Return an SDVTList that represents the list of values + /// specified. + SDVTList getVTList(EVT VT); + SDVTList getVTList(EVT VT1, EVT VT2); + SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3); + SDVTList getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4); + SDVTList getVTList(const EVT *VTs, unsigned NumVTs); + + //===--------------------------------------------------------------------===// + // Node creation methods. + // + SDValue getConstant(uint64_t Val, EVT VT, bool isTarget = false); + SDValue getConstant(const APInt &Val, EVT VT, bool isTarget = false); + SDValue getConstant(const ConstantInt &Val, EVT VT, bool isTarget = false); + SDValue getIntPtrConstant(uint64_t Val, bool isTarget = false); + SDValue getTargetConstant(uint64_t Val, EVT VT) { + return getConstant(Val, VT, true); + } + SDValue getTargetConstant(const APInt &Val, EVT VT) { + return getConstant(Val, VT, true); + } + SDValue getTargetConstant(const ConstantInt &Val, EVT VT) { + return getConstant(Val, VT, true); + } + // The forms below that take a double should only be used for simple + // constants that can be exactly represented in VT. No checks are made. 
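+  // For example (editor's note), getConstantFP(1.0, MVT::f32) is exact,
+  // whereas a double that is inexact in f32 is converted without any
+  // diagnostic.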
+ SDValue getConstantFP(double Val, EVT VT, bool isTarget = false); + SDValue getConstantFP(const APFloat& Val, EVT VT, bool isTarget = false); + SDValue getConstantFP(const ConstantFP &CF, EVT VT, bool isTarget = false); + SDValue getTargetConstantFP(double Val, EVT VT) { + return getConstantFP(Val, VT, true); + } + SDValue getTargetConstantFP(const APFloat& Val, EVT VT) { + return getConstantFP(Val, VT, true); + } + SDValue getTargetConstantFP(const ConstantFP &Val, EVT VT) { + return getConstantFP(Val, VT, true); + } + SDValue getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, + int64_t offset = 0, bool isTargetGA = false, + unsigned char TargetFlags = 0); + SDValue getTargetGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, + int64_t offset = 0, + unsigned char TargetFlags = 0) { + return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); + } + SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); + SDValue getTargetFrameIndex(int FI, EVT VT) { + return getFrameIndex(FI, VT, true); + } + SDValue getJumpTable(int JTI, EVT VT, bool isTarget = false, + unsigned char TargetFlags = 0); + SDValue getTargetJumpTable(int JTI, EVT VT, unsigned char TargetFlags = 0) { + return getJumpTable(JTI, VT, true, TargetFlags); + } + SDValue getConstantPool(const Constant *C, EVT VT, + unsigned Align = 0, int Offs = 0, bool isT=false, + unsigned char TargetFlags = 0); + SDValue getTargetConstantPool(const Constant *C, EVT VT, + unsigned Align = 0, int Offset = 0, + unsigned char TargetFlags = 0) { + return getConstantPool(C, VT, Align, Offset, true, TargetFlags); + } + SDValue getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Align = 0, int Offs = 0, bool isT=false, + unsigned char TargetFlags = 0); + SDValue getTargetConstantPool(MachineConstantPoolValue *C, + EVT VT, unsigned Align = 0, + int Offset = 0, unsigned char TargetFlags=0) { + return getConstantPool(C, VT, Align, Offset, true, TargetFlags); + } + // When generating a branch to a BB, we don't in general know enough + // to provide debug info for the BB at that time, so keep this one around. + SDValue getBasicBlock(MachineBasicBlock *MBB); + SDValue getBasicBlock(MachineBasicBlock *MBB, DebugLoc dl); + SDValue getExternalSymbol(const char *Sym, EVT VT); + SDValue getExternalSymbol(const char *Sym, DebugLoc dl, EVT VT); + SDValue getTargetExternalSymbol(const char *Sym, EVT VT, + unsigned char TargetFlags = 0); + SDValue getValueType(EVT); + SDValue getRegister(unsigned Reg, EVT VT); + SDValue getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label); + SDValue getBlockAddress(const BlockAddress *BA, EVT VT, + bool isTarget = false, unsigned char TargetFlags = 0); + + SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N) { + return getNode(ISD::CopyToReg, dl, MVT::Other, Chain, + getRegister(Reg, N.getValueType()), N); + } + + // This version of the getCopyToReg method takes an extra operand, which + // indicates that there is potentially an incoming glue value (if Glue is not + // null) and that there should be a glue result. + SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N, + SDValue Glue) { + SDVTList VTs = getVTList(MVT::Other, MVT::Glue); + SDValue Ops[] = { Chain, getRegister(Reg, N.getValueType()), N, Glue }; + return getNode(ISD::CopyToReg, dl, VTs, Ops, Glue.getNode() ? 
4 : 3);
+  }
+
+  // Similar to last getCopyToReg() except parameter Reg is an SDValue
+  SDValue getCopyToReg(SDValue Chain, DebugLoc dl, SDValue Reg, SDValue N,
+                       SDValue Glue) {
+    SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, Reg, N, Glue };
+    return getNode(ISD::CopyToReg, dl, VTs, Ops, Glue.getNode() ? 4 : 3);
+  }
+
+  SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT) {
+    SDVTList VTs = getVTList(VT, MVT::Other);
+    SDValue Ops[] = { Chain, getRegister(Reg, VT) };
+    return getNode(ISD::CopyFromReg, dl, VTs, Ops, 2);
+  }
+
+  // This version of the getCopyFromReg method takes an extra operand, which
+  // indicates that there is potentially an incoming glue value (if Glue is not
+  // null) and that there should be a glue result.
+  SDValue getCopyFromReg(SDValue Chain, DebugLoc dl, unsigned Reg, EVT VT,
+                         SDValue Glue) {
+    SDVTList VTs = getVTList(VT, MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, getRegister(Reg, VT), Glue };
+    return getNode(ISD::CopyFromReg, dl, VTs, Ops, Glue.getNode() ? 3 : 2);
+  }
+
+  SDValue getCondCode(ISD::CondCode Cond);
+
+  /// Returns the ConvertRndSat node. Note: Avoid using this node because it
+  /// may disappear in the future and most targets don't support it.
+  SDValue getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy,
+                           SDValue STy,
+                           SDValue Rnd, SDValue Sat, ISD::CvtCode Code);
+
+  /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of
+  /// elements in VT, which must be a vector type, must match the number of
+  /// mask elements NumElts. An integer mask element equal to -1 is treated as
+  /// undefined.
+  SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2,
+                           const int *MaskElts);
+
+  /// getSExtOrTrunc - Convert Op, which must be of integer type, to the
+  /// integer type VT, by either sign-extending or truncating it.
+  SDValue getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT);
+
+  /// getZExtOrTrunc - Convert Op, which must be of integer type, to the
+  /// integer type VT, by either zero-extending or truncating it.
+  SDValue getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT);
+
+  /// getZeroExtendInReg - Return the expression required to zero extend the Op
+  /// value assuming it was the smaller SrcTy value.
+  SDValue getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT SrcTy);
+
+  /// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
+  SDValue getNOT(DebugLoc DL, SDValue Val, EVT VT);
+
+  /// getCALLSEQ_START - Return a new CALLSEQ_START node, which always must have
+  /// a glue result (to ensure it's not CSE'd). CALLSEQ_START does not have a
+  /// useful DebugLoc.
+  SDValue getCALLSEQ_START(SDValue Chain, SDValue Op) {
+    SDVTList VTs = getVTList(MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, Op };
+    return getNode(ISD::CALLSEQ_START, DebugLoc(), VTs, Ops, 2);
+  }
+
+  /// getCALLSEQ_END - Return a new CALLSEQ_END node, which always must have a
+  /// glue result (to ensure it's not CSE'd). CALLSEQ_END does not have
+  /// a useful DebugLoc.
+  SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2,
+                         SDValue InGlue) {
+    SDVTList NodeTys = getVTList(MVT::Other, MVT::Glue);
+    SmallVector<SDValue, 4> Ops;
+    Ops.push_back(Chain);
+    Ops.push_back(Op1);
+    Ops.push_back(Op2);
+    Ops.push_back(InGlue);
+    return getNode(ISD::CALLSEQ_END, DebugLoc(), NodeTys, &Ops[0],
+                   (unsigned)Ops.size() - (InGlue.getNode() == 0 ? 1 : 0));
+  }
+
+  /// getUNDEF - Return an UNDEF node. UNDEF does not have a useful DebugLoc.
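+  /// e.g. (editor's sketch): DAG.getUNDEF(MVT::i32) yields an undefined i32
+  /// value.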
+  SDValue getUNDEF(EVT VT) {
+    return getNode(ISD::UNDEF, DebugLoc(), VT);
+  }
+
+  /// getGLOBAL_OFFSET_TABLE - Return a GLOBAL_OFFSET_TABLE node. This does
+  /// not have a useful DebugLoc.
+  SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
+    return getNode(ISD::GLOBAL_OFFSET_TABLE, DebugLoc(), VT);
+  }
+
+  /// getNode - Gets or creates the specified node.
+  ///
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                  SDValue N1, SDValue N2, SDValue N3);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                  SDValue N1, SDValue N2, SDValue N3, SDValue N4);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                  SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+                  SDValue N5);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                  const SDUse *Ops, unsigned NumOps);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                  const SDValue *Ops, unsigned NumOps);
+  SDValue getNode(unsigned Opcode, DebugLoc DL,
+                  const std::vector<EVT> &ResultTys,
+                  const SDValue *Ops, unsigned NumOps);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, const EVT *VTs, unsigned NumVTs,
+                  const SDValue *Ops, unsigned NumOps);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                  const SDValue *Ops, unsigned NumOps);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, SDValue N);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                  SDValue N1, SDValue N2);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                  SDValue N1, SDValue N2, SDValue N3);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                  SDValue N1, SDValue N2, SDValue N3, SDValue N4);
+  SDValue getNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
+                  SDValue N1, SDValue N2, SDValue N3, SDValue N4,
+                  SDValue N5);
+
+  /// getStackArgumentTokenFactor - Compute a TokenFactor to force all
+  /// the incoming stack arguments to be loaded from the stack. This is
+  /// used in tail call lowering to protect stack arguments from being
+  /// clobbered.
+  SDValue getStackArgumentTokenFactor(SDValue Chain);
+
+  SDValue getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
+                    SDValue Size, unsigned Align, bool isVol, bool AlwaysInline,
+                    MachinePointerInfo DstPtrInfo,
+                    MachinePointerInfo SrcPtrInfo);
+
+  SDValue getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
+                     SDValue Size, unsigned Align, bool isVol,
+                     MachinePointerInfo DstPtrInfo,
+                     MachinePointerInfo SrcPtrInfo);
+
+  SDValue getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src,
+                    SDValue Size, unsigned Align, bool isVol,
+                    MachinePointerInfo DstPtrInfo);
+
+  /// getSetCC - Helper function to make it easier to build SetCC's if you just
+  /// have an ISD::CondCode instead of an SDValue.
+  ///
+  SDValue getSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+                   ISD::CondCode Cond) {
+    return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
+  }
+
+  /// getVSetCC - Helper function to make it easier to build VSetCC nodes
+  /// if you just have an ISD::CondCode instead of an SDValue.
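+  /// e.g. (editor's sketch, vector operands assumed):
+  ///   SDValue Mask = DAG.getVSetCC(dl, MaskVT, VecLHS, VecRHS, ISD::SETGT);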
+  /// getVSetCC - Helper function to make it easier to build VSETCC nodes
+  /// if you just have an ISD::CondCode instead of an SDValue.
+  ///
+  SDValue getVSetCC(DebugLoc DL, EVT VT, SDValue LHS, SDValue RHS,
+                    ISD::CondCode Cond) {
+    return getNode(ISD::VSETCC, DL, VT, LHS, RHS, getCondCode(Cond));
+  }
+
+  /// getSelectCC - Helper function to make it easier to build SelectCC's if
+  /// you just have an ISD::CondCode instead of an SDValue.
+  ///
+  SDValue getSelectCC(DebugLoc DL, SDValue LHS, SDValue RHS,
+                      SDValue True, SDValue False, ISD::CondCode Cond) {
+    return getNode(ISD::SELECT_CC, DL, True.getValueType(),
+                   LHS, RHS, True, False, getCondCode(Cond));
+  }
+
+  /// getVAArg - VAArg produces a result and token chain, and takes a pointer
+  /// and a source value as input.
+  SDValue getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
+                   SDValue SV, unsigned Align);
+
+  /// getAtomic - Gets a node for an atomic op, produces result and chain and
+  /// takes 3 operands.
+  SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+                    SDValue Ptr, SDValue Cmp, SDValue Swp,
+                    MachinePointerInfo PtrInfo, unsigned Alignment=0);
+  SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+                    SDValue Ptr, SDValue Cmp, SDValue Swp,
+                    MachineMemOperand *MMO);
+
+  /// getAtomic - Gets a node for an atomic op, produces result and chain and
+  /// takes 2 operands.
+  SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+                    SDValue Ptr, SDValue Val, const Value* PtrVal,
+                    unsigned Alignment = 0);
+  SDValue getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDValue Chain,
+                    SDValue Ptr, SDValue Val,
+                    MachineMemOperand *MMO);
+
+  /// getMemIntrinsicNode - Creates a MemIntrinsicNode that may produce a
+  /// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
+  /// INTRINSIC_W_CHAIN, or a target-specific opcode with a value not
+  /// less than FIRST_TARGET_MEMORY_OPCODE.
+  SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
+                              const EVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps,
+                              EVT MemVT, MachinePointerInfo PtrInfo,
+                              unsigned Align = 0, bool Vol = false,
+                              bool ReadMem = true, bool WriteMem = true);
+
+  SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps,
+                              EVT MemVT, MachinePointerInfo PtrInfo,
+                              unsigned Align = 0, bool Vol = false,
+                              bool ReadMem = true, bool WriteMem = true);
+
+  SDValue getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps,
+                              EVT MemVT, MachineMemOperand *MMO);
+
+  /// getMergeValues - Create a MERGE_VALUES node from the given operands.
+  SDValue getMergeValues(const SDValue *Ops, unsigned NumOps, DebugLoc dl);
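+
+  // Usage sketch (illustrative; assumes DAG, dl, Chain, an address Ptr, an
+  // i32 increment Incr, and the originating IR pointer PtrVal in scope):
+  // atomic ops produce both the prior memory value and a chain.
+  //   SDValue Old = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, MVT::i32, Chain,
+  //                               Ptr, Incr, PtrVal, /*Alignment=*/4);
+  //   Chain = Old.getValue(1);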
+  /// getLoad - Loads are not normal binary operators: their result type is not
+  /// determined by their operands, and they produce a value AND a token chain.
+  ///
+  SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
+                  MachinePointerInfo PtrInfo, bool isVolatile,
+                  bool isNonTemporal, unsigned Alignment,
+                  const MDNode *TBAAInfo = 0);
+  SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+                     SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo,
+                     EVT MemVT, bool isVolatile,
+                     bool isNonTemporal, unsigned Alignment,
+                     const MDNode *TBAAInfo = 0);
+  SDValue getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                         SDValue Offset, ISD::MemIndexedMode AM);
+  SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                  EVT VT, DebugLoc dl,
+                  SDValue Chain, SDValue Ptr, SDValue Offset,
+                  MachinePointerInfo PtrInfo, EVT MemVT,
+                  bool isVolatile, bool isNonTemporal, unsigned Alignment,
+                  const MDNode *TBAAInfo = 0);
+  SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+                  EVT VT, DebugLoc dl,
+                  SDValue Chain, SDValue Ptr, SDValue Offset,
+                  EVT MemVT, MachineMemOperand *MMO);
+
+  /// getStore - Helper function to build ISD::STORE nodes.
+  ///
+  SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+                   MachinePointerInfo PtrInfo, bool isVolatile,
+                   bool isNonTemporal, unsigned Alignment,
+                   const MDNode *TBAAInfo = 0);
+  SDValue getStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+                   MachineMemOperand *MMO);
+  SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+                        MachinePointerInfo PtrInfo, EVT TVT,
+                        bool isNonTemporal, bool isVolatile,
+                        unsigned Alignment,
+                        const MDNode *TBAAInfo = 0);
+  SDValue getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, SDValue Ptr,
+                        EVT TVT, MachineMemOperand *MMO);
+  SDValue getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                          SDValue Offset, ISD::MemIndexedMode AM);
+
+  /// getSrcValue - Construct a node to track a Value* through the backend.
+  SDValue getSrcValue(const Value *v);
+
+  /// getMDNode - Return an MDNodeSDNode which holds an MDNode.
+  SDValue getMDNode(const MDNode *MD);
+
+  /// getShiftAmountOperand - Return the specified value casted to
+  /// the target's desired shift amount type.
+  SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op);
+
+  /// UpdateNodeOperands - *Mutate* the specified node in-place to have the
+  /// specified operands. If the resultant node already exists in the DAG,
+  /// this does not modify the specified node, instead it returns the node that
+  /// already exists. If the resultant node does not exist in the DAG, the
+  /// input node is returned. As a degenerate case, if you specify the same
+  /// input operands as the node already has, the input node is returned.
+  SDNode *UpdateNodeOperands(SDNode *N, SDValue Op);
+  SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2);
+  SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+                             SDValue Op3);
+  SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+                             SDValue Op3, SDValue Op4);
+  SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
+                             SDValue Op3, SDValue Op4, SDValue Op5);
+  SDNode *UpdateNodeOperands(SDNode *N,
+                             const SDValue *Ops, unsigned NumOps);
+
+  /// SelectNodeTo - These are used for target selectors to *mutate* the
+  /// specified node to have the specified return type, Target opcode, and
+  /// operands. Note that target opcodes are stored as
+  /// ~TargetOpcode in the node opcode field. The resultant node is returned.
+ SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDValue Op1, SDValue Op2); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT, + const SDValue *Ops, unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, EVT VT2); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, const SDValue *Ops, unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, EVT VT3, const SDValue *Ops, unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, const SDValue *Ops, + unsigned NumOps); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1, + EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3); + SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs, + const SDValue *Ops, unsigned NumOps); + + /// MorphNodeTo - This *mutates* the specified node to have the specified + /// return type, opcode, and operands. + SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, + const SDValue *Ops, unsigned NumOps); + + /// getMachineNode - These are used for target selectors to create a new node + /// with specified return type(s), MachineInstr opcode, and operands. + /// + /// Note that getMachineNode returns the resultant node. If there is already + /// a node of the specified opcode and operands, it returns that node instead + /// of the current one. 
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                                SDValue Op1);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                                SDValue Op1, SDValue Op2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                                SDValue Op1, SDValue Op2, SDValue Op3);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT,
+                                const SDValue *Ops, unsigned NumOps);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                SDValue Op1);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+                                EVT VT2, SDValue Op1, SDValue Op2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1,
+                                EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                const SDValue *Ops, unsigned NumOps);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                EVT VT3, SDValue Op1, SDValue Op2);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                EVT VT3, const SDValue *Ops, unsigned NumOps);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2,
+                                EVT VT3, EVT VT4, const SDValue *Ops,
+                                unsigned NumOps);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl,
+                                const std::vector<EVT> &ResultTys,
+                                const SDValue *Ops, unsigned NumOps);
+  MachineSDNode *getMachineNode(unsigned Opcode, DebugLoc dl, SDVTList VTs,
+                                const SDValue *Ops, unsigned NumOps);
+
+  /// getTargetExtractSubreg - A convenience function for creating
+  /// TargetInstrInfo::EXTRACT_SUBREG nodes.
+  SDValue getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT,
+                                 SDValue Operand);
+
+  /// getTargetInsertSubreg - A convenience function for creating
+  /// TargetInstrInfo::INSERT_SUBREG nodes.
+  SDValue getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
+                                SDValue Operand, SDValue Subreg);
+
+  /// getNodeIfExists - Get the specified node if it's already available, or
+  /// else return NULL.
+  SDNode *getNodeIfExists(unsigned Opcode, SDVTList VTs,
+                          const SDValue *Ops, unsigned NumOps);
+
+  /// getDbgValue - Creates a SDDbgValue node.
+  ///
+  SDDbgValue *getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+                          DebugLoc DL, unsigned O);
+  SDDbgValue *getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off,
+                          DebugLoc DL, unsigned O);
+  SDDbgValue *getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+                          DebugLoc DL, unsigned O);
+
+  /// DAGUpdateListener - Clients of various APIs that cause global effects on
+  /// the DAG can optionally implement this interface. This allows the clients
+  /// to handle the various sorts of updates that happen.
+  class DAGUpdateListener {
+  public:
+    virtual ~DAGUpdateListener();
+
+    /// NodeDeleted - The node N that was deleted and, if E is not null, an
+    /// equivalent node E that replaced it.
+    virtual void NodeDeleted(SDNode *N, SDNode *E) = 0;
+
+    /// NodeUpdated - The node N that was updated.
+    virtual void NodeUpdated(SDNode *N) = 0;
+  };
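+
+  // Usage sketch (illustrative; CacheT is a placeholder for any client-side
+  // side table keyed by SDNode*): a minimal listener that drops entries for
+  // nodes as they are deleted from the DAG.
+  //   struct CacheInvalidator : SelectionDAG::DAGUpdateListener {
+  //     CacheT &Cache;
+  //     explicit CacheInvalidator(CacheT &C) : Cache(C) {}
+  //     virtual void NodeDeleted(SDNode *N, SDNode *) { Cache.erase(N); }
+  //     virtual void NodeUpdated(SDNode *) {}
+  //   };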
+  /// RemoveDeadNode - Remove the specified node from the system. If any of its
+  /// operands then becomes dead, remove them as well. Inform UpdateListener
+  /// for each node deleted.
+  void RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener = 0);
+
+  /// RemoveDeadNodes - This method deletes the unreachable nodes in the
+  /// given list, and any nodes that become unreachable as a result.
+  void RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
+                       DAGUpdateListener *UpdateListener = 0);
+
+  /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
+  /// This can cause recursive merging of nodes in the DAG. Use the first
+  /// version if 'From' is known to have a single result, use the second
+  /// if you have two nodes with identical results (or if 'To' has a superset
+  /// of the results of 'From'), use the third otherwise.
+  ///
+  /// These methods all take an optional UpdateListener, which (if not null) is
+  /// informed about nodes that are deleted and modified due to recursive
+  /// changes in the DAG.
+  ///
+  /// These functions only replace all existing uses. It's possible that as
+  /// these replacements are being performed, CSE may cause the From node
+  /// to be given new uses. These new uses of From are left in place, and
+  /// not automatically transferred to To.
+  ///
+  void ReplaceAllUsesWith(SDValue From, SDValue Op,
+                          DAGUpdateListener *UpdateListener = 0);
+  void ReplaceAllUsesWith(SDNode *From, SDNode *To,
+                          DAGUpdateListener *UpdateListener = 0);
+  void ReplaceAllUsesWith(SDNode *From, const SDValue *To,
+                          DAGUpdateListener *UpdateListener = 0);
+
+  /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving
+  /// uses of other values produced by From.Val alone.
+  void ReplaceAllUsesOfValueWith(SDValue From, SDValue To,
+                                 DAGUpdateListener *UpdateListener = 0);
+
+  /// ReplaceAllUsesOfValuesWith - Like ReplaceAllUsesOfValueWith, but
+  /// for multiple values at once. This correctly handles the case where
+  /// there is an overlap between the From values and the To values.
+  void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To,
+                                  unsigned Num,
+                                  DAGUpdateListener *UpdateListener = 0);
+
+  /// AssignTopologicalOrder - Topological-sort the AllNodes list and assign
+  /// a unique node id for each node in the DAG based on their
+  /// topological order. Returns the number of nodes.
+  unsigned AssignTopologicalOrder();
+
+  /// RepositionNode - Move node N in the AllNodes list to be immediately
+  /// before the given iterator Position. This may be used to update the
+  /// topological ordering when the list of nodes is modified.
+  void RepositionNode(allnodes_iterator Position, SDNode *N) {
+    AllNodes.insert(Position, AllNodes.remove(N));
+  }
+
+  /// isCommutativeBinOp - Returns true if the opcode is a commutative binary
+  /// operation.
+  static bool isCommutativeBinOp(unsigned Opcode) {
+    // FIXME: This should get its info from the td file, so that we can include
+    // target info.
+    switch (Opcode) {
+    case ISD::ADD:
+    case ISD::MUL:
+    case ISD::MULHU:
+    case ISD::MULHS:
+    case ISD::SMUL_LOHI:
+    case ISD::UMUL_LOHI:
+    case ISD::FADD:
+    case ISD::FMUL:
+    case ISD::AND:
+    case ISD::OR:
+    case ISD::XOR:
+    case ISD::SADDO:
+    case ISD::UADDO:
+    case ISD::ADDC:
+    case ISD::ADDE: return true;
+    default: return false;
+    }
+  }
+
+  /// AssignOrdering - Assign an order to the SDNode.
+  void AssignOrdering(const SDNode *SD, unsigned Order);
+
+  /// GetOrdering - Get the order for the SDNode.
+  unsigned GetOrdering(const SDNode *SD) const;
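+
+  // Usage sketch (illustrative; assumes SDNode *N and SDValue NewVal in
+  // scope, e.g. inside a DAG combine): rewrite all users of N's first result
+  // and clean up if N becomes dead.
+  //   DAG.ReplaceAllUsesWith(SDValue(N, 0), NewVal);
+  //   if (N->use_empty())
+  //     DAG.RemoveDeadNode(N);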
+  void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter);
+
+  /// GetDbgValues - Get the debug values which reference the given SDNode.
+  SmallVector<SDDbgValue*, 2> &GetDbgValues(const SDNode* SD) {
+    return DbgInfo->getSDDbgValues(SD);
+  }
+
+  /// TransferDbgValues - Transfer SDDbgValues.
+  void TransferDbgValues(SDValue From, SDValue To);
+
+  /// hasDebugValues - Return true if there are any SDDbgValue nodes associated
+  /// with this SelectionDAG.
+  bool hasDebugValues() const { return !DbgInfo->empty(); }
+
+  SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
+  SDDbgInfo::DbgIterator DbgEnd()   { return DbgInfo->DbgEnd(); }
+  SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
+    return DbgInfo->ByvalParmDbgBegin();
+  }
+  SDDbgInfo::DbgIterator ByvalParmDbgEnd() {
+    return DbgInfo->ByvalParmDbgEnd();
+  }
+
+  void dump() const;
+
+  /// CreateStackTemporary - Create a stack temporary, suitable for holding the
+  /// specified value type. If minAlign is specified, the slot size will have
+  /// at least that alignment.
+  SDValue CreateStackTemporary(EVT VT, unsigned minAlign = 1);
+
+  /// CreateStackTemporary - Create a stack temporary suitable for holding
+  /// either of the specified value types.
+  SDValue CreateStackTemporary(EVT VT1, EVT VT2);
+
+  /// FoldConstantArithmetic - Attempt to constant fold the given binary
+  /// operation on the two constant operands.
+  SDValue FoldConstantArithmetic(unsigned Opcode,
+                                 EVT VT,
+                                 ConstantSDNode *Cst1,
+                                 ConstantSDNode *Cst2);
+
+  /// FoldSetCC - Constant fold a setcc to true or false.
+  SDValue FoldSetCC(EVT VT, SDValue N1,
+                    SDValue N2, ISD::CondCode Cond, DebugLoc dl);
+
+  /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
+  /// use this predicate to simplify operations downstream.
+  bool SignBitIsZero(SDValue Op, unsigned Depth = 0) const;
+
+  /// MaskedValueIsZero - Return true if 'Op & Mask' is known to be zero. We
+  /// use this predicate to simplify operations downstream. Op and Mask are
+  /// known to be the same type.
+  bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth = 0)
+    const;
+
+  /// ComputeMaskedBits - Determine which of the bits specified in Mask are
+  /// known to be either zero or one and return them in the KnownZero/KnownOne
+  /// bitsets. This code only analyzes bits in Mask, in order to short-circuit
+  /// processing. Targets can implement the computeMaskedBitsForTargetNode
+  /// method in the TargetLowering class to allow target nodes to be understood.
+  void ComputeMaskedBits(SDValue Op, const APInt &Mask, APInt &KnownZero,
+                         APInt &KnownOne, unsigned Depth = 0) const;
+
+  /// ComputeNumSignBits - Return the number of times the sign bit of the
+  /// register is replicated into the other bits. We know that at least 1 bit
+  /// is always equal to the sign bit (itself), but other cases can give us
+  /// information. For example, immediately after an "SRA X, 2", we know that
+  /// the top 3 bits are all equal to each other, so we return 3. Targets can
+  /// implement the ComputeNumSignBitsForTarget method in the TargetLowering
+  /// class to allow target nodes to be understood.
+  unsigned ComputeNumSignBits(SDValue Op, unsigned Depth = 0) const;
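+
+  // Usage sketch (illustrative; assumes an i32 SDValue Op in scope): testing
+  // whether the low two bits of Op are known zero, as an addressing-mode or
+  // alignment fold might.
+  //   APInt LowTwo = APInt::getLowBitsSet(32, 2);
+  //   bool Known = DAG.MaskedValueIsZero(Op, LowTwo);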
+  /// isBaseWithConstantOffset - Return true if the specified operand is an
+  /// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+  /// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+  /// semantics as an ADD. This handles the equivalence:
+  ///     X|Cst == X+Cst iff X&Cst = 0.
+  bool isBaseWithConstantOffset(SDValue Op) const;
+
+  /// isKnownNeverNaN - Test whether the given SDValue is known to never be
+  /// NaN.
+  bool isKnownNeverNaN(SDValue Op) const;
+
+  /// isKnownNeverZero - Test whether the given SDValue is known to never be
+  /// positive or negative zero.
+  bool isKnownNeverZero(SDValue Op) const;
+
+  /// isEqualTo - Test whether two SDValues are known to compare equal. This
+  /// is true if they are the same value, or if one is negative zero and the
+  /// other positive zero.
+  bool isEqualTo(SDValue A, SDValue B) const;
+
+  /// isVerifiedDebugInfoDesc - Returns true if the specified SDValue has
+  /// been verified as a debug information descriptor.
+  bool isVerifiedDebugInfoDesc(SDValue Op) const;
+
+  /// UnrollVectorOp - Utility function used by legalize and lowering to
+  /// "unroll" a vector operation by splitting out the scalars and operating
+  /// on each element individually. If the ResNE is 0, fully unroll the vector
+  /// op. If ResNE is less than the width of the vector op, unroll up to ResNE.
+  /// If the ResNE is greater than the width of the vector op, unroll the
+  /// vector op and fill the end of the resulting vector with UNDEFS.
+  SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
+
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+  /// location that is 'Dist' units away from the location that the 'Base' load
+  /// is loading from.
+  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+                         unsigned Bytes, int Dist) const;
+
+  /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+  /// it cannot be inferred.
+  unsigned InferPtrAlignment(SDValue Ptr) const;
+
+private:
+  bool RemoveNodeFromCSEMaps(SDNode *N);
+  void AddModifiedNodeToCSEMaps(SDNode *N, DAGUpdateListener *UpdateListener);
+  SDNode *FindModifiedNodeSlot(SDNode *N, SDValue Op, void *&InsertPos);
+  SDNode *FindModifiedNodeSlot(SDNode *N, SDValue Op1, SDValue Op2,
+                               void *&InsertPos);
+  SDNode *FindModifiedNodeSlot(SDNode *N, const SDValue *Ops, unsigned NumOps,
+                               void *&InsertPos);
+
+  void DeleteNodeNotInCSEMaps(SDNode *N);
+  void DeallocateNode(SDNode *N);
+
+  unsigned getEVTAlignment(EVT MemoryVT) const;
+
+  void allnodes_clear();
+
+  /// VTList - List of non-single value types.
+  std::vector<SDVTList> VTList;
+
+  /// CondCodeNodes - Maps to auto-CSE operations.
+  std::vector<CondCodeSDNode*> CondCodeNodes;
+
+  std::vector<SDNode*> ValueTypeNodes;
+  std::map<EVT, SDNode*, EVT::compareRawBits> ExtendedValueTypeNodes;
+  StringMap<SDNode*> ExternalSymbols;
+
+  std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
+};
+
+template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
+  typedef SelectionDAG::allnodes_iterator nodes_iterator;
+  static nodes_iterator nodes_begin(SelectionDAG *G) {
+    return G->allnodes_begin();
+  }
+  static nodes_iterator nodes_end(SelectionDAG *G) {
+    return G->allnodes_end();
+  }
+};
+
+}  // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/SelectionDAGISel.h b/final/include/llvm/CodeGen/SelectionDAGISel.h
new file mode 100644
index 00000000000..54576794ded
--- /dev/null
+++ b/final/include/llvm/CodeGen/SelectionDAGISel.h
@@ -0,0 +1,313 @@
+//===-- llvm/CodeGen/SelectionDAGISel.h - Common Base Class ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAGISel class, which is used as the common
+// base class for SelectionDAG-based instruction selectors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SELECTIONDAG_ISEL_H
+#define LLVM_CODEGEN_SELECTIONDAG_ISEL_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Pass.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+  class FastISel;
+  class SelectionDAGBuilder;
+  class SDValue;
+  class MachineRegisterInfo;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class TargetLowering;
+  class TargetInstrInfo;
+  class FunctionLoweringInfo;
+  class ScheduleHazardRecognizer;
+  class GCFunctionInfo;
+  class ScheduleDAGSDNodes;
+  class LoadInst;
+
+/// SelectionDAGISel - This is the common base class used for SelectionDAG-based
+/// pattern-matching instruction selectors.
+class SelectionDAGISel : public MachineFunctionPass {
+public:
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  FunctionLoweringInfo *FuncInfo;
+  MachineFunction *MF;
+  MachineRegisterInfo *RegInfo;
+  SelectionDAG *CurDAG;
+  SelectionDAGBuilder *SDB;
+  AliasAnalysis *AA;
+  GCFunctionInfo *GFI;
+  CodeGenOpt::Level OptLevel;
+  static char ID;
+
+  explicit SelectionDAGISel(const TargetMachine &tm,
+                            CodeGenOpt::Level OL = CodeGenOpt::Default);
+  virtual ~SelectionDAGISel();
+
+  const TargetLowering &getTargetLowering() { return TLI; }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+  virtual void EmitFunctionEntryCode() {}
+
+  /// PreprocessISelDAG - This hook allows targets to hack on the graph before
+  /// instruction selection starts.
+  virtual void PreprocessISelDAG() {}
+
+  /// PostprocessISelDAG - This hook allows the target to hack on the graph
+  /// right after selection.
+  virtual void PostprocessISelDAG() {}
+
+  /// Select - Main hook targets implement to select a node.
+  virtual SDNode *Select(SDNode *N) = 0;
+
+  /// SelectInlineAsmMemoryOperand - Select the specified address as a target
+  /// addressing mode, according to the specified constraint code. If this does
+  /// not match or is not implemented, return true. The resultant operands
+  /// (which will appear in the machine instruction) should be added to the
+  /// OutOps vector.
+  virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+                                            char ConstraintCode,
+                                            std::vector<SDValue> &OutOps) {
+    return true;
+  }
+
+  /// IsProfitableToFold - Returns true if it's profitable to fold the specific
+  /// operand node N of U during instruction selection that starts at Root.
+  virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
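+
+  // Usage sketch (illustrative; FooDAGToDAGISel and SelectFrameIndex are
+  // placeholder names): a target's Select hook handles a few special opcodes
+  // by hand and defers the rest to the tblgen-generated matcher.
+  //   SDNode *FooDAGToDAGISel::Select(SDNode *N) {
+  //     switch (N->getOpcode()) {
+  //     default: break;
+  //     case ISD::FrameIndex: return SelectFrameIndex(N); // custom helper
+  //     }
+  //     return SelectCode(N);  // generated from the target's .td patterns
+  //   }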
+  /// IsLegalToFold - Returns true if the specific operand node N of
+  /// U can be folded during instruction selection that starts at Root.
+  /// FIXME: This is a static member function because the MSP430, SystemZ, and
+  /// X86 targets use it during isel. This could become a proper member.
+  static bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                            CodeGenOpt::Level OptLevel,
+                            bool IgnoreChains = false);
+
+  // Opcodes used by the DAG state machine:
+  enum BuiltinOpcodes {
+    OPC_Scope,
+    OPC_RecordNode,
+    OPC_RecordChild0, OPC_RecordChild1, OPC_RecordChild2, OPC_RecordChild3,
+    OPC_RecordChild4, OPC_RecordChild5, OPC_RecordChild6, OPC_RecordChild7,
+    OPC_RecordMemRef,
+    OPC_CaptureGlueInput,
+    OPC_MoveChild,
+    OPC_MoveParent,
+    OPC_CheckSame,
+    OPC_CheckPatternPredicate,
+    OPC_CheckPredicate,
+    OPC_CheckOpcode,
+    OPC_SwitchOpcode,
+    OPC_CheckType,
+    OPC_SwitchType,
+    OPC_CheckChild0Type, OPC_CheckChild1Type, OPC_CheckChild2Type,
+    OPC_CheckChild3Type, OPC_CheckChild4Type, OPC_CheckChild5Type,
+    OPC_CheckChild6Type, OPC_CheckChild7Type,
+    OPC_CheckInteger,
+    OPC_CheckCondCode,
+    OPC_CheckValueType,
+    OPC_CheckComplexPat,
+    OPC_CheckAndImm, OPC_CheckOrImm,
+    OPC_CheckFoldableChainNode,
+
+    OPC_EmitInteger,
+    OPC_EmitRegister,
+    OPC_EmitRegister2,
+    OPC_EmitConvertToTarget,
+    OPC_EmitMergeInputChains,
+    OPC_EmitMergeInputChains1_0,
+    OPC_EmitMergeInputChains1_1,
+    OPC_EmitCopyToReg,
+    OPC_EmitNodeXForm,
+    OPC_EmitNode,
+    OPC_MorphNodeTo,
+    OPC_MarkGlueResults,
+    OPC_CompleteMatch
+  };
+
+  enum {
+    OPFL_None       = 0,    // Node has no chain or glue input and isn't variadic.
+    OPFL_Chain      = 1,    // Node has a chain input.
+    OPFL_GlueInput  = 2,    // Node has a glue input.
+    OPFL_GlueOutput = 4,    // Node has a glue output.
+    OPFL_MemRefs    = 8,    // Node gets accumulated MemRefs.
+    OPFL_Variadic0  = 1<<4, // Node is variadic, root has 0 fixed inputs.
+    OPFL_Variadic1  = 2<<4, // Node is variadic, root has 1 fixed input.
+    OPFL_Variadic2  = 3<<4, // Node is variadic, root has 2 fixed inputs.
+    OPFL_Variadic3  = 4<<4, // Node is variadic, root has 3 fixed inputs.
+    OPFL_Variadic4  = 5<<4, // Node is variadic, root has 4 fixed inputs.
+    OPFL_Variadic5  = 6<<4, // Node is variadic, root has 5 fixed inputs.
+    OPFL_Variadic6  = 7<<4, // Node is variadic, root has 6 fixed inputs.
+
+    OPFL_VariadicInfo = OPFL_Variadic6
+  };
+
+  /// getNumFixedFromVariadicInfo - Transform an EmitNode flags word into the
+  /// number of fixed arity values that should be skipped when copying from the
+  /// root.
+  static inline int getNumFixedFromVariadicInfo(unsigned Flags) {
+    return ((Flags&OPFL_VariadicInfo) >> 4)-1;
+  }
+
+
+protected:
+  /// DAGSize - Size of DAG being instruction selected.
+  ///
+  unsigned DAGSize;
+
+  /// ISelPosition - Node iterator marking the current position of
+  /// instruction selection as it proceeds through the topologically-sorted
+  /// node list.
+  SelectionDAG::allnodes_iterator ISelPosition;
+
+
+  /// ISelUpdater - Helper class to handle updates of the
+  /// instruction selection graph.
+  class ISelUpdater : public SelectionDAG::DAGUpdateListener {
+    SelectionDAG::allnodes_iterator &ISelPosition;
+  public:
+    explicit ISelUpdater(SelectionDAG::allnodes_iterator &isp)
+      : ISelPosition(isp) {}
+
+    /// NodeDeleted - Handle nodes deleted from the graph. If the
+    /// node being deleted is the current ISelPosition node, update
+    /// ISelPosition.
+    ///
+    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+      if (ISelPosition == SelectionDAG::allnodes_iterator(N))
+        ++ISelPosition;
+    }
+
+    /// NodeUpdated - Ignore updates for now.
+    virtual void NodeUpdated(SDNode *N) {}
+  };
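+
+  // Decoding sketch (illustrative): for a node emitted with
+  // OPFL_Chain | OPFL_Variadic2, the matcher copies variadic operands while
+  // skipping the root's two fixed inputs.
+  //   unsigned Flags    = OPFL_Chain | OPFL_Variadic2;
+  //   int      NumFixed = getNumFixedFromVariadicInfo(Flags);  // == 2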
+  /// ReplaceUses - replace all uses of the old node F with the use
+  /// of the new node T.
+  void ReplaceUses(SDValue F, SDValue T) {
+    ISelUpdater ISU(ISelPosition);
+    CurDAG->ReplaceAllUsesOfValueWith(F, T, &ISU);
+  }
+
+  /// ReplaceUses - replace all uses of the old nodes F with the use
+  /// of the new nodes T.
+  void ReplaceUses(const SDValue *F, const SDValue *T, unsigned Num) {
+    ISelUpdater ISU(ISelPosition);
+    CurDAG->ReplaceAllUsesOfValuesWith(F, T, Num, &ISU);
+  }
+
+  /// ReplaceUses - replace all uses of the old node F with the use
+  /// of the new node T.
+  void ReplaceUses(SDNode *F, SDNode *T) {
+    ISelUpdater ISU(ISelPosition);
+    CurDAG->ReplaceAllUsesWith(F, T, &ISU);
+  }
+
+
+  /// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+  /// by tblgen. Others should not call it.
+  void SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops);
+
+
+public:
+  // Calls to these predicates are generated by tblgen.
+  bool CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+                    int64_t DesiredMaskS) const;
+  bool CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                   int64_t DesiredMaskS) const;
+
+
+  /// CheckPatternPredicate - This function is generated by tblgen in the
+  /// target. It runs the specified pattern predicate and returns true if it
+  /// succeeds or false if it fails. The number is a private implementation
+  /// detail to the code tblgen produces.
+  virtual bool CheckPatternPredicate(unsigned PredNo) const {
+    assert(0 && "Tblgen should generate the implementation of this!");
+    return 0;
+  }
+
+  /// CheckNodePredicate - This function is generated by tblgen in the target.
+  /// It runs node predicate number PredNo and returns true if it succeeds or
+  /// false if it fails. The number is a private implementation
+  /// detail to the code tblgen produces.
+  virtual bool CheckNodePredicate(SDNode *N, unsigned PredNo) const {
+    assert(0 && "Tblgen should generate the implementation of this!");
+    return 0;
+  }
+
+  virtual bool CheckComplexPattern(SDNode *Root, SDNode *Parent, SDValue N,
+                                   unsigned PatternNo,
+                        SmallVectorImpl<std::pair<SDValue, SDNode*> > &Result) {
+    assert(0 && "Tblgen should generate the implementation of this!");
+    return false;
+  }
+
+  virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) {
+    assert(0 && "Tblgen should generate this!");
+    return SDValue();
+  }
+
+  SDNode *SelectCodeCommon(SDNode *NodeToMatch,
+                           const unsigned char *MatcherTable,
+                           unsigned TableSize);
+
+private:
+
+  // Calls to these functions are generated by tblgen.
+  SDNode *Select_INLINEASM(SDNode *N);
+  SDNode *Select_UNDEF(SDNode *N);
+  void CannotYetSelect(SDNode *N);
+
+private:
+  void DoInstructionSelection();
+  SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs,
+                    const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo);
+
+  void PrepareEHLandingPad();
+  void SelectAllBasicBlocks(const Function &Fn);
+  bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS);
+  void FinishBasicBlock();
+
+  void SelectBasicBlock(BasicBlock::const_iterator Begin,
+                        BasicBlock::const_iterator End,
+                        bool &HadTailCall);
+  void CodeGenAndEmitDAG();
+  void LowerArguments(const BasicBlock *BB);
+
+  void ComputeLiveOutVRegInfo();
+
+  /// Create the scheduler. If a specific scheduler was specified
+  /// via the SchedulerRegistry, use it, otherwise select the
+  /// one preferred by the target.
+  ///
+  ScheduleDAGSDNodes *CreateScheduler();
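+
+  // Sketch of the tblgen side (illustrative): each target's generated
+  // SelectCode simply forwards its byte-array MatcherTable to the
+  // SelectCodeCommon interpreter declared above.
+  //   SDNode *SelectCode(SDNode *N) {
+  //     return SelectCodeCommon(N, MatcherTable, sizeof(MatcherTable));
+  //   }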
+  /// OpcodeOffset - This is a cache used to dispatch efficiently into isel
+  /// state machines that start with a OPC_SwitchOpcode node.
+  std::vector<unsigned> OpcodeOffset;
+
+  void UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+                           const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                           SDValue InputGlue,
+                           const SmallVectorImpl<SDNode*> &F,
+                           bool isMorphNodeTo);
+
+};
+
+}
+
+#endif /* LLVM_CODEGEN_SELECTIONDAG_ISEL_H */
diff --git a/final/include/llvm/CodeGen/SelectionDAGNodes.h b/final/include/llvm/CodeGen/SelectionDAGNodes.h
new file mode 100644
index 00000000000..64546394ce9
--- /dev/null
+++ b/final/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -0,0 +1,1781 @@
+//===-- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SDNode class and derived classes, which are used to
+// represent the nodes and operations present in a SelectionDAG. These nodes
+// and operations are machine code level operations, with some similarities to
+// the GCC RTL representation.
+//
+// Clients should include the SelectionDAG.h file instead of this file directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
+#define LLVM_CODEGEN_SELECTIONDAGNODES_H
+
+#include "llvm/Constants.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+#include <cassert>
+
+namespace llvm {
+
+class SelectionDAG;
+class GlobalValue;
+class MachineBasicBlock;
+class MachineConstantPoolValue;
+class SDNode;
+class Value;
+class MCSymbol;
+template <typename T> struct DenseMapInfo;
+template <typename T> struct simplify_type;
+template <typename T> struct ilist_traits;
+
+void checkForCycles(const SDNode *N);
+
+/// SDVTList - This represents a list of ValueType's that has been intern'd by
+/// a SelectionDAG. Instances of this simple value class are returned by
+/// SelectionDAG::getVTList(...).
+///
+struct SDVTList {
+  const EVT *VTs;
+  unsigned int NumVTs;
+};
+
+namespace ISD {
+  /// Node predicates
+
+  /// isBuildVectorAllOnes - Return true if the specified node is a
+  /// BUILD_VECTOR where all of the elements are ~0 or undef.
+  bool isBuildVectorAllOnes(const SDNode *N);
+
+  /// isBuildVectorAllZeros - Return true if the specified node is a
+  /// BUILD_VECTOR where all of the elements are 0 or undef.
+  bool isBuildVectorAllZeros(const SDNode *N);
+
+  /// isScalarToVector - Return true if the specified node is a
+  /// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low
+  /// element is not an undef.
+  bool isScalarToVector(const SDNode *N);
+}  // end llvm::ISD namespace
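+
+// Usage sketch (illustrative; assumes SDNode *N in scope, e.g. inside a DAG
+// combine): the ISD predicates above let combines recognize idiomatic vector
+// constants without inspecting each BUILD_VECTOR operand by hand.
+//   if (ISD::isBuildVectorAllZeros(N)) {
+//     ... // fold as a zero-vector splat
+//   }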
+//===----------------------------------------------------------------------===//
+/// SDValue - Unlike LLVM values, Selection DAG nodes may return multiple
+/// values as the result of a computation. Many nodes return multiple values,
+/// from loads (which define a token and a return value) to ADDC (which returns
+/// a result and a carry value), to calls (which may return an arbitrary number
+/// of values).
+///
+/// As such, each use of a SelectionDAG computation must indicate the node that
+/// computes it as well as which return value to use from that node. This pair
+/// of information is represented with the SDValue value type.
+///
+class SDValue {
+  SDNode *Node;       // The node defining the value we are using.
+  unsigned ResNo;     // Which return value of the node we are using.
+public:
+  SDValue() : Node(0), ResNo(0) {}
+  SDValue(SDNode *node, unsigned resno) : Node(node), ResNo(resno) {}
+
+  /// get the index which selects a specific result in the SDNode
+  unsigned getResNo() const { return ResNo; }
+
+  /// get the SDNode which holds the desired result
+  SDNode *getNode() const { return Node; }
+
+  /// set the SDNode
+  void setNode(SDNode *N) { Node = N; }
+
+  inline SDNode *operator->() const { return Node; }
+
+  bool operator==(const SDValue &O) const {
+    return Node == O.Node && ResNo == O.ResNo;
+  }
+  bool operator!=(const SDValue &O) const {
+    return !operator==(O);
+  }
+  bool operator<(const SDValue &O) const {
+    return Node < O.Node || (Node == O.Node && ResNo < O.ResNo);
+  }
+
+  SDValue getValue(unsigned R) const {
+    return SDValue(Node, R);
+  }
+
+  // isOperandOf - Return true if this node is an operand of N.
+  bool isOperandOf(SDNode *N) const;
+
+  /// getValueType - Return the ValueType of the referenced return value.
+  ///
+  inline EVT getValueType() const;
+
+  /// getValueSizeInBits - Returns the size of the value in bits.
+  ///
+  unsigned getValueSizeInBits() const {
+    return getValueType().getSizeInBits();
+  }
+
+  // Forwarding methods - These forward to the corresponding methods in SDNode.
+  inline unsigned getOpcode() const;
+  inline unsigned getNumOperands() const;
+  inline const SDValue &getOperand(unsigned i) const;
+  inline uint64_t getConstantOperandVal(unsigned i) const;
+  inline bool isTargetMemoryOpcode() const;
+  inline bool isTargetOpcode() const;
+  inline bool isMachineOpcode() const;
+  inline unsigned getMachineOpcode() const;
+  inline const DebugLoc getDebugLoc() const;
+
+
+  /// reachesChainWithoutSideEffects - Return true if this operand (which must
+  /// be a chain) reaches the specified operand without crossing any
+  /// side-effecting instructions. In practice, this looks through token
+  /// factors and non-volatile loads. In order to remain efficient, this only
+  /// looks a couple of nodes in, it does not do an exhaustive search.
+  bool reachesChainWithoutSideEffects(SDValue Dest,
+                                      unsigned Depth = 2) const;
+
+  /// use_empty - Return true if there are no nodes using value ResNo
+  /// of Node.
+  ///
+  inline bool use_empty() const;
+
+  /// hasOneUse - Return true if there is exactly one node using value
+  /// ResNo of Node.
+  ///
+  inline bool hasOneUse() const;
+};
+
+
+template<> struct DenseMapInfo<SDValue> {
+  static inline SDValue getEmptyKey() {
+    return SDValue((SDNode*)-1, -1U);
+  }
+  static inline SDValue getTombstoneKey() {
+    return SDValue((SDNode*)-1, 0);
+  }
+  static unsigned getHashValue(const SDValue &Val) {
+    return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
+            (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
+  }
+  static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
+    return LHS == RHS;
+  }
+};
+template <> struct isPodLike<SDValue> { static const bool value = true; };
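+
+// Usage sketch (illustrative; assumes llvm/ADT/DenseMap.h is included and an
+// SDNode *N is in scope): the DenseMapInfo specialization above lets SDValue
+// serve directly as a DenseMap key.
+//   DenseMap<SDValue, unsigned> SeenCount;
+//   ++SeenCount[SDValue(N, 0)];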
+
+/// simplify_type specializations - Allow casting operators to work directly on
+/// SDValues as if they were SDNode*'s.
+template<> struct simplify_type<SDValue> {
+  typedef SDNode* SimpleType;
+  static SimpleType getSimplifiedValue(const SDValue &Val) {
+    return static_cast<SimpleType>(Val.getNode());
+  }
+};
+template<> struct simplify_type<const SDValue> {
+  typedef SDNode* SimpleType;
+  static SimpleType getSimplifiedValue(const SDValue &Val) {
+    return static_cast<SimpleType>(Val.getNode());
+  }
+};
+
+/// SDUse - Represents a use of a SDNode. This class holds an SDValue,
+/// which records the SDNode being used and the result number, a
+/// pointer to the SDNode using the value, and Next and Prev pointers,
+/// which link together all the uses of an SDNode.
+///
+class SDUse {
+  /// Val - The value being used.
+  SDValue Val;
+  /// User - The user of this value.
+  SDNode *User;
+  /// Prev, Next - Pointers to the uses list of the SDNode referred by
+  /// this operand.
+  SDUse **Prev, *Next;
+
+  SDUse(const SDUse &U);          // Do not implement
+  void operator=(const SDUse &U); // Do not implement
+
+public:
+  SDUse() : Val(), User(NULL), Prev(NULL), Next(NULL) {}
+
+  /// Normally SDUse will just implicitly convert to an SDValue that it holds.
+  operator const SDValue&() const { return Val; }
+
+  /// If implicit conversion to SDValue doesn't work, the get() method returns
+  /// the SDValue.
+  const SDValue &get() const { return Val; }
+
+  /// getUser - This returns the SDNode that contains this Use.
+  SDNode *getUser() { return User; }
+
+  /// getNext - Get the next SDUse in the use list.
+  SDUse *getNext() const { return Next; }
+
+  /// getNode - Convenience function for get().getNode().
+  SDNode *getNode() const { return Val.getNode(); }
+  /// getResNo - Convenience function for get().getResNo().
+  unsigned getResNo() const { return Val.getResNo(); }
+  /// getValueType - Convenience function for get().getValueType().
+  EVT getValueType() const { return Val.getValueType(); }
+
+  /// operator== - Convenience function for get().operator==
+  bool operator==(const SDValue &V) const {
+    return Val == V;
+  }
+
+  /// operator!= - Convenience function for get().operator!=
+  bool operator!=(const SDValue &V) const {
+    return Val != V;
+  }
+
+  /// operator< - Convenience function for get().operator<
+  bool operator<(const SDValue &V) const {
+    return Val < V;
+  }
+
+private:
+  friend class SelectionDAG;
+  friend class SDNode;
+
+  void setUser(SDNode *p) { User = p; }
+
+  /// set - Remove this use from its existing use list, assign it the
+  /// given value, and add it to the new value's node's use list.
+  inline void set(const SDValue &V);
+  /// setInitial - like set, but only supports initializing a newly-allocated
+  /// SDUse with a non-null value.
+  inline void setInitial(const SDValue &V);
+  /// setNode - like set, but only sets the Node portion of the value,
+  /// leaving the ResNo portion unmodified.
+  inline void setNode(SDNode *N);
+
+  void addToList(SDUse **List) {
+    Next = *List;
+    if (Next) Next->Prev = &Next;
+    Prev = List;
+    *List = this;
+  }
+
+  void removeFromList() {
+    *Prev = Next;
+    if (Next) Next->Prev = Prev;
+  }
+};
+
+/// simplify_type specializations - Allow casting operators to work directly on
+/// SDValues as if they were SDNode*'s.
+template<> struct simplify_type<SDUse> {
+  typedef SDNode* SimpleType;
+  static SimpleType getSimplifiedValue(const SDUse &Val) {
+    return static_cast<SimpleType>(Val.getNode());
+  }
+};
+template<> struct simplify_type<const SDUse> {
+  typedef SDNode* SimpleType;
+  static SimpleType getSimplifiedValue(const SDUse &Val) {
+    return static_cast<SimpleType>(Val.getNode());
+  }
+};
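+
+// Usage sketch (illustrative; assumes an SDValue Val in scope): these
+// specializations let the isa<>/cast<>/dyn_cast<> machinery look through
+// SDValue and SDUse to the underlying SDNode.
+//   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
+//     ... // inspect LD->getBasePtr(), LD->getMemoryVT(), etc.
+//   }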
+
+/// SDNode - Represents one node in the SelectionDAG.
+///
+class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
+private:
+  /// NodeType - The operation that this node performs.
+  ///
+  int16_t NodeType;
+
+  /// OperandsNeedDelete - This is true if OperandList was new[]'d. If true,
+  /// then they will be delete[]'d when the node is destroyed.
+  uint16_t OperandsNeedDelete : 1;
+
+  /// HasDebugValue - This tracks whether this node has one or more dbg_value
+  /// nodes corresponding to it.
+  uint16_t HasDebugValue : 1;
+
+protected:
+  /// SubclassData - This member is defined by this class, but is not used for
+  /// anything. Subclasses can use it to hold whatever state they find useful.
+  /// This field is initialized to zero by the ctor.
+  uint16_t SubclassData : 14;
+
+private:
+  /// NodeId - Unique id per SDNode in the DAG.
+  int NodeId;
+
+  /// OperandList - The values that are used by this operation.
+  ///
+  SDUse *OperandList;
+
+  /// ValueList - The types of the values this node defines. SDNodes may
+  /// define multiple values simultaneously.
+  const EVT *ValueList;
+
+  /// UseList - List of uses for this SDNode.
+  SDUse *UseList;
+
+  /// NumOperands/NumValues - The number of entries in the Operand/Value list.
+  unsigned short NumOperands, NumValues;
+
+  /// debugLoc - source line information.
+  DebugLoc debugLoc;
+
+  /// getValueTypeList - Return a pointer to the specified value type.
+  static const EVT *getValueTypeList(EVT VT);
+
+  friend class SelectionDAG;
+  friend struct ilist_traits<SDNode>;
+
+public:
+  //===--------------------------------------------------------------------===//
+  //  Accessors
+  //
+
+  /// getOpcode - Return the SelectionDAG opcode value for this node. For
+  /// pre-isel nodes (those for which isMachineOpcode returns false), these
+  /// are the opcode values in the ISD and <target>ISD namespaces. For
+  /// post-isel opcodes, see getMachineOpcode.
+  unsigned getOpcode() const { return (unsigned short)NodeType; }
+
+  /// isTargetOpcode - Test if this node has a target-specific opcode (in the
+  /// \<target\>ISD namespace).
+  bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }
+
+  /// isTargetMemoryOpcode - Test if this node has a target-specific
+  /// memory-referencing opcode (in the \<target\>ISD namespace and
+  /// greater than FIRST_TARGET_MEMORY_OPCODE).
+  bool isTargetMemoryOpcode() const {
+    return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
+  }
+
+  /// isMachineOpcode - Test if this node has a post-isel opcode, directly
+  /// corresponding to a MachineInstr opcode.
+  bool isMachineOpcode() const { return NodeType < 0; }
+
+  /// getMachineOpcode - This may only be called if isMachineOpcode returns
+  /// true. It returns the MachineInstr opcode value that the node's opcode
+  /// corresponds to.
+  unsigned getMachineOpcode() const {
+    assert(isMachineOpcode() && "Not a MachineInstr opcode!");
+    return ~NodeType;
+  }
+
+  /// getHasDebugValue - get this bit.
+  bool getHasDebugValue() const { return HasDebugValue; }
+
+  /// setHasDebugValue - set this bit.
+  void setHasDebugValue(bool b) { HasDebugValue = b; }
+
+  /// use_empty - Return true if there are no uses of this node.
+  ///
+  bool use_empty() const { return UseList == NULL; }
+
+  /// hasOneUse - Return true if there is exactly one use of this node.
+  ///
+  bool hasOneUse() const {
+    return !use_empty() && llvm::next(use_begin()) == use_end();
+  }
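+
+  // Usage sketch (illustrative; assumes SDNode *N): distinguishing pre-isel
+  // and post-isel opcodes while walking a DAG during selection.
+  //   if (N->isMachineOpcode()) {
+  //     unsigned MIOpc = N->getMachineOpcode();  // MachineInstr opcode
+  //   } else if (N->getOpcode() == ISD::LOAD) {
+  //     ... // still a target-independent node
+  //   }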
+
+  /// use_size - Return the number of uses of this node. This method takes
+  /// time proportional to the number of uses.
+  ///
+  size_t use_size() const { return std::distance(use_begin(), use_end()); }
+
+  /// getNodeId - Return the unique node id.
+  ///
+  int getNodeId() const { return NodeId; }
+
+  /// setNodeId - Set unique node id.
+  void setNodeId(int Id) { NodeId = Id; }
+
+  /// getDebugLoc - Return the source location info.
+  const DebugLoc getDebugLoc() const { return debugLoc; }
+
+  /// setDebugLoc - Set source location info. Try to avoid this;
+  /// setting it in the constructor is preferable.
+  void setDebugLoc(const DebugLoc dl) { debugLoc = dl; }
+
+  /// use_iterator - This class provides iterator support for SDUse
+  /// operands that use a specific SDNode.
+  class use_iterator
+    : public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
+    SDUse *Op;
+    explicit use_iterator(SDUse *op) : Op(op) {
+    }
+    friend class SDNode;
+  public:
+    typedef std::iterator<std::forward_iterator_tag,
+                          SDUse, ptrdiff_t>::reference reference;
+    typedef std::iterator<std::forward_iterator_tag,
+                          SDUse, ptrdiff_t>::pointer pointer;
+
+    use_iterator(const use_iterator &I) : Op(I.Op) {}
+    use_iterator() : Op(0) {}
+
+    bool operator==(const use_iterator &x) const {
+      return Op == x.Op;
+    }
+    bool operator!=(const use_iterator &x) const {
+      return !operator==(x);
+    }
+
+    /// atEnd - return true if this iterator is at the end of uses list.
+    bool atEnd() const { return Op == 0; }
+
+    // Iterator traversal: forward iteration only.
+    use_iterator &operator++() {          // Preincrement
+      assert(Op && "Cannot increment end iterator!");
+      Op = Op->getNext();
+      return *this;
+    }
+
+    use_iterator operator++(int) {        // Postincrement
+      use_iterator tmp = *this; ++*this; return tmp;
+    }
+
+    /// Retrieve a pointer to the current user node.
+    SDNode *operator*() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return Op->getUser();
+    }
+
+    SDNode *operator->() const { return operator*(); }
+
+    SDUse &getUse() const { return *Op; }
+
+    /// getOperandNo - Retrieve the operand # of this use in its user.
+    ///
+    unsigned getOperandNo() const {
+      assert(Op && "Cannot dereference end iterator!");
+      return (unsigned)(Op - Op->getUser()->OperandList);
+    }
+  };
+
+  /// use_begin/use_end - Provide iteration support to walk over all uses
+  /// of an SDNode.
+
+  use_iterator use_begin() const {
+    return use_iterator(UseList);
+  }
+
+  static use_iterator use_end() { return use_iterator(0); }
+
+
+  /// hasNUsesOfValue - Return true if there are exactly NUSES uses of the
+  /// indicated value. This method ignores uses of other values defined by this
+  /// operation.
+  bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;
+
+  /// hasAnyUseOfValue - Return true if there are any uses of the indicated
+  /// value. This method ignores uses of other values defined by this operation.
+  bool hasAnyUseOfValue(unsigned Value) const;
+
+  /// isOnlyUserOf - Return true if this node is the only use of N.
+  ///
+  bool isOnlyUserOf(SDNode *N) const;
+
+  /// isOperandOf - Return true if this node is an operand of N.
+  ///
+  bool isOperandOf(SDNode *N) const;
+
+  /// isPredecessorOf - Return true if this node is a predecessor of N. This
+  /// node is either an operand of N or it can be reached by recursively
+  /// traversing up the operands.
+  /// NOTE: this is an expensive method. Use it carefully.
+  bool isPredecessorOf(SDNode *N) const;
+
+  /// getNumOperands - Return the number of values used by this operation.
+  ///
+  unsigned getNumOperands() const { return NumOperands; }
+
+  /// getConstantOperandVal - Helper method returns the integer value of a
+  /// ConstantSDNode operand.
+ uint64_t getConstantOperandVal(unsigned Num) const; + + const SDValue &getOperand(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return OperandList[Num]; + } + + typedef SDUse* op_iterator; + op_iterator op_begin() const { return OperandList; } + op_iterator op_end() const { return OperandList+NumOperands; } + + SDVTList getVTList() const { + SDVTList X = { ValueList, NumValues }; + return X; + } + + /// getGluedNode - If this node has a glue operand, return the node + /// to which the glue operand points. Otherwise return NULL. + SDNode *getGluedNode() const { + if (getNumOperands() != 0 && + getOperand(getNumOperands()-1).getValueType() == MVT::Glue) + return getOperand(getNumOperands()-1).getNode(); + return 0; + } + + // If this is a pseudo op, like copyfromreg, look to see if there is a + // real target node glued to it. If so, return the target node. + const SDNode *getGluedMachineNode() const { + const SDNode *FoundNode = this; + + // Climb up glue edges until a machine-opcode node is found, or the + // end of the chain is reached. + while (!FoundNode->isMachineOpcode()) { + const SDNode *N = FoundNode->getGluedNode(); + if (!N) break; + FoundNode = N; + } + + return FoundNode; + } + + /// getGluedUser - If this node has a glue value with a user, return + /// the user (there is at most one). Otherwise return NULL. + SDNode *getGluedUser() const { + for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) + if (UI.getUse().get().getValueType() == MVT::Glue) + return *UI; + return 0; + } + + /// getNumValues - Return the number of values defined/returned by this + /// operator. + /// + unsigned getNumValues() const { return NumValues; } + + /// getValueType - Return the type of a specified result. + /// + EVT getValueType(unsigned ResNo) const { + assert(ResNo < NumValues && "Illegal result number!"); + return ValueList[ResNo]; + } + + /// getValueSizeInBits - Returns MVT::getSizeInBits(getValueType(ResNo)). + /// + unsigned getValueSizeInBits(unsigned ResNo) const { + return getValueType(ResNo).getSizeInBits(); + } + + typedef const EVT* value_iterator; + value_iterator value_begin() const { return ValueList; } + value_iterator value_end() const { return ValueList+NumValues; } + + /// getOperationName - Return the opcode of this operation for printing. + /// + std::string getOperationName(const SelectionDAG *G = 0) const; + static const char* getIndexedModeName(ISD::MemIndexedMode AM); + void print_types(raw_ostream &OS, const SelectionDAG *G) const; + void print_details(raw_ostream &OS, const SelectionDAG *G) const; + void print(raw_ostream &OS, const SelectionDAG *G = 0) const; + void printr(raw_ostream &OS, const SelectionDAG *G = 0) const; + + /// printrFull - Print a SelectionDAG node and all children down to + /// the leaves. The given SelectionDAG allows target-specific nodes + /// to be printed in human-readable form. Unlike printr, this will + /// print the whole DAG, including children that appear multiple + /// times. + /// + void printrFull(raw_ostream &O, const SelectionDAG *G = 0) const; + + /// printrWithDepth - Print a SelectionDAG node and children up to + /// depth "depth." The given SelectionDAG allows target-specific + /// nodes to be printed in human-readable form. Unlike printr, this + /// will print children that appear multiple times wherever they are + /// used. 
+ /// + void printrWithDepth(raw_ostream &O, const SelectionDAG *G = 0, + unsigned depth = 100) const; + + + /// dump - Dump this node, for debugging. + void dump() const; + + /// dumpr - Dump (recursively) this node and its use-def subgraph. + void dumpr() const; + + /// dump - Dump this node, for debugging. + /// The given SelectionDAG allows target-specific nodes to be printed + /// in human-readable form. + void dump(const SelectionDAG *G) const; + + /// dumpr - Dump (recursively) this node and its use-def subgraph. + /// The given SelectionDAG allows target-specific nodes to be printed + /// in human-readable form. + void dumpr(const SelectionDAG *G) const; + + /// dumprFull - printrFull to dbgs(). The given SelectionDAG allows + /// target-specific nodes to be printed in human-readable form. + /// Unlike dumpr, this will print the whole DAG, including children + /// that appear multiple times. + /// + void dumprFull(const SelectionDAG *G = 0) const; + + /// dumprWithDepth - printrWithDepth to dbgs(). The given + /// SelectionDAG allows target-specific nodes to be printed in + /// human-readable form. Unlike dumpr, this will print children + /// that appear multiple times wherever they are used. + /// + void dumprWithDepth(const SelectionDAG *G = 0, unsigned depth = 100) const; + + + static bool classof(const SDNode *) { return true; } + + /// Profile - Gather unique data for the node. + /// + void Profile(FoldingSetNodeID &ID) const; + + /// addUse - This method should only be used by the SDUse class. + /// + void addUse(SDUse &U) { U.addToList(&UseList); } + +protected: + static SDVTList getSDVTList(EVT VT) { + SDVTList Ret = { getValueTypeList(VT), 1 }; + return Ret; + } + + SDNode(unsigned Opc, const DebugLoc dl, SDVTList VTs, const SDValue *Ops, + unsigned NumOps) + : NodeType(Opc), OperandsNeedDelete(true), HasDebugValue(false), + SubclassData(0), NodeId(-1), + OperandList(NumOps ? new SDUse[NumOps] : 0), + ValueList(VTs.VTs), UseList(NULL), + NumOperands(NumOps), NumValues(VTs.NumVTs), + debugLoc(dl) { + for (unsigned i = 0; i != NumOps; ++i) { + OperandList[i].setUser(this); + OperandList[i].setInitial(Ops[i]); + } + checkForCycles(this); + } + + /// This constructor adds no operands itself; operands can be + /// set later with InitOperands. + SDNode(unsigned Opc, const DebugLoc dl, SDVTList VTs) + : NodeType(Opc), OperandsNeedDelete(false), HasDebugValue(false), + SubclassData(0), NodeId(-1), OperandList(0), ValueList(VTs.VTs), + UseList(NULL), NumOperands(0), NumValues(VTs.NumVTs), + debugLoc(dl) {} + + /// InitOperands - Initialize the operands list of this with 1 operand. + void InitOperands(SDUse *Ops, const SDValue &Op0) { + Ops[0].setUser(this); + Ops[0].setInitial(Op0); + NumOperands = 1; + OperandList = Ops; + checkForCycles(this); + } + + /// InitOperands - Initialize the operands list of this with 2 operands. + void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1) { + Ops[0].setUser(this); + Ops[0].setInitial(Op0); + Ops[1].setUser(this); + Ops[1].setInitial(Op1); + NumOperands = 2; + OperandList = Ops; + checkForCycles(this); + } + + /// InitOperands - Initialize the operands list of this with 3 operands. 
+ void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1, + const SDValue &Op2) { + Ops[0].setUser(this); + Ops[0].setInitial(Op0); + Ops[1].setUser(this); + Ops[1].setInitial(Op1); + Ops[2].setUser(this); + Ops[2].setInitial(Op2); + NumOperands = 3; + OperandList = Ops; + checkForCycles(this); + } + + /// InitOperands - Initialize the operands list of this with 4 operands. + void InitOperands(SDUse *Ops, const SDValue &Op0, const SDValue &Op1, + const SDValue &Op2, const SDValue &Op3) { + Ops[0].setUser(this); + Ops[0].setInitial(Op0); + Ops[1].setUser(this); + Ops[1].setInitial(Op1); + Ops[2].setUser(this); + Ops[2].setInitial(Op2); + Ops[3].setUser(this); + Ops[3].setInitial(Op3); + NumOperands = 4; + OperandList = Ops; + checkForCycles(this); + } + + /// InitOperands - Initialize the operands list of this with N operands. + void InitOperands(SDUse *Ops, const SDValue *Vals, unsigned N) { + for (unsigned i = 0; i != N; ++i) { + Ops[i].setUser(this); + Ops[i].setInitial(Vals[i]); + } + NumOperands = N; + OperandList = Ops; + checkForCycles(this); + } + + /// DropOperands - Release the operands and set this node to have + /// zero operands. + void DropOperands(); +}; + + +// Define inline functions from the SDValue class. + +inline unsigned SDValue::getOpcode() const { + return Node->getOpcode(); +} +inline EVT SDValue::getValueType() const { + return Node->getValueType(ResNo); +} +inline unsigned SDValue::getNumOperands() const { + return Node->getNumOperands(); +} +inline const SDValue &SDValue::getOperand(unsigned i) const { + return Node->getOperand(i); +} +inline uint64_t SDValue::getConstantOperandVal(unsigned i) const { + return Node->getConstantOperandVal(i); +} +inline bool SDValue::isTargetOpcode() const { + return Node->isTargetOpcode(); +} +inline bool SDValue::isTargetMemoryOpcode() const { + return Node->isTargetMemoryOpcode(); +} +inline bool SDValue::isMachineOpcode() const { + return Node->isMachineOpcode(); +} +inline unsigned SDValue::getMachineOpcode() const { + return Node->getMachineOpcode(); +} +inline bool SDValue::use_empty() const { + return !Node->hasAnyUseOfValue(ResNo); +} +inline bool SDValue::hasOneUse() const { + return Node->hasNUsesOfValue(1, ResNo); +} +inline const DebugLoc SDValue::getDebugLoc() const { + return Node->getDebugLoc(); +} + +// Define inline functions from the SDUse class. + +inline void SDUse::set(const SDValue &V) { + if (Val.getNode()) removeFromList(); + Val = V; + if (V.getNode()) V.getNode()->addUse(*this); +} + +inline void SDUse::setInitial(const SDValue &V) { + Val = V; + V.getNode()->addUse(*this); +} + +inline void SDUse::setNode(SDNode *N) { + if (Val.getNode()) removeFromList(); + Val.setNode(N); + if (N) N->addUse(*this); +} + +/// UnarySDNode - This class is used for single-operand SDNodes. This is solely +/// to allow co-allocation of node operands with the node itself. +class UnarySDNode : public SDNode { + SDUse Op; +public: + UnarySDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, SDValue X) + : SDNode(Opc, dl, VTs) { + InitOperands(&Op, X); + } +}; + +/// BinarySDNode - This class is used for two-operand SDNodes. This is solely +/// to allow co-allocation of node operands with the node itself. +class BinarySDNode : public SDNode { + SDUse Ops[2]; +public: + BinarySDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, SDValue X, SDValue Y) + : SDNode(Opc, dl, VTs) { + InitOperands(Ops, X, Y); + } +}; + +/// TernarySDNode - This class is used for three-operand SDNodes. 
This is solely
+/// to allow co-allocation of node operands with the node itself.
+class TernarySDNode : public SDNode {
+  SDUse Ops[3];
+public:
+  TernarySDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, SDValue X, SDValue Y,
+                SDValue Z)
+    : SDNode(Opc, dl, VTs) {
+    InitOperands(Ops, X, Y, Z);
+  }
+};
+
+
+/// HandleSDNode - This class is used to form a handle around another node that
+/// is persistent and is updated across invocations of replaceAllUsesWith on
+/// its operand. This node should be directly created by end-users and not
+/// added to the AllNodes list.
+class HandleSDNode : public SDNode {
+  SDUse Op;
+public:
+  // FIXME: Remove the "noinline" attribute once is
+  // fixed.
+#if __GNUC__==4 && __GNUC_MINOR__==2 && defined(__APPLE__) && !defined(__llvm__)
+  explicit __attribute__((__noinline__)) HandleSDNode(SDValue X)
+#else
+  explicit HandleSDNode(SDValue X)
+#endif
+    : SDNode(ISD::HANDLENODE, DebugLoc(), getSDVTList(MVT::Other)) {
+    InitOperands(&Op, X);
+  }
+  ~HandleSDNode();
+  const SDValue &getValue() const { return Op; }
+};
+
+/// MemSDNode - Abstract virtual class for memory operations.
+class MemSDNode : public SDNode {
+private:
+  // MemoryVT - VT of in-memory value.
+  EVT MemoryVT;
+
+protected:
+  /// MMO - Memory reference information.
+  MachineMemOperand *MMO;
+
+public:
+  MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT MemoryVT,
+            MachineMemOperand *MMO);
+
+  MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, const SDValue *Ops,
+            unsigned NumOps, EVT MemoryVT, MachineMemOperand *MMO);
+
+  bool readMem() const { return MMO->isLoad(); }
+  bool writeMem() const { return MMO->isStore(); }
+
+  /// Returns the alignment of the memory access.
+  unsigned getOriginalAlignment() const {
+    return MMO->getBaseAlignment();
+  }
+  unsigned getAlignment() const {
+    return MMO->getAlignment();
+  }
+
+  /// getRawSubclassData - Return the SubclassData value, which contains an
+  /// encoding of the volatile flag, as well as bits used by subclasses. This
+  /// function should only be used to compute a FoldingSetNodeID value.
+  unsigned getRawSubclassData() const {
+    return SubclassData;
+  }
+
+  // We access subclass data here so that we can check consistency
+  // with MachineMemOperand information.
+  bool isVolatile() const { return (SubclassData >> 5) & 1; }
+  bool isNonTemporal() const { return (SubclassData >> 6) & 1; }
+
+  /// Returns the SrcValue and offset that describe the location of the access.
+  const Value *getSrcValue() const { return MMO->getValue(); }
+  int64_t getSrcValueOffset() const { return MMO->getOffset(); }
+
+  /// Returns the TBAAInfo that describes the dereference.
+  const MDNode *getTBAAInfo() const { return MMO->getTBAAInfo(); }
+
+  /// getMemoryVT - Return the type of the in-memory value.
+  EVT getMemoryVT() const { return MemoryVT; }
+
+  /// getMemOperand - Return a MachineMemOperand object describing the memory
+  /// reference performed by this operation.
+  MachineMemOperand *getMemOperand() const { return MMO; }
+
+  const MachinePointerInfo &getPointerInfo() const {
+    return MMO->getPointerInfo();
+  }
+
+  /// refineAlignment - Update this MemSDNode's MachineMemOperand information
+  /// to reflect the alignment of NewMMO, if it has a greater alignment.
+  /// This must only be used when the new alignment applies to all users of
+  /// this MachineMemOperand.
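+  /// Illustrative use, assuming two MemSDNodes 'A' and 'B' that are known
+  /// to access the same location:
+  ///   A->refineAlignment(B->getMemOperand());
+  /// This leaves A's alignment unchanged unless B's MachineMemOperand
+  /// reports a greater one.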
+  void refineAlignment(const MachineMemOperand *NewMMO) {
+    MMO->refineAlignment(NewMMO);
+  }
+
+  const SDValue &getChain() const { return getOperand(0); }
+  const SDValue &getBasePtr() const {
+    return getOperand(getOpcode() == ISD::STORE ? 2 : 1);
+  }
+
+  // Methods to support isa and dyn_cast
+  static bool classof(const MemSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    // For some targets, we lower some target intrinsics to a MemIntrinsicNode
+    // with either an intrinsic or a target opcode.
+    return N->getOpcode() == ISD::LOAD ||
+           N->getOpcode() == ISD::STORE ||
+           N->getOpcode() == ISD::PREFETCH ||
+           N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+           N->getOpcode() == ISD::ATOMIC_SWAP ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX ||
+           N->isTargetMemoryOpcode();
+  }
+};
+
+/// AtomicSDNode - An SDNode representing atomic operations.
+///
+class AtomicSDNode : public MemSDNode {
+  SDUse Ops[4];
+
+public:
+  // Opc:    opcode for atomic
+  // VTL:    value type list
+  // Chain:  memory chain for operand
+  // Ptr:    address to update as an SDValue
+  // Cmp:    compare value
+  // Swp:    swap value
+  // SrcVal: address to update as a Value (used for MemOperand)
+  // Align:  alignment of memory
+  AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT,
+               SDValue Chain, SDValue Ptr,
+               SDValue Cmp, SDValue Swp, MachineMemOperand *MMO)
+    : MemSDNode(Opc, dl, VTL, MemVT, MMO) {
+    assert(readMem() && "Atomic MachineMemOperand is not a load!");
+    assert(writeMem() && "Atomic MachineMemOperand is not a store!");
+    InitOperands(Ops, Chain, Ptr, Cmp, Swp);
+  }
+  AtomicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTL, EVT MemVT,
+               SDValue Chain, SDValue Ptr,
+               SDValue Val, MachineMemOperand *MMO)
+    : MemSDNode(Opc, dl, VTL, MemVT, MMO) {
+    assert(readMem() && "Atomic MachineMemOperand is not a load!");
+    assert(writeMem() && "Atomic MachineMemOperand is not a store!");
+    InitOperands(Ops, Chain, Ptr, Val);
+  }
+
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getVal() const { return getOperand(2); }
+
+  bool isCompareAndSwap() const {
+    unsigned Op = getOpcode();
+    return Op == ISD::ATOMIC_CMP_SWAP;
+  }
+
+  // Methods to support isa and dyn_cast
+  static bool classof(const AtomicSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::ATOMIC_CMP_SWAP ||
+           N->getOpcode() == ISD::ATOMIC_SWAP ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_ADD ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_SUB ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_AND ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_OR ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_XOR ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_NAND ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MIN ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_MAX ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMIN ||
+           N->getOpcode() == ISD::ATOMIC_LOAD_UMAX;
+  }
+};
+
+/// MemIntrinsicSDNode - This SDNode is used for target intrinsics that touch
+/// memory and need an associated MachineMemOperand. Its opcode may be
+/// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode
+/// with a value not less than FIRST_TARGET_MEMORY_OPCODE.
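+/// Nodes of this kind are typically inspected via dyn_cast; e.g., assuming
+/// 'N' is an arbitrary SDNode*:
+///   if (MemIntrinsicSDNode *M = dyn_cast<MemIntrinsicSDNode>(N))
+///     MachineMemOperand *MMO = M->getMemOperand();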
+class MemIntrinsicSDNode : public MemSDNode {
+public:
+  MemIntrinsicSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
+                     const SDValue *Ops, unsigned NumOps,
+                     EVT MemoryVT, MachineMemOperand *MMO)
+    : MemSDNode(Opc, dl, VTs, Ops, NumOps, MemoryVT, MMO) {
+  }
+
+  // Methods to support isa and dyn_cast
+  static bool classof(const MemIntrinsicSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    // We lower some target intrinsics to their target opcode early, so a
+    // node with a target opcode can be of this class.
+    return N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+           N->getOpcode() == ISD::INTRINSIC_VOID ||
+           N->getOpcode() == ISD::PREFETCH ||
+           N->isTargetMemoryOpcode();
+  }
+};
+
+/// ShuffleVectorSDNode - This SDNode is used to implement the code generator
+/// support for the llvm IR shufflevector instruction. It combines elements
+/// from two input vectors into a new vector, with the selection and ordering
+/// of elements determined by an array of integers, referred to as the shuffle
+/// mask. For input vectors of width N, mask indices of 0..N-1 refer to
+/// elements from the LHS input, and indices from N to 2N-1 refer to elements
+/// from the RHS. An index of -1 is treated as undef, such that the code
+/// generator may put any value in the corresponding element of the result.
+class ShuffleVectorSDNode : public SDNode {
+  SDUse Ops[2];
+
+  // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
+  // is freed when the SelectionDAG object is destroyed.
+  const int *Mask;
+protected:
+  friend class SelectionDAG;
+  ShuffleVectorSDNode(EVT VT, DebugLoc dl, SDValue N1, SDValue N2,
+                      const int *M)
+    : SDNode(ISD::VECTOR_SHUFFLE, dl, getSDVTList(VT)), Mask(M) {
+    InitOperands(Ops, N1, N2);
+  }
+public:
+
+  void getMask(SmallVectorImpl<int> &M) const {
+    EVT VT = getValueType(0);
+    M.clear();
+    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+      M.push_back(Mask[i]);
+  }
+  int getMaskElt(unsigned Idx) const {
+    assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
+    return Mask[Idx];
+  }
+
+  bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
+  int getSplatIndex() const {
+    assert(isSplat() && "Cannot get splat index for non-splat!");
+    EVT VT = getValueType(0);
+    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+      if (Mask[i] != -1)
+        return Mask[i];
+    }
+    return -1;
+  }
+  static bool isSplatMask(const int *Mask, EVT VT);
+
+  static bool classof(const ShuffleVectorSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::VECTOR_SHUFFLE;
+  }
+};
+
+class ConstantSDNode : public SDNode {
+  const ConstantInt *Value;
+  friend class SelectionDAG;
+  ConstantSDNode(bool isTarget, const ConstantInt *val, EVT VT)
+    : SDNode(isTarget ?
ISD::TargetConstant : ISD::Constant, + DebugLoc(), getSDVTList(VT)), Value(val) { + } +public: + + const ConstantInt *getConstantIntValue() const { return Value; } + const APInt &getAPIntValue() const { return Value->getValue(); } + uint64_t getZExtValue() const { return Value->getZExtValue(); } + int64_t getSExtValue() const { return Value->getSExtValue(); } + + bool isOne() const { return Value->isOne(); } + bool isNullValue() const { return Value->isNullValue(); } + bool isAllOnesValue() const { return Value->isAllOnesValue(); } + + static bool classof(const ConstantSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::Constant || + N->getOpcode() == ISD::TargetConstant; + } +}; + +class ConstantFPSDNode : public SDNode { + const ConstantFP *Value; + friend class SelectionDAG; + ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) + : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, + DebugLoc(), getSDVTList(VT)), Value(val) { + } +public: + + const APFloat& getValueAPF() const { return Value->getValueAPF(); } + const ConstantFP *getConstantFPValue() const { return Value; } + + /// isZero - Return true if the value is positive or negative zero. + bool isZero() const { return Value->isZero(); } + + /// isNaN - Return true if the value is a NaN. + bool isNaN() const { return Value->isNaN(); } + + /// isExactlyValue - We don't rely on operator== working on double values, as + /// it returns true for things that are clearly not equal, like -0.0 and 0.0. + /// As such, this method can be used to do an exact bit-for-bit comparison of + /// two floating point values. + + /// We leave the version with the double argument here because it's just so + /// convenient to write "2.0" and the like. Without this function we'd + /// have to duplicate its logic everywhere it's called. + bool isExactlyValue(double V) const { + bool ignored; + // convert is not supported on this type + if (&Value->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) + return false; + APFloat Tmp(V); + Tmp.convert(Value->getValueAPF().getSemantics(), + APFloat::rmNearestTiesToEven, &ignored); + return isExactlyValue(Tmp); + } + bool isExactlyValue(const APFloat& V) const; + + static bool isValueValidForType(EVT VT, const APFloat& Val); + + static bool classof(const ConstantFPSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::ConstantFP || + N->getOpcode() == ISD::TargetConstantFP; + } +}; + +class GlobalAddressSDNode : public SDNode { + const GlobalValue *TheGlobal; + int64_t Offset; + unsigned char TargetFlags; + friend class SelectionDAG; + GlobalAddressSDNode(unsigned Opc, DebugLoc DL, const GlobalValue *GA, EVT VT, + int64_t o, unsigned char TargetFlags); +public: + + const GlobalValue *getGlobal() const { return TheGlobal; } + int64_t getOffset() const { return Offset; } + unsigned char getTargetFlags() const { return TargetFlags; } + // Return the address space this GlobalAddress belongs to. + unsigned getAddressSpace() const; + + static bool classof(const GlobalAddressSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::GlobalAddress || + N->getOpcode() == ISD::TargetGlobalAddress || + N->getOpcode() == ISD::GlobalTLSAddress || + N->getOpcode() == ISD::TargetGlobalTLSAddress; + } +}; + +class FrameIndexSDNode : public SDNode { + int FI; + friend class SelectionDAG; + FrameIndexSDNode(int fi, EVT VT, bool isTarg) + : SDNode(isTarg ? 
ISD::TargetFrameIndex : ISD::FrameIndex, + DebugLoc(), getSDVTList(VT)), FI(fi) { + } +public: + + int getIndex() const { return FI; } + + static bool classof(const FrameIndexSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::FrameIndex || + N->getOpcode() == ISD::TargetFrameIndex; + } +}; + +class JumpTableSDNode : public SDNode { + int JTI; + unsigned char TargetFlags; + friend class SelectionDAG; + JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF) + : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, + DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { + } +public: + + int getIndex() const { return JTI; } + unsigned char getTargetFlags() const { return TargetFlags; } + + static bool classof(const JumpTableSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::JumpTable || + N->getOpcode() == ISD::TargetJumpTable; + } +}; + +class ConstantPoolSDNode : public SDNode { + union { + const Constant *ConstVal; + MachineConstantPoolValue *MachineCPVal; + } Val; + int Offset; // It's a MachineConstantPoolValue if top bit is set. + unsigned Alignment; // Minimum alignment requirement of CP (not log2 value). + unsigned char TargetFlags; + friend class SelectionDAG; + ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, + unsigned Align, unsigned char TF) + : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, + DebugLoc(), + getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { + assert((int)Offset >= 0 && "Offset is too large"); + Val.ConstVal = c; + } + ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, + EVT VT, int o, unsigned Align, unsigned char TF) + : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, + DebugLoc(), + getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) { + assert((int)Offset >= 0 && "Offset is too large"); + Val.MachineCPVal = v; + Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1); + } +public: + + + bool isMachineConstantPoolEntry() const { + return (int)Offset < 0; + } + + const Constant *getConstVal() const { + assert(!isMachineConstantPoolEntry() && "Wrong constantpool type"); + return Val.ConstVal; + } + + MachineConstantPoolValue *getMachineCPVal() const { + assert(isMachineConstantPoolEntry() && "Wrong constantpool type"); + return Val.MachineCPVal; + } + + int getOffset() const { + return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT-1)); + } + + // Return the alignment of this constant pool object, which is either 0 (for + // default alignment) or the desired value. + unsigned getAlignment() const { return Alignment; } + unsigned char getTargetFlags() const { return TargetFlags; } + + const Type *getType() const; + + static bool classof(const ConstantPoolSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::ConstantPool || + N->getOpcode() == ISD::TargetConstantPool; + } +}; + +class BasicBlockSDNode : public SDNode { + MachineBasicBlock *MBB; + friend class SelectionDAG; + /// Debug info is meaningful and potentially useful here, but we create + /// blocks out of order when they're jumped to, which makes it a bit + /// harder. Let's see if we need it first. 
+ explicit BasicBlockSDNode(MachineBasicBlock *mbb) + : SDNode(ISD::BasicBlock, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb) { + } +public: + + MachineBasicBlock *getBasicBlock() const { return MBB; } + + static bool classof(const BasicBlockSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::BasicBlock; + } +}; + +/// BuildVectorSDNode - A "pseudo-class" with methods for operating on +/// BUILD_VECTORs. +class BuildVectorSDNode : public SDNode { + // These are constructed as SDNodes and then cast to BuildVectorSDNodes. + explicit BuildVectorSDNode(); // Do not implement +public: + /// isConstantSplat - Check if this is a constant splat, and if so, find the + /// smallest element size that splats the vector. If MinSplatBits is + /// nonzero, the element size must be at least that large. Note that the + /// splat element may be the entire vector (i.e., a one element vector). + /// Returns the splat element value in SplatValue. Any undefined bits in + /// that value are zero, and the corresponding bits in the SplatUndef mask + /// are set. The SplatBitSize value is set to the splat element size in + /// bits. HasAnyUndefs is set to true if any bits in the vector are + /// undefined. isBigEndian describes the endianness of the target. + bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, + unsigned &SplatBitSize, bool &HasAnyUndefs, + unsigned MinSplatBits = 0, bool isBigEndian = false); + + static inline bool classof(const BuildVectorSDNode *) { return true; } + static inline bool classof(const SDNode *N) { + return N->getOpcode() == ISD::BUILD_VECTOR; + } +}; + +/// SrcValueSDNode - An SDNode that holds an arbitrary LLVM IR Value. This is +/// used when the SelectionDAG needs to make a simple reference to something +/// in the LLVM IR representation. +/// +class SrcValueSDNode : public SDNode { + const Value *V; + friend class SelectionDAG; + /// Create a SrcValue for a general value. + explicit SrcValueSDNode(const Value *v) + : SDNode(ISD::SRCVALUE, DebugLoc(), getSDVTList(MVT::Other)), V(v) {} + +public: + /// getValue - return the contained Value. 
+ const Value *getValue() const { return V; } + + static bool classof(const SrcValueSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::SRCVALUE; + } +}; + +class MDNodeSDNode : public SDNode { + const MDNode *MD; + friend class SelectionDAG; + explicit MDNodeSDNode(const MDNode *md) + : SDNode(ISD::MDNODE_SDNODE, DebugLoc(), getSDVTList(MVT::Other)), MD(md) {} +public: + + const MDNode *getMD() const { return MD; } + + static bool classof(const MDNodeSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::MDNODE_SDNODE; + } +}; + + +class RegisterSDNode : public SDNode { + unsigned Reg; + friend class SelectionDAG; + RegisterSDNode(unsigned reg, EVT VT) + : SDNode(ISD::Register, DebugLoc(), getSDVTList(VT)), Reg(reg) { + } +public: + + unsigned getReg() const { return Reg; } + + static bool classof(const RegisterSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::Register; + } +}; + +class BlockAddressSDNode : public SDNode { + const BlockAddress *BA; + unsigned char TargetFlags; + friend class SelectionDAG; + BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba, + unsigned char Flags) + : SDNode(NodeTy, DebugLoc(), getSDVTList(VT)), + BA(ba), TargetFlags(Flags) { + } +public: + const BlockAddress *getBlockAddress() const { return BA; } + unsigned char getTargetFlags() const { return TargetFlags; } + + static bool classof(const BlockAddressSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::BlockAddress || + N->getOpcode() == ISD::TargetBlockAddress; + } +}; + +class EHLabelSDNode : public SDNode { + SDUse Chain; + MCSymbol *Label; + friend class SelectionDAG; + EHLabelSDNode(DebugLoc dl, SDValue ch, MCSymbol *L) + : SDNode(ISD::EH_LABEL, dl, getSDVTList(MVT::Other)), Label(L) { + InitOperands(&Chain, ch); + } +public: + MCSymbol *getLabel() const { return Label; } + + static bool classof(const EHLabelSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::EH_LABEL; + } +}; + +class ExternalSymbolSDNode : public SDNode { + const char *Symbol; + unsigned char TargetFlags; + + friend class SelectionDAG; + ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT) + : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, + DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) { + } +public: + + const char *getSymbol() const { return Symbol; } + unsigned char getTargetFlags() const { return TargetFlags; } + + static bool classof(const ExternalSymbolSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::ExternalSymbol || + N->getOpcode() == ISD::TargetExternalSymbol; + } +}; + +class CondCodeSDNode : public SDNode { + ISD::CondCode Condition; + friend class SelectionDAG; + explicit CondCodeSDNode(ISD::CondCode Cond) + : SDNode(ISD::CONDCODE, DebugLoc(), getSDVTList(MVT::Other)), + Condition(Cond) { + } +public: + + ISD::CondCode get() const { return Condition; } + + static bool classof(const CondCodeSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::CONDCODE; + } +}; + +/// CvtRndSatSDNode - NOTE: avoid using this node as this may disappear in the +/// future and most targets don't support it. 
+class CvtRndSatSDNode : public SDNode { + ISD::CvtCode CvtCode; + friend class SelectionDAG; + explicit CvtRndSatSDNode(EVT VT, DebugLoc dl, const SDValue *Ops, + unsigned NumOps, ISD::CvtCode Code) + : SDNode(ISD::CONVERT_RNDSAT, dl, getSDVTList(VT), Ops, NumOps), + CvtCode(Code) { + assert(NumOps == 5 && "wrong number of operations"); + } +public: + ISD::CvtCode getCvtCode() const { return CvtCode; } + + static bool classof(const CvtRndSatSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::CONVERT_RNDSAT; + } +}; + +/// VTSDNode - This class is used to represent EVT's, which are used +/// to parameterize some operations. +class VTSDNode : public SDNode { + EVT ValueType; + friend class SelectionDAG; + explicit VTSDNode(EVT VT) + : SDNode(ISD::VALUETYPE, DebugLoc(), getSDVTList(MVT::Other)), + ValueType(VT) { + } +public: + + EVT getVT() const { return ValueType; } + + static bool classof(const VTSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::VALUETYPE; + } +}; + +/// LSBaseSDNode - Base class for LoadSDNode and StoreSDNode +/// +class LSBaseSDNode : public MemSDNode { + //! Operand array for load and store + /*! + \note Moving this array to the base class captures more + common functionality shared between LoadSDNode and + StoreSDNode + */ + SDUse Ops[4]; +public: + LSBaseSDNode(ISD::NodeType NodeTy, DebugLoc dl, SDValue *Operands, + unsigned numOperands, SDVTList VTs, ISD::MemIndexedMode AM, + EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, dl, VTs, MemVT, MMO) { + SubclassData |= AM << 2; + assert(getAddressingMode() == AM && "MemIndexedMode encoding error!"); + InitOperands(Ops, Operands, numOperands); + assert((getOffset().getOpcode() == ISD::UNDEF || isIndexed()) && + "Only indexed loads and stores have a non-undef offset operand"); + } + + const SDValue &getOffset() const { + return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); + } + + /// getAddressingMode - Return the addressing mode for this load or store: + /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. + ISD::MemIndexedMode getAddressingMode() const { + return ISD::MemIndexedMode((SubclassData >> 2) & 7); + } + + /// isIndexed - Return true if this is a pre/post inc/dec load/store. + bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } + + /// isUnindexed - Return true if this is NOT a pre/post inc/dec load/store. + bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } + + static bool classof(const LSBaseSDNode *) { return true; } + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::LOAD || + N->getOpcode() == ISD::STORE; + } +}; + +/// LoadSDNode - This class is used to represent ISD::LOAD nodes. +/// +class LoadSDNode : public LSBaseSDNode { + friend class SelectionDAG; + LoadSDNode(SDValue *ChainPtrOff, DebugLoc dl, SDVTList VTs, + ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, + MachineMemOperand *MMO) + : LSBaseSDNode(ISD::LOAD, dl, ChainPtrOff, 3, + VTs, AM, MemVT, MMO) { + SubclassData |= (unsigned short)ETy; + assert(getExtensionType() == ETy && "LoadExtType encoding error!"); + assert(readMem() && "Load MachineMemOperand is not a load!"); + assert(!writeMem() && "Load MachineMemOperand is a store!"); + } +public: + + /// getExtensionType - Return whether this is a plain node, + /// or one of the varieties of value-extending loads. 
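+  /// For example, assuming 'N' is an SDNode* already known to be a load:
+  ///   LoadSDNode *LD = cast<LoadSDNode>(N);
+  ///   bool IsSExt = LD->getExtensionType() == ISD::SEXTLOAD;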
+  ISD::LoadExtType getExtensionType() const {
+    return ISD::LoadExtType(SubclassData & 3);
+  }
+
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getOffset() const { return getOperand(2); }
+
+  static bool classof(const LoadSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::LOAD;
+  }
+};
+
+/// StoreSDNode - This class is used to represent ISD::STORE nodes.
+///
+class StoreSDNode : public LSBaseSDNode {
+  friend class SelectionDAG;
+  StoreSDNode(SDValue *ChainValuePtrOff, DebugLoc dl, SDVTList VTs,
+              ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
+              MachineMemOperand *MMO)
+    : LSBaseSDNode(ISD::STORE, dl, ChainValuePtrOff, 4,
+                   VTs, AM, MemVT, MMO) {
+    SubclassData |= (unsigned short)isTrunc;
+    assert(isTruncatingStore() == isTrunc && "isTrunc encoding error!");
+    assert(!readMem() && "Store MachineMemOperand is a load!");
+    assert(writeMem() && "Store MachineMemOperand is not a store!");
+  }
+public:
+
+  /// isTruncatingStore - Return true if the op does a truncation before store.
+  /// For integers this is the same as doing a TRUNCATE and storing the result.
+  /// For floats, it is the same as doing an FP_ROUND and storing the result.
+  bool isTruncatingStore() const { return SubclassData & 1; }
+
+  const SDValue &getValue() const { return getOperand(1); }
+  const SDValue &getBasePtr() const { return getOperand(2); }
+  const SDValue &getOffset() const { return getOperand(3); }
+
+  static bool classof(const StoreSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::STORE;
+  }
+};
+
+/// MachineSDNode - An SDNode that represents everything that will be needed
+/// to construct a MachineInstr. These nodes are created during the
+/// instruction selection proper phase.
+///
+class MachineSDNode : public SDNode {
+public:
+  typedef MachineMemOperand **mmo_iterator;
+
+private:
+  friend class SelectionDAG;
+  MachineSDNode(unsigned Opc, const DebugLoc DL, SDVTList VTs)
+    : SDNode(Opc, DL, VTs), MemRefs(0), MemRefsEnd(0) {}
+
+  /// LocalOperands - Operands for this instruction, if they fit here. If
+  /// they don't, this field is unused.
+  SDUse LocalOperands[4];
+
+  /// MemRefs - Memory reference descriptions for this instruction.
+  mmo_iterator MemRefs;
+  mmo_iterator MemRefsEnd;
+
+public:
+  mmo_iterator memoperands_begin() const { return MemRefs; }
+  mmo_iterator memoperands_end() const { return MemRefsEnd; }
+  bool memoperands_empty() const { return MemRefsEnd == MemRefs; }
+
+  /// setMemRefs - Assign this MachineSDNode's memory reference descriptor
+  /// list. This does not transfer ownership.
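+  /// For illustration, assuming a MachineFunction 'MF', a MachineSDNode
+  /// 'MN', and a single MachineMemOperand 'MMO' (allocateMemRefsArray being
+  /// the usual allocator for this array):
+  ///   mmo_iterator I = MF.allocateMemRefsArray(1);
+  ///   I[0] = MMO;
+  ///   MN->setMemRefs(I, I + 1);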
+  void setMemRefs(mmo_iterator NewMemRefs, mmo_iterator NewMemRefsEnd) {
+    MemRefs = NewMemRefs;
+    MemRefsEnd = NewMemRefsEnd;
+  }
+
+  static bool classof(const MachineSDNode *) { return true; }
+  static bool classof(const SDNode *N) {
+    return N->isMachineOpcode();
+  }
+};
+
+class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
+                                            SDNode, ptrdiff_t> {
+  SDNode *Node;
+  unsigned Operand;
+
+  SDNodeIterator(SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
+public:
+  bool operator==(const SDNodeIterator& x) const {
+    return Operand == x.Operand;
+  }
+  bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }
+
+  const SDNodeIterator &operator=(const SDNodeIterator &I) {
+    assert(I.Node == Node && "Cannot assign iterators to two different nodes!");
+    Operand = I.Operand;
+    return *this;
+  }
+
+  pointer operator*() const {
+    return Node->getOperand(Operand).getNode();
+  }
+  pointer operator->() const { return operator*(); }
+
+  SDNodeIterator& operator++() {                // Preincrement
+    ++Operand;
+    return *this;
+  }
+  SDNodeIterator operator++(int) { // Postincrement
+    SDNodeIterator tmp = *this; ++*this; return tmp;
+  }
+  size_t operator-(SDNodeIterator Other) const {
+    assert(Node == Other.Node &&
+           "Cannot compare iterators of two different nodes!");
+    return Operand - Other.Operand;
+  }
+
+  static SDNodeIterator begin(SDNode *N) { return SDNodeIterator(N, 0); }
+  static SDNodeIterator end  (SDNode *N) {
+    return SDNodeIterator(N, N->getNumOperands());
+  }
+
+  unsigned getOperand() const { return Operand; }
+  const SDNode *getNode() const { return Node; }
+};
+
+template <> struct GraphTraits<SDNode*> {
+  typedef SDNode NodeType;
+  typedef SDNodeIterator ChildIteratorType;
+  static inline NodeType *getEntryNode(SDNode *N) { return N; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return SDNodeIterator::begin(N);
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return SDNodeIterator::end(N);
+  }
+};
+
+/// LargestSDNode - The largest SDNode class.
+///
+typedef LoadSDNode LargestSDNode;
+
+/// MostAlignedSDNode - The SDNode class with the greatest alignment
+/// requirement.
+///
+typedef GlobalAddressSDNode MostAlignedSDNode;
+
+namespace ISD {
+  /// isNormalLoad - Returns true if the specified node is a non-extending
+  /// and unindexed load.
+  inline bool isNormalLoad(const SDNode *N) {
+    const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
+    return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD &&
+           Ld->getAddressingMode() == ISD::UNINDEXED;
+  }
+
+  /// isNON_EXTLoad - Returns true if the specified node is a non-extending
+  /// load.
+  inline bool isNON_EXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+           cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+  }
+
+  /// isEXTLoad - Returns true if the specified node is an EXTLOAD.
+  ///
+  inline bool isEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+           cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
+  }
+
+  /// isSEXTLoad - Returns true if the specified node is a SEXTLOAD.
+  ///
+  inline bool isSEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+           cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+  }
+
+  /// isZEXTLoad - Returns true if the specified node is a ZEXTLOAD.
+  ///
+  inline bool isZEXTLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+           cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+  }
+
+  /// isUNINDEXEDLoad - Returns true if the specified node is an unindexed load.
+  ///
+  inline bool isUNINDEXEDLoad(const SDNode *N) {
+    return isa<LoadSDNode>(N) &&
+           cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+  }
+
+  /// isNormalStore - Returns true if the specified node is a non-truncating
+  /// and unindexed store.
+  inline bool isNormalStore(const SDNode *N) {
+    const StoreSDNode *St = dyn_cast<StoreSDNode>(N);
+    return St && !St->isTruncatingStore() &&
+           St->getAddressingMode() == ISD::UNINDEXED;
+  }
+
+  /// isNON_TRUNCStore - Returns true if the specified node is a non-truncating
+  /// store.
+  inline bool isNON_TRUNCStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) && !cast<StoreSDNode>(N)->isTruncatingStore();
+  }
+
+  /// isTRUNCStore - Returns true if the specified node is a truncating
+  /// store.
+  inline bool isTRUNCStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) && cast<StoreSDNode>(N)->isTruncatingStore();
+  }
+
+  /// isUNINDEXEDStore - Returns true if the specified node is an
+  /// unindexed store.
+  inline bool isUNINDEXEDStore(const SDNode *N) {
+    return isa<StoreSDNode>(N) &&
+           cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+  }
+}
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/SlotIndexes.h b/final/include/llvm/CodeGen/SlotIndexes.h
new file mode 100644
index 00000000000..0e2adb58977
--- /dev/null
+++ b/final/include/llvm/CodeGen/SlotIndexes.h
@@ -0,0 +1,728 @@
+//===- llvm/CodeGen/SlotIndexes.h - Slot indexes representation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements SlotIndex and related classes. The purpose of SlotIndex
+// is to describe a position at which a register can become live, or cease to
+// be live.
+//
+// SlotIndex is mostly a proxy for entries of the SlotIndexList, a class which
+// is held by LiveIntervals and provides the real numbering. This allows
+// LiveIntervals to perform largely transparent renumbering.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SLOTINDEXES_H
+#define LLVM_CODEGEN_SLOTINDEXES_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+
+  /// This class represents an entry in the slot index list held in the
+  /// SlotIndexes pass. It should not be used directly. See the
+  /// SlotIndex & SlotIndexes classes for the public interface to this
+  /// information.
+  class IndexListEntry {
+    IndexListEntry *next, *prev;
+    MachineInstr *mi;
+    unsigned index;
+
+  public:
+
+    IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {}
+
+    MachineInstr* getInstr() const { return mi; }
+    void setInstr(MachineInstr *mi) {
+      this->mi = mi;
+    }
+
+    unsigned getIndex() const { return index; }
+    void setIndex(unsigned index) {
+      this->index = index;
+    }
+
+    IndexListEntry* getNext() { return next; }
+    const IndexListEntry* getNext() const { return next; }
+    void setNext(IndexListEntry *next) {
+      this->next = next;
+    }
+
+    IndexListEntry* getPrev() { return prev; }
+    const IndexListEntry* getPrev() const { return prev; }
+    void setPrev(IndexListEntry *prev) {
+      this->prev = prev;
+    }
+  };
+
+  // Specialize PointerLikeTypeTraits for IndexListEntry.
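+  // This frees the pointer's low bits, which is what lets SlotIndex (below)
+  // pack its Slot number into a PointerIntPair alongside the entry pointer.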
+  template <>
+  class PointerLikeTypeTraits<IndexListEntry*> {
+  public:
+    static inline void* getAsVoidPointer(IndexListEntry *p) {
+      return p;
+    }
+    static inline IndexListEntry* getFromVoidPointer(void *p) {
+      return static_cast<IndexListEntry*>(p);
+    }
+    enum { NumLowBitsAvailable = 3 };
+  };
+
+  /// SlotIndex - An opaque wrapper around machine indexes.
+  class SlotIndex {
+    friend class SlotIndexes;
+    friend struct DenseMapInfo<SlotIndex>;
+
+    enum Slot { LOAD, USE, DEF, STORE, NUM };
+
+    PointerIntPair<IndexListEntry*, 2, unsigned> lie;
+
+    SlotIndex(IndexListEntry *entry, unsigned slot)
+      : lie(entry, slot) {}
+
+    IndexListEntry& entry() const {
+      assert(isValid() && "Attempt to compare reserved index.");
+      return *lie.getPointer();
+    }
+
+    int getIndex() const {
+      return entry().getIndex() | getSlot();
+    }
+
+    /// Returns the slot for this SlotIndex.
+    Slot getSlot() const {
+      return static_cast<Slot>(lie.getInt());
+    }
+
+    static inline unsigned getHashValue(const SlotIndex &v) {
+      void *ptrVal = v.lie.getOpaqueValue();
+      return (unsigned((intptr_t)ptrVal)) ^ (unsigned((intptr_t)ptrVal) >> 9);
+    }
+
+  public:
+    enum {
+      /// The default distance between instructions as returned by distance().
+      /// This may vary as instructions are inserted and removed.
+      InstrDist = 4*NUM
+    };
+
+    static inline SlotIndex getEmptyKey() {
+      return SlotIndex(0, 1);
+    }
+
+    static inline SlotIndex getTombstoneKey() {
+      return SlotIndex(0, 2);
+    }
+
+    /// Construct an invalid index.
+    SlotIndex() : lie(0, 0) {}
+
+    // Construct a new slot index from the given one, and set the slot.
+    SlotIndex(const SlotIndex &li, Slot s)
+      : lie(&li.entry(), unsigned(s)) {
+      assert(lie.getPointer() != 0 &&
+             "Attempt to construct index with 0 pointer.");
+    }
+
+    /// Returns true if this is a valid index. Invalid indices do
+    /// not point into an index table, and cannot be compared.
+    bool isValid() const {
+      return lie.getPointer();
+    }
+
+    /// Print this index to the given raw_ostream.
+    void print(raw_ostream &os) const;
+
+    /// Dump this index to stderr.
+    void dump() const;
+
+    /// Compare two SlotIndex objects for equality.
+    bool operator==(SlotIndex other) const {
+      return lie == other.lie;
+    }
+    /// Compare two SlotIndex objects for inequality.
+    bool operator!=(SlotIndex other) const {
+      return lie != other.lie;
+    }
+
+    /// Compare two SlotIndex objects. Return true if the first index
+    /// is strictly lower than the second.
+    bool operator<(SlotIndex other) const {
+      return getIndex() < other.getIndex();
+    }
+    /// Compare two SlotIndex objects. Return true if the first index
+    /// is lower than, or equal to, the second.
+    bool operator<=(SlotIndex other) const {
+      return getIndex() <= other.getIndex();
+    }
+
+    /// Compare two SlotIndex objects. Return true if the first index
+    /// is greater than the second.
+    bool operator>(SlotIndex other) const {
+      return getIndex() > other.getIndex();
+    }
+
+    /// Compare two SlotIndex objects. Return true if the first index
+    /// is greater than, or equal to, the second.
+    bool operator>=(SlotIndex other) const {
+      return getIndex() >= other.getIndex();
+    }
+
+    /// Return the distance from this index to the given one.
+    int distance(SlotIndex other) const {
+      return other.getIndex() - getIndex();
+    }
+
+    /// isLoad - Return true if this is a LOAD slot.
+    bool isLoad() const {
+      return getSlot() == LOAD;
+    }
+
+    /// isDef - Return true if this is a DEF slot.
+    bool isDef() const {
+      return getSlot() == DEF;
+    }
+
+    /// isUse - Return true if this is a USE slot.
+    bool isUse() const {
+      return getSlot() == USE;
+    }
+
+    /// isStore - Return true if this is a STORE slot.
+    bool isStore() const {
+      return getSlot() == STORE;
+    }
+
+    /// Returns the base index associated with this index. The base index
+    /// is the one associated with the LOAD slot for the instruction pointed to
+    /// by this index.
+    SlotIndex getBaseIndex() const {
+      return getLoadIndex();
+    }
+
+    /// Returns the boundary index associated with this index. The boundary
+    /// index is the one associated with the STORE slot for the instruction
+    /// pointed to by this index.
+    SlotIndex getBoundaryIndex() const {
+      return getStoreIndex();
+    }
+
+    /// Returns the index of the LOAD slot for the instruction pointed to by
+    /// this index.
+    SlotIndex getLoadIndex() const {
+      return SlotIndex(&entry(), SlotIndex::LOAD);
+    }
+
+    /// Returns the index of the USE slot for the instruction pointed to by
+    /// this index.
+    SlotIndex getUseIndex() const {
+      return SlotIndex(&entry(), SlotIndex::USE);
+    }
+
+    /// Returns the index of the DEF slot for the instruction pointed to by
+    /// this index.
+    SlotIndex getDefIndex() const {
+      return SlotIndex(&entry(), SlotIndex::DEF);
+    }
+
+    /// Returns the index of the STORE slot for the instruction pointed to by
+    /// this index.
+    SlotIndex getStoreIndex() const {
+      return SlotIndex(&entry(), SlotIndex::STORE);
+    }
+
+    /// Returns the next slot in the index list. This could be either the
+    /// next slot for the instruction pointed to by this index or, if this
+    /// index is a STORE, the first slot for the next instruction.
+    /// WARNING: This method is considerably more expensive than the methods
+    /// that return specific slots (getUseIndex(), etc). If you can - please
+    /// use one of those methods.
+    SlotIndex getNextSlot() const {
+      Slot s = getSlot();
+      if (s == SlotIndex::STORE) {
+        return SlotIndex(entry().getNext(), SlotIndex::LOAD);
+      }
+      return SlotIndex(&entry(), s + 1);
+    }
+
+    /// Returns the next index. This is the index corresponding to this
+    /// index's slot, but for the next instruction.
+    SlotIndex getNextIndex() const {
+      return SlotIndex(entry().getNext(), getSlot());
+    }
+
+    /// Returns the previous slot in the index list. This could be either the
+    /// previous slot for the instruction pointed to by this index or, if this
+    /// index is a LOAD, the last slot for the previous instruction.
+    /// WARNING: This method is considerably more expensive than the methods
+    /// that return specific slots (getUseIndex(), etc). If you can - please
+    /// use one of those methods.
+    SlotIndex getPrevSlot() const {
+      Slot s = getSlot();
+      if (s == SlotIndex::LOAD) {
+        return SlotIndex(entry().getPrev(), SlotIndex::STORE);
+      }
+      return SlotIndex(&entry(), s - 1);
+    }
+
+    /// Returns the previous index. This is the index corresponding to this
+    /// index's slot, but for the previous instruction.
+    SlotIndex getPrevIndex() const {
+      return SlotIndex(entry().getPrev(), getSlot());
+    }
+
+  };
+
+  /// DenseMapInfo specialization for SlotIndex.
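+  /// This lets SlotIndex be used directly as a DenseMap key (as in the
+  /// Mi2IndexMap below); equality compares the underlying (entry, slot)
+  /// pair rather than the numeric index.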
+  template <>
+  struct DenseMapInfo<SlotIndex> {
+    static inline SlotIndex getEmptyKey() {
+      return SlotIndex::getEmptyKey();
+    }
+    static inline SlotIndex getTombstoneKey() {
+      return SlotIndex::getTombstoneKey();
+    }
+    static inline unsigned getHashValue(const SlotIndex &v) {
+      return SlotIndex::getHashValue(v);
+    }
+    static inline bool isEqual(const SlotIndex &LHS, const SlotIndex &RHS) {
+      return (LHS == RHS);
+    }
+  };
+
+  template <> struct isPodLike<SlotIndex> { static const bool value = true; };
+
+
+  inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
+    li.print(os);
+    return os;
+  }
+
+  typedef std::pair<SlotIndex, MachineBasicBlock*> IdxMBBPair;
+
+  inline bool operator<(SlotIndex V, const IdxMBBPair &IM) {
+    return V < IM.first;
+  }
+
+  inline bool operator<(const IdxMBBPair &IM, SlotIndex V) {
+    return IM.first < V;
+  }
+
+  struct Idx2MBBCompare {
+    bool operator()(const IdxMBBPair &LHS, const IdxMBBPair &RHS) const {
+      return LHS.first < RHS.first;
+    }
+  };
+
+  /// SlotIndexes pass.
+  ///
+  /// This pass assigns indexes to each instruction.
+  class SlotIndexes : public MachineFunctionPass {
+  private:
+
+    MachineFunction *mf;
+    IndexListEntry *indexListHead;
+    unsigned functionSize;
+
+    typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
+    Mi2IndexMap mi2iMap;
+
+    /// MBB2IdxMap - The indexes of the first and last instructions in the
+    /// specified basic block.
+    typedef DenseMap<const MachineBasicBlock*,
+                     std::pair<SlotIndex, SlotIndex> > MBB2IdxMap;
+    MBB2IdxMap mbb2IdxMap;
+
+    /// Idx2MBBMap - Sorted list of pairs of index of first instruction
+    /// and MBB id.
+    std::vector<IdxMBBPair> idx2MBBMap;
+
+    // IndexListEntry allocator.
+    BumpPtrAllocator ileAllocator;
+
+    IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
+      IndexListEntry *entry =
+        static_cast<IndexListEntry*>(
+          ileAllocator.Allocate(sizeof(IndexListEntry),
+                                alignOf<IndexListEntry>()));
+
+      new (entry) IndexListEntry(mi, index);
+
+      return entry;
+    }
+
+    void initList() {
+      assert(indexListHead == 0 && "Zero entry non-null at initialisation.");
+      indexListHead = createEntry(0, ~0U);
+      indexListHead->setNext(0);
+      indexListHead->setPrev(indexListHead);
+    }
+
+    void clearList() {
+      indexListHead = 0;
+      ileAllocator.Reset();
+    }
+
+    IndexListEntry* getTail() {
+      assert(indexListHead != 0 && "Call to getTail on uninitialized list.");
+      return indexListHead->getPrev();
+    }
+
+    const IndexListEntry* getTail() const {
+      assert(indexListHead != 0 && "Call to getTail on uninitialized list.");
+      return indexListHead->getPrev();
+    }
+
+    // Returns true if the index list is empty.
+    bool empty() const { return (indexListHead == getTail()); }
+
+    IndexListEntry* front() {
+      assert(!empty() && "front() called on empty index list.");
+      return indexListHead;
+    }
+
+    const IndexListEntry* front() const {
+      assert(!empty() && "front() called on empty index list.");
+      return indexListHead;
+    }
+
+    IndexListEntry* back() {
+      assert(!empty() && "back() called on empty index list.");
+      return getTail()->getPrev();
+    }
+
+    const IndexListEntry* back() const {
+      assert(!empty() && "back() called on empty index list.");
+      return getTail()->getPrev();
+    }
+
+    /// Insert a new entry before itr.
+    void insert(IndexListEntry *itr, IndexListEntry *val) {
+      assert(itr != 0 && "itr should not be null.");
+      IndexListEntry *prev = itr->getPrev();
+      val->setNext(itr);
+      val->setPrev(prev);
+
+      if (itr != indexListHead) {
+        prev->setNext(val);
+      }
+      else {
+        indexListHead = val;
+      }
+      itr->setPrev(val);
+    }
+
+    /// Push a new entry on to the end of the list.
+    void push_back(IndexListEntry *val) {
+      insert(getTail(), val);
+    }
+
+    /// Renumber locally after inserting newEntry.
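+    /// (Invoked by insertMachineInstrInMaps below when the gap between two
+    /// neighbouring entries leaves no spare number for the new instruction.)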
+    void renumberIndexes(IndexListEntry *newEntry);
+
+  public:
+    static char ID;
+
+    SlotIndexes() : MachineFunctionPass(ID), indexListHead(0) {
+      initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &au) const;
+    virtual void releaseMemory();
+
+    virtual bool runOnMachineFunction(MachineFunction &fn);
+
+    /// Dump the indexes.
+    void dump() const;
+
+    /// Renumber the index list, providing space for new instructions.
+    void renumberIndexes();
+
+    /// Returns the zero index for this analysis.
+    SlotIndex getZeroIndex() {
+      assert(front()->getIndex() == 0 && "First index is not 0?");
+      return SlotIndex(front(), 0);
+    }
+
+    /// Returns the base index of the last slot in this analysis.
+    SlotIndex getLastIndex() {
+      return SlotIndex(back(), 0);
+    }
+
+    /// Returns the invalid index marker for this analysis.
+    SlotIndex getInvalidIndex() {
+      return getZeroIndex();
+    }
+
+    /// Returns the distance between the highest and lowest indexes allocated
+    /// so far.
+    unsigned getIndexesLength() const {
+      assert(front()->getIndex() == 0 &&
+             "Initial index isn't zero?");
+
+      return back()->getIndex();
+    }
+
+    /// Returns the number of instructions in the function.
+    unsigned getFunctionSize() const {
+      return functionSize;
+    }
+
+    /// Returns true if the given machine instr is mapped to an index,
+    /// otherwise returns false.
+    bool hasIndex(const MachineInstr *instr) const {
+      return (mi2iMap.find(instr) != mi2iMap.end());
+    }
+
+    /// Returns the base index for the given instruction.
+    SlotIndex getInstructionIndex(const MachineInstr *instr) const {
+      Mi2IndexMap::const_iterator itr = mi2iMap.find(instr);
+      assert(itr != mi2iMap.end() && "Instruction not found in maps.");
+      return itr->second;
+    }
+
+    /// Returns the instruction for the given index, or null if the given
+    /// index has no instruction associated with it.
+    MachineInstr* getInstructionFromIndex(SlotIndex index) const {
+      return index.isValid() ? index.entry().getInstr() : 0;
+    }
+
+    /// Returns the next non-null index.
+    SlotIndex getNextNonNullIndex(SlotIndex index) {
+      SlotIndex nextNonNull = index.getNextIndex();
+
+      while (&nextNonNull.entry() != getTail() &&
+             getInstructionFromIndex(nextNonNull) == 0) {
+        nextNonNull = nextNonNull.getNextIndex();
+      }
+
+      return nextNonNull;
+    }
+
+    /// Return the (start,end) range of the given basic block.
+    const std::pair<SlotIndex, SlotIndex> &
+    getMBBRange(const MachineBasicBlock *mbb) const {
+      MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb);
+      assert(itr != mbb2IdxMap.end() && "MBB not found in maps.");
+      return itr->second;
+    }
+
+    /// Returns the first index in the given basic block.
+    SlotIndex getMBBStartIdx(const MachineBasicBlock *mbb) const {
+      return getMBBRange(mbb).first;
+    }
+
+    /// Returns the last index in the given basic block.
+    SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const {
+      return getMBBRange(mbb).second;
+    }
+
+    /// Returns the basic block which the given index falls in.
+    MachineBasicBlock* getMBBFromIndex(SlotIndex index) const {
+      std::vector<IdxMBBPair>::const_iterator I =
+        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index);
+      // Take the pair containing the index
+      std::vector<IdxMBBPair>::const_iterator J =
+        ((I != idx2MBBMap.end() && I->first > index) ||
+         (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ?
(I-1): I;
+
+      assert(J != idx2MBBMap.end() && J->first <= index &&
+             index < getMBBEndIdx(J->second) &&
+             "index does not correspond to an MBB");
+      return J->second;
+    }
+
+    bool findLiveInMBBs(SlotIndex start, SlotIndex end,
+                        SmallVectorImpl<MachineBasicBlock*> &mbbs) const {
+      std::vector<IdxMBBPair>::const_iterator itr =
+        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
+      bool resVal = false;
+
+      while (itr != idx2MBBMap.end()) {
+        if (itr->first >= end)
+          break;
+        mbbs.push_back(itr->second);
+        resVal = true;
+        ++itr;
+      }
+      return resVal;
+    }
+
+    /// Returns the MBB covering the given range, or null if the range covers
+    /// more than one basic block.
+    MachineBasicBlock* getMBBCoveringRange(SlotIndex start, SlotIndex end) const {
+
+      assert(start < end && "Backwards ranges not allowed.");
+
+      std::vector<IdxMBBPair>::const_iterator itr =
+        std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start);
+
+      if (itr == idx2MBBMap.end()) {
+        itr = prior(itr);
+        return itr->second;
+      }
+
+      // Check that we don't cross the boundary into this block.
+      if (itr->first < end)
+        return 0;
+
+      itr = prior(itr);
+
+      if (itr->first <= start)
+        return itr->second;
+
+      return 0;
+    }
+
+    /// Insert the given machine instruction into the mapping. Returns the
+    /// assigned index.
+    SlotIndex insertMachineInstrInMaps(MachineInstr *mi) {
+      assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed.");
+      // Numbering DBG_VALUE instructions could cause code generation to be
+      // affected by debug information.
+      assert(!mi->isDebugValue() && "Cannot number DBG_VALUE instructions.");
+
+      MachineBasicBlock *mbb = mi->getParent();
+
+      assert(mbb != 0 && "Instr must be added to function.");
+
+      MBB2IdxMap::iterator mbbRangeItr = mbb2IdxMap.find(mbb);
+
+      assert(mbbRangeItr != mbb2IdxMap.end() &&
+             "Instruction's parent MBB has not been added to SlotIndexes.");
+
+      MachineBasicBlock::iterator miItr(mi);
+      IndexListEntry *newEntry;
+      // Get previous index, considering that not all instructions are indexed.
+      IndexListEntry *prevEntry;
+      for (;;) {
+        // If mi is at the mbb beginning, get the prev index from the mbb.
+        if (miItr == mbb->begin()) {
+          prevEntry = &mbbRangeItr->second.first.entry();
+          break;
+        }
+        // Otherwise rewind until we find a mapped instruction.
+        Mi2IndexMap::const_iterator itr = mi2iMap.find(--miItr);
+        if (itr != mi2iMap.end()) {
+          prevEntry = &itr->second.entry();
+          break;
+        }
+      }
+
+      // Get next entry from previous entry.
+      IndexListEntry *nextEntry = prevEntry->getNext();
+
+      // Get a number for the new instr, or 0 if there's no room currently.
+      // In the latter case we'll force a renumber later.
+      unsigned dist = ((nextEntry->getIndex() - prevEntry->getIndex())/2) & ~3u;
+      unsigned newNumber = prevEntry->getIndex() + dist;
+
+      // Insert a new list entry for mi.
+      newEntry = createEntry(mi, newNumber);
+      insert(nextEntry, newEntry);
+
+      // Renumber locally if we need to.
+      if (dist == 0)
+        renumberIndexes(newEntry);
+
+      SlotIndex newIndex(newEntry, SlotIndex::LOAD);
+      mi2iMap.insert(std::make_pair(mi, newIndex));
+      return newIndex;
+    }
+
+    /// Remove the given machine instruction from the mapping.
+    void removeMachineInstrFromMaps(MachineInstr *mi) {
+      // remove index -> MachineInstr and
+      // MachineInstr -> index mappings
+      Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi);
+      if (mi2iItr != mi2iMap.end()) {
+        IndexListEntry *miEntry(&mi2iItr->second.entry());
+        assert(miEntry->getInstr() == mi && "Instruction indexes broken.");
+        // FIXME: Eventually we want to actually delete these indexes.
+        miEntry->setInstr(0);
+        mi2iMap.erase(mi2iItr);
+      }
+    }
+
+    /// ReplaceMachineInstrInMaps - Replace a machine instruction with a new
+    /// one in the maps used by the register allocator.
+    void replaceMachineInstrInMaps(MachineInstr *mi, MachineInstr *newMI) {
+      Mi2IndexMap::iterator mi2iItr = mi2iMap.find(mi);
+      if (mi2iItr == mi2iMap.end())
+        return;
+      SlotIndex replaceBaseIndex = mi2iItr->second;
+      IndexListEntry *miEntry(&replaceBaseIndex.entry());
+      assert(miEntry->getInstr() == mi &&
+             "Mismatched instruction in index tables.");
+      miEntry->setInstr(newMI);
+      mi2iMap.erase(mi2iItr);
+      mi2iMap.insert(std::make_pair(newMI, replaceBaseIndex));
+    }
+
+    /// Add the given MachineBasicBlock into the maps.
+    void insertMBBInMaps(MachineBasicBlock *mbb) {
+      MachineFunction::iterator nextMBB =
+        llvm::next(MachineFunction::iterator(mbb));
+      IndexListEntry *startEntry = createEntry(0, 0);
+      IndexListEntry *stopEntry = createEntry(0, 0);
+      IndexListEntry *nextEntry = 0;
+
+      if (nextMBB == mbb->getParent()->end()) {
+        nextEntry = getTail();
+      } else {
+        nextEntry = &getMBBStartIdx(nextMBB).entry();
+      }
+
+      insert(nextEntry, startEntry);
+      insert(nextEntry, stopEntry);
+
+      SlotIndex startIdx(startEntry, SlotIndex::LOAD);
+      SlotIndex endIdx(nextEntry, SlotIndex::LOAD);
+
+      mbb2IdxMap.insert(
+        std::make_pair(mbb, std::make_pair(startIdx, endIdx)));
+
+      idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb));
+
+      if (MachineFunction::iterator(mbb) != mbb->getParent()->begin()) {
+        // Have to update the end index of the previous block.
+        MachineBasicBlock *priorMBB =
+          llvm::prior(MachineFunction::iterator(mbb));
+        mbb2IdxMap[priorMBB].second = startIdx;
+      }
+
+      renumberIndexes();
+      std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+    }
+
+  };
+
+
+  // Specialize IntervalMapInfo for half-open slot index intervals.
+  template <typename> struct IntervalMapInfo;
+  template <> struct IntervalMapInfo<SlotIndex> {
+    static inline bool startLess(const SlotIndex &x, const SlotIndex &a) {
+      return x < a;
+    }
+    static inline bool stopLess(const SlotIndex &b, const SlotIndex &x) {
+      return b <= x;
+    }
+    static inline bool adjacent(const SlotIndex &a, const SlotIndex &b) {
+      return a == b;
+    }
+  };
+
+}
+
+#endif // LLVM_CODEGEN_SLOTINDEXES_H
diff --git a/final/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/final/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
new file mode 100644
index 00000000000..fba3e48c475
--- /dev/null
+++ b/final/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -0,0 +1,206 @@
+//==-- llvm/CodeGen/TargetLoweringObjectFileImpl.h - Object Info -*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
+#define LLVM_CODEGEN_TARGETLOWERINGOBJECTFILEIMPL_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+
+namespace llvm {
+  class MachineModuleInfo;
+  class Mangler;
+  class MCAsmInfo;
+  class MCExpr;
+  class MCSection;
+  class MCSectionMachO;
+  class MCSymbol;
+  class MCContext;
+  class GlobalValue;
+  class TargetMachine;
+
+
+class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
+protected:
+  /// TLSDataSection - Section directive for Thread Local data.
+  ///
+  const MCSection *TLSDataSection;        // Defaults to ".tdata".
+
+  /// TLSBSSSection - Section directive for Thread Local uninitialized data.
+  /// Null if this target doesn't support a BSS section.
+  ///
+  const MCSection *TLSBSSSection;         // Defaults to ".tbss".
+
+  const MCSection *DataRelSection;
+  const MCSection *DataRelLocalSection;
+  const MCSection *DataRelROSection;
+  const MCSection *DataRelROLocalSection;
+
+  const MCSection *MergeableConst4Section;
+  const MCSection *MergeableConst8Section;
+  const MCSection *MergeableConst16Section;
+public:
+  TargetLoweringObjectFileELF() {}
+  ~TargetLoweringObjectFileELF() {}
+
+  virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+  virtual const MCSection *getEHFrameSection() const;
+
+  const MCSection *getDataRelSection() const { return DataRelSection; }
+
+  /// getSectionForConstant - Given a constant with the SectionKind, return a
+  /// section that it should be placed in.
+  virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
+
+
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang, const TargetMachine &TM) const;
+
+  virtual const MCSection *
+  SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                         Mangler *Mang, const TargetMachine &TM) const;
+
+  /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
+  /// to the specified global variable from exception handling information.
+  ///
+  virtual const MCExpr *
+  getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                 MachineModuleInfo *MMI, unsigned Encoding,
+                                 MCStreamer &Streamer) const;
+};
+
+
+
+class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
+  /// TLSDataSection - Section for thread local data.
+  ///
+  const MCSection *TLSDataSection;        // Defaults to ".tdata".
+
+  /// TLSBSSSection - Section for thread local uninitialized data.
+  ///
+  const MCSection *TLSBSSSection;         // Defaults to ".tbss".
+
+  /// TLSTLVSection - Section for thread local structure information.
+  /// Contains the source code name of the variable, visibility and a pointer
+  /// to the initial value (.tdata or .tbss).
+  const MCSection *TLSTLVSection;         // Defaults to ".tlv".
+
+  /// TLSThreadInitSection - Section for thread local data initialization
+  /// functions.
+  const MCSection *TLSThreadInitSection;  // Defaults to ".thread_init_func".
+ + const MCSection *CStringSection; + const MCSection *UStringSection; + const MCSection *TextCoalSection; + const MCSection *ConstTextCoalSection; + const MCSection *ConstDataSection; + const MCSection *DataCoalSection; + const MCSection *DataCommonSection; + const MCSection *DataBSSSection; + const MCSection *FourByteConstantSection; + const MCSection *EightByteConstantSection; + const MCSection *SixteenByteConstantSection; + + const MCSection *LazySymbolPointerSection; + const MCSection *NonLazySymbolPointerSection; +public: + TargetLoweringObjectFileMachO() {} + ~TargetLoweringObjectFileMachO() {} + + virtual void Initialize(MCContext &Ctx, const TargetMachine &TM); + + virtual const MCSection *getEHFrameSection() const; + + virtual const MCSection * + SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection * + getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const; + + virtual const MCSection *getSectionForConstant(SectionKind Kind) const; + + /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively + /// decide not to emit the UsedDirective for some symbols in llvm.used. + /// FIXME: REMOVE this (rdar://7071300) + virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV, + Mangler *) const; + + /// getTextCoalSection - Return the "__TEXT,__textcoal_nt" section we put weak + /// text symbols into. + const MCSection *getTextCoalSection() const { + return TextCoalSection; + } + + /// getConstTextCoalSection - Return the "__TEXT,__const_coal" section + /// we put weak read-only symbols into. + const MCSection *getConstTextCoalSection() const { + return ConstTextCoalSection; + } + + /// getLazySymbolPointerSection - Return the section corresponding to + /// the .lazy_symbol_pointer directive. + const MCSection *getLazySymbolPointerSection() const { + return LazySymbolPointerSection; + } + + /// getNonLazySymbolPointerSection - Return the section corresponding to + /// the .non_lazy_symbol_pointer directive. + const MCSection *getNonLazySymbolPointerSection() const { + return NonLazySymbolPointerSection; + } + + /// getExprForDwarfGlobalReference - The mach-o version of this method + /// defaults to returning a stub reference. 
+  virtual const MCExpr *
+  getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                 MachineModuleInfo *MMI, unsigned Encoding,
+                                 MCStreamer &Streamer) const;
+
+  virtual unsigned getPersonalityEncoding() const;
+  virtual unsigned getLSDAEncoding() const;
+  virtual unsigned getFDEEncoding() const;
+  virtual unsigned getTTypeEncoding() const;
+};
+
+
+
+class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
+  const MCSection *DrectveSection;
+public:
+  TargetLoweringObjectFileCOFF() {}
+  ~TargetLoweringObjectFileCOFF() {}
+
+  virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+  virtual const MCSection *getEHFrameSection() const;
+
+  virtual const MCSection *getDrectveSection() const { return DrectveSection; }
+
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang, const TargetMachine &TM) const;
+
+  virtual const MCSection *
+  SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                         Mangler *Mang, const TargetMachine &TM) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/CodeGen/ValueTypes.h b/final/include/llvm/CodeGen/ValueTypes.h
new file mode 100644
index 00000000000..22d16222078
--- /dev/null
+++ b/final/include/llvm/CodeGen/ValueTypes.h
@@ -0,0 +1,690 @@
+//===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the set of low-level target independent types which
+// various values in the code generator can have. This allows the target
+// specific behavior of instructions to be described to target independent
+// passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VALUETYPES_H
+#define LLVM_CODEGEN_VALUETYPES_H
+
+#include <cassert>
+#include <string>
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+
+namespace llvm {
+  class Type;
+  class LLVMContext;
+  struct EVT;
+
+  /// MVT - Machine Value Type. Every type that is supported natively by some
+  /// processor targeted by LLVM occurs here. This means that any legal value
+  /// type can be represented by an MVT.
+  class MVT {
+  public:
+    enum SimpleValueType {
+      // If you change this numbering, you must change the values in
+      // ValueTypes.td as well!
+ Other = 0, // This is a non-standard value + i1 = 1, // This is a 1 bit integer value + i8 = 2, // This is an 8 bit integer value + i16 = 3, // This is a 16 bit integer value + i32 = 4, // This is a 32 bit integer value + i64 = 5, // This is a 64 bit integer value + i128 = 6, // This is a 128 bit integer value + + FIRST_INTEGER_VALUETYPE = i1, + LAST_INTEGER_VALUETYPE = i128, + + f32 = 7, // This is a 32 bit floating point value + f64 = 8, // This is a 64 bit floating point value + f80 = 9, // This is a 80 bit floating point value + f128 = 10, // This is a 128 bit floating point value + ppcf128 = 11, // This is a PPC 128-bit floating point value + + v2i8 = 12, // 2 x i8 + v4i8 = 13, // 4 x i8 + v8i8 = 14, // 8 x i8 + v16i8 = 15, // 16 x i8 + v32i8 = 16, // 32 x i8 + v2i16 = 17, // 2 x i16 + v4i16 = 18, // 4 x i16 + v8i16 = 19, // 8 x i16 + v16i16 = 20, // 16 x i16 + v2i32 = 21, // 2 x i32 + v4i32 = 22, // 4 x i32 + v8i32 = 23, // 8 x i32 + v1i64 = 24, // 1 x i64 + v2i64 = 25, // 2 x i64 + v4i64 = 26, // 4 x i64 + v8i64 = 27, // 8 x i64 + + v2f32 = 28, // 2 x f32 + v4f32 = 29, // 4 x f32 + v8f32 = 30, // 8 x f32 + v2f64 = 31, // 2 x f64 + v4f64 = 32, // 4 x f64 + + FIRST_VECTOR_VALUETYPE = v2i8, + LAST_VECTOR_VALUETYPE = v4f64, + + x86mmx = 33, // This is an X86 MMX value + + Glue = 34, // This glues nodes together during pre-RA sched + + isVoid = 35, // This has no value + + LAST_VALUETYPE = 36, // This always remains at the end of the list. + + // This is the current maximum for LAST_VALUETYPE. + // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors + // This value must be a multiple of 32. + MAX_ALLOWED_VALUETYPE = 64, + + // Metadata - This is MDNode or MDString. + Metadata = 250, + + // iPTRAny - An int value the size of the pointer of the current + // target to any address space. This must only be used internal to + // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR. + iPTRAny = 251, + + // vAny - A vector with any length and element size. This is used + // for intrinsics that have overloadings based on vector types. + // This is only for tblgen's consumption! + vAny = 252, + + // fAny - Any floating-point or vector floating-point value. This is used + // for intrinsics that have overloadings based on floating-point types. + // This is only for tblgen's consumption! + fAny = 253, + + // iAny - An integer or vector integer value of any bit width. This is + // used for intrinsics that have overloadings based on integer bit widths. + // This is only for tblgen's consumption! + iAny = 254, + + // iPTR - An int value the size of the pointer of the current + // target. This should only be used internal to tblgen! + iPTR = 255, + + // LastSimpleValueType - The greatest valid SimpleValueType value. + LastSimpleValueType = 255, + + // INVALID_SIMPLE_VALUE_TYPE - Simple value types greater than or equal + // to this are considered extended value types. 
+ INVALID_SIMPLE_VALUE_TYPE = LastSimpleValueType + 1 + }; + + SimpleValueType SimpleTy; + + MVT() : SimpleTy((SimpleValueType)(INVALID_SIMPLE_VALUE_TYPE)) {} + MVT(SimpleValueType SVT) : SimpleTy(SVT) { } + + bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; } + bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; } + bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; } + bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; } + bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; } + bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; } + + /// isFloatingPoint - Return true if this is a FP, or a vector FP type. + bool isFloatingPoint() const { + return ((SimpleTy >= MVT::f32 && SimpleTy <= MVT::ppcf128) || + (SimpleTy >= MVT::v2f32 && SimpleTy <= MVT::v4f64)); + } + + /// isInteger - Return true if this is an integer, or a vector integer type. + bool isInteger() const { + return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE && + SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) || + (SimpleTy >= MVT::v2i8 && SimpleTy <= MVT::v8i64)); + } + + /// isVector - Return true if this is a vector value type. + bool isVector() const { + return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE && + SimpleTy <= MVT::LAST_VECTOR_VALUETYPE); + } + + /// isPow2VectorType - Returns true if the given vector is a power of 2. + bool isPow2VectorType() const { + unsigned NElts = getVectorNumElements(); + return !(NElts & (NElts - 1)); + } + + /// getPow2VectorType - Widens the length of the given vector MVT up to + /// the nearest power of 2 and returns that type. + MVT getPow2VectorType() const { + if (isPow2VectorType()) + return *this; + + unsigned NElts = getVectorNumElements(); + unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); + return MVT::getVectorVT(getVectorElementType(), Pow2NElts); + } + + /// getScalarType - If this is a vector type, return the element type, + /// otherwise return this. + MVT getScalarType() const { + return isVector() ? getVectorElementType() : *this; + } + + MVT getVectorElementType() const { + switch (SimpleTy) { + default: + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + case v2i8 : + case v4i8 : + case v8i8 : + case v16i8: + case v32i8: return i8; + case v2i16: + case v4i16: + case v8i16: + case v16i16: return i16; + case v2i32: + case v4i32: + case v8i32: return i32; + case v1i64: + case v2i64: + case v4i64: + case v8i64: return i64; + case v2f32: + case v4f32: + case v8f32: return f32; + case v2f64: + case v4f64: return f64; + } + } + + unsigned getVectorNumElements() const { + switch (SimpleTy) { + default: + return ~0U; + case v32i8: return 32; + case v16i8: + case v16i16: return 16; + case v8i8 : + case v8i16: + case v8i32: + case v8i64: + case v8f32: return 8; + case v4i8: + case v4i16: + case v4i32: + case v4i64: + case v4f32: + case v4f64: return 4; + case v2i8: + case v2i16: + case v2i32: + case v2i64: + case v2f32: + case v2f64: return 2; + case v1i64: return 1; + } + } + + unsigned getSizeInBits() const { + switch (SimpleTy) { + case iPTR: + assert(0 && "Value type size is target-dependent. 
Ask TLI."); + case iPTRAny: + case iAny: + case fAny: + assert(0 && "Value type is overloaded."); + default: + assert(0 && "getSizeInBits called on extended MVT."); + case i1 : return 1; + case i8 : return 8; + case i16 : + case v2i8: return 16; + case f32 : + case i32 : + case v4i8: + case v2i16: return 32; + case x86mmx: + case f64 : + case i64 : + case v8i8: + case v4i16: + case v2i32: + case v1i64: + case v2f32: return 64; + case f80 : return 80; + case f128: + case ppcf128: + case i128: + case v16i8: + case v8i16: + case v4i32: + case v2i64: + case v4f32: + case v2f64: return 128; + case v32i8: + case v16i16: + case v8i32: + case v4i64: + case v8f32: + case v4f64: return 256; + case v8i64: return 512; + } + } + + /// getStoreSize - Return the number of bytes overwritten by a store + /// of the specified value type. + unsigned getStoreSize() const { + return (getSizeInBits() + 7) / 8; + } + + /// getStoreSizeInBits - Return the number of bits overwritten by a store + /// of the specified value type. + unsigned getStoreSizeInBits() const { + return getStoreSize() * 8; + } + + static MVT getFloatingPointVT(unsigned BitWidth) { + switch (BitWidth) { + default: + assert(false && "Bad bit width!"); + case 32: + return MVT::f32; + case 64: + return MVT::f64; + case 80: + return MVT::f80; + case 128: + return MVT::f128; + } + } + + static MVT getIntegerVT(unsigned BitWidth) { + switch (BitWidth) { + default: + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + case 1: + return MVT::i1; + case 8: + return MVT::i8; + case 16: + return MVT::i16; + case 32: + return MVT::i32; + case 64: + return MVT::i64; + case 128: + return MVT::i128; + } + } + + static MVT getVectorVT(MVT VT, unsigned NumElements) { + switch (VT.SimpleTy) { + default: + break; + case MVT::i8: + if (NumElements == 2) return MVT::v2i8; + if (NumElements == 4) return MVT::v4i8; + if (NumElements == 8) return MVT::v8i8; + if (NumElements == 16) return MVT::v16i8; + if (NumElements == 32) return MVT::v32i8; + break; + case MVT::i16: + if (NumElements == 2) return MVT::v2i16; + if (NumElements == 4) return MVT::v4i16; + if (NumElements == 8) return MVT::v8i16; + if (NumElements == 16) return MVT::v16i16; + break; + case MVT::i32: + if (NumElements == 2) return MVT::v2i32; + if (NumElements == 4) return MVT::v4i32; + if (NumElements == 8) return MVT::v8i32; + break; + case MVT::i64: + if (NumElements == 1) return MVT::v1i64; + if (NumElements == 2) return MVT::v2i64; + if (NumElements == 4) return MVT::v4i64; + if (NumElements == 8) return MVT::v8i64; + break; + case MVT::f32: + if (NumElements == 2) return MVT::v2f32; + if (NumElements == 4) return MVT::v4f32; + if (NumElements == 8) return MVT::v8f32; + break; + case MVT::f64: + if (NumElements == 2) return MVT::v2f64; + if (NumElements == 4) return MVT::v4f64; + break; + } + return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE); + } + }; + + + /// EVT - Extended Value Type. Capable of holding value types which are not + /// native for any processor (such as the i12345 type), as well as the types + /// a MVT can represent. 
+ struct EVT { + private: + MVT V; + const Type *LLVMTy; + + public: + EVT() : V((MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE)), + LLVMTy(0) {} + EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(0) { } + EVT(MVT S) : V(S), LLVMTy(0) {} + + bool operator==(EVT VT) const { + return !(*this != VT); + } + bool operator!=(EVT VT) const { + if (V.SimpleTy != VT.V.SimpleTy) + return true; + if (V.SimpleTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return LLVMTy != VT.LLVMTy; + return false; + } + + /// getFloatingPointVT - Returns the EVT that represents a floating point + /// type with the given number of bits. There are two floating point types + /// with 128 bits - this returns f128 rather than ppcf128. + static EVT getFloatingPointVT(unsigned BitWidth) { + return MVT::getFloatingPointVT(BitWidth); + } + + /// getIntegerVT - Returns the EVT that represents an integer with the given + /// number of bits. + static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth) { + MVT M = MVT::getIntegerVT(BitWidth); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + return M; + return getExtendedIntegerVT(Context, BitWidth); + } + + /// getVectorVT - Returns the EVT that represents a vector NumElements in + /// length, where each element is of type VT. + static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements) { + MVT M = MVT::getVectorVT(VT.V, NumElements); + if (M.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE) + return M; + return getExtendedVectorVT(Context, VT, NumElements); + } + + /// getIntVectorWithNumElements - Return any integer vector type that has + /// the specified number of elements. + static EVT getIntVectorWithNumElements(LLVMContext &C, unsigned NumElts) { + switch (NumElts) { + default: return getVectorVT(C, MVT::i8, NumElts); + case 1: return MVT::v1i64; + case 2: return MVT::v2i32; + case 4: return MVT::v4i16; + case 8: return MVT::v8i8; + case 16: return MVT::v16i8; + } + return MVT::INVALID_SIMPLE_VALUE_TYPE; + } + + /// isSimple - Test if the given EVT is simple (as opposed to being + /// extended). + bool isSimple() const { + return V.SimpleTy <= MVT::LastSimpleValueType; + } + + /// isExtended - Test if the given EVT is extended (as opposed to + /// being simple). + bool isExtended() const { + return !isSimple(); + } + + /// isFloatingPoint - Return true if this is a FP, or a vector FP type. + bool isFloatingPoint() const { + return isSimple() ? V.isFloatingPoint() : isExtendedFloatingPoint(); + } + + /// isInteger - Return true if this is an integer, or a vector integer type. + bool isInteger() const { + return isSimple() ? V.isInteger() : isExtendedInteger(); + } + + /// isVector - Return true if this is a vector value type. + bool isVector() const { + return isSimple() ? V.isVector() : isExtendedVector(); + } + + /// is64BitVector - Return true if this is a 64-bit vector type. + bool is64BitVector() const { + if (!isSimple()) + return isExtended64BitVector(); + + return (V == MVT::v8i8 || V==MVT::v4i16 || V==MVT::v2i32 || + V == MVT::v1i64 || V==MVT::v2f32); + } + + /// is128BitVector - Return true if this is a 128-bit vector type. + bool is128BitVector() const { + if (!isSimple()) + return isExtended128BitVector(); + return (V==MVT::v16i8 || V==MVT::v8i16 || V==MVT::v4i32 || + V==MVT::v2i64 || V==MVT::v4f32 || V==MVT::v2f64); + } + + /// is256BitVector - Return true if this is a 256-bit vector type. 
+ inline bool is256BitVector() const { + if (!isSimple()) + return isExtended256BitVector(); + return (V == MVT::v8f32 || V == MVT::v4f64 || V == MVT::v32i8 || + V == MVT::v16i16 || V == MVT::v8i32 || V == MVT::v4i64); + } + + /// is512BitVector - Return true if this is a 512-bit vector type. + inline bool is512BitVector() const { + return isSimple() ? (V == MVT::v8i64) : isExtended512BitVector(); + } + + /// isOverloaded - Return true if this is an overloaded type for TableGen. + bool isOverloaded() const { + return (V==MVT::iAny || V==MVT::fAny || V==MVT::vAny || V==MVT::iPTRAny); + } + + /// isByteSized - Return true if the bit size is a multiple of 8. + bool isByteSized() const { + return (getSizeInBits() & 7) == 0; + } + + /// isRound - Return true if the size is a power-of-two number of bytes. + bool isRound() const { + unsigned BitSize = getSizeInBits(); + return BitSize >= 8 && !(BitSize & (BitSize - 1)); + } + + /// bitsEq - Return true if this has the same number of bits as VT. + bool bitsEq(EVT VT) const { + if (EVT::operator==(VT)) return true; + return getSizeInBits() == VT.getSizeInBits(); + } + + /// bitsGT - Return true if this has more bits than VT. + bool bitsGT(EVT VT) const { + if (EVT::operator==(VT)) return false; + return getSizeInBits() > VT.getSizeInBits(); + } + + /// bitsGE - Return true if this has no less bits than VT. + bool bitsGE(EVT VT) const { + if (EVT::operator==(VT)) return true; + return getSizeInBits() >= VT.getSizeInBits(); + } + + /// bitsLT - Return true if this has less bits than VT. + bool bitsLT(EVT VT) const { + if (EVT::operator==(VT)) return false; + return getSizeInBits() < VT.getSizeInBits(); + } + + /// bitsLE - Return true if this has no more bits than VT. + bool bitsLE(EVT VT) const { + if (EVT::operator==(VT)) return true; + return getSizeInBits() <= VT.getSizeInBits(); + } + + + /// getSimpleVT - Return the SimpleValueType held in the specified + /// simple EVT. + MVT getSimpleVT() const { + assert(isSimple() && "Expected a SimpleValueType!"); + return V; + } + + /// getScalarType - If this is a vector type, return the element type, + /// otherwise return this. + EVT getScalarType() const { + return isVector() ? getVectorElementType() : *this; + } + + /// getVectorElementType - Given a vector type, return the type of + /// each element. + EVT getVectorElementType() const { + assert(isVector() && "Invalid vector type!"); + if (isSimple()) + return V.getVectorElementType(); + return getExtendedVectorElementType(); + } + + /// getVectorNumElements - Given a vector type, return the number of + /// elements it contains. + unsigned getVectorNumElements() const { + assert(isVector() && "Invalid vector type!"); + if (isSimple()) + return V.getVectorNumElements(); + return getExtendedVectorNumElements(); + } + + /// getSizeInBits - Return the size of the specified value type in bits. + unsigned getSizeInBits() const { + if (isSimple()) + return V.getSizeInBits(); + return getExtendedSizeInBits(); + } + + /// getStoreSize - Return the number of bytes overwritten by a store + /// of the specified value type. + unsigned getStoreSize() const { + return (getSizeInBits() + 7) / 8; + } + + /// getStoreSizeInBits - Return the number of bits overwritten by a store + /// of the specified value type. 
+ unsigned getStoreSizeInBits() const { + return getStoreSize() * 8; + } + + /// getRoundIntegerType - Rounds the bit-width of the given integer EVT up + /// to the nearest power of two (and at least to eight), and returns the + /// integer EVT with that number of bits. + EVT getRoundIntegerType(LLVMContext &Context) const { + assert(isInteger() && !isVector() && "Invalid integer type!"); + unsigned BitWidth = getSizeInBits(); + if (BitWidth <= 8) + return EVT(MVT::i8); + return getIntegerVT(Context, 1 << Log2_32_Ceil(BitWidth)); + } + + /// getHalfSizedIntegerVT - Finds the smallest simple value type that is + /// greater than or equal to half the width of this EVT. If no simple + /// value type can be found, an extended integer value type of half the + /// size (rounded up) is returned. + EVT getHalfSizedIntegerVT(LLVMContext &Context) const { + assert(isInteger() && !isVector() && "Invalid integer type!"); + unsigned EVTSize = getSizeInBits(); + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE; ++IntVT) { + EVT HalfVT = EVT((MVT::SimpleValueType)IntVT); + if (HalfVT.getSizeInBits() * 2 >= EVTSize) + return HalfVT; + } + return getIntegerVT(Context, (EVTSize + 1) / 2); + } + + /// isPow2VectorType - Returns true if the given vector is a power of 2. + bool isPow2VectorType() const { + unsigned NElts = getVectorNumElements(); + return !(NElts & (NElts - 1)); + } + + /// getPow2VectorType - Widens the length of the given vector EVT up to + /// the nearest power of 2 and returns that type. + EVT getPow2VectorType(LLVMContext &Context) const { + if (!isPow2VectorType()) { + unsigned NElts = getVectorNumElements(); + unsigned Pow2NElts = 1 << Log2_32_Ceil(NElts); + return EVT::getVectorVT(Context, getVectorElementType(), Pow2NElts); + } + else { + return *this; + } + } + + /// getEVTString - This function returns value type as a string, + /// e.g. "i32". + std::string getEVTString() const; + + /// getTypeForEVT - This method returns an LLVM type corresponding to the + /// specified EVT. For integer types, this returns an unsigned type. Note + /// that this will abort for types that cannot be represented. + const Type *getTypeForEVT(LLVMContext &Context) const; + + /// getEVT - Return the value type corresponding to the specified type. + /// This returns all pointers as iPTR. If HandleUnknown is true, unknown + /// types are returned as Other, otherwise they are invalid. + static EVT getEVT(const Type *Ty, bool HandleUnknown = false); + + intptr_t getRawBits() { + if (isSimple()) + return V.SimpleTy; + else + return (intptr_t)(LLVMTy); + } + + /// compareRawBits - A meaningless but well-behaved order, useful for + /// constructing containers. + struct compareRawBits { + bool operator()(EVT L, EVT R) const { + if (L.V.SimpleTy == R.V.SimpleTy) + return L.LLVMTy < R.LLVMTy; + else + return L.V.SimpleTy < R.V.SimpleTy; + } + }; + + private: + // Methods for handling the Extended-type case in functions above. + // These are all out-of-line to prevent users of this header file + // from having a dependency on Type.h. 
+    static EVT getExtendedIntegerVT(LLVMContext &C, unsigned BitWidth);
+    static EVT getExtendedVectorVT(LLVMContext &C, EVT VT,
+                                   unsigned NumElements);
+    bool isExtendedFloatingPoint() const;
+    bool isExtendedInteger() const;
+    bool isExtendedVector() const;
+    bool isExtended64BitVector() const;
+    bool isExtended128BitVector() const;
+    bool isExtended256BitVector() const;
+    bool isExtended512BitVector() const;
+    EVT getExtendedVectorElementType() const;
+    unsigned getExtendedVectorNumElements() const;
+    unsigned getExtendedSizeInBits() const;
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/CodeGen/ValueTypes.td b/final/include/llvm/CodeGen/ValueTypes.td
new file mode 100644
index 00000000000..a1163f7a2f9
--- /dev/null
+++ b/final/include/llvm/CodeGen/ValueTypes.td
@@ -0,0 +1,78 @@
+//===- ValueTypes.td - ValueType definitions ---------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Value types - These values correspond to the register types defined in the
+// ValueTypes.h file. If you update anything here, you must update it there as
+// well!
+//
+//===----------------------------------------------------------------------===//
+
+class ValueType<int size, int value> {
+  string Namespace = "MVT";
+  int Size = size;
+  int Value = value;
+}
+
+def OtherVT: ValueType<0  ,  0>;   // "Other" value
+def i1     : ValueType<1  ,  1>;   // One bit boolean value
+def i8     : ValueType<8  ,  2>;   // 8-bit integer value
+def i16    : ValueType<16 ,  3>;   // 16-bit integer value
+def i32    : ValueType<32 ,  4>;   // 32-bit integer value
+def i64    : ValueType<64 ,  5>;   // 64-bit integer value
+def i128   : ValueType<128,  6>;   // 128-bit integer value
+def f32    : ValueType<32 ,  7>;   // 32-bit floating point value
+def f64    : ValueType<64 ,  8>;   // 64-bit floating point value
+def f80    : ValueType<80 ,  9>;   // 80-bit floating point value
+def f128   : ValueType<128, 10>;   // 128-bit floating point value
+def ppcf128: ValueType<128, 11>;   // PPC 128-bit floating point value
+
+def v2i8   : ValueType<16 , 12>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 13>;   //  4 x i8  vector value
+def v8i8   : ValueType<64 , 14>;   //  8 x i8  vector value
+def v16i8  : ValueType<128, 15>;   // 16 x i8  vector value
+def v32i8  : ValueType<256, 16>;   // 32 x i8  vector value
+def v2i16  : ValueType<32 , 17>;   //  2 x i16 vector value
+def v4i16  : ValueType<64 , 18>;   //  4 x i16 vector value
+def v8i16  : ValueType<128, 19>;   //  8 x i16 vector value
+def v16i16 : ValueType<256, 20>;   // 16 x i16 vector value
+def v2i32  : ValueType<64 , 21>;   //  2 x i32 vector value
+def v4i32  : ValueType<128, 22>;   //  4 x i32 vector value
+def v8i32  : ValueType<256, 23>;   //  8 x i32 vector value
+def v1i64  : ValueType<64 , 24>;   //  1 x i64 vector value
+def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
+def v4i64  : ValueType<256, 26>;   //  4 x i64 vector value
+def v8i64  : ValueType<512, 27>;   //  8 x i64 vector value
+
+def v2f32  : ValueType<64 , 28>;   //  2 x f32 vector value
+def v4f32  : ValueType<128, 29>;   //  4 x f32 vector value
+def v8f32  : ValueType<256, 30>;   //  8 x f32 vector value
+def v2f64  : ValueType<128, 31>;   //  2 x f64 vector value
+def v4f64  : ValueType<256, 32>;   //  4 x f64 vector value
+
+def x86mmx : ValueType<64 , 33>;   // X86 MMX value
+def FlagVT : ValueType<0  , 34>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 35>;   // Produces no value
+
+def MetadataVT: ValueType<0, 250>; // Metadata
+
+// Pseudo valuetype mapped to the current pointer size to any address space.
+// Should only be used in TableGen.
+def iPTRAny : ValueType<0, 251>;
+
+// Pseudo valuetype to represent "vector of any size"
+def vAny   : ValueType<0  , 252>;
+
+// Pseudo valuetype to represent "float of any format"
+def fAny   : ValueType<0  , 253>;
+
+// Pseudo valuetype to represent "integer of any bit width"
+def iAny   : ValueType<0  , 254>;
+
+// Pseudo valuetype mapped to the current pointer size.
+def iPTR   : ValueType<0  , 255>;
diff --git a/final/include/llvm/CompilerDriver/Action.h b/final/include/llvm/CompilerDriver/Action.h
new file mode 100644
index 00000000000..f2b79655f60
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Action.h
@@ -0,0 +1,54 @@
+//===--- Action.h - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Action - encapsulates a single shell command.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_ACTION_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_ACTION_H
+
+#include <string>
+#include <vector>
+
+namespace llvmc {
+
+  typedef std::vector<std::string> StrVector;
+
+  /// Action - A class that encapsulates a single shell command.
+  class Action {
+    /// Command_ - The actual command (for example, 'ls').
+    std::string Command_;
+    /// Args_ - Command arguments. Stdout redirection ("> file") is allowed.
+    std::vector<std::string> Args_;
+    /// StopCompilation_ - Should we stop compilation after executing
+    /// this action?
+    bool StopCompilation_;
+    /// OutFile_ - The output file name.
+    std::string OutFile_;
+
+  public:
+    void Construct (const std::string& C, const StrVector& A,
+                    bool S, const std::string& O) {
+      Command_ = C;
+      Args_ = A;
+      StopCompilation_ = S;
+      OutFile_ = O;
+    }
+    bool IsConstructed () { return (Command_.size() != 0);}
+
+    /// Execute - Executes the command. Returns -1 on error.
+    int Execute () const;
+    bool StopCompilation () const { return StopCompilation_; }
+    const std::string& OutFile() { return OutFile_; }
+  };
+
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_ACTION_H
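A short sketch of how driver code is expected to fill in and run an Action; the command, arguments, and the runListing helper here are made up for illustration, and Execute() itself is implemented elsewhere in the llvmc libraries:

    #include "llvm/CompilerDriver/Action.h"

    // Hypothetical helper: build and run "ls -l > listing.txt".
    int runListing() {
      llvmc::StrVector Args;
      Args.push_back("-l");
      Args.push_back("> listing.txt");  // stdout redirection is allowed

      llvmc::Action A;
      A.Construct("ls", Args, /*StopCompilation=*/false, /*OutFile=*/"");
      return A.IsConstructed() ? A.Execute() : -1;
    }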
diff --git a/final/include/llvm/CompilerDriver/AutoGenerated.h b/final/include/llvm/CompilerDriver/AutoGenerated.h
new file mode 100644
index 00000000000..7b926c622c9
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/AutoGenerated.h
@@ -0,0 +1,40 @@
+//===--- AutoGenerated.h - The LLVM Compiler Driver -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Interface to the autogenerated driver code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
+
+namespace llvmc {
+  class LanguageMap;
+  class CompilationGraph;
+
+  namespace autogenerated {
+
+    int PreprocessOptions();
+    int PopulateLanguageMap(LanguageMap& langMap);
+    int PopulateCompilationGraph(CompilationGraph& graph);
+
+    inline int RunInitialization (LanguageMap& M, CompilationGraph& G) {
+      if (int ret = PreprocessOptions())
+        return ret;
+      if (int ret = PopulateLanguageMap(M))
+        return ret;
+      if (int ret = PopulateCompilationGraph(G))
+        return ret;
+
+      return 0;
+    }
+  }
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_AUTOGENERATED_H
diff --git a/final/include/llvm/CompilerDriver/BuiltinOptions.h b/final/include/llvm/CompilerDriver/BuiltinOptions.h
new file mode 100644
index 00000000000..7b9c15c52f7
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/BuiltinOptions.h
@@ -0,0 +1,39 @@
+//===--- BuiltinOptions.h - The LLVM Compiler Driver ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Declarations of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
+
+#include "llvm/Support/CommandLine.h"
+
+#include <string>
+
+namespace llvmc {
+
+namespace SaveTempsEnum { enum Values { Cwd, Obj, Unset }; }
+
+extern llvm::cl::list<std::string> InputFilenames;
+extern llvm::cl::opt<std::string> OutputFilename;
+extern llvm::cl::opt<std::string> TempDirname;
+extern llvm::cl::list<std::string> Languages;
+extern llvm::cl::opt<bool> DryRun;
+extern llvm::cl::opt<bool> Time;
+extern llvm::cl::opt<bool> VerboseMode;
+extern llvm::cl::opt<bool> CheckGraph;
+extern llvm::cl::opt<bool> ViewGraph;
+extern llvm::cl::opt<bool> WriteGraph;
+extern llvm::cl::opt<SaveTempsEnum::Values> SaveTemps;
+
+} // End namespace llvmc.
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
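These globals are defined in the llvmc driver library and filled in by cl::ParseCommandLineOptions; any driver code can then consult them directly. A hypothetical sketch (the ShouldEchoCommands helper is invented for illustration; cl::opt<bool> converts implicitly to bool):

    #include "llvm/CompilerDriver/BuiltinOptions.h"

    namespace llvmc {
      // Hypothetical policy helper: echo each command when the user passed
      // -dry-run or the verbose flag.
      bool ShouldEchoCommands() {
        return DryRun || VerboseMode;
      }
    }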
diff --git a/final/include/llvm/CompilerDriver/Common.td b/final/include/llvm/CompilerDriver/Common.td
new file mode 100644
index 00000000000..84e8783d20a
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Common.td
@@ -0,0 +1,124 @@
+//===- Common.td - Common definitions for LLVMC2 ----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common definitions used in llvmc tool description files.
+//
+//===----------------------------------------------------------------------===//
+
+class Tool<list<dag> l> {
+  list<dag> properties = l;
+}
+
+// Possible Tool properties.
+
+def in_language;
+def out_language;
+def output_suffix;
+def command;
+def out_file_option;
+def in_file_option;
+def join;
+def sink;
+def works_on_empty;
+def actions;
+
+// Possible option types.
+
+def alias_option;
+def switch_option;
+def switch_list_option;
+def parameter_option;
+def parameter_list_option;
+def prefix_option;
+def prefix_list_option;
+
+// Possible option properties.
+
+def help;
+def hidden;
+def init;
+def multi_val;
+def one_or_more;
+def zero_or_more;
+def optional;
+def really_hidden;
+def required;
+def comma_separated;
+def forward_not_split;
+
+// The 'case' construct.
+def case;
+
+// Boolean constants.
+def true;
+def false;
+
+// Boolean operators.
+def and;
+def or;
+def not;
+
+// Primitive tests.
+def switch_on;
+def parameter_equals;
+def element_in_list;
+def input_languages_contain;
+def empty;
+def not_empty;
+def default;
+def single_input_file;
+def multiple_input_files;
+def any_switch_on;
+def any_not_empty;
+def any_empty;
+
+// Possible actions.
+
+def append_cmd;
+def forward;
+def forward_as;
+def forward_value;
+def forward_transformed_value;
+def stop_compilation;
+def no_out_file;
+def unpack_values;
+def warning;
+def error;
+def set_option;
+def unset_option;
+
+// Increase the edge weight.
+def inc_weight;
+
+// Option list - a single place to specify options.
+class OptionList<list<dag> l> {
+  list<dag> options = l;
+}
+
+// Option preprocessor - actions taken during plugin loading.
+class OptionPreprocessor<dag d> {
+  dag preprocessor = d;
+}
+
+// Map from suffixes to language names
+
+def lang_to_suffixes;
+
+class LanguageMap<list<dag> l> {
+  list<dag> map = l;
+}
+
+// Compilation graph
+
+def edge;
+def optional_edge;
+
+class CompilationGraph<list<dag> l> {
+  list<dag> edges = l;
+}
diff --git a/final/include/llvm/CompilerDriver/CompilationGraph.h b/final/include/llvm/CompilerDriver/CompilationGraph.h
new file mode 100644
index 00000000000..e1eea325e34
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/CompilationGraph.h
@@ -0,0 +1,330 @@
+//===--- CompilationGraph.h - The LLVM Compiler Driver ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Compilation graph - definition.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_COMPILATION_GRAPH_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_COMPILATION_GRAPH_H
+
+#include "llvm/CompilerDriver/Tool.h"
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Path.h"
+
+#include <cassert>
+#include <string>
+
+namespace llvmc {
+
+  class CompilationGraph;
+  typedef llvm::StringSet<> InputLanguagesSet;
+
+  /// LanguageMap - Maps from extensions to language names.
+  class LanguageMap : public llvm::StringMap<std::string> {
+  public:
+
+    /// GetLanguage - Find the language name corresponding to a given file.
+    const std::string* GetLanguage(const llvm::sys::Path&) const;
+  };
+
+  /// Edge - Represents an edge of the compilation graph.
+  class Edge : public llvm::RefCountedBaseVPTR<Edge> {
+  public:
+    Edge(const std::string& T) : ToolName_(T) {}
+    virtual ~Edge() {}
+
+    const std::string& ToolName() const { return ToolName_; }
+    virtual int Weight(const InputLanguagesSet& InLangs) const = 0;
+  private:
+    std::string ToolName_;
+  };
+
+  /// SimpleEdge - An edge that has no properties.
+  class SimpleEdge : public Edge {
+  public:
+    SimpleEdge(const std::string& T) : Edge(T) {}
+    int Weight(const InputLanguagesSet&) const { return 1; }
+  };
+
+  /// Node - A node (vertex) of the compilation graph.
+  struct Node {
+    // A Node holds a list of the outward edges.
+    typedef llvm::SmallVector<llvm::IntrusiveRefCntPtr<Edge>, 3> container_type;
+    typedef container_type::iterator iterator;
+    typedef container_type::const_iterator const_iterator;
+
+    Node() : OwningGraph(0), InEdges(0) {}
+    Node(CompilationGraph* G) : OwningGraph(G), InEdges(0) {}
+    Node(CompilationGraph* G, Tool* T) :
+      OwningGraph(G), ToolPtr(T), InEdges(0) {}
+
+    bool HasChildren() const { return !OutEdges.empty(); }
+    const std::string Name() const
+    { return ToolPtr ? ToolPtr->Name() : "root"; }
+
+    // Iteration.
+    iterator EdgesBegin() { return OutEdges.begin(); }
+    const_iterator EdgesBegin() const { return OutEdges.begin(); }
+    iterator EdgesEnd() { return OutEdges.end(); }
+    const_iterator EdgesEnd() const { return OutEdges.end(); }
+
+    /// AddEdge - Add an outward edge. Takes ownership of the provided
+    /// Edge object.
+    void AddEdge(Edge* E);
+
+    // Inward edge counter. Used to implement topological sort.
+    void IncrInEdges() { ++InEdges; }
+    void DecrInEdges() { --InEdges; }
+    bool HasNoInEdges() const { return InEdges == 0; }
+
+    // Needed to implement NodeChildIterator/GraphTraits
+    CompilationGraph* OwningGraph;
+    // The corresponding Tool.
+    // WARNING: ToolPtr can be NULL (for the root node).
+    llvm::IntrusiveRefCntPtr<Tool> ToolPtr;
+    // Links to children.
+    container_type OutEdges;
+    // Inward edge counter. Updated in
+    // CompilationGraph::insertEdge(). Used for topological sorting.
+    unsigned InEdges;
+  };
+
+  class NodesIterator;
+
+  /// CompilationGraph - The compilation graph itself.
+  class CompilationGraph {
+    /// nodes_map_type - The main data structure.
+    typedef llvm::StringMap<Node> nodes_map_type;
+    /// tools_vector_type, tools_map_type - Data structures used to
+    /// map from language names to tools. (We can have several tools
+    /// associated with each language name, hence the need for a
+    /// vector.)
+    typedef
+    llvm::SmallVector<llvm::IntrusiveRefCntPtr<Edge>, 3> tools_vector_type;
+    typedef llvm::StringMap<tools_vector_type> tools_map_type;
+
+    /// ToolsMap - Map from language names to lists of tool names.
+    tools_map_type ToolsMap;
+    /// NodesMap - Map from tool names to Tool objects.
+    nodes_map_type NodesMap;
+
+  public:
+
+    typedef nodes_map_type::iterator nodes_iterator;
+    typedef nodes_map_type::const_iterator const_nodes_iterator;
+
+    CompilationGraph();
+
+    /// insertNode - Insert a new node into the graph. Takes
+    /// ownership of the object.
+    void insertNode(Tool* T);
+
+    /// insertEdge - Insert a new edge into the graph. Takes ownership
+    /// of the Edge object. Returns non-zero value on error.
+    int insertEdge(const std::string& A, Edge* E);
+
+    /// Build - Build target(s) from the input file set. Command-line options
+    /// are passed implicitly as global variables. Returns non-zero value on
+    /// error (usually the failed program's exit code).
+    int Build(llvm::sys::Path const& TempDir, const LanguageMap& LangMap);
+
+    /// Check - Check the compilation graph for common errors like cycles,
+    /// input/output language mismatch and multiple default edges. Prints
+    /// error messages and returns a non-zero value if it finds any errors.
+    int Check();
+
+    /// getNode - Return the node corresponding to the given tool name.
+    /// Returns 0 on error.
+    Node* getNode(const std::string& ToolName);
+    const Node* getNode(const std::string& ToolName) const;
+
+    /// viewGraph - This function is meant for use from the debugger. You can
+    /// just say 'call G->viewGraph()' and a ghostview window should pop up from
+    /// the program, displaying the compilation graph. This depends on there
+    /// being a 'dot' and 'gv' program in your path.
+    void viewGraph();
+
+    /// writeGraph - Write a Graphviz .dot source file to the current
+    /// directory.
+    int writeGraph(const std::string& OutputFilename);
+
+    // GraphTraits support.
+    friend NodesIterator GraphBegin(CompilationGraph*);
+    friend NodesIterator GraphEnd(CompilationGraph*);
+
+  private:
+    // Helper functions.
+
+    /// getToolsVector - Return the list of tool names corresponding to the
+    /// given language name. Returns 0 on error.
+    const tools_vector_type* getToolsVector(const std::string& LangName) const;
+
+    /// PassThroughGraph - Pass the input file through the toolchain starting at
+    /// StartNode.
+    int PassThroughGraph (const llvm::sys::Path& In, const Node* StartNode,
+                          const InputLanguagesSet& InLangs,
+                          const llvm::sys::Path& TempDir,
+                          const LanguageMap& LangMap) const;
+
+    /// FindToolChain - Find head of the toolchain corresponding to
+    /// the given file.
+    const Node* FindToolChain(const llvm::sys::Path& In,
+                              const std::string* ForceLanguage,
+                              InputLanguagesSet& InLangs,
+                              const LanguageMap& LangMap) const;
+
+    /// BuildInitial - Traverse the initial parts of the toolchains. Returns
+    /// non-zero value on error.
+    int BuildInitial(InputLanguagesSet& InLangs,
+                     const llvm::sys::Path& TempDir,
+                     const LanguageMap& LangMap);
+
+    /// TopologicalSort - Sort the nodes in topological order. Returns non-zero
+    /// value on error.
+    int TopologicalSort(std::vector<const Node*>& Out);
+    /// TopologicalSortFilterJoinNodes - Call TopologicalSort and filter the
+    /// resulting list to include only Join nodes. Returns non-zero value on
+    /// error.
+    int TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out);
+
+    // Functions used to implement Check().
+
+    /// CheckLanguageNames - Check that output/input language names match for
+    /// all nodes. Returns non-zero value on error (number of errors
+    /// encountered).
+    int CheckLanguageNames() const;
+    /// CheckMultipleDefaultEdges - Check that there are no multiple default
+    /// edges. Returns non-zero value on error (number of errors encountered).
+    int CheckMultipleDefaultEdges() const;
+    /// CheckCycles - Check that there are no cycles in the graph. Returns
+    /// non-zero value on error (number of errors encountered).
+    int CheckCycles();
+
+  };
+
+  // GraphTraits support code.
+
+  /// NodesIterator - Auxiliary class needed to implement GraphTraits
+  /// support. Can be generalised to something like value_iterator
+  /// for map-like containers.
+  class NodesIterator : public CompilationGraph::nodes_iterator {
+    typedef CompilationGraph::nodes_iterator super;
+    typedef NodesIterator ThisType;
+    typedef Node* pointer;
+    typedef Node& reference;
+
+  public:
+    NodesIterator(super I) : super(I) {}
+
+    inline reference operator*() const {
+      return super::operator->()->second;
+    }
+    inline pointer operator->() const {
+      return &super::operator->()->second;
+    }
+  };
+
+  inline NodesIterator GraphBegin(CompilationGraph* G) {
+    return NodesIterator(G->NodesMap.begin());
+  }
+
+  inline NodesIterator GraphEnd(CompilationGraph* G) {
+    return NodesIterator(G->NodesMap.end());
+  }
+
+
+  /// NodeChildIterator - Another auxiliary class needed by GraphTraits.
+  class NodeChildIterator : public
+    std::iterator<std::bidirectional_iterator_tag, Node, ptrdiff_t> {
+    typedef NodeChildIterator ThisType;
+    typedef Node::container_type::iterator iterator;
+
+    CompilationGraph* OwningGraph;
+    iterator EdgeIter;
+  public:
+    typedef Node* pointer;
+    typedef Node& reference;
+
+    NodeChildIterator(Node* N, iterator I) :
+      OwningGraph(N->OwningGraph), EdgeIter(I) {}
+
+    const ThisType& operator=(const ThisType& I) {
+      assert(OwningGraph == I.OwningGraph);
+      EdgeIter = I.EdgeIter;
+      return *this;
+    }
+
+    inline bool operator==(const ThisType& I) const {
+      assert(OwningGraph == I.OwningGraph);
+      return EdgeIter == I.EdgeIter;
+    }
+    inline bool operator!=(const ThisType& I) const {
+      return !this->operator==(I);
+    }
+
+    inline pointer operator*() const {
+      return OwningGraph->getNode((*EdgeIter)->ToolName());
+    }
+    inline pointer operator->() const {
+      return this->operator*();
+    }
+
+    ThisType& operator++() { ++EdgeIter; return *this; } // Preincrement
+    ThisType operator++(int) { // Postincrement
+      ThisType tmp = *this;
+      ++*this;
+      return tmp;
+    }
+
+    inline ThisType& operator--() { --EdgeIter; return *this; } // Predecrement
+    inline ThisType operator--(int) { // Postdecrement
+      ThisType tmp = *this;
+      --*this;
+      return tmp;
+    }
+
+  };
+}
+
+namespace llvm {
+  template <>
+  struct GraphTraits<llvmc::CompilationGraph*> {
+    typedef llvmc::CompilationGraph GraphType;
+    typedef llvmc::Node NodeType;
+    typedef llvmc::NodeChildIterator ChildIteratorType;
+
+    static NodeType* getEntryNode(GraphType* G) {
+      return G->getNode("root");
+    }
+
+    static ChildIteratorType child_begin(NodeType* N) {
+      return ChildIteratorType(N, N->OutEdges.begin());
+    }
+    static ChildIteratorType child_end(NodeType* N) {
+      return ChildIteratorType(N, N->OutEdges.end());
+    }
+
+    typedef llvmc::NodesIterator nodes_iterator;
+    static nodes_iterator nodes_begin(GraphType *G) {
+      return GraphBegin(G);
+    }
+    static nodes_iterator nodes_end(GraphType *G) {
+      return GraphEnd(G);
+    }
+  };
+
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_COMPILATION_GRAPH_H
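The GraphTraits specialization above is what lets LLVM's generic graph machinery (including viewGraph's DOT emission) walk a CompilationGraph. A minimal traversal sketch under that same interface, printing the tools reachable directly from the "root" pseudo-node; the listRootTools name is hypothetical:

    #include "llvm/CompilerDriver/CompilationGraph.h"
    #include "llvm/ADT/GraphTraits.h"
    #include "llvm/Support/raw_ostream.h"

    void listRootTools(llvmc::CompilationGraph &G) {
      typedef llvm::GraphTraits<llvmc::CompilationGraph*> GT;
      llvmc::Node *Root = GT::getEntryNode(&G);
      // Children of "root" are the toolchain heads; NodeChildIterator
      // dereferences to a Node pointer.
      for (GT::ChildIteratorType I = GT::child_begin(Root),
                                 E = GT::child_end(Root); I != E; ++I)
        llvm::outs() << (*I)->Name() << "\n";
    }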
diff --git a/final/include/llvm/CompilerDriver/Error.h b/final/include/llvm/CompilerDriver/Error.h
new file mode 100644
index 00000000000..013094e5dd7
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Error.h
@@ -0,0 +1,29 @@
+//===--- Error.h - The LLVM Compiler Driver ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Error handling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_ERROR_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_ERROR_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvmc {
+
+  inline void PrintError(llvm::StringRef Err) {
+    extern const char* ProgramName;
+    llvm::errs() << ProgramName << ": " << Err << '\n';
+  }
+
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_ERROR_H
diff --git a/final/include/llvm/CompilerDriver/Main.h b/final/include/llvm/CompilerDriver/Main.h
new file mode 100644
index 00000000000..d136a5d2fa1
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Main.h
@@ -0,0 +1,21 @@
+//===--- Main.h - The LLVM Compiler Driver ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Entry point for the driver executable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
+
+namespace llvmc {
+  int Main(int argc, char** argv);
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_MAIN_H
diff --git a/final/include/llvm/CompilerDriver/Main.inc b/final/include/llvm/CompilerDriver/Main.inc
new file mode 100644
index 00000000000..41640437de8
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Main.inc
@@ -0,0 +1,23 @@
+//===--- Main.inc - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Default main() for the driver executable.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
+#define LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
+
+#include "llvm/CompilerDriver/Main.h"
+
+int main(int argc, char** argv) {
+  return llvmc::Main(argc, argv);
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
diff --git a/final/include/llvm/CompilerDriver/Tool.h b/final/include/llvm/CompilerDriver/Tool.h
new file mode 100644
index 00000000000..d0926ba9831
--- /dev/null
+++ b/final/include/llvm/CompilerDriver/Tool.h
@@ -0,0 +1,100 @@
+//===--- Tool.h - The LLVM Compiler Driver ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Tool abstract base class - an interface to tool descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_TOOL_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_TOOL_H
+
+#include "llvm/CompilerDriver/Action.h"
+
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/Path.h"
+
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvmc {
+
+  class LanguageMap;
+  typedef std::vector<std::pair<unsigned, std::string> > ArgsVector;
+  typedef std::vector<llvm::sys::Path> PathVector;
+  typedef std::vector<std::string> StrVector;
+  typedef llvm::StringSet<> InputLanguagesSet;
+
+  /// Tool - Represents a single tool.
+  class Tool : public llvm::RefCountedBaseVPTR<Tool> {
+  public:
+
+    virtual ~Tool() {}
+
+    /// GenerateAction - Generate an Action given particular command-line
+    /// options. Returns non-zero value on error.
+    virtual int GenerateAction (Action& Out,
+                                const PathVector& inFiles,
+                                const bool HasChildren,
+                                const llvm::sys::Path& TempDir,
+                                const InputLanguagesSet& InLangs,
+                                const LanguageMap& LangMap) const = 0;
+
+    /// GenerateAction - Generate an Action given particular command-line
+    /// options. Returns non-zero value on error.
+    virtual int GenerateAction (Action& Out,
+                                const llvm::sys::Path& inFile,
+                                const bool HasChildren,
+                                const llvm::sys::Path& TempDir,
+                                const InputLanguagesSet& InLangs,
+                                const LanguageMap& LangMap) const = 0;
+
+    virtual const char* Name() const = 0;
+    virtual const char** InputLanguages() const = 0;
+    virtual const char** OutputLanguages() const = 0;
+
+    virtual bool IsJoin() const = 0;
+    virtual bool WorksOnEmpty() const = 0;
+
+  protected:
+    /// OutFilename - Generate the output file name.
+    llvm::sys::Path OutFilename(const llvm::sys::Path& In,
+                                const llvm::sys::Path& TempDir,
+                                bool StopCompilation,
+                                const char* OutputSuffix) const;
+
+    StrVector SortArgs(ArgsVector& Args) const;
+  };
+
+  /// JoinTool - A Tool that has an associated input file list.
+  class JoinTool : public Tool {
+  public:
+    void AddToJoinList(const llvm::sys::Path& P) { JoinList_.push_back(P); }
+    void ClearJoinList() { JoinList_.clear(); }
+    bool JoinListEmpty() const { return JoinList_.empty(); }
+
+    int GenerateAction(Action& Out,
+                       const bool HasChildren,
+                       const llvm::sys::Path& TempDir,
+                       const InputLanguagesSet& InLangs,
+                       const LanguageMap& LangMap) const {
+      return GenerateAction(Out, JoinList_, HasChildren, TempDir, InLangs,
+                            LangMap);
+    }
+    // We shouldn't shadow base class's version of GenerateAction.
+    using Tool::GenerateAction;
+
+  private:
+    PathVector JoinList_;
+  };
+
+}
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_TOOL_H
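To make the JoinTool contract concrete, here is a hypothetical sketch of how a driver might feed a join tool (typically the linker) everything it has accumulated and then run the resulting command. The runJoinStage name and the surrounding plumbing are assumptions for illustration, not llvmc API:

    #include "llvm/CompilerDriver/CompilationGraph.h"  // for LanguageMap
    #include "llvm/CompilerDriver/Tool.h"

    int runJoinStage(llvmc::JoinTool &Linker,
                     const llvmc::PathVector &Objects,
                     const llvm::sys::Path &TempDir,
                     const llvmc::InputLanguagesSet &InLangs,
                     const llvmc::LanguageMap &LangMap) {
      // Gather all inputs, then generate a single Action for the whole list.
      Linker.ClearJoinList();
      for (llvmc::PathVector::const_iterator I = Objects.begin(),
                                             E = Objects.end(); I != E; ++I)
        Linker.AddToJoinList(*I);

      llvmc::Action A;
      if (int ret = Linker.GenerateAction(A, /*HasChildren=*/false,
                                          TempDir, InLangs, LangMap))
        return ret;
      return A.Execute();
    }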
diff --git a/final/include/llvm/Config/AsmParsers.def.in b/final/include/llvm/Config/AsmParsers.def.in
new file mode 100644
index 00000000000..041af837541
--- /dev/null
+++ b/final/include/llvm/Config/AsmParsers.def.in
@@ -0,0 +1,29 @@
+//===- llvm/Config/AsmParsers.def - LLVM Assembly Parsers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the assembly-language parsers
+// supported by this build of LLVM. Clients of this file should define
+// the LLVM_ASM_PARSER macro to be a function-like macro with a
+// single parameter (the name of the target whose assembly can be
+// parsed); including this file will then enumerate all of the
+// targets with assembly parsers.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASM_PARSER
+#  error Please define the macro LLVM_ASM_PARSER(TargetName)
+#endif
+
+@LLVM_ENUM_ASM_PARSERS@
+
+#undef LLVM_ASM_PARSER
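These .def files follow the classic X-macro pattern: the client defines the macro, includes the file, and the @LLVM_ENUM_ASM_PARSERS@ placeholder (expanded at configure time) invokes the macro once per target. A usage sketch, mirroring the pattern llvm/Target/TargetSelect.h uses in this release; the exact initializer naming below follows the C API there:

    extern "C" {
      // Declare one initializer per configured target, e.g.
      // LLVMInitializeX86AsmParser().
    #define LLVM_ASM_PARSER(TargetName) \
      void LLVMInitialize##TargetName##AsmParser();
    #include "llvm/Config/AsmParsers.def"
    }

    // The .def file #undefs the macro at its end, so it must be redefined
    // before each include.
    inline void InitializeAllAsmParsers() {
    #define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser();
    #include "llvm/Config/AsmParsers.def"
    }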
diff --git a/final/include/llvm/Config/AsmPrinters.def.in b/final/include/llvm/Config/AsmPrinters.def.in
new file mode 100644
index 00000000000..9729bd75eb4
--- /dev/null
+++ b/final/include/llvm/Config/AsmPrinters.def.in
@@ -0,0 +1,29 @@
+//===- llvm/Config/AsmPrinters.def - LLVM Assembly Printers -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the assembly-language printers
+// supported by this build of LLVM. Clients of this file should define
+// the LLVM_ASM_PRINTER macro to be a function-like macro with a
+// single parameter (the name of the target whose assembly can be
+// generated); including this file will then enumerate all of the
+// targets with assembly printers.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ASM_PRINTER
+#  error Please define the macro LLVM_ASM_PRINTER(TargetName)
+#endif
+
+@LLVM_ENUM_ASM_PRINTERS@
+
+#undef LLVM_ASM_PRINTER
diff --git a/final/include/llvm/Config/Disassemblers.def.in b/final/include/llvm/Config/Disassemblers.def.in
new file mode 100644
index 00000000000..1b136570933
--- /dev/null
+++ b/final/include/llvm/Config/Disassemblers.def.in
@@ -0,0 +1,29 @@
+//===- llvm/Config/Disassemblers.def - LLVM Disassemblers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file enumerates all of the disassemblers supported by this
+// build of LLVM. Clients of this file should define the
+// LLVM_DISASSEMBLER macro to be a function-like macro with a single
+// parameter (the name of the target whose machine code can be
+// disassembled); including this file will then enumerate all of the
+// targets with disassemblers.
+//
+// The set of targets supported by LLVM is generated at configuration
+// time, at which point this header is generated. Do not modify this
+// header directly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DISASSEMBLER
+#  error Please define the macro LLVM_DISASSEMBLER(TargetName)
+#endif
+
+@LLVM_ENUM_DISASSEMBLERS@
+
+#undef LLVM_DISASSEMBLER
diff --git a/final/include/llvm/Config/Targets.def.in b/final/include/llvm/Config/Targets.def.in
new file mode 100644
index 00000000000..d589ecee12c
--- /dev/null
+++ b/final/include/llvm/Config/Targets.def.in
@@ -0,0 +1,28 @@
+/*===- llvm/Config/Targets.def - LLVM Target Architectures ------*- C++ -*-===*\
+|*                                                                          *|
+|* The LLVM Compiler Infrastructure                                         *|
+|*                                                                          *|
+|* This file is distributed under the University of Illinois Open Source    *|
+|* License. See LICENSE.TXT for details.                                    *|
+|*                                                                          *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                          *|
+|* This file enumerates all of the target architectures supported by        *|
+|* this build of LLVM. Clients of this file should define the               *|
+|* LLVM_TARGET macro to be a function-like macro with a single              *|
+|* parameter (the name of the target); including this file will then        *|
+|* enumerate all of the targets.                                            *|
+|*                                                                          *|
+|* The set of targets supported by LLVM is generated at configuration       *|
+|* time, at which point this header is generated. Do not modify this        *|
+|* header directly.                                                         *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_TARGET
+#  error Please define the macro LLVM_TARGET(TargetName)
+#endif
+
+@LLVM_ENUM_TARGETS@
+
+#undef LLVM_TARGET
diff --git a/final/include/llvm/Config/config.h.cmake b/final/include/llvm/Config/config.h.cmake
new file mode 100644
index 00000000000..b2deb1dccf5
--- /dev/null
+++ b/final/include/llvm/Config/config.h.cmake
@@ -0,0 +1,737 @@
+/**************************************
+** Created by Kevin from config.h.in **
+***************************************/
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* Define if CBE is enabled for printf %a output */
+#cmakedefine ENABLE_CBE_PRINTF_A ${ENABLE_CBE_PRINTF_A}
+
+/* Define if position independent code is enabled */
+#cmakedefine ENABLE_PIC
+
+/* Define if threads enabled */
+#cmakedefine ENABLE_THREADS ${ENABLE_THREADS}
+
+/* Define if timestamp information (e.g., __DATE__) is allowed */
+#cmakedefine ENABLE_TIMESTAMPS ${ENABLE_TIMESTAMPS}
+
+/* Define to 1 if you have the `argz_append' function. */
+#cmakedefine HAVE_ARGZ_APPEND ${HAVE_ARGZ_APPEND}
+
+/* Define to 1 if you have the `argz_create_sep' function. */
+#cmakedefine HAVE_ARGZ_CREATE_SEP ${HAVE_ARGZ_CREATE_SEP}
+
+/* Define to 1 if you have the <argz.h> header file. */
+#cmakedefine HAVE_ARGZ_H ${HAVE_ARGZ_H}
+
+/* Define to 1 if you have the `argz_insert' function. */
+#cmakedefine HAVE_ARGZ_INSERT ${HAVE_ARGZ_INSERT}
+
+/* Define to 1 if you have the `argz_next' function. */
+#cmakedefine HAVE_ARGZ_NEXT ${HAVE_ARGZ_NEXT}
+
+/* Define to 1 if you have the `argz_stringify' function. */
+#cmakedefine HAVE_ARGZ_STRINGIFY ${HAVE_ARGZ_STRINGIFY}
+
+/* Define to 1 if you have the <assert.h> header file. */
+#cmakedefine HAVE_ASSERT_H ${HAVE_ASSERT_H}
+
+/* Define to 1 if you have the `backtrace' function. */
+#undef HAVE_BACKTRACE
+
+/* Define to 1 if you have the `bcopy' function. */
+#undef HAVE_BCOPY
+
+/* Define to 1 if you have the `ceilf' function. */
+#cmakedefine HAVE_CEILF ${HAVE_CEILF}
+
+/* Define if the circo program is available */
+#cmakedefine HAVE_CIRCO ${HAVE_CIRCO}
+
+/* Define to 1 if you have the `closedir' function. */
+#cmakedefine HAVE_CLOSEDIR ${HAVE_CLOSEDIR}
+
+/* Define to 1 if you have the <CrashReporterClient.h> header file. */
+#undef HAVE_CRASHREPORTERCLIENT_H
+
+/* Define if __crashreporter_info__ exists. */
+#undef HAVE_CRASHREPORTER_INFO
+
+/* Define to 1 if you have the <ctype.h> header file. */
+#cmakedefine HAVE_CTYPE_H ${HAVE_CTYPE_H}
+
+/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
+   don't. */
+#cmakedefine01 HAVE_DECL_STRERROR_S
+
+/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
+   */
+#cmakedefine HAVE_DIRENT_H ${HAVE_DIRENT_H}
+
+/* Define if you have the GNU dld library. */
+#undef HAVE_DLD
+
+/* Define to 1 if you have the <dld.h> header file. */
+#cmakedefine HAVE_DLD_H ${HAVE_DLD_H}
+
+/* Define to 1 if you have the `dlerror' function. */
+#cmakedefine HAVE_DLERROR ${HAVE_DLERROR}
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#cmakedefine HAVE_DLFCN_H ${HAVE_DLFCN_H}
+
+/* Define if dlopen() is available on this platform. */
+#cmakedefine HAVE_DLOPEN ${HAVE_DLOPEN}
+
+/* Define to 1 if you have the <dl.h> header file. */
+#cmakedefine HAVE_DL_H ${HAVE_DL_H}
+
+/* Define if the dot program is available */
+#cmakedefine HAVE_DOT ${HAVE_DOT}
+
+/* Define if the dotty program is available */
+#cmakedefine HAVE_DOTTY ${HAVE_DOTTY}
+
+/* Define if you have the _dyld_func_lookup function. */
+#undef HAVE_DYLD
+
+/* Define to 1 if you have the <errno.h> header file. */
+#cmakedefine HAVE_ERRNO_H ${HAVE_ERRNO_H}
+
+/* Define to 1 if the system has the type `error_t'. */
+#cmakedefine HAVE_ERROR_T ${HAVE_ERROR_T}
+
+/* Define to 1 if you have the <execinfo.h> header file. */
+#cmakedefine HAVE_EXECINFO_H ${HAVE_EXECINFO_H}
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#cmakedefine HAVE_FCNTL_H ${HAVE_FCNTL_H}
+
+/* Define if the fdp program is available */
+#cmakedefine HAVE_FDP ${HAVE_FDP}
+
+/* Define to 1 if you have the <fenv.h> header file. */
+#cmakedefine HAVE_FENV_H ${HAVE_FENV_H}
+
+/* Define if libffi is available on this platform. */
+#cmakedefine HAVE_FFI_CALL ${HAVE_FFI_CALL}
+
+/* Define to 1 if you have the <ffi/ffi.h> header file. */
+#cmakedefine HAVE_FFI_FFI_H ${HAVE_FFI_FFI_H}
+
+/* Define to 1 if you have the <ffi.h> header file. */
+#cmakedefine HAVE_FFI_H ${HAVE_FFI_H}
+
+/* Set to 1 if the finite function is found in <ieeefp.h> */
+#cmakedefine HAVE_FINITE_IN_IEEEFP_H ${HAVE_FINITE_IN_IEEEFP_H}
+
+/* Define to 1 if you have the `floorf' function. */
+#cmakedefine HAVE_FLOORF ${HAVE_FLOORF}
+
+/* Define to 1 if you have the `fmodf' function. */
+#cmakedefine HAVE_FMODF ${HAVE_FMODF}
+
+/* Define to 1 if you have the `getcwd' function. */
+#cmakedefine HAVE_GETCWD ${HAVE_GETCWD}
+
+/* Define to 1 if you have the `getpagesize' function. */
+#cmakedefine HAVE_GETPAGESIZE ${HAVE_GETPAGESIZE}
+
+/* Define to 1 if you have the `getrlimit' function. */
+#cmakedefine HAVE_GETRLIMIT ${HAVE_GETRLIMIT}
+
+/* Define to 1 if you have the `getrusage' function. */
+#cmakedefine HAVE_GETRUSAGE ${HAVE_GETRUSAGE}
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#cmakedefine HAVE_GETTIMEOFDAY ${HAVE_GETTIMEOFDAY}
+
+/* Define if the Graphviz program is available */
+#undef HAVE_GRAPHVIZ
+
+/* Define if the gv program is available */
+#cmakedefine HAVE_GV ${HAVE_GV}
+
+/* Define to 1 if you have the `index' function. */
+#cmakedefine HAVE_INDEX ${HAVE_INDEX}
+
+/* Define to 1 if the system has the type `int64_t'. */
+#cmakedefine HAVE_INT64_T ${HAVE_INT64_T}
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
+
+/* Define to 1 if you have the `isatty' function. */
+#cmakedefine HAVE_ISATTY 1
+
+/* Set to 1 if the isinf function is found in <cmath> */
+#cmakedefine HAVE_ISINF_IN_CMATH ${HAVE_ISINF_IN_CMATH}
+
+/* Set to 1 if the isinf function is found in <math.h> */
+#cmakedefine HAVE_ISINF_IN_MATH_H ${HAVE_ISINF_IN_MATH_H}
+
+/* Set to 1 if the isnan function is found in <cmath> */
+#cmakedefine HAVE_ISNAN_IN_CMATH ${HAVE_ISNAN_IN_CMATH}
+
+/* Set to 1 if the isnan function is found in <math.h> */
+#cmakedefine HAVE_ISNAN_IN_MATH_H ${HAVE_ISNAN_IN_MATH_H}
+
+/* Define if you have the libdl library or equivalent. */
+#cmakedefine HAVE_LIBDL ${HAVE_LIBDL}
+
+/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
+#cmakedefine HAVE_LIBIMAGEHLP ${HAVE_LIBIMAGEHLP}
+
+/* Define to 1 if you have the `m' library (-lm). */
+#undef HAVE_LIBM
+
+/* Define to 1 if you have the `psapi' library (-lpsapi). */
+#cmakedefine HAVE_LIBPSAPI ${HAVE_LIBPSAPI}
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#cmakedefine HAVE_LIBPTHREAD ${HAVE_LIBPTHREAD}
+
+/* Define to 1 if you have the `udis86' library (-ludis86). */
+#undef HAVE_LIBUDIS86
+
+/* Define to 1 if you have the <limits.h> header file. */
+#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H}
+
+/* Define if you can use -Wl,-export-dynamic. */
+#define HAVE_LINK_EXPORT_DYNAMIC 1
+
+/* Define to 1 if you have the <link.h> header file. */
+#cmakedefine HAVE_LINK_H ${HAVE_LINK_H}
+
+/* Define if you can use -Wl,-R. to pass -R. to the linker, in order to add
+   the current directory to the dynamic linker search path. */
+#undef HAVE_LINK_R
+
+/* Define to 1 if you have the `longjmp' function. */
+#cmakedefine HAVE_LONGJMP ${HAVE_LONGJMP}
+
+/* Define to 1 if you have the <mach/mach.h> header file. */
+#cmakedefine HAVE_MACH_MACH_H ${HAVE_MACH_MACH_H}
+
+/* Define to 1 if you have the <mach-o/dyld.h> header file. */
+#cmakedefine HAVE_MACH_O_DYLD_H ${HAVE_MACH_O_DYLD_H}
+
+/* Define if mallinfo() is available on this platform. */
+#cmakedefine HAVE_MALLINFO ${HAVE_MALLINFO}
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#cmakedefine HAVE_MALLOC_H ${HAVE_MALLOC_H}
+
+/* Define to 1 if you have the <malloc/malloc.h> header file. */
+#cmakedefine HAVE_MALLOC_MALLOC_H ${HAVE_MALLOC_MALLOC_H}
+
+/* Define to 1 if you have the `malloc_zone_statistics' function. */
+#cmakedefine HAVE_MALLOC_ZONE_STATISTICS ${HAVE_MALLOC_ZONE_STATISTICS}
+
+/* Define to 1 if you have the `memcpy' function. */
+#cmakedefine HAVE_MEMCPY ${HAVE_MEMCPY}
+
+/* Define to 1 if you have the `memmove' function. */
+#cmakedefine HAVE_MEMMOVE ${HAVE_MEMMOVE}
+
+/* Define to 1 if you have the <memory.h> header file. */
+#cmakedefine HAVE_MEMORY_H ${HAVE_MEMORY_H}
+
+/* Define to 1 if you have the `mkdtemp' function. */
+#cmakedefine HAVE_MKDTEMP ${HAVE_MKDTEMP}
+
+/* Define to 1 if you have the `mkstemp' function. */
+#cmakedefine HAVE_MKSTEMP ${HAVE_MKSTEMP}
+
+/* Define to 1 if you have the `mktemp' function. */
+#cmakedefine HAVE_MKTEMP ${HAVE_MKTEMP}
+
+/* Define to 1 if you have a working `mmap' system call. */
+#undef HAVE_MMAP
+
+/* Define if mmap() uses MAP_ANONYMOUS to map anonymous pages, or undefine if
+   it uses MAP_ANON */
+#undef HAVE_MMAP_ANONYMOUS
+
+/* Define if mmap() can map files into memory */
+#undef HAVE_MMAP_FILE
+
+/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
+#cmakedefine HAVE_NDIR_H ${HAVE_NDIR_H}
+
+/* Define to 1 if you have the `nearbyintf' function. */
+#cmakedefine HAVE_NEARBYINTF ${HAVE_NEARBYINTF}
+
+/* Define if the neato program is available */
+#cmakedefine HAVE_NEATO ${HAVE_NEATO}
+
+/* Define to 1 if you have the `opendir' function. */
+#cmakedefine HAVE_OPENDIR ${HAVE_OPENDIR}
+
+/* Define to 1 if you have the `posix_spawn' function. */
+#cmakedefine HAVE_POSIX_SPAWN ${HAVE_POSIX_SPAWN}
+
+/* Define to 1 if you have the `powf' function. */
+#cmakedefine HAVE_POWF ${HAVE_POWF}
+
+/* Define if libtool can extract symbol lists from object files. */
+#undef HAVE_PRELOADED_SYMBOLS
+
+/* Define to have the %a format string */
+#undef HAVE_PRINTF_A
+
+/* Have pthread_getspecific */
+#cmakedefine HAVE_PTHREAD_GETSPECIFIC ${HAVE_PTHREAD_GETSPECIFIC}
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#cmakedefine HAVE_PTHREAD_H ${HAVE_PTHREAD_H}
+
+/* Have pthread_mutex_lock */
+#cmakedefine HAVE_PTHREAD_MUTEX_LOCK ${HAVE_PTHREAD_MUTEX_LOCK}
+
+/* Have pthread_rwlock_init */
+#cmakedefine HAVE_PTHREAD_RWLOCK_INIT ${HAVE_PTHREAD_RWLOCK_INIT}
+
+/* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
+#cmakedefine HAVE_RAND48 ${HAVE_RAND48}
+
+/* Define to 1 if you have the `readdir' function. */
+#cmakedefine HAVE_READDIR ${HAVE_READDIR}
+
+/* Define to 1 if you have the `realpath' function. */
+#undef HAVE_REALPATH
+
+/* Define to 1 if you have the `rindex' function. */
+#cmakedefine HAVE_RINDEX ${HAVE_RINDEX}
+
+/* Define to 1 if you have the `rintf' function. */
+#undef HAVE_RINTF
+
+/* Define to 1 if you have the `round' function. */
+#cmakedefine HAVE_ROUND ${HAVE_ROUND}
+
+/* Define to 1 if you have the `roundf' function. */
+#undef HAVE_ROUNDF
+
+/* Define to 1 if you have the `sbrk' function. */
+#cmakedefine HAVE_SBRK ${HAVE_SBRK}
+
+/* Define to 1 if you have the `setenv' function. */
+#cmakedefine HAVE_SETENV ${HAVE_SETENV}
+
+/* Define to 1 if you have the `setjmp' function. */
+#cmakedefine HAVE_SETJMP ${HAVE_SETJMP}
+
+/* Define to 1 if you have the <setjmp.h> header file. */
+#cmakedefine HAVE_SETJMP_H ${HAVE_SETJMP_H}
+
+/* Define to 1 if you have the `setrlimit' function. */
+#cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT}
+
+/* Define if you have the shl_load function. */
+#undef HAVE_SHL_LOAD
+
+/* Define to 1 if you have the `siglongjmp' function. */
+#cmakedefine HAVE_SIGLONGJMP ${HAVE_SIGLONGJMP}
+
+/* Define to 1 if you have the <signal.h> header file. */
+#cmakedefine HAVE_SIGNAL_H ${HAVE_SIGNAL_H}
+
+/* Define to 1 if you have the `sigsetjmp' function. */
+#cmakedefine HAVE_SIGSETJMP ${HAVE_SIGSETJMP}
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#cmakedefine HAVE_STDIO_H ${HAVE_STDIO_H}
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#cmakedefine HAVE_STDLIB_H ${HAVE_STDLIB_H}
+
+/* Set to 1 if the std::isinf function is found in <cmath> */
+#undef HAVE_STD_ISINF_IN_CMATH
+
+/* Set to 1 if the std::isnan function is found in <cmath> */
+#undef HAVE_STD_ISNAN_IN_CMATH
+
+/* Define to 1 if you have the `strchr' function. */
+#cmakedefine HAVE_STRCHR ${HAVE_STRCHR}
+
+/* Define to 1 if you have the `strcmp' function. */
+#cmakedefine HAVE_STRCMP ${HAVE_STRCMP}
+
+/* Define to 1 if you have the `strdup' function. */
+#cmakedefine HAVE_STRDUP ${HAVE_STRDUP}
+
+/* Define to 1 if you have the `strerror' function. */
+#cmakedefine HAVE_STRERROR ${HAVE_STRERROR}
+
+/* Define to 1 if you have the `strerror_r' function. */
+#cmakedefine HAVE_STRERROR_R ${HAVE_STRERROR_R}
+
+/* Define to 1 if you have the <strings.h> header file. */
+#cmakedefine HAVE_STRINGS_H ${HAVE_STRINGS_H}
+
+/* Define to 1 if you have the <string.h> header file. */
+#cmakedefine HAVE_STRING_H ${HAVE_STRING_H}
+
+/* Define to 1 if you have the `strrchr' function. */
+#cmakedefine HAVE_STRRCHR ${HAVE_STRRCHR}
+
+/* Define to 1 if you have the `strtof' function. */
+#cmakedefine HAVE_STRTOF ${HAVE_STRTOF}
+
+/* Define to 1 if you have the `strtoll' function. */
+#cmakedefine HAVE_STRTOLL ${HAVE_STRTOLL}
+
+/* Define to 1 if you have the `strtoq' function. */
+#cmakedefine HAVE_STRTOQ ${HAVE_STRTOQ}
+
+/* Define to 1 if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
+   */
+#cmakedefine HAVE_SYS_DIR_H ${HAVE_SYS_DIR_H}
+
+/* Define to 1 if you have the <sys/dl.h> header file. */
+#cmakedefine HAVE_SYS_DL_H ${HAVE_SYS_DL_H}
+
+/* Define to 1 if you have the <sys/ioctl.h> header file. */
+#cmakedefine HAVE_SYS_IOCTL_H ${HAVE_SYS_IOCTL_H}
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#cmakedefine HAVE_SYS_MMAN_H ${HAVE_SYS_MMAN_H}
+
+/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
+   */
+#cmakedefine HAVE_SYS_NDIR_H ${HAVE_SYS_NDIR_H}
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#cmakedefine HAVE_SYS_PARAM_H ${HAVE_SYS_PARAM_H}
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#cmakedefine HAVE_SYS_RESOURCE_H ${HAVE_SYS_RESOURCE_H}
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#cmakedefine HAVE_SYS_STAT_H ${HAVE_SYS_STAT_H}
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#cmakedefine HAVE_SYS_TIME_H ${HAVE_SYS_TIME_H}
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
+
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#cmakedefine HAVE_SYS_UIO_H ${HAVE_SYS_UIO_H}
+
+/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
+#cmakedefine HAVE_SYS_WAIT_H ${HAVE_SYS_WAIT_H}
+
+/* Define to 1 if you have the <termios.h> header file. */
+#cmakedefine HAVE_TERMIOS_H ${HAVE_TERMIOS_H}
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#cmakedefine HAVE_UNISTD_H ${HAVE_UNISTD_H}
+
+/* Define to 1 if you have the <utime.h> header file. */
+#cmakedefine HAVE_UTIME_H ${HAVE_UTIME_H}
+
+/* Define to 1 if the system has the type `u_int64_t'. */
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+#cmakedefine HAVE_VALGRIND_VALGRIND_H ${HAVE_VALGRIND_VALGRIND_H}
+
+/* Define to 1 if you have the <windows.h> header file. */
+#cmakedefine HAVE_WINDOWS_H ${HAVE_WINDOWS_H}
+
+/* Define to 1 if you have the `writev' function. */
+#cmakedefine HAVE_WRITEV ${HAVE_WRITEV}
+
+/* Define if the xdot.py program is available */
+#cmakedefine HAVE_XDOT_PY ${HAVE_XDOT_PY}
+
+/* Have host's _alloca */
+#cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA}
+
+/* Have host's __alloca */
+#cmakedefine HAVE___ALLOCA ${HAVE___ALLOCA}
+
+/* Have host's __ashldi3 */
+#cmakedefine HAVE___ASHLDI3 ${HAVE___ASHLDI3}
+
+/* Have host's __ashrdi3 */
+#cmakedefine HAVE___ASHRDI3 ${HAVE___ASHRDI3}
+
+/* Have host's __chkstk */
+#cmakedefine HAVE___CHKSTK ${HAVE___CHKSTK}
+
+/* Have host's __cmpdi2 */
+#cmakedefine HAVE___CMPDI2 ${HAVE___CMPDI2}
+
+/* Have host's __divdi3 */
+#cmakedefine HAVE___DIVDI3 ${HAVE___DIVDI3}
+
+/* Define to 1 if you have the `__dso_handle' function. */
+#undef HAVE___DSO_HANDLE
+
+/* Have host's __fixdfdi */
+#cmakedefine HAVE___FIXDFDI ${HAVE___FIXDFDI}
+
+/* Have host's __fixsfdi */
+#cmakedefine HAVE___FIXSFDI ${HAVE___FIXSFDI}
+
+/* Have host's __floatdidf */
+#cmakedefine HAVE___FLOATDIDF ${HAVE___FLOATDIDF}
+
+/* Have host's __lshrdi3 */
+#cmakedefine HAVE___LSHRDI3 ${HAVE___LSHRDI3}
+
+/* Have host's __main */
+#cmakedefine HAVE___MAIN ${HAVE___MAIN}
+
+/* Have host's __moddi3 */
+#cmakedefine HAVE___MODDI3 ${HAVE___MODDI3}
+
+/* Have host's __udivdi3 */
+#cmakedefine HAVE___UDIVDI3 ${HAVE___UDIVDI3}
+
+/* Have host's __umoddi3 */
+#cmakedefine HAVE___UMODDI3 ${HAVE___UMODDI3}
+
+/* Have host's ___chkstk */
+#cmakedefine HAVE____CHKSTK ${HAVE____CHKSTK}
+
+/* Linker version detected at compile time. */
+#undef HOST_LINK_VERSION
+
+/* Installation directory for binary executables */
+#undef LLVM_BINDIR
+
+/* Time at which LLVM was configured */
+#undef LLVM_CONFIGTIME
+
+/* Installation directory for data files */
+#undef LLVM_DATADIR
+
+/* Installation directory for documentation */
+#undef LLVM_DOCSDIR
+
+/* Installation directory for config files */
+#undef LLVM_ETCDIR
+
+/* Host triple we were built on */
+#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
+
+/* Installation directory for include files */
+#undef LLVM_INCLUDEDIR
+
+/* Installation directory for .info files */
+#undef LLVM_INFODIR
+
+/* Installation directory for libraries */
+#undef LLVM_LIBDIR
+
+/* Installation directory for man pages */
+#undef LLVM_MANDIR
+
+/* Build multithreading support into LLVM */
+#cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED}
+
+/* LLVM architecture name for the native architecture, if available */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+
+/* LLVM name for the native Target init function, if available */
+#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
+
+/* LLVM name for the native TargetInfo init function, if available */
+#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+
+/* Define if this is Unixish platform */
+#cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
+
+/* Define if this is Win32ish platform */
+#cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32}
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#cmakedefine LLVM_PATH_DOT "${LLVM_PATH_DOT}"
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#cmakedefine LLVM_PATH_DOTTY "${LLVM_PATH_DOTTY}"
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#cmakedefine LLVM_PATH_FDP "${LLVM_PATH_FDP}"
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#undef LLVM_PATH_GRAPHVIZ
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#cmakedefine LLVM_PATH_GV "${LLVM_PATH_GV}"
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#cmakedefine LLVM_PATH_NEATO "${LLVM_PATH_NEATO}"
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
+
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT_PY}"
+
+/* Installation prefix directory */
+#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+
+/* Define if the OS needs help to load dependent libraries for dlopen(). */
+#cmakedefine LTDL_DLOPEN_DEPLIBS ${LTDL_DLOPEN_DEPLIBS}
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LTDL_OBJDIR
+
+/* Define to the name of the environment variable that determines the dynamic
+   library search path. */
+#cmakedefine LTDL_SHLIBPATH_VAR "${LTDL_SHLIBPATH_VAR}"
+
+/* Define to the extension used for shared libraries, say, ".so". */
+#cmakedefine LTDL_SHLIB_EXT "${LTDL_SHLIB_EXT}"
+
+/* Define to the system default library search path. */
+#cmakedefine LTDL_SYSSEARCHPATH "${LTDL_SYSSEARCHPATH}"
+
+/* Define if /dev/zero should be used when mapping RWX memory, or undefine if
+   it's not necessary */
+#undef NEED_DEV_ZERO_FOR_MMAP
+
+/* Define if dlsym() requires a leading underscore in symbol names. */
+#undef NEED_USCORE
+
+/* Define to the address where bug reports for this package should be sent. */
+#cmakedefine PACKAGE_BUGREPORT "${PACKAGE_BUGREPORT}"
+
+/* Define to the full name of this package. */
+#cmakedefine PACKAGE_NAME "${PACKAGE_NAME}"
+
+/* Define to the full name and version of this package. */
+#cmakedefine PACKAGE_STRING "${PACKAGE_STRING}"
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#cmakedefine PACKAGE_VERSION "${PACKAGE_VERSION}"
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#cmakedefine RETSIGTYPE ${RETSIGTYPE}
+
+/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
+#undef STAT_MACROS_BROKEN
+
+/* If using the C implementation of alloca, define if you know the
+   direction of stack growth for your system; otherwise it will be
+   automatically deduced at runtime.
+        STACK_DIRECTION > 0 => grows toward higher addresses
+        STACK_DIRECTION < 0 => grows toward lower addresses
+        STACK_DIRECTION = 0 => direction of growth unknown */
+#undef STACK_DIRECTION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define to 1 if your <sys/time.h> declares `struct tm'. */
+#undef TM_IN_SYS_TIME
+
+/* Define if we have the oprofile JIT-support library */
+#undef USE_OPROFILE
+
+/* Define if use udis86 library */
+#undef USE_UDIS86
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to a type to use for `error_t' if it is not otherwise available. */
+#cmakedefine error_t ${error_t}
+
+/* Define to `int' if <sys/types.h> does not define. */
+#undef pid_t
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
+
+/* Define if the twopi program is available */
+#cmakedefine HAVE_TWOPI ${HAVE_TWOPI}
+
+/* Define to 1 if the system has the type `uint64_t'. */
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+
+/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
+   `char[]'. */
+#undef YYTEXT_POINTER
+
+/* Define to a type to use for `mode_t' if it is not otherwise available. */
+#cmakedefine mode_t ${mode_t}
+
+/* Define to a function replacing strtoll */
+#cmakedefine strtoll ${strtoll}
+
+/* Define to a function implementing strtoull */
+#cmakedefine strtoull ${strtoull}
+
+/* Define to a function implementing stricmp */
+#cmakedefine stricmp ${stricmp}
+
+/* Define to a function implementing strdup */
+#cmakedefine strdup ${strdup}
+
+/* Define to 1 if you have the `_chsize_s' function. */
+#cmakedefine HAVE__CHSIZE_S ${HAVE__CHSIZE_S}
+
+/* define if the compiler implements namespaces */
+#undef HAVE_NAMESPACES
+
+/* Does not have std namespace iterator */
+#undef HAVE_STD_ITERATOR
+
+/* Does not have forward iterator */
+#undef HAVE_FWD_ITERATOR
+
+/* Does not have bi-directional iterator */
+#undef HAVE_BI_ITERATOR
+
+/* Does not have <hash_map> */
+#undef HAVE_GLOBAL_HASH_MAP
+
+/* Does not have hash_set in global namespace */
+#undef HAVE_GLOBAL_HASH_SET
+
+/* Does not have <ext/hash_map> */
+#undef HAVE_GNU_EXT_HASH_MAP
+
+/* Does not have hash_set in gnu namespace */
+#undef HAVE_GNU_EXT_HASH_SET
+
+/* Does not have <ext/hash_map> */
+#undef HAVE_STD_EXT_HASH_MAP
+
+/* Does not have hash_set in std namespace */
+#undef HAVE_STD_EXT_HASH_SET
+
+/* Added by Kevin -- Maximum path length */
+#cmakedefine MAXPATHLEN ${MAXPATHLEN}
+
+#endif
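For readers unfamiliar with the template above: CMake's configure_file() rewrites each #cmakedefine line according to whether the named variable is true at configure time. A sketch of the generated output, using HAVE_DLFCN_H as a representative entry from the listing:

    /* Template line in config.h.cmake:                                   */
    /*     #cmakedefine HAVE_DLFCN_H ${HAVE_DLFCN_H}                      */
    /* When the CMake variable HAVE_DLFCN_H is true at configure time,    */
    /* configure_file() emits a real definition in the generated header:  */
    #define HAVE_DLFCN_H 1
    /* When the variable is false or unset, the same line is emitted as a */
    /* commented-out "#undef HAVE_DLFCN_H" instead, leaving the macro     */
    /* undefined. The #cmakedefine01 variant (HAVE_DECL_STRERROR_S above) */
    /* always defines its macro to 0 or 1, so it can be tested with #if.  */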
diff --git a/final/include/llvm/Config/config.h.in b/final/include/llvm/Config/config.h.in
new file mode 100644
index 00000000000..14c44b4b1e2
--- /dev/null
+++ b/final/include/llvm/Config/config.h.in
@@ -0,0 +1,687 @@
+/* include/llvm/Config/config.h.in. Generated from autoconf/configure.ac by autoheader. */
+
+#ifndef CONFIG_H
+#define CONFIG_H
+
+/* Relative directory for resource files */
+#undef CLANG_RESOURCE_DIR
+
+/* 32 bit multilib directory. */
+#undef CXX_INCLUDE_32BIT_DIR
+
+/* 64 bit multilib directory. */
+#undef CXX_INCLUDE_64BIT_DIR
+
+/* Arch the libstdc++ headers. */
+#undef CXX_INCLUDE_ARCH
+
+/* Directory with the libstdc++ headers. */
+#undef CXX_INCLUDE_ROOT
+
+/* Directories clang will search for headers */
+#undef C_INCLUDE_DIRS
+
+/* Define if CBE is enabled for printf %a output */
+#undef ENABLE_CBE_PRINTF_A
+
+/* Define if position independent code is enabled */
+#undef ENABLE_PIC
+
+/* Define if threads enabled */
+#undef ENABLE_THREADS
+
+/* Define if timestamp information (e.g., __DATE__) is allowed */
+#undef ENABLE_TIMESTAMPS
+
+/* Define to 1 if you have the `argz_append' function. */
+#undef HAVE_ARGZ_APPEND
+
+/* Define to 1 if you have the `argz_create_sep' function. */
+#undef HAVE_ARGZ_CREATE_SEP
+
+/* Define to 1 if you have the <argz.h> header file. */
+#undef HAVE_ARGZ_H
+
+/* Define to 1 if you have the `argz_insert' function. */
+#undef HAVE_ARGZ_INSERT
+
+/* Define to 1 if you have the `argz_next' function. */
+#undef HAVE_ARGZ_NEXT
+
+/* Define to 1 if you have the `argz_stringify' function. */
+#undef HAVE_ARGZ_STRINGIFY
+
+/* Define to 1 if you have the <assert.h> header file. */
+#undef HAVE_ASSERT_H
+
+/* Define to 1 if you have the `backtrace' function. */
+#undef HAVE_BACKTRACE
+
+/* Define to 1 if you have the `bcopy' function. */
+#undef HAVE_BCOPY
+
+/* Define to 1 if you have the `ceilf' function. */
+#undef HAVE_CEILF
+
+/* Define if the circo program is available */
+#undef HAVE_CIRCO
+
+/* Define to 1 if you have the `closedir' function. */
+#undef HAVE_CLOSEDIR
+
+/* Define to 1 if you have the <CrashReporterClient.h> header file. */
+#undef HAVE_CRASHREPORTERCLIENT_H
+
+/* Define if __crashreporter_info__ exists. */
+#undef HAVE_CRASHREPORTER_INFO
+
+/* Define to 1 if you have the <ctype.h> header file. */
+#undef HAVE_CTYPE_H
+
+/* Define to 1 if you have the declaration of `strerror_s', and to 0 if you
+   don't. */
+#undef HAVE_DECL_STRERROR_S
+
+/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'.
+   */
+#undef HAVE_DIRENT_H
+
+/* Define if you have the GNU dld library. */
+#undef HAVE_DLD
+
+/* Define to 1 if you have the <dld.h> header file. */
+#undef HAVE_DLD_H
+
+/* Define to 1 if you have the `dlerror' function. */
+#undef HAVE_DLERROR
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define if dlopen() is available on this platform. */
+#undef HAVE_DLOPEN
+
+/* Define to 1 if you have the <dl.h> header file. */
+#undef HAVE_DL_H
+
+/* Define if the dot program is available */
+#undef HAVE_DOT
+
+/* Define if the dotty program is available */
+#undef HAVE_DOTTY
+
+/* Define if you have the _dyld_func_lookup function. */
+#undef HAVE_DYLD
+
+/* Define to 1 if you have the <errno.h> header file. */
+#undef HAVE_ERRNO_H
+
+/* Define to 1 if the system has the type `error_t'. */
+#undef HAVE_ERROR_T
+
+/* Define to 1 if you have the <execinfo.h> header file. */
+#undef HAVE_EXECINFO_H
+
+/* Define to 1 if you have the <fcntl.h> header file. */
+#undef HAVE_FCNTL_H
+
+/* Define if the fdp program is available */
+#undef HAVE_FDP
+
+/* Define to 1 if you have the <fenv.h> header file. */
+#undef HAVE_FENV_H
+
+/* Define if libffi is available on this platform. */
+#undef HAVE_FFI_CALL
+
+/* Define to 1 if you have the <ffi/ffi.h> header file. */
+#undef HAVE_FFI_FFI_H
+
+/* Define to 1 if you have the <ffi.h> header file. */
+#undef HAVE_FFI_H
+
+/* Set to 1 if the finite function is found in <ieeefp.h> */
+#undef HAVE_FINITE_IN_IEEEFP_H
+
+/* Define to 1 if you have the `floorf' function. */
+#undef HAVE_FLOORF
+
+/* Define to 1 if you have the `fmodf' function. */
+#undef HAVE_FMODF
+
+/* Define to 1 if you have the `getcwd' function. */
+#undef HAVE_GETCWD
+
+/* Define to 1 if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if you have the `getrlimit' function. */
+#undef HAVE_GETRLIMIT
+
+/* Define to 1 if you have the `getrusage' function. */
+#undef HAVE_GETRUSAGE
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Define if the Graphviz program is available */
+#undef HAVE_GRAPHVIZ
+
+/* Define if the gv program is available */
+#undef HAVE_GV
+
+/* Define to 1 if you have the `index' function. */
+#undef HAVE_INDEX
+
+/* Define to 1 if the system has the type `int64_t'. */
+#undef HAVE_INT64_T
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `isatty' function. */
+#undef HAVE_ISATTY
+
+/* Set to 1 if the isinf function is found in <cmath> */
+#undef HAVE_ISINF_IN_CMATH
+
+/* Set to 1 if the isinf function is found in <math.h> */
+#undef HAVE_ISINF_IN_MATH_H
+
+/* Set to 1 if the isnan function is found in <cmath> */
+#undef HAVE_ISNAN_IN_CMATH
+
+/* Set to 1 if the isnan function is found in <math.h> */
+#undef HAVE_ISNAN_IN_MATH_H
+
+/* Define if you have the libdl library or equivalent. */
+#undef HAVE_LIBDL
+
+/* Define to 1 if you have the `imagehlp' library (-limagehlp). */
+#undef HAVE_LIBIMAGEHLP
+
+/* Define to 1 if you have the `m' library (-lm). */
+#undef HAVE_LIBM
+
+/* Define to 1 if you have the `psapi' library (-lpsapi). */
+#undef HAVE_LIBPSAPI
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#undef HAVE_LIBPTHREAD
+
+/* Define to 1 if you have the `udis86' library (-ludis86). */
+#undef HAVE_LIBUDIS86
+
+/* Define to 1 if you have the <limits.h> header file. */
+#undef HAVE_LIMITS_H
+
+/* Define if you can use -Wl,-export-dynamic. */
+#undef HAVE_LINK_EXPORT_DYNAMIC
+
+/* Define to 1 if you have the <link.h> header file. */
+#undef HAVE_LINK_H
+
+/* Define if you can use -Wl,-R. to pass -R. to the linker, in order to add
+   the current directory to the dynamic linker search path. */
+#undef HAVE_LINK_R
+
+/* Define to 1 if you have the `longjmp' function. */
+#undef HAVE_LONGJMP
+
+/* Define to 1 if you have the <mach/mach.h> header file. */
+#undef HAVE_MACH_MACH_H
+
+/* Define to 1 if you have the <mach-o/dyld.h> header file. */
+#undef HAVE_MACH_O_DYLD_H
+
+/* Define if mallinfo() is available on this platform. */
+#undef HAVE_MALLINFO
+
+/* Define to 1 if you have the <malloc.h> header file. */
+#undef HAVE_MALLOC_H
+
+/* Define to 1 if you have the <malloc/malloc.h> header file. */
+#undef HAVE_MALLOC_MALLOC_H
+
+/* Define to 1 if you have the `malloc_zone_statistics' function. */
+#undef HAVE_MALLOC_ZONE_STATISTICS
+
+/* Define to 1 if you have the `memcpy' function. */
+#undef HAVE_MEMCPY
+
+/* Define to 1 if you have the `memmove' function. */
+#undef HAVE_MEMMOVE
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `mkdtemp' function. */
+#undef HAVE_MKDTEMP
+
+/* Define to 1 if you have the `mkstemp' function. */
+#undef HAVE_MKSTEMP
+
+/* Define to 1 if you have the `mktemp' function. */
+#undef HAVE_MKTEMP
+
+/* Define to 1 if you have a working `mmap' system call. */
+#undef HAVE_MMAP
+
+/* Define if mmap() uses MAP_ANONYMOUS to map anonymous pages, or undefine if
+   it uses MAP_ANON */
+#undef HAVE_MMAP_ANONYMOUS
+
+/* Define if mmap() can map files into memory */
+#undef HAVE_MMAP_FILE
+
+/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
+#undef HAVE_NDIR_H
+
+/* Define to 1 if you have the `nearbyintf' function. */
+#undef HAVE_NEARBYINTF
+
+/* Define if the neato program is available */
+#undef HAVE_NEATO
+
+/* Define to 1 if you have the `opendir' function. */
+#undef HAVE_OPENDIR
+
+/* Define to 1 if you have the `posix_spawn' function. */
+#undef HAVE_POSIX_SPAWN
+
+/* Define to 1 if you have the `powf' function. */
+#undef HAVE_POWF
+
+/* Define if libtool can extract symbol lists from object files. */
+#undef HAVE_PRELOADED_SYMBOLS
+
+/* Define to have the %a format string */
+#undef HAVE_PRINTF_A
+
+/* Have pthread_getspecific */
+#undef HAVE_PTHREAD_GETSPECIFIC
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#undef HAVE_PTHREAD_H
+
+/* Have pthread_mutex_lock */
+#undef HAVE_PTHREAD_MUTEX_LOCK
+
+/* Have pthread_rwlock_init */
+#undef HAVE_PTHREAD_RWLOCK_INIT
+
+/* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
+#undef HAVE_RAND48
+
+/* Define to 1 if you have the `readdir' function. */
+#undef HAVE_READDIR
+
+/* Define to 1 if you have the `realpath' function. */
+#undef HAVE_REALPATH
+
+/* Define to 1 if you have the `rindex' function. */
+#undef HAVE_RINDEX
+
+/* Define to 1 if you have the `rintf' function. */
+#undef HAVE_RINTF
+
+/* Define to 1 if you have the `round' function. */
+#undef HAVE_ROUND
+
+/* Define to 1 if you have the `roundf' function. */
+#undef HAVE_ROUNDF
+
+/* Define to 1 if you have the `sbrk' function. */
+#undef HAVE_SBRK
+
+/* Define to 1 if you have the `setenv' function. */
+#undef HAVE_SETENV
+
+/* Define to 1 if you have the `setjmp' function. */
+#undef HAVE_SETJMP
+
+/* Define to 1 if you have the <setjmp.h> header file. */
+#undef HAVE_SETJMP_H
+
+/* Define to 1 if you have the `setrlimit' function. */
+#undef HAVE_SETRLIMIT
+
+/* Define if you have the shl_load function. */
+#undef HAVE_SHL_LOAD
+
+/* Define to 1 if you have the `siglongjmp' function. */
+#undef HAVE_SIGLONGJMP
+
+/* Define to 1 if you have the <signal.h> header file. */
+#undef HAVE_SIGNAL_H
+
+/* Define to 1 if you have the `sigsetjmp' function. */
+#undef HAVE_SIGSETJMP
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#undef HAVE_STDIO_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Set to 1 if the std::isinf function is found in <cmath> */
+#undef HAVE_STD_ISINF_IN_CMATH
+
+/* Set to 1 if the std::isnan function is found in <cmath> */
+#undef HAVE_STD_ISNAN_IN_CMATH
+
+/* Define to 1 if you have the `strchr' function. */
+#undef HAVE_STRCHR
+
+/* Define to 1 if you have the `strcmp' function. */
+#undef HAVE_STRCMP
+
+/* Define to 1 if you have the `strdup' function. */
+#undef HAVE_STRDUP
+
+/* Define to 1 if you have the `strerror' function. */
+#undef HAVE_STRERROR
+
+/* Define to 1 if you have the `strerror_r' function. */
+#undef HAVE_STRERROR_R
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strrchr' function. */
+#undef HAVE_STRRCHR
+
+/* Define to 1 if you have the `strtof' function. */
+#undef HAVE_STRTOF
+
+/* Define to 1 if you have the `strtoll' function. */
+#undef HAVE_STRTOLL
+
+/* Define to 1 if you have the `strtoq' function. */
+#undef HAVE_STRTOQ
+
+/* Define to 1 if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
+   */
+#undef HAVE_SYS_DIR_H
+
+/* Define to 1 if you have the <sys/dl.h> header file. */
+#undef HAVE_SYS_DL_H
+
+/* Define to 1 if you have the <sys/ioctl.h> header file. */
+#undef HAVE_SYS_IOCTL_H
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
+   */
+#undef HAVE_SYS_NDIR_H
+
+/* Define to 1 if you have the <sys/param.h> header file. */
+#undef HAVE_SYS_PARAM_H
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#undef HAVE_SYS_RESOURCE_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#undef HAVE_SYS_TIME_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the <sys/uio.h> header file. */
+#undef HAVE_SYS_UIO_H
+
+/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */
+#undef HAVE_SYS_WAIT_H
+
+/* Define to 1 if you have the <termios.h> header file. */
+#undef HAVE_TERMIOS_H
+
+/* Define if the twopi program is available */
+#undef HAVE_TWOPI
+
+/* Define to 1 if the system has the type `uint64_t'. */
+#undef HAVE_UINT64_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if you have the <utime.h> header file. */
+#undef HAVE_UTIME_H
+
+/* Define to 1 if the system has the type `u_int64_t'. */
+#undef HAVE_U_INT64_T
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+#undef HAVE_VALGRIND_VALGRIND_H
+
+/* Define to 1 if you have the <windows.h> header file. */
+#undef HAVE_WINDOWS_H
+
+/* Define to 1 if you have the `writev' function. */
+#undef HAVE_WRITEV
+
+/* Define if the xdot.py program is available */
+#undef HAVE_XDOT_PY
+
+/* Have host's _alloca */
+#undef HAVE__ALLOCA
+
+/* Have host's __alloca */
+#undef HAVE___ALLOCA
+
+/* Have host's __ashldi3 */
+#undef HAVE___ASHLDI3
+
+/* Have host's __ashrdi3 */
+#undef HAVE___ASHRDI3
+
+/* Have host's __chkstk */
+#undef HAVE___CHKSTK
+
+/* Have host's __cmpdi2 */
+#undef HAVE___CMPDI2
+
+/* Have host's __divdi3 */
+#undef HAVE___DIVDI3
+
+/* Define to 1 if you have the `__dso_handle' function. */
+#undef HAVE___DSO_HANDLE
+
+/* Have host's __fixdfdi */
+#undef HAVE___FIXDFDI
+
+/* Have host's __fixsfdi */
+#undef HAVE___FIXSFDI
+
+/* Have host's __floatdidf */
+#undef HAVE___FLOATDIDF
+
+/* Have host's __lshrdi3 */
+#undef HAVE___LSHRDI3
+
+/* Have host's __main */
+#undef HAVE___MAIN
+
+/* Have host's __moddi3 */
+#undef HAVE___MODDI3
+
+/* Have host's __udivdi3 */
+#undef HAVE___UDIVDI3
+
+/* Have host's __umoddi3 */
+#undef HAVE___UMODDI3
+
+/* Have host's ___chkstk */
+#undef HAVE____CHKSTK
+
+/* Linker version detected at compile time. */
+#undef HOST_LINK_VERSION
+
+/* Installation directory for binary executables */
+#undef LLVM_BINDIR
+
+/* Time at which LLVM was configured */
+#undef LLVM_CONFIGTIME
+
+/* Installation directory for data files */
+#undef LLVM_DATADIR
+
+/* Installation directory for documentation */
+#undef LLVM_DOCSDIR
+
+/* Installation directory for config files */
+#undef LLVM_ETCDIR
+
+/* Host triple we were built on */
+#undef LLVM_HOSTTRIPLE
+
+/* Installation directory for include files */
+#undef LLVM_INCLUDEDIR
+
+/* Installation directory for .info files */
+#undef LLVM_INFODIR
+
+/* Installation directory for libraries */
+#undef LLVM_LIBDIR
+
+/* Installation directory for man pages */
+#undef LLVM_MANDIR
+
+/* Build multithreading support into LLVM */
+#undef LLVM_MULTITHREADED
+
+/* LLVM architecture name for the native architecture, if available */
+#undef LLVM_NATIVE_ARCH
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#undef LLVM_NATIVE_ASMPRINTER
+
+/* LLVM name for the native Target init function, if available */
+#undef LLVM_NATIVE_TARGET
+
+/* LLVM name for the native TargetInfo init function, if available */
+#undef LLVM_NATIVE_TARGETINFO
+
+/* Define if this is Unixish platform */
+#undef LLVM_ON_UNIX
+
+/* Define if this is Win32ish platform */
+#undef LLVM_ON_WIN32
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#undef LLVM_PATH_CIRCO
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#undef LLVM_PATH_DOT
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#undef LLVM_PATH_DOTTY
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#undef LLVM_PATH_FDP
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#undef LLVM_PATH_GRAPHVIZ
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#undef LLVM_PATH_GV
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#undef LLVM_PATH_NEATO
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#undef LLVM_PATH_TWOPI
+
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#undef LLVM_PATH_XDOT_PY
+
+/* Installation prefix directory */
+#undef LLVM_PREFIX
+
+/* Define if the OS needs help to load dependent libraries for dlopen(). */
+#undef LTDL_DLOPEN_DEPLIBS
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LTDL_OBJDIR
+
+/* Define to the name of the environment variable that determines the dynamic
+   library search path. */
+#undef LTDL_SHLIBPATH_VAR
+
+/* Define to the extension used for shared libraries, say, ".so". */
+#undef LTDL_SHLIB_EXT
+
+/* Define to the system default library search path. */
+#undef LTDL_SYSSEARCHPATH
+
+/* Define if /dev/zero should be used when mapping RWX memory, or undefine if
+   it's not necessary */
+#undef NEED_DEV_ZERO_FOR_MMAP
+
+/* Define if dlsym() requires a leading underscore in symbol names. */
+#undef NEED_USCORE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define as the return type of signal handlers (`int' or `void'). */
+#undef RETSIGTYPE
+
+/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */
+#undef STAT_MACROS_BROKEN
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */
+#undef TIME_WITH_SYS_TIME
+
+/* Define to 1 if your <sys/time.h> declares `struct tm'. */
+#undef TM_IN_SYS_TIME
+
+/* Define if we have the oprofile JIT-support library */
+#undef USE_OPROFILE
+
+/* Define if use udis86 library */
+#undef USE_UDIS86
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to a type to use for `error_t' if it is not otherwise available. */
+#undef error_t
+
+/* Define to `int' if <sys/types.h> does not define. */
+#undef pid_t
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
+
+#endif
diff --git a/final/include/llvm/Config/llvm-config.h.cmake b/final/include/llvm/Config/llvm-config.h.cmake
new file mode 100644
index 00000000000..33a40350d5c
--- /dev/null
+++ b/final/include/llvm/Config/llvm-config.h.cmake
@@ -0,0 +1,100 @@
+/*===-- llvm/config/llvm-config.h - llvm configure variable -------*- C -*-===*/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source     */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+
+/* This file enumerates all of the llvm variables from configure so that
+   they can be in exported headers and won't override package specific
+   directives. This is a C file so we can include it in the llvm-c headers. */
+
+/* To avoid multiple inclusions of these variables when we include the exported
+   headers and config.h, conditionally include these. */
+/* TODO: This is a bit of a hack. */
+#ifndef CONFIG_H
+
+/* Installation directory for binary executables */
+#cmakedefine LLVM_BINDIR "${LLVM_BINDIR}"
+
+/* Time at which LLVM was configured */
+#cmakedefine LLVM_CONFIGTIME "${LLVM_CONFIGTIME}"
+
+/* Installation directory for data files */
+#cmakedefine LLVM_DATADIR "${LLVM_DATADIR}"
+
+/* Installation directory for documentation */
+#cmakedefine LLVM_DOCSDIR "${LLVM_DOCSDIR}"
+
+/* Installation directory for config files */
+#cmakedefine LLVM_ETCDIR "${LLVM_ETCDIR}"
+
+/* Host triple we were built on */
+#cmakedefine LLVM_HOSTTRIPLE "${LLVM_HOSTTRIPLE}"
+
+/* Installation directory for include files */
+#cmakedefine LLVM_INCLUDEDIR "${LLVM_INCLUDEDIR}"
+
+/* Installation directory for .info files */
+#cmakedefine LLVM_INFODIR "${LLVM_INFODIR}"
+
+/* Installation directory for libraries */
+#cmakedefine LLVM_LIBDIR "${LLVM_LIBDIR}"
+
+/* Installation directory for man pages */
+#cmakedefine LLVM_MANDIR "${LLVM_MANDIR}"
+
+/* Build multithreading support into LLVM */
+#cmakedefine LLVM_MULTITHREADED ${LLVM_MULTITHREADED}
+
+/* LLVM architecture name for the native architecture, if available */
+#cmakedefine LLVM_NATIVE_ARCH ${LLVM_NATIVE_ARCH}
+
+/* LLVM name for the native Target init function, if available */
+#cmakedefine LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target
+
+/* LLVM name for the native TargetInfo init function, if available */
+#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
+
+/* Define if this is Unixish platform */
+#cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
+
+/* Define if this is Win32ish platform */
+#cmakedefine LLVM_ON_WIN32 ${LLVM_ON_WIN32}
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#cmakedefine LLVM_PATH_CIRCO "${LLVM_PATH_CIRCO}"
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#cmakedefine LLVM_PATH_DOT "${LLVM_PATH_DOT}"
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#cmakedefine LLVM_PATH_DOTTY "${LLVM_PATH_DOTTY}"
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#cmakedefine LLVM_PATH_FDP "${LLVM_PATH_FDP}"
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#cmakedefine LLVM_PATH_GRAPHVIZ "${LLVM_PATH_GRAPHVIZ}"
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#cmakedefine LLVM_PATH_GV "${LLVM_PATH_GV}"
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#cmakedefine LLVM_PATH_NEATO "${LLVM_PATH_NEATO}"
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}"
+
+/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */
+#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT_PY}"
+
+/* Installation prefix directory */
+#cmakedefine LLVM_PREFIX "${LLVM_PREFIX}"
+
+#endif
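The CONFIG_H guard described in the comment above makes inclusion order decide which header supplies the macros; a sketch of the intended behavior:

    /* If config.h is included first, it defines CONFIG_H along with the
     * full macro set, and the body of llvm-config.h is skipped entirely,
     * avoiding duplicate definitions. Including only llvm-config.h (as
     * the llvm-c headers do) yields just the configure-time variables. */
    #include "llvm/Config/config.h"      /* defines CONFIG_H + all macros */
    #include "llvm/Config/llvm-config.h" /* body skipped: CONFIG_H is set */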
diff --git a/final/include/llvm/Config/llvm-config.h.in b/final/include/llvm/Config/llvm-config.h.in
new file mode 100644
index 00000000000..e7a04ee91bb
--- /dev/null
+++ b/final/include/llvm/Config/llvm-config.h.in
@@ -0,0 +1,97 @@
+/*===-- llvm/config/llvm-config.h - llvm configure variable -------*- C -*-===*/
+/*                                                                            */
+/*                     The LLVM Compiler Infrastructure                       */
+/*                                                                            */
+/* This file is distributed under the University of Illinois Open Source     */
+/* License. See LICENSE.TXT for details.                                      */
+/*                                                                            */
+/*===----------------------------------------------------------------------===*/
+
+/* This file enumerates all of the llvm variables from configure so that
+   they can be in exported headers and won't override package specific
+   directives. This is a C file so we can include it in the llvm-c headers. */
+
+/* To avoid multiple inclusions of these variables when we include the exported
+   headers and config.h, conditionally include these. */
+/* TODO: This is a bit of a hack. */
+#ifndef CONFIG_H
+
+/* Installation directory for binary executables */
+#undef LLVM_BINDIR
+
+/* Time at which LLVM was configured */
+#undef LLVM_CONFIGTIME
+
+/* Installation directory for data files */
+#undef LLVM_DATADIR
+
+/* Installation directory for documentation */
+#undef LLVM_DOCSDIR
+
+/* Installation directory for config files */
+#undef LLVM_ETCDIR
+
+/* Host triple we were built on */
+#undef LLVM_HOSTTRIPLE
+
+/* Installation directory for include files */
+#undef LLVM_INCLUDEDIR
+
+/* Installation directory for .info files */
+#undef LLVM_INFODIR
+
+/* Installation directory for libraries */
+#undef LLVM_LIBDIR
+
+/* Installation directory for man pages */
+#undef LLVM_MANDIR
+
+/* Build multithreading support into LLVM */
+#undef LLVM_MULTITHREADED
+
+/* LLVM architecture name for the native architecture, if available */
+#undef LLVM_NATIVE_ARCH
+
+/* LLVM name for the native Target init function, if available */
+#undef LLVM_NATIVE_TARGET
+
+/* LLVM name for the native TargetInfo init function, if available */
+#undef LLVM_NATIVE_TARGETINFO
+
+/* LLVM name for the native AsmPrinter init function, if available */
+#undef LLVM_NATIVE_ASMPRINTER
+
+/* Define if this is Unixish platform */
+#undef LLVM_ON_UNIX
+
+/* Define if this is Win32ish platform */
+#undef LLVM_ON_WIN32
+
+/* Define to path to circo program if found or 'echo circo' otherwise */
+#undef LLVM_PATH_CIRCO
+
+/* Define to path to dot program if found or 'echo dot' otherwise */
+#undef LLVM_PATH_DOT
+
+/* Define to path to dotty program if found or 'echo dotty' otherwise */
+#undef LLVM_PATH_DOTTY
+
+/* Define to path to fdp program if found or 'echo fdp' otherwise */
+#undef LLVM_PATH_FDP
+
+/* Define to path to Graphviz program if found or 'echo Graphviz' otherwise */
+#undef LLVM_PATH_GRAPHVIZ
+
+/* Define to path to gv program if found or 'echo gv' otherwise */
+#undef LLVM_PATH_GV
+
+/* Define to path to neato program if found or 'echo neato' otherwise */
+#undef LLVM_PATH_NEATO
+
+/* Define to path to twopi program if found or 'echo twopi' otherwise */
+#undef LLVM_PATH_TWOPI
+
+/* Installation prefix directory */
+#undef LLVM_PREFIX
+
+#endif
diff --git a/final/include/llvm/Constant.h b/final/include/llvm/Constant.h
new file mode 100644
index 00000000000..38045fc0c1d
--- /dev/null
+++ b/final/include/llvm/Constant.h
@@ -0,0 +1,164 @@
+//===-- llvm/Constant.h - Constant class definition -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Constant class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANT_H
+#define LLVM_CONSTANT_H
+
+#include "llvm/User.h"
+
+namespace llvm {
+  class APInt;
+
+  template<typename T> class SmallVectorImpl;
+
+/// This is an important base class in LLVM. It provides the common facilities
+/// of all constant values in an LLVM program. A constant is a value that is
+/// immutable at runtime. Functions are constants because their address is
+/// immutable. Same with global variables.
+///
+/// All constants share the capabilities provided in this class. All constants
+/// can have a null value. They can have an operand list. Constants can be
+/// simple (integer and floating point values), complex (arrays and
+/// structures), or expression based (computations yielding a constant value
+/// composed of only certain operators and other constant values).
+///
+/// Note that Constants are immutable (once created they never change)
+/// and are fully shared by structural equivalence. This means that two
+/// structurally equivalent constants will always have the same address.
+/// Constants are created on demand as needed and never deleted: thus clients
+/// don't have to worry about the lifetime of the objects.
+/// @brief LLVM Constant Representation
+class Constant : public User {
+  void operator=(const Constant &);     // Do not implement
+  Constant(const Constant &);           // Do not implement
+
+protected:
+  Constant(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps)
+    : User(ty, vty, Ops, NumOps) {}
+
+  void destroyConstantImpl();
+
+  void setOperand(unsigned i, Value *V) {
+    User::setOperand(i, V);
+  }
+public:
+  /// isNullValue - Return true if this is the value that would be returned by
+  /// getNullValue.
+  virtual bool isNullValue() const = 0;
+
+  /// isNegativeZeroValue - Return true if the value is what would be returned
+  /// by getZeroValueForNegation.
+  virtual bool isNegativeZeroValue() const { return isNullValue(); }
+
+  /// canTrap - Return true if evaluation of this constant could trap. This is
+  /// true for things like constant expressions that could divide by zero.
+  bool canTrap() const;
+
+  /// isConstantUsed - Return true if the constant has users other than
+  /// constant exprs and other dangling things.
+  bool isConstantUsed() const;
+
+  enum PossibleRelocationsTy {
+    NoRelocation = 0,
+    LocalRelocation = 1,
+    GlobalRelocations = 2
+  };
+
+  /// getRelocationInfo - This method classifies the entry according to
+  /// whether or not it may generate a relocation entry. This must be
+  /// conservative, so if it might codegen to a relocatable entry, it should
+  /// say so. The return values are:
+  ///
+  /// NoRelocation: This constant pool entry is guaranteed to never have a
+  /// relocation applied to it (because it holds a simple constant like
+  /// '4').
+  /// LocalRelocation: This entry has relocations, but the entries are
+  /// guaranteed to be resolvable by the static linker, so the dynamic
+  /// linker will never see them.
+  /// GlobalRelocations: This entry may have arbitrary relocations.
+  ///
+  /// FIXME: This really should not be in VMCore.
+  PossibleRelocationsTy getRelocationInfo() const;
+
+  // Specialize get/setOperand for Users as their operands are always
+  // constants or BasicBlocks as well.
+  User *getOperand(unsigned i) {
+    return static_cast<User*>(User::getOperand(i));
+  }
+  const User *getOperand(unsigned i) const {
+    return static_cast<const User*>(User::getOperand(i));
+  }
+
+  /// getVectorElements - This method, which is only valid on constant of
+  /// vector type, returns the elements of the vector in the specified
+  /// smallvector. This handles breaking down a vector undef into undef
+  /// elements, etc. For constant exprs and other cases we can't handle, we
+  /// return an empty vector.
+  void getVectorElements(SmallVectorImpl<Constant*> &Elts) const;
+
+  /// destroyConstant - Called if some element of this constant is no longer
+  /// valid. At this point only other constants may be on the use_list for
+  /// this constant. Any constants on our Use list must also be destroy'd.
+  /// The implementation must be sure to remove the constant from the list of
+  /// available cached constants. Implementations should call
+  /// destroyConstantImpl as the last thing they do, to destroy all users and
+  /// delete this.
+  virtual void destroyConstant() { assert(0 && "Not reached!"); }
+
+  //// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Constant *) { return true; }
+  static inline bool classof(const GlobalValue *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() >= ConstantFirstVal &&
+           V->getValueID() <= ConstantLastVal;
+  }
+
+  /// replaceUsesOfWithOnConstant - This method is a special form of
+  /// User::replaceUsesOfWith (which does not work on constants) that does
+  /// work on constants. Basically this method goes through the trouble of
+  /// building a new constant that is equivalent to the current one, with all
+  /// uses of From replaced with uses of To. After this construction is
+  /// completed, all of the users of 'this' are replaced to use the new
+  /// constant, and then 'this' is deleted. In general, you should not call
+  /// this method, instead, use Value::replaceAllUsesWith, which automatically
+  /// dispatches to this method as needed.
+  ///
+  virtual void replaceUsesOfWithOnConstant(Value *, Value *, Use *) {
+    // Provide a default implementation for constants (like integers) that
+    // cannot use any other values. This cannot be called at runtime, but
+    // needs to be here to avoid link errors.
+    assert(getNumOperands() == 0 && "replaceUsesOfWithOnConstant must be "
+           "implemented for all constants that have operands!");
+    assert(0 && "Constants that do not have operands cannot be using 'From'!");
+  }
+
+  static Constant *getNullValue(const Type* Ty);
+
+  /// @returns the value for an integer constant of the given type that has
+  /// all its bits set to true.
+  /// @brief Get the all ones value
+  static Constant *getAllOnesValue(const Type* Ty);
+
+  /// getIntegerValue - Return the value for an integer or pointer constant,
+  /// or a vector thereof, with the given scalar value.
+  static Constant *getIntegerValue(const Type* Ty, const APInt &V);
+
+  /// removeDeadConstantUsers - If there are any dead constant users dangling
+  /// off of this constant, remove them. This method is useful for clients
+  /// that want to check to see if a global is unused, but don't want to deal
+  /// with potentially dead constants hanging off of the globals.
+  void removeDeadConstantUsers() const;
+};
+
+} // End llvm namespace
+
+#endif
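A short usage sketch of the interface above (this is standard LLVM 2.9-era API; the helper function itself is invented for illustration):

    #include "llvm/Constant.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    // Hypothetical helper: because constants are uniqued by structural
    // equivalence, two getNullValue() calls for the same type must return
    // the very same object, and that object reports isNullValue().
    bool NullIsShared(LLVMContext &Ctx) {
      const Type *I32 = Type::getInt32Ty(Ctx);
      Constant *A = Constant::getNullValue(I32);
      Constant *B = Constant::getNullValue(I32);
      return A == B && A->isNullValue();   // both hold by construction
    }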
diff --git a/final/include/llvm/Constants.h b/final/include/llvm/Constants.h
new file mode 100644
index 00000000000..c12b33fae71
--- /dev/null
+++ b/final/include/llvm/Constants.h
@@ -0,0 +1,971 @@
+//===-- llvm/Constants.h - Constant class subclass definitions --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for the subclasses of Constant,
+/// which represent the different flavors of constant values that live in
+/// LLVM. Note that Constants are immutable (once created they never change)
+/// and are fully shared by structural equivalence. This means that two
+/// structurally equivalent constants will always have the same address.
+/// Constants are created on demand as needed and never deleted: thus clients
+/// don't have to worry about the lifetime of the objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CONSTANTS_H
+#define LLVM_CONSTANTS_H
+
+#include "llvm/Constant.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+
+class ArrayType;
+class IntegerType;
+class StructType;
+class PointerType;
+class VectorType;
+
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator;
+template<class ConstantClass, class TypeClass>
+struct ConvertConstantType;
+
+//===----------------------------------------------------------------------===//
+/// This is the shared class of boolean and integer constants. This class
+/// represents both boolean and integral constants.
+/// @brief Class for constant integers.
+class ConstantInt : public Constant {
+  void *operator new(size_t, unsigned);  // DO NOT IMPLEMENT
+  ConstantInt(const ConstantInt &);      // DO NOT IMPLEMENT
+  ConstantInt(const IntegerType *Ty, const APInt& V);
+  APInt Val;
+protected:
+  // allocate space for exactly zero operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 0);
+  }
+public:
+  static ConstantInt *getTrue(LLVMContext &Context);
+  static ConstantInt *getFalse(LLVMContext &Context);
+  static Constant *getTrue(const Type *Ty);
+  static Constant *getFalse(const Type *Ty);
+
+  /// If Ty is a vector type, return a Constant with a splat of the given
+  /// value. Otherwise return a ConstantInt for the given value.
+  static Constant *get(const Type *Ty, uint64_t V, bool isSigned = false);
+
+  /// Return a ConstantInt with the specified integer value for the specified
+  /// type. If the type is wider than 64 bits, the value will be zero-extended
+  /// to fit the type, unless isSigned is true, in which case the value will
+  /// be interpreted as a 64-bit signed integer and sign-extended to fit
+  /// the type.
+  /// @brief Get a ConstantInt for a specific value.
+  static ConstantInt *get(const IntegerType *Ty, uint64_t V,
+                          bool isSigned = false);
+
+  /// Return a ConstantInt with the specified value for the specified type.
+  /// The value V will be canonicalized to an unsigned APInt. Accessing it
+  /// with either getSExtValue() or getZExtValue() will yield a correctly
+  /// sized and signed value for the type Ty.
+  /// @brief Get a ConstantInt for a specific signed value.
+  static ConstantInt *getSigned(const IntegerType *Ty, int64_t V);
+  static Constant *getSigned(const Type *Ty, int64_t V);
+
+  /// Return a ConstantInt with the specified value and an implied Type. The
+  /// type is the integer type that corresponds to the bit width of the value.
+  static ConstantInt *get(LLVMContext &Context, const APInt &V);
+
+  /// Return a ConstantInt constructed from the string Str with the given
+  /// radix.
+  static ConstantInt *get(const IntegerType *Ty, StringRef Str,
+                          uint8_t radix);
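+
+  // Usage sketch (illustrative; Ctx and Int64Ty stand in for a caller's
+  // LLVMContext and integer type): clients go through these uniquing
+  // factories rather than operator new, e.g.
+  //   ConstantInt *C = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
+  //   Constant *S = ConstantInt::getSigned(Int64Ty, -1);
+  // Repeated requests with the same type and value return the same object.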
+  static Constant *get(const Type *Ty, const APInt &V);
+
+  /// Return the constant as an APInt value reference. This allows clients to
+  /// obtain a copy of the value, with all its precision intact.
+  /// @brief Return the constant's value.
+  inline const APInt &getValue() const {
+    return Val;
+  }
+
+  /// getBitWidth - Return the bitwidth of this constant.
+  unsigned getBitWidth() const { return Val.getBitWidth(); }
+
+  /// Return the constant as a 64-bit unsigned integer value after it
+  /// has been zero extended as appropriate for the type of this constant. Note
+  /// that this method can assert if the value does not fit in 64 bits.
+  /// @deprecated
+  /// @brief Return the zero extended value.
+  inline uint64_t getZExtValue() const {
+    return Val.getZExtValue();
+  }
+
+  /// Return the constant as a 64-bit integer value after it has been sign
+  /// extended as appropriate for the type of this constant. Note that
+  /// this method can assert if the value does not fit in 64 bits.
+  /// @deprecated
+  /// @brief Return the sign extended value.
+  inline int64_t getSExtValue() const {
+    return Val.getSExtValue();
+  }
+
+  /// A helper method that can be used to determine if the constant contained
+  /// within is equal to a constant. This only works for very small values,
+  /// because this is all that can be represented with all types.
+  /// @brief Determine if this constant's value is the same as an unsigned char.
+  bool equalsInt(uint64_t V) const {
+    return Val == V;
+  }
+
+  /// getType - Specialize the getType() method to always return an IntegerType,
+  /// which reduces the amount of casting needed in parts of the compiler.
+  ///
+  inline const IntegerType *getType() const {
+    return reinterpret_cast<const IntegerType*>(Value::getType());
+  }
+
+  /// This static method returns true if the type Ty is big enough to
+  /// represent the value V. This can be used to avoid having the get method
+  /// assert when V is larger than Ty can represent. Note that there are two
+  /// versions of this method, one for unsigned and one for signed integers.
+  /// Although ConstantInt canonicalizes everything to an unsigned integer,
+  /// the signed version avoids callers having to convert a signed quantity
+  /// to the appropriate unsigned type before calling the method.
+  /// @returns true if V is a valid value for type Ty
+  /// @brief Determine if the value is in range for the given type.
+  static bool isValueValidForType(const Type *Ty, uint64_t V);
+  static bool isValueValidForType(const Type *Ty, int64_t V);
+
+  /// This function will return true iff this constant represents the "null"
+  /// value that would be returned by the getNullValue method.
+  /// @returns true if this is the null integer value.
+  /// @brief Determine if the value is null.
+  virtual bool isNullValue() const {
+    return Val == 0;
+  }
+
+  /// This is just a convenience method to make client code smaller for a
+  /// common case. It also correctly performs the comparison without the
+  /// potential for an assertion from getZExtValue().
+  bool isZero() const {
+    return Val == 0;
+  }
+
+  /// This is just a convenience method to make client code smaller for a
+  /// common case. It also correctly performs the comparison without the
+  /// potential for an assertion from getZExtValue().
+  /// @brief Determine if the value is one.
+  bool isOne() const {
+    return Val == 1;
+  }
+
+  /// This function will return true iff every bit in this constant is set
+  /// to true.
+ /// @returns true iff this constant's bits are all set to true. + /// @brief Determine if the value is all ones. + bool isAllOnesValue() const { + return Val.isAllOnesValue(); + } + + /// This function will return true iff this constant represents the largest + /// value that may be represented by the constant's type. + /// @returns true iff this is the largest value that may be represented + /// by this type. + /// @brief Determine if the value is maximal. + bool isMaxValue(bool isSigned) const { + if (isSigned) + return Val.isMaxSignedValue(); + else + return Val.isMaxValue(); + } + + /// This function will return true iff this constant represents the smallest + /// value that may be represented by this constant's type. + /// @returns true if this is the smallest value that may be represented by + /// this type. + /// @brief Determine if the value is minimal. + bool isMinValue(bool isSigned) const { + if (isSigned) + return Val.isMinSignedValue(); + else + return Val.isMinValue(); + } + + /// This function will return true iff this constant represents a value with + /// active bits bigger than 64 bits or a value greater than the given uint64_t + /// value. + /// @returns true iff this constant is greater or equal to the given number. + /// @brief Determine if the value is greater or equal to the given number. + bool uge(uint64_t Num) { + return Val.getActiveBits() > 64 || Val.getZExtValue() >= Num; + } + + /// getLimitedValue - If the value is smaller than the specified limit, + /// return it, otherwise return the limit value. This causes the value + /// to saturate to the limit. + /// @returns the min of the value of the constant and the specified value + /// @brief Get the constant's value with a saturation limit + uint64_t getLimitedValue(uint64_t Limit = ~0ULL) const { + return Val.getLimitedValue(Limit); + } + + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + static inline bool classof(const ConstantInt *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantIntVal; + } +}; + + +//===----------------------------------------------------------------------===// +/// ConstantFP - Floating Point Values [float, double] +/// +class ConstantFP : public Constant { + APFloat Val; + void *operator new(size_t, unsigned);// DO NOT IMPLEMENT + ConstantFP(const ConstantFP &); // DO NOT IMPLEMENT + friend class LLVMContextImpl; +protected: + ConstantFP(const Type *Ty, const APFloat& V); +protected: + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + /// Floating point negation must be implemented with f(x) = -0.0 - x. This + /// method returns the negative zero constant for floating point or vector + /// floating point types; for all other types, it returns the null value. + static Constant *getZeroValueForNegation(const Type *Ty); + + /// get() - This returns a ConstantFP, or a vector containing a splat of a + /// ConstantFP, for the specified value in the specified type. This should + /// only be used for simple constant values like 2.0/1.0 etc, that are + /// known-valid both as host double and as the target format. 
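To make these factory methods concrete, here is a minimal, illustrative sketch of how a client might build scalar constants with this API. The helper name and the `Ctx` parameter are hypothetical; only headers introduced in this patch plus `llvm/LLVMContext.h` are assumed, and `ConstantFP::get(const Type*, double)` is the overload declared just below.

```cpp
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
using namespace llvm;

// Hypothetical helper: build a few scalar constants in context Ctx.
void buildScalarConstants(LLVMContext &Ctx) {
  // i32 42, via the uint64_t overload (zero-extension semantics).
  Constant *FortyTwo = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
  // i8 -1, using the signed factory so the value is sign-extended.
  ConstantInt *MinusOne = ConstantInt::getSigned(Type::getInt8Ty(Ctx), -1);
  // double 2.0 is safe for get(Type*, double): it is exactly representable
  // both as a host double and in the target format.
  Constant *Two = ConstantFP::get(Type::getDoubleTy(Ctx), 2.0);
  (void)FortyTwo; (void)MinusOne; (void)Two;
}
```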
+ static Constant *get(const Type* Ty, double V); + static Constant *get(const Type* Ty, StringRef Str); + static ConstantFP *get(LLVMContext &Context, const APFloat &V); + static ConstantFP *getNegativeZero(const Type* Ty); + static ConstantFP *getInfinity(const Type *Ty, bool Negative = false); + + /// isValueValidForType - return true if Ty is big enough to represent V. + static bool isValueValidForType(const Type *Ty, const APFloat &V); + inline const APFloat& getValueAPF() const { return Val; } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. For ConstantFP, this is +0.0, but not -0.0. To handle the + /// two the same, use isZero(). + virtual bool isNullValue() const; + + /// isNegativeZeroValue - Return true if the value is what would be returned + /// by getZeroValueForNegation. + virtual bool isNegativeZeroValue() const { + return Val.isZero() && Val.isNegative(); + } + + /// isZero - Return true if the value is positive or negative zero. + bool isZero() const { return Val.isZero(); } + + /// isNaN - Return true if the value is a NaN. + bool isNaN() const { return Val.isNaN(); } + + /// isExactlyValue - We don't rely on operator== working on double values, as + /// it returns true for things that are clearly not equal, like -0.0 and 0.0. + /// As such, this method can be used to do an exact bit-for-bit comparison of + /// two floating point values. The version with a double operand is retained + /// because it's so convenient to write isExactlyValue(2.0), but please use + /// it only for simple constants. + bool isExactlyValue(const APFloat &V) const; + + bool isExactlyValue(double V) const { + bool ignored; + // convert is not supported on this type + if (&Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + APFloat FV(V); + FV.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &ignored); + return isExactlyValue(FV); + } + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantFP *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantFPVal; + } +}; + +//===----------------------------------------------------------------------===// +/// ConstantAggregateZero - All zero aggregate value +/// +class ConstantAggregateZero : public Constant { + friend struct ConstantCreator; + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantAggregateZero(const ConstantAggregateZero &); // DO NOT IMPLEMENT +protected: + explicit ConstantAggregateZero(const Type *ty) + : Constant(ty, ConstantAggregateZeroVal, 0, 0) {} +protected: + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + static ConstantAggregateZero* get(const Type *Ty); + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. 
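For instance, a zeroinitializer for an aggregate might be obtained as in this sketch (illustrative only; `Ctx` is assumed to be a live `LLVMContext`, and `StructType::get` is the variadic, NULL-terminated factory declared later in this header):

```cpp
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include <cassert>
using namespace llvm;

void zeroAggregate(LLVMContext &Ctx) {
  // { i32, double } via the NULL-terminated variadic form.
  const StructType *STy = StructType::get(Ctx, Type::getInt32Ty(Ctx),
                                          Type::getDoubleTy(Ctx), NULL);
  Constant *Zero = ConstantAggregateZero::get(STy);
  assert(Zero->isNullValue() && "zeroinitializer is the null value");
}
```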
+ virtual bool isNullValue() const { return true; } + + virtual void destroyConstant(); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + /// + static bool classof(const ConstantAggregateZero *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantAggregateZeroVal; + } +}; + + +//===----------------------------------------------------------------------===// +/// ConstantArray - Constant Array Declarations +/// +class ConstantArray : public Constant { + friend struct ConstantCreator >; + ConstantArray(const ConstantArray &); // DO NOT IMPLEMENT +protected: + ConstantArray(const ArrayType *T, const std::vector &Val); +public: + // ConstantArray accessors + static Constant *get(const ArrayType *T, const std::vector &V); + static Constant *get(const ArrayType *T, Constant *const *Vals, + unsigned NumVals); + + /// This method constructs a ConstantArray and initializes it with a text + /// string. The default behavior (AddNull==true) causes a null terminator to + /// be placed at the end of the array. This effectively increases the length + /// of the array by one (you've been warned). However, in some situations + /// this is not desired so if AddNull==false then the string is copied without + /// null termination. + static Constant *get(LLVMContext &Context, StringRef Initializer, + bool AddNull = true); + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// getType - Specialize the getType() method to always return an ArrayType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline const ArrayType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// isString - This method returns true if the array is an array of i8 and + /// the elements of the array are all ConstantInt's. + bool isString() const; + + /// isCString - This method returns true if the array is a string (see + /// @verbatim + /// isString) and it ends in a null byte \0 and does not contains any other + /// @endverbatim + /// null bytes except its terminator. + bool isCString() const; + + /// getAsString - If this array is isString(), then this method converts the + /// array to an std::string and returns it. Otherwise, it asserts out. + /// + std::string getAsString() const; + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because zero arrays are always + /// created as ConstantAggregateZero objects. 
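The string-handling methods above compose as in the following sketch; `Ctx` and the helper name are hypothetical, and everything else is grounded in the declarations above:

```cpp
#include "llvm/Constants.h"
#include <cassert>
#include <string>
using namespace llvm;

std::string roundTripString(LLVMContext &Ctx) {
  // [6 x i8] c"hello\00": AddNull=true appends the terminator element.
  Constant *Init = ConstantArray::get(Ctx, "hello", /*AddNull=*/true);
  ConstantArray *CA = cast<ConstantArray>(Init);
  assert(CA->isString() && CA->isCString());
  return CA->getAsString();  // the raw array contents, terminator included
}
```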
+ virtual bool isNullValue() const { return false; } + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantArray *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantArrayVal; + } +}; + +template <> +struct OperandTraits : + public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantArray, Constant) + +//===----------------------------------------------------------------------===// +// ConstantStruct - Constant Struct Declarations +// +class ConstantStruct : public Constant { + friend struct ConstantCreator >; + ConstantStruct(const ConstantStruct &); // DO NOT IMPLEMENT +protected: + ConstantStruct(const StructType *T, const std::vector &Val); +public: + // ConstantStruct accessors + static Constant *get(const StructType *T, const std::vector &V); + static Constant *get(LLVMContext &Context, + const std::vector &V, bool Packed); + static Constant *get(LLVMContext &Context, + Constant *const *Vals, unsigned NumVals, bool Packed); + static Constant *get(LLVMContext &Context, bool Packed, + Constant * Val, ...) END_WITH_NULL; + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// getType() specialization - Reduce amount of casting... + /// + inline const StructType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because zero structs are always + /// created as ConstantAggregateZero objects. + virtual bool isNullValue() const { + return false; + } + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantStruct *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantStructVal; + } +}; + +template <> +struct OperandTraits : + public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantStruct, Constant) + + +//===----------------------------------------------------------------------===// +/// ConstantVector - Constant Vector Declarations +/// +class ConstantVector : public Constant { + friend struct ConstantCreator >; + ConstantVector(const ConstantVector &); // DO NOT IMPLEMENT +protected: + ConstantVector(const VectorType *T, const std::vector &Val); +public: + // ConstantVector accessors + static Constant *get(ArrayRef V); + // FIXME: Eliminate this constructor form. + static Constant *get(const VectorType *T, const std::vector &V); + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + /// getType - Specialize the getType() method to always return a VectorType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline const VectorType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. This always returns false because zero vectors are always + /// created as ConstantAggregateZero objects. 
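As an illustrative sketch of building a vector splat with the `ArrayRef` accessor above (the `Ctx` parameter and helper name are assumptions; `getSplatValue` is declared just below):

```cpp
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include <vector>
using namespace llvm;

Constant *splatOfOnes(LLVMContext &Ctx) {
  // <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  std::vector<Constant*> Elts(4, ConstantInt::get(Type::getInt32Ty(Ctx), 1));
  Constant *V = ConstantVector::get(Elts);
  // A vector of identical nonzero elements reports its splat value; an
  // all-zero vector would have been folded to ConstantAggregateZero instead.
  if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
    return CV->getSplatValue();  // the i32 1 element
  return V;
}
```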
+ virtual bool isNullValue() const { return false; } + + /// This function will return true iff every element in this vector constant + /// is set to all ones. + /// @returns true iff this constant's emements are all set to all ones. + /// @brief Determine if the value is all ones. + bool isAllOnesValue() const; + + /// getSplatValue - If this is a splat constant, meaning that all of the + /// elements have the same value, return that value. Otherwise return NULL. + Constant *getSplatValue() const; + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantVector *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantVectorVal; + } +}; + +template <> +struct OperandTraits : + public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantVector, Constant) + +//===----------------------------------------------------------------------===// +/// ConstantPointerNull - a constant pointer value that points to null +/// +class ConstantPointerNull : public Constant { + friend struct ConstantCreator; + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + ConstantPointerNull(const ConstantPointerNull &); // DO NOT IMPLEMENT +protected: + explicit ConstantPointerNull(const PointerType *T) + : Constant(reinterpret_cast(T), + Value::ConstantPointerNullVal, 0, 0) {} + +protected: + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + /// get() - Static factory methods - Return objects of the specified value + static ConstantPointerNull *get(const PointerType *T); + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. + virtual bool isNullValue() const { return true; } + + virtual void destroyConstant(); + + /// getType - Specialize the getType() method to always return an PointerType, + /// which reduces the amount of casting needed in parts of the compiler. + /// + inline const PointerType *getType() const { + return reinterpret_cast(Value::getType()); + } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantPointerNull *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == ConstantPointerNullVal; + } +}; + +/// BlockAddress - The address of a basic block. +/// +class BlockAddress : public Constant { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + void *operator new(size_t s) { return User::operator new(s, 2); } + BlockAddress(Function *F, BasicBlock *BB); +public: + /// get - Return a BlockAddress for the specified function and basic block. + static BlockAddress *get(Function *F, BasicBlock *BB); + + /// get - Return a BlockAddress for the specified basic block. The basic + /// block must be embedded into a function. + static BlockAddress *get(BasicBlock *BB); + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + Function *getFunction() const { return (Function*)Op<0>().get(); } + BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); } + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. 
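A sketch of taking a block's address; `F` and `BB` are hypothetical inputs, and `BB` must already be embedded in `F` as the comment above requires:

```cpp
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
using namespace llvm;

Constant *addressOfBlock(Function *F, BasicBlock *BB) {
  // Yields an i8* constant pointing at BB, usable e.g. as an
  // indirectbr destination.
  return BlockAddress::get(F, BB);
}
```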
+ virtual bool isNullValue() const { return false; } + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const BlockAddress *) { return true; } + static inline bool classof(const Value *V) { + return V->getValueID() == BlockAddressVal; + } +}; + +template <> +struct OperandTraits : + public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(BlockAddress, Value) + + +//===----------------------------------------------------------------------===// +/// ConstantExpr - a constant value that is initialized with an expression using +/// other constant values. +/// +/// This class uses the standard Instruction opcodes to define the various +/// constant expressions. The Opcode field for the ConstantExpr class is +/// maintained in the Value::SubclassData field. +class ConstantExpr : public Constant { + friend struct ConstantCreator > >; + friend struct ConvertConstantType; + +protected: + ConstantExpr(const Type *ty, unsigned Opcode, Use *Ops, unsigned NumOps) + : Constant(ty, ConstantExprVal, Ops, NumOps) { + // Operation type (an Instruction opcode) is stored as the SubclassData. + setValueSubclassData(Opcode); + } + + // These private methods are used by the type resolution code to create + // ConstantExprs in intermediate forms. + static Constant *getTy(const Type *Ty, unsigned Opcode, + Constant *C1, Constant *C2, + unsigned Flags = 0); + static Constant *getCompareTy(unsigned short pred, Constant *C1, + Constant *C2); + static Constant *getSelectTy(const Type *Ty, + Constant *C1, Constant *C2, Constant *C3); + template + static Constant *getGetElementPtrTy(const Type *Ty, Constant *C, + IndexTy const *Idxs, unsigned NumIdxs, + bool InBounds); + static Constant *getExtractElementTy(const Type *Ty, Constant *Val, + Constant *Idx); + static Constant *getInsertElementTy(const Type *Ty, Constant *Val, + Constant *Elt, Constant *Idx); + static Constant *getShuffleVectorTy(const Type *Ty, Constant *V1, + Constant *V2, Constant *Mask); + static Constant *getExtractValueTy(const Type *Ty, Constant *Agg, + const unsigned *Idxs, unsigned NumIdxs); + static Constant *getInsertValueTy(const Type *Ty, Constant *Agg, + Constant *Val, + const unsigned *Idxs, unsigned NumIdxs); + template + static Constant *getGetElementPtrImpl(Constant *C, + IndexTy const *IdxList, + unsigned NumIdx, bool InBounds); + +public: + // Static methods to construct a ConstantExpr of different kinds. Note that + // these methods may return a object that is not an instance of the + // ConstantExpr class, because they will attempt to fold the constant + // expression into something simpler if possible. + + /// getAlignOf constant expr - computes the alignment of a type in a target + /// independent way (Note: the return type is an i64). + static Constant *getAlignOf(const Type *Ty); + + /// getSizeOf constant expr - computes the (alloc) size of a type (in + /// address-units, not bits) in a target independent way (Note: the return + /// type is an i64). + /// + static Constant *getSizeOf(const Type *Ty); + + /// getOffsetOf constant expr - computes the offset of a struct field in a + /// target independent way (Note: the return type is an i64). 
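These helpers fold to target-independent constant expressions; a small sketch follows (it assumes `STy` is a `StructType*` with at least two fields, built elsewhere):

```cpp
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
using namespace llvm;

void layoutExprs(const StructType *STy) {
  Constant *Size   = ConstantExpr::getSizeOf(STy);       // i64 alloc size
  Constant *Align  = ConstantExpr::getAlignOf(STy);      // i64 alignment
  Constant *Offset = ConstantExpr::getOffsetOf(STy, 1);  // i64 offset of
                                                         // field #1 (assumed
                                                         // to exist)
  (void)Size; (void)Align; (void)Offset;
}
```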
+ /// + static Constant *getOffsetOf(const StructType *STy, unsigned FieldNo); + + /// getOffsetOf constant expr - This is a generalized form of getOffsetOf, + /// which supports any aggregate type, and any Constant index. + /// + static Constant *getOffsetOf(const Type *Ty, Constant *FieldNo); + + static Constant *getNeg(Constant *C, bool HasNUW = false, bool HasNSW =false); + static Constant *getFNeg(Constant *C); + static Constant *getNot(Constant *C); + static Constant *getAdd(Constant *C1, Constant *C2, + bool HasNUW = false, bool HasNSW = false); + static Constant *getFAdd(Constant *C1, Constant *C2); + static Constant *getSub(Constant *C1, Constant *C2, + bool HasNUW = false, bool HasNSW = false); + static Constant *getFSub(Constant *C1, Constant *C2); + static Constant *getMul(Constant *C1, Constant *C2, + bool HasNUW = false, bool HasNSW = false); + static Constant *getFMul(Constant *C1, Constant *C2); + static Constant *getUDiv(Constant *C1, Constant *C2, bool isExact = false); + static Constant *getSDiv(Constant *C1, Constant *C2, bool isExact = false); + static Constant *getFDiv(Constant *C1, Constant *C2); + static Constant *getURem(Constant *C1, Constant *C2); + static Constant *getSRem(Constant *C1, Constant *C2); + static Constant *getFRem(Constant *C1, Constant *C2); + static Constant *getAnd(Constant *C1, Constant *C2); + static Constant *getOr(Constant *C1, Constant *C2); + static Constant *getXor(Constant *C1, Constant *C2); + static Constant *getShl(Constant *C1, Constant *C2, + bool HasNUW = false, bool HasNSW = false); + static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false); + static Constant *getAShr(Constant *C1, Constant *C2, bool isExact = false); + static Constant *getTrunc (Constant *C, const Type *Ty); + static Constant *getSExt (Constant *C, const Type *Ty); + static Constant *getZExt (Constant *C, const Type *Ty); + static Constant *getFPTrunc (Constant *C, const Type *Ty); + static Constant *getFPExtend(Constant *C, const Type *Ty); + static Constant *getUIToFP (Constant *C, const Type *Ty); + static Constant *getSIToFP (Constant *C, const Type *Ty); + static Constant *getFPToUI (Constant *C, const Type *Ty); + static Constant *getFPToSI (Constant *C, const Type *Ty); + static Constant *getPtrToInt(Constant *C, const Type *Ty); + static Constant *getIntToPtr(Constant *C, const Type *Ty); + static Constant *getBitCast (Constant *C, const Type *Ty); + + static Constant *getNSWNeg(Constant *C) { return getNeg(C, false, true); } + static Constant *getNUWNeg(Constant *C) { return getNeg(C, true, false); } + static Constant *getNSWAdd(Constant *C1, Constant *C2) { + return getAdd(C1, C2, false, true); + } + static Constant *getNUWAdd(Constant *C1, Constant *C2) { + return getAdd(C1, C2, true, false); + } + static Constant *getNSWSub(Constant *C1, Constant *C2) { + return getSub(C1, C2, false, true); + } + static Constant *getNUWSub(Constant *C1, Constant *C2) { + return getSub(C1, C2, true, false); + } + static Constant *getNSWMul(Constant *C1, Constant *C2) { + return getMul(C1, C2, false, true); + } + static Constant *getNUWMul(Constant *C1, Constant *C2) { + return getMul(C1, C2, true, false); + } + static Constant *getNSWShl(Constant *C1, Constant *C2) { + return getShl(C1, C2, false, true); + } + static Constant *getNUWShl(Constant *C1, Constant *C2) { + return getShl(C1, C2, true, false); + } + static Constant *getExactSDiv(Constant *C1, Constant *C2) { + return getSDiv(C1, C2, true); + } + static Constant *getExactUDiv(Constant *C1, 
Constant *C2) { + return getUDiv(C1, C2, true); + } + static Constant *getExactAShr(Constant *C1, Constant *C2) { + return getAShr(C1, C2, true); + } + static Constant *getExactLShr(Constant *C1, Constant *C2) { + return getLShr(C1, C2, true); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); + + // @brief Convenience function for getting one of the casting operations + // using a CastOps opcode. + static Constant *getCast( + unsigned ops, ///< The opcode for the conversion + Constant *C, ///< The constant to be converted + const Type *Ty ///< The type to which the constant is converted + ); + + // @brief Create a ZExt or BitCast cast constant expression + static Constant *getZExtOrBitCast( + Constant *C, ///< The constant to zext or bitcast + const Type *Ty ///< The type to zext or bitcast C to + ); + + // @brief Create a SExt or BitCast cast constant expression + static Constant *getSExtOrBitCast( + Constant *C, ///< The constant to sext or bitcast + const Type *Ty ///< The type to sext or bitcast C to + ); + + // @brief Create a Trunc or BitCast cast constant expression + static Constant *getTruncOrBitCast( + Constant *C, ///< The constant to trunc or bitcast + const Type *Ty ///< The type to trunc or bitcast C to + ); + + /// @brief Create a BitCast or a PtrToInt cast constant expression + static Constant *getPointerCast( + Constant *C, ///< The pointer value to be casted (operand 0) + const Type *Ty ///< The type to which cast should be made + ); + + /// @brief Create a ZExt, Bitcast or Trunc for integer -> integer casts + static Constant *getIntegerCast( + Constant *C, ///< The integer constant to be casted + const Type *Ty, ///< The integer type to cast to + bool isSigned ///< Whether C should be treated as signed or not + ); + + /// @brief Create a FPExt, Bitcast or FPTrunc for fp -> fp casts + static Constant *getFPCast( + Constant *C, ///< The integer constant to be casted + const Type *Ty ///< The integer type to cast to + ); + + /// @brief Return true if this is a convert constant expression + bool isCast() const; + + /// @brief Return true if this is a compare constant expression + bool isCompare() const; + + /// @brief Return true if this is an insertvalue or extractvalue expression, + /// and the getIndices() method may be used. + bool hasIndices() const; + + /// @brief Return true if this is a getelementptr expression and all + /// the index operands are compile-time known integers within the + /// corresponding notional static array extents. Note that this is + /// not equivalant to, a subset of, or a superset of the "inbounds" + /// property. + bool isGEPWithNoNotionalOverIndexing() const; + + /// Select constant expr + /// + static Constant *getSelect(Constant *C, Constant *V1, Constant *V2) { + return getSelectTy(V1->getType(), C, V1, V2); + } + + /// get - Return a binary or shift operator constant expression, + /// folding if possible. + /// + static Constant *get(unsigned Opcode, Constant *C1, Constant *C2, + unsigned Flags = 0); + + /// @brief Return an ICmp or FCmp comparison operator constant expression. + static Constant *getCompare(unsigned short pred, Constant *C1, Constant *C2); + + /// get* - Return some common constants without having to + /// specify the full Instruction::OPCODE identifier. + /// + static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS); + static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS); + + /// Getelementptr form. 
std::vector is only accepted for convenience: + /// all elements must be Constant's. + /// + static Constant *getGetElementPtr(Constant *C, + Constant *const *IdxList, unsigned NumIdx, + bool InBounds = false); + static Constant *getGetElementPtr(Constant *C, + Value *const *IdxList, unsigned NumIdx, + bool InBounds = false); + + /// Create an "inbounds" getelementptr. See the documentation for the + /// "inbounds" flag in LangRef.html for details. + static Constant *getInBoundsGetElementPtr(Constant *C, + Constant *const *IdxList, + unsigned NumIdx) { + return getGetElementPtr(C, IdxList, NumIdx, true); + } + static Constant *getInBoundsGetElementPtr(Constant *C, + Value* const *IdxList, + unsigned NumIdx) { + return getGetElementPtr(C, IdxList, NumIdx, true); + } + + static Constant *getExtractElement(Constant *Vec, Constant *Idx); + static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx); + static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask); + static Constant *getExtractValue(Constant *Agg, + const unsigned *IdxList, unsigned NumIdx); + static Constant *getInsertValue(Constant *Agg, Constant *Val, + const unsigned *IdxList, unsigned NumIdx); + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. + virtual bool isNullValue() const { return false; } + + /// getOpcode - Return the opcode at the root of this constant expression + unsigned getOpcode() const { return getSubclassDataFromValue(); } + + /// getPredicate - Return the ICMP or FCMP predicate value. Assert if this is + /// not an ICMP or FCMP constant expression. + unsigned getPredicate() const; + + /// getIndices - Assert that this is an insertvalue or exactvalue + /// expression and return the list of indices. + const SmallVector &getIndices() const; + + /// getOpcodeName - Return a string representation for an opcode. + const char *getOpcodeName() const; + + /// getWithOperandReplaced - Return a constant expression identical to this + /// one, but with the specified operand set to the specified value. + Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const; + + /// getWithOperands - This returns the current constant expression with the + /// operands replaced with the specified values. The specified operands must + /// match count and type with the existing ones. + Constant *getWithOperands(const std::vector &Ops) const { + return getWithOperands(&Ops[0], (unsigned)Ops.size()); + } + Constant *getWithOperands(Constant *const *Ops, unsigned NumOps) const; + + virtual void destroyConstant(); + virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ConstantExpr *) { return true; } + static inline bool classof(const Value *V) { + return V->getValueID() == ConstantExprVal; + } + +private: + // Shadow Value::setValueSubclassData with a private forwarding method so that + // subclasses cannot accidentally use it. + void setValueSubclassData(unsigned short D) { + Value::setValueSubclassData(D); + } +}; + +template <> +struct OperandTraits : + public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(ConstantExpr, Constant) + +//===----------------------------------------------------------------------===// +/// UndefValue - 'undef' values are things that do not have specified contents. 
+/// These are used for a variety of purposes, including global variable +/// initializers and operands to instructions. 'undef' values can occur with +/// any first-class type. +/// +/// Undef values aren't exactly constants; if they have multiple uses, they +/// can appear to have different bit patterns at each use. See +/// LangRef.html#undefvalues for details. +/// +class UndefValue : public Constant { + friend struct ConstantCreator; + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + UndefValue(const UndefValue &); // DO NOT IMPLEMENT +protected: + explicit UndefValue(const Type *T) : Constant(T, UndefValueVal, 0, 0) {} +protected: + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } +public: + /// get() - Static factory methods - Return an 'undef' object of the specified + /// type. + /// + static UndefValue *get(const Type *T); + + /// isNullValue - Return true if this is the value that would be returned by + /// getNullValue. + virtual bool isNullValue() const { return false; } + + virtual void destroyConstant(); + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const UndefValue *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == UndefValueVal; + } +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/DerivedTypes.h b/final/include/llvm/DerivedTypes.h new file mode 100644 index 00000000000..56d1e3e237d --- /dev/null +++ b/final/include/llvm/DerivedTypes.h @@ -0,0 +1,513 @@ +//===-- llvm/DerivedTypes.h - Classes for handling data types ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declarations of classes that represent "derived +// types". These are things like "arrays of x" or "structure of x, y, z" or +// "function returning x taking (y,z) as parameters", etc... +// +// The implementations of these classes live in the Type.cpp file. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DERIVED_TYPES_H +#define LLVM_DERIVED_TYPES_H + +#include "llvm/Type.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class Value; +template class TypeMap; +class FunctionValType; +class ArrayValType; +class StructValType; +class PointerValType; +class VectorValType; +class IntegerValType; +class APInt; +class LLVMContext; + +class DerivedType : public Type { + friend class Type; + +protected: + explicit DerivedType(LLVMContext &C, TypeID id) : Type(C, id) {} + + /// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type + /// that the current type has transitioned from being abstract to being + /// concrete. + /// + void notifyUsesThatTypeBecameConcrete(); + + /// dropAllTypeUses - When this (abstract) type is resolved to be equal to + /// another (more concrete) type, we must eliminate all references to other + /// types, to avoid some circular reference problems. + /// + void dropAllTypeUses(); + +public: + + //===--------------------------------------------------------------------===// + // Abstract Type handling methods - These types have special lifetimes, which + // are managed by (add|remove)AbstractTypeUser. See comments in + // AbstractTypeUser.h for more information. 
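The classic 2.9-era use of this machinery is resolving a forward-declared (opaque) type. The following is a hedged sketch: it assumes `PATypeHolder` from llvm/AbstractTypeUser.h behaves as documented there, and uses `OpaqueType::get` and `refineAbstractTypeTo`, both declared in this header.

```cpp
#include "llvm/DerivedTypes.h"
#include "llvm/AbstractTypeUser.h"
using namespace llvm;

const Type *resolveToInt32(LLVMContext &Ctx) {
  OpaqueType *OT = OpaqueType::get(Ctx);
  PATypeHolder Holder(OT);  // keeps a safe handle across refinement
  // All users of OT are switched to i32, and OT itself is deleted.
  OT->refineAbstractTypeTo(Type::getInt32Ty(Ctx));
  return Holder.get();      // now the concrete i32 type
}
```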
+ + /// refineAbstractTypeTo - This function is used to when it is discovered that + /// the 'this' abstract type is actually equivalent to the NewType specified. + /// This causes all users of 'this' to switch to reference the more concrete + /// type NewType and for 'this' to be deleted. + /// + void refineAbstractTypeTo(const Type *NewType); + + void dump() const { Type::dump(); } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const DerivedType *) { return true; } + static inline bool classof(const Type *T) { + return T->isDerivedType(); + } +}; + +/// Class to represent integer types. Note that this class is also used to +/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and +/// Int64Ty. +/// @brief Integer representation type +class IntegerType : public DerivedType { + friend class LLVMContextImpl; + +protected: + explicit IntegerType(LLVMContext &C, unsigned NumBits) : + DerivedType(C, IntegerTyID) { + setSubclassData(NumBits); + } + friend class TypeMap; +public: + /// This enum is just used to hold constants we need for IntegerType. + enum { + MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified + MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified + ///< Note that bit width is stored in the Type classes SubclassData field + ///< which has 23 bits. This yields a maximum bit width of 8,388,607 bits. + }; + + /// This static method is the primary way of constructing an IntegerType. + /// If an IntegerType with the same NumBits value was previously instantiated, + /// that instance will be returned. Otherwise a new one will be created. Only + /// one instance with a given NumBits value is ever created. + /// @brief Get or create an IntegerType instance. + static const IntegerType* get(LLVMContext &C, unsigned NumBits); + + /// @brief Get the number of bits in this IntegerType + unsigned getBitWidth() const { return getSubclassData(); } + + /// getBitMask - Return a bitmask with ones set for all of the bits + /// that can be set by an unsigned version of this type. This is 0xFF for + /// i8, 0xFFFF for i16, etc. + uint64_t getBitMask() const { + return ~uint64_t(0UL) >> (64-getBitWidth()); + } + + /// getSignBit - Return a uint64_t with just the most significant bit set (the + /// sign bit, if the value is treated as a signed number). + uint64_t getSignBit() const { + return 1ULL << (getBitWidth()-1); + } + + /// For example, this is 0xFF for an 8 bit integer, 0xFFFF for i16, etc. + /// @returns a bit mask with ones set for all the bits of this type. + /// @brief Get a bit mask for this type. + APInt getMask() const; + + /// This method determines if the width of this IntegerType is a power-of-2 + /// in terms of 8 bit bytes. + /// @returns true if this is a power-of-2 byte width. + /// @brief Is this a power-of-2 byte-width IntegerType ? 
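For example (an illustrative sketch; `Ctx` is assumed):

```cpp
#include "llvm/DerivedTypes.h"
using namespace llvm;

void integerTypeQueries(LLVMContext &Ctx) {
  const IntegerType *I16 = IntegerType::get(Ctx, 16);
  uint64_t Mask = I16->getBitMask();   // 0xFFFF for i16
  uint64_t Sign = I16->getSignBit();   // 0x8000 for i16
  (void)Mask; (void)Sign;
}
```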
+ bool isPowerOf2ByteWidth() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const IntegerType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == IntegerTyID; + } +}; + + +/// FunctionType - Class to represent function types +/// +class FunctionType : public DerivedType { + friend class TypeMap; + bool isVarArgs; + + FunctionType(const FunctionType &); // Do not implement + const FunctionType &operator=(const FunctionType &); // Do not implement + FunctionType(const Type *Result, const std::vector &Params, + bool IsVarArgs); + +public: + /// FunctionType::get - This static method is the primary way of constructing + /// a FunctionType. + /// + static FunctionType *get( + const Type *Result, ///< The result type + const std::vector &Params, ///< The types of the parameters + bool isVarArg ///< Whether this is a variable argument length function + ); + + /// FunctionType::get - Create a FunctionType taking no parameters. + /// + static FunctionType *get( + const Type *Result, ///< The result type + bool isVarArg ///< Whether this is a variable argument length function + ) { + return get(Result, std::vector(), isVarArg); + } + + /// isValidReturnType - Return true if the specified type is valid as a return + /// type. + static bool isValidReturnType(const Type *RetTy); + + /// isValidArgumentType - Return true if the specified type is valid as an + /// argument type. + static bool isValidArgumentType(const Type *ArgTy); + + inline bool isVarArg() const { return isVarArgs; } + inline const Type *getReturnType() const { return ContainedTys[0]; } + + typedef Type::subtype_iterator param_iterator; + param_iterator param_begin() const { return ContainedTys + 1; } + param_iterator param_end() const { return &ContainedTys[NumContainedTys]; } + + // Parameter type accessors... + const Type *getParamType(unsigned i) const { return ContainedTys[i+1]; } + + /// getNumParams - Return the number of fixed parameters this function type + /// requires. This does not consider varargs. + /// + unsigned getNumParams() const { return NumContainedTys - 1; } + + // Implement the AbstractTypeUser interface. + virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy); + virtual void typeBecameConcrete(const DerivedType *AbsTy); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const FunctionType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == FunctionTyID; + } +}; + + +/// CompositeType - Common super class of ArrayType, StructType, PointerType +/// and VectorType +class CompositeType : public DerivedType { +protected: + inline explicit CompositeType(LLVMContext &C, TypeID id) : + DerivedType(C, id) { } +public: + + /// getTypeAtIndex - Given an index value into the type, return the type of + /// the element. 
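A sketch of building a function signature with the `FunctionType` factory above (helper name and `Ctx` are hypothetical):

```cpp
#include "llvm/DerivedTypes.h"
#include <vector>
using namespace llvm;

FunctionType *makeBinaryOpType(LLVMContext &Ctx) {
  // i32 (i32, i32), not varargs.
  std::vector<const Type*> Params(2, Type::getInt32Ty(Ctx));
  return FunctionType::get(Type::getInt32Ty(Ctx), Params, /*isVarArg=*/false);
}
```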
+ /// + virtual const Type *getTypeAtIndex(const Value *V) const = 0; + virtual const Type *getTypeAtIndex(unsigned Idx) const = 0; + virtual bool indexValid(const Value *V) const = 0; + virtual bool indexValid(unsigned Idx) const = 0; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const CompositeType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == ArrayTyID || + T->getTypeID() == StructTyID || + T->getTypeID() == PointerTyID || + T->getTypeID() == VectorTyID; + } +}; + + +/// StructType - Class to represent struct types +/// +class StructType : public CompositeType { + friend class TypeMap; + StructType(const StructType &); // Do not implement + const StructType &operator=(const StructType &); // Do not implement + StructType(LLVMContext &C, + const std::vector &Types, bool isPacked); +public: + /// StructType::get - This static method is the primary way to create a + /// StructType. + /// + static StructType *get(LLVMContext &Context, + const std::vector &Params, + bool isPacked=false); + + /// StructType::get - Create an empty structure type. + /// + static StructType *get(LLVMContext &Context, bool isPacked=false) { + return get(Context, std::vector(), isPacked); + } + + /// StructType::get - This static method is a convenience method for + /// creating structure types by specifying the elements as arguments. + /// Note that this method always returns a non-packed struct. To get + /// an empty struct, pass NULL, NULL. + static StructType *get(LLVMContext &Context, + const Type *type, ...) END_WITH_NULL; + + /// isValidElementType - Return true if the specified type is valid as a + /// element type. + static bool isValidElementType(const Type *ElemTy); + + // Iterator access to the elements + typedef Type::subtype_iterator element_iterator; + element_iterator element_begin() const { return ContainedTys; } + element_iterator element_end() const { return &ContainedTys[NumContainedTys];} + + // Random access to the elements + unsigned getNumElements() const { return NumContainedTys; } + const Type *getElementType(unsigned N) const { + assert(N < NumContainedTys && "Element number out of range!"); + return ContainedTys[N]; + } + + /// getTypeAtIndex - Given an index value into the type, return the type of + /// the element. For a structure type, this must be a constant value... + /// + virtual const Type *getTypeAtIndex(const Value *V) const; + virtual const Type *getTypeAtIndex(unsigned Idx) const; + virtual bool indexValid(const Value *V) const; + virtual bool indexValid(unsigned Idx) const; + + // Implement the AbstractTypeUser interface. + virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy); + virtual void typeBecameConcrete(const DerivedType *AbsTy); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const StructType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == StructTyID; + } + + bool isPacked() const { return (0 != getSubclassData()) ? true : false; } +}; + +/// SequentialType - This is the superclass of the array, pointer and vector +/// type classes. All of these represent "arrays" in memory. The array type +/// represents a specifically sized array, pointer types are unsized/unknown +/// size arrays, vector types represent specifically sized arrays that +/// allow for use of SIMD instructions. 
SequentialType holds the common +/// features of all, which stem from the fact that all three lay their +/// components out in memory identically. +/// +class SequentialType : public CompositeType { + PATypeHandle ContainedType; ///< Storage for the single contained type + SequentialType(const SequentialType &); // Do not implement! + const SequentialType &operator=(const SequentialType &); // Do not implement! + + // avoiding warning: 'this' : used in base member initializer list + SequentialType* this_() { return this; } +protected: + SequentialType(TypeID TID, const Type *ElType) + : CompositeType(ElType->getContext(), TID), ContainedType(ElType, this_()) { + ContainedTys = &ContainedType; + NumContainedTys = 1; + } + +public: + inline const Type *getElementType() const { return ContainedTys[0]; } + + virtual bool indexValid(const Value *V) const; + virtual bool indexValid(unsigned) const { + return true; + } + + /// getTypeAtIndex - Given an index value into the type, return the type of + /// the element. For sequential types, there is only one subtype... + /// + virtual const Type *getTypeAtIndex(const Value *) const { + return ContainedTys[0]; + } + virtual const Type *getTypeAtIndex(unsigned) const { + return ContainedTys[0]; + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SequentialType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == ArrayTyID || + T->getTypeID() == PointerTyID || + T->getTypeID() == VectorTyID; + } +}; + + +/// ArrayType - Class to represent array types +/// +class ArrayType : public SequentialType { + friend class TypeMap; + uint64_t NumElements; + + ArrayType(const ArrayType &); // Do not implement + const ArrayType &operator=(const ArrayType &); // Do not implement + ArrayType(const Type *ElType, uint64_t NumEl); +public: + /// ArrayType::get - This static method is the primary way to construct an + /// ArrayType + /// + static ArrayType *get(const Type *ElementType, uint64_t NumElements); + + /// isValidElementType - Return true if the specified type is valid as a + /// element type. + static bool isValidElementType(const Type *ElemTy); + + inline uint64_t getNumElements() const { return NumElements; } + + // Implement the AbstractTypeUser interface. + virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy); + virtual void typeBecameConcrete(const DerivedType *AbsTy); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ArrayType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == ArrayTyID; + } +}; + +/// VectorType - Class to represent vector types +/// +class VectorType : public SequentialType { + friend class TypeMap; + unsigned NumElements; + + VectorType(const VectorType &); // Do not implement + const VectorType &operator=(const VectorType &); // Do not implement + VectorType(const Type *ElType, unsigned NumEl); +public: + /// VectorType::get - This static method is the primary way to construct an + /// VectorType + /// + static VectorType *get(const Type *ElementType, unsigned NumElements); + + /// VectorType::getInteger - This static method gets a VectorType with the + /// same number of elements as the input type, and the element type is an + /// integer type of the same width as the input element type. 
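An illustrative sketch of the sequential-type factories (the `Ctx` parameter is assumed; `getInteger` is the helper declared just below):

```cpp
#include "llvm/DerivedTypes.h"
using namespace llvm;

void makeSequentialTypes(LLVMContext &Ctx) {
  ArrayType  *A = ArrayType::get(Type::getInt8Ty(Ctx), 16);   // [16 x i8]
  VectorType *V = VectorType::get(Type::getFloatTy(Ctx), 4);  // <4 x float>
  // getInteger maps <4 x float> to <4 x i32>, keeping the element count.
  VectorType *VI = VectorType::getInteger(V);
  (void)A; (void)VI;
}
```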
+ /// + static VectorType *getInteger(const VectorType *VTy) { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits); + return VectorType::get(EltTy, VTy->getNumElements()); + } + + /// VectorType::getExtendedElementVectorType - This static method is like + /// getInteger except that the element types are twice as wide as the + /// elements in the input type. + /// + static VectorType *getExtendedElementVectorType(const VectorType *VTy) { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2); + return VectorType::get(EltTy, VTy->getNumElements()); + } + + /// VectorType::getTruncatedElementVectorType - This static method is like + /// getInteger except that the element types are half as wide as the + /// elements in the input type. + /// + static VectorType *getTruncatedElementVectorType(const VectorType *VTy) { + unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits(); + assert((EltBits & 1) == 0 && + "Cannot truncate vector element with odd bit-width"); + const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2); + return VectorType::get(EltTy, VTy->getNumElements()); + } + + /// isValidElementType - Return true if the specified type is valid as a + /// element type. + static bool isValidElementType(const Type *ElemTy); + + /// @brief Return the number of elements in the Vector type. + inline unsigned getNumElements() const { return NumElements; } + + /// @brief Return the number of bits in the Vector type. + inline unsigned getBitWidth() const { + return NumElements * getElementType()->getPrimitiveSizeInBits(); + } + + // Implement the AbstractTypeUser interface. + virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy); + virtual void typeBecameConcrete(const DerivedType *AbsTy); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const VectorType *) { return true; } + static inline bool classof(const Type *T) { + return T->getTypeID() == VectorTyID; + } +}; + + +/// PointerType - Class to represent pointers +/// +class PointerType : public SequentialType { + friend class TypeMap; + unsigned AddressSpace; + + PointerType(const PointerType &); // Do not implement + const PointerType &operator=(const PointerType &); // Do not implement + explicit PointerType(const Type *ElType, unsigned AddrSpace); +public: + /// PointerType::get - This constructs a pointer to an object of the specified + /// type in a numbered address space. + static PointerType *get(const Type *ElementType, unsigned AddressSpace); + + /// PointerType::getUnqual - This constructs a pointer to an object of the + /// specified type in the generic address space (address space zero). + static PointerType *getUnqual(const Type *ElementType) { + return PointerType::get(ElementType, 0); + } + + /// isValidElementType - Return true if the specified type is valid as a + /// element type. + static bool isValidElementType(const Type *ElemTy); + + /// @brief Return the address space of the Pointer type. + inline unsigned getAddressSpace() const { return AddressSpace; } + + // Implement the AbstractTypeUser interface. 
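For example (a hedged sketch; `Ctx` assumed):

```cpp
#include "llvm/DerivedTypes.h"
using namespace llvm;

void makePointerTypes(LLVMContext &Ctx) {
  PointerType *P0 = PointerType::getUnqual(Type::getInt8Ty(Ctx)); // i8*
  PointerType *P3 = PointerType::get(Type::getFloatTy(Ctx), 3);   // float
                                                                  // addrspace(3)*
  unsigned AS = P3->getAddressSpace();  // 3
  (void)P0; (void)AS;
}
```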
+  virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
+  virtual void typeBecameConcrete(const DerivedType *AbsTy);
+
+  // Implement support for type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const PointerType *) { return true; }
+  static inline bool classof(const Type *T) {
+    return T->getTypeID() == PointerTyID;
+  }
+};
+
+
+/// OpaqueType - Class to represent abstract types
+///
+class OpaqueType : public DerivedType {
+  friend class LLVMContextImpl;
+  OpaqueType(const OpaqueType &);                   // DO NOT IMPLEMENT
+  const OpaqueType &operator=(const OpaqueType &);  // DO NOT IMPLEMENT
+  OpaqueType(LLVMContext &C);
+public:
+  /// OpaqueType::get - Static factory method for the OpaqueType class.
+  ///
+  static OpaqueType *get(LLVMContext &C);
+
+  // Implement support for type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const OpaqueType *) { return true; }
+  static inline bool classof(const Type *T) {
+    return T->getTypeID() == OpaqueTyID;
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ExecutionEngine/ExecutionEngine.h b/final/include/llvm/ExecutionEngine/ExecutionEngine.h
new file mode 100644
index 00000000000..dcc9743d697
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -0,0 +1,574 @@
+//===- ExecutionEngine.h - Abstract Execution Engine Interface --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the abstract interface that implements execution support
+// for LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_H
+#define LLVM_EXECUTION_ENGINE_H
+
+#include <vector>
+#include <map>
+#include <string>
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/ValueMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+struct GenericValue;
+class Constant;
+class ExecutionEngine;
+class Function;
+class GlobalVariable;
+class GlobalValue;
+class JITEventListener;
+class JITMemoryManager;
+class MachineCodeInfo;
+class Module;
+class MutexGuard;
+class TargetData;
+class Type;
+
+/// \brief Helper class for synchronizing access to the global address map
+/// table.
+class ExecutionEngineState {
+public:
+  struct AddressMapConfig : public ValueMapConfig<const GlobalValue*> {
+    typedef ExecutionEngineState *ExtraData;
+    static sys::Mutex *getMutex(ExecutionEngineState *EES);
+    static void onDelete(ExecutionEngineState *EES, const GlobalValue *Old);
+    static void onRAUW(ExecutionEngineState *, const GlobalValue *,
+                       const GlobalValue *);
+  };
+
+  typedef ValueMap<const GlobalValue *, void *, AddressMapConfig>
+      GlobalAddressMapTy;
+
+private:
+  ExecutionEngine &EE;
+
+  /// GlobalAddressMap - A mapping between LLVM global values and their
+  /// actualized versions.
+  GlobalAddressMapTy GlobalAddressMap;
+
+  /// GlobalAddressReverseMap - This is the reverse mapping of GlobalAddressMap,
+  /// used to convert raw addresses into the LLVM global value that is emitted
+  /// at the address. This map is not computed unless getGlobalValueAtAddress
+  /// is called at some point.
+  std::map<void *, AssertingVH<const GlobalValue> > GlobalAddressReverseMap;
+
+public:
+  ExecutionEngineState(ExecutionEngine &EE);
+
+  GlobalAddressMapTy &getGlobalAddressMap(const MutexGuard &) {
+    return GlobalAddressMap;
+  }
+
+  std::map<void *, AssertingVH<const GlobalValue> > &
+  getGlobalAddressReverseMap(const MutexGuard &) {
+    return GlobalAddressReverseMap;
+  }
+
+  /// \brief Erase an entry from the mapping table.
+  ///
+  /// \returns The address that \arg ToUnmap was mapped to.
+  void *RemoveMapping(const MutexGuard &, const GlobalValue *ToUnmap);
+};
+
+/// \brief Abstract interface for implementing execution of LLVM modules,
+/// designed to support both interpreter and just-in-time (JIT) compiler
+/// implementations.
+class ExecutionEngine {
+  /// The state object holding the global address mapping, which must be
+  /// accessed synchronously.
+  //
+  // FIXME: There is no particular reason the entire map needs to be
+  // synchronized. Wouldn't a reader-writer design be better here?
+  ExecutionEngineState EEState;
+
+  /// The target data for the platform for which execution is being performed.
+  const TargetData *TD;
+
+  /// Whether lazy JIT compilation is enabled.
+  bool CompilingLazily;
+
+  /// Whether JIT compilation of external global variables is allowed.
+  bool GVCompilationDisabled;
+
+  /// Whether the JIT should perform lookups of external symbols (e.g.,
+  /// using dlsym).
+  bool SymbolSearchingDisabled;
+
+  friend class EngineBuilder;  // To allow access to JITCtor and InterpCtor.
+
+protected:
+  /// The list of Modules that we are JIT'ing from. We use a SmallVector to
+  /// optimize for the case where there is only one module.
+  SmallVector<Module*, 1> Modules;
+
+  void setTargetData(const TargetData *td) {
+    TD = td;
+  }
+
+  /// getMemoryForGV - Allocate memory for a global variable.
+  virtual char *getMemoryForGV(const GlobalVariable *GV);
+
+  // To avoid having libexecutionengine depend on the JIT and interpreter
+  // libraries, the execution engine implementations set these functions to
+  // ctor pointers at startup time if they are linked in.
+  static ExecutionEngine *(*JITCtor)(
+    Module *M,
+    std::string *ErrorStr,
+    JITMemoryManager *JMM,
+    CodeGenOpt::Level OptLevel,
+    bool GVsWithCode,
+    CodeModel::Model CMM,
+    StringRef MArch,
+    StringRef MCPU,
+    const SmallVectorImpl<std::string>& MAttrs);
+  static ExecutionEngine *(*MCJITCtor)(
+    Module *M,
+    std::string *ErrorStr,
+    JITMemoryManager *JMM,
+    CodeGenOpt::Level OptLevel,
+    bool GVsWithCode,
+    CodeModel::Model CMM,
+    StringRef MArch,
+    StringRef MCPU,
+    const SmallVectorImpl<std::string>& MAttrs);
+  static ExecutionEngine *(*InterpCtor)(Module *M,
+                                        std::string *ErrorStr);
+
+  /// LazyFunctionCreator - If an unknown function is needed, this function
+  /// pointer is invoked to create it. If this returns null, the JIT will
+  /// abort.
+  void *(*LazyFunctionCreator)(const std::string &);
+
+  /// ExceptionTableRegister - If Exception Handling is set, the JIT will
+  /// register dwarf tables with this function.
+  typedef void (*EERegisterFn)(void*);
+  EERegisterFn ExceptionTableRegister;
+  EERegisterFn ExceptionTableDeregister;
+  /// This maps functions to their exception tables frames.
+  DenseMap<const Function*, void*> AllExceptionTables;
+
+
+public:
+  /// lock - This lock protects the ExecutionEngine, JIT, JITResolver and
+  /// JITEmitter classes. It must be held while changing the internal state of
+  /// any of those classes.
+
+/// \brief Abstract interface for the execution of LLVM modules, designed to
+/// support both interpreter and just-in-time (JIT) compiler implementations.
+class ExecutionEngine {
+  /// The state object holding the global address mapping, which must be
+  /// accessed synchronously.
+  //
+  // FIXME: There is no particular reason the entire map needs to be
+  // synchronized as one unit.  Wouldn't a reader-writer design be better here?
+  ExecutionEngineState EEState;
+
+  /// The target data for the platform for which execution is being performed.
+  const TargetData *TD;
+
+  /// Whether lazy JIT compilation is enabled.
+  bool CompilingLazily;
+
+  /// Whether JIT compilation of external global variables is allowed.
+  bool GVCompilationDisabled;
+
+  /// Whether the JIT should perform lookups of external symbols (e.g.,
+  /// using dlsym).
+  bool SymbolSearchingDisabled;
+
+  friend class EngineBuilder;  // To allow access to JITCtor and InterpCtor.
+
+protected:
+  /// The list of Modules that we are JIT'ing from.  We use a SmallVector to
+  /// optimize for the case where there is only one module.
+  SmallVector<Module*, 1> Modules;
+
+  void setTargetData(const TargetData *td) {
+    TD = td;
+  }
+
+  /// getMemoryForGV - Allocate memory for a global variable.
+  virtual char *getMemoryForGV(const GlobalVariable *GV);
+
+  // To avoid having libexecutionengine depend on the JIT and interpreter
+  // libraries, the execution engine implementations set these functions to
+  // ctor pointers at startup time if they are linked in.
+  static ExecutionEngine *(*JITCtor)(
+    Module *M,
+    std::string *ErrorStr,
+    JITMemoryManager *JMM,
+    CodeGenOpt::Level OptLevel,
+    bool GVsWithCode,
+    CodeModel::Model CMM,
+    StringRef MArch,
+    StringRef MCPU,
+    const SmallVectorImpl<std::string>& MAttrs);
+  static ExecutionEngine *(*MCJITCtor)(
+    Module *M,
+    std::string *ErrorStr,
+    JITMemoryManager *JMM,
+    CodeGenOpt::Level OptLevel,
+    bool GVsWithCode,
+    CodeModel::Model CMM,
+    StringRef MArch,
+    StringRef MCPU,
+    const SmallVectorImpl<std::string>& MAttrs);
+  static ExecutionEngine *(*InterpCtor)(Module *M,
+                                        std::string *ErrorStr);
+
+  /// LazyFunctionCreator - If an unknown function is needed, this function
+  /// pointer is invoked to create it.  If this returns null, the JIT will
+  /// abort.
+  void *(*LazyFunctionCreator)(const std::string &);
+
+  /// ExceptionTableRegister - If Exception Handling is set, the JIT will
+  /// register dwarf tables with this function.
+  typedef void (*EERegisterFn)(void*);
+  EERegisterFn ExceptionTableRegister;
+  EERegisterFn ExceptionTableDeregister;
+  /// This maps functions to their exception table frames.
+  DenseMap<const Function*, void*> AllExceptionTables;
+
+
+public:
+  /// lock - This lock protects the ExecutionEngine, JIT, JITResolver and
+  /// JITEmitter classes.  It must be held while changing the internal state of
+  /// any of those classes.
+  sys::Mutex lock;
+
+  //===--------------------------------------------------------------------===//
+  //  ExecutionEngine Startup
+  //===--------------------------------------------------------------------===//
+
+  virtual ~ExecutionEngine();
+
+  /// create - This is the factory method for creating an execution engine
+  /// which is appropriate for the current machine.  This takes ownership of
+  /// the module.
+  ///
+  /// \param GVsWithCode - Allocating globals with code breaks
+  /// freeMachineCodeForFunction and is probably unsafe and bad for
+  /// performance.  However, we have clients who depend on this behavior, so we
+  /// must support it.  Eventually, when we're willing to break some backward
+  /// compatibility, this flag should be flipped to false, so that by default
+  /// freeMachineCodeForFunction works.
+  static ExecutionEngine *create(Module *M,
+                                 bool ForceInterpreter = false,
+                                 std::string *ErrorStr = 0,
+                                 CodeGenOpt::Level OptLevel =
+                                   CodeGenOpt::Default,
+                                 bool GVsWithCode = true);
+
+  /// createJIT - This is the factory method for creating a JIT for the
+  /// current machine; it does not fall back to the interpreter.  This takes
+  /// ownership of the Module and JITMemoryManager if successful.
+  ///
+  /// Clients should make sure to initialize targets prior to calling this
+  /// function.
+  static ExecutionEngine *createJIT(Module *M,
+                                    std::string *ErrorStr = 0,
+                                    JITMemoryManager *JMM = 0,
+                                    CodeGenOpt::Level OptLevel =
+                                      CodeGenOpt::Default,
+                                    bool GVsWithCode = true,
+                                    CodeModel::Model CMM =
+                                      CodeModel::Default);
+
+  /// addModule - Add a Module to the list of modules that we can JIT from.
+  /// Note that this takes ownership of the Module: when the ExecutionEngine is
+  /// destroyed, it destroys the Module as well.
+  virtual void addModule(Module *M) {
+    Modules.push_back(M);
+  }
+
+  //===--------------------------------------------------------------------===//
+
+  const TargetData *getTargetData() const { return TD; }
+
+  /// removeModule - Remove a Module from the list of modules.  Returns true
+  /// if M is found.
+  virtual bool removeModule(Module *M);
+
+  /// FindFunctionNamed - Search all of the active modules to find the one that
+  /// defines FnName.  This is a very slow operation and shouldn't be used for
+  /// general code.
+  Function *FindFunctionNamed(const char *FnName);
+
+  /// runFunction - Execute the specified function with the specified
+  /// arguments, and return the result.
+  virtual GenericValue runFunction(Function *F,
+                                const std::vector<GenericValue> &ArgValues) = 0;
+
+  /// runStaticConstructorsDestructors - This method is used to execute all of
+  /// the static constructors or destructors for a program.
+  ///
+  /// \param isDtors - Run the destructors instead of constructors.
+  void runStaticConstructorsDestructors(bool isDtors);
+
+  /// runStaticConstructorsDestructors - This method is used to execute all of
+  /// the static constructors or destructors for a particular module.
+  ///
+  /// \param isDtors - Run the destructors instead of constructors.
+  void runStaticConstructorsDestructors(Module *module, bool isDtors);
+
+
+  /// runFunctionAsMain - This is a helper function which wraps runFunction to
+  /// handle the common task of starting up main with the specified argc, argv,
+  /// and envp parameters.
+  int runFunctionAsMain(Function *Fn, const std::vector<std::string> &argv,
+                        const char * const * envp);
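A minimal usage sketch of the factory and startup methods above, assuming the module and its main function have already been loaded and targets initialized; the runMain wrapper and its error handling are ours, not part of this API:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"
    #include <string>
    #include <vector>

    int runMain(llvm::Module *M, llvm::Function *MainFn, char **envp) {
      std::string Err;
      llvm::ExecutionEngine *EE =
          llvm::ExecutionEngine::create(M, /*ForceInterpreter=*/false, &Err);
      if (!EE) return -1;                          // Err describes the failure
      EE->runStaticConstructorsDestructors(false); // static constructors
      std::vector<std::string> Argv;
      Argv.push_back("program");
      int Ret = EE->runFunctionAsMain(MainFn, Argv, envp);
      EE->runStaticConstructorsDestructors(true);  // static destructors
      return Ret;
    }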
+
+  /// addGlobalMapping - Tell the execution engine that the specified global is
+  /// at the specified location.  This is used internally as functions are
+  /// JIT'd and as global variables are laid out in memory.  It can and should
+  /// also be used by clients of the EE that want an LLVM global to overlay
+  /// existing data in memory.  Mappings are automatically removed when their
+  /// GlobalValue is destroyed.
+  void addGlobalMapping(const GlobalValue *GV, void *Addr);
+
+  /// clearAllGlobalMappings - Clear all global mappings and start over again,
+  /// for use in dynamic compilation scenarios to move globals.
+  void clearAllGlobalMappings();
+
+  /// clearGlobalMappingsFromModule - Clear all global mappings that came from
+  /// a particular module, because it has been removed from the JIT.
+  void clearGlobalMappingsFromModule(Module *M);
+
+  /// updateGlobalMapping - Replace an existing mapping for GV with a new
+  /// address.  This updates both maps as required.  If "Addr" is null, the
+  /// entry for the global is removed from the mappings.  This returns the old
+  /// value of the pointer, or null if it was not in the map.
+  void *updateGlobalMapping(const GlobalValue *GV, void *Addr);
+
+  /// getPointerToGlobalIfAvailable - This returns the address of the specified
+  /// global value if it has already been codegen'd, otherwise it returns
+  /// null.
+  void *getPointerToGlobalIfAvailable(const GlobalValue *GV);
+
+  /// getPointerToGlobal - This returns the address of the specified global
+  /// value.  This may involve code generation if it's a function.
+  void *getPointerToGlobal(const GlobalValue *GV);
+
+  /// getPointerToFunction - The different EE's represent function bodies in
+  /// different ways.  They should each implement this to say what a function
+  /// pointer should look like.  When F is destroyed, the ExecutionEngine will
+  /// remove its global mapping and free any machine code.  Be sure no threads
+  /// are running inside F when that happens.
+  virtual void *getPointerToFunction(Function *F) = 0;
+
+  /// getPointerToBasicBlock - The different EE's represent basic blocks in
+  /// different ways.  Return the representation for a blockaddress of the
+  /// specified block.
+  virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0;
+
+  /// getPointerToFunctionOrStub - If the specified function has been
+  /// code-gen'd, return a pointer to the function.  If not, compile it, or use
+  /// a stub to implement lazy compilation if available.  See
+  /// getPointerToFunction for the requirements on destroying F.
+  virtual void *getPointerToFunctionOrStub(Function *F) {
+    // Default implementation, just codegen the function.
+    return getPointerToFunction(F);
+  }
+
+  // The JIT overrides a version that actually does this.
+  virtual void runJITOnFunction(Function *, MachineCodeInfo * = 0) { }
+
+  /// getGlobalValueAtAddress - Return the LLVM global value object that starts
+  /// at the specified address.
+  ///
+  const GlobalValue *getGlobalValueAtAddress(void *Addr);
+
+  /// StoreValueToMemory - Stores the data in Val of type Ty at address Ptr.
+  /// Ptr is the address of the memory at which to store Val, cast to
+  /// GenericValue *.  It is not a pointer to a GenericValue containing the
+  /// address at which to store Val.
+  void StoreValueToMemory(const GenericValue &Val, GenericValue *Ptr,
+                          const Type *Ty);
+
+  void InitializeMemory(const Constant *Init, void *Addr);
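A short sketch of the mapping API described above: exposing a host-side variable to JIT'd code so that an LLVM global overlays existing data. HostCounter and exposeCounter are illustrative names:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/GlobalVariable.h"

    static int HostCounter;  // lives in the host process, not JIT memory

    // After this call, loads/stores of GV in JIT'd code touch HostCounter.
    void exposeCounter(llvm::ExecutionEngine &EE, llvm::GlobalVariable *GV) {
      EE.addGlobalMapping(GV, &HostCounter);
    }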
+
+  /// recompileAndRelinkFunction - This method is used to force a function
+  /// which has already been compiled to be compiled again, possibly after it
+  /// has been modified.  Then the entry to the old copy is overwritten with a
+  /// branch to the new copy.  If there was no old copy, this acts just like
+  /// VM::getPointerToFunction().
+  virtual void *recompileAndRelinkFunction(Function *F) = 0;
+
+  /// freeMachineCodeForFunction - Release memory in the ExecutionEngine
+  /// corresponding to the machine code emitted to execute this function,
+  /// useful for garbage-collecting generated code.
+  virtual void freeMachineCodeForFunction(Function *F) = 0;
+
+  /// getOrEmitGlobalVariable - Return the address of the specified global
+  /// variable, possibly emitting it to memory if needed.  This is used by the
+  /// Emitter.
+  virtual void *getOrEmitGlobalVariable(const GlobalVariable *GV) {
+    return getPointerToGlobal((GlobalValue*)GV);
+  }
+
+  /// Registers a listener to be called back on various events within
+  /// the JIT.  See JITEventListener.h for more details.  Does not
+  /// take ownership of the argument.  The argument may be NULL, in
+  /// which case these functions do nothing.
+  virtual void RegisterJITEventListener(JITEventListener *) {}
+  virtual void UnregisterJITEventListener(JITEventListener *) {}
+
+  /// DisableLazyCompilation - When lazy compilation is off (the default), the
+  /// JIT will eagerly compile every function reachable from the argument to
+  /// getPointerToFunction.  If lazy compilation is turned on, the JIT will
+  /// only compile the one function and emit stubs to compile the rest when
+  /// they're first called.  If lazy compilation is turned off again while some
+  /// lazy stubs are still around, and one of those stubs is called, the
+  /// program will abort.
+  ///
+  /// In order to safely compile lazily in a threaded program, the user must
+  /// ensure that 1) only one thread at a time can call any particular lazy
+  /// stub, and 2) any thread modifying LLVM IR must hold the JIT's lock
+  /// (ExecutionEngine::lock) or otherwise ensure that no other thread calls a
+  /// lazy stub.  See http://llvm.org/PR5184 for details.
+  void DisableLazyCompilation(bool Disabled = true) {
+    CompilingLazily = !Disabled;
+  }
+  bool isCompilingLazily() const {
+    return CompilingLazily;
+  }
+  // Deprecated in favor of isCompilingLazily (to reduce double-negatives).
+  // Remove this in LLVM 2.8.
+  bool isLazyCompilationDisabled() const {
+    return !CompilingLazily;
+  }
+
+  /// DisableGVCompilation - If called, the JIT will abort if it's asked to
+  /// allocate space and populate a GlobalVariable that is not internal to
+  /// the module.
+  void DisableGVCompilation(bool Disabled = true) {
+    GVCompilationDisabled = Disabled;
+  }
+  bool isGVCompilationDisabled() const {
+    return GVCompilationDisabled;
+  }
+
+  /// DisableSymbolSearching - If called, the JIT will not try to look up
+  /// unknown symbols with dlsym.  A client can still use
+  /// InstallLazyFunctionCreator to resolve symbols in a custom way.
+  void DisableSymbolSearching(bool Disabled = true) {
+    SymbolSearchingDisabled = Disabled;
+  }
+  bool isSymbolSearchingDisabled() const {
+    return SymbolSearchingDisabled;
+  }
+
+  /// InstallLazyFunctionCreator - If an unknown function is needed, the
+  /// specified function pointer is invoked to create it.  If it returns null,
+  /// the JIT will abort.
+  void InstallLazyFunctionCreator(void* (*P)(const std::string &)) {
+    LazyFunctionCreator = P;
+  }
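A sketch of the creator contract just described: return an address for an unknown function name, or null to let the JIT abort. The printf special case is an assumption for illustration, and casting a function pointer to void* is conditionally-supported but conventional in this setting:

    #include <string>
    #include <cstdio>
    #include <stdint.h>

    static void *MyFunctionCreator(const std::string &Name) {
      if (Name == "printf")
        return (void*)(intptr_t)std::printf;  // resolve to the host's printf
      return 0;                               // unknown symbol: the JIT aborts
    }
    // Typical installation, with dlsym lookups turned off:
    //   EE->DisableSymbolSearching();
    //   EE->InstallLazyFunctionCreator(MyFunctionCreator);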
+
+  /// InstallExceptionTableRegister - The JIT will use the given function
+  /// to register the exception tables it generates.
+  void InstallExceptionTableRegister(EERegisterFn F) {
+    ExceptionTableRegister = F;
+  }
+  void InstallExceptionTableDeregister(EERegisterFn F) {
+    ExceptionTableDeregister = F;
+  }
+
+  /// RegisterTable - Registers the given pointer as an exception table.  It
+  /// uses the ExceptionTableRegister function.
+  void RegisterTable(const Function *fn, void* res) {
+    if (ExceptionTableRegister) {
+      ExceptionTableRegister(res);
+      AllExceptionTables[fn] = res;
+    }
+  }
+
+  /// DeregisterTable - Deregisters the exception frame previously registered
+  /// for the given function.
+  void DeregisterTable(const Function *Fn) {
+    if (ExceptionTableDeregister) {
+      DenseMap<const Function*, void*>::iterator frame =
+        AllExceptionTables.find(Fn);
+      if (frame != AllExceptionTables.end()) {
+        ExceptionTableDeregister(frame->second);
+        AllExceptionTables.erase(frame);
+      }
+    }
+  }
+
+  /// DeregisterAllTables - Deregisters all previously registered exception
+  /// tables.  It uses the ExceptionTableDeregister function.
+  void DeregisterAllTables();
+
+protected:
+  explicit ExecutionEngine(Module *M);
+
+  void emitGlobals();
+
+  void EmitGlobalVariable(const GlobalVariable *GV);
+
+  GenericValue getConstantValue(const Constant *C);
+  void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr,
+                           const Type *Ty);
+};
+
+namespace EngineKind {
+  // These are actually bitmasks that get or-ed together.
+  enum Kind {
+    JIT         = 0x1,
+    Interpreter = 0x2
+  };
+  const static Kind Either = (Kind)(JIT | Interpreter);
+}
+
+/// EngineBuilder - Builder class for ExecutionEngines.  Use this by
+/// stack-allocating a builder, chaining the various set* methods, and
+/// terminating it with a .create() call.
+class EngineBuilder {
+private:
+  Module *M;
+  EngineKind::Kind WhichEngine;
+  std::string *ErrorStr;
+  CodeGenOpt::Level OptLevel;
+  JITMemoryManager *JMM;
+  bool AllocateGVsWithCode;
+  CodeModel::Model CMModel;
+  std::string MArch;
+  std::string MCPU;
+  SmallVector<std::string, 4> MAttrs;
+  bool UseMCJIT;
+
+  /// InitEngine - Does the common initialization of default options.
+  void InitEngine() {
+    WhichEngine = EngineKind::Either;
+    ErrorStr = NULL;
+    OptLevel = CodeGenOpt::Default;
+    JMM = NULL;
+    AllocateGVsWithCode = false;
+    CMModel = CodeModel::Default;
+    UseMCJIT = false;
+  }
+
+public:
+  /// EngineBuilder - Constructor for EngineBuilder.  If create() is called and
+  /// is successful, the created engine takes ownership of the module.
+  EngineBuilder(Module *m) : M(m) {
+    InitEngine();
+  }
+
+  /// setEngineKind - Controls whether the user wants the interpreter, the JIT,
+  /// or whichever engine works.  This option defaults to EngineKind::Either.
+  EngineBuilder &setEngineKind(EngineKind::Kind w) {
+    WhichEngine = w;
+    return *this;
+  }
+
+  /// setJITMemoryManager - Sets the memory manager to use.  This allows
+  /// clients to customize their memory allocation policies.  If create() is
+  /// called and is successful, the created engine takes ownership of the
+  /// memory manager.  This option defaults to NULL.
+  EngineBuilder &setJITMemoryManager(JITMemoryManager *jmm) {
+    JMM = jmm;
+    return *this;
+  }
+
+  /// setErrorStr - Set the error string to write to on error.  This option
+  /// defaults to NULL.
+  EngineBuilder &setErrorStr(std::string *e) {
+    ErrorStr = e;
+    return *this;
+  }
+
+  /// setOptLevel - Set the optimization level for the JIT.  This option
+  /// defaults to CodeGenOpt::Default.
+  EngineBuilder &setOptLevel(CodeGenOpt::Level l) {
+    OptLevel = l;
+    return *this;
+  }
+
+  /// setCodeModel - Set the CodeModel that the ExecutionEngine target
+  /// data is using.  Defaults to the target-specific default
+  /// "CodeModel::Default".
+  EngineBuilder &setCodeModel(CodeModel::Model M) {
+    CMModel = M;
+    return *this;
+  }
+
+  /// setAllocateGVsWithCode - Sets whether global values should be allocated
+  /// into the same buffer as code.  For most applications this should be set
+  /// to false.  Allocating globals with code breaks freeMachineCodeForFunction
+  /// and is probably unsafe and bad for performance.  However, we have clients
+  /// who depend on this behavior, so we must support it.  This option defaults
+  /// to false so that users of the new API can safely use the new memory
+  /// manager and free machine code.
+  EngineBuilder &setAllocateGVsWithCode(bool a) {
+    AllocateGVsWithCode = a;
+    return *this;
+  }
+
+  /// setMArch - Override the architecture set by the Module's triple.
+  EngineBuilder &setMArch(StringRef march) {
+    MArch.assign(march.begin(), march.end());
+    return *this;
+  }
+
+  /// setMCPU - Target a specific cpu type.
+  EngineBuilder &setMCPU(StringRef mcpu) {
+    MCPU.assign(mcpu.begin(), mcpu.end());
+    return *this;
+  }
+
+  /// setUseMCJIT - Set whether the MC-JIT implementation should be used
+  /// (experimental).
+  void setUseMCJIT(bool Value) {
+    UseMCJIT = Value;
+  }
+
+  /// setMAttrs - Set CPU-specific attributes.
+  template<typename StringSequence>
+  EngineBuilder &setMAttrs(const StringSequence &mattrs) {
+    MAttrs.clear();
+    MAttrs.append(mattrs.begin(), mattrs.end());
+    return *this;
+  }
+
+  ExecutionEngine *create();
+};
+
+} // End llvm namespace
+
+#endif
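A sketch of the builder pattern the comments above describe: stack-allocate, chain setters, call create(). Target initialization is assumed to have happened elsewhere, and buildJIT is our name:

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/Module.h"
    #include <string>

    llvm::ExecutionEngine *buildJIT(llvm::Module *M, std::string &Err) {
      return llvm::EngineBuilder(M)
          .setEngineKind(llvm::EngineKind::JIT)  // no interpreter fallback
          .setErrorStr(&Err)
          .setOptLevel(llvm::CodeGenOpt::Aggressive)
          .create();  // on success the engine owns M
    }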
diff --git a/final/include/llvm/ExecutionEngine/GenericValue.h b/final/include/llvm/ExecutionEngine/GenericValue.h
new file mode 100644
index 00000000000..a2fed98c150
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/GenericValue.h
@@ -0,0 +1,44 @@
+//===-- GenericValue.h - Represent any type of LLVM value -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The GenericValue class is used to represent an LLVM value of arbitrary type.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef GENERIC_VALUE_H
+#define GENERIC_VALUE_H
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+typedef void* PointerTy;
+class APInt;
+
+struct GenericValue {
+  union {
+    double          DoubleVal;
+    float           FloatVal;
+    PointerTy       PointerVal;
+    struct { unsigned int first; unsigned int second; } UIntPairVal;
+    unsigned char   Untyped[8];
+  };
+  APInt IntVal;   // also used for long doubles
+
+  GenericValue() : DoubleVal(0.0), IntVal(1,0) {}
+  explicit GenericValue(void *V) : PointerVal(V), IntVal(1,0) { }
+};
+
+inline GenericValue PTOGV(void *P) { return GenericValue(P); }
+inline void* GVTOP(const GenericValue &GV) { return GV.PointerVal; }
+
+} // End llvm namespace
+#endif
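A sketch of packaging arguments for ExecutionEngine::runFunction with this struct, assuming a callee of LLVM type i32 (i32, double); makeArgs is an illustrative name:

    #include "llvm/ExecutionEngine/GenericValue.h"
    #include "llvm/ADT/APInt.h"
    #include <vector>

    std::vector<llvm::GenericValue> makeArgs(int X, double D) {
      std::vector<llvm::GenericValue> Args(2);
      Args[0].IntVal = llvm::APInt(32, X);  // integers travel in IntVal
      Args[1].DoubleVal = D;                // floats/doubles use the union
      return Args;
    }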
diff --git a/final/include/llvm/ExecutionEngine/Interpreter.h b/final/include/llvm/ExecutionEngine/Interpreter.h
new file mode 100644
index 00000000000..7425cdbcfda
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/Interpreter.h
@@ -0,0 +1,38 @@
+//===-- Interpreter.h - Abstract Execution Engine Interface -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file forces the interpreter to link in on certain operating systems
+// (Windows).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef EXECUTION_ENGINE_INTERPRETER_H
+#define EXECUTION_ENGINE_INTERPRETER_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include <cstdlib>
+
+extern "C" void LLVMLinkInInterpreter();
+
+namespace {
+  struct ForceInterpreterLinking {
+    ForceInterpreterLinking() {
+      // We must reference the interpreter in such a way that compilers will
+      // not delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP.  As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      LLVMLinkInInterpreter();
+    }
+  } ForceInterpreterLinking;
+}
+
+#endif
diff --git a/final/include/llvm/ExecutionEngine/JIT.h b/final/include/llvm/ExecutionEngine/JIT.h
new file mode 100644
index 00000000000..6013db48ce6
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/JIT.h
@@ -0,0 +1,38 @@
+//===-- JIT.h - Abstract Execution Engine Interface -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file forces the JIT to link in on certain operating systems
+// (Windows).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_H
+#define LLVM_EXECUTION_ENGINE_JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include <cstdlib>
+
+extern "C" void LLVMLinkInJIT();
+
+namespace {
+  struct ForceJITLinking {
+    ForceJITLinking() {
+      // We must reference the JIT in such a way that compilers will not
+      // delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP.  As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      LLVMLinkInJIT();
+    }
+  } ForceJITLinking;
+}
+
+#endif
diff --git a/final/include/llvm/ExecutionEngine/JITEventListener.h b/final/include/llvm/ExecutionEngine/JITEventListener.h
new file mode 100644
index 00000000000..abc063b0703
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/JITEventListener.h
@@ -0,0 +1,82 @@
+//===- JITEventListener.h - Exposes events from JIT compilation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the JITEventListener interface, which lets users get
+// callbacks when significant events happen during the JIT compilation process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/DebugLoc.h"
+
+#include <vector>
+
+namespace llvm {
+class Function;
+class MachineFunction;
+
+/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
+/// about a generated machine code function.
+struct JITEvent_EmittedFunctionDetails {
+  struct LineStart {
+    /// The address at which the current line changes.
+    uintptr_t Address;
+
+    /// The new location information.  These can be translated to
+    /// DebugLocTuples using MF->getDebugLocTuple().
+    DebugLoc Loc;
+  };
+
+  /// The machine function the struct contains information for.
+  const MachineFunction *MF;
+
+  /// The list of line boundary information, sorted by address.
+  std::vector<LineStart> LineStarts;
+};
+
+/// JITEventListener - Abstract interface for use by the JIT to notify clients
+/// about significant events during compilation.  For example, to notify
+/// profilers and debuggers that need to know where functions have been
+/// emitted.
+///
+/// The default implementation of each method does nothing.
+class JITEventListener {
+public:
+  typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails;
+
+public:
+  JITEventListener() {}
+  virtual ~JITEventListener();
+
+  /// NotifyFunctionEmitted - Called after a function has been successfully
+  /// emitted to memory.  The function still has its MachineFunction attached,
+  /// if you should happen to need that.
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *Code, size_t Size,
+                                     const EmittedFunctionDetails &Details) {}
+
+  /// NotifyFreeingMachineCode - Called from freeMachineCodeForFunction(),
+  /// after the global mapping is removed, but before the machine code is
+  /// returned to the allocator.
+  ///
+  /// OldPtr is the address of the machine code and will be the same as the
+  /// Code parameter to a previous NotifyFunctionEmitted call.  The Function
+  /// passed to NotifyFunctionEmitted may have been destroyed by the time of
+  /// the matching NotifyFreeingMachineCode call.
+  virtual void NotifyFreeingMachineCode(void *OldPtr) {}
+};
+
+// This returns NULL if support isn't available.
+JITEventListener *createOProfileJITEventListener();
+
+} // end namespace llvm.
+
+#endif
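A sketch of a listener implementation under the interface above; the logging body is ours, and getNameStr was the era-appropriate accessor for a Value's name:

    #include "llvm/ExecutionEngine/JITEventListener.h"
    #include "llvm/Function.h"
    #include <cstdio>

    class LoggingListener : public llvm::JITEventListener {
    public:
      virtual void NotifyFunctionEmitted(const llvm::Function &F, void *Code,
                                         size_t Size,
                                         const EmittedFunctionDetails &) {
        std::printf("emitted %s at %p (%lu bytes)\n",
                    F.getNameStr().c_str(), Code, (unsigned long)Size);
      }
      virtual void NotifyFreeingMachineCode(void *OldPtr) {
        std::printf("freeing code at %p\n", OldPtr);
      }
    };
    // Register with EE->RegisterJITEventListener(&L); the EE does not own it.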
diff --git a/final/include/llvm/ExecutionEngine/JITMemoryManager.h b/final/include/llvm/ExecutionEngine/JITMemoryManager.h
new file mode 100644
index 00000000000..38414180166
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/JITMemoryManager.h
@@ -0,0 +1,179 @@
+//===-- JITMemoryManager.h - Interface JIT uses to Allocate Mem -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
+#define LLVM_EXECUTION_ENGINE_JIT_MEMMANAGER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+
+  class Function;
+  class GlobalValue;
+
+/// JITMemoryManager - This interface is used by the JIT to allocate and manage
+/// memory for the code generated by the JIT.  This can be reimplemented by
+/// clients that have a strong desire to control how the layout of JIT'd
+/// memory works.
+class JITMemoryManager {
+protected:
+  bool HasGOT;
+
+public:
+  JITMemoryManager() : HasGOT(false) {}
+  virtual ~JITMemoryManager();
+
+  /// CreateDefaultMemManager - This is used to create the default
+  /// JIT Memory Manager if the client does not provide one to the JIT.
+  static JITMemoryManager *CreateDefaultMemManager();
+
+  /// setMemoryWritable - When code generation is in progress,
+  /// the code pages may need permissions changed.
+  virtual void setMemoryWritable() = 0;
+
+  /// setMemoryExecutable - When code generation is done and we're ready to
+  /// start execution, the code pages may need permissions changed.
+  virtual void setMemoryExecutable() = 0;
+
+  /// setPoisonMemory - Setting this flag to true makes the memory manager
+  /// write garbage values over freed memory.  This is useful for testing and
+  /// debugging, and may be turned on by default in debug mode.
+  virtual void setPoisonMemory(bool poison) = 0;
+
+  //===--------------------------------------------------------------------===//
+  // Global Offset Table Management
+  //===--------------------------------------------------------------------===//
+
+  /// AllocateGOT - If the current table requires a Global Offset Table, this
+  /// method is invoked to allocate it.  This method is required to set HasGOT
+  /// to true.
+  virtual void AllocateGOT() = 0;
+
+  /// isManagingGOT - Return true if the AllocateGOT method has been called.
+  bool isManagingGOT() const {
+    return HasGOT;
+  }
+
+  /// getGOTBase - If this is managing a Global Offset Table, this method
+  /// should return a pointer to its base.
+  virtual uint8_t *getGOTBase() const = 0;
+
+  //===--------------------------------------------------------------------===//
+  // Main Allocation Functions
+  //===--------------------------------------------------------------------===//
+
+  /// startFunctionBody - When we start JITing a function, the JIT calls this
+  /// method to allocate a block of free RWX memory, which returns a pointer to
+  /// it.  If the JIT wants to request a block of memory of at least a certain
+  /// size, it passes that value as ActualSize, and this method returns a block
+  /// with at least that much space.  If the JIT doesn't know ahead of time how
+  /// much space it will need to emit the function, it passes 0 for the
+  /// ActualSize.  In either case, this method is required to pass back the
+  /// size of the allocated block through ActualSize.
+  /// The JIT will be careful to not write more than the returned ActualSize
+  /// bytes of memory.
+  virtual uint8_t *startFunctionBody(const Function *F,
+                                     uintptr_t &ActualSize) = 0;
+
+  /// allocateStub - This method is called by the JIT to allocate space for a
+  /// function stub (used to handle limited branch displacements) while it is
+  /// JIT compiling a function.  For example, if foo calls bar, and if bar
+  /// either needs to be lazily compiled or is a native function that exists
+  /// too far away from the call site to work, this method will be used to
+  /// make a thunk for it.  The stub should be "close" to the current function
+  /// body, but should not be included in the 'ActualSize' returned by
+  /// startFunctionBody.
+  virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+                                unsigned Alignment) = 0;
+
+  /// endFunctionBody - This method is called when the JIT is done codegen'ing
+  /// the specified function.  At this point we know the size of the JIT
+  /// compiled function.  This passes in FunctionStart (which was returned by
+  /// the startFunctionBody method) and FunctionEnd which is a pointer to the
+  /// actual end of the function.  This method should mark the space allocated
+  /// and remember where it is in case the client wants to deallocate it.
+  virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                               uint8_t *FunctionEnd) = 0;
+
+  /// allocateSpace - Allocate a memory block of the given size.  This method
+  /// cannot be called between calls to startFunctionBody and endFunctionBody.
+  virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) = 0;
+
+  /// allocateGlobal - Allocate memory for a global.
+  virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) = 0;
+
+  /// deallocateFunctionBody - Free the specified function body.  The argument
+  /// must be the return value from a call to startFunctionBody() that hasn't
+  /// been deallocated yet.  This is never called when the JIT is currently
+  /// emitting a function.
+  virtual void deallocateFunctionBody(void *Body) = 0;
+
+  /// startExceptionTable - When we have finished JITing the function, if
+  /// exception handling is set, we emit the exception table.
+  virtual uint8_t* startExceptionTable(const Function* F,
+                                       uintptr_t &ActualSize) = 0;
+
+  /// endExceptionTable - This method is called when the JIT is done emitting
+  /// the exception table.
+  virtual void endExceptionTable(const Function *F, uint8_t *TableStart,
+                                 uint8_t *TableEnd, uint8_t* FrameRegister) = 0;
+
+  /// deallocateExceptionTable - Free the specified exception table's memory.
+  /// The argument must be the return value from a call to
+  /// startExceptionTable() that hasn't been deallocated yet.  This is never
+  /// called when the JIT is currently emitting an exception table.
+  virtual void deallocateExceptionTable(void *ET) = 0;
+
+  /// CheckInvariants - For testing only.  Return true if all internal
+  /// invariants are preserved, or return false and set ErrorStr to a helpful
+  /// error message.
+  virtual bool CheckInvariants(std::string &) {
+    return true;
+  }
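An illustrative driver for the allocation protocol above (not a real code emitter): request memory, write no more than ActualSize bytes, then mark the used extent. emitBlob is a hypothetical name:

    #include "llvm/ExecutionEngine/JITMemoryManager.h"
    #include <cstring>

    uint8_t *emitBlob(llvm::JITMemoryManager &MM, const llvm::Function *F,
                      const uint8_t *Bytes, uintptr_t Len) {
      uintptr_t ActualSize = Len;                 // ask for at least Len bytes
      uint8_t *Start = MM.startFunctionBody(F, ActualSize);
      std::memcpy(Start, Bytes, Len);             // stay within ActualSize
      MM.endFunctionBody(F, Start, Start + Len);  // record the real extent
      return Start;
    }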
+
+  /// GetDefaultCodeSlabSize - For testing only.  Returns DefaultCodeSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultCodeSlabSize() {
+    return 0;
+  }
+
+  /// GetDefaultDataSlabSize - For testing only.  Returns DefaultDataSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultDataSlabSize() {
+    return 0;
+  }
+
+  /// GetDefaultStubSlabSize - For testing only.  Returns DefaultStubSlabSize
+  /// from DefaultJITMemoryManager.
+  virtual size_t GetDefaultStubSlabSize() {
+    return 0;
+  }
+
+  /// GetNumCodeSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for code.
+  virtual unsigned GetNumCodeSlabs() {
+    return 0;
+  }
+
+  /// GetNumDataSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for data.
+  virtual unsigned GetNumDataSlabs() {
+    return 0;
+  }
+
+  /// GetNumStubSlabs - For testing only.  Returns the number of MemoryBlocks
+  /// allocated for function stubs.
+  virtual unsigned GetNumStubSlabs() {
+    return 0;
+  }
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/final/include/llvm/ExecutionEngine/MCJIT.h b/final/include/llvm/ExecutionEngine/MCJIT.h
new file mode 100644
index 00000000000..f956a5029b1
--- /dev/null
+++ b/final/include/llvm/ExecutionEngine/MCJIT.h
@@ -0,0 +1,38 @@
+//===-- MCJIT.h - MC-Based Just-In-Time Execution Engine --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file forces the MCJIT to link in on certain operating systems
+// (Windows).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_MCJIT_H
+#define LLVM_EXECUTION_ENGINE_MCJIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include <cstdlib>
+
+extern "C" void LLVMLinkInMCJIT();
+
+namespace {
+  struct ForceMCJITLinking {
+    ForceMCJITLinking() {
+      // We must reference the MCJIT in such a way that compilers will not
+      // delete it all as dead code, even with whole program optimization,
+      // yet is effectively a NO-OP.  As the compiler isn't smart enough
+      // to know that getenv() never returns -1, this will do the job.
+      if (std::getenv("bar") != (char*) -1)
+        return;
+
+      LLVMLinkInMCJIT();
+    }
+  } ForceMCJITLinking;
+}
+
+#endif
diff --git a/final/include/llvm/Function.h b/final/include/llvm/Function.h
new file mode 100644
index 00000000000..9a0825ab4a9
--- /dev/null
+++ b/final/include/llvm/Function.h
@@ -0,0 +1,437 @@
+//===-- llvm/Function.h - Class to represent a single function --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Function class, which represents a
+// single function/procedure in LLVM.
+//
+// A function basically consists of a list of basic blocks, a list of
+// arguments, and a symbol table.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FUNCTION_H
+#define LLVM_FUNCTION_H
+
+#include "llvm/GlobalValue.h"
+#include "llvm/CallingConv.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Argument.h"
+#include "llvm/Attributes.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class FunctionType;
+class LLVMContext;
+
+// Traits for intrusive list of basic blocks...
+template<> struct ilist_traits<BasicBlock>
+  : public SymbolTableListTraits<BasicBlock, Function> {
+
+  // createSentinel is used to get hold of the node that marks the end of the
+  // list... (same trick used here as in ilist_traits<Function>)
+  BasicBlock *createSentinel() const {
+    return static_cast<BasicBlock*>(&Sentinel);
+  }
+  static void destroySentinel(BasicBlock*) {}
+
+  BasicBlock *provideInitialHead() const { return createSentinel(); }
+  BasicBlock *ensureHead(BasicBlock*) const { return createSentinel(); }
+  static void noteHead(BasicBlock*, BasicBlock*) {}
+
+  static ValueSymbolTable *getSymTab(Function *ItemParent);
+private:
+  mutable ilist_half_node<BasicBlock> Sentinel;
+};
+
+template<> struct ilist_traits<Argument>
+  : public SymbolTableListTraits<Argument, Function> {
+
+  Argument *createSentinel() const {
+    return static_cast<Argument*>(&Sentinel);
+  }
+  static void destroySentinel(Argument*) {}
+
+  Argument *provideInitialHead() const { return createSentinel(); }
+  Argument *ensureHead(Argument*) const { return createSentinel(); }
+  static void noteHead(Argument*, Argument*) {}
+
+  static ValueSymbolTable *getSymTab(Function *ItemParent);
+private:
+  mutable ilist_half_node<Argument> Sentinel;
+};
+
+class Function : public GlobalValue,
+                 public ilist_node<Function> {
+public:
+  typedef iplist<Argument> ArgumentListType;
+  typedef iplist<BasicBlock> BasicBlockListType;
+
+  // BasicBlock iterators...
+  typedef BasicBlockListType::iterator iterator;
+  typedef BasicBlockListType::const_iterator const_iterator;
+
+  typedef ArgumentListType::iterator arg_iterator;
+  typedef ArgumentListType::const_iterator const_arg_iterator;
+
+private:
+  // Important things that make up a function!
+  BasicBlockListType  BasicBlocks;        ///< The basic blocks
+  mutable ArgumentListType ArgumentList;  ///< The formal arguments
+  ValueSymbolTable *SymTab;               ///< Symbol table of args/instructions
+  AttrListPtr AttributeList;              ///< Parameter attributes
+
+  // HasLazyArguments is stored in Value::SubclassData.
+  /*bool HasLazyArguments;*/
+
+  // The Calling Convention is stored in Value::SubclassData.
+  /*CallingConv::ID CallingConvention;*/
+
+  friend class SymbolTableListTraits<Function, Module>;
+
+  void setParent(Module *parent);
+
+  /// hasLazyArguments/CheckLazyArguments - The argument list of a function is
+  /// built on demand, so that the list isn't allocated until the first client
+  /// needs it.  The hasLazyArguments predicate returns true if the arg list
+  /// hasn't been set up yet.
+  bool hasLazyArguments() const {
+    return getSubclassDataFromValue() & 1;
+  }
+  void CheckLazyArguments() const {
+    if (hasLazyArguments())
+      BuildLazyArguments();
+  }
+  void BuildLazyArguments() const;
+
+  Function(const Function&); // DO NOT IMPLEMENT
+  void operator=(const Function&); // DO NOT IMPLEMENT
+
+  /// Function ctor - If the (optional) Module argument is specified, the
+  /// function is automatically inserted into the end of the function list for
+  /// the module.
+  ///
+  Function(const FunctionType *Ty, LinkageTypes Linkage,
+           const Twine &N = "", Module *M = 0);
+
+public:
+  static Function *Create(const FunctionType *Ty, LinkageTypes Linkage,
+                          const Twine &N = "", Module *M = 0) {
+    return new(0) Function(Ty, Linkage, N, M);
+  }
+
+  ~Function();
+
+  const Type *getReturnType() const;           // Return the type of the ret val
+  const FunctionType *getFunctionType() const; // Return the FunctionType for me
+
+  /// getContext - Return a pointer to the LLVMContext associated with this
+  /// function, or NULL if this function is not bound to a context yet.
+  LLVMContext &getContext() const;
+
+  /// isVarArg - Return true if this function takes a variable number of
+  /// arguments.
+  bool isVarArg() const;
+
+  /// isDeclaration - Is the body of this function unknown?  (The basic block
+  /// list is empty if so.)
+  /// This is true for function declarations, but not true for function
+  /// definitions.
+  ///
+  virtual bool isDeclaration() const { return BasicBlocks.empty(); }
+
+  /// getIntrinsicID - This method returns the ID number of the specified
+  /// function, or Intrinsic::not_intrinsic if the function is not an
+  /// intrinsic, or if the pointer is null.  This value is always defined to be
+  /// zero to allow easy checking for whether a function is intrinsic or not.
+  /// The particular intrinsic functions which correspond to this value are
+  /// defined in llvm/Intrinsics.h.
+  ///
+  unsigned getIntrinsicID() const LLVM_ATTRIBUTE_READONLY;
+  bool isIntrinsic() const { return getIntrinsicID() != 0; }
+
+  /// getCallingConv()/setCallingConv(CC) - These methods get and set the
+  /// calling convention of this function.  The enum values for the known
+  /// calling conventions are defined in CallingConv.h.
+  CallingConv::ID getCallingConv() const {
+    return static_cast<CallingConv::ID>(getSubclassDataFromValue() >> 1);
+  }
+  void setCallingConv(CallingConv::ID CC) {
+    setValueSubclassData((getSubclassDataFromValue() & 1) |
+                         (static_cast<unsigned>(CC) << 1));
+  }
+
+  /// getAttributes - Return the attribute list for this Function.
+  ///
+  const AttrListPtr &getAttributes() const { return AttributeList; }
+
+  /// setAttributes - Set the attribute list for this Function.
+  ///
+  void setAttributes(const AttrListPtr &attrs) { AttributeList = attrs; }
+
+  /// hasFnAttr - Return true if this function has the given attribute.
+  bool hasFnAttr(Attributes N) const {
+    // Function Attributes are stored at ~0 index
+    return AttributeList.paramHasAttr(~0U, N);
+  }
+
+  /// addFnAttr - Add function attributes to this function.
+  ///
+  void addFnAttr(Attributes N) {
+    // Function Attributes are stored at ~0 index
+    addAttribute(~0U, N);
+  }
+
+  /// removeFnAttr - Remove function attributes from this function.
+  ///
+  void removeFnAttr(Attributes N) {
+    // Function Attributes are stored at ~0 index
+    removeAttribute(~0U, N);
+  }
+
+  /// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
+  /// to use during code generation.
+  bool hasGC() const;
+  const char *getGC() const;
+  void setGC(const char *Str);
+  void clearGC();
+
+  /// @brief Determine whether the function has the given attribute.
+  bool paramHasAttr(unsigned i, Attributes attr) const {
+    return AttributeList.paramHasAttr(i, attr);
+  }
+
+  /// addAttribute - adds the attribute to the list of attributes.
+  void addAttribute(unsigned i, Attributes attr);
+
+  /// removeAttribute - removes the attribute from the list of attributes.
+  void removeAttribute(unsigned i, Attributes attr);
+
+  /// @brief Extract the alignment for a call or parameter (0=unknown).
+  unsigned getParamAlignment(unsigned i) const {
+    return AttributeList.getParamAlignment(i);
+  }
+
+  /// @brief Determine if the function does not access memory.
+  bool doesNotAccessMemory() const {
+    return hasFnAttr(Attribute::ReadNone);
+  }
+  void setDoesNotAccessMemory(bool DoesNotAccessMemory = true) {
+    if (DoesNotAccessMemory) addFnAttr(Attribute::ReadNone);
+    else removeFnAttr(Attribute::ReadNone);
+  }
+
+  /// @brief Determine if the function does not access or only reads memory.
+  bool onlyReadsMemory() const {
+    return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
+  }
+  void setOnlyReadsMemory(bool OnlyReadsMemory = true) {
+    if (OnlyReadsMemory) addFnAttr(Attribute::ReadOnly);
+    else removeFnAttr(Attribute::ReadOnly | Attribute::ReadNone);
+  }
+
+  /// @brief Determine if the function cannot return.
+  bool doesNotReturn() const {
+    return hasFnAttr(Attribute::NoReturn);
+  }
+  void setDoesNotReturn(bool DoesNotReturn = true) {
+    if (DoesNotReturn) addFnAttr(Attribute::NoReturn);
+    else removeFnAttr(Attribute::NoReturn);
+  }
+
+  /// @brief Determine if the function cannot unwind.
+  bool doesNotThrow() const {
+    return hasFnAttr(Attribute::NoUnwind);
+  }
+  void setDoesNotThrow(bool DoesNotThrow = true) {
+    if (DoesNotThrow) addFnAttr(Attribute::NoUnwind);
+    else removeFnAttr(Attribute::NoUnwind);
+  }
+
+  /// @brief Determine if the function returns a structure through its first
+  /// pointer argument.
+  bool hasStructRetAttr() const {
+    return paramHasAttr(1, Attribute::StructRet);
+  }
+
+  /// @brief Determine if the parameter does not alias other parameters.
+  /// @param n The parameter to check.  1 is the first parameter, 0 is the
+  /// return value.
+  bool doesNotAlias(unsigned n) const {
+    return paramHasAttr(n, Attribute::NoAlias);
+  }
+  void setDoesNotAlias(unsigned n, bool DoesNotAlias = true) {
+    if (DoesNotAlias) addAttribute(n, Attribute::NoAlias);
+    else removeAttribute(n, Attribute::NoAlias);
+  }
+
+  /// @brief Determine if the parameter can be captured.
+  /// @param n The parameter to check.  1 is the first parameter, 0 is the
+  /// return value.
+  bool doesNotCapture(unsigned n) const {
+    return paramHasAttr(n, Attribute::NoCapture);
+  }
+  void setDoesNotCapture(unsigned n, bool DoesNotCapture = true) {
+    if (DoesNotCapture) addAttribute(n, Attribute::NoCapture);
+    else removeAttribute(n, Attribute::NoCapture);
+  }
+
+  /// copyAttributesFrom - copy all additional attributes (those not needed to
+  /// create a Function) from the Function Src to this one.
+  void copyAttributesFrom(const GlobalValue *Src);
+
+  /// deleteBody - This method deletes the body of the function, and converts
+  /// the linkage to external.
+  ///
+  void deleteBody() {
+    dropAllReferences();
+    setLinkage(ExternalLinkage);
+  }
+
+  /// removeFromParent - This method unlinks 'this' from the containing module,
+  /// but does not delete it.
+  ///
+  virtual void removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing module
+  /// and deletes it.
+  ///
+  virtual void eraseFromParent();
+
+
+  /// Get the underlying elements of the Function... the basic block list is
+  /// empty for external functions.
+  ///
+  const ArgumentListType &getArgumentList() const {
+    CheckLazyArguments();
+    return ArgumentList;
+  }
+  ArgumentListType &getArgumentList() {
+    CheckLazyArguments();
+    return ArgumentList;
+  }
+  static iplist<Argument> Function::*getSublistAccess(Argument*) {
+    return &Function::ArgumentList;
+  }
+
+  const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
+  BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
+  static iplist<BasicBlock> Function::*getSublistAccess(BasicBlock*) {
+    return &Function::BasicBlocks;
+  }
+
+  const BasicBlock &getEntryBlock() const { return front(); }
+  BasicBlock &getEntryBlock() { return front(); }
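A small sketch using the attribute helpers above, of the kind a front end might write when it knows an external callee's semantics; markPureExternal is a hypothetical helper, and the claim that the callee is really read-only and non-unwinding is the caller's responsibility:

    #include "llvm/Function.h"

    // Hypothetical helper: mark a math-like external so optimizers may
    // freely reorder and CSE calls to it.
    void markPureExternal(llvm::Function *F) {
      F->setOnlyReadsMemory();  // adds Attribute::ReadOnly
      F->setDoesNotThrow();     // adds Attribute::NoUnwind
    }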
+
+  //===--------------------------------------------------------------------===//
+  // Symbol Table Accessing functions...
+
+  /// getValueSymbolTable() - Return the symbol table...
+  ///
+  inline       ValueSymbolTable &getValueSymbolTable()       { return *SymTab; }
+  inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; }
+
+
+  //===--------------------------------------------------------------------===//
+  // BasicBlock iterator forwarding functions
+  //
+  iterator       begin()       { return BasicBlocks.begin(); }
+  const_iterator begin() const { return BasicBlocks.begin(); }
+  iterator       end  ()       { return BasicBlocks.end();   }
+  const_iterator end  () const { return BasicBlocks.end();   }
+
+  size_t            size() const { return BasicBlocks.size();  }
+  bool             empty() const { return BasicBlocks.empty(); }
+  const BasicBlock &front() const { return BasicBlocks.front(); }
+        BasicBlock &front()       { return BasicBlocks.front(); }
+  const BasicBlock  &back() const { return BasicBlocks.back();  }
+        BasicBlock  &back()       { return BasicBlocks.back();  }
+
+  //===--------------------------------------------------------------------===//
+  // Argument iterator forwarding functions
+  //
+  arg_iterator arg_begin() {
+    CheckLazyArguments();
+    return ArgumentList.begin();
+  }
+  const_arg_iterator arg_begin() const {
+    CheckLazyArguments();
+    return ArgumentList.begin();
+  }
+  arg_iterator arg_end() {
+    CheckLazyArguments();
+    return ArgumentList.end();
+  }
+  const_arg_iterator arg_end() const {
+    CheckLazyArguments();
+    return ArgumentList.end();
+  }
+
+  size_t arg_size() const;
+  bool arg_empty() const;
+
+  /// viewCFG - This function is meant for use from the debugger.  You can just
+  /// say 'call F->viewCFG()' and a ghostview window should pop up from the
+  /// program, displaying the CFG of the current function with the code for
+  /// each basic block inside.  This depends on there being a 'dot' and 'gv'
+  /// program in your path.
+  ///
+  void viewCFG() const;
+
+  /// viewCFGOnly - This function is meant for use from the debugger.  It works
+  /// just like viewCFG, but it does not include the contents of basic blocks
+  /// into the nodes, just the label.  If you are only interested in the CFG
+  /// this can make the graph smaller.
+  ///
+  void viewCFGOnly() const;
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Function *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == Value::FunctionVal;
+  }
+
+  /// dropAllReferences() - This method causes all the subinstructions to "let
+  /// go" of all references that they are maintaining.  This allows one to
+  /// 'delete' a whole module at a time, even though there may be circular
+  /// references... first all references are dropped, and all use counts go to
+  /// zero.  Then everything is deleted for real.  Note that no operations are
+  /// valid on an object that has "dropped all references", except operator
+  /// delete.
+  ///
+  /// Since no other object in the module can have references into the body of
+  /// a function, dropping all references deletes the entire body of the
+  /// function, including any contained basic blocks.
+  ///
+  void dropAllReferences();
+
+  /// hasAddressTaken - returns true if there are any uses of this function
+  /// other than direct calls or invokes to it.  Optionally passes back the
+  /// offending user for diagnostic purposes.
+  ///
+  bool hasAddressTaken(const User** = 0) const;
+
+private:
+  // Shadow Value::setValueSubclassData with a private forwarding method so
+  // that subclasses cannot accidentally use it.
+  void setValueSubclassData(unsigned short D) {
+    Value::setValueSubclassData(D);
+  }
+};
+
+inline ValueSymbolTable *
+ilist_traits<BasicBlock>::getSymTab(Function *F) {
+  return F ? &F->getValueSymbolTable() : 0;
+}
+
+inline ValueSymbolTable *
+ilist_traits<Argument>::getSymTab(Function *F) {
+  return F ? &F->getValueSymbolTable() : 0;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/GVMaterializer.h b/final/include/llvm/GVMaterializer.h
new file mode 100644
index 00000000000..c1435523886
--- /dev/null
+++ b/final/include/llvm/GVMaterializer.h
@@ -0,0 +1,66 @@
+//===-- llvm/GVMaterializer.h - Interface for GV materializers --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an abstract interface for loading a module from some
+// place.  This interface allows incremental or random access loading of
+// functions from the file.  This is useful for applications like JIT compilers
+// or interprocedural optimizers that do not need the entire program in memory
+// at the same time.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef GVMATERIALIZER_H
+#define GVMATERIALIZER_H
+
+#include <string>
+
+namespace llvm {
+
+class Function;
+class GlobalValue;
+class Module;
+
+class GVMaterializer {
+protected:
+  GVMaterializer() {}
+
+public:
+  virtual ~GVMaterializer();
+
+  /// isMaterializable - True if GV can be materialized from whatever backing
+  /// store this GVMaterializer uses and has not been materialized yet.
+  virtual bool isMaterializable(const GlobalValue *GV) const = 0;
+
+  /// isDematerializable - True if GV has been materialized and can be
+  /// dematerialized back to whatever backing store this GVMaterializer uses.
+  virtual bool isDematerializable(const GlobalValue *GV) const = 0;
+
+  /// Materialize - make sure the given GlobalValue is fully read.  If the
+  /// module is corrupt, this returns true and fills in the optional string
+  /// with information about the problem.  If successful, this returns false.
+  ///
+  virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0) = 0;
+
+  /// Dematerialize - If the given GlobalValue is read in, and if the
+  /// GVMaterializer supports it, release the memory for the GV, and set it up
+  /// to be materialized lazily.  If the Materializer doesn't support this
+  /// capability, this method is a noop.
+  ///
+  virtual void Dematerialize(GlobalValue *) {}
+
+  /// MaterializeModule - make sure the entire Module has been completely read.
+  /// On error, this returns true and fills in the optional string with
+  /// information about the problem.  If successful, this returns false.
+  ///
+  virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0) = 0;
+};
+
+} // End llvm namespace
+
+#endif
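A sketch of the Materialize error contract described above; ensureRead is our wrapper name:

    #include "llvm/GVMaterializer.h"
    #include "llvm/GlobalValue.h"
    #include <string>
    #include <cstdio>

    bool ensureRead(llvm::GVMaterializer &M, llvm::GlobalValue *GV) {
      if (!M.isMaterializable(GV))
        return true;                      // nothing left to materialize
      std::string ErrInfo;
      if (M.Materialize(GV, &ErrInfo)) {  // true means the module is corrupt
        std::fprintf(stderr, "materialize failed: %s\n", ErrInfo.c_str());
        return false;
      }
      return true;
    }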
diff --git a/final/include/llvm/GlobalAlias.h b/final/include/llvm/GlobalAlias.h
new file mode 100644
index 00000000000..f4af5b1202c
--- /dev/null
+++ b/final/include/llvm/GlobalAlias.h
@@ -0,0 +1,100 @@
+//===-------- llvm/GlobalAlias.h - GlobalAlias class ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the GlobalAlias class, which
+// represents a single function or variable alias in the IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_GLOBAL_ALIAS_H
+#define LLVM_GLOBAL_ALIAS_H
+
+#include "llvm/GlobalValue.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/Twine.h"
+
+namespace llvm {
+
+class Module;
+class Constant;
+template<typename ValueSubClass, typename ItemParentClass>
+  class SymbolTableListTraits;
+
+class GlobalAlias : public GlobalValue, public ilist_node<GlobalAlias> {
+  friend class SymbolTableListTraits<GlobalAlias, Module>;
+  void operator=(const GlobalAlias &);  // Do not implement
+  GlobalAlias(const GlobalAlias &);     // Do not implement
+
+  void setParent(Module *parent);
+
+public:
+  // allocate space for exactly one operand
+  void *operator new(size_t s) {
+    return User::operator new(s, 1);
+  }
+  /// GlobalAlias ctor - If a parent module is specified, the alias is
+  /// automatically inserted into the end of the specified module's alias list.
+  GlobalAlias(const Type *Ty, LinkageTypes Linkage, const Twine &Name = "",
+              Constant* Aliasee = 0, Module *Parent = 0);
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// isDeclaration - Is this global variable lacking an initializer?  If so,
+  /// the global variable is defined in some other translation unit, and is
+  /// thus only a declaration here.
+  virtual bool isDeclaration() const;
+
+  /// removeFromParent - This method unlinks 'this' from the containing module,
+  /// but does not delete it.
+  ///
+  virtual void removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing module
+  /// and deletes it.
+  ///
+  virtual void eraseFromParent();
+
+  /// set/getAliasee - These methods retrieve and set the alias target.
+  void setAliasee(Constant* GV);
+  const Constant* getAliasee() const {
+    return cast_or_null<Constant>(getOperand(0));
+  }
+  Constant* getAliasee() {
+    return cast_or_null<Constant>(getOperand(0));
+  }
+  /// getAliasedGlobal() - Aliasee can be either global or bitcast of
+  /// global.  This method retrieves the global for both aliasee flavours.
+  const GlobalValue* getAliasedGlobal() const;
+
+  /// resolveAliasedGlobal() - This method tries to ultimately resolve the
+  /// alias by going through the aliasing chain and trying to find the very
+  /// last global.  Returns NULL if a cycle was found.  If stopOnWeak is false,
+  /// the whole aliasing chain is traversed; otherwise only strong aliases are
+  /// followed.
+  const GlobalValue* resolveAliasedGlobal(bool stopOnWeak = true) const;
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const GlobalAlias *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == Value::GlobalAliasVal;
+  }
+};
+
+template <>
+struct OperandTraits<GlobalAlias> :
+  public FixedNumOperandTraits<GlobalAlias, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalAlias, Value)
+
+} // End llvm namespace
+
+#endif
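A sketch of walking an alias to its underlying global with the resolver above; ultimateTarget is our name:

    #include "llvm/GlobalAlias.h"
    #include "llvm/GlobalValue.h"

    const llvm::GlobalValue *ultimateTarget(const llvm::GlobalAlias *GA) {
      // NULL signals a cycle; stopOnWeak=false walks through weak aliases too.
      return GA->resolveAliasedGlobal(/*stopOnWeak=*/false);
    }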
diff --git a/final/include/llvm/GlobalValue.h b/final/include/llvm/GlobalValue.h
new file mode 100644
index 00000000000..b184b8e4497
--- /dev/null
+++ b/final/include/llvm/GlobalValue.h
@@ -0,0 +1,296 @@
+//===-- llvm/GlobalValue.h - Class to represent a global value --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a common base class of all globally definable objects.
+// As such, it is subclassed by GlobalVariable, GlobalAlias and by Function.
+// This is used because you can do certain things with these global objects
+// that you can't do to anything else.  For example, use the address of one
+// as a constant.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_GLOBALVALUE_H
+#define LLVM_GLOBALVALUE_H
+
+#include "llvm/Constant.h"
+
+namespace llvm {
+
+class PointerType;
+class Module;
+
+class GlobalValue : public Constant {
+  GlobalValue(const GlobalValue &);  // do not implement
+public:
+  /// @brief An enumeration for the kinds of linkage for global values.
+  enum LinkageTypes {
+    ExternalLinkage = 0,///< Externally visible function
+    AvailableExternallyLinkage, ///< Available for inspection, not emission.
+    LinkOnceAnyLinkage, ///< Keep one copy of function when linking (inline)
+    LinkOnceODRLinkage, ///< Same, but only replaced by something equivalent.
+    WeakAnyLinkage,     ///< Keep one copy of named function when linking (weak)
+    WeakODRLinkage,     ///< Same, but only replaced by something equivalent.
+    AppendingLinkage,   ///< Special purpose, only applies to global arrays
+    InternalLinkage,    ///< Rename collisions when linking (static functions).
+    PrivateLinkage,     ///< Like Internal, but omit from symbol table.
+    LinkerPrivateLinkage, ///< Like Private, but linker removes.
+    LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak.
+    LinkerPrivateWeakDefAutoLinkage, ///< Like LinkerPrivateWeak, but possibly
+                                     ///  hidden.
+    DLLImportLinkage,   ///< Function to be imported from DLL
+    DLLExportLinkage,   ///< Function to be accessible from DLL.
+    ExternalWeakLinkage,///< ExternalWeak linkage description.
+    CommonLinkage       ///< Tentative definitions.
+  };
+
+  /// @brief An enumeration for the kinds of visibility of global values.
+  enum VisibilityTypes {
+    DefaultVisibility = 0,  ///< The GV is visible
+    HiddenVisibility,       ///< The GV is hidden
+    ProtectedVisibility     ///< The GV is protected
+  };
+
+protected:
+  GlobalValue(const Type *ty, ValueTy vty, Use *Ops, unsigned NumOps,
+              LinkageTypes linkage, const Twine &Name)
+    : Constant(ty, vty, Ops, NumOps), Parent(0),
+      Linkage(linkage), Visibility(DefaultVisibility), Alignment(0),
+      UnnamedAddr(0) {
+    setName(Name);
+  }
+
+  Module *Parent;
+  // Note: VC++ treats enums as signed, so an extra bit is required to prevent
+  // Linkage and Visibility from turning into negative values.
+  LinkageTypes Linkage : 5;   // The linkage of this global
+  unsigned Visibility : 2;    // The visibility style of this global
+  unsigned Alignment : 16;    // Alignment of this symbol, must be power of two
+  unsigned UnnamedAddr : 1;   // This value's address is not significant
+  std::string Section;        // Section to emit this into, empty means default
+public:
+  ~GlobalValue() {
+    removeDeadConstantUsers();   // remove any dead constants using this.
+  }
+ } + + unsigned getAlignment() const { + return (1u << Alignment) >> 1; + } + void setAlignment(unsigned Align); + + bool hasUnnamedAddr() const { return UnnamedAddr; } + void setUnnamedAddr(bool Val) { UnnamedAddr = Val; } + + VisibilityTypes getVisibility() const { return VisibilityTypes(Visibility); } + bool hasDefaultVisibility() const { return Visibility == DefaultVisibility; } + bool hasHiddenVisibility() const { return Visibility == HiddenVisibility; } + bool hasProtectedVisibility() const { + return Visibility == ProtectedVisibility; + } + void setVisibility(VisibilityTypes V) { Visibility = V; } + + bool hasSection() const { return !Section.empty(); } + const std::string &getSection() const { return Section; } + void setSection(StringRef S) { Section = S; } + + /// If the usage is empty (except transitively dead constants), then this + /// global value can be safely deleted since the destructor will + /// delete the dead constants as well. + /// @brief Determine if the usage of this global value is empty except + /// for transitively dead constants. + bool use_empty_except_constants(); + + /// getType - Global values are always pointers. + inline const PointerType *getType() const { + return reinterpret_cast(User::getType()); + } + + static LinkageTypes getLinkOnceLinkage(bool ODR) { + return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage; + } + static LinkageTypes getWeakLinkage(bool ODR) { + return ODR ? WeakODRLinkage : WeakAnyLinkage; + } + + static bool isExternalLinkage(LinkageTypes Linkage) { + return Linkage == ExternalLinkage; + } + static bool isAvailableExternallyLinkage(LinkageTypes Linkage) { + return Linkage == AvailableExternallyLinkage; + } + static bool isLinkOnceLinkage(LinkageTypes Linkage) { + return Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage; + } + static bool isWeakLinkage(LinkageTypes Linkage) { + return Linkage == WeakAnyLinkage || Linkage == WeakODRLinkage; + } + static bool isAppendingLinkage(LinkageTypes Linkage) { + return Linkage == AppendingLinkage; + } + static bool isInternalLinkage(LinkageTypes Linkage) { + return Linkage == InternalLinkage; + } + static bool isPrivateLinkage(LinkageTypes Linkage) { + return Linkage == PrivateLinkage; + } + static bool isLinkerPrivateLinkage(LinkageTypes Linkage) { + return Linkage == LinkerPrivateLinkage; + } + static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) { + return Linkage == LinkerPrivateWeakLinkage; + } + static bool isLinkerPrivateWeakDefAutoLinkage(LinkageTypes Linkage) { + return Linkage == LinkerPrivateWeakDefAutoLinkage; + } + static bool isLocalLinkage(LinkageTypes Linkage) { + return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) || + isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage) || + isLinkerPrivateWeakDefAutoLinkage(Linkage); + } + static bool isDLLImportLinkage(LinkageTypes Linkage) { + return Linkage == DLLImportLinkage; + } + static bool isDLLExportLinkage(LinkageTypes Linkage) { + return Linkage == DLLExportLinkage; + } + static bool isExternalWeakLinkage(LinkageTypes Linkage) { + return Linkage == ExternalWeakLinkage; + } + static bool isCommonLinkage(LinkageTypes Linkage) { + return Linkage == CommonLinkage; + } + + /// mayBeOverridden - Whether the definition of this global may be replaced + /// by something non-equivalent at link time. For example, if a function has + /// weak linkage then the code defining it may be replaced by different code. 
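+  ///
+  /// A minimal illustrative sketch (F and G are hypothetical globals, not
+  /// part of this header):
+  ///
+  ///   GlobalValue *F = ...;              // F has WeakAnyLinkage
+  ///   GlobalValue *G = ...;              // G has ExternalLinkage
+  ///   mayBeOverridden(F->getLinkage());  // true: don't trust F's body
+  ///   mayBeOverridden(G->getLinkage());  // false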
+ static bool mayBeOverridden(LinkageTypes Linkage) { + return Linkage == WeakAnyLinkage || + Linkage == LinkOnceAnyLinkage || + Linkage == CommonLinkage || + Linkage == ExternalWeakLinkage || + Linkage == LinkerPrivateWeakLinkage || + Linkage == LinkerPrivateWeakDefAutoLinkage; + } + + /// isWeakForLinker - Whether the definition of this global may be replaced at + /// link time. NB: Using this method outside of the code generators is almost + /// always a mistake: when working at the IR level use mayBeOverridden instead + /// as it knows about ODR semantics. + static bool isWeakForLinker(LinkageTypes Linkage) { + return Linkage == AvailableExternallyLinkage || + Linkage == WeakAnyLinkage || + Linkage == WeakODRLinkage || + Linkage == LinkOnceAnyLinkage || + Linkage == LinkOnceODRLinkage || + Linkage == CommonLinkage || + Linkage == ExternalWeakLinkage || + Linkage == LinkerPrivateWeakLinkage || + Linkage == LinkerPrivateWeakDefAutoLinkage; + } + + bool hasExternalLinkage() const { return isExternalLinkage(Linkage); } + bool hasAvailableExternallyLinkage() const { + return isAvailableExternallyLinkage(Linkage); + } + bool hasLinkOnceLinkage() const { + return isLinkOnceLinkage(Linkage); + } + bool hasWeakLinkage() const { + return isWeakLinkage(Linkage); + } + bool hasAppendingLinkage() const { return isAppendingLinkage(Linkage); } + bool hasInternalLinkage() const { return isInternalLinkage(Linkage); } + bool hasPrivateLinkage() const { return isPrivateLinkage(Linkage); } + bool hasLinkerPrivateLinkage() const { return isLinkerPrivateLinkage(Linkage); } + bool hasLinkerPrivateWeakLinkage() const { + return isLinkerPrivateWeakLinkage(Linkage); + } + bool hasLinkerPrivateWeakDefAutoLinkage() const { + return isLinkerPrivateWeakDefAutoLinkage(Linkage); + } + bool hasLocalLinkage() const { return isLocalLinkage(Linkage); } + bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); } + bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); } + bool hasExternalWeakLinkage() const { return isExternalWeakLinkage(Linkage); } + bool hasCommonLinkage() const { return isCommonLinkage(Linkage); } + + void setLinkage(LinkageTypes LT) { Linkage = LT; } + LinkageTypes getLinkage() const { return Linkage; } + + bool mayBeOverridden() const { return mayBeOverridden(Linkage); } + + bool isWeakForLinker() const { return isWeakForLinker(Linkage); } + + /// copyAttributesFrom - copy all additional attributes (those not needed to + /// create a GlobalValue) from the GlobalValue Src to this one. + virtual void copyAttributesFrom(const GlobalValue *Src); + +/// @name Materialization +/// Materialization is used to construct functions only as they're needed. This +/// is useful to reduce memory usage in LLVM or parsing work done by the +/// BitcodeReader to load the Module. +/// @{ + + /// isMaterializable - If this function's Module is being lazily streamed in + /// functions from disk or some other source, this method can be used to check + /// to see if the function has been read in yet or not. + bool isMaterializable() const; + + /// isDematerializable - Returns true if this function was loaded from a + /// GVMaterializer that's still attached to its Module and that knows how to + /// dematerialize the function. + bool isDematerializable() const; + + /// Materialize - make sure this GlobalValue is fully read. If the module is + /// corrupt, this returns true and fills in the optional string with + /// information about the problem. If successful, this returns false. 
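+  /// A minimal usage sketch (illustrative only; F is a hypothetical Function*
+  /// in a lazily loaded Module, and report_fatal_error is assumed from
+  /// llvm/Support/ErrorHandling.h):
+  ///
+  ///   if (F->isMaterializable()) {
+  ///     std::string ErrInfo;
+  ///     if (F->Materialize(&ErrInfo))
+  ///       report_fatal_error(ErrInfo);  // the bitcode was corrupt
+  ///   }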
+  bool Materialize(std::string *ErrInfo = 0);
+
+  /// Dematerialize - If this GlobalValue is read in, and if the GVMaterializer
+  /// supports it, release the memory for the function, and set it up to be
+  /// materialized lazily.  If !isDematerializable(), this method is a noop.
+  void Dematerialize();
+
+/// @}
+
+  /// Override from Constant class.  No GlobalValues are null values, so this
+  /// always returns false.
+  virtual bool isNullValue() const { return false; }
+
+  /// Override from Constant class.
+  virtual void destroyConstant();
+
+  /// isDeclaration - Return true if the primary definition of this global
+  /// value is outside of the current translation unit...
+  virtual bool isDeclaration() const = 0;
+
+  /// removeFromParent - This method unlinks 'this' from the containing module,
+  /// but does not delete it.
+  virtual void removeFromParent() = 0;
+
+  /// eraseFromParent - This method unlinks 'this' from the containing module
+  /// and deletes it.
+  virtual void eraseFromParent() = 0;
+
+  /// getParent - Get the module that this global value is contained inside
+  /// of...
+  inline Module *getParent() { return Parent; }
+  inline const Module *getParent() const { return Parent; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const GlobalValue *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == Value::FunctionVal ||
+           V->getValueID() == Value::GlobalVariableVal ||
+           V->getValueID() == Value::GlobalAliasVal;
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/GlobalVariable.h b/final/include/llvm/GlobalVariable.h
new file mode 100644
index 00000000000..1769c665d06
--- /dev/null
+++ b/final/include/llvm/GlobalVariable.h
@@ -0,0 +1,180 @@
+//===-- llvm/GlobalVariable.h - GlobalVariable class ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the GlobalVariable class, which
+// represents a single global variable (or constant) in the VM.
+//
+// Global variables are constant pointers that refer to hunks of space that are
+// allocated by either the VM, or by the linker in a static compiler.  A global
+// variable may have an initial value, which is copied into the executable's
+// .data area.  Global Constants are required to have initializers.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_GLOBAL_VARIABLE_H
+#define LLVM_GLOBAL_VARIABLE_H
+
+#include "llvm/GlobalValue.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/Twine.h"
+
+namespace llvm {
+
+class Module;
+class Constant;
+template<typename ValueSubClass, typename ItemParentClass>
+  class SymbolTableListTraits;
+
+class GlobalVariable : public GlobalValue, public ilist_node<GlobalVariable> {
+  friend class SymbolTableListTraits<GlobalVariable, Module>;
+  void *operator new(size_t, unsigned);       // Do not implement
+  void operator=(const GlobalVariable &);     // Do not implement
+  GlobalVariable(const GlobalVariable &);     // Do not implement
+
+  void setParent(Module *parent);
+
+  bool isConstantGlobal : 1;           // Is this a global constant?
+  bool isThreadLocalSymbol : 1;        // Is this symbol "Thread Local"?
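+
+  // A minimal creation sketch (illustrative only; Ctx and M are a
+  // hypothetical LLVMContext and Module, and ConstantInt comes from
+  // llvm/Constants.h):
+  //
+  //   Constant *Init = ConstantInt::get(Type::getInt32Ty(Ctx), 42);
+  //   GlobalVariable *GV =
+  //       new GlobalVariable(M, Type::getInt32Ty(Ctx), /*isConstant=*/true,
+  //                          GlobalValue::InternalLinkage, Init, "answer");
+  //   // GV->hasInitializer() and GV->isConstant() both hold.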
+
+public:
+  // allocate space for exactly one operand
+  void *operator new(size_t s) {
+    return User::operator new(s, 1);
+  }
+  /// GlobalVariable ctor - If a parent module is specified, the global is
+  /// automatically inserted into the end of the specified module's global
+  /// list.
+  GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes Linkage,
+                 Constant *Initializer = 0, const Twine &Name = "",
+                 bool ThreadLocal = false, unsigned AddressSpace = 0);
+  /// GlobalVariable ctor - This creates a global and inserts it before the
+  /// specified other global.
+  GlobalVariable(Module &M, const Type *Ty, bool isConstant,
+                 LinkageTypes Linkage, Constant *Initializer,
+                 const Twine &Name,
+                 GlobalVariable *InsertBefore = 0, bool ThreadLocal = false,
+                 unsigned AddressSpace = 0);
+
+  ~GlobalVariable() {
+    NumOperands = 1; // FIXME: needed by operator delete
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// isDeclaration - Is this global variable lacking an initializer?  If so,
+  /// the global variable is defined in some other translation unit, and is
+  /// thus only a declaration here.
+  virtual bool isDeclaration() const { return getNumOperands() == 0; }
+
+  /// hasInitializer - Unless a global variable isExternal(), it has an
+  /// initializer.  The initializer for the global variable/constant is held by
+  /// Initializer if an initializer is specified.
+  ///
+  inline bool hasInitializer() const { return !isDeclaration(); }
+
+  /// hasDefinitiveInitializer - Whether the global variable has an
+  /// initializer, and any other instances of the global (this can happen due
+  /// to weak linkage) are guaranteed to have the same initializer.
+  ///
+  /// Note that if you want to transform a global, you must use
+  /// hasUniqueInitializer() instead, because of the *_odr linkage type.
+  ///
+  /// Example:
+  ///
+  /// @a = global SomeType* null - Initializer is both definitive and unique.
+  ///
+  /// @b = global weak SomeType* null - Initializer is neither definitive nor
+  /// unique.
+  ///
+  /// @c = global weak_odr SomeType* null - Initializer is definitive, but not
+  /// unique.
+  inline bool hasDefinitiveInitializer() const {
+    return hasInitializer() &&
+      // The initializer of a global variable with weak linkage may change at
+      // link time.
+      !mayBeOverridden();
+  }
+
+  /// hasUniqueInitializer - Whether the global variable has an initializer,
+  /// and any changes made to the initializer will turn up in the final
+  /// executable.
+  inline bool hasUniqueInitializer() const {
+    return hasInitializer() &&
+      // It's not safe to modify initializers of global variables with weak
+      // linkage, because the linker might choose to discard the initializer
+      // and use the initializer from another instance of the global variable
+      // instead.  It is wrong to modify the initializer of a global variable
+      // with *_odr linkage because then different instances of the global may
+      // have different initializers, breaking the One Definition Rule.
+      !isWeakForLinker();
+  }
+
+  /// getInitializer - Return the initializer for this global variable.  It is
+  /// illegal to call this method if the global is external, because we cannot
+  /// tell what the value is initialized to!
+  ///
+  inline /*const FIXME*/ Constant *getInitializer() const {
+    assert(hasInitializer() && "GV doesn't have initializer!");
+    return static_cast<Constant*>(Op<0>().get());
+  }
+  inline Constant *getInitializer() {
+    assert(hasInitializer() && "GV doesn't have initializer!");
+    return static_cast<Constant*>(Op<0>().get());
+  }
+  /// setInitializer - Sets the initializer for this global variable, removing
+  /// any existing initializer if InitVal==NULL.  If this GV has type T*, the
+  /// initializer must have type T.
+  void setInitializer(Constant *InitVal);
+
+  /// If the value is a global constant, its value is immutable throughout the
+  /// runtime execution of the program.  Assigning a value into the constant
+  /// leads to undefined behavior.
+  ///
+  bool isConstant() const { return isConstantGlobal; }
+  void setConstant(bool Val) { isConstantGlobal = Val; }
+
+  /// If the value is "Thread Local", its value isn't shared by the threads.
+  bool isThreadLocal() const { return isThreadLocalSymbol; }
+  void setThreadLocal(bool Val) { isThreadLocalSymbol = Val; }
+
+  /// copyAttributesFrom - copy all additional attributes (those not needed to
+  /// create a GlobalVariable) from the GlobalVariable Src to this one.
+  void copyAttributesFrom(const GlobalValue *Src);
+
+  /// removeFromParent - This method unlinks 'this' from the containing module,
+  /// but does not delete it.
+  ///
+  virtual void removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing module
+  /// and deletes it.
+  ///
+  virtual void eraseFromParent();
+
+  /// Override Constant's implementation of this method so we can
+  /// replace constant initializers.
+  virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const GlobalVariable *) { return true; }
+  static inline bool classof(const Value *V) {
+    return V->getValueID() == Value::GlobalVariableVal;
+  }
+};
+
+template <>
+struct OperandTraits<GlobalVariable> :
+  public OptionalOperandTraits<GlobalVariable> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/InitializePasses.h b/final/include/llvm/InitializePasses.h
new file mode 100644
index 00000000000..2758b299ecc
--- /dev/null
+++ b/final/include/llvm/InitializePasses.h
@@ -0,0 +1,232 @@
+//===- llvm/InitializePasses.h -------- Initialize All Passes ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations for the pass initialization routines
+// for the entire LLVM project.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INITIALIZEPASSES_H
+#define LLVM_INITIALIZEPASSES_H
+
+namespace llvm {
+
+class PassRegistry;
+
+/// initializeCore - Initialize all passes linked into the
+/// Core library.
+void initializeCore(PassRegistry&);
+
+/// initializeTransformUtils - Initialize all passes linked into the
+/// TransformUtils library.
+void initializeTransformUtils(PassRegistry&);
+
+/// initializeScalarOpts - Initialize all passes linked into the
+/// ScalarOpts library.
+void initializeScalarOpts(PassRegistry&);
+
+/// initializeInstCombine - Initialize all passes linked into the
+/// InstCombine library.
+void initializeInstCombine(PassRegistry&);
+
+/// initializeIPO - Initialize all passes linked into the IPO library.
+void initializeIPO(PassRegistry&);
+
+/// initializeInstrumentation - Initialize all passes linked into the
+/// Instrumentation library.
+void initializeInstrumentation(PassRegistry&);
+
+/// initializeAnalysis - Initialize all passes linked into the Analysis library.
+void initializeAnalysis(PassRegistry&);
+
+/// initializeIPA - Initialize all passes linked into the IPA library.
+void initializeIPA(PassRegistry&);
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void initializeCodeGen(PassRegistry&);
+
+/// initializeTarget - Initialize all passes linked into the Target library.
+void initializeTarget(PassRegistry&);
+
+void initializeAAEvalPass(PassRegistry&);
+void initializeADCEPass(PassRegistry&);
+void initializeAliasAnalysisAnalysisGroup(PassRegistry&);
+void initializeAliasAnalysisCounterPass(PassRegistry&);
+void initializeAliasDebuggerPass(PassRegistry&);
+void initializeAliasSetPrinterPass(PassRegistry&);
+void initializeAlwaysInlinerPass(PassRegistry&);
+void initializeArgPromotionPass(PassRegistry&);
+void initializeBasicAliasAnalysisPass(PassRegistry&);
+void initializeBasicCallGraphPass(PassRegistry&);
+void initializeBlockExtractorPassPass(PassRegistry&);
+void initializeBlockPlacementPass(PassRegistry&);
+void initializeBreakCriticalEdgesPass(PassRegistry&);
+void initializeCFGOnlyPrinterPass(PassRegistry&);
+void initializeCFGOnlyViewerPass(PassRegistry&);
+void initializeCFGPrinterPass(PassRegistry&);
+void initializeCFGSimplifyPassPass(PassRegistry&);
+void initializeCFGViewerPass(PassRegistry&);
+void initializeCalculateSpillWeightsPass(PassRegistry&);
+void initializeCallGraphAnalysisGroup(PassRegistry&);
+void initializeCodeGenPreparePass(PassRegistry&);
+void initializeConstantMergePass(PassRegistry&);
+void initializeConstantPropagationPass(PassRegistry&);
+void initializeCorrelatedValuePropagationPass(PassRegistry&);
+void initializeDAEPass(PassRegistry&);
+void initializeDAHPass(PassRegistry&);
+void initializeDCEPass(PassRegistry&);
+void initializeDSEPass(PassRegistry&);
+void initializeDTEPass(PassRegistry&);
+void initializeDeadInstEliminationPass(PassRegistry&);
+void initializeDeadMachineInstructionElimPass(PassRegistry&);
+void initializeDomOnlyPrinterPass(PassRegistry&);
+void initializeDomOnlyViewerPass(PassRegistry&);
+void initializeDomPrinterPass(PassRegistry&);
+void initializeDomViewerPass(PassRegistry&);
+void initializeDominanceFrontierPass(PassRegistry&);
+void initializeDominatorTreePass(PassRegistry&);
+void initializeEdgeBundlesPass(PassRegistry&);
+void initializeEdgeProfilerPass(PassRegistry&);
+void initializePathProfilerPass(PassRegistry&);
+void initializeEarlyCSEPass(PassRegistry&);
+void initializeExpandISelPseudosPass(PassRegistry&);
+void initializeFindUsedTypesPass(PassRegistry&);
+void initializeFunctionAttrsPass(PassRegistry&);
+void initializeGCModuleInfoPass(PassRegistry&);
+void initializeGVNPass(PassRegistry&);
+void initializeGlobalDCEPass(PassRegistry&);
+void initializeGlobalOptPass(PassRegistry&);
+void initializeGlobalsModRefPass(PassRegistry&);
+void initializeIPCPPass(PassRegistry&);
+void initializeIPSCCPPass(PassRegistry&);
+void initializeIVUsersPass(PassRegistry&);
+void initializeIfConverterPass(PassRegistry&);
+void initializeIndVarSimplifyPass(PassRegistry&);
+void initializeInstCombinerPass(PassRegistry&);
+void initializeInstCountPass(PassRegistry&);
+void
initializeInstNamerPass(PassRegistry&); +void initializeInternalizePassPass(PassRegistry&); +void initializeIntervalPartitionPass(PassRegistry&); +void initializeJumpThreadingPass(PassRegistry&); +void initializeLCSSAPass(PassRegistry&); +void initializeLICMPass(PassRegistry&); +void initializeLazyValueInfoPass(PassRegistry&); +void initializeLibCallAliasAnalysisPass(PassRegistry&); +void initializeLintPass(PassRegistry&); +void initializeLiveDebugVariablesPass(PassRegistry&); +void initializeLiveIntervalsPass(PassRegistry&); +void initializeLiveStacksPass(PassRegistry&); +void initializeLiveVariablesPass(PassRegistry&); +void initializeLoaderPassPass(PassRegistry&); +void initializePathProfileLoaderPassPass(PassRegistry&); +void initializeLoopDeletionPass(PassRegistry&); +void initializeLoopDependenceAnalysisPass(PassRegistry&); +void initializeLoopExtractorPass(PassRegistry&); +void initializeLoopInfoPass(PassRegistry&); +void initializeLoopInstSimplifyPass(PassRegistry&); +void initializeLoopRotatePass(PassRegistry&); +void initializeLoopSimplifyPass(PassRegistry&); +void initializeLoopSplitterPass(PassRegistry&); +void initializeLoopStrengthReducePass(PassRegistry&); +void initializeLoopUnrollPass(PassRegistry&); +void initializeLoopUnswitchPass(PassRegistry&); +void initializeLoopIdiomRecognizePass(PassRegistry&); +void initializeLowerAtomicPass(PassRegistry&); +void initializeLowerIntrinsicsPass(PassRegistry&); +void initializeLowerInvokePass(PassRegistry&); +void initializeLowerSetJmpPass(PassRegistry&); +void initializeLowerSwitchPass(PassRegistry&); +void initializeMachineCSEPass(PassRegistry&); +void initializeMachineDominatorTreePass(PassRegistry&); +void initializeMachineLICMPass(PassRegistry&); +void initializeMachineLoopInfoPass(PassRegistry&); +void initializeMachineLoopRangesPass(PassRegistry&); +void initializeMachineModuleInfoPass(PassRegistry&); +void initializeMachineSinkingPass(PassRegistry&); +void initializeMachineVerifierPassPass(PassRegistry&); +void initializeMemCpyOptPass(PassRegistry&); +void initializeMemDepPrinterPass(PassRegistry&); +void initializeMemoryDependenceAnalysisPass(PassRegistry&); +void initializeMergeFunctionsPass(PassRegistry&); +void initializeModuleDebugInfoPrinterPass(PassRegistry&); +void initializeNoAAPass(PassRegistry&); +void initializeNoProfileInfoPass(PassRegistry&); +void initializeNoPathProfileInfoPass(PassRegistry&); +void initializeOptimalEdgeProfilerPass(PassRegistry&); +void initializeOptimizePHIsPass(PassRegistry&); +void initializePEIPass(PassRegistry&); +void initializePHIEliminationPass(PassRegistry&); +void initializePartialInlinerPass(PassRegistry&); +void initializePeepholeOptimizerPass(PassRegistry&); +void initializePostDomOnlyPrinterPass(PassRegistry&); +void initializePostDomOnlyViewerPass(PassRegistry&); +void initializePostDomPrinterPass(PassRegistry&); +void initializePostDomViewerPass(PassRegistry&); +void initializePostDominanceFrontierPass(PassRegistry&); +void initializePostDominatorTreePass(PassRegistry&); +void initializePreAllocSplittingPass(PassRegistry&); +void initializePreVerifierPass(PassRegistry&); +void initializePrintDbgInfoPass(PassRegistry&); +void initializePrintFunctionPassPass(PassRegistry&); +void initializePrintModulePassPass(PassRegistry&); +void initializeProcessImplicitDefsPass(PassRegistry&); +void initializeProfileEstimatorPassPass(PassRegistry&); +void initializeProfileInfoAnalysisGroup(PassRegistry&); +void initializePathProfileInfoAnalysisGroup(PassRegistry&); +void 
initializePathProfileVerifierPass(PassRegistry&); +void initializeProfileVerifierPassPass(PassRegistry&); +void initializePromotePassPass(PassRegistry&); +void initializePruneEHPass(PassRegistry&); +void initializeRALinScanPass(PassRegistry&); +void initializeReassociatePass(PassRegistry&); +void initializeRegToMemPass(PassRegistry&); +void initializeRegionInfoPass(PassRegistry&); +void initializeRegionOnlyPrinterPass(PassRegistry&); +void initializeRegionOnlyViewerPass(PassRegistry&); +void initializeRegionPrinterPass(PassRegistry&); +void initializeRegionViewerPass(PassRegistry&); +void initializeRegisterCoalescerAnalysisGroup(PassRegistry&); +void initializeRenderMachineFunctionPass(PassRegistry&); +void initializeSCCPPass(PassRegistry&); +void initializeSRETPromotionPass(PassRegistry&); +void initializeSROA_DTPass(PassRegistry&); +void initializeSROA_SSAUpPass(PassRegistry&); +void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&); +void initializeScalarEvolutionPass(PassRegistry&); +void initializeSimpleInlinerPass(PassRegistry&); +void initializeSimpleRegisterCoalescingPass(PassRegistry&); +void initializeSimplifyLibCallsPass(PassRegistry&); +void initializeSingleLoopExtractorPass(PassRegistry&); +void initializeSinkingPass(PassRegistry&); +void initializeSlotIndexesPass(PassRegistry&); +void initializeSpillPlacementPass(PassRegistry&); +void initializeStackProtectorPass(PassRegistry&); +void initializeStackSlotColoringPass(PassRegistry&); +void initializeStripDeadDebugInfoPass(PassRegistry&); +void initializeStripDeadPrototypesPassPass(PassRegistry&); +void initializeStripDebugDeclarePass(PassRegistry&); +void initializeStripNonDebugSymbolsPass(PassRegistry&); +void initializeStripSymbolsPass(PassRegistry&); +void initializeStrongPHIEliminationPass(PassRegistry&); +void initializeTailCallElimPass(PassRegistry&); +void initializeTailDupPass(PassRegistry&); +void initializeTargetDataPass(PassRegistry&); +void initializeTargetLibraryInfoPass(PassRegistry&); +void initializeTwoAddressInstructionPassPass(PassRegistry&); +void initializeTypeBasedAliasAnalysisPass(PassRegistry&); +void initializeUnifyFunctionExitNodesPass(PassRegistry&); +void initializeUnreachableBlockElimPass(PassRegistry&); +void initializeUnreachableMachineBlockElimPass(PassRegistry&); +void initializeVerifierPass(PassRegistry&); +void initializeVirtRegMapPass(PassRegistry&); +void initializeInstSimplifierPass(PassRegistry&); + +} + +#endif diff --git a/final/include/llvm/InlineAsm.h b/final/include/llvm/InlineAsm.h new file mode 100644 index 00000000000..ed8f0f7f615 --- /dev/null +++ b/final/include/llvm/InlineAsm.h @@ -0,0 +1,256 @@ +//===-- llvm/InlineAsm.h - Class to represent inline asm strings-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the inline asm strings, which are Value*'s that are +// used as the callee operand of call instructions. InlineAsm's are uniqued +// like constants, and created via InlineAsm::get(...). 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INLINEASM_H
+#define LLVM_INLINEASM_H
+
+#include "llvm/Value.h"
+#include <vector>
+
+namespace llvm {
+
+class PointerType;
+class FunctionType;
+class Module;
+struct InlineAsmKeyType;
+template<class ValType, class TypeClass, class ConstantClass, bool HasLargeKey>
+class ConstantUniqueMap;
+template<class ConstantClass, class TypeClass, class ValType>
+struct ConstantCreator;
+
+class InlineAsm : public Value {
+  friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>;
+  friend class ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm,
+                                 false>;
+
+  InlineAsm(const InlineAsm &);             // do not implement
+  void operator=(const InlineAsm&);         // do not implement
+
+  std::string AsmString, Constraints;
+  bool HasSideEffects;
+  bool IsAlignStack;
+
+  InlineAsm(const PointerType *Ty, const std::string &AsmString,
+            const std::string &Constraints, bool hasSideEffects,
+            bool isAlignStack);
+  virtual ~InlineAsm();
+
+  /// When the ConstantUniqueMap merges two types and makes two InlineAsms
+  /// identical, it destroys one of them with this method.
+  void destroyConstant();
+public:
+
+  /// InlineAsm::get - Return the specified uniqued inline asm string.
+  ///
+  static InlineAsm *get(const FunctionType *Ty, StringRef AsmString,
+                        StringRef Constraints, bool hasSideEffects,
+                        bool isAlignStack = false);
+
+  bool hasSideEffects() const { return HasSideEffects; }
+  bool isAlignStack() const { return IsAlignStack; }
+
+  /// getType - InlineAsm's are always pointers.
+  ///
+  const PointerType *getType() const {
+    return reinterpret_cast<const PointerType*>(Value::getType());
+  }
+
+  /// getFunctionType - InlineAsm's are always pointers to functions.
+  ///
+  const FunctionType *getFunctionType() const;
+
+  const std::string &getAsmString() const { return AsmString; }
+  const std::string &getConstraintString() const { return Constraints; }
+
+  /// Verify - This static method can be used by the parser to check to see if
+  /// the specified constraint string is legal for the type.  This returns true
+  /// if legal, false if not.
+  ///
+  static bool Verify(const FunctionType *Ty, StringRef Constraints);
+
+  // Constraint String Parsing
+  enum ConstraintPrefix {
+    isInput,            // 'x'
+    isOutput,           // '=x'
+    isClobber           // '~x'
+  };
+
+  typedef std::vector<std::string> ConstraintCodeVector;
+
+  struct SubConstraintInfo {
+    /// MatchingInput - If this is not -1, this is an output constraint where an
+    /// input constraint is required to match it (e.g. "0").  The value is the
+    /// constraint number that matches this one (for example, if this is
+    /// constraint #0 and constraint #4 has the value "0", this will be 4).
+    signed char MatchingInput;
+    /// Code - The constraint code, either the register name (in braces) or the
+    /// constraint letter/number.
+    ConstraintCodeVector Codes;
+    /// Default constructor.
+    SubConstraintInfo() : MatchingInput(-1) {}
+  };
+
+  typedef std::vector<SubConstraintInfo> SubConstraintInfoVector;
+  struct ConstraintInfo;
+  typedef std::vector<ConstraintInfo> ConstraintInfoVector;
+
+  struct ConstraintInfo {
+    /// Type - The basic type of the constraint: input/output/clobber
+    ///
+    ConstraintPrefix Type;
+
+    /// isEarlyClobber - "&": output operand writes result before inputs are all
+    /// read.  This is only ever set for an output operand.
+    bool isEarlyClobber;
+
+    /// MatchingInput - If this is not -1, this is an output constraint where an
+    /// input constraint is required to match it (e.g. "0").  The value is the
+    /// constraint number that matches this one (for example, if this is
+    /// constraint #0 and constraint #4 has the value "0", this will be 4).
+ signed char MatchingInput; + + /// hasMatchingInput - Return true if this is an output constraint that has + /// a matching input constraint. + bool hasMatchingInput() const { return MatchingInput != -1; } + + /// isCommutative - This is set to true for a constraint that is commutative + /// with the next operand. + bool isCommutative; + + /// isIndirect - True if this operand is an indirect operand. This means + /// that the address of the source or destination is present in the call + /// instruction, instead of it being returned or passed in explicitly. This + /// is represented with a '*' in the asm string. + bool isIndirect; + + /// Code - The constraint code, either the register name (in braces) or the + /// constraint letter/number. + ConstraintCodeVector Codes; + + /// isMultipleAlternative - '|': has multiple-alternative constraints. + bool isMultipleAlternative; + + /// multipleAlternatives - If there are multiple alternative constraints, + /// this array will contain them. Otherwise it will be empty. + SubConstraintInfoVector multipleAlternatives; + + /// The currently selected alternative constraint index. + unsigned currentAlternativeIndex; + + ///Default constructor. + ConstraintInfo(); + + /// Copy constructor. + ConstraintInfo(const ConstraintInfo &other); + + /// Parse - Analyze the specified string (e.g. "=*&{eax}") and fill in the + /// fields in this structure. If the constraint string is not understood, + /// return true, otherwise return false. + bool Parse(StringRef Str, ConstraintInfoVector &ConstraintsSoFar); + + /// selectAlternative - Point this constraint to the alternative constraint + /// indicated by the index. + void selectAlternative(unsigned index); + }; + + /// ParseConstraints - Split up the constraint string into the specific + /// constraints and their prefixes. If this returns an empty vector, and if + /// the constraint string itself isn't empty, there was an error parsing. + static ConstraintInfoVector ParseConstraints(StringRef ConstraintString); + + /// ParseConstraints - Parse the constraints of this inlineasm object, + /// returning them the same way that ParseConstraints(str) does. + ConstraintInfoVector ParseConstraints() const { + return ParseConstraints(Constraints); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const InlineAsm *) { return true; } + static inline bool classof(const Value *V) { + return V->getValueID() == Value::InlineAsmVal; + } + + + // These are helper methods for dealing with flags in the INLINEASM SDNode + // in the backend. + + enum { + Op_InputChain = 0, + Op_AsmString = 1, + Op_MDNode = 2, + Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack + Op_FirstOperand = 4, + + MIOp_AsmString = 0, + MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack + MIOp_FirstOperand = 2, + + Extra_HasSideEffects = 1, + Extra_IsAlignStack = 2, + + Kind_RegUse = 1, + Kind_RegDef = 2, + Kind_Imm = 3, + Kind_Mem = 4, + Kind_RegDefEarlyClobber = 6, + + Flag_MatchingOperand = 0x80000000 + }; + + static unsigned getFlagWord(unsigned Kind, unsigned NumOps) { + assert(((NumOps << 3) & ~0xffff) == 0 && "Too many inline asm operands!"); + return Kind | (NumOps << 3); + } + + /// getFlagWordForMatchingOp - Augment an existing flag word returned by + /// getFlagWord with information indicating that this input operand is tied + /// to a previous output operand. 
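+  ///
+  /// For example (an illustrative sketch): a single-register "use" operand
+  /// tied to output operand #0 is encoded as
+  ///
+  ///   unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
+  ///   Flag = InlineAsm::getFlagWordForMatchingOp(Flag, 0);
+  ///   // getKind(Flag) == Kind_RegUse, getNumOperandRegisters(Flag) == 1,
+  ///   // and isUseOperandTiedToDef(Flag, Idx) yields true with Idx == 0.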
+  static unsigned getFlagWordForMatchingOp(unsigned InputFlag,
+                                           unsigned MatchedOperandNo) {
+    return InputFlag | Flag_MatchingOperand | (MatchedOperandNo << 16);
+  }
+
+  static unsigned getKind(unsigned Flags) {
+    return Flags & 7;
+  }
+
+  static bool isRegDefKind(unsigned Flag){ return getKind(Flag) == Kind_RegDef;}
+  static bool isImmKind(unsigned Flag) { return getKind(Flag) == Kind_Imm; }
+  static bool isMemKind(unsigned Flag) { return getKind(Flag) == Kind_Mem; }
+  static bool isRegDefEarlyClobberKind(unsigned Flag) {
+    return getKind(Flag) == Kind_RegDefEarlyClobber;
+  }
+
+  /// getNumOperandRegisters - Extract the number of registers field from the
+  /// inline asm operand flag.
+  static unsigned getNumOperandRegisters(unsigned Flag) {
+    return (Flag & 0xffff) >> 3;
+  }
+
+  /// isUseOperandTiedToDef - Return true if the flag of the inline asm
+  /// operand indicates it is a use operand that's matched to a def operand.
+  static bool isUseOperandTiedToDef(unsigned Flag, unsigned &Idx) {
+    if ((Flag & Flag_MatchingOperand) == 0)
+      return false;
+    Idx = (Flag & ~Flag_MatchingOperand) >> 16;
+    return true;
+  }
+
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/InstrTypes.h b/final/include/llvm/InstrTypes.h
new file mode 100644
index 00000000000..a166956e1a6
--- /dev/null
+++ b/final/include/llvm/InstrTypes.h
@@ -0,0 +1,854 @@
+//===-- llvm/InstrTypes.h - Important Instruction subclasses ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various meta classes of instructions that exist in the VM
+// representation.  Specific concrete subclasses of these may be found in the
+// i*.h files...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INSTRUCTION_TYPES_H
+#define LLVM_INSTRUCTION_TYPES_H
+
+#include "llvm/Instruction.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/Operator.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/ADT/Twine.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+//                            TerminatorInst Class
+//===----------------------------------------------------------------------===//
+
+/// TerminatorInst - Subclasses of this class are all able to terminate a basic
+/// block.  Thus, these are all the flow control type of operations.
+///
+class TerminatorInst : public Instruction {
+protected:
+  TerminatorInst(const Type *Ty, Instruction::TermOps iType,
+                 Use *Ops, unsigned NumOps,
+                 Instruction *InsertBefore = 0)
+    : Instruction(Ty, iType, Ops, NumOps, InsertBefore) {}
+
+  TerminatorInst(const Type *Ty, Instruction::TermOps iType,
+                 Use *Ops, unsigned NumOps, BasicBlock *InsertAtEnd)
+    : Instruction(Ty, iType, Ops, NumOps, InsertAtEnd) {}
+
+  // Out of line virtual method, so the vtable, etc. has a home.
+  ~TerminatorInst();
+
+  /// Virtual methods - Terminators should overload these and provide inline
+  /// overrides of non-V methods.
+  virtual BasicBlock *getSuccessorV(unsigned idx) const = 0;
+  virtual unsigned getNumSuccessorsV() const = 0;
+  virtual void setSuccessorV(unsigned idx, BasicBlock *B) = 0;
+  virtual TerminatorInst *clone_impl() const = 0;
+public:
+
+  /// getNumSuccessors - Return the number of successors that this terminator
+  /// has.
+  unsigned getNumSuccessors() const {
+    return getNumSuccessorsV();
+  }
+
+  /// getSuccessor - Return the specified successor.
+  ///
+  BasicBlock *getSuccessor(unsigned idx) const {
+    return getSuccessorV(idx);
+  }
+
+  /// setSuccessor - Update the specified successor to point at the provided
+  /// block.
+  void setSuccessor(unsigned idx, BasicBlock *B) {
+    setSuccessorV(idx, B);
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const TerminatorInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->isTerminator();
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+//                          UnaryInstruction Class
+//===----------------------------------------------------------------------===//
+
+class UnaryInstruction : public Instruction {
+  void *operator new(size_t, unsigned);      // Do not implement
+
+protected:
+  UnaryInstruction(const Type *Ty, unsigned iType, Value *V,
+                   Instruction *IB = 0)
+    : Instruction(Ty, iType, &Op<0>(), 1, IB) {
+    Op<0>() = V;
+  }
+  UnaryInstruction(const Type *Ty, unsigned iType, Value *V, BasicBlock *IAE)
+    : Instruction(Ty, iType, &Op<0>(), 1, IAE) {
+    Op<0>() = V;
+  }
+public:
+  // allocate space for exactly one operand
+  void *operator new(size_t s) {
+    return User::operator new(s, 1);
+  }
+
+  // Out of line virtual method, so the vtable, etc. has a home.
+  ~UnaryInstruction();
+
+  /// Transparently provide more efficient getOperand methods.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const UnaryInstruction *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::Alloca ||
+           I->getOpcode() == Instruction::Load ||
+           I->getOpcode() == Instruction::VAArg ||
+           I->getOpcode() == Instruction::ExtractValue ||
+           (I->getOpcode() >= CastOpsBegin && I->getOpcode() < CastOpsEnd);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+template <>
+struct OperandTraits<UnaryInstruction> :
+  public FixedNumOperandTraits<UnaryInstruction, 1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
+
+//===----------------------------------------------------------------------===//
+//                           BinaryOperator Class
+//===----------------------------------------------------------------------===//
+
+class BinaryOperator : public Instruction {
+  void *operator new(size_t, unsigned); // Do not implement
+protected:
+  void init(BinaryOps iType);
+  BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty,
+                 const Twine &Name, Instruction *InsertBefore);
+  BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty,
+                 const Twine &Name, BasicBlock *InsertAtEnd);
+  virtual BinaryOperator *clone_impl() const;
+public:
+  // allocate space for exactly two operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 2);
+  }
+
+  /// Transparently provide more efficient getOperand methods.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// Create() - Construct a binary instruction, given the opcode and the two
+  /// operands.  Optionally (if InstBefore is specified) insert the instruction
+  /// into a BasicBlock right before the specified instruction.  The specified
+  /// Instruction is allowed to be a dereferenced end iterator.
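+  ///
+  /// A short illustrative sketch (L, R, and IP are hypothetical operands and
+  /// an insertion point, not part of this header):
+  ///
+  ///   BinaryOperator *Sum =
+  ///       BinaryOperator::Create(Instruction::Add, L, R, "sum", IP);
+  ///   BinaryOperator *Shl = BinaryOperator::CreateNUWShl(L, R, "shl");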
+ /// + static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2, + const Twine &Name = Twine(), + Instruction *InsertBefore = 0); + + /// Create() - Construct a binary instruction, given the opcode and the two + /// operands. Also automatically insert this instruction to the end of the + /// BasicBlock specified. + /// + static BinaryOperator *Create(BinaryOps Op, Value *S1, Value *S2, + const Twine &Name, BasicBlock *InsertAtEnd); + + /// Create* - These methods just forward to Create, and are useful when you + /// statically know what type of instruction you're going to create. These + /// helpers just save some typing. +#define HANDLE_BINARY_INST(N, OPC, CLASS) \ + static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ + const Twine &Name = "") {\ + return Create(Instruction::OPC, V1, V2, Name);\ + } +#include "llvm/Instruction.def" +#define HANDLE_BINARY_INST(N, OPC, CLASS) \ + static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ + const Twine &Name, BasicBlock *BB) {\ + return Create(Instruction::OPC, V1, V2, Name, BB);\ + } +#include "llvm/Instruction.def" +#define HANDLE_BINARY_INST(N, OPC, CLASS) \ + static BinaryOperator *Create##OPC(Value *V1, Value *V2, \ + const Twine &Name, Instruction *I) {\ + return Create(Instruction::OPC, V1, V2, Name, I);\ + } +#include "llvm/Instruction.def" + + static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name = "") { + BinaryOperator *BO = Create(Opc, V1, V2, Name); + BO->setHasNoSignedWrap(true); + return BO; + } + static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, BasicBlock *BB) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, BB); + BO->setHasNoSignedWrap(true); + return BO; + } + static BinaryOperator *CreateNSW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, Instruction *I) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, I); + BO->setHasNoSignedWrap(true); + return BO; + } + + static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name = "") { + BinaryOperator *BO = Create(Opc, V1, V2, Name); + BO->setHasNoUnsignedWrap(true); + return BO; + } + static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, BasicBlock *BB) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, BB); + BO->setHasNoUnsignedWrap(true); + return BO; + } + static BinaryOperator *CreateNUW(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, Instruction *I) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, I); + BO->setHasNoUnsignedWrap(true); + return BO; + } + + static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name = "") { + BinaryOperator *BO = Create(Opc, V1, V2, Name); + BO->setIsExact(true); + return BO; + } + static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, BasicBlock *BB) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, BB); + BO->setIsExact(true); + return BO; + } + static BinaryOperator *CreateExact(BinaryOps Opc, Value *V1, Value *V2, + const Twine &Name, Instruction *I) { + BinaryOperator *BO = Create(Opc, V1, V2, Name, I); + BO->setIsExact(true); + return BO; + } + +#define DEFINE_HELPERS(OPC, NUWNSWEXACT) \ + static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ + (Value *V1, Value *V2, const Twine &Name = "") { \ + return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name); \ + } \ + static BinaryOperator *Create ## NUWNSWEXACT ## OPC \ + (Value *V1, Value *V2, const Twine &Name, BasicBlock *BB) { \ + 
return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, BB); \
+  } \
+  static BinaryOperator *Create ## NUWNSWEXACT ## OPC \
+           (Value *V1, Value *V2, const Twine &Name, Instruction *I) { \
+    return Create ## NUWNSWEXACT(Instruction::OPC, V1, V2, Name, I); \
+  }
+
+  DEFINE_HELPERS(Add, NSW)   // CreateNSWAdd
+  DEFINE_HELPERS(Add, NUW)   // CreateNUWAdd
+  DEFINE_HELPERS(Sub, NSW)   // CreateNSWSub
+  DEFINE_HELPERS(Sub, NUW)   // CreateNUWSub
+  DEFINE_HELPERS(Mul, NSW)   // CreateNSWMul
+  DEFINE_HELPERS(Mul, NUW)   // CreateNUWMul
+  DEFINE_HELPERS(Shl, NSW)   // CreateNSWShl
+  DEFINE_HELPERS(Shl, NUW)   // CreateNUWShl
+
+  DEFINE_HELPERS(SDiv, Exact)  // CreateExactSDiv
+  DEFINE_HELPERS(UDiv, Exact)  // CreateExactUDiv
+  DEFINE_HELPERS(AShr, Exact)  // CreateExactAShr
+  DEFINE_HELPERS(LShr, Exact)  // CreateExactLShr
+
+#undef DEFINE_HELPERS
+
+  /// Helper functions to construct and inspect unary operations (NEG and NOT)
+  /// via binary operators SUB and XOR:
+  ///
+  /// CreateNeg, CreateNot - Create the NEG and NOT
+  ///     instructions out of SUB and XOR instructions.
+  ///
+  static BinaryOperator *CreateNeg(Value *Op, const Twine &Name = "",
+                                   Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateNeg(Value *Op, const Twine &Name,
+                                   BasicBlock *InsertAtEnd);
+  static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name = "",
+                                      Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateNSWNeg(Value *Op, const Twine &Name,
+                                      BasicBlock *InsertAtEnd);
+  static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name = "",
+                                      Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateNUWNeg(Value *Op, const Twine &Name,
+                                      BasicBlock *InsertAtEnd);
+  static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name = "",
+                                    Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateFNeg(Value *Op, const Twine &Name,
+                                    BasicBlock *InsertAtEnd);
+  static BinaryOperator *CreateNot(Value *Op, const Twine &Name = "",
+                                   Instruction *InsertBefore = 0);
+  static BinaryOperator *CreateNot(Value *Op, const Twine &Name,
+                                   BasicBlock *InsertAtEnd);
+
+  /// isNeg, isFNeg, isNot - Check if the given Value is a
+  /// NEG, FNeg, or NOT instruction.
+  ///
+  static bool isNeg(const Value *V);
+  static bool isFNeg(const Value *V);
+  static bool isNot(const Value *V);
+
+  /// getNegArgument, getNotArgument - Helper functions to extract the
+  ///     unary argument of a NEG, FNEG or NOT operation implemented via
+  ///     Sub, FSub, or Xor.
+  ///
+  static const Value *getNegArgument(const Value *BinOp);
+  static       Value *getNegArgument(      Value *BinOp);
+  static const Value *getFNegArgument(const Value *BinOp);
+  static       Value *getFNegArgument(      Value *BinOp);
+  static const Value *getNotArgument(const Value *BinOp);
+  static       Value *getNotArgument(      Value *BinOp);
+
+  BinaryOps getOpcode() const {
+    return static_cast<BinaryOps>(Instruction::getOpcode());
+  }
+
+  /// swapOperands - Exchange the two operands to this instruction.
+  /// This instruction is safe to use on any binary instruction and
+  /// does not modify the semantics of the instruction.  If the instruction
+  /// cannot be reversed (i.e., it's a Div), then return true.
+  ///
+  bool swapOperands();
+
+  /// setHasNoUnsignedWrap - Set or clear the nuw flag on this instruction,
+  /// which must be an operator which supports this flag.  See LangRef.html
+  /// for the meaning of this flag.
+  void setHasNoUnsignedWrap(bool b = true);
+
+  /// setHasNoSignedWrap - Set or clear the nsw flag on this instruction,
+  /// which must be an operator which supports this flag.  See LangRef.html
+  /// for the meaning of this flag.
+  void setHasNoSignedWrap(bool b = true);
+
+  /// setIsExact - Set or clear the exact flag on this instruction,
+  /// which must be an operator which supports this flag.  See LangRef.html
+  /// for the meaning of this flag.
+  void setIsExact(bool b = true);
+
+  /// hasNoUnsignedWrap - Determine whether the no unsigned wrap flag is set.
+  bool hasNoUnsignedWrap() const;
+
+  /// hasNoSignedWrap - Determine whether the no signed wrap flag is set.
+  bool hasNoSignedWrap() const;
+
+  /// isExact - Determine whether the exact flag is set.
+  bool isExact() const;
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const BinaryOperator *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->isBinaryOp();
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+template <>
+struct OperandTraits<BinaryOperator> :
+  public FixedNumOperandTraits<BinaryOperator, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryOperator, Value)
+
+//===----------------------------------------------------------------------===//
+//                               CastInst Class
+//===----------------------------------------------------------------------===//
+
+/// CastInst - This is the base class for all instructions that perform data
+/// casts.  It is simply provided so that instruction category testing
+/// can be performed with code like:
+///
+/// if (isa<CastInst>(Instr)) { ... }
+/// @brief Base class of casting instructions.
+class CastInst : public UnaryInstruction {
+protected:
+  /// @brief Constructor with insert-before-instruction semantics for subclasses
+  CastInst(const Type *Ty, unsigned iType, Value *S,
+           const Twine &NameStr = "", Instruction *InsertBefore = 0)
+    : UnaryInstruction(Ty, iType, S, InsertBefore) {
+    setName(NameStr);
+  }
+  /// @brief Constructor with insert-at-end-of-block semantics for subclasses
+  CastInst(const Type *Ty, unsigned iType, Value *S,
+           const Twine &NameStr, BasicBlock *InsertAtEnd)
+    : UnaryInstruction(Ty, iType, S, InsertAtEnd) {
+    setName(NameStr);
+  }
+public:
+  /// Provides a way to construct any of the CastInst subclasses using an
+  /// opcode instead of the subclass's constructor.  The opcode must be in the
+  /// CastOps category (Instruction::isCast(opcode) returns true).  This
+  /// constructor has insert-before-instruction semantics to automatically
+  /// insert the new CastInst before InsertBefore (if it is non-null).
+  /// @brief Construct any of the CastInst subclasses
+  static CastInst *Create(
+    Instruction::CastOps,    ///< The opcode of the cast instruction
+    Value *S,                ///< The value to be casted (operand 0)
+    const Type *Ty,          ///< The type to which cast should be made
+    const Twine &Name = "",  ///< Name for the instruction
+    Instruction *InsertBefore = 0 ///< Place to insert the instruction
+  );
+  /// Provides a way to construct any of the CastInst subclasses using an
+  /// opcode instead of the subclass's constructor.  The opcode must be in the
+  /// CastOps category.  This constructor has insert-at-end-of-block semantics
+  /// to automatically insert the new CastInst at the end of InsertAtEnd (if
+  /// it is non-null).
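+  ///
+  /// For example (an illustrative sketch; V, BB, and Ctx are hypothetical):
+  ///
+  ///   // Zero-extend the i8 value V to i32 at the end of block BB.
+  ///   CastInst *Ext = CastInst::Create(Instruction::ZExt, V,
+  ///                                    Type::getInt32Ty(Ctx), "ext", BB);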
+ /// @brief Construct any of the CastInst subclasses + static CastInst *Create( + Instruction::CastOps, ///< The opcode for the cast instruction + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which operand is casted + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create a ZExt or BitCast cast instruction + static CastInst *CreateZExtOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which cast should be made + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create a ZExt or BitCast cast instruction + static CastInst *CreateZExtOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which operand is casted + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create a SExt or BitCast cast instruction + static CastInst *CreateSExtOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which cast should be made + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create a SExt or BitCast cast instruction + static CastInst *CreateSExtOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which operand is casted + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create a BitCast or a PtrToInt cast instruction + static CastInst *CreatePointerCast( + Value *S, ///< The pointer value to be casted (operand 0) + const Type *Ty, ///< The type to which operand is casted + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create a BitCast or a PtrToInt cast instruction + static CastInst *CreatePointerCast( + Value *S, ///< The pointer value to be casted (operand 0) + const Type *Ty, ///< The type to which cast should be made + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts. + static CastInst *CreateIntegerCast( + Value *S, ///< The pointer value to be casted (operand 0) + const Type *Ty, ///< The type to which cast should be made + bool isSigned, ///< Whether to regard S as signed or not + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create a ZExt, BitCast, or Trunc for int -> int casts. 
+ static CastInst *CreateIntegerCast( + Value *S, ///< The integer value to be casted (operand 0) + const Type *Ty, ///< The integer type to which operand is casted + bool isSigned, ///< Whether to regard S as signed or not + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts + static CastInst *CreateFPCast( + Value *S, ///< The floating point value to be casted + const Type *Ty, ///< The floating point type to cast to + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create an FPExt, BitCast, or FPTrunc for fp -> fp casts + static CastInst *CreateFPCast( + Value *S, ///< The floating point value to be casted + const Type *Ty, ///< The floating point type to cast to + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Create a Trunc or BitCast cast instruction + static CastInst *CreateTruncOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which cast should be made + const Twine &Name = "", ///< Name for the instruction + Instruction *InsertBefore = 0 ///< Place to insert the instruction + ); + + /// @brief Create a Trunc or BitCast cast instruction + static CastInst *CreateTruncOrBitCast( + Value *S, ///< The value to be casted (operand 0) + const Type *Ty, ///< The type to which operand is casted + const Twine &Name, ///< The name for the instruction + BasicBlock *InsertAtEnd ///< The block to insert the instruction into + ); + + /// @brief Check whether it is valid to call getCastOpcode for these types. + static bool isCastable( + const Type *SrcTy, ///< The Type from which the value should be cast. + const Type *DestTy ///< The Type to which the value should be cast. + ); + + /// Returns the opcode necessary to cast Val into Ty using usual casting + /// rules. + /// @brief Infer the opcode for cast operand and type + static Instruction::CastOps getCastOpcode( + const Value *Val, ///< The value to cast + bool SrcIsSigned, ///< Whether to treat the source as signed + const Type *Ty, ///< The Type to which the value should be casted + bool DstIsSigned ///< Whether to treate the dest. as signed + ); + + /// There are several places where we need to know if a cast instruction + /// only deals with integer source and destination types. To simplify that + /// logic, this method is provided. + /// @returns true iff the cast has only integral typed operand and dest type. + /// @brief Determine if this is an integer-only cast. + bool isIntegerCast() const; + + /// A lossless cast is one that does not alter the basic value. It implies + /// a no-op cast but is more stringent, preventing things like int->float, + /// long->double, or int->ptr. + /// @returns true iff the cast is lossless. + /// @brief Determine if this is a lossless cast. + bool isLosslessCast() const; + + /// A no-op cast is one that can be effected without changing any bits. + /// It implies that the source and destination types are the same size. The + /// IntPtrTy argument is used to make accurate determinations for casts + /// involving Integer and Pointer types. They are no-op casts if the integer + /// is the same size as the pointer. However, pointer size varies with + /// platform. 
Generally, the result of TargetData::getIntPtrType() should be + /// passed in. If that's not available, use Type::Int64Ty, which will make + /// the isNoopCast call conservative. + /// @brief Determine if the described cast is a no-op cast. + static bool isNoopCast( + Instruction::CastOps Opcode, ///< Opcode of cast + const Type *SrcTy, ///< SrcTy of cast + const Type *DstTy, ///< DstTy of cast + const Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null + ); + + /// @brief Determine if this cast is a no-op cast. + bool isNoopCast( + const Type *IntPtrTy ///< Integer type corresponding to pointer + ) const; + + /// Determine how a pair of casts can be eliminated, if they can be at all. + /// This is a helper function for both CastInst and ConstantExpr. + /// @returns 0 if the CastInst pair can't be eliminated + /// @returns Instruction::CastOps value for a cast that can replace + /// the pair, casting SrcTy to DstTy. + /// @brief Determine if a cast pair is eliminable + static unsigned isEliminableCastPair( + Instruction::CastOps firstOpcode, ///< Opcode of first cast + Instruction::CastOps secondOpcode, ///< Opcode of second cast + const Type *SrcTy, ///< SrcTy of 1st cast + const Type *MidTy, ///< DstTy of 1st cast & SrcTy of 2nd cast + const Type *DstTy, ///< DstTy of 2nd cast + const Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null + ); + + /// @brief Return the opcode of this CastInst + Instruction::CastOps getOpcode() const { + return Instruction::CastOps(Instruction::getOpcode()); + } + + /// @brief Return the source type, as a convenience + const Type* getSrcTy() const { return getOperand(0)->getType(); } + /// @brief Return the destination type, as a convenience + const Type* getDestTy() const { return getType(); } + + /// This method can be used to determine if a cast from S to DstTy using + /// Opcode op is valid or not. + /// @returns true iff the proposed cast is valid. + /// @brief Determine if a cast is valid without creating one. + static bool castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy); + + /// @brief Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const CastInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->isCast(); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +//===----------------------------------------------------------------------===// +// CmpInst Class +//===----------------------------------------------------------------------===// + +/// This class is the base class for the comparison instructions. +/// @brief Abstract base class of comparison instructions. +class CmpInst : public Instruction { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + CmpInst(); // do not implement +protected: + CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred, + Value *LHS, Value *RHS, const Twine &Name = "", + Instruction *InsertBefore = 0); + + CmpInst(const Type *ty, Instruction::OtherOps op, unsigned short pred, + Value *LHS, Value *RHS, const Twine &Name, + BasicBlock *InsertAtEnd); + + virtual void Anchor() const; // Out of line virtual method. +public: + /// This enumeration lists the possible predicates for CmpInst subclasses. + /// Values in the range 0-31 are reserved for FCmpInst, while values in the + /// range 32-64 are reserved for ICmpInst. This is necessary to ensure the + /// predicate values are not overlapping between the classes. 
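+  ///
+  /// The FCMP values form a 4-bit U/L/G/E encoding, as the table in the enum
+  /// shows; for example (an editorial reading of that table, not extra API):
+  ///
+  ///   FCMP_ULE == 13 == 0b1101   // true if Unordered, Less, or Equal
+  ///   FCMP_OGT ==  2 == 0b0010   // true only if ordered and Greater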
+  enum Predicate {
+    // Opcode              U L G E    Intuitive operation
+    FCMP_FALSE =  0,  ///< 0 0 0 0    Always false (always folded)
+    FCMP_OEQ   =  1,  ///< 0 0 0 1    True if ordered and equal
+    FCMP_OGT   =  2,  ///< 0 0 1 0    True if ordered and greater than
+    FCMP_OGE   =  3,  ///< 0 0 1 1    True if ordered and greater than or equal
+    FCMP_OLT   =  4,  ///< 0 1 0 0    True if ordered and less than
+    FCMP_OLE   =  5,  ///< 0 1 0 1    True if ordered and less than or equal
+    FCMP_ONE   =  6,  ///< 0 1 1 0    True if ordered and operands are unequal
+    FCMP_ORD   =  7,  ///< 0 1 1 1    True if ordered (no nans)
+    FCMP_UNO   =  8,  ///< 1 0 0 0    True if unordered: isnan(X) | isnan(Y)
+    FCMP_UEQ   =  9,  ///< 1 0 0 1    True if unordered or equal
+    FCMP_UGT   = 10,  ///< 1 0 1 0    True if unordered or greater than
+    FCMP_UGE   = 11,  ///< 1 0 1 1    True if unordered, greater than, or equal
+    FCMP_ULT   = 12,  ///< 1 1 0 0    True if unordered or less than
+    FCMP_ULE   = 13,  ///< 1 1 0 1    True if unordered, less than, or equal
+    FCMP_UNE   = 14,  ///< 1 1 1 0    True if unordered or not equal
+    FCMP_TRUE  = 15,  ///< 1 1 1 1    Always true (always folded)
+    FIRST_FCMP_PREDICATE = FCMP_FALSE,
+    LAST_FCMP_PREDICATE = FCMP_TRUE,
+    BAD_FCMP_PREDICATE = FCMP_TRUE + 1,
+    ICMP_EQ  = 32,  ///< equal
+    ICMP_NE  = 33,  ///< not equal
+    ICMP_UGT = 34,  ///< unsigned greater than
+    ICMP_UGE = 35,  ///< unsigned greater or equal
+    ICMP_ULT = 36,  ///< unsigned less than
+    ICMP_ULE = 37,  ///< unsigned less or equal
+    ICMP_SGT = 38,  ///< signed greater than
+    ICMP_SGE = 39,  ///< signed greater or equal
+    ICMP_SLT = 40,  ///< signed less than
+    ICMP_SLE = 41,  ///< signed less or equal
+    FIRST_ICMP_PREDICATE = ICMP_EQ,
+    LAST_ICMP_PREDICATE = ICMP_SLE,
+    BAD_ICMP_PREDICATE = ICMP_SLE + 1
+  };
+
+  // allocate space for exactly two operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 2);
+  }
+  /// Construct a compare instruction, given the opcode, the predicate and
+  /// the two operands. Optionally (if InsertBefore is specified) insert the
+  /// instruction into a BasicBlock right before the specified instruction.
+  /// The specified Instruction is allowed to be a dereferenced end iterator.
+  /// @brief Create a CmpInst
+  static CmpInst *Create(OtherOps Op,
+                         unsigned short predicate, Value *S1,
+                         Value *S2, const Twine &Name = "",
+                         Instruction *InsertBefore = 0);
+
+  /// Construct a compare instruction, given the opcode, the predicate and the
+  /// two operands. Also automatically insert this instruction to the end of
+  /// the BasicBlock specified.
+  /// @brief Create a CmpInst
+  static CmpInst *Create(OtherOps Op, unsigned short predicate, Value *S1,
+                         Value *S2, const Twine &Name, BasicBlock *InsertAtEnd);
+
+  /// @brief Get the opcode casted to the right type
+  OtherOps getOpcode() const {
+    return static_cast<OtherOps>(Instruction::getOpcode());
+  }
+
+  /// @brief Return the predicate for this instruction.
+  Predicate getPredicate() const {
+    return Predicate(getSubclassDataFromInstruction());
+  }
+
+  /// @brief Set the predicate for this instruction to the specified value.
+  void setPredicate(Predicate P) { setInstructionSubclassData(P); }
+
+  static bool isFPPredicate(Predicate P) {
+    return P >= FIRST_FCMP_PREDICATE && P <= LAST_FCMP_PREDICATE;
+  }
+
+  static bool isIntPredicate(Predicate P) {
+    return P >= FIRST_ICMP_PREDICATE && P <= LAST_ICMP_PREDICATE;
+  }
+
+  bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
+  bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
+
+  /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
+  ///              OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
+  /// @returns the inverse predicate for the instruction's current predicate.
+  /// @brief Return the inverse of the instruction's predicate.
+  Predicate getInversePredicate() const {
+    return getInversePredicate(getPredicate());
+  }
+
+  /// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
+  ///              OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
+  /// @returns the inverse predicate for predicate provided in \p pred.
+  /// @brief Return the inverse of a given predicate
+  static Predicate getInversePredicate(Predicate pred);
+
+  /// For example, EQ->EQ, SLE->SGE, ULT->UGT,
+  ///              OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
+  /// @returns the predicate that would be the result of exchanging the two
+  /// operands of the CmpInst instruction without changing the result
+  /// produced.
+  /// @brief Return the predicate as if the operands were swapped
+  Predicate getSwappedPredicate() const {
+    return getSwappedPredicate(getPredicate());
+  }
+
+  /// This is a static version that you can use without an instruction
+  /// available.
+  /// @brief Return the predicate as if the operands were swapped.
+  static Predicate getSwappedPredicate(Predicate pred);
+
+  /// @brief Provide more efficient getOperand methods.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// This is just a convenience that dispatches to the subclasses.
+  /// @brief Swap the operands and adjust predicate accordingly to retain
+  /// the same comparison.
+  void swapOperands();
+
+  /// This is just a convenience that dispatches to the subclasses.
+  /// @brief Determine if this CmpInst is commutative.
+  bool isCommutative() const;
+
+  /// This is just a convenience that dispatches to the subclasses.
+  /// @brief Determine if this is an equals/not equals predicate.
+  bool isEquality() const;
+
+  /// @returns true if the comparison is signed, false otherwise.
+  /// @brief Determine if this instruction is using a signed comparison.
+  bool isSigned() const {
+    return isSigned(getPredicate());
+  }
+
+  /// @returns true if the comparison is unsigned, false otherwise.
+  /// @brief Determine if this instruction is using an unsigned comparison.
+  bool isUnsigned() const {
+    return isUnsigned(getPredicate());
+  }
+
+  /// This is just a convenience.
+  /// @brief Determine if this is true when both operands are the same.
+  bool isTrueWhenEqual() const {
+    return isTrueWhenEqual(getPredicate());
+  }
+
+  /// This is just a convenience.
+  /// @brief Determine if this is false when both operands are the same.
+  bool isFalseWhenEqual() const {
+    return isFalseWhenEqual(getPredicate());
+  }
+
+  /// @returns true if the predicate is unsigned, false otherwise.
+  /// @brief Determine if the predicate is an unsigned operation.
+  static bool isUnsigned(unsigned short predicate);
+
+  /// @returns true if the predicate is signed, false otherwise.
+  /// @brief Determine if the predicate is a signed operation.
+  static bool isSigned(unsigned short predicate);
+
+  /// @brief Determine if the predicate is an ordered operation.
+  static bool isOrdered(unsigned short predicate);
+
+  /// @brief Determine if the predicate is an unordered operation.
+  static bool isUnordered(unsigned short predicate);
+
+  /// Determine if the predicate is true when comparing a value with itself.
+  static bool isTrueWhenEqual(unsigned short predicate);
+
+  /// Determine if the predicate is false when comparing a value with itself.
+  static bool isFalseWhenEqual(unsigned short predicate);
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const CmpInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::ICmp ||
+           I->getOpcode() == Instruction::FCmp;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+  /// @brief Create a result type for fcmp/icmp
+  static const Type* makeCmpResultType(const Type* opnd_type) {
+    if (const VectorType* vt = dyn_cast<VectorType>(opnd_type)) {
+      return VectorType::get(Type::getInt1Ty(opnd_type->getContext()),
+                             vt->getNumElements());
+    }
+    return Type::getInt1Ty(opnd_type->getContext());
+  }
+private:
+  // Shadow Value::setValueSubclassData with a private forwarding method so
+  // that subclasses cannot accidentally use it.
+  void setValueSubclassData(unsigned short D) {
+    Value::setValueSubclassData(D);
+  }
+};
+
+
+// FIXME: these are redundant if CmpInst < BinaryOperator
+template <>
+struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Instruction.def b/final/include/llvm/Instruction.def
new file mode 100644
index 00000000000..205f30313e7
--- /dev/null
+++ b/final/include/llvm/Instruction.def
@@ -0,0 +1,196 @@
+//===-- llvm/Instruction.def - File that describes Instructions -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains descriptions of the various LLVM instructions. This is
+// used as a central place for enumerating the different instructions and
+// should eventually be the place to put comments about the instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+// Provide definitions of macros so that users of this file do not have to
+// define everything to use it...
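+//
+// For example, a client can expand one case per opcode by defining
+// HANDLE_INST before including this file (a sketch of the usual idiom;
+// `I` is assumed to be an Instruction from the surrounding code):
+//
+//   switch (I.getOpcode()) {
+//   #define HANDLE_INST(NUM, OPCODE, CLASS) \
+//     case NUM: return #OPCODE;
+//   #include "llvm/Instruction.def"
+//   default: return "<unknown>";
+//   }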
+// +#ifndef FIRST_TERM_INST +#define FIRST_TERM_INST(num) +#endif +#ifndef HANDLE_TERM_INST +#ifndef HANDLE_INST +#define HANDLE_TERM_INST(num, opcode, Class) +#else +#define HANDLE_TERM_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_TERM_INST +#define LAST_TERM_INST(num) +#endif + +#ifndef FIRST_BINARY_INST +#define FIRST_BINARY_INST(num) +#endif +#ifndef HANDLE_BINARY_INST +#ifndef HANDLE_INST +#define HANDLE_BINARY_INST(num, opcode, instclass) +#else +#define HANDLE_BINARY_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_BINARY_INST +#define LAST_BINARY_INST(num) +#endif + +#ifndef FIRST_MEMORY_INST +#define FIRST_MEMORY_INST(num) +#endif +#ifndef HANDLE_MEMORY_INST +#ifndef HANDLE_INST +#define HANDLE_MEMORY_INST(num, opcode, Class) +#else +#define HANDLE_MEMORY_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_MEMORY_INST +#define LAST_MEMORY_INST(num) +#endif + +#ifndef FIRST_CAST_INST +#define FIRST_CAST_INST(num) +#endif +#ifndef HANDLE_CAST_INST +#ifndef HANDLE_INST +#define HANDLE_CAST_INST(num, opcode, Class) +#else +#define HANDLE_CAST_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_CAST_INST +#define LAST_CAST_INST(num) +#endif + +#ifndef FIRST_OTHER_INST +#define FIRST_OTHER_INST(num) +#endif +#ifndef HANDLE_OTHER_INST +#ifndef HANDLE_INST +#define HANDLE_OTHER_INST(num, opcode, Class) +#else +#define HANDLE_OTHER_INST(num, opcode, Class) HANDLE_INST(num, opcode, Class) +#endif +#endif +#ifndef LAST_OTHER_INST +#define LAST_OTHER_INST(num) +#endif + + +// Terminator Instructions - These instructions are used to terminate a basic +// block of the program. Every basic block must end with one of these +// instructions for it to be a well formed basic block. +// + FIRST_TERM_INST ( 1) +HANDLE_TERM_INST ( 1, Ret , ReturnInst) +HANDLE_TERM_INST ( 2, Br , BranchInst) +HANDLE_TERM_INST ( 3, Switch , SwitchInst) +HANDLE_TERM_INST ( 4, IndirectBr , IndirectBrInst) +HANDLE_TERM_INST ( 5, Invoke , InvokeInst) +HANDLE_TERM_INST ( 6, Unwind , UnwindInst) +HANDLE_TERM_INST ( 7, Unreachable, UnreachableInst) + LAST_TERM_INST ( 7) + +// Standard binary operators... + FIRST_BINARY_INST( 8) +HANDLE_BINARY_INST( 8, Add , BinaryOperator) +HANDLE_BINARY_INST( 9, FAdd , BinaryOperator) +HANDLE_BINARY_INST(10, Sub , BinaryOperator) +HANDLE_BINARY_INST(11, FSub , BinaryOperator) +HANDLE_BINARY_INST(12, Mul , BinaryOperator) +HANDLE_BINARY_INST(13, FMul , BinaryOperator) +HANDLE_BINARY_INST(14, UDiv , BinaryOperator) +HANDLE_BINARY_INST(15, SDiv , BinaryOperator) +HANDLE_BINARY_INST(16, FDiv , BinaryOperator) +HANDLE_BINARY_INST(17, URem , BinaryOperator) +HANDLE_BINARY_INST(18, SRem , BinaryOperator) +HANDLE_BINARY_INST(19, FRem , BinaryOperator) + +// Logical operators (integer operands) +HANDLE_BINARY_INST(20, Shl , BinaryOperator) // Shift left (logical) +HANDLE_BINARY_INST(21, LShr , BinaryOperator) // Shift right (logical) +HANDLE_BINARY_INST(22, AShr , BinaryOperator) // Shift right (arithmetic) +HANDLE_BINARY_INST(23, And , BinaryOperator) +HANDLE_BINARY_INST(24, Or , BinaryOperator) +HANDLE_BINARY_INST(25, Xor , BinaryOperator) + LAST_BINARY_INST(25) + +// Memory operators... 
+ FIRST_MEMORY_INST(26) +HANDLE_MEMORY_INST(26, Alloca, AllocaInst) // Stack management +HANDLE_MEMORY_INST(27, Load , LoadInst ) // Memory manipulation instrs +HANDLE_MEMORY_INST(28, Store , StoreInst ) +HANDLE_MEMORY_INST(29, GetElementPtr, GetElementPtrInst) + LAST_MEMORY_INST(29) + +// Cast operators ... +// NOTE: The order matters here because CastInst::isEliminableCastPair +// NOTE: (see Instructions.cpp) encodes a table based on this ordering. + FIRST_CAST_INST(30) +HANDLE_CAST_INST(30, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(31, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(32, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(33, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(34, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(35, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(36, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(37, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(38, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(39, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(40, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(41, BitCast , BitCastInst ) // Type cast + LAST_CAST_INST(41) + +// Other operators... + FIRST_OTHER_INST(42) +HANDLE_OTHER_INST(42, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(43, FCmp , FCmpInst ) // Floating point comparison instr. +HANDLE_OTHER_INST(44, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(45, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(46, Select , SelectInst ) // select instruction +HANDLE_OTHER_INST(47, UserOp1, Instruction) // May be used internally in a pass +HANDLE_OTHER_INST(48, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(49, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(50, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(51, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(52, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(53, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(54, InsertValue, InsertValueInst) // insert into aggregate + + LAST_OTHER_INST(54) + +#undef FIRST_TERM_INST +#undef HANDLE_TERM_INST +#undef LAST_TERM_INST + +#undef FIRST_BINARY_INST +#undef HANDLE_BINARY_INST +#undef LAST_BINARY_INST + +#undef FIRST_MEMORY_INST +#undef HANDLE_MEMORY_INST +#undef LAST_MEMORY_INST + +#undef FIRST_CAST_INST +#undef HANDLE_CAST_INST +#undef LAST_CAST_INST + +#undef FIRST_OTHER_INST +#undef HANDLE_OTHER_INST +#undef LAST_OTHER_INST + +#ifdef HANDLE_INST +#undef HANDLE_INST +#endif diff --git a/final/include/llvm/Instruction.h b/final/include/llvm/Instruction.h new file mode 100644 index 00000000000..89bb9fdf423 --- /dev/null +++ b/final/include/llvm/Instruction.h @@ -0,0 +1,390 @@ +//===-- llvm/Instruction.h - Instruction class definition -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the Instruction class, which is the +// base class for all of the LLVM instructions. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INSTRUCTION_H
+#define LLVM_INSTRUCTION_H
+
+#include "llvm/User.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class LLVMContext;
+class MDNode;
+
+template<typename ValueSubClass, typename ItemParentClass>
+  class SymbolTableListTraits;
+
+class Instruction : public User, public ilist_node<Instruction> {
+  void operator=(const Instruction &);     // Do not implement
+  Instruction(const Instruction &);        // Do not implement
+
+  BasicBlock *Parent;
+  DebugLoc DbgLoc;                         // 'dbg' Metadata cache.
+
+  enum {
+    /// HasMetadataBit - This is a bit stored in the SubClassData field which
+    /// indicates whether this instruction has metadata attached to it or not.
+    HasMetadataBit = 1 << 15
+  };
+public:
+  // Out of line virtual method, so the vtable, etc has a home.
+  ~Instruction();
+
+  /// use_back - Specialize the methods defined in Value, as we know that an
+  /// instruction can only be used by other instructions.
+  Instruction       *use_back()       { return cast<Instruction>(*use_begin());}
+  const Instruction *use_back() const { return cast<Instruction>(*use_begin());}
+
+  inline const BasicBlock *getParent() const { return Parent; }
+  inline       BasicBlock *getParent()       { return Parent; }
+
+  /// removeFromParent - This method unlinks 'this' from the containing basic
+  /// block, but does not delete it.
+  ///
+  void removeFromParent();
+
+  /// eraseFromParent - This method unlinks 'this' from the containing basic
+  /// block and deletes it.
+  ///
+  void eraseFromParent();
+
+  /// insertBefore - Insert an unlinked instruction into a basic block
+  /// immediately before the specified instruction.
+  void insertBefore(Instruction *InsertPos);
+
+  /// insertAfter - Insert an unlinked instruction into a basic block
+  /// immediately after the specified instruction.
+  void insertAfter(Instruction *InsertPos);
+
+  /// moveBefore - Unlink this instruction from its current basic block and
+  /// insert it into the basic block that MovePos lives in, right before
+  /// MovePos.
+  void moveBefore(Instruction *MovePos);
+
+  //===--------------------------------------------------------------------===//
+  // Subclass classification.
+  //===--------------------------------------------------------------------===//
+
+  /// getOpcode() returns a member of one of the enums like Instruction::Add.
+  unsigned getOpcode() const { return getValueID() - InstructionVal; }
+
+  const char *getOpcodeName() const { return getOpcodeName(getOpcode()); }
+  bool isTerminator() const { return isTerminator(getOpcode()); }
+  bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
+  bool isShift() { return isShift(getOpcode()); }
+  bool isCast() const { return isCast(getOpcode()); }
+
+  static const char* getOpcodeName(unsigned OpCode);
+
+  static inline bool isTerminator(unsigned OpCode) {
+    return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
+  }
+
+  static inline bool isBinaryOp(unsigned Opcode) {
+    return Opcode >= BinaryOpsBegin && Opcode < BinaryOpsEnd;
+  }
+
+  /// @brief Determine if the Opcode is one of the shift instructions.
+  static inline bool isShift(unsigned Opcode) {
+    return Opcode >= Shl && Opcode <= AShr;
+  }
+
+  /// isLogicalShift - Return true if this is a logical shift left or a logical
+  /// shift right.
+  inline bool isLogicalShift() const {
+    return getOpcode() == Shl || getOpcode() == LShr;
+  }
+
+  /// isArithmeticShift - Return true if this is an arithmetic shift right.
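+  /// Together with isShift() and isLogicalShift() above, this partitions the
+  /// three shift opcodes; e.g. (illustrative only):
+  ///
+  ///   if (I->isShift() && !I->isLogicalShift())
+  ///     ;  // I must be an AShr, the only arithmetic shift.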
+  inline bool isArithmeticShift() const {
+    return getOpcode() == AShr;
+  }
+
+  /// @brief Determine if the OpCode is one of the CastInst instructions.
+  static inline bool isCast(unsigned OpCode) {
+    return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Metadata manipulation.
+  //===--------------------------------------------------------------------===//
+
+  /// hasMetadata() - Return true if this instruction has any metadata attached
+  /// to it.
+  bool hasMetadata() const {
+    return !DbgLoc.isUnknown() || hasMetadataHashEntry();
+  }
+
+  /// hasMetadataOtherThanDebugLoc - Return true if this instruction has
+  /// metadata attached to it other than a debug location.
+  bool hasMetadataOtherThanDebugLoc() const {
+    return hasMetadataHashEntry();
+  }
+
+  /// getMetadata - Get the metadata of given kind attached to this Instruction.
+  /// If the metadata is not found then return null.
+  MDNode *getMetadata(unsigned KindID) const {
+    if (!hasMetadata()) return 0;
+    return getMetadataImpl(KindID);
+  }
+
+  /// getMetadata - Get the metadata of given kind attached to this Instruction.
+  /// If the metadata is not found then return null.
+  MDNode *getMetadata(const char *Kind) const {
+    if (!hasMetadata()) return 0;
+    return getMetadataImpl(Kind);
+  }
+
+  /// getAllMetadata - Get all metadata attached to this Instruction. The first
+  /// element of each pair returned is the KindID, the second element is the
+  /// metadata value. This list is returned sorted by the KindID.
+  void getAllMetadata(SmallVectorImpl<std::pair<unsigned, MDNode*> > &MDs) const {
+    if (hasMetadata())
+      getAllMetadataImpl(MDs);
+  }
+
+  /// getAllMetadataOtherThanDebugLoc - This does the same thing as
+  /// getAllMetadata, except that it filters out the debug location.
+  void getAllMetadataOtherThanDebugLoc(SmallVectorImpl<std::pair<unsigned,
+                                       MDNode*> > &MDs) const {
+    if (hasMetadataOtherThanDebugLoc())
+      getAllMetadataOtherThanDebugLocImpl(MDs);
+  }
+
+  /// setMetadata - Set the metadata of the specified kind to the specified
+  /// node. This updates/replaces metadata if already present, or removes it if
+  /// Node is null.
+  void setMetadata(unsigned KindID, MDNode *Node);
+  void setMetadata(const char *Kind, MDNode *Node);
+
+  /// setDebugLoc - Set the debug location information for this instruction.
+  void setDebugLoc(const DebugLoc &Loc) { DbgLoc = Loc; }
+
+  /// getDebugLoc - Return the debug location for this node as a DebugLoc.
+  const DebugLoc &getDebugLoc() const { return DbgLoc; }
+
+private:
+  /// hasMetadataHashEntry - Return true if we have an entry in the on-the-side
+  /// metadata hash.
+  bool hasMetadataHashEntry() const {
+    return (getSubclassDataFromValue() & HasMetadataBit) != 0;
+  }
+
+  // These are all implemented in Metadata.cpp.
+  MDNode *getMetadataImpl(unsigned KindID) const;
+  MDNode *getMetadataImpl(const char *Kind) const;
+  void getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,MDNode*> > &) const;
+  void getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+                                           MDNode*> > &) const;
+  void clearMetadataHashEntries();
+public:
+  //===--------------------------------------------------------------------===//
+  // Predicates and helper methods.
+  //===--------------------------------------------------------------------===//
+
+
+  /// isAssociative - Return true if the instruction is associative:
+  ///
+  ///   Associative operators satisfy:  x op (y op z) === (x op y) op z
+  ///
+  /// In LLVM, the Add, Mul, And, Or, and Xor operators are associative.
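+  ///
+  /// For example (per the list above; FP opcodes are deliberately excluded,
+  /// since floating point math is not freely reassociable):
+  ///
+  ///   Instruction::isAssociative(Instruction::Add);   // true
+  ///   Instruction::isAssociative(Instruction::FAdd);  // false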
+  ///
+  bool isAssociative() const { return isAssociative(getOpcode()); }
+  static bool isAssociative(unsigned op);
+
+  /// isCommutative - Return true if the instruction is commutative:
+  ///
+  ///   Commutative operators satisfy: (x op y) === (y op x)
+  ///
+  /// In LLVM, these are the associative operators, plus SetEQ and SetNE, when
+  /// applied to any type.
+  ///
+  bool isCommutative() const { return isCommutative(getOpcode()); }
+  static bool isCommutative(unsigned op);
+
+  /// mayWriteToMemory - Return true if this instruction may modify memory.
+  ///
+  bool mayWriteToMemory() const;
+
+  /// mayReadFromMemory - Return true if this instruction may read memory.
+  ///
+  bool mayReadFromMemory() const;
+
+  /// mayThrow - Return true if this instruction may throw an exception.
+  ///
+  bool mayThrow() const;
+
+  /// mayHaveSideEffects - Return true if the instruction may have side effects.
+  ///
+  /// Note that this does not consider malloc and alloca to have side
+  /// effects because the newly allocated memory is completely invisible to
+  /// instructions which don't use the returned value. For cases where this
+  /// matters, isSafeToSpeculativelyExecute may be more appropriate.
+  bool mayHaveSideEffects() const {
+    return mayWriteToMemory() || mayThrow();
+  }
+
+  /// isSafeToSpeculativelyExecute - Return true if the instruction does not
+  /// have any effects besides calculating the result and does not have
+  /// undefined behavior.
+  ///
+  /// This method never returns true for an instruction that returns true for
+  /// mayHaveSideEffects; however, this method also does some other checks in
+  /// addition. It checks for undefined behavior, like dividing by zero or
+  /// loading from an invalid pointer (but not for undefined results, like a
+  /// shift with a shift amount larger than the width of the result). It checks
+  /// for malloc and alloca because speculatively executing them might cause a
+  /// memory leak. It also returns false for instructions related to control
+  /// flow, specifically terminators and PHI nodes.
+  ///
+  /// This method only looks at the instruction itself and its operands, so if
+  /// this method returns true, it is safe to move the instruction as long as
+  /// the correct dominance relationships for the operands and users hold.
+  /// However, this method can return true for instructions that read memory;
+  /// for such instructions, moving them may change the resulting value.
+  bool isSafeToSpeculativelyExecute() const;
+
+  /// clone() - Create a copy of 'this' instruction that is identical in all
+  /// ways except the following:
+  ///   * The instruction has no parent
+  ///   * The instruction has no name
+  ///
+  Instruction *clone() const;
+
+  /// isIdenticalTo - Return true if the specified instruction is exactly
+  /// identical to the current one. This means that all operands match and any
+  /// extra information (e.g. load is volatile) agree.
+  bool isIdenticalTo(const Instruction *I) const;
+
+  /// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
+  /// ignores the SubclassOptionalData flags, which specify conditions
+  /// under which the instruction's result is undefined.
+  bool isIdenticalToWhenDefined(const Instruction *I) const;
+
+  /// This function determines if the specified instruction executes the same
+  /// operation as the current one. This means that the opcodes, type, operand
+  /// types and any other factors affecting the operation must be the same.
This + /// is similar to isIdenticalTo except the operands themselves don't have to + /// be identical. + /// @returns true if the specified instruction is the same operation as + /// the current one. + /// @brief Determine if one instruction is the same operation as another. + bool isSameOperationAs(const Instruction *I) const; + + /// isUsedOutsideOfBlock - Return true if there are any uses of this + /// instruction in blocks other than the specified block. Note that PHI nodes + /// are considered to evaluate their operands in the corresponding predecessor + /// block. + bool isUsedOutsideOfBlock(const BasicBlock *BB) const; + + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *) { return true; } + static inline bool classof(const Value *V) { + return V->getValueID() >= Value::InstructionVal; + } + + //---------------------------------------------------------------------- + // Exported enumerations. + // + enum TermOps { // These terminate basic blocks +#define FIRST_TERM_INST(N) TermOpsBegin = N, +#define HANDLE_TERM_INST(N, OPC, CLASS) OPC = N, +#define LAST_TERM_INST(N) TermOpsEnd = N+1 +#include "llvm/Instruction.def" + }; + + enum BinaryOps { +#define FIRST_BINARY_INST(N) BinaryOpsBegin = N, +#define HANDLE_BINARY_INST(N, OPC, CLASS) OPC = N, +#define LAST_BINARY_INST(N) BinaryOpsEnd = N+1 +#include "llvm/Instruction.def" + }; + + enum MemoryOps { +#define FIRST_MEMORY_INST(N) MemoryOpsBegin = N, +#define HANDLE_MEMORY_INST(N, OPC, CLASS) OPC = N, +#define LAST_MEMORY_INST(N) MemoryOpsEnd = N+1 +#include "llvm/Instruction.def" + }; + + enum CastOps { +#define FIRST_CAST_INST(N) CastOpsBegin = N, +#define HANDLE_CAST_INST(N, OPC, CLASS) OPC = N, +#define LAST_CAST_INST(N) CastOpsEnd = N+1 +#include "llvm/Instruction.def" + }; + + enum OtherOps { +#define FIRST_OTHER_INST(N) OtherOpsBegin = N, +#define HANDLE_OTHER_INST(N, OPC, CLASS) OPC = N, +#define LAST_OTHER_INST(N) OtherOpsEnd = N+1 +#include "llvm/Instruction.def" + }; +private: + // Shadow Value::setValueSubclassData with a private forwarding method so that + // subclasses cannot accidentally use it. + void setValueSubclassData(unsigned short D) { + Value::setValueSubclassData(D); + } + unsigned short getSubclassDataFromValue() const { + return Value::getSubclassDataFromValue(); + } + + void setHasMetadataHashEntry(bool V) { + setValueSubclassData((getSubclassDataFromValue() & ~HasMetadataBit) | + (V ? HasMetadataBit : 0)); + } + + friend class SymbolTableListTraits; + void setParent(BasicBlock *P); +protected: + // Instruction subclasses can stick up to 15 bits of stuff into the + // SubclassData field of instruction with these members. + + // Verify that only the low 15 bits are used. + void setInstructionSubclassData(unsigned short D) { + assert((D & HasMetadataBit) == 0 && "Out of range value put into field"); + setValueSubclassData((getSubclassDataFromValue() & HasMetadataBit) | D); + } + + unsigned getSubclassDataFromInstruction() const { + return getSubclassDataFromValue() & ~HasMetadataBit; + } + + Instruction(const Type *Ty, unsigned iType, Use *Ops, unsigned NumOps, + Instruction *InsertBefore = 0); + Instruction(const Type *Ty, unsigned iType, Use *Ops, unsigned NumOps, + BasicBlock *InsertAtEnd); + virtual Instruction *clone_impl() const = 0; + +}; + +// Instruction* is only 4-byte aligned. 
+template<> +class PointerLikeTypeTraits { + typedef Instruction* PT; +public: + static inline void *getAsVoidPointer(PT P) { return P; } + static inline PT getFromVoidPointer(void *P) { + return static_cast(P); + } + enum { NumLowBitsAvailable = 2 }; +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Instructions.h b/final/include/llvm/Instructions.h new file mode 100644 index 00000000000..17ff763c52b --- /dev/null +++ b/final/include/llvm/Instructions.h @@ -0,0 +1,3186 @@ +//===-- llvm/Instructions.h - Instruction subclass definitions --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file exposes the class definitions of all of the subclasses of the +// Instruction class. This is meant to be an easy way to get access to all +// instruction subclasses. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_INSTRUCTIONS_H +#define LLVM_INSTRUCTIONS_H + +#include "llvm/InstrTypes.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Attributes.h" +#include "llvm/CallingConv.h" +#include "llvm/ADT/SmallVector.h" +#include + +namespace llvm { + +class ConstantInt; +class ConstantRange; +class APInt; +class LLVMContext; + +//===----------------------------------------------------------------------===// +// AllocaInst Class +//===----------------------------------------------------------------------===// + +/// AllocaInst - an instruction to allocate memory on the stack +/// +class AllocaInst : public UnaryInstruction { +protected: + virtual AllocaInst *clone_impl() const; +public: + explicit AllocaInst(const Type *Ty, Value *ArraySize = 0, + const Twine &Name = "", Instruction *InsertBefore = 0); + AllocaInst(const Type *Ty, Value *ArraySize, + const Twine &Name, BasicBlock *InsertAtEnd); + + AllocaInst(const Type *Ty, const Twine &Name, Instruction *InsertBefore = 0); + AllocaInst(const Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd); + + AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, + const Twine &Name = "", Instruction *InsertBefore = 0); + AllocaInst(const Type *Ty, Value *ArraySize, unsigned Align, + const Twine &Name, BasicBlock *InsertAtEnd); + + // Out of line virtual method, so the vtable, etc. has a home. + virtual ~AllocaInst(); + + /// isArrayAllocation - Return true if there is an allocation size parameter + /// to the allocation instruction that is not 1. + /// + bool isArrayAllocation() const; + + /// getArraySize - Get the number of elements allocated. For a simple + /// allocation of a single element, this will return a constant 1 value. + /// + const Value *getArraySize() const { return getOperand(0); } + Value *getArraySize() { return getOperand(0); } + + /// getType - Overload to return most specific pointer type + /// + const PointerType *getType() const { + return reinterpret_cast(Instruction::getType()); + } + + /// getAllocatedType - Return the type that is being allocated by the + /// instruction. + /// + const Type *getAllocatedType() const; + + /// getAlignment - Return the alignment of the memory that is being allocated + /// by the instruction. 
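+  ///
+  /// As the decoding in the accessor below implies, the subclass-data field
+  /// stores log2(alignment) + 1, with 0 meaning "no alignment specified"
+  /// (an editorial note, inferred from the getter rather than stated here):
+  ///
+  ///   (1u << 0) >> 1 == 0   // stored 0 -> no alignment
+  ///   (1u << 4) >> 1 == 8   // stored 4 -> 8-byte alignment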
+ /// + unsigned getAlignment() const { + return (1u << getSubclassDataFromInstruction()) >> 1; + } + void setAlignment(unsigned Align); + + /// isStaticAlloca - Return true if this alloca is in the entry block of the + /// function and is a constant size. If so, the code generator will fold it + /// into the prolog/epilog code, so it is basically free. + bool isStaticAlloca() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const AllocaInst *) { return true; } + static inline bool classof(const Instruction *I) { + return (I->getOpcode() == Instruction::Alloca); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +private: + // Shadow Instruction::setInstructionSubclassData with a private forwarding + // method so that subclasses cannot accidentally use it. + void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + + +//===----------------------------------------------------------------------===// +// LoadInst Class +//===----------------------------------------------------------------------===// + +/// LoadInst - an instruction for reading from memory. This uses the +/// SubclassData field in Value to store whether or not the load is volatile. +/// +class LoadInst : public UnaryInstruction { + void AssertOK(); +protected: + virtual LoadInst *clone_impl() const; +public: + LoadInst(Value *Ptr, const Twine &NameStr, Instruction *InsertBefore); + LoadInst(Value *Ptr, const Twine &NameStr, BasicBlock *InsertAtEnd); + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile = false, + Instruction *InsertBefore = 0); + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, + unsigned Align, Instruction *InsertBefore = 0); + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, + BasicBlock *InsertAtEnd); + LoadInst(Value *Ptr, const Twine &NameStr, bool isVolatile, + unsigned Align, BasicBlock *InsertAtEnd); + + LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore); + LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd); + explicit LoadInst(Value *Ptr, const char *NameStr = 0, + bool isVolatile = false, Instruction *InsertBefore = 0); + LoadInst(Value *Ptr, const char *NameStr, bool isVolatile, + BasicBlock *InsertAtEnd); + + /// isVolatile - Return true if this is a load from a volatile memory + /// location. + /// + bool isVolatile() const { return getSubclassDataFromInstruction() & 1; } + + /// setVolatile - Specify whether this is a volatile load or not. + /// + void setVolatile(bool V) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | + (V ? 
1 : 0));
+  }
+
+  /// getAlignment - Return the alignment of the access that is being performed
+  ///
+  unsigned getAlignment() const {
+    return (1 << (getSubclassDataFromInstruction() >> 1)) >> 1;
+  }
+
+  void setAlignment(unsigned Align);
+
+  Value *getPointerOperand() { return getOperand(0); }
+  const Value *getPointerOperand() const { return getOperand(0); }
+  static unsigned getPointerOperandIndex() { return 0U; }
+
+  unsigned getPointerAddressSpace() const {
+    return cast<PointerType>(getPointerOperand()->getType())->getAddressSpace();
+  }
+
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const LoadInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::Load;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+// StoreInst Class
+//===----------------------------------------------------------------------===//
+
+/// StoreInst - an instruction for storing to memory
+///
+class StoreInst : public Instruction {
+  void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+  void AssertOK();
+protected:
+  virtual StoreInst *clone_impl() const;
+public:
+  // allocate space for exactly two operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 2);
+  }
+  StoreInst(Value *Val, Value *Ptr, Instruction *InsertBefore);
+  StoreInst(Value *Val, Value *Ptr, BasicBlock *InsertAtEnd);
+  StoreInst(Value *Val, Value *Ptr, bool isVolatile = false,
+            Instruction *InsertBefore = 0);
+  StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+            unsigned Align, Instruction *InsertBefore = 0);
+  StoreInst(Value *Val, Value *Ptr, bool isVolatile, BasicBlock *InsertAtEnd);
+  StoreInst(Value *Val, Value *Ptr, bool isVolatile,
+            unsigned Align, BasicBlock *InsertAtEnd);
+
+
+  /// isVolatile - Return true if this is a store to a volatile memory
+  /// location.
+  ///
+  bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
+
+  /// setVolatile - Specify whether this is a volatile store or not.
+  ///
+  void setVolatile(bool V) {
+    setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
+                               (V ? 1 : 0));
+  }
+
+  /// Transparently provide more efficient getOperand methods.
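+  ///
+  /// The operands are laid out as {value, pointer}, as the accessors further
+  /// down show; e.g. for a store `SI` built by `new StoreInst(Val, Ptr, ...)`
+  /// (a sketch; `Val` and `Ptr` are assumed):
+  ///
+  ///   SI->getValueOperand()   == SI->getOperand(0)   // Val
+  ///   SI->getPointerOperand() == SI->getOperand(1)   // Ptr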
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// getAlignment - Return the alignment of the access that is being performed + /// + unsigned getAlignment() const { + return (1 << (getSubclassDataFromInstruction() >> 1)) >> 1; + } + + void setAlignment(unsigned Align); + + Value *getValueOperand() { return getOperand(0); } + const Value *getValueOperand() const { return getOperand(0); } + + Value *getPointerOperand() { return getOperand(1); } + const Value *getPointerOperand() const { return getOperand(1); } + static unsigned getPointerOperandIndex() { return 1U; } + + unsigned getPointerAddressSpace() const { + return cast(getPointerOperand()->getType())->getAddressSpace(); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const StoreInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Store; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +private: + // Shadow Instruction::setInstructionSubclassData with a private forwarding + // method so that subclasses cannot accidentally use it. + void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + +template <> +struct OperandTraits : public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value) + +//===----------------------------------------------------------------------===// +// GetElementPtrInst Class +//===----------------------------------------------------------------------===// + +// checkType - Simple wrapper function to give a better assertion failure +// message on bad indexes for a gep instruction. +// +static inline const Type *checkType(const Type *Ty) { + assert(Ty && "Invalid GetElementPtrInst indices for type!"); + return Ty; +} + +/// GetElementPtrInst - an instruction for type-safe pointer arithmetic to +/// access elements of arrays and structs +/// +class GetElementPtrInst : public Instruction { + GetElementPtrInst(const GetElementPtrInst &GEPI); + void init(Value *Ptr, Value* const *Idx, unsigned NumIdx, + const Twine &NameStr); + void init(Value *Ptr, Value *Idx, const Twine &NameStr); + + template + void init(Value *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + // This argument ensures that we have an iterator we can + // do arithmetic on in constant time + std::random_access_iterator_tag) { + unsigned NumIdx = static_cast(std::distance(IdxBegin, IdxEnd)); + + if (NumIdx > 0) { + // This requires that the iterator points to contiguous memory. + init(Ptr, &*IdxBegin, NumIdx, NameStr); // FIXME: for the general case + // we have to build an array here + } + else { + init(Ptr, 0, NumIdx, NameStr); + } + } + + /// getIndexedType - Returns the type of the element that would be loaded with + /// a load instruction with the specified parameters. + /// + /// Null is returned if the indices are invalid for the specified + /// pointer type. + /// + template + static const Type *getIndexedType(const Type *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + // This argument ensures that we + // have an iterator we can do + // arithmetic on in constant time + std::random_access_iterator_tag) { + unsigned NumIdx = static_cast(std::distance(IdxBegin, IdxEnd)); + + if (NumIdx > 0) + // This requires that the iterator points to contiguous memory. 
+      return getIndexedType(Ptr, &*IdxBegin, NumIdx);
+    else
+      return getIndexedType(Ptr, (Value *const*)0, NumIdx);
+  }
+
+  /// Constructors - Create a getelementptr instruction with a base pointer and
+  /// a list of indices. The first ctor can optionally insert before an
+  /// existing instruction, the second appends the new instruction to the
+  /// specified BasicBlock.
+  template<typename RandomAccessIterator>
+  inline GetElementPtrInst(Value *Ptr, RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
+                           unsigned Values,
+                           const Twine &NameStr,
+                           Instruction *InsertBefore);
+  template<typename RandomAccessIterator>
+  inline GetElementPtrInst(Value *Ptr,
+                           RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
+                           unsigned Values,
+                           const Twine &NameStr, BasicBlock *InsertAtEnd);
+
+  /// Constructors - These two constructors are convenience methods because one
+  /// and two index getelementptr instructions are so common.
+  GetElementPtrInst(Value *Ptr, Value *Idx, const Twine &NameStr = "",
+                    Instruction *InsertBefore = 0);
+  GetElementPtrInst(Value *Ptr, Value *Idx,
+                    const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+  virtual GetElementPtrInst *clone_impl() const;
+public:
+  template<typename RandomAccessIterator>
+  static GetElementPtrInst *Create(Value *Ptr, RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
+                                   const Twine &NameStr = "",
+                                   Instruction *InsertBefore = 0) {
+    typename std::iterator_traits<RandomAccessIterator>::difference_type
+      Values = 1 + std::distance(IdxBegin, IdxEnd);
+    return new(Values)
+      GetElementPtrInst(Ptr, IdxBegin, IdxEnd, Values, NameStr, InsertBefore);
+  }
+  template<typename RandomAccessIterator>
+  static GetElementPtrInst *Create(Value *Ptr,
+                                   RandomAccessIterator IdxBegin,
+                                   RandomAccessIterator IdxEnd,
+                                   const Twine &NameStr,
+                                   BasicBlock *InsertAtEnd) {
+    typename std::iterator_traits<RandomAccessIterator>::difference_type
+      Values = 1 + std::distance(IdxBegin, IdxEnd);
+    return new(Values)
+      GetElementPtrInst(Ptr, IdxBegin, IdxEnd, Values, NameStr, InsertAtEnd);
+  }
+
+  /// Constructors - These two creators are convenience methods because one
+  /// index getelementptr instructions are so common.
+  static GetElementPtrInst *Create(Value *Ptr, Value *Idx,
+                                   const Twine &NameStr = "",
+                                   Instruction *InsertBefore = 0) {
+    return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertBefore);
+  }
+  static GetElementPtrInst *Create(Value *Ptr, Value *Idx,
+                                   const Twine &NameStr,
+                                   BasicBlock *InsertAtEnd) {
+    return new(2) GetElementPtrInst(Ptr, Idx, NameStr, InsertAtEnd);
+  }
+
+  /// Create an "inbounds" getelementptr. See the documentation for the
+  /// "inbounds" flag in LangRef.html for details.
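+  ///
+  /// A minimal usage sketch (editorial; `Ptr` is assumed to point at a struct
+  /// whose second field is wanted, and `Ctx`/`InsertPt` come from the
+  /// surrounding code):
+  ///
+  ///   Value *Idxs[] = {
+  ///     ConstantInt::get(Type::getInt32Ty(Ctx), 0),
+  ///     ConstantInt::get(Type::getInt32Ty(Ctx), 1)
+  ///   };
+  ///   GetElementPtrInst *GEP = GetElementPtrInst::CreateInBounds(
+  ///       Ptr, Idxs, Idxs + 2, "field.addr", InsertPt);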
+ template + static GetElementPtrInst *CreateInBounds(Value *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd, + NameStr, InsertBefore); + GEP->setIsInBounds(true); + return GEP; + } + template + static GetElementPtrInst *CreateInBounds(Value *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + GetElementPtrInst *GEP = Create(Ptr, IdxBegin, IdxEnd, + NameStr, InsertAtEnd); + GEP->setIsInBounds(true); + return GEP; + } + static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertBefore); + GEP->setIsInBounds(true); + return GEP; + } + static GetElementPtrInst *CreateInBounds(Value *Ptr, Value *Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + GetElementPtrInst *GEP = Create(Ptr, Idx, NameStr, InsertAtEnd); + GEP->setIsInBounds(true); + return GEP; + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // getType - Overload to return most specific pointer type... + const PointerType *getType() const { + return reinterpret_cast(Instruction::getType()); + } + + /// getIndexedType - Returns the type of the element that would be loaded with + /// a load instruction with the specified parameters. + /// + /// Null is returned if the indices are invalid for the specified + /// pointer type. + /// + template + static const Type *getIndexedType(const Type *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd) { + return getIndexedType(Ptr, IdxBegin, IdxEnd, + typename std::iterator_traits:: + iterator_category()); + } + + static const Type *getIndexedType(const Type *Ptr, + Value* const *Idx, unsigned NumIdx); + + static const Type *getIndexedType(const Type *Ptr, + Constant* const *Idx, unsigned NumIdx); + + static const Type *getIndexedType(const Type *Ptr, + uint64_t const *Idx, unsigned NumIdx); + + static const Type *getIndexedType(const Type *Ptr, Value *Idx); + + inline op_iterator idx_begin() { return op_begin()+1; } + inline const_op_iterator idx_begin() const { return op_begin()+1; } + inline op_iterator idx_end() { return op_end(); } + inline const_op_iterator idx_end() const { return op_end(); } + + Value *getPointerOperand() { + return getOperand(0); + } + const Value *getPointerOperand() const { + return getOperand(0); + } + static unsigned getPointerOperandIndex() { + return 0U; // get index for modifying correct operand + } + + unsigned getPointerAddressSpace() const { + return cast(getType())->getAddressSpace(); + } + + /// getPointerOperandType - Method to return the pointer operand as a + /// PointerType. + const PointerType *getPointerOperandType() const { + return reinterpret_cast(getPointerOperand()->getType()); + } + + + unsigned getNumIndices() const { // Note: always non-negative + return getNumOperands() - 1; + } + + bool hasIndices() const { + return getNumOperands() > 1; + } + + /// hasAllZeroIndices - Return true if all of the indices of this GEP are + /// zeros. If so, the result pointer and the first operand have the same + /// value, just potentially different types. + bool hasAllZeroIndices() const; + + /// hasAllConstantIndices - Return true if all of the indices of this GEP are + /// constant integers. 
If so, the result pointer and the first operand have + /// a constant offset between them. + bool hasAllConstantIndices() const; + + /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction. + /// See LangRef.html for the meaning of inbounds on a getelementptr. + void setIsInBounds(bool b = true); + + /// isInBounds - Determine whether the GEP has the inbounds flag. + bool isInBounds() const; + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const GetElementPtrInst *) { return true; } + static inline bool classof(const Instruction *I) { + return (I->getOpcode() == Instruction::GetElementPtr); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits : + public VariadicOperandTraits { +}; + +template +GetElementPtrInst::GetElementPtrInst(Value *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + unsigned Values, + const Twine &NameStr, + Instruction *InsertBefore) + : Instruction(PointerType::get(checkType( + getIndexedType(Ptr->getType(), + IdxBegin, IdxEnd)), + cast(Ptr->getType()) + ->getAddressSpace()), + GetElementPtr, + OperandTraits::op_end(this) - Values, + Values, InsertBefore) { + init(Ptr, IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} +template +GetElementPtrInst::GetElementPtrInst(Value *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + unsigned Values, + const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction(PointerType::get(checkType( + getIndexedType(Ptr->getType(), + IdxBegin, IdxEnd)), + cast(Ptr->getType()) + ->getAddressSpace()), + GetElementPtr, + OperandTraits::op_end(this) - Values, + Values, InsertAtEnd) { + init(Ptr, IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} + + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) + + +//===----------------------------------------------------------------------===// +// ICmpInst Class +//===----------------------------------------------------------------------===// + +/// This instruction compares its operands according to the predicate given +/// to the constructor. It only operates on integers or pointers. The operands +/// must be identical types. +/// @brief Represent an integer comparison operator. +class ICmpInst: public CmpInst { +protected: + /// @brief Clone an indentical ICmpInst + virtual ICmpInst *clone_impl() const; +public: + /// @brief Constructor with insert-before-instruction semantics. + ICmpInst( + Instruction *InsertBefore, ///< Where to insert + Predicate pred, ///< The predicate to use for the comparison + Value *LHS, ///< The left-hand-side of the expression + Value *RHS, ///< The right-hand-side of the expression + const Twine &NameStr = "" ///< Name of the instruction + ) : CmpInst(makeCmpResultType(LHS->getType()), + Instruction::ICmp, pred, LHS, RHS, NameStr, + InsertBefore) { + assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && + pred <= CmpInst::LAST_ICMP_PREDICATE && + "Invalid ICmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to ICmp instruction are not of the same type!"); + // Check that the operands are the right type + assert((getOperand(0)->getType()->isIntOrIntVectorTy() || + getOperand(0)->getType()->isPointerTy()) && + "Invalid operand types for ICmp instruction"); + } + + /// @brief Constructor with insert-at-end semantics. 
+ ICmpInst( + BasicBlock &InsertAtEnd, ///< Block to insert into. + Predicate pred, ///< The predicate to use for the comparison + Value *LHS, ///< The left-hand-side of the expression + Value *RHS, ///< The right-hand-side of the expression + const Twine &NameStr = "" ///< Name of the instruction + ) : CmpInst(makeCmpResultType(LHS->getType()), + Instruction::ICmp, pred, LHS, RHS, NameStr, + &InsertAtEnd) { + assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && + pred <= CmpInst::LAST_ICMP_PREDICATE && + "Invalid ICmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to ICmp instruction are not of the same type!"); + // Check that the operands are the right type + assert((getOperand(0)->getType()->isIntOrIntVectorTy() || + getOperand(0)->getType()->isPointerTy()) && + "Invalid operand types for ICmp instruction"); + } + + /// @brief Constructor with no-insertion semantics + ICmpInst( + Predicate pred, ///< The predicate to use for the comparison + Value *LHS, ///< The left-hand-side of the expression + Value *RHS, ///< The right-hand-side of the expression + const Twine &NameStr = "" ///< Name of the instruction + ) : CmpInst(makeCmpResultType(LHS->getType()), + Instruction::ICmp, pred, LHS, RHS, NameStr) { + assert(pred >= CmpInst::FIRST_ICMP_PREDICATE && + pred <= CmpInst::LAST_ICMP_PREDICATE && + "Invalid ICmp predicate value"); + assert(getOperand(0)->getType() == getOperand(1)->getType() && + "Both operands to ICmp instruction are not of the same type!"); + // Check that the operands are the right type + assert((getOperand(0)->getType()->isIntOrIntVectorTy() || + getOperand(0)->getType()->isPointerTy()) && + "Invalid operand types for ICmp instruction"); + } + + /// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc. + /// @returns the predicate that would be the result if the operand were + /// regarded as signed. + /// @brief Return the signed version of the predicate + Predicate getSignedPredicate() const { + return getSignedPredicate(getPredicate()); + } + + /// This is a static version that you can use without an instruction. + /// @brief Return the signed version of the predicate. + static Predicate getSignedPredicate(Predicate pred); + + /// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc. + /// @returns the predicate that would be the result if the operand were + /// regarded as unsigned. + /// @brief Return the unsigned version of the predicate + Predicate getUnsignedPredicate() const { + return getUnsignedPredicate(getPredicate()); + } + + /// This is a static version that you can use without an instruction. + /// @brief Return the unsigned version of the predicate. + static Predicate getUnsignedPredicate(Predicate pred); + + /// isEquality - Return true if this predicate is either EQ or NE. This also + /// tests for commutativity. + static bool isEquality(Predicate P) { + return P == ICMP_EQ || P == ICMP_NE; + } + + /// isEquality - Return true if this predicate is either EQ or NE. This also + /// tests for commutativity. + bool isEquality() const { + return isEquality(getPredicate()); + } + + /// @returns true if the predicate of this ICmpInst is commutative + /// @brief Determine if this relation is commutative. + bool isCommutative() const { return isEquality(); } + + /// isRelational - Return true if the predicate is relational (not EQ or NE). + /// + bool isRelational() const { + return !isEquality(); + } + + /// isRelational - Return true if the predicate is relational (not EQ or NE). 
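+  ///
+  /// For example (a sketch; `L` and `R` are assumed to be i32 values):
+  ///
+  ///   ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_SLT, L, R, "cmp");
+  ///   Cmp->isSigned();              // true
+  ///   Cmp->isRelational();          // true: SLT is not EQ/NE
+  ///   Cmp->getUnsignedPredicate();  // ICMP_ULT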
+  ///
+  static bool isRelational(Predicate P) {
+    return !isEquality(P);
+  }
+
+  /// Initialize a set of values that all satisfy the predicate with C.
+  /// @brief Make a ConstantRange for a relation with a constant value.
+  static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
+
+  /// Exchange the two operands to this instruction in such a way that it does
+  /// not modify the semantics of the instruction. The predicate value may be
+  /// changed to retain the same result if the predicate is order dependent
+  /// (e.g. ult).
+  /// @brief Swap operands and adjust predicate.
+  void swapOperands() {
+    setPredicate(getSwappedPredicate());
+    Op<0>().swap(Op<1>());
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const ICmpInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::ICmp;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+//                               FCmpInst Class
+//===----------------------------------------------------------------------===//
+
+/// This instruction compares its operands according to the predicate given
+/// to the constructor. It only operates on floating point values or packed
+/// vectors of floating point values. The operands must be identical types.
+/// @brief Represents a floating point comparison operator.
+class FCmpInst: public CmpInst {
+protected:
+  /// @brief Clone an identical FCmpInst
+  virtual FCmpInst *clone_impl() const;
+public:
+  /// @brief Constructor with insert-before-instruction semantics.
+  FCmpInst(
+    Instruction *InsertBefore, ///< Where to insert
+    Predicate pred,  ///< The predicate to use for the comparison
+    Value *LHS,      ///< The left-hand-side of the expression
+    Value *RHS,      ///< The right-hand-side of the expression
+    const Twine &NameStr = ""  ///< Name of the instruction
+  ) : CmpInst(makeCmpResultType(LHS->getType()),
+              Instruction::FCmp, pred, LHS, RHS, NameStr,
+              InsertBefore) {
+    assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+           "Invalid FCmp predicate value");
+    assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+           "Both operands to FCmp instruction are not of the same type!");
+    // Check that the operands are the right type
+    assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+           "Invalid operand types for FCmp instruction");
+  }
+
+  /// @brief Constructor with insert-at-end semantics.
+  FCmpInst(
+    BasicBlock &InsertAtEnd, ///< Block to insert into.
+    Predicate pred,  ///< The predicate to use for the comparison
+    Value *LHS,      ///< The left-hand-side of the expression
+    Value *RHS,      ///< The right-hand-side of the expression
+    const Twine &NameStr = ""  ///< Name of the instruction
+  ) : CmpInst(makeCmpResultType(LHS->getType()),
+              Instruction::FCmp, pred, LHS, RHS, NameStr,
+              &InsertAtEnd) {
+    assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+           "Invalid FCmp predicate value");
+    assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+           "Both operands to FCmp instruction are not of the same type!");
+    // Check that the operands are the right type
+    assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+           "Invalid operand types for FCmp instruction");
+  }
+
+  /// @brief Constructor with no-insertion semantics
+  FCmpInst(
+    Predicate pred, ///< The predicate to use for the comparison
+    Value *LHS,     ///< The left-hand-side of the expression
+    Value *RHS,     ///< The right-hand-side of the expression
+    const Twine &NameStr = "" ///< Name of the instruction
+  ) : CmpInst(makeCmpResultType(LHS->getType()),
+              Instruction::FCmp, pred, LHS, RHS, NameStr) {
+    assert(pred <= FCmpInst::LAST_FCMP_PREDICATE &&
+           "Invalid FCmp predicate value");
+    assert(getOperand(0)->getType() == getOperand(1)->getType() &&
+           "Both operands to FCmp instruction are not of the same type!");
+    // Check that the operands are the right type
+    assert(getOperand(0)->getType()->isFPOrFPVectorTy() &&
+           "Invalid operand types for FCmp instruction");
+  }
+
+  /// @returns true if the predicate of this instruction is EQ or NE.
+  /// @brief Determine if this is an equality predicate.
+  bool isEquality() const {
+    return getPredicate() == FCMP_OEQ || getPredicate() == FCMP_ONE ||
+           getPredicate() == FCMP_UEQ || getPredicate() == FCMP_UNE;
+  }
+
+  /// @returns true if the predicate of this instruction is commutative.
+  /// @brief Determine if this is a commutative predicate.
+  bool isCommutative() const {
+    return isEquality() ||
+           getPredicate() == FCMP_FALSE ||
+           getPredicate() == FCMP_TRUE ||
+           getPredicate() == FCMP_ORD ||
+           getPredicate() == FCMP_UNO;
+  }
+
+  /// @returns true if the predicate is relational (not EQ or NE).
+  /// @brief Determine if this is a relational predicate.
+  bool isRelational() const { return !isEquality(); }
+
+  /// Exchange the two operands to this instruction in such a way that it does
+  /// not modify the semantics of the instruction. The predicate value may be
+  /// changed to retain the same result if the predicate is order dependent
+  /// (e.g. ult).
+  /// @brief Swap operands and adjust predicate.
+  void swapOperands() {
+    setPredicate(getSwappedPredicate());
+    Op<0>().swap(Op<1>());
+  }
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const FCmpInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::FCmp;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// CallInst - This class represents a function call, abstracting a target
+/// machine's calling convention. This class uses the low bit of the
+/// SubclassData field to indicate whether or not this is a tail call. The
+/// rest of the bits hold the calling convention of the call.
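+///
+/// A construction sketch (Callee, X, Y and InsertPt are illustrative names):
+/// @code
+///   Value *Args[] = { X, Y };
+///   CallInst *CI = CallInst::Create(Callee, Args, Args + 2, "call", InsertPt);
+///   CI->setTailCall();
+/// @endcode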
+/// +class CallInst : public Instruction { + AttrListPtr AttributeList; ///< parameter attributes for call + CallInst(const CallInst &CI); + void init(Value *Func, Value* const *Params, unsigned NumParams); + void init(Value *Func, Value *Actual1, Value *Actual2); + void init(Value *Func, Value *Actual); + void init(Value *Func); + + template + void init(Value *Func, + RandomAccessIterator ArgBegin, + RandomAccessIterator ArgEnd, + const Twine &NameStr, + // This argument ensures that we have an iterator we can + // do arithmetic on in constant time + std::random_access_iterator_tag) { + unsigned NumArgs = (unsigned)std::distance(ArgBegin, ArgEnd); + + // This requires that the iterator points to contiguous memory. + init(Func, NumArgs ? &*ArgBegin : 0, NumArgs); + setName(NameStr); + } + + /// Construct a CallInst given a range of arguments. RandomAccessIterator + /// must be a random-access iterator pointing to contiguous storage + /// (e.g. a std::vector<>::iterator). Checks are made for + /// random-accessness but not for contiguous storage as that would + /// incur runtime overhead. + /// @brief Construct a CallInst from a range of arguments + template + CallInst(Value *Func, + RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd, + const Twine &NameStr, Instruction *InsertBefore); + + /// Construct a CallInst given a range of arguments. RandomAccessIterator + /// must be a random-access iterator pointing to contiguous storage + /// (e.g. a std::vector<>::iterator). Checks are made for + /// random-accessness but not for contiguous storage as that would + /// incur runtime overhead. + /// @brief Construct a CallInst from a range of arguments + template + inline CallInst(Value *Func, + RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd, + const Twine &NameStr, BasicBlock *InsertAtEnd); + + CallInst(Value *F, Value *Actual, const Twine &NameStr, + Instruction *InsertBefore); + CallInst(Value *F, Value *Actual, const Twine &NameStr, + BasicBlock *InsertAtEnd); + explicit CallInst(Value *F, const Twine &NameStr, + Instruction *InsertBefore); + CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd); +protected: + virtual CallInst *clone_impl() const; +public: + template + static CallInst *Create(Value *Func, + RandomAccessIterator ArgBegin, + RandomAccessIterator ArgEnd, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(unsigned(ArgEnd - ArgBegin + 1)) + CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore); + } + template + static CallInst *Create(Value *Func, + RandomAccessIterator ArgBegin, + RandomAccessIterator ArgEnd, + const Twine &NameStr, BasicBlock *InsertAtEnd) { + return new(unsigned(ArgEnd - ArgBegin + 1)) + CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd); + } + static CallInst *Create(Value *F, Value *Actual, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(2) CallInst(F, Actual, NameStr, InsertBefore); + } + static CallInst *Create(Value *F, Value *Actual, const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(2) CallInst(F, Actual, NameStr, InsertAtEnd); + } + static CallInst *Create(Value *F, const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(1) CallInst(F, NameStr, InsertBefore); + } + static CallInst *Create(Value *F, const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(1) CallInst(F, NameStr, InsertAtEnd); + } + /// CreateMalloc - Generate the IR for a call to malloc: + /// 1. 
Compute the malloc call's argument as the specified type's size, + /// possibly multiplied by the array size if the array size is not + /// constant 1. + /// 2. Call malloc with that argument. + /// 3. Bitcast the result of the malloc call to the specified type. + static Instruction *CreateMalloc(Instruction *InsertBefore, + const Type *IntPtrTy, const Type *AllocTy, + Value *AllocSize, Value *ArraySize = 0, + Function* MallocF = 0, + const Twine &Name = ""); + static Instruction *CreateMalloc(BasicBlock *InsertAtEnd, + const Type *IntPtrTy, const Type *AllocTy, + Value *AllocSize, Value *ArraySize = 0, + Function* MallocF = 0, + const Twine &Name = ""); + /// CreateFree - Generate the IR for a call to the builtin free function. + static Instruction* CreateFree(Value* Source, Instruction *InsertBefore); + static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd); + + ~CallInst(); + + bool isTailCall() const { return getSubclassDataFromInstruction() & 1; } + void setTailCall(bool isTC = true) { + setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) | + unsigned(isTC)); + } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// getNumArgOperands - Return the number of call arguments. + /// + unsigned getNumArgOperands() const { return getNumOperands() - 1; } + + /// getArgOperand/setArgOperand - Return/set the i-th call argument. + /// + Value *getArgOperand(unsigned i) const { return getOperand(i); } + void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } + + /// getCallingConv/setCallingConv - Get or set the calling convention of this + /// function call. + CallingConv::ID getCallingConv() const { + return static_cast(getSubclassDataFromInstruction() >> 1); + } + void setCallingConv(CallingConv::ID CC) { + setInstructionSubclassData((getSubclassDataFromInstruction() & 1) | + (static_cast(CC) << 1)); + } + + /// getAttributes - Return the parameter attributes for this call. + /// + const AttrListPtr &getAttributes() const { return AttributeList; } + + /// setAttributes - Set the parameter attributes for this call. + /// + void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; } + + /// addAttribute - adds the attribute to the list of attributes. + void addAttribute(unsigned i, Attributes attr); + + /// removeAttribute - removes the attribute from the list of attributes. + void removeAttribute(unsigned i, Attributes attr); + + /// @brief Determine whether the call or the callee has the given attribute. + bool paramHasAttr(unsigned i, Attributes attr) const; + + /// @brief Extract the alignment for a call or parameter (0=unknown). + unsigned getParamAlignment(unsigned i) const { + return AttributeList.getParamAlignment(i); + } + + /// @brief Return true if the call should not be inlined. + bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); } + void setIsNoInline(bool Value = true) { + if (Value) addAttribute(~0, Attribute::NoInline); + else removeAttribute(~0, Attribute::NoInline); + } + + /// @brief Determine if the call does not access memory. + bool doesNotAccessMemory() const { + return paramHasAttr(~0, Attribute::ReadNone); + } + void setDoesNotAccessMemory(bool NotAccessMemory = true) { + if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone); + else removeAttribute(~0, Attribute::ReadNone); + } + + /// @brief Determine if the call does not access or only reads memory. 
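+  /// A sketch, assuming CI is an existing CallInst*:
+  /// @code
+  ///   CI->setOnlyReadsMemory();
+  ///   assert(CI->onlyReadsMemory());
+  /// @endcode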
+ bool onlyReadsMemory() const { + return doesNotAccessMemory() || paramHasAttr(~0, Attribute::ReadOnly); + } + void setOnlyReadsMemory(bool OnlyReadsMemory = true) { + if (OnlyReadsMemory) addAttribute(~0, Attribute::ReadOnly); + else removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone); + } + + /// @brief Determine if the call cannot return. + bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); } + void setDoesNotReturn(bool DoesNotReturn = true) { + if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn); + else removeAttribute(~0, Attribute::NoReturn); + } + + /// @brief Determine if the call cannot unwind. + bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); } + void setDoesNotThrow(bool DoesNotThrow = true) { + if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind); + else removeAttribute(~0, Attribute::NoUnwind); + } + + /// @brief Determine if the call returns a structure through first + /// pointer argument. + bool hasStructRetAttr() const { + // Be friendly and also check the callee. + return paramHasAttr(1, Attribute::StructRet); + } + + /// @brief Determine if any call argument is an aggregate passed by value. + bool hasByValArgument() const { + return AttributeList.hasAttrSomewhere(Attribute::ByVal); + } + + /// getCalledFunction - Return the function called, or null if this is an + /// indirect function invocation. + /// + Function *getCalledFunction() const { + return dyn_cast(Op<-1>()); + } + + /// getCalledValue - Get a pointer to the function that is invoked by this + /// instruction. + const Value *getCalledValue() const { return Op<-1>(); } + Value *getCalledValue() { return Op<-1>(); } + + /// setCalledFunction - Set the function called. + void setCalledFunction(Value* Fn) { + Op<-1>() = Fn; + } + + /// isInlineAsm - Check if this call is an inline asm statement. + bool isInlineAsm() const { + return isa(Op<-1>()); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const CallInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Call; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +private: + // Shadow Instruction::setInstructionSubclassData with a private forwarding + // method so that subclasses cannot accidentally use it. 
+ void setInstructionSubclassData(unsigned short D) { + Instruction::setInstructionSubclassData(D); + } +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +template +CallInst::CallInst(Value *Func, + RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd, + const Twine &NameStr, BasicBlock *InsertAtEnd) + : Instruction(cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Call, + OperandTraits::op_end(this) - (ArgEnd - ArgBegin + 1), + unsigned(ArgEnd - ArgBegin + 1), InsertAtEnd) { + init(Func, ArgBegin, ArgEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} + +template +CallInst::CallInst(Value *Func, + RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd, + const Twine &NameStr, Instruction *InsertBefore) + : Instruction(cast(cast(Func->getType()) + ->getElementType())->getReturnType(), + Instruction::Call, + OperandTraits::op_end(this) - (ArgEnd - ArgBegin + 1), + unsigned(ArgEnd - ArgBegin + 1), InsertBefore) { + init(Func, ArgBegin, ArgEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} + + +// Note: if you get compile errors about private methods then +// please update your code to use the high-level operand +// interfaces. See line 943 above. +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value) + +//===----------------------------------------------------------------------===// +// SelectInst Class +//===----------------------------------------------------------------------===// + +/// SelectInst - This class represents the LLVM 'select' instruction. +/// +class SelectInst : public Instruction { + void init(Value *C, Value *S1, Value *S2) { + assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select"); + Op<0>() = C; + Op<1>() = S1; + Op<2>() = S2; + } + + SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, + Instruction *InsertBefore) + : Instruction(S1->getType(), Instruction::Select, + &Op<0>(), 3, InsertBefore) { + init(C, S1, S2); + setName(NameStr); + } + SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction(S1->getType(), Instruction::Select, + &Op<0>(), 3, InsertAtEnd) { + init(C, S1, S2); + setName(NameStr); + } +protected: + virtual SelectInst *clone_impl() const; +public: + static SelectInst *Create(Value *C, Value *S1, Value *S2, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore); + } + static SelectInst *Create(Value *C, Value *S1, Value *S2, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(3) SelectInst(C, S1, S2, NameStr, InsertAtEnd); + } + + const Value *getCondition() const { return Op<0>(); } + const Value *getTrueValue() const { return Op<1>(); } + const Value *getFalseValue() const { return Op<2>(); } + Value *getCondition() { return Op<0>(); } + Value *getTrueValue() { return Op<1>(); } + Value *getFalseValue() { return Op<2>(); } + + /// areInvalidOperands - Return a string if the specified operands are invalid + /// for a select operation, otherwise return null. + static const char *areInvalidOperands(Value *Cond, Value *True, Value *False); + + /// Transparently provide more efficient getOperand methods. 
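+  /// For example, with Sel an illustrative SelectInst*:
+  /// @code
+  ///   Value *C = Sel->getCondition(); // equivalent to Sel->getOperand(0)
+  /// @endcode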
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + OtherOps getOpcode() const { + return static_cast(Instruction::getOpcode()); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SelectInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Select; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits : public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value) + +//===----------------------------------------------------------------------===// +// VAArgInst Class +//===----------------------------------------------------------------------===// + +/// VAArgInst - This class represents the va_arg llvm instruction, which returns +/// an argument of the specified type given a va_list and increments that list +/// +class VAArgInst : public UnaryInstruction { +protected: + virtual VAArgInst *clone_impl() const; + +public: + VAArgInst(Value *List, const Type *Ty, const Twine &NameStr = "", + Instruction *InsertBefore = 0) + : UnaryInstruction(Ty, VAArg, List, InsertBefore) { + setName(NameStr); + } + VAArgInst(Value *List, const Type *Ty, const Twine &NameStr, + BasicBlock *InsertAtEnd) + : UnaryInstruction(Ty, VAArg, List, InsertAtEnd) { + setName(NameStr); + } + + Value *getPointerOperand() { return getOperand(0); } + const Value *getPointerOperand() const { return getOperand(0); } + static unsigned getPointerOperandIndex() { return 0U; } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const VAArgInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == VAArg; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +//===----------------------------------------------------------------------===// +// ExtractElementInst Class +//===----------------------------------------------------------------------===// + +/// ExtractElementInst - This instruction extracts a single (scalar) +/// element from a VectorType value +/// +class ExtractElementInst : public Instruction { + ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr = "", + Instruction *InsertBefore = 0); + ExtractElementInst(Value *Vec, Value *Idx, const Twine &NameStr, + BasicBlock *InsertAtEnd); +protected: + virtual ExtractElementInst *clone_impl() const; + +public: + static ExtractElementInst *Create(Value *Vec, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore); + } + static ExtractElementInst *Create(Value *Vec, Value *Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd); + } + + /// isValidOperands - Return true if an extractelement instruction can be + /// formed with the specified operands. + static bool isValidOperands(const Value *Vec, const Value *Idx); + + Value *getVectorOperand() { return Op<0>(); } + Value *getIndexOperand() { return Op<1>(); } + const Value *getVectorOperand() const { return Op<0>(); } + const Value *getIndexOperand() const { return Op<1>(); } + + const VectorType *getVectorOperandType() const { + return reinterpret_cast(getVectorOperand()->getType()); + } + + + /// Transparently provide more efficient getOperand methods. 
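+  /// A construction sketch (Vec, Idx and InsertPt are illustrative names):
+  /// @code
+  ///   if (ExtractElementInst::isValidOperands(Vec, Idx))
+  ///     ExtractElementInst::Create(Vec, Idx, "elt", InsertPt);
+  /// @endcode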
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ExtractElementInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::ExtractElement; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits : + public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value) + +//===----------------------------------------------------------------------===// +// InsertElementInst Class +//===----------------------------------------------------------------------===// + +/// InsertElementInst - This instruction inserts a single (scalar) +/// element into a VectorType value +/// +class InsertElementInst : public Instruction { + InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0); + InsertElementInst(Value *Vec, Value *NewElt, Value *Idx, + const Twine &NameStr, BasicBlock *InsertAtEnd); +protected: + virtual InsertElementInst *clone_impl() const; + +public: + static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore); + } + static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd); + } + + /// isValidOperands - Return true if an insertelement instruction can be + /// formed with the specified operands. + static bool isValidOperands(const Value *Vec, const Value *NewElt, + const Value *Idx); + + /// getType - Overload to return most specific vector type. + /// + const VectorType *getType() const { + return reinterpret_cast(Instruction::getType()); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const InsertElementInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::InsertElement; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits : + public FixedNumOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value) + +//===----------------------------------------------------------------------===// +// ShuffleVectorInst Class +//===----------------------------------------------------------------------===// + +/// ShuffleVectorInst - This instruction constructs a fixed permutation of two +/// input vectors. +/// +class ShuffleVectorInst : public Instruction { +protected: + virtual ShuffleVectorInst *clone_impl() const; + +public: + // allocate space for exactly three operands + void *operator new(size_t s) { + return User::operator new(s, 3); + } + ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, + const Twine &NameStr = "", + Instruction *InsertBefor = 0); + ShuffleVectorInst(Value *V1, Value *V2, Value *Mask, + const Twine &NameStr, BasicBlock *InsertAtEnd); + + /// isValidOperands - Return true if a shufflevector instruction can be + /// formed with the specified operands. 
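+  /// For instance, the mask operand must be a constant vector of i32. A
+  /// sketch, with V1, V2 and Mask illustrative values:
+  /// @code
+  ///   assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
+  ///          "invalid shufflevector operands");
+  /// @endcode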
+  static bool isValidOperands(const Value *V1, const Value *V2,
+                              const Value *Mask);
+
+  /// getType - Overload to return most specific vector type.
+  ///
+  const VectorType *getType() const {
+    return reinterpret_cast<const VectorType*>(Instruction::getType());
+  }
+
+  /// Transparently provide more efficient getOperand methods.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// getMaskValue - Return the index from the shuffle mask for the specified
+  /// output result. This is either -1 if the element is undef or a number less
+  /// than 2*numelements.
+  int getMaskValue(unsigned i) const;
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const ShuffleVectorInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::ShuffleVector;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+template <>
+struct OperandTraits<ShuffleVectorInst> :
+  public FixedNumOperandTraits<ShuffleVectorInst, 3> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
+
+//===----------------------------------------------------------------------===//
+//                                ExtractValueInst Class
+//===----------------------------------------------------------------------===//
+
+/// ExtractValueInst - This instruction extracts a struct member or array
+/// element value from an aggregate value.
+///
+class ExtractValueInst : public UnaryInstruction {
+  SmallVector<unsigned, 4> Indices;
+
+  ExtractValueInst(const ExtractValueInst &EVI);
+  void init(const unsigned *Idx, unsigned NumIdx,
+            const Twine &NameStr);
+  void init(unsigned Idx, const Twine &NameStr);
+
+  template<typename RandomAccessIterator>
+  void init(RandomAccessIterator IdxBegin,
+            RandomAccessIterator IdxEnd,
+            const Twine &NameStr,
+            // This argument ensures that we have an iterator we can
+            // do arithmetic on in constant time
+            std::random_access_iterator_tag) {
+    unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
+
+    // There's no fundamental reason why we require at least one index
+    // (other than weirdness with &*IdxBegin being invalid; see
+    // getelementptr's init routine for example). But there's no
+    // present need to support it.
+    assert(NumIdx > 0 && "ExtractValueInst must have at least one index");
+
+    // This requires that the iterator points to contiguous memory.
+    init(&*IdxBegin, NumIdx, NameStr); // FIXME: for the general case
+                                       // we have to build an array here
+  }
+
+  /// getIndexedType - Returns the type of the element that would be extracted
+  /// with an extractvalue instruction with the specified parameters.
+  ///
+  /// Null is returned if the indices are invalid for the specified type.
+  ///
+  static const Type *getIndexedType(const Type *Agg,
+                                    const unsigned *Idx, unsigned NumIdx);
+
+  template<typename RandomAccessIterator>
+  static const Type *getIndexedType(const Type *Ptr,
+                                    RandomAccessIterator IdxBegin,
+                                    RandomAccessIterator IdxEnd,
+                                    // This argument ensures that we
+                                    // have an iterator we can do
+                                    // arithmetic on in constant time
+                                    std::random_access_iterator_tag) {
+    unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
+
+    if (NumIdx > 0)
+      // This requires that the iterator points to contiguous memory.
+      return getIndexedType(Ptr, &*IdxBegin, NumIdx);
+    else
+      return getIndexedType(Ptr, (const unsigned *)0, NumIdx);
+  }
+
+  /// Constructors - Create an extractvalue instruction with a base aggregate
+  /// value and a list of indices.
The first ctor can optionally insert before + /// an existing instruction, the second appends the new instruction to the + /// specified BasicBlock. + template + inline ExtractValueInst(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + Instruction *InsertBefore); + template + inline ExtractValueInst(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, BasicBlock *InsertAtEnd); + + // allocate space for exactly one operand + void *operator new(size_t s) { + return User::operator new(s, 1); + } +protected: + virtual ExtractValueInst *clone_impl() const; + +public: + template + static ExtractValueInst *Create(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new + ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertBefore); + } + template + static ExtractValueInst *Create(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertAtEnd); + } + + /// Constructors - These two creators are convenience methods because one + /// index extractvalue instructions are much more common than those with + /// more than one. + static ExtractValueInst *Create(Value *Agg, unsigned Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + unsigned Idxs[1] = { Idx }; + return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertBefore); + } + static ExtractValueInst *Create(Value *Agg, unsigned Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + unsigned Idxs[1] = { Idx }; + return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertAtEnd); + } + + /// getIndexedType - Returns the type of the element that would be extracted + /// with an extractvalue instruction with the specified parameters. + /// + /// Null is returned if the indices are invalid for the specified type. 
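+  /// A sketch, with STy an illustrative const StructType* whose second
+  /// field is an array of float:
+  /// @code
+  ///   unsigned Idxs[] = { 1, 2 };
+  ///   const Type *ETy = ExtractValueInst::getIndexedType(STy, Idxs, Idxs + 2);
+  ///   // ETy is the float type; invalid indices would yield null.
+  /// @endcode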
+ /// + template + static const Type *getIndexedType(const Type *Ptr, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd) { + return getIndexedType(Ptr, IdxBegin, IdxEnd, + typename std::iterator_traits:: + iterator_category()); + } + static const Type *getIndexedType(const Type *Ptr, unsigned Idx); + + typedef const unsigned* idx_iterator; + inline idx_iterator idx_begin() const { return Indices.begin(); } + inline idx_iterator idx_end() const { return Indices.end(); } + + Value *getAggregateOperand() { + return getOperand(0); + } + const Value *getAggregateOperand() const { + return getOperand(0); + } + static unsigned getAggregateOperandIndex() { + return 0U; // get index for modifying correct operand + } + + unsigned getNumIndices() const { // Note: always non-negative + return (unsigned)Indices.size(); + } + + bool hasIndices() const { + return true; + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ExtractValueInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::ExtractValue; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template +ExtractValueInst::ExtractValueInst(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + Instruction *InsertBefore) + : UnaryInstruction(checkType(getIndexedType(Agg->getType(), + IdxBegin, IdxEnd)), + ExtractValue, Agg, InsertBefore) { + init(IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} +template +ExtractValueInst::ExtractValueInst(Value *Agg, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) + : UnaryInstruction(checkType(getIndexedType(Agg->getType(), + IdxBegin, IdxEnd)), + ExtractValue, Agg, InsertAtEnd) { + init(IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} + + +//===----------------------------------------------------------------------===// +// InsertValueInst Class +//===----------------------------------------------------------------------===// + +/// InsertValueInst - This instruction inserts a struct field of array element +/// value into an aggregate value. +/// +class InsertValueInst : public Instruction { + SmallVector Indices; + + void *operator new(size_t, unsigned); // Do not implement + InsertValueInst(const InsertValueInst &IVI); + void init(Value *Agg, Value *Val, const unsigned *Idx, unsigned NumIdx, + const Twine &NameStr); + void init(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr); + + template + void init(Value *Agg, Value *Val, + RandomAccessIterator IdxBegin, RandomAccessIterator IdxEnd, + const Twine &NameStr, + // This argument ensures that we have an iterator we can + // do arithmetic on in constant time + std::random_access_iterator_tag) { + unsigned NumIdx = static_cast(std::distance(IdxBegin, IdxEnd)); + + // There's no fundamental reason why we require at least one index + // (other than weirdness with &*IdxBegin being invalid; see + // getelementptr's init routine for example). But there's no + // present need to support it. + assert(NumIdx > 0 && "InsertValueInst must have at least one index"); + + // This requires that the iterator points to contiguous memory. 
+ init(Agg, Val, &*IdxBegin, NumIdx, NameStr); // FIXME: for the general case + // we have to build an array here + } + + /// Constructors - Create a insertvalue instruction with a base aggregate + /// value, a value to insert, and a list of indices. The first ctor can + /// optionally insert before an existing instruction, the second appends + /// the new instruction to the specified BasicBlock. + template + inline InsertValueInst(Value *Agg, Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + Instruction *InsertBefore); + template + inline InsertValueInst(Value *Agg, Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, BasicBlock *InsertAtEnd); + + /// Constructors - These two constructors are convenience methods because one + /// and two index insertvalue instructions are so common. + InsertValueInst(Value *Agg, Value *Val, + unsigned Idx, const Twine &NameStr = "", + Instruction *InsertBefore = 0); + InsertValueInst(Value *Agg, Value *Val, unsigned Idx, + const Twine &NameStr, BasicBlock *InsertAtEnd); +protected: + virtual InsertValueInst *clone_impl() const; +public: + // allocate space for exactly two operands + void *operator new(size_t s) { + return User::operator new(s, 2); + } + + template + static InsertValueInst *Create(Value *Agg, Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd, + NameStr, InsertBefore); + } + template + static InsertValueInst *Create(Value *Agg, Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd, + NameStr, InsertAtEnd); + } + + /// Constructors - These two creators are convenience methods because one + /// index insertvalue instructions are much more common than those with + /// more than one. + static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx, + const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new InsertValueInst(Agg, Val, Idx, NameStr, InsertBefore); + } + static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx, + const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new InsertValueInst(Agg, Val, Idx, NameStr, InsertAtEnd); + } + + /// Transparently provide more efficient getOperand methods. 
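+  /// A construction sketch (Agg, Elt and InsertPt are illustrative names):
+  /// @code
+  ///   InsertValueInst *IVI =
+  ///     InsertValueInst::Create(Agg, Elt, 0, "ivi", InsertPt);
+  /// @endcode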
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + typedef const unsigned* idx_iterator; + inline idx_iterator idx_begin() const { return Indices.begin(); } + inline idx_iterator idx_end() const { return Indices.end(); } + + Value *getAggregateOperand() { + return getOperand(0); + } + const Value *getAggregateOperand() const { + return getOperand(0); + } + static unsigned getAggregateOperandIndex() { + return 0U; // get index for modifying correct operand + } + + Value *getInsertedValueOperand() { + return getOperand(1); + } + const Value *getInsertedValueOperand() const { + return getOperand(1); + } + static unsigned getInsertedValueOperandIndex() { + return 1U; // get index for modifying correct operand + } + + unsigned getNumIndices() const { // Note: always non-negative + return (unsigned)Indices.size(); + } + + bool hasIndices() const { + return true; + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const InsertValueInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::InsertValue; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +}; + +template <> +struct OperandTraits : + public FixedNumOperandTraits { +}; + +template +InsertValueInst::InsertValueInst(Value *Agg, + Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + Instruction *InsertBefore) + : Instruction(Agg->getType(), InsertValue, + OperandTraits::op_begin(this), + 2, InsertBefore) { + init(Agg, Val, IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} +template +InsertValueInst::InsertValueInst(Value *Agg, + Value *Val, + RandomAccessIterator IdxBegin, + RandomAccessIterator IdxEnd, + const Twine &NameStr, + BasicBlock *InsertAtEnd) + : Instruction(Agg->getType(), InsertValue, + OperandTraits::op_begin(this), + 2, InsertAtEnd) { + init(Agg, Val, IdxBegin, IdxEnd, NameStr, + typename std::iterator_traits + ::iterator_category()); +} + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value) + +//===----------------------------------------------------------------------===// +// PHINode Class +//===----------------------------------------------------------------------===// + +// PHINode - The PHINode class is used to represent the magical mystical PHI +// node, that can not exist in nature, but can be synthesized in a computer +// scientist's overactive imagination. +// +class PHINode : public Instruction { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + /// ReservedSpace - The number of operands actually allocated. NumOperands is + /// the number actually in use. 
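+  /// A typical population sketch (Ty, V1, V2, BB1, BB2 and InsertPt are
+  /// illustrative names):
+  /// @code
+  ///   PHINode *PN = PHINode::Create(Ty, "phi", InsertPt);
+  ///   PN->reserveOperandSpace(2); // 2*2 slots: one value + one block per edge
+  ///   PN->addIncoming(V1, BB1);
+  ///   PN->addIncoming(V2, BB2);
+  /// @endcode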
+ unsigned ReservedSpace; + PHINode(const PHINode &PN); + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } + explicit PHINode(const Type *Ty, const Twine &NameStr = "", + Instruction *InsertBefore = 0) + : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore), + ReservedSpace(0) { + setName(NameStr); + } + + PHINode(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) + : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd), + ReservedSpace(0) { + setName(NameStr); + } +protected: + virtual PHINode *clone_impl() const; +public: + static PHINode *Create(const Type *Ty, const Twine &NameStr = "", + Instruction *InsertBefore = 0) { + return new PHINode(Ty, NameStr, InsertBefore); + } + static PHINode *Create(const Type *Ty, const Twine &NameStr, + BasicBlock *InsertAtEnd) { + return new PHINode(Ty, NameStr, InsertAtEnd); + } + ~PHINode(); + + /// reserveOperandSpace - This method can be used to avoid repeated + /// reallocation of PHI operand lists by reserving space for the correct + /// number of operands before adding them. Unlike normal vector reserves, + /// this method can also be used to trim the operand space. + void reserveOperandSpace(unsigned NumValues) { + resizeOperands(NumValues*2); + } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// getNumIncomingValues - Return the number of incoming edges + /// + unsigned getNumIncomingValues() const { return getNumOperands()/2; } + + /// getIncomingValue - Return incoming value number x + /// + Value *getIncomingValue(unsigned i) const { + assert(i*2 < getNumOperands() && "Invalid value number!"); + return getOperand(i*2); + } + void setIncomingValue(unsigned i, Value *V) { + assert(i*2 < getNumOperands() && "Invalid value number!"); + setOperand(i*2, V); + } + static unsigned getOperandNumForIncomingValue(unsigned i) { + return i*2; + } + static unsigned getIncomingValueNumForOperand(unsigned i) { + assert(i % 2 == 0 && "Invalid incoming-value operand index!"); + return i/2; + } + + /// getIncomingBlock - Return incoming basic block number @p i. + /// + BasicBlock *getIncomingBlock(unsigned i) const { + return cast(getOperand(i*2+1)); + } + + /// getIncomingBlock - Return incoming basic block corresponding + /// to an operand of the PHI. + /// + BasicBlock *getIncomingBlock(const Use &U) const { + assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?"); + return cast((&U + 1)->get()); + } + + /// getIncomingBlock - Return incoming basic block corresponding + /// to value use iterator. + /// + template + BasicBlock *getIncomingBlock(value_use_iterator I) const { + return getIncomingBlock(I.getUse()); + } + + + void setIncomingBlock(unsigned i, BasicBlock *BB) { + setOperand(i*2+1, (Value*)BB); + } + static unsigned getOperandNumForIncomingBlock(unsigned i) { + return i*2+1; + } + static unsigned getIncomingBlockNumForOperand(unsigned i) { + assert(i % 2 == 1 && "Invalid incoming-block operand index!"); + return i/2; + } + + /// addIncoming - Add an incoming value to the end of the PHI list + /// + void addIncoming(Value *V, BasicBlock *BB) { + assert(V && "PHI node got a null value!"); + assert(BB && "PHI node got a null basic block!"); + assert(getType() == V->getType() && + "All operands to PHI node must be the same type as the PHI node!"); + unsigned OpNo = NumOperands; + if (OpNo+2 > ReservedSpace) + resizeOperands(0); // Get more space! + // Initialize some new operands. 
+ NumOperands = OpNo+2; + OperandList[OpNo] = V; + OperandList[OpNo+1] = (Value*)BB; + } + + /// removeIncomingValue - Remove an incoming value. This is useful if a + /// predecessor basic block is deleted. The value removed is returned. + /// + /// If the last incoming value for a PHI node is removed (and DeletePHIIfEmpty + /// is true), the PHI node is destroyed and any uses of it are replaced with + /// dummy values. The only time there should be zero incoming values to a PHI + /// node is when the block is dead, so this strategy is sound. + /// + Value *removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty = true); + + Value *removeIncomingValue(const BasicBlock *BB, bool DeletePHIIfEmpty=true) { + int Idx = getBasicBlockIndex(BB); + assert(Idx >= 0 && "Invalid basic block argument to remove!"); + return removeIncomingValue(Idx, DeletePHIIfEmpty); + } + + /// getBasicBlockIndex - Return the first index of the specified basic + /// block in the value list for this PHI. Returns -1 if no instance. + /// + int getBasicBlockIndex(const BasicBlock *BB) const { + Use *OL = OperandList; + for (unsigned i = 0, e = getNumOperands(); i != e; i += 2) + if (OL[i+1].get() == (const Value*)BB) return i/2; + return -1; + } + + Value *getIncomingValueForBlock(const BasicBlock *BB) const { + return getIncomingValue(getBasicBlockIndex(BB)); + } + + /// hasConstantValue - If the specified PHI node always merges together the + /// same value, return the value, otherwise return null. + Value *hasConstantValue() const; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const PHINode *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::PHI; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + private: + void resizeOperands(unsigned NumOperands); +}; + +template <> +struct OperandTraits : public HungoffOperandTraits<2> { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value) + + +//===----------------------------------------------------------------------===// +// ReturnInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// ReturnInst - Return a value (possibly void), from a function. Execution +/// does not continue in this function any longer. +/// +class ReturnInst : public TerminatorInst { + ReturnInst(const ReturnInst &RI); + +private: + // ReturnInst constructors: + // ReturnInst() - 'ret void' instruction + // ReturnInst( null) - 'ret void' instruction + // ReturnInst(Value* X) - 'ret X' instruction + // ReturnInst( null, Inst *I) - 'ret void' instruction, insert before I + // ReturnInst(Value* X, Inst *I) - 'ret X' instruction, insert before I + // ReturnInst( null, BB *B) - 'ret void' instruction, insert @ end of B + // ReturnInst(Value* X, BB *B) - 'ret X' instruction, insert @ end of B + // + // NOTE: If the Value* passed is of type void then the constructor behaves as + // if it was passed NULL. 
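+  //
+  // A creation sketch (C is an LLVMContext&; V and BB are illustrative):
+  //   ReturnInst::Create(C, V, BB);  // 'ret V' appended to BB
+  //   ReturnInst::Create(C, BB);     // 'ret void' appended to BB
+  //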
+ explicit ReturnInst(LLVMContext &C, Value *retVal = 0, + Instruction *InsertBefore = 0); + ReturnInst(LLVMContext &C, Value *retVal, BasicBlock *InsertAtEnd); + explicit ReturnInst(LLVMContext &C, BasicBlock *InsertAtEnd); +protected: + virtual ReturnInst *clone_impl() const; +public: + static ReturnInst* Create(LLVMContext &C, Value *retVal = 0, + Instruction *InsertBefore = 0) { + return new(!!retVal) ReturnInst(C, retVal, InsertBefore); + } + static ReturnInst* Create(LLVMContext &C, Value *retVal, + BasicBlock *InsertAtEnd) { + return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd); + } + static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) { + return new(0) ReturnInst(C, InsertAtEnd); + } + virtual ~ReturnInst(); + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + /// Convenience accessor. Returns null if there is no return value. + Value *getReturnValue() const { + return getNumOperands() != 0 ? getOperand(0) : 0; + } + + unsigned getNumSuccessors() const { return 0; } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const ReturnInst *) { return true; } + static inline bool classof(const Instruction *I) { + return (I->getOpcode() == Instruction::Ret); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + private: + virtual BasicBlock *getSuccessorV(unsigned idx) const; + virtual unsigned getNumSuccessorsV() const; + virtual void setSuccessorV(unsigned idx, BasicBlock *B); +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value) + +//===----------------------------------------------------------------------===// +// BranchInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// BranchInst - Conditional or Unconditional Branch instruction. +/// +class BranchInst : public TerminatorInst { + /// Ops list - Branches are strange. The operands are ordered: + /// [Cond, FalseDest,] TrueDest. This makes some accessors faster because + /// they don't have to check for cond/uncond branchness. These are mostly + /// accessed relative from op_end(). 
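+  ///
+  /// For a conditional branch the layout relative to op_end() is therefore:
+  /// @code
+  ///   Op<-3>() == Cond, Op<-2>() == FalseDest, Op<-1>() == TrueDest
+  /// @endcode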
+ BranchInst(const BranchInst &BI); + void AssertOK(); + // BranchInst constructors (where {B, T, F} are blocks, and C is a condition): + // BranchInst(BB *B) - 'br B' + // BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F' + // BranchInst(BB* B, Inst *I) - 'br B' insert before I + // BranchInst(BB* T, BB *F, Value *C, Inst *I) - 'br C, T, F', insert before I + // BranchInst(BB* B, BB *I) - 'br B' insert at end + // BranchInst(BB* T, BB *F, Value *C, BB *I) - 'br C, T, F', insert at end + explicit BranchInst(BasicBlock *IfTrue, Instruction *InsertBefore = 0); + BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, + Instruction *InsertBefore = 0); + BranchInst(BasicBlock *IfTrue, BasicBlock *InsertAtEnd); + BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond, + BasicBlock *InsertAtEnd); +protected: + virtual BranchInst *clone_impl() const; +public: + static BranchInst *Create(BasicBlock *IfTrue, Instruction *InsertBefore = 0) { + return new(1) BranchInst(IfTrue, InsertBefore); + } + static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, Instruction *InsertBefore = 0) { + return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore); + } + static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) { + return new(1) BranchInst(IfTrue, InsertAtEnd); + } + static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse, + Value *Cond, BasicBlock *InsertAtEnd) { + return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + bool isUnconditional() const { return getNumOperands() == 1; } + bool isConditional() const { return getNumOperands() == 3; } + + Value *getCondition() const { + assert(isConditional() && "Cannot get condition of an uncond branch!"); + return Op<-3>(); + } + + void setCondition(Value *V) { + assert(isConditional() && "Cannot set condition of unconditional branch!"); + Op<-3>() = V; + } + + unsigned getNumSuccessors() const { return 1+isConditional(); } + + BasicBlock *getSuccessor(unsigned i) const { + assert(i < getNumSuccessors() && "Successor # out of range for Branch!"); + return cast_or_null((&Op<-1>() - i)->get()); + } + + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + assert(idx < getNumSuccessors() && "Successor # out of range for Branch!"); + *(&Op<-1>() - idx) = (Value*)NewSucc; + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const BranchInst *) { return true; } + static inline bool classof(const Instruction *I) { + return (I->getOpcode() == Instruction::Br); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +private: + virtual BasicBlock *getSuccessorV(unsigned idx) const; + virtual unsigned getNumSuccessorsV() const; + virtual void setSuccessorV(unsigned idx, BasicBlock *B); +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value) + +//===----------------------------------------------------------------------===// +// SwitchInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// SwitchInst - Multiway switch +/// +class SwitchInst : public TerminatorInst { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + unsigned ReservedSpace; + // Operand[0] = Value to switch 
on + // Operand[1] = Default basic block destination + // Operand[2n ] = Value to match + // Operand[2n+1] = BasicBlock to go to on match + SwitchInst(const SwitchInst &SI); + void init(Value *Value, BasicBlock *Default, unsigned NumReserved); + void resizeOperands(unsigned No); + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } + /// SwitchInst ctor - Create a new switch instruction, specifying a value to + /// switch on and a default destination. The number of additional cases can + /// be specified here to make memory allocation more efficient. This + /// constructor can also autoinsert before another instruction. + SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, + Instruction *InsertBefore); + + /// SwitchInst ctor - Create a new switch instruction, specifying a value to + /// switch on and a default destination. The number of additional cases can + /// be specified here to make memory allocation more efficient. This + /// constructor also autoinserts at the end of the specified BasicBlock. + SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases, + BasicBlock *InsertAtEnd); +protected: + virtual SwitchInst *clone_impl() const; +public: + static SwitchInst *Create(Value *Value, BasicBlock *Default, + unsigned NumCases, Instruction *InsertBefore = 0) { + return new SwitchInst(Value, Default, NumCases, InsertBefore); + } + static SwitchInst *Create(Value *Value, BasicBlock *Default, + unsigned NumCases, BasicBlock *InsertAtEnd) { + return new SwitchInst(Value, Default, NumCases, InsertAtEnd); + } + ~SwitchInst(); + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + // Accessor Methods for Switch stmt + Value *getCondition() const { return getOperand(0); } + void setCondition(Value *V) { setOperand(0, V); } + + BasicBlock *getDefaultDest() const { + return cast(getOperand(1)); + } + + /// getNumCases - return the number of 'cases' in this switch instruction. + /// Note that case #0 is always the default case. + unsigned getNumCases() const { + return getNumOperands()/2; + } + + /// getCaseValue - Return the specified case value. Note that case #0, the + /// default destination, does not have a case value. + ConstantInt *getCaseValue(unsigned i) { + assert(i && i < getNumCases() && "Illegal case value to get!"); + return getSuccessorValue(i); + } + + /// getCaseValue - Return the specified case value. Note that case #0, the + /// default destination, does not have a case value. + const ConstantInt *getCaseValue(unsigned i) const { + assert(i && i < getNumCases() && "Illegal case value to get!"); + return getSuccessorValue(i); + } + + /// findCaseValue - Search all of the case values for the specified constant. + /// If it is explicitly handled, return the case number of it, otherwise + /// return 0 to indicate that it is handled by the default handler. + unsigned findCaseValue(const ConstantInt *C) const { + for (unsigned i = 1, e = getNumCases(); i != e; ++i) + if (getCaseValue(i) == C) + return i; + return 0; + } + + /// findCaseDest - Finds the unique case value for a given successor. Returns + /// null if the successor is not found, not unique, or is the default case. + ConstantInt *findCaseDest(BasicBlock *BB) { + if (BB == getDefaultDest()) return NULL; + + ConstantInt *CI = NULL; + for (unsigned i = 1, e = getNumCases(); i != e; ++i) { + if (getSuccessor(i) == BB) { + if (CI) return NULL; // Multiple cases lead to BB. 
+ else CI = getCaseValue(i); + } + } + return CI; + } + + /// addCase - Add an entry to the switch instruction... + /// + void addCase(ConstantInt *OnVal, BasicBlock *Dest); + + /// removeCase - This method removes the specified successor from the switch + /// instruction. Note that this cannot be used to remove the default + /// destination (successor #0). Also note that this operation may reorder the + /// remaining cases at index idx and above. + /// + void removeCase(unsigned idx); + + unsigned getNumSuccessors() const { return getNumOperands()/2; } + BasicBlock *getSuccessor(unsigned idx) const { + assert(idx < getNumSuccessors() &&"Successor idx out of range for switch!"); + return cast(getOperand(idx*2+1)); + } + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + assert(idx < getNumSuccessors() && "Successor # out of range for switch!"); + setOperand(idx*2+1, (Value*)NewSucc); + } + + // getSuccessorValue - Return the value associated with the specified + // successor. + ConstantInt *getSuccessorValue(unsigned idx) const { + assert(idx < getNumSuccessors() && "Successor # out of range!"); + return reinterpret_cast(getOperand(idx*2)); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const SwitchInst *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Switch; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } +private: + virtual BasicBlock *getSuccessorV(unsigned idx) const; + virtual unsigned getNumSuccessorsV() const; + virtual void setSuccessorV(unsigned idx, BasicBlock *B); +}; + +template <> +struct OperandTraits : public HungoffOperandTraits<2> { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value) + + +//===----------------------------------------------------------------------===// +// IndirectBrInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// IndirectBrInst - Indirect Branch Instruction. +/// +class IndirectBrInst : public TerminatorInst { + void *operator new(size_t, unsigned); // DO NOT IMPLEMENT + unsigned ReservedSpace; + // Operand[0] = Value to switch on + // Operand[1] = Default basic block destination + // Operand[2n ] = Value to match + // Operand[2n+1] = BasicBlock to go to on match + IndirectBrInst(const IndirectBrInst &IBI); + void init(Value *Address, unsigned NumDests); + void resizeOperands(unsigned No); + // allocate space for exactly zero operands + void *operator new(size_t s) { + return User::operator new(s, 0); + } + /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an + /// Address to jump to. The number of expected destinations can be specified + /// here to make memory allocation more efficient. This constructor can also + /// autoinsert before another instruction. + IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore); + + /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an + /// Address to jump to. The number of expected destinations can be specified + /// here to make memory allocation more efficient. This constructor also + /// autoinserts at the end of the specified BasicBlock. 
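+  ///
+  /// A sketch via the public factory (Addr is an illustrative blockaddress
+  /// value; BB1, BB2 and InsertPt are illustrative too):
+  /// @code
+  ///   IndirectBrInst *IBI = IndirectBrInst::Create(Addr, 2, InsertPt);
+  ///   IBI->addDestination(BB1);
+  ///   IBI->addDestination(BB2);
+  /// @endcode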
+
+
+//===----------------------------------------------------------------------===//
+// IndirectBrInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// IndirectBrInst - Indirect Branch Instruction.
+///
+class IndirectBrInst : public TerminatorInst {
+  void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+  unsigned ReservedSpace;
+  // Operand[0]   = Address to jump to
+  // Operand[n+1] = n-th possible destination block
+  IndirectBrInst(const IndirectBrInst &IBI);
+  void init(Value *Address, unsigned NumDests);
+  void resizeOperands(unsigned No);
+  // allocate space for exactly zero operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 0);
+  }
+  /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+  /// Address to jump to. The number of expected destinations can be specified
+  /// here to make memory allocation more efficient. This constructor can also
+  /// autoinsert before another instruction.
+  IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
+
+  /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+  /// Address to jump to. The number of expected destinations can be specified
+  /// here to make memory allocation more efficient. This constructor also
+  /// autoinserts at the end of the specified BasicBlock.
+  IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
+protected:
+  virtual IndirectBrInst *clone_impl() const;
+public:
+  static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+                                Instruction *InsertBefore = 0) {
+    return new IndirectBrInst(Address, NumDests, InsertBefore);
+  }
+  static IndirectBrInst *Create(Value *Address, unsigned NumDests,
+                                BasicBlock *InsertAtEnd) {
+    return new IndirectBrInst(Address, NumDests, InsertAtEnd);
+  }
+  ~IndirectBrInst();
+
+  /// Provide fast operand accessors.
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  // Accessor Methods for IndirectBrInst instruction.
+  Value *getAddress() { return getOperand(0); }
+  const Value *getAddress() const { return getOperand(0); }
+  void setAddress(Value *V) { setOperand(0, V); }
+
+
+  /// getNumDestinations - return the number of possible destinations in this
+  /// indirectbr instruction.
+  unsigned getNumDestinations() const { return getNumOperands()-1; }
+
+  /// getDestination - Return the specified destination.
+  BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
+  const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
+
+  /// addDestination - Add a destination.
+  ///
+  void addDestination(BasicBlock *Dest);
+
+  /// removeDestination - This method removes the specified successor from the
+  /// indirectbr instruction.
+  void removeDestination(unsigned i);
+
+  unsigned getNumSuccessors() const { return getNumOperands()-1; }
+  BasicBlock *getSuccessor(unsigned i) const {
+    return cast<BasicBlock>(getOperand(i+1));
+  }
+  void setSuccessor(unsigned i, BasicBlock *NewSucc) {
+    setOperand(i+1, (Value*)NewSucc);
+  }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const IndirectBrInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::IndirectBr;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  virtual BasicBlock *getSuccessorV(unsigned idx) const;
+  virtual unsigned getNumSuccessorsV() const;
+  virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+template <>
+struct OperandTraits<IndirectBrInst> : public HungoffOperandTraits<1> {
+};
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
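+
+// Illustrative usage sketch (not part of the original header): an indirectbr
+// jumps to a dynamically computed block address. 'F', 'TargetBB' and 'CurBB'
+// are hypothetical values assumed to exist in context.
+//
+//   Value *Addr = BlockAddress::get(&F, TargetBB);
+//   IndirectBrInst *IBI = IndirectBrInst::Create(Addr, /*NumDests=*/1, CurBB);
+//   IBI->addDestination(TargetBB);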
+
+
+//===----------------------------------------------------------------------===//
+// InvokeInst Class
+//===----------------------------------------------------------------------===//
+
+/// InvokeInst - Invoke instruction. The SubclassData field is used to hold the
+/// calling convention of the call.
+///
+class InvokeInst : public TerminatorInst {
+  AttrListPtr AttributeList;
+  InvokeInst(const InvokeInst &BI);
+  void init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
+            Value* const *Args, unsigned NumArgs);
+
+  template<typename RandomAccessIterator>
+  void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+            RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+            const Twine &NameStr,
+            // This argument ensures that we have an iterator we can
+            // do arithmetic on in constant time
+            std::random_access_iterator_tag) {
+    unsigned NumArgs = (unsigned)std::distance(ArgBegin, ArgEnd);
+
+    // This requires that the iterator points to contiguous memory.
+    init(Func, IfNormal, IfException, NumArgs ? &*ArgBegin : 0, NumArgs);
+    setName(NameStr);
+  }
+
+  /// Construct an InvokeInst given a range of arguments.
+  /// RandomAccessIterator must be a random-access iterator pointing to
+  /// contiguous storage (e.g. a std::vector<>::iterator). Checks are
+  /// made for random-accessness but not for contiguous storage as
+  /// that would incur runtime overhead.
+  ///
+  /// @brief Construct an InvokeInst from a range of arguments
+  template<typename RandomAccessIterator>
+  inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+                    RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+                    unsigned Values,
+                    const Twine &NameStr, Instruction *InsertBefore);
+
+  /// Construct an InvokeInst given a range of arguments.
+  /// RandomAccessIterator must be a random-access iterator pointing to
+  /// contiguous storage (e.g. a std::vector<>::iterator). Checks are
+  /// made for random-accessness but not for contiguous storage as
+  /// that would incur runtime overhead.
+  ///
+  /// @brief Construct an InvokeInst from a range of arguments
+  template<typename RandomAccessIterator>
+  inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+                    RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+                    unsigned Values,
+                    const Twine &NameStr, BasicBlock *InsertAtEnd);
+protected:
+  virtual InvokeInst *clone_impl() const;
+public:
+  template<typename RandomAccessIterator>
+  static InvokeInst *Create(Value *Func,
+                            BasicBlock *IfNormal, BasicBlock *IfException,
+                            RandomAccessIterator ArgBegin,
+                            RandomAccessIterator ArgEnd,
+                            const Twine &NameStr = "",
+                            Instruction *InsertBefore = 0) {
+    unsigned Values(ArgEnd - ArgBegin + 3);
+    return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
+                                  Values, NameStr, InsertBefore);
+  }
+  template<typename RandomAccessIterator>
+  static InvokeInst *Create(Value *Func,
+                            BasicBlock *IfNormal, BasicBlock *IfException,
+                            RandomAccessIterator ArgBegin,
+                            RandomAccessIterator ArgEnd,
+                            const Twine &NameStr,
+                            BasicBlock *InsertAtEnd) {
+    unsigned Values(ArgEnd - ArgBegin + 3);
+    return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
+                                  Values, NameStr, InsertAtEnd);
+  }
+
+  /// Provide fast operand accessors
+  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+
+  /// getNumArgOperands - Return the number of invoke arguments.
+  ///
+  unsigned getNumArgOperands() const { return getNumOperands() - 3; }
+
+  /// getArgOperand/setArgOperand - Return/set the i-th invoke argument.
+  ///
+  Value *getArgOperand(unsigned i) const { return getOperand(i); }
+  void setArgOperand(unsigned i, Value *v) { setOperand(i, v); }
+
+  /// getCallingConv/setCallingConv - Get or set the calling convention of this
+  /// function call.
+  CallingConv::ID getCallingConv() const {
+    return static_cast<CallingConv::ID>(getSubclassDataFromInstruction());
+  }
+  void setCallingConv(CallingConv::ID CC) {
+    setInstructionSubclassData(static_cast<unsigned short>(CC));
+  }
+
+  /// getAttributes - Return the parameter attributes for this invoke.
+  ///
+  const AttrListPtr &getAttributes() const { return AttributeList; }
+
+  /// setAttributes - Set the parameter attributes for this invoke.
+  ///
+  void setAttributes(const AttrListPtr &Attrs) { AttributeList = Attrs; }
+
+  /// addAttribute - adds the attribute to the list of attributes.
+  void addAttribute(unsigned i, Attributes attr);
+
+  /// removeAttribute - removes the attribute from the list of attributes.
+  void removeAttribute(unsigned i, Attributes attr);
+
+  /// @brief Determine whether the call or the callee has the given attribute.
+  bool paramHasAttr(unsigned i, Attributes attr) const;
+
+  /// @brief Extract the alignment for a call or parameter (0=unknown).
+  unsigned getParamAlignment(unsigned i) const {
+    return AttributeList.getParamAlignment(i);
+  }
+
+  /// @brief Return true if the call should not be inlined.
+  bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); }
+  void setIsNoInline(bool Value = true) {
+    if (Value) addAttribute(~0, Attribute::NoInline);
+    else removeAttribute(~0, Attribute::NoInline);
+  }
+
+  /// @brief Determine if the call does not access memory.
+  bool doesNotAccessMemory() const {
+    return paramHasAttr(~0, Attribute::ReadNone);
+  }
+  void setDoesNotAccessMemory(bool NotAccessMemory = true) {
+    if (NotAccessMemory) addAttribute(~0, Attribute::ReadNone);
+    else removeAttribute(~0, Attribute::ReadNone);
+  }
+
+  /// @brief Determine if the call does not access or only reads memory.
+  bool onlyReadsMemory() const {
+    return doesNotAccessMemory() || paramHasAttr(~0, Attribute::ReadOnly);
+  }
+  void setOnlyReadsMemory(bool OnlyReadsMemory = true) {
+    if (OnlyReadsMemory) addAttribute(~0, Attribute::ReadOnly);
+    else removeAttribute(~0, Attribute::ReadOnly | Attribute::ReadNone);
+  }
+
+  /// @brief Determine if the call cannot return.
+  bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); }
+  void setDoesNotReturn(bool DoesNotReturn = true) {
+    if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn);
+    else removeAttribute(~0, Attribute::NoReturn);
+  }
+
+  /// @brief Determine if the call cannot unwind.
+  bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); }
+  void setDoesNotThrow(bool DoesNotThrow = true) {
+    if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind);
+    else removeAttribute(~0, Attribute::NoUnwind);
+  }
+
+  /// @brief Determine if the call returns a structure through first
+  /// pointer argument.
+  bool hasStructRetAttr() const {
+    // Be friendly and also check the callee.
+    return paramHasAttr(1, Attribute::StructRet);
+  }
+
+  /// @brief Determine if any call argument is an aggregate passed by value.
+  bool hasByValArgument() const {
+    return AttributeList.hasAttrSomewhere(Attribute::ByVal);
+  }
+
+  /// getCalledFunction - Return the function called, or null if this is an
+  /// indirect function invocation.
+  ///
+  Function *getCalledFunction() const {
+    return dyn_cast<Function>(Op<-3>());
+  }
+
+  /// getCalledValue - Get a pointer to the function that is invoked by this
+  /// instruction
+  const Value *getCalledValue() const { return Op<-3>(); }
+        Value *getCalledValue()       { return Op<-3>(); }
+
+  /// setCalledFunction - Set the function called.
+  void setCalledFunction(Value* Fn) {
+    Op<-3>() = Fn;
+  }
+
+  // get*Dest - Return the destination basic blocks...
+  BasicBlock *getNormalDest() const {
+    return cast<BasicBlock>(Op<-2>());
+  }
+  BasicBlock *getUnwindDest() const {
+    return cast<BasicBlock>(Op<-1>());
+  }
+  void setNormalDest(BasicBlock *B) {
+    Op<-2>() = reinterpret_cast<Value*>(B);
+  }
+  void setUnwindDest(BasicBlock *B) {
+    Op<-1>() = reinterpret_cast<Value*>(B);
+  }
+
+  BasicBlock *getSuccessor(unsigned i) const {
+    assert(i < 2 && "Successor # out of range for invoke!");
+    return i == 0 ? getNormalDest() : getUnwindDest();
+  }
+
+  void setSuccessor(unsigned idx, BasicBlock *NewSucc) {
+    assert(idx < 2 && "Successor # out of range for invoke!");
+    *(&Op<-2>() + idx) = reinterpret_cast<Value*>(NewSucc);
+  }
+
+  unsigned getNumSuccessors() const { return 2; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const InvokeInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return (I->getOpcode() == Instruction::Invoke);
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+
+private:
+  virtual BasicBlock *getSuccessorV(unsigned idx) const;
+  virtual unsigned getNumSuccessorsV() const;
+  virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+
+  // Shadow Instruction::setInstructionSubclassData with a private forwarding
+  // method so that subclasses cannot accidentally use it.
+  void setInstructionSubclassData(unsigned short D) {
+    Instruction::setInstructionSubclassData(D);
+  }
+};
+
+template <>
+struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
+};
+
+template<typename RandomAccessIterator>
+InvokeInst::InvokeInst(Value *Func,
+                       BasicBlock *IfNormal, BasicBlock *IfException,
+                       RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd,
+                       unsigned Values,
+                       const Twine &NameStr, Instruction *InsertBefore)
+  : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
+                                      ->getElementType())->getReturnType(),
+                   Instruction::Invoke,
+                   OperandTraits<InvokeInst>::op_end(this) - Values,
+                   Values, InsertBefore) {
+  init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
+}
+template<typename RandomAccessIterator>
+InvokeInst::InvokeInst(Value *Func,
+                       BasicBlock *IfNormal, BasicBlock *IfException,
+                       RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd,
+                       unsigned Values,
+                       const Twine &NameStr, BasicBlock *InsertAtEnd)
+  : TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
+                                      ->getElementType())->getReturnType(),
+                   Instruction::Invoke,
+                   OperandTraits<InvokeInst>::op_end(this) - Values,
+                   Values, InsertAtEnd) {
+  init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
+       typename std::iterator_traits<RandomAccessIterator>
+       ::iterator_category());
+}
+
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
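+
+// Illustrative usage sketch (not part of the original header): invoking
+// 'Callee' with two arguments via the iterator-range Create. 'Arg0', 'Arg1',
+// 'NormalBB', 'UnwindBB' and 'CurBB' are hypothetical values.
+//
+//   Value *Args[] = { Arg0, Arg1 };
+//   InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
+//                                       Args, Args + 2, "call", CurBB);
+//   II->setCallingConv(CallingConv::C);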
+
+//===----------------------------------------------------------------------===//
+// UnwindInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// UnwindInst - Immediately exit the current function, unwinding the stack
+/// until an invoke instruction is found.
+///
+class UnwindInst : public TerminatorInst {
+  void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+protected:
+  virtual UnwindInst *clone_impl() const;
+public:
+  // allocate space for exactly zero operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 0);
+  }
+  explicit UnwindInst(LLVMContext &C, Instruction *InsertBefore = 0);
+  explicit UnwindInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
+  unsigned getNumSuccessors() const { return 0; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const UnwindInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::Unwind;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  virtual BasicBlock *getSuccessorV(unsigned idx) const;
+  virtual unsigned getNumSuccessorsV() const;
+  virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+//===----------------------------------------------------------------------===//
+// UnreachableInst Class
+//===----------------------------------------------------------------------===//
+
+//===---------------------------------------------------------------------------
+/// UnreachableInst - This function has undefined behavior. In particular, the
+/// presence of this instruction indicates some higher level knowledge that the
+/// end of the block cannot be reached.
+///
+class UnreachableInst : public TerminatorInst {
+  void *operator new(size_t, unsigned); // DO NOT IMPLEMENT
+protected:
+  virtual UnreachableInst *clone_impl() const;
+
+public:
+  // allocate space for exactly zero operands
+  void *operator new(size_t s) {
+    return User::operator new(s, 0);
+  }
+  explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = 0);
+  explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
+  unsigned getNumSuccessors() const { return 0; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const UnreachableInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Instruction::Unreachable;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+private:
+  virtual BasicBlock *getSuccessorV(unsigned idx) const;
+  virtual unsigned getNumSuccessorsV() const;
+  virtual void setSuccessorV(unsigned idx, BasicBlock *B);
+};
+
+//===----------------------------------------------------------------------===//
+// TruncInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a truncation of integer types.
+class TruncInst : public CastInst {
+protected:
+  /// @brief Clone an identical TruncInst
+  virtual TruncInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  TruncInst(
+    Value *S,                     ///< The value to be truncated
+    const Type *Ty,               ///< The (smaller) type to truncate to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  TruncInst(
+    Value *S,                     ///< The value to be truncated
+    const Type *Ty,               ///< The (smaller) type to truncate to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const TruncInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == Trunc;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// ZExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents zero extension of integer types.
+class ZExtInst : public CastInst {
+protected:
+  /// @brief Clone an identical ZExtInst
+  virtual ZExtInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  ZExtInst(
+    Value *S,                     ///< The value to be zero extended
+    const Type *Ty,               ///< The type to zero extend to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end semantics.
+  ZExtInst(
+    Value *S,                     ///< The value to be zero extended
+    const Type *Ty,               ///< The type to zero extend to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const ZExtInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == ZExt;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
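+
+// Illustrative usage sketch (not part of the original header): cast
+// instructions are ordinary Instructions created with these constructors.
+// 'V', 'Ctx' and 'InsertPt' are hypothetical values assumed to exist.
+//
+//   Value *Wide = new ZExtInst(V, Type::getInt64Ty(Ctx), "wide", InsertPt);
+//   if (isa<ZExtInst>(Wide)) { /* zero extension confirmed */ }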
+
+//===----------------------------------------------------------------------===//
+// SExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a sign extension of integer types.
+class SExtInst : public CastInst {
+protected:
+  /// @brief Clone an identical SExtInst
+  virtual SExtInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  SExtInst(
+    Value *S,                     ///< The value to be sign extended
+    const Type *Ty,               ///< The type to sign extend to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  SExtInst(
+    Value *S,                     ///< The value to be sign extended
+    const Type *Ty,               ///< The type to sign extend to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const SExtInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == SExt;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// FPTruncInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a truncation of floating point types.
+class FPTruncInst : public CastInst {
+protected:
+  /// @brief Clone an identical FPTruncInst
+  virtual FPTruncInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  FPTruncInst(
+    Value *S,                     ///< The value to be truncated
+    const Type *Ty,               ///< The type to truncate to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  FPTruncInst(
+    Value *S,                     ///< The value to be truncated
+    const Type *Ty,               ///< The type to truncate to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const FPTruncInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == FPTrunc;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// FPExtInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents an extension of floating point types.
+class FPExtInst : public CastInst {
+protected:
+  /// @brief Clone an identical FPExtInst
+  virtual FPExtInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  FPExtInst(
+    Value *S,                     ///< The value to be extended
+    const Type *Ty,               ///< The type to extend to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  FPExtInst(
+    Value *S,                     ///< The value to be extended
+    const Type *Ty,               ///< The type to extend to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const FPExtInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == FPExt;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// UIToFPInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast unsigned integer to floating point.
+class UIToFPInst : public CastInst {
+protected:
+  /// @brief Clone an identical UIToFPInst
+  virtual UIToFPInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  UIToFPInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  UIToFPInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const UIToFPInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == UIToFP;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// SIToFPInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast from signed integer to floating point.
+class SIToFPInst : public CastInst {
+protected:
+  /// @brief Clone an identical SIToFPInst
+  virtual SIToFPInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  SIToFPInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  SIToFPInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const SIToFPInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == SIToFP;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// FPToUIInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast from floating point to unsigned integer
+class FPToUIInst : public CastInst {
+protected:
+  /// @brief Clone an identical FPToUIInst
+  virtual FPToUIInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  FPToUIInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  FPToUIInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< Where to insert the new instruction
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const FPToUIInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == FPToUI;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// FPToSIInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast from floating point to signed integer.
+class FPToSIInst : public CastInst {
+protected:
+  /// @brief Clone an identical FPToSIInst
+  virtual FPToSIInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  FPToSIInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  FPToSIInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const FPToSIInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == FPToSI;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// IntToPtrInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast from an integer to a pointer.
+class IntToPtrInst : public CastInst {
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  IntToPtrInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  IntToPtrInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  /// @brief Clone an identical IntToPtrInst
+  virtual IntToPtrInst *clone_impl() const;
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const IntToPtrInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == IntToPtr;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// PtrToIntInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a cast from a pointer to an integer
+class PtrToIntInst : public CastInst {
+protected:
+  /// @brief Clone an identical PtrToIntInst
+  virtual PtrToIntInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  PtrToIntInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  PtrToIntInst(
+    Value *S,                     ///< The value to be converted
+    const Type *Ty,               ///< The type to convert to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const PtrToIntInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == PtrToInt;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+//===----------------------------------------------------------------------===//
+// BitCastInst Class
+//===----------------------------------------------------------------------===//
+
+/// @brief This class represents a no-op cast from one type to another.
+class BitCastInst : public CastInst {
+protected:
+  /// @brief Clone an identical BitCastInst
+  virtual BitCastInst *clone_impl() const;
+
+public:
+  /// @brief Constructor with insert-before-instruction semantics
+  BitCastInst(
+    Value *S,                     ///< The value to be cast
+    const Type *Ty,               ///< The type to cast to
+    const Twine &NameStr = "",    ///< A name for the new instruction
+    Instruction *InsertBefore = 0 ///< Where to insert the new instruction
+  );
+
+  /// @brief Constructor with insert-at-end-of-block semantics
+  BitCastInst(
+    Value *S,                     ///< The value to be cast
+    const Type *Ty,               ///< The type to cast to
+    const Twine &NameStr,         ///< A name for the new instruction
+    BasicBlock *InsertAtEnd       ///< The block to insert the instruction into
+  );
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const BitCastInst *) { return true; }
+  static inline bool classof(const Instruction *I) {
+    return I->getOpcode() == BitCast;
+  }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) && classof(cast<Instruction>(V));
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/IntrinsicInst.h b/final/include/llvm/IntrinsicInst.h
new file mode 100644
index 00000000000..74c30fbddd7
--- /dev/null
+++ b/final/include/llvm/IntrinsicInst.h
@@ -0,0 +1,306 @@
+//===-- llvm/IntrinsicInst.h - Intrinsic Instruction Wrappers ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes that make it really easy to deal with intrinsic
+// functions with the isa/dyncast family of functions. In particular, this
+// allows you to do things like:
+//
+//     if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst))
+//        ... MCI->getDest() ... MCI->getSource() ...
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INTRINSICINST_H
+#define LLVM_INTRINSICINST_H
+
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+
+namespace llvm {
+  /// IntrinsicInst - A useful wrapper class for inspecting calls to intrinsic
+  /// functions. This allows the standard isa/dyncast/cast functionality to
+  /// work with calls to intrinsic functions.
+  class IntrinsicInst : public CallInst {
+    IntrinsicInst();                      // DO NOT IMPLEMENT
+    IntrinsicInst(const IntrinsicInst&);  // DO NOT IMPLEMENT
+    void operator=(const IntrinsicInst&); // DO NOT IMPLEMENT
+  public:
+    /// getIntrinsicID - Return the intrinsic ID of this intrinsic.
+    ///
+    Intrinsic::ID getIntrinsicID() const {
+      return (Intrinsic::ID)getCalledFunction()->getIntrinsicID();
+    }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const IntrinsicInst *) { return true; }
+    static inline bool classof(const CallInst *I) {
+      if (const Function *CF = I->getCalledFunction())
+        return CF->getIntrinsicID() != 0;
+      return false;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<CallInst>(V) && classof(cast<CallInst>(V));
+    }
+  };
+
+  /// DbgInfoIntrinsic - This is the common base class for debug info intrinsics
+  ///
+  class DbgInfoIntrinsic : public IntrinsicInst {
+  public:
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const DbgInfoIntrinsic *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::dbg_declare:
+      case Intrinsic::dbg_value:
+        return true;
+      default: return false;
+      }
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+
+    static Value *StripCast(Value *C);
+  };
+
+  /// DbgDeclareInst - This represents the llvm.dbg.declare instruction.
+  ///
+  class DbgDeclareInst : public DbgInfoIntrinsic {
+  public:
+    Value *getAddress() const;
+    MDNode *getVariable() const { return cast<MDNode>(getArgOperand(1)); }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const DbgDeclareInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::dbg_declare;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// DbgValueInst - This represents the llvm.dbg.value instruction.
+  ///
+  class DbgValueInst : public DbgInfoIntrinsic {
+  public:
+    const Value *getValue() const;
+    Value *getValue();
+    uint64_t getOffset() const {
+      return cast<ConstantInt>(
+                          const_cast<Value*>(getArgOperand(1)))->getZExtValue();
+    }
+    MDNode *getVariable() const { return cast<MDNode>(getArgOperand(2)); }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const DbgValueInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::dbg_value;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
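+
+  // Illustrative usage sketch (not part of the original header): the debug
+  // info wrappers reduce inspection to a dyn_cast. 'I' is a hypothetical
+  // Instruction* assumed to exist in context.
+  //
+  //   if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I))
+  //     if (MDNode *Var = DVI->getVariable()) { /* inspect variable */ }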
+
+  /// MemIntrinsic - This is the common base class for memset/memcpy/memmove.
+  ///
+  class MemIntrinsic : public IntrinsicInst {
+  public:
+    Value *getRawDest() const { return const_cast<Value*>(getArgOperand(0)); }
+
+    Value *getLength() const { return const_cast<Value*>(getArgOperand(2)); }
+    ConstantInt *getAlignmentCst() const {
+      return cast<ConstantInt>(const_cast<Value*>(getArgOperand(3)));
+    }
+
+    unsigned getAlignment() const {
+      return getAlignmentCst()->getZExtValue();
+    }
+
+    ConstantInt *getVolatileCst() const {
+      return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)));
+    }
+    bool isVolatile() const {
+      return !getVolatileCst()->isZero();
+    }
+
+    unsigned getAddressSpace() const {
+      return cast<PointerType>(getRawDest()->getType())->getAddressSpace();
+    }
+
+    /// getDest - This is just like getRawDest, but it strips off any cast
+    /// instructions that feed it, giving the original input. The returned
+    /// value is guaranteed to be a pointer.
+    Value *getDest() const { return getRawDest()->stripPointerCasts(); }
+
+    /// set* - Set the specified arguments of the instruction.
+    ///
+    void setDest(Value *Ptr) {
+      assert(getRawDest()->getType() == Ptr->getType() &&
+             "setDest called with pointer of wrong type!");
+      setArgOperand(0, Ptr);
+    }
+
+    void setLength(Value *L) {
+      assert(getLength()->getType() == L->getType() &&
+             "setLength called with value of wrong type!");
+      setArgOperand(2, L);
+    }
+
+    void setAlignment(Constant* A) {
+      setArgOperand(3, A);
+    }
+
+    void setVolatile(Constant* V) {
+      setArgOperand(4, V);
+    }
+
+    const Type *getAlignmentType() const {
+      return getArgOperand(3)->getType();
+    }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const MemIntrinsic *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      switch (I->getIntrinsicID()) {
+      case Intrinsic::memcpy:
+      case Intrinsic::memmove:
+      case Intrinsic::memset:
+        return true;
+      default: return false;
+      }
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// MemSetInst - This class wraps the llvm.memset intrinsic.
+  ///
+  class MemSetInst : public MemIntrinsic {
+  public:
+    /// get* - Return the arguments to the instruction.
+    ///
+    Value *getValue() const { return const_cast<Value*>(getArgOperand(1)); }
+
+    void setValue(Value *Val) {
+      assert(getValue()->getType() == Val->getType() &&
+             "setValue called with value of wrong type!");
+      setArgOperand(1, Val);
+    }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const MemSetInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memset;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics.
+  ///
+  class MemTransferInst : public MemIntrinsic {
+  public:
+    /// get* - Return the arguments to the instruction.
+    ///
+    Value *getRawSource() const { return const_cast<Value*>(getArgOperand(1)); }
+
+    /// getSource - This is just like getRawSource, but it strips off any cast
+    /// instructions that feed it, giving the original input. The returned
+    /// value is guaranteed to be a pointer.
+    Value *getSource() const { return getRawSource()->stripPointerCasts(); }
+
+    void setSource(Value *Ptr) {
+      assert(getRawSource()->getType() == Ptr->getType() &&
+             "setSource called with pointer of wrong type!");
+      setArgOperand(1, Ptr);
+    }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const MemTransferInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memcpy ||
+             I->getIntrinsicID() == Intrinsic::memmove;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+
+  /// MemCpyInst - This class wraps the llvm.memcpy intrinsic.
+  ///
+  class MemCpyInst : public MemTransferInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const MemCpyInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memcpy;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
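+
+  // Illustrative usage sketch (not part of the original header), mirroring
+  // the example in this file's header comment; 'Inst' is a hypothetical
+  // Instruction* assumed to exist in context.
+  //
+  //   if (MemCpyInst *MCI = dyn_cast<MemCpyInst>(Inst))
+  //     if (!MCI->isVolatile()) { /* use MCI->getDest(), MCI->getSource() */ }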
+
+  /// MemMoveInst - This class wraps the llvm.memmove intrinsic.
+  ///
+  class MemMoveInst : public MemTransferInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const MemMoveInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::memmove;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// EHExceptionInst - This represents the llvm.eh.exception instruction.
+  ///
+  class EHExceptionInst : public IntrinsicInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const EHExceptionInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::eh_exception;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+  /// EHSelectorInst - This represents the llvm.eh.selector instruction.
+  ///
+  class EHSelectorInst : public IntrinsicInst {
+  public:
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const EHSelectorInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::eh_selector;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
+}
+
+#endif
diff --git a/final/include/llvm/Intrinsics.h b/final/include/llvm/Intrinsics.h
new file mode 100644
index 00000000000..5cfe55181f5
--- /dev/null
+++ b/final/include/llvm/Intrinsics.h
@@ -0,0 +1,80 @@
+//===-- llvm/Intrinsics.h - LLVM Intrinsic Function Handling ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a set of enums which allow processing of intrinsic
+// functions. Values of these enum types are returned by
+// Function::getIntrinsicID.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INTRINSICS_H
+#define LLVM_INTRINSICS_H
+
+#include <string>
+
+namespace llvm {
+
+class Type;
+class FunctionType;
+class Function;
+class LLVMContext;
+class Module;
+class AttrListPtr;
+
+/// Intrinsic Namespace - This namespace contains an enum with a value for
+/// every intrinsic/builtin function known by LLVM. These enum values are
+/// returned by Function::getIntrinsicID().
+///
+namespace Intrinsic {
+  enum ID {
+    not_intrinsic = 0,   // Must be zero
+
+    // Get the intrinsic enums generated from Intrinsics.td
+#define GET_INTRINSIC_ENUM_VALUES
+#include "llvm/Intrinsics.gen"
+#undef GET_INTRINSIC_ENUM_VALUES
+    , num_intrinsics
+  };
+
+  /// Intrinsic::getName(ID) - Return the LLVM name for an intrinsic, such as
+  /// "llvm.ppc.altivec.lvx".
+  std::string getName(ID id, const Type **Tys = 0, unsigned numTys = 0);
+
+  /// Intrinsic::getType(ID) - Return the function type for an intrinsic.
+  ///
+  const FunctionType *getType(LLVMContext &Context, ID id,
+                              const Type **Tys = 0, unsigned numTys = 0);
+
+  /// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
+  /// overloaded.
+  bool isOverloaded(ID id);
+
+  /// Intrinsic::getAttributes(ID) - Return the attributes for an intrinsic.
+  ///
+  AttrListPtr getAttributes(ID id);
+
+  /// Intrinsic::getDeclaration(M, ID) - Create or insert an LLVM Function
+  /// declaration for an intrinsic, and return it.
+  ///
+  /// The Tys and numTys parameters are for intrinsics with overloaded types
+  /// (e.g., those using iAny, fAny, vAny, or iPTRAny). For a declaration for an
+  /// overloaded intrinsic, Tys should point to an array of numTys pointers to
+  /// Type, and must provide exactly one type for each overloaded type in the
+  /// intrinsic.
+  Function *getDeclaration(Module *M, ID id, const Type **Tys = 0,
+                           unsigned numTys = 0);
+
+  /// Map a GCC builtin name to an intrinsic ID.
+  ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
+
+} // End Intrinsic namespace
+
+} // End llvm namespace
+
+#endif
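+
+// Illustrative usage sketch (not part of the original header): materializing
+// a declaration of the overloaded llvm.memcpy intrinsic. 'M' (a Module*) and
+// 'Ctx' (its LLVMContext) are hypothetical values assumed to exist.
+//
+//   const Type *Tys[] = { Type::getInt8PtrTy(Ctx), Type::getInt8PtrTy(Ctx),
+//                         Type::getInt64Ty(Ctx) };
+//   Function *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);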
diff --git a/final/include/llvm/Intrinsics.td b/final/include/llvm/Intrinsics.td
new file mode 100644
index 00000000000..ba66555c4e0
--- /dev/null
+++ b/final/include/llvm/Intrinsics.td
@@ -0,0 +1,493 @@
+//===- Intrinsics.td - Defines all LLVM intrinsics ---------*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines properties of all LLVM intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/CodeGen/ValueTypes.td"
+
+//===----------------------------------------------------------------------===//
+//  Properties we keep track of for intrinsics.
+//===----------------------------------------------------------------------===//
+
+class IntrinsicProperty;
+
+// Intr*Mem - Memory properties. An intrinsic is allowed to have at most one of
+// these properties set. They are listed from the most aggressive (best to use
+// if correct) to the least aggressive. If no property is set, the worst case
+// is assumed (it may read and write any memory it can get access to and it may
+// have other side effects).
+
+// IntrNoMem - The intrinsic does not access memory or have any other side
+// effects. It may be CSE'd, deleted if dead, etc.
+def IntrNoMem : IntrinsicProperty;
+
+// IntrReadArgMem - This intrinsic reads only from memory that one of its
+// arguments points to, but may read an unspecified amount.
+def IntrReadArgMem : IntrinsicProperty;
+
+// IntrReadMem - This intrinsic reads from unspecified memory, so it cannot be
+// moved across stores. However, it can be reordered otherwise and can be
+// deleted if dead.
+def IntrReadMem : IntrinsicProperty;
+
+// IntrReadWriteArgMem - This intrinsic reads and writes only from memory that
+// one of its arguments points to, but may access an unspecified amount. The
+// reads and writes may be volatile, but except for this it has no other side
+// effects.
+def IntrReadWriteArgMem : IntrinsicProperty;
+
+// Commutative - This intrinsic is commutative: X op Y == Y op X.
+def Commutative : IntrinsicProperty;
+
+// NoCapture - The specified argument pointer is not captured by the intrinsic.
+class NoCapture<int argNo> : IntrinsicProperty {
+  int ArgNo = argNo;
+}
+
+//===----------------------------------------------------------------------===//
+// Types used by intrinsics.
+//===----------------------------------------------------------------------===//
+
+class LLVMType<ValueType vt> {
+  ValueType VT = vt;
+}
+
+class LLVMPointerType<LLVMType elty>
+  : LLVMType<iPTR>{
+  LLVMType ElTy = elty;
+}
+
+class LLVMAnyPointerType<LLVMType elty>
+  : LLVMType<iPTRAny>{
+  LLVMType ElTy = elty;
+}
+
+// Match the type of another intrinsic parameter. Number is an index into the
+// list of overloaded types for the intrinsic, excluding all the fixed types.
+// The Number value must refer to a previously listed type. For example:
+//   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyfloat_ty, LLVMMatchType<0>]>
+// has two overloaded types, the 2nd and 3rd arguments. LLVMMatchType<0>
+// refers to the first overloaded type, which is the 2nd argument.
+class LLVMMatchType<int num>
+  : LLVMType<OtherVT>{
+  int Number = num;
+}
+
+// Match the type of another intrinsic parameter that is expected to be
+// an integral vector type, but change the element size to be twice as wide
+// or half as wide as the other type. This is only useful when the intrinsic
+// is overloaded, so the matched type should be declared as iAny.
+class LLVMExtendedElementVectorType<int num> : LLVMMatchType<num>;
+class LLVMTruncatedElementVectorType<int num> : LLVMMatchType<num>;
+
+def llvm_void_ty       : LLVMType<isVoid>;
+def llvm_anyint_ty     : LLVMType<iAny>;
+def llvm_anyfloat_ty   : LLVMType<fAny>;
+def llvm_anyvector_ty  : LLVMType<vAny>;
+def llvm_i1_ty         : LLVMType<i1>;
+def llvm_i8_ty         : LLVMType<i8>;
+def llvm_i16_ty        : LLVMType<i16>;
+def llvm_i32_ty        : LLVMType<i32>;
+def llvm_i64_ty        : LLVMType<i64>;
+def llvm_float_ty      : LLVMType<f32>;
+def llvm_double_ty     : LLVMType<f64>;
+def llvm_f80_ty        : LLVMType<f80>;
+def llvm_f128_ty       : LLVMType<f128>;
+def llvm_ppcf128_ty    : LLVMType<ppcf128>;
+def llvm_ptr_ty        : LLVMPointerType<llvm_i8_ty>;             // i8*
+def llvm_ptrptr_ty     : LLVMPointerType<llvm_ptr_ty>;            // i8**
+def llvm_anyptr_ty     : LLVMAnyPointerType<llvm_i8_ty>;          // (space)i8*
+def llvm_empty_ty      : LLVMType<OtherVT>;                       // { }
+def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>;          // { }*
+def llvm_metadata_ty   : LLVMType<MetadataVT>;                    // !{...}
+
+def llvm_x86mmx_ty     : LLVMType<x86mmx>;
+def llvm_ptrx86mmx_ty  : LLVMPointerType<llvm_x86mmx_ty>;         // <1 x i64>*
+
+def llvm_v2i8_ty       : LLVMType<v2i8>;     //  2 x i8
+def llvm_v4i8_ty       : LLVMType<v4i8>;     //  4 x i8
+def llvm_v8i8_ty       : LLVMType<v8i8>;     //  8 x i8
+def llvm_v16i8_ty      : LLVMType<v16i8>;    // 16 x i8
+def llvm_v32i8_ty      : LLVMType<v32i8>;    // 32 x i8
+def llvm_v2i16_ty      : LLVMType<v2i16>;    //  2 x i16
+def llvm_v4i16_ty      : LLVMType<v4i16>;    //  4 x i16
+def llvm_v8i16_ty      : LLVMType<v8i16>;    //  8 x i16
+def llvm_v16i16_ty     : LLVMType<v16i16>;   // 16 x i16
+def llvm_v2i32_ty      : LLVMType<v2i32>;    //  2 x i32
+def llvm_v4i32_ty      : LLVMType<v4i32>;    //  4 x i32
+def llvm_v8i32_ty      : LLVMType<v8i32>;    //  8 x i32
+def llvm_v1i64_ty      : LLVMType<v1i64>;    //  1 x i64
+def llvm_v2i64_ty      : LLVMType<v2i64>;    //  2 x i64
+def llvm_v4i64_ty      : LLVMType<v4i64>;    //  4 x i64
+
+def llvm_v2f32_ty      : LLVMType<v2f32>;    //  2 x float
+def llvm_v4f32_ty      : LLVMType<v4f32>;    //  4 x float
+def llvm_v8f32_ty      : LLVMType<v8f32>;    //  8 x float
+def llvm_v2f64_ty      : LLVMType<v2f64>;    //  2 x double
+def llvm_v4f64_ty      : LLVMType<v4f64>;    //  4 x double
+
+def llvm_vararg_ty     : LLVMType<isVoid>;   // this means vararg here
+
+
+//===----------------------------------------------------------------------===//
+// Intrinsic Definitions.
+//===----------------------------------------------------------------------===//
+
+// Intrinsic class - This is used to define one LLVM intrinsic. The name of the
+// intrinsic definition should start with "int_", then match the LLVM intrinsic
+// name with the "llvm." prefix removed, and all "."s turned into "_"s. For
+// example, llvm.bswap.i16 -> int_bswap_i16.
+//
+//  * RetTypes is a list containing the return types expected for the
+//    intrinsic.
+//  * ParamTypes is a list containing the parameter types expected for the
+//    intrinsic.
+//  * Properties can be set to describe the behavior of the intrinsic.
+//
+class Intrinsic<list<LLVMType> ret_types,
+                list<LLVMType> param_types = [],
+                list<IntrinsicProperty> properties = [],
+                string name = ""> {
+  string LLVMName = name;
+  string TargetPrefix = "";   // Set to a prefix for target-specific intrinsics.
+  list<LLVMType> RetTypes = ret_types;
+  list<LLVMType> ParamTypes = param_types;
+  list<IntrinsicProperty> Properties = properties;
+
+  bit isTarget = 0;
+}
+
+/// GCCBuiltin - If this intrinsic exactly corresponds to a GCC builtin, this
+/// specifies the name of the builtin. This provides automatic CBE and CFE
+/// support.
+class GCCBuiltin<string name> {
+  string GCCBuiltinName = name;
+}
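+
+// Illustrative sketch (not part of the original file): a hypothetical
+// definition using the classes above. 'int_example_op' (llvm.example.op)
+// does not exist in LLVM; it only shows the naming and property conventions.
+//
+//   def int_example_op : Intrinsic<[llvm_i32_ty],
+//                                  [llvm_i32_ty, llvm_i32_ty],
+//                                  [IntrNoMem, Commutative]>,
+//                        GCCBuiltin<"__builtin_example_op">;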
+
+
+//===--------------- Variable Argument Handling Intrinsics ----------------===//
+//
+def int_vastart : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_start">;
+def int_vacopy  : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], [],
+                            "llvm.va_copy">;
+def int_vaend   : Intrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
+
+//===------------------- Garbage Collection Intrinsics --------------------===//
+//
+def int_gcroot  : Intrinsic<[],
+                            [llvm_ptrptr_ty, llvm_ptr_ty]>;
+def int_gcread  : Intrinsic<[llvm_ptr_ty],
+                            [llvm_ptr_ty, llvm_ptrptr_ty],
+                            [IntrReadArgMem]>;
+def int_gcwrite : Intrinsic<[],
+                            [llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
+                            [IntrReadWriteArgMem, NoCapture<1>, NoCapture<2>]>;
+
+//===--------------------- Code Generator Intrinsics ----------------------===//
+//
+def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_frameaddress  : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+
+// Note: we treat stacksave/stackrestore as writemem because we don't otherwise
+// model their dependencies on allocas.
+def int_stacksave     : Intrinsic<[llvm_ptr_ty]>,
+                        GCCBuiltin<"__builtin_stack_save">;
+def int_stackrestore  : Intrinsic<[], [llvm_ptr_ty]>,
+                        GCCBuiltin<"__builtin_stack_restore">;
+
+// IntrReadWriteArgMem is more pessimistic than strictly necessary for prefetch,
+// however it does conveniently prevent the prefetch from being reordered
+// with respect to nearby accesses to the same memory.
+def int_prefetch      : Intrinsic<[],
+                                  [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+                                  [IntrReadWriteArgMem, NoCapture<0>]>;
+def int_pcmarker      : Intrinsic<[], [llvm_i32_ty]>;
+
+def int_readcyclecounter : Intrinsic<[llvm_i64_ty]>;
+
+// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
+// guard to the correct place on the stack frame.
+def int_stackprotector : Intrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+
+//===------------------- Standard C Library Intrinsics --------------------===//
+//
+
+def int_memcpy  : Intrinsic<[],
+                            [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+                             llvm_i32_ty, llvm_i1_ty],
+                            [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+def int_memmove : Intrinsic<[],
+                            [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty,
+                             llvm_i32_ty, llvm_i1_ty],
+                            [IntrReadWriteArgMem, NoCapture<0>, NoCapture<1>]>;
+def int_memset  : Intrinsic<[],
+                            [llvm_anyptr_ty, llvm_i8_ty, llvm_anyint_ty,
+                             llvm_i32_ty, llvm_i1_ty],
+                            [IntrReadWriteArgMem, NoCapture<0>]>;
+
+// These functions do not actually read memory, but they are sensitive to the
+// rounding mode. This needs to be modelled separately; in the meantime
+// declaring them as reading memory is conservatively correct.
+let Properties = [IntrReadMem] in {
+  def int_sqrt : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_powi : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty]>;
+  def int_sin  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_cos  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_pow  : Intrinsic<[llvm_anyfloat_ty],
+                           [LLVMMatchType<0>, LLVMMatchType<0>]>;
+  def int_log  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_log10: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_log2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_exp  : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+  def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
+}
+
+// NOTE: these are internal interfaces.
+def int_setjmp     : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_longjmp    : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
+def int_sigsetjmp  : Intrinsic<[llvm_i32_ty] , [llvm_ptr_ty, llvm_i32_ty]>;
+def int_siglongjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
+
+// Internal interface for object size checking
+def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
+                               [IntrNoMem]>,
+                     GCCBuiltin<"__builtin_object_size">;
+
+//===-------------------- Bit Manipulation Intrinsics ---------------------===//
+//
+
+// None of these intrinsics accesses memory at all.
+let Properties = [IntrNoMem] in {
+  def int_bswap: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+  def int_ctpop: Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+  def int_ctlz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+  def int_cttz : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>]>;
+}
+
+//===------------------------ Debugger Intrinsics -------------------------===//
+//
+
+// None of these intrinsics accesses memory at all...but that doesn't mean the
+// optimizers can change them aggressively. Special handling needed in a few
+// places.
+let Properties = [IntrNoMem] in {
+  def int_dbg_declare    : Intrinsic<[],
+                                     [llvm_metadata_ty, llvm_metadata_ty]>;
+  def int_dbg_value      : Intrinsic<[],
+                                     [llvm_metadata_ty, llvm_i64_ty,
+                                      llvm_metadata_ty]>;
+}
+
+//===------------------ Exception Handling Intrinsics----------------------===//
+//
+def int_eh_exception : Intrinsic<[llvm_ptr_ty], [], [IntrReadMem]>;
+def int_eh_selector  : Intrinsic<[llvm_i32_ty],
+                                 [llvm_ptr_ty, llvm_ptr_ty, llvm_vararg_ty]>;
+
+def int_eh_typeid_for : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+
+def int_eh_return_i32 : Intrinsic<[], [llvm_i32_ty, llvm_ptr_ty]>;
+def int_eh_return_i64 : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty]>;
+
+def int_eh_unwind_init: Intrinsic<[]>,
+                        GCCBuiltin<"__builtin_unwind_init">;
+
+def int_eh_dwarf_cfa  : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty]>;
+
+let Properties = [IntrNoMem] in {
+  def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
+  def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
+}
+def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_ptr_ty]>;
+def int_eh_sjlj_setjmp  : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
+def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
+
+//===---------------- Generic Variable Attribute Intrinsics----------------===//
+//
+def int_var_annotation : Intrinsic<[],
+                                   [llvm_ptr_ty, llvm_ptr_ty,
+                                    llvm_ptr_ty, llvm_i32_ty],
+                                   [], "llvm.var.annotation">;
+def int_ptr_annotation : Intrinsic<[LLVMAnyPointerType<llvm_i8_ty>],
+                                   [LLVMMatchType<0>, llvm_ptr_ty, llvm_ptr_ty,
+                                    llvm_i32_ty],
+                                   [], "llvm.ptr.annotation">;
+def int_annotation : Intrinsic<[llvm_anyint_ty],
+                               [LLVMMatchType<0>, llvm_ptr_ty,
+                                llvm_ptr_ty, llvm_i32_ty],
+                               [], "llvm.annotation">;
+
+//===------------------------ Trampoline Intrinsics -----------------------===//
+//
+def int_init_trampoline : Intrinsic<[llvm_ptr_ty],
+                                    [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
+                                    [IntrReadWriteArgMem]>,
+                          GCCBuiltin<"__builtin_init_trampoline">;
+
+//===------------------------ Overflow Intrinsics -------------------------===//
+//
+
+// Expose the carry flag from add operations on two integrals.
+def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+
+def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+
+def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
+                                       [LLVMMatchType<0>, LLVMMatchType<0>],
+                                       [IntrNoMem]>;
+
+//===------------------------- Atomic Intrinsics --------------------------===//
+//
+def int_memory_barrier : Intrinsic<[],
+                                   [llvm_i1_ty, llvm_i1_ty,
+                                    llvm_i1_ty, llvm_i1_ty, llvm_i1_ty], []>,
+                                   GCCBuiltin<"__builtin_llvm_memory_barrier">;
+
+def int_atomic_cmp_swap : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>, LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_val_compare_and_swap">;
+def int_atomic_load_add : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_add">;
+def int_atomic_swap : Intrinsic<[llvm_anyint_ty],
+                                [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                 LLVMMatchType<0>],
+                                [IntrReadWriteArgMem, NoCapture<0>]>,
+                      GCCBuiltin<"__sync_lock_test_and_set">;
+def int_atomic_load_sub : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_sub">;
+def int_atomic_load_and : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_and">;
+def int_atomic_load_or : Intrinsic<[llvm_anyint_ty],
+                                   [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                    LLVMMatchType<0>],
+                                   [IntrReadWriteArgMem, NoCapture<0>]>,
+                         GCCBuiltin<"__sync_fetch_and_or">;
+def int_atomic_load_xor : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_xor">;
+def int_atomic_load_nand : Intrinsic<[llvm_anyint_ty],
+                                     [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                      LLVMMatchType<0>],
+                                     [IntrReadWriteArgMem, NoCapture<0>]>,
+                           GCCBuiltin<"__sync_fetch_and_nand">;
+def int_atomic_load_min : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_min">;
+def int_atomic_load_max : Intrinsic<[llvm_anyint_ty],
+                                    [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                     LLVMMatchType<0>],
+                                    [IntrReadWriteArgMem, NoCapture<0>]>,
+                          GCCBuiltin<"__sync_fetch_and_max">;
+def int_atomic_load_umin : Intrinsic<[llvm_anyint_ty],
+                                     [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                      LLVMMatchType<0>],
+                                     [IntrReadWriteArgMem, NoCapture<0>]>,
+                           GCCBuiltin<"__sync_fetch_and_umin">;
+def int_atomic_load_umax : Intrinsic<[llvm_anyint_ty],
+                                     [LLVMAnyPointerType<LLVMMatchType<0>>,
+                                      LLVMMatchType<0>],
+                                     [IntrReadWriteArgMem, NoCapture<0>]>,
+                           GCCBuiltin<"__sync_fetch_and_umax">;
+
+//===------------------------- Memory Use Markers -------------------------===//
+//
+def int_lifetime_start  : Intrinsic<[],
+                                    [llvm_i64_ty, llvm_ptr_ty],
+                                    [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_lifetime_end    : Intrinsic<[],
+                                    [llvm_i64_ty, llvm_ptr_ty],
+                                    [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
+                                    [llvm_i64_ty, llvm_ptr_ty],
+                                    [IntrReadWriteArgMem, NoCapture<1>]>;
+def int_invariant_end   : Intrinsic<[],
+                                    [llvm_descriptor_ty, llvm_i64_ty,
+                                     llvm_ptr_ty],
+                                    [IntrReadWriteArgMem, NoCapture<2>]>;
+
+//===-------------------------- Other Intrinsics --------------------------===//
+//
+def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
+                     GCCBuiltin<"__builtin_flt_rounds">;
+def int_trap : Intrinsic<[]>,
+               GCCBuiltin<"__builtin_trap">;
+
+// Intrinsics to support half precision floating point format
+let Properties = [IntrNoMem] in {
+def int_convert_to_fp16   : Intrinsic<[llvm_i16_ty], [llvm_float_ty]>,
+                            GCCBuiltin<"__gnu_f2h_ieee">;
+def int_convert_from_fp16 : Intrinsic<[llvm_float_ty], [llvm_i16_ty]>,
+                            GCCBuiltin<"__gnu_h2f_ieee">;
+}
+
+// These convert intrinsics are to support various conversions between
+// various types with rounding and saturation. NOTE: avoid using these
+// intrinsics as they might be removed sometime in the future and
+// most targets don't support them.
+def int_convertff  : Intrinsic<[llvm_anyfloat_ty],
+                               [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertfsi : Intrinsic<[llvm_anyfloat_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertfui : Intrinsic<[llvm_anyfloat_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertsif : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertuif : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyfloat_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertss  : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertsu  : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertus  : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+def int_convertuu  : Intrinsic<[llvm_anyint_ty],
+                               [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>;
+
+//===----------------------------------------------------------------------===//
+// Target-specific intrinsics
+//===----------------------------------------------------------------------===//
+
+include "llvm/IntrinsicsPowerPC.td"
+include "llvm/IntrinsicsX86.td"
+include "llvm/IntrinsicsARM.td"
+include "llvm/IntrinsicsCellSPU.td"
+include "llvm/IntrinsicsAlpha.td"
+include "llvm/IntrinsicsXCore.td"
+include "llvm/IntrinsicsPTX.td"
diff --git a/final/include/llvm/IntrinsicsARM.td b/final/include/llvm/IntrinsicsARM.td
new file mode 100644
index 00000000000..823f095dbdc
--- /dev/null
+++ b/final/include/llvm/IntrinsicsARM.td
@@ -0,0 +1,391 @@
+//===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the ARM-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+
+//===----------------------------------------------------------------------===//
+// TLS
+
+let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
+  def int_arm_thread_pointer : GCCBuiltin<"__builtin_thread_pointer">,
+              Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Saturating Arithmetic
+
+let TargetPrefix = "arm" in {  // All intrinsics start with "llvm.arm.".
+ def int_arm_qadd : GCCBuiltin<"__builtin_arm_qadd">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem, Commutative]>; + def int_arm_qsub : GCCBuiltin<"__builtin_arm_qsub">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_arm_ssat : GCCBuiltin<"__builtin_arm_ssat">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_arm_usat : GCCBuiltin<"__builtin_arm_usat">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// VFP + +let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". + def int_arm_get_fpscr : GCCBuiltin<"__builtin_arm_get_fpscr">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + def int_arm_set_fpscr : GCCBuiltin<"__builtin_arm_set_fpscr">, + Intrinsic<[], [llvm_i32_ty], []>; + def int_arm_vcvtr : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], + [IntrNoMem]>; + def int_arm_vcvtru : Intrinsic<[llvm_float_ty], [llvm_anyfloat_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) + +let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.". + + // The following classes do not correspond directly to GCC builtins. + class Neon_1Arg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; + class Neon_1Arg_Narrow_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMExtendedElementVectorType<0>], [IntrNoMem]>; + class Neon_2Arg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + class Neon_2Arg_Narrow_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMExtendedElementVectorType<0>, + LLVMExtendedElementVectorType<0>], + [IntrNoMem]>; + class Neon_2Arg_Long_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMTruncatedElementVectorType<0>, + LLVMTruncatedElementVectorType<0>], + [IntrNoMem]>; + class Neon_3Arg_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; + class Neon_3Arg_Long_Intrinsic + : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, + LLVMTruncatedElementVectorType<0>, + LLVMTruncatedElementVectorType<0>], + [IntrNoMem]>; + class Neon_CvtFxToFP_Intrinsic + : Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + class Neon_CvtFPToFx_Intrinsic + : Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>; + + // The table operands for VTBL and VTBX consist of 1 to 4 v8i8 vectors. + // Besides the table, VTBL has one other v8i8 argument and VTBX has two. + // Overall, the classes range from 2 to 6 v8i8 arguments. + class Neon_Tbl2Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl3Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl4Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty], + [IntrNoMem]>; + class Neon_Tbl5Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty], [IntrNoMem]>; + class Neon_Tbl6Arg_Intrinsic + : Intrinsic<[llvm_v8i8_ty], + [llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, llvm_v8i8_ty, + llvm_v8i8_ty, llvm_v8i8_ty], [IntrNoMem]>; +} + +// Arithmetic ops + +let Properties = [IntrNoMem, Commutative] in { + + // Vector Add. 
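+  // For illustration (an IR sketch, not a definition from this file), the
+  // v8i8 instance of the signed halving add defined below is used as:
+  //   %r = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)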
+ def int_arm_neon_vhadds : Neon_2Arg_Intrinsic; + def int_arm_neon_vhaddu : Neon_2Arg_Intrinsic; + def int_arm_neon_vrhadds : Neon_2Arg_Intrinsic; + def int_arm_neon_vrhaddu : Neon_2Arg_Intrinsic; + def int_arm_neon_vqadds : Neon_2Arg_Intrinsic; + def int_arm_neon_vqaddu : Neon_2Arg_Intrinsic; + def int_arm_neon_vaddhn : Neon_2Arg_Narrow_Intrinsic; + def int_arm_neon_vraddhn : Neon_2Arg_Narrow_Intrinsic; + + // Vector Multiply. + def int_arm_neon_vmulp : Neon_2Arg_Intrinsic; + def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic; + def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic; + def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic; + def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic; + def int_arm_neon_vqdmlal : Neon_3Arg_Long_Intrinsic; + def int_arm_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic; + + // Vector Maximum. + def int_arm_neon_vmaxs : Neon_2Arg_Intrinsic; + def int_arm_neon_vmaxu : Neon_2Arg_Intrinsic; + + // Vector Minimum. + def int_arm_neon_vmins : Neon_2Arg_Intrinsic; + def int_arm_neon_vminu : Neon_2Arg_Intrinsic; + + // Vector Reciprocal Step. + def int_arm_neon_vrecps : Neon_2Arg_Intrinsic; + + // Vector Reciprocal Square Root Step. + def int_arm_neon_vrsqrts : Neon_2Arg_Intrinsic; +} + +// Vector Subtract. +def int_arm_neon_vhsubs : Neon_2Arg_Intrinsic; +def int_arm_neon_vhsubu : Neon_2Arg_Intrinsic; +def int_arm_neon_vqsubs : Neon_2Arg_Intrinsic; +def int_arm_neon_vqsubu : Neon_2Arg_Intrinsic; +def int_arm_neon_vsubhn : Neon_2Arg_Narrow_Intrinsic; +def int_arm_neon_vrsubhn : Neon_2Arg_Narrow_Intrinsic; + +// Vector Absolute Compare. +let TargetPrefix = "arm" in { + def int_arm_neon_vacged : Intrinsic<[llvm_v2i32_ty], + [llvm_v2f32_ty, llvm_v2f32_ty], + [IntrNoMem]>; + def int_arm_neon_vacgeq : Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_arm_neon_vacgtd : Intrinsic<[llvm_v2i32_ty], + [llvm_v2f32_ty, llvm_v2f32_ty], + [IntrNoMem]>; + def int_arm_neon_vacgtq : Intrinsic<[llvm_v4i32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +} + +// Vector Absolute Differences. +def int_arm_neon_vabds : Neon_2Arg_Intrinsic; +def int_arm_neon_vabdu : Neon_2Arg_Intrinsic; + +// Vector Pairwise Add. +def int_arm_neon_vpadd : Neon_2Arg_Intrinsic; + +// Vector Pairwise Add Long. +// Note: This is different than the other "long" NEON intrinsics because +// the result vector has half as many elements as the source vector. +// The source and destination vector types must be specified separately. +let TargetPrefix = "arm" in { + def int_arm_neon_vpaddls : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], + [IntrNoMem]>; + def int_arm_neon_vpaddlu : Intrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], + [IntrNoMem]>; +} + +// Vector Pairwise Add and Accumulate Long. +// Note: This is similar to vpaddl but the destination vector also appears +// as the first argument. +let TargetPrefix = "arm" in { + def int_arm_neon_vpadals : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; + def int_arm_neon_vpadalu : Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_anyvector_ty], + [IntrNoMem]>; +} + +// Vector Pairwise Maximum and Minimum. 
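+// For illustration (an IR sketch, not a definition from this file), the v8i8
+// instance of the signed pairwise maximum defined below is used as:
+//   %r = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)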
+def int_arm_neon_vpmaxs : Neon_2Arg_Intrinsic; +def int_arm_neon_vpmaxu : Neon_2Arg_Intrinsic; +def int_arm_neon_vpmins : Neon_2Arg_Intrinsic; +def int_arm_neon_vpminu : Neon_2Arg_Intrinsic; + +// Vector Shifts: +// +// The various saturating and rounding vector shift operations need to be +// represented by intrinsics in LLVM, and even the basic VSHL variable shift +// operation cannot be safely translated to LLVM's shift operators. VSHL can +// be used for both left and right shifts, or even combinations of the two, +// depending on the signs of the shift amounts. It also has well-defined +// behavior for shift amounts that LLVM leaves undefined. Only basic shifts +// by constants can be represented with LLVM's shift operators. +// +// The shift counts for these intrinsics are always vectors, even for constant +// shifts, where the constant is replicated. For consistency with VSHL (and +// other variable shift instructions), left shifts have positive shift counts +// and right shifts have negative shift counts. This convention is also used +// for constant right shift intrinsics, and to help preserve sanity, the +// intrinsic names use "shift" instead of either "shl" or "shr". Where +// applicable, signed and unsigned versions of the intrinsics are +// distinguished with "s" and "u" suffixes. A few NEON shift instructions, +// such as VQSHLU, take signed operands but produce unsigned results; these +// use a "su" suffix. + +// Vector Shift. +def int_arm_neon_vshifts : Neon_2Arg_Intrinsic; +def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic; +def int_arm_neon_vshiftls : Neon_2Arg_Long_Intrinsic; +def int_arm_neon_vshiftlu : Neon_2Arg_Long_Intrinsic; +def int_arm_neon_vshiftn : Neon_2Arg_Narrow_Intrinsic; + +// Vector Rounding Shift. +def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic; +def int_arm_neon_vrshiftu : Neon_2Arg_Intrinsic; +def int_arm_neon_vrshiftn : Neon_2Arg_Narrow_Intrinsic; + +// Vector Saturating Shift. +def int_arm_neon_vqshifts : Neon_2Arg_Intrinsic; +def int_arm_neon_vqshiftu : Neon_2Arg_Intrinsic; +def int_arm_neon_vqshiftsu : Neon_2Arg_Intrinsic; +def int_arm_neon_vqshiftns : Neon_2Arg_Narrow_Intrinsic; +def int_arm_neon_vqshiftnu : Neon_2Arg_Narrow_Intrinsic; +def int_arm_neon_vqshiftnsu : Neon_2Arg_Narrow_Intrinsic; + +// Vector Saturating Rounding Shift. +def int_arm_neon_vqrshifts : Neon_2Arg_Intrinsic; +def int_arm_neon_vqrshiftu : Neon_2Arg_Intrinsic; +def int_arm_neon_vqrshiftns : Neon_2Arg_Narrow_Intrinsic; +def int_arm_neon_vqrshiftnu : Neon_2Arg_Narrow_Intrinsic; +def int_arm_neon_vqrshiftnsu : Neon_2Arg_Narrow_Intrinsic; + +// Vector Shift and Insert. +def int_arm_neon_vshiftins : Neon_3Arg_Intrinsic; + +// Vector Absolute Value and Saturating Absolute Value. +def int_arm_neon_vabs : Neon_1Arg_Intrinsic; +def int_arm_neon_vqabs : Neon_1Arg_Intrinsic; + +// Vector Saturating Negate. +def int_arm_neon_vqneg : Neon_1Arg_Intrinsic; + +// Vector Count Leading Sign/Zero Bits. +def int_arm_neon_vcls : Neon_1Arg_Intrinsic; +def int_arm_neon_vclz : Neon_1Arg_Intrinsic; + +// Vector Count One Bits. +def int_arm_neon_vcnt : Neon_1Arg_Intrinsic; + +// Vector Reciprocal Estimate. +def int_arm_neon_vrecpe : Neon_1Arg_Intrinsic; + +// Vector Reciprocal Square Root Estimate. +def int_arm_neon_vrsqrte : Neon_1Arg_Intrinsic; + +// Vector Conversions Between Floating-point and Fixed-point. 
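+// For illustration (an IR sketch, not a definition from this file; the
+// overload suffixes follow the usual type-name mangling), converting v2f32 to
+// signed fixed-point with 16 fractional bits looks like:
+//   %r = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> %a,
+//                                                             i32 16)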
+def int_arm_neon_vcvtfp2fxs : Neon_CvtFPToFx_Intrinsic; +def int_arm_neon_vcvtfp2fxu : Neon_CvtFPToFx_Intrinsic; +def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic; +def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic; + +// Vector Conversions Between Half-Precision and Single-Precision. +def int_arm_neon_vcvtfp2hf + : Intrinsic<[llvm_v4i16_ty], [llvm_v4f32_ty], [IntrNoMem]>; +def int_arm_neon_vcvthf2fp + : Intrinsic<[llvm_v4f32_ty], [llvm_v4i16_ty], [IntrNoMem]>; + +// Narrowing Saturating Vector Moves. +def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic; +def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic; +def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic; + +// Vector Table Lookup. +// The first 1-4 arguments are the table. +def int_arm_neon_vtbl1 : Neon_Tbl2Arg_Intrinsic; +def int_arm_neon_vtbl2 : Neon_Tbl3Arg_Intrinsic; +def int_arm_neon_vtbl3 : Neon_Tbl4Arg_Intrinsic; +def int_arm_neon_vtbl4 : Neon_Tbl5Arg_Intrinsic; + +// Vector Table Extension. +// Some elements of the destination vector may not be updated, so the original +// value of that vector is passed as the first argument. The next 1-4 +// arguments after that are the table. +def int_arm_neon_vtbx1 : Neon_Tbl3Arg_Intrinsic; +def int_arm_neon_vtbx2 : Neon_Tbl4Arg_Intrinsic; +def int_arm_neon_vtbx3 : Neon_Tbl5Arg_Intrinsic; +def int_arm_neon_vtbx4 : Neon_Tbl6Arg_Intrinsic; + +let TargetPrefix = "arm" in { + + // De-interleaving vector loads from N-element structures. + // Source operands are the address and alignment. + def int_arm_neon_vld1 : Intrinsic<[llvm_anyvector_ty], + [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_arm_neon_vld2 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_arm_neon_vld3 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>], + [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_arm_neon_vld4 : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_ptr_ty, llvm_i32_ty], + [IntrReadArgMem]>; + + // Vector load N-element structure to one lane. + // Source operands are: the address, the N input vectors (since only one + // lane is assigned), the lane number, and the alignment. + def int_arm_neon_vld2lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty, + llvm_i32_ty], [IntrReadArgMem]>; + def int_arm_neon_vld3lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty, llvm_i32_ty], + [IntrReadArgMem]>; + def int_arm_neon_vld4lane : Intrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_ptr_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty, + llvm_i32_ty], [IntrReadArgMem]>; + + // Interleaving vector stores from N-element structures. + // Source operands are: the address, the N vectors, and the alignment. 
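+  // For illustration (an IR sketch, not a definition from this file), a
+  // two-way interleaved store of v8i8 with 8-byte alignment looks like:
+  //   call void @llvm.arm.neon.vst2.v8i8(i8* %p, <8 x i8> %a, <8 x i8> %b,
+  //                                      i32 8)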
+  def int_arm_neon_vst1 : Intrinsic<[],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     llvm_i32_ty], [IntrReadWriteArgMem]>;
+  def int_arm_neon_vst2 : Intrinsic<[],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>, llvm_i32_ty],
+                                    [IntrReadWriteArgMem]>;
+  def int_arm_neon_vst3 : Intrinsic<[],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>, LLVMMatchType<0>,
+                                     llvm_i32_ty], [IntrReadWriteArgMem]>;
+  def int_arm_neon_vst4 : Intrinsic<[],
+                                    [llvm_ptr_ty, llvm_anyvector_ty,
+                                     LLVMMatchType<0>, LLVMMatchType<0>,
+                                     LLVMMatchType<0>, llvm_i32_ty],
+                                    [IntrReadWriteArgMem]>;
+
+  // Vector store N-element structure from one lane.
+  // Source operands are: the address, the N vectors, the lane number, and
+  // the alignment.
+  def int_arm_neon_vst2lane : Intrinsic<[],
+                                        [llvm_ptr_ty, llvm_anyvector_ty,
+                                         LLVMMatchType<0>, llvm_i32_ty,
+                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
+  def int_arm_neon_vst3lane : Intrinsic<[],
+                                        [llvm_ptr_ty, llvm_anyvector_ty,
+                                         LLVMMatchType<0>, LLVMMatchType<0>,
+                                         llvm_i32_ty, llvm_i32_ty],
+                                        [IntrReadWriteArgMem]>;
+  def int_arm_neon_vst4lane : Intrinsic<[],
+                                        [llvm_ptr_ty, llvm_anyvector_ty,
+                                         LLVMMatchType<0>, LLVMMatchType<0>,
+                                         LLVMMatchType<0>, llvm_i32_ty,
+                                         llvm_i32_ty], [IntrReadWriteArgMem]>;
+}
diff --git a/final/include/llvm/IntrinsicsAlpha.td b/final/include/llvm/IntrinsicsAlpha.td
new file mode 100644
index 00000000000..59865cf8a3e
--- /dev/null
+++ b/final/include/llvm/IntrinsicsAlpha.td
@@ -0,0 +1,18 @@
+//===- IntrinsicsAlpha.td - Defines Alpha intrinsics -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the Alpha-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+
+let TargetPrefix = "alpha" in {  // All intrinsics start with "llvm.alpha.".
+  def int_alpha_umulh : GCCBuiltin<"__builtin_alpha_umulh">,
+        Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
+}
diff --git a/final/include/llvm/IntrinsicsCellSPU.td b/final/include/llvm/IntrinsicsCellSPU.td
new file mode 100644
index 00000000000..1e311bbecbc
--- /dev/null
+++ b/final/include/llvm/IntrinsicsCellSPU.td
@@ -0,0 +1,242 @@
+//==- IntrinsicsCellSPU.td - Cell SDK intrinsics -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file was developed by a team from the Computer Systems Research
+// Department at The Aerospace Corporation and is distributed under the
+// University of Illinois Open Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Cell SPU Instructions:
+//===----------------------------------------------------------------------===//
+// TODO Items (not urgent today, but would be nice, low priority)
+//
+// ANDBI, ORBI: SPU constructs a 4-byte constant for these instructions by
+// concatenating the byte argument b as "bbbb". Could recognize this bit pattern
+// in 16-bit and 32-bit constants and reduce instruction count.
+//===----------------------------------------------------------------------===//
+
+// 7-bit integer type, used as an immediate:
+def cell_i7_ty: LLVMType<i8>;
+def cell_i8_ty: LLVMType<i8>;
+
+// Keep this here until it's actually supported:
+def llvm_i128_ty : LLVMType<i128>;
+
+class v16i8_u7imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, cell_i7_ty],
+            [IntrNoMem]>;
+
+class v16i8_u8imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i8_ty],
+            [IntrNoMem]>;
+
+class v16i8_s10imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v16i8_u16imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v16i8_rr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+            [IntrNoMem]>;
+
+class v8i16_s10imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v8i16_u16imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v8i16_rr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+            [IntrNoMem]>;
+
+class v4i32_rr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+            [IntrNoMem]>;
+
+class v4i32_u7imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, cell_i7_ty],
+            [IntrNoMem]>;
+
+class v4i32_s10imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v4i32_u16imm<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i16_ty],
+            [IntrNoMem]>;
+
+class v4f32_rr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+            [IntrNoMem]>;
+
+class v4f32_rrr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+            [IntrNoMem]>;
+
+class v2f64_rr<string builtin_suffix> :
+  GCCBuiltin<!strconcat("__builtin_si_", builtin_suffix)>,
+  Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
+            [IntrNoMem]>;
+
+// All Cell SPU intrinsics start with "llvm.spu.".
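+// For illustration (an IR sketch, not a definition from this file),
+// int_spu_si_ah below yields an intrinsic that is called as:
+//   %r = call <8 x i16> @llvm.spu.si.ah(<8 x i16> %a, <8 x i16> %b)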
+let TargetPrefix = "spu" in { + def int_spu_si_fsmbi : v8i16_u16imm<"fsmbi">; + def int_spu_si_ah : v8i16_rr<"ah">; + def int_spu_si_ahi : v8i16_s10imm<"ahi">; + def int_spu_si_a : v4i32_rr<"a">; + def int_spu_si_ai : v4i32_s10imm<"ai">; + def int_spu_si_sfh : v8i16_rr<"sfh">; + def int_spu_si_sfhi : v8i16_s10imm<"sfhi">; + def int_spu_si_sf : v4i32_rr<"sf">; + def int_spu_si_sfi : v4i32_s10imm<"sfi">; + def int_spu_si_addx : v4i32_rr<"addx">; + def int_spu_si_cg : v4i32_rr<"cg">; + def int_spu_si_cgx : v4i32_rr<"cgx">; + def int_spu_si_sfx : v4i32_rr<"sfx">; + def int_spu_si_bg : v4i32_rr<"bg">; + def int_spu_si_bgx : v4i32_rr<"bgx">; + def int_spu_si_mpy : // This is special: + GCCBuiltin<"__builtin_si_mpy">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyu : // This is special: + GCCBuiltin<"__builtin_si_mpyu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyi : // This is special: + GCCBuiltin<"__builtin_si_mpyi">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyui : // This is special: + GCCBuiltin<"__builtin_si_mpyui">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_spu_si_mpya : // This is special: + GCCBuiltin<"__builtin_si_mpya">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyh : // This is special: + GCCBuiltin<"__builtin_si_mpyh">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpys : // This is special: + GCCBuiltin<"__builtin_si_mpys">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyhh : // This is special: + GCCBuiltin<"__builtin_si_mpyhh">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyhha : // This is special: + GCCBuiltin<"__builtin_si_mpyhha">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyhhu : // This is special: + GCCBuiltin<"__builtin_si_mpyhhu">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_spu_si_mpyhhau : // This is special: + GCCBuiltin<"__builtin_si_mpyhhau">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + + def int_spu_si_shli: v4i32_u7imm<"shli">; + + def int_spu_si_shlqbi: + GCCBuiltin, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_spu_si_shlqbii: v16i8_u7imm<"shlqbii">; + def int_spu_si_shlqby: + GCCBuiltin, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_spu_si_shlqbyi: v16i8_u7imm<"shlqbyi">; + + def int_spu_si_ceq: v4i32_rr<"ceq">; + def int_spu_si_ceqi: v4i32_s10imm<"ceqi">; + def int_spu_si_ceqb: v16i8_rr<"ceqb">; + def int_spu_si_ceqbi: v16i8_u8imm<"ceqbi">; + def int_spu_si_ceqh: v8i16_rr<"ceqh">; + def int_spu_si_ceqhi: v8i16_s10imm<"ceqhi">; + def int_spu_si_cgt: v4i32_rr<"cgt">; + def int_spu_si_cgti: v4i32_s10imm<"cgti">; + def int_spu_si_cgtb: v16i8_rr<"cgtb">; + def int_spu_si_cgtbi: v16i8_u8imm<"cgtbi">; + def int_spu_si_cgth: v8i16_rr<"cgth">; + def int_spu_si_cgthi: v8i16_s10imm<"cgthi">; + def int_spu_si_clgtb: v16i8_rr<"clgtb">; + def int_spu_si_clgtbi: v16i8_u8imm<"clgtbi">; + def int_spu_si_clgth: v8i16_rr<"clgth">; + def int_spu_si_clgthi: v8i16_s10imm<"clgthi">; + def int_spu_si_clgt: v4i32_rr<"clgt">; + def int_spu_si_clgti: 
v4i32_s10imm<"clgti">; + + def int_spu_si_and: v4i32_rr<"and">; + def int_spu_si_andbi: v16i8_u8imm<"andbi">; + def int_spu_si_andc: v4i32_rr<"andc">; + def int_spu_si_andhi: v8i16_s10imm<"andhi">; + def int_spu_si_andi: v4i32_s10imm<"andi">; + + def int_spu_si_or: v4i32_rr<"or">; + def int_spu_si_orbi: v16i8_u8imm<"orbi">; + def int_spu_si_orc: v4i32_rr<"orc">; + def int_spu_si_orhi: v8i16_s10imm<"orhi">; + def int_spu_si_ori: v4i32_s10imm<"ori">; + + def int_spu_si_xor: v4i32_rr<"xor">; + def int_spu_si_xorbi: v16i8_u8imm<"xorbi">; + def int_spu_si_xorhi: v8i16_s10imm<"xorhi">; + def int_spu_si_xori: v4i32_s10imm<"xori">; + + def int_spu_si_nor: v4i32_rr<"nor">; + def int_spu_si_nand: v4i32_rr<"nand">; + + def int_spu_si_fa: v4f32_rr<"fa">; + def int_spu_si_fs: v4f32_rr<"fs">; + def int_spu_si_fm: v4f32_rr<"fm">; + + def int_spu_si_fceq: v4f32_rr<"fceq">; + def int_spu_si_fcmeq: v4f32_rr<"fcmeq">; + def int_spu_si_fcgt: v4f32_rr<"fcgt">; + def int_spu_si_fcmgt: v4f32_rr<"fcmgt">; + + def int_spu_si_fma: v4f32_rrr<"fma">; + def int_spu_si_fnms: v4f32_rrr<"fnms">; + def int_spu_si_fms: v4f32_rrr<"fms">; + + def int_spu_si_dfa: v2f64_rr<"dfa">; + def int_spu_si_dfs: v2f64_rr<"dfs">; + def int_spu_si_dfm: v2f64_rr<"dfm">; + +//def int_spu_si_dfceq: v2f64_rr<"dfceq">; +//def int_spu_si_dfcmeq: v2f64_rr<"dfcmeq">; +//def int_spu_si_dfcgt: v2f64_rr<"dfcgt">; +//def int_spu_si_dfcmgt: v2f64_rr<"dfcmgt">; + + def int_spu_si_dfnma: v2f64_rr<"dfnma">; + def int_spu_si_dfma: v2f64_rr<"dfma">; + def int_spu_si_dfnms: v2f64_rr<"dfnms">; + def int_spu_si_dfms: v2f64_rr<"dfms">; +} diff --git a/final/include/llvm/IntrinsicsPTX.td b/final/include/llvm/IntrinsicsPTX.td new file mode 100644 index 00000000000..ec291e467ec --- /dev/null +++ b/final/include/llvm/IntrinsicsPTX.td @@ -0,0 +1,32 @@ +//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "ptx" in { + multiclass PTXReadSpecialRegisterIntrinsic { + def _r64 : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + def _v4i16 : Intrinsic<[llvm_v4i16_ty], [], [IntrNoMem]>; + } + + multiclass PTXReadSpecialSubRegisterIntrinsic { + def _x : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>; + def _y : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>; + def _z : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>; + def _w : Intrinsic<[llvm_i16_ty], [], [IntrNoMem]>; + } +} + +defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic; +defm int_ptx_read_tid : PTXReadSpecialSubRegisterIntrinsic; + +let TargetPrefix = "ptx" in + def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>; diff --git a/final/include/llvm/IntrinsicsPowerPC.td b/final/include/llvm/IntrinsicsPowerPC.td new file mode 100644 index 00000000000..da85bfba863 --- /dev/null +++ b/final/include/llvm/IntrinsicsPowerPC.td @@ -0,0 +1,465 @@ +//===- IntrinsicsPowerPC.td - Defines PowerPC intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the PowerPC-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all PowerPC intrinsics.
+//
+
+// Non-altivec intrinsics.
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+  // dcba/dcbf/dcbi/dcbst/dcbt/dcbz/dcbzl(PPC970) instructions.
+  def int_ppc_dcba  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbf  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbi  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbst : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbt  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbtst: Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbz  : Intrinsic<[], [llvm_ptr_ty], []>;
+  def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>;
+
+  // sync instruction
+  def int_ppc_sync : Intrinsic<[], [], []>;
+}
+
+
+let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
+  /// PowerPC_Vec_Intrinsic - Base class for all altivec intrinsics.
+  class PowerPC_Vec_Intrinsic<string GCCIntSuffix, list<LLVMType> ret_types,
+                              list<LLVMType> param_types,
+                              list<IntrinsicProperty> properties>
+    : GCCBuiltin<!strconcat("__builtin_altivec_", GCCIntSuffix)>,
+      Intrinsic<ret_types, param_types, properties>;
+}
+
+//===----------------------------------------------------------------------===//
+// PowerPC Altivec Intrinsic Class Definitions.
+//
+
+/// PowerPC_Vec_FF_Intrinsic - A PowerPC intrinsic that takes one v4f32
+/// vector and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_FF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f32_ty], [llvm_v4f32_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_Vec_FFF_Intrinsic - A PowerPC intrinsic that takes two v4f32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_Vec_HHH_Intrinsic - A PowerPC intrinsic that takes two v8i16
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_HHH_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                          [IntrNoMem]>;
+
+/// PowerPC_Vec_WWW_Intrinsic - A PowerPC intrinsic that takes two v4i32
+/// vectors and returns one. These intrinsics have no side effects.
+class PowerPC_Vec_WWW_Intrinsic<string GCCIntSuffix>
+  : PowerPC_Vec_Intrinsic<GCCIntSuffix,
+                          [llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                          [IntrNoMem]>;
+
+
+//===----------------------------------------------------------------------===//
+// PowerPC Altivec Intrinsic Definitions.
+
+let TargetPrefix = "ppc" in {  // All intrinsics start with "llvm.ppc.".
+  // Data Stream Control.
+  def int_ppc_altivec_dss : GCCBuiltin<"__builtin_altivec_dss">,
+              Intrinsic<[], [llvm_i32_ty], []>;
+  def int_ppc_altivec_dssall : GCCBuiltin<"__builtin_altivec_dssall">,
+              Intrinsic<[], [], []>;
+  def int_ppc_altivec_dst : GCCBuiltin<"__builtin_altivec_dst">,
+              Intrinsic<[],
+                        [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+                        []>;
+  def int_ppc_altivec_dstt : GCCBuiltin<"__builtin_altivec_dstt">,
+              Intrinsic<[],
+                        [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+                        []>;
+  def int_ppc_altivec_dstst : GCCBuiltin<"__builtin_altivec_dstst">,
+              Intrinsic<[],
+                        [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+                        []>;
+  def int_ppc_altivec_dststt : GCCBuiltin<"__builtin_altivec_dststt">,
+              Intrinsic<[],
+                        [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+                        []>;
+
+  // VSCR access.
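+  // For illustration (an IR sketch, not a definition from this file):
+  //   %vscr = call <8 x i16> @llvm.ppc.altivec.mfvscr()
+  //   call void @llvm.ppc.altivec.mtvscr(<4 x i32> %v)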
+ def int_ppc_altivec_mfvscr : GCCBuiltin<"__builtin_altivec_mfvscr">, + Intrinsic<[llvm_v8i16_ty], [], [IntrReadMem]>; + def int_ppc_altivec_mtvscr : GCCBuiltin<"__builtin_altivec_mtvscr">, + Intrinsic<[], [llvm_v4i32_ty], []>; + + + // Loads. These don't map directly to GCC builtins because they represent the + // source address with a single pointer. + def int_ppc_altivec_lvx : + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_ppc_altivec_lvxl : + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_ppc_altivec_lvebx : + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_ppc_altivec_lvehx : + Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_ppc_altivec_lvewx : + Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem]>; + + // Stores. These don't map directly to GCC builtins because they represent the + // source address with a single pointer. + def int_ppc_altivec_stvx : + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>; + def int_ppc_altivec_stvxl : + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>; + def int_ppc_altivec_stvebx : + Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty], []>; + def int_ppc_altivec_stvehx : + Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty], []>; + def int_ppc_altivec_stvewx : + Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], []>; + + // Comparisons setting a vector. + def int_ppc_altivec_vcmpbfp : GCCBuiltin<"__builtin_altivec_vcmpbfp">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpeqfp : GCCBuiltin<"__builtin_altivec_vcmpeqfp">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgefp : GCCBuiltin<"__builtin_altivec_vcmpgefp">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtfp : GCCBuiltin<"__builtin_altivec_vcmpgtfp">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequw : GCCBuiltin<"__builtin_altivec_vcmpequw">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsw : GCCBuiltin<"__builtin_altivec_vcmpgtsw">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsh : GCCBuiltin<"__builtin_altivec_vcmpgtsh">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsb : GCCBuiltin<"__builtin_altivec_vcmpgtsb">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; + + // Predicate Comparisons. The first operand specifies interpretation of CR6. 
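+  // For illustration (an IR sketch, not a definition from this file), an
+  // all-elements-equal test via vcmpeqfp.p; the leading i32 selects how CR6
+  // is read back:
+  //   %all_eq = call i32 @llvm.ppc.altivec.vcmpeqfp.p(i32 2, <4 x float> %a,
+  //                                                   <4 x float> %b)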
+ def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpeqfp_p : GCCBuiltin<"__builtin_altivec_vcmpeqfp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgefp_p : GCCBuiltin<"__builtin_altivec_vcmpgefp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtfp_p : GCCBuiltin<"__builtin_altivec_vcmpgtfp_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequw_p : GCCBuiltin<"__builtin_altivec_vcmpequw_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsw_p : GCCBuiltin<"__builtin_altivec_vcmpgtsw_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsh_p : GCCBuiltin<"__builtin_altivec_vcmpgtsh_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtsb_p : GCCBuiltin<"__builtin_altivec_vcmpgtsb_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">, + Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty], + [IntrNoMem]>; +} + +// Vector average. +def int_ppc_altivec_vavgsb : PowerPC_Vec_BBB_Intrinsic<"vavgsb">; +def int_ppc_altivec_vavgsh : PowerPC_Vec_HHH_Intrinsic<"vavgsh">; +def int_ppc_altivec_vavgsw : PowerPC_Vec_WWW_Intrinsic<"vavgsw">; +def int_ppc_altivec_vavgub : PowerPC_Vec_BBB_Intrinsic<"vavgub">; +def int_ppc_altivec_vavguh : PowerPC_Vec_HHH_Intrinsic<"vavguh">; +def int_ppc_altivec_vavguw : PowerPC_Vec_WWW_Intrinsic<"vavguw">; + +// Vector maximum. +def int_ppc_altivec_vmaxfp : PowerPC_Vec_FFF_Intrinsic<"vmaxfp">; +def int_ppc_altivec_vmaxsb : PowerPC_Vec_BBB_Intrinsic<"vmaxsb">; +def int_ppc_altivec_vmaxsh : PowerPC_Vec_HHH_Intrinsic<"vmaxsh">; +def int_ppc_altivec_vmaxsw : PowerPC_Vec_WWW_Intrinsic<"vmaxsw">; +def int_ppc_altivec_vmaxub : PowerPC_Vec_BBB_Intrinsic<"vmaxub">; +def int_ppc_altivec_vmaxuh : PowerPC_Vec_HHH_Intrinsic<"vmaxuh">; +def int_ppc_altivec_vmaxuw : PowerPC_Vec_WWW_Intrinsic<"vmaxuw">; + +// Vector minimum. 
+def int_ppc_altivec_vminfp : PowerPC_Vec_FFF_Intrinsic<"vminfp">;
+def int_ppc_altivec_vminsb : PowerPC_Vec_BBB_Intrinsic<"vminsb">;
+def int_ppc_altivec_vminsh : PowerPC_Vec_HHH_Intrinsic<"vminsh">;
+def int_ppc_altivec_vminsw : PowerPC_Vec_WWW_Intrinsic<"vminsw">;
+def int_ppc_altivec_vminub : PowerPC_Vec_BBB_Intrinsic<"vminub">;
+def int_ppc_altivec_vminuh : PowerPC_Vec_HHH_Intrinsic<"vminuh">;
+def int_ppc_altivec_vminuw : PowerPC_Vec_WWW_Intrinsic<"vminuw">;
+
+// Saturating adds.
+def int_ppc_altivec_vaddubs : PowerPC_Vec_BBB_Intrinsic<"vaddubs">;
+def int_ppc_altivec_vaddsbs : PowerPC_Vec_BBB_Intrinsic<"vaddsbs">;
+def int_ppc_altivec_vadduhs : PowerPC_Vec_HHH_Intrinsic<"vadduhs">;
+def int_ppc_altivec_vaddshs : PowerPC_Vec_HHH_Intrinsic<"vaddshs">;
+def int_ppc_altivec_vadduws : PowerPC_Vec_WWW_Intrinsic<"vadduws">;
+def int_ppc_altivec_vaddsws : PowerPC_Vec_WWW_Intrinsic<"vaddsws">;
+def int_ppc_altivec_vaddcuw : PowerPC_Vec_WWW_Intrinsic<"vaddcuw">;
+
+// Saturating subs.
+def int_ppc_altivec_vsububs : PowerPC_Vec_BBB_Intrinsic<"vsububs">;
+def int_ppc_altivec_vsubsbs : PowerPC_Vec_BBB_Intrinsic<"vsubsbs">;
+def int_ppc_altivec_vsubuhs : PowerPC_Vec_HHH_Intrinsic<"vsubuhs">;
+def int_ppc_altivec_vsubshs : PowerPC_Vec_HHH_Intrinsic<"vsubshs">;
+def int_ppc_altivec_vsubuws : PowerPC_Vec_WWW_Intrinsic<"vsubuws">;
+def int_ppc_altivec_vsubsws : PowerPC_Vec_WWW_Intrinsic<"vsubsws">;
+def int_ppc_altivec_vsubcuw : PowerPC_Vec_WWW_Intrinsic<"vsubcuw">;
+
+let TargetPrefix = "ppc" in {  // All PPC intrinsics start with "llvm.ppc.".
+  // Saturating multiply-adds.
+  def int_ppc_altivec_vmhaddshs : GCCBuiltin<"__builtin_altivec_vmhaddshs">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+                       llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmhraddshs : GCCBuiltin<"__builtin_altivec_vmhraddshs">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+                       llvm_v8i16_ty, llvm_v8i16_ty], [IntrNoMem]>;
+
+  def int_ppc_altivec_vmaddfp : GCCBuiltin<"__builtin_altivec_vmaddfp">,
+            Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+                       llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vnmsubfp : GCCBuiltin<"__builtin_altivec_vnmsubfp">,
+            Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
+                       llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>;
+
+  // Vector Multiply Sum Instructions.
+  def int_ppc_altivec_vmsummbm : GCCBuiltin<"__builtin_altivec_vmsummbm">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumshm : GCCBuiltin<"__builtin_altivec_vmsumshm">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumshs : GCCBuiltin<"__builtin_altivec_vmsumshs">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumubm : GCCBuiltin<"__builtin_altivec_vmsumubm">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumuhm : GCCBuiltin<"__builtin_altivec_vmsumuhm">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+  def int_ppc_altivec_vmsumuhs : GCCBuiltin<"__builtin_altivec_vmsumuhs">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                       llvm_v4i32_ty], [IntrNoMem]>;
+
+  // Vector Multiply Instructions.
+  def int_ppc_altivec_vmulesb : GCCBuiltin<"__builtin_altivec_vmulesb">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmulesh : GCCBuiltin<"__builtin_altivec_vmulesh">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmuleub : GCCBuiltin<"__builtin_altivec_vmuleub">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmuleuh : GCCBuiltin<"__builtin_altivec_vmuleuh">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+
+  def int_ppc_altivec_vmulosb : GCCBuiltin<"__builtin_altivec_vmulosb">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmulosh : GCCBuiltin<"__builtin_altivec_vmulosh">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmuloub : GCCBuiltin<"__builtin_altivec_vmuloub">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vmulouh : GCCBuiltin<"__builtin_altivec_vmulouh">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+
+  // Vector Sum Instructions.
+  def int_ppc_altivec_vsumsws : GCCBuiltin<"__builtin_altivec_vsumsws">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vsum2sws : GCCBuiltin<"__builtin_altivec_vsum2sws">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vsum4sbs : GCCBuiltin<"__builtin_altivec_vsum4sbs">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vsum4shs : GCCBuiltin<"__builtin_altivec_vsum4shs">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vsum4ubs : GCCBuiltin<"__builtin_altivec_vsum4ubs">,
+            Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+
+  // Other multiplies.
+  def int_ppc_altivec_vmladduhm : GCCBuiltin<"__builtin_altivec_vmladduhm">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                       llvm_v8i16_ty], [IntrNoMem]>;
+
+  // Packs.
+  def int_ppc_altivec_vpkpx : GCCBuiltin<"__builtin_altivec_vpkpx">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vpkshss : GCCBuiltin<"__builtin_altivec_vpkshss">,
+            Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vpkshus : GCCBuiltin<"__builtin_altivec_vpkshus">,
+            Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vpkswss : GCCBuiltin<"__builtin_altivec_vpkswss">,
+            Intrinsic<[llvm_v16i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  def int_ppc_altivec_vpkswus : GCCBuiltin<"__builtin_altivec_vpkswus">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+  // vpkuhum is lowered to a shuffle.
+  def int_ppc_altivec_vpkuhus : GCCBuiltin<"__builtin_altivec_vpkuhus">,
+            Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+                      [IntrNoMem]>;
+  // vpkuwum is lowered to a shuffle.
+  def int_ppc_altivec_vpkuwus : GCCBuiltin<"__builtin_altivec_vpkuwus">,
+            Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+                      [IntrNoMem]>;
+
+  // Unpacks.
+ def int_ppc_altivec_vupkhpx : GCCBuiltin<"__builtin_altivec_vupkhpx">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vupkhsb : GCCBuiltin<"__builtin_altivec_vupkhsb">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vupkhsh : GCCBuiltin<"__builtin_altivec_vupkhsh">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vupklpx : GCCBuiltin<"__builtin_altivec_vupklpx">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + def int_ppc_altivec_vupklsb : GCCBuiltin<"__builtin_altivec_vupklsb">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vupklsh : GCCBuiltin<"__builtin_altivec_vupklsh">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], [IntrNoMem]>; + + + // FP <-> integer conversion. + def int_ppc_altivec_vcfsx : GCCBuiltin<"__builtin_altivec_vcfsx">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vcfux : GCCBuiltin<"__builtin_altivec_vcfux">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vctsxs : GCCBuiltin<"__builtin_altivec_vctsxs">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_ppc_altivec_vctuxs : GCCBuiltin<"__builtin_altivec_vctuxs">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_ppc_altivec_vrfim : GCCBuiltin<"__builtin_altivec_vrfim">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_ppc_altivec_vrfin : GCCBuiltin<"__builtin_altivec_vrfin">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_ppc_altivec_vrfip : GCCBuiltin<"__builtin_altivec_vrfip">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_ppc_altivec_vrfiz : GCCBuiltin<"__builtin_altivec_vrfiz">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +} + +def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">; +def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">; + +def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">; +def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">; +def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">; + +// Right Shifts. +def int_ppc_altivec_vsr : PowerPC_Vec_WWW_Intrinsic<"vsr">; +def int_ppc_altivec_vsro : PowerPC_Vec_WWW_Intrinsic<"vsro">; + +def int_ppc_altivec_vsrb : PowerPC_Vec_BBB_Intrinsic<"vsrb">; +def int_ppc_altivec_vsrh : PowerPC_Vec_HHH_Intrinsic<"vsrh">; +def int_ppc_altivec_vsrw : PowerPC_Vec_WWW_Intrinsic<"vsrw">; +def int_ppc_altivec_vsrab : PowerPC_Vec_BBB_Intrinsic<"vsrab">; +def int_ppc_altivec_vsrah : PowerPC_Vec_HHH_Intrinsic<"vsrah">; +def int_ppc_altivec_vsraw : PowerPC_Vec_WWW_Intrinsic<"vsraw">; + +// Rotates. +def int_ppc_altivec_vrlb : PowerPC_Vec_BBB_Intrinsic<"vrlb">; +def int_ppc_altivec_vrlh : PowerPC_Vec_HHH_Intrinsic<"vrlh">; +def int_ppc_altivec_vrlw : PowerPC_Vec_WWW_Intrinsic<"vrlw">; + +let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.". + // Miscellaneous. 
+ def int_ppc_altivec_lvsl : + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>; + def int_ppc_altivec_lvsr : + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrNoMem]>; + + def int_ppc_altivec_vperm : GCCBuiltin<"__builtin_altivec_vperm_4si">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v16i8_ty], [IntrNoMem]>; + def int_ppc_altivec_vsel : GCCBuiltin<"__builtin_altivec_vsel_4si">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>; +} + +def int_ppc_altivec_vexptefp : PowerPC_Vec_FF_Intrinsic<"vexptefp">; +def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">; +def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">; +def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">; diff --git a/final/include/llvm/IntrinsicsX86.td b/final/include/llvm/IntrinsicsX86.td new file mode 100644 index 00000000000..49462200f09 --- /dev/null +++ b/final/include/llvm/IntrinsicsX86.td @@ -0,0 +1,1584 @@ +//===- IntrinsicsX86.td - Defines X86 intrinsics -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the X86-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Interrupt traps +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_int : Intrinsic<[], [llvm_i8_ty]>; +} + +//===----------------------------------------------------------------------===// +// SSE1 + +// Arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
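+  // For illustration (an IR sketch, not a definition from this file), the
+  // scalar form below operates on the low lane and passes the upper lanes of
+  // the first operand through:
+  //   %r = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a,
+  //                                              <4 x float> %b)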
+ def int_x86_sse_add_ss : GCCBuiltin<"__builtin_ia32_addss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_sub_ss : GCCBuiltin<"__builtin_ia32_subss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_mul_ss : GCCBuiltin<"__builtin_ia32_mulss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_div_ss : GCCBuiltin<"__builtin_ia32_divss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_sqrt_ps : GCCBuiltin<"__builtin_ia32_sqrtps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_rcp_ss : GCCBuiltin<"__builtin_ia32_rcpss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_rcp_ps : GCCBuiltin<"__builtin_ia32_rcpps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_rsqrt_ss : GCCBuiltin<"__builtin_ia32_rsqrtss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_rsqrt_ps : GCCBuiltin<"__builtin_ia32_rsqrtps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse_min_ss : GCCBuiltin<"__builtin_ia32_minss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_min_ps : GCCBuiltin<"__builtin_ia32_minps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_max_ss : GCCBuiltin<"__builtin_ia32_maxss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_max_ps : GCCBuiltin<"__builtin_ia32_maxps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; +} + +// Comparison ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_sse_cmp_ss : + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_sse_cmp_ps : + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_sse_comieq_ss : GCCBuiltin<"__builtin_ia32_comieq">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_comilt_ss : GCCBuiltin<"__builtin_ia32_comilt">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_comile_ss : GCCBuiltin<"__builtin_ia32_comile">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_comigt_ss : GCCBuiltin<"__builtin_ia32_comigt">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_comige_ss : GCCBuiltin<"__builtin_ia32_comige">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_comineq_ss : GCCBuiltin<"__builtin_ia32_comineq">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomieq_ss : GCCBuiltin<"__builtin_ia32_ucomieq">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomilt_ss : GCCBuiltin<"__builtin_ia32_ucomilt">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomile_ss : GCCBuiltin<"__builtin_ia32_ucomile">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomigt_ss : GCCBuiltin<"__builtin_ia32_ucomigt">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomige_ss : GCCBuiltin<"__builtin_ia32_ucomige">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_ucomineq_ss : GCCBuiltin<"__builtin_ia32_ucomineq">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; +} + + +// Conversion ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse_cvtss2si : GCCBuiltin<"__builtin_ia32_cvtss2si">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvtss2si64 : GCCBuiltin<"__builtin_ia32_cvtss2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvttss2si : GCCBuiltin<"__builtin_ia32_cvttss2si">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_sse_cvtps2pi : GCCBuiltin<"__builtin_ia32_cvtps2pi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvttps2pi: GCCBuiltin<"__builtin_ia32_cvttps2pi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvtpi2ps : GCCBuiltin<"__builtin_ia32_cvtpi2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_x86mmx_ty], [IntrNoMem]>; +} + +// SIMD load ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
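+  // For illustration (an IR sketch, not a definition from this file), an
+  // unaligned 128-bit load through the byte pointer:
+  //   %v = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %p)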
+ def int_x86_sse_loadu_ps : GCCBuiltin<"__builtin_ia32_loadups">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// SIMD store ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v4f32_ty], []>; +} + +// Cacheability support ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse_movnt_ps : GCCBuiltin<"__builtin_ia32_movntps">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v4f32_ty], []>; + def int_x86_sse_sfence : GCCBuiltin<"__builtin_ia32_sfence">, + Intrinsic<[], [], []>; +} + +// Control register. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse_stmxcsr : + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_sse_ldmxcsr : + Intrinsic<[], [llvm_ptr_ty], []>; +} + +// Misc. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse_movmsk_ps : GCCBuiltin<"__builtin_ia32_movmskps">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SSE2 + +// FP arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_add_sd : GCCBuiltin<"__builtin_ia32_addsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_sub_sd : GCCBuiltin<"__builtin_ia32_subsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_mul_sd : GCCBuiltin<"__builtin_ia32_mulsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_div_sd : GCCBuiltin<"__builtin_ia32_divsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_sse2_sqrt_pd : GCCBuiltin<"__builtin_ia32_sqrtpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_sse2_min_sd : GCCBuiltin<"__builtin_ia32_minsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_min_pd : GCCBuiltin<"__builtin_ia32_minpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_max_sd : GCCBuiltin<"__builtin_ia32_maxsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_max_pd : GCCBuiltin<"__builtin_ia32_maxpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; +} + +// FP comparison ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
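+  // The double-precision cmp_sd/cmp_pd below reuse the same i8 predicate
+  // encoding as the SSE cmp_ss/cmp_ps intrinsics above.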
+ def int_x86_sse2_cmp_sd : + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_sse2_cmp_pd : + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_sse2_comieq_sd : GCCBuiltin<"__builtin_ia32_comisdeq">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_comilt_sd : GCCBuiltin<"__builtin_ia32_comisdlt">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_comile_sd : GCCBuiltin<"__builtin_ia32_comisdle">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_comigt_sd : GCCBuiltin<"__builtin_ia32_comisdgt">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_comige_sd : GCCBuiltin<"__builtin_ia32_comisdge">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_comineq_sd : GCCBuiltin<"__builtin_ia32_comisdneq">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomieq_sd : GCCBuiltin<"__builtin_ia32_ucomisdeq">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomilt_sd : GCCBuiltin<"__builtin_ia32_ucomisdlt">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomile_sd : GCCBuiltin<"__builtin_ia32_ucomisdle">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomigt_sd : GCCBuiltin<"__builtin_ia32_ucomisdgt">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomige_sd : GCCBuiltin<"__builtin_ia32_ucomisdge">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_ucomineq_sd : GCCBuiltin<"__builtin_ia32_ucomisdneq">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; +} + +// Integer arithmetic ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
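+  // The saturating adds are marked Commutative so the optimizer may swap
+  // their operands; the saturating subtracts are not. E.g. padds_b is the
+  // intrinsic behind the C-level _mm_adds_epi8. Illustrative IR call
+  // (sketch, not part of the original file):
+  //   %r = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b)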
+ def int_x86_sse2_padds_b : GCCBuiltin<"__builtin_ia32_paddsb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_padds_w : GCCBuiltin<"__builtin_ia32_paddsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pminu_b : GCCBuiltin<"__builtin_ia32_pminub128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; + def int_x86_sse2_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem, Commutative]>; +} + +// Integer shift ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
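+  // Two shift forms follow: psll/psrl/psra take the count from the low
+  // quadword of a vector operand, while pslli/psrli/psrai take an i32
+  // immediate count. Illustrative immediate-form IR call (sketch):
+  //   %r = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a, i32 3)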
+ def int_x86_sse2_psll_w : GCCBuiltin<"__builtin_ia32_psllw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_psll_d : GCCBuiltin<"__builtin_ia32_pslld128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_psll_q : GCCBuiltin<"__builtin_ia32_psllq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_d : GCCBuiltin<"__builtin_ia32_psrld128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_sse2_psra_w : GCCBuiltin<"__builtin_ia32_psraw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_psra_d : GCCBuiltin<"__builtin_ia32_psrad128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_sse2_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrai_d : GCCBuiltin<"__builtin_ia32_psradi128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_sse2_psll_dq : GCCBuiltin<"__builtin_ia32_pslldqi128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_dq : GCCBuiltin<"__builtin_ia32_psrldqi128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psll_dq_bs : GCCBuiltin<"__builtin_ia32_pslldqi128_byteshift">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_psrl_dq_bs : GCCBuiltin<"__builtin_ia32_psrldqi128_byteshift">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Integer comparison ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
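+  // Each packed compare sets an element to all ones (-1) where the
+  // predicate holds and to zero elsewhere, so the result can be used
+  // directly as a mask with pand/pandn.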
+ def int_x86_sse2_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; +} + +// Conversion ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_cvtdq2pd : GCCBuiltin<"__builtin_ia32_cvtdq2pd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_cvtdq2ps : GCCBuiltin<"__builtin_ia32_cvtdq2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_cvtpd2dq : GCCBuiltin<"__builtin_ia32_cvtpd2dq">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvttpd2dq : GCCBuiltin<"__builtin_ia32_cvttpd2dq">, + Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtpd2ps : GCCBuiltin<"__builtin_ia32_cvtpd2ps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtps2dq : GCCBuiltin<"__builtin_ia32_cvtps2dq">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse2_cvttps2dq : GCCBuiltin<"__builtin_ia32_cvttps2dq">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse2_cvtps2pd : GCCBuiltin<"__builtin_ia32_cvtps2pd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse2_cvtsd2si : GCCBuiltin<"__builtin_ia32_cvtsd2si">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtsd2si64 : GCCBuiltin<"__builtin_ia32_cvtsd2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvttsd2si : GCCBuiltin<"__builtin_ia32_cvttsd2si">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse_cvttpd2pi: GCCBuiltin<"__builtin_ia32_cvttpd2pi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse_cvtpi2pd : GCCBuiltin<"__builtin_ia32_cvtpi2pd">, + Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>; +} + +// SIMD load ops +let TargetPrefix 
= "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_loadu_pd : GCCBuiltin<"__builtin_ia32_loadupd">, + Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_sse2_loadu_dq : GCCBuiltin<"__builtin_ia32_loaddqu">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// SIMD store ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v2f64_ty], []>; + def int_x86_sse2_storeu_dq : GCCBuiltin<"__builtin_ia32_storedqu">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v16i8_ty], []>; + def int_x86_sse2_storel_dq : GCCBuiltin<"__builtin_ia32_storelv4si">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v4i32_ty], []>; +} + +// Cacheability support ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_movnt_dq : GCCBuiltin<"__builtin_ia32_movntdq">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v2i64_ty], []>; + def int_x86_sse2_movnt_pd : GCCBuiltin<"__builtin_ia32_movntpd">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v2f64_ty], []>; + def int_x86_sse2_movnt_i : GCCBuiltin<"__builtin_ia32_movnti">, + Intrinsic<[], [llvm_ptr_ty, + llvm_i32_ty], []>; +} + +// Misc. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse2_packsswb_128 : GCCBuiltin<"__builtin_ia32_packsswb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_packssdw_128 : GCCBuiltin<"__builtin_ia32_packssdw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_sse2_packuswb_128 : GCCBuiltin<"__builtin_ia32_packuswb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + def int_x86_sse2_movmsk_pd : GCCBuiltin<"__builtin_ia32_movmskpd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse2_pmovmskb_128 : GCCBuiltin<"__builtin_ia32_pmovmskb128">, + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse2_maskmov_dqu : GCCBuiltin<"__builtin_ia32_maskmovdqu">, + Intrinsic<[], [llvm_v16i8_ty, + llvm_v16i8_ty, llvm_ptr_ty], []>; + def int_x86_sse2_clflush : GCCBuiltin<"__builtin_ia32_clflush">, + Intrinsic<[], [llvm_ptr_ty], []>; + def int_x86_sse2_lfence : GCCBuiltin<"__builtin_ia32_lfence">, + Intrinsic<[], [], []>; + def int_x86_sse2_mfence : GCCBuiltin<"__builtin_ia32_mfence">, + Intrinsic<[], [], []>; +} + +//===----------------------------------------------------------------------===// +// SSE3 + +// Addition / subtraction ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse3_addsub_ps : GCCBuiltin<"__builtin_ia32_addsubps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse3_addsub_pd : GCCBuiltin<"__builtin_ia32_addsubpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; +} + +// Horizontal ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_sse3_hadd_ps : GCCBuiltin<"__builtin_ia32_haddps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse3_hadd_pd : GCCBuiltin<"__builtin_ia32_haddpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_sse3_hsub_ps : GCCBuiltin<"__builtin_ia32_hsubps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_sse3_hsub_pd : GCCBuiltin<"__builtin_ia32_hsubpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; +} + +// Specialized unaligned load. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse3_ldu_dq : GCCBuiltin<"__builtin_ia32_lddqu">, + Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// Thread synchronization ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse3_monitor : GCCBuiltin<"__builtin_ia32_monitor">, + Intrinsic<[], [llvm_ptr_ty, + llvm_i32_ty, llvm_i32_ty], []>; + def int_x86_sse3_mwait : GCCBuiltin<"__builtin_ia32_mwait">, + Intrinsic<[], [llvm_i32_ty, + llvm_i32_ty], []>; +} + +//===----------------------------------------------------------------------===// +// SSSE3 + +// Horizontal arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_w_128 : GCCBuiltin<"__builtin_ia32_phaddw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_d_128 : GCCBuiltin<"__builtin_ia32_phaddd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phadd_sw_128 : GCCBuiltin<"__builtin_ia32_phaddsw128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_w_128 : GCCBuiltin<"__builtin_ia32_phsubw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_d_128 : GCCBuiltin<"__builtin_ia32_phsubd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_ssse3_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_phsub_sw_128 : GCCBuiltin<"__builtin_ia32_phsubsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_pmadd_ub_sw_128 : GCCBuiltin<"__builtin_ia32_pmaddubsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; +} + +// Packed multiply high with round and scale +let TargetPrefix = 
"x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_ssse3_pmul_hr_sw_128 : GCCBuiltin<"__builtin_ia32_pmulhrsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem, Commutative]>; +} + +// Shuffle ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_pshuf_b_128 : GCCBuiltin<"__builtin_ia32_pshufb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + def int_x86_sse_pshuf_w : GCCBuiltin<"__builtin_ia32_pshufw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i8_ty], + [IntrNoMem]>; +} + +// Sign ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_psign_b : GCCBuiltin<"__builtin_ia32_psignb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_b_128 : GCCBuiltin<"__builtin_ia32_psignb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, + llvm_v16i8_ty], [IntrNoMem]>; + + def int_x86_ssse3_psign_w : GCCBuiltin<"__builtin_ia32_psignw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_w_128 : GCCBuiltin<"__builtin_ia32_psignw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, + llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_psign_d : GCCBuiltin<"__builtin_ia32_psignd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_psign_d_128 : GCCBuiltin<"__builtin_ia32_psignd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, + llvm_v4i32_ty], [IntrNoMem]>; +} + +// Absolute value ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_ssse3_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_b_128 : GCCBuiltin<"__builtin_ia32_pabsb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>; + + def int_x86_ssse3_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_w_128 : GCCBuiltin<"__builtin_ia32_pabsw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>; + + def int_x86_ssse3_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_ssse3_pabs_d_128 : GCCBuiltin<"__builtin_ia32_pabsd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SSE4.1 + +// FP rounding ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_sse41_round_ss : GCCBuiltin<"__builtin_ia32_roundss">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_ps : GCCBuiltin<"__builtin_ia32_roundps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_sd : GCCBuiltin<"__builtin_ia32_roundsd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_sse41_round_pd : GCCBuiltin<"__builtin_ia32_roundpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Vector sign and zero extend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty], + [IntrNoMem]>; + def int_x86_sse41_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +} + +// Vector min element +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_phminposuw : GCCBuiltin<"__builtin_ia32_phminposuw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], + [IntrNoMem]>; +} + +// Vector compare, min, max +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
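+  // Note: pcmpeqq below is SSE4.1, while pcmpgtq (defined alongside it) is
+  // an SSE4.2 instruction; the min/max definitions extend the SSE2 forms to
+  // the remaining element widths and signednesses.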
+ def int_x86_sse41_pcmpeqq : GCCBuiltin<"__builtin_ia32_pcmpeqq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse42_pcmpgtq : GCCBuiltin<"__builtin_ia32_pcmpgtq">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_sse41_pmaxsb : GCCBuiltin<"__builtin_ia32_pmaxsb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pmaxsd : GCCBuiltin<"__builtin_ia32_pmaxsd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pmaxud : GCCBuiltin<"__builtin_ia32_pmaxud128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pmaxuw : GCCBuiltin<"__builtin_ia32_pmaxuw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pminsb : GCCBuiltin<"__builtin_ia32_pminsb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pminsd : GCCBuiltin<"__builtin_ia32_pminsd128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pminud : GCCBuiltin<"__builtin_ia32_pminud128">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_pminuw : GCCBuiltin<"__builtin_ia32_pminuw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], + [IntrNoMem, Commutative]>; +} + +// Advanced Encryption Standard (AES) Instructions +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_aesni_aesimc : GCCBuiltin<"__builtin_ia32_aesimc128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesenc : GCCBuiltin<"__builtin_ia32_aesenc128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesenclast : GCCBuiltin<"__builtin_ia32_aesenclast128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesdec : GCCBuiltin<"__builtin_ia32_aesdec128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aesdeclast : GCCBuiltin<"__builtin_ia32_aesdeclast128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], + [IntrNoMem]>; + def int_x86_aesni_aeskeygenassist : + GCCBuiltin<"__builtin_ia32_aeskeygenassist128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; +} + +// Vector pack +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_packusdw : GCCBuiltin<"__builtin_ia32_packusdw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem]>; +} + +// Vector multiply +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pmuldq : GCCBuiltin<"__builtin_ia32_pmuldq128">, + Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty], + [IntrNoMem, Commutative]>; +} + +// Vector extract +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
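+  // No GCCBuiltin is attached to pextrb/pextrd/pextrq here, so any C-level
+  // builtin mapping is handled elsewhere (e.g. a frontend may lower the
+  // corresponding builtins straight to extractelement).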
+ def int_x86_sse41_pextrb : + Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_pextrd : + Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_pextrq : + Intrinsic<[llvm_i64_ty], [llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_extractps : GCCBuiltin<"__builtin_ia32_extractps128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; +} + +// Vector insert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_insertps : GCCBuiltin<"__builtin_ia32_insertps128">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty], + [IntrNoMem]>; +} + +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_pblendvb : GCCBuiltin<"__builtin_ia32_pblendvb128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_v16i8_ty], + [IntrNoMem]>; + def int_x86_sse41_pblendw : GCCBuiltin<"__builtin_ia32_pblendw128">, + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_blendpd : GCCBuiltin<"__builtin_ia32_blendpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_blendps : GCCBuiltin<"__builtin_ia32_blendps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse41_blendvpd : GCCBuiltin<"__builtin_ia32_blendvpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_v2f64_ty], + [IntrNoMem]>; + def int_x86_sse41_blendvps : GCCBuiltin<"__builtin_ia32_blendvps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_v4f32_ty], + [IntrNoMem]>; +} + +// Vector dot product +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_dppd : GCCBuiltin<"__builtin_ia32_dppd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,llvm_i32_ty], + [IntrNoMem, Commutative]>; + def int_x86_sse41_dpps : GCCBuiltin<"__builtin_ia32_dpps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,llvm_i32_ty], + [IntrNoMem, Commutative]>; +} + +// Vector sum of absolute differences +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_mpsadbw : GCCBuiltin<"__builtin_ia32_mpsadbw128">, + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,llvm_i32_ty], + [IntrNoMem, Commutative]>; +} + +// Cacheability support ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_movntdqa : GCCBuiltin<"__builtin_ia32_movntdqa">, + Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// Test instruction with bitwise comparison. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse41_ptestz : GCCBuiltin<"__builtin_ia32_ptestz128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestc : GCCBuiltin<"__builtin_ia32_ptestc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; + def int_x86_sse41_ptestnzc : GCCBuiltin<"__builtin_ia32_ptestnzc128">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_v4f32_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// SSE4.2 + +// Miscellaneous +// CRC Instruction +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
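+  // CRC32 accumulates: the first operand is the running CRC, the second is
+  // the next data chunk, and the result feeds the following step.
+  // Byte-at-a-time chaining as an illustrative IR sketch (not part of the
+  // original file):
+  //   %c1 = call i32 @llvm.x86.sse42.crc32.8(i32 %c0, i8 %b0)
+  //   %c2 = call i32 @llvm.x86.sse42.crc32.8(i32 %c1, i8 %b1)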
+ def int_x86_sse42_crc32_8 : GCCBuiltin<"__builtin_ia32_crc32qi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_crc32_16 : GCCBuiltin<"__builtin_ia32_crc32hi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i16_ty], + [IntrNoMem]>; + def int_x86_sse42_crc32_32 : GCCBuiltin<"__builtin_ia32_crc32si">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_sse42_crc64_8 : + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_crc64_64 : GCCBuiltin<"__builtin_ia32_crc32di">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; +} + +// String/text processing ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; + def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">, + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// AVX + +// Arithmetic ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_avx_addsub_pd_256 : GCCBuiltin<"__builtin_ia32_addsubpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_addsub_ps_256 : GCCBuiltin<"__builtin_ia32_addsubps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_max_pd_256 : GCCBuiltin<"__builtin_ia32_maxpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_max_ps_256 : GCCBuiltin<"__builtin_ia32_maxps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_min_pd_256 : GCCBuiltin<"__builtin_ia32_minpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_min_ps_256 : GCCBuiltin<"__builtin_ia32_minps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + + def int_x86_avx_sqrt_pd_256 : GCCBuiltin<"__builtin_ia32_sqrtpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_sqrt_ps_256 : GCCBuiltin<"__builtin_ia32_sqrtps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + + def int_x86_avx_rsqrt_ps_256 : GCCBuiltin<"__builtin_ia32_rsqrtps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + + def int_x86_avx_rcp_ps_256 : GCCBuiltin<"__builtin_ia32_rcpps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + + def int_x86_avx_round_pd_256 : GCCBuiltin<"__builtin_ia32_roundpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx_round_ps_256 : GCCBuiltin<"__builtin_ia32_roundps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Horizontal ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_hadd_pd_256 : GCCBuiltin<"__builtin_ia32_haddpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_hsub_ps_256 : GCCBuiltin<"__builtin_ia32_hsubps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_hsub_pd_256 : GCCBuiltin<"__builtin_ia32_hsubpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_hadd_ps_256 : GCCBuiltin<"__builtin_ia32_haddps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; +} + +// Vector permutation +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
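+  // Two permute flavors follow: vpermilvar_* take a per-element selector
+  // vector (the variable form), vpermil_* take an i8 immediate control, and
+  // vperm2f128_* select and combine 128-bit halves of two 256-bit sources.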
+ def int_x86_avx_vpermilvar_pd : GCCBuiltin<"__builtin_ia32_vpermilvarpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx_vpermilvar_ps : GCCBuiltin<"__builtin_ia32_vpermilvarps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_v4i32_ty], [IntrNoMem]>; + + def int_x86_avx_vpermilvar_pd_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx_vpermilvar_ps_256 : + GCCBuiltin<"__builtin_ia32_vpermilvarps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8i32_ty], [IntrNoMem]>; + + def int_x86_avx_vperm2f128_pd_256 : + GCCBuiltin<"__builtin_ia32_vperm2f128_pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vperm2f128_ps_256 : + GCCBuiltin<"__builtin_ia32_vperm2f128_ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vperm2f128_si_256 : + GCCBuiltin<"__builtin_ia32_vperm2f128_si256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx_vpermil_pd : GCCBuiltin<"__builtin_ia32_vpermilpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vpermil_ps : GCCBuiltin<"__builtin_ia32_vpermilps">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, + llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx_vpermil_pd_256 : GCCBuiltin<"__builtin_ia32_vpermilpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vpermil_ps_256 : GCCBuiltin<"__builtin_ia32_vpermilps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_i8_ty], [IntrNoMem]>; +} + +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_blend_pd_256 : GCCBuiltin<"__builtin_ia32_blendpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx_blend_ps_256 : GCCBuiltin<"__builtin_ia32_blendps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx_blendv_pd_256 : GCCBuiltin<"__builtin_ia32_blendvpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty, llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_blendv_ps_256 : GCCBuiltin<"__builtin_ia32_blendvps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty, llvm_v8f32_ty], [IntrNoMem]>; +} + +// Vector dot product +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i32_ty], [IntrNoMem]>; +} + +// Vector compare +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_cmp_pd_256 : GCCBuiltin<"__builtin_ia32_cmppd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_cmp_ps_256 : GCCBuiltin<"__builtin_ia32_cmpps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; +} + +// Vector extract and insert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
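+  // For vextractf128_*/vinsertf128_* the i8 immediate selects the 128-bit
+  // half of the 256-bit operand: 0 is the low lane, 1 the high lane.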
+ def int_x86_avx_vextractf128_pd_256 : + GCCBuiltin<"__builtin_ia32_vextractf128_pd256">, + Intrinsic<[llvm_v2f64_ty], [llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vextractf128_ps_256 : + GCCBuiltin<"__builtin_ia32_vextractf128_ps256">, + Intrinsic<[llvm_v4f32_ty], [llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vextractf128_si_256 : + GCCBuiltin<"__builtin_ia32_vextractf128_si256">, + Intrinsic<[llvm_v4i32_ty], [llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx_vinsertf128_pd_256 : + GCCBuiltin<"__builtin_ia32_vinsertf128_pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, + llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vinsertf128_ps_256 : + GCCBuiltin<"__builtin_ia32_vinsertf128_ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, + llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx_vinsertf128_si_256 : + GCCBuiltin<"__builtin_ia32_vinsertf128_si256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, + llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; +} + +// Vector convert +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_cvtdq2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtdq2pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx_cvtdq2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtdq2ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_v8i32_ty], [IntrNoMem]>; + def int_x86_avx_cvt_pd2_ps_256 : GCCBuiltin<"__builtin_ia32_cvtpd2ps256">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_cvt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvtps2dq256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_cvt_ps2_pd_256 : GCCBuiltin<"__builtin_ia32_cvtps2pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx_cvtt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvttpd2dq256">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_cvt_pd2dq_256 : GCCBuiltin<"__builtin_ia32_cvtpd2dq256">, + Intrinsic<[llvm_v4i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_cvtt_ps2dq_256 : GCCBuiltin<"__builtin_ia32_cvttps2dq256">, + Intrinsic<[llvm_v8i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +} + +// Vector bit test +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
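+  // These mirror the (V)PTEST/VTESTPS/VTESTPD flag outputs: the *z forms
+  // return the ZF result, the *c forms the CF result, and the *nzc forms
+  // return 1 only when both ZF and CF are clear.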
+ def int_x86_avx_vtestz_pd : GCCBuiltin<"__builtin_ia32_vtestzpd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestc_pd : GCCBuiltin<"__builtin_ia32_vtestcpd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestnzc_pd : GCCBuiltin<"__builtin_ia32_vtestnzcpd">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, + llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestz_ps : GCCBuiltin<"__builtin_ia32_vtestzps">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx_vtestc_ps : GCCBuiltin<"__builtin_ia32_vtestcps">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx_vtestnzc_ps : GCCBuiltin<"__builtin_ia32_vtestnzcps">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, + llvm_v4f32_ty], [IntrNoMem]>; + def int_x86_avx_vtestz_pd_256 : GCCBuiltin<"__builtin_ia32_vtestzpd256">, + Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestcpd256">, + Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestnzc_pd_256 : GCCBuiltin<"__builtin_ia32_vtestnzcpd256">, + Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty, + llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_vtestz_ps_256 : GCCBuiltin<"__builtin_ia32_vtestzps256">, + Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_vtestc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestcps256">, + Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_vtestnzc_ps_256 : GCCBuiltin<"__builtin_ia32_vtestnzcps256">, + Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty, + llvm_v8f32_ty], [IntrNoMem]>; + def int_x86_avx_ptestz_256 : GCCBuiltin<"__builtin_ia32_ptestz256">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, + llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx_ptestc_256 : GCCBuiltin<"__builtin_ia32_ptestc256">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, + llvm_v4i64_ty], [IntrNoMem]>; + def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">, + Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty, + llvm_v4i64_ty], [IntrNoMem]>; +} + +// Vector extract sign mask +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_movmsk_pd_256 : GCCBuiltin<"__builtin_ia32_movmskpd256">, + Intrinsic<[llvm_i32_ty], [llvm_v4f64_ty], [IntrNoMem]>; + def int_x86_avx_movmsk_ps_256 : GCCBuiltin<"__builtin_ia32_movmskps256">, + Intrinsic<[llvm_i32_ty], [llvm_v8f32_ty], [IntrNoMem]>; +} + +// Vector zero +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_vzeroall : GCCBuiltin<"__builtin_ia32_vzeroall">, + Intrinsic<[], [], []>; + def int_x86_avx_vzeroupper : GCCBuiltin<"__builtin_ia32_vzeroupper">, + Intrinsic<[], [], []>; +} + +// Vector load with broadcast +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
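+  // Each broadcast reads one scalar (or one 128-bit block, for the
+  // vbroadcastf128_* forms) through the i8* operand and splats it across
+  // the destination, hence [IntrReadMem]. Illustrative IR call (sketch):
+  //   %v = call <8 x float> @llvm.x86.avx.vbroadcastss.256(i8* %p)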
+ def int_x86_avx_vbroadcastss : + GCCBuiltin<"__builtin_ia32_vbroadcastss">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_vbroadcast_sd_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastsd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_vbroadcastss_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastss256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_vbroadcastf128_pd_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_vbroadcastf128_ps_256 : + GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// SIMD load ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_loadu_pd_256 : GCCBuiltin<"__builtin_ia32_loadupd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_loadu_ps_256 : GCCBuiltin<"__builtin_ia32_loadups256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_loadu_dq_256 : GCCBuiltin<"__builtin_ia32_loaddqu256">, + Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; + def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">, + Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty], [IntrReadMem]>; +} + +// SIMD store ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_storeu_pd_256 : GCCBuiltin<"__builtin_ia32_storeupd256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>; + def int_x86_avx_storeu_ps_256 : GCCBuiltin<"__builtin_ia32_storeups256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>; + def int_x86_avx_storeu_dq_256 : GCCBuiltin<"__builtin_ia32_storedqu256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty], []>; +} + +// Cacheability support ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_movnt_dq_256 : GCCBuiltin<"__builtin_ia32_movntdq256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty], []>; + def int_x86_avx_movnt_pd_256 : GCCBuiltin<"__builtin_ia32_movntpd256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty], []>; + def int_x86_avx_movnt_ps_256 : GCCBuiltin<"__builtin_ia32_movntps256">, + Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty], []>; +} + +// Conditional load ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx_maskload_pd : GCCBuiltin<"__builtin_ia32_maskloadpd">, + Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty, llvm_v2f64_ty], [IntrReadMem]>; + def int_x86_avx_maskload_ps : GCCBuiltin<"__builtin_ia32_maskloadps">, + Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty, llvm_v4f32_ty], [IntrReadMem]>; + def int_x86_avx_maskload_pd_256 : GCCBuiltin<"__builtin_ia32_maskloadpd256">, + Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty, llvm_v4f64_ty], [IntrReadMem]>; + def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">, + Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8f32_ty], [IntrReadMem]>; +} + +// Conditional store ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
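+  // For the maskload ops above and the maskstore ops below, the sign bit of
+  // each mask element selects the lane. Note the mask is typed as a float
+  // vector in this release; later LLVM versions switched it to an integer
+  // vector type.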
+ def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v2f64_ty, llvm_v2f64_ty], []>; + def int_x86_avx_maskstore_ps : GCCBuiltin<"__builtin_ia32_maskstoreps">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v4f32_ty, llvm_v4f32_ty], []>; + def int_x86_avx_maskstore_pd_256 : + GCCBuiltin<"__builtin_ia32_maskstorepd256">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v4f64_ty, llvm_v4f64_ty], []>; + def int_x86_avx_maskstore_ps_256 : + GCCBuiltin<"__builtin_ia32_maskstoreps256">, + Intrinsic<[], [llvm_ptr_ty, + llvm_v8f32_ty, llvm_v8f32_ty], []>; +} + +//===----------------------------------------------------------------------===// +// MMX + +// Empty MMX state op. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_mmx_emms : GCCBuiltin<"__builtin_ia32_emms">, + Intrinsic<[], [], []>; + def int_x86_mmx_femms : GCCBuiltin<"__builtin_ia32_femms">, + Intrinsic<[], [], []>; +} + +// Integer arithmetic ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + // Addition + def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + + def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_padds_w : GCCBuiltin<"__builtin_ia32_paddsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + def int_x86_mmx_paddus_b : GCCBuiltin<"__builtin_ia32_paddusb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_paddus_w : GCCBuiltin<"__builtin_ia32_paddusw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + // Subtraction + def int_x86_mmx_psub_b : GCCBuiltin<"__builtin_ia32_psubb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_psub_w : GCCBuiltin<"__builtin_ia32_psubw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_psub_d : GCCBuiltin<"__builtin_ia32_psubd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_psub_q : GCCBuiltin<"__builtin_ia32_psubq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + + def int_x86_mmx_psubs_b : GCCBuiltin<"__builtin_ia32_psubsb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psubs_w : GCCBuiltin<"__builtin_ia32_psubsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_psubus_b : GCCBuiltin<"__builtin_ia32_psubusb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psubus_w : GCCBuiltin<"__builtin_ia32_psubusw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + // Multiplication + def int_x86_mmx_pmulh_w : GCCBuiltin<"__builtin_ia32_pmulhw">, 
+ Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmull_w : GCCBuiltin<"__builtin_ia32_pmullw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmulu_dq : GCCBuiltin<"__builtin_ia32_pmuludq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmadd_wd : GCCBuiltin<"__builtin_ia32_pmaddwd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + // Bitwise operations + def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + + // Averages + def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pavg_w : GCCBuiltin<"__builtin_ia32_pavgw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + // Maximum + def int_x86_mmx_pmaxu_b : GCCBuiltin<"__builtin_ia32_pmaxub">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmaxs_w : GCCBuiltin<"__builtin_ia32_pmaxsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + // Minimum + def int_x86_mmx_pminu_b : GCCBuiltin<"__builtin_ia32_pminub">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + def int_x86_mmx_pmins_w : GCCBuiltin<"__builtin_ia32_pminsw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; + + // Packed sum of absolute differences + def int_x86_mmx_psad_bw : GCCBuiltin<"__builtin_ia32_psadbw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem, Commutative]>; +} + +// Integer shift ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
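+  // As with the SSE2 shifts, the psll/psrl/psra forms take the count from a
+  // vector operand and the pslli/psrli/psrai forms take an i32 immediate.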
+ // Shift left logical + def int_x86_mmx_psll_w : GCCBuiltin<"__builtin_ia32_psllw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psll_d : GCCBuiltin<"__builtin_ia32_pslld">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psll_q : GCCBuiltin<"__builtin_ia32_psllq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_psrl_w : GCCBuiltin<"__builtin_ia32_psrlw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psrl_d : GCCBuiltin<"__builtin_ia32_psrld">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_psra_w : GCCBuiltin<"__builtin_ia32_psraw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_psra_d : GCCBuiltin<"__builtin_ia32_psrad">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_pslli_w : GCCBuiltin<"__builtin_ia32_psllwi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_pslli_d : GCCBuiltin<"__builtin_ia32_pslldi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_pslli_q : GCCBuiltin<"__builtin_ia32_psllqi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_mmx_psrli_w : GCCBuiltin<"__builtin_ia32_psrlwi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_psrli_d : GCCBuiltin<"__builtin_ia32_psrldi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_psrli_q : GCCBuiltin<"__builtin_ia32_psrlqi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + + def int_x86_mmx_psrai_w : GCCBuiltin<"__builtin_ia32_psrawi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; + def int_x86_mmx_psrai_d : GCCBuiltin<"__builtin_ia32_psradi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty], [IntrNoMem]>; +} + +// Pack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_mmx_packsswb : GCCBuiltin<"__builtin_ia32_packsswb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_packssdw : GCCBuiltin<"__builtin_ia32_packssdw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_packuswb : GCCBuiltin<"__builtin_ia32_packuswb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; +} + +// Unpacking ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
+ def int_x86_mmx_punpckhbw : GCCBuiltin<"__builtin_ia32_punpckhbw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_punpckhwd : GCCBuiltin<"__builtin_ia32_punpckhwd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_punpckhdq : GCCBuiltin<"__builtin_ia32_punpckhdq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_punpcklbw : GCCBuiltin<"__builtin_ia32_punpcklbw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_punpcklwd : GCCBuiltin<"__builtin_ia32_punpcklwd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_mmx_punpckldq : GCCBuiltin<"__builtin_ia32_punpckldq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; +} + +// Integer comparison ops +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_mmx_pcmpeq_b : GCCBuiltin<"__builtin_ia32_pcmpeqb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_pcmpeq_w : GCCBuiltin<"__builtin_ia32_pcmpeqw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_pcmpeq_d : GCCBuiltin<"__builtin_ia32_pcmpeqd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_pcmpgt_b : GCCBuiltin<"__builtin_ia32_pcmpgtb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_pcmpgt_w : GCCBuiltin<"__builtin_ia32_pcmpgtw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_mmx_pcmpgt_d : GCCBuiltin<"__builtin_ia32_pcmpgtd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty], [IntrNoMem]>; +} + +// Misc. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_mmx_maskmovq : GCCBuiltin<"__builtin_ia32_maskmovq">, + Intrinsic<[], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_ptr_ty], []>; + + def int_x86_mmx_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb">, + Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + + def int_x86_mmx_movnt_dq : GCCBuiltin<"__builtin_ia32_movntq">, + Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>; + + def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">, + Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; +} diff --git a/final/include/llvm/IntrinsicsXCore.td b/final/include/llvm/IntrinsicsXCore.td new file mode 100644 index 00000000000..944120fc8c6 --- /dev/null +++ b/final/include/llvm/IntrinsicsXCore.td @@ -0,0 +1,55 @@ +//==- IntrinsicsXCore.td - XCore intrinsics -*- tablegen -*-==// +// +// Copyright (C) 2008 XMOS +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the XCore-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". 
+ def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>; + def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>; + + // Resource instructions. + def int_xcore_getr : Intrinsic<[llvm_anyptr_ty],[llvm_i32_ty]>; + def int_xcore_freer : Intrinsic<[],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_in : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty],[NoCapture<0>]>; + def int_xcore_int : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_inct : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_out : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_outt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_outct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_chkct : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setd : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_inshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_outshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setpt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_getts : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_syncr : Intrinsic<[],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_settw : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setv : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>; + + // Intrinsics for events. + def int_xcore_waitevent : Intrinsic<[llvm_ptr_ty],[], [IntrReadMem]>; + def int_xcore_clre : Intrinsic<[],[],[]>; +} diff --git a/final/include/llvm/LLVMContext.h b/final/include/llvm/LLVMContext.h new file mode 100644 index 00000000000..3502ff73c19 --- /dev/null +++ b/final/include/llvm/LLVMContext.h @@ -0,0 +1,107 @@ +//===-- llvm/LLVMContext.h - Class for managing "global" state --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares LLVMContext, a container of "global" state in LLVM, such +// as the global type and constant uniquing tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LLVMCONTEXT_H +#define LLVM_LLVMCONTEXT_H + +namespace llvm { + +class LLVMContextImpl; +class StringRef; +class Instruction; +class Module; +class SMDiagnostic; +template class SmallVectorImpl; + +/// This is an important class for using LLVM in a threaded context. It +/// (opaquely) owns and manages the core "global" data of LLVM's core +/// infrastructure, including the type and constant uniquing tables. +/// LLVMContext itself provides no locking guarantees, so you should be careful +/// to have one context per thread. +class LLVMContext { +public: + LLVMContextImpl *const pImpl; + LLVMContext(); + ~LLVMContext(); + + // Pinned metadata names, which always have the same value. This is a + // compile-time performance optimization, not a correctness optimization. 
+ enum { + MD_dbg = 0, // "dbg" + MD_tbaa = 1 // "tbaa" + }; + + /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. + /// This ID is uniqued across modules in the current LLVMContext. + unsigned getMDKindID(StringRef Name) const; + + /// getMDKindNames - Populate client supplied SmallVector with the name for + /// custom metadata IDs registered in this LLVMContext. + void getMDKindNames(SmallVectorImpl &Result) const; + + + typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context, + unsigned LocCookie); + + /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked + /// when problems with inline asm are detected by the backend. The first + /// argument is a function pointer and the second is a context pointer that + /// gets passed into the DiagHandler. + /// + /// LLVMContext doesn't take ownership or interpret either of these + /// pointers. + void setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler, + void *DiagContext = 0); + + /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by + /// setInlineAsmDiagnosticHandler. + InlineAsmDiagHandlerTy getInlineAsmDiagnosticHandler() const; + + /// getInlineAsmDiagnosticContext - Return the diagnostic context set by + /// setInlineAsmDiagnosticHandler. + void *getInlineAsmDiagnosticContext() const; + + + /// emitError - Emit an error message to the currently installed error handler + /// with optional location information. This function returns, so code should + /// be prepared to drop the erroneous construct on the floor and "not crash". + /// The generated code need not be correct. The error message will be + /// implicitly prefixed with "error: " and should not end with a ".". + void emitError(unsigned LocCookie, StringRef ErrorStr); + void emitError(const Instruction *I, StringRef ErrorStr); + void emitError(StringRef ErrorStr); + +private: + // DO NOT IMPLEMENT + LLVMContext(LLVMContext&); + void operator=(LLVMContext&); + + /// addModule - Register a module as being instantiated in this context. If + /// the context is deleted, the module will be deleted as well. + void addModule(Module*); + + /// removeModule - Unregister a module from this context. + void removeModule(Module*); + + // Module needs access to the add/removeModule methods. + friend class Module; +}; + +/// getGlobalContext - Returns a global context. This is for LLVM clients that +/// only care about operating on a single thread. +extern LLVMContext &getGlobalContext(); + +} + +#endif diff --git a/final/include/llvm/LinkAllPasses.h b/final/include/llvm/LinkAllPasses.h new file mode 100644 index 00000000000..1769ba1ef4e --- /dev/null +++ b/final/include/llvm/LinkAllPasses.h @@ -0,0 +1,165 @@ +//===- llvm/LinkAllPasses.h ------------ Reference All Passes ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file pulls in all transformation and analysis passes for tools +// like opt and bugpoint that need this functionality. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LINKALLPASSES_H +#define LLVM_LINKALLPASSES_H + +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/DomPrinter.h" +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Analysis/IntervalPartition.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/RegionPass.h" +#include "llvm/Analysis/RegionPrinter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" +#include + +namespace { + struct ForcePassLinking { + ForcePassLinking() { + // We must reference the passes in such a way that compilers will not + // delete it all as dead code, even with whole program optimization, + // yet is effectively a NO-OP. As the compiler isn't smart enough + // to know that getenv() never returns -1, this will do the job. + if (std::getenv("bar") != (char*) -1) + return; + + (void) llvm::createAAEvalPass(); + (void) llvm::createAggressiveDCEPass(); + (void) llvm::createAliasAnalysisCounterPass(); + (void) llvm::createAliasDebugger(); + (void) llvm::createArgumentPromotionPass(); + (void) llvm::createStructRetPromotionPass(); + (void) llvm::createBasicAliasAnalysisPass(); + (void) llvm::createLibCallAliasAnalysisPass(0); + (void) llvm::createScalarEvolutionAliasAnalysisPass(); + (void) llvm::createTypeBasedAliasAnalysisPass(); + (void) llvm::createBlockPlacementPass(); + (void) llvm::createBreakCriticalEdgesPass(); + (void) llvm::createCFGSimplificationPass(); + (void) llvm::createConstantMergePass(); + (void) llvm::createConstantPropagationPass(); + (void) llvm::createDeadArgEliminationPass(); + (void) llvm::createDeadCodeEliminationPass(); + (void) llvm::createDeadInstEliminationPass(); + (void) llvm::createDeadStoreEliminationPass(); + (void) llvm::createDeadTypeEliminationPass(); + (void) llvm::createDomOnlyPrinterPass(); + (void) llvm::createDomPrinterPass(); + (void) llvm::createDomOnlyViewerPass(); + (void) llvm::createDomViewerPass(); + (void) llvm::createEdgeProfilerPass(); + (void) llvm::createOptimalEdgeProfilerPass(); + (void) llvm::createPathProfilerPass(); + (void) llvm::createFunctionInliningPass(); + (void) llvm::createAlwaysInlinerPass(); + (void) llvm::createGlobalDCEPass(); + (void) llvm::createGlobalOptimizerPass(); + (void) llvm::createGlobalsModRefPass(); + (void) llvm::createIPConstantPropagationPass(); + (void) llvm::createIPSCCPPass(); + (void) llvm::createIndVarSimplifyPass(); + (void) llvm::createInstructionCombiningPass(); + (void) llvm::createInternalizePass(false); + (void) llvm::createLCSSAPass(); + (void) llvm::createLICMPass(); + (void) llvm::createLazyValueInfoPass(); + (void) llvm::createLoopDependenceAnalysisPass(); + (void) llvm::createLoopExtractorPass(); + (void) llvm::createLoopSimplifyPass(); + (void) llvm::createLoopStrengthReducePass(); + (void) llvm::createLoopUnrollPass(); + (void) llvm::createLoopUnswitchPass(); + (void) llvm::createLoopIdiomPass(); + (void) llvm::createLoopRotatePass(); + (void) llvm::createLowerInvokePass(); + (void) llvm::createLowerSetJmpPass(); + (void) llvm::createLowerSwitchPass(); + (void) llvm::createNoAAPass(); + (void) llvm::createNoProfileInfoPass(); + (void) 
llvm::createProfileEstimatorPass(); + (void) llvm::createProfileVerifierPass(); + (void) llvm::createPathProfileVerifierPass(); + (void) llvm::createProfileLoaderPass(); + (void) llvm::createPathProfileLoaderPass(); + (void) llvm::createPromoteMemoryToRegisterPass(); + (void) llvm::createDemoteRegisterToMemoryPass(); + (void) llvm::createPruneEHPass(); + (void) llvm::createPostDomOnlyPrinterPass(); + (void) llvm::createPostDomPrinterPass(); + (void) llvm::createPostDomOnlyViewerPass(); + (void) llvm::createPostDomViewerPass(); + (void) llvm::createReassociatePass(); + (void) llvm::createRegionInfoPass(); + (void) llvm::createRegionOnlyPrinterPass(); + (void) llvm::createRegionOnlyViewerPass(); + (void) llvm::createRegionPrinterPass(); + (void) llvm::createRegionViewerPass(); + (void) llvm::createSCCPPass(); + (void) llvm::createScalarReplAggregatesPass(); + (void) llvm::createSimplifyLibCallsPass(); + (void) llvm::createSingleLoopExtractorPass(); + (void) llvm::createStripSymbolsPass(); + (void) llvm::createStripNonDebugSymbolsPass(); + (void) llvm::createStripDeadDebugInfoPass(); + (void) llvm::createStripDeadPrototypesPass(); + (void) llvm::createTailCallEliminationPass(); + (void) llvm::createTailDuplicationPass(); + (void) llvm::createJumpThreadingPass(); + (void) llvm::createUnifyFunctionExitNodesPass(); + (void) llvm::createInstCountPass(); + (void) llvm::createCodeGenPreparePass(); + (void) llvm::createEarlyCSEPass(); + (void) llvm::createGVNPass(); + (void) llvm::createMemCpyOptPass(); + (void) llvm::createLoopDeletionPass(); + (void) llvm::createPostDomTree(); + (void) llvm::createPostDomFrontier(); + (void) llvm::createInstructionNamerPass(); + (void) llvm::createFunctionAttrsPass(); + (void) llvm::createMergeFunctionsPass(); + (void) llvm::createPrintModulePass(0); + (void) llvm::createPrintFunctionPass("", 0); + (void) llvm::createDbgInfoPrinterPass(); + (void) llvm::createModuleDebugInfoPrinterPass(); + (void) llvm::createPartialInliningPass(); + (void) llvm::createLintPass(); + (void) llvm::createSinkingPass(); + (void) llvm::createLowerAtomicPass(); + (void) llvm::createCorrelatedValuePropagationPass(); + (void) llvm::createMemDepPrinter(); + (void) llvm::createInstructionSimplifierPass(); + + (void)new llvm::IntervalPartition(); + (void)new llvm::FindUsedTypes(); + (void)new llvm::ScalarEvolution(); + ((llvm::Function*)0)->viewCFGOnly(); + llvm::RGPassManager RGM(0); + ((llvm::RegionPass*)0)->runOnRegion((llvm::Region*)0, RGM); + llvm::AliasSetTracker X(*(llvm::AliasAnalysis*)0); + X.add((llvm::Value*)0, 0, 0); // for -print-alias-sets + } + } ForcePassLinking; // Force link by creating a global definition. +} + +#endif diff --git a/final/include/llvm/LinkAllVMCore.h b/final/include/llvm/LinkAllVMCore.h new file mode 100644 index 00000000000..83684c0fb65 --- /dev/null +++ b/final/include/llvm/LinkAllVMCore.h @@ -0,0 +1,53 @@ +//===- LinkAllVMCore.h - Reference All VMCore Code --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file pulls in all the object modules of the VMCore library so +// that tools like llc, opt, and lli can ensure they are linked with all symbols +// from libVMCore.a It should only be used from a tool's main program. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LINKALLVMCORE_H +#define LLVM_LINKALLVMCORE_H + +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/InlineAsm.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Memory.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TimeValue.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace { + struct ForceVMCoreLinking { + ForceVMCoreLinking() { + // We must reference VMCore in such a way that compilers will not + // delete it all as dead code, even with whole program optimization, + // yet is effectively a NO-OP. As the compiler isn't smart enough + // to know that getenv() never returns -1, this will do the job. + if (std::getenv("bar") != (char*) -1) + return; + (void)new llvm::Module("", llvm::getGlobalContext()); + (void)new llvm::UnreachableInst(llvm::getGlobalContext()); + (void) llvm::createVerifierPass(); + } + } ForceVMCoreLinking; +} + +#endif diff --git a/final/include/llvm/Linker.h b/final/include/llvm/Linker.h new file mode 100644 index 00000000000..b402a6090e2 --- /dev/null +++ b/final/include/llvm/Linker.h @@ -0,0 +1,299 @@ +//===- llvm/Linker.h - Module Linker Interface ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface to the module/file/archive linker. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LINKER_H +#define LLVM_LINKER_H + +#include +#include +#include "llvm/ADT/StringRef.h" + +namespace llvm { + namespace sys { class Path; } + +class Module; +class LLVMContext; + +/// This class provides the core functionality of linking in LLVM. It retains a +/// Module object which is the composite of the modules and libraries linked +/// into it. The composite Module can be retrieved via the getModule() method. +/// In this case the Linker still retains ownership of the Module. If the +/// releaseModule() method is used, the ownership of the Module is transferred +/// to the caller and the Linker object is only suitable for destruction. +/// The Linker can link Modules from memory, bitcode files, or bitcode +/// archives. It retains a set of search paths in which to find any libraries +/// presented to it. By default, the linker will generate error and warning +/// messages to stderr but this capability can be turned off with the +/// QuietWarnings and QuietErrors flags. It can also be instructed to verbosely +/// print out the linking actions it is taking with the Verbose flag. +/// @brief The LLVM Linker. +class Linker { + + /// @name Types + /// @{ + public: + /// This type is used to pass the linkage items (libraries and files) to + /// the LinkItems function. It is composed of string/bool pairs. The string + /// provides the name of the file or library (as with the -l option). The + /// bool should be true for libraries and false for files, signifying + /// "isLibrary". 
+ /// @brief A list of linkage items + typedef std::vector > ItemList; + + /// This enumeration is used to control various optional features of the + /// linker. + enum ControlFlags { + Verbose = 1, ///< Print to stderr what steps the linker is taking + QuietWarnings = 2, ///< Don't print warnings to stderr. + QuietErrors = 4 ///< Don't print errors to stderr. + }; + + /// @} + /// @name Constructors + /// @{ + public: + /// Construct the Linker with an empty module which will be given the + /// name \p progname. \p progname will also be used for error messages. + /// @brief Construct with empty module + Linker(StringRef progname, ///< name of tool running linker + StringRef modulename, ///< name of linker's end-result module + LLVMContext &C, ///< Context for global info + unsigned Flags = 0 ///< ControlFlags (one or more |'d together) + ); + + /// Construct the Linker with a previously defined module, \p aModule. Use + /// \p progname for the name of the program in error messages. + /// @brief Construct with existing module + Linker(StringRef progname, Module* aModule, unsigned Flags = 0); + + /// Destruct the Linker. + /// @brief Destructor + ~Linker(); + + /// @} + /// @name Accessors + /// @{ + public: + /// This method gets the composite module into which linking is being + /// done. The Composite module starts out empty and accumulates modules + /// linked into it via the various LinkIn* methods. This method does not + /// release the Module to the caller. The Linker retains ownership and will + /// destruct the Module when the Linker is destructed. + /// @see releaseModule + /// @brief Get the linked/composite module. + Module* getModule() const { return Composite; } + + /// This method releases the composite Module into which linking is being + /// done. Ownership of the composite Module is transferred to the caller who + /// must arrange for its destruct. After this method is called, the Linker + /// terminates the linking session for the returned Module. It will no + /// longer utilize the returned Module but instead resets itself for + /// subsequent linking as if the constructor had been called. The Linker's + /// LibPaths and flags to be reset, and memory will be released. + /// @brief Release the linked/composite module. + Module* releaseModule(); + + /// This method gets the list of libraries that form the path that the + /// Linker will search when it is presented with a library name. + /// @brief Get the Linkers library path + const std::vector& getLibPaths() const { return LibPaths; } + + /// This method returns an error string suitable for printing to the user. + /// The return value will be empty unless an error occurred in one of the + /// LinkIn* methods. In those cases, the LinkIn* methods will have returned + /// true, indicating an error occurred. At most one error is retained so + /// this function always returns the last error that occurred. Note that if + /// the Quiet control flag is not set, the error string will have already + /// been printed to stderr. + /// @brief Get the text of the last error that occurred. + const std::string &getLastError() const { return Error; } + + /// @} + /// @name Mutators + /// @{ + public: + /// Add a path to the list of paths that the Linker will search. The Linker + /// accumulates the set of libraries added + /// library paths for the target platform. The standard libraries will + /// always be searched last. The added libraries will be searched in the + /// order added. + /// @brief Add a path. 
+ void addPath(const sys::Path& path); + + /// Add a set of paths to the list of paths that the linker will search. The + /// Linker accumulates the set of libraries added. The \p paths will be + /// added to the end of the Linker's list. Order will be retained. + /// @brief Add a set of paths. + void addPaths(const std::vector& paths); + + /// This method augments the Linker's list of library paths with the system + /// paths of the host operating system, include LLVM_LIB_SEARCH_PATH. + /// @brief Add the system paths. + void addSystemPaths(); + + /// Control optional linker behavior by setting a group of flags. The flags + /// are defined in the ControlFlags enumeration. + /// @see ControlFlags + /// @brief Set control flags. + void setFlags(unsigned flags) { Flags = flags; } + + /// This method is the main interface to the linker. It can be used to + /// link a set of linkage items into a module. A linkage item is either a + /// file name with fully qualified path, or a library for which the Linker's + /// LibraryPath will be utilized to locate the library. The bool value in + /// the LinkItemKind should be set to true for libraries. This function + /// allows linking to preserve the order of specification associated with + /// the command line, or for other purposes. Each item will be linked in + /// turn as it occurs in \p Items. + /// @returns true if an error occurred, false otherwise + /// @see LinkItemKind + /// @see getLastError + bool LinkInItems ( + const ItemList& Items, ///< Set of libraries/files to link in + ItemList& NativeItems ///< Output list of native files/libs + ); + + /// This function links the bitcode \p Files into the composite module. + /// Note that this does not do any linking of unresolved symbols. The \p + /// Files are all completely linked into \p HeadModule regardless of + /// unresolved symbols. This function just loads each bitcode file and + /// calls LinkInModule on them. + /// @returns true if an error occurs, false otherwise + /// @see getLastError + /// @brief Link in multiple files. + bool LinkInFiles ( + const std::vector & Files ///< Files to link in + ); + + /// This function links a single bitcode file, \p File, into the composite + /// module. Note that this does not attempt to resolve symbols. This method + /// just loads the bitcode file and calls LinkInModule on it. If an error + /// occurs, the Linker's error string is set. + /// @returns true if an error occurs, false otherwise + /// @see getLastError + /// @brief Link in a single file. + bool LinkInFile( + const sys::Path& File, ///< File to link in. + bool &is_native ///< Indicates if the file is native object file + ); + + /// This function provides a way to selectively link in a set of modules, + /// found in libraries, based on the unresolved symbols in the composite + /// module. Each item in \p Libraries should be the base name of a library, + /// as if given with the -l option of a linker tool. The Linker's LibPaths + /// are searched for the \p Libraries and any found will be linked in with + /// LinkInArchive. If an error occurs, the Linker's error string is set. 
+ /// @see LinkInArchive + /// @see getLastError + /// @returns true if an error occurs, false otherwise + /// @brief Link libraries into the module + bool LinkInLibraries ( + const std::vector & Libraries ///< Libraries to link in + ); + + /// This function provides a way to selectively link in a set of modules, + /// found in one library, based on the unresolved symbols in the composite + /// module.The \p Library should be the base name of a library, as if given + /// with the -l option of a linker tool. The Linker's LibPaths are searched + /// for the \p Library and if found, it will be linked in with via the + /// LinkInArchive method. If an error occurs, the Linker's error string is + /// set. + /// @see LinkInArchive + /// @see getLastError + /// @returns true if an error occurs, false otherwise + /// @brief Link one library into the module + bool LinkInLibrary ( + StringRef Library, ///< The library to link in + bool& is_native ///< Indicates if lib a native library + ); + + /// This function links one bitcode archive, \p Filename, into the module. + /// The archive is searched to resolve outstanding symbols. Any modules in + /// the archive that resolve outstanding symbols will be linked in. The + /// library is searched repeatedly until no more modules that resolve + /// symbols can be found. If an error occurs, the error string is set. + /// To speed up this function, ensure the archive has been processed + /// llvm-ranlib or the S option was given to llvm-ar when the archive was + /// created. These tools add a symbol table to the archive which makes the + /// search for undefined symbols much faster. + /// @see getLastError + /// @returns true if an error occurs, otherwise false. + /// @brief Link in one archive. + bool LinkInArchive( + const sys::Path& Filename, ///< Filename of the archive to link + bool& is_native ///< Indicates if archive is a native archive + ); + + /// This method links the \p Src module into the Linker's Composite module + /// by calling LinkModules. All the other LinkIn* methods eventually + /// result in calling this method to link a Module into the Linker's + /// composite. + /// @see LinkModules + /// @returns True if an error occurs, false otherwise. + /// @brief Link in a module. + bool LinkInModule( + Module* Src, ///< Module linked into \p Dest + std::string* ErrorMsg = 0 /// Error/diagnostic string + ) { + return LinkModules(Composite, Src, ErrorMsg ); + } + + /// This is the heart of the linker. This method will take unconditional + /// control of the \p Src module and link it into the \p Dest module. The + /// \p Src module will be destructed or subsumed by this method. In either + /// case it is not usable by the caller after this method is invoked. Only + /// the \p Dest module will remain. The \p Src module is linked into the + /// Linker's composite module such that types, global variables, functions, + /// and etc. are matched and resolved. If an error occurs, this function + /// returns true and ErrorMsg is set to a descriptive message about the + /// error. + /// @returns True if an error occurs, false otherwise. + /// @brief Generically link two modules together. + static bool LinkModules(Module* Dest, Module* Src, std::string* ErrorMsg); + + /// This function looks through the Linker's LibPaths to find a library with + /// the name \p Filename. If the library cannot be found, the returned path + /// will be empty (i.e. sys::Path::isEmpty() will return true). 
+ /// @returns A sys::Path to the found library + /// @brief Find a library from its short name. + sys::Path FindLib(StringRef Filename); + + /// @} + /// @name Implementation + /// @{ + private: + /// Read in and parse the bitcode file named by FN and return the + /// Module it contains (wrapped in an auto_ptr), or 0 if an error occurs. + std::auto_ptr LoadObject(const sys::Path& FN); + + bool warning(StringRef message); + bool error(StringRef message); + void verbose(StringRef message); + + /// @} + /// @name Data + /// @{ + private: + LLVMContext& Context; ///< The context for global information + Module* Composite; ///< The composite module linked together + std::vector LibPaths; ///< The library search paths + unsigned Flags; ///< Flags to control optional behavior. + std::string Error; ///< Text of error that occurred. + std::string ProgramName; ///< Name of the program being linked + /// @} + +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/MC/EDInstInfo.h b/final/include/llvm/MC/EDInstInfo.h new file mode 100644 index 00000000000..83d9e780feb --- /dev/null +++ b/final/include/llvm/MC/EDInstInfo.h @@ -0,0 +1,29 @@ +//===-- llvm/MC/EDInstInfo.h - EDis instruction info ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef EDINSTINFO_H +#define EDINSTINFO_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +#define EDIS_MAX_OPERANDS 13 +#define EDIS_MAX_SYNTAXES 2 + +struct EDInstInfo { + uint8_t instructionType; + uint8_t numOperands; + uint8_t operandTypes[EDIS_MAX_OPERANDS]; + uint8_t operandFlags[EDIS_MAX_OPERANDS]; + const char operandOrders[EDIS_MAX_SYNTAXES][EDIS_MAX_OPERANDS]; +}; + +} // namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCAsmInfo.h b/final/include/llvm/MC/MCAsmInfo.h new file mode 100644 index 00000000000..7e24a3d1d3b --- /dev/null +++ b/final/include/llvm/MC/MCAsmInfo.h @@ -0,0 +1,479 @@ +//===-- llvm/MC/MCAsmInfo.h - Asm info --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class to be used as the basis for target specific +// asm writers. This class primarily takes care of global printing constants, +// which are used in very similar ways across all targets. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ASM_INFO_H +#define LLVM_TARGET_ASM_INFO_H + +#include "llvm/MC/MCDirectives.h" +#include + +namespace llvm { + class MCSection; + class MCContext; + + /// MCAsmInfo - This class is intended to be used as a base class for asm + /// properties and features specific to the target. + namespace ExceptionHandling { + enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj, ARM }; + } + + class MCAsmInfo { + protected: + //===------------------------------------------------------------------===// + // Properties to be set by the target writer, used to configure asm printer. + // + + /// HasSubsectionsViaSymbols - True if this target has the MachO + /// .subsections_via_symbols directive. + bool HasSubsectionsViaSymbols; // Default is false. 
+ + /// HasMachoZeroFillDirective - True if this is a MachO target that supports + /// the macho-specific .zerofill directive for emitting BSS Symbols. + bool HasMachoZeroFillDirective; // Default is false. + + /// HasMachoTBSSDirective - True if this is a MachO target that supports + /// the macho-specific .tbss directive for emitting thread local BSS Symbols + bool HasMachoTBSSDirective; // Default is false. + + /// HasStaticCtorDtorReferenceInStaticMode - True if the compiler should + /// emit a ".reference .constructors_used" or ".reference .destructors_used" + /// directive after the a static ctor/dtor list. This directive is only + /// emitted in Static relocation model. + bool HasStaticCtorDtorReferenceInStaticMode; // Default is false. + + /// LinkerRequiresNonEmptyDwarfLines - True if the linker has a bug and + /// requires that the debug_line section be of a minimum size. In practice + /// such a linker requires a non empty line sequence if a file is present. + bool LinkerRequiresNonEmptyDwarfLines; // Default to false. + + /// MaxInstLength - This is the maximum possible length of an instruction, + /// which is needed to compute the size of an inline asm. + unsigned MaxInstLength; // Defaults to 4. + + /// PCSymbol - The symbol used to represent the current PC. Used in PC + /// relative expressions. + const char *PCSymbol; // Defaults to "$". + + /// SeparatorChar - This character, if specified, is used to separate + /// instructions from each other when on the same line. This is used to + /// measure inline asm instructions. + char SeparatorChar; // Defaults to ';' + + /// CommentColumn - This indicates the comment num (zero-based) at + /// which asm comments should be printed. + unsigned CommentColumn; // Defaults to 40 + + /// CommentString - This indicates the comment character used by the + /// assembler. + const char *CommentString; // Defaults to "#" + + /// LabelSuffix - This is appended to emitted labels. + const char *LabelSuffix; // Defaults to ":" + + /// GlobalPrefix - If this is set to a non-empty string, it is prepended + /// onto all global symbols. This is often used for "_" or ".". + const char *GlobalPrefix; // Defaults to "" + + /// PrivateGlobalPrefix - This prefix is used for globals like constant + /// pool entries that are completely private to the .s file and should not + /// have names in the .o file. This is often "." or "L". + const char *PrivateGlobalPrefix; // Defaults to "." + + /// LinkerPrivateGlobalPrefix - This prefix is used for symbols that should + /// be passed through the assembler but be removed by the linker. This + /// is "l" on Darwin, currently used for some ObjC metadata. + const char *LinkerPrivateGlobalPrefix; // Defaults to "" + + /// InlineAsmStart/End - If these are nonempty, they contain a directive to + /// emit before and after an inline assembly statement. + const char *InlineAsmStart; // Defaults to "#APP\n" + const char *InlineAsmEnd; // Defaults to "#NO_APP\n" + + /// AssemblerDialect - Which dialect of an assembler variant to use. + unsigned AssemblerDialect; // Defaults to 0 + + /// AllowQuotesInName - This is true if the assembler allows for complex + /// symbol names to be surrounded in quotes. This defaults to false. + bool AllowQuotesInName; + + /// AllowNameToStartWithDigit - This is true if the assembler allows symbol + /// names to start with a digit (e.g., "0x0021"). This defaults to false. 
+ bool AllowNameToStartWithDigit; + + /// AllowPeriodsInName - This is true if the assembler allows periods in + /// symbol names. This defaults to true. + bool AllowPeriodsInName; + + //===--- Data Emission Directives -------------------------------------===// + + /// ZeroDirective - this should be set to the directive used to get some + /// number of zero bytes emitted to the current section. Common cases are + /// "\t.zero\t" and "\t.space\t". If this is set to null, the + /// Data*bitsDirective's will be used to emit zero bytes. + const char *ZeroDirective; // Defaults to "\t.zero\t" + + /// AsciiDirective - This directive allows emission of an ascii string with + /// the standard C escape characters embedded into it. + const char *AsciiDirective; // Defaults to "\t.ascii\t" + + /// AscizDirective - If not null, this allows for special handling of + /// zero terminated strings on this target. This is commonly supported as + /// ".asciz". If a target doesn't support this, it can be set to null. + const char *AscizDirective; // Defaults to "\t.asciz\t" + + /// DataDirectives - These directives are used to output some unit of + /// integer data to the current section. If a data directive is set to + /// null, smaller data directives will be used to emit the large sizes. + const char *Data8bitsDirective; // Defaults to "\t.byte\t" + const char *Data16bitsDirective; // Defaults to "\t.short\t" + const char *Data32bitsDirective; // Defaults to "\t.long\t" + const char *Data64bitsDirective; // Defaults to "\t.quad\t" + + /// GPRel32Directive - if non-null, a directive that is used to emit a word + /// which should be relocated as a 32-bit GP-relative offset, e.g. .gpword + /// on Mips or .gprel32 on Alpha. + const char *GPRel32Directive; // Defaults to NULL. + + /// getDataASDirective - Return the directive that should be used to emit + /// data of the specified size to the specified numeric address space. + virtual const char *getDataASDirective(unsigned Size, unsigned AS) const { + assert(AS != 0 && "Don't know the directives for default addr space"); + return 0; + } + + /// SunStyleELFSectionSwitchSyntax - This is true if this target uses "Sun + /// Style" syntax for section switching ("#alloc,#write" etc) instead of the + /// normal ELF syntax (,"a,w") in .section directives. + bool SunStyleELFSectionSwitchSyntax; // Defaults to false. + + /// UsesELFSectionDirectiveForBSS - This is true if this target uses ELF + /// '.section' directive before the '.bss' one. It's used for PPC/Linux + /// which doesn't support the '.bss' directive only. + bool UsesELFSectionDirectiveForBSS; // Defaults to false. + + /// HasMicrosoftFastStdCallMangling - True if this target uses microsoft + /// style mangling for functions with X86_StdCall/X86_FastCall calling + /// convention. + bool HasMicrosoftFastStdCallMangling; // Defaults to false. + + //===--- Alignment Information ----------------------------------------===// + + /// AlignDirective - The directive used to emit round up to an alignment + /// boundary. + /// + const char *AlignDirective; // Defaults to "\t.align\t" + + /// AlignmentIsInBytes - If this is true (the default) then the asmprinter + /// emits ".align N" directives, where N is the number of bytes to align to. + /// Otherwise, it emits ".align log2(N)", e.g. 3 to align to an 8 byte + /// boundary. + bool AlignmentIsInBytes; // Defaults to true + + /// TextAlignFillValue - If non-zero, this is used to fill the executable + /// space created as the result of a alignment directive. 
+ unsigned TextAlignFillValue; // Defaults to 0 + + //===--- Global Variable Emission Directives --------------------------===// + + /// GlobalDirective - This is the directive used to declare a global entity. + /// + const char *GlobalDirective; // Defaults to NULL. + + /// ExternDirective - This is the directive used to declare external + /// globals. + /// + const char *ExternDirective; // Defaults to NULL. + + /// HasSetDirective - True if the assembler supports the .set directive. + bool HasSetDirective; // Defaults to true. + + /// HasAggressiveSymbolFolding - False if the assembler requires that we use + /// Lc = a - b + /// .long Lc + /// instead of + /// .long a - b + bool HasAggressiveSymbolFolding; // Defaults to true. + + /// HasLCOMMDirective - This is true if the target supports the .lcomm + /// directive. + bool HasLCOMMDirective; // Defaults to false. + + /// COMMDirectiveAlignmentIsInBytes - True is COMMDirective's optional + /// alignment is to be specified in bytes instead of log2(n). + bool COMMDirectiveAlignmentIsInBytes; // Defaults to true; + + /// HasDotTypeDotSizeDirective - True if the target has .type and .size + /// directives, this is true for most ELF targets. + bool HasDotTypeDotSizeDirective; // Defaults to true. + + /// HasSingleParameterDotFile - True if the target has a single parameter + /// .file directive, this is true for ELF targets. + bool HasSingleParameterDotFile; // Defaults to true. + + /// HasNoDeadStrip - True if this target supports the MachO .no_dead_strip + /// directive. + bool HasNoDeadStrip; // Defaults to false. + + /// HasSymbolResolver - True if this target supports the MachO + /// .symbol_resolver directive. + bool HasSymbolResolver; // Defaults to false. + + /// WeakRefDirective - This directive, if non-null, is used to declare a + /// global as being a weak undefined symbol. + const char *WeakRefDirective; // Defaults to NULL. + + /// WeakDefDirective - This directive, if non-null, is used to declare a + /// global as being a weak defined symbol. + const char *WeakDefDirective; // Defaults to NULL. + + /// LinkOnceDirective - This directive, if non-null is used to declare a + /// global as being a weak defined symbol. This is used on cygwin/mingw. + const char *LinkOnceDirective; // Defaults to NULL. + + /// HiddenVisibilityAttr - This attribute, if not MCSA_Invalid, is used to + /// declare a symbol as having hidden visibility. + MCSymbolAttr HiddenVisibilityAttr; // Defaults to MCSA_Hidden. + + /// HiddenDeclarationVisibilityAttr - This attribute, if not MCSA_Invalid, + /// is used to declare an undefined symbol as having hidden visibility. + MCSymbolAttr HiddenDeclarationVisibilityAttr; // Defaults to MCSA_Hidden. + + + /// ProtectedVisibilityAttr - This attribute, if not MCSA_Invalid, is used + /// to declare a symbol as having protected visibility. + MCSymbolAttr ProtectedVisibilityAttr; // Defaults to MCSA_Protected + + //===--- Dwarf Emission Directives -----------------------------------===// + + /// HasLEB128 - True if target asm supports leb128 directives. + bool HasLEB128; // Defaults to false. + + /// SupportsDebugInformation - True if target supports emission of debugging + /// information. + bool SupportsDebugInformation; // Defaults to false. + + /// SupportsExceptionHandling - True if target supports exception handling. 
+ ExceptionHandling::ExceptionsType ExceptionsType; // Defaults to None + + /// RequiresFrameSection - true if the Dwarf2 output needs a frame section + bool DwarfRequiresFrameSection; // Defaults to true. + + /// DwarfUsesInlineInfoSection - True if DwarfDebugInlineSection is used to + /// encode inline subroutine information. + bool DwarfUsesInlineInfoSection; // Defaults to false. + + /// DwarfSectionOffsetDirective - Special section offset directive. + const char* DwarfSectionOffsetDirective; // Defaults to NULL + + /// DwarfUsesAbsoluteLabelForStmtList - True if DW_AT_stmt_list needs + /// absolute label instead of offset. + bool DwarfUsesAbsoluteLabelForStmtList; // Defaults to true; + + // DwarfUsesLabelOffsetDifference - True if Dwarf2 output can + // use EmitLabelOffsetDifference. + bool DwarfUsesLabelOffsetForRanges; + + //===--- CBE Asm Translation Table -----------------------------------===// + + const char *const *AsmTransCBE; // Defaults to empty + + public: + explicit MCAsmInfo(); + virtual ~MCAsmInfo(); + + // FIXME: move these methods to DwarfPrinter when the JIT stops using them. + static unsigned getSLEB128Size(int Value); + static unsigned getULEB128Size(unsigned Value); + + bool hasSubsectionsViaSymbols() const { return HasSubsectionsViaSymbols; } + + // Data directive accessors. + // + const char *getData8bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data8bitsDirective : getDataASDirective(8, AS); + } + const char *getData16bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data16bitsDirective : getDataASDirective(16, AS); + } + const char *getData32bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data32bitsDirective : getDataASDirective(32, AS); + } + const char *getData64bitsDirective(unsigned AS = 0) const { + return AS == 0 ? Data64bitsDirective : getDataASDirective(64, AS); + } + const char *getGPRel32Directive() const { return GPRel32Directive; } + + /// getNonexecutableStackSection - Targets can implement this method to + /// specify a section to switch to if the translation unit doesn't have any + /// trampolines that require an executable stack. + virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const{ + return 0; + } + + bool usesSunStyleELFSectionSwitchSyntax() const { + return SunStyleELFSectionSwitchSyntax; + } + + bool usesELFSectionDirectiveForBSS() const { + return UsesELFSectionDirectiveForBSS; + } + + bool hasMicrosoftFastStdCallMangling() const { + return HasMicrosoftFastStdCallMangling; + } + + // Accessors. 
+ // + bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; } + bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; } + bool hasStaticCtorDtorReferenceInStaticMode() const { + return HasStaticCtorDtorReferenceInStaticMode; + } + bool getLinkerRequiresNonEmptyDwarfLines() const { + return LinkerRequiresNonEmptyDwarfLines; + } + unsigned getMaxInstLength() const { + return MaxInstLength; + } + const char *getPCSymbol() const { + return PCSymbol; + } + char getSeparatorChar() const { + return SeparatorChar; + } + unsigned getCommentColumn() const { + return CommentColumn; + } + const char *getCommentString() const { + return CommentString; + } + const char *getLabelSuffix() const { + return LabelSuffix; + } + const char *getGlobalPrefix() const { + return GlobalPrefix; + } + const char *getPrivateGlobalPrefix() const { + return PrivateGlobalPrefix; + } + const char *getLinkerPrivateGlobalPrefix() const { + return LinkerPrivateGlobalPrefix; + } + const char *getInlineAsmStart() const { + return InlineAsmStart; + } + const char *getInlineAsmEnd() const { + return InlineAsmEnd; + } + unsigned getAssemblerDialect() const { + return AssemblerDialect; + } + bool doesAllowQuotesInName() const { + return AllowQuotesInName; + } + bool doesAllowNameToStartWithDigit() const { + return AllowNameToStartWithDigit; + } + bool doesAllowPeriodsInName() const { + return AllowPeriodsInName; + } + const char *getZeroDirective() const { + return ZeroDirective; + } + const char *getAsciiDirective() const { + return AsciiDirective; + } + const char *getAscizDirective() const { + return AscizDirective; + } + const char *getAlignDirective() const { + return AlignDirective; + } + bool getAlignmentIsInBytes() const { + return AlignmentIsInBytes; + } + unsigned getTextAlignFillValue() const { + return TextAlignFillValue; + } + const char *getGlobalDirective() const { + return GlobalDirective; + } + const char *getExternDirective() const { + return ExternDirective; + } + bool hasSetDirective() const { return HasSetDirective; } + bool hasAggressiveSymbolFolding() const { + return HasAggressiveSymbolFolding; + } + bool hasLCOMMDirective() const { return HasLCOMMDirective; } + bool hasDotTypeDotSizeDirective() const {return HasDotTypeDotSizeDirective;} + bool getCOMMDirectiveAlignmentIsInBytes() const { + return COMMDirectiveAlignmentIsInBytes; + } + bool hasSingleParameterDotFile() const { return HasSingleParameterDotFile; } + bool hasNoDeadStrip() const { return HasNoDeadStrip; } + bool hasSymbolResolver() const { return HasSymbolResolver; } + const char *getWeakRefDirective() const { return WeakRefDirective; } + const char *getWeakDefDirective() const { return WeakDefDirective; } + const char *getLinkOnceDirective() const { return LinkOnceDirective; } + + MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;} + MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { + return HiddenDeclarationVisibilityAttr; + } + MCSymbolAttr getProtectedVisibilityAttr() const { + return ProtectedVisibilityAttr; + } + bool hasLEB128() const { + return HasLEB128; + } + bool doesSupportDebugInformation() const { + return SupportsDebugInformation; + } + bool doesSupportExceptionHandling() const { + return ExceptionsType != ExceptionHandling::None; + } + ExceptionHandling::ExceptionsType getExceptionHandlingType() const { + return ExceptionsType; + } + bool isExceptionHandlingDwarf() const { + return + (ExceptionsType == ExceptionHandling::DwarfTable || + ExceptionsType == 
ExceptionHandling::DwarfCFI ||
+            ExceptionsType == ExceptionHandling::ARM);
+  }
+
+  bool doesDwarfRequireFrameSection() const {
+    return DwarfRequiresFrameSection;
+  }
+  bool doesDwarfUsesInlineInfoSection() const {
+    return DwarfUsesInlineInfoSection;
+  }
+  const char *getDwarfSectionOffsetDirective() const {
+    return DwarfSectionOffsetDirective;
+  }
+  bool doesDwarfUsesAbsoluteLabelForStmtList() const {
+    return DwarfUsesAbsoluteLabelForStmtList;
+  }
+  bool doesDwarfUsesLabelOffsetForRanges() const {
+    return DwarfUsesLabelOffsetForRanges;
+  }
+  const char *const *getAsmCBE() const {
+    return AsmTransCBE;
+  }
+};
+}
+
+#endif
diff --git a/final/include/llvm/MC/MCAsmInfoCOFF.h b/final/include/llvm/MC/MCAsmInfoCOFF.h
new file mode 100644
index 00000000000..a3ee1593c3a
--- /dev/null
+++ b/final/include/llvm/MC/MCAsmInfoCOFF.h
@@ -0,0 +1,24 @@
+//===-- MCAsmInfoCOFF.h - COFF asm properties -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_COFF_TARGET_ASM_INFO_H
+#define LLVM_COFF_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class MCAsmInfoCOFF : public MCAsmInfo {
+  protected:
+    explicit MCAsmInfoCOFF();
+
+  };
+}
+
+
+#endif // LLVM_COFF_TARGET_ASM_INFO_H
diff --git a/final/include/llvm/MC/MCAsmInfoDarwin.h b/final/include/llvm/MC/MCAsmInfoDarwin.h
new file mode 100644
index 00000000000..c85aa3da957
--- /dev/null
+++ b/final/include/llvm/MC/MCAsmInfoDarwin.h
@@ -0,0 +1,32 @@
+//===---- MCAsmInfoDarwin.h - Darwin asm properties -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on Darwin-based targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DARWIN_TARGET_ASM_INFO_H
+#define LLVM_DARWIN_TARGET_ASM_INFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+  class GlobalValue;
+  class GlobalVariable;
+  class Type;
+  class Mangler;
+
+  struct MCAsmInfoDarwin : public MCAsmInfo {
+    explicit MCAsmInfoDarwin();
+  };
+}
+
+
+#endif // LLVM_DARWIN_TARGET_ASM_INFO_H
diff --git a/final/include/llvm/MC/MCAsmLayout.h b/final/include/llvm/MC/MCAsmLayout.h
new file mode 100644
index 00000000000..01cb0006b36
--- /dev/null
+++ b/final/include/llvm/MC/MCAsmLayout.h
@@ -0,0 +1,104 @@
+//===- MCAsmLayout.h - Assembly Layout Object -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMLAYOUT_H
+#define LLVM_MC_MCASMLAYOUT_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+class MCAssembler;
+class MCFragment;
+class MCSectionData;
+class MCSymbolData;
+
+/// Encapsulates the layout of an assembly file at a particular point in time.
+///
+/// Assembly may require computing multiple layouts for a particular assembly
+/// file as part of the relaxation process. This class encapsulates the layout
+/// at a single point in time in such a way that it is always possible to
+/// efficiently compute the exact addresses of any symbol in the assembly file,
+/// even during the relaxation process.
+class MCAsmLayout {
+public:
+  typedef llvm::SmallVectorImpl<MCSectionData*>::const_iterator const_iterator;
+  typedef llvm::SmallVectorImpl<MCSectionData*>::iterator iterator;
+
+private:
+  MCAssembler &Assembler;
+
+  /// List of sections in layout order.
+  llvm::SmallVector<MCSectionData*, 16> SectionOrder;
+
+  /// The last fragment which was laid out, or 0 if nothing has been laid
+  /// out. Fragments are always laid out in order, so all fragments with a
+  /// lower ordinal will be up to date.
+  mutable DenseMap<const MCSectionData*, MCFragment*> LastValidFragment;
+
+  /// \brief Make sure that the layout for the given fragment is valid, lazily
+  /// computing it if necessary.
+  void EnsureValid(const MCFragment *F) const;
+
+  bool isFragmentUpToDate(const MCFragment *F) const;
+
+public:
+  MCAsmLayout(MCAssembler &_Assembler);
+
+  /// Get the assembler object this is a layout for.
+  MCAssembler &getAssembler() const { return Assembler; }
+
+  /// \brief Invalidate all following fragments because a fragment has been
+  /// resized. The fragment's size should have already been updated.
+  void Invalidate(MCFragment *F);
+
+  /// \brief Perform layout for a single fragment, assuming that the previous
+  /// fragment has already been laid out correctly, and the parent section has
+  /// been initialized.
+  void LayoutFragment(MCFragment *Fragment);
+
+  /// @name Section Access (in layout order)
+  /// @{
+
+  llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() {
+    return SectionOrder;
+  }
+  const llvm::SmallVectorImpl<MCSectionData*> &getSectionOrder() const {
+    return SectionOrder;
+  }
+
+  /// @}
+  /// @name Fragment Layout Data
+  /// @{
+
+  /// \brief Get the offset of the given fragment inside its containing
+  /// section.
+  uint64_t getFragmentOffset(const MCFragment *F) const;
+
+  /// @}
+  /// @name Utility Functions
+  /// @{
+
+  /// \brief Get the address space size of the given section, as it affects
+  /// layout. This may differ from the size reported by \see getSectionSize()
+  /// by not including section tail padding.
+  uint64_t getSectionAddressSize(const MCSectionData *SD) const;
+
+  /// \brief Get the data size of the given section, as emitted to the object
+  /// file. This may include additional padding, or be 0 for virtual sections.
+  uint64_t getSectionFileSize(const MCSectionData *SD) const;
+
+  /// \brief Get the offset of the given symbol, as computed in the current
+  /// layout.
+  uint64_t getSymbolOffset(const MCSymbolData *SD) const;
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCAssembler.h b/final/include/llvm/MC/MCAssembler.h
new file mode 100644
index 00000000000..30971c62a97
--- /dev/null
+++ b/final/include/llvm/MC/MCAssembler.h
@@ -0,0 +1,916 @@
+//===- MCAssembler.h - Object File Generation -------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASSEMBLER_H
+#define LLVM_MC_MCASSEMBLER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/DataTypes.h"
+#include <vector> // FIXME: Shouldn't be needed.
+
+namespace llvm {
+class raw_ostream;
+class MCAsmLayout;
+class MCAssembler;
+class MCBinaryExpr;
+class MCContext;
+class MCCodeEmitter;
+class MCExpr;
+class MCFragment;
+class MCObjectWriter;
+class MCSection;
+class MCSectionData;
+class MCSymbol;
+class MCSymbolData;
+class MCValue;
+class TargetAsmBackend;
+
+class MCFragment : public ilist_node<MCFragment> {
+  friend class MCAsmLayout;
+
+  MCFragment(const MCFragment&); // DO NOT IMPLEMENT
+  void operator=(const MCFragment&); // DO NOT IMPLEMENT
+
+public:
+  enum FragmentType {
+    FT_Align,
+    FT_Data,
+    FT_Fill,
+    FT_Inst,
+    FT_Org,
+    FT_Dwarf,
+    FT_DwarfFrame,
+    FT_LEB
+  };
+
+private:
+  FragmentType Kind;
+
+  /// Parent - The data for the section this fragment is in.
+  MCSectionData *Parent;
+
+  /// Atom - The atom this fragment is in, as represented by its defining
+  /// symbol. Atoms are only used by backends which set
+  /// \see MCAsmBackend::hasReliableSymbolDifference().
+  MCSymbolData *Atom;
+
+  /// @name Assembler Backend Data
+  /// @{
+  //
+  // FIXME: This could all be kept private to the assembler implementation.
+
+  /// Offset - The offset of this fragment in its section. This is ~0 until
+  /// initialized.
+  uint64_t Offset;
+
+  /// LayoutOrder - The layout order of this fragment.
+  unsigned LayoutOrder;
+
+  /// @}
+
+protected:
+  MCFragment(FragmentType _Kind, MCSectionData *_Parent = 0);
+
+public:
+  // Only for sentinel.
+  MCFragment();
+  virtual ~MCFragment();
+
+  FragmentType getKind() const { return Kind; }
+
+  MCSectionData *getParent() const { return Parent; }
+  void setParent(MCSectionData *Value) { Parent = Value; }
+
+  MCSymbolData *getAtom() const { return Atom; }
+  void setAtom(MCSymbolData *Value) { Atom = Value; }
+
+  unsigned getLayoutOrder() const { return LayoutOrder; }
+  void setLayoutOrder(unsigned Value) { LayoutOrder = Value; }
+
+  static bool classof(const MCFragment *O) { return true; }
+
+  void dump();
+};
+
+class MCDataFragment : public MCFragment {
+  SmallString<32> Contents;
+
+  /// Fixups - The list of fixups in this fragment.
+  std::vector<MCFixup> Fixups;
+
+public:
+  typedef std::vector<MCFixup>::const_iterator const_fixup_iterator;
+  typedef std::vector<MCFixup>::iterator fixup_iterator;
+
+public:
+  MCDataFragment(MCSectionData *SD = 0) : MCFragment(FT_Data, SD) {}
+
+  /// @name Accessors
+  /// @{
+
+  SmallString<32> &getContents() { return Contents; }
+  const SmallString<32> &getContents() const { return Contents; }
+
+  /// @}
+  /// @name Fixup Access
+  /// @{
+
+  void addFixup(MCFixup Fixup) {
+    // Enforce invariant that fixups are in offset order.
+    assert((Fixups.empty() || Fixup.getOffset() > Fixups.back().getOffset()) &&
+           "Fixups must be added in order!");
+    Fixups.push_back(Fixup);
+  }
+
+  std::vector<MCFixup> &getFixups() { return Fixups; }
+  const std::vector<MCFixup> &getFixups() const { return Fixups; }
+
+  fixup_iterator fixup_begin() { return Fixups.begin(); }
+  const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+  fixup_iterator fixup_end() { return Fixups.end(); }
+  const_fixup_iterator fixup_end() const { return Fixups.end(); }
+
+  size_t fixup_size() const { return Fixups.size(); }
+
+  /// @}
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Data;
+  }
+  static bool classof(const MCDataFragment *) { return true; }
+};
+
+// FIXME: This current incarnation of MCInstFragment doesn't make much sense,
+// as it is almost entirely a duplicate of MCDataFragment. If we decide to
+// stick with this approach (as opposed to making MCInstFragment a very
+// lightweight object with just the MCInst and a code size), then we should
+// just change MCDataFragment to have an optional MCInst at its end.
+class MCInstFragment : public MCFragment {
+  /// Inst - The instruction this is a fragment for.
+  MCInst Inst;
+
+  /// Code - Binary data for the currently encoded instruction.
+  SmallString<8> Code;
+
+  /// Fixups - The list of fixups in this fragment.
+  SmallVector<MCFixup, 1> Fixups;
+
+public:
+  typedef SmallVectorImpl<MCFixup>::const_iterator const_fixup_iterator;
+  typedef SmallVectorImpl<MCFixup>::iterator fixup_iterator;
+
+public:
+  MCInstFragment(MCInst _Inst, MCSectionData *SD = 0)
+    : MCFragment(FT_Inst, SD), Inst(_Inst) {
+  }
+
+  /// @name Accessors
+  /// @{
+
+  SmallVectorImpl<char> &getCode() { return Code; }
+  const SmallVectorImpl<char> &getCode() const { return Code; }
+
+  unsigned getInstSize() const { return Code.size(); }
+
+  MCInst &getInst() { return Inst; }
+  const MCInst &getInst() const { return Inst; }
+
+  void setInst(MCInst Value) { Inst = Value; }
+
+  /// @}
+  /// @name Fixup Access
+  /// @{
+
+  SmallVectorImpl<MCFixup> &getFixups() { return Fixups; }
+  const SmallVectorImpl<MCFixup> &getFixups() const { return Fixups; }
+
+  fixup_iterator fixup_begin() { return Fixups.begin(); }
+  const_fixup_iterator fixup_begin() const { return Fixups.begin(); }
+
+  fixup_iterator fixup_end() { return Fixups.end(); }
+  const_fixup_iterator fixup_end() const { return Fixups.end(); }
+
+  size_t fixup_size() const { return Fixups.size(); }
+
+  /// @}
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_Inst;
+  }
+  static bool classof(const MCInstFragment *) { return true; }
+};
+
+class MCAlignFragment : public MCFragment {
+  /// Alignment - The alignment to ensure, in bytes.
+  unsigned Alignment;
+
+  /// Value - Value to use for filling padding bytes.
+  int64_t Value;
+
+  /// ValueSize - The size of the integer (in bytes) of \arg Value.
+  unsigned ValueSize;
+
+  /// MaxBytesToEmit - The maximum number of bytes to emit; if the alignment
+  /// cannot be satisfied in this width then this fragment is ignored.
+  unsigned MaxBytesToEmit;
+
+  /// EmitNops - Flag to indicate that (optimal) NOPs should be emitted instead
+  /// of using the provided value. The exact interpretation of this flag is
+  /// target dependent.
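The ordering assertion in addFixup() above is a real constraint on callers: MCDataFragment keeps its fixups in a flat vector and depends on offset order rather than sorting. A minimal sketch of a well-behaved caller follows; the helper name is invented, and it assumes MCFixup::Create(Offset, Expr, Kind) from llvm/MC/MCFixup.h plus a target expression built elsewhere.

void addWordWithFixup(llvm::MCDataFragment &DF, const llvm::MCExpr *Target) {
  using namespace llvm;
  // The fixup patches the bytes we are about to append, so record the
  // current contents size as its offset before growing the buffer.
  unsigned Offset = DF.getContents().size();
  DF.getContents().append(4, '\0'); // placeholder bytes for a 4-byte field
  // Appending data first and the fixup second keeps offsets strictly
  // increasing, which is exactly what addFixup() asserts.
  DF.addFixup(MCFixup::Create(Offset, Target, FK_Data_4));
}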
+ bool EmitNops : 1; + +public: + MCAlignFragment(unsigned _Alignment, int64_t _Value, unsigned _ValueSize, + unsigned _MaxBytesToEmit, MCSectionData *SD = 0) + : MCFragment(FT_Align, SD), Alignment(_Alignment), + Value(_Value),ValueSize(_ValueSize), + MaxBytesToEmit(_MaxBytesToEmit), EmitNops(false) {} + + /// @name Accessors + /// @{ + + unsigned getAlignment() const { return Alignment; } + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + unsigned getMaxBytesToEmit() const { return MaxBytesToEmit; } + + bool hasEmitNops() const { return EmitNops; } + void setEmitNops(bool Value) { EmitNops = Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Align; + } + static bool classof(const MCAlignFragment *) { return true; } +}; + +class MCFillFragment : public MCFragment { + /// Value - Value to use for filling bytes. + int64_t Value; + + /// ValueSize - The size (in bytes) of \arg Value to use when filling, or 0 if + /// this is a virtual fill fragment. + unsigned ValueSize; + + /// Size - The number of bytes to insert. + uint64_t Size; + +public: + MCFillFragment(int64_t _Value, unsigned _ValueSize, uint64_t _Size, + MCSectionData *SD = 0) + : MCFragment(FT_Fill, SD), + Value(_Value), ValueSize(_ValueSize), Size(_Size) { + assert((!ValueSize || (Size % ValueSize) == 0) && + "Fill size must be a multiple of the value size!"); + } + + /// @name Accessors + /// @{ + + int64_t getValue() const { return Value; } + + unsigned getValueSize() const { return ValueSize; } + + uint64_t getSize() const { return Size; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Fill; + } + static bool classof(const MCFillFragment *) { return true; } +}; + +class MCOrgFragment : public MCFragment { + /// Offset - The offset this fragment should start at. + const MCExpr *Offset; + + /// Value - Value to use for filling bytes. + int8_t Value; + +public: + MCOrgFragment(const MCExpr &_Offset, int8_t _Value, MCSectionData *SD = 0) + : MCFragment(FT_Org, SD), + Offset(&_Offset), Value(_Value) {} + + /// @name Accessors + /// @{ + + const MCExpr &getOffset() const { return *Offset; } + + uint8_t getValue() const { return Value; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Org; + } + static bool classof(const MCOrgFragment *) { return true; } +}; + +class MCLEBFragment : public MCFragment { + /// Value - The value this fragment should contain. + const MCExpr *Value; + + /// IsSigned - True if this is a sleb128, false if uleb128. + bool IsSigned; + + SmallString<8> Contents; +public: + MCLEBFragment(const MCExpr &Value_, bool IsSigned_, MCSectionData *SD) + : MCFragment(FT_LEB, SD), + Value(&Value_), IsSigned(IsSigned_) { Contents.push_back(0); } + + /// @name Accessors + /// @{ + + const MCExpr &getValue() const { return *Value; } + + bool isSigned() const { return IsSigned; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_LEB; + } + static bool classof(const MCLEBFragment *) { return true; } +}; + +class MCDwarfLineAddrFragment : public MCFragment { + /// LineDelta - the value of the difference between the two line numbers + /// between two .loc dwarf directives. 
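To make the constructor contracts above concrete, here is a sketch of building the two padding-style fragments by hand. It is illustrative only: fragments are normally created by a streamer and attached to a real MCSectionData rather than the null parent used here.

void buildPadding() {
  using namespace llvm;
  // 16 bytes of zero fill written as four 4-byte values; 16 % 4 == 0, so the
  // MCFillFragment constructor's (Size % ValueSize) == 0 assertion holds.
  MCFillFragment *Fill = new MCFillFragment(/*Value=*/0, /*ValueSize=*/4,
                                            /*Size=*/16, /*SD=*/0);
  // Request 16-byte alignment, emit at most 15 bytes of padding, and prefer
  // target NOPs over the literal fill value.
  MCAlignFragment *Align = new MCAlignFragment(/*Alignment=*/16, /*Value=*/0,
                                               /*ValueSize=*/1,
                                               /*MaxBytesToEmit=*/15,
                                               /*SD=*/0);
  Align->setEmitNops(true);
  (void)Fill; // both objects would normally be owned by their section's list
}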
+ int64_t LineDelta; + + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .loc dwarf directives. + const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfLineAddrFragment(int64_t _LineDelta, const MCExpr &_AddrDelta, + MCSectionData *SD) + : MCFragment(FT_Dwarf, SD), + LineDelta(_LineDelta), AddrDelta(&_AddrDelta) { Contents.push_back(0); } + + /// @name Accessors + /// @{ + + int64_t getLineDelta() const { return LineDelta; } + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_Dwarf; + } + static bool classof(const MCDwarfLineAddrFragment *) { return true; } +}; + +class MCDwarfCallFrameFragment : public MCFragment { + /// AddrDelta - The expression for the difference of the two symbols that + /// make up the address delta between two .cfi_* dwarf directives. + const MCExpr *AddrDelta; + + SmallString<8> Contents; + +public: + MCDwarfCallFrameFragment(const MCExpr &_AddrDelta, MCSectionData *SD) + : MCFragment(FT_DwarfFrame, SD), + AddrDelta(&_AddrDelta) { Contents.push_back(0); } + + /// @name Accessors + /// @{ + + const MCExpr &getAddrDelta() const { return *AddrDelta; } + + SmallString<8> &getContents() { return Contents; } + const SmallString<8> &getContents() const { return Contents; } + + /// @} + + static bool classof(const MCFragment *F) { + return F->getKind() == MCFragment::FT_DwarfFrame; + } + static bool classof(const MCDwarfCallFrameFragment *) { return true; } +}; + +// FIXME: Should this be a separate class, or just merged into MCSection? Since +// we anticipate the fast path being through an MCAssembler, the only reason to +// keep it out is for API abstraction. +class MCSectionData : public ilist_node { + friend class MCAsmLayout; + + MCSectionData(const MCSectionData&); // DO NOT IMPLEMENT + void operator=(const MCSectionData&); // DO NOT IMPLEMENT + +public: + typedef iplist FragmentListType; + + typedef FragmentListType::const_iterator const_iterator; + typedef FragmentListType::iterator iterator; + + typedef FragmentListType::const_reverse_iterator const_reverse_iterator; + typedef FragmentListType::reverse_iterator reverse_iterator; + +private: + FragmentListType Fragments; + const MCSection *Section; + + /// Ordinal - The section index in the assemblers section list. + unsigned Ordinal; + + /// LayoutOrder - The index of this section in the layout order. + unsigned LayoutOrder; + + /// Alignment - The maximum alignment seen in this section. + unsigned Alignment; + + /// @name Assembler Backend Data + /// @{ + // + // FIXME: This could all be kept private to the assembler implementation. + + /// HasInstructions - Whether this section has had instructions emitted into + /// it. + unsigned HasInstructions : 1; + + /// @} + +public: + // Only for use as sentinel. 
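Once relaxation settles on a size, an MCLEBFragment's contents hold the LEB128 encoding of its value. For reference, this is the standard ULEB128 algorithm in standalone form; it is not LLVM's own emitter, which lives in the MC implementation files.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataTypes.h"

static void encodeULEB128(uint64_t Value, llvm::SmallVectorImpl<char> &Out) {
  do {
    uint8_t Byte = Value & 0x7f; // take the low seven bits
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;              // set the continuation bit
    Out.push_back((char)Byte);
  } while (Value != 0);
}
// encodeULEB128(624485, Out) produces 0xE5 0x8E 0x26, the DWARF spec example.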
+ MCSectionData(); + MCSectionData(const MCSection &Section, MCAssembler *A = 0); + + const MCSection &getSection() const { return *Section; } + + unsigned getAlignment() const { return Alignment; } + void setAlignment(unsigned Value) { Alignment = Value; } + + bool hasInstructions() const { return HasInstructions; } + void setHasInstructions(bool Value) { HasInstructions = Value; } + + unsigned getOrdinal() const { return Ordinal; } + void setOrdinal(unsigned Value) { Ordinal = Value; } + + unsigned getLayoutOrder() const { return LayoutOrder; } + void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } + + /// @name Fragment Access + /// @{ + + const FragmentListType &getFragmentList() const { return Fragments; } + FragmentListType &getFragmentList() { return Fragments; } + + iterator begin() { return Fragments.begin(); } + const_iterator begin() const { return Fragments.begin(); } + + iterator end() { return Fragments.end(); } + const_iterator end() const { return Fragments.end(); } + + reverse_iterator rbegin() { return Fragments.rbegin(); } + const_reverse_iterator rbegin() const { return Fragments.rbegin(); } + + reverse_iterator rend() { return Fragments.rend(); } + const_reverse_iterator rend() const { return Fragments.rend(); } + + size_t size() const { return Fragments.size(); } + + bool empty() const { return Fragments.empty(); } + + void dump(); + + /// @} +}; + +// FIXME: Same concerns as with SectionData. +class MCSymbolData : public ilist_node { +public: + const MCSymbol *Symbol; + + /// Fragment - The fragment this symbol's value is relative to, if any. + MCFragment *Fragment; + + /// Offset - The offset to apply to the fragment address to form this symbol's + /// value. + uint64_t Offset; + + /// IsExternal - True if this symbol is visible outside this translation + /// unit. + unsigned IsExternal : 1; + + /// IsPrivateExtern - True if this symbol is private extern. + unsigned IsPrivateExtern : 1; + + /// CommonSize - The size of the symbol, if it is 'common', or 0. + // + // FIXME: Pack this in with other fields? We could put it in offset, since a + // common symbol can never get a definition. + uint64_t CommonSize; + + /// SymbolSize - An expression describing how to calculate the size of + /// a symbol. If a symbol has no size this field will be NULL. + const MCExpr *SymbolSize; + + /// CommonAlign - The alignment of the symbol, if it is 'common'. + // + // FIXME: Pack this in with other fields? + unsigned CommonAlign; + + /// Flags - The Flags field is used by object file implementations to store + /// additional per symbol information which is not easily classified. + uint32_t Flags; + + /// Index - Index field, for use by the object file implementation. + uint64_t Index; + +public: + // Only for use as sentinel. + MCSymbolData(); + MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, uint64_t _Offset, + MCAssembler *A = 0); + + /// @name Accessors + /// @{ + + const MCSymbol &getSymbol() const { return *Symbol; } + + MCFragment *getFragment() const { return Fragment; } + void setFragment(MCFragment *Value) { Fragment = Value; } + + uint64_t getOffset() const { return Offset; } + void setOffset(uint64_t Value) { Offset = Value; } + + /// @} + /// @name Symbol Attributes + /// @{ + + bool isExternal() const { return IsExternal; } + void setExternal(bool Value) { IsExternal = Value; } + + bool isPrivateExtern() const { return IsPrivateExtern; } + void setPrivateExtern(bool Value) { IsPrivateExtern = Value; } + + /// isCommon - Is this a 'common' symbol. 
+  bool isCommon() const { return CommonSize != 0; }
+
+  /// setCommon - Mark this symbol as being 'common'.
+  ///
+  /// \param Size - The size of the symbol.
+  /// \param Align - The alignment of the symbol.
+  void setCommon(uint64_t Size, unsigned Align) {
+    CommonSize = Size;
+    CommonAlign = Align;
+  }
+
+  /// getCommonSize - Return the size of a 'common' symbol.
+  uint64_t getCommonSize() const {
+    assert(isCommon() && "Not a 'common' symbol!");
+    return CommonSize;
+  }
+
+  void setSize(const MCExpr *SS) {
+    SymbolSize = SS;
+  }
+
+  const MCExpr *getSize() const {
+    return SymbolSize;
+  }
+
+
+  /// getCommonAlignment - Return the alignment of a 'common' symbol.
+  unsigned getCommonAlignment() const {
+    assert(isCommon() && "Not a 'common' symbol!");
+    return CommonAlign;
+  }
+
+  /// getFlags - Get the (implementation defined) symbol flags.
+  uint32_t getFlags() const { return Flags; }
+
+  /// setFlags - Set the (implementation defined) symbol flags.
+  void setFlags(uint32_t Value) { Flags = Value; }
+
+  /// modifyFlags - Modify the flags via a mask.
+  void modifyFlags(uint32_t Value, uint32_t Mask) {
+    Flags = (Flags & ~Mask) | Value;
+  }
+
+  /// getIndex - Get the (implementation defined) index.
+  uint64_t getIndex() const { return Index; }
+
+  /// setIndex - Set the (implementation defined) index.
+  void setIndex(uint64_t Value) { Index = Value; }
+
+  /// @}
+
+  void dump();
+};
+
+// FIXME: This really doesn't belong here. See comments below.
+struct IndirectSymbolData {
+  MCSymbol *Symbol;
+  MCSectionData *SectionData;
+};
+
+class MCAssembler {
+  friend class MCAsmLayout;
+
+public:
+  typedef iplist<MCSectionData> SectionDataListType;
+  typedef iplist<MCSymbolData> SymbolDataListType;
+
+  typedef SectionDataListType::const_iterator const_iterator;
+  typedef SectionDataListType::iterator iterator;
+
+  typedef SymbolDataListType::const_iterator const_symbol_iterator;
+  typedef SymbolDataListType::iterator symbol_iterator;
+
+  typedef std::vector<IndirectSymbolData>::const_iterator
+    const_indirect_symbol_iterator;
+  typedef std::vector<IndirectSymbolData>::iterator indirect_symbol_iterator;
+
+private:
+  MCAssembler(const MCAssembler&); // DO NOT IMPLEMENT
+  void operator=(const MCAssembler&); // DO NOT IMPLEMENT
+
+  MCContext &Context;
+
+  TargetAsmBackend &Backend;
+
+  MCCodeEmitter &Emitter;
+
+  MCObjectWriter &Writer;
+
+  raw_ostream &OS;
+
+  iplist<MCSectionData> Sections;
+
+  iplist<MCSymbolData> Symbols;
+
+  /// The map of sections to their associated assembler backend data.
+  //
+  // FIXME: Avoid this indirection?
+  DenseMap<const MCSection*, MCSectionData*> SectionMap;
+
+  /// The map of symbols to their associated assembler backend data.
+  //
+  // FIXME: Avoid this indirection?
+  DenseMap<const MCSymbol*, MCSymbolData*> SymbolMap;
+
+  std::vector<IndirectSymbolData> IndirectSymbols;
+
+  /// The set of function symbols for which a .thumb_func directive has
+  /// been seen.
+  //
+  // FIXME: We really would like this in target specific code rather than
+  // here. Maybe when the relocation stuff moves to target specific,
+  // this can go with it? The streamer would need some target specific
+  // refactoring too.
+  SmallPtrSet<const MCSymbol*, 64> ThumbFuncs;
+
+  unsigned RelaxAll : 1;
+  unsigned NoExecStack : 1;
+  unsigned SubsectionsViaSymbols : 1;
+
+private:
+  /// Evaluate a fixup to a relocatable expression and the value which should
+  /// be placed into the fixup.
+  ///
+  /// \param Layout The layout to use for evaluation.
+  /// \param Fixup The fixup to evaluate.
+  /// \param DF The fragment the fixup is inside.
+  /// \param Target [out] On return, the relocatable expression the fixup
+  /// evaluates to.
+  /// \param Value [out] On return, the value of the fixup as currently laid
+  /// out.
+  /// \return Whether the fixup value was fully resolved. This is true if the
+  /// \arg Value result is fixed, otherwise the value may change due to
+  /// relocation.
+  bool EvaluateFixup(const MCAsmLayout &Layout,
+                     const MCFixup &Fixup, const MCFragment *DF,
+                     MCValue &Target, uint64_t &Value) const;
+
+  /// Check whether a fixup can be satisfied, or whether it needs to be relaxed
+  /// (increased in size, in order to hold its value correctly).
+  bool FixupNeedsRelaxation(const MCFixup &Fixup, const MCFragment *DF,
+                            const MCAsmLayout &Layout) const;
+
+  /// Check whether the given fragment needs relaxation.
+  bool FragmentNeedsRelaxation(const MCInstFragment *IF,
+                               const MCAsmLayout &Layout) const;
+
+  /// LayoutOnce - Perform one layout iteration and return true if any offsets
+  /// were adjusted.
+  bool LayoutOnce(MCAsmLayout &Layout);
+
+  bool LayoutSectionOnce(MCAsmLayout &Layout, MCSectionData &SD);
+
+  bool RelaxInstruction(MCAsmLayout &Layout, MCInstFragment &IF);
+
+  bool RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
+
+  bool RelaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
+  bool RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+                                   MCDwarfCallFrameFragment &DF);
+
+  /// FinishLayout - Finalize a layout, including fragment lowering.
+  void FinishLayout(MCAsmLayout &Layout);
+
+  uint64_t HandleFixup(const MCAsmLayout &Layout,
+                       MCFragment &F, const MCFixup &Fixup);
+
+public:
+  /// Compute the effective fragment size, assuming it is laid out at the
+  /// given \arg SectionAddress and \arg FragmentOffset.
+  uint64_t ComputeFragmentSize(const MCAsmLayout &Layout,
+                               const MCFragment &F) const;
+
+  /// Find the symbol which defines the atom containing the given symbol, or
+  /// null if there is no such symbol.
+  const MCSymbolData *getAtom(const MCSymbolData *Symbol) const;
+
+  /// Check whether a particular symbol is visible to the linker and is
+  /// required in the symbol table, or whether it can be discarded by the
+  /// assembler. This also affects whether the assembler treats the label as
+  /// potentially defining a separate atom.
+  bool isSymbolLinkerVisible(const MCSymbol &SD) const;
+
+  /// Emit the section contents using the given object writer.
+  void WriteSectionData(const MCSectionData *Section,
+                        const MCAsmLayout &Layout) const;
+
+  /// Check whether a given symbol has been flagged with .thumb_func.
+  bool isThumbFunc(const MCSymbol *Func) const {
+    return ThumbFuncs.count(Func);
+  }
+
+  /// Flag a function symbol as the target of a .thumb_func directive.
+  void setIsThumbFunc(const MCSymbol *Func) { ThumbFuncs.insert(Func); }
+
+public:
+  /// Construct a new assembler instance.
+  ///
+  /// \arg OS - The stream to output to.
+  //
+  // FIXME: How are we going to parameterize this? Two obvious options are stay
+  // concrete and require clients to pass in a target like object. The other
+  // option is to make this abstract, and have targets provide concrete
+  // implementations as we do with AsmParser.
+  MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_,
+              MCCodeEmitter &Emitter_, MCObjectWriter &Writer_,
+              raw_ostream &OS);
+  ~MCAssembler();
+
+  MCContext &getContext() const { return Context; }
+
+  TargetAsmBackend &getBackend() const { return Backend; }
+
+  MCCodeEmitter &getEmitter() const { return Emitter; }
+
+  MCObjectWriter &getWriter() const { return Writer; }
+
+  /// Finish - Do final processing and write the object to the output stream.
+ /// \arg Writer is used for custom object writer (as the MCJIT does), + /// if not specified it is automatically created from backend. + void Finish(); + + // FIXME: This does not belong here. + bool getSubsectionsViaSymbols() const { + return SubsectionsViaSymbols; + } + void setSubsectionsViaSymbols(bool Value) { + SubsectionsViaSymbols = Value; + } + + bool getRelaxAll() const { return RelaxAll; } + void setRelaxAll(bool Value) { RelaxAll = Value; } + + bool getNoExecStack() const { return NoExecStack; } + void setNoExecStack(bool Value) { NoExecStack = Value; } + + /// @name Section List Access + /// @{ + + const SectionDataListType &getSectionList() const { return Sections; } + SectionDataListType &getSectionList() { return Sections; } + + iterator begin() { return Sections.begin(); } + const_iterator begin() const { return Sections.begin(); } + + iterator end() { return Sections.end(); } + const_iterator end() const { return Sections.end(); } + + size_t size() const { return Sections.size(); } + + /// @} + /// @name Symbol List Access + /// @{ + + const SymbolDataListType &getSymbolList() const { return Symbols; } + SymbolDataListType &getSymbolList() { return Symbols; } + + symbol_iterator symbol_begin() { return Symbols.begin(); } + const_symbol_iterator symbol_begin() const { return Symbols.begin(); } + + symbol_iterator symbol_end() { return Symbols.end(); } + const_symbol_iterator symbol_end() const { return Symbols.end(); } + + size_t symbol_size() const { return Symbols.size(); } + + /// @} + /// @name Indirect Symbol List Access + /// @{ + + // FIXME: This is a total hack, this should not be here. Once things are + // factored so that the streamer has direct access to the .o writer, it can + // disappear. + std::vector &getIndirectSymbols() { + return IndirectSymbols; + } + + indirect_symbol_iterator indirect_symbol_begin() { + return IndirectSymbols.begin(); + } + const_indirect_symbol_iterator indirect_symbol_begin() const { + return IndirectSymbols.begin(); + } + + indirect_symbol_iterator indirect_symbol_end() { + return IndirectSymbols.end(); + } + const_indirect_symbol_iterator indirect_symbol_end() const { + return IndirectSymbols.end(); + } + + size_t indirect_symbol_size() const { return IndirectSymbols.size(); } + + /// @} + /// @name Backend Data Access + /// @{ + + MCSectionData &getSectionData(const MCSection &Section) const { + MCSectionData *Entry = SectionMap.lookup(&Section); + assert(Entry && "Missing section data!"); + return *Entry; + } + + MCSectionData &getOrCreateSectionData(const MCSection &Section, + bool *Created = 0) { + MCSectionData *&Entry = SectionMap[&Section]; + + if (Created) *Created = !Entry; + if (!Entry) + Entry = new MCSectionData(Section, this); + + return *Entry; + } + + MCSymbolData &getSymbolData(const MCSymbol &Symbol) const { + MCSymbolData *Entry = SymbolMap.lookup(&Symbol); + assert(Entry && "Missing symbol data!"); + return *Entry; + } + + MCSymbolData &getOrCreateSymbolData(const MCSymbol &Symbol, + bool *Created = 0) { + MCSymbolData *&Entry = SymbolMap[&Symbol]; + + if (Created) *Created = !Entry; + if (!Entry) + Entry = new MCSymbolData(Symbol, 0, 0, this); + + return *Entry; + } + + /// @} + + void dump(); +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCCodeEmitter.h b/final/include/llvm/MC/MCCodeEmitter.h new file mode 100644 index 00000000000..bc63241bece --- /dev/null +++ b/final/include/llvm/MC/MCCodeEmitter.h @@ -0,0 +1,42 @@ +//===-- llvm/MC/MCCodeEmitter.h - Instruction Encoding 
----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCODEEMITTER_H
+#define LLVM_MC_MCCODEEMITTER_H
+
+#include "llvm/MC/MCFixup.h"
+
+#include <cassert>
+
+namespace llvm {
+class MCExpr;
+class MCInst;
+class raw_ostream;
+template <typename T> class SmallVectorImpl;
+
+/// MCCodeEmitter - Generic instruction encoding interface.
+class MCCodeEmitter {
+private:
+  MCCodeEmitter(const MCCodeEmitter &); // DO NOT IMPLEMENT
+  void operator=(const MCCodeEmitter &); // DO NOT IMPLEMENT
+
+protected: // Can only create subclasses.
+  MCCodeEmitter();
+
+public:
+  virtual ~MCCodeEmitter();
+
+  /// EncodeInstruction - Encode the given \arg Inst to bytes on the output
+  /// stream \arg OS.
+  virtual void EncodeInstruction(const MCInst &Inst, raw_ostream &OS,
+                                 SmallVectorImpl<MCFixup> &Fixups) const = 0;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCContext.h b/final/include/llvm/MC/MCContext.h
new file mode 100644
index 00000000000..7b26d549377
--- /dev/null
+++ b/final/include/llvm/MC/MCContext.h
@@ -0,0 +1,322 @@
+//===- MCContext.h - Machine Code Context -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCONTEXT_H
+#define LLVM_MC_MCCONTEXT_H
+
+#include "llvm/MC/SectionKind.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector> // FIXME: Shouldn't be needed.
+
+namespace llvm {
+  class MCAsmInfo;
+  class MCExpr;
+  class MCSection;
+  class MCSymbol;
+  class MCLabel;
+  class MCDwarfFile;
+  class MCDwarfLoc;
+  class MCLineSection;
+  class StringRef;
+  class Twine;
+  class TargetAsmInfo;
+  class MCSectionMachO;
+  class MCSectionELF;
+
+  /// MCContext - Context object for machine code objects. This class owns all
+  /// of the sections that it creates.
+  ///
+  class MCContext {
+    MCContext(const MCContext&); // DO NOT IMPLEMENT
+    MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
+
+    /// The MCAsmInfo for this target.
+    const MCAsmInfo &MAI;
+
+    const TargetAsmInfo *TAI;
+
+    /// Symbols - Bindings of names to symbols.
+    StringMap<MCSymbol*> Symbols;
+
+    /// UsedNames - Keeps track of names that were used for both declared and
+    /// artificial symbols.
+    StringMap<bool> UsedNames;
+
+    /// NextUniqueID - The next ID to dole out to an unnamed assembler
+    /// temporary symbol.
+    unsigned NextUniqueID;
+
+    /// Instances of directional local labels.
+    DenseMap<int64_t, MCLabel *> Instances;
+    /// NextInstance() creates the next instance of the directional local label
+    /// for the LocalLabelVal and adds it to the map if needed.
+    unsigned NextInstance(int64_t LocalLabelVal);
+    /// GetInstance() gets the current instance of the directional local label
+    /// for the LocalLabelVal and adds it to the map if needed.
+    unsigned GetInstance(int64_t LocalLabelVal);
+
+    /// The file name of the log file from the environment variable
+    /// AS_SECURE_LOG_FILE, which must be set before the .secure_log_unique
+    /// directive is used; otherwise it is an error.
+    char *SecureLogFile;
+    /// The stream that gets written to for the .secure_log_unique directive.
+    raw_ostream *SecureLog;
+    /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to
+    /// catch errors if .secure_log_unique appears twice without
+    /// .secure_log_reset appearing between them.
+    bool SecureLogUsed;
+
+    /// The dwarf file and directory tables from the dwarf .file directive.
+    std::vector<MCDwarfFile *> MCDwarfFiles;
+    std::vector<StringRef> MCDwarfDirs;
+
+    /// The current dwarf line information from the last dwarf .loc directive.
+    MCDwarfLoc CurrentDwarfLoc;
+    bool DwarfLocSeen;
+
+    /// The dwarf line information from the .loc directives for the sections
+    /// that have assembled machine instructions after seeing .loc directives.
+    DenseMap<const MCSection *, MCLineSection *> MCLineSections;
+    /// We need a deterministic iteration order, so we remember the order
+    /// the elements were added.
+    std::vector<const MCSection *> MCLineSectionOrder;
+
+    /// Allocator - Allocator object used for creating machine code objects.
+    ///
+    /// We use a bump pointer allocator to avoid the need to track all
+    /// allocated objects.
+    BumpPtrAllocator Allocator;
+
+    void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap;
+
+    MCSymbol *CreateSymbol(StringRef Name);
+
+  public:
+    explicit MCContext(const MCAsmInfo &MAI, const TargetAsmInfo *TAI);
+    ~MCContext();
+
+    const MCAsmInfo &getAsmInfo() const { return MAI; }
+
+    const TargetAsmInfo &getTargetAsmInfo() const { return *TAI; }
+
+    /// @name Symbol Management
+    /// @{
+
+    /// CreateTempSymbol - Create and return a new assembler temporary symbol
+    /// with a unique but unspecified name.
+    MCSymbol *CreateTempSymbol();
+
+    /// CreateDirectionalLocalSymbol - Create the definition of a directional
+    /// local symbol for numbered label (used for "1:" definitions).
+    MCSymbol *CreateDirectionalLocalSymbol(int64_t LocalLabelVal);
+
+    /// GetDirectionalLocalSymbol - Create and return a directional local
+    /// symbol for numbered label (used for "1b" or "1f" references).
+    MCSymbol *GetDirectionalLocalSymbol(int64_t LocalLabelVal, int bORf);
+
+    /// GetOrCreateSymbol - Lookup the symbol inside with the specified
+    /// @p Name. If it exists, return it. If not, create a forward
+    /// reference and return it.
+    ///
+    /// @param Name - The symbol name, which must be unique across all symbols.
+    MCSymbol *GetOrCreateSymbol(StringRef Name);
+    MCSymbol *GetOrCreateSymbol(const Twine &Name);
+
+    /// LookupSymbol - Get the symbol for \p Name, or null.
+    MCSymbol *LookupSymbol(StringRef Name) const;
+
+    /// @}
+
+    /// @name Section Management
+    /// @{
+
+    /// getMachOSection - Return the MCSection for the specified mach-o
+    /// section. This requires the operands to be valid.
+    const MCSectionMachO *getMachOSection(StringRef Segment,
+                                          StringRef Section,
+                                          unsigned TypeAndAttributes,
+                                          unsigned Reserved2,
+                                          SectionKind K);
+    const MCSectionMachO *getMachOSection(StringRef Segment,
+                                          StringRef Section,
+                                          unsigned TypeAndAttributes,
+                                          SectionKind K) {
+      return getMachOSection(Segment, Section, TypeAndAttributes, 0, K);
+    }
+
+    const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind);
+
+    const MCSectionELF *getELFSection(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind,
+                                      unsigned EntrySize, StringRef Group);
+
+    const MCSectionELF *CreateELFGroupSection();
+
+    const MCSection *getCOFFSection(StringRef Section,
+                                    unsigned Characteristics,
+                                    int Selection, SectionKind Kind);
+
+    const MCSection *getCOFFSection(StringRef Section,
+                                    unsigned Characteristics,
+                                    SectionKind Kind) {
+      return getCOFFSection(Section, Characteristics, 0, Kind);
+    }
+
+
+    /// @}
+
+    /// @name Dwarf Management
+    /// @{
+
+    /// GetDwarfFile - creates an entry in the dwarf file and directory tables.
+    unsigned GetDwarfFile(StringRef FileName, unsigned FileNumber);
+
+    bool isValidDwarfFileNumber(unsigned FileNumber);
+
+    bool hasDwarfFiles() const {
+      return !MCDwarfFiles.empty();
+    }
+
+    const std::vector<MCDwarfFile *> &getMCDwarfFiles() {
+      return MCDwarfFiles;
+    }
+    const std::vector<StringRef> &getMCDwarfDirs() {
+      return MCDwarfDirs;
+    }
+
+    const DenseMap<const MCSection *, MCLineSection *>
+    &getMCLineSections() const {
+      return MCLineSections;
+    }
+    const std::vector<const MCSection *> &getMCLineSectionOrder() const {
+      return MCLineSectionOrder;
+    }
+    void addMCLineSection(const MCSection *Sec, MCLineSection *Line) {
+      MCLineSections[Sec] = Line;
+      MCLineSectionOrder.push_back(Sec);
+    }
+
+    /// setCurrentDwarfLoc - saves the information from the currently parsed
+    /// dwarf .loc directive and sets DwarfLocSeen. When the next instruction
+    /// is assembled an entry in the line number table with this information
+    /// and the address of the instruction will be created.
+    void setCurrentDwarfLoc(unsigned FileNum, unsigned Line, unsigned Column,
+                            unsigned Flags, unsigned Isa,
+                            unsigned Discriminator) {
+      CurrentDwarfLoc.setFileNum(FileNum);
+      CurrentDwarfLoc.setLine(Line);
+      CurrentDwarfLoc.setColumn(Column);
+      CurrentDwarfLoc.setFlags(Flags);
+      CurrentDwarfLoc.setIsa(Isa);
+      CurrentDwarfLoc.setDiscriminator(Discriminator);
+      DwarfLocSeen = true;
+    }
+    void ClearDwarfLocSeen() { DwarfLocSeen = false; }
+
+    bool getDwarfLocSeen() { return DwarfLocSeen; }
+    const MCDwarfLoc &getCurrentDwarfLoc() { return CurrentDwarfLoc; }
+
+    /// @}
+
+    char *getSecureLogFile() { return SecureLogFile; }
+    raw_ostream *getSecureLog() { return SecureLog; }
+    bool getSecureLogUsed() { return SecureLogUsed; }
+    void setSecureLog(raw_ostream *Value) {
+      SecureLog = Value;
+    }
+    void setSecureLogUsed(bool Value) {
+      SecureLogUsed = Value;
+    }
+
+    void *Allocate(unsigned Size, unsigned Align = 8) {
+      return Allocator.Allocate(Size, Align);
+    }
+    void Deallocate(void *Ptr) {
+    }
+  };
+
+} // end namespace llvm
+
+// operator new and delete aren't allowed inside namespaces.
+// The throw specifications are mandated by the standard.
+/// @brief Placement new for using the MCContext's allocator.
+///
+/// This placement form of operator new uses the MCContext's allocator for
+/// obtaining memory. It is a non-throwing new, which means that it returns
+/// null on error. (If that is what the allocator does. The current one does,
+/// so if this ever changes, this operator will have to be changed, too.)
+/// Usage looks like this (assuming there's an MCContext 'Context' in scope): +/// @code +/// // Default alignment (16) +/// IntegerLiteral *Ex = new (Context) IntegerLiteral(arguments); +/// // Specific alignment +/// IntegerLiteral *Ex2 = new (Context, 8) IntegerLiteral(arguments); +/// @endcode +/// Please note that you cannot use delete on the pointer; it must be +/// deallocated using an explicit destructor call followed by +/// @c Context.Deallocate(Ptr). +/// +/// @param Bytes The number of bytes to allocate. Calculated by the compiler. +/// @param C The MCContext that provides the allocator. +/// @param Alignment The alignment of the allocated memory (if the underlying +/// allocator supports it). +/// @return The allocated memory. Could be NULL. +inline void *operator new(size_t Bytes, llvm::MCContext &C, + size_t Alignment = 16) throw () { + return C.Allocate(Bytes, Alignment); +} +/// @brief Placement delete companion to the new above. +/// +/// This operator is just a companion to the new above. There is no way of +/// invoking it directly; see the new operator for more details. This operator +/// is called implicitly by the compiler if a placement new expression using +/// the MCContext throws in the object constructor. +inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) + throw () { + C.Deallocate(Ptr); +} + +/// This placement form of operator new[] uses the MCContext's allocator for +/// obtaining memory. It is a non-throwing new[], which means that it returns +/// null on error. +/// Usage looks like this (assuming there's an MCContext 'Context' in scope): +/// @code +/// // Default alignment (16) +/// char *data = new (Context) char[10]; +/// // Specific alignment +/// char *data = new (Context, 8) char[10]; +/// @endcode +/// Please note that you cannot use delete on the pointer; it must be +/// deallocated using an explicit destructor call followed by +/// @c Context.Deallocate(Ptr). +/// +/// @param Bytes The number of bytes to allocate. Calculated by the compiler. +/// @param C The MCContext that provides the allocator. +/// @param Alignment The alignment of the allocated memory (if the underlying +/// allocator supports it). +/// @return The allocated memory. Could be NULL. +inline void *operator new[](size_t Bytes, llvm::MCContext& C, + size_t Alignment = 16) throw () { + return C.Allocate(Bytes, Alignment); +} + +/// @brief Placement delete[] companion to the new[] above. +/// +/// This operator is just a companion to the new[] above. There is no way of +/// invoking it directly; see the new[] operator for more details. This operator +/// is called implicitly by the compiler if a placement new[] expression using +/// the MCContext throws in the object constructor. +inline void operator delete[](void *Ptr, llvm::MCContext &C) throw () { + C.Deallocate(Ptr); +} + +#endif diff --git a/final/include/llvm/MC/MCDirectives.h b/final/include/llvm/MC/MCDirectives.h new file mode 100644 index 00000000000..1df55dc252e --- /dev/null +++ b/final/include/llvm/MC/MCDirectives.h @@ -0,0 +1,56 @@ +//===- MCDirectives.h - Enums for directives on various targets -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines various enums that represent target-specific directives. 
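Because this allocation discipline is easy to get wrong, a compact usage sketch may help; ScratchBlob is a made-up type used purely for illustration.

void allocExample(llvm::MCContext &Context) {
  struct ScratchBlob { char Bytes[64]; }; // hypothetical example type
  ScratchBlob *A = new (Context) ScratchBlob;    // default 16-byte alignment
  ScratchBlob *B = new (Context, 8) ScratchBlob; // explicit 8-byte alignment
  // No 'delete A': run the destructor explicitly, then return the memory.
  A->~ScratchBlob();
  Context.Deallocate(A); // currently a no-op; the bump allocator frees in bulk
  (void)B; // reclaimed wholesale when the MCContext itself is destroyed
}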
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDIRECTIVES_H +#define LLVM_MC_MCDIRECTIVES_H + +namespace llvm { + +enum MCSymbolAttr { + MCSA_Invalid = 0, ///< Not a valid directive. + + // Various directives in alphabetical order. + MCSA_ELF_TypeFunction, ///< .type _foo, STT_FUNC # aka @function + MCSA_ELF_TypeIndFunction, ///< .type _foo, STT_GNU_IFUNC + MCSA_ELF_TypeObject, ///< .type _foo, STT_OBJECT # aka @object + MCSA_ELF_TypeTLS, ///< .type _foo, STT_TLS # aka @tls_object + MCSA_ELF_TypeCommon, ///< .type _foo, STT_COMMON # aka @common + MCSA_ELF_TypeNoType, ///< .type _foo, STT_NOTYPE # aka @notype + MCSA_ELF_TypeGnuUniqueObject, /// .type _foo, @gnu_unique_object + MCSA_Global, ///< .globl + MCSA_Hidden, ///< .hidden (ELF) + MCSA_IndirectSymbol, ///< .indirect_symbol (MachO) + MCSA_Internal, ///< .internal (ELF) + MCSA_LazyReference, ///< .lazy_reference (MachO) + MCSA_Local, ///< .local (ELF) + MCSA_NoDeadStrip, ///< .no_dead_strip (MachO) + MCSA_SymbolResolver, ///< .symbol_resolver (MachO) + MCSA_PrivateExtern, ///< .private_extern (MachO) + MCSA_Protected, ///< .protected (ELF) + MCSA_Reference, ///< .reference (MachO) + MCSA_Weak, ///< .weak + MCSA_WeakDefinition, ///< .weak_definition (MachO) + MCSA_WeakReference, ///< .weak_reference (MachO) + MCSA_WeakDefAutoPrivate ///< .weak_def_can_be_hidden (MachO) +}; + +enum MCAssemblerFlag { + MCAF_SyntaxUnified, ///< .syntax (ARM/ELF) + MCAF_SubsectionsViaSymbols, ///< .subsections_via_symbols (MachO) + MCAF_Code16, ///< .code 16 + MCAF_Code32 ///< .code 32 +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCDisassembler.h b/final/include/llvm/MC/MCDisassembler.h new file mode 100644 index 00000000000..c9e42eb6c79 --- /dev/null +++ b/final/include/llvm/MC/MCDisassembler.h @@ -0,0 +1,60 @@ +//===-- llvm/MC/MCDisassembler.h - Disassembler interface -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#ifndef MCDISASSEMBLER_H +#define MCDISASSEMBLER_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MCInst; +class MemoryObject; +class raw_ostream; + +struct EDInstInfo; + +/// MCDisassembler - Superclass for all disassemblers. Consumes a memory region +/// and provides an array of assembly instructions. +class MCDisassembler { +public: + /// Constructor - Performs initial setup for the disassembler. + MCDisassembler() {} + + virtual ~MCDisassembler(); + + /// getInstruction - Returns the disassembly of a single instruction. + /// + /// @param instr - An MCInst to populate with the contents of the + /// instruction. + /// @param size - A value to populate with the size of the instruction, or + /// the number of bytes consumed while attempting to decode + /// an invalid instruction. + /// @param region - The memory object to use as a source for machine code. + /// @param address - The address, in the memory space of region, of the first + /// byte of the instruction. + /// @param vStream - The stream to print warnings and diagnostic messages on. + /// @return - True if the instruction is valid; false otherwise. 
+ virtual bool getInstruction(MCInst& instr, + uint64_t& size, + const MemoryObject ®ion, + uint64_t address, + raw_ostream &vStream) const = 0; + + /// getEDInfo - Returns the enhanced insturction information corresponding to + /// the disassembler. + /// + /// @return - An array of instruction information, with one entry for + /// each MCInst opcode this disassembler returns. + /// NULL if there is no info for this target. + virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } +}; + +} // namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCDwarf.h b/final/include/llvm/MC/MCDwarf.h new file mode 100644 index 00000000000..07a7bad15b1 --- /dev/null +++ b/final/include/llvm/MC/MCDwarf.h @@ -0,0 +1,279 @@ +//===- MCDwarf.h - Machine Code Dwarf support -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCDwarfFile to support the dwarf +// .file directive and the .loc directive. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCDWARF_H +#define LLVM_MC_MCDWARF_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineLocation.h" // FIXME +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Dwarf.h" +#include + +namespace llvm { + class MachineMove; + class MCContext; + class MCExpr; + class MCSection; + class MCSectionData; + class MCStreamer; + class MCSymbol; + class MCObjectStreamer; + class raw_ostream; + + /// MCDwarfFile - Instances of this class represent the name of the dwarf + /// .file directive and its associated dwarf file number in the MC file, + /// and MCDwarfFile's are created and unique'd by the MCContext class where + /// the file number for each is its index into the vector of DwarfFiles (note + /// index 0 is not used and not a valid dwarf file number). + class MCDwarfFile { + // Name - the base name of the file without its directory path. + // The StringRef references memory allocated in the MCContext. + StringRef Name; + + // DirIndex - the index into the list of directory names for this file name. + unsigned DirIndex; + + private: // MCContext creates and uniques these. + friend class MCContext; + MCDwarfFile(StringRef name, unsigned dirIndex) + : Name(name), DirIndex(dirIndex) {} + + MCDwarfFile(const MCDwarfFile&); // DO NOT IMPLEMENT + void operator=(const MCDwarfFile&); // DO NOT IMPLEMENT + public: + /// getName - Get the base name of this MCDwarfFile. + StringRef getName() const { return Name; } + + /// getDirIndex - Get the dirIndex of this MCDwarfFile. + unsigned getDirIndex() const { return DirIndex; } + + + /// print - Print the value to the stream \arg OS. + void print(raw_ostream &OS) const; + + /// dump - Print the value to stderr. + void dump() const; + }; + + inline raw_ostream &operator<<(raw_ostream &OS, const MCDwarfFile &DwarfFile){ + DwarfFile.print(OS); + return OS; + } + + /// MCDwarfLoc - Instances of this class represent the information from a + /// dwarf .loc directive. + class MCDwarfLoc { + // FileNum - the file number. + unsigned FileNum; + // Line - the line number. + unsigned Line; + // Column - the column position. 
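To give the contract above some shape, here is a toy disassembler for a fictional ISA in which every instruction is a single byte. It is purely illustrative (real disassemblers are generated from target descriptions), and it assumes MemoryObject::readByte(address, ptr) from llvm/Support/MemoryObject.h, which returns -1 on failure.

#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/MemoryObject.h"

namespace {
class ToyDisassembler : public llvm::MCDisassembler {
public:
  virtual bool getInstruction(llvm::MCInst &instr, uint64_t &size,
                              const llvm::MemoryObject &region,
                              uint64_t address,
                              llvm::raw_ostream &vStream) const {
    uint8_t opcode;
    if (region.readByte(address, &opcode) == -1) {
      size = 0; // nothing consumed; the region ended
      return false;
    }
    size = 1;                // every toy instruction is one byte
    instr.setOpcode(opcode); // pretend encodings and opcodes coincide
    return true;
  }
};
} // end anonymous namespace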
+ unsigned Column; + // Flags (see #define's below) + unsigned Flags; + // Isa + unsigned Isa; + // Discriminator + unsigned Discriminator; + +// Flag that indicates the initial value of the is_stmt_start flag. +#define DWARF2_LINE_DEFAULT_IS_STMT 1 + +#define DWARF2_FLAG_IS_STMT (1 << 0) +#define DWARF2_FLAG_BASIC_BLOCK (1 << 1) +#define DWARF2_FLAG_PROLOGUE_END (1 << 2) +#define DWARF2_FLAG_EPILOGUE_BEGIN (1 << 3) + + private: // MCContext manages these + friend class MCContext; + friend class MCLineEntry; + MCDwarfLoc(unsigned fileNum, unsigned line, unsigned column, unsigned flags, + unsigned isa, unsigned discriminator) + : FileNum(fileNum), Line(line), Column(column), Flags(flags), Isa(isa), + Discriminator(discriminator) {} + + // Allow the default copy constructor and assignment operator to be used + // for an MCDwarfLoc object. + + public: + /// getFileNum - Get the FileNum of this MCDwarfLoc. + unsigned getFileNum() const { return FileNum; } + + /// getLine - Get the Line of this MCDwarfLoc. + unsigned getLine() const { return Line; } + + /// getColumn - Get the Column of this MCDwarfLoc. + unsigned getColumn() const { return Column; } + + /// getFlags - Get the Flags of this MCDwarfLoc. + unsigned getFlags() const { return Flags; } + + /// getIsa - Get the Isa of this MCDwarfLoc. + unsigned getIsa() const { return Isa; } + + /// getDiscriminator - Get the Discriminator of this MCDwarfLoc. + unsigned getDiscriminator() const { return Discriminator; } + + /// setFileNum - Set the FileNum of this MCDwarfLoc. + void setFileNum(unsigned fileNum) { FileNum = fileNum; } + + /// setLine - Set the Line of this MCDwarfLoc. + void setLine(unsigned line) { Line = line; } + + /// setColumn - Set the Column of this MCDwarfLoc. + void setColumn(unsigned column) { Column = column; } + + /// setFlags - Set the Flags of this MCDwarfLoc. + void setFlags(unsigned flags) { Flags = flags; } + + /// setIsa - Set the Isa of this MCDwarfLoc. + void setIsa(unsigned isa) { Isa = isa; } + + /// setDiscriminator - Set the Discriminator of this MCDwarfLoc. + void setDiscriminator(unsigned discriminator) { + Discriminator = discriminator; + } + }; + + /// MCLineEntry - Instances of this class represent the line information for + /// the dwarf line table entries. Which is created after a machine + /// instruction is assembled and uses an address from a temporary label + /// created at the current address in the current section and the info from + /// the last .loc directive seen as stored in the context. + class MCLineEntry : public MCDwarfLoc { + MCSymbol *Label; + + private: + // Allow the default copy constructor and assignment operator to be used + // for an MCLineEntry object. + + public: + // Constructor to create an MCLineEntry given a symbol and the dwarf loc. + MCLineEntry(MCSymbol *label, const MCDwarfLoc loc) : MCDwarfLoc(loc), + Label(label) {} + + MCSymbol *getLabel() const { return Label; } + + // This is called when an instruction is assembled into the specified + // section and if there is information from the last .loc directive that + // has yet to have a line entry made for it is made. + static void Make(MCStreamer *MCOS, const MCSection *Section); + }; + + /// MCLineSection - Instances of this class represent the line information + /// for a section where machine instructions have been assembled after seeing + /// .loc directives. This is the information used to build the dwarf line + /// table for a section. 
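Tying the pieces together: when the assembler parses a .loc directive it calls MCContext::setCurrentDwarfLoc (declared earlier in this patch), and the next assembled instruction triggers MCLineEntry::Make. A hedged sketch of that hand-off for ".loc 1 42 7 is_stmt 1":

void recordLoc(llvm::MCContext &Context) {
  // Only the is_stmt flag is set here; the DWARF2_FLAG_* macros are the
  // ones defined above in this header.
  unsigned Flags = DWARF2_FLAG_IS_STMT;
  Context.setCurrentDwarfLoc(/*FileNum=*/1, /*Line=*/42, /*Column=*/7,
                             Flags, /*Isa=*/0, /*Discriminator=*/0);
  // The next instruction assembled will be paired with a temporary label
  // and this state to form an MCLineEntry for the current section.
}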
+ class MCLineSection { + + private: + MCLineSection(const MCLineSection&); // DO NOT IMPLEMENT + void operator=(const MCLineSection&); // DO NOT IMPLEMENT + + public: + // Constructor to create an MCLineSection with an empty MCLineEntries + // vector. + MCLineSection() {} + + // addLineEntry - adds an entry to this MCLineSection's line entries + void addLineEntry(const MCLineEntry &LineEntry) { + MCLineEntries.push_back(LineEntry); + } + + typedef std::vector MCLineEntryCollection; + typedef MCLineEntryCollection::iterator iterator; + typedef MCLineEntryCollection::const_iterator const_iterator; + + private: + MCLineEntryCollection MCLineEntries; + + public: + const MCLineEntryCollection *getMCLineEntries() const { + return &MCLineEntries; + } + }; + + class MCDwarfFileTable { + public: + // + // This emits the Dwarf file and the line tables. + // + static void Emit(MCStreamer *MCOS); + }; + + class MCDwarfLineAddr { + public: + /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. + static void Encode(int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS); + + /// Utility function to emit the encoding to a streamer. + static void Emit(MCStreamer *MCOS, + int64_t LineDelta,uint64_t AddrDelta); + + /// Utility function to write the encoding to an object writer. + static void Write(MCObjectWriter *OW, + int64_t LineDelta, uint64_t AddrDelta); + }; + + class MCCFIInstruction { + public: + enum OpType { Remember, Restore, Move }; + private: + OpType Operation; + MCSymbol *Label; + // Move to & from location. + MachineLocation Destination; + MachineLocation Source; + public: + MCCFIInstruction(OpType Op, MCSymbol *L) + : Operation(Op), Label(L) { + assert(Op == Remember || Op == Restore); + } + MCCFIInstruction(MCSymbol *L, const MachineLocation &D, + const MachineLocation &S) + : Operation(Move), Label(L), Destination(D), Source(S) { + } + OpType getOperation() const { return Operation; } + MCSymbol *getLabel() const { return Label; } + const MachineLocation &getDestination() const { return Destination; } + const MachineLocation &getSource() const { return Source; } + }; + + struct MCDwarfFrameInfo { + MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0), + Instructions(), PersonalityEncoding(0), + LsdaEncoding(0) {} + MCSymbol *Begin; + MCSymbol *End; + const MCSymbol *Personality; + const MCSymbol *Lsda; + std::vector Instructions; + unsigned PersonalityEncoding; + unsigned LsdaEncoding; + }; + + class MCDwarfFrameEmitter { + public: + // + // This emits the frame info section. + // + static void Emit(MCStreamer &streamer); + static void EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta); + static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS); + }; +} // end namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCELFObjectWriter.h b/final/include/llvm/MC/MCELFObjectWriter.h new file mode 100644 index 00000000000..3c150dca9e6 --- /dev/null +++ b/final/include/llvm/MC/MCELFObjectWriter.h @@ -0,0 +1,47 @@ +//===-- llvm/MC/MCELFObjectWriter.h - ELF Object Writer ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
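A short sketch of how frame lowering might build the MCCFIInstruction forms above. The register number and offset are made up, and MachineLocation comes from llvm/CodeGen/MachineLocation.h (pulled in by MCDwarf.h).

#include <vector>

void buildCFI(llvm::MCContext &Context,
              std::vector<llvm::MCCFIInstruction> &Out) {
  using namespace llvm;
  MCSymbol *L = Context.CreateTempSymbol();
  // Move form: at label L, define the CFA as "register 13 + 16".
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(/*Reg=*/13, /*Offset=*/16);
  Out.push_back(MCCFIInstruction(L, Dst, Src));
  // Remember/Restore forms bracket a region that temporarily changes state.
  Out.push_back(MCCFIInstruction(MCCFIInstruction::Remember, L));
  Out.push_back(MCCFIInstruction(MCCFIInstruction::Restore, L));
}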
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFOBJECTWRITER_H +#define LLVM_MC_MCELFOBJECTWRITER_H + +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { +class MCELFObjectTargetWriter { + const Triple::OSType OSType; + const uint16_t EMachine; + const unsigned HasRelocationAddend : 1; + const unsigned Is64Bit : 1; +protected: + MCELFObjectTargetWriter(bool Is64Bit_, Triple::OSType OSType_, + uint16_t EMachine_, bool HasRelocationAddend_); + +public: + virtual ~MCELFObjectTargetWriter(); + + /// @name Accessors + /// @{ + Triple::OSType getOSType() { return OSType; } + uint16_t getEMachine() { return EMachine; } + bool hasRelocationAddend() { return HasRelocationAddend; } + bool is64Bit() { return Is64Bit; } + /// @} +}; + +/// \brief Construct a new ELF writer instance. +/// +/// \param MOTW - The target specific ELF writer subclass. +/// \param OS - The stream to write to. +/// \returns The constructed object writer. +MCObjectWriter *createELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &OS, bool IsLittleEndian); +} // End llvm namespace + +#endif diff --git a/final/include/llvm/MC/MCELFSymbolFlags.h b/final/include/llvm/MC/MCELFSymbolFlags.h new file mode 100644 index 00000000000..d798fb07727 --- /dev/null +++ b/final/include/llvm/MC/MCELFSymbolFlags.h @@ -0,0 +1,57 @@ +//===- MCELFSymbolFlags.h - ELF Symbol Flags ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SymbolFlags used for the ELF target. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSYMBOLFLAGS_H +#define LLVM_MC_MCELFSYMBOLFLAGS_H + +#include "llvm/Support/ELF.h" + +// Because all the symbol flags need to be stored in the MCSymbolData +// 'flags' variable we need to provide shift constants per flag type. + +namespace llvm { + enum { + ELF_STT_Shift = 0, // Shift value for STT_* flags. + ELF_STB_Shift = 4, // Shift value for STB_* flags. + ELF_STV_Shift = 8, // Shift value for STV_* flags. + ELF_Other_Shift = 10 // Shift value for other flags. 
+  };
+
+  enum SymbolFlags {
+    ELF_STB_Local = (ELF::STB_LOCAL << ELF_STB_Shift),
+    ELF_STB_Global = (ELF::STB_GLOBAL << ELF_STB_Shift),
+    ELF_STB_Weak = (ELF::STB_WEAK << ELF_STB_Shift),
+    ELF_STB_Loproc = (ELF::STB_LOPROC << ELF_STB_Shift),
+    ELF_STB_Hiproc = (ELF::STB_HIPROC << ELF_STB_Shift),
+
+    ELF_STT_Notype = (ELF::STT_NOTYPE << ELF_STT_Shift),
+    ELF_STT_Object = (ELF::STT_OBJECT << ELF_STT_Shift),
+    ELF_STT_Func = (ELF::STT_FUNC << ELF_STT_Shift),
+    ELF_STT_Section = (ELF::STT_SECTION << ELF_STT_Shift),
+    ELF_STT_File = (ELF::STT_FILE << ELF_STT_Shift),
+    ELF_STT_Common = (ELF::STT_COMMON << ELF_STT_Shift),
+    ELF_STT_Tls = (ELF::STT_TLS << ELF_STT_Shift),
+    ELF_STT_Loproc = (ELF::STT_LOPROC << ELF_STT_Shift),
+    ELF_STT_Hiproc = (ELF::STT_HIPROC << ELF_STT_Shift),
+
+    ELF_STV_Default = (ELF::STV_DEFAULT << ELF_STV_Shift),
+    ELF_STV_Internal = (ELF::STV_INTERNAL << ELF_STV_Shift),
+    ELF_STV_Hidden = (ELF::STV_HIDDEN << ELF_STV_Shift),
+    ELF_STV_Protected = (ELF::STV_PROTECTED << ELF_STV_Shift),
+
+    ELF_Other_Weakref = (1 << ELF_Other_Shift)
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCExpr.h b/final/include/llvm/MC/MCExpr.h
new file mode 100644
index 00000000000..fea5249eaba
--- /dev/null
+++ b/final/include/llvm/MC/MCExpr.h
@@ -0,0 +1,432 @@
+//===- MCExpr.h - Assembly Level Expressions --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCEXPR_H
+#define LLVM_MC_MCEXPR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCAsmInfo;
+class MCAsmLayout;
+class MCAssembler;
+class MCContext;
+class MCSectionData;
+class MCSymbol;
+class MCValue;
+class raw_ostream;
+class StringRef;
+typedef DenseMap<const MCSectionData*, uint64_t> SectionAddrMap;
+
+/// MCExpr - Base class for the full range of assembler expressions which are
+/// needed for parsing.
+class MCExpr {
+public:
+  enum ExprKind {
+    Binary, ///< Binary expressions.
+    Constant, ///< Constant expressions.
+    SymbolRef, ///< References to labels and assigned expressions.
+    Unary, ///< Unary expressions.
+    Target ///< Target specific expression.
+  };
+
+private:
+  ExprKind Kind;
+
+  MCExpr(const MCExpr&); // DO NOT IMPLEMENT
+  void operator=(const MCExpr&); // DO NOT IMPLEMENT
+
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm,
+                          const MCAsmLayout *Layout,
+                          const SectionAddrMap *Addrs) const;
+protected:
+  explicit MCExpr(ExprKind _Kind) : Kind(_Kind) {}
+
+  bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
+                                 const MCAsmLayout *Layout,
+                                 const SectionAddrMap *Addrs,
+                                 bool InSet) const;
+public:
+  /// @name Accessors
+  /// @{
+
+  ExprKind getKind() const { return Kind; }
+
+  /// @}
+  /// @name Utility Methods
+  /// @{
+
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
+  /// @}
+  /// @name Expression Evaluation
+  /// @{
+
+  /// EvaluateAsAbsolute - Try to evaluate the expression to an absolute value.
+  ///
+  /// @param Res - The absolute value, if evaluation succeeds.
+  /// @param Layout - The assembler layout object to use for evaluating symbol
+  /// values. If not given, then only non-symbolic expressions will be
+  /// evaluated.
+  /// @result - True on success.
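Stepping back to the MCELFSymbolFlags enums above: binding, type, and visibility occupy disjoint bit ranges of one flags word, so composing and decoding them is plain shift-and-mask arithmetic. A minimal sketch (the function name is illustrative; the 4-bit field widths are implied by the shift layout):

    #include "llvm/MC/MCELFSymbolFlags.h"
    #include "llvm/Support/ELF.h"
    #include <cassert>

    using namespace llvm;

    void checkFlagPacking() {
      // A global function symbol: STB_GLOBAL lives in bits [4,8),
      // STT_FUNC in bits [0,4).
      unsigned Flags = ELF_STB_Global | ELF_STT_Func;
      assert(((Flags >> ELF_STB_Shift) & 0xf) == ELF::STB_GLOBAL);
      assert(((Flags >> ELF_STT_Shift) & 0xf) == ELF::STT_FUNC);
    }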
+  bool EvaluateAsAbsolute(int64_t &Res) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAssembler &Asm) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout) const;
+  bool EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout &Layout,
+                          const SectionAddrMap &Addrs) const;
+
+  /// EvaluateAsRelocatable - Try to evaluate the expression to a relocatable
+  /// value, i.e. an expression of the fixed form (a - b + constant).
+  ///
+  /// @param Res - The relocatable value, if evaluation succeeds.
+  /// @param Layout - The assembler layout object to use for evaluating values.
+  /// @result - True on success.
+  bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout &Layout) const;
+
+  /// @}
+
+  static bool classof(const MCExpr *) { return true; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const MCExpr &E) {
+  E.print(OS);
+  return OS;
+}
+
+/// MCConstantExpr - Represent a constant integer expression.
+class MCConstantExpr : public MCExpr {
+  int64_t Value;
+
+  explicit MCConstantExpr(int64_t _Value)
+    : MCExpr(MCExpr::Constant), Value(_Value) {}
+
+public:
+  /// @name Construction
+  /// @{
+
+  static const MCConstantExpr *Create(int64_t Value, MCContext &Ctx);
+
+  /// @}
+  /// @name Accessors
+  /// @{
+
+  int64_t getValue() const { return Value; }
+
+  /// @}
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Constant;
+  }
+  static bool classof(const MCConstantExpr *) { return true; }
+};
+
+/// MCSymbolRefExpr - Represent a reference to a symbol from inside an
+/// expression.
+///
+/// A symbol reference in an expression may be a use of a label, a use of an
+/// assembler variable (defined constant), or constitute an implicit definition
+/// of the symbol as external.
+class MCSymbolRefExpr : public MCExpr {
+public:
+  enum VariantKind {
+    VK_None,
+    VK_Invalid,
+
+    VK_GOT,
+    VK_GOTOFF,
+    VK_GOTPCREL,
+    VK_GOTTPOFF,
+    VK_INDNTPOFF,
+    VK_NTPOFF,
+    VK_GOTNTPOFF,
+    VK_PLT,
+    VK_TLSGD,
+    VK_TLSLD,
+    VK_TLSLDM,
+    VK_TPOFF,
+    VK_DTPOFF,
+    VK_TLVP, // Mach-O thread local variable relocation
+    // FIXME: We'd really like to use the generic Kinds listed above for these.
+    VK_ARM_PLT, // ARM-style PLT references. i.e., (PLT) instead of @PLT
+    VK_ARM_TLSGD, // ditto for TLSGD, GOT, GOTOFF, TPOFF and GOTTPOFF
+    VK_ARM_GOT,
+    VK_ARM_GOTOFF,
+    VK_ARM_TPOFF,
+    VK_ARM_GOTTPOFF,
+
+    VK_PPC_TOC,
+    VK_PPC_HA16, // ha16(symbol)
+    VK_PPC_LO16 // lo16(symbol)
+  };
+
+private:
+  /// The symbol being referenced.
+  const MCSymbol *Symbol;
+
+  /// The symbol reference modifier.
+ const VariantKind Kind; + + explicit MCSymbolRefExpr(const MCSymbol *_Symbol, VariantKind _Kind) + : MCExpr(MCExpr::SymbolRef), Symbol(_Symbol), Kind(_Kind) {} + +public: + /// @name Construction + /// @{ + + static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, MCContext &Ctx) { + return MCSymbolRefExpr::Create(Symbol, VK_None, Ctx); + } + + static const MCSymbolRefExpr *Create(const MCSymbol *Symbol, VariantKind Kind, + MCContext &Ctx); + static const MCSymbolRefExpr *Create(StringRef Name, VariantKind Kind, + MCContext &Ctx); + + /// @} + /// @name Accessors + /// @{ + + const MCSymbol &getSymbol() const { return *Symbol; } + + VariantKind getKind() const { return Kind; } + + /// @} + /// @name Static Utility Functions + /// @{ + + static StringRef getVariantKindName(VariantKind Kind); + + static VariantKind getVariantKindForName(StringRef Name); + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::SymbolRef; + } + static bool classof(const MCSymbolRefExpr *) { return true; } +}; + +/// MCUnaryExpr - Unary assembler expressions. +class MCUnaryExpr : public MCExpr { +public: + enum Opcode { + LNot, ///< Logical negation. + Minus, ///< Unary minus. + Not, ///< Bitwise negation. + Plus ///< Unary plus. + }; + +private: + Opcode Op; + const MCExpr *Expr; + + MCUnaryExpr(Opcode _Op, const MCExpr *_Expr) + : MCExpr(MCExpr::Unary), Op(_Op), Expr(_Expr) {} + +public: + /// @name Construction + /// @{ + + static const MCUnaryExpr *Create(Opcode Op, const MCExpr *Expr, + MCContext &Ctx); + static const MCUnaryExpr *CreateLNot(const MCExpr *Expr, MCContext &Ctx) { + return Create(LNot, Expr, Ctx); + } + static const MCUnaryExpr *CreateMinus(const MCExpr *Expr, MCContext &Ctx) { + return Create(Minus, Expr, Ctx); + } + static const MCUnaryExpr *CreateNot(const MCExpr *Expr, MCContext &Ctx) { + return Create(Not, Expr, Ctx); + } + static const MCUnaryExpr *CreatePlus(const MCExpr *Expr, MCContext &Ctx) { + return Create(Plus, Expr, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this unary expression. + Opcode getOpcode() const { return Op; } + + /// getSubExpr - Get the child of this unary expression. + const MCExpr *getSubExpr() const { return Expr; } + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Unary; + } + static bool classof(const MCUnaryExpr *) { return true; } +}; + +/// MCBinaryExpr - Binary assembler expressions. +class MCBinaryExpr : public MCExpr { +public: + enum Opcode { + Add, ///< Addition. + And, ///< Bitwise and. + Div, ///< Signed division. + EQ, ///< Equality comparison. + GT, ///< Signed greater than comparison (result is either 0 or some + ///< target-specific non-zero value) + GTE, ///< Signed greater than or equal comparison (result is either 0 or + ///< some target-specific non-zero value). + LAnd, ///< Logical and. + LOr, ///< Logical or. + LT, ///< Signed less than comparison (result is either 0 or + ///< some target-specific non-zero value). + LTE, ///< Signed less than or equal comparison (result is either 0 or + ///< some target-specific non-zero value). + Mod, ///< Signed remainder. + Mul, ///< Multiplication. + NE, ///< Inequality comparison. + Or, ///< Bitwise or. + Shl, ///< Shift left. + Shr, ///< Shift right (arithmetic or logical, depending on target) + Sub, ///< Subtraction. + Xor ///< Bitwise exclusive or. 
+ }; + +private: + Opcode Op; + const MCExpr *LHS, *RHS; + + MCBinaryExpr(Opcode _Op, const MCExpr *_LHS, const MCExpr *_RHS) + : MCExpr(MCExpr::Binary), Op(_Op), LHS(_LHS), RHS(_RHS) {} + +public: + /// @name Construction + /// @{ + + static const MCBinaryExpr *Create(Opcode Op, const MCExpr *LHS, + const MCExpr *RHS, MCContext &Ctx); + static const MCBinaryExpr *CreateAdd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Add, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateAnd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(And, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateDiv(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Div, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateEQ(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(EQ, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateGT(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(GT, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateGTE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(GTE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLAnd(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LAnd, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLOr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LOr, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLT(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LT, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateLTE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(LTE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateMod(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Mod, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateMul(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Mul, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateNE(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(NE, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateOr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Or, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateShl(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Shl, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateShr(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Shr, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateSub(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Sub, LHS, RHS, Ctx); + } + static const MCBinaryExpr *CreateXor(const MCExpr *LHS, const MCExpr *RHS, + MCContext &Ctx) { + return Create(Xor, LHS, RHS, Ctx); + } + + /// @} + /// @name Accessors + /// @{ + + /// getOpcode - Get the kind of this binary expression. + Opcode getOpcode() const { return Op; } + + /// getLHS - Get the left-hand side expression of the binary operator. + const MCExpr *getLHS() const { return LHS; } + + /// getRHS - Get the right-hand side expression of the binary operator. + const MCExpr *getRHS() const { return RHS; } + + /// @} + + static bool classof(const MCExpr *E) { + return E->getKind() == MCExpr::Binary; + } + static bool classof(const MCBinaryExpr *) { return true; } +}; + +/// MCTargetExpr - This is an extension point for target-specific MCExpr +/// subclasses to implement. 
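Taken together, these factory methods let clients build expression trees bottom-up in an MCContext, and the evaluation interface folds whatever is non-symbolic. A minimal sketch, assuming a context and symbol are already available (the function name is illustrative):

    #include "llvm/MC/MCExpr.h"
    #include <cassert>

    using namespace llvm;

    const MCExpr *buildSymPlusFour(MCContext &Ctx, const MCSymbol *Sym) {
      // (Sym + 4) is relocatable; it will not fold to an absolute value here.
      const MCExpr *Sum =
        MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Sym, Ctx),
                                MCConstantExpr::Create(4, Ctx), Ctx);

      // A constant-only tree, by contrast, folds without any layout.
      const MCExpr *Six =
        MCBinaryExpr::CreateMul(MCConstantExpr::Create(2, Ctx),
                                MCConstantExpr::Create(3, Ctx), Ctx);
      int64_t Res;
      assert(Six->EvaluateAsAbsolute(Res) && Res == 6);
      return Sum;
    }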
+///
+/// NOTE: All subclasses are required to have trivial destructors because
+/// MCExprs are bump pointer allocated and not destructed.
+class MCTargetExpr : public MCExpr {
+  virtual void Anchor();
+protected:
+  MCTargetExpr() : MCExpr(Target) {}
+  virtual ~MCTargetExpr() {}
+public:
+
+  virtual void PrintImpl(raw_ostream &OS) const = 0;
+  virtual bool EvaluateAsRelocatableImpl(MCValue &Res,
+                                         const MCAsmLayout *Layout) const = 0;
+  virtual void AddValueSymbols(MCAssembler *) const = 0;
+
+  static bool classof(const MCExpr *E) {
+    return E->getKind() == MCExpr::Target;
+  }
+  static bool classof(const MCTargetExpr *) { return true; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCFixup.h b/final/include/llvm/MC/MCFixup.h
new file mode 100644
index 00000000000..6fde797e40f
--- /dev/null
+++ b/final/include/llvm/MC/MCFixup.h
@@ -0,0 +1,97 @@
+//===-- llvm/MC/MCFixup.h - Instruction Relocation and Patching -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFIXUP_H
+#define LLVM_MC_MCFIXUP_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+class MCExpr;
+
+/// MCFixupKind - Extensible enumeration to represent the type of a fixup.
+enum MCFixupKind {
+  FK_Data_1 = 0, ///< A one-byte fixup.
+  FK_Data_2, ///< A two-byte fixup.
+  FK_Data_4, ///< A four-byte fixup.
+  FK_Data_8, ///< An eight-byte fixup.
+  FK_PCRel_1, ///< A one-byte pc relative fixup.
+  FK_PCRel_2, ///< A two-byte pc relative fixup.
+  FK_PCRel_4, ///< A four-byte pc relative fixup.
+  FK_PCRel_8, ///< An eight-byte pc relative fixup.
+
+  FirstTargetFixupKind = 128,
+
+  // Limit range of target fixups, in case we want to pack more efficiently
+  // later.
+  MaxTargetFixupKind = (1 << 8)
+};
+
+/// MCFixup - Encode information on a single operation to perform on a byte
+/// sequence (e.g., an encoded instruction) which requires assemble- or
+/// run-time patching.
+///
+/// Fixups are used any time the target instruction encoder needs to represent
+/// some value in an instruction which is not yet concrete. The encoder will
+/// encode the instruction assuming the value is 0, and emit a fixup which
+/// communicates to the assembler backend how it should rewrite the encoded
+/// value.
+///
+/// During the process of relaxation, the assembler will apply fixups as
+/// symbolic values become concrete. When relaxation is complete, any remaining
+/// fixups become relocations in the object file (or errors, if the fixup cannot
+/// be encoded on the target).
+class MCFixup {
+  /// The value to put into the fixup location. The exact interpretation of the
+  /// expression is target dependent, usually it will be one of the operands to
+  /// an instruction or an assembler directive.
+  const MCExpr *Value;
+
+  /// The byte index of start of the relocation inside the encoded instruction.
+  uint32_t Offset;
+
+  /// The target dependent kind of fixup item this is. The kind is used to
+  /// determine how the operand value should be encoded into the instruction.
+  unsigned Kind;
+
+public:
+  static MCFixup Create(uint32_t Offset, const MCExpr *Value,
+                        MCFixupKind Kind) {
+    assert(unsigned(Kind) < MaxTargetFixupKind && "Kind out of range!");
+    MCFixup FI;
+    FI.Value = Value;
+    FI.Offset = Offset;
+    FI.Kind = unsigned(Kind);
+    return FI;
+  }
+
+  MCFixupKind getKind() const { return MCFixupKind(Kind); }
+
+  uint32_t getOffset() const { return Offset; }
+  void setOffset(uint32_t Value) { Offset = Value; }
+
+  const MCExpr *getValue() const { return Value; }
+
+  /// getKindForSize - Return the generic fixup kind for a value with the given
+  /// size. It is an error to pass an unsupported size.
+  static MCFixupKind getKindForSize(unsigned Size, bool isPCRel) {
+    switch (Size) {
+    default: assert(0 && "Invalid generic fixup size!");
+    case 1: return isPCRel ? FK_PCRel_1 : FK_Data_1;
+    case 2: return isPCRel ? FK_PCRel_2 : FK_Data_2;
+    case 4: return isPCRel ? FK_PCRel_4 : FK_Data_4;
+    case 8: return isPCRel ? FK_PCRel_8 : FK_Data_8;
+    }
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCFixupKindInfo.h b/final/include/llvm/MC/MCFixupKindInfo.h
new file mode 100644
index 00000000000..1961687146a
--- /dev/null
+++ b/final/include/llvm/MC/MCFixupKindInfo.h
@@ -0,0 +1,43 @@
+//===-- llvm/MC/MCFixupKindInfo.h - Fixup Descriptors -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCFIXUPKINDINFO_H
+#define LLVM_MC_MCFIXUPKINDINFO_H
+
+namespace llvm {
+
+/// MCFixupKindInfo - Target independent information on a fixup kind.
+struct MCFixupKindInfo {
+  enum FixupKindFlags {
+    /// Is this fixup kind PC-relative? This is used by the assembler backend
+    /// to evaluate fixup values in a target independent manner when possible.
+    FKF_IsPCRel = (1 << 0),
+
+    /// Should this fixup kind force a 4-byte aligned effective PC value?
+    FKF_IsAlignedDownTo32Bits = (1 << 1)
+  };
+
+  /// A target specific name for the fixup kind. The names will be unique for
+  /// distinct kinds on any given target.
+  const char *Name;
+
+  /// The bit offset to write the relocation into.
+  unsigned TargetOffset;
+
+  /// The number of bits written by this fixup. The bits are assumed to be
+  /// contiguous.
+  unsigned TargetSize;
+
+  /// Flags describing additional information on this fixup kind.
+  unsigned Flags;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCInst.h b/final/include/llvm/MC/MCInst.h
new file mode 100644
index 00000000000..d6ef7b4c33c
--- /dev/null
+++ b/final/include/llvm/MC/MCInst.h
@@ -0,0 +1,170 @@
+//===-- llvm/MC/MCInst.h - MCInst class -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCInst and MCOperand classes,
+// which together form the basic representation of low-level machine code
+// instructions.
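MCFixup::Create and getKindForSize are typically used together: the generic kind captures size and PC-relativity, and the fixup ties it to an offset and value expression. A minimal sketch (the function name and offset are illustrative):

    #include "llvm/MC/MCFixup.h"

    using namespace llvm;

    // Record that a 4-byte, PC-relative value must be patched in at byte
    // offset 1 of an encoded instruction (e.g. a call's immediate field).
    MCFixup makeCallFixup(const MCExpr *Target) {
      return MCFixup::Create(/*Offset=*/1, Target,
                             MCFixup::getKindForSize(4, /*isPCRel=*/true));
    }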
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINST_H
+#define LLVM_MC_MCINST_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class raw_ostream;
+class MCAsmInfo;
+class MCInstPrinter;
+class MCExpr;
+
+/// MCOperand - Instances of this class represent operands of the MCInst class.
+/// This is a simple discriminated union.
+class MCOperand {
+  enum MachineOperandType {
+    kInvalid, ///< Uninitialized.
+    kRegister, ///< Register operand.
+    kImmediate, ///< Immediate operand.
+    kFPImmediate, ///< Floating-point immediate operand.
+    kExpr ///< Relocatable immediate operand.
+  };
+  unsigned char Kind;
+
+  union {
+    unsigned RegVal;
+    int64_t ImmVal;
+    double FPImmVal;
+    const MCExpr *ExprVal;
+  };
+public:
+
+  MCOperand() : Kind(kInvalid), FPImmVal(0.0) {}
+
+  bool isValid() const { return Kind != kInvalid; }
+  bool isReg() const { return Kind == kRegister; }
+  bool isImm() const { return Kind == kImmediate; }
+  bool isFPImm() const { return Kind == kFPImmediate; }
+  bool isExpr() const { return Kind == kExpr; }
+
+  /// getReg - Returns the register number.
+  unsigned getReg() const {
+    assert(isReg() && "This is not a register operand!");
+    return RegVal;
+  }
+
+  /// setReg - Set the register number.
+  void setReg(unsigned Reg) {
+    assert(isReg() && "This is not a register operand!");
+    RegVal = Reg;
+  }
+
+  int64_t getImm() const {
+    assert(isImm() && "This is not an immediate");
+    return ImmVal;
+  }
+  void setImm(int64_t Val) {
+    assert(isImm() && "This is not an immediate");
+    ImmVal = Val;
+  }
+
+  double getFPImm() const {
+    assert(isFPImm() && "This is not an FP immediate");
+    return FPImmVal;
+  }
+
+  void setFPImm(double Val) {
+    assert(isFPImm() && "This is not an FP immediate");
+    FPImmVal = Val;
+  }
+
+  const MCExpr *getExpr() const {
+    assert(isExpr() && "This is not an expression");
+    return ExprVal;
+  }
+  void setExpr(const MCExpr *Val) {
+    assert(isExpr() && "This is not an expression");
+    ExprVal = Val;
+  }
+
+  static MCOperand CreateReg(unsigned Reg) {
+    MCOperand Op;
+    Op.Kind = kRegister;
+    Op.RegVal = Reg;
+    return Op;
+  }
+  static MCOperand CreateImm(int64_t Val) {
+    MCOperand Op;
+    Op.Kind = kImmediate;
+    Op.ImmVal = Val;
+    return Op;
+  }
+  static MCOperand CreateFPImm(double Val) {
+    MCOperand Op;
+    Op.Kind = kFPImmediate;
+    Op.FPImmVal = Val;
+    return Op;
+  }
+  static MCOperand CreateExpr(const MCExpr *Val) {
+    MCOperand Op;
+    Op.Kind = kExpr;
+    Op.ExprVal = Val;
+    return Op;
+  }
+
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+  void dump() const;
+};
+
+
+/// MCInst - Instances of this class represent a single low-level machine
+/// instruction.
+class MCInst {
+  unsigned Opcode;
+  SmallVector<MCOperand, 8> Operands;
+public:
+  MCInst() : Opcode(0) {}
+
+  void setOpcode(unsigned Op) { Opcode = Op; }
+
+  unsigned getOpcode() const { return Opcode; }
+
+  const MCOperand &getOperand(unsigned i) const { return Operands[i]; }
+  MCOperand &getOperand(unsigned i) { return Operands[i]; }
+  unsigned getNumOperands() const { return Operands.size(); }
+
+  void addOperand(const MCOperand &Op) {
+    Operands.push_back(Op);
+  }
+
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
+  void dump() const;
+
+  /// \brief Dump the MCInst as prettily as possible using the additional MC
+  /// structures, if given. Operands are separated by the \arg Separator
+  /// string.
+  void dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI = 0,
+                   const MCInstPrinter *Printer = 0,
+                   StringRef Separator = " ") const;
+};
+
+inline raw_ostream& operator<<(raw_ostream &OS, const MCOperand &MO) {
+  MO.print(OS, 0);
+  return OS;
+}
+
+inline raw_ostream& operator<<(raw_ostream &OS, const MCInst &MI) {
+  MI.print(OS, 0);
+  return OS;
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCInstPrinter.h b/final/include/llvm/MC/MCInstPrinter.h
new file mode 100644
index 00000000000..92f06ed09c6
--- /dev/null
+++ b/final/include/llvm/MC/MCInstPrinter.h
@@ -0,0 +1,51 @@
+//===-- MCInstPrinter.h - Convert an MCInst to target assembly syntax -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTPRINTER_H
+#define LLVM_MC_MCINSTPRINTER_H
+
+namespace llvm {
+class MCInst;
+class raw_ostream;
+class MCAsmInfo;
+class StringRef;
+
+/// MCInstPrinter - This is an instance of a target assembly language printer
+/// that converts an MCInst to valid target assembly syntax.
+class MCInstPrinter {
+protected:
+  /// CommentStream - a stream that comments can be emitted to if desired.
+  /// Each comment must end with a newline. This will be null if verbose
+  /// assembly emission is disabled.
+  raw_ostream *CommentStream;
+  const MCAsmInfo &MAI;
+public:
+  MCInstPrinter(const MCAsmInfo &mai)
+    : CommentStream(0), MAI(mai) {}
+
+  virtual ~MCInstPrinter();
+
+  /// setCommentStream - Specify a stream to emit comments to.
+  void setCommentStream(raw_ostream &OS) { CommentStream = &OS; }
+
+  /// printInst - Print the specified MCInst to the specified raw_ostream.
+  ///
+  virtual void printInst(const MCInst *MI, raw_ostream &OS) = 0;
+
+  /// getOpcodeName - Return the name of the specified opcode enum (e.g.
+  /// "MOV32ri") or empty if we can't resolve it.
+  virtual StringRef getOpcodeName(unsigned Opcode) const;
+
+  /// getRegName - Return the assembler register name.
+  virtual StringRef getRegName(unsigned RegNo) const;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCLabel.h b/final/include/llvm/MC/MCLabel.h
new file mode 100644
index 00000000000..727520d4af9
--- /dev/null
+++ b/final/include/llvm/MC/MCLabel.h
@@ -0,0 +1,56 @@
+//===- MCLabel.h - Machine Code Directional Local Labels --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCLabel class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCLABEL_H
+#define LLVM_MC_MCLABEL_H
+
+namespace llvm {
+  class MCContext;
+  class raw_ostream;
+
+  /// MCLabel - Instances of this class represent a label name in the MC file.
+  /// MCLabels are created and uniqued by the MCContext class, and should only
+  /// be constructed for valid instances in the object file.
+  class MCLabel {
+    // Instance - the instance number of this Directional Local Label
+    unsigned Instance;
+
+  private: // MCContext creates and uniques these.
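Building an MCInst through the interface above is a matter of setting the opcode and appending operands via the MCOperand factories. A minimal sketch with placeholder opcode and register numbers (real values come from a target's generated enums):

    #include "llvm/MC/MCInst.h"

    using namespace llvm;

    // Build "op reg, 42"; Opcode and Reg are illustrative placeholders.
    MCInst buildInst(unsigned Opcode, unsigned Reg) {
      MCInst Inst;
      Inst.setOpcode(Opcode);
      Inst.addOperand(MCOperand::CreateReg(Reg));
      Inst.addOperand(MCOperand::CreateImm(42));
      return Inst;
    }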
+    friend class MCContext;
+    MCLabel(unsigned instance)
+      : Instance(instance) {}
+
+    MCLabel(const MCLabel&); // DO NOT IMPLEMENT
+    void operator=(const MCLabel&); // DO NOT IMPLEMENT
+  public:
+    /// getInstance - Get the current instance of this Directional Local Label.
+    unsigned getInstance() const { return Instance; }
+
+    /// incInstance - Increment the current instance of this Directional Local
+    /// Label.
+    unsigned incInstance() { return ++Instance; }
+
+    /// print - Print the value to the stream \arg OS.
+    void print(raw_ostream &OS) const;
+
+    /// dump - Print the value to stderr.
+    void dump() const;
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const MCLabel &Label) {
+    Label.print(OS);
+    return OS;
+  }
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCMachOSymbolFlags.h b/final/include/llvm/MC/MCMachOSymbolFlags.h
new file mode 100644
index 00000000000..696436dffa6
--- /dev/null
+++ b/final/include/llvm/MC/MCMachOSymbolFlags.h
@@ -0,0 +1,46 @@
+//===- MCMachOSymbolFlags.h - MachO Symbol Flags ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SymbolFlags used for the MachO target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCMACHOSYMBOLFLAGS_H
+#define LLVM_MC_MCMACHOSYMBOLFLAGS_H
+
+// These flags are mostly used in MCMachOStreamer.cpp but also needed in
+// MachObjectWriter.cpp to test for Weak Definitions of symbols to emit
+// the correct relocation information.
+
+namespace llvm {
+  /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest
+  /// 16 bits of the implementation defined flags.
+  enum SymbolFlags { // See <mach-o/nlist.h>.
+    SF_DescFlagsMask = 0xFFFF,
+
+    // Reference type flags.
+    SF_ReferenceTypeMask = 0x0007,
+    SF_ReferenceTypeUndefinedNonLazy = 0x0000,
+    SF_ReferenceTypeUndefinedLazy = 0x0001,
+    SF_ReferenceTypeDefined = 0x0002,
+    SF_ReferenceTypePrivateDefined = 0x0003,
+    SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
+    SF_ReferenceTypePrivateUndefinedLazy = 0x0005,
+
+    // Other 'desc' flags.
+    SF_ThumbFunc = 0x0008,
+    SF_NoDeadStrip = 0x0020,
+    SF_WeakReference = 0x0040,
+    SF_WeakDefinition = 0x0080,
+    SF_SymbolResolver = 0x0100
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCMachObjectWriter.h b/final/include/llvm/MC/MCMachObjectWriter.h
new file mode 100644
index 00000000000..ec51031d0bb
--- /dev/null
+++ b/final/include/llvm/MC/MCMachObjectWriter.h
@@ -0,0 +1,65 @@
+//===-- llvm/MC/MCMachObjectWriter.h - Mach Object Writer -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCMACHOBJECTWRITER_H
+#define LLVM_MC_MCMACHOBJECTWRITER_H
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class MCMachObjectTargetWriter {
+  const unsigned Is64Bit : 1;
+  const uint32_t CPUType;
+  const uint32_t CPUSubtype;
+  // FIXME: Remove this, we should just always use it once we no longer care
+  // about Darwin 'as' compatibility.
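Since these values all live in the low 16 bits reserved for the Mach-O 'desc' field, combining them is a bitwise-or, and SF_DescFlagsMask bounds the result. A minimal sketch (the function name is illustrative):

    #include "llvm/MC/MCMachOSymbolFlags.h"
    #include <cassert>

    using namespace llvm;

    void checkDescFlags() {
      // Mark a weak definition that must also survive dead stripping.
      unsigned Desc = SF_WeakDefinition | SF_NoDeadStrip;
      assert((Desc & SF_DescFlagsMask) == Desc && "must fit the 16-bit desc field");
    }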
+ const unsigned UseAggressiveSymbolFolding : 1; + unsigned LocalDifference_RIT; + +protected: + MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_, + uint32_t CPUSubtype_, + bool UseAggressiveSymbolFolding_ = false); + + void setLocalDifferenceRelocationType(unsigned Type) { + LocalDifference_RIT = Type; + } + +public: + virtual ~MCMachObjectTargetWriter(); + + /// @name Accessors + /// @{ + + bool is64Bit() const { return Is64Bit; } + bool useAggressiveSymbolFolding() const { return UseAggressiveSymbolFolding; } + uint32_t getCPUType() const { return CPUType; } + uint32_t getCPUSubtype() const { return CPUSubtype; } + unsigned getLocalDifferenceRelocationType() const { + return LocalDifference_RIT; + } + + /// @} +}; + +/// \brief Construct a new Mach-O writer instance. +/// +/// This routine takes ownership of the target writer subclass. +/// +/// \param MOTW - The target specific Mach-O writer subclass. +/// \param OS - The stream to write to. +/// \returns The constructed object writer. +MCObjectWriter *createMachObjectWriter(MCMachObjectTargetWriter *MOTW, + raw_ostream &OS, bool IsLittleEndian); + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/MC/MCObjectStreamer.h b/final/include/llvm/MC/MCObjectStreamer.h new file mode 100644 index 00000000000..833341eb97f --- /dev/null +++ b/final/include/llvm/MC/MCObjectStreamer.h @@ -0,0 +1,83 @@ +//===- MCObjectStreamer.h - MCStreamer Object File Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTSTREAMER_H +#define LLVM_MC_MCOBJECTSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class MCAssembler; +class MCCodeEmitter; +class MCSectionData; +class MCExpr; +class MCFragment; +class MCDataFragment; +class TargetAsmBackend; +class raw_ostream; + +/// \brief Streaming object file generation interface. +/// +/// This class provides an implementation of the MCStreamer interface which is +/// suitable for use with the assembler backend. Specific object file formats +/// are expected to subclass this interface to implement directives specific +/// to that file format or custom semantics expected by the object writer +/// implementation. +class MCObjectStreamer : public MCStreamer { + MCAssembler *Assembler; + MCSectionData *CurSectionData; + + virtual void EmitInstToData(const MCInst &Inst) = 0; + +protected: + MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter); + ~MCObjectStreamer(); + + MCSectionData *getCurrentSectionData() const { + return CurSectionData; + } + + MCFragment *getCurrentFragment() const; + + /// Get a data fragment to write into, creating a new one if the current + /// fragment is not a data fragment. 
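A target plugs into this interface by subclassing MCMachObjectTargetWriter and handing an instance to createMachObjectWriter. A minimal sketch; the subclass name and CPU identifiers are illustrative, not a real target:

    #include "llvm/MC/MCMachObjectWriter.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    namespace {
    // Illustrative 32-bit target writer with placeholder CPU identifiers.
    class DemoMachObjectWriter : public MCMachObjectTargetWriter {
    public:
      DemoMachObjectWriter(uint32_t CPUType, uint32_t CPUSubtype)
        : MCMachObjectTargetWriter(/*Is64Bit_=*/false, CPUType, CPUSubtype) {}
    };
    } // end anonymous namespace

    MCObjectWriter *createDemoWriter(raw_ostream &OS, uint32_t CPUType,
                                     uint32_t CPUSubtype) {
      // createMachObjectWriter takes ownership of the target writer.
      return createMachObjectWriter(new DemoMachObjectWriter(CPUType, CPUSubtype),
                                    OS, /*IsLittleEndian=*/true);
    }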
+  MCDataFragment *getOrCreateDataFragment() const;
+
+  const MCExpr *AddValueSymbols(const MCExpr *Value);
+
+public:
+  MCAssembler &getAssembler() { return *Assembler; }
+
+  /// @name MCStreamer Interface
+  /// @{
+
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                             bool isPCRel, unsigned AddrSpace);
+  virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0);
+  virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
+  virtual void ChangeSection(const MCSection *Section);
+  virtual void EmitInstruction(const MCInst &Inst);
+  virtual void EmitInstToFragment(const MCInst &Inst);
+  virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
+  virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                        const MCSymbol *LastLabel,
+                                        const MCSymbol *Label);
+  virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                         const MCSymbol *Label);
+  virtual void Finish();
+
+  /// @}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCObjectWriter.h b/final/include/llvm/MC/MCObjectWriter.h
new file mode 100644
index 00000000000..782d844598b
--- /dev/null
+++ b/final/include/llvm/MC/MCObjectWriter.h
@@ -0,0 +1,198 @@
+//===-- llvm/MC/MCObjectWriter.h - Object File Writer Interface -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCOBJECTWRITER_H
+#define LLVM_MC_MCOBJECTWRITER_H
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DataTypes.h"
+#include <cassert>
+
+namespace llvm {
+class MCAsmLayout;
+class MCAssembler;
+class MCFixup;
+class MCFragment;
+class MCSymbol;
+class MCSymbolData;
+class MCSymbolRefExpr;
+class MCValue;
+class raw_ostream;
+
+/// MCObjectWriter - Defines the object file and target independent interfaces
+/// used by the assembler backend to write native file format object files.
+///
+/// The object writer contains a few callbacks used by the assembler to allow
+/// the object writer to modify the assembler data structures at appropriate
+/// points. Once assembly is complete, the object writer is given the
+/// MCAssembler instance, which contains all the symbol and section data which
+/// should be emitted as part of WriteObject().
+///
+/// The object writer also contains a number of helper methods for writing
+/// binary data to the output stream.
+class MCObjectWriter {
+  MCObjectWriter(const MCObjectWriter &); // DO NOT IMPLEMENT
+  void operator=(const MCObjectWriter &); // DO NOT IMPLEMENT
+
+protected:
+  raw_ostream &OS;
+
+  unsigned IsLittleEndian : 1;
+
+protected: // Can only create subclasses.
+  MCObjectWriter(raw_ostream &_OS, bool _IsLittleEndian)
+    : OS(_OS), IsLittleEndian(_IsLittleEndian) {}
+
+public:
+  virtual ~MCObjectWriter();
+
+  bool isLittleEndian() const { return IsLittleEndian; }
+
+  raw_ostream &getStream() { return OS; }
+
+  /// @name High-Level API
+  /// @{
+
+  /// Perform any late binding of symbols (for example, to assign symbol indices
+  /// for use when generating relocations).
+  ///
+  /// This routine is called by the assembler after layout and relaxation is
+  /// complete.
+ virtual void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) = 0; + + /// Record a relocation entry. + /// + /// This routine is called by the assembler after layout and relaxation, and + /// post layout binding. The implementation is responsible for storing + /// information about the relocation so that it can be emitted during + /// WriteObject(). + virtual void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) = 0; + + /// \brief Check whether the difference (A - B) between two symbol + /// references is fully resolved. + /// + /// Clients are not required to answer precisely and may conservatively return + /// false, even when a difference is fully resolved. + bool + IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm, + const MCSymbolRefExpr *A, + const MCSymbolRefExpr *B, + bool InSet) const; + + virtual bool + IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const; + + + /// Write the object file. + /// + /// This routine is called by the assembler after layout and relaxation is + /// complete, fixups have been evaluated and applied, and relocations + /// generated. + virtual void WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) = 0; + + /// @} + /// @name Binary Output + /// @{ + + void Write8(uint8_t Value) { + OS << char(Value); + } + + void WriteLE16(uint16_t Value) { + Write8(uint8_t(Value >> 0)); + Write8(uint8_t(Value >> 8)); + } + + void WriteLE32(uint32_t Value) { + WriteLE16(uint16_t(Value >> 0)); + WriteLE16(uint16_t(Value >> 16)); + } + + void WriteLE64(uint64_t Value) { + WriteLE32(uint32_t(Value >> 0)); + WriteLE32(uint32_t(Value >> 32)); + } + + void WriteBE16(uint16_t Value) { + Write8(uint8_t(Value >> 8)); + Write8(uint8_t(Value >> 0)); + } + + void WriteBE32(uint32_t Value) { + WriteBE16(uint16_t(Value >> 16)); + WriteBE16(uint16_t(Value >> 0)); + } + + void WriteBE64(uint64_t Value) { + WriteBE32(uint32_t(Value >> 32)); + WriteBE32(uint32_t(Value >> 0)); + } + + void Write16(uint16_t Value) { + if (IsLittleEndian) + WriteLE16(Value); + else + WriteBE16(Value); + } + + void Write32(uint32_t Value) { + if (IsLittleEndian) + WriteLE32(Value); + else + WriteBE32(Value); + } + + void Write64(uint64_t Value) { + if (IsLittleEndian) + WriteLE64(Value); + else + WriteBE64(Value); + } + + void WriteZeros(unsigned N) { + const char Zeros[16] = { 0 }; + + for (unsigned i = 0, e = N / 16; i != e; ++i) + OS << StringRef(Zeros, 16); + + OS << StringRef(Zeros, N % 16); + } + + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + assert((ZeroFillSize == 0 || Str.size () <= ZeroFillSize) && + "data size greater than fill size, unexpected large write will occur"); + OS << Str; + if (ZeroFillSize) + WriteZeros(ZeroFillSize - Str.size()); + } + + /// @} + + /// Utility function to encode a SLEB128 value. + static void EncodeSLEB128(int64_t Value, raw_ostream &OS); + /// Utility function to encode a ULEB128 value. 
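The byte-order helpers above decompose wider writes into Write8 calls, least significant byte first for the LE variants and most significant first for BE. As a worked example, Write32(0x01020304) emits 04 03 02 01 on a little-endian writer and 01 02 03 04 on a big-endian one; the sketch below mirrors the LE decomposition outside any writer class:

    #include <cassert>
    #include <stdint.h>

    // Mirror of MCObjectWriter::WriteLE32: bytes emitted least significant
    // first (here captured into an array instead of a stream).
    static void writeLE32(uint8_t Out[4], uint32_t Value) {
      Out[0] = uint8_t(Value >> 0);
      Out[1] = uint8_t(Value >> 8);
      Out[2] = uint8_t(Value >> 16);
      Out[3] = uint8_t(Value >> 24);
    }

    void checkByteOrder() {
      uint8_t B[4];
      writeLE32(B, 0x01020304);
      assert(B[0] == 0x04 && B[1] == 0x03 && B[2] == 0x02 && B[3] == 0x01);
    }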
+  static void EncodeULEB128(uint64_t Value, raw_ostream &OS);
+};
+
+MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCParser/AsmCond.h b/final/include/llvm/MC/MCParser/AsmCond.h
new file mode 100644
index 00000000000..92a115eb803
--- /dev/null
+++ b/final/include/llvm/MC/MCParser/AsmCond.h
@@ -0,0 +1,40 @@
+//===- AsmCond.h - Assembly file conditional assembly ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMCOND_H
+#define ASMCOND_H
+
+namespace llvm {
+
+/// AsmCond - Class to support conditional assembly
+///
+/// The conditional assembly feature (.if, .else, .elseif and .endif) is
+/// implemented with AsmCond that tells us what we are in the middle of
+/// processing. Ignore can be either true or false. When true we are ignoring
+/// the block of code in the middle of a conditional.
+
+class AsmCond {
+public:
+  enum ConditionalAssemblyType {
+    NoCond, // no conditional is being processed
+    IfCond, // inside if conditional
+    ElseIfCond, // inside elseif conditional
+    ElseCond // inside else conditional
+  };
+
+  ConditionalAssemblyType TheCond;
+  bool CondMet;
+  bool Ignore;
+
+  AsmCond() : TheCond(NoCond), CondMet(false), Ignore(false) {}
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCParser/AsmLexer.h b/final/include/llvm/MC/MCParser/AsmLexer.h
new file mode 100644
index 00000000000..252696bec31
--- /dev/null
+++ b/final/include/llvm/MC/MCParser/AsmLexer.h
@@ -0,0 +1,70 @@
+//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class declares the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ASMLEXER_H
+#define ASMLEXER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+class MemoryBuffer;
+class SMLoc;
+class MCAsmInfo;
+
+/// AsmLexer - Lexer class for assembly files.
+class AsmLexer : public MCAsmLexer {
+  const MCAsmInfo &MAI;
+
+  const char *CurPtr;
+  const MemoryBuffer *CurBuf;
+
+  void operator=(const AsmLexer&); // DO NOT IMPLEMENT
+  AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT
+
+protected:
+  /// LexToken - Read the next token and return its code.
+ virtual AsmToken LexToken(); + +public: + AsmLexer(const MCAsmInfo &MAI); + ~AsmLexer(); + + void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL); + + virtual StringRef LexUntilEndOfStatement(); + + bool isAtStartOfComment(char Char); + + const MCAsmInfo &getMAI() const { return MAI; } + +private: + int getNextChar(); + AsmToken ReturnError(const char *Loc, const std::string &Msg); + + AsmToken LexIdentifier(); + AsmToken LexSlash(); + AsmToken LexLineComment(); + AsmToken LexDigit(); + AsmToken LexSingleQuote(); + AsmToken LexQuote(); + AsmToken LexFloatLiteral(); +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCParser/MCAsmLexer.h b/final/include/llvm/MC/MCParser/MCAsmLexer.h new file mode 100644 index 00000000000..606725a9851 --- /dev/null +++ b/final/include/llvm/MC/MCParser/MCAsmLexer.h @@ -0,0 +1,181 @@ +//===-- llvm/MC/MCAsmLexer.h - Abstract Asm Lexer Interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMLEXER_H +#define LLVM_MC_MCASMLEXER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { +class MCAsmLexer; +class MCInst; +class Target; + +/// AsmToken - Target independent representation for an assembler token. +class AsmToken { +public: + enum TokenKind { + // Markers + Eof, Error, + + // String values. + Identifier, + String, + + // Integer values. + Integer, + + // Real values. + Real, + + // Register values (stored in IntVal). Only used by TargetAsmLexer. + Register, + + // No-value. + EndOfStatement, + Colon, + Plus, Minus, Tilde, + Slash, // '/' + LParen, RParen, LBrac, RBrac, LCurly, RCurly, + Star, Dot, Comma, Dollar, Equal, EqualEqual, + + Pipe, PipePipe, Caret, + Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash, + Less, LessEqual, LessLess, LessGreater, + Greater, GreaterEqual, GreaterGreater, At + }; + + TokenKind Kind; + + /// A reference to the entire token contents; this is always a pointer into + /// a memory buffer owned by the source manager. + StringRef Str; + + int64_t IntVal; + +public: + AsmToken() {} + AsmToken(TokenKind _Kind, StringRef _Str, int64_t _IntVal = 0) + : Kind(_Kind), Str(_Str), IntVal(_IntVal) {} + + TokenKind getKind() const { return Kind; } + bool is(TokenKind K) const { return Kind == K; } + bool isNot(TokenKind K) const { return Kind != K; } + + SMLoc getLoc() const; + + /// getStringContents - Get the contents of a string token (without quotes). + StringRef getStringContents() const { + assert(Kind == String && "This token isn't a string!"); + return Str.slice(1, Str.size() - 1); + } + + /// getIdentifier - Get the identifier string for the current token, which + /// should be an identifier or a string. This gets the portion of the string + /// which should be used as the identifier, e.g., it does not include the + /// quotes on strings. + StringRef getIdentifier() const { + if (Kind == Identifier) + return getString(); + return getStringContents(); + } + + /// getString - Get the string for the current token, this includes all + /// characters (for example, the quotes on strings) in the token. + /// + /// The returned StringRef points into the source manager's memory buffer, and + /// is safe to store across calls to Lex(). 
+  StringRef getString() const { return Str; }
+
+  // FIXME: Don't compute this in advance, it makes every token larger, and is
+  // also not generally what we want (it is nicer for recovery etc. to lex 123br
+  // as a single token, then diagnose as an invalid number).
+  int64_t getIntVal() const {
+    assert(Kind == Integer && "This token isn't an integer!");
+    return IntVal;
+  }
+
+  /// getRegVal - Get the register number for the current token, which should
+  /// be a register.
+  unsigned getRegVal() const {
+    assert(Kind == Register && "This token isn't a register!");
+    return static_cast<unsigned>(IntVal);
+  }
+};
+
+/// MCAsmLexer - Generic assembler lexer interface, for use by target specific
+/// assembly lexers.
+class MCAsmLexer {
+  /// The current token, stored in the base class for faster access.
+  AsmToken CurTok;
+
+  /// The location and description of the current error.
+  SMLoc ErrLoc;
+  std::string Err;
+
+  MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT
+  void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  const char *TokStart;
+
+  MCAsmLexer();
+
+  virtual AsmToken LexToken() = 0;
+
+  void SetError(const SMLoc &errLoc, const std::string &err) {
+    ErrLoc = errLoc;
+    Err = err;
+  }
+
+public:
+  virtual ~MCAsmLexer();
+
+  /// Lex - Consume the next token from the input stream and return it.
+  ///
+  /// The lexer will continuously return the end-of-file token once the end of
+  /// the main input file has been reached.
+  const AsmToken &Lex() {
+    return CurTok = LexToken();
+  }
+
+  virtual StringRef LexUntilEndOfStatement() = 0;
+
+  /// getLoc - Get the current source location.
+  SMLoc getLoc() const;
+
+  /// getTok - Get the current (last) lexed token.
+  const AsmToken &getTok() {
+    return CurTok;
+  }
+
+  /// getErrLoc - Get the current error location.
+  const SMLoc &getErrLoc() {
+    return ErrLoc;
+  }
+
+  /// getErr - Get the current error string.
+  const std::string &getErr() {
+    return Err;
+  }
+
+  /// getKind - Get the kind of current token.
+  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+
+  /// is - Check if the current token has kind \arg K.
+  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+
+  /// isNot - Check if the current token has kind \arg K.
+  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCParser/MCAsmParser.h b/final/include/llvm/MC/MCParser/MCAsmParser.h
new file mode 100644
index 00000000000..54979d977db
--- /dev/null
+++ b/final/include/llvm/MC/MCParser/MCAsmParser.h
@@ -0,0 +1,137 @@
+//===-- llvm/MC/MCAsmParser.h - Abstract Asm Parser Interface ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMPARSER_H
+#define LLVM_MC_MCASMPARSER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class AsmToken;
+class MCAsmInfo;
+class MCAsmLexer;
+class MCAsmParserExtension;
+class MCContext;
+class MCExpr;
+class MCStreamer;
+class SMLoc;
+class SourceMgr;
+class StringRef;
+class Target;
+class TargetAsmParser;
+class Twine;
+
+/// MCAsmParser - Generic assembler parser interface, for use by target
+/// specific assembly parsers.
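A client of the MCAsmLexer interface above is a simple pull loop: call Lex until the Eof marker appears, inspecting each token's kind as it goes. A minimal sketch (the function is illustrative):

    #include "llvm/MC/MCParser/MCAsmLexer.h"

    using namespace llvm;

    // Count identifier tokens until end of input; Lex() keeps returning Eof
    // once the main file is exhausted, so the loop terminates.
    unsigned countIdentifiers(MCAsmLexer &Lexer) {
      unsigned N = 0;
      while (Lexer.Lex().isNot(AsmToken::Eof))
        if (Lexer.getTok().is(AsmToken::Identifier))
          ++N;
      return N;
    }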
+class MCAsmParser { +public: + typedef bool (*DirectiveHandler)(MCAsmParserExtension*, StringRef, SMLoc); + +private: + MCAsmParser(const MCAsmParser &); // DO NOT IMPLEMENT + void operator=(const MCAsmParser &); // DO NOT IMPLEMENT + + TargetAsmParser *TargetParser; + + unsigned ShowParsedOperands : 1; + +protected: // Can only create subclasses. + MCAsmParser(); + +public: + virtual ~MCAsmParser(); + + virtual void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) = 0; + + virtual SourceMgr &getSourceManager() = 0; + + virtual MCAsmLexer &getLexer() = 0; + + virtual MCContext &getContext() = 0; + + /// getStreamer - Return the output streamer for the assembler. + virtual MCStreamer &getStreamer() = 0; + + TargetAsmParser &getTargetParser() const { return *TargetParser; } + void setTargetParser(TargetAsmParser &P); + + bool getShowParsedOperands() const { return ShowParsedOperands; } + void setShowParsedOperands(bool Value) { ShowParsedOperands = Value; } + + /// Run - Run the parser on the input source buffer. + virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false) = 0; + + /// Warning - Emit a warning at the location \arg L, with the message \arg + /// Msg. + virtual void Warning(SMLoc L, const Twine &Msg) = 0; + + /// Error - Emit an error at the location \arg L, with the message \arg + /// Msg. + /// + /// \return The return value is always true, as an idiomatic convenience to + /// clients. + virtual bool Error(SMLoc L, const Twine &Msg) = 0; + + /// Lex - Get the next AsmToken in the stream, possibly handling file + /// inclusion first. + virtual const AsmToken &Lex() = 0; + + /// getTok - Get the current AsmToken from the stream. + const AsmToken &getTok(); + + /// \brief Report an error at the current lexer location. + bool TokError(const Twine &Msg); + + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \arg Res to the identifier contents. + virtual bool ParseIdentifier(StringRef &Res) = 0; + + /// \brief Parse up to the end of statement and return the contents from the + /// current token until the end of the statement; the current token on exit + /// will be either the EndOfStatement or EOF. + virtual StringRef ParseStringToEndOfStatement() = 0; + + /// EatToEndOfStatement - Skip to the end of the current statement, for error + /// recovery. + virtual void EatToEndOfStatement() = 0; + + /// ParseExpression - Parse an arbitrary expression. + /// + /// @param Res - The value of the expression. The result is undefined + /// on error. + /// @result - False on success. + virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; + bool ParseExpression(const MCExpr *&Res); + + /// ParseParenExpression - Parse an arbitrary expression, assuming that an + /// initial '(' has already been consumed. + /// + /// @param Res - The value of the expression. The result is undefined + /// on error. + /// @result - False on success. + virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; + + /// ParseAbsoluteExpression - Parse an expression which must evaluate to an + /// absolute value. + /// + /// @param Res - The value of the absolute expression. The result is undefined + /// on error. + /// @result - False on success. + virtual bool ParseAbsoluteExpression(int64_t &Res) = 0; +}; + +/// \brief Create an MCAsmParser instance. 
+MCAsmParser *createMCAsmParser(const Target &, SourceMgr &, MCContext &,
+                               MCStreamer &, const MCAsmInfo &);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCParser/MCAsmParserExtension.h b/final/include/llvm/MC/MCParser/MCAsmParserExtension.h
new file mode 100644
index 00000000000..ceb57f57e9e
--- /dev/null
+++ b/final/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -0,0 +1,80 @@
+//===-- llvm/MC/MCAsmParserExtension.h - Asm Parser Hooks -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMPARSEREXTENSION_H
+#define LLVM_MC_MCASMPARSEREXTENSION_H
+
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/SMLoc.h"
+
+namespace llvm {
+class Twine;
+
+/// \brief Generic interface for extending the MCAsmParser,
+/// which is implemented by target and object file assembly parser
+/// implementations.
+class MCAsmParserExtension {
+  MCAsmParserExtension(const MCAsmParserExtension &); // DO NOT IMPLEMENT
+  void operator=(const MCAsmParserExtension &); // DO NOT IMPLEMENT
+
+  MCAsmParser *Parser;
+
+protected:
+  MCAsmParserExtension();
+
+  // Helper template for implementing static dispatch functions.
+  template<typename T, bool (T::*Handler)(StringRef, SMLoc)>
+  static bool HandleDirective(MCAsmParserExtension *Target,
+                              StringRef Directive,
+                              SMLoc DirectiveLoc) {
+    T *Obj = static_cast<T*>(Target);
+    return (Obj->*Handler)(Directive, DirectiveLoc);
+  }
+
+  bool BracketExpressionsSupported;
+
+public:
+  virtual ~MCAsmParserExtension();
+
+  /// \brief Initialize the extension for parsing using the given \arg
+  /// Parser. The extension should use the AsmParser interfaces to register its
+  /// parsing routines.
+  virtual void Initialize(MCAsmParser &Parser);
+
+  /// @name MCAsmParser Proxy Interfaces
+  /// @{
+
+  MCContext &getContext() { return getParser().getContext(); }
+  MCAsmLexer &getLexer() { return getParser().getLexer(); }
+  MCAsmParser &getParser() { return *Parser; }
+  SourceMgr &getSourceManager() { return getParser().getSourceManager(); }
+  MCStreamer &getStreamer() { return getParser().getStreamer(); }
+  void Warning(SMLoc L, const Twine &Msg) {
+    return getParser().Warning(L, Msg);
+  }
+  bool Error(SMLoc L, const Twine &Msg) {
+    return getParser().Error(L, Msg);
+  }
+  bool TokError(const Twine &Msg) {
+    return getParser().TokError(Msg);
+  }
+
+  const AsmToken &Lex() { return getParser().Lex(); }
+
+  const AsmToken &getTok() { return getParser().getTok(); }
+
+  bool HasBracketExpressions() const { return BracketExpressionsSupported; }
+
+  /// @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/final/include/llvm/MC/MCParser/MCParsedAsmOperand.h
new file mode 100644
index 00000000000..91f5773b8df
--- /dev/null
+++ b/final/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -0,0 +1,37 @@
+//===-- llvm/MC/MCParsedAsmOperand.h - Asm Parser Operand -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
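Extensions register directive callbacks through the static-dispatch shim above: AddDirectiveHandler stores a plain function pointer, and HandleDirective recovers the concrete extension type. A minimal sketch wiring up a hypothetical .demo directive; the class and directive name are illustrative:

    #include "llvm/MC/MCParser/MCAsmParserExtension.h"

    using namespace llvm;

    namespace {
    class DemoAsmParser : public MCAsmParserExtension {
    public:
      virtual void Initialize(MCAsmParser &Parser) {
        MCAsmParserExtension::Initialize(Parser);
        getParser().AddDirectiveHandler(this, ".demo",
            HandleDirective<DemoAsmParser, &DemoAsmParser::ParseDirectiveDemo>);
      }

      bool ParseDirectiveDemo(StringRef Directive, SMLoc DirectiveLoc) {
        // A real handler would consume operands via getLexer() here; returning
        // false reports success, mirroring Error()'s true-on-error convention.
        (void)Directive; (void)DirectiveLoc;
        return false;
      }
    };
    } // end anonymous namespace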
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCASMOPERAND_H
+#define LLVM_MC_MCASMOPERAND_H
+
+namespace llvm {
+class SMLoc;
+class raw_ostream;
+
+/// MCParsedAsmOperand - This abstract class represents a source-level assembly
+/// instruction operand. It should be subclassed by target-specific code. This
+/// base class is used by target-independent clients and is the interface
+/// between parsing an asm instruction and recognizing it.
+class MCParsedAsmOperand {
+public:
+  MCParsedAsmOperand() {}
+  virtual ~MCParsedAsmOperand() {}
+
+  /// getStartLoc - Get the location of the first token of this operand.
+  virtual SMLoc getStartLoc() const = 0;
+  /// getEndLoc - Get the location of the last token of this operand.
+  virtual SMLoc getEndLoc() const = 0;
+
+  /// dump - Print a debug representation of the operand to the given stream.
+  virtual void dump(raw_ostream &OS) const = 0;
+};
+
+} // end namespace llvm.
+
+#endif
diff --git a/final/include/llvm/MC/MCSection.h b/final/include/llvm/MC/MCSection.h
new file mode 100644
index 00000000000..1c01b2f8f3c
--- /dev/null
+++ b/final/include/llvm/MC/MCSection.h
@@ -0,0 +1,76 @@
+//===- MCSection.h - Machine Code Sections ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSection class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTION_H
+#define LLVM_MC_MCSECTION_H
+
+#include <string>
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+
+namespace llvm {
+  class MCContext;
+  class MCAsmInfo;
+  class raw_ostream;
+
+  /// MCSection - Instances of this class represent a uniqued identifier for a
+  /// section in the current translation unit. The MCContext class uniques and
+  /// creates these.
+  class MCSection {
+  public:
+    enum SectionVariant {
+      SV_COFF = 0,
+      SV_ELF,
+      SV_MachO
+    };
+
+  private:
+    MCSection(const MCSection&); // DO NOT IMPLEMENT
+    void operator=(const MCSection&); // DO NOT IMPLEMENT
+  protected:
+    MCSection(SectionVariant V, SectionKind K) : Variant(V), Kind(K) {}
+    SectionVariant Variant;
+    SectionKind Kind;
+  public:
+    virtual ~MCSection();
+
+    SectionKind getKind() const { return Kind; }
+
+    SectionVariant getVariant() const { return Variant; }
+
+    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                      raw_ostream &OS) const = 0;
+
+    /// isBaseAddressKnownZero - Return true if we know that this section will
+    /// get a base address of zero. In cases where we know that this is true we
+    /// can emit section offsets as direct references to avoid a subtraction
+    /// from the base of the section, saving a relocation.
+    virtual bool isBaseAddressKnownZero() const {
+      return false;
+    }
+
+    // UseCodeAlign - Return true if a .align directive should use
+    // "optimized nops" to fill instead of 0s.
+    virtual bool UseCodeAlign() const = 0;
+
+    /// isVirtualSection - Check whether this section is "virtual", that is
+    /// has no actual object file contents.
+    virtual bool isVirtualSection() const = 0;
+
+    static bool classof(const MCSection *) { return true; }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCSectionCOFF.h b/final/include/llvm/MC/MCSectionCOFF.h
new file mode 100644
index 00000000000..b154cf59d10
--- /dev/null
+++ b/final/include/llvm/MC/MCSectionCOFF.h
@@ -0,0 +1,69 @@
+//===- MCSectionCOFF.h - COFF Machine Code Sections -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionCOFF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONCOFF_H
+#define LLVM_MC_MCSECTIONCOFF_H
+
+#include "llvm/MC/MCSection.h"
+
+#include "llvm/Support/COFF.h"
+
+namespace llvm {
+
+/// MCSectionCOFF - This represents a section on Windows.
+  class MCSectionCOFF : public MCSection {
+    // The memory for this string is stored in the same MCContext as *this.
+    StringRef SectionName;
+
+    /// Characteristics - This is the Characteristics field of a section,
+    /// drawn from the enums below.
+    unsigned Characteristics;
+
+    /// Selection - This is the Selection field for the section symbol, if
+    /// it is a COMDAT section, i.e. (Characteristics & IMAGE_SCN_LNK_COMDAT) != 0.
+    int Selection;
+
+  private:
+    friend class MCContext;
+    MCSectionCOFF(StringRef Section, unsigned Characteristics,
+                  int Selection, SectionKind K)
+      : MCSection(SV_COFF, K), SectionName(Section),
+        Characteristics(Characteristics), Selection(Selection) {
+      assert((Characteristics & 0x00F00000) == 0 &&
+             "alignment must not be set upon section creation");
+    }
+    ~MCSectionCOFF();
+
+  public:
+    /// ShouldOmitSectionDirective - Decides whether a '.section' directive
+    /// should be printed before the section name.
+    bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+
+    StringRef getSectionName() const { return SectionName; }
+    unsigned getCharacteristics() const { return Characteristics; }
+    int getSelection() const { return Selection; }
+
+    virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                      raw_ostream &OS) const;
+    virtual bool UseCodeAlign() const;
+    virtual bool isVirtualSection() const;
+
+    static bool classof(const MCSection *S) {
+      return S->getVariant() == SV_COFF;
+    }
+    static bool classof(const MCSectionCOFF *) { return true; }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCSectionELF.h b/final/include/llvm/MC/MCSectionELF.h
new file mode 100644
index 00000000000..c82de712820
--- /dev/null
+++ b/final/include/llvm/MC/MCSectionELF.h
@@ -0,0 +1,87 @@
+//===- MCSectionELF.h - ELF Machine Code Sections ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionELF class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONELF_H
+#define LLVM_MC_MCSECTIONELF_H
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/Support/ELF.h"
+
+namespace llvm {
+
+class MCSymbol;
+
+/// MCSectionELF - This represents a section on Linux, many other Unix
+/// variants, and some bare-metal systems.
+class MCSectionELF : public MCSection {
+  /// SectionName - This is the name of the section. The referenced memory is
+  /// owned by TargetLoweringObjectFileELF's ELFUniqueMap.
+  StringRef SectionName;
+
+  /// Type - This is the sh_type field of a section, drawn from the enums
+  /// below.
+  unsigned Type;
+
+  /// Flags - This is the sh_flags field of a section, drawn from the enums
+  /// below.
+  unsigned Flags;
+
+  /// EntrySize - The size of each entry in this section. This size only
+  /// makes sense for sections that contain fixed-sized entries. If a
+  /// section does not contain fixed-sized entries 'EntrySize' will be 0.
+  unsigned EntrySize;
+
+  const MCSymbol *Group;
+
+private:
+  friend class MCContext;
+  MCSectionELF(StringRef Section, unsigned type, unsigned flags,
+               SectionKind K, unsigned entrySize, const MCSymbol *group)
+    : MCSection(SV_ELF, K), SectionName(Section), Type(type), Flags(flags),
+      EntrySize(entrySize), Group(group) {}
+  ~MCSectionELF();
+public:
+
+  /// ShouldOmitSectionDirective - Decides whether a '.section' directive
+  /// should be printed before the section name.
+  bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const;
+
+  StringRef getSectionName() const { return SectionName; }
+  unsigned getType() const { return Type; }
+  unsigned getFlags() const { return Flags; }
+  unsigned getEntrySize() const { return EntrySize; }
+  const MCSymbol *getGroup() const { return Group; }
+
+  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                    raw_ostream &OS) const;
+  virtual bool UseCodeAlign() const;
+  virtual bool isVirtualSection() const;
+
+  /// isBaseAddressKnownZero - We know that non-allocatable sections (like
+  /// debug info) have a base of zero.
+  virtual bool isBaseAddressKnownZero() const {
+    return (getFlags() & ELF::SHF_ALLOC) == 0;
+  }
+
+  static bool classof(const MCSection *S) {
+    return S->getVariant() == SV_ELF;
+  }
+  static bool classof(const MCSectionELF *) { return true; }
+
+  /// Return the entry size for sections with fixed-width data.
+  static unsigned DetermineEntrySize(SectionKind Kind);
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCSectionMachO.h b/final/include/llvm/MC/MCSectionMachO.h
new file mode 100644
index 00000000000..7633515f274
--- /dev/null
+++ b/final/include/llvm/MC/MCSectionMachO.h
@@ -0,0 +1,179 @@
+//===- MCSectionMachO.h - MachO Machine Code Sections -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCSectionMachO class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTIONMACHO_H
+#define LLVM_MC_MCSECTIONMACHO_H
+
+#include "llvm/MC/MCSection.h"
+
+namespace llvm {
+
+/// MCSectionMachO - This represents a section on a Mach-O system (used by
+/// Mac OS X). On a Mac system, these are also described in
+/// /usr/include/mach-o/loader.h.
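+///
+/// For example, the directive
+/// \code
+///   .section __TEXT,__text,regular,pure_instructions
+/// \endcode
+/// names the section __text inside the __TEXT segment, with type S_REGULAR
+/// and the S_ATTR_PURE_INSTRUCTIONS attribute.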
+class MCSectionMachO : public MCSection {
+  char SegmentName[16];  // Not necessarily null terminated!
+  char SectionName[16];  // Not necessarily null terminated!
+
+  /// TypeAndAttributes - This is the SECTION_TYPE and SECTION_ATTRIBUTES
+  /// field of a section, drawn from the enums below.
+  unsigned TypeAndAttributes;
+
+  /// Reserved2 - The 'reserved2' field of a section, used to represent the
+  /// size of stubs, for example.
+  unsigned Reserved2;
+
+  MCSectionMachO(StringRef Segment, StringRef Section,
+                 unsigned TAA, unsigned reserved2, SectionKind K);
+  friend class MCContext;
+public:
+
+  /// These are the section type and attributes fields. A MachO section can
+  /// have only one Type, but can have any of the attributes specified.
+  enum {
+    // TypeAndAttributes bitmasks.
+    SECTION_TYPE       = 0x000000FFU,
+    SECTION_ATTRIBUTES = 0xFFFFFF00U,
+
+    // Valid section types.
+
+    /// S_REGULAR - Regular section.
+    S_REGULAR                    = 0x00U,
+    /// S_ZEROFILL - Zero fill on demand section.
+    S_ZEROFILL                   = 0x01U,
+    /// S_CSTRING_LITERALS - Section with literal C strings.
+    S_CSTRING_LITERALS           = 0x02U,
+    /// S_4BYTE_LITERALS - Section with 4 byte literals.
+    S_4BYTE_LITERALS             = 0x03U,
+    /// S_8BYTE_LITERALS - Section with 8 byte literals.
+    S_8BYTE_LITERALS             = 0x04U,
+    /// S_LITERAL_POINTERS - Section with pointers to literals.
+    S_LITERAL_POINTERS           = 0x05U,
+    /// S_NON_LAZY_SYMBOL_POINTERS - Section with non-lazy symbol pointers.
+    S_NON_LAZY_SYMBOL_POINTERS   = 0x06U,
+    /// S_LAZY_SYMBOL_POINTERS - Section with lazy symbol pointers.
+    S_LAZY_SYMBOL_POINTERS       = 0x07U,
+    /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in
+    /// the Reserved2 field.
+    S_SYMBOL_STUBS               = 0x08U,
+    /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for
+    /// initialization.
+    S_MOD_INIT_FUNC_POINTERS     = 0x09U,
+    /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for
+    /// termination.
+    S_MOD_TERM_FUNC_POINTERS     = 0x0AU,
+    /// S_COALESCED - Section contains symbols that are to be coalesced.
+    S_COALESCED                  = 0x0BU,
+    /// S_GB_ZEROFILL - Zero fill on demand section (that can be larger than 4
+    /// gigabytes).
+    S_GB_ZEROFILL                = 0x0CU,
+    /// S_INTERPOSING - Section with only pairs of function pointers for
+    /// interposing.
+    S_INTERPOSING                = 0x0DU,
+    /// S_16BYTE_LITERALS - Section with only 16 byte literals.
+    S_16BYTE_LITERALS            = 0x0EU,
+    /// S_DTRACE_DOF - Section contains DTrace Object Format.
+    S_DTRACE_DOF                 = 0x0FU,
+    /// S_LAZY_DYLIB_SYMBOL_POINTERS - Section with lazy symbol pointers to
+    /// lazy loaded dylibs.
+    S_LAZY_DYLIB_SYMBOL_POINTERS = 0x10U,
+    /// S_THREAD_LOCAL_REGULAR - Section with ....
+    S_THREAD_LOCAL_REGULAR = 0x11U,
+    /// S_THREAD_LOCAL_ZEROFILL - Thread local zerofill section.
+    S_THREAD_LOCAL_ZEROFILL = 0x12U,
+    /// S_THREAD_LOCAL_VARIABLES - Section with thread local variable structure
+    /// data.
+    S_THREAD_LOCAL_VARIABLES = 0x13U,
+    /// S_THREAD_LOCAL_VARIABLE_POINTERS - Section with ....
+    S_THREAD_LOCAL_VARIABLE_POINTERS = 0x14U,
+    /// S_THREAD_LOCAL_INIT_FUNCTION_POINTERS - Section with thread local
+    /// variable initialization pointers to functions.
+    S_THREAD_LOCAL_INIT_FUNCTION_POINTERS = 0x15U,
+
+    LAST_KNOWN_SECTION_TYPE = S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+
+    // Valid section attributes.
+
+    /// S_ATTR_PURE_INSTRUCTIONS - Section contains only true machine
+    /// instructions.
+    S_ATTR_PURE_INSTRUCTIONS   = 1U << 31,
+    /// S_ATTR_NO_TOC - Section contains coalesced symbols that are not to be
+    /// in a ranlib table of contents.
+    S_ATTR_NO_TOC              = 1U << 30,
+    /// S_ATTR_STRIP_STATIC_SYMS - OK to strip static symbols in this section
+    /// in files with the MH_DYLDLINK flag.
+    S_ATTR_STRIP_STATIC_SYMS   = 1U << 29,
+    /// S_ATTR_NO_DEAD_STRIP - No dead stripping.
+    S_ATTR_NO_DEAD_STRIP       = 1U << 28,
+    /// S_ATTR_LIVE_SUPPORT - Blocks are live if they reference live blocks.
+    S_ATTR_LIVE_SUPPORT        = 1U << 27,
+    /// S_ATTR_SELF_MODIFYING_CODE - Used with i386 code stubs written on by
+    /// dyld.
+    S_ATTR_SELF_MODIFYING_CODE = 1U << 26,
+    /// S_ATTR_DEBUG - A debug section.
+    S_ATTR_DEBUG               = 1U << 25,
+    /// S_ATTR_SOME_INSTRUCTIONS - Section contains some machine instructions.
+    S_ATTR_SOME_INSTRUCTIONS   = 1U << 10,
+    /// S_ATTR_EXT_RELOC - Section has external relocation entries.
+    S_ATTR_EXT_RELOC           = 1U << 9,
+    /// S_ATTR_LOC_RELOC - Section has local relocation entries.
+    S_ATTR_LOC_RELOC           = 1U << 8
+  };
+
+  StringRef getSegmentName() const {
+    // SegmentName is not necessarily null terminated!
+    if (SegmentName[15])
+      return StringRef(SegmentName, 16);
+    return StringRef(SegmentName);
+  }
+  StringRef getSectionName() const {
+    // SectionName is not necessarily null terminated!
+    if (SectionName[15])
+      return StringRef(SectionName, 16);
+    return StringRef(SectionName);
+  }
+
+  unsigned getTypeAndAttributes() const { return TypeAndAttributes; }
+  unsigned getStubSize() const { return Reserved2; }
+
+  unsigned getType() const { return TypeAndAttributes & SECTION_TYPE; }
+  bool hasAttribute(unsigned Value) const {
+    return (TypeAndAttributes & Value) != 0;
+  }
+
+  /// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+  /// This is a string that can appear after a .section directive in a mach-o
+  /// flavored .s file. If successful, this fills in the specified Out
+  /// parameters and returns an empty string. When an invalid section
+  /// specifier is present, this returns a string indicating the problem.
+  static std::string ParseSectionSpecifier(StringRef Spec,       // In.
+                                           StringRef &Segment,   // Out.
+                                           StringRef &Section,   // Out.
+                                           unsigned  &TAA,       // Out.
+                                           unsigned  &StubSize); // Out.
+
+  virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
+                                    raw_ostream &OS) const;
+  virtual bool UseCodeAlign() const;
+  virtual bool isVirtualSection() const;
+
+  static bool classof(const MCSection *S) {
+    return S->getVariant() == SV_MachO;
+  }
+  static bool classof(const MCSectionMachO *) { return true; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCStreamer.h b/final/include/llvm/MC/MCStreamer.h
new file mode 100644
index 00000000000..4fdbc44b25e
--- /dev/null
+++ b/final/include/llvm/MC/MCStreamer.h
@@ -0,0 +1,544 @@
+//===- MCStreamer.h - High-level Streaming Machine Code Output --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MCStreamer class.
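+//
+// A rough usage sketch, assuming an MCContext Ctx, a streamer S obtained from
+// one of the factory functions declared at the end of this file, and some
+// MCSection *Text:
+//
+//   S.SwitchSection(Text);
+//   S.EmitLabel(Ctx.GetOrCreateSymbol(StringRef("foo")));
+//   S.EmitIntValue(42, 4);   // e.g. ".long 42"
+//   S.Finish();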
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSTREAMER_H
+#define LLVM_MC_MCSTREAMER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCDwarf.h"
+
+namespace llvm {
+  class MCAsmInfo;
+  class MCCodeEmitter;
+  class MCContext;
+  class MCExpr;
+  class MCInst;
+  class MCInstPrinter;
+  class MCSection;
+  class MCSymbol;
+  class StringRef;
+  class TargetAsmBackend;
+  class TargetLoweringObjectFile;
+  class Twine;
+  class raw_ostream;
+  class formatted_raw_ostream;
+
+  /// MCStreamer - Streaming machine code generation interface. This interface
+  /// is intended to provide a programmatic interface that is very similar to
+  /// the level that an assembler .s file provides. It has callbacks to emit
+  /// bytes, handle directives, etc. The implementation of this interface
+  /// retains state to know what the current section is etc.
+  ///
+  /// There are multiple implementations of this interface: one for writing out
+  /// a .s file, and implementations that write out .o files of various formats.
+  ///
+  class MCStreamer {
+    MCContext &Context;
+
+    MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT
+    MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT
+
+    void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+                         bool isPCRel, unsigned AddrSpace);
+
+    std::vector<MCDwarfFrameInfo> FrameInfos;
+    MCDwarfFrameInfo *getCurrentFrameInfo();
+    void EnsureValidFrame();
+
+    /// SectionStack - This is a stack of current and previous section
+    /// values saved by PushSection.
+    SmallVector<std::pair<const MCSection *,
+                          const MCSection *>, 4> SectionStack;
+
+  protected:
+    MCStreamer(MCContext &Ctx);
+
+  public:
+    virtual ~MCStreamer();
+
+    MCContext &getContext() const { return Context; }
+
+    unsigned getNumFrameInfos() {
+      return FrameInfos.size();
+    }
+
+    const MCDwarfFrameInfo &getFrameInfo(unsigned i) {
+      return FrameInfos[i];
+    }
+
+    /// @name Assembly File Formatting.
+    /// @{
+
+    /// isVerboseAsm - Return true if this streamer supports verbose assembly
+    /// and if it is enabled.
+    virtual bool isVerboseAsm() const { return false; }
+
+    /// hasRawTextSupport - Return true if this asm streamer supports emitting
+    /// unformatted text to the .s file with EmitRawText.
+    virtual bool hasRawTextSupport() const { return false; }
+
+    /// AddComment - Add a comment that can be emitted to the generated .s
+    /// file if applicable as a QoI issue to make the output of the compiler
+    /// more readable. This only affects the MCAsmStreamer, and only when
+    /// verbose assembly output is enabled.
+    ///
+    /// If the comment includes embedded \n's, they will each get the comment
+    /// prefix as appropriate. The added comment should not end with a \n.
+    virtual void AddComment(const Twine &T) {}
+
+    /// GetCommentOS - Return a raw_ostream that comments can be written to.
+    /// Unlike AddComment, you are required to terminate comments with \n if
+    /// you use this method.
+    virtual raw_ostream &GetCommentOS();
+
+    /// AddBlankLine - Emit a blank line to a .s file to pretty it up.
+    virtual void AddBlankLine() {}
+
+    /// @}
+
+    /// @name Symbol & Section Management
+    /// @{
+
+    /// getCurrentSection - Return the current section that the streamer is
+    /// emitting code to.
+    const MCSection *getCurrentSection() const {
+      if (!SectionStack.empty())
+        return SectionStack.back().first;
+      return NULL;
+    }
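+
+    /// For instance, a client can emit into another section and then restore
+    /// the original one; a minimal sketch, assuming a streamer S and some
+    /// other MCSection *Sec:
+    /// \code
+    ///   S.PushSection();
+    ///   S.SwitchSection(Sec);
+    ///   // ... emit into Sec ...
+    ///   S.PopSection();
+    /// \endcode
+
+    /// getPreviousSection - Return the previous section that the streamer is
+    /// emitting code to.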
+    const MCSection *getPreviousSection() const {
+      if (!SectionStack.empty())
+        return SectionStack.back().second;
+      return NULL;
+    }
+
+    /// ChangeSection - Update streamer for a new active section.
+    ///
+    /// This is called by PopSection and SwitchSection, if the current
+    /// section changes.
+    virtual void ChangeSection(const MCSection *) = 0;
+
+    /// PushSection - Save the current and previous section on the
+    /// section stack.
+    void PushSection() {
+      SectionStack.push_back(std::make_pair(getCurrentSection(),
+                                            getPreviousSection()));
+    }
+
+    /// PopSection - Restore the current and previous section from
+    /// the section stack. Calls ChangeSection as needed.
+    ///
+    /// Returns false if the stack was empty.
+    bool PopSection() {
+      if (SectionStack.size() <= 1)
+        return false;
+      const MCSection *oldSection = SectionStack.pop_back_val().first;
+      const MCSection *curSection = SectionStack.back().first;
+
+      if (oldSection != curSection)
+        ChangeSection(curSection);
+      return true;
+    }
+
+    /// SwitchSection - Set the current section where code is being emitted to
+    /// @p Section. This is required to update CurSection.
+    ///
+    /// This corresponds to assembler directives like .section, .text, etc.
+    void SwitchSection(const MCSection *Section) {
+      assert(Section && "Cannot switch to a null section!");
+      const MCSection *curSection = SectionStack.back().first;
+      SectionStack.back().second = curSection;
+      if (Section != curSection) {
+        SectionStack.back().first = Section;
+        ChangeSection(Section);
+      }
+    }
+
+    /// InitSections - Create the default sections and set the initial one.
+    virtual void InitSections() = 0;
+
+    /// EmitLabel - Emit a label for @p Symbol into the current section.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///   foo:
+    ///
+    /// @param Symbol - The symbol to emit. A given symbol should only be
+    /// emitted as a label once, and symbols emitted as a label should never be
+    /// used in an assignment.
+    virtual void EmitLabel(MCSymbol *Symbol) = 0;
+
+    /// EmitAssemblerFlag - Note in the output the specified @p Flag.
+    virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) = 0;
+
+    /// EmitThumbFunc - Note in the output that the specified @p Func is
+    /// a Thumb mode function (ARM target only).
+    virtual void EmitThumbFunc(MCSymbol *Func) = 0;
+
+    /// EmitAssignment - Emit an assignment of @p Value to @p Symbol.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///   symbol = value
+    ///
+    /// The assignment generates no code, but has the side effect of binding
+    /// the value in the current context. For the assembly streamer, this
+    /// prints the binding into the .s file.
+    ///
+    /// @param Symbol - The symbol being assigned to.
+    /// @param Value - The value for the symbol.
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) = 0;
+
+    /// EmitWeakReference - Emit a weak reference from @p Alias to @p Symbol.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///   .weakref alias, symbol
+    ///
+    /// @param Alias - The alias that is being created.
+    /// @param Symbol - The symbol being aliased.
+    virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) = 0;
+
+    /// EmitSymbolAttribute - Add the given @p Attribute to @p Symbol.
+    virtual void EmitSymbolAttribute(MCSymbol *Symbol,
+                                     MCSymbolAttr Attribute) = 0;
+
+    /// EmitSymbolDesc - Set the @p DescValue for the @p Symbol.
+    ///
+    /// @param Symbol - The symbol to have its n_desc field set.
+    /// @param DescValue - The value to set into the n_desc field.
+    virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) = 0;
+
+    /// BeginCOFFSymbolDef - Start emitting a COFF symbol definition.
+    ///
+    /// @param Symbol - The symbol to have its External & Type fields set.
+    virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) = 0;
+
+    /// EmitCOFFSymbolStorageClass - Emit the storage class of the symbol.
+    ///
+    /// @param StorageClass - The storage class the symbol should have.
+    virtual void EmitCOFFSymbolStorageClass(int StorageClass) = 0;
+
+    /// EmitCOFFSymbolType - Emit the type of the symbol.
+    ///
+    /// @param Type - A COFF type identifier (see COFF::SymbolType in X86COFF.h)
+    virtual void EmitCOFFSymbolType(int Type) = 0;
+
+    /// EndCOFFSymbolDef - Marks the end of the symbol definition.
+    virtual void EndCOFFSymbolDef() = 0;
+
+    /// EmitELFSize - Emit an ELF .size directive.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///   .size symbol, expression
+    ///
+    virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) = 0;
+
+    /// EmitCommonSymbol - Emit a common symbol.
+    ///
+    /// @param Symbol - The common symbol to emit.
+    /// @param Size - The size of the common symbol.
+    /// @param ByteAlignment - The alignment of the symbol if
+    /// non-zero. This must be a power of 2.
+    virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                  unsigned ByteAlignment) = 0;
+
+    /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
+    ///
+    /// @param Symbol - The common symbol to emit.
+    /// @param Size - The size of the common symbol.
+    virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) = 0;
+
+    /// EmitZerofill - Emit the zerofill section and an optional symbol.
+    ///
+    /// @param Section - The zerofill section to create and/or to put the
+    /// symbol in.
+    /// @param Symbol - The zerofill symbol to emit, if non-NULL.
+    /// @param Size - The size of the zerofill symbol.
+    /// @param ByteAlignment - The alignment of the zerofill symbol if
+    /// non-zero. This must be a power of 2 on some targets.
+    virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+                              unsigned Size = 0, unsigned ByteAlignment = 0) = 0;
+
+    /// EmitTBSSSymbol - Emit a thread local bss (.tbss) symbol.
+    ///
+    /// @param Section - The thread local common section.
+    /// @param Symbol - The thread local common symbol to emit.
+    /// @param Size - The size of the symbol.
+    /// @param ByteAlignment - The alignment of the thread local common symbol
+    /// if non-zero. This must be a power of 2 on some targets.
+    virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                                uint64_t Size, unsigned ByteAlignment = 0) = 0;
+    /// @}
+    /// @name Generating Data
+    /// @{
+
+    /// EmitBytes - Emit the bytes in \arg Data into the output.
+    ///
+    /// This is used to implement assembler directives such as .byte, .ascii,
+    /// etc.
+    virtual void EmitBytes(StringRef Data, unsigned AddrSpace) = 0;
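+
+    /// For example, the equivalent of an '.ascii "ab"' directive can be
+    /// emitted as (a sketch, using the default address space 0):
+    /// \code
+    ///   S.EmitBytes(StringRef("ab"), 0);
+    /// \endcode
+
+    /// EmitValue - Emit the expression @p Value into the output as a native
+    /// integer of the given @p Size bytes.
+    ///
+    /// This is used to implement assembler directives such as .word, .quad,
+    /// etc.
+    ///
+    /// @param Value - The value to emit.
+    /// @param Size - The size of the integer (in bytes) to emit. This must
+    /// match a native machine width.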
+    virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
+                               bool isPCRel, unsigned AddrSpace) = 0;
+
+    void EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace = 0);
+
+    void EmitPCRelValue(const MCExpr *Value, unsigned Size,
+                        unsigned AddrSpace = 0);
+
+    /// EmitIntValue - Special case of EmitValue that avoids the client having
+    /// to pass in an MCExpr for constant integers.
+    virtual void EmitIntValue(uint64_t Value, unsigned Size,
+                              unsigned AddrSpace = 0);
+
+    /// EmitAbsValue - Emit the Value, but try to avoid relocations. On MachO
+    /// this is done by producing
+    ///   foo = value
+    ///   .long foo
+    void EmitAbsValue(const MCExpr *Value, unsigned Size,
+                      unsigned AddrSpace = 0);
+
+    virtual void EmitULEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) = 0;
+
+    virtual void EmitSLEB128Value(const MCExpr *Value,
+                                  unsigned AddrSpace = 0) = 0;
+
+    /// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
+    /// client having to pass in an MCExpr for constant integers.
+    void EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace = 0);
+
+    /// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
+    /// client having to pass in an MCExpr for constant integers.
+    void EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace = 0);
+
+    /// EmitSymbolValue - Special case of EmitValue that avoids the client
+    /// having to pass in an MCExpr for MCSymbols.
+    void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
+                         unsigned AddrSpace = 0);
+
+    void EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size,
+                              unsigned AddrSpace = 0);
+
+    /// EmitGPRel32Value - Emit the expression @p Value into the output as a
+    /// gprel32 (32-bit GP relative) value.
+    ///
+    /// This is used to implement assembler directives such as .gprel32 on
+    /// targets that support them.
+    virtual void EmitGPRel32Value(const MCExpr *Value);
+
+    /// EmitFill - Emit NumBytes bytes worth of the value specified by
+    /// FillValue. This implements directives such as '.space'.
+    virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+                          unsigned AddrSpace);
+
+    /// EmitZeros - Emit NumBytes worth of zeros. This is a convenience
+    /// function that just wraps EmitFill.
+    void EmitZeros(uint64_t NumBytes, unsigned AddrSpace) {
+      EmitFill(NumBytes, 0, AddrSpace);
+    }
+
+
+    /// EmitValueToAlignment - Emit some number of copies of @p Value until
+    /// the byte alignment @p ByteAlignment is reached.
+    ///
+    /// If the number of bytes that need to be emitted for the alignment is not
+    /// a multiple of @p ValueSize, the contents of the emitted fill bytes are
+    /// undefined.
+    ///
+    /// This is used to implement the .align assembler directive.
+    ///
+    /// @param ByteAlignment - The alignment to reach. This must be a power of
+    /// two on some targets.
+    /// @param Value - The value to use when filling bytes.
+    /// @param ValueSize - The size of the integer (in bytes) to emit for
+    /// @p Value. This must match a native machine width.
+    /// @param MaxBytesToEmit - The maximum number of bytes to emit, or 0. If
+    /// the alignment cannot be reached in this many bytes, no bytes are
+    /// emitted.
+    virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                      unsigned ValueSize = 1,
+                                      unsigned MaxBytesToEmit = 0) = 0;
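+
+    /// For instance, padding the current section to a 4-byte boundary with
+    /// zero bytes is (a sketch):
+    /// \code
+    ///   S.EmitValueToAlignment(4);
+    /// \endcode
+
+    /// EmitCodeAlignment - Emit nops until the byte alignment @p ByteAlignment
+    /// is reached.
+    ///
+    /// This is used to align code where the alignment bytes may be executed.
+    /// This can emit different bytes for different sizes to optimize execution.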
+    ///
+    /// @param ByteAlignment - The alignment to reach. This must be a power of
+    /// two on some targets.
+    /// @param MaxBytesToEmit - The maximum number of bytes to emit, or 0. If
+    /// the alignment cannot be reached in this many bytes, no bytes are
+    /// emitted.
+    virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                   unsigned MaxBytesToEmit = 0) = 0;
+
+    /// EmitValueToOffset - Emit some number of copies of @p Value until the
+    /// byte offset @p Offset is reached.
+    ///
+    /// This is used to implement assembler directives such as .org.
+    ///
+    /// @param Offset - The offset to reach. This may be an expression, but the
+    /// expression must be associated with the current section.
+    /// @param Value - The value to use when filling bytes.
+    virtual void EmitValueToOffset(const MCExpr *Offset,
+                                   unsigned char Value = 0) = 0;
+
+    /// @}
+
+    /// EmitFileDirective - Switch to a new logical file. This is used to
+    /// implement the '.file "foo.c"' assembler directive.
+    virtual void EmitFileDirective(StringRef Filename) = 0;
+
+    /// EmitDwarfFileDirective - Associate a filename with a specified logical
+    /// file number. This implements the DWARF2 '.file 4 "foo.c"' assembler
+    /// directive.
+    virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+
+    /// EmitDwarfLocDirective - This implements the DWARF2
+    /// '.loc fileno lineno ...' assembler directive.
+    virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
+                                       unsigned Column, unsigned Flags,
+                                       unsigned Isa,
+                                       unsigned Discriminator);
+
+    virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
+                                          const MCSymbol *LastLabel,
+                                          const MCSymbol *Label) = 0;
+
+    virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
+                                           const MCSymbol *Label) {
+    }
+
+    void EmitDwarfSetLineAddr(int64_t LineDelta, const MCSymbol *Label,
+                              int PointerSize);
+
+    virtual bool EmitCFIStartProc();
+    virtual bool EmitCFIEndProc();
+    virtual bool EmitCFIDefCfa(int64_t Register, int64_t Offset);
+    virtual bool EmitCFIDefCfaOffset(int64_t Offset);
+    virtual bool EmitCFIDefCfaRegister(int64_t Register);
+    virtual bool EmitCFIOffset(int64_t Register, int64_t Offset);
+    virtual bool EmitCFIPersonality(const MCSymbol *Sym,
+                                    unsigned Encoding);
+    virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding);
+    virtual bool EmitCFIRememberState();
+    virtual bool EmitCFIRestoreState();
+
+    /// EmitInstruction - Emit the given @p Instruction into the current
+    /// section.
+    virtual void EmitInstruction(const MCInst &Inst) = 0;
+
+    /// EmitRawText - If this file is backed by an assembly streamer, this
+    /// dumps the specified string in the output .s file. This capability is
+    /// indicated by the hasRawTextSupport() predicate. By default this aborts.
+    virtual void EmitRawText(StringRef String);
+    void EmitRawText(const Twine &String);
+
+    /// ARM-related methods.
+    /// FIXME: Eventually we should have some "target MC streamer" and move
+    /// these methods there.
+    virtual void EmitFnStart();
+    virtual void EmitFnEnd();
+    virtual void EmitCantUnwind();
+    virtual void EmitPersonality(const MCSymbol *Personality);
+    virtual void EmitHandlerData();
+    virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
+    virtual void EmitPad(int64_t Offset);
+    virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
+                             bool isVector);
+
+    /// Finish - Finish emission of machine code.
+    virtual void Finish() = 0;
+  };
+
+  /// createNullStreamer - Create a dummy machine code streamer, which does
+  /// nothing.
This is useful for timing the assembler front end. + MCStreamer *createNullStreamer(MCContext &Ctx); + + /// createAsmStreamer - Create a machine code streamer which will print out + /// assembly for the native target, suitable for compiling with a native + /// assembler. + /// + /// \param InstPrint - If given, the instruction printer to use. If not given + /// the MCInst representation will be printed. This method takes ownership of + /// InstPrint. + /// + /// \param CE - If given, a code emitter to use to show the instruction + /// encoding inline with the assembly. This method takes ownership of \arg CE. + /// + /// \param TAB - If given, a target asm backend to use to show the fixup + /// information in conjunction with encoding information. This method takes + /// ownership of \arg TAB. + /// + /// \param ShowInst - Whether to show the MCInst representation inline with + /// the assembly. + MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, + bool isVerboseAsm, + bool useLoc, + MCInstPrinter *InstPrint = 0, + MCCodeEmitter *CE = 0, + TargetAsmBackend *TAB = 0, + bool ShowInst = false); + + /// createMachOStreamer - Create a machine code streamer which will generate + /// Mach-O format object files. + /// + /// Takes ownership of \arg TAB and \arg CE. + MCStreamer *createMachOStreamer(MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll = false); + + /// createWinCOFFStreamer - Create a machine code streamer which will + /// generate Microsoft COFF format object files. + /// + /// Takes ownership of \arg TAB and \arg CE. + MCStreamer *createWinCOFFStreamer(MCContext &Ctx, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, raw_ostream &OS, + bool RelaxAll = false); + + /// createELFStreamer - Create a machine code streamer which will generate + /// ELF format object files. + MCStreamer *createELFStreamer(MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll, bool NoExecStack); + + /// createLoggingStreamer - Create a machine code streamer which just logs the + /// API calls and then dispatches to another streamer. + /// + /// The new streamer takes ownership of the \arg Child. + MCStreamer *createLoggingStreamer(MCStreamer *Child, raw_ostream &OS); + + /// createPureStreamer - Create a machine code streamer which will generate + /// "pure" MC object files, for use with MC-JIT and testing tools. + /// + /// Takes ownership of \arg TAB and \arg CE. + MCStreamer *createPureStreamer(MCContext &Ctx, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE); + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/MC/MCSymbol.h b/final/include/llvm/MC/MCSymbol.h new file mode 100644 index 00000000000..7da4d7c15e3 --- /dev/null +++ b/final/include/llvm/MC/MCSymbol.h @@ -0,0 +1,163 @@ +//===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the MCSymbol class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSYMBOL_H
+#define LLVM_MC_MCSYMBOL_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class MCExpr;
+  class MCSection;
+  class MCContext;
+  class raw_ostream;
+
+  /// MCSymbol - Instances of this class represent a symbol name in the MC
+  /// file, and MCSymbols are created and uniqued by the MCContext class.
+  /// MCSymbols should only be constructed with valid names for the object
+  /// file.
+  ///
+  /// If the symbol is defined/emitted into the current translation unit, the
+  /// Section member is set to indicate what section it lives in. Otherwise,
+  /// if it is a reference to an external entity, it has a null section.
+  class MCSymbol {
+    // Special sentinel value for the absolute pseudo section.
+    //
+    // FIXME: Use a PointerInt wrapper for this?
+    static const MCSection *AbsolutePseudoSection;
+
+    /// Name - The name of the symbol. The referred-to string data is actually
+    /// held by the StringMap that lives in MCContext.
+    StringRef Name;
+
+    /// Section - The section the symbol is defined in. This is null for
+    /// undefined symbols, and the special AbsolutePseudoSection value for
+    /// absolute symbols.
+    const MCSection *Section;
+
+    /// Value - If non-null, the value for a variable symbol.
+    const MCExpr *Value;
+
+    /// IsTemporary - True if this is an assembler temporary label, which
+    /// typically does not survive in the .o file's symbol table. Usually
+    /// "Lfoo" or ".foo".
+    unsigned IsTemporary : 1;
+
+    /// IsUsed - True if this symbol has been used.
+    mutable unsigned IsUsed : 1;
+
+  private:  // MCContext creates and uniques these.
+    friend class MCContext;
+    MCSymbol(StringRef name, bool isTemporary)
+      : Name(name), Section(0), Value(0),
+        IsTemporary(isTemporary), IsUsed(false) {}
+
+    MCSymbol(const MCSymbol&);       // DO NOT IMPLEMENT
+    void operator=(const MCSymbol&); // DO NOT IMPLEMENT
+  public:
+    /// getName - Get the symbol name.
+    StringRef getName() const { return Name; }
+
+    /// @name Accessors
+    /// @{
+
+    /// isTemporary - Check if this is an assembler temporary symbol.
+    bool isTemporary() const { return IsTemporary; }
+
+    /// isUsed - Check if this is used.
+    bool isUsed() const { return IsUsed; }
+    void setUsed(bool Value) const { IsUsed = Value; }
+
+    /// @}
+    /// @name Associated Sections
+    /// @{
+
+    /// isDefined - Check if this symbol is defined (i.e., it has an address).
+    ///
+    /// Defined symbols are either absolute or in some section.
+    bool isDefined() const {
+      return Section != 0;
+    }
+
+    /// isInSection - Check if this symbol is defined in some section (i.e., it
+    /// is defined but not absolute).
+    bool isInSection() const {
+      return isDefined() && !isAbsolute();
+    }
+
+    /// isUndefined - Check if this symbol is undefined (i.e., implicitly
+    /// defined).
+    bool isUndefined() const {
+      return !isDefined();
+    }
+
+    /// isAbsolute - Check if this is an absolute symbol.
+    bool isAbsolute() const {
+      return Section == AbsolutePseudoSection;
+    }
+
+    /// getSection - Get the section associated with a defined, non-absolute
+    /// symbol.
+    const MCSection &getSection() const {
+      assert(isInSection() && "Invalid accessor!");
+      return *Section;
+    }
+
+    /// setSection - Mark the symbol as defined in the section \arg S.
+    void setSection(const MCSection &S) { Section = &S; }
+
+    /// setUndefined - Mark the symbol as undefined.
+    void setUndefined() {
+      Section = 0;
+    }
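+
+    /// A symbol is always in exactly one of three states; a small sketch,
+    /// assuming a symbol Sym from MCContext and a section Sec (an MCSection&):
+    /// \code
+    ///   Sym->setSection(Sec);  // defined: isInSection()
+    ///   Sym->setUndefined();   // external reference: isUndefined()
+    ///   Sym->setAbsolute();    // absolute: isAbsolute()
+    /// \endcode
+
+    /// setAbsolute - Mark the symbol as absolute.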
+    void setAbsolute() { Section = AbsolutePseudoSection; }
+
+    /// @}
+    /// @name Variable Symbols
+    /// @{
+
+    /// isVariable - Check if this is a variable symbol.
+    bool isVariable() const {
+      return Value != 0;
+    }
+
+    /// getVariableValue - Get the value for variable symbols.
+    const MCExpr *getVariableValue() const {
+      assert(isVariable() && "Invalid accessor!");
+      IsUsed = true;
+      return Value;
+    }
+
+    /// AliasedSymbol - If this is an alias (a = b), return the symbol
+    /// we ultimately point to. For a non-alias, this just returns the symbol
+    /// itself.
+    const MCSymbol &AliasedSymbol() const;
+
+    void setVariableValue(const MCExpr *Value);
+
+    /// @}
+
+    /// print - Print the value to the stream \arg OS.
+    void print(raw_ostream &OS) const;
+
+    /// dump - Print the value to stderr.
+    void dump() const;
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const MCSymbol &Sym) {
+    Sym.print(OS);
+    return OS;
+  }
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/MCValue.h b/final/include/llvm/MC/MCValue.h
new file mode 100644
index 00000000000..df8dbd930bf
--- /dev/null
+++ b/final/include/llvm/MC/MCValue.h
@@ -0,0 +1,87 @@
+//===-- llvm/MC/MCValue.h - MCValue class -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MCValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCVALUE_H
+#define LLVM_MC_MCVALUE_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/MC/MCSymbol.h"
+#include <cassert>
+
+namespace llvm {
+class MCAsmInfo;
+class MCSymbol;
+class MCSymbolRefExpr;
+class raw_ostream;
+
+/// MCValue - This represents an "assembler immediate". In its most general
+/// form, this can hold "SymbolA - SymbolB + imm64". Not all targets support
+/// relocations of this general form, but we need to represent this anyway.
+///
+/// In the general form, SymbolB can only be defined if SymbolA is, and both
+/// must be in the same (non-external) section. The latter constraint is not
+/// enforced, since a symbol's section may not be known at construction.
+///
+/// Note that this class must remain a simple POD value class, because we need
+/// it to live in unions etc.
+class MCValue {
+  const MCSymbolRefExpr *SymA, *SymB;
+  int64_t Cst;
+public:
+
+  int64_t getConstant() const { return Cst; }
+  const MCSymbolRefExpr *getSymA() const { return SymA; }
+  const MCSymbolRefExpr *getSymB() const { return SymB; }
+
+  /// isAbsolute - Is this an absolute (as opposed to relocatable) value.
+  bool isAbsolute() const { return !SymA && !SymB; }
+
+  /// getAssociatedSection - For relocatable values, return the section the
+  /// value is associated with.
+  ///
+  /// @result - The value's associated section, or null for external or
+  /// constant values.
+  //
+  // FIXME: Switch to a tagged section, so this can return the tagged section
+  // value.
+  const MCSection *getAssociatedSection() const;
+
+  /// print - Print the value to the stream \arg OS.
+  void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
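+
+  /// For example, the relocatable expression "foo - bar + 8" would be held as
+  /// (a sketch, assuming MCSymbolRefExpr pointers FooRef and BarRef):
+  /// \code
+  ///   MCValue V = MCValue::get(FooRef, BarRef, 8);
+  /// \endcode
+
+  /// dump - Print the value to stderr.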
+  void dump() const;
+
+  static MCValue get(const MCSymbolRefExpr *SymA, const MCSymbolRefExpr *SymB=0,
+                     int64_t Val = 0) {
+    MCValue R;
+    assert((!SymB || SymA) && "Invalid relocatable MCValue!");
+    R.Cst = Val;
+    R.SymA = SymA;
+    R.SymB = SymB;
+    return R;
+  }
+
+  static MCValue get(int64_t Val) {
+    MCValue R;
+    R.Cst = Val;
+    R.SymA = 0;
+    R.SymB = 0;
+    return R;
+  }
+
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/MC/SectionKind.h b/final/include/llvm/MC/SectionKind.h
new file mode 100644
index 00000000000..85a91c6b169
--- /dev/null
+++ b/final/include/llvm/MC/SectionKind.h
@@ -0,0 +1,240 @@
+//===-- llvm/MC/SectionKind.h - Classification of sections ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SectionKind class, a simple POD value used to
+// classify the contents of a section.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_SECTIONKIND_H
+#define LLVM_MC_SECTIONKIND_H
+
+namespace llvm {
+
+/// SectionKind - This is a simple POD value that classifies the properties of
+/// a section. A section is classified into the deepest possible
+/// classification, and then the target maps them onto their sections based on
+/// what capabilities they have.
+///
+/// The comments below describe these as if they were an inheritance hierarchy
+/// in order to explain the predicates below.
+///
+class SectionKind {
+  enum Kind {
+    /// Metadata - Debug info sections or other metadata.
+    Metadata,
+
+    /// Text - Text section, used for functions and other executable code.
+    Text,
+
+    /// ReadOnly - Data that is never written to at program runtime by the
+    /// program or the dynamic linker. Things in the top-level readonly
+    /// SectionKind are not mergeable.
+    ReadOnly,
+
+        /// MergeableCString - Any null-terminated string which allows merging.
+        /// These values are known to end in a nul value of the specified size,
+        /// not otherwise contain a nul value, and be mergeable. This allows
+        /// the linker to unique the strings if it so desires.
+
+            /// Mergeable1ByteCString - 1 byte mergeable, null terminated, string.
+            Mergeable1ByteCString,
+
+            /// Mergeable2ByteCString - 2 byte mergeable, null terminated, string.
+            Mergeable2ByteCString,
+
+            /// Mergeable4ByteCString - 4 byte mergeable, null terminated, string.
+            Mergeable4ByteCString,
+
+        /// MergeableConst - These are sections for merging fixed-length
+        /// constants together. For example, this can be used to unique
+        /// constant pool entries etc.
+        MergeableConst,
+
+            /// MergeableConst4 - This is a section used by 4-byte constants,
+            /// for example, floats.
+            MergeableConst4,
+
+            /// MergeableConst8 - This is a section used by 8-byte constants,
+            /// for example, doubles.
+            MergeableConst8,
+
+            /// MergeableConst16 - This is a section used by 16-byte constants,
+            /// for example, vectors.
+            MergeableConst16,
+
+    /// Writeable - This is the base of all segments that need to be written
+    /// to during program runtime.
+
+        /// ThreadLocal - This is the base of all TLS segments. All TLS
+        /// objects must be writeable, otherwise there is no reason for them to
+        /// be thread local!
+
+            /// ThreadBSS - Zero-initialized TLS data objects.
+            ThreadBSS,
+
+            /// ThreadData - Initialized TLS data objects.
+ ThreadData, + + /// GlobalWriteableData - Writeable data that is global (not thread + /// local). + + /// BSS - Zero initialized writeable data. + BSS, + + /// BSSLocal - This is BSS (zero initialized and writable) data + /// which has local linkage. + BSSLocal, + + /// BSSExtern - This is BSS data with normal external linkage. + BSSExtern, + + /// Common - Data with common linkage. These represent tentative + /// definitions, which always have a zero initializer and are never + /// marked 'constant'. + Common, + + /// DataRel - This is the most general form of data that is written + /// to by the program, it can have random relocations to arbitrary + /// globals. + DataRel, + + /// DataRelLocal - This is writeable data that has a non-zero + /// initializer and has relocations in it, but all of the + /// relocations are known to be within the final linked image + /// the global is linked into. + DataRelLocal, + + /// DataNoRel - This is writeable data that has a non-zero + /// initializer, but whose initializer is known to have no + /// relocations. + DataNoRel, + + /// ReadOnlyWithRel - These are global variables that are never + /// written to by the program, but that have relocations, so they + /// must be stuck in a writeable section so that the dynamic linker + /// can write to them. If it chooses to, the dynamic linker can + /// mark the pages these globals end up on as read-only after it is + /// done with its relocation phase. + ReadOnlyWithRel, + + /// ReadOnlyWithRelLocal - This is data that is readonly by the + /// program, but must be writeable so that the dynamic linker + /// can perform relocations in it. This is used when we know + /// that all the relocations are to globals in this final + /// linked image. + ReadOnlyWithRelLocal + + } K : 8; +public: + + bool isMetadata() const { return K == Metadata; } + bool isText() const { return K == Text; } + + bool isReadOnly() const { + return K == ReadOnly || isMergeableCString() || + isMergeableConst(); + } + + bool isMergeableCString() const { + return K == Mergeable1ByteCString || K == Mergeable2ByteCString || + K == Mergeable4ByteCString; + } + bool isMergeable1ByteCString() const { return K == Mergeable1ByteCString; } + bool isMergeable2ByteCString() const { return K == Mergeable2ByteCString; } + bool isMergeable4ByteCString() const { return K == Mergeable4ByteCString; } + + bool isMergeableConst() const { + return K == MergeableConst || K == MergeableConst4 || + K == MergeableConst8 || K == MergeableConst16; + } + bool isMergeableConst4() const { return K == MergeableConst4; } + bool isMergeableConst8() const { return K == MergeableConst8; } + bool isMergeableConst16() const { return K == MergeableConst16; } + + bool isWriteable() const { + return isThreadLocal() || isGlobalWriteableData(); + } + + bool isThreadLocal() const { + return K == ThreadData || K == ThreadBSS; + } + + bool isThreadBSS() const { return K == ThreadBSS; } + bool isThreadData() const { return K == ThreadData; } + + bool isGlobalWriteableData() const { + return isBSS() || isCommon() || isDataRel() || isReadOnlyWithRel(); + } + + bool isBSS() const { return K == BSS || K == BSSLocal || K == BSSExtern; } + bool isBSSLocal() const { return K == BSSLocal; } + bool isBSSExtern() const { return K == BSSExtern; } + + bool isCommon() const { return K == Common; } + + bool isDataRel() const { + return K == DataRel || K == DataRelLocal || K == DataNoRel; + } + + bool isDataRelLocal() const { + return K == DataRelLocal || K == DataNoRel; + } + + bool 
isDataNoRel() const { return K == DataNoRel; }
+
+  bool isReadOnlyWithRel() const {
+    return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal;
+  }
+
+  bool isReadOnlyWithRelLocal() const {
+    return K == ReadOnlyWithRelLocal;
+  }
+private:
+  static SectionKind get(Kind K) {
+    SectionKind Res;
+    Res.K = K;
+    return Res;
+  }
+public:
+
+  static SectionKind getMetadata() { return get(Metadata); }
+  static SectionKind getText() { return get(Text); }
+  static SectionKind getReadOnly() { return get(ReadOnly); }
+  static SectionKind getMergeable1ByteCString() {
+    return get(Mergeable1ByteCString);
+  }
+  static SectionKind getMergeable2ByteCString() {
+    return get(Mergeable2ByteCString);
+  }
+  static SectionKind getMergeable4ByteCString() {
+    return get(Mergeable4ByteCString);
+  }
+  static SectionKind getMergeableConst() { return get(MergeableConst); }
+  static SectionKind getMergeableConst4() { return get(MergeableConst4); }
+  static SectionKind getMergeableConst8() { return get(MergeableConst8); }
+  static SectionKind getMergeableConst16() { return get(MergeableConst16); }
+  static SectionKind getThreadBSS() { return get(ThreadBSS); }
+  static SectionKind getThreadData() { return get(ThreadData); }
+  static SectionKind getBSS() { return get(BSS); }
+  static SectionKind getBSSLocal() { return get(BSSLocal); }
+  static SectionKind getBSSExtern() { return get(BSSExtern); }
+  static SectionKind getCommon() { return get(Common); }
+  static SectionKind getDataRel() { return get(DataRel); }
+  static SectionKind getDataRelLocal() { return get(DataRelLocal); }
+  static SectionKind getDataNoRel() { return get(DataNoRel); }
+  static SectionKind getReadOnlyWithRel() { return get(ReadOnlyWithRel); }
+  static SectionKind getReadOnlyWithRelLocal() {
+    return get(ReadOnlyWithRelLocal);
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Metadata.h b/final/include/llvm/Metadata.h
new file mode 100644
index 00000000000..6a61996ff76
--- /dev/null
+++ b/final/include/llvm/Metadata.h
@@ -0,0 +1,237 @@
+//===-- llvm/Metadata.h - Metadata definitions ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for metadata subclasses.
+/// They represent the different flavors of metadata that live in LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_METADATA_H
+#define LLVM_METADATA_H
+
+#include "llvm/Value.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ilist_node.h"
+
+namespace llvm {
+class Constant;
+class Instruction;
+class LLVMContext;
+class Module;
+template<typename T> class SmallVectorImpl;
+template<typename ValueSubClass, typename ItemParentClass>
+  class SymbolTableListTraits;
+
+
+//===----------------------------------------------------------------------===//
+/// MDString - a single uniqued string.
+/// These are used to efficiently contain a byte sequence for metadata.
+/// MDString is always unnamed.
+class MDString : public Value {
+  MDString(const MDString &);            // DO NOT IMPLEMENT
+
+  StringRef Str;
+  explicit MDString(LLVMContext &C, StringRef S);
+
+public:
+  static MDString *get(LLVMContext &Context, StringRef Str);
+  static MDString *get(LLVMContext &Context, const char *Str) {
+    return get(Context, Str ?
StringRef(Str) : StringRef()); + } + + StringRef getString() const { return Str; } + + unsigned getLength() const { return (unsigned)Str.size(); } + + typedef StringRef::iterator iterator; + + /// begin() - Pointer to the first byte of the string. + /// + iterator begin() const { return Str.begin(); } + + /// end() - Pointer to one byte past the end of the string. + /// + iterator end() const { return Str.end(); } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const MDString *) { return true; } + static bool classof(const Value *V) { + return V->getValueID() == MDStringVal; + } +}; + + +class MDNodeOperand; + +//===----------------------------------------------------------------------===// +/// MDNode - a tuple of other values. +class MDNode : public Value, public FoldingSetNode { + MDNode(const MDNode &); // DO NOT IMPLEMENT + void operator=(const MDNode &); // DO NOT IMPLEMENT + friend class MDNodeOperand; + friend class LLVMContextImpl; + + /// NumOperands - This many 'MDNodeOperand' items are co-allocated onto the + /// end of this MDNode. + unsigned NumOperands; + + // Subclass data enums. + enum { + /// FunctionLocalBit - This bit is set if this MDNode is function local. + /// This is true when it (potentially transitively) contains a reference to + /// something in a function, like an argument, basicblock, or instruction. + FunctionLocalBit = 1 << 0, + + /// NotUniquedBit - This is set on MDNodes that are not uniqued because they + /// have a null operand. + NotUniquedBit = 1 << 1, + + /// DestroyFlag - This bit is set by destroy() so the destructor can assert + /// that the node isn't being destroyed with a plain 'delete'. + DestroyFlag = 1 << 2 + }; + + // FunctionLocal enums. + enum FunctionLocalness { + FL_Unknown = -1, + FL_No = 0, + FL_Yes = 1 + }; + + /// replaceOperand - Replace each instance of F from the operand list of this + /// node with T. + void replaceOperand(MDNodeOperand *Op, Value *NewVal); + ~MDNode(); + + MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + bool isFunctionLocal); + + static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + FunctionLocalness FL, bool Insert = true); +public: + // Constructors and destructors. + static MDNode *get(LLVMContext &Context, ArrayRef V); + // FIXME: Eliminate this constructor form. + static MDNode *get(LLVMContext &Context, Value *const *Vals, + unsigned NumVals); + // getWhenValsUnresolved - Construct MDNode determining function-localness + // from isFunctionLocal argument, not by analyzing Vals. + static MDNode *getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, + unsigned NumVals, bool isFunctionLocal); + + static MDNode *getIfExists(LLVMContext &Context, Value *const *Vals, + unsigned NumVals); + + /// getTemporary - Return a temporary MDNode, for use in constructing + /// cyclic MDNode structures. A temporary MDNode is not uniqued, + /// may be RAUW'd, and must be manually deleted with deleteTemporary. + static MDNode *getTemporary(LLVMContext &Context, Value *const *Vals, + unsigned NumVals); + + /// deleteTemporary - Deallocate a node created by getTemporary. The + /// node must not have any users. + static void deleteTemporary(MDNode *N); + + /// getOperand - Return specified operand. + Value *getOperand(unsigned i) const; + + /// getNumOperands - Return number of MDNode operands. + unsigned getNumOperands() const { return NumOperands; } + + /// isFunctionLocal - Return whether MDNode is local to a function. 
+  bool isFunctionLocal() const {
+    return (getSubclassDataFromValue() & FunctionLocalBit) != 0;
+  }
+
+  // getFunction - If this metadata is function-local and recursively has a
+  // function-local operand, return the first such operand's parent function.
+  // Otherwise, return null. getFunction() should not be used for performance-
+  // critical code because it recursively visits all the MDNode's operands.
+  const Function *getFunction() const;
+
+  /// Profile - Calculate a unique identifier for this MDNode to collapse
+  /// duplicates.
+  void Profile(FoldingSetNodeID &ID) const;
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const MDNode *) { return true; }
+  static bool classof(const Value *V) {
+    return V->getValueID() == MDNodeVal;
+  }
+private:
+  // destroy - Delete this node. Only call this when there are no uses.
+  void destroy();
+
+  bool isNotUniqued() const {
+    return (getSubclassDataFromValue() & NotUniquedBit) != 0;
+  }
+  void setIsNotUniqued();
+
+  // Shadow Value::setValueSubclassData with a private forwarding method so
+  // that any future subclasses cannot accidentally use it.
+  void setValueSubclassData(unsigned short D) {
+    Value::setValueSubclassData(D);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// NamedMDNode - a tuple of MDNodes. Despite its name, a NamedMDNode isn't
+/// itself an MDNode. NamedMDNodes belong to modules, have names, and contain
+/// lists of MDNodes.
+class NamedMDNode : public ilist_node<NamedMDNode> {
+  friend class SymbolTableListTraits<NamedMDNode, Module>;
+  friend struct ilist_traits<NamedMDNode>;
+  friend class LLVMContextImpl;
+  friend class Module;
+  NamedMDNode(const NamedMDNode &);      // DO NOT IMPLEMENT
+
+  std::string Name;
+  Module *Parent;
+  void *Operands; // SmallVector<TrackingVH<MDNode>, 4>
+
+  void setParent(Module *M) { Parent = M; }
+
+  explicit NamedMDNode(const Twine &N);
+
+public:
+  /// eraseFromParent - Drop all references and remove the node from parent
+  /// module.
+  void eraseFromParent();
+
+  /// dropAllReferences - Remove all uses and clear node vector.
+  void dropAllReferences();
+
+  /// ~NamedMDNode - Destroy NamedMDNode.
+  ~NamedMDNode();
+
+  /// getParent - Get the module that holds this named metadata collection.
+  inline Module *getParent() { return Parent; }
+  inline const Module *getParent() const { return Parent; }
+
+  /// getOperand - Return the specified operand.
+  MDNode *getOperand(unsigned i) const;
+
+  /// getNumOperands - Return the number of NamedMDNode operands.
+  unsigned getNumOperands() const;
+
+  /// addOperand - Add a metadata operand.
+  void addOperand(MDNode *M);
+
+  /// getName - Return a constant reference to this named metadata's name.
+  StringRef getName() const;
+
+  /// print - Implement operator<< on NamedMDNode.
+  void print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW = 0) const;
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/Module.h b/final/include/llvm/Module.h
new file mode 100644
index 00000000000..c7063f5e1c4
--- /dev/null
+++ b/final/include/llvm/Module.h
@@ -0,0 +1,556 @@
+//===-- llvm/Module.h - C++ class to represent a VM module ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// @file
+/// This file contains the declarations for the Module class.
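+///
+/// A minimal usage sketch (assuming an LLVMContext Ctx is available):
+/// \code
+///   Module *M = new Module("mymodule", Ctx);
+///   const std::string &Id = M->getModuleIdentifier(); // "mymodule"
+/// \endcode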
diff --git a/final/include/llvm/Module.h b/final/include/llvm/Module.h new file mode 100644 index 00000000000..c7063f5e1c4 --- /dev/null +++ b/final/include/llvm/Module.h @@ -0,0 +1,556 @@ +//===-- llvm/Module.h - C++ class to represent a VM module ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// @file +/// Module.h This file contains the declarations for the Module class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MODULE_H +#define LLVM_MODULE_H + +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/Metadata.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/DataTypes.h" +#include <vector> + +namespace llvm { + +class FunctionType; +class GVMaterializer; +class LLVMContext; + +template<> struct ilist_traits<Function> + : public SymbolTableListTraits<Function, Module> { + + // createSentinel is used to get hold of the node that marks the end of the + // list... (same trick used here as in ilist_traits<Instruction>) + Function *createSentinel() const { + return static_cast<Function*>(&Sentinel); + } + static void destroySentinel(Function*) {} + + Function *provideInitialHead() const { return createSentinel(); } + Function *ensureHead(Function*) const { return createSentinel(); } + static void noteHead(Function*, Function*) {} + +private: + mutable ilist_node<Function> Sentinel; +}; +template<> struct ilist_traits<GlobalVariable> + : public SymbolTableListTraits<GlobalVariable, Module> { + // createSentinel is used to create a node that marks the end of the list. + static GlobalVariable *createSentinel(); + static void destroySentinel(GlobalVariable *GV) { delete GV; } +}; +template<> struct ilist_traits<GlobalAlias> + : public SymbolTableListTraits<GlobalAlias, Module> { + // createSentinel is used to create a node that marks the end of the list. + static GlobalAlias *createSentinel(); + static void destroySentinel(GlobalAlias *GA) { delete GA; } +}; + +template<> struct ilist_traits<NamedMDNode> + : public ilist_default_traits<NamedMDNode> { + // createSentinel is used to get hold of a node that marks the end of + // the list... + NamedMDNode *createSentinel() const { + return static_cast<NamedMDNode*>(&Sentinel); + } + static void destroySentinel(NamedMDNode*) {} + + NamedMDNode *provideInitialHead() const { return createSentinel(); } + NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); } + static void noteHead(NamedMDNode*, NamedMDNode*) {} + void addNodeToList(NamedMDNode *) {} + void removeNodeFromList(NamedMDNode *) {} +private: + mutable ilist_node<NamedMDNode> Sentinel; +}; + +/// A Module instance is used to store all the information related to an +/// LLVM module. Modules are the top level container of all other LLVM +/// Intermediate Representation (IR) objects. Each module directly contains a +/// list of global variables, a list of functions, a list of libraries (or +/// other modules) this module depends on, a symbol table, and various data +/// about the target's characteristics. +/// +/// A module maintains a GlobalValRefMap object that is used to hold all +/// constant references to global variables in the module. When a global +/// variable is destroyed, it should have no entries in the GlobalValueRefMap. +/// @brief The main container class for the LLVM Intermediate Representation. +class Module { +/// @name Types And Enumerations +/// @{ +public: + /// The type for the list of global variables. + typedef iplist<GlobalVariable> GlobalListType; + /// The type for the list of functions. + typedef iplist<Function> FunctionListType; + /// The type for the list of aliases. + typedef iplist<GlobalAlias> AliasListType; + /// The type for the list of named metadata. + typedef ilist<NamedMDNode> NamedMDListType; + + /// The type for the list of dependent libraries. + typedef std::vector<std::string> LibraryListType; + + /// The Global Variable iterator. + typedef GlobalListType::iterator global_iterator; + /// The Global Variable constant iterator. + typedef GlobalListType::const_iterator const_global_iterator; + + /// The Function iterators.
+ typedef FunctionListType::iterator iterator; + /// The Function constant iterator. + typedef FunctionListType::const_iterator const_iterator; + + /// The Global Alias iterators. + typedef AliasListType::iterator alias_iterator; + /// The Global Alias constant iterator. + typedef AliasListType::const_iterator const_alias_iterator; + + /// The named metadata iterators. + typedef NamedMDListType::iterator named_metadata_iterator; + /// The named metadata constant iterators. + typedef NamedMDListType::const_iterator const_named_metadata_iterator; + /// The Library list iterator. + typedef LibraryListType::const_iterator lib_iterator; + + /// An enumeration for describing the endianness of the target machine. + enum Endianness { AnyEndianness, LittleEndian, BigEndian }; + + /// An enumeration for describing the size of a pointer on the target machine. + enum PointerSize { AnyPointerSize, Pointer32, Pointer64 }; + +/// @} +/// @name Member Variables +/// @{ +private: + LLVMContext &Context; ///< The LLVMContext from which types and + ///< constants are allocated. + GlobalListType GlobalList; ///< The Global Variables in the module + FunctionListType FunctionList; ///< The Functions in the module + AliasListType AliasList; ///< The Aliases in the module + LibraryListType LibraryList; ///< The Libraries needed by the module + NamedMDListType NamedMDList; ///< The named metadata in the module + std::string GlobalScopeAsm; ///< Inline Asm at global scope. + ValueSymbolTable *ValSymTab; ///< Symbol table for values + TypeSymbolTable *TypeSymTab; ///< Symbol table for types + OwningPtr<GVMaterializer> Materializer; ///< Used to materialize GlobalValues + std::string ModuleID; ///< Human readable identifier for the module + std::string TargetTriple; ///< Platform target triple Module compiled on + std::string DataLayout; ///< Target data description + void *NamedMDSymTab; ///< NamedMDNode names. + + friend class Constant; + +/// @} +/// @name Constructors +/// @{ +public: + /// The Module constructor. Note that there is no default constructor. You + /// must provide a name for the module upon construction. + explicit Module(StringRef ModuleID, LLVMContext& C); + /// The module destructor. This will dropAllReferences. + ~Module(); + +/// @} +/// @name Module Level Accessors +/// @{ + + /// Get the module identifier which is, essentially, the name of the module. + /// @returns the module identifier as a string + const std::string &getModuleIdentifier() const { return ModuleID; } + + /// Get the data layout string for the module's target platform. This encodes + /// the type sizes and alignments expected by this module. + /// @returns the data layout as a string + const std::string &getDataLayout() const { return DataLayout; } + + /// Get the target triple which is a string describing the target host. + /// @returns a string containing the target triple. + const std::string &getTargetTriple() const { return TargetTriple; } + + /// Get the target endian information. + /// @returns Endianness - an enumeration for the endianness of the target + Endianness getEndianness() const; + + /// Get the target pointer size. + /// @returns PointerSize - an enumeration for the size of the target's pointer + PointerSize getPointerSize() const; + + /// Get the global data context. + /// @returns LLVMContext - a container for LLVM's global information + LLVMContext &getContext() const { return Context; }
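A short sketch of constructing a Module and exercising the module-level accessors above together with the mutators that follow (illustrative only; the triple and layout strings are made-up examples):

    LLVMContext Ctx;
    Module M("example", Ctx);                    // no default constructor
    M.setTargetTriple("x86_64-apple-darwin10");
    M.setDataLayout("e-p:64:64:64");             // getEndianness()/getPointerSize() parse this
    bool Is64 = M.getPointerSize() == Module::Pointer64;   // true for this layout
    // ~Module() runs dropAllReferences() before freeing the IR.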
+ /// Get any module-scope inline assembly blocks. + /// @returns a string containing the module-scope inline assembly blocks. + const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; } + +/// @} +/// @name Module Level Mutators +/// @{ + + /// Set the module identifier. + void setModuleIdentifier(StringRef ID) { ModuleID = ID; } + + /// Set the data layout. + void setDataLayout(StringRef DL) { DataLayout = DL; } + + /// Set the target triple. + void setTargetTriple(StringRef T) { TargetTriple = T; } + + /// Set the module-scope inline assembly blocks. + void setModuleInlineAsm(StringRef Asm) { + GlobalScopeAsm = Asm; + if (!GlobalScopeAsm.empty() && + GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n') + GlobalScopeAsm += '\n'; + } + + /// Append to the module-scope inline assembly blocks, automatically inserting + /// a separating newline if necessary. + void appendModuleInlineAsm(StringRef Asm) { + GlobalScopeAsm += Asm; + if (!GlobalScopeAsm.empty() && + GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n') + GlobalScopeAsm += '\n'; + } + +/// @} +/// @name Generic Value Accessors +/// @{ + + /// getNamedValue - Return the first global value in the module with + /// the specified name, of arbitrary type. This method returns null + /// if a global with the specified name is not found. + GlobalValue *getNamedValue(StringRef Name) const; + + /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. + /// This ID is uniqued across modules in the current LLVMContext. + unsigned getMDKindID(StringRef Name) const; + + /// getMDKindNames - Populate the client-supplied SmallVector with the names of + /// custom metadata IDs registered in this LLVMContext. + void getMDKindNames(SmallVectorImpl<StringRef> &Result) const; + +/// @} +/// @name Function Accessors +/// @{ + + /// getOrInsertFunction - Look up the specified function in the module symbol + /// table. Four possibilities: + /// 1. If it does not exist, add a prototype for the function and return it. + /// 2. If it exists, and has a local linkage, the existing function is + /// renamed and a new one is inserted. + /// 3. Otherwise, if the existing function has the correct prototype, return + /// the existing function. + /// 4. Finally, the function exists but has the wrong prototype: return the + /// function with a constantexpr cast to the right prototype. + Constant *getOrInsertFunction(StringRef Name, const FunctionType *T, + AttrListPtr AttributeList); + + Constant *getOrInsertFunction(StringRef Name, const FunctionType *T); + + /// getOrInsertFunction - Look up the specified function in the module symbol + /// table. If it does not exist, add a prototype for the function and return + /// it. This function guarantees to return a constant of pointer to the + /// specified function type or a ConstantExpr BitCast of that type if the + /// named function has a different type. This version of the method takes a + /// null terminated list of function arguments, which makes it easier for + /// clients to use. + Constant *getOrInsertFunction(StringRef Name, + AttrListPtr AttributeList, + const Type *RetTy, ...) END_WITH_NULL; + + /// getOrInsertFunction - Same as above, but without the attributes. + Constant *getOrInsertFunction(StringRef Name, const Type *RetTy, ...) + END_WITH_NULL; + + Constant *getOrInsertTargetIntrinsic(StringRef Name, + const FunctionType *Ty, + AttrListPtr AttributeList);
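The four-way contract of getOrInsertFunction is easiest to see in use. A hedged sketch (assumes Type::getInt32Ty and Type::getInt8PtrTy from llvm/Type.h and an existing Module &M and LLVMContext &Ctx; the callee name is arbitrary):

    // Declare 'i32 puts(i8*)' if absent; the trailing NULL ends the type list.
    Constant *C = M.getOrInsertFunction("puts", Type::getInt32Ty(Ctx),
                                        Type::getInt8PtrTy(Ctx), NULL);
    if (Function *F = dyn_cast<Function>(C)) {
      // Cases 1-3: a fresh prototype or a matching existing function.
    } else {
      // Case 4: a 'puts' with the wrong prototype came back as a
      // ConstantExpr bitcast to the requested function pointer type.
    }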
+ /// getFunction - Look up the specified function in the module symbol table. + /// If it does not exist, return null. + Function *getFunction(StringRef Name) const; + +/// @} +/// @name Global Variable Accessors +/// @{ + + /// getGlobalVariable - Look up the specified global variable in the module + /// symbol table. If it does not exist, return null. If AllowInternal is set + /// to true, this function will return types that have InternalLinkage. By + /// default, these types are not returned. + GlobalVariable *getGlobalVariable(StringRef Name, + bool AllowInternal = false) const; + + /// getNamedGlobal - Return the first global variable in the module with the + /// specified name, of arbitrary type. This method returns null if a global + /// with the specified name is not found. + GlobalVariable *getNamedGlobal(StringRef Name) const { + return getGlobalVariable(Name, true); + } + + /// getOrInsertGlobal - Look up the specified global in the module symbol + /// table. + /// 1. If it does not exist, add a declaration of the global and return it. + /// 2. Else, the global exists but has the wrong type: return the global + /// with a constantexpr cast to the right type. + /// 3. Finally, if the existing global is the correct declaration, return + /// the existing global. + Constant *getOrInsertGlobal(StringRef Name, const Type *Ty); + +/// @} +/// @name Global Alias Accessors +/// @{ + + /// getNamedAlias - Return the first global alias in the module with the + /// specified name, of arbitrary type. This method returns null if a global + /// with the specified name is not found. + GlobalAlias *getNamedAlias(StringRef Name) const; + +/// @} +/// @name Named Metadata Accessors +/// @{ + + /// getNamedMetadata - Return the first NamedMDNode in the module with the + /// specified name. This method returns null if a NamedMDNode with the + /// specified name is not found. + NamedMDNode *getNamedMetadata(const Twine &Name) const; + + /// getOrInsertNamedMetadata - Return the first named MDNode in the module + /// with the specified name. This method returns a new NamedMDNode if a + /// NamedMDNode with the specified name is not found. + NamedMDNode *getOrInsertNamedMetadata(StringRef Name); + + /// eraseNamedMetadata - Remove the given NamedMDNode from this module + /// and delete it. + void eraseNamedMetadata(NamedMDNode *NMD); + +/// @} +/// @name Type Accessors +/// @{ + + /// addTypeName - Insert an entry in the symbol table mapping Str to Type. If + /// there is already an entry for this name, true is returned and the symbol + /// table is not modified. + bool addTypeName(StringRef Name, const Type *Ty); + + /// getTypeName - If there is at least one entry in the symbol table for the + /// specified type, return it. + std::string getTypeName(const Type *Ty) const; + + /// getTypeByName - Return the type with the specified name in this module, or + /// null if there is none by that name. + const Type *getTypeByName(StringRef Name) const; + +/// @}
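A sketch tying together the global, alias, named-metadata, and type accessors above (illustrative; assumes Module &M and LLVMContext &Ctx):

    Constant *C = M.getOrInsertGlobal("counter", Type::getInt32Ty(Ctx));
    GlobalVariable *GV = M.getGlobalVariable("counter", /*AllowInternal=*/true);
    GlobalAlias *GA = M.getNamedAlias("counter_alias");    // null if absent
    NamedMDNode *MD = M.getNamedMetadata("llvm.dbg.cu");   // null if absent
    const Type *T = M.getTypeByName("struct.pair");        // null if absent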
+/// @name Materialization +/// @{ + + /// setMaterializer - Sets the GVMaterializer to GVM. This module must not + /// yet have a Materializer. To reset the materializer for a module that + /// already has one, call MaterializeAllPermanently first. Destroying this + /// module will destroy its materializer without materializing any more + /// GlobalValues. Without destroying the Module, there is no way to detach or + /// destroy a materializer without materializing all the GVs it controls, to + /// avoid leaving orphan unmaterialized GVs. + void setMaterializer(GVMaterializer *GVM); + /// getMaterializer - Retrieves the GVMaterializer, if any, for this Module. + GVMaterializer *getMaterializer() const { return Materializer.get(); } + + /// isMaterializable - True if the definition of GV has yet to be materialized + /// from the GVMaterializer. + bool isMaterializable(const GlobalValue *GV) const; + /// isDematerializable - Returns true if this GV was loaded from this Module's + /// GVMaterializer and the GVMaterializer knows how to dematerialize the GV. + bool isDematerializable(const GlobalValue *GV) const; + + /// Materialize - Make sure the GlobalValue is fully read. If the module is + /// corrupt, this returns true and fills in the optional string with + /// information about the problem. If successful, this returns false. + bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); + /// Dematerialize - If the GlobalValue is read in, and if the GVMaterializer + /// supports it, release the memory for the function, and set it up to be + /// materialized lazily. If !isDematerializable(), this method is a noop. + void Dematerialize(GlobalValue *GV); + + /// MaterializeAll - Make sure all GlobalValues in this Module are fully read. + /// If the module is corrupt, this returns true and fills in the optional + /// string with information about the problem. If successful, this returns + /// false. + bool MaterializeAll(std::string *ErrInfo = 0); + + /// MaterializeAllPermanently - Make sure all GlobalValues in this Module are + /// fully read and clear the Materializer. If the module is corrupt, this + /// returns true, fills in the optional string with information about the + /// problem, and DOES NOT clear the old Materializer. If successful, this + /// returns false. + bool MaterializeAllPermanently(std::string *ErrInfo = 0); + +/// @}
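How the materialization interface is typically driven once a GVMaterializer is attached (a sketch; the materializer normally comes from the lazy bitcode reader, which lives outside this header, handleError is a hypothetical helper, and GV is some GlobalValue*):

    std::string ErrInfo;
    if (M->isMaterializable(GV) && M->Materialize(GV, &ErrInfo))
      handleError(ErrInfo);                // 'true' means the module is corrupt
    if (M->isDematerializable(GV))
      M->Dematerialize(GV);                // release the body again
    if (M->MaterializeAllPermanently(&ErrInfo))
      handleError(ErrInfo);                // reads everything, clears the materializer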
+/// @name Direct access to the globals list, functions list, and symbol table +/// @{ + + /// Get the Module's list of global variables (constant). + const GlobalListType &getGlobalList() const { return GlobalList; } + /// Get the Module's list of global variables. + GlobalListType &getGlobalList() { return GlobalList; } + static iplist<GlobalVariable> Module::*getSublistAccess(GlobalVariable*) { + return &Module::GlobalList; + } + /// Get the Module's list of functions (constant). + const FunctionListType &getFunctionList() const { return FunctionList; } + /// Get the Module's list of functions. + FunctionListType &getFunctionList() { return FunctionList; } + static iplist<Function> Module::*getSublistAccess(Function*) { + return &Module::FunctionList; + } + /// Get the Module's list of aliases (constant). + const AliasListType &getAliasList() const { return AliasList; } + /// Get the Module's list of aliases. + AliasListType &getAliasList() { return AliasList; } + static iplist<GlobalAlias> Module::*getSublistAccess(GlobalAlias*) { + return &Module::AliasList; + } + /// Get the symbol table of global variable and function identifiers (constant). + const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; } + /// Get the Module's symbol table of global variable and function identifiers. + ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; } + /// Get the symbol table of types (constant). + const TypeSymbolTable &getTypeSymbolTable() const { return *TypeSymTab; } + /// Get the Module's symbol table of types. + TypeSymbolTable &getTypeSymbolTable() { return *TypeSymTab; } + +/// @} +/// @name Global Variable Iteration +/// @{ + + /// Get an iterator to the first global variable. + global_iterator global_begin() { return GlobalList.begin(); } + /// Get a constant iterator to the first global variable. + const_global_iterator global_begin() const { return GlobalList.begin(); } + /// Get an iterator one past the last global variable. + global_iterator global_end () { return GlobalList.end(); } + /// Get a constant iterator one past the last global variable. + const_global_iterator global_end () const { return GlobalList.end(); } + /// Determine if the list of globals is empty. + bool global_empty() const { return GlobalList.empty(); } + +/// @} +/// @name Function Iteration +/// @{ + + /// Get an iterator to the first function. + iterator begin() { return FunctionList.begin(); } + /// Get a constant iterator to the first function. + const_iterator begin() const { return FunctionList.begin(); } + /// Get an iterator one past the last function. + iterator end () { return FunctionList.end(); } + /// Get a constant iterator one past the last function. + const_iterator end () const { return FunctionList.end(); } + /// Determine how many functions are in the Module's list of functions. + size_t size() const { return FunctionList.size(); } + /// Determine if the list of functions is empty. + bool empty() const { return FunctionList.empty(); } + +/// @} +/// @name Dependent Library Iteration +/// @{ + + /// @brief Get a constant iterator to beginning of dependent library list. + inline lib_iterator lib_begin() const { return LibraryList.begin(); } + /// @brief Get a constant iterator to end of dependent library list. + inline lib_iterator lib_end() const { return LibraryList.end(); } + /// @brief Returns the number of items in the list of libraries. + inline size_t lib_size() const { return LibraryList.size(); } + /// @brief Add a library to the list of dependent libraries. + void addLibrary(StringRef Lib); + /// @brief Remove a library from the list of dependent libraries. + void removeLibrary(StringRef Lib); + /// @brief Get all the libraries. + inline const LibraryListType& getLibraries() const { return LibraryList; } + +/// @} +/// @name Alias Iteration +/// @{ + + /// Get an iterator to the first alias. + alias_iterator alias_begin() { return AliasList.begin(); } + /// Get a constant iterator to the first alias. + const_alias_iterator alias_begin() const { return AliasList.begin(); } + /// Get an iterator one past the last alias. + alias_iterator alias_end () { return AliasList.end(); } + /// Get a constant iterator one past the last alias. + const_alias_iterator alias_end () const { return AliasList.end(); } + /// Determine how many aliases are in the Module's list of aliases. + size_t alias_size () const { return AliasList.size(); } + /// Determine if the list of aliases is empty. + bool alias_empty() const { return AliasList.empty(); } + + +/// @}
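The iterator typedefs above support the canonical traversal loops. A sketch (Module &M assumed; isDeclaration comes from GlobalValue.h):

    unsigned DefinedFunctions = 0;
    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
      if (!F->isDeclaration())
        ++DefinedFunctions;                          // functions with bodies
    for (Module::global_iterator GV = M.global_begin(), GE = M.global_end();
         GV != GE; ++GV)
      ;                                              // visit each global variable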
+/// @name Named Metadata Iteration +/// @{ + + /// Get an iterator to the first named metadata. + named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); } + /// Get a constant iterator to the first named metadata. + const_named_metadata_iterator named_metadata_begin() const { + return NamedMDList.begin(); + } + + /// Get an iterator one past the last named metadata. + named_metadata_iterator named_metadata_end() { return NamedMDList.end(); } + /// Get a constant iterator one past the last named metadata. + const_named_metadata_iterator named_metadata_end() const { + return NamedMDList.end(); + } + + /// Determine how many NamedMDNodes are in the Module's list of named + /// metadata. + size_t named_metadata_size() const { return NamedMDList.size(); } + /// Determine if the list of named metadata is empty. + bool named_metadata_empty() const { return NamedMDList.empty(); } + + +/// @} +/// @name Utility functions for printing and dumping Module objects +/// @{ + + /// Print the module to an output stream with AssemblyAnnotationWriter. + void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const; + + /// Dump the module to stderr (for debugging). + void dump() const; + /// This function causes all the subinstructions to "let go" of all references + /// that they are maintaining. This allows one to 'delete' a whole class at + /// a time, even though there may be circular references... first all + /// references are dropped, and all use counts go to zero. Then everything + /// is delete'd for real. Note that no operations are valid on an object + /// that has "dropped all references", except operator delete. + void dropAllReferences(); +/// @} +}; + +/// A raw_ostream inserter for modules. +inline raw_ostream &operator<<(raw_ostream &O, const Module &M) { + M.print(O, 0); + return O; +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Object/MachOFormat.h b/final/include/llvm/Object/MachOFormat.h new file mode 100644 index 00000000000..31cd523ea21 --- /dev/null +++ b/final/include/llvm/Object/MachOFormat.h @@ -0,0 +1,367 @@ +//===- MachOFormat.h - Mach-O Format Structures And Constants ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares various structures and constants which are platform +// independent and can be shared by any client which wishes to interact with +// Mach object files. +// +// The definitions here are purposely chosen to match the LLVM style as opposed +// to following the platform specific definition of the format. +// +// On a Mach system, see the includes for more information, in +// particular <mach-o/loader.h>. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOFORMAT_H +#define LLVM_OBJECT_MACHOFORMAT_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { +namespace object { + +/// General Mach platform information. +namespace mach { + /// @name CPU Type and Subtype Information + /// @{ + + /// \brief Capability bits used in CPU type encoding. + enum CPUTypeFlagsMask { + CTFM_ArchMask = 0xFF000000, + CTFM_ArchABI64 = 0x01000000 + }; + + /// \brief Machine type IDs used in CPU type encoding. + enum CPUTypeMachine { + CTM_i386 = 7, + CTM_x86_64 = CTM_i386 | CTFM_ArchABI64, + CTM_ARM = 12, + CTM_SPARC = 14, + CTM_PowerPC = 18, + CTM_PowerPC64 = CTM_PowerPC | CTFM_ArchABI64 + }; + + /// \brief Capability bits used in CPU subtype encoding. + enum CPUSubtypeFlagsMask { + CSFM_SubtypeMask = 0xFF000000, + CSFM_SubtypeLib64 = 0x80000000 + }; + + /// \brief ARM Machine Subtypes.
+ enum CPUSubtypeARM { + CSARM_ALL = 0, + CSARM_V4T = 5, + CSARM_V6 = 6, + CSARM_V5TEJ = 7, + CSARM_XSCALE = 8, + CSARM_V7 = 9 + }; + + /// \brief PowerPC Machine Subtypes. + enum CPUSubtypePowerPC { + CSPPC_ALL = 0 + }; + + /// \brief SPARC Machine Subtypes. + enum CPUSubtypeSPARC { + CSSPARC_ALL = 0 + }; + + /// \brief x86 Machine Subtypes. + enum CPUSubtypeX86 { + CSX86_ALL = 3 + }; + + /// @} + +} // end namespace mach + +/// Format information for Mach object files. +namespace macho { + /// \brief Constants for structure sizes. + enum StructureSizes { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + /// \brief Constants for header magic field. + enum HeaderMagic { + HM_Object32 = 0xFEEDFACE, ///< 32-bit mach object file + HM_Object64 = 0xFEEDFACF, ///< 64-bit mach object file + HM_Universal = 0xCAFEBABE ///< Universal object file + }; + + /// \brief Header common to all Mach object files. + struct Header { + uint32_t Magic; + uint32_t CPUType; + uint32_t CPUSubtype; + uint32_t FileType; + uint32_t NumLoadCommands; + uint32_t SizeOfLoadCommands; + uint32_t Flags; + }; + + /// \brief Extended header for 64-bit object files. + struct Header64Ext { + uint32_t Reserved; + }; + + // See <mach-o/loader.h>. + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19, + LCT_UUID = 0x1b + }; + + /// \brief Load command structure. + struct LoadCommand { + uint32_t Type; + uint32_t Size; + }; + + /// @name Load Command Structures + /// @{ + + struct SegmentLoadCommand { + uint32_t Type; + uint32_t Size; + char Name[16]; + uint32_t VMAddress; + uint32_t VMSize; + uint32_t FileOffset; + uint32_t FileSize; + uint32_t MaxVMProtection; + uint32_t InitialVMProtection; + uint32_t NumSections; + uint32_t Flags; + }; + + struct Segment64LoadCommand { + uint32_t Type; + uint32_t Size; + char Name[16]; + uint64_t VMAddress; + uint64_t VMSize; + uint64_t FileOffset; + uint64_t FileSize; + uint32_t MaxVMProtection; + uint32_t InitialVMProtection; + uint32_t NumSections; + uint32_t Flags; + }; + + struct SymtabLoadCommand { + uint32_t Type; + uint32_t Size; + uint32_t SymbolTableOffset; + uint32_t NumSymbolTableEntries; + uint32_t StringTableOffset; + uint32_t StringTableSize; + }; + + struct DysymtabLoadCommand { + uint32_t Type; + uint32_t Size; + + uint32_t LocalSymbolsIndex; + uint32_t NumLocalSymbols; + + uint32_t ExternalSymbolsIndex; + uint32_t NumExternalSymbols; + + uint32_t UndefinedSymbolsIndex; + uint32_t NumUndefinedSymbols; + + uint32_t TOCOffset; + uint32_t NumTOCEntries; + + uint32_t ModuleTableOffset; + uint32_t NumModuleTableEntries; + + uint32_t ReferenceSymbolTableOffset; + uint32_t NumReferencedSymbolTableEntries; + + uint32_t IndirectSymbolTableOffset; + uint32_t NumIndirectSymbolTableEntries; + + uint32_t ExternalRelocationTableOffset; + uint32_t NumExternalRelocationTableEntries; + + uint32_t LocalRelocationTableOffset; + uint32_t NumLocalRelocationTableEntries; + }; + + /// @} + /// @name Section Data + /// @{ + + struct Section { + char Name[16]; + char SegmentName[16]; + uint32_t Address; + uint32_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelocationTableOffset; + uint32_t
NumRelocationTableEntries; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + }; + struct Section64 { + char Name[16]; + char SegmentName[16]; + uint64_t Address; + uint64_t Size; + uint32_t Offset; + uint32_t Align; + uint32_t RelocationTableOffset; + uint32_t NumRelocationTableEntries; + uint32_t Flags; + uint32_t Reserved1; + uint32_t Reserved2; + uint32_t Reserved3; + }; + + /// @} + /// @name Symbol Table Entries + /// @{ + + struct SymbolTableEntry { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; + uint32_t Value; + }; + struct Symbol64TableEntry { + uint32_t StringIndex; + uint8_t Type; + uint8_t SectionIndex; + uint16_t Flags; + uint64_t Value; + }; + + /// @} + /// @name Indirect Symbol Table + /// @{ + + struct IndirectSymbolTableEntry { + uint32_t Index; + }; + + /// @} + /// @name Relocation Data + /// @{ + + struct RelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + /// @} + + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>). Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + /// Common relocation info types. + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2 + }; + + /// Generic relocation info types, which are shared by some (but not all) + /// platforms. + enum RelocationInfoType_Generic { + RIT_Generic_PreboundLazyPointer = 3, + RIT_Generic_LocalDifference = 4, + RIT_Generic_TLV = 5 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + // Note that x86_64 doesn't even share the common relocation types. + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8, + RIT_X86_64_TLV = 9 + }; + + /// ARM uses its own relocation types. + enum RelocationInfoTypeARM { + RIT_ARM_LocalDifference = 3, + RIT_ARM_PreboundLazyPointer = 4, + RIT_ARM_Branch24Bit = 5, + RIT_ARM_ThumbBranch22Bit = 6, + RIT_ARM_ThumbBranch32Bit = 7, + RIT_ARM_Half = 8, + RIT_ARM_HalfDifference = 9 + + }; + +} // end namespace macho + +} // end namespace object +} // end namespace llvm + +#endif diff --git a/final/include/llvm/Object/MachOObject.h b/final/include/llvm/Object/MachOObject.h new file mode 100644 index 00000000000..03d9c147b41 --- /dev/null +++ b/final/include/llvm/Object/MachOObject.h @@ -0,0 +1,180 @@ +//===- MachOObject.h - Mach-O Object File Wrapper ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_MACHOOBJECT_H +#define LLVM_OBJECT_MACHOOBJECT_H + +#include <string> +#include "llvm/ADT/InMemoryStruct.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/MachOFormat.h" + +namespace llvm { + +class MemoryBuffer; + +namespace object { + +/// \brief Wrapper object for manipulating Mach-O object files. +/// +/// This class is designed to implement a full-featured, efficient, portable, +/// and robust Mach-O interface to Mach-O object files. It does not attempt to +/// smooth over rough edges in the Mach-O format or generalize access to object +/// independent features. +/// +/// The class is designed around accessing the Mach-O object which is expected +/// to be fully loaded into memory. +/// +/// This class is *not* suitable for concurrent use. For efficient operation, +/// the class uses APIs which rely on the ability to cache the results of +/// certain calls in internal objects which are not safe for concurrent +/// access. This allows the API to be zero-copy on the common paths. +// +// FIXME: It would be cool if we supported a "paged" MemoryBuffer +// implementation. This would allow us to implement a more sensible version of +// MemoryObject which can work like a MemoryBuffer, but be more efficient for +// objects which are in the current address space. +class MachOObject { +public: + struct LoadCommandInfo { + /// The load command information. + macho::LoadCommand Command; + + /// The offset to the start of the load command in memory. + uint64_t Offset; + }; + +private: + OwningPtr<MemoryBuffer> Buffer; + + /// Whether the object is little endian. + bool IsLittleEndian; + /// Whether the object is 64-bit. + bool Is64Bit; + /// Whether the object is swapped endianness from the host. + bool IsSwappedEndian; + /// Whether the string table has been registered. + bool HasStringTable; + + /// The cached information on the load commands. + LoadCommandInfo *LoadCommands; + mutable unsigned NumLoadedCommands; + + /// The cached copy of the header. + macho::Header Header; + macho::Header64Ext Header64Ext; + + /// Cache string table information. + StringRef StringTable; + +private: + MachOObject(MemoryBuffer *Buffer, bool IsLittleEndian, bool Is64Bit); + +public: + ~MachOObject(); + + /// \brief Load a Mach-O object from a MemoryBuffer object. + /// + /// \param Buffer - The buffer to load the object from. This routine takes + /// exclusive ownership of the buffer (which is passed to the returned object + /// on success). + /// \param ErrorStr [out] - If given, will be set to a user readable error + /// message on failure. + /// \returns The loaded object, or null on error. + static MachOObject *LoadFromBuffer(MemoryBuffer *Buffer, + std::string *ErrorStr = 0); + + /// @name File Information + /// @{ + + bool isLittleEndian() const { return IsLittleEndian; } + bool isSwappedEndian() const { return IsSwappedEndian; } + bool is64Bit() const { return Is64Bit; } + + unsigned getHeaderSize() const { + return Is64Bit ?
macho::Header64Size : macho::Header32Size; + } + + StringRef getData(size_t Offset, size_t Size) const; + + /// @} + /// @name String Table Data + /// @{ + + StringRef getStringTableData() const { + assert(HasStringTable && "String table has not been registered!"); + return StringTable; + } + + StringRef getStringAtIndex(unsigned Index) const { + size_t End = getStringTableData().find('\0', Index); + return getStringTableData().slice(Index, End); + } + + void RegisterStringTable(macho::SymtabLoadCommand &SLC); + + /// @} + /// @name Object Header Access + /// @{ + + const macho::Header &getHeader() const { return Header; } + const macho::Header64Ext &getHeader64Ext() const { + assert(is64Bit() && "Invalid access!"); + return Header64Ext; + } + + /// @} + /// @name Object Structure Access + /// @{ + + /// \brief Retrieve the information for the given load command. + const LoadCommandInfo &getLoadCommandInfo(unsigned Index) const; + + void ReadSegmentLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::SegmentLoadCommand> &Res) const; + void ReadSegment64LoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::Segment64LoadCommand> &Res) const; + void ReadSymtabLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::SymtabLoadCommand> &Res) const; + void ReadDysymtabLoadCommand( + const LoadCommandInfo &LCI, + InMemoryStruct<macho::DysymtabLoadCommand> &Res) const; + void ReadIndirectSymbolTableEntry( + const macho::DysymtabLoadCommand &DLC, + unsigned Index, + InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const; + void ReadSection( + const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section> &Res) const; + void ReadSection64( + const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section64> &Res) const; + void ReadRelocationEntry( + uint64_t RelocationTableOffset, unsigned Index, + InMemoryStruct<macho::RelocationEntry> &Res) const; + void ReadSymbolTableEntry( + uint64_t SymbolTableOffset, unsigned Index, + InMemoryStruct<macho::SymbolTableEntry> &Res) const; + void ReadSymbol64TableEntry( + uint64_t SymbolTableOffset, unsigned Index, + InMemoryStruct<macho::Symbol64TableEntry> &Res) const; + + /// @} +}; + +} // end namespace object +} // end namespace llvm + +#endif
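A sketch of driving the MachOObject API above (MemoryBuffer::getFile is from llvm/Support/MemoryBuffer.h and is assumed here; error handling is abbreviated):

    std::string Err;
    MachOObject *Obj =
        MachOObject::LoadFromBuffer(MemoryBuffer::getFile("a.out"), &Err);
    for (unsigned i = 0, e = Obj ? Obj->getHeader().NumLoadCommands : 0;
         i != e; ++i) {
      const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
      if (LCI.Command.Type == macho::LCT_Symtab) {
        InMemoryStruct<macho::SymtabLoadCommand> SLC;
        Obj->ReadSymtabLoadCommand(LCI, SLC);
        Obj->RegisterStringTable(*SLC);      // enables getStringAtIndex()
      }
    }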
diff --git a/final/include/llvm/Object/ObjectFile.h b/final/include/llvm/Object/ObjectFile.h new file mode 100644 index 00000000000..eee9d447cdd --- /dev/null +++ b/final/include/llvm/Object/ObjectFile.h @@ -0,0 +1,262 @@ +//===- ObjectFile.h - File format independent object file -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares a file format independent ObjectFile class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_OBJECT_FILE_H +#define LLVM_OBJECT_OBJECT_FILE_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" +#include <cstring> + +namespace llvm { + +class MemoryBuffer; +class StringRef; + +namespace object { + +class ObjectFile; + +union DataRefImpl { + struct { + uint32_t a, b; + } d; + intptr_t p; +}; + +static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) { + // Check bitwise identical. This is the only legal way to compare a union w/o + // knowing which member is in use. + return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0; +} + +/// SymbolRef - This is a value type class that represents a single symbol in +/// the list of symbols in the object file. +class SymbolRef { + DataRefImpl SymbolPimpl; + const ObjectFile *OwningObject; + +public: + SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner); + + bool operator==(const SymbolRef &Other) const; + + SymbolRef getNext() const; + + StringRef getName() const; + uint64_t getAddress() const; + uint64_t getSize() const; + + /// Returns the ASCII char that should be displayed in a symbol table dump via + /// nm for this symbol. + char getNMTypeChar() const; + + /// Returns true for symbols that are internal to the object file format such + /// as section symbols. + bool isInternal() const; +}; + +/// SectionRef - This is a value type class that represents a single section in +/// the list of sections in the object file. +class SectionRef { + DataRefImpl SectionPimpl; + const ObjectFile *OwningObject; + +public: + SectionRef(DataRefImpl SectionP, const ObjectFile *Owner); + + bool operator==(const SectionRef &Other) const; + + SectionRef getNext() const; + + StringRef getName() const; + uint64_t getAddress() const; + uint64_t getSize() const; + StringRef getContents() const; + + // FIXME: Move to the normalization layer when it's created. + bool isText() const; +}; + +const uint64_t UnknownAddressOrSize = ~0ULL; + +/// ObjectFile - This class is the base class for all object file types. +/// Concrete instances of this object are created by createObjectFile, which +/// figures out which type to create. +class ObjectFile { +private: + ObjectFile(); // = delete + ObjectFile(const ObjectFile &other); // = delete + +protected: + MemoryBuffer *MapFile; + const uint8_t *base; + + ObjectFile(MemoryBuffer *Object); + + // These functions are for SymbolRef to call internally. The main goal of + // this is to allow SymbolRef::SymbolPimpl to point directly to the symbol + // entry in the memory mapped object file. SymbolPimpl cannot contain any + // virtual functions because then it could not point into the memory mapped + // file. + friend class SymbolRef; + virtual SymbolRef getSymbolNext(DataRefImpl Symb) const = 0; + virtual StringRef getSymbolName(DataRefImpl Symb) const = 0; + virtual uint64_t getSymbolAddress(DataRefImpl Symb) const = 0; + virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0; + virtual char getSymbolNMTypeChar(DataRefImpl Symb) const = 0; + virtual bool isSymbolInternal(DataRefImpl Symb) const = 0; + + // Same as above for SectionRef.
+ friend class SectionRef; + virtual SectionRef getSectionNext(DataRefImpl Sec) const = 0; + virtual StringRef getSectionName(DataRefImpl Sec) const = 0; + virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0; + virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0; + virtual StringRef getSectionContents(DataRefImpl Sec) const = 0; + virtual bool isSectionText(DataRefImpl Sec) const = 0; + + +public: + template<class content_type> + class content_iterator { + content_type Current; + public: + content_iterator(content_type symb) + : Current(symb) {} + + const content_type* operator->() const { + return &Current; + } + + bool operator==(const content_iterator &other) const { + return Current == other.Current; + } + + bool operator!=(const content_iterator &other) const { + return !(*this == other); + } + + content_iterator& operator++() { // Preincrement + Current = Current.getNext(); + return *this; + } + }; + + typedef content_iterator<SymbolRef> symbol_iterator; + typedef content_iterator<SectionRef> section_iterator; + + virtual ~ObjectFile(); + + virtual symbol_iterator begin_symbols() const = 0; + virtual symbol_iterator end_symbols() const = 0; + + virtual section_iterator begin_sections() const = 0; + virtual section_iterator end_sections() const = 0; + + /// @brief The number of bytes used to represent an address in this object + /// file format. + virtual uint8_t getBytesInAddress() const = 0; + + virtual StringRef getFileFormatName() const = 0; + virtual /* Triple::ArchType */ unsigned getArch() const = 0; + + StringRef getFilename() const; + + /// @returns Pointer to ObjectFile subclass to handle this type of object. + /// @param ObjectPath The path to the object file. ObjectPath.isObject must + /// return true. + /// @brief Create ObjectFile from path. + static ObjectFile *createObjectFile(StringRef ObjectPath); + static ObjectFile *createObjectFile(MemoryBuffer *Object); + +private: + static ObjectFile *createCOFFObjectFile(MemoryBuffer *Object); + static ObjectFile *createELFObjectFile(MemoryBuffer *Object); + static ObjectFile *createMachOObjectFile(MemoryBuffer *Object); + static ObjectFile *createArchiveObjectFile(MemoryBuffer *Object); + static ObjectFile *createLibObjectFile(MemoryBuffer *Object); +};
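The begin/end virtuals combine with content_iterator into the usual loops; SymbolRef::getNext() is what operator++ calls under the hood. A sketch (outs() is from llvm/Support/raw_ostream.h, assumed):

    ObjectFile *O = ObjectFile::createObjectFile("a.out");
    for (ObjectFile::symbol_iterator I = O->begin_symbols(),
                                     E = O->end_symbols(); I != E; ++I)
      outs() << I->getNMTypeChar() << ' ' << I->getName() << '\n';

+// Inline function definitions.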
+inline SymbolRef::SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner) + : SymbolPimpl(SymbolP) + , OwningObject(Owner) {} + +inline bool SymbolRef::operator==(const SymbolRef &Other) const { + return SymbolPimpl == Other.SymbolPimpl; +} + +inline SymbolRef SymbolRef::getNext() const { + return OwningObject->getSymbolNext(SymbolPimpl); +} + +inline StringRef SymbolRef::getName() const { + return OwningObject->getSymbolName(SymbolPimpl); +} + +inline uint64_t SymbolRef::getAddress() const { + return OwningObject->getSymbolAddress(SymbolPimpl); +} + +inline uint64_t SymbolRef::getSize() const { + return OwningObject->getSymbolSize(SymbolPimpl); +} + +inline char SymbolRef::getNMTypeChar() const { + return OwningObject->getSymbolNMTypeChar(SymbolPimpl); +} + +inline bool SymbolRef::isInternal() const { + return OwningObject->isSymbolInternal(SymbolPimpl); +} + + +/// SectionRef +inline SectionRef::SectionRef(DataRefImpl SectionP, + const ObjectFile *Owner) + : SectionPimpl(SectionP) + , OwningObject(Owner) {} + +inline bool SectionRef::operator==(const SectionRef &Other) const { + return SectionPimpl == Other.SectionPimpl; +} + +inline SectionRef SectionRef::getNext() const { + return OwningObject->getSectionNext(SectionPimpl); +} + +inline StringRef SectionRef::getName() const { + return OwningObject->getSectionName(SectionPimpl); +} + +inline uint64_t SectionRef::getAddress() const { + return OwningObject->getSectionAddress(SectionPimpl); +} + +inline uint64_t SectionRef::getSize() const { + return OwningObject->getSectionSize(SectionPimpl); +} + +inline StringRef SectionRef::getContents() const { + return OwningObject->getSectionContents(SectionPimpl); +} + +inline bool SectionRef::isText() const { + return OwningObject->isSectionText(SectionPimpl); +} + +} // end namespace object +} // end namespace llvm + +#endif diff --git a/final/include/llvm/OperandTraits.h b/final/include/llvm/OperandTraits.h new file mode 100644 index 00000000000..f0df5fa9bde --- /dev/null +++ b/final/include/llvm/OperandTraits.h @@ -0,0 +1,197 @@ +//===-- llvm/OperandTraits.h - OperandTraits class definition ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the traits classes that are handy for enforcing the correct +// layout of various User subclasses. It also provides the means for accessing +// the operands in the most efficient manner. +// + +#ifndef LLVM_OPERAND_TRAITS_H +#define LLVM_OPERAND_TRAITS_H + +#include "llvm/User.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +// FixedNumOperand Trait Class +//===----------------------------------------------------------------------===// + +/// FixedNumOperandTraits - determine the allocation regime of the Use array +/// when it is a prefix to the User object, and the number of Use objects is +/// known at compile time. 
+ +template <typename SubClass, unsigned ARITY> +struct FixedNumOperandTraits { + static Use *op_begin(SubClass* U) { + return reinterpret_cast<Use*>(U) - ARITY; + } + static Use *op_end(SubClass* U) { + return reinterpret_cast<Use*>(U); + } + static unsigned operands(const User*) { + return ARITY; + } +}; + +//===----------------------------------------------------------------------===// +// OptionalOperand Trait Class +//===----------------------------------------------------------------------===// + +/// OptionalOperandTraits - when the number of operands may change at runtime. +/// Naturally it may only decrease, because the allocations may not change. + +template <typename SubClass, unsigned ARITY = 1> +struct OptionalOperandTraits : public FixedNumOperandTraits<SubClass, ARITY> { + static unsigned operands(const User *U) { + return U->getNumOperands(); + } +}; + +//===----------------------------------------------------------------------===// +// VariadicOperand Trait Class +//===----------------------------------------------------------------------===// + +/// VariadicOperandTraits - determine the allocation regime of the Use array +/// when it is a prefix to the User object, and the number of Use objects is +/// only known at allocation time. + +template <typename SubClass, unsigned MINARITY = 0> +struct VariadicOperandTraits { + static Use *op_begin(SubClass* U) { + return reinterpret_cast<Use*>(U) - static_cast<SubClass*>(U)->getNumOperands(); + } + static Use *op_end(SubClass* U) { + return reinterpret_cast<Use*>(U); + } + static unsigned operands(const User *U) { + return U->getNumOperands(); + } +}; + +//===----------------------------------------------------------------------===// +// HungoffOperand Trait Class +//===----------------------------------------------------------------------===// + +/// HungoffOperandTraits - determine the allocation regime of the Use array +/// when it is not a prefix to the User object, but allocated at an unrelated +/// heap address. +/// Assumes that the User subclass that is determined by this traits class +/// has an OperandList member of type User::op_iterator. [Note: this is now +/// trivially satisfied, because User has that member for historic reasons.] +/// +/// This is the traits class that is needed when the Use array must be +/// resizable. + +template <unsigned MINARITY = 1> +struct HungoffOperandTraits { + static Use *op_begin(User* U) { + return U->OperandList; + } + static Use *op_end(User* U) { + return U->OperandList + U->getNumOperands(); + } + static unsigned operands(const User *U) { + return U->getNumOperands(); + } +};
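What a User subclass does with these traits, wired up through the accessor macros defined just below (HypotheticalBinOp is invented for illustration; real clients are the Instruction subclasses elsewhere in this tree, and construction details are omitted):

    class HypotheticalBinOp : public User {
      // Real subclasses co-allocate their two Uses as a prefix of the object.
    public:
      DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
    };
    template <>
    struct OperandTraits<HypotheticalBinOp>
      : public FixedNumOperandTraits<HypotheticalBinOp, 2> {};
    DEFINE_TRANSPARENT_OPERAND_ACCESSORS(HypotheticalBinOp, Value)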
+/// Macro for generating in-class operand accessor declarations. +/// It should only be called in the public section of the interface. +/// +#define DECLARE_TRANSPARENT_OPERAND_ACCESSORS(VALUECLASS) \ + public: \ + inline VALUECLASS *getOperand(unsigned) const; \ + inline void setOperand(unsigned, VALUECLASS*); \ + inline op_iterator op_begin(); \ + inline const_op_iterator op_begin() const; \ + inline op_iterator op_end(); \ + inline const_op_iterator op_end() const; \ + protected: \ + template <int> inline Use &Op(); \ + template <int> inline const Use &Op() const; \ + public: \ + inline unsigned getNumOperands() const + +/// Macro for generating out-of-class operand accessor definitions +#define DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CLASS, VALUECLASS) \ +CLASS::op_iterator CLASS::op_begin() { \ + return OperandTraits<CLASS>::op_begin(this); \ +} \ +CLASS::const_op_iterator CLASS::op_begin() const { \ + return OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this)); \ +} \ +CLASS::op_iterator CLASS::op_end() { \ + return OperandTraits<CLASS>::op_end(this); \ +} \ +CLASS::const_op_iterator CLASS::op_end() const { \ + return OperandTraits<CLASS>::op_end(const_cast<CLASS*>(this)); \ +} \ +VALUECLASS *CLASS::getOperand(unsigned i_nocapture) const { \ + assert(i_nocapture < OperandTraits<CLASS>::operands(this) \ + && "getOperand() out of range!"); \ + return static_cast<VALUECLASS*>( \ + OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this))[i_nocapture]); \ +} \ +void CLASS::setOperand(unsigned i_nocapture, VALUECLASS *Val_nocapture) { \ + assert(i_nocapture < OperandTraits<CLASS>::operands(this) \ + && "setOperand() out of range!"); \ + OperandTraits<CLASS>::op_begin(this)[i_nocapture] = Val_nocapture; \ +} \ +unsigned CLASS::getNumOperands() const { \ + return OperandTraits<CLASS>::operands(this); \ +} \ +template <int Idx_nocapture> Use &CLASS::Op() { \ + return this->OpFrom<Idx_nocapture>(this); \ +} \ +template <int Idx_nocapture> const Use &CLASS::Op() const { \ + return this->OpFrom<Idx_nocapture>(this); \ +} + + +/// Macro for generating out-of-class operand accessor +/// definitions with casted result +#define DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(CLASS, VALUECLASS) \ +CLASS::op_iterator CLASS::op_begin() { \ + return OperandTraits<CLASS>::op_begin(this); \ +} \ +CLASS::const_op_iterator CLASS::op_begin() const { \ + return OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this)); \ +} \ +CLASS::op_iterator CLASS::op_end() { \ + return OperandTraits<CLASS>::op_end(this); \ +} \ +CLASS::const_op_iterator CLASS::op_end() const { \ + return OperandTraits<CLASS>::op_end(const_cast<CLASS*>(this)); \ +} \ +VALUECLASS *CLASS::getOperand(unsigned i_nocapture) const { \ + assert(i_nocapture < OperandTraits<CLASS>::operands(this) \ + && "getOperand() out of range!"); \ + return cast<VALUECLASS>( \ + OperandTraits<CLASS>::op_begin(const_cast<CLASS*>(this))[i_nocapture]); \ +} \ +void CLASS::setOperand(unsigned i_nocapture, VALUECLASS *Val_nocapture) { \ + assert(i_nocapture < OperandTraits<CLASS>::operands(this) \ + && "setOperand() out of range!"); \ + OperandTraits<CLASS>::op_begin(this)[i_nocapture] = Val_nocapture; \ +} \ +unsigned CLASS::getNumOperands() const { \ + return OperandTraits<CLASS>::operands(this); \ +} \ +template <int Idx_nocapture> Use &CLASS::Op() { \ + return this->OpFrom<Idx_nocapture>(this); \ +} \ +template <int Idx_nocapture> const Use &CLASS::Op() const { \ + return this->OpFrom<Idx_nocapture>(this); \ +} + + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Operator.h b/final/include/llvm/Operator.h new file mode 100644 index 00000000000..ff2a0ad5e4e --- /dev/null +++ b/final/include/llvm/Operator.h @@ -0,0 +1,285 @@ +//===-- llvm/Operator.h - Operator utility subclass -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines various classes for working with Instructions and +// ConstantExprs. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OPERATOR_H +#define LLVM_OPERATOR_H + +#include "llvm/Instruction.h" +#include "llvm/Constants.h" + +namespace llvm { + +class GetElementPtrInst; +class BinaryOperator; +class ConstantExpr; + +/// Operator - This is a utility class that provides an abstraction for the +/// common functionality between Instructions and ConstantExprs. +/// +class Operator : public User { +private: + // Do not implement any of these. The Operator class is intended to be used + // as a utility, and is never itself instantiated. + void *operator new(size_t, unsigned); + void *operator new(size_t s); + Operator(); + ~Operator(); + +public: + /// getOpcode - Return the opcode for this Instruction or ConstantExpr. + /// + unsigned getOpcode() const { + if (const Instruction *I = dyn_cast<Instruction>(this)) + return I->getOpcode(); + return cast<ConstantExpr>(this)->getOpcode(); + } + + /// getOpcode - If V is an Instruction or ConstantExpr, return its + /// opcode. Otherwise return UserOp1. + /// + static unsigned getOpcode(const Value *V) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getOpcode(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) + return CE->getOpcode(); + return Instruction::UserOp1; + } + + static inline bool classof(const Operator *) { return true; } + static inline bool classof(const Instruction *) { return true; } + static inline bool classof(const ConstantExpr *) { return true; } + static inline bool classof(const Value *V) { + return isa<Instruction>(V) || isa<ConstantExpr>(V); + } +}; + +/// OverflowingBinaryOperator - Utility class for integer arithmetic operators +/// which may exhibit overflow - Add, Sub, and Mul. It does not include SDiv, +/// despite that operator having the potential for overflow. +/// +class OverflowingBinaryOperator : public Operator { +public: + enum { + NoUnsignedWrap = (1 << 0), + NoSignedWrap = (1 << 1) + }; + +private: + ~OverflowingBinaryOperator(); // do not implement + + friend class BinaryOperator; + friend class ConstantExpr; + void setHasNoUnsignedWrap(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap); + } + void setHasNoSignedWrap(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~NoSignedWrap) | (B * NoSignedWrap); + } + +public: + /// hasNoUnsignedWrap - Test whether this operation is known to never + /// undergo unsigned overflow, aka the nuw property. + bool hasNoUnsignedWrap() const { + return SubclassOptionalData & NoUnsignedWrap; + } + + /// hasNoSignedWrap - Test whether this operation is known to never + /// undergo signed overflow, aka the nsw property.
+ bool hasNoSignedWrap() const { + return (SubclassOptionalData & NoSignedWrap) != 0; + } + + static inline bool classof(const OverflowingBinaryOperator *) { return true; } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Add || + I->getOpcode() == Instruction::Sub || + I->getOpcode() == Instruction::Mul || + I->getOpcode() == Instruction::Shl; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Instruction::Add || + CE->getOpcode() == Instruction::Sub || + CE->getOpcode() == Instruction::Mul || + CE->getOpcode() == Instruction::Shl; + } + static inline bool classof(const Value *V) { + return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || + (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); + } +}; + +/// PossiblyExactOperator - A udiv or sdiv instruction, which can be marked as +/// "exact", indicating that no bits are destroyed. +class PossiblyExactOperator : public Operator { +public: + enum { + IsExact = (1 << 0) + }; + + friend class BinaryOperator; + friend class ConstantExpr; + void setIsExact(bool B) { + SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact); + } + +private: + ~PossiblyExactOperator(); // do not implement +public: + /// isExact - Test whether this division is known to be exact, with + /// zero remainder. + bool isExact() const { + return SubclassOptionalData & IsExact; + } + + static bool isPossiblyExactOpcode(unsigned OpC) { + return OpC == Instruction::SDiv || + OpC == Instruction::UDiv || + OpC == Instruction::AShr || + OpC == Instruction::LShr; + } + static inline bool classof(const ConstantExpr *CE) { + return isPossiblyExactOpcode(CE->getOpcode()); + } + static inline bool classof(const Instruction *I) { + return isPossiblyExactOpcode(I->getOpcode()); + } + static inline bool classof(const Value *V) { + return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || + (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); + } +}; + + + +/// ConcreteOperator - A helper template for defining operators for individual +/// opcodes. +template<typename SuperClass, unsigned Opc> +class ConcreteOperator : public SuperClass { + ~ConcreteOperator(); // DO NOT IMPLEMENT +public: + static inline bool classof(const ConcreteOperator *) { + return true; + } + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Opc; + } + static inline bool classof(const ConstantExpr *CE) { + return CE->getOpcode() == Opc; + } + static inline bool classof(const Value *V) { + return (isa<Instruction>(V) && classof(cast<Instruction>(V))) || + (isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V))); + } +}; + +class AddOperator + : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Add> {}; +class SubOperator + : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Sub> {}; +class MulOperator + : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Mul> {}; +class ShlOperator + : public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {}; + + +class SDivOperator + : public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {}; +class UDivOperator + : public ConcreteOperator<PossiblyExactOperator, Instruction::UDiv> {}; +class AShrOperator + : public ConcreteOperator<PossiblyExactOperator, Instruction::AShr> {}; +class LShrOperator + : public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {}; + + + +class GEPOperator + : public ConcreteOperator<Operator, Instruction::GetElementPtr> { + enum { + IsInBounds = (1 << 0) + }; + + friend class GetElementPtrInst; + friend class ConstantExpr; + void setIsInBounds(bool B) { + SubclassOptionalData = + (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds); + }
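The payoff of these wrappers is uniform flag queries over both Instructions and ConstantExprs; the classof chains above make dyn_cast do the dispatch. A sketch (V is any const Value*):

    if (const AddOperator *Add = dyn_cast<AddOperator>(V))
      if (Add->hasNoSignedWrap())
        ;  // nsw add, whether V is an instruction or a constant expression
    if (const SDivOperator *Div = dyn_cast<SDivOperator>(V))
      if (Div->isExact())
        ;  // division known to have a zero remainder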
+
+class GEPOperator
+  : public ConcreteOperator<Operator, Instruction::GetElementPtr> {
+  enum {
+    IsInBounds = (1 << 0)
+  };
+
+  friend class GetElementPtrInst;
+  friend class ConstantExpr;
+  void setIsInBounds(bool B) {
+    SubclassOptionalData =
+      (SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
+  }
+
+public:
+  /// isInBounds - Test whether this is an inbounds GEP, as defined
+  /// by LangRef.html.
+  bool isInBounds() const {
+    return SubclassOptionalData & IsInBounds;
+  }
+
+  inline op_iterator       idx_begin()       { return op_begin()+1; }
+  inline const_op_iterator idx_begin() const { return op_begin()+1; }
+  inline op_iterator       idx_end()         { return op_end(); }
+  inline const_op_iterator idx_end()   const { return op_end(); }
+
+  Value *getPointerOperand() {
+    return getOperand(0);
+  }
+  const Value *getPointerOperand() const {
+    return getOperand(0);
+  }
+  static unsigned getPointerOperandIndex() {
+    return 0U;    // get index for modifying correct operand
+  }
+
+  /// getPointerOperandType - Method to return the pointer operand as a
+  /// PointerType.
+  const PointerType *getPointerOperandType() const {
+    return reinterpret_cast<const PointerType*>(getPointerOperand()->getType());
+  }
+
+  unsigned getNumIndices() const {  // Note: always non-negative
+    return getNumOperands() - 1;
+  }
+
+  bool hasIndices() const {
+    return getNumOperands() > 1;
+  }
+
+  /// hasAllZeroIndices - Return true if all of the indices of this GEP are
+  /// zeros. If so, the result pointer and the first operand have the same
+  /// value, just potentially different types.
+  bool hasAllZeroIndices() const {
+    for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+      if (ConstantInt *C = dyn_cast<ConstantInt>(I))
+        if (C->isZero())
+          continue;
+      return false;
+    }
+    return true;
+  }
+
+  /// hasAllConstantIndices - Return true if all of the indices of this GEP are
+  /// constant integers. If so, the result pointer and the first operand have
+  /// a constant offset between them.
+  bool hasAllConstantIndices() const {
+    for (const_op_iterator I = idx_begin(), E = idx_end(); I != E; ++I) {
+      if (!isa<ConstantInt>(I))
+        return false;
+    }
+    return true;
+  }
+};
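+
+// Illustrative usage sketch (not part of the original header): GEPOperator
+// matches both getelementptr instructions and getelementptr constant
+// expressions, so pointer analyses can treat them alike:
+//
+//   static bool isNoOpGEP(const Value *V) {
+//     if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+//       return GEP->hasAllZeroIndices();   // Same address, new type only.
+//     return false;
+//   }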
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Pass.h b/final/include/llvm/Pass.h
new file mode 100644
index 00000000000..ed0fb39f5d6
--- /dev/null
+++ b/final/include/llvm/Pass.h
@@ -0,0 +1,371 @@
+//===- llvm/Pass.h - Base class for Passes ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a base class that indicates that a specified class is a
+// transformation pass implementation.
+//
+// Passes are designed this way so that it is possible to run passes in a cache
+// and organizationally optimal order without having to specify it at the front
+// end. This allows arbitrary passes to be strung together and have them
+// executed as efficiently as possible.
+//
+// Passes should extend one of the classes below, depending on the guarantees
+// that they can make about what will be modified as they run. For example,
+// most global optimizations should derive from FunctionPass, because they do
+// not add or delete functions; they operate on the internals of the function.
+//
+// Note that this file #includes PassSupport.h and PassAnalysisSupport.h (at the
+// bottom), so the APIs exposed by these files are also automatically available
+// to all users of this file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASS_H
+#define LLVM_PASS_H
+
+#include <string>
+
+namespace llvm {
+
+class BasicBlock;
+class Function;
+class Module;
+class AnalysisUsage;
+class PassInfo;
+class ImmutablePass;
+class PMStack;
+class AnalysisResolver;
+class PMDataManager;
+class raw_ostream;
+class StringRef;
+
+// AnalysisID - Use the PassInfo to identify a pass...
+typedef const void* AnalysisID;
+
+/// Different types of internal pass managers. External pass managers
+/// (PassManager and FunctionPassManager) are not represented here.
+/// Ordering of pass manager types is important here.
+enum PassManagerType {
+  PMT_Unknown = 0,
+  PMT_ModulePassManager = 1, ///< MPPassManager
+  PMT_CallGraphPassManager,  ///< CGPassManager
+  PMT_FunctionPassManager,   ///< FPPassManager
+  PMT_LoopPassManager,       ///< LPPassManager
+  PMT_RegionPassManager,     ///< RGPassManager
+  PMT_BasicBlockPassManager, ///< BBPassManager
+  PMT_Last
+};
+
+// Different types of passes.
+enum PassKind {
+  PT_BasicBlock,
+  PT_Region,
+  PT_Loop,
+  PT_Function,
+  PT_CallGraphSCC,
+  PT_Module,
+  PT_PassManager
+};
+
+//===----------------------------------------------------------------------===//
+/// Pass interface - Implemented by all 'passes'. Subclass this if you are an
+/// interprocedural optimization or you do not fit into any of the more
+/// constrained passes described below.
+///
+class Pass {
+  AnalysisResolver *Resolver;  // Used to resolve analysis
+  const void *PassID;
+  PassKind Kind;
+  void operator=(const Pass&);  // DO NOT IMPLEMENT
+  Pass(const Pass &);           // DO NOT IMPLEMENT
+
+public:
+  explicit Pass(PassKind K, char &pid);
+  virtual ~Pass();
+
+
+  PassKind getPassKind() const { return Kind; }
+
+  /// getPassName - Return a nice clean name for a pass. This is usually
+  /// implemented in terms of the name that is registered by one of the
+  /// Registration templates, but can be overloaded directly.
+  ///
+  virtual const char *getPassName() const;
+
+  /// getPassID - Return the PassID number that corresponds to this pass.
+  virtual AnalysisID getPassID() const {
+    return PassID;
+  }
+
+  /// print - Print out the internal state of the pass. This is called by
+  /// Analyze to print out the contents of an analysis. Otherwise it is not
+  /// necessary to implement this method. Beware that the module pointer MAY be
+  /// null. This automatically forwards to a virtual function that does not
+  /// provide the Module* in case the analysis doesn't need it; it can just be
+  /// ignored.
+  ///
+  virtual void print(raw_ostream &O, const Module *M) const;
+  void dump() const; // dump - Print to stderr.
+
+  /// createPrinterPass - Get a Pass appropriate to print the IR this
+  /// pass operates on (Module, Function or MachineFunction).
+  virtual Pass *createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const = 0;
+
+  /// Each pass is responsible for assigning a pass manager to itself.
+  /// PMS is the stack of available pass managers.
+  virtual void assignPassManager(PMStack &,
+                                 PassManagerType) {}
+  /// Check if available pass managers are suitable for this pass or not.
+  virtual void preparePassManager(PMStack &);
+
+  /// Return what kind of Pass Manager can manage this pass.
+  virtual PassManagerType getPotentialPassManagerType() const;
+
+  // Access AnalysisResolver
+  void setResolver(AnalysisResolver *AR);
+  AnalysisResolver *getResolver() const { return Resolver; }
+
+  /// getAnalysisUsage - This function should be overridden by passes that need
+  /// analysis information to do their job. If a pass specifies that it uses a
+  /// particular analysis result to this function, it can then use the
+  /// getAnalysis<AnalysisType>() function, below.
+  ///
+  virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+  /// releaseMemory() - This member can be implemented by a pass if it wants to
+  /// be able to release its memory when it is no longer needed. The default
+  /// behavior of passes is to hold onto memory for the entire duration of their
+  /// lifetime (which is the entire compile time). For pipelined passes, this
+  /// is not a big deal because that memory gets recycled every time the pass is
+  /// invoked on another program unit. For IP passes, it is more important to
+  /// free memory when it is unused.
+  ///
+  /// Optionally implement this function to release pass memory when it is no
+  /// longer used.
+  ///
+  virtual void releaseMemory();
+
+  /// getAdjustedAnalysisPointer - This method is used when a pass implements
+  /// an analysis interface through multiple inheritance. If needed, it should
+  /// override this to adjust the this pointer as needed for the specified pass
+  /// info.
+  virtual void *getAdjustedAnalysisPointer(AnalysisID ID);
+  virtual ImmutablePass *getAsImmutablePass();
+  virtual PMDataManager *getAsPMDataManager();
+
+  /// verifyAnalysis() - This member can be implemented by an analysis pass to
+  /// check the state of analysis information.
+  virtual void verifyAnalysis() const;
+
+  // dumpPassStructure - Implement the -debug-passes=PassStructure option
+  virtual void dumpPassStructure(unsigned Offset = 0);
+
+  // lookupPassInfo - Return the pass info object for the specified pass class,
+  // or null if it is not known.
+  static const PassInfo *lookupPassInfo(const void *TI);
+
+  // lookupPassInfo - Return the pass info object for the pass with the given
+  // argument string, or null if it is not known.
+  static const PassInfo *lookupPassInfo(StringRef Arg);
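+
+  // Illustrative usage sketch (not part of the original header): tools that
+  // accept pass names on the command line can map an argument string to a
+  // runnable pass through the registry, e.g.:
+  //
+  //   if (const PassInfo *PI = Pass::lookupPassInfo("licm")) {
+  //     Pass *P = PI->createPass();   // Caller owns the new pass.
+  //     // ... hand P to a PassManager ...
+  //   }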
+
+  /// getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to
+  /// get analysis information that might be around, for example to update it.
+  /// This is different than getAnalysis in that it can fail (if the analysis
+  /// results haven't been computed), so should only be used if you can handle
+  /// the case when the analysis is not available. This method is often used by
+  /// transformation APIs to update analysis results for a pass automatically as
+  /// the transform is performed.
+  ///
+  template<typename AnalysisType> AnalysisType *
+    getAnalysisIfAvailable() const; // Defined in PassAnalysisSupport.h
+
+  /// mustPreserveAnalysisID - This method serves the same function as
+  /// getAnalysisIfAvailable, but works if you just have an AnalysisID. This
+  /// obviously cannot give you a properly typed instance of the class if you
+  /// don't have the class name available (use getAnalysisIfAvailable if you
+  /// do), but it can tell you if you need to preserve the pass at least.
+  ///
+  bool mustPreserveAnalysisID(char &AID) const;
+
+  /// getAnalysis<AnalysisType>() - This function is used by subclasses to get
+  /// to the analysis information that they claim to use by overriding the
+  /// getAnalysisUsage function.
+  ///
+  template<typename AnalysisType>
+  AnalysisType &getAnalysis() const; // Defined in PassAnalysisSupport.h
+
+  template<typename AnalysisType>
+  AnalysisType &getAnalysis(Function &F); // Defined in PassAnalysisSupport.h
+
+  template<typename AnalysisType>
+  AnalysisType &getAnalysisID(AnalysisID PI) const;
+
+  template<typename AnalysisType>
+  AnalysisType &getAnalysisID(AnalysisID PI, Function &F);
+};
+
+
+//===----------------------------------------------------------------------===//
+/// ModulePass class - This class is used to implement unstructured
+/// interprocedural optimizations and analyses. ModulePasses may do anything
+/// they want to the program.
+///
+class ModulePass : public Pass {
+public:
+  /// createPrinterPass - Get a module printer pass.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  /// runOnModule - Virtual method overridden by subclasses to process the
+  /// module being operated on.
+  virtual bool runOnModule(Module &M) = 0;
+
+  virtual void assignPassManager(PMStack &PMS,
+                                 PassManagerType T);
+
+  /// Return what kind of Pass Manager can manage this pass.
+  virtual PassManagerType getPotentialPassManagerType() const;
+
+  explicit ModulePass(char &pid) : Pass(PT_Module, pid) {}
+  // Force out-of-line virtual method.
+  virtual ~ModulePass();
+};
+
+
+//===----------------------------------------------------------------------===//
+/// ImmutablePass class - This class is used to provide information that does
+/// not need to be run. This is useful for things like target information and
+/// "basic" versions of AnalysisGroups.
+///
+class ImmutablePass : public ModulePass {
+public:
+  /// initializePass - This method may be overridden by immutable passes to
+  /// allow them to perform various initialization actions they require. This
+  /// is primarily because an ImmutablePass can "require" another ImmutablePass,
+  /// and if it does, the overloaded version of initializePass may get access to
+  /// these passes with getAnalysis<>.
+  ///
+  virtual void initializePass();
+
+  virtual ImmutablePass *getAsImmutablePass() { return this; }
+
+  /// ImmutablePasses are never run.
+  ///
+  bool runOnModule(Module &) { return false; }
+
+  explicit ImmutablePass(char &pid)
+  : ModulePass(pid) {}
+
+  // Force out-of-line virtual method.
+  virtual ~ImmutablePass();
+};
+
+//===----------------------------------------------------------------------===//
+/// FunctionPass class - This class is used to implement most global
+/// optimizations. Optimizations should subclass this class if they meet the
+/// following constraints:
+///
+///  1. Optimizations are organized globally, i.e., a function at a time
+///  2. Optimizing a function does not cause the addition or removal of any
+///     functions in the module
+///
+class FunctionPass : public Pass {
+public:
+  explicit FunctionPass(char &pid) : Pass(PT_Function, pid) {}
+
+  /// createPrinterPass - Get a function printer pass.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  /// doInitialization - Virtual method overridden by subclasses to do
+  /// any necessary per-module initialization.
+  ///
+  virtual bool doInitialization(Module &);
+
+  /// runOnFunction - Virtual method overridden by subclasses to do the
+  /// per-function processing of the pass.
+  ///
+  virtual bool runOnFunction(Function &F) = 0;
+
+  /// doFinalization - Virtual method overridden by subclasses to do any post
+  /// processing needed after all passes have run.
+  ///
+  virtual bool doFinalization(Module &);
+
+  virtual void assignPassManager(PMStack &PMS,
+                                 PassManagerType T);
+
+  /// Return what kind of Pass Manager can manage this pass.
+  virtual PassManagerType getPotentialPassManagerType() const;
+};
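+
+// Illustrative usage sketch (not part of the original header): a minimal
+// FunctionPass in the style this interface expects, omitting the usual
+// registration boilerplate from PassSupport.h. "CountBlocks" is hypothetical.
+//
+//   namespace {
+//     struct CountBlocks : public FunctionPass {
+//       static char ID;
+//       CountBlocks() : FunctionPass(ID) {}
+//       virtual bool runOnFunction(Function &F) {
+//         unsigned N = 0;
+//         for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+//           ++N;
+//         (void)N;        // Analysis only; the IR is left untouched.
+//         return false;   // Report that nothing was modified.
+//       }
+//     };
+//   }
+//   char CountBlocks::ID = 0;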
+
+
+//===----------------------------------------------------------------------===//
+/// BasicBlockPass class - This class is used to implement most local
+/// optimizations. Optimizations should subclass this class if they
+/// meet the following constraints:
+///   1. Optimizations are local, operating on either a basic block or
+///      instruction at a time.
+///   2. Optimizations do not modify the CFG of the contained function, or any
+///      other basic block in the function.
+///   3. Optimizations conform to all of the constraints of FunctionPasses.
+///
+class BasicBlockPass : public Pass {
+public:
+  explicit BasicBlockPass(char &pid) : Pass(PT_BasicBlock, pid) {}
+
+  /// createPrinterPass - Get a basic block printer pass.
+  Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const;
+
+  /// doInitialization - Virtual method overridden by subclasses to do
+  /// any necessary per-module initialization.
+  ///
+  virtual bool doInitialization(Module &);
+
+  /// doInitialization - Virtual method overridden by BasicBlockPass subclasses
+  /// to do any necessary per-function initialization.
+  ///
+  virtual bool doInitialization(Function &);
+
+  /// runOnBasicBlock - Virtual method overridden by subclasses to do the
+  /// per-basicblock processing of the pass.
+  ///
+  virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
+
+  /// doFinalization - Virtual method overridden by BasicBlockPass subclasses
+  /// to do any post processing needed after all passes have run.
+  ///
+  virtual bool doFinalization(Function &);
+
+  /// doFinalization - Virtual method overridden by subclasses to do any post
+  /// processing needed after all passes have run.
+  ///
+  virtual bool doFinalization(Module &);
+
+  virtual void assignPassManager(PMStack &PMS,
+                                 PassManagerType T);
+
+  /// Return what kind of Pass Manager can manage this pass.
+  virtual PassManagerType getPotentialPassManagerType() const;
+};
+
+/// If the user specifies the -time-passes argument on an LLVM tool command
+/// line, then the value of this boolean will be true; otherwise false.
+/// @brief This is the storage for the -time-passes option.
+extern bool TimePassesIsEnabled;
+
+} // End llvm namespace
+
+// Include support files that contain important APIs commonly used by Passes,
+// but that we want to separate out to make it easier to read the header files.
+//
+#include "llvm/PassSupport.h"
+#include "llvm/PassAnalysisSupport.h"
+
+#endif
diff --git a/final/include/llvm/PassAnalysisSupport.h b/final/include/llvm/PassAnalysisSupport.h
new file mode 100644
index 00000000000..a3342d51386
--- /dev/null
+++ b/final/include/llvm/PassAnalysisSupport.h
@@ -0,0 +1,250 @@
+//===- llvm/PassAnalysisSupport.h - Analysis Pass Support code --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines stuff that is used to define and "use" Analysis Passes.
+// This file is automatically #included by Pass.h, so:
+//
+//                   NO .CPP FILES SHOULD INCLUDE THIS FILE DIRECTLY
+//
+// Instead, #include Pass.h
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASS_ANALYSIS_SUPPORT_H
+#define LLVM_PASS_ANALYSIS_SUPPORT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include <vector>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// AnalysisUsage - Represent the analysis usage information of a pass. This
+// tracks analyses that the pass REQUIRES (must be available when the pass
+// runs), REQUIRES TRANSITIVE (must be available throughout the lifetime of the
+// pass), and analyses that the pass PRESERVES (the pass does not invalidate the
+// results of these analyses). This information is provided by a pass to the
+// Pass infrastructure through the getAnalysisUsage virtual function.
+//
+class AnalysisUsage {
+public:
+  typedef SmallVector<AnalysisID, 32> VectorType;
+
+private:
+  // Sets of analyses required and preserved by a pass
+  VectorType Required, RequiredTransitive, Preserved;
+  bool PreservesAll;
+
+public:
+  AnalysisUsage() : PreservesAll(false) {}
+
+  // addRequired - Add the specified ID to the required set of the usage info
+  // for a pass.
+  //
+  AnalysisUsage &addRequiredID(const void *ID);
+  AnalysisUsage &addRequiredID(char &ID);
+  template<class PassClass>
+  AnalysisUsage &addRequired() {
+    return addRequiredID(PassClass::ID);
+  }
+
+  AnalysisUsage &addRequiredTransitiveID(char &ID);
+  template<class PassClass>
+  AnalysisUsage &addRequiredTransitive() {
+    return addRequiredTransitiveID(PassClass::ID);
+  }
+
+  // addPreserved - Add the specified ID to the set of analyses preserved by
+  // this pass
+  //
+  AnalysisUsage &addPreservedID(const void *ID) {
+    Preserved.push_back(ID);
+    return *this;
+  }
+  AnalysisUsage &addPreservedID(char &ID) {
+    Preserved.push_back(&ID);
+    return *this;
+  }
+
+  // addPreserved - Add the specified Pass class to the set of analyses
+  // preserved by this pass.
+  //
+  template<class PassClass>
+  AnalysisUsage &addPreserved() {
+    Preserved.push_back(&PassClass::ID);
+    return *this;
+  }
+
+  // addPreserved - Add the Pass with the specified argument string to the set
+  // of analyses preserved by this pass. If no such Pass exists, do nothing.
+  // This can be useful when a pass is trivially preserved, but may not be
+  // linked in. Be careful about spelling!
+  //
+  AnalysisUsage &addPreserved(StringRef Arg);
+
+  // setPreservesAll - Set by analyses that do not transform their input at all
+  void setPreservesAll() { PreservesAll = true; }
+  bool getPreservesAll() const { return PreservesAll; }
+
+  /// setPreservesCFG - This function should be called by a pass iff it does
+  /// not:
+  ///
+  ///  1. Add or remove basic blocks from the function
+  ///  2. Modify terminator instructions in any way.
+  ///
+  /// This function annotates the AnalysisUsage info object to say that analyses
+  /// that only depend on the CFG are preserved by this pass.
+  ///
+  void setPreservesCFG();
+
+  const VectorType &getRequiredSet() const { return Required; }
+  const VectorType &getRequiredTransitiveSet() const {
+    return RequiredTransitive;
+  }
+  const VectorType &getPreservedSet() const { return Preserved; }
+};
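+
+// Illustrative usage sketch (not part of the original header): a pass
+// typically fills in its AnalysisUsage like this (DominatorTree is the
+// standard dominator analysis; any required pass class works the same way):
+//
+//   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequired<DominatorTree>();   // Must run before this pass.
+//     AU.setPreservesCFG();              // This pass never edits the CFG.
+//   }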
+
+//===----------------------------------------------------------------------===//
+// AnalysisResolver - Simple interface used by Pass objects to pull all
+// analysis information out of the pass manager that is responsible for
+// managing the pass.
+//
+class PMDataManager;
+class AnalysisResolver {
+private:
+  AnalysisResolver();  // DO NOT IMPLEMENT
+
+public:
+  explicit AnalysisResolver(PMDataManager &P) : PM(P) { }
+
+  inline PMDataManager &getPMDataManager() { return PM; }
+
+  // Find pass that is implementing PI.
+  Pass *findImplPass(AnalysisID PI) {
+    Pass *ResultPass = 0;
+    for (unsigned i = 0; i < AnalysisImpls.size() ; ++i) {
+      if (AnalysisImpls[i].first == PI) {
+        ResultPass = AnalysisImpls[i].second;
+        break;
+      }
+    }
+    return ResultPass;
+  }
+
+  // Find pass that is implementing PI. Initialize pass for Function F.
+  Pass *findImplPass(Pass *P, AnalysisID PI, Function &F);
+
+  void addAnalysisImplsPair(AnalysisID PI, Pass *P) {
+    std::pair<AnalysisID, Pass*> pir = std::make_pair(PI,P);
+    AnalysisImpls.push_back(pir);
+  }
+
+  /// clearAnalysisImpls - Clear cache that is used to connect a pass to the
+  /// analysis (PassInfo).
+  void clearAnalysisImpls() {
+    AnalysisImpls.clear();
+  }
+
+  // getAnalysisIfAvailable - Return analysis result or null if it doesn't exist
+  Pass *getAnalysisIfAvailable(AnalysisID ID, bool Direction) const;
+
+private:
+  // AnalysisImpls - This keeps track of which passes implement the interfaces
+  // that are required by the current pass (to implement getAnalysis()).
+  std::vector<std::pair<AnalysisID, Pass*> > AnalysisImpls;
+
+  // PassManager that is used to resolve analysis info
+  PMDataManager &PM;
+};
+
+/// getAnalysisIfAvailable<AnalysisType>() - Subclasses use this function to
+/// get analysis information that might be around, for example to update it.
+/// This is different than getAnalysis in that it can fail (if the analysis
+/// results haven't been computed), so should only be used if you can handle
+/// the case when the analysis is not available. This method is often used by
+/// transformation APIs to update analysis results for a pass automatically as
+/// the transform is performed.
+///
+template<typename AnalysisType>
+AnalysisType *Pass::getAnalysisIfAvailable() const {
+  assert(Resolver && "Pass not resident in a PassManager object!");
+
+  const void *PI = &AnalysisType::ID;
+
+  Pass *ResultPass = Resolver->getAnalysisIfAvailable(PI, true);
+  if (ResultPass == 0) return 0;
+
+  // Because the AnalysisType may not be a subclass of pass (for
+  // AnalysisGroups), we use getAdjustedAnalysisPointer here to potentially
+  // adjust the return pointer (because the class may multiply inherit, once
+  // from pass, once from AnalysisType).
+  return (AnalysisType*)ResultPass->getAdjustedAnalysisPointer(PI);
+}
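+
+// Illustrative usage sketch (not part of the original header): unlike
+// getAnalysis<>, this call may legitimately return null, so the result must
+// always be tested (DominatorTree stands in for any analysis type):
+//
+//   if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+//     ; // Update or query DT.
+//   // Otherwise proceed without it; the analysis was not computed.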
+
+/// getAnalysis<AnalysisType>() - This function is used by subclasses to get
+/// to the analysis information that they claim to use by overriding the
+/// getAnalysisUsage function.
+///
+template<typename AnalysisType>
+AnalysisType &Pass::getAnalysis() const {
+  assert(Resolver && "Pass has not been inserted into a PassManager object!");
+  return getAnalysisID<AnalysisType>(&AnalysisType::ID);
+}
+
+template<typename AnalysisType>
+AnalysisType &Pass::getAnalysisID(AnalysisID PI) const {
+  assert(PI && "getAnalysis for unregistered pass!");
+  assert(Resolver && "Pass has not been inserted into a PassManager object!");
+  // PI *must* appear in AnalysisImpls. Because the number of passes used
+  // should be a small number, we just do a linear search over a (dense)
+  // vector.
+  Pass *ResultPass = Resolver->findImplPass(PI);
+  assert(ResultPass &&
+         "getAnalysis*() called on an analysis that was not "
+         "'required' by pass!");
+
+  // Because the AnalysisType may not be a subclass of pass (for
+  // AnalysisGroups), we use getAdjustedAnalysisPointer here to potentially
+  // adjust the return pointer (because the class may multiply inherit, once
+  // from pass, once from AnalysisType).
+  return *(AnalysisType*)ResultPass->getAdjustedAnalysisPointer(PI);
+}
+
+/// getAnalysis<AnalysisType>() - This function is used by subclasses to get
+/// to the analysis information that they claim to use by overriding the
+/// getAnalysisUsage function.
+///
+template<typename AnalysisType>
+AnalysisType &Pass::getAnalysis(Function &F) {
+  assert(Resolver && "Pass has not been inserted into a PassManager object!");
+
+  return getAnalysisID<AnalysisType>(&AnalysisType::ID, F);
+}
+
+template<typename AnalysisType>
+AnalysisType &Pass::getAnalysisID(AnalysisID PI, Function &F) {
+  assert(PI && "getAnalysis for unregistered pass!");
+  assert(Resolver && "Pass has not been inserted into a PassManager object!");
+  // PI *must* appear in AnalysisImpls. Because the number of passes used
+  // should be a small number, we just do a linear search over a (dense)
+  // vector.
+  Pass *ResultPass = Resolver->findImplPass(this, PI, F);
+  assert(ResultPass && "Unable to find requested analysis info");
+
+  // Because the AnalysisType may not be a subclass of pass (for
+  // AnalysisGroups), we use getAdjustedAnalysisPointer here to potentially
+  // adjust the return pointer (because the class may multiply inherit, once
+  // from pass, once from AnalysisType).
+  return *(AnalysisType*)ResultPass->getAdjustedAnalysisPointer(PI);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/PassManager.h b/final/include/llvm/PassManager.h
new file mode 100644
index 00000000000..c8b5dcaf0f2
--- /dev/null
+++ b/final/include/llvm/PassManager.h
@@ -0,0 +1,111 @@
+//===- llvm/PassManager.h - Container for Passes ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PassManager class. This class is used to hold,
+// maintain, and optimize execution of Passes. The PassManager class ensures
+// that analysis results are available before a pass runs, and that Passes are
+// destroyed when the PassManager is destroyed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSMANAGER_H
+#define LLVM_PASSMANAGER_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+class Pass;
+class Module;
+
+class PassManagerImpl;
+class FunctionPassManagerImpl;
+
+/// PassManagerBase - An abstract interface to allow code to add passes to
+/// a pass manager without having to hard-code what kind of pass manager
+/// it is.
+class PassManagerBase {
+public:
+  virtual ~PassManagerBase();
+
+  /// add - Add a pass to the queue of passes to run. This passes ownership of
+  /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+  /// will be destroyed as well, so there is no need to delete the pass. This
+  /// implies that all passes MUST be allocated with 'new'.
+  virtual void add(Pass *P) = 0;
+};
+
+/// PassManager manages ModulePassManagers
+class PassManager : public PassManagerBase {
+public:
+
+  PassManager();
+  ~PassManager();
+
+  /// add - Add a pass to the queue of passes to run. This passes ownership of
+  /// the Pass to the PassManager. When the PassManager is destroyed, the pass
+  /// will be destroyed as well, so there is no need to delete the pass. This
+  /// implies that all passes MUST be allocated with 'new'.
+  void add(Pass *P);
+
+  /// run - Execute all of the passes scheduled for execution. Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool run(Module &M);
+
+private:
+  /// addImpl - Add a pass to the queue of passes to run, without
+  /// checking whether to add a printer pass.
+  void addImpl(Pass *P);
+
+  /// PassManagerImpl is the actual class. PassManager is just the
+  /// wrapper that publishes the simple pass manager interface.
+  PassManagerImpl *PM;
+};
+
+/// FunctionPassManager manages FunctionPasses and BasicBlockPassManagers.
+class FunctionPassManager : public PassManagerBase {
+public:
+  /// FunctionPassManager ctor - This initializes the pass manager. It needs,
+  /// but does not take ownership of, the specified Module.
+  explicit FunctionPassManager(Module *M);
+  ~FunctionPassManager();
+
+  /// add - Add a pass to the queue of passes to run. This passes
+  /// ownership of the Pass to the PassManager. When the
+  /// PassManager_X is destroyed, the pass will be destroyed as well, so
+  /// there is no need to delete the pass. (TODO delete passes.)
+  /// This implies that all passes MUST be allocated with 'new'.
+  void add(Pass *P);
+
+  /// run - Execute all of the passes scheduled for execution. Keep
+  /// track of whether any of the passes modifies the function, and if
+  /// so, return true.
+  ///
+  bool run(Function &F);
+
+  /// doInitialization - Run all of the initializers for the function passes.
+  ///
+  bool doInitialization();
+
+  /// doFinalization - Run all of the finalizers for the function passes.
+  ///
+  bool doFinalization();
+
+private:
+  /// addImpl - Add a pass to the queue of passes to run, without
+  /// checking whether to add a printer pass.
+  void addImpl(Pass *P);
+
+  FunctionPassManagerImpl *FPM;
+  Module *M;
+};
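+
+// Illustrative usage sketch (not part of the original header): a JIT-style
+// client runs function passes lazily, one function at a time. createXyzPass
+// stands in for any real pass constructor:
+//
+//   FunctionPassManager FPM(M);        // M is a Module* the caller owns.
+//   FPM.add(createXyzPass());          // FPM now owns the pass.
+//   FPM.doInitialization();
+//   bool Changed = FPM.run(F);         // Run on one Function F.
+//   FPM.doFinalization();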
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/PassManagers.h b/final/include/llvm/PassManagers.h
new file mode 100644
index 00000000000..c4f409ef525
--- /dev/null
+++ b/final/include/llvm/PassManagers.h
@@ -0,0 +1,460 @@
+//===- llvm/PassManagers.h - Pass Infrastructure classes -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the LLVM Pass Manager infrastructure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSMANAGERS_H
+#define LLVM_PASSMANAGERS_H
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+#include <map>
+
+//===----------------------------------------------------------------------===//
+// Overview:
+// The Pass Manager Infrastructure manages passes. Its responsibilities are:
+//
+//   o Manage optimization pass execution order
+//   o Make required Analysis information available before pass P is run
+//   o Release memory occupied by dead passes
+//   o If Analysis information is dirtied by a pass then regenerate Analysis
+//     information before it is consumed by another pass.
+//
+// Pass Manager Infrastructure uses multiple pass managers. They are
+// PassManager, FunctionPassManager, MPPassManager, FPPassManager,
+// BBPassManager. This class hierarchy uses multiple inheritance, but pass
+// managers do not derive from another pass manager.
+//
+// PassManager and FunctionPassManager are the two top-level pass managers
+// that represent the external interface of this entire pass manager
+// infrastructure.
+//
+// Important classes:
+//
+// [o] class PMTopLevelManager;
+//
+// Two top level managers, PassManager and FunctionPassManager, derive from
+// PMTopLevelManager. PMTopLevelManager manages information used by top level
+// managers such as last user info.
+//
+// [o] class PMDataManager;
+//
+// PMDataManager manages information, e.g. list of available analysis info,
+// used by a pass manager to manage execution order of passes. It also provides
+// a place to implement common pass manager APIs. All pass managers derive from
+// PMDataManager.
+//
+// [o] class BBPassManager : public FunctionPass, public PMDataManager;
+//
+// BBPassManager manages BasicBlockPasses.
+//
+// [o] class FunctionPassManager;
+//
+// This is an external interface used by the JIT to manage FunctionPasses. This
+// interface relies on FunctionPassManagerImpl to do all the tasks.
+//
+// [o] class FunctionPassManagerImpl : public ModulePass, PMDataManager,
+//                                     public PMTopLevelManager;
+//
+// FunctionPassManagerImpl is a top level manager. It manages FPPassManagers.
+//
+// [o] class FPPassManager : public ModulePass, public PMDataManager;
+//
+// FPPassManager manages FunctionPasses and BBPassManagers.
+//
+// [o] class MPPassManager : public Pass, public PMDataManager;
+//
+// MPPassManager manages ModulePasses and FPPassManagers.
+//
+// [o] class PassManager;
+//
+// This is an external interface used by various tools to manage passes. It
+// relies on PassManagerImpl to do all the tasks.
+//
+// [o] class PassManagerImpl : public Pass, public PMDataManager,
+//                             public PMTopLevelManager
+//
+// PassManagerImpl is a top level pass manager responsible for managing
+// MPPassManagers.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PrettyStackTrace.h"
+
+namespace llvm {
+  class Module;
+  class Pass;
+  class StringRef;
+  class Value;
+  class Timer;
+  class PMDataManager;
+
+// enums for debugging strings
+enum PassDebuggingString {
+  EXECUTION_MSG,      // "Executing Pass '"
+  MODIFICATION_MSG,   // "' Made Modification '"
+  FREEING_MSG,        // " Freeing Pass '"
+  ON_BASICBLOCK_MSG,  // "' on BasicBlock '" + PassName + "'...\n"
+  ON_FUNCTION_MSG,    // "' on Function '" + FunctionName + "'...\n"
+  ON_MODULE_MSG,      // "' on Module '" + ModuleName + "'...\n"
+  ON_REGION_MSG,      // " 'on Region ...\n'"
+  ON_LOOP_MSG,        // " 'on Loop ...\n'"
+  ON_CG_MSG           // "' on Call Graph ...\n'"
+};
+
+/// PassManagerPrettyStackEntry - This is used to print informative information
+/// about what pass is running when/if a stack trace is generated.
+class PassManagerPrettyStackEntry : public PrettyStackTraceEntry {
+  Pass *P;
+  Value *V;
+  Module *M;
+public:
+  explicit PassManagerPrettyStackEntry(Pass *p)
+    : P(p), V(0), M(0) {}  // When P is releaseMemory'd.
+  PassManagerPrettyStackEntry(Pass *p, Value &v)
+    : P(p), V(&v), M(0) {} // When P is run on V
+  PassManagerPrettyStackEntry(Pass *p, Module &m)
+    : P(p), V(0), M(&m) {} // When P is run on M
+
+  /// print - Emit information about this stack frame to OS.
+  virtual void print(raw_ostream &OS) const;
+};
+
+
+//===----------------------------------------------------------------------===//
+// PMStack
+//
+/// PMStack - This class implements a stack data structure of PMDataManager
+/// pointers.
+///
+/// Top level pass managers (see PassManager.cpp) maintain active Pass Managers
+/// using PMStack. Each Pass implements assignPassManager() to connect itself
+/// with the appropriate manager. assignPassManager() walks PMStack to find a
+/// suitable manager.
+class PMStack {
+public:
+  typedef std::vector<PMDataManager *>::const_reverse_iterator iterator;
+  iterator begin() const { return S.rbegin(); }
+  iterator end() const { return S.rend(); }
+
+  void pop();
+  PMDataManager *top() const { return S.back(); }
+  void push(PMDataManager *PM);
+  bool empty() const { return S.empty(); }
+
+  void dump() const;
+
+private:
+  std::vector<PMDataManager *> S;
+};
+
+
+//===----------------------------------------------------------------------===//
+// PMTopLevelManager
+//
+/// PMTopLevelManager manages LastUser info and collects common APIs used by
+/// top level pass managers.
+class PMTopLevelManager {
+protected:
+  explicit PMTopLevelManager(PMDataManager *PMDM);
+
+  virtual unsigned getNumContainedManagers() const {
+    return (unsigned)PassManagers.size();
+  }
+
+  void initializeAllAnalysisInfo();
+
+private:
+  /// This is implemented by top level pass manager and used by
+  /// schedulePass() to add analysis info passes that are not available.
+  virtual void addTopLevelPass(Pass *P) = 0;
+
+public:
+  /// Schedule pass P for execution. Make sure that passes required by
+  /// P are run before P is run. Update analysis info maintained by
+  /// the manager. Remove dead passes. This is a recursive function.
+  void schedulePass(Pass *P);
+
+  /// Set pass P as the last user of the given analysis passes.
+  void setLastUser(const SmallVectorImpl<Pass *> &AnalysisPasses, Pass *P);
+
+  /// Collect passes whose last user is P
+  void collectLastUses(SmallVectorImpl<Pass *> &LastUses, Pass *P);
+
+  /// Find the pass that implements Analysis AID. Search immutable
+  /// passes and all pass managers. If the desired pass is not found
+  /// then return NULL.
+  Pass *findAnalysisPass(AnalysisID AID);
+
+  /// Find analysis usage information for the pass P.
+  AnalysisUsage *findAnalysisUsage(Pass *P);
+
+  virtual ~PMTopLevelManager();
+
+  /// Add immutable pass and initialize it.
+  inline void addImmutablePass(ImmutablePass *P) {
+    P->initializePass();
+    ImmutablePasses.push_back(P);
+  }
+
+  inline SmallVectorImpl<ImmutablePass *>& getImmutablePasses() {
+    return ImmutablePasses;
+  }
+
+  void addPassManager(PMDataManager *Manager) {
+    PassManagers.push_back(Manager);
+  }
+
+  // Add Manager into the list of managers that are not directly
+  // maintained by this top level pass manager
+  inline void addIndirectPassManager(PMDataManager *Manager) {
+    IndirectPassManagers.push_back(Manager);
+  }
+
+  // Print passes managed by this top level manager.
+  void dumpPasses() const;
+  void dumpArguments() const;
+
+  // Active Pass Managers
+  PMStack activeStack;
+
+protected:
+
+  /// Collection of pass managers
+  SmallVector<PMDataManager *, 8> PassManagers;
+
+private:
+
+  /// Collection of pass managers that are not directly maintained
+  /// by this pass manager
+  SmallVector<PMDataManager *, 8> IndirectPassManagers;
+
+  // Map to keep track of last user of the analysis pass.
+  // LastUser->second is the last user of LastUser->first.
+  DenseMap<Pass *, Pass *> LastUser;
+
+  // Map to keep track of passes that are last used by a pass.
+  // This inverse map is initialized at PM->run() based on
+  // LastUser map.
+  DenseMap<Pass *, SmallPtrSet<Pass *, 8> > InversedLastUser;
+
+  /// Immutable passes are managed by top level manager.
+  SmallVector<ImmutablePass *, 8> ImmutablePasses;
+
+  DenseMap<Pass *, AnalysisUsage *> AnUsageMap;
+};
+
+
+
+//===----------------------------------------------------------------------===//
+// PMDataManager
+
+/// PMDataManager provides the common place to manage the analysis data
+/// used by pass managers.
+class PMDataManager {
+public:
+
+  explicit PMDataManager(int Depth) : TPM(NULL), Depth(Depth) {
+    initializeAnalysisInfo();
+  }
+
+  virtual ~PMDataManager();
+
+  virtual Pass *getAsPass() = 0;
+
+  /// Augment AvailableAnalysis by adding analysis made available by pass P.
+  void recordAvailableAnalysis(Pass *P);
+
+  /// verifyPreservedAnalysis -- Verify analyses preserved by pass P.
+  void verifyPreservedAnalysis(Pass *P);
+
+  /// Remove Analysis that is not preserved by the pass
+  void removeNotPreservedAnalysis(Pass *P);
+
+  /// Remove dead passes used by P.
+  void removeDeadPasses(Pass *P, StringRef Msg,
+                        enum PassDebuggingString);
+
+  /// Remove P.
+  void freePass(Pass *P, StringRef Msg,
+                enum PassDebuggingString);
+
+  /// Add pass P into the PassVector. Update
+  /// AvailableAnalysis appropriately if ProcessAnalysis is true.
+  void add(Pass *P, bool ProcessAnalysis = true);
+
+  /// Add RequiredPass into list of lower level passes required by pass P.
+  /// RequiredPass is run on the fly by Pass Manager when P requests it
+  /// through the getAnalysis interface.
+  virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass);
+
+  virtual Pass *getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F);
+
+  /// Initialize available analysis information.
+  void initializeAnalysisInfo() {
+    AvailableAnalysis.clear();
+    for (unsigned i = 0; i < PMT_Last; ++i)
+      InheritedAnalysis[i] = NULL;
+  }
+
+  // Return true if P preserves high level analysis used by other
+  // passes that are managed by this manager.
+  bool preserveHigherLevelAnalysis(Pass *P);
+
+
+  /// Populate RequiredPasses with the analysis passes that are required by
+  /// pass P and are available. Populate ReqPassNotAvailable with the analysis
+  /// passes that are required by pass P but are not available.
+  void collectRequiredAnalysis(SmallVectorImpl<Pass *> &RequiredPasses,
+                               SmallVectorImpl<AnalysisID> &ReqPassNotAvailable,
+                               Pass *P);
+
+  /// All Required analyses should be available to the pass as it runs!  Here
+  /// we fill in the AnalysisImpls member of the pass so that it can
+  /// successfully use the getAnalysis() method to retrieve the
+  /// implementations it needs.
+  void initializeAnalysisImpl(Pass *P);
+
+  /// Find the pass that implements Analysis AID. If desired pass is not found
+  /// then return NULL.
+  Pass *findAnalysisPass(AnalysisID AID, bool Direction);
+
+  // Access toplevel manager
+  PMTopLevelManager *getTopLevelManager() { return TPM; }
+  void setTopLevelManager(PMTopLevelManager *T) { TPM = T; }
+
+  unsigned getDepth() const { return Depth; }
+
+  // Print routines used by debug-pass
+  void dumpLastUses(Pass *P, unsigned Offset) const;
+  void dumpPassArguments() const;
+  void dumpPassInfo(Pass *P, enum PassDebuggingString S1,
+                    enum PassDebuggingString S2, StringRef Msg);
+  void dumpRequiredSet(const Pass *P) const;
+  void dumpPreservedSet(const Pass *P) const;
+
+  virtual unsigned getNumContainedPasses() const {
+    return (unsigned)PassVector.size();
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    assert(0 && "Invalid use of getPassManagerType");
+    return PMT_Unknown;
+  }
+
+  std::map<AnalysisID, Pass*> *getAvailableAnalysis() {
+    return &AvailableAnalysis;
+  }
+
+  // Collect AvailableAnalysis from all the active Pass Managers.
+  void populateInheritedAnalysis(PMStack &PMS) {
+    unsigned Index = 0;
+    for (PMStack::iterator I = PMS.begin(), E = PMS.end();
+         I != E; ++I)
+      InheritedAnalysis[Index++] = (*I)->getAvailableAnalysis();
+  }
+
+protected:
+
+  // Top level manager.
+  PMTopLevelManager *TPM;
+
+  // Collection of passes that are managed by this manager
+  SmallVector<Pass *, 8> PassVector;
+
+  // Collection of Analysis provided by the parent pass manager and
+  // used by the current pass manager. At any time there cannot be more
+  // than PMT_Last active pass managers.
+  std::map<AnalysisID, Pass *> *InheritedAnalysis[PMT_Last];
+
+
+  /// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
+  /// or higher is specified.
+  bool isPassDebuggingExecutionsOrMore() const;
+
+private:
+  void dumpAnalysisUsage(StringRef Msg, const Pass *P,
+                         const AnalysisUsage::VectorType &Set) const;
+
+  // Set of available Analysis. This information is used while scheduling
+  // passes. If a pass requires an analysis which is not available then
+  // the required analysis pass is scheduled to run before the pass itself is
+  // scheduled to run.
+  std::map<AnalysisID, Pass*> AvailableAnalysis;
+
+  // Collection of higher level analysis used by the pass managed by
+  // this manager.
+  SmallVector<Pass *, 8> HigherLevelAnalysis;
+
+  unsigned Depth;
+};
+
+//===----------------------------------------------------------------------===//
+// FPPassManager
+//
+/// FPPassManager manages BBPassManagers and FunctionPasses.
+/// It batches all function passes and basic block pass managers together and
+/// sequences them to process one function at a time before processing the
+/// next function.
+class FPPassManager : public ModulePass, public PMDataManager {
+public:
+  static char ID;
+  explicit FPPassManager(int Depth)
+  : ModulePass(ID), PMDataManager(Depth) { }
+
+  /// run - Execute all of the passes scheduled for execution. Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool runOnFunction(Function &F);
+  bool runOnModule(Module &M);
+
+  /// cleanup - After running all passes, clean up pass manager cache.
+  void cleanup();
+
+  /// doInitialization - Run all of the initializers for the function passes.
+  ///
+  bool doInitialization(Module &M);
+
+  /// doFinalization - Run all of the finalizers for the function passes.
+  ///
+  bool doFinalization(Module &M);
+
+  virtual PMDataManager *getAsPMDataManager() { return this; }
+  virtual Pass *getAsPass() { return this; }
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  void getAnalysisUsage(AnalysisUsage &Info) const {
+    Info.setPreservesAll();
+  }
+
+  // Print passes managed by this manager
+  void dumpPassStructure(unsigned Offset);
+
+  virtual const char *getPassName() const {
+    return "Function Pass Manager";
+  }
+
+  FunctionPass *getContainedPass(unsigned N) {
+    assert(N < PassVector.size() && "Pass number out of range!");
+    FunctionPass *FP = static_cast<FunctionPass *>(PassVector[N]);
+    return FP;
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_FunctionPassManager;
+  }
+};
+
+Timer *getPassTimer(Pass *);
+
+}
+
+#endif
diff --git a/final/include/llvm/PassRegistry.h b/final/include/llvm/PassRegistry.h
new file mode 100644
index 00000000000..5d89c492218
--- /dev/null
+++ b/final/include/llvm/PassRegistry.h
@@ -0,0 +1,84 @@
+//===- llvm/PassRegistry.h - Pass Information Registry ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines PassRegistry, a class that is used in the initialization
+// and registration of passes. At application startup, passes are registered
+// with the PassRegistry, which is later provided to the PassManager for
+// dependency resolution and similar tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASSREGISTRY_H
+#define LLVM_PASSREGISTRY_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+
+class PassInfo;
+struct PassRegistrationListener;
+
+/// PassRegistry - This class manages the registration and initialization of
+/// the pass subsystem at application startup, and assists the PassManager
+/// in resolving pass dependencies.
+/// NOTE: PassRegistry is NOT thread-safe. If you want to use LLVM on multiple
+/// threads simultaneously, you will need to use a separate PassRegistry on
+/// each thread.
+class PassRegistry {
+  mutable void *pImpl;
+  void *getImpl() const;
+
+public:
+  PassRegistry() : pImpl(0) { }
+  ~PassRegistry();
+
+  /// getPassRegistry - Access the global registry object, which is
+  /// automatically initialized at application launch and destroyed by
+  /// llvm_shutdown.
+  static PassRegistry *getPassRegistry();
+
+  /// getPassInfo - Look up a pass' corresponding PassInfo, indexed by the pass'
+  /// type identifier (&MyPass::ID).
+  const PassInfo *getPassInfo(const void *TI) const;
+
+  /// getPassInfo - Look up a pass' corresponding PassInfo, indexed by the pass'
+  /// argument string.
+  const PassInfo *getPassInfo(StringRef Arg) const;
+
+  /// registerPass - Register a pass (by means of its PassInfo) with the
+  /// registry. Required in order to use the pass with a PassManager.
+  void registerPass(const PassInfo &PI, bool ShouldFree = false);
+
+  /// unregisterPass - Unregister a pass (by means of its PassInfo) with the
+  /// registry.
+  void unregisterPass(const PassInfo &PI);
+
+  /// registerAnalysisGroup - Register an analysis group (or a pass implementing
+  /// an analysis group) with the registry. Like registerPass, this is required
+  /// in order for a PassManager to be able to use this group/pass.
+  void registerAnalysisGroup(const void *InterfaceID, const void *PassID,
+                             PassInfo& Registeree, bool isDefault,
+                             bool ShouldFree = false);
+
+  /// enumerateWith - Enumerate the registered passes, calling the provided
+  /// PassRegistrationListener's passEnumerate() callback on each of them.
+  void enumerateWith(PassRegistrationListener *L);
+
+  /// addRegistrationListener - Register the given PassRegistrationListener
+  /// to receive passRegistered() callbacks whenever a new pass is registered.
+  void addRegistrationListener(PassRegistrationListener *L);
+
+  /// removeRegistrationListener - Unregister a PassRegistrationListener so that
+  /// it no longer receives passRegistered() callbacks.
+  void removeRegistrationListener(PassRegistrationListener *L);
+};
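+
+// Illustrative usage sketch (not part of the original header): the global
+// registry can resolve a command-line style pass name to its metadata:
+//
+//   PassRegistry *Registry = PassRegistry::getPassRegistry();
+//   if (const PassInfo *PI = Registry->getPassInfo(StringRef("licm")))
+//     ; // PI->getPassName(), PI->createPass(), etc.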
+
+}
+
+#endif
diff --git a/final/include/llvm/PassSupport.h b/final/include/llvm/PassSupport.h
new file mode 100644
index 00000000000..082790956c4
--- /dev/null
+++ b/final/include/llvm/PassSupport.h
@@ -0,0 +1,336 @@
+//===- llvm/PassSupport.h - Pass Support code -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines stuff that is used to define and "use" Passes. This file
+// is automatically #included by Pass.h, so:
+//
+//                   NO .CPP FILES SHOULD INCLUDE THIS FILE DIRECTLY
+//
+// Instead, #include Pass.h.
+//
+// This file defines Pass registration code and classes used for it.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_PASS_SUPPORT_H
+#define LLVM_PASS_SUPPORT_H
+
+#include "Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Atomic.h"
+#include <vector>
+
+namespace llvm {
+
+//===---------------------------------------------------------------------------
+/// PassInfo class - An instance of this class exists for every pass known by
+/// the system, and can be obtained from a live Pass by calling its
+/// getPassInfo() method. These objects are set up by the RegisterPass<>
+/// template, defined below.
+///
+class PassInfo {
+public:
+  typedef Pass* (*NormalCtor_t)();
+
+private:
+  const char      *const PassName;     // Nice name for Pass
+  const char      *const PassArgument; // Command Line argument to run this pass
+  const void *PassID;
+  const bool IsCFGOnlyPass;            // Pass only looks at the CFG.
+  const bool IsAnalysis;               // True if an analysis pass.
+  const bool IsAnalysisGroup;          // True if an analysis group.
+  std::vector<const PassInfo*> ItfImpl;// Interfaces implemented by this pass
+
+  NormalCtor_t NormalCtor;
+
+public:
+  /// PassInfo ctor - Do not call this directly, this should only be invoked
+  /// through RegisterPass.
+  PassInfo(const char *name, const char *arg, const void *pi,
+           NormalCtor_t normal, bool isCFGOnly, bool is_analysis)
+    : PassName(name), PassArgument(arg), PassID(pi),
+      IsCFGOnlyPass(isCFGOnly),
+      IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal) { }
+  /// PassInfo ctor - Do not call this directly, this should only be invoked
+  /// through RegisterPass. This version is for use by analysis groups; it
+  /// does not auto-register the pass.
+  PassInfo(const char *name, const void *pi)
+    : PassName(name), PassArgument(""), PassID(pi),
+      IsCFGOnlyPass(false),
+      IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(0) { }
+
+  /// getPassName - Return the friendly name for the pass; never returns null.
+  ///
+  const char *getPassName() const { return PassName; }
+
+  /// getPassArgument - Return the command line option that may be passed to
+  /// 'opt' that will cause this pass to be run. This will return null if there
+  /// is no argument.
+  ///
+  const char *getPassArgument() const { return PassArgument; }
+
+  /// getTypeInfo - Return the id object for the pass...
+  /// TODO: Rename
+  const void *getTypeInfo() const { return PassID; }
+
+  /// Return true if this PassID implements the specified ID pointer.
+  bool isPassID(const void *IDPtr) const {
+    return PassID == IDPtr;
+  }
+
+  /// isAnalysisGroup - Return true if this is an analysis group, not a normal
+  /// pass.
+  ///
+  bool isAnalysisGroup() const { return IsAnalysisGroup; }
+  bool isAnalysis() const { return IsAnalysis; }
+
+  /// isCFGOnlyPass - Return true if this pass only looks at the CFG for the
+  /// function.
+  bool isCFGOnlyPass() const { return IsCFGOnlyPass; }
+
+  /// getNormalCtor - Return a pointer to a function, that when called, creates
+  /// an instance of the pass and returns it. This pointer may be null if there
+  /// is no default constructor for the pass.
+  ///
+  NormalCtor_t getNormalCtor() const {
+    return NormalCtor;
+  }
+  void setNormalCtor(NormalCtor_t Ctor) {
+    NormalCtor = Ctor;
+  }
+
+  /// createPass() - Use this method to create an instance of this pass.
+  Pass *createPass() const;
+
+  /// addInterfaceImplemented - This method is called when this pass is
+  /// registered as a member of an analysis group with the RegisterAnalysisGroup
+  /// template.
+  ///
+  void addInterfaceImplemented(const PassInfo *ItfPI) {
+    ItfImpl.push_back(ItfPI);
+  }
+
+  /// getInterfacesImplemented - Return a list of all of the analysis group
+  /// interfaces implemented by this pass.
+  ///
+  const std::vector<const PassInfo*> &getInterfacesImplemented() const {
+    return ItfImpl;
+  }
+
+private:
+  void operator=(const PassInfo &); // do not implement
+  PassInfo(const PassInfo &);       // do not implement
+};
+
+#define CALL_ONCE_INITIALIZATION(function) \
+  static volatile sys::cas_flag initialized = 0; \
+  sys::cas_flag old_val = sys::CompareAndSwap(&initialized, 1, 0); \
+  if (old_val == 0) { \
+    function(Registry); \
+    sys::MemoryFence(); \
+    initialized = 2; \
+  } else { \
+    sys::cas_flag tmp = initialized; \
+    sys::MemoryFence(); \
+    while (tmp != 2) { \
+      tmp = initialized; \
+      sys::MemoryFence(); \
+    } \
+  }
+
+#define INITIALIZE_PASS(passName, arg, name, cfg, analysis) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    return PI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
+
+#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) {
+
+#define INITIALIZE_PASS_DEPENDENCY(depName) \
+    initialize##depName##Pass(Registry);
+#define INITIALIZE_AG_DEPENDENCY(depName) \
+    initialize##depName##AnalysisGroup(Registry);
+
+#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis) \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    return PI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
+
+template<typename PassName>
+Pass *callDefaultCtor() { return new PassName(); }
+
+//===---------------------------------------------------------------------------
+/// RegisterPass<t> template - This template class is used to notify the system
+/// that a Pass is available for use, and registers it into the internal
+/// database maintained by the PassManager. Unless this template is used, opt,
+/// for example, will not be able to see the pass and attempts to create the
+/// pass will fail. This template is used in the following manner (at global
+/// scope, in your .cpp file):
+///
+/// static RegisterPass<YourPassClassName> tmp("passopt", "My Pass Name");
+///
+/// This statement will cause your pass to be created by calling the default
+/// constructor exposed by the pass. If you have a different constructor that
+/// must be called, create a global constructor function (which takes the
+/// arguments you need and returns a Pass*) and register your pass like this:
+///
+/// static RegisterPass<PassClassName> tmp("passopt", "My Name");
+///
+template<typename passName>
+struct RegisterPass : public PassInfo {
+
+  // Register Pass using default constructor...
+  RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false,
+               bool is_analysis = false)
+    : PassInfo(Name, PassArg, &passName::ID,
+               PassInfo::NormalCtor_t(callDefaultCtor<passName>),
+               CFGOnly, is_analysis) {
+    PassRegistry::getPassRegistry()->registerPass(*this);
+  }
+};
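+
+// Illustrative usage sketch (not part of the original header): a pass's .cpp
+// file registers it with the INITIALIZE_PASS macro, which also defines the
+// llvm::initialize<Name>Pass hook declared in InitializePasses.h. The pass
+// name "CountBlocks" and its strings are hypothetical:
+//
+//   INITIALIZE_PASS(CountBlocks, "count-blocks",
+//                   "Count basic blocks per function",
+//                   false /*IsCFGOnlyPass*/,
+//                   true  /*IsAnalysis*/)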
+
+
+/// RegisterAnalysisGroup - Register a Pass as a member of an analysis _group_.
+/// Analysis groups are used to define an interface (which need not derive from
+/// Pass) that is required by passes to do their job. Analysis Groups differ
+/// from normal analyses because any available implementation of the group will
+/// be used if it is available.
+///
+/// If no analysis implementing the interface is available, a default
+/// implementation is created and added. A pass registers itself as the default
+/// implementation by specifying 'true' as the second template argument of this
+/// class.
+///
+/// In addition to registering itself as an analysis group member, a pass must
+/// register itself normally as well. Passes may be members of multiple groups
+/// and may still be "required" specifically by name.
+///
+/// The actual interface may also be registered as well (by not specifying the
+/// second template argument). The interface should be registered to associate
+/// a nice name with the interface.
+///
+class RegisterAGBase : public PassInfo {
+public:
+  RegisterAGBase(const char *Name,
+                 const void *InterfaceID,
+                 const void *PassID = 0,
+                 bool isDefault = false);
+};
+
+template<typename Interface, bool Default = false>
+struct RegisterAnalysisGroup : public RegisterAGBase {
+  explicit RegisterAnalysisGroup(PassInfo &RPB)
+    : RegisterAGBase(RPB.getPassName(),
+                     &Interface::ID, RPB.getTypeInfo(),
+                     Default) {
+  }
+
+  explicit RegisterAnalysisGroup(const char *Name)
+    : RegisterAGBase(Name, &Interface::ID) {
+  }
+};
+
+#define INITIALIZE_ANALYSIS_GROUP(agName, name, defaultPass) \
+  static void* initialize##agName##AnalysisGroupOnce(PassRegistry &Registry) { \
+    initialize##defaultPass##Pass(Registry); \
+    PassInfo *AI = new PassInfo(name, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, 0, *AI, false, true); \
+    return AI; \
+  } \
+  void llvm::initialize##agName##AnalysisGroup(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##agName##AnalysisGroupOnce) \
+  }
+
+
+#define INITIALIZE_AG_PASS(passName, agName, arg, name, cfg, analysis, def) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    if (!def) initialize##agName##AnalysisGroup(Registry); \
+    PassInfo *PI = new PassInfo(name, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    \
+    PassInfo *AI = new PassInfo(name, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, & passName ::ID, \
+                                   *AI, def, true); \
+    return AI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
+
+
+#define INITIALIZE_AG_PASS_BEGIN(passName, agName, arg, n, cfg, analysis, def) \
+  static void* initialize##passName##PassOnce(PassRegistry &Registry) { \
+    if (!def) initialize##agName##AnalysisGroup(Registry);
+
+#define INITIALIZE_AG_PASS_END(passName, agName, arg, n, cfg, analysis, def) \
+    PassInfo *PI = new PassInfo(n, arg, & passName ::ID, \
+      PassInfo::NormalCtor_t(callDefaultCtor< passName >), cfg, analysis); \
+    Registry.registerPass(*PI, true); \
+    \
+    PassInfo *AI = new PassInfo(n, & agName :: ID); \
+    Registry.registerAnalysisGroup(& agName ::ID, & passName ::ID, \
+                                   *AI, def, true); \
+    return AI; \
+  } \
+  void llvm::initialize##passName##Pass(PassRegistry &Registry) { \
+    CALL_ONCE_INITIALIZATION(initialize##passName##PassOnce) \
+  }
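+
+// Illustrative usage sketch (not part of the original header): registering a
+// pass as one implementation of an analysis group, with AliasAnalysis as the
+// classic group in this tree. "MyAA" and its strings are hypothetical:
+//
+//   INITIALIZE_AG_PASS(MyAA, AliasAnalysis, "my-aa",
+//                      "My alias analysis", false, true,
+//                      false /*not the default implementation*/)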
Deriving from the PassRegistrationListener class automatically
+/// registers your object to receive callbacks indicating when passes are loaded
+/// and removed.
+///
+struct PassRegistrationListener {
+
+  /// PassRegistrationListener ctor - Add the current object to the list of
+  /// PassRegistrationListeners...
+  PassRegistrationListener();
+
+  /// dtor - Remove object from list of listeners...
+  ///
+  virtual ~PassRegistrationListener();
+
+  /// Callback functions - These functions are invoked whenever a pass is loaded
+  /// or removed from the current executable.
+  ///
+  virtual void passRegistered(const PassInfo *) {}
+
+  /// enumeratePasses - Iterate over the registered passes, calling the
+  /// passEnumerate callback on each PassInfo object.
+  ///
+  void enumeratePasses();
+
+  /// passEnumerate - Callback function invoked when someone calls
+  /// enumeratePasses on this PassRegistrationListener object.
+  ///
+  virtual void passEnumerate(const PassInfo *) {}
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/AIXDataTypesFix.h b/final/include/llvm/Support/AIXDataTypesFix.h
new file mode 100644
index 00000000000..a9a9147de29
--- /dev/null
+++ b/final/include/llvm/Support/AIXDataTypesFix.h
@@ -0,0 +1,25 @@
+//===-- llvm/Support/AIXDataTypesFix.h - Fix datatype defs ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file overrides default system-defined types and limits which cannot be
+// done in DataTypes.h.in because it is processed by autoheader first, which
+// comments out any #undef statement
+//
+//===----------------------------------------------------------------------===//
+
+// No include guards desired!
+
+#ifndef SUPPORT_DATATYPES_H
+#error "AIXDataTypesFix.h must only be included via DataTypes.h!"
+#endif
+
+// GCC is strict about defining large constants: they must have LL modifier.
+// These will be defined properly at the end of DataTypes.h
+#undef INT64_MAX
+#undef INT64_MIN
diff --git a/final/include/llvm/Support/AlignOf.h b/final/include/llvm/Support/AlignOf.h
new file mode 100644
index 00000000000..cebfa7982d6
--- /dev/null
+++ b/final/include/llvm/Support/AlignOf.h
@@ -0,0 +1,60 @@
+//===--- AlignOf.h - Portable calculation of type alignment -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AlignOf function that computes alignments for
+// arbitrary types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ALIGNOF_H
+#define LLVM_SUPPORT_ALIGNOF_H
+
+namespace llvm {
+
+template <typename T>
+struct AlignmentCalcImpl {
+  char x;
+  T t;
+private:
+  AlignmentCalcImpl() {} // Never instantiate.
+};
+
+/// AlignOf - A templated class that contains an enum value representing
+/// the alignment of the template argument.  For example,
+/// AlignOf<int>::Alignment represents the alignment of type "int".  The
+/// alignment calculated is the minimum alignment, and not necessarily
+/// the "desired" alignment returned by GCC's __alignof__ (for example).
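+/// On a typical x86-64 target, for instance, AlignOf<double>::Alignment
+/// evaluates to 8 while AlignOf<char>::Alignment evaluates to 1.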
+/// Note that because the alignment is an enum value, it can be used as a
+/// compile-time constant (e.g., for template instantiation).
+template <typename T>
+struct AlignOf {
+  enum { Alignment =
+         static_cast<unsigned int>(sizeof(AlignmentCalcImpl<T>) - sizeof(T)) };
+
+  enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 };
+  enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 };
+  enum { Alignment_GreaterEqual_8Bytes = Alignment >= 8 ? 1 : 0 };
+  enum { Alignment_GreaterEqual_16Bytes = Alignment >= 16 ? 1 : 0 };
+
+  enum { Alignment_LessEqual_2Bytes = Alignment <= 2 ? 1 : 0 };
+  enum { Alignment_LessEqual_4Bytes = Alignment <= 4 ? 1 : 0 };
+  enum { Alignment_LessEqual_8Bytes = Alignment <= 8 ? 1 : 0 };
+  enum { Alignment_LessEqual_16Bytes = Alignment <= 16 ? 1 : 0 };
+
+};
+
+/// alignOf - A templated function that returns the minimum alignment of
+/// a type.  This provides no extra functionality beyond the AlignOf
+/// class besides some cosmetic cleanliness.  Example usage:
+/// alignOf<int>() returns the alignment of an int.
+template <typename T>
+static inline unsigned alignOf() { return AlignOf<T>::Alignment; }
+
+} // end namespace llvm
+#endif
diff --git a/final/include/llvm/Support/Allocator.h b/final/include/llvm/Support/Allocator.h
new file mode 100644
index 00000000000..c6807099f85
--- /dev/null
+++ b/final/include/llvm/Support/Allocator.h
@@ -0,0 +1,239 @@
+//===--- Allocator.h - Simple memory allocation abstraction -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MallocAllocator and BumpPtrAllocator interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ALLOCATOR_H
+#define LLVM_SUPPORT_ALLOCATOR_H
+
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/DataTypes.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdlib>
+
+namespace llvm {
+template <typename T> struct ReferenceAdder { typedef T& result; };
+template <typename T> struct ReferenceAdder<T&> { typedef T result; };
+
+class MallocAllocator {
+public:
+  MallocAllocator() {}
+  ~MallocAllocator() {}
+
+  void Reset() {}
+
+  void *Allocate(size_t Size, size_t /*Alignment*/) { return malloc(Size); }
+
+  template <typename T>
+  T *Allocate() { return static_cast<T*>(malloc(sizeof(T))); }
+
+  template <typename T>
+  T *Allocate(size_t Num) {
+    return static_cast<T*>(malloc(sizeof(T)*Num));
+  }
+
+  void Deallocate(const void *Ptr) { free(const_cast<void*>(Ptr)); }
+
+  void PrintStats() const {}
+};
+
+/// MemSlab - This structure lives at the beginning of every slab allocated by
+/// the bump allocator.
+class MemSlab {
+public:
+  size_t Size;
+  MemSlab *NextPtr;
+};
+
+/// SlabAllocator - This class can be used to parameterize the underlying
+/// allocation strategy for the bump allocator.  In particular, this is used
+/// by the JIT to allocate contiguous swathes of executable memory.  The
+/// interface uses MemSlabs instead of void pointers so that the allocator
+/// doesn't have to remember the size of the pointer it allocated.
+class SlabAllocator {
+public:
+  virtual ~SlabAllocator();
+  virtual MemSlab *Allocate(size_t Size) = 0;
+  virtual void Deallocate(MemSlab *Slab) = 0;
+};
+
+/// MallocSlabAllocator - The default slab allocator for the bump allocator
+/// is an adapter class for MallocAllocator that just forwards the method
+/// calls and translates the arguments.
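+/// A client that wants a different strategy only has to implement the two
+/// virtual methods.  A minimal hypothetical sketch (the names below are
+/// invented for illustration, not part of this header):
+///
+///   class CountingSlabAllocator : public SlabAllocator {
+///     MallocSlabAllocator Base;
+///     unsigned NumSlabs;
+///   public:
+///     CountingSlabAllocator() : NumSlabs(0) {}
+///     virtual MemSlab *Allocate(size_t Size) {
+///       ++NumSlabs;                      // count every slab we hand out
+///       return Base.Allocate(Size);
+///     }
+///     virtual void Deallocate(MemSlab *Slab) { Base.Deallocate(Slab); }
+///   };
+///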
+class MallocSlabAllocator : public SlabAllocator { + /// Allocator - The underlying allocator that we forward to. + /// + MallocAllocator Allocator; + +public: + MallocSlabAllocator() : Allocator() { } + virtual ~MallocSlabAllocator(); + virtual MemSlab *Allocate(size_t Size); + virtual void Deallocate(MemSlab *Slab); +}; + +/// BumpPtrAllocator - This allocator is useful for containers that need +/// very simple memory allocation strategies. In particular, this just keeps +/// allocating memory, and never deletes it until the entire block is dead. This +/// makes allocation speedy, but must only be used when the trade-off is ok. +class BumpPtrAllocator { + BumpPtrAllocator(const BumpPtrAllocator &); // do not implement + void operator=(const BumpPtrAllocator &); // do not implement + + /// SlabSize - Allocate data into slabs of this size unless we get an + /// allocation above SizeThreshold. + size_t SlabSize; + + /// SizeThreshold - For any allocation larger than this threshold, we should + /// allocate a separate slab. + size_t SizeThreshold; + + /// Allocator - The underlying allocator we use to get slabs of memory. This + /// defaults to MallocSlabAllocator, which wraps malloc, but it could be + /// changed to use a custom allocator. + SlabAllocator &Allocator; + + /// CurSlab - The slab that we are currently allocating into. + /// + MemSlab *CurSlab; + + /// CurPtr - The current pointer into the current slab. This points to the + /// next free byte in the slab. + char *CurPtr; + + /// End - The end of the current slab. + /// + char *End; + + /// BytesAllocated - This field tracks how many bytes we've allocated, so + /// that we can compute how much space was wasted. + size_t BytesAllocated; + + /// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should + /// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and + /// AlignPtr(8, 4) == 8. + static char *AlignPtr(char *Ptr, size_t Alignment); + + /// StartNewSlab - Allocate a new slab and move the bump pointers over into + /// the new slab. Modifies CurPtr and End. + void StartNewSlab(); + + /// DeallocateSlabs - Deallocate all memory slabs after and including this + /// one. + void DeallocateSlabs(MemSlab *Slab); + + static MallocSlabAllocator DefaultSlabAllocator; + + template friend class SpecificBumpPtrAllocator; +public: + BumpPtrAllocator(size_t size = 4096, size_t threshold = 4096, + SlabAllocator &allocator = DefaultSlabAllocator); + ~BumpPtrAllocator(); + + /// Reset - Deallocate all but the current slab and reset the current pointer + /// to the beginning of it, freeing all memory allocated so far. + void Reset(); + + /// Allocate - Allocate space at the specified alignment. + /// + void *Allocate(size_t Size, size_t Alignment); + + /// Allocate space, but do not construct, one object. + /// + template + T *Allocate() { + return static_cast(Allocate(sizeof(T),AlignOf::Alignment)); + } + + /// Allocate space for an array of objects. This does not construct the + /// objects though. + template + T *Allocate(size_t Num) { + return static_cast(Allocate(Num * sizeof(T), AlignOf::Alignment)); + } + + /// Allocate space for a specific count of elements and with a specified + /// alignment. + template + T *Allocate(size_t Num, size_t Alignment) { + // Round EltSize up to the specified alignment. 
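+    // (Assumes Alignment is a power of two, so -Alignment == ~(Alignment-1);
+    // e.g. sizeof(T) == 6, Alignment == 4: (6+4-1) & -4 == 8.)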
+ size_t EltSize = (sizeof(T)+Alignment-1)&(-Alignment); + return static_cast(Allocate(Num * EltSize, Alignment)); + } + + void Deallocate(const void * /*Ptr*/) {} + + unsigned GetNumSlabs() const; + + void PrintStats() const; +}; + +/// SpecificBumpPtrAllocator - Same as BumpPtrAllocator but allows only +/// elements of one type to be allocated. This allows calling the destructor +/// in DestroyAll() and when the allocator is destroyed. +template +class SpecificBumpPtrAllocator { + BumpPtrAllocator Allocator; +public: + SpecificBumpPtrAllocator(size_t size = 4096, size_t threshold = 4096, + SlabAllocator &allocator = BumpPtrAllocator::DefaultSlabAllocator) + : Allocator(size, threshold, allocator) {} + + ~SpecificBumpPtrAllocator() { + DestroyAll(); + } + + /// Call the destructor of each allocated object and deallocate all but the + /// current slab and reset the current pointer to the beginning of it, freeing + /// all memory allocated so far. + void DestroyAll() { + MemSlab *Slab = Allocator.CurSlab; + while (Slab) { + char *End = Slab == Allocator.CurSlab ? Allocator.CurPtr : + (char *)Slab + Slab->Size; + for (char *Ptr = (char*)(Slab+1); Ptr < End; Ptr += sizeof(T)) { + Ptr = Allocator.AlignPtr(Ptr, alignOf()); + if (Ptr + sizeof(T) <= End) + reinterpret_cast(Ptr)->~T(); + } + Slab = Slab->NextPtr; + } + Allocator.Reset(); + } + + /// Allocate space for a specific count of elements. + T *Allocate(size_t num = 1) { + return Allocator.Allocate(num); + } +}; + +} // end namespace llvm + +inline void *operator new(size_t Size, llvm::BumpPtrAllocator &Allocator) { + struct S { + char c; + union { + double D; + long double LD; + long long L; + void *P; + } x; + }; + return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), + offsetof(S, x))); +} + +inline void operator delete(void *, llvm::BumpPtrAllocator &) {} + +#endif // LLVM_SUPPORT_ALLOCATOR_H diff --git a/final/include/llvm/Support/Atomic.h b/final/include/llvm/Support/Atomic.h new file mode 100644 index 00000000000..1a6c606aa5f --- /dev/null +++ b/final/include/llvm/Support/Atomic.h @@ -0,0 +1,39 @@ +//===- llvm/Support/Atomic.h - Atomic Operations -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the llvm::sys atomic operations. 
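+//
+// CompareAndSwap is the usual atomic primitive: it stores new_value into *ptr
+// only if *ptr equals old_value, and in either case returns the value *ptr
+// held beforehand.  In pseudocode (the whole sequence executes atomically):
+//
+//   prev = *ptr; if (prev == old_value) *ptr = new_value; return prev;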
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_ATOMIC_H +#define LLVM_SYSTEM_ATOMIC_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + namespace sys { + void MemoryFence(); + +#ifdef _MSC_VER + typedef long cas_flag; +#else + typedef uint32_t cas_flag; +#endif + cas_flag CompareAndSwap(volatile cas_flag* ptr, + cas_flag new_value, + cas_flag old_value); + cas_flag AtomicIncrement(volatile cas_flag* ptr); + cas_flag AtomicDecrement(volatile cas_flag* ptr); + cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val); + cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val); + cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val); + } +} + +#endif diff --git a/final/include/llvm/Support/CFG.h b/final/include/llvm/Support/CFG.h new file mode 100644 index 00000000000..d2ea12364e9 --- /dev/null +++ b/final/include/llvm/Support/CFG.h @@ -0,0 +1,341 @@ +//===-- llvm/Support/CFG.h - Process LLVM structures as graphs --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines specializations of GraphTraits that allow Function and +// BasicBlock graphs to be treated as proper graphs for generic algorithms. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CFG_H +#define LLVM_SUPPORT_CFG_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/Function.h" +#include "llvm/InstrTypes.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +// BasicBlock pred_iterator definition +//===----------------------------------------------------------------------===// + +template // Predecessor Iterator +class PredIterator : public std::iterator { + typedef std::iterator super; + typedef PredIterator Self; + USE_iterator It; + + inline void advancePastNonTerminators() { + // Loop to ignore non terminator uses (for example PHI nodes). + while (!It.atEnd() && !isa(*It)) + ++It; + } + +public: + typedef typename super::pointer pointer; + + PredIterator() {} + explicit inline PredIterator(Ptr *bb) : It(bb->use_begin()) { + advancePastNonTerminators(); + } + inline PredIterator(Ptr *bb, bool) : It(bb->use_end()) {} + + inline bool operator==(const Self& x) const { return It == x.It; } + inline bool operator!=(const Self& x) const { return !operator==(x); } + + inline pointer operator*() const { + assert(!It.atEnd() && "pred_iterator out of range!"); + return cast(*It)->getParent(); + } + inline pointer *operator->() const { return &operator*(); } + + inline Self& operator++() { // Preincrement + assert(!It.atEnd() && "pred_iterator out of range!"); + ++It; advancePastNonTerminators(); + return *this; + } + + inline Self operator++(int) { // Postincrement + Self tmp = *this; ++*this; return tmp; + } + + /// getOperandNo - Return the operand number in the predecessor's + /// terminator of the successor. 
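+  /// For example, if the predecessor reaches this block through an
+  /// unconditional 'br label %bb', the use of %bb is operand 0 of that
+  /// branch, so getOperandNo() returns 0.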
+ unsigned getOperandNo() const { + return It.getOperandNo(); + } +}; + +typedef PredIterator pred_iterator; +typedef PredIterator const_pred_iterator; + +inline pred_iterator pred_begin(BasicBlock *BB) { return pred_iterator(BB); } +inline const_pred_iterator pred_begin(const BasicBlock *BB) { + return const_pred_iterator(BB); +} +inline pred_iterator pred_end(BasicBlock *BB) { return pred_iterator(BB, true);} +inline const_pred_iterator pred_end(const BasicBlock *BB) { + return const_pred_iterator(BB, true); +} + + + +//===----------------------------------------------------------------------===// +// BasicBlock succ_iterator definition +//===----------------------------------------------------------------------===// + +template // Successor Iterator +class SuccIterator : public std::iterator { + const Term_ Term; + unsigned idx; + typedef std::iterator super; + typedef SuccIterator Self; + + inline bool index_is_valid(int idx) { + return idx >= 0 && (unsigned) idx < Term->getNumSuccessors(); + } + +public: + typedef typename super::pointer pointer; + // TODO: This can be random access iterator, only operator[] missing. + + explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator + assert(T && "getTerminator returned null!"); + } + inline SuccIterator(Term_ T, bool) // end iterator + : Term(T), idx(Term->getNumSuccessors()) { + assert(T && "getTerminator returned null!"); + } + + inline const Self &operator=(const Self &I) { + assert(Term == I.Term &&"Cannot assign iterators to two different blocks!"); + idx = I.idx; + return *this; + } + + /// getSuccessorIndex - This is used to interface between code that wants to + /// operate on terminator instructions directly. + unsigned getSuccessorIndex() const { return idx; } + + inline bool operator==(const Self& x) const { return idx == x.idx; } + inline bool operator!=(const Self& x) const { return !operator==(x); } + + inline pointer operator*() const { return Term->getSuccessor(idx); } + inline pointer operator->() const { return operator*(); } + + inline Self& operator++() { ++idx; return *this; } // Preincrement + + inline Self operator++(int) { // Postincrement + Self tmp = *this; ++*this; return tmp; + } + + inline Self& operator--() { --idx; return *this; } // Predecrement + inline Self operator--(int) { // Postdecrement + Self tmp = *this; --*this; return tmp; + } + + inline bool operator<(const Self& x) const { + assert(Term == x.Term && "Cannot compare iterators of different blocks!"); + return idx < x.idx; + } + + inline bool operator<=(const Self& x) const { + assert(Term == x.Term && "Cannot compare iterators of different blocks!"); + return idx <= x.idx; + } + inline bool operator>=(const Self& x) const { + assert(Term == x.Term && "Cannot compare iterators of different blocks!"); + return idx >= x.idx; + } + + inline bool operator>(const Self& x) const { + assert(Term == x.Term && "Cannot compare iterators of different blocks!"); + return idx > x.idx; + } + + inline Self& operator+=(int Right) { + unsigned new_idx = idx + Right; + assert(index_is_valid(new_idx) && "Iterator index out of bound"); + idx = new_idx; + return *this; + } + + inline Self operator+(int Right) { + Self tmp = *this; + tmp += Right; + return tmp; + } + + inline Self& operator-=(int Right) { + return operator+=(-Right); + } + + inline Self operator-(int Right) { + return operator+(-Right); + } + + inline int operator-(const Self& x) { + assert(Term == x.Term && "Cannot work on iterators of different blocks!"); + int distance = idx - 
x.idx; + return distance; + } + + // This works for read access, however write access is difficult as changes + // to Term are only possible with Term->setSuccessor(idx). Pointers that can + // be modified are not available. + // + // inline pointer operator[](int offset) { + // Self tmp = *this; + // tmp += offset; + // return tmp.operator*(); + // } + + /// Get the source BB of this iterator. + inline BB_ *getSource() { + return Term->getParent(); + } +}; + +typedef SuccIterator succ_iterator; +typedef SuccIterator succ_const_iterator; + +inline succ_iterator succ_begin(BasicBlock *BB) { + return succ_iterator(BB->getTerminator()); +} +inline succ_const_iterator succ_begin(const BasicBlock *BB) { + return succ_const_iterator(BB->getTerminator()); +} +inline succ_iterator succ_end(BasicBlock *BB) { + return succ_iterator(BB->getTerminator(), true); +} +inline succ_const_iterator succ_end(const BasicBlock *BB) { + return succ_const_iterator(BB->getTerminator(), true); +} + + + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for basic block graphs (CFGs) +//===--------------------------------------------------------------------===// + +// Provide specializations of GraphTraits to be able to treat a function as a +// graph of basic blocks... + +template <> struct GraphTraits { + typedef BasicBlock NodeType; + typedef succ_iterator ChildIteratorType; + + static NodeType *getEntryNode(BasicBlock *BB) { return BB; } + static inline ChildIteratorType child_begin(NodeType *N) { + return succ_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return succ_end(N); + } +}; + +template <> struct GraphTraits { + typedef const BasicBlock NodeType; + typedef succ_const_iterator ChildIteratorType; + + static NodeType *getEntryNode(const BasicBlock *BB) { return BB; } + + static inline ChildIteratorType child_begin(NodeType *N) { + return succ_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return succ_end(N); + } +}; + +// Provide specializations of GraphTraits to be able to treat a function as a +// graph of basic blocks... and to walk it in inverse order. Inverse order for +// a function is considered to be when traversing the predecessor edges of a BB +// instead of the successor edges. +// +template <> struct GraphTraits > { + typedef BasicBlock NodeType; + typedef pred_iterator ChildIteratorType; + static NodeType *getEntryNode(Inverse G) { return G.Graph; } + static inline ChildIteratorType child_begin(NodeType *N) { + return pred_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return pred_end(N); + } +}; + +template <> struct GraphTraits > { + typedef const BasicBlock NodeType; + typedef const_pred_iterator ChildIteratorType; + static NodeType *getEntryNode(Inverse G) { + return G.Graph; + } + static inline ChildIteratorType child_begin(NodeType *N) { + return pred_begin(N); + } + static inline ChildIteratorType child_end(NodeType *N) { + return pred_end(N); + } +}; + + + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for function basic block graphs (CFGs) +//===--------------------------------------------------------------------===// + +// Provide specializations of GraphTraits to be able to treat a function as a +// graph of basic blocks... these are the same as the basic block iterators, +// except that the root node is implicitly the first node of the function. 
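+// With these specializations in place, generic graph algorithms apply
+// directly; for example (a sketch, assuming llvm/ADT/DepthFirstIterator.h is
+// included):
+//
+//   for (df_iterator<Function*> I = df_begin(F), E = df_end(F); I != E; ++I)
+//     visit(*I);   // *I is a BasicBlock* reached in depth-first order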
+//
+template <> struct GraphTraits<Function*> : public GraphTraits<BasicBlock*> {
+  static NodeType *getEntryNode(Function *F) { return &F->getEntryBlock(); }
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  typedef Function::iterator nodes_iterator;
+  static nodes_iterator nodes_begin(Function *F) { return F->begin(); }
+  static nodes_iterator nodes_end  (Function *F) { return F->end(); }
+};
+template <> struct GraphTraits<const Function*> :
+  public GraphTraits<const BasicBlock*> {
+  static NodeType *getEntryNode(const Function *F) {return &F->getEntryBlock();}
+
+  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+  typedef Function::const_iterator nodes_iterator;
+  static nodes_iterator nodes_begin(const Function *F) { return F->begin(); }
+  static nodes_iterator nodes_end  (const Function *F) { return F->end(); }
+};
+
+
+// Provide specializations of GraphTraits to be able to treat a function as a
+// graph of basic blocks... and to walk it in inverse order.  Inverse order for
+// a function is considered to be when traversing the predecessor edges of a BB
+// instead of the successor edges.
+//
+template <> struct GraphTraits<Inverse<Function*> > :
+  public GraphTraits<Inverse<BasicBlock*> > {
+  static NodeType *getEntryNode(Inverse<Function*> G) {
+    return &G.Graph->getEntryBlock();
+  }
+};
+template <> struct GraphTraits<Inverse<const Function*> > :
+  public GraphTraits<Inverse<const BasicBlock*> > {
+  static NodeType *getEntryNode(Inverse<const Function *> G) {
+    return &G.Graph->getEntryBlock();
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/COFF.h b/final/include/llvm/Support/COFF.h
new file mode 100644
index 00000000000..673925593e6
--- /dev/null
+++ b/final/include/llvm/Support/COFF.h
@@ -0,0 +1,298 @@
+//===-- llvm/Support/COFF.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions used in Windows COFF files.
+//
+// Structures and enums defined within this file were created using
+// information from Microsoft's publicly available PE/COFF format document:
+//
+// Microsoft Portable Executable and Common Object File Format Specification
+// Revision 8.1 - February 15, 2008
+//
+// As of 5/2/2010, hosted by Microsoft at:
+// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_WIN_COFF_H
+#define LLVM_SUPPORT_WIN_COFF_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
+
+namespace llvm {
+namespace COFF {
+
+  // Sizes in bytes of various things in the COFF format.
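+  // These are the on-disk record sizes; the in-memory structs below may be
+  // padded differently (e.g. sizeof(symbol) need not equal SymbolSize).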
+ enum { + HeaderSize = 20, + NameSize = 8, + SymbolSize = 18, + SectionSize = 40, + RelocationSize = 10 + }; + + struct header { + uint16_t Machine; + uint16_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; + }; + + enum MachineTypes { + IMAGE_FILE_MACHINE_I386 = 0x14C, + IMAGE_FILE_MACHINE_AMD64 = 0x8664 + }; + + struct symbol { + char Name[NameSize]; + uint32_t Value; + uint16_t Type; + uint8_t StorageClass; + uint16_t SectionNumber; + uint8_t NumberOfAuxSymbols; + }; + + enum SymbolFlags { + SF_TypeMask = 0x0000FFFF, + SF_TypeShift = 0, + + SF_ClassMask = 0x00FF0000, + SF_ClassShift = 16, + + SF_WeakExternal = 0x01000000 + }; + + enum SymbolSectionNumber { + IMAGE_SYM_DEBUG = -2, + IMAGE_SYM_ABSOLUTE = -1, + IMAGE_SYM_UNDEFINED = 0 + }; + + /// Storage class tells where and what the symbol represents + enum SymbolStorageClass { + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, ///< Physical end of function + IMAGE_SYM_CLASS_NULL = 0, ///< No symbol + IMAGE_SYM_CLASS_AUTOMATIC = 1, ///< Stack variable + IMAGE_SYM_CLASS_EXTERNAL = 2, ///< External symbol + IMAGE_SYM_CLASS_STATIC = 3, ///< Static + IMAGE_SYM_CLASS_REGISTER = 4, ///< Register variable + IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, ///< External definition + IMAGE_SYM_CLASS_LABEL = 6, ///< Label + IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, ///< Undefined label + IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, ///< Member of structure + IMAGE_SYM_CLASS_ARGUMENT = 9, ///< Function argument + IMAGE_SYM_CLASS_STRUCT_TAG = 10, ///< Structure tag + IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, ///< Member of union + IMAGE_SYM_CLASS_UNION_TAG = 12, ///< Union tag + IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, ///< Type definition + IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, ///< Undefined static + IMAGE_SYM_CLASS_ENUM_TAG = 15, ///< Enumeration tag + IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, ///< Member of enumeration + IMAGE_SYM_CLASS_REGISTER_PARAM = 17, ///< Register parameter + IMAGE_SYM_CLASS_BIT_FIELD = 18, ///< Bit field + /// ".bb" or ".eb" - beginning or end of block + IMAGE_SYM_CLASS_BLOCK = 100, + /// ".bf" or ".ef" - beginning or end of function + IMAGE_SYM_CLASS_FUNCTION = 101, + IMAGE_SYM_CLASS_END_OF_STRUCT = 102, ///< End of structure + IMAGE_SYM_CLASS_FILE = 103, ///< File name + /// Line number, reformatted as symbol + IMAGE_SYM_CLASS_SECTION = 104, + IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, ///< Duplicate tag + /// External symbol in dmert public lib + IMAGE_SYM_CLASS_CLR_TOKEN = 107 + }; + + enum SymbolBaseType { + IMAGE_SYM_TYPE_NULL = 0, ///< No type information or unknown base type. + IMAGE_SYM_TYPE_VOID = 1, ///< Used with void pointers and functions. + IMAGE_SYM_TYPE_CHAR = 2, ///< A character (signed byte). + IMAGE_SYM_TYPE_SHORT = 3, ///< A 2-byte signed integer. + IMAGE_SYM_TYPE_INT = 4, ///< A natural integer type on the target. + IMAGE_SYM_TYPE_LONG = 5, ///< A 4-byte signed integer. + IMAGE_SYM_TYPE_FLOAT = 6, ///< A 4-byte floating-point number. + IMAGE_SYM_TYPE_DOUBLE = 7, ///< An 8-byte floating-point number. + IMAGE_SYM_TYPE_STRUCT = 8, ///< A structure. + IMAGE_SYM_TYPE_UNION = 9, ///< An union. + IMAGE_SYM_TYPE_ENUM = 10, ///< An enumerated type. + IMAGE_SYM_TYPE_MOE = 11, ///< A member of enumeration (a specific value). + IMAGE_SYM_TYPE_BYTE = 12, ///< A byte; unsigned 1-byte integer. + IMAGE_SYM_TYPE_WORD = 13, ///< A word; unsigned 2-byte integer. + IMAGE_SYM_TYPE_UINT = 14, ///< An unsigned integer of natural size. 
+ IMAGE_SYM_TYPE_DWORD = 15 ///< An unsigned 4-byte integer. + }; + + enum SymbolComplexType { + IMAGE_SYM_DTYPE_NULL = 0, ///< No complex type; simple scalar variable. + IMAGE_SYM_DTYPE_POINTER = 1, ///< A pointer to base type. + IMAGE_SYM_DTYPE_FUNCTION = 2, ///< A function that returns a base type. + IMAGE_SYM_DTYPE_ARRAY = 3, ///< An array of base type. + + /// Type is formed as (base + (derived << SCT_COMPLEX_TYPE_SHIFT)) + SCT_COMPLEX_TYPE_SHIFT = 4 + }; + + struct section { + char Name[NameSize]; + uint32_t VirtualSize; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + uint32_t Characteristics; + }; + + enum SectionCharacteristics { + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, + IMAGE_SCN_CNT_CODE = 0x00000020, + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, + IMAGE_SCN_LNK_OTHER = 0x00000100, + IMAGE_SCN_LNK_INFO = 0x00000200, + IMAGE_SCN_LNK_REMOVE = 0x00000800, + IMAGE_SCN_LNK_COMDAT = 0x00001000, + IMAGE_SCN_GPREL = 0x00008000, + IMAGE_SCN_MEM_PURGEABLE = 0x00020000, + IMAGE_SCN_MEM_16BIT = 0x00020000, + IMAGE_SCN_MEM_LOCKED = 0x00040000, + IMAGE_SCN_MEM_PRELOAD = 0x00080000, + IMAGE_SCN_ALIGN_1BYTES = 0x00100000, + IMAGE_SCN_ALIGN_2BYTES = 0x00200000, + IMAGE_SCN_ALIGN_4BYTES = 0x00300000, + IMAGE_SCN_ALIGN_8BYTES = 0x00400000, + IMAGE_SCN_ALIGN_16BYTES = 0x00500000, + IMAGE_SCN_ALIGN_32BYTES = 0x00600000, + IMAGE_SCN_ALIGN_64BYTES = 0x00700000, + IMAGE_SCN_ALIGN_128BYTES = 0x00800000, + IMAGE_SCN_ALIGN_256BYTES = 0x00900000, + IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, + IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, + IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, + IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, + IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, + IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, + IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, + IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, + IMAGE_SCN_MEM_SHARED = 0x10000000, + IMAGE_SCN_MEM_EXECUTE = 0x20000000, + IMAGE_SCN_MEM_READ = 0x40000000, + IMAGE_SCN_MEM_WRITE = 0x80000000 + }; + + struct relocation { + uint32_t VirtualAddress; + uint32_t SymbolTableIndex; + uint16_t Type; + }; + + enum RelocationTypeX86 { + IMAGE_REL_I386_ABSOLUTE = 0x0000, + IMAGE_REL_I386_DIR16 = 0x0001, + IMAGE_REL_I386_REL16 = 0x0002, + IMAGE_REL_I386_DIR32 = 0x0006, + IMAGE_REL_I386_DIR32NB = 0x0007, + IMAGE_REL_I386_SEG12 = 0x0009, + IMAGE_REL_I386_SECTION = 0x000A, + IMAGE_REL_I386_SECREL = 0x000B, + IMAGE_REL_I386_TOKEN = 0x000C, + IMAGE_REL_I386_SECREL7 = 0x000D, + IMAGE_REL_I386_REL32 = 0x0014, + + IMAGE_REL_AMD64_ABSOLUTE = 0x0000, + IMAGE_REL_AMD64_ADDR64 = 0x0001, + IMAGE_REL_AMD64_ADDR32 = 0x0002, + IMAGE_REL_AMD64_ADDR32NB = 0x0003, + IMAGE_REL_AMD64_REL32 = 0x0004, + IMAGE_REL_AMD64_REL32_1 = 0x0005, + IMAGE_REL_AMD64_REL32_2 = 0x0006, + IMAGE_REL_AMD64_REL32_3 = 0x0007, + IMAGE_REL_AMD64_REL32_4 = 0x0008, + IMAGE_REL_AMD64_REL32_5 = 0x0009, + IMAGE_REL_AMD64_SECTION = 0x000A, + IMAGE_REL_AMD64_SECREL = 0x000B, + IMAGE_REL_AMD64_SECREL7 = 0x000C, + IMAGE_REL_AMD64_TOKEN = 0x000D, + IMAGE_REL_AMD64_SREL32 = 0x000E, + IMAGE_REL_AMD64_PAIR = 0x000F, + IMAGE_REL_AMD64_SSPAN32 = 0x0010 + }; + + enum COMDATType { + IMAGE_COMDAT_SELECT_NODUPLICATES = 1, + IMAGE_COMDAT_SELECT_ANY, + IMAGE_COMDAT_SELECT_SAME_SIZE, + IMAGE_COMDAT_SELECT_EXACT_MATCH, + IMAGE_COMDAT_SELECT_ASSOCIATIVE, + IMAGE_COMDAT_SELECT_LARGEST + }; + + // Auxiliary Symbol 
Formats + struct AuxiliaryFunctionDefinition { + uint32_t TagIndex; + uint32_t TotalSize; + uint32_t PointerToLinenumber; + uint32_t PointerToNextFunction; + uint8_t unused[2]; + }; + + struct AuxiliarybfAndefSymbol { + uint8_t unused1[4]; + uint16_t Linenumber; + uint8_t unused2[6]; + uint32_t PointerToNextFunction; + uint8_t unused3[2]; + }; + + struct AuxiliaryWeakExternal { + uint32_t TagIndex; + uint32_t Characteristics; + uint8_t unused[10]; + }; + + /// These are not documented in the spec, but are located in WinNT.h. + enum WeakExternalCharacteristics { + IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY = 1, + IMAGE_WEAK_EXTERN_SEARCH_LIBRARY = 2, + IMAGE_WEAK_EXTERN_SEARCH_ALIAS = 3 + }; + + struct AuxiliaryFile { + uint8_t FileName[18]; + }; + + struct AuxiliarySectionDefinition { + uint32_t Length; + uint16_t NumberOfRelocations; + uint16_t NumberOfLinenumbers; + uint32_t CheckSum; + uint16_t Number; + uint8_t Selection; + uint8_t unused[3]; + }; + + union Auxiliary { + AuxiliaryFunctionDefinition FunctionDefinition; + AuxiliarybfAndefSymbol bfAndefSymbol; + AuxiliaryWeakExternal WeakExternal; + AuxiliaryFile File; + AuxiliarySectionDefinition SectionDefinition; + }; + +} // End namespace llvm. +} // End namespace COFF. + +#endif diff --git a/final/include/llvm/Support/CallSite.h b/final/include/llvm/Support/CallSite.h new file mode 100644 index 00000000000..8a998a8cd0d --- /dev/null +++ b/final/include/llvm/Support/CallSite.h @@ -0,0 +1,301 @@ +//===-- llvm/Support/CallSite.h - Abstract Call & Invoke instrs -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the CallSite class, which is a handy wrapper for code that +// wants to treat Call and Invoke instructions in a generic way. When in non- +// mutation context (e.g. an analysis) ImmutableCallSite should be used. +// Finally, when some degree of customization is necessary between these two +// extremes, CallSiteBase<> can be supplied with fine-tuned parameters. +// +// NOTE: These classes are supposed to have "value semantics". So they should be +// passed by value, not by reference; they should not be "new"ed or "delete"d. +// They are efficiently copyable, assignable and constructable, with cost +// equivalent to copying a pointer (notice that they have only a single data +// member). The internal representation carries a flag which indicates which of +// the two variants is enclosed. This allows for cheaper checks when various +// accessors of CallSite are employed. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CALLSITE_H +#define LLVM_SUPPORT_CALLSITE_H + +#include "llvm/Attributes.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/BasicBlock.h" +#include "llvm/CallingConv.h" +#include "llvm/Instructions.h" + +namespace llvm { + +class CallInst; +class InvokeInst; + +template +class CallSiteBase { +protected: + PointerIntPair I; +public: + CallSiteBase() : I(0, false) {} + CallSiteBase(CallTy *CI) : I(CI, true) { assert(CI); } + CallSiteBase(InvokeTy *II) : I(II, false) { assert(II); } + CallSiteBase(ValTy *II) { *this = get(II); } +protected: + /// CallSiteBase::get - This static method is sort of like a constructor. 
It + /// will create an appropriate call site for a Call or Invoke instruction, but + /// it can also create a null initialized CallSiteBase object for something + /// which is NOT a call site. + /// + static CallSiteBase get(ValTy *V) { + if (InstrTy *II = dyn_cast(V)) { + if (II->getOpcode() == Instruction::Call) + return CallSiteBase(static_cast(II)); + else if (II->getOpcode() == Instruction::Invoke) + return CallSiteBase(static_cast(II)); + } + return CallSiteBase(); + } +public: + /// isCall - true if a CallInst is enclosed. + /// Note that !isCall() does not mean it is an InvokeInst enclosed, + /// it also could signify a NULL Instruction pointer. + bool isCall() const { return I.getInt(); } + + /// isInvoke - true if a InvokeInst is enclosed. + /// + bool isInvoke() const { return getInstruction() && !I.getInt(); } + + InstrTy *getInstruction() const { return I.getPointer(); } + InstrTy *operator->() const { return I.getPointer(); } + operator bool() const { return I.getPointer(); } + + /// getCalledValue - Return the pointer to function that is being called... + /// + ValTy *getCalledValue() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + return *getCallee(); + } + + /// getCalledFunction - Return the function being called if this is a direct + /// call, otherwise return null (if it's an indirect call). + /// + FunTy *getCalledFunction() const { + return dyn_cast(getCalledValue()); + } + + /// setCalledFunction - Set the callee to the specified value... + /// + void setCalledFunction(Value *V) { + assert(getInstruction() && "Not a call or invoke instruction!"); + *getCallee() = V; + } + + /// isCallee - Determine whether the passed iterator points to the + /// callee operand's Use. + /// + bool isCallee(value_use_iterator UI) const { + return getCallee() == &UI.getUse(); + } + + ValTy *getArgument(unsigned ArgNo) const { + assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!"); + return *(arg_begin() + ArgNo); + } + + void setArgument(unsigned ArgNo, Value* newVal) { + assert(getInstruction() && "Not a call or invoke instruction!"); + assert(arg_begin() + ArgNo < arg_end() && "Argument # out of range!"); + getInstruction()->setOperand(ArgNo, newVal); + } + + /// Given a value use iterator, returns the argument that corresponds to it. + /// Iterator must actually correspond to an argument. + unsigned getArgumentNo(value_use_iterator I) const { + assert(getInstruction() && "Not a call or invoke instruction!"); + assert(arg_begin() <= &I.getUse() && &I.getUse() < arg_end() + && "Argument # out of range!"); + return &I.getUse() - arg_begin(); + } + + /// arg_iterator - The type of iterator to use when looping over actual + /// arguments at this call site... + typedef IterTy arg_iterator; + + /// arg_begin/arg_end - Return iterators corresponding to the actual argument + /// list for a call site. 
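+  /// A typical traversal (sketch only; Worklist stands in for any container
+  /// of Value pointers) looks like:
+  ///
+  ///   for (CallSite::arg_iterator AI = CS.arg_begin(), E = CS.arg_end();
+  ///        AI != E; ++AI)
+  ///     Worklist.push_back(*AI);  // each *AI converts to the argument Value*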
+ IterTy arg_begin() const { + assert(getInstruction() && "Not a call or invoke instruction!"); + // Skip non-arguments + return (*this)->op_begin(); + } + + IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); } + bool arg_empty() const { return arg_end() == arg_begin(); } + unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } + + /// getType - Return the type of the instruction that generated this call site + /// + const Type *getType() const { return (*this)->getType(); } + + /// getCaller - Return the caller function for this call site + /// + FunTy *getCaller() const { return (*this)->getParent()->getParent(); } + +#define CALLSITE_DELEGATE_GETTER(METHOD) \ + InstrTy *II = getInstruction(); \ + return isCall() \ + ? cast(II)->METHOD \ + : cast(II)->METHOD + +#define CALLSITE_DELEGATE_SETTER(METHOD) \ + InstrTy *II = getInstruction(); \ + if (isCall()) \ + cast(II)->METHOD; \ + else \ + cast(II)->METHOD + + /// getCallingConv/setCallingConv - get or set the calling convention of the + /// call. + CallingConv::ID getCallingConv() const { + CALLSITE_DELEGATE_GETTER(getCallingConv()); + } + void setCallingConv(CallingConv::ID CC) { + CALLSITE_DELEGATE_SETTER(setCallingConv(CC)); + } + + /// getAttributes/setAttributes - get or set the parameter attributes of + /// the call. + const AttrListPtr &getAttributes() const { + CALLSITE_DELEGATE_GETTER(getAttributes()); + } + void setAttributes(const AttrListPtr &PAL) { + CALLSITE_DELEGATE_SETTER(setAttributes(PAL)); + } + + /// paramHasAttr - whether the call or the callee has the given attribute. + bool paramHasAttr(uint16_t i, Attributes attr) const { + CALLSITE_DELEGATE_GETTER(paramHasAttr(i, attr)); + } + + /// @brief Extract the alignment for a call or parameter (0=unknown). + uint16_t getParamAlignment(uint16_t i) const { + CALLSITE_DELEGATE_GETTER(getParamAlignment(i)); + } + + /// @brief Return true if the call should not be inlined. + bool isNoInline() const { + CALLSITE_DELEGATE_GETTER(isNoInline()); + } + void setIsNoInline(bool Value = true) { + CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); + } + + /// @brief Determine if the call does not access memory. + bool doesNotAccessMemory() const { + CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); + } + void setDoesNotAccessMemory(bool doesNotAccessMemory = true) { + CALLSITE_DELEGATE_SETTER(setDoesNotAccessMemory(doesNotAccessMemory)); + } + + /// @brief Determine if the call does not access or only reads memory. + bool onlyReadsMemory() const { + CALLSITE_DELEGATE_GETTER(onlyReadsMemory()); + } + void setOnlyReadsMemory(bool onlyReadsMemory = true) { + CALLSITE_DELEGATE_SETTER(setOnlyReadsMemory(onlyReadsMemory)); + } + + /// @brief Determine if the call cannot return. + bool doesNotReturn() const { + CALLSITE_DELEGATE_GETTER(doesNotReturn()); + } + void setDoesNotReturn(bool doesNotReturn = true) { + CALLSITE_DELEGATE_SETTER(setDoesNotReturn(doesNotReturn)); + } + + /// @brief Determine if the call cannot unwind. + bool doesNotThrow() const { + CALLSITE_DELEGATE_GETTER(doesNotThrow()); + } + void setDoesNotThrow(bool doesNotThrow = true) { + CALLSITE_DELEGATE_SETTER(setDoesNotThrow(doesNotThrow)); + } + +#undef CALLSITE_DELEGATE_GETTER +#undef CALLSITE_DELEGATE_SETTER + + /// hasArgument - Returns true if this CallSite passes the given Value* as an + /// argument to the called function. 
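+  /// For a call 'foo(X, Y)', for instance, hasArgument(X) and hasArgument(Y)
+  /// are true while hasArgument(foo) is false, since the callee operand is not
+  /// part of the argument range.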
+ bool hasArgument(const Value *Arg) const { + for (arg_iterator AI = this->arg_begin(), E = this->arg_end(); AI != E; + ++AI) + if (AI->get() == Arg) + return true; + return false; + } + +private: + unsigned getArgumentEndOffset() const { + if (isCall()) + return 1; // Skip Callee + else + return 3; // Skip BB, BB, Callee + } + + IterTy getCallee() const { + if (isCall()) // Skip Callee + return cast(getInstruction())->op_end() - 1; + else // Skip BB, BB, Callee + return cast(getInstruction())->op_end() - 3; + } +}; + +class CallSite : public CallSiteBase { + typedef CallSiteBase Base; +public: + CallSite() {} + CallSite(Base B) : Base(B) {} + CallSite(Value* V) : Base(V) {} + CallSite(CallInst *CI) : Base(CI) {} + CallSite(InvokeInst *II) : Base(II) {} + CallSite(Instruction *II) : Base(II) {} + + bool operator==(const CallSite &CS) const { return I == CS.I; } + bool operator!=(const CallSite &CS) const { return I != CS.I; } + bool operator<(const CallSite &CS) const { + return getInstruction() < CS.getInstruction(); + } + +private: + User::op_iterator getCallee() const; +}; + +/// ImmutableCallSite - establish a view to a call site for examination +class ImmutableCallSite : public CallSiteBase<> { + typedef CallSiteBase<> Base; +public: + ImmutableCallSite(const Value* V) : Base(V) {} + ImmutableCallSite(const CallInst *CI) : Base(CI) {} + ImmutableCallSite(const InvokeInst *II) : Base(II) {} + ImmutableCallSite(const Instruction *II) : Base(II) {} + ImmutableCallSite(CallSite CS) : Base(CS.getInstruction()) {} +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Support/Casting.h b/final/include/llvm/Support/Casting.h new file mode 100644 index 00000000000..6bb98064382 --- /dev/null +++ b/final/include/llvm/Support/Casting.h @@ -0,0 +1,241 @@ +//===-- llvm/Support/Casting.h - Allow flexible, checked, casts -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the isa(), cast(), dyn_cast(), cast_or_null(), +// and dyn_cast_or_null() templates. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CASTING_H +#define LLVM_SUPPORT_CASTING_H + +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +// isa Support Templates +//===----------------------------------------------------------------------===// + +template struct isa_impl_cl; + +// Define a template that can be specialized by smart pointers to reflect the +// fact that they are automatically dereferenced, and are not involved with the +// template selection process... the default implementation is a noop. +// +template struct simplify_type { + typedef From SimpleType; // The real type this represents... + + // An accessor to get the real value... + static SimpleType &getSimplifiedValue(From &Val) { return Val; } +}; + +template struct simplify_type { + typedef const From SimpleType; + static SimpleType &getSimplifiedValue(const From &Val) { + return simplify_type::getSimplifiedValue(static_cast(Val)); + } +}; + + +// isa - Return true if the parameter to the template is an instance of the +// template type argument. Used like this: +// +// if (isa(myVal)) { ... 
} +// +template +struct isa_impl { + static inline bool doit(const From &Val) { + return To::classof(&Val); + } +}; + +template +struct isa_impl_wrap { + // When From != SimplifiedType, we can simplify the type some more by using + // the simplify_type template. + static bool doit(const From &Val) { + return isa_impl_cl::template + isa(simplify_type::getSimplifiedValue(Val)); + } +}; + +template +struct isa_impl_wrap { + // When From == SimpleType, we are as simple as we are going to get. + static bool doit(const FromTy &Val) { + return isa_impl::doit(Val); + } +}; + +// isa_impl_cl - Use class partial specialization to transform types to a single +// canonical form for isa_impl. +// +template +struct isa_impl_cl { + template + static bool isa(const FromCl &Val) { + return isa_impl_wrap::SimpleType>::doit(Val); + } +}; + +// Specialization used to strip const qualifiers off of the FromCl type... +template +struct isa_impl_cl { + template + static bool isa(const FromCl &Val) { + return isa_impl_cl::template isa(Val); + } +}; + +// Define pointer traits in terms of base traits... +template +struct isa_impl_cl { + template + static bool isa(FromCl *Val) { + return isa_impl_cl::template isa(*Val); + } +}; + +// Define reference traits in terms of base traits... +template +struct isa_impl_cl { + template + static bool isa(FromCl &Val) { + return isa_impl_cl::template isa(&Val); + } +}; + +template +inline bool isa(const Y &Val) { + return isa_impl_cl::template isa(Val); +} + +//===----------------------------------------------------------------------===// +// cast Support Templates +//===----------------------------------------------------------------------===// + +template struct cast_retty; + + +// Calculate what type the 'cast' function should return, based on a requested +// type of To and a source type of From. +template struct cast_retty_impl { + typedef To& ret_type; // Normal case, return Ty& +}; +template struct cast_retty_impl { + typedef const To &ret_type; // Normal case, return Ty& +}; + +template struct cast_retty_impl { + typedef To* ret_type; // Pointer arg case, return Ty* +}; + +template struct cast_retty_impl { + typedef const To* ret_type; // Constant pointer arg case, return const Ty* +}; + +template struct cast_retty_impl { + typedef const To* ret_type; // Constant pointer arg case, return const Ty* +}; + + +template +struct cast_retty_wrap { + // When the simplified type and the from type are not the same, use the type + // simplifier to reduce the type, then reuse cast_retty_impl to get the + // resultant type. + typedef typename cast_retty::ret_type ret_type; +}; + +template +struct cast_retty_wrap { + // When the simplified type is equal to the from type, use it directly. + typedef typename cast_retty_impl::ret_type ret_type; +}; + +template +struct cast_retty { + typedef typename cast_retty_wrap::SimpleType>::ret_type ret_type; +}; + +// Ensure the non-simple values are converted using the simplify_type template +// that may be specialized by smart pointers... +// +template struct cast_convert_val { + // This is not a simple type, use the template to simplify it... + static typename cast_retty::ret_type doit(const From &Val) { + return cast_convert_val::SimpleType>::doit( + simplify_type::getSimplifiedValue(Val)); + } +}; + +template struct cast_convert_val { + // This _is_ a simple type, just cast it. 
+ static typename cast_retty::ret_type doit(const FromTy &Val) { + typename cast_retty::ret_type Res2 + = (typename cast_retty::ret_type)const_cast(Val); + return Res2; + } +}; + + + +// cast - Return the argument parameter cast to the specified type. This +// casting operator asserts that the type is correct, so it does not return null +// on failure. But it will correctly return NULL when the input is NULL. +// Used Like this: +// +// cast(myVal)->getParent() +// +template +inline typename cast_retty::ret_type cast(const Y &Val) { + assert(isa(Val) && "cast() argument of incompatible type!"); + return cast_convert_val::SimpleType>::doit(Val); +} + +// cast_or_null - Functionally identical to cast, except that a null value is +// accepted. +// +template +inline typename cast_retty::ret_type cast_or_null(Y *Val) { + if (Val == 0) return 0; + assert(isa(Val) && "cast_or_null() argument of incompatible type!"); + return cast(Val); +} + + +// dyn_cast - Return the argument parameter cast to the specified type. This +// casting operator returns null if the argument is of the wrong type, so it can +// be used to test for a type as well as cast if successful. This should be +// used in the context of an if statement like this: +// +// if (const Instruction *I = dyn_cast(myVal)) { ... } +// + +template +inline typename cast_retty::ret_type dyn_cast(const Y &Val) { + return isa(Val) ? cast(Val) : 0; +} + +// dyn_cast_or_null - Functionally identical to dyn_cast, except that a null +// value is accepted. +// +template +inline typename cast_retty::ret_type dyn_cast_or_null(Y *Val) { + return (Val && isa(Val)) ? cast(Val) : 0; +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Support/CommandLine.h b/final/include/llvm/Support/CommandLine.h new file mode 100644 index 00000000000..9ae3d6af32e --- /dev/null +++ b/final/include/llvm/Support/CommandLine.h @@ -0,0 +1,1391 @@ +//===- llvm/Support/CommandLine.h - Command line handler --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements a command line argument processor that is useful when +// creating a tool. It provides a simple, minimalistic interface that is easily +// extensible and supports nonlocal (library) command line options. +// +// Note that rather than trying to figure out what this code does, you should +// read the library documentation located in docs/CommandLine.html or looks at +// the many example usages in tools/*/*.cpp +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_COMMANDLINE_H +#define LLVM_SUPPORT_COMMANDLINE_H + +#include "llvm/Support/type_traits.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include +#include +#include + +namespace llvm { + +/// cl Namespace - This namespace contains all of the command line option +/// processing machinery. It is intentionally a short name to make qualified +/// usage concise. +namespace cl { + +//===----------------------------------------------------------------------===// +// ParseCommandLineOptions - Command line option processing entry point. 
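+//
+// A tool typically declares its options at global scope with cl::opt (defined
+// later in this header) and then hands main()'s arguments to this function;
+// a minimal sketch:
+//
+//   static cl::opt<std::string> InputFilename(cl::Positional,
+//                                             cl::desc("<input file>"));
+//   int main(int argc, char **argv) {
+//     cl::ParseCommandLineOptions(argc, argv, "my tool\n");
+//     // ... use InputFilename ...
+//   }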
+// +void ParseCommandLineOptions(int argc, char **argv, + const char *Overview = 0, + bool ReadResponseFiles = false); + +//===----------------------------------------------------------------------===// +// ParseEnvironmentOptions - Environment variable option processing alternate +// entry point. +// +void ParseEnvironmentOptions(const char *progName, const char *envvar, + const char *Overview = 0, + bool ReadResponseFiles = false); + +///===---------------------------------------------------------------------===// +/// SetVersionPrinter - Override the default (LLVM specific) version printer +/// used to print out the version when --version is given +/// on the command line. This allows other systems using the +/// CommandLine utilities to print their own version string. +void SetVersionPrinter(void (*func)()); + + +// MarkOptionsChanged - Internal helper function. +void MarkOptionsChanged(); + +//===----------------------------------------------------------------------===// +// Flags permitted to be passed to command line arguments +// + +enum NumOccurrencesFlag { // Flags for the number of occurrences allowed + Optional = 0x01, // Zero or One occurrence + ZeroOrMore = 0x02, // Zero or more occurrences allowed + Required = 0x03, // One occurrence required + OneOrMore = 0x04, // One or more occurrences required + + // ConsumeAfter - Indicates that this option is fed anything that follows the + // last positional argument required by the application (it is an error if + // there are zero positional arguments, and a ConsumeAfter option is used). + // Thus, for example, all arguments to LLI are processed until a filename is + // found. Once a filename is found, all of the succeeding arguments are + // passed, unprocessed, to the ConsumeAfter option. + // + ConsumeAfter = 0x05, + + OccurrencesMask = 0x07 +}; + +enum ValueExpected { // Is a value required for the option? + ValueOptional = 0x08, // The value can appear... or not + ValueRequired = 0x10, // The value is required to appear! + ValueDisallowed = 0x18, // A value may not be specified (for flags) + ValueMask = 0x18 +}; + +enum OptionHidden { // Control whether -help shows this option + NotHidden = 0x20, // Option included in -help & -help-hidden + Hidden = 0x40, // -help doesn't, but -help-hidden does + ReallyHidden = 0x60, // Neither -help nor -help-hidden show this arg + HiddenMask = 0x60 +}; + +// Formatting flags - This controls special features that the option might have +// that cause it to be parsed differently... +// +// Prefix - This option allows arguments that are otherwise unrecognized to be +// matched by options that are a prefix of the actual value. This is useful for +// cases like a linker, where options are typically of the form '-lfoo' or +// '-L../../include' where -l or -L are the actual flags. When prefix is +// enabled, and used, the value for the flag comes from the suffix of the +// argument. +// +// Grouping - With this option enabled, multiple letter options are allowed to +// bunch together with only a single hyphen for the whole group. This allows +// emulation of the behavior that ls uses for example: ls -la === ls -l -a +// + +enum FormattingFlags { + NormalFormatting = 0x000, // Nothing special + Positional = 0x080, // Is a positional argument, no '-' required + Prefix = 0x100, // Can this option directly prefix its value? + Grouping = 0x180, // Can this option group with other options? + FormattingMask = 0x180 // Union of the above flags. 
+}; + +enum MiscFlags { // Miscellaneous flags to adjust argument + CommaSeparated = 0x200, // Should this cl::list split between commas? + PositionalEatsArgs = 0x400, // Should this positional cl::list eat -args? + Sink = 0x800, // Should this cl::list eat all unknown options? + MiscMask = 0xE00 // Union of the above flags. +}; + + + +//===----------------------------------------------------------------------===// +// Option Base class +// +class alias; +class Option { + friend class alias; + + // handleOccurrences - Overriden by subclasses to handle the value passed into + // an argument. Should return true if there was an error processing the + // argument and the program should exit. + // + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, + StringRef Arg) = 0; + + virtual enum ValueExpected getValueExpectedFlagDefault() const { + return ValueOptional; + } + + // Out of line virtual function to provide home for the class. + virtual void anchor(); + + int NumOccurrences; // The number of times specified + int Flags; // Flags for the argument + unsigned Position; // Position of last occurrence of the option + unsigned AdditionalVals;// Greater than 0 for multi-valued option. + Option *NextRegistered; // Singly linked list of registered options. +public: + const char *ArgStr; // The argument string itself (ex: "help", "o") + const char *HelpStr; // The descriptive text message for -help + const char *ValueStr; // String describing what the value of this option is + + inline enum NumOccurrencesFlag getNumOccurrencesFlag() const { + return static_cast(Flags & OccurrencesMask); + } + inline enum ValueExpected getValueExpectedFlag() const { + int VE = Flags & ValueMask; + return VE ? static_cast(VE) + : getValueExpectedFlagDefault(); + } + inline enum OptionHidden getOptionHiddenFlag() const { + return static_cast(Flags & HiddenMask); + } + inline enum FormattingFlags getFormattingFlag() const { + return static_cast(Flags & FormattingMask); + } + inline unsigned getMiscFlags() const { + return Flags & MiscMask; + } + inline unsigned getPosition() const { return Position; } + inline unsigned getNumAdditionalVals() const { return AdditionalVals; } + + // hasArgStr - Return true if the argstr != "" + bool hasArgStr() const { return ArgStr[0] != 0; } + + //-------------------------------------------------------------------------=== + // Accessor functions set by OptionModifiers + // + void setArgStr(const char *S) { ArgStr = S; } + void setDescription(const char *S) { HelpStr = S; } + void setValueStr(const char *S) { ValueStr = S; } + + void setFlag(unsigned Flag, unsigned FlagMask) { + Flags &= ~FlagMask; + Flags |= Flag; + } + + void setNumOccurrencesFlag(enum NumOccurrencesFlag Val) { + setFlag(Val, OccurrencesMask); + } + void setValueExpectedFlag(enum ValueExpected Val) { setFlag(Val, ValueMask); } + void setHiddenFlag(enum OptionHidden Val) { setFlag(Val, HiddenMask); } + void setFormattingFlag(enum FormattingFlags V) { setFlag(V, FormattingMask); } + void setMiscFlag(enum MiscFlags M) { setFlag(M, M); } + void setPosition(unsigned pos) { Position = pos; } +protected: + explicit Option(unsigned DefaultFlags) + : NumOccurrences(0), Flags(DefaultFlags | NormalFormatting), Position(0), + AdditionalVals(0), NextRegistered(0), + ArgStr(""), HelpStr(""), ValueStr("") { + assert(getNumOccurrencesFlag() != 0 && + getOptionHiddenFlag() != 0 && "Not all default flags specified!"); + } + + inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; } +public: + // addArgument - 
Register this argument with the commandline system. + // + void addArgument(); + + Option *getNextRegisteredOption() const { return NextRegistered; } + + // Return the width of the option tag for printing... + virtual size_t getOptionWidth() const = 0; + + // printOptionInfo - Print out information about this option. The + // to-be-maintained width is specified. + // + virtual void printOptionInfo(size_t GlobalWidth) const = 0; + + virtual void getExtraOptionNames(SmallVectorImpl &) {} + + // addOccurrence - Wrapper around handleOccurrence that enforces Flags. + // + bool addOccurrence(unsigned pos, StringRef ArgName, + StringRef Value, bool MultiArg = false); + + // Prints option name followed by message. Always returns true. + bool error(const Twine &Message, StringRef ArgName = StringRef()); + +public: + inline int getNumOccurrences() const { return NumOccurrences; } + virtual ~Option() {} +}; + + +//===----------------------------------------------------------------------===// +// Command line option modifiers that can be used to modify the behavior of +// command line option parsers... +// + +// desc - Modifier to set the description shown in the -help output... +struct desc { + const char *Desc; + desc(const char *Str) : Desc(Str) {} + void apply(Option &O) const { O.setDescription(Desc); } +}; + +// value_desc - Modifier to set the value description shown in the -help +// output... +struct value_desc { + const char *Desc; + value_desc(const char *Str) : Desc(Str) {} + void apply(Option &O) const { O.setValueStr(Desc); } +}; + +// init - Specify a default (initial) value for the command line argument, if +// the default constructor for the argument type does not give you what you +// want. This is only valid on "opt" arguments, not on "list" arguments. +// +template +struct initializer { + const Ty &Init; + initializer(const Ty &Val) : Init(Val) {} + + template + void apply(Opt &O) const { O.setInitialValue(Init); } +}; + +template +initializer init(const Ty &Val) { + return initializer(Val); +} + + +// location - Allow the user to specify which external variable they want to +// store the results of the command line argument processing into, if they don't +// want to store it in the option itself. +// +template +struct LocationClass { + Ty &Loc; + LocationClass(Ty &L) : Loc(L) {} + + template + void apply(Opt &O) const { O.setLocation(O, Loc); } +}; + +template +LocationClass location(Ty &L) { return LocationClass(L); } + + +//===----------------------------------------------------------------------===// +// Enum valued command line option +// +#define clEnumVal(ENUMVAL, DESC) #ENUMVAL, int(ENUMVAL), DESC +#define clEnumValN(ENUMVAL, FLAGNAME, DESC) FLAGNAME, int(ENUMVAL), DESC +#define clEnumValEnd (reinterpret_cast(0)) + +// values - For custom data types, allow specifying a group of values together +// as the values that go into the mapping that the option handler uses. Note +// that the values list must always have a 0 at the end of the list to indicate +// that the list has ended. +// +template +class ValuesClass { + // Use a vector instead of a map, because the lists should be short, + // the overhead is less, and most importantly, it keeps them in the order + // inserted so we can print our option out nicely. + SmallVector >,4> Values; + void processValues(va_list Vals); +public: + ValuesClass(const char *EnumName, DataType Val, const char *Desc, + va_list ValueArgs) { + // Insert the first value, which is required. 
+ Values.push_back(std::make_pair(EnumName, std::make_pair(Val, Desc))); + + // Process the varargs portion of the values... + while (const char *enumName = va_arg(ValueArgs, const char *)) { + DataType EnumVal = static_cast(va_arg(ValueArgs, int)); + const char *EnumDesc = va_arg(ValueArgs, const char *); + Values.push_back(std::make_pair(enumName, // Add value to value map + std::make_pair(EnumVal, EnumDesc))); + } + } + + template + void apply(Opt &O) const { + for (unsigned i = 0, e = static_cast(Values.size()); + i != e; ++i) + O.getParser().addLiteralOption(Values[i].first, Values[i].second.first, + Values[i].second.second); + } +}; + +template +ValuesClass END_WITH_NULL values(const char *Arg, DataType Val, + const char *Desc, ...) { + va_list ValueArgs; + va_start(ValueArgs, Desc); + ValuesClass Vals(Arg, Val, Desc, ValueArgs); + va_end(ValueArgs); + return Vals; +} + + +//===----------------------------------------------------------------------===// +// parser class - Parameterizable parser for different data types. By default, +// known data types (string, int, bool) have specialized parsers, that do what +// you would expect. The default parser, used for data types that are not +// built-in, uses a mapping table to map specific options to values, which is +// used, among other things, to handle enum types. + +//-------------------------------------------------- +// generic_parser_base - This class holds all the non-generic code that we do +// not need replicated for every instance of the generic parser. This also +// allows us to put stuff into CommandLine.cpp +// +struct generic_parser_base { + virtual ~generic_parser_base() {} // Base class should have virtual-dtor + + // getNumOptions - Virtual function implemented by generic subclass to + // indicate how many entries are in Values. + // + virtual unsigned getNumOptions() const = 0; + + // getOption - Return option name N. + virtual const char *getOption(unsigned N) const = 0; + + // getDescription - Return description N + virtual const char *getDescription(unsigned N) const = 0; + + // Return the width of the option tag for printing... + virtual size_t getOptionWidth(const Option &O) const; + + // printOptionInfo - Print out information about this option. The + // to-be-maintained width is specified. + // + virtual void printOptionInfo(const Option &O, size_t GlobalWidth) const; + + void initialize(Option &O) { + // All of the modifiers for the option have been processed by now, so the + // argstr field should be stable, copy it down now. + // + hasArgStr = O.hasArgStr(); + } + + void getExtraOptionNames(SmallVectorImpl &OptionNames) { + // If there has been no argstr specified, that means that we need to add an + // argument for every possible option. This ensures that our options are + // vectored to us. + if (!hasArgStr) + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + OptionNames.push_back(getOption(i)); + } + + + enum ValueExpected getValueExpectedFlagDefault() const { + // If there is an ArgStr specified, then we are of the form: + // + // -opt=O2 or -opt O2 or -optO2 + // + // In which case, the value is required. Otherwise if an arg str has not + // been specified, we are of the form: + // + // -O2 or O2 or -la (where -l and -a are separate options) + // + // If this is the case, we cannot allow a value. + // + if (hasArgStr) + return ValueRequired; + else + return ValueDisallowed; + } + + // findOption - Return the option number corresponding to the specified + // argument string. 
If the option is not found, getNumOptions() is returned. + // + unsigned findOption(const char *Name); + +protected: + bool hasArgStr; +}; + +// Default parser implementation - This implementation depends on having a +// mapping of recognized options to values of some sort. In addition to this, +// each entry in the mapping also tracks a help message that is printed with the +// command line option for -help. Because this is a simple mapping parser, the +// data type can be any unsupported type. +// +template +class parser : public generic_parser_base { +protected: + class OptionInfo { + public: + OptionInfo(const char *name, DataType v, const char *helpStr) : + Name(name), V(v), HelpStr(helpStr) {} + const char *Name; + DataType V; + const char *HelpStr; + }; + SmallVector Values; +public: + typedef DataType parser_data_type; + + // Implement virtual functions needed by generic_parser_base + unsigned getNumOptions() const { return unsigned(Values.size()); } + const char *getOption(unsigned N) const { return Values[N].Name; } + const char *getDescription(unsigned N) const { + return Values[N].HelpStr; + } + + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) { + StringRef ArgVal; + if (hasArgStr) + ArgVal = Arg; + else + ArgVal = ArgName; + + for (unsigned i = 0, e = static_cast(Values.size()); + i != e; ++i) + if (Values[i].Name == ArgVal) { + V = Values[i].V; + return false; + } + + return O.error("Cannot find option named '" + ArgVal + "'!"); + } + + /// addLiteralOption - Add an entry to the mapping table. + /// + template + void addLiteralOption(const char *Name, const DT &V, const char *HelpStr) { + assert(findOption(Name) == Values.size() && "Option already exists!"); + OptionInfo X(Name, static_cast(V), HelpStr); + Values.push_back(X); + MarkOptionsChanged(); + } + + /// removeLiteralOption - Remove the specified option. + /// + void removeLiteralOption(const char *Name) { + unsigned N = findOption(Name); + assert(N != Values.size() && "Option not found!"); + Values.erase(Values.begin()+N); + } +}; + +//-------------------------------------------------- +// basic_parser - Super class of parsers to provide boilerplate code +// +class basic_parser_impl { // non-template implementation of basic_parser +public: + virtual ~basic_parser_impl() {} + + enum ValueExpected getValueExpectedFlagDefault() const { + return ValueRequired; + } + + void getExtraOptionNames(SmallVectorImpl &) {} + + void initialize(Option &) {} + + // Return the width of the option tag for printing... + size_t getOptionWidth(const Option &O) const; + + // printOptionInfo - Print out information about this option. The + // to-be-maintained width is specified. + // + void printOptionInfo(const Option &O, size_t GlobalWidth) const; + + // getValueName - Overload in subclass to provide a better default value. + virtual const char *getValueName() const { return "value"; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +// basic_parser - The real basic parser is just a template wrapper that provides +// a typedef for the provided data type. +// +template +class basic_parser : public basic_parser_impl { +public: + typedef DataType parser_data_type; +}; + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { + const char *ArgStr; +public: + + // parse - Return true on error. 
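+  //
+  // Illustrative note (hypothetical option name): because parser<bool>
+  // reports ValueOptional below, a flag declared as
+  //
+  //   static cl::opt<bool> Quiet("quiet", cl::desc("Suppress output"));
+  //
+  // may be passed as plain "-quiet" or with an explicit value such as
+  // "-quiet=true" / "-quiet=false"; parse() maps the spelling to a bool.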
+ bool parse(Option &O, StringRef ArgName, StringRef Arg, bool &Val); + + template + void initialize(Opt &O) { + ArgStr = O.ArgStr; + } + + enum ValueExpected getValueExpectedFlagDefault() const { + return ValueOptional; + } + + // getValueName - Do not print = at all. + virtual const char *getValueName() const { return 0; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + +//-------------------------------------------------- +// parser +enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, boolOrDefault &Val); + + enum ValueExpected getValueExpectedFlagDefault() const { + return ValueOptional; + } + + // getValueName - Do not print = at all. + virtual const char *getValueName() const { return 0; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val); + + // getValueName - Overload in subclass to provide a better default value. + virtual const char *getValueName() const { return "int"; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val); + + // getValueName - Overload in subclass to provide a better default value. + virtual const char *getValueName() const { return "uint"; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val); + + // getValueName - Overload in subclass to provide a better default value. + virtual const char *getValueName() const { return "number"; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. + bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val); + + // getValueName - Overload in subclass to provide a better default value. + virtual const char *getValueName() const { return "number"; } + + // An out-of-line virtual method to provide a 'home' for this class. + virtual void anchor(); +}; + +EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); + +//-------------------------------------------------- +// parser +// +template<> +class parser : public basic_parser { +public: + // parse - Return true on error. 
+  bool parse(Option &, StringRef, StringRef Arg, std::string &Value) {
+    Value = Arg.str();
+    return false;
+  }
+
+  // getValueName - Overload in subclass to provide a better default value.
+  virtual const char *getValueName() const { return "string"; }
+
+  // An out-of-line virtual method to provide a 'home' for this class.
+  virtual void anchor();
+};
+
+EXTERN_TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+
+//--------------------------------------------------
+// parser<char>
+//
+template<>
+class parser<char> : public basic_parser<char> {
+public:
+  // parse - Return true on error.
+  bool parse(Option &, StringRef, StringRef Arg, char &Value) {
+    Value = Arg[0];
+    return false;
+  }
+
+  // getValueName - Overload in subclass to provide a better default value.
+  virtual const char *getValueName() const { return "char"; }
+
+  // An out-of-line virtual method to provide a 'home' for this class.
+  virtual void anchor();
+};
+
+EXTERN_TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+//===----------------------------------------------------------------------===//
+// applicator class - This class is used because we must use partial
+// specialization to handle literal string arguments specially (const char*
+// does not correctly respond to the apply method).  Because the syntax to use
+// this is a pain, we have the 'apply' method below to handle the nastiness...
+//
+template<class Mod> struct applicator {
+  template<class Opt>
+  static void opt(const Mod &M, Opt &O) { M.apply(O); }
+};
+
+// Handle const char* as a special case...
+template<unsigned n> struct applicator<char[n]> {
+  template<class Opt>
+  static void opt(const char *Str, Opt &O) { O.setArgStr(Str); }
+};
+template<unsigned n> struct applicator<const char[n]> {
+  template<class Opt>
+  static void opt(const char *Str, Opt &O) { O.setArgStr(Str); }
+};
+template<> struct applicator<const char*> {
+  template<class Opt>
+  static void opt(const char *Str, Opt &O) { O.setArgStr(Str); }
+};
+
+template<> struct applicator<NumOccurrencesFlag> {
+  static void opt(NumOccurrencesFlag NO, Option &O) {
+    O.setNumOccurrencesFlag(NO);
+  }
+};
+template<> struct applicator<ValueExpected> {
+  static void opt(ValueExpected VE, Option &O) { O.setValueExpectedFlag(VE); }
+};
+template<> struct applicator<OptionHidden> {
+  static void opt(OptionHidden OH, Option &O) { O.setHiddenFlag(OH); }
+};
+template<> struct applicator<FormattingFlags> {
+  static void opt(FormattingFlags FF, Option &O) { O.setFormattingFlag(FF); }
+};
+template<> struct applicator<MiscFlags> {
+  static void opt(MiscFlags MF, Option &O) { O.setMiscFlag(MF); }
+};
+
+// apply method - Apply a modifier to an option in a type safe way.
+template<class Mod, class Opt>
+void apply(const Mod &M, Opt *O) {
+  applicator<Mod>::opt(M, *O);
+}
+
+
+//===----------------------------------------------------------------------===//
+// opt_storage class
+
+// Default storage class definition: external storage.  This implementation
+// assumes the user will specify a variable to store the data into with the
+// cl::location(x) modifier.
+//
+template<class DataType, bool ExternalStorage, bool isClass>
+class opt_storage {
+  DataType *Location;   // Where to store the object...
+
+  void check() const {
+    assert(Location != 0 && "cl::location(...)
not specified for a command " + "line option with external storage, " + "or cl::init specified before cl::location()!!"); + } +public: + opt_storage() : Location(0) {} + + bool setLocation(Option &O, DataType &L) { + if (Location) + return O.error("cl::location(x) specified more than once!"); + Location = &L; + return false; + } + + template + void setValue(const T &V) { + check(); + *Location = V; + } + + DataType &getValue() { check(); return *Location; } + const DataType &getValue() const { check(); return *Location; } + + operator DataType() const { return this->getValue(); } +}; + + +// Define how to hold a class type object, such as a string. Since we can +// inherit from a class, we do so. This makes us exactly compatible with the +// object in all cases that it is used. +// +template +class opt_storage : public DataType { +public: + template + void setValue(const T &V) { DataType::operator=(V); } + + DataType &getValue() { return *this; } + const DataType &getValue() const { return *this; } +}; + +// Define a partial specialization to handle things we cannot inherit from. In +// this case, we store an instance through containment, and overload operators +// to get at the value. +// +template +class opt_storage { +public: + DataType Value; + + // Make sure we initialize the value with the default constructor for the + // type. + opt_storage() : Value(DataType()) {} + + template + void setValue(const T &V) { Value = V; } + DataType &getValue() { return Value; } + DataType getValue() const { return Value; } + + operator DataType() const { return getValue(); } + + // If the datatype is a pointer, support -> on it. + DataType operator->() const { return Value; } +}; + + +//===----------------------------------------------------------------------===// +// opt - A scalar command line option. +// +template > +class opt : public Option, + public opt_storage::value> { + ParserClass Parser; + + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, + StringRef Arg) { + typename ParserClass::parser_data_type Val = + typename ParserClass::parser_data_type(); + if (Parser.parse(*this, ArgName, Arg, Val)) + return true; // Parse error! + this->setValue(Val); + this->setPosition(pos); + return false; + } + + virtual enum ValueExpected getValueExpectedFlagDefault() const { + return Parser.getValueExpectedFlagDefault(); + } + virtual void getExtraOptionNames(SmallVectorImpl &OptionNames) { + return Parser.getExtraOptionNames(OptionNames); + } + + // Forward printing stuff to the parser... + virtual size_t getOptionWidth() const {return Parser.getOptionWidth(*this);} + virtual void printOptionInfo(size_t GlobalWidth) const { + Parser.printOptionInfo(*this, GlobalWidth); + } + + void done() { + addArgument(); + Parser.initialize(*this); + } +public: + // setInitialValue - Used by the cl::init modifier... + void setInitialValue(const DataType &V) { this->setValue(V); } + + ParserClass &getParser() { return Parser; } + + template + DataType &operator=(const T &Val) { + this->setValue(Val); + return this->getValue(); + } + + // One option... + template + explicit opt(const M0t &M0) : Option(Optional | NotHidden) { + apply(M0, this); + done(); + } + + // Two options... + template + opt(const M0t &M0, const M1t &M1) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); + done(); + } + + // Three options... 
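+  // For instance (illustrative, hypothetical option name), a declaration
+  // passing exactly three modifiers resolves to the constructor below:
+  //
+  //   static cl::opt<std::string> OutputFilename("o",
+  //       cl::desc("Specify output filename"),
+  //       cl::value_desc("filename"));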
+ template + opt(const M0t &M0, const M1t &M1, + const M2t &M2) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); + done(); + } + // Four options... + template + opt(const M0t &M0, const M1t &M1, const M2t &M2, + const M3t &M3) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + done(); + } + // Five options... + template + opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); + done(); + } + // Six options... + template + opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); + done(); + } + // Seven options... + template + opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, + const M6t &M6) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); + done(); + } + // Eight options... + template + opt(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, const M6t &M6, + const M7t &M7) : Option(Optional | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); + done(); + } +}; + +EXTERN_TEMPLATE_INSTANTIATION(class opt); +EXTERN_TEMPLATE_INSTANTIATION(class opt); +EXTERN_TEMPLATE_INSTANTIATION(class opt); +EXTERN_TEMPLATE_INSTANTIATION(class opt); +EXTERN_TEMPLATE_INSTANTIATION(class opt); + +//===----------------------------------------------------------------------===// +// list_storage class + +// Default storage class definition: external storage. This implementation +// assumes the user will specify a variable to store the data into with the +// cl::location(x) modifier. +// +template +class list_storage { + StorageClass *Location; // Where to store the object... + +public: + list_storage() : Location(0) {} + + bool setLocation(Option &O, StorageClass &L) { + if (Location) + return O.error("cl::location(x) specified more than once!"); + Location = &L; + return false; + } + + template + void addValue(const T &V) { + assert(Location != 0 && "cl::location(...) not specified for a command " + "line option with external storage!"); + Location->push_back(V); + } +}; + + +// Define how to hold a class type object, such as a string. Since we can +// inherit from a class, we do so. This makes us exactly compatible with the +// object in all cases that it is used. +// +template +class list_storage : public std::vector { +public: + template + void addValue(const T &V) { std::vector::push_back(V); } +}; + + +//===----------------------------------------------------------------------===// +// list - A list of command line options. 
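+//
+// Illustrative sketch (hypothetical option name): the most common use is a
+// positional list that collects every bare argument on the command line:
+//
+//   static cl::list<std::string> InputFilenames(cl::Positional,
+//       cl::desc("<input files>"), cl::OneOrMore);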
+// +template > +class list : public Option, public list_storage { + std::vector Positions; + ParserClass Parser; + + virtual enum ValueExpected getValueExpectedFlagDefault() const { + return Parser.getValueExpectedFlagDefault(); + } + virtual void getExtraOptionNames(SmallVectorImpl &OptionNames) { + return Parser.getExtraOptionNames(OptionNames); + } + + virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){ + typename ParserClass::parser_data_type Val = + typename ParserClass::parser_data_type(); + if (Parser.parse(*this, ArgName, Arg, Val)) + return true; // Parse Error! + list_storage::addValue(Val); + setPosition(pos); + Positions.push_back(pos); + return false; + } + + // Forward printing stuff to the parser... + virtual size_t getOptionWidth() const {return Parser.getOptionWidth(*this);} + virtual void printOptionInfo(size_t GlobalWidth) const { + Parser.printOptionInfo(*this, GlobalWidth); + } + + void done() { + addArgument(); + Parser.initialize(*this); + } +public: + ParserClass &getParser() { return Parser; } + + unsigned getPosition(unsigned optnum) const { + assert(optnum < this->size() && "Invalid option index"); + return Positions[optnum]; + } + + void setNumAdditionalVals(unsigned n) { + Option::setNumAdditionalVals(n); + } + + // One option... + template + explicit list(const M0t &M0) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); + done(); + } + // Two options... + template + list(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); + done(); + } + // Three options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); + done(); + } + // Four options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + done(); + } + // Five options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); + done(); + } + // Six options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); + done(); + } + // Seven options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, const M6t &M6) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); + done(); + } + // Eight options... + template + list(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, const M6t &M6, + const M7t &M7) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); + done(); + } +}; + +// multi_val - Modifier to set the number of additional values. 
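+// Illustrative sketch (hypothetical option name), assuming cl::multi_val(3)
+// makes each occurrence of the option consume exactly three values:
+//
+//   static cl::list<std::string> SectAlign("sectalign", cl::multi_val(3),
+//                                          cl::desc("section alignment"));
+//
+// so "-sectalign segname sectname value" appends three strings per use.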
+struct multi_val {
+  unsigned AdditionalVals;
+  explicit multi_val(unsigned N) : AdditionalVals(N) {}
+
+  template <typename D, typename S, typename P>
+  void apply(list<D, S, P> &L) const { L.setNumAdditionalVals(AdditionalVals); }
+};
+
+
+//===----------------------------------------------------------------------===//
+// bits_storage class
+
+// Default storage class definition: external storage.  This implementation
+// assumes the user will specify a variable to store the data into with the
+// cl::location(x) modifier.
+//
+template<class DataType, class StorageClass>
+class bits_storage {
+  unsigned *Location;   // Where to store the bits...
+
+  template<class T>
+  static unsigned Bit(const T &V) {
+    unsigned BitPos = reinterpret_cast<unsigned>(V);
+    assert(BitPos < sizeof(unsigned) * CHAR_BIT &&
+           "enum exceeds width of bit vector!");
+    return 1 << BitPos;
+  }
+
+public:
+  bits_storage() : Location(0) {}
+
+  bool setLocation(Option &O, unsigned &L) {
+    if (Location)
+      return O.error("cl::location(x) specified more than once!");
+    Location = &L;
+    return false;
+  }
+
+  template<class T>
+  void addValue(const T &V) {
+    assert(Location != 0 && "cl::location(...) not specified for a command "
+           "line option with external storage!");
+    *Location |= Bit(V);
+  }
+
+  unsigned getBits() { return *Location; }
+
+  template<class T>
+  bool isSet(const T &V) {
+    return (*Location & Bit(V)) != 0;
+  }
+};
+
+
+// Define how to hold bits.  Since we can inherit from a class, we do so.
+// This makes us exactly compatible with the bits in all cases that it is used.
+//
+template<class DataType>
+class bits_storage<DataType, bool> {
+  unsigned Bits;   // Where to store the bits...
+
+  template<class T>
+  static unsigned Bit(const T &V) {
+    unsigned BitPos = (unsigned)V;
+    assert(BitPos < sizeof(unsigned) * CHAR_BIT &&
+           "enum exceeds width of bit vector!");
+    return 1 << BitPos;
+  }
+
+public:
+  template<class T>
+  void addValue(const T &V) {
+    Bits |= Bit(V);
+  }
+
+  unsigned getBits() { return Bits; }
+
+  template<class T>
+  bool isSet(const T &V) {
+    return (Bits & Bit(V)) != 0;
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+// bits - A bit vector of command options.
+//
+template <class DataType, class Storage = bool,
+          class ParserClass = parser<DataType> >
+class bits : public Option, public bits_storage<DataType, Storage> {
+  std::vector<unsigned> Positions;
+  ParserClass Parser;
+
+  virtual enum ValueExpected getValueExpectedFlagDefault() const {
+    return Parser.getValueExpectedFlagDefault();
+  }
+  virtual void getExtraOptionNames(SmallVectorImpl<const char*> &OptionNames) {
+    return Parser.getExtraOptionNames(OptionNames);
+  }
+
+  virtual bool handleOccurrence(unsigned pos, StringRef ArgName, StringRef Arg){
+    typename ParserClass::parser_data_type Val =
+      typename ParserClass::parser_data_type();
+    if (Parser.parse(*this, ArgName, Arg, Val))
+      return true;  // Parse Error!
+    addValue(Val);
+    setPosition(pos);
+    Positions.push_back(pos);
+    return false;
+  }
+
+  // Forward printing stuff to the parser...
+  virtual size_t getOptionWidth() const {return Parser.getOptionWidth(*this);}
+  virtual void printOptionInfo(size_t GlobalWidth) const {
+    Parser.printOptionInfo(*this, GlobalWidth);
+  }
+
+  void done() {
+    addArgument();
+    Parser.initialize(*this);
+  }
+public:
+  ParserClass &getParser() { return Parser; }
+
+  unsigned getPosition(unsigned optnum) const {
+    assert(optnum < this->size() && "Invalid option index");
+    return Positions[optnum];
+  }
+
+  // One option...
+  template<class M0t>
+  explicit bits(const M0t &M0) : Option(ZeroOrMore | NotHidden) {
+    apply(M0, this);
+    done();
+  }
+  // Two options...
+ template + bits(const M0t &M0, const M1t &M1) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); + done(); + } + // Three options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); + done(); + } + // Four options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + done(); + } + // Five options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); + done(); + } + // Six options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); + done(); + } + // Seven options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, const M6t &M6) + : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); + done(); + } + // Eight options... + template + bits(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3, + const M4t &M4, const M5t &M5, const M6t &M6, + const M7t &M7) : Option(ZeroOrMore | NotHidden) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + apply(M4, this); apply(M5, this); apply(M6, this); apply(M7, this); + done(); + } +}; + +//===----------------------------------------------------------------------===// +// Aliased command line option (alias this name to a preexisting name) +// + +class alias : public Option { + Option *AliasFor; + virtual bool handleOccurrence(unsigned pos, StringRef /*ArgName*/, + StringRef Arg) { + return AliasFor->handleOccurrence(pos, AliasFor->ArgStr, Arg); + } + // Handle printing stuff... + virtual size_t getOptionWidth() const; + virtual void printOptionInfo(size_t GlobalWidth) const; + + void done() { + if (!hasArgStr()) + error("cl::alias must have argument name specified!"); + if (AliasFor == 0) + error("cl::alias must have an cl::aliasopt(option) specified!"); + addArgument(); + } +public: + void setAliasFor(Option &O) { + if (AliasFor) + error("cl::alias must only have one cl::aliasopt(...) specified!"); + AliasFor = &O; + } + + // One option... + template + explicit alias(const M0t &M0) : Option(Optional | Hidden), AliasFor(0) { + apply(M0, this); + done(); + } + // Two options... + template + alias(const M0t &M0, const M1t &M1) : Option(Optional | Hidden), AliasFor(0) { + apply(M0, this); apply(M1, this); + done(); + } + // Three options... + template + alias(const M0t &M0, const M1t &M1, const M2t &M2) + : Option(Optional | Hidden), AliasFor(0) { + apply(M0, this); apply(M1, this); apply(M2, this); + done(); + } + // Four options... + template + alias(const M0t &M0, const M1t &M1, const M2t &M2, const M3t &M3) + : Option(Optional | Hidden), AliasFor(0) { + apply(M0, this); apply(M1, this); apply(M2, this); apply(M3, this); + done(); + } +}; + +// aliasfor - Modifier to set the option an alias aliases. 
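+// Illustrative sketch (hypothetical option names): pairing cl::alias with
+// cl::aliasopt below gives an existing option a second spelling:
+//
+//   static cl::opt<bool> Quiet("quiet", cl::desc("Suppress output"));
+//   static cl::alias QuietA("q", cl::desc("Alias for -quiet"),
+//                           cl::aliasopt(Quiet));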
+struct aliasopt { + Option &Opt; + explicit aliasopt(Option &O) : Opt(O) {} + void apply(alias &A) const { A.setAliasFor(Opt); } +}; + +// extrahelp - provide additional help at the end of the normal help +// output. All occurrences of cl::extrahelp will be accumulated and +// printed to stderr at the end of the regular help, just before +// exit is called. +struct extrahelp { + const char * morehelp; + explicit extrahelp(const char* help); +}; + +void PrintVersionMessage(); +// This function just prints the help message, exactly the same way as if the +// -help option had been given on the command line. +// NOTE: THIS FUNCTION TERMINATES THE PROGRAM! +void PrintHelpMessage(); + +} // End namespace cl + +} // End namespace llvm + +#endif diff --git a/final/include/llvm/Support/Compiler.h b/final/include/llvm/Support/Compiler.h new file mode 100644 index 00000000000..67f0fd7e0dc --- /dev/null +++ b/final/include/llvm/Support/Compiler.h @@ -0,0 +1,129 @@ +//===-- llvm/Support/Compiler.h - Compiler abstraction support --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several macros, based on the current compiler. This allows +// use of compiler-specific features in a way that remains portable. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_COMPILER_H +#define LLVM_SUPPORT_COMPILER_H + +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked +/// into a shared library, then the class should be private to the library and +/// not accessible from outside it. Can also be used to mark variables and +/// functions, making them private to any shared library they are linked into. +#if (__GNUC__ >= 4) && !defined(__MINGW32__) && !defined(__CYGWIN__) +#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden"))) +#else +#define LLVM_LIBRARY_VISIBILITY +#endif + +#if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define LLVM_ATTRIBUTE_USED __attribute__((__used__)) +#else +#define LLVM_ATTRIBUTE_USED +#endif + +// Some compilers warn about unused functions. When a function is sometimes +// used or not depending on build settings (e.g. a function only called from +// within "assert"), this attribute can be used to suppress such warnings. +// +// However, it shouldn't be used for unused *variables*, as those have a much +// more portable solution: +// (void)unused_var_name; +// Prefer cast-to-void wherever it is sufficient. +#if (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define LLVM_ATTRIBUTE_UNUSED __attribute__((__unused__)) +#else +#define LLVM_ATTRIBUTE_UNUSED +#endif + +#ifdef __GNUC__ // aka 'ATTRIBUTE_CONST' but following LLVM Conventions. +#define LLVM_ATTRIBUTE_READNONE __attribute__((__const__)) +#else +#define LLVM_ATTRIBUTE_READNONE +#endif + +#ifdef __GNUC__ // aka 'ATTRIBUTE_PURE' but following LLVM Conventions. +#define LLVM_ATTRIBUTE_READONLY __attribute__((__pure__)) +#else +#define LLVM_ATTRIBUTE_READONLY +#endif + +#if (__GNUC__ >= 4) +#define BUILTIN_EXPECT(EXPR, VALUE) __builtin_expect((EXPR), (VALUE)) +#else +#define BUILTIN_EXPECT(EXPR, VALUE) (EXPR) +#endif + +// C++ doesn't support 'extern template' of template specializations. 
GCC does, +// but requires __extension__ before it. In the header, use this: +// EXTERN_TEMPLATE_INSTANTIATION(class foo); +// in the .cpp file, use this: +// TEMPLATE_INSTANTIATION(class foo); +#ifdef __GNUC__ +#define EXTERN_TEMPLATE_INSTANTIATION(X) __extension__ extern template X +#define TEMPLATE_INSTANTIATION(X) template X +#else +#define EXTERN_TEMPLATE_INSTANTIATION(X) +#define TEMPLATE_INSTANTIATION(X) +#endif + +// LLVM_ATTRIBUTE_NOINLINE - On compilers where we have a directive to do so, +// mark a method "not for inlining". +#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) +#define LLVM_ATTRIBUTE_NOINLINE __attribute__((noinline)) +#elif defined(_MSC_VER) +#define LLVM_ATTRIBUTE_NOINLINE __declspec(noinline) +#else +#define LLVM_ATTRIBUTE_NOINLINE +#endif + +// LLVM_ATTRIBUTE_ALWAYS_INLINE - On compilers where we have a directive to do +// so, mark a method "always inline" because it is performance sensitive. GCC +// 3.4 supported this but is buggy in various cases and produces unimplemented +// errors, just use it in GCC 4.0 and later. +#if __GNUC__ > 3 +#define LLVM_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +#define LLVM_ATTRIBUTE_ALWAYS_INLINE __forceinline +#else +#define LLVM_ATTRIBUTE_ALWAYS_INLINE +#endif + + +#ifdef __GNUC__ +#define LLVM_ATTRIBUTE_NORETURN __attribute__((noreturn)) +#elif defined(_MSC_VER) +#define LLVM_ATTRIBUTE_NORETURN __declspec(noreturn) +#else +#define LLVM_ATTRIBUTE_NORETURN +#endif + +// LLVM_ATTRIBUTE_DEPRECATED(decl, "message") +#if __has_feature(attribute_deprecated_with_message) +# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ + decl __attribute__((deprecated(message))) +#elif defined(__GNUC__) +# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ + decl __attribute__((deprecated)) +#elif defined(_MSC_VER) +# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ + __declspec(deprecated(message)) decl +#else +# define LLVM_ATTRIBUTE_DEPRECATED(decl, message) \ + decl +#endif + +#endif diff --git a/final/include/llvm/Support/ConstantFolder.h b/final/include/llvm/Support/ConstantFolder.h new file mode 100644 index 00000000000..bd3765d592d --- /dev/null +++ b/final/include/llvm/Support/ConstantFolder.h @@ -0,0 +1,228 @@ +//===-- llvm/Support/ConstantFolder.h - Constant folding helper -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ConstantFolder class, a helper for IRBuilder. +// It provides IRBuilder with a set of methods for creating constants +// with minimal folding. For general constant creation and folding, +// use ConstantExpr and the routines in llvm/Analysis/ConstantFolding.h. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CONSTANTFOLDER_H +#define LLVM_SUPPORT_CONSTANTFOLDER_H + +#include "llvm/Constants.h" +#include "llvm/InstrTypes.h" + +namespace llvm { + +class LLVMContext; + +/// ConstantFolder - Create constants with minimum, target independent, folding. 
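+///
+/// Illustrative sketch: folding two i32 constants directly (standard LLVM
+/// APIs; in practice ConstantFolder is normally used through IRBuilder):
+///
+///   LLVMContext Ctx;
+///   ConstantFolder Folder(Ctx);
+///   Constant *Two   = ConstantInt::get(Type::getInt32Ty(Ctx), 2);
+///   Constant *Three = ConstantInt::get(Type::getInt32Ty(Ctx), 3);
+///   Constant *Five  = Folder.CreateAdd(Two, Three); // ConstantExpr::getAdd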
+class ConstantFolder { +public: + explicit ConstantFolder(LLVMContext &) {} + + //===--------------------------------------------------------------------===// + // Binary Operators + //===--------------------------------------------------------------------===// + + Constant *CreateAdd(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW); + } + Constant *CreateFAdd(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getFAdd(LHS, RHS); + } + Constant *CreateSub(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW); + } + Constant *CreateFSub(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getFSub(LHS, RHS); + } + Constant *CreateMul(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW); + } + Constant *CreateFMul(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getFMul(LHS, RHS); + } + Constant *CreateUDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + return ConstantExpr::getUDiv(LHS, RHS, isExact); + } + Constant *CreateSDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + return ConstantExpr::getSDiv(LHS, RHS, isExact); + } + Constant *CreateFDiv(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getFDiv(LHS, RHS); + } + Constant *CreateURem(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getURem(LHS, RHS); + } + Constant *CreateSRem(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getSRem(LHS, RHS); + } + Constant *CreateFRem(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getFRem(LHS, RHS); + } + Constant *CreateShl(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + return ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW); + } + Constant *CreateLShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + return ConstantExpr::getLShr(LHS, RHS, isExact); + } + Constant *CreateAShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + return ConstantExpr::getAShr(LHS, RHS, isExact); + } + Constant *CreateAnd(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getAnd(LHS, RHS); + } + Constant *CreateOr(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getOr(LHS, RHS); + } + Constant *CreateXor(Constant *LHS, Constant *RHS) const { + return ConstantExpr::getXor(LHS, RHS); + } + + Constant *CreateBinOp(Instruction::BinaryOps Opc, + Constant *LHS, Constant *RHS) const { + return ConstantExpr::get(Opc, LHS, RHS); + } + + //===--------------------------------------------------------------------===// + // Unary Operators + //===--------------------------------------------------------------------===// + + Constant *CreateNeg(Constant *C, + bool HasNUW = false, bool HasNSW = false) const { + return ConstantExpr::getNeg(C, HasNUW, HasNSW); + } + Constant *CreateFNeg(Constant *C) const { + return ConstantExpr::getFNeg(C); + } + Constant *CreateNot(Constant *C) const { + return ConstantExpr::getNot(C); + } + + //===--------------------------------------------------------------------===// + // Memory Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); + } + Constant 
*CreateGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); + } + + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + + //===--------------------------------------------------------------------===// + // Cast/Conversion Operators + //===--------------------------------------------------------------------===// + + Constant *CreateCast(Instruction::CastOps Op, Constant *C, + const Type *DestTy) const { + return ConstantExpr::getCast(Op, C, DestTy); + } + Constant *CreatePointerCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getPointerCast(C, DestTy); + } + Constant *CreateIntCast(Constant *C, const Type *DestTy, + bool isSigned) const { + return ConstantExpr::getIntegerCast(C, DestTy, isSigned); + } + Constant *CreateFPCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getFPCast(C, DestTy); + } + + Constant *CreateBitCast(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::BitCast, C, DestTy); + } + Constant *CreateIntToPtr(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::IntToPtr, C, DestTy); + } + Constant *CreatePtrToInt(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::PtrToInt, C, DestTy); + } + Constant *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getZExtOrBitCast(C, DestTy); + } + Constant *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getSExtOrBitCast(C, DestTy); + } + + Constant *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const { + return ConstantExpr::getTruncOrBitCast(C, DestTy); + } + + //===--------------------------------------------------------------------===// + // Compare Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateICmp(CmpInst::Predicate P, Constant *LHS, + Constant *RHS) const { + return ConstantExpr::getCompare(P, LHS, RHS); + } + Constant *CreateFCmp(CmpInst::Predicate P, Constant *LHS, + Constant *RHS) const { + return ConstantExpr::getCompare(P, LHS, RHS); + } + + //===--------------------------------------------------------------------===// + // Other Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateSelect(Constant *C, Constant *True, Constant *False) const { + return ConstantExpr::getSelect(C, True, False); + } + + Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const { + return ConstantExpr::getExtractElement(Vec, Idx); + } + + Constant *CreateInsertElement(Constant *Vec, Constant *NewElt, + Constant *Idx) const { + return ConstantExpr::getInsertElement(Vec, NewElt, Idx); + } + + Constant *CreateShuffleVector(Constant *V1, Constant *V2, + Constant *Mask) const { + return ConstantExpr::getShuffleVector(V1, V2, Mask); + } + + Constant *CreateExtractValue(Constant *Agg, const unsigned *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getExtractValue(Agg, IdxList, NumIdx); + } + + Constant *CreateInsertValue(Constant *Agg, Constant *Val, + const unsigned *IdxList, unsigned NumIdx) const { + return ConstantExpr::getInsertValue(Agg, Val, 
IdxList, NumIdx); + } +}; + +} + +#endif diff --git a/final/include/llvm/Support/ConstantRange.h b/final/include/llvm/Support/ConstantRange.h new file mode 100644 index 00000000000..ced3a2cf2db --- /dev/null +++ b/final/include/llvm/Support/ConstantRange.h @@ -0,0 +1,265 @@ +//===-- llvm/Support/ConstantRange.h - Represent a range --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Represent a range of possible values that may occur when the program is run +// for an integral value. This keeps track of a lower and upper bound for the +// constant, which MAY wrap around the end of the numeric range. To do this, it +// keeps track of a [lower, upper) bound, which specifies an interval just like +// STL iterators. When used with boolean values, the following are important +// ranges: : +// +// [F, F) = {} = Empty set +// [T, F) = {T} +// [F, T) = {F} +// [T, T) = {F, T} = Full set +// +// The other integral ranges use min/max values for special range values. For +// example, for 8-bit types, it uses: +// [0, 0) = {} = Empty set +// [255, 255) = {0..255} = Full Set +// +// Note that ConstantRange can be used to represent either signed or +// unsigned ranges. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CONSTANT_RANGE_H +#define LLVM_SUPPORT_CONSTANT_RANGE_H + +#include "llvm/ADT/APInt.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +/// ConstantRange - This class represents an range of values. +/// +class ConstantRange { + APInt Lower, Upper; + +public: + /// Initialize a full (the default) or empty set for the specified bit width. + /// + explicit ConstantRange(uint32_t BitWidth, bool isFullSet = true); + + /// Initialize a range to hold the single specified value. + /// + ConstantRange(const APInt &Value); + + /// @brief Initialize a range of values explicitly. This will assert out if + /// Lower==Upper and Lower != Min or Max value for its type. It will also + /// assert out if the two APInt's are not the same bit width. + ConstantRange(const APInt &Lower, const APInt &Upper); + + /// makeICmpRegion - Produce the smallest range that contains all values that + /// might satisfy the comparison specified by Pred when compared to any value + /// contained within Other. + /// + /// Solves for range X in 'for all x in X, there exists a y in Y such that + /// icmp op x, y is true'. Every value that might make the comparison true + /// is included in the resulting range. + static ConstantRange makeICmpRegion(unsigned Pred, + const ConstantRange &Other); + + /// getLower - Return the lower value for this range... + /// + const APInt &getLower() const { return Lower; } + + /// getUpper - Return the upper value for this range... + /// + const APInt &getUpper() const { return Upper; } + + /// getBitWidth - get the bit width of this ConstantRange + /// + uint32_t getBitWidth() const { return Lower.getBitWidth(); } + + /// isFullSet - Return true if this set contains all of the elements possible + /// for this data-type + /// + bool isFullSet() const; + + /// isEmptySet - Return true if this set contains no members. 
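+  ///
+  /// Illustrative sketch of the constructors and predicates above (i8):
+  ///
+  ///   ConstantRange Full(8);            // full set; isFullSet() is true
+  ///   ConstantRange Empty(8, false);    // empty set; isEmptySet() is true
+  ///   ConstantRange One(APInt(8, 42));  // exactly {42}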
+ /// + bool isEmptySet() const; + + /// isWrappedSet - Return true if this set wraps around the top of the range, + /// for example: [100, 8) + /// + bool isWrappedSet() const; + + /// isSignWrappedSet - Return true if this set wraps around the INT_MIN of + /// its bitwidth, for example: i8 [120, 140). + /// + bool isSignWrappedSet() const; + + /// contains - Return true if the specified value is in the set. + /// + bool contains(const APInt &Val) const; + + /// contains - Return true if the other range is a subset of this one. + /// + bool contains(const ConstantRange &CR) const; + + /// getSingleElement - If this set contains a single element, return it, + /// otherwise return null. + /// + const APInt *getSingleElement() const { + if (Upper == Lower + 1) + return &Lower; + return 0; + } + + /// isSingleElement - Return true if this set contains exactly one member. + /// + bool isSingleElement() const { return getSingleElement() != 0; } + + /// getSetSize - Return the number of elements in this set. + /// + APInt getSetSize() const; + + /// getUnsignedMax - Return the largest unsigned value contained in the + /// ConstantRange. + /// + APInt getUnsignedMax() const; + + /// getUnsignedMin - Return the smallest unsigned value contained in the + /// ConstantRange. + /// + APInt getUnsignedMin() const; + + /// getSignedMax - Return the largest signed value contained in the + /// ConstantRange. + /// + APInt getSignedMax() const; + + /// getSignedMin - Return the smallest signed value contained in the + /// ConstantRange. + /// + APInt getSignedMin() const; + + /// operator== - Return true if this range is equal to another range. + /// + bool operator==(const ConstantRange &CR) const { + return Lower == CR.Lower && Upper == CR.Upper; + } + bool operator!=(const ConstantRange &CR) const { + return !operator==(CR); + } + + /// subtract - Subtract the specified constant from the endpoints of this + /// constant range. + ConstantRange subtract(const APInt &CI) const; + + /// intersectWith - Return the range that results from the intersection of + /// this range with another range. The resultant range is guaranteed to + /// include all elements contained in both input ranges, and to have the + /// smallest possible set size that does so. Because there may be two + /// intersections with the same set size, A.intersectWith(B) might not + /// be equal to B.intersectWith(A). + /// + ConstantRange intersectWith(const ConstantRange &CR) const; + + /// unionWith - Return the range that results from the union of this range + /// with another range. The resultant range is guaranteed to include the + /// elements of both sets, but may contain more. For example, [3, 9) union + /// [12,15) is [3, 15), which includes 9, 10, and 11, which were not included + /// in either set before. + /// + ConstantRange unionWith(const ConstantRange &CR) const; + + /// zeroExtend - Return a new range in the specified integer type, which must + /// be strictly larger than the current type. The returned range will + /// correspond to the possible range of values if the source range had been + /// zero extended to BitWidth. + ConstantRange zeroExtend(uint32_t BitWidth) const; + + /// signExtend - Return a new range in the specified integer type, which must + /// be strictly larger than the current type. The returned range will + /// correspond to the possible range of values if the source range had been + /// sign extended to BitWidth. 
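+  ///
+  /// Illustrative sketch of the set operations documented above (i8):
+  ///
+  ///   ConstantRange A(APInt(8, 3), APInt(8, 9));    // [3, 9)
+  ///   ConstantRange B(APInt(8, 12), APInt(8, 15));  // [12, 15)
+  ///   ConstantRange U = A.unionWith(B);             // [3, 15)
+  ///   U.contains(APInt(8, 10));                     // true: 10 was added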
+ ConstantRange signExtend(uint32_t BitWidth) const; + + /// truncate - Return a new range in the specified integer type, which must be + /// strictly smaller than the current type. The returned range will + /// correspond to the possible range of values if the source range had been + /// truncated to the specified type. + ConstantRange truncate(uint32_t BitWidth) const; + + /// zextOrTrunc - make this range have the bit width given by \p BitWidth. The + /// value is zero extended, truncated, or left alone to make it that width. + ConstantRange zextOrTrunc(uint32_t BitWidth) const; + + /// sextOrTrunc - make this range have the bit width given by \p BitWidth. The + /// value is sign extended, truncated, or left alone to make it that width. + ConstantRange sextOrTrunc(uint32_t BitWidth) const; + + /// add - Return a new range representing the possible values resulting + /// from an addition of a value in this range and a value in \p Other. + ConstantRange add(const ConstantRange &Other) const; + + /// sub - Return a new range representing the possible values resulting + /// from a subtraction of a value in this range and a value in \p Other. + ConstantRange sub(const ConstantRange &Other) const; + + /// multiply - Return a new range representing the possible values resulting + /// from a multiplication of a value in this range and a value in \p Other. + /// TODO: This isn't fully implemented yet. + ConstantRange multiply(const ConstantRange &Other) const; + + /// smax - Return a new range representing the possible values resulting + /// from a signed maximum of a value in this range and a value in \p Other. + ConstantRange smax(const ConstantRange &Other) const; + + /// umax - Return a new range representing the possible values resulting + /// from an unsigned maximum of a value in this range and a value in \p Other. + ConstantRange umax(const ConstantRange &Other) const; + + /// udiv - Return a new range representing the possible values resulting + /// from an unsigned division of a value in this range and a value in + /// \p Other. + ConstantRange udiv(const ConstantRange &Other) const; + + /// binaryAnd - return a new range representing the possible values resulting + /// from a binary-and of a value in this range by a value in \p Other. + ConstantRange binaryAnd(const ConstantRange &Other) const; + + /// binaryOr - return a new range representing the possible values resulting + /// from a binary-or of a value in this range by a value in \p Other. + ConstantRange binaryOr(const ConstantRange &Other) const; + + /// shl - Return a new range representing the possible values resulting + /// from a left shift of a value in this range by a value in \p Other. + /// TODO: This isn't fully implemented yet. + ConstantRange shl(const ConstantRange &Other) const; + + /// lshr - Return a new range representing the possible values resulting + /// from a logical right shift of a value in this range and a value in + /// \p Other. + ConstantRange lshr(const ConstantRange &Other) const; + + /// inverse - Return a new range that is the logical not of the current set. + /// + ConstantRange inverse() const; + + /// print - Print out the bounds to a stream... + /// + void print(raw_ostream &OS) const; + + /// dump - Allow printing from a debugger easily... 
+  ///
+  void dump() const;
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const ConstantRange &CR) {
+  CR.print(OS);
+  return OS;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/CrashRecoveryContext.h b/final/include/llvm/Support/CrashRecoveryContext.h
new file mode 100644
index 00000000000..2e9b5d4aa54
--- /dev/null
+++ b/final/include/llvm/Support/CrashRecoveryContext.h
@@ -0,0 +1,92 @@
+//===--- CrashRecoveryContext.h - Crash Recovery ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
+#define LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
+
+#include <string>
+
+namespace llvm {
+class StringRef;
+
+/// \brief Crash recovery helper object.
+///
+/// This class implements support for running operations in a safe context so
+/// that crashes (memory errors, stack overflow, assertion violations) can be
+/// detected and control restored to the crashing thread. Crash detection is
+/// purely "best effort"; the exact set of failures which can be recovered from
+/// is platform dependent.
+///
+/// Clients make use of this code by first calling
+/// CrashRecoveryContext::Enable(), and then executing unsafe operations via a
+/// CrashRecoveryContext object. For example:
+///
+///    void actual_work(void *);
+///
+///    void foo() {
+///      CrashRecoveryContext CRC;
+///
+///      if (!CRC.RunSafely(actual_work, 0)) {
+///         ... a crash was detected, report error to user ...
+///      }
+///
+///      ... no crash was detected ...
+///    }
+///
+/// Crash recovery contexts may not be nested.
+class CrashRecoveryContext {
+  void *Impl;
+
+public:
+  CrashRecoveryContext() : Impl(0) {}
+  ~CrashRecoveryContext();
+
+  /// \brief Enable crash recovery.
+  static void Enable();
+
+  /// \brief Disable crash recovery.
+  static void Disable();
+
+  /// \brief Return the active context, if the code is currently executing in a
+  /// thread which is in a protected context.
+  static CrashRecoveryContext *GetCurrent();
+
+  /// \brief Execute the provided callback function (with the given arguments)
+  /// in a protected context.
+  ///
+  /// \return True if the function completed successfully, and false if the
+  /// function crashed (or HandleCrash was called explicitly). Clients should
+  /// make as few assumptions as possible about the program state when
+  /// RunSafely has returned false. Clients can use getBacktrace() to retrieve
+  /// the backtrace of the crash on failures.
+  bool RunSafely(void (*Fn)(void*), void *UserData);
+
+  /// \brief Execute the provided callback function (with the given arguments)
+  /// in a protected context which is run in another thread (optionally with a
+  /// requested stack size).
+  ///
+  /// See RunSafely() and llvm_execute_on_thread().
+  bool RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+                         unsigned RequestedStackSize = 0);
+
+  /// \brief Explicitly trigger a crash recovery in the current process, and
+  /// return failure from RunSafely(). This function does not return.
+  void HandleCrash();
+
+  /// \brief Return a string containing the backtrace where the crash was
+  /// detected; or empty if the backtrace wasn't recovered.
+  ///
+  /// This function is only valid when a crash has been detected (i.e.,
+  /// RunSafely() has returned false).
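+  ///
+  /// Illustrative sketch of client code (report_error is a hypothetical
+  /// helper, and actual_work is the callback from the class comment above):
+  ///
+  /// \code
+  ///   CrashRecoveryContext CRC;
+  ///   if (!CRC.RunSafely(actual_work, 0))
+  ///     report_error(CRC.getBacktrace());
+  /// \endcode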
+  const std::string &getBacktrace() const;
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/Support/DOTGraphTraits.h b/final/include/llvm/Support/DOTGraphTraits.h
new file mode 100644
index 00000000000..3cb8164c3c3
--- /dev/null
+++ b/final/include/llvm/Support/DOTGraphTraits.h
@@ -0,0 +1,161 @@
+//===-- llvm/Support/DotGraphTraits.h - Customize .dot output ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a template class that can be used to customize dot output
+// graphs generated by the GraphWriter.h file.  The default implementation of
+// this file will produce a simple, but not very polished graph.  By
+// specializing this template, lots of customization opportunities are possible.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DOTGRAPHTRAITS_H
+#define LLVM_SUPPORT_DOTGRAPHTRAITS_H
+
+#include <string>
+
+namespace llvm {
+
+/// DefaultDOTGraphTraits - This class provides the default implementations of
+/// all of the DOTGraphTraits methods.  If a specialization does not need to
+/// override all methods here it should inherit so that it can get the default
+/// implementations.
+///
+struct DefaultDOTGraphTraits {
+private:
+  bool IsSimple;
+
+protected:
+  bool isSimple() {
+    return IsSimple;
+  }
+
+public:
+  explicit DefaultDOTGraphTraits(bool simple=false) : IsSimple (simple) {}
+
+  /// getGraphName - Return the label for the graph as a whole.  Printed at the
+  /// top of the graph.
+  ///
+  template<typename GraphType>
+  static std::string getGraphName(const GraphType& Graph) { return ""; }
+
+  /// getGraphProperties - Return any custom properties that should be included
+  /// in the top level graph structure for dot.
+  ///
+  template<typename GraphType>
+  static std::string getGraphProperties(const GraphType& Graph) {
+    return "";
+  }
+
+  /// renderGraphFromBottomUp - If this function returns true, the graph is
+  /// emitted bottom-up instead of top-down.  This requires graphviz 2.0 to work
+  /// though.
+  static bool renderGraphFromBottomUp() {
+    return false;
+  }
+
+  /// isNodeHidden - If the function returns true, the given node is not
+  /// displayed in the graph.
+  static bool isNodeHidden(const void *Node) {
+    return false;
+  }
+
+  /// getNodeLabel - Given a node and a pointer to the top level graph, return
+  /// the label to print in the node.
+  template<typename GraphType>
+  std::string getNodeLabel(const void *Node, const GraphType& Graph) {
+    return "";
+  }
+
+  /// hasNodeAddressLabel - If this method returns true, the address of the node
+  /// is added to the label of the node.
+  template<typename GraphType>
+  static bool hasNodeAddressLabel(const void *Node, const GraphType& Graph) {
+    return false;
+  }
+
+  /// If you want to specify custom node attributes, this is the place to do so
+  ///
+  template<typename GraphType>
+  static std::string getNodeAttributes(const void *Node,
+                                       const GraphType& Graph) {
+    return "";
+  }
+
+  /// If you want to override the dot attributes printed for a particular edge,
+  /// override this method.
+  template<typename EdgeIter, typename GraphType>
+  static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+                                       const GraphType& Graph) {
+    return "";
+  }
+
+  /// getEdgeSourceLabel - If you want to label the edge source itself,
+  /// implement this method.
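+  /// As an illustrative example, a CFG printer might return the successor
+  /// number here so that each out-edge is labeled at its source.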
+  template<typename EdgeIter>
+  static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+    return "";
+  }
+
+  /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+  /// should actually target another edge source, not a node.  If this method is
+  /// implemented, getEdgeTarget should be implemented.
+  template<typename EdgeIter>
+  static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+    return false;
+  }
+
+  /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+  /// called to determine which outgoing edge of Node is the target of this
+  /// edge.
+  template<typename EdgeIter>
+  static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+    return I;
+  }
+
+  /// hasEdgeDestLabels - If this function returns true, the graph is able
+  /// to provide labels for edge destinations.
+  static bool hasEdgeDestLabels() {
+    return false;
+  }
+
+  /// numEdgeDestLabels - If hasEdgeDestLabels, this function returns the
+  /// number of incoming edge labels the given node has.
+  static unsigned numEdgeDestLabels(const void *Node) {
+    return 0;
+  }
+
+  /// getEdgeDestLabel - If hasEdgeDestLabels, this function returns the
+  /// incoming edge label with the given index in the given node.
+  static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+    return "";
+  }
+
+  /// addCustomGraphFeatures - If a graph is made up of more than just
+  /// straight-forward nodes and edges, this is the place to put all of the
+  /// custom stuff necessary.  The GraphWriter object, instantiated with your
+  /// GraphType is passed in as an argument.  You may call arbitrary methods on
+  /// it to add things to the output graph.
+  ///
+  template<typename GraphType, typename GraphWriter>
+  static void addCustomGraphFeatures(const GraphType& Graph, GraphWriter &GW) {}
+};
+
+
+/// DOTGraphTraits - Template class that can be specialized to customize how
+/// graphs are converted to 'dot' graphs.  When specializing, you may inherit
+/// from DefaultDOTGraphTraits if you don't need to override everything.
+///
+template <typename Ty>
+struct DOTGraphTraits : public DefaultDOTGraphTraits {
+  DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/DataFlow.h b/final/include/llvm/Support/DataFlow.h
new file mode 100644
index 00000000000..355c402f542
--- /dev/null
+++ b/final/include/llvm/Support/DataFlow.h
@@ -0,0 +1,103 @@
+//===-- llvm/Support/DataFlow.h - dataflow as graphs ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines specializations of GraphTraits that allow Use-Def and
+// Def-Use relations to be treated as proper graphs for generic algorithms.
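+//
+// For example (an illustrative sketch, not part of the original header), the
+// GraphTraits<const Value*> specialization below lets the generic depth-first
+// iterator from "llvm/ADT/DepthFirstIterator.h" walk a value's transitive
+// uses:
+//
+//   for (df_iterator<const Value*> I = df_begin(V), E = df_end(V);
+//        I != E; ++I)
+//     /* visit *I */;
+//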
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DATAFLOW_H
+#define LLVM_SUPPORT_DATAFLOW_H
+
+#include "llvm/User.h"
+#include "llvm/ADT/GraphTraits.h"
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Provide specializations of GraphTraits to be able to treat def-use/use-def
+// chains as graphs
+
+template <> struct GraphTraits<const Value*> {
+  typedef const Value NodeType;
+  typedef Value::const_use_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(const Value *G) {
+    return G;
+  }
+
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->use_begin();
+  }
+
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->use_end();
+  }
+};
+
+template <> struct GraphTraits<Value*> {
+  typedef Value NodeType;
+  typedef Value::use_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(Value *G) {
+    return G;
+  }
+
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->use_begin();
+  }
+
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->use_end();
+  }
+};
+
+template <> struct GraphTraits<Inverse<const User*> > {
+  typedef const Value NodeType;
+  typedef User::const_op_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(Inverse<const User*> G) {
+    return G.Graph;
+  }
+
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    if (const User *U = dyn_cast<User>(N))
+      return U->op_begin();
+    return NULL;
+  }
+
+  static inline ChildIteratorType child_end(NodeType *N) {
+    if (const User *U = dyn_cast<User>(N))
+      return U->op_end();
+    return NULL;
+  }
+};
+
+template <> struct GraphTraits<Inverse<User*> > {
+  typedef Value NodeType;
+  typedef User::op_iterator ChildIteratorType;
+
+  static NodeType *getEntryNode(Inverse<User*> G) {
+    return G.Graph;
+  }
+
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    if (User *U = dyn_cast<User>(N))
+      return U->op_begin();
+    return NULL;
+  }
+
+  static inline ChildIteratorType child_end(NodeType *N) {
+    if (User *U = dyn_cast<User>(N))
+      return U->op_end();
+    return NULL;
+  }
+};
+
+}
+#endif
diff --git a/final/include/llvm/Support/DataTypes.h.cmake b/final/include/llvm/Support/DataTypes.h.cmake
new file mode 100644
index 00000000000..72c451873c0
--- /dev/null
+++ b/final/include/llvm/Support/DataTypes.h.cmake
@@ -0,0 +1,189 @@
+/*===-- include/Support/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough.  This file exports the following            *|
+|* definitions:                                                               *|
+|*                                                                            *|
+|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
+|*                                                                            *|
+|* No library is required when using these functions.                         *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#cmakedefine HAVE_SYS_TYPES_H ${HAVE_SYS_TYPES_H}
+#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
+#cmakedefine HAVE_STDINT_H ${HAVE_STDINT_H}
+#cmakedefine HAVE_UINT64_T ${HAVE_UINT64_T}
+#cmakedefine HAVE_U_INT64_T ${HAVE_U_INT64_T}
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+#ifndef _MSC_VER
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+   being defined.  We would define it here, but in order to prevent Bad Things
+   happening when system headers or C++ STL headers include stdint.h before we
+   define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including Support/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+        "#including Support/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/Support/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+#else /* _MSC_VER */
+/* Visual C++ doesn't provide standard integer headers, but it does provide
+   built-in data types. */
+#include <stdlib.h>
+#include <stddef.h>
+#include <sys/types.h>
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+typedef signed int int32_t;
+typedef unsigned int uint32_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+typedef signed int ssize_t;
+#ifndef INT8_MAX
+# define INT8_MAX 127
+#endif
+#ifndef INT8_MIN
+# define INT8_MIN -128
+#endif
+#ifndef UINT8_MAX
+# define UINT8_MAX 255
+#endif
+#ifndef INT16_MAX
+# define INT16_MAX 32767
+#endif
+#ifndef INT16_MIN
+# define INT16_MIN -32768
+#endif
+#ifndef UINT16_MAX
+# define UINT16_MAX 65535
+#endif
+#ifndef INT32_MAX
+# define INT32_MAX 2147483647
+#endif
+#ifndef INT32_MIN
+# define INT32_MIN -2147483648
+#endif
+#ifndef UINT32_MAX
+# define UINT32_MAX 4294967295U
+#endif
+/* Certain compatibility updates to VC++ introduce the `cstdint'
+ * header, which defines the INT*_C macros. On default installs they
+ * are absent. */
+#ifndef INT8_C
+# define INT8_C(C) C##i8
+#endif
+#ifndef UINT8_C
+# define UINT8_C(C) C##ui8
+#endif
+#ifndef INT16_C
+# define INT16_C(C) C##i16
+#endif
+#ifndef UINT16_C
+# define UINT16_C(C) C##ui16
+#endif
+#ifndef INT32_C
+# define INT32_C(C) C##i32
+#endif
+#ifndef UINT32_C
+# define UINT32_C(C) C##ui32
+#endif
+#ifndef INT64_C
+# define INT64_C(C) C##i64
+#endif
+#ifndef UINT64_C
+# define UINT64_C(C) C##ui64
+#endif
+#endif /* _MSC_VER */
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif  /* SUPPORT_DATATYPES_H */
diff --git a/final/include/llvm/Support/DataTypes.h.in b/final/include/llvm/Support/DataTypes.h.in
new file mode 100644
index 00000000000..5965e8c0b2a
--- /dev/null
+++ b/final/include/llvm/Support/DataTypes.h.in
@@ -0,0 +1,111 @@
+/*===-- include/System/DataTypes.h - Define fixed size types -----*- C -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file contains definitions to figure out the size of _HOST_ data types.*|
+|* This file is important because different host OS's define different macros,*|
+|* which makes portability tough.  This file exports the following            *|
+|* definitions:                                                               *|
+|*                                                                            *|
+|*   [u]int(32|64)_t : typedefs for signed and unsigned 32/64 bit system types*|
+|*   [U]INT(8|16|32|64)_(MIN|MAX) : Constants for the min and max values.     *|
+|*                                                                            *|
+|* No library is required when using these functions.                         *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*/
+
+/* Please leave this file C-compatible. */
+
+#ifndef SUPPORT_DATATYPES_H
+#define SUPPORT_DATATYPES_H
+
+#undef HAVE_SYS_TYPES_H
+#undef HAVE_INTTYPES_H
+#undef HAVE_STDINT_H
+#undef HAVE_UINT64_T
+#undef HAVE_U_INT64_T
+
+#ifdef __cplusplus
+#include <cmath>
+#else
+#include <math.h>
+#endif
+
+/* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
+   being defined.  We would define it here, but in order to prevent Bad Things
+   happening when system headers or C++ STL headers include stdint.h before we
+   define it here, we define it on the g++ command line (in Makefile.rules). */
+#if !defined(__STDC_LIMIT_MACROS)
+# error "Must #define __STDC_LIMIT_MACROS before #including System/DataTypes.h"
+#endif
+
+#if !defined(__STDC_CONSTANT_MACROS)
+# error "Must #define __STDC_CONSTANT_MACROS before " \
+        "#including System/DataTypes.h"
+#endif
+
+/* Note that <inttypes.h> includes <stdint.h>, if this is a C99 system. */
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#ifdef HAVE_STDINT_H
+#include <stdint.h>
+#endif
+
+#ifdef _AIX
+#include "llvm/Support/AIXDataTypesFix.h"
+#endif
+
+/* Handle incorrect definition of uint64_t as u_int64_t */
+#ifndef HAVE_UINT64_T
+#ifdef HAVE_U_INT64_T
+typedef u_int64_t uint64_t;
+#else
+# error "Don't have a definition for uint64_t on this platform"
+#endif
+#endif
+
+#ifdef _OpenBSD_
+#define INT8_MAX 127
+#define INT8_MIN -128
+#define UINT8_MAX 255
+#define INT16_MAX 32767
+#define INT16_MIN -32768
+#define UINT16_MAX 65535
+#define INT32_MAX 2147483647
+#define INT32_MIN -2147483648
+#define UINT32_MAX 4294967295U
+#endif
+
+/* Set defaults for constants which we cannot find. */
+#if !defined(INT64_MAX)
+# define INT64_MAX 9223372036854775807LL
+#endif
+#if !defined(INT64_MIN)
+# define INT64_MIN ((-INT64_MAX)-1)
+#endif
+#if !defined(UINT64_MAX)
+# define UINT64_MAX 0xffffffffffffffffULL
+#endif
+
+#if __GNUC__ > 3
+#define END_WITH_NULL __attribute__((sentinel))
+#else
+#define END_WITH_NULL
+#endif
+
+#ifndef HUGE_VALF
+#define HUGE_VALF (float)HUGE_VAL
+#endif
+
+#endif  /* SUPPORT_DATATYPES_H */
diff --git a/final/include/llvm/Support/Debug.h b/final/include/llvm/Support/Debug.h
new file mode 100644
index 00000000000..8651fc1abea
--- /dev/null
+++ b/final/include/llvm/Support/Debug.h
@@ -0,0 +1,101 @@
+//===- llvm/Support/Debug.h - Easy way to add debug output ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also define the DEBUG_TYPE macro as "foo" to specify
+// that your debug code belongs to class "foo".  Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling without assertions, the -debug-* options and all code in
+// DEBUG() statements disappear, so they do not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DEBUG_H
+#define LLVM_SUPPORT_DEBUG_H
+
+namespace llvm {
+
+class raw_ostream;
+
+/// DEBUG_TYPE macro - Files can specify a DEBUG_TYPE as a string, which causes
+/// all of their DEBUG statements to be activatable with -debug-only=thatstring.
+#ifndef DEBUG_TYPE
+#define DEBUG_TYPE ""
+#endif
+
+#ifndef NDEBUG
+/// DebugFlag - This boolean is set to true if the '-debug' command line option
+/// is specified.  This should probably not be referenced directly, instead, use
+/// the DEBUG macro below.
+///
+extern bool DebugFlag;
+
+/// isCurrentDebugType - Return true if the specified string is the debug type
+/// specified on the command line, or if none was specified on the command line
+/// with the -debug-only=X option.
+///
+bool isCurrentDebugType(const char *Type);
+
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified.  Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void SetCurrentDebugType(const char *Type);
+
+/// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
+/// information.  If the '-debug' option is specified on the command line, and
+/// if this is a debug build, then the code specified as the option to the macro
+/// will be executed.  Otherwise it will not be.  Example:
+///
+/// DEBUG_WITH_TYPE("bitset", dbgs() << "Bitset contains: " << Bitset << "\n");
+///
+/// This will emit the debug information if -debug is present, and -debug-only
+/// is not specified, or is specified as "bitset".
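+///
+/// The output is then requested on the command line, e.g.:
+///
+///   opt -debug-only=bitset ...
+///
+/// (an illustrative invocation; -debug and -debug-only are only available in
+/// debug builds, per the notes above).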
+#define DEBUG_WITH_TYPE(TYPE, X)                                        \
+  do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)) { X; } \
+  } while (0)
+
+#else
+#define isCurrentDebugType(X) (false)
+#define SetCurrentDebugType(X)
+#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0)
+#endif
+
+/// EnableDebugBuffering - This defaults to false.  If true, the debug
+/// stream will install signal handlers to dump any buffered debug
+/// output.  It allows clients to selectively allow the debug stream
+/// to install signal handlers if they are certain there will be no
+/// conflict.
+///
+extern bool EnableDebugBuffering;
+
+/// dbgs() - This returns a reference to a raw_ostream for debugging
+/// messages.  If debugging is disabled it returns errs().  Use it
+/// like: dbgs() << "foo" << "bar";
+raw_ostream &dbgs();
+
+// DEBUG macro - This macro should be used by passes to emit debug information.
+// If the '-debug' option is specified on the command line, and if this is a
+// debug build, then the code specified as the option to the macro will be
+// executed.  Otherwise it will not be.  Example:
+//
+// DEBUG(dbgs() << "Bitset contains: " << Bitset << "\n");
+//
+#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/DebugLoc.h b/final/include/llvm/Support/DebugLoc.h
new file mode 100644
index 00000000000..ccc34461291
--- /dev/null
+++ b/final/include/llvm/Support/DebugLoc.h
@@ -0,0 +1,80 @@
+//===---- llvm/Support/DebugLoc.h - Debug Location Information --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a number of lightweight data structures used
+// to describe and track debug location information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DEBUGLOC_H
+#define LLVM_SUPPORT_DEBUGLOC_H
+
+namespace llvm {
+  class MDNode;
+  class LLVMContext;
+
+  /// DebugLoc - Debug location id.  This is carried by Instruction, SDNode,
+  /// and MachineInstr to compactly encode file/line/scope information for an
+  /// operation.
+  class DebugLoc {
+    /// LineCol - This 32-bit value encodes the line and column number for the
+    /// location, encoded as 24-bits for line and 8 bits for col.  A value of 0
+    /// for either means unknown.
+    unsigned LineCol;
+
+    /// ScopeIdx - This is an opaque ID# for Scope/InlinedAt information,
+    /// decoded by LLVMContext.  0 is unknown.
+    int ScopeIdx;
+  public:
+    DebugLoc() : LineCol(0), ScopeIdx(0) {}  // Defaults to unknown.
+
+    /// get - Get a new DebugLoc that corresponds to the specified line/col
+    /// scope/inline location.
+    static DebugLoc get(unsigned Line, unsigned Col,
+                        MDNode *Scope, MDNode *InlinedAt = 0);
+
+    /// getFromDILocation - Translate the DILocation quad into a DebugLoc.
+    static DebugLoc getFromDILocation(MDNode *N);
+
+    /// isUnknown - Return true if this is an unknown location.
+    bool isUnknown() const { return ScopeIdx == 0; }
+
+    unsigned getLine() const {
+      return (LineCol << 8) >> 8;  // Mask out column.
+    }
+
+    unsigned getCol() const {
+      return LineCol >> 24;
+    }
+
+    /// getScope - This returns the scope pointer for this DebugLoc, or null if
+    /// invalid.
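+    /// (Illustrative note: the scope is recovered from a side-table kept by
+    /// the LLVMContext, keyed by ScopeIdx; an unknown location, i.e.
+    /// ScopeIdx == 0, yields a null MDNode.)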
+    MDNode *getScope(const LLVMContext &Ctx) const;
+
+    /// getInlinedAt - This returns the InlinedAt pointer for this DebugLoc, or
+    /// null if invalid or not present.
+    MDNode *getInlinedAt(const LLVMContext &Ctx) const;
+
+    /// getScopeAndInlinedAt - Return both the Scope and the InlinedAt values.
+    void getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+                              const LLVMContext &Ctx) const;
+
+
+    /// getAsMDNode - This method converts the compressed DebugLoc node into a
+    /// DILocation compatible MDNode.
+    MDNode *getAsMDNode(const LLVMContext &Ctx) const;
+
+    bool operator==(const DebugLoc &DL) const {
+      return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx;
+    }
+    bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }
+  };
+} // end namespace llvm
+
+#endif /* LLVM_SUPPORT_DEBUGLOC_H */
diff --git a/final/include/llvm/Support/Disassembler.h b/final/include/llvm/Support/Disassembler.h
new file mode 100644
index 00000000000..6d1cc0fdcb5
--- /dev/null
+++ b/final/include/llvm/Support/Disassembler.h
@@ -0,0 +1,35 @@
+//===- llvm/Support/Disassembler.h ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_DISASSEMBLER_H
+#define LLVM_SYSTEM_DISASSEMBLER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+/// This function returns true if it is possible to use an external
+/// disassembler library, and false otherwise.
+bool hasDisassembler();
+
+/// This function provides some "glue" code to call external disassembler
+/// libraries.
+std::string disassembleBuffer(uint8_t* start, size_t length, uint64_t pc = 0);
+
+}
+}
+
+#endif // LLVM_SYSTEM_DISASSEMBLER_H
diff --git a/final/include/llvm/Support/Dwarf.h b/final/include/llvm/Support/Dwarf.h
new file mode 100644
index 00000000000..5d0b5a943d5
--- /dev/null
+++ b/final/include/llvm/Support/Dwarf.h
@@ -0,0 +1,677 @@
+//===-- llvm/Support/Dwarf.h ---Dwarf Constants------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains constants used for implementing Dwarf debug support.  For
+// details on the DWARF 3 specification see the DWARF Debugging Information
+// Format V.3 reference manual, http://dwarf.freestandards.org .
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DWARF_H
+#define LLVM_SUPPORT_DWARF_H
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Debug info constants.
+
+enum {
+  LLVMDebugVersion = (9 << 16),    // Current version of debug information.
+  LLVMDebugVersion8 = (8 << 16),   // Constant for version 8.
+  LLVMDebugVersion7 = (7 << 16),   // Constant for version 7.
+  LLVMDebugVersion6 = (6 << 16),   // Constant for version 6.
+  LLVMDebugVersion5 = (5 << 16),   // Constant for version 5.
+  LLVMDebugVersion4 = (4 << 16),   // Constant for version 4.
+  LLVMDebugVersionMask = 0xffff0000  // Mask for version number.
+};
+
+namespace dwarf {
+
+//===----------------------------------------------------------------------===//
+// Dwarf constants as gleaned from the DWARF Debugging Information Format V.3
+// reference manual http://dwarf.freestandards.org .
+//
+
+// Do not mix the following two enumeration sets.  DW_TAG_invalid changes the
+// enumeration base type.
+
+enum llvm_dwarf_constants {
+  // llvm mock tags
+  DW_TAG_invalid = ~0U,                 // Tag for invalid results.
+
+  DW_TAG_auto_variable = 0x100,         // Tag for local (auto) variables.
+  DW_TAG_arg_variable = 0x101,          // Tag for argument variables.
+  DW_TAG_return_variable = 0x102,       // Tag for return variables.
+  DW_TAG_vector_type = 0x103,           // Tag for vector types.
+
+  DW_TAG_user_base = 0x1000,            // Recommended base for user tags.
+
+  DW_CIE_VERSION = 1,                   // Common frame information version.
+  DW_CIE_ID = 0xffffffff                // Common frame information mark.
+};
+
+enum dwarf_constants {
+  DWARF_VERSION = 2,
+
+  // Tags
+  DW_TAG_array_type = 0x01,
+  DW_TAG_class_type = 0x02,
+  DW_TAG_entry_point = 0x03,
+  DW_TAG_enumeration_type = 0x04,
+  DW_TAG_formal_parameter = 0x05,
+  DW_TAG_imported_declaration = 0x08,
+  DW_TAG_label = 0x0a,
+  DW_TAG_lexical_block = 0x0b,
+  DW_TAG_member = 0x0d,
+  DW_TAG_pointer_type = 0x0f,
+  DW_TAG_reference_type = 0x10,
+  DW_TAG_compile_unit = 0x11,
+  DW_TAG_string_type = 0x12,
+  DW_TAG_structure_type = 0x13,
+  DW_TAG_subroutine_type = 0x15,
+  DW_TAG_typedef = 0x16,
+  DW_TAG_union_type = 0x17,
+  DW_TAG_unspecified_parameters = 0x18,
+  DW_TAG_variant = 0x19,
+  DW_TAG_common_block = 0x1a,
+  DW_TAG_common_inclusion = 0x1b,
+  DW_TAG_inheritance = 0x1c,
+  DW_TAG_inlined_subroutine = 0x1d,
+  DW_TAG_module = 0x1e,
+  DW_TAG_ptr_to_member_type = 0x1f,
+  DW_TAG_set_type = 0x20,
+  DW_TAG_subrange_type = 0x21,
+  DW_TAG_with_stmt = 0x22,
+  DW_TAG_access_declaration = 0x23,
+  DW_TAG_base_type = 0x24,
+  DW_TAG_catch_block = 0x25,
+  DW_TAG_const_type = 0x26,
+  DW_TAG_constant = 0x27,
+  DW_TAG_enumerator = 0x28,
+  DW_TAG_file_type = 0x29,
+  DW_TAG_friend = 0x2a,
+  DW_TAG_namelist = 0x2b,
+  DW_TAG_namelist_item = 0x2c,
+  DW_TAG_packed_type = 0x2d,
+  DW_TAG_subprogram = 0x2e,
+  DW_TAG_template_type_parameter = 0x2f,
+  DW_TAG_template_value_parameter = 0x30,
+  DW_TAG_thrown_type = 0x31,
+  DW_TAG_try_block = 0x32,
+  DW_TAG_variant_part = 0x33,
+  DW_TAG_variable = 0x34,
+  DW_TAG_volatile_type = 0x35,
+  DW_TAG_dwarf_procedure = 0x36,
+  DW_TAG_restrict_type = 0x37,
+  DW_TAG_interface_type = 0x38,
+  DW_TAG_namespace = 0x39,
+  DW_TAG_imported_module = 0x3a,
+  DW_TAG_unspecified_type = 0x3b,
+  DW_TAG_partial_unit = 0x3c,
+  DW_TAG_imported_unit = 0x3d,
+  DW_TAG_condition = 0x3f,
+  DW_TAG_shared_type = 0x40,
+  DW_TAG_rvalue_reference_type = 0x41,
+  DW_TAG_lo_user = 0x4080,
+  DW_TAG_hi_user = 0xffff,
+
+  // Children flag
+  DW_CHILDREN_no = 0x00,
+  DW_CHILDREN_yes = 0x01,
+
+  // Attributes
+  DW_AT_sibling = 0x01,
+  DW_AT_location = 0x02,
+  DW_AT_name = 0x03,
+  DW_AT_ordering = 0x09,
+  DW_AT_byte_size = 0x0b,
+  DW_AT_bit_offset = 0x0c,
+  DW_AT_bit_size = 0x0d,
+  DW_AT_stmt_list = 0x10,
+  DW_AT_low_pc = 0x11,
+  DW_AT_high_pc = 0x12,
+  DW_AT_language = 0x13,
+  DW_AT_discr = 0x15,
+  DW_AT_discr_value = 0x16,
+  DW_AT_visibility = 0x17,
+  DW_AT_import = 0x18,
+  DW_AT_string_length = 0x19,
+  DW_AT_common_reference = 0x1a,
+  DW_AT_comp_dir = 0x1b,
+  DW_AT_const_value = 0x1c,
+  DW_AT_containing_type = 0x1d,
+  DW_AT_default_value = 0x1e,
+  DW_AT_inline = 0x20,
+  DW_AT_is_optional = 0x21,
+  DW_AT_lower_bound = 0x22,
+
DW_AT_producer = 0x25, + DW_AT_prototyped = 0x27, + DW_AT_return_addr = 0x2a, + DW_AT_start_scope = 0x2c, + DW_AT_bit_stride = 0x2e, + DW_AT_upper_bound = 0x2f, + DW_AT_abstract_origin = 0x31, + DW_AT_accessibility = 0x32, + DW_AT_address_class = 0x33, + DW_AT_artificial = 0x34, + DW_AT_base_types = 0x35, + DW_AT_calling_convention = 0x36, + DW_AT_count = 0x37, + DW_AT_data_member_location = 0x38, + DW_AT_decl_column = 0x39, + DW_AT_decl_file = 0x3a, + DW_AT_decl_line = 0x3b, + DW_AT_declaration = 0x3c, + DW_AT_discr_list = 0x3d, + DW_AT_encoding = 0x3e, + DW_AT_external = 0x3f, + DW_AT_frame_base = 0x40, + DW_AT_friend = 0x41, + DW_AT_identifier_case = 0x42, + DW_AT_macro_info = 0x43, + DW_AT_namelist_item = 0x44, + DW_AT_priority = 0x45, + DW_AT_segment = 0x46, + DW_AT_specification = 0x47, + DW_AT_static_link = 0x48, + DW_AT_type = 0x49, + DW_AT_use_location = 0x4a, + DW_AT_variable_parameter = 0x4b, + DW_AT_virtuality = 0x4c, + DW_AT_vtable_elem_location = 0x4d, + DW_AT_allocated = 0x4e, + DW_AT_associated = 0x4f, + DW_AT_data_location = 0x50, + DW_AT_byte_stride = 0x51, + DW_AT_entry_pc = 0x52, + DW_AT_use_UTF8 = 0x53, + DW_AT_extension = 0x54, + DW_AT_ranges = 0x55, + DW_AT_trampoline = 0x56, + DW_AT_call_column = 0x57, + DW_AT_call_file = 0x58, + DW_AT_call_line = 0x59, + DW_AT_description = 0x5a, + DW_AT_binary_scale = 0x5b, + DW_AT_decimal_scale = 0x5c, + DW_AT_small = 0x5d, + DW_AT_decimal_sign = 0x5e, + DW_AT_digit_count = 0x5f, + DW_AT_picture_string = 0x60, + DW_AT_mutable = 0x61, + DW_AT_threads_scaled = 0x62, + DW_AT_explicit = 0x63, + DW_AT_object_pointer = 0x64, + DW_AT_endianity = 0x65, + DW_AT_elemental = 0x66, + DW_AT_pure = 0x67, + DW_AT_recursive = 0x68, + DW_AT_MIPS_linkage_name = 0x2007, + DW_AT_sf_names = 0x2101, + DW_AT_src_info = 0x2102, + DW_AT_mac_info = 0x2103, + DW_AT_src_coords = 0x2104, + DW_AT_body_begin = 0x2105, + DW_AT_body_end = 0x2106, + DW_AT_GNU_vector = 0x2107, + DW_AT_lo_user = 0x2000, + DW_AT_hi_user = 0x3fff, + + // Apple extensions. 
+ DW_AT_APPLE_optimized = 0x3fe1, + DW_AT_APPLE_flags = 0x3fe2, + DW_AT_APPLE_isa = 0x3fe3, + DW_AT_APPLE_block = 0x3fe4, + DW_AT_APPLE_major_runtime_vers = 0x3fe5, + DW_AT_APPLE_runtime_class = 0x3fe6, + DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + + // Attribute form encodings + DW_FORM_addr = 0x01, + DW_FORM_block2 = 0x03, + DW_FORM_block4 = 0x04, + DW_FORM_data2 = 0x05, + DW_FORM_data4 = 0x06, + DW_FORM_data8 = 0x07, + DW_FORM_string = 0x08, + DW_FORM_block = 0x09, + DW_FORM_block1 = 0x0a, + DW_FORM_data1 = 0x0b, + DW_FORM_flag = 0x0c, + DW_FORM_sdata = 0x0d, + DW_FORM_strp = 0x0e, + DW_FORM_udata = 0x0f, + DW_FORM_ref_addr = 0x10, + DW_FORM_ref1 = 0x11, + DW_FORM_ref2 = 0x12, + DW_FORM_ref4 = 0x13, + DW_FORM_ref8 = 0x14, + DW_FORM_ref_udata = 0x15, + DW_FORM_indirect = 0x16, + + // Operation encodings + DW_OP_addr = 0x03, + DW_OP_deref = 0x06, + DW_OP_const1u = 0x08, + DW_OP_const1s = 0x09, + DW_OP_const2u = 0x0a, + DW_OP_const2s = 0x0b, + DW_OP_const4u = 0x0c, + DW_OP_const4s = 0x0d, + DW_OP_const8u = 0x0e, + DW_OP_const8s = 0x0f, + DW_OP_constu = 0x10, + DW_OP_consts = 0x11, + DW_OP_dup = 0x12, + DW_OP_drop = 0x13, + DW_OP_over = 0x14, + DW_OP_pick = 0x15, + DW_OP_swap = 0x16, + DW_OP_rot = 0x17, + DW_OP_xderef = 0x18, + DW_OP_abs = 0x19, + DW_OP_and = 0x1a, + DW_OP_div = 0x1b, + DW_OP_minus = 0x1c, + DW_OP_mod = 0x1d, + DW_OP_mul = 0x1e, + DW_OP_neg = 0x1f, + DW_OP_not = 0x20, + DW_OP_or = 0x21, + DW_OP_plus = 0x22, + DW_OP_plus_uconst = 0x23, + DW_OP_shl = 0x24, + DW_OP_shr = 0x25, + DW_OP_shra = 0x26, + DW_OP_xor = 0x27, + DW_OP_skip = 0x2f, + DW_OP_bra = 0x28, + DW_OP_eq = 0x29, + DW_OP_ge = 0x2a, + DW_OP_gt = 0x2b, + DW_OP_le = 0x2c, + DW_OP_lt = 0x2d, + DW_OP_ne = 0x2e, + DW_OP_lit0 = 0x30, + DW_OP_lit1 = 0x31, + DW_OP_lit2 = 0x32, + DW_OP_lit3 = 0x33, + DW_OP_lit4 = 0x34, + DW_OP_lit5 = 0x35, + DW_OP_lit6 = 0x36, + DW_OP_lit7 = 0x37, + DW_OP_lit8 = 0x38, + DW_OP_lit9 = 0x39, + DW_OP_lit10 = 0x3a, + DW_OP_lit11 = 0x3b, + DW_OP_lit12 = 0x3c, + DW_OP_lit13 = 0x3d, + DW_OP_lit14 = 0x3e, + DW_OP_lit15 = 0x3f, + DW_OP_lit16 = 0x40, + DW_OP_lit17 = 0x41, + DW_OP_lit18 = 0x42, + DW_OP_lit19 = 0x43, + DW_OP_lit20 = 0x44, + DW_OP_lit21 = 0x45, + DW_OP_lit22 = 0x46, + DW_OP_lit23 = 0x47, + DW_OP_lit24 = 0x48, + DW_OP_lit25 = 0x49, + DW_OP_lit26 = 0x4a, + DW_OP_lit27 = 0x4b, + DW_OP_lit28 = 0x4c, + DW_OP_lit29 = 0x4d, + DW_OP_lit30 = 0x4e, + DW_OP_lit31 = 0x4f, + DW_OP_reg0 = 0x50, + DW_OP_reg1 = 0x51, + DW_OP_reg2 = 0x52, + DW_OP_reg3 = 0x53, + DW_OP_reg4 = 0x54, + DW_OP_reg5 = 0x55, + DW_OP_reg6 = 0x56, + DW_OP_reg7 = 0x57, + DW_OP_reg8 = 0x58, + DW_OP_reg9 = 0x59, + DW_OP_reg10 = 0x5a, + DW_OP_reg11 = 0x5b, + DW_OP_reg12 = 0x5c, + DW_OP_reg13 = 0x5d, + DW_OP_reg14 = 0x5e, + DW_OP_reg15 = 0x5f, + DW_OP_reg16 = 0x60, + DW_OP_reg17 = 0x61, + DW_OP_reg18 = 0x62, + DW_OP_reg19 = 0x63, + DW_OP_reg20 = 0x64, + DW_OP_reg21 = 0x65, + DW_OP_reg22 = 0x66, + DW_OP_reg23 = 0x67, + DW_OP_reg24 = 0x68, + DW_OP_reg25 = 0x69, + DW_OP_reg26 = 0x6a, + DW_OP_reg27 = 0x6b, + DW_OP_reg28 = 0x6c, + DW_OP_reg29 = 0x6d, + DW_OP_reg30 = 0x6e, + DW_OP_reg31 = 0x6f, + DW_OP_breg0 = 0x70, + DW_OP_breg1 = 0x71, + DW_OP_breg2 = 0x72, + DW_OP_breg3 = 0x73, + DW_OP_breg4 = 0x74, + DW_OP_breg5 = 0x75, + DW_OP_breg6 = 0x76, + DW_OP_breg7 = 0x77, + DW_OP_breg8 = 0x78, + DW_OP_breg9 = 0x79, + DW_OP_breg10 = 0x7a, + DW_OP_breg11 = 0x7b, + DW_OP_breg12 = 0x7c, + DW_OP_breg13 = 0x7d, + DW_OP_breg14 = 0x7e, + DW_OP_breg15 = 0x7f, + DW_OP_breg16 = 0x80, + DW_OP_breg17 = 0x81, + DW_OP_breg18 = 0x82, + DW_OP_breg19 = 0x83, + 
DW_OP_breg20 = 0x84, + DW_OP_breg21 = 0x85, + DW_OP_breg22 = 0x86, + DW_OP_breg23 = 0x87, + DW_OP_breg24 = 0x88, + DW_OP_breg25 = 0x89, + DW_OP_breg26 = 0x8a, + DW_OP_breg27 = 0x8b, + DW_OP_breg28 = 0x8c, + DW_OP_breg29 = 0x8d, + DW_OP_breg30 = 0x8e, + DW_OP_breg31 = 0x8f, + DW_OP_regx = 0x90, + DW_OP_fbreg = 0x91, + DW_OP_bregx = 0x92, + DW_OP_piece = 0x93, + DW_OP_deref_size = 0x94, + DW_OP_xderef_size = 0x95, + DW_OP_nop = 0x96, + DW_OP_push_object_address = 0x97, + DW_OP_call2 = 0x98, + DW_OP_call4 = 0x99, + DW_OP_call_ref = 0x9a, + DW_OP_form_tls_address = 0x9b, + DW_OP_call_frame_cfa = 0x9c, + DW_OP_lo_user = 0xe0, + DW_OP_hi_user = 0xff, + + // Encoding attribute values + DW_ATE_address = 0x01, + DW_ATE_boolean = 0x02, + DW_ATE_complex_float = 0x03, + DW_ATE_float = 0x04, + DW_ATE_signed = 0x05, + DW_ATE_signed_char = 0x06, + DW_ATE_unsigned = 0x07, + DW_ATE_unsigned_char = 0x08, + DW_ATE_imaginary_float = 0x09, + DW_ATE_packed_decimal = 0x0a, + DW_ATE_numeric_string = 0x0b, + DW_ATE_edited = 0x0c, + DW_ATE_signed_fixed = 0x0d, + DW_ATE_unsigned_fixed = 0x0e, + DW_ATE_decimal_float = 0x0f, + DW_ATE_lo_user = 0x80, + DW_ATE_hi_user = 0xff, + + // Decimal sign attribute values + DW_DS_unsigned = 0x01, + DW_DS_leading_overpunch = 0x02, + DW_DS_trailing_overpunch = 0x03, + DW_DS_leading_separate = 0x04, + DW_DS_trailing_separate = 0x05, + + // Endianity attribute values + DW_END_default = 0x00, + DW_END_big = 0x01, + DW_END_little = 0x02, + DW_END_lo_user = 0x40, + DW_END_hi_user = 0xff, + + // Accessibility codes + DW_ACCESS_public = 0x01, + DW_ACCESS_protected = 0x02, + DW_ACCESS_private = 0x03, + + // Visibility codes + DW_VIS_local = 0x01, + DW_VIS_exported = 0x02, + DW_VIS_qualified = 0x03, + + // Virtuality codes + DW_VIRTUALITY_none = 0x00, + DW_VIRTUALITY_virtual = 0x01, + DW_VIRTUALITY_pure_virtual = 0x02, + + // Language names + DW_LANG_C89 = 0x0001, + DW_LANG_C = 0x0002, + DW_LANG_Ada83 = 0x0003, + DW_LANG_C_plus_plus = 0x0004, + DW_LANG_Cobol74 = 0x0005, + DW_LANG_Cobol85 = 0x0006, + DW_LANG_Fortran77 = 0x0007, + DW_LANG_Fortran90 = 0x0008, + DW_LANG_Pascal83 = 0x0009, + DW_LANG_Modula2 = 0x000a, + DW_LANG_Java = 0x000b, + DW_LANG_C99 = 0x000c, + DW_LANG_Ada95 = 0x000d, + DW_LANG_Fortran95 = 0x000e, + DW_LANG_PLI = 0x000f, + DW_LANG_ObjC = 0x0010, + DW_LANG_ObjC_plus_plus = 0x0011, + DW_LANG_UPC = 0x0012, + DW_LANG_D = 0x0013, + DW_LANG_lo_user = 0x8000, + DW_LANG_hi_user = 0xffff, + + // Identifier case codes + DW_ID_case_sensitive = 0x00, + DW_ID_up_case = 0x01, + DW_ID_down_case = 0x02, + DW_ID_case_insensitive = 0x03, + + // Calling convention codes + DW_CC_normal = 0x01, + DW_CC_program = 0x02, + DW_CC_nocall = 0x03, + DW_CC_lo_user = 0x40, + DW_CC_hi_user = 0xff, + + // Inline codes + DW_INL_not_inlined = 0x00, + DW_INL_inlined = 0x01, + DW_INL_declared_not_inlined = 0x02, + DW_INL_declared_inlined = 0x03, + + // Array ordering + DW_ORD_row_major = 0x00, + DW_ORD_col_major = 0x01, + + // Discriminant descriptor values + DW_DSC_label = 0x00, + DW_DSC_range = 0x01, + + // Line Number Standard Opcode Encodings + DW_LNS_extended_op = 0x00, + DW_LNS_copy = 0x01, + DW_LNS_advance_pc = 0x02, + DW_LNS_advance_line = 0x03, + DW_LNS_set_file = 0x04, + DW_LNS_set_column = 0x05, + DW_LNS_negate_stmt = 0x06, + DW_LNS_set_basic_block = 0x07, + DW_LNS_const_add_pc = 0x08, + DW_LNS_fixed_advance_pc = 0x09, + DW_LNS_set_prologue_end = 0x0a, + DW_LNS_set_epilogue_begin = 0x0b, + DW_LNS_set_isa = 0x0c, + + // Line Number Extended Opcode Encodings + DW_LNE_end_sequence = 0x01, + 
DW_LNE_set_address = 0x02, + DW_LNE_define_file = 0x03, + DW_LNE_lo_user = 0x80, + DW_LNE_hi_user = 0xff, + + // Macinfo Type Encodings + DW_MACINFO_define = 0x01, + DW_MACINFO_undef = 0x02, + DW_MACINFO_start_file = 0x03, + DW_MACINFO_end_file = 0x04, + DW_MACINFO_vendor_ext = 0xff, + + // Call frame instruction encodings + DW_CFA_extended = 0x00, + DW_CFA_nop = 0x00, + DW_CFA_advance_loc = 0x40, + DW_CFA_offset = 0x80, + DW_CFA_restore = 0xc0, + DW_CFA_set_loc = 0x01, + DW_CFA_advance_loc1 = 0x02, + DW_CFA_advance_loc2 = 0x03, + DW_CFA_advance_loc4 = 0x04, + DW_CFA_offset_extended = 0x05, + DW_CFA_restore_extended = 0x06, + DW_CFA_undefined = 0x07, + DW_CFA_same_value = 0x08, + DW_CFA_register = 0x09, + DW_CFA_remember_state = 0x0a, + DW_CFA_restore_state = 0x0b, + DW_CFA_def_cfa = 0x0c, + DW_CFA_def_cfa_register = 0x0d, + DW_CFA_def_cfa_offset = 0x0e, + DW_CFA_def_cfa_expression = 0x0f, + DW_CFA_expression = 0x10, + DW_CFA_offset_extended_sf = 0x11, + DW_CFA_def_cfa_sf = 0x12, + DW_CFA_def_cfa_offset_sf = 0x13, + DW_CFA_val_offset = 0x14, + DW_CFA_val_offset_sf = 0x15, + DW_CFA_val_expression = 0x16, + DW_CFA_lo_user = 0x1c, + DW_CFA_hi_user = 0x3f, + + DW_EH_PE_absptr = 0x00, + DW_EH_PE_omit = 0xff, + DW_EH_PE_uleb128 = 0x01, + DW_EH_PE_udata2 = 0x02, + DW_EH_PE_udata4 = 0x03, + DW_EH_PE_udata8 = 0x04, + DW_EH_PE_sleb128 = 0x09, + DW_EH_PE_sdata2 = 0x0A, + DW_EH_PE_sdata4 = 0x0B, + DW_EH_PE_sdata8 = 0x0C, + DW_EH_PE_signed = 0x08, + DW_EH_PE_pcrel = 0x10, + DW_EH_PE_textrel = 0x20, + DW_EH_PE_datarel = 0x30, + DW_EH_PE_funcrel = 0x40, + DW_EH_PE_aligned = 0x50, + DW_EH_PE_indirect = 0x80 +}; + +/// TagString - Return the string for the specified tag. +/// +const char *TagString(unsigned Tag); + +/// ChildrenString - Return the string for the specified children flag. +/// +const char *ChildrenString(unsigned Children); + +/// AttributeString - Return the string for the specified attribute. +/// +const char *AttributeString(unsigned Attribute); + +/// FormEncodingString - Return the string for the specified form encoding. +/// +const char *FormEncodingString(unsigned Encoding); + +/// OperationEncodingString - Return the string for the specified operation +/// encoding. +const char *OperationEncodingString(unsigned Encoding); + +/// AttributeEncodingString - Return the string for the specified attribute +/// encoding. +const char *AttributeEncodingString(unsigned Encoding); + +/// DecimalSignString - Return the string for the specified decimal sign +/// attribute. +const char *DecimalSignString(unsigned Sign); + +/// EndianityString - Return the string for the specified endianity. +/// +const char *EndianityString(unsigned Endian); + +/// AccessibilityString - Return the string for the specified accessibility. +/// +const char *AccessibilityString(unsigned Access); + +/// VisibilityString - Return the string for the specified visibility. +/// +const char *VisibilityString(unsigned Visibility); + +/// VirtualityString - Return the string for the specified virtuality. +/// +const char *VirtualityString(unsigned Virtuality); + +/// LanguageString - Return the string for the specified language. +/// +const char *LanguageString(unsigned Language); + +/// CaseString - Return the string for the specified identifier case. +/// +const char *CaseString(unsigned Case); + +/// ConventionString - Return the string for the specified calling convention. +/// +const char *ConventionString(unsigned Convention); + +/// InlineCodeString - Return the string for the specified inline code. 
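+/// (These *String helpers conventionally return the enumerator's spelling;
+/// as an illustrative example, InlineCodeString(DW_INL_inlined) yields
+/// "DW_INL_inlined". This is a note added here, not a contract stated by the
+/// original header.)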
+///
+const char *InlineCodeString(unsigned Code);
+
+/// ArrayOrderString - Return the string for the specified array order.
+///
+const char *ArrayOrderString(unsigned Order);
+
+/// DiscriminantString - Return the string for the specified discriminant
+/// descriptor.
+const char *DiscriminantString(unsigned Discriminant);
+
+/// LNStandardString - Return the string for the specified line number standard.
+///
+const char *LNStandardString(unsigned Standard);
+
+/// LNExtendedString - Return the string for the specified line number extended
+/// opcode encodings.
+const char *LNExtendedString(unsigned Encoding);
+
+/// MacinfoString - Return the string for the specified macinfo type encodings.
+///
+const char *MacinfoString(unsigned Encoding);
+
+/// CallFrameString - Return the string for the specified call frame instruction
+/// encodings.
+const char *CallFrameString(unsigned Encoding);
+} // End of namespace dwarf
+
+} // End of namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/DynamicLibrary.h b/final/include/llvm/Support/DynamicLibrary.h
new file mode 100644
index 00000000000..e6d9ff57ae8
--- /dev/null
+++ b/final/include/llvm/Support/DynamicLibrary.h
@@ -0,0 +1,86 @@
+//===-- llvm/Support/DynamicLibrary.h - Portable Dynamic Library -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the sys::DynamicLibrary class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_DYNAMIC_LIBRARY_H
+#define LLVM_SYSTEM_DYNAMIC_LIBRARY_H
+
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  /// This class provides a portable interface to dynamic libraries which also
+  /// might be known as shared libraries, shared objects, dynamic shared
+  /// objects, or dynamic link libraries.  Regardless of the terminology or the
+  /// operating system interface, this class provides a portable interface that
+  /// allows dynamic libraries to be loaded and searched for externally
+  /// defined symbols.  This is typically used to provide "plug-in" support.
+  /// It also allows for symbols to be defined which don't live in any library,
+  /// but rather the main program itself, useful on Windows where the main
+  /// executable cannot be searched.
+  class DynamicLibrary {
+    DynamicLibrary(); // DO NOT IMPLEMENT
+  public:
+    /// This function allows a library to be loaded without instantiating a
+    /// DynamicLibrary object.  Consequently, it is marked as being permanent
+    /// and will only be unloaded when the program terminates.  This returns
+    /// false on success or returns true and fills in *ErrMsg on failure.
+    /// @brief Open a dynamic library permanently.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static bool LoadLibraryPermanently(const char *filename,
+                                       std::string *ErrMsg = 0);
+
+    /// This function will search through all previously loaded dynamic
+    /// libraries for the symbol \p symbolName.  If it is found, the address of
+    /// that symbol is returned.  If not, null is returned.  Note that this will
+    /// search permanently loaded libraries (LoadLibraryPermanently) as well
+    /// as ephemerally loaded libraries (constructors).
+    /// @throws std::string on error.
+    /// @brief Search through libraries for address of a symbol
+    ///
+    /// NOTE: This function is not thread safe.
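+    ///
+    /// Illustrative use (the library path and symbol name are hypothetical):
+    ///
+    /// \code
+    ///   if (!DynamicLibrary::LoadLibraryPermanently("plugin.so"))
+    ///     if (void *P = DynamicLibrary::SearchForAddressOfSymbol("plugin_init"))
+    ///       ... call through P ...
+    /// \endcode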
+    ///
+    static void *SearchForAddressOfSymbol(const char *symbolName);
+
+    /// @brief Convenience function for C++ophiles.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void *SearchForAddressOfSymbol(const std::string &symbolName) {
+      return SearchForAddressOfSymbol(symbolName.c_str());
+    }
+
+    /// This function permanently adds the symbol \p symbolName with the
+    /// value \p symbolValue.  These symbols are searched before any
+    /// libraries.
+    /// @brief Add searchable symbol/value pair.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void AddSymbol(const char *symbolName, void *symbolValue);
+
+    /// @brief Convenience function for C++ophiles.
+    ///
+    /// NOTE: This function is not thread safe.
+    ///
+    static void AddSymbol(const std::string &symbolName, void *symbolValue) {
+      AddSymbol(symbolName.c_str(), symbolValue);
+    }
+  };
+
+} // End sys namespace
+} // End llvm namespace
+
+#endif // LLVM_SYSTEM_DYNAMIC_LIBRARY_H
diff --git a/final/include/llvm/Support/ELF.h b/final/include/llvm/Support/ELF.h
new file mode 100644
index 00000000000..cc72bd59cb7
--- /dev/null
+++ b/final/include/llvm/Support/ELF.h
@@ -0,0 +1,824 @@
+//===-- llvm/Support/ELF.h - ELF constants and data structures --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 stuff is based on ELF-64 Object File Format
+// Version 1.5, Draft 2, May 1998 as well as OpenBSD header files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ELF_H
+#define LLVM_SUPPORT_ELF_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstring>
+
+namespace llvm {
+
+namespace ELF {
+
+typedef uint32_t Elf32_Addr; // Program address
+typedef uint16_t Elf32_Half;
+typedef uint32_t Elf32_Off;  // File offset
+typedef int32_t  Elf32_Sword;
+typedef uint32_t Elf32_Word;
+
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+typedef int32_t  Elf64_Shalf;
+typedef int32_t  Elf64_Sword;
+typedef uint32_t Elf64_Word;
+typedef int64_t  Elf64_Sxword;
+typedef uint64_t Elf64_Xword;
+typedef uint32_t Elf64_Half;
+typedef uint16_t Elf64_Quarter;
+
+// Object file magic string.
+static const char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
+
+// e_ident size and indices.
+enum {
+  EI_MAG0       = 0,          // File identification index.
+  EI_MAG1       = 1,          // File identification index.
+  EI_MAG2       = 2,          // File identification index.
+  EI_MAG3       = 3,          // File identification index.
+  EI_CLASS      = 4,          // File class.
+  EI_DATA       = 5,          // Data encoding.
+  EI_VERSION    = 6,          // File version.
+  EI_OSABI      = 7,          // OS/ABI identification.
+  EI_ABIVERSION = 8,          // ABI version.
+  EI_PAD        = 9,          // Start of padding bytes.
+  EI_NIDENT     = 16          // Number of bytes in e_ident.
+}; + +struct Elf32_Ehdr { + unsigned char e_ident[EI_NIDENT]; // ELF Identification bytes + Elf32_Half e_type; // Type of file (see ET_* below) + Elf32_Half e_machine; // Required architecture for this file (see EM_*) + Elf32_Word e_version; // Must be equal to 1 + Elf32_Addr e_entry; // Address to jump to in order to start program + Elf32_Off e_phoff; // Program header table's file offset, in bytes + Elf32_Off e_shoff; // Section header table's file offset, in bytes + Elf32_Word e_flags; // Processor-specific flags + Elf32_Half e_ehsize; // Size of ELF header, in bytes + Elf32_Half e_phentsize; // Size of an entry in the program header table + Elf32_Half e_phnum; // Number of entries in the program header table + Elf32_Half e_shentsize; // Size of an entry in the section header table + Elf32_Half e_shnum; // Number of entries in the section header table + Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; + } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } +}; + +// 64-bit ELF header. Fields are the same as for ELF32, but with different +// types (see above). +struct Elf64_Ehdr { + unsigned char e_ident[EI_NIDENT]; + Elf64_Quarter e_type; + Elf64_Quarter e_machine; + Elf64_Half e_version; + Elf64_Addr e_entry; + Elf64_Off e_phoff; + Elf64_Off e_shoff; + Elf64_Half e_flags; + Elf64_Quarter e_ehsize; + Elf64_Quarter e_phentsize; + Elf64_Quarter e_phnum; + Elf64_Quarter e_shentsize; + Elf64_Quarter e_shnum; + Elf64_Quarter e_shstrndx; + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; + } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } +}; + +// File types +enum { + ET_NONE = 0, // No file type + ET_REL = 1, // Relocatable file + ET_EXEC = 2, // Executable file + ET_DYN = 3, // Shared object file + ET_CORE = 4, // Core file + ET_LOPROC = 0xff00, // Beginning of processor-specific codes + ET_HIPROC = 0xffff // Processor-specific +}; + +// Versioning +enum { + EV_NONE = 0, + EV_CURRENT = 1 +}; + +// Machine architectures +enum { + EM_NONE = 0, // No machine + EM_M32 = 1, // AT&T WE 32100 + EM_SPARC = 2, // SPARC + EM_386 = 3, // Intel 386 + EM_68K = 4, // Motorola 68000 + EM_88K = 5, // Motorola 88000 + EM_486 = 6, // Intel 486 (deprecated) + EM_860 = 7, // Intel 80860 + EM_MIPS = 8, // MIPS R3000 + EM_PPC = 20, // PowerPC + EM_PPC64 = 21, // PowerPC64 + EM_ARM = 40, // ARM + EM_ALPHA = 41, // DEC Alpha + EM_SPARCV9 = 43, // SPARC V9 + EM_X86_64 = 62, // AMD64 + EM_MBLAZE = 47787 // Xilinx MicroBlaze +}; + +// Object file classes. +enum { + ELFCLASSNONE = 0, + ELFCLASS32 = 1, // 32-bit object file + ELFCLASS64 = 2 // 64-bit object file +}; + +// Object file byte orderings. +enum { + ELFDATANONE = 0, // Invalid data encoding. + ELFDATA2LSB = 1, // Little-endian object file + ELFDATA2MSB = 2 // Big-endian object file +}; + +// OS ABI identification. 
+enum { + ELFOSABI_NONE = 0, // UNIX System V ABI + ELFOSABI_HPUX = 1, // HP-UX operating system + ELFOSABI_NETBSD = 2, // NetBSD + ELFOSABI_LINUX = 3, // GNU/Linux + ELFOSABI_HURD = 4, // GNU/Hurd + ELFOSABI_SOLARIS = 6, // Solaris + ELFOSABI_AIX = 7, // AIX + ELFOSABI_IRIX = 8, // IRIX + ELFOSABI_FREEBSD = 9, // FreeBSD + ELFOSABI_TRU64 = 10, // TRU64 UNIX + ELFOSABI_MODESTO = 11, // Novell Modesto + ELFOSABI_OPENBSD = 12, // OpenBSD + ELFOSABI_OPENVMS = 13, // OpenVMS + ELFOSABI_NSK = 14, // Hewlett-Packard Non-Stop Kernel + ELFOSABI_AROS = 15, // AROS + ELFOSABI_FENIXOS = 16, // FenixOS + ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 + ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 + ELFOSABI_ARM = 97, // ARM + ELFOSABI_STANDALONE = 255 // Standalone (embedded) application +}; + +// X86_64 relocations. +enum { + R_X86_64_NONE = 0, + R_X86_64_64 = 1, + R_X86_64_PC32 = 2, + R_X86_64_GOT32 = 3, + R_X86_64_PLT32 = 4, + R_X86_64_COPY = 5, + R_X86_64_GLOB_DAT = 6, + R_X86_64_JUMP_SLOT = 7, + R_X86_64_RELATIVE = 8, + R_X86_64_GOTPCREL = 9, + R_X86_64_32 = 10, + R_X86_64_32S = 11, + R_X86_64_16 = 12, + R_X86_64_PC16 = 13, + R_X86_64_8 = 14, + R_X86_64_PC8 = 15, + R_X86_64_DTPMOD64 = 16, + R_X86_64_DTPOFF64 = 17, + R_X86_64_TPOFF64 = 18, + R_X86_64_TLSGD = 19, + R_X86_64_TLSLD = 20, + R_X86_64_DTPOFF32 = 21, + R_X86_64_GOTTPOFF = 22, + R_X86_64_TPOFF32 = 23, + R_X86_64_PC64 = 24, + R_X86_64_GOTOFF64 = 25, + R_X86_64_GOTPC32 = 26, + R_X86_64_SIZE32 = 32, + R_X86_64_SIZE64 = 33, + R_X86_64_GOTPC32_TLSDESC = 34, + R_X86_64_TLSDESC_CALL = 35, + R_X86_64_TLSDESC = 36 +}; + +// i386 relocations. +// TODO: this is just a subset +enum { + R_386_NONE = 0, + R_386_32 = 1, + R_386_PC32 = 2, + R_386_GOT32 = 3, + R_386_PLT32 = 4, + R_386_COPY = 5, + R_386_GLOB_DAT = 6, + R_386_JUMP_SLOT = 7, + R_386_RELATIVE = 8, + R_386_GOTOFF = 9, + R_386_GOTPC = 10, + R_386_32PLT = 11, + R_386_TLS_TPOFF = 14, + R_386_TLS_IE = 15, + R_386_TLS_GOTIE = 16, + R_386_TLS_LE = 17, + R_386_TLS_GD = 18, + R_386_TLS_LDM = 19, + R_386_16 = 20, + R_386_PC16 = 21, + R_386_8 = 22, + R_386_PC8 = 23, + R_386_TLS_GD_32 = 24, + R_386_TLS_GD_PUSH = 25, + R_386_TLS_GD_CALL = 26, + R_386_TLS_GD_POP = 27, + R_386_TLS_LDM_32 = 28, + R_386_TLS_LDM_PUSH = 29, + R_386_TLS_LDM_CALL = 30, + R_386_TLS_LDM_POP = 31, + R_386_TLS_LDO_32 = 32, + R_386_TLS_IE_32 = 33, + R_386_TLS_LE_32 = 34, + R_386_TLS_DTPMOD32 = 35, + R_386_TLS_DTPOFF32 = 36, + R_386_TLS_TPOFF32 = 37, + R_386_TLS_GOTDESC = 39, + R_386_TLS_DESC_CALL = 40, + R_386_TLS_DESC = 41, + R_386_IRELATIVE = 42, + R_386_NUM = 43 +}; + +// MBlaze relocations. +enum { + R_MICROBLAZE_NONE = 0, + R_MICROBLAZE_32 = 1, + R_MICROBLAZE_32_PCREL = 2, + R_MICROBLAZE_64_PCREL = 3, + R_MICROBLAZE_32_PCREL_LO = 4, + R_MICROBLAZE_64 = 5, + R_MICROBLAZE_32_LO = 6, + R_MICROBLAZE_SRO32 = 7, + R_MICROBLAZE_SRW32 = 8, + R_MICROBLAZE_64_NONE = 9, + R_MICROBLAZE_32_SYM_OP_SYM = 10, + R_MICROBLAZE_GNU_VTINHERIT = 11, + R_MICROBLAZE_GNU_VTENTRY = 12, + R_MICROBLAZE_GOTPC_64 = 13, + R_MICROBLAZE_GOT_64 = 14, + R_MICROBLAZE_PLT_64 = 15, + R_MICROBLAZE_REL = 16, + R_MICROBLAZE_JUMP_SLOT = 17, + R_MICROBLAZE_GLOB_DAT = 18, + R_MICROBLAZE_GOTOFF_64 = 19, + R_MICROBLAZE_GOTOFF_32 = 20, + R_MICROBLAZE_COPY = 21 +}; + + +// ARM Specific e_flags +enum { EF_ARM_EABIMASK = 0xFF000000U }; + +// ELF Relocation types for ARM +// Meets 2.08 ABI Specs. 
+ +enum { + R_ARM_NONE = 0x00, + R_ARM_PC24 = 0x01, + R_ARM_ABS32 = 0x02, + R_ARM_REL32 = 0x03, + R_ARM_LDR_PC_G0 = 0x04, + R_ARM_ABS16 = 0x05, + R_ARM_ABS12 = 0x06, + R_ARM_THM_ABS5 = 0x07, + R_ARM_ABS8 = 0x08, + R_ARM_SBREL32 = 0x09, + R_ARM_THM_CALL = 0x0a, + R_ARM_THM_PC8 = 0x0b, + R_ARM_BREL_ADJ = 0x0c, + R_ARM_TLS_DESC = 0x0d, + R_ARM_THM_SWI8 = 0x0e, + R_ARM_XPC25 = 0x0f, + R_ARM_THM_XPC22 = 0x10, + R_ARM_TLS_DTPMOD32 = 0x11, + R_ARM_TLS_DTPOFF32 = 0x12, + R_ARM_TLS_TPOFF32 = 0x13, + R_ARM_COPY = 0x14, + R_ARM_GLOB_DAT = 0x15, + R_ARM_JUMP_SLOT = 0x16, + R_ARM_RELATIVE = 0x17, + R_ARM_GOTOFF32 = 0x18, + R_ARM_BASE_PREL = 0x19, + R_ARM_GOT_BREL = 0x1a, + R_ARM_PLT32 = 0x1b, + R_ARM_CALL = 0x1c, + R_ARM_JUMP24 = 0x1d, + R_ARM_THM_JUMP24 = 0x1e, + R_ARM_BASE_ABS = 0x1f, + R_ARM_ALU_PCREL_7_0 = 0x20, + R_ARM_ALU_PCREL_15_8 = 0x21, + R_ARM_ALU_PCREL_23_15 = 0x22, + R_ARM_LDR_SBREL_11_0_NC = 0x23, + R_ARM_ALU_SBREL_19_12_NC = 0x24, + R_ARM_ALU_SBREL_27_20_CK = 0x25, + R_ARM_TARGET1 = 0x26, + R_ARM_SBREL31 = 0x27, + R_ARM_V4BX = 0x28, + R_ARM_TARGET2 = 0x29, + R_ARM_PREL31 = 0x2a, + R_ARM_MOVW_ABS_NC = 0x2b, + R_ARM_MOVT_ABS = 0x2c, + R_ARM_MOVW_PREL_NC = 0x2d, + R_ARM_MOVT_PREL = 0x2e, + R_ARM_THM_MOVW_ABS_NC = 0x2f, + R_ARM_THM_MOVT_ABS = 0x30, + R_ARM_THM_MOVW_PREL_NC = 0x31, + R_ARM_THM_MOVT_PREL = 0x32, + R_ARM_THM_JUMP19 = 0x33, + R_ARM_THM_JUMP6 = 0x34, + R_ARM_THM_ALU_PREL_11_0 = 0x35, + R_ARM_THM_PC12 = 0x36, + R_ARM_ABS32_NOI = 0x37, + R_ARM_REL32_NOI = 0x38, + R_ARM_ALU_PC_G0_NC = 0x39, + R_ARM_ALU_PC_G0 = 0x3a, + R_ARM_ALU_PC_G1_NC = 0x3b, + R_ARM_ALU_PC_G1 = 0x3c, + R_ARM_ALU_PC_G2 = 0x3d, + R_ARM_LDR_PC_G1 = 0x3e, + R_ARM_LDR_PC_G2 = 0x3f, + R_ARM_LDRS_PC_G0 = 0x40, + R_ARM_LDRS_PC_G1 = 0x41, + R_ARM_LDRS_PC_G2 = 0x42, + R_ARM_LDC_PC_G0 = 0x43, + R_ARM_LDC_PC_G1 = 0x44, + R_ARM_LDC_PC_G2 = 0x45, + R_ARM_ALU_SB_G0_NC = 0x46, + R_ARM_ALU_SB_G0 = 0x47, + R_ARM_ALU_SB_G1_NC = 0x48, + R_ARM_ALU_SB_G1 = 0x49, + R_ARM_ALU_SB_G2 = 0x4a, + R_ARM_LDR_SB_G0 = 0x4b, + R_ARM_LDR_SB_G1 = 0x4c, + R_ARM_LDR_SB_G2 = 0x4d, + R_ARM_LDRS_SB_G0 = 0x4e, + R_ARM_LDRS_SB_G1 = 0x4f, + R_ARM_LDRS_SB_G2 = 0x50, + R_ARM_LDC_SB_G0 = 0x51, + R_ARM_LDC_SB_G1 = 0x52, + R_ARM_LDC_SB_G2 = 0x53, + R_ARM_MOVW_BREL_NC = 0x54, + R_ARM_MOVT_BREL = 0x55, + R_ARM_MOVW_BREL = 0x56, + R_ARM_THM_MOVW_BREL_NC = 0x57, + R_ARM_THM_MOVT_BREL = 0x58, + R_ARM_THM_MOVW_BREL = 0x59, + R_ARM_TLS_GOTDESC = 0x5a, + R_ARM_TLS_CALL = 0x5b, + R_ARM_TLS_DESCSEQ = 0x5c, + R_ARM_THM_TLS_CALL = 0x5d, + R_ARM_PLT32_ABS = 0x5e, + R_ARM_GOT_ABS = 0x5f, + R_ARM_GOT_PREL = 0x60, + R_ARM_GOT_BREL12 = 0x61, + R_ARM_GOTOFF12 = 0x62, + R_ARM_GOTRELAX = 0x63, + R_ARM_GNU_VTENTRY = 0x64, + R_ARM_GNU_VTINHERIT = 0x65, + R_ARM_THM_JUMP11 = 0x66, + R_ARM_THM_JUMP8 = 0x67, + R_ARM_TLS_GD32 = 0x68, + R_ARM_TLS_LDM32 = 0x69, + R_ARM_TLS_LDO32 = 0x6a, + R_ARM_TLS_IE32 = 0x6b, + R_ARM_TLS_LE32 = 0x6c, + R_ARM_TLS_LDO12 = 0x6d, + R_ARM_TLS_LE12 = 0x6e, + R_ARM_TLS_IE12GP = 0x6f, + R_ARM_PRIVATE_0 = 0x70, + R_ARM_PRIVATE_1 = 0x71, + R_ARM_PRIVATE_2 = 0x72, + R_ARM_PRIVATE_3 = 0x73, + R_ARM_PRIVATE_4 = 0x74, + R_ARM_PRIVATE_5 = 0x75, + R_ARM_PRIVATE_6 = 0x76, + R_ARM_PRIVATE_7 = 0x77, + R_ARM_PRIVATE_8 = 0x78, + R_ARM_PRIVATE_9 = 0x79, + R_ARM_PRIVATE_10 = 0x7a, + R_ARM_PRIVATE_11 = 0x7b, + R_ARM_PRIVATE_12 = 0x7c, + R_ARM_PRIVATE_13 = 0x7d, + R_ARM_PRIVATE_14 = 0x7e, + R_ARM_PRIVATE_15 = 0x7f, + R_ARM_ME_TOO = 0x80, + R_ARM_THM_TLS_DESCSEQ16 = 0x81, + R_ARM_THM_TLS_DESCSEQ32 = 0x82 +}; + + + +// Section header. 
+struct Elf32_Shdr {
+  Elf32_Word sh_name;      // Section name (index into string table)
+  Elf32_Word sh_type;      // Section type (SHT_*)
+  Elf32_Word sh_flags;     // Section flags (SHF_*)
+  Elf32_Addr sh_addr;      // Address where section is to be loaded
+  Elf32_Off  sh_offset;    // File offset of section data, in bytes
+  Elf32_Word sh_size;      // Size of section, in bytes
+  Elf32_Word sh_link;      // Section type-specific header table index link
+  Elf32_Word sh_info;      // Section type-specific extra information
+  Elf32_Word sh_addralign; // Section address alignment
+  Elf32_Word sh_entsize;   // Size of records contained within the section
+};
+
+// Section header for ELF64 - same fields as ELF32, different types.
+struct Elf64_Shdr {
+  Elf64_Half  sh_name;
+  Elf64_Half  sh_type;
+  Elf64_Xword sh_flags;
+  Elf64_Addr  sh_addr;
+  Elf64_Off   sh_offset;
+  Elf64_Xword sh_size;
+  Elf64_Half  sh_link;
+  Elf64_Half  sh_info;
+  Elf64_Xword sh_addralign;
+  Elf64_Xword sh_entsize;
+};
+
+// Special section indices.
+enum {
+  SHN_UNDEF     = 0,      // Undefined, missing, irrelevant, or meaningless
+  SHN_LORESERVE = 0xff00, // Lowest reserved index
+  SHN_LOPROC    = 0xff00, // Lowest processor-specific index
+  SHN_HIPROC    = 0xff1f, // Highest processor-specific index
+  SHN_ABS       = 0xfff1, // Symbol has absolute value; does not need relocation
+  SHN_COMMON    = 0xfff2, // FORTRAN COMMON or C external global variables
+  SHN_XINDEX    = 0xffff, // Mark that the index is >= SHN_LORESERVE
+  SHN_HIRESERVE = 0xffff  // Highest reserved index
+};
+
+// Section types.
+enum {
+  SHT_NULL          = 0,  // No associated section (inactive entry).
+  SHT_PROGBITS      = 1,  // Program-defined contents.
+  SHT_SYMTAB        = 2,  // Symbol table.
+  SHT_STRTAB        = 3,  // String table.
+  SHT_RELA          = 4,  // Relocation entries; explicit addends.
+  SHT_HASH          = 5,  // Symbol hash table.
+  SHT_DYNAMIC       = 6,  // Information for dynamic linking.
+  SHT_NOTE          = 7,  // Information about the file.
+  SHT_NOBITS        = 8,  // Data occupies no space in the file.
+  SHT_REL           = 9,  // Relocation entries; no explicit addends.
+  SHT_SHLIB         = 10, // Reserved.
+  SHT_DYNSYM        = 11, // Symbol table.
+  SHT_INIT_ARRAY    = 14, // Pointers to initialization functions.
+  SHT_FINI_ARRAY    = 15, // Pointers to termination functions.
+  SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
+  SHT_GROUP         = 17, // Section group.
+  SHT_SYMTAB_SHNDX  = 18, // Indices for SHN_XINDEX entries.
+  SHT_LOOS          = 0x60000000, // Lowest operating system-specific type.
+  SHT_HIOS          = 0x6fffffff, // Highest operating system-specific type.
+  SHT_LOPROC        = 0x70000000, // Lowest processor architecture-specific type.
+  // FIXME: All this is duplicated in MCSectionELF. Why??
+  // Exception Index table
+  SHT_ARM_EXIDX           = 0x70000001U,
+  // BPABI DLL dynamic linking pre-emption map
+  SHT_ARM_PREEMPTMAP      = 0x70000002U,
+  // Object file compatibility attributes
+  SHT_ARM_ATTRIBUTES      = 0x70000003U,
+  SHT_ARM_DEBUGOVERLAY    = 0x70000004U,
+  SHT_ARM_OVERLAYSECTION  = 0x70000005U,
+
+  SHT_X86_64_UNWIND       = 0x70000001, // Unwind information
+
+  SHT_HIPROC        = 0x7fffffff, // Highest processor architecture-specific type.
+  SHT_LOUSER        = 0x80000000, // Lowest type reserved for applications.
+  SHT_HIUSER        = 0xffffffff  // Highest type reserved for applications.
+};
+
+// Section flags.
+enum {
+  // Section data should be writable during execution.
+  SHF_WRITE = 0x1,
+
+  // Section occupies memory during program execution.
+  SHF_ALLOC = 0x2,
+
+  // Section contains executable machine instructions.
+  SHF_EXECINSTR = 0x4,
+
+  // The data in this section may be merged.
+ SHF_MERGE = 0x10, + + // The data in this section is null-terminated strings. + SHF_STRINGS = 0x20, + + // A field in this section holds a section header table index. + SHF_INFO_LINK = 0x40U, + + // Adds special ordering requirements for link editors. + SHF_LINK_ORDER = 0x80U, + + // This section requires special OS-specific processing to avoid incorrect + // behavior. + SHF_OS_NONCONFORMING = 0x100U, + + // This section is a member of a section group. + SHF_GROUP = 0x200U, + + // This section holds Thread-Local Storage. + SHF_TLS = 0x400U, + + // Start of target-specific flags. + + /// XCORE_SHF_CP_SECTION - All sections with the "c" flag are grouped + /// together by the linker to form the constant pool and the cp register is + /// set to the start of the constant pool by the boot code. + XCORE_SHF_CP_SECTION = 0x800U, + + /// XCORE_SHF_DP_SECTION - All sections with the "d" flag are grouped + /// together by the linker to form the data section and the dp register is + /// set to the start of the section by the boot code. + XCORE_SHF_DP_SECTION = 0x1000U, + + // Bits indicating processor-specific flags. + SHF_MASKPROC = 0xf0000000 +}; + +// Section Group Flags +enum { + GRP_COMDAT = 0x1, + GRP_MASKOS = 0x0ff00000, + GRP_MASKPROC = 0xf0000000 +}; + +// Symbol table entries for ELF32. +struct Elf32_Sym { + Elf32_Word st_name; // Symbol name (index into string table) + Elf32_Addr st_value; // Value or address associated with the symbol + Elf32_Word st_size; // Size of the symbol + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf32_Half st_shndx; // Which section (header table index) it's defined in + + // These accessors and mutators correspond to the ELF32_ST_BIND, + // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +// Symbol table entries for ELF64. +struct Elf64_Sym { + Elf64_Word st_name; // Symbol name (index into string table) + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf64_Half st_shndx; // Which section (header table index) it's defined in + Elf64_Addr st_value; // Value or address associated with the symbol + Elf64_Xword st_size; // Size of the symbol + + // These accessors and mutators are identical to those defined for ELF32 + // symbol table entries. + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +// The size (in bytes) of symbol table entries. +enum { + SYMENTRY_SIZE32 = 16, // 32-bit symbol entry size + SYMENTRY_SIZE64 = 24 // 64-bit symbol entry size. +}; + +// Symbol bindings. 
+enum {
+  STB_LOCAL = 0,   // Local symbol, not visible outside obj file containing def
+  STB_GLOBAL = 1,  // Global symbol, visible to all object files being combined
+  STB_WEAK = 2,    // Weak symbol, like global but lower-precedence
+  STB_LOPROC = 13, // Lowest processor-specific binding type
+  STB_HIPROC = 15  // Highest processor-specific binding type
+};
+
+// Symbol types.
+enum {
+  STT_NOTYPE  = 0,  // Symbol's type is not specified
+  STT_OBJECT  = 1,  // Symbol is a data object (variable, array, etc.)
+  STT_FUNC    = 2,  // Symbol is executable code (function, etc.)
+  STT_SECTION = 3,  // Symbol refers to a section
+  STT_FILE    = 4,  // Local, absolute symbol that refers to a file
+  STT_COMMON  = 5,  // An uninitialized common block
+  STT_TLS     = 6,  // Thread local data object
+  STT_LOPROC  = 13, // Lowest processor-specific symbol type
+  STT_HIPROC  = 15  // Highest processor-specific symbol type
+};
+
+// Symbol visibility.
+enum {
+  STV_DEFAULT   = 0, // Visibility is specified by binding type
+  STV_INTERNAL  = 1, // Defined by processor supplements
+  STV_HIDDEN    = 2, // Not visible to other components
+  STV_PROTECTED = 3  // Visible in other components but not preemptable
+};
+
+// Relocation entry, without explicit addend.
+struct Elf32_Rel {
+  Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
+  Elf32_Word r_info;   // Symbol table index and type of relocation to apply
+
+  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+  // and ELF32_R_INFO macros defined in the ELF specification:
+  Elf32_Word getSymbol() const { return (r_info >> 8); }
+  unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+  void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
+  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf32_Word s, unsigned char t) {
+    r_info = (s << 8) + t;
+  }
+};
+
+// Relocation entry with explicit addend.
+struct Elf32_Rela {
+  Elf32_Addr  r_offset; // Location (file byte offset, or program virtual addr)
+  Elf32_Word  r_info;   // Symbol table index and type of relocation to apply
+  Elf32_Sword r_addend; // Compute value for relocatable field by adding this
+
+  // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
+  // and ELF32_R_INFO macros defined in the ELF specification:
+  Elf32_Word getSymbol() const { return (r_info >> 8); }
+  unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+  void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
+  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf32_Word s, unsigned char t) {
+    r_info = (s << 8) + t;
+  }
+};
+
+// Relocation entry, without explicit addend.
+struct Elf64_Rel {
+  Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr).
+  Elf64_Xword r_info;  // Symbol table index and type of relocation to apply.
+
+  // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
+  // and ELF64_R_INFO macros defined in the ELF specification:
+  Elf64_Xword getSymbol() const { return (r_info >> 32); }
+  unsigned char getType() const {
+    return (unsigned char) (r_info & 0xffffffffL);
+  }
+  void setSymbol(Elf64_Xword s) { setSymbolAndType(s, getType()); }
+  void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
+  void setSymbolAndType(Elf64_Xword s, unsigned char t) {
+    r_info = (s << 32) + (t & 0xffffffffL);
+  }
+};
+
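+// For example, the r_info accessors pack a 24-bit symbol table index together
+// with an 8-bit relocation type. A minimal usage sketch (the symbol index 5,
+// the offset, and the choice of R_386_PC32 are illustrative only):
+//
+//   ELF::Elf32_Rel Rel;
+//   Rel.r_offset = 0x1000;
+//   Rel.setSymbolAndType(5, ELF::R_386_PC32); // r_info == (5 << 8) | 2
+//   assert(Rel.getSymbol() == 5 && Rel.getType() == ELF::R_386_PC32);
+
+// Relocation entry with explicit addend.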
+struct Elf64_Rela { + Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr). + Elf64_Xword r_info; // Symbol table index and type of relocation to apply. + Elf64_Sxword r_addend; // Compute value for relocatable field by adding this. + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + Elf64_Xword getSymbol() const { return (r_info >> 32); } + unsigned char getType() const { + return (unsigned char) (r_info & 0xffffffffL); + } + void setSymbol(Elf64_Xword s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf64_Xword s, unsigned char t) { + r_info = (s << 32) + (t&0xffffffffL); + } +}; + +// Program header for ELF32. +struct Elf32_Phdr { + Elf32_Word p_type; // Type of segment + Elf32_Off p_offset; // File offset where segment is located, in bytes + Elf32_Addr p_vaddr; // Virtual address of beginning of segment + Elf32_Addr p_paddr; // Physical address of beginning of segment (OS-specific) + Elf32_Word p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf32_Word p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf32_Word p_flags; // Segment flags + Elf32_Word p_align; // Segment alignment constraint +}; + +// Program header for ELF64. +struct Elf64_Phdr { + Elf64_Word p_type; // Type of segment + Elf64_Word p_flags; // Segment flags + Elf64_Off p_offset; // File offset where segment is located, in bytes + Elf64_Addr p_vaddr; // Virtual address of beginning of segment + Elf64_Addr p_paddr; // Physical address of beginning of segment (OS-specific) + Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf64_Xword p_align; // Segment alignment constraint +}; + +// Segment types. +enum { + PT_NULL = 0, // Unused segment. + PT_LOAD = 1, // Loadable segment. + PT_DYNAMIC = 2, // Dynamic linking information. + PT_INTERP = 3, // Interpreter pathname. + PT_NOTE = 4, // Auxiliary information. + PT_SHLIB = 5, // Reserved. + PT_PHDR = 6, // The program header table itself. + PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type. + PT_HIPROC = 0x7fffffff // Highest processor-specific program hdr entry type. +}; + +// Segment flag bits. +enum { + PF_X = 1, // Execute + PF_W = 2, // Write + PF_R = 4, // Read + PF_MASKPROC = 0xf0000000 // Unspecified +}; + +// Dynamic table entry for ELF32. +struct Elf32_Dyn +{ + Elf32_Sword d_tag; // Type of dynamic table entry. + union + { + Elf32_Word d_val; // Integer value of entry. + Elf32_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry for ELF64. +struct Elf64_Dyn +{ + Elf64_Sxword d_tag; // Type of dynamic table entry. + union + { + Elf64_Xword d_val; // Integer value of entry. + Elf64_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry tags. +enum { + DT_NULL = 0, // Marks end of dynamic array. + DT_NEEDED = 1, // String table offset of needed library. + DT_PLTRELSZ = 2, // Size of relocation entries in PLT. + DT_PLTGOT = 3, // Address associated with linkage table. + DT_HASH = 4, // Address of symbolic hash table. + DT_STRTAB = 5, // Address of dynamic string table. + DT_SYMTAB = 6, // Address of dynamic symbol table. + DT_RELA = 7, // Address of relocation table (Rela entries). + DT_RELASZ = 8, // Size of Rela relocation table. 
+  DT_RELAENT      = 9,  // Size of a Rela relocation entry.
+  DT_STRSZ        = 10, // Total size of the string table.
+  DT_SYMENT       = 11, // Size of a symbol table entry.
+  DT_INIT         = 12, // Address of initialization function.
+  DT_FINI         = 13, // Address of termination function.
+  DT_SONAME       = 14, // String table offset of a shared object's name.
+  DT_RPATH        = 15, // String table offset of library search path.
+  DT_SYMBOLIC     = 16, // Changes symbol resolution algorithm.
+  DT_REL          = 17, // Address of relocation table (Rel entries).
+  DT_RELSZ        = 18, // Size of Rel relocation table.
+  DT_RELENT       = 19, // Size of a Rel relocation entry.
+  DT_PLTREL       = 20, // Type of relocation entry used for linking.
+  DT_DEBUG        = 21, // Reserved for debugger.
+  DT_TEXTREL      = 22, // Relocations exist for non-writable segments.
+  DT_JMPREL       = 23, // Address of relocations associated with PLT.
+  DT_BIND_NOW     = 24, // Process all relocations before execution.
+  DT_INIT_ARRAY   = 25, // Pointer to array of initialization functions.
+  DT_FINI_ARRAY   = 26, // Pointer to array of termination functions.
+  DT_INIT_ARRAYSZ = 27, // Size of DT_INIT_ARRAY.
+  DT_FINI_ARRAYSZ = 28, // Size of DT_FINI_ARRAY.
+  DT_LOOS         = 0x60000000, // Start of environment specific tags.
+  DT_HIOS         = 0x6FFFFFFF, // End of environment specific tags.
+  DT_LOPROC       = 0x70000000, // Start of processor specific tags.
+  DT_HIPROC       = 0x7FFFFFFF  // End of processor specific tags.
+};
+
+} // end namespace ELF
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/Endian.h b/final/include/llvm/Support/Endian.h
new file mode 100644
index 00000000000..f62eab0702b
--- /dev/null
+++ b/final/include/llvm/Support/Endian.h
@@ -0,0 +1,213 @@
+//===- Endian.h - Utilities for IO with endian specific data ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares generic functions to read and write endian specific data.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ENDIAN_H
+#define LLVM_SUPPORT_ENDIAN_H
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/type_traits.h"
+
+namespace llvm {
+namespace support {
+
+enum endianness {big, little};
+enum alignment {unaligned, aligned};
+
+namespace detail {
+
+template<typename value_type, alignment align>
+struct alignment_access_helper;
+
+template<typename value_type>
+struct alignment_access_helper<value_type, aligned>
+{
+  value_type val;
+};
+
+// Provides unaligned loads and stores.
+#pragma pack(push)
+#pragma pack(1)
+template<typename value_type>
+struct alignment_access_helper<value_type, unaligned>
+{
+  value_type val;
+};
+#pragma pack(pop)
+
+} // end namespace detail
+
+namespace endian {
+  template<typename value_type, alignment align>
+  static value_type read_le(const void *memory) {
+    value_type t =
+      reinterpret_cast<const detail::alignment_access_helper
+                             <value_type, align> *>(memory)->val;
+    if (sys::isBigEndianHost())
+      return sys::SwapByteOrder(t);
+    return t;
+  }
+
+  template<typename value_type, alignment align>
+  static void write_le(void *memory, value_type value) {
+    if (sys::isBigEndianHost())
+      value = sys::SwapByteOrder(value);
+    reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
+      (memory)->val = value;
+  }
+
+  template<typename value_type, alignment align>
+  static value_type read_be(const void *memory) {
+    value_type t =
+      reinterpret_cast<const detail::alignment_access_helper
+                             <value_type, align> *>(memory)->val;
+    if (sys::isLittleEndianHost())
+      return sys::SwapByteOrder(t);
+    return t;
+  }
+
+  template<typename value_type, alignment align>
+  static void write_be(void *memory, value_type value) {
+    if (sys::isLittleEndianHost())
+      value = sys::SwapByteOrder(value);
+    reinterpret_cast<detail::alignment_access_helper<value_type, align> *>
+      (memory)->val = value;
+  }
+}
+
+namespace detail {
+
+template<typename value_type,
+         endianness endian,
+         alignment  align>
+class packed_endian_specific_integral;
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, little, unaligned> {
+public:
+  operator value_type() const {
+    return endian::read_le<value_type, unaligned>(Value);
+  }
+private:
+  uint8_t Value[sizeof(value_type)];
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, big, unaligned> {
+public:
+  operator value_type() const {
+    return endian::read_be<value_type, unaligned>(Value);
+  }
+private:
+  uint8_t Value[sizeof(value_type)];
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, little, aligned> {
+public:
+  operator value_type() const {
+    return endian::read_le<value_type, aligned>(&Value);
+  }
+private:
+  value_type Value;
+};
+
+template<typename value_type>
+class packed_endian_specific_integral<value_type, big, aligned> {
+public:
+  operator value_type() const {
+    return endian::read_be<value_type, aligned>(&Value);
+  }
+private:
+  value_type Value;
+};
+
+} // end namespace detail
+
+typedef detail::packed_endian_specific_integral
+                  <uint8_t, little, unaligned>  ulittle8_t;
+typedef detail::packed_endian_specific_integral
+                  <uint16_t, little, unaligned> ulittle16_t;
+typedef detail::packed_endian_specific_integral
+                  <uint32_t, little, unaligned> ulittle32_t;
+typedef detail::packed_endian_specific_integral
+                  <uint64_t, little, unaligned> ulittle64_t;
+
+typedef detail::packed_endian_specific_integral
+                   <int8_t, little, unaligned>  little8_t;
+typedef detail::packed_endian_specific_integral
+                   <int16_t, little, unaligned> little16_t;
+typedef detail::packed_endian_specific_integral
+                   <int32_t, little, unaligned> little32_t;
+typedef detail::packed_endian_specific_integral
+                   <int64_t, little, unaligned> little64_t;
+
+typedef detail::packed_endian_specific_integral
+                    <uint8_t, little, aligned>  aligned_ulittle8_t;
+typedef detail::packed_endian_specific_integral
+                    <uint16_t, little, aligned> aligned_ulittle16_t;
+typedef detail::packed_endian_specific_integral
+                    <uint32_t, little, aligned> aligned_ulittle32_t;
+typedef detail::packed_endian_specific_integral
+                    <uint64_t, little, aligned> aligned_ulittle64_t;
+
+typedef detail::packed_endian_specific_integral
+                     <int8_t, little, aligned>  aligned_little8_t;
+typedef detail::packed_endian_specific_integral
+                     <int16_t, little, aligned> aligned_little16_t;
+typedef detail::packed_endian_specific_integral
+                     <int32_t, little, aligned> aligned_little32_t;
+typedef detail::packed_endian_specific_integral
+                     <int64_t, little, aligned> aligned_little64_t;
+
+typedef detail::packed_endian_specific_integral
+                  <uint8_t, big, unaligned>     ubig8_t;
+typedef detail::packed_endian_specific_integral
+                  <uint16_t, big, unaligned>    ubig16_t;
+typedef detail::packed_endian_specific_integral
+                  <uint32_t, big, unaligned>    ubig32_t;
+typedef detail::packed_endian_specific_integral
+                  <uint64_t, big, unaligned>    ubig64_t;
+
+typedef detail::packed_endian_specific_integral
+                   <int8_t, big, unaligned>     big8_t;
+typedef detail::packed_endian_specific_integral
+                   <int16_t, big, unaligned>    big16_t;
+typedef detail::packed_endian_specific_integral
+                   <int32_t, big, unaligned>    big32_t;
+typedef detail::packed_endian_specific_integral
+                   <int64_t, big, unaligned>    big64_t;
+
+typedef detail::packed_endian_specific_integral
+                    <uint8_t, big, aligned>     aligned_ubig8_t;
+typedef detail::packed_endian_specific_integral
+                    <uint16_t, big, aligned>    aligned_ubig16_t;
+typedef detail::packed_endian_specific_integral
+                    <uint32_t, big, aligned>    aligned_ubig32_t;
+typedef detail::packed_endian_specific_integral
+                    <uint64_t, big, aligned>    aligned_ubig64_t;
+
+typedef detail::packed_endian_specific_integral
+                     <int8_t, big, aligned>     aligned_big8_t;
+typedef detail::packed_endian_specific_integral
+                     <int16_t, big, aligned>    aligned_big16_t;
+typedef detail::packed_endian_specific_integral
+                     <int32_t, big, aligned>    aligned_big32_t;
+typedef detail::packed_endian_specific_integral
+                     <int64_t, big, aligned>    aligned_big64_t;
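+
+// For example, these types make it possible to overlay a C++ struct directly
+// on raw, possibly byte-swapped data. A minimal sketch (the on-disk record
+// layout and the Buffer pointer are illustrative, not from any real format):
+//
+//   struct DiskRecord {          // 6 bytes on disk, stored little-endian
+//     ulittle16_t Kind;
+//     ulittle32_t Offset;
+//   };
+//   const DiskRecord *R = reinterpret_cast<const DiskRecord *>(Buffer);
+//   uint32_t HostOffset = R->Offset; // implicit conversion byte-swaps when
+//                                    // the host is big-endian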
+
+} // end namespace support
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/Errno.h b/final/include/llvm/Support/Errno.h
new file mode 100644
index 00000000000..150bdb70162
--- /dev/null
+++ b/final/include/llvm/Support/Errno.h
@@ -0,0 +1,34 @@
+//===- llvm/Support/Errno.h - Portable+convenient errno handling -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares some portable and convenient functions to deal with errno.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_ERRNO_H
+#define LLVM_SYSTEM_ERRNO_H
+
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+/// Returns a string representation of the errno value, using whatever
+/// thread-safe variant of strerror() is available. Be sure to call this
+/// immediately after the function that set errno, or errno may have been
+/// overwritten by an intervening call.
+std::string StrError();
+
+/// Like the no-argument version above, but uses \p errnum instead of errno.
+std::string StrError(int errnum);
+
+} // namespace sys
+} // namespace llvm
+
+#endif // LLVM_SYSTEM_ERRNO_H
diff --git a/final/include/llvm/Support/ErrorHandling.h b/final/include/llvm/Support/ErrorHandling.h
new file mode 100644
index 00000000000..5eca438d8b4
--- /dev/null
+++ b/final/include/llvm/Support/ErrorHandling.h
@@ -0,0 +1,103 @@
+//===- llvm/Support/ErrorHandling.h - Fatal error handling ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_ERRORHANDLING_H
+#define LLVM_SUPPORT_ERRORHANDLING_H
+
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/StringRef.h"
+#include <string>
+
+namespace llvm {
+  class Twine;
+
+  /// An error handler callback.
+  typedef void (*fatal_error_handler_t)(void *user_data,
+                                        const std::string& reason);
+
+  /// install_fatal_error_handler - Installs a new error handler to be used
+  /// whenever a serious (non-recoverable) error is encountered by LLVM.
+  ///
+  /// If you are using llvm_start_multithreaded, you should register the handler
+  /// before doing that.
+  ///
+  /// If no error handler is installed the default is to print the error message
+  /// to stderr, and call exit(1). If an error handler is installed then it is
+  /// the handler's responsibility to log the message; it will no longer be
+  /// printed to stderr. If the error handler returns, then exit(1) will be
+  /// called.
+  ///
+  /// It is dangerous to naively use an error handler which throws an exception.
+  /// Even though some applications desire to gracefully recover from arbitrary
+  /// faults, blindly throwing exceptions through unfamiliar code isn't a way to
+  /// achieve this.
+  ///
+  /// \param user_data - An argument which will be passed to the installed error
+  /// handler.
+  void install_fatal_error_handler(fatal_error_handler_t handler,
+                                   void *user_data = 0);
+
+  /// Restores default error handling behavior.
+  /// This must not be called between llvm_start_multithreaded() and
+  /// llvm_stop_multithreaded().
+  void remove_fatal_error_handler();
+
+  /// ScopedFatalErrorHandler - This is a simple helper class which just
+  /// calls install_fatal_error_handler in its constructor and
+  /// remove_fatal_error_handler in its destructor.
+  struct ScopedFatalErrorHandler {
+    explicit ScopedFatalErrorHandler(fatal_error_handler_t handler,
+                                     void *user_data = 0) {
+      install_fatal_error_handler(handler, user_data);
+    }
+
+    ~ScopedFatalErrorHandler() { remove_fatal_error_handler(); }
+  };
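+
+  // For example, a tool can capture fatal errors for the lifetime of a scope.
+  // A minimal sketch (HandleLLVMFatalError and its logging are hypothetical):
+  //
+  //   static void HandleLLVMFatalError(void *UserData,
+  //                                    const std::string &Reason) {
+  //     fprintf(stderr, "mytool: %s\n", Reason.c_str());
+  //     // Returning lets LLVM call exit(1) for us.
+  //   }
+  //
+  //   void runPasses() {
+  //     ScopedFatalErrorHandler Guard(HandleLLVMFatalError);
+  //     // ... work that may call report_fatal_error ...
+  //   } // handler removed when Guard goes out of scope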
+
+  /// Reports a serious error, calling any installed error handler. These
+  /// functions are intended to be used for error conditions which are outside
+  /// the control of the compiler (I/O errors, invalid user input, etc.)
+  ///
+  /// If no error handler is installed the default is to print the message to
+  /// standard error, followed by a newline.
+  /// After the error handler is called this function will call exit(1); it
+  /// does not return.
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const std::string &reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(StringRef reason);
+  LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const Twine &reason);
+
+  /// This function calls abort(), and prints the optional message to stderr.
+  /// Use the llvm_unreachable macro (that adds location info), instead of
+  /// calling this function directly.
+  LLVM_ATTRIBUTE_NORETURN void llvm_unreachable_internal(const char *msg=0,
+                                                         const char *file=0,
+                                                         unsigned line=0);
+}
+
+/// Prints the message and location info to stderr in !NDEBUG builds.
+/// This is intended to be used for "impossible" situations that imply
+/// a bug in the compiler.
+///
+/// In NDEBUG mode it only prints "UNREACHABLE executed".
+/// Use this instead of assert(0), so that the compiler knows this path
+/// is not reachable even for NDEBUG builds.
+#ifndef NDEBUG
+#define llvm_unreachable(msg) \
+  ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
+#else
+#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
+#endif
+
+#endif
diff --git a/final/include/llvm/Support/FEnv.h b/final/include/llvm/Support/FEnv.h
new file mode 100644
index 00000000000..f6f43337bd2
--- /dev/null
+++ b/final/include/llvm/Support/FEnv.h
@@ -0,0 +1,56 @@
+//===- llvm/Support/FEnv.h - Host floating-point exceptions ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an operating system independent interface to
+// floating-point exception interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_FENV_H
+#define LLVM_SYSTEM_FENV_H
+
+#include "llvm/Config/config.h"
+#include <cerrno>
+#ifdef HAVE_FENV_H
+#include <fenv.h>
+#endif
+
+// FIXME: Clang's #include handling apparently doesn't work for libstdc++'s
+// fenv.h; see PR6907 for details.
+#if defined(__clang__) && defined(_GLIBCXX_FENV_H)
+#undef HAVE_FENV_H
+#endif
+
+namespace llvm {
+namespace sys {
+
+/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+static inline void llvm_fenv_clearexcept() {
+#ifdef HAVE_FENV_H
+  feclearexcept(FE_ALL_EXCEPT);
+#endif
+  errno = 0;
+}
+
+/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+static inline bool llvm_fenv_testexcept() {
+  int errno_val = errno;
+  if (errno_val == ERANGE || errno_val == EDOM)
+    return true;
+#ifdef HAVE_FENV_H
+  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
+    return true;
+#endif
+  return false;
+}
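+
+// For example, constant folding of libm calls can use these helpers to detect
+// whether evaluation trapped. A minimal sketch (the sqrt call and the
+// give-up-on-folding fallback are illustrative):
+//
+//   llvm_fenv_clearexcept();
+//   double Res = sqrt(V);
+//   if (llvm_fenv_testexcept()) {
+//     llvm_fenv_clearexcept();
+//     // Evaluation raised an exception; leave the call unfolded.
+//   }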
+
+} // End sys namespace
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/FileSystem.h b/final/include/llvm/Support/FileSystem.h
new file mode 100644
index 00000000000..4f013f89e86
--- /dev/null
+++ b/final/include/llvm/Support/FileSystem.h
@@ -0,0 +1,690 @@
+//===- llvm/Support/FileSystem.h - File System OS Concept -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::fs namespace. It is designed after
+// TR2/boost filesystem (v3), but modified to remove exception handling and the
+// path class.
+//
+// All functions return an error_code and their actual work via the last out
+// argument. The out argument is defined if and only if errc::success is
+// returned. A function may return any error code in the generic or system
+// category. However, they shall be equivalent to any error conditions listed
+// in each function's respective documentation if the condition applies. [ note:
+// this does not guarantee that error_code will be in the set of explicitly
+// listed codes, but it does guarantee that if any of the explicitly listed
+// errors occur, the correct error_code will be used ]. All functions may
+// return errc::not_enough_memory if there is not enough memory to complete the
+// operation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILE_SYSTEM_H
+#define LLVM_SUPPORT_FILE_SYSTEM_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/PathV1.h"
+#include "llvm/Support/system_error.h"
+#include <ctime>
+#include <iterator>
+#include <string>
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+/// file_type - An "enum class" enumeration for the file system's view of the
+/// type.
+struct file_type {
+  enum _ {
+    status_error,
+    file_not_found,
+    regular_file,
+    directory_file,
+    symlink_file,
+    block_file,
+    character_file,
+    fifo_file,
+    socket_file,
+    type_unknown
+  };
+
+  file_type(_ v) : v_(v) {}
+  explicit file_type(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+
+private:
+  int v_;
+};
+
+/// copy_option - An "enum class" enumeration of copy semantics for copy
+/// operations.
+struct copy_option {
+  enum _ {
+    fail_if_exists,
+    overwrite_if_exists
+  };
+
+  copy_option(_ v) : v_(v) {}
+  explicit copy_option(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+
+private:
+  int v_;
+};
+
+/// space_info - Self-explanatory.
+struct space_info {
+  uint64_t capacity;
+  uint64_t free;
+  uint64_t available;
+};
+
+/// file_status - Represents the result of a call to stat and friends. It has
+/// a platform specific member to store the result.
+class file_status
+{
+  // implementation defined status field.
+  file_type Type;
+public:
+  explicit file_status(file_type v=file_type::status_error)
+    : Type(v) {}
+
+  file_type type() const { return Type; }
+  void type(file_type v) { Type = v; }
+};
+
+/// @}
+/// @name Physical Operators
+/// @{
+
+/// @brief Make \a path an absolute path.
+///
+/// Makes \a path absolute using the current directory if it is not already. An
+/// empty \a path will result in the current directory.
+///
+/// /absolute/path   => /absolute/path
+/// relative/../path => /relative/../path
+///
+/// @param path A path that is modified to be an absolute path.
+/// @returns errc::success if \a path has been made absolute, otherwise a
+///          platform specific error_code.
+error_code make_absolute(SmallVectorImpl<char> &path);
+
+/// @brief Copy the file at \a from to the path \a to.
+///
+/// @param from The path to copy the file from.
+/// @param to The path to copy the file to.
+/// @param copt Behavior if \a to already exists.
+/// @returns errc::success if the file has been successfully copied.
+///          errc::file_exists if \a to already exists and \a copt ==
+///          copy_option::fail_if_exists. Otherwise a platform specific
+///          error_code.
+error_code copy_file(const Twine &from, const Twine &to,
+                     copy_option copt = copy_option::fail_if_exists);
+
+/// @brief Create all the non-existent directories in path.
+///
+/// @param path Directories to create.
+/// @param existed Set to true if \a path already existed, false otherwise.
+/// @returns errc::success if is_directory(path) and existed have been set,
+///          otherwise a platform specific error_code.
+error_code create_directories(const Twine &path, bool &existed);
+
+/// @brief Create the directory in path.
+///
+/// @param path Directory to create.
+/// @param existed Set to true if \a path already existed, false otherwise.
+/// @returns errc::success if is_directory(path) and existed have been set,
+///          otherwise a platform specific error_code.
+error_code create_directory(const Twine &path, bool &existed);
+
+/// @brief Create a hard link from \a from to \a to.
+///
+/// @param to The path to hard link to.
+/// @param from The path to hard link from. This is created.
+/// @returns errc::success if exists(to) && exists(from) && equivalent(to, from)
+///          , otherwise a platform specific error_code.
+error_code create_hard_link(const Twine &to, const Twine &from);
+
+/// @brief Create a symbolic link from \a from to \a to.
+///
+/// @param to The path to symbolically link to.
+/// @param from The path to symbolically link from. This is created.
+/// @returns errc::success if exists(to) && exists(from) && is_symlink(from),
+///          otherwise a platform specific error_code.
+error_code create_symlink(const Twine &to, const Twine &from);
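+
+/// For example, the out-parameter style composes like this. A minimal sketch
+/// (the concrete paths and the fatal-error handling are illustrative):
+///
+///   bool existed;
+///   if (error_code ec = create_directories(Twine("/tmp/out/objs"), existed))
+///     report_fatal_error(ec.message());
+///   if (error_code ec = copy_file(Twine("a.o"), Twine("/tmp/out/objs/a.o")))
+///     report_fatal_error(ec.message());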
+
+/// @brief Get the current path.
+///
+/// @param result Holds the current path on return.
+/// @results errc::success if the current path has been stored in result,
+///          otherwise a platform specific error_code.
+error_code current_path(SmallVectorImpl<char> &result);
+
+/// @brief Remove path. Equivalent to POSIX remove().
+///
+/// @param path Input path.
+/// @param existed Set to true if \a path existed, false if it did not.
+///                undefined otherwise.
+/// @results errc::success if path has been removed and existed has been
+///          successfully set, otherwise a platform specific error_code.
+error_code remove(const Twine &path, bool &existed);
+
+/// @brief Recursively remove all files below \a path, then \a path. Files are
+///        removed as if by POSIX remove().
+///
+/// @param path Input path.
+/// @param num_removed Number of files removed.
+/// @results errc::success if path has been removed and num_removed has been
+///          successfully set, otherwise a platform specific error_code.
+error_code remove_all(const Twine &path, uint32_t &num_removed);
+
+/// @brief Rename \a from to \a to. Files are renamed as if by POSIX rename().
+///
+/// @param from The path to rename from.
+/// @param to The path to rename to. This is created.
+error_code rename(const Twine &from, const Twine &to);
+
+/// @brief Resize path to size. File is resized as if by POSIX truncate().
+///
+/// @param path Input path.
+/// @param size Size to resize to.
+/// @returns errc::success if \a path has been resized to \a size, otherwise a
+///          platform specific error_code.
+error_code resize_file(const Twine &path, uint64_t size);
+
+/// @brief Make file readable.
+///
+/// @param path Input path.
+/// @param value If true, make readable, else, make unreadable.
+/// @results errc::success if readability has been successfully set, otherwise a
+///          platform specific error_code.
+error_code set_read(const Twine &path, bool value);
+
+/// @brief Make file writeable.
+///
+/// @param path Input path.
+/// @param value If true, make writeable, else, make unwriteable.
+/// @results errc::success if writeability has been successfully set, otherwise
+///          a platform specific error_code.
+error_code set_write(const Twine &path, bool value);
+
+/// @brief Make file executable.
+///
+/// @param path Input path.
+/// @param value If true, make executable, else, make unexecutable.
+/// @results errc::success if executability has been successfully set, otherwise
+///          a platform specific error_code.
+error_code set_execute(const Twine &path, bool value);
+
+/// @}
+/// @name Physical Observers
+/// @{
+
+/// @brief Does file exist?
+///
+/// @param status A file_status previously returned from stat.
+/// @results True if the file represented by status exists, false if it does
+///          not.
+bool exists(file_status status);
+
+/// @brief Does file exist?
+///
+/// @param path Input path.
+/// @param result Set to true if the file represented by status exists, false if
+///               it does not. Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code exists(const Twine &path, bool &result);
+
+/// @brief Do file_status's represent the same thing?
+///
+/// @param A Input file_status.
+/// @param B Input file_status.
+///
+/// assert(status_known(A) || status_known(B));
+///
+/// @results True if A and B both represent the same file system entity, false
+///          otherwise.
+bool equivalent(file_status A, file_status B);
+
+/// @brief Do paths represent the same thing?
+///
+/// @param A Input path A.
+/// @param B Input path B.
+/// @param result Set to true if stat(A) and stat(B) have the same device and
+///               inode (or equivalent).
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code equivalent(const Twine &A, const Twine &B, bool &result);
+
+/// @brief Get file size.
+///
+/// @param path Input path.
+/// @param result Set to the size of the file in \a path.
+/// @returns errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code file_size(const Twine &path, uint64_t &result);
+
+/// @brief Does status represent a directory?
+///
+/// @param status A file_status previously returned from status.
+/// @results status.type() == file_type::directory_file.
+bool is_directory(file_status status);
+
+/// @brief Is path a directory?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a directory, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_directory(const Twine &path, bool &result);
+
+/// @brief Is path an empty file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is an empty file, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_empty(const Twine &path, bool &result);
+
+/// @brief Does status represent a regular file?
+///
+/// @param status A file_status previously returned from status.
+/// @results status_known(status) && status.type() == file_type::regular_file.
+bool is_regular_file(file_status status);
+
+/// @brief Is path a regular file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a regular file, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_regular_file(const Twine &path, bool &result);
+
+/// @brief Does this status represent something that exists but is not a
+///        directory, regular file, or symlink?
+///
+/// @param status A file_status previously returned from status.
+/// @results exists(s) && !is_regular_file(s) && !is_directory(s) &&
+///          !is_symlink(s)
+bool is_other(file_status status);
+
+/// @brief Is path something that exists but is not a directory,
+///        regular file, or symlink?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path exists, but is not a directory, regular
+///               file, or a symlink, false if it does not. Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_other(const Twine &path, bool &result);
+
+/// @brief Does status represent a symlink?
+///
+/// @param status A file_status previously returned from stat.
+/// @results status.type() == symlink_file.
+bool is_symlink(file_status status);
+
+/// @brief Is path a symlink?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a symlink, false if it is not.
+///               Undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_symlink(const Twine &path, bool &result);
+
+/// @brief Get last write time without changing it.
+///
+/// @param path Input path.
+/// @param result Set to the last write time (UNIX time) of \a path if it
+///               exists.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code last_write_time(const Twine &path, std::time_t &result);
+
+/// @brief Set last write time.
+///
+/// @param path Input path.
+/// @param value Time to set (UNIX time) \a path's last write time to.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code set_last_write_time(const Twine &path, std::time_t value);
+
+/// @brief Read a symlink's value.
+///
+/// @param path Input path.
+/// @param result Set to the value of the symbolic link \a path.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code read_symlink(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @brief Get disk space usage information.
+///
+/// @param path Input path.
+/// @param result Set to the capacity, free, and available space on the device
+///               \a path is on.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code disk_space(const Twine &path, space_info &result);
+
+/// @brief Get file status as if by POSIX stat().
+///
+/// @param path Input path.
+/// @param result Set to the file status.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code status(const Twine &path, file_status &result);
+
+/// @brief Is status available?
+///
+/// @param s Input file_status.
+/// @results True if status() != status_error.
+bool status_known(file_status s);
+
+/// @brief Is status available?
+///
+/// @param path Input path.
+/// @param result Set to true if status() != status_error.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code status_known(const Twine &path, bool &result);
+
+/// @brief Get file status as if by POSIX lstat().
+///
+/// Does not resolve symlinks.
+///
+/// @param path Input path.
+/// @param result Set to the file status.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code symlink_status(const Twine &path, file_status &result);
+
+/// @brief Generate a unique path and open it as a file.
+///
+/// Generates a unique path suitable for a temporary file and then opens it as a
+/// file. The name is based on \a model with '%' replaced by a random char in
+/// [0-9a-f]. If \a model is not an absolute path, a suitable temporary
+/// directory will be prepended.
+///
+/// This is an atomic operation. Either the file is created and opened, or the
+/// file system is left untouched.
+///
+/// clang-%%-%%-%%-%%-%%.s => /tmp/clang-a0-b1-c2-d3-e4.s
+///
+/// @param model Name to base unique path off of.
+/// @param result_fd Set to the opened file's file descriptor.
+/// @param result_path Set to the opened file's absolute path.
+/// @results errc::success if result_{fd,path} have been successfully set,
+///          otherwise a platform specific error_code.
+error_code unique_file(const Twine &model, int &result_fd,
+                       SmallVectorImpl<char> &result_path);
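+
+/// For example, creating a temporary object file. A minimal sketch (the model
+/// string is illustrative):
+///
+///   int FD;
+///   SmallString<128> TempPath;
+///   if (error_code ec = unique_file(Twine("obj-%%%%%%.o"), FD, TempPath))
+///     return ec;
+///   // FD is now an open descriptor; TempPath holds the path actually created.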
+
+/// @brief Canonicalize path.
+///
+/// Sets result to the file system's idea of what path is. The result is always
+/// absolute and has the same capitalization as the file system.
+///
+/// @param path Input path.
+/// @param result Set to the canonicalized version of \a path.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code canonicalize(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @brief Are \a path's first bytes \a magic?
+///
+/// @param path Input path.
+/// @param magic Byte sequence to compare \a path's first len(magic) bytes to.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code has_magic(const Twine &path, const Twine &magic, bool &result);
+
+/// @brief Get \a path's first \a len bytes.
+///
+/// @param path Input path.
+/// @param len Number of magic bytes to get.
+/// @param result Set to the first \a len bytes in the file pointed to by
+///               \a path. Or the entire file if file_size(path) < len, in which
+///               case result.size() returns the size of the file.
+/// @results errc::success if result has been successfully set,
+///          errc::value_too_large if len is larger than the file pointed to by
+///          \a path, otherwise a platform specific error_code.
+error_code get_magic(const Twine &path, uint32_t len,
+                     SmallVectorImpl<char> &result);
+
+/// @brief Get and identify \a path's type based on its content.
+///
+/// @param path Input path.
+/// @param result Set to the type of file, or LLVMFileType::Unknown_FileType.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code identify_magic(const Twine &path, LLVMFileType &result);
+
+/// @brief Is file bitcode?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a bitcode file, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_bitcode(const Twine &path, bool &result);
+
+/// @brief Is file a dynamic library?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is a dynamic library, false if it is
+///               not, undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_dynamic_library(const Twine &path, bool &result);
+
+/// @brief Is an object file?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is an object file, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code is_object_file(const Twine &path, bool &result);
+
+/// @brief Can file be read?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is readable, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_read(const Twine &path, bool &result);
+
+/// @brief Can file be written?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is writeable, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_write(const Twine &path, bool &result);
+
+/// @brief Can file be executed?
+///
+/// @param path Input path.
+/// @param result Set to true if \a path is executable, false if it is not,
+///               undefined otherwise.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code can_execute(const Twine &path, bool &result);
+
+/// @brief Get library paths the system linker uses.
+///
+/// @param result Set to the list of system library paths.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetSystemLibraryPaths(SmallVectorImpl<std::string> &result);
+
+/// @brief Get bitcode library paths the system linker uses
+///        + LLVM_LIB_SEARCH_PATH + LLVM_LIBDIR.
+///
+/// @param result Set to the list of bitcode library paths.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetBitcodeLibraryPaths(SmallVectorImpl<std::string> &result);
+
+/// @brief Find a library.
+///
+/// Find the path to a library using its short name. Use the system
+/// dependent library paths to locate the library.
+///
+/// c => /usr/lib/libc.so
+///
+/// @param short_name Library name one would give to the system linker.
+/// @param result Set to the absolute path \a short_name represents.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code FindLibrary(const Twine &short_name, SmallVectorImpl<char> &result);
+
+/// @brief Get absolute path of main executable.
+///
+/// @param argv0 The program name as it was spelled on the command line.
+/// @param MainAddr Address of some symbol in the executable (not in a library).
+/// @param result Set to the absolute path of the current executable.
+/// @results errc::success if result has been successfully set, otherwise a
+///          platform specific error_code.
+error_code GetMainExecutable(const char *argv0, void *MainAddr,
+                             SmallVectorImpl<char> &result);
+
+/// @}
+/// @name Iterators
+/// @{
+
+/// directory_entry - A single entry in a directory. Caches the status either
+/// from the result of the iteration syscall, or the first time status or
+/// symlink_status is called.
+class directory_entry {
+  std::string Path;
+  mutable file_status Status;
+  mutable file_status SymlinkStatus;
+
+public:
+  explicit directory_entry(const Twine &path, file_status st = file_status(),
+                                              file_status symlink_st = file_status())
+    : Path(path.str())
+    , Status(st)
+    , SymlinkStatus(symlink_st) {}
+
+  directory_entry() {}
+
+  void assign(const Twine &path, file_status st = file_status(),
+                                 file_status symlink_st = file_status()) {
+    Path = path.str();
+    Status = st;
+    SymlinkStatus = symlink_st;
+  }
+
+  void replace_filename(const Twine &filename, file_status st = file_status(),
+                                               file_status symlink_st = file_status());
+
+  const std::string &path() const { return Path; }
+  error_code status(file_status &result) const;
+  error_code symlink_status(file_status &result) const;
+
+  bool operator==(const directory_entry& rhs) const { return Path == rhs.Path; }
+  bool operator!=(const directory_entry& rhs) const { return !(*this == rhs); }
+  bool operator< (const directory_entry& rhs) const;
+  bool operator<=(const directory_entry& rhs) const;
+  bool operator> (const directory_entry& rhs) const;
+  bool operator>=(const directory_entry& rhs) const;
+};
+
+/// directory_iterator - Iterates through the entries in path. There is no
+/// operator++ because we need an error_code. If it's really needed we can make
+/// it call report_fatal_error on error.
+class directory_iterator {
+  intptr_t IterationHandle;
+  directory_entry CurrentEntry;
+
+  // Platform implementations implement these functions to handle iteration.
+  friend error_code directory_iterator_construct(directory_iterator &it,
+                                                 StringRef path);
+  friend error_code directory_iterator_increment(directory_iterator &it);
+  friend error_code directory_iterator_destruct(directory_iterator &it);
+
+public:
+  explicit directory_iterator(const Twine &path, error_code &ec)
+    : IterationHandle(0) {
+    SmallString<128> path_storage;
+    ec = directory_iterator_construct(*this, path.toStringRef(path_storage));
+  }
+
+  /// Construct end iterator.
+  directory_iterator() : IterationHandle(0) {}
+
+  ~directory_iterator() {
+    directory_iterator_destruct(*this);
+  }
+
+  // No operator++ because we need error_code.
+  directory_iterator &increment(error_code &ec) {
+    ec = directory_iterator_increment(*this);
+    return *this;
+  }
+
+  const directory_entry &operator*() const { return CurrentEntry; }
+  const directory_entry *operator->() const { return &CurrentEntry; }
+
+  bool operator!=(const directory_iterator &RHS) const {
+    return CurrentEntry != RHS.CurrentEntry;
+  }
+  // Other members as required by
+  //   C++ Std, 24.1.1 Input iterators [input.iterators]
+};
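+
+/// For example, listing the entries of a directory with the increment/ec
+/// protocol. A minimal sketch (Dir and the use of outs() are illustrative):
+///
+///   error_code ec;
+///   for (directory_iterator it(Twine(Dir), ec), end; it != end;
+///        it.increment(ec)) {
+///     if (ec) break;                 // iteration failed; stop early
+///     outs() << it->path() << '\n';  // a default-constructed iterator is end
+///   }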
+
+/// recursive_directory_iterator - Same as directory_iterator except for it
+/// recurses down into child directories.
+class recursive_directory_iterator {
+  uint16_t Level;
+  bool HasNoPushRequest;
+  // implementation directory iterator status
+
+public:
+  explicit recursive_directory_iterator(const Twine &path, error_code &ec);
+  // No operator++ because we need error_code.
+  directory_iterator &increment(error_code &ec);
+
+  const directory_entry &operator*() const;
+  const directory_entry *operator->() const;
+
+  // observers
+  /// Gets the current level. path is at level 0.
+  int level() const;
+  /// Returns true if no_push has been called for this directory_entry.
+  bool no_push_request() const;
+
+  // modifiers
+  /// Goes up one level if Level > 0.
+  void pop();
+  /// Does not go down into the current directory_entry.
+  void no_push();
+
+  // Other members as required by
+  //   C++ Std, 24.1.1 Input iterators [input.iterators]
+};
+
+/// @}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/FileUtilities.h b/final/include/llvm/Support/FileUtilities.h
new file mode 100644
index 00000000000..748ce7cea7b
--- /dev/null
+++ b/final/include/llvm/Support/FileUtilities.h
@@ -0,0 +1,72 @@
+//===- llvm/Support/FileUtilities.h - File System Utilities -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILEUTILITIES_H
+#define LLVM_SUPPORT_FILEUTILITIES_H
+
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+  /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if
+  /// the files match, 1 if they are different, and 2 if there is a file error.
+  /// This function allows you to specify an absolute and relative FP error that
+  /// is allowed to exist. If you specify a string to fill in for the error
+  /// option, it will set the string to an error message if an error occurs, or
+  /// if the files are different.
diff --git a/final/include/llvm/Support/FileUtilities.h b/final/include/llvm/Support/FileUtilities.h
new file mode 100644
index 00000000000..748ce7cea7b
--- /dev/null
+++ b/final/include/llvm/Support/FileUtilities.h
@@ -0,0 +1,72 @@
+//===- llvm/Support/FileUtilities.h - File System Utilities -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a family of utility functions which are useful for doing
+// various things with files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FILEUTILITIES_H
+#define LLVM_SUPPORT_FILEUTILITIES_H
+
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+
+  /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if
+  /// the files match, 1 if they are different, and 2 if there is a file error.
+  /// This function allows you to specify an absolute and relative FP error that
+  /// is allowed to exist. If you specify a string to fill in for the error
+  /// option, it will set the string to an error message if an error occurs, or
+  /// if the files are different.
+  ///
+  int DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+                             const sys::PathWithStatus &FileB,
+                             double AbsTol, double RelTol,
+                             std::string *Error = 0);
+
+
+  /// FileRemover - This class is a simple object meant to be stack allocated.
+  /// If an exception is thrown from a region, the object removes the filename
+  /// specified (if deleteIt is true).
+  ///
+  class FileRemover {
+    sys::Path Filename;
+    bool DeleteIt;
+  public:
+    FileRemover() : DeleteIt(false) {}
+
+    explicit FileRemover(const sys::Path &filename, bool deleteIt = true)
+      : Filename(filename), DeleteIt(deleteIt) {}
+
+    ~FileRemover() {
+      if (DeleteIt) {
+        // Ignore problems deleting the file.
+        Filename.eraseFromDisk();
+      }
+    }
+
+    /// setFile - Give ownership of the file to the FileRemover so it will
+    /// be removed when the object is destroyed. If the FileRemover already
+    /// had ownership of a file, remove it first.
+    void setFile(const sys::Path &filename, bool deleteIt = true) {
+      if (DeleteIt)
+        Filename.eraseFromDisk();
+
+      Filename = filename;
+      DeleteIt = deleteIt;
+    }
+
+    /// releaseFile - Take ownership of the file away from the FileRemover so it
+    /// will not be removed when the object is destroyed.
+    void releaseFile() { DeleteIt = false; }
+  };
+} // End llvm namespace
+
+#endif
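FileRemover is a classic RAII guard: the output file survives only if the caller explicitly releases it. A minimal sketch (not part of the original headers; emitData is a hypothetical helper and the path is a placeholder):

    #include "llvm/Support/FileUtilities.h"
    using namespace llvm;

    bool writeOutput(const sys::Path &Out) {
      FileRemover Remover(Out);   // erase Out unless we succeed below
      if (!emitData(Out))         // emitData: hypothetical producer
        return false;             // Out is erased when Remover destructs
      Remover.releaseFile();      // success: keep the finished file
      return true;
    }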
diff --git a/final/include/llvm/Support/Format.h b/final/include/llvm/Support/Format.h
new file mode 100644
index 00000000000..f64e3db7d65
--- /dev/null
+++ b/final/include/llvm/Support/Format.h
@@ -0,0 +1,154 @@
+//===- Format.h - Efficient printf-style formatting for streams -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the format() function, which can be used with other
+// LLVM subsystems to provide printf-style formatting. This gives all the power
+// and risk of printf. This can be used like this (with raw_ostreams as an
+// example):
+//
+//   OS << "mynumber: " << format("%4.5f", 1234.412) << '\n';
+//
+// Or if you prefer:
+//
+//   OS << format("mynumber: %4.5f\n", 1234.412);
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMAT_H
+#define LLVM_SUPPORT_FORMAT_H
+
+#include <cassert>
+#include <cstdio>
+#ifdef _MSC_VER
+// FIXME: This define is wrong:
+//  - _snprintf does not guarantee that trailing null is always added - if
+//    there is no space for null, it does not report any error.
+//  - According to C++ standard, snprintf should be visible in the 'std'
+//    namespace - this define makes this impossible.
+#define snprintf _snprintf
+#endif
+
+namespace llvm {
+
+/// format_object_base - This is a helper class used for handling formatted
+/// output. It is the abstract base class of a templated derived class.
+class format_object_base {
+protected:
+  const char *Fmt;
+  virtual void home(); // Out of line virtual method.
+
+  /// snprint - Call snprintf() for this object, on the given buffer and size.
+  virtual int snprint(char *Buffer, unsigned BufferSize) const = 0;
+
+public:
+  format_object_base(const char *fmt) : Fmt(fmt) {}
+  virtual ~format_object_base() {}
+
+  /// print - Format the object into the specified buffer. On success, this
+  /// returns the length of the formatted string. If the buffer is too small,
+  /// this returns a length to retry with, which will be larger than BufferSize.
+  unsigned print(char *Buffer, unsigned BufferSize) const {
+    assert(BufferSize && "Invalid buffer size!");
+
+    // Print the string, leaving room for the terminating null.
+    int N = snprint(Buffer, BufferSize);
+
+    // VC++ and old GlibC return negative on overflow, just double the size.
+    if (N < 0)
+      return BufferSize*2;
+
+    // Other impls yield number of bytes needed, not including the final '\0'.
+    if (unsigned(N) >= BufferSize)
+      return N+1;
+
+    // Otherwise N is the length of output (not including the final '\0').
+    return N;
+  }
+};
+
+/// format_object1 - This is a templated helper class used by the format
+/// function that captures the object to be formatted and the format string.
+/// When actually printed, this synthesizes the string into a temporary buffer
+/// provided and returns whether or not it is big enough.
+template <typename T>
+class format_object1 : public format_object_base {
+  T Val;
+public:
+  format_object1(const char *fmt, const T &val)
+    : format_object_base(fmt), Val(val) {
+  }
+
+  virtual int snprint(char *Buffer, unsigned BufferSize) const {
+    return snprintf(Buffer, BufferSize, Fmt, Val);
+  }
+};
+
+/// format_object2 - This is a templated helper class used by the format
+/// function that captures the object to be formatted and the format string.
+/// When actually printed, this synthesizes the string into a temporary buffer
+/// provided and returns whether or not it is big enough.
+template <typename T1, typename T2>
+class format_object2 : public format_object_base {
+  T1 Val1;
+  T2 Val2;
+public:
+  format_object2(const char *fmt, const T1 &val1, const T2 &val2)
+    : format_object_base(fmt), Val1(val1), Val2(val2) {
+  }
+
+  virtual int snprint(char *Buffer, unsigned BufferSize) const {
+    return snprintf(Buffer, BufferSize, Fmt, Val1, Val2);
+  }
+};
+
+/// format_object3 - This is a templated helper class used by the format
+/// function that captures the object to be formatted and the format string.
+/// When actually printed, this synthesizes the string into a temporary buffer
+/// provided and returns whether or not it is big enough.
+template <typename T1, typename T2, typename T3>
+class format_object3 : public format_object_base {
+  T1 Val1;
+  T2 Val2;
+  T3 Val3;
+public:
+  format_object3(const char *fmt, const T1 &val1, const T2 &val2,const T3 &val3)
+    : format_object_base(fmt), Val1(val1), Val2(val2), Val3(val3) {
+  }
+
+  virtual int snprint(char *Buffer, unsigned BufferSize) const {
+    return snprintf(Buffer, BufferSize, Fmt, Val1, Val2, Val3);
+  }
+};
+
+/// format - This is a helper function that is used to produce formatted output.
+/// This is typically used like: OS << format("%0.4f", myfloat) << '\n';
+template <typename T>
+inline format_object1<T> format(const char *Fmt, const T &Val) {
+  return format_object1<T>(Fmt, Val);
+}
+
+/// format - This is a helper function that is used to produce formatted output.
+/// This is typically used like: OS << format("%0.4f", myfloat) << '\n';
+template <typename T1, typename T2>
+inline format_object2<T1, T2> format(const char *Fmt, const T1 &Val1,
+                                     const T2 &Val2) {
+  return format_object2<T1, T2>(Fmt, Val1, Val2);
+}
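+// Illustrative sketch (not part of the original header): the print() retry
+// contract documented above is used by callers roughly like this; the buffer
+// sizes are arbitrary placeholders:
+//
+//   std::vector<char> V(127);
+//   while (true) {
++//     unsigned Needed = Fmt.print(&V[0], V.size());
+//     if (Needed < V.size()) break;  // success: Needed is the output length
+//     V.resize(Needed);              // too small: retry with suggested size
+//   }
+//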
+/// format - This is a helper function that is used to produce formatted output.
+/// This is typically used like: OS << format("%0.4f", myfloat) << '\n';
+template <typename T1, typename T2, typename T3>
+inline format_object3<T1, T2, T3> format(const char *Fmt, const T1 &Val1,
+                                         const T2 &Val2, const T3 &Val3) {
+  return format_object3<T1, T2, T3>(Fmt, Val1, Val2, Val3);
+}
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/FormattedStream.h b/final/include/llvm/Support/FormattedStream.h
new file mode 100644
index 00000000000..58a18851687
--- /dev/null
+++ b/final/include/llvm/Support/FormattedStream.h
@@ -0,0 +1,154 @@
+//===-- llvm/Support/FormattedStream.h - Formatted streams ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains raw_ostream implementations for streams to do
+// things like pretty-print comments.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATTEDSTREAM_H
+#define LLVM_SUPPORT_FORMATTEDSTREAM_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm
+{
+  /// formatted_raw_ostream - Formatted raw_ostream to handle
+  /// asm-specific constructs.
+  ///
+  class formatted_raw_ostream : public raw_ostream {
+  public:
+    /// DELETE_STREAM - Tell the destructor to delete the held stream.
+    ///
+    static const bool DELETE_STREAM = true;
+
+    /// PRESERVE_STREAM - Tell the destructor to not delete the held
+    /// stream.
+    ///
+    static const bool PRESERVE_STREAM = false;
+
+  private:
+    /// TheStream - The real stream we output to. We set it to be
+    /// unbuffered, since we're already doing our own buffering.
+    ///
+    raw_ostream *TheStream;
+
+    /// DeleteStream - Do we need to delete TheStream in the
+    /// destructor?
+    ///
+    bool DeleteStream;
+
+    /// ColumnScanned - The current output column of the data that's
+    /// been flushed and the portion of the buffer that's been
+    /// scanned. The column scheme is zero-based.
+    ///
+    unsigned ColumnScanned;
+
+    /// Scanned - This points to one past the last character in the
+    /// buffer we've scanned.
+    ///
+    const char *Scanned;
+
+    virtual void write_impl(const char *Ptr, size_t Size);
+
+    /// current_pos - Return the current position within the stream,
+    /// not counting the bytes currently in the buffer.
+    virtual uint64_t current_pos() const {
+      // This has the same effect as calling TheStream.current_pos(),
+      // but that interface is private.
+      return TheStream->tell() - TheStream->GetNumBytesInBuffer();
+    }
+
+    /// ComputeColumn - Examine the given output buffer and figure out which
+    /// column we end up in after output.
+    ///
+    void ComputeColumn(const char *Ptr, size_t size);
+
+  public:
+    /// formatted_raw_ostream - Wrap the given stream, optionally taking
+    /// ownership of it (controlled by the Delete flag; see DELETE_STREAM
+    /// and PRESERVE_STREAM).
+    ///
+    /// As a side effect, the given Stream is set to be Unbuffered.
+    /// This is because formatted_raw_ostream does its own buffering,
+    /// so it doesn't want another layer of buffering to be happening
+    /// underneath it.
+ /// + formatted_raw_ostream(raw_ostream &Stream, bool Delete = false) + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + setStream(Stream, Delete); + } + explicit formatted_raw_ostream() + : raw_ostream(), TheStream(0), DeleteStream(false), ColumnScanned(0) { + Scanned = 0; + } + + ~formatted_raw_ostream() { + flush(); + releaseStream(); + } + + void setStream(raw_ostream &Stream, bool Delete = false) { + releaseStream(); + + TheStream = &Stream; + DeleteStream = Delete; + + // This formatted_raw_ostream inherits from raw_ostream, so it'll do its + // own buffering, and it doesn't need or want TheStream to do another + // layer of buffering underneath. Resize the buffer to what TheStream + // had been using, and tell TheStream not to do its own buffering. + if (size_t BufferSize = TheStream->GetBufferSize()) + SetBufferSize(BufferSize); + else + SetUnbuffered(); + TheStream->SetUnbuffered(); + + Scanned = 0; + } + + /// PadToColumn - Align the output to some column number. If the current + /// column is already equal to or more than NewCol, PadToColumn inserts one + /// space. + /// + /// \param NewCol - The column to move to. + formatted_raw_ostream &PadToColumn(unsigned NewCol); + + private: + void releaseStream() { + // Delete the stream if needed. Otherwise, transfer the buffer + // settings from this raw_ostream back to the underlying stream. + if (!TheStream) + return; + if (DeleteStream) + delete TheStream; + else if (size_t BufferSize = GetBufferSize()) + TheStream->SetBufferSize(BufferSize); + else + TheStream->SetUnbuffered(); + } + }; + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &fouts(); + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &ferrs(); + +/// fdbgs() - This returns a reference to a formatted_raw_ostream for +/// debug output. Use it like: fdbgs() << "foo" << "bar"; +formatted_raw_ostream &fdbgs(); + +} // end llvm namespace + + +#endif diff --git a/final/include/llvm/Support/GetElementPtrTypeIterator.h b/final/include/llvm/Support/GetElementPtrTypeIterator.h new file mode 100644 index 00000000000..e5e7fc74095 --- /dev/null +++ b/final/include/llvm/Support/GetElementPtrTypeIterator.h @@ -0,0 +1,113 @@ +//===- llvm/Support/GetElementPtrTypeIterator.h -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an iterator for walking through the types indexed by +// getelementptr instructions. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GETELEMENTPTRTYPE_H
+#define LLVM_SUPPORT_GETELEMENTPTRTYPE_H
+
+#include "llvm/User.h"
+#include "llvm/DerivedTypes.h"
+
+namespace llvm {
+  template<typename ItTy = User::const_op_iterator>
+  class generic_gep_type_iterator
+    : public std::iterator<std::forward_iterator_tag, const Type *,
+                           ptrdiff_t> {
+    typedef std::iterator<std::forward_iterator_tag,
+                          const Type *, ptrdiff_t> super;
+
+    ItTy OpIt;
+    const Type *CurTy;
+    generic_gep_type_iterator() {}
+  public:
+
+    static generic_gep_type_iterator begin(const Type *Ty, ItTy It) {
+      generic_gep_type_iterator I;
+      I.CurTy = Ty;
+      I.OpIt = It;
+      return I;
+    }
+    static generic_gep_type_iterator end(ItTy It) {
+      generic_gep_type_iterator I;
+      I.CurTy = 0;
+      I.OpIt = It;
+      return I;
+    }
+
+    bool operator==(const generic_gep_type_iterator& x) const {
+      return OpIt == x.OpIt;
+    }
+    bool operator!=(const generic_gep_type_iterator& x) const {
+      return !operator==(x);
+    }
+
+    const Type *operator*() const {
+      return CurTy;
+    }
+
+    const Type *getIndexedType() const {
+      const CompositeType *CT = cast<CompositeType>(CurTy);
+      return CT->getTypeAtIndex(getOperand());
+    }
+
+    // This is a non-standard operator->. It allows you to call methods on the
+    // current type directly.
+    const Type *operator->() const { return operator*(); }
+
+    Value *getOperand() const { return *OpIt; }
+
+    generic_gep_type_iterator& operator++() {   // Preincrement
+      if (const CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
+        CurTy = CT->getTypeAtIndex(getOperand());
+      } else {
+        CurTy = 0;
+      }
+      ++OpIt;
+      return *this;
+    }
+
+    generic_gep_type_iterator operator++(int) { // Postincrement
+      generic_gep_type_iterator tmp = *this; ++*this; return tmp;
+    }
+  };
+
+  typedef generic_gep_type_iterator<> gep_type_iterator;
+
+  inline gep_type_iterator gep_type_begin(const User *GEP) {
+    return gep_type_iterator::begin(GEP->getOperand(0)->getType(),
+                                    GEP->op_begin()+1);
+  }
+  inline gep_type_iterator gep_type_end(const User *GEP) {
+    return gep_type_iterator::end(GEP->op_end());
+  }
+  inline gep_type_iterator gep_type_begin(const User &GEP) {
+    return gep_type_iterator::begin(GEP.getOperand(0)->getType(),
+                                    GEP.op_begin()+1);
+  }
+  inline gep_type_iterator gep_type_end(const User &GEP) {
+    return gep_type_iterator::end(GEP.op_end());
+  }
+
+  template<typename ItTy>
+  inline generic_gep_type_iterator<ItTy>
+  gep_type_begin(const Type *Op0, ItTy I, ItTy E) {
+    return generic_gep_type_iterator<ItTy>::begin(Op0, I);
+  }
+
+  template<typename ItTy>
+  inline generic_gep_type_iterator<ItTy>
+  gep_type_end(const Type *Op0, ItTy I, ItTy E) {
+    return generic_gep_type_iterator<ItTy>::end(E);
+  }
+} // end namespace llvm
+
+#endif
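gep_type_begin/gep_type_end walk the sequence of types a getelementptr indexes through, one index operand at a time. A minimal sketch (not part of the original headers) of the usual loop over a GEP's indexed types:

    #include "llvm/Support/GetElementPtrTypeIterator.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Visit each (type, index operand) pair a GEP steps through.
    static void visitIndexedTypes(const GetElementPtrInst *GEPI) {
      for (gep_type_iterator GTI = gep_type_begin(GEPI), E = gep_type_end(GEPI);
           GTI != E; ++GTI) {
        const Type *Indexed = *GTI;      // the type being indexed at this level
        Value *Idx = GTI.getOperand();   // the corresponding index operand
        (void)Indexed; (void)Idx;        // ... inspect as needed ...
      }
    }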
diff --git a/final/include/llvm/Support/GraphWriter.h b/final/include/llvm/Support/GraphWriter.h
new file mode 100644
index 00000000000..a5165f44d54
--- /dev/null
+++ b/final/include/llvm/Support/GraphWriter.h
@@ -0,0 +1,357 @@
+//===-- llvm/Support/GraphWriter.h - Write graph to a .dot file -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple interface that can be used to print out generic
+// LLVM graphs to ".dot" files. "dot" is a tool that is part of the AT&T
+// graphviz package (http://www.research.att.com/sw/tools/graphviz/) which can
+// be used to turn the files output by this interface into a variety of
+// different graphics formats.
+//
+// Graphs do not need to implement any interface past what is already required
+// by the GraphTraits template, but they can choose to implement specializations
+// of the DOTGraphTraits template if they want to customize the graph output in
+// any way.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GRAPHWRITER_H
+#define LLVM_SUPPORT_GRAPHWRITER_H
+
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/Support/Path.h"
+#include <vector>
+#include <cassert>
+
+namespace llvm {
+
+namespace DOT {  // Private functions...
+  std::string EscapeString(const std::string &Label);
+}
+
+namespace GraphProgram {
+  enum Name {
+    DOT,
+    FDP,
+    NEATO,
+    TWOPI,
+    CIRCO
+  };
+}
+
+void DisplayGraph(const sys::Path& Filename, bool wait=true, GraphProgram::Name program = GraphProgram::DOT);
+
+template<typename GraphType>
+class GraphWriter {
+  raw_ostream &O;
+  const GraphType &G;
+
+  typedef DOTGraphTraits<GraphType>           DOTTraits;
+  typedef GraphTraits<GraphType>              GTraits;
+  typedef typename GTraits::NodeType          NodeType;
+  typedef typename GTraits::nodes_iterator    node_iterator;
+  typedef typename GTraits::ChildIteratorType child_iterator;
+  DOTTraits DTraits;
+
+  // Writes the edge labels of the node to O and returns true if there are any
+  // edge labels not equal to the empty string "".
+  bool getEdgeSourceLabels(raw_ostream &O, NodeType *Node) {
+    child_iterator EI = GTraits::child_begin(Node);
+    child_iterator EE = GTraits::child_end(Node);
+    bool hasEdgeSourceLabels = false;
+
+    for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) {
+      std::string label = DTraits.getEdgeSourceLabel(Node, EI);
+
+      if (label.empty())
+        continue;
+
+      hasEdgeSourceLabels = true;
+
+      if (i)
+        O << "|";
+
+      O << "<s" << i << ">" << DOT::EscapeString(label);
+    }
+
+    if (EI != EE && hasEdgeSourceLabels)
+      O << "|<s64>truncated...";
+
+    return hasEdgeSourceLabels;
+  }
+
+public:
+  GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) {
+    DTraits = DOTTraits(SN);
+  }
+
+  void writeGraph(const std::string &Title = "") {
+    // Output the header for the graph...
+    writeHeader(Title);
+
+    // Emit all of the nodes in the graph...
+    writeNodes();
+
+    // Output any customizations on the graph
+    DOTGraphTraits<GraphType>::addCustomGraphFeatures(G, *this);
+
+    // Output the end of the graph
+    writeFooter();
+  }
+
+  void writeHeader(const std::string &Title) {
+    std::string GraphName = DTraits.getGraphName(G);
+
+    if (!Title.empty())
+      O << "digraph \"" << DOT::EscapeString(Title) << "\" {\n";
+    else if (!GraphName.empty())
+      O << "digraph \"" << DOT::EscapeString(GraphName) << "\" {\n";
+    else
+      O << "digraph unnamed {\n";
+
+    if (DTraits.renderGraphFromBottomUp())
+      O << "\trankdir=\"BT\";\n";
+
+    if (!Title.empty())
+      O << "\tlabel=\"" << DOT::EscapeString(Title) << "\";\n";
+    else if (!GraphName.empty())
+      O << "\tlabel=\"" << DOT::EscapeString(GraphName) << "\";\n";
+    O << DTraits.getGraphProperties(G);
+    O << "\n";
+  }
+
+  void writeFooter() {
+    // Finish off the graph
+    O << "}\n";
+  }
+
+  void writeNodes() {
+    // Loop over the graph, printing it out...
+    for (node_iterator I = GTraits::nodes_begin(G), E = GTraits::nodes_end(G);
+         I != E; ++I)
+      if (!isNodeHidden(*I))
+        writeNode(*I);
+  }
+
+  bool isNodeHidden(NodeType &Node) {
+    return isNodeHidden(&Node);
+  }
+
+  bool isNodeHidden(NodeType *const *Node) {
+    return isNodeHidden(*Node);
+  }
+
+  bool isNodeHidden(NodeType *Node) {
+    return DTraits.isNodeHidden(Node);
+  }
+
+  void writeNode(NodeType& Node) {
+    writeNode(&Node);
+  }
+
+  void writeNode(NodeType *const *Node) {
+    writeNode(*Node);
+  }
+
+  void writeNode(NodeType *Node) {
+    std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);
+
+    O << "\tNode" << static_cast<const void*>(Node) << " [shape=record,";
+    if (!NodeAttributes.empty()) O << NodeAttributes << ",";
+    O << "label=\"{";
+
+    if (!DTraits.renderGraphFromBottomUp()) {
+      O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+
+      // If we should include the address of the node in the label, do so now.
+      if (DTraits.hasNodeAddressLabel(Node, G))
+        O << "|" << (void*)Node;
+    }
+
+    std::string edgeSourceLabels;
+    raw_string_ostream EdgeSourceLabels(edgeSourceLabels);
+    bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node);
+
+    if (hasEdgeSourceLabels) {
+      if (!DTraits.renderGraphFromBottomUp()) O << "|";
+
+      O << "{" << EdgeSourceLabels.str() << "}";
+
+      if (DTraits.renderGraphFromBottomUp()) O << "|";
+    }
+
+    if (DTraits.renderGraphFromBottomUp()) {
+      O << DOT::EscapeString(DTraits.getNodeLabel(Node, G));
+
+      // If we should include the address of the node in the label, do so now.
+      if (DTraits.hasNodeAddressLabel(Node, G))
+        O << "|" << (void*)Node;
+    }
+
+    if (DTraits.hasEdgeDestLabels()) {
+      O << "|{";
+
+      unsigned i = 0, e = DTraits.numEdgeDestLabels(Node);
+      for (; i != e && i != 64; ++i) {
+        if (i) O << "|";
+        O << "<d" << i << ">"
+          << DOT::EscapeString(DTraits.getEdgeDestLabel(Node, i));
+      }
+
+      if (i != e)
+        O << "|<d64>truncated...";
+      O << "}";
+    }
+
+    O << "}\"];\n";   // Finish printing the "node" line
+
+    // Output all of the edges now
+    child_iterator EI = GTraits::child_begin(Node);
+    child_iterator EE = GTraits::child_end(Node);
+    for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i)
+      if (!DTraits.isNodeHidden(*EI))
+        writeEdge(Node, i, EI);
+    for (; EI != EE; ++EI)
+      if (!DTraits.isNodeHidden(*EI))
+        writeEdge(Node, 64, EI);
+  }
+
+  void writeEdge(NodeType *Node, unsigned edgeidx, child_iterator EI) {
+    if (NodeType *TargetNode = *EI) {
+      int DestPort = -1;
+      if (DTraits.edgeTargetsEdgeSource(Node, EI)) {
+        child_iterator TargetIt = DTraits.getEdgeTarget(Node, EI);
+
+        // Figure out which edge this targets...
+        unsigned Offset =
+          (unsigned)std::distance(GTraits::child_begin(TargetNode), TargetIt);
+        DestPort = static_cast<int>(Offset);
+      }
+
+      if (DTraits.getEdgeSourceLabel(Node, EI).empty())
+        edgeidx = -1;
+
+      emitEdge(static_cast<const void*>(Node), edgeidx,
+               static_cast<const void*>(TargetNode), DestPort,
+               DTraits.getEdgeAttributes(Node, EI, G));
+    }
+  }
+
+  /// emitSimpleNode - Outputs a simple (non-record) node
+  void emitSimpleNode(const void *ID, const std::string &Attr,
+                      const std::string &Label, unsigned NumEdgeSources = 0,
+                      const std::vector<std::string> *EdgeSourceLabels = 0) {
+    O << "\tNode" << ID << "[ ";
+    if (!Attr.empty())
+      O << Attr << ",";
+    O << " label =\"";
+    if (NumEdgeSources) O << "{";
+    O << DOT::EscapeString(Label);
+    if (NumEdgeSources) {
+      O << "|{";
+
+      for (unsigned i = 0; i != NumEdgeSources; ++i) {
+        if (i) O << "|";
+        O << "<s" << i << ">";
+        if (EdgeSourceLabels) O << DOT::EscapeString((*EdgeSourceLabels)[i]);
+      }
+      O << "}}";
+    }
+    O << "\"];\n";
+  }
+
+  /// emitEdge - Output an edge from a simple node into the graph...
+  void emitEdge(const void *SrcNodeID, int SrcNodePort,
+                const void *DestNodeID, int DestNodePort,
+                const std::string &Attrs) {
+    if (SrcNodePort  > 64) return;             // Emanating from truncated part?
+    if (DestNodePort > 64) DestNodePort = 64;  // Targeting the truncated part?
+
+    O << "\tNode" << SrcNodeID;
+    if (SrcNodePort >= 0)
+      O << ":s" << SrcNodePort;
+    O << " -> Node" << DestNodeID;
+    if (DestNodePort >= 0 && DTraits.hasEdgeDestLabels())
+      O << ":d" << DestNodePort;
+
+    if (!Attrs.empty())
+      O << "[" << Attrs << "]";
+    O << ";\n";
+  }
+
+  /// getOStream - Get the raw output stream into the graph file. Useful to
+  /// write fancy things using addCustomGraphFeatures().
+  raw_ostream &getOStream() {
+    return O;
+  }
+};
+
+template<typename GraphType>
+raw_ostream &WriteGraph(raw_ostream &O, const GraphType &G,
+                        bool ShortNames = false,
+                        const std::string &Title = "") {
+  // Start the graph emission process...
+  GraphWriter<GraphType> W(O, G, ShortNames);
+
+  // Emit the graph.
+  W.writeGraph(Title);
+
+  return O;
+}
+
+template<typename GraphType>
+sys::Path WriteGraph(const GraphType &G, const std::string &Name,
+                     bool ShortNames = false, const std::string &Title = "") {
+  std::string ErrMsg;
+  sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg);
+  if (Filename.isEmpty()) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return Filename;
+  }
+  Filename.appendComponent(Name + ".dot");
+  if (Filename.makeUnique(true,&ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return sys::Path();
+  }
+
+  errs() << "Writing '" << Filename.str() << "'... ";
+
+  std::string ErrorInfo;
+  raw_fd_ostream O(Filename.c_str(), ErrorInfo);
+
+  if (ErrorInfo.empty()) {
+    llvm::WriteGraph(O, G, ShortNames, Title);
+    errs() << " done. \n";
+  } else {
+    errs() << "error opening file '" << Filename.str() << "' for writing!\n";
+    Filename.clear();
+  }
+
+  return Filename;
+}
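+// Illustrative usage sketch (not from the original header): any graph type
+// with a GraphTraits specialization can be dumped or displayed, e.g. a
+// Function's CFG via the GraphTraits<Function*> specialization that LLVM
+// provides in llvm/Support/CFG.h:
+//
+//   Function *F = ...;
+//   llvm::WriteGraph(llvm::errs(), F);               // emit .dot to stderr
+//   llvm::ViewGraph(F, "cfg." + F->getNameStr());    // write, then display
+//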
+/// ViewGraph - Emit a dot graph, run 'dot', run gv on the postscript file,
+/// then cleanup.  For use from the debugger.
+///
+template<typename GraphType>
+void ViewGraph(const GraphType &G, const std::string &Name,
+               bool ShortNames = false, const std::string &Title = "",
+               GraphProgram::Name Program = GraphProgram::DOT) {
+  sys::Path Filename = llvm::WriteGraph(G, Name, ShortNames, Title);
+
+  if (Filename.isEmpty())
+    return;
+
+  DisplayGraph(Filename, true, Program);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/Host.h b/final/include/llvm/Support/Host.h
new file mode 100644
index 00000000000..f77d4c1182b
--- /dev/null
+++ b/final/include/llvm/Support/Host.h
@@ -0,0 +1,66 @@
+//===- llvm/Support/Host.h - Host machine characteristics --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods for querying the nature of the host machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_HOST_H
+#define LLVM_SYSTEM_HOST_H
+
+#include "llvm/ADT/StringMap.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  inline bool isLittleEndianHost() {
+    union {
+      int i;
+      char c;
+    };
+    i = 1;
+    return c;
+  }
+
+  inline bool isBigEndianHost() {
+    return !isLittleEndianHost();
+  }
+
+  /// getHostTriple() - Return the target triple of the running
+  /// system.
+  ///
+  /// The target triple is a string in the format of:
+  ///   CPU_TYPE-VENDOR-OPERATING_SYSTEM
+  /// or
+  ///   CPU_TYPE-VENDOR-KERNEL-OPERATING_SYSTEM
+  std::string getHostTriple();
+
+  /// getHostCPUName - Get the LLVM name for the host CPU. The particular format
+  /// of the name is target dependent, and suitable for passing as -mcpu to the
+  /// target which matches the host.
+  ///
+  /// \return - The host CPU name, or empty if the CPU could not be determined.
+  std::string getHostCPUName();
+
+  /// getHostCPUFeatures - Get the LLVM names for the host CPU features.
+  /// The particular format of the names is target dependent, and suitable for
+  /// passing as -mattr to the target which matches the host.
+  ///
+  /// \param Features - A string mapping feature names to either
+  /// true (if enabled) or false (if disabled). This routine makes no guarantees
+  /// about exactly which features may appear in this map, except that they are
+  /// all valid LLVM feature names.
+  ///
+  /// \return - True on success.
+  bool getHostCPUFeatures(StringMap<bool> &Features);
+}
+}
+
+#endif
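These host queries are typically combined when configuring codegen or a JIT for the machine it runs on. A minimal sketch (not part of the original headers) that turns the feature map into -mattr style "+feat"/"-feat" strings:

    #include "llvm/Support/Host.h"
    #include "llvm/ADT/StringMap.h"
    #include <string>
    #include <vector>
    using namespace llvm;

    // Collect "+feat"/"-feat" flags describing the host CPU.
    static std::vector<std::string> hostFeatureFlags() {
      std::vector<std::string> Flags;
      StringMap<bool> Features;
      if (sys::getHostCPUFeatures(Features))
        for (StringMap<bool>::const_iterator I = Features.begin(),
             E = Features.end(); I != E; ++I)
          Flags.push_back((I->second ? "+" : "-") + I->getKey().str());
      return Flags;
    }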
diff --git a/final/include/llvm/Support/IRBuilder.h b/final/include/llvm/Support/IRBuilder.h
new file mode 100644
index 00000000000..2394a59c09c
--- /dev/null
+++ b/final/include/llvm/Support/IRBuilder.h
@@ -0,0 +1,1224 @@
+//===---- llvm/Support/IRBuilder.h - Builder for LLVM Instrs ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the IRBuilder class, which is used as a convenient way
+// to create LLVM instructions with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_IRBUILDER_H
+#define LLVM_SUPPORT_IRBUILDER_H
+
+#include "llvm/Instructions.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ConstantFolder.h"
+
+namespace llvm {
+  class MDNode;
+
+/// IRBuilderDefaultInserter - This provides the default implementation of the
+/// IRBuilder 'InsertHelper' method that is called whenever an instruction is
+/// created by IRBuilder and needs to be inserted. By default, this inserts the
+/// instruction at the insertion point.
+template <bool preserveNames = true>
+class IRBuilderDefaultInserter {
+protected:
+  void InsertHelper(Instruction *I, const Twine &Name,
+                    BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+    if (BB) BB->getInstList().insert(InsertPt, I);
+    if (preserveNames)
+      I->setName(Name);
+  }
+};
+
+/// IRBuilderBase - Common base class shared among various IRBuilders.
+class IRBuilderBase {
+  DebugLoc CurDbgLocation;
+protected:
+  BasicBlock *BB;
+  BasicBlock::iterator InsertPt;
+  LLVMContext &Context;
+public:
+
+  IRBuilderBase(LLVMContext &context)
+    : Context(context) {
+    ClearInsertionPoint();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Builder configuration methods
+  //===--------------------------------------------------------------------===//
+
+  /// ClearInsertionPoint - Clear the insertion point: created instructions will
+  /// not be inserted into a block.
+  void ClearInsertionPoint() {
+    BB = 0;
+  }
+
+  BasicBlock *GetInsertBlock() const { return BB; }
+  BasicBlock::iterator GetInsertPoint() const { return InsertPt; }
+  LLVMContext &getContext() const { return Context; }
+
+  /// SetInsertPoint - This specifies that created instructions should be
+  /// appended to the end of the specified block.
+  void SetInsertPoint(BasicBlock *TheBB) {
+    BB = TheBB;
+    InsertPt = BB->end();
+  }
+
+  /// SetInsertPoint - This specifies that created instructions should be
+  /// inserted before the specified instruction.
+  void SetInsertPoint(Instruction *I) {
+    BB = I->getParent();
+    InsertPt = I;
+  }
+
+  /// SetInsertPoint - This specifies that created instructions should be
+  /// inserted at the specified point.
+  void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
+    BB = TheBB;
+    InsertPt = IP;
+  }
+
+  /// SetCurrentDebugLocation - Set location information used by debugging
+  /// information.
+  void SetCurrentDebugLocation(const DebugLoc &L) {
+    CurDbgLocation = L;
+  }
+
+  /// getCurrentDebugLocation - Get location information used by debugging
+  /// information.
+  const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
+
+  /// SetInstDebugLocation - If this builder has a current debug location, set
+  /// it on the specified instruction.
+  void SetInstDebugLocation(Instruction *I) const {
+    if (!CurDbgLocation.isUnknown())
+      I->setDebugLoc(CurDbgLocation);
+  }
+
+  /// InsertPoint - A saved insertion point.
+  class InsertPoint {
+    BasicBlock *Block;
+    BasicBlock::iterator Point;
+
+  public:
+    /// Creates a new insertion point which doesn't point to anything.
+    InsertPoint() : Block(0) {}
+
+    /// Creates a new insertion point at the given location.
+    InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint)
+      : Block(InsertBlock), Point(InsertPoint) {}
+
+    /// isSet - Returns true if this insert point is set.
+    bool isSet() const { return (Block != 0); }
+
+    llvm::BasicBlock *getBlock() const { return Block; }
+    llvm::BasicBlock::iterator getPoint() const { return Point; }
+  };
+
+  /// saveIP - Returns the current insert point.
+  InsertPoint saveIP() const {
+    return InsertPoint(GetInsertBlock(), GetInsertPoint());
+  }
+
+  /// saveAndClearIP - Returns the current insert point, clearing it
+  /// in the process.
+  InsertPoint saveAndClearIP() {
+    InsertPoint IP(GetInsertBlock(), GetInsertPoint());
+    ClearInsertionPoint();
+    return IP;
+  }
+
+  /// restoreIP - Sets the current insert point to a previously-saved
+  /// location.
+  void restoreIP(InsertPoint IP) {
+    if (IP.isSet())
+      SetInsertPoint(IP.getBlock(), IP.getPoint());
+    else
+      ClearInsertionPoint();
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Miscellaneous creation methods.
+  //===--------------------------------------------------------------------===//
+
+  /// CreateGlobalString - Make a new global variable with an initializer that
+  /// has array of i8 type filled in with the nul terminated string value
+  /// specified. If Name is specified, it is the name of the global variable
+  /// created.
+  Value *CreateGlobalString(const char *Str = "", const Twine &Name = "");
+
+  /// getInt1 - Get a constant value representing either true or false.
+  ConstantInt *getInt1(bool V) {
+    return ConstantInt::get(getInt1Ty(), V);
+  }
+
+  /// getTrue - Get the constant value for i1 true.
+  ConstantInt *getTrue() {
+    return ConstantInt::getTrue(Context);
+  }
+
+  /// getFalse - Get the constant value for i1 false.
+  ConstantInt *getFalse() {
+    return ConstantInt::getFalse(Context);
+  }
+
+  /// getInt8 - Get a constant 8-bit value.
+  ConstantInt *getInt8(uint8_t C) {
+    return ConstantInt::get(getInt8Ty(), C);
+  }
+
+  /// getInt16 - Get a constant 16-bit value.
+  ConstantInt *getInt16(uint16_t C) {
+    return ConstantInt::get(getInt16Ty(), C);
+  }
+
+  /// getInt32 - Get a constant 32-bit value.
+  ConstantInt *getInt32(uint32_t C) {
+    return ConstantInt::get(getInt32Ty(), C);
+  }
+
+  /// getInt64 - Get a constant 64-bit value.
+  ConstantInt *getInt64(uint64_t C) {
+    return ConstantInt::get(getInt64Ty(), C);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Type creation methods
+  //===--------------------------------------------------------------------===//
+
+  /// getInt1Ty - Fetch the type representing a single bit
+  const IntegerType *getInt1Ty() {
+    return Type::getInt1Ty(Context);
+  }
+
+  /// getInt8Ty - Fetch the type representing an 8-bit integer.
+  const IntegerType *getInt8Ty() {
+    return Type::getInt8Ty(Context);
+  }
+
+  /// getInt16Ty - Fetch the type representing a 16-bit integer.
+  const IntegerType *getInt16Ty() {
+    return Type::getInt16Ty(Context);
+  }
+
+  /// getInt32Ty - Fetch the type representing a 32-bit integer.
+  const IntegerType *getInt32Ty() {
+    return Type::getInt32Ty(Context);
+  }
+
+  /// getInt64Ty - Fetch the type representing a 64-bit integer.
+  const IntegerType *getInt64Ty() {
+    return Type::getInt64Ty(Context);
+  }
+
+  /// getFloatTy - Fetch the type representing a 32-bit floating point value.
+  const Type *getFloatTy() {
+    return Type::getFloatTy(Context);
+  }
+
+  /// getDoubleTy - Fetch the type representing a 64-bit floating point value.
+  const Type *getDoubleTy() {
+    return Type::getDoubleTy(Context);
+  }
+
+  /// getVoidTy - Fetch the type representing void.
+  const Type *getVoidTy() {
+    return Type::getVoidTy(Context);
+  }
+
+  const PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
+    return Type::getInt8PtrTy(Context, AddrSpace);
+  }
+
+  /// getCurrentFunctionReturnType - Get the return type of the current function
+  /// that we're emitting into.
+  const Type *getCurrentFunctionReturnType() const;
+
+  /// CreateMemSet - Create and insert a memset to the specified pointer and the
+  /// specified value. If the pointer isn't an i8*, it will be converted. If a
+  /// TBAA tag is specified, it will be added to the instruction.
+  CallInst *CreateMemSet(Value *Ptr, Value *Val, uint64_t Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+
+  CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0);
+
+  /// CreateMemCpy - Create and insert a memcpy between the specified pointers.
+  /// If the pointers aren't i8*, they will be converted. If a TBAA tag is
+  /// specified, it will be added to the instruction.
+  CallInst *CreateMemCpy(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+
+  CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
+                         bool isVolatile = false, MDNode *TBAATag = 0);
+
+  /// CreateMemMove - Create and insert a memmove between the specified
+  /// pointers. If the pointers aren't i8*, they will be converted. If a TBAA
+  /// tag is specified, it will be added to the instruction.
+  CallInst *CreateMemMove(Value *Dst, Value *Src, uint64_t Size, unsigned Align,
+                          bool isVolatile = false, MDNode *TBAATag = 0) {
+    return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
+  }
+
+  CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
+                          bool isVolatile = false, MDNode *TBAATag = 0);
+private:
+  Value *getCastedInt8PtrValue(Value *Ptr);
+};
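+// Illustrative usage sketch for the builder defined below (not part of the
+// original header; BB, L and R are placeholders for an existing block and
+// two existing values):
+//
+//   IRBuilder<> Builder(BB);                    // append to basic block BB
+//   Value *Sum = Builder.CreateAdd(L, R, "sum");
+//   Builder.CreateRet(Sum);                     // emit the 'ret' terminator
+//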
+
+/// IRBuilder - This provides a uniform API for creating instructions and
+/// inserting them into a basic block: either at the end of a BasicBlock, or
+/// at a specific iterator location in a block.
+///
+/// Note that the builder does not expose the full generality of LLVM
+/// instructions. For access to extra instruction properties, use the mutators
+/// (e.g. setVolatile) on the instructions after they have been created.
+/// The first template argument handles whether or not to preserve names in the
+/// final instruction output. This defaults to on. The second template argument
+/// specifies a class to use for creating constants. This defaults to creating
+/// minimally folded constants. The third template argument allows clients to
+/// specify custom insertion hooks that are called on every newly created
+/// instruction.
+template<bool preserveNames = true, typename T = ConstantFolder,
+         typename Inserter = IRBuilderDefaultInserter<preserveNames> >
+class IRBuilder : public IRBuilderBase, public Inserter {
+  T Folder;
+public:
+  IRBuilder(LLVMContext &C, const T &F, const Inserter &I = Inserter())
+    : IRBuilderBase(C), Inserter(I), Folder(F) {
+  }
+
+  explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder(C) {
+  }
+
+  explicit IRBuilder(BasicBlock *TheBB, const T &F)
+    : IRBuilderBase(TheBB->getContext()), Folder(F) {
+    SetInsertPoint(TheBB);
+  }
+
+  explicit IRBuilder(BasicBlock *TheBB)
+    : IRBuilderBase(TheBB->getContext()), Folder(Context) {
+    SetInsertPoint(TheBB);
+  }
+
+  explicit IRBuilder(Instruction *IP)
+    : IRBuilderBase(IP->getContext()), Folder(Context) {
+    SetInsertPoint(IP);
+  }
+
+  IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F)
+    : IRBuilderBase(TheBB->getContext()), Folder(F) {
+    SetInsertPoint(TheBB, IP);
+  }
+
+  IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP)
+    : IRBuilderBase(TheBB->getContext()), Folder(Context) {
+    SetInsertPoint(TheBB, IP);
+  }
+
+  /// getFolder - Get the constant folder being used.
+  const T &getFolder() { return Folder; }
+
+  /// isNamePreserving - Return true if this builder is configured to actually
+  /// add the requested names to IR created through it.
+  bool isNamePreserving() const { return preserveNames; }
+
+  /// Insert - Insert and return the specified instruction.
+  template<typename InstTy>
+  InstTy *Insert(InstTy *I, const Twine &Name = "") const {
+    this->InsertHelper(I, Name, BB, InsertPt);
+    if (!getCurrentDebugLocation().isUnknown())
+      this->SetInstDebugLocation(I);
+    return I;
+  }
+
+  /// Insert - No-op overload to handle constants.
+  Constant *Insert(Constant *C, const Twine& = "") const {
+    return C;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Terminators
+  //===--------------------------------------------------------------------===//
+
+  /// CreateRetVoid - Create a 'ret void' instruction.
+  ReturnInst *CreateRetVoid() {
+    return Insert(ReturnInst::Create(Context));
+  }
+
+  /// @verbatim
+  /// CreateRet - Create a 'ret <val>' instruction.
+  /// @endverbatim
+  ReturnInst *CreateRet(Value *V) {
+    return Insert(ReturnInst::Create(Context, V));
+  }
+
+  /// CreateAggregateRet - Create a sequence of N insertvalue instructions,
+  /// with one Value from the retVals array each, that build an aggregate
+  /// return value one value at a time, and a ret instruction to return
+  /// the resulting aggregate value. This is a convenience function for
+  /// code that uses aggregate return values as a vehicle for having
+  /// multiple return values.
+  ///
+  ReturnInst *CreateAggregateRet(Value *const *retVals, unsigned N) {
+    Value *V = UndefValue::get(getCurrentFunctionReturnType());
+    for (unsigned i = 0; i != N; ++i)
+      V = CreateInsertValue(V, retVals[i], i, "mrv");
+    return Insert(ReturnInst::Create(Context, V));
+  }
+
+  /// CreateBr - Create an unconditional 'br label X' instruction.
+  BranchInst *CreateBr(BasicBlock *Dest) {
+    return Insert(BranchInst::Create(Dest));
+  }
+
+  /// CreateCondBr - Create a conditional 'br Cond, TrueDest, FalseDest'
+  /// instruction.
+  BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False) {
+    return Insert(BranchInst::Create(True, False, Cond));
+  }
+
+  /// CreateSwitch - Create a switch instruction with the specified value,
+  /// default dest, and with a hint for the number of cases that will be added
+  /// (for efficient allocation).
+  SwitchInst *CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases = 10) {
+    return Insert(SwitchInst::Create(V, Dest, NumCases));
+  }
+
+  /// CreateIndirectBr - Create an indirect branch instruction with the
+  /// specified address operand, with an optional hint for the number of
+  /// destinations that will be added (for efficient allocation).
+  IndirectBrInst *CreateIndirectBr(Value *Addr, unsigned NumDests = 10) {
+    return Insert(IndirectBrInst::Create(Addr, NumDests));
+  }
+
+  InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+                           BasicBlock *UnwindDest, const Twine &Name = "") {
+    Value *Args[] = { 0 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args), Name);
+  }
+  InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+                           BasicBlock *UnwindDest, Value *Arg1,
+                           const Twine &Name = "") {
+    Value *Args[] = { Arg1 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args+1), Name);
+  }
+  InvokeInst *CreateInvoke3(Value *Callee, BasicBlock *NormalDest,
+                            BasicBlock *UnwindDest, Value *Arg1,
+                            Value *Arg2, Value *Arg3,
+                            const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args+3), Name);
+  }
+  /// CreateInvoke - Create an invoke instruction.
+  template<typename RandomAccessIterator>
+  InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+                           BasicBlock *UnwindDest,
+                           RandomAccessIterator ArgBegin,
+                           RandomAccessIterator ArgEnd,
+                           const Twine &Name = "") {
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
+                                     ArgBegin, ArgEnd), Name);
+  }
+
+  UnwindInst *CreateUnwind() {
+    return Insert(new UnwindInst(Context));
+  }
+
+  UnreachableInst *CreateUnreachable() {
+    return Insert(new UnreachableInst(Context));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Binary Operators
+  //===--------------------------------------------------------------------===//
+private:
+  BinaryOperator *CreateInsertNUWNSWBinOp(BinaryOperator::BinaryOps Opc,
+                                          Value *LHS, Value *RHS,
+                                          const Twine &Name,
+                                          bool HasNUW, bool HasNSW) {
+    BinaryOperator *BO = Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
+    if (HasNUW) BO->setHasNoUnsignedWrap();
+    if (HasNSW) BO->setHasNoSignedWrap();
+    return BO;
+  }
+public:
+  Value *CreateAdd(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateAdd(LC, RC, HasNUW, HasNSW), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Add, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
+  }
+  Value *CreateNSWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateAdd(LHS, RHS, Name, false, true);
+  }
+  Value *CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateAdd(LHS, RHS, Name, true, false);
+  }
+  Value *CreateFAdd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFAdd(LC, RC), Name);
+    return Insert(BinaryOperator::CreateFAdd(LHS, RHS), Name);
+  }
+  Value *CreateSub(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateSub(LC, RC), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Sub, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
+  }
+  Value *CreateNSWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateSub(LHS, RHS, Name, false, true);
+  }
+  Value *CreateNUWSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateSub(LHS, RHS, Name, true, false);
+  }
+  Value *CreateFSub(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFSub(LC, RC), Name);
+    return Insert(BinaryOperator::CreateFSub(LHS, RHS), Name);
+  }
+  Value *CreateMul(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateMul(LC, RC), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Mul, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
+  }
+  Value *CreateNSWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateMul(LHS, RHS, Name, false, true);
+  }
+  Value *CreateNUWMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateMul(LHS, RHS, Name, true, false);
+  }
+  Value *CreateFMul(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFMul(LC, RC), Name);
+    return Insert(BinaryOperator::CreateFMul(LHS, RHS), Name);
+  }
+  Value *CreateUDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateUDiv(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateUDiv(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactUDiv(LHS, RHS), Name);
+  }
+  Value *CreateExactUDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateUDiv(LHS, RHS, Name, true);
+  }
+  Value *CreateSDiv(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateSDiv(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateSDiv(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactSDiv(LHS, RHS), Name);
+  }
+  Value *CreateExactSDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateSDiv(LHS, RHS, Name, true);
+  }
+  Value *CreateFDiv(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFDiv(LC, RC), Name);
+    return Insert(BinaryOperator::CreateFDiv(LHS, RHS), Name);
+  }
+  Value *CreateURem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateURem(LC, RC), Name);
+    return Insert(BinaryOperator::CreateURem(LHS, RHS), Name);
+  }
+  Value *CreateSRem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateSRem(LC, RC), Name);
+    return Insert(BinaryOperator::CreateSRem(LHS, RHS), Name);
+  }
+  Value *CreateFRem(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFRem(LC, RC), Name);
+    return Insert(BinaryOperator::CreateFRem(LHS, RHS), Name);
+  }
+
+  Value *CreateShl(Value *LHS, Value *RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateShl(LC, RC, HasNUW, HasNSW), Name);
+    return CreateInsertNUWNSWBinOp(Instruction::Shl, LHS, RHS, Name,
+                                   HasNUW, HasNSW);
+  }
+  Value *CreateShl(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+                     HasNUW, HasNSW);
+  }
+  Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    return CreateShl(LHS, ConstantInt::get(LHS->getType(), RHS), Name,
+                     HasNUW, HasNSW);
+  }
+
+  Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateLShr(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactLShr(LHS, RHS), Name);
+  }
+  Value *CreateLShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+  }
+  Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateLShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+  }
+
+  Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateAShr(LC, RC, isExact), Name);
+    if (!isExact)
+      return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
+    return Insert(BinaryOperator::CreateExactAShr(LHS, RHS), Name);
+  }
+  Value *CreateAShr(Value *LHS, const APInt &RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+  }
+  Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "",
+                    bool isExact = false) {
+    return CreateAShr(LHS, ConstantInt::get(LHS->getType(), RHS), Name,isExact);
+  }
+
+  Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *RC = dyn_cast<Constant>(RHS)) {
+      if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
+        return LHS;  // LHS & -1 -> LHS
+      if (Constant *LC = dyn_cast<Constant>(LHS))
+        return Insert(Folder.CreateAnd(LC, RC), Name);
+    }
+    return Insert(BinaryOperator::CreateAnd(LHS, RHS), Name);
+  }
+  Value *CreateAnd(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+    return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+  Value *CreateAnd(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    return CreateAnd(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+
+  Value *CreateOr(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *RC = dyn_cast<Constant>(RHS)) {
+      if (RC->isNullValue())
+        return LHS;  // LHS | 0 -> LHS
+      if (Constant *LC = dyn_cast<Constant>(LHS))
+        return Insert(Folder.CreateOr(LC, RC), Name);
+    }
+    return Insert(BinaryOperator::CreateOr(LHS, RHS), Name);
+  }
+  Value *CreateOr(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+    return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+  Value *CreateOr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    return CreateOr(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+
+  Value *CreateXor(Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateXor(LC, RC), Name);
+    return Insert(BinaryOperator::CreateXor(LHS, RHS), Name);
+  }
+  Value *CreateXor(Value *LHS, const APInt &RHS, const Twine &Name = "") {
+    return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+  Value *CreateXor(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    return CreateXor(LHS, ConstantInt::get(LHS->getType(), RHS), Name);
+  }
+
+  Value *CreateBinOp(Instruction::BinaryOps Opc,
+                     Value *LHS, Value *RHS, const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
+    return Insert(BinaryOperator::Create(Opc, LHS, RHS), Name);
+  }
+
+  Value *CreateNeg(Value *V, const Twine &Name = "",
+                   bool HasNUW = false, bool HasNSW = false) {
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateNeg(VC, HasNUW, HasNSW), Name);
+    BinaryOperator *BO = Insert(BinaryOperator::CreateNeg(V), Name);
+    if (HasNUW) BO->setHasNoUnsignedWrap();
+    if (HasNSW) BO->setHasNoSignedWrap();
+    return BO;
+  }
+  Value *CreateNSWNeg(Value *V, const Twine &Name = "") {
+    return CreateNeg(V, Name, false, true);
+  }
+  Value *CreateNUWNeg(Value *V, const Twine &Name = "") {
+    return CreateNeg(V, Name, true, false);
+  }
+  Value *CreateFNeg(Value *V, const Twine &Name = "") {
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateFNeg(VC), Name);
+    return Insert(BinaryOperator::CreateFNeg(V), Name);
+  }
+  Value *CreateNot(Value *V, const Twine &Name = "") {
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateNot(VC), Name);
+    return Insert(BinaryOperator::CreateNot(V), Name);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Memory Instructions
+  //===--------------------------------------------------------------------===//
+
+  AllocaInst *CreateAlloca(const Type *Ty, Value *ArraySize = 0,
+                           const Twine &Name = "") {
+    return Insert(new AllocaInst(Ty, ArraySize), Name);
+  }
+  // Provided to resolve 'CreateLoad(Ptr, "...")' correctly, instead of
+  // converting the string to 'bool' for the isVolatile parameter.
+  LoadInst *CreateLoad(Value *Ptr, const char *Name) {
+    return Insert(new LoadInst(Ptr), Name);
+  }
+  LoadInst *CreateLoad(Value *Ptr, const Twine &Name = "") {
+    return Insert(new LoadInst(Ptr), Name);
+  }
+  LoadInst *CreateLoad(Value *Ptr, bool isVolatile, const Twine &Name = "") {
+    return Insert(new LoadInst(Ptr, 0, isVolatile), Name);
+  }
+  StoreInst *CreateStore(Value *Val, Value *Ptr, bool isVolatile = false) {
+    return Insert(new StoreInst(Val, Ptr, isVolatile));
+  }
+  template<typename RandomAccessIterator>
+  Value *CreateGEP(Value *Ptr,
+                   RandomAccessIterator IdxBegin,
+                   RandomAccessIterator IdxEnd,
+                   const Twine &Name = "") {
+    if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+      // Every index must be constant.
+      RandomAccessIterator i;
+      for (i = IdxBegin; i < IdxEnd; ++i)
+        if (!isa<Constant>(*i))
+          break;
+      if (i == IdxEnd)
+        return Insert(Folder.CreateGetElementPtr(PC, &IdxBegin[0],
+                                                 IdxEnd - IdxBegin),
+                      Name);
+    }
+    return Insert(GetElementPtrInst::Create(Ptr, IdxBegin, IdxEnd), Name);
+  }
+  template<typename RandomAccessIterator>
+  Value *CreateInBoundsGEP(Value *Ptr, RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
+                           const Twine &Name = "") {
+    if (Constant *PC = dyn_cast<Constant>(Ptr)) {
+      // Every index must be constant.
+      RandomAccessIterator i;
+      for (i = IdxBegin; i < IdxEnd; ++i)
+        if (!isa<Constant>(*i))
+          break;
+      if (i == IdxEnd)
+        return Insert(Folder.CreateInBoundsGetElementPtr(PC,
+                                                         &IdxBegin[0],
+                                                         IdxEnd - IdxBegin),
+                      Name);
+    }
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, IdxBegin, IdxEnd),
+                  Name);
+  }
+  Value *CreateGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      if (Constant *IC = dyn_cast<Constant>(Idx))
+        return Insert(Folder.CreateGetElementPtr(PC, &IC, 1), Name);
+    return Insert(GetElementPtrInst::Create(Ptr, Idx), Name);
+  }
+  Value *CreateInBoundsGEP(Value *Ptr, Value *Idx, const Twine &Name = "") {
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      if (Constant *IC = dyn_cast<Constant>(Idx))
+        return Insert(Folder.CreateInBoundsGetElementPtr(PC, &IC, 1), Name);
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idx), Name);
+  }
+  Value *CreateConstGEP1_32(Value *Ptr, unsigned Idx0, const Twine &Name = "") {
+    Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateGetElementPtr(PC, &Idx, 1), Name);
+
+    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
+  }
+  Value *CreateConstInBoundsGEP1_32(Value *Ptr, unsigned Idx0,
+                                    const Twine &Name = "") {
+    Value *Idx = ConstantInt::get(Type::getInt32Ty(Context), Idx0);
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1), Name);
+
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
+  }
+  Value *CreateConstGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
+                            const Twine &Name = "") {
+    Value *Idxs[] = {
+      ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+      ConstantInt::get(Type::getInt32Ty(Context), Idx1)
+    };
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateGetElementPtr(PC, Idxs, 2), Name);
+
+    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
+  }
+  Value *CreateConstInBoundsGEP2_32(Value *Ptr, unsigned Idx0, unsigned Idx1,
+                                    const Twine &Name = "") {
+    Value *Idxs[] = {
+      ConstantInt::get(Type::getInt32Ty(Context), Idx0),
+      ConstantInt::get(Type::getInt32Ty(Context), Idx1)
+    };
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2), Name);
+
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
+  }
+  Value *CreateConstGEP1_64(Value *Ptr, uint64_t Idx0, const Twine &Name = "") {
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateGetElementPtr(PC, &Idx, 1), Name);
+
+    return Insert(GetElementPtrInst::Create(Ptr, &Idx, &Idx+1), Name);
+  }
+  Value *CreateConstInBoundsGEP1_64(Value *Ptr, uint64_t Idx0,
+                                    const Twine &Name = "") {
+    Value *Idx = ConstantInt::get(Type::getInt64Ty(Context), Idx0);
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, &Idx, 1), Name);
+
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, &Idx, &Idx+1), Name);
+  }
+  Value *CreateConstGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+                            const Twine &Name = "") {
+    Value *Idxs[] = {
+      ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+      ConstantInt::get(Type::getInt64Ty(Context), Idx1)
+    };
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateGetElementPtr(PC, Idxs, 2), Name);
+
+    return Insert(GetElementPtrInst::Create(Ptr, Idxs, Idxs+2), Name);
+  }
+  Value *CreateConstInBoundsGEP2_64(Value *Ptr, uint64_t Idx0, uint64_t Idx1,
+                                    const Twine &Name = "") {
+    Value *Idxs[] = {
+      ConstantInt::get(Type::getInt64Ty(Context), Idx0),
+      ConstantInt::get(Type::getInt64Ty(Context), Idx1)
+    };
+
+    if (Constant *PC = dyn_cast<Constant>(Ptr))
+      return Insert(Folder.CreateInBoundsGetElementPtr(PC, Idxs, 2), Name);
+
+    return Insert(GetElementPtrInst::CreateInBounds(Ptr, Idxs, Idxs+2), Name);
+  }
+  Value *CreateStructGEP(Value *Ptr, unsigned Idx, const Twine &Name = "") {
+    return CreateConstInBoundsGEP2_32(Ptr, 0, Idx, Name);
+  }
+
+  /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer
+  /// with "i8*" type instead of a pointer to array of i8.
+  Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") {
+    Value *gv = CreateGlobalString(Str, Name);
+    Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
+    Value *Args[] = { zero, zero };
+    return CreateInBoundsGEP(gv, Args, Args+2, Name);
+  }
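+
+  // [Editor's sketch, not part of the original header] Typical use of the
+  // GEP helpers above, assuming an IRBuilder<> B positioned in a block and
+  // a Value *P pointing at a two-field struct (hypothetical names):
+  //   Value *Field = B.CreateStructGEP(P, 1, "field");  // address of field 1
+  //   Value *Same  = B.CreateConstInBoundsGEP2_32(P, 0, 1, "field");
+  //   Value *Str   = B.CreateGlobalStringPtr("hello");  // i8* into a global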
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Cast/Conversion Operators
+  //===--------------------------------------------------------------------===//
+
+  Value *CreateTrunc(Value *V, const Type *DestTy, const Twine &Name = "") {
+    return CreateCast(Instruction::Trunc, V, DestTy, Name);
+  }
+  Value *CreateZExt(Value *V, const Type *DestTy, const Twine &Name = "") {
+    return CreateCast(Instruction::ZExt, V, DestTy, Name);
+  }
+  Value *CreateSExt(Value *V, const Type *DestTy, const Twine &Name = "") {
+    return CreateCast(Instruction::SExt, V, DestTy, Name);
+  }
+  Value *CreateFPToUI(Value *V, const Type *DestTy, const Twine &Name = ""){
+    return CreateCast(Instruction::FPToUI, V, DestTy, Name);
+  }
+  Value *CreateFPToSI(Value *V, const Type *DestTy, const Twine &Name = ""){
+    return CreateCast(Instruction::FPToSI, V, DestTy, Name);
+  }
+  Value *CreateUIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){
+    return CreateCast(Instruction::UIToFP, V, DestTy, Name);
+  }
+  Value *CreateSIToFP(Value *V, const Type *DestTy, const Twine &Name = ""){
+    return CreateCast(Instruction::SIToFP, V, DestTy, Name);
+  }
+  Value *CreateFPTrunc(Value *V, const Type *DestTy,
+                       const Twine &Name = "") {
+    return CreateCast(Instruction::FPTrunc, V, DestTy, Name);
+  }
+  Value *CreateFPExt(Value *V, const Type *DestTy, const Twine &Name = "") {
+    return CreateCast(Instruction::FPExt, V, DestTy, Name);
+  }
+  Value *CreatePtrToInt(Value *V, const Type *DestTy,
+                        const Twine &Name = "") {
+    return CreateCast(Instruction::PtrToInt, V, DestTy, Name);
+  }
+  Value *CreateIntToPtr(Value *V, const Type *DestTy,
+                        const Twine &Name = "") {
+    return CreateCast(Instruction::IntToPtr, V, DestTy, Name);
+  }
+  Value *CreateBitCast(Value *V, const Type *DestTy,
+                       const Twine &Name = "") {
+    return CreateCast(Instruction::BitCast, V, DestTy, Name);
+  }
+  Value *CreateZExtOrBitCast(Value *V, const Type *DestTy,
+                             const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateZExtOrBitCast(VC, DestTy), Name);
+    return Insert(CastInst::CreateZExtOrBitCast(V, DestTy), Name);
+  }
+  Value *CreateSExtOrBitCast(Value *V, const Type *DestTy,
+                             const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateSExtOrBitCast(VC, DestTy), Name);
+    return Insert(CastInst::CreateSExtOrBitCast(V, DestTy), Name);
+  }
+  Value *CreateTruncOrBitCast(Value *V, const Type *DestTy,
+                              const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateTruncOrBitCast(VC, DestTy), Name);
+    return Insert(CastInst::CreateTruncOrBitCast(V, DestTy), Name);
+  }
+  Value *CreateCast(Instruction::CastOps Op, Value *V, const Type *DestTy,
+                    const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateCast(Op, VC, DestTy), Name);
+    return Insert(CastInst::Create(Op, V, DestTy), Name);
+  }
+  Value *CreatePointerCast(Value *V, const Type *DestTy,
+                           const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreatePointerCast(VC, DestTy), Name);
+    return Insert(CastInst::CreatePointerCast(V, DestTy), Name);
+  }
+  Value *CreateIntCast(Value *V, const Type *DestTy, bool isSigned,
+                       const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateIntCast(VC, DestTy, isSigned), Name);
+    return Insert(CastInst::CreateIntegerCast(V, DestTy, isSigned), Name);
+  }
+private:
+  // Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a compile time
+  // error, instead of converting the string to bool for the isSigned parameter.
+  Value *CreateIntCast(Value *, const Type *, const char *); // DO NOT IMPLEMENT
+public:
+  Value *CreateFPCast(Value *V, const Type *DestTy, const Twine &Name = "") {
+    if (V->getType() == DestTy)
+      return V;
+    if (Constant *VC = dyn_cast<Constant>(V))
+      return Insert(Folder.CreateFPCast(VC, DestTy), Name);
+    return Insert(CastInst::CreateFPCast(V, DestTy), Name);
+  }
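+
+  // [Editor's sketch, not part of the original header] The cast helpers are
+  // no-ops when the types already match, and fold constant operands; e.g.,
+  // with an i8 Value *V, a context Ctx, and an IRBuilder<> B (hypothetical):
+  //   Value *W = B.CreateZExt(V, Type::getInt32Ty(Ctx), "w");     // i8 -> i32
+  //   Value *X = B.CreateZExtOrBitCast(W, Type::getInt32Ty(Ctx)); // returns W
+  //   Value *Y = B.CreateIntCast(W, Type::getInt64Ty(Ctx), /*isSigned=*/true);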
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Compare Instructions
+  //===--------------------------------------------------------------------===//
+
+  Value *CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_EQ, LHS, RHS, Name);
+  }
+  Value *CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_NE, LHS, RHS, Name);
+  }
+  Value *CreateICmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_UGT, LHS, RHS, Name);
+  }
+  Value *CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_UGE, LHS, RHS, Name);
+  }
+  Value *CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_ULT, LHS, RHS, Name);
+  }
+  Value *CreateICmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_ULE, LHS, RHS, Name);
+  }
+  Value *CreateICmpSGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_SGT, LHS, RHS, Name);
+  }
+  Value *CreateICmpSGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_SGE, LHS, RHS, Name);
+  }
+  Value *CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_SLT, LHS, RHS, Name);
+  }
+  Value *CreateICmpSLE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateICmp(ICmpInst::ICMP_SLE, LHS, RHS, Name);
+  }
+
+  Value *CreateFCmpOEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_OEQ, LHS, RHS, Name);
+  }
+  Value *CreateFCmpOGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_OGT, LHS, RHS, Name);
+  }
+  Value *CreateFCmpOGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_OGE, LHS, RHS, Name);
+  }
+  Value *CreateFCmpOLT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_OLT, LHS, RHS, Name);
+  }
+  Value *CreateFCmpOLE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_OLE, LHS, RHS, Name);
+  }
+  Value *CreateFCmpONE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_ONE, LHS, RHS, Name);
+  }
+  Value *CreateFCmpORD(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_ORD, LHS, RHS, Name);
+  }
+  Value *CreateFCmpUNO(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_UNO, LHS, RHS, Name);
+  }
+  Value *CreateFCmpUEQ(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_UEQ, LHS, RHS, Name);
+  }
+  Value *CreateFCmpUGT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_UGT, LHS, RHS, Name);
+  }
+  Value *CreateFCmpUGE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_UGE, LHS, RHS, Name);
+  }
+  Value *CreateFCmpULT(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_ULT, LHS, RHS, Name);
+  }
+  Value *CreateFCmpULE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_ULE, LHS, RHS, Name);
+  }
+  Value *CreateFCmpUNE(Value *LHS, Value *RHS, const Twine &Name = "") {
+    return CreateFCmp(FCmpInst::FCMP_UNE, LHS, RHS, Name);
+  }
+
+  Value *CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
+                    const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateICmp(P, LC, RC), Name);
+    return Insert(new ICmpInst(P, LHS, RHS), Name);
+  }
+  Value *CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS,
+                    const Twine &Name = "") {
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      if (Constant *RC = dyn_cast<Constant>(RHS))
+        return Insert(Folder.CreateFCmp(P, LC, RC), Name);
+    return Insert(new FCmpInst(P, LHS, RHS), Name);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Instruction creation methods: Other Instructions
+  //===--------------------------------------------------------------------===//
+
+  PHINode *CreatePHI(const Type *Ty, const Twine &Name = "") {
+    return Insert(PHINode::Create(Ty), Name);
+  }
+
+  CallInst *CreateCall(Value *Callee, const Twine &Name = "") {
+    return Insert(CallInst::Create(Callee), Name);
+  }
+  CallInst *CreateCall(Value *Callee, Value *Arg, const Twine &Name = "") {
+    return Insert(CallInst::Create(Callee, Arg), Name);
+  }
+  CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2,
+                        const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2 };
+    return Insert(CallInst::Create(Callee, Args, Args+2), Name);
+  }
+  CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+                        const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3 };
+    return Insert(CallInst::Create(Callee, Args, Args+3), Name);
+  }
+  CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+                        Value *Arg4, const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
+    return Insert(CallInst::Create(Callee, Args, Args+4), Name);
+  }
+  CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
+                        Value *Arg4, Value *Arg5, const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 };
+    return Insert(CallInst::Create(Callee, Args, Args+5), Name);
+  }
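+
+  // [Editor's sketch, not part of the original header] Building a compare and
+  // a three-argument call with the helpers above; Callee, X and Y are
+  // hypothetical values created elsewhere with this builder B:
+  //   Value    *Cmp = B.CreateICmpSLT(X, Y, "cmp");       // yields an i1
+  //   CallInst *CI  = B.CreateCall3(Callee, X, Y, Cmp, "ret");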
+
+  template<typename RandomAccessIterator>
+  CallInst *CreateCall(Value *Callee, RandomAccessIterator ArgBegin,
+                       RandomAccessIterator ArgEnd, const Twine &Name = "") {
+    return Insert(CallInst::Create(Callee, ArgBegin, ArgEnd), Name);
+  }
+
+  Value *CreateSelect(Value *C, Value *True, Value *False,
+                      const Twine &Name = "") {
+    if (Constant *CC = dyn_cast<Constant>(C))
+      if (Constant *TC = dyn_cast<Constant>(True))
+        if (Constant *FC = dyn_cast<Constant>(False))
+          return Insert(Folder.CreateSelect(CC, TC, FC), Name);
+    return Insert(SelectInst::Create(C, True, False), Name);
+  }
+
+  VAArgInst *CreateVAArg(Value *List, const Type *Ty, const Twine &Name = "") {
+    return Insert(new VAArgInst(List, Ty), Name);
+  }
+
+  Value *CreateExtractElement(Value *Vec, Value *Idx,
+                              const Twine &Name = "") {
+    if (Constant *VC = dyn_cast<Constant>(Vec))
+      if (Constant *IC = dyn_cast<Constant>(Idx))
+        return Insert(Folder.CreateExtractElement(VC, IC), Name);
+    return Insert(ExtractElementInst::Create(Vec, Idx), Name);
+  }
+
+  Value *CreateInsertElement(Value *Vec, Value *NewElt, Value *Idx,
+                             const Twine &Name = "") {
+    if (Constant *VC = dyn_cast<Constant>(Vec))
+      if (Constant *NC = dyn_cast<Constant>(NewElt))
+        if (Constant *IC = dyn_cast<Constant>(Idx))
+          return Insert(Folder.CreateInsertElement(VC, NC, IC), Name);
+    return Insert(InsertElementInst::Create(Vec, NewElt, Idx), Name);
+  }
+
+  Value *CreateShuffleVector(Value *V1, Value *V2, Value *Mask,
+                             const Twine &Name = "") {
+    if (Constant *V1C = dyn_cast<Constant>(V1))
+      if (Constant *V2C = dyn_cast<Constant>(V2))
+        if (Constant *MC = dyn_cast<Constant>(Mask))
+          return Insert(Folder.CreateShuffleVector(V1C, V2C, MC), Name);
+    return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
+  }
+
+  Value *CreateExtractValue(Value *Agg, unsigned Idx,
+                            const Twine &Name = "") {
+    if (Constant *AggC = dyn_cast<Constant>(Agg))
+      return Insert(Folder.CreateExtractValue(AggC, &Idx, 1), Name);
+    return Insert(ExtractValueInst::Create(Agg, Idx), Name);
+  }
+
+  template<typename RandomAccessIterator>
+  Value *CreateExtractValue(Value *Agg,
+                            RandomAccessIterator IdxBegin,
+                            RandomAccessIterator IdxEnd,
+                            const Twine &Name = "") {
+    if (Constant *AggC = dyn_cast<Constant>(Agg))
+      return Insert(Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd-IdxBegin),
+                    Name);
+    return Insert(ExtractValueInst::Create(Agg, IdxBegin, IdxEnd), Name);
+  }
+
+  Value *CreateInsertValue(Value *Agg, Value *Val, unsigned Idx,
+                           const Twine &Name = "") {
+    if (Constant *AggC = dyn_cast<Constant>(Agg))
+      if (Constant *ValC = dyn_cast<Constant>(Val))
+        return Insert(Folder.CreateInsertValue(AggC, ValC, &Idx, 1), Name);
+    return Insert(InsertValueInst::Create(Agg, Val, Idx), Name);
+  }
+
+  template<typename RandomAccessIterator>
+  Value *CreateInsertValue(Value *Agg, Value *Val,
+                           RandomAccessIterator IdxBegin,
+                           RandomAccessIterator IdxEnd,
+                           const Twine &Name = "") {
+    if (Constant *AggC = dyn_cast<Constant>(Agg))
+      if (Constant *ValC = dyn_cast<Constant>(Val))
+        return Insert(Folder.CreateInsertValue(AggC, ValC, IdxBegin,
+                                               IdxEnd - IdxBegin),
+                      Name);
+    return Insert(InsertValueInst::Create(Agg, Val, IdxBegin, IdxEnd), Name);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Utility creation methods
+  //===--------------------------------------------------------------------===//
+
+  /// CreateIsNull - Return an i1 value testing if \arg Arg is null.
+  Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
+    return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()),
+                        Name);
+  }
+
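+  // [Editor's sketch, not part of the original header] Aggregate access and a
+  // null test with the methods above; Agg is a hypothetical {i32, float}
+  // value, Elt2 a float value, and P a pointer value:
+  //   Value *Elt   = B.CreateExtractValue(Agg, 0, "elt");
+  //   Value *Agg2  = B.CreateInsertValue(Agg, Elt2, 1, "agg2");
+  //   Value *IsNil = B.CreateIsNull(P, "isnil");  // icmp eq P, null
+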
+  /// CreateIsNotNull - Return an i1 value testing if \arg Arg is not null.
+  Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
+    return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()),
+                        Name);
+  }
+
+  /// CreatePtrDiff - Return the i64 difference between two pointer values,
+  /// dividing out the size of the pointed-to objects.  This is intended to
+  /// implement C-style pointer subtraction. As such, the pointers must be
+  /// appropriately aligned for their element types and pointing into the
+  /// same object.
+  Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
+    assert(LHS->getType() == RHS->getType() &&
+           "Pointer subtraction operand types must match!");
+    const PointerType *ArgType = cast<PointerType>(LHS->getType());
+    Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
+    Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
+    Value *Difference = CreateSub(LHS_int, RHS_int);
+    return CreateExactSDiv(Difference,
+                           ConstantExpr::getSizeOf(ArgType->getElementType()),
+                           Name);
+  }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/Support/IRReader.h b/final/include/llvm/Support/IRReader.h
new file mode 100644
index 00000000000..292c001e09f
--- /dev/null
+++ b/final/include/llvm/Support/IRReader.h
@@ -0,0 +1,110 @@
+//===---- llvm/Support/IRReader.h - Reader for LLVM IR files ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions for reading LLVM IR. They support both
+// Bitcode and Assembly, automatically detecting the input format.
+//
+// These functions must be defined in a header file in order to avoid
+// library dependencies, since they reference both Bitcode and Assembly
+// functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_IRREADER_H
+#define LLVM_SUPPORT_IRREADER_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+
+  /// If the given MemoryBuffer holds a bitcode image, return a Module for it
+  /// which does lazy deserialization of function bodies.  Otherwise, attempt to
+  /// parse it as LLVM Assembly and return a fully populated Module. This
+  /// function *always* takes ownership of the given MemoryBuffer.
+  inline Module *getLazyIRModule(MemoryBuffer *Buffer,
+                                 SMDiagnostic &Err,
+                                 LLVMContext &Context) {
+    if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+                  (const unsigned char *)Buffer->getBufferEnd())) {
+      std::string ErrMsg;
+      Module *M = getLazyBitcodeModule(Buffer, Context, &ErrMsg);
+      if (M == 0) {
+        Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
+        // getLazyBitcodeModule does not take ownership of the Buffer in the
+        // case of an error.
+        delete Buffer;
+      }
+      return M;
+    }
+
+    return ParseAssembly(Buffer, 0, Err, Context);
+  }
+
+  /// If the given file holds a bitcode image, return a Module
+  /// for it which does lazy deserialization of function bodies.  Otherwise,
+  /// attempt to parse it as LLVM Assembly and return a fully populated
+  /// Module.
+  inline Module *getLazyIRFileModule(const std::string &Filename,
+                                     SMDiagnostic &Err,
+                                     LLVMContext &Context) {
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+      Err = SMDiagnostic(Filename,
+                         "Could not open input file: " + ec.message());
+      return 0;
+    }
+
+    return getLazyIRModule(File.take(), Err, Context);
+  }
+
+  /// If the given MemoryBuffer holds a bitcode image, return a Module
+  /// for it.  Otherwise, attempt to parse it as LLVM Assembly and return
+  /// a Module for it. This function *always* takes ownership of the given
+  /// MemoryBuffer.
+  inline Module *ParseIR(MemoryBuffer *Buffer,
+                         SMDiagnostic &Err,
+                         LLVMContext &Context) {
+    if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
+                  (const unsigned char *)Buffer->getBufferEnd())) {
+      std::string ErrMsg;
+      Module *M = ParseBitcodeFile(Buffer, Context, &ErrMsg);
+      if (M == 0)
+        Err = SMDiagnostic(Buffer->getBufferIdentifier(), ErrMsg);
+      // ParseBitcodeFile does not take ownership of the Buffer.
+      delete Buffer;
+      return M;
+    }
+
+    return ParseAssembly(Buffer, 0, Err, Context);
+  }
+
+  /// If the given file holds a bitcode image, return a Module for it.
+  /// Otherwise, attempt to parse it as LLVM Assembly and return a Module
+  /// for it.
+  inline Module *ParseIRFile(const std::string &Filename,
+                             SMDiagnostic &Err,
+                             LLVMContext &Context) {
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+      Err = SMDiagnostic(Filename,
+                         "Could not open input file: " + ec.message());
+      return 0;
+    }
+
+    return ParseIR(File.take(), Err, Context);
+  }
+
+}
+
+#endif
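[Editor's note: a minimal usage sketch for the readers above, not part of the
patch. It assumes the 2.9-era headers shown here plus llvm/Support/raw_ostream.h
for errs(); "mytool" and "input.ll" are placeholder names.]

    LLVMContext Context;
    SMDiagnostic Err;
    Module *M = ParseIRFile("input.ll", Err, Context);  // .ll or .bc alike
    if (M == 0) {
      Err.Print("mytool", errs());  // report the open/parse failure
      return 1;
    }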
diff --git a/final/include/llvm/Support/IncludeFile.h b/final/include/llvm/Support/IncludeFile.h
new file mode 100644
index 00000000000..a9319725d47
--- /dev/null
+++ b/final/include/llvm/Support/IncludeFile.h
@@ -0,0 +1,79 @@
+//===- llvm/Support/IncludeFile.h - Ensure Linking Of Library ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FORCE_DEFINING_FILE_TO_BE_LINKED and DEFINING_FILE_FOR
+// macros.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_INCLUDEFILE_H
+#define LLVM_SYSTEM_INCLUDEFILE_H
+
+/// This macro is the public interface that IncludeFile.h exports. This gives
+/// us the option to implement the "link the definition" capability in any
+/// manner that we choose. All header files that depend on a specific .cpp
+/// file being linked at run time should use this macro instead of the
+/// IncludeFile class directly.
+///
+/// For example, foo.h would use:
+///   FORCE_DEFINING_FILE_TO_BE_LINKED(foo)
+///
+/// And, foo.cpp would use:
+///   DEFINING_FILE_FOR(foo)
+#ifdef __GNUC__
+// If the `used' attribute is available, use it to create a variable
+// with an initializer that will force the linking of the defining file.
+#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
+  namespace llvm { \
+    extern const char name ## LinkVar; \
+    __attribute__((used)) static const char *const name ## LinkObj = \
+      &name ## LinkVar; \
+  }
+#else
+// Otherwise use a constructor call.
+#define FORCE_DEFINING_FILE_TO_BE_LINKED(name) \
+  namespace llvm { \
+    extern const char name ## LinkVar; \
+    static const IncludeFile name ## LinkObj ( &name ## LinkVar ); \
+  }
+#endif
+
+/// This macro is the counterpart to FORCE_DEFINING_FILE_TO_BE_LINKED. It
+/// should be used in a .cpp file to define the name referenced in a header
+/// file that will cause linkage of the .cpp file. It should only be used at
+/// extern level.
+#define DEFINING_FILE_FOR(name) \
+  namespace llvm { const char name ## LinkVar = 0; }
+
+namespace llvm {
+
+/// This class is used in the implementation of the
+/// FORCE_DEFINING_FILE_TO_BE_LINKED macro to make sure that the implementation
+/// of a header file is included into a tool that uses the header.  This is
+/// solely to overcome problems linking .a files and not getting the
+/// implementation of compilation units we need.  This is commonly an issue
+/// with the various Passes but also occurs elsewhere in LLVM.  We like to use
+/// .a files because they link faster and provide the smallest executables.
+/// However, sometimes those executables are too small, if the program doesn't
+/// reference something that might be needed, especially by a loaded shared
+/// object.  This little class helps to resolve that problem.  The basic
+/// strategy is to use this class in a header file and pass the address of a
+/// variable to the constructor.  If the variable is defined in the header
+/// file's corresponding .cpp file then all tools/libraries that \#include the
+/// header file will require the .cpp as well.
+/// For example:
+///   extern int LinkMyCodeStub;
+///   static IncludeFile LinkMyModule(&LinkMyCodeStub);
+/// @brief Class to ensure linking of corresponding object file.
+struct IncludeFile {
+  explicit IncludeFile(const void *);
+};
+
+}
+
+#endif
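[Editor's note: a sketch of the intended header/implementation pairing, not
part of the patch; "MyThing" is a placeholder name.]

    // MyThing.h -- anyone including this header now links MyThing.cpp in.
    FORCE_DEFINING_FILE_TO_BE_LINKED(MyThing)

    // MyThing.cpp -- provides the variable the header references.
    DEFINING_FILE_FOR(MyThing)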
diff --git a/final/include/llvm/Support/InstIterator.h b/final/include/llvm/Support/InstIterator.h
new file mode 100644
index 00000000000..7d3f8835098
--- /dev/null
+++ b/final/include/llvm/Support/InstIterator.h
@@ -0,0 +1,147 @@
+//===- llvm/Support/InstIterator.h - Classes for inst iteration -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains definitions of two iterators for iterating over the
+// instructions in a function.  This is effectively a wrapper around a two level
+// iterator that can probably be genericized later.
+//
+// Note that this iterator gets invalidated any time that basic blocks or
+// instructions are moved around.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_INSTITERATOR_H
+#define LLVM_SUPPORT_INSTITERATOR_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+
+namespace llvm {
+
+// This class implements inst_begin() & inst_end() for
+// inst_iterator and const_inst_iterator's.
+//
+template <class _BB_t, class _BB_i_t, class _BI_t, class _II_t>
+class InstIterator {
+  typedef _BB_t   BBty;
+  typedef _BB_i_t BBIty;
+  typedef _BI_t   BIty;
+  typedef _II_t   IIty;
+  _BB_t  *BBs;      // BasicBlocksType
+  _BB_i_t BB;       // BasicBlocksType::iterator
+  _BI_t   BI;       // BasicBlock::iterator
+public:
+  typedef std::bidirectional_iterator_tag iterator_category;
+  typedef IIty                            value_type;
+  typedef signed                          difference_type;
+  typedef IIty*                           pointer;
+  typedef IIty&                           reference;
+
+  // Default constructor
+  InstIterator() {}
+
+  // Copy constructor...
+  template<typename A, typename B, typename C, typename D>
+  InstIterator(const InstIterator<A, B, C, D> &II)
+    : BBs(II.BBs), BB(II.BB), BI(II.BI) {}
+
+  template<typename A, typename B, typename C, typename D>
+  InstIterator(InstIterator<A, B, C, D> &II)
+    : BBs(II.BBs), BB(II.BB), BI(II.BI) {}
+
+  template<class M> InstIterator(M &m)
+    : BBs(&m.getBasicBlockList()), BB(BBs->begin()) {    // begin ctor
+    if (BB != BBs->end()) {
+      BI = BB->begin();
+      advanceToNextBB();
+    }
+  }
+
+  template<class M> InstIterator(M &m, bool)
+    : BBs(&m.getBasicBlockList()), BB(BBs->end()) {    // end ctor
+  }
+
+  // Accessors to get at the underlying iterators...
+  inline BBIty &getBasicBlockIterator()  { return BB; }
+  inline BIty  &getInstructionIterator() { return BI; }
+
+  inline reference operator*()  const { return *BI; }
+  inline pointer   operator->() const { return &operator*(); }
+
+  inline bool operator==(const InstIterator &y) const {
+    return BB == y.BB && (BB == BBs->end() || BI == y.BI);
+  }
+  inline bool operator!=(const InstIterator& y) const {
+    return !operator==(y);
+  }
+
+  InstIterator& operator++() {
+    ++BI;
+    advanceToNextBB();
+    return *this;
+  }
+  inline InstIterator operator++(int) {
+    InstIterator tmp = *this; ++*this; return tmp;
+  }
+
+  InstIterator& operator--() {
+    while (BB == BBs->end() || BI == BB->begin()) {
+      --BB;
+      BI = BB->end();
+    }
+    --BI;
+    return *this;
+  }
+  inline InstIterator operator--(int) {
+    InstIterator tmp = *this; --*this; return tmp;
+  }
+
+  inline bool atEnd() const { return BB == BBs->end(); }
+
+private:
+  inline void advanceToNextBB() {
+    // The only way that the II could be broken is if it is now pointing to
+    // the end() of the current BasicBlock and there are successor BBs.
+    while (BI == BB->end()) {
+      ++BB;
+      if (BB == BBs->end()) break;
+      BI = BB->begin();
+    }
+  }
+};
+
+
+typedef InstIterator<iplist<BasicBlock>,
+                     Function::iterator, BasicBlock::iterator,
+                     Instruction> inst_iterator;
+typedef InstIterator<const iplist<BasicBlock>,
+                     Function::const_iterator,
+                     BasicBlock::const_iterator,
+                     const Instruction> const_inst_iterator;
+
+inline inst_iterator inst_begin(Function *F) { return inst_iterator(*F); }
+inline inst_iterator inst_end(Function *F)   { return inst_iterator(*F, true); }
+inline const_inst_iterator inst_begin(const Function *F) {
+  return const_inst_iterator(*F);
+}
+inline const_inst_iterator inst_end(const Function *F) {
+  return const_inst_iterator(*F, true);
+}
+inline inst_iterator inst_begin(Function &F) { return inst_iterator(F); }
+inline inst_iterator inst_end(Function &F)   { return inst_iterator(F, true); }
+inline const_inst_iterator inst_begin(const Function &F) {
+  return const_inst_iterator(F);
+}
+inline const_inst_iterator inst_end(const Function &F) {
+  return const_inst_iterator(F, true);
+}
+
+} // End llvm namespace
+
+#endif
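[Editor's note: the canonical loop over these iterators, not part of the
patch; F is a Function reference.]

    unsigned Count = 0;
    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
      ++Count;  // *I is an Instruction&, regardless of which block it is in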
diff --git a/final/include/llvm/Support/InstVisitor.h b/final/include/llvm/Support/InstVisitor.h
new file mode 100644
index 00000000000..b2e5d58b7c3
--- /dev/null
+++ b/final/include/llvm/Support/InstVisitor.h
@@ -0,0 +1,217 @@
+//===- llvm/Support/InstVisitor.h - Define instruction visitors -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_SUPPORT_INSTVISITOR_H
+#define LLVM_SUPPORT_INSTVISITOR_H
+
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+// We operate on opaque instruction classes, so forward declare all instruction
+// types now...
+//
+#define HANDLE_INST(NUM, OPCODE, CLASS)   class CLASS;
+#include "llvm/Instruction.def"
+
+#define DELEGATE(CLASS_TO_VISIT) \
+  return static_cast<SubClass*>(this)-> \
+    visit##CLASS_TO_VISIT(static_cast<CLASS_TO_VISIT&>(I))
+
+
+/// @brief Base class for instruction visitors
+///
+/// Instruction visitors are used when you want to perform different actions
+/// for different kinds of instructions without having to use lots of casts
+/// and a big switch statement (in your code, that is).
+///
+/// To define your own visitor, inherit from this class, specifying your
+/// new type for the 'SubClass' template parameter, and "override" visitXXX
+/// functions in your class. I say "override" because this class is defined
+/// in terms of statically resolved overloading, not virtual functions.
+///
+/// For example, here is a visitor that counts the number of alloca
+/// instructions processed:
+///
+///   /// Declare the class.  Note that we derive from InstVisitor instantiated
+///   /// with _our new subclasses_ type.
+///   ///
+///   struct CountAllocaVisitor : public InstVisitor<CountAllocaVisitor> {
+///     unsigned Count;
+///     CountAllocaVisitor() : Count(0) {}
+///
+///     void visitAllocaInst(AllocaInst &AI) { ++Count; }
+///   };
+///
+///   And this class would be used like this:
+///     CountAllocaVisitor CAV;
+///     CAV.visit(function);
+///     NumAllocas = CAV.Count;
+///
+/// InstVisitor defines 'visit' methods for Instruction, and also for
+/// BasicBlock, Function, and Module, which recursively process all contained
+/// instructions.
+///
+/// Note that if you don't implement visitXXX for some instruction type,
+/// the visitXXX method for the instruction's superclass will be invoked. So
+/// if instructions are added in the future, they will be automatically
+/// supported, if you handle one of their superclasses.
+///
+/// The optional second template argument specifies the type that instruction
+/// visitation functions should return. If you specify this, you *MUST* provide
+/// an implementation of visitInstruction though!
+///
+/// Note that this class is specifically designed as a template to avoid
+/// virtual function call overhead.  Defining and using an InstVisitor is just
+/// as efficient as having your own switch statement over the instruction
+/// opcode.
+template<typename SubClass, typename RetTy=void>
+class InstVisitor {
+  //===--------------------------------------------------------------------===//
+  // Interface code - This is the public interface of the InstVisitor that you
+  // use to visit instructions...
+  //
+
+public:
+  // Generic visit method - Allow visitation to all instructions in a range
+  template<class Iterator>
+  void visit(Iterator Start, Iterator End) {
+    while (Start != End)
+      static_cast<SubClass*>(this)->visit(*Start++);
+  }
+
+  // Define visitors for functions and basic blocks...
+  //
+  void visit(Module &M) {
+    static_cast<SubClass*>(this)->visitModule(M);
+    visit(M.begin(), M.end());
+  }
+  void visit(Function &F) {
+    static_cast<SubClass*>(this)->visitFunction(F);
+    visit(F.begin(), F.end());
+  }
+  void visit(BasicBlock &BB) {
+    static_cast<SubClass*>(this)->visitBasicBlock(BB);
+    visit(BB.begin(), BB.end());
+  }
+
+  // Forwarding functions so that the user can visit with pointers AND refs.
+  void visit(Module       *M)  { visit(*M); }
+  void visit(Function     *F)  { visit(*F); }
+  void visit(BasicBlock   *BB) { visit(*BB); }
+  RetTy visit(Instruction *I)  { return visit(*I); }
+
+  // visit - Finally, code to visit an instruction...
+  //
+  RetTy visit(Instruction &I) {
+    switch (I.getOpcode()) {
+    default: llvm_unreachable("Unknown instruction type encountered!");
+      // Build the switch statement using the Instruction.def file...
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+    case Instruction::OPCODE: return \
+           static_cast<SubClass*>(this)-> \
+                      visit##OPCODE(static_cast<CLASS&>(I));
+#include "llvm/Instruction.def"
+    }
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Visitation functions... these functions provide default fallbacks in case
+  // the user does not specify what to do for a particular instruction type.
+  // The default behavior is to generalize the instruction type to its
+  // superclass and try visiting the superclass.  All of this should be inlined
+  // perfectly, because there are no virtual functions to get in the way.
+  //
+
+  // When visiting a module, function or basic block directly, these methods get
+  // called to indicate when transitioning into a new unit.
+  //
+  void visitModule    (Module &M) {}
+  void visitFunction  (Function &F) {}
+  void visitBasicBlock(BasicBlock &BB) {}
+
+  // Define instruction specific visitor functions that can be overridden to
+  // handle SPECIFIC instructions.  These functions automatically define
+  // visitMul to proxy to visitBinaryOperator for instance in case the user does
+  // not need this generality.
+  //
+  // The one problem case we have to handle here though is that the PHINode
+  // class and opcode name are the exact same.  Because of this, we cannot
+  // define visitPHINode (the inst version) to forward to visitPHINode (the
+  // generic version) without multiply defined symbols and recursion.  To handle
+  // this, we do not autoexpand "Other" instructions, we do it manually.
+  //
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+    RetTy visit##OPCODE(CLASS &I) { DELEGATE(CLASS); }
+#include "llvm/Instruction.def"
+
+  // Specific Instruction type classes... note that all of the casts are
+  // necessary because we use the instruction classes as opaque types...
+  //
+  RetTy visitReturnInst(ReturnInst &I)              { DELEGATE(TerminatorInst);}
+  RetTy visitBranchInst(BranchInst &I)              { DELEGATE(TerminatorInst);}
+  RetTy visitSwitchInst(SwitchInst &I)              { DELEGATE(TerminatorInst);}
+  RetTy visitIndirectBrInst(IndirectBrInst &I)      { DELEGATE(TerminatorInst);}
+  RetTy visitInvokeInst(InvokeInst &I)              { DELEGATE(TerminatorInst);}
+  RetTy visitUnwindInst(UnwindInst &I)              { DELEGATE(TerminatorInst);}
+  RetTy visitUnreachableInst(UnreachableInst &I)    { DELEGATE(TerminatorInst);}
+  RetTy visitICmpInst(ICmpInst &I)                  { DELEGATE(CmpInst);}
+  RetTy visitFCmpInst(FCmpInst &I)                  { DELEGATE(CmpInst);}
+  RetTy visitAllocaInst(AllocaInst &I)              { DELEGATE(Instruction); }
+  RetTy visitLoadInst(LoadInst &I)                  { DELEGATE(Instruction); }
+  RetTy visitStoreInst(StoreInst &I)                { DELEGATE(Instruction); }
+  RetTy visitGetElementPtrInst(GetElementPtrInst &I){ DELEGATE(Instruction); }
+  RetTy visitPHINode(PHINode &I)                    { DELEGATE(Instruction); }
+  RetTy visitTruncInst(TruncInst &I)                { DELEGATE(CastInst); }
+  RetTy visitZExtInst(ZExtInst &I)                  { DELEGATE(CastInst); }
+  RetTy visitSExtInst(SExtInst &I)                  { DELEGATE(CastInst); }
+  RetTy visitFPTruncInst(FPTruncInst &I)            { DELEGATE(CastInst); }
+  RetTy visitFPExtInst(FPExtInst &I)                { DELEGATE(CastInst); }
+  RetTy visitFPToUIInst(FPToUIInst &I)              { DELEGATE(CastInst); }
+  RetTy visitFPToSIInst(FPToSIInst &I)              { DELEGATE(CastInst); }
+  RetTy visitUIToFPInst(UIToFPInst &I)              { DELEGATE(CastInst); }
+  RetTy visitSIToFPInst(SIToFPInst &I)              { DELEGATE(CastInst); }
+  RetTy visitPtrToIntInst(PtrToIntInst &I)          { DELEGATE(CastInst); }
+  RetTy visitIntToPtrInst(IntToPtrInst &I)          { DELEGATE(CastInst); }
+  RetTy visitBitCastInst(BitCastInst &I)            { DELEGATE(CastInst); }
+  RetTy visitSelectInst(SelectInst &I)              { DELEGATE(Instruction); }
+  RetTy visitCallInst(CallInst &I)                  { DELEGATE(Instruction); }
+  RetTy visitVAArgInst(VAArgInst &I)                { DELEGATE(Instruction); }
+  RetTy visitExtractElementInst(ExtractElementInst &I) { DELEGATE(Instruction);}
+  RetTy visitInsertElementInst(InsertElementInst &I)   { DELEGATE(Instruction); }
+  RetTy visitShuffleVectorInst(ShuffleVectorInst &I)   { DELEGATE(Instruction); }
+  RetTy visitExtractValueInst(ExtractValueInst &I)     { DELEGATE(Instruction);}
+  RetTy visitInsertValueInst(InsertValueInst &I)       { DELEGATE(Instruction); }
+
+  // Next level propagators: If the user does not overload a specific
+  // instruction type, they can overload one of these to get the whole class
+  // of instructions...
+  //
+  RetTy visitTerminatorInst(TerminatorInst &I) { DELEGATE(Instruction); }
+  RetTy visitBinaryOperator(BinaryOperator &I) { DELEGATE(Instruction); }
+  RetTy visitCmpInst(CmpInst &I)               { DELEGATE(Instruction); }
+  RetTy visitCastInst(CastInst &I)             { DELEGATE(Instruction); }
+
+  // If the user wants a 'default' case, they can choose to override this
+  // function.  If this function is not overloaded in the user's subclass, then
+  // this instruction just gets ignored.
+  //
+  // Note that you MUST override this function if your return type is not void.
+  //
+  void visitInstruction(Instruction &I) {}  // Ignore unhandled instructions
+};
+
+#undef DELEGATE
+
+} // End llvm namespace
+
+#endif
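[Editor's note: a small visitor in the style documented above, not part of the
patch; it tallies memory operations in a Function F.]

    struct MemOpCounter : public InstVisitor<MemOpCounter> {
      unsigned Loads, Stores;
      MemOpCounter() : Loads(0), Stores(0) {}
      void visitLoadInst(LoadInst &I)   { ++Loads; }
      void visitStoreInst(StoreInst &I) { ++Stores; }
    };

    MemOpCounter MC;
    MC.visit(F);  // dispatch is static; unhandled opcodes are simply ignored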
diff --git a/final/include/llvm/Support/LICENSE.TXT b/final/include/llvm/Support/LICENSE.TXT
new file mode 100644
index 00000000000..3479b3fd74d
--- /dev/null
+++ b/final/include/llvm/Support/LICENSE.TXT
@@ -0,0 +1,6 @@
+LLVM System Interface Library
+-------------------------------------------------------------------------------
+The LLVM System Interface Library is licensed under the Illinois Open Source
+License and has the following additional copyright:
+
+Copyright (C) 2004 eXtensible Systems, Inc.
diff --git a/final/include/llvm/Support/LeakDetector.h b/final/include/llvm/Support/LeakDetector.h
new file mode 100644
index 00000000000..501a9db72c1
--- /dev/null
+++ b/final/include/llvm/Support/LeakDetector.h
@@ -0,0 +1,92 @@
+//===-- llvm/Support/LeakDetector.h - Provide leak detection ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a class that can be used to provide very simple memory leak
+// checks for an API.  Basically LLVM uses this to make sure that Instructions,
+// for example, are deleted when they are supposed to be, and not leaked away.
+//
+// When compiling with NDEBUG (Release build), this class does nothing, thus
+// adding no checking overhead to release builds.  Note that this class is
+// implemented in a very simple way, requiring completely manual manipulation
+// and checking for garbage, but this is intentional: users should not be using
+// this API, only other APIs should.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_LEAKDETECTOR_H
+#define LLVM_SUPPORT_LEAKDETECTOR_H
+
+#include <string>
+
+namespace llvm {
+
+class LLVMContext;
+class Value;
+
+struct LeakDetector {
+  /// addGarbageObject - Add a pointer to the internal set of "garbage" object
+  /// pointers.  This should be called when objects are created, or if they are
+  /// taken out of an owning collection.
+  ///
+  static void addGarbageObject(void *Object) {
+#ifndef NDEBUG
+    addGarbageObjectImpl(Object);
+#endif
+  }
+
+  /// removeGarbageObject - Remove a pointer from our internal representation of
+  /// our "garbage" objects.  This should be called when an object is added to
+  /// an "owning" collection.
+  ///
+  static void removeGarbageObject(void *Object) {
+#ifndef NDEBUG
+    removeGarbageObjectImpl(Object);
+#endif
+  }
+
+  /// checkForGarbage - Traverse the internal representation of garbage
+  /// pointers.  If there are any pointers that have been add'ed, but not
+  /// remove'd, big obnoxious warnings about memory leaks are issued.
+  ///
+  /// The specified message will be printed indicating when the check was
+  /// performed.
+  ///
+  static void checkForGarbage(LLVMContext &C, const std::string &Message) {
+#ifndef NDEBUG
+    checkForGarbageImpl(C, Message);
+#endif
+  }
+
+  /// Overload the normal methods to work better with Value*'s because they are
+  /// by far the most common in LLVM.  This does not affect the actual
+  /// functioning of this class, it just makes the warning messages nicer.
+  ///
+  static void addGarbageObject(const Value *Object) {
+#ifndef NDEBUG
+    addGarbageObjectImpl(Object);
+#endif
+  }
+  static void removeGarbageObject(const Value *Object) {
+#ifndef NDEBUG
+    removeGarbageObjectImpl(Object);
+#endif
+  }
+
+private:
+  // If we are debugging, the actual implementations will be called...
+  static void addGarbageObjectImpl(const Value *Object);
+  static void removeGarbageObjectImpl(const Value *Object);
+  static void addGarbageObjectImpl(void *Object);
+  static void removeGarbageObjectImpl(void *Object);
+  static void checkForGarbageImpl(LLVMContext &C, const std::string &Message);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/MachO.h b/final/include/llvm/Support/MachO.h
new file mode 100644
index 00000000000..6841a0f1fc1
--- /dev/null
+++ b/final/include/llvm/Support/MachO.h
@@ -0,0 +1,696 @@
+//===-- llvm/Support/MachO.h - The MachO file format ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the MachO object file format.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MACHO_H
+#define LLVM_SUPPORT_MACHO_H
+
+#include "llvm/Support/DataTypes.h"
+
+// NOTE: The enums in this file are intentionally named to be different than
+// those in the headers in /usr/include/mach (on darwin systems) to avoid
+// conflicts with those macros.
+namespace llvm {
+  namespace MachO {
+    // Enums from <mach-o/loader.h>
+    enum {
+      // Constants for the "magic" field in llvm::MachO::mach_header and
+      // llvm::MachO::mach_header_64
+      HeaderMagic32         = 0xFEEDFACEu, // MH_MAGIC
+      HeaderMagic32Swapped  = 0xCEFAEDFEu, // MH_CIGAM
+      HeaderMagic64         = 0xFEEDFACFu, // MH_MAGIC_64
+      HeaderMagic64Swapped  = 0xCFFAEDFEu, // MH_CIGAM_64
+      UniversalMagic        = 0xCAFEBABEu, // FAT_MAGIC
+      UniversalMagicSwapped = 0xBEBAFECAu, // FAT_CIGAM
+
+      // Constants for the "filetype" field in llvm::MachO::mach_header and
+      // llvm::MachO::mach_header_64
+      HeaderFileTypeObject              = 0x1u, // MH_OBJECT
+      HeaderFileTypeExecutable          = 0x2u, // MH_EXECUTE
+      HeaderFileTypeFixedVMShlib        = 0x3u, // MH_FVMLIB
+      HeaderFileTypeCore                = 0x4u, // MH_CORE
+      HeaderFileTypePreloadedExecutable = 0x5u, // MH_PRELOAD
+      HeaderFileTypeDynamicShlib        = 0x6u, // MH_DYLIB
+      HeaderFileTypeDynamicLinkEditor   = 0x7u, // MH_DYLINKER
+      HeaderFileTypeBundle              = 0x8u, // MH_BUNDLE
+      HeaderFileTypeDynamicShlibStub    = 0x9u, // MH_DYLIB_STUB
+      HeaderFileTypeDSYM                = 0xAu, // MH_DSYM
+      HeaderFileTypeKextBundle          = 0xBu, // MH_KEXT_BUNDLE
+
+      // Constant bits for the "flags" field in llvm::MachO::mach_header and
+      // llvm::MachO::mach_header_64
+      HeaderFlagBitNoUndefinedSymbols     = 0x00000001u, // MH_NOUNDEFS
+      HeaderFlagBitIsIncrementalLinkObject= 0x00000002u, // MH_INCRLINK
+      HeaderFlagBitIsDynamicLinkObject    = 0x00000004u, // MH_DYLDLINK
+      HeaderFlagBitBindAtLoad             = 0x00000008u, // MH_BINDATLOAD
+      HeaderFlagBitPrebound               = 0x00000010u, // MH_PREBOUND
+      HeaderFlagBitSplitSegments          = 0x00000020u, // MH_SPLIT_SEGS
+      HeaderFlagBitLazyInit               = 0x00000040u, // MH_LAZY_INIT
+      HeaderFlagBitTwoLevelNamespace      = 0x00000080u, // MH_TWOLEVEL
+      HeaderFlagBitForceFlatNamespace     = 0x00000100u, // MH_FORCE_FLAT
+      HeaderFlagBitNoMultipleDefintions   = 0x00000200u, // MH_NOMULTIDEFS
+      HeaderFlagBitNoFixPrebinding        = 0x00000400u, // MH_NOFIXPREBINDING
+      HeaderFlagBitPrebindable            = 0x00000800u, // MH_PREBINDABLE
+      HeaderFlagBitAllModulesBound        = 0x00001000u, // MH_ALLMODSBOUND
+      HeaderFlagBitSubsectionsViaSymbols  = 0x00002000u, // MH_SUBSECTIONS_VIA_SYMBOLS
+      HeaderFlagBitCanonical              = 0x00004000u, // MH_CANONICAL
+      HeaderFlagBitWeakDefines            = 0x00008000u, // MH_WEAK_DEFINES
+      HeaderFlagBitBindsToWeak            = 0x00010000u, // MH_BINDS_TO_WEAK
+      HeaderFlagBitAllowStackExecution    = 0x00020000u, // MH_ALLOW_STACK_EXECUTION
+      HeaderFlagBitRootSafe               = 0x00040000u, // MH_ROOT_SAFE
+      HeaderFlagBitSetUIDSafe             = 0x00080000u, // MH_SETUID_SAFE
+      HeaderFlagBitNoReexportedDylibs     = 0x00100000u, // MH_NO_REEXPORTED_DYLIBS
+      HeaderFlagBitPIE                    = 0x00200000u, // MH_PIE
+      HeaderFlagBitDeadStrippableDylib    = 0x00400000u, // MH_DEAD_STRIPPABLE_DYLIB
+
+      // Constants for the "cmd" field in llvm::MachO::load_command
+      LoadCommandDynamicLinkerRequired    = 0x80000000u, // LC_REQ_DYLD
+      LoadCommandSegment32                = 0x00000001u, // LC_SEGMENT
+      LoadCommandSymtab                   = 0x00000002u, // LC_SYMTAB
+      LoadCommandSymSeg                   = 0x00000003u, // LC_SYMSEG
+      LoadCommandThread                   = 0x00000004u, // LC_THREAD
+      LoadCommandUnixThread               = 0x00000005u, // LC_UNIXTHREAD
+      LoadCommandFixedVMShlibLoad         = 0x00000006u, // LC_LOADFVMLIB
+      LoadCommandFixedVMShlibIdent        = 0x00000007u, // LC_IDFVMLIB
+      LoadCommandIdent                    = 0x00000008u, // LC_IDENT
+      LoadCommandFixedVMFileInclusion     = 0x00000009u, // LC_FVMFILE
+      LoadCommandPrePage                  = 0x0000000Au, // LC_PREPAGE
+      LoadCommandDynamicSymtabInfo        = 0x0000000Bu, // LC_DYSYMTAB
+      LoadCommandDylibLoad                = 0x0000000Cu, // LC_LOAD_DYLIB
+      LoadCommandDylibIdent               = 0x0000000Du, // LC_ID_DYLIB
+      LoadCommandDynamicLinkerLoad        = 0x0000000Eu, // LC_LOAD_DYLINKER
+      LoadCommandDynamicLinkerIdent       = 0x0000000Fu, // LC_ID_DYLINKER
+      LoadCommandDylibPrebound            = 0x00000010u, // LC_PREBOUND_DYLIB
+      LoadCommandRoutines32               = 0x00000011u, // LC_ROUTINES
+      LoadCommandSubFramework             = 0x00000012u, // LC_SUB_FRAMEWORK
+      LoadCommandSubUmbrella              = 0x00000013u, // LC_SUB_UMBRELLA
+      LoadCommandSubClient                = 0x00000014u, // LC_SUB_CLIENT
+      LoadCommandSubLibrary               = 0x00000015u, // LC_SUB_LIBRARY
+      LoadCommandTwoLevelHints            = 0x00000016u, // LC_TWOLEVEL_HINTS
+      LoadCommandPreBindChecksum          = 0x00000017u, // LC_PREBIND_CKSUM
+      LoadCommandDylibLoadWeak            = 0x80000018u, // LC_LOAD_WEAK_DYLIB
+      LoadCommandSegment64                = 0x00000019u, // LC_SEGMENT_64
+      LoadCommandRoutines64               = 0x0000001Au, // LC_ROUTINES_64
+      LoadCommandUUID                     = 0x0000001Bu, // LC_UUID
+      LoadCommandRunpath                  = 0x8000001Cu, // LC_RPATH
+      LoadCommandCodeSignature            = 0x0000001Du, // LC_CODE_SIGNATURE
+      LoadCommandSegmentSplitInfo         = 0x0000001Eu, // LC_SEGMENT_SPLIT_INFO
+      LoadCommandDylibReexport            = 0x8000001Fu, // LC_REEXPORT_DYLIB
+      LoadCommandDylibLazyLoad            = 0x00000020u, // LC_LAZY_LOAD_DYLIB
+      LoadCommandEncryptionInfo           = 0x00000021u, // LC_ENCRYPTION_INFO
+      LoadCommandDynamicLinkerInfo        = 0x00000022u, // LC_DYLD_INFO
+      LoadCommandDynamicLinkerInfoOnly    = 0x80000022u, // LC_DYLD_INFO_ONLY
+      LoadCommandDylibLoadUpward          = 0x80000023u, // LC_LOAD_UPWARD_DYLIB
+
+      // Constant bits for the "flags" field in llvm::MachO::segment_command
+      SegmentCommandFlagBitHighVM             = 0x1u, // SG_HIGHVM
+      SegmentCommandFlagBitFixedVMLibrary     = 0x2u, // SG_FVMLIB
+      SegmentCommandFlagBitNoRelocations      = 0x4u, // SG_NORELOC
+      SegmentCommandFlagBitProtectedVersion1  = 0x8u, // SG_PROTECTED_VERSION_1
+
+
+      // Constant masks for the "flags" field in llvm::MachO::section and
+      // llvm::MachO::section_64
+      SectionFlagMaskSectionType          = 0x000000ffu, // SECTION_TYPE
+      SectionFlagMaskAllAttributes        = 0xffffff00u, // SECTION_ATTRIBUTES
+      SectionFlagMaskUserAttributes       = 0xff000000u, // SECTION_ATTRIBUTES_USR
+      SectionFlagMaskSystemAttributes     = 0x00ffff00u, // SECTION_ATTRIBUTES_SYS
+
+      // Constant masks for the "flags[7:0]" field in llvm::MachO::section and
+      // llvm::MachO::section_64 (mask "flags" with SECTION_TYPE)
+      SectionTypeRegular                    = 0x00u, // S_REGULAR
+      SectionTypeZeroFill                   = 0x01u, // S_ZEROFILL
+      SectionTypeCStringLiterals            = 0x02u, // S_CSTRING_LITERALS
+      SectionType4ByteLiterals              = 0x03u, // S_4BYTE_LITERALS
+      SectionType8ByteLiterals              = 0x04u, // S_8BYTE_LITERALS
+      SectionTypeLiteralPointers            = 0x05u, // S_LITERAL_POINTERS
+      SectionTypeNonLazySymbolPointers      = 0x06u, // S_NON_LAZY_SYMBOL_POINTERS
+      SectionTypeLazySymbolPointers         = 0x07u, // S_LAZY_SYMBOL_POINTERS
+      SectionTypeSymbolStubs                = 0x08u, // S_SYMBOL_STUBS
+      SectionTypeModuleInitFunctionPointers = 0x09u, // S_MOD_INIT_FUNC_POINTERS
+      SectionTypeModuleTermFunctionPointers = 0x0au, // S_MOD_TERM_FUNC_POINTERS
+      SectionTypeCoalesced                  = 0x0bu, // S_COALESCED
+      SectionTypeZeroFillLarge              = 0x0cu, // S_GB_ZEROFILL
+      SectionTypeInterposing                = 0x0du, // S_INTERPOSING
+      SectionType16ByteLiterals             = 0x0eu, // S_16BYTE_LITERALS
+      SectionTypeDTraceObjectFormat         = 0x0fu, // S_DTRACE_DOF
+      SectionTypeLazyDylibSymbolPointers    = 0x10u, // S_LAZY_DYLIB_SYMBOL_POINTERS
+
+      // Constant masks for the "flags[31:24]" field in llvm::MachO::section and
+      // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_USR)
+      SectionAttrUserPureInstructions       = 0x80000000u, // S_ATTR_PURE_INSTRUCTIONS
+      SectionAttrUserNoTableOfContents      = 0x40000000u, // S_ATTR_NO_TOC
+      SectionAttrUserCanStripStaticSymbols  = 0x20000000u, // S_ATTR_STRIP_STATIC_SYMS
+      SectionAttrUserNoDeadStrip            = 0x10000000u, // S_ATTR_NO_DEAD_STRIP
+      SectionAttrUserLiveSupport            = 0x08000000u, // S_ATTR_LIVE_SUPPORT
+      SectionAttrUserSelfModifyingCode      = 0x04000000u, // S_ATTR_SELF_MODIFYING_CODE
+      SectionAttrUserDebug                  = 0x02000000u, // S_ATTR_DEBUG
+
+      // Constant masks for the "flags[23:8]" field in llvm::MachO::section and
+      // llvm::MachO::section_64 (mask "flags" with SECTION_ATTRIBUTES_SYS)
+      SectionAttrSytemSomeInstructions      = 0x00000400u, // S_ATTR_SOME_INSTRUCTIONS
+      SectionAttrSytemHasExternalRelocations= 0x00000200u, // S_ATTR_EXT_RELOC
+      SectionAttrSytemHasLocalRelocations   = 0x00000100u, // S_ATTR_LOC_RELOC
+
+      IndirectSymbolLocal                   = 0x80000000u, // INDIRECT_SYMBOL_LOCAL
+      IndirectSymbolAbsolute                = 0x40000000u, // INDIRECT_SYMBOL_ABS
+
+      RebaseTypePointer                     = 1u, // REBASE_TYPE_POINTER
+      RebaseTypeTextAbsolute32              = 2u, // REBASE_TYPE_TEXT_ABSOLUTE32
+      RebaseTypeTextPCRelative32            = 3u, // REBASE_TYPE_TEXT_PCREL32
+
+      RebaseOpcodeMask                          = 0xF0u, // REBASE_OPCODE_MASK
+      RebaseImmediateMask                       = 0x0Fu, // REBASE_IMMEDIATE_MASK
+      RebaseOpcodeDone                          = 0x00u, // REBASE_OPCODE_DONE
+      RebaseOpcodeSetTypeImmediate              = 0x10u, // REBASE_OPCODE_SET_TYPE_IMM
+      RebaseOpcodeSetSegmentAndOffsetULEB       = 0x20u, // REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+      RebaseOpcodeAddAddressULEB                = 0x30u, // REBASE_OPCODE_ADD_ADDR_ULEB
+      RebaseOpcodeAddAddressImmediateScaled     = 0x40u, // REBASE_OPCODE_ADD_ADDR_IMM_SCALED
+      RebaseOpcodeDoRebaseImmediateTimes        = 0x50u, // REBASE_OPCODE_DO_REBASE_IMM_TIMES
+      RebaseOpcodeDoRebaseULEBTimes             = 0x60u, // REBASE_OPCODE_DO_REBASE_ULEB_TIMES
+      RebaseOpcodeDoRebaseAddAddressULEB        = 0x70u, // REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB
+      RebaseOpcodeDoRebaseULEBTimesSkippingULEB = 0x80u, // REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB
+
+
+      BindTypePointer                       = 1u, // BIND_TYPE_POINTER
+      BindTypeTextAbsolute32                = 2u, // BIND_TYPE_TEXT_ABSOLUTE32
+      BindTypeTextPCRelative32              = 3u, // BIND_TYPE_TEXT_PCREL32
+
+      BindSpecialDylibSelf                  =  0u, // BIND_SPECIAL_DYLIB_SELF
+      BindSpecialDylibMainExecutable        = -1u, // BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE
+      BindSpecialDylibFlatLookup            = -2u, // BIND_SPECIAL_DYLIB_FLAT_LOOKUP
+
+      BindSymbolFlagsWeakImport             = 0x1u, // BIND_SYMBOL_FLAGS_WEAK_IMPORT
+      BindSymbolFlagsNonWeakDefinition      = 0x8u, // BIND_SYMBOL_FLAGS_NON_WEAK_DEFINITION
+
+      BindOpcodeMask                            = 0xF0u, // BIND_OPCODE_MASK
+      BindImmediateMask                         = 0x0Fu, // BIND_IMMEDIATE_MASK
+      BindOpcodeDone                            = 0x00u, // BIND_OPCODE_DONE
+      BindOpcodeSetDylibOrdinalImmediate        = 0x10u, // BIND_OPCODE_SET_DYLIB_ORDINAL_IMM
+      BindOpcodeSetDylibOrdinalULEB             = 0x20u, // BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB
+      BindOpcodeSetDylibSpecialImmediate        = 0x30u, // BIND_OPCODE_SET_DYLIB_SPECIAL_IMM
+      BindOpcodeSetSymbolTrailingFlagsImmediate = 0x40u, // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM
+      BindOpcodeSetTypeImmediate                = 0x50u, // BIND_OPCODE_SET_TYPE_IMM
+      BindOpcodeSetAppendSLEB                   = 0x60u, // BIND_OPCODE_SET_ADDEND_SLEB
+      BindOpcodeSetSegmentAndOffsetULEB         = 0x70u, // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB
+      BindOpcodeAddAddressULEB                  = 0x80u, // BIND_OPCODE_ADD_ADDR_ULEB
+      BindOpcodeDoBind                          = 0x90u, // BIND_OPCODE_DO_BIND
+      BindOpcodeDoBindAddAddressULEB            = 0xA0u, // BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB
+      BindOpcodeDoBindAddAddressImmediateScaled = 0xB0u, // BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED
+      BindOpcodeDoBindULEBTimesSkippingULEB     = 0xC0u, // BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB
+
+      ExportSymbolFlagsKindMask             = 0x03u, // EXPORT_SYMBOL_FLAGS_KIND_MASK
+      ExportSymbolFlagsKindRegular          = 0x00u, // EXPORT_SYMBOL_FLAGS_KIND_REGULAR
+      ExportSymbolFlagsKindThreadLocal      = 0x01u, // EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL
+      ExportSymbolFlagsWeakDefinition       = 0x04u, // EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION
+      ExportSymbolFlagsIndirectDefinition   = 0x08u, // EXPORT_SYMBOL_FLAGS_INDIRECT_DEFINITION
+      ExportSymbolFlagsHasSpecializations   = 0x10u, // EXPORT_SYMBOL_FLAGS_HAS_SPECIALIZATIONS
+
+
+      // Constant masks for the "n_type" field in llvm::MachO::nlist and
+      // llvm::MachO::nlist_64
+      NlistMaskStab                         = 0xe0, // N_STAB
+      NlistMaskPrivateExternal              = 0x10, // N_PEXT
+      NlistMaskType                         = 0x0e, // N_TYPE
+      NlistMaskExternal                     = 0x01, // N_EXT
+
+      // Constants for the "n_type & N_TYPE" llvm::MachO::nlist and
+      // llvm::MachO::nlist_64
+      NListTypeUndefined                    = 0x0u, // N_UNDF
+      NListTypeAbsolute                     = 0x2u, // N_ABS
+      NListTypeSection                      = 0xeu, // N_SECT
+      NListTypePreboundUndefined            = 0xcu, // N_PBUD
+      NListTypeIndirect                     = 0xau, // N_INDR
+
+      // Constant masks for the "n_sect" field in llvm::MachO::nlist and
+      // llvm::MachO::nlist_64
+      NListSectionNoSection                 = 0u,    // NO_SECT
+      NListSectionMaxSection                = 0xffu, // MAX_SECT
+
+      // Constant values for the "n_type" field in llvm::MachO::nlist and
+      // llvm::MachO::nlist_64 when "(n_type & NlistMaskStab) != 0"
+      StabGlobalSymbol                      = 0x20u, // N_GSYM
+      StabFunctionName                      = 0x22u, // N_FNAME
+      StabFunction                          = 0x24u, // N_FUN
+      StabStaticSymbol                      = 0x26u, // N_STSYM
+      StabLocalCommon                       = 0x28u, // N_LCSYM
+      StabBeginSymbol                       = 0x2Eu, // N_BNSYM
+      StabSourceFileOptions                 = 0x3Cu, // N_OPT
+      StabRegisterSymbol                    = 0x40u, // N_RSYM
+      StabSourceLine                        = 0x44u, // N_SLINE
+      StabEndSymbol                         = 0x4Eu, // N_ENSYM
+      StabStructureType                     = 0x60u, // N_SSYM
+      StabSourceFileName                    = 0x64u, // N_SO
+      StabObjectFileName                    = 0x66u, // N_OSO
+      StabLocalSymbol                       = 0x80u, // N_LSYM
+      StabBeginIncludeFileName              = 0x82u, // N_BINCL
+      StabIncludeFileName                   = 0x84u, // N_SOL
+      StabCompilerParameters                = 0x86u, // N_PARAMS
+      StabCompilerVersion                   = 0x88u, // N_VERSION
+      StabCompilerOptLevel                  = 0x8Au, // N_OLEVEL
+      StabParameter                         = 0xA0u, // N_PSYM
+      StabEndIncludeFile                    = 0xA2u, // N_EINCL
+      StabAlternateEntry                    = 0xA4u, // N_ENTRY
+      StabLeftBracket                       = 0xC0u, // N_LBRAC
+      StabDeletedIncludeFile                = 0xC2u, // N_EXCL
+      StabRightBracket                      = 0xE0u, // N_RBRAC
+      StabBeginCommon                       = 0xE2u, // N_BCOMM
+      StabEndCommon                         = 0xE4u, // N_ECOMM
+      StabEndCommonLocal                    = 0xE8u, // N_ECOML
+      StabLength                            = 0xFEu  // N_LENG
+
+    };
+
+    // Structs from <mach-o/loader.h>
+
+    struct mach_header {
+      uint32_t magic;
+      uint32_t cputype;
+      uint32_t cpusubtype;
+      uint32_t filetype;
+      uint32_t ncmds;
+      uint32_t sizeofcmds;
+      uint32_t flags;
+    };
+
+    struct mach_header_64 {
+      uint32_t magic;
+      uint32_t cputype;
+      uint32_t cpusubtype;
+      uint32_t filetype;
+      uint32_t ncmds;
+      uint32_t sizeofcmds;
+      uint32_t flags;
+      uint32_t reserved;
+    };
+
+    struct load_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+    };
+
+    struct segment_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      char segname[16];
+      uint32_t vmaddr;
+      uint32_t vmsize;
+      uint32_t fileoff;
+      uint32_t filesize;
+      uint32_t maxprot;
+      uint32_t initprot;
+      uint32_t nsects;
+      uint32_t flags;
+    };
+
+    struct segment_command_64 {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      char segname[16];
+      uint64_t vmaddr;
+      uint64_t vmsize;
+      uint64_t fileoff;
+      uint64_t filesize;
+      uint32_t maxprot;
+      uint32_t initprot;
+      uint32_t nsects;
+      uint32_t flags;
+    };
+
+    struct section {
+      char sectname[16];
+      char segname[16];
+      uint32_t addr;
+      uint32_t size;
+      uint32_t offset;
+      uint32_t align;
+      uint32_t reloff;
+      uint32_t nreloc;
+      uint32_t flags;
+      uint32_t reserved1;
+      uint32_t reserved2;
+    };
+
+    struct section_64 {
+      char sectname[16];
+      char segname[16];
+      uint64_t addr;
+      uint64_t size;
+      uint32_t offset;
+      uint32_t align;
+      uint32_t reloff;
+      uint32_t nreloc;
+      uint32_t flags;
+      uint32_t reserved1;
+      uint32_t reserved2;
+      uint32_t reserved3;
+    };
+
+    struct fvmlib {
+      uint32_t name;
+      uint32_t minor_version;
+      uint32_t header_addr;
+    };
+
+    struct fvmlib_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      struct fvmlib fvmlib;
+    };
+
+    struct dylib {
+      uint32_t name;
+      uint32_t timestamp;
+      uint32_t current_version;
+      uint32_t compatibility_version;
+    };
+
+    struct dylib_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      struct dylib dylib;
+    };
+
+    struct sub_framework_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t umbrella;
+    };
+
+    struct sub_client_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t client;
+    };
+
+    struct sub_umbrella_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t sub_umbrella;
+    };
+
+    struct sub_library_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t sub_library;
+    };
+
+    struct prebound_dylib_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t name;
+      uint32_t nmodules;
+      uint32_t linked_modules;
+    };
+
+    struct dylinker_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t name;
+    };
+
+    struct thread_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+    };
+
+    struct routines_command {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint32_t init_address;
+      uint32_t init_module;
+      uint32_t reserved1;
+      uint32_t reserved2;
+      uint32_t reserved3;
+      uint32_t reserved4;
+      uint32_t reserved5;
+      uint32_t reserved6;
+    };
+
+    struct routines_command_64 {
+      uint32_t cmd;
+      uint32_t cmdsize;
+      uint64_t init_address;
+      uint64_t init_module;
+      uint64_t reserved1;
+      uint64_t reserved2;
+      uint64_t reserved3;
+      uint64_t reserved4;
+      uint64_t reserved5;
+      uint64_t reserved6;
reserved6; + }; + + struct symtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t symoff; + uint32_t nsyms; + uint32_t stroff; + uint32_t strsize; + }; + + struct dysymtab_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t iundefsym; + uint32_t nundefsym; + uint32_t tocoff; + uint32_t ntoc; + uint32_t modtaboff; + uint32_t nmodtab; + uint32_t extrefsymoff; + uint32_t nextrefsyms; + uint32_t indirectsymoff; + uint32_t nindirectsyms; + uint32_t extreloff; + uint32_t nextrel; + uint32_t locreloff; + uint32_t nlocrel; + }; + + struct dylib_table_of_contents { + uint32_t symbol_index; + uint32_t module_index; + }; + + struct dylib_module { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_addr; + uint32_t objc_module_info_size; + }; + + struct dylib_module_64 { + uint32_t module_name; + uint32_t iextdefsym; + uint32_t nextdefsym; + uint32_t irefsym; + uint32_t nrefsym; + uint32_t ilocalsym; + uint32_t nlocalsym; + uint32_t iextrel; + uint32_t nextrel; + uint32_t iinit_iterm; + uint32_t ninit_nterm; + uint32_t objc_module_info_size; + uint64_t objc_module_info_addr; + }; + + struct dylib_reference { + uint32_t isym:24, + flags:8; + }; + + + struct twolevel_hints_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t nhints; + }; + + struct twolevel_hint { + uint32_t isub_image:8, + itoc:24; + }; + + struct prebind_cksum_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cksum; + }; + + struct uuid_command { + uint32_t cmd; + uint32_t cmdsize; + uint8_t uuid[16]; + }; + + struct rpath_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t path; + }; + + struct linkedit_data_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t dataoff; + uint32_t datasize; + }; + + struct encryption_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t cryptoff; + uint32_t cryptsize; + uint32_t cryptid; + }; + + struct dyld_info_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t rebase_off; + uint32_t rebase_size; + uint32_t bind_off; + uint32_t bind_size; + uint32_t weak_bind_off; + uint32_t weak_bind_size; + uint32_t lazy_bind_off; + uint32_t lazy_bind_size; + uint32_t export_off; + uint32_t export_size; + }; + + struct symseg_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t offset; + uint32_t size; + }; + + struct ident_command { + uint32_t cmd; + uint32_t cmdsize; + }; + + struct fvmfile_command { + uint32_t cmd; + uint32_t cmdsize; + uint32_t name; + uint32_t header_addr; + }; + + + // Structs from + struct fat_header { + uint32_t magic; + uint32_t nfat_arch; + }; + + struct fat_arch { + uint32_t cputype; + uint32_t cpusubtype; + uint32_t offset; + uint32_t size; + uint32_t align; + }; + + // Structs from + struct nlist { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + int16_t n_desc; + uint32_t n_value; + }; + + struct nlist_64 { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; + uint64_t n_value; + }; + + // Get/Set functions from + + static inline uint16_t GET_LIBRARY_ORDINAL(uint16_t n_desc) + { + return (((n_desc) >> 8u) & 0xffu); + } + + static inline void SET_LIBRARY_ORDINAL(uint16_t &n_desc, uint8_t ordinal) + { + n_desc = (((n_desc) & 0x00ff) | (((ordinal) & 0xff) << 8)); + } + + static inline 
uint8_t GET_COMM_ALIGN (uint16_t n_desc)
+  {
+    return (n_desc >> 8u) & 0x0fu;
+  }
+
+  static inline void SET_COMM_ALIGN (uint16_t &n_desc, uint8_t align)
+  {
+    n_desc = ((n_desc & 0xf0ffu) | ((align & 0x0fu) << 8u));
+  }
+
+  // Enums from <mach-o/machine.h>
+  enum {
+    // Capability bits used in the definition of cpu_type.
+    CPUArchMask  = 0xff000000,   // Mask for architecture bits
+    CPUArchABI64 = 0x01000000,   // 64 bit ABI
+
+    // Constants for the cputype field.
+    CPUTypeI386      = 7,
+    CPUTypeX86_64    = CPUTypeI386 | CPUArchABI64,
+    CPUTypeARM       = 12,
+    CPUTypeSPARC     = 14,
+    CPUTypePowerPC   = 18,
+    CPUTypePowerPC64 = CPUTypePowerPC | CPUArchABI64,
+
+
+    // Constants for the cpusubtype field.
+
+    // X86
+    CPUSubType_I386_ALL   = 3,
+    CPUSubType_X86_64_ALL = 3,
+
+    // ARM
+    CPUSubType_ARM_ALL = 0,
+    CPUSubType_ARM_V4T = 5,
+    CPUSubType_ARM_V5  = 7,
+    CPUSubType_ARM_V6  = 6,
+    CPUSubType_ARM_V7  = 9,
+
+    // PowerPC
+    CPUSubType_POWERPC_ALL = 0,
+
+    CPUSubType_SPARC_ALL = 0
+  };
+  } // end namespace MachO
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/ManagedStatic.h b/final/include/llvm/Support/ManagedStatic.h
new file mode 100644
index 00000000000..53e73ad35f4
--- /dev/null
+++ b/final/include/llvm/Support/ManagedStatic.h
@@ -0,0 +1,110 @@
+//===-- llvm/Support/ManagedStatic.h - Static Global wrapper ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ManagedStatic class and the llvm_shutdown() function.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MANAGED_STATIC_H
+#define LLVM_SUPPORT_MANAGED_STATIC_H
+
+#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Threading.h"
+
+namespace llvm {
+
+/// object_creator - Helper method for ManagedStatic.
+template <class C>
+void* object_creator() {
+  return new C();
+}
+
+/// object_deleter - Helper method for ManagedStatic.
+///
+template <typename T> struct object_deleter {
+  static void call(void * Ptr) { delete (T*)Ptr; }
+};
+template <typename T, size_t N> struct object_deleter<T[N]> {
+  static void call(void * Ptr) { delete[] (T*)Ptr; }
+};
+
+/// ManagedStaticBase - Common base class for ManagedStatic instances.
+class ManagedStaticBase {
+protected:
+  // This should only be used as a static variable, which guarantees that this
+  // will be zero initialized.
+  mutable void *Ptr;
+  mutable void (*DeleterFn)(void*);
+  mutable const ManagedStaticBase *Next;
+
+  void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const;
+public:
+  /// isConstructed - Return true if this object has already been constructed.
+  bool isConstructed() const { return Ptr != 0; }
+
+  void destroy() const;
+};
+
+/// ManagedStatic - This transparently changes the behavior of global statics to
+/// be lazily constructed on demand (good for reducing startup times of dynamic
+/// libraries that link in LLVM components) and for making destruction be
+/// explicit through the llvm_shutdown() function call.
+///
+template <class C>
+class ManagedStatic : public ManagedStaticBase {
+public:
+
+  // Accessors.
+  C &operator*() {
+    void* tmp = Ptr;
+    if (llvm_is_multithreaded()) sys::MemoryFence();
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+
+    return *static_cast<C*>(Ptr);
+  }
+  C *operator->() {
+    void* tmp = Ptr;
+    if (llvm_is_multithreaded()) sys::MemoryFence();
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+
+    return static_cast<C*>(Ptr);
+  }
+  const C &operator*() const {
+    void* tmp = Ptr;
+    if (llvm_is_multithreaded()) sys::MemoryFence();
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+
+    return *static_cast<C*>(Ptr);
+  }
+  const C *operator->() const {
+    void* tmp = Ptr;
+    if (llvm_is_multithreaded()) sys::MemoryFence();
+    if (!tmp) RegisterManagedStatic(object_creator<C>, object_deleter<C>::call);
+
+    return static_cast<C*>(Ptr);
+  }
+};
+
+/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
+void llvm_shutdown();
+
+
+/// llvm_shutdown_obj - This is a simple helper class that calls
+/// llvm_shutdown() when it is destroyed.
+struct llvm_shutdown_obj {
+  llvm_shutdown_obj() { }
+  explicit llvm_shutdown_obj(bool multithreaded) {
+    if (multithreaded) llvm_start_multithreaded();
+  }
+  ~llvm_shutdown_obj() { llvm_shutdown(); }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/Support/MathExtras.h b/final/include/llvm/Support/MathExtras.h
new file mode 100644
index 00000000000..4627557f7f1
--- /dev/null
+++ b/final/include/llvm/Support/MathExtras.h
@@ -0,0 +1,460 @@
+//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some functions that are useful for math stuff.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MATHEXTRAS_H
+#define LLVM_SUPPORT_MATHEXTRAS_H
+
+#include "llvm/Support/SwapByteOrder.h"
+
+namespace llvm {
+
+// NOTE: The following support functions use the _32/_64 extensions instead of
+// type overloading so that signed and unsigned integers can be used without
+// ambiguity.
+
+/// Hi_32 - This function returns the high 32 bits of a 64 bit value.
+inline uint32_t Hi_32(uint64_t Value) {
+  return static_cast<uint32_t>(Value >> 32);
+}
+
+/// Lo_32 - This function returns the low 32 bits of a 64 bit value.
+inline uint32_t Lo_32(uint64_t Value) {
+  return static_cast<uint32_t>(Value);
+}
+
+/// isInt - Checks if an integer fits into the given bit width.
+template <unsigned N>
+inline bool isInt(int64_t x) {
+  return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
+}
+// Template specializations to get better code for common cases.
+template<>
+inline bool isInt<8>(int64_t x) {
+  return static_cast<int8_t>(x) == x;
+}
+template<>
+inline bool isInt<16>(int64_t x) {
+  return static_cast<int16_t>(x) == x;
+}
+template<>
+inline bool isInt<32>(int64_t x) {
+  return static_cast<int32_t>(x) == x;
+}
+
+/// isUInt - Checks if an unsigned integer fits into the given bit width.
+template <unsigned N>
+inline bool isUInt(uint64_t x) {
+  return N >= 64 || x < (UINT64_C(1)<<N);
+}
+// Template specializations to get better code for common cases.
+template<>
+inline bool isUInt<8>(uint64_t x) {
+  return static_cast<uint8_t>(x) == x;
+}
+template<>
+inline bool isUInt<16>(uint64_t x) {
+  return static_cast<uint16_t>(x) == x;
+}
+template<>
+inline bool isUInt<32>(uint64_t x) {
+  return static_cast<uint32_t>(x) == x;
+}
+
+/// isUIntN - Checks if an unsigned integer fits into the given (dynamic)
+/// bit width.
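As an editorial aside, the double-checked, fence-guarded accessors above mean a ManagedStatic costs nothing until its first dereference. A minimal usage sketch, assuming the program links against LLVM's Support library (the Counter type is hypothetical):

#include "llvm/Support/ManagedStatic.h"

namespace {
struct Counter {              // hypothetical payload type
  int Hits;
  Counter() : Hits(0) {}      // runs at first use, not at program load
};
}

static llvm::ManagedStatic<Counter> TheCounter;

int main() {
  llvm::llvm_shutdown_obj Shutdown; // calls llvm_shutdown() on scope exit
  ++TheCounter->Hits;               // first '->' triggers lazy construction
  return TheCounter->Hits - 1;      // 0 on success
}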
+inline bool isUIntN(unsigned N, uint64_t x) { + return x == (x & (~0ULL >> (64 - N))); +} + +/// isIntN - Checks if an signed integer fits into the given (dynamic) +/// bit width. +inline bool isIntN(unsigned N, int64_t x) { + return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1))); +} + +/// isMask_32 - This function returns true if the argument is a sequence of ones +/// starting at the least significant bit with the remainder zero (32 bit +/// version). Ex. isMask_32(0x0000FFFFU) == true. +inline bool isMask_32(uint32_t Value) { + return Value && ((Value + 1) & Value) == 0; +} + +/// isMask_64 - This function returns true if the argument is a sequence of ones +/// starting at the least significant bit with the remainder zero (64 bit +/// version). +inline bool isMask_64(uint64_t Value) { + return Value && ((Value + 1) & Value) == 0; +} + +/// isShiftedMask_32 - This function returns true if the argument contains a +/// sequence of ones with the remainder zero (32 bit version.) +/// Ex. isShiftedMask_32(0x0000FF00U) == true. +inline bool isShiftedMask_32(uint32_t Value) { + return isMask_32((Value - 1) | Value); +} + +/// isShiftedMask_64 - This function returns true if the argument contains a +/// sequence of ones with the remainder zero (64 bit version.) +inline bool isShiftedMask_64(uint64_t Value) { + return isMask_64((Value - 1) | Value); +} + +/// isPowerOf2_32 - This function returns true if the argument is a power of +/// two > 0. Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.) +inline bool isPowerOf2_32(uint32_t Value) { + return Value && !(Value & (Value - 1)); +} + +/// isPowerOf2_64 - This function returns true if the argument is a power of two +/// > 0 (64 bit edition.) +inline bool isPowerOf2_64(uint64_t Value) { + return Value && !(Value & (Value - int64_t(1L))); +} + +/// ByteSwap_16 - This function returns a byte-swapped representation of the +/// 16-bit argument, Value. +inline uint16_t ByteSwap_16(uint16_t Value) { + return sys::SwapByteOrder_16(Value); +} + +/// ByteSwap_32 - This function returns a byte-swapped representation of the +/// 32-bit argument, Value. +inline uint32_t ByteSwap_32(uint32_t Value) { + return sys::SwapByteOrder_32(Value); +} + +/// ByteSwap_64 - This function returns a byte-swapped representation of the +/// 64-bit argument, Value. +inline uint64_t ByteSwap_64(uint64_t Value) { + return sys::SwapByteOrder_64(Value); +} + +/// CountLeadingZeros_32 - this function performs the platform optimal form of +/// counting the number of zeros from the most significant bit to the first one +/// bit. Ex. CountLeadingZeros_32(0x00F000FF) == 8. +/// Returns 32 if the word is zero. +inline unsigned CountLeadingZeros_32(uint32_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 + // PowerPC is defined for __builtin_clz(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (!Value) return 32; +#endif + Count = __builtin_clz(Value); +#else + if (!Value) return 32; + Count = 0; + // bisection method for count leading zeros + for (unsigned Shift = 32 >> 1; Shift; Shift >>= 1) { + uint32_t Tmp = Value >> Shift; + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } +#endif + return Count; +} + +/// CountLeadingOnes_32 - this function performs the operation of +/// counting the number of ones from the most significant bit to the first zero +/// bit. Ex. CountLeadingOnes_32(0xFF0FFF00) == 8. +/// Returns 32 if the word is all ones. 
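A few spot checks consistent with the doc comments above; these asserts use the predicates exactly as documented (the widths passed to isUIntN/isIntN are arbitrary):

#include <cassert>
#include "llvm/Support/MathExtras.h"
using namespace llvm;

int main() {
  assert(isMask_32(0x0000FFFFu));         // contiguous ones from bit 0
  assert(!isMask_32(0x0000FF00u));        // gap below the ones
  assert(isShiftedMask_32(0x0000FF00u));  // contiguous ones anywhere
  assert(isPowerOf2_32(0x00100000u));
  assert(isUIntN(12, 4095) && !isUIntN(12, 4096)); // dynamic width checks
  assert(isIntN(8, -128) && !isIntN(8, 128));
  assert(ByteSwap_16(0x1234) == 0x3412);
  return 0;
}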
+inline unsigned CountLeadingOnes_32(uint32_t Value) { + return CountLeadingZeros_32(~Value); +} + +/// CountLeadingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the most significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. +inline unsigned CountLeadingZeros_64(uint64_t Value) { + unsigned Count; // result +#if __GNUC__ >= 4 + // PowerPC is defined for __builtin_clzll(0) +#if !defined(__ppc__) && !defined(__ppc64__) + if (!Value) return 64; +#endif + Count = __builtin_clzll(Value); +#else + if (sizeof(long) == sizeof(int64_t)) { + if (!Value) return 64; + Count = 0; + // bisection method for count leading zeros + for (unsigned Shift = 64 >> 1; Shift; Shift >>= 1) { + uint64_t Tmp = Value >> Shift; + if (Tmp) { + Value = Tmp; + } else { + Count |= Shift; + } + } + } else { + // get hi portion + uint32_t Hi = Hi_32(Value); + + // if some bits in hi portion + if (Hi) { + // leading zeros in hi portion plus all bits in lo portion + Count = CountLeadingZeros_32(Hi); + } else { + // get lo portion + uint32_t Lo = Lo_32(Value); + // same as 32 bit value + Count = CountLeadingZeros_32(Lo)+32; + } + } +#endif + return Count; +} + +/// CountLeadingOnes_64 - This function performs the operation +/// of counting the number of ones from the most significant bit to the first +/// zero bit (64 bit edition.) +/// Returns 64 if the word is all ones. +inline unsigned CountLeadingOnes_64(uint64_t Value) { + return CountLeadingZeros_64(~Value); +} + +/// CountTrailingZeros_32 - this function performs the platform optimal form of +/// counting the number of zeros from the least significant bit to the first one +/// bit. Ex. CountTrailingZeros_32(0xFF00FF00) == 8. +/// Returns 32 if the word is zero. +inline unsigned CountTrailingZeros_32(uint32_t Value) { +#if __GNUC__ >= 4 + return Value ? __builtin_ctz(Value) : 32; +#else + static const unsigned Mod37BitPosition[] = { + 32, 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, + 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, + 5, 20, 8, 19, 18 + }; + return Mod37BitPosition[(-Value & Value) % 37]; +#endif +} + +/// CountTrailingOnes_32 - this function performs the operation of +/// counting the number of ones from the least significant bit to the first zero +/// bit. Ex. CountTrailingOnes_32(0x00FF00FF) == 8. +/// Returns 32 if the word is all ones. +inline unsigned CountTrailingOnes_32(uint32_t Value) { + return CountTrailingZeros_32(~Value); +} + +/// CountTrailingZeros_64 - This function performs the platform optimal form +/// of counting the number of zeros from the least significant bit to the first +/// one bit (64 bit edition.) +/// Returns 64 if the word is zero. +inline unsigned CountTrailingZeros_64(uint64_t Value) { +#if __GNUC__ >= 4 + return Value ? __builtin_ctzll(Value) : 64; +#else + static const unsigned Mod67Position[] = { + 64, 0, 1, 39, 2, 15, 40, 23, 3, 12, 16, 59, 41, 19, 24, 54, + 4, 64, 13, 10, 17, 62, 60, 28, 42, 30, 20, 51, 25, 44, 55, + 47, 5, 32, 65, 38, 14, 22, 11, 58, 18, 53, 63, 9, 61, 27, + 29, 50, 43, 46, 31, 37, 21, 57, 52, 8, 26, 49, 45, 36, 56, + 7, 48, 35, 6, 34, 33, 0 + }; + return Mod67Position[(-Value & Value) % 67]; +#endif +} + +/// CountTrailingOnes_64 - This function performs the operation +/// of counting the number of ones from the least significant bit to the first +/// zero bit (64 bit edition.) +/// Returns 64 if the word is all ones. 
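The fallback table in CountTrailingZeros_32 works because -Value & Value isolates the lowest set bit, and the powers of two 2^0..2^31 are pairwise distinct and nonzero modulo the prime 37, so the residue can index a 37-entry table (entry 0 handles Value == 0). A standalone self-check of that property:

#include <cassert>
#include <stdint.h>

int main() {
  // Each power of two must map to a unique residue mod 37 for the
  // CountTrailingZeros_32 fallback table to be collision-free.
  bool seen[37] = {false};
  seen[0] = true;                  // residue 0 is reserved for Value == 0
  for (unsigned i = 0; i < 32; ++i) {
    uint32_t bit = 1u << i;
    assert(!seen[bit % 37]);       // no collision with earlier powers
    seen[bit % 37] = true;
  }
  return 0;
}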
+inline unsigned CountTrailingOnes_64(uint64_t Value) { + return CountTrailingZeros_64(~Value); +} + +/// CountPopulation_32 - this function counts the number of set bits in a value. +/// Ex. CountPopulation(0xF000F000) = 8 +/// Returns 0 if the word is zero. +inline unsigned CountPopulation_32(uint32_t Value) { +#if __GNUC__ >= 4 + return __builtin_popcount(Value); +#else + uint32_t v = Value - ((Value >> 1) & 0x55555555); + v = (v & 0x33333333) + ((v >> 2) & 0x33333333); + return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24; +#endif +} + +/// CountPopulation_64 - this function counts the number of set bits in a value, +/// (64 bit edition.) +inline unsigned CountPopulation_64(uint64_t Value) { +#if __GNUC__ >= 4 + return __builtin_popcountll(Value); +#else + uint64_t v = Value - ((Value >> 1) & 0x5555555555555555ULL); + v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL); + v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL; + return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56); +#endif +} + +/// Log2_32 - This function returns the floor log base 2 of the specified value, +/// -1 if the value is zero. (32 bit edition.) +/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2 +inline unsigned Log2_32(uint32_t Value) { + return 31 - CountLeadingZeros_32(Value); +} + +/// Log2_64 - This function returns the floor log base 2 of the specified value, +/// -1 if the value is zero. (64 bit edition.) +inline unsigned Log2_64(uint64_t Value) { + return 63 - CountLeadingZeros_64(Value); +} + +/// Log2_32_Ceil - This function returns the ceil log base 2 of the specified +/// value, 32 if the value is zero. (32 bit edition). +/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3 +inline unsigned Log2_32_Ceil(uint32_t Value) { + return 32-CountLeadingZeros_32(Value-1); +} + +/// Log2_64_Ceil - This function returns the ceil log base 2 of the specified +/// value, 64 if the value is zero. (64 bit edition.) +inline unsigned Log2_64_Ceil(uint64_t Value) { + return 64-CountLeadingZeros_64(Value-1); +} + +/// GreatestCommonDivisor64 - Return the greatest common divisor of the two +/// values using Euclid's algorithm. +inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) { + while (B) { + uint64_t T = B; + B = A % B; + A = T; + } + return A; +} + +/// BitsToDouble - This function takes a 64-bit integer and returns the bit +/// equivalent double. +inline double BitsToDouble(uint64_t Bits) { + union { + uint64_t L; + double D; + } T; + T.L = Bits; + return T.D; +} + +/// BitsToFloat - This function takes a 32-bit integer and returns the bit +/// equivalent float. +inline float BitsToFloat(uint32_t Bits) { + union { + uint32_t I; + float F; + } T; + T.I = Bits; + return T.F; +} + +/// DoubleToBits - This function takes a double and returns the bit +/// equivalent 64-bit integer. Note that copying doubles around +/// changes the bits of NaNs on some hosts, notably x86, so this +/// routine cannot be used if these bits are needed. +inline uint64_t DoubleToBits(double Double) { + union { + uint64_t L; + double D; + } T; + T.D = Double; + return T.L; +} + +/// FloatToBits - This function takes a float and returns the bit +/// equivalent 32-bit integer. Note that copying floats around +/// changes the bits of NaNs on some hosts, notably x86, so this +/// routine cannot be used if these bits are needed. 
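One subtlety worth calling out: Log2_32 returns unsigned, so the documented -1 for a zero input comes back as the wrapped value ~0u. Spot checks mirroring the doc-comment examples above:

#include <cassert>
#include "llvm/Support/MathExtras.h"
using namespace llvm;

int main() {
  assert(CountPopulation_32(0xF000F000u) == 8);
  assert(Log2_32(32) == 5 && Log2_32(1) == 0 && Log2_32(6) == 2);
  assert(Log2_32(0) == ~0u);                 // documented "-1", as unsigned
  assert(Log2_32_Ceil(6) == 3);              // ceil vs. floor on non-powers
  assert(GreatestCommonDivisor64(48, 36) == 12);
  assert(BitsToFloat(0x3F800000u) == 1.0f);  // IEEE-754 single for 1.0
  return 0;
}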
+inline uint32_t FloatToBits(float Float) {
+  union {
+    uint32_t I;
+    float F;
+  } T;
+  T.F = Float;
+  return T.I;
+}
+
+/// Platform-independent wrappers for the C99 isnan() function.
+int IsNAN(float f);
+int IsNAN(double d);
+
+/// Platform-independent wrappers for the C99 isinf() function.
+int IsInf(float f);
+int IsInf(double d);
+
+/// MinAlign - A and B are either alignments or offsets.  Return the minimum
+/// alignment that may be assumed after adding the two together.
+static inline uint64_t MinAlign(uint64_t A, uint64_t B) {
+  // The largest power of 2 that divides both A and B.
+  return (A | B) & -(A | B);
+}
+
+/// NextPowerOf2 - Returns the next power of two (in 64-bits)
+/// that is strictly greater than A.  Returns zero on overflow.
+static inline uint64_t NextPowerOf2(uint64_t A) {
+  A |= (A >> 1);
+  A |= (A >> 2);
+  A |= (A >> 4);
+  A |= (A >> 8);
+  A |= (A >> 16);
+  A |= (A >> 32);
+  return A + 1;
+}
+
+/// RoundUpToAlignment - Returns the next integer (mod 2**64) that is
+/// greater than or equal to \arg Value and is a multiple of \arg
+/// Align.  Align must be non-zero.
+///
+/// Examples:
+/// RoundUpToAlignment(5, 8) = 8
+/// RoundUpToAlignment(17, 8) = 24
+/// RoundUpToAlignment(~0LL, 8) = 0
+inline uint64_t RoundUpToAlignment(uint64_t Value, uint64_t Align) {
+  return ((Value + Align - 1) / Align) * Align;
+}
+
+/// OffsetToAlignment - Return the offset to the next integer (mod 2**64) that
+/// is greater than or equal to \arg Value and is a multiple of \arg
+/// Align.  Align must be non-zero.
+inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
+  return RoundUpToAlignment(Value, Align) - Value;
+}
+
+/// abs64 - absolute value of a 64-bit int.  Not all environments support
+/// "abs" on whatever their name for the 64-bit int type is.  The absolute
+/// value of the largest negative number is undefined, as with "abs".
+inline int64_t abs64(int64_t x) {
+  return (x < 0) ? -x : x;
+}
+
+/// SignExtend32 - Sign extend B-bit number x to 32-bit int.
+/// Usage int32_t r = SignExtend32<5>(x);
+template <unsigned B> inline int32_t SignExtend32(uint32_t x) {
+  return int32_t(x << (32 - B)) >> (32 - B);
+}
+
+/// SignExtend64 - Sign extend B-bit number x to 64-bit int.
+/// Usage int64_t r = SignExtend64<5>(x);
+template <unsigned B> inline int64_t SignExtend64(uint64_t x) {
+  return int64_t(x << (64 - B)) >> (64 - B);
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/Memory.h b/final/include/llvm/Support/Memory.h
new file mode 100644
index 00000000000..9c3f85b958b
--- /dev/null
+++ b/final/include/llvm/Support/Memory.h
@@ -0,0 +1,96 @@
+//===- llvm/Support/Memory.h - Memory Support --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Memory class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_MEMORY_H
+#define LLVM_SYSTEM_MEMORY_H
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+namespace llvm {
+namespace sys {
+
+  /// This class encapsulates the notion of a memory block which has an address
+  /// and a size. It is used by the Memory class (a friend) as the result of
+  /// various memory allocation operations.
+  /// @see Memory
+  /// @brief Memory block abstraction.
+  class MemoryBlock {
+  public:
+    MemoryBlock() : Address(0), Size(0) { }
+    MemoryBlock(void *addr, size_t size) : Address(addr), Size(size) { }
+    void *base() const { return Address; }
+    size_t size() const { return Size; }
+  private:
+    void *Address;    ///< Address of first byte of memory area
+    size_t Size;      ///< Size, in bytes of the memory area
+    friend class Memory;
+  };
+
+  /// This class provides various memory handling functions that manipulate
+  /// MemoryBlock instances.
+  /// @since 1.4
+  /// @brief An abstraction for memory operations.
+  class Memory {
+  public:
+    /// This method allocates a block of Read/Write/Execute memory that is
+    /// suitable for executing dynamically generated code (e.g. JIT). An
+    /// attempt to allocate \p NumBytes bytes of virtual memory is made.
+    /// \p NearBlock may point to an existing allocation in which case
+    /// an attempt is made to allocate more memory near the existing block.
+    ///
+    /// On success, this returns a non-null memory block, otherwise it returns
+    /// a null memory block and fills in *ErrMsg.
+    ///
+    /// @brief Allocate Read/Write/Execute memory.
+    static MemoryBlock AllocateRWX(size_t NumBytes,
+                                   const MemoryBlock *NearBlock,
+                                   std::string *ErrMsg = 0);
+
+    /// This method releases a block of Read/Write/Execute memory that was
+    /// allocated with the AllocateRWX method. It should not be used to
+    /// release any memory block allocated any other way.
+    ///
+    /// On success, this returns false, otherwise it returns true and fills
+    /// in *ErrMsg.
+    /// @brief Release Read/Write/Execute memory.
+    static bool ReleaseRWX(MemoryBlock &block, std::string *ErrMsg = 0);
+
+
+    /// InvalidateInstructionCache - Before the JIT can run a block of code
+    /// that has been emitted it must invalidate the instruction cache on some
+    /// platforms.
+    static void InvalidateInstructionCache(const void *Addr, size_t Len);
+
+    /// setExecutable - Before the JIT can run a block of code, it has to be
+    /// given read and executable privilege. Return true if it is already r-x
+    /// or the system is able to change its privilege.
+    static bool setExecutable (MemoryBlock &M, std::string *ErrMsg = 0);
+
+    /// setWritable - When adding to a block of code, the JIT may need
+    /// to mark a block of code as RW since the protections are on page
+    /// boundaries, and the JIT internal allocations are not page aligned.
+    static bool setWritable (MemoryBlock &M, std::string *ErrMsg = 0);
+
+    /// setRangeExecutable - Mark the page containing a range of addresses
+    /// as executable.
+    static bool setRangeExecutable(const void *Addr, size_t Size);
+
+    /// setRangeWritable - Mark the page containing a range of addresses
+    /// as writable.
+    static bool setRangeWritable(const void *Addr, size_t Size);
+  };
+}
+}
+
+#endif
diff --git a/final/include/llvm/Support/MemoryBuffer.h b/final/include/llvm/Support/MemoryBuffer.h
new file mode 100644
index 00000000000..b6243b7b10d
--- /dev/null
+++ b/final/include/llvm/Support/MemoryBuffer.h
@@ -0,0 +1,119 @@
+//===--- MemoryBuffer.h - Memory Buffer Interface ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MemoryBuffer interface.
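Before moving on to MemoryBuffer.h: a sketch of the intended allocate/flush/release cycle for the sys::Memory API just defined, as a JIT would use it (error handling condensed; only calls declared above are used):

#include <string>
#include "llvm/Support/Memory.h"
using namespace llvm;

int main() {
  std::string Err;
  sys::MemoryBlock MB = sys::Memory::AllocateRWX(4096, 0, &Err);
  if (!MB.base())      // null block means the allocation failed; see Err
    return 1;
  // ... emit machine code into MB.base(), then flush the instruction
  // cache before jumping into it, as the doc comment above requires ...
  sys::Memory::InvalidateInstructionCache(MB.base(), MB.size());
  sys::Memory::ReleaseRWX(MB, &Err);
  return 0;
}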
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MEMORYBUFFER_H
+#define LLVM_SUPPORT_MEMORYBUFFER_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class error_code;
+template <class T> class OwningPtr;
+
+/// MemoryBuffer - This interface provides simple read-only access to a block
+/// of memory, and provides simple methods for reading files and standard input
+/// into a memory buffer.  In addition to basic access to the characters in the
+/// file, this interface guarantees you can read one character past the end of
+/// the file, and that this character will read as '\0'.
+///
+/// The '\0' guarantee is needed to support an optimization -- it's intended to
+/// be more efficient for clients which are reading all the data to stop
+/// reading when they encounter a '\0' than to continually check the file
+/// position to see if it has reached the end of the file.
+class MemoryBuffer {
+  const char *BufferStart; // Start of the buffer.
+  const char *BufferEnd;   // End of the buffer.
+
+  MemoryBuffer(const MemoryBuffer &);            // DO NOT IMPLEMENT
+  MemoryBuffer &operator=(const MemoryBuffer &); // DO NOT IMPLEMENT
+protected:
+  MemoryBuffer() {}
+  void init(const char *BufStart, const char *BufEnd);
+public:
+  virtual ~MemoryBuffer();
+
+  const char *getBufferStart() const { return BufferStart; }
+  const char *getBufferEnd() const   { return BufferEnd; }
+  size_t getBufferSize() const { return BufferEnd-BufferStart; }
+
+  StringRef getBuffer() const {
+    return StringRef(BufferStart, getBufferSize());
+  }
+
+  /// getBufferIdentifier - Return an identifier for this buffer, typically the
+  /// filename it was read from.
+  virtual const char *getBufferIdentifier() const {
+    return "Unknown buffer";
+  }
+
+  /// getFile - Open the specified file as a MemoryBuffer, returning a new
+  /// MemoryBuffer if successful, otherwise returning null.  If FileSize is
+  /// specified, this means that the client knows that the file exists and that
+  /// it has the specified size.
+  static error_code getFile(StringRef Filename, OwningPtr<MemoryBuffer> &result,
+                            int64_t FileSize = -1);
+  static error_code getFile(const char *Filename,
+                            OwningPtr<MemoryBuffer> &result,
+                            int64_t FileSize = -1);
+
+  /// getOpenFile - Given an already-open file descriptor, read the file and
+  /// return a MemoryBuffer.
+  static error_code getOpenFile(int FD, const char *Filename,
+                                OwningPtr<MemoryBuffer> &result,
+                                int64_t FileSize = -1);
+
+  /// getMemBuffer - Open the specified memory range as a MemoryBuffer.  Note
+  /// that InputData must be null terminated.
+  static MemoryBuffer *getMemBuffer(StringRef InputData,
+                                    StringRef BufferName = "");
+
+  /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
+  /// copying the contents and taking ownership of it.  InputData does not
+  /// have to be null terminated.
+  static MemoryBuffer *getMemBufferCopy(StringRef InputData,
+                                        StringRef BufferName = "");
+
+  /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
+  /// is completely initialized to zeros.  The memory is owned by the
+  /// MemoryBuffer object.
+  static MemoryBuffer *getNewMemBuffer(size_t Size, StringRef BufferName = "");
+
+  /// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
+  /// that is not initialized.  Note that the caller should initialize the
+  /// memory allocated by this method.  The memory is owned by the MemoryBuffer
+  /// object.
+  static MemoryBuffer *getNewUninitMemBuffer(size_t Size,
+                                             StringRef BufferName = "");
+
+  /// getSTDIN - Read all of stdin into a file buffer, and return it.
+  /// If an error occurs, this returns null and sets ec.
+  static error_code getSTDIN(OwningPtr<MemoryBuffer> &result);
+
+
+  /// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin
+  /// if the Filename is "-".  If an error occurs, this returns null and sets
+  /// ec.
+  static error_code getFileOrSTDIN(StringRef Filename,
+                                   OwningPtr<MemoryBuffer> &result,
+                                   int64_t FileSize = -1);
+  static error_code getFileOrSTDIN(const char *Filename,
+                                   OwningPtr<MemoryBuffer> &result,
+                                   int64_t FileSize = -1);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/MemoryObject.h b/final/include/llvm/Support/MemoryObject.h
new file mode 100644
index 00000000000..dec0f134b30
--- /dev/null
+++ b/final/include/llvm/Support/MemoryObject.h
@@ -0,0 +1,70 @@
+//===- MemoryObject.h - Abstract memory interface ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MEMORYOBJECT_H
+#define MEMORYOBJECT_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+/// MemoryObject - Abstract base class for contiguous addressable memory.
+///   Necessary for cases in which the memory is in another process, in a
+///   file, or on a remote machine.
+///   All size and offset parameters are uint64_ts, to allow 32-bit processes
+///   access to 64-bit address spaces.
+class MemoryObject {
+public:
+  /// Destructor   - Override as necessary.
+  virtual ~MemoryObject();
+
+  /// getBase      - Returns the lowest valid address in the region.
+  ///
+  /// @result      - The lowest valid address.
+  virtual uint64_t getBase() const = 0;
+
+  /// getExtent    - Returns the size of the region in bytes.  (The region is
+  ///                contiguous, so the highest valid address of the region
+  ///                is getBase() + getExtent() - 1).
+  ///
+  /// @result      - The size of the region.
+  virtual uint64_t getExtent() const = 0;
+
+  /// readByte     - Tries to read a single byte from the region.
+  ///
+  /// @param address - The address of the byte, in the same space as getBase().
+  /// @param ptr     - A pointer to a byte to be filled in.  Must be non-NULL.
+  /// @result        - 0 if successful; -1 if not.  Failure may be due to a
+  ///                  bounds violation or an implementation-specific error.
+  virtual int readByte(uint64_t address, uint8_t* ptr) const = 0;
+
+  /// readBytes    - Tries to read a contiguous range of bytes from the
+  ///                region, up to the end of the region.
+  ///                You should override this function if there is a quicker
+  ///                way than going back and forth with individual bytes.
+  ///
+  /// @param address - The address of the first byte, in the same space as
+  ///                  getBase().
+  /// @param size    - The maximum number of bytes to copy.
+  /// @param buf     - A pointer to a buffer to be filled in.  Must be non-NULL
+  ///                  and large enough to hold size bytes.
+  /// @param copied  - A pointer to a number that is filled in with the number
+  ///                  of bytes actually read.  May be NULL.
+  /// @result        - 0 if successful; -1 if not.  Failure may be due to a
+  ///                  bounds violation or an implementation-specific error.
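A typical consumer of the MemoryBuffer API above reads a file or stdin in one call. A sketch, assuming OwningPtr lives in llvm/ADT/OwningPtr.h and error_code in llvm/Support/system_error.h in this release tree (both header locations are assumptions; neither header appears in this patch):

#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/system_error.h"
using namespace llvm;

int main(int argc, char **argv) {
  OwningPtr<MemoryBuffer> Buf;
  // "-" selects stdin, mirroring the getFileOrSTDIN contract above.
  error_code ec = MemoryBuffer::getFileOrSTDIN(argc > 1 ? argv[1] : "-", Buf);
  if (ec)
    return 1;                  // open/read failed; ec holds the reason
  StringRef Data = Buf->getBuffer();
  // The guaranteed NUL one past getBufferEnd() lets scanners stop on '\0'.
  return Data.empty() ? 2 : 0;
}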
+  virtual int readBytes(uint64_t address,
+                        uint64_t size,
+                        uint8_t* buf,
+                        uint64_t* copied) const;
+};
+
+}
+
+#endif
+
diff --git a/final/include/llvm/Support/Mutex.h b/final/include/llvm/Support/Mutex.h
new file mode 100644
index 00000000000..42ea63060f6
--- /dev/null
+++ b/final/include/llvm/Support/Mutex.h
@@ -0,0 +1,154 @@
+//===- llvm/Support/Mutex.h - Mutex Operating System Concept -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_MUTEX_H
+#define LLVM_SYSTEM_MUTEX_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm
+{
+  namespace sys
+  {
+    /// @brief Platform agnostic Mutex class.
+    class MutexImpl
+    {
+    /// @name Constructors
+    /// @{
+    public:
+
+      /// Initializes the lock but doesn't acquire it. If \p recursive is set
+      /// to false, the lock will not be recursive which makes it cheaper but
+      /// also more likely to deadlock (same thread can't acquire more than
+      /// once).
+      /// @brief Default Constructor.
+      explicit MutexImpl(bool recursive = true);
+
+      /// Releases and removes the lock
+      /// @brief Destructor
+      ~MutexImpl();
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+
+      /// Attempts to unconditionally acquire the lock. If the lock is held by
+      /// another thread, this method will wait until it can acquire the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock.
+      bool acquire();
+
+      /// Attempts to release the lock. If the lock is held by the current
+      /// thread, the lock is released allowing other threads to acquire the
+      /// lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock.
+      bool release();
+
+      /// Attempts to acquire the lock without blocking. If the lock is not
+      /// available, this function returns false quickly (without blocking). If
+      /// the lock is available, it is acquired.
+      /// @returns false if any kind of error occurs or the lock is not
+      /// available, true otherwise.
+      /// @brief Try to acquire the lock.
+      bool tryacquire();
+
+    //@}
+    /// @name Platform Dependent Data
+    /// @{
+    private:
+      void* data_; ///< We don't know what the data will be
+
+    /// @}
+    /// @name Do Not Implement
+    /// @{
+    private:
+      MutexImpl(const MutexImpl & original);
+      void operator=(const MutexImpl &);
+    /// @}
+    };
+
+
+    /// SmartMutex - A mutex with a compile time constant parameter that
+    /// indicates whether this mutex should become a no-op when we're not
+    /// running in multithreaded mode.
+    template <bool mt_only>
+    class SmartMutex : public MutexImpl {
+      unsigned acquired;
+      bool recursive;
+    public:
+      explicit SmartMutex(bool rec = true) :
+        MutexImpl(rec), acquired(0), recursive(rec) { }
+
+      bool acquire() {
+        if (!mt_only || llvm_is_multithreaded()) {
+          return MutexImpl::acquire();
+        } else {
+          // Single-threaded debugging code.  This would be racy in
+          // multithreaded mode, but provides sanity checks in single
+          // threaded mode.
+          assert((recursive || acquired == 0) && "Lock already acquired!!");
+          ++acquired;
+          return true;
+        }
+      }
+
+      bool release() {
+        if (!mt_only || llvm_is_multithreaded()) {
+          return MutexImpl::release();
+        } else {
+          // Single-threaded debugging code.  This would be racy in
+          // multithreaded mode, but provides sanity checks in single
+          // threaded mode.
+          assert(((recursive && acquired) || (acquired == 1)) &&
+                 "Lock not acquired before release!");
+          --acquired;
+          return true;
+        }
+      }
+
+      bool tryacquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return MutexImpl::tryacquire();
+        else return true;
+      }
+
+      private:
+        SmartMutex(const SmartMutex & original);
+        void operator=(const SmartMutex &);
+    };
+
+    /// Mutex - A standard, always enforced mutex.
+    typedef SmartMutex<false> Mutex;
+
+    template <bool mt_only>
+    class SmartScopedLock {
+      SmartMutex<mt_only>& mtx;
+
+    public:
+      SmartScopedLock(SmartMutex<mt_only>& m) : mtx(m) {
+        mtx.acquire();
+      }
+
+      ~SmartScopedLock() {
+        mtx.release();
+      }
+    };
+
+    typedef SmartScopedLock<false> ScopedLock;
+  }
+}
+
+#endif
diff --git a/final/include/llvm/Support/MutexGuard.h b/final/include/llvm/Support/MutexGuard.h
new file mode 100644
index 00000000000..cd13bfe6eeb
--- /dev/null
+++ b/final/include/llvm/Support/MutexGuard.h
@@ -0,0 +1,41 @@
+//===-- Support/MutexGuard.h - Acquire/Release Mutex In Scope ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a guard for a block of code that ensures a Mutex is locked
+// upon construction and released upon destruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MUTEXGUARD_H
+#define LLVM_SUPPORT_MUTEXGUARD_H
+
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+  /// Instances of this class acquire a given Mutex Lock when constructed and
+  /// hold that lock until destruction. The intention is to instantiate one of
+  /// these on the stack at the top of some scope to be assured that C++
+  /// destruction of the object will always release the Mutex and thus avoid
+  /// a host of nasty multi-threading problems in the face of exceptions, etc.
+  /// @brief Guard a section of code with a Mutex.
+  class MutexGuard {
+    sys::Mutex &M;
+    MutexGuard(const MutexGuard &);     // DO NOT IMPLEMENT
+    void operator=(const MutexGuard &); // DO NOT IMPLEMENT
+  public:
+    MutexGuard(sys::Mutex &m) : M(m) { M.acquire(); }
+    ~MutexGuard() { M.release(); }
+    /// holds - Returns true if this locker instance holds the specified lock.
+    /// This is mostly used in assertions to validate that the correct mutex
+    /// is held.
+    bool holds(const sys::Mutex& lock) const { return &M == &lock; }
+  };
+}
+
+#endif // LLVM_SUPPORT_MUTEXGUARD_H
diff --git a/final/include/llvm/Support/NoFolder.h b/final/include/llvm/Support/NoFolder.h
new file mode 100644
index 00000000000..92a9fd695e5
--- /dev/null
+++ b/final/include/llvm/Support/NoFolder.h
@@ -0,0 +1,288 @@
+//======-- llvm/Support/NoFolder.h - Constant folding helper -*- C++ -*-======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the NoFolder class, a helper for IRBuilder.  It provides
It provides +// IRBuilder with a set of methods for creating unfolded constants. This is +// useful for learners trying to understand how LLVM IR works, and who don't +// want details to be hidden by the constant folder. For general constant +// creation and folding, use ConstantExpr and the routines in +// llvm/Analysis/ConstantFolding.h. +// +// Note: since it is not actually possible to create unfolded constants, this +// class returns instructions rather than constants. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_NOFOLDER_H +#define LLVM_SUPPORT_NOFOLDER_H + +#include "llvm/Constants.h" +#include "llvm/Instructions.h" + +namespace llvm { + +class LLVMContext; + +/// NoFolder - Create "constants" (actually, instructions) with no folding. +class NoFolder { +public: + explicit NoFolder(LLVMContext &) {} + + //===--------------------------------------------------------------------===// + // Binary Operators + //===--------------------------------------------------------------------===// + + Instruction *CreateAdd(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateAdd(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; + } + Instruction *CreateNSWAdd(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWAdd(LHS, RHS); + } + Instruction *CreateNUWAdd(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNUWAdd(LHS, RHS); + } + Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateFAdd(LHS, RHS); + } + Instruction *CreateSub(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateSub(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; + } + Instruction *CreateNSWSub(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWSub(LHS, RHS); + } + Instruction *CreateNUWSub(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNUWSub(LHS, RHS); + } + Instruction *CreateFSub(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateFSub(LHS, RHS); + } + Instruction *CreateMul(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateMul(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; + } + Instruction *CreateNSWMul(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNSWMul(LHS, RHS); + } + Instruction *CreateNUWMul(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateNUWMul(LHS, RHS); + } + Instruction *CreateFMul(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateFMul(LHS, RHS); + } + Instruction *CreateUDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateUDiv(LHS, RHS); + return BinaryOperator::CreateExactUDiv(LHS, RHS); + } + Instruction *CreateExactUDiv(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateExactUDiv(LHS, RHS); + } + Instruction *CreateSDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateSDiv(LHS, RHS); + return BinaryOperator::CreateExactSDiv(LHS, RHS); + } + Instruction *CreateExactSDiv(Constant *LHS, Constant *RHS) const { + return 
BinaryOperator::CreateExactSDiv(LHS, RHS); + } + Instruction *CreateFDiv(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateFDiv(LHS, RHS); + } + Instruction *CreateURem(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateURem(LHS, RHS); + } + Instruction *CreateSRem(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateSRem(LHS, RHS); + } + Instruction *CreateFRem(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateFRem(LHS, RHS); + } + Instruction *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false, + bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateShl(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; + } + Instruction *CreateLShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateLShr(LHS, RHS); + return BinaryOperator::CreateExactLShr(LHS, RHS); + } + Instruction *CreateAShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateAShr(LHS, RHS); + return BinaryOperator::CreateExactAShr(LHS, RHS); + } + Instruction *CreateAnd(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateAnd(LHS, RHS); + } + Instruction *CreateOr(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateOr(LHS, RHS); + } + Instruction *CreateXor(Constant *LHS, Constant *RHS) const { + return BinaryOperator::CreateXor(LHS, RHS); + } + + Instruction *CreateBinOp(Instruction::BinaryOps Opc, + Constant *LHS, Constant *RHS) const { + return BinaryOperator::Create(Opc, LHS, RHS); + } + + //===--------------------------------------------------------------------===// + // Unary Operators + //===--------------------------------------------------------------------===// + + Instruction *CreateNeg(Constant *C, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateNeg(C); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; + } + Instruction *CreateNSWNeg(Constant *C) const { + return BinaryOperator::CreateNSWNeg(C); + } + Instruction *CreateNUWNeg(Constant *C) const { + return BinaryOperator::CreateNUWNeg(C); + } + Instruction *CreateFNeg(Constant *C) const { + return BinaryOperator::CreateFNeg(C); + } + Instruction *CreateNot(Constant *C) const { + return BinaryOperator::CreateNot(C); + } + + //===--------------------------------------------------------------------===// + // Memory Instructions + //===--------------------------------------------------------------------===// + + Constant *CreateGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getGetElementPtr(C, IdxList, NumIdx); + } + Instruction *CreateGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return GetElementPtrInst::Create(C, IdxList, IdxList+NumIdx); + } + + Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList, + unsigned NumIdx) const { + return ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx); + } + Instruction *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList, + unsigned NumIdx) const { + return GetElementPtrInst::CreateInBounds(C, IdxList, IdxList+NumIdx); + } + + //===--------------------------------------------------------------------===// + // Cast/Conversion Operators + //===--------------------------------------------------------------------===// + + 
Instruction *CreateCast(Instruction::CastOps Op, Constant *C, + const Type *DestTy) const { + return CastInst::Create(Op, C, DestTy); + } + Instruction *CreatePointerCast(Constant *C, const Type *DestTy) const { + return CastInst::CreatePointerCast(C, DestTy); + } + Instruction *CreateIntCast(Constant *C, const Type *DestTy, + bool isSigned) const { + return CastInst::CreateIntegerCast(C, DestTy, isSigned); + } + Instruction *CreateFPCast(Constant *C, const Type *DestTy) const { + return CastInst::CreateFPCast(C, DestTy); + } + + Instruction *CreateBitCast(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::BitCast, C, DestTy); + } + Instruction *CreateIntToPtr(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::IntToPtr, C, DestTy); + } + Instruction *CreatePtrToInt(Constant *C, const Type *DestTy) const { + return CreateCast(Instruction::PtrToInt, C, DestTy); + } + Instruction *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const { + return CastInst::CreateZExtOrBitCast(C, DestTy); + } + Instruction *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const { + return CastInst::CreateSExtOrBitCast(C, DestTy); + } + + Instruction *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const { + return CastInst::CreateTruncOrBitCast(C, DestTy); + } + + //===--------------------------------------------------------------------===// + // Compare Instructions + //===--------------------------------------------------------------------===// + + Instruction *CreateICmp(CmpInst::Predicate P, + Constant *LHS, Constant *RHS) const { + return new ICmpInst(P, LHS, RHS); + } + Instruction *CreateFCmp(CmpInst::Predicate P, + Constant *LHS, Constant *RHS) const { + return new FCmpInst(P, LHS, RHS); + } + + //===--------------------------------------------------------------------===// + // Other Instructions + //===--------------------------------------------------------------------===// + + Instruction *CreateSelect(Constant *C, + Constant *True, Constant *False) const { + return SelectInst::Create(C, True, False); + } + + Instruction *CreateExtractElement(Constant *Vec, Constant *Idx) const { + return ExtractElementInst::Create(Vec, Idx); + } + + Instruction *CreateInsertElement(Constant *Vec, Constant *NewElt, + Constant *Idx) const { + return InsertElementInst::Create(Vec, NewElt, Idx); + } + + Instruction *CreateShuffleVector(Constant *V1, Constant *V2, + Constant *Mask) const { + return new ShuffleVectorInst(V1, V2, Mask); + } + + Instruction *CreateExtractValue(Constant *Agg, const unsigned *IdxList, + unsigned NumIdx) const { + return ExtractValueInst::Create(Agg, IdxList, IdxList+NumIdx); + } + + Instruction *CreateInsertValue(Constant *Agg, Constant *Val, + const unsigned *IdxList, + unsigned NumIdx) const { + return InsertValueInst::Create(Agg, Val, IdxList, IdxList+NumIdx); + } +}; + +} + +#endif diff --git a/final/include/llvm/Support/OutputBuffer.h b/final/include/llvm/Support/OutputBuffer.h new file mode 100644 index 00000000000..6b98e99e28e --- /dev/null +++ b/final/include/llvm/Support/OutputBuffer.h @@ -0,0 +1,166 @@ +//=== OutputBuffer.h - Output Buffer ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods to output values to a data buffer. 
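NoFolder is meant to be plugged into IRBuilder as its folder template parameter. A sketch of that wiring follows; IRBuilder's parameter list (IRBuilder<preserveNames, FolderTy>) is assumed from the same release and is not shown in this patch. With the default ConstantFolder the add below would fold to the constant 5; with NoFolder it stays visible as an add instruction:

#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/NoFolder.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Function *F = cast<Function>(
      M.getOrInsertFunction("f", Type::getInt32Ty(Ctx), (Type *)0));
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);

  IRBuilder<true, NoFolder> B(BB);   // swap out the constant folder
  Value *Two = B.getInt32(2), *Three = B.getInt32(3);
  // Stays in the IR as 'add i32 2, 3' instead of folding to 5.
  Value *Sum = B.CreateAdd(Two, Three, "sum");
  B.CreateRet(Sum);
  return 0;
}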
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_OUTPUTBUFFER_H +#define LLVM_SUPPORT_OUTPUTBUFFER_H + +#include +#include +#include + +namespace llvm { + + class OutputBuffer { + /// Output buffer. + std::vector &Output; + + /// is64Bit/isLittleEndian - This information is inferred from the target + /// machine directly, indicating what header values and flags to set. + bool is64Bit, isLittleEndian; + public: + OutputBuffer(std::vector &Out, + bool is64bit, bool le) + : Output(Out), is64Bit(is64bit), isLittleEndian(le) {} + + // align - Emit padding into the file until the current output position is + // aligned to the specified power of two boundary. + void align(unsigned Boundary) { + assert(Boundary && (Boundary & (Boundary - 1)) == 0 && + "Must align to 2^k boundary"); + size_t Size = Output.size(); + + if (Size & (Boundary - 1)) { + // Add padding to get alignment to the correct place. + size_t Pad = Boundary - (Size & (Boundary - 1)); + Output.resize(Size + Pad); + } + } + + //===------------------------------------------------------------------===// + // Out Functions - Output the specified value to the data buffer. + + void outbyte(unsigned char X) { + Output.push_back(X); + } + void outhalf(unsigned short X) { + if (isLittleEndian) { + Output.push_back(X & 255); + Output.push_back(X >> 8); + } else { + Output.push_back(X >> 8); + Output.push_back(X & 255); + } + } + void outword(unsigned X) { + if (isLittleEndian) { + Output.push_back((X >> 0) & 255); + Output.push_back((X >> 8) & 255); + Output.push_back((X >> 16) & 255); + Output.push_back((X >> 24) & 255); + } else { + Output.push_back((X >> 24) & 255); + Output.push_back((X >> 16) & 255); + Output.push_back((X >> 8) & 255); + Output.push_back((X >> 0) & 255); + } + } + void outxword(uint64_t X) { + if (isLittleEndian) { + Output.push_back(unsigned(X >> 0) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 56) & 255); + } else { + Output.push_back(unsigned(X >> 56) & 255); + Output.push_back(unsigned(X >> 48) & 255); + Output.push_back(unsigned(X >> 40) & 255); + Output.push_back(unsigned(X >> 32) & 255); + Output.push_back(unsigned(X >> 24) & 255); + Output.push_back(unsigned(X >> 16) & 255); + Output.push_back(unsigned(X >> 8) & 255); + Output.push_back(unsigned(X >> 0) & 255); + } + } + void outaddr32(unsigned X) { + outword(X); + } + void outaddr64(uint64_t X) { + outxword(X); + } + void outaddr(uint64_t X) { + if (!is64Bit) + outword((unsigned)X); + else + outxword(X); + } + void outstring(const std::string &S, unsigned Length) { + unsigned len_to_copy = static_cast(S.length()) < Length + ? static_cast(S.length()) : Length; + unsigned len_to_fill = static_cast(S.length()) < Length + ? Length - static_cast(S.length()) : 0; + + for (unsigned i = 0; i < len_to_copy; ++i) + outbyte(S[i]); + + for (unsigned i = 0; i < len_to_fill; ++i) + outbyte(0); + } + + //===------------------------------------------------------------------===// + // Fix Functions - Replace an existing entry at an offset. + + void fixhalf(unsigned short X, unsigned Offset) { + unsigned char *P = &Output[Offset]; + P[0] = (X >> (isLittleEndian ? 0 : 8)) & 255; + P[1] = (X >> (isLittleEndian ? 
8 : 0)) & 255; + } + void fixword(unsigned X, unsigned Offset) { + unsigned char *P = &Output[Offset]; + P[0] = (X >> (isLittleEndian ? 0 : 24)) & 255; + P[1] = (X >> (isLittleEndian ? 8 : 16)) & 255; + P[2] = (X >> (isLittleEndian ? 16 : 8)) & 255; + P[3] = (X >> (isLittleEndian ? 24 : 0)) & 255; + } + void fixxword(uint64_t X, unsigned Offset) { + unsigned char *P = &Output[Offset]; + P[0] = (X >> (isLittleEndian ? 0 : 56)) & 255; + P[1] = (X >> (isLittleEndian ? 8 : 48)) & 255; + P[2] = (X >> (isLittleEndian ? 16 : 40)) & 255; + P[3] = (X >> (isLittleEndian ? 24 : 32)) & 255; + P[4] = (X >> (isLittleEndian ? 32 : 24)) & 255; + P[5] = (X >> (isLittleEndian ? 40 : 16)) & 255; + P[6] = (X >> (isLittleEndian ? 48 : 8)) & 255; + P[7] = (X >> (isLittleEndian ? 56 : 0)) & 255; + } + void fixaddr(uint64_t X, unsigned Offset) { + if (!is64Bit) + fixword((unsigned)X, Offset); + else + fixxword(X, Offset); + } + + unsigned char &operator[](unsigned Index) { + return Output[Index]; + } + const unsigned char &operator[](unsigned Index) const { + return Output[Index]; + } + }; + +} // end llvm namespace + +#endif // LLVM_SUPPORT_OUTPUTBUFFER_H diff --git a/final/include/llvm/Support/PassNameParser.h b/final/include/llvm/Support/PassNameParser.h new file mode 100644 index 00000000000..a24a6f0c5e9 --- /dev/null +++ b/final/include/llvm/Support/PassNameParser.h @@ -0,0 +1,137 @@ +//===- llvm/Support/PassNameParser.h ----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file the PassNameParser and FilteredPassNameParser<> classes, which are +// used to add command line arguments to a utility for all of the passes that +// have been registered into the system. +// +// The PassNameParser class adds ALL passes linked into the system (that are +// creatable) as command line arguments to the tool (when instantiated with the +// appropriate command line option template). The FilteredPassNameParser<> +// template is used for the same purposes as PassNameParser, except that it only +// includes passes that have a PassType that are compatible with the filter +// (which is the template argument). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PASS_NAME_PARSER_H +#define LLVM_SUPPORT_PASS_NAME_PARSER_H + +#include "llvm/Pass.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +//===----------------------------------------------------------------------===// +// PassNameParser class - Make use of the pass registration mechanism to +// automatically add a command line argument to opt for each pass. +// +class PassNameParser : public PassRegistrationListener, + public cl::parser { + cl::Option *Opt; +public: + PassNameParser() : Opt(0) {} + virtual ~PassNameParser(); + + void initialize(cl::Option &O) { + Opt = &O; + cl::parser::initialize(O); + + // Add all of the passes to the map that got initialized before 'this' did. + enumeratePasses(); + } + + // ignorablePassImpl - Can be overriden in subclasses to refine the list of + // which passes we want to include. 
+
+#ifndef LLVM_SUPPORT_PASS_NAME_PARSER_H
+#define LLVM_SUPPORT_PASS_NAME_PARSER_H
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// PassNameParser class - Make use of the pass registration mechanism to
+// automatically add a command line argument to opt for each pass.
+//
+class PassNameParser : public PassRegistrationListener,
+                       public cl::parser<const PassInfo*> {
+  cl::Option *Opt;
+public:
+  PassNameParser() : Opt(0) {}
+  virtual ~PassNameParser();
+
+  void initialize(cl::Option &O) {
+    Opt = &O;
+    cl::parser<const PassInfo*>::initialize(O);
+
+    // Add all of the passes to the map that got initialized before 'this' did.
+    enumeratePasses();
+  }
+
+  // ignorablePassImpl - Can be overridden in subclasses to refine the list of
+  // which passes we want to include.
+  //
+  virtual bool ignorablePassImpl(const PassInfo *P) const { return false; }
+
+  inline bool ignorablePass(const PassInfo *P) const {
+    // Ignore non-selectable and non-constructible passes! Ignore
+    // non-optimizations.
+    return P->getPassArgument() == 0 || *P->getPassArgument() == 0 ||
+           P->getNormalCtor() == 0 || ignorablePassImpl(P);
+  }
+
+  // Implement the PassRegistrationListener callbacks used to populate our map
+  //
+  virtual void passRegistered(const PassInfo *P) {
+    if (ignorablePass(P) || !Opt) return;
+    if (findOption(P->getPassArgument()) != getNumOptions()) {
+      errs() << "Two passes with the same argument (-"
+             << P->getPassArgument() << ") attempted to be registered!\n";
+      llvm_unreachable(0);
+    }
+    addLiteralOption(P->getPassArgument(), P, P->getPassName());
+  }
+  virtual void passEnumerate(const PassInfo *P) { passRegistered(P); }
+
+  // printOptionInfo - Print out information about this option. Override the
+  // default implementation to sort the table before we print...
+  virtual void printOptionInfo(const cl::Option &O, size_t GlobalWidth) const {
+    PassNameParser *PNP = const_cast<PassNameParser*>(this);
+    array_pod_sort(PNP->Values.begin(), PNP->Values.end(), ValLessThan);
+    cl::parser<const PassInfo*>::printOptionInfo(O, GlobalWidth);
+  }
+
+private:
+  // ValLessThan - Provide a sorting comparator for Values elements...
+  static int ValLessThan(const void *VT1, const void *VT2) {
+    typedef PassNameParser::OptionInfo ValType;
+    return std::strcmp(static_cast<const ValType *>(VT1)->Name,
+                       static_cast<const ValType *>(VT2)->Name);
+  }
+};
+
+///===----------------------------------------------------------------------===//
+/// FilteredPassNameParser class - Make use of the pass registration
+/// mechanism to automatically add a command line argument to opt for
+/// each pass that satisfies a filter criteria. Filter should return
+/// true for passes to be registered as command-line options.
+///
+template<typename Filter>
+class FilteredPassNameParser : public PassNameParser {
+private:
+  Filter filter;
+
+public:
+  bool ignorablePassImpl(const PassInfo *P) const { return !filter(*P); }
+};
+
+///===----------------------------------------------------------------------===//
+/// PassArgFilter - A filter for use with FilteredPassNameParser that only
+/// accepts a Pass whose Arg matches certain strings.
+///
+/// Use like this:
+///
+/// extern const char AllowedPassArgs[] = "-anders_aa -dse";
+///
+/// static cl::list<
+///   const PassInfo*,
+///   bool,
+///   FilteredPassNameParser<PassArgFilter<AllowedPassArgs> > >
+/// PassList(cl::desc("Passes available:"));
+///
+/// Only the -anders_aa and -dse options will be available to the user.
+///
+template<const char *Args>
+class PassArgFilter {
+public:
+  bool operator()(const PassInfo &P) const {
+    return(std::strstr(Args, P.getPassArgument()));
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/Path.h b/final/include/llvm/Support/Path.h
new file mode 100644
index 00000000000..196eecce818
--- /dev/null
+++ b/final/include/llvm/Support/Path.h
@@ -0,0 +1,16 @@
+//===- llvm/Support/Path.h - Path Operating System Concept ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file currently includes both PathV1 and PathV2 to facilitate moving
+// clients over to the new interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PathV1.h"
+#include "llvm/Support/PathV2.h"
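Since Path.h pulls in both generations of the API, mixed code compiles against a single include. A hedged sketch of the transition pattern this enables (the function name extensionOf is illustrative):

    #include "llvm/Support/Path.h"
    using namespace llvm;

    StringRef extensionOf(const sys::Path &Old) {
      // Old code keeps the PathV1 object; new code calls PathV2 free
      // functions on its string form.
      return sys::path::extension(Old.str());
    }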
diff --git a/final/include/llvm/Support/PathV1.h b/final/include/llvm/Support/PathV1.h
new file mode 100644
index 00000000000..d7753a3e71e
--- /dev/null
+++ b/final/include/llvm/Support/PathV1.h
@@ -0,0 +1,755 @@
+//===- llvm/Support/PathV1.h - Path Operating System Concept ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Path class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_PATH_H
+#define LLVM_SYSTEM_PATH_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/TimeValue.h"
+#include <set>
+#include <string>
+#include <vector>
+
+#define LLVM_PATH_DEPRECATED_MSG(replacement) \
+  "PathV1 has been deprecated and will be removed as soon as all LLVM and" \
+  " Clang clients have been moved over to PathV2. Please use `" #replacement \
+  "` from PathV2 instead."
+
+namespace llvm {
+namespace sys {
+
+  /// This structure provides basic file system information about a file. It
+  /// is patterned after the stat(2) Unix operating system call but made
+  /// platform independent and eliminates many of the unix-specific fields.
+  /// However, to support llvm-ar, the mode, user, and group fields are
+  /// retained. These pertain to unix security and may not have a meaningful
+  /// value on non-Unix platforms. However, the other fields should
+  /// always be applicable on all platforms. The structure is filled in by
+  /// the PathWithStatus class.
+  /// @brief File status structure
+  class FileStatus {
+  public:
+    uint64_t fileSize;   ///< Size of the file in bytes
+    TimeValue modTime;   ///< Time of file's modification
+    uint32_t mode;       ///< Mode of the file, if applicable
+    uint32_t user;       ///< User ID of owner, if applicable
+    uint32_t group;      ///< Group ID of owner, if applicable
+    uint64_t uniqueID;   ///< A number to uniquely ID this file
+    bool isDir  : 1;     ///< True if this is a directory.
+    bool isFile : 1;     ///< True if this is a file.
+
+    FileStatus() : fileSize(0), modTime(0,0), mode(0777), user(999),
+                   group(999), uniqueID(0), isDir(false), isFile(false) { }
+
+    TimeValue getTimestamp() const { return modTime; }
+    uint64_t getSize() const { return fileSize; }
+    uint32_t getMode() const { return mode; }
+    uint32_t getUser() const { return user; }
+    uint32_t getGroup() const { return group; }
+    uint64_t getUniqueID() const { return uniqueID; }
+  };
+
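A short sketch of how FileStatus is typically reached: it is not usually filled in by hand, but returned from PathWithStatus::getFileStatus(), declared later in this file. The function name sizeOf is illustrative:

    #include "llvm/Support/PathV1.h"
    using namespace llvm;

    uint64_t sizeOf(StringRef Name) {
      sys::PathWithStatus PWS(Name);
      // Returns null on failure; the optional string argument would carry
      // the error message.
      if (const sys::FileStatus *FS = PWS.getFileStatus())
        return FS->getSize();
      return 0;
    }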
+  /// This class provides an abstraction for the path to a file or directory
+  /// in the operating system's filesystem and provides various basic operations
+  /// on it. Note that this class only represents the name of a path to a file
+  /// or directory which may or may not be valid for a given machine's file
+  /// system. The class is patterned after the java.io.File class with various
+  /// extensions and several omissions (not relevant to LLVM). A Path object
+  /// ensures that the path it encapsulates is syntactically valid for the
+  /// operating system it is running on but does not ensure correctness for
+  /// any particular file system. That is, a syntactically valid path might
+  /// specify path components that do not exist in the file system and using
+  /// such a Path to act on the file system could produce errors. There is one
+  /// invalid Path value which is permitted: the empty path. The class should
+  /// never allow a syntactically invalid non-empty path name to be assigned.
+  /// Empty paths are required in order to indicate an error result in some
+  /// situations. If the path is empty, the isValid operation will return
+  /// false. All operations will fail if isValid is false. Operations that
+  /// change the path will either return false if it would cause a syntactically
+  /// invalid path name (in which case the Path object is left unchanged) or
+  /// throw an std::string exception indicating the error. The methods are
+  /// grouped into four basic categories: Path Accessors (provide information
+  /// about the path without accessing disk), Disk Accessors (provide
+  /// information about the underlying file or directory), Path Mutators
+  /// (change the path information, not the disk), and Disk Mutators (change
+  /// the disk file/directory referenced by the path). The Disk Mutator methods
+  /// all have the word "disk" embedded in their method name to reinforce the
+  /// notion that the operation modifies the file system.
+  /// @since 1.4
+  /// @brief An abstraction for operating system paths.
+  class Path {
+    /// @name Constructors
+    /// @{
+    public:
+      /// Construct a path to the root directory of the file system. The root
+      /// directory is a top level directory above which there are no more
+      /// directories. For example, on UNIX, the root directory is /. On Windows
+      /// it is file:///. Other operating systems may have different notions of
+      /// what the root directory is or none at all. In that case, a consistent
+      /// default root directory will be used.
+      LLVM_ATTRIBUTE_DEPRECATED(static Path GetRootDirectory(),
+        LLVM_PATH_DEPRECATED_MSG(NOTHING));
+
+      /// Construct a path to a unique temporary directory that is created in
+      /// a "standard" place for the operating system. The directory is
+      /// guaranteed to be created on exit from this function. If the directory
+      /// cannot be created, the function will throw an exception.
+      /// @returns an invalid path (empty) on error
+      /// @param ErrMsg Optional place for an error message if an error occurs
+      /// @brief Construct a path to a new, unique, existing temporary
+      /// directory.
+      static Path GetTemporaryDirectory(std::string* ErrMsg = 0);
+
+      /// Construct a vector of sys::Path that contains the "standard" system
+      /// library paths suitable for linking into programs.
+      /// @brief Construct a path to the system library directory
+      static void GetSystemLibraryPaths(std::vector<sys::Path>& Paths);
+
+      /// Construct a vector of sys::Path that contains the "standard" bitcode
+      /// library paths suitable for linking into an llvm program. This function
+      /// *must* return the value of LLVM_LIB_SEARCH_PATH as well as the value
+      /// of LLVM_LIBDIR. It also must provide the System library paths as
+      /// returned by GetSystemLibraryPaths.
+      /// @see GetSystemLibraryPaths
+      /// @brief Construct a list of directories in which bitcode could be
+      /// found.
+      static void GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths);
+
+      /// Find the path to a library using its short name. Use the system
+      /// dependent library paths to locate the library.
+      /// @brief Find a library.
+      static Path FindLibrary(std::string& short_name);
+
+      /// Construct a path to the default LLVM configuration directory. The
+      /// implementation must ensure that this is a well-known (same on many
+      /// systems) directory in which llvm configuration files exist. For
+      /// example, on Unix, the /etc/llvm directory has been selected.
+      /// @brief Construct a path to the default LLVM configuration directory
+      static Path GetLLVMDefaultConfigDir();
+
+      /// Construct a path to the LLVM installed configuration directory. The
+      /// implementation must ensure that this refers to the "etc" directory of
+      /// the LLVM installation. This is the location where configuration files
+      /// will be located for a particular installation of LLVM on a machine.
+      /// @brief Construct a path to the LLVM installed configuration directory
+      static Path GetLLVMConfigDir();
+
+      /// Construct a path to the current user's home directory. The
+      /// implementation must use an operating system specific mechanism for
+      /// determining the user's home directory. For example, the environment
+      /// variable "HOME" could be used on Unix. If a given operating system
+      /// does not have the concept of a user's home directory, this static
+      /// constructor must provide the same result as GetRootDirectory.
+      /// @brief Construct a path to the current user's "home" directory
+      static Path GetUserHomeDirectory();
+
+      /// Construct a path to the current directory for the current process.
+      /// @returns The current working directory.
+      /// @brief Returns the current working directory.
+      static Path GetCurrentDirectory();
+
+      /// Return the suffix commonly used on file names that contain an
+      /// executable.
+      /// @returns The executable file suffix for the current platform.
+      /// @brief Return the executable file suffix.
+      static StringRef GetEXESuffix();
+
+      /// Return the suffix commonly used on file names that contain a shared
+      /// object, shared archive, or dynamic link library. Such files are
+      /// linked at runtime into a process and their code images are shared
+      /// between processes.
+      /// @returns The dynamic link library suffix for the current platform.
+      /// @brief Return the dynamic link library suffix.
+      static StringRef GetDLLSuffix();
+
+      /// GetMainExecutable - Return the path to the main executable, given the
+      /// value of argv[0] from program startup and the address of main itself.
+      /// In extremis, this function may fail and return an empty path.
+      static Path GetMainExecutable(const char *argv0, void *MainAddr);
+
+      /// This is one of the very few ways in which a path can be constructed
+      /// with a syntactically invalid name. The only *legal* invalid name is an
+      /// empty one. Other invalid names are not permitted. Empty paths are
+      /// provided so that they can be used to indicate null or error results in
+      /// other lib/System functionality.
+      /// @brief Construct an empty (and invalid) path.
+      Path() : path() {}
+      Path(const Path &that) : path(that.path) {}
+
+      /// This constructor will accept a char* or std::string as a path. No
+      /// checking is done on this path to determine if it is valid. To
+      /// determine validity of the path, use the isValid method.
+      /// @param p The path to assign.
+      /// @brief Construct a Path from a string.
+      explicit Path(StringRef p);
+
+      /// This constructor will accept a character range as a path. No checking
+      /// is done on this path to determine if it is valid. To determine
+      /// validity of the path, use the isValid method.
+      /// @param StrStart A pointer to the first character of the path name
+      /// @param StrLen The length of the path name at StrStart
+      /// @brief Construct a Path from a string.
+      Path(const char *StrStart, unsigned StrLen);
+
+    /// @}
+    /// @name Operators
+    /// @{
+    public:
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      Path &operator=(const Path &that) {
+        path = that.path;
+        return *this;
+      }
+
+      /// Makes a copy of \p that to \p this.
+      /// @param that A StringRef denoting the path
+      /// @returns \p this
+      /// @brief Assignment Operator
+      Path &operator=(StringRef that);
+
+      /// Compares \p this Path with \p that Path for equality.
+      /// @returns true if \p this and \p that refer to the same thing.
+      /// @brief Equality Operator
+      bool operator==(const Path &that) const;
+
+      /// Compares \p this Path with \p that Path for inequality.
+      /// @returns true if \p this and \p that refer to different things.
+      /// @brief Inequality Operator
+      bool operator!=(const Path &that) const { return !(*this == that); }
+
+      /// Determines if \p this Path is less than \p that Path. This is required
+      /// so that Path objects can be placed into ordered collections (e.g.
+      /// std::map). The comparison is done lexicographically as defined by
+      /// the std::string::compare method.
+      /// @returns true if \p this path is lexicographically less than \p that.
+      /// @brief Less Than Operator
+      bool operator<(const Path& that) const;
+
+    /// @}
+    /// @name Path Accessors
+    /// @{
+    public:
+      /// This function will use an operating system specific algorithm to
+      /// determine if the current value of \p this is a syntactically valid
+      /// path name for the operating system. The path name does not need to
+      /// exist, validity is simply syntactical. Empty paths are always invalid.
+      /// @returns true iff the path name is syntactically legal for the
+      /// host operating system.
+      /// @brief Determine if a path is syntactically valid or not.
+      bool isValid() const;
+
+      /// This function determines if the contents of the path name are empty.
+      /// That is, the path name has a zero length. This does NOT determine if
+      /// the file is empty. To get the length of the file itself, use the
+      /// PathWithStatus::getFileStatus() method and then the getSize() method
+      /// on the returned FileStatus object.
+      /// @returns true iff the path is empty.
+      /// @brief Determines if the path name is empty (invalid).
+      bool isEmpty() const { return path.empty(); }
+
+      /// This function returns the last component of the path name. The last
+      /// component is the file or directory name occurring after the last
+      /// directory separator. If no directory separator is present, the entire
+      /// path name is returned (i.e. same as toString).
+      /// @returns StringRef containing the last component of the path name.
+      /// @brief Returns the last component of the path name.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        StringRef getLast() const,
+        LLVM_PATH_DEPRECATED_MSG(path::filename));
+
+      /// This function strips off the path and suffix of the file or directory
+      /// name and returns just the basename. For example /a/foo.bar would cause
+      /// this function to return "foo".
+      /// @returns StringRef containing the basename of the path
+      /// @brief Get the base name of the path
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getBasename() const,
+        LLVM_PATH_DEPRECATED_MSG(path::stem));
+
+      /// This function strips off the last component of the path name and
+      /// returns the remainder, i.e. everything up to (but not including) the
+      /// last path separator ('/' on Unix, '\' on Windows).
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getDirname() const,
+        LLVM_PATH_DEPRECATED_MSG(path::parent_path));
+
+      /// This function strips off the path and basename (up to and
+      /// including the last dot) of the file or directory name and
+      /// returns just the suffix. For example /a/foo.bar would cause
+      /// this function to return "bar".
+      /// @returns StringRef containing the suffix of the path
+      /// @brief Get the suffix of the path
+      LLVM_ATTRIBUTE_DEPRECATED(StringRef getSuffix() const,
+        LLVM_PATH_DEPRECATED_MSG(path::extension));
+
+      /// Obtain a 'C' string for the path name.
+      /// @returns a 'C' string containing the path name.
+      /// @brief Returns the path as a C string.
+      const char *c_str() const { return path.c_str(); }
+      const std::string &str() const { return path; }
+
+
+      /// size - Return the length in bytes of this path name.
+      size_t size() const { return path.size(); }
+
+      /// empty - Returns true if the path is empty.
+      unsigned empty() const { return path.empty(); }
+
+    /// @}
+    /// @name Disk Accessors
+    /// @{
+    public:
+      /// This function determines if the path name is absolute, as opposed to
+      /// relative.
+      /// @brief Determine if the path is absolute.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        bool isAbsolute() const,
+        LLVM_PATH_DEPRECATED_MSG(path::is_absolute));
+
+      /// This function determines if the path name is absolute, as opposed to
+      /// relative.
+      /// @brief Determine if the path is absolute.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        static bool isAbsolute(const char *NameStart, unsigned NameLen),
+        LLVM_PATH_DEPRECATED_MSG(path::is_absolute));
+
+      /// This function opens the file associated with the path name provided by
+      /// the Path object and reads its magic number. If the magic number at the
+      /// start of the file matches \p magic, true is returned. In all other
+      /// cases (file not found, file not accessible, etc.) it returns false.
+      /// @returns true if the magic number of the file matches \p magic.
+      /// @brief Determine if file has a specific magic number
+      LLVM_ATTRIBUTE_DEPRECATED(bool hasMagicNumber(StringRef magic) const,
+        LLVM_PATH_DEPRECATED_MSG(fs::has_magic));
+
+      /// This function retrieves the first \p len bytes of the file associated
+      /// with \p this. These bytes are returned as the "magic number" in the
+      /// \p Magic parameter.
+      /// @returns true if the Path is a file and the magic number is retrieved,
+      /// false otherwise.
+      /// @brief Get the file's magic number.
+      bool getMagicNumber(std::string& Magic, unsigned len) const;
+
+      /// This function determines if the path name in the object references an
+      /// archive file by looking at its magic number.
+      /// @returns true if the file starts with the magic number for an archive
+      /// file.
+      /// @brief Determine if the path references an archive file.
+      bool isArchive() const;
+
+      /// This function determines if the path name in the object references an
+      /// LLVM Bitcode file by looking at its magic number.
+      /// @returns true if the file starts with the magic number for LLVM
+      /// bitcode files.
+      /// @brief Determine if the path references a bitcode file.
+      bool isBitcodeFile() const;
+
+      /// This function determines if the path name in the object references a
+      /// native Dynamic Library (shared library, shared object) by looking at
+      /// the file's magic number. The Path object must reference a file, not a
+      /// directory.
+      /// @returns true if the file starts with the magic number for a native
+      /// shared library.
+      /// @brief Determine if the path references a dynamic library.
+      bool isDynamicLibrary() const;
+
+      /// This function determines if the path name in the object references a
+      /// native object file by looking at its magic number. The term object
+      /// file is defined as "an organized collection of separate, named
+      /// sequences of binary data." This covers the obvious file formats such
+      /// as COFF and ELF, but it also includes LLVM IR bitcode, archives,
+      /// libraries, etc...
+      /// @returns true if the file starts with the magic number for an object
+      /// file.
+      /// @brief Determine if the path references an object file.
+      bool isObjectFile() const;
+
+      /// This function determines if the path name references an existing file
+      /// or directory in the file system.
+      /// @returns true if the pathname references an existing file or
+      /// directory.
+      /// @brief Determines if the path is a file or directory in
+      /// the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool exists() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::exists));
+
+      /// This function determines if the path name references an
+      /// existing directory.
+      /// @returns true if the pathname references an existing directory.
+      /// @brief Determines if the path is a directory in the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool isDirectory() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::is_directory));
+
+      /// This function determines if the path name references an
+      /// existing symbolic link.
+      /// @returns true if the pathname references an existing symlink.
+      /// @brief Determines if the path is a symlink in the file system.
+      LLVM_ATTRIBUTE_DEPRECATED(bool isSymLink() const,
+        LLVM_PATH_DEPRECATED_MSG(fs::is_symlink));
+
+      /// This function determines if the path name references a readable file
+      /// or directory in the file system. This function checks for
+      /// the existence and readability (by the current program) of the file
+      /// or directory.
+      /// @returns true if the pathname references a readable file.
+      /// @brief Determines if the path is a readable file or directory
+      /// in the file system.
+      bool canRead() const;
+
+      /// This function determines if the path name references a writable file
+      /// or directory in the file system. This function checks for the
+      /// existence and writability (by the current program) of the file or
+      /// directory.
+      /// @returns true if the pathname references a writable file.
+      /// @brief Determines if the path is a writable file or directory
+      /// in the file system.
+      bool canWrite() const;
+
+      /// This function checks that we are working only on a regular file,
+      /// ruling out things like /dev/null, any block special file, or
+      /// other things that aren't "regular" regular files.
+      /// @returns true if the file is S_ISREG.
+      /// @brief Determines if the file is a regular file
+      bool isRegularFile() const;
+
+      /// This function determines if the path name references an executable
+      /// file in the file system. This function checks for the existence and
+      /// executability (by the current program) of the file.
+      /// @returns true if the pathname references an executable file.
+      /// @brief Determines if the path is an executable file in the file
+      /// system.
+      bool canExecute() const;
+
+      /// This function builds a list of paths that are the names of the
+      /// files and directories in a directory.
+      /// @returns true if an error occurs, false otherwise
+      /// @brief Build a list of directory's contents.
+      bool getDirectoryContents(
+        std::set<std::string> &paths, ///< The resulting list of file & directory names
+        std::string* ErrMsg           ///< Optional place to return an error message.
+      ) const;
+
+    /// @}
+    /// @name Path Mutators
+    /// @{
+    public:
+      /// The path name is cleared and becomes empty. This is an invalid
+      /// path name but is the *only* invalid path name. This is provided
+      /// so that path objects can be used to indicate the lack of a
+      /// valid path being found.
+      /// @brief Make the path empty.
+      void clear() { path.clear(); }
+
+      /// This method sets the Path object to \p unverified_path. This can fail
+      /// if the \p unverified_path does not pass the syntactic checks of the
+      /// isValid() method. If verification fails, the Path object remains
+      /// unchanged and false is returned. Otherwise true is returned and the
+      /// Path object takes on the path value of \p unverified_path.
+      /// @returns true if the path was set, false otherwise.
+      /// @param unverified_path The path to be set in Path object.
+      /// @brief Set a full path from a StringRef
+      bool set(StringRef unverified_path);
+
+      /// One path component is removed from the Path. If only one component is
+      /// present in the path, the Path object becomes empty. If the Path object
+      /// is empty, no change is made.
+      /// @returns false if the path component could not be removed.
+      /// @brief Removes the last directory component of the Path.
+      bool eraseComponent();
+
+      /// The \p component is added to the end of the Path if it is a legal
+      /// name for the operating system. A directory separator will be added if
+      /// needed.
+      /// @returns false if the path component could not be added.
+      /// @brief Appends one path component to the Path.
+      bool appendComponent(StringRef component);
+
+      /// A period and the \p suffix are appended to the end of the pathname.
+      /// When the \p suffix is empty, no action is performed.
+      /// @brief Adds a period and the \p suffix to the end of the pathname.
+      void appendSuffix(StringRef suffix);
+
+      /// The suffix of the filename is erased. The suffix begins with and
+      /// includes the last . character in the filename after the last directory
+      /// separator and extends until the end of the name. If no . character is
+      /// after the last directory separator, then the file name is left
+      /// unchanged (i.e. it was already without a suffix) but the function
+      /// returns false.
+      /// @returns false if there was no suffix to remove, true otherwise.
+      /// @brief Remove the suffix from a path name.
+      bool eraseSuffix();
+
+      /// The current Path name is made unique in the file system. Upon return,
+      /// the Path will have been changed to make a unique file in the file
+      /// system or it will not have been changed if the current path name is
+      /// already unique.
+      /// @throws std::string if an unrecoverable error occurs.
+      /// @brief Make the current path name unique in the file system.
+      bool makeUnique( bool reuse_current /*= true*/, std::string* ErrMsg );
+
+      /// The current Path name is made absolute by prepending the
+      /// current working directory if necessary.
+      LLVM_ATTRIBUTE_DEPRECATED(
+        void makeAbsolute(),
+        LLVM_PATH_DEPRECATED_MSG(fs::make_absolute));
+
+    /// @}
+    /// @name Disk Mutators
+    /// @{
+    public:
+      /// This method attempts to make the file referenced by the Path object
+      /// available for reading so that the canRead() method will return true.
+      /// @brief Make the file readable.
+      bool makeReadableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method attempts to make the file referenced by the Path object
+      /// available for writing so that the canWrite() method will return true.
+      /// @brief Make the file writable.
+      bool makeWriteableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method attempts to make the file referenced by the Path object
+      /// available for execution so that the canExecute() method will return
+      /// true.
+      /// @brief Make the file executable.
+      bool makeExecutableOnDisk(std::string* ErrMsg = 0);
+
+      /// This method allows the last modified time stamp and permission bits
+      /// to be set on the disk object referenced by the Path.
+      /// @throws std::string if an error occurs.
+      /// @returns true on error.
+      /// @brief Set the status information.
+      bool setStatusInfoOnDisk(const FileStatus &SI,
+                               std::string *ErrStr = 0) const;
+
+      /// This method attempts to create a directory in the file system with the
+      /// same name as the Path object. The \p create_parents parameter controls
+      /// whether intermediate directories are created or not. If \p
+      /// create_parents is true, then an attempt will be made to create all
+      /// intermediate directories, as needed. If \p create_parents is false,
+      /// then only the final directory component of the Path name will be
+      /// created. The created directory will have no entries.
+      /// @returns true if the directory could not be created, false otherwise
+      /// @brief Create the directory this Path refers to.
+      bool createDirectoryOnDisk(
+        bool create_parents = false, ///< Determines whether non-existent
+          ///< directory components other than the last one (the "parents")
+          ///< are created or not.
+        std::string* ErrMsg = 0      ///< Optional place to put error messages.
+      );
+
+      /// This method attempts to create a file in the file system with the same
+      /// name as the Path object. The intermediate directories must all exist
+      /// at the time this method is called. Use createDirectoryOnDisk to
+      /// accomplish that. The created file will be empty upon return from this
+      /// function.
+      /// @returns true if the file could not be created, false otherwise.
+      /// @brief Create the file this Path refers to.
+      bool createFileOnDisk(
+        std::string* ErrMsg = 0 ///< Optional place to put error messages.
+      );
+
+      /// This is like createFile except that it creates a temporary file. A
+      /// unique temporary file name is generated based on the contents of
+      /// \p this before the call. The new name is assigned to \p this and the
+      /// file is created. Note that this will both change the Path object
+      /// *and* create the corresponding file. This function will ensure that
+      /// the newly generated temporary file name is unique in the file system.
+      /// @returns true if the file couldn't be created, false otherwise.
+      /// @brief Create a unique temporary file
+      bool createTemporaryFileOnDisk(
+        bool reuse_current = false, ///< When set to true, this parameter
+          ///< indicates that if the current file name does not exist then
+          ///< it will be used without modification.
+        std::string* ErrMsg = 0     ///< Optional place to put error messages
+      );
+
+      /// This method renames the file referenced by \p this as \p newName. The
+      /// file referenced by \p this must exist. The file referenced by
+      /// \p newName does not need to exist.
+      /// @returns true on error, false otherwise
+      /// @brief Rename one file as another.
+      bool renamePathOnDisk(const Path& newName, std::string* ErrMsg);
+
+      /// This method attempts to destroy the file or directory named by the
+      /// last component of the Path. If the Path refers to a directory and the
+      /// \p destroy_contents is false, an attempt will be made to remove just
+      /// the directory (the final Path component). If \p destroy_contents is
+      /// true, an attempt will be made to remove the entire contents of the
+      /// directory, recursively. If the Path refers to a file, the
+      /// \p destroy_contents parameter is ignored.
+      /// @param destroy_contents Indicates whether the contents of a destroyed
+      /// directory should also be destroyed (recursively).
+      /// @param Err An optional string to receive an error message.
+      /// @returns false if the file/directory was destroyed, true on error.
+      /// @brief Removes the file or directory from the filesystem.
+      bool eraseFromDisk(bool destroy_contents = false,
+                         std::string *Err = 0) const;
+
+
+      /// MapInFilePages - This is a low level system API to map in the file
+      /// that is currently opened as FD into the current process's address
+      /// space for read only access. This function may return null on failure
+      /// or if the system cannot provide the following constraints:
+      ///  1) The pages must be valid after the FD is closed, until
+      ///     UnMapFilePages is called.
+      ///  2) Any padding after the end of the file must be zero filled, if
+      ///     present.
+      ///  3) The pages must be contiguous.
+      ///
+      /// This API is not intended for general use, clients should use
+      /// MemoryBuffer::getFile instead.
+      static const char *MapInFilePages(int FD, uint64_t FileSize);
+
+      /// UnMapFilePages - Free pages mapped into the current process by
+      /// MapInFilePages.
+      ///
+      /// This API is not intended for general use, clients should use
+      /// MemoryBuffer::getFile instead.
+      static void UnMapFilePages(const char *Base, uint64_t FileSize);
+
+    /// @}
+    /// @name Data
+    /// @{
+    protected:
+      // Our win32 implementation relies on this string being mutable.
+      mutable std::string path;   ///< Storage for the path name.
+
+
+    /// @}
+  };
+
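To make the conventions above concrete: the disk mutators return true on *error* and report details through the optional ErrMsg out-parameter. A hedged sketch under those assumptions (the path and function name are illustrative):

    #include "llvm/Support/PathV1.h"
    using namespace llvm;

    bool makeScratchFile(std::string &Err) {
      sys::Path P("scratch/out");
      // A true return means the directory could not be created.
      if (P.createDirectoryOnDisk(/*create_parents=*/true, &Err))
        return true;
      P.appendComponent("tmp.bc");
      return P.createFileOnDisk(&Err); // empty file; parents now exist
    }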
+  /// This class is identical to Path class except it allows you to obtain the
+  /// file status of the Path as well. The reason for the distinction is one of
+  /// efficiency. First, the file status requires additional space and the space
+  /// is incorporated directly into PathWithStatus without an additional malloc.
+  /// Second, obtaining status information is an expensive operation on most
+  /// operating systems so we want to be careful and explicit about where we
+  /// allow this operation in LLVM.
+  /// @brief Path with file status class.
+  class PathWithStatus : public Path {
+    /// @name Constructors
+    /// @{
+    public:
+      /// @brief Default constructor
+      PathWithStatus() : Path(), status(), fsIsValid(false) {}
+
+      /// @brief Copy constructor
+      PathWithStatus(const PathWithStatus &that)
+        : Path(static_cast<const Path&>(that)), status(that.status),
+          fsIsValid(that.fsIsValid) {}
+
+      /// This constructor allows construction from a Path object
+      /// @brief Path constructor
+      PathWithStatus(const Path &other)
+        : Path(other), status(), fsIsValid(false) {}
+
+      /// This constructor will accept a char* or std::string as a path. No
+      /// checking is done on this path to determine if it is valid. To
+      /// determine validity of the path, use the isValid method.
+      /// @brief Construct a Path from a string.
+      explicit PathWithStatus(
+        StringRef p ///< The path to assign.
+      ) : Path(p), status(), fsIsValid(false) {}
+
+      /// This constructor will accept a character range as a path. No checking
+      /// is done on this path to determine if it is valid. To determine
+      /// validity of the path, use the isValid method.
+      /// @brief Construct a Path from a string.
+      explicit PathWithStatus(
+        const char *StrStart, ///< Pointer to the first character of the path
+        unsigned StrLen       ///< Length of the path.
+      ) : Path(StrStart, StrLen), status(), fsIsValid(false) {}
+
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      PathWithStatus &operator=(const PathWithStatus &that) {
+        static_cast<Path&>(*this) = static_cast<const Path&>(that);
+        status = that.status;
+        fsIsValid = that.fsIsValid;
+        return *this;
+      }
+
+      /// Makes a copy of \p that to \p this.
+      /// @returns \p this
+      /// @brief Assignment Operator
+      PathWithStatus &operator=(const Path &that) {
+        static_cast<Path&>(*this) = static_cast<const Path&>(that);
+        fsIsValid = false;
+        return *this;
+      }
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+      /// This function returns status information about the file. The type of
+      /// path (file or directory) is updated to reflect the actual contents
+      /// of the file system.
+      /// @returns 0 on failure, with Error explaining why (if non-zero)
+      /// @returns a pointer to a FileStatus structure on success.
+      /// @brief Get file status.
+      const FileStatus *getFileStatus(
+        bool forceUpdate = false, ///< Force an update from the file system
+        std::string *Error = 0    ///< Optional place to return an error msg.
+      ) const;
+
+    /// @}
+    /// @name Data
+    /// @{
+    private:
+      mutable FileStatus status; ///< Status information.
+      mutable bool fsIsValid;    ///< Whether we've obtained it or not
+
+    /// @}
+  };
+
+  /// This enumeration delineates the kinds of files that LLVM knows about.
+  enum LLVMFileType {
+    Unknown_FileType = 0,              ///< Unrecognized file
+    Bitcode_FileType,                  ///< Bitcode file
+    Archive_FileType,                  ///< ar style archive file
+    ELF_Relocatable_FileType,          ///< ELF Relocatable object file
+    ELF_Executable_FileType,           ///< ELF Executable image
+    ELF_SharedObject_FileType,         ///< ELF dynamically linked shared lib
+    ELF_Core_FileType,                 ///< ELF core image
+    Mach_O_Object_FileType,            ///< Mach-O Object file
+    Mach_O_Executable_FileType,        ///< Mach-O Executable
+    Mach_O_FixedVirtualMemorySharedLib_FileType, ///< Mach-O Shared Lib, FVM
+    Mach_O_Core_FileType,              ///< Mach-O Core File
+    Mach_O_PreloadExecutable_FileType, ///< Mach-O Preloaded Executable
+    Mach_O_DynamicallyLinkedSharedLib_FileType, ///< Mach-O dynlinked shared lib
+    Mach_O_DynamicLinker_FileType,     ///< The Mach-O dynamic linker
+    Mach_O_Bundle_FileType,            ///< Mach-O Bundle file
+    Mach_O_DynamicallyLinkedSharedLibStub_FileType, ///< Mach-O Shared lib stub
+    COFF_FileType                      ///< COFF object file or lib
+  };
+
+  /// This utility function allows any memory block to be examined in order
+  /// to determine its file type.
+  LLVMFileType IdentifyFileType(const char *magic, unsigned length);
+
+  /// This function can be used to copy the file specified by Src to the
+  /// file specified by Dest. If an error occurs, Dest is removed.
+  /// @returns true if an error occurs, false otherwise
+  /// @brief Copy one file to another.
+  bool CopyFile(const Path& Dest, const Path& Src, std::string* ErrMsg);
+
+  /// This is the OS-specific path separator: a colon on Unix or a semicolon
+  /// on Windows.
+  extern const char PathSeparator;
+}
+
+}
+
+#endif
diff --git a/final/include/llvm/Support/PathV2.h b/final/include/llvm/Support/PathV2.h
new file mode 100644
index 00000000000..251563398fb
--- /dev/null
+++ b/final/include/llvm/Support/PathV2.h
@@ -0,0 +1,347 @@
+//===- llvm/Support/PathV2.h - Path Operating System Concept ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::path namespace. It is designed after
+// TR2/boost filesystem (v3), but modified to remove exception handling and the
+// path class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PATHV2_H
+#define LLVM_SUPPORT_PATHV2_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/DataTypes.h"
+#include <iterator>
+
+namespace llvm {
+namespace sys {
+namespace path {
+
+/// @name Lexical Component Iterator
+/// @{
+
+/// @brief Path iterator.
+///
+/// This is a bidirectional iterator that iterates over the individual
+/// components in \a path. The forward traversal order is as follows:
+/// * The root-name element, if present.
+/// * The root-directory element, if present.
+/// * Each successive filename element, if present.
+/// * Dot, if one or more trailing non-root slash characters are present.
+/// The backwards traversal order is the reverse of forward traversal.
+///
+/// Iteration examples. Each component is separated by ',':
+/// /          => /
+/// /foo       => /,foo
+/// foo/       => foo,.
+/// /foo/bar   => /,foo,bar
+/// ../        => ..,.
+/// C:\foo\bar => C:,/,foo,bar
+///
+class const_iterator {
+  StringRef Path;      ///< The entire path.
+  StringRef Component; ///< The current component. Not necessarily in Path.
+  size_t    Position;  ///< The iterators current position within Path.
+
+  // An end iterator has Position = Path.size() + 1.
+  friend const_iterator begin(StringRef path);
+  friend const_iterator end(StringRef path);
+
+public:
+  typedef const StringRef value_type;
+  typedef ptrdiff_t difference_type;
+  typedef value_type &reference;
+  typedef value_type *pointer;
+  typedef std::bidirectional_iterator_tag iterator_category;
+
+  reference operator*() const { return Component; }
+  pointer   operator->() const { return &Component; }
+  const_iterator &operator++();    // preincrement
+  const_iterator &operator++(int); // postincrement
+  const_iterator &operator--();    // predecrement
+  const_iterator &operator--(int); // postdecrement
+  bool operator==(const const_iterator &RHS) const;
+  bool operator!=(const const_iterator &RHS) const;
+
+  /// @brief Difference in bytes between this and RHS.
+  ptrdiff_t operator-(const const_iterator &RHS) const;
+};
+
+typedef std::reverse_iterator<const_iterator> reverse_iterator;
+
+/// @brief Get begin iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized with the first component of \a path.
+const_iterator begin(StringRef path);
+
+/// @brief Get end iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized to the end of \a path.
+const_iterator end(StringRef path);
+
+/// @brief Get reverse begin iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized with the first reverse component of \a path.
+inline reverse_iterator rbegin(StringRef path) {
+  return reverse_iterator(end(path));
+}
+
+/// @brief Get reverse end iterator over \a path.
+/// @param path Input path.
+/// @returns Iterator initialized to the reverse end of \a path.
+inline reverse_iterator rend(StringRef path) {
+  return reverse_iterator(begin(path));
+}
+
+/// @}
+/// @name Lexical Modifiers
+/// @{
+
+/// @brief Remove the last component from \a path unless it is the root dir.
+///
+/// directory/filename.cpp => directory/
+/// directory/             => directory
+/// /                      => /
+///
+/// @param path A path that is modified to not have a file component.
+void remove_filename(SmallVectorImpl<char> &path);
+
+/// @brief Replace the file extension of \a path with \a extension.
+///
+/// ./filename.cpp => ./filename.extension
+/// ./filename     => ./filename.extension
+/// ./             => ./.extension
+///
+/// @param path A path that has its extension replaced with \a extension.
+/// @param extension The extension to be added. It may be empty. It may also
+///                  optionally start with a '.', if it does not, one will be
+///                  prepended.
+void replace_extension(SmallVectorImpl<char> &path, const Twine &extension);
+
+/// @brief Append to path.
+///
+/// /foo  + bar/f => /foo/bar/f
+/// /foo/ + bar/f => /foo/bar/f
+/// foo   + bar/f => foo/bar/f
+///
+/// @param path Set to \a path + \a component.
+/// @param component The component to be appended to \a path.
+void append(SmallVectorImpl<char> &path, const Twine &a,
+                                         const Twine &b = "",
+                                         const Twine &c = "",
+                                         const Twine &d = "");
+
+/// @brief Append to path.
+///
+/// /foo  + [bar,f] => /foo/bar/f
+/// /foo/ + [bar,f] => /foo/bar/f
+/// foo   + [bar,f] => foo/bar/f
+///
+/// @param path Set to \a path + [\a begin, \a end).
+/// @param begin Start of components to append.
+/// @param end One past the end of components to append.
+void append(SmallVectorImpl<char> &path,
+            const_iterator begin, const_iterator end);
+
+/// @}
+/// @name Transforms (or some other better name)
+/// @{
+
+/// Convert path to the native form. This is used to give paths to users and
+/// operating system calls in the platform's normal way. For example, on
+/// Windows all '/' are converted to '\'.
+///
+/// @param path A path that is transformed to native format.
+/// @param result Holds the result of the transformation.
+void native(const Twine &path, SmallVectorImpl<char> &result);
+
+/// @}
+/// @name Lexical Observers
+/// @{
+
+/// @brief Get root name.
+///
+/// //net/hello => //net
+/// c:/hello    => c: (on Windows, on other platforms nothing)
+/// /hello      =>
+///
+/// @param path Input path.
+/// @result The root name of \a path if it has one, otherwise "".
+const StringRef root_name(StringRef path);
+
+/// @brief Get root directory.
+///
+/// /goo/hello => /
+/// c:/hello   => /
+/// d/file.txt =>
+///
+/// @param path Input path.
+/// @result The root directory of \a path if it has one, otherwise
+///         "".
+const StringRef root_directory(StringRef path);
+
+/// @brief Get root path.
+///
+/// Equivalent to root_name + root_directory.
+///
+/// @param path Input path.
+/// @result The root path of \a path if it has one, otherwise "".
+const StringRef root_path(StringRef path);
+
+/// @brief Get relative path.
+///
+/// C:\hello\world => hello\world
+/// foo/bar        => foo/bar
+/// /foo/bar       => foo/bar
+///
+/// @param path Input path.
+/// @result The path starting after root_path if one exists, otherwise "".
+const StringRef relative_path(StringRef path);
+
+/// @brief Get parent path.
+///
+/// /          =>
+/// /foo       => /
+/// foo/../bar => foo/..
+///
+/// @param path Input path.
+/// @result The parent path of \a path if one exists, otherwise "".
+const StringRef parent_path(StringRef path);
+
+/// @brief Get filename.
+///
+/// /foo.txt => foo.txt
+/// .        => .
+/// ..       => ..
+/// /        => /
+///
+/// @param path Input path.
+/// @result The filename part of \a path. This is defined as the last component
+///         of \a path.
+const StringRef filename(StringRef path);
+
+/// @brief Get stem.
+///
+/// If filename contains a dot but not solely one or two dots, result is the
+/// substring of filename ending at (but not including) the last dot. Otherwise
+/// it is filename.
+///
+/// /foo/bar.txt => bar
+/// /foo/bar     => bar
+/// /foo/.txt    =>
+/// /foo/.       => .
+/// /foo/..      => ..
+///
+/// @param path Input path.
+/// @result The stem of \a path.
+const StringRef stem(StringRef path);
+
+/// @brief Get extension.
+///
+/// If filename contains a dot but not solely one or two dots, result is the
+/// substring of filename starting at (and including) the last dot, and ending
+/// at the end of \a path. Otherwise "".
+///
+/// /foo/bar.txt => .txt
+/// /foo/bar     =>
+/// /foo/.txt    => .txt
+///
+/// @param path Input path.
+/// @result The extension of \a path.
+const StringRef extension(StringRef path);
+
+/// @brief Check whether the given char is a path separator on the host OS.
+///
+/// @param value a character
+/// @result true if \a value is a path separator character on the host OS
+bool is_separator(char value);
+
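The observers above are pure string slicing; none of them touch the disk. A small sketch of decomposing one path (the sample path and function name are illustrative):

    #include "llvm/Support/PathV2.h"
    using namespace llvm;

    void decompose(StringRef P) {                 // e.g. "/foo/bar.txt"
      StringRef Dir  = sys::path::parent_path(P); // "/foo"
      StringRef File = sys::path::filename(P);    // "bar.txt"
      StringRef Stem = sys::path::stem(P);        // "bar"
      StringRef Ext  = sys::path::extension(P);   // ".txt"
      (void)Dir; (void)File; (void)Stem; (void)Ext;
    }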
+/// @brief Has root name?
+///
+/// root_name != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root name, false otherwise.
+bool has_root_name(const Twine &path);
+
+/// @brief Has root directory?
+///
+/// root_directory != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root directory, false otherwise.
+bool has_root_directory(const Twine &path);
+
+/// @brief Has root path?
+///
+/// root_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a root path, false otherwise.
bool has_root_path(const Twine &path);
+
+/// @brief Has relative path?
+///
+/// relative_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a relative path, false otherwise.
+bool has_relative_path(const Twine &path);
+
+/// @brief Has parent path?
+///
+/// parent_path != ""
+///
+/// @param path Input path.
+/// @result True if the path has a parent path, false otherwise.
+bool has_parent_path(const Twine &path);
+
+/// @brief Has filename?
+///
+/// filename != ""
+///
+/// @param path Input path.
+/// @result True if the path has a filename, false otherwise.
+bool has_filename(const Twine &path);
+
+/// @brief Has stem?
+///
+/// stem != ""
+///
+/// @param path Input path.
+/// @result True if the path has a stem, false otherwise.
+bool has_stem(const Twine &path);
+
+/// @brief Has extension?
+///
+/// extension != ""
+///
+/// @param path Input path.
+/// @result True if the path has an extension, false otherwise.
+bool has_extension(const Twine &path);
+
+/// @brief Is path absolute?
+///
+/// @param path Input path.
+/// @result True if the path is absolute, false if it is not.
+bool is_absolute(const Twine &path);
+
+/// @brief Is path relative?
+///
+/// @param path Input path.
+/// @result True if the path is relative, false if it is not.
+bool is_relative(const Twine &path);
+
+} // end namespace path
+} // end namespace sys
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/PatternMatch.h b/final/include/llvm/Support/PatternMatch.h
new file mode 100644
index 00000000000..948ae5176ee
--- /dev/null
+++ b/final/include/llvm/Support/PatternMatch.h
@@ -0,0 +1,665 @@
+//===-- llvm/Support/PatternMatch.h - Match on the LLVM IR ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a simple and efficient mechanism for performing general
+// tree-based pattern matches on the LLVM IR. The power of these routines is
+// that it allows you to write concise patterns that are expressive and easy to
+// understand. The other major advantage of this is that it allows you to
+// trivially capture/bind elements in the pattern to variables. For example,
+// you can do something like this:
+//
+//  Value *Exp = ...
+//  Value *X, *Y;  ConstantInt *C1, *C2;      // (X & C1) | (Y & C2)
+//  if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
+//                      m_And(m_Value(Y), m_ConstantInt(C2))))) {
+//    ... Pattern is matched and variables are bound ...
+//  }
+//
+// This is primarily useful to things like the instruction combiner, but can
+// also be useful for static analysis tools or code generators.
+//
+//===----------------------------------------------------------------------===//
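The example in the comment above compiles essentially as written; a minimal self-contained version (the function name isMaskedOr is illustrative):

    #include "llvm/Support/PatternMatch.h"
    using namespace llvm;
    using namespace PatternMatch;

    bool isMaskedOr(Value *Exp) {
      Value *X, *Y; ConstantInt *C1, *C2;
      // Matches (X & C1) | (Y & C2), binding all four operands on success.
      return match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
                             m_And(m_Value(Y), m_ConstantInt(C2))));
    }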
+
+#ifndef LLVM_SUPPORT_PATTERNMATCH_H
+#define LLVM_SUPPORT_PATTERNMATCH_H
+
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+
+namespace llvm {
+namespace PatternMatch {
+
+template<typename Val, typename Pattern>
+bool match(Val *V, const Pattern &P) {
+  return const_cast<Pattern&>(P).match(V);
+}
+
+template<typename Class>
+struct class_match {
+  template<typename ITy>
+  bool match(ITy *V) { return isa<Class>(V); }
+};
+
+/// m_Value() - Match an arbitrary value and ignore it.
+inline class_match<Value> m_Value() { return class_match<Value>(); }
+/// m_ConstantInt() - Match an arbitrary ConstantInt and ignore it.
+inline class_match<ConstantInt> m_ConstantInt() {
+  return class_match<ConstantInt>();
+}
+/// m_Undef() - Match an arbitrary undef constant.
+inline class_match<UndefValue> m_Undef() { return class_match<UndefValue>(); }
+
+inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
+
+struct match_zero {
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (const Constant *C = dyn_cast<Constant>(V))
+      return C->isNullValue();
+    return false;
+  }
+};
+
+/// m_Zero() - Match an arbitrary zero/null constant. This includes
+/// zero_initializer for vectors and ConstantPointerNull for pointers.
+inline match_zero m_Zero() { return match_zero(); }
+
+
+struct apint_match {
+  const APInt *&Res;
+  apint_match(const APInt *&R) : Res(R) {}
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      Res = &CI->getValue();
+      return true;
+    }
+    if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI =
+            dyn_cast_or_null<ConstantInt>(CV->getSplatValue())) {
+        Res = &CI->getValue();
+        return true;
+      }
+    return false;
+  }
+};
+
+/// m_APInt - Match a ConstantInt or splatted ConstantVector, binding the
+/// specified pointer to the contained APInt.
+inline apint_match m_APInt(const APInt *&Res) { return Res; }
+
+
+template<int64_t Val>
+struct constantint_match {
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      const APInt &CIV = CI->getValue();
+      if (Val >= 0)
+        return CIV == static_cast<uint64_t>(Val);
+      // If Val is negative, and CI is shorter than it, truncate to the right
+      // number of bits. If it is larger, then we have to sign extend. Just
+      // compare their negated values.
+      return -CIV == -Val;
+    }
+    return false;
+  }
+};
+
+/// m_ConstantInt - Match a ConstantInt with a specific value.
+template<int64_t Val>
+inline constantint_match<Val> m_ConstantInt() {
+  return constantint_match<Val>();
+}
+
+/// cst_pred_ty - This helper class is used to match scalar and vector constants
+/// that satisfy a specified predicate.
+template<typename Predicate>
+struct cst_pred_ty : public Predicate {
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return this->isValue(CI->getValue());
+    if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+        return this->isValue(CI->getValue());
+    return false;
+  }
+};
+
+/// api_pred_ty - This helper class is used to match scalar and vector constants
+/// that satisfy a specified predicate, and bind them to an APInt.
+template<typename Predicate>
+struct api_pred_ty : public Predicate {
+  const APInt *&Res;
+  api_pred_ty(const APInt *&R) : Res(R) {}
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      if (this->isValue(CI->getValue())) {
+        Res = &CI->getValue();
+        return true;
+      }
+    if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CV->getSplatValue()))
+        if (this->isValue(CI->getValue())) {
+          Res = &CI->getValue();
+          return true;
+        }
+    return false;
+  }
+};
+
+
+struct is_one {
+  bool isValue(const APInt &C) { return C == 1; }
+};
+
+/// m_One() - Match an integer 1 or a vector with all elements equal to 1.
+inline cst_pred_ty<is_one> m_One() { return cst_pred_ty<is_one>(); }
+inline api_pred_ty<is_one> m_One(const APInt *&V) { return V; }
+
+struct is_all_ones {
+  bool isValue(const APInt &C) { return C.isAllOnesValue(); }
+};
+
+/// m_AllOnes() - Match an integer or vector with all bits set to true.
+inline cst_pred_ty<is_all_ones> m_AllOnes() {return cst_pred_ty<is_all_ones>();}
+inline api_pred_ty<is_all_ones> m_AllOnes(const APInt *&V) { return V; }
+
+struct is_sign_bit {
+  bool isValue(const APInt &C) { return C.isSignBit(); }
+};
+
+/// m_SignBit() - Match an integer or vector with only the sign bit(s) set.
+inline cst_pred_ty<is_sign_bit> m_SignBit() {return cst_pred_ty<is_sign_bit>();}
+inline api_pred_ty<is_sign_bit> m_SignBit(const APInt *&V) { return V; }
+
+struct is_power2 {
+  bool isValue(const APInt &C) { return C.isPowerOf2(); }
+};
+
+/// m_Power2() - Match an integer or vector power of 2.
+inline cst_pred_ty<is_power2> m_Power2() { return cst_pred_ty<is_power2>(); }
+inline api_pred_ty<is_power2> m_Power2(const APInt *&V) { return V; }
+
+template<typename Class>
+struct bind_ty {
+  Class *&VR;
+  bind_ty(Class *&V) : VR(V) {}
+
+  template<typename ITy>
+  bool match(ITy *V) {
+    if (Class *CV = dyn_cast<Class>(V)) {
+      VR = CV;
+      return true;
+    }
+    return false;
+  }
+};
+
+/// m_Value - Match a value, capturing it if we match.
+inline bind_ty<Value> m_Value(Value *&V) { return V; }
+
+/// m_ConstantInt - Match a ConstantInt, capturing the value if we match.
+inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
+
+/// m_Constant - Match a Constant, capturing the value if we match.
+inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
+
+/// specificval_ty - Match a specified Value*.
+struct specificval_ty {
+  const Value *Val;
+  specificval_ty(const Value *V) : Val(V) {}
+
+  template<typename ITy>
+  bool match(ITy *V) {
+    return V == Val;
+  }
+};
+
+/// m_Specific - Match if we have a specific specified value.
+inline specificval_ty m_Specific(const Value *V) { return V; }
+
+
+//===----------------------------------------------------------------------===//
+// Matchers for specific binary operators.
+//
+
+template<typename LHS_t, typename RHS_t, unsigned Opcode>
+struct BinaryOp_match {
+  LHS_t L;
+  RHS_t R;
+
+  BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
+
+  template<typename OpTy>
+  bool match(OpTy *V) {
+    if (V->getValueID() == Value::InstructionVal + Opcode) {
+      BinaryOperator *I = cast<BinaryOperator>(V);
+      return L.match(I->getOperand(0)) && R.match(I->getOperand(1));
+    }
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      return CE->getOpcode() == Opcode && L.match(CE->getOperand(0)) &&
+             R.match(CE->getOperand(1));
+    return false;
+  }
+};
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Add>
+m_Add(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FAdd>
+m_FAdd(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Sub>
+m_Sub(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FSub>
+m_FSub(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Mul>
+m_Mul(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FMul>
+m_FMul(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::UDiv>
+m_UDiv(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::SDiv>
+m_SDiv(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FDiv>
+m_FDiv(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::URem>
+m_URem(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::SRem>
+m_SRem(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::FRem>
+m_FRem(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::And>
+m_And(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Or>
+m_Or(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Xor>
+m_Xor(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::Shl>
+m_Shl(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::LShr>
+m_LShr(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
+}
+
+template<typename LHS, typename RHS>
+inline BinaryOp_match<LHS, RHS, Instruction::AShr>
+m_AShr(const LHS &L, const RHS &R) {
+  return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
+}
+
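A brief sketch combining a binder with one of the binary-operator matchers above; on a successful match, X and C are bound to the shift's operands (the function name is illustrative):

    #include "llvm/Support/PatternMatch.h"
    using namespace llvm;
    using namespace PatternMatch;

    // Recognize "X << C" for any value X and constant-int shift amount C.
    bool matchShlByConst(Value *V, Value *&X, ConstantInt *&C) {
      return match(V, m_Shl(m_Value(X), m_ConstantInt(C)));
    }

Note that these matchers are positional, not commutative: m_Shl(m_ConstantInt(C), m_Value(X)) would only match when the constant is the first operand.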
BinaryOp_match(L, R); +} + +template +inline BinaryOp_match +m_Or(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + +template +inline BinaryOp_match +m_Xor(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + +template +inline BinaryOp_match +m_Shl(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + +template +inline BinaryOp_match +m_LShr(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + +template +inline BinaryOp_match +m_AShr(const LHS &L, const RHS &R) { + return BinaryOp_match(L, R); +} + +//===----------------------------------------------------------------------===// +// Class that matches two different binary ops. +// +template +struct BinOp2_match { + LHS_t L; + RHS_t R; + + BinOp2_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {} + + template + bool match(OpTy *V) { + if (V->getValueID() == Value::InstructionVal + Opc1 || + V->getValueID() == Value::InstructionVal + Opc2) { + BinaryOperator *I = cast(V); + return L.match(I->getOperand(0)) && R.match(I->getOperand(1)); + } + if (ConstantExpr *CE = dyn_cast(V)) + return (CE->getOpcode() == Opc1 || CE->getOpcode() == Opc2) && + L.match(CE->getOperand(0)) && R.match(CE->getOperand(1)); + return false; + } +}; + +/// m_Shr - Matches LShr or AShr. +template +inline BinOp2_match +m_Shr(const LHS &L, const RHS &R) { + return BinOp2_match(L, R); +} + +/// m_LogicalShift - Matches LShr or Shl. +template +inline BinOp2_match +m_LogicalShift(const LHS &L, const RHS &R) { + return BinOp2_match(L, R); +} + +/// m_IDiv - Matches UDiv and SDiv. +template +inline BinOp2_match +m_IDiv(const LHS &L, const RHS &R) { + return BinOp2_match(L, R); +} + +//===----------------------------------------------------------------------===// +// Matchers for CmpInst classes +// + +template +struct CmpClass_match { + PredicateTy &Predicate; + LHS_t L; + RHS_t R; + + CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS) + : Predicate(Pred), L(LHS), R(RHS) {} + + template + bool match(OpTy *V) { + if (Class *I = dyn_cast(V)) + if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) { + Predicate = I->getPredicate(); + return true; + } + return false; + } +}; + +template +inline CmpClass_match +m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) { + return CmpClass_match(Pred, L, R); +} + +template +inline CmpClass_match +m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) { + return CmpClass_match(Pred, L, R); +} + +//===----------------------------------------------------------------------===// +// Matchers for SelectInst classes +// + +template +struct SelectClass_match { + Cond_t C; + LHS_t L; + RHS_t R; + + SelectClass_match(const Cond_t &Cond, const LHS_t &LHS, + const RHS_t &RHS) + : C(Cond), L(LHS), R(RHS) {} + + template + bool match(OpTy *V) { + if (SelectInst *I = dyn_cast(V)) + return C.match(I->getOperand(0)) && + L.match(I->getOperand(1)) && + R.match(I->getOperand(2)); + return false; + } +}; + +template +inline SelectClass_match +m_Select(const Cond &C, const LHS &L, const RHS &R) { + return SelectClass_match(C, L, R); +} + +/// m_SelectCst - This matches a select of two constants, e.g.: +/// m_SelectCst<-1, 0>(m_Value(V)) +template +inline SelectClass_match, constantint_match > +m_SelectCst(const Cond &C) { + return m_Select(C, m_ConstantInt(), m_ConstantInt()); +} + + +//===----------------------------------------------------------------------===// +// Matchers for CastInst classes +// + +template +struct CastClass_match { + Op_t Op; + + 
CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {} + + template + bool match(OpTy *V) { + if (CastInst *I = dyn_cast(V)) + return I->getOpcode() == Opcode && Op.match(I->getOperand(0)); + if (ConstantExpr *CE = dyn_cast(V)) + return CE->getOpcode() == Opcode && Op.match(CE->getOperand(0)); + return false; + } +}; + +/// m_BitCast +template +inline CastClass_match +m_BitCast(const OpTy &Op) { + return CastClass_match(Op); +} + +/// m_PtrToInt +template +inline CastClass_match +m_PtrToInt(const OpTy &Op) { + return CastClass_match(Op); +} + +/// m_Trunc +template +inline CastClass_match +m_Trunc(const OpTy &Op) { + return CastClass_match(Op); +} + +/// m_SExt +template +inline CastClass_match +m_SExt(const OpTy &Op) { + return CastClass_match(Op); +} + +/// m_ZExt +template +inline CastClass_match +m_ZExt(const OpTy &Op) { + return CastClass_match(Op); +} + + +//===----------------------------------------------------------------------===// +// Matchers for unary operators +// + +template +struct not_match { + LHS_t L; + + not_match(const LHS_t &LHS) : L(LHS) {} + + template + bool match(OpTy *V) { + if (Instruction *I = dyn_cast(V)) + if (I->getOpcode() == Instruction::Xor) + return matchIfNot(I->getOperand(0), I->getOperand(1)); + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::Xor) + return matchIfNot(CE->getOperand(0), CE->getOperand(1)); + return false; + } +private: + bool matchIfNot(Value *LHS, Value *RHS) { + if (ConstantInt *CI = dyn_cast(RHS)) + return CI->isAllOnesValue() && L.match(LHS); + if (ConstantVector *CV = dyn_cast(RHS)) + return CV->isAllOnesValue() && L.match(LHS); + return false; + } +}; + +template +inline not_match m_Not(const LHS &L) { return L; } + + +template +struct neg_match { + LHS_t L; + + neg_match(const LHS_t &LHS) : L(LHS) {} + + template + bool match(OpTy *V) { + if (Instruction *I = dyn_cast(V)) + if (I->getOpcode() == Instruction::Sub) + return matchIfNeg(I->getOperand(0), I->getOperand(1)); + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::Sub) + return matchIfNeg(CE->getOperand(0), CE->getOperand(1)); + return false; + } +private: + bool matchIfNeg(Value *LHS, Value *RHS) { + if (ConstantInt *C = dyn_cast(LHS)) + return C->isZero() && L.match(RHS); + return false; + } +}; + +/// m_Neg - Match an integer negate. +template +inline neg_match m_Neg(const LHS &L) { return L; } + + +template +struct fneg_match { + LHS_t L; + + fneg_match(const LHS_t &LHS) : L(LHS) {} + + template + bool match(OpTy *V) { + if (Instruction *I = dyn_cast(V)) + if (I->getOpcode() == Instruction::FSub) + return matchIfFNeg(I->getOperand(0), I->getOperand(1)); + if (ConstantExpr *CE = dyn_cast(V)) + if (CE->getOpcode() == Instruction::FSub) + return matchIfFNeg(CE->getOperand(0), CE->getOperand(1)); + return false; + } +private: + bool matchIfFNeg(Value *LHS, Value *RHS) { + if (ConstantFP *C = dyn_cast(LHS)) + return C->isNegativeZeroValue() && L.match(RHS); + return false; + } +}; + +/// m_FNeg - Match a floating point negate. +template +inline fneg_match m_FNeg(const LHS &L) { return L; } + + +//===----------------------------------------------------------------------===// +// Matchers for control flow. 
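+// For example (an illustrative sketch, not part of the original header),
+// m_Br defined below binds the condition and both successors of a
+// conditional branch:
+//
+//   BasicBlock *TBB, *FBB;
+//   Value *Cond;
+//   if (match(BB->getTerminator(), m_Br(m_Value(Cond), TBB, FBB))) {
+//     // Cond, TBB and FBB are now bound.
+//   }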
+//
+
+template<typename Cond_t>
+struct brc_match {
+  Cond_t Cond;
+  BasicBlock *&T, *&F;
+  brc_match(const Cond_t &C, BasicBlock *&t, BasicBlock *&f)
+    : Cond(C), T(t), F(f) {
+  }
+
+  template<typename OpTy>
+  bool match(OpTy *V) {
+    if (BranchInst *BI = dyn_cast<BranchInst>(V))
+      if (BI->isConditional() && Cond.match(BI->getCondition())) {
+        T = BI->getSuccessor(0);
+        F = BI->getSuccessor(1);
+        return true;
+      }
+    return false;
+  }
+};
+
+template<typename Cond_t>
+inline brc_match<Cond_t> m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
+  return brc_match<Cond_t>(C, T, F);
+}
+
+} // end namespace PatternMatch
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/PluginLoader.h b/final/include/llvm/Support/PluginLoader.h
new file mode 100644
index 00000000000..bdbb134b28e
--- /dev/null
+++ b/final/include/llvm/Support/PluginLoader.h
@@ -0,0 +1,37 @@
+//===-- llvm/Support/PluginLoader.h - Plugin Loader for Tools ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A tool can #include this file to get a -load option that allows the user to
+// load arbitrary shared objects into the tool's address space. Note that this
+// header can only be included by a program ONCE, so it should never be used by
+// library authors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PLUGINLOADER_H
+#define LLVM_SUPPORT_PLUGINLOADER_H
+
+#include "llvm/Support/CommandLine.h"
+
+namespace llvm {
+  struct PluginLoader {
+    void operator=(const std::string &Filename);
+    static unsigned getNumPlugins();
+    static std::string& getPlugin(unsigned num);
+  };
+
+#ifndef DONT_GET_PLUGIN_LOADER_OPTION
+  // This causes operator= above to be invoked for every -load option.
+  static cl::opt<PluginLoader, false, cl::parser<std::string> >
+    LoadOpt("load", cl::ZeroOrMore, cl::value_desc("pluginfilename"),
+            cl::desc("Load the specified plugin"));
+#endif
+}
+
+#endif
diff --git a/final/include/llvm/Support/PointerLikeTypeTraits.h b/final/include/llvm/Support/PointerLikeTypeTraits.h
new file mode 100644
index 00000000000..83708213921
--- /dev/null
+++ b/final/include/llvm/Support/PointerLikeTypeTraits.h
@@ -0,0 +1,81 @@
+//===- llvm/Support/PointerLikeTypeTraits.h - Pointer Traits ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PointerLikeTypeTraits class. This allows data
+// structures to reason about pointers and other things that are pointer sized.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
+#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+/// PointerLikeTypeTraits - This is a traits object that is used to handle
+/// pointer types and things that are just wrappers for pointers as a uniform
+/// entity.
+template<typename T>
+class PointerLikeTypeTraits {
+  // getAsVoidPointer
+  // getFromVoidPointer
+  // getNumLowBitsAvailable
+};
+
+// Provide PointerLikeTypeTraits for non-cvr pointers.
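+// For example (an illustrative sketch, not part of the original header),
+// these traits are what allow pointer containers to pack tag bits into the
+// known-zero low bits of a pointer:
+//
+//   Value *VP = ...;
+//   void *Raw = PointerLikeTypeTraits<Value*>::getAsVoidPointer(VP);
+//   // PointerLikeTypeTraits<Value*>::NumLowBitsAvailable low bits of Raw
+//   // are guaranteed zero and may be used for tags.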
+template +class PointerLikeTypeTraits { +public: + static inline void *getAsVoidPointer(T* P) { return P; } + static inline T *getFromVoidPointer(void *P) { + return static_cast(P); + } + + /// Note, we assume here that malloc returns objects at least 4-byte aligned. + /// However, this may be wrong, or pointers may be from something other than + /// malloc. In this case, you should specialize this template to reduce this. + /// + /// All clients should use assertions to do a run-time check to ensure that + /// this is actually true. + enum { NumLowBitsAvailable = 2 }; +}; + +// Provide PointerLikeTypeTraits for const pointers. +template +class PointerLikeTypeTraits { + typedef PointerLikeTypeTraits NonConst; + +public: + static inline const void *getAsVoidPointer(const T* P) { + return NonConst::getAsVoidPointer(const_cast(P)); + } + static inline const T *getFromVoidPointer(const void *P) { + return NonConst::getFromVoidPointer(const_cast(P)); + } + enum { NumLowBitsAvailable = NonConst::NumLowBitsAvailable }; +}; + +// Provide PointerLikeTypeTraits for uintptr_t. +template<> +class PointerLikeTypeTraits { +public: + static inline void *getAsVoidPointer(uintptr_t P) { + return reinterpret_cast(P); + } + static inline uintptr_t getFromVoidPointer(void *P) { + return reinterpret_cast(P); + } + // No bits are available! + enum { NumLowBitsAvailable = 0 }; +}; + +} // end namespace llvm + +#endif diff --git a/final/include/llvm/Support/PredIteratorCache.h b/final/include/llvm/Support/PredIteratorCache.h new file mode 100644 index 00000000000..bb66a8ed58b --- /dev/null +++ b/final/include/llvm/Support/PredIteratorCache.h @@ -0,0 +1,70 @@ +//===- llvm/Support/PredIteratorCache.h - pred_iterator Cache ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PredIteratorCache class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CFG.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +#ifndef LLVM_SUPPORT_PREDITERATORCACHE_H +#define LLVM_SUPPORT_PREDITERATORCACHE_H + +namespace llvm { + + /// PredIteratorCache - This class is an extremely trivial cache for + /// predecessor iterator queries. This is useful for code that repeatedly + /// wants the predecessor list for the same blocks. + class PredIteratorCache { + /// BlockToPredsMap - Pointer to null-terminated list. + DenseMap BlockToPredsMap; + DenseMap BlockToPredCountMap; + + /// Memory - This is the space that holds cached preds. + BumpPtrAllocator Memory; + public: + + /// GetPreds - Get a cached list for the null-terminated predecessor list of + /// the specified block. This can be used in a loop like this: + /// for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) + /// use(*PI); + /// instead of: + /// for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + BasicBlock **GetPreds(BasicBlock *BB) { + BasicBlock **&Entry = BlockToPredsMap[BB]; + if (Entry) return Entry; + + SmallVector PredCache(pred_begin(BB), pred_end(BB)); + PredCache.push_back(0); // null terminator. 
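+
+      // Cache the number of predecessors as well; the count excludes the
+      // null terminator pushed above.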
+ + BlockToPredCountMap[BB] = PredCache.size()-1; + + Entry = Memory.Allocate(PredCache.size()); + std::copy(PredCache.begin(), PredCache.end(), Entry); + return Entry; + } + + unsigned GetNumPreds(BasicBlock *BB) { + GetPreds(BB); + return BlockToPredCountMap[BB]; + } + + /// clear - Remove all information. + void clear() { + BlockToPredsMap.clear(); + BlockToPredCountMap.clear(); + Memory.Reset(); + } + }; +} // end namespace llvm + +#endif diff --git a/final/include/llvm/Support/PrettyStackTrace.h b/final/include/llvm/Support/PrettyStackTrace.h new file mode 100644 index 00000000000..6dbce393b97 --- /dev/null +++ b/final/include/llvm/Support/PrettyStackTrace.h @@ -0,0 +1,71 @@ +//===- llvm/Support/PrettyStackTrace.h - Pretty Crash Handling --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PrettyStackTraceEntry class, which is used to make +// crashes give more contextual information about what the program was doing +// when it crashed. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PRETTYSTACKTRACE_H +#define LLVM_SUPPORT_PRETTYSTACKTRACE_H + +namespace llvm { + class raw_ostream; + + /// DisablePrettyStackTrace - Set this to true to disable this module. This + /// might be neccessary if the host application installs its own signal + /// handlers which conflict with the ones installed by this module. + /// Defaults to false. + extern bool DisablePrettyStackTrace; + + /// PrettyStackTraceEntry - This class is used to represent a frame of the + /// "pretty" stack trace that is dumped when a program crashes. You can define + /// subclasses of this and declare them on the program stack: when they are + /// constructed and destructed, they will add their symbolic frames to a + /// virtual stack trace. This gets dumped out if the program crashes. + class PrettyStackTraceEntry { + const PrettyStackTraceEntry *NextEntry; + PrettyStackTraceEntry(const PrettyStackTraceEntry &); // DO NOT IMPLEMENT + void operator=(const PrettyStackTraceEntry&); // DO NOT IMPLEMENT + public: + PrettyStackTraceEntry(); + virtual ~PrettyStackTraceEntry(); + + /// print - Emit information about this stack frame to OS. + virtual void print(raw_ostream &OS) const = 0; + + /// getNextEntry - Return the next entry in the list of frames. + const PrettyStackTraceEntry *getNextEntry() const { return NextEntry; } + }; + + /// PrettyStackTraceString - This object prints a specified string (which + /// should not contain newlines) to the stream as the stack trace when a crash + /// occurs. + class PrettyStackTraceString : public PrettyStackTraceEntry { + const char *Str; + public: + PrettyStackTraceString(const char *str) : Str(str) {} + virtual void print(raw_ostream &OS) const; + }; + + /// PrettyStackTraceProgram - This object prints a specified program arguments + /// to the stream as the stack trace when a crash occurs. 
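+  ///
+  /// Typical use in a tool's main() (an illustrative sketch, not part of the
+  /// original header):
+  ///
+  ///   int main(int argc, char **argv) {
+  ///     PrettyStackTraceProgram X(argc, argv);
+  ///     // ... tool body; the program arguments are printed on a crash ...
+  ///   }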
+  class PrettyStackTraceProgram : public PrettyStackTraceEntry {
+    int ArgC;
+    const char *const *ArgV;
+  public:
+    PrettyStackTraceProgram(int argc, const char * const*argv)
+      : ArgC(argc), ArgV(argv) {}
+    virtual void print(raw_ostream &OS) const;
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Support/Process.h b/final/include/llvm/Support/Process.h
new file mode 100644
index 00000000000..33799229ff3
--- /dev/null
+++ b/final/include/llvm/Support/Process.h
@@ -0,0 +1,146 @@
+//===- llvm/Support/Process.h ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_PROCESS_H
+#define LLVM_SYSTEM_PROCESS_H
+
+#include "llvm/Support/TimeValue.h"
+
+namespace llvm {
+namespace sys {
+
+  /// This class provides an abstraction for getting information about the
+  /// currently executing process.
+  /// @since 1.4
+  /// @brief An abstraction for operating system processes.
+  class Process {
+    /// @name Accessors
+    /// @{
+  public:
+    /// This static function will return the operating system's virtual memory
+    /// page size.
+    /// @returns The number of bytes in a virtual memory page.
+    /// @brief Get the virtual memory page size
+    static unsigned GetPageSize();
+
+    /// This static function will return the total amount of memory allocated
+    /// by the process. This only counts the memory allocated via the malloc,
+    /// calloc and realloc functions and includes any "free" holes in the
+    /// allocated space.
+    /// @brief Return process memory usage.
+    static size_t GetMallocUsage();
+
+    /// This static function will return the total memory usage of the
+    /// process. This includes code, data, stack and mapped pages usage. Note
+    /// that the value returned here is not necessarily the Resident Set Size;
+    /// it is the total virtual memory usage, regardless of the mapped state
+    /// of that memory.
+    static size_t GetTotalMemoryUsage();
+
+    /// This static function will set \p user_time to the amount of CPU time
+    /// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
+    /// time spent in system (kernel) mode. If the operating system does not
+    /// support collection of these metrics, a zero TimeValue will be returned
+    /// for both values.
+    static void GetTimeUsage(
+      TimeValue& elapsed,
+        ///< Returns the TimeValue::now() giving current time
+      TimeValue& user_time,
+        ///< Returns the current amount of user time for the process
+      TimeValue& sys_time
+        ///< Returns the current amount of system time for the process
+      );
+
+    /// This static function will return the process' current user id number.
+    /// Not all operating systems support this feature. Where it is not
+    /// supported, the function should return 65536 as the value.
+    static int GetCurrentUserId();
+
+    /// This static function will return the process' current group id number.
+    /// Not all operating systems support this feature. Where it is not
+    /// supported, the function should return 65536 as the value.
+ static int GetCurrentGroupId(); + + /// This function makes the necessary calls to the operating system to + /// prevent core files or any other kind of large memory dumps that can + /// occur when a program fails. + /// @brief Prevent core file generation. + static void PreventCoreFiles(); + + /// This function determines if the standard input is connected directly + /// to a user's input (keyboard probably), rather than coming from a file + /// or pipe. + static bool StandardInIsUserInput(); + + /// This function determines if the standard output is connected to a + /// "tty" or "console" window. That is, the output would be displayed to + /// the user rather than being put on a pipe or stored in a file. + static bool StandardOutIsDisplayed(); + + /// This function determines if the standard error is connected to a + /// "tty" or "console" window. That is, the output would be displayed to + /// the user rather than being put on a pipe or stored in a file. + static bool StandardErrIsDisplayed(); + + /// This function determines if the given file descriptor is connected to + /// a "tty" or "console" window. That is, the output would be displayed to + /// the user rather than being put on a pipe or stored in a file. + static bool FileDescriptorIsDisplayed(int fd); + + /// This function determines the number of columns in the window + /// if standard output is connected to a "tty" or "console" + /// window. If standard output is not connected to a tty or + /// console, or if the number of columns cannot be determined, + /// this routine returns zero. + static unsigned StandardOutColumns(); + + /// This function determines the number of columns in the window + /// if standard error is connected to a "tty" or "console" + /// window. If standard error is not connected to a tty or + /// console, or if the number of columns cannot be determined, + /// this routine returns zero. + static unsigned StandardErrColumns(); + + /// This function determines whether the terminal connected to standard + /// output supports colors. If standard output is not connected to a + /// terminal, this function returns false. + static bool StandardOutHasColors(); + + /// This function determines whether the terminal connected to standard + /// error supports colors. If standard error is not connected to a + /// terminal, this function returns false. + static bool StandardErrHasColors(); + + /// Whether changing colors requires the output to be flushed. + /// This is needed on systems that don't support escape sequences for + /// changing colors. + static bool ColorNeedsFlush(); + + /// This function returns the colorcode escape sequences. + /// If ColorNeedsFlush() is true then this function will change the colors + /// and return an empty escape sequence. In that case it is the + /// responsibility of the client to flush the output stream prior to + /// calling this function. + static const char *OutputColor(char c, bool bold, bool bg); + + /// Same as OutputColor, but only enables the bold attribute. + static const char *OutputBold(bool bg); + + /// Resets the terminals colors, or returns an escape sequence to do so. 
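+    ///
+    /// A typical color-output sequence (an illustrative sketch, not part of
+    /// the original header; the color code '1' is assumed to mean red):
+    ///
+    ///   if (sys::Process::StandardErrHasColors()) {
+    ///     errs() << sys::Process::OutputColor('1', /*bold=*/true, /*bg=*/false)
+    ///            << "error:" << sys::Process::ResetColor() << ' ';
+    ///   }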
+ static const char *ResetColor(); + /// @} + }; +} +} + +#endif diff --git a/final/include/llvm/Support/Program.h b/final/include/llvm/Support/Program.h new file mode 100644 index 00000000000..78a495ef210 --- /dev/null +++ b/final/include/llvm/Support/Program.h @@ -0,0 +1,157 @@ +//===- llvm/Support/Program.h ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the llvm::sys::Program class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_PROGRAM_H +#define LLVM_SYSTEM_PROGRAM_H + +#include "llvm/Support/Path.h" + +namespace llvm { +namespace sys { + + // TODO: Add operations to communicate with the process, redirect its I/O, + // etc. + + /// This class provides an abstraction for programs that are executable by the + /// operating system. It provides a platform generic way to find executable + /// programs from the path and to execute them in various ways. The sys::Path + /// class is used to specify the location of the Program. + /// @since 1.4 + /// @brief An abstraction for finding and executing programs. + class Program { + /// Opaque handle for target specific data. + void *Data_; + + // Noncopyable. + Program(const Program& other); + Program& operator=(const Program& other); + + /// @name Methods + /// @{ + public: + + Program(); + ~Program(); + + /// Return process ID of this program. + unsigned GetPid() const; + + /// This function executes the program using the \p arguments provided. The + /// invoked program will inherit the stdin, stdout, and stderr file + /// descriptors, the environment and other configuration settings of the + /// invoking program. If Path::executable() does not return true when this + /// function is called then a std::string is thrown. + /// @returns false in case of error, true otherwise. + /// @see FindProgramByName + /// @brief Executes the program with the given set of \p args. + bool Execute + ( const Path& path, ///< sys::Path object providing the path of the + ///< program to be executed. It is presumed this is the result of + ///< the FindProgramByName method. + const char** args, ///< A vector of strings that are passed to the + ///< program. The first element should be the name of the program. + ///< The list *must* be terminated by a null char* entry. + const char ** env = 0, ///< An optional vector of strings to use for + ///< the program's environment. If not provided, the current program's + ///< environment will be used. + const sys::Path** redirects = 0, ///< An optional array of pointers to + ///< Paths. If the array is null, no redirection is done. The array + ///< should have a size of at least three. If the pointer in the array + ///< are not null, then the inferior process's stdin(0), stdout(1), + ///< and stderr(2) will be redirected to the corresponding Paths. + ///< When an empty Path is passed in, the corresponding file + ///< descriptor will be disconnected (ie, /dev/null'd) in a portable + ///< way. + unsigned memoryLimit = 0, ///< If non-zero, this specifies max. amount + ///< of memory can be allocated by process. If memory usage will be + ///< higher limit, the child is killed and this call returns. If zero + ///< - no memory limit. 
+ std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while invoking the + ///< program. + ); + + /// This function waits for the program to exit. This function will block + /// the current program until the invoked program exits. + /// @returns an integer result code indicating the status of the program. + /// A zero or positive value indicates the result code of the program. A + /// negative value is the signal number on which it terminated. + /// @see Execute + /// @brief Waits for the program to exit. + int Wait + ( const Path& path, ///< The path to the child process executable. + unsigned secondsToWait, ///< If non-zero, this specifies the amount + ///< of time to wait for the child process to exit. If the time + ///< expires, the child is killed and this call returns. If zero, + ///< this function will wait until the child finishes or forever if + ///< it doesn't. + std::string* ErrMsg ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while waiting. + ); + + /// This function terminates the program. + /// @returns true if an error occured. + /// @see Execute + /// @brief Terminates the program. + bool Kill + ( std::string* ErrMsg = 0 ///< If non-zero, provides a pointer to a string + ///< instance in which error messages will be returned. If the string + ///< is non-empty upon return an error occurred while killing the + ///< program. + ); + + /// This static constructor (factory) will attempt to locate a program in + /// the operating system's file system using some pre-determined set of + /// locations to search (e.g. the PATH on Unix). Paths with slashes are + /// returned unmodified. + /// @returns A Path object initialized to the path of the program or a + /// Path object that is empty (invalid) if the program could not be found. + /// @brief Construct a Program by finding it by name. + static Path FindProgramByName(const std::string& name); + + // These methods change the specified standard stream (stdin, + // stdout, or stderr) to binary mode. They return true if an error + // occurred + static bool ChangeStdinToBinary(); + static bool ChangeStdoutToBinary(); + static bool ChangeStderrToBinary(); + + /// A convenience function equivalent to Program prg; prg.Execute(..); + /// prg.Wait(..); + /// @see Execute, Wait + static int ExecuteAndWait(const Path& path, + const char** args, + const char ** env = 0, + const sys::Path** redirects = 0, + unsigned secondsToWait = 0, + unsigned memoryLimit = 0, + std::string* ErrMsg = 0); + + /// A convenience function equivalent to Program prg; prg.Execute(..); + /// @see Execute + static void ExecuteNoWait(const Path& path, + const char** args, + const char ** env = 0, + const sys::Path** redirects = 0, + unsigned memoryLimit = 0, + std::string* ErrMsg = 0); + + /// @} + + }; +} +} + +#endif diff --git a/final/include/llvm/Support/RWMutex.h b/final/include/llvm/Support/RWMutex.h new file mode 100644 index 00000000000..0d4cb81de39 --- /dev/null +++ b/final/include/llvm/Support/RWMutex.h @@ -0,0 +1,173 @@ +//===- RWMutex.h - Reader/Writer Mutual Exclusion Lock ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_RWMUTEX_H
+#define LLVM_SYSTEM_RWMUTEX_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm
+{
+  namespace sys
+  {
+    /// @brief Platform agnostic RWMutex class.
+    class RWMutexImpl
+    {
+    /// @name Constructors
+    /// @{
+    public:
+
+      /// Initializes the lock but doesn't acquire it.
+      /// @brief Default Constructor.
+      explicit RWMutexImpl();
+
+      /// Releases and removes the lock
+      /// @brief Destructor
+      ~RWMutexImpl();
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+
+      /// Attempts to unconditionally acquire the lock in reader mode. If the
+      /// lock is held by a writer, this method will wait until it can acquire
+      /// the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock in reader mode.
+      bool reader_acquire();
+
+      /// Attempts to release the lock in reader mode.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock in reader mode.
+      bool reader_release();
+
+      /// Attempts to unconditionally acquire the lock in writer mode. If the
+      /// lock is held by any readers, this method will wait until it can
+      /// acquire the lock.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally acquire the lock in writer mode.
+      bool writer_acquire();
+
+      /// Attempts to release the lock in writer mode.
+      /// @returns false if any kind of error occurs, true otherwise.
+      /// @brief Unconditionally release the lock in writer mode.
+      bool writer_release();
+
+    //@}
+    /// @name Platform Dependent Data
+    /// @{
+    private:
+      void* data_; ///< We don't know what the data will be
+
+    /// @}
+    /// @name Do Not Implement
+    /// @{
+    private:
+      RWMutexImpl(const RWMutexImpl & original);
+      void operator=(const RWMutexImpl &);
+    /// @}
+    };
+
+    /// SmartRWMutex - An R/W mutex with a compile time constant parameter that
+    /// indicates whether this mutex should become a no-op when we're not
+    /// running in multithreaded mode.
+    template<bool mt_only>
+    class SmartRWMutex : public RWMutexImpl {
+      unsigned readers, writers;
+    public:
+      explicit SmartRWMutex() : RWMutexImpl(), readers(0), writers(0) { }
+
+      bool reader_acquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::reader_acquire();
+
+        // Single-threaded debugging code. This would be racy in multithreaded
+        // mode, but provides no sanity checks in single threaded mode.
+        ++readers;
+        return true;
+      }
+
+      bool reader_release() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::reader_release();
+
+        // Single-threaded debugging code. This would be racy in multithreaded
+        // mode, but provides no sanity checks in single threaded mode.
+        assert(readers > 0 && "Reader lock not acquired before release!");
+        --readers;
+        return true;
+      }
+
+      bool writer_acquire() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::writer_acquire();
+
+        // Single-threaded debugging code. This would be racy in multithreaded
+        // mode, but provides no sanity checks in single threaded mode.
+        assert(writers == 0 && "Writer lock already acquired!");
+        ++writers;
+        return true;
+      }
+
+      bool writer_release() {
+        if (!mt_only || llvm_is_multithreaded())
+          return RWMutexImpl::writer_release();
+
+        // Single-threaded debugging code. This would be racy in multithreaded
+        // mode, but provides no sanity checks in single threaded mode.
+        assert(writers == 1 && "Writer lock not acquired before release!");
+        --writers;
+        return true;
+      }
+
+    private:
+      SmartRWMutex(const SmartRWMutex & original);
+      void operator=(const SmartRWMutex &);
+    };
+    typedef SmartRWMutex<false> RWMutex;
+
+    /// ScopedReader - RAII acquisition of a reader lock
+    template<bool mt_only>
+    struct SmartScopedReader {
+      SmartRWMutex<mt_only>& mutex;
+
+      explicit SmartScopedReader(SmartRWMutex<mt_only>& m) : mutex(m) {
+        mutex.reader_acquire();
+      }
+
+      ~SmartScopedReader() {
+        mutex.reader_release();
+      }
+    };
+    typedef SmartScopedReader<false> ScopedReader;
+
+    /// ScopedWriter - RAII acquisition of a writer lock
+    template<bool mt_only>
+    struct SmartScopedWriter {
+      SmartRWMutex<mt_only>& mutex;
+
+      explicit SmartScopedWriter(SmartRWMutex<mt_only>& m) : mutex(m) {
+        mutex.writer_acquire();
+      }
+
+      ~SmartScopedWriter() {
+        mutex.writer_release();
+      }
+    };
+    typedef SmartScopedWriter<false> ScopedWriter;
+  }
+}
+
+#endif
diff --git a/final/include/llvm/Support/Recycler.h b/final/include/llvm/Support/Recycler.h
new file mode 100644
index 00000000000..d8f8c789414
--- /dev/null
+++ b/final/include/llvm/Support/Recycler.h
@@ -0,0 +1,117 @@
+//==- llvm/Support/Recycler.h - Recycling Allocator --------------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Recycler class template. See the doxygen comment for
+// Recycler for more details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RECYCLER_H
+#define LLVM_SUPPORT_RECYCLER_H
+
+#include "llvm/ADT/ilist.h"
+#include "llvm/Support/AlignOf.h"
+#include <cassert>
+
+namespace llvm {
+
+/// PrintRecyclingAllocatorStats - Helper for RecyclingAllocator for
+/// printing statistics.
+///
+void PrintRecyclerStats(size_t Size, size_t Align, size_t FreeListSize);
+
+/// RecyclerStruct - Implementation detail for Recycler. This is a
+/// class that the recycler imposes on free'd memory to carve out
+/// next/prev pointers.
+struct RecyclerStruct { + RecyclerStruct *Prev, *Next; +}; + +template<> +struct ilist_traits : + public ilist_default_traits { + static RecyclerStruct *getPrev(const RecyclerStruct *t) { return t->Prev; } + static RecyclerStruct *getNext(const RecyclerStruct *t) { return t->Next; } + static void setPrev(RecyclerStruct *t, RecyclerStruct *p) { t->Prev = p; } + static void setNext(RecyclerStruct *t, RecyclerStruct *n) { t->Next = n; } + + mutable RecyclerStruct Sentinel; + RecyclerStruct *createSentinel() const { + return &Sentinel; + } + static void destroySentinel(RecyclerStruct *) {} + + RecyclerStruct *provideInitialHead() const { return createSentinel(); } + RecyclerStruct *ensureHead(RecyclerStruct*) const { return createSentinel(); } + static void noteHead(RecyclerStruct*, RecyclerStruct*) {} + + static void deleteNode(RecyclerStruct *) { + assert(0 && "Recycler's ilist_traits shouldn't see a deleteNode call!"); + } +}; + +/// Recycler - This class manages a linked-list of deallocated nodes +/// and facilitates reusing deallocated memory in place of allocating +/// new memory. +/// +template::Alignment> +class Recycler { + /// FreeList - Doubly-linked list of nodes that have deleted contents and + /// are not in active use. + /// + iplist FreeList; + +public: + ~Recycler() { + // If this fails, either the callee has lost track of some allocation, + // or the callee isn't tracking allocations and should just call + // clear() before deleting the Recycler. + assert(FreeList.empty() && "Non-empty recycler deleted!"); + } + + /// clear - Release all the tracked allocations to the allocator. The + /// recycler must be free of any tracked allocations before being + /// deleted; calling clear is one way to ensure this. + template + void clear(AllocatorType &Allocator) { + while (!FreeList.empty()) { + T *t = reinterpret_cast(FreeList.remove(FreeList.begin())); + Allocator.Deallocate(t); + } + } + + template + SubClass *Allocate(AllocatorType &Allocator) { + assert(sizeof(SubClass) <= Size && + "Recycler allocation size is less than object size!"); + assert(AlignOf::Alignment <= Align && + "Recycler allocation alignment is less than object alignment!"); + return !FreeList.empty() ? + reinterpret_cast(FreeList.remove(FreeList.begin())) : + static_cast(Allocator.Allocate(Size, Align)); + } + + template + T *Allocate(AllocatorType &Allocator) { + return Allocate(Allocator); + } + + template + void Deallocate(AllocatorType & /*Allocator*/, SubClass* Element) { + FreeList.push_front(reinterpret_cast(Element)); + } + + void PrintStats() { + PrintRecyclerStats(Size, Align, FreeList.size()); + } +}; + +} + +#endif diff --git a/final/include/llvm/Support/RecyclingAllocator.h b/final/include/llvm/Support/RecyclingAllocator.h new file mode 100644 index 00000000000..34ab874778c --- /dev/null +++ b/final/include/llvm/Support/RecyclingAllocator.h @@ -0,0 +1,73 @@ +//==- llvm/Support/RecyclingAllocator.h - Recycling Allocator ----*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RecyclingAllocator class. See the doxygen comment for +// RecyclingAllocator for more details on the implementation. 
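+//
+// For example (an illustrative sketch, not part of the original header):
+//
+//   RecyclingAllocator<BumpPtrAllocator, SDNode> Allocator;
+//   SDNode *N = Allocator.Allocate();  // fresh or recycled storage
+//   ...
+//   Allocator.Deallocate(N);           // storage is kept for reuse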
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RECYCLINGALLOCATOR_H +#define LLVM_SUPPORT_RECYCLINGALLOCATOR_H + +#include "llvm/Support/Recycler.h" + +namespace llvm { + +/// RecyclingAllocator - This class wraps an Allocator, adding the +/// functionality of recycling deleted objects. +/// +template::Alignment> +class RecyclingAllocator { +private: + /// Base - Implementation details. + /// + Recycler Base; + + /// Allocator - The wrapped allocator. + /// + AllocatorType Allocator; + +public: + ~RecyclingAllocator() { Base.clear(Allocator); } + + /// Allocate - Return a pointer to storage for an object of type + /// SubClass. The storage may be either newly allocated or recycled. + /// + template + SubClass *Allocate() { return Base.template Allocate(Allocator); } + + T *Allocate() { return Base.Allocate(Allocator); } + + /// Deallocate - Release storage for the pointed-to object. The + /// storage will be kept track of and may be recycled. + /// + template + void Deallocate(SubClass* E) { return Base.Deallocate(Allocator, E); } + + void PrintStats() { Base.PrintStats(); } +}; + +} + +template +inline void *operator new(size_t, + llvm::RecyclingAllocator &Allocator) { + return Allocator.Allocate(); +} + +template +inline void operator delete(void *E, + llvm::RecyclingAllocator &A) { + A.Deallocate(E); +} + +#endif diff --git a/final/include/llvm/Support/Regex.h b/final/include/llvm/Support/Regex.h new file mode 100644 index 00000000000..b46a66889e9 --- /dev/null +++ b/final/include/llvm/Support/Regex.h @@ -0,0 +1,81 @@ +//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a POSIX regular expression matcher. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_REGEX_H +#define LLVM_SUPPORT_REGEX_H + +#include + +struct llvm_regex; + +namespace llvm { + class StringRef; + template class SmallVectorImpl; + + class Regex { + public: + enum { + NoFlags=0, + /// Compile for matching that ignores upper/lower case distinctions. + IgnoreCase=1, + /// Compile for newline-sensitive matching. With this flag '[^' bracket + /// expressions and '.' never match newline. A ^ anchor matches the + /// null string after any newline in the string in addition to its normal + /// function, and the $ anchor matches the null string before any + /// newline in the string in addition to its normal function. + Newline=2 + }; + + /// Compiles the given POSIX Extended Regular Expression \arg Regex. + /// This implementation supports regexes and matching strings with embedded + /// NUL characters. + Regex(StringRef Regex, unsigned Flags = NoFlags); + ~Regex(); + + /// isValid - returns the error encountered during regex compilation, or + /// matching, if any. + bool isValid(std::string &Error); + + /// getNumMatches - In a valid regex, return the number of parenthesized + /// matches it contains. The number filled in by match will include this + /// many entries plus one for the whole regex (as element 0). + unsigned getNumMatches() const; + + /// matches - Match the regex against a given \arg String. 
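+  /// For example (an illustrative sketch, not part of the original header):
+  ///
+  ///   Regex R("a([0-9]+)b");
+  ///   SmallVector<StringRef, 2> Groups;
+  ///   if (R.match("a42b", &Groups)) {
+  ///     // Groups[0] == "a42b", Groups[1] == "42".
+  ///   }
+  ///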
+  ///
+  /// \param Matches - If given, on a successful match this will be filled in
+  /// with references to the matched group expressions (inside \arg String),
+  /// the first group is always the entire pattern.
+  ///
+  /// This returns true on a successful match.
+  bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = 0);
+
+  /// sub - Return the result of replacing the first match of the regex in
+  /// \arg String with the \arg Repl string. Backreferences like "\0" in the
+  /// replacement string are replaced with the appropriate match substring.
+  ///
+  /// Note that the replacement string has backslash escaping performed on
+  /// it. Invalid backreferences are ignored (replaced by empty strings).
+  ///
+  /// \param Error If non-null, any errors in the substitution (invalid
+  /// backreferences, trailing backslashes) will be recorded as a non-empty
+  /// string.
+  std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
+
+  private:
+    struct llvm_regex *preg;
+    int error;
+  };
+}
+
+#endif // LLVM_SUPPORT_REGEX_H
diff --git a/final/include/llvm/Support/Registry.h b/final/include/llvm/Support/Registry.h
new file mode 100644
index 00000000000..d0375bedd9f
--- /dev/null
+++ b/final/include/llvm/Support/Registry.h
@@ -0,0 +1,223 @@
+//=== Registry.h - Linker-supported plugin registries -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a registry template for discovering pluggable modules.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_REGISTRY_H
+#define LLVM_SUPPORT_REGISTRY_H
+
+namespace llvm {
+  /// A simple registry entry which provides only a name, description, and
+  /// no-argument constructor.
+  template <typename T>
+  class SimpleRegistryEntry {
+    const char *Name, *Desc;
+    T *(*Ctor)();
+
+  public:
+    SimpleRegistryEntry(const char *N, const char *D, T *(*C)())
+      : Name(N), Desc(D), Ctor(C)
+    {}
+
+    const char *getName() const { return Name; }
+    const char *getDesc() const { return Desc; }
+    T *instantiate() const { return Ctor(); }
+  };
+
+
+  /// Traits for registry entries. If using other than SimpleRegistryEntry, it
+  /// is necessary to define an alternate traits class.
+  template <typename T>
+  class RegistryTraits {
+    RegistryTraits(); // Do not implement.
+
+  public:
+    typedef SimpleRegistryEntry<T> entry;
+
+    /// nameof/descof - Accessors for name and description of entries. These
+    /// are used to generate help for command-line options.
+    static const char *nameof(const entry &Entry) { return Entry.getName(); }
+    static const char *descof(const entry &Entry) { return Entry.getDesc(); }
+  };
+
+
+  /// A global registry used in conjunction with static constructors to make
+  /// pluggable components (like targets or garbage collectors) "just work"
+  /// when linked with an executable.
+  template <typename T, typename U = RegistryTraits<T> >
+  class Registry {
+  public:
+    typedef U traits;
+    typedef typename U::entry entry;
+
+    class node;
+    class listener;
+    class iterator;
+
+  private:
+    Registry(); // Do not implement.
+
+    static void Announce(const entry &E) {
+      for (listener *Cur = ListenerHead; Cur; Cur = Cur->Next)
+        Cur->registered(E);
+    }
+
+    friend class node;
+    static node *Head, *Tail;
+
+    friend class listener;
+    static listener *ListenerHead, *ListenerTail;
+
+  public:
+    /// Node in linked list of entries.
+ /// + class node { + friend class iterator; + + node *Next; + const entry& Val; + + public: + node(const entry& V) : Next(0), Val(V) { + if (Tail) + Tail->Next = this; + else + Head = this; + Tail = this; + + Announce(V); + } + }; + + + /// Iterators for registry entries. + /// + class iterator { + const node *Cur; + + public: + explicit iterator(const node *N) : Cur(N) {} + + bool operator==(const iterator &That) const { return Cur == That.Cur; } + bool operator!=(const iterator &That) const { return Cur != That.Cur; } + iterator &operator++() { Cur = Cur->Next; return *this; } + const entry &operator*() const { return Cur->Val; } + const entry *operator->() const { return &Cur->Val; } + }; + + static iterator begin() { return iterator(Head); } + static iterator end() { return iterator(0); } + + + /// Abstract base class for registry listeners, which are informed when new + /// entries are added to the registry. Simply subclass and instantiate: + /// + /// class CollectorPrinter : public Registry::listener { + /// protected: + /// void registered(const Registry::entry &e) { + /// cerr << "collector now available: " << e->getName() << "\n"; + /// } + /// + /// public: + /// CollectorPrinter() { init(); } // Print those already registered. + /// }; + /// + /// CollectorPrinter Printer; + /// + class listener { + listener *Prev, *Next; + + friend void Registry::Announce(const entry &E); + + protected: + /// Called when an entry is added to the registry. + /// + virtual void registered(const entry &) = 0; + + /// Calls 'registered' for each pre-existing entry. + /// + void init() { + for (iterator I = begin(), E = end(); I != E; ++I) + registered(*I); + } + + public: + listener() : Prev(ListenerTail), Next(0) { + if (Prev) + Prev->Next = this; + else + ListenerHead = this; + ListenerTail = this; + } + + virtual ~listener() { + if (Next) + Next->Prev = Prev; + else + ListenerTail = Prev; + if (Prev) + Prev->Next = Next; + else + ListenerHead = Next; + } + }; + + + /// A static registration template. Use like such: + /// + /// Registry::Add + /// X("fancy-gc", "Newfangled garbage collector."); + /// + /// Use of this template requires that: + /// + /// 1. The registered subclass has a default constructor. + // + /// 2. The registry entry type has a constructor compatible with this + /// signature: + /// + /// entry(const char *Name, const char *ShortDesc, T *(*Ctor)()); + /// + /// If you have more elaborate requirements, then copy and modify. + /// + template + class Add { + entry Entry; + node Node; + + static T *CtorFn() { return new V(); } + + public: + Add(const char *Name, const char *Desc) + : Entry(Name, Desc, CtorFn), Node(Entry) {} + }; + + /// Registry::Parser now lives in llvm/Support/RegistryParser.h. + + }; + + // Since these are defined in a header file, plugins must be sure to export + // these symbols. 
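+  //
+  // For example (an illustrative sketch, not part of the original header;
+  // MyCollector is a hypothetical GCStrategy subclass), a plugin registers
+  // itself with a single static object:
+  //
+  //   typedef Registry<GCStrategy> GCRegistry;
+  //   static GCRegistry::Add<MyCollector>
+  //     X("my-collector", "An example garbage collector.");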
+ + template + typename Registry::node *Registry::Head; + + template + typename Registry::node *Registry::Tail; + + template + typename Registry::listener *Registry::ListenerHead; + + template + typename Registry::listener *Registry::ListenerTail; + +} + +#endif diff --git a/final/include/llvm/Support/RegistryParser.h b/final/include/llvm/Support/RegistryParser.h new file mode 100644 index 00000000000..2cc578370fe --- /dev/null +++ b/final/include/llvm/Support/RegistryParser.h @@ -0,0 +1,55 @@ +//=== RegistryParser.h - Linker-supported plugin registries -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines a command-line parser for a registry. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_REGISTRY_PARSER_H +#define LLVM_SUPPORT_REGISTRY_PARSER_H + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Registry.h" + +namespace llvm { + + /// A command-line parser for a registry. Use like such: + /// + /// static cl::opt::entry, false, + /// RegistryParser > + /// GCOpt("gc", cl::desc("Garbage collector to use."), + /// cl::value_desc()); + /// + /// To make use of the value: + /// + /// Collector *TheCollector = GCOpt->instantiate(); + /// + template > + class RegistryParser : + public cl::parser, + public Registry::listener { + typedef U traits; + typedef typename U::entry entry; + typedef typename Registry::listener listener; + + protected: + void registered(const entry &E) { + addLiteralOption(traits::nameof(E), &E, traits::descof(E)); + } + + public: + void initialize(cl::Option &O) { + listener::init(); + cl::parser::initialize(O); + } + }; + +} + +#endif // LLVM_SUPPORT_REGISTRY_PARSER_H diff --git a/final/include/llvm/Support/SMLoc.h b/final/include/llvm/Support/SMLoc.h new file mode 100644 index 00000000000..967bf1432c6 --- /dev/null +++ b/final/include/llvm/Support/SMLoc.h @@ -0,0 +1,44 @@ +//===- SMLoc.h - Source location for use with diagnostics -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SMLoc class. This class encapsulates a location in +// source code for use in diagnostics. +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_SMLOC_H +#define SUPPORT_SMLOC_H + +namespace llvm { + +// SMLoc - Represents a location in source code. 
+class SMLoc {
+  const char *Ptr;
+public:
+  SMLoc() : Ptr(0) {}
+  SMLoc(const SMLoc &RHS) : Ptr(RHS.Ptr) {}
+
+  bool isValid() const { return Ptr != 0; }
+
+  bool operator==(const SMLoc &RHS) const { return RHS.Ptr == Ptr; }
+  bool operator!=(const SMLoc &RHS) const { return RHS.Ptr != Ptr; }
+
+  const char *getPointer() const { return Ptr; }
+
+  static SMLoc getFromPointer(const char *Ptr) {
+    SMLoc L;
+    L.Ptr = Ptr;
+    return L;
+  }
+};
+
+}
+
+#endif
+
diff --git a/final/include/llvm/Support/Signals.h b/final/include/llvm/Support/Signals.h
new file mode 100644
index 00000000000..9a84df68ddb
--- /dev/null
+++ b/final/include/llvm/Support/Signals.h
@@ -0,0 +1,59 @@
+//===- llvm/Support/Signals.h - Signal Handling support ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_SIGNALS_H
+#define LLVM_SYSTEM_SIGNALS_H
+
+#include "llvm/Support/Path.h"
+
+namespace llvm {
+namespace sys {
+
+  /// This function runs all the registered interrupt handlers, including the
+  /// removal of files registered by RemoveFileOnSignal.
+  void RunInterruptHandlers();
+
+  /// This function registers signal handlers to ensure that if a signal is
+  /// delivered, the named file is removed.
+  /// @brief Remove a file if a fatal signal occurs.
+  bool RemoveFileOnSignal(const Path &Filename, std::string* ErrMsg = 0);
+
+  /// This function removes a file from the list of files to be removed on
+  /// signal delivery.
+  void DontRemoveFileOnSignal(const Path &Filename);
+
+  /// When an error signal (such as SIGABRT or SIGSEGV) is delivered to the
+  /// process, print a stack trace and then exit.
+  /// @brief Print a stack trace if a fatal signal occurs.
+  void PrintStackTraceOnErrorSignal();
+
+  /// AddSignalHandler - Add a function to be called when an abort/kill signal
+  /// is delivered to the process. The handler can have a cookie passed to it
+  /// to identify what instance of the handler it is.
+  void AddSignalHandler(void (*FnPtr)(void *), void *Cookie);
+
+  /// This function registers a function to be called when the user "interrupts"
+  /// the program (typically by pressing ctrl-c). When the user interrupts the
+  /// program, the specified interrupt function is called instead of the program
+  /// being killed, and the interrupt function is automatically disabled. Note
+  /// that interrupt functions are not allowed to call any non-reentrant
+  /// functions. A null interrupt function pointer disables the currently
+  /// installed function. Note also that the handler may be executed on a
+  /// different thread on some platforms.
+  /// @brief Register a function to be called when ctrl-c is pressed.
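+  ///
+  /// For example (an illustrative sketch, not part of the original header;
+  /// CleanupHandler is a hypothetical function that only does reentrant-safe
+  /// work):
+  ///
+  ///   void CleanupHandler() { /* remove temporary files, etc. */ }
+  ///   ...
+  ///   sys::SetInterruptFunction(CleanupHandler);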
+ void SetInterruptFunction(void (*IF)()); +} // End sys namespace +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Support/Solaris.h b/final/include/llvm/Support/Solaris.h new file mode 100644 index 00000000000..57eee2cb497 --- /dev/null +++ b/final/include/llvm/Support/Solaris.h @@ -0,0 +1,40 @@ +/*===- llvm/Support/Solaris.h ------------------------------------*- C++ -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===* + * + * This file contains portability fixes for Solaris hosts. + * + *===----------------------------------------------------------------------===*/ + +#ifndef LLVM_SYSTEM_SOLARIS_H +#define LLVM_SYSTEM_SOLARIS_H + +#include +#include + +#undef CS +#undef DS +#undef ES +#undef FS +#undef GS +#undef SS +#undef EAX +#undef ECX +#undef EDX +#undef EBX +#undef ESP +#undef EBP +#undef ESI +#undef EDI +#undef EIP +#undef UESP +#undef EFL +#undef ERR +#undef TRAPNO + +#endif diff --git a/final/include/llvm/Support/SourceMgr.h b/final/include/llvm/Support/SourceMgr.h new file mode 100644 index 00000000000..2a712e44bd1 --- /dev/null +++ b/final/include/llvm/Support/SourceMgr.h @@ -0,0 +1,185 @@ +//===- SourceMgr.h - Manager for Source Buffers & Diagnostics ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SMDiagnostic and SourceMgr classes. This +// provides a simple substrate for diagnostics, #include handling, and other low +// level things for simple parsers. +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_SOURCEMGR_H +#define SUPPORT_SOURCEMGR_H + +#include "llvm/Support/SMLoc.h" + +#include +#include +#include + +namespace llvm { + class MemoryBuffer; + class SourceMgr; + class SMDiagnostic; + class Twine; + class raw_ostream; + +/// SourceMgr - This owns the files read by a parser, handles include stacks, +/// and handles diagnostic wrangling. +class SourceMgr { +public: + /// DiagHandlerTy - Clients that want to handle their own diagnostics in a + /// custom way can register a function pointer+context as a diagnostic + /// handler. It gets called each time PrintMessage is invoked. + typedef void (*DiagHandlerTy)(const SMDiagnostic&, void *Context); +private: + struct SrcBuffer { + /// Buffer - The memory buffer for the file. + MemoryBuffer *Buffer; + + /// IncludeLoc - This is the location of the parent include, or null if at + /// the top level. + SMLoc IncludeLoc; + }; + + /// Buffers - This is all of the buffers that we are reading from. + std::vector Buffers; + + // IncludeDirectories - This is the list of directories we should search for + // include files in. + std::vector IncludeDirectories; + + /// LineNoCache - This is a cache for line number queries, its implementation + /// is really private to SourceMgr.cpp. 
+  mutable void *LineNoCache;
+
+  DiagHandlerTy DiagHandler;
+  void *DiagContext;
+
+  SourceMgr(const SourceMgr&);      // DO NOT IMPLEMENT
+  void operator=(const SourceMgr&); // DO NOT IMPLEMENT
+public:
+  SourceMgr() : LineNoCache(0), DiagHandler(0), DiagContext(0) {}
+  ~SourceMgr();
+
+  void setIncludeDirs(const std::vector<std::string> &Dirs) {
+    IncludeDirectories = Dirs;
+  }
+
+  /// setDiagHandler - Specify a diagnostic handler to be invoked every time
+  /// PrintMessage is called. Ctx is passed into the handler when it is invoked.
+  void setDiagHandler(DiagHandlerTy DH, void *Ctx = 0) {
+    DiagHandler = DH;
+    DiagContext = Ctx;
+  }
+
+  const SrcBuffer &getBufferInfo(unsigned i) const {
+    assert(i < Buffers.size() && "Invalid Buffer ID!");
+    return Buffers[i];
+  }
+
+  const MemoryBuffer *getMemoryBuffer(unsigned i) const {
+    assert(i < Buffers.size() && "Invalid Buffer ID!");
+    return Buffers[i].Buffer;
+  }
+
+  SMLoc getParentIncludeLoc(unsigned i) const {
+    assert(i < Buffers.size() && "Invalid Buffer ID!");
+    return Buffers[i].IncludeLoc;
+  }
+
+  /// AddNewSourceBuffer - Add a new source buffer to this source manager. This
+  /// takes ownership of the memory buffer.
+  unsigned AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc) {
+    SrcBuffer NB;
+    NB.Buffer = F;
+    NB.IncludeLoc = IncludeLoc;
+    Buffers.push_back(NB);
+    return Buffers.size()-1;
+  }
+
+  /// AddIncludeFile - Search for a file with the specified name in the current
+  /// directory or in one of the IncludeDirs. If no file is found, this returns
+  /// ~0, otherwise it returns the buffer ID of the stacked file.
+  unsigned AddIncludeFile(const std::string &Filename, SMLoc IncludeLoc);
+
+  /// FindBufferContainingLoc - Return the ID of the buffer containing the
+  /// specified location, returning -1 if not found.
+  int FindBufferContainingLoc(SMLoc Loc) const;
+
+  /// FindLineNumber - Find the line number for the specified location in the
+  /// specified file. This is not a fast method.
+  unsigned FindLineNumber(SMLoc Loc, int BufferID = -1) const;
+
+  /// PrintMessage - Emit a message about the specified location with the
+  /// specified string.
+  ///
+  /// @param Type - If non-null, the kind of message (e.g., "error") which is
+  /// prefixed to the message.
+  /// @param ShowLine - Should the diagnostic show the source line.
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+                    bool ShowLine = true) const;
+
+
+  /// GetMessage - Return an SMDiagnostic at the specified location with the
+  /// specified string.
+  ///
+  /// @param Type - If non-null, the kind of message (e.g., "error") which is
+  /// prefixed to the message.
+  /// @param ShowLine - Should the diagnostic show the source line.
+  SMDiagnostic GetMessage(SMLoc Loc,
+                          const Twine &Msg, const char *Type,
+                          bool ShowLine = true) const;
+
+
+private:
+  void PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const;
+};
+
+
+/// SMDiagnostic - Instances of this class encapsulate one diagnostic report,
+/// allowing printing to a raw_ostream as a caret diagnostic.
+class SMDiagnostic {
+  const SourceMgr *SM;
+  SMLoc Loc;
+  std::string Filename;
+  int LineNo, ColumnNo;
+  std::string Message, LineContents;
+  unsigned ShowLine : 1;
+
+public:
+  // Null diagnostic.
+  SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {}
+  // Diagnostic with no location (e.g. file not found, command line arg error).
+  SMDiagnostic(const std::string &filename, const std::string &Msg)
+    : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1),
+      Message(Msg), ShowLine(false) {}
+
+  // Diagnostic with a location.
+  SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN,
+               int Line, int Col,
+               const std::string &Msg, const std::string &LineStr,
+               bool showline = true)
+    : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg),
+      LineContents(LineStr), ShowLine(showline) {}
+
+  const SourceMgr *getSourceMgr() const { return SM; }
+  SMLoc getLoc() const { return Loc; }
+  const std::string &getFilename() const { return Filename; }
+  int getLineNo() const { return LineNo; }
+  int getColumnNo() const { return ColumnNo; }
+  const std::string &getMessage() const { return Message; }
+  const std::string &getLineContents() const { return LineContents; }
+  bool getShowLine() const { return ShowLine; }
+
+  void Print(const char *ProgName, raw_ostream &S) const;
+};
+
+}  // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/StandardPasses.h b/final/include/llvm/Support/StandardPasses.h
new file mode 100644
index 00000000000..d774faf3864
--- /dev/null
+++ b/final/include/llvm/Support/StandardPasses.h
@@ -0,0 +1,242 @@
+//===-- llvm/Support/StandardPasses.h - Standard pass lists -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines utility functions for creating a "standard" set of
+// optimization passes, so that compilers and tools which use optimization
+// passes use the same set of standard passes.
+//
+// These are implemented as inline functions so that we do not have to worry
+// about link issues.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STANDARDPASSES_H
+#define LLVM_SUPPORT_STANDARDPASSES_H
+
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/IPO.h"
+
+namespace llvm {
+
+  static inline void createStandardAliasAnalysisPasses(PassManagerBase *PM) {
+    // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
+    // BasicAliasAnalysis wins if they disagree. This is intended to help
+    // support "obvious" type-punning idioms.
+    PM->add(createTypeBasedAliasAnalysisPass());
+    PM->add(createBasicAliasAnalysisPass());
+  }
+
+  /// createStandardFunctionPasses - Add the standard list of function passes to
+  /// the provided pass manager.
+  ///
+  /// \arg OptimizationLevel - The optimization level, corresponding to -O0,
+  /// -O1, etc.
+  static inline void createStandardFunctionPasses(PassManagerBase *PM,
+                                                  unsigned OptimizationLevel) {
+    if (OptimizationLevel > 0) {
+      createStandardAliasAnalysisPasses(PM);
+      PM->add(createCFGSimplificationPass());
+      PM->add(createScalarReplAggregatesPass());
+      PM->add(createEarlyCSEPass());
+    }
+  }
+
+  /// createStandardModulePasses - Add the standard list of module passes to the
+  /// provided pass manager.
+  ///
+  /// \arg OptimizationLevel - The optimization level, corresponding to -O0,
+  /// -O1, etc.
+  /// \arg OptimizeSize - Whether the transformations should optimize for size.
+  /// \arg UnitAtATime - Allow passes which may make global module changes.
+  /// \arg UnrollLoops - Allow loop unrolling.
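Editor's sketch (not part of the patch): the intended diagnostic workflow with the classes above. The buffer contents, file name, and error position are all illustrative.

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/SourceMgr.h"

    void reportDemo() {
      llvm::SourceMgr SM;
      llvm::MemoryBuffer *Buf =
          llvm::MemoryBuffer::getMemBuffer("let x = ;\n", "example.txt");
      SM.AddNewSourceBuffer(Buf, llvm::SMLoc());  // SM now owns Buf.

      // Point at the offending character and emit a caret diagnostic.
      llvm::SMLoc Loc = llvm::SMLoc::getFromPointer(Buf->getBufferStart() + 8);
      SM.PrintMessage(Loc, "expected expression", "error");
    }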
+  /// \arg SimplifyLibCalls - Allow library calls to be simplified.
+  /// \arg HaveExceptions - Whether the module may have code using exceptions.
+  /// \arg InliningPass - The inlining pass to use, if any, or null. This will
+  /// always be added, even at -O0.
+  static inline void createStandardModulePasses(PassManagerBase *PM,
+                                                unsigned OptimizationLevel,
+                                                bool OptimizeSize,
+                                                bool UnitAtATime,
+                                                bool UnrollLoops,
+                                                bool SimplifyLibCalls,
+                                                bool HaveExceptions,
+                                                Pass *InliningPass) {
+    createStandardAliasAnalysisPasses(PM);
+
+    if (OptimizationLevel == 0) {
+      if (InliningPass)
+        PM->add(InliningPass);
+      return;
+    }
+
+    if (UnitAtATime) {
+      PM->add(createGlobalOptimizerPass());     // Optimize out global vars
+
+      PM->add(createIPSCCPPass());              // IP SCCP
+      PM->add(createDeadArgEliminationPass());  // Dead argument elimination
+    }
+    PM->add(createInstructionCombiningPass());  // Clean up after IPCP & DAE
+    PM->add(createCFGSimplificationPass());     // Clean up after IPCP & DAE
+
+    // Start of CallGraph SCC passes.
+    if (UnitAtATime && HaveExceptions)
+      PM->add(createPruneEHPass());             // Remove dead EH info
+    if (InliningPass)
+      PM->add(InliningPass);
+    if (UnitAtATime)
+      PM->add(createFunctionAttrsPass());       // Set readonly/readnone attrs
+    if (OptimizationLevel > 2)
+      PM->add(createArgumentPromotionPass());   // Scalarize uninlined fn args
+
+    // Start of function pass.
+    // Break up aggregate allocas, using SSAUpdater.
+    PM->add(createScalarReplAggregatesPass(-1, false));
+    PM->add(createEarlyCSEPass());              // Catch trivial redundancies
+    if (SimplifyLibCalls)
+      PM->add(createSimplifyLibCallsPass());    // Library Call Optimizations
+    PM->add(createJumpThreadingPass());         // Thread jumps.
+    PM->add(createCorrelatedValuePropagationPass()); // Propagate conditionals
+    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
+    PM->add(createInstructionCombiningPass());  // Combine silly seq's
+
+    PM->add(createTailCallEliminationPass());   // Eliminate tail calls
+    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
+    PM->add(createReassociatePass());           // Reassociate expressions
+    PM->add(createLoopRotatePass());            // Rotate Loop
+    PM->add(createLICMPass());                  // Hoist loop invariants
+    PM->add(createLoopUnswitchPass(OptimizeSize || OptimizationLevel < 3));
+    PM->add(createInstructionCombiningPass());
+    PM->add(createIndVarSimplifyPass());        // Canonicalize indvars
+    PM->add(createLoopIdiomPass());             // Recognize idioms like memset.
+    PM->add(createLoopDeletionPass());          // Delete dead loops
+    if (UnrollLoops)
+      PM->add(createLoopUnrollPass());          // Unroll small loops
+    PM->add(createInstructionCombiningPass());  // Clean up after the unroller
+    if (OptimizationLevel > 1)
+      PM->add(createGVNPass());                 // Remove redundancies
+    PM->add(createMemCpyOptPass());             // Remove memcpy / form memset
+    PM->add(createSCCPPass());                  // Constant prop with SCCP
+
+    // Run instcombine after redundancy elimination to exploit opportunities
+    // opened up by them.
+    PM->add(createInstructionCombiningPass());
+    PM->add(createJumpThreadingPass());         // Thread jumps
+    PM->add(createCorrelatedValuePropagationPass());
+    PM->add(createDeadStoreEliminationPass());  // Delete dead stores
+    PM->add(createAggressiveDCEPass());         // Delete dead instructions
+    PM->add(createCFGSimplificationPass());     // Merge & remove BBs
+
+    if (UnitAtATime) {
+      PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
+      PM->add(createDeadTypeEliminationPass()); // Eliminate dead types
+
+      // GlobalOpt already deletes dead functions and globals, at -O3 try a
+      // late pass of GlobalDCE. It is capable of deleting dead cycles.
+      if (OptimizationLevel > 2)
+        PM->add(createGlobalDCEPass());         // Remove dead fns and globals.
+
+      if (OptimizationLevel > 1)
+        PM->add(createConstantMergePass());     // Merge dup global constants
+    }
+  }
+
+  static inline void addOnePass(PassManagerBase *PM, Pass *P, bool AndVerify) {
+    PM->add(P);
+
+    if (AndVerify)
+      PM->add(createVerifierPass());
+  }
+
+  /// createStandardLTOPasses - Add the standard list of module passes suitable
+  /// for link time optimization.
+  ///
+  /// Internalize - Run the internalize pass.
+  /// RunInliner - Use a function inlining pass.
+  /// VerifyEach - Run the verifier after each pass.
+  static inline void createStandardLTOPasses(PassManagerBase *PM,
+                                             bool Internalize,
+                                             bool RunInliner,
+                                             bool VerifyEach) {
+    // Provide AliasAnalysis services for optimizations.
+    createStandardAliasAnalysisPasses(PM);
+
+    // Now that composite has been compiled, scan through the module, looking
+    // for a main function. If main is defined, mark all other functions
+    // internal.
+    if (Internalize)
+      addOnePass(PM, createInternalizePass(true), VerifyEach);
+
+    // Propagate constants at call sites into the functions they call. This
+    // opens opportunities for globalopt (and inlining) by substituting function
+    // pointers passed as arguments to direct uses of functions.
+    addOnePass(PM, createIPSCCPPass(), VerifyEach);
+
+    // Now that we internalized some globals, see if we can hack on them!
+    addOnePass(PM, createGlobalOptimizerPass(), VerifyEach);
+
+    // Linking modules together can lead to duplicated global constants, only
+    // keep one copy of each constant...
+    addOnePass(PM, createConstantMergePass(), VerifyEach);
+
+    // Remove unused arguments from functions...
+    addOnePass(PM, createDeadArgEliminationPass(), VerifyEach);
+
+    // Reduce the code after globalopt and ipsccp. Both can open up significant
+    // simplification opportunities, and both can propagate functions through
+    // function pointers. When this happens, we often have to resolve varargs
+    // calls, etc, so let instcombine do this.
+    addOnePass(PM, createInstructionCombiningPass(), VerifyEach);
+
+    // Inline small functions
+    if (RunInliner)
+      addOnePass(PM, createFunctionInliningPass(), VerifyEach);
+
+    addOnePass(PM, createPruneEHPass(), VerifyEach);  // Remove dead EH info.
+    // Optimize globals again if we ran the inliner.
+    if (RunInliner)
+      addOnePass(PM, createGlobalOptimizerPass(), VerifyEach);
+    addOnePass(PM, createGlobalDCEPass(), VerifyEach); // Remove dead functions.
+
+    // If we didn't decide to inline a function, check to see if we can
+    // transform it to pass arguments by value instead of by reference.
+    addOnePass(PM, createArgumentPromotionPass(), VerifyEach);
+
+    // The IPO passes may leave cruft around. Clean up after them.
+    addOnePass(PM, createInstructionCombiningPass(), VerifyEach);
+    addOnePass(PM, createJumpThreadingPass(), VerifyEach);
+    // Break up allocas
+    addOnePass(PM, createScalarReplAggregatesPass(), VerifyEach);
+
+    // Run a few AA driven optimizations here and now, to cleanup the code.
+    addOnePass(PM, createFunctionAttrsPass(), VerifyEach); // Add nocapture.
+    addOnePass(PM, createGlobalsModRefPass(), VerifyEach); // IP alias analysis.
+
+    addOnePass(PM, createLICMPass(), VerifyEach);      // Hoist loop invariants.
+    addOnePass(PM, createGVNPass(), VerifyEach);       // Remove redundancies.
+    addOnePass(PM, createMemCpyOptPass(), VerifyEach); // Remove dead memcpys.
+    // Nuke dead stores.
+    addOnePass(PM, createDeadStoreEliminationPass(), VerifyEach);
+
+    // Cleanup and simplify the code after the scalar optimizations.
+    addOnePass(PM, createInstructionCombiningPass(), VerifyEach);
+
+    addOnePass(PM, createJumpThreadingPass(), VerifyEach);
+
+    // Delete basic blocks, which optimization passes may have killed.
+    addOnePass(PM, createCFGSimplificationPass(), VerifyEach);
+
+    // Now that we have optimized the program, discard unreachable functions.
+    addOnePass(PM, createGlobalDCEPass(), VerifyEach);
+  }
+}
+
+#endif
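Editor's sketch (not part of the patch): how a frontend at -O2 would drive the hooks above; Module setup is elided and the function name is illustrative.

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Support/StandardPasses.h"

    void runO2(llvm::Module &M) {
      llvm::PassManager PM;
      llvm::createStandardModulePasses(&PM, /*OptimizationLevel=*/2,
                                       /*OptimizeSize=*/false,
                                       /*UnitAtATime=*/true,
                                       /*UnrollLoops=*/true,
                                       /*SimplifyLibCalls=*/true,
                                       /*HaveExceptions=*/true,
                                       llvm::createFunctionInliningPass());
      PM.run(M);  // Run the whole pipeline over the module.
    }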
diff --git a/final/include/llvm/Support/StringPool.h b/final/include/llvm/Support/StringPool.h
new file mode 100644
index 00000000000..de05e0b547a
--- /dev/null
+++ b/final/include/llvm/Support/StringPool.h
@@ -0,0 +1,139 @@
+//===-- StringPool.h - Interned string pool ---------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares an interned string pool, which helps reduce the cost of
+// strings by using the same storage for identical strings.
+//
+// To intern a string:
+//
+//   StringPool Pool;
+//   PooledStringPtr Str = Pool.intern("wakka wakka");
+//
+// To use the value of an interned string, use operator bool and operator*:
+//
+//   if (Str)
+//     cerr << "the string is " << *Str << "\n";
+//
+// Pooled strings are immutable, but you can change a PooledStringPtr to point
+// to another instance. So that interned strings can eventually be freed,
+// strings in the string pool are reference-counted (automatically).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_STRINGPOOL_H
+#define LLVM_SUPPORT_STRINGPOOL_H
+
+#include "llvm/ADT/StringMap.h"
+#include <new>
+#include <cassert>
+
+namespace llvm {
+
+  class PooledStringPtr;
+
+  /// StringPool - An interned string pool. Use the intern method to add a
+  /// string. Strings are removed automatically as PooledStringPtrs are
+  /// destroyed.
+  class StringPool {
+    /// PooledString - This is the value of an entry in the pool's interning
+    /// table.
+    struct PooledString {
+      StringPool *Pool;   ///< So the string can remove itself.
+      unsigned Refcount;  ///< Number of referencing PooledStringPtrs.
+
+    public:
+      PooledString() : Pool(0), Refcount(0) { }
+    };
+
+    friend class PooledStringPtr;
+
+    typedef StringMap<PooledString> table_t;
+    typedef StringMapEntry<PooledString> entry_t;
+    table_t InternTable;
+
+  public:
+    StringPool();
+    ~StringPool();
+
+    /// intern - Adds a string to the pool and returns a reference-counted
+    /// pointer to it. No additional memory is allocated if the string already
+    /// exists in the pool.
+    PooledStringPtr intern(StringRef Str);
+
+    /// empty - Checks whether the pool is empty. Returns true if so.
+    ///
+    inline bool empty() const { return InternTable.empty(); }
+  };
+
+  /// PooledStringPtr - A pointer to an interned string. Use operator bool to
+  /// test whether the pointer is valid, and operator * to get the string if so.
+  /// This is a lightweight value class with storage requirements equivalent to
+  /// a single pointer, but it does have reference-counting overhead when
+  /// copied.
+  class PooledStringPtr {
+    typedef StringPool::entry_t entry_t;
+    entry_t *S;
+
+  public:
+    PooledStringPtr() : S(0) {}
+
+    explicit PooledStringPtr(entry_t *E) : S(E) {
+      if (S) ++S->getValue().Refcount;
+    }
+
+    PooledStringPtr(const PooledStringPtr &That) : S(That.S) {
+      if (S) ++S->getValue().Refcount;
+    }
+
+    PooledStringPtr &operator=(const PooledStringPtr &That) {
+      if (S != That.S) {
+        clear();
+        S = That.S;
+        if (S) ++S->getValue().Refcount;
+      }
+      return *this;
+    }
+
+    void clear() {
+      if (!S)
+        return;
+      if (--S->getValue().Refcount == 0) {
+        S->getValue().Pool->InternTable.remove(S);
+        S->Destroy();
+      }
+      S = 0;
+    }
+
+    ~PooledStringPtr() { clear(); }
+
+    inline const char *begin() const {
+      assert(*this && "Attempt to dereference empty PooledStringPtr!");
+      return S->getKeyData();
+    }
+
+    inline const char *end() const {
+      assert(*this && "Attempt to dereference empty PooledStringPtr!");
+      return S->getKeyData() + S->getKeyLength();
+    }
+
+    inline unsigned size() const {
+      assert(*this && "Attempt to dereference empty PooledStringPtr!");
+      return S->getKeyLength();
+    }
+
+    inline const char *operator*() const { return begin(); }
+    inline operator bool() const { return S != 0; }
+
+    inline bool operator==(const PooledStringPtr &That) { return S == That.S; }
+    inline bool operator!=(const PooledStringPtr &That) { return S != That.S; }
+  };
+
+}  // End llvm namespace
+
+#endif
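Editor's sketch (not part of the patch): interning two identical strings shares one entry, following the usage shown in the file comment above.

    #include "llvm/Support/StringPool.h"
    #include "llvm/Support/raw_ostream.h"

    void internDemo() {
      llvm::StringPool Pool;
      llvm::PooledStringPtr A = Pool.intern("wakka wakka");
      llvm::PooledStringPtr B = Pool.intern("wakka wakka"); // shares A's storage

      if (A && A == B)
        llvm::outs() << "interned: " << *A << "\n";
      // A and B go out of scope here; the refcount drops to zero and the
      // entry is removed from the pool automatically.
    }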
diff --git a/final/include/llvm/Support/SwapByteOrder.h b/final/include/llvm/Support/SwapByteOrder.h
new file mode 100644
index 00000000000..6c0592c05ad
--- /dev/null
+++ b/final/include/llvm/Support/SwapByteOrder.h
@@ -0,0 +1,101 @@
+//===- SwapByteOrder.h - Generic and optimized byte swaps -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares generic and optimized functions to swap the byte order of
+// an integral type.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_SWAP_BYTE_ORDER_H
+#define LLVM_SYSTEM_SWAP_BYTE_ORDER_H
+
+#include "llvm/Support/DataTypes.h"
+#include <cstddef>
+#include <limits>
+
+namespace llvm {
+namespace sys {
+
+/// SwapByteOrder_16 - This function returns a byte-swapped representation of
+/// the 16-bit argument.
+inline uint16_t SwapByteOrder_16(uint16_t value) {
+#if defined(_MSC_VER) && !defined(_DEBUG)
+  // The DLL version of the runtime lacks these functions (bug!?), but in a
+  // release build they're replaced with BSWAP instructions anyway.
+  return _byteswap_ushort(value);
+#else
+  uint16_t Hi = value << 8;
+  uint16_t Lo = value >> 8;
+  return Hi | Lo;
+#endif
+}
+
+/// SwapByteOrder_32 - This function returns a byte-swapped representation of
+/// the 32-bit argument.
+inline uint32_t SwapByteOrder_32(uint32_t value) {
+#if defined(__llvm__) || \
+(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+  return __builtin_bswap32(value);
+#elif defined(_MSC_VER) && !defined(_DEBUG)
+  return _byteswap_ulong(value);
+#else
+  uint32_t Byte0 = value & 0x000000FF;
+  uint32_t Byte1 = value & 0x0000FF00;
+  uint32_t Byte2 = value & 0x00FF0000;
+  uint32_t Byte3 = value & 0xFF000000;
+  return (Byte0 << 24) | (Byte1 << 8) | (Byte2 >> 8) | (Byte3 >> 24);
+#endif
+}
+
+/// SwapByteOrder_64 - This function returns a byte-swapped representation of
+/// the 64-bit argument.
+inline uint64_t SwapByteOrder_64(uint64_t value) {
+#if defined(__llvm__) || \
+(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) && !defined(__ICC)
+  return __builtin_bswap64(value);
+#elif defined(_MSC_VER) && !defined(_DEBUG)
+  return _byteswap_uint64(value);
+#else
+  uint64_t Hi = SwapByteOrder_32(uint32_t(value));
+  uint32_t Lo = SwapByteOrder_32(uint32_t(value >> 32));
+  return (Hi << 32) | Lo;
+#endif
+}
+
+inline unsigned char  SwapByteOrder(unsigned char C) { return C; }
+inline   signed char  SwapByteOrder(signed char C) { return C; }
+inline          char  SwapByteOrder(char C) { return C; }
+
+inline unsigned short SwapByteOrder(unsigned short C) { return SwapByteOrder_16(C); }
+inline   signed short SwapByteOrder(  signed short C) { return SwapByteOrder_16(C); }
+
+inline unsigned int   SwapByteOrder(unsigned int   C) { return SwapByteOrder_32(C); }
+inline   signed int   SwapByteOrder(  signed int   C) { return SwapByteOrder_32(C); }
+
+#if __LONG_MAX__ == __INT_MAX__
+inline unsigned long  SwapByteOrder(unsigned long  C) { return SwapByteOrder_32(C); }
+inline   signed long  SwapByteOrder(  signed long  C) { return SwapByteOrder_32(C); }
+#elif __LONG_MAX__ == __LONG_LONG_MAX__
+inline unsigned long  SwapByteOrder(unsigned long  C) { return SwapByteOrder_64(C); }
+inline   signed long  SwapByteOrder(  signed long  C) { return SwapByteOrder_64(C); }
+#else
+#error "Unknown long size!"
+#endif
+
+inline unsigned long long SwapByteOrder(unsigned long long C) {
+  return SwapByteOrder_64(C);
+}
+inline signed long long SwapByteOrder(signed long long C) {
+  return SwapByteOrder_64(C);
+}
+
+} // end namespace sys
+} // end namespace llvm
+
+#endif
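Editor's sketch (not part of the patch): the swap helpers reverse byte order and round-trip, whichever #if branch is compiled in.

    #include "llvm/Support/SwapByteOrder.h"
    #include <cassert>

    void swapDemo() {
      uint32_t V = 0x12345678;
      uint32_t S = llvm::sys::SwapByteOrder_32(V);
      assert(S == 0x78563412 && "bytes are reversed");
      assert(llvm::sys::SwapByteOrder_32(S) == V && "swapping twice round-trips");
    }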
diff --git a/final/include/llvm/Support/SystemUtils.h b/final/include/llvm/Support/SystemUtils.h
new file mode 100644
index 00000000000..399aee51eb7
--- /dev/null
+++ b/final/include/llvm/Support/SystemUtils.h
@@ -0,0 +1,44 @@
+//===- SystemUtils.h - Utilities to do low-level system stuff ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains functions used to do a variety of low-level, often
+// system-specific, tasks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SYSTEMUTILS_H
+#define LLVM_SUPPORT_SYSTEMUTILS_H
+
+#include <string>
+
+namespace llvm {
+  class raw_ostream;
+  namespace sys { class Path; }
+
+/// Determine if the raw_ostream provided is connected to a terminal. If so,
+/// generate a warning message to errs() advising against display of bitcode
+/// and return true. Otherwise just return false.
+/// @brief Check for output written to a console
+bool CheckBitcodeOutputToConsole(
+  raw_ostream &stream_to_check, ///< The stream to be checked
+  bool print_warning = true     ///< Control whether warnings are printed
+);
+
+/// PrependMainExecutablePath - Prepend the path to the program being executed
+/// to \p ExeName, given the value of argv[0] and the address of main()
+/// itself. This allows us to find another LLVM tool if it is built in the same
+/// directory. An empty string is returned on error; note that this function
+/// just manipulates the path and doesn't check for executability.
+/// @brief Find a named executable.
+sys::Path PrependMainExecutablePath(const std::string &ExeName,
+                                    const char *Argv0, void *MainAddr);
+
+} // End llvm namespace
+
+#endif
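Editor's sketch (not part of the patch): locating a sibling tool next to the running executable, as the comment above describes; "llc" is just an example name.

    #include "llvm/Support/DataTypes.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/SystemUtils.h"

    int main(int argc, char **argv) {
      // Look for "llc" in the same directory as the current executable.
      llvm::sys::Path LLC =
          llvm::PrependMainExecutablePath("llc", argv[0],
                                          (void*)(intptr_t)main);
      return LLC.isEmpty() ? 1 : 0;  // an empty path means the lookup failed
    }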
diff --git a/final/include/llvm/Support/TargetFolder.h b/final/include/llvm/Support/TargetFolder.h
new file mode 100644
index 00000000000..20ca5571ffa
--- /dev/null
+++ b/final/include/llvm/Support/TargetFolder.h
@@ -0,0 +1,242 @@
+//====-- llvm/Support/TargetFolder.h - Constant folding helper -*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetFolder class, a helper for IRBuilder.
+// It provides IRBuilder with a set of methods for creating constants with
+// target dependent folding, in addition to the same target-independent
+// folding that the ConstantFolder class provides. For general constant
+// creation and folding, use ConstantExpr and the routines in
+// llvm/Analysis/ConstantFolding.h.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TARGETFOLDER_H
+#define LLVM_SUPPORT_TARGETFOLDER_H

+#include "llvm/Constants.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Analysis/ConstantFolding.h"
+
+namespace llvm {
+
+class TargetData;
+
+/// TargetFolder - Create constants with target dependent folding.
+class TargetFolder {
+  const TargetData *TD;
+
+  /// Fold - Fold the constant using target specific information.
+  Constant *Fold(Constant *C) const {
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+      if (Constant *CF = ConstantFoldConstantExpression(CE, TD))
+        return CF;
+    return C;
+  }
+
+public:
+  explicit TargetFolder(const TargetData *TheTD) : TD(TheTD) {}
+
+  //===--------------------------------------------------------------------===//
+  // Binary Operators
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateAdd(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW));
+  }
+  Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFAdd(LHS, RHS));
+  }
+  Constant *CreateSub(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW));
+  }
+  Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFSub(LHS, RHS));
+  }
+  Constant *CreateMul(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW));
+  }
+  Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFMul(LHS, RHS));
+  }
+  Constant *CreateUDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getUDiv(LHS, RHS, isExact));
+  }
+  Constant *CreateSDiv(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getSDiv(LHS, RHS, isExact));
+  }
+  Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFDiv(LHS, RHS));
+  }
+  Constant *CreateURem(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getURem(LHS, RHS));
+  }
+  Constant *CreateSRem(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getSRem(LHS, RHS));
+  }
+  Constant *CreateFRem(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getFRem(LHS, RHS));
+  }
+  Constant *CreateShl(Constant *LHS, Constant *RHS,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW));
+  }
+  Constant *CreateLShr(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getLShr(LHS, RHS, isExact));
+  }
+  Constant *CreateAShr(Constant *LHS, Constant *RHS, bool isExact = false)const{
+    return Fold(ConstantExpr::getAShr(LHS, RHS, isExact));
+  }
+  Constant *CreateAnd(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getAnd(LHS, RHS));
+  }
+  Constant *CreateOr(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getOr(LHS, RHS));
+  }
+  Constant *CreateXor(Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::getXor(LHS, RHS));
+  }
+
+  Constant *CreateBinOp(Instruction::BinaryOps Opc,
+                        Constant *LHS, Constant *RHS) const {
+    return Fold(ConstantExpr::get(Opc, LHS, RHS));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Unary Operators
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateNeg(Constant *C,
+                      bool HasNUW = false, bool HasNSW = false) const {
+    return Fold(ConstantExpr::getNeg(C, HasNUW, HasNSW));
+  }
+  Constant *CreateFNeg(Constant *C) const {
+    return Fold(ConstantExpr::getFNeg(C));
+  }
+  Constant *CreateNot(Constant *C) const {
+    return Fold(ConstantExpr::getNot(C));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Memory Instructions
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateGetElementPtr(Constant *C, Constant* const *IdxList,
+                                unsigned NumIdx) const {
+    return Fold(ConstantExpr::getGetElementPtr(C, IdxList, NumIdx));
+  }
+  Constant *CreateGetElementPtr(Constant *C, Value* const *IdxList,
+                                unsigned NumIdx) const {
+    return Fold(ConstantExpr::getGetElementPtr(C, IdxList, NumIdx));
+  }
+
+  Constant *CreateInBoundsGetElementPtr(Constant *C, Constant* const *IdxList,
+                                        unsigned NumIdx) const {
+    return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx));
+  }
+  Constant *CreateInBoundsGetElementPtr(Constant *C, Value* const *IdxList,
+                                        unsigned NumIdx) const {
+    return Fold(ConstantExpr::getInBoundsGetElementPtr(C, IdxList, NumIdx));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Cast/Conversion Operators
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateCast(Instruction::CastOps Op, Constant *C,
+                       const Type *DestTy) const {
+    if (C->getType() == DestTy)
+      return C; // avoid calling Fold
+    return Fold(ConstantExpr::getCast(Op, C, DestTy));
+  }
+  Constant *CreateIntCast(Constant *C, const Type *DestTy,
+                          bool isSigned) const {
+    if (C->getType() == DestTy)
+      return C; // avoid calling Fold
+    return Fold(ConstantExpr::getIntegerCast(C, DestTy, isSigned));
+  }
+  Constant *CreatePointerCast(Constant *C, const Type *DestTy) const {
+    return ConstantExpr::getPointerCast(C, DestTy);
+  }
+  Constant *CreateBitCast(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::BitCast, C, DestTy);
+  }
+  Constant *CreateIntToPtr(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::IntToPtr, C, DestTy);
+  }
+  Constant *CreatePtrToInt(Constant *C, const Type *DestTy) const {
+    return CreateCast(Instruction::PtrToInt, C, DestTy);
+  }
+  Constant *CreateZExtOrBitCast(Constant *C, const Type *DestTy) const {
+    if (C->getType() == DestTy)
+      return C; // avoid calling Fold
+    return Fold(ConstantExpr::getZExtOrBitCast(C, DestTy));
+  }
+  Constant *CreateSExtOrBitCast(Constant *C, const Type *DestTy) const {
+    if (C->getType() == DestTy)
+      return C; // avoid calling Fold
+    return Fold(ConstantExpr::getSExtOrBitCast(C, DestTy));
+  }
+  Constant *CreateTruncOrBitCast(Constant *C, const Type *DestTy) const {
+    if (C->getType() == DestTy)
+      return C; // avoid calling Fold
+    return Fold(ConstantExpr::getTruncOrBitCast(C, DestTy));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Compare Instructions
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateICmp(CmpInst::Predicate P, Constant *LHS,
+                       Constant *RHS) const {
+    return Fold(ConstantExpr::getCompare(P, LHS, RHS));
+  }
+  Constant *CreateFCmp(CmpInst::Predicate P, Constant *LHS,
+                       Constant *RHS) const {
+    return Fold(ConstantExpr::getCompare(P, LHS, RHS));
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Other Instructions
+  //===--------------------------------------------------------------------===//
+
+  Constant *CreateSelect(Constant *C, Constant *True, Constant *False) const {
+    return Fold(ConstantExpr::getSelect(C, True, False));
+  }
+
+  Constant *CreateExtractElement(Constant *Vec, Constant *Idx) const {
+    return Fold(ConstantExpr::getExtractElement(Vec, Idx));
+  }
+
+  Constant *CreateInsertElement(Constant *Vec, Constant *NewElt,
+                                Constant *Idx) const {
+    return Fold(ConstantExpr::getInsertElement(Vec, NewElt, Idx));
+  }
+
+  Constant *CreateShuffleVector(Constant *V1, Constant *V2,
+                                Constant *Mask) const {
+    return Fold(ConstantExpr::getShuffleVector(V1, V2, Mask));
+  }
+
+  Constant *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
+                               unsigned NumIdx) const {
+    return Fold(ConstantExpr::getExtractValue(Agg, IdxList, NumIdx));
+  }
+
+  Constant *CreateInsertValue(Constant *Agg, Constant *Val,
+                              const unsigned *IdxList, unsigned NumIdx) const {
+    return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList, NumIdx));
+  }
+};
+
+}
+
+#endif
diff --git a/final/include/llvm/Support/ThreadLocal.h b/final/include/llvm/Support/ThreadLocal.h
new file mode 100644
index 00000000000..15350a7afff
--- /dev/null
+++ b/final/include/llvm/Support/ThreadLocal.h
@@ -0,0 +1,54 @@
+//===- llvm/Support/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREAD_LOCAL_H
+#define LLVM_SYSTEM_THREAD_LOCAL_H
+
+#include "llvm/Support/Threading.h"
+#include <cassert>
+
+namespace llvm {
+  namespace sys {
+    // ThreadLocalImpl - Common base class of all ThreadLocal instantiations.
+    // YOU SHOULD NEVER USE THIS DIRECTLY.
+    class ThreadLocalImpl {
+      void* data;
+    public:
+      ThreadLocalImpl();
+      virtual ~ThreadLocalImpl();
+      void setInstance(const void* d);
+      const void* getInstance();
+      void removeInstance();
+    };
+
+    /// ThreadLocal - A class used to abstract thread-local storage. It holds,
+    /// for each thread, a pointer to a single object of type T.
+    template <class T>
+    class ThreadLocal : public ThreadLocalImpl {
+    public:
+      ThreadLocal() : ThreadLocalImpl() { }
+
+      /// get - Fetches a pointer to the object associated with the current
+      /// thread. If no object has yet been associated, it returns NULL.
+      T* get() { return static_cast<T*>(getInstance()); }
+
+      // set - Associates a pointer to an object with the current thread.
+      void set(T* d) { setInstance(d); }
+
+      // erase - Removes the pointer associated with the current thread.
+      void erase() { removeInstance(); }
+    };
+  }
+}
+
+#endif
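Editor's sketch (not part of the patch). Note that because getInstance() returns const void*, the static_cast in get() only compiles when T is const-qualified, which matches how LLVM itself instantiates ThreadLocal; the Counter type below is hypothetical.

    #include "llvm/Support/ThreadLocal.h"

    struct Counter { int N; };

    static llvm::sys::ThreadLocal<const Counter> TLS;

    void tlsDemo(const Counter *C) {
      TLS.set(C);                      // associate C with this thread only
      const Counter *Cur = TLS.get();  // NULL on threads that never set()
      (void)Cur;
      TLS.erase();                     // drop this thread's association
    }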
diff --git a/final/include/llvm/Support/Threading.h b/final/include/llvm/Support/Threading.h
new file mode 100644
index 00000000000..c0e842c2fe7
--- /dev/null
+++ b/final/include/llvm/Support/Threading.h
@@ -0,0 +1,59 @@
+//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREADING_H
+#define LLVM_SYSTEM_THREADING_H
+
+namespace llvm {
+  /// llvm_start_multithreaded - Allocate and initialize structures needed to
+  /// make LLVM safe for multithreading. The return value indicates whether
+  /// multithreaded initialization succeeded. LLVM will still be operational
+  /// on "failed" return, and will still be safe for hosting threading
+  /// applications in the JIT, but will not be safe for concurrent calls to the
+  /// LLVM APIs.
+  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+  bool llvm_start_multithreaded();
+
+  /// llvm_stop_multithreaded - Deallocate structures necessary to make LLVM
+  /// safe for multithreading.
+  /// THIS MUST EXECUTE IN ISOLATION FROM ALL OTHER LLVM API CALLS.
+  void llvm_stop_multithreaded();
+
+  /// llvm_is_multithreaded - Check whether LLVM is executing in thread-safe
+  /// mode or not.
+  bool llvm_is_multithreaded();
+
+  /// acquire_global_lock - Acquire the global lock. This is a no-op if called
+  /// before llvm_start_multithreaded().
+  void llvm_acquire_global_lock();
+
+  /// release_global_lock - Release the global lock. This is a no-op if called
+  /// before llvm_start_multithreaded().
+  void llvm_release_global_lock();
+
+  /// llvm_execute_on_thread - Execute the given \arg UserFn on a separate
+  /// thread, passing it the provided \arg UserData.
+  ///
+  /// This function does not guarantee that the code will actually be executed
+  /// on a separate thread or honoring the requested stack size, but tries to do
+  /// so where system support is available.
+  ///
+  /// \param UserFn - The callback to execute.
+  /// \param UserData - An argument to pass to the callback function.
+  /// \param RequestedStackSize - If non-zero, a requested size (in bytes) for
+  /// the thread stack.
+  void llvm_execute_on_thread(void (*UserFn)(void*), void *UserData,
+                              unsigned RequestedStackSize = 0);
+}
+
+#endif
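Editor's sketch (not part of the patch): passing a callback and payload to llvm_execute_on_thread; both names are hypothetical.

    #include "llvm/Support/Threading.h"
    #include "llvm/Support/raw_ostream.h"

    static void Work(void *Data) {
      llvm::outs() << "payload: " << *static_cast<int*>(Data) << "\n";
    }

    void threadDemo() {
      int Payload = 42;
      // Prefers a real thread (with the default stack size) where the
      // platform supports it; otherwise the callback runs in the caller.
      llvm::llvm_execute_on_thread(Work, &Payload);
    }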
diff --git a/final/include/llvm/Support/TimeValue.h b/final/include/llvm/Support/TimeValue.h
new file mode 100644
index 00000000000..e1227118c22
--- /dev/null
+++ b/final/include/llvm/Support/TimeValue.h
@@ -0,0 +1,382 @@
+//===-- TimeValue.h - Declare OS TimeValue Concept --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file declares the operating system TimeValue concept.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+#ifndef LLVM_SYSTEM_TIMEVALUE_H
+#define LLVM_SYSTEM_TIMEVALUE_H
+
+namespace llvm {
+namespace sys {
+  /// This class is used where a precise fixed point in time is required. The
+  /// range of TimeValue spans many hundreds of billions of years both past and
+  /// present. The precision of TimeValue is to the nanosecond. However, the
+  /// actual precision of its values will be determined by the resolution of
+  /// the system clock. The TimeValue class is used in conjunction with several
+  /// other lib/System interfaces to specify the time at which a call should
+  /// timeout, etc.
+  /// @since 1.4
+  /// @brief Provides an abstraction for a fixed point in time.
+  class TimeValue {
+
+  /// @name Constants
+  /// @{
+  public:
+
+    /// A constant TimeValue representing the smallest time
+    /// value permissible by the class. MinTime is some point
+    /// in the distant past, about 300 billion years BCE.
+    /// @brief The smallest possible time value.
+    static const TimeValue MinTime;
+
+    /// A constant TimeValue representing the largest time
+    /// value permissible by the class. MaxTime is some point
+    /// in the distant future, about 300 billion years AD.
+    /// @brief The largest possible time value.
+    static const TimeValue MaxTime;
+
+    /// A constant TimeValue representing the base time,
+    /// or zero time of 00:00:00 (midnight) January 1st, 2000.
+    /// @brief 00:00:00 Jan 1, 2000 UTC.
+    static const TimeValue ZeroTime;
+
+    /// A constant TimeValue for the Posix base time which is
+    /// 00:00:00 (midnight) January 1st, 1970.
+    /// @brief 00:00:00 Jan 1, 1970 UTC.
+    static const TimeValue PosixZeroTime;
+
+    /// A constant TimeValue for the Win32 base time which is
+    /// 00:00:00 (midnight) January 1st, 1601.
+    /// @brief 00:00:00 Jan 1, 1601 UTC.
+    static const TimeValue Win32ZeroTime;
+
+  /// @}
+  /// @name Types
+  /// @{
+  public:
+    typedef int64_t SecondsType;     ///< Type used for representing seconds.
+    typedef int32_t NanoSecondsType; ///< Type used for representing nanoseconds.
+
+    enum TimeConversions {
+      NANOSECONDS_PER_SECOND = 1000000000,  ///< One Billion
+      MICROSECONDS_PER_SECOND = 1000000,    ///< One Million
+      MILLISECONDS_PER_SECOND = 1000,       ///< One Thousand
+      NANOSECONDS_PER_MICROSECOND = 1000,   ///< One Thousand
+      NANOSECONDS_PER_MILLISECOND = 1000000,///< One Million
+      NANOSECONDS_PER_POSIX_TICK = 100,     ///< Posix tick is 100ns
+      NANOSECONDS_PER_WIN32_TICK = 100      ///< Win32 tick is 100ns
+    };
+
+  /// @}
+  /// @name Constructors
+  /// @{
+  public:
+    /// Caller provides the exact value in seconds and nanoseconds. The
+    /// \p nanos argument defaults to zero for convenience.
+    /// @brief Explicit constructor
+    explicit TimeValue (SecondsType seconds, NanoSecondsType nanos = 0)
+      : seconds_( seconds ), nanos_( nanos ) { this->normalize(); }
+
+    /// Caller provides the exact value as a double in seconds with the
+    /// fractional part representing nanoseconds.
+    /// @brief Double Constructor.
+    explicit TimeValue( double new_time )
+      : seconds_( 0 ) , nanos_ ( 0 ) {
+      SecondsType integer_part = static_cast<SecondsType>( new_time );
+      seconds_ = integer_part;
+      nanos_ = static_cast<NanoSecondsType>( (new_time -
+               static_cast<double>(integer_part)) * NANOSECONDS_PER_SECOND );
+      this->normalize();
+    }
+
+    /// This is a static constructor that returns a TimeValue that represents
+    /// the current time.
+    /// @brief Creates a TimeValue with the current time (UTC).
+    static TimeValue now();
+
+  /// @}
+  /// @name Operators
+  /// @{
+  public:
+    /// Add \p that to \p this.
+    /// @returns this
+    /// @brief Incrementing assignment operator.
+    TimeValue& operator += (const TimeValue& that ) {
+      this->seconds_ += that.seconds_ ;
+      this->nanos_ += that.nanos_ ;
+      this->normalize();
+      return *this;
+    }
+
+    /// Subtract \p that from \p this.
+    /// @returns this
+    /// @brief Decrementing assignment operator.
+    TimeValue& operator -= (const TimeValue &that ) {
+      this->seconds_ -= that.seconds_ ;
+      this->nanos_ -= that.nanos_ ;
+      this->normalize();
+      return *this;
+    }
+
+    /// Determine if \p this is less than \p that.
+    /// @returns True iff *this < that.
+    /// @brief True if this < that.
+    int operator < (const TimeValue &that) const { return that > *this; }
+
+    /// Determine if \p this is greater than \p that.
+    /// @returns True iff *this > that.
+    /// @brief True if this > that.
+    int operator > (const TimeValue &that) const {
+      if ( this->seconds_ > that.seconds_ ) {
+        return 1;
+      } else if ( this->seconds_ == that.seconds_ ) {
+        if ( this->nanos_ > that.nanos_ ) return 1;
+      }
+      return 0;
+    }
+
+    /// Determine if \p this is less than or equal to \p that.
+    /// @returns True iff *this <= that.
+    /// @brief True if this <= that.
+    int operator <= (const TimeValue &that) const { return that >= *this; }
+
+    /// Determine if \p this is greater than or equal to \p that.
+    /// @returns True iff *this >= that.
+    /// @brief True if this >= that.
+    int operator >= (const TimeValue &that) const {
+      if ( this->seconds_ > that.seconds_ ) {
+        return 1;
+      } else if ( this->seconds_ == that.seconds_ ) {
+        if ( this->nanos_ >= that.nanos_ ) return 1;
+      }
+      return 0;
+    }
+
+    /// Determines if two TimeValue objects represent the same moment in time.
+    /// @brief True iff *this == that.
+    int operator == (const TimeValue &that) const {
+      return (this->seconds_ == that.seconds_) &&
+             (this->nanos_ == that.nanos_);
+    }
+
+    /// Determines if two TimeValue objects represent times that are not the
+    /// same.
+    /// @return True iff *this != that.
+    /// @brief True if this != that.
+    int operator != (const TimeValue &that) const { return !(*this == that); }
+
+    /// Adds two TimeValue objects together.
+    /// @returns The sum of the two operands as a new TimeValue
+    /// @brief Addition operator.
+    friend TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2);
+
+    /// Subtracts two TimeValue objects.
+    /// @returns The difference of the two operands as a new TimeValue
+    /// @brief Subtraction operator.
+    friend TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2);
+
+  /// @}
+  /// @name Accessors
+  /// @{
+  public:
+
+    /// Returns only the seconds component of the TimeValue. The nanoseconds
+    /// portion is ignored. No rounding is performed.
+    /// @brief Retrieve the seconds component
+    SecondsType seconds() const { return seconds_; }
+
+    /// Returns only the nanoseconds component of the TimeValue. The seconds
+    /// portion is ignored.
+    /// @brief Retrieve the nanoseconds component.
+    NanoSecondsType nanoseconds() const { return nanos_; }
+
+    /// Returns only the fractional portion of the TimeValue rounded down to the
+    /// nearest microsecond (divide by one thousand).
+    /// @brief Retrieve the fractional part as microseconds.
+    uint32_t microseconds() const {
+      return nanos_ / NANOSECONDS_PER_MICROSECOND;
+    }
+
+    /// Returns only the fractional portion of the TimeValue rounded down to the
+    /// nearest millisecond (divide by one million).
+    /// @brief Retrieve the fractional part as milliseconds.
+    uint32_t milliseconds() const {
+      return nanos_ / NANOSECONDS_PER_MILLISECOND;
+    }
+
+    /// Returns the TimeValue as a number of microseconds. Note that the value
+    /// returned can overflow because the range of a uint64_t is smaller than
+    /// the range of a TimeValue. Nevertheless, this is useful on some operating
+    /// systems and is therefore provided.
+    /// @brief Convert to a number of microseconds (can overflow)
+    uint64_t usec() const {
+      return seconds_ * MICROSECONDS_PER_SECOND +
+             ( nanos_ / NANOSECONDS_PER_MICROSECOND );
+    }
+
+    /// Returns the TimeValue as a number of milliseconds. Note that the value
+    /// returned can overflow because the range of a uint64_t is smaller than
+    /// the range of a TimeValue. Nevertheless, this is useful on some operating
+    /// systems and is therefore provided.
+    /// @brief Convert to a number of milliseconds (can overflow)
+    uint64_t msec() const {
+      return seconds_ * MILLISECONDS_PER_SECOND +
+             ( nanos_ / NANOSECONDS_PER_MILLISECOND );
+    }
+
+    /// Converts the TimeValue into the corresponding number of "ticks" for
+    /// Posix, correcting for the difference in Posix zero time.
+    /// @brief Convert to unix time (100 nanoseconds since 12:00:00a Jan 1,1970)
+    uint64_t toPosixTime() const {
+      uint64_t result = seconds_ - PosixZeroTime.seconds_;
+      result += nanos_ / NANOSECONDS_PER_POSIX_TICK;
+      return result;
+    }
+
+    /// Converts the TimeValue into the corresponding number of seconds
+    /// since the epoch (00:00:00 Jan 1,1970).
+    uint64_t toEpochTime() const {
+      return seconds_ - PosixZeroTime.seconds_;
+    }
+
+    /// Converts the TimeValue into the corresponding number of "ticks" for
+    /// Win32 platforms, correcting for the difference in Win32 zero time.
+    /// @brief Convert to Win32 time (100-nanosecond ticks since 12:00:00a
+    /// Jan 1, 1601)
+    uint64_t toWin32Time() const {
+      uint64_t result = seconds_ - Win32ZeroTime.seconds_;
+      result += nanos_ / NANOSECONDS_PER_WIN32_TICK;
+      return result;
+    }
+
+    /// Provides the seconds and nanoseconds as results in its arguments after
+    /// correction for the Posix zero time.
+    /// @brief Convert to timespec time (ala POSIX.1b)
+    void getTimespecTime( uint64_t& seconds, uint32_t& nanos ) const {
+      seconds = seconds_ - PosixZeroTime.seconds_;
+      nanos = nanos_;
+    }
+
+    /// Provides conversion of the TimeValue into a readable time & date.
+    /// @returns std::string containing the readable time value
+    /// @brief Convert time to a string.
+    std::string str() const;
+
+  /// @}
+  /// @name Mutators
+  /// @{
+  public:
+    /// The seconds component of the TimeValue is set to \p sec without
+    /// modifying the nanoseconds part. This is useful for whole second
+    /// arithmetic.
+    /// @brief Set the seconds component.
+    void seconds (SecondsType sec ) {
+      this->seconds_ = sec;
+      this->normalize();
+    }
+
+    /// The nanoseconds component of the TimeValue is set to \p nanos without
+    /// modifying the seconds part. This is useful for basic computations
+    /// involving just the nanoseconds portion. Note that the TimeValue will be
+    /// normalized after this call so that the fractional (nanoseconds) portion
+    /// will have the smallest equivalent value.
+    /// @brief Set the nanoseconds component using a number of nanoseconds.
+    void nanoseconds ( NanoSecondsType nanos ) {
+      this->nanos_ = nanos;
+      this->normalize();
+    }
+
+    /// The seconds component remains unchanged.
+    /// @brief Set the nanoseconds component using a number of microseconds.
+    void microseconds ( int32_t micros ) {
+      this->nanos_ = micros * NANOSECONDS_PER_MICROSECOND;
+      this->normalize();
+    }
+
+    /// The seconds component remains unchanged.
+    /// @brief Set the nanoseconds component using a number of milliseconds.
+    void milliseconds ( int32_t millis ) {
+      this->nanos_ = millis * NANOSECONDS_PER_MILLISECOND;
+      this->normalize();
+    }
+
+    /// @brief Converts from microsecond format to TimeValue format
+    void usec( int64_t microseconds ) {
+      this->seconds_ = microseconds / MICROSECONDS_PER_SECOND;
+      this->nanos_ = NanoSecondsType(microseconds % MICROSECONDS_PER_SECOND) *
+        NANOSECONDS_PER_MICROSECOND;
+      this->normalize();
+    }
+
+    /// @brief Converts from millisecond format to TimeValue format
+    void msec( int64_t milliseconds ) {
+      this->seconds_ = milliseconds / MILLISECONDS_PER_SECOND;
+      this->nanos_ = NanoSecondsType(milliseconds % MILLISECONDS_PER_SECOND) *
+        NANOSECONDS_PER_MILLISECOND;
+      this->normalize();
+    }
+
+    /// Converts the \p seconds argument from PosixTime to the corresponding
+    /// TimeValue and assigns that value to \p this.
+    /// @brief Convert seconds from PosixTime to TimeValue
+    void fromEpochTime( SecondsType seconds ) {
+      seconds_ = seconds + PosixZeroTime.seconds_;
+      nanos_ = 0;
+      this->normalize();
+    }
+
+    /// Converts the \p win32Time argument from Windows FILETIME to the
+    /// corresponding TimeValue and assigns that value to \p this.
+    /// @brief Convert seconds from Windows FILETIME to TimeValue
+    void fromWin32Time( uint64_t win32Time ) {
+      this->seconds_ = win32Time / 10000000 + Win32ZeroTime.seconds_;
+      this->nanos_ = NanoSecondsType(win32Time % 10000000) * 100;
+    }
+
+  /// @}
+  /// @name Implementation
+  /// @{
+  private:
+    /// This causes the values to be represented so that the fractional
+    /// part is minimized, possibly incrementing the seconds part.
+    /// @brief Normalize to canonical form.
+    void normalize();
+
+  /// @}
+  /// @name Data
+  /// @{
+  private:
+    /// Store the values as a <timespec>.
+    SecondsType seconds_;   ///< Stores the seconds part of the TimeVal
+    NanoSecondsType nanos_; ///< Stores the nanoseconds part of the TimeVal
+  /// @}
+
+  };
+
+inline TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2) {
+  TimeValue sum (tv1.seconds_ + tv2.seconds_, tv1.nanos_ + tv2.nanos_);
+  sum.normalize ();
+  return sum;
+}
+
+inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
+  TimeValue difference (tv1.seconds_ - tv2.seconds_, tv1.nanos_ - tv2.nanos_ );
+  difference.normalize ();
+  return difference;
+}
+
+}
+}
+
+#endif
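Editor's sketch (not part of the patch): measuring elapsed time with TimeValue's static now() and subtraction operator.

    #include "llvm/Support/TimeValue.h"
    #include "llvm/Support/raw_ostream.h"

    void timeDemo() {
      using llvm::sys::TimeValue;
      TimeValue Start = TimeValue::now();
      // ... some work ...
      TimeValue Elapsed = TimeValue::now() - Start;
      llvm::outs() << Elapsed.seconds() << "s + "
                   << Elapsed.milliseconds() << "ms\n";
    }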
diff --git a/final/include/llvm/Support/Timer.h b/final/include/llvm/Support/Timer.h
new file mode 100644
index 00000000000..404cb6d6c8b
--- /dev/null
+++ b/final/include/llvm/Support/Timer.h
@@ -0,0 +1,194 @@
+//===-- llvm/Support/Timer.h - Interval Timing Support ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines three classes: Timer, TimeRegion, and TimerGroup,
+// documented below.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TIMER_H
+#define LLVM_SUPPORT_TIMER_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <string>
+#include <vector>
+#include <utility>
+
+namespace llvm {
+
+class Timer;
+class TimerGroup;
+class raw_ostream;
+
+class TimeRecord {
+  double WallTime;   // Wall clock time elapsed in seconds
+  double UserTime;   // User time elapsed
+  double SystemTime; // System time elapsed
+  ssize_t MemUsed;   // Memory allocated (in bytes)
+public:
+  TimeRecord() : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0) {}
+
+  /// getCurrentTime - Get the current time and memory usage. If Start is true
+  /// we get the memory usage before the time, otherwise we get time before
+  /// memory usage. This matters if the time to get the memory usage is
+  /// significant and shouldn't be counted as part of a duration.
+  static TimeRecord getCurrentTime(bool Start = true);
+
+  double getProcessTime() const { return UserTime+SystemTime; }
+  double getUserTime() const { return UserTime; }
+  double getSystemTime() const { return SystemTime; }
+  double getWallTime() const { return WallTime; }
+  ssize_t getMemUsed() const { return MemUsed; }
+
+
+  // operator< - Allow sorting.
+  bool operator<(const TimeRecord &T) const {
+    // Sort by Wall Time elapsed, as it is the only thing really accurate
+    return WallTime < T.WallTime;
+  }
+
+  void operator+=(const TimeRecord &RHS) {
+    WallTime   += RHS.WallTime;
+    UserTime   += RHS.UserTime;
+    SystemTime += RHS.SystemTime;
+    MemUsed    += RHS.MemUsed;
+  }
+  void operator-=(const TimeRecord &RHS) {
+    WallTime   -= RHS.WallTime;
+    UserTime   -= RHS.UserTime;
+    SystemTime -= RHS.SystemTime;
+    MemUsed    -= RHS.MemUsed;
+  }
+
+  /// print - Print the current timer to standard error, and reset the "Started"
+  /// flag.
+  void print(const TimeRecord &Total, raw_ostream &OS) const;
+};
+
+/// Timer - This class is used to track the amount of time spent between
+/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS
+/// support it can also keep track of the RSS of the program at various points.
+/// By default, the Timer will print the amount of time it has captured to
+/// standard error when the last timer is destroyed, otherwise it is printed
+/// when its TimerGroup is destroyed. Timers do not print their information
+/// if they are never started.
+///
+class Timer {
+  TimeRecord Time;
+  std::string Name;    // The name of this time variable.
+  bool Started;        // Has this time variable ever been started?
+  TimerGroup *TG;      // The TimerGroup this Timer is in.
+
+  Timer **Prev, *Next; // Doubly linked list of timers in the group.
+public:
+  explicit Timer(StringRef N) : TG(0) { init(N); }
+  Timer(StringRef N, TimerGroup &tg) : TG(0) { init(N, tg); }
+  Timer(const Timer &RHS) : TG(0) {
+    assert(RHS.TG == 0 && "Can only copy uninitialized timers");
+  }
+  const Timer &operator=(const Timer &T) {
+    assert(TG == 0 && T.TG == 0 && "Can only assign uninit timers");
+    return *this;
+  }
+  ~Timer();
+
+  // Create an uninitialized timer, client must use 'init'.
+  explicit Timer() : TG(0) {}
+  void init(StringRef N);
+  void init(StringRef N, TimerGroup &tg);
+
+  const std::string &getName() const { return Name; }
+  bool isInitialized() const { return TG != 0; }
+
+  /// startTimer - Start the timer running. Time between calls to
+  /// startTimer/stopTimer is counted by the Timer class. Note that these calls
+  /// must be correctly paired.
+  ///
+  void startTimer();
+
+  /// stopTimer - Stop the timer.
+  ///
+  void stopTimer();
+
+private:
+  friend class TimerGroup;
+};
+
+
+/// The TimeRegion class is used as a helper class to call the startTimer() and
+/// stopTimer() methods of the Timer class. When the object is constructed, it
+/// starts the timer specified as its argument. When it is destroyed, it stops
+/// the relevant timer. This makes it easy to time a region of code.
+///
+class TimeRegion {
+  Timer *T;
+  TimeRegion(const TimeRegion &); // DO NOT IMPLEMENT
+public:
+  explicit TimeRegion(Timer &t) : T(&t) {
+    T->startTimer();
+  }
+  explicit TimeRegion(Timer *t) : T(t) {
+    if (T) T->startTimer();
+  }
+  ~TimeRegion() {
+    if (T) T->stopTimer();
+  }
+};
+
+
+/// NamedRegionTimer - This class is basically a combination of TimeRegion and
+/// Timer. It allows you to declare a new timer, AND specify the region to
+/// time, all in one statement. All timers with the same name are merged. This
+/// is primarily used for debugging and for hunting performance problems.
+///
+struct NamedRegionTimer : public TimeRegion {
+  explicit NamedRegionTimer(StringRef Name,
+                            bool Enabled = true);
+  explicit NamedRegionTimer(StringRef Name, StringRef GroupName,
+                            bool Enabled = true);
+};
+
+
+/// The TimerGroup class is used to group together related timers into a single
+/// report that is printed when the TimerGroup is destroyed.  It is illegal to
+/// destroy a TimerGroup object before all of the Timers in it are gone.  A
+/// TimerGroup can be specified for a newly created timer in its constructor.
+///
+class TimerGroup {
+  std::string Name;
+  Timer *FirstTimer;   // First timer in the group.
+  std::vector<std::pair<TimeRecord, std::string> > TimersToPrint;
+
+  TimerGroup **Prev, *Next; // Doubly linked list of TimerGroup's.
+  TimerGroup(const TimerGroup &TG);      // DO NOT IMPLEMENT
+  void operator=(const TimerGroup &TG);  // DO NOT IMPLEMENT
+public:
+  explicit TimerGroup(StringRef name);
+  ~TimerGroup();
+
+  void setName(StringRef name) { Name.assign(name.begin(), name.end()); }
+
+  /// print - Print any started timers in this group and zero them.
+  void print(raw_ostream &OS);
+
+  /// printAll - This static method prints all timers and clears them all out.
+  static void printAll(raw_ostream &OS);
+
+private:
+  friend class Timer;
+  void addTimer(Timer &T);
+  void removeTimer(Timer &T);
+  void PrintQueuedTimers(raw_ostream &OS);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/ToolOutputFile.h b/final/include/llvm/Support/ToolOutputFile.h
new file mode 100644
index 00000000000..65b182a2453
--- /dev/null
+++ b/final/include/llvm/Support/ToolOutputFile.h
@@ -0,0 +1,62 @@
+//===- ToolOutputFile.h - Output files for compiler-like tools -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the tool_output_file class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
+#define LLVM_SUPPORT_TOOL_OUTPUT_FILE_H
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// tool_output_file - This class contains a raw_fd_ostream and adds a
+/// few extra features commonly needed for compiler-like tool output files:
+///   - The file is automatically deleted if the process is killed.
+///   - The file is automatically deleted when the tool_output_file
+///     object is destroyed unless the client calls keep().
+class tool_output_file {
+  /// Installer - This class is declared before the raw_fd_ostream so that
+  /// it is constructed before the raw_fd_ostream is constructed and
+  /// destructed after the raw_fd_ostream is destructed.  It installs
+  /// cleanups in its constructor and uninstalls them in its destructor.
+  class CleanupInstaller {
+    /// Filename - The name of the file.
+    std::string Filename;
+  public:
+    /// Keep - The flag which indicates whether we should not delete the file.
+    bool Keep;
+
+    explicit CleanupInstaller(const char *filename);
+    ~CleanupInstaller();
+  } Installer;
+
+  /// OS - The contained stream.  This is intentionally declared after
+  /// Installer.
+  raw_fd_ostream OS;
+
+public:
+  /// tool_output_file - This constructor's arguments are passed to
+  /// raw_fd_ostream's constructor.
+  tool_output_file(const char *filename, std::string &ErrorInfo,
+                   unsigned Flags = 0);
+
+  /// os - Return the contained raw_fd_ostream.
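
A short sketch of TimerGroup and NamedRegionTimer from Timer.h above. The
phase functions are hypothetical, and NamedRegionTimer is assumed to
accumulate all regions sharing the same Name/GroupName into one statically
managed timer, as its comment describes.

#include "llvm/Support/Timer.h"
using namespace llvm;

void frontend();   // hypothetical compiler phases
void backend();

void timedPhases() {
  TimerGroup Group("My Compiler");   // one report prints when Group dies
  Timer FE("Frontend", Group), BE("Backend", Group);

  { TimeRegion R(FE); frontend(); }
  { TimeRegion R(BE); backend(); }

  // One-statement form; repeated executions merge into a single timer.
  { NamedRegionTimer NRT("Backend", "My Compiler Phases"); backend(); }
}
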
+  raw_fd_ostream &os() { return OS; }
+
+  /// keep - Indicate that the tool's job wrt this output file has been
+  /// successful and the file should not be deleted.
+  void keep() { Installer.Keep = true; }
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/Support/TypeBuilder.h b/final/include/llvm/Support/TypeBuilder.h
new file mode 100644
index 00000000000..ea63da00edc
--- /dev/null
+++ b/final/include/llvm/Support/TypeBuilder.h
@@ -0,0 +1,399 @@
+//===---- llvm/Support/TypeBuilder.h - Builder for LLVM types ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBuilder class, which is used as a convenient way to
+// create LLVM types with a consistent and simplified interface.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TYPEBUILDER_H
+#define LLVM_SUPPORT_TYPEBUILDER_H
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include <limits.h>
+
+namespace llvm {
+
+/// TypeBuilder - This provides a uniform API for looking up types
+/// known at compile time.  To support cross-compilation, we define a
+/// series of tag types in the llvm::types namespace, like i<N>,
+/// ieee_float, ppc_fp128, etc.  TypeBuilder<T, false> allows T to be
+/// any of these, a native C type (whose size may depend on the host
+/// compiler), or a pointer, function, or struct type built out of
+/// these.  TypeBuilder<T, true> removes native C types from this set
+/// to guarantee that its result is suitable for cross-compilation.
+/// We define the primitive types, pointer types, and functions up to
+/// 5 arguments here, but to use this class with your own types,
+/// you'll need to specialize it.  For example, say you want to call a
+/// function defined externally as:
+///
+///   struct MyType {
+///     int32 a;
+///     int32 *b;
+///     void *array[1];  // Intended as a flexible array.
+///   };
+///   int8 AFunction(struct MyType *value);
+///
+/// You'll want to use
+///   Function::Create(TypeBuilder<types::i<8>(MyType*), true>::get(), ...)
+/// to declare the function, but when you first try this, your compiler will
+/// complain that TypeBuilder<MyType, true>::get() doesn't exist.  To fix this,
+/// write:
+///
+///   namespace llvm {
+///   template<bool xcompile> class TypeBuilder<MyType, xcompile> {
+///   public:
+///     static const StructType *get(LLVMContext &Context) {
+///       // If you cache this result, be sure to cache it separately
+///       // for each LLVMContext.
+///       return StructType::get(
+///         TypeBuilder<types::i<32>, xcompile>::get(Context),
+///         TypeBuilder<types::i<32>*, xcompile>::get(Context),
+///         TypeBuilder<types::i<8>*[], xcompile>::get(Context),
+///         NULL);
+///     }
+///
+///     // You may find this a convenient place to put some constants
+///     // to help with getelementptr.  They don't have any effect on
+///     // the operation of TypeBuilder.
+///     enum Fields {
+///       FIELD_A,
+///       FIELD_B,
+///       FIELD_ARRAY
+///     };
+///   };
+///   }  // namespace llvm
+///
+/// TypeBuilder cannot handle recursive types or types you only know at runtime.
+/// If you try to give it a recursive type, it will deadlock, infinitely
+/// recurse, or do something similarly undesirable.
+template<typename T, bool cross> class TypeBuilder {};
+
+// Types for use with cross-compilable TypeBuilders.  These correspond
+// exactly with an LLVM-native type.
+namespace types {
+/// i<N> corresponds to the LLVM IntegerType with N bits.
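
The tool_output_file class above combines a raw_fd_ostream with automatic
cleanup. A minimal sketch, assuming a caller-supplied path:

#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

bool emitOutput(const char *Path) {        // Path is hypothetical
  std::string Error;
  tool_output_file Out(Path, Error);
  if (!Error.empty()) {
    errs() << "error opening " << Path << ": " << Error << "\n";
    return false;
  }
  Out.os() << "; tool output\n";
  Out.keep();   // otherwise the file is deleted when Out is destroyed
  return true;
}
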
+template class i {}; + +// The following classes represent the LLVM floating types. +class ieee_float {}; +class ieee_double {}; +class x86_fp80 {}; +class fp128 {}; +class ppc_fp128 {}; +// X86 MMX. +class x86_mmx {}; +} // namespace types + +// LLVM doesn't have const or volatile types. +template class TypeBuilder + : public TypeBuilder {}; +template class TypeBuilder + : public TypeBuilder {}; +template class TypeBuilder + : public TypeBuilder {}; + +// Pointers +template class TypeBuilder { +public: + static const PointerType *get(LLVMContext &Context) { + return PointerType::getUnqual(TypeBuilder::get(Context)); + } +}; + +/// There is no support for references +template class TypeBuilder {}; + +// Arrays +template class TypeBuilder { +public: + static const ArrayType *get(LLVMContext &Context) { + return ArrayType::get(TypeBuilder::get(Context), N); + } +}; +/// LLVM uses an array of length 0 to represent an unknown-length array. +template class TypeBuilder { +public: + static const ArrayType *get(LLVMContext &Context) { + return ArrayType::get(TypeBuilder::get(Context), 0); + } +}; + +// Define the C integral types only for TypeBuilder. +// +// C integral types do not have a defined size. It would be nice to use the +// stdint.h-defined typedefs that do have defined sizes, but we'd run into the +// following problem: +// +// On an ILP32 machine, stdint.h might define: +// +// typedef int int32_t; +// typedef long long int64_t; +// typedef long size_t; +// +// If we defined TypeBuilder and TypeBuilder, then any use of +// TypeBuilder would fail. We couldn't define TypeBuilder in +// addition to the defined-size types because we'd get duplicate definitions on +// platforms where stdint.h instead defines: +// +// typedef int int32_t; +// typedef long long int64_t; +// typedef int size_t; +// +// So we define all the primitive C types and nothing else. +#define DEFINE_INTEGRAL_TYPEBUILDER(T) \ +template<> class TypeBuilder { \ +public: \ + static const IntegerType *get(LLVMContext &Context) { \ + return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \ + } \ +}; \ +template<> class TypeBuilder { \ + /* We provide a definition here so users don't accidentally */ \ + /* define these types to work. 
*/ \ +} +DEFINE_INTEGRAL_TYPEBUILDER(char); +DEFINE_INTEGRAL_TYPEBUILDER(signed char); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned char); +DEFINE_INTEGRAL_TYPEBUILDER(short); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned short); +DEFINE_INTEGRAL_TYPEBUILDER(int); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned int); +DEFINE_INTEGRAL_TYPEBUILDER(long); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned long); +#ifdef _MSC_VER +DEFINE_INTEGRAL_TYPEBUILDER(__int64); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned __int64); +#else /* _MSC_VER */ +DEFINE_INTEGRAL_TYPEBUILDER(long long); +DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long); +#endif /* _MSC_VER */ +#undef DEFINE_INTEGRAL_TYPEBUILDER + +template +class TypeBuilder, cross> { +public: + static const IntegerType *get(LLVMContext &C) { + return IntegerType::get(C, num_bits); + } +}; + +template<> class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { + return Type::getFloatTy(C); + } +}; +template<> class TypeBuilder {}; + +template<> class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { + return Type::getDoubleTy(C); + } +}; +template<> class TypeBuilder {}; + +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getFloatTy(C); } +}; +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getDoubleTy(C); } +}; +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); } +}; +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getFP128Ty(C); } +}; +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); } +}; +template class TypeBuilder { +public: + static const Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); } +}; + +template class TypeBuilder { +public: + static const Type *get(LLVMContext &C) { + return Type::getVoidTy(C); + } +}; + +/// void* is disallowed in LLVM types, but it occurs often enough in C code that +/// we special case it. 
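
A small sketch of how the TypeBuilder specializations above compose; the
function names are illustrative only.

#include "llvm/Support/TypeBuilder.h"
using namespace llvm;

const FunctionType *makeCallbackType(LLVMContext &C) {
  // i8 (i32, i8*), built from the cross-compilation-safe tag types.
  return TypeBuilder<types::i<8>(types::i<32>, types::i<8>*), true>::get(C);
}

const FunctionType *makeHostType(LLVMContext &C) {
  // With cross = false, native C types are allowed; the integer widths
  // follow the host compiler (sizeof(T) * CHAR_BIT).
  return TypeBuilder<int(char *), false>::get(C);
}
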
+template<> class TypeBuilder + : public TypeBuilder*, false> {}; +template<> class TypeBuilder + : public TypeBuilder*, false> {}; +template<> class TypeBuilder + : public TypeBuilder*, false> {}; +template<> class TypeBuilder + : public TypeBuilder*, false> {}; + +template class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + return FunctionType::get(TypeBuilder::get(Context), false); + } +}; +template class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(1); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); + } +}; +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(2); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); + } +}; +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(3); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); + } +}; + +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(4); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); + } +}; + +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(5); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, false); + } +}; + +template class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + return FunctionType::get(TypeBuilder::get(Context), true); + } +}; +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(1); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), params, true); + } +}; +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(2); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); + } +}; +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(3); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); + } +}; + +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(4); + params.push_back(TypeBuilder::get(Context)); + 
params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); + } +}; + +template +class TypeBuilder { +public: + static const FunctionType *get(LLVMContext &Context) { + std::vector params; + params.reserve(5); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + params.push_back(TypeBuilder::get(Context)); + return FunctionType::get(TypeBuilder::get(Context), + params, true); + } +}; + +} // namespace llvm + +#endif diff --git a/final/include/llvm/Support/Valgrind.h b/final/include/llvm/Support/Valgrind.h new file mode 100644 index 00000000000..7662eaaff5a --- /dev/null +++ b/final/include/llvm/Support/Valgrind.h @@ -0,0 +1,32 @@ +//===- llvm/Support/Valgrind.h - Communication with Valgrind -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods for communicating with a valgrind instance this program is running +// under. These are all no-ops unless LLVM was configured on a system with the +// valgrind headers installed and valgrind is controlling this process. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_VALGRIND_H +#define LLVM_SYSTEM_VALGRIND_H + +#include + +namespace llvm { +namespace sys { + // True if Valgrind is controlling this process. + bool RunningOnValgrind(); + + // Discard valgrind's translation of code in the range [Addr .. Addr + Len). + // Otherwise valgrind may continue to execute the old version of the code. + void ValgrindDiscardTranslations(const void *Addr, size_t Len); +} +} + +#endif diff --git a/final/include/llvm/Support/ValueHandle.h b/final/include/llvm/Support/ValueHandle.h new file mode 100644 index 00000000000..c0cdc35e99b --- /dev/null +++ b/final/include/llvm/Support/ValueHandle.h @@ -0,0 +1,408 @@ +//===- llvm/Support/ValueHandle.h - Value Smart Pointer classes -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ValueHandle class and its sub-classes. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_VALUEHANDLE_H +#define LLVM_SUPPORT_VALUEHANDLE_H + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/Value.h" + +namespace llvm { +class ValueHandleBase; + +// ValueHandleBase** is only 4-byte aligned. +template<> +class PointerLikeTypeTraits { +public: + static inline void *getAsVoidPointer(ValueHandleBase** P) { return P; } + static inline ValueHandleBase **getFromVoidPointer(void *P) { + return static_cast(P); + } + enum { NumLowBitsAvailable = 2 }; +}; + +/// ValueHandleBase - This is the common base class of value handles. +/// ValueHandle's are smart pointers to Value's that have special behavior when +/// the value is deleted or ReplaceAllUsesWith'd. See the specific handles +/// below for details. 
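
A sketch of the Valgrind hooks declared above, in the spirit of a JIT that
rewrites code in place; patchCode() is a hypothetical stand-in.

#include "llvm/Support/Valgrind.h"
#include <cstddef>

void patchCode(void *Addr, size_t Len);   // hypothetical code rewriter

void patchAndInvalidate(void *Addr, size_t Len) {
  patchCode(Addr, Len);
  // No-op unless valgrind is supervising this process; without it,
  // valgrind could keep executing its stale translation of
  // [Addr, Addr + Len).
  if (llvm::sys::RunningOnValgrind())
    llvm::sys::ValgrindDiscardTranslations(Addr, Len);
}
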
+/// +class ValueHandleBase { + friend class Value; +protected: + /// HandleBaseKind - This indicates what sub class the handle actually is. + /// This is to avoid having a vtable for the light-weight handle pointers. The + /// fully general Callback version does have a vtable. + enum HandleBaseKind { + Assert, + Callback, + Tracking, + Weak + }; +private: + + PointerIntPair PrevPair; + ValueHandleBase *Next; + Value *VP; + + explicit ValueHandleBase(const ValueHandleBase&); // DO NOT IMPLEMENT. +public: + explicit ValueHandleBase(HandleBaseKind Kind) + : PrevPair(0, Kind), Next(0), VP(0) {} + ValueHandleBase(HandleBaseKind Kind, Value *V) + : PrevPair(0, Kind), Next(0), VP(V) { + if (isValid(VP)) + AddToUseList(); + } + ValueHandleBase(HandleBaseKind Kind, const ValueHandleBase &RHS) + : PrevPair(0, Kind), Next(0), VP(RHS.VP) { + if (isValid(VP)) + AddToExistingUseList(RHS.getPrevPtr()); + } + ~ValueHandleBase() { + if (isValid(VP)) + RemoveFromUseList(); + } + + Value *operator=(Value *RHS) { + if (VP == RHS) return RHS; + if (isValid(VP)) RemoveFromUseList(); + VP = RHS; + if (isValid(VP)) AddToUseList(); + return RHS; + } + + Value *operator=(const ValueHandleBase &RHS) { + if (VP == RHS.VP) return RHS.VP; + if (isValid(VP)) RemoveFromUseList(); + VP = RHS.VP; + if (isValid(VP)) AddToExistingUseList(RHS.getPrevPtr()); + return VP; + } + + Value *operator->() const { return getValPtr(); } + Value &operator*() const { return *getValPtr(); } + +protected: + Value *getValPtr() const { return VP; } + static bool isValid(Value *V) { + return V && + V != DenseMapInfo::getEmptyKey() && + V != DenseMapInfo::getTombstoneKey(); + } + +private: + // Callbacks made from Value. + static void ValueIsDeleted(Value *V); + static void ValueIsRAUWd(Value *Old, Value *New); + + // Internal implementation details. + ValueHandleBase **getPrevPtr() const { return PrevPair.getPointer(); } + HandleBaseKind getKind() const { return PrevPair.getInt(); } + void setPrevPtr(ValueHandleBase **Ptr) { PrevPair.setPointer(Ptr); } + + /// AddToExistingUseList - Add this ValueHandle to the use list for VP, where + /// List is the address of either the head of the list or a Next node within + /// the existing use list. + void AddToExistingUseList(ValueHandleBase **List); + + /// AddToExistingUseListAfter - Add this ValueHandle to the use list after + /// Node. + void AddToExistingUseListAfter(ValueHandleBase *Node); + + /// AddToUseList - Add this ValueHandle to the use list for VP. + void AddToUseList(); + /// RemoveFromUseList - Remove this ValueHandle from its current use list. + void RemoveFromUseList(); +}; + +/// WeakVH - This is a value handle that tries hard to point to a Value, even +/// across RAUW operations, but will null itself out if the value is destroyed. +/// this is useful for advisory sorts of information, but should not be used as +/// the key of a map (since the map would have to rearrange itself when the +/// pointer changes). +class WeakVH : public ValueHandleBase { +public: + WeakVH() : ValueHandleBase(Weak) {} + WeakVH(Value *P) : ValueHandleBase(Weak, P) {} + WeakVH(const WeakVH &RHS) + : ValueHandleBase(Weak, RHS) {} + + Value *operator=(Value *RHS) { + return ValueHandleBase::operator=(RHS); + } + Value *operator=(const ValueHandleBase &RHS) { + return ValueHandleBase::operator=(RHS); + } + + operator Value*() const { + return getValPtr(); + } +}; + +// Specialize simplify_type to allow WeakVH to participate in +// dyn_cast, isa, etc. 
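
WeakVH above nulls itself when the value dies and follows RAUW; a minimal
sketch of the intended use, with a hypothetical pass pipeline:

#include "llvm/Support/ValueHandle.h"
using namespace llvm;

void runSomePasses();   // hypothetical IR transformations

void watchValue(Value *V) {
  WeakVH Handle(V);
  runSomePasses();      // may delete or RAUW V
  if (Handle)           // implicit conversion to Value*; null if deleted
    Handle->dump();     // operator-> forwards to the current Value
}
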
+template struct simplify_type; +template<> struct simplify_type { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const WeakVH &WVH) { + return static_cast(WVH); + } +}; +template<> struct simplify_type : public simplify_type {}; + +/// AssertingVH - This is a Value Handle that points to a value and asserts out +/// if the value is destroyed while the handle is still live. This is very +/// useful for catching dangling pointer bugs and other things which can be +/// non-obvious. One particularly useful place to use this is as the Key of a +/// map. Dangling pointer bugs often lead to really subtle bugs that only occur +/// if another object happens to get allocated to the same address as the old +/// one. Using an AssertingVH ensures that an assert is triggered as soon as +/// the bad delete occurs. +/// +/// Note that an AssertingVH handle does *not* follow values across RAUW +/// operations. This means that RAUW's need to explicitly update the +/// AssertingVH's as it moves. This is required because in non-assert mode this +/// class turns into a trivial wrapper around a pointer. +template +class AssertingVH +#ifndef NDEBUG + : public ValueHandleBase +#endif + { + +#ifndef NDEBUG + ValueTy *getValPtr() const { + return static_cast(ValueHandleBase::getValPtr()); + } + void setValPtr(ValueTy *P) { + ValueHandleBase::operator=(GetAsValue(P)); + } +#else + ValueTy *ThePtr; + ValueTy *getValPtr() const { return ThePtr; } + void setValPtr(ValueTy *P) { ThePtr = P; } +#endif + + // Convert a ValueTy*, which may be const, to the type the base + // class expects. + static Value *GetAsValue(Value *V) { return V; } + static Value *GetAsValue(const Value *V) { return const_cast(V); } + +public: +#ifndef NDEBUG + AssertingVH() : ValueHandleBase(Assert) {} + AssertingVH(ValueTy *P) : ValueHandleBase(Assert, GetAsValue(P)) {} + AssertingVH(const AssertingVH &RHS) : ValueHandleBase(Assert, RHS) {} +#else + AssertingVH() : ThePtr(0) {} + AssertingVH(ValueTy *P) : ThePtr(P) {} +#endif + + operator ValueTy*() const { + return getValPtr(); + } + + ValueTy *operator=(ValueTy *RHS) { + setValPtr(RHS); + return getValPtr(); + } + ValueTy *operator=(const AssertingVH &RHS) { + setValPtr(RHS.getValPtr()); + return getValPtr(); + } + + ValueTy *operator->() const { return getValPtr(); } + ValueTy &operator*() const { return *getValPtr(); } +}; + +// Specialize simplify_type to allow AssertingVH to participate in +// dyn_cast, isa, etc. +template struct simplify_type; +template<> struct simplify_type > { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const AssertingVH &AVH) { + return static_cast(AVH); + } +}; +template<> struct simplify_type > + : public simplify_type > {}; + +// Specialize DenseMapInfo to allow AssertingVH to participate in DenseMap. +template +struct DenseMapInfo > { + typedef DenseMapInfo PointerInfo; + static inline AssertingVH getEmptyKey() { + return AssertingVH(PointerInfo::getEmptyKey()); + } + static inline T* getTombstoneKey() { + return AssertingVH(PointerInfo::getTombstoneKey()); + } + static unsigned getHashValue(const AssertingVH &Val) { + return PointerInfo::getHashValue(Val); + } + static bool isEqual(const AssertingVH &LHS, const AssertingVH &RHS) { + return LHS == RHS; + } +}; + +template +struct isPodLike > { +#ifdef NDEBUG + static const bool value = true; +#else + static const bool value = false; +#endif +}; + + +/// TrackingVH - This is a value handle that tracks a Value (or Value subclass), +/// even across RAUW operations. 
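
A sketch of AssertingVH as a DenseMap key, which the DenseMapInfo
specialization above enables; the cost map itself is hypothetical.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Instruction.h"
using namespace llvm;

typedef DenseMap<AssertingVH<Instruction>, unsigned> CostMap;

void recordCost(CostMap &Costs, Instruction *I, unsigned Cost) {
  // In +Asserts builds, deleting I while it is still a key asserts
  // instead of silently leaving a dangling pointer in the map.
  Costs[I] = Cost;
}
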
+/// +/// TrackingVH is designed for situations where a client needs to hold a handle +/// to a Value (or subclass) across some operations which may move that value, +/// but should never destroy it or replace it with some unacceptable type. +/// +/// It is an error to do anything with a TrackingVH whose value has been +/// destroyed, except to destruct it. +/// +/// It is an error to attempt to replace a value with one of a type which is +/// incompatible with any of its outstanding TrackingVHs. +template +class TrackingVH : public ValueHandleBase { + void CheckValidity() const { + Value *VP = ValueHandleBase::getValPtr(); + + // Null is always ok. + if (!VP) return; + + // Check that this value is valid (i.e., it hasn't been deleted). We + // explicitly delay this check until access to avoid requiring clients to be + // unnecessarily careful w.r.t. destruction. + assert(ValueHandleBase::isValid(VP) && "Tracked Value was deleted!"); + + // Check that the value is a member of the correct subclass. We would like + // to check this property on assignment for better debugging, but we don't + // want to require a virtual interface on this VH. Instead we allow RAUW to + // replace this value with a value of an invalid type, and check it here. + assert(isa(VP) && + "Tracked Value was replaced by one with an invalid type!"); + } + + ValueTy *getValPtr() const { + CheckValidity(); + return (ValueTy*)ValueHandleBase::getValPtr(); + } + void setValPtr(ValueTy *P) { + CheckValidity(); + ValueHandleBase::operator=(GetAsValue(P)); + } + + // Convert a ValueTy*, which may be const, to the type the base + // class expects. + static Value *GetAsValue(Value *V) { return V; } + static Value *GetAsValue(const Value *V) { return const_cast(V); } + +public: + TrackingVH() : ValueHandleBase(Tracking) {} + TrackingVH(ValueTy *P) : ValueHandleBase(Tracking, GetAsValue(P)) {} + TrackingVH(const TrackingVH &RHS) : ValueHandleBase(Tracking, RHS) {} + + operator ValueTy*() const { + return getValPtr(); + } + + ValueTy *operator=(ValueTy *RHS) { + setValPtr(RHS); + return getValPtr(); + } + ValueTy *operator=(const TrackingVH &RHS) { + setValPtr(RHS.getValPtr()); + return getValPtr(); + } + + ValueTy *operator->() const { return getValPtr(); } + ValueTy &operator*() const { return *getValPtr(); } +}; + +// Specialize simplify_type to allow TrackingVH to participate in +// dyn_cast, isa, etc. +template struct simplify_type; +template<> struct simplify_type > { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const TrackingVH &AVH) { + return static_cast(AVH); + } +}; +template<> struct simplify_type > + : public simplify_type > {}; + +/// CallbackVH - This is a value handle that allows subclasses to define +/// callbacks that run when the underlying Value has RAUW called on it or is +/// destroyed. This class can be used as the key of a map, as long as the user +/// takes it out of the map before calling setValPtr() (since the map has to +/// rearrange itself when the pointer changes). Unlike ValueHandleBase, this +/// class has a vtable and a virtual destructor. 
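
A minimal TrackingVH sketch matching the contract described above; the
pass function is hypothetical.

#include "llvm/Support/ValueHandle.h"
#include "llvm/Instructions.h"
using namespace llvm;

void runRAUWPass();   // hypothetical pass that may RAUW the alloca

void trackAlloca(AllocaInst *AI) {
  TrackingVH<AllocaInst> H(AI);
  runRAUWPass();
  // Asserts if AI was deleted, or replaced by a non-AllocaInst.
  AllocaInst *Current = H;
  (void)Current;
}
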
+class CallbackVH : public ValueHandleBase { +protected: + CallbackVH(const CallbackVH &RHS) + : ValueHandleBase(Callback, RHS) {} + + virtual ~CallbackVH(); + + void setValPtr(Value *P) { + ValueHandleBase::operator=(P); + } + +public: + CallbackVH() : ValueHandleBase(Callback) {} + CallbackVH(Value *P) : ValueHandleBase(Callback, P) {} + + operator Value*() const { + return getValPtr(); + } + + /// Called when this->getValPtr() is destroyed, inside ~Value(), so you may + /// call any non-virtual Value method on getValPtr(), but no subclass methods. + /// If WeakVH were implemented as a CallbackVH, it would use this method to + /// call setValPtr(NULL). AssertingVH would use this method to cause an + /// assertion failure. + /// + /// All implementations must remove the reference from this object to the + /// Value that's being destroyed. + virtual void deleted() { + setValPtr(NULL); + } + + /// Called when this->getValPtr()->replaceAllUsesWith(new_value) is called, + /// _before_ any of the uses have actually been replaced. If WeakVH were + /// implemented as a CallbackVH, it would use this method to call + /// setValPtr(new_value). AssertingVH would do nothing in this method. + virtual void allUsesReplacedWith(Value *) {} +}; + +// Specialize simplify_type to allow CallbackVH to participate in +// dyn_cast, isa, etc. +template struct simplify_type; +template<> struct simplify_type { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const CallbackVH &CVH) { + return static_cast(CVH); + } +}; +template<> struct simplify_type + : public simplify_type {}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Support/circular_raw_ostream.h b/final/include/llvm/Support/circular_raw_ostream.h new file mode 100644 index 00000000000..2b3c329b586 --- /dev/null +++ b/final/include/llvm/Support/circular_raw_ostream.h @@ -0,0 +1,171 @@ +//===-- llvm/Support/circular_raw_ostream.h - Buffered streams --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains raw_ostream implementations for streams to do circular +// buffering of their output. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_CIRCULAR_RAW_OSTREAM_H +#define LLVM_SUPPORT_CIRCULAR_RAW_OSTREAM_H + +#include "llvm/Support/raw_ostream.h" + +namespace llvm +{ + /// circular_raw_ostream - A raw_ostream which *can* save its data + /// to a circular buffer, or can pass it through directly to an + /// underlying stream if specified with a buffer of zero. + /// + class circular_raw_ostream : public raw_ostream { + public: + /// TAKE_OWNERSHIP - Tell this stream that it owns the underlying + /// stream and is responsible for cleanup, memory management + /// issues, etc. + /// + static const bool TAKE_OWNERSHIP = true; + + /// REFERENCE_ONLY - Tell this stream it should not manage the + /// held stream. + /// + static const bool REFERENCE_ONLY = false; + + private: + /// TheStream - The real stream we output to. We set it to be + /// unbuffered, since we're already doing our own buffering. + /// + raw_ostream *TheStream; + + /// OwnsStream - Are we responsible for managing the underlying + /// stream? + /// + bool OwnsStream; + + /// BufferSize - The size of the buffer in bytes. 
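
A sketch of a CallbackVH subclass; LoggingVH is illustrative, not part of
the header.

#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

class LoggingVH : public CallbackVH {
public:
  LoggingVH(Value *V) : CallbackVH(V) {}
  virtual void deleted() {
    errs() << "watched value deleted\n";
    setValPtr(NULL);          // required: drop the reference to the Value
  }
  virtual void allUsesReplacedWith(Value *New) {
    errs() << "watched value RAUW'd\n";
    (void)New;                // a WeakVH-like subclass would setValPtr(New)
  }
};
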
+ /// + size_t BufferSize; + + /// BufferArray - The actual buffer storage. + /// + char *BufferArray; + + /// Cur - Pointer to the current output point in BufferArray. + /// + char *Cur; + + /// Filled - Indicate whether the buffer has been completely + /// filled. This helps avoid garbage output. + /// + bool Filled; + + /// Banner - A pointer to a banner to print before dumping the + /// log. + /// + const char *Banner; + + /// flushBuffer - Dump the contents of the buffer to Stream. + /// + void flushBuffer(void) { + if (Filled) + // Write the older portion of the buffer. + TheStream->write(Cur, BufferArray + BufferSize - Cur); + // Write the newer portion of the buffer. + TheStream->write(BufferArray, Cur - BufferArray); + Cur = BufferArray; + Filled = false; + } + + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, + /// not counting the bytes currently in the buffer. + /// + virtual uint64_t current_pos() const { + // This has the same effect as calling TheStream.current_pos(), + // but that interface is private. + return TheStream->tell() - TheStream->GetNumBytesInBuffer(); + } + + public: + /// circular_raw_ostream - Construct an optionally + /// circular-buffered stream, handing it an underlying stream to + /// do the "real" output. + /// + /// As a side effect, if BuffSize is nonzero, the given Stream is + /// set to be Unbuffered. This is because circular_raw_ostream + /// does its own buffering, so it doesn't want another layer of + /// buffering to be happening underneath it. + /// + /// "Owns" tells the circular_raw_ostream whether it is + /// responsible for managing the held stream, doing memory + /// management of it, etc. + /// + circular_raw_ostream(raw_ostream &Stream, const char *Header, + size_t BuffSize = 0, bool Owns = REFERENCE_ONLY) + : raw_ostream(/*unbuffered*/true), + TheStream(0), + OwnsStream(Owns), + BufferSize(BuffSize), + BufferArray(0), + Filled(false), + Banner(Header) { + if (BufferSize != 0) + BufferArray = new char[BufferSize]; + Cur = BufferArray; + setStream(Stream, Owns); + } + explicit circular_raw_ostream() + : raw_ostream(/*unbuffered*/true), + TheStream(0), + OwnsStream(REFERENCE_ONLY), + BufferArray(0), + Filled(false), + Banner("") { + Cur = BufferArray; + } + + ~circular_raw_ostream() { + flush(); + flushBufferWithBanner(); + releaseStream(); + delete[] BufferArray; + } + + /// setStream - Tell the circular_raw_ostream to output a + /// different stream. "Owns" tells circular_raw_ostream whether + /// it should take responsibility for managing the underlying + /// stream. + /// + void setStream(raw_ostream &Stream, bool Owns = REFERENCE_ONLY) { + releaseStream(); + TheStream = &Stream; + OwnsStream = Owns; + } + + /// flushBufferWithBanner - Force output of the buffer along with + /// a small header. + /// + void flushBufferWithBanner(void); + + private: + /// releaseStream - Delete the held stream if needed. Otherwise, + /// transfer the buffer settings from this circular_raw_ostream + /// back to the underlying stream. 
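
A sketch of circular_raw_ostream keeping only the tail of verbose logging
in memory; the buffer size and banner are arbitrary choices.

#include "llvm/Support/circular_raw_ostream.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void noisyWork() {
  circular_raw_ostream Log(errs(), "*** Tail of debug log ***\n",
                           /*BuffSize=*/4096,
                           circular_raw_ostream::REFERENCE_ONLY);
  for (unsigned i = 0; i != 100000; ++i)
    Log << "iteration " << i << "\n";
}   // destructor flushes the circular buffer (with banner) to errs()
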
+ /// + void releaseStream() { + if (!TheStream) + return; + if (OwnsStream) + delete TheStream; + } + }; +} // end llvm namespace + + +#endif diff --git a/final/include/llvm/Support/raw_os_ostream.h b/final/include/llvm/Support/raw_os_ostream.h new file mode 100644 index 00000000000..4f5d3612da1 --- /dev/null +++ b/final/include/llvm/Support/raw_os_ostream.h @@ -0,0 +1,42 @@ +//===- raw_os_ostream.h - std::ostream adaptor for raw_ostream --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the raw_os_ostream class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RAW_OS_OSTREAM_H +#define LLVM_SUPPORT_RAW_OS_OSTREAM_H + +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +/// raw_os_ostream - A raw_ostream that writes to an std::ostream. This is a +/// simple adaptor class. It does not check for output errors; clients should +/// use the underlying stream to detect errors. +class raw_os_ostream : public raw_ostream { + std::ostream &OS; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const; + +public: + raw_os_ostream(std::ostream &O) : OS(O) {} + ~raw_os_ostream(); +}; + +} // end llvm namespace + +#endif diff --git a/final/include/llvm/Support/raw_ostream.h b/final/include/llvm/Support/raw_ostream.h new file mode 100644 index 00000000000..6bfae5e2982 --- /dev/null +++ b/final/include/llvm/Support/raw_ostream.h @@ -0,0 +1,491 @@ +//===--- raw_ostream.h - Raw output stream ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the raw_ostream class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_RAW_OSTREAM_H +#define LLVM_SUPPORT_RAW_OSTREAM_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + class format_object_base; + template + class SmallVectorImpl; + +/// raw_ostream - This class implements an extremely fast bulk output stream +/// that can *only* output to a stream. It does not support seeking, reopening, +/// rewinding, line buffered disciplines etc. It is a simple buffer that outputs +/// a chunk at a time. +class raw_ostream { +private: + // Do not implement. raw_ostream is noncopyable. + void operator=(const raw_ostream &); + raw_ostream(const raw_ostream &); + + /// The buffer is handled in such a way that the buffer is + /// uninitialized, unbuffered, or out of space when OutBufCur >= + /// OutBufEnd. Thus a single comparison suffices to determine if we + /// need to take the slow path to write a single character. + /// + /// The buffer is in one of three states: + /// 1. Unbuffered (BufferMode == Unbuffered) + /// 1. Uninitialized (BufferMode != Unbuffered && OutBufStart == 0). + /// 2. Buffered (BufferMode != Unbuffered && OutBufStart != 0 && + /// OutBufEnd - OutBufStart >= 1). 
+ /// + /// If buffered, then the raw_ostream owns the buffer if (BufferMode == + /// InternalBuffer); otherwise the buffer has been set via SetBuffer and is + /// managed by the subclass. + /// + /// If a subclass installs an external buffer using SetBuffer then it can wait + /// for a \see write_impl() call to handle the data which has been put into + /// this buffer. + char *OutBufStart, *OutBufEnd, *OutBufCur; + + enum BufferKind { + Unbuffered = 0, + InternalBuffer, + ExternalBuffer + } BufferMode; + +public: + // color order matches ANSI escape sequence, don't change + enum Colors { + BLACK=0, + RED, + GREEN, + YELLOW, + BLUE, + MAGENTA, + CYAN, + WHITE, + SAVEDCOLOR + }; + + explicit raw_ostream(bool unbuffered=false) + : BufferMode(unbuffered ? Unbuffered : InternalBuffer) { + // Start out ready to flush. + OutBufStart = OutBufEnd = OutBufCur = 0; + } + + virtual ~raw_ostream(); + + /// tell - Return the current offset with the file. + uint64_t tell() const { return current_pos() + GetNumBytesInBuffer(); } + + //===--------------------------------------------------------------------===// + // Configuration Interface + //===--------------------------------------------------------------------===// + + /// SetBuffered - Set the stream to be buffered, with an automatically + /// determined buffer size. + void SetBuffered(); + + /// SetBufferSize - Set the stream to be buffered, using the + /// specified buffer size. + void SetBufferSize(size_t Size) { + flush(); + SetBufferAndMode(new char[Size], Size, InternalBuffer); + } + + size_t GetBufferSize() const { + // If we're supposed to be buffered but haven't actually gotten around + // to allocating the buffer yet, return the value that would be used. + if (BufferMode != Unbuffered && OutBufStart == 0) + return preferred_buffer_size(); + + // Otherwise just return the size of the allocated buffer. + return OutBufEnd - OutBufStart; + } + + /// SetUnbuffered - Set the stream to be unbuffered. When + /// unbuffered, the stream will flush after every write. This routine + /// will also flush the buffer immediately when the stream is being + /// set to unbuffered. + void SetUnbuffered() { + flush(); + SetBufferAndMode(0, 0, Unbuffered); + } + + size_t GetNumBytesInBuffer() const { + return OutBufCur - OutBufStart; + } + + //===--------------------------------------------------------------------===// + // Data Output Interface + //===--------------------------------------------------------------------===// + + void flush() { + if (OutBufCur != OutBufStart) + flush_nonempty(); + } + + raw_ostream &operator<<(char C) { + if (OutBufCur >= OutBufEnd) + return write(C); + *OutBufCur++ = C; + return *this; + } + + raw_ostream &operator<<(unsigned char C) { + if (OutBufCur >= OutBufEnd) + return write(C); + *OutBufCur++ = C; + return *this; + } + + raw_ostream &operator<<(signed char C) { + if (OutBufCur >= OutBufEnd) + return write(C); + *OutBufCur++ = C; + return *this; + } + + raw_ostream &operator<<(StringRef Str) { + // Inline fast path, particularly for strings with a known length. + size_t Size = Str.size(); + + // Make sure we can use the fast path. + if (OutBufCur+Size > OutBufEnd) + return write(Str.data(), Size); + + memcpy(OutBufCur, Str.data(), Size); + OutBufCur += Size; + return *this; + } + + raw_ostream &operator<<(const char *Str) { + // Inline fast path, particularly for constant strings where a sufficiently + // smart compiler will simplify strlen. 
+ + return this->operator<<(StringRef(Str)); + } + + raw_ostream &operator<<(const std::string &Str) { + // Avoid the fast path, it would only increase code size for a marginal win. + return write(Str.data(), Str.length()); + } + + raw_ostream &operator<<(unsigned long N); + raw_ostream &operator<<(long N); + raw_ostream &operator<<(unsigned long long N); + raw_ostream &operator<<(long long N); + raw_ostream &operator<<(const void *P); + raw_ostream &operator<<(unsigned int N) { + return this->operator<<(static_cast(N)); + } + + raw_ostream &operator<<(int N) { + return this->operator<<(static_cast(N)); + } + + raw_ostream &operator<<(double N); + + /// write_hex - Output \arg N in hexadecimal, without any prefix or padding. + raw_ostream &write_hex(unsigned long long N); + + /// write_escaped - Output \arg Str, turning '\\', '\t', '\n', '"', and + /// anything that doesn't satisfy std::isprint into an escape sequence. + raw_ostream &write_escaped(StringRef Str, bool UseHexEscapes = false); + + raw_ostream &write(unsigned char C); + raw_ostream &write(const char *Ptr, size_t Size); + + // Formatted output, see the format() function in Support/Format.h. + raw_ostream &operator<<(const format_object_base &Fmt); + + /// indent - Insert 'NumSpaces' spaces. + raw_ostream &indent(unsigned NumSpaces); + + + /// Changes the foreground color of text that will be output from this point + /// forward. + /// @param colors ANSI color to use, the special SAVEDCOLOR can be used to + /// change only the bold attribute, and keep colors untouched + /// @param bold bold/brighter text, default false + /// @param bg if true change the background, default: change foreground + /// @returns itself so it can be used within << invocations + virtual raw_ostream &changeColor(enum Colors, bool = false, bool = false) { + return *this; } + + /// Resets the colors to terminal defaults. Call this when you are done + /// outputting colored text, or before program exit. + virtual raw_ostream &resetColor() { return *this; } + + /// This function determines if this stream is connected to a "tty" or + /// "console" window. That is, the output would be displayed to the user + /// rather than being put on a pipe or stored in a file. + virtual bool is_displayed() const { return false; } + + //===--------------------------------------------------------------------===// + // Subclass Interface + //===--------------------------------------------------------------------===// + +private: + /// write_impl - The is the piece of the class that is implemented + /// by subclasses. This writes the \args Size bytes starting at + /// \arg Ptr to the underlying stream. + /// + /// This function is guaranteed to only be called at a point at which it is + /// safe for the subclass to install a new buffer via SetBuffer. + /// + /// \arg Ptr - The start of the data to be written. For buffered streams this + /// is guaranteed to be the start of the buffer. + /// \arg Size - The number of bytes to be written. + /// + /// \invariant { Size > 0 } + virtual void write_impl(const char *Ptr, size_t Size) = 0; + + // An out of line virtual method to provide a home for the class vtable. + virtual void handle(); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const = 0; + +protected: + /// SetBuffer - Use the provided buffer as the raw_ostream buffer. 
This is + /// intended for use only by subclasses which can arrange for the output to go + /// directly into the desired output buffer, instead of being copied on each + /// flush. + void SetBuffer(char *BufferStart, size_t Size) { + SetBufferAndMode(BufferStart, Size, ExternalBuffer); + } + + /// preferred_buffer_size - Return an efficient buffer size for the + /// underlying output mechanism. + virtual size_t preferred_buffer_size() const; + + /// getBufferStart - Return the beginning of the current stream buffer, or 0 + /// if the stream is unbuffered. + const char *getBufferStart() const { return OutBufStart; } + + //===--------------------------------------------------------------------===// + // Private Interface + //===--------------------------------------------------------------------===// +private: + /// SetBufferAndMode - Install the given buffer and mode. + void SetBufferAndMode(char *BufferStart, size_t Size, BufferKind Mode); + + /// flush_nonempty - Flush the current buffer, which is known to be + /// non-empty. This outputs the currently buffered data and resets + /// the buffer to empty. + void flush_nonempty(); + + /// copy_to_buffer - Copy data into the buffer. Size must not be + /// greater than the number of unused bytes in the buffer. + void copy_to_buffer(const char *Ptr, size_t Size); +}; + +//===----------------------------------------------------------------------===// +// File Output Streams +//===----------------------------------------------------------------------===// + +/// raw_fd_ostream - A raw_ostream that writes to a file descriptor. +/// +class raw_fd_ostream : public raw_ostream { + int FD; + bool ShouldClose; + + /// Error This flag is true if an error of any kind has been detected. + /// + bool Error; + + /// Controls whether the stream should attempt to use atomic writes, when + /// possible. + bool UseAtomicWrites; + + uint64_t pos; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const { return pos; } + + /// preferred_buffer_size - Determine an efficient buffer size. + virtual size_t preferred_buffer_size() const; + + /// error_detected - Set the flag indicating that an output error has + /// been encountered. + void error_detected() { Error = true; } + +public: + + enum { + /// F_Excl - When opening a file, this flag makes raw_fd_ostream + /// report an error if the file already exists. + F_Excl = 1, + + /// F_Append - When opening a file, if it already exists append to the + /// existing file instead of returning an error. This may not be specified + /// with F_Excl. + F_Append = 2, + + /// F_Binary - The file should be opened in binary mode on platforms that + /// make this distinction. + F_Binary = 4 + }; + + /// raw_fd_ostream - Open the specified file for writing. If an error occurs, + /// information about the error is put into ErrorInfo, and the stream should + /// be immediately destroyed; the string will be empty if no error occurred. + /// This allows optional flags to control how the file will be opened. + /// + /// As a special case, if Filename is "-", then the stream will use + /// STDOUT_FILENO instead of opening a file. Note that it will still consider + /// itself to own the file descriptor. In particular, it will close the + /// file descriptor when it is done (this is necessary to detect + /// output errors). 
+ raw_fd_ostream(const char *Filename, std::string &ErrorInfo, + unsigned Flags = 0); + + /// raw_fd_ostream ctor - FD is the file descriptor that this writes to. If + /// ShouldClose is true, this closes the file when the stream is destroyed. + raw_fd_ostream(int fd, bool shouldClose, bool unbuffered=false); + + ~raw_fd_ostream(); + + /// close - Manually flush the stream and close the file. + /// Note that this does not call fsync. + void close(); + + /// seek - Flushes the stream and repositions the underlying file descriptor + /// position to the offset specified from the beginning of the file. + uint64_t seek(uint64_t off); + + /// SetUseAtomicWrite - Set the stream to attempt to use atomic writes for + /// individual output routines where possible. + /// + /// Note that because raw_ostream's are typically buffered, this flag is only + /// sensible when used on unbuffered streams which will flush their output + /// immediately. + void SetUseAtomicWrites(bool Value) { + UseAtomicWrites = Value; + } + + virtual raw_ostream &changeColor(enum Colors colors, bool bold=false, + bool bg=false); + virtual raw_ostream &resetColor(); + + virtual bool is_displayed() const; + + /// has_error - Return the value of the flag in this raw_fd_ostream indicating + /// whether an output error has been encountered. + /// This doesn't implicitly flush any pending output. Also, it doesn't + /// guarantee to detect all errors unless the the stream has been closed. + bool has_error() const { + return Error; + } + + /// clear_error - Set the flag read by has_error() to false. If the error + /// flag is set at the time when this raw_ostream's destructor is called, + /// report_fatal_error is called to report the error. Use clear_error() + /// after handling the error to avoid this behavior. + /// + /// "Errors should never pass silently. + /// Unless explicitly silenced." + /// - from The Zen of Python, by Tim Peters + /// + void clear_error() { + Error = false; + } +}; + +/// outs() - This returns a reference to a raw_ostream for standard output. +/// Use it like: outs() << "foo" << "bar"; +raw_ostream &outs(); + +/// errs() - This returns a reference to a raw_ostream for standard error. +/// Use it like: errs() << "foo" << "bar"; +raw_ostream &errs(); + +/// nulls() - This returns a reference to a raw_ostream which simply discards +/// output. +raw_ostream &nulls(); + +//===----------------------------------------------------------------------===// +// Output Stream Adaptors +//===----------------------------------------------------------------------===// + +/// raw_string_ostream - A raw_ostream that writes to an std::string. This is a +/// simple adaptor class. This class does not encounter output errors. +class raw_string_ostream : public raw_ostream { + std::string &OS; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const { return OS.size(); } +public: + explicit raw_string_ostream(std::string &O) : OS(O) {} + ~raw_string_ostream(); + + /// str - Flushes the stream contents to the target string and returns + /// the string's reference. + std::string& str() { + flush(); + return OS; + } +}; + +/// raw_svector_ostream - A raw_ostream that writes to an SmallVector or +/// SmallString. This is a simple adaptor class. This class does not +/// encounter output errors. 
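
A short sketch of the two most common stream classes declared above:

#include "llvm/Support/raw_ostream.h"
#include <string>
using namespace llvm;

void streamExamples() {
  // raw_fd_ostream: "-" writes to stdout; ErrInfo is empty on success.
  std::string ErrInfo;
  raw_fd_ostream File("-", ErrInfo);
  if (ErrInfo.empty())
    File << "hello, " << 42 << "\n";

  // raw_string_ostream: str() flushes and returns the target string.
  std::string Buf;
  raw_string_ostream SS(Buf);
  SS << "value = " << 3.14;
  errs() << SS.str() << "\n";
}
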
+class raw_svector_ostream : public raw_ostream { + SmallVectorImpl &OS; + + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t Size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const; +public: + /// Construct a new raw_svector_ostream. + /// + /// \arg O - The vector to write to; this should generally have at least 128 + /// bytes free to avoid any extraneous memory overhead. + explicit raw_svector_ostream(SmallVectorImpl &O); + ~raw_svector_ostream(); + + /// resync - This is called when the SmallVector we're appending to is changed + /// outside of the raw_svector_ostream's control. It is only safe to do this + /// if the raw_svector_ostream has previously been flushed. + void resync(); + + /// str - Flushes the stream contents to the target vector and return a + /// StringRef for the vector contents. + StringRef str(); +}; + +/// raw_null_ostream - A raw_ostream that discards all output. +class raw_null_ostream : public raw_ostream { + /// write_impl - See raw_ostream::write_impl. + virtual void write_impl(const char *Ptr, size_t size); + + /// current_pos - Return the current position within the stream, not + /// counting the bytes currently in the buffer. + virtual uint64_t current_pos() const; + +public: + explicit raw_null_ostream() {} + ~raw_null_ostream(); +}; + +} // end llvm namespace + +#endif diff --git a/final/include/llvm/Support/system_error.h b/final/include/llvm/Support/system_error.h new file mode 100644 index 00000000000..e5306ecfb35 --- /dev/null +++ b/final/include/llvm/Support/system_error.h @@ -0,0 +1,910 @@ +//===---------------------------- system_error ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This was lifted from libc++ and modified for C++03. This is called +// system_error even though it does not define that class because that's what +// it's called in C++0x. We don't define system_error because it is only used +// for exception handling, which we don't use in LLVM. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SYSTEM_SYSTEM_ERROR_H +#define LLVM_SYSTEM_SYSTEM_ERROR_H + +/* + system_error synopsis + +namespace std +{ + +class error_category +{ +public: + virtual ~error_category(); + + error_category(const error_category&) = delete; + error_category& operator=(const error_category&) = delete; + + virtual const char* name() const = 0; + virtual error_condition default_error_condition(int ev) const; + virtual bool equivalent(int code, const error_condition& condition) const; + virtual bool equivalent(const error_code& code, int condition) const; + virtual std::string message(int ev) const = 0; + + bool operator==(const error_category& rhs) const; + bool operator!=(const error_category& rhs) const; + bool operator<(const error_category& rhs) const; +}; + +const error_category& generic_category(); +const error_category& system_category(); + +template struct is_error_code_enum + : public false_type {}; + +template struct is_error_condition_enum + : public false_type {}; + +class error_code +{ +public: + // constructors: + error_code(); + error_code(int val, const error_category& cat); + template + error_code(ErrorCodeEnum e); + + // modifiers: + void assign(int val, const error_category& cat); + template + error_code& operator=(ErrorCodeEnum e); + void clear(); + + // observers: + int value() const; + const error_category& category() const; + error_condition default_error_condition() const; + std::string message() const; + explicit operator bool() const; +}; + +// non-member functions: +bool operator<(const error_code& lhs, const error_code& rhs); +template + basic_ostream& + operator<<(basic_ostream& os, const error_code& ec); + +class error_condition +{ +public: + // constructors: + error_condition(); + error_condition(int val, const error_category& cat); + template + error_condition(ErrorConditionEnum e); + + // modifiers: + void assign(int val, const error_category& cat); + template + error_condition& operator=(ErrorConditionEnum e); + void clear(); + + // observers: + int value() const; + const error_category& category() const; + std::string message() const; + explicit operator bool() const; +}; + +bool operator<(const error_condition& lhs, const error_condition& rhs); + +class system_error + : public runtime_error +{ +public: + system_error(error_code ec, const std::string& what_arg); + system_error(error_code ec, const char* what_arg); + system_error(error_code ec); + system_error(int ev, const error_category& ecat, const std::string& what_arg); + system_error(int ev, const error_category& ecat, const char* what_arg); + system_error(int ev, const error_category& ecat); + + const error_code& code() const throw(); + const char* what() const throw(); +}; + +enum class errc +{ + address_family_not_supported, // EAFNOSUPPORT + address_in_use, // EADDRINUSE + address_not_available, // EADDRNOTAVAIL + already_connected, // EISCONN + argument_list_too_long, // E2BIG + argument_out_of_domain, // EDOM + bad_address, // EFAULT + bad_file_descriptor, // EBADF + bad_message, // EBADMSG + broken_pipe, // EPIPE + connection_aborted, // ECONNABORTED + connection_already_in_progress, // EALREADY + connection_refused, // ECONNREFUSED + connection_reset, // ECONNRESET + cross_device_link, // EXDEV + destination_address_required, // EDESTADDRREQ + device_or_resource_busy, // EBUSY + directory_not_empty, // ENOTEMPTY + executable_format_error, // ENOEXEC + file_exists, // EEXIST + file_too_large, // EFBIG + 
filename_too_long,                  // ENAMETOOLONG
+  function_not_supported,             // ENOSYS
+  host_unreachable,                   // EHOSTUNREACH
+  identifier_removed,                 // EIDRM
+  illegal_byte_sequence,              // EILSEQ
+  inappropriate_io_control_operation, // ENOTTY
+  interrupted,                        // EINTR
+  invalid_argument,                   // EINVAL
+  invalid_seek,                       // ESPIPE
+  io_error,                           // EIO
+  is_a_directory,                     // EISDIR
+  message_size,                       // EMSGSIZE
+  network_down,                       // ENETDOWN
+  network_reset,                      // ENETRESET
+  network_unreachable,                // ENETUNREACH
+  no_buffer_space,                    // ENOBUFS
+  no_child_process,                   // ECHILD
+  no_link,                            // ENOLINK
+  no_lock_available,                  // ENOLCK
+  no_message_available,               // ENODATA
+  no_message,                         // ENOMSG
+  no_protocol_option,                 // ENOPROTOOPT
+  no_space_on_device,                 // ENOSPC
+  no_stream_resources,                // ENOSR
+  no_such_device_or_address,          // ENXIO
+  no_such_device,                     // ENODEV
+  no_such_file_or_directory,          // ENOENT
+  no_such_process,                    // ESRCH
+  not_a_directory,                    // ENOTDIR
+  not_a_socket,                       // ENOTSOCK
+  not_a_stream,                       // ENOSTR
+  not_connected,                      // ENOTCONN
+  not_enough_memory,                  // ENOMEM
+  not_supported,                      // ENOTSUP
+  operation_canceled,                 // ECANCELED
+  operation_in_progress,              // EINPROGRESS
+  operation_not_permitted,            // EPERM
+  operation_not_supported,            // EOPNOTSUPP
+  operation_would_block,              // EWOULDBLOCK
+  owner_dead,                         // EOWNERDEAD
+  permission_denied,                  // EACCES
+  protocol_error,                     // EPROTO
+  protocol_not_supported,             // EPROTONOSUPPORT
+  read_only_file_system,              // EROFS
+  resource_deadlock_would_occur,      // EDEADLK
+  resource_unavailable_try_again,     // EAGAIN
+  result_out_of_range,                // ERANGE
+  state_not_recoverable,              // ENOTRECOVERABLE
+  stream_timeout,                     // ETIME
+  text_file_busy,                     // ETXTBSY
+  timed_out,                          // ETIMEDOUT
+  too_many_files_open_in_system,      // ENFILE
+  too_many_files_open,                // EMFILE
+  too_many_links,                     // EMLINK
+  too_many_symbolic_link_levels,      // ELOOP
+  value_too_large,                    // EOVERFLOW
+  wrong_protocol_type                 // EPROTOTYPE
+};
+
+template <> struct is_error_condition_enum<errc> : true_type { };
+
+error_code make_error_code(errc e);
+error_condition make_error_condition(errc e);
+
+// Comparison operators:
+bool operator==(const error_code& lhs, const error_code& rhs);
+bool operator==(const error_code& lhs, const error_condition& rhs);
+bool operator==(const error_condition& lhs, const error_code& rhs);
+bool operator==(const error_condition& lhs, const error_condition& rhs);
+bool operator!=(const error_code& lhs, const error_code& rhs);
+bool operator!=(const error_code& lhs, const error_condition& rhs);
+bool operator!=(const error_condition& lhs, const error_code& rhs);
+bool operator!=(const error_condition& lhs, const error_condition& rhs);
+
+template <> struct hash<error_code>;
+
+} // std
+
+*/
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/type_traits.h"
+#include <cerrno>
+#include <string>
+
+// This must be here instead of a .inc file because it is used in the definition
+// of the enum values below.
+#ifdef LLVM_ON_WIN32
+
+  // The following numbers were taken from VS2010.
+# ifndef EAFNOSUPPORT +# define EAFNOSUPPORT 102 +# endif +# ifndef EADDRINUSE +# define EADDRINUSE 100 +# endif +# ifndef EADDRNOTAVAIL +# define EADDRNOTAVAIL 101 +# endif +# ifndef EISCONN +# define EISCONN 113 +# endif +# ifndef E2BIG +# define E2BIG 7 +# endif +# ifndef EDOM +# define EDOM 33 +# endif +# ifndef EFAULT +# define EFAULT 14 +# endif +# ifndef EBADF +# define EBADF 9 +# endif +# ifndef EBADMSG +# define EBADMSG 104 +# endif +# ifndef EPIPE +# define EPIPE 32 +# endif +# ifndef ECONNABORTED +# define ECONNABORTED 106 +# endif +# ifndef EALREADY +# define EALREADY 103 +# endif +# ifndef ECONNREFUSED +# define ECONNREFUSED 107 +# endif +# ifndef ECONNRESET +# define ECONNRESET 108 +# endif +# ifndef EXDEV +# define EXDEV 18 +# endif +# ifndef EDESTADDRREQ +# define EDESTADDRREQ 109 +# endif +# ifndef EBUSY +# define EBUSY 16 +# endif +# ifndef ENOTEMPTY +# define ENOTEMPTY 41 +# endif +# ifndef ENOEXEC +# define ENOEXEC 8 +# endif +# ifndef EEXIST +# define EEXIST 17 +# endif +# ifndef EFBIG +# define EFBIG 27 +# endif +# ifndef ENAMETOOLONG +# define ENAMETOOLONG 38 +# endif +# ifndef ENOSYS +# define ENOSYS 40 +# endif +# ifndef EHOSTUNREACH +# define EHOSTUNREACH 110 +# endif +# ifndef EIDRM +# define EIDRM 111 +# endif +# ifndef EILSEQ +# define EILSEQ 42 +# endif +# ifndef ENOTTY +# define ENOTTY 25 +# endif +# ifndef EINTR +# define EINTR 4 +# endif +# ifndef EINVAL +# define EINVAL 22 +# endif +# ifndef ESPIPE +# define ESPIPE 29 +# endif +# ifndef EIO +# define EIO 5 +# endif +# ifndef EISDIR +# define EISDIR 21 +# endif +# ifndef EMSGSIZE +# define EMSGSIZE 115 +# endif +# ifndef ENETDOWN +# define ENETDOWN 116 +# endif +# ifndef ENETRESET +# define ENETRESET 117 +# endif +# ifndef ENETUNREACH +# define ENETUNREACH 118 +# endif +# ifndef ENOBUFS +# define ENOBUFS 119 +# endif +# ifndef ECHILD +# define ECHILD 10 +# endif +# ifndef ENOLINK +# define ENOLINK 121 +# endif +# ifndef ENOLCK +# define ENOLCK 39 +# endif +# ifndef ENODATA +# define ENODATA 120 +# endif +# ifndef ENOMSG +# define ENOMSG 122 +# endif +# ifndef ENOPROTOOPT +# define ENOPROTOOPT 123 +# endif +# ifndef ENOSPC +# define ENOSPC 28 +# endif +# ifndef ENOSR +# define ENOSR 124 +# endif +# ifndef ENXIO +# define ENXIO 6 +# endif +# ifndef ENODEV +# define ENODEV 19 +# endif +# ifndef ENOENT +# define ENOENT 2 +# endif +# ifndef ESRCH +# define ESRCH 3 +# endif +# ifndef ENOTDIR +# define ENOTDIR 20 +# endif +# ifndef ENOTSOCK +# define ENOTSOCK 128 +# endif +# ifndef ENOSTR +# define ENOSTR 125 +# endif +# ifndef ENOTCONN +# define ENOTCONN 126 +# endif +# ifndef ENOMEM +# define ENOMEM 12 +# endif +# ifndef ENOTSUP +# define ENOTSUP 129 +# endif +# ifndef ECANCELED +# define ECANCELED 105 +# endif +# ifndef EINPROGRESS +# define EINPROGRESS 112 +# endif +# ifndef EPERM +# define EPERM 1 +# endif +# ifndef EOPNOTSUPP +# define EOPNOTSUPP 130 +# endif +# ifndef EWOULDBLOCK +# define EWOULDBLOCK 140 +# endif +# ifndef EOWNERDEAD +# define EOWNERDEAD 133 +# endif +# ifndef EACCES +# define EACCES 13 +# endif +# ifndef EPROTO +# define EPROTO 134 +# endif +# ifndef EPROTONOSUPPORT +# define EPROTONOSUPPORT 135 +# endif +# ifndef EROFS +# define EROFS 30 +# endif +# ifndef EDEADLK +# define EDEADLK 36 +# endif +# ifndef EAGAIN +# define EAGAIN 11 +# endif +# ifndef ERANGE +# define ERANGE 34 +# endif +# ifndef ENOTRECOVERABLE +# define ENOTRECOVERABLE 127 +# endif +# ifndef ETIME +# define ETIME 137 +# endif +# ifndef ETXTBSY +# define ETXTBSY 139 +# endif +# ifndef ETIMEDOUT +# define ETIMEDOUT 138 +# 
endif +# ifndef ENFILE +# define ENFILE 23 +# endif +# ifndef EMFILE +# define EMFILE 24 +# endif +# ifndef EMLINK +# define EMLINK 31 +# endif +# ifndef ELOOP +# define ELOOP 114 +# endif +# ifndef EOVERFLOW +# define EOVERFLOW 132 +# endif +# ifndef EPROTOTYPE +# define EPROTOTYPE 136 +# endif +#endif + +namespace llvm { + +template +struct integral_constant { + typedef T value_type; + static const value_type value = v; + typedef integral_constant type; + operator value_type() { return value; } +}; + +typedef integral_constant true_type; +typedef integral_constant false_type; + +// is_error_code_enum + +template struct is_error_code_enum : public false_type {}; + +// is_error_condition_enum + +template struct is_error_condition_enum : public false_type {}; + +// Some error codes are not present on all platforms, so we provide equivalents +// for them: + +//enum class errc +struct errc { +enum _ { + success = 0, + address_family_not_supported = EAFNOSUPPORT, + address_in_use = EADDRINUSE, + address_not_available = EADDRNOTAVAIL, + already_connected = EISCONN, + argument_list_too_long = E2BIG, + argument_out_of_domain = EDOM, + bad_address = EFAULT, + bad_file_descriptor = EBADF, +#ifdef EBADMSG + bad_message = EBADMSG, +#else + bad_message = EINVAL, +#endif + broken_pipe = EPIPE, + connection_aborted = ECONNABORTED, + connection_already_in_progress = EALREADY, + connection_refused = ECONNREFUSED, + connection_reset = ECONNRESET, + cross_device_link = EXDEV, + destination_address_required = EDESTADDRREQ, + device_or_resource_busy = EBUSY, + directory_not_empty = ENOTEMPTY, + executable_format_error = ENOEXEC, + file_exists = EEXIST, + file_too_large = EFBIG, + filename_too_long = ENAMETOOLONG, + function_not_supported = ENOSYS, + host_unreachable = EHOSTUNREACH, + identifier_removed = EIDRM, + illegal_byte_sequence = EILSEQ, + inappropriate_io_control_operation = ENOTTY, + interrupted = EINTR, + invalid_argument = EINVAL, + invalid_seek = ESPIPE, + io_error = EIO, + is_a_directory = EISDIR, + message_size = EMSGSIZE, + network_down = ENETDOWN, + network_reset = ENETRESET, + network_unreachable = ENETUNREACH, + no_buffer_space = ENOBUFS, + no_child_process = ECHILD, +#ifdef ENOLINK + no_link = ENOLINK, +#else + no_link = EINVAL, +#endif + no_lock_available = ENOLCK, +#ifdef ENODATA + no_message_available = ENODATA, +#else + no_message_available = ENOMSG, +#endif + no_message = ENOMSG, + no_protocol_option = ENOPROTOOPT, + no_space_on_device = ENOSPC, +#ifdef ENOSR + no_stream_resources = ENOSR, +#else + no_stream_resources = ENOMEM, +#endif + no_such_device_or_address = ENXIO, + no_such_device = ENODEV, + no_such_file_or_directory = ENOENT, + no_such_process = ESRCH, + not_a_directory = ENOTDIR, + not_a_socket = ENOTSOCK, +#ifdef ENOSTR + not_a_stream = ENOSTR, +#else + not_a_stream = EINVAL, +#endif + not_connected = ENOTCONN, + not_enough_memory = ENOMEM, + not_supported = ENOTSUP, +#ifdef ECANCELED + operation_canceled = ECANCELED, +#else + operation_canceled = EINVAL, +#endif + operation_in_progress = EINPROGRESS, + operation_not_permitted = EPERM, + operation_not_supported = EOPNOTSUPP, + operation_would_block = EWOULDBLOCK, +#ifdef EOWNERDEAD + owner_dead = EOWNERDEAD, +#else + owner_dead = EINVAL, +#endif + permission_denied = EACCES, +#ifdef EPROTO + protocol_error = EPROTO, +#else + protocol_error = EINVAL, +#endif + protocol_not_supported = EPROTONOSUPPORT, + read_only_file_system = EROFS, + resource_deadlock_would_occur = EDEADLK, + resource_unavailable_try_again = EAGAIN, + 
result_out_of_range = ERANGE,
+#ifdef ENOTRECOVERABLE
+  state_not_recoverable = ENOTRECOVERABLE,
+#else
+  state_not_recoverable = EINVAL,
+#endif
+#ifdef ETIME
+  stream_timeout = ETIME,
+#else
+  stream_timeout = ETIMEDOUT,
+#endif
+  text_file_busy = ETXTBSY,
+  timed_out = ETIMEDOUT,
+  too_many_files_open_in_system = ENFILE,
+  too_many_files_open = EMFILE,
+  too_many_links = EMLINK,
+  too_many_symbolic_link_levels = ELOOP,
+  value_too_large = EOVERFLOW,
+  wrong_protocol_type = EPROTOTYPE
+};
+
+  _ v_;
+
+  errc(_ v) : v_(v) {}
+  operator int() const {return v_;}
+};
+
+template <> struct is_error_condition_enum<errc> : true_type { };
+
+template <> struct is_error_condition_enum<errc::_> : true_type { };
+
+class error_condition;
+class error_code;
+
+// class error_category
+
+class _do_message;
+
+class error_category
+{
+public:
+  virtual ~error_category();
+
+private:
+  error_category();
+  error_category(const error_category&);// = delete;
+  error_category& operator=(const error_category&);// = delete;
+
+public:
+  virtual const char* name() const = 0;
+  virtual error_condition default_error_condition(int _ev) const;
+  virtual bool equivalent(int _code, const error_condition& _condition) const;
+  virtual bool equivalent(const error_code& _code, int _condition) const;
+  virtual std::string message(int _ev) const = 0;
+
+  bool operator==(const error_category& _rhs) const {return this == &_rhs;}
+
+  bool operator!=(const error_category& _rhs) const {return !(*this == _rhs);}
+
+  bool operator< (const error_category& _rhs) const {return this < &_rhs;}
+
+  friend class _do_message;
+};
+
+class _do_message : public error_category
+{
+public:
+  virtual std::string message(int ev) const;
+};
+
+const error_category& generic_category();
+const error_category& system_category();
+
+/// Get the error_category used for errno values from POSIX functions. This is
+/// the same as the system_category on POSIX systems, but is the same as the
+/// generic_category on Windows.
+const error_category& posix_category();
+
+class error_condition
+{
+  int _val_;
+  const error_category* _cat_;
+public:
+  error_condition() : _val_(0), _cat_(&generic_category()) {}
+
+  error_condition(int _val, const error_category& _cat)
+    : _val_(_val), _cat_(&_cat) {}
+
+  template <class E>
+  error_condition(E _e, typename enable_if_c<
+                          is_error_condition_enum<E>::value
+                        >::type* = 0)
+    {*this = make_error_condition(_e);}
+
+  void assign(int _val, const error_category& _cat) {
+    _val_ = _val;
+    _cat_ = &_cat;
+  }
+
+  template <class E>
+  typename enable_if_c
+  <
+    is_error_condition_enum<E>::value,
+    error_condition&
+  >::type
+  operator=(E _e)
+    {*this = make_error_condition(_e); return *this;}
+
+  void clear() {
+    _val_ = 0;
+    _cat_ = &generic_category();
+  }
+
+  int value() const {return _val_;}
+
+  const error_category& category() const {return *_cat_;}
+  std::string message() const;
+
+  typedef void (*unspecified_bool_type)();
+  static void unspecified_bool_true() {}
+
+  operator unspecified_bool_type() const { // true if error
+    return _val_ == 0 ?
0 : unspecified_bool_true; + } +}; + +inline error_condition make_error_condition(errc _e) { + return error_condition(static_cast(_e), generic_category()); +} + +inline bool operator<(const error_condition& _x, const error_condition& _y) { + return _x.category() < _y.category() + || (_x.category() == _y.category() && _x.value() < _y.value()); +} + +// error_code + +class error_code { + int _val_; + const error_category* _cat_; +public: + error_code() : _val_(0), _cat_(&system_category()) {} + + error_code(int _val, const error_category& _cat) + : _val_(_val), _cat_(&_cat) {} + + template + error_code(E _e, typename enable_if_c< + is_error_code_enum::value + >::type* = 0) { + *this = make_error_code(_e); + } + + void assign(int _val, const error_category& _cat) { + _val_ = _val; + _cat_ = &_cat; + } + + template + typename enable_if_c + < + is_error_code_enum::value, + error_code& + >::type + operator=(E _e) + {*this = make_error_code(_e); return *this;} + + void clear() { + _val_ = 0; + _cat_ = &system_category(); + } + + int value() const {return _val_;} + + const error_category& category() const {return *_cat_;} + + error_condition default_error_condition() const + {return _cat_->default_error_condition(_val_);} + + std::string message() const; + + typedef void (*unspecified_bool_type)(); + static void unspecified_bool_true() {} + + operator unspecified_bool_type() const { // true if error + return _val_ == 0 ? 0 : unspecified_bool_true; + } +}; + +inline error_code make_error_code(errc _e) { + return error_code(static_cast(_e), generic_category()); +} + +inline bool operator<(const error_code& _x, const error_code& _y) { + return _x.category() < _y.category() + || (_x.category() == _y.category() && _x.value() < _y.value()); +} + +inline bool operator==(const error_code& _x, const error_code& _y) { + return _x.category() == _y.category() && _x.value() == _y.value(); +} + +inline bool operator==(const error_code& _x, const error_condition& _y) { + return _x.category().equivalent(_x.value(), _y) + || _y.category().equivalent(_x, _y.value()); +} + +inline bool operator==(const error_condition& _x, const error_code& _y) { + return _y == _x; +} + +inline bool operator==(const error_condition& _x, const error_condition& _y) { + return _x.category() == _y.category() && _x.value() == _y.value(); +} + +inline bool operator!=(const error_code& _x, const error_code& _y) { + return !(_x == _y); +} + +inline bool operator!=(const error_code& _x, const error_condition& _y) { + return !(_x == _y); +} + +inline bool operator!=(const error_condition& _x, const error_code& _y) { + return !(_x == _y); +} + +inline bool operator!=(const error_condition& _x, const error_condition& _y) { + return !(_x == _y); +} + +// Windows errors. + +// To construct an error_code after an API error: +// +// error_code( ::GetLastError(), system_category() ) +struct windows_error { +enum _ { + success = 0, + // These names and values are based on Windows WinError.h + // This is not a complete list. Add to this list if you need to explicitly + // check for it. 
+  invalid_function = 1,          // ERROR_INVALID_FUNCTION,
+  file_not_found = 2,            // ERROR_FILE_NOT_FOUND,
+  path_not_found = 3,            // ERROR_PATH_NOT_FOUND,
+  too_many_open_files = 4,       // ERROR_TOO_MANY_OPEN_FILES,
+  access_denied = 5,             // ERROR_ACCESS_DENIED,
+  invalid_handle = 6,            // ERROR_INVALID_HANDLE,
+  arena_trashed = 7,             // ERROR_ARENA_TRASHED,
+  not_enough_memory = 8,         // ERROR_NOT_ENOUGH_MEMORY,
+  invalid_block = 9,             // ERROR_INVALID_BLOCK,
+  bad_environment = 10,          // ERROR_BAD_ENVIRONMENT,
+  bad_format = 11,               // ERROR_BAD_FORMAT,
+  invalid_access = 12,           // ERROR_INVALID_ACCESS,
+  outofmemory = 14,              // ERROR_OUTOFMEMORY,
+  invalid_drive = 15,            // ERROR_INVALID_DRIVE,
+  current_directory = 16,        // ERROR_CURRENT_DIRECTORY,
+  not_same_device = 17,          // ERROR_NOT_SAME_DEVICE,
+  no_more_files = 18,            // ERROR_NO_MORE_FILES,
+  write_protect = 19,            // ERROR_WRITE_PROTECT,
+  bad_unit = 20,                 // ERROR_BAD_UNIT,
+  not_ready = 21,                // ERROR_NOT_READY,
+  bad_command = 22,              // ERROR_BAD_COMMAND,
+  crc = 23,                      // ERROR_CRC,
+  bad_length = 24,               // ERROR_BAD_LENGTH,
+  seek = 25,                     // ERROR_SEEK,
+  not_dos_disk = 26,             // ERROR_NOT_DOS_DISK,
+  sector_not_found = 27,         // ERROR_SECTOR_NOT_FOUND,
+  out_of_paper = 28,             // ERROR_OUT_OF_PAPER,
+  write_fault = 29,              // ERROR_WRITE_FAULT,
+  read_fault = 30,               // ERROR_READ_FAULT,
+  gen_failure = 31,              // ERROR_GEN_FAILURE,
+  sharing_violation = 32,        // ERROR_SHARING_VIOLATION,
+  lock_violation = 33,           // ERROR_LOCK_VIOLATION,
+  wrong_disk = 34,               // ERROR_WRONG_DISK,
+  sharing_buffer_exceeded = 36,  // ERROR_SHARING_BUFFER_EXCEEDED,
+  handle_eof = 38,               // ERROR_HANDLE_EOF,
+  handle_disk_full = 39,         // ERROR_HANDLE_DISK_FULL,
+  rem_not_list = 51,             // ERROR_REM_NOT_LIST,
+  dup_name = 52,                 // ERROR_DUP_NAME,
+  bad_net_path = 53,             // ERROR_BAD_NETPATH,
+  network_busy = 54,             // ERROR_NETWORK_BUSY,
+  file_exists = 80,              // ERROR_FILE_EXISTS,
+  cannot_make = 82,              // ERROR_CANNOT_MAKE,
+  broken_pipe = 109,             // ERROR_BROKEN_PIPE,
+  open_failed = 110,             // ERROR_OPEN_FAILED,
+  buffer_overflow = 111,         // ERROR_BUFFER_OVERFLOW,
+  disk_full = 112,               // ERROR_DISK_FULL,
+  insufficient_buffer = 122,     // ERROR_INSUFFICIENT_BUFFER,
+  lock_failed = 167,             // ERROR_LOCK_FAILED,
+  busy = 170,                    // ERROR_BUSY,
+  cancel_violation = 173,        // ERROR_CANCEL_VIOLATION,
+  already_exists = 183           // ERROR_ALREADY_EXISTS
+};
+  _ v_;
+
+  windows_error(_ v) : v_(v) {}
+  explicit windows_error(int v) : v_(_(v)) {}
+  operator int() const {return v_;}
+};
+
+template <> struct is_error_code_enum<windows_error> : true_type { };
+
+template <> struct is_error_code_enum<windows_error::_> : true_type { };
+
+inline error_code make_error_code(windows_error e) {
+  return error_code(static_cast<int>(e), system_category());
+}
+
+} // end namespace llvm
+
+#endif
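A sketch of the intended call-site pattern for this C++03 error_code emulation (readConfig is a hypothetical helper; only the header above is assumed): a zero-valued code tests false through the safe-bool conversion, and comparison against an errc value goes through error_condition.

  #include "llvm/Support/system_error.h"

  llvm::error_code readConfig() {                      // hypothetical helper
    return llvm::make_error_code(llvm::errc::no_such_file_or_directory);
  }

  void handle() {
    if (llvm::error_code ec = readConfig()) {          // safe-bool: true only on error
      if (ec == llvm::errc::no_such_file_or_directory) {
        // e.g. fall back to defaults; ec.message() yields readable text
      }
    }
  }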
diff --git a/final/include/llvm/Support/type_traits.h b/final/include/llvm/Support/type_traits.h
new file mode 100644
index 00000000000..515295bdd66
--- /dev/null
+++ b/final/include/llvm/Support/type_traits.h
@@ -0,0 +1,126 @@
+//===- llvm/Support/type_traits.h - Simplified type traits ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a template class that determines if a type is a class or
+// not. The basic mechanism, based on using the pointer to member function of
+// a zero argument to a function, was "boosted" from the boost type_traits
+// library. See http://www.boost.org/ for all the gory details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TYPE_TRAITS_H
+#define LLVM_SUPPORT_TYPE_TRAITS_H
+
+#include <utility>
+
+// This is actually the conforming implementation which works with abstract
+// classes. However, enough compilers have trouble with it that most will use
+// the one in boost/type_traits/object_traits.hpp. This implementation actually
+// works with VC7.0, but other interactions seem to fail when we use it.
+
+namespace llvm {
+
+namespace dont_use
+{
+  // These two functions should never be used. They are helpers to
+  // the is_class template below. They cannot be located inside
+  // is_class because doing so causes at least GCC to think that
+  // the value of the "value" enumerator is not constant. Placing
+  // them out here (for some strange reason) allows the sizeof
+  // operator against them to magically be constant. This is
+  // important to make the is_class<T>::value idiom zero cost. It
+  // evaluates to a constant 1 or 0 depending on whether the
+  // parameter T is a class or not (respectively).
+  template<typename T> char is_class_helper(void(T::*)());
+  template<typename T> double is_class_helper(...);
+}
+
+template <typename T>
+struct is_class
+{
+  // is_class<> metafunction due to Paul Mensonides (leavings@attbi.com). For
+  // more details:
+  // http://groups.google.com/groups?hl=en&selm=000001c1cc83%24e154d5e0%247772e50c%40c161550a&rnum=1
+ public:
+  enum { value = sizeof(char) == sizeof(dont_use::is_class_helper<T>(0)) };
+};
+
+
+/// isPodLike - This is a type trait that is used to determine whether a given
+/// type can be copied around with memcpy instead of running ctors etc.
+template <typename T>
+struct isPodLike {
+  // If we don't know anything else, we can (at least) assume that all non-class
+  // types are PODs.
+  static const bool value = !is_class<T>::value;
+};
+
+// std::pair's are pod-like if their elements are.
+template<typename T, typename U>
+struct isPodLike<std::pair<T, U> > {
+  static const bool value = isPodLike<T>::value & isPodLike<U>::value;
+};
+
+
+/// \brief Metafunction that determines whether the two given types are
+/// equivalent.
+template<typename T, typename U>
+struct is_same {
+  static const bool value = false;
+};
+
+template<typename T>
+struct is_same<T, T> {
+  static const bool value = true;
+};
+
+// enable_if_c - Enable/disable a template based on a metafunction
+template<bool Cond, typename T = void>
+struct enable_if_c {
+  typedef T type;
+};
+
+template<typename T> struct enable_if_c<false, T> { };
+
+// enable_if - Enable/disable a template based on a metafunction
+template<typename Cond, typename T = void>
+struct enable_if : public enable_if_c<Cond::value, T> { };
+
+namespace dont_use {
+  template<typename Base> char base_of_helper(const volatile Base*);
+  template<typename Base> double base_of_helper(...);
+}
+
+/// is_base_of - Metafunction to determine whether one type is a base class of
+/// (or identical to) another type.
+template<typename Base, typename Derived>
+struct is_base_of {
+  static const bool value
+    = is_class<Base>::value && is_class<Derived>::value &&
+      sizeof(char) == sizeof(dont_use::base_of_helper<Base>((Derived*)0));
+};
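A self-contained sketch of how enable_if_c and is_class combine for C++03 overload selection (classify, S, and demo are illustrative names, not from this header); this is the same constraint technique system_error.h uses for its enum-converting constructors.

  #include "llvm/Support/type_traits.h"

  // The first overload participates only when T is a class type; the second
  // only when it is not. Exactly one survives substitution for any T.
  template <typename T>
  typename llvm::enable_if_c<llvm::is_class<T>::value, int>::type
  classify(const T &) { return 1; }

  template <typename T>
  typename llvm::enable_if_c<!llvm::is_class<T>::value, int>::type
  classify(const T &) { return 0; }

  struct S {};
  int demo() {
    int a = classify(S());   // 1: S is a class
    int b = classify(42);    // 0: int is not
    return a + b;
  }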
+// remove_pointer - Metafunction to turn Foo* into Foo. Defined in
+// C++0x [meta.trans.ptr].
+template <typename T> struct remove_pointer { typedef T type; };
+template <typename T> struct remove_pointer<T*> { typedef T type; };
+template <typename T> struct remove_pointer<T* const> { typedef T type; };
+template <typename T> struct remove_pointer<T* volatile> { typedef T type; };
+template <typename T> struct remove_pointer<T* const volatile> {
+  typedef T type; };
+
+template <bool, typename T, typename F>
+struct conditional { typedef T type; };
+
+template <typename T, typename F>
+struct conditional<false, T, F> { typedef F type; };
+
+}
+
+#endif
diff --git a/final/include/llvm/SymbolTableListTraits.h b/final/include/llvm/SymbolTableListTraits.h
new file mode 100644
index 00000000000..91a4eb99ff0
--- /dev/null
+++ b/final/include/llvm/SymbolTableListTraits.h
@@ -0,0 +1,79 @@
+//===-- llvm/SymbolTableListTraits.h - Traits for iplist --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a generic class that is used to implement the automatic
+// symbol table manipulation that occurs when you put (for example) a named
+// instruction into a basic block.
+//
+// The way that this is implemented is by using a special traits class with the
+// intrusive list that makes up the list of instructions in a basic block. When
+// a new element is added to the list of instructions, the traits class is
+// notified, allowing the symbol table to be updated.
+//
+// This generic class implements the traits class. It must be generic so that
+// it can work for everything that uses it, which includes lists of
+// instructions, basic blocks, arguments, functions, global variables, etc...
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYMBOLTABLELISTTRAITS_H
+#define LLVM_SYMBOLTABLELISTTRAITS_H
+
+#include "llvm/ADT/ilist.h"
+
+namespace llvm {
+class ValueSymbolTable;
+
+template<typename NodeTy> class ilist_iterator;
+template<typename NodeTy, typename Traits> class iplist;
+template<typename Ty> struct ilist_traits;
+
+// ValueSubClass   - The type of objects that I hold, e.g. Instruction.
+// ItemParentClass - The type of object that owns the list, e.g. BasicBlock.
+//
+template<typename ValueSubClass, typename ItemParentClass>
+class SymbolTableListTraits : public ilist_default_traits<ValueSubClass> {
+  typedef ilist_traits<ValueSubClass> TraitsClass;
+public:
+  SymbolTableListTraits() {}
+
+  /// getListOwner - Return the object that owns this list. If this is a list
+  /// of instructions, it returns the BasicBlock that owns them.
+  ItemParentClass *getListOwner() {
+    typedef iplist<ValueSubClass> ItemParentClass::*Sublist;
+    size_t Offset(size_t(&((ItemParentClass*)0->*ItemParentClass::
+                           getSublistAccess(static_cast<ValueSubClass*>(0)))));
+    iplist<ValueSubClass>* Anchor(static_cast<iplist<ValueSubClass>*>(this));
+    return reinterpret_cast<ItemParentClass*>(reinterpret_cast<char*>(Anchor)-
+                                              Offset);
+  }
+
+  static iplist<ValueSubClass> &getList(ItemParentClass *Par) {
+    return Par->*(Par->getSublistAccess((ValueSubClass*)0));
+  }
+
+  static ValueSymbolTable *getSymTab(ItemParentClass *Par) {
+    return Par ? toPtr(Par->getValueSymbolTable()) : 0;
+  }
+
+  void addNodeToList(ValueSubClass *V);
+  void removeNodeFromList(ValueSubClass *V);
+  void transferNodesFromList(ilist_traits<ValueSubClass> &L2,
+                             ilist_iterator<ValueSubClass> first,
+                             ilist_iterator<ValueSubClass> last);
+//private:
+  template<typename TPtr>
+  void setSymTabObject(TPtr *, TPtr);
+  static ValueSymbolTable *toPtr(ValueSymbolTable *P) { return P; }
+  static ValueSymbolTable *toPtr(ValueSymbolTable &R) { return &R; }
+};
+
+} // End llvm namespace
+
+#endif
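getListOwner() above recovers the owning object by subtracting the sublist member's offset from the embedded list's own address. A freestanding sketch of the same arithmetic with a hypothetical Owner type (formally undefined behavior in ISO C++, as in the original, but it shows the computation):

  #include <cstddef>

  struct Owner {
    int Tag;
    double Member;   // stands in for the embedded iplist
  };

  // Subtract the member's offset from the member's address to get the Owner,
  // mirroring what getListOwner() does with the sublist member pointer.
  Owner *ownerOf(double *M) {
    size_t Offset = size_t(&(((Owner*)0)->Member));
    return reinterpret_cast<Owner*>(reinterpret_cast<char*>(M) - Offset);
  }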
diff --git a/final/include/llvm/Target/Mangler.h b/final/include/llvm/Target/Mangler.h
new file mode 100644
index 00000000000..c1c118b08ca
--- /dev/null
+++ b/final/include/llvm/Target/Mangler.h
@@ -0,0 +1,75 @@
+//===-- llvm/Target/Mangler.h - Self-contained name mangler -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Unified name mangler for various backends.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_MANGLER_H
+#define LLVM_SUPPORT_MANGLER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+class StringRef;
+class Twine;
+class Value;
+class GlobalValue;
+template <typename T> class SmallVectorImpl;
+class MCContext;
+class MCSymbol;
+class TargetData;
+
+class Mangler {
+public:
+  enum ManglerPrefixTy {
+    Default,          ///< Emit default string before each symbol.
+    Private,          ///< Emit "private" prefix before each symbol.
+    LinkerPrivate     ///< Emit "linker private" prefix before each symbol.
+  };
+
+private:
+  MCContext &Context;
+  const TargetData &TD;
+
+  /// AnonGlobalIDs - We need to give global values the same name every time
+  /// they are mangled. This keeps track of the number we give to anonymous
+  /// ones.
+  ///
+  DenseMap<const GlobalValue*, unsigned> AnonGlobalIDs;
+
+  /// NextAnonGlobalID - This simple counter is used to unique value names.
+  ///
+  unsigned NextAnonGlobalID;
+
+public:
+  Mangler(MCContext &context, const TargetData &td)
+    : Context(context), TD(td), NextAnonGlobalID(1) {}
+
+  /// getSymbol - Return the MCSymbol for the specified global value. This
+  /// symbol is the main label that is the address of the global.
+  MCSymbol *getSymbol(const GlobalValue *GV);
+
+
+  /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+  /// and the specified global variable's name. If the global variable doesn't
+  /// have a name, this fills in a unique name for the global.
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
+                         bool isImplicitlyPrivate);
+
+  /// getNameWithPrefix - Fill OutName with the name of the appropriate prefix
+  /// and the specified name as the global variable name. GVName must not be
+  /// empty.
+  void getNameWithPrefix(SmallVectorImpl<char> &OutName, const Twine &GVName,
+                         ManglerPrefixTy PrefixTy = Mangler::Default);
+};
+
+} // End llvm namespace
+
+#endif // LLVM_SUPPORT_MANGLER_H
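A minimal sketch of driving the Mangler (Ctx, TD, and GV stand for objects an initialized backend would provide; the emitted prefix depends on the target's TargetData):

  #include "llvm/ADT/SmallString.h"
  #include "llvm/Target/Mangler.h"

  void emitName(llvm::MCContext &Ctx, const llvm::TargetData &TD,
                const llvm::GlobalValue *GV) {
    llvm::Mangler Mang(Ctx, TD);
    llvm::SmallString<64> Name;
    Mang.getNameWithPrefix(Name, GV, /*isImplicitlyPrivate=*/false);
    // Name now holds e.g. "_foo" on targets with a '_' global prefix.
  }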
diff --git a/final/include/llvm/Target/SubtargetFeature.h b/final/include/llvm/Target/SubtargetFeature.h
new file mode 100644
index 00000000000..6c21ae9583e
--- /dev/null
+++ b/final/include/llvm/Target/SubtargetFeature.h
@@ -0,0 +1,119 @@
+//===-- llvm/Target/SubtargetFeature.h - CPU characteristics ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines and manages user or tool specified CPU characteristics.
+// The intent is to be able to package specific features that should or should
+// not be used on a specific target processor. A tool, such as llc, could, as
+// an example, gather chip info from the command line, along with features
+// that should be used on that chip.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_SUBTARGETFEATURE_H
+#define LLVM_TARGET_SUBTARGETFEATURE_H
+
+#include <string>
+#include <vector>
+#include <cstring>
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+///
+/// SubtargetFeatureKV - Used to provide key value pairs for feature and
+/// CPU bit flags.
+//
+struct SubtargetFeatureKV {
+  const char *Key;     // K-V key string
+  const char *Desc;    // Help descriptor
+  uint32_t Value;      // K-V integer value
+  uint32_t Implies;    // K-V bit mask
+
+  // Compare routine for std binary search
+  bool operator<(const SubtargetFeatureKV &S) const {
+    return strcmp(Key, S.Key) < 0;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+///
+/// SubtargetInfoKV - Used to provide key value pairs for CPU and arbitrary
+/// pointers.
+//
+struct SubtargetInfoKV {
+  const char *Key;     // K-V key string
+  void *Value;         // K-V pointer value
+
+  // Compare routine for std binary search
+  bool operator<(const SubtargetInfoKV &S) const {
+    return strcmp(Key, S.Key) < 0;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+///
+/// SubtargetFeatures - Manages the enabling and disabling of subtarget
+/// specific features. Features are encoded as a string of the form
+///   "cpu,+attr1,+attr2,-attr3,...,+attrN"
+/// A comma separates each feature from the next (all lowercase.)
+/// The first feature is always the CPU subtype (e.g. pentiumm). If the CPU
+/// value is "generic" then the CPU subtype should be generic for the target.
+/// Each of the remaining features is prefixed with + or - indicating whether
+/// that feature should be enabled or disabled contrary to the cpu
+/// specification.
+///
+
+class SubtargetFeatures {
+  std::vector<std::string> Features;    // Subtarget features as a vector
+public:
+  explicit SubtargetFeatures(const std::string &Initial = std::string());
+
+  /// Features string accessors.
+  std::string getString() const;
+  void setString(const std::string &Initial);
+
+  /// Set the CPU string. Replaces previous setting. Setting to "" clears CPU.
+  void setCPU(const std::string &String);
+
+  /// Setting CPU string only if no string is set.
+  void setCPUIfNone(const std::string &String);
+
+  /// Returns current CPU string.
+  const std::string & getCPU() const;
+
+  /// Adding Features.
+  void AddFeature(const std::string &String, bool IsEnabled = true);
+
+  /// Get feature bits.
+  uint32_t getBits(const SubtargetFeatureKV *CPUTable,
+                   size_t CPUTableSize,
+                   const SubtargetFeatureKV *FeatureTable,
+                   size_t FeatureTableSize);
+
+  /// Get info pointer
+  void *getInfo(const SubtargetInfoKV *Table, size_t TableSize);
+
+  /// Print feature string.
+  void print(raw_ostream &OS) const;
+
+  // Dump feature info.
+  void dump() const;
+
+  /// Retrieve a formatted string of the default features for the specified
+  /// target triple.
+  void getDefaultSubtargetFeatures(const std::string &CPU,
+                                   const Triple& Triple);
+};
+
+} // End namespace llvm
+
+#endif
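A sketch of how the feature string composes through this API (the CPU and attribute names are illustrative; the exact result assumes the documented "cpu,+attr,-attr" encoding):

  #include "llvm/Target/SubtargetFeature.h"
  #include <string>

  void demo() {
    llvm::SubtargetFeatures F("pentiumm");   // first element: the CPU subtype
    F.AddFeature("sse2");                    // default IsEnabled=true -> "+sse2"
    F.AddFeature("3dnow", false);            // disabled -> "-3dnow"
    std::string S = F.getString();           // e.g. "pentiumm,+sse2,-3dnow"
    (void)S;
  }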
diff --git a/final/include/llvm/Target/Target.td b/final/include/llvm/Target/Target.td
new file mode 100644
index 00000000000..0f7e6aaaf2f
--- /dev/null
+++ b/final/include/llvm/Target/Target.td
@@ -0,0 +1,711 @@
+//===- Target.td - Target Independent TableGen interface ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces which should be
+// implemented by each target which is using a TableGen based code generator.
+//
+//===----------------------------------------------------------------------===//
+
+// Include all information about LLVM intrinsics.
+include "llvm/Intrinsics.td"
+
+//===----------------------------------------------------------------------===//
+// Register file description - These classes are used to fill in the target
+// description classes.
+
+class RegisterClass; // Forward def
+
+// SubRegIndex - Use instances of SubRegIndex to identify subregisters.
+class SubRegIndex {
+  string Namespace = "";
+}
+
+// Register - You should define one instance of this class for each register
+// in the target machine. String n will become the "name" of the register.
+class Register<string n> {
+  string Namespace = "";
+  string AsmName = n;
+
+  // SpillSize - If this value is set to a non-zero value, it is the size in
+  // bits of the spill slot required to hold this register. If this value is
+  // set to zero, the information is inferred from any register classes the
+  // register belongs to.
+  int SpillSize = 0;
+
+  // SpillAlignment - This value is used to specify the alignment required for
+  // spilling the register. Like SpillSize, this should only be explicitly
+  // specified if the register is not in a register class.
+  int SpillAlignment = 0;
+
+  // Aliases - A list of registers that this register overlaps with. A read or
+  // modification of this register can potentially read or modify the aliased
+  // registers.
+  list<Register> Aliases = [];
+
+  // SubRegs - A list of registers that are parts of this register. Note these
+  // are "immediate" sub-registers and the registers within the list do not
+  // themselves overlap. e.g. For X86, EAX's SubRegs list contains only [AX],
+  // not [AX, AH, AL].
+  list<Register> SubRegs = [];
+
+  // SubRegIndices - For each register in SubRegs, specify the SubRegIndex used
+  // to address it. Sub-sub-register indices are automatically inherited from
+  // SubRegs.
+  list<SubRegIndex> SubRegIndices = [];
+
+  // CompositeIndices - Specify subreg indices that don't correspond directly to
+  // a register in SubRegs and are not inherited. The following formats are
+  // supported:
+  //
+  //   (a)     Identity  - Reg:a == Reg
+  //   (a b)   Alias     - Reg:a == Reg:b
+  //   (a b,c) Composite - Reg:a == (Reg:b):c
+  //
+  // This can be used to disambiguate a sub-sub-register that exists in more
+  // than one subregister and other weird stuff.
+  list<dag> CompositeIndices = [];
+
+  // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register.
+  // These values can be determined by locating the <Target>.h file in the
+  // directory llvmgcc/gcc/config/<Target>/ and looking for REGISTER_NAMES. The
+  // order of these names corresponds to the enumeration used by gcc.
A value of + // -1 indicates that the gcc number is undefined and -2 that register number + // is invalid for this mode/flavour. + list DwarfNumbers = []; +} + +// RegisterWithSubRegs - This can be used to define instances of Register which +// need to specify sub-registers. +// List "subregs" specifies which registers are sub-registers to this one. This +// is used to populate the SubRegs and AliasSet fields of TargetRegisterDesc. +// This allows the code generator to be careful not to put two values with +// overlapping live ranges into registers which alias. +class RegisterWithSubRegs subregs> : Register { + let SubRegs = subregs; +} + +// RegisterClass - Now that all of the registers are defined, and aliases +// between registers are defined, specify which registers belong to which +// register classes. This also defines the default allocation order of +// registers by register allocators. +// +class RegisterClass regTypes, int alignment, + list regList> { + string Namespace = namespace; + + // RegType - Specify the list ValueType of the registers in this register + // class. Note that all registers in a register class must have the same + // ValueTypes. This is a list because some targets permit storing different + // types in same register, for example vector values with 128-bit total size, + // but different count/size of items, like SSE on x86. + // + list RegTypes = regTypes; + + // Size - Specify the spill size in bits of the registers. A default value of + // zero lets tablgen pick an appropriate size. + int Size = 0; + + // Alignment - Specify the alignment required of the registers when they are + // stored or loaded to memory. + // + int Alignment = alignment; + + // CopyCost - This value is used to specify the cost of copying a value + // between two registers in this register class. The default value is one + // meaning it takes a single instruction to perform the copying. A negative + // value means copying is extremely expensive or impossible. + int CopyCost = 1; + + // MemberList - Specify which registers are in this class. If the + // allocation_order_* method are not specified, this also defines the order of + // allocation used by the register allocator. + // + list MemberList = regList; + + // SubRegClasses - Specify the register class of subregisters as a list of + // dags: (RegClass SubRegIndex, SubRegindex, ...) + list SubRegClasses = []; + + // MethodProtos/MethodBodies - These members can be used to insert arbitrary + // code into a generated register class. The normal usage of this is to + // overload virtual methods. + code MethodProtos = [{}]; + code MethodBodies = [{}]; +} + + +//===----------------------------------------------------------------------===// +// DwarfRegNum - This class provides a mapping of the llvm register enumeration +// to the register numbering used by gcc and gdb. These values are used by a +// debug information writer to describe where values may be located during +// execution. +class DwarfRegNum Numbers> { + // DwarfNumbers - Numbers used internally by gcc/gdb to identify the register. + // These values can be determined by locating the .h file in the + // directory llvmgcc/gcc/config// and looking for REGISTER_NAMES. The + // order of these names correspond to the enumeration used by gcc. A value of + // -1 indicates that the gcc number is undefined and -2 that register number + // is invalid for this mode/flavour. 
+ list DwarfNumbers = Numbers; +} + +//===----------------------------------------------------------------------===// +// Pull in the common support for scheduling +// +include "llvm/Target/TargetSchedule.td" + +class Predicate; // Forward def + +//===----------------------------------------------------------------------===// +// Instruction set description - These classes correspond to the C++ classes in +// the Target/TargetInstrInfo.h file. +// +class Instruction { + string Namespace = ""; + + dag OutOperandList; // An dag containing the MI def operand list. + dag InOperandList; // An dag containing the MI use operand list. + string AsmString = ""; // The .s format to print the instruction with. + + // Pattern - Set to the DAG pattern for this instruction, if we know of one, + // otherwise, uninitialized. + list Pattern; + + // The follow state will eventually be inferred automatically from the + // instruction pattern. + + list Uses = []; // Default to using no non-operand registers + list Defs = []; // Default to modifying no non-operand registers + + // Predicates - List of predicates which will be turned into isel matching + // code. + list Predicates = []; + + // Code size. + int CodeSize = 0; + + // Added complexity passed onto matching pattern. + int AddedComplexity = 0; + + // These bits capture information about the high-level semantics of the + // instruction. + bit isReturn = 0; // Is this instruction a return instruction? + bit isBranch = 0; // Is this instruction a branch instruction? + bit isIndirectBranch = 0; // Is this instruction an indirect branch? + bit isCompare = 0; // Is this instruction a comparison instruction? + bit isMoveImm = 0; // Is this instruction a move immediate instruction? + bit isBarrier = 0; // Can control flow fall through this instruction? + bit isCall = 0; // Is this instruction a call instruction? + bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? + bit mayLoad = 0; // Is it possible for this inst to read memory? + bit mayStore = 0; // Is it possible for this inst to write memory? + bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? + bit isCommutable = 0; // Is this 3 operand instruction commutable? + bit isTerminator = 0; // Is this part of the terminator for a basic block? + bit isReMaterializable = 0; // Is this instruction re-materializable? + bit isPredicable = 0; // Is this instruction predicable? + bit hasDelaySlot = 0; // Does this instruction have an delay slot? + bit usesCustomInserter = 0; // Pseudo instr needing special help. + bit hasCtrlDep = 0; // Does this instruction r/w ctrl-flow chains? + bit isNotDuplicable = 0; // Is it unsafe to duplicate this instruction? + bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction. + bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement? + bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement? + + // Side effect flags - When set, the flags have these meanings: + // + // hasSideEffects - The instruction has side effects that are not + // captured by any operands of the instruction or other flags. + // + // neverHasSideEffects - Set on an instruction with no pattern if it has no + // side effects. + bit hasSideEffects = 0; + bit neverHasSideEffects = 0; + + // Is this instruction a "real" instruction (with a distinct machine + // encoding), or is it a pseudo instruction used for codegen modeling + // purposes. 
+ bit isCodeGenOnly = 0; + + // Is this instruction a pseudo instruction for use by the assembler parser. + bit isAsmParserOnly = 0; + + InstrItinClass Itinerary = NoItinerary;// Execution steps used for scheduling. + + string Constraints = ""; // OperandConstraint, e.g. $src = $dst. + + /// DisableEncoding - List of operand names (e.g. "$op1,$op2") that should not + /// be encoded into the output machineinstr. + string DisableEncoding = ""; + + string PostEncoderMethod = ""; + string DecoderMethod = ""; + + /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc. + bits<64> TSFlags = 0; + + ///@name Assembler Parser Support + ///@{ + + string AsmMatchConverter = ""; + + ///@} +} + +/// Predicates - These are extra conditionals which are turned into instruction +/// selector matching code. Currently each predicate is just a string. +class Predicate { + string CondString = cond; + + /// AssemblerMatcherPredicate - If this feature can be used by the assembler + /// matcher, this is true. Targets should set this by inheriting their + /// feature from the AssemblerPredicate class in addition to Predicate. + bit AssemblerMatcherPredicate = 0; +} + +/// NoHonorSignDependentRounding - This predicate is true if support for +/// sign-dependent-rounding is not enabled. +def NoHonorSignDependentRounding + : Predicate<"!HonorSignDependentRoundingFPMath()">; + +class Requires preds> { + list Predicates = preds; +} + +/// ops definition - This is just a simple marker used to identify the operand +/// list for an instruction. outs and ins are identical both syntactically and +/// semanticallyr; they are used to define def operands and use operands to +/// improve readibility. This should be used like this: +/// (outs R32:$dst), (ins R32:$src1, R32:$src2) or something similar. +def ops; +def outs; +def ins; + +/// variable_ops definition - Mark this instruction as taking a variable number +/// of operands. +def variable_ops; + + +/// PointerLikeRegClass - Values that are designed to have pointer width are +/// derived from this. TableGen treats the register class as having a symbolic +/// type that it doesn't know, and resolves the actual regclass to use by using +/// the TargetRegisterInfo::getPointerRegClass() hook at codegen time. +class PointerLikeRegClass { + int RegClassKind = Kind; +} + + +/// ptr_rc definition - Mark this operand as being a pointer value whose +/// register class is resolved dynamically via a callback to TargetInstrInfo. +/// FIXME: We should probably change this to a class which contain a list of +/// flags. But currently we have but one flag. +def ptr_rc : PointerLikeRegClass<0>; + +/// unknown definition - Mark this operand as being of unknown type, causing +/// it to be resolved by inference in the context it is used. +def unknown; + +/// AsmOperandClass - Representation for the kinds of operands which the target +/// specific parser can create and the assembly matcher may need to distinguish. +/// +/// Operand classes are used to define the order in which instructions are +/// matched, to ensure that the instruction which gets matched for any +/// particular list of operands is deterministic. +/// +/// The target specific parser must be able to classify a parsed operand into a +/// unique class which does not partially overlap with any other classes. It can +/// match a subset of some other class, in which case the super class field +/// should be defined. 
+class AsmOperandClass { + /// The name to use for this class, which should be usable as an enum value. + string Name = ?; + + /// The super classes of this operand. + list SuperClasses = []; + + /// The name of the method on the target specific operand to call to test + /// whether the operand is an instance of this class. If not set, this will + /// default to "isFoo", where Foo is the AsmOperandClass name. The method + /// signature should be: + /// bool isFoo() const; + string PredicateMethod = ?; + + /// The name of the method on the target specific operand to call to add the + /// target specific operand to an MCInst. If not set, this will default to + /// "addFooOperands", where Foo is the AsmOperandClass name. The method + /// signature should be: + /// void addFooOperands(MCInst &Inst, unsigned N) const; + string RenderMethod = ?; + + /// The name of the method on the target specific operand to call to custom + /// handle the operand parsing. This is useful when the operands do not relate + /// to immediates or registers and are very instruction specific (as flags to + /// set in a processor register, coprocessor number, ...). + string ParserMethod = ?; +} + +def ImmAsmOperand : AsmOperandClass { + let Name = "Imm"; +} + +/// Operand Types - These provide the built-in operand types that may be used +/// by a target. Targets can optionally provide their own operand types as +/// needed, though this should not be needed for RISC targets. +class Operand { + ValueType Type = ty; + string PrintMethod = "printOperand"; + string EncoderMethod = ""; + string DecoderMethod = ""; + string AsmOperandLowerMethod = ?; + dag MIOperandInfo = (ops); + + // ParserMatchClass - The "match class" that operands of this type fit + // in. Match classes are used to define the order in which instructions are + // match, to ensure that which instructions gets matched is deterministic. + // + // The target specific parser must be able to classify an parsed operand into + // a unique class, which does not partially overlap with any other classes. It + // can match a subset of some other class, in which case the AsmOperandClass + // should declare the other operand as one of its super classes. + AsmOperandClass ParserMatchClass = ImmAsmOperand; +} + +def i1imm : Operand; +def i8imm : Operand; +def i16imm : Operand; +def i32imm : Operand; +def i64imm : Operand; + +def f32imm : Operand; +def f64imm : Operand; + +/// zero_reg definition - Special node to stand for the zero register. +/// +def zero_reg; + +/// PredicateOperand - This can be used to define a predicate operand for an +/// instruction. OpTypes specifies the MIOperandInfo for the operand, and +/// AlwaysVal specifies the value of this predicate when set to "always +/// execute". +class PredicateOperand + : Operand { + let MIOperandInfo = OpTypes; + dag DefaultOps = AlwaysVal; +} + +/// OptionalDefOperand - This is used to define a optional definition operand +/// for an instruction. DefaultOps is the register the operand represents if +/// none is supplied, e.g. zero_reg. +class OptionalDefOperand + : Operand { + let MIOperandInfo = OpTypes; + dag DefaultOps = defaultops; +} + + +// InstrInfo - This class should only be instantiated once to provide parameters +// which are global to the target machine. +// +class InstrInfo { + // Target can specify its instructions in either big or little-endian formats. 
+ // For instance, while both Sparc and PowerPC are big-endian platforms, the + // Sparc manual specifies its instructions in the format [31..0] (big), while + // PowerPC specifies them using the format [0..31] (little). + bit isLittleEndianEncoding = 0; +} + +// Standard Pseudo Instructions. +// This list must match TargetOpcodes.h and CodeGenTarget.cpp. +// Only these instructions are allowed in the TargetOpcode namespace. +let isCodeGenOnly = 1, Namespace = "TargetOpcode" in { +def PHI : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = "PHINODE"; +} +def INLINEASM : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = ""; + let neverHasSideEffects = 1; // Note side effect is encoded in an operand. +} +def PROLOG_LABEL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); + let AsmString = ""; + let hasCtrlDep = 1; + let isNotDuplicable = 1; +} +def EH_LABEL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); + let AsmString = ""; + let hasCtrlDep = 1; + let isNotDuplicable = 1; +} +def GC_LABEL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins i32imm:$id); + let AsmString = ""; + let hasCtrlDep = 1; + let isNotDuplicable = 1; +} +def KILL : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = ""; + let neverHasSideEffects = 1; +} +def EXTRACT_SUBREG : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$supersrc, i32imm:$subidx); + let AsmString = ""; + let neverHasSideEffects = 1; +} +def INSERT_SUBREG : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); + let AsmString = ""; + let neverHasSideEffects = 1; + let Constraints = "$supersrc = $dst"; +} +def IMPLICIT_DEF : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins); + let AsmString = ""; + let neverHasSideEffects = 1; + let isReMaterializable = 1; + let isAsCheapAsAMove = 1; +} +def SUBREG_TO_REG : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); + let AsmString = ""; + let neverHasSideEffects = 1; +} +def COPY_TO_REGCLASS : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src, i32imm:$regclass); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; +} +def DBG_VALUE : Instruction { + let OutOperandList = (outs); + let InOperandList = (ins variable_ops); + let AsmString = "DBG_VALUE"; + let neverHasSideEffects = 1; +} +def REG_SEQUENCE : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins variable_ops); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; +} +def COPY : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; +} +} + +//===----------------------------------------------------------------------===// +// AsmParser - This class can be implemented by targets that wish to implement +// .s file parsing. +// +// Subtargets can have multiple different assembly parsers (e.g. AT&T vs Intel +// syntax on X86 for example). 
+// +class AsmParser { + // AsmParserClassName - This specifies the suffix to use for the asmparser + // class. Generated AsmParser classes are always prefixed with the target + // name. + string AsmParserClassName = "AsmParser"; + + // AsmParserInstCleanup - If non-empty, this is the name of a custom member + // function of the AsmParser class to call on every matched instruction. + // This can be used to perform target specific instruction post-processing. + string AsmParserInstCleanup = ""; + + // Variant - AsmParsers can be of multiple different variants. Variants are + // used to support targets that need to parser multiple formats for the + // assembly language. + int Variant = 0; + + // CommentDelimiter - If given, the delimiter string used to recognize + // comments which are hard coded in the .td assembler strings for individual + // instructions. + string CommentDelimiter = ""; + + // RegisterPrefix - If given, the token prefix which indicates a register + // token. This is used by the matcher to automatically recognize hard coded + // register tokens as constrained registers, instead of tokens, for the + // purposes of matching. + string RegisterPrefix = ""; +} +def DefaultAsmParser : AsmParser; + +/// AssemblerPredicate - This is a Predicate that can be used when the assembler +/// matches instructions and aliases. +class AssemblerPredicate { + bit AssemblerMatcherPredicate = 1; +} + + + +/// MnemonicAlias - This class allows targets to define assembler mnemonic +/// aliases. This should be used when all forms of one mnemonic are accepted +/// with a different mnemonic. For example, X86 allows: +/// sal %al, 1 -> shl %al, 1 +/// sal %ax, %cl -> shl %ax, %cl +/// sal %eax, %cl -> shl %eax, %cl +/// etc. Though "sal" is accepted with many forms, all of them are directly +/// translated to a shl, so it can be handled with (in the case of X86, it +/// actually has one for each suffix as well): +/// def : MnemonicAlias<"sal", "shl">; +/// +/// Mnemonic aliases are mapped before any other translation in the match phase, +/// and do allow Requires predicates, e.g.: +/// +/// def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; +/// def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; +/// +class MnemonicAlias { + string FromMnemonic = From; + string ToMnemonic = To; + + // Predicates - Predicates that must be true for this remapping to happen. + list Predicates = []; +} + +/// InstAlias - This defines an alternate assembly syntax that is allowed to +/// match an instruction that has a different (more canonical) assembly +/// representation. +class InstAlias { + string AsmString = Asm; // The .s format to match the instruction with. + dag ResultInst = Result; // The MCInst to generate. + + // Predicates - Predicates that must be true for this to match. + list Predicates = []; +} + +//===----------------------------------------------------------------------===// +// AsmWriter - This class can be implemented by targets that need to customize +// the format of the .s file writer. +// +// Subtargets can have multiple different asmwriters (e.g. AT&T vs Intel syntax +// on X86 for example). +// +class AsmWriter { + // AsmWriterClassName - This specifies the suffix to use for the asmwriter + // class. Generated AsmWriter classes are always prefixed with the target + // name. + string AsmWriterClassName = "AsmPrinter"; + + // Variant - AsmWriters can be of multiple different variants. 
Variants are + // used to support targets that need to emit assembly code in ways that are + // mostly the same for different targets, but have minor differences in + // syntax. If the asmstring contains {|} characters in them, this integer + // will specify which alternative to use. For example "{x|y|z}" with Variant + // == 1, will expand to "y". + int Variant = 0; + + + // FirstOperandColumn/OperandSpacing - If the assembler syntax uses a columnar + // layout, the asmwriter can actually generate output in this columns (in + // verbose-asm mode). These two values indicate the width of the first column + // (the "opcode" area) and the width to reserve for subsequent operands. When + // verbose asm mode is enabled, operands will be indented to respect this. + int FirstOperandColumn = -1; + + // OperandSpacing - Space between operand columns. + int OperandSpacing = -1; + + // isMCAsmWriter - Is this assembly writer for an MC emitter? This controls + // generation of the printInstruction() method. For MC printers, it takes + // an MCInstr* operand, otherwise it takes a MachineInstr*. + bit isMCAsmWriter = 0; +} +def DefaultAsmWriter : AsmWriter; + + +//===----------------------------------------------------------------------===// +// Target - This class contains the "global" target information +// +class Target { + // InstructionSet - Instruction set description for this target. + InstrInfo InstructionSet; + + // AssemblyParsers - The AsmParser instances available for this target. + list AssemblyParsers = [DefaultAsmParser]; + + // AssemblyWriters - The AsmWriter instances available for this target. + list AssemblyWriters = [DefaultAsmWriter]; +} + +//===----------------------------------------------------------------------===// +// SubtargetFeature - A characteristic of the chip set. +// +class SubtargetFeature i = []> { + // Name - Feature name. Used by command line (-mattr=) to determine the + // appropriate target chip. + // + string Name = n; + + // Attribute - Attribute to be set by feature. + // + string Attribute = a; + + // Value - Value the attribute to be set to by feature. + // + string Value = v; + + // Desc - Feature description. Used by command line (-mattr=) to display help + // information. + // + string Desc = d; + + // Implies - Features that this feature implies are present. If one of those + // features isn't set, then this one shouldn't be set either. + // + list Implies = i; +} + +//===----------------------------------------------------------------------===// +// Processor chip sets - These values represent each of the chip sets supported +// by the scheduler. Each Processor definition requires corresponding +// instruction itineraries. +// +class Processor f> { + // Name - Chip set name. Used by command line (-mcpu=) to determine the + // appropriate target chip. + // + string Name = n; + + // ProcItin - The scheduling information for the target processor. + // + ProcessorItineraries ProcItin = pi; + + // Features - list of + list Features = f; +} + +//===----------------------------------------------------------------------===// +// Pull in the common support for calling conventions. +// +include "llvm/Target/TargetCallingConv.td" + +//===----------------------------------------------------------------------===// +// Pull in the common support for DAG isel generation. 
+//
+include "llvm/Target/TargetSelectionDAG.td"
diff --git a/final/include/llvm/Target/TargetAsmBackend.h b/final/include/llvm/Target/TargetAsmBackend.h
new file mode 100644
index 00000000000..7527298efa9
--- /dev/null
+++ b/final/include/llvm/Target/TargetAsmBackend.h
@@ -0,0 +1,123 @@
+//===-- llvm/Target/TargetAsmBackend.h - Target Asm Backend -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETASMBACKEND_H
+#define LLVM_TARGET_TARGETASMBACKEND_H
+
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class MCFixup;
+class MCInst;
+class MCObjectWriter;
+class MCSection;
+template<typename T>
+class SmallVectorImpl;
+class raw_ostream;
+
+/// TargetAsmBackend - Generic interface to target specific assembler backends.
+class TargetAsmBackend {
+  TargetAsmBackend(const TargetAsmBackend &);   // DO NOT IMPLEMENT
+  void operator=(const TargetAsmBackend &);     // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  TargetAsmBackend();
+
+  unsigned HasReliableSymbolDifference : 1;
+
+public:
+  virtual ~TargetAsmBackend();
+
+  /// createObjectWriter - Create a new MCObjectWriter instance for use by the
+  /// assembler backend to emit the final object file.
+  virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0;
+
+  /// hasReliableSymbolDifference - Check whether this target implements
+  /// accurate relocations for differences between symbols. If not, differences
+  /// between symbols will always be relocatable expressions and any references
+  /// to temporary symbols will be assumed to be in the same atom, unless they
+  /// reside in a different section.
+  ///
+  /// This should always be true (since it results in fewer relocations with no
+  /// loss of functionality), but is currently supported as a way to maintain
+  /// exact object compatibility with Darwin 'as' (on non-x86_64). It should
+  /// eventually be eliminated.
+  bool hasReliableSymbolDifference() const {
+    return HasReliableSymbolDifference;
+  }
+
+  /// doesSectionRequireSymbols - Check whether the given section requires that
+  /// all symbols (even temporaries) have symbol table entries.
+  virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
+    return false;
+  }
+
+  /// isSectionAtomizable - Check whether the given section can be split into
+  /// atoms.
+  ///
+  /// \see MCAssembler::isSymbolLinkerVisible().
+  virtual bool isSectionAtomizable(const MCSection &Section) const {
+    return true;
+  }
+
+  /// @name Target Fixup Interfaces
+  /// @{
+
+  /// getNumFixupKinds - Get the number of target specific fixup kinds.
+  virtual unsigned getNumFixupKinds() const = 0;
+
+  /// getFixupKindInfo - Get information on a fixup kind.
+  virtual const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const;
+
+  /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided
+  /// data fragment, at the offset specified by the fixup and following the
+  /// fixup kind as appropriate.
+  virtual void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+                          uint64_t Value) const = 0;
+
+  /// @}
+
+  /// @name Target Relaxation Interfaces
+  /// @{
+
+  /// MayNeedRelaxation - Check whether the given instruction may need
+  /// relaxation.
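+  /// (Illustrative override for a hypothetical backend; "MyAsmBackend" and
+  /// the opcode name are invented for the example:)
+  /// \code
+  ///   bool MyAsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+  ///     // A short branch may have to grow if its target is out of range.
+  ///     return Inst.getOpcode() == MyTarget::JMP_SHORT;
+  ///   }
+  /// \endcode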
+  ///
+  /// \param Inst - The instruction to test.
+  virtual bool MayNeedRelaxation(const MCInst &Inst) const = 0;
+
+  /// RelaxInstruction - Relax the instruction in the given fragment to the next
+  /// wider instruction.
+  ///
+  /// \param Inst - The instruction to relax, which may be the same as the
+  /// output.
+  /// \param Res [output] - On return, the relaxed instruction.
+  virtual void RelaxInstruction(const MCInst &Inst, MCInst &Res) const = 0;
+
+  /// @}
+
+  /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given
+  /// output. If the target cannot generate such a sequence, it should return an
+  /// error.
+  ///
+  /// \return - True on success.
+  virtual bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const = 0;
+
+  /// HandleAssemblerFlag - Handle any target-specific assembler flags.
+  /// By default, do nothing.
+  virtual void HandleAssemblerFlag(MCAssemblerFlag Flag) {}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetAsmInfo.h b/final/include/llvm/Target/TargetAsmInfo.h
new file mode 100644
index 00000000000..98aab142b8e
--- /dev/null
+++ b/final/include/llvm/Target/TargetAsmInfo.h
@@ -0,0 +1,75 @@
+//===-- llvm/Target/TargetAsmInfo.h -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to provide the information necessary for producing assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETASMINFO_H
+#define LLVM_TARGET_TARGETASMINFO_H
+
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+namespace llvm {
+  class MCSection;
+  class MCContext;
+  class TargetMachine;
+  class TargetLoweringObjectFile;
+
+class TargetAsmInfo {
+  unsigned PointerSize;
+  bool IsLittleEndian;
+  TargetFrameLowering::StackDirection StackDir;
+  const TargetRegisterInfo *TRI;
+  std::vector<MachineMove> InitialFrameState;
+  const TargetLoweringObjectFile *TLOF;
+
+public:
+  explicit TargetAsmInfo(const TargetMachine &TM);
+
+  /// getPointerSize - Get the pointer size in bytes.
+  unsigned getPointerSize() const {
+    return PointerSize;
+  }
+
+  /// isLittleEndian - True if the target is little endian.
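+  /// A minimal usage sketch, assuming TAI is a constructed TargetAsmInfo:
+  /// \code
+  ///   unsigned PtrBytes = TAI.getPointerSize();  // e.g. 8 on x86-64
+  ///   bool MustSwap = !TAI.isLittleEndian();     // byte-swap on big endian
+  /// \endcode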
+  bool isLittleEndian() const {
+    return IsLittleEndian;
+  }
+
+  TargetFrameLowering::StackDirection getStackGrowthDirection() const {
+    return StackDir;
+  }
+
+  const MCSection *getDwarfLineSection() const {
+    return TLOF->getDwarfLineSection();
+  }
+
+  const MCSection *getEHFrameSection() const {
+    return TLOF->getEHFrameSection();
+  }
+
+  unsigned getDwarfRARegNum(bool isEH) const {
+    return TRI->getDwarfRegNum(TRI->getRARegister(), isEH);
+  }
+
+  const std::vector<MachineMove> &getInitialFrameState() const {
+    return InitialFrameState;
+  }
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const {
+    return TRI->getDwarfRegNum(RegNum, isEH);
+  }
+};
+
+}
+#endif
diff --git a/final/include/llvm/Target/TargetAsmLexer.h b/final/include/llvm/Target/TargetAsmLexer.h
new file mode 100644
index 00000000000..9fcf449a86c
--- /dev/null
+++ b/final/include/llvm/Target/TargetAsmLexer.h
@@ -0,0 +1,89 @@
+//===-- llvm/Target/TargetAsmLexer.h - Target Assembly Lexer ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETASMLEXER_H
+#define LLVM_TARGET_TARGETASMLEXER_H
+
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+
+namespace llvm {
+class Target;
+
+/// TargetAsmLexer - Generic interface to target specific assembly lexers.
+class TargetAsmLexer {
+  /// The current token
+  AsmToken CurTok;
+
+  /// The location and description of the current error
+  SMLoc ErrLoc;
+  std::string Err;
+
+  TargetAsmLexer(const TargetAsmLexer &);   // DO NOT IMPLEMENT
+  void operator=(const TargetAsmLexer &);   // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  TargetAsmLexer(const Target &);
+
+  virtual AsmToken LexToken() = 0;
+
+  void SetError(const SMLoc &errLoc, const std::string &err) {
+    ErrLoc = errLoc;
+    Err = err;
+  }
+
+  /// TheTarget - The Target that this machine was created for.
+  const Target &TheTarget;
+  MCAsmLexer *Lexer;
+
+public:
+  virtual ~TargetAsmLexer();
+
+  const Target &getTarget() const { return TheTarget; }
+
+  /// InstallLexer - Set the lexer to get tokens from lower-level lexer \arg L.
+  void InstallLexer(MCAsmLexer &L) {
+    Lexer = &L;
+  }
+
+  MCAsmLexer *getLexer() {
+    return Lexer;
+  }
+
+  /// Lex - Consume the next token from the input stream and return it.
+  const AsmToken &Lex() {
+    return CurTok = LexToken();
+  }
+
+  /// getTok - Get the current (last) lexed token.
+  const AsmToken &getTok() {
+    return CurTok;
+  }
+
+  /// getErrLoc - Get the current error location.
+  const SMLoc &getErrLoc() {
+    return ErrLoc;
+  }
+
+  /// getErr - Get the current error string.
+  const std::string &getErr() {
+    return Err;
+  }
+
+  /// getKind - Get the kind of current token.
+  AsmToken::TokenKind getKind() const { return CurTok.getKind(); }
+
+  /// is - Check if the current token has kind \arg K.
+  bool is(AsmToken::TokenKind K) const { return CurTok.is(K); }
+
+  /// isNot - Check if the current token does not have kind \arg K.
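+  /// For example, a driver can skip the rest of a statement with
+  /// (illustrative; TAL is a TargetAsmLexer):
+  /// \code
+  ///   while (TAL.isNot(AsmToken::EndOfStatement))
+  ///     TAL.Lex();
+  /// \endcode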
+  bool isNot(AsmToken::TokenKind K) const { return CurTok.isNot(K); }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetAsmParser.h b/final/include/llvm/Target/TargetAsmParser.h
new file mode 100644
index 00000000000..9ff50cb275b
--- /dev/null
+++ b/final/include/llvm/Target/TargetAsmParser.h
@@ -0,0 +1,90 @@
+//===-- llvm/Target/TargetAsmParser.h - Target Assembly Parser --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETPARSER_H
+#define LLVM_TARGET_TARGETPARSER_H
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+
+namespace llvm {
+class MCStreamer;
+class StringRef;
+class Target;
+class SMLoc;
+class AsmToken;
+class MCParsedAsmOperand;
+template <typename T> class SmallVectorImpl;
+
+/// TargetAsmParser - Generic interface to target specific assembly parsers.
+class TargetAsmParser : public MCAsmParserExtension {
+  TargetAsmParser(const TargetAsmParser &);   // DO NOT IMPLEMENT
+  void operator=(const TargetAsmParser &);    // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  TargetAsmParser(const Target &);
+
+  /// The Target that this machine was created for.
+  const Target &TheTarget;
+
+  /// The current set of available features.
+  unsigned AvailableFeatures;
+
+public:
+  virtual ~TargetAsmParser();
+
+  const Target &getTarget() const { return TheTarget; }
+
+  unsigned getAvailableFeatures() const { return AvailableFeatures; }
+  void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
+
+  virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+                             SMLoc &EndLoc) = 0;
+
+  /// ParseInstruction - Parse one assembly instruction.
+  ///
+  /// The parser is positioned following the instruction name. The target
+  /// specific instruction parser should parse the entire instruction and
+  /// construct the appropriate MCInst, or emit an error. On success, the
+  /// entire line should be parsed up to and including the end-of-statement
+  /// token. On failure, the parser is not required to read to the end of the
+  /// line.
+  ///
+  /// \param Name - The instruction name.
+  /// \param NameLoc - The source location of the name.
+  /// \param Operands [out] - The list of parsed operands, this returns
+  ///        ownership of them to the caller.
+  /// \return True on failure.
+  virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
+                        SmallVectorImpl<MCParsedAsmOperand*> &Operands) = 0;
+
+  /// ParseDirective - Parse a target specific assembler directive.
+  ///
+  /// The parser is positioned following the directive name.  The target
+  /// specific directive parser should parse the entire directive doing or
+  /// recording any target specific work, or return true and do nothing if the
+  /// directive is not target specific. If the directive is specific for
+  /// the target, the entire line is parsed up to and including the
+  /// end-of-statement token and false is returned.
+  ///
+  /// \param DirectiveID - the identifier token of the directive.
+  virtual bool ParseDirective(AsmToken DirectiveID) = 0;
+
+  /// MatchAndEmitInstruction - Recognize a series of operands of a parsed
+  /// instruction as an actual MCInst and emit it to the specified MCStreamer.
+  /// This returns false on success and returns true on failure to match.
+  ///
+  /// On failure, the target parser is responsible for emitting a diagnostic
+  /// explaining the match failure.
+  virtual bool
+  MatchAndEmitInstruction(SMLoc IDLoc,
+                          SmallVectorImpl<MCParsedAsmOperand*> &Operands,
+                          MCStreamer &Out) = 0;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetCallingConv.h b/final/include/llvm/Target/TargetCallingConv.h
new file mode 100644
index 00000000000..275957e0153
--- /dev/null
+++ b/final/include/llvm/Target/TargetCallingConv.h
@@ -0,0 +1,140 @@
+//===-- llvm/Target/TargetCallingConv.h - Calling Convention ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines types for working with calling-convention information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETCALLINGCONV_H
+#define LLVM_TARGET_TARGETCALLINGCONV_H
+
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+
+namespace llvm {
+
+namespace ISD {
+  struct ArgFlagsTy {
+  private:
+    static const uint64_t NoFlagSet      = 0ULL;
+    static const uint64_t ZExt           = 1ULL<<0;  ///< Zero extended
+    static const uint64_t ZExtOffs       = 0;
+    static const uint64_t SExt           = 1ULL<<1;  ///< Sign extended
+    static const uint64_t SExtOffs       = 1;
+    static const uint64_t InReg          = 1ULL<<2;  ///< Passed in register
+    static const uint64_t InRegOffs      = 2;
+    static const uint64_t SRet           = 1ULL<<3;  ///< Hidden struct-ret ptr
+    static const uint64_t SRetOffs       = 3;
+    static const uint64_t ByVal          = 1ULL<<4;  ///< Struct passed by value
+    static const uint64_t ByValOffs      = 4;
+    static const uint64_t Nest           = 1ULL<<5;  ///< Nested fn static chain
+    static const uint64_t NestOffs       = 5;
+    static const uint64_t ByValAlign     = 0xFULL << 6;  ///< Struct alignment
+    static const uint64_t ByValAlignOffs = 6;
+    static const uint64_t Split          = 1ULL << 10;
+    static const uint64_t SplitOffs      = 10;
+    static const uint64_t OrigAlign      = 0x1FULL<<27;
+    static const uint64_t OrigAlignOffs  = 27;
+    static const uint64_t ByValSize      = 0xffffffffULL << 32;  ///< Struct size
+    static const uint64_t ByValSizeOffs  = 32;
+
+    static const uint64_t One            = 1ULL;  ///< 1 of this type, for shifts
+
+    uint64_t Flags;
+  public:
+    ArgFlagsTy() : Flags(0) { }
+
+    bool isZExt()   const { return Flags & ZExt; }
+    void setZExt()  { Flags |= One << ZExtOffs; }
+
+    bool isSExt()   const { return Flags & SExt; }
+    void setSExt()  { Flags |= One << SExtOffs; }
+
+    bool isInReg()  const { return Flags & InReg; }
+    void setInReg() { Flags |= One << InRegOffs; }
+
+    bool isSRet()   const { return Flags & SRet; }
+    void setSRet()  { Flags |= One << SRetOffs; }
+
+    bool isByVal()  const { return Flags & ByVal; }
+    void setByVal() { Flags |= One << ByValOffs; }
+
+    bool isNest()   const { return Flags & Nest; }
+    void setNest()  { Flags |= One << NestOffs; }
+
+    unsigned getByValAlign() const {
+      return (unsigned)
+        ((One << ((Flags & ByValAlign) >> ByValAlignOffs)) / 2);
+    }
+    void setByValAlign(unsigned A) {
+      Flags = (Flags & ~ByValAlign) |
+        (uint64_t(Log2_32(A) + 1) << ByValAlignOffs);
+    }
+
+    bool isSplit()   const { return Flags & Split; }
+    void setSplit()  { Flags |= One << SplitOffs; }
+
+    unsigned getOrigAlign() const {
+      return (unsigned)
+        ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2);
+    }
+    void setOrigAlign(unsigned A) {
+      Flags = (Flags & ~OrigAlign) |
+        (uint64_t(Log2_32(A) + 1) << OrigAlignOffs);
+    }
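+
+    /// Note: the alignment fields above store log2(alignment) + 1, so a raw
+    /// value of n decodes to (1 << n) / 2 and 0 means "unset".  For example,
+    /// setByValAlign(16) stores Log2_32(16) + 1 == 5, and getByValAlign()
+    /// then yields (1 << 5) / 2 == 16.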
+
+    unsigned getByValSize() const {
+      return (unsigned)((Flags & ByValSize) >> ByValSizeOffs);
+    }
+    void setByValSize(unsigned S) {
+      Flags = (Flags & ~ByValSize) | (uint64_t(S) << ByValSizeOffs);
+    }
+
+    /// getArgFlagsString - Returns the flags as a string, e.g. "zext align:4".
+    std::string getArgFlagsString();
+
+    /// getRawBits - Represent the flags as a bunch of bits.
+    uint64_t getRawBits() const { return Flags; }
+  };
+
+  /// InputArg - This struct carries flags and type information about a
+  /// single incoming (formal) argument or incoming (from the perspective
+  /// of the caller) return value virtual register.
+  ///
+  struct InputArg {
+    ArgFlagsTy Flags;
+    MVT VT;
+    bool Used;
+
+    InputArg() : VT(MVT::Other), Used(false) {}
+    InputArg(ArgFlagsTy flags, EVT vt, bool used)
+      : Flags(flags), Used(used) {
+      VT = vt.getSimpleVT();
+    }
+  };
+
+  /// OutputArg - This struct carries flags and a value for a
+  /// single outgoing (actual) argument or outgoing (from the perspective
+  /// of the caller) return value virtual register.
+  ///
+  struct OutputArg {
+    ArgFlagsTy Flags;
+    MVT VT;
+
+    /// IsFixed - Is this a "fixed" value, i.e. not passed through a vararg "...".
+    bool IsFixed;
+
+    OutputArg() : IsFixed(false) {}
+    OutputArg(ArgFlagsTy flags, EVT vt, bool isfixed)
+      : Flags(flags), IsFixed(isfixed) {
+      VT = vt.getSimpleVT();
+    }
+  };
+}
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetCallingConv.td b/final/include/llvm/Target/TargetCallingConv.td
new file mode 100644
index 00000000000..6da3ba13bb3
--- /dev/null
+++ b/final/include/llvm/Target/TargetCallingConv.td
@@ -0,0 +1,135 @@
+//===- TargetCallingConv.td - Target Calling Conventions ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces with which targets
+// describe their calling conventions.
+//
+//===----------------------------------------------------------------------===//
+
+class CCAction;
+class CallingConv;
+
+/// CCCustom - Calls a custom arg handling function.
+class CCCustom<string fn> : CCAction {
+  string FuncName = fn;
+}
+
+/// CCPredicateAction - Instances of this class check some predicate, then
+/// delegate to another action if the predicate is true.
+class CCPredicateAction<CCAction A> : CCAction {
+  CCAction SubAction = A;
+}
+
+/// CCIfType - If the current argument is one of the specified types, apply
+/// Action A.
+class CCIfType<list<ValueType> vts, CCAction A> : CCPredicateAction<A> {
+  list<ValueType> VTs = vts;
+}
+
+/// CCIf - If the predicate matches, apply A.
+class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
+  string Predicate = predicate;
+}
+
+/// CCIfByVal - If the current argument has ByVal parameter attribute, apply
+/// Action A.
+class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
+}
+
+/// CCIfCC - Match if the current calling convention is 'CC'.
+class CCIfCC<string CC, CCAction A>
+  : CCIf<!strconcat("State.getCallingConv() == ", CC), A> {}
+
+/// CCIfInReg - If this argument is marked with the 'inreg' attribute, apply
+/// the specified action.
+class CCIfInReg<CCAction A> : CCIf<"ArgFlags.isInReg()", A> {}
+
+/// CCIfNest - If this argument is marked with the 'nest' attribute, apply
+/// the specified action.
+class CCIfNest<CCAction A> : CCIf<"ArgFlags.isNest()", A> {}
+
+/// CCIfSplit - If this argument is marked with the 'split' attribute, apply
+/// the specified action.
+class CCIfSplit<CCAction A> : CCIf<"ArgFlags.isSplit()", A> {}
+
+/// CCIfSRet - If this argument is marked with the 'sret' attribute, apply
+/// the specified action.
+class CCIfSRet<CCAction A> : CCIf<"ArgFlags.isSRet()", A> {}
+
+/// CCIfNotVarArg - If the current function is not vararg - apply the action.
+class CCIfNotVarArg<CCAction A> : CCIf<"!State.isVarArg()", A> {}
+
+/// CCAssignToReg - This action matches if there is a register in the specified
+/// list that is still available.  If so, it assigns the value to the first
+/// available register and succeeds.
+class CCAssignToReg<list<Register> regList> : CCAction {
+  list<Register> RegList = regList;
+}
+
+/// CCAssignToRegWithShadow - Same as CCAssignToReg, but with list of registers
+/// which became shadowed, when some register is used.
+class CCAssignToRegWithShadow<list<Register> regList,
+                              list<Register> shadowList> : CCAction {
+  list<Register> RegList = regList;
+  list<Register> ShadowRegList = shadowList;
+}
+
+/// CCAssignToStack - This action always matches: it assigns the value to a
+/// stack slot of the specified size and alignment on the stack.  If size is
+/// zero then the ABI size is used; if align is zero then the ABI alignment
+/// is used - these may depend on the target or subtarget.
+class CCAssignToStack<int size, int align> : CCAction {
+  int Size = size;
+  int Align = align;
+}
+
+/// CCAssignToStackWithShadow - Same as CCAssignToStack, but with a register
+/// to be shadowed.
+class CCAssignToStackWithShadow<int size, int align, Register reg> :
+        CCAssignToStack<size, align> {
+  Register ShadowReg = reg;
+}
+
+/// CCPassByVal - This action always matches: it assigns the value to a stack
+/// slot to implement ByVal aggregate parameter passing.  Size and alignment
+/// specify the minimum size and alignment for the stack slot.
+class CCPassByVal<int size, int align> : CCAction {
+  int Size = size;
+  int Align = align;
+}
+
+/// CCPromoteToType - If applied, this promotes the specified current value to
+/// the specified type.
+class CCPromoteToType<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
+/// CCBitConvertToType - If applied, this bitconverts the specified current
+/// value to the specified type.
+class CCBitConvertToType<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
+/// CCPassIndirect - If applied, this stores the value to stack and passes the
+/// pointer as a normal argument.
+class CCPassIndirect<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
+/// CCDelegateTo - This action invokes the specified sub-calling-convention.  It
+/// is successful if the specified CC matches.
+class CCDelegateTo<CallingConv cc> : CCAction {
+  CallingConv CC = cc;
+}
+
+/// CallingConv - An instance of this is used to define each calling convention
+/// that the target supports.
+class CallingConv<list<CCAction> actions> {
+  list<CCAction> Actions = actions;
+}
diff --git a/final/include/llvm/Target/TargetData.h b/final/include/llvm/Target/TargetData.h
new file mode 100644
index 00000000000..25065d30bb6
--- /dev/null
+++ b/final/include/llvm/Target/TargetData.h
@@ -0,0 +1,336 @@
+//===-- llvm/Target/TargetData.h - Data size & alignment info ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target properties related to datatype size/offset/alignment
+// information.  It uses lazy annotations to cache information about how
+// structure types are laid out and used.
+//
+// This structure should be created once, filled in if the defaults are not
+// correct and then passed around by const&.  None of the member functions
+// require modification to the object.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETDATA_H
+#define LLVM_TARGET_TARGETDATA_H
+
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class Value;
+class Type;
+class IntegerType;
+class StructType;
+class StructLayout;
+class GlobalVariable;
+class LLVMContext;
+
+/// Enum used to categorize the alignment types stored by TargetAlignElem
+enum AlignTypeEnum {
+  INTEGER_ALIGN = 'i',               ///< Integer type alignment
+  VECTOR_ALIGN = 'v',                ///< Vector type alignment
+  FLOAT_ALIGN = 'f',                 ///< Floating point type alignment
+  AGGREGATE_ALIGN = 'a',             ///< Aggregate alignment
+  STACK_ALIGN = 's'                  ///< Stack objects alignment
+};
+/// Target alignment element.
+///
+/// Stores the alignment data associated with a given alignment type (pointer,
+/// integer, vector, float) and type bit width.
+///
+/// @note The unusual order of elements in the structure attempts to reduce
+/// padding and make the structure slightly more cache friendly.
+struct TargetAlignElem {
+  AlignTypeEnum       AlignType : 8;  ///< Alignment type (AlignTypeEnum)
+  unsigned            ABIAlign;       ///< ABI alignment for this type/bitw
+  unsigned            PrefAlign;      ///< Pref. alignment for this type/bitw
+  uint32_t            TypeBitWidth;   ///< Type bit width
+
+  /// Initializer
+  static TargetAlignElem get(AlignTypeEnum align_type, unsigned abi_align,
+                             unsigned pref_align, uint32_t bit_width);
+  /// Equality predicate
+  bool operator==(const TargetAlignElem &rhs) const;
+};
+
+class TargetData : public ImmutablePass {
+private:
+  bool          LittleEndian;          ///< Defaults to false
+  unsigned      PointerMemSize;        ///< Pointer size in bytes
+  unsigned      PointerABIAlign;       ///< Pointer ABI alignment
+  unsigned      PointerPrefAlign;      ///< Pointer preferred alignment
+
+  SmallVector<unsigned char, 8> LegalIntWidths; ///< Legal Integers.
+
+  /// Alignments - Where the primitive type alignment data is stored.
+  ///
+  /// @sa init().
+  /// @note Could support multiple size pointer alignments, e.g., 32-bit
+  /// pointers vs. 64-bit pointers by extending TargetAlignment, but for now,
+  /// we don't.
+  SmallVector<TargetAlignElem, 16> Alignments;
+
+  /// InvalidAlignmentElem - This member is a signal that a requested alignment
+  /// type and bit width were not found in the SmallVector.
+  static const TargetAlignElem InvalidAlignmentElem;
+
+  // The StructType -> StructLayout map.
+  mutable void *LayoutMap;
+
+  //! Set/initialize target alignments
+  void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
+                    unsigned pref_align, uint32_t bit_width);
+  unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
+                            bool ABIAlign, const Type *Ty) const;
+  //! Internal helper method that returns requested alignment for type.
+  unsigned getAlignment(const Type *Ty, bool abi_or_pref) const;
+
+  /// Valid alignment predicate.
+  ///
+  /// Predicate that tests a TargetAlignElem reference returned by get() against
+  /// InvalidAlignmentElem.
+  bool validAlignment(const TargetAlignElem &align) const {
+    return &align != &InvalidAlignmentElem;
+  }
+
+public:
+  /// Default ctor.
+  ///
+  /// @note This has to exist, because this is a pass, but it should never be
+  /// used.
+  TargetData();
+
+  /// Constructs a TargetData from a specification string. See init().
+  explicit TargetData(StringRef TargetDescription)
+    : ImmutablePass(ID) {
+    init(TargetDescription);
+  }
+
+  /// Initialize target data from properties stored in the module.
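+  /// \code
+  ///   TargetData TD(M);  // M is a const Module*; picks up its data layout
+  ///   // or, from a (hypothetical) layout string:
+  ///   // TargetData TD2("e-p:32:32:32-i32:32:32");
+  /// \endcode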
+  explicit TargetData(const Module *M);
+
+  TargetData(const TargetData &TD) :
+    ImmutablePass(ID),
+    LittleEndian(TD.isLittleEndian()),
+    PointerMemSize(TD.PointerMemSize),
+    PointerABIAlign(TD.PointerABIAlign),
+    PointerPrefAlign(TD.PointerPrefAlign),
+    LegalIntWidths(TD.LegalIntWidths),
+    Alignments(TD.Alignments),
+    LayoutMap(0)
+  { }
+
+  ~TargetData();  // Not virtual, do not subclass this class
+
+  //! Parse a target data layout string and initialize TargetData alignments.
+  void init(StringRef TargetDescription);
+
+  /// Target endianness...
+  bool isLittleEndian() const { return LittleEndian; }
+  bool isBigEndian() const { return !LittleEndian; }
+
+  /// getStringRepresentation - Return the string representation of the
+  /// TargetData.  This representation is in the same format accepted by the
+  /// string constructor above.
+  std::string getStringRepresentation() const;
+
+  /// isLegalInteger - This function returns true if the specified type is
+  /// known to be a native integer type supported by the CPU.  For example,
+  /// i64 is not native on most 32-bit CPUs and i37 is not native on any known
+  /// one.  This returns false if the integer width is not legal.
+  ///
+  /// The width is specified in bits.
+  ///
+  bool isLegalInteger(unsigned Width) const {
+    for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
+      if (LegalIntWidths[i] == Width)
+        return true;
+    return false;
+  }
+
+  bool isIllegalInteger(unsigned Width) const {
+    return !isLegalInteger(Width);
+  }
+
+  /// Target pointer alignment
+  unsigned getPointerABIAlignment() const { return PointerABIAlign; }
+  /// Return target's alignment for stack-based pointers
+  unsigned getPointerPrefAlignment() const { return PointerPrefAlign; }
+  /// Target pointer size
+  unsigned getPointerSize() const { return PointerMemSize; }
+  /// Target pointer size, in bits
+  unsigned getPointerSizeInBits() const { return 8*PointerMemSize; }
+
+  /// Size examples:
+  ///
+  /// Type        SizeInBits  StoreSizeInBits  AllocSizeInBits[*]
+  /// ----        ----------  ---------------  ------------------
+  ///  i1            1           8                8
+  ///  i8            8           8                8
+  ///  i19          19          24               32
+  ///  i32          32          32               32
+  ///  i100        100         104              128
+  ///  i128        128         128              128
+  ///  Float        32          32               32
+  ///  Double       64          64               64
+  ///  X86_FP80     80          80               96
+  ///
+  /// [*] The alloc size depends on the alignment, and thus on the target.
+  ///     These values are for x86-32 linux.
+
+  /// getTypeSizeInBits - Return the number of bits necessary to hold the
+  /// specified type.  For example, returns 36 for i36 and 80 for x86_fp80.
+  uint64_t getTypeSizeInBits(const Type* Ty) const;
+
+  /// getTypeStoreSize - Return the maximum number of bytes that may be
+  /// overwritten by storing the specified type.  For example, returns 5
+  /// for i36 and 10 for x86_fp80.
+  uint64_t getTypeStoreSize(const Type *Ty) const {
+    return (getTypeSizeInBits(Ty)+7)/8;
+  }
+
+  /// getTypeStoreSizeInBits - Return the maximum number of bits that may be
+  /// overwritten by storing the specified type; always a multiple of 8.  For
+  /// example, returns 40 for i36 and 80 for x86_fp80.
+  uint64_t getTypeStoreSizeInBits(const Type *Ty) const {
+    return 8*getTypeStoreSize(Ty);
+  }
+
+  /// getTypeAllocSize - Return the offset in bytes between successive objects
+  /// of the specified type, including alignment padding.  This is the amount
+  /// that alloca reserves for this type.  For example, returns 12 or 16 for
+  /// x86_fp80, depending on alignment.
+  uint64_t getTypeAllocSize(const Type* Ty) const {
+    // Round up to the next alignment boundary.
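+    // For example, x86_fp80 has a store size of 10 bytes; with an ABI
+    // alignment of 4 this rounds up to 12 (with 16-byte alignment, to 16).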
+    return RoundUpAlignment(getTypeStoreSize(Ty), getABITypeAlignment(Ty));
+  }
+
+  /// getTypeAllocSizeInBits - Return the offset in bits between successive
+  /// objects of the specified type, including alignment padding; always a
+  /// multiple of 8.  This is the amount that alloca reserves for this type.
+  /// For example, returns 96 or 128 for x86_fp80, depending on alignment.
+  uint64_t getTypeAllocSizeInBits(const Type* Ty) const {
+    return 8*getTypeAllocSize(Ty);
+  }
+
+  /// getABITypeAlignment - Return the minimum ABI-required alignment for the
+  /// specified type.
+  unsigned getABITypeAlignment(const Type *Ty) const;
+
+  /// getABIIntegerTypeAlignment - Return the minimum ABI-required alignment for
+  /// an integer type of the specified bitwidth.
+  unsigned getABIIntegerTypeAlignment(unsigned BitWidth) const;
+
+
+  /// getCallFrameTypeAlignment - Return the minimum ABI-required alignment
+  /// for the specified type when it is part of a call frame.
+  unsigned getCallFrameTypeAlignment(const Type *Ty) const;
+
+
+  /// getPrefTypeAlignment - Return the preferred stack/global alignment for
+  /// the specified type.  This is always at least as good as the ABI alignment.
+  unsigned getPrefTypeAlignment(const Type *Ty) const;
+
+  /// getPreferredTypeAlignmentShift - Return the preferred alignment for the
+  /// specified type, returned as log2 of the value (a shift amount).
+  ///
+  unsigned getPreferredTypeAlignmentShift(const Type *Ty) const;
+
+  /// getIntPtrType - Return an unsigned integer type that is the same size as
+  /// or greater than the host pointer size.
+  ///
+  const IntegerType *getIntPtrType(LLVMContext &C) const;
+
+  /// getIndexedOffset - Return the offset from the beginning of the type for
+  /// the specified indices.  This is used to implement getelementptr.
+  ///
+  uint64_t getIndexedOffset(const Type *Ty,
+                            Value* const* Indices, unsigned NumIndices) const;
+
+  /// getStructLayout - Return a StructLayout object, indicating the alignment
+  /// of the struct, its size, and the offsets of its fields.  Note that this
+  /// information is lazily cached.
+  const StructLayout *getStructLayout(const StructType *Ty) const;
+
+  /// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
+  /// objects.  If a TargetData object is alive when types are being refined and
+  /// removed, this method must be called whenever a StructType is removed to
+  /// avoid a dangling pointer in this cache.
+  void InvalidateStructLayoutInfo(const StructType *Ty) const;
+
+  /// getPreferredAlignment - Return the preferred alignment of the specified
+  /// global.  This includes an explicitly requested alignment (if the global
+  /// has one).
+  unsigned getPreferredAlignment(const GlobalVariable *GV) const;
+
+  /// getPreferredAlignmentLog - Return the preferred alignment of the
+  /// specified global, returned in log form.  This includes an explicitly
+  /// requested alignment (if the global has one).
+  unsigned getPreferredAlignmentLog(const GlobalVariable *GV) const;
+
+  /// RoundUpAlignment - Round the specified value up to the next alignment
+  /// boundary specified by Alignment.  For example, 7 rounded up to an
+  /// alignment boundary of 4 is 8.  8 rounded up to the alignment boundary of 4
+  /// is 8 because it is already aligned.
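+  /// \code
+  ///   TargetData::RoundUpAlignment(7u, 4);   // == 8
+  ///   TargetData::RoundUpAlignment(8u, 4);   // == 8 (already aligned)
+  ///   TargetData::RoundUpAlignment(10u, 4);  // == 12, cf. getTypeAllocSize
+  /// \endcode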
+  template <typename UIntTy>
+  static UIntTy RoundUpAlignment(UIntTy Val, unsigned Alignment) {
+    assert((Alignment & (Alignment-1)) == 0 && "Alignment must be power of 2!");
+    return (Val + (Alignment-1)) & ~UIntTy(Alignment-1);
+  }
+
+  static char ID; // Pass identification, replacement for typeid
+};
+
+/// StructLayout - used to lazily calculate structure layout information for a
+/// target machine, based on the TargetData structure.
+///
+class StructLayout {
+  uint64_t StructSize;
+  unsigned StructAlignment;
+  unsigned NumElements;
+  uint64_t MemberOffsets[1];  // variable sized array!
+public:
+
+  uint64_t getSizeInBytes() const {
+    return StructSize;
+  }
+
+  uint64_t getSizeInBits() const {
+    return 8*StructSize;
+  }
+
+  unsigned getAlignment() const {
+    return StructAlignment;
+  }
+
+  /// getElementContainingOffset - Given a valid byte offset into the structure,
+  /// return the structure index that contains it.
+  ///
+  unsigned getElementContainingOffset(uint64_t Offset) const;
+
+  uint64_t getElementOffset(unsigned Idx) const {
+    assert(Idx < NumElements && "Invalid element idx!");
+    return MemberOffsets[Idx];
+  }
+
+  uint64_t getElementOffsetInBits(unsigned Idx) const {
+    return getElementOffset(Idx)*8;
+  }
+
+private:
+  friend class TargetData;   // Only TargetData can create this class
+  StructLayout(const StructType *ST, const TargetData &TD);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetELFWriterInfo.h b/final/include/llvm/Target/TargetELFWriterInfo.h
new file mode 100644
index 00000000000..b97f3e2f4d0
--- /dev/null
+++ b/final/include/llvm/Target/TargetELFWriterInfo.h
@@ -0,0 +1,123 @@
+//===-- llvm/Target/TargetELFWriterInfo.h - ELF Writer Info -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETELFWRITERINFO_H
+#define LLVM_TARGET_TARGETELFWRITERINFO_H
+
+namespace llvm {
+  class Function;
+  class TargetData;
+  class TargetMachine;
+
+  //===--------------------------------------------------------------------===//
+  //                          TargetELFWriterInfo
+  //===--------------------------------------------------------------------===//
+
+  class TargetELFWriterInfo {
+  protected:
+    // EMachine - This field is the target specific value to emit as the
+    // e_machine member of the ELF header.
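+    // For example, an x86-64 writer would be constructed with is64Bit = true
+    // and isLittleEndian = true, and would set EMachine to EM_X86_64 (see the
+    // MachineType enum below).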
+    unsigned short EMachine;
+    bool is64Bit, isLittleEndian;
+  public:
+
+    // Machine architectures
+    enum MachineType {
+      EM_NONE = 0,     // No machine
+      EM_M32 = 1,      // AT&T WE 32100
+      EM_SPARC = 2,    // SPARC
+      EM_386 = 3,      // Intel 386
+      EM_68K = 4,      // Motorola 68000
+      EM_88K = 5,      // Motorola 88000
+      EM_486 = 6,      // Intel 486 (deprecated)
+      EM_860 = 7,      // Intel 80860
+      EM_MIPS = 8,     // MIPS R3000
+      EM_PPC = 20,     // PowerPC
+      EM_ARM = 40,     // ARM
+      EM_ALPHA = 41,   // DEC Alpha
+      EM_SPARCV9 = 43, // SPARC V9
+      EM_X86_64 = 62   // AMD64
+    };
+
+    // ELF File classes
+    enum {
+      ELFCLASS32 = 1, // 32-bit object file
+      ELFCLASS64 = 2  // 64-bit object file
+    };
+
+    // ELF Endianness
+    enum {
+      ELFDATA2LSB = 1, // Little-endian object file
+      ELFDATA2MSB = 2  // Big-endian object file
+    };
+
+    explicit TargetELFWriterInfo(bool is64Bit_, bool isLittleEndian_);
+    virtual ~TargetELFWriterInfo();
+
+    unsigned short getEMachine() const { return EMachine; }
+    unsigned getEFlags() const { return 0; }
+    unsigned getEIClass() const { return is64Bit ? ELFCLASS64 : ELFCLASS32; }
+    unsigned getEIData() const {
+      return isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB;
+    }
+
+    /// ELF Header and ELF Section Header Info
+    unsigned getHdrSize() const { return is64Bit ? 64 : 52; }
+    unsigned getSHdrSize() const { return is64Bit ? 64 : 40; }
+
+    /// Symbol Table Info
+    unsigned getSymTabEntrySize() const { return is64Bit ? 24 : 16; }
+
+    /// getPrefELFAlignment - Returns the preferred alignment for ELF. This
+    /// is used to align some sections.
+    unsigned getPrefELFAlignment() const { return is64Bit ? 8 : 4; }
+
+    /// getRelocationEntrySize - Entry size used in the relocation section
+    unsigned getRelocationEntrySize() const {
+      return is64Bit ? (hasRelocationAddend() ? 24 : 16)
+                     : (hasRelocationAddend() ? 12 : 8);
+    }
+
+    /// getRelocationType - Returns the target specific ELF Relocation type.
+    /// 'MachineRelTy' contains the object code independent relocation type
+    virtual unsigned getRelocationType(unsigned MachineRelTy) const = 0;
+
+    /// hasRelocationAddend - True if the target uses an addend in the
+    /// ELF relocation entry.
+    virtual bool hasRelocationAddend() const = 0;
+
+    /// getDefaultAddendForRelTy - Gets the default addend value for a
+    /// relocation entry based on the target ELF relocation type.
+    virtual long int getDefaultAddendForRelTy(unsigned RelTy,
+                                              long int Modifier = 0) const = 0;
+
+    /// getRelocationTySize - Returns the size of the relocatable field in bits
+    virtual unsigned getRelocationTySize(unsigned RelTy) const = 0;
+
+    /// isPCRelativeRel - True if the relocation type is pc relative
+    virtual bool isPCRelativeRel(unsigned RelTy) const = 0;
+
+    /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used
+    /// to reference a jumptable.
+    virtual unsigned getAbsoluteLabelMachineRelTy() const = 0;
+
+    /// computeRelocation - Some relocatable fields could be relocated
+    /// directly, avoiding the relocation symbol emission, compute the
+    /// final relocation value for this symbol.
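+    /// A pc-relative relocation might be computed as, e.g. (sketch only;
+    /// the 4-byte bias is target specific):
+    /// \code
+    ///   return SymOffset - (RelOffset + 4);
+    /// \endcode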
+    virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset,
+                                       unsigned RelTy) const = 0;
+  };
+
+} // end llvm namespace
+
+#endif // LLVM_TARGET_TARGETELFWRITERINFO_H
diff --git a/final/include/llvm/Target/TargetFrameLowering.h b/final/include/llvm/Target/TargetFrameLowering.h
new file mode 100644
index 00000000000..e104b1663fd
--- /dev/null
+++ b/final/include/llvm/Target/TargetFrameLowering.h
@@ -0,0 +1,196 @@
+//===-- llvm/Target/TargetFrameLowering.h -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface to describe the layout of a stack frame on the target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETFRAMELOWERING_H
+#define LLVM_TARGET_TARGETFRAMELOWERING_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+#include <utility>
+#include <vector>
+
+namespace llvm {
+  class CalleeSavedInfo;
+  class MachineFunction;
+  class MachineBasicBlock;
+  class MachineMove;
+  class RegScavenger;
+  class TargetRegisterInfo;
+
+/// Information about stack frame layout on the target.  It holds the direction
+/// of stack growth, the known stack alignment on entry to each function, and
+/// the offset to the locals area.
+///
+/// The offset to the local area is the offset from the stack pointer on
+/// function entry to the first location where function data (local variables,
+/// spill locations) can be stored.
+class TargetFrameLowering {
+public:
+  enum StackDirection {
+    StackGrowsUp,        // Adding to the stack increases the stack address
+    StackGrowsDown       // Adding to the stack decreases the stack address
+  };
+
+  // Maps a callee saved register to a stack slot with a fixed offset.
+  struct SpillSlot {
+    unsigned Reg;
+    int Offset; // Offset relative to stack pointer on function entry.
+  };
+private:
+  StackDirection StackDir;
+  unsigned StackAlignment;
+  unsigned TransientStackAlignment;
+  int LocalAreaOffset;
+public:
+  TargetFrameLowering(StackDirection D, unsigned StackAl, int LAO,
+                      unsigned TransAl = 1)
+    : StackDir(D), StackAlignment(StackAl), TransientStackAlignment(TransAl),
+      LocalAreaOffset(LAO) {}
+
+  virtual ~TargetFrameLowering();
+
+  // These methods return information that describes the abstract stack layout
+  // of the target machine.
+
+  /// getStackGrowthDirection - Return the direction the stack grows
+  ///
+  StackDirection getStackGrowthDirection() const { return StackDir; }
+
+  /// getStackAlignment - This method returns the number of bytes to which the
+  /// stack pointer must be aligned on entry to a function.  Typically, this
+  /// is the largest alignment for any data object in the target.
+  ///
+  unsigned getStackAlignment() const { return StackAlignment; }
+
+  /// getTransientStackAlignment - This method returns the number of bytes to
+  /// which the stack pointer must be aligned at all times, even between
+  /// calls.
+  ///
+  unsigned getTransientStackAlignment() const {
+    return TransientStackAlignment;
+  }
+
+  /// getOffsetOfLocalArea - This method returns the offset of the local area
+  /// from the stack pointer on entrance to a function.
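+  /// For example, a subclass for a target whose stack grows down, is 8-byte
+  /// aligned on entry, and keeps locals just below the return address might
+  /// use (illustrative; "MyFrameLowering" is invented):
+  /// \code
+  ///   MyFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, -4) {}
+  /// \endcode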
+  ///
+  int getOffsetOfLocalArea() const { return LocalAreaOffset; }
+
+  /// getCalleeSavedSpillSlots - This method returns a pointer to an array of
+  /// pairs that contains an entry for each callee saved register that must be
+  /// spilled to a particular stack location if it is spilled.
+  ///
+  /// Each entry in this array contains a <Reg, Offset> pair, indicating the
+  /// fixed offset from the incoming stack pointer that each register should be
+  /// spilled at.  If a register is not listed here, the code generator is
+  /// allowed to spill it anywhere it chooses.
+  ///
+  virtual const SpillSlot *
+  getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+    NumEntries = 0;
+    return 0;
+  }
+
+  /// targetHandlesStackFrameRounding - Returns true if the target is
+  /// responsible for rounding up the stack frame (probably at emitPrologue
+  /// time).
+  virtual bool targetHandlesStackFrameRounding() const {
+    return false;
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  virtual void emitPrologue(MachineFunction &MF) const = 0;
+  virtual void emitEpilogue(MachineFunction &MF,
+                            MachineBasicBlock &MBB) const = 0;
+
+  /// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
+  /// saved registers and returns true if it isn't possible / profitable to do
+  /// so by issuing a series of store instructions via
+  /// storeRegToStackSlot(). Returns false otherwise.
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
+  /// restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee
+  /// saved registers and returns true if it isn't possible / profitable to do
+  /// so by issuing a series of load instructions via loadRegToStackSlot().
+  /// Returns false otherwise.
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                        const TargetRegisterInfo *TRI) const {
+    return false;
+  }
+
+  /// hasFP - Return true if the specified function should have a dedicated
+  /// frame pointer register.  For most targets this is true only if the function
+  /// has variable sized allocas or if frame pointer elimination is disabled.
+  virtual bool hasFP(const MachineFunction &MF) const = 0;
+
+  /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
+  /// not required, we reserve argument space for call sites in the function
+  /// immediately on entry to the current function.  This eliminates the need for
+  /// add/sub sp brackets around call sites.  Returns true if the call frame is
+  /// included as part of the stack frame.
+  virtual bool hasReservedCallFrame(const MachineFunction &MF) const {
+    return !hasFP(MF);
+  }
+
+  /// canSimplifyCallFramePseudos - When possible, it's best to simplify the
+  /// call frame pseudo ops before doing frame index elimination.  This is
+  /// possible only when frame index references between the pseudos won't
+  /// need adjusting for the call frame adjustments.  Normally, that's true
+  /// if the function has a reserved call frame or a frame pointer.  Some
+  /// targets (Thumb2, for example) may have more complicated criteria,
+  /// however, and can override this behavior.
+  virtual bool canSimplifyCallFramePseudos(const MachineFunction &MF) const {
+    return hasReservedCallFrame(MF) || hasFP(MF);
+  }
+
+  /// getInitialFrameState - Returns a list of machine moves that are assumed
+  /// on entry to all functions.  Note that LabelID is ignored (assumed to be
+  /// the beginning of the function.)
+  virtual void getInitialFrameState(std::vector<MachineMove> &Moves) const;
+
+  /// getFrameIndexOffset - Returns the displacement from the frame register to
+  /// the stack frame of the specified index.
+  virtual int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  /// getFrameIndexReference - This method should return the base register
+  /// and offset used to reference a frame index location.  The offset is
+  /// returned directly, and the base register is returned via FrameReg.
+  virtual int getFrameIndexReference(const MachineFunction &MF, int FI,
+                                     unsigned &FrameReg) const;
+
+  /// processFunctionBeforeCalleeSavedScan - This method is called immediately
+  /// before PrologEpilogInserter scans the physical registers used to determine
+  /// what callee saved registers should be spilled. This method is optional.
+  virtual void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                             RegScavenger *RS = NULL) const {
+  }
+
+  /// processFunctionBeforeFrameFinalized - This method is called immediately
+  /// before the specified function's frame layout (MF.getFrameInfo()) is
+  /// finalized.  Once the frame is finalized, MO_FrameIndex operands are
+  /// replaced with direct constants.  This method is optional.
+  ///
+  virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetInstrDesc.h b/final/include/llvm/Target/TargetInstrDesc.h
new file mode 100644
index 00000000000..8823d5a4d17
--- /dev/null
+++ b/final/include/llvm/Target/TargetInstrDesc.h
@@ -0,0 +1,513 @@
+//===-- llvm/Target/TargetInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetOperandInfo and TargetInstrDesc classes, which
+// are used to describe target instructions and their operands.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETINSTRDESC_H
+#define LLVM_TARGET_TARGETINSTRDESC_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+//===----------------------------------------------------------------------===//
+// Machine Operand Flags and Description
+//===----------------------------------------------------------------------===//
+
+namespace TOI {
+  // Operand constraints
+  enum OperandConstraint {
+    TIED_TO = 0,    // Must be allocated the same register as.
+    EARLY_CLOBBER   // Operand is an early clobber register operand
+  };
+
+  /// OperandFlags - These are flags set on operands, but should be considered
+  /// private, all access should go through the TargetOperandInfo accessors.
+  /// See the accessors for a description of what these are.
+  enum OperandFlags {
+    LookupPtrRegClass = 0,
+    Predicate,
+    OptionalDef
+  };
+}
+
+/// TargetOperandInfo - This holds information about one operand of a machine
+/// instruction, indicating the register class for register operands, etc.
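+/// For instance, given a TargetInstrDesc TID and a TargetRegisterInfo *TRI,
+/// the class of a register operand can be resolved with (illustrative):
+/// \code
+///   const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(TRI);
+/// \endcode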
+///
+class TargetOperandInfo {
+public:
+  /// RegClass - This specifies the register class enumeration of the operand
+  /// if the operand is a register.  If isLookupPtrRegClass is set, then this is
+  /// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to
+  /// get a dynamic register class.
+  ///
+  /// NOTE: This member should be considered to be private, all access should go
+  /// through "getRegClass(TRI)" below.
+  short RegClass;
+
+  /// Flags - These are flags from the TOI::OperandFlags enum.
+  unsigned short Flags;
+
+  /// Lower 16 bits are used to specify which constraints are set. The higher 16
+  /// bits are used to specify the value of constraints (4 bits each).
+  unsigned Constraints;
+  /// Currently no other information.
+
+  /// getRegClass - Get the register class for the operand, handling resolution
+  /// of "symbolic" pointer register classes etc.  If this is not a register
+  /// operand, this returns null.
+  const TargetRegisterClass *getRegClass(const TargetRegisterInfo *TRI) const;
+
+
+  /// isLookupPtrRegClass - Set if this operand is a pointer value and it
+  /// requires a callback to look up its register class.
+  bool isLookupPtrRegClass() const { return Flags&(1 << TOI::LookupPtrRegClass); }
+
+  /// isPredicate - Set if this is one of the operands that make up the
+  /// predicate operand that controls an isPredicable() instruction.
+  bool isPredicate() const { return Flags & (1 << TOI::Predicate); }
+
+  /// isOptionalDef - Set if this operand is an optional def.
+  ///
+  bool isOptionalDef() const { return Flags & (1 << TOI::OptionalDef); }
+};
+
+
+//===----------------------------------------------------------------------===//
+// Machine Instruction Flags and Description
+//===----------------------------------------------------------------------===//
+
+/// TargetInstrDesc flags - These should be considered private to the
+/// implementation of the TargetInstrDesc class.  Clients should use the
+/// predicate methods on TargetInstrDesc, not use these directly.  These
+/// all correspond to bitfields in the TargetInstrDesc::Flags field.
+namespace TID {
+  enum {
+    Variadic = 0,
+    HasOptionalDef,
+    Return,
+    Call,
+    Barrier,
+    Terminator,
+    Branch,
+    IndirectBranch,
+    Compare,
+    MoveImm,
+    DelaySlot,
+    FoldableAsLoad,
+    MayLoad,
+    MayStore,
+    Predicable,
+    NotDuplicable,
+    UnmodeledSideEffects,
+    Commutable,
+    ConvertibleTo3Addr,
+    UsesCustomInserter,
+    Rematerializable,
+    CheapAsAMove,
+    ExtraSrcRegAllocReq,
+    ExtraDefRegAllocReq
+  };
+}
+
+/// TargetInstrDesc - Describe properties that are true of each instruction in
+/// the target description file.  This captures information about side effects,
+/// register use and many other things.  There is one instance of this struct
+/// for each target instruction class, and the MachineInstr class points to
+/// this struct directly to describe itself.
+class TargetInstrDesc {
+public:
+  unsigned short  Opcode;        // The opcode number
+  unsigned short  NumOperands;   // Num of args (may be more if variable_ops)
+  unsigned short  NumDefs;       // Num of args that are definitions
+  unsigned short  SchedClass;    // enum identifying instr sched class
+  const char *    Name;          // Name of the instruction record in the .td file
+  unsigned        Flags;         // Flags identifying machine instr class
+  uint64_t        TSFlags;       // Target Specific Flag values
+  const unsigned *ImplicitUses;  // Registers implicitly read by this instr
+  const unsigned *ImplicitDefs;  // Registers implicitly defined by this instr
+  const TargetRegisterClass **RCBarriers; // Reg classes completely "clobbered"
+  const TargetOperandInfo *OpInfo; // 'NumOperands' entries about operands
+
+  /// getOperandConstraint - Returns the value of the specific constraint if
+  /// it is set. Returns -1 if it is not set.
+  int getOperandConstraint(unsigned OpNum,
+                           TOI::OperandConstraint Constraint) const {
+    if (OpNum < NumOperands &&
+        (OpInfo[OpNum].Constraints & (1 << Constraint))) {
+      unsigned Pos = 16 + Constraint * 4;
+      return (int)(OpInfo[OpNum].Constraints >> Pos) & 0xf;
+    }
+    return -1;
+  }
+
+  /// getRegClass - Returns the register class constraint for OpNum, or NULL.
+  const TargetRegisterClass *getRegClass(unsigned OpNum,
+                                         const TargetRegisterInfo *TRI) const {
+    return OpNum < NumOperands ? OpInfo[OpNum].getRegClass(TRI) : 0;
+  }
+
+  /// getOpcode - Return the opcode number for this descriptor.
+  unsigned getOpcode() const {
+    return Opcode;
+  }
+
+  /// getName - Return the name of the record in the .td file for this
+  /// instruction, for example "ADD8ri".
+  const char *getName() const {
+    return Name;
+  }
+
+  /// getNumOperands - Return the number of declared MachineOperands for this
+  /// MachineInstruction.  Note that variadic (isVariadic() returns true)
+  /// instructions may have additional operands at the end of the list, and note
+  /// that the machine instruction may include implicit register def/uses as
+  /// well.
+  unsigned getNumOperands() const {
+    return NumOperands;
+  }
+
+  /// getNumDefs - Return the number of MachineOperands that are register
+  /// definitions.  Register definitions always occur at the start of the
+  /// machine operand list.  This is the number of "outs" in the .td file,
+  /// and does not include implicit defs.
+  unsigned getNumDefs() const {
+    return NumDefs;
+  }
+
+  /// isVariadic - Return true if this instruction can have a variable number of
+  /// operands.  In this case, the variable operands will be after the normal
+  /// operands but before the implicit definitions and uses (if any are
+  /// present).
+  bool isVariadic() const {
+    return Flags & (1 << TID::Variadic);
+  }
+
+  /// hasOptionalDef - Set if this instruction has an optional definition, e.g.
+  /// ARM instructions which can set condition code if 's' bit is set.
+  bool hasOptionalDef() const {
+    return Flags & (1 << TID::HasOptionalDef);
+  }
+
+  /// getImplicitUses - Return a list of registers that are potentially
+  /// read by any instance of this machine instruction.  For example, on X86,
+  /// the "adc" instruction adds two register operands and adds the carry bit in
+  /// from the flags register.  In this case, the instruction is marked as
+  /// implicitly reading the flags.  Likewise, the variable shift instruction on
+  /// X86 is marked as implicitly reading the 'CL' register, which it always
+  /// does.
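+  /// The returned list is terminated by a 0 entry and can be walked as
+  /// (illustrative; the loop body is up to the caller):
+  /// \code
+  ///   for (const unsigned *U = TID.getImplicitUses(); U && *U; ++U)
+  ///     noteImplicitUse(*U);  // hypothetical helper
+  /// \endcode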
+  ///
+  /// This method returns null if the instruction has no implicit uses.
+  const unsigned *getImplicitUses() const {
+    return ImplicitUses;
+  }
+
+  /// getNumImplicitUses - Return the number of implicit uses this instruction
+  /// has.
+  unsigned getNumImplicitUses() const {
+    if (ImplicitUses == 0) return 0;
+    unsigned i = 0;
+    for (; ImplicitUses[i]; ++i) /*empty*/;
+    return i;
+  }
+
+
+  /// getImplicitDefs - Return a list of registers that are potentially
+  /// written by any instance of this machine instruction.  For example, on X86,
+  /// many instructions implicitly set the flags register.  In this case, they
+  /// are marked as setting the FLAGS.  Likewise, many instructions always
+  /// deposit their result in a physical register.  For example, the X86 divide
+  /// instruction always deposits the quotient and remainder in the EAX/EDX
+  /// registers.  For that instruction, this will return a list containing the
+  /// EAX/EDX/EFLAGS registers.
+  ///
+  /// This method returns null if the instruction has no implicit defs.
+  const unsigned *getImplicitDefs() const {
+    return ImplicitDefs;
+  }
+
+  /// getNumImplicitDefs - Return the number of implicit defs this instruction
+  /// has.
+  unsigned getNumImplicitDefs() const {
+    if (ImplicitDefs == 0) return 0;
+    unsigned i = 0;
+    for (; ImplicitDefs[i]; ++i) /*empty*/;
+    return i;
+  }
+
+  /// hasImplicitUseOfPhysReg - Return true if this instruction implicitly
+  /// uses the specified physical register.
+  bool hasImplicitUseOfPhysReg(unsigned Reg) const {
+    if (const unsigned *ImpUses = ImplicitUses)
+      for (; *ImpUses; ++ImpUses)
+        if (*ImpUses == Reg) return true;
+    return false;
+  }
+
+  /// hasImplicitDefOfPhysReg - Return true if this instruction implicitly
+  /// defines the specified physical register.
+  bool hasImplicitDefOfPhysReg(unsigned Reg) const {
+    if (const unsigned *ImpDefs = ImplicitDefs)
+      for (; *ImpDefs; ++ImpDefs)
+        if (*ImpDefs == Reg) return true;
+    return false;
+  }
+
+  /// getRegClassBarriers - Return a list of register classes that are
+  /// completely clobbered by this machine instruction.  For example, on X86
+  /// the call instructions will completely clobber all the registers in the
+  /// fp stack and XMM classes.
+  ///
+  /// This method returns null if the instruction doesn't completely clobber
+  /// any register class.
+  const TargetRegisterClass **getRegClassBarriers() const {
+    return RCBarriers;
+  }
+
+  /// getSchedClass - Return the scheduling class for this instruction.  The
+  /// scheduling class is an index into the InstrItineraryData table.  This
+  /// returns zero if there is no known scheduling information for the
+  /// instruction.
+  ///
+  unsigned getSchedClass() const {
+    return SchedClass;
+  }
+
+  bool isReturn() const {
+    return Flags & (1 << TID::Return);
+  }
+
+  bool isCall() const {
+    return Flags & (1 << TID::Call);
+  }
+
+  /// isBarrier - Returns true if the specified instruction stops control flow
+  /// from executing the instruction immediately following it.  Examples include
+  /// unconditional branches and return instructions.
+  bool isBarrier() const {
+    return Flags & (1 << TID::Barrier);
+  }
+
+  /// isTerminator - Returns true if this instruction is part of the terminator
+  /// for a basic block.  Typically this is things like return and branch
+  /// instructions.
+  ///
+  /// Various passes use this to insert code into the bottom of a basic block,
+  /// but before control flow occurs.
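+  /// For example, a pass that appends code to a block can check
+  /// (illustrative; MI is a MachineInstr*):
+  /// \code
+  ///   if (MI->getDesc().isTerminator())
+  ///     InsertPt = MI;  // insert before the terminator, not after it
+  /// \endcode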
+  bool isTerminator() const {
+    return Flags & (1 << TID::Terminator);
+  }
+
+  /// isBranch - Returns true if this is a conditional, unconditional, or
+  /// indirect branch.  Predicates below can be used to discriminate between
+  /// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
+  /// get more information.
+  bool isBranch() const {
+    return Flags & (1 << TID::Branch);
+  }
+
+  /// isIndirectBranch - Return true if this is an indirect branch, such as a
+  /// branch through a register.
+  bool isIndirectBranch() const {
+    return Flags & (1 << TID::IndirectBranch);
+  }
+
+  /// isConditionalBranch - Return true if this is a branch which may fall
+  /// through to the next instruction or may transfer control flow to some other
+  /// block.  The TargetInstrInfo::AnalyzeBranch method can be used to get more
+  /// information about this branch.
+  bool isConditionalBranch() const {
+    return isBranch() & !isBarrier() & !isIndirectBranch();
+  }
+
+  /// isUnconditionalBranch - Return true if this is a branch which always
+  /// transfers control flow to some other block.  The
+  /// TargetInstrInfo::AnalyzeBranch method can be used to get more information
+  /// about this branch.
+  bool isUnconditionalBranch() const {
+    return isBranch() & isBarrier() & !isIndirectBranch();
+  }
+
+  /// isPredicable - Return true if this instruction has a predicate operand
+  /// that controls execution.  It may be set to 'always', or may be set to other
+  /// values.  There are various methods in TargetInstrInfo that can be used to
+  /// control and modify the predicate in this instruction.
+  bool isPredicable() const {
+    return Flags & (1 << TID::Predicable);
+  }
+
+  /// isCompare - Return true if this instruction is a comparison.
+  bool isCompare() const {
+    return Flags & (1 << TID::Compare);
+  }
+
+  /// isMoveImmediate - Return true if this instruction is a move immediate
+  /// (including conditional moves) instruction.
+  bool isMoveImmediate() const {
+    return Flags & (1 << TID::MoveImm);
+  }
+
+  /// isNotDuplicable - Return true if this instruction cannot be safely
+  /// duplicated.  For example, if the instruction has unique labels attached
+  /// to it, duplicating it would cause multiple definition errors.
+  bool isNotDuplicable() const {
+    return Flags & (1 << TID::NotDuplicable);
+  }
+
+  /// hasDelaySlot - Returns true if the specified instruction has a delay slot
+  /// which must be filled by the code generator.
+  bool hasDelaySlot() const {
+    return Flags & (1 << TID::DelaySlot);
+  }
+
+  /// canFoldAsLoad - Return true for instructions that can be folded as
+  /// memory operands in other instructions.  The most common use for this
+  /// is instructions that are simple loads from memory that don't modify
+  /// the loaded value in any way, but it can also be used for instructions
+  /// that can be expressed as constant-pool loads, such as V_SETALLONES
+  /// on x86, to allow them to be folded when it is beneficial.
+  /// This should only be set on instructions that return a value in their
+  /// only virtual register definition.
+  bool canFoldAsLoad() const {
+    return Flags & (1 << TID::FoldableAsLoad);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Side Effect Analysis
+  //===--------------------------------------------------------------------===//
+
+  /// mayLoad - Return true if this instruction could possibly read memory.
+  //===--------------------------------------------------------------------===//
+  // Side Effect Analysis
+  //===--------------------------------------------------------------------===//
+
+  /// mayLoad - Return true if this instruction could possibly read memory.
+  /// Instructions with this flag set are not necessarily simple load
+  /// instructions; they may, for example, load a value and then modify it.
+  bool mayLoad() const {
+    return Flags & (1 << TID::MayLoad);
+  }
+
+  /// mayStore - Return true if this instruction could possibly modify memory.
+  /// Instructions with this flag set are not necessarily simple store
+  /// instructions; they may store a modified value based on their operands, or
+  /// may not actually modify anything at all.
+  bool mayStore() const {
+    return Flags & (1 << TID::MayStore);
+  }
+
+  /// hasUnmodeledSideEffects - Return true if this instruction has side
+  /// effects that are not modeled by other flags. This does not return true
+  /// for instructions whose effects are captured by:
+  ///
+  /// 1. Their operand list and implicit definition/use list. Register use/def
+  ///    info is explicit for instructions.
+  /// 2. Memory accesses. Use mayLoad/mayStore.
+  /// 3. Calling, branching, returning: use isCall/isReturn/isBranch.
+  ///
+  /// Examples of side effects would be modifying 'invisible' machine state like
+  /// a control register, flushing a cache, modifying a register invisible to
+  /// LLVM, etc.
+  ///
+  bool hasUnmodeledSideEffects() const {
+    return Flags & (1 << TID::UnmodeledSideEffects);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Flags that indicate whether an instruction can be modified by a method.
+  //===--------------------------------------------------------------------===//
+
+  /// isCommutable - Return true if this may be a 2- or 3-address
+  /// instruction (of the form "X = op Y, Z, ..."), which produces the same
+  /// result if Y and Z are exchanged. If this flag is set, then the
+  /// TargetInstrInfo::commuteInstruction method may be used to hack on the
+  /// instruction.
+  ///
+  /// Note that this flag may be set on instructions that are only commutable
+  /// sometimes. In these cases, the call to commuteInstruction will fail.
+  /// Also note that some instructions require non-trivial modification to
+  /// commute them.
+  bool isCommutable() const {
+    return Flags & (1 << TID::Commutable);
+  }
+
+  /// isConvertibleTo3Addr - Return true if this is a 2-address instruction
+  /// which can be changed into a 3-address instruction if needed. Doing this
+  /// transformation can be profitable in the register allocator, because it
+  /// means that the instruction can use a 2-address form if possible, but
+  /// degrade into a less efficient form if the source and dest registers cannot
+  /// be assigned to the same register. For example, this allows the x86
+  /// backend to turn a "shl reg, 3" instruction into an LEA instruction, which
+  /// is the same speed as the shift but has bigger code size.
+  ///
+  /// If this returns true, then the target must implement the
+  /// TargetInstrInfo::convertToThreeAddress method for this instruction, which
+  /// is allowed to fail if the transformation isn't valid for this specific
+  /// instruction (e.g. shl reg, 4 on x86).
+  ///
+  bool isConvertibleTo3Addr() const {
+    return Flags & (1 << TID::ConvertibleTo3Addr);
+  }
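+  // Editorial sketch (not part of the original header): a conservative
+  // "safe to hoist/sink" test in the style of machine-level LICM, built from
+  // the Side Effect Analysis flags above. `MI` is a hypothetical
+  // `const MachineInstr *`:
+  //
+  //   const TargetInstrDesc &TID = MI->getDesc();
+  //   bool SafeToMove = !TID.mayStore() && !TID.isCall() &&
+  //                     !TID.hasUnmodeledSideEffects() && !TID.isTerminator();
+  //   // Loads additionally need an aliasing check before they can be moved.
+  //   if (TID.mayLoad())
+  //     SafeToMove = false;  // conservative in this sketch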
+  /// usesCustomInsertionHook - Return true if this instruction requires
+  /// custom insertion support when the DAG scheduler is inserting it into a
+  /// machine basic block. If this is true for the instruction, it basically
+  /// means that it is a pseudo instruction used at SelectionDAG time that is
+  /// expanded out into magic code by the target when MachineInstrs are formed.
+  ///
+  /// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
+  /// is used to insert this into the MachineBasicBlock.
+  bool usesCustomInsertionHook() const {
+    return Flags & (1 << TID::UsesCustomInserter);
+  }
+
+  /// isRematerializable - Returns true if this instruction is a candidate for
+  /// remat. This flag is deprecated, please don't use it anymore. If this
+  /// flag is set, the isReallyTriviallyReMaterializable() method is called to
+  /// verify the instruction is really rematerializable.
+  bool isRematerializable() const {
+    return Flags & (1 << TID::Rematerializable);
+  }
+
+  /// isAsCheapAsAMove - Returns true if this instruction has the same cost (or
+  /// less) than a move instruction. This is useful during certain types of
+  /// optimizations (e.g., remat during two-address conversion or machine licm)
+  /// where we would like to remat or hoist the instruction, but not if it costs
+  /// more than moving the instruction into the appropriate register. Note, we
+  /// are not marking copies from and to the same register class with this flag.
+  bool isAsCheapAsAMove() const {
+    return Flags & (1 << TID::CheapAsAMove);
+  }
+
+  /// hasExtraSrcRegAllocReq - Returns true if this instruction's source
+  /// operands have special register allocation requirements that are not
+  /// captured by the operand register classes. e.g. ARM::STRD's two source
+  /// registers must be an even / odd pair, ARM::STM registers have to be in
+  /// ascending order. Post-register allocation passes should not attempt to
+  /// change allocations for sources of instructions with this flag.
+  bool hasExtraSrcRegAllocReq() const {
+    return Flags & (1 << TID::ExtraSrcRegAllocReq);
+  }
+
+  /// hasExtraDefRegAllocReq - Returns true if this instruction's def operands
+  /// have special register allocation requirements that are not captured by
+  /// the operand register classes. e.g. ARM::LDRD's two def registers must be
+  /// an even / odd pair, ARM::LDM registers have to be in ascending order.
+  /// Post-register allocation passes should not attempt to change allocations
+  /// for definitions of instructions with this flag.
+  bool hasExtraDefRegAllocReq() const {
+    return Flags & (1 << TID::ExtraDefRegAllocReq);
+  }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Target/TargetInstrInfo.h b/final/include/llvm/Target/TargetInstrInfo.h
new file mode 100644
index 00000000000..c903f3153e1
--- /dev/null
+++ b/final/include/llvm/Target/TargetInstrInfo.h
@@ -0,0 +1,713 @@
+//===-- llvm/Target/TargetInstrInfo.h - Instruction Info --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the target machine instruction set to the code generator.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETINSTRINFO_H
+#define LLVM_TARGET_TARGETINSTRINFO_H
+
+#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class InstrItineraryData;
+class LiveVariables;
+class MCAsmInfo;
+class MachineMemOperand;
+class MachineRegisterInfo;
+class MDNode;
+class MCInst;
+class SDNode;
+class ScheduleHazardRecognizer;
+class SelectionDAG;
+class ScheduleDAG;
+class TargetRegisterClass;
+class TargetRegisterInfo;
+
+template<class T> class SmallVectorImpl;
+
+//---------------------------------------------------------------------------
+///
+/// TargetInstrInfo - Interface to description of machine instruction set
+///
+class TargetInstrInfo {
+  const TargetInstrDesc *Descriptors; // Raw array to allow static init'n
+  unsigned NumOpcodes;                // Number of entries in the desc array
+
+  TargetInstrInfo(const TargetInstrInfo &);  // DO NOT IMPLEMENT
+  void operator=(const TargetInstrInfo &);   // DO NOT IMPLEMENT
+public:
+  TargetInstrInfo(const TargetInstrDesc *desc, unsigned NumOpcodes);
+  virtual ~TargetInstrInfo();
+
+  unsigned getNumOpcodes() const { return NumOpcodes; }
+
+  /// get - Return the machine instruction descriptor that corresponds to the
+  /// specified instruction opcode.
+  ///
+  const TargetInstrDesc &get(unsigned Opcode) const {
+    assert(Opcode < NumOpcodes && "Invalid opcode!");
+    return Descriptors[Opcode];
+  }
+
+  /// isTriviallyReMaterializable - Return true if the instruction is trivially
+  /// rematerializable, meaning it has no side effects and requires no operands
+  /// that aren't always available.
+  bool isTriviallyReMaterializable(const MachineInstr *MI,
+                                   AliasAnalysis *AA = 0) const {
+    return MI->getOpcode() == TargetOpcode::IMPLICIT_DEF ||
+           (MI->getDesc().isRematerializable() &&
+            (isReallyTriviallyReMaterializable(MI, AA) ||
+             isReallyTriviallyReMaterializableGeneric(MI, AA)));
+  }
+
+protected:
+  /// isReallyTriviallyReMaterializable - For instructions with opcodes for
+  /// which the M_REMATERIALIZABLE flag is set, this hook lets the target
+  /// specify whether the instruction is actually trivially rematerializable,
+  /// taking into consideration its operands. This predicate must return false
+  /// if the instruction has any side effects other than producing a value, or
+  /// if it requires any address registers that are not always available.
+  virtual bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
+                                                 AliasAnalysis *AA) const {
+    return false;
+  }
+
+private:
+  /// isReallyTriviallyReMaterializableGeneric - For instructions with opcodes
+  /// for which the M_REMATERIALIZABLE flag is set and the target hook
+  /// isReallyTriviallyReMaterializable returns false, this function does
+  /// target-independent tests to determine if the instruction is really
+  /// trivially rematerializable.
+  bool isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+                                                AliasAnalysis *AA) const;
+
+public:
+  /// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
+  /// extension instruction. That is, it's like a copy where it's legal for the
+  /// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
+  /// true, then it's expected the pre-extension value is available as a subreg
+  /// of the result register. This also returns the sub-register index in
+  /// SubIdx.
+  virtual bool isCoalescableExtInstr(const MachineInstr &MI,
+                                     unsigned &SrcReg, unsigned &DstReg,
+                                     unsigned &SubIdx) const {
+    return false;
+  }
+
+  /// isLoadFromStackSlot - If the specified machine instruction is a direct
+  /// load from a stack slot, return the virtual or physical register number of
+  /// the destination along with the FrameIndex of the loaded stack slot. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than loading from the stack slot.
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const {
+    return 0;
+  }
+
+  /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+  /// stack locations as well. This uses a heuristic so it isn't
+  /// reliable for correctness.
+  virtual unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+    return 0;
+  }
+
+  /// hasLoadFromStackSlot - If the specified machine instruction has
+  /// a load from a stack slot, return true along with the FrameIndex
+  /// of the loaded stack slot and the machine mem operand containing
+  /// the reference. If not, return false. Unlike
+  /// isLoadFromStackSlot, this returns true for any instruction that
+  /// loads from the stack. This is just a hint, as some cases may be
+  /// missed.
+  virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
+                                    const MachineMemOperand *&MMO,
+                                    int &FrameIndex) const {
+    return false;
+  }
+
+  /// isStoreToStackSlot - If the specified machine instruction is a direct
+  /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot stored to. If
+  /// not, return 0. This predicate must return 0 if the instruction has
+  /// any side effects other than storing to the stack slot.
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const {
+    return 0;
+  }
+
+  /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+  /// stack locations as well. This uses a heuristic so it isn't
+  /// reliable for correctness.
+  virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+    return 0;
+  }
+
+  /// hasStoreToStackSlot - If the specified machine instruction has a
+  /// store to a stack slot, return true along with the FrameIndex of
+  /// the stored stack slot and the machine mem operand containing the
+  /// reference. If not, return false. Unlike isStoreToStackSlot,
+  /// this returns true for any instruction that stores to the
+  /// stack. This is just a hint, as some cases may be missed.
+  virtual bool hasStoreToStackSlot(const MachineInstr *MI,
+                                   const MachineMemOperand *&MMO,
+                                   int &FrameIndex) const {
+    return false;
+  }
+
+  /// reMaterialize - Re-issue the specified 'original' instruction at the
+  /// specific location targeting a new destination register.
+  /// The register in Orig->getOperand(0).getReg() will be substituted by
+  /// DestReg:SubIdx. Any existing subreg index is preserved or composed with
+  /// SubIdx.
+  virtual void reMaterialize(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MI,
+                             unsigned DestReg, unsigned SubIdx,
+                             const MachineInstr *Orig,
+                             const TargetRegisterInfo &TRI) const = 0;
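+  // Editorial sketch (not part of the original header): a typical use of the
+  // stack-slot queries above, counting reloads of one frame index in a block.
+  // `TII`, `MBB` and `TheSlot` are hypothetical
+  // (`const TargetInstrInfo *`, `MachineBasicBlock &`, `int`):
+  //
+  //   int FI;
+  //   unsigned NumReloads = 0;
+  //   for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+  //     if (TII->isLoadFromStackSlot(I, FI) && FI == TheSlot)
+  //       ++NumReloads;  // the returned register number is ignored here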
+  /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+  /// two-address instruction inserted by the two-address pass.
+  virtual void scheduleTwoAddrSource(MachineInstr *SrcMI,
+                                     MachineInstr *UseMI,
+                                     const TargetRegisterInfo &TRI) const {
+    // Do nothing.
+  }
+
+  /// duplicate - Create a duplicate of the Orig instruction in MF. This is like
+  /// MachineFunction::CloneMachineInstr(), but the target may update operands
+  /// that are required to be unique.
+  ///
+  /// The instruction must be duplicable as indicated by isNotDuplicable().
+  virtual MachineInstr *duplicate(MachineInstr *Orig,
+                                  MachineFunction &MF) const = 0;
+
+  /// convertToThreeAddress - This method must be implemented by targets that
+  /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
+  /// may be able to convert a two-address instruction into one or more true
+  /// three-address instructions on demand. This allows the X86 target (for
+  /// example) to convert ADD and SHL instructions into LEA instructions if they
+  /// would require register copies due to two-addressness.
+  ///
+  /// This method returns a null pointer if the transformation cannot be
+  /// performed, otherwise it returns the last new instruction.
+  ///
+  virtual MachineInstr *
+  convertToThreeAddress(MachineFunction::iterator &MFI,
+                        MachineBasicBlock::iterator &MBBI,
+                        LiveVariables *LV) const {
+    return 0;
+  }
+
+  /// commuteInstruction - If a target has any instructions that are
+  /// commutable but require converting to different instructions or making
+  /// non-trivial changes to commute them, this method can be overloaded to do
+  /// that. The default implementation simply swaps the commutable operands.
+  /// If NewMI is false, MI is modified in place and returned; otherwise, a
+  /// new machine instruction is created and returned. Do not call this
+  /// method for a non-commutable instruction; even for commutable
+  /// instructions it may fail and return null in some cases.
+  virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+                                           bool NewMI = false) const = 0;
+
+  /// findCommutedOpIndices - If the specified MI is commutable, return the two
+  /// operand indices that would swap values. Return false if the instruction
+  /// is not in a form which this routine understands.
+  virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+                                     unsigned &SrcOpIdx2) const = 0;
+
+  /// produceSameValue - Return true if two machine instructions would produce
+  /// identical values. By default, this is only true when the two instructions
+  /// are deemed identical except for defs. If this function is called when the
+  /// IR is still in SSA form, the caller can pass the MachineRegisterInfo for
+  /// aggressive checks.
+  virtual bool produceSameValue(const MachineInstr *MI0,
+                                const MachineInstr *MI1,
+                                const MachineRegisterInfo *MRI = 0) const = 0;
+
+  /// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
+  /// true if it cannot be understood (e.g. it's a switch dispatch or isn't
+  /// implemented for a target). Upon success, this returns false and returns
+  /// with the following information in various cases:
+  ///
+  /// 1. If this block ends with no branches (it just falls through to its succ)
+  ///    just return false, leaving TBB/FBB null.
+  /// 2. If this block ends with only an unconditional branch, it sets TBB to be
+  ///    the destination block.
+  /// 3. If this block ends with a conditional branch and it falls through to a
+  ///    successor block, it sets TBB to be the branch destination block and a
+  ///    list of operands that evaluate the condition. These operands can be
+  ///    passed to other TargetInstrInfo methods to create new branches.
+  /// 4. If this block ends with a conditional branch followed by an
+  ///    unconditional branch, it returns the 'true' destination in TBB, the
+  ///    'false' destination in FBB, and a list of operands that evaluate the
+  ///    condition. These operands can be passed to other TargetInstrInfo
+  ///    methods to create new branches.
+  ///
+  /// Note that RemoveBranch and InsertBranch must be implemented to support
+  /// cases where this method returns success.
+  ///
+  /// If AllowModify is true, then this routine is allowed to modify the basic
+  /// block (e.g. delete instructions after the unconditional branch).
+  ///
+  virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+                             MachineBasicBlock *&FBB,
+                             SmallVectorImpl<MachineOperand> &Cond,
+                             bool AllowModify = false) const {
+    return true;
+  }
+
+  /// RemoveBranch - Remove the branching code at the end of the specified MBB.
+  /// This is only invoked in cases where AnalyzeBranch returns success. It
+  /// returns the number of instructions that were removed.
+  virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const {
+    assert(0 && "Target didn't implement TargetInstrInfo::RemoveBranch!");
+    return 0;
+  }
+
+  /// InsertBranch - Insert branch code into the end of the specified
+  /// MachineBasicBlock. The operands to this method are the same as those
+  /// returned by AnalyzeBranch. This is only invoked in cases where
+  /// AnalyzeBranch returns success. It returns the number of instructions
+  /// inserted.
+  ///
+  /// It is also invoked by tail merging to add unconditional branches in
+  /// cases where AnalyzeBranch doesn't apply because there was no original
+  /// branch to analyze. At least this much must be implemented, else tail
+  /// merging needs to be disabled.
+  virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                                MachineBasicBlock *FBB,
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const {
+    assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!");
+    return 0;
+  }
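+  // Editorial sketch (not part of the original header): the usual
+  // analyze-then-rewrite pattern built on the three hooks above. `TII`, `MBB`,
+  // `NewDest` and `DL` are hypothetical locals:
+  //
+  //   MachineBasicBlock *TBB = 0, *FBB = 0;
+  //   SmallVector<MachineOperand, 4> Cond;
+  //   if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
+  //     // Terminators were understood: retarget the block unconditionally.
+  //     TII->RemoveBranch(MBB);
+  //     Cond.clear();
+  //     TII->InsertBranch(MBB, NewDest, 0, Cond, DL);
+  //   }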
+  /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+  /// after it, replacing it with an unconditional branch to NewDest. This is
+  /// used by the tail merging pass.
+  virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                       MachineBasicBlock *NewDest) const = 0;
+
+  /// isLegalToSplitMBBAt - Return true if it's legal to split the given basic
+  /// block at the specified instruction (i.e. instruction would be the start
+  /// of a new basic block).
+  virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI) const {
+    return true;
+  }
+
+  /// isProfitableToIfCvt - Return true if it's profitable to predicate
+  /// instructions with accumulated instruction latency of "NumCycles"
+  /// of the specified basic block, where the probability of the instructions
+  /// being executed is given by Probability, and Confidence is a measure
+  /// of our confidence that it will be properly predicted.
+  virtual
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                           unsigned ExtraPredCycles,
+                           float Probability, float Confidence) const {
+    return false;
+  }
+
+  /// isProfitableToIfCvt - Second variant of isProfitableToIfCvt, this one
+  /// checks for the case where two basic blocks from the true and false paths
+  /// of an if-then-else (diamond) are predicated on mutually exclusive
+  /// predicates, where the probability of the true path being taken is given
+  /// by Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool
+  isProfitableToIfCvt(MachineBasicBlock &TMBB,
+                      unsigned NumTCycles, unsigned ExtraTCycles,
+                      MachineBasicBlock &FMBB,
+                      unsigned NumFCycles, unsigned ExtraFCycles,
+                      float Probability, float Confidence) const {
+    return false;
+  }
+
+  /// isProfitableToDupForIfCvt - Return true if it's profitable for
+  /// if-converter to duplicate instructions of specified accumulated
+  /// instruction latencies in the specified MBB to enable if-conversion.
+  /// The probability of the instructions being executed is given by
+  /// Probability, and Confidence is a measure of our confidence that it
+  /// will be properly predicted.
+  virtual bool
+  isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
+                            float Probability, float Confidence) const {
+    return false;
+  }
+
+  /// copyPhysReg - Emit instructions to copy a pair of physical registers.
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MI, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const {
+    assert(0 && "Target didn't implement TargetInstrInfo::copyPhysReg!");
+  }
+
+  /// storeRegToStackSlot - Store the specified register of the given register
+  /// class to the specified stack frame index. The store instruction is to be
+  /// added to the given machine basic block before the specified machine
+  /// instruction. If isKill is true, the register operand is the last use and
+  /// must be marked kill.
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC,
+                                   const TargetRegisterInfo *TRI) const {
+    assert(0 && "Target didn't implement TargetInstrInfo::storeRegToStackSlot!");
+  }
+
+  /// loadRegFromStackSlot - Load the specified register of the given register
+  /// class from the specified stack frame index. The load instruction is to be
+  /// added to the given machine basic block before the specified machine
+  /// instruction.
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC,
+                                    const TargetRegisterInfo *TRI) const {
+    assert(0 && "Target didn't implement TargetInstrInfo::loadRegFromStackSlot!");
+  }
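+  // Editorial sketch (not part of the original header): spilling a register
+  // across a region with the two hooks above. `TII`, `TRI`, `RC`, `FI`, `MBB`,
+  // `MI`, `RestorePt` and `SrcReg` are hypothetical:
+  //
+  //   // Spill SrcReg before MI (last use here, so mark it killed) ...
+  //   TII->storeRegToStackSlot(MBB, MI, SrcReg, /*isKill=*/true, FI, RC, TRI);
+  //   // ... and reload it before the point where it is needed again.
+  //   TII->loadRegFromStackSlot(MBB, RestorePt, SrcReg, FI, RC, TRI);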
+  /// emitFrameIndexDebugValue - Emit a target-dependent form of
+  /// DBG_VALUE encoding the address of a frame index. Addresses would
+  /// normally be lowered the same way as other addresses on the target,
+  /// e.g. in load instructions. For targets that do not support this
+  /// the debug info is simply lost.
+  /// If you add this for a target you should handle this DBG_VALUE in the
+  /// target-specific AsmPrinter code as well; you will probably get invalid
+  /// assembly output if you don't.
+  virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
+                                                 int FrameIx,
+                                                 uint64_t Offset,
+                                                 const MDNode *MDPtr,
+                                                 DebugLoc dl) const {
+    return 0;
+  }
+
+  /// foldMemoryOperand - Attempt to fold a load or store of the specified stack
+  /// slot into the specified machine instruction for the specified operand(s).
+  /// If this is possible, a new instruction is returned with the specified
+  /// operand folded, otherwise NULL is returned.
+  /// The new instruction is inserted before MI, and the client is responsible
+  /// for removing the old instruction.
+  MachineInstr* foldMemoryOperand(MachineBasicBlock::iterator MI,
+                                  const SmallVectorImpl<unsigned> &Ops,
+                                  int FrameIndex) const;
+
+  /// foldMemoryOperand - Same as the previous version except it allows folding
+  /// of any load and store from / to any address, not just from a specific
+  /// stack slot.
+  MachineInstr* foldMemoryOperand(MachineBasicBlock::iterator MI,
+                                  const SmallVectorImpl<unsigned> &Ops,
+                                  MachineInstr* LoadMI) const;
+
+protected:
+  /// foldMemoryOperandImpl - Target-dependent implementation for
+  /// foldMemoryOperand. Target-independent code in foldMemoryOperand will
+  /// take care of adding a MachineMemOperand to the newly created instruction.
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const {
+    return 0;
+  }
+
+  /// foldMemoryOperandImpl - Target-dependent implementation for
+  /// foldMemoryOperand. Target-independent code in foldMemoryOperand will
+  /// take care of adding a MachineMemOperand to the newly created instruction.
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                              const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0;
+  }
+
+public:
+  /// canFoldMemoryOperand - Returns true for the specified load / store if
+  /// folding is possible.
+  virtual
+  bool canFoldMemoryOperand(const MachineInstr *MI,
+                            const SmallVectorImpl<unsigned> &Ops) const = 0;
+
+  /// unfoldMemoryOperand - Separate a single instruction which folded a load or
+  /// a store or a load and a store into two or more instructions. If this is
+  /// possible, returns true as well as the new instructions by reference.
+  virtual bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
+                                   unsigned Reg, bool UnfoldLoad,
+                                   bool UnfoldStore,
+                                   SmallVectorImpl<MachineInstr *> &NewMIs) const {
+    return false;
+  }
+
+  virtual bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
+                                   SmallVectorImpl<SDNode *> &NewNodes) const {
+    return false;
+  }
+
+  /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would-be new
+  /// instruction after load / store are unfolded from an instruction of the
+  /// specified opcode. It returns zero if the specified unfolding is not
+  /// possible. If LoadRegIndex is non-null, it is filled in with the operand
+  /// index of the operand which will hold the register holding the loaded
+  /// value.
+  virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
+                                              bool UnfoldLoad, bool UnfoldStore,
+                                              unsigned *LoadRegIndex = 0) const {
+    return 0;
+  }
+
+  /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
+  /// to determine if two loads are loading from the same base address. It
+  /// should only return true if the base pointers are the same and the
+  /// only difference between the two addresses is the offset. It also returns
+  /// the offsets by reference.
+  virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+                                       int64_t &Offset1, int64_t &Offset2) const {
+    return false;
+  }
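+  // Editorial sketch (not part of the original header): folding a reload into
+  // its single use, the pattern the foldMemoryOperand API above is designed
+  // for. `TII`, `MI` (an iterator), `UseOpIdx` and `FI` are hypothetical:
+  //
+  //   SmallVector<unsigned, 1> Ops;
+  //   Ops.push_back(UseOpIdx);  // operand of MI that reads the spilled reg
+  //   if (MachineInstr *FoldedMI = TII->foldMemoryOperand(MI, Ops, FI)) {
+  //     // FoldedMI was inserted before MI; per the contract above, the
+  //     // caller removes the original instruction.
+  //     MI->eraseFromParent();
+  //     (void)FoldedMI;
+  //   }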
"NumLoads" is the number of loads that + /// have already been scheduled after Load1. + virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + return false; + } + + /// ReverseBranchCondition - Reverses the branch condition of the specified + /// condition list, returning false on success and true if it cannot be + /// reversed. + virtual + bool ReverseBranchCondition(SmallVectorImpl &Cond) const { + return true; + } + + /// insertNoop - Insert a noop into the instruction stream at the specified + /// point. + virtual void insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const; + + + /// getNoopForMachoTarget - Return the noop instruction to use for a noop. + virtual void getNoopForMachoTarget(MCInst &NopInst) const { + // Default to just using 'nop' string. + } + + + /// isPredicated - Returns true if the instruction is already predicated. + /// + virtual bool isPredicated(const MachineInstr *MI) const { + return false; + } + + /// isUnpredicatedTerminator - Returns true if the instruction is a + /// terminator instruction that has not been predicated. + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; + + /// PredicateInstruction - Convert the instruction into a predicated + /// instruction. It returns true if the operation was successful. + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const = 0; + + /// SubsumesPredicate - Returns true if the first specified predicate + /// subsumes the second, e.g. GE subsumes GT. + virtual + bool SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const { + return false; + } + + /// DefinesPredicate - If the specified instruction defines any predicate + /// or condition code register(s) used for predication, returns true as well + /// as the definition predicate(s) by reference. + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const { + return false; + } + + /// isPredicable - Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand. + virtual bool isPredicable(MachineInstr *MI) const { + return MI->getDesc().isPredicable(); + } + + /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine + /// instruction that defines the specified register class. + virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { + return true; + } + + /// isSchedulingBoundary - Test if the given instruction should be + /// considered a scheduling boundary. This primarily includes labels and + /// terminators. + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const = 0; + + /// Measure the specified inline asm to determine an approximation of its + /// length. + virtual unsigned getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const; + + /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to + /// use for this target when scheduling the machine instructions before + /// register allocation. + virtual ScheduleHazardRecognizer* + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const = 0; + + /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard + /// recognizer to use for this target when scheduling the machine instructions + /// after register allocation. 
+  /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
+  /// instruction that defines the specified register class.
+  virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
+    return true;
+  }
+
+  /// isSchedulingBoundary - Test if the given instruction should be
+  /// considered a scheduling boundary. This primarily includes labels and
+  /// terminators.
+  virtual bool isSchedulingBoundary(const MachineInstr *MI,
+                                    const MachineBasicBlock *MBB,
+                                    const MachineFunction &MF) const = 0;
+
+  /// Measure the specified inline asm to determine an approximation of its
+  /// length.
+  virtual unsigned getInlineAsmLength(const char *Str,
+                                      const MCAsmInfo &MAI) const;
+
+  /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to
+  /// use for this target when scheduling the machine instructions before
+  /// register allocation.
+  virtual ScheduleHazardRecognizer*
+  CreateTargetHazardRecognizer(const TargetMachine *TM,
+                               const ScheduleDAG *DAG) const = 0;
+
+  /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard
+  /// recognizer to use for this target when scheduling the machine instructions
+  /// after register allocation.
+  virtual ScheduleHazardRecognizer*
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+                                     const ScheduleDAG *DAG) const = 0;
+
+  /// AnalyzeCompare - For a comparison instruction, return the source register
+  /// in SrcReg, and the mask and value it compares against in Mask and Value.
+  /// Return true if the comparison instruction can be analyzed.
+  virtual bool AnalyzeCompare(const MachineInstr *MI,
+                              unsigned &SrcReg, int &Mask, int &Value) const {
+    return false;
+  }
+
+  /// OptimizeCompareInstr - See if the comparison instruction can be converted
+  /// into something more efficient. E.g., on ARM most instructions can set the
+  /// flags register, obviating the need for a separate CMP.
+  virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr,
+                                    unsigned SrcReg, int Mask, int Value,
+                                    const MachineRegisterInfo *MRI) const {
+    return false;
+  }
+
+  /// FoldImmediate - 'Reg' is known to be defined by a move immediate
+  /// instruction, try to fold the immediate into the use instruction.
+  virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI,
+                             unsigned Reg, MachineRegisterInfo *MRI) const {
+    return false;
+  }
+
+  /// getNumMicroOps - Return the number of micro-operations the given machine
+  /// instruction will be decoded to on the target CPU.
+  virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
+                                  const MachineInstr *MI) const;
+
+  /// isZeroCost - Return true for pseudo instructions that don't consume any
+  /// machine resources in their current form. These are common cases that the
+  /// scheduler should consider free, rather than conservatively handling them
+  /// as instructions with no itinerary.
+  bool isZeroCost(unsigned Opcode) const {
+    return Opcode <= TargetOpcode::COPY;
+  }
+
+  /// getOperandLatency - Compute and return the use operand latency of a given
+  /// pair of def and use.
+  /// In most cases, the static scheduling itinerary is enough to determine the
+  /// operand latency. But it may not be possible for instructions with a
+  /// variable number of defs / uses.
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                                const MachineInstr *DefMI, unsigned DefIdx,
+                                const MachineInstr *UseMI, unsigned UseIdx) const;
+
+  virtual int getOperandLatency(const InstrItineraryData *ItinData,
+                                SDNode *DefNode, unsigned DefIdx,
+                                SDNode *UseNode, unsigned UseIdx) const;
+
+  /// getInstrLatency - Compute the instruction latency of a given instruction.
+  /// If the instruction has higher cost when predicated, it's returned via
+  /// PredCost.
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              const MachineInstr *MI,
+                              unsigned *PredCost = 0) const;
+
+  virtual int getInstrLatency(const InstrItineraryData *ItinData,
+                              SDNode *Node) const;
+
+  /// isHighLatencyDef - Return true if this opcode has high latency to its
+  /// result.
+  virtual bool isHighLatencyDef(int opc) const { return false; }
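+  // Editorial sketch (not part of the original header): querying def->use
+  // latency through the itinerary-backed hooks above. `TII`, `ItinData`,
+  // `DefMI`, `DefIdx`, `UseMI` and `UseIdx` are hypothetical; a negative
+  // result means the itinerary has no cycle information for that operand:
+  //
+  //   int Lat = TII->getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
+  //   if (Lat < 0)
+  //     Lat = TII->getInstrLatency(ItinData, DefMI);  // fall back to def cost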
+  /// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
+  /// and a use in the current loop; return true if the target considers
+  /// it 'high'. This is used by optimization passes such as machine LICM to
+  /// determine whether it makes sense to hoist an instruction out even in a
+  /// high register pressure situation.
+  virtual
+  bool hasHighOperandLatency(const InstrItineraryData *ItinData,
+                             const MachineRegisterInfo *MRI,
+                             const MachineInstr *DefMI, unsigned DefIdx,
+                             const MachineInstr *UseMI, unsigned UseIdx) const {
+    return false;
+  }
+
+  /// hasLowDefLatency - Compute operand latency of a def of 'Reg'; return true
+  /// if the target considers it 'low'.
+  virtual
+  bool hasLowDefLatency(const InstrItineraryData *ItinData,
+                        const MachineInstr *DefMI, unsigned DefIdx) const;
+};
+
+/// TargetInstrInfoImpl - This is the default implementation of
+/// TargetInstrInfo, which just provides a couple of default implementations
+/// for various methods. This is separated out because it is implemented in
+/// libcodegen, not in libtarget.
+class TargetInstrInfoImpl : public TargetInstrInfo {
+protected:
+  TargetInstrInfoImpl(const TargetInstrDesc *desc, unsigned NumOpcodes)
+    : TargetInstrInfo(desc, NumOpcodes) {}
+public:
+  virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                       MachineBasicBlock *NewDest) const;
+  virtual MachineInstr *commuteInstruction(MachineInstr *MI,
+                                           bool NewMI = false) const;
+  virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1,
+                                     unsigned &SrcOpIdx2) const;
+  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+  virtual bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+  virtual void reMaterialize(MachineBasicBlock &MBB,
+                             MachineBasicBlock::iterator MI,
+                             unsigned DestReg, unsigned SubReg,
+                             const MachineInstr *Orig,
+                             const TargetRegisterInfo &TRI) const;
+  virtual MachineInstr *duplicate(MachineInstr *Orig,
+                                  MachineFunction &MF) const;
+  virtual bool produceSameValue(const MachineInstr *MI0,
+                                const MachineInstr *MI1,
+                                const MachineRegisterInfo *MRI) const;
+  virtual bool isSchedulingBoundary(const MachineInstr *MI,
+                                    const MachineBasicBlock *MBB,
+                                    const MachineFunction &MF) const;
+
+  bool usePreRAHazardRecognizer() const;
+
+  virtual ScheduleHazardRecognizer *
+  CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const;
+
+  virtual ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*,
+                                     const ScheduleDAG*) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetInstrItineraries.h b/final/include/llvm/Target/TargetInstrItineraries.h
new file mode 100644
index 00000000000..a95b70f6b99
--- /dev/null
+++ b/final/include/llvm/Target/TargetInstrItineraries.h
@@ -0,0 +1,248 @@
+//===-- llvm/Target/TargetInstrItineraries.h - Scheduling -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the structures used for instruction
+// itineraries, stages, and operand reads/writes. This is used by
+// schedulers to determine instruction stages and latencies.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
+#define LLVM_TARGET_TARGETINSTRITINERARIES_H
+
+#include <algorithm>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// Instruction stage - These values represent a non-pipelined step in the
+/// execution of an instruction. Cycles represents the number of
+/// discrete time slots needed to complete the stage. Units represent
+/// the choice of functional units that can be used to complete the
+/// stage. E.g. IntUnit1, IntUnit2. NextCycles indicates how many
+/// cycles should elapse from the start of this stage to the start of
+/// the next stage in the itinerary. A value of -1 indicates that the
+/// next stage should start immediately after the current one.
+/// For example:
+///
+///   { 1, x, -1 }
+///      indicates that the stage occupies FU x for 1 cycle and that
+///      the next stage starts immediately after this one.
+///
+///   { 2, x|y, 1 }
+///      indicates that the stage occupies either FU x or FU y for 2
+///      consecutive cycles and that the next stage starts one cycle
+///      after this stage starts. That is, the stage requirements
+///      overlap in time.
+///
+///   { 1, x, 0 }
+///      indicates that the stage occupies FU x for 1 cycle and that
+///      the next stage starts in this same cycle. This can be used to
+///      indicate that the instruction requires multiple stages at the
+///      same time.
+///
+/// FU reservation can be of two different kinds:
+///  - FUs which the instruction actually requires.
+///  - FUs which the instruction just reserves; a reserved unit is not
+///    available for execution of other instructions, but several
+///    instructions can reserve the same unit several times.
+/// These two kinds of reservation are used to model instruction domain
+/// change stalls, FUs using the same resource (e.g. the same register
+/// file), etc.
+
+struct InstrStage {
+  enum ReservationKinds {
+    Required = 0,
+    Reserved = 1
+  };
+
+  unsigned Cycles_;          ///< Length of stage in machine cycles
+  unsigned Units_;           ///< Choice of functional units
+  int NextCycles_;           ///< Number of machine cycles to next stage
+  ReservationKinds Kind_;    ///< Kind of the FU reservation
+
+  /// getCycles - returns the number of cycles the stage is occupied
+  unsigned getCycles() const {
+    return Cycles_;
+  }
+
+  /// getUnits - returns the choice of FUs
+  unsigned getUnits() const {
+    return Units_;
+  }
+
+  ReservationKinds getReservationKind() const {
+    return Kind_;
+  }
+
+  /// getNextCycles - returns the number of cycles from the start of
+  /// this stage to the start of the next stage in the itinerary
+  unsigned getNextCycles() const {
+    return (NextCycles_ >= 0) ? (unsigned)NextCycles_ : Cycles_;
+  }
+};
+
+
+//===----------------------------------------------------------------------===//
+/// Instruction itinerary - An itinerary represents the scheduling
+/// information for an instruction. This includes a set of stages
+/// occupied by the instruction, and the pipeline cycle in which
+/// operands are read and written.
+///
+struct InstrItinerary {
+  unsigned NumMicroOps;        ///< # of micro-ops, 0 means it's variable
+  unsigned FirstStage;         ///< Index of first stage in itinerary
+  unsigned LastStage;          ///< Index of last + 1 stage in itinerary
+  unsigned FirstOperandCycle;  ///< Index of first operand rd/wr
+  unsigned LastOperandCycle;   ///< Index of last + 1 operand rd/wr
+};
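+// Editorial sketch (not part of the original header): a tiny itinerary table
+// for a hypothetical two-stage in-order pipeline, showing how the structures
+// above compose. The unit masks and class indices are invented:
+//
+//   enum { FU_Decode = 1, FU_ALU = 2 };
+//   static const InstrStage Stages[] = {
+//     { 1, FU_Decode, -1, InstrStage::Required },  // 1 cycle in decode
+//     { 1, FU_ALU,    -1, InstrStage::Required }   // then 1 cycle in the ALU
+//   };
+//   static const InstrItinerary Itins[] = {
+//     { 1, 0, 2, 0, 0 },         // class 0: one micro-op, stages [0, 2)
+//     { 0, ~0u, ~0u, ~0u, ~0u }  // end marker (see isEndMarker below)
+//   };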
+//===----------------------------------------------------------------------===//
+/// Instruction itinerary data - Itinerary data supplied by a subtarget to be
+/// used by a target.
+///
+class InstrItineraryData {
+public:
+  const InstrStage     *Stages;         ///< Array of stages selected
+  const unsigned       *OperandCycles;  ///< Array of operand cycles selected
+  const unsigned       *Forwardings;    ///< Array of pipeline forwarding paths
+  const InstrItinerary *Itineraries;    ///< Array of itineraries selected
+  unsigned              IssueWidth;     ///< Max issue per cycle. 0=Unknown.
+
+  /// Ctors.
+  ///
+  InstrItineraryData() : Stages(0), OperandCycles(0), Forwardings(0),
+                         Itineraries(0), IssueWidth(0) {}
+
+  InstrItineraryData(const InstrStage *S, const unsigned *OS,
+                     const unsigned *F, const InstrItinerary *I)
+    : Stages(S), OperandCycles(OS), Forwardings(F), Itineraries(I),
+      IssueWidth(0) {}
+
+  /// isEmpty - Returns true if there are no itineraries.
+  ///
+  bool isEmpty() const { return Itineraries == 0; }
+
+  /// isEndMarker - Returns true if the index is for the end marker
+  /// itinerary.
+  ///
+  bool isEndMarker(unsigned ItinClassIndx) const {
+    return ((Itineraries[ItinClassIndx].FirstStage == ~0U) &&
+            (Itineraries[ItinClassIndx].LastStage == ~0U));
+  }
+
+  /// beginStage - Return the first stage of the itinerary.
+  ///
+  const InstrStage *beginStage(unsigned ItinClassIndx) const {
+    unsigned StageIdx = Itineraries[ItinClassIndx].FirstStage;
+    return Stages + StageIdx;
+  }
+
+  /// endStage - Return the last+1 stage of the itinerary.
+  ///
+  const InstrStage *endStage(unsigned ItinClassIndx) const {
+    unsigned StageIdx = Itineraries[ItinClassIndx].LastStage;
+    return Stages + StageIdx;
+  }
+
+  /// getStageLatency - Return the total stage latency of the given
+  /// class. The latency is the maximum completion time for any stage
+  /// in the itinerary.
+  ///
+  unsigned getStageLatency(unsigned ItinClassIndx) const {
+    // If the target doesn't provide itinerary information, use a
+    // simple non-zero default value for all instructions.
+    if (isEmpty())
+      return 1;
+
+    // Calculate the maximum completion time for any stage.
+    unsigned Latency = 0, StartCycle = 0;
+    for (const InstrStage *IS = beginStage(ItinClassIndx),
+           *E = endStage(ItinClassIndx); IS != E; ++IS) {
+      Latency = std::max(Latency, StartCycle + IS->getCycles());
+      StartCycle += IS->getNextCycles();
+    }
+
+    return Latency;
+  }
+
+  /// getOperandCycle - Return the cycle for the given class and
+  /// operand. Return -1 if no cycle is specified for the operand.
+  ///
+  int getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const {
+    if (isEmpty())
+      return -1;
+
+    unsigned FirstIdx = Itineraries[ItinClassIndx].FirstOperandCycle;
+    unsigned LastIdx = Itineraries[ItinClassIndx].LastOperandCycle;
+    if ((FirstIdx + OperandIdx) >= LastIdx)
+      return -1;
+
+    return (int)OperandCycles[FirstIdx + OperandIdx];
+  }
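+  // Editorial worked example (not part of the original header): with the
+  // two-stage table sketched after InstrItinerary above ({1, FU_Decode, -1}
+  // followed by {1, FU_ALU, -1}), getStageLatency(0) computes
+  //   max(0 + 1, 1 + 1) = 2 cycles,
+  // because the second stage starts getNextCycles() == 1 cycle after the
+  // first one begins.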
+  /// hasPipelineForwarding - Return true if there is a pipeline forwarding
+  /// between instructions of itinerary classes DefClass and UseClass so that
+  /// a value produced by an instruction of itinerary class DefClass, operand
+  /// index DefIdx can be bypassed when it's read by an instruction of
+  /// itinerary class UseClass, operand index UseIdx.
+  bool hasPipelineForwarding(unsigned DefClass, unsigned DefIdx,
+                             unsigned UseClass, unsigned UseIdx) const {
+    unsigned FirstDefIdx = Itineraries[DefClass].FirstOperandCycle;
+    unsigned LastDefIdx = Itineraries[DefClass].LastOperandCycle;
+    if ((FirstDefIdx + DefIdx) >= LastDefIdx)
+      return false;
+    if (Forwardings[FirstDefIdx + DefIdx] == 0)
+      return false;
+
+    unsigned FirstUseIdx = Itineraries[UseClass].FirstOperandCycle;
+    unsigned LastUseIdx = Itineraries[UseClass].LastOperandCycle;
+    if ((FirstUseIdx + UseIdx) >= LastUseIdx)
+      return false;
+
+    return Forwardings[FirstDefIdx + DefIdx] ==
+      Forwardings[FirstUseIdx + UseIdx];
+  }
+
+  /// getOperandLatency - Compute and return the use operand latency of a given
+  /// itinerary class and operand index if the value is produced by an
+  /// instruction of the specified itinerary class and def operand index.
+  int getOperandLatency(unsigned DefClass, unsigned DefIdx,
+                        unsigned UseClass, unsigned UseIdx) const {
+    if (isEmpty())
+      return -1;
+
+    int DefCycle = getOperandCycle(DefClass, DefIdx);
+    if (DefCycle == -1)
+      return -1;
+
+    int UseCycle = getOperandCycle(UseClass, UseIdx);
+    if (UseCycle == -1)
+      return -1;
+
+    UseCycle = DefCycle - UseCycle + 1;
+    if (UseCycle > 0 &&
+        hasPipelineForwarding(DefClass, DefIdx, UseClass, UseIdx))
+      // FIXME: This assumes one cycle benefit for every pipeline forwarding.
+      --UseCycle;
+    return UseCycle;
+  }
+
+  /// isMicroCoded - Return true if the instructions in the given class decode
+  /// to more than one micro-op.
+  bool isMicroCoded(unsigned ItinClassIndx) const {
+    if (isEmpty())
+      return false;
+    return Itineraries[ItinClassIndx].NumMicroOps != 1;
+  }
+};
+
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetIntrinsicInfo.h b/final/include/llvm/Target/TargetIntrinsicInfo.h
new file mode 100644
index 00000000000..ad8ac925e93
--- /dev/null
+++ b/final/include/llvm/Target/TargetIntrinsicInfo.h
@@ -0,0 +1,64 @@
+//===-- llvm/Target/TargetIntrinsicInfo.h - Instruction Info ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the target intrinsic instructions to the code generator.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETINTRINSICINFO_H
+#define LLVM_TARGET_TARGETINTRINSICINFO_H
+
+#include <string>
+
+namespace llvm {
+
+class Function;
+class Module;
+class Type;
+
+//---------------------------------------------------------------------------
+///
+/// TargetIntrinsicInfo - Interface to the description of target intrinsic
+/// instructions
+///
+class TargetIntrinsicInfo {
+  TargetIntrinsicInfo(const TargetIntrinsicInfo &);  // DO NOT IMPLEMENT
+  void operator=(const TargetIntrinsicInfo &);       // DO NOT IMPLEMENT
+public:
+  TargetIntrinsicInfo();
+  virtual ~TargetIntrinsicInfo();
+
+  /// Return the name of a target intrinsic, e.g. "llvm.bfin.ssync".
+  /// The Tys and numTys parameters are for intrinsics with overloaded types
+  /// (e.g., those using iAny or fAny). For a declaration for an overloaded
+  /// intrinsic, Tys should point to an array of numTys pointers to Type,
+  /// and must provide exactly one type for each overloaded type in the
+  /// intrinsic.
+  virtual std::string getName(unsigned IID, const Type **Tys = 0,
+                              unsigned numTys = 0) const = 0;
+
+  /// Look up target intrinsic by name. Return intrinsic ID or 0 for unknown
+  /// names.
+  virtual unsigned lookupName(const char *Name, unsigned Len) const = 0;
+
+  /// Return the target intrinsic ID of a function, or 0.
+  virtual unsigned getIntrinsicID(Function *F) const;
+
+  /// Returns true if the intrinsic can be overloaded.
+  virtual bool isOverloaded(unsigned IID) const = 0;
+
+  /// Create or insert an LLVM Function declaration for an intrinsic,
+  /// and return it. The Tys and numTys are for intrinsics with overloaded
+  /// types. See above for more information.
+  virtual Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+                                   unsigned numTys = 0) const = 0;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetJITInfo.h b/final/include/llvm/Target/TargetJITInfo.h
new file mode 100644
index 00000000000..b198eb62f0c
--- /dev/null
+++ b/final/include/llvm/Target/TargetJITInfo.h
@@ -0,0 +1,142 @@
+//===- Target/TargetJITInfo.h - Target Information for JIT ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an abstract interface used by the Just-In-Time code
+// generator to perform target-specific activities, such as emitting stubs. If
+// a TargetMachine supports JIT code generation, it should provide one of these
+// objects through the getJITInfo() method.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETJITINFO_H
+#define LLVM_TARGET_TARGETJITINFO_H
+
+#include <cassert>
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  class Function;
+  class GlobalValue;
+  class JITCodeEmitter;
+  class MachineRelocation;
+
+  /// TargetJITInfo - Target specific information required by the Just-In-Time
+  /// code generator.
+  class TargetJITInfo {
+  public:
+    virtual ~TargetJITInfo() {}
+
+    /// replaceMachineCodeForFunction - Make it so that calling the function
+    /// whose machine code is at OLD turns into a call to NEW, perhaps by
+    /// overwriting OLD with a branch to NEW. This is used for self-modifying
+    /// code.
+    ///
+    virtual void replaceMachineCodeForFunction(void *Old, void *New) = 0;
+
+    /// emitGlobalValueIndirectSym - Use the specified JITCodeEmitter object
+    /// to emit an indirect symbol which contains the address of the specified
+    /// ptr.
+    virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr,
+                                             JITCodeEmitter &JCE) {
+      assert(0 && "This target doesn't implement emitGlobalValueIndirectSym!");
+      return 0;
+    }
+
+    /// Records the required size and alignment for a call stub in bytes.
+    struct StubLayout {
+      size_t Size;
+      size_t Alignment;
+    };
+    /// Returns the maximum size and alignment for a call stub on this target.
+    virtual StubLayout getStubLayout() {
+      llvm_unreachable("This target doesn't implement getStubLayout!");
+      StubLayout Result = {0, 0};
+      return Result;
+    }
+
+    /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a
+    /// small native function that simply calls the function at the specified
+    /// address. The JITCodeEmitter must already have storage allocated for the
+    /// stub.
Return the address of the resultant function, which may have been + /// aligned from the address the JCE was set up to emit at. + virtual void *emitFunctionStub(const Function* F, void *Target, + JITCodeEmitter &JCE) { + assert(0 && "This target doesn't implement emitFunctionStub!"); + return 0; + } + + /// getPICJumpTableEntry - Returns the value of the jumptable entry for the + /// specific basic block. + virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase) { + assert(0 && "This target doesn't implement getPICJumpTableEntry!"); + return 0; + } + + /// LazyResolverFn - This typedef is used to represent the function that + /// unresolved call points should invoke. This is a target specific + /// function that knows how to walk the stack and find out which stub the + /// call is coming from. + typedef void (*LazyResolverFn)(); + + /// JITCompilerFn - This typedef is used to represent the JIT function that + /// lazily compiles the function corresponding to a stub. The JIT keeps + /// track of the mapping between stubs and LLVM Functions, the target + /// provides the ability to figure out the address of a stub that is called + /// by the LazyResolverFn. + typedef void* (*JITCompilerFn)(void *); + + /// getLazyResolverFunction - This method is used to initialize the JIT, + /// giving the target the function that should be used to compile a + /// function, and giving the JIT the target function used to do the lazy + /// resolving. + virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn) { + assert(0 && "Not implemented for this target!"); + return 0; + } + + /// relocate - Before the JIT can run a block of code that has been emitted, + /// it must rewrite the code to contain the actual addresses of any + /// referenced global symbols. + virtual void relocate(void *Function, MachineRelocation *MR, + unsigned NumRelocs, unsigned char* GOTBase) { + assert(NumRelocs == 0 && "This target does not have relocations!"); + } + + + /// allocateThreadLocalMemory - Each target has its own way of + /// handling thread local variables. This method returns a value only + /// meaningful to the target. + virtual char* allocateThreadLocalMemory(size_t size) { + assert(0 && "This target does not implement thread local storage!"); + return 0; + } + + /// needsGOT - Allows a target to specify that it would like the + /// JIT to manage a GOT for it. + bool needsGOT() const { return useGOT; } + + /// hasCustomConstantPool - Allows a target to specify that constant + /// pool address resolution is handled by the target. + virtual bool hasCustomConstantPool() const { return false; } + + /// hasCustomJumpTables - Allows a target to specify that jumptables + /// are emitted by the target. + virtual bool hasCustomJumpTables() const { return false; } + + /// allocateSeparateGVMemory - If true, globals should be placed in + /// separately allocated heap memory rather than in the same + /// code memory allocated by JITCodeEmitter. 
+    virtual bool allocateSeparateGVMemory() const { return false; }
+  protected:
+    bool useGOT;
+  };
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetLibraryInfo.h b/final/include/llvm/Target/TargetLibraryInfo.h
new file mode 100644
index 00000000000..1847a3725e6
--- /dev/null
+++ b/final/include/llvm/Target/TargetLibraryInfo.h
@@ -0,0 +1,75 @@
+//===-- llvm/Target/TargetLibraryInfo.h - Library information ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETLIBRARYINFO_H
+#define LLVM_TARGET_TARGETLIBRARYINFO_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+  class Triple;
+
+  namespace LibFunc {
+    enum Func {
+      /// void *memset(void *b, int c, size_t len);
+      memset,
+
+      /// void *memcpy(void *s1, const void *s2, size_t n);
+      memcpy,
+
+      /// void memset_pattern16(void *b, const void *pattern16, size_t len);
+      memset_pattern16,
+
+      /// int iprintf(const char *format, ...);
+      iprintf,
+
+      /// int siprintf(char *str, const char *format, ...);
+      siprintf,
+
+      /// int fiprintf(FILE *stream, const char *format, ...);
+      fiprintf,
+
+      NumLibFuncs
+    };
+  }
+
+/// TargetLibraryInfo - This immutable pass captures information about what
+/// library functions are available for the current target, and allows a
+/// frontend to disable optimizations through -fno-builtin etc.
+class TargetLibraryInfo : public ImmutablePass {
+  unsigned char AvailableArray[(LibFunc::NumLibFuncs+7)/8];
+public:
+  static char ID;
+  TargetLibraryInfo();
+  TargetLibraryInfo(const Triple &T);
+
+  /// has - This function is used by optimizations that want to match on or
+  /// form a given library function.
+  bool has(LibFunc::Func F) const {
+    return (AvailableArray[F/8] & (1 << (F&7))) != 0;
+  }
+
+  /// setUnavailable - this can be used by whatever sets up TargetLibraryInfo to
+  /// ban use of specific library functions.
+  void setUnavailable(LibFunc::Func F) {
+    AvailableArray[F/8] &= ~(1 << (F&7));
+  }
+
+  void setAvailable(LibFunc::Func F) {
+    AvailableArray[F/8] |= 1 << (F&7);
+  }
+
+  /// disableAllFunctions - This disables all builtins, which is used for
+  /// options like -fno-builtin.
+  void disableAllFunctions();
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Target/TargetLowering.h b/final/include/llvm/Target/TargetLowering.h
new file mode 100644
index 00000000000..63bf8a2aa0c
--- /dev/null
+++ b/final/include/llvm/Target/TargetLowering.h
@@ -0,0 +1,1864 @@
+//===-- llvm/Target/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how to lower LLVM code to machine code. This has three
+// main components:
+//
+//  1. Which ValueTypes are natively supported by the target.
+//  2. Which operations are supported for supported ValueTypes.
+//  3. Cost thresholds for alternative implementations of certain operations.
+//
+// In addition it has a few other components, like information about FP
+// immediates.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETLOWERING_H
+#define LLVM_TARGET_TARGETLOWERING_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Attributes.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/Support/DebugLoc.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include <climits>
+#include <map>
+#include <vector>
+
+namespace llvm {
+  class AllocaInst;
+  class APFloat;
+  class CallInst;
+  class CCState;
+  class Function;
+  class FastISel;
+  class FunctionLoweringInfo;
+  class ImmutableCallSite;
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineFrameInfo;
+  class MachineInstr;
+  class MachineJumpTableInfo;
+  class MCContext;
+  class MCExpr;
+  class SDNode;
+  class SDValue;
+  class SelectionDAG;
+  template<typename T> class SmallVectorImpl;
+  class TargetData;
+  class TargetMachine;
+  class TargetRegisterClass;
+  class TargetLoweringObjectFile;
+  class Value;
+
+  // FIXME: should this be here?
+  namespace TLSModel {
+    enum Model {
+      GeneralDynamic,
+      LocalDynamic,
+      InitialExec,
+      LocalExec
+    };
+  }
+  TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc);
+
+
+//===----------------------------------------------------------------------===//
+/// TargetLowering - This class defines information used to lower LLVM code to
+/// legal SelectionDAG operators that the target instruction selector can
+/// accept natively.
+///
+/// This class also defines callbacks that targets must implement to lower
+/// target-specific constructs to SelectionDAG operators.
+///
+class TargetLowering {
+  TargetLowering(const TargetLowering&);  // DO NOT IMPLEMENT
+  void operator=(const TargetLowering&);  // DO NOT IMPLEMENT
+public:
+  /// LegalizeAction - This enum indicates whether operations are valid for a
+  /// target, and if not, what action should be used to make them valid.
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand,     // Try to expand this to other ops, otherwise use a libcall.
+    Custom      // Use the LowerOperation hook to implement custom lowering.
+  };
+
+  enum BooleanContent { // How the target represents true/false values.
+    UndefinedBooleanContent,    // Only bit 0 counts, the rest can hold garbage.
+    ZeroOrOneBooleanContent,        // All bits zero except for bit 0.
+    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
+  };
+
+  /// NOTE: The constructor takes ownership of TLOF.
+  explicit TargetLowering(const TargetMachine &TM,
+                          const TargetLoweringObjectFile *TLOF);
+  virtual ~TargetLowering();
+
+  const TargetMachine &getTargetMachine() const { return TM; }
+  const TargetData *getTargetData() const { return TD; }
+  const TargetLoweringObjectFile &getObjFileLowering() const { return TLOF; }
+
+  bool isBigEndian() const { return !IsLittleEndian; }
+  bool isLittleEndian() const { return IsLittleEndian; }
+  MVT getPointerTy() const { return PointerTy; }
+  virtual MVT getShiftAmountTy(EVT LHSTy) const;
+
+  /// isSelectExpensive - Return true if the select operation is expensive for
+  /// this target.
+  bool isSelectExpensive() const { return SelectIsExpensive; }
+
+  /// isIntDivCheap() - Return true if integer divide is usually cheaper than
+  /// a sequence of several shifts, adds, and multiplies for this target.
+  bool isIntDivCheap() const { return IntDivIsCheap; }
+
+  /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
+  /// srl/add/sra.
+  bool isPow2DivCheap() const { return Pow2DivIsCheap; }
+
+  /// isJumpExpensive() - Return true if Flow Control is an expensive operation
+  /// that should be avoided.
+  bool isJumpExpensive() const { return JumpIsExpensive; }
+
+  /// getSetCCResultType - Return the ValueType of the result of SETCC
+  /// operations.  Also used to obtain the target's preferred type for
+  /// the condition operand of SELECT and BRCOND nodes.  In the case of
+  /// BRCOND the argument passed is MVT::Other since there are no other
+  /// operands to get a type hint from.
+  virtual
+  MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+  /// getCmpLibcallReturnType - Return the ValueType for comparison
+  /// libcalls.  Comparison libcalls include floating point comparison calls,
+  /// and Ordered/Unordered check calls on floating point numbers.
+  virtual
+  MVT::SimpleValueType getCmpLibcallReturnType() const;
+
+  /// getBooleanContents - For targets without i1 registers, this gives the
+  /// nature of the high-bits of boolean values held in types wider than i1.
+  /// "Boolean values" are special true/false values produced by nodes like
+  /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
+  /// Not to be confused with general values promoted from i1.
+  BooleanContent getBooleanContents() const { return BooleanContents; }
+
+  /// getSchedulingPreference - Return target scheduling preference.
+  Sched::Preference getSchedulingPreference() const {
+    return SchedPreferenceInfo;
+  }
+
+  /// getSchedulingPreference - Some schedulers, e.g. hybrid, can switch to
+  /// different scheduling heuristics for different nodes.  This function
+  /// returns the preference (or none) for the given node.
+  virtual Sched::Preference getSchedulingPreference(SDNode *N) const {
+    return Sched::None;
+  }
+
+  /// getRegClassFor - Return the register class that should be used for the
+  /// specified value type.
+  virtual TargetRegisterClass *getRegClassFor(EVT VT) const {
+    assert(VT.isSimple() && "getRegClassFor called on illegal type!");
+    TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy];
+    assert(RC && "This value type is not natively supported!");
+    return RC;
+  }
+
+  /// getRepRegClassFor - Return the 'representative' register class for the
+  /// specified value type.  The 'representative' register class is the largest
+  /// legal super-reg register class for the register class of the value type.
+  /// For example, on i386 the rep register class for i8, i16, and i32 is GR32;
+  /// while the rep register class is GR64 on x86_64.
+  virtual const TargetRegisterClass *getRepRegClassFor(EVT VT) const {
+    assert(VT.isSimple() && "getRepRegClassFor called on illegal type!");
+    const TargetRegisterClass *RC = RepRegClassForVT[VT.getSimpleVT().SimpleTy];
+    return RC;
+  }
+
+  /// getRepRegClassCostFor - Return the cost of the 'representative' register
+  /// class for the specified value type.
+  virtual uint8_t getRepRegClassCostFor(EVT VT) const {
+    assert(VT.isSimple() && "getRepRegClassCostFor called on illegal type!");
+    return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
+  }
+
+  /// isTypeLegal - Return true if the target has native support for the
+  /// specified value type.  This means that it has a register that directly
+  /// holds it without promotions or expansions.
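+  /// For instance (an illustrative sketch, not original text; TLI names a
+  /// hypothetical TargetLowering reference):
+  /// \code
+  ///   // On a typical 32-bit target, i32 is legal but i64 is not:
+  ///   if (TLI.isTypeLegal(MVT::i64))
+  ///     ...  // operate on 64-bit registers directly
+  /// \endcode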
+  bool isTypeLegal(EVT VT) const {
+    assert(!VT.isSimple() ||
+           (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
+    return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != 0;
+  }
+
+  class ValueTypeActionImpl {
+    /// ValueTypeActions - For each value type, keep a LegalizeAction enum
+    /// that indicates how instruction selection should deal with the type.
+    uint8_t ValueTypeActions[MVT::LAST_VALUETYPE];
+
+    LegalizeAction getExtendedTypeAction(EVT VT) const {
+      // Handle non-vector integers.
+      if (!VT.isVector()) {
+        assert(VT.isInteger() && "Unsupported extended type!");
+        unsigned BitSize = VT.getSizeInBits();
+        // First promote to a power-of-two size, then expand if necessary.
+        if (BitSize < 8 || !isPowerOf2_32(BitSize))
+          return Promote;
+        return Expand;
+      }
+
+      // Vectors with only one element are always scalarized.
+      if (VT.getVectorNumElements() == 1)
+        return Expand;
+
+      // Vectors with a number of elements that is not a power of two are
+      // always widened, for example <3 x float> -> <4 x float>.
+      if (!VT.isPow2VectorType())
+        return Promote;
+
+      // Vectors with a crazy element type are always expanded, for example
+      // <4 x i2> is expanded into two vectors of type <2 x i2>.
+      if (!VT.getVectorElementType().isSimple())
+        return Expand;
+
+      // If this type is smaller than a legal vector type then widen it,
+      // otherwise expand it.  E.g. <2 x float> -> <4 x float>.
+      MVT EltType = VT.getVectorElementType().getSimpleVT();
+      unsigned NumElts = VT.getVectorNumElements();
+      while (1) {
+        // Round up to the next power of 2.
+        NumElts = (unsigned)NextPowerOf2(NumElts);
+
+        // If there is no simple vector type with this many elements then there
+        // cannot be a larger legal vector type.  Note that this assumes that
+        // there are no skipped intermediate vector types in the simple types.
+        MVT LargerVector = MVT::getVectorVT(EltType, NumElts);
+        if (LargerVector == MVT())
+          return Expand;
+
+        // If this type is legal then widen the vector.
+        if (getTypeAction(LargerVector) == Legal)
+          return Promote;
+      }
+    }
+  public:
+    ValueTypeActionImpl() {
+      std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0);
+    }
+
+    LegalizeAction getTypeAction(EVT VT) const {
+      if (!VT.isExtended())
+        return getTypeAction(VT.getSimpleVT());
+      return getExtendedTypeAction(VT);
+    }
+
+    LegalizeAction getTypeAction(MVT VT) const {
+      return (LegalizeAction)ValueTypeActions[VT.SimpleTy];
+    }
+
+    void setTypeAction(EVT VT, LegalizeAction Action) {
+      unsigned I = VT.getSimpleVT().SimpleTy;
+      ValueTypeActions[I] = Action;
+    }
+  };
+
+  const ValueTypeActionImpl &getValueTypeActions() const {
+    return ValueTypeActions;
+  }
+
+  /// getTypeAction - Return how we should legalize values of this type, either
+  /// it is already legal (return 'Legal') or we need to promote it to a larger
+  /// type (return 'Promote'), or we need to expand it into multiple registers
+  /// of smaller integer type (return 'Expand').  'Custom' is not an option.
+  LegalizeAction getTypeAction(EVT VT) const {
+    return ValueTypeActions.getTypeAction(VT);
+  }
+  LegalizeAction getTypeAction(MVT VT) const {
+    return ValueTypeActions.getTypeAction(VT);
+  }
+
+  /// getTypeToTransformTo - For types supported by the target, this is an
+  /// identity function.  For types that must be promoted to larger types, this
+  /// returns the larger type to promote to.  For integer types that are larger
+  /// than the largest integer register, this contains one step in the
+  /// expansion to get to the smaller register.
For illegal floating point types, this
+  /// returns the integer type to transform to.
+  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
+    if (VT.isSimple()) {
+      assert((unsigned)VT.getSimpleVT().SimpleTy <
+             array_lengthof(TransformToType));
+      EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy];
+      assert(getTypeAction(NVT) != Promote &&
+             "Promote may not follow Expand or Promote");
+      return NVT;
+    }
+
+    if (VT.isVector()) {
+      EVT NVT = VT.getPow2VectorType(Context);
+      if (NVT == VT) {
+        // Vector length is a power of 2 - split to half the size.
+        unsigned NumElts = VT.getVectorNumElements();
+        EVT EltVT = VT.getVectorElementType();
+        return (NumElts == 1) ?
+          EltVT : EVT::getVectorVT(Context, EltVT, NumElts / 2);
+      }
+      // Promote to a power of two size, avoiding multi-step promotion.
+      return getTypeAction(NVT) == Promote ?
+        getTypeToTransformTo(Context, NVT) : NVT;
+    } else if (VT.isInteger()) {
+      EVT NVT = VT.getRoundIntegerType(Context);
+      if (NVT == VT)      // Size is a power of two - expand to half the size.
+        return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2);
+
+      // Promote to a power of two size, avoiding multi-step promotion.
+      return getTypeAction(NVT) == Promote ?
+        getTypeToTransformTo(Context, NVT) : NVT;
+    }
+    assert(0 && "Unsupported extended type!");
+    return MVT(MVT::Other); // Not reached
+  }
+
+  /// getTypeToExpandTo - For types supported by the target, this is an
+  /// identity function.  For types that must be expanded (i.e. integer types
+  /// that are larger than the largest integer register or illegal floating
+  /// point types), this returns the largest legal type it will be expanded to.
+  EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
+    assert(!VT.isVector());
+    while (true) {
+      switch (getTypeAction(VT)) {
+      case Legal:
+        return VT;
+      case Expand:
+        VT = getTypeToTransformTo(Context, VT);
+        break;
+      default:
+        assert(false && "Type is not legal nor is it to be expanded!");
+        return VT;
+      }
+    }
+    return VT;
+  }
+
+  /// getVectorTypeBreakdown - Vector types are broken down into some number of
+  /// legal first class types.  For example, EVT::v8f32 maps to 2 EVT::v4f32
+  /// with Altivec or SSE1, or 8 promoted EVT::f64 values with the X86 FP stack.
+  /// Similarly, EVT::v2i64 turns into 4 EVT::i32 values with both PPC and X86.
+  ///
+  /// This method returns the number of registers needed, and the VT for each
+  /// register.  It also returns the VT and quantity of the intermediate values
+  /// before they are promoted/expanded.
+  ///
+  unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
+                                  EVT &IntermediateVT,
+                                  unsigned &NumIntermediates,
+                                  EVT &RegisterVT) const;
+
+  /// getTgtMemIntrinsic: Given an intrinsic, checks if on the target the
+  /// intrinsic will need to map to a MemIntrinsicNode (touches memory).  If
+  /// this is the case, it returns true and stores the intrinsic
+  /// information into the IntrinsicInfo that was passed to the function.
+  struct IntrinsicInfo {
+    unsigned     opc;       // target opcode
+    EVT          memVT;     // memory VT
+    const Value* ptrVal;    // value representing memory location
+    int          offset;    // offset off of ptrVal
+    unsigned     align;     // alignment
+    bool         vol;       // is volatile?
+    bool         readMem;   // reads memory?
+    bool         writeMem;  // writes memory?
+  };
+
+  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                  const CallInst &I, unsigned Intrinsic) const {
+    return false;
+  }
+
+  /// isFPImmLegal - Returns true if the target can instruction select the
+  /// specified FP immediate natively.
If false, the legalizer will materialize
+  /// the FP immediate as a load from a constant pool.
+  virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const {
+    return false;
+  }
+
+  /// isShuffleMaskLegal - Targets can use this to indicate that they only
+  /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+  /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+  /// are assumed to be legal.
+  virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+                                  EVT VT) const {
+    return true;
+  }
+
+  /// canOpTrap - Returns true if the operation can trap for the value type.
+  /// VT must be a legal type.  By default, we optimistically assume most
+  /// operations don't trap except for divide and remainder.
+  virtual bool canOpTrap(unsigned Op, EVT VT) const;
+
+  /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal.  Targets can use
+  /// this to indicate if there is a suitable VECTOR_SHUFFLE that can be used
+  /// to replace a VAND with a constant pool entry.
+  virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
+                                      EVT VT) const {
+    return false;
+  }
+
+  /// getOperationAction - Return how this operation should be treated: either
+  /// it is legal, needs to be promoted to a larger size, needs to be
+  /// expanded to some other code sequence, or the target has a custom expander
+  /// for it.
+  LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
+    if (VT.isExtended()) return Expand;
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    unsigned I = (unsigned) VT.getSimpleVT().SimpleTy;
+    return (LegalizeAction)OpActions[I][Op];
+  }
+
+  /// isOperationLegalOrCustom - Return true if the specified operation is
+  /// legal on this target or can be made legal with custom lowering.  This
+  /// is used to help guide high-level lowering decisions.
+  bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+      (getOperationAction(Op, VT) == Legal ||
+       getOperationAction(Op, VT) == Custom);
+  }
+
+  /// isOperationLegal - Return true if the specified operation is legal on
+  /// this target.
+  bool isOperationLegal(unsigned Op, EVT VT) const {
+    return (VT == MVT::Other || isTypeLegal(VT)) &&
+           getOperationAction(Op, VT) == Legal;
+  }
+
+  /// getLoadExtAction - Return how this load with extension should be treated:
+  /// either it is legal, needs to be promoted to a larger size, needs to be
+  /// expanded to some other code sequence, or the target has a custom expander
+  /// for it.
+  LegalizeAction getLoadExtAction(unsigned ExtType, EVT VT) const {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    return (LegalizeAction)LoadExtActions[VT.getSimpleVT().SimpleTy][ExtType];
+  }
+
+  /// isLoadExtLegal - Return true if the specified load with extension is
+  /// legal on this target.
+  bool isLoadExtLegal(unsigned ExtType, EVT VT) const {
+    return VT.isSimple() &&
+      (getLoadExtAction(ExtType, VT) == Legal ||
+       getLoadExtAction(ExtType, VT) == Custom);
+  }
+
+  /// getTruncStoreAction - Return how this store with truncation should be
+  /// treated: either it is legal, needs to be promoted to a larger size, needs
+  /// to be expanded to some other code sequence, or the target has a custom
+  /// expander for it.
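+  /// For example (an illustrative note, not original text): on X86 a
+  /// truncating store of an i32 value to an i16 memory location is Legal, so
+  /// a query such as
+  /// \code
+  ///   TLI.isTruncStoreLegal(MVT::i32, MVT::i16)  // TLI is hypothetical
+  /// \endcode
+  /// would return true and no expansion is needed.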
+  LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
+    assert(ValVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           MemVT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    return (LegalizeAction)TruncStoreActions[ValVT.getSimpleVT().SimpleTy]
+                                            [MemVT.getSimpleVT().SimpleTy];
+  }
+
+  /// isTruncStoreLegal - Return true if the specified store with truncation is
+  /// legal on this target.
+  bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
+    return isTypeLegal(ValVT) && MemVT.isSimple() &&
+      (getTruncStoreAction(ValVT, MemVT) == Legal ||
+       getTruncStoreAction(ValVT, MemVT) == Custom);
+  }
+
+  /// getIndexedLoadAction - Return how the indexed load should be treated:
+  /// either it is legal, needs to be promoted to a larger size, needs to be
+  /// expanded to some other code sequence, or the target has a custom expander
+  /// for it.
+  LegalizeAction
+  getIndexedLoadAction(unsigned IdxMode, EVT VT) const {
+    assert(IdxMode < ISD::LAST_INDEXED_MODE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+    return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
+  }
+
+  /// isIndexedLoadLegal - Return true if the specified indexed load is legal
+  /// on this target.
+  bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
+    return VT.isSimple() &&
+      (getIndexedLoadAction(IdxMode, VT) == Legal ||
+       getIndexedLoadAction(IdxMode, VT) == Custom);
+  }
+
+  /// getIndexedStoreAction - Return how the indexed store should be treated:
+  /// either it is legal, needs to be promoted to a larger size, needs to be
+  /// expanded to some other code sequence, or the target has a custom expander
+  /// for it.
+  LegalizeAction
+  getIndexedStoreAction(unsigned IdxMode, EVT VT) const {
+    assert(IdxMode < ISD::LAST_INDEXED_MODE &&
+           VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy;
+    return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
+  }
+
+  /// isIndexedStoreLegal - Return true if the specified indexed store is legal
+  /// on this target.
+  bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
+    return VT.isSimple() &&
+      (getIndexedStoreAction(IdxMode, VT) == Legal ||
+       getIndexedStoreAction(IdxMode, VT) == Custom);
+  }
+
+  /// getCondCodeAction - Return how the condition code should be treated:
+  /// either it is legal, needs to be expanded to some other code sequence,
+  /// or the target has a custom expander for it.
+  LegalizeAction
+  getCondCodeAction(ISD::CondCode CC, EVT VT) const {
+    assert((unsigned)CC < array_lengthof(CondCodeActions) &&
+           (unsigned)VT.getSimpleVT().SimpleTy < sizeof(CondCodeActions[0])*4 &&
+           "Table isn't big enough!");
+    LegalizeAction Action = (LegalizeAction)
+      ((CondCodeActions[CC] >> (2*VT.getSimpleVT().SimpleTy)) & 3);
+    assert(Action != Promote && "Can't promote condition code!");
+    return Action;
+  }
+
+  /// isCondCodeLegal - Return true if the specified condition code is legal
+  /// on this target.
+  bool isCondCodeLegal(ISD::CondCode CC, EVT VT) const {
+    return getCondCodeAction(CC, VT) == Legal ||
+           getCondCodeAction(CC, VT) == Custom;
+  }
+
+  /// getTypeToPromoteTo - If the action for this operation is to promote, this
+  /// method returns the ValueType to promote to.
+  EVT getTypeToPromoteTo(unsigned Op, EVT VT) const {
+    assert(getOperationAction(Op, VT) == Promote &&
+           "This operation isn't promoted!");
+
+    // See if this has an explicit type specified.
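+    // (Entries in this map are registered by targets via AddPromotedToType
+    // below; when no entry exists, the fallback loop that follows walks to
+    // the next-larger simple type until a legal one is found.)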
+    std::map<std::pair<unsigned, MVT::SimpleValueType>,
+             MVT::SimpleValueType>::const_iterator PTTI =
+      PromoteToType.find(std::make_pair(Op, VT.getSimpleVT().SimpleTy));
+    if (PTTI != PromoteToType.end()) return PTTI->second;
+
+    assert((VT.isInteger() || VT.isFloatingPoint()) &&
+           "Cannot autopromote this type, add it with AddPromotedToType.");
+
+    EVT NVT = VT;
+    do {
+      NVT = (MVT::SimpleValueType)(NVT.getSimpleVT().SimpleTy+1);
+      assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
+             "Didn't find type to promote to!");
+    } while (!isTypeLegal(NVT) ||
+             getOperationAction(Op, NVT) == Promote);
+    return NVT;
+  }
+
+  /// getValueType - Return the EVT corresponding to this LLVM type.
+  /// This is fixed by the LLVM operations except for the pointer size.  If
+  /// AllowUnknown is true, this will return MVT::Other for types with no EVT
+  /// counterpart (e.g. structs), otherwise it will assert.
+  EVT getValueType(const Type *Ty, bool AllowUnknown = false) const {
+    EVT VT = EVT::getEVT(Ty, AllowUnknown);
+    return VT == MVT::iPTR ? PointerTy : VT;
+  }
+
+  /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
+  /// function arguments in the caller parameter area.  This is the actual
+  /// alignment, not its logarithm.
+  virtual unsigned getByValTypeAlignment(const Type *Ty) const;
+
+  /// getRegisterType - Return the type of registers that this ValueType will
+  /// eventually require.
+  EVT getRegisterType(MVT VT) const {
+    assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
+    return RegisterTypeForVT[VT.SimpleTy];
+  }
+
+  /// getRegisterType - Return the type of registers that this ValueType will
+  /// eventually require.
+  EVT getRegisterType(LLVMContext &Context, EVT VT) const {
+    if (VT.isSimple()) {
+      assert((unsigned)VT.getSimpleVT().SimpleTy <
+             array_lengthof(RegisterTypeForVT));
+      return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
+    }
+    if (VT.isVector()) {
+      EVT VT1, RegisterVT;
+      unsigned NumIntermediates;
+      (void)getVectorTypeBreakdown(Context, VT, VT1,
+                                   NumIntermediates, RegisterVT);
+      return RegisterVT;
+    }
+    if (VT.isInteger()) {
+      return getRegisterType(Context, getTypeToTransformTo(Context, VT));
+    }
+    assert(0 && "Unsupported extended type!");
+    return EVT(MVT::Other); // Not reached
+  }
+
+  /// getNumRegisters - Return the number of registers that this ValueType will
+  /// eventually require.  This is one for any types promoted to live in larger
+  /// registers, but may be more than one for types (like i64) that are split
+  /// into pieces.  For types like i140, which are first promoted then
+  /// expanded, it is the number of registers needed to hold all the bits of
+  /// the original type.  For an i140 on a 32 bit machine this means 5
+  /// registers.
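+  /// (The i140 figure follows from the integer case below: the register type
+  /// on a 32 bit machine is i32, so ceil(140 / 32) == 5 registers.)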
+  unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
+    if (VT.isSimple()) {
+      assert((unsigned)VT.getSimpleVT().SimpleTy <
+             array_lengthof(NumRegistersForVT));
+      return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
+    }
+    if (VT.isVector()) {
+      EVT VT1, VT2;
+      unsigned NumIntermediates;
+      return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
+    }
+    if (VT.isInteger()) {
+      unsigned BitWidth = VT.getSizeInBits();
+      unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
+      return (BitWidth + RegWidth - 1) / RegWidth;
+    }
+    assert(0 && "Unsupported extended type!");
+    return 0; // Not reached
+  }
+
+  /// ShouldShrinkFPConstant - If true, then instruction selection should
+  /// seek to shrink the FP constant of the specified type to a smaller type
+  /// in order to save space and / or reduce runtime.
+  virtual bool ShouldShrinkFPConstant(EVT VT) const { return true; }
+
+  /// hasTargetDAGCombine - If true, the target has custom DAG combine
+  /// transformations that it can perform for the specified node.
+  bool hasTargetDAGCombine(ISD::NodeType NT) const {
+    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
+    return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
+  }
+
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memset.  The value is set by the target at the
+  /// performance threshold for such a replacement.  If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  /// @brief Get maximum # of store operations permitted for llvm.memset
+  unsigned getMaxStoresPerMemset(bool OptSize) const {
+    return OptSize ? maxStoresPerMemsetOptSize : maxStoresPerMemset;
+  }
+
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memcpy.  The value is set by the target at the
+  /// performance threshold for such a replacement.  If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  /// @brief Get maximum # of store operations permitted for llvm.memcpy
+  unsigned getMaxStoresPerMemcpy(bool OptSize) const {
+    return OptSize ? maxStoresPerMemcpyOptSize : maxStoresPerMemcpy;
+  }
+
+  /// This function returns the maximum number of store operations permitted
+  /// to replace a call to llvm.memmove.  The value is set by the target at the
+  /// performance threshold for such a replacement.  If OptSize is true,
+  /// return the limit for functions that have the OptSize attribute.
+  /// @brief Get maximum # of store operations permitted for llvm.memmove
+  unsigned getMaxStoresPerMemmove(bool OptSize) const {
+    return OptSize ? maxStoresPerMemmoveOptSize : maxStoresPerMemmove;
+  }
+
+  /// This function returns true if the target allows unaligned memory accesses
+  /// of the specified type.  This is used, for example, in situations where an
+  /// array copy/move/set is converted to a sequence of store operations.  Its
+  /// use helps to ensure that such replacements don't generate code that
+  /// causes an alignment error (trap) on the target machine.
+  /// @brief Determine if the target supports unaligned memory accesses.
+  virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
+    return false;
+  }
+
+  /// This function returns true if the target would benefit from code
+  /// placement optimization.
+  /// @brief Determine if the target should perform code placement optimization.
+  bool shouldOptimizeCodePlacement() const {
+    return benefitFromCodePlacementOpt;
+  }
+
+  /// getOptimalMemOpType - Returns the target specific optimal type for load
+  /// and store operations as a result of memset, memcpy, and memmove
+  /// lowering.  If DstAlign is zero, that means the destination alignment can
+  /// satisfy any constraint.  Similarly if SrcAlign is zero it means there
+  /// isn't a need to check it against the alignment requirement, probably
+  /// because the source does not need to be loaded.  If 'NonScalarIntSafe' is
+  /// true, that means it's safe to return a non-scalar-integer type, e.g.
+  /// empty string source, constant, or loaded from memory.  'MemcpyStrSrc'
+  /// indicates whether the memcpy source is constant so it does not need to
+  /// be loaded.
+  /// It returns EVT::Other if the type should be determined using generic
+  /// target-independent logic.
+  virtual EVT getOptimalMemOpType(uint64_t Size,
+                                  unsigned DstAlign, unsigned SrcAlign,
+                                  bool NonScalarIntSafe, bool MemcpyStrSrc,
+                                  MachineFunction &MF) const {
+    return MVT::Other;
+  }
+
+  /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp
+  /// to implement llvm.setjmp.
+  bool usesUnderscoreSetJmp() const {
+    return UseUnderscoreSetJmp;
+  }
+
+  /// usesUnderscoreLongJmp - Determine if we should use _longjmp or longjmp
+  /// to implement llvm.longjmp.
+  bool usesUnderscoreLongJmp() const {
+    return UseUnderscoreLongJmp;
+  }
+
+  /// getStackPointerRegisterToSaveRestore - If a physical register, this
+  /// specifies the register that llvm.stacksave/llvm.stackrestore should save
+  /// and restore.
+  unsigned getStackPointerRegisterToSaveRestore() const {
+    return StackPointerRegisterToSaveRestore;
+  }
+
+  /// getExceptionAddressRegister - If a physical register, this returns
+  /// the register that receives the exception address on entry to a landing
+  /// pad.
+  unsigned getExceptionAddressRegister() const {
+    return ExceptionPointerRegister;
+  }
+
+  /// getExceptionSelectorRegister - If a physical register, this returns
+  /// the register that receives the exception typeid on entry to a landing
+  /// pad.
+  unsigned getExceptionSelectorRegister() const {
+    return ExceptionSelectorRegister;
+  }
+
+  /// getJumpBufSize - returns the target's jmp_buf size in bytes (if never
+  /// set, the default is 200)
+  unsigned getJumpBufSize() const {
+    return JumpBufSize;
+  }
+
+  /// getJumpBufAlignment - returns the target's jmp_buf alignment in bytes
+  /// (if never set, the default is 0)
+  unsigned getJumpBufAlignment() const {
+    return JumpBufAlignment;
+  }
+
+  /// getMinStackArgumentAlignment - return the minimum stack alignment of an
+  /// argument.
+  unsigned getMinStackArgumentAlignment() const {
+    return MinStackArgumentAlignment;
+  }
+
+  /// getPrefLoopAlignment - return the preferred loop alignment.
+  ///
+  unsigned getPrefLoopAlignment() const {
+    return PrefLoopAlignment;
+  }
+
+  /// getShouldFoldAtomicFences - return whether the combiner should fold
+  /// fence MEMBARRIER instructions into the atomic intrinsic instructions.
+  ///
+  bool getShouldFoldAtomicFences() const {
+    return ShouldFoldAtomicFences;
+  }
+
+  /// getPreIndexedAddressParts - returns true by value, base pointer and
+  /// offset pointer and addressing mode by reference if the node's address
+  /// can be legally represented as pre-indexed load / store address.
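+  /// (An illustrative note, not original text: on ARM a pre-indexed load such
+  /// as "ldr r0, [r1, #8]!" both loads and updates the base register, so
+  /// folding the address arithmetic into the memory operation saves an
+  /// instruction.)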
+  virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                         SDValue &Offset,
+                                         ISD::MemIndexedMode &AM,
+                                         SelectionDAG &DAG) const {
+    return false;
+  }
+
+  /// getPostIndexedAddressParts - returns true by value, base pointer and
+  /// offset pointer and addressing mode by reference if this node can be
+  /// combined with a load / store to form a post-indexed load / store.
+  virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                          SDValue &Base, SDValue &Offset,
+                                          ISD::MemIndexedMode &AM,
+                                          SelectionDAG &DAG) const {
+    return false;
+  }
+
+  /// getJumpTableEncoding - Return the entry encoding for a jump table in the
+  /// current function.  The returned value is a member of the
+  /// MachineJumpTableInfo::JTEntryKind enum.
+  virtual unsigned getJumpTableEncoding() const;
+
+  virtual const MCExpr *
+  LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                            const MachineBasicBlock *MBB, unsigned uid,
+                            MCContext &Ctx) const {
+    assert(0 && "Need to implement this hook if target has custom JTIs");
+    return 0;
+  }
+
+  /// getPICJumpTableRelocBase - Returns relocation base for the given PIC
+  /// jumptable.
+  virtual SDValue getPICJumpTableRelocBase(SDValue Table,
+                                           SelectionDAG &DAG) const;
+
+  /// getPICJumpTableRelocBaseExpr - This returns the relocation base for the
+  /// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an
+  /// MCExpr.
+  virtual const MCExpr *
+  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+                               unsigned JTI, MCContext &Ctx) const;
+
+  /// isOffsetFoldingLegal - Return true if folding a constant offset
+  /// with the given GlobalAddress is legal.  It is frequently not legal in
+  /// PIC relocation models.
+  virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+  /// getFunctionAlignment - Return the Log2 alignment of this function.
+  virtual unsigned getFunctionAlignment(const Function *) const = 0;
+
+  /// getStackCookieLocation - Return true if the target stores stack
+  /// protector cookies at a fixed offset in some non-standard address
+  /// space, and populates the address space and offset as
+  /// appropriate.
+  virtual bool getStackCookieLocation(unsigned &AddressSpace,
+                                      unsigned &Offset) const {
+    return false;
+  }
+
+  /// getMaximalGlobalOffset - Returns the maximal possible offset which can be
+  /// used for loads / stores from the global.
+  virtual unsigned getMaximalGlobalOffset() const {
+    return 0;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // TargetLowering Optimization Methods
+  //
+
+  /// TargetLoweringOpt - A convenience struct that encapsulates a DAG, and two
+  /// SDValues for returning information from TargetLowering to its clients
+  /// that want to combine.
+  struct TargetLoweringOpt {
+    SelectionDAG &DAG;
+    bool LegalTys;
+    bool LegalOps;
+    SDValue Old;
+    SDValue New;
+
+    explicit TargetLoweringOpt(SelectionDAG &InDAG,
+                               bool LT, bool LO) :
+      DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
+
+    bool LegalTypes() const { return LegalTys; }
+    bool LegalOperations() const { return LegalOps; }
+
+    bool CombineTo(SDValue O, SDValue N) {
+      Old = O;
+      New = N;
+      return true;
+    }
+
+    /// ShrinkDemandedConstant - Check to see if the specified operand of the
+    /// specified instruction is a constant integer.  If so, check to see if
+    /// there are any bits set in the constant that are not demanded.  If so,
+    /// shrink the constant and return true.
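+    /// (Illustrative note, not original text: if Op is (and X, 0xFFFF) but
+    /// Demanded only covers the low 8 bits, the constant shrinks to 0xFF.)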
+    bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded);
+
+    /// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the
+    /// casts are free.  This uses isZExtFree and ZERO_EXTEND for the widening
+    /// cast, but it could be generalized for targets with other types of
+    /// implicit widening casts.
+    bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
+                          DebugLoc dl);
+  };
+
+  /// SimplifyDemandedBits - Look at Op.  At this point, we know that only the
+  /// DemandedMask bits of the result of Op are ever used downstream.  If we
+  /// can use this information to simplify Op, create a new simplified DAG
+  /// node and return true, returning the original and new nodes in Old and
+  /// New.  Otherwise, analyze the expression and return a mask of KnownOne
+  /// and KnownZero bits for the expression (used to simplify the caller).
+  /// The KnownZero/One bits may only be accurate for those bits in the
+  /// DemandedMask.
+  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
+                            APInt &KnownZero, APInt &KnownOne,
+                            TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+
+  /// computeMaskedBitsForTargetNode - Determine which of the bits specified in
+  /// Mask are known to be either zero or one and return them in the
+  /// KnownZero/KnownOne bitsets.
+  virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+                                              const APInt &Mask,
+                                              APInt &KnownZero,
+                                              APInt &KnownOne,
+                                              const SelectionDAG &DAG,
+                                              unsigned Depth = 0) const;
+
+  /// ComputeNumSignBitsForTargetNode - This method can be implemented by
+  /// targets that want to expose additional information about sign bits to the
+  /// DAG Combiner.
+  virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                   unsigned Depth = 0) const;
+
+  struct DAGCombinerInfo {
+    void *DC;  // The DAG Combiner object.
+    bool BeforeLegalize;
+    bool BeforeLegalizeOps;
+    bool CalledByLegalizer;
+  public:
+    SelectionDAG &DAG;
+
+    DAGCombinerInfo(SelectionDAG &dag, bool bl, bool blo, bool cl, void *dc)
+      : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo),
+        CalledByLegalizer(cl), DAG(dag) {}
+
+    bool isBeforeLegalize() const { return BeforeLegalize; }
+    bool isBeforeLegalizeOps() const { return BeforeLegalizeOps; }
+    bool isCalledByLegalizer() const { return CalledByLegalizer; }
+
+    void AddToWorklist(SDNode *N);
+    SDValue CombineTo(SDNode *N, const std::vector<SDValue> &To,
+                      bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+                      bool AddTo = true);
+
+    void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
+  };
+
+  /// SimplifySetCC - Try to simplify a setcc built with the specified operands
+  /// and cc.  If it is unable to simplify it, return a null SDValue.
+  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+                        ISD::CondCode Cond, bool foldBooleans,
+                        DAGCombinerInfo &DCI, DebugLoc dl) const;
+
+  /// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+  /// node is a GlobalAddress + offset.
+  virtual bool
+  isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
+
+  /// PerformDAGCombine - This method will be invoked for all target nodes and
+  /// for any target-independent nodes that the target has registered with
+  /// invoke it for.
+  ///
+  /// The semantics are as follows:
+  /// Return Value:
+  ///   SDValue.Val == 0   - No change was made
+  ///   SDValue.Val == N   - N was replaced, is dead, and is already handled.
+  ///   otherwise          - N should be replaced by the returned Operand.
+  ///
+  /// In addition, methods provided by DAGCombinerInfo may be used to perform
+  /// more complex transformations.
+  ///
+  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+  /// isTypeDesirableForOp - Return true if the target has native support for
+  /// the specified value type and it is 'desirable' to use the type for the
+  /// given node type.  e.g. On x86 i16 is legal, but undesirable since i16
+  /// instruction encodings are longer and some i16 instructions are slow.
+  virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const {
+    // By default, assume all legal types are desirable.
+    return isTypeLegal(VT);
+  }
+
+  /// isDesirableToTransformToIntegerOp - Return true if it is profitable for
+  /// dag combiner to transform a floating point op of specified opcode to an
+  /// equivalent op of an integer type.  e.g. f32 load -> i32 load can be
+  /// profitable on ARM.
+  virtual bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const {
+    return false;
+  }
+
+  /// IsDesirableToPromoteOp - This method queries the target whether it is
+  /// beneficial for dag combiner to promote the specified node.  If true, it
+  /// should return the desired promotion type by reference.
+  virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
+    return false;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // TargetLowering Configuration Methods - These methods should be invoked by
+  // the derived class constructor to configure this object for the target.
+  //
+
+protected:
+  /// setBooleanContents - Specify how the target extends the result of a
+  /// boolean value from i1 to a wider type.  See getBooleanContents.
+  void setBooleanContents(BooleanContent Ty) { BooleanContents = Ty; }
+
+  /// setSchedulingPreference - Specify the target scheduling preference.
+  void setSchedulingPreference(Sched::Preference Pref) {
+    SchedPreferenceInfo = Pref;
+  }
+
+  /// setUseUnderscoreSetJmp - Indicate whether this target prefers to
+  /// use _setjmp to implement llvm.setjmp or the non _ version.
+  /// Defaults to false.
+  void setUseUnderscoreSetJmp(bool Val) {
+    UseUnderscoreSetJmp = Val;
+  }
+
+  /// setUseUnderscoreLongJmp - Indicate whether this target prefers to
+  /// use _longjmp to implement llvm.longjmp or the non _ version.
+  /// Defaults to false.
+  void setUseUnderscoreLongJmp(bool Val) {
+    UseUnderscoreLongJmp = Val;
+  }
+
+  /// setStackPointerRegisterToSaveRestore - If set to a physical register,
+  /// this specifies the register that llvm.stacksave/llvm.stackrestore should
+  /// save and restore.
+  void setStackPointerRegisterToSaveRestore(unsigned R) {
+    StackPointerRegisterToSaveRestore = R;
+  }
+
+  /// setExceptionPointerRegister - If set to a physical register, this sets
+  /// the register that receives the exception address on entry to a landing
+  /// pad.
+  void setExceptionPointerRegister(unsigned R) {
+    ExceptionPointerRegister = R;
+  }
+
+  /// setExceptionSelectorRegister - If set to a physical register, this sets
+  /// the register that receives the exception typeid on entry to a landing
+  /// pad.
+  void setExceptionSelectorRegister(unsigned R) {
+    ExceptionSelectorRegister = R;
+  }
+
+  /// setSelectIsExpensive - Tells the code generator not to expand operations
+  /// into sequences that use the select operations if possible.
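+  /// (A sketch of how the cost hints in this section are typically set from
+  /// a hypothetical target's constructor; not original text:
+  /// \code
+  ///   setSelectIsExpensive();   // prefer branches over select
+  ///   setIntDivIsCheap(false);  // expand integer divides where possible
+  ///   setJumpIsExpensive();     // prefer straight-line code
+  /// \endcode
+  /// each hint only biases the generic lowering heuristics.)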
+  void setSelectIsExpensive(bool isExpensive = true) {
+    SelectIsExpensive = isExpensive;
+  }
+
+  /// setJumpIsExpensive - Tells the code generator not to expand a sequence
+  /// of operations into separate sequences that increase the amount of flow
+  /// control.
+  void setJumpIsExpensive(bool isExpensive = true) {
+    JumpIsExpensive = isExpensive;
+  }
+
+  /// setIntDivIsCheap - Tells the code generator whether integer divide is
+  /// cheap on this target; if it is not, integer divides should, where
+  /// possible, be replaced by an alternate sequence of instructions not
+  /// containing an integer divide.
+  void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
+
+  /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate
+  /// srl/add/sra for a signed divide by power of two, and let the target
+  /// handle it.
+  void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; }
+
+  /// addRegisterClass - Add the specified register class as an available
+  /// regclass for the specified value type.  This indicates the selector can
+  /// handle values of that class natively.
+  void addRegisterClass(EVT VT, TargetRegisterClass *RC) {
+    assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
+    AvailableRegClasses.push_back(std::make_pair(VT, RC));
+    RegClassForVT[VT.getSimpleVT().SimpleTy] = RC;
+  }
+
+  /// findRepresentativeClass - Return the largest legal super-reg register
+  /// class of the register class for the specified type and its associated
+  /// "cost".
+  virtual std::pair<const TargetRegisterClass*, uint8_t>
+  findRepresentativeClass(EVT VT) const;
+
+  /// computeRegisterProperties - Once all of the register classes are added,
+  /// this allows us to compute derived properties we expose.
+  void computeRegisterProperties();
+
+  /// setOperationAction - Indicate that the specified operation does not work
+  /// with the specified type and indicate what to do about it.
+  void setOperationAction(unsigned Op, MVT VT,
+                          LegalizeAction Action) {
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action;
+  }
+
+  /// setLoadExtAction - Indicate that the specified load with extension does
+  /// not work with the specified type and indicate what to do about it.
+  void setLoadExtAction(unsigned ExtType, MVT VT,
+                        LegalizeAction Action) {
+    assert(ExtType < ISD::LAST_LOADEXT_TYPE && VT < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action;
+  }
+
+  /// setTruncStoreAction - Indicate that the specified truncating store does
+  /// not work with the specified type and indicate what to do about it.
+  void setTruncStoreAction(MVT ValVT, MVT MemVT,
+                           LegalizeAction Action) {
+    assert(ValVT < MVT::LAST_VALUETYPE && MemVT < MVT::LAST_VALUETYPE &&
+           "Table isn't big enough!");
+    TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action;
+  }
+
+  /// setIndexedLoadAction - Indicate that the specified indexed load does or
+  /// does not work with the specified type and indicate what to do about
+  /// it.  NOTE: All indexed mode loads are initialized to Expand in
+  /// TargetLowering.cpp
+  void setIndexedLoadAction(unsigned IdxMode, MVT VT,
+                            LegalizeAction Action) {
+    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf && "Table isn't big enough!");
+    // Load actions are kept in the upper half.
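+    // (Each IndexedModeActions[VT][IdxMode] byte packs two 4-bit
+    // LegalizeActions: the load action in bits 7-4 and the store action in
+    // bits 3-0, which is why the load path masks ~0xf0 and shifts by 4.)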
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) << 4;
+  }
+
+  /// setIndexedStoreAction - Indicate that the specified indexed store does or
+  /// does not work with the specified type and indicate what to do about
+  /// it.  NOTE: All indexed mode stores are initialized to Expand in
+  /// TargetLowering.cpp
+  void setIndexedStoreAction(unsigned IdxMode, MVT VT,
+                             LegalizeAction Action) {
+    assert(VT < MVT::LAST_VALUETYPE && IdxMode < ISD::LAST_INDEXED_MODE &&
+           (unsigned)Action < 0xf && "Table isn't big enough!");
+    // Store actions are kept in the lower half.
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
+    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
+  }
+
+  /// setCondCodeAction - Indicate that the specified condition code is or
+  /// isn't supported on the target and indicate what to do about it.
+  void setCondCodeAction(ISD::CondCode CC, MVT VT,
+                         LegalizeAction Action) {
+    assert(VT < MVT::LAST_VALUETYPE &&
+           (unsigned)CC < array_lengthof(CondCodeActions) &&
+           "Table isn't big enough!");
+    CondCodeActions[(unsigned)CC] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
+    CondCodeActions[(unsigned)CC] |= (uint64_t)Action << VT.SimpleTy*2;
+  }
+
+  /// AddPromotedToType - If Opc/OrigVT is specified as being promoted, the
+  /// promotion code defaults to trying a larger integer/fp until it can find
+  /// one that works.  If that default is insufficient, this method can be used
+  /// by the target to override the default.
+  void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
+    PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
+  }
+
+  /// setTargetDAGCombine - Targets should invoke this method for each target
+  /// independent node that they want to provide a custom DAG combiner for by
+  /// implementing the PerformDAGCombine virtual method.
+  void setTargetDAGCombine(ISD::NodeType NT) {
+    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
+    TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
+  }
+
+  /// setJumpBufSize - Set the target's required jmp_buf buffer size (in
+  /// bytes); default is 200
+  void setJumpBufSize(unsigned Size) {
+    JumpBufSize = Size;
+  }
+
+  /// setJumpBufAlignment - Set the target's required jmp_buf buffer
+  /// alignment (in bytes); default is 0
+  void setJumpBufAlignment(unsigned Align) {
+    JumpBufAlignment = Align;
+  }
+
+  /// setPrefLoopAlignment - Set the target's preferred loop alignment.  A
+  /// default alignment of zero means the target does not care about loop
+  /// alignment.
+  void setPrefLoopAlignment(unsigned Align) {
+    PrefLoopAlignment = Align;
+  }
+
+  /// setMinStackArgumentAlignment - Set the minimum stack alignment of an
+  /// argument.
+  void setMinStackArgumentAlignment(unsigned Align) {
+    MinStackArgumentAlignment = Align;
+  }
+
+  /// setShouldFoldAtomicFences - Set if the target's implementation of the
+  /// atomic operation intrinsics includes locking.  Default is false.
+  void setShouldFoldAtomicFences(bool fold) {
+    ShouldFoldAtomicFences = fold;
+  }
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Lowering methods - These methods must be implemented by targets so that
+  // the SelectionDAGLowering code knows how to lower these.
+  //
+
+  /// LowerFormalArguments - This hook must be implemented to lower the
+  /// incoming (formal) arguments, described by the Ins array, into the
+  /// specified DAG.
The implementation should fill in the InVals array
+  /// with legal-type argument values, and return the resulting token
+  /// chain value.
+  ///
+  virtual SDValue
+    LowerFormalArguments(SDValue Chain,
+                         CallingConv::ID CallConv, bool isVarArg,
+                         const SmallVectorImpl<ISD::InputArg> &Ins,
+                         DebugLoc dl, SelectionDAG &DAG,
+                         SmallVectorImpl<SDValue> &InVals) const {
+    assert(0 && "Not Implemented");
+    return SDValue();    // this is here to silence compiler errors
+  }
+
+  /// LowerCallTo - This function lowers an abstract call to a function into an
+  /// actual call.  This returns a pair of operands.  The first element is the
+  /// return value for the function (if RetTy is not VoidTy).  The second
+  /// element is the outgoing token chain.  It calls LowerCall to do the actual
+  /// lowering.
+  struct ArgListEntry {
+    SDValue Node;
+    const Type* Ty;
+    bool isSExt  : 1;
+    bool isZExt  : 1;
+    bool isInReg : 1;
+    bool isSRet  : 1;
+    bool isNest  : 1;
+    bool isByVal : 1;
+    uint16_t Alignment;
+
+    ArgListEntry() : isSExt(false), isZExt(false), isInReg(false),
+      isSRet(false), isNest(false), isByVal(false), Alignment(0) { }
+  };
+  typedef std::vector<ArgListEntry> ArgListTy;
+  std::pair<SDValue, SDValue>
+  LowerCallTo(SDValue Chain, const Type *RetTy, bool RetSExt, bool RetZExt,
+              bool isVarArg, bool isInreg, unsigned NumFixedArgs,
+              CallingConv::ID CallConv, bool isTailCall,
+              bool isReturnValueUsed, SDValue Callee, ArgListTy &Args,
+              SelectionDAG &DAG, DebugLoc dl) const;
+
+  /// LowerCall - This hook must be implemented to lower calls into the
+  /// specified DAG.  The outgoing arguments to the call are described
+  /// by the Outs array, and the values to be returned by the call are
+  /// described by the Ins array.  The implementation should fill in the
+  /// InVals array with legal-type return values from the call, and return
+  /// the resulting token chain value.
+  virtual SDValue
+    LowerCall(SDValue Chain, SDValue Callee,
+              CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
+              const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
+              const SmallVectorImpl<ISD::InputArg> &Ins,
+              DebugLoc dl, SelectionDAG &DAG,
+              SmallVectorImpl<SDValue> &InVals) const {
+    assert(0 && "Not Implemented");
+    return SDValue();    // this is here to silence compiler errors
+  }
+
+  /// HandleByVal - Target-specific cleanup for formal ByVal parameters.
+  virtual void HandleByVal(CCState *) const {}
+
+  /// CanLowerReturn - This hook should be implemented to check whether the
+  /// return values described by the Outs array can fit into the return
+  /// registers.  If false is returned, an sret-demotion is performed.
+  ///
+  virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+                              const SmallVectorImpl<ISD::OutputArg> &Outs,
+                              LLVMContext &Context) const
+  {
+    // Return true by default to get preexisting behavior.
+    return true;
+  }
+
+  /// LowerReturn - This hook must be implemented to lower outgoing
+  /// return values, described by the Outs array, into the specified
+  /// DAG.  The implementation should return the resulting token chain
+  /// value.
+  ///
+  virtual SDValue
+    LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
+                DebugLoc dl, SelectionDAG &DAG) const {
+    assert(0 && "Not Implemented");
+    return SDValue();    // this is here to silence compiler errors
+  }
+
+  /// isUsedByReturnOnly - Return true if result of the specified node is used
+  /// by a return node only.  This is used to determine whether it is possible
+  /// to codegen a libcall as tail call at legalization time.
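+  /// (Illustrative note, not original text: when a libcall's result flows
+  /// directly into the enclosing function's return, the call can be emitted
+  /// as a jump rather than a call plus return.)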
+  virtual bool isUsedByReturnOnly(SDNode *N) const {
+    return false;
+  }
+
+  /// mayBeEmittedAsTailCall - Return true if the target may be able to emit
+  /// the call instruction as a tail call.  This is used by optimization passes
+  /// to determine if it's profitable to duplicate return instructions to
+  /// enable tailcall optimization.
+  virtual bool mayBeEmittedAsTailCall(CallInst *CI) const {
+    return false;
+  }
+
+  /// LowerOperationWrapper - This callback is invoked by the type legalizer
+  /// to legalize nodes with an illegal operand type but legal result types.
+  /// It replaces the LowerOperation callback in the type Legalizer.
+  /// The reason we cannot do away with LowerOperation entirely is that
+  /// LegalizeDAG isn't yet ready to use this callback.
+  /// TODO: Consider merging with ReplaceNodeResults.
+
+  /// The target places new result values for the node in Results (their number
+  /// and types must exactly match those of the original return values of
+  /// the node), or leaves Results empty, which indicates that the node is not
+  /// to be custom lowered after all.
+  /// The default implementation calls LowerOperation.
+  virtual void LowerOperationWrapper(SDNode *N,
+                                     SmallVectorImpl<SDValue> &Results,
+                                     SelectionDAG &DAG) const;
+
+  /// LowerOperation - This callback is invoked for operations that are
+  /// unsupported by the target, which are registered to use 'custom' lowering,
+  /// and whose defined values are all legal.
+  /// If the target has no operations that require custom lowering, it need not
+  /// implement this.  The default implementation of this aborts.
+  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+  /// ReplaceNodeResults - This callback is invoked when a node result type is
+  /// illegal for the target, and the operation was registered to use 'custom'
+  /// lowering for that result type.  The target places new result values for
+  /// the node in Results (their number and types must exactly match those of
+  /// the original return values of the node), or leaves Results empty, which
+  /// indicates that the node is not to be custom lowered after all.
+  ///
+  /// If the target has no operations that require custom lowering, it need not
+  /// implement this.  The default implementation aborts.
+  virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
+                                  SelectionDAG &DAG) const {
+    assert(0 && "ReplaceNodeResults not implemented for this target!");
+  }
+
+  /// getTargetNodeName() - This method returns the name of a target specific
+  /// DAG node.
+  virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+  /// createFastISel - This method returns a target specific FastISel object,
+  /// or null if the target does not support "fast" ISel.
+  virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const {
+    return 0;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Inline Asm Support hooks
+  //
+
+  /// ExpandInlineAsm - This hook allows the target to expand an inline asm
+  /// call to be explicit llvm code if it wants to.  This is useful for
+  /// turning simple inline asms into LLVM intrinsics, which gives the
+  /// compiler more information about the behavior of the code.
+  virtual bool ExpandInlineAsm(CallInst *CI) const {
+    return false;
+  }
+
+  enum ConstraintType {
+    C_Register,            // Constraint represents specific register(s).
+    C_RegisterClass,       // Constraint represents any of register(s) in class.
+    C_Memory,              // Memory constraint.
+    C_Other,               // Something else.
+    C_Unknown              // Unsupported constraint.
+  };
+
+  enum ConstraintWeight {
+    // Generic weights.
+    CW_Invalid  = -1,     // No match.
+    CW_Okay     = 0,      // Acceptable.
+    CW_Good     = 1,      // Good weight.
+    CW_Better   = 2,      // Better weight.
+    CW_Best     = 3,      // Best weight.
+
+    // Well-known weights.
+    CW_SpecificReg  = CW_Okay,    // Specific register operands.
+    CW_Register     = CW_Good,    // Register operands.
+    CW_Memory       = CW_Better,  // Memory operands.
+    CW_Constant     = CW_Best,    // Constant operand.
+    CW_Default      = CW_Okay     // Default or don't know type.
+  };
+
+  /// AsmOperandInfo - This contains information for each constraint that we
+  /// are lowering.
+  struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
+    /// ConstraintCode - This contains the actual string for the code, like
+    /// "m".  TargetLowering picks the 'best' code from ConstraintInfo::Codes
+    /// that most closely matches the operand.
+    std::string ConstraintCode;
+
+    /// ConstraintType - Information about the constraint code, e.g. Register,
+    /// RegisterClass, Memory, Other, Unknown.
+    TargetLowering::ConstraintType ConstraintType;
+
+    /// CallOperandVal - If this is the result output operand or a
+    /// clobber, this is null, otherwise it is the incoming operand to the
+    /// CallInst.  This gets modified as the asm is processed.
+    Value *CallOperandVal;
+
+    /// ConstraintVT - The ValueType for the operand value.
+    EVT ConstraintVT;
+
+    /// isMatchingInputConstraint - Return true if this is an input operand
+    /// that is a matching constraint like "4".
+    bool isMatchingInputConstraint() const;
+
+    /// getMatchedOperand - If this is an input matching constraint, this
+    /// method returns the output operand it matches.
+    unsigned getMatchedOperand() const;
+
+    /// Copy constructor for copying from an AsmOperandInfo.
+    AsmOperandInfo(const AsmOperandInfo &info)
+      : InlineAsm::ConstraintInfo(info),
+        ConstraintCode(info.ConstraintCode),
+        ConstraintType(info.ConstraintType),
+        CallOperandVal(info.CallOperandVal),
+        ConstraintVT(info.ConstraintVT) {
+    }
+
+    /// Copy constructor for copying from a ConstraintInfo.
+    AsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+      : InlineAsm::ConstraintInfo(info),
+        ConstraintType(TargetLowering::C_Unknown),
+        CallOperandVal(0), ConstraintVT(MVT::Other) {
+    }
+  };
+
+  typedef std::vector<AsmOperandInfo> AsmOperandInfoVector;
+
+  /// ParseConstraints - Split up the constraint string from the inline
+  /// assembly value into the specific constraints and their prefixes,
+  /// and also tie in the associated operand values.
+  /// If this returns an empty vector, and if the constraint string itself
+  /// isn't empty, there was an error parsing.
+  virtual AsmOperandInfoVector ParseConstraints(ImmutableCallSite CS) const;
+
+  /// Examine constraint type and operand type and determine a weight value.
+  /// The operand object must already have been set up with the operand type.
+  virtual ConstraintWeight getMultipleConstraintMatchWeight(
+      AsmOperandInfo &info, int maIndex) const;
+
+  /// Examine constraint string and operand type and determine a weight value.
+  /// The operand object must already have been set up with the operand type.
+  virtual ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
+  /// ComputeConstraintToUse - Determines the constraint code and constraint
+  /// type to use for the specific AsmOperandInfo, setting
+  /// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+  /// ComputeConstraintToUse - Determines the constraint code and constraint
+  /// type to use for the specific AsmOperandInfo, setting
+  /// OpInfo.ConstraintCode and OpInfo.ConstraintType. If the actual operand
+  /// being passed in is available, it can be passed in as Op, otherwise an
+  /// empty SDValue can be passed.
+  virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+                                      SDValue Op,
+                                      SelectionDAG *DAG = 0) const;
+
+  /// getConstraintType - Given a constraint, return the type of constraint it
+  /// is for this target.
+  virtual ConstraintType getConstraintType(const std::string &Constraint) const;
+
+  /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+  /// return a list of registers that can be used to satisfy the constraint.
+  /// This should only be used for C_RegisterClass constraints.
+  virtual std::vector<unsigned>
+  getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                    EVT VT) const;
+
+  /// getRegForInlineAsmConstraint - Given a physical register constraint (e.g.
+  /// {edx}), return the register number and the register class for the
+  /// register.
+  ///
+  /// Given a register class constraint, like 'r', if this corresponds directly
+  /// to an LLVM register class, return a register of 0 and the register class
+  /// pointer.
+  ///
+  /// This should only be used for C_Register constraints. On error,
+  /// this returns a register number of 0 and a null register class pointer.
+  virtual std::pair<unsigned, const TargetRegisterClass*>
+  getRegForInlineAsmConstraint(const std::string &Constraint,
+                               EVT VT) const;
+
+  /// LowerXConstraint - try to replace an X constraint, which matches
+  /// anything, with another that has more specific requirements based on the
+  /// type of the corresponding operand. This returns null if there is no
+  /// replacement to make.
+  virtual const char *LowerXConstraint(EVT ConstraintVT) const;
+
+  /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+  /// vector. If it is invalid, don't add anything to Ops.
+  virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
+                                            std::vector<SDValue> &Ops,
+                                            SelectionDAG &DAG) const;
+
+  //===--------------------------------------------------------------------===//
+  // Instruction Emitting Hooks
+  //
+
+  // EmitInstrWithCustomInserter - This method should be implemented by targets
+  // that mark instructions with the 'usesCustomInserter' flag. These
+  // instructions are special in various ways, which require special support to
+  // insert. The specified MachineInstr is created but not inserted into any
+  // basic blocks, and this method is called to expand it into a sequence of
+  // instructions, potentially also creating new basic blocks and control flow.
+  virtual MachineBasicBlock *
+  EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const;
+
+  //===--------------------------------------------------------------------===//
+  // Addressing mode description hooks (used by LSR etc).
+  //
+
+  /// AddrMode - This represents an addressing mode of:
+  ///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
+  /// If BaseGV is null, there is no BaseGV.
+  /// If BaseOffs is zero, there is no base offset.
+  /// If HasBaseReg is false, there is no base register.
+  /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
+  /// no scale.
+  ///
+  struct AddrMode {
+    GlobalValue *BaseGV;
+    int64_t      BaseOffs;
+    bool         HasBaseReg;
+    int64_t      Scale;
+    AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {}
+  };
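As a sketch of how a target consumes AddrMode: a hypothetical RISC-like target whose loads and stores accept only a base register plus a signed 12-bit immediate could implement the isLegalAddressingMode hook (declared just below) roughly as follows. MyTargetLowering and the encoding limits are illustrative assumptions.

    bool MyTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                                 const Type *Ty) const {
      if (AM.BaseGV)                    // No global-address forms.
        return false;
      if (AM.Scale != 0)                // No scaled index registers.
        return false;
      return AM.BaseOffs >= -2048 && AM.BaseOffs <= 2047;  // imm12 offsets
    }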
+  /// isLegalAddressingMode - Return true if the addressing mode represented
+  /// by AM is legal for this target, for a load/store of the specified type.
+  /// The type may be VoidTy, in which case only return true if the addressing
+  /// mode is legal for a load/store of any legal type.
+  /// TODO: Handle pre/postinc as well.
+  virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const;
+
+  /// isTruncateFree - Return true if it's free to truncate a value of
+  /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
+  /// register EAX to i16 by referencing its sub-register AX.
+  virtual bool isTruncateFree(const Type *Ty1, const Type *Ty2) const {
+    return false;
+  }
+
+  virtual bool isTruncateFree(EVT VT1, EVT VT2) const {
+    return false;
+  }
+
+  /// isZExtFree - Return true if any actual instruction that defines a
+  /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
+  /// register. This does not necessarily include registers defined in
+  /// unknown ways, such as incoming arguments, or copies from unknown
+  /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
+  /// does not necessarily apply to truncate instructions. e.g. on x86-64,
+  /// all instructions that define 32-bit values implicitly zero-extend the
+  /// result out to 64 bits.
+  virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const {
+    return false;
+  }
+
+  virtual bool isZExtFree(EVT VT1, EVT VT2) const {
+    return false;
+  }
+
+  /// isNarrowingProfitable - Return true if it's profitable to narrow
+  /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+  /// from i32 to i8 but not from i32 to i16.
+  virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const {
+    return false;
+  }
+
+  /// isLegalICmpImmediate - Return true if the specified immediate is a legal
+  /// icmp immediate, that is, the target has icmp instructions which can
+  /// compare a register against the immediate without having to materialize
+  /// the immediate into a register.
+  virtual bool isLegalICmpImmediate(int64_t Imm) const {
+    return true;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Div utility functions
+  //
+  SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG,
+                    std::vector<SDNode*> *Created) const;
+  SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                    std::vector<SDNode*> *Created) const;
+
+
+  //===--------------------------------------------------------------------===//
+  // Runtime Library hooks
+  //
+
+  /// setLibcallName - Rename the default libcall routine name for the
+  /// specified libcall.
+  void setLibcallName(RTLIB::Libcall Call, const char *Name) {
+    LibcallRoutineNames[Call] = Name;
+  }
+
+  /// getLibcallName - Get the libcall routine name for the specified libcall.
+  ///
+  const char *getLibcallName(RTLIB::Libcall Call) const {
+    return LibcallRoutineNames[Call];
+  }
+
+  /// setCmpLibcallCC - Override the default CondCode to be used to test the
+  /// result of the comparison libcall against zero.
+  void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
+    CmpLibcallCCs[Call] = CC;
+  }
+
+  /// getCmpLibcallCC - Get the CondCode that's to be used to test the result
+  /// of the comparison libcall against zero.
+  ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
+    return CmpLibcallCCs[Call];
+  }
+
+  /// setLibcallCallingConv - Set the CallingConv that should be used for the
+  /// specified libcall.
+  void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
+    LibcallCallingConvs[Call] = CC;
+  }
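These runtime-library hooks are normally exercised from a target's TargetLowering constructor. A hedged sketch follows; the class, the routine name, and the particular choices are invented, though the RTLIB, ISD, and CallingConv enumerators are real.

    MyTargetLowering::MyTargetLowering(TargetMachine &TM)
      : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
      setLibcallName(RTLIB::MEMCPY, "__mytarget_memcpy");
      setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::C);
      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETEQ);  // oeq result tested == 0
    }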
+  /// getLibcallCallingConv - Get the CallingConv that should be used for the
+  /// specified libcall.
+  CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
+    return LibcallCallingConvs[Call];
+  }
+
+private:
+  const TargetMachine &TM;
+  const TargetData *TD;
+  const TargetLoweringObjectFile &TLOF;
+
+  /// PointerTy - The type to use for pointers, usually i32 or i64.
+  ///
+  MVT PointerTy;
+
+  /// IsLittleEndian - True if this is a little endian target.
+  ///
+  bool IsLittleEndian;
+
+  /// SelectIsExpensive - Tells the code generator not to expand operations
+  /// into sequences that use the select operations if possible.
+  bool SelectIsExpensive;
+
+  /// IntDivIsCheap - Tells the code generator not to expand integer divides
+  /// by constants into a sequence of muls, adds, and shifts. This is a hack
+  /// until a real cost model is in place. If we ever optimize for size, this
+  /// will be set to true unconditionally.
+  bool IntDivIsCheap;
+
+  /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate
+  /// srl/add/sra for a signed divide by power of two, and let the target
+  /// handle it.
+  bool Pow2DivIsCheap;
+
+  /// JumpIsExpensive - Tells the code generator that it shouldn't generate
+  /// extra flow control instructions and should attempt to combine flow
+  /// control instructions via predication.
+  bool JumpIsExpensive;
+
+  /// UseUnderscoreSetJmp - This target prefers to use _setjmp to implement
+  /// llvm.setjmp. Defaults to false.
+  bool UseUnderscoreSetJmp;
+
+  /// UseUnderscoreLongJmp - This target prefers to use _longjmp to implement
+  /// llvm.longjmp. Defaults to false.
+  bool UseUnderscoreLongJmp;
+
+  /// BooleanContents - Information about the contents of the high-bits in
+  /// boolean values held in a type wider than i1. See getBooleanContents.
+  BooleanContent BooleanContents;
+
+  /// SchedPreferenceInfo - The target scheduling preference: shortest
+  /// possible total cycles or lowest register usage.
+  Sched::Preference SchedPreferenceInfo;
+
+  /// JumpBufSize - The size, in bytes, of the target's jmp_buf buffers
+  unsigned JumpBufSize;
+
+  /// JumpBufAlignment - The alignment, in bytes, of the target's jmp_buf
+  /// buffers
+  unsigned JumpBufAlignment;
+
+  /// MinStackArgumentAlignment - The minimum alignment that any argument
+  /// on the stack needs to have.
+  ///
+  unsigned MinStackArgumentAlignment;
+
+  /// PrefLoopAlignment - The preferred loop alignment.
+  ///
+  unsigned PrefLoopAlignment;
+
+  /// ShouldFoldAtomicFences - Whether fencing MEMBARRIER instructions should
+  /// be folded into the enclosed atomic intrinsic instruction by the
+  /// combiner.
+  bool ShouldFoldAtomicFences;
+
+  /// StackPointerRegisterToSaveRestore - If set to a physical register, this
+  /// specifies the register that llvm.stacksave/llvm.stackrestore should save
+  /// and restore.
+  unsigned StackPointerRegisterToSaveRestore;
+
+  /// ExceptionPointerRegister - If set to a physical register, this specifies
+  /// the register that receives the exception address on entry to a landing
+  /// pad.
+  unsigned ExceptionPointerRegister;
+
+  /// ExceptionSelectorRegister - If set to a physical register, this
+  /// specifies the register that receives the exception typeid on entry to a
+  /// landing pad.
+  unsigned ExceptionSelectorRegister;
+
+  /// RegClassForVT - This indicates the default register class to use for
+  /// each ValueType the target supports natively.
+  TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
+  unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
+  EVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
+
+  /// RepRegClassForVT - This indicates the "representative" register class to
+  /// use for each ValueType the target supports natively. This information is
+  /// used by the scheduler to track register pressure. By default, the
+  /// representative register class is the largest legal super-reg register
+  /// class of the register class of the specified type. e.g. On x86, i8, i16,
+  /// and i32's representative class would be GR32.
+  const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];
+
+  /// RepRegClassCostForVT - This indicates the "cost" of the "representative"
+  /// register class for each ValueType. The cost is used by the scheduler to
+  /// approximate register pressure.
+  uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
+
+  /// TransformToType - For any value types we are promoting or expanding, this
+  /// contains the value type that we are changing to. For Expanded types, this
+  /// contains one step of the expand (e.g. i64 -> i32), even if there are
+  /// multiple steps required (e.g. i64 -> i16). For types natively supported
+  /// by the system, this holds the same type (e.g. i32 -> i32).
+  EVT TransformToType[MVT::LAST_VALUETYPE];
+
+  /// OpActions - For each operation and each value type, keep a LegalizeAction
+  /// that indicates how instruction selection should deal with the operation.
+  /// Most operations are Legal (aka, supported natively by the target), but
+  /// operations that are not should be described. Note that operations on
+  /// non-legal value types are not described here.
+  uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
+
+  /// LoadExtActions - For each load extension type and each value type, keep
+  /// a LegalizeAction that indicates how instruction selection should deal
+  /// with a load of a specific value type and extension type.
+  uint8_t LoadExtActions[MVT::LAST_VALUETYPE][ISD::LAST_LOADEXT_TYPE];
+
+  /// TruncStoreActions - For each value type pair keep a LegalizeAction that
+  /// indicates whether a truncating store of a specific value type and
+  /// truncating type is legal.
+  uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
+
+  /// IndexedModeActions - For each indexed mode and each value type, keep a
+  /// pair of LegalizeAction that indicates how instruction selection should
+  /// deal with the load / store. The first dimension is the value_type for
+  /// the reference. The second dimension represents the various modes for
+  /// load store.
+  uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
+
+  /// CondCodeActions - For each condition code (ISD::CondCode) keep a
+  /// LegalizeAction that indicates how instruction selection should
+  /// deal with the condition code.
+  uint64_t CondCodeActions[ISD::SETCC_INVALID];
+
+  ValueTypeActionImpl ValueTypeActions;
+
+  std::vector<std::pair<EVT, TargetRegisterClass*> > AvailableRegClasses;
+
+  /// TargetDAGCombineArray - Targets can specify ISD nodes that they would
+  /// like PerformDAGCombine callbacks for by calling setTargetDAGCombine(),
+  /// which sets a bit in this array.
+  unsigned char
+  TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
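To illustrate how this array gets used: a target opts in with setTargetDAGCombine() during construction and then receives the callback through PerformDAGCombine. The sketch below assumes an invented MyTargetLowering; the MUL combine itself is hypothetical.

    // In the target's TargetLowering constructor:
    setTargetDAGCombine(ISD::MUL);

    // Later, during DAG combining:
    SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
      if (N->getOpcode() == ISD::MUL) {
        // e.g. try to fold the multiply into a multiply-accumulate here.
      }
      return SDValue();   // an empty SDValue means "no change"
    }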
+  /// PromoteToType - For operations that must be promoted to a specific type,
+  /// this holds the destination type. This map should be sparse, so don't
+  /// hold it as an array.
+  ///
+  /// Targets add entries to this map with AddPromotedToType(..), clients
+  /// access this with getTypeToPromoteTo(..).
+  std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
+    PromoteToType;
+
+  /// LibcallRoutineNames - Stores the name of each libcall.
+  ///
+  const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL];
+
+  /// CmpLibcallCCs - The ISD::CondCode that should be used to test the result
+  /// of each of the comparison libcalls against zero.
+  ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
+
+  /// LibcallCallingConvs - Stores the CallingConv that should be used for
+  /// each libcall.
+  CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
+
+protected:
+  /// When lowering \@llvm.memset this field specifies the maximum number of
+  /// store operations that may be substituted for the call to memset. Targets
+  /// must set this value based on the cost threshold for that target. Targets
+  /// should assume that the memset will be done using as many of the largest
+  /// store operations first, followed by smaller ones, if necessary, per
+  /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
+  /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
+  /// store. This only applies to setting a constant array of a constant size.
+  /// @brief Specify maximum number of store instructions per memset call.
+  unsigned maxStoresPerMemset;
+
+  /// Maximum number of store operations that may be substituted for the call
+  /// to memset, used for functions with OptSize attribute.
+  unsigned maxStoresPerMemsetOptSize;
+
+  /// When lowering \@llvm.memcpy this field specifies the maximum number of
+  /// store operations that may be substituted for a call to memcpy. Targets
+  /// must set this value based on the cost threshold for that target. Targets
+  /// should assume that the memcpy will be done using as many of the largest
+  /// store operations first, followed by smaller ones, if necessary, per
+  /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
+  /// with 32-bit alignment would result in one 4-byte store, one 2-byte store
+  /// and one 1-byte store. This only applies to copying a constant array of
+  /// constant size.
+  /// @brief Specify maximum number of store instructions per memcpy call.
+  unsigned maxStoresPerMemcpy;
+
+  /// Maximum number of store operations that may be substituted for a call
+  /// to memcpy, used for functions with OptSize attribute.
+  unsigned maxStoresPerMemcpyOptSize;
+
+  /// When lowering \@llvm.memmove this field specifies the maximum number of
+  /// store instructions that may be substituted for a call to memmove. Targets
+  /// must set this value based on the cost threshold for that target. Targets
+  /// should assume that the memmove will be done using as many of the largest
+  /// store operations first, followed by smaller ones, if necessary, per
+  /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
+  /// with 8-bit alignment would result in nine 1-byte stores. This only
+  /// applies to copying a constant array of constant size.
+  /// @brief Specify maximum number of store instructions per memmove call.
+  unsigned maxStoresPerMemmove;
+
+  /// Maximum number of store instructions that may be substituted for a call
+  /// to memmove, used for functions with OptSize attribute.
+  unsigned maxStoresPerMemmoveOptSize;
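A target tunes these thresholds from its TargetLowering constructor, since the fields are protected. The values below are purely illustrative:

    maxStoresPerMemset        = 16;  // inline memset as up to 16 stores
    maxStoresPerMemcpy        = 8;
    maxStoresPerMemmove       = 8;
    maxStoresPerMemsetOptSize = 4;   // be stingier under the OptSize attribute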
+  /// This field specifies whether the target can benefit from code placement
+  /// optimization.
+  bool benefitFromCodePlacementOpt;
+
+private:
+  /// isLegalRC - Return true if the value types that can be represented by
+  /// the specified register class are all legal.
+  bool isLegalRC(const TargetRegisterClass *RC) const;
+
+  /// hasLegalSuperRegRegClasses - Return true if the specified register class
+  /// has one or more super-reg register classes that are legal.
+  bool hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const;
+};
+
+/// GetReturnInfo - Given an LLVM IR type and return type attributes,
+/// compute the return value EVTs and flags, and optionally also
+/// the offsets, if the return value is being lowered to memory.
+void GetReturnInfo(const Type* ReturnType, Attributes attr,
+                   SmallVectorImpl<ISD::OutputArg> &Outs,
+                   const TargetLowering &TLI,
+                   SmallVectorImpl<uint64_t> *Offsets = 0);
+
+} // end llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetLoweringObjectFile.h b/final/include/llvm/Target/TargetLoweringObjectFile.h
new file mode 100644
index 00000000000..34bf27132de
--- /dev/null
+++ b/final/include/llvm/Target/TargetLoweringObjectFile.h
@@ -0,0 +1,240 @@
+//===-- llvm/Target/TargetLoweringObjectFile.h - Object Info ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements classes used to handle lowerings specific to common
+// object file formats.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
+#define LLVM_TARGET_TARGETLOWERINGOBJECTFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/SectionKind.h"
+
+namespace llvm {
+  class MachineModuleInfo;
+  class Mangler;
+  class MCAsmInfo;
+  class MCContext;
+  class MCExpr;
+  class MCSection;
+  class MCSectionMachO;
+  class MCSymbol;
+  class MCStreamer;
+  class GlobalValue;
+  class TargetMachine;
+
+class TargetLoweringObjectFile {
+  MCContext *Ctx;
+
+  TargetLoweringObjectFile(const TargetLoweringObjectFile&); // DO NOT IMPLEMENT
+  void operator=(const TargetLoweringObjectFile&);           // DO NOT IMPLEMENT
+protected:
+
+  TargetLoweringObjectFile();
+
+  /// TextSection - Section directive for standard text.
+  ///
+  const MCSection *TextSection;
+
+  /// DataSection - Section directive for standard data.
+  ///
+  const MCSection *DataSection;
+
+  /// BSSSection - Section that is default initialized to zero.
+  const MCSection *BSSSection;
+
+  /// ReadOnlySection - Section that is readonly and can contain arbitrary
+  /// initialized data. Targets are not required to have a readonly section.
+  /// If they don't, various bits of code will fall back to using the data
+  /// section for constants.
+  const MCSection *ReadOnlySection;
+
+  /// StaticCtorSection - This section contains the static constructor pointer
+  /// list.
+  const MCSection *StaticCtorSection;
+
+  /// StaticDtorSection - This section contains the static destructor pointer
+  /// list.
+  const MCSection *StaticDtorSection;
+
+  /// LSDASection - If exception handling is supported by the target, this is
+  /// the section the Language Specific Data Area information is emitted to.
+  const MCSection *LSDASection;
+
+  // Dwarf sections for debug info. If a target supports debug info, these
+  // must be set.
+  const MCSection *DwarfAbbrevSection;
+  const MCSection *DwarfInfoSection;
+  const MCSection *DwarfLineSection;
+  const MCSection *DwarfFrameSection;
+  const MCSection *DwarfPubNamesSection;
+  const MCSection *DwarfPubTypesSection;
+  const MCSection *DwarfDebugInlineSection;
+  const MCSection *DwarfStrSection;
+  const MCSection *DwarfLocSection;
+  const MCSection *DwarfARangesSection;
+  const MCSection *DwarfRangesSection;
+  const MCSection *DwarfMacroInfoSection;
+
+  // Extra TLS Variable Data section. If the target needs to put additional
+  // information for a TLS variable, it'll go here.
+  const MCSection *TLSExtraDataSection;
+
+  /// CommDirectiveSupportsAlignment - True if .comm supports alignment. This
+  /// is a hack for as long as we support 10.4 Tiger, whose assembler doesn't
+  /// support alignment on comm.
+  bool CommDirectiveSupportsAlignment;
+
+  /// SupportsWeakOmittedEHFrame - True if target object file supports a
+  /// weak_definition of constant 0 for an omitted EH frame.
+  bool SupportsWeakOmittedEHFrame;
+
+  /// IsFunctionEHSymbolGlobal - This flag is set to true if the ".eh" symbol
+  /// for a function should be marked .globl.
+  bool IsFunctionEHSymbolGlobal;
+
+  /// IsFunctionEHFrameSymbolPrivate - This flag is set to true if the
+  /// "EH_frame" symbol for EH information should be an assembler temporary
+  /// (aka private linkage, aka an L or .L label) or false if it should be a
+  /// normal non-.globl label. This defaults to true.
+  bool IsFunctionEHFrameSymbolPrivate;
+public:
+
+  MCContext &getContext() const { return *Ctx; }
+
+  virtual ~TargetLoweringObjectFile();
+
+  /// Initialize - this method must be called before any actual lowering is
+  /// done. This specifies the current context for codegen, and gives the
+  /// lowering implementations a chance to set up their default sections.
+  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
+    Ctx = &ctx;
+  }
+
+  bool isFunctionEHSymbolGlobal() const {
+    return IsFunctionEHSymbolGlobal;
+  }
+  bool isFunctionEHFrameSymbolPrivate() const {
+    return IsFunctionEHFrameSymbolPrivate;
+  }
+  bool getSupportsWeakOmittedEHFrame() const {
+    return SupportsWeakOmittedEHFrame;
+  }
+
+  bool getCommDirectiveSupportsAlignment() const {
+    return CommDirectiveSupportsAlignment;
+  }
+
+  const MCSection *getTextSection() const { return TextSection; }
+  const MCSection *getDataSection() const { return DataSection; }
+  const MCSection *getBSSSection() const { return BSSSection; }
+  const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
+  const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
+  const MCSection *getLSDASection() const { return LSDASection; }
+  virtual const MCSection *getEHFrameSection() const = 0;
+  const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; }
+  const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; }
+  const MCSection *getDwarfLineSection() const { return DwarfLineSection; }
+  const MCSection *getDwarfFrameSection() const { return DwarfFrameSection; }
+  const MCSection *getDwarfPubNamesSection() const{return DwarfPubNamesSection;}
+  const MCSection *getDwarfPubTypesSection() const{return DwarfPubTypesSection;}
+  const MCSection *getDwarfDebugInlineSection() const {
+    return DwarfDebugInlineSection;
+  }
+  const MCSection *getDwarfStrSection() const { return DwarfStrSection; }
+  const MCSection *getDwarfLocSection() const { return DwarfLocSection; }
+  const MCSection *getDwarfARangesSection() const { return DwarfARangesSection;}
+  const MCSection *getDwarfRangesSection() const { return DwarfRangesSection; }
+  const MCSection *getDwarfMacroInfoSection() const {
+    return DwarfMacroInfoSection;
+  }
+  const MCSection *getTLSExtraDataSection() const {
+    return TLSExtraDataSection;
+  }
+
+  /// shouldEmitUsedDirectiveFor - This hook allows targets to selectively
+  /// decide not to emit the UsedDirective for some symbols in llvm.used.
+  /// FIXME: REMOVE this (rdar://7071300)
+  virtual bool shouldEmitUsedDirectiveFor(const GlobalValue *GV,
+                                          Mangler *) const {
+    return GV != 0;
+  }
+
+  /// getSectionForConstant - Given a constant with the SectionKind, return a
+  /// section that it should be placed in.
+  virtual const MCSection *getSectionForConstant(SectionKind Kind) const;
+
+  /// getKindForGlobal - Classify the specified global variable into a set of
+  /// target independent categories embodied in SectionKind.
+  static SectionKind getKindForGlobal(const GlobalValue *GV,
+                                      const TargetMachine &TM);
+
+  /// SectionForGlobal - This method computes the appropriate section to emit
+  /// the specified global variable or function definition. This should not
+  /// be passed external (or available externally) globals.
+  const MCSection *SectionForGlobal(const GlobalValue *GV,
+                                    SectionKind Kind, Mangler *Mang,
+                                    const TargetMachine &TM) const;
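For orientation, a client such as an AsmPrinter might combine getKindForGlobal and SectionForGlobal roughly as below. GV, TM, Mang, TLOF, and OutStreamer are assumed to be in scope; this is a usage sketch, not code from this patch.

    SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
    const MCSection *Sec = TLOF.SectionForGlobal(GV, Kind, Mang, TM);
    OutStreamer.SwitchSection(Sec);   // emit the global into that section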
+  /// SectionForGlobal - This method computes the appropriate section to emit
+  /// the specified global variable or function definition. This should not
+  /// be passed external (or available externally) globals.
+  const MCSection *SectionForGlobal(const GlobalValue *GV,
+                                    Mangler *Mang,
+                                    const TargetMachine &TM) const {
+    return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM);
+  }
+
+
+
+  /// getExplicitSectionGlobal - Targets should implement this method to assign
+  /// a section to globals with an explicit section specified. The
+  /// implementation of this method can assume that GV->hasSection() is true.
+  virtual const MCSection *
+  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
+                           Mangler *Mang, const TargetMachine &TM) const = 0;
+
+  /// getSpecialCasedSectionGlobals - Allow the target to completely override
+  /// section assignment of a global.
+  virtual const MCSection *
+  getSpecialCasedSectionGlobals(const GlobalValue *GV, Mangler *Mang,
+                                SectionKind Kind) const {
+    return 0;
+  }
+
+  /// getExprForDwarfGlobalReference - Return an MCExpr to use for a reference
+  /// to the specified global variable from exception handling information.
+  ///
+  virtual const MCExpr *
+  getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                                 MachineModuleInfo *MMI, unsigned Encoding,
+                                 MCStreamer &Streamer) const;
+
+  ///
+  const MCExpr *
+  getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang,
+                           MachineModuleInfo *MMI, unsigned Encoding,
+                           MCStreamer &Streamer) const;
+
+  virtual unsigned getPersonalityEncoding() const;
+  virtual unsigned getLSDAEncoding() const;
+  virtual unsigned getFDEEncoding() const;
+  virtual unsigned getTTypeEncoding() const;
+
+protected:
+  virtual const MCSection *
+  SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                         Mangler *Mang, const TargetMachine &TM) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Target/TargetMachine.h b/final/include/llvm/Target/TargetMachine.h
new file mode 100644
index 00000000000..030bf5b89f7
--- /dev/null
+++ b/final/include/llvm/Target/TargetMachine.h
@@ -0,0 +1,390 @@
+//===-- llvm/Target/TargetMachine.h - Target Information --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TargetMachine and LLVMTargetMachine classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETMACHINE_H
+#define LLVM_TARGET_TARGETMACHINE_H
+
+#include "llvm/Target/TargetInstrItineraries.h"
+#include <cassert>
+#include <string>
+
+namespace llvm {
+
+class Target;
+class MCAsmInfo;
+class TargetData;
+class TargetSubtarget;
+class TargetInstrInfo;
+class TargetIntrinsicInfo;
+class TargetJITInfo;
+class TargetLowering;
+class TargetSelectionDAGInfo;
+class TargetFrameLowering;
+class JITCodeEmitter;
+class MCContext;
+class TargetRegisterInfo;
+class PassManagerBase;
+class PassManager;
+class Pass;
+class TargetELFWriterInfo;
+class formatted_raw_ostream;
+
+// Relocation model types.
+namespace Reloc {
+  enum Model {
+    Default,
+    Static,
+    PIC_,         // Cannot be named PIC due to collision with -DPIC
+    DynamicNoPIC
+  };
+}
+
+// Code model types.
+namespace CodeModel {
+  enum Model {
+    Default,
+    Small,
+    Kernel,
+    Medium,
+    Large
+  };
+}
+
+// Code generation optimization level.
+namespace CodeGenOpt {
+  enum Level {
+    None,        // -O0
+    Less,        // -O1
+    Default,     // -O2, -Os
+    Aggressive   // -O3
+  };
+}
+
+namespace Sched {
+  enum Preference {
+    None,             // No preference
+    Latency,          // Scheduling for shortest total latency.
+    RegPressure,      // Scheduling for lowest register pressure.
+    Hybrid,           // Scheduling for both latency and register pressure.
+    ILP               // Scheduling for ILP in low register pressure mode.
+  };
+}
+
+//===----------------------------------------------------------------------===//
+///
+/// TargetMachine - Primary interface to the complete machine description for
+/// the target machine. All target-specific information should be accessible
+/// through this interface.
+///
+class TargetMachine {
+  TargetMachine(const TargetMachine &);   // DO NOT IMPLEMENT
+  void operator=(const TargetMachine &);  // DO NOT IMPLEMENT
+protected: // Can only create subclasses.
+  TargetMachine(const Target &);
+
+  /// getSubtargetImpl - virtual method implemented by subclasses that returns
+  /// a reference to that target's TargetSubtarget-derived member variable.
+  virtual const TargetSubtarget *getSubtargetImpl() const { return 0; }
+
+  /// TheTarget - The Target that this machine was created for.
+  const Target &TheTarget;
+
+  /// AsmInfo - Contains target specific asm information.
+  ///
+  const MCAsmInfo *AsmInfo;
+
+  unsigned MCRelaxAll : 1;
+  unsigned MCNoExecStack : 1;
+  unsigned MCUseLoc : 1;
+
+public:
+  virtual ~TargetMachine();
+
+  const Target &getTarget() const { return TheTarget; }
+
+  // Interfaces to the major aspects of target machine information:
+  // -- Instruction opcode and operand information
+  // -- Pipelines and scheduling information
+  // -- Stack frame information
+  // -- Selection DAG lowering information
+  //
+  virtual const TargetInstrInfo *getInstrInfo() const { return 0; }
+  virtual const TargetFrameLowering *getFrameLowering() const { return 0; }
+  virtual const TargetLowering *getTargetLowering() const { return 0; }
+  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const{ return 0; }
+  virtual const TargetData *getTargetData() const { return 0; }
+
+  /// getMCAsmInfo - Return target specific asm information.
+  ///
+  const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
+
+  /// getSubtarget - This method returns a pointer to the specified type of
+  /// TargetSubtarget. In debug builds, it verifies that the object being
+  /// returned is of the correct type.
+  template<typename STC> const STC &getSubtarget() const {
+    return *static_cast<const STC*>(getSubtargetImpl());
+  }
+
+  /// getRegisterInfo - If register information is available, return it. If
+  /// not, return null. This is kept separate from RegInfo until RegInfo has
+  /// details of graph coloring register allocation removed from it.
+  ///
+  virtual const TargetRegisterInfo *getRegisterInfo() const { return 0; }
+
+  /// getIntrinsicInfo - If intrinsic information is available, return it. If
+  /// not, return null.
+  ///
+  virtual const TargetIntrinsicInfo *getIntrinsicInfo() const { return 0; }
+
+  /// getJITInfo - If this target supports a JIT, return information for it,
+  /// otherwise return null.
+  ///
+  virtual TargetJITInfo *getJITInfo() { return 0; }
+
+  /// getInstrItineraryData - Returns instruction itinerary data for the
+  /// target or specific subtarget.
+  ///
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return 0;
+  }
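For example, target code typically queries its concrete subtarget through this interface; MySubtarget and its feature predicate are invented names used only for illustration:

    const MySubtarget &ST = TM.getSubtarget<MySubtarget>();
    bool UseWidget = ST.hasWidgetUnit();   // hypothetical feature test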
+  /// getELFWriterInfo - If this target supports an ELF writer, return
+  /// information for it, otherwise return null.
+  ///
+  virtual const TargetELFWriterInfo *getELFWriterInfo() const { return 0; }
+
+  /// hasMCRelaxAll - Check whether all machine code instructions should be
+  /// relaxed.
+  bool hasMCRelaxAll() const { return MCRelaxAll; }
+
+  /// setMCRelaxAll - Set whether all machine code instructions should be
+  /// relaxed.
+  void setMCRelaxAll(bool Value) { MCRelaxAll = Value; }
+
+  /// hasMCNoExecStack - Check whether an executable stack is not needed.
+  bool hasMCNoExecStack() const { return MCNoExecStack; }
+
+  /// setMCNoExecStack - Set whether an executable stack is not needed.
+  void setMCNoExecStack(bool Value) { MCNoExecStack = Value; }
+
+  /// hasMCUseLoc - Check whether we should use dwarf's .loc directive.
+  bool hasMCUseLoc() const { return MCUseLoc; }
+
+  /// setMCUseLoc - Set whether we should use dwarf's .loc directive.
+  void setMCUseLoc(bool Value) { MCUseLoc = Value; }
+
+  /// getRelocationModel - Returns the code generation relocation model. The
+  /// choices are static, PIC, dynamic-no-pic, and target default.
+  static Reloc::Model getRelocationModel();
+
+  /// setRelocationModel - Sets the code generation relocation model.
+  ///
+  static void setRelocationModel(Reloc::Model Model);
+
+  /// getCodeModel - Returns the code model. The choices are small, kernel,
+  /// medium, large, and target default.
+  static CodeModel::Model getCodeModel();
+
+  /// setCodeModel - Sets the code model.
+  ///
+  static void setCodeModel(CodeModel::Model Model);
+
+  /// getAsmVerbosityDefault - Returns the default value of asm verbosity.
+  ///
+  static bool getAsmVerbosityDefault();
+
+  /// setAsmVerbosityDefault - Set the default value of asm verbosity. Default
+  /// is false.
+  static void setAsmVerbosityDefault(bool);
+
+  /// getDataSections - Return true if data objects should be emitted into
+  /// their own section, corresponding to -fdata-sections.
+  static bool getDataSections();
+
+  /// getFunctionSections - Return true if functions should be emitted into
+  /// their own section, corresponding to -ffunction-sections.
+  static bool getFunctionSections();
+
+  /// setDataSections - Set if the data are emitted into separate sections.
+  static void setDataSections(bool);
+
+  /// setFunctionSections - Set if the functions are emitted into separate
+  /// sections.
+  static void setFunctionSections(bool);
+
+  /// CodeGenFileType - These enums are meant to be passed into
+  /// addPassesToEmitFile to indicate what type of file to emit, and returned
+  /// by it to indicate what type of file could actually be made.
+  enum CodeGenFileType {
+    CGFT_AssemblyFile,
+    CGFT_ObjectFile,
+    CGFT_Null         // Do not emit any output.
+  };
+
+  /// getEnableTailMergeDefault - the default setting for -enable-tail-merge
+  /// on this target. User flag overrides.
+  virtual bool getEnableTailMergeDefault() const { return true; }
+
+  /// addPassesToEmitFile - Add passes to the specified pass manager to get the
+  /// specified file emitted. Typically this will involve several steps of code
+  /// generation. This method should return true if emission of this file type
+  /// is not supported, or false on success.
+  virtual bool addPassesToEmitFile(PassManagerBase &,
+                                   formatted_raw_ostream &,
+                                   CodeGenFileType,
+                                   CodeGenOpt::Level,
+                                   bool = true) {
+    return true;
+  }
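Driver-side usage looks roughly like the following sketch (error handling abbreviated; TM is assumed to be a TargetMachine*, M the Module being compiled, and Out an open raw_ostream):

    PassManager PM;
    formatted_raw_ostream FOS(Out);
    if (TM->addPassesToEmitFile(PM, FOS, TargetMachine::CGFT_AssemblyFile,
                                CodeGenOpt::Default))
      report_fatal_error("target does not support assembly emission");
    PM.run(*M);   // runs codegen and writes the .s file through FOS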
+  /// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+  /// get machine code emitted. This uses a JITCodeEmitter object to handle
+  /// actually outputting the machine code and resolving things like the
+  /// address of functions. This method returns true if machine code emission
+  /// is not supported.
+  ///
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
+                                          JITCodeEmitter &,
+                                          CodeGenOpt::Level,
+                                          bool = true) {
+    return true;
+  }
+
+  /// addPassesToEmitMC - Add passes to the specified pass manager to get
+  /// machine code emitted with the MCJIT. This method returns true if machine
+  /// code is not supported. It fills the MCContext Ctx pointer which can be
+  /// used to build custom MCStreamer.
+  ///
+  virtual bool addPassesToEmitMC(PassManagerBase &,
+                                 MCContext *&,
+                                 CodeGenOpt::Level,
+                                 bool = true) {
+    return true;
+  }
+};
+
+/// LLVMTargetMachine - This class describes a target machine that is
+/// implemented with the LLVM target-independent code generator.
+///
+class LLVMTargetMachine : public TargetMachine {
+  std::string TargetTriple;
+
+protected: // Can only create subclasses.
+  LLVMTargetMachine(const Target &T, const std::string &TargetTriple);
+
+private:
+  /// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
+  /// both emitting to assembly files and machine code output.
+  ///
+  bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
+                              bool DisableVerify, MCContext *&OutCtx);
+
+  virtual void setCodeModelForJIT();
+  virtual void setCodeModelForStatic();
+
+public:
+
+  const std::string &getTargetTriple() const { return TargetTriple; }
+
+  /// addPassesToEmitFile - Add passes to the specified pass manager to get the
+  /// specified file emitted. Typically this will involve several steps of code
+  /// generation. If OptLevel is None, the code generator should emit code as
+  /// fast as possible, though the generated code may be less efficient.
+  virtual bool addPassesToEmitFile(PassManagerBase &PM,
+                                   formatted_raw_ostream &Out,
+                                   CodeGenFileType FileType,
+                                   CodeGenOpt::Level,
+                                   bool DisableVerify = true);
+
+  /// addPassesToEmitMachineCode - Add passes to the specified pass manager to
+  /// get machine code emitted. This uses a JITCodeEmitter object to handle
+  /// actually outputting the machine code and resolving things like the
+  /// address of functions. This method returns true if machine code emission
+  /// is not supported.
+  ///
+  virtual bool addPassesToEmitMachineCode(PassManagerBase &PM,
+                                          JITCodeEmitter &MCE,
+                                          CodeGenOpt::Level,
+                                          bool DisableVerify = true);
+
+  /// addPassesToEmitMC - Add passes to the specified pass manager to get
+  /// machine code emitted with the MCJIT. This method returns true if machine
+  /// code is not supported. It fills the MCContext Ctx pointer which can be
+  /// used to build custom MCStreamer.
+  ///
+  virtual bool addPassesToEmitMC(PassManagerBase &PM,
+                                 MCContext *&Ctx,
+                                 CodeGenOpt::Level OptLevel,
+                                 bool DisableVerify = true);
+
+  /// Target-Independent Code Generator Pass Configuration Options.
+
+  /// addPreISel - This method should add any "last minute" LLVM->LLVM
+  /// passes (which are run just before the instruction selector).
+  virtual bool addPreISel(PassManagerBase &, CodeGenOpt::Level) {
+    return true;
+  }
+
+  /// addInstSelector - This method should install an instruction selector
+  /// pass, which converts from LLVM code to machine instructions.
+  virtual bool addInstSelector(PassManagerBase &, CodeGenOpt::Level) {
+    return true;
+  }
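A concrete LLVMTargetMachine subclass usually overrides only a few of these hooks. For instance, the selector hook above is conventionally implemented as below; createMyTargetISelDag is a hypothetical pass factory returning a FunctionPass*.

    bool MyTargetMachine::addInstSelector(PassManagerBase &PM,
                                          CodeGenOpt::Level OptLevel) {
      PM.add(createMyTargetISelDag(*this, OptLevel));
      return false;   // false: the selector was installed successfully
    }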
+  /// addPreRegAlloc - This method may be implemented by targets that want to
+  /// run passes immediately before register allocation. This should return
+  /// true if -print-machineinstrs should print after these passes.
+  virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
+    return false;
+  }
+
+  /// addPostRegAlloc - This method may be implemented by targets that want
+  /// to run passes after register allocation but before prolog-epilog
+  /// insertion. This should return true if -print-machineinstrs should print
+  /// after these passes.
+  virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level) {
+    return false;
+  }
+
+  /// addPreSched2 - This method may be implemented by targets that want to
+  /// run passes after prolog-epilog insertion and before the second
+  /// instruction scheduling pass. This should return true if
+  /// -print-machineinstrs should print after these passes.
+  virtual bool addPreSched2(PassManagerBase &, CodeGenOpt::Level) {
+    return false;
+  }
+
+  /// addPreEmitPass - This method may be implemented by targets that want to
+  /// run passes immediately before machine code is emitted. This should
+  /// return true if -print-machineinstrs should print out the code after the
+  /// passes.
+  virtual bool addPreEmitPass(PassManagerBase &, CodeGenOpt::Level) {
+    return false;
+  }
+
+
+  /// addCodeEmitter - This method should be overridden by the target to add a
+  /// code emitter, if supported. If this is not supported, 'true' should be
+  /// returned.
+  virtual bool addCodeEmitter(PassManagerBase &, CodeGenOpt::Level,
+                              JITCodeEmitter &) {
+    return true;
+  }
+
+  /// getEnableTailMergeDefault - the default setting for -enable-tail-merge
+  /// on this target. User flag overrides.
+  virtual bool getEnableTailMergeDefault() const { return true; }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetOpcodes.h b/final/include/llvm/Target/TargetOpcodes.h
new file mode 100644
index 00000000000..01fba6628ef
--- /dev/null
+++ b/final/include/llvm/Target/TargetOpcodes.h
@@ -0,0 +1,86 @@
+//===-- llvm/Target/TargetOpcodes.h - Target Indep Opcodes ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target independent instruction opcodes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETOPCODES_H
+#define LLVM_TARGET_TARGETOPCODES_H
+
+namespace llvm {
+
+/// Invariant opcodes: All instruction sets have these as their low opcodes.
+///
+/// Every instruction defined here must also appear in Target.td and the order
+/// must be the same as in CodeGenTarget.cpp.
+///
+namespace TargetOpcode {
+  enum {
+    PHI = 0,
+    INLINEASM = 1,
+    PROLOG_LABEL = 2,
+    EH_LABEL = 3,
+    GC_LABEL = 4,
+
+    /// KILL - This instruction is a noop that is used only to adjust the
+    /// liveness of registers. This can be useful when dealing with
+    /// sub-registers.
+    KILL = 5,
+
+    /// EXTRACT_SUBREG - This instruction takes two operands: a register
+    /// that has subregisters, and a subregister index. It returns the
+    /// extracted subregister value. This is commonly used to implement
+    /// truncation operations on target architectures which support it.
+    EXTRACT_SUBREG = 6,
+
+    /// INSERT_SUBREG - This instruction takes three operands: a register that
+    /// has subregisters, a register providing an insert value, and a
+    /// subregister index. It returns the value of the first register with the
+    /// value of the second register inserted.
+    /// The first register is often defined by an IMPLICIT_DEF, because it is
+    /// commonly used to implement anyext operations on target architectures
+    /// which support it.
+    INSERT_SUBREG = 7,
+
+    /// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
+    IMPLICIT_DEF = 8,
+
+    /// SUBREG_TO_REG - This instruction is similar to INSERT_SUBREG except
+    /// that the first operand is an immediate integer constant. This constant
+    /// is often zero, because it is commonly used to assert that the
+    /// instruction defining the register implicitly clears the high bits.
+    SUBREG_TO_REG = 9,
+
+    /// COPY_TO_REGCLASS - This instruction is a placeholder for a plain
+    /// register-to-register copy into a specific register class. This is only
+    /// used between instruction selection and MachineInstr creation, before
+    /// virtual registers have been created for all the instructions, and it's
+    /// only needed in cases where the register classes implied by the
+    /// instructions are insufficient. It is emitted as a COPY MachineInstr.
+    COPY_TO_REGCLASS = 10,
+
+    /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
+    DBG_VALUE = 11,
+
+    /// REG_SEQUENCE - This variadic instruction is used to form a register
+    /// that represents a consecutive sequence of sub-registers. It's used as
+    /// a register coalescing / allocation aid and must be eliminated before
+    /// code emission.
+    /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
+    /// After register coalescing, references of v1024 should be replaced with
+    /// v1027:3, v1025 with v1027:4, etc.
+    REG_SEQUENCE = 12,
+
+    /// COPY - Target-independent register copy. This instruction can also be
+    /// used to copy between subregisters of virtual registers.
+    COPY = 13
+  };
+} // end namespace TargetOpcode
+} // end namespace llvm
+
+#endif
diff --git a/final/include/llvm/Target/TargetOptions.h b/final/include/llvm/Target/TargetOptions.h
new file mode 100644
index 00000000000..97ceffdaecb
--- /dev/null
+++ b/final/include/llvm/Target/TargetOptions.h
@@ -0,0 +1,162 @@
+//===-- llvm/Target/TargetOptions.h - Target Options ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines command line option flags that are shared across various
+// targets.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETOPTIONS_H
+#define LLVM_TARGET_TARGETOPTIONS_H
+
+namespace llvm {
+  class MachineFunction;
+
+  // Possible float ABI settings. Used with FloatABIType in TargetOptions.h.
+  namespace FloatABI {
+    enum ABIType {
+      Default, // Target-specific (either soft or hard depending on triple, etc).
+      Soft,    // Soft float.
+      Hard     // Hard float.
+    };
+  }
+
+  /// PrintMachineCode - This flag is enabled when the -print-machineinstrs
+  /// option is specified on the command line, and should enable debugging
+  /// output from the code generator.
+  extern bool PrintMachineCode;
+
+  /// NoFramePointerElim - This flag is enabled when -disable-fp-elim is
+  /// specified on the command line. If the target supports the frame pointer
+  /// elimination optimization, this option should disable it.
+  extern bool NoFramePointerElim;
+
+  /// NoFramePointerElimNonLeaf - This flag is enabled when
+  /// -disable-non-leaf-fp-elim is specified on the command line.
+  /// If the target supports the frame pointer elimination optimization, this
+  /// option should disable it for non-leaf functions.
+  extern bool NoFramePointerElimNonLeaf;
+
+  /// DisableFramePointerElim - This returns true if frame pointer elimination
+  /// optimization should be disabled for the given machine function.
+  extern bool DisableFramePointerElim(const MachineFunction &MF);
+
+  /// LessPreciseFPMAD - This flag is enabled when the
+  /// -enable-fp-mad option is specified on the command line. When this flag
+  /// is off (the default), the code generator is not allowed to generate mad
+  /// (multiply add) if the result is "less precise" than doing those
+  /// operations individually.
+  extern bool LessPreciseFPMADOption;
+  extern bool LessPreciseFPMAD();
+
+  /// NoExcessFPPrecision - This flag is enabled when the
+  /// -disable-excess-fp-precision flag is specified on the command line.
+  /// When this flag is off (the default), the code generator is allowed to
+  /// produce results that are "more precise" than IEEE allows. This includes
+  /// use of FMA-like operations and use of the X86 FP registers without
+  /// rounding all over the place.
+  extern bool NoExcessFPPrecision;
+
+  /// UnsafeFPMath - This flag is enabled when the
+  /// -enable-unsafe-fp-math flag is specified on the command line. When
+  /// this flag is off (the default), the code generator is not allowed to
+  /// produce results that are "less precise" than IEEE allows. This includes
+  /// use of X86 instructions like FSIN and FCOS instead of libcalls.
+  /// UnsafeFPMath implies LessPreciseFPMAD.
+  extern bool UnsafeFPMath;
+
+  /// NoInfsFPMath - This flag is enabled when the
+  /// -enable-no-infs-fp-math flag is specified on the command line. When
+  /// this flag is off (the default), the code generator is not allowed to
+  /// assume the FP arithmetic arguments and results are never +-Infs.
+  extern bool NoInfsFPMath;
+
+  /// NoNaNsFPMath - This flag is enabled when the
+  /// -enable-no-nans-fp-math flag is specified on the command line. When
+  /// this flag is off (the default), the code generator is not allowed to
+  /// assume the FP arithmetic arguments and results are never NaNs.
+  extern bool NoNaNsFPMath;
+
+  /// HonorSignDependentRoundingFPMath - This returns true when
+  /// -enable-sign-dependent-rounding-fp-math is specified. If this returns
+  /// false (the default), the code generator is allowed to assume that the
+  /// rounding behavior is the default (round-to-zero for all floating point
+  /// to integer conversions, and round-to-nearest for all other arithmetic
+  /// truncations). If this is enabled (set to true), the code generator must
+  /// assume that the rounding mode may dynamically change.
+  extern bool HonorSignDependentRoundingFPMathOption;
+  extern bool HonorSignDependentRoundingFPMath();
+
+  /// UseSoftFloat - This flag is enabled when the -soft-float flag is
+  /// specified on the command line. When this flag is on, the code generator
+  /// will generate libcalls to the software floating point library instead of
+  /// target FP instructions.
+  extern bool UseSoftFloat;
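Because these globals mirror the corresponding command-line flags, a driver may also set them programmatically before constructing a target machine; a minimal sketch:

    llvm::NoFramePointerElim = true;   // same effect as -disable-fp-elim
    llvm::UnsafeFPMath = false;        // keep IEEE-safe FP code generation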
+  /// FloatABIType - This setting is set when the -float-abi=xxx option is
+  /// specified on the command line. This setting may be either Default, Soft,
+  /// or Hard. Default selects the target's default behavior. Soft selects the
+  /// ABI for UseSoftFloat, but does not indicate that FP hardware may not be
+  /// used. Such a combination is unfortunately popular
+  /// (e.g. arm-apple-darwin). Hard presumes that the normal FP ABI is used.
+  extern FloatABI::ABIType FloatABIType;
+
+  /// NoZerosInBSS - By default some codegens place zero-initialized data to
+  /// .bss section. This flag disables such behaviour (necessary, e.g. for
+  /// crt*.o compiling).
+  extern bool NoZerosInBSS;
+
+  /// JITExceptionHandling - This flag indicates that the JIT should emit
+  /// exception handling information.
+  extern bool JITExceptionHandling;
+
+  /// JITEmitDebugInfo - This flag indicates that the JIT should try to emit
+  /// debug information and notify a debugger about it.
+  extern bool JITEmitDebugInfo;
+
+  /// JITEmitDebugInfoToDisk - This flag indicates that the JIT should write
+  /// the object files generated by the JITEmitDebugInfo flag to disk. This
+  /// flag is hidden and is only for debugging the debug info.
+  extern bool JITEmitDebugInfoToDisk;
+
+  /// UnwindTablesMandatory - This flag indicates that unwind tables should
+  /// be emitted for all functions.
+  extern bool UnwindTablesMandatory;
+
+  /// GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is
+  /// specified on the command line. When the flag is on, participating targets
+  /// will perform tail call optimization on all calls which use the fastcc
+  /// calling convention and which satisfy certain target-independent
+  /// criteria (being at the end of a function, having the same return type
+  /// as their parent function, etc.), using an alternate ABI if necessary.
+  extern bool GuaranteedTailCallOpt;
+
+  /// StackAlignment - Override default stack alignment for target.
+  extern unsigned StackAlignment;
+
+  /// RealignStack - This flag indicates whether the stack should be
+  /// automatically realigned, if needed.
+  extern bool RealignStack;
+
+  /// DisableJumpTables - This flag indicates jump tables should not be
+  /// generated.
+  extern bool DisableJumpTables;
+
+  /// EnableFastISel - This flag enables fast-path instruction selection
+  /// which trades away generated code quality in favor of reducing
+  /// compile time.
+  extern bool EnableFastISel;
+
+  /// StrongPHIElim - This flag enables more aggressive PHI elimination
+  /// with earlier copy coalescing.
+  extern bool StrongPHIElim;
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetRegisterInfo.h b/final/include/llvm/Target/TargetRegisterInfo.h
new file mode 100644
index 00000000000..1c5d7c71187
--- /dev/null
+++ b/final/include/llvm/Target/TargetRegisterInfo.h
@@ -0,0 +1,828 @@
+//=== Target/TargetRegisterInfo.h - Target Register Information -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes an abstract interface used to get information about a
+// target machine's register file. This information is used for a variety of
+// purposes, especially register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETREGISTERINFO_H
+#define LLVM_TARGET_TARGETREGISTERINFO_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/DenseSet.h"
+#include <cassert>
+#include <functional>
+
+namespace llvm {
+
+class BitVector;
+class MachineFunction;
+class MachineMove;
+class RegScavenger;
+template<class T> class SmallVectorImpl;
+class raw_ostream;
+
+/// TargetRegisterDesc - This record contains all of the information known
+/// about a particular register. The Overlaps field contains a pointer to a
+/// zero terminated array of registers that this register aliases, starting
+/// with itself. This is needed for architectures like X86 which have AL alias
+/// AX alias EAX. The SubRegs field is a zero terminated array of registers
+/// that are sub-registers of the specific register, e.g. AL, AH are
+/// sub-registers of AX. The SuperRegs field is a zero terminated array of
+/// registers that are super-registers of the specific register, e.g. RAX,
+/// EAX, are super-registers of AX.
+///
+struct TargetRegisterDesc {
+  const char     *Name;       // Printable name for the reg (for debugging)
+  const unsigned *Overlaps;   // Overlapping registers, described above
+  const unsigned *SubRegs;    // Sub-register set, described above
+  const unsigned *SuperRegs;  // Super-register set, described above
+};
+
+class TargetRegisterClass {
+public:
+  typedef const unsigned* iterator;
+  typedef const unsigned* const_iterator;
+
+  typedef const EVT* vt_iterator;
+  typedef const TargetRegisterClass* const * sc_iterator;
+private:
+  unsigned ID;
+  const char *Name;
+  const vt_iterator VTs;
+  const sc_iterator SubClasses;
+  const sc_iterator SuperClasses;
+  const sc_iterator SubRegClasses;
+  const sc_iterator SuperRegClasses;
+  const unsigned RegSize, Alignment;    // Size & Alignment of register in bytes
+  const int CopyCost;
+  const iterator RegsBegin, RegsEnd;
+  DenseSet<unsigned> RegSet;
+public:
+  TargetRegisterClass(unsigned id,
+                      const char *name,
+                      const EVT *vts,
+                      const TargetRegisterClass * const *subcs,
+                      const TargetRegisterClass * const *supcs,
+                      const TargetRegisterClass * const *subregcs,
+                      const TargetRegisterClass * const *superregcs,
+                      unsigned RS, unsigned Al, int CC,
+                      iterator RB, iterator RE)
+    : ID(id), Name(name), VTs(vts), SubClasses(subcs), SuperClasses(supcs),
+      SubRegClasses(subregcs), SuperRegClasses(superregcs),
+      RegSize(RS), Alignment(Al), CopyCost(CC), RegsBegin(RB), RegsEnd(RE) {
+    for (iterator I = RegsBegin, E = RegsEnd; I != E; ++I)
+      RegSet.insert(*I);
+  }
+  virtual ~TargetRegisterClass() {}     // Allow subclasses
+
+  /// getID() - Return the register class ID number.
+  ///
+  unsigned getID() const { return ID; }
+
+  /// getName() - Return the register class name for debugging.
+  ///
+  const char *getName() const { return Name; }
+
+  /// begin/end - Return all of the registers in this class.
+  ///
+  iterator begin() const { return RegsBegin; }
+  iterator end() const { return RegsEnd; }
+
+  /// getNumRegs - Return the number of registers in this class.
+  ///
+  unsigned getNumRegs() const { return (unsigned)(RegsEnd-RegsBegin); }
+
+  /// getRegister - Return the specified register in the class.
+  ///
+  unsigned getRegister(unsigned i) const {
+    assert(i < getNumRegs() && "Register number out of range!");
+    return RegsBegin[i];
+  }
+
+  /// contains - Return true if the specified register is included in this
+  /// register class. This does not include virtual registers.
+ bool contains(unsigned Reg) const { + return RegSet.count(Reg); + } + + /// contains - Return true if both registers are in this class. + bool contains(unsigned Reg1, unsigned Reg2) const { + return contains(Reg1) && contains(Reg2); + } + + /// hasType - return true if this TargetRegisterClass has the ValueType vt. + /// + bool hasType(EVT vt) const { + for(int i = 0; VTs[i] != MVT::Other; ++i) + if (VTs[i] == vt) + return true; + return false; + } + + /// vt_begin / vt_end - Loop over all of the value types that can be + /// represented by values in this register class. + vt_iterator vt_begin() const { + return VTs; + } + + vt_iterator vt_end() const { + vt_iterator I = VTs; + while (*I != MVT::Other) ++I; + return I; + } + + /// subregclasses_begin / subregclasses_end - Loop over all of + /// the subreg register classes of this register class. + sc_iterator subregclasses_begin() const { + return SubRegClasses; + } + + sc_iterator subregclasses_end() const { + sc_iterator I = SubRegClasses; + while (*I != NULL) ++I; + return I; + } + + /// getSubRegisterRegClass - Return the register class of subregisters with + /// index SubIdx, or NULL if no such class exists. + const TargetRegisterClass* getSubRegisterRegClass(unsigned SubIdx) const { + assert(SubIdx>0 && "Invalid subregister index"); + return SubRegClasses[SubIdx-1]; + } + + /// superregclasses_begin / superregclasses_end - Loop over all of + /// the superreg register classes of this register class. + sc_iterator superregclasses_begin() const { + return SuperRegClasses; + } + + sc_iterator superregclasses_end() const { + sc_iterator I = SuperRegClasses; + while (*I != NULL) ++I; + return I; + } + + /// hasSubClass - return true if the specified TargetRegisterClass + /// is a proper subset of this TargetRegisterClass. + bool hasSubClass(const TargetRegisterClass *cs) const { + for (int i = 0; SubClasses[i] != NULL; ++i) + if (SubClasses[i] == cs) + return true; + return false; + } + + /// subclasses_begin / subclasses_end - Loop over all of the classes + /// that are proper subsets of this register class. + sc_iterator subclasses_begin() const { + return SubClasses; + } + + sc_iterator subclasses_end() const { + sc_iterator I = SubClasses; + while (*I != NULL) ++I; + return I; + } + + /// hasSuperClass - return true if the specified TargetRegisterClass is a + /// proper superset of this TargetRegisterClass. + bool hasSuperClass(const TargetRegisterClass *cs) const { + for (int i = 0; SuperClasses[i] != NULL; ++i) + if (SuperClasses[i] == cs) + return true; + return false; + } + + /// superclasses_begin / superclasses_end - Loop over all of the classes + /// that are proper supersets of this register class. + sc_iterator superclasses_begin() const { + return SuperClasses; + } + + sc_iterator superclasses_end() const { + sc_iterator I = SuperClasses; + while (*I != NULL) ++I; + return I; + } + + /// isASubClass - return true if this TargetRegisterClass is a subset + /// class of at least one other TargetRegisterClass. + bool isASubClass() const { + return SuperClasses[0] != 0; + } + + /// allocation_order_begin/end - These methods define a range of registers + /// which specify the registers in this class that are valid to register + /// allocate, and the preferred order to allocate them in. For example, + /// callee saved registers should be at the end of the list, because it is + /// cheaper to allocate caller saved registers. 
+  ///
+  /// These methods take a MachineFunction argument, which can be used to tune
+  /// the allocatable registers based on the characteristics of the function,
+  /// subtarget, or other criteria.
+  ///
+  /// Register allocators should account for the fact that an allocation
+  /// order iterator may return a reserved register and always check
+  /// if the register is allocatable (getAllocatableSet()) before using it.
+  ///
+  /// By default, these methods return all registers in the class.
+  ///
+  virtual iterator allocation_order_begin(const MachineFunction &MF) const {
+    return begin();
+  }
+  virtual iterator allocation_order_end(const MachineFunction &MF) const {
+    return end();
+  }
+
+  /// getSize - Return the size of the register in bytes, which is also the
+  /// size of a stack slot allocated to hold a spilled copy of this register.
+  unsigned getSize() const { return RegSize; }
+
+  /// getAlignment - Return the minimum required alignment for a register of
+  /// this class.
+  unsigned getAlignment() const { return Alignment; }
+
+  /// getCopyCost - Return the cost of copying a value between two registers in
+  /// this class. A negative number means the register class is very expensive
+  /// to copy, e.g. status flag register classes.
+  int getCopyCost() const { return CopyCost; }
+};
+
+
+/// TargetRegisterInfo base class - We assume that the target defines a static
+/// array of TargetRegisterDesc objects that represent all of the machine
+/// registers that the target has.  As such, we simply have to track a pointer
+/// to this array so that we can turn a register number into a register
+/// descriptor.
+///
+class TargetRegisterInfo {
+protected:
+  const unsigned* SubregHash;
+  const unsigned SubregHashSize;
+  const unsigned* AliasesHash;
+  const unsigned AliasesHashSize;
+public:
+  typedef const TargetRegisterClass * const * regclass_iterator;
+private:
+  const TargetRegisterDesc *Desc;      // Pointer to the descriptor array
+  const char *const *SubRegIndexNames; // Names of subreg indexes.
+  unsigned NumRegs;                    // Number of entries in the array
+
+  regclass_iterator RegClassBegin, RegClassEnd;   // List of regclasses
+
+  int CallFrameSetupOpcode, CallFrameDestroyOpcode;
+
+protected:
+  TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+                     regclass_iterator RegClassBegin,
+                     regclass_iterator RegClassEnd,
+                     const char *const *subregindexnames,
+                     int CallFrameSetupOpcode = -1,
+                     int CallFrameDestroyOpcode = -1,
+                     const unsigned* subregs = 0,
+                     const unsigned subregsize = 0,
+                     const unsigned* aliases = 0,
+                     const unsigned aliasessize = 0);
+  virtual ~TargetRegisterInfo();
+public:
+
+  // Register numbers can represent physical registers, virtual registers, and
+  // sometimes stack slots. The unsigned values are divided into these ranges:
+  //
+  //   0           Not a register, can be used as a sentinel.
+  //   [1;2^30)    Physical registers assigned by TableGen.
+  //   [2^30;2^31) Stack slots. (Rarely used.)
+  //   [2^31;2^32) Virtual registers assigned by MachineRegisterInfo.
+  //
+  // Further sentinels can be allocated from the small negative integers.
+  // DenseMapInfo<unsigned> uses -1u and -2u.
+
+  /// isStackSlot - Sometimes it is useful to be able to store a non-negative
+  /// frame index in a variable that normally holds a register. isStackSlot()
+  /// returns true if Reg is in the range used for stack slots.
+  ///
+  /// Note that isVirtualRegister() and isPhysicalRegister() cannot handle
+  /// stack slots, so if a variable may contain a stack slot, always check
+  /// isStackSlot() first.
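+  ///
+  /// A minimal sketch of the intended checking order (editor's illustration;
+  /// `Reg' is assumed to hold a register-or-stack-slot value):
+  ///
+  ///   if (TargetRegisterInfo::isStackSlot(Reg)) {
+  ///     int FI = TargetRegisterInfo::stackSlot2Index(Reg);     // frame index
+  ///   } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+  ///     unsigned Idx = TargetRegisterInfo::virtReg2Index(Reg); // 0-based
+  ///   } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+  ///     // TableGen-assigned physical register number.
+  ///   }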
+  ///
+  static bool isStackSlot(unsigned Reg) {
+    return int(Reg) >= (1 << 30);
+  }
+
+  /// stackSlot2Index - Compute the frame index from a register value
+  /// representing a stack slot.
+  static int stackSlot2Index(unsigned Reg) {
+    assert(isStackSlot(Reg) && "Not a stack slot");
+    return int(Reg - (1u << 30));
+  }
+
+  /// index2StackSlot - Convert a non-negative frame index to a stack slot
+  /// register value.
+  static unsigned index2StackSlot(int FI) {
+    assert(FI >= 0 && "Cannot hold a negative frame index.");
+    return FI + (1u << 30);
+  }
+
+  /// isPhysicalRegister - Return true if the specified register number is in
+  /// the physical register namespace.
+  static bool isPhysicalRegister(unsigned Reg) {
+    assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+    return int(Reg) > 0;
+  }
+
+  /// isVirtualRegister - Return true if the specified register number is in
+  /// the virtual register namespace.
+  static bool isVirtualRegister(unsigned Reg) {
+    assert(!isStackSlot(Reg) && "Not a register! Check isStackSlot() first.");
+    return int(Reg) < 0;
+  }
+
+  /// virtReg2Index - Convert a virtual register number to a 0-based index.
+  /// The first virtual register in a function will get the index 0.
+  static unsigned virtReg2Index(unsigned Reg) {
+    assert(isVirtualRegister(Reg) && "Not a virtual register");
+    return Reg - (1u << 31);
+  }
+
+  /// index2VirtReg - Convert a 0-based index to a virtual register number.
+  /// This is the inverse operation of VirtReg2IndexFunctor below.
+  static unsigned index2VirtReg(unsigned Index) {
+    return Index + (1u << 31);
+  }
+
+  /// getMinimalPhysRegClass - Returns the Register Class of a physical
+  /// register of the given type, picking the most specific (smallest)
+  /// register class of the right type that contains this physreg.
+  const TargetRegisterClass *
+    getMinimalPhysRegClass(unsigned Reg, EVT VT = MVT::Other) const;
+
+  /// getAllocatableSet - Returns a bitset indexed by register number
+  /// indicating if a register is allocatable or not. If a register class is
+  /// specified, returns the subset for the class.
+  BitVector getAllocatableSet(const MachineFunction &MF,
+                              const TargetRegisterClass *RC = NULL) const;
+
+  const TargetRegisterDesc &operator[](unsigned RegNo) const {
+    assert(RegNo < NumRegs &&
+           "Attempting to access record for invalid register number!");
+    return Desc[RegNo];
+  }
+
+  /// Provide a get method, equivalent to [], but more useful if we have a
+  /// pointer to this object.
+  ///
+  const TargetRegisterDesc &get(unsigned RegNo) const {
+    return operator[](RegNo);
+  }
+
+  /// getAliasSet - Return the set of registers aliased by the specified
+  /// register, or a null list if there are none. The list returned is zero
+  /// terminated.
+  ///
+  const unsigned *getAliasSet(unsigned RegNo) const {
+    // The Overlaps set always begins with Reg itself.
+    return get(RegNo).Overlaps + 1;
+  }
+
+  /// getOverlaps - Return a list of registers that overlap Reg, including
+  /// itself. This is the same as the alias set except Reg is included in the
+  /// list.
+  /// These are exactly the registers in { x | regsOverlap(x, Reg) }.
+  ///
+  const unsigned *getOverlaps(unsigned RegNo) const {
+    return get(RegNo).Overlaps;
+  }
+
+  /// getSubRegisters - Return the list of registers that are sub-registers of
+  /// the specified register, or a null list if there are none. The list
+  /// returned is zero terminated and sorted according to super-sub register
+  /// relations. e.g. X86::RAX's sub-register list is EAX, AX, AL, AH.
+  ///
+  const unsigned *getSubRegisters(unsigned RegNo) const {
+    return get(RegNo).SubRegs;
+  }
+
+  /// getSuperRegisters - Return the list of registers that are super-registers
+  /// of the specified register, or a null list if there are none. The list
+  /// returned is zero terminated and sorted according to super-sub register
+  /// relations. e.g. X86::AL's super-register list is RAX, EAX, AX.
+  ///
+  const unsigned *getSuperRegisters(unsigned RegNo) const {
+    return get(RegNo).SuperRegs;
+  }
+
+  /// getName - Return the human-readable symbolic target-specific name for the
+  /// specified physical register.
+  const char *getName(unsigned RegNo) const {
+    return get(RegNo).Name;
+  }
+
+  /// getNumRegs - Return the number of registers this target has (useful for
+  /// sizing arrays holding per register information)
+  unsigned getNumRegs() const {
+    return NumRegs;
+  }
+
+  /// getSubRegIndexName - Return the human-readable symbolic target-specific
+  /// name for the specified SubRegIndex.
+  const char *getSubRegIndexName(unsigned SubIdx) const {
+    assert(SubIdx && "This is not a subregister index");
+    return SubRegIndexNames[SubIdx-1];
+  }
+
+  /// regsOverlap - Returns true if the two registers are equal or alias each
+  /// other. The registers may be virtual registers.
+  bool regsOverlap(unsigned regA, unsigned regB) const {
+    if (regA == regB)
+      return true;
+
+    if (isVirtualRegister(regA) || isVirtualRegister(regB))
+      return false;
+
+    // regA and regB are distinct physical registers. Do they alias?
+    size_t index = (regA + regB * 37) & (AliasesHashSize-1);
+    unsigned ProbeAmt = 0;
+    while (AliasesHash[index*2] != 0 &&
+           AliasesHash[index*2+1] != 0) {
+      if (AliasesHash[index*2] == regA && AliasesHash[index*2+1] == regB)
+        return true;
+
+      index = (index + ProbeAmt) & (AliasesHashSize-1);
+      ProbeAmt += 2;
+    }
+
+    return false;
+  }
+
+  /// isSubRegister - Returns true if regB is a sub-register of regA.
+  ///
+  bool isSubRegister(unsigned regA, unsigned regB) const {
+    // SubregHash is a simple quadratically probed hash table.
+    size_t index = (regA + regB * 37) & (SubregHashSize-1);
+    unsigned ProbeAmt = 2;
+    while (SubregHash[index*2] != 0 &&
+           SubregHash[index*2+1] != 0) {
+      if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB)
+        return true;
+
+      index = (index + ProbeAmt) & (SubregHashSize-1);
+      ProbeAmt += 2;
+    }
+
+    return false;
+  }
+
+  /// isSuperRegister - Returns true if regB is a super-register of regA.
+  ///
+  bool isSuperRegister(unsigned regA, unsigned regB) const {
+    return isSubRegister(regB, regA);
+  }
+
+  /// getCalleeSavedRegs - Return a null-terminated list of all of the
+  /// callee saved registers on this target. The registers should be in the
+  /// order of desired callee-save stack frame offset. The first register is
+  /// closest to the incoming stack pointer if the stack grows down, and vice
+  /// versa.
+  virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0)
+                                                                      const = 0;
+
+
+  /// getReservedRegs - Returns a bitset indexed by physical register number
+  /// indicating if a register is a special register that has particular uses
+  /// and should be considered unavailable at all times, e.g. SP, RA. This is
+  /// used by the register scavenger to determine what registers are free.
+  virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+
+  /// getSubReg - Returns the physical register number of sub-register "Index"
+  /// for physical register RegNo. Return zero if the sub-register does not
+  /// exist.
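+  ///
+  /// For illustration (editor's note; the sub-register index name is an
+  /// assumption, not from the original header): on X86,
+  /// getSubReg(X86::RAX, X86::sub_32bit) would return X86::EAX, while a
+  /// query for an index that does not apply returns 0.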
+  virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0;
+
+  /// getSubRegIndex - For a given register pair, return the sub-register index
+  /// if the second register is a sub-register of the first. Return zero
+  /// otherwise.
+  virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0;
+
+  /// getMatchingSuperReg - Return a super-register of the specified register
+  /// Reg such that its sub-register with index SubIdx is Reg.
+  unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx,
+                               const TargetRegisterClass *RC) const {
+    for (const unsigned *SRs = getSuperRegisters(Reg); unsigned SR = *SRs;++SRs)
+      if (Reg == getSubReg(SR, SubIdx) && RC->contains(SR))
+        return SR;
+    return 0;
+  }
+
+  /// canCombineSubRegIndices - Given a register class and a list of
+  /// subregister indices, return true if it's possible to combine the
+  /// subregister indices into one that corresponds to a larger
+  /// subregister. Return the new subregister index by reference. Note the
+  /// new index may be zero if the given subregisters can be combined to
+  /// form the whole register.
+  virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+                                       SmallVectorImpl<unsigned> &SubIndices,
+                                       unsigned &NewSubIdx) const {
+    return false;
+  }
+
+  /// getMatchingSuperRegClass - Return a subclass of the specified register
+  /// class A so that each register in it has a sub-register of the
+  /// specified sub-register index which is in the specified register class B.
+  virtual const TargetRegisterClass *
+  getMatchingSuperRegClass(const TargetRegisterClass *A,
+                           const TargetRegisterClass *B, unsigned Idx) const {
+    return 0;
+  }
+
+  /// composeSubRegIndices - Return the subregister index you get from composing
+  /// two subregister indices.
+  ///
+  /// If R:a:b is the same register as R:c, then composeSubRegIndices(a, b)
+  /// returns c. Note that composeSubRegIndices does not tell you about illegal
+  /// compositions. If R does not have a subreg a, or R:a does not have a subreg
+  /// b, composeSubRegIndices doesn't tell you.
+  ///
+  /// The ARM register Q0 has two D subregs dsub_0:D0 and dsub_1:D1. It also has
+  /// ssub_0:S0 - ssub_3:S3 subregs.
+  /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2.
+  ///
+  virtual unsigned composeSubRegIndices(unsigned a, unsigned b) const {
+    // This default implementation is correct for most targets.
+    return b;
+  }
+
+  //===--------------------------------------------------------------------===//
+  // Register Class Information
+  //
+
+  /// Register class iterators
+  ///
+  regclass_iterator regclass_begin() const { return RegClassBegin; }
+  regclass_iterator regclass_end() const { return RegClassEnd; }
+
+  unsigned getNumRegClasses() const {
+    return (unsigned)(regclass_end()-regclass_begin());
+  }
+
+  /// getRegClass - Returns the register class associated with the enumeration
+  /// value. See class TargetOperandInfo.
+  const TargetRegisterClass *getRegClass(unsigned i) const {
+    assert(i < getNumRegClasses() && "Register Class ID out of range");
+    return RegClassBegin[i];
+  }
+
+  /// getPointerRegClass - Returns a TargetRegisterClass used for pointer
+  /// values. If a target supports multiple different pointer register classes,
+  /// kind specifies which one is indicated.
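+  /// (Editor's note, for illustration only: on a 64-bit x86 target the
+  /// kind-0 pointer register class would typically be the 64-bit
+  /// general-purpose class, e.g. GR64.)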
+  virtual const TargetRegisterClass *getPointerRegClass(unsigned Kind=0) const {
+    assert(0 && "Target didn't implement getPointerRegClass!");
+    return 0; // Must return a value in order to compile with VS 2005
+  }
+
+  /// getCrossCopyRegClass - Returns a legal register class to copy a register
+  /// in the specified class to or from. Returns NULL if it is possible to copy
+  /// between two registers of the specified class.
+  virtual const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const {
+    return NULL;
+  }
+
+  /// getRegPressureLimit - Return the register pressure "high water mark" for
+  /// the specific register class. The scheduler is in high register pressure
+  /// mode (for the specific register class) if it goes over the limit.
+  virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                                       MachineFunction &MF) const {
+    return 0;
+  }
+
+  /// getAllocationOrder - Returns the register allocation order for a specified
+  /// register class in the form of a pair of TargetRegisterClass iterators.
+  virtual std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+  getAllocationOrder(const TargetRegisterClass *RC,
+                     unsigned HintType, unsigned HintReg,
+                     const MachineFunction &MF) const {
+    return std::make_pair(RC->allocation_order_begin(MF),
+                          RC->allocation_order_end(MF));
+  }
+
+  /// ResolveRegAllocHint - Resolves the specified register allocation hint
+  /// to a physical register. Returns the physical register if it is successful.
+  virtual unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                                       const MachineFunction &MF) const {
+    if (Type == 0 && Reg && isPhysicalRegister(Reg))
+      return Reg;
+    return 0;
+  }
+
+  /// UpdateRegAllocHint - A callback to allow the target a chance to update
+  /// register allocation hints when a register is "changed" (e.g. coalesced)
+  /// to another register. e.g. On ARM, some virtual registers should target
+  /// register pairs; if one half of the pair is coalesced to another register,
+  /// the allocation hint of the other half of the pair should be changed to
+  /// point to the new register.
+  virtual void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                                  MachineFunction &MF) const {
+    // Do nothing.
+  }
+
+  /// requiresRegisterScavenging - returns true if the target requires (and can
+  /// make use of) the register scavenger.
+  virtual bool requiresRegisterScavenging(const MachineFunction &MF) const {
+    return false;
+  }
+
+  /// useFPForScavengingIndex - returns true if the target wants to use
+  /// frame pointer based accesses to spill to the scavenger emergency spill
+  /// slot.
+  virtual bool useFPForScavengingIndex(const MachineFunction &MF) const {
+    return true;
+  }
+
+  /// requiresFrameIndexScavenging - returns true if the target requires post
+  /// PEI scavenging of registers for materializing frame index constants.
+  virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const {
+    return false;
+  }
+
+  /// requiresVirtualBaseRegisters - Returns true if the target wants the
+  /// LocalStackAllocation pass to be run and virtual base registers
+  /// used for more efficient stack access.
+  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+    return false;
+  }
+
+  /// hasReservedSpillSlot - Return true if target has reserved a spill slot in
+  /// the stack frame of the given function for the specified register. e.g. On
+  /// x86, if the frame register is required, the first fixed stack object is
+  /// reserved as its spill slot. This tells PEI not to create a new stack frame
+  /// object for the given register.
It should be called only after + /// processFunctionBeforeCalleeSavedScan(). + virtual bool hasReservedSpillSlot(const MachineFunction &MF, unsigned Reg, + int &FrameIdx) const { + return false; + } + + /// needsStackRealignment - true if storage within the function requires the + /// stack pointer to be aligned more than the normal calling convention calls + /// for. + virtual bool needsStackRealignment(const MachineFunction &MF) const { + return false; + } + + /// getFrameIndexInstrOffset - Get the offset from the referenced frame + /// index in the instruction, if there is one. + virtual int64_t getFrameIndexInstrOffset(const MachineInstr *MI, + int Idx) const { + return 0; + } + + /// needsFrameBaseReg - Returns true if the instruction's frame index + /// reference would be better served by a base register other than FP + /// or SP. Used by LocalStackFrameAllocation to determine which frame index + /// references it should create new base registers for. + virtual bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const { + return false; + } + + /// materializeFrameBaseRegister - Insert defining instruction(s) for + /// BaseReg to be a pointer to FrameIdx before insertion point I. + virtual void materializeFrameBaseRegister(MachineBasicBlock *MBB, + unsigned BaseReg, int FrameIdx, + int64_t Offset) const { + assert(0 && "materializeFrameBaseRegister does not exist on this target"); + } + + /// resolveFrameIndex - Resolve a frame index operand of an instruction + /// to reference the indicated base register plus offset instead. + virtual void resolveFrameIndex(MachineBasicBlock::iterator I, + unsigned BaseReg, int64_t Offset) const { + assert(0 && "resolveFrameIndex does not exist on this target"); + } + + /// isFrameOffsetLegal - Determine whether a given offset immediate is + /// encodable to resolve a frame index. + virtual bool isFrameOffsetLegal(const MachineInstr *MI, + int64_t Offset) const { + assert(0 && "isFrameOffsetLegal does not exist on this target"); + return false; // Must return a value in order to compile with VS 2005 + } + + /// getCallFrameSetup/DestroyOpcode - These methods return the opcode of the + /// frame setup/destroy instructions if they exist (-1 otherwise). Some + /// targets use pseudo instructions in order to abstract away the difference + /// between operating with a frame pointer and operating without, through the + /// use of these two instructions. + /// + int getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; } + int getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; } + + /// eliminateCallFramePseudoInstr - This method is called during prolog/epilog + /// code insertion to eliminate call frame setup and destroy pseudo + /// instructions (but only if the Target is using them). It is responsible + /// for eliminating these instructions, replacing them with concrete + /// instructions. This method need only be implemented if using call frame + /// setup/destroy pseudo instructions. + /// + virtual void + eliminateCallFramePseudoInstr(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + assert(getCallFrameSetupOpcode()== -1 && getCallFrameDestroyOpcode()== -1 && + "eliminateCallFramePseudoInstr must be implemented if using" + " call frame setup/destroy pseudo instructions!"); + assert(0 && "Call Frame Pseudo Instructions do not exist on this target!"); + } + + + /// saveScavengerRegister - Spill the register so it can be used by the + /// register scavenger. 
Return true if the register was spilled, false
+  /// otherwise. If this function does not spill the register, the scavenger
+  /// will instead spill it to the emergency spill slot.
+  ///
+  virtual bool saveScavengerRegister(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I,
+                                     MachineBasicBlock::iterator &UseMI,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Reg) const {
+    return false;
+  }
+
+  /// eliminateFrameIndex - This method must be overridden to eliminate abstract
+  /// frame indices from instructions which may use them. The instruction
+  /// referenced by the iterator contains an MO_FrameIndex operand which must be
+  /// eliminated by this method. This method may modify or replace the
+  /// specified instruction, as long as it keeps the iterator pointing at the
+  /// finished product. SPAdj is the SP adjustment due to the call frame setup
+  /// instruction.
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+                                   int SPAdj, RegScavenger *RS=NULL) const = 0;
+
+  //===--------------------------------------------------------------------===//
+  /// Debug information queries.
+
+  /// getDwarfRegNum - Map a target register to an equivalent dwarf register
+  /// number. Returns -1 if there is no equivalent value. The second
+  /// parameter allows targets to use different numberings for EH info and
+  /// debugging info.
+  virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const = 0;
+
+  /// getFrameRegister - This method should return the register used as a base
+  /// for values allocated in the current stack frame.
+  virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+
+  /// getRARegister - This method should return the register where the return
+  /// address can be found.
+  virtual unsigned getRARegister() const = 0;
+};
+
+
+// This is useful when building IndexedMaps keyed on virtual registers
+struct VirtReg2IndexFunctor : public std::unary_function<unsigned, unsigned> {
+  unsigned operator()(unsigned Reg) const {
+    return TargetRegisterInfo::virtReg2Index(Reg);
+  }
+};
+
+/// getCommonSubClass - find the largest common subclass of A and B. Return NULL
+/// if there is no common subclass.
+const TargetRegisterClass *getCommonSubClass(const TargetRegisterClass *A,
+                                             const TargetRegisterClass *B);
+
+/// PrintReg - Helper class for printing registers on a raw_ostream.
+/// Prints virtual and physical registers with or without a TRI instance.
+///
+/// The format is:
+///   %noreg          - NoRegister
+///   %vreg5          - a virtual register.
+///   %vreg5:sub_8bit - a virtual register with sub-register index (with TRI).
+///   %EAX            - a physical register
+///   %physreg17      - a physical register when no TRI instance given.
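+///
+/// For example (editor's illustration, not from the original header):
+/// printing the virtual register number (1u << 31) + 5 produces "%vreg5",
+/// matching virtReg2Index() above.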
+///
+/// Usage: OS << PrintReg(Reg, TRI) << '\n';
+///
+class PrintReg {
+  const TargetRegisterInfo *TRI;
+  unsigned Reg;
+  unsigned SubIdx;
+public:
+  PrintReg(unsigned reg, const TargetRegisterInfo *tri = 0, unsigned subidx = 0)
+    : TRI(tri), Reg(reg), SubIdx(subidx) {}
+  void print(raw_ostream&) const;
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) {
+  PR.print(OS);
+  return OS;
+}
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Target/TargetRegistry.h b/final/include/llvm/Target/TargetRegistry.h
new file mode 100644
index 00000000000..f851ad0a9bf
--- /dev/null
+++ b/final/include/llvm/Target/TargetRegistry.h
@@ -0,0 +1,779 @@
+//===-- Target/TargetRegistry.h - Target Registration -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes the TargetRegistry interface, which tools can use to access
+// the appropriate target specific classes (TargetMachine, AsmPrinter, etc.)
+// which have been registered.
+//
+// Target specific class implementations should register themselves using the
+// appropriate TargetRegistry interfaces.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETREGISTRY_H
+#define LLVM_TARGET_TARGETREGISTRY_H
+
+#include "llvm/ADT/Triple.h"
+#include <string>
+#include <cassert>
+
+namespace llvm {
+  class AsmPrinter;
+  class Module;
+  class MCAssembler;
+  class MCAsmInfo;
+  class MCAsmParser;
+  class MCCodeEmitter;
+  class MCContext;
+  class MCDisassembler;
+  class MCInstPrinter;
+  class MCStreamer;
+  class TargetAsmBackend;
+  class TargetAsmLexer;
+  class TargetAsmParser;
+  class TargetMachine;
+  class raw_ostream;
+  class formatted_raw_ostream;
+
+  MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+                                bool isVerboseAsm,
+                                bool useLoc,
+                                MCInstPrinter *InstPrint,
+                                MCCodeEmitter *CE,
+                                TargetAsmBackend *TAB,
+                                bool ShowInst);
+
+  /// Target - Wrapper for Target specific information.
+  ///
+  /// For registration purposes, this is a POD type so that targets can be
+  /// registered without the use of static constructors.
+  ///
+  /// Targets should implement a single global instance of this class (which
+  /// will be zero initialized), and pass that instance to the TargetRegistry as
+  /// part of their initialization.
+ class Target { + public: + friend struct TargetRegistry; + + typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT); + + typedef MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T, + StringRef TT); + typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T, + const std::string &TT, + const std::string &Features); + typedef AsmPrinter *(*AsmPrinterCtorTy)(TargetMachine &TM, + MCStreamer &Streamer); + typedef TargetAsmBackend *(*AsmBackendCtorTy)(const Target &T, + const std::string &TT); + typedef TargetAsmLexer *(*AsmLexerCtorTy)(const Target &T, + const MCAsmInfo &MAI); + typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P, + TargetMachine &TM); + typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T); + typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI); + typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T, + TargetMachine &TM, + MCContext &Ctx); + typedef MCStreamer *(*ObjectStreamerCtorTy)(const Target &T, + const std::string &TT, + MCContext &Ctx, + TargetAsmBackend &TAB, + raw_ostream &_OS, + MCCodeEmitter *_Emitter, + bool RelaxAll, + bool NoExecStack); + typedef MCStreamer *(*AsmStreamerCtorTy)(MCContext &Ctx, + formatted_raw_ostream &OS, + bool isVerboseAsm, + bool useLoc, + MCInstPrinter *InstPrint, + MCCodeEmitter *CE, + TargetAsmBackend *TAB, + bool ShowInst); + + private: + /// Next - The next registered target in the linked list, maintained by the + /// TargetRegistry. + Target *Next; + + /// TripleMatchQualityFn - The target function for rating the match quality + /// of a triple. + TripleMatchQualityFnTy TripleMatchQualityFn; + + /// Name - The target name. + const char *Name; + + /// ShortDesc - A short description of the target. + const char *ShortDesc; + + /// HasJIT - Whether this target supports the JIT. + bool HasJIT; + + AsmInfoCtorFnTy AsmInfoCtorFn; + + /// TargetMachineCtorFn - Construction function for this target's + /// TargetMachine, if registered. + TargetMachineCtorTy TargetMachineCtorFn; + + /// AsmBackendCtorFn - Construction function for this target's + /// TargetAsmBackend, if registered. + AsmBackendCtorTy AsmBackendCtorFn; + + /// AsmLexerCtorFn - Construction function for this target's TargetAsmLexer, + /// if registered. + AsmLexerCtorTy AsmLexerCtorFn; + + /// AsmParserCtorFn - Construction function for this target's + /// TargetAsmParser, if registered. + AsmParserCtorTy AsmParserCtorFn; + + /// AsmPrinterCtorFn - Construction function for this target's AsmPrinter, + /// if registered. + AsmPrinterCtorTy AsmPrinterCtorFn; + + /// MCDisassemblerCtorFn - Construction function for this target's + /// MCDisassembler, if registered. + MCDisassemblerCtorTy MCDisassemblerCtorFn; + + /// MCInstPrinterCtorFn - Construction function for this target's + /// MCInstPrinter, if registered. + MCInstPrinterCtorTy MCInstPrinterCtorFn; + + /// CodeEmitterCtorFn - Construction function for this target's CodeEmitter, + /// if registered. + CodeEmitterCtorTy CodeEmitterCtorFn; + + /// ObjectStreamerCtorFn - Construction function for this target's + /// ObjectStreamer, if registered. + ObjectStreamerCtorTy ObjectStreamerCtorFn; + + /// AsmStreamerCtorFn - Construction function for this target's + /// AsmStreamer, if registered (default = llvm::createAsmStreamer). 
+    AsmStreamerCtorTy AsmStreamerCtorFn;
+
+  public:
+    Target() : AsmStreamerCtorFn(llvm::createAsmStreamer) {}
+
+    /// @name Target Information
+    /// @{
+
+    // getNext - Return the next registered target.
+    const Target *getNext() const { return Next; }
+
+    /// getName - Get the target name.
+    const char *getName() const { return Name; }
+
+    /// getShortDescription - Get a short description of the target.
+    const char *getShortDescription() const { return ShortDesc; }
+
+    /// @}
+    /// @name Feature Predicates
+    /// @{
+
+    /// hasJIT - Check if this target supports just-in-time compilation.
+    bool hasJIT() const { return HasJIT; }
+
+    /// hasTargetMachine - Check if this target supports code generation.
+    bool hasTargetMachine() const { return TargetMachineCtorFn != 0; }
+
+    /// hasAsmBackend - Check if this target supports .o generation.
+    bool hasAsmBackend() const { return AsmBackendCtorFn != 0; }
+
+    /// hasAsmLexer - Check if this target supports .s lexing.
+    bool hasAsmLexer() const { return AsmLexerCtorFn != 0; }
+
+    /// hasAsmParser - Check if this target supports .s parsing.
+    bool hasAsmParser() const { return AsmParserCtorFn != 0; }
+
+    /// hasAsmPrinter - Check if this target supports .s printing.
+    bool hasAsmPrinter() const { return AsmPrinterCtorFn != 0; }
+
+    /// hasMCDisassembler - Check if this target has a disassembler.
+    bool hasMCDisassembler() const { return MCDisassemblerCtorFn != 0; }
+
+    /// hasMCInstPrinter - Check if this target has an instruction printer.
+    bool hasMCInstPrinter() const { return MCInstPrinterCtorFn != 0; }
+
+    /// hasCodeEmitter - Check if this target supports instruction encoding.
+    bool hasCodeEmitter() const { return CodeEmitterCtorFn != 0; }
+
+    /// hasObjectStreamer - Check if this target supports streaming to files.
+    bool hasObjectStreamer() const { return ObjectStreamerCtorFn != 0; }
+
+    /// hasAsmStreamer - Check if this target supports streaming to files.
+    bool hasAsmStreamer() const { return AsmStreamerCtorFn != 0; }
+
+    /// @}
+    /// @name Feature Constructors
+    /// @{
+
+    /// createAsmInfo - Create a MCAsmInfo implementation for the specified
+    /// target triple.
+    ///
+    /// \arg Triple - This argument is used to determine the target machine
+    /// feature set; it should always be provided. Generally this should be
+    /// either the target triple from the module, or the target triple of the
+    /// host if that does not exist.
+    MCAsmInfo *createAsmInfo(StringRef Triple) const {
+      if (!AsmInfoCtorFn)
+        return 0;
+      return AsmInfoCtorFn(*this, Triple);
+    }
+
+    /// createTargetMachine - Create a target specific machine implementation
+    /// for the specified \arg Triple.
+    ///
+    /// \arg Triple - This argument is used to determine the target machine
+    /// feature set; it should always be provided. Generally this should be
+    /// either the target triple from the module, or the target triple of the
+    /// host if that does not exist.
+    TargetMachine *createTargetMachine(const std::string &Triple,
+                                       const std::string &Features) const {
+      if (!TargetMachineCtorFn)
+        return 0;
+      return TargetMachineCtorFn(*this, Triple, Features);
+    }
+
+    /// createAsmBackend - Create a target specific assembler backend.
+    ///
+    /// \arg Triple - The target triple string.
+    TargetAsmBackend *createAsmBackend(const std::string &Triple) const {
+      if (!AsmBackendCtorFn)
+        return 0;
+      return AsmBackendCtorFn(*this, Triple);
+    }
+
+    /// createAsmLexer - Create a target specific assembly lexer.
+    ///
+    TargetAsmLexer *createAsmLexer(const MCAsmInfo &MAI) const {
+      if (!AsmLexerCtorFn)
+        return 0;
+      return AsmLexerCtorFn(*this, MAI);
+    }
+
+    /// createAsmParser - Create a target specific assembly parser.
+    ///
+    /// \arg Parser - The target independent parser implementation to use for
+    /// parsing and lexing.
+    TargetAsmParser *createAsmParser(MCAsmParser &Parser,
+                                     TargetMachine &TM) const {
+      if (!AsmParserCtorFn)
+        return 0;
+      return AsmParserCtorFn(*this, Parser, TM);
+    }
+
+    /// createAsmPrinter - Create a target specific assembly printer pass. This
+    /// takes ownership of the MCStreamer object.
+    AsmPrinter *createAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) const {
+      if (!AsmPrinterCtorFn)
+        return 0;
+      return AsmPrinterCtorFn(TM, Streamer);
+    }
+
+    MCDisassembler *createMCDisassembler() const {
+      if (!MCDisassemblerCtorFn)
+        return 0;
+      return MCDisassemblerCtorFn(*this);
+    }
+
+    MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
+                                       const MCAsmInfo &MAI) const {
+      if (!MCInstPrinterCtorFn)
+        return 0;
+      return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI);
+    }
+
+
+    /// createCodeEmitter - Create a target specific code emitter.
+    MCCodeEmitter *createCodeEmitter(TargetMachine &TM, MCContext &Ctx) const {
+      if (!CodeEmitterCtorFn)
+        return 0;
+      return CodeEmitterCtorFn(*this, TM, Ctx);
+    }
+
+    /// createObjectStreamer - Create a target specific MCStreamer.
+    ///
+    /// \arg TT - The target triple.
+    /// \arg Ctx - The target context.
+    /// \arg TAB - The target assembler backend object. Takes ownership.
+    /// \arg _OS - The stream object.
+    /// \arg _Emitter - The target independent assembler object. Takes
+    /// ownership.
+    /// \arg RelaxAll - Relax all fixups?
+    /// \arg NoExecStack - Mark file as not needing an executable stack.
+    MCStreamer *createObjectStreamer(const std::string &TT, MCContext &Ctx,
+                                     TargetAsmBackend &TAB,
+                                     raw_ostream &_OS,
+                                     MCCodeEmitter *_Emitter,
+                                     bool RelaxAll,
+                                     bool NoExecStack) const {
+      if (!ObjectStreamerCtorFn)
+        return 0;
+      return ObjectStreamerCtorFn(*this, TT, Ctx, TAB, _OS, _Emitter, RelaxAll,
+                                  NoExecStack);
+    }
+
+    /// createAsmStreamer - Create a target specific MCStreamer.
+    MCStreamer *createAsmStreamer(MCContext &Ctx,
+                                  formatted_raw_ostream &OS,
+                                  bool isVerboseAsm,
+                                  bool useLoc,
+                                  MCInstPrinter *InstPrint,
+                                  MCCodeEmitter *CE,
+                                  TargetAsmBackend *TAB,
+                                  bool ShowInst) const {
+      // AsmStreamerCtorFn defaults to llvm::createAsmStreamer.
+      return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc,
+                               InstPrint, CE, TAB, ShowInst);
+    }
+
+    /// @}
+  };
+
+  /// TargetRegistry - Generic interface to target specific features.
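+  ///
+  /// A minimal lookup sketch (editor's illustration; the triple string and
+  /// error handling are assumptions, not part of this header):
+  ///
+  ///   std::string Error;
+  ///   const Target *T =
+  ///       TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Error);
+  ///   if (!T) { /* report Error and bail out */ }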
+ struct TargetRegistry { + class iterator { + const Target *Current; + explicit iterator(Target *T) : Current(T) {} + friend struct TargetRegistry; + public: + iterator(const iterator &I) : Current(I.Current) {} + iterator() : Current(0) {} + + bool operator==(const iterator &x) const { + return Current == x.Current; + } + bool operator!=(const iterator &x) const { + return !operator==(x); + } + + // Iterator traversal: forward iteration only + iterator &operator++() { // Preincrement + assert(Current && "Cannot increment end iterator!"); + Current = Current->getNext(); + return *this; + } + iterator operator++(int) { // Postincrement + iterator tmp = *this; + ++*this; + return tmp; + } + + const Target &operator*() const { + assert(Current && "Cannot dereference end iterator!"); + return *Current; + } + + const Target *operator->() const { + return &operator*(); + } + }; + + /// @name Registry Access + /// @{ + + static iterator begin(); + + static iterator end() { return iterator(); } + + /// lookupTarget - Lookup a target based on a target triple. + /// + /// \param Triple - The triple to use for finding a target. + /// \param Error - On failure, an error string describing why no target was + /// found. + static const Target *lookupTarget(const std::string &Triple, + std::string &Error); + + /// getClosestTargetForJIT - Pick the best target that is compatible with + /// the current host. If no close target can be found, this returns null + /// and sets the Error string to a reason. + /// + /// Maintained for compatibility through 2.6. + static const Target *getClosestTargetForJIT(std::string &Error); + + /// @} + /// @name Target Registration + /// @{ + + /// RegisterTarget - Register the given target. Attempts to register a + /// target which has already been registered will be ignored. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Name - The target name. This should be a static string. + /// @param ShortDesc - A short target description. This should be a static + /// string. + /// @param TQualityFn - The triple match quality computation function for + /// this target. + /// @param HasJIT - Whether the target supports JIT code + /// generation. + static void RegisterTarget(Target &T, + const char *Name, + const char *ShortDesc, + Target::TripleMatchQualityFnTy TQualityFn, + bool HasJIT = false); + + /// RegisterAsmInfo - Register a MCAsmInfo implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct a MCAsmInfo for the target. + static void RegisterAsmInfo(Target &T, Target::AsmInfoCtorFnTy Fn) { + // Ignore duplicate registration. + if (!T.AsmInfoCtorFn) + T.AsmInfoCtorFn = Fn; + } + + /// RegisterTargetMachine - Register a TargetMachine implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. 
+ /// @param Fn - A function to construct a TargetMachine for the target. + static void RegisterTargetMachine(Target &T, + Target::TargetMachineCtorTy Fn) { + // Ignore duplicate registration. + if (!T.TargetMachineCtorFn) + T.TargetMachineCtorFn = Fn; + } + + /// RegisterAsmBackend - Register a TargetAsmBackend implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmBackend for the target. + static void RegisterAsmBackend(Target &T, Target::AsmBackendCtorTy Fn) { + if (!T.AsmBackendCtorFn) + T.AsmBackendCtorFn = Fn; + } + + /// RegisterAsmLexer - Register a TargetAsmLexer implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmLexer for the target. + static void RegisterAsmLexer(Target &T, Target::AsmLexerCtorTy Fn) { + if (!T.AsmLexerCtorFn) + T.AsmLexerCtorFn = Fn; + } + + /// RegisterAsmParser - Register a TargetAsmParser implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmParser for the target. + static void RegisterAsmParser(Target &T, Target::AsmParserCtorTy Fn) { + if (!T.AsmParserCtorFn) + T.AsmParserCtorFn = Fn; + } + + /// RegisterAsmPrinter - Register an AsmPrinter implementation for the given + /// target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an AsmPrinter for the target. + static void RegisterAsmPrinter(Target &T, Target::AsmPrinterCtorTy Fn) { + // Ignore duplicate registration. + if (!T.AsmPrinterCtorFn) + T.AsmPrinterCtorFn = Fn; + } + + /// RegisterMCDisassembler - Register a MCDisassembler implementation for + /// the given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. + /// + /// @param T - The target being registered. + /// @param Fn - A function to construct an MCDisassembler for the target. + static void RegisterMCDisassembler(Target &T, + Target::MCDisassemblerCtorTy Fn) { + if (!T.MCDisassemblerCtorFn) + T.MCDisassemblerCtorFn = Fn; + } + + /// RegisterMCInstPrinter - Register a MCInstPrinter implementation for the + /// given target. + /// + /// Clients are responsible for ensuring that registration doesn't occur + /// while another thread is attempting to access the registry. Typically + /// this is done by initializing all targets at program startup. 
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCInstPrinter for the target.
+    static void RegisterMCInstPrinter(Target &T,
+                                      Target::MCInstPrinterCtorTy Fn) {
+      if (!T.MCInstPrinterCtorFn)
+        T.MCInstPrinterCtorFn = Fn;
+    }
+
+    /// RegisterCodeEmitter - Register a MCCodeEmitter implementation for the
+    /// given target.
+    ///
+    /// Clients are responsible for ensuring that registration doesn't occur
+    /// while another thread is attempting to access the registry. Typically
+    /// this is done by initializing all targets at program startup.
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCCodeEmitter for the target.
+    static void RegisterCodeEmitter(Target &T, Target::CodeEmitterCtorTy Fn) {
+      if (!T.CodeEmitterCtorFn)
+        T.CodeEmitterCtorFn = Fn;
+    }
+
+    /// RegisterObjectStreamer - Register an object code MCStreamer
+    /// implementation for the given target.
+    ///
+    /// Clients are responsible for ensuring that registration doesn't occur
+    /// while another thread is attempting to access the registry. Typically
+    /// this is done by initializing all targets at program startup.
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCStreamer for the target.
+    static void RegisterObjectStreamer(Target &T,
+                                       Target::ObjectStreamerCtorTy Fn) {
+      if (!T.ObjectStreamerCtorFn)
+        T.ObjectStreamerCtorFn = Fn;
+    }
+
+    /// RegisterAsmStreamer - Register an assembly MCStreamer implementation
+    /// for the given target.
+    ///
+    /// Clients are responsible for ensuring that registration doesn't occur
+    /// while another thread is attempting to access the registry. Typically
+    /// this is done by initializing all targets at program startup.
+    ///
+    /// @param T - The target being registered.
+    /// @param Fn - A function to construct an MCStreamer for the target.
+    static void RegisterAsmStreamer(Target &T, Target::AsmStreamerCtorTy Fn) {
+      if (T.AsmStreamerCtorFn == createAsmStreamer)
+        T.AsmStreamerCtorFn = Fn;
+    }
+
+    /// @}
+  };
+
+
+  //===--------------------------------------------------------------------===//
+
+  /// RegisterTarget - Helper template for registering a target, for use in the
+  /// target's initialization function. Usage:
+  ///
+  ///
+  ///   Target TheFooTarget; // The global target instance.
+  ///
+  ///   extern "C" void LLVMInitializeFooTargetInfo() {
+  ///     RegisterTarget<Triple::foo> X(TheFooTarget, "foo", "Foo description");
+  ///   }
+  template<Triple::ArchType TargetArchType, bool HasJIT = false>
+  struct RegisterTarget {
+    RegisterTarget(Target &T, const char *Name, const char *Desc) {
+      TargetRegistry::RegisterTarget(T, Name, Desc,
+                                     &getTripleMatchQuality,
+                                     HasJIT);
+    }
+
+    static unsigned getTripleMatchQuality(const std::string &TT) {
+      if (Triple(TT).getArch() == TargetArchType)
+        return 20;
+      return 0;
+    }
+  };
+
+  /// RegisterAsmInfo - Helper template for registering a target assembly info
+  /// implementation.  This invokes the static "Create" method on the class to
+  /// actually do the construction. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooTarget() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmInfo<FooMCAsmInfo> X(TheFooTarget);
+  ///   }
+  template<class MCAsmInfoImpl>
+  struct RegisterAsmInfo {
+    RegisterAsmInfo(Target &T) {
+      TargetRegistry::RegisterAsmInfo(T, &Allocator);
+    }
+  private:
+    static MCAsmInfo *Allocator(const Target &T, StringRef TT) {
+      return new MCAsmInfoImpl(T, TT);
+    }
+
+  };
+
+  /// RegisterAsmInfoFn - Helper template for registering a target assembly info
+  /// implementation.  This invokes the specified function to do the
+  /// construction. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooTarget() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmInfoFn X(TheFooTarget, TheFunction);
+  ///   }
+  struct RegisterAsmInfoFn {
+    RegisterAsmInfoFn(Target &T, Target::AsmInfoCtorFnTy Fn) {
+      TargetRegistry::RegisterAsmInfo(T, Fn);
+    }
+  };
+
+
+  /// RegisterTargetMachine - Helper template for registering a target machine
+  /// implementation, for use in the target machine initialization
+  /// function. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooTarget() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterTargetMachine<FooTargetMachine> X(TheFooTarget);
+  ///   }
+  template<class TargetMachineImpl>
+  struct RegisterTargetMachine {
+    RegisterTargetMachine(Target &T) {
+      TargetRegistry::RegisterTargetMachine(T, &Allocator);
+    }
+
+  private:
+    static TargetMachine *Allocator(const Target &T, const std::string &TT,
+                                    const std::string &FS) {
+      return new TargetMachineImpl(T, TT, FS);
+    }
+  };
+
+  /// RegisterAsmBackend - Helper template for registering a target specific
+  /// assembler backend. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooAsmBackend() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmBackend<FooAsmBackend> X(TheFooTarget);
+  ///   }
+  template<class AsmBackendImpl>
+  struct RegisterAsmBackend {
+    RegisterAsmBackend(Target &T) {
+      TargetRegistry::RegisterAsmBackend(T, &Allocator);
+    }
+
+  private:
+    static TargetAsmBackend *Allocator(const Target &T,
+                                       const std::string &Triple) {
+      return new AsmBackendImpl(T, Triple);
+    }
+  };
+
+  /// RegisterAsmLexer - Helper template for registering a target specific
+  /// assembly lexer, for use in the target machine initialization
+  /// function. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooAsmLexer() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmLexer<FooAsmLexer> X(TheFooTarget);
+  ///   }
+  template<class AsmLexerImpl>
+  struct RegisterAsmLexer {
+    RegisterAsmLexer(Target &T) {
+      TargetRegistry::RegisterAsmLexer(T, &Allocator);
+    }
+
+  private:
+    static TargetAsmLexer *Allocator(const Target &T, const MCAsmInfo &MAI) {
+      return new AsmLexerImpl(T, MAI);
+    }
+  };
+
+  /// RegisterAsmParser - Helper template for registering a target specific
+  /// assembly parser, for use in the target machine initialization
+  /// function. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooAsmParser() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmParser<FooAsmParser> X(TheFooTarget);
+  ///   }
+  template<class AsmParserImpl>
+  struct RegisterAsmParser {
+    RegisterAsmParser(Target &T) {
+      TargetRegistry::RegisterAsmParser(T, &Allocator);
+    }
+
+  private:
+    static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P,
+                                      TargetMachine &TM) {
+      return new AsmParserImpl(T, P, TM);
+    }
+  };
+
+  /// RegisterAsmPrinter - Helper template for registering a target specific
+  /// assembly printer, for use in the target machine initialization
+  /// function. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooAsmPrinter() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterAsmPrinter<FooAsmPrinter> X(TheFooTarget);
+  ///   }
+  template<class AsmPrinterImpl>
+  struct RegisterAsmPrinter {
+    RegisterAsmPrinter(Target &T) {
+      TargetRegistry::RegisterAsmPrinter(T, &Allocator);
+    }
+
+  private:
+    static AsmPrinter *Allocator(TargetMachine &TM, MCStreamer &Streamer) {
+      return new AsmPrinterImpl(TM, Streamer);
+    }
+  };
+
+  /// RegisterCodeEmitter - Helper template for registering a target specific
+  /// machine code emitter, for use in the target initialization
+  /// function. Usage:
+  ///
+  ///   extern "C" void LLVMInitializeFooCodeEmitter() {
+  ///     extern Target TheFooTarget;
+  ///     RegisterCodeEmitter<FooCodeEmitter> X(TheFooTarget);
+  ///   }
+  template<class CodeEmitterImpl>
+  struct RegisterCodeEmitter {
+    RegisterCodeEmitter(Target &T) {
+      TargetRegistry::RegisterCodeEmitter(T, &Allocator);
+    }
+
+  private:
+    static MCCodeEmitter *Allocator(const Target &T, TargetMachine &TM,
+                                    MCContext &Ctx) {
+      return new CodeEmitterImpl(T, TM, Ctx);
+    }
+  };
+
+}
+
+#endif
diff --git a/final/include/llvm/Target/TargetSchedule.td b/final/include/llvm/Target/TargetSchedule.td
new file mode 100644
index 00000000000..97ea82ab9e3
--- /dev/null
+++ b/final/include/llvm/Target/TargetSchedule.td
@@ -0,0 +1,130 @@
+//===- TargetSchedule.td - Target Independent Scheduling ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent scheduling interfaces which should
+// be implemented by each target which is using TableGen based scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Processor functional unit - These values represent the function units
+// available across all chip sets for the target. Eg., IntUnit, FPUnit, ...
+// These may be independent values for each chip set or may be shared across
+// all chip sets of the target. Each functional unit is treated as a resource
+// during scheduling and affects instruction order based on availability
+// during a time interval.
+//
+class FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Pipeline bypass / forwarding - These values specify the symbolic names of
+// pipeline bypasses which can be used to forward the results of instructions
+// to uses.
+class Bypass;
+def NoBypass : Bypass;
+
+class ReservationKind<bits<2> val> {
+  int Value = val;
+}
+
+def Required : ReservationKind<0>;
+def Reserved : ReservationKind<1>;
+
+//===----------------------------------------------------------------------===//
+// Instruction stage - These values represent a non-pipelined step in
+// the execution of an instruction.  Cycles represents the number of
+// discrete time slots needed to complete the stage.  Units represent
+// the choice of functional units that can be used to complete the
+// stage.  Eg. IntUnit1, IntUnit2.  NextCycles indicates how many
+// cycles should elapse from the start of this stage to the start of
+// the next stage in the itinerary.  For example:
+//
+// A stage is specified in one of two ways:
+//
+//   InstrStage<1, [FU_x, FU_y]>     - TimeInc defaults to Cycles
+//   InstrStage<1, [FU_x, FU_y], 0>  - TimeInc explicit
+//
+class InstrStage<int cycles, list<FuncUnit> units,
+                 int timeinc = -1,
+                 ReservationKind kind = Required> {
+  int Cycles           = cycles;     // length of stage in machine cycles
+  list<FuncUnit> Units = units;      // choice of functional units
+  int TimeInc          = timeinc;    // cycles till start of next stage
+  int Kind             = kind.Value; // kind of FU reservation
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary - An itinerary represents a sequential series of steps
+// required to complete an instruction.  Itineraries are represented as lists
+// of instruction stages.
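+//
+// For example (editor's sketch; the itinerary class and functional unit
+// names are hypothetical, see InstrItinData below):
+//
+//   InstrItinData<IIC_Foo, [InstrStage<1, [FU_x]>, InstrStage<2, [FU_y]>]>
+//
+// would describe instructions that occupy FU_x for one cycle and then FU_y
+// for the following two cycles.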
+//
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary classes - These values represent 'named' instruction
+// itineraries.  Using named itineraries simplifies managing groups of
+// instructions across chip sets.  An instruction uses the same itinerary class
+// across all chip sets.  Thus a new chip set can be added without modifying
+// instruction information.
+//
+// NumMicroOps represents the number of micro-operations that each instruction
+// in the class is decoded to.  If the number is zero, then it means the
+// instruction can decode into a variable number of micro-ops and it must be
+// determined dynamically.
+//
+class InstrItinClass<int ops = 1> {
+  int NumMicroOps = ops;
+}
+def NoItinerary : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Instruction itinerary data - These values provide a runtime map of an
+// instruction itinerary class (name) to its itinerary data.
+//
+// OperandCycles are optional "cycle counts".  They specify the cycle after
+// instruction issue at which the values corresponding to specific operand
+// indices are defined or read.  Bypasses are optional "pipeline forwarding
+// paths": if a def by an instruction is available on a specific bypass and
+// the use can read from the same bypass, then the operand use latency is
+// reduced by one.
+//
+//  InstrItinData<IIC_iLoadi, [InstrStage<1, [A9_Pipe1]>,
+//                             InstrStage<1, [A9_AGU]>],
+//                [3, 1], [A9_LdBypass]>,
+//  InstrItinData<IIC_iMVNr, [InstrStage<1, [A9_Pipe0, A9_Pipe1]>],
+//                [1, 1], [NoBypass, A9_LdBypass]>,
+//
+// In this example, an instruction of the IIC_iLoadi class reads its input on
+// cycle 1 (after issue) and the result of the load is available on cycle 3.
+// The result is available via forwarding path A9_LdBypass.  If it's used by
+// the first source operand of instructions of the IIC_iMVNr class, then the
+// operand latency is reduced by 1.
+class InstrItinData<InstrItinClass Class, list<InstrStage> stages,
+                    list<int> operandcycles = [],
+                    list<Bypass> bypasses = []> {
+  InstrItinClass TheClass = Class;
+  list<InstrStage> Stages = stages;
+  list<int> OperandCycles = operandcycles;
+  list<Bypass> Bypasses = bypasses;
+}
+
+//===----------------------------------------------------------------------===//
+// Processor itineraries - These values represent the set of all itinerary
+// classes for a given chip set.
+//
+class ProcessorItineraries<list<FuncUnit> fu, list<Bypass> bp,
+                           list<InstrItinData> iid> {
+  list<FuncUnit> FU = fu;
+  list<Bypass> BP = bp;
+  list<InstrItinData> IID = iid;
+}
+
+// NoItineraries - A marker that can be used by processors without schedule
+// info.
+def NoItineraries : ProcessorItineraries<[], [], []>;
+
diff --git a/final/include/llvm/Target/TargetSelect.h b/final/include/llvm/Target/TargetSelect.h
new file mode 100644
index 00000000000..1891f879741
--- /dev/null
+++ b/final/include/llvm/Target/TargetSelect.h
@@ -0,0 +1,125 @@
+//===- TargetSelect.h - Target Selection & Registration ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides utilities to make sure that certain classes of targets
+// are linked into the main application executable, and initialize them as
+// appropriate.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETSELECT_H
+#define LLVM_TARGET_TARGETSELECT_H
+
+#include "llvm/Config/llvm-config.h"
+
+extern "C" {
+  // Declare all of the target-initialization functions that are available.
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" + +#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(); +#include "llvm/Config/Targets.def" + + // Declare all of the available assembly printer initialization functions. +#define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter(); +#include "llvm/Config/AsmPrinters.def" + + // Declare all of the available assembly parser initialization functions. +#define LLVM_ASM_PARSER(TargetName) void LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" + + // Declare all of the available disassembler initialization functions. +#define LLVM_DISASSEMBLER(TargetName) void LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" +} + +namespace llvm { + /// InitializeAllTargetInfos - The main program should call this function if + /// it wants access to all available targets that LLVM is configured to + /// support, to make them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllTargetInfos() { +#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##TargetInfo(); +#include "llvm/Config/Targets.def" + } + + /// InitializeAllTargets - The main program should call this function if it + /// wants access to all available target machines that LLVM is configured to + /// support, to make them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllTargets() { + // FIXME: Remove this, clients should do it. + InitializeAllTargetInfos(); + +#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target(); +#include "llvm/Config/Targets.def" + } + + /// InitializeAllAsmPrinters - The main program should call this function if + /// it wants all asm printers that LLVM is configured to support, to make them + /// available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllAsmPrinters() { +#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter(); +#include "llvm/Config/AsmPrinters.def" + } + + /// InitializeAllAsmParsers - The main program should call this function if it + /// wants all asm parsers that LLVM is configured to support, to make them + /// available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllAsmParsers() { +#define LLVM_ASM_PARSER(TargetName) LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/AsmParsers.def" + } + + /// InitializeAllDisassemblers - The main program should call this function if + /// it wants all disassemblers that LLVM is configured to support, to make + /// them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. + inline void InitializeAllDisassemblers() { +#define LLVM_DISASSEMBLER(TargetName) LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" + } + + /// InitializeNativeTarget - The main program should call this function to + /// initialize the native target corresponding to the host. This is useful + /// for JIT applications to ensure that the target gets linked in correctly. + /// + /// It is legal for a client to make multiple calls to this function. 
+  inline bool InitializeNativeTarget() {
+    // If we have a native target, initialize it to ensure it is linked in.
+#ifdef LLVM_NATIVE_TARGET
+    LLVM_NATIVE_TARGETINFO();
+    LLVM_NATIVE_TARGET();
+    return false;
+#else
+    return true;
+#endif
+  }
+
+  /// InitializeNativeTargetAsmPrinter - The main program should call
+  /// this function to initialize the native target asm printer.
+  inline bool InitializeNativeTargetAsmPrinter() {
+    // If we have a native target, initialize the corresponding asm printer.
+#ifdef LLVM_NATIVE_ASMPRINTER
+    LLVM_NATIVE_ASMPRINTER();
+    return false;
+#else
+    return true;
+#endif
+  }
+}
+
+#endif
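As a usage note for the header above: a JIT host calls these functions once at startup, before constructing its execution engine. A minimal sketch under that assumption; the surrounding JIT setup is elided, and the main function is the example's own:

    #include "llvm/Target/TargetSelect.h"
    #include <cstdio>

    int main() {
      // InitializeNativeTarget() returns true when this build has no native
      // target configured, so treat true as failure.
      if (llvm::InitializeNativeTarget()) {
        std::fprintf(stderr, "no native target available\n");
        return 1;
      }
      // Optional: needed if native machine code will be emitted through MC.
      llvm::InitializeNativeTargetAsmPrinter();
      // ... create a Module and ExecutionEngine here ...
      return 0;
    }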
diff --git a/final/include/llvm/Target/TargetSelectionDAG.td b/final/include/llvm/Target/TargetSelectionDAG.td
new file mode 100644
index 00000000000..c9be40d23f0
--- /dev/null
+++ b/final/include/llvm/Target/TargetSelectionDAG.td
@@ -0,0 +1,893 @@
+//===- TargetSelectionDAG.td - Common code for DAG isels ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces used by SelectionDAG
+// instruction selection generators.
+//
+//===----------------------------------------------------------------------===//

+//===----------------------------------------------------------------------===//
+// Selection DAG Type Constraint definitions.
+//
+// Note that the semantics of these constraints are hard coded into tblgen. To
+// modify or add constraints, you have to hack tblgen.
+//
+
+class SDTypeConstraint<int opnum> {
+  int OperandNum = opnum;
+}
+
+// SDTCisVT - The specified operand has exactly this VT.
+class SDTCisVT<int OpNum, ValueType vt> : SDTypeConstraint<OpNum> {
+  ValueType VT = vt;
+}
+
+class SDTCisPtrTy<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisInt - The specified operand has integer type.
+class SDTCisInt<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisFP - The specified operand has floating-point type.
+class SDTCisFP<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisVec - The specified operand has a vector type.
+class SDTCisVec<int OpNum> : SDTypeConstraint<OpNum>;
+
+// SDTCisSameAs - The two specified operands have identical types.
+class SDTCisSameAs<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
+  int OtherOperandNum = OtherOp;
+}
+
+// SDTCisVTSmallerThanOp - The specified operand is a VT SDNode, and its type is
+// smaller than the 'Other' operand.
+class SDTCisVTSmallerThanOp<int OpNum, int OtherOp> : SDTypeConstraint<OpNum> {
+  int OtherOperandNum = OtherOp;
+}
+
+class SDTCisOpSmallerThanOp<int SmallOp, int BigOp> : SDTypeConstraint<SmallOp> {
+  int BigOperandNum = BigOp;
+}
+
+/// SDTCisEltOfVec - This indicates that ThisOp is a scalar type of the same
+/// type as the element type of OtherOp, which is a vector type.
+class SDTCisEltOfVec<int ThisOp, int OtherOp>
+  : SDTypeConstraint<ThisOp> {
+  int OtherOpNum = OtherOp;
+}
+
+/// SDTCisSubVecOfVec - This indicates that ThisOp is a vector type
+/// with length less than that of OtherOp, which is a vector type.
+class SDTCisSubVecOfVec<int ThisOp, int OtherOp>
+  : SDTypeConstraint<ThisOp> {
+  int OtherOpNum = OtherOp;
+}
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Type Profile definitions.
+//
+// These use the constraints defined above to describe the type requirements of
+// the various nodes. These are not hard coded into tblgen, allowing targets to
+// add their own if needed.
+//
+
+// SDTypeProfile - This profile describes the type requirements of a Selection
+// DAG node.
+class SDTypeProfile<int numresults, int numoperands,
+                    list<SDTypeConstraint> constraints> {
+  int NumResults = numresults;
+  int NumOperands = numoperands;
+  list<SDTypeConstraint> Constraints = constraints;
+}
+
+// Builtin profiles.
+def SDTIntLeaf: SDTypeProfile<1, 0, [SDTCisInt<0>]>;         // for 'imm'.
+def SDTFPLeaf : SDTypeProfile<1, 0, [SDTCisFP<0>]>;          // for 'fpimm'.
+def SDTPtrLeaf: SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;       // for '&g'.
+def SDTOther  : SDTypeProfile<1, 0, [SDTCisVT<0, OtherVT>]>; // for 'vt'.
+def SDTUNDEF  : SDTypeProfile<1, 0, []>;                     // for 'undef'.
+def SDTUnaryOp  : SDTypeProfile<1, 1, []>;                   // for bitconvert.
+
+def SDTIntBinOp : SDTypeProfile<1, 2, [   // add, and, or, xor, udiv, etc.
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>
+]>;
+def SDTIntShiftOp : SDTypeProfile<1, 2, [ // shl, sra, srl
+  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>
+]>;
+def SDTIntBinHiLoOp : SDTypeProfile<2, 2, [ // mulhi, mullo, sdivrem, udivrem
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<0>
+]>;
+
+def SDTFPBinOp : SDTypeProfile<1, 2, [    // fadd, fmul, etc.
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>
+]>;
+def SDTFPSignOp : SDTypeProfile<1, 2, [   // fcopysign.
+  SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisFP<2>
+]>;
+def SDTFPTernaryOp : SDTypeProfile<1, 3, [ // fmadd, fnmsub, etc.
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisFP<0>
+]>;
+def SDTIntUnaryOp : SDTypeProfile<1, 1, [ // ctlz
+  SDTCisSameAs<0, 1>, SDTCisInt<0>
+]>;
+def SDTIntExtendOp : SDTypeProfile<1, 1, [ // sext, zext, anyext
+  SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
+]>;
+def SDTIntTruncOp : SDTypeProfile<1, 1, [ // trunc
+  SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<0, 1>
+]>;
+def SDTFPUnaryOp : SDTypeProfile<1, 1, [  // fneg, fsqrt, etc
+  SDTCisSameAs<0, 1>, SDTCisFP<0>
+]>;
+def SDTFPRoundOp : SDTypeProfile<1, 1, [  // fround
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>
+]>;
+def SDTFPExtendOp : SDTypeProfile<1, 1, [ // fextend
+  SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<1, 0>
+]>;
+def SDTIntToFPOp : SDTypeProfile<1, 1, [  // [su]int_to_fp
+  SDTCisFP<0>, SDTCisInt<1>
+]>;
+def SDTFPToIntOp : SDTypeProfile<1, 1, [  // fp_to_[su]int
+  SDTCisInt<0>, SDTCisFP<1>
+]>;
+def SDTExtInreg : SDTypeProfile<1, 2, [   // sext_inreg
+  SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
+  SDTCisVTSmallerThanOp<2, 1>
+]>;
+
+def SDTSetCC : SDTypeProfile<1, 3, [      // setcc
+  SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+]>;
+
+def SDTSelect : SDTypeProfile<1, 3, [     // select
+  SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
+]>;
+
+def SDTSelectCC : SDTypeProfile<1, 5, [   // select_cc
+  SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, SDTCisSameAs<0, 3>,
+  SDTCisVT<5, OtherVT>
+]>;
+
+def SDTBr : SDTypeProfile<0, 1, [         // br
+  SDTCisVT<0, OtherVT>
+]>;
+
+def SDTBrcond : SDTypeProfile<0, 2, [     // brcond
+  SDTCisInt<0>, SDTCisVT<1, OtherVT>
+]>;
+
+def SDTBrind : SDTypeProfile<0, 1, [      // brind
+  SDTCisPtrTy<0>
+]>;
+
+def SDTNone : SDTypeProfile<0, 0, []>;    // ret, trap
+
+def SDTLoad : SDTypeProfile<1, 1, [       // load
+  SDTCisPtrTy<1>
+]>;
+
+def SDTStore : SDTypeProfile<0, 2, [      // store
+  SDTCisPtrTy<1>
+]>;
+
+def SDTIStore : SDTypeProfile<1, 3, [     // indexed store
+  SDTCisSameAs<0, 2>, SDTCisPtrTy<0>, SDTCisPtrTy<3>
+]>;
+
+def SDTVecShuffle : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
+]>;
+def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
+  SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2>
+]>;
+def SDTVecInsert : SDTypeProfile<1, 3, [  // vector insert
+  SDTCisEltOfVec<2, 1>, SDTCisSameAs<0, 1>, SDTCisPtrTy<3>
+]>;
+
+def SDTSubVecExtract : SDTypeProfile<1, 2, [ // subvector extract
+  SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
+]>;
+def SDTSubVecInsert : SDTypeProfile<1, 3, [  // subvector insert
+  SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3>
+]>;
+
+def SDTPrefetch : SDTypeProfile<0, 3, [      // prefetch
+  SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
+]>;
+
+def SDTMemBarrier : SDTypeProfile<0, 5, [    // memory barrier
+  SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>,
+  SDTCisInt<0>
+]>;
+def SDTAtomic3 : SDTypeProfile<1, 3, [
+  SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+def SDTAtomic2 : SDTypeProfile<1, 2, [
+  SDTCisSameAs<0,2>, SDTCisInt<0>, SDTCisPtrTy<1>
+]>;
+
+def SDTConvertOp : SDTypeProfile<1, 5, [ // cvtss, su, us, uu, ff, fs, fu, sf, su
+  SDTCisVT<2, OtherVT>, SDTCisVT<3, OtherVT>, SDTCisPtrTy<4>, SDTCisPtrTy<5>
+]>;
+
+class SDCallSeqStart<list<SDTypeConstraint> constraints> :
+        SDTypeProfile<0, 1, constraints>;
+class SDCallSeqEnd<list<SDTypeConstraint> constraints> :
+        SDTypeProfile<0, 2, constraints>;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node Properties.
+//
+// Note: These are hard coded into tblgen.
+//
+class SDNodeProperty;
+def SDNPCommutative : SDNodeProperty;   // X op Y == Y op X
+def SDNPAssociative : SDNodeProperty;   // (X op Y) op Z == X op (Y op Z)
+def SDNPHasChain    : SDNodeProperty;   // R/W chain operand and result
+def SDNPOutGlue     : SDNodeProperty;   // Write a flag result
+def SDNPInGlue      : SDNodeProperty;   // Read a flag operand
+def SDNPOptInGlue   : SDNodeProperty;   // Optionally read a flag operand
+def SDNPMayStore    : SDNodeProperty;   // May write to memory, sets 'mayStore'.
+def SDNPMayLoad     : SDNodeProperty;   // May read memory, sets 'mayLoad'.
+def SDNPSideEffect  : SDNodeProperty;   // Sets 'HasUnmodelledSideEffects'.
+def SDNPMemOperand  : SDNodeProperty;   // Touches memory, has assoc MemOperand
+def SDNPVariadic    : SDNodeProperty;   // Node has variable arguments.
+def SDNPWantRoot    : SDNodeProperty;   // ComplexPattern gets the root of match
+def SDNPWantParent  : SDNodeProperty;   // ComplexPattern gets the parent
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Operations
+class SDPatternOperator;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Node definitions.
+//
+class SDNode<string opcode, SDTypeProfile typeprof,
+             list<SDNodeProperty> props = [], string sdclass = "SDNode">
+  : SDPatternOperator {
+  string Opcode  = opcode;
+  string SDClass = sdclass;
+  list<SDNodeProperty> Properties = props;
+  SDTypeProfile TypeProfile = typeprof;
+}
+
+// Special TableGen-recognized dag nodes
+def set;
+def implicit;
+def node;
+def srcvalue;
+
+def imm        : SDNode<"ISD::Constant"  , SDTIntLeaf , [], "ConstantSDNode">;
+def timm       : SDNode<"ISD::TargetConstant",SDTIntLeaf, [], "ConstantSDNode">;
+def fpimm      : SDNode<"ISD::ConstantFP", SDTFPLeaf  , [], "ConstantFPSDNode">;
+def vt         : SDNode<"ISD::VALUETYPE" , SDTOther   , [], "VTSDNode">;
+def bb         : SDNode<"ISD::BasicBlock", SDTOther   , [], "BasicBlockSDNode">;
+def cond       : SDNode<"ISD::CONDCODE"  , SDTOther   , [], "CondCodeSDNode">;
+def undef      : SDNode<"ISD::UNDEF"     , SDTUNDEF   , []>;
+def globaladdr : SDNode<"ISD::GlobalAddress",         SDTPtrLeaf, [],
+                        "GlobalAddressSDNode">;
+def tglobaladdr : SDNode<"ISD::TargetGlobalAddress",  SDTPtrLeaf, [],
+                         "GlobalAddressSDNode">;
+def globaltlsaddr : SDNode<"ISD::GlobalTLSAddress",   SDTPtrLeaf, [],
+                           "GlobalAddressSDNode">;
+def tglobaltlsaddr : SDNode<"ISD::TargetGlobalTLSAddress", SDTPtrLeaf, [],
+                            "GlobalAddressSDNode">;
+def constpool   : SDNode<"ISD::ConstantPool",         SDTPtrLeaf, [],
+                         "ConstantPoolSDNode">;
+def tconstpool  : SDNode<"ISD::TargetConstantPool",   SDTPtrLeaf, [],
+                         "ConstantPoolSDNode">;
+def jumptable   : SDNode<"ISD::JumpTable",            SDTPtrLeaf, [],
+                         "JumpTableSDNode">;
+def tjumptable  : SDNode<"ISD::TargetJumpTable",      SDTPtrLeaf, [],
+                         "JumpTableSDNode">;
+def frameindex  : SDNode<"ISD::FrameIndex",           SDTPtrLeaf, [],
+                         "FrameIndexSDNode">;
+def tframeindex : SDNode<"ISD::TargetFrameIndex",     SDTPtrLeaf, [],
+                         "FrameIndexSDNode">;
+def externalsym : SDNode<"ISD::ExternalSymbol",       SDTPtrLeaf, [],
+                         "ExternalSymbolSDNode">;
+def texternalsym: SDNode<"ISD::TargetExternalSymbol", SDTPtrLeaf, [],
+                         "ExternalSymbolSDNode">;
+def blockaddress : SDNode<"ISD::BlockAddress",        SDTPtrLeaf, [],
+                          "BlockAddressSDNode">;
+def tblockaddress: SDNode<"ISD::TargetBlockAddress",  SDTPtrLeaf, [],
+                          "BlockAddressSDNode">;
+
+def add        : SDNode<"ISD::ADD"       , SDTIntBinOp   ,
+                        [SDNPCommutative, SDNPAssociative]>;
+def sub        : SDNode<"ISD::SUB"       , SDTIntBinOp>;
+def mul        : SDNode<"ISD::MUL"       , SDTIntBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def mulhs      : SDNode<"ISD::MULHS"     , SDTIntBinOp, [SDNPCommutative]>;
+def mulhu      : SDNode<"ISD::MULHU"     , SDTIntBinOp, [SDNPCommutative]>;
+def smullohi   : SDNode<"ISD::SMUL_LOHI" , SDTIntBinHiLoOp, [SDNPCommutative]>;
+def umullohi   : SDNode<"ISD::UMUL_LOHI" , SDTIntBinHiLoOp, [SDNPCommutative]>;
+def sdiv       : SDNode<"ISD::SDIV"      , SDTIntBinOp>;
+def udiv       : SDNode<"ISD::UDIV"      , SDTIntBinOp>;
+def srem       : SDNode<"ISD::SREM"      , SDTIntBinOp>;
+def urem       : SDNode<"ISD::UREM"      , SDTIntBinOp>;
+def sdivrem    : SDNode<"ISD::SDIVREM"   , SDTIntBinHiLoOp>;
+def udivrem    : SDNode<"ISD::UDIVREM"   , SDTIntBinHiLoOp>;
+def srl        : SDNode<"ISD::SRL"       , SDTIntShiftOp>;
+def sra        : SDNode<"ISD::SRA"       , SDTIntShiftOp>;
+def shl        : SDNode<"ISD::SHL"       , SDTIntShiftOp>;
+def rotl       : SDNode<"ISD::ROTL"      , SDTIntShiftOp>;
+def rotr       : SDNode<"ISD::ROTR"      , SDTIntShiftOp>;
+def and        : SDNode<"ISD::AND"       , SDTIntBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def or         : SDNode<"ISD::OR"        , SDTIntBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def xor        : SDNode<"ISD::XOR"       , SDTIntBinOp,
+                        [SDNPCommutative, SDNPAssociative]>;
+def addc       : SDNode<"ISD::ADDC"      , SDTIntBinOp,
+                        [SDNPCommutative, SDNPOutGlue]>;
+def adde       : SDNode<"ISD::ADDE"      , SDTIntBinOp,
+                        [SDNPCommutative, SDNPOutGlue, SDNPInGlue]>;
+def subc       :
SDNode<"ISD::SUBC" , SDTIntBinOp, + [SDNPOutGlue]>; +def sube : SDNode<"ISD::SUBE" , SDTIntBinOp, + [SDNPOutGlue, SDNPInGlue]>; + +def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>; +def bswap : SDNode<"ISD::BSWAP" , SDTIntUnaryOp>; +def ctlz : SDNode<"ISD::CTLZ" , SDTIntUnaryOp>; +def cttz : SDNode<"ISD::CTTZ" , SDTIntUnaryOp>; +def ctpop : SDNode<"ISD::CTPOP" , SDTIntUnaryOp>; +def sext : SDNode<"ISD::SIGN_EXTEND", SDTIntExtendOp>; +def zext : SDNode<"ISD::ZERO_EXTEND", SDTIntExtendOp>; +def anyext : SDNode<"ISD::ANY_EXTEND" , SDTIntExtendOp>; +def trunc : SDNode<"ISD::TRUNCATE" , SDTIntTruncOp>; +def bitconvert : SDNode<"ISD::BITCAST" , SDTUnaryOp>; +def extractelt : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDTVecExtract>; +def insertelt : SDNode<"ISD::INSERT_VECTOR_ELT", SDTVecInsert>; + + +def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>; +def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>; +def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>; +def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>; +def frem : SDNode<"ISD::FREM" , SDTFPBinOp>; +def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>; +def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>; +def fsqrt : SDNode<"ISD::FSQRT" , SDTFPUnaryOp>; +def fsin : SDNode<"ISD::FSIN" , SDTFPUnaryOp>; +def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>; +def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>; +def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>; +def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>; +def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>; +def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>; +def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>; +def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>; + +def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>; +def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>; +def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>; + +def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>; +def uint_to_fp : SDNode<"ISD::UINT_TO_FP" , SDTIntToFPOp>; +def fp_to_sint : SDNode<"ISD::FP_TO_SINT" , SDTFPToIntOp>; +def fp_to_uint : SDNode<"ISD::FP_TO_UINT" , SDTFPToIntOp>; +def f16_to_f32 : SDNode<"ISD::FP16_TO_FP32", SDTIntToFPOp>; +def f32_to_f16 : SDNode<"ISD::FP32_TO_FP16", SDTFPToIntOp>; + +def setcc : SDNode<"ISD::SETCC" , SDTSetCC>; +def select : SDNode<"ISD::SELECT" , SDTSelect>; +def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>; +def vsetcc : SDNode<"ISD::VSETCC" , SDTSetCC>; + +def brcond : SDNode<"ISD::BRCOND" , SDTBrcond, [SDNPHasChain]>; +def brind : SDNode<"ISD::BRIND" , SDTBrind, [SDNPHasChain]>; +def br : SDNode<"ISD::BR" , SDTBr, [SDNPHasChain]>; +def trap : SDNode<"ISD::TRAP" , SDTNone, + [SDNPHasChain, SDNPSideEffect]>; + +def prefetch : SDNode<"ISD::PREFETCH" , SDTPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; + +def membarrier : SDNode<"ISD::MEMBARRIER" , SDTMemBarrier, + [SDNPHasChain, SDNPSideEffect]>; + +def atomic_cmp_swap : SDNode<"ISD::ATOMIC_CMP_SWAP" , SDTAtomic3, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_add : SDNode<"ISD::ATOMIC_LOAD_ADD" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_swap : SDNode<"ISD::ATOMIC_SWAP", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_sub : SDNode<"ISD::ATOMIC_LOAD_SUB" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_and : SDNode<"ISD::ATOMIC_LOAD_AND" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_or 
: SDNode<"ISD::ATOMIC_LOAD_OR" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_xor : SDNode<"ISD::ATOMIC_LOAD_XOR" , SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_nand: SDNode<"ISD::ATOMIC_LOAD_NAND", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_min : SDNode<"ISD::ATOMIC_LOAD_MIN", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_max : SDNode<"ISD::ATOMIC_LOAD_MAX", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_umin : SDNode<"ISD::ATOMIC_LOAD_UMIN", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +def atomic_load_umax : SDNode<"ISD::ATOMIC_LOAD_UMAX", SDTAtomic2, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; + +// Do not use ld, st directly. Use load, extload, sextload, zextload, store, +// and truncst (see below). +def ld : SDNode<"ISD::LOAD" , SDTLoad, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def st : SDNode<"ISD::STORE" , SDTStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def ist : SDNode<"ISD::STORE" , SDTIStore, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + +def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>; +def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>; +def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>, + []>; +def vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", + SDTypeProfile<1, 2, [SDTCisPtrTy<2>]>, []>; +def vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", + SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; + +// This operator does not do subvector type checking. The ARM +// backend, at least, needs it. +def vector_extract_subvec : SDNode<"ISD::EXTRACT_SUBVECTOR", + SDTypeProfile<1, 2, [SDTCisInt<2>, SDTCisVec<1>, SDTCisVec<0>]>, + []>; + +// This operator does subvector type checking. +def extract_subvector : SDNode<"ISD::EXTRACT_SUBVECTOR", SDTSubVecExtract, []>; +def insert_subvector : SDNode<"ISD::INSERT_SUBVECTOR", SDTSubVecInsert, []>; + +// Nodes for intrinsics, you should use the intrinsic itself and let tblgen use +// these internally. Don't reference these directly. +def intrinsic_void : SDNode<"ISD::INTRINSIC_VOID", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain]>; +def intrinsic_w_chain : SDNode<"ISD::INTRINSIC_W_CHAIN", + SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, + [SDNPHasChain]>; +def intrinsic_wo_chain : SDNode<"ISD::INTRINSIC_WO_CHAIN", + SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>, []>; + +// Do not use cvt directly. Use cvt forms below +def cvt : SDNode<"ISD::CONVERT_RNDSAT", SDTConvertOp>; + +//===----------------------------------------------------------------------===// +// Selection DAG Condition Codes + +class CondCode; // ISD::CondCode enums +def SETOEQ : CondCode; def SETOGT : CondCode; +def SETOGE : CondCode; def SETOLT : CondCode; def SETOLE : CondCode; +def SETONE : CondCode; def SETO : CondCode; def SETUO : CondCode; +def SETUEQ : CondCode; def SETUGT : CondCode; def SETUGE : CondCode; +def SETULT : CondCode; def SETULE : CondCode; def SETUNE : CondCode; + +def SETEQ : CondCode; def SETGT : CondCode; def SETGE : CondCode; +def SETLT : CondCode; def SETLE : CondCode; def SETNE : CondCode; + + +//===----------------------------------------------------------------------===// +// Selection DAG Node Transformation Functions. 
+//
+// This mechanism allows targets to manipulate nodes in the output DAG once a
+// match has been formed. This is typically used to manipulate immediate
+// values.
+//
+class SDNodeXForm<SDNode opc, code xformFunction> {
+  SDNode Opcode = opc;
+  code XFormFunction = xformFunction;
+}
+
+def NOOP_SDNodeXForm : SDNodeXForm<imm, [{}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Fragments.
+//
+// Pattern fragments are reusable chunks of dags that match specific things.
+// They can take arguments and have C++ predicates that control whether they
+// match. They are intended to make the patterns for common instructions more
+// compact and readable.
+//
+
+/// PatFrag - Represents a pattern fragment. This can match something on the
+/// DAG, from a single node to multiple nested other fragments.
+///
+class PatFrag<dag ops, dag frag, code pred = [{}],
+              SDNodeXForm xform = NOOP_SDNodeXForm> : SDPatternOperator {
+  dag Operands = ops;
+  dag Fragment = frag;
+  code Predicate = pred;
+  SDNodeXForm OperandTransform = xform;
+}
+
+// PatLeaf's are pattern fragments that have no operands. This is just a helper
+// to define immediates and other common things concisely.
+class PatLeaf<dag frag, code pred = [{}], SDNodeXForm xform = NOOP_SDNodeXForm>
+ : PatFrag<(ops), frag, pred, xform>;
+
+// Leaf fragments.
+
+def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>;
+def vtFP  : PatLeaf<(vt), [{ return N->getVT().isFloatingPoint(); }]>;
+
+def immAllOnesV: PatLeaf<(build_vector), [{
+  return ISD::isBuildVectorAllOnes(N);
+}]>;
+def immAllZerosV: PatLeaf<(build_vector), [{
+  return ISD::isBuildVectorAllZeros(N);
+}]>;
+
+
+
+// Other helper fragments.
+def not  : PatFrag<(ops node:$in), (xor node:$in, -1)>;
+def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
+def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;
+
+// load fragments.
+def unindexedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+}]>;
+def load : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
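The [{ ... }] predicate bodies above are C++ fragments; tblgen splices each one into predicate code in the generated instruction selector, with N bound to the node under consideration. Roughly what the two fragments above amount to, as a sketch only (the function names are illustrative, and the real generated matcher differs in detail):

    // Sketch of the predicate checks implied by 'unindexedload' and 'load':
    // an inner fragment's predicate is checked along with the outer one's.
    bool Predicate_unindexedload(SDNode *N) {
      return cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
    }
    bool Predicate_load(SDNode *N) {
      return Predicate_unindexedload(N) &&
             cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
    }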
+// extending load fragments.
+def extload   : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD;
+}]>;
+def sextload  : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD;
+}]>;
+def zextload  : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD;
+}]>;
+
+def extloadi1  : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+def extloadi8  : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def extloadi16 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def extloadi32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def extloadf32 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def extloadf64 : PatFrag<(ops node:$ptr), (extload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+
+def sextloadi1  : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+def sextloadi8  : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def sextloadi16 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def sextloadi32 : PatFrag<(ops node:$ptr), (sextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+def zextloadi1  : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+def zextloadi8  : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def zextloadi16 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def zextloadi32 : PatFrag<(ops node:$ptr), (zextload node:$ptr), [{
+  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
+// store fragments.
+def unindexedstore : PatFrag<(ops node:$val, node:$ptr),
+                             (st node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
+}]>;
+def store : PatFrag<(ops node:$val, node:$ptr),
+                    (unindexedstore node:$val, node:$ptr), [{
+  return !cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+// truncstore fragments.
+def truncstore : PatFrag<(ops node:$val, node:$ptr),
+                         (unindexedstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+def truncstorei8 : PatFrag<(ops node:$val, node:$ptr),
+                           (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def truncstorei16 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def truncstorei32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def truncstoref32 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+def truncstoref64 : PatFrag<(ops node:$val, node:$ptr),
+                            (truncstore node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f64;
+}]>;
+
+// indexed store fragments.
+def istore : PatFrag<(ops node:$val, node:$base, node:$offset),
+                     (ist node:$val, node:$base, node:$offset), [{
+  return !cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+
+def pre_store : PatFrag<(ops node:$val, node:$base, node:$offset),
+                        (istore node:$val, node:$base, node:$offset), [{
+  ISD::MemIndexedMode AM = cast<StoreSDNode>(N)->getAddressingMode();
+  return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+
+def itruncstore : PatFrag<(ops node:$val, node:$base, node:$offset),
+                          (ist node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->isTruncatingStore();
+}]>;
+def pre_truncst : PatFrag<(ops node:$val, node:$base, node:$offset),
+                          (itruncstore node:$val, node:$base, node:$offset), [{
+  ISD::MemIndexedMode AM = cast<StoreSDNode>(N)->getAddressingMode();
+  return AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
+}]>;
+def pre_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                            (pre_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+def pre_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                            (pre_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def pre_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                             (pre_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def pre_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                             (pre_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                             (pre_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+
+def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
+                         (istore node:$val, node:$ptr, node:$offset), [{
+  ISD::MemIndexedMode AM = cast<StoreSDNode>(N)->getAddressingMode();
+  return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+
+def post_truncst : PatFrag<(ops node:$val, node:$base, node:$offset),
+                           (itruncstore node:$val, node:$base, node:$offset), [{
+  ISD::MemIndexedMode AM = cast<StoreSDNode>(N)->getAddressingMode();
+  return AM == ISD::POST_INC || AM == ISD::POST_DEC;
+}]>;
+def post_truncsti1 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                             (post_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i1;
+}]>;
+def post_truncsti8 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                             (post_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def post_truncsti16 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                              (post_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def post_truncsti32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                              (post_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
+                              (post_truncst node:$val, node:$base, node:$offset), [{
+  return cast<StoreSDNode>(N)->getMemoryVT() == MVT::f32;
+}]>;
+
+// setcc convenience fragments.
+def setoeq : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETOEQ)>;
+def setogt : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETOGT)>;
+def setoge : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETOGE)>;
+def setolt : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETOLT)>;
+def setole : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETOLE)>;
+def setone : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETONE)>;
+def seto   : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETO)>;
+def setuo  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETUO)>;
+def setueq : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETUEQ)>;
+def setugt : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETUGT)>;
+def setuge : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETUGE)>;
+def setult : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETULT)>;
+def setule : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETULE)>;
+def setune : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETUNE)>;
+def seteq  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETEQ)>;
+def setgt  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETGT)>;
+def setge  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETGE)>;
+def setlt  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETLT)>;
+def setle  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETLE)>;
+def setne  : PatFrag<(ops node:$lhs, node:$rhs),
+                     (setcc node:$lhs, node:$rhs, SETNE)>;
+
+def atomic_cmp_swap_8 :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
+}]>;
+def atomic_cmp_swap_16 :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
+}]>;
+def atomic_cmp_swap_32 :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+def atomic_cmp_swap_64 :
+  PatFrag<(ops node:$ptr, node:$cmp, node:$swap),
+          (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{
+  return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
+}]>;
+
+multiclass binary_atomic_op<SDNode atomic_op> {
+  def _8 : PatFrag<(ops node:$ptr, node:$val),
+                   (atomic_op node:$ptr, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i8;
+  }]>;
+  def _16 : PatFrag<(ops node:$ptr, node:$val),
+                    (atomic_op node:$ptr, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i16;
+  }]>;
+  def _32 : PatFrag<(ops node:$ptr, node:$val),
+                    (atomic_op node:$ptr, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i32;
+  }]>;
+  def _64 : PatFrag<(ops node:$ptr, node:$val),
+                    (atomic_op node:$ptr, node:$val), [{
+    return cast<AtomicSDNode>(N)->getMemoryVT() == MVT::i64;
+  }]>;
+}
+
+defm atomic_load_add  : binary_atomic_op<atomic_load_add>;
+defm atomic_swap      : binary_atomic_op<atomic_swap>;
+defm atomic_load_sub  : binary_atomic_op<atomic_load_sub>;
+defm atomic_load_and  : binary_atomic_op<atomic_load_and>;
+defm atomic_load_or   : binary_atomic_op<atomic_load_or>;
+defm atomic_load_xor  : binary_atomic_op<atomic_load_xor>;
+defm atomic_load_nand : binary_atomic_op<atomic_load_nand>;
+defm atomic_load_min  : binary_atomic_op<atomic_load_min>;
+defm atomic_load_max  : binary_atomic_op<atomic_load_max>;
+defm atomic_load_umin : binary_atomic_op<atomic_load_umin>;
+defm atomic_load_umax : binary_atomic_op<atomic_load_umax>;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG CONVERT_RNDSAT patterns
+
+def cvtff : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_FF;
+    }]>;
+
+def cvtss : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_SS;
+    }]>;
+
+def cvtsu : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_SU;
+    }]>;
+
+def cvtus : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_US;
+    }]>;
+
+def cvtuu : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_UU;
+    }]>;
+
+def cvtsf : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_SF;
+    }]>;
+
+def cvtuf : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_UF;
+    }]>;
+
+def cvtfs : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_FS;
+    }]>;
+
+def cvtfu : PatFrag<(ops node:$val, node:$dty, node:$sty, node:$rd, node:$sat),
+    (cvt node:$val, node:$dty, node:$sty, node:$rd, node:$sat), [{
+       return cast<CvtRndSatSDNode>(N)->getCvtCode() == ISD::CVT_FU;
+    }]>;
+
+//===----------------------------------------------------------------------===//
+// Selection DAG Pattern Support.
+//
+// Patterns are what are actually matched against by the target-flavored
+// instruction selection DAG. Instructions defined by the target implicitly
+// define patterns in most cases, but patterns can also be explicitly added
+// when an operation is defined by a sequence of instructions (e.g. loading a
+// large immediate value on RISC targets that do not support immediates as
+// large as their GPRs).
+//
+
+class Pattern<dag patternToMatch, list<dag> resultInstrs> {
+  dag             PatternToMatch  = patternToMatch;
+  list<dag>       ResultInstrs    = resultInstrs;
+  list<Predicate> Predicates      = [];  // See class Instruction in Target.td.
+  int             AddedComplexity = 0;   // See class Instruction in Target.td.
+}
+
+// Pat - A simple (but common) form of a pattern, which produces a simple result
+// not needing a full list.
+class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
+
+//===----------------------------------------------------------------------===//
+// Complex pattern definitions.
+//
+
+// Complex patterns, e.g. X86 addressing mode, require pattern matching code
+// in C++. NumOperands is the number of operands returned by the select
+// function; SelectFunc is the name of the function used to pattern match the
+// max. pattern; RootNodes are the list of possible root nodes of the sub-dags
+// to match.
+// e.g. X86 addressing mode - def addr : ComplexPattern<4, "SelectAddr", [add]>;
+//
+class ComplexPattern<ValueType ty, int numops, string fn,
+                     list<SDNode> roots = [], list<SDNodeProperty> props = []> {
+  ValueType Ty = ty;
+  int NumOperands = numops;
+  string SelectFunc = fn;
+  list<SDNode> RootNodes = roots;
+  list<SDNodeProperty> Properties = props;
+}
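For a ComplexPattern like the addr example above, the named SelectFunc is a C++ member of the target's DAG instruction selector: it tries to match the sub-dag rooted at N and, on success, fills in the operands it returns. A hedged sketch of the shape such a function takes; the class name, the two-operand form, and the body are illustrative, not the actual X86 implementation:

    // Illustrative only: match (add base, constant) as a base+displacement
    // address, falling back to base with a zero displacement.
    bool MyTargetDAGToDAGISel::SelectAddr(SDValue N, SDValue &Base,
                                          SDValue &Disp) {
      if (N.getOpcode() == ISD::ADD)
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
          Base = N.getOperand(0);
          Disp = CurDAG->getTargetConstant(C->getSExtValue(), MVT::i32);
          return true;
        }
      Base = N;
      Disp = CurDAG->getTargetConstant(0, MVT::i32);
      return true;
    }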
diff --git a/final/include/llvm/Target/TargetSelectionDAGInfo.h b/final/include/llvm/Target/TargetSelectionDAGInfo.h
new file mode 100644
index 00000000000..c9ca7223b5f
--- /dev/null
+++ b/final/include/llvm/Target/TargetSelectionDAGInfo.h
@@ -0,0 +1,101 @@
+//==-- llvm/Target/TargetSelectionDAGInfo.h - SelectionDAG Info --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the TargetSelectionDAGInfo class, which targets can
+// subclass to parameterize the SelectionDAG lowering and instruction
+// selection process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETSELECTIONDAGINFO_H
+#define LLVM_TARGET_TARGETSELECTIONDAGINFO_H
+
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+class TargetData;
+class TargetMachine;
+
+//===----------------------------------------------------------------------===//
+/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the
+/// SelectionDAG lowering and instruction selection process.
+///
+class TargetSelectionDAGInfo {
+  TargetSelectionDAGInfo(const TargetSelectionDAGInfo &); // DO NOT IMPLEMENT
+  void operator=(const TargetSelectionDAGInfo &);         // DO NOT IMPLEMENT
+
+  const TargetData *TD;
+
+protected:
+  const TargetData *getTargetData() const { return TD; }
+
+public:
+  explicit TargetSelectionDAGInfo(const TargetMachine &TM);
+  virtual ~TargetSelectionDAGInfo();
+
+  /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a
+  /// memcpy. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple loads/stores and can be
+  /// more efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  ///
+  /// If AlwaysInline is true, the size is constant and the target should not
+  /// emit any calls and is strongly encouraged to attempt to emit inline code
+  /// even if it is beyond the usual threshold because this intrinsic is being
+  /// expanded in a place where calls are not feasible (e.g. within the prologue
+  /// for another call). If the target chooses to decline an AlwaysInline
+  /// request here, legalize will resort to using simple loads and stores.
+  virtual SDValue
+  EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2,
+                          SDValue Op3, unsigned Align, bool isVolatile,
+                          bool AlwaysInline,
+                          MachinePointerInfo DstPtrInfo,
+                          MachinePointerInfo SrcPtrInfo) const {
+    return SDValue();
+  }
+
+  /// EmitTargetCodeForMemmove - Emit target-specific code that performs a
+  /// memmove. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple loads/stores and can be
+  /// more efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  virtual SDValue
+  EmitTargetCodeForMemmove(SelectionDAG &DAG, DebugLoc dl,
+                           SDValue Chain,
+                           SDValue Op1, SDValue Op2,
+                           SDValue Op3, unsigned Align, bool isVolatile,
+                           MachinePointerInfo DstPtrInfo,
+                           MachinePointerInfo SrcPtrInfo) const {
+    return SDValue();
+  }
+
+  /// EmitTargetCodeForMemset - Emit target-specific code that performs a
+  /// memset. This can be used by targets to provide code sequences for cases
+  /// that don't fit the target's parameters for simple stores and can be more
+  /// efficient than using a library call. This function can return a null
+  /// SDValue if the target declines to use custom code and a different
+  /// lowering strategy should be used.
+  virtual SDValue
+  EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+                          SDValue Chain,
+                          SDValue Op1, SDValue Op2,
+                          SDValue Op3, unsigned Align, bool isVolatile,
+                          MachinePointerInfo DstPtrInfo) const {
+    return SDValue();
+  }
+};
+
+} // end llvm namespace
+
+#endif
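A target opts into this mechanism by subclassing; the base-class behavior of returning a default-constructed SDValue() means "decline, use the generic lowering". A minimal sketch, where FooSelectionDAGInfo is a made-up target class and the override simply declines, as the base class does:

    #include "llvm/Target/TargetSelectionDAGInfo.h"

    namespace llvm {
    class FooSelectionDAGInfo : public TargetSelectionDAGInfo {
    public:
      explicit FooSelectionDAGInfo(const TargetMachine &TM)
        : TargetSelectionDAGInfo(TM) {}

      virtual SDValue
      EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, SDValue Chain,
                              SDValue Dst, SDValue Src, SDValue Size,
                              unsigned Align, bool isVolatile,
                              MachinePointerInfo DstPtrInfo) const {
        // A real target would emit a custom store sequence here for sizes
        // and alignments it handles well. Returning a null SDValue tells
        // legalization to fall back to generic stores or a library call.
        return SDValue();
      }
    };
    }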
diff --git a/final/include/llvm/Target/TargetSubtarget.h b/final/include/llvm/Target/TargetSubtarget.h
new file mode 100644
index 00000000000..22b09bac073
--- /dev/null
+++ b/final/include/llvm/Target/TargetSubtarget.h
@@ -0,0 +1,67 @@
+//==-- llvm/Target/TargetSubtarget.h - Target Information --------*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subtarget options of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_TARGETSUBTARGET_H
+#define LLVM_TARGET_TARGETSUBTARGET_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+class SDep;
+class SUnit;
+class TargetRegisterClass;
+template <typename T> class SmallVectorImpl;
+
+//===----------------------------------------------------------------------===//
+///
+/// TargetSubtarget - Generic base class for all target subtargets.  All
+/// Target-specific options that control code generation and printing should
+/// be exposed through a TargetSubtarget-derived class.
+///
+class TargetSubtarget {
+  TargetSubtarget(const TargetSubtarget&);  // DO NOT IMPLEMENT
+  void operator=(const TargetSubtarget&);   // DO NOT IMPLEMENT
+protected: // Can only create subclasses...
+  TargetSubtarget();
+public:
+  // AntiDepBreakMode - Type of anti-dependence breaking that should
+  // be performed before post-RA scheduling.
+  typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
+  typedef SmallVectorImpl<TargetRegisterClass*> RegClassVector;
+
+  virtual ~TargetSubtarget();
+
+  /// getSpecialAddressLatency - For targets where it is beneficial to
+  /// backschedule instructions that compute addresses, return a value
+  /// indicating the number of scheduling cycles of backscheduling that
+  /// should be attempted.
+  virtual unsigned getSpecialAddressLatency() const { return 0; }
+
+  // enablePostRAScheduler - If the target can benefit from post-regalloc
+  // scheduling and the specified optimization level meets the requirement
+  // return true to enable post-register-allocation scheduling. In
+  // CriticalPathRCs return any register classes that should only be broken
+  // if on the critical path.
+  virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
+                                     AntiDepBreakMode& Mode,
+                                     RegClassVector& CriticalPathRCs) const;
+
+  // adjustSchedDependency - Perform target specific adjustments to
+  // the latency of a schedule dependency.
+  virtual void adjustSchedDependency(SUnit *def, SUnit *use,
+                                     SDep& dep) const { }
+};
+
+} // End llvm namespace
+
+#endif
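To illustrate how a concrete subtarget might use the hook above, here is a hedged sketch; FooSubtarget is hypothetical, and the policy (critical-path-only anti-dependence breaking at -O2 and above) is just an example of the pattern, not any particular in-tree target:

    #include "llvm/Target/TargetSubtarget.h"

    namespace llvm {
    class FooSubtarget : public TargetSubtarget {
    public:
      // Request post-register-allocation scheduling when optimizing,
      // breaking anti-dependencies only along the critical path.
      virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                                         AntiDepBreakMode &Mode,
                                         RegClassVector &CriticalPathRCs) const {
        Mode = ANTIDEP_CRITICAL;
        CriticalPathRCs.clear(); // no class restriction in this sketch
        return OptLevel >= CodeGenOpt::Default;
      }
    };
    }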
diff --git a/final/include/llvm/Transforms/IPO.h b/final/include/llvm/Transforms/IPO.h
new file mode 100644
index 00000000000..12398813cc7
--- /dev/null
+++ b/final/include/llvm/Transforms/IPO.h
@@ -0,0 +1,212 @@
+//===- llvm/Transforms/IPO.h - Interprocedural Transformations --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for accessor functions that expose passes
+// in the IPO transformations library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_H
+#define LLVM_TRANSFORMS_IPO_H
+
+#include <vector>
+
+namespace llvm {
+
+class ModulePass;
+class Pass;
+class Function;
+class BasicBlock;
+class GlobalValue;
+
+//===----------------------------------------------------------------------===//
+//
+// This function removes symbols from functions and modules. If OnlyDebugInfo
+// is true, only debugging information is removed from the module.
+//
+ModulePass *createStripSymbolsPass(bool OnlyDebugInfo = false);
+
+//===----------------------------------------------------------------------===//
+//
+// This function strips symbols from functions and modules, leaving only
+// debugging information in place.
+//
+ModulePass *createStripNonDebugSymbolsPass();
+
+//===----------------------------------------------------------------------===//
+//
+// This pass removes llvm.dbg.declare intrinsics.
+ModulePass *createStripDebugDeclarePass();
+
+//===----------------------------------------------------------------------===//
+//
+// This pass removes unused symbols' debug info.
+ModulePass *createStripDeadDebugInfoPass();
+
+//===----------------------------------------------------------------------===//
+/// createLowerSetJmpPass - This function lowers the setjmp/longjmp intrinsics
+/// to invoke/unwind instructions. This should really be part of the C/C++
+/// front-end, but it's so much easier to write transformations in LLVM proper.
+///
+ModulePass *createLowerSetJmpPass();
+
+//===----------------------------------------------------------------------===//
+/// createConstantMergePass - This function returns a new pass that merges
+/// duplicate global constants together into a single constant that is shared.
+/// This is useful because some passes (ie TraceValues) insert a lot of string
+/// constants into the program, regardless of whether or not they duplicate an
+/// existing string.
+///
+ModulePass *createConstantMergePass();
+
+
+//===----------------------------------------------------------------------===//
+/// createGlobalOptimizerPass - This function returns a new pass that optimizes
+/// non-address taken internal globals.
+///
+ModulePass *createGlobalOptimizerPass();
+
+
+//===----------------------------------------------------------------------===//
+/// createDeadTypeEliminationPass - Return a new pass that eliminates symbol
+/// table entries for types that are never used.
+///
+ModulePass *createDeadTypeEliminationPass();
+
+
+//===----------------------------------------------------------------------===//
+/// createGlobalDCEPass - This transform is designed to eliminate unreachable
+/// internal globals (functions or global variables)
+///
+ModulePass *createGlobalDCEPass();
+
+
+//===----------------------------------------------------------------------===//
+/// createGVExtractionPass - If deleteFn is true, this pass deletes
+/// the specified global values. Otherwise, it deletes as much of the module as
+/// possible, except for the global values specified.
+///
+ModulePass *createGVExtractionPass(std::vector<GlobalValue*> &GVs,
+                                   bool deleteFn = false);
+
+//===----------------------------------------------------------------------===//
+/// createFunctionInliningPass - Return a new pass object that uses a heuristic
+/// to inline direct function calls to small functions.
+///
+/// The -inline-threshold command line option takes precedence over the
+/// threshold given here.
+Pass *createFunctionInliningPass();
+Pass *createFunctionInliningPass(int Threshold);
+
+//===----------------------------------------------------------------------===//
+/// createAlwaysInlinerPass - Return a new pass object that inlines only
+/// functions that are marked as "always_inline".
+Pass *createAlwaysInlinerPass();
+
+//===----------------------------------------------------------------------===//
+/// createPruneEHPass - Return a new pass object which transforms invoke
+/// instructions into calls, if the callee can _not_ unwind the stack.
+///
+Pass *createPruneEHPass();
+
+//===----------------------------------------------------------------------===//
+/// createInternalizePass - This pass loops over all of the functions in the
+/// input module, internalizing all globals (functions and variables) not part
+/// of the api. If a list of symbols is specified with the
+/// -internalize-public-api-* command line options, those symbols are not
+/// internalized and all others are. Otherwise if AllButMain is set and the
+/// main function is found, all other globals are marked as internal. If no api
+/// is supplied and AllButMain is not set, or no main function is found, nothing
+/// is internalized.
+///
+ModulePass *createInternalizePass(bool AllButMain);
+
+/// createInternalizePass - This pass loops over all of the functions in the
+/// input module, internalizing all globals (functions and variables) not in
+/// the given exportList.
+///
+/// Note that command line options that are used with the above function are
+/// not used now! Also, when exportList is empty, nothing is internalized.
+ModulePass *createInternalizePass(const std::vector<const char *> &exportList);
+
+//===----------------------------------------------------------------------===//
+/// createDeadArgEliminationPass - This pass removes arguments from functions
+/// which are not used by the body of the function.
+///
+ModulePass *createDeadArgEliminationPass();
+
+/// DeadArgHacking pass - Same as DAE, but delete arguments of external
+/// functions as well. This is definitely not safe, and should only be used by
+/// bugpoint.
+ModulePass *createDeadArgHackingPass();
+
+//===----------------------------------------------------------------------===//
+/// createArgumentPromotionPass - This pass promotes "by reference" arguments
+/// to be passed by value if the number of elements passed is smaller or
+/// equal to maxElements (maxElements == 0 means always promote).
+///
+Pass *createArgumentPromotionPass(unsigned maxElements = 3);
+Pass *createStructRetPromotionPass();
+
+//===----------------------------------------------------------------------===//
+/// createIPConstantPropagationPass - This pass propagates constants from call
+/// sites into the bodies of functions.
+///
+ModulePass *createIPConstantPropagationPass();
+
+//===----------------------------------------------------------------------===//
+/// createIPSCCPPass - This pass propagates constants from call sites into the
+/// bodies of functions, and keeps track of whether basic blocks are executable
+/// in the process.
+///
+ModulePass *createIPSCCPPass();
+
+//===----------------------------------------------------------------------===//
+//
+/// createLoopExtractorPass - This pass extracts all natural loops from the
+/// program into a function if it can.
+///
+Pass *createLoopExtractorPass();
+
+/// createSingleLoopExtractorPass - This pass extracts one natural loop from the
+/// program into a function if it can. This is used by bugpoint.
+///
+Pass *createSingleLoopExtractorPass();
+
+/// createBlockExtractorPass - This pass extracts all blocks (except those
+/// specified in the argument list) from the functions in the module.
+///
+ModulePass *createBlockExtractorPass();
+
+/// createStripDeadPrototypesPass - This pass removes any function declarations
+/// (prototypes) that are not used.
+ModulePass *createStripDeadPrototypesPass();
+
+//===----------------------------------------------------------------------===//
+/// createFunctionAttrsPass - This pass discovers functions that do not access
+/// memory, or only read memory, and gives them the readnone/readonly attribute.
+/// It also discovers function arguments that are not captured by the function
+/// and marks them with the nocapture attribute.
+///
+Pass *createFunctionAttrsPass();
+
+//===----------------------------------------------------------------------===//
+/// createMergeFunctionsPass - This pass discovers identical functions and
+/// collapses them.
+///
+ModulePass *createMergeFunctionsPass();
+
+//===----------------------------------------------------------------------===//
+/// createPartialInliningPass - This pass inlines parts of functions.
+///
+ModulePass *createPartialInliningPass();
+
+} // End llvm namespace
+
+#endif
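By way of illustration, these creator functions are meant to be handed to a pass manager. A minimal sketch of that usage in the API style of this release; the particular pass selection is arbitrary, and the Module is assumed to have been created elsewhere:

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"

    // Run a small interprocedural pipeline over an existing module.
    void runIPO(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createGlobalOptimizerPass()); // non-address-taken globals
      PM.add(llvm::createFunctionInliningPass());
      PM.add(llvm::createGlobalDCEPass());       // drop now-unreachable globals
      PM.run(M);
    }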
diff --git a/final/include/llvm/Transforms/IPO/InlinerPass.h b/final/include/llvm/Transforms/IPO/InlinerPass.h
new file mode 100644
index 00000000000..3ac4c591c94
--- /dev/null
+++ b/final/include/llvm/Transforms/IPO/InlinerPass.h
@@ -0,0 +1,97 @@
+//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple policy-based bottom-up inliner. This file
+// implements all of the boring mechanics of the bottom-up inlining, while the
+// subclass determines WHAT to inline, which is the much more interesting
+// component.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
+#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
+
+#include "llvm/CallGraphSCCPass.h"
+
+namespace llvm {
+  class CallSite;
+  class TargetData;
+  class InlineCost;
+  template<class PtrType, unsigned SmallSize>
+  class SmallPtrSet;
+
+/// Inliner - This class contains all of the helper code which is used to
+/// perform the inlining operations that do not depend on the policy.
+///
+struct Inliner : public CallGraphSCCPass {
+  explicit Inliner(char &ID);
+  explicit Inliner(char &ID, int Threshold);
+
+  /// getAnalysisUsage - For this class, we declare that we require and preserve
+  /// the call graph. If the derived class implements this method, it should
+  /// always explicitly call the implementation here.
+  virtual void getAnalysisUsage(AnalysisUsage &Info) const;
+
+  // Main run interface method, this implements the interface required by the
+  // Pass class.
+  virtual bool runOnSCC(CallGraphSCC &SCC);
+
+  // doFinalization - Remove now-dead linkonce functions at the end of
+  // processing to avoid breaking the SCC traversal.
+  virtual bool doFinalization(CallGraph &CG);
+
+  /// This method returns the value specified by the -inline-threshold value,
+  /// specified on the command line. This is typically not directly needed.
+  ///
+  unsigned getInlineThreshold() const { return InlineThreshold; }
+
+  /// Calculate the inline threshold for given Caller. This threshold is lower
+  /// if the caller is marked with OptimizeForSize and -inline-threshold is not
+  /// given on the command line. It is higher if the callee is marked with the
+  /// inlinehint attribute.
+  ///
+  unsigned getInlineThreshold(CallSite CS) const;
+
+  /// getInlineCost - This method must be implemented by the subclass to
+  /// determine the cost of inlining the specified call site. If the cost
+  /// returned is greater than the current inline threshold, the call site is
+  /// not inlined.
+  ///
+  virtual InlineCost getInlineCost(CallSite CS) = 0;
+
+  /// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+  /// higher threshold to determine if the function call should be inlined.
+  ///
+  virtual float getInlineFudgeFactor(CallSite CS) = 0;
+
+  /// resetCachedCostInfo - erase any cached cost data from the derived class.
+  /// If the derived class has no such data this can be empty.
+  ///
+  virtual void resetCachedCostInfo(Function* Caller) = 0;
+
+  /// growCachedCostInfo - update the cached cost info for Caller after Callee
+  /// has been inlined.
+  virtual void growCachedCostInfo(Function *Caller, Function *Callee) = 0;
+
+  /// removeDeadFunctions - Remove dead functions that are not included in
+  /// DNR (Do Not Remove) list.
+  bool removeDeadFunctions(CallGraph &CG,
+                           SmallPtrSet<const Function*, 16> *DNR = NULL);
+
+private:
+  // InlineThreshold - Cache the value here for easy access.
+  unsigned InlineThreshold;
+
+  /// shouldInline - Return true if the inliner should attempt to
+  /// inline at the given CallSite.
+  bool shouldInline(CallSite CS);
+};
+
+} // End llvm namespace
+
+#endif
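The policy lives entirely in the pure virtual hooks above; the base class supplies the bottom-up SCC walk. A skeletal subclass might look like the following sketch, which is not the in-tree SimpleInliner, and whose trivial "everything is free" cost policy exists only to show the shape of the interface:

    #include "llvm/Transforms/IPO/InlinerPass.h"
    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/Support/CallSite.h"

    namespace {
    // Trivial policy sketch: report a zero cost so every candidate call site
    // falls under the threshold. A real inliner computes a genuine cost.
    struct TrivialInliner : public llvm::Inliner {
      static char ID;
      TrivialInliner() : Inliner(ID) {}

      virtual llvm::InlineCost getInlineCost(llvm::CallSite CS) {
        return llvm::InlineCost::get(0);
      }
      virtual float getInlineFudgeFactor(llvm::CallSite CS) { return 1.0f; }
      virtual void resetCachedCostInfo(llvm::Function *Caller) {}
      virtual void growCachedCostInfo(llvm::Function *Caller,
                                      llvm::Function *Callee) {}
    };
    char TrivialInliner::ID = 0;
    }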
+  bool shouldInline(CallSite CS);
+};
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Instrumentation.h b/final/include/llvm/Transforms/Instrumentation.h new file mode 100644 index 00000000000..aa9873fb8af --- /dev/null +++ b/final/include/llvm/Transforms/Instrumentation.h @@ -0,0 +1,33 @@
+//===- Transforms/Instrumentation.h - Instrumentation passes ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines constructor functions for instrumentation passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_H
+
+namespace llvm {
+
+class ModulePass;
+class FunctionPass;
+
+// Insert edge profiling instrumentation
+ModulePass *createEdgeProfilerPass();
+
+// Insert optimal edge profiling instrumentation
+ModulePass *createOptimalEdgeProfilerPass();
+
+// Insert path profiling instrumentation
+ModulePass *createPathProfilerPass();
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Scalar.h b/final/include/llvm/Transforms/Scalar.h new file mode 100644 index 00000000000..8d5ed44cff3 --- /dev/null +++ b/final/include/llvm/Transforms/Scalar.h @@ -0,0 +1,348 @@
+//===-- Scalar.h - Scalar Transformations -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines prototypes for accessor functions that expose passes
+// in the Scalar transformations library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_H
+#define LLVM_TRANSFORMS_SCALAR_H
+
+namespace llvm {
+
+class FunctionPass;
+class Pass;
+class GetElementPtrInst;
+class PassInfo;
+class TerminatorInst;
+class TargetLowering;
+
+//===----------------------------------------------------------------------===//
+//
+// ConstantPropagation - A worklist driven constant propagation pass
+//
+FunctionPass *createConstantPropagationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// SCCP - Sparse conditional constant propagation.
+//
+FunctionPass *createSCCPPass();
+
+//===----------------------------------------------------------------------===//
+//
+// DeadInstElimination - This pass quickly removes trivially dead instructions
+// without modifying the CFG of the function. It is a BasicBlockPass, so it
+// runs efficiently when queued next to other BasicBlockPasses.
+//
+Pass *createDeadInstEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// DeadCodeElimination - This pass is more powerful than DeadInstElimination,
+// because it is worklist driven and can revisit an instruction when the
+// instructions that use it become dead, eliminating whole chains of dead
+// computations.
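+//
+// For example (illustrative IR): deleting the unused %b below makes %a dead
+// as well, so both are removed:
+//    %a = add i32 %x, 1
+//    %b = mul i32 %a, 2    ; %b has no uses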
+//
+FunctionPass *createDeadCodeEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// DeadStoreElimination - This pass deletes stores that are post-dominated by
+// must-aliased stores and are not loaded or used between the stores.
+//
+FunctionPass *createDeadStoreEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This
+// algorithm assumes instructions are dead until proven otherwise, which makes
+// it more successful at removing non-obviously dead instructions.
+//
+FunctionPass *createAggressiveDCEPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ScalarReplAggregates - Break up allocas of aggregates into multiple allocas
+// if possible.
+//
+FunctionPass *createScalarReplAggregatesPass(signed Threshold = -1,
+                                             bool UseDomTree = true);
+
+//===----------------------------------------------------------------------===//
+//
+// InductionVariableSimplify - Transform induction variables in a program to all
+// use a single canonical induction variable per loop.
+//
+Pass *createIndVarSimplifyPass();
+
+//===----------------------------------------------------------------------===//
+//
+// InstructionCombining - Combine instructions to form fewer, simple
+// instructions. This pass does not modify the CFG, and has a tendency to make
+// instructions dead, so a subsequent DCE pass is useful.
+//
+// This pass combines things like:
+//    %Y = add int 1, %X
+//    %Z = add int 1, %Y
+// into:
+//    %Z = add int 2, %X
+//
+FunctionPass *createInstructionCombiningPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LICM - This pass is a loop invariant code motion and memory promotion pass.
+//
+Pass *createLICMPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopStrengthReduce - This pass strength-reduces GEP instructions that use a
+// loop's canonical induction variable as one of their indices. It takes an
+// optional parameter used to consult the target machine about whether certain
+// transformations are profitable.
+//
+Pass *createLoopStrengthReducePass(const TargetLowering *TLI = 0);
+
+//===----------------------------------------------------------------------===//
+//
+// LoopUnswitch - This pass is a simple loop unswitching pass.
+//
+Pass *createLoopUnswitchPass(bool OptimizeForSize = false);
+
+//===----------------------------------------------------------------------===//
+//
+// LoopInstSimplify - This pass simplifies instructions in a loop's body.
+//
+Pass *createLoopInstSimplifyPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopUnroll - This pass is a simple loop unrolling pass.
+//
+Pass *createLoopUnrollPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopRotate - This pass is a simple loop rotating pass.
+//
+Pass *createLoopRotatePass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopIdiom - This pass recognizes and replaces idioms in loops.
+//
+Pass *createLoopIdiomPass();
+
+//===----------------------------------------------------------------------===//
+//
+// PromoteMemoryToRegister - This pass is used to promote memory references to
+// be register references.
A simple example of the transformation performed by
+// this pass is:
+//
+// FROM CODE:
+//    %X = alloca i32, i32 1
+//    store i32 42, i32* %X
+//    %Y = load i32* %X
+//    ret i32 %Y
+//
+// TO CODE:
+//    ret i32 42
+//
+FunctionPass *createPromoteMemoryToRegisterPass();
+
+//===----------------------------------------------------------------------===//
+//
+// DemoteRegisterToMemoryPass - This pass is used to demote registers to memory
+// references. It basically undoes the PromoteMemoryToRegister pass to make CFG
+// hacking easier.
+//
+FunctionPass *createDemoteRegisterToMemoryPass();
+extern char &DemoteRegisterToMemoryID;
+
+//===----------------------------------------------------------------------===//
+//
+// Reassociate - This pass reassociates commutative expressions in an order that
+// is designed to promote better constant propagation, GCSE, LICM, PRE...
+//
+// For example:  4 + (x + 5)  ->  x + (4 + 5)
+//
+FunctionPass *createReassociatePass();
+
+//===----------------------------------------------------------------------===//
+//
+// TailDuplication - Eliminate unconditional branches through controlled code
+// duplication, creating simpler CFG structures.
+//
+FunctionPass *createTailDuplicationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// JumpThreading - Thread control through multi-pred/multi-succ blocks where
+// some preds always go to some succ.
+//
+FunctionPass *createJumpThreadingPass();
+
+//===----------------------------------------------------------------------===//
+//
+// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
+// simplify terminator instructions, etc...
+//
+FunctionPass *createCFGSimplificationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges - Break all of the critical edges in the CFG by inserting
+// a dummy basic block. This pass may be "required" by passes that cannot deal
+// with critical edges. For this usage, a pass must call:
+//
+//   AU.addRequiredID(BreakCriticalEdgesID);
+//
+// This pass obviously invalidates the CFG, but can update forward dominator
+// (set, immediate dominators, tree, and frontier) information.
+//
+FunctionPass *createBreakCriticalEdgesPass();
+extern char &BreakCriticalEdgesID;
+
+//===----------------------------------------------------------------------===//
+//
+// LoopSimplify - Insert pre-header blocks into the CFG for every loop in each
+// function of the module. This pass updates dominator information and loop
+// information, and does not add critical edges to the CFG.
+//
+//   AU.addRequiredID(LoopSimplifyID);
+//
+Pass *createLoopSimplifyPass();
+extern char &LoopSimplifyID;
+
+//===----------------------------------------------------------------------===//
+//
+// TailCallElimination - This pass eliminates call instructions to the current
+// function which occur immediately before return instructions.
+//
+FunctionPass *createTailCallEliminationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LowerSwitch - This pass converts SwitchInst instructions into a sequence of
+// chained binary branch instructions.
+//
+FunctionPass *createLowerSwitchPass();
+extern char &LowerSwitchID;
+
+//===----------------------------------------------------------------------===//
+//
+// LowerInvoke - This pass converts invoke and unwind instructions to use sjlj
+// exception handling mechanisms.
Note that after this pass runs, the CFG is
+// no longer entirely accurate (exceptional control flow edges are not correct
+// anymore), so only very simple things should be done after the lowerinvoke
+// pass has run (like generation of native code). This should *NOT* be used as
+// a general purpose "my LLVM-to-LLVM pass doesn't support the invoke
+// instruction yet" lowering pass.
+//
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI = 0);
+FunctionPass *createLowerInvokePass(const TargetLowering *TLI,
+                                    bool useExpensiveEHSupport);
+extern char &LowerInvokePassID;
+
+//===----------------------------------------------------------------------===//
+//
+// BlockPlacement - This pass reorders basic blocks in order to increase the
+// number of fall-through conditional branches.
+//
+FunctionPass *createBlockPlacementPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LCSSA - This pass inserts phi nodes at loop boundaries to simplify other loop
+// optimizations.
+//
+Pass *createLCSSAPass();
+extern char &LCSSAID;
+
+//===----------------------------------------------------------------------===//
+//
+// EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
+// tree.
+//
+FunctionPass *createEarlyCSEPass();
+
+//===----------------------------------------------------------------------===//
+//
+// GVN - This pass performs global value numbering and redundant load
+// elimination simultaneously.
+//
+FunctionPass *createGVNPass(bool NoLoads = false);
+
+//===----------------------------------------------------------------------===//
+//
+// MemCpyOpt - This pass performs optimizations related to eliminating memcpy
+// calls and/or combining multiple stores into memset's.
+//
+FunctionPass *createMemCpyOptPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LoopDeletion - This pass performs DCE of non-infinite loops that it
+// can prove are dead.
+//
+Pass *createLoopDeletionPass();
+
+//===----------------------------------------------------------------------===//
+//
+/// createSimplifyLibCallsPass - This pass optimizes specific calls to
+/// specific well-known (library) functions.
+FunctionPass *createSimplifyLibCallsPass();
+
+//===----------------------------------------------------------------------===//
+//
+// CodeGenPrepare - This pass prepares a function for instruction selection.
+//
+FunctionPass *createCodeGenPreparePass(const TargetLowering *TLI = 0);
+
+//===----------------------------------------------------------------------===//
+//
+// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
+//
+FunctionPass *createInstructionNamerPass();
+extern char &InstructionNamerID;
+
+//===----------------------------------------------------------------------===//
+//
+// GEPSplitter - Split complex GEPs into simple ones
+//
+FunctionPass *createGEPSplitterPass();
+
+//===----------------------------------------------------------------------===//
+//
+// Sink - Code Sinking
+//
+FunctionPass *createSinkingPass();
+
+//===----------------------------------------------------------------------===//
+//
+// LowerAtomic - Lower atomic intrinsics to non-atomic form
+//
+Pass *createLowerAtomicPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ValuePropagation - Propagate CFG-derived value information
+//
+Pass *createCorrelatedValuePropagationPass();
+
+//===----------------------------------------------------------------------===//
+//
+// InstructionSimplifier - Remove redundant instructions.
+//
+FunctionPass *createInstructionSimplifierPass();
+extern char &InstructionSimplifierID;
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/AddrModeMatcher.h b/final/include/llvm/Transforms/Utils/AddrModeMatcher.h new file mode 100644 index 00000000000..0678eccb5d6 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/AddrModeMatcher.h @@ -0,0 +1,108 @@
+//===- AddrModeMatcher.h - Addressing mode matching facility ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// AddressingModeMatcher - This class exposes a single public method, which is
+// used to construct a "maximal munch" of the addressing mode for the target
+// specified by TLI for an access to "V" with an access type of AccessTy. This
+// returns the addressing mode that is actually matched by value, but also
+// returns the list of instructions involved in that addressing computation in
+// AddrModeInsts.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
+#define LLVM_TRANSFORMS_UTILS_ADDRMODEMATCHER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Target/TargetLowering.h"
+
+namespace llvm {
+
+class GlobalValue;
+class Instruction;
+class Value;
+class Type;
+class User;
+class raw_ostream;
+
+/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
+/// which holds actual Value*'s for register values.
+struct ExtAddrMode : public TargetLowering::AddrMode {
+  Value *BaseReg;
+  Value *ScaledReg;
+  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
+  void print(raw_ostream &OS) const;
+  void dump() const;
+
+  bool operator==(const ExtAddrMode& O) const {
+    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
+           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
+           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
+  }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
+  AM.print(OS);
+  return OS;
+}
+
+class AddressingModeMatcher {
+  SmallVectorImpl<Instruction*> &AddrModeInsts;
+  const TargetLowering &TLI;
+
+  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
+  /// the memory instruction that we're computing this address for.
+  const Type *AccessTy;
+  Instruction *MemoryInst;
+
+  /// AddrMode - This is the addressing mode that we're building up.
This is
+  /// part of the return value of this addressing mode matching.
+  ExtAddrMode &AddrMode;
+
+  /// IgnoreProfitability - This is set to true when we should not do
+  /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
+  /// always returns true.
+  bool IgnoreProfitability;
+
+  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
+                        const TargetLowering &T, const Type *AT,
+                        Instruction *MI, ExtAddrMode &AM)
+    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
+    IgnoreProfitability = false;
+  }
+public:
+
+  /// Match - Find the maximal addressing mode that a load/store of V can fold,
+  /// given an access type of AccessTy. This returns a list of involved
+  /// instructions in AddrModeInsts.
+  static ExtAddrMode Match(Value *V, const Type *AccessTy,
+                           Instruction *MemoryInst,
+                           SmallVectorImpl<Instruction*> &AddrModeInsts,
+                           const TargetLowering &TLI) {
+    ExtAddrMode Result;
+
+    bool Success =
+      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
+                            MemoryInst, Result).MatchAddr(V, 0);
+    (void)Success; assert(Success && "Couldn't select *anything*?");
+    return Result;
+  }
+private:
+  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
+  bool MatchAddr(Value *V, unsigned Depth);
+  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
+  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
+                                            ExtAddrMode &AMBefore,
+                                            ExtAddrMode &AMAfter);
+  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
+};
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/BasicBlockUtils.h b/final/include/llvm/Transforms/Utils/BasicBlockUtils.h new file mode 100644 index 00000000000..53358602870 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -0,0 +1,186 @@
+//===-- Transform/Utils/BasicBlockUtils.h - BasicBlock Utils ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs manipulations on basic blocks, and
+// instructions contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_BASICBLOCK_H
+#define LLVM_TRANSFORMS_UTILS_BASICBLOCK_H
+
+// FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
+
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class Instruction;
+class Pass;
+class ReturnInst;
+
+/// DeleteDeadBlock - Delete the specified block, which must have no
+/// predecessors.
+void DeleteDeadBlock(BasicBlock *BB);
+
+
+/// FoldSingleEntryPHINodes - We know that BB has one predecessor. If there are
+/// any single-entry PHI nodes in it, fold them away. This handles the case
+/// when all entries to the PHI nodes in a block are guaranteed equal, such as
+/// when the block has exactly one predecessor.
+void FoldSingleEntryPHINodes(BasicBlock *BB, Pass *P = 0);
+
+/// DeleteDeadPHIs - Examine each PHI in the given block and delete it if it
+/// is dead. Also recursively delete any operands that become dead as
+/// a result. This includes tracing the def-use list from the PHI to see if
+/// it is ultimately unused or if it reaches an unused cycle. Return true
+/// if any PHIs were deleted.
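+///
+/// Illustrative use (hypothetical snippet), e.g. after unlinking an edge:
+///   BB->removePredecessor(Pred);
+///   DeleteDeadPHIs(BB);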
+bool DeleteDeadPHIs(BasicBlock *BB);
+
+/// MergeBlockIntoPredecessor - Attempts to merge a block into its predecessor,
+/// if possible. The return value indicates success or failure.
+bool MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P = 0);
+
+// ReplaceInstWithValue - Replace all uses of an instruction (specified by BI)
+// with a value, then remove and delete the original instruction.
+//
+void ReplaceInstWithValue(BasicBlock::InstListType &BIL,
+                          BasicBlock::iterator &BI, Value *V);
+
+// ReplaceInstWithInst - Replace the instruction specified by BI with the
+// instruction specified by I. The original instruction is deleted and BI is
+// updated to point to the new instruction.
+//
+void ReplaceInstWithInst(BasicBlock::InstListType &BIL,
+                         BasicBlock::iterator &BI, Instruction *I);
+
+// ReplaceInstWithInst - Replace the instruction specified by From with the
+// instruction specified by To.
+//
+void ReplaceInstWithInst(Instruction *From, Instruction *To);
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them. This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of edge info.
+void FindFunctionBackedges(const Function &F,
+      SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result);
+
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ);
+
+/// isCriticalEdge - Return true if the specified edge is a critical edge.
+/// Critical edges are edges from a block with multiple successors to a block
+/// with multiple predecessors.
+///
+bool isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+                    bool AllowIdenticalEdges = false);
+
+/// SplitCriticalEdge - If this edge is a critical edge, insert a new node to
+/// split the critical edge. This will update DominatorTree and
+/// DominanceFrontier information if it is available, thus calling this
+/// function will not invalidate either of them. This returns the new block if
+/// the edge was split, null otherwise.
+///
+/// If MergeIdenticalEdges is true (not the default), *all* edges from TI to the
+/// specified successor will be merged into the same critical edge block.
+/// This is most commonly interesting with switch instructions, which may
+/// have many edges to any one destination. This ensures that all edges to that
+/// dest go to one block instead of each going to a different block, but isn't
+/// the standard definition of a "critical edge".
+///
+/// It is invalid to call this function on a critical edge that starts at an
+/// IndirectBrInst. Splitting these edges will almost always create an invalid
+/// program because the address of the new block won't be the one that is jumped
+/// to.
+///
+BasicBlock *SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
+                              Pass *P = 0, bool MergeIdenticalEdges = false);
+
+inline BasicBlock *SplitCriticalEdge(BasicBlock *BB, succ_iterator SI,
+                                     Pass *P = 0) {
+  return SplitCriticalEdge(BB->getTerminator(), SI.getSuccessorIndex(), P);
+}
+
+/// SplitCriticalEdge - If the edge from *PI to Succ is not critical, return
+/// false. Otherwise, split all edges between the two blocks and return true.
+/// This updates all of the same analyses as the other SplitCriticalEdge
+/// function.
If P is specified, it updates the analyses
+/// described above.
+inline bool SplitCriticalEdge(BasicBlock *Succ, pred_iterator PI, Pass *P = 0) {
+  bool MadeChange = false;
+  TerminatorInst *TI = (*PI)->getTerminator();
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+    if (TI->getSuccessor(i) == Succ)
+      MadeChange |= !!SplitCriticalEdge(TI, i, P);
+  return MadeChange;
+}
+
+/// SplitCriticalEdge - If an edge from Src to Dst is critical, split the edge
+/// and return the new block; otherwise return null. This method requires that
+/// there be an edge between the two blocks. If P is specified, it updates the
+/// analyses described above.
+inline BasicBlock *SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
+                                     Pass *P = 0,
+                                     bool MergeIdenticalEdges = false) {
+  TerminatorInst *TI = Src->getTerminator();
+  unsigned i = 0;
+  while (1) {
+    assert(i != TI->getNumSuccessors() && "Edge doesn't exist!");
+    if (TI->getSuccessor(i) == Dst)
+      return SplitCriticalEdge(TI, i, P, MergeIdenticalEdges);
+    ++i;
+  }
+}
+
+/// SplitEdge - Split the edge connecting the specified blocks. Pass P must
+/// not be NULL.
+BasicBlock *SplitEdge(BasicBlock *From, BasicBlock *To, Pass *P);
+
+/// SplitBlock - Split the specified block at the specified instruction -
+/// everything before SplitPt stays in Old and everything starting with SplitPt
+/// moves to a new block. The two blocks are joined by an unconditional branch
+/// and the loop info is updated.
+///
+BasicBlock *SplitBlock(BasicBlock *Old, Instruction *SplitPt, Pass *P);
+
+/// SplitBlockPredecessors - This method transforms BB by introducing a new
+/// basic block into the function, and moving some of the predecessors of BB to
+/// be predecessors of the new block. The new predecessors are indicated by the
+/// Preds array, which has NumPreds elements in it. The new block is given a
+/// suffix of 'Suffix'. This function returns the new block.
+///
+/// This currently updates the LLVM IR, AliasAnalysis, DominatorTree,
+/// DominanceFrontier, LoopInfo, and LCSSA, but no other analyses.
+/// In particular, it does not preserve LoopSimplify (because it's
+/// complicated to handle the case where one of the edges being split
+/// is an exit of a loop with other exits).
+///
+BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds,
+                                   unsigned NumPreds, const char *Suffix,
+                                   Pass *P = 0);
+
+/// FoldReturnIntoUncondBranch - This method duplicates the specified return
+/// instruction into a predecessor which ends in an unconditional branch. If
+/// the return instruction returns a value defined by a PHI, propagate the
+/// right value into the return. It returns the new return instruction in the
+/// predecessor.
+ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
+                                       BasicBlock *Pred);
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/BasicInliner.h b/final/include/llvm/Transforms/Utils/BasicInliner.h new file mode 100644 index 00000000000..4bca6b8c441 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/BasicInliner.h @@ -0,0 +1,55 @@
+//===- BasicInliner.h - Basic function level inliner ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a simple function-based inliner that does not use
+// call graph information.
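+//
+// A minimal usage sketch (illustrative only; F1, F2, F3 and TD are assumed
+// to be supplied by the client):
+//
+//   BasicInliner BI(TD);
+//   BI.addFunction(F1);
+//   BI.addFunction(F2);
+//   BI.neverInlineFunction(F3);
+//   BI.inlineFunctions();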
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BASICINLINER_H
+#define BASICINLINER_H
+
+#include "llvm/Analysis/InlineCost.h"
+
+namespace llvm {
+
+  class Function;
+  class TargetData;
+  struct BasicInlinerImpl;
+
+  /// BasicInliner - BasicInliner provides a function-level inlining interface.
+  /// Clients provide a list of functions, which are inlined without using
+  /// module-level call graph information. Note that the BasicInliner is
+  /// free to delete a function if it is inlined into all call sites.
+  class BasicInliner {
+  public:
+
+    explicit BasicInliner(TargetData *T = NULL);
+    ~BasicInliner();
+
+    /// addFunction - Add function into the list of functions to process.
+    /// All functions must be inserted using this interface before invoking
+    /// inlineFunctions().
+    void addFunction(Function *F);
+
+    /// neverInlineFunction - Sometimes a function is never to be inlined
+    /// for one reason or another.
+    void neverInlineFunction(Function *F);
+
+    /// inlineFunctions - Walk all call sites in all functions supplied by
+    /// client. Inline as many call sites as possible. Delete completely
+    /// inlined functions.
+    void inlineFunctions();
+
+  private:
+    BasicInlinerImpl *Impl;
+  };
+}
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/BuildLibCalls.h b/final/include/llvm/Transforms/Utils/BuildLibCalls.h new file mode 100644 index 00000000000..e8259383846 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -0,0 +1,110 @@
+//===- BuildLibCalls.h - Utility builder for libcalls -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an interface to build some C language libcalls for
+// optimization passes that need to call the various functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_BUILDLIBCALLS_H
+#define TRANSFORMS_UTILS_BUILDLIBCALLS_H
+
+#include "llvm/Support/IRBuilder.h"
+
+namespace llvm {
+  class Value;
+  class TargetData;
+
+  /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
+  Value *CastToCStr(Value *V, IRBuilder<> &B);
+
+  /// EmitStrLen - Emit a call to the strlen function to the builder, for the
+  /// specified pointer. Ptr is required to be some pointer type, and the
+  /// return value has 'intptr_t' type.
+  Value *EmitStrLen(Value *Ptr, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitStrChr - Emit a call to the strchr function to the builder, for the
+  /// specified pointer and character. Ptr is required to be some pointer type,
+  /// and the return value has 'i8*' type.
+  Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+  Value *EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                     const TargetData *TD);
+
+  /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
+  /// specified pointer arguments.
+  Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
+                    const TargetData *TD, StringRef Name = "strcpy");
+
+  /// EmitStrNCpy - Emit a call to the strncpy function to the builder, for the
+  /// specified pointer arguments and length.
+  Value *EmitStrNCpy(Value *Dst, Value *Src, Value *Len, IRBuilder<> &B,
+                     const TargetData *TD, StringRef Name = "strncpy");
+
+  /// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+  /// This expects that the Len and ObjSize have type 'intptr_t' and Dst/Src
+  /// are pointers.
+  Value *EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+                       IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
+  /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value.
+  Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B,
+                    const TargetData *TD);
+
+  /// EmitMemCmp - Emit a call to the memcmp function.
+  Value *EmitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B,
+                    const TargetData *TD);
+
+  /// EmitUnaryFloatFnCall - Emit a call to the unary function named 'Name'
+  /// (e.g. 'floor'). This function is known to take a single argument of type
+  /// matching 'Op' and return one value of the same type. If 'Op' is a long
+  /// double, an 'l' suffix is added to the name; if 'Op' is a float, an 'f'
+  /// suffix is added.
+  Value *EmitUnaryFloatFnCall(Value *Op, const char *Name, IRBuilder<> &B,
+                              const AttrListPtr &Attrs);
+
+  /// EmitPutChar - Emit a call to the putchar function. This assumes that Char
+  /// is an integer.
+  Value *EmitPutChar(Value *Char, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitPutS - Emit a call to the puts function. This assumes that Str is
+  /// some pointer.
+  void EmitPutS(Value *Str, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
+  /// an i32, and File is a pointer to FILE.
+  void EmitFPutC(Value *Char, Value *File, IRBuilder<> &B,
+                 const TargetData *TD);
+
+  /// EmitFPutS - Emit a call to the fputs function. Str is required to be a
+  /// pointer and File is a pointer to FILE.
+  void EmitFPutS(Value *Str, Value *File, IRBuilder<> &B, const TargetData *TD);
+
+  /// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
+  /// a pointer, Size is an 'intptr_t', and File is a pointer to FILE.
+  void EmitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,
+                  const TargetData *TD);
+
+  /// SimplifyFortifiedLibCalls - Helper class for folding checked library
+  /// calls (e.g. __strcpy_chk) into their unchecked counterparts.
+  class SimplifyFortifiedLibCalls {
+  protected:
+    CallInst *CI;
+    virtual void replaceCall(Value *With) = 0;
+    virtual bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp,
+                            bool isString) const = 0;
+  public:
+    virtual ~SimplifyFortifiedLibCalls();
+    bool fold(CallInst *CI, const TargetData *TD);
+  };
+}
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/Cloning.h b/final/include/llvm/Transforms/Utils/Cloning.h new file mode 100644 index 00000000000..24ebb109a0a --- /dev/null +++ b/final/include/llvm/Transforms/Utils/Cloning.h @@ -0,0 +1,219 @@
+//===- Cloning.h - Clone various parts of LLVM programs ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines various functions that are used to clone chunks of LLVM
+// code for various purposes.
This varies from copying whole modules into new +// modules, to cloning functions with different arguments, to inlining +// functions, to copying basic blocks to support loop unrolling or superblock +// formation, etc. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_CLONING_H +#define LLVM_TRANSFORMS_UTILS_CLONING_H + +#include "llvm/ADT/ValueMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +namespace llvm { + +class Module; +class Function; +class Instruction; +class Pass; +class LPPassManager; +class BasicBlock; +class Value; +class CallInst; +class InvokeInst; +class ReturnInst; +class CallSite; +class Trace; +class CallGraph; +class TargetData; +class Loop; +class LoopInfo; +class AllocaInst; + +/// CloneModule - Return an exact copy of the specified module +/// +Module *CloneModule(const Module *M); +Module *CloneModule(const Module *M, ValueToValueMapTy &VMap); + +/// ClonedCodeInfo - This struct can be used to capture information about code +/// being cloned, while it is being cloned. +struct ClonedCodeInfo { + /// ContainsCalls - This is set to true if the cloned code contains a normal + /// call instruction. + bool ContainsCalls; + + /// ContainsUnwinds - This is set to true if the cloned code contains an + /// unwind instruction. + bool ContainsUnwinds; + + /// ContainsDynamicAllocas - This is set to true if the cloned code contains + /// a 'dynamic' alloca. Dynamic allocas are allocas that are either not in + /// the entry block or they are in the entry block but are not a constant + /// size. + bool ContainsDynamicAllocas; + + ClonedCodeInfo() { + ContainsCalls = false; + ContainsUnwinds = false; + ContainsDynamicAllocas = false; + } +}; + + +/// CloneBasicBlock - Return a copy of the specified basic block, but without +/// embedding the block into a particular function. The block returned is an +/// exact copy of the specified basic block, without any remapping having been +/// performed. Because of this, this is only suitable for applications where +/// the basic block will be inserted into the same function that it was cloned +/// from (loop unrolling would use this, for example). +/// +/// Also, note that this function makes a direct copy of the basic block, and +/// can thus produce illegal LLVM code. In particular, it will copy any PHI +/// nodes from the original block, even though there are no predecessors for the +/// newly cloned block (thus, phi nodes will have to be updated). Also, this +/// block will branch to the old successors of the original block: these +/// successors will have to have any PHI nodes updated to account for the new +/// incoming edges. +/// +/// The correlation between instructions in the source and result basic blocks +/// is recorded in the VMap map. +/// +/// If you have a particular suffix you'd like to use to add to any cloned +/// names, specify it as the optional third parameter. +/// +/// If you would like the basic block to be auto-inserted into the end of a +/// function, you can specify it as the optional fourth parameter. +/// +/// If you would like to collect additional information about the cloned +/// function, you can specify a ClonedCodeInfo object with the optional fifth +/// parameter. 
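+///
+/// Illustrative use when duplicating a block within the same function:
+///   ValueToValueMapTy VMap;
+///   BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".dup", BB->getParent());
+///   // ...then remap operands via VMap and update PHI nodes as described above.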
+///
+BasicBlock *CloneBasicBlock(const BasicBlock *BB,
+                            ValueToValueMapTy &VMap,
+                            const Twine &NameSuffix = "", Function *F = 0,
+                            ClonedCodeInfo *CodeInfo = 0);
+
+
+/// CloneLoop - Clone the loop L. Clone dominator info for the loop's blocks.
+/// Populate VMap with a mapping from old blocks to new blocks.
+Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
+                ValueToValueMapTy &VMap, Pass *P);
+
+/// CloneFunction - Return a copy of the specified function, but without
+/// embedding the function into another module. Also, any references specified
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
+/// updated to include mappings from all of the instructions and basic blocks
+/// in the function from their old to new values. The final argument captures
+/// information about the cloned code if non-null.
+///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
+Function *CloneFunction(const Function *F,
+                        ValueToValueMapTy &VMap,
+                        bool ModuleLevelChanges,
+                        ClonedCodeInfo *CodeInfo = 0);
+
+/// CloneFunction - Version of the function that doesn't need the VMap.
+///
+inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){
+  ValueToValueMapTy VMap;
+  return CloneFunction(F, VMap, false, CodeInfo); // no module-level changes
+}
+
+/// Clone OldFunc into NewFunc, transforming the old arguments into references
+/// to VMap values. Note that if NewFunc already has basic blocks, the ones
+/// cloned into it will be added to the end of the function. This function
+/// fills in a list of return instructions, and can optionally append the
+/// specified suffix to all values cloned.
+///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
+void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                       ValueToValueMapTy &VMap,
+                       bool ModuleLevelChanges,
+                       SmallVectorImpl<ReturnInst*> &Returns,
+                       const char *NameSuffix = "",
+                       ClonedCodeInfo *CodeInfo = 0);
+
+/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
+/// except that it does some simple constant prop and DCE on the fly. The
+/// effect of this is to copy significantly less code in cases where (for
+/// example) a function call with constant arguments is inlined, and those
+/// constant arguments cause a significant amount of code in the callee to be
+/// dead. Since this doesn't produce an exact copy of the input, it can't be
+/// used for things like CloneFunction or CloneModule.
+///
+/// If ModuleLevelChanges is false, VMap contains no non-identity GlobalValue
+/// mappings.
+///
+void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
+                               ValueToValueMapTy &VMap,
+                               bool ModuleLevelChanges,
+                               SmallVectorImpl<ReturnInst*> &Returns,
+                               const char *NameSuffix = "",
+                               ClonedCodeInfo *CodeInfo = 0,
+                               const TargetData *TD = 0,
+                               Instruction *TheCall = 0);
+
+
+/// InlineFunctionInfo - This class captures the data input to the
+/// InlineFunction call, and records the auxiliary results produced by it.
+class InlineFunctionInfo {
+public:
+  explicit InlineFunctionInfo(CallGraph *cg = 0, const TargetData *td = 0)
+    : CG(cg), TD(td) {}
+
+  /// CG - If non-null, InlineFunction will update the callgraph to reflect the
+  /// changes it makes.
+  CallGraph *CG;
+  const TargetData *TD;
+
+  /// StaticAllocas - InlineFunction fills this in with all static allocas that
+  /// get copied into the caller.
+  SmallVector<AllocaInst*, 4> StaticAllocas;
+
+  /// InlinedCalls - InlineFunction fills this in with callsites that were
+  /// inlined from the callee. This is only filled in if CG is non-null.
+  SmallVector<WeakVH, 8> InlinedCalls;
+
+  void reset() {
+    StaticAllocas.clear();
+    InlinedCalls.clear();
+  }
+};
+
+/// InlineFunction - This function inlines the called function into the basic
+/// block of the caller. This returns false if it is not possible to inline
+/// this call. The program is still in a well-defined state if this occurs
+/// though.
+///
+/// Note that this only does one level of inlining. For example, if the
+/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now
+/// exists in the instruction stream. Similarly this will inline a recursive
+/// function by one level.
+///
+bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI);
+bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI);
+bool InlineFunction(CallSite CS, InlineFunctionInfo &IFI);
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/FunctionUtils.h b/final/include/llvm/Transforms/Utils/FunctionUtils.h new file mode 100644 index 00000000000..785b08f8291 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/FunctionUtils.h @@ -0,0 +1,41 @@
+//===-- Transform/Utils/FunctionUtils.h - Function Utils --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of transformations manipulates LLVM functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_FUNCTION_H
+#define LLVM_TRANSFORMS_UTILS_FUNCTION_H
+
+#include <vector>
+
+namespace llvm {
+  class BasicBlock;
+  class DominatorTree;
+  class Function;
+  class Loop;
+
+  /// ExtractCodeRegion - Rip out a sequence of basic blocks into a new function.
+  ///
+  Function* ExtractCodeRegion(DominatorTree& DT,
+                              const std::vector<BasicBlock*> &code,
+                              bool AggregateArgs = false);
+
+  /// ExtractLoop - Rip out a natural loop into a new function.
+  ///
+  Function* ExtractLoop(DominatorTree& DT, Loop *L,
+                        bool AggregateArgs = false);
+
+  /// ExtractBasicBlock - Rip out a basic block into a new function.
+  ///
+  Function* ExtractBasicBlock(BasicBlock *BB, bool AggregateArgs = false);
}

+#endif diff --git a/final/include/llvm/Transforms/Utils/Local.h b/final/include/llvm/Transforms/Utils/Local.h new file mode 100644 index 00000000000..2823fbb7199 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/Local.h @@ -0,0 +1,162 @@
+//===-- Local.h - Functions to perform local transformations ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs various local transformations to the
+// program.
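+//
+// As an illustration, a common cleanup idiom built from these utilities is:
+//
+//   if (isInstructionTriviallyDead(I))
+//     RecursivelyDeleteTriviallyDeadInstructions(I);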
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LOCAL_H
+#define LLVM_TRANSFORMS_UTILS_LOCAL_H
+
+namespace llvm {
+
+class User;
+class BasicBlock;
+class BranchInst;
+class Instruction;
+class Value;
+class Pass;
+class PHINode;
+class AllocaInst;
+class ConstantExpr;
+class TargetData;
+
+template<typename T> class SmallVectorImpl;
+
+//===----------------------------------------------------------------------===//
+//  Local constant propagation.
+//
+
+/// ConstantFoldTerminator - If a terminator instruction is predicated on a
+/// constant value, convert it into an unconditional branch to the constant
+/// destination. This is a nontrivial operation because the successors of this
+/// basic block must have their PHI nodes updated.
+///
+bool ConstantFoldTerminator(BasicBlock *BB);
+
+//===----------------------------------------------------------------------===//
+//  Local dead code elimination.
+//
+
+/// isInstructionTriviallyDead - Return true if the result produced by the
+/// instruction is not used, and the instruction has no side effects.
+///
+bool isInstructionTriviallyDead(Instruction *I);
+
+/// RecursivelyDeleteTriviallyDeadInstructions - If the specified value is a
+/// trivially dead instruction, delete it. If that makes any of its operands
+/// trivially dead, delete them too, recursively. Return true if any
+/// instructions were deleted.
+bool RecursivelyDeleteTriviallyDeadInstructions(Value *V);
+
+/// RecursivelyDeleteDeadPHINode - If the specified value is an effectively
+/// dead PHI node, due to being a def-use chain of single-use nodes that
+/// either forms a cycle or is terminated by a trivially dead instruction,
+/// delete it. If that makes any of its operands trivially dead, delete them
+/// too, recursively. Return true if a change was made.
+bool RecursivelyDeleteDeadPHINode(PHINode *PN);
+
+
+/// SimplifyInstructionsInBlock - Scan the specified basic block and try to
+/// simplify any instructions in it and recursively delete dead instructions.
+///
+/// This returns true if it changed the code; note that it can delete
+/// instructions in other blocks as well as in this block.
+///
+/// WARNING: Do not use this function on unreachable blocks, as recursive
+/// simplification is not able to handle corner-case scenarios that can
+/// arise in them.
+bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0);
+
+//===----------------------------------------------------------------------===//
+//  Control Flow Graph Restructuring.
+//
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+///   x = phi(1, 0, 0, 0)
+///   y = and x, z
+///
+/// ... and delete the predecessor corresponding to the '1', this will attempt
+/// to recursively fold the 'and' to 0.
+void RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+                                  TargetData *TD = 0);
+
+
+/// MergeBasicBlockIntoOnlyPred - BB is a block with one predecessor and its
+/// predecessor is known to have one successor (BB!). Eliminate the edge
+/// between them, moving the instructions in the predecessor into BB. This
+/// deletes the predecessor block.
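+///
+/// Illustrative use (hypothetical snippet):
+///   if (BasicBlock *Pred = BB->getSinglePredecessor())
+///     if (Pred->getTerminator()->getNumSuccessors() == 1)
+///       MergeBasicBlockIntoOnlyPred(BB);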
+/// +void MergeBasicBlockIntoOnlyPred(BasicBlock *BB, Pass *P = 0); + + +/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an +/// unconditional branch, and contains no instructions other than PHI nodes, +/// potential debug intrinsics and the branch. If possible, eliminate BB by +/// rewriting all the predecessors to branch to the successor block and return +/// true. If we can't transform, return false. +bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB); + +/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI +/// nodes in this block. This doesn't try to be clever about PHI nodes +/// which differ only in the order of the incoming values, but instcombine +/// orders them so it usually won't matter. +/// +bool EliminateDuplicatePHINodes(BasicBlock *BB); + +/// SimplifyCFG - This function is used to do simplification of a CFG. For +/// example, it adjusts branches to branches to eliminate the extra hop, it +/// eliminates unreachable basic blocks, and does other "peephole" optimization +/// of the CFG. It returns true if a modification was made, possibly deleting +/// the basic block that was pointed to. +/// +bool SimplifyCFG(BasicBlock *BB, const TargetData *TD = 0); + +/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, +/// and if a predecessor branches to us and one of our successors, fold the +/// setcc into the predecessor and use logical operations to pick the right +/// destination. +bool FoldBranchToCommonDest(BranchInst *BI); + +/// DemoteRegToStack - This function takes a virtual register computed by an +/// Instruction and replaces it with a slot in the stack frame, allocated via +/// alloca. This allows the CFG to be changed around without fear of +/// invalidating the SSA information for the value. It returns the pointer to +/// the alloca inserted to create a stack slot for X. +/// +AllocaInst *DemoteRegToStack(Instruction &X, + bool VolatileLoads = false, + Instruction *AllocaPoint = 0); + +/// DemotePHIToStack - This function takes a virtual register computed by a phi +/// node and replaces it with a slot in the stack frame, allocated via alloca. +/// The phi node is deleted and it returns the pointer to the alloca inserted. +AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = 0); + +/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that +/// we can determine, return it, otherwise return 0. If PrefAlign is specified, +/// and it is more than the alignment of the ultimate object, see if we can +/// increase the alignment of the ultimate object, making this check succeed. +unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, + const TargetData *TD = 0); + +/// getKnownAlignment - Try to infer an alignment for the specified pointer. +static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) { + return getOrEnforceKnownAlignment(V, 0, TD); +} + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/Transforms/Utils/PromoteMemToReg.h b/final/include/llvm/Transforms/Utils/PromoteMemToReg.h new file mode 100644 index 00000000000..98d51a29ad7 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -0,0 +1,45 @@ +//===- PromoteMemToReg.h - Promote Allocas to Scalars -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file exposes an interface to promote alloca instructions to SSA
+// registers, by using the SSA construction algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
+#define TRANSFORMS_UTILS_PROMOTEMEMTOREG_H
+
+#include <vector>
+
+namespace llvm {
+
+class AllocaInst;
+class DominatorTree;
+class DominanceFrontier;
+class AliasSetTracker;
+
+/// isAllocaPromotable - Return true if this alloca is legal for promotion.
+/// This is true if there are only loads and stores to the alloca...
+///
+bool isAllocaPromotable(const AllocaInst *AI);
+
+/// PromoteMemToReg - Promote the specified list of alloca instructions into
+/// scalar registers, inserting PHI nodes as appropriate. This function makes
+/// use of DominatorTree information. This function does not modify the CFG of
+/// the function at all. All allocas must be from the same function.
+///
+/// If AST is specified, the specified tracker is updated to reflect changes
+/// made to the IR.
+///
+void PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
+                     DominatorTree &DT, AliasSetTracker *AST = 0);
+
+} // End llvm namespace
+
+#endif diff --git a/final/include/llvm/Transforms/Utils/SSAUpdater.h b/final/include/llvm/Transforms/Utils/SSAUpdater.h new file mode 100644 index 00000000000..b4048b9b440 --- /dev/null +++ b/final/include/llvm/Transforms/Utils/SSAUpdater.h @@ -0,0 +1,163 @@
+//===-- SSAUpdater.h - Unstructured SSA Update Tool -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the SSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
+#define LLVM_TRANSFORMS_UTILS_SSAUPDATER_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+  class Instruction;
+  class LoadInst;
+  class Type;
+  class Value;
+  class BasicBlock;
+  class Use;
+  class PHINode;
+  template<typename T> class SmallVectorImpl;
+  template<typename T> class SSAUpdaterTraits;
+  class BumpPtrAllocator;
+
+/// SSAUpdater - This class updates SSA form for a set of values defined in
+/// multiple blocks. This is used when code duplication or another unstructured
+/// transformation wants to rewrite a set of uses of one value with uses of a
+/// set of values.
+class SSAUpdater {
+  friend class SSAUpdaterTraits<SSAUpdater>;
+
+private:
+  /// AvailableVals - This keeps track of which value to use on a per-block
+  /// basis. When we insert PHI nodes, we keep track of them here.
+  //typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
+  void *AV;
+
+  /// ProtoType holds the type of the values being rewritten.
+  const Type *ProtoType;
+
+  // PHI nodes are given a name based on ProtoName.
+  std::string ProtoName;
+
+  /// InsertedPHIs - If this is non-null, the SSAUpdater adds all PHI nodes that
+  /// it creates to the vector.
+  SmallVectorImpl<PHINode*> *InsertedPHIs;
+
+public:
+  /// SSAUpdater constructor. If InsertedPHIs is specified, it will be filled
+  /// in with all PHI Nodes created by rewriting.
+  explicit SSAUpdater(SmallVectorImpl<PHINode*> *InsertedPHIs = 0);
+  ~SSAUpdater();
+
+  /// Initialize - Reset this object to get ready for a new set of SSA
+  /// updates with type 'Ty'. PHI nodes get a name based on 'Name'.
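+  ///
+  /// A typical rewrite sequence (illustrative only; V1/V2 are new
+  /// definitions the client created in blocks BB1/BB2):
+  ///   SSAUpdater SSA;
+  ///   SSA.Initialize(V1->getType(), "myval");
+  ///   SSA.AddAvailableValue(BB1, V1);
+  ///   SSA.AddAvailableValue(BB2, V2);
+  ///   SSA.RewriteUse(U);  // for each use U of the original value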
+  void Initialize(const Type *Ty, StringRef Name);
+
+  /// AddAvailableValue - Indicate that a rewritten value is available at the
+  /// end of the specified block with the specified value.
+  void AddAvailableValue(BasicBlock *BB, Value *V);
+
+  /// HasValueForBlock - Return true if the SSAUpdater already has a value for
+  /// the specified block.
+  bool HasValueForBlock(BasicBlock *BB) const;
+
+  /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+  /// live at the end of the specified block.
+  Value *GetValueAtEndOfBlock(BasicBlock *BB);
+
+  /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+  /// is live in the middle of the specified block.
+  ///
+  /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+  /// important case: if there is a definition of the rewritten value after the
+  /// 'use' in BB. Consider code like this:
+  ///
+  ///      X1 = ...
+  ///   SomeBB:
+  ///      use(X)
+  ///      X2 = ...
+  ///      br Cond, SomeBB, OutBB
+  ///
+  /// In this case, there are two values (X1 and X2) added to the AvailableVals
+  /// set by the client of the rewriter, and those values are both live out of
+  /// their respective blocks. However, the use of X happens in the *middle* of
+  /// a block. Because of this, we need to insert a new PHI node in SomeBB to
+  /// merge the appropriate values, and this value isn't live out of the block.
+  ///
+  Value *GetValueInMiddleOfBlock(BasicBlock *BB);
+
+  /// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+  /// which use their value in the corresponding predecessor. Note that this
+  /// will not work if the use is supposed to be rewritten to a value defined in
+  /// the same block as the use, but above it. Any 'AddAvailableValue's added
+  /// for the use's block will be considered to be below it.
+  void RewriteUse(Use &U);
+
+  /// RewriteUseAfterInsertions - Rewrite a use, just like RewriteUse. However,
+  /// this version of the method can rewrite uses in the same block as a
+  /// definition, because it assumes that all uses of a value are below any
+  /// inserted values.
+  void RewriteUseAfterInsertions(Use &U);
+
+private:
+  Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
+
+  void operator=(const SSAUpdater&); // DO NOT IMPLEMENT
+  SSAUpdater(const SSAUpdater&);     // DO NOT IMPLEMENT
+};
+
+/// LoadAndStorePromoter - This little helper class provides a convenient way to
+/// promote a collection of loads and stores into SSA form using the SSAUpdater.
+/// This handles complexities that SSAUpdater doesn't, such as multiple loads
+/// and stores in one block.
+///
+/// Clients of this class are expected to subclass this and implement the
+/// virtual methods.
+///
+class LoadAndStorePromoter {
+protected:
+  SSAUpdater &SSA;
+public:
+  LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
+                       SSAUpdater &S, StringRef Name = StringRef());
+  virtual ~LoadAndStorePromoter() {}
+
+  /// run - This does the promotion. Insts is a list of loads and stores to
+  /// promote; the inserted PHIs take their basename from the Name given to
+  /// the constructor. After this is complete, the loads and stores are
+  /// removed from the code.
+  void run(const SmallVectorImpl<Instruction*> &Insts) const;
+
+
+  /// Return true if the specified instruction is in the Inst list (which was
+  /// passed into the run method). Clients should implement this with a more
+  /// efficient version if possible.
+  virtual bool isInstInList(Instruction *I,
+                            const SmallVectorImpl<Instruction*> &Insts) const {
+    for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+      if (Insts[i] == I)
+        return true;
+    return false;
+  }
+
+  /// doExtraRewritesBeforeFinalDeletion - This hook is invoked after all the
+  /// stores are found and inserted as available values, but before the loads
+  /// are rewritten and the stores are removed.
+  virtual void doExtraRewritesBeforeFinalDeletion() const {
+  }
+
+  /// replaceLoadWithValue - Clients can choose to implement this to get
+  /// notified right before a load is RAUW'd with another value.
+  virtual void replaceLoadWithValue(LoadInst *LI, Value *V) const {
+  }
+
+  /// This is called before each instruction is deleted.
+  virtual void instructionDeleted(Instruction *I) const {
+  }
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/final/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
new file mode 100644
index 00000000000..5a03d224ff7
--- /dev/null
+++ b/final/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -0,0 +1,469 @@
+//===-- SSAUpdaterImpl.h - SSA Updater Implementation -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides a template that implements the core algorithm for the
+// SSAUpdater and MachineSSAUpdater.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
+#define LLVM_TRANSFORMS_UTILS_SSAUPDATERIMPL_H
+
+namespace llvm {
+
+template<typename T> class SSAUpdaterTraits;
+
+template<typename UpdaterT>
+class SSAUpdaterImpl {
+private:
+  UpdaterT *Updater;
+
+  typedef SSAUpdaterTraits<UpdaterT> Traits;
+  typedef typename Traits::BlkT BlkT;
+  typedef typename Traits::ValT ValT;
+  typedef typename Traits::PhiT PhiT;
+
+  /// BBInfo - Per-basic block information used internally by SSAUpdaterImpl.
+  /// The predecessors of each block are cached here since pred_iterator is
+  /// slow and we need to iterate over the blocks at least a few times.
+  class BBInfo {
+  public:
+    BlkT *BB;          // Back-pointer to the corresponding block.
+    ValT AvailableVal; // Value to use in this block.
+    BBInfo *DefBB;     // Block that defines the available value.
+    int BlkNum;        // Postorder number.
+    BBInfo *IDom;      // Immediate dominator.
+    unsigned NumPreds; // Number of predecessor blocks.
+    BBInfo **Preds;    // Array[NumPreds] of predecessor blocks.
+    PhiT *PHITag;      // Marker for existing PHIs that match.
+
+    BBInfo(BlkT *ThisBB, ValT V)
+      : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
+        NumPreds(0), Preds(0), PHITag(0) { }
+  };
+
+  typedef DenseMap<BlkT*, ValT> AvailableValsTy;
+  AvailableValsTy *AvailableVals;
+
+  SmallVectorImpl<PhiT*> *InsertedPHIs;
+
+  typedef SmallVectorImpl<BBInfo*> BlockListTy;
+  typedef DenseMap<BlkT*, BBInfo*> BBMapTy;
+  BBMapTy BBMap;
+  BumpPtrAllocator Allocator;
+
+public:
+  explicit SSAUpdaterImpl(UpdaterT *U, AvailableValsTy *A,
+                          SmallVectorImpl<PhiT*> *Ins) :
+    Updater(U), AvailableVals(A), InsertedPHIs(Ins) { }
+
+  /// GetValue - Check to see if AvailableVals has an entry for the specified
+  /// BB and if so, return it. If not, construct SSA form by first
+  /// calculating the required placement of PHIs and then inserting new PHIs
+  /// where needed.
+  ValT GetValue(BlkT *BB) {
+    SmallVector<BBInfo*, 100> BlockList;
+    BBInfo *PseudoEntry = BuildBlockList(BB, &BlockList);
+
+    // Special case: bail out if BB is unreachable.
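+    // (An empty block list means the backward search from BB found no
+    // defining blocks to root the traversal, so an undef of the right
+    // type is recorded and returned instead.)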
+    if (BlockList.size() == 0) {
+      ValT V = Traits::GetUndefVal(BB, Updater);
+      (*AvailableVals)[BB] = V;
+      return V;
+    }
+
+    FindDominators(&BlockList, PseudoEntry);
+    FindPHIPlacement(&BlockList);
+    FindAvailableVals(&BlockList);
+
+    return BBMap[BB]->DefBB->AvailableVal;
+  }
+
+  /// BuildBlockList - Starting from the specified basic block, traverse back
+  /// through its predecessors until reaching blocks with known values.
+  /// Create BBInfo structures for the blocks and append them to the block
+  /// list.
+  BBInfo *BuildBlockList(BlkT *BB, BlockListTy *BlockList) {
+    SmallVector<BBInfo*, 64> RootList;
+    SmallVector<BBInfo*, 64> WorkList;
+
+    BBInfo *Info = new (Allocator) BBInfo(BB, 0);
+    BBMap[BB] = Info;
+    WorkList.push_back(Info);
+
+    // Search backward from BB, creating BBInfos along the way and stopping
+    // when reaching blocks that define the value. Record those defining
+    // blocks on the RootList.
+    SmallVector<BlkT*, 10> Preds;
+    while (!WorkList.empty()) {
+      Info = WorkList.pop_back_val();
+      Preds.clear();
+      Traits::FindPredecessorBlocks(Info->BB, &Preds);
+      Info->NumPreds = Preds.size();
+      if (Info->NumPreds == 0)
+        Info->Preds = 0;
+      else
+        Info->Preds = static_cast<BBInfo**>
+          (Allocator.Allocate(Info->NumPreds * sizeof(BBInfo*),
+                              AlignOf<BBInfo*>::Alignment));
+
+      for (unsigned p = 0; p != Info->NumPreds; ++p) {
+        BlkT *Pred = Preds[p];
+        // Check if BBMap already has a BBInfo for the predecessor block.
+        typename BBMapTy::value_type &BBMapBucket =
+          BBMap.FindAndConstruct(Pred);
+        if (BBMapBucket.second) {
+          Info->Preds[p] = BBMapBucket.second;
+          continue;
+        }
+
+        // Create a new BBInfo for the predecessor.
+        ValT PredVal = AvailableVals->lookup(Pred);
+        BBInfo *PredInfo = new (Allocator) BBInfo(Pred, PredVal);
+        BBMapBucket.second = PredInfo;
+        Info->Preds[p] = PredInfo;
+
+        if (PredInfo->AvailableVal) {
+          RootList.push_back(PredInfo);
+          continue;
+        }
+        WorkList.push_back(PredInfo);
+      }
+    }
+
+    // Now that we know what blocks are backwards-reachable from the starting
+    // block, do a forward depth-first traversal to assign postorder numbers
+    // to those blocks.
+    BBInfo *PseudoEntry = new (Allocator) BBInfo(0, 0);
+    unsigned BlkNum = 1;
+
+    // Initialize the worklist with the roots from the backward traversal.
+    while (!RootList.empty()) {
+      Info = RootList.pop_back_val();
+      Info->IDom = PseudoEntry;
+      Info->BlkNum = -1;
+      WorkList.push_back(Info);
+    }
+
+    while (!WorkList.empty()) {
+      Info = WorkList.back();
+
+      if (Info->BlkNum == -2) {
+        // All the successors have been handled; assign the postorder number.
+        Info->BlkNum = BlkNum++;
+        // If not a root, put it on the BlockList.
+        if (!Info->AvailableVal)
+          BlockList->push_back(Info);
+        WorkList.pop_back();
+        continue;
+      }
+
+      // Leave this entry on the worklist, but set its BlkNum to mark that its
+      // successors have been put on the worklist. When it returns to the top
+      // of the list, after handling its successors, it will be assigned a
+      // number.
+      Info->BlkNum = -2;
+
+      // Add unvisited successors to the work list.
+      for (typename Traits::BlkSucc_iterator SI =
+             Traits::BlkSucc_begin(Info->BB),
+           E = Traits::BlkSucc_end(Info->BB); SI != E; ++SI) {
+        BBInfo *SuccInfo = BBMap[*SI];
+        if (!SuccInfo || SuccInfo->BlkNum)
+          continue;
+        SuccInfo->BlkNum = -1;
+        WorkList.push_back(SuccInfo);
+      }
+    }
+    PseudoEntry->BlkNum = BlkNum;
+    return PseudoEntry;
+  }
+
+  /// IntersectDominators - This is the dataflow lattice "meet" operation for
+  /// finding dominators. Given two basic blocks, it walks up the dominator
+  /// tree until it finds a common dominator of both. It uses the postorder
+  /// number of the blocks to determine how to do that.
+  BBInfo *IntersectDominators(BBInfo *Blk1, BBInfo *Blk2) {
+    while (Blk1 != Blk2) {
+      while (Blk1->BlkNum < Blk2->BlkNum) {
+        Blk1 = Blk1->IDom;
+        if (!Blk1)
+          return Blk2;
+      }
+      while (Blk2->BlkNum < Blk1->BlkNum) {
+        Blk2 = Blk2->IDom;
+        if (!Blk2)
+          return Blk1;
+      }
+    }
+    return Blk1;
+  }
+
+  /// FindDominators - Calculate the dominator tree for the subset of the CFG
+  /// corresponding to the basic blocks on the BlockList. This uses the
+  /// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey
+  /// and Kennedy, published in Software--Practice and Experience, 2001,
+  /// 4:1-10. Because the CFG subset does not include any edges leading into
+  /// blocks that define the value, the results are not the usual dominator
+  /// tree. The CFG subset has a single pseudo-entry node with edges to a set
+  /// of root nodes for blocks that define the value. The dominators for this
+  /// subset CFG are not the standard dominators but they are adequate for
+  /// placing PHIs within the subset CFG.
+  void FindDominators(BlockListTy *BlockList, BBInfo *PseudoEntry) {
+    bool Changed;
+    do {
+      Changed = false;
+      // Iterate over the list in reverse order, i.e., forward on CFG edges.
+      for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+           E = BlockList->rend(); I != E; ++I) {
+        BBInfo *Info = *I;
+        BBInfo *NewIDom = 0;
+
+        // Iterate through the block's predecessors.
+        for (unsigned p = 0; p != Info->NumPreds; ++p) {
+          BBInfo *Pred = Info->Preds[p];
+
+          // Treat an unreachable predecessor as a definition with 'undef'.
+          if (Pred->BlkNum == 0) {
+            Pred->AvailableVal = Traits::GetUndefVal(Pred->BB, Updater);
+            (*AvailableVals)[Pred->BB] = Pred->AvailableVal;
+            Pred->DefBB = Pred;
+            Pred->BlkNum = PseudoEntry->BlkNum;
+            PseudoEntry->BlkNum++;
+          }
+
+          if (!NewIDom)
+            NewIDom = Pred;
+          else
+            NewIDom = IntersectDominators(NewIDom, Pred);
+        }
+
+        // Check if the IDom value has changed.
+        if (NewIDom && NewIDom != Info->IDom) {
+          Info->IDom = NewIDom;
+          Changed = true;
+        }
+      }
+    } while (Changed);
+  }
+
+  /// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
+  /// any blocks containing definitions of the value. If one is found, then
+  /// the successor of Pred is in the dominance frontier for the definition,
+  /// and this function returns true.
+  bool IsDefInDomFrontier(const BBInfo *Pred, const BBInfo *IDom) {
+    for (; Pred != IDom; Pred = Pred->IDom) {
+      if (Pred->DefBB == Pred)
+        return true;
+    }
+    return false;
+  }
+
+  /// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers
+  /// of the known definitions. Iteratively add PHIs in the dom frontiers
+  /// until nothing changes. Along the way, keep track of the nearest
+  /// dominating definitions for non-PHI blocks.
+  void FindPHIPlacement(BlockListTy *BlockList) {
+    bool Changed;
+    do {
+      Changed = false;
+      // Iterate over the list in reverse order, i.e., forward on CFG edges.
+      for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+           E = BlockList->rend(); I != E; ++I) {
+        BBInfo *Info = *I;
+
+        // If this block already needs a PHI, there is nothing to do here.
+        if (Info->DefBB == Info)
+          continue;
+
+        // Default to use the same def as the immediate dominator.
+        BBInfo *NewDefBB = Info->IDom->DefBB;
+        for (unsigned p = 0; p != Info->NumPreds; ++p) {
+          if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
+            // Need a PHI here.
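+            // (A definition lies between this predecessor and the IDom, so
+            // this block is in that definition's dominance frontier and the
+            // reaching values must merge here.)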
+            NewDefBB = Info;
+            break;
+          }
+        }
+
+        // Check if anything changed.
+        if (NewDefBB != Info->DefBB) {
+          Info->DefBB = NewDefBB;
+          Changed = true;
+        }
+      }
+    } while (Changed);
+  }
+
+  /// FindAvailableVal - If this block requires a PHI, first check if an
+  /// existing PHI matches the PHI placement and reaching definitions computed
+  /// earlier, and if not, create a new PHI. Visit all the block's
+  /// predecessors to calculate the available value for each one and fill in
+  /// the incoming values for a new PHI.
+  void FindAvailableVals(BlockListTy *BlockList) {
+    // Go through the worklist in forward order (i.e., backward through the
+    // CFG) and check if existing PHIs can be used. If not, create empty
+    // PHIs where they are needed.
+    for (typename BlockListTy::iterator I = BlockList->begin(),
+         E = BlockList->end(); I != E; ++I) {
+      BBInfo *Info = *I;
+      // Check if there needs to be a PHI in BB.
+      if (Info->DefBB != Info)
+        continue;
+
+      // Look for an existing PHI.
+      FindExistingPHI(Info->BB, BlockList);
+      if (Info->AvailableVal)
+        continue;
+
+      ValT PHI = Traits::CreateEmptyPHI(Info->BB, Info->NumPreds, Updater);
+      Info->AvailableVal = PHI;
+      (*AvailableVals)[Info->BB] = PHI;
+    }
+
+    // Now go back through the worklist in reverse order to fill in the
+    // arguments for any new PHIs added in the forward traversal.
+    for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(),
+         E = BlockList->rend(); I != E; ++I) {
+      BBInfo *Info = *I;
+
+      if (Info->DefBB != Info) {
+        // Record the available value at join nodes to speed up subsequent
+        // uses of this SSAUpdater for the same value.
+        if (Info->NumPreds > 1)
+          (*AvailableVals)[Info->BB] = Info->DefBB->AvailableVal;
+        continue;
+      }
+
+      // Check if this block contains a newly added PHI.
+      PhiT *PHI = Traits::ValueIsNewPHI(Info->AvailableVal, Updater);
+      if (!PHI)
+        continue;
+
+      // Iterate through the block's predecessors.
+      for (unsigned p = 0; p != Info->NumPreds; ++p) {
+        BBInfo *PredInfo = Info->Preds[p];
+        BlkT *Pred = PredInfo->BB;
+        // Skip to the nearest preceding definition.
+        if (PredInfo->DefBB != PredInfo)
+          PredInfo = PredInfo->DefBB;
+        Traits::AddPHIOperand(PHI, PredInfo->AvailableVal, Pred);
+      }
+
+      DEBUG(dbgs() << "  Inserted PHI: " << *PHI << "\n");
+
+      // If the client wants to know about all new instructions, tell it.
+      if (InsertedPHIs) InsertedPHIs->push_back(PHI);
+    }
+  }
+
+  /// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+  /// them match what is needed.
+  void FindExistingPHI(BlkT *BB, BlockListTy *BlockList) {
+    for (typename BlkT::iterator BBI = BB->begin(), BBE = BB->end();
+         BBI != BBE; ++BBI) {
+      PhiT *SomePHI = Traits::InstrIsPHI(BBI);
+      if (!SomePHI)
+        break;
+      if (CheckIfPHIMatches(SomePHI)) {
+        RecordMatchingPHI(SomePHI);
+        break;
+      }
+      // Match failed: clear all the PHITag values.
+      for (typename BlockListTy::iterator I = BlockList->begin(),
+           E = BlockList->end(); I != E; ++I)
+        (*I)->PHITag = 0;
+    }
+  }
+
+  /// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+  /// in the BBMap.
+  bool CheckIfPHIMatches(PhiT *PHI) {
+    SmallVector<PhiT*, 20> WorkList;
+    WorkList.push_back(PHI);
+
+    // Mark that the block containing this PHI has been visited.
+    BBMap[PHI->getParent()]->PHITag = PHI;
+
+    while (!WorkList.empty()) {
+      PHI = WorkList.pop_back_val();
+
+      // Iterate through the PHI's incoming values.
+      for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI),
+           E = Traits::PHI_end(PHI); I != E; ++I) {
+        ValT IncomingVal = I.getIncomingValue();
+        BBInfo *PredInfo = BBMap[I.getIncomingBlock()];
+        // Skip to the nearest preceding definition.
+        if (PredInfo->DefBB != PredInfo)
+          PredInfo = PredInfo->DefBB;
+
+        // Check if it matches the expected value.
+        if (PredInfo->AvailableVal) {
+          if (IncomingVal == PredInfo->AvailableVal)
+            continue;
+          return false;
+        }
+
+        // Check if the value is a PHI in the correct block.
+        PhiT *IncomingPHIVal = Traits::ValueIsPHI(IncomingVal, Updater);
+        if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB)
+          return false;
+
+        // If this block has already been visited, check if this PHI matches.
+        if (PredInfo->PHITag) {
+          if (IncomingPHIVal == PredInfo->PHITag)
+            continue;
+          return false;
+        }
+        PredInfo->PHITag = IncomingPHIVal;
+
+        WorkList.push_back(IncomingPHIVal);
+      }
+    }
+    return true;
+  }
+
+  /// RecordMatchingPHI - For a PHI node that matches, record it and its input
+  /// PHIs in both the BBMap and the AvailableVals mapping.
+  void RecordMatchingPHI(PhiT *PHI) {
+    SmallVector<PhiT*, 20> WorkList;
+    WorkList.push_back(PHI);
+
+    // Record this PHI.
+    BlkT *BB = PHI->getParent();
+    ValT PHIVal = Traits::GetPHIValue(PHI);
+    (*AvailableVals)[BB] = PHIVal;
+    BBMap[BB]->AvailableVal = PHIVal;
+
+    while (!WorkList.empty()) {
+      PHI = WorkList.pop_back_val();
+
+      // Iterate through the PHI's incoming values.
+      for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI),
+           E = Traits::PHI_end(PHI); I != E; ++I) {
+        ValT IncomingVal = I.getIncomingValue();
+        PhiT *IncomingPHI = Traits::ValueIsPHI(IncomingVal, Updater);
+        if (!IncomingPHI) continue;
+        BB = IncomingPHI->getParent();
+        BBInfo *Info = BBMap[BB];
+        if (!Info || Info->AvailableVal)
+          continue;
+
+        // Record the PHI and add it to the worklist.
+        (*AvailableVals)[BB] = IncomingVal;
+        Info->AvailableVal = IncomingVal;
+        WorkList.push_back(IncomingPHI);
+      }
+    }
+  }
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/final/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
new file mode 100644
index 00000000000..54506cfff4c
--- /dev/null
+++ b/final/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -0,0 +1,51 @@
+//===-- UnifyFunctionExitNodes.h - Ensure fn's have one return --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to ensure that functions have at most one return and one
+// unwind instruction in them. Additionally, it keeps track of which node is
+// the new exit node of the CFG. If there are no return or unwind instructions
+// in the function, the getReturnBlock/getUnwindBlock methods will return a
+// null pointer.
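+//
+// A client pass would typically request this transformation through the pass
+// manager (an illustrative sketch; MyPass is a hypothetical client, not code
+// from this header):
+//
+//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+//     AU.addRequired<UnifyFunctionExitNodes>();
+//   }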
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UNIFYFUNCTIONEXITNODES_H
+#define LLVM_TRANSFORMS_UNIFYFUNCTIONEXITNODES_H
+
+#include "llvm/Pass.h"
+
+namespace llvm {
+
+struct UnifyFunctionExitNodes : public FunctionPass {
+  BasicBlock *ReturnBlock, *UnwindBlock, *UnreachableBlock;
+public:
+  static char ID; // Pass identification, replacement for typeid
+  UnifyFunctionExitNodes() : FunctionPass(ID),
+                             ReturnBlock(0), UnwindBlock(0),
+                             UnreachableBlock(0) {
+    initializeUnifyFunctionExitNodesPass(*PassRegistry::getPassRegistry());
+  }
+
+  // We can preserve non-critical-edgeness when we unify function exit nodes
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  // getReturn|Unwind|UnreachableBlock - Return the new single (or nonexistent)
+  // return, unwind, or unreachable basic blocks in the CFG.
+  //
+  BasicBlock *getReturnBlock() const { return ReturnBlock; }
+  BasicBlock *getUnwindBlock() const { return UnwindBlock; }
+  BasicBlock *getUnreachableBlock() const { return UnreachableBlock; }
+
+  virtual bool runOnFunction(Function &F);
+};
+
+Pass *createUnifyFunctionExitNodesPass();
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Transforms/Utils/UnrollLoop.h b/final/include/llvm/Transforms/Utils/UnrollLoop.h
new file mode 100644
index 00000000000..3d5ee1a62b8
--- /dev/null
+++ b/final/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -0,0 +1,29 @@
+//===- llvm/Transforms/Utils/UnrollLoop.h - Unrolling utilities -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some loop unrolling utilities. It does not define any
+// actual pass or policy, but provides a single function to perform loop
+// unrolling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
+#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
+
+namespace llvm {
+
+class Loop;
+class LoopInfo;
+class LPPassManager;
+
+bool UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM);
+
+}
+
+#endif
diff --git a/final/include/llvm/Transforms/Utils/ValueMapper.h b/final/include/llvm/Transforms/Utils/ValueMapper.h
new file mode 100644
index 00000000000..d612213a871
--- /dev/null
+++ b/final/include/llvm/Transforms/Utils/ValueMapper.h
@@ -0,0 +1,50 @@
+//===- ValueMapper.h - Remapping for constants and metadata -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MapValue interface which is used by various parts of
+// the Transforms/Utils library to implement cloning and linking facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
+#define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
+
+#include "llvm/ADT/ValueMap.h"
+
+namespace llvm {
+  class Value;
+  class Instruction;
+  typedef ValueMap<const Value *, TrackingVH<Value> > ValueToValueMapTy;
+
+  /// RemapFlags - These are flags that the value mapping APIs allow.
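+  /// Flags may be combined with the operator| defined below, e.g.
+  /// (illustrative):
+  ///   MapValue(V, VMap, RF_NoModuleLevelChanges | RF_IgnoreMissingEntries);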
+  enum RemapFlags {
+    RF_None = 0,
+
+    /// RF_NoModuleLevelChanges - If this flag is set, the remapper knows that
+    /// only local values within a function (such as an instruction or
+    /// argument) are mapped, not global values like functions and global
+    /// metadata.
+    RF_NoModuleLevelChanges = 1,
+
+    /// RF_IgnoreMissingEntries - If this flag is set, the remapper ignores
+    /// entries that are not in the value map. If it is unset, it aborts if an
+    /// operand is asked to be remapped which doesn't exist in the mapping.
+    RF_IgnoreMissingEntries = 2
+  };
+
+  static inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
+    return RemapFlags(unsigned(LHS)|unsigned(RHS));
+  }
+
+  Value *MapValue(const Value *V, ValueToValueMapTy &VM,
+                  RemapFlags Flags = RF_None);
+  void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
+                        RemapFlags Flags = RF_None);
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Type.h b/final/include/llvm/Type.h
new file mode 100644
index 00000000000..0939d67265b
--- /dev/null
+++ b/final/include/llvm/Type.h
@@ -0,0 +1,564 @@
+//===-- llvm/Type.h - Classes for handling data types -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TYPE_H
+#define LLVM_TYPE_H
+
+#include "llvm/AbstractTypeUser.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/ADT/GraphTraits.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class DerivedType;
+class PointerType;
+class IntegerType;
+class TypeMapBase;
+class raw_ostream;
+class Module;
+class LLVMContext;
+
+/// This file contains the declaration of the Type class. For more "Type" type
+/// stuff, look in DerivedTypes.h.
+///
+/// The instances of the Type class are immutable: once they are created,
+/// they are never changed. Also note that only one instance of a particular
+/// type is ever created. Thus seeing if two types are equal is a matter of
+/// doing a trivial pointer comparison. To enforce that no two equal instances
+/// are created, Type instances can only be created via static factory methods
+/// in class Type and in derived classes.
+///
+/// Once allocated, Types are never free'd, unless they are an abstract type
+/// that is resolved to a more concrete type.
+///
+/// Types themselves don't have a name, and can be named either by:
+/// - using a SymbolTable instance, typically from some Module,
+/// - using convenience methods in the Module class (which uses the module's
+///   SymbolTable too).
+///
+/// Opaque types are simple derived types with no state. There may be many
+/// different Opaque type objects floating around, but two are only considered
+/// identical if they are pointer-equal to each other. This allows us to have
+/// two opaque types that end up resolving to different concrete types later.
+///
+/// Opaque types are also kinda weird and scary and different because they have
+/// to keep a list of uses of the type. When, through linking, parsing, or
+/// bitcode reading, they become resolved, they need to find and update all
+/// users of the unknown type, causing them to reference a new, more concrete
+/// type. Opaque types are deleted when their use list dwindles to zero users.
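+///
+/// Because types are uniqued, equality is just pointer identity; for example,
+/// Type::getInt32Ty(C) called twice with the same LLVMContext yields the same
+/// Type* both times.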
+///
+/// @brief Root of type hierarchy
+class Type : public AbstractTypeUser {
+public:
+  //===-------------------------------------------------------------------===//
+  /// Definitions of all of the base types for the Type system. Based on this
+  /// value, you can cast to a "DerivedType" subclass (see DerivedTypes.h)
+  /// Note: If you add an element to this, you need to add an element to the
+  /// Type::getPrimitiveType function, or else things will break!
+  /// Also update LLVMTypeKind and LLVMGetTypeKind() in the C binding.
+  ///
+  enum TypeID {
+    // PrimitiveTypes .. make sure LastPrimitiveTyID stays up to date
+    VoidTyID = 0,    ///<  0: type with no size
+    FloatTyID,       ///<  1: 32 bit floating point type
+    DoubleTyID,      ///<  2: 64 bit floating point type
+    X86_FP80TyID,    ///<  3: 80 bit floating point type (X87)
+    FP128TyID,       ///<  4: 128 bit floating point type (112-bit mantissa)
+    PPC_FP128TyID,   ///<  5: 128 bit floating point type (two 64-bits)
+    LabelTyID,       ///<  6: Labels
+    MetadataTyID,    ///<  7: Metadata
+    X86_MMXTyID,     ///<  8: MMX vectors (64 bits)
+
+    // Derived types... see DerivedTypes.h file...
+    // Make sure FirstDerivedTyID stays up to date!!!
+    IntegerTyID,     ///<  9: Arbitrary bit width integers
+    FunctionTyID,    ///< 10: Functions
+    StructTyID,      ///< 11: Structures
+    ArrayTyID,       ///< 12: Arrays
+    PointerTyID,     ///< 13: Pointers
+    OpaqueTyID,      ///< 14: Opaque: type with unknown structure
+    VectorTyID,      ///< 15: SIMD 'packed' format, or other vector type
+
+    NumTypeIDs,                         // Must remain as last defined ID
+    LastPrimitiveTyID = X86_MMXTyID,
+    FirstDerivedTyID = IntegerTyID
+  };
+
+private:
+  TypeID   ID : 8;            // The current base type of this type.
+  bool     Abstract : 1;      // True if type contains an OpaqueType
+  unsigned SubclassData : 23; // Space for subclasses to store data
+
+  /// RefCount - This counts the number of PATypeHolders that are pointing to
+  /// this type. When this number falls to zero, if the type is abstract and
+  /// has no AbstractTypeUsers, the type is deleted. This is only sensical for
+  /// derived types.
+  ///
+  mutable unsigned RefCount;
+
+  /// Context - This refers to the LLVMContext in which this type was uniqued.
+  LLVMContext &Context;
+  friend class LLVMContextImpl;
+
+  const Type *getForwardedTypeInternal() const;
+
+  // Some Type instances are allocated as arrays, some aren't. So we provide
+  // this method to get the right kind of destruction for the type of Type.
+  void destroy() const; // const is a lie, this does "delete this"!
+
+protected:
+  explicit Type(LLVMContext &C, TypeID id) :
+    ID(id), Abstract(false), SubclassData(0),
+    RefCount(0), Context(C),
+    ForwardType(0), NumContainedTys(0),
+    ContainedTys(0) {}
+  virtual ~Type() {
+    assert(AbstractTypeUsers.empty() && "Abstract types remain");
+  }
+
+  /// Types can become nonabstract later, if they are refined.
+  ///
+  inline void setAbstract(bool Val) { Abstract = Val; }
+
+  unsigned getRefCount() const { return RefCount; }
+
+  unsigned getSubclassData() const { return SubclassData; }
+  void setSubclassData(unsigned val) { SubclassData = val; }
+
+  /// ForwardType - This field is used to implement the union find scheme for
+  /// abstract types. When types are refined to other types, this field is set
+  /// to the more refined type. Only abstract types can be forwarded.
+  mutable const Type *ForwardType;
+
+
+  /// AbstractTypeUsers - Implement a list of the users that need to be notified
+  /// if I am a type, and I get resolved into a more concrete type.
+  ///
+  mutable std::vector<AbstractTypeUser *> AbstractTypeUsers;
+
+  /// NumContainedTys - Keeps track of how many PATypeHandle instances there
+  /// are at the end of this type instance for the list of contained types. It
+  /// is the subclass's responsibility to set this up. Set to 0 if there are
+  /// no contained types in this type.
+  unsigned NumContainedTys;
+
+  /// ContainedTys - A pointer to the array of Types (PATypeHandle) contained
+  /// by this Type. For example, this includes the arguments of a function
+  /// type, the elements of a structure, the pointee of a pointer, the element
+  /// type of an array, etc. This pointer may be 0 for types that don't
+  /// contain other types (Integer, Double, Float). In general, the subclass
+  /// should arrange for space for the PATypeHandles to be included in the
+  /// allocation of the type object and set this pointer to the address of the
+  /// first element. This allows the Type class to manipulate the ContainedTys
+  /// without understanding the subclass's placement for this array. Keeping
+  /// it here also allows the subtype_* members to be implemented MUCH more
+  /// efficiently, and dynamically very few types do not contain any elements.
+  PATypeHandle *ContainedTys;
+
+public:
+  void print(raw_ostream &O) const;
+
+  /// @brief Debugging support: print to stderr
+  void dump() const;
+
+  /// @brief Debugging support: print to stderr (use type names from context
+  /// module).
+  void dump(const Module *Context) const;
+
+  /// getContext - Fetch the LLVMContext in which this type was uniqued.
+  LLVMContext &getContext() const { return Context; }
+
+  //===--------------------------------------------------------------------===//
+  // Property accessors for dealing with types... Some of these virtual methods
+  // are defined in private classes defined in Type.cpp for primitive types.
+  //
+
+  /// getDescription - Return the string representation of the type.
+  std::string getDescription() const;
+
+  /// getTypeID - Return the type id for the type. This will return one
+  /// of the TypeID enum elements defined above.
+  ///
+  inline TypeID getTypeID() const { return ID; }
+
+  /// isVoidTy - Return true if this is 'void'.
+  bool isVoidTy() const { return ID == VoidTyID; }
+
+  /// isFloatTy - Return true if this is 'float', a 32-bit IEEE fp type.
+  bool isFloatTy() const { return ID == FloatTyID; }
+
+  /// isDoubleTy - Return true if this is 'double', a 64-bit IEEE fp type.
+  bool isDoubleTy() const { return ID == DoubleTyID; }
+
+  /// isX86_FP80Ty - Return true if this is x86 long double.
+  bool isX86_FP80Ty() const { return ID == X86_FP80TyID; }
+
+  /// isFP128Ty - Return true if this is 'fp128'.
+  bool isFP128Ty() const { return ID == FP128TyID; }
+
+  /// isPPC_FP128Ty - Return true if this is powerpc long double.
+  bool isPPC_FP128Ty() const { return ID == PPC_FP128TyID; }
+
+  /// isFloatingPointTy - Return true if this is one of the five floating point
+  /// types.
+  bool isFloatingPointTy() const { return ID == FloatTyID || ID == DoubleTyID ||
+      ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; }
+
+  /// isX86_MMXTy - Return true if this is X86 MMX.
+  bool isX86_MMXTy() const { return ID == X86_MMXTyID; }
+
+  /// isFPOrFPVectorTy - Return true if this is a FP type or a vector of FP.
+  ///
+  bool isFPOrFPVectorTy() const;
+
+  /// isLabelTy - Return true if this is 'label'.
+  bool isLabelTy() const { return ID == LabelTyID; }
+
+  /// isMetadataTy - Return true if this is 'metadata'.
+  bool isMetadataTy() const { return ID == MetadataTyID; }
+
+  /// isIntegerTy - True if this is an instance of IntegerType.
+  ///
+  bool isIntegerTy() const { return ID == IntegerTyID; }
+
+  /// isIntegerTy - Return true if this is an IntegerType of the given width.
+  bool isIntegerTy(unsigned Bitwidth) const;
+
+  /// isIntOrIntVectorTy - Return true if this is an integer type or a vector
+  /// of integer types.
+  ///
+  bool isIntOrIntVectorTy() const;
+
+  /// isFunctionTy - True if this is an instance of FunctionType.
+  ///
+  bool isFunctionTy() const { return ID == FunctionTyID; }
+
+  /// isStructTy - True if this is an instance of StructType.
+  ///
+  bool isStructTy() const { return ID == StructTyID; }
+
+  /// isArrayTy - True if this is an instance of ArrayType.
+  ///
+  bool isArrayTy() const { return ID == ArrayTyID; }
+
+  /// isPointerTy - True if this is an instance of PointerType.
+  ///
+  bool isPointerTy() const { return ID == PointerTyID; }
+
+  /// isOpaqueTy - True if this is an instance of OpaqueType.
+  ///
+  bool isOpaqueTy() const { return ID == OpaqueTyID; }
+
+  /// isVectorTy - True if this is an instance of VectorType.
+  ///
+  bool isVectorTy() const { return ID == VectorTyID; }
+
+  /// isAbstract - True if the type is either an Opaque type, or is a derived
+  /// type that includes an opaque type somewhere in it.
+  ///
+  inline bool isAbstract() const { return Abstract; }
+
+  /// canLosslesslyBitCastTo - Return true if this type could be converted
+  /// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
+  /// are valid for types of the same size only where no re-interpretation of
+  /// the bits is done.
+  /// @brief Determine if this type could be losslessly bitcast to Ty
+  bool canLosslesslyBitCastTo(const Type *Ty) const;
+
+
+  /// Here are some useful little methods to query what type derived types are
+  /// Note that all other types can just compare to see if this == Type::xxxTy;
+  ///
+  inline bool isPrimitiveType() const { return ID <= LastPrimitiveTyID; }
+  inline bool isDerivedType()   const { return ID >= FirstDerivedTyID; }
+
+  /// isFirstClassType - Return true if the type is "first class", meaning it
+  /// is a valid type for a Value.
+  ///
+  inline bool isFirstClassType() const {
+    // There are more first-class kinds than non-first-class kinds, so a
+    // negative test is simpler than a positive one.
+    return ID != FunctionTyID && ID != VoidTyID && ID != OpaqueTyID;
+  }
+
+  /// isSingleValueType - Return true if the type is a valid type for a
+  /// virtual register in codegen. This includes all first-class types
+  /// except struct and array types.
+  ///
+  inline bool isSingleValueType() const {
+    return (ID != VoidTyID && ID <= LastPrimitiveTyID) ||
+            ID == IntegerTyID || ID == PointerTyID || ID == VectorTyID;
+  }
+
+  /// isAggregateType - Return true if the type is an aggregate type. This
+  /// means it is valid as the first operand of an insertvalue or
+  /// extractvalue instruction. This includes struct and array types, but
+  /// does not include vector types.
+  ///
+  inline bool isAggregateType() const {
+    return ID == StructTyID || ID == ArrayTyID;
+  }
+
+  /// isSized - Return true if it makes sense to take the size of this type. To
+  /// get the actual size for a particular target, it is reasonable to use the
+  /// TargetData subsystem to do this.
+  ///
+  bool isSized() const {
+    // If it's a primitive, it is always sized.
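+    // (These kinds always have a well-defined size, so they can be answered
+    // without looking at the type's contents.)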
+    if (ID == IntegerTyID || isFloatingPointTy() || ID == PointerTyID ||
+        ID == X86_MMXTyID)
+      return true;
+    // If it is not something that can have a size (e.g. a function or label),
+    // it doesn't have a size.
+    if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID)
+      return false;
+    // If it is something that can have a size and it's concrete, it definitely
+    // has a size, otherwise we have to try harder to decide.
+    return !isAbstract() || isSizedDerivedType();
+  }
+
+  /// getPrimitiveSizeInBits - Return the basic size of this type if it is a
+  /// primitive type. These are fixed by LLVM and are not target dependent.
+  /// This will return zero if the type does not have a size or is not a
+  /// primitive type.
+  ///
+  /// Note that this may not reflect the size of memory allocated for an
+  /// instance of the type or the number of bytes that are written when an
+  /// instance of the type is stored to memory. The TargetData class provides
+  /// additional query functions to provide this information.
+  ///
+  unsigned getPrimitiveSizeInBits() const;
+
+  /// getScalarSizeInBits - If this is a vector type, return the
+  /// getPrimitiveSizeInBits value for the element type. Otherwise return the
+  /// getPrimitiveSizeInBits value for this type.
+  unsigned getScalarSizeInBits() const;
+
+  /// getFPMantissaWidth - Return the width of the mantissa of this type. This
+  /// is only valid on floating point types. If the FP type does not
+  /// have a stable mantissa (e.g. ppc long double), this method returns -1.
+  int getFPMantissaWidth() const;
+
+  /// getForwardedType - Return the type that this type has been resolved to if
+  /// it has been resolved to anything. This is used to implement the
+  /// union-find algorithm for type resolution, and shouldn't be used by
+  /// general purpose clients.
+  const Type *getForwardedType() const {
+    if (!ForwardType) return 0;
+    return getForwardedTypeInternal();
+  }
+
+  /// getVAArgsPromotedType - Return the type an argument of this type
+  /// will be promoted to if passed through a variable argument
+  /// function.
+  const Type *getVAArgsPromotedType(LLVMContext &C) const;
+
+  /// getScalarType - If this is a vector type, return the element type,
+  /// otherwise return this.
+  const Type *getScalarType() const;
+
+  //===--------------------------------------------------------------------===//
+  // Type Iteration support
+  //
+  typedef PATypeHandle *subtype_iterator;
+  subtype_iterator subtype_begin() const { return ContainedTys; }
+  subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
+
+  /// getContainedType - This method is used to implement the type iterator
+  /// (defined at the end of the file). For derived types, this returns the
+  /// types 'contained' in the derived type.
+  ///
+  const Type *getContainedType(unsigned i) const {
+    assert(i < NumContainedTys && "Index out of range!");
+    return ContainedTys[i].get();
+  }
+
+  /// getNumContainedTypes - Return the number of types in the derived type.
+  ///
+  unsigned getNumContainedTypes() const { return NumContainedTys; }
+
+  //===--------------------------------------------------------------------===//
+  // Static members exported by the Type class itself. Useful for getting
+  // instances of Type.
+  //
+
+  /// getPrimitiveType - Return a type based on an identifier.
+  static const Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
+
+  //===--------------------------------------------------------------------===//
+  // These are the builtin types that are always available...
+  //
+  static const Type *getVoidTy(LLVMContext &C);
+  static const Type *getLabelTy(LLVMContext &C);
+  static const Type *getFloatTy(LLVMContext &C);
+  static const Type *getDoubleTy(LLVMContext &C);
+  static const Type *getMetadataTy(LLVMContext &C);
+  static const Type *getX86_FP80Ty(LLVMContext &C);
+  static const Type *getFP128Ty(LLVMContext &C);
+  static const Type *getPPC_FP128Ty(LLVMContext &C);
+  static const Type *getX86_MMXTy(LLVMContext &C);
+  static const IntegerType *getIntNTy(LLVMContext &C, unsigned N);
+  static const IntegerType *getInt1Ty(LLVMContext &C);
+  static const IntegerType *getInt8Ty(LLVMContext &C);
+  static const IntegerType *getInt16Ty(LLVMContext &C);
+  static const IntegerType *getInt32Ty(LLVMContext &C);
+  static const IntegerType *getInt64Ty(LLVMContext &C);
+
+  //===--------------------------------------------------------------------===//
+  // Convenience methods for getting pointer types with one of the above
+  // builtin types as pointee.
+  //
+  static const PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getIntNPtrTy(LLVMContext &C, unsigned N,
+                                         unsigned AS = 0);
+  static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
+  static const PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
+
+  /// Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Type *) { return true; }
+
+  void addRef() const {
+    assert(isAbstract() && "Cannot add a reference to a non-abstract type!");
+    ++RefCount;
+  }
+
+  void dropRef() const {
+    assert(isAbstract() && "Cannot drop a reference to a non-abstract type!");
+    assert(RefCount && "No objects are currently referencing this object!");
+
+    // If this is the last PATypeHolder using this object, and there are no
+    // PATypeHandles using it, the type is dead, delete it now.
+    if (--RefCount == 0 && AbstractTypeUsers.empty())
+      this->destroy();
+  }
+
+  /// addAbstractTypeUser - Notify an abstract type that there is a new user of
+  /// it. This function is called primarily by the PATypeHandle class.
+  ///
+  void addAbstractTypeUser(AbstractTypeUser *U) const;
+
+  /// removeAbstractTypeUser - Notify an abstract type that a user of the class
+  /// no longer has a handle to the type. This function is called primarily by
+  /// the PATypeHandle class. When there are no users of the abstract type, it
+  /// is annihilated, because there is no way to get a reference to it ever
+  /// again.
+  ///
+  void removeAbstractTypeUser(AbstractTypeUser *U) const;
+
+  /// getPointerTo - Return a pointer to the current type. This is equivalent
+  /// to PointerType::get(Foo, AddrSpace).
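+  /// For example (illustrative), Type::getInt8Ty(C)->getPointerTo() yields
+  /// the i8* type.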
+  const PointerType *getPointerTo(unsigned AddrSpace = 0) const;
+
+private:
+  /// isSizedDerivedType - Derived types like structures and arrays are sized
+  /// iff all of the members of the type are sized as well. Since asking for
+  /// their size is relatively uncommon, move this operation out of line.
+  bool isSizedDerivedType() const;
+
+  virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
+  virtual void typeBecameConcrete(const DerivedType *AbsTy);
+
+protected:
+  // PromoteAbstractToConcrete - This is an internal method used to change
+  // "Abstract" from true to false when types are refined.
+  void PromoteAbstractToConcrete();
+  friend class TypeMapBase;
+};
+
+//===----------------------------------------------------------------------===//
+// Define some inline methods for the AbstractTypeUser.h:PATypeHandle class.
+// These are defined here because they MUST be inlined, yet are dependent on
+// the definition of the Type class.
+//
+inline void PATypeHandle::addUser() {
+  assert(Ty && "Type Handle has a null type!");
+  if (Ty->isAbstract())
+    Ty->addAbstractTypeUser(User);
+}
+inline void PATypeHandle::removeUser() {
+  if (Ty->isAbstract())
+    Ty->removeAbstractTypeUser(User);
+}
+
+// Define inline methods for PATypeHolder.
+
+/// get - This implements the forwarding part of the union-find algorithm for
+/// abstract types. Before every access to the Type*, we check to see if the
+/// type we are pointing to is forwarding to a new type. If so, we drop our
+/// reference to the type.
+///
+inline Type *PATypeHolder::get() const {
+  if (Ty == 0) return 0;
+  const Type *NewTy = Ty->getForwardedType();
+  if (!NewTy) return const_cast<Type*>(Ty);
+  return *const_cast<PATypeHolder*>(this) = NewTy;
+}
+
+inline void PATypeHolder::addRef() {
+  if (Ty && Ty->isAbstract())
+    Ty->addRef();
+}
+
+inline void PATypeHolder::dropRef() {
+  if (Ty && Ty->isAbstract())
+    Ty->dropRef();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Provide specializations of GraphTraits to be able to treat a type as a
+// graph of sub types...
+
+template <> struct GraphTraits<Type*> {
+  typedef Type NodeType;
+  typedef Type::subtype_iterator ChildIteratorType;
+
+  static inline NodeType *getEntryNode(Type *T) { return T; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->subtype_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->subtype_end();
+  }
+};
+
+template <> struct GraphTraits<const Type*> {
+  typedef const Type NodeType;
+  typedef Type::subtype_iterator ChildIteratorType;
+
+  static inline NodeType *getEntryNode(const Type *T) { return T; }
+  static inline ChildIteratorType child_begin(NodeType *N) {
+    return N->subtype_begin();
+  }
+  static inline ChildIteratorType child_end(NodeType *N) {
+    return N->subtype_end();
+  }
+};
+
+template <> struct isa_impl<PointerType, Type> {
+  static inline bool doit(const Type &Ty) {
+    return Ty.getTypeID() == Type::PointerTyID;
+  }
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const Type &T);
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/TypeSymbolTable.h b/final/include/llvm/TypeSymbolTable.h
new file mode 100644
index 00000000000..9fdcb983232
--- /dev/null
+++ b/final/include/llvm/TypeSymbolTable.h
@@ -0,0 +1,152 @@
+//===-- llvm/TypeSymbolTable.h - Implement a Type Symtab --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the name/type symbol table for LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TYPE_SYMBOL_TABLE_H
+#define LLVM_TYPE_SYMBOL_TABLE_H
+
+#include "llvm/Type.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/DataTypes.h"
+#include <map>
+
+namespace llvm {
+
+/// This class provides a symbol table of name/type pairs with operations to
+/// support constructing, searching and iterating over the symbol table. The
+/// class derives from AbstractTypeUser so that the contents of the symbol
+/// table can be updated when abstract types become concrete.
+class TypeSymbolTable : public AbstractTypeUser {
+
+/// @name Types
+/// @{
+public:
+
+  /// @brief A mapping of names to types.
+  typedef std::map<const std::string, const Type*> TypeMap;
+
+  /// @brief An iterator over the TypeMap.
+  typedef TypeMap::iterator iterator;
+
+  /// @brief A const_iterator over the TypeMap.
+  typedef TypeMap::const_iterator const_iterator;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+
+  TypeSymbolTable() : LastUnique(0) {}
+  ~TypeSymbolTable();
+
+/// @}
+/// @name Accessors
+/// @{
+public:
+
+  /// Generates a unique name for a type based on the \p BaseName by
+  /// incrementing an integer and appending it to the name, if necessary.
+  /// @returns the unique name
+  /// @brief Get a unique name for a type
+  std::string getUniqueName(StringRef BaseName) const;
+
+  /// This method finds the type with the given \p name in the type map
+  /// and returns it.
+  /// @returns null if the name is not found, otherwise the Type
+  /// associated with the \p name.
+  /// @brief Lookup a type by name.
+  Type *lookup(StringRef name) const;
+
+  /// Lookup the type associated with name.
+  /// @returns end() if the name is not found, or an iterator at the entry for
+  /// Type.
+  iterator find(StringRef Name) {
+    return tmap.find(Name);
+  }
+
+  /// Lookup the type associated with name.
+  /// @returns end() if the name is not found, or an iterator at the entry for
+  /// Type.
+  const_iterator find(StringRef Name) const {
+    return tmap.find(Name);
+  }
+
+  /// @returns true iff the symbol table is empty.
+  /// @brief Determine if the symbol table is empty
+  inline bool empty() const { return tmap.empty(); }
+
+  /// @returns the size of the symbol table
+  /// @brief The number of name/type pairs is returned.
+  inline unsigned size() const { return unsigned(tmap.size()); }
+
+  /// This function can be used from the debugger to display the
+  /// content of the symbol table while debugging.
+  /// @brief Print out symbol table on stderr
+  void dump() const;
+
+/// @}
+/// @name Iteration
+/// @{
+public:
+  /// Get an iterator to the start of the symbol table
+  inline iterator begin() { return tmap.begin(); }
+
+  /// @brief Get a const_iterator to the start of the symbol table
+  inline const_iterator begin() const { return tmap.begin(); }
+
+  /// Get an iterator to the end of the symbol table.
+  inline iterator end() { return tmap.end(); }
+
+  /// Get a const_iterator to the end of the symbol table.
+  inline const_iterator end() const { return tmap.end(); }
+
+/// @}
+/// @name Mutators
+/// @{
+public:
+
+  /// Inserts a type into the symbol table with the specified name. There can
+  /// be a many-to-one mapping between names and types. This method allows a
+  /// type with an existing entry in the symbol table to get a new name.
+  /// @brief Insert a type under a new name.
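+  /// For example (illustrative), after
+  ///   TST.insert("foo", T);
+  ///   TST.insert("bar", T);
+  /// both names map to the same Type instance.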
+  void insert(StringRef Name, const Type *Typ);
+
+  /// Remove a type at the specified position in the symbol table.
+  /// @returns the Type that was erased from the symbol table.
+  Type* remove(iterator TI);
+
+/// @}
+/// @name AbstractTypeUser Methods
+/// @{
+private:
+  /// This function is called when one of the types in the type plane
+  /// is refined.
+  virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
+
+  /// This function marks a type as being concrete (defined).
+  virtual void typeBecameConcrete(const DerivedType *AbsTy);
+
+/// @}
+/// @name Internal Data
+/// @{
+private:
+  TypeMap tmap; ///< This is the mapping of names to types.
+  mutable uint32_t LastUnique; ///< Counter for tracking unique names
+
+/// @}
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/Use.h b/final/include/llvm/Use.h
new file mode 100644
index 00000000000..ccbdd7fcae1
--- /dev/null
+++ b/final/include/llvm/Use.h
@@ -0,0 +1,222 @@
+//===-- llvm/Use.h - Definition of the Use class ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines the Use class. The Use class represents the operand of an
+// instruction or some other User instance which refers to a Value. The Use
+// class keeps the "use list" of the referenced value up to date.
+//
+// Pointer tagging is used to efficiently find the User corresponding
+// to a Use without having to store a User pointer in every Use. A
+// User is preceded in memory by all the Uses corresponding to its
+// operands, and the low bits of one of the fields (Prev) of the Use
+// class are used to encode offsets to be able to find that User given
+// a pointer to any Use. For details, see:
+//
+//   http://www.llvm.org/docs/ProgrammersManual.html#UserLayout
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_USE_H
+#define LLVM_USE_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include <cstddef>
+#include <iterator>
+
+namespace llvm {
+
+class Value;
+class User;
+class Use;
+template<typename>
+struct simplify_type;
+
+// Use** is only 4-byte aligned.
+template<>
+class PointerLikeTypeTraits<Use**> {
+public:
+  static inline void *getAsVoidPointer(Use** P) { return P; }
+  static inline Use **getFromVoidPointer(void *P) {
+    return static_cast<Use**>(P);
+  }
+  enum { NumLowBitsAvailable = 2 };
+};
+
+//===----------------------------------------------------------------------===//
+//                                  Use Class
+//===----------------------------------------------------------------------===//
+
+/// Use is here to make keeping the "use" list of a Value up-to-date really
+/// easy.
+class Use {
+public:
+  /// swap - provide a fast substitute to std::swap<Use>
+  /// that also works with less standard-compliant compilers
+  void swap(Use &RHS);
+
+private:
+  /// Copy ctor - do not implement
+  Use(const Use &U);
+
+  /// Destructor - Only for zap()
+  ~Use() {
+    if (Val) removeFromList();
+  }
+
+  enum PrevPtrTag { zeroDigitTag
+                  , oneDigitTag
+                  , stopTag
+                  , fullStopTag };
+
+  /// Constructor
+  Use(PrevPtrTag tag) : Val(0) {
+    Prev.setInt(tag);
+  }
+
+public:
+  /// Normally Use will just implicitly convert to a Value* that it holds.
+  operator Value*() const { return Val; }
+
+  /// If implicit conversion to Value* doesn't work, the get() method returns
+  /// the Value*.
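+  /// Both forms are equivalent (illustrative):
+  ///   Value *V1 = U;        // implicit conversion
+  ///   Value *V2 = U.get();  // explicit accessor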
+ Value *get() const { return Val; } + + /// getUser - This returns the User that contains this Use. For an + /// instruction operand, for example, this will return the instruction. + User *getUser() const; + + inline void set(Value *Val); + + Value *operator=(Value *RHS) { + set(RHS); + return RHS; + } + const Use &operator=(const Use &RHS) { + set(RHS.Val); + return *this; + } + + Value *operator->() { return Val; } + const Value *operator->() const { return Val; } + + Use *getNext() const { return Next; } + + + /// zap - This is used to destroy Use operands when the number of operands of + /// a User changes. + static void zap(Use *Start, const Use *Stop, bool del = false); + +private: + const Use* getImpliedUser() const; + static Use *initTags(Use *Start, Use *Stop); + + Value *Val; + Use *Next; + PointerIntPair Prev; + + void setPrev(Use **NewPrev) { + Prev.setPointer(NewPrev); + } + void addToList(Use **List) { + Next = *List; + if (Next) Next->setPrev(&Next); + setPrev(List); + *List = this; + } + void removeFromList() { + Use **StrippedPrev = Prev.getPointer(); + *StrippedPrev = Next; + if (Next) Next->setPrev(StrippedPrev); + } + + friend class Value; + friend class User; +}; + +// simplify_type - Allow clients to treat uses just like values when using +// casting operators. +template<> struct simplify_type { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const Use &Val) { + return static_cast(Val.get()); + } +}; +template<> struct simplify_type { + typedef Value* SimpleType; + static SimpleType getSimplifiedValue(const Use &Val) { + return static_cast(Val.get()); + } +}; + + + +template // UserTy == 'User' or 'const User' +class value_use_iterator : public std::iterator { + typedef std::iterator super; + typedef value_use_iterator _Self; + + Use *U; + explicit value_use_iterator(Use *u) : U(u) {} + friend class Value; +public: + typedef typename super::reference reference; + typedef typename super::pointer pointer; + + value_use_iterator(const _Self &I) : U(I.U) {} + value_use_iterator() {} + + bool operator==(const _Self &x) const { + return U == x.U; + } + bool operator!=(const _Self &x) const { + return !operator==(x); + } + + /// atEnd - return true if this iterator is equal to use_end() on the value. + bool atEnd() const { return U == 0; } + + // Iterator traversal: forward iteration only + _Self &operator++() { // Preincrement + assert(U && "Cannot increment end iterator!"); + U = U->getNext(); + return *this; + } + _Self operator++(int) { // Postincrement + _Self tmp = *this; ++*this; return tmp; + } + + // Retrieve a pointer to the current User. + UserTy *operator*() const { + assert(U && "Cannot dereference end iterator!"); + return U->getUser(); + } + + UserTy *operator->() const { return operator*(); } + + Use &getUse() const { return *U; } + + /// getOperandNo - Return the operand # of this use in its User. 
Defined in + /// User.h + /// + unsigned getOperandNo() const; +}; + +//===----------------------------------------------------------------------===// +// AugmentedUse layout struct +//===----------------------------------------------------------------------===// + +struct AugmentedUse : public Use { + PointerIntPair ref; + AugmentedUse(); // not implemented +}; + +} // End llvm namespace + +#endif diff --git a/final/include/llvm/User.h b/final/include/llvm/User.h new file mode 100644 index 00000000000..1363495f7c0 --- /dev/null +++ b/final/include/llvm/User.h @@ -0,0 +1,175 @@ +//===-- llvm/User.h - User class definition ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class defines the interface that one who 'use's a Value must implement. +// Each instance of the Value class keeps track of what User's have handles +// to it. +// +// * Instructions are the largest class of User's. +// * Constants may be users of other constants (think arrays and stuff) +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_USER_H +#define LLVM_USER_H + +#include "llvm/Value.h" + +namespace llvm { + +/// OperandTraits - Compile-time customization of +/// operand-related allocators and accessors +/// for use of the User class +template +struct OperandTraits; + +class User : public Value { + User(const User &); // Do not implement + void *operator new(size_t); // Do not implement + template + friend struct HungoffOperandTraits; +protected: + /// OperandList - This is a pointer to the array of Uses for this User. + /// For nodes of fixed arity (e.g. a binary operator) this array will live + /// prefixed to some derived class instance. For nodes of resizable variable + /// arity (e.g. PHINodes, SwitchInst etc.), this memory will be dynamically + /// allocated and should be destroyed by the classes' virtual dtor. + Use *OperandList; + + /// NumOperands - The number of values used by this User. + /// + unsigned NumOperands; + + void *operator new(size_t s, unsigned Us); + User(const Type *ty, unsigned vty, Use *OpList, unsigned NumOps) + : Value(ty, vty), OperandList(OpList), NumOperands(NumOps) {} + Use *allocHungoffUses(unsigned) const; + void dropHungoffUses() { + Use::zap(OperandList, OperandList + NumOperands, true); + OperandList = 0; + // Reset NumOperands so User::operator delete() does the right thing. + NumOperands = 0; + } +public: + ~User() { + Use::zap(OperandList, OperandList + NumOperands); + } + /// operator delete - free memory allocated for User and Use objects + void operator delete(void *Usr); + /// placement delete - required by std, but never called. + void operator delete(void*, unsigned) { + assert(0 && "Constructor throws?"); + } + /// placement delete - required by std, but never called. + void operator delete(void*, unsigned, bool) { + assert(0 && "Constructor throws?"); + } +protected: + template static Use &OpFrom(const U *that) { + return Idx < 0 + ? 
+protected:
+  template <int Idx, typename U> static Use &OpFrom(const U *that) {
+    return Idx < 0
+      ? OperandTraits<U>::op_end(const_cast<U*>(that))[Idx]
+      : OperandTraits<U>::op_begin(const_cast<U*>(that))[Idx];
+  }
+  template <int Idx> Use &Op() {
+    return OpFrom<Idx>(this);
+  }
+  template <int Idx> const Use &Op() const {
+    return OpFrom<Idx>(this);
+  }
+public:
+  Value *getOperand(unsigned i) const {
+    assert(i < NumOperands && "getOperand() out of range!");
+    return OperandList[i];
+  }
+  void setOperand(unsigned i, Value *Val) {
+    assert(i < NumOperands && "setOperand() out of range!");
+    assert((!isa<Constant>((const Value*)this) ||
+            isa<GlobalValue>((const Value*)this)) &&
+           "Cannot mutate a constant with setOperand!");
+    OperandList[i] = Val;
+  }
+  const Use &getOperandUse(unsigned i) const {
+    assert(i < NumOperands && "getOperand() out of range!");
+    return OperandList[i];
+  }
+  Use &getOperandUse(unsigned i) {
+    assert(i < NumOperands && "getOperand() out of range!");
+    return OperandList[i];
+  }
+
+  unsigned getNumOperands() const { return NumOperands; }
+
+  // ---------------------------------------------------------------------------
+  // Operand Iterator interface...
+  //
+  typedef Use*       op_iterator;
+  typedef const Use* const_op_iterator;
+
+  inline op_iterator       op_begin()       { return OperandList; }
+  inline const_op_iterator op_begin() const { return OperandList; }
+  inline op_iterator       op_end()         { return OperandList+NumOperands; }
+  inline const_op_iterator op_end()   const { return OperandList+NumOperands; }
+
+  // dropAllReferences() - This function is in charge of "letting go" of all
+  // objects that this User refers to.  This allows one to
+  // 'delete' a whole class at a time, even though there may be circular
+  // references...  First all references are dropped, and all use counts go to
+  // zero.  Then everything is deleted for real.  Note that no operations are
+  // valid on an object that has "dropped all references", except operator
+  // delete.
+  //
+  void dropAllReferences() {
+    for (op_iterator i = op_begin(), e = op_end(); i != e; ++i)
+      i->set(0);
+  }
+
+  /// replaceUsesOfWith - Replaces all references to the "From" definition with
+  /// references to the "To" definition.
+  ///
+  void replaceUsesOfWith(Value *From, Value *To);
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const User *) { return true; }
+  static inline bool classof(const Value *V) {
+    return isa<Instruction>(V) || isa<Constant>(V);
+  }
+};
+
+template<> struct simplify_type<User::op_iterator> {
+  typedef Value* SimpleType;
+
+  static SimpleType getSimplifiedValue(const User::op_iterator &Val) {
+    return static_cast<SimpleType>(Val->get());
+  }
+};
+
+template<> struct simplify_type<const User::op_iterator>
+  : public simplify_type<User::op_iterator> {};
+
+template<> struct simplify_type<User::const_op_iterator> {
+  typedef Value* SimpleType;
+
+  static SimpleType getSimplifiedValue(const User::const_op_iterator &Val) {
+    return static_cast<SimpleType>(Val->get());
+  }
+};
+
+template<> struct simplify_type<const User::const_op_iterator>
+  : public simplify_type<User::const_op_iterator> {};
+
+
+// value_use_iterator::getOperandNo - Requires the definition of the User class.
+template<typename UserTy>
+unsigned value_use_iterator<UserTy>::getOperandNo() const {
+  return U - U->getUser()->op_begin();
+}
+
+} // End llvm namespace
+
+#endif
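
Aside (editor's illustration, not part of the patch): because a User's operands live in one contiguous Use array, operand access is plain indexing, and the simplify_type specializations let op_iterator dereference straight to Value*. A small sketch under those assumptions (replaceOperandsOf is a hypothetical helper):

    #include "llvm/User.h"

    // Hypothetical helper: rewrite every operand of U that currently holds
    // From so it holds To instead, via the bounds-checked accessors above.
    static void replaceOperandsOf(llvm::User *U, llvm::Value *From,
                                  llvm::Value *To) {
      for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i)
        if (U->getOperand(i) == From)
          U->setOperand(i, To);  // asserts on non-global Constants
    }

This is roughly what the replaceUsesOfWith member declared above is documented to do.
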
diff --git a/final/include/llvm/Value.h b/final/include/llvm/Value.h
new file mode 100644
index 00000000000..130e2735f52
--- /dev/null
+++ b/final/include/llvm/Value.h
@@ -0,0 +1,409 @@
+//===-- llvm/Value.h - Definition of the Value class ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Value class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_VALUE_H
+#define LLVM_VALUE_H
+
+#include "llvm/AbstractTypeUser.h"
+#include "llvm/Use.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include <string>
+
+namespace llvm {
+
+class Constant;
+class Argument;
+class Instruction;
+class BasicBlock;
+class GlobalValue;
+class Function;
+class GlobalVariable;
+class GlobalAlias;
+class InlineAsm;
+class ValueSymbolTable;
+class TypeSymbolTable;
+template<typename ValueTy> class StringMapEntry;
+template <typename ValueTy = Value>
+class AssertingVH;
+typedef StringMapEntry<Value*> ValueName;
+class raw_ostream;
+class AssemblyAnnotationWriter;
+class ValueHandleBase;
+class LLVMContext;
+class Twine;
+class MDNode;
+
+//===----------------------------------------------------------------------===//
+//                                 Value Class
+//===----------------------------------------------------------------------===//
+
+/// This is a very important LLVM class. It is the base class of all values
+/// computed by a program that may be used as operands to other values. Value is
+/// the super class of other important classes such as Instruction and Function.
+/// All Values have a Type. Type is not a subclass of Value. Some Values can
+/// have a name and they should belong to some Module.  Setting the name on the
+/// Value automatically updates the module's symbol table.
+///
+/// Every value has a "use list" that keeps track of which other Values are
+/// using this Value.  A Value can also have an arbitrary number of ValueHandle
+/// objects that watch it and listen to RAUW and Destroy events.  See
+/// llvm/Support/ValueHandle.h for details.
+///
+/// @brief LLVM Value Representation
+class Value {
+  const unsigned char SubclassID;   // Subclass identifier (for isa/dyn_cast)
+  unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
+protected:
+  /// SubclassOptionalData - This member is similar to SubclassData, however it
+  /// is for holding information which may be used to aid optimization, but
+  /// which may be cleared to zero without affecting conservative
+  /// interpretation.
+  unsigned char SubclassOptionalData : 7;
+
+private:
+  /// SubclassData - This member is defined by this class, but is not used for
+  /// anything.  Subclasses can use it to hold whatever state they find useful.
+  /// This field is initialized to zero by the ctor.
+  unsigned short SubclassData;
+
+  PATypeHolder VTy;
+  Use *UseList;
+
+  friend class ValueSymbolTable; // Allow ValueSymbolTable to directly mod Name.
+  friend class ValueHandleBase;
+  friend class AbstractTypeUser;
+  ValueName *Name;
+
+  void operator=(const Value &);     // Do not implement
+  Value(const Value &);              // Do not implement
+
+protected:
+  /// printCustom - Value subclasses can override this to implement custom
+  /// printing behavior.
+  virtual void printCustom(raw_ostream &O) const;
+
+  Value(const Type *Ty, unsigned scid);
+public:
+  virtual ~Value();
+
+  /// dump - Support for debugging, callable in GDB: V->dump()
+  //
+  void dump() const;
+
+  /// print - Implement operator<< on Value.
+  ///
+  void print(raw_ostream &O, AssemblyAnnotationWriter *AAW = 0) const;
+
+  /// All values are typed, get the type of this value.
+  ///
+  inline const Type *getType() const { return VTy; }
+
+  /// All values hold a context through their type.
+  LLVMContext &getContext() const;
+
+  // All values can potentially be named...
+  inline bool hasName() const { return Name != 0; }
+  ValueName *getValueName() const { return Name; }
+
+  /// getName() - Return a constant reference to the value's name. This is cheap
+  /// and guaranteed to return the same reference as long as the value is not
+  /// modified.
+  ///
+  /// This is currently guaranteed to return a StringRef for which data() points
+  /// to a valid null terminated string. The use of StringRef.data() is
+  /// deprecated here, however, and clients should not rely on it. If such
+  /// behavior is needed, clients should use the expensive getNameStr(), or
+  /// switch to an interface that does not depend on null termination.
+  StringRef getName() const;
+
+  /// getNameStr() - Return the name of the specified value, *constructing a
+  /// string* to hold it.  This is guaranteed to construct a string and is very
+  /// expensive, clients should use getName() unless necessary.
+  std::string getNameStr() const;
+
+  /// setName() - Change the name of the value, choosing a new unique name if
+  /// the provided name is taken.
+  ///
+  /// \arg Name - The new name; or "" if the value's name should be removed.
+  void setName(const Twine &Name);
+
+
+  /// takeName - transfer the name from V to this value, setting V's name to
+  /// empty.  It is an error to call V->takeName(V).
+  void takeName(Value *V);
+
+  /// replaceAllUsesWith - Go through the uses list for this definition and make
+  /// each use point to "V" instead of "this".  After this completes, 'this's
+  /// use list is guaranteed to be empty.
+  ///
+  void replaceAllUsesWith(Value *V);
+
+  // uncheckedReplaceAllUsesWith - Just like replaceAllUsesWith but dangerous.
+  // Only use when in type resolution situations!
+  void uncheckedReplaceAllUsesWith(Value *V);
+
+  //----------------------------------------------------------------------
+  // Methods for handling the chain of uses of this Value.
+  //
+  typedef value_use_iterator<User>       use_iterator;
+  typedef value_use_iterator<const User> const_use_iterator;
+
+  bool               use_empty() const { return UseList == 0; }
+  use_iterator       use_begin()       { return use_iterator(UseList); }
+  const_use_iterator use_begin() const { return const_use_iterator(UseList); }
+  use_iterator       use_end()         { return use_iterator(0); }
+  const_use_iterator use_end()   const { return const_use_iterator(0); }
+  User              *use_back()        { return *use_begin(); }
+  const User        *use_back()  const { return *use_begin(); }
+
+  /// hasOneUse - Return true if there is exactly one user of this value.  This
+  /// is specialized because it is a common request and does not require
+  /// traversing the whole use list.
+  ///
+  bool hasOneUse() const {
+    const_use_iterator I = use_begin(), E = use_end();
+    if (I == E) return false;
+    return ++I == E;
+  }
+
+  /// hasNUses - Return true if this Value has exactly N users.
+  ///
+  bool hasNUses(unsigned N) const;
+
+  /// hasNUsesOrMore - Return true if this value has N users or more.  This is
+  /// logically equivalent to getNumUses() >= N.
+  ///
+  bool hasNUsesOrMore(unsigned N) const;
+
+  bool isUsedInBasicBlock(const BasicBlock *BB) const;
+
+  /// getNumUses - This method computes the number of uses of this Value.  This
+  /// is a linear time operation.  Use hasOneUse, hasNUses, or hasNUsesOrMore
+  /// to check for specific values.
+  unsigned getNumUses() const;
+
+  /// addUse - This method should only be used by the Use class.
+  ///
+  void addUse(Use &U) { U.addToList(&UseList); }
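
Aside (editor's illustration, not part of the patch): hasOneUse() inspects at most two use-list nodes, while getNumUses() walks the whole list, which is why the comments above steer clients toward the former. A hypothetical sketch of the common fold-single-use idiom:

    #include "llvm/Value.h"

    // Hypothetical helper: if V has exactly one user, redirect that user to
    // R by rewriting the use list; afterwards V's use list is empty.
    static bool foldSingleUse(llvm::Value *V, llvm::Value *R) {
      if (!V->hasOneUse())       // O(1), unlike getNumUses()
        return false;
      V->replaceAllUsesWith(R);
      return true;
    }
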
+  /// An enumeration for keeping track of the concrete subclass of Value that
+  /// is actually instantiated. Values of this enumeration are kept in the
+  /// Value class's SubclassID field. They are used for concrete type
+  /// identification.
+  enum ValueTy {
+    ArgumentVal,              // This is an instance of Argument
+    BasicBlockVal,            // This is an instance of BasicBlock
+    FunctionVal,              // This is an instance of Function
+    GlobalAliasVal,           // This is an instance of GlobalAlias
+    GlobalVariableVal,        // This is an instance of GlobalVariable
+    UndefValueVal,            // This is an instance of UndefValue
+    BlockAddressVal,          // This is an instance of BlockAddress
+    ConstantExprVal,          // This is an instance of ConstantExpr
+    ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero
+    ConstantIntVal,           // This is an instance of ConstantInt
+    ConstantFPVal,            // This is an instance of ConstantFP
+    ConstantArrayVal,         // This is an instance of ConstantArray
+    ConstantStructVal,        // This is an instance of ConstantStruct
+    ConstantVectorVal,        // This is an instance of ConstantVector
+    ConstantPointerNullVal,   // This is an instance of ConstantPointerNull
+    MDNodeVal,                // This is an instance of MDNode
+    MDStringVal,              // This is an instance of MDString
+    InlineAsmVal,             // This is an instance of InlineAsm
+    PseudoSourceValueVal,     // This is an instance of PseudoSourceValue
+    FixedStackPseudoSourceValueVal, // This is an instance of
+                                    // FixedStackPseudoSourceValue
+    InstructionVal,           // This is an instance of Instruction
+    // Enum values starting at InstructionVal are used for Instructions;
+    // don't add new values here!
+
+    // Markers:
+    ConstantFirstVal = FunctionVal,
+    ConstantLastVal  = ConstantPointerNullVal
+  };
+
+  /// getValueID - Return an ID for the concrete type of this object.  This is
+  /// used to implement the classof checks.  This should not be used for any
+  /// other purpose, as the values may change as LLVM evolves.  Also, note that
+  /// for instructions, the Instruction's opcode is added to InstructionVal. So
+  /// this means three things:
+  /// # there is no value with code InstructionVal (no opcode==0).
+  /// # there are more possible values for the value type than in ValueTy enum.
+  /// # the InstructionVal enumerator must be the highest valued enumerator in
+  ///   the ValueTy enum.
+  unsigned getValueID() const {
+    return SubclassID;
+  }
+
+  /// getRawSubclassOptionalData - Return the raw optional flags value
+  /// contained in this value.  This should only be used when testing two
+  /// Values for equivalence.
+  unsigned getRawSubclassOptionalData() const {
+    return SubclassOptionalData;
+  }
+
+  /// clearSubclassOptionalData - Clear the optional flags contained in
+  /// this value.
+  void clearSubclassOptionalData() {
+    SubclassOptionalData = 0;
+  }
+
+  /// hasSameSubclassOptionalData - Test whether the optional flags contained
+  /// in this value are equal to the optional flags in the given value.
+  bool hasSameSubclassOptionalData(const Value *V) const {
+    return SubclassOptionalData == V->SubclassOptionalData;
+  }
+
+  /// intersectOptionalDataWith - Clear any optional flags in this value
+  /// that are not also set in the given value.
+  void intersectOptionalDataWith(const Value *V) {
+    SubclassOptionalData &= V->SubclassOptionalData;
+  }
+
+  /// hasValueHandle - Return true if there is a value handle associated with
+  /// this value.
+  bool hasValueHandle() const { return HasValueHandle; }
+
+  // Methods for support type inquiry through isa, cast, and dyn_cast:
+  static inline bool classof(const Value *) {
+    return true; // Values are always values.
+  }
+
+  /// getRawType - This should only be used to implement the vmcore library.
+  ///
+  const Type *getRawType() const { return VTy.getRawType(); }
+
+  /// stripPointerCasts - This method strips off any unneeded pointer
+  /// casts from the specified value, returning the original uncasted value.
+  /// Note that the returned value has pointer type if the specified value does.
+  Value *stripPointerCasts();
+  const Value *stripPointerCasts() const {
+    return const_cast<Value*>(this)->stripPointerCasts();
+  }
+
+  /// isDereferenceablePointer - Test if this value is always a pointer to
+  /// allocated and suitably aligned memory for a simple load or store.
+  bool isDereferenceablePointer() const;
+
+  /// DoPHITranslation - If this value is a PHI node with CurBB as its parent,
+  /// return the value in the PHI node corresponding to PredBB.  If not, return
+  /// ourself.  This is useful if you want to know the value something has in a
+  /// predecessor block.
+  Value *DoPHITranslation(const BasicBlock *CurBB, const BasicBlock *PredBB);
+
+  const Value *DoPHITranslation(const BasicBlock *CurBB,
+                                const BasicBlock *PredBB) const{
+    return const_cast<Value*>(this)->DoPHITranslation(CurBB, PredBB);
+  }
+
+  /// MaximumAlignment - This is the greatest alignment value supported by
+  /// load, store, and alloca instructions, and global values.
+  static const unsigned MaximumAlignment = 1u << 29;
+
+protected:
+  unsigned short getSubclassDataFromValue() const { return SubclassData; }
+  void setValueSubclassData(unsigned short D) { SubclassData = D; }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const Value &V) {
+  V.print(OS);
+  return OS;
+}
+
+void Use::set(Value *V) {
+  if (Val) removeFromList();
+  Val = V;
+  if (V) V->addUse(*this);
+}
+
+
+// isa - Provide some specializations of isa so that we don't have to include
+// the subtype header files to test to see if the value is a subclass...
+//
+template <> struct isa_impl<Constant, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() >= Value::ConstantFirstVal &&
+      Val.getValueID() <= Value::ConstantLastVal;
+  }
+};
+
+template <> struct isa_impl<Argument, Value> {
+  static inline bool doit (const Value &Val) {
+    return Val.getValueID() == Value::ArgumentVal;
+  }
+};
+
+template <> struct isa_impl<InlineAsm, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::InlineAsmVal;
+  }
+};
+
+template <> struct isa_impl<Instruction, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() >= Value::InstructionVal;
+  }
+};
+
+template <> struct isa_impl<BasicBlock, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::BasicBlockVal;
+  }
+};
+
+template <> struct isa_impl<Function, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::FunctionVal;
+  }
+};
+
+template <> struct isa_impl<GlobalVariable, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::GlobalVariableVal;
+  }
+};
+
+template <> struct isa_impl<GlobalAlias, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::GlobalAliasVal;
+  }
+};
+
+template <> struct isa_impl<GlobalValue, Value> {
+  static inline bool doit(const Value &Val) {
+    return isa<GlobalVariable>(Val) || isa<Function>(Val) ||
+           isa<GlobalAlias>(Val);
+  }
+};
+
+template <> struct isa_impl<MDNode, Value> {
+  static inline bool doit(const Value &Val) {
+    return Val.getValueID() == Value::MDNodeVal;
+  }
+};
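
Aside (editor's illustration, not part of the patch): these isa_impl specializations are the hooks that let isa<>, cast<>, and dyn_cast<> classify a Value from its SubclassID without RTTI. A hypothetical sketch:

    #include "llvm/Value.h"

    // Hypothetical helper: isa<Instruction> compiles down to a single
    // getValueID() >= Value::InstructionVal comparison via the
    // specializations above; isa<Constant> checks the marker range.
    static const char *kindOf(const llvm::Value *V) {
      if (llvm::isa<llvm::Instruction>(V)) return "instruction";
      if (llvm::isa<llvm::Constant>(V))    return "constant";
      return "other";
    }
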
+
+// Value* is only 4-byte aligned.
+template<>
+class PointerLikeTypeTraits<Value*> {
+  typedef Value* PT;
+public:
+  static inline void *getAsVoidPointer(PT P) { return P; }
+  static inline PT getFromVoidPointer(void *P) {
+    return static_cast<PT>(P);
+  }
+  enum { NumLowBitsAvailable = 2 };
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/include/llvm/ValueSymbolTable.h b/final/include/llvm/ValueSymbolTable.h
new file mode 100644
index 00000000000..1738cc4a7a7
--- /dev/null
+++ b/final/include/llvm/ValueSymbolTable.h
@@ -0,0 +1,133 @@
+//===-- llvm/ValueSymbolTable.h - Implement a Value Symtab ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the name/Value symbol table for LLVM.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_VALUE_SYMBOL_TABLE_H
+#define LLVM_VALUE_SYMBOL_TABLE_H
+
+#include "llvm/Value.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  template<typename ValueSubClass, typename ItemParentClass>
+        class SymbolTableListTraits;
+  class BasicBlock;
+  class Function;
+  class NamedMDNode;
+  class Module;
+  class StringRef;
+
+/// This class provides a symbol table of name/value pairs. It is essentially
+/// a std::map<std::string,Value*> but has a controlled interface provided by
+/// LLVM as well as ensuring uniqueness of names.
+///
+class ValueSymbolTable {
+  friend class Value;
+  friend class SymbolTableListTraits<Argument, Function>;
+  friend class SymbolTableListTraits<BasicBlock, Function>;
+  friend class SymbolTableListTraits<Instruction, BasicBlock>;
+  friend class SymbolTableListTraits<Function, Module>;
+  friend class SymbolTableListTraits<GlobalVariable, Module>;
+  friend class SymbolTableListTraits<GlobalAlias, Module>;
+/// @name Types
+/// @{
+public:
+  /// @brief A mapping of names to values.
+  typedef StringMap<Value*> ValueMap;
+
+  /// @brief An iterator over a ValueMap.
+  typedef ValueMap::iterator iterator;
+
+  /// @brief A const_iterator over a ValueMap.
+  typedef ValueMap::const_iterator const_iterator;
+
+/// @}
+/// @name Constructors
+/// @{
+public:
+
+  ValueSymbolTable() : vmap(0), LastUnique(0) {}
+  ~ValueSymbolTable();
+
+/// @}
+/// @name Accessors
+/// @{
+public:
+
+  /// This method finds the value with the given \p Name in the
+  /// symbol table.
+  /// @returns the value associated with the \p Name
+  /// @brief Lookup a named Value.
+  Value *lookup(StringRef Name) const { return vmap.lookup(Name); }
+
+  /// @returns true iff the symbol table is empty
+  /// @brief Determine if the symbol table is empty
+  inline bool empty() const { return vmap.empty(); }
+
+  /// @brief The number of name/type pairs is returned.
+  inline unsigned size() const { return unsigned(vmap.size()); }
+
+  /// This function can be used from the debugger to display the
+  /// content of the symbol table while debugging.
+  /// @brief Print out symbol table on stderr
+  void dump() const;
+
+/// @}
+/// @name Iteration
+/// @{
+public:
+  /// @brief Get an iterator to the beginning of the symbol table.
+  inline iterator begin() { return vmap.begin(); }
+
+  /// @brief Get a const_iterator to the beginning of the symbol table.
+  inline const_iterator begin() const { return vmap.begin(); }
+
+  /// @brief Get an iterator to the end of the symbol table.
+  inline iterator end() { return vmap.end(); }
+
+  /// @brief Get a const_iterator to the end of the symbol table.
+  inline const_iterator end() const { return vmap.end(); }
+
+/// @}
+/// @name Mutators
+/// @{
+private:
+  /// This method adds the provided value \p V to the symbol table.  The Value
+  /// must have a name which is used to place the value in the symbol table.
+  /// If the inserted name conflicts, this renames the value.
+  /// @brief Add a named value to the symbol table
+  void reinsertValue(Value *V);
+
+  /// createValueName - This method attempts to create a value name and insert
+  /// it into the symbol table with the specified name.  If it conflicts, it
+  /// auto-renames the name and returns that instead.
+  ValueName *createValueName(StringRef Name, Value *V);
+
+  /// This method removes a value from the symbol table.  It leaves the
+  /// ValueName attached to the value, but it is no longer inserted in the
+  /// symtab.
+  void removeValueName(ValueName *V);
+
+/// @}
+/// @name Internal Data
+/// @{
+private:
+  ValueMap vmap;                    ///< The map that holds the symbol table.
+  mutable uint32_t LastUnique;      ///< Counter for tracking unique names
+
+/// @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/Analysis/AliasAnalysis.cpp b/final/lib/Analysis/AliasAnalysis.cpp
new file mode 100644
index 00000000000..be02ddbaa53
--- /dev/null
+++ b/final/lib/Analysis/AliasAnalysis.cpp
@@ -0,0 +1,383 @@
+//===- AliasAnalysis.cpp - Generic Alias Analysis Interface Implementation -==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the generic AliasAnalysis interface, which is used as
+// the common interface by all clients and implementations of alias analysis.
+//
+// This file also implements the default version of the AliasAnalysis interface
+// that is to be used when no other implementation is specified.  This does some
+// simple tests that detect obvious cases: two different global pointers cannot
+// alias, a global cannot alias a malloc, two different mallocs cannot alias,
+// etc.
+//
+// This alias analysis implementation really isn't very good for anything, but
+// it is very fast, and makes a nice clean default implementation.  Because it
+// handles lots of little corner cases, other, more complex, alias analysis
+// implementations may choose to rely on this pass to resolve these simple and
+// easy cases.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Pass.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+// Register the AliasAnalysis interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA)
+char AliasAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+// Default chaining methods
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::AliasResult
+AliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->alias(LocA, LocB);
+}
+
+bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
+                                           bool OrLocal) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->pointsToConstantMemory(Loc, OrLocal);
+}
+
+void AliasAnalysis::deleteValue(Value *V) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->deleteValue(V);
+}
+
+void AliasAnalysis::copyValue(Value *From, Value *To) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->copyValue(From, To);
+}
+
+void AliasAnalysis::addEscapingUse(Use &U) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  AA->addEscapingUse(U);
+}
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                             const Location &Loc) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+  ModRefBehavior MRB = getModRefBehavior(CS);
+  if (MRB == DoesNotAccessMemory)
+    return NoModRef;
+
+  ModRefResult Mask = ModRef;
+  if (onlyReadsMemory(MRB))
+    Mask = Ref;
+
+  if (onlyAccessesArgPointees(MRB)) {
+    bool doesAlias = false;
+    if (doesAccessArgPointees(MRB))
+      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+           AI != AE; ++AI)
+        if (!isNoAlias(Location(*AI), Loc)) {
+          doesAlias = true;
+          break;
+        }
+
+    if (!doesAlias)
+      return NoModRef;
+  }
+
+  // If Loc is a constant memory location, the call definitely could not
+  // modify the memory location.
+  if ((Mask & Mod) && pointsToConstantMemory(Loc))
+    Mask = ModRefResult(Mask & ~Mod);
+
+  // If this is the end of the chain, don't forward.
+  if (!AA) return Mask;
+
+  // Otherwise, fall back to the next AA in the chain. But we can merge
+  // in any mask we've managed to compute.
+  return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask);
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+  // If CS1 or CS2 are readnone, they don't interact.
+  ModRefBehavior CS1B = getModRefBehavior(CS1);
+  if (CS1B == DoesNotAccessMemory) return NoModRef;
+
+  ModRefBehavior CS2B = getModRefBehavior(CS2);
+  if (CS2B == DoesNotAccessMemory) return NoModRef;
+
+  // If they both only read from memory, there is no dependence.
+  if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B))
+    return NoModRef;
+
+  AliasAnalysis::ModRefResult Mask = ModRef;
+
+  // If CS1 only reads memory, the only dependence on CS2 can be
+  // from CS1 reading memory written by CS2.
+  if (onlyReadsMemory(CS1B))
+    Mask = ModRefResult(Mask & Ref);
+
+  // If CS2 only accesses memory through arguments, accumulate the mod/ref
+  // information from CS1's references to the memory referenced by
+  // CS2's arguments.
+  if (onlyAccessesArgPointees(CS2B)) {
+    AliasAnalysis::ModRefResult R = NoModRef;
+    if (doesAccessArgPointees(CS2B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
+        R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask);
+        if (R == Mask)
+          break;
+      }
+    return R;
+  }
+
+  // If CS1 only accesses memory through arguments, check if CS2 references
+  // any of the memory referenced by CS1's arguments. If not, return NoModRef.
+  if (onlyAccessesArgPointees(CS1B)) {
+    AliasAnalysis::ModRefResult R = NoModRef;
+    if (doesAccessArgPointees(CS1B))
+      for (ImmutableCallSite::arg_iterator
+           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I)
+        if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) {
+          R = Mask;
+          break;
+        }
+    if (R == NoModRef)
+      return R;
+  }
+
+  // If this is the end of the chain, don't forward.
+  if (!AA) return Mask;
+
+  // Otherwise, fall back to the next AA in the chain. But we can merge
+  // in any mask we've managed to compute.
+  return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+
+  ModRefBehavior Min = UnknownModRefBehavior;
+
+  // Call back into the alias analysis with the other form of getModRefBehavior
+  // to see if it can give a better response.
+  if (const Function *F = CS.getCalledFunction())
+    Min = getModRefBehavior(F);
+
+  // If this is the end of the chain, don't forward.
+  if (!AA) return Min;
+
+  // Otherwise, fall back to the next AA in the chain. But we can merge
+  // in any result we've managed to compute.
+  return ModRefBehavior(AA->getModRefBehavior(CS) & Min);
+}
+
+AliasAnalysis::ModRefBehavior
+AliasAnalysis::getModRefBehavior(const Function *F) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->getModRefBehavior(F);
+}
+
+//===----------------------------------------------------------------------===//
+// AliasAnalysis non-virtual helper method implementation
+//===----------------------------------------------------------------------===//
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const LoadInst *LI) {
+  return Location(LI->getPointerOperand(),
+                  getTypeStoreSize(LI->getType()),
+                  LI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const StoreInst *SI) {
+  return Location(SI->getPointerOperand(),
+                  getTypeStoreSize(SI->getValueOperand()->getType()),
+                  SI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+AliasAnalysis::Location AliasAnalysis::getLocation(const VAArgInst *VI) {
+  return Location(VI->getPointerOperand(),
+                  UnknownSize,
+                  VI->getMetadata(LLVMContext::MD_tbaa));
+}
+
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForSource(const MemTransferInst *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+  return Location(MTI->getRawSource(), Size, TBAATag);
+}
+
+AliasAnalysis::Location
+AliasAnalysis::getLocationForDest(const MemIntrinsic *MTI) {
+  uint64_t Size = UnknownSize;
+  if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+    Size = C->getValue().getZExtValue();
+
+  // memcpy/memmove can have TBAA tags. For memcpy, they apply
+  // to both the source and the destination.
+  MDNode *TBAATag = MTI->getMetadata(LLVMContext::MD_tbaa);
+
+  return Location(MTI->getRawDest(), Size, TBAATag);
+}
+
+
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const LoadInst *L, const Location &Loc) {
+  // Be conservative in the face of volatile.
+  if (L->isVolatile())
+    return ModRef;
+
+  // If the load address doesn't alias the given address, it doesn't read
+  // or write the specified memory.
+  if (!alias(getLocation(L), Loc))
+    return NoModRef;
+
+  // Otherwise, a load just reads.
+  return Ref;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const StoreInst *S, const Location &Loc) {
+  // Be conservative in the face of volatile.
+  if (S->isVolatile())
+    return ModRef;
+
+  // If the store address cannot alias the pointer in question, then the
+  // specified memory cannot be modified by the store.
+  if (!alias(getLocation(S), Loc))
+    return NoModRef;
+
+  // If the pointer is a pointer to constant memory, then it could not have
+  // been modified by this store.
+  if (pointsToConstantMemory(Loc))
+    return NoModRef;
+
+  // Otherwise, a store just writes.
+  return Mod;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysis::getModRefInfo(const VAArgInst *V, const Location &Loc) {
+  // If the va_arg address cannot alias the pointer in question, then the
+  // specified memory cannot be accessed by the va_arg.
+  if (!alias(getLocation(V), Loc))
+    return NoModRef;
+
+  // If the pointer is a pointer to constant memory, then it could not have
+  // been modified by this va_arg.
+  if (pointsToConstantMemory(Loc))
+    return NoModRef;
+
+  // Otherwise, a va_arg reads and writes.
+  return ModRef;
+}
+
+// AliasAnalysis destructor: DO NOT move this to the header file for
+// AliasAnalysis or else clients of the AliasAnalysis class may not depend on
+// the AliasAnalysis.o file in the current .a file, causing alias analysis
+// support to not be included in the tool correctly!
+//
+AliasAnalysis::~AliasAnalysis() {}
+
+/// InitializeAliasAnalysis - Subclasses must call this method to initialize the
+/// AliasAnalysis interface before any other methods are called.
+///
+void AliasAnalysis::InitializeAliasAnalysis(Pass *P) {
+  TD = P->getAnalysisIfAvailable<TargetData>();
+  AA = &P->getAnalysis<AliasAnalysis>();
+}
+
+// getAnalysisUsage - All alias analysis implementations should invoke this
+// directly (using AliasAnalysis::getAnalysisUsage(AU)).
+void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();         // All AA's chain
+}
+
+/// getTypeStoreSize - Return the TargetData store size for the given type,
+/// if known, or a conservative value otherwise.
+///
+uint64_t AliasAnalysis::getTypeStoreSize(const Type *Ty) {
+  return TD ? TD->getTypeStoreSize(Ty) : UnknownSize;
+}
+
+/// canBasicBlockModify - Return true if it is possible for execution of the
+/// specified basic block to modify the value pointed to by Ptr.
+///
+bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,
+                                        const Location &Loc) {
+  return canInstructionRangeModify(BB.front(), BB.back(), Loc);
+}
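
Aside (editor's illustration, not part of the patch): the Location helpers and ModRef masks above compose into client queries. A hypothetical sketch, assuming the LLVM 2.9 AliasAnalysis interface this file implements:

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Instructions.h"

    // Hypothetical helper: may instruction I write to the memory that load
    // L reads? Builds L's Location (pointer, store size, TBAA tag) and
    // tests the Mod bit of the mod/ref answer.
    static bool mayClobberLoad(llvm::AliasAnalysis &AA,
                               llvm::Instruction *I, llvm::LoadInst *L) {
      llvm::AliasAnalysis::Location Loc = AA.getLocation(L);
      return (AA.getModRefInfo(I, Loc) & llvm::AliasAnalysis::Mod) != 0;
    }
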
+/// canInstructionRangeModify - Return true if it is possible for the execution
+/// of the specified instructions to modify the value pointed to by Ptr. The
+/// instructions to consider are all of the instructions in the range of [I1,I2]
+/// INCLUSIVE. I1 and I2 must be in the same basic block.
+///
+bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
+                                              const Instruction &I2,
+                                              const Location &Loc) {
+  assert(I1.getParent() == I2.getParent() &&
+         "Instructions not in same basic block!");
+  BasicBlock::const_iterator I = &I1;
+  BasicBlock::const_iterator E = &I2;
+  ++E;  // Convert from inclusive to exclusive range.
+
+  for (; I != E; ++I) // Check every instruction in range
+    if (getModRefInfo(I, Loc) & Mod)
+      return true;
+  return false;
+}
+
+/// isNoAliasCall - Return true if this pointer is returned by a noalias
+/// function.
+bool llvm::isNoAliasCall(const Value *V) {
+  if (isa<CallInst>(V) || isa<InvokeInst>(V))
+    return ImmutableCallSite(cast<Instruction>(V))
+      .paramHasAttr(0, Attribute::NoAlias);
+  return false;
+}
+
+/// isIdentifiedObject - Return true if this pointer refers to a distinct and
+/// identifiable object.  This returns true for:
+///    Global Variables and Functions (but not Global Aliases)
+///    Allocas and Mallocs
+///    ByVal and NoAlias Arguments
+///    NoAlias returns
+///
+bool llvm::isIdentifiedObject(const Value *V) {
+  if (isa<AllocaInst>(V))
+    return true;
+  if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
+    return true;
+  if (isNoAliasCall(V))
+    return true;
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasNoAliasAttr() || A->hasByValAttr();
+  return false;
+}
diff --git a/final/lib/Analysis/AliasAnalysisCounter.cpp b/final/lib/Analysis/AliasAnalysisCounter.cpp
new file mode 100644
index 00000000000..d947220e078
--- /dev/null
+++ b/final/lib/Analysis/AliasAnalysisCounter.cpp
@@ -0,0 +1,173 @@
+//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass which can be used to count how many alias queries
+// are being made and how the alias analysis implementation being used responds.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true));
+static cl::opt<bool>
+PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden);
+
+namespace {
+  class AliasAnalysisCounter : public ModulePass, public AliasAnalysis {
+    unsigned No, May, Partial, Must;
+    unsigned NoMR, JustRef, JustMod, MR;
+    Module *M;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    AliasAnalysisCounter() : ModulePass(ID) {
+      initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry());
+      No = May = Partial = Must = 0;
+      NoMR = JustRef = JustMod = MR = 0;
+    }
+
+    void printLine(const char *Desc, unsigned Val, unsigned Sum) {
+      errs() << "    " << Val << " " << Desc << " responses ("
+             << Val*100/Sum << "%)\n";
+    }
+    ~AliasAnalysisCounter() {
+      unsigned AASum = No+May+Partial+Must;
+      unsigned MRSum = NoMR+JustRef+JustMod+MR;
+      if (AASum + MRSum) { // Print a report if any counted queries occurred...
+        errs() << "\n===== Alias Analysis Counter Report =====\n"
+               << "  Analysis counted:\n"
+               << "  " << AASum << " Total Alias Queries Performed\n";
+        if (AASum) {
+          printLine("no alias",      No,      AASum);
+          printLine("may alias",     May,     AASum);
+          printLine("partial alias", Partial, AASum);
+          printLine("must alias",    Must,    AASum);
+          errs() << "  Alias Analysis Counter Summary: " << No*100/AASum << "%/"
+                 << May*100/AASum << "%/"
+                 << Partial*100/AASum << "%/"
+                 << Must*100/AASum << "%\n\n";
+        }
+
+        errs() << "  " << MRSum << " Total Mod/Ref Queries Performed\n";
+        if (MRSum) {
+          printLine("no mod/ref", NoMR,    MRSum);
+          printLine("ref",        JustRef, MRSum);
+          printLine("mod",        JustMod, MRSum);
+          printLine("mod/ref",    MR,      MRSum);
+          errs() << "  Mod/Ref Analysis Counter Summary: " << NoMR*100/MRSum
+                 << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum
+                 << "%/" << MR*100/MRSum << "%\n\n";
+        }
+      }
+    }
+
+    bool runOnModule(Module &M) {
+      this->M = &M;
+      InitializeAliasAnalysis(this);
+      return false;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.addRequired<AliasAnalysis>();
+      AU.setPreservesAll();
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+    // FIXME: We could count these too...
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal);
+    }
+
+    // Forwarding functions: just delegate to a real AA implementation, counting
+    // the number of responses...
+    AliasResult alias(const Location &LocA, const Location &LocB);
+
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc);
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) {
+      return AliasAnalysis::getModRefInfo(CS1,CS2);
+    }
+  };
+}
+
+char AliasAnalysisCounter::ID = 0;
+INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa",
+                   "Count Alias Analysis Query Responses", false, true, false)
+
+ModulePass *llvm::createAliasAnalysisCounterPass() {
+  return new AliasAnalysisCounter();
+}
+
+AliasAnalysis::AliasResult
+AliasAnalysisCounter::alias(const Location &LocA, const Location &LocB) {
+  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB);
+
+  const char *AliasString;
+  switch (R) {
+  default: llvm_unreachable("Unknown alias type!");
+  case NoAlias:      No++;      AliasString = "No alias";      break;
+  case MayAlias:     May++;     AliasString = "May alias";     break;
+  case PartialAlias: Partial++; AliasString = "Partial alias"; break;
+  case MustAlias:    Must++;    AliasString = "Must alias";    break;
+  }
+
+  if (PrintAll || (PrintAllFailures && R == MayAlias)) {
+    errs() << AliasString << ":\t";
+    errs() << "[" << LocA.Size << "B] ";
+    WriteAsOperand(errs(), LocA.Ptr, true, M);
+    errs() << ", ";
+    errs() << "[" << LocB.Size << "B] ";
+    WriteAsOperand(errs(), LocB.Ptr, true, M);
+    errs() << "\n";
+  }
+
+  return R;
+}
+
+AliasAnalysis::ModRefResult
+AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS,
+                                    const Location &Loc) {
+  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc);
+
+  const char *MRString;
+  switch (R) {
+  default: llvm_unreachable("Unknown mod/ref type!");
+  case NoModRef: NoMR++;    MRString = "NoModRef"; break;
+  case Ref:      JustRef++; MRString = "JustRef";  break;
+  case Mod:      JustMod++; MRString = "JustMod";  break;
+  case ModRef:   MR++;      MRString = "ModRef";   break;
+  }
+
+  if (PrintAll || (PrintAllFailures && R == ModRef)) {
+    errs() << MRString << ": Ptr: ";
+    errs() << "[" << Loc.Size << "B] ";
+    WriteAsOperand(errs(), Loc.Ptr, true, M);
+    errs() << "\t<->" << *CS.getInstruction() << '\n';
+  }
+  return R;
+}
diff --git a/final/lib/Analysis/AliasAnalysisEvaluator.cpp b/final/lib/Analysis/AliasAnalysisEvaluator.cpp
new file mode 100644
index 00000000000..1afc1b71d93
--- /dev/null
+++ b/final/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -0,0 +1,304 @@
+//===- AliasAnalysisEvaluator.cpp - Alias Analysis Accuracy Evaluator -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple N^2 alias analysis accuracy evaluator.
+// Basically, for each function in the program, it simply queries to see how the
+// alias analysis implementation answers alias queries between each pair of
+// pointers in the function.
+//
+// This is inspired and adapted from code by: Naveen Neelakantam, Francesco
+// Spadini, and Wojciech Stryjewski.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintAll("print-all-alias-modref-info", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoAlias("print-no-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMayAlias("print-may-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintPartialAlias("print-partial-aliases", cl::ReallyHidden);
+static cl::opt<bool> PrintMustAlias("print-must-aliases", cl::ReallyHidden);
+
+static cl::opt<bool> PrintNoModRef("print-no-modref", cl::ReallyHidden);
+static cl::opt<bool> PrintMod("print-mod", cl::ReallyHidden);
+static cl::opt<bool> PrintRef("print-ref", cl::ReallyHidden);
+static cl::opt<bool> PrintModRef("print-modref", cl::ReallyHidden);
+
+namespace {
+  class AAEval : public FunctionPass {
+    unsigned NoAlias, MayAlias, PartialAlias, MustAlias;
+    unsigned NoModRef, Mod, Ref, ModRef;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    AAEval() : FunctionPass(ID) {
+      initializeAAEvalPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<AliasAnalysis>();
+      AU.setPreservesAll();
+    }
+
+    bool doInitialization(Module &M) {
+      NoAlias = MayAlias = PartialAlias = MustAlias = 0;
+      NoModRef = Mod = Ref = ModRef = 0;
+
+      if (PrintAll) {
+        PrintNoAlias = PrintMayAlias = true;
+        PrintPartialAlias = PrintMustAlias = true;
+        PrintNoModRef = PrintMod = PrintRef = PrintModRef = true;
+      }
+      return false;
+    }
+
+    bool runOnFunction(Function &F);
+    bool doFinalization(Module &M);
+  };
+}
+
+char AAEval::ID = 0;
+INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(AAEval, "aa-eval",
+                "Exhaustive Alias Analysis Precision Evaluator", false, true)
+
+FunctionPass *llvm::createAAEvalPass() { return new AAEval(); }
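
Aside (editor's note, not part of the patch): a plausible way to exercise this evaluator with the opt tool of this release, using the flags registered above, would be:

    opt -aa-eval -print-all-alias-modref-info -disable-output input.bc

Each function then gets a per-pair report from runOnFunction, and doFinalization prints the aggregate percentages.
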
+static void PrintResults(const char *Msg, bool P, const Value *V1,
+                         const Value *V2, const Module *M) {
+  if (P) {
+    std::string o1, o2;
+    {
+      raw_string_ostream os1(o1), os2(o2);
+      WriteAsOperand(os1, V1, true, M);
+      WriteAsOperand(os2, V2, true, M);
+    }
+
+    if (o2 < o1)
+      std::swap(o1, o2);
+    errs() << "  " << Msg << ":\t"
+           << o1 << ", "
+           << o2 << "\n";
+  }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, Instruction *I, Value *Ptr,
+                   Module *M) {
+  if (P) {
+    errs() << "  " << Msg << ":  Ptr: ";
+    WriteAsOperand(errs(), Ptr, true, M);
+    errs() << "\t<->" << *I << '\n';
+  }
+}
+
+static inline void
+PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB,
+                   Module *M) {
+  if (P) {
+    errs() << "  " << Msg << ": " << *CSA.getInstruction()
+           << " <-> " << *CSB.getInstruction() << '\n';
+  }
+}
+
+static inline bool isInterestingPointer(Value *V) {
+  return V->getType()->isPointerTy()
+      && !isa<ConstantPointerNull>(V);
+}
+
+bool AAEval::runOnFunction(Function &F) {
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+
+  SetVector<Value *> Pointers;
+  SetVector<CallSite> CallSites;
+
+  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
+    if (I->getType()->isPointerTy())    // Add all pointer arguments.
+      Pointers.insert(I);
+
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+    if (I->getType()->isPointerTy()) // Add all pointer instructions.
+      Pointers.insert(&*I);
+    Instruction &Inst = *I;
+    if (CallSite CS = cast<Value>(&Inst)) {
+      Value *Callee = CS.getCalledValue();
+      // Skip actual functions for direct function calls.
+      if (!isa<Function>(Callee) && isInterestingPointer(Callee))
+        Pointers.insert(Callee);
+      // Consider formals.
+      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+           AI != AE; ++AI)
+        if (isInterestingPointer(*AI))
+          Pointers.insert(*AI);
+      CallSites.insert(CS);
+    } else {
+      // Consider all operands.
+      for (Instruction::op_iterator OI = Inst.op_begin(), OE = Inst.op_end();
+           OI != OE; ++OI)
+        if (isInterestingPointer(*OI))
+          Pointers.insert(*OI);
+    }
+  }
+
+  if (PrintNoAlias || PrintMayAlias || PrintPartialAlias || PrintMustAlias ||
+      PrintNoModRef || PrintMod || PrintRef || PrintModRef)
+    errs() << "Function: " << F.getName() << ": " << Pointers.size()
+           << " pointers, " << CallSites.size() << " call sites\n";
+
+  // iterate over the worklist, and run the full (n^2)/2 disambiguations
+  for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
+       I1 != E; ++I1) {
+    uint64_t I1Size = AliasAnalysis::UnknownSize;
+    const Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+    if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy);
+
+    for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
+      uint64_t I2Size = AliasAnalysis::UnknownSize;
+      const Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+      if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy);
+
+      switch (AA.alias(*I1, I1Size, *I2, I2Size)) {
+      case AliasAnalysis::NoAlias:
+        PrintResults("NoAlias", PrintNoAlias, *I1, *I2, F.getParent());
+        ++NoAlias; break;
+      case AliasAnalysis::MayAlias:
+        PrintResults("MayAlias", PrintMayAlias, *I1, *I2, F.getParent());
+        ++MayAlias; break;
+      case AliasAnalysis::PartialAlias:
+        PrintResults("PartialAlias", PrintPartialAlias, *I1, *I2,
+                     F.getParent());
+        ++PartialAlias; break;
+      case AliasAnalysis::MustAlias:
+        PrintResults("MustAlias", PrintMustAlias, *I1, *I2, F.getParent());
+        ++MustAlias; break;
+      default:
+        errs() << "Unknown alias query result!\n";
+      }
+    }
+  }
+
+  // Mod/ref alias analysis: compare all pairs of calls and values
+  for (SetVector<CallSite>::iterator C = CallSites.begin(),
+         Ce = CallSites.end(); C != Ce; ++C) {
+    Instruction *I = C->getInstruction();
+
+    for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();
+         V != Ve; ++V) {
+      uint64_t Size = AliasAnalysis::UnknownSize;
+      const Type *ElTy = cast<PointerType>((*V)->getType())->getElementType();
+      if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy);
+
+      switch (AA.getModRefInfo(*C, *V, Size)) {
+      case AliasAnalysis::NoModRef:
+        PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());
+        ++NoModRef; break;
+      case AliasAnalysis::Mod:
+        PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());
+        ++Mod; break;
+      case AliasAnalysis::Ref:
+        PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());
+        ++Ref; break;
+      case AliasAnalysis::ModRef:
+        PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());
+        ++ModRef; break;
+      default:
+        errs() << "Unknown alias query result!\n";
+      }
+    }
+  }
+
+  // Mod/ref alias analysis: compare all pairs of calls
+  for (SetVector<CallSite>::iterator C = CallSites.begin(),
+         Ce = CallSites.end(); C != Ce; ++C) {
+    for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) {
+      if (D == C)
+        continue;
+      switch (AA.getModRefInfo(*C, *D)) {
+      case AliasAnalysis::NoModRef:
+        PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());
+        ++NoModRef; break;
+      case AliasAnalysis::Mod:
+        PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());
+        ++Mod; break;
+      case AliasAnalysis::Ref:
+        PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());
+        ++Ref; break;
+      case AliasAnalysis::ModRef:
+        PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());
+        ++ModRef; break;
+      }
+    }
+  }
+
+  return false;
+}
+
+static void PrintPercent(unsigned Num, unsigned Sum) {
+  errs() << "(" << Num*100ULL/Sum << "."
+         << ((Num*1000ULL/Sum) % 10) << "%)\n";
+}
+
+bool AAEval::doFinalization(Module &M) {
+  unsigned AliasSum = NoAlias + MayAlias + PartialAlias + MustAlias;
+  errs() << "===== Alias Analysis Evaluator Report =====\n";
+  if (AliasSum == 0) {
+    errs() << "  Alias Analysis Evaluator Summary: No pointers!\n";
+  } else {
+    errs() << "  " << AliasSum << " Total Alias Queries Performed\n";
+    errs() << "  " << NoAlias << " no alias responses ";
+    PrintPercent(NoAlias, AliasSum);
+    errs() << "  " << MayAlias << " may alias responses ";
+    PrintPercent(MayAlias, AliasSum);
+    errs() << "  " << PartialAlias << " partial alias responses ";
+    PrintPercent(PartialAlias, AliasSum);
+    errs() << "  " << MustAlias << " must alias responses ";
+    PrintPercent(MustAlias, AliasSum);
+    errs() << "  Alias Analysis Evaluator Pointer Alias Summary: "
+           << NoAlias*100/AliasSum << "%/" << MayAlias*100/AliasSum << "%/"
+           << PartialAlias*100/AliasSum << "%/"
+           << MustAlias*100/AliasSum << "%\n";
+  }
+
+  // Display the summary for mod/ref analysis
+  unsigned ModRefSum = NoModRef + Mod + Ref + ModRef;
+  if (ModRefSum == 0) {
+    errs() << "  Alias Analysis Mod/Ref Evaluator Summary: no mod/ref!\n";
+  } else {
+    errs() << "  " << ModRefSum << " Total ModRef Queries Performed\n";
+    errs() << "  " << NoModRef << " no mod/ref responses ";
+    PrintPercent(NoModRef, ModRefSum);
+    errs() << "  " << Mod << " mod responses ";
+    PrintPercent(Mod, ModRefSum);
+    errs() << "  " << Ref << " ref responses ";
+    PrintPercent(Ref, ModRefSum);
+    errs() << "  " << ModRef << " mod & ref responses ";
+    PrintPercent(ModRef, ModRefSum);
+    errs() << "  Alias Analysis Evaluator Mod/Ref Summary: "
+           << NoModRef*100/ModRefSum << "%/" << Mod*100/ModRefSum << "%/"
+           << Ref*100/ModRefSum << "%/" << ModRef*100/ModRefSum << "%\n";
+  }
+
+  return false;
+}
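
Aside (editor's note, not part of the patch): PrintPercent above renders one decimal place with pure integer arithmetic: Num*100ULL/Sum is the whole percent, (Num*1000ULL/Sum) % 10 the tenths digit, and the 64-bit multiplies avoid overflow for large counts. A standalone check of the same expressions:

    #include <cstdio>

    int main() {
      unsigned Num = 1, Sum = 3;
      // Same arithmetic as AAEval's PrintPercent: prints "(33.3%)".
      std::printf("(%llu.%llu%%)\n",
                  Num * 100ULL / Sum, (Num * 1000ULL / Sum) % 10);
      return 0;
    }
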
diff --git a/final/lib/Analysis/AliasDebugger.cpp b/final/lib/Analysis/AliasDebugger.cpp
new file mode 100644
index 00000000000..f15c05153e1
--- /dev/null
+++ b/final/lib/Analysis/AliasDebugger.cpp
@@ -0,0 +1,138 @@
+//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass checks alias analysis users to ensure that if they
+// create a new value, they do not query AA without informing it of the value.
+// It acts as a shim over any other AA pass you want.
+//
+// Yes, keeping track of every value in the program is expensive, but this is
+// a debugging pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include <set>
+using namespace llvm;
+
+namespace {
+
+  class AliasDebugger : public ModulePass, public AliasAnalysis {
+
+    // What we do is simple.  Keep track of every value the AA could
+    // know about, and verify that queries are one of those.
+    // A query to a value that didn't exist when the AA was created
+    // means someone forgot to update the AA when creating new values.
+
+    std::set<const Value*> Vals;
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    AliasDebugger() : ModulePass(ID) {
+      initializeAliasDebuggerPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnModule(Module &M) {
+      InitializeAliasAnalysis(this);                 // set up super class
+
+      for(Module::global_iterator I = M.global_begin(),
+            E = M.global_end(); I != E; ++I) {
+        Vals.insert(&*I);
+        for (User::const_op_iterator OI = I->op_begin(),
+               OE = I->op_end(); OI != OE; ++OI)
+          Vals.insert(*OI);
+      }
+
+      for(Module::iterator I = M.begin(),
+            E = M.end(); I != E; ++I){
+        Vals.insert(&*I);
+        if(!I->isDeclaration()) {
+          for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end();
+               AI != AE; ++AI)
+            Vals.insert(&*AI);
+          for (Function::const_iterator FI = I->begin(), FE = I->end();
+               FI != FE; ++FI)
+            for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
+                 BI != BE; ++BI) {
+              Vals.insert(&*BI);
+              for (User::const_op_iterator OI = BI->op_begin(),
+                     OE = BI->op_end(); OI != OE; ++OI)
+                Vals.insert(*OI);
+            }
+        }
+
+      }
+      return false;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.setPreservesAll();                         // Does not transform code
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+    //------------------------------------------------
+    // Implement the AliasAnalysis API
+    //
+    AliasResult alias(const Location &LocA, const Location &LocB) {
+      assert(Vals.find(LocA.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      assert(Vals.find(LocB.Ptr) != Vals.end() &&
+             "Never seen value in AA before");
+      return AliasAnalysis::alias(LocA, LocB);
+    }
+
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::getModRefInfo(CS, Loc);
+    }
+
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) {
+      return AliasAnalysis::getModRefInfo(CS1,CS2);
+    }
+
+    bool pointsToConstantMemory(const Location &Loc, bool OrLocal) {
+      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before");
+      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+    }
+
+    virtual void deleteValue(Value *V) {
+      assert(Vals.find(V) != Vals.end() && "Never seen value in AA before");
+      AliasAnalysis::deleteValue(V);
+    }
+    virtual void copyValue(Value *From, Value *To) {
+      Vals.insert(To);
+      AliasAnalysis::copyValue(From, To);
+    }
+
+  };
+}
+
+char AliasDebugger::ID = 0;
+INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa",
+                   "AA use debugger", false, true, false)
+
+Pass *llvm::createAliasDebugger() { return new AliasDebugger(); }
+
diff --git a/final/lib/Analysis/AliasSetTracker.cpp b/final/lib/Analysis/AliasSetTracker.cpp
new file mode 100644
index 00000000000..3a46976d66f
--- /dev/null
+++ b/final/lib/Analysis/AliasSetTracker.cpp
@@ -0,0 +1,648 @@
+//===- AliasSetTracker.cpp - Alias Sets Tracker implementation------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AliasSetTracker and AliasSet classes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Pass.h"
+#include "llvm/Type.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// mergeSetIn - Merge the specified alias set into this alias set.
+///
+void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
+  assert(!AS.Forward && "Alias set is already forwarding!");
+  assert(!Forward && "This set is a forwarding set!!");
+
+  // Update the alias and access types of this set...
+  AccessTy |= AS.AccessTy;
+  AliasTy  |= AS.AliasTy;
+  Volatile |= AS.Volatile;
+
+  if (AliasTy == MustAlias) {
+    // Check that these two merged sets really are must aliases.  Since both
+    // used to be must-alias sets, we can just check any pointer from each set
+    // for aliasing.
+    AliasAnalysis &AA = AST.getAliasAnalysis();
+    PointerRec *L = getSomePointer();
+    PointerRec *R = AS.getSomePointer();
+
+    // If the pointers are not a must-alias pair, this set becomes a may alias.
+    if (AA.alias(AliasAnalysis::Location(L->getValue(),
+                                         L->getSize(),
+                                         L->getTBAAInfo()),
+                 AliasAnalysis::Location(R->getValue(),
+                                         R->getSize(),
+                                         R->getTBAAInfo()))
+        != AliasAnalysis::MustAlias)
+      AliasTy = MayAlias;
+  }
+
+  if (CallSites.empty()) {            // Merge call sites...
+    if (!AS.CallSites.empty())
+      std::swap(CallSites, AS.CallSites);
+  } else if (!AS.CallSites.empty()) {
+    CallSites.insert(CallSites.end(), AS.CallSites.begin(), AS.CallSites.end());
+    AS.CallSites.clear();
+  }
+
+  AS.Forward = this;  // Forward across AS now...
+  addRef();           // AS is now pointing to us...
+
+  // Merge the list of constituent pointers...
+  if (AS.PtrList) {
+    *PtrListEnd = AS.PtrList;
+    AS.PtrList->setPrevInList(PtrListEnd);
+    PtrListEnd = AS.PtrListEnd;
+
+    AS.PtrList = 0;
+    AS.PtrListEnd = &AS.PtrList;
+    assert(*AS.PtrListEnd == 0 && "End of list is not null?");
+  }
+}
+
+void AliasSetTracker::removeAliasSet(AliasSet *AS) {
+  if (AliasSet *Fwd = AS->Forward) {
+    Fwd->dropRef(*this);
+    AS->Forward = 0;
+  }
+  AliasSets.erase(AS);
+}
+
+void AliasSet::removeFromTracker(AliasSetTracker &AST) {
+  assert(RefCount == 0 && "Cannot remove non-dead alias set from tracker!");
+  AST.removeAliasSet(this);
+}
+
+void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
+                          uint64_t Size, const MDNode *TBAAInfo,
+                          bool KnownMustAlias) {
+  assert(!Entry.hasAliasSet() && "Entry already in set!");
+
+  // Check to see if we have to downgrade to _may_ alias.
+  if (isMustAlias() && !KnownMustAlias)
+    if (PointerRec *P = getSomePointer()) {
+      AliasAnalysis &AA = AST.getAliasAnalysis();
+      AliasAnalysis::AliasResult Result =
+        AA.alias(AliasAnalysis::Location(P->getValue(), P->getSize(),
+                                         P->getTBAAInfo()),
+                 AliasAnalysis::Location(Entry.getValue(), Size, TBAAInfo));
+      if (Result != AliasAnalysis::MustAlias)
+        AliasTy = MayAlias;
+      else                  // First entry of must alias must have maximum size!
+ P->updateSizeAndTBAAInfo(Size, TBAAInfo); + assert(Result != AliasAnalysis::NoAlias && "Cannot be part of must set!"); + } + + Entry.setAliasSet(this); + Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + + // Add it to the end of the list... + assert(*PtrListEnd == 0 && "End of list is not null?"); + *PtrListEnd = &Entry; + PtrListEnd = Entry.setPrevInList(PtrListEnd); + assert(*PtrListEnd == 0 && "End of list is not null?"); + addRef(); // Entry points to alias set. +} + +void AliasSet::addCallSite(CallSite CS, AliasAnalysis &AA) { + CallSites.push_back(CS.getInstruction()); + + AliasAnalysis::ModRefBehavior Behavior = AA.getModRefBehavior(CS); + if (Behavior == AliasAnalysis::DoesNotAccessMemory) + return; + if (AliasAnalysis::onlyReadsMemory(Behavior)) { + AliasTy = MayAlias; + AccessTy |= Refs; + return; + } + + // FIXME: This should use mod/ref information to make this not suck so bad + AliasTy = MayAlias; + AccessTy = ModRef; +} + +/// aliasesPointer - Return true if the specified pointer "may" (or must) +/// alias one of the members in the set. +/// +bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo, + AliasAnalysis &AA) const { + if (AliasTy == MustAlias) { + assert(CallSites.empty() && "Illegal must alias set!"); + + // If this is a set of MustAliases, only check to see if the pointer aliases + // SOME value in the set. + PointerRec *SomePtr = getSomePointer(); + assert(SomePtr && "Empty must-alias set??"); + return AA.alias(AliasAnalysis::Location(SomePtr->getValue(), + SomePtr->getSize(), + SomePtr->getTBAAInfo()), + AliasAnalysis::Location(Ptr, Size, TBAAInfo)); + } + + // If this is a may-alias set, we have to check all of the pointers in the set + // to be sure it doesn't alias the set... + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.alias(AliasAnalysis::Location(Ptr, Size, TBAAInfo), + AliasAnalysis::Location(I.getPointer(), I.getSize(), + I.getTBAAInfo()))) + return true; + + // Check the call sites list and invoke list... + if (!CallSites.empty()) { + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) + if (AA.getModRefInfo(CallSites[i], + AliasAnalysis::Location(Ptr, Size, TBAAInfo)) != + AliasAnalysis::NoModRef) + return true; + } + + return false; +} + +bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { + if (AA.doesNotAccessMemory(CS)) + return false; + + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (AA.getModRefInfo(getCallSite(i), CS) != AliasAnalysis::NoModRef || + AA.getModRefInfo(CS, getCallSite(i)) != AliasAnalysis::NoModRef) + return true; + } + + for (iterator I = begin(), E = end(); I != E; ++I) + if (AA.getModRefInfo(CS, I.getPointer(), I.getSize()) != + AliasAnalysis::NoModRef) + return true; + + return false; +} + +void AliasSetTracker::clear() { + // Delete all the PointerRec entries. + for (PointerMapType::iterator I = PointerMap.begin(), E = PointerMap.end(); + I != E; ++I) + I->second->eraseFromList(); + + PointerMap.clear(); + + // The alias sets should all be clear now. + AliasSets.clear(); +} + + +/// findAliasSetForPointer - Given a pointer, find the one alias set to put the +/// instruction referring to the pointer into. If there are multiple alias sets +/// that may alias the pointer, merge them together and return the unified set. 
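The merging described above leaves the absorbed set behind as a forwarding stub whose references are dropped lazily; that is essentially union-find with reference counting. A minimal standalone sketch of the forwarding idea (toy ToySet type, not the LLVM classes):

    #include <cassert>

    struct ToySet {
      ToySet *Forward = nullptr; // set once this set is merged into another
      unsigned RefCount = 0;     // dropped lazily, as in AliasSet::dropRef
    };

    // Chase forwarding links to the live set, collapsing the chain as we go
    // (the same idea as AliasSet::getForwardedTarget).
    static ToySet *forwardedTarget(ToySet *S) {
      if (!S->Forward) return S;
      ToySet *Dest = forwardedTarget(S->Forward);
      S->Forward = Dest; // path compression keeps later lookups cheap
      return Dest;
    }

    int main() {
      ToySet A, B, C;
      B.Forward = &A; ++A.RefCount; // B was merged into A
      C.Forward = &B; ++B.RefCount; // C was merged into the (now stale) B
      assert(forwardedTarget(&C) == &A);
    }

Path compression here mirrors what the tracker does when it resolves Forward chains on lookup.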
+/// +AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr, + uint64_t Size, + const MDNode *TBAAInfo) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) continue; + + if (FoundSet == 0) { // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + } else { // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + } + + return FoundSet; +} + +/// containsPointer - Return true if the specified location is represented by +/// this alias set, false otherwise. This does not modify the AST object or +/// alias sets. +bool AliasSetTracker::containsPointer(Value *Ptr, uint64_t Size, + const MDNode *TBAAInfo) const { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if (!I->Forward && I->aliasesPointer(Ptr, Size, TBAAInfo, AA)) + return true; + return false; +} + + + +AliasSet *AliasSetTracker::findAliasSetForCallSite(CallSite CS) { + AliasSet *FoundSet = 0; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I->Forward || !I->aliasesCallSite(CS, AA)) + continue; + + if (FoundSet == 0) // If this is the first alias set ptr can go into. + FoundSet = I; // Remember it. + else if (!I->Forward) // Otherwise, we must merge the sets. + FoundSet->mergeSetIn(*I, *this); // Merge in contents. + } + return FoundSet; +} + + + + +/// getAliasSetForPointer - Return the alias set that the specified pointer +/// lives in. +AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size, + const MDNode *TBAAInfo, + bool *New) { + AliasSet::PointerRec &Entry = getEntryFor(Pointer); + + // Check to see if the pointer is already known. + if (Entry.hasAliasSet()) { + Entry.updateSizeAndTBAAInfo(Size, TBAAInfo); + // Return the set! + return *Entry.getAliasSet(*this)->getForwardedTarget(*this); + } + + if (AliasSet *AS = findAliasSetForPointer(Pointer, Size, TBAAInfo)) { + // Add it to the alias set it aliases. + AS->addPointer(*this, Entry, Size, TBAAInfo); + return *AS; + } + + if (New) *New = true; + // Otherwise create a new alias set to hold the loaded pointer. + AliasSets.push_back(new AliasSet()); + AliasSets.back().addPointer(*this, Entry, Size, TBAAInfo); + return AliasSets.back(); +} + +bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) { + bool NewPtr; + addPointer(Ptr, Size, TBAAInfo, AliasSet::NoModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(LoadInst *LI) { + bool NewPtr; + AliasSet &AS = addPointer(LI->getOperand(0), + AA.getTypeStoreSize(LI->getType()), + LI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::Refs, NewPtr); + if (LI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(StoreInst *SI) { + bool NewPtr; + Value *Val = SI->getOperand(0); + AliasSet &AS = addPointer(SI->getOperand(1), + AA.getTypeStoreSize(Val->getType()), + SI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::Mods, NewPtr); + if (SI->isVolatile()) AS.setVolatile(); + return NewPtr; +} + +bool AliasSetTracker::add(VAArgInst *VAAI) { + bool NewPtr; + addPointer(VAAI->getOperand(0), AliasAnalysis::UnknownSize, + VAAI->getMetadata(LLVMContext::MD_tbaa), + AliasSet::ModRef, NewPtr); + return NewPtr; +} + + +bool AliasSetTracker::add(CallSite CS) { + if (isa(CS.getInstruction())) + return true; // Ignore DbgInfo Intrinsics. 
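getAliasSetForPointer (just above) resolves a pointer in three steps: reuse the set it is already in, join (and merge) any aliasing set, or start a fresh one. A compressed sketch of that control flow, with invented MiniSet/MiniTracker stand-ins and the alias query stubbed out:

    #include <list>
    #include <map>

    struct MiniSet { int Members = 0; };

    struct MiniTracker {
      std::list<MiniSet> Sets;                      // owns the sets
      std::map<const void *, MiniSet *> PointerMap; // pointer -> its set

      // Stand-in for the alias-analysis-driven search; the real version also
      // folds every additional matching set into the first one found.
      MiniSet *findAliasing(const void *) { return nullptr; }

      MiniSet &getSetFor(const void *P) {
        auto It = PointerMap.find(P);
        if (It != PointerMap.end())         // case 1: already known
          return *It->second;
        if (MiniSet *S = findAliasing(P)) { // case 2: joins an existing set
          ++S->Members;
          PointerMap[P] = S;
          return *S;
        }
        Sets.emplace_back();                // case 3: fresh singleton set
        Sets.back().Members = 1;
        PointerMap[P] = &Sets.back();
        return Sets.back();
      }
    };

    int main() { MiniTracker T; int x; return T.getSetFor(&x).Members == 1 ? 0 : 1; }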
+  if (AA.doesNotAccessMemory(CS))
+    return true; // doesn't alias anything
+
+  AliasSet *AS = findAliasSetForCallSite(CS);
+  if (AS) {
+    AS->addCallSite(CS, AA);
+    return false;
+  }
+  AliasSets.push_back(new AliasSet());
+  AS = &AliasSets.back();
+  AS->addCallSite(CS, AA);
+  return true;
+}
+
+bool AliasSetTracker::add(Instruction *I) {
+  // Dispatch to one of the other add methods.
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return add(LI);
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return add(SI);
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    return add(CI);
+  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+    return add(II);
+  if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+    return add(VAAI);
+  return true;
+}
+
+void AliasSetTracker::add(BasicBlock &BB) {
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+    add(I);
+}
+
+void AliasSetTracker::add(const AliasSetTracker &AST) {
+  assert(&AA == &AST.AA &&
+         "Merging AliasSetTracker objects with different Alias Analyses!");
+
+  // Loop over all of the alias sets in AST, adding the pointers contained
+  // therein into the current alias sets.  This can cause alias sets to be
+  // merged together in the current AST.
+  for (const_iterator I = AST.begin(), E = AST.end(); I != E; ++I) {
+    if (I->Forward) continue; // Ignore forwarding alias sets
+
+    AliasSet &AS = const_cast<AliasSet&>(*I);
+
+    // If there are any call sites in the alias set, add them to this AST.
+    for (unsigned i = 0, e = AS.CallSites.size(); i != e; ++i)
+      add(AS.CallSites[i]);
+
+    // Loop over all of the pointers in this alias set.
+    bool X;
+    for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
+      AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
+                                   ASI.getTBAAInfo(),
+                                   (AliasSet::AccessType)AS.AccessTy, X);
+      if (AS.isVolatile()) NewAS.setVolatile();
+    }
+  }
+}
+
+/// remove - Remove the specified (potentially non-empty) alias set from the
+/// tracker.
+void AliasSetTracker::remove(AliasSet &AS) {
+  // Drop all call sites.
+  AS.CallSites.clear();
+
+  // Clear the alias set.
+  unsigned NumRefs = 0;
+  while (!AS.empty()) {
+    AliasSet::PointerRec *P = AS.PtrList;
+
+    Value *ValToRemove = P->getValue();
+
+    // Unlink and delete entry from the list of values.
+    P->eraseFromList();
+
+    // Remember how many references need to be dropped.
+    ++NumRefs;
+
+    // Finally, remove the entry.
+    PointerMap.erase(ValToRemove);
+  }
+
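add(Instruction*) above is the idiomatic LLVM dyn_cast<> ladder: probe each concrete instruction class in turn and default conservatively. The same shape using plain C++ RTTI, since a self-contained example cannot pull in LLVM's casting infrastructure:

    #include <iostream>

    struct Inst  { virtual ~Inst() = default; };
    struct Load  : Inst {};
    struct Store : Inst {};

    // Try each concrete kind; the fallback plays the role of add()'s
    // conservative 'return true' for instructions it doesn't model.
    static const char *classify(Inst *I) {
      if (dynamic_cast<Load *>(I))
        return "load";
      if (dynamic_cast<Store *>(I))
        return "store";
      return "other";
    }

    int main() {
      Load L;
      std::cout << classify(&L) << '\n'; // prints "load"
    }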
+  // Stop using the alias set, removing it.
+  AS.RefCount -= NumRefs;
+  if (AS.RefCount == 0)
+    AS.removeFromTracker(*this);
+}
+
+bool
+AliasSetTracker::remove(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
+  AliasSet *AS = findAliasSetForPointer(Ptr, Size, TBAAInfo);
+  if (!AS) return false;
+  remove(*AS);
+  return true;
+}
+
+bool AliasSetTracker::remove(LoadInst *LI) {
+  uint64_t Size = AA.getTypeStoreSize(LI->getType());
+  const MDNode *TBAAInfo = LI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(LI->getOperand(0), Size, TBAAInfo);
+  if (!AS) return false;
+  remove(*AS);
+  return true;
+}
+
+bool AliasSetTracker::remove(StoreInst *SI) {
+  uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType());
+  const MDNode *TBAAInfo = SI->getMetadata(LLVMContext::MD_tbaa);
+  AliasSet *AS = findAliasSetForPointer(SI->getOperand(1), Size, TBAAInfo);
+  if (!AS) return false;
+  remove(*AS);
+  return true;
+}
+
+bool AliasSetTracker::remove(VAArgInst *VAAI) {
+  AliasSet *AS = findAliasSetForPointer(VAAI->getOperand(0),
+                                        AliasAnalysis::UnknownSize,
+                                        VAAI->getMetadata(LLVMContext::MD_tbaa));
+  if (!AS) return false;
+  remove(*AS);
+  return true;
+}
+
+bool AliasSetTracker::remove(CallSite CS) {
+  if (AA.doesNotAccessMemory(CS))
+    return false; // doesn't alias anything
+
+  AliasSet *AS = findAliasSetForCallSite(CS);
+  if (!AS) return false;
+  remove(*AS);
+  return true;
+}
+
+bool AliasSetTracker::remove(Instruction *I) {
+  // Dispatch to one of the other remove methods...
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return remove(LI);
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return remove(SI);
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    return remove(CI);
+  if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
+    return remove(VAAI);
+  return true;
+}
+
+
+// deleteValue method - This method is used to remove a pointer value from the
+// AliasSetTracker entirely.  It should be used when an instruction is deleted
+// from the program to update the AST.  If you don't use this, you would have
+// dangling pointers to deleted instructions.
+//
+void AliasSetTracker::deleteValue(Value *PtrVal) {
+  // Notify the alias analysis implementation that this value is gone.
+  AA.deleteValue(PtrVal);
+
+  // If this is a call instruction, remove the callsite from the appropriate
+  // AliasSet (if present).
+  if (CallSite CS = PtrVal) {
+    if (!AA.doesNotAccessMemory(CS)) {
+      // Scan all the alias sets to see if this call site is contained.
+      for (iterator I = begin(), E = end(); I != E; ++I) {
+        if (I->Forward) continue;
+
+        I->removeCallSite(CS);
+      }
+    }
+  }
+
+  // First, look up the PointerRec for this pointer.
+  PointerMapType::iterator I = PointerMap.find(PtrVal);
+  if (I == PointerMap.end()) return;  // Noop
+
+  // If we found one, remove the pointer from the alias set it is in.
+  AliasSet::PointerRec *PtrValEnt = I->second;
+  AliasSet *AS = PtrValEnt->getAliasSet(*this);
+
+  // Unlink and delete from the list of values.
+  PtrValEnt->eraseFromList();
+
+  // Stop using the alias set.
+  AS->dropRef(*this);
+
+  PointerMap.erase(I);
+}
+
+// copyValue - This method should be used whenever a preexisting value in the
+// program is copied or cloned, introducing a new value.  Note that it is ok for
+// clients that use this method to introduce the same value multiple times: if
+// the tracker already knows about a value, it will ignore the request.
+//
+void AliasSetTracker::copyValue(Value *From, Value *To) {
+  // Notify the alias analysis implementation that this value is copied.
+  AA.copyValue(From, To);
+
+  // First, look up the PointerRec for this pointer.
+ PointerMapType::iterator I = PointerMap.find(From); + if (I == PointerMap.end()) + return; // Noop + assert(I->second->hasAliasSet() && "Dead entry?"); + + AliasSet::PointerRec &Entry = getEntryFor(To); + if (Entry.hasAliasSet()) return; // Already in the tracker! + + // Add it to the alias set it aliases... + I = PointerMap.find(From); + AliasSet *AS = I->second->getAliasSet(*this); + AS->addPointer(*this, Entry, I->second->getSize(), + I->second->getTBAAInfo(), + true); +} + + + +//===----------------------------------------------------------------------===// +// AliasSet/AliasSetTracker Printing Support +//===----------------------------------------------------------------------===// + +void AliasSet::print(raw_ostream &OS) const { + OS << " AliasSet[" << (void*)this << ", " << RefCount << "] "; + OS << (AliasTy == MustAlias ? "must" : "may") << " alias, "; + switch (AccessTy) { + case NoModRef: OS << "No access "; break; + case Refs : OS << "Ref "; break; + case Mods : OS << "Mod "; break; + case ModRef : OS << "Mod/Ref "; break; + default: llvm_unreachable("Bad value for AccessTy!"); + } + if (isVolatile()) OS << "[volatile] "; + if (Forward) + OS << " forwarding to " << (void*)Forward; + + + if (!empty()) { + OS << "Pointers: "; + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I != begin()) OS << ", "; + WriteAsOperand(OS << "(", I.getPointer()); + OS << ", " << I.getSize() << ")"; + } + } + if (!CallSites.empty()) { + OS << "\n " << CallSites.size() << " Call Sites: "; + for (unsigned i = 0, e = CallSites.size(); i != e; ++i) { + if (i) OS << ", "; + WriteAsOperand(OS, CallSites[i]); + } + } + OS << "\n"; +} + +void AliasSetTracker::print(raw_ostream &OS) const { + OS << "Alias Set Tracker: " << AliasSets.size() << " alias sets for " + << PointerMap.size() << " pointer values.\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) + I->print(OS); + OS << "\n"; +} + +void AliasSet::dump() const { print(dbgs()); } +void AliasSetTracker::dump() const { print(dbgs()); } + +//===----------------------------------------------------------------------===// +// ASTCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void AliasSetTracker::ASTCallbackVH::deleted() { + assert(AST && "ASTCallbackVH called with a null AliasSetTracker!"); + AST->deleteValue(getValPtr()); + // this now dangles! 
+} + +AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) + : CallbackVH(V), AST(ast) {} + +AliasSetTracker::ASTCallbackVH & +AliasSetTracker::ASTCallbackVH::operator=(Value *V) { + return *this = ASTCallbackVH(V, AST); +} + +//===----------------------------------------------------------------------===// +// AliasSetPrinter Pass +//===----------------------------------------------------------------------===// + +namespace { + class AliasSetPrinter : public FunctionPass { + AliasSetTracker *Tracker; + public: + static char ID; // Pass identification, replacement for typeid + AliasSetPrinter() : FunctionPass(ID) { + initializeAliasSetPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + } + + virtual bool runOnFunction(Function &F) { + Tracker = new AliasSetTracker(getAnalysis()); + + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + Tracker->add(&*I); + Tracker->print(errs()); + delete Tracker; + return false; + } + }; +} + +char AliasSetPrinter::ID = 0; +INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets", + "Alias Set Printer", false, true) diff --git a/final/lib/Analysis/Analysis.cpp b/final/lib/Analysis/Analysis.cpp new file mode 100644 index 00000000000..74a27643269 --- /dev/null +++ b/final/lib/Analysis/Analysis.cpp @@ -0,0 +1,103 @@ +//===-- Analysis.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm-c/Analysis.h" +#include "llvm/InitializePasses.h" +#include "llvm/Analysis/Verifier.h" +#include + +using namespace llvm; + +/// initializeAnalysis - Initialize all passes linked into the Analysis library. 
+void llvm::initializeAnalysis(PassRegistry &Registry) { + initializeAliasAnalysisAnalysisGroup(Registry); + initializeAliasAnalysisCounterPass(Registry); + initializeAAEvalPass(Registry); + initializeAliasDebuggerPass(Registry); + initializeAliasSetPrinterPass(Registry); + initializeNoAAPass(Registry); + initializeBasicAliasAnalysisPass(Registry); + initializeCFGViewerPass(Registry); + initializeCFGPrinterPass(Registry); + initializeCFGOnlyViewerPass(Registry); + initializeCFGOnlyPrinterPass(Registry); + initializePrintDbgInfoPass(Registry); + initializeDominanceFrontierPass(Registry); + initializeDomViewerPass(Registry); + initializeDomPrinterPass(Registry); + initializeDomOnlyViewerPass(Registry); + initializePostDomViewerPass(Registry); + initializeDomOnlyPrinterPass(Registry); + initializePostDomPrinterPass(Registry); + initializePostDomOnlyViewerPass(Registry); + initializePostDomOnlyPrinterPass(Registry); + initializeIVUsersPass(Registry); + initializeInstCountPass(Registry); + initializeIntervalPartitionPass(Registry); + initializeLazyValueInfoPass(Registry); + initializeLibCallAliasAnalysisPass(Registry); + initializeLintPass(Registry); + initializeLoopDependenceAnalysisPass(Registry); + initializeLoopInfoPass(Registry); + initializeMemDepPrinterPass(Registry); + initializeMemoryDependenceAnalysisPass(Registry); + initializeModuleDebugInfoPrinterPass(Registry); + initializePostDominatorTreePass(Registry); + initializePostDominanceFrontierPass(Registry); + initializeProfileEstimatorPassPass(Registry); + initializeNoProfileInfoPass(Registry); + initializeNoPathProfileInfoPass(Registry); + initializeProfileInfoAnalysisGroup(Registry); + initializePathProfileInfoAnalysisGroup(Registry); + initializeLoaderPassPass(Registry); + initializePathProfileLoaderPassPass(Registry); + initializeProfileVerifierPassPass(Registry); + initializePathProfileVerifierPass(Registry); + initializeRegionInfoPass(Registry); + initializeRegionViewerPass(Registry); + initializeRegionPrinterPass(Registry); + initializeRegionOnlyViewerPass(Registry); + initializeRegionOnlyPrinterPass(Registry); + initializeScalarEvolutionPass(Registry); + initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeTypeBasedAliasAnalysisPass(Registry); +} + +void LLVMInitializeAnalysis(LLVMPassRegistryRef R) { + initializeAnalysis(*unwrap(R)); +} + +LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action, + char **OutMessages) { + std::string Messages; + + LLVMBool Result = verifyModule(*unwrap(M), + static_cast(Action), + OutMessages? &Messages : 0); + + if (OutMessages) + *OutMessages = strdup(Messages.c_str()); + + return Result; +} + +LLVMBool LLVMVerifyFunction(LLVMValueRef Fn, LLVMVerifierFailureAction Action) { + return verifyFunction(*unwrap(Fn), + static_cast(Action)); +} + +void LLVMViewFunctionCFG(LLVMValueRef Fn) { + Function *F = unwrap(Fn); + F->viewCFG(); +} + +void LLVMViewFunctionCFGOnly(LLVMValueRef Fn) { + Function *F = unwrap(Fn); + F->viewCFGOnly(); +} diff --git a/final/lib/Analysis/BasicAliasAnalysis.cpp b/final/lib/Analysis/BasicAliasAnalysis.cpp new file mode 100644 index 00000000000..f7bcd9ec44d --- /dev/null +++ b/final/lib/Analysis/BasicAliasAnalysis.cpp @@ -0,0 +1,1170 @@ +//===- BasicAliasAnalysis.cpp - Stateless Alias Analysis Impl -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
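For callers of the C API defined above, LLVMVerifyModule hands back a malloc'd diagnostic string that must be released by the caller. A minimal client under that contract (a sketch assuming only the llvm-c headers; LLVMDisposeMessage pairs with the strdup above):

    #include "llvm-c/Analysis.h"
    #include "llvm-c/Core.h"
    #include <stdio.h>

    int main(void) {
      LLVMModuleRef M = LLVMModuleCreateWithName("demo");
      char *Msg = NULL;
      // LLVMReturnStatusAction reports problems instead of aborting.
      if (LLVMVerifyModule(M, LLVMReturnStatusAction, &Msg))
        fprintf(stderr, "broken module: %s\n", Msg);
      LLVMDisposeMessage(Msg); // free the strdup'd diagnostics
      LLVMDisposeModule(M);
      return 0;
    }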
+// +//===----------------------------------------------------------------------===// +// +// This file defines the primary stateless implementation of the +// Alias Analysis interface that implements identities (two different +// globals cannot alias, etc), but does no stateful analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Useful predicates +//===----------------------------------------------------------------------===// + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +static bool isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. + if (const GlobalValue *GV = dyn_cast(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} + +/// isNonEscapingLocalObject - Return true if the pointer is to a function-local +/// object that never escapes from the function. +static bool isNonEscapingLocalObject(const Value *V) { + // If this is a local allocation, check to see if it escapes. + if (isa(V) || isNoAliasCall(V)) + // Set StoreCaptures to True so that we can assume in our callers that the + // pointer is not the result of a load instruction. Currently + // PointerMayBeCaptured doesn't have any special analysis for the + // StoreCaptures=false case; if it did, our callers could be refined to be + // more precise. + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + + // If this is an argument that corresponds to a byval or noalias argument, + // then it has not escaped before entering the function. Check if it escapes + // inside the function. + if (const Argument *A = dyn_cast(V)) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { + // Don't bother analyzing arguments already known not to escape. + if (A->hasNoCaptureAttr()) + return true; + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + } + return false; +} + +/// isEscapeSource - Return true if the pointer is one which would have +/// been considered an escape by isNonEscapingLocalObject. +static bool isEscapeSource(const Value *V) { + if (isa(V) || isa(V) || isa(V)) + return true; + + // The load case works because isNonEscapingLocalObject considers all + // stores to be escapes (it passes true for the StoreCaptures argument + // to PointerMayBeCaptured). + if (isa(V)) + return true; + + return false; +} + +/// getObjectSize - Return the size of the object specified by V, or +/// UnknownSize if unknown. 
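getObjectSize, defined next, feeds isObjectSmallerThan: accessing more bytes than the underlying object holds is undefined behavior, so such a pair can be declared NoAlias outright. The comparison in isolation, with sizes hard-coded where the real code would ask TargetData:

    #include <cassert>
    #include <cstdint>

    static const uint64_t UnknownSize = ~0ULL; // mirrors AliasAnalysis::UnknownSize

    static bool isObjectSmallerThan(uint64_t ObjectSize, uint64_t AccessSize) {
      return ObjectSize != UnknownSize && ObjectSize < AccessSize;
    }

    int main() {
      // A 16-byte access cannot legally stay inside an 8-byte alloca, so a
      // pointer into that alloca cannot alias the 16-byte location.
      assert(isObjectSmallerThan(/*alloca i64 =*/8, /*access of*/16));
    }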
+static uint64_t getObjectSize(const Value *V, const TargetData &TD) { + const Type *AccessTy; + if (const GlobalVariable *GV = dyn_cast(V)) { + if (!GV->hasDefinitiveInitializer()) + return AliasAnalysis::UnknownSize; + AccessTy = GV->getType()->getElementType(); + } else if (const AllocaInst *AI = dyn_cast(V)) { + if (!AI->isArrayAllocation()) + AccessTy = AI->getType()->getElementType(); + else + return AliasAnalysis::UnknownSize; + } else if (const CallInst* CI = extractMallocCall(V)) { + if (!isArrayMalloc(V, &TD)) + // The size is the argument to the malloc call. + if (const ConstantInt* C = dyn_cast(CI->getArgOperand(0))) + return C->getZExtValue(); + return AliasAnalysis::UnknownSize; + } else if (const Argument *A = dyn_cast(V)) { + if (A->hasByValAttr()) + AccessTy = cast(A->getType())->getElementType(); + else + return AliasAnalysis::UnknownSize; + } else { + return AliasAnalysis::UnknownSize; + } + + if (AccessTy->isSized()) + return TD.getTypeAllocSize(AccessTy); + return AliasAnalysis::UnknownSize; +} + +/// isObjectSmallerThan - Return true if we can prove that the object specified +/// by V is smaller than Size. +static bool isObjectSmallerThan(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size; +} + +/// isObjectSize - Return true if we can prove that the object specified +/// by V has size Size. +static bool isObjectSize(const Value *V, uint64_t Size, + const TargetData &TD) { + uint64_t ObjectSize = getObjectSize(V, TD); + return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size; +} + +//===----------------------------------------------------------------------===// +// GetElementPtr Instruction Decomposition and Analysis +//===----------------------------------------------------------------------===// + +namespace { + enum ExtensionKind { + EK_NotExtended, + EK_SignExt, + EK_ZeroExt + }; + + struct VariableGEPIndex { + const Value *V; + ExtensionKind Extension; + int64_t Scale; + }; +} + + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. Return the scale and offset +/// values as APInts and return V as a Value*, and return whether we looked +/// through any sign or zero extends. The incoming Value is known to have +/// IntegerType and it may already be sign or zero extended. +/// +/// Note that this looks through extends, so the high bits may not be +/// represented in the result. +static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + ExtensionKind &Extension, + const TargetData &TD, unsigned Depth) { + assert(V->getType()->isIntegerTy() && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast(V)) { + if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), &TD)) + break; + // FALL THROUGH. 
+ case Instruction::Add: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset += RHSC->getValue(); + return V; + case Instruction::Mul: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset *= RHSC->getValue(); + Scale *= RHSC->getValue(); + return V; + case Instruction::Shl: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, + TD, Depth+1); + Offset <<= RHSC->getValue().getLimitedValue(); + Scale <<= RHSC->getValue().getLimitedValue(); + return V; + } + } + } + + // Since GEP indices are sign extended anyway, we don't care about the high + // bits of a sign or zero extended value - just scales and offsets. The + // extensions have to be consistent though. + if ((isa(V) && Extension != EK_ZeroExt) || + (isa(V) && Extension != EK_SignExt)) { + Value *CastOp = cast(V)->getOperand(0); + unsigned OldWidth = Scale.getBitWidth(); + unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); + Scale = Scale.trunc(SmallWidth); + Offset = Offset.trunc(SmallWidth); + Extension = isa(V) ? EK_SignExt : EK_ZeroExt; + + Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, + TD, Depth+1); + Scale = Scale.zext(OldWidth); + Offset = Offset.zext(OldWidth); + + return Result; + } + + Scale = 1; + Offset = 0; + return V; +} + +/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it +/// into a base pointer with a constant offset and a number of scaled symbolic +/// offsets. +/// +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in +/// the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As such, +/// the gep cannot necessarily be reconstructed from its decomposed form. +/// +/// When TargetData is around, this function is capable of analyzing everything +/// that GetUnderlyingObject can look through. When not, it just looks +/// through pointer casts. +/// +static const Value * +DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl &VarIndices, + const TargetData *TD) { + // Limit recursion depth to limit compile time in crazy cases. + unsigned MaxLookup = 6; + + BaseOffs = 0; + do { + // See if this is a bitcast or GEP. + const Operator *Op = dyn_cast(V); + if (Op == 0) { + // The only non-operator case we can handle are GlobalAliases. + if (const GlobalAlias *GA = dyn_cast(V)) { + if (!GA->mayBeOverridden()) { + V = GA->getAliasee(); + continue; + } + } + return V; + } + + if (Op->getOpcode() == Instruction::BitCast) { + V = Op->getOperand(0); + continue; + } + + if (const Instruction *I = dyn_cast(V)) + // TODO: Get a DominatorTree and use it here. + if (const Value *Simplified = + SimplifyInstruction(const_cast(I), TD)) { + V = Simplified; + continue; + } + + const GEPOperator *GEPOp = dyn_cast(Op); + if (GEPOp == 0) + return V; + + // Don't attempt to analyze GEPs over unsized objects. + if (!cast(GEPOp->getOperand(0)->getType()) + ->getElementType()->isSized()) + return V; + + // If we are lacking TargetData information, we can't compute the offets of + // elements computed by GEPs. However, we can handle bitcast equivalent + // GEPs. + if (TD == 0) { + if (!GEPOp->hasAllZeroIndices()) + return V; + V = GEPOp->getOperand(0); + continue; + } + + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. 
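A concrete instance of the decomposition above: '(x << 2) + 12' yields Scale = 4, Offset = 12 with V = x, because the recursion resolves the inner operand first and then applies the outer operator to the accumulated result. A toy recursion over an invented Expr type showing the same accumulation (the real pass also tracks sign/zero extension, omitted here):

    #include <cassert>
    #include <cstdint>

    struct Expr {
      enum Kind { Leaf, Add, Shl } K;
      const Expr *Op; // left operand for Add/Shl
      int64_t C;      // constant right operand
    };

    // Returns the leaf V of the linear form Scale*V + Offset.
    static const Expr *linearize(const Expr *E, int64_t &Scale, int64_t &Offset) {
      if (E->K == Expr::Leaf) {
        Scale = 1;
        Offset = 0;
        return E;
      }
      const Expr *V = linearize(E->Op, Scale, Offset);
      if (E->K == Expr::Add)
        Offset += E->C;   // A*V + (B + C)
      else {              // Shl scales both parts, as in the pass
        Scale <<= E->C;
        Offset <<= E->C;
      }
      return V;
    }

    int main() {
      Expr X{Expr::Leaf, nullptr, 0};
      Expr Sh{Expr::Shl, &X, 2};    // x << 2
      Expr Sum{Expr::Add, &Sh, 12}; // (x << 2) + 12
      int64_t S, O;
      const Expr *V = linearize(&Sum, S, O);
      assert(V == &X && S == 4 && O == 12);
    }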
+ gep_type_iterator GTI = gep_type_begin(GEPOp); + for (User::const_op_iterator I = GEPOp->op_begin()+1, + E = GEPOp->op_end(); I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast(Index)->getZExtValue(); + if (FieldNo == 0) continue; + + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + continue; + } + + // For an array/pointer, add the element offset, explicitly scaled. + if (ConstantInt *CIdx = dyn_cast(Index)) { + if (CIdx->isZero()) continue; + BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + continue; + } + + uint64_t Scale = TD->getTypeAllocSize(*GTI); + ExtensionKind Extension = EK_NotExtended; + + // If the integer type is smaller than the pointer size, it is implicitly + // sign extended to pointer size. + unsigned Width = cast(Index->getType())->getBitWidth(); + if (TD->getPointerSizeInBits() > Width) + Extension = EK_SignExt; + + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. + APInt IndexScale(Width, 0), IndexOffset(Width, 0); + Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, + *TD, 0); + + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. + // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. + BaseOffs += IndexOffset.getSExtValue()*Scale; + Scale *= IndexScale.getSExtValue(); + + + // If we already had an occurrance of this index variable, merge this + // scale into it. For example, we want to handle: + // A[x][x] -> x*16 + x*4 -> x*20 + // This also ensures that 'x' only appears in the index list once. + for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { + if (VarIndices[i].V == Index && + VarIndices[i].Extension == Extension) { + Scale += VarIndices[i].Scale; + VarIndices.erase(VarIndices.begin()+i); + break; + } + } + + // Make sure that we have a scale that makes sense for this target's + // pointer size. + if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { + Scale <<= ShiftBits; + Scale = (int64_t)Scale >> ShiftBits; + } + + if (Scale) { + VariableGEPIndex Entry = {Index, Extension, Scale}; + VarIndices.push_back(Entry); + } + } + + // Analyze the base pointer next. + V = GEPOp->getOperand(0); + } while (--MaxLookup); + + // If the chain of expressions is too deep, just return early. + return V; +} + +/// GetIndexDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +static void GetIndexDifference(SmallVectorImpl &Dest, + const SmallVectorImpl &Src) { + if (Src.empty()) return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].V; + ExtensionKind Extension = Src[i].Extension; + int64_t Scale = Src[i].Scale; + + // Find V in Dest. This is N^2, but pointer indices almost never have more + // than a few variable indexes. + for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (Dest[j].V != V || Dest[j].Extension != Extension) continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry. + if (Dest[j].Scale != Scale) + Dest[j].Scale -= Scale; + else + Dest.erase(Dest.begin()+j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. 
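GetIndexDifference, completed just below, cancels matching symbolic terms so only the residue matters: for &A[x][1] vs &A[x][0] over 40-byte rows, both decompositions carry the same 40*x term, and subtraction leaves just the constant 4 for the offset test. A distilled version over (symbol, scale) pairs:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Term { int Sym; int64_t Scale; }; // contributes Scale * value(Sym)

    // Dest -= Src, cancelling matching symbols (shape of GetIndexDifference).
    static void indexDifference(std::vector<Term> &Dest,
                                const std::vector<Term> &Src) {
      for (Term T : Src) {
        bool Consumed = false;
        for (size_t j = 0; j != Dest.size(); ++j)
          if (Dest[j].Sym == T.Sym) {
            if (Dest[j].Scale != T.Scale)
              Dest[j].Scale -= T.Scale;
            else
              Dest.erase(Dest.begin() + j); // cancels exactly
            Consumed = true;
            break;
          }
        if (!Consumed)
          Dest.push_back({T.Sym, -T.Scale}); // unmatched: negate and keep
      }
    }

    int main() {
      std::vector<Term> G1 = {{/*x*/0, 40}}; // &A[x][1]: 40*x (+4 constant)
      std::vector<Term> G2 = {{/*x*/0, 40}}; // &A[x][0]: 40*x
      indexDifference(G1, G2);
      assert(G1.empty()); // symbolic parts cancel; only the +4 constant remains
    }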
+ if (Scale) { + VariableGEPIndex Entry = { V, Extension, -Scale }; + Dest.push_back(Entry); + } + } +} + +//===----------------------------------------------------------------------===// +// BasicAliasAnalysis Pass +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast(V)) + return inst->getParent()->getParent(); + + if (const Argument *arg = dyn_cast(V)) + return arg->getParent(); + + return NULL; +} + +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + +namespace { + /// BasicAliasAnalysis - This is the primary alias analysis implementation. + struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + BasicAliasAnalysis() : ImmutablePass(ID) { + initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + InitializeAliasAnalysis(this); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + } + + virtual AliasResult alias(const Location &LocA, + const Location &LocB) { + assert(Visited.empty() && "Visited must be cleared after use!"); + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && + "BasicAliasAnalysis doesn't support interprocedural queries."); + AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.TBAATag, + LocB.Ptr, LocB.Size, LocB.TBAATag); + Visited.clear(); + return Alias; + } + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc); + + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + // The AliasAnalysis base class has some smarts, lets use them. + return AliasAnalysis::getModRefInfo(CS1, CS2); + } + + /// pointsToConstantMemory - Chase pointers until we find a (constant + /// global) or not. + virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal); + + /// getModRefBehavior - Return the behavior when calling the given + /// call site. + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS); + + /// getModRefBehavior - Return the behavior when calling the given function. + /// For use when the call site is not known. + virtual ModRefBehavior getModRefBehavior(const Function *F); + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + + private: + // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP(). + SmallPtrSet Visited; + + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP + // instruction against another. + AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, const Value *UnderlyingV2); + + // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI + // instruction against another. 
+ AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + /// aliasSelect - Disambiguate a Select instruction against another value. + AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo); + + AliasResult aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAATag, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAATag); + }; +} // End of anonymous namespace + +// Register this pass... +char BasicAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(BasicAliasAnalysis, AliasAnalysis, "basicaa", + "Basic Alias Analysis (stateless AA impl)", + false, true, false) + +ImmutablePass *llvm::createBasicAliasAnalysisPass() { + return new BasicAliasAnalysis(); +} + +/// pointsToConstantMemory - Returns whether the given pointer value +/// points to memory that is local to the function, with global constants being +/// considered local to all functions. +bool +BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) { + assert(Visited.empty() && "Visited must be cleared after use!"); + + unsigned MaxLookup = 8; + SmallVector Worklist; + Worklist.push_back(Loc.Ptr); + do { + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), TD); + if (!Visited.insert(V)) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + + // An alloca instruction defines local memory. + if (OrLocal && isa(V)) + continue; + + // A global constant counts as local memory for our purposes. + if (const GlobalVariable *GV = dyn_cast(V)) { + // Note: this doesn't require GV to be "ODR" because it isn't legal for a + // global to be marked constant in some modules and non-constant in + // others. GV may even be a declaration, not a definition. + if (!GV->isConstant()) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + continue; + } + + // If both select values point to local memory, then so does the select. + if (const SelectInst *SI = dyn_cast(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to local memory, then so does + // the phi. + if (const PHINode *PN = dyn_cast(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) { + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + } + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + Worklist.push_back(PN->getIncomingValue(i)); + continue; + } + + // Otherwise be conservative. + Visited.clear(); + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + } while (!Worklist.empty() && --MaxLookup); + + Visited.clear(); + return Worklist.empty(); +} + +/// getModRefBehavior - Return the behavior when calling the given call site. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (CS.doesNotAccessMemory()) + // Can't do better than this. + return DoesNotAccessMemory; + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the callsite knows it only reads memory, don't return worse + // than that. + if (CS.onlyReadsMemory()) + Min = OnlyReadsMemory; + + // The AliasAnalysis base class has some smarts, lets use them. 
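pointsToConstantMemory above is a bounded worklist walk: strip each value to its underlying object, follow both select arms and every phi input, and defer to the chained analysis once the budget runs out or a node repeats. The skeleton of that traversal over a toy Node type:

    #include <set>
    #include <vector>

    struct Node {
      bool IsLocalOrConstant = false;
      std::vector<const Node *> Inputs; // select arms / phi incoming values
    };

    static bool allLocalOrConstant(const Node *Root) {
      unsigned MaxLookup = 8;           // same budget as the pass
      std::set<const Node *> Visited;
      std::vector<const Node *> Worklist{Root};
      do {
        const Node *N = Worklist.back();
        Worklist.pop_back();
        if (!Visited.insert(N).second) return false; // repeat: be conservative
        if (N->IsLocalOrConstant) continue;
        if (N->Inputs.empty()) return false;         // opaque: be conservative
        for (const Node *In : N->Inputs)
          Worklist.push_back(In);
      } while (!Worklist.empty() && --MaxLookup);
      return Worklist.empty(); // true only if every path was actually proved
    }

    int main() { Node A; A.IsLocalOrConstant = true; return allLocalOrConstant(&A) ? 0 : 1; }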
+ return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +/// getModRefBehavior - Return the behavior when calling the given function. +/// For use when the call site is not known. +AliasAnalysis::ModRefBehavior +BasicAliasAnalysis::getModRefBehavior(const Function *F) { + // If the function declares it doesn't access memory, we can't do better. + if (F->doesNotAccessMemory()) + return DoesNotAccessMemory; + + // For intrinsics, we can check the table. + if (unsigned iid = F->getIntrinsicID()) { +#define GET_INTRINSIC_MODREF_BEHAVIOR +#include "llvm/Intrinsics.gen" +#undef GET_INTRINSIC_MODREF_BEHAVIOR + } + + ModRefBehavior Min = UnknownModRefBehavior; + + // If the function declares it only reads memory, go with that. + if (F->onlyReadsMemory()) + Min = OnlyReadsMemory; + + // Otherwise be conservative. + return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); +} + +/// getModRefInfo - Check to see if the specified callsite can clobber the +/// specified memory object. Since we only look at local properties of this +/// function, we really can't say much about this query. We do, however, use +/// simple "address taken" analysis on local objects. +AliasAnalysis::ModRefResult +BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) && + "AliasAnalysis query involving multiple functions!"); + + const Value *Object = GetUnderlyingObject(Loc.Ptr, TD); + + // If this is a tail call and Loc.Ptr points to a stack location, we know that + // the tail call cannot access or modify the local stack. + // We cannot exclude byval arguments here; these belong to the caller of + // the current function not to the current function, and a tail callee + // may reference them. + if (isa(Object)) + if (const CallInst *CI = dyn_cast(CS.getInstruction())) + if (CI->isTailCall()) + return NoModRef; + + // If the pointer is to a locally allocated object that does not escape, + // then the call can not mod/ref the pointer unless the call takes the pointer + // as an argument, and itself doesn't capture it. + if (!isa(Object) && CS.getInstruction() != Object && + isNonEscapingLocalObject(Object)) { + bool PassedAsArg = false; + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture pointer arguments. + if (!(*CI)->getType()->isPointerTy() || + !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!isNoAlias(Location(cast(CI)), Loc)) { + PassedAsArg = true; + break; + } + } + + if (!PassedAsArg) + return NoModRef; + } + + ModRefResult Min = ModRef; + + // Finally, handle specific knowledge of intrinsics. + const IntrinsicInst *II = dyn_cast(CS.getInstruction()); + if (II != 0) + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + uint64_t Len = UnknownSize; + if (ConstantInt *LenCI = dyn_cast(II->getArgOperand(2))) + Len = LenCI->getZExtValue(); + Value *Dest = II->getArgOperand(0); + Value *Src = II->getArgOperand(1); + // If it can't overlap the source dest, then it doesn't modref the loc. 
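The memcpy/memmove case here is a small truth table over the two isNoAlias queries: disjoint from Dest means at worst a read, disjoint from Src means at worst a write, disjoint from both means NoModRef. Spelled out:

    #include <cassert>

    enum MR { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

    // How memcpy(Dest, Src, Len) can touch some location L, given which
    // operands provably do not overlap L (the isNoAlias answers above).
    static MR memcpyEffect(bool DestDisjoint, bool SrcDisjoint) {
      if (DestDisjoint && SrcDisjoint) return NoModRef;
      if (DestDisjoint) return Ref; // can only read L, through Src
      if (SrcDisjoint)  return Mod; // can only write L, through Dest
      return ModRef;                // may both read and write L
    }

    int main() {
      assert(memcpyEffect(true,  true)  == NoModRef);
      assert(memcpyEffect(true,  false) == Ref);
      assert(memcpyEffect(false, true)  == Mod);
    }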
+ if (isNoAlias(Location(Dest, Len), Loc)) { + if (isNoAlias(Location(Src, Len), Loc)) + return NoModRef; + // If it can't overlap the dest, then worst case it reads the loc. + Min = Ref; + } else if (isNoAlias(Location(Src, Len), Loc)) { + // If it can't overlap the source, then worst case it mutates the loc. + Min = Mod; + } + break; + } + case Intrinsic::memset: + // Since memset is 'accesses arguments' only, the AliasAnalysis base class + // will handle it for the variable length case. + if (ConstantInt *LenCI = dyn_cast(II->getArgOperand(2))) { + uint64_t Len = LenCI->getZExtValue(); + Value *Dest = II->getArgOperand(0); + if (isNoAlias(Location(Dest, Len), Loc)) + return NoModRef; + } + // We know that memset doesn't load anything. + Min = Mod; + break; + case Intrinsic::atomic_cmp_swap: + case Intrinsic::atomic_swap: + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: + if (TD) { + Value *Op1 = II->getArgOperand(0); + uint64_t Op1Size = TD->getTypeStoreSize(Op1->getType()); + MDNode *Tag = II->getMetadata(LLVMContext::MD_tbaa); + if (isNoAlias(Location(Op1, Op1Size, Tag), Loc)) + return NoModRef; + } + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: { + uint64_t PtrSize = + cast(II->getArgOperand(0))->getZExtValue(); + if (isNoAlias(Location(II->getArgOperand(1), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::invariant_end: { + uint64_t PtrSize = + cast(II->getArgOperand(1))->getZExtValue(); + if (isNoAlias(Location(II->getArgOperand(2), + PtrSize, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + } + + // The AliasAnalysis base class has some smarts, lets use them. + return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min); +} + +/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction +/// against another pointer. We know that V1 is a GEP, but we don't know +/// anything about V2. UnderlyingV1 is GetUnderlyingObject(GEP1, TD), +/// UnderlyingV2 is the same for V2. +/// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo, + const Value *UnderlyingV1, + const Value *UnderlyingV2) { + // If this GEP has been visited before, we're on a use-def cycle. + // Such cycles are only valid when PHI nodes are involved or in unreachable + // code. The visitPHI function catches cycles containing PHIs, but there + // could still be a cycle without PHIs in unreachable code. + if (!Visited.insert(GEP1)) + return MayAlias; + + int64_t GEP1BaseOffset; + SmallVector GEP1VariableIndices; + + // If we have two gep instructions with must-alias'ing base pointers, figure + // out if the indexes to the GEP tell us anything about the derived pointer. + if (const GEPOperator *GEP2 = dyn_cast(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + + // If we get a No or May, then return it immediately, no amount of analysis + // will improve this situation. 
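Both getModRefBehavior and getModRefInfo above finish with '& Min'; that works because the 2.9 mod/ref enums are bit-encoded so that bitwise AND acts as a meet, keeping whichever answer is stricter. In miniature (constants mirror the encoding in spirit, simplified):

    #include <cassert>

    // Location bits plus Mod/Ref bits, so '&' computes the most conservative
    // common refinement of two answers.
    enum { Ref = 1, Mod = 2, ModRef = Ref | Mod };
    enum { Nothing = 0, ArgPointees = 4, Anywhere = 8 | ArgPointees };

    enum Behavior {
      DoesNotAccessMemory = Nothing,
      OnlyReadsMemory     = Anywhere | Ref,
      Unknown             = Anywhere | ModRef,
    };

    int main() {
      // Base analysis knows nothing; the call site is marked readonly.
      assert((Unknown & OnlyReadsMemory) == OnlyReadsMemory);
      // A known non-accessing call stays that way under any other answer.
      assert((DoesNotAccessMemory & Unknown) == DoesNotAccessMemory);
    }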
+ if (BaseAlias != MustAlias) return BaseAlias; + + // Otherwise, we have a MustAlias. Since the base pointers alias each other + // exactly, see if the computed offset from the common pointer tells us + // about the relation of the resulting pointer. + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + int64_t GEP2BaseOffset; + SmallVector GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. + if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + + // Subtract the GEP2 pointer from the GEP1 pointer to find out their + // symbolic difference. + GEP1BaseOffset -= GEP2BaseOffset; + GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices); + + } else { + // Check to see if these two pointers are related by the getelementptr + // instruction. If one pointer is a GEP with a non-zero index of the other + // pointer, we know they cannot alias. + + // If both accesses are unknown size, we can't do anything useful here. + if (V1Size == UnknownSize && V2Size == UnknownSize) + return MayAlias; + + AliasResult R = aliasCheck(UnderlyingV1, UnknownSize, 0, + V2, V2Size, V2TBAAInfo); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. + // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. + if (GEP1BasePtr != UnderlyingV1) { + assert(TD == 0 && + "DecomposeGEPExpression and GetUnderlyingObject disagree!"); + return MayAlias; + } + } + + // In the two GEP Case, if there is no difference in the offsets of the + // computed pointers, the resultant pointers are a must alias. This + // hapens when we have two lexically identical GEP's (for example). + // + // In the other case, if we have getelementptr , 0, 0, 0, 0, ... and V2 + // must aliases the GEP, the end result is a must alias also. + if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) + return MustAlias; + + // If there is a difference betwen the pointers, but the difference is + // less than the size of the associated memory object, then we know + // that the objects are partially overlapping. + if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset < V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset < V1Size && + GEP1BaseOffset != INT64_MIN)) + return PartialAlias; + } + + // If we have a known constant offset, see if this offset is larger than the + // access size being queried. If so, and if no variable indices can remove + // pieces of this constant, then we know we have a no-alias. For example, + // &A[100] != &A. 
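The offset reasoning around this point is interval arithmetic: once the symbolic parts cancel, the accesses cover [Off, Off+V1Size) and [0, V2Size) from the common base, and they can only overlap if |Off| is smaller than the access size on the appropriate side. For the &A[100] vs &A example above with 4-byte accesses, Off = 400 >= 4, hence NoAlias. As a standalone check (UnknownSize and the INT64_MIN guard from the real code omitted):

    #include <cassert>
    #include <cstdint>

    // Given GEP = Base + Off bytes, can [GEP, GEP+GepSize) overlap
    // [Base, Base+BaseSize)?  Finite sizes only.
    static bool mayOverlap(int64_t Off, uint64_t BaseSize, uint64_t GepSize) {
      return Off >= 0 ? (uint64_t)Off < BaseSize : (uint64_t)(-Off) < GepSize;
    }

    int main() {
      // &A[100] vs &A, both accessed as i32 (4 bytes): offset 400 >= 4.
      assert(!mayOverlap(/*Off=*/400, /*BaseSize=*/4, /*GepSize=*/4));
      // &A + 2 vs &A with 4-byte accesses: the ranges partially overlap.
      assert(mayOverlap(/*Off=*/2, 4, 4));
    }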
+ + // In order to handle cases like &A[100][i] where i is an out of range + // subscript, we have to ignore all constant offset pieces that are a multiple + // of a scaled index. Do this by removing constant offsets that are a + // multiple of any of our variable indices. This allows us to transform + // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1 + // provides an offset of 4 bytes (assuming a <= 4 byte access). + for (unsigned i = 0, e = GEP1VariableIndices.size(); + i != e && GEP1BaseOffset;++i) + if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].Scale) + GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].Scale; + + // If our known offset is bigger than the access size, we know we don't have + // an alias. + if (GEP1BaseOffset) { + if (GEP1BaseOffset >= 0 ? + (V2Size != UnknownSize && (uint64_t)GEP1BaseOffset >= V2Size) : + (V1Size != UnknownSize && -(uint64_t)GEP1BaseOffset >= V1Size && + GEP1BaseOffset != INT64_MIN)) + return NoAlias; + } + + return MayAlias; +} + +/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select +/// instruction against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasSelect(const SelectInst *SI, uint64_t SISize, + const MDNode *SITBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If this select has been visited before, we're on a use-def cycle. + // Such cycles are only valid when PHI nodes are involved or in unreachable + // code. The visitPHI function catches cycles containing PHIs, but there + // could still be a cycle without PHIs in unreachable code. + if (!Visited.insert(SI)) + return MayAlias; + + // If the values are Selects with the same condition, we can do a more precise + // check: just check for aliases between the values on corresponding arms. + if (const SelectInst *SI2 = dyn_cast(V2)) + if (SI->getCondition() == SI2->getCondition()) { + AliasResult Alias = + aliasCheck(SI->getTrueValue(), SISize, SITBAAInfo, + SI2->getTrueValue(), V2Size, V2TBAAInfo); + if (Alias == MayAlias) + return MayAlias; + AliasResult ThisAlias = + aliasCheck(SI->getFalseValue(), SISize, SITBAAInfo, + SI2->getFalseValue(), V2Size, V2TBAAInfo); + if (ThisAlias != Alias) + return MayAlias; + return Alias; + } + + // If both arms of the Select node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + AliasResult Alias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getTrueValue(), SISize, SITBAAInfo); + if (Alias == MayAlias) + return MayAlias; + + // If V2 is visited, the recursive case will have been caught in the + // above aliasCheck call, so these subsequent calls to aliasCheck + // don't need to assume that V2 is being visited recursively. + Visited.erase(V2); + + AliasResult ThisAlias = + aliasCheck(V2, V2Size, V2TBAAInfo, SI->getFalseValue(), SISize, SITBAAInfo); + if (ThisAlias != Alias) + return MayAlias; + return Alias; +} + +// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction +// against another. +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, + const MDNode *PNTBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // The PHI node has already been visited, avoid recursion any further. + if (!Visited.insert(PN)) + return MayAlias; + + // If the values are PHIs in the same block, we can do a more precise + // as well as efficient check: just check for aliases between the values + // on corresponding edges. 
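aliasSelect above and aliasPHI below share one combining rule: a compound value gets a definite answer against V2 only if every constituent (both select arms, every phi input) produces that same answer; any disagreement, or any MayAlias, collapses the result to MayAlias. The rule isolated for the two-arm case:

    #include <cassert>

    enum R { No, May, Must };

    // Same-condition selects: the answer must hold on both arms, otherwise
    // nothing better than MayAlias can be said.
    static R combineArms(R TrueArms, R FalseArms) {
      if (TrueArms == May || TrueArms != FalseArms)
        return May;
      return TrueArms; // both arms agree on No or Must
    }

    int main() {
      assert(combineArms(No, No) == No);
      assert(combineArms(Must, No) == May);
    }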
+ if (const PHINode *PN2 = dyn_cast(V2)) + if (PN2->getParent() == PN->getParent()) { + AliasResult Alias = + aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), + V2Size, V2TBAAInfo); + if (Alias == MayAlias) + return MayAlias; + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + AliasResult ThisAlias = + aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, + PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), + V2Size, V2TBAAInfo); + if (ThisAlias != Alias) + return MayAlias; + } + return Alias; + } + + SmallPtrSet UniqueSrc; + SmallVector V1Srcs; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *PV1 = PN->getIncomingValue(i); + if (isa(PV1)) + // If any of the source itself is a PHI, return MayAlias conservatively + // to avoid compile time explosion. The worst possible case is if both + // sides are PHI nodes. In which case, this is O(m x n) time where 'm' + // and 'n' are the number of PHI sources. + return MayAlias; + if (UniqueSrc.insert(PV1)) + V1Srcs.push_back(PV1); + } + + AliasResult Alias = aliasCheck(V2, V2Size, V2TBAAInfo, + V1Srcs[0], PNSize, PNTBAAInfo); + // Early exit if the check of the first PHI source against V2 is MayAlias. + // Other results are not possible. + if (Alias == MayAlias) + return MayAlias; + + // If all sources of the PHI node NoAlias or MustAlias V2, then returns + // NoAlias / MustAlias. Otherwise, returns MayAlias. + for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { + Value *V = V1Srcs[i]; + + // If V2 is visited, the recursive case will have been caught in the + // above aliasCheck call, so these subsequent calls to aliasCheck + // don't need to assume that V2 is being visited recursively. + Visited.erase(V2); + + AliasResult ThisAlias = aliasCheck(V2, V2Size, V2TBAAInfo, + V, PNSize, PNTBAAInfo); + if (ThisAlias != Alias || ThisAlias == MayAlias) + return MayAlias; + } + + return Alias; +} + +// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, +// such as array references. +// +AliasAnalysis::AliasResult +BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, + const MDNode *V1TBAAInfo, + const Value *V2, uint64_t V2Size, + const MDNode *V2TBAAInfo) { + // If either of the memory references is empty, it doesn't matter what the + // pointer values are. + if (V1Size == 0 || V2Size == 0) + return NoAlias; + + // Strip off any casts if they exist. + V1 = V1->stripPointerCasts(); + V2 = V2->stripPointerCasts(); + + // Are we checking for alias of the same value? + if (V1 == V2) return MustAlias; + + if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) + return NoAlias; // Scalars cannot alias each other + + // Figure out what objects these things are pointing to if we can. + const Value *O1 = GetUnderlyingObject(V1, TD); + const Value *O2 = GetUnderlyingObject(V2, TD); + + // Null values in the default address space don't point to any object, so they + // don't alias any other pointer. + if (const ConstantPointerNull *CPN = dyn_cast(O1)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + if (const ConstantPointerNull *CPN = dyn_cast(O2)) + if (CPN->getType()->getAddressSpace() == 0) + return NoAlias; + + if (O1 != O2) { + // If V1/V2 point to two different objects we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return NoAlias; + + // Constant pointers can't alias with non-const isIdentifiedObject objects. 
+    if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+        (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+      return NoAlias;
+
+    // Arguments can't alias with local allocations or noalias calls
+    // in the same function.
+    if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+         (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+      return NoAlias;
+
+    // Most objects can't alias null.
+    if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+        (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
+      return NoAlias;
+
+    // If one pointer is the result of a call/invoke or load and the other is a
+    // non-escaping local object within the same function, then we know the
+    // object couldn't escape to a point where the call could return it.
+    //
+    // Note that if the pointers are in different functions, there are a
+    // variety of complications. A call with a nocapture argument may still
+    // temporarily store the nocapture argument's value in a temporary memory
+    // location if that memory location doesn't escape. Or it may pass a
+    // nocapture value to other functions as long as they don't capture it.
+    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+      return NoAlias;
+    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+      return NoAlias;
+  }
+
+  // If the size of one access is larger than the entire object on the other
+  // side, then we know such behavior is undefined and can assume no alias.
+  if (TD)
+    if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD)) ||
+        (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD)))
+      return NoAlias;
+
+  // FIXME: This isn't aggressively handling alias(GEP, PHI); for example, if
+  // the GEP can't simplify, we don't even look at the PHI cases.
+  if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+    std::swap(O1, O2);
+  }
+  if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) {
+    AliasResult Result = aliasGEP(GV1, V1Size, V2, V2Size, V2TBAAInfo, O1, O2);
+    if (Result != MayAlias) return Result;
+  }
+
+  if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+  }
+  if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
+    AliasResult Result = aliasPHI(PN, V1Size, V1TBAAInfo,
+                                  V2, V2Size, V2TBAAInfo);
+    if (Result != MayAlias) return Result;
+  }
+
+  if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
+    std::swap(V1, V2);
+    std::swap(V1Size, V2Size);
+  }
+  if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
+    AliasResult Result = aliasSelect(S1, V1Size, V1TBAAInfo,
+                                     V2, V2Size, V2TBAAInfo);
+    if (Result != MayAlias) return Result;
+  }
+
+  // If both pointers are pointing into the same object and one of them
+  // accesses the entire object, then the accesses must overlap in some way.
+  if (TD && O1 == O2)
+    if ((V1Size != UnknownSize && isObjectSize(O1, V1Size, *TD)) ||
+        (V2Size != UnknownSize && isObjectSize(O2, V2Size, *TD)))
+      return PartialAlias;
+
+  return AliasAnalysis::alias(Location(V1, V1Size, V1TBAAInfo),
+                              Location(V2, V2Size, V2TBAAInfo));
+}
diff --git a/final/lib/Analysis/CFGPrinter.cpp b/final/lib/Analysis/CFGPrinter.cpp
new file mode 100644
index 00000000000..7bb063fbbbc
--- /dev/null
+++ b/final/lib/Analysis/CFGPrinter.cpp
@@ -0,0 +1,165 @@
+//===- CFGPrinter.cpp - DOT printer for the control flow graph ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file defines a '-dot-cfg' analysis pass, which emits the +// cfg..dot file for each function in the program, with a graph of the +// CFG for that function. +// +// The other main feature of this file is that it implements the +// Function::viewCFG method, which is useful for debugging passes which operate +// on the CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CFGPrinter.h" + +#include "llvm/Pass.h" +using namespace llvm; + +namespace { + struct CFGViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFG(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGViewer::ID = 0; +INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true) + +namespace { + struct CFGOnlyViewer : public FunctionPass { + static char ID; // Pass identifcation, replacement for typeid + CFGOnlyViewer() : FunctionPass(ID) { + initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + F.viewCFGOnly(); + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyViewer::ID = 0; +INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only", + "View CFG of function (with no function bodies)", false, true) + +namespace { + struct CFGPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGPrinter() : FunctionPass(ID) { + initializeCFGPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." + F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGPrinter::ID = 0; +INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file", + false, true) + +namespace { + struct CFGOnlyPrinter : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + CFGOnlyPrinter() : FunctionPass(ID) { + initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F) { + std::string Filename = "cfg." 
+ F.getNameStr() + ".dot"; + errs() << "Writing '" << Filename << "'..."; + + std::string ErrorInfo; + raw_fd_ostream File(Filename.c_str(), ErrorInfo); + + if (ErrorInfo.empty()) + WriteGraph(File, (const Function*)&F, true); + else + errs() << " error opening file for writing!"; + errs() << "\n"; + return false; + } + void print(raw_ostream &OS, const Module* = 0) const {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; +} + +char CFGOnlyPrinter::ID = 0; +INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only", + "Print CFG of function to 'dot' file (with no function bodies)", + false, true) + +/// viewCFG - This function is meant for use from the debugger. You can just +/// say 'call F->viewCFG()' and a ghostview window should pop up from the +/// program, displaying the CFG of the current function. This depends on there +/// being a 'dot' and 'gv' program in your path. +/// +void Function::viewCFG() const { + ViewGraph(this, "cfg" + getNameStr()); +} + +/// viewCFGOnly - This function is meant for use from the debugger. It works +/// just like viewCFG, but it does not include the contents of basic blocks +/// into the nodes, just the label. If you are only interested in the CFG t +/// his can make the graph smaller. +/// +void Function::viewCFGOnly() const { + ViewGraph(this, "cfg" + getNameStr(), true); +} + +FunctionPass *llvm::createCFGPrinterPass () { + return new CFGPrinter(); +} + +FunctionPass *llvm::createCFGOnlyPrinterPass () { + return new CFGOnlyPrinter(); +} + diff --git a/final/lib/Analysis/CMakeLists.txt b/final/lib/Analysis/CMakeLists.txt new file mode 100644 index 00000000000..6be561718c7 --- /dev/null +++ b/final/lib/Analysis/CMakeLists.txt @@ -0,0 +1,59 @@ +add_llvm_library(LLVMAnalysis + AliasAnalysis.cpp + AliasAnalysisCounter.cpp + AliasAnalysisEvaluator.cpp + AliasDebugger.cpp + AliasSetTracker.cpp + Analysis.cpp + BasicAliasAnalysis.cpp + CFGPrinter.cpp + CaptureTracking.cpp + ConstantFolding.cpp + DIBuilder.cpp + DbgInfoPrinter.cpp + DebugInfo.cpp + DomPrinter.cpp + DominanceFrontier.cpp + IVUsers.cpp + InlineCost.cpp + InstCount.cpp + InstructionSimplify.cpp + Interval.cpp + IntervalPartition.cpp + LazyValueInfo.cpp + LibCallAliasAnalysis.cpp + LibCallSemantics.cpp + Lint.cpp + Loads.cpp + LoopDependenceAnalysis.cpp + LoopInfo.cpp + LoopPass.cpp + MemDepPrinter.cpp + MemoryBuiltins.cpp + MemoryDependenceAnalysis.cpp + ModuleDebugInfoPrinter.cpp + NoAliasAnalysis.cpp + PHITransAddr.cpp + PathNumbering.cpp + PathProfileInfo.cpp + PathProfileVerifier.cpp + PostDominators.cpp + ProfileEstimatorPass.cpp + ProfileInfo.cpp + ProfileInfoLoader.cpp + ProfileInfoLoaderPass.cpp + ProfileVerifierPass.cpp + RegionInfo.cpp + RegionPass.cpp + RegionPrinter.cpp + ScalarEvolution.cpp + ScalarEvolutionAliasAnalysis.cpp + ScalarEvolutionExpander.cpp + ScalarEvolutionNormalization.cpp + SparsePropagation.cpp + Trace.cpp + TypeBasedAliasAnalysis.cpp + ValueTracking.cpp + ) + +add_subdirectory(IPA) diff --git a/final/lib/Analysis/CaptureTracking.cpp b/final/lib/Analysis/CaptureTracking.cpp new file mode 100644 index 00000000000..42a54d9d1eb --- /dev/null +++ b/final/lib/Analysis/CaptureTracking.cpp @@ -0,0 +1,147 @@ +//===--- CaptureTracking.cpp - Determine whether a pointer is captured ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains routines that help determine which pointers are captured. +// A pointer value is captured if the function makes a copy of any part of the +// pointer that outlives the call. Not being captured means, more or less, that +// the pointer is only dereferenced and not stored in a global. Returning part +// of the pointer as the function return value may or may not count as capturing +// the pointer, depending on the context. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Instructions.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CallSite.h" +using namespace llvm; + +/// As its comment mentions, PointerMayBeCaptured can be expensive. +/// However, it's not easy for BasicAA to cache the result, because +/// it's an ImmutablePass. To work around this, bound queries at a +/// fixed number of uses. +/// +/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep +/// a cache. Then we can move the code from BasicAliasAnalysis into +/// that path, and remove this threshold. +static int const Threshold = 20; + +/// PointerMayBeCaptured - Return true if this pointer value may be captured +/// by the enclosing function (which is required to exist). This routine can +/// be expensive, so consider caching the results. The boolean ReturnCaptures +/// specifies whether returning the value (or part of it) from the function +/// counts as capturing it or not. The boolean StoreCaptures specified whether +/// storing the value (or part of it) into memory anywhere automatically +/// counts as capturing it or not. +bool llvm::PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, bool StoreCaptures) { + assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); + SmallVector Worklist; + SmallSet Visited; + int Count = 0; + + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + // If there are lots of uses, conservatively say that the value + // is captured to avoid taking too much compile time. + if (Count++ >= Threshold) + return true; + + Use *U = &UI.getUse(); + Visited.insert(U); + Worklist.push_back(U); + } + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast(U->getUser()); + V = U->get(); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + // Not captured if the callee is readonly, doesn't return a copy through + // its return value and doesn't unwind (a readonly function can leak bits + // by throwing an exception or not depending on the input value). + if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) + break; + + // Not captured if only passed via 'nocapture' arguments. Note that + // calling a function pointer does not in itself cause the pointer to + // be captured. This is a subtle point considering that (for example) + // the callee might return its own address. It is analogous to saying + // that loading a value from a pointer does not cause the pointer to be + // captured, even though the loaded value might be the pointer itself + // (think of self-referential objects). 
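+      //
+      // A minimal illustration (hypothetical code, not part of this patch)
+      // of the distinction being drawn:
+      //
+      //   char *Global;
+      //   void captures(char *P) { Global = P; }   // copy outlives the call
+      //   void inspects(char *P) { if (*P) ++*P; } // only dereferences P
+      //
+      // A 'nocapture' parameter promises the callee behaves like 'inspects',
+      // never like 'captures'.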
+ CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); + for (CallSite::arg_iterator A = B; A != E; ++A) + if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) + // The parameter is not marked 'nocapture' - captured. + return true; + // Only passed via 'nocapture' arguments, or is the called function - not + // captured. + break; + } + case Instruction::Load: + // Loading from a pointer does not cause it to be captured. + break; + case Instruction::VAArg: + // "va-arg" from a pointer does not cause it to be captured. + break; + case Instruction::Ret: + if (ReturnCaptures) + return true; + break; + case Instruction::Store: + if (V == I->getOperand(0)) + // Stored the pointer - conservatively assume it may be captured. + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + return true; + // Storing to the pointee does not cause the pointer to be captured. + break; + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + // The original value is not captured via this if the new value isn't. + for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) { + Use *U = &UI.getUse(); + if (Visited.insert(U)) + Worklist.push_back(U); + } + break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (isNoAliasCall(V->stripPointerCasts())) + if (ConstantPointerNull *CPN = + dyn_cast(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + break; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + return true; + default: + // Something else - be conservative and say it is captured. + return true; + } + } + + // All uses examined - not captured. + return false; +} diff --git a/final/lib/Analysis/ConstantFolding.cpp b/final/lib/Analysis/ConstantFolding.cpp new file mode 100644 index 00000000000..cd8d52c1c46 --- /dev/null +++ b/final/lib/Analysis/ConstantFolding.cpp @@ -0,0 +1,1400 @@ +//===-- ConstantFolding.cpp - Fold instructions into constants ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines routines for folding instructions into constants. +// +// Also, to supplement the basic VMCore ConstantExpr simplifications, +// this file defines some additional folding routines that can make use of +// TargetData information. These functions cannot go in VMCore due to library +// dependency issues. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Instructions.h" +#include "llvm/Intrinsics.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Target/TargetData.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/FEnv.h" +#include +#include +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Constant Folding internal helper functions +//===----------------------------------------------------------------------===// + +/// FoldBitCast - Constant fold bitcast, symbolically evaluating it with +/// TargetData. This always returns a non-null constant, but it may be a +/// ConstantExpr if unfoldable. +static Constant *FoldBitCast(Constant *C, const Type *DestTy, + const TargetData &TD) { + + // This only handles casts to vectors currently. + const VectorType *DestVTy = dyn_cast(DestTy); + if (DestVTy == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If this is a scalar -> vector cast, convert the input into a <1 x scalar> + // vector so the code below can handle it uniformly. + if (isa(C) || isa(C)) { + Constant *Ops = C; // don't take the address of C! + return FoldBitCast(ConstantVector::get(Ops), DestTy, TD); + } + + // If this is a bitcast from constant vector -> vector, fold it. + ConstantVector *CV = dyn_cast(C); + if (CV == 0) + return ConstantExpr::getBitCast(C, DestTy); + + // If the element types match, VMCore can fold it. + unsigned NumDstElt = DestVTy->getNumElements(); + unsigned NumSrcElt = CV->getNumOperands(); + if (NumDstElt == NumSrcElt) + return ConstantExpr::getBitCast(C, DestTy); + + const Type *SrcEltTy = CV->getType()->getElementType(); + const Type *DstEltTy = DestVTy->getElementType(); + + // Otherwise, we're changing the number of elements in a vector, which + // requires endianness information to do the right thing. For example, + // bitcast (<2 x i64> to <4 x i32>) + // folds to (little endian): + // <4 x i32> + // and to (big endian): + // <4 x i32> + + // First thing is first. We only want to think about integer here, so if + // we have something in FP form, recast it as integer. + if (DstEltTy->isFloatingPointTy()) { + // Fold to an vector of integers with same size as our FP type. + unsigned FPWidth = DstEltTy->getPrimitiveSizeInBits(); + const Type *DestIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumDstElt); + // Recursively handle this integer conversion, if possible. + C = FoldBitCast(C, DestIVTy, TD); + if (!C) return ConstantExpr::getBitCast(C, DestTy); + + // Finally, VMCore can handle this now that #elts line up. + return ConstantExpr::getBitCast(C, DestTy); + } + + // Okay, we know the destination is integer, if the input is FP, convert + // it to integer first. + if (SrcEltTy->isFloatingPointTy()) { + unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); + const Type *SrcIVTy = + VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); + // Ask VMCore to do the conversion now that #elts line up. + C = ConstantExpr::getBitCast(C, SrcIVTy); + CV = dyn_cast(C); + if (!CV) // If VMCore wasn't able to fold it, bail out. 
+ return C; + } + + // Now we know that the input and output vectors are both integer vectors + // of the same size, and that their #elements is not the same. Do the + // conversion here, which depends on whether the input or output has + // more elements. + bool isLittleEndian = TD.isLittleEndian(); + + SmallVector Result; + if (NumDstElt < NumSrcElt) { + // Handle: bitcast (<4 x i32> to <2 x i64>) + Constant *Zero = Constant::getNullValue(DstEltTy); + unsigned Ratio = NumSrcElt/NumDstElt; + unsigned SrcBitSize = SrcEltTy->getPrimitiveSizeInBits(); + unsigned SrcElt = 0; + for (unsigned i = 0; i != NumDstElt; ++i) { + // Build each element of the result. + Constant *Elt = Zero; + unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + Constant *Src = dyn_cast(CV->getOperand(SrcElt++)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + // Zero extend the element to the right size. + Src = ConstantExpr::getZExt(Src, Elt->getType()); + + // Shift it to the right place, depending on endianness. + Src = ConstantExpr::getShl(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + + // Mix it in. + Elt = ConstantExpr::getOr(Elt, Src); + } + Result.push_back(Elt); + } + } else { + // Handle: bitcast (<2 x i64> to <4 x i32>) + unsigned Ratio = NumDstElt/NumSrcElt; + unsigned DstBitSize = DstEltTy->getPrimitiveSizeInBits(); + + // Loop over each source value, expanding into multiple results. + for (unsigned i = 0; i != NumSrcElt; ++i) { + Constant *Src = dyn_cast(CV->getOperand(i)); + if (!Src) // Reject constantexpr elements. + return ConstantExpr::getBitCast(C, DestTy); + + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1); + for (unsigned j = 0; j != Ratio; ++j) { + // Shift the piece of the value into the right place, depending on + // endianness. + Constant *Elt = ConstantExpr::getLShr(Src, + ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + + // Truncate and remember this piece. + Result.push_back(ConstantExpr::getTrunc(Elt, DstEltTy)); + } + } + } + + return ConstantVector::get(Result); +} + + +/// IsConstantOffsetFromGlobal - If this constant is actually a constant offset +/// from a global, return the global and the constant. Because of +/// constantexprs, this function is recursive. +static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, + int64_t &Offset, const TargetData &TD) { + // Trivial case, constant is the global. + if ((GV = dyn_cast(C))) { + Offset = 0; + return true; + } + + // Otherwise, if this isn't a constant expr, bail out. + ConstantExpr *CE = dyn_cast(C); + if (!CE) return false; + + // Look through ptr->int and ptr->ptr casts. + if (CE->getOpcode() == Instruction::PtrToInt || + CE->getOpcode() == Instruction::BitCast) + return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); + + // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) + if (CE->getOpcode() == Instruction::GetElementPtr) { + // Cannot compute this if the element type of the pointer is missing size + // info. + if (!cast(CE->getOperand(0)->getType()) + ->getElementType()->isSized()) + return false; + + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. 
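+    //
+    // For instance (a sketch, assuming 4-byte i32 and no padding), for the
+    // C type
+    //   struct S { int A; int B[10]; };
+    // a constant GEP with indices (0, 1, 5) accumulates
+    //   offsetof(struct S, B) + 5 * sizeof(int) == 4 + 20 == 24
+    // bytes: the struct branch below adds the field offset and the
+    // sequential branch adds index * element-size.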
+ gep_type_iterator GTI = gep_type_begin(CE); + for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); + i != e; ++i, ++GTI) { + ConstantInt *CI = dyn_cast(*i); + if (!CI) return false; // Index isn't a simple constant? + if (CI->isZero()) continue; // Not adding anything. + + if (const StructType *ST = dyn_cast(*GTI)) { + // N = N + Offset + Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); + } else { + const SequentialType *SQT = cast(*GTI); + Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); + } + } + return true; + } + + return false; +} + +/// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the +/// constant being copied out of. ByteOffset is an offset into C. CurPtr is the +/// pointer to copy results into and BytesLeft is the number of bytes left in +/// the CurPtr buffer. TD is the target data. +static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, + unsigned char *CurPtr, unsigned BytesLeft, + const TargetData &TD) { + assert(ByteOffset <= TD.getTypeAllocSize(C->getType()) && + "Out of range access"); + + // If this element is zero or undefined, we can just return since *CurPtr is + // zero initialized. + if (isa(C) || isa(C)) + return true; + + if (ConstantInt *CI = dyn_cast(C)) { + if (CI->getBitWidth() > 64 || + (CI->getBitWidth() & 7) != 0) + return false; + + uint64_t Val = CI->getZExtValue(); + unsigned IntBytes = unsigned(CI->getBitWidth()/8); + + for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) { + CurPtr[i] = (unsigned char)(Val >> (ByteOffset * 8)); + ++ByteOffset; + } + return true; + } + + if (ConstantFP *CFP = dyn_cast(C)) { + if (CFP->getType()->isDoubleTy()) { + C = FoldBitCast(C, Type::getInt64Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + if (CFP->getType()->isFloatTy()){ + C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } + return false; + } + + if (ConstantStruct *CS = dyn_cast(C)) { + const StructLayout *SL = TD.getStructLayout(CS->getType()); + unsigned Index = SL->getElementContainingOffset(ByteOffset); + uint64_t CurEltOffset = SL->getElementOffset(Index); + ByteOffset -= CurEltOffset; + + while (1) { + // If the element access is to the element itself and not to tail padding, + // read the bytes from the element. + uint64_t EltSize = TD.getTypeAllocSize(CS->getOperand(Index)->getType()); + + if (ByteOffset < EltSize && + !ReadDataFromGlobal(CS->getOperand(Index), ByteOffset, CurPtr, + BytesLeft, TD)) + return false; + + ++Index; + + // Check to see if we read from the last struct element, if so we're done. + if (Index == CS->getType()->getNumElements()) + return true; + + // If we read all of the bytes we needed from this element we're done. + uint64_t NextEltOffset = SL->getElementOffset(Index); + + if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + return true; + + // Move to the next element of the struct. + CurPtr += NextEltOffset-CurEltOffset-ByteOffset; + BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + ByteOffset = 0; + CurEltOffset = NextEltOffset; + } + // not reached. 
+ } + + if (ConstantArray *CA = dyn_cast(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CA->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CA->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CA->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantVector *CV = dyn_cast(C)) { + uint64_t EltSize = TD.getTypeAllocSize(CV->getType()->getElementType()); + uint64_t Index = ByteOffset / EltSize; + uint64_t Offset = ByteOffset - Index * EltSize; + for (; Index != CV->getType()->getNumElements(); ++Index) { + if (!ReadDataFromGlobal(CV->getOperand(Index), Offset, CurPtr, + BytesLeft, TD)) + return false; + if (EltSize >= BytesLeft) + return true; + + Offset = 0; + BytesLeft -= EltSize; + CurPtr += EltSize; + } + return true; + } + + if (ConstantExpr *CE = dyn_cast(C)) { + if (CE->getOpcode() == Instruction::IntToPtr && + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, + BytesLeft, TD); + } + + // Otherwise, unknown initializer type. + return false; +} + +static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, + const TargetData &TD) { + const Type *LoadTy = cast(C->getType())->getElementType(); + const IntegerType *IntType = dyn_cast(LoadTy); + + // If this isn't an integer load we can't fold it directly. + if (!IntType) { + // If this is a float/double load, we can try folding it as an int32/64 load + // and then bitcast the result. This can be useful for union cases. Note + // that address spaces don't matter here since we're not going to result in + // an actual new load. + const Type *MapTy; + if (LoadTy->isFloatTy()) + MapTy = Type::getInt32PtrTy(C->getContext()); + else if (LoadTy->isDoubleTy()) + MapTy = Type::getInt64PtrTy(C->getContext()); + else if (LoadTy->isVectorTy()) { + MapTy = IntegerType::get(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy)); + MapTy = PointerType::getUnqual(MapTy); + } else + return 0; + + C = FoldBitCast(C, MapTy, TD); + if (Constant *Res = FoldReinterpretLoadFromConstPtr(C, TD)) + return FoldBitCast(Res, LoadTy, TD); + return 0; + } + + unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; + if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + + GlobalValue *GVal; + int64_t Offset; + if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) + return 0; + + GlobalVariable *GV = dyn_cast(GVal); + if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || + !GV->getInitializer()->getType()->isSized()) + return 0; + + // If we're loading off the beginning of the global, some bytes may be valid, + // but we don't try to handle this. + if (Offset < 0) return 0; + + // If we're not accessing anything in this constant, the result is undefined. 
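+  //
+  // For example (a sketch): with an 8-byte initializer, a 4-byte load at
+  // byte offset 8 or beyond touches no initialized bytes and folds to
+  // undef, while a load at offset 6 still overlaps bytes 6..7 and is
+  // assembled from RawBytes below.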
+ if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + return UndefValue::get(IntType); + + unsigned char RawBytes[32] = {0}; + if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + BytesLoaded, TD)) + return 0; + + APInt ResultVal = APInt(IntType->getBitWidth(), RawBytes[BytesLoaded-1]); + for (unsigned i = 1; i != BytesLoaded; ++i) { + ResultVal <<= 8; + ResultVal |= RawBytes[BytesLoaded-1-i]; + } + + return ConstantInt::get(IntType->getContext(), ResultVal); +} + +/// ConstantFoldLoadFromConstPtr - Return the value that a load from C would +/// produce if it is constant and determinable. If this is not determinable, +/// return null. +Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, + const TargetData *TD) { + // First, try the easy cases: + if (GlobalVariable *GV = dyn_cast(C)) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + return GV->getInitializer(); + + // If the loaded value isn't a constant expr, we can't handle it. + ConstantExpr *CE = dyn_cast(C); + if (!CE) return 0; + + if (CE->getOpcode() == Instruction::GetElementPtr) { + if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (Constant *V = + ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) + return V; + } + + // Instead of loading constant c string, use corresponding integer value + // directly if string length is small enough. + std::string Str; + if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) { + unsigned StrLen = Str.length(); + const Type *Ty = cast(CE->getType())->getElementType(); + unsigned NumBits = Ty->getPrimitiveSizeInBits(); + // Replace load with immediate integer if the result is an integer or fp + // value. + if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 && + (isa(Ty) || Ty->isFloatingPointTy())) { + APInt StrVal(NumBits, 0); + APInt SingleChar(NumBits, 0); + if (TD->isLittleEndian()) { + for (signed i = StrLen-1; i >= 0; i--) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + } else { + for (unsigned i = 0; i < StrLen; i++) { + SingleChar = (uint64_t) Str[i] & UCHAR_MAX; + StrVal = (StrVal << 8) | SingleChar; + } + // Append NULL at the end. + SingleChar = 0; + StrVal = (StrVal << 8) | SingleChar; + } + + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); + if (Ty->isFloatingPointTy()) + Res = ConstantExpr::getBitCast(Res, Ty); + return Res; + } + } + + // If this load comes from anywhere in a constant global, and if the global + // is all undef or zero, we know what it loads. + if (GlobalVariable *GV = + dyn_cast(GetUnderlyingObject(CE, TD))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { + const Type *ResTy = cast(C->getType())->getElementType(); + if (GV->getInitializer()->isNullValue()) + return Constant::getNullValue(ResTy); + if (isa(GV->getInitializer())) + return UndefValue::get(ResTy); + } + } + + // Try hard to fold loads from bitcasted strange and non-type-safe things. We + // currently don't do any of this for big endian systems. It can be + // generalized in the future if someone is interested. 
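+  //
+  // A typical case this catches (a sketch in C, not part of this patch):
+  //
+  //   union U { float F; int I; };
+  //   static const union U G = { 1.0f };
+  //   int Bits = G.I;  // folds to 0x3F800000 on a little-endian target
+  //
+  // The float initializer is read back through an int-typed load, which the
+  // reinterpret path below evaluates byte by byte.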
+ if (TD && TD->isLittleEndian()) + return FoldReinterpretLoadFromConstPtr(CE, *TD); + return 0; +} + +static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){ + if (LI->isVolatile()) return 0; + + if (Constant *C = dyn_cast(LI->getOperand(0))) + return ConstantFoldLoadFromConstPtr(C, TD); + + return 0; +} + +/// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. +/// Attempt to symbolically evaluate the result of a binary operator merging +/// these together. If target data info is available, it is provided as TD, +/// otherwise TD is null. +static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, + Constant *Op1, const TargetData *TD){ + // SROA + + // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. + // Fold (lshr (or X, Y), 32) -> (lshr [X/Y], 32) if one doesn't contribute + // bits. + + + // If the constant expr is something like &A[123] - &A[4].f, fold this into a + // constant. This happens frequently when iterating over a global array. + if (Opc == Instruction::Sub && TD) { + GlobalValue *GV1, *GV2; + int64_t Offs1, Offs2; + + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + GV1 == GV2) { + // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. + return ConstantInt::get(Op0->getType(), Offs1-Offs2); + } + } + + return 0; +} + +/// CastGEPIndices - If array indices are not pointer-sized integers, +/// explicitly cast them so that they aren't implicitly casted by the +/// getelementptr. +static Constant *CastGEPIndices(Constant *const *Ops, unsigned NumOps, + const Type *ResultTy, + const TargetData *TD) { + if (!TD) return 0; + const Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + + bool Any = false; + SmallVector NewIdxs; + for (unsigned i = 1; i != NumOps; ++i) { + if ((i == 1 || + !isa(GetElementPtrInst::getIndexedType(Ops[0]->getType(), + reinterpret_cast(Ops+1), + i-1))) && + Ops[i]->getType() != IntPtrTy) { + Any = true; + NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], + true, + IntPtrTy, + true), + Ops[i], IntPtrTy)); + } else + NewIdxs.push_back(Ops[i]); + } + if (!Any) return 0; + + Constant *C = + ConstantExpr::getGetElementPtr(Ops[0], &NewIdxs[0], NewIdxs.size()); + if (ConstantExpr *CE = dyn_cast(C)) + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; + return C; +} + +/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP +/// constant expression, do so. 
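+///
+/// For example (a sketch): the classic offsetof idiom, in C++ terms
+///   (size_t)&((S*)0)->Field
+/// arrives here as a constant GEP off a null base and folds to an inttoptr
+/// of the field offset rather than staying a gep expression.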
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, + const Type *ResultTy, + const TargetData *TD) { + Constant *Ptr = Ops[0]; + if (!TD || !cast(Ptr->getType())->getElementType()->isSized()) + return 0; + + const Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + + // If this is a constant expr gep that is effectively computing an + // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' + for (unsigned i = 1; i != NumOps; ++i) + if (!isa(Ops[i])) { + + // If this is "gep i8* Ptr, (sub 0, V)", fold this as: + // "inttoptr (sub (ptrtoint Ptr), V)" + if (NumOps == 2 && + cast(ResultTy)->getElementType()->isIntegerTy(8)) { + ConstantExpr *CE = dyn_cast(Ops[1]); + assert((CE == 0 || CE->getType() == IntPtrTy) && + "CastGEPIndices didn't canonicalize index types!"); + if (CE && CE->getOpcode() == Instruction::Sub && + CE->getOperand(0)->isNullValue()) { + Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType()); + Res = ConstantExpr::getSub(Res, CE->getOperand(1)); + Res = ConstantExpr::getIntToPtr(Res, ResultTy); + if (ConstantExpr *ResCE = dyn_cast(Res)) + Res = ConstantFoldConstantExpression(ResCE, TD); + return Res; + } + } + return 0; + } + + unsigned BitWidth = TD->getTypeSizeInBits(IntPtrTy); + APInt Offset = APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)Ops+1, NumOps-1)); + Ptr = cast(Ptr->stripPointerCasts()); + + // If this is a GEP of a GEP, fold it all into a single GEP. + while (GEPOperator *GEP = dyn_cast(Ptr)) { + SmallVector NestedOps(GEP->op_begin()+1, GEP->op_end()); + + // Do not try the incorporate the sub-GEP if some index is not a number. + bool AllConstantInt = true; + for (unsigned i = 0, e = NestedOps.size(); i != e; ++i) + if (!isa(NestedOps[i])) { + AllConstantInt = false; + break; + } + if (!AllConstantInt) + break; + + Ptr = cast(GEP->getOperand(0)); + Offset += APInt(BitWidth, + TD->getIndexedOffset(Ptr->getType(), + (Value**)NestedOps.data(), + NestedOps.size())); + Ptr = cast(Ptr->stripPointerCasts()); + } + + // If the base value for this address is a literal integer value, fold the + // getelementptr to the resulting integer value casted to the pointer type. + APInt BasePtr(BitWidth, 0); + if (ConstantExpr *CE = dyn_cast(Ptr)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *Base = dyn_cast(CE->getOperand(0))) + BasePtr = Base->getValue().zextOrTrunc(BitWidth); + if (Ptr->isNullValue() || BasePtr != 0) { + Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); + return ConstantExpr::getIntToPtr(C, ResultTy); + } + + // Otherwise form a regular getelementptr. Recompute the indices so that + // we eliminate over-indexing of the notional static type array bounds. + // This makes it easy to determine if the getelementptr is "inbounds". + // Also, this helps GlobalOpt do SROA on GlobalVariables. + const Type *Ty = Ptr->getType(); + SmallVector NewIdxs; + do { + if (const SequentialType *ATy = dyn_cast(Ty)) { + if (ATy->isPointerTy()) { + // The only pointer indexing we'll do is on the first index of the GEP. + if (!NewIdxs.empty()) + break; + + // Only handle pointers to sized types, not pointers to functions. + if (!ATy->getElementType()->isSized()) + return 0; + } + + // Determine which element of the array the offset points into. + APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); + const IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); + if (ElemSize == 0) + // The element size is 0. 
This may be [0 x Ty]*, so just use a zero + // index for this level and proceed to the next level to see if it can + // accommodate the offset. + NewIdxs.push_back(ConstantInt::get(IntPtrTy, 0)); + else { + // The element size is non-zero divide the offset by the element + // size (rounding down), to compute the index at this level. + APInt NewIdx = Offset.udiv(ElemSize); + Offset -= NewIdx * ElemSize; + NewIdxs.push_back(ConstantInt::get(IntPtrTy, NewIdx)); + } + Ty = ATy->getElementType(); + } else if (const StructType *STy = dyn_cast(Ty)) { + // Determine which field of the struct the offset points into. The + // getZExtValue is at least as safe as the StructLayout API because we + // know the offset is within the struct at this point. + const StructLayout &SL = *TD->getStructLayout(STy); + unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue()); + NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()), + ElIdx)); + Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx)); + Ty = STy->getTypeAtIndex(ElIdx); + } else { + // We've reached some non-indexable type. + break; + } + } while (Ty != cast(ResultTy)->getElementType()); + + // If we haven't used up the entire offset by descending the static + // type, then the offset is pointing into the middle of an indivisible + // member, so we can't simplify it. + if (Offset != 0) + return 0; + + // Create a GEP. + Constant *C = + ConstantExpr::getGetElementPtr(Ptr, &NewIdxs[0], NewIdxs.size()); + assert(cast(C->getType())->getElementType() == Ty && + "Computed GetElementPtr has unexpected type!"); + + // If we ended up indexing a member with a type that doesn't match + // the type of what the original indices indexed, add a cast. + if (Ty != cast(ResultTy)->getElementType()) + C = FoldBitCast(C, ResultTy, *TD); + + return C; +} + + + +//===----------------------------------------------------------------------===// +// Constant Folding public APIs +//===----------------------------------------------------------------------===// + +/// ConstantFoldInstruction - Try to constant fold the specified instruction. +/// If successful, the constant result is returned, if not, null is returned. +/// Note that this fails if not all of the operands are constant. Otherwise, +/// this function can only fail when attempting to fold instructions like loads +/// and stores, which have no constant expression form. +Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { + // Handle PHI nodes quickly here... + if (PHINode *PN = dyn_cast(I)) { + Constant *CommonValue = 0; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *Incoming = PN->getIncomingValue(i); + // If the incoming value is undef then skip it. Note that while we could + // skip the value if it is equal to the phi node itself we choose not to + // because that would break the rule that constant folding only applies if + // all operands are constants. + if (isa(Incoming)) + continue; + // If the incoming value is not a constant, or is a different constant to + // the one we saw previously, then give up. + Constant *C = dyn_cast(Incoming); + if (!C || (CommonValue && C != CommonValue)) + return 0; + CommonValue = C; + } + + // If we reach here, all incoming values are the same constant or undef. + return CommonValue ? CommonValue : UndefValue::get(PN->getType()); + } + + // Scan the operand list, checking to see if they are all constants, if so, + // hand off to ConstantFoldInstOperands. 
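+  //
+  // A typical driver for this function (a sketch, not part of this patch;
+  // the caller owns use-replacement and erasure):
+  //
+  //   for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+  //     if (Constant *C = ConstantFoldInstruction(I, TD)) {
+  //       I->replaceAllUsesWith(C);
+  //       // ...then erase *I if it is now trivially dead.
+  //     }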
+ SmallVector Ops; + for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) + if (Constant *Op = dyn_cast(*i)) + Ops.push_back(Op); + else + return 0; // All operands not constant! + + if (const CmpInst *CI = dyn_cast(I)) + return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1], + TD); + + if (const LoadInst *LI = dyn_cast(I)) + return ConstantFoldLoadInst(LI, TD); + + if (InsertValueInst *IVI = dyn_cast(I)) + return ConstantExpr::getInsertValue( + cast(IVI->getAggregateOperand()), + cast(IVI->getInsertedValueOperand()), + IVI->idx_begin(), IVI->getNumIndices()); + + if (ExtractValueInst *EVI = dyn_cast(I)) + return ConstantExpr::getExtractValue( + cast(EVI->getAggregateOperand()), + EVI->idx_begin(), EVI->getNumIndices()); + + return ConstantFoldInstOperands(I->getOpcode(), I->getType(), + Ops.data(), Ops.size(), TD); +} + +/// ConstantFoldConstantExpression - Attempt to fold the constant expression +/// using the specified TargetData. If successful, the constant result is +/// result is returned, if not, null is returned. +Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE, + const TargetData *TD) { + SmallVector Ops; + for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); + i != e; ++i) { + Constant *NewC = cast(*i); + // Recursively fold the ConstantExpr's operands. + if (ConstantExpr *NewCE = dyn_cast(NewC)) + NewC = ConstantFoldConstantExpression(NewCE, TD); + Ops.push_back(NewC); + } + + if (CE->isCompare()) + return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], + TD); + return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), + Ops.data(), Ops.size(), TD); +} + +/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the +/// specified opcode and operands. If successful, the constant result is +/// returned, if not, null is returned. Note that this function can fail when +/// attempting to fold instructions like loads and stores, which have no +/// constant expression form. +/// +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc +/// information, due to only being passed an opcode and operands. Constant +/// folding using this function strips this information. +/// +Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, + Constant* const* Ops, unsigned NumOps, + const TargetData *TD) { + // Handle easy binops first. + if (Instruction::isBinaryOp(Opcode)) { + if (isa(Ops[0]) || isa(Ops[1])) + if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) + return C; + + return ConstantExpr::get(Opcode, Ops[0], Ops[1]); + } + + switch (Opcode) { + default: return 0; + case Instruction::ICmp: + case Instruction::FCmp: assert(0 && "Invalid for compares"); + case Instruction::Call: + if (Function *F = dyn_cast(Ops[NumOps - 1])) + if (canConstantFoldCallTo(F)) + return ConstantFoldCall(F, Ops, NumOps - 1); + return 0; + case Instruction::PtrToInt: + // If the input is a inttoptr, eliminate the pair. This requires knowing + // the width of a pointer, so it can't be done in ConstantExpr::getCast. 
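+    //
+    // For example (a sketch, assuming 32-bit pointers): the round trip
+    //   ptrtoint(inttoptr(0x10000000007))   // i64 -> i8* -> i64
+    // keeps only the low 32 bits and yields 0x7; the mask-then-extend
+    // below models exactly that.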
+ if (ConstantExpr *CE = dyn_cast(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::IntToPtr) { + Constant *Input = CE->getOperand(0); + unsigned InWidth = Input->getType()->getScalarSizeInBits(); + if (TD->getPointerSizeInBits() < InWidth) { + Constant *Mask = + ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, + TD->getPointerSizeInBits())); + Input = ConstantExpr::getAnd(Input, Mask); + } + // Do a zext or trunc to get to the dest size. + return ConstantExpr::getIntegerCast(Input, DestTy, false); + } + } + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::IntToPtr: + // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if + // the int size is >= the ptr size. This requires knowing the width of a + // pointer, so it can't be done in ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast(Ops[0])) + if (TD && + TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && + CE->getOpcode() == Instruction::PtrToInt) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPToUI: + case Instruction::FPToSI: + return ConstantExpr::getCast(Opcode, Ops[0], DestTy); + case Instruction::BitCast: + if (TD) + return FoldBitCast(Ops[0], DestTy, *TD); + return ConstantExpr::getBitCast(Ops[0], DestTy); + case Instruction::Select: + return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); + case Instruction::ExtractElement: + return ConstantExpr::getExtractElement(Ops[0], Ops[1]); + case Instruction::InsertElement: + return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2]); + case Instruction::ShuffleVector: + return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); + case Instruction::GetElementPtr: + if (Constant *C = CastGEPIndices(Ops, NumOps, DestTy, TD)) + return C; + if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD)) + return C; + + return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1); + } +} + +/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare +/// instruction (icmp/fcmp) with the specified operands. If it fails, it +/// returns a constant expression of the specified operands. +/// +Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, + Constant *Ops0, Constant *Ops1, + const TargetData *TD) { + // fold: icmp (inttoptr x), null -> icmp x, 0 + // fold: icmp (ptrtoint x), 0 -> icmp x, null + // fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y + // fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y + // + // ConstantExpr::getCompare cannot do this, because it doesn't have TD + // around to know if bit truncation is happening. + if (ConstantExpr *CE0 = dyn_cast(Ops0)) { + if (TD && Ops1->isNullValue()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. 
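+      //
+      // For example (a sketch, 32-bit pointers): from a narrowed compare
+      //   (i8)ptrtoint(P) == 0   // inspects one byte of the address
+      // we cannot conclude P == null, since any pointer whose low byte is
+      // zero satisfies it. Hence the IntPtrTy width check below.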
+ if (CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD); + } + } + + if (ConstantExpr *CE1 = dyn_cast(Ops1)) { + if (TD && CE0->getOpcode() == CE1->getOpcode()) { + const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); + + if (CE0->getOpcode() == Instruction::IntToPtr) { + // Convert the integer value to the right size to ensure we get the + // proper extension or truncation. + Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), + IntPtrTy, false); + Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0), + IntPtrTy, false); + return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD); + } + + // Only do this transformation if the int is intptrty in size, otherwise + // there is a truncation or extension that we aren't modeling. + if ((CE0->getOpcode() == Instruction::PtrToInt && + CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) + return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), + CE1->getOperand(0), TD); + } + } + + // icmp eq (or x, y), 0 -> (icmp eq x, 0) & (icmp eq y, 0) + // icmp ne (or x, y), 0 -> (icmp ne x, 0) | (icmp ne y, 0) + if ((Predicate == ICmpInst::ICMP_EQ || Predicate == ICmpInst::ICMP_NE) && + CE0->getOpcode() == Instruction::Or && Ops1->isNullValue()) { + Constant *LHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), Ops1,TD); + Constant *RHS = + ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(1), Ops1,TD); + unsigned OpC = + Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or; + Constant *Ops[] = { LHS, RHS }; + return ConstantFoldInstOperands(OpC, LHS->getType(), Ops, 2, TD); + } + } + + return ConstantExpr::getCompare(Predicate, Ops0, Ops1); +} + + +/// ConstantFoldLoadThroughGEPConstantExpr - Given a constant and a +/// getelementptr constantexpr, return the constant value being addressed by the +/// constant expression, or null if something is funny and we can't decide. +Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, + ConstantExpr *CE) { + if (CE->getOperand(1) != Constant::getNullValue(CE->getOperand(1)->getType())) + return 0; // Do not allow stepping over the value! + + // Loop over all of the operands, tracking down which value we are + // addressing... 
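+  //
+  // For example (a sketch, not part of this patch): given the C initializer
+  //   struct G { int A; int B[2]; };
+  //   static const struct G Val = { 1, { 2, 3 } };
+  // folding a load of Val.B[1] selects operand 1 of the struct constant,
+  // then element 1 of the array constant, and returns the i32 constant 3.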
+ gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE); + for (++I; I != E; ++I) + if (const StructType *STy = dyn_cast(*I)) { + ConstantInt *CU = cast(I.getOperand()); + assert(CU->getZExtValue() < STy->getNumElements() && + "Struct index out of range!"); + unsigned El = (unsigned)CU->getZExtValue(); + if (ConstantStruct *CS = dyn_cast(C)) { + C = CS->getOperand(El); + } else if (isa(C)) { + C = Constant::getNullValue(STy->getElementType(El)); + } else if (isa(C)) { + C = UndefValue::get(STy->getElementType(El)); + } else { + return 0; + } + } else if (ConstantInt *CI = dyn_cast(I.getOperand())) { + if (const ArrayType *ATy = dyn_cast(*I)) { + if (CI->getZExtValue() >= ATy->getNumElements()) + return 0; + if (ConstantArray *CA = dyn_cast(C)) + C = CA->getOperand(CI->getZExtValue()); + else if (isa(C)) + C = Constant::getNullValue(ATy->getElementType()); + else if (isa(C)) + C = UndefValue::get(ATy->getElementType()); + else + return 0; + } else if (const VectorType *VTy = dyn_cast(*I)) { + if (CI->getZExtValue() >= VTy->getNumElements()) + return 0; + if (ConstantVector *CP = dyn_cast(C)) + C = CP->getOperand(CI->getZExtValue()); + else if (isa(C)) + C = Constant::getNullValue(VTy->getElementType()); + else if (isa(C)) + C = UndefValue::get(VTy->getElementType()); + else + return 0; + } else { + return 0; + } + } else { + return 0; + } + return C; +} + + +//===----------------------------------------------------------------------===// +// Constant Folding for Calls +// + +/// canConstantFoldCallTo - Return true if its even possible to fold a call to +/// the specified function. +bool +llvm::canConstantFoldCallTo(const Function *F) { + switch (F->getIntrinsicID()) { + case Intrinsic::sqrt: + case Intrinsic::powi: + case Intrinsic::bswap: + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: + case Intrinsic::x86_sse_cvtss2si: + case Intrinsic::x86_sse_cvtss2si64: + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvtsd2si: + case Intrinsic::x86_sse2_cvtsd2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + return true; + default: + return false; + case 0: break; + } + + if (!F->hasName()) return false; + StringRef Name = F->getName(); + + // In these cases, the check of the length is required. We don't want to + // return true for a name like "cos\0blah" which strcmp would return equal to + // "cos", but has length 8. 
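+  //
+  // StringRef's operator== compares lengths as well as bytes (a sketch):
+  //
+  //   StringRef N("cos\0blah", 8);
+  //   N == "cos";                  // false: lengths 8 and 3 differ
+  //   ::strcmp(N.data(), "cos");   // 0: strcmp stops at the embedded NUL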
+ switch (Name[0]) { + default: return false; + case 'a': + return Name == "acos" || Name == "asin" || + Name == "atan" || Name == "atan2"; + case 'c': + return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; + case 'e': + return Name == "exp"; + case 'f': + return Name == "fabs" || Name == "fmod" || Name == "floor"; + case 'l': + return Name == "log" || Name == "log10"; + case 'p': + return Name == "pow"; + case 's': + return Name == "sin" || Name == "sinh" || Name == "sqrt" || + Name == "sinf" || Name == "sqrtf"; + case 't': + return Name == "tan" || Name == "tanh"; + } +} + +static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, + const Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), + double V, double W, const Type *Ty) { + sys::llvm_fenv_clearexcept(); + V = NativeFP(V, W); + if (sys::llvm_fenv_testexcept()) { + sys::llvm_fenv_clearexcept(); + return 0; + } + + if (Ty->isFloatTy()) + return ConstantFP::get(Ty->getContext(), APFloat((float)V)); + if (Ty->isDoubleTy()) + return ConstantFP::get(Ty->getContext(), APFloat(V)); + llvm_unreachable("Can only constant fold float/double"); + return 0; // dummy return to suppress warning +} + +/// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer +/// conversion of a constant floating point. If roundTowardZero is false, the +/// default IEEE rounding is used (toward nearest, ties to even). This matches +/// the behavior of the non-truncating SSE instructions in the default rounding +/// mode. The desired integer type Ty is used to select how many bits are +/// available for the result. Returns null if the conversion cannot be +/// performed, otherwise returns the Constant value resulting from the +/// conversion. +static Constant *ConstantFoldConvertToInt(ConstantFP *Op, bool roundTowardZero, + const Type *Ty) { + assert(Op && "Called with NULL operand"); + APFloat Val(Op->getValueAPF()); + + // All of these conversion intrinsics form an integer of at most 64bits. + unsigned ResultWidth = cast(Ty)->getBitWidth(); + assert(ResultWidth <= 64 && + "Can only constant fold conversions to 64 and 32 bit ints"); + + uint64_t UIntVal; + bool isExact = false; + APFloat::roundingMode mode = roundTowardZero? APFloat::rmTowardZero + : APFloat::rmNearestTiesToEven; + APFloat::opStatus status = Val.convertToInteger(&UIntVal, ResultWidth, + /*isSigned=*/true, mode, + &isExact); + if (status != APFloat::opOK && status != APFloat::opInexact) + return 0; + return ConstantInt::get(Ty, UIntVal, /*isSigned=*/true); +} + +/// ConstantFoldCall - Attempt to constant fold a call to the specified function +/// with the specified arguments, returning null if unsuccessful. 
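+///
+/// For example (a sketch; Ctx is the LLVMContext in use and F is known to be
+/// 'double fabs(double)'):
+///
+///   Constant *Arg = ConstantFP::get(Type::getDoubleTy(Ctx), -3.0);
+///   Constant *Ops[] = { Arg };
+///   Constant *R = ConstantFoldCall(F, Ops, 1);  // ConstantFP 3.0 or null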
+/// ConstantFoldCall - Attempt to constant fold a call to the specified function
+/// with the specified arguments, returning null if unsuccessful.
+Constant *
+llvm::ConstantFoldCall(Function *F,
+                       Constant *const *Operands, unsigned NumOperands) {
+  if (!F->hasName()) return 0;
+  StringRef Name = F->getName();
+
+  const Type *Ty = F->getReturnType();
+  if (NumOperands == 1) {
+    if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) {
+      if (F->getIntrinsicID() == Intrinsic::convert_to_fp16) {
+        APFloat Val(Op->getValueAPF());
+
+        bool lost = false;
+        Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+
+        return ConstantInt::get(F->getContext(), Val.bitcastToAPInt());
+      }
+
+      if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+        return 0;
+
+      // We only fold functions with finite arguments. Folding NaN and inf is
+      // likely to be aborted with an exception anyway, and some host libms
+      // have known errors raising exceptions.
+      if (Op->getValueAPF().isNaN() || Op->getValueAPF().isInfinity())
+        return 0;
+
+      // Currently APFloat versions of these functions do not exist, so we use
+      // the host native double versions.  Float versions are not called
+      // directly but for all these it is true (float)(f((double)arg)) ==
+      // f(arg).  Long double not supported yet.
+      double V = Ty->isFloatTy() ? (double)Op->getValueAPF().convertToFloat() :
+                                   Op->getValueAPF().convertToDouble();
+      switch (Name[0]) {
+      case 'a':
+        if (Name == "acos")
+          return ConstantFoldFP(acos, V, Ty);
+        else if (Name == "asin")
+          return ConstantFoldFP(asin, V, Ty);
+        else if (Name == "atan")
+          return ConstantFoldFP(atan, V, Ty);
+        break;
+      case 'c':
+        if (Name == "ceil")
+          return ConstantFoldFP(ceil, V, Ty);
+        else if (Name == "cos")
+          return ConstantFoldFP(cos, V, Ty);
+        else if (Name == "cosh")
+          return ConstantFoldFP(cosh, V, Ty);
+        else if (Name == "cosf")
+          return ConstantFoldFP(cos, V, Ty);
+        break;
+      case 'e':
+        if (Name == "exp")
+          return ConstantFoldFP(exp, V, Ty);
+        break;
+      case 'f':
+        if (Name == "fabs")
+          return ConstantFoldFP(fabs, V, Ty);
+        else if (Name == "floor")
+          return ConstantFoldFP(floor, V, Ty);
+        break;
+      case 'l':
+        if (Name == "log" && V > 0)
+          return ConstantFoldFP(log, V, Ty);
+        else if (Name == "log10" && V > 0)
+          return ConstantFoldFP(log10, V, Ty);
+        else if (F->getIntrinsicID() == Intrinsic::sqrt &&
+                 (Ty->isFloatTy() || Ty->isDoubleTy())) {
+          if (V >= -0.0)
+            return ConstantFoldFP(sqrt, V, Ty);
+          else // Undefined
+            return Constant::getNullValue(Ty);
+        }
+        break;
+      case 's':
+        if (Name == "sin")
+          return ConstantFoldFP(sin, V, Ty);
+        else if (Name == "sinh")
+          return ConstantFoldFP(sinh, V, Ty);
+        else if (Name == "sqrt" && V >= 0)
+          return ConstantFoldFP(sqrt, V, Ty);
+        else if (Name == "sqrtf" && V >= 0)
+          return ConstantFoldFP(sqrt, V, Ty);
+        else if (Name == "sinf")
+          return ConstantFoldFP(sin, V, Ty);
+        break;
+      case 't':
+        if (Name == "tan")
+          return ConstantFoldFP(tan, V, Ty);
+        else if (Name == "tanh")
+          return ConstantFoldFP(tanh, V, Ty);
+        break;
+      default:
+        break;
+      }
+      return 0;
+    }
+
+    if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
+      switch (F->getIntrinsicID()) {
+      case Intrinsic::bswap:
+        return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
+      case Intrinsic::ctpop:
+        return ConstantInt::get(Ty, Op->getValue().countPopulation());
+      case Intrinsic::cttz:
+        return ConstantInt::get(Ty, Op->getValue().countTrailingZeros());
+      case Intrinsic::ctlz:
+        return ConstantInt::get(Ty, Op->getValue().countLeadingZeros());
+      case Intrinsic::convert_from_fp16: {
+        APFloat Val(Op->getValue());
+
+        bool lost = false;
+        APFloat::opStatus status =
+          Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+
+        // Conversion is always precise.
+        (void)status;
+        assert(status == APFloat::opOK && !lost &&
+               "Precision lost during fp16 constfolding");
+
+        return ConstantFP::get(F->getContext(), Val);
+      }
+      default:
+        return 0;
+      }
+    }
+
+    if (ConstantVector *Op = dyn_cast<ConstantVector>(Operands[0])) {
+      switch (F->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::x86_sse_cvtss2si:
+      case Intrinsic::x86_sse_cvtss2si64:
+      case Intrinsic::x86_sse2_cvtsd2si:
+      case Intrinsic::x86_sse2_cvtsd2si64:
+        if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+          return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/false, Ty);
+      case Intrinsic::x86_sse_cvttss2si:
+      case Intrinsic::x86_sse_cvttss2si64:
+      case Intrinsic::x86_sse2_cvttsd2si:
+      case Intrinsic::x86_sse2_cvttsd2si64:
+        if (ConstantFP *FPOp = dyn_cast<ConstantFP>(Op->getOperand(0)))
+          return ConstantFoldConvertToInt(FPOp, /*roundTowardZero=*/true, Ty);
+      }
+    }
+
+    if (isa<UndefValue>(Operands[0])) {
+      if (F->getIntrinsicID() == Intrinsic::bswap)
+        return Operands[0];
+      return 0;
+    }
+
+    return 0;
+  }
+
+  if (NumOperands == 2) {
+    if (ConstantFP *Op1 = dyn_cast<ConstantFP>(Operands[0])) {
+      if (!Ty->isFloatTy() && !Ty->isDoubleTy())
+        return 0;
+      double Op1V = Ty->isFloatTy() ?
+                      (double)Op1->getValueAPF().convertToFloat() :
+                      Op1->getValueAPF().convertToDouble();
+      if (ConstantFP *Op2 = dyn_cast<ConstantFP>(Operands[1])) {
+        if (Op2->getType() != Op1->getType())
+          return 0;
+
+        double Op2V = Ty->isFloatTy() ?
+                      (double)Op2->getValueAPF().convertToFloat():
+                      Op2->getValueAPF().convertToDouble();
+
+        if (Name == "pow")
+          return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
+        if (Name == "fmod")
+          return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
+        if (Name == "atan2")
+          return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
+      } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
+        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy())
+          return ConstantFP::get(F->getContext(),
+                                 APFloat((float)std::pow((float)Op1V,
+                                                 (int)Op2C->getZExtValue())));
+        if (F->getIntrinsicID() == Intrinsic::powi && Ty->isDoubleTy())
+          return ConstantFP::get(F->getContext(),
+                                 APFloat((double)std::pow((double)Op1V,
+                                                 (int)Op2C->getZExtValue())));
+      }
+      return 0;
+    }
+
+
+    if (ConstantInt *Op1 = dyn_cast<ConstantInt>(Operands[0])) {
+      if (ConstantInt *Op2 = dyn_cast<ConstantInt>(Operands[1])) {
+        switch (F->getIntrinsicID()) {
+        default: break;
+        case Intrinsic::sadd_with_overflow:
+        case Intrinsic::uadd_with_overflow:
+        case Intrinsic::ssub_with_overflow:
+        case Intrinsic::usub_with_overflow:
+        case Intrinsic::smul_with_overflow: {
+          APInt Res;
+          bool Overflow;
+          switch (F->getIntrinsicID()) {
+          default: assert(0 && "Invalid case");
+          case Intrinsic::sadd_with_overflow:
+            Res = Op1->getValue().sadd_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::uadd_with_overflow:
+            Res = Op1->getValue().uadd_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::ssub_with_overflow:
+            Res = Op1->getValue().ssub_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::usub_with_overflow:
+            Res = Op1->getValue().usub_ov(Op2->getValue(), Overflow);
+            break;
+          case Intrinsic::smul_with_overflow:
+            Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow);
+            break;
+          }
+          Constant *Ops[] = {
+            ConstantInt::get(F->getContext(), Res),
+            ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
+          };
+          return ConstantStruct::get(F->getContext(), Ops, 2, false);
+        }
+        }
+      }
+
+      return 0;
+    }
+    return 0;
+  }
+  return 0;
+}
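As a usage sketch of the two entry points above (assuming LLVM 2.9-era headers; the wrapper name is illustrative): folding a call to `llvm.sadd.with.overflow.i32` on two integer constants yields the two-element `{result, overflow}` struct built in the last branch.

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/Function.h"

    // F must be a declaration of an intrinsic accepted by
    // canConstantFoldCallTo, e.g. llvm.sadd.with.overflow.i32.
    static llvm::Constant *foldBinaryIntrinsic(llvm::Function *F,
                                               llvm::Constant *LHS,
                                               llvm::Constant *RHS) {
      if (!llvm::canConstantFoldCallTo(F))
        return 0;
      llvm::Constant *Ops[] = { LHS, RHS };
      return llvm::ConstantFoldCall(F, Ops, 2);
    }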
diff --git a/final/lib/Analysis/DIBuilder.cpp b/final/lib/Analysis/DIBuilder.cpp
new file mode 100644
index 00000000000..766624f87ce
--- /dev/null
+++ b/final/lib/Analysis/DIBuilder.cpp
@@ -0,0 +1,801 @@
+//===--- DIBuilder.cpp - Debug Information Builder ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the DIBuilder.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Dwarf.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) {
+  assert((Tag & LLVMDebugVersionMask) == 0 &&
+         "Tag too large for debug encoding!");
+  return ConstantInt::get(Type::getInt32Ty(VMContext), Tag | LLVMDebugVersion);
+}
+
+DIBuilder::DIBuilder(Module &m)
+  : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {}
+
+/// createCompileUnit - A CompileUnit provides an anchor for all debugging
+/// information generated during this instance of compilation.
+void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename,
+                                  StringRef Directory, StringRef Producer,
+                                  bool isOptimized, StringRef Flags,
+                                  unsigned RunTimeVer) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_compile_unit),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Lang),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    MDString::get(VMContext, Producer),
+    // Deprecate isMain field.
+    ConstantInt::get(Type::getInt1Ty(VMContext), true), // isMain
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    MDString::get(VMContext, Flags),
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer)
+  };
+  TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createFile - Create a file descriptor to hold debugging information
+/// for a file.
+DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) {
+  assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_file_type),
+    MDString::get(VMContext, Filename),
+    MDString::get(VMContext, Directory),
+    TheCU
+  };
+  return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createEnumerator - Create a single enumerator value.
+DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_enumerator),
+    MDString::get(VMContext, Name),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Val)
+  };
+  return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
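For orientation, a minimal sketch of how a front end drives this API (assuming the LLVM 2.9 headers; the function name and file paths are illustrative): one compile unit per Module, then file and basic-type descriptors hanging off it.

    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/Module.h"
    #include "llvm/Support/Dwarf.h"

    static void emitBasicDebugInfo(llvm::Module &M) {
      llvm::DIBuilder DIB(M);
      // Anchor: exactly one compile unit per module.
      DIB.createCompileUnit(llvm::dwarf::DW_LANG_C99, "t.c", "/tmp",
                            "my-frontend", /*isOptimized=*/false,
                            /*Flags=*/"", /*RunTimeVer=*/0);
      llvm::DIFile File = DIB.createFile("t.c", "/tmp");
      llvm::DIType IntTy =
          DIB.createBasicType("int", 32, 32, llvm::dwarf::DW_ATE_signed);
      (void)File; (void)IntTy;
    }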
+/// createBasicType - Create debugging information entry for a basic
+/// type, e.g. 'char'.
+DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
+                                  uint64_t AlignInBits,
+                                  unsigned Encoding) {
+  // Basic types are encoded in DIBasicType format. Line number, filename,
+  // offset and flags are always empty here.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_base_type),
+    TheCU,
+    MDString::get(VMContext, Name),
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    ConstantInt::get(Type::getInt32Ty(VMContext), Encoding)
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createQualifiedType - Create debugging information entry for a qualified
+/// type, e.g. 'const int'.
+DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) {
+  // Qualified types are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    TheCU,
+    MDString::get(VMContext, StringRef()), // Empty name.
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    FromTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createPointerType - Create debugging information entry for a pointer.
+DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits,
+                                    uint64_t AlignInBits, StringRef Name) {
+  // Pointer types are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type),
+    TheCU,
+    MDString::get(VMContext, Name),
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    PointeeTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createReferenceType - Create debugging information entry for a reference.
+DIType DIBuilder::createReferenceType(DIType RTy) {
+  // References are encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_reference_type),
+    TheCU,
+    NULL, // Name
+    NULL, // Filename
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    RTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+/// createTypedef - Create debugging information entry for a typedef.
+DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File,
+                                unsigned LineNo) {
+  // typedefs are encoded in DIDerivedType format.
+  assert(Ty.Verify() && "Invalid typedef type!");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_typedef),
+    Ty.getContext(),
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    Ty
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createFriend - Create debugging information entry for a 'friend'.
+DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) {
+  // friends are encoded in DIDerivedType format.
+  assert(Ty.Verify() && "Invalid type!");
+  assert(FriendTy.Verify() && "Invalid friend type!");
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_friend),
+    Ty,
+    NULL, // Name
+    Ty.getFile(),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags
+    FriendTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createInheritance - Create debugging information entry to establish
+/// an inheritance relationship between two types.
+DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
+                                    uint64_t BaseOffset, unsigned Flags) {
+  // TAG_inheritance is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_inheritance),
+    Ty,
+    NULL, // Name
+    Ty.getFile(),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align
+    ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    BaseTy
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createMemberType - Create debugging information entry for a member.
+DIType DIBuilder::createMemberType(StringRef Name,
+                                   DIFile File, unsigned LineNumber,
+                                   uint64_t SizeInBits, uint64_t AlignInBits,
+                                   uint64_t OffsetInBits, unsigned Flags,
+                                   DIType Ty) {
+  // TAG_member is encoded in DIDerivedType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_member),
+    File, // Or TheCU ? Ty ?
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    Ty
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
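Tying the member and composite entries together, a sketch of describing "struct Point { int x; };" (assumes the DIB, File and IntTy names from the earlier sketch, plus getOrCreateArray and createStructType defined later in this file):

    llvm::DIType buildPointDebugType(llvm::DIBuilder &DIB, llvm::DIFile File,
                                     llvm::DIType IntTy) {
      llvm::DIType XMember =
          DIB.createMemberType("x", File, /*LineNumber=*/2, /*SizeInBits=*/32,
                               /*AlignInBits=*/32, /*OffsetInBits=*/0,
                               /*Flags=*/0, IntTy);
      llvm::Value *Elts[] = { XMember };
      llvm::DIArray Members = DIB.getOrCreateArray(Elts, 1);
      return DIB.createStructType(File, "Point", File, /*LineNumber=*/1,
                                  /*SizeInBits=*/32, /*AlignInBits=*/32,
                                  /*Flags=*/0, Members, /*RunTimeLang=*/0);
    }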
+/// createClassType - Create debugging information entry for a class.
+DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name,
+                                  DIFile File, unsigned LineNumber,
+                                  uint64_t SizeInBits, uint64_t AlignInBits,
+                                  uint64_t OffsetInBits, unsigned Flags,
+                                  DIType DerivedFrom, DIArray Elements,
+                                  MDNode *VTableHolder, MDNode *TemplateParams) {
+  // TAG_class_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_class_type),
+    Context,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    DerivedFrom,
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    VTableHolder,
+    TemplateParams
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createTemplateTypeParameter - Create debugging information for template
+/// type parameter.
+DITemplateTypeParameter
+DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name,
+                                       DIType Ty, MDNode *File, unsigned LineNo,
+                                       unsigned ColumnNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter),
+    Context,
+    MDString::get(VMContext, Name),
+    Ty,
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+  };
+  return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0],
+                                             array_lengthof(Elts)));
+}
+
+/// createTemplateValueParameter - Create debugging information for template
+/// value parameter.
+DITemplateValueParameter
+DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name,
+                                        DIType Ty, uint64_t Val,
+                                        MDNode *File, unsigned LineNo,
+                                        unsigned ColumnNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter),
+    Context,
+    MDString::get(VMContext, Name),
+    Ty,
+    ConstantInt::get(Type::getInt64Ty(VMContext), Val),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo)
+  };
+  return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0],
+                                              array_lengthof(Elts)));
+}
+
+/// createStructType - Create debugging information entry for a struct.
+DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name,
+                                   DIFile File, unsigned LineNumber,
+                                   uint64_t SizeInBits, uint64_t AlignInBits,
+                                   unsigned Flags, DIArray Elements,
+                                   unsigned RunTimeLang) {
+  // TAG_structure_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_structure_type),
+    Context,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+/// createUnionType - Create debugging information entry for a union.
+DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name,
+                                  DIFile File,
+                                  unsigned LineNumber, uint64_t SizeInBits,
+                                  uint64_t AlignInBits, unsigned Flags,
+                                  DIArray Elements, unsigned RunTimeLang) {
+  // TAG_union_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_union_type),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createSubroutineType - Create subroutine type.
+DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) {
+  // TAG_subroutine_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type),
+    File,
+    MDString::get(VMContext, ""),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ParameterTypes,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createEnumerationType - Create debugging information entry for an
+/// enumeration.
+DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name,
+                                        DIFile File, unsigned LineNumber,
+                                        uint64_t SizeInBits,
+                                        uint64_t AlignInBits, DIArray Elements) {
+  // TAG_enumeration_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Elements,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum");
+  NMD->addOperand(Node);
+  return DIType(Node);
+}
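A note on createSubroutineType: by convention the first element of ParameterTypes is the return type, followed by the formal parameter types. A sketch for "int f(int)" (again assuming DIB, File and IntTy from the earlier sketches):

    llvm::DIType buildFnDebugType(llvm::DIBuilder &DIB, llvm::DIFile File,
                                  llvm::DIType IntTy) {
      llvm::Value *ParamTys[] = { IntTy, IntTy }; // return type first
      llvm::DIArray TyArray = DIB.getOrCreateArray(ParamTys, 2);
      return DIB.createSubroutineType(File, TyArray);
    }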
+/// createArrayType - Create debugging information entry for an array.
+DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
+                                  DIType Ty, DIArray Subscripts) {
+  // TAG_array_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_array_type),
+    TheCU,
+    MDString::get(VMContext, ""),
+    TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createVectorType - Create debugging information entry for a vector.
+DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits,
+                                   DIType Ty, DIArray Subscripts) {
+  // TAG_vector_type is encoded in DICompositeType format.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_vector_type),
+    TheCU,
+    MDString::get(VMContext, ""),
+    TheCU,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Size),
+    ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    Ty,
+    Subscripts,
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+  };
+  return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// createArtificialType - Create a new DIType with "artificial" flag set.
+DIType DIBuilder::createArtificialType(DIType Ty) {
+  if (Ty.isArtificial())
+    return Ty;
+
+  SmallVector<Value *, 9> Elts;
+  MDNode *N = Ty;
+  assert(N && "Unexpected input DIType!");
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i))
+      Elts.push_back(V);
+    else
+      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)));
+  }
+
+  unsigned CurFlags = Ty.getFlags();
+  CurFlags = CurFlags | DIType::FlagArtificial;
+
+  // Flags are stored at this slot.
+  Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags);
+
+  return DIType(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// retainType - Retain DIType in a module even if it is not referenced
+/// through debug info anchors.
+void DIBuilder::retainType(DIType T) {
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty");
+  NMD->addOperand(T);
+}
+
+/// createUnspecifiedParameter - Create unspecified type descriptor
+/// for the subroutine type.
+DIDescriptor DIBuilder::createUnspecifiedParameter() {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters)
+  };
+  return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1));
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType() {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
+
+/// createTemporaryType - Create a temporary forward-declared type.
+DIType DIBuilder::createTemporaryType(DIFile F) {
+  // Give the temporary MDNode a tag. It doesn't matter what tag we
+  // use here as long as DIType accepts it.
+  Value *Elts[] = {
+    GetTagConstant(VMContext, DW_TAG_base_type),
+    F.getCompileUnit(),
+    NULL,
+    F
+  };
+  MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts));
+  return DIType(Node);
+}
+
+/// getOrCreateArray - Get a DIArray, create one if required.
+DIArray DIBuilder::getOrCreateArray(Value *const *Elements,
+                                    unsigned NumElements) {
+  if (NumElements == 0) {
+    Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext));
+    return DIArray(MDNode::get(VMContext, &Null, 1));
+  }
+  return DIArray(MDNode::get(VMContext, Elements, NumElements));
+}
+
+/// getOrCreateSubrange - Create a descriptor for a value range.  This
+/// implicitly uniques the values returned.
+DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Lo),
+    ConstantInt::get(Type::getInt64Ty(VMContext), Hi)
+  };
+
+  return DISubrange(MDNode::get(VMContext, &Elts[0], 3));
+}
+
+/// createGlobalVariable - Create a new descriptor for the specified global.
+DIGlobalVariable DIBuilder::
+createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    TheCU,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+  return DIGlobalVariable(Node);
+}
+
+/// createStaticVariable - Create a new descriptor for the specified static
+/// variable.
+DIGlobalVariable DIBuilder::
+createStaticVariable(DIDescriptor Context, StringRef Name,
+                     StringRef LinkageName, DIFile F, unsigned LineNumber,
+                     DIType Ty, bool isLocalToUnit, llvm::Value *Val) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_variable),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition */
+    Val
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+  NMD->addOperand(Node);
+  return DIGlobalVariable(Node);
+}
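A sketch of describing an IR global with the entry point above (assumes DIB, File and IntTy as before; G is the llvm::GlobalVariable being described):

    #include "llvm/GlobalVariable.h"

    void describeGlobal(llvm::DIBuilder &DIB, llvm::DIFile File,
                        llvm::DIType IntTy, llvm::GlobalVariable *G) {
      // The descriptor is also anchored in !llvm.dbg.gv by the builder.
      DIB.createGlobalVariable("g", File, /*LineNumber=*/1, IntTy,
                               /*isLocalToUnit=*/false, G);
    }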
+/// createLocalVariable - Create a new descriptor for the specified variable.
+DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope,
+                                          StringRef Name, DIFile File,
+                                          unsigned LineNo, DIType Ty,
+                                          bool AlwaysPreserve, unsigned Flags,
+                                          unsigned ArgNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, Tag),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))),
+    Ty,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags)
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+  if (AlwaysPreserve) {
+    // The optimizer may remove local variables. If there is an interest
+    // to preserve variable info in such a situation then stash it in a
+    // named mdnode.
+    DISubprogram Fn(getDISubprogram(Scope));
+    StringRef FName = "fn";
+    if (Fn.getFunction())
+      FName = Fn.getFunction()->getName();
+    char One = '\1';
+    if (FName.startswith(StringRef(&One, 1)))
+      FName = FName.substr(1);
+    NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName);
+    FnLocals->addOperand(Node);
+  }
+  return DIVariable(Node);
+}
+
+/// createComplexVariable - Create a new descriptor for the specified variable
+/// which has a complex address expression for its address.
+DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope,
+                                            StringRef Name, DIFile F,
+                                            unsigned LineNo,
+                                            DIType Ty, Value *const *Addr,
+                                            unsigned NumAddr, unsigned ArgNo) {
+  SmallVector<Value *, 15> Elts;
+  Elts.push_back(GetTagConstant(VMContext, Tag));
+  Elts.push_back(Scope);
+  Elts.push_back(MDString::get(VMContext, Name));
+  Elts.push_back(F);
+  Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext),
+                                  (LineNo | (ArgNo << 24))));
+  Elts.push_back(Ty);
+  Elts.append(Addr, Addr+NumAddr);
+
+  return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size()));
+}
+
+/// createFunction - Create a new descriptor for the specified function.
+DISubprogram DIBuilder::createFunction(DIDescriptor Context,
+                                       StringRef Name,
+                                       StringRef LinkageName,
+                                       DIFile File, unsigned LineNo,
+                                       DIType Ty,
+                                       bool isLocalToUnit, bool isDefinition,
+                                       unsigned Flags, bool isOptimized,
+                                       Function *Fn) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    ConstantInt::get(Type::getInt32Ty(VMContext), 0),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
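Putting the subprogram and variable entries together, a sketch (assumes DIB, File, IntTy, the FnTy from the subroutine-type sketch, and an existing llvm::Function *Fn): a defined function "f" with one described argument.

    void describeFunction(llvm::DIBuilder &DIB, llvm::DIFile File,
                          llvm::DIType FnTy, llvm::DIType IntTy,
                          llvm::Function *Fn) {
      llvm::DISubprogram SP =
          DIB.createFunction(File, "f", "f", File, /*LineNo=*/1, FnTy,
                             /*isLocalToUnit=*/false, /*isDefinition=*/true,
                             /*Flags=*/0, /*isOptimized=*/false, Fn);
      // ArgNo is packed into the line-number operand (LineNo | ArgNo << 24).
      DIB.createLocalVariable(llvm::dwarf::DW_TAG_arg_variable, SP, "a", File,
                              /*LineNo=*/1, IntTy, /*AlwaysPreserve=*/true,
                              /*Flags=*/0, /*ArgNo=*/1);
    }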
+/// createMethod - Create a new descriptor for the specified C++ method.
+DISubprogram DIBuilder::createMethod(DIDescriptor Context,
+                                     StringRef Name,
+                                     StringRef LinkageName,
+                                     DIFile F,
+                                     unsigned LineNo, DIType Ty,
+                                     bool isLocalToUnit,
+                                     bool isDefinition,
+                                     unsigned VK, unsigned VIndex,
+                                     MDNode *VTableHolder,
+                                     unsigned Flags,
+                                     bool isOptimized,
+                                     Function *Fn) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    Context,
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, Name),
+    MDString::get(VMContext, LinkageName),
+    F,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+    Ty,
+    ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+    VTableHolder,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
+    ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+    Fn
+  };
+  MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts));
+
+  // Create a named metadata so that we do not lose this mdnode.
+  NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+  NMD->addOperand(Node);
+  return DISubprogram(Node);
+}
+
+/// createNameSpace - This creates new descriptor for a namespace
+/// with the specified parent scope.
+DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name,
+                                       DIFile File, unsigned LineNo) {
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_namespace),
+    Scope,
+    MDString::get(VMContext, Name),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
+  };
+  return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File,
+                                             unsigned Line, unsigned Col) {
+  // Defeat MDNode uniquing for lexical blocks by using a unique id.
+  static unsigned int unique_id = 0;
+  Value *Elts[] = {
+    GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block),
+    Scope,
+    ConstantInt::get(Type::getInt32Ty(VMContext), Line),
+    ConstantInt::get(Type::getInt32Ty(VMContext), Col),
+    File,
+    ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++)
+  };
+  return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)));
+}
+
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+                                      Instruction *InsertBefore) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+  return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+}
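A design note: insertDeclare pins a variable to a memory location (typically an alloca) for its whole lifetime, while insertDbgValueIntrinsic below records the variable's value at a single program point, which is what optimized code usually needs. A minimal sketch of the former (AI, DbgVar and IP are assumed to already exist):

    #include "llvm/Analysis/DIBuilder.h"
    #include "llvm/Instructions.h"

    void declareLocal(llvm::DIBuilder &DIB, llvm::AllocaInst *AI,
                      llvm::DIVariable DbgVar, llvm::Instruction *IP) {
      // Emits a call to llvm.dbg.declare before IP, tying AI to DbgVar.
      DIB.insertDeclare(AI, DbgVar, IP);
    }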
+/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
+                                      BasicBlock *InsertAtEnd) {
+  assert(Storage && "no storage passed to dbg.declare");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare");
+  if (!DeclareFn)
+    DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
+
+  Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo };
+
+  // If this block already has a terminator then insert this intrinsic
+  // before the terminator.
+  if (TerminatorInst *T = InsertAtEnd->getTerminator())
+    return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+  else
+    return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+                                                DIVariable VarInfo,
+                                                Instruction *InsertBefore) {
+  assert(V && "no value passed to dbg.value");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+                    VarInfo };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
+                                                DIVariable VarInfo,
+                                                BasicBlock *InsertAtEnd) {
+  assert(V && "no value passed to dbg.value");
+  assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
+                    ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
+                    VarInfo };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
+
diff --git a/final/lib/Analysis/DbgInfoPrinter.cpp b/final/lib/Analysis/DbgInfoPrinter.cpp
new file mode 100644
index 00000000000..b23c3514d0b
--- /dev/null
+++ b/final/lib/Analysis/DbgInfoPrinter.cpp
@@ -0,0 +1,224 @@
+//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that prints instructions, and associated debug
+// info:
+//
+//   - source/line/col information
+//   - original variable name
+//   - original type name
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Metadata.h"
+#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+PrintDirectory("print-fullpath",
+               cl::desc("Print fullpath when printing debug info"),
+               cl::Hidden);
+
+namespace {
+  class PrintDbgInfo : public FunctionPass {
+    raw_ostream &Out;
+    void printVariableDeclaration(const Value *V);
+  public:
+    static char ID; // Pass identification
+    PrintDbgInfo() : FunctionPass(ID), Out(errs()) {
+      initializePrintDbgInfoPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+  };
+  char PrintDbgInfo::ID = 0;
+}
+
+INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo",
+                "Print debug info in human readable form", false, false)
+
+FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); }
+
+/// Find the debug info descriptor corresponding to this global variable.
+static Value *findDbgGlobalDeclare(GlobalVariable *V) {
+  const Module *M = V->getParent();
+  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv");
+  if (!NMD)
+    return 0;
+
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+    if (!DIG.isGlobalVariable())
+      continue;
+    if (DIGlobalVariable(DIG).getGlobal() == V)
+      return DIG;
+  }
+  return 0;
+}
+
+/// Find the debug info descriptor corresponding to this function.
+static Value *findDbgSubprogramDeclare(Function *V) {
+  const Module *M = V->getParent();
+  NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp");
+  if (!NMD)
+    return 0;
+
+  for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+    DIDescriptor DIG(cast<MDNode>(NMD->getOperand(i)));
+    if (!DIG.isSubprogram())
+      continue;
+    if (DISubprogram(DIG).getFunction() == V)
+      return DIG;
+  }
+  return 0;
+}
+
+/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any.
+/// It looks through pointer casts too.
+static const DbgDeclareInst *findDbgDeclare(const Value *V) {
+  V = V->stripPointerCasts();
+
+  if (!isa<Instruction>(V) && !isa<Argument>(V))
+    return 0;
+
+  const Function *F = NULL;
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    F = I->getParent()->getParent();
+  else if (const Argument *A = dyn_cast<Argument>(V))
+    F = A->getParent();
+
+  for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI)
+    for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end();
+         BI != BE; ++BI)
+      if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+        if (DDI->getAddress() == V)
+          return DDI;
+
+  return 0;
+}
+
+static bool getLocationInfo(const Value *V, std::string &DisplayName,
+                            std::string &Type, unsigned &LineNo,
+                            std::string &File, std::string &Dir) {
+  DICompileUnit Unit;
+  DIType TypeD;
+
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(const_cast<Value*>(V))) {
+    Value *DIGV = findDbgGlobalDeclare(GV);
+    if (!DIGV) return false;
+    DIGlobalVariable Var(cast<MDNode>(DIGV));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else if (Function *F = dyn_cast<Function>(const_cast<Value*>(V))) {
+    Value *DIF = findDbgSubprogramDeclare(F);
+    if (!DIF) return false;
+    DISubprogram Var(cast<MDNode>(DIF));
+
+    StringRef D = Var.getDisplayName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  } else {
+    const DbgDeclareInst *DDI = findDbgDeclare(V);
+    if (!DDI) return false;
+    DIVariable Var(cast<MDNode>(DDI->getVariable()));
+
+    StringRef D = Var.getName();
+    if (!D.empty())
+      DisplayName = D;
+    LineNo = Var.getLineNumber();
+    Unit = Var.getCompileUnit();
+    TypeD = Var.getType();
+  }
+
+  StringRef T = TypeD.getName();
+  if (!T.empty())
+    Type = T;
+  StringRef F = Unit.getFilename();
+  if (!F.empty())
+    File = F;
+  StringRef D = Unit.getDirectory();
+  if (!D.empty())
+    Dir = D;
+  return true;
+}
+
+void PrintDbgInfo::printVariableDeclaration(const Value *V) {
+  std::string DisplayName, File, Directory, Type;
+  unsigned LineNo;
+
+  if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory))
+    return;
+
+  Out << "; ";
+  WriteAsOperand(Out, V, false, 0);
+  if (isa<Function>(V))
+    Out << " is function " << DisplayName
+        << " of type " << Type << " declared at ";
+  else
+    Out << " is variable " << DisplayName
+        << " of type " << Type << " declared at ";
+
+  if (PrintDirectory)
+    Out << Directory << "/";
+
+  Out << File << ":" << LineNo << "\n";
+}
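For completeness, a sketch of running this printer programmatically over a module (assumes a populated llvm::Module; the same pass is reachable from opt as -print-dbginfo):

    #include "llvm/PassManager.h"
    #include "llvm/Analysis/Passes.h"

    void printDebugInfo(llvm::Module &M) {
      llvm::PassManager PM;
      PM.add(llvm::createDbgInfoPrinterPass()); // PM owns and frees the pass
      PM.run(M);
    }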
+bool PrintDbgInfo::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  Out << "function " << F.getName() << "\n\n";
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    BasicBlock *BB = I;
+
+    if (I != F.begin() && (pred_begin(BB) == pred_end(BB)))
+      // Skip dead blocks.
+      continue;
+
+    Out << BB->getName();
+    Out << ":";
+
+    Out << "\n";
+
+    for (BasicBlock::const_iterator i = BB->begin(), e = BB->end();
+         i != e; ++i) {
+
+      printVariableDeclaration(i);
+
+      if (const User *U = dyn_cast<User>(i)) {
+        for (unsigned i = 0; i < U->getNumOperands(); i++)
+          printVariableDeclaration(U->getOperand(i));
+      }
+    }
+  }
+  return false;
+}
diff --git a/final/lib/Analysis/DebugInfo.cpp b/final/lib/Analysis/DebugInfo.cpp
new file mode 100644
index 00000000000..67f8147f4d6
--- /dev/null
+++ b/final/lib/Analysis/DebugInfo.cpp
@@ -0,0 +1,948 @@
+//===--- DebugInfo.cpp - Debug Information Helper Classes -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the helper classes used to build and interpret debug
+// information in LLVM IR form.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+//===----------------------------------------------------------------------===//
+// DIDescriptor
+//===----------------------------------------------------------------------===//
+
+DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) {
+}
+
+DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) {
+}
+
+StringRef
+DIDescriptor::getStringField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return StringRef();
+
+  if (Elt < DbgNode->getNumOperands())
+    if (MDString *MDS = dyn_cast_or_null<MDString>(DbgNode->getOperand(Elt)))
+      return MDS->getString();
+
+  return StringRef();
+}
+
+uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(DbgNode->getOperand(Elt)))
+      return CI->getZExtValue();
+
+  return 0;
+}
+
+DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return DIDescriptor();
+
+  if (Elt < DbgNode->getNumOperands())
+    return
+      DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)));
+  return DIDescriptor();
+}
+
+GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<GlobalVariable>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+Constant *DIDescriptor::getConstantField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<Constant>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+  if (DbgNode == 0)
+    return 0;
+
+  if (Elt < DbgNode->getNumOperands())
+    return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+  return 0;
+}
+
+unsigned DIVariable::getNumAddrElements() const {
+  if (getVersion() <= llvm::LLVMDebugVersion8)
+    return DbgNode->getNumOperands()-6;
+  return DbgNode->getNumOperands()-7;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Predicates
+//===----------------------------------------------------------------------===//
+
+/// isBasicType - Return true if the specified tag is legal for
+/// DIBasicType.
+bool DIDescriptor::isBasicType() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_base_type;
+}
+
+/// isDerivedType - Return true if the specified tag is legal for
+/// DIDerivedType.
+bool DIDescriptor::isDerivedType() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_typedef:
+  case dwarf::DW_TAG_pointer_type:
+  case dwarf::DW_TAG_reference_type:
+  case dwarf::DW_TAG_const_type:
+  case dwarf::DW_TAG_volatile_type:
+  case dwarf::DW_TAG_restrict_type:
+  case dwarf::DW_TAG_member:
+  case dwarf::DW_TAG_inheritance:
+  case dwarf::DW_TAG_friend:
+    return true;
+  default:
+    // CompositeTypes are currently modelled as DerivedTypes.
+    return isCompositeType();
+  }
+}
+
+/// isCompositeType - Return true if the specified tag is legal for
+/// DICompositeType.
+bool DIDescriptor::isCompositeType() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_array_type:
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_enumeration_type:
+  case dwarf::DW_TAG_vector_type:
+  case dwarf::DW_TAG_subroutine_type:
+  case dwarf::DW_TAG_class_type:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isVariable - Return true if the specified tag is legal for DIVariable.
+bool DIDescriptor::isVariable() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_auto_variable:
+  case dwarf::DW_TAG_arg_variable:
+  case dwarf::DW_TAG_return_variable:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// isType - Return true if the specified tag is legal for DIType.
+bool DIDescriptor::isType() const {
+  return isBasicType() || isCompositeType() || isDerivedType();
+}
+
+/// isSubprogram - Return true if the specified tag is legal for
+/// DISubprogram.
+bool DIDescriptor::isSubprogram() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_subprogram;
+}
+
+/// isGlobalVariable - Return true if the specified tag is legal for
+/// DIGlobalVariable.
+bool DIDescriptor::isGlobalVariable() const {
+  return DbgNode && (getTag() == dwarf::DW_TAG_variable ||
+                     getTag() == dwarf::DW_TAG_constant);
+}
+
+/// isGlobal - Return true if the specified tag is legal for DIGlobal.
+bool DIDescriptor::isGlobal() const {
+  return isGlobalVariable();
+}
+
+/// isUnspecifiedParmeter - Return true if the specified tag is
+/// DW_TAG_unspecified_parameters.
+bool DIDescriptor::isUnspecifiedParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_unspecified_parameters;
+}
+
+/// isScope - Return true if the specified tag is one of the scope
+/// related tags.
+bool DIDescriptor::isScope() const {
+  if (!DbgNode) return false;
+  switch (getTag()) {
+  case dwarf::DW_TAG_compile_unit:
+  case dwarf::DW_TAG_lexical_block:
+  case dwarf::DW_TAG_subprogram:
+  case dwarf::DW_TAG_namespace:
+    return true;
+  default:
+    break;
+  }
+  return false;
+}
+
+/// isTemplateTypeParameter - Return true if the specified tag is
+/// DW_TAG_template_type_parameter.
+bool DIDescriptor::isTemplateTypeParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_type_parameter;
+}
+
+/// isTemplateValueParameter - Return true if the specified tag is
+/// DW_TAG_template_value_parameter.
+bool DIDescriptor::isTemplateValueParameter() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter;
+}
+
+/// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit.
+bool DIDescriptor::isCompileUnit() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_compile_unit;
+}
+
+/// isFile - Return true if the specified tag is DW_TAG_file_type.
+bool DIDescriptor::isFile() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_file_type;
+}
+
+/// isNameSpace - Return true if the specified tag is DW_TAG_namespace.
+bool DIDescriptor::isNameSpace() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_namespace;
+}
+
+/// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block.
+bool DIDescriptor::isLexicalBlock() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_lexical_block;
+}
+
+/// isSubrange - Return true if the specified tag is DW_TAG_subrange_type.
+bool DIDescriptor::isSubrange() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_subrange_type;
+}
+
+/// isEnumerator - Return true if the specified tag is DW_TAG_enumerator.
+bool DIDescriptor::isEnumerator() const {
+  return DbgNode && getTag() == dwarf::DW_TAG_enumerator;
+}
+
+//===----------------------------------------------------------------------===//
+// Simple Descriptor Constructors and other Methods
+//===----------------------------------------------------------------------===//
+
+DIType::DIType(const MDNode *N) : DIScope(N) {
+  if (!N) return;
+  if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
+    DbgNode = 0;
+  }
+}
+
+unsigned DIArray::getNumElements() const {
+  if (!DbgNode)
+    return 0;
+  return DbgNode->getNumOperands();
+}
+
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(DIDescriptor &D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to
+  // manufacture legitimate situations where they want to replaceAllUsesWith()
+  // on something which, due to uniquing, has merged with the source. We shield
+  // clients from this detail by allowing a value to be replaced with
+  // replaceAllUsesWith() itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
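This RAUW hook is what makes DIBuilder::createTemporaryType usable for recursive or forward-declared types: build a placeholder node, let members reference it, then replace it once the real definition exists. A sketch (assumes a DIBuilder DIB and a fully built DIType Final):

    void resolveForwardDecl(llvm::DIBuilder &DIB, llvm::DIType &Final) {
      llvm::DIType Temp = DIB.createTemporaryType();
      // ... create members that refer to Temp while the definition is
      // still unknown ...
      Temp.replaceAllUsesWith(Final); // rewires all uses, deletes Temp's node
    }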
+/// replaceAllUsesWith - Replace all uses of debug info referenced by
+/// this descriptor.
+void DIType::replaceAllUsesWith(MDNode *D) {
+  if (!DbgNode)
+    return;
+
+  // Since we use a TrackingVH for the node, it's easy for clients to
+  // manufacture legitimate situations where they want to replaceAllUsesWith()
+  // on something which, due to uniquing, has merged with the source. We shield
+  // clients from this detail by allowing a value to be replaced with
+  // replaceAllUsesWith() itself.
+  if (DbgNode != D) {
+    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    const MDNode *DN = D;
+    const Value *V = cast_or_null<Value>(DN);
+    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    MDNode::deleteTemporary(Node);
+  }
+}
+
+/// Verify - Verify that a compile unit is well formed.
+bool DICompileUnit::Verify() const {
+  if (!DbgNode)
+    return false;
+  StringRef N = getFilename();
+  if (N.empty())
+    return false;
+  // It is possible that the directory and producer strings are empty.
+  return true;
+}
+
+/// Verify - Verify that a type descriptor is well formed.
+bool DIType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (!getContext().Verify())
+    return false;
+  unsigned Tag = getTag();
+  if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
+      Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
+      Tag != dwarf::DW_TAG_reference_type && Tag != dwarf::DW_TAG_restrict_type
+      && Tag != dwarf::DW_TAG_vector_type && Tag != dwarf::DW_TAG_array_type
+      && Tag != dwarf::DW_TAG_enumeration_type
+      && getFilename().empty())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a basic type descriptor is well formed.
+bool DIBasicType::Verify() const {
+  return isBasicType();
+}
+
+/// Verify - Verify that a derived type descriptor is well formed.
+bool DIDerivedType::Verify() const {
+  return isDerivedType();
+}
+
+/// Verify - Verify that a composite type descriptor is well formed.
+bool DICompositeType::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a subprogram descriptor is well formed.
+bool DISubprogram::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+
+  DICompositeType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+  return true;
+}
+
+/// Verify - Verify that a global variable descriptor is well formed.
+bool DIGlobalVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (getDisplayName().empty())
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  DICompileUnit CU = getCompileUnit();
+  if (!CU.Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  if (!getGlobal() && !getConstant())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a variable descriptor is well formed.
+bool DIVariable::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  if (!getContext().Verify())
+    return false;
+
+  if (!getCompileUnit().Verify())
+    return false;
+
+  DIType Ty = getType();
+  if (!Ty.Verify())
+    return false;
+
+  return true;
+}
+
+/// Verify - Verify that a location descriptor is well formed.
+bool DILocation::Verify() const {
+  if (!DbgNode)
+    return false;
+
+  return DbgNode->getNumOperands() == 4;
+}
+
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+  if (!DbgNode)
+    return false;
+  if (getName().empty())
+    return false;
+  if (!getCompileUnit().Verify())
+    return false;
+  return true;
+}
+
+/// getOriginalTypeSize - If this type is derived from a base type then
+/// return base type size.
+uint64_t DIDerivedType::getOriginalTypeSize() const { + unsigned Tag = getTag(); + if (Tag == dwarf::DW_TAG_member || Tag == dwarf::DW_TAG_typedef || + Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type || + Tag == dwarf::DW_TAG_restrict_type) { + DIType BaseType = getTypeDerivedFrom(); + // If this type is not derived from any type then take conservative + // approach. + if (!BaseType.isValid()) + return getSizeInBits(); + if (BaseType.isDerivedType()) + return DIDerivedType(BaseType).getOriginalTypeSize(); + else + return BaseType.getSizeInBits(); + } + + return getSizeInBits(); +} + +/// isInlinedFnArgument - Return true if this variable provides debugging +/// information for an inlined function arguments. +bool DIVariable::isInlinedFnArgument(const Function *CurFn) { + assert(CurFn && "Invalid function"); + if (!getContext().isSubprogram()) + return false; + // This variable is not inlined function argument if its scope + // does not describe current function. + return !(DISubprogram(getContext()).describes(CurFn)); +} + +/// describes - Return true if this subprogram provides debugging +/// information for the function F. +bool DISubprogram::describes(const Function *F) { + assert(F && "Invalid function"); + if (F == getFunction()) + return true; + StringRef Name = getLinkageName(); + if (Name.empty()) + Name = getName(); + if (F->getName() == Name) + return true; + return false; +} + +unsigned DISubprogram::isOptimized() const { + assert (DbgNode && "Invalid subprogram descriptor!"); + if (DbgNode->getNumOperands() == 16) + return getUnsignedField(15); + return 0; +} + +StringRef DIScope::getFilename() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getFilename(); + if (isSubprogram()) + return DISubprogram(DbgNode).getFilename(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getFilename(); + if (isNameSpace()) + return DINameSpace(DbgNode).getFilename(); + if (isType()) + return DIType(DbgNode).getFilename(); + if (isFile()) + return DIFile(DbgNode).getFilename(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +StringRef DIScope::getDirectory() const { + if (!DbgNode) + return StringRef(); + if (isLexicalBlock()) + return DILexicalBlock(DbgNode).getDirectory(); + if (isSubprogram()) + return DISubprogram(DbgNode).getDirectory(); + if (isCompileUnit()) + return DICompileUnit(DbgNode).getDirectory(); + if (isNameSpace()) + return DINameSpace(DbgNode).getDirectory(); + if (isType()) + return DIType(DbgNode).getDirectory(); + if (isFile()) + return DIFile(DbgNode).getDirectory(); + assert(0 && "Invalid DIScope!"); + return StringRef(); +} + +//===----------------------------------------------------------------------===// +// DIDescriptor: dump routines for all descriptors. +//===----------------------------------------------------------------------===// + + +/// print - Print descriptor. +void DIDescriptor::print(raw_ostream &OS) const { + OS << "[" << dwarf::TagString(getTag()) << "] "; + OS.write_hex((intptr_t) &*DbgNode) << ']'; +} + +/// print - Print compile unit. +void DICompileUnit::print(raw_ostream &OS) const { + if (getLanguage()) + OS << " [" << dwarf::LanguageString(getLanguage()) << "] "; + + OS << " [" << getDirectory() << "/" << getFilename() << "]"; +} + +/// print - Print type. 
+void DIType::print(raw_ostream &OS) const { + if (!DbgNode) return; + + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" + << "line " << getLineNumber() << ", " + << getSizeInBits() << " bits, " + << getAlignInBits() << " bit alignment, " + << getOffsetInBits() << " bit offset" + << "] "; + + if (isPrivate()) + OS << " [private] "; + else if (isProtected()) + OS << " [protected] "; + + if (isForwardDecl()) + OS << " [fwd] "; + + if (isBasicType()) + DIBasicType(DbgNode).print(OS); + else if (isDerivedType()) + DIDerivedType(DbgNode).print(OS); + else if (isCompositeType()) + DICompositeType(DbgNode).print(OS); + else { + OS << "Invalid DIType\n"; + return; + } + + OS << "\n"; +} + +/// print - Print basic type. +void DIBasicType::print(raw_ostream &OS) const { + OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] "; +} + +/// print - Print derived type. +void DIDerivedType::print(raw_ostream &OS) const { + OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS); +} + +/// print - Print composite type. +void DICompositeType::print(raw_ostream &OS) const { + DIArray A = getTypeArray(); + OS << " [" << A.getNumElements() << " elements]"; +} + +/// print - Print subprogram. +void DISubprogram::print(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + OS << " [local] "; + + if (isDefinition()) + OS << " [def] "; + + OS << "\n"; +} + +/// print - Print global variable. +void DIGlobalVariable::print(raw_ostream &OS) const { + OS << " ["; + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + unsigned Tag = getTag(); + OS << " [" << dwarf::TagString(Tag) << "] "; + + // TODO : Print context + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + + if (isLocalToUnit()) + OS << " [local] "; + + if (isDefinition()) + OS << " [def] "; + + if (isGlobalVariable()) + DIGlobalVariable(DbgNode).print(OS); + OS << "]\n"; +} + +/// print - Print variable. +void DIVariable::print(raw_ostream &OS) const { + StringRef Res = getName(); + if (!Res.empty()) + OS << " [" << Res << "] "; + + getCompileUnit().print(OS); + OS << " [" << getLineNumber() << "] "; + getType().print(OS); + OS << "\n"; + + // FIXME: Dump complex addresses +} + +/// dump - Print descriptor to dbgs() with a newline. +void DIDescriptor::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print compile unit to dbgs() with a newline. +void DICompileUnit::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print type to dbgs() with a newline. +void DIType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print basic type to dbgs() with a newline. +void DIBasicType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print derived type to dbgs() with a newline. +void DIDerivedType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print composite type to dbgs() with a newline. +void DICompositeType::dump() const { + print(dbgs()); dbgs() << '\n'; +} + +/// dump - Print subprogram to dbgs() with a newline. 
+void DISubprogram::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print global variable.
+void DIGlobalVariable::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print variable.
+void DIVariable::dump() const {
+  print(dbgs()); dbgs() << '\n';
+}
+
+/// fixupObjcLikeName - Replace the special characters that appear in typical
+/// Objective-C names with '.' in the given string.
+static void fixupObjcLikeName(std::string &Str) {
+  for (size_t i = 0, e = Str.size(); i < e; ++i) {
+    char C = Str[i];
+    if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
+        C == '(' || C == ')')
+      Str[i] = '.';
+  }
+}
+
+/// getFnSpecificMDNode - Return a NamedMDNode, if available, that is
+/// suitable to hold function specific information.
+NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+}
+
+/// getOrInsertFnSpecificMDNode - Return a NamedMDNode that is suitable
+/// to hold function specific information.
+NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
+  SmallString<32> Out;
+  if (FuncName.find('[') == StringRef::npos)
+    return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
+                                        .toStringRef(Out));
+
+  std::string Name = FuncName;
+  fixupObjcLikeName(Name);
+  return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
+                                      .toStringRef(Out));
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugInfoFinder implementations.
+//===----------------------------------------------------------------------===//
+
+/// processModule - Process entire module and collect debug info.
+void DebugInfoFinder::processModule(Module &M) {
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
+      for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
+           ++BI) {
+        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+          processDeclare(DDI);
+
+        DebugLoc Loc = BI->getDebugLoc();
+        if (Loc.isUnknown())
+          continue;
+
+        LLVMContext &Ctx = BI->getContext();
+        DIDescriptor Scope(Loc.getScope(Ctx));
+
+        if (Scope.isCompileUnit())
+          addCompileUnit(DICompileUnit(Scope));
+        else if (Scope.isSubprogram())
+          processSubprogram(DISubprogram(Scope));
+        else if (Scope.isLexicalBlock())
+          processLexicalBlock(DILexicalBlock(Scope));
+
+        if (MDNode *IA = Loc.getInlinedAt(Ctx))
+          processLocation(DILocation(IA));
+      }
+
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+      if (addGlobalVariable(DIG)) {
+        addCompileUnit(DIG.getCompileUnit());
+        processType(DIG.getType());
+      }
+    }
+  }
+
+  if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      processSubprogram(DISubprogram(NMD->getOperand(i)));
+}
+
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+  if (!Loc.Verify()) return;
+  DIDescriptor S(Loc.getScope());
+  if (S.isCompileUnit())
+    addCompileUnit(DICompileUnit(S));
+  else if (S.isSubprogram())
+    processSubprogram(DISubprogram(S));
+  else if (S.isLexicalBlock())
+    processLexicalBlock(DILexicalBlock(S));
+  processLocation(Loc.getOrigLocation());
+}
+
+/// processType - Process DIType.
+void DebugInfoFinder::processType(DIType DT) {
+  if (!addType(DT))
+    return;
+
+  addCompileUnit(DT.getCompileUnit());
+  if (DT.isCompositeType()) {
+    DICompositeType DCT(DT);
+    processType(DCT.getTypeDerivedFrom());
+    DIArray DA = DCT.getTypeArray();
+    for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
+      DIDescriptor D = DA.getElement(i);
+      if (D.isType())
+        processType(DIType(D));
+      else if (D.isSubprogram())
+        processSubprogram(DISubprogram(D));
+    }
+  } else if (DT.isDerivedType()) {
+    DIDerivedType DDT(DT);
+    processType(DDT.getTypeDerivedFrom());
+  }
+}
+
+/// processLexicalBlock
+void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
+  DIScope Context = LB.getContext();
+  if (Context.isLexicalBlock())
+    return processLexicalBlock(DILexicalBlock(Context));
+  else
+    return processSubprogram(DISubprogram(Context));
+}
+
+/// processSubprogram - Process DISubprogram.
+void DebugInfoFinder::processSubprogram(DISubprogram SP) {
+  if (!addSubprogram(SP))
+    return;
+  addCompileUnit(SP.getCompileUnit());
+  processType(SP.getType());
+}
+
+/// processDeclare - Process DbgDeclareInst.
+void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
+  MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
+  if (!N) return;
+
+  DIDescriptor DV(N);
+  if (!DV.isVariable())
+    return;
+
+  if (!NodesSeen.insert(DV))
+    return;
+
+  addCompileUnit(DIVariable(N).getCompileUnit());
+  processType(DIVariable(N).getType());
+}
+
+/// addType - Add type into Tys.
+bool DebugInfoFinder::addType(DIType DT) {
+  if (!DT.isValid())
+    return false;
+
+  if (!NodesSeen.insert(DT))
+    return false;
+
+  TYs.push_back(DT);
+  return true;
+}
+
+/// addCompileUnit - Add compile unit into CUs.
+bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
+  if (!CU.Verify())
+    return false;
+
+  if (!NodesSeen.insert(CU))
+    return false;
+
+  CUs.push_back(CU);
+  return true;
+}
+
+/// addGlobalVariable - Add global variable into GVs.
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
+  if (!DIDescriptor(DIG).isGlobalVariable())
+    return false;
+
+  if (!NodesSeen.insert(DIG))
+    return false;
+
+  GVs.push_back(DIG);
+  return true;
+}
+
+// addSubprogram - Add subprogram into SPs.
+bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
+  if (!DIDescriptor(SP).isSubprogram())
+    return false;
+
+  if (!NodesSeen.insert(SP))
+    return false;
+
+  SPs.push_back(SP);
+  return true;
+}
+
+/// getDISubprogram - Find subprogram that is enclosing this scope.
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
+  DIDescriptor D(Scope);
+  if (D.isSubprogram())
+    return DISubprogram(Scope);
+
+  if (D.isLexicalBlock())
+    return getDISubprogram(DILexicalBlock(Scope).getContext());
+
+  return DISubprogram();
+}
+
+/// getDICompositeType - Find underlying composite type.
+DICompositeType llvm::getDICompositeType(DIType T) {
+  if (T.isCompositeType())
+    return DICompositeType(T);
+
+  if (T.isDerivedType())
+    return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+
+  return DICompositeType();
+}
diff --git a/final/lib/Analysis/DomPrinter.cpp b/final/lib/Analysis/DomPrinter.cpp
new file mode 100644
index 00000000000..cde431459d5
--- /dev/null
+++ b/final/lib/Analysis/DomPrinter.cpp
@@ -0,0 +1,232 @@
+//===- DomPrinter.cpp - DOT printer for the dominance trees ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines '-dot-dom' and '-dot-postdom' analysis passes, which emit
+// a dom.<fnname>.dot or postdom.<fnname>.dot file for each function in the
+// program, with a graph of the dominance/postdominance tree of that
+// function.
+//
+// There are also passes available to directly call dotty ('-view-dom' or
+// '-view-postdom'). By appending '-only' like '-dot-dom-only' only the
+// names of the bbs are printed, but the content is hidden.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DomPrinter.h"
+#include "llvm/Analysis/DOTGraphTraitsPass.h"
+#include "llvm/Analysis/PostDominators.h"
+
+using namespace llvm;
+
+namespace llvm {
+template<>
+struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
+
+  DOTGraphTraits (bool isSimple=false)
+    : DefaultDOTGraphTraits(isSimple) {}
+
+  std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) {
+
+    BasicBlock *BB = Node->getBlock();
+
+    if (!BB)
+      return "Post dominance root node";
+
+    if (isSimple())
+      return DOTGraphTraits<const Function*>
+        ::getSimpleNodeLabel(BB, BB->getParent());
+    else
+      return DOTGraphTraits<const Function*>
+        ::getCompleteNodeLabel(BB, BB->getParent());
+  }
+};
+
+template<>
+struct DOTGraphTraits<DominatorTree*> : public DOTGraphTraits<DomTreeNode*> {
+
+  DOTGraphTraits (bool isSimple=false)
+    : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+  static std::string getGraphName(DominatorTree *DT) {
+    return "Dominator tree";
+  }
+
+  std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) {
+    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+  }
+};
+
+template<>
+struct DOTGraphTraits<PostDominatorTree*>
+  : public DOTGraphTraits<DomTreeNode*> {
+
+  DOTGraphTraits (bool isSimple=false)
+    : DOTGraphTraits<DomTreeNode*>(isSimple) {}
+
+  static std::string getGraphName(PostDominatorTree *DT) {
+    return "Post dominator tree";
+  }
+
+  std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G) {
+    return DOTGraphTraits<DomTreeNode*>::getNodeLabel(Node, G->getRootNode());
+  }
+};
+}
+
+namespace {
+struct DomViewer
+  : public DOTGraphTraitsViewer<DominatorTree, false> {
+  static char ID;
+  DomViewer() : DOTGraphTraitsViewer<DominatorTree, false>("dom", ID){
+    initializeDomViewerPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+struct DomOnlyViewer
+  : public DOTGraphTraitsViewer<DominatorTree, true> {
+  static char ID;
+  DomOnlyViewer() : DOTGraphTraitsViewer<DominatorTree, true>("domonly", ID){
+    initializeDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+struct PostDomViewer
+  : public DOTGraphTraitsViewer<PostDominatorTree, false> {
+  static char ID;
+  PostDomViewer() :
+    DOTGraphTraitsViewer<PostDominatorTree, false>("postdom", ID){
+      initializePostDomViewerPass(*PassRegistry::getPassRegistry());
+    }
+};
+
+struct PostDomOnlyViewer
+  : public DOTGraphTraitsViewer<PostDominatorTree, true> {
+  static char ID;
+  PostDomOnlyViewer() :
+    DOTGraphTraitsViewer<PostDominatorTree, true>("postdomonly", ID){
+      initializePostDomOnlyViewerPass(*PassRegistry::getPassRegistry());
+    }
+};
+} // end anonymous namespace
+
+char DomViewer::ID = 0;
+INITIALIZE_PASS(DomViewer, "view-dom",
+                "View dominance tree of function", false, false)
+
+char DomOnlyViewer::ID = 0;
+INITIALIZE_PASS(DomOnlyViewer, "view-dom-only",
+                "View dominance tree of function (with no function bodies)",
+                false, false)
+
+char PostDomViewer::ID = 0;
+INITIALIZE_PASS(PostDomViewer, "view-postdom",
+                "View postdominance tree of function", false, false)
+
+char PostDomOnlyViewer::ID = 0;
+INITIALIZE_PASS(PostDomOnlyViewer, "view-postdom-only",
+                "View postdominance tree of function "
+                "(with no function bodies)",
+                false, false)
+
+namespace {
+struct DomPrinter
+  : public DOTGraphTraitsPrinter<DominatorTree, false> {
+  static char ID;
+  DomPrinter() : DOTGraphTraitsPrinter<DominatorTree, false>("dom", ID) {
+    initializeDomPrinterPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+struct DomOnlyPrinter
+  : public DOTGraphTraitsPrinter<DominatorTree, true> {
+  static char ID;
+  DomOnlyPrinter() : DOTGraphTraitsPrinter<DominatorTree, true>("domonly", ID) {
+    initializeDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+  }
+};
+
+struct PostDomPrinter
+  : public DOTGraphTraitsPrinter<PostDominatorTree, false> {
+  static char ID;
+  PostDomPrinter() :
+    DOTGraphTraitsPrinter<PostDominatorTree, false>("postdom", ID) {
+      initializePostDomPrinterPass(*PassRegistry::getPassRegistry());
+    }
+};
+
+struct PostDomOnlyPrinter
+  : public DOTGraphTraitsPrinter<PostDominatorTree, true> {
+  static char ID;
+  PostDomOnlyPrinter() :
+    DOTGraphTraitsPrinter<PostDominatorTree, true>("postdomonly", ID) {
+      initializePostDomOnlyPrinterPass(*PassRegistry::getPassRegistry());
+    }
+};
+} // end anonymous namespace
+
+char DomPrinter::ID = 0;
+INITIALIZE_PASS(DomPrinter, "dot-dom",
+                "Print dominance tree of function to 'dot' file",
+                false, false)
+
+char DomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(DomOnlyPrinter, "dot-dom-only",
+                "Print dominance tree of function to 'dot' file "
+                "(with no function bodies)",
+                false, false)
+
+char PostDomPrinter::ID = 0;
+INITIALIZE_PASS(PostDomPrinter, "dot-postdom",
+                "Print postdominance tree of function to 'dot' file",
+                false, false)
+
+char PostDomOnlyPrinter::ID = 0;
+INITIALIZE_PASS(PostDomOnlyPrinter, "dot-postdom-only",
+                "Print postdominance tree of function to 'dot' file "
+                "(with no function bodies)",
+                false, false)
+
+// Create methods available outside of this file, to use them from
+// "include/llvm/LinkAllPasses.h". Otherwise the passes would be deleted by
+// link-time optimization.
+
+FunctionPass *llvm::createDomPrinterPass() {
+  return new DomPrinter();
+}
+
+FunctionPass *llvm::createDomOnlyPrinterPass() {
+  return new DomOnlyPrinter();
+}
+
+FunctionPass *llvm::createDomViewerPass() {
+  return new DomViewer();
+}
+
+FunctionPass *llvm::createDomOnlyViewerPass() {
+  return new DomOnlyViewer();
+}
+
+FunctionPass *llvm::createPostDomPrinterPass() {
+  return new PostDomPrinter();
+}
+
+FunctionPass *llvm::createPostDomOnlyPrinterPass() {
+  return new PostDomOnlyPrinter();
+}
+
+FunctionPass *llvm::createPostDomViewerPass() {
+  return new PostDomViewer();
+}
+
+FunctionPass *llvm::createPostDomOnlyViewerPass() {
+  return new PostDomOnlyViewer();
+}
diff --git a/final/lib/Analysis/DominanceFrontier.cpp b/final/lib/Analysis/DominanceFrontier.cpp
new file mode 100644
index 00000000000..6de4e1e1d7d
--- /dev/null
+++ b/final/lib/Analysis/DominanceFrontier.cpp
@@ -0,0 +1,137 @@
+//===- DominanceFrontier.cpp - Dominance Frontier Calculation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+char DominanceFrontier::ID = 0;
+INITIALIZE_PASS_BEGIN(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(DominanceFrontier, "domfrontier",
+                "Dominance Frontier Construction", true, true)
+
+namespace {
+  class DFCalculateWorkObject {
+  public:
+    DFCalculateWorkObject(BasicBlock *B, BasicBlock *P,
+                          const DomTreeNode *N,
+                          const DomTreeNode *PN)
+      : currentBB(B), parentBB(P), Node(N), parentNode(PN) {}
+    BasicBlock *currentBB;
+    BasicBlock *parentBB;
+    const DomTreeNode *Node;
+    const DomTreeNode *parentNode;
+  };
+}
+
+const DominanceFrontier::DomSetType &
+DominanceFrontier::calculate(const DominatorTree &DT,
+                             const DomTreeNode *Node) {
+  BasicBlock *BB = Node->getBlock();
+  DomSetType *Result = NULL;
+
+  std::vector<DFCalculateWorkObject> workList;
+  SmallPtrSet<BasicBlock *, 32> visited;
+
+  workList.push_back(DFCalculateWorkObject(BB, NULL, Node, NULL));
+  do {
+    DFCalculateWorkObject *currentW = &workList.back();
+    assert(currentW && "Missing work object.");
+
+    BasicBlock *currentBB = currentW->currentBB;
+    BasicBlock *parentBB = currentW->parentBB;
+    const DomTreeNode *currentNode = currentW->Node;
+    const DomTreeNode *parentNode = currentW->parentNode;
+    assert(currentBB && "Invalid work object. Missing current Basic Block");
+    assert(currentNode && "Invalid work object. Missing current Node");
+    DomSetType &S = Frontiers[currentBB];
+
+    // Visit each block only once.
+    if (visited.count(currentBB) == 0) {
+      visited.insert(currentBB);
+
+      // Loop over CFG successors to calculate DFlocal[currentNode]
+      for (succ_iterator SI = succ_begin(currentBB), SE = succ_end(currentBB);
+           SI != SE; ++SI) {
+        // Does Node immediately dominate this successor?
+        if (DT[*SI]->getIDom() != currentNode)
+          S.insert(*SI);
+      }
+    }
+
+    // At this point, S is DFlocal. Now we union in DFup's of our children...
+    // Loop through and visit the nodes that Node immediately dominates (Node's
+    // children in the IDomTree)
+    bool visitChild = false;
+    for (DomTreeNode::const_iterator NI = currentNode->begin(),
+         NE = currentNode->end(); NI != NE; ++NI) {
+      DomTreeNode *IDominee = *NI;
+      BasicBlock *childBB = IDominee->getBlock();
+      if (visited.count(childBB) == 0) {
+        workList.push_back(DFCalculateWorkObject(childBB, currentBB,
+                                                 IDominee, currentNode));
+        visitChild = true;
+      }
+    }
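+
+    // A worked example (illustrative only, not code from this file): for the
+    // diamond CFG
+    //        A
+    //       / \
+    //      B   C
+    //       \ /
+    //        D
+    // A immediately dominates B, C, and D, so DFlocal(A) is empty, while B
+    // and C each have the successor D that they do not immediately dominate,
+    // giving DF(B) = DF(C) = {D}. The DFup step below then unions a child's
+    // frontier into its parent's frontier, keeping only the members that the
+    // parent does not properly dominate.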
+
+    // If no new children were pushed (all children already visited, or none
+    // exist), this node is finished: union its frontier into the parent's
+    // set and pop it from the workList.
+    if (!visitChild) {
+      if (!parentBB) {
+        Result = &S;
+        break;
+      }
+
+      DomSetType::const_iterator CDFI = S.begin(), CDFE = S.end();
+      DomSetType &parentSet = Frontiers[parentBB];
+      for (; CDFI != CDFE; ++CDFI) {
+        if (!DT.properlyDominates(parentNode, DT[*CDFI]))
+          parentSet.insert(*CDFI);
+      }
+      workList.pop_back();
+    }
+
+  } while (!workList.empty());
+
+  return *Result;
+}
+
+void DominanceFrontierBase::print(raw_ostream &OS, const Module* ) const {
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    OS << "  DomFrontier for BB ";
+    if (I->first)
+      WriteAsOperand(OS, I->first, false);
+    else
+      OS << " <<exit block>>";
+    OS << " is:\t";
+
+    const std::set<BasicBlock*> &BBs = I->second;
+
+    for (std::set<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+         I != E; ++I) {
+      OS << ' ';
+      if (*I)
+        WriteAsOperand(OS, *I, false);
+      else
+        OS << "<<exit block>>";
+    }
+    OS << "\n";
+  }
+}
+
+void DominanceFrontierBase::dump() const {
+  print(dbgs());
+}
+
diff --git a/final/lib/Analysis/IPA/CMakeLists.txt b/final/lib/Analysis/IPA/CMakeLists.txt
new file mode 100644
index 00000000000..8ffef29870a
--- /dev/null
+++ b/final/lib/Analysis/IPA/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMipa
+  CallGraph.cpp
+  CallGraphSCCPass.cpp
+  FindUsedTypes.cpp
+  GlobalsModRef.cpp
+  IPA.cpp
+  )
diff --git a/final/lib/Analysis/IPA/CallGraph.cpp b/final/lib/Analysis/IPA/CallGraph.cpp
new file mode 100644
index 00000000000..690c4b4b6f1
--- /dev/null
+++ b/final/lib/Analysis/IPA/CallGraph.cpp
@@ -0,0 +1,340 @@
+//===- CallGraph.cpp - Build a Module's call graph ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraph class and provides the BasicCallGraph
+// default implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// BasicCallGraph class definition
+//
+class BasicCallGraph : public ModulePass, public CallGraph {
+  // Root is root of the call graph, or the external node if a 'main' function
+  // couldn't be found.
+  //
+  CallGraphNode *Root;
+
+  // ExternalCallingNode - This node has edges to all external functions and
+  // those internal functions that have their address taken.
+  CallGraphNode *ExternalCallingNode;
+
+  // CallsExternalNode - This node has edges to it from all functions making
+  // indirect calls or calling an external function.
+  CallGraphNode *CallsExternalNode;
+
+public:
+  static char ID; // Class identification, replacement for typeinfo
+  BasicCallGraph() : ModulePass(ID), Root(0),
+    ExternalCallingNode(0), CallsExternalNode(0) {
+    initializeBasicCallGraphPass(*PassRegistry::getPassRegistry());
+  }
+
+  // runOnModule - Compute the call graph for the specified module.
+  virtual bool runOnModule(Module &M) {
+    CallGraph::initialize(M);
+
+    ExternalCallingNode = getOrInsertFunction(0);
+    CallsExternalNode = new CallGraphNode(0);
+    Root = 0;
+
+    // Add every function to the call graph.
+    for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+      addToCallGraph(I);
+
+    // If we didn't find a main function, use the external call graph node
+    if (Root == 0) Root = ExternalCallingNode;
+
+    return false;
+  }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  virtual void print(raw_ostream &OS, const Module *) const {
+    OS << "CallGraph Root is: ";
+    if (Function *F = getRoot()->getFunction())
+      OS << F->getName() << "\n";
+    else {
+      OS << "<<null function: 0x" << getRoot() << ">>\n";
+    }
+
+    CallGraph::print(OS, 0);
+  }
+
+  virtual void releaseMemory() {
+    destroy();
+  }
+
+  /// getAdjustedAnalysisPointer - This method is used when a pass implements
+  /// an analysis interface through multiple inheritance. If needed, it should
+  /// override this to adjust the this pointer as needed for the specified pass
+  /// info.
+  virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+    if (PI == &CallGraph::ID)
+      return (CallGraph*)this;
+    return this;
+  }
+
+  CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; }
+  CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; }
+
+  // getRoot - Return the root of the call graph, which is either main, or if
+  // main cannot be found, the external node.
+  //
+  CallGraphNode *getRoot() { return Root; }
+  const CallGraphNode *getRoot() const { return Root; }
+
+private:
+  //===---------------------------------------------------------------------
+  // Implementation of CallGraph construction
+  //
+
+  // addToCallGraph - Add a function to the call graph, and link the node to
+  // all of the functions that it calls.
+  //
+  void addToCallGraph(Function *F) {
+    CallGraphNode *Node = getOrInsertFunction(F);
+
+    // If this function has external linkage, anything could call it.
+    if (!F->hasLocalLinkage()) {
+      ExternalCallingNode->addCalledFunction(CallSite(), Node);
+
+      // Found the entry point?
+      if (F->getName() == "main") {
+        if (Root)   // Found multiple external mains?  Don't pick one.
+          Root = ExternalCallingNode;
+        else
+          Root = Node;          // Found a main, keep track of it!
+      }
+    }
+
+    // Loop over all of the users of the function, looking for non-call uses.
+    for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+      User *U = *I;
+      if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+          || !CallSite(cast<Instruction>(U)).isCallee(I)) {
+        // Not a call, or being used as a parameter rather than as the callee.
+        ExternalCallingNode->addCalledFunction(CallSite(), Node);
+        break;
+      }
+    }
+
+    // If this function is not defined in this translation unit, it could call
+    // anything.
+    if (F->isDeclaration() && !F->isIntrinsic())
+      Node->addCalledFunction(CallSite(), CallsExternalNode);
+
+    // Look for calls by this function.
+    for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB)
+      for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
+           II != IE; ++II) {
+        CallSite CS(cast<Value>(II));
+        if (CS && !isa<IntrinsicInst>(II)) {
+          const Function *Callee = CS.getCalledFunction();
+          if (Callee)
+            Node->addCalledFunction(CS, getOrInsertFunction(Callee));
+          else
+            Node->addCalledFunction(CS, CallsExternalNode);
+        }
+      }
+  }
+
+  //
+  // destroy - Release memory for the call graph
+  virtual void destroy() {
+    /// CallsExternalNode is not in the function map, delete it explicitly.
+    if (CallsExternalNode) {
+      CallsExternalNode->allReferencesDropped();
+      delete CallsExternalNode;
+      CallsExternalNode = 0;
+    }
+    CallGraph::destroy();
+  }
+};
+
+} //End anonymous namespace
+
+INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph)
+INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg",
+                   "Basic CallGraph Construction", false, true, true)
+
+char CallGraph::ID = 0;
+char BasicCallGraph::ID = 0;
+
+void CallGraph::initialize(Module &M) {
+  Mod = &M;
+}
+
+void CallGraph::destroy() {
+  if (FunctionMap.empty()) return;
+
+  // Reset all node's use counts to zero before deleting them to prevent an
+  // assertion from firing.
+#ifndef NDEBUG
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    I->second->allReferencesDropped();
+#endif
+
+  for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end();
+       I != E; ++I)
+    delete I->second;
+  FunctionMap.clear();
+}
+
+void CallGraph::print(raw_ostream &OS, Module*) const {
+  for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I)
+    I->second->print(OS);
+}
+void CallGraph::dump() const {
+  print(dbgs(), 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Implementations of public modification methods
+//
+
+// removeFunctionFromModule - Unlink the function from this module, returning
+// it. Because this removes the function from the module, the call graph node
+// is destroyed. This is only valid if the function does not call any other
+// functions (i.e., there are no edges in its CGN). The easiest way to do this
+// is to dropAllReferences before calling this.
+//
+Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {
+  assert(CGN->empty() && "Cannot remove function from call "
+         "graph if it references other functions!");
+  Function *F = CGN->getFunction(); // Get the function for the call graph node
+  delete CGN;                       // Delete the call graph node for this func
+  FunctionMap.erase(F);             // Remove the call graph node from the map
+
+  Mod->getFunctionList().remove(F);
+  return F;
+}
+
+/// spliceFunction - Replace the function represented by this node by another.
+/// This does not rescan the body of the function, so it is suitable when
+/// splicing the body of the old function to the new while also updating all
+/// callers from old to new.
+///
+void CallGraph::spliceFunction(const Function *From, const Function *To) {
+  assert(FunctionMap.count(From) && "No CallGraphNode for function!");
+  assert(!FunctionMap.count(To) &&
+         "Pointing CallGraphNode at a function that already exists");
+  FunctionMapTy::iterator I = FunctionMap.find(From);
+  I->second->F = const_cast<Function*>(To);
+  FunctionMap[To] = I->second;
+  FunctionMap.erase(I);
+}
+
+// getOrInsertFunction - This method is identical to calling operator[], but
+// it will insert a new CallGraphNode for the specified function if one does
+// not already exist.
+CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) {
+  CallGraphNode *&CGN = FunctionMap[F];
+  if (CGN) return CGN;
+
+  assert((!F || F->getParent() == Mod) && "Function not in current module!");
+  return CGN = new CallGraphNode(const_cast<Function*>(F));
+}
+
+void CallGraphNode::print(raw_ostream &OS) const {
+  if (Function *F = getFunction())
+    OS << "Call graph node for function: '" << F->getName() << "'";
+  else
+    OS << "Call graph node <<null function>>";
+
+  OS << "<<" << this << ">> #uses=" << getNumReferences() << '\n';
+
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    OS << "  CS<" << I->first << "> calls ";
+    if (Function *FI = I->second->getFunction())
+      OS << "function '" << FI->getName() <<"'\n";
+    else
+      OS << "external node\n";
+  }
+  OS << '\n';
+}
+
+void CallGraphNode::dump() const { print(dbgs()); }
+
+/// removeCallEdgeFor - This method removes the edge in the node for the
+/// specified call site. Note that this method takes linear time, so it
+/// should be used sparingly.
+void CallGraphNode::removeCallEdgeFor(CallSite CS) {
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (I->first == CS.getInstruction()) {
+      I->second->DropRef();
+      *I = CalledFunctions.back();
+      CalledFunctions.pop_back();
+      return;
+    }
+  }
+}
+
+// removeAnyCallEdgeTo - This method removes any call edges from this node to
+// the specified callee function. This takes more time to execute than
+// removeCallEdgeTo, so it should not be used unless necessary.
+void CallGraphNode::removeAnyCallEdgeTo(CallGraphNode *Callee) {
+  for (unsigned i = 0, e = CalledFunctions.size(); i != e; ++i)
+    if (CalledFunctions[i].second == Callee) {
+      Callee->DropRef();
+      CalledFunctions[i] = CalledFunctions.back();
+      CalledFunctions.pop_back();
+      --i; --e;
+    }
+}
+
+/// removeOneAbstractEdgeTo - Remove one edge associated with a null callsite
+/// from this node to the specified callee function.
+void CallGraphNode::removeOneAbstractEdgeTo(CallGraphNode *Callee) {
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callee to remove!");
+    CallRecord &CR = *I;
+    if (CR.second == Callee && CR.first == 0) {
+      Callee->DropRef();
+      *I = CalledFunctions.back();
+      CalledFunctions.pop_back();
+      return;
+    }
+  }
+}
+
+/// replaceCallEdge - This method replaces the edge in the node for the
+/// specified call site with a new one. Note that this method takes linear
+/// time, so it should be used sparingly.
+void CallGraphNode::replaceCallEdge(CallSite CS,
+                                    CallSite NewCS, CallGraphNode *NewNode){
+  for (CalledFunctionsVector::iterator I = CalledFunctions.begin(); ; ++I) {
+    assert(I != CalledFunctions.end() && "Cannot find callsite to remove!");
+    if (I->first == CS.getInstruction()) {
+      I->second->DropRef();
+      I->first = NewCS.getInstruction();
+      I->second = NewNode;
+      NewNode->AddRef();
+      return;
+    }
+  }
+}
+
+// Ensure that users of CallGraph.h also link with this file
+DEFINING_FILE_FOR(CallGraph)
diff --git a/final/lib/Analysis/IPA/CallGraphSCCPass.cpp b/final/lib/Analysis/IPA/CallGraphSCCPass.cpp
new file mode 100644
index 00000000000..725ab72f559
--- /dev/null
+++ b/final/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -0,0 +1,608 @@
+//===- CallGraphSCCPass.cpp - Pass that operates BU on call graph ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CallGraphSCCPass class, which is used for passes
+// which are implemented as bottom-up traversals on the call graph. Because
+// there may be cycles in the call graph, passes of this type operate on the
+// call-graph in SCC order: that is, they process functions bottom-up, except
+// for recursive functions, which they process all at once.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "cgscc-passmgr"
+#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<unsigned>
+MaxIterations("max-cg-scc-iterations", cl::ReallyHidden, cl::init(4));
+
+STATISTIC(MaxSCCIterations, "Maximum CGSCCPassMgr iterations on one SCC");
+
+//===----------------------------------------------------------------------===//
+// CGPassManager
+//
+/// CGPassManager manages FPPassManagers and CallGraphSCCPasses.
+
+namespace {
+
+class CGPassManager : public ModulePass, public PMDataManager {
+public:
+  static char ID;
+  explicit CGPassManager(int Depth)
+    : ModulePass(ID), PMDataManager(Depth) { }
+
+  /// run - Execute all of the passes scheduled for execution. Keep track of
+  /// whether any of the passes modifies the module, and if so, return true.
+  bool runOnModule(Module &M);
+
+  bool doInitialization(CallGraph &CG);
+  bool doFinalization(CallGraph &CG);
+
+  /// Pass Manager itself does not invalidate any analysis info.
+  void getAnalysisUsage(AnalysisUsage &Info) const {
+    // CGPassManager walks SCC and it needs CallGraph.
+    Info.addRequired<CallGraph>();
+    Info.setPreservesAll();
+  }
+
+  virtual const char *getPassName() const {
+    return "CallGraph Pass Manager";
+  }
+
+  virtual PMDataManager *getAsPMDataManager() { return this; }
+  virtual Pass *getAsPass() { return this; }
+
+  // Print passes managed by this manager
+  void dumpPassStructure(unsigned Offset) {
+    errs().indent(Offset*2) << "Call Graph SCC Pass Manager\n";
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      Pass *P = getContainedPass(Index);
+      P->dumpPassStructure(Offset + 1);
+      dumpLastUses(P, Offset+1);
+    }
+  }
+
+  Pass *getContainedPass(unsigned N) {
+    assert(N < PassVector.size() && "Pass number out of range!");
+    return static_cast<Pass *>(PassVector[N]);
+  }
+
+  virtual PassManagerType getPassManagerType() const {
+    return PMT_CallGraphPassManager;
+  }
+
+private:
+  bool RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG,
+                         bool &DevirtualizedCall);
+
+  bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
+                    CallGraph &CG, bool &CallGraphUpToDate,
+                    bool &DevirtualizedCall);
+  bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+                        bool IsCheckingMode);
+};
+
+} // end anonymous namespace.
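+
+// An illustrative sketch (hypothetical pass, not part of this file) of the
+// kind of client CGPassManager drives; each runOnSCC call receives one
+// strongly connected component of the call graph, visited bottom-up:
+//
+//   namespace {
+//     struct SCCSizeCounter : public CallGraphSCCPass {
+//       static char ID;
+//       SCCSizeCounter() : CallGraphSCCPass(ID) {}
+//       bool runOnSCC(CallGraphSCC &SCC) {
+//         unsigned N = 0;
+//         for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end();
+//              I != E; ++I)
+//           ++N;                // one CallGraphNode per function in the SCC
+//         dbgs() << "SCC with " << N << " function(s)\n";
+//         return false;        // the IR is not modified
+//       }
+//     };
+//     char SCCSizeCounter::ID = 0;
+//   }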
+ +char CGPassManager::ID = 0; + + +bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC, + CallGraph &CG, bool &CallGraphUpToDate, + bool &DevirtualizedCall) { + bool Changed = false; + PMDataManager *PM = P->getAsPMDataManager(); + + if (PM == 0) { + CallGraphSCCPass *CGSP = (CallGraphSCCPass*)P; + if (!CallGraphUpToDate) { + DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false); + CallGraphUpToDate = true; + } + + { + TimeRegion PassTimer(getPassTimer(CGSP)); + Changed = CGSP->runOnSCC(CurSCC); + } + + // After the CGSCCPass is done, when assertions are enabled, use + // RefreshCallGraph to verify that the callgraph was correctly updated. +#ifndef NDEBUG + if (Changed) + RefreshCallGraph(CurSCC, CG, true); +#endif + + return Changed; + } + + + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + FPPassManager *FPP = (FPPassManager*)P; + + // Run pass P on all functions in the current SCC. + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) { + if (Function *F = (*I)->getFunction()) { + dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName()); + TimeRegion PassTimer(getPassTimer(FPP)); + Changed |= FPP->runOnFunction(*F); + } + } + + // The function pass(es) modified the IR, they may have clobbered the + // callgraph. + if (Changed && CallGraphUpToDate) { + DEBUG(dbgs() << "CGSCCPASSMGR: Pass Dirtied SCC: " + << P->getPassName() << '\n'); + CallGraphUpToDate = false; + } + return Changed; +} + + +/// RefreshCallGraph - Scan the functions in the specified CFG and resync the +/// callgraph with the call sites found in it. This is used after +/// FunctionPasses have potentially munged the callgraph, and can be used after +/// CallGraphSCC passes to verify that they correctly updated the callgraph. +/// +/// This function returns true if it devirtualized an existing function call, +/// meaning it turned an indirect call into a direct call. This happens when +/// a function pass like GVN optimizes away stuff feeding the indirect call. +/// This never happens in checking mode. +/// +bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC, + CallGraph &CG, bool CheckingMode) { + DenseMap CallSites; + + DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size() + << " nodes:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + ); + + bool MadeChange = false; + bool DevirtualizedCall = false; + + // Scan all functions in the SCC. + unsigned FunctionNo = 0; + for (CallGraphSCC::iterator SCCIdx = CurSCC.begin(), E = CurSCC.end(); + SCCIdx != E; ++SCCIdx, ++FunctionNo) { + CallGraphNode *CGN = *SCCIdx; + Function *F = CGN->getFunction(); + if (F == 0 || F->isDeclaration()) continue; + + // Walk the function body looking for call sites. Sync up the call sites in + // CGN with those actually in the function. + + // Keep track of the number of direct and indirect calls that were + // invalidated and removed. + unsigned NumDirectRemoved = 0, NumIndirectRemoved = 0; + + // Get the set of call sites currently in the function. + for (CallGraphNode::iterator I = CGN->begin(), E = CGN->end(); I != E; ) { + // If this call site is null, then the function pass deleted the call + // entirely and the WeakVH nulled it out. + if (I->first == 0 || + // If we've already seen this call site, then the FunctionPass RAUW'd + // one call with another, which resulted in two "uses" in the edge + // list of the same call. 
+ CallSites.count(I->first) || + + // If the call edge is not from a call or invoke, then the function + // pass RAUW'd a call with another value. This can happen when + // constant folding happens of well known functions etc. + !CallSite(I->first)) { + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If this was an indirect call site, count it. + if (I->second->getFunction() == 0) + ++NumIndirectRemoved; + else + ++NumDirectRemoved; + + // Just remove the edge from the set of callees, keep track of whether + // I points to the last element of the vector. + bool WasLast = I + 1 == E; + CGN->removeCallEdge(I); + + // If I pointed to the last element of the vector, we have to bail out: + // iterator checking rejects comparisons of the resultant pointer with + // end. + if (WasLast) + break; + E = CGN->end(); + continue; + } + + assert(!CallSites.count(I->first) && + "Call site occurs in node multiple times"); + CallSites.insert(std::make_pair(I->first, I->second)); + ++I; + } + + // Loop over all of the instructions in the function, getting the callsites. + // Keep track of the number of direct/indirect calls added. + unsigned NumDirectAdded = 0, NumIndirectAdded = 0; + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + CallSite CS(cast(I)); + if (!CS || isa(I)) continue; + + // If this call site already existed in the callgraph, just verify it + // matches up to expectations and remove it from CallSites. + DenseMap::iterator ExistingIt = + CallSites.find(CS.getInstruction()); + if (ExistingIt != CallSites.end()) { + CallGraphNode *ExistingNode = ExistingIt->second; + + // Remove from CallSites since we have now seen it. + CallSites.erase(ExistingIt); + + // Verify that the callee is right. + if (ExistingNode->getFunction() == CS.getCalledFunction()) + continue; + + // If we are in checking mode, we are not allowed to actually mutate + // the callgraph. If this is a case where we can infer that the + // callgraph is less precise than it could be (e.g. an indirect call + // site could be turned direct), don't reject it in checking mode, and + // don't tweak it to be more precise. + if (CheckingMode && CS.getCalledFunction() && + ExistingNode->getFunction() == 0) + continue; + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If not, we either went from a direct call to indirect, indirect to + // direct, or direct to different direct. + CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + // Keep track of whether we turned an indirect call into a direct + // one. + if (ExistingNode->getFunction() == 0) { + DevirtualizedCall = true; + DEBUG(dbgs() << " CGSCCPASSMGR: Devirtualized call to '" + << Callee->getName() << "'\n"); + } + } else { + CalleeNode = CG.getCallsExternalNode(); + } + + // Update the edge target in CGN. + CGN->replaceCallEdge(CS, CS, CalleeNode); + MadeChange = true; + continue; + } + + assert(!CheckingMode && + "CallGraphSCCPass did not update the CallGraph correctly!"); + + // If the call site didn't exist in the CGN yet, add it. 
+ CallGraphNode *CalleeNode; + if (Function *Callee = CS.getCalledFunction()) { + CalleeNode = CG.getOrInsertFunction(Callee); + ++NumDirectAdded; + } else { + CalleeNode = CG.getCallsExternalNode(); + ++NumIndirectAdded; + } + + CGN->addCalledFunction(CS, CalleeNode); + MadeChange = true; + } + + // We scanned the old callgraph node, removing invalidated call sites and + // then added back newly found call sites. One thing that can happen is + // that an old indirect call site was deleted and replaced with a new direct + // call. In this case, we have devirtualized a call, and CGSCCPM would like + // to iteratively optimize the new code. Unfortunately, we don't really + // have a great way to detect when this happens. As an approximation, we + // just look at whether the number of indirect calls is reduced and the + // number of direct calls is increased. There are tons of ways to fool this + // (e.g. DCE'ing an indirect call and duplicating an unrelated block with a + // direct call) but this is close enough. + if (NumIndirectRemoved > NumIndirectAdded && + NumDirectRemoved < NumDirectAdded) + DevirtualizedCall = true; + + // After scanning this function, if we still have entries in callsites, then + // they are dangling pointers. WeakVH should save us for this, so abort if + // this happens. + assert(CallSites.empty() && "Dangling pointers found in call sites map"); + + // Periodically do an explicit clear to remove tombstones when processing + // large scc's. + if ((FunctionNo & 15) == 15) + CallSites.clear(); + } + + DEBUG(if (MadeChange) { + dbgs() << "CGSCCPASSMGR: Refreshed SCC is now:\n"; + for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end(); + I != E; ++I) + (*I)->dump(); + if (DevirtualizedCall) + dbgs() << "CGSCCPASSMGR: Refresh devirtualized a call!\n"; + + } else { + dbgs() << "CGSCCPASSMGR: SCC Refresh didn't change call graph.\n"; + } + ); + + return DevirtualizedCall; +} + +/// RunAllPassesOnSCC - Execute the body of the entire pass manager on the +/// specified SCC. This keeps track of whether a function pass devirtualizes +/// any calls and returns it in DevirtualizedCall. +bool CGPassManager::RunAllPassesOnSCC(CallGraphSCC &CurSCC, CallGraph &CG, + bool &DevirtualizedCall) { + bool Changed = false; + + // CallGraphUpToDate - Keep track of whether the callgraph is known to be + // up-to-date or not. The CGSSC pass manager runs two types of passes: + // CallGraphSCC Passes and other random function passes. Because other + // random function passes are not CallGraph aware, they may clobber the + // call graph by introducing new calls or deleting other ones. This flag + // is set to false when we run a function pass so that we know to clean up + // the callgraph when we need to run a CGSCCPass again. + bool CallGraphUpToDate = true; + + // Run all passes on current SCC. + for (unsigned PassNo = 0, e = getNumContainedPasses(); + PassNo != e; ++PassNo) { + Pass *P = getContainedPass(PassNo); + + // If we're in -debug-pass=Executions mode, construct the SCC node list, + // otherwise avoid constructing this string as it is expensive. 
+    if (isPassDebuggingExecutionsOrMore()) {
+      std::string Functions;
+  #ifndef NDEBUG
+      raw_string_ostream OS(Functions);
+      for (CallGraphSCC::iterator I = CurSCC.begin(), E = CurSCC.end();
+           I != E; ++I) {
+        if (I != CurSCC.begin()) OS << ", ";
+        (*I)->print(OS);
+      }
+      OS.flush();
+  #endif
+      dumpPassInfo(P, EXECUTION_MSG, ON_CG_MSG, Functions);
+    }
+    dumpRequiredSet(P);
+
+    initializeAnalysisImpl(P);
+
+    // Actually run this pass on the current SCC.
+    Changed |= RunPassOnSCC(P, CurSCC, CG,
+                            CallGraphUpToDate, DevirtualizedCall);
+
+    if (Changed)
+      dumpPassInfo(P, MODIFICATION_MSG, ON_CG_MSG, "");
+    dumpPreservedSet(P);
+
+    verifyPreservedAnalysis(P);
+    removeNotPreservedAnalysis(P);
+    recordAvailableAnalysis(P);
+    removeDeadPasses(P, "", ON_CG_MSG);
+  }
+
+  // If the callgraph was left out of date (because the last pass run was a
+  // functionpass), refresh it before we move on to the next SCC.
+  if (!CallGraphUpToDate)
+    DevirtualizedCall |= RefreshCallGraph(CurSCC, CG, false);
+  return Changed;
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool CGPassManager::runOnModule(Module &M) {
+  CallGraph &CG = getAnalysis<CallGraph>();
+  bool Changed = doInitialization(CG);
+
+  // Walk the callgraph in bottom-up SCC order.
+  scc_iterator<CallGraph*> CGI = scc_begin(&CG);
+
+  CallGraphSCC CurSCC(&CGI);
+  while (!CGI.isAtEnd()) {
+    // Copy the current SCC and increment past it so that the pass can hack
+    // on the SCC if it wants to without invalidating our iterator.
+    std::vector<CallGraphNode*> &NodeVec = *CGI;
+    CurSCC.initialize(&NodeVec[0], &NodeVec[0]+NodeVec.size());
+    ++CGI;
+
+    // At the top level, we run all the passes in this pass manager on the
+    // functions in this SCC. However, we support iterative compilation in the
+    // case where a function pass devirtualizes a call to a function. For
+    // example, it is very common for a function pass (often GVN or instcombine)
+    // to eliminate the addressing that feeds into a call. With that improved
+    // information, we would like the call to be an inline candidate, infer
+    // mod-ref information etc.
+    //
+    // Because of this, we allow iteration up to a specified iteration count.
+    // This only happens in the case of a devirtualized call, so we only burn
+    // compile time in the case that we're making progress. We also have a hard
+    // iteration count limit in case there is crazy code.
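+    //
+    // Illustrative example (hypothetical IR, not from this file): if GVN
+    // proves that %fp below always loads @callee,
+    //     %fp = load void ()** @table
+    //     call void %fp()          ; indirect call
+    // can be rewritten into
+    //     call void @callee()      ; direct call
+    // RefreshCallGraph() then sees the new direct edge, RunAllPassesOnSCC
+    // reports DevirtualizedCall = true, and the do/while loop below re-runs
+    // the SCC passes so the now-direct call can be inlined or analyzed.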
+ unsigned Iteration = 0; + bool DevirtualizedCall = false; + do { + DEBUG(if (Iteration) + dbgs() << " SCCPASSMGR: Re-visiting SCC, iteration #" + << Iteration << '\n'); + DevirtualizedCall = false; + Changed |= RunAllPassesOnSCC(CurSCC, CG, DevirtualizedCall); + } while (Iteration++ < MaxIterations && DevirtualizedCall); + + if (DevirtualizedCall) + DEBUG(dbgs() << " CGSCCPASSMGR: Stopped iteration after " << Iteration + << " times, due to -max-cg-scc-iterations\n"); + + if (Iteration > MaxSCCIterations) + MaxSCCIterations = Iteration; + + } + Changed |= doFinalization(CG); + return Changed; +} + + +/// Initialize CG +bool CGPassManager::doInitialization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doInitialization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doInitialization(CG); + } + } + return Changed; +} + +/// Finalize CG +bool CGPassManager::doFinalization(CallGraph &CG) { + bool Changed = false; + for (unsigned i = 0, e = getNumContainedPasses(); i != e; ++i) { + if (PMDataManager *PM = getContainedPass(i)->getAsPMDataManager()) { + assert(PM->getPassManagerType() == PMT_FunctionPassManager && + "Invalid CGPassManager member"); + Changed |= ((FPPassManager*)PM)->doFinalization(CG.getModule()); + } else { + Changed |= ((CallGraphSCCPass*)getContainedPass(i))->doFinalization(CG); + } + } + return Changed; +} + +//===----------------------------------------------------------------------===// +// CallGraphSCC Implementation +//===----------------------------------------------------------------------===// + +/// ReplaceNode - This informs the SCC and the pass manager that the specified +/// Old node has been deleted, and New is to be used in its place. +void CallGraphSCC::ReplaceNode(CallGraphNode *Old, CallGraphNode *New) { + assert(Old != New && "Should not replace node with self"); + for (unsigned i = 0; ; ++i) { + assert(i != Nodes.size() && "Node not in SCC"); + if (Nodes[i] != Old) continue; + Nodes[i] = New; + break; + } + + // Update the active scc_iterator so that it doesn't contain dangling + // pointers to the old CallGraphNode. + scc_iterator *CGI = (scc_iterator*)Context; + CGI->ReplaceNode(Old, New); +} + + +//===----------------------------------------------------------------------===// +// CallGraphSCCPass Implementation +//===----------------------------------------------------------------------===// + +/// Assign pass manager to manage this pass. +void CallGraphSCCPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find CGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_CallGraphPassManager) + PMS.pop(); + + assert(!PMS.empty() && "Unable to handle Call Graph Pass"); + CGPassManager *CGP; + + if (PMS.top()->getPassManagerType() == PMT_CallGraphPassManager) + CGP = (CGPassManager*)PMS.top(); + else { + // Create new Call Graph SCC Pass Manager if it does not exist. 
+ assert(!PMS.empty() && "Unable to create Call Graph Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + CGP = new CGPassManager(PMD->getDepth() + 1); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(CGP); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + Pass *P = CGP; + TPM->schedulePass(P); + + // [4] Push new manager into PMS + PMS.push(CGP); + } + + CGP->add(this); +} + +/// getAnalysisUsage - For this class, we declare that we require and preserve +/// the call graph. If the derived class implements this method, it should +/// always explicitly call the implementation here. +void CallGraphSCCPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreserved(); +} + + +//===----------------------------------------------------------------------===// +// PrintCallGraphPass Implementation +//===----------------------------------------------------------------------===// + +namespace { + /// PrintCallGraphPass - Print a Module corresponding to a call graph. + /// + class PrintCallGraphPass : public CallGraphSCCPass { + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + + public: + static char ID; + PrintCallGraphPass(const std::string &B, raw_ostream &o) + : CallGraphSCCPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + bool runOnSCC(CallGraphSCC &SCC) { + Out << Banner; + for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) + (*I)->getFunction()->print(Out); + return false; + } + }; + +} // end anonymous namespace. + +char PrintCallGraphPass::ID = 0; + +Pass *CallGraphSCCPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintCallGraphPass(Banner, O); +} + diff --git a/final/lib/Analysis/IPA/FindUsedTypes.cpp b/final/lib/Analysis/IPA/FindUsedTypes.cpp new file mode 100644 index 00000000000..06ae34cfd98 --- /dev/null +++ b/final/lib/Analysis/IPA/FindUsedTypes.cpp @@ -0,0 +1,103 @@ +//===- FindUsedTypes.cpp - Find all Types used by a module ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is used to seek out all of the types in use by the program. Note +// that this analysis explicitly does not include types only used by the symbol +// table. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/FindUsedTypes.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +char FindUsedTypes::ID = 0; +INITIALIZE_PASS(FindUsedTypes, "print-used-types", + "Find Used Types", false, true) + +// IncorporateType - Incorporate one type and all of its subtypes into the +// collection of used types. +// +void FindUsedTypes::IncorporateType(const Type *Ty) { + // If ty doesn't already exist in the used types map, add it now, otherwise + // return. + if (!UsedTypes.insert(Ty).second) return; // Already contain Ty. + + // Make sure to add any types this type references now. 
+  //
+  for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
+       I != E; ++I)
+    IncorporateType(*I);
+}
+
+void FindUsedTypes::IncorporateValue(const Value *V) {
+  IncorporateType(V->getType());
+
+  // If this is a constant, it could be using other types...
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (!isa<GlobalValue>(C))
+      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
+           OI != OE; ++OI)
+        IncorporateValue(*OI);
+  }
+}
+
+
+// run - This incorporates all types used by the specified module
+//
+bool FindUsedTypes::runOnModule(Module &m) {
+  UsedTypes.clear();  // reset if run multiple times...
+
+  // Loop over global variables, incorporating their types.
+  for (Module::const_global_iterator I = m.global_begin(), E = m.global_end();
+       I != E; ++I) {
+    IncorporateType(I->getType());
+    if (I->hasInitializer())
+      IncorporateValue(I->getInitializer());
+  }
+
+  for (Module::iterator MI = m.begin(), ME = m.end(); MI != ME; ++MI) {
+    IncorporateType(MI->getType());
+    const Function &F = *MI;
+
+    // Loop over all of the instructions in the function, adding their return
+    // type as well as the types of their operands.
+    //
+    for (const_inst_iterator II = inst_begin(F), IE = inst_end(F);
+         II != IE; ++II) {
+      const Instruction &I = *II;
+
+      IncorporateType(I.getType());  // Incorporate the type of the instruction
+      for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+           OI != OE; ++OI)
+        IncorporateValue(*OI);  // Insert inst operand types as well
+    }
+  }
+
+  return false;
+}
+
+// Print the types found in the module.  If the optional Module parameter is
+// passed in, then the types are printed symbolically if possible, using the
+// symbol table from the module.
+//
+void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
+  OS << "Types in use by this module:\n";
+  for (std::set<const Type *>::const_iterator I = UsedTypes.begin(),
+       E = UsedTypes.end(); I != E; ++I) {
+    OS << "  ";
+    WriteTypeSymbolic(OS, *I, M);
+    OS << '\n';
+  }
+}
diff --git a/final/lib/Analysis/IPA/GlobalsModRef.cpp b/final/lib/Analysis/IPA/GlobalsModRef.cpp
new file mode 100644
index 00000000000..116aaf418ea
--- /dev/null
+++ b/final/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -0,0 +1,609 @@
+//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This simple pass provides alias and mod/ref information for global values
+// that do not have their address taken, and keeps track of whether functions
+// read or write memory (are "pure").  For this simple (but very common) case,
+// we can provide pretty accurate and useful information.
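+// For example (hypothetical IR): if @counter is an internal global whose
+// address never escapes and only one function writes it, a load of @counter
+// cannot be clobbered by a call to any function known not to write @counter.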
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "globalsmodref-aa"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Instructions.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SCCIterator.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNonAddrTakenGlobalVars,
+          "Number of global vars without address taken");
+STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken");
+STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory");
+STATISTIC(NumReadMemFunctions, "Number of functions that only read memory");
+STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects");
+
+namespace {
+  /// FunctionRecord - One instance of this structure is stored for every
+  /// function in the program.  Later, the entries for these functions are
+  /// removed if the function is found to call an external function (in which
+  /// case we know nothing about it).
+  struct FunctionRecord {
+    /// GlobalInfo - Maintain mod/ref info for all of the globals without
+    /// addresses taken that are read or written (transitively) by this
+    /// function.
+    std::map<const GlobalValue*, unsigned> GlobalInfo;
+
+    /// MayReadAnyGlobal - May read global variables, but it is not known
+    /// which.
+    bool MayReadAnyGlobal;
+
+    unsigned getInfoForGlobal(const GlobalValue *GV) const {
+      unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0;
+      std::map<const GlobalValue*, unsigned>::const_iterator I =
+        GlobalInfo.find(GV);
+      if (I != GlobalInfo.end())
+        Effect |= I->second;
+      return Effect;
+    }
+
+    /// FunctionEffect - Capture whether or not this function reads or writes
+    /// to ANY memory.  If not, we can do a lot of aggressive analysis on it.
+    unsigned FunctionEffect;
+
+    FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {}
+  };
+
+  /// GlobalsModRef - The actual analysis pass.
+  class GlobalsModRef : public ModulePass, public AliasAnalysis {
+    /// NonAddressTakenGlobals - The globals that do not have their addresses
+    /// taken.
+    std::set<const GlobalValue*> NonAddressTakenGlobals;
+
+    /// IndirectGlobals - The memory pointed to by this global is known to be
+    /// 'owned' by the global.
+    std::set<const GlobalValue*> IndirectGlobals;
+
+    /// AllocsForIndirectGlobals - If an instruction allocates memory for an
+    /// indirect global, this map indicates which one.
+    std::map<const Value*, const GlobalValue*> AllocsForIndirectGlobals;
+
+    /// FunctionInfo - For each function, keep track of what globals are
+    /// modified or read.
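+    /// (The mapped values are bitmasks of AliasAnalysis::Ref and
+    /// AliasAnalysis::Mod, so (Info & Mod) != 0 means the function may
+    /// write the global.)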
+    std::map<const Function*, FunctionRecord> FunctionInfo;
+
+  public:
+    static char ID;
+    GlobalsModRef() : ModulePass(ID) {
+      initializeGlobalsModRefPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnModule(Module &M) {
+      InitializeAliasAnalysis(this);                 // set up super class
+      AnalyzeGlobals(M);                             // find non-addr taken globals
+      AnalyzeCallGraph(getAnalysis<CallGraph>(), M); // Propagate on CG
+      return false;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AliasAnalysis::getAnalysisUsage(AU);
+      AU.addRequired<CallGraph>();
+      AU.setPreservesAll();                      // Does not transform code
+    }
+
+    //------------------------------------------------
+    // Implement the AliasAnalysis API
+    //
+    AliasResult alias(const Location &LocA, const Location &LocB);
+    ModRefResult getModRefInfo(ImmutableCallSite CS,
+                               const Location &Loc);
+    ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                               ImmutableCallSite CS2) {
+      return AliasAnalysis::getModRefInfo(CS1, CS2);
+    }
+
+    /// getModRefBehavior - Return the most generic mod/ref behavior of the
+    /// specified function.
+    ModRefBehavior getModRefBehavior(const Function *F) {
+      ModRefBehavior Min = UnknownModRefBehavior;
+
+      if (FunctionRecord *FR = getFunctionInfo(F)) {
+        if (FR->FunctionEffect == 0)
+          Min = DoesNotAccessMemory;
+        else if ((FR->FunctionEffect & Mod) == 0)
+          Min = OnlyReadsMemory;
+      }
+
+      return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
+    }
+
+    /// getModRefBehavior - Return the behavior of the specified call site.
+    /// If no direct callee can be determined, the most generic behavior is
+    /// returned.
+    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) {
+      ModRefBehavior Min = UnknownModRefBehavior;
+
+      if (const Function* F = CS.getCalledFunction())
+        if (FunctionRecord *FR = getFunctionInfo(F)) {
+          if (FR->FunctionEffect == 0)
+            Min = DoesNotAccessMemory;
+          else if ((FR->FunctionEffect & Mod) == 0)
+            Min = OnlyReadsMemory;
+        }
+
+      return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min);
+    }
+
+    virtual void deleteValue(Value *V);
+    virtual void copyValue(Value *From, Value *To);
+    virtual void addEscapingUse(Use &U);
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+  private:
+    /// getFunctionInfo - Return the function info for the function, or null
+    /// if we don't have anything useful to say about it.
+ FunctionRecord *getFunctionInfo(const Function *F) { + std::map::iterator I = + FunctionInfo.find(F); + if (I != FunctionInfo.end()) + return &I->second; + return 0; + } + + void AnalyzeGlobals(Module &M); + void AnalyzeCallGraph(CallGraph &CG, Module &M); + bool AnalyzeUsesOfPointer(Value *V, std::vector &Readers, + std::vector &Writers, + GlobalValue *OkayStoreDest = 0); + bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); + }; +} + +char GlobalsModRef::ID = 0; +INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) +INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, + "globalsmodref-aa", "Simple mod/ref analysis for globals", + false, true, false) + +Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program. If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. +void GlobalsModRef::AnalyzeGlobals(Module &M) { + std::vector Readers, Writers; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global. + NonAddressTakenGlobals.insert(I); + ++NumNonAddrTakenFunctions; + } + Readers.clear(); Writers.clear(); + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) + if (I->hasLocalLinkage()) { + if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { + // Remember that we are tracking this global, and the mod/ref fns + NonAddressTakenGlobals.insert(I); + + for (unsigned i = 0, e = Readers.size(); i != e; ++i) + FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; + + if (!I->isConstant()) // No need to keep track of writers to constants + for (unsigned i = 0, e = Writers.size(); i != e; ++i) + FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; + ++NumNonAddrTakenGlobalVars; + + // If this global holds a pointer type, see if it is an indirect global. + if (I->getType()->getElementType()->isPointerTy() && + AnalyzeIndirectGlobalMemory(I)) + ++NumIndirectGlobalVars; + } + Readers.clear(); Writers.clear(); + } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true. Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. 
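+/// For example (hypothetical IR): "store i32 0, i32* @g" merely writes the
+/// global being analyzed, while "store i32* @g, i32** %p" stores the pointer
+/// itself and counts as escaping unless %p is OkayStoreDest.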
+bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
+                                         std::vector<Function*> &Readers,
+                                         std::vector<Function*> &Writers,
+                                         GlobalValue *OkayStoreDest) {
+  if (!V->getType()->isPointerTy()) return true;
+
+  for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+    User *U = *UI;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      Readers.push_back(LI->getParent()->getParent());
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      if (V == SI->getOperand(1)) {
+        Writers.push_back(SI->getParent()->getParent());
+      } else if (SI->getOperand(1) != OkayStoreDest) {
+        return true;  // Storing the pointer
+      }
+    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
+    } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+      if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
+        return true;
+    } else if (isFreeCall(U)) {
+      Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+    } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Make sure that this is just the function being called, not that it is
+      // being passed into the function.
+      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+        if (CI->getArgOperand(i) == V) return true;
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+      // Make sure that this is just the function being called, not that it is
+      // being passed into the function.
+      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+        if (II->getArgOperand(i) == V) return true;
+    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
+      if (CE->getOpcode() == Instruction::GetElementPtr ||
+          CE->getOpcode() == Instruction::BitCast) {
+        if (AnalyzeUsesOfPointer(CE, Readers, Writers))
+          return true;
+      } else {
+        return true;
+      }
+    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
+      if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
+        return true;  // Only comparison against null is allowed.
+    } else {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// AnalyzeIndirectGlobalMemory - We found a non-address-taken global variable
+/// which holds a pointer type.  See if the global always points to non-aliased
+/// heap memory: that is, all initializers of the globals are allocations, and
+/// those allocations have no use other than initialization of the global.
+/// Further, all loads out of GV must directly use the memory, not store the
+/// pointer somewhere.  If this is true, we consider the memory pointed to by
+/// GV to be owned by GV and can disambiguate other pointers from it.
+bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
+  // Keep track of values related to the allocation of the memory, e.g. the
+  // value produced by the malloc call and any casts.
+  std::vector<Value*> AllocRelatedValues;
+
+  // Walk the user list of the global.  If we find anything other than a direct
+  // load or store, bail out.
+  for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+    User *U = *I;
+    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
+      // The pointer loaded from the global can only be used in simple ways:
+      // we allow addressing of it and loading from and storing to it.  We do
+      // *not* allow storing the loaded pointer somewhere else or passing it
+      // to a function.
+      std::vector<Function*> ReadersWriters;
+      if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
+        return false;  // Loaded pointer escapes.
+      // TODO: Could try some IP mod/ref of the loaded pointer.
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
+      // Storing the global itself.
+      if (SI->getOperand(0) == GV) return false;
+
+      // If storing the null pointer, ignore it.
+      if (isa<ConstantPointerNull>(SI->getOperand(0)))
+        continue;
+
+      // Check the value being stored.
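+      // e.g. (hypothetical IR) a module constructor doing
+      //   %mem = call i8* @malloc(i64 64)
+      //   store i8* %mem, i8** @cache
+      // stores a value that traces straight back to an allocation.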
+ Value *Ptr = GetUnderlyingObject(SI->getOperand(0)); + + if (isMalloc(Ptr)) { + // Okay, easy case. + } else if (CallInst *CI = dyn_cast(Ptr)) { + Function *F = CI->getCalledFunction(); + if (!F || !F->isDeclaration()) return false; // Too hard to analyze. + if (F->getName() != "calloc") return false; // Not calloc. + } else { + return false; // Too hard to analyze. + } + + // Analyze all uses of the allocation. If any of them are used in a + // non-simple way (e.g. stored to another global) bail out. + std::vector ReadersWriters; + if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) + return false; // Loaded pointer escapes. + + // Remember that this allocation is related to the indirect global. + AllocRelatedValues.push_back(Ptr); + } else { + // Something complex, bail out. + return false; + } + } + + // Okay, this is an indirect global. Remember all of the allocations for + // this global in AllocsForIndirectGlobals. + while (!AllocRelatedValues.empty()) { + AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; + AllocRelatedValues.pop_back(); + } + IndirectGlobals.insert(GV); + return true; +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from. Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { + // We do a bottom-up SCC traversal of the call graph. In other words, we + // visit all callees before callers (leaf-first). + for (scc_iterator I = scc_begin(&CG), E = scc_end(&CG); I != E; + ++I) { + std::vector &SCC = *I; + assert(!SCC.empty() && "SCC with no functions?"); + + if (!SCC[0]->getFunction()) { + // Calls externally - can't say anything useful. Remove any existing + // function records (may have been created when scanning globals). + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; + + bool KnowNothing = false; + unsigned FunctionEffect = 0; + + // Collect the mod/ref properties due to called functions. We only compute + // one mod-ref set. + for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { + Function *F = SCC[i]->getFunction(); + if (!F) { + KnowNothing = true; + break; + } + + if (F->isDeclaration()) { + // Try to get mod/ref behaviour from function attributes. + if (F->doesNotAccessMemory()) { + // Can't do better than that! + } else if (F->onlyReadsMemory()) { + FunctionEffect |= Ref; + if (!F->isIntrinsic()) + // This function might call back into the module and read a global - + // consider every global as possibly being read by this function. + FR.MayReadAnyGlobal = true; + } else { + FunctionEffect |= ModRef; + // Can't say anything useful unless it's an intrinsic - they don't + // read or write global variables of the kind considered here. + KnowNothing = !F->isIntrinsic(); + } + continue; + } + + for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); + CI != E && !KnowNothing; ++CI) + if (Function *Callee = CI->second->getFunction()) { + if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { + // Propagate function effect up. + FunctionEffect |= CalleeFR->FunctionEffect; + + // Incorporate callee's effects on globals into our info. 
+ for (std::map::iterator GI = + CalleeFR->GlobalInfo.begin(), E = CalleeFR->GlobalInfo.end(); + GI != E; ++GI) + FR.GlobalInfo[GI->first] |= GI->second; + FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; + } else { + // Can't say anything about it. However, if it is inside our SCC, + // then nothing needs to be done. + CallGraphNode *CalleeNode = CG[Callee]; + if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) + KnowNothing = true; + } + } else { + KnowNothing = true; + } + } + + // If we can't say anything useful about this SCC, remove all SCC functions + // from the FunctionInfo map. + if (KnowNothing) { + for (unsigned i = 0, e = SCC.size(); i != e; ++i) + FunctionInfo.erase(SCC[i]->getFunction()); + continue; + } + + // Scan the function bodies for explicit loads or stores. + for (unsigned i = 0, e = SCC.size(); i != e && FunctionEffect != ModRef;++i) + for (inst_iterator II = inst_begin(SCC[i]->getFunction()), + E = inst_end(SCC[i]->getFunction()); + II != E && FunctionEffect != ModRef; ++II) + if (isa(*II)) { + FunctionEffect |= Ref; + if (cast(*II).isVolatile()) + // Volatile loads may have side-effects, so mark them as writing + // memory (for example, a flag inside the processor). + FunctionEffect |= Mod; + } else if (isa(*II)) { + FunctionEffect |= Mod; + if (cast(*II).isVolatile()) + // Treat volatile stores as reading memory somewhere. + FunctionEffect |= Ref; + } else if (isMalloc(&cast(*II)) || + isFreeCall(&cast(*II))) { + FunctionEffect |= ModRef; + } + + if ((FunctionEffect & Mod) == 0) + ++NumReadMemFunctions; + if (FunctionEffect == 0) + ++NumNoMemFunctions; + FR.FunctionEffect = FunctionEffect; + + // Finally, now that we know the full effect on this SCC, clone the + // information to each function in the SCC. + for (unsigned i = 1, e = SCC.size(); i != e; ++i) + FunctionInfo[SCC[i]->getFunction()] = FR; + } +} + + + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasAnalysis::AliasResult +GlobalsModRef::alias(const Location &LocA, + const Location &LocB) { + // Get the base object these pointers point to. + const Value *UV1 = GetUnderlyingObject(LocA.Ptr); + const Value *UV2 = GetUnderlyingObject(LocB.Ptr); + + // If either of the underlying values is a global, they may be non-addr-taken + // globals, which we can answer queries about. + const GlobalValue *GV1 = dyn_cast(UV1); + const GlobalValue *GV2 = dyn_cast(UV2); + if (GV1 || GV2) { + // If the global's address is taken, pretend we don't know it's a pointer to + // the global. + if (GV1 && !NonAddressTakenGlobals.count(GV1)) GV1 = 0; + if (GV2 && !NonAddressTakenGlobals.count(GV2)) GV2 = 0; + + // If the two pointers are derived from two different non-addr-taken + // globals, or if one is and the other isn't, we know these can't alias. + if ((GV1 || GV2) && GV1 != GV2) + return NoAlias; + + // Otherwise if they are both derived from the same addr-taken global, we + // can't know the two accesses don't overlap. + } + + // These pointers may be based on the memory owned by an indirect global. If + // so, we may be able to handle this. First check to see if the base pointer + // is a direct load from an indirect global. 
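+  // e.g. (hypothetical) %p = load i32** @handle, where @handle is an
+  // indirect global, means %p can only point into the allocation owned by
+  // @handle.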
+  GV1 = GV2 = 0;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV1))
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV1 = GV;
+  if (const LoadInst *LI = dyn_cast<LoadInst>(UV2))
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0)))
+      if (IndirectGlobals.count(GV))
+        GV2 = GV;
+
+  // These pointers may also be from an allocation for the indirect global.  If
+  // so, also handle them.
+  if (AllocsForIndirectGlobals.count(UV1))
+    GV1 = AllocsForIndirectGlobals[UV1];
+  if (AllocsForIndirectGlobals.count(UV2))
+    GV2 = AllocsForIndirectGlobals[UV2];
+
+  // Now that we know whether the two pointers are related to indirect globals,
+  // use this to disambiguate the pointers.  If either pointer is based on an
+  // indirect global and if they are not both based on the same indirect
+  // global, they cannot alias.
+  if ((GV1 || GV2) && GV1 != GV2)
+    return NoAlias;
+
+  return AliasAnalysis::alias(LocA, LocB);
+}
+
+AliasAnalysis::ModRefResult
+GlobalsModRef::getModRefInfo(ImmutableCallSite CS,
+                             const Location &Loc) {
+  unsigned Known = ModRef;
+
+  // If we are asking for mod/ref info of a direct call with a pointer to a
+  // global we are tracking, return information if we have it.
+  if (const GlobalValue *GV =
+        dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr)))
+    if (GV->hasLocalLinkage())
+      if (const Function *F = CS.getCalledFunction())
+        if (NonAddressTakenGlobals.count(GV))
+          if (const FunctionRecord *FR = getFunctionInfo(F))
+            Known = FR->getInfoForGlobal(GV);
+
+  if (Known == NoModRef)
+    return NoModRef; // No need to query other mod/ref analyses
+  return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc));
+}
+
+
+//===----------------------------------------------------------------------===//
+// Methods to update the analysis as a result of the client transformation.
+//
+void GlobalsModRef::deleteValue(Value *V) {
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    if (NonAddressTakenGlobals.erase(GV)) {
+      // This global might be an indirect global.  If so, remove it and remove
+      // any AllocRelatedValues for it.
+      if (IndirectGlobals.erase(GV)) {
+        // Remove any entries in AllocsForIndirectGlobals for this global.
+        for (std::map<const Value*, const GlobalValue*>::iterator
+               I = AllocsForIndirectGlobals.begin(),
+               E = AllocsForIndirectGlobals.end(); I != E; ) {
+          if (I->second == GV) {
+            AllocsForIndirectGlobals.erase(I++);
+          } else {
+            ++I;
+          }
+        }
+      }
+    }
+  }
+
+  // Otherwise, if this is an allocation related to an indirect global, remove
+  // it.
+  AllocsForIndirectGlobals.erase(V);
+
+  AliasAnalysis::deleteValue(V);
+}
+
+void GlobalsModRef::copyValue(Value *From, Value *To) {
+  AliasAnalysis::copyValue(From, To);
+}
+
+void GlobalsModRef::addEscapingUse(Use &U) {
+  // For the purposes of this analysis, it is conservatively correct to treat
+  // a newly escaping value equivalently to a deleted one.  We could perhaps
+  // be more precise by processing the new use and attempting to update our
+  // saved analysis results to accommodate it.
+  deleteValue(U);
+
+  AliasAnalysis::addEscapingUse(U);
+}
diff --git a/final/lib/Analysis/IPA/IPA.cpp b/final/lib/Analysis/IPA/IPA.cpp
new file mode 100644
index 00000000000..0ba2e04c630
--- /dev/null
+++ b/final/lib/Analysis/IPA/IPA.cpp
@@ -0,0 +1,29 @@
+//===-- IPA.cpp -----------------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the IPA library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeIPA - Initialize all passes linked into the IPA library. +void llvm::initializeIPA(PassRegistry &Registry) { + initializeBasicCallGraphPass(Registry); + initializeCallGraphAnalysisGroup(Registry); + initializeFindUsedTypesPass(Registry); + initializeGlobalsModRefPass(Registry); +} + +void LLVMInitializeIPA(LLVMPassRegistryRef R) { + initializeIPA(*unwrap(R)); +} diff --git a/final/lib/Analysis/IPA/Makefile b/final/lib/Analysis/IPA/Makefile new file mode 100644 index 00000000000..b850c9ff7f4 --- /dev/null +++ b/final/lib/Analysis/IPA/Makefile @@ -0,0 +1,15 @@ +##===- lib/Analysis/IPA/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMipa +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/final/lib/Analysis/IVUsers.cpp b/final/lib/Analysis/IVUsers.cpp new file mode 100644 index 00000000000..c8382186df3 --- /dev/null +++ b/final/lib/Analysis/IVUsers.cpp @@ -0,0 +1,263 @@ +//===- IVUsers.cpp - Induction Variable Users -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements bookkeeping for "interesting" users of expressions +// computed from induction variables. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "iv-users" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Constants.h" +#include "llvm/Instructions.h" +#include "llvm/Type.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +char IVUsers::ID = 0; +INITIALIZE_PASS_BEGIN(IVUsers, "iv-users", + "Induction Variable Users", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_END(IVUsers, "iv-users", + "Induction Variable Users", false, true) + +Pass *llvm::createIVUsersPass() { + return new IVUsers(); +} + +/// isInteresting - Test whether the given expression is "interesting" when +/// used by the given expression, within the context of analyzing the +/// given loop. +static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L, + ScalarEvolution *SE) { + // An addrec is interesting if it's affine or if it has an interesting start. + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + // Keep things simple. Don't touch loop-variant strides. 
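+    // (e.g. an illustrative quadratic recurrence {0,+,1,+,2}<%L> is not
+    // affine, so it is only interesting when the user sits outside %L.)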
+ if (AR->getLoop() == L) + return AR->isAffine() || !L->contains(I); + // Otherwise recurse to see if the start value is interesting, and that + // the step value is not interesting, since we don't yet know how to + // do effective SCEV expansions for addrecs with interesting steps. + return isInteresting(AR->getStart(), I, L, SE) && + !isInteresting(AR->getStepRecurrence(*SE), I, L, SE); + } + + // An add is interesting if exactly one of its operands is interesting. + if (const SCEVAddExpr *Add = dyn_cast(S)) { + bool AnyInterestingYet = false; + for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end(); + OI != OE; ++OI) + if (isInteresting(*OI, I, L, SE)) { + if (AnyInterestingYet) + return false; + AnyInterestingYet = true; + } + return AnyInterestingYet; + } + + // Nothing else is interesting here. + return false; +} + +/// AddUsersIfInteresting - Inspect the specified instruction. If it is a +/// reducible SCEV, recursively add its users to the IVUsesByStride set and +/// return true. Otherwise, return false. +bool IVUsers::AddUsersIfInteresting(Instruction *I) { + if (!SE->isSCEVable(I->getType())) + return false; // Void and FP expressions cannot be reduced. + + // LSR is not APInt clean, do not touch integers bigger than 64-bits. + if (SE->getTypeSizeInBits(I->getType()) > 64) + return false; + + if (!Processed.insert(I)) + return true; // Instruction already handled. + + // Get the symbolic expression for this instruction. + const SCEV *ISE = SE->getSCEV(I); + + // If we've come to an uninteresting expression, stop the traversal and + // call this a user. + if (!isInteresting(ISE, I, L, SE)) + return false; + + SmallPtrSet UniqueUsers; + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); + UI != E; ++UI) { + Instruction *User = cast(*UI); + if (!UniqueUsers.insert(User)) + continue; + + // Do not infinitely recurse on PHI nodes. + if (isa(User) && Processed.count(User)) + continue; + + // Descend recursively, but not into PHI nodes outside the current loop. + // It's important to see the entire expression outside the loop to get + // choices that depend on addressing mode use right, although we won't + // consider references outside the loop in all cases. + // If User is already in Processed, we don't want to recurse into it again, + // but do want to record a second reference in the same instruction. + bool AddUserToIVUsers = false; + if (LI->getLoopFor(User->getParent()) != L) { + if (isa(User) || Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + } else if (Processed.count(User) || + !AddUsersIfInteresting(User)) { + DEBUG(dbgs() << "FOUND USER: " << *User << '\n' + << " OF SCEV: " << *ISE << '\n'); + AddUserToIVUsers = true; + } + + if (AddUserToIVUsers) { + // Okay, we found a user that we cannot reduce. + IVUses.push_back(new IVStrideUse(this, User, I)); + IVStrideUse &NewUse = IVUses.back(); + // Transform the expression into a normalized form. 
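+      // (Illustratively, this is so pre- and post-increment users of the
+      // same IV end up with matching expressions such as {0,+,1}<%L>, with
+      // the post-increment loops recorded in NewUse.PostIncLoops.)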
+      ISE = TransformForPostIncUse(NormalizeAutodetect,
+                                   ISE, User, I,
+                                   NewUse.PostIncLoops,
+                                   *SE, *DT);
+      DEBUG(dbgs() << "   NORMALIZED TO: " << *ISE << '\n');
+    }
+  }
+  return true;
+}
+
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+  IVUses.push_back(new IVStrideUse(this, User, Operand));
+  return IVUses.back();
+}
+
+IVUsers::IVUsers()
+  : LoopPass(ID) {
+  initializeIVUsersPass(*PassRegistry::getPassRegistry());
+}
+
+void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<LoopInfo>();
+  AU.addRequired<DominatorTree>();
+  AU.addRequired<ScalarEvolution>();
+  AU.setPreservesAll();
+}
+
+bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
+  L = l;
+  LI = &getAnalysis<LoopInfo>();
+  DT = &getAnalysis<DominatorTree>();
+  SE = &getAnalysis<ScalarEvolution>();
+
+  // Find all uses of induction variables in this loop, and categorize
+  // them by stride.  Start by finding all of the PHI nodes in the header for
+  // this loop.  If they are induction variables, inspect their uses.
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
+    (void)AddUsersIfInteresting(I);
+
+  return false;
+}
+
+void IVUsers::print(raw_ostream &OS, const Module *M) const {
+  OS << "IV Users for loop ";
+  WriteAsOperand(OS, L->getHeader(), false);
+  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
+    OS << " with backedge-taken count "
+       << *SE->getBackedgeTakenCount(L);
+  }
+  OS << ":\n";
+
+  for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
+       E = IVUses.end(); UI != E; ++UI) {
+    OS << "  ";
+    WriteAsOperand(OS, UI->getOperandValToReplace(), false);
+    OS << " = " << *getReplacementExpr(*UI);
+    for (PostIncLoopSet::const_iterator
+         I = UI->PostIncLoops.begin(),
+         E = UI->PostIncLoops.end(); I != E; ++I) {
+      OS << " (post-inc with loop ";
+      WriteAsOperand(OS, (*I)->getHeader(), false);
+      OS << ")";
+    }
+    OS << " in ";
+    UI->getUser()->print(OS);
+    OS << '\n';
+  }
+}
+
+void IVUsers::dump() const {
+  print(dbgs());
+}
+
+void IVUsers::releaseMemory() {
+  Processed.clear();
+  IVUses.clear();
+}
+
+/// getReplacementExpr - Return a SCEV expression which computes the
+/// value of the OperandValToReplace.
+const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
+  return SE->getSCEV(IU.getOperandValToReplace());
+}
+
+/// getExpr - Return the expression for the use.
+const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
+  return
+    TransformForPostIncUse(Normalize, getReplacementExpr(IU),
+                           IU.getUser(), IU.getOperandValToReplace(),
+                           const_cast<PostIncLoopSet &>(IU.getPostIncLoops()),
+                           *SE, *DT);
+}
+
+static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    if (AR->getLoop() == L)
+      return AR;
+    return findAddRecForLoop(AR->getStart(), L);
+  }
+
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+         I != E; ++I)
+      if (const SCEVAddRecExpr *AR = findAddRecForLoop(*I, L))
+        return AR;
+    return 0;
+  }
+
+  return 0;
+}
+
+const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
+  if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+    return AR->getStepRecurrence(*SE);
+  return 0;
+}
+
+void IVStrideUse::transformToPostInc(const Loop *L) {
+  PostIncLoops.insert(L);
+}
+
+void IVStrideUse::deleted() {
+  // Remove this user from the list.
+  Parent->IVUses.erase(this);
+  // this now dangles!
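+  // (IVUses is an ilist<IVStrideUse> that owns its nodes, so erase(this)
+  // destroys the object; no member access may follow.)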
+} diff --git a/final/lib/Analysis/InlineCost.cpp b/final/lib/Analysis/InlineCost.cpp new file mode 100644 index 00000000000..47f91cfc3be --- /dev/null +++ b/final/lib/Analysis/InlineCost.cpp @@ -0,0 +1,648 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Support/CallSite.h" +#include "llvm/CallingConv.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SmallPtrSet.h" + +using namespace llvm; + +/// callIsSmall - If a call is likely to lower to a single target instruction, +/// or is otherwise deemed small return true. +/// TODO: Perhaps calls like memcpy, strcpy, etc? +bool llvm::callIsSmall(const Function *F) { + if (!F) return false; + + if (F->hasLocalLinkage()) return false; + + if (!F->hasName()) return false; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sin" || Name == "sinf" || Name == "sinl" || + Name == "cos" || Name == "cosf" || Name == "cosl" || + Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) + return true; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || + Name == "exp2" || Name == "exp2l" || Name == "exp2f" || + Name == "floor" || Name == "floorf" || Name == "ceil" || + Name == "round" || Name == "ffs" || Name == "ffsl" || + Name == "abs" || Name == "labs" || Name == "llabs") + return true; + + return false; +} + +/// analyzeBasicBlock - Fill in the current structure with information gleaned +/// from the specified block. +void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) { + ++NumBlocks; + unsigned NumInstsBeforeThisBB = NumInsts; + for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); + II != E; ++II) { + if (isa(II)) continue; // PHI nodes don't count. + + // Special handling for calls. + if (isa(II) || isa(II)) { + if (isa(II)) + continue; // Debug intrinsics don't count as size. + + ImmutableCallSite CS(cast(II)); + + // If this function contains a call to setjmp or _setjmp, never inline + // it. This is a hack because we depend on the user marking their local + // variables as volatile if they are live across a setjmp call, and they + // probably won't do this in callers. + if (const Function *F = CS.getCalledFunction()) { + // If a function is both internal and has a single use, then it is + // extremely likely to get inlined in the future (it was probably + // exposed by an interleaved devirtualization pass). + if (F->hasInternalLinkage() && F->hasOneUse()) + ++NumInlineCandidates; + + if (F->isDeclaration() && + (F->getName() == "setjmp" || F->getName() == "_setjmp")) + callsSetJmp = true; + + // If this call is to function itself, then the function is recursive. + // Inlining it into other functions is a bad idea, because this is + // basically just a form of loop peeling, and our metrics aren't useful + // for that case. 
+ if (F == BB->getParent()) + isRecursive = true; + } + + if (!isa(II) && !callIsSmall(CS.getCalledFunction())) { + // Each argument to a call takes on average one instruction to set up. + NumInsts += CS.arg_size(); + + // We don't want inline asm to count as a call - that would prevent loop + // unrolling. The argument setup cost is still real, though. + if (!isa(CS.getCalledValue())) + ++NumCalls; + } + } + + if (const AllocaInst *AI = dyn_cast(II)) { + if (!AI->isStaticAlloca()) + this->usesDynamicAlloca = true; + } + + if (isa(II) || II->getType()->isVectorTy()) + ++NumVectorInsts; + + if (const CastInst *CI = dyn_cast(II)) { + // Noop casts, including ptr <-> int, don't count. + if (CI->isLosslessCast() || isa(CI) || + isa(CI)) + continue; + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa(CI->getOperand(0))) + continue; + } else if (const GetElementPtrInst *GEPI = dyn_cast(II)){ + // If a GEP has all constant indices, it will probably be folded with + // a load/store. + if (GEPI->hasAllConstantIndices()) + continue; + } + + ++NumInsts; + } + + if (isa(BB->getTerminator())) + ++NumRets; + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this indirect + // jump would jump from the inlined copy of the function into the original + // function which is extremely undefined behavior. + if (isa(BB->getTerminator())) + containsIndirectBr = true; + + // Remember NumInsts for this BB. + NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; +} + +// CountCodeReductionForConstant - Figure out an approximation for how many +// instructions will be constant folded if the specified value is constant. +// +unsigned CodeMetrics::CountCodeReductionForConstant(Value *V) { + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + User *U = *UI; + if (isa(U) || isa(U)) { + // We will be able to eliminate all but one of the successors. + const TerminatorInst &TI = cast(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += NumBBInsts[TI.getSuccessor(I)]; + // We don't know which blocks will be eliminated, so use the average size. + Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc; + } else { + // Figure out if this instruction will be removed due to simple constant + // propagation. + Instruction &Inst = cast(*U); + + // We can't constant propagate instructions which have effects or + // read memory. + // + // FIXME: It would be nice to capture the fact that a load from a + // pointer-to-constant-global is actually a *really* good thing to zap. + // Unfortunately, we don't know the pointer that may get propagated here, + // so we can't make this decision. + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + // We will get to remove this instruction... + Reduction += InlineConstants::InstrCost; + + // And any other instructions that use it which become constants + // themselves. 
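+        // (e.g., hypothetically, with V constant "%a = add i32 %V, 1"
+        // folds, and then "%b = shl i32 %a, 1" folds via this recursion.)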
+ Reduction += CountCodeReductionForConstant(&Inst); + } + } + } + return Reduction; +} + +// CountCodeReductionForAlloca - Figure out an approximation of how much smaller +// the function will be if it is inlined into a context where an argument +// becomes an alloca. +// +unsigned CodeMetrics::CountCodeReductionForAlloca(Value *V) { + if (!V->getType()->isPointerTy()) return 0; // Not a pointer + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ + Instruction *I = cast(*UI); + if (isa(I) || isa(I)) + Reduction += InlineConstants::InstrCost; + else if (GetElementPtrInst *GEP = dyn_cast(I)) { + // If the GEP has variable indices, we won't be able to do much with it. + if (GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP); + } else if (BitCastInst *BCI = dyn_cast(I)) { + // Track pointer through bitcasts. + Reduction += CountCodeReductionForAlloca(BCI); + } else { + // If there is some other strange instruction, we're not going to be able + // to do much if we inline this. + return 0; + } + } + + return Reduction; +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void CodeMetrics::analyzeFunction(Function *F) { + // Look at the size of the callee. + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + analyzeBasicBlock(&*BB); +} + +/// analyzeFunction - Fill in the current structure with information gleaned +/// from the specified function. +void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) { + Metrics.analyzeFunction(F); + + // A function with exactly one return has it removed during the inlining + // process (see InlineFunction), so don't count it. + // FIXME: This knowledge should really be encoded outside of FunctionInfo. + if (Metrics.NumRets==1) + --Metrics.NumInsts; + + // Check out all of the arguments to the function, figuring out how much + // code can be eliminated if one of the arguments is a constant. + ArgumentWeights.reserve(F->arg_size()); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ArgumentWeights.push_back(ArgInfo(Metrics.CountCodeReductionForConstant(I), + Metrics.CountCodeReductionForAlloca(I))); +} + +/// NeverInline - returns true if the function should never be inlined into +/// any caller +bool InlineCostAnalyzer::FunctionInfo::NeverInline() { + return (Metrics.callsSetJmp || Metrics.isRecursive || + Metrics.containsIndirectBr); +} +// getSpecializationBonus - The heuristic used to determine the per-call +// performance boost for using a specialization of Callee with argument +// specializedArgNo replaced by a constant. +int InlineCostAnalyzer::getSpecializationBonus(Function *Callee, + SmallVectorImpl &SpecializedArgNos) +{ + if (Callee->mayBeOverridden()) + return 0; + + int Bonus = 0; + // If this function uses the coldcc calling convention, prefer not to + // specialize it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus -= InlineConstants::ColdccPenalty; + + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. 
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
+  unsigned ArgNo = 0;
+  unsigned i = 0;
+  for (Function::arg_iterator I = Callee->arg_begin(), E = Callee->arg_end();
+       I != E; ++I, ++ArgNo)
+    // (Guard i so we never index past the end of SpecializedArgNos.)
+    if (i != SpecializedArgNos.size() && ArgNo == SpecializedArgNos[i]) {
+      ++i;
+      Bonus += CountBonusForConstant(I);
+    }
+
+  // Calls usually take a long time, so they make the specialization gain
+  // smaller.
+  Bonus -= CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty;
+
+  return Bonus;
+}
+
+// ConstantFunctionBonus - Figure out how much of a bonus we can get for
+// possibly devirtualizing a function.  We'll subtract the size of the function
+// we may wish to inline from the indirect call bonus providing a limit on
+// growth.  Leave an upper limit of 0 for the bonus - we don't want to penalize
+// inlining because we decide we don't want to give a bonus for
+// devirtualizing.
+int InlineCostAnalyzer::ConstantFunctionBonus(CallSite CS, Constant *C) {
+
+  // This could just be NULL.
+  if (!C) return 0;
+
+  Function *F = dyn_cast<Function>(C);
+  if (!F) return 0;
+
+  int Bonus = InlineConstants::IndirectCallBonus + getInlineSize(CS, F);
+  return (Bonus > 0) ? 0 : Bonus;
+}
+
+// CountBonusForConstant - Figure out an approximation for how much per-call
+// performance boost we can expect if the specified value is constant.
+int InlineCostAnalyzer::CountBonusForConstant(Value *V, Constant *C) {
+  // (Bonus is signed: ConstantFunctionBonus contributes non-positive values.)
+  int Bonus = 0;
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+    User *U = *UI;
+    if (CallInst *CI = dyn_cast<CallInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (CI->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(CI), C);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
+      // Turning an indirect call into a direct call is a BIG win
+      if (II->getCalledValue() == V)
+        Bonus += ConstantFunctionBonus(CallSite(II), C);
+    }
+    // FIXME: Eliminating conditional branches and switches should
+    // also yield a per-call performance boost.
+    else {
+      // Figure out the bonuses that will accrue due to simple constant
+      // propagation.
+      Instruction &Inst = cast<Instruction>(*U);
+
+      // We can't constant propagate instructions which have effects or
+      // read memory.
+      //
+      // FIXME: It would be nice to capture the fact that a load from a
+      // pointer-to-constant-global is actually a *really* good thing to zap.
+      // Unfortunately, we don't know the pointer that may get propagated here,
+      // so we can't make this decision.
+      if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
+          isa<AllocaInst>(Inst))
+        continue;
+
+      bool AllOperandsConstant = true;
+      for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
+        if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
+          AllOperandsConstant = false;
+          break;
+        }
+
+      if (AllOperandsConstant)
+        Bonus += CountBonusForConstant(&Inst);
+    }
+  }
+
+  return Bonus;
+}
+
+int InlineCostAnalyzer::getInlineSize(CallSite CS, Function *Callee) {
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline.  A lower inline cost makes it more likely for the call
+  // to be inlined.  This value may go negative.
+  //
+  int InlineCost = 0;
+
+  // Compute any size reductions we can expect due to arguments being passed
+  // into the function.
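+  // (e.g., illustratively, a constant argument whose precomputed
+  // ArgumentWeights[ArgNo].ConstantWeight is 25 reduces the cost by 25.)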
+ // + unsigned ArgNo = 0; + CallSite::arg_iterator I = CS.arg_begin(); + for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); + FI != FE; ++I, ++FI, ++ArgNo) { + + // If an alloca is passed in, inlining this function is likely to allow + // significant future optimization possibilities (like scalar promotion, and + // scalarization), so encourage the inlining of the function. + // + if (isa(I)) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].AllocaWeight; + + // If this is a constant being passed into the function, use the argument + // weights calculated for the callee to determine how much will be folded + // away with this information. + else if (isa(I)) + InlineCost -= CalleeFI->ArgumentWeights[ArgNo].ConstantWeight; + } + + // Each argument passed in has a cost at both the caller and the callee + // sides. Measurements show that each argument costs about the same as an + // instruction. + InlineCost -= (CS.arg_size() * InlineConstants::InstrCost); + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + + // Calls usually take a long time, so they make the inlining gain smaller. + InlineCost += CalleeFI->Metrics.NumCalls * InlineConstants::CallPenalty; + + // Look at the size of the callee. Each instruction counts as 5. + InlineCost += CalleeFI->Metrics.NumInsts*InlineConstants::InstrCost; + + return InlineCost; +} + +int InlineCostAnalyzer::getInlineBonuses(CallSite CS, Function *Callee) { + // Get information about the callee. + FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee]; + + // If we haven't calculated this information yet, do so now. + if (CalleeFI->Metrics.NumBlocks == 0) + CalleeFI->analyzeFunction(Callee); + + bool isDirectCall = CS.getCalledFunction() == Callee; + Instruction *TheCall = CS.getInstruction(); + int Bonus = 0; + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. + // + if (Callee->hasLocalLinkage() && Callee->hasOneUse() && isDirectCall) + Bonus += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this. + if (InvokeInst *II = dyn_cast(TheCall)) { + if (isa(II->getNormalDest()->begin())) + Bonus += InlineConstants::NoreturnPenalty; + } else if (isa(++BasicBlock::iterator(TheCall))) + Bonus += InlineConstants::NoreturnPenalty; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (Callee->getCallingConv() == CallingConv::Cold) + Bonus += InlineConstants::ColdccPenalty; + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + CallSite::arg_iterator I = CS.arg_begin(); + for (Function::arg_iterator FI = Callee->arg_begin(), FE = Callee->arg_end(); + FI != FE; ++I, ++FI) + // Compute any constant bonus due to inlining we want to give here. + if (isa(I)) + Bonus += CountBonusForConstant(FI, cast(I)); + + return Bonus; +} + +// getInlineCost - The heuristic used to determine if we should inline the +// function call or not. 
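+// (A hypothetical caller-side sketch: compare the returned cost against a
+// threshold, e.g. "InlineCost IC = CA.getInlineCost(CS, NeverInline);
+// inline when IC.isAlways(), or when !IC.isNever() and IC.getValue() is
+// below the threshold."  The accessor names here are assumed, not verbatim.)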
+//
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  return getInlineCost(CS, CS.getCalledFunction(), NeverInline);
+}
+
+InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
+                               Function *Callee,
+                               SmallPtrSet<const Function*, 16> &NeverInline) {
+  Instruction *TheCall = CS.getInstruction();
+  Function *Caller = TheCall->getParent()->getParent();
+
+  // Don't inline functions which can be redefined at link-time to mean
+  // something else.  Don't inline functions marked noinline or call sites
+  // marked noinline.
+  if (Callee->mayBeOverridden() ||
+      Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+      CS.isNoInline())
+    return llvm::InlineCost::getNever();
+
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
+  // If we should never inline this, return a huge cost.
+  if (CalleeFI->NeverInline())
+    return InlineCost::getNever();
+
+  // FIXME: It would be nice to kill off CalleeFI->NeverInline.  Then we
+  // could move this up and avoid computing the FunctionInfo for
+  // things we are going to just return always inline for.  This
+  // requires handling setjmp somewhere else, however.
+  if (!Callee->isDeclaration() && Callee->hasFnAttr(Attribute::AlwaysInline))
+    return InlineCost::getAlways();
+
+  if (CalleeFI->Metrics.usesDynamicAlloca) {
+    // Get information about the caller.
+    FunctionInfo &CallerFI = CachedFunctionInfo[Caller];
+
+    // If we haven't calculated this information yet, do so now.
+    if (CallerFI.Metrics.NumBlocks == 0) {
+      CallerFI.analyzeFunction(Caller);
+
+      // Recompute the CalleeFI pointer, getting Caller could have invalidated
+      // it.
+      CalleeFI = &CachedFunctionInfo[Callee];
+    }
+
+    // Don't inline a callee with dynamic alloca into a caller without them.
+    // Functions containing dynamic alloca's are inefficient in various ways;
+    // don't create more inefficiency.
+    if (!CallerFI.Metrics.usesDynamicAlloca)
+      return InlineCost::getNever();
+  }
+
+  // InlineCost - This value measures how good of an inline candidate this call
+  // site is to inline.  A lower inline cost makes it more likely for the call
+  // to be inlined.  This value may go negative due to the fact that bonuses
+  // are negative numbers.
+  //
+  int InlineCost = getInlineSize(CS, Callee) + getInlineBonuses(CS, Callee);
+  return llvm::InlineCost::get(InlineCost);
+}
+
+// getSpecializationCost - The heuristic used to determine the code-size
+// impact of creating a specialized version of Callee with argument
+// SpecializedArgNo replaced by a constant.
+InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee,
+                               SmallVectorImpl<unsigned> &SpecializedArgNos)
+{
+  // Don't specialize functions which can be redefined at link-time to mean
+  // something else.
+  if (Callee->mayBeOverridden())
+    return llvm::InlineCost::getNever();
+
+  // Get information about the callee.
+  FunctionInfo *CalleeFI = &CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI->Metrics.NumBlocks == 0)
+    CalleeFI->analyzeFunction(Callee);
+
+  int Cost = 0;
+
+  // Look at the original size of the callee.  Each instruction counts as 5.
+  Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost;
+
+  // Offset that with the amount of code that can be constant-folded
+  // away with the given arguments replaced by constants.
+  for (SmallVectorImpl<unsigned>::iterator an = SpecializedArgNos.begin(),
+       ae = SpecializedArgNos.end(); an != ae; ++an)
+    Cost -= CalleeFI->ArgumentWeights[*an].ConstantWeight;
+
+  return llvm::InlineCost::get(Cost);
+}
+
+// getInlineFudgeFactor - Return a > 1.0 factor if the inliner should use a
+// higher threshold to determine if the function call should be inlined.
+float InlineCostAnalyzer::getInlineFudgeFactor(CallSite CS) {
+  Function *Callee = CS.getCalledFunction();
+
+  // Get information about the callee.
+  FunctionInfo &CalleeFI = CachedFunctionInfo[Callee];
+
+  // If we haven't calculated this information yet, do so now.
+  if (CalleeFI.Metrics.NumBlocks == 0)
+    CalleeFI.analyzeFunction(Callee);
+
+  float Factor = 1.0f;
+  // Single BB functions are often written to be inlined.
+  if (CalleeFI.Metrics.NumBlocks == 1)
+    Factor += 0.5f;
+
+  // Be more aggressive if the function contains a good chunk (if it makes up
+  // at least 10% of the instructions) of vector instructions.
+  if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/2)
+    Factor += 2.0f;
+  else if (CalleeFI.Metrics.NumVectorInsts > CalleeFI.Metrics.NumInsts/10)
+    Factor += 1.5f;
+  return Factor;
+}
+
+/// growCachedCostInfo - Update the cached cost info for Caller after Callee
+/// has been inlined.
+void
+InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
+  CodeMetrics &CallerMetrics = CachedFunctionInfo[Caller].Metrics;
+
+  // For small functions we prefer to recalculate the cost for better accuracy.
+  if (CallerMetrics.NumBlocks < 10 || CallerMetrics.NumInsts < 1000) {
+    resetCachedCostInfo(Caller);
+    return;
+  }
+
+  // For large functions, we can save a lot of computation time by skipping
+  // recalculations.
+  if (CallerMetrics.NumCalls > 0)
+    --CallerMetrics.NumCalls;
+
+  if (Callee == 0) return;
+
+  CodeMetrics &CalleeMetrics = CachedFunctionInfo[Callee].Metrics;
+
+  // If we don't have metrics for the callee, don't recalculate them just to
+  // update an approximation in the caller.  Instead, just recalculate the
+  // caller info from scratch.
+  if (CalleeMetrics.NumBlocks == 0) {
+    resetCachedCostInfo(Caller);
+    return;
+  }
+
+  // Since CalleeMetrics were already calculated, we know that the
+  // CallerMetrics reference isn't invalidated: both were in the DenseMap.
+  CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+
+  // FIXME: If any of these three are true for the callee, the callee was
+  // not inlined into the caller, so I think they're redundant here.
+  CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+  CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+  CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
+  CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
+  CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
+  CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
+  CallerMetrics.NumVectorInsts += CalleeMetrics.NumVectorInsts;
+  CallerMetrics.NumRets += CalleeMetrics.NumRets;
+
+  // analyzeBasicBlock counts each function argument as an inst.
+  if (CallerMetrics.NumInsts >= Callee->arg_size())
+    CallerMetrics.NumInsts -= Callee->arg_size();
+  else
+    CallerMetrics.NumInsts = 0;
+
+  // We are not updating the argument weights.  We have already determined that
+  // Caller is a fairly large function, so we accept the loss of precision.
diff --git a/final/lib/Analysis/InstCount.cpp b/final/lib/Analysis/InstCount.cpp
new file mode 100644
index 00000000000..3b385d26ba3
--- /dev/null
+++ b/final/lib/Analysis/InstCount.cpp
@@ -0,0 +1,87 @@
+//===-- InstCount.cpp - Collects the count of all instructions ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass collects the count of all instructions and reports them
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instcount"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(TotalInsts , "Number of instructions (of all types)");
+STATISTIC(TotalBlocks, "Number of basic blocks");
+STATISTIC(TotalFuncs , "Number of non-external functions");
+STATISTIC(TotalMemInst, "Number of memory instructions");
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+  STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts");
+
+#include "llvm/Instruction.def"
+
+
+namespace {
+  class InstCount : public FunctionPass, public InstVisitor<InstCount> {
+    friend class InstVisitor<InstCount>;
+
+    void visitFunction  (Function &F) { ++TotalFuncs; }
+    void visitBasicBlock(BasicBlock &BB) { ++TotalBlocks; }
+
+#define HANDLE_INST(N, OPCODE, CLASS) \
+    void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; }
+
+#include "llvm/Instruction.def"
+
+    void visitInstruction(Instruction &I) {
+      errs() << "Instruction Count does not know about " << I;
+      llvm_unreachable(0);
+    }
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    InstCount() : FunctionPass(ID) {
+      initializeInstCountPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+    virtual void print(raw_ostream &O, const Module *M) const {}
+
+  };
+}
+
+char InstCount::ID = 0;
+INITIALIZE_PASS(InstCount, "instcount",
+                "Counts the various types of Instructions", false, true)
+
+FunctionPass *llvm::createInstCountPass() { return new InstCount(); }
+
+// InstCount::run - This is the main Analysis entry point for a
+// function.
+//
+bool InstCount::runOnFunction(Function &F) {
+  unsigned StartMemInsts =
+    NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+    NumInvokeInst + NumAllocaInst;
+  visit(F);
+  unsigned EndMemInsts =
+    NumGetElementPtrInst + NumLoadInst + NumStoreInst + NumCallInst +
+    NumInvokeInst + NumAllocaInst;
+  TotalMemInst += EndMemInsts-StartMemInsts;
+  return false;
+}
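Editor's note: the X-macro pattern used twice above can be hard to read. A sketch (not in the patch) of roughly what the preprocessor emits for one opcode, assuming Instruction.def maps Add to class BinaryOperator as in this release:

    // First expansion, at file scope:
    STATISTIC(NumAddInst, "Number of Add insts");
    // Second expansion, inside class InstCount:
    void visitAdd(BinaryOperator &) { ++NumAddInst; ++TotalInsts; }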
diff --git a/final/lib/Analysis/InstructionSimplify.cpp b/final/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 00000000000..48edc5ceaca
--- /dev/null
+++ b/final/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,2170 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions.  This does constant folding
+// ("add i32 1, 1" -> "2") but can also handle non-constant operands, either
+// returning a constant ("and i32 %x, 0" -> "0") or an already existing value
+// ("and i32 %x, %x" -> "%x").  All operands are assumed to have already been
+// simplified: this is usually true and assuming it simplifies the logic (if
+// they have not been simplified then results are correct but maybe suboptimal).
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instsimplify"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+enum { RecursionLimit = 3 };
+
+STATISTIC(NumExpand,  "Number of expansions");
+STATISTIC(NumFactor , "Number of factorizations");
+STATISTIC(NumReassoc, "Number of reassociations");
+
+static Value *SimplifyAndInst(Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+static Value *SimplifyBinOp(unsigned, Value *, Value *, const TargetData *,
+                            const DominatorTree *, unsigned);
+static Value *SimplifyCmpInst(unsigned, Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+static Value *SimplifyOrInst(Value *, Value *, const TargetData *,
+                             const DominatorTree *, unsigned);
+static Value *SimplifyXorInst(Value *, Value *, const TargetData *,
+                              const DominatorTree *, unsigned);
+
+/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    // Arguments and constants dominate all instructions.
+    return true;
+
+  // If we have a DominatorTree then do a precise test.
+  if (DT)
+    return DT->dominates(I, P);
+
+  // Otherwise, if the instruction is in the entry block, and is not an invoke,
+  // then it obviously dominates all phi nodes.
+  if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&
+      !isa<InvokeInst>(I))
+    return true;
+
+  return false;
+}
+
+/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
+/// it into "(A op B) op' (A op C)".  Here "op" is given by Opcode and "op'" is
+/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
+/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                          unsigned OpcToExpand, const TargetData *TD,
+                          const DominatorTree *DT, unsigned MaxRecurse) {
+  Instruction::BinaryOps OpcodeToExpand = (Instruction::BinaryOps)OpcToExpand;
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Check whether the expression has the form "(A op' B) op C".
+  if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS))
+    if (Op0->getOpcode() == OpcodeToExpand) {
+      // It does!  Try turning it into "(A op C) op' (B op C)".
+      Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
+      // Do "A op C" and "B op C" both simplify?
+      if (Value *L = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+          // They do!  Return "L op' R" if it simplifies or is already available.
+          // If "L op' R" equals "A op' B" then "L op' R" is just the LHS.
+          if ((L == A && R == B) || (Instruction::isCommutative(OpcodeToExpand)
+                                     && L == B && R == A)) {
+            ++NumExpand;
+            return LHS;
+          }
+          // Otherwise return "L op' R" if it simplifies.
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+                                       MaxRecurse)) {
+            ++NumExpand;
+            return V;
+          }
+        }
+    }
+
+  // Check whether the expression has the form "A op (B op' C)".
+  if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS))
+    if (Op1->getOpcode() == OpcodeToExpand) {
+      // It does!  Try turning it into "(A op B) op' (A op C)".
+      Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
+      // Do "A op B" and "A op C" both simplify?
+      if (Value *L = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse))
+        if (Value *R = SimplifyBinOp(Opcode, A, C, TD, DT, MaxRecurse)) {
+          // They do!  Return "L op' R" if it simplifies or is already available.
+          // If "L op' R" equals "B op' C" then "L op' R" is just the RHS.
+          if ((L == B && R == C) || (Instruction::isCommutative(OpcodeToExpand)
+                                     && L == C && R == B)) {
+            ++NumExpand;
+            return RHS;
+          }
+          // Otherwise return "L op' R" if it simplifies.
+          if (Value *V = SimplifyBinOp(OpcodeToExpand, L, R, TD, DT,
+                                       MaxRecurse)) {
+            ++NumExpand;
+            return V;
+          }
+        }
+    }
+
+  return 0;
+}
+
+/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
+/// using the operation OpCodeToExtract.  For example, when Opcode is Add and
+/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
+/// Returns the simplified value, or null if no simplification was performed.
+static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                             unsigned OpcToExtract, const TargetData *TD,
+                             const DominatorTree *DT, unsigned MaxRecurse) {
+  Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+  if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
+      !Op1 || Op1->getOpcode() != OpcodeToExtract)
+    return 0;
+
+  // The expression has the form "(A op' B) op (C op' D)".
+  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
+  Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
+
+  // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
+  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+  // commutative case, "(A op' B) op (C op' A)"?
+  if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
+    Value *DD = A == C ? D : C;
+    // Form "A op' (B op DD)" if it simplifies completely.
+    // Does "B op DD" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, B, DD, TD, DT, MaxRecurse)) {
+      // It does!  Return "A op' V" if it simplifies or is already available.
+      // If V equals B then "A op' V" is just the LHS.  If V equals DD then
+      // "A op' V" is just the RHS.
+      if (V == B || V == DD) {
+        ++NumFactor;
+        return V == B ? LHS : RHS;
+      }
+      // Otherwise return "A op' V" if it simplifies.
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, TD, DT, MaxRecurse)) {
+        ++NumFactor;
+        return W;
+      }
+    }
+  }
+
+  // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
+  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
+  // commutative case, "(A op' B) op (B op' D)"?
+  if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
+    Value *CC = B == D ? C : D;
+    // Form "(A op CC) op' B" if it simplifies completely.
+    // Does "A op CC" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, A, CC, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op' B" if it simplifies or is already available.
+      // If V equals A then "V op' B" is just the LHS.  If V equals CC then
+      // "V op' B" is just the RHS.
+      if (V == A || V == CC) {
+        ++NumFactor;
+        return V == A ? LHS : RHS;
+      }
+      // Otherwise return "V op' B" if it simplifies.
+      if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, TD, DT, MaxRecurse)) {
+        ++NumFactor;
+        return W;
+      }
+    }
+  }
+
+  return 0;
+}
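Editor's note: a minimal sketch (not in the patch) of the factorization path firing. The function name demoFactorize is hypothetical; SimplifyOrInst is the public entry point defined later in this file, and the IRBuilder is assumed to be positioned in a basic block.

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Assumes X and Y are i32 values.
    Value *demoFactorize(IRBuilder<> &B, Value *X, Value *Y) {
      Value *NotY = B.CreateNot(Y);
      Value *L = B.CreateAnd(X, Y);      // X & Y
      Value *R = B.CreateAnd(X, NotY);   // X & ~Y
      // FactorizeBinOp views (X & Y) | (X & ~Y) as having the common term X
      // and forms X & (Y | ~Y); "Y | ~Y" folds to -1 and "X & -1" folds to X.
      return SimplifyOrInst(L, R, 0, 0); // == X
    }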
+
+/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
+/// operations.  Returns the simpler value, or null if none was found.
+static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
+                                       const TargetData *TD,
+                                       const DominatorTree *DT,
+                                       unsigned MaxRecurse) {
+  Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
+  assert(Instruction::isAssociative(Opcode) && "Not an associative operation!");
+
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
+
+  // Transform: "(A op B) op C" ==> "A op (B op C)" if it simplifies completely.
+  if (Op0 && Op0->getOpcode() == Opcode) {
+    Value *A = Op0->getOperand(0);
+    Value *B = Op0->getOperand(1);
+    Value *C = RHS;
+
+    // Does "B op C" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, B, C, TD, DT, MaxRecurse)) {
+      // It does!  Return "A op V" if it simplifies or is already available.
+      // If V equals B then "A op V" is just the LHS.
+      if (V == B) return LHS;
+      // Otherwise return "A op V" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, A, V, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // Transform: "A op (B op C)" ==> "(A op B) op C" if it simplifies completely.
+  if (Op1 && Op1->getOpcode() == Opcode) {
+    Value *A = LHS;
+    Value *B = Op1->getOperand(0);
+    Value *C = Op1->getOperand(1);
+
+    // Does "A op B" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, A, B, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op C" if it simplifies or is already available.
+      // If V equals B then "V op C" is just the RHS.
+      if (V == B) return RHS;
+      // Otherwise return "V op C" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, V, C, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // The remaining transforms require commutativity as well as associativity.
+  if (!Instruction::isCommutative(Opcode))
+    return 0;
+
+  // Transform: "(A op B) op C" ==> "(C op A) op B" if it simplifies completely.
+  if (Op0 && Op0->getOpcode() == Opcode) {
+    Value *A = Op0->getOperand(0);
+    Value *B = Op0->getOperand(1);
+    Value *C = RHS;
+
+    // Does "C op A" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      // It does!  Return "V op B" if it simplifies or is already available.
+      // If V equals A then "V op B" is just the LHS.
+      if (V == A) return LHS;
+      // Otherwise return "V op B" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, V, B, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  // Transform: "A op (B op C)" ==> "B op (C op A)" if it simplifies completely.
+  if (Op1 && Op1->getOpcode() == Opcode) {
+    Value *A = LHS;
+    Value *B = Op1->getOperand(0);
+    Value *C = Op1->getOperand(1);
+
+    // Does "C op A" simplify?
+    if (Value *V = SimplifyBinOp(Opcode, C, A, TD, DT, MaxRecurse)) {
+      // It does!  Return "B op V" if it simplifies or is already available.
+      // If V equals C then "B op V" is just the RHS.
+      if (V == C) return RHS;
+      // Otherwise return "B op V" if it simplifies.
+      if (Value *W = SimplifyBinOp(Opcode, B, V, TD, DT, MaxRecurse)) {
+        ++NumReassoc;
+        return W;
+      }
+    }
+  }
+
+  return 0;
+}
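Editor's note: a hypothetical demoReassoc (same includes and assumptions as the previous sketch) showing the first transform above firing for Xor:

    Value *demoReassoc(IRBuilder<> &B, Value *X, Value *Y) {
      Value *XY = B.CreateXor(X, Y);       // X ^ Y
      // "(X ^ Y) ^ Y" is "(A op B) op C" with B == C: the inner "Y ^ Y"
      // folds to 0, and "X ^ 0" folds back to X.
      return SimplifyXorInst(XY, Y, 0, 0); // == X
    }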
+
+/// ThreadBinOpOverSelect - In the case of a binary operation with a select
+/// instruction as an operand, try to simplify the binop by seeing whether
+/// evaluating it on both branches of the select results in the same value.
+/// Returns the common value if so, otherwise returns null.
+static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
+                                    const TargetData *TD,
+                                    const DominatorTree *DT,
+                                    unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  SelectInst *SI;
+  if (isa<SelectInst>(LHS)) {
+    SI = cast<SelectInst>(LHS);
+  } else {
+    assert(isa<SelectInst>(RHS) && "No select instruction operand!");
+    SI = cast<SelectInst>(RHS);
+  }
+
+  // Evaluate the BinOp on the true and false branches of the select.
+  Value *TV;
+  Value *FV;
+  if (SI == LHS) {
+    TV = SimplifyBinOp(Opcode, SI->getTrueValue(), RHS, TD, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, SI->getFalseValue(), RHS, TD, DT, MaxRecurse);
+  } else {
+    TV = SimplifyBinOp(Opcode, LHS, SI->getTrueValue(), TD, DT, MaxRecurse);
+    FV = SimplifyBinOp(Opcode, LHS, SI->getFalseValue(), TD, DT, MaxRecurse);
+  }
+
+  // If they simplified to the same value, then return the common value.
+  // If they both failed to simplify then return null.
+  if (TV == FV)
+    return TV;
+
+  // If one branch simplified to undef, return the other one.
+  if (TV && isa<UndefValue>(TV))
+    return FV;
+  if (FV && isa<UndefValue>(FV))
+    return TV;
+
+  // If applying the operation did not change the true and false select values,
+  // then the result of the binop is the select itself.
+  if (TV == SI->getTrueValue() && FV == SI->getFalseValue())
+    return SI;
+
+  // If one branch simplified and the other did not, and the simplified
+  // value is equal to the unsimplified one, return the simplified value.
+  // For example, select (cond, X, X & Z) & Z -> X & Z.
+  if ((FV && !TV) || (TV && !FV)) {
+    // Check that the simplified value has the form "X op Y" where "op" is the
+    // same as the original operation.
+    Instruction *Simplified = dyn_cast<Instruction>(FV ? FV : TV);
+    if (Simplified && Simplified->getOpcode() == Opcode) {
+      // The value that didn't simplify is "UnsimplifiedLHS op UnsimplifiedRHS".
+      // We already know that "op" is the same as for the simplified value.  See
+      // if the operands match too.  If so, return the simplified value.
+      Value *UnsimplifiedBranch = FV ? SI->getTrueValue() : SI->getFalseValue();
+      Value *UnsimplifiedLHS = SI == LHS ? UnsimplifiedBranch : LHS;
+      Value *UnsimplifiedRHS = SI == LHS ? RHS : UnsimplifiedBranch;
+      if (Simplified->getOperand(0) == UnsimplifiedLHS &&
+          Simplified->getOperand(1) == UnsimplifiedRHS)
+        return Simplified;
+      if (Simplified->isCommutative() &&
+          Simplified->getOperand(1) == UnsimplifiedLHS &&
+          Simplified->getOperand(0) == UnsimplifiedRHS)
+        return Simplified;
+    }
+  }
+
+  return 0;
+}
+
+/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
+/// try to simplify the comparison by seeing whether both branches of the select
+/// result in the same value.  Returns the common value if so, otherwise returns
+/// null.
+static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
+                                  Value *RHS, const TargetData *TD,
+                                  const DominatorTree *DT,
+                                  unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Make sure the select is on the LHS.
+  if (!isa<SelectInst>(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+  assert(isa<SelectInst>(LHS) && "Not comparing with a select instruction!");
+  SelectInst *SI = cast<SelectInst>(LHS);
+
+  // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it.
+  // Does "cmp TV, RHS" simplify?
+  if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT,
+                                    MaxRecurse)) {
+    // It does!  Does "cmp FV, RHS" simplify?
+    if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT,
+                                      MaxRecurse)) {
+      // It does!  If they simplified to the same value, then use it as the
+      // result of the original comparison.
+      if (TCmp == FCmp)
+        return TCmp;
+      Value *Cond = SI->getCondition();
+      // If the false value simplified to false, then the result of the compare
+      // is equal to "Cond && TCmp".  This also catches the case when the false
+      // value simplified to false and the true value to true, returning "Cond".
+      if (match(FCmp, m_Zero()))
+        if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse))
+          return V;
+      // If the true value simplified to true, then the result of the compare
+      // is equal to "Cond || FCmp".
+      if (match(TCmp, m_One()))
+        if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse))
+          return V;
+      // Finally, if the false value simplified to true and the true value to
+      // false, then the result of the compare is equal to "!Cond".
+      if (match(FCmp, m_One()) && match(TCmp, m_Zero()))
+        if (Value *V =
+            SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()),
+                            TD, DT, MaxRecurse))
+          return V;
+    }
+  }
+
+  return 0;
+}
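Editor's note: a hypothetical demoThreadSelect (same assumptions as the earlier sketches) exercising the "select (cond, X, X & Z) & Z -> X & Z" case from the comment above:

    // Assumes Cond is i1 and X, Z are i32.
    Value *demoThreadSelect(IRBuilder<> &B, Value *Cond, Value *X, Value *Z) {
      Value *XZ  = B.CreateAnd(X, Z);            // X & Z
      Value *Sel = B.CreateSelect(Cond, X, XZ);  // select cond, X, X & Z
      // The false branch "(X & Z) & Z" folds to "X & Z"; the true branch
      // "X & Z" matches it operand-for-operand, so the select threads away.
      return SimplifyAndInst(Sel, Z, 0, 0);      // == XZ
    }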
+
+/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
+/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
+/// it on the incoming phi values yields the same result for every value.  If so
+/// returns the common value, otherwise returns null.
+static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
+                                 const TargetData *TD, const DominatorTree *DT,
+                                 unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  PHINode *PI;
+  if (isa<PHINode>(LHS)) {
+    PI = cast<PHINode>(LHS);
+    // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(RHS, PI, DT))
+      return 0;
+  } else {
+    assert(isa<PHINode>(RHS) && "No PHI instruction operand!");
+    PI = cast<PHINode>(RHS);
+    // Bail out if LHS and the phi may be mutually interdependent due to a loop.
+    if (!ValueDominatesPHI(LHS, PI, DT))
+      return 0;
+  }
+
+  // Evaluate the BinOp on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = PI == LHS ?
+      SimplifyBinOp(Opcode, Incoming, RHS, TD, DT, MaxRecurse) :
+      SimplifyBinOp(Opcode, LHS, Incoming, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // than it did previously, then give up.
+    if (!V || (CommonValue && V != CommonValue))
+      return 0;
+    CommonValue = V;
+  }
+
+  return CommonValue;
+}
+
+/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
+/// to simplify the comparison by seeing whether comparing with all of the
+/// incoming phi values yields the same result every time.  If so returns the
+/// common result, otherwise returns null.
+static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  // Recursion is always used, so bail out at once if we already hit the limit.
+  if (!MaxRecurse--)
+    return 0;
+
+  // Make sure the phi is on the LHS.
+  if (!isa<PHINode>(LHS)) {
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+  assert(isa<PHINode>(LHS) && "Not comparing with a phi instruction!");
+  PHINode *PI = cast<PHINode>(LHS);
+
+  // Bail out if RHS and the phi may be mutually interdependent due to a loop.
+  if (!ValueDominatesPHI(RHS, PI, DT))
+    return 0;
+
+  // Evaluate the comparison on the incoming phi values.
+  Value *CommonValue = 0;
+  for (unsigned i = 0, e = PI->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PI->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PI) continue;
+    Value *V = SimplifyCmpInst(Pred, Incoming, RHS, TD, DT, MaxRecurse);
+    // If the operation failed to simplify, or simplified to a different value
+    // than it did previously, then give up.
+    if (!V || (CommonValue && V != CommonValue))
+      return 0;
+    CommonValue = V;
+  }
+
+  return CommonValue;
+}
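Editor's note: a hypothetical demoThreadPHI showing comparison threading over a phi whose incoming values are constants:

    // Assumes Phi is an i32 phi whose incoming values are the constants 1 and 2.
    Value *demoThreadPHI(PHINode *Phi) {
      LLVMContext &Ctx = Phi->getContext();
      Value *Zero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
      // "icmp eq 1, 0" and "icmp eq 2, 0" both fold to false, so the
      // compare of the phi itself folds to false.
      return SimplifyICmpInst(ICmpInst::ICMP_EQ, Phi, Zero, 0, 0);
    }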
+
+/// SimplifyAddInst - Given operands for an Add, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // X + undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // X + 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X + (Y - X) -> Y
+  // (Y - X) + X -> Y
+  // Eg: X + -X -> 0
+  Value *Y = 0;
+  if (match(Op1, m_Sub(m_Value(Y), m_Specific(Op0))) ||
+      match(Op0, m_Sub(m_Value(Y), m_Specific(Op1))))
+    return Y;
+
+  // X + ~X -> -1   since   ~X = -X-1
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  /// i1 add -> xor.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Add, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Mul distributes over Add.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // Threading Add over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A + select(cond, B, C)" means evaluating
+  // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A+B" and "A+C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyAddInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
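Editor's note: a hypothetical demoAdd exercising the "X + (Y - X) -> Y" identity above through the public wrapper:

    Value *demoAdd(IRBuilder<> &B, Value *X, Value *Y) {
      Value *Sub = B.CreateSub(Y, X);                     // Y - X
      // Matches the m_Sub pattern above, so no new instruction is needed.
      return SimplifyAddInst(X, Sub, false, false, 0, 0); // == Y
    }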
+
+/// SimplifySubInst - Given operands for a Sub, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0))
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Sub, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+  // X - undef -> undef
+  // undef - X -> undef
+  if (match(Op0, m_Undef()) || match(Op1, m_Undef()))
+    return UndefValue::get(Op0->getType());
+
+  // X - 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X - X -> 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // (X*2) - X -> X
+  // (X<<1) - X -> X
+  Value *X = 0;
+  if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
+      match(Op0, m_Shl(m_Specific(Op1), m_One())))
+    return Op1;
+
+  // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
+  // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
+  Value *Y = 0, *Z = Op1;
+  if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
+    // See if "V === Y - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "X + V" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, X, V, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+    // See if "V === X - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "Y + V" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, Y, V, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+  }
+
+  // X - (Y + Z) -> (X - Y) - Z or (X - Z) - Y if everything simplifies.
+  // For example, X - (X + 1) -> -1
+  X = Op0;
+  if (MaxRecurse && match(Op1, m_Add(m_Value(Y), m_Value(Z)))) { // X - (Y + Z)
+    // See if "V === X - Y" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V - Z" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Z, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+    // See if "V === X - Z" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, X, Z, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V - Y" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Sub, V, Y, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+  }
+
+  // Z - (X - Y) -> (Z - X) + Y if everything simplifies.
+  // For example, X - (X - Y) -> Y.
+  Z = Op0;
+  if (MaxRecurse && match(Op1, m_Sub(m_Value(X), m_Value(Y)))) // Z - (X - Y)
+    // See if "V === Z - X" simplifies.
+    if (Value *V = SimplifyBinOp(Instruction::Sub, Z, X, TD, DT, MaxRecurse-1))
+      // It does!  Now see if "V + Y" simplifies.
+      if (Value *W = SimplifyBinOp(Instruction::Add, V, Y, TD, DT,
+                                   MaxRecurse-1)) {
+        // It does, we successfully reassociated!
+        ++NumReassoc;
+        return W;
+      }
+
+  // Mul distributes over Sub.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // i1 sub -> xor.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyXorInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Threading Sub over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A - select(cond, B, C)" means evaluating
+  // "A-B" and "A-C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A-B" and "A-C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifySubInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyMulInst - Given operands for a Mul, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Mul, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // X * undef -> 0
+  if (match(Op1, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // X * 0 -> 0
+  if (match(Op1, m_Zero()))
+    return Op1;
+
+  // X * 1 -> X
+  if (match(Op1, m_One()))
+    return Op0;
+
+  // (X / Y) * Y -> X if the division is exact.
+  Value *X = 0, *Y = 0;
+  if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y
+      (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y)
+    BinaryOperator *Div = cast<BinaryOperator>(Y == Op1 ? Op0 : Op1);
+    if (Div->isExact())
+      return X;
+  }
+
+  // i1 mul -> and.
+  if (MaxRecurse && Op0->getType()->isIntegerTy(1))
+    if (Value *V = SimplifyAndInst(Op0, Op1, TD, DT, MaxRecurse-1))
+      return V;
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Mul, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Mul distributes over Add.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::Mul, Op0, Op1, Instruction::Add,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Mul, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Mul, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyMulInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
+                          const TargetData *TD, const DominatorTree *DT,
+                          unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  bool isSigned = Opcode == Instruction::SDiv;
+
+  // X / undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // undef / X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // 0 / X -> 0, we don't need to preserve faults!
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X / 1 -> X
+  if (match(Op1, m_One()))
+    return Op0;
+
+  if (Op0->getType()->isIntegerTy(1))
+    // It can't be division by zero, hence it must be division by one.
+    return Op0;
+
+  // X / X -> 1
+  if (Op0 == Op1)
+    return ConstantInt::get(Op0->getType(), 1);
+
+  // (X * Y) / Y -> X if the multiplication does not overflow.
+  Value *X = 0, *Y = 0;
+  if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) {
+    if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1
+    BinaryOperator *Mul = cast<BinaryOperator>(Op0);
+    // If the Mul knows it does not overflow, then we are good to go.
+    if ((isSigned && Mul->hasNoSignedWrap()) ||
+        (!isSigned && Mul->hasNoUnsignedWrap()))
+      return X;
+    // If X has the form X = A / Y then X * Y cannot overflow.
+    if (BinaryOperator *Div = dyn_cast<BinaryOperator>(X))
+      if (Div->getOpcode() == Opcode && Div->getOperand(1) == Y)
+        return X;
+  }
+
+  // (X rem Y) / Y -> 0
+  if ((isSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
+      (!isSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
+    return Constant::getNullValue(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+/// SimplifySDivInst - Given operands for an SDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifySDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyUDivInst - Given operands for a UDiv, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                               const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyUDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyUDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static Value *SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *,
+                               const DominatorTree *, unsigned) {
+  // undef / X -> undef    (the undef could be a snan).
+  if (match(Op0, m_Undef()))
+    return Op0;
+
+  // X / undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  return 0;
+}
+
+Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT) {
+  return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
+                            const TargetData *TD, const DominatorTree *DT,
+                            unsigned MaxRecurse) {
+  if (Constant *C0 = dyn_cast<Constant>(Op0)) {
+    if (Constant *C1 = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { C0, C1 };
+      return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD);
+    }
+  }
+
+  // 0 shift by X -> 0
+  if (match(Op0, m_Zero()))
+    return Op0;
+
+  // X shift by 0 -> X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X shift by undef -> undef because it may shift by the bitwidth.
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // Shifting by the bitwidth or more is undefined.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1))
+    if (CI->getValue().getLimitedValue() >=
+        Op0->getType()->getScalarSizeInBits())
+      return UndefValue::get(Op0->getType());
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
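Editor's note: a hypothetical demoShift showing the "shift by the bitwidth or more" rule above:

    // Assumes X is an i32 value.
    Value *demoShift(LLVMContext &Ctx, Value *X) {
      Value *Amt = ConstantInt::get(Type::getInt32Ty(Ctx), 32);
      // Shifting a 32-bit value by 32 is undefined, so this folds to undef.
      return SimplifyLShrInst(X, Amt, /*isExact=*/false, 0, 0);
    }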
+
+/// SimplifyShlInst - Given operands for an Shl, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // undef << X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // (X >> A) << A -> X
+  Value *X;
+  if (match(Op0, m_Shr(m_Value(X), m_Specific(Op1))) &&
+      cast<PossiblyExactOperator>(Op0)->isExact())
+    return X;
+  return 0;
+}
+
+Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyShlInst(Op0, Op1, isNSW, isNUW, TD, DT, RecursionLimit);
+}
+
+/// SimplifyLShrInst - Given operands for an LShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // undef >>l X -> 0
+  if (match(Op0, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // (X << A) >> A -> X
+  Value *X;
+  if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+      cast<OverflowingBinaryOperator>(Op0)->hasNoUnsignedWrap())
+    return X;
+
+  return 0;
+}
+
+Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyLShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAShrInst - Given operands for an AShr, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, TD, DT, MaxRecurse))
+    return V;
+
+  // all ones >>a X -> all ones
+  if (match(Op0, m_AllOnes()))
+    return Op0;
+
+  // undef >>a X -> all ones
+  if (match(Op0, m_Undef()))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // (X << A) >> A -> X
+  Value *X;
+  if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1))) &&
+      cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap())
+    return X;
+
+  return 0;
+}
+
+Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyAShrInst(Op0, Op1, isExact, TD, DT, RecursionLimit);
+}
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // X & undef -> 0
+  if (match(Op1, m_Undef()))
+    return Constant::getNullValue(Op0->getType());
+
+  // X & X = X
+  if (Op0 == Op1)
+    return Op0;
+
+  // X & 0 = 0
+  if (match(Op1, m_Zero()))
+    return Op1;
+
+  // X & -1 = X
+  if (match(Op1, m_AllOnes()))
+    return Op0;
+
+  // A & ~A  =  ~A & A  =  0
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getNullValue(Op0->getType());
+
+  // (A | ?) & A = A
+  Value *A = 0, *B = 0;
+  if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+      (A == Op1 || B == Op1))
+    return Op1;
+
+  // A & (A | ?) = A
+  if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+      (A == Op0 || B == Op0))
+    return Op0;
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // And distributes over Or.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // And distributes over Xor.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::And, Op0, Op1, Instruction::Xor,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // Or distributes over And.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::And, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::And, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyAndInst(Op0, Op1, TD, DT, RecursionLimit);
+}
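Editor's note: a hypothetical demoAbsorb exercising the absorption law "A & (A | ?) = A" above:

    Value *demoAbsorb(IRBuilder<> &B, Value *A, Value *C) {
      Value *Or = B.CreateOr(A, C);        // A | C
      // Absorption: "A & (A | ?)" is just A, no matter what ? is.
      return SimplifyAndInst(A, Or, 0, 0); // == A
    }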
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // X | undef -> -1
+  if (match(Op1, m_Undef()))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // X | X = X
+  if (Op0 == Op1)
+    return Op0;
+
+  // X | 0 = X
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // X | -1 = -1
+  if (match(Op1, m_AllOnes()))
+    return Op1;
+
+  // A | ~A  =  ~A | A  =  -1
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // (A & ?) | A = A
+  Value *A = 0, *B = 0;
+  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+      (A == Op1 || B == Op1))
+    return Op1;
+
+  // A | (A & ?) = A
+  if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+      (A == Op0 || B == Op0))
+    return Op0;
+
+  // ~(A & ?) | A = -1
+  if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+      (A == Op1 || B == Op1))
+    return Constant::getAllOnesValue(Op1->getType());
+
+  // A | ~(A & ?) = -1
+  if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) &&
+      (A == Op0 || B == Op0))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // Or distributes over And.  Try some generic simplifications based on this.
+  if (Value *V = ExpandBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+                             TD, DT, MaxRecurse))
+    return V;
+
+  // And distributes over Or.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // If the operation is with the result of a select instruction, check whether
+  // operating on either branch of the select always yields the same value.
+  if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+    if (Value *V = ThreadBinOpOverSelect(Instruction::Or, Op0, Op1, TD, DT,
+                                         MaxRecurse))
+      return V;
+
+  // If the operation is with the result of a phi instruction, check whether
+  // operating on all incoming values of the phi always yields the same value.
+  if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+    if (Value *V = ThreadBinOpOverPHI(Instruction::Or, Op0, Op1, TD, DT,
+                                      MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD,
+                            const DominatorTree *DT) {
+  return ::SimplifyOrInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+/// SimplifyXorInst - Given operands for a Xor, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+                              const DominatorTree *DT, unsigned MaxRecurse) {
+  if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+    if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+      Constant *Ops[] = { CLHS, CRHS };
+      return ConstantFoldInstOperands(Instruction::Xor, CLHS->getType(),
+                                      Ops, 2, TD);
+    }
+
+    // Canonicalize the constant to the RHS.
+    std::swap(Op0, Op1);
+  }
+
+  // A ^ undef -> undef
+  if (match(Op1, m_Undef()))
+    return Op1;
+
+  // A ^ 0 = A
+  if (match(Op1, m_Zero()))
+    return Op0;
+
+  // A ^ A = 0
+  if (Op0 == Op1)
+    return Constant::getNullValue(Op0->getType());
+
+  // A ^ ~A  =  ~A ^ A  =  -1
+  if (match(Op0, m_Not(m_Specific(Op1))) ||
+      match(Op1, m_Not(m_Specific(Op0))))
+    return Constant::getAllOnesValue(Op0->getType());
+
+  // Try some generic simplifications for associative operations.
+  if (Value *V = SimplifyAssociativeBinOp(Instruction::Xor, Op0, Op1, TD, DT,
+                                          MaxRecurse))
+    return V;
+
+  // And distributes over Xor.  Try some generic simplifications based on this.
+  if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
+                                TD, DT, MaxRecurse))
+    return V;
+
+  // Threading Xor over selects and phi nodes is pointless, so don't bother.
+  // Threading over the select in "A ^ select(cond, B, C)" means evaluating
+  // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
+  // only if B and C are equal.  If B and C are equal then (since we assume
+  // that operands have already been simplified) "select(cond, B, C)" should
+  // have been simplified to the common value of B and C already.  Analysing
+  // "A^B" and "A^C" thus gains nothing, but costs compile time.  Similarly
+  // for threading over phi nodes.
+
+  return 0;
+}
+
+Value *llvm::SimplifyXorInst(Value *Op0, Value *Op1, const TargetData *TD,
+                             const DominatorTree *DT) {
+  return ::SimplifyXorInst(Op0, Op1, TD, DT, RecursionLimit);
+}
+
+static const Type *GetCompareTy(Value *Op) {
+  return CmpInst::makeCmpResultType(Op->getType());
+}
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+  assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+    if (Constant *CRHS = dyn_cast<Constant>(RHS))
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+
+  const Type *ITy = GetCompareTy(LHS); // The return type.
+  const Type *OpTy = LHS->getType();   // The operand type.
+
+  // icmp X, X -> true/false
+  // X icmp undef -> true/false.  For example, icmp ugt %X, undef -> false
+  // because X could be 0.
+  if (LHS == RHS || isa<UndefValue>(RHS))
+    return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+  // Special case logic when the operands have i1 type.
+  if (OpTy->isIntegerTy(1) || (OpTy->isVectorTy() &&
+       cast<VectorType>(OpTy)->getElementType()->isIntegerTy(1))) {
+    switch (Pred) {
+    default: break;
+    case ICmpInst::ICMP_EQ:
+      // X == 1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_NE:
+      // X != 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_UGT:
+      // X >u 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_UGE:
+      // X >=u 1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_SLT:
+      // X <s 0 -> X
+      if (match(RHS, m_Zero()))
+        return LHS;
+      break;
+    case ICmpInst::ICMP_SLE:
+      // X <=s -1 -> X
+      if (match(RHS, m_One()))
+        return LHS;
+      break;
+    }
+  }
+
+  // icmp <alloca*>, <global/alloca*/null> - Different stack variables have
+  // different addresses, and what's more the address of a stack variable is
+  // never null or equal to the address of a global.  Note that generalizing
+  // to the case where LHS is a global variable address or null is pointless,
+  // since if both LHS and RHS are constants then we already constant folded
+  // the compare, and if only one of them is then we moved it to RHS already.
+  if (isa<AllocaInst>(LHS) && (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+                               isa<ConstantPointerNull>(RHS)))
+    // We already know that LHS != RHS.
+    return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+  // If we are comparing with zero then try hard since this is a common case.
+  if (match(RHS, m_Zero())) {
+    bool LHSKnownNonNegative, LHSKnownNegative;
+    switch (Pred) {
+    default:
+      assert(false && "Unknown ICmp predicate!");
+    case ICmpInst::ICMP_ULT:
+      return ConstantInt::getFalse(LHS->getContext());
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getTrue(LHS->getContext());
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_ULE:
+      if (isKnownNonZero(LHS, TD))
+        return ConstantInt::getFalse(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_UGT:
+      if (isKnownNonZero(LHS, TD))
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SLT:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      if (LHSKnownNonNegative)
+        return ConstantInt::getFalse(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+        return ConstantInt::getFalse(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getFalse(LHS->getContext());
+      if (LHSKnownNonNegative)
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    case ICmpInst::ICMP_SGT:
+      ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, TD);
+      if (LHSKnownNegative)
+        return ConstantInt::getFalse(LHS->getContext());
+      if (LHSKnownNonNegative && isKnownNonZero(LHS, TD))
+        return ConstantInt::getTrue(LHS->getContext());
+      break;
+    }
+  }
+
+  // See if we are doing a comparison with a constant integer.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+    // Rule out tautological comparisons (e.g., ult 0 or uge 0).
+    ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
+    if (RHS_CR.isEmptySet())
+      return ConstantInt::getFalse(CI->getContext());
+    if (RHS_CR.isFullSet())
+      return ConstantInt::getTrue(CI->getContext());
+
+    // Many binary operators with constant RHS have easy to compute constant
+    // range.  Use them to check whether the comparison is a tautology.
+    uint32_t Width = CI->getBitWidth();
+    APInt Lower = APInt(Width, 0);
+    APInt Upper = APInt(Width, 0);
+    ConstantInt *CI2;
+    if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
+      // 'urem x, CI2' produces [0, CI2).
+      Upper = CI2->getValue();
+    } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
+      // 'srem x, CI2' produces (-|CI2|, |CI2|).
+      Upper = CI2->getValue().abs();
+      Lower = (-Upper) + 1;
+    } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
+      // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
+      APInt NegOne = APInt::getAllOnesValue(Width);
+      if (!CI2->isZero())
+        Upper = NegOne.udiv(CI2->getValue()) + 1;
+    } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
+      // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      APInt Val = CI2->getValue().abs();
+      if (!Val.isMinValue()) {
+        Lower = IntMin.sdiv(Val);
+        Upper = IntMax.sdiv(Val) + 1;
+      }
+    } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
+      // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
+      APInt NegOne = APInt::getAllOnesValue(Width);
+      if (CI2->getValue().ult(Width))
+        Upper = NegOne.lshr(CI2->getValue()) + 1;
+    } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
+      // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
+      APInt IntMin = APInt::getSignedMinValue(Width);
+      APInt IntMax = APInt::getSignedMaxValue(Width);
+      if (CI2->getValue().ult(Width)) {
+        Lower = IntMin.ashr(CI2->getValue());
+        Upper = IntMax.ashr(CI2->getValue()) + 1;
+      }
+    } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
+      // 'or x, CI2' produces [CI2, UINT_MAX].
+      Lower = CI2->getValue();
+    } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
+      // 'and x, CI2' produces [0, CI2].
+      Upper = CI2->getValue() + 1;
+    }
+    if (Lower != Upper) {
+      ConstantRange LHS_CR = ConstantRange(Lower, Upper);
+      if (RHS_CR.contains(LHS_CR))
+        return ConstantInt::getTrue(RHS->getContext());
+      if (RHS_CR.inverse().contains(LHS_CR))
+        return ConstantInt::getFalse(RHS->getContext());
+    }
+  }
+
+  // Compare of cast, for example (zext X) != 0 -> X != 0
+  if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+    Instruction *LI = cast<CastInst>(LHS);
+    Value *SrcOp = LI->getOperand(0);
+    const Type *SrcTy = SrcOp->getType();
+    const Type *DstTy = LI->getType();
+
+    // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+    // if the integer type is the same size as the pointer type.
+    if (MaxRecurse && TD && isa<PtrToIntInst>(LI) &&
+        TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) {
+      if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+        // Transfer the cast to the constant.
+        if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+                                        ConstantExpr::getIntToPtr(RHSC, SrcTy),
+                                        TD, DT, MaxRecurse-1))
+          return V;
+      } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+        if (RI->getOperand(0)->getType() == SrcTy)
+          // Compare without the cast.
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+                                          TD, DT, MaxRecurse-1))
+            return V;
+      }
+    }
+
+    if (isa<ZExtInst>(LHS)) {
+      // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+      // same type.
+      if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+        if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+          // Compare X and Y.  Note that signed predicates become unsigned.
+          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+                                          SrcOp, RI->getOperand(0), TD, DT,
+                                          MaxRecurse-1))
+            return V;
+      }
+      // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+      // too.  If not, then try to deduce the result of the comparison.
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+        // Compute the constant that would happen if we truncated to SrcTy then
+        // reextended to DstTy.
+        Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+        Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+        // If the re-extended constant didn't change then this is effectively
+        // also a case of comparing two zero-extended values.
+        if (RExt == CI && MaxRecurse)
+          if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+                                          SrcOp, Trunc, TD, DT, MaxRecurse-1))
+            return V;
+
+        // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+        // there.  Use this to work out the result of the comparison.
+        if (RExt != CI) {
+          switch (Pred) {
+          default:
+            assert(false && "Unknown ICmp predicate!");
+          // LHS <u RHS.
+          case ICmpInst::ICMP_EQ:
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_UGE:
+            return ConstantInt::getFalse(CI->getContext());
+
+          case ICmpInst::ICMP_NE:
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_ULE:
+            return ConstantInt::getTrue(CI->getContext());
+
+          // LHS is non-negative.  If RHS is negative then LHS >s RHS.  If RHS
+          // is non-negative then LHS <s RHS.
+          case ICmpInst::ICMP_SGT:
+          case ICmpInst::ICMP_SGE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getTrue(CI->getContext()) :
+              ConstantInt::getFalse(CI->getContext());
+
+          case ICmpInst::ICMP_SLT:
+          case ICmpInst::ICMP_SLE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getFalse(CI->getContext()) :
+              ConstantInt::getTrue(CI->getContext());
+          }
+        }
+      }
+    }
+
+    if (isa<SExtInst>(LHS)) {
+      // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+      // same type.
+      if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+        if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+          // Compare X and Y.  Note that the predicate does not change.
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+                                          TD, DT, MaxRecurse-1))
+            return V;
+      }
+      // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+      // too.  If not, then try to deduce the result of the comparison.
+      else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+        // Compute the constant that would happen if we truncated to SrcTy then
+        // reextended to DstTy.
+        Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+        Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+        // If the re-extended constant didn't change then this is effectively
+        // also a case of comparing two sign-extended values.
+        if (RExt == CI && MaxRecurse)
+          if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, TD, DT,
+                                          MaxRecurse-1))
+            return V;
+
+        // Otherwise the upper bits of LHS are all equal, while RHS has varying
+        // bits there.  Use this to work out the result of the comparison.
+        if (RExt != CI) {
+          switch (Pred) {
+          default:
+            assert(false && "Unknown ICmp predicate!");
+          case ICmpInst::ICMP_EQ:
+            return ConstantInt::getFalse(CI->getContext());
+          case ICmpInst::ICMP_NE:
+            return ConstantInt::getTrue(CI->getContext());
+
+          // If RHS is non-negative then LHS <s RHS.  If RHS is negative then
+          // LHS >s RHS.
+          case ICmpInst::ICMP_SGT:
+          case ICmpInst::ICMP_SGE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getTrue(CI->getContext()) :
+              ConstantInt::getFalse(CI->getContext());
+          case ICmpInst::ICMP_SLT:
+          case ICmpInst::ICMP_SLE:
+            return CI->getValue().isNegative() ?
+              ConstantInt::getFalse(CI->getContext()) :
+              ConstantInt::getTrue(CI->getContext());
+
+          // If LHS is non-negative then LHS <u RHS.  If LHS is negative then
+          // LHS >u RHS.
+          case ICmpInst::ICMP_UGT:
+          case ICmpInst::ICMP_UGE:
+            // Comparison is true iff the LHS <s 0.
+            if (MaxRecurse)
+              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+                                              Constant::getNullValue(SrcTy),
+                                              TD, DT, MaxRecurse-1))
+                return V;
+            break;
+          case ICmpInst::ICMP_ULT:
+          case ICmpInst::ICMP_ULE:
+            // Comparison is true iff the LHS >=s 0.
+            if (MaxRecurse)
+              if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+                                              Constant::getNullValue(SrcTy),
+                                              TD, DT, MaxRecurse-1))
+                return V;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Special logic for binary operators.
+  BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
+  BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
+  if (MaxRecurse && (LBO || RBO)) {
+    // Analyze the case when either LHS or RHS is an add instruction.
+    Value *A = 0, *B = 0, *C = 0, *D = 0;
+    // LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
+    bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
+    if (LBO && LBO->getOpcode() == Instruction::Add) {
+      A = LBO->getOperand(0); B = LBO->getOperand(1);
+      NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+    }
+    if (RBO && RBO->getOpcode() == Instruction::Add) {
+      C = RBO->getOperand(0); D = RBO->getOperand(1);
+      NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
+        (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+        (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+    }
+
+    // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+    if ((A == RHS || B == RHS) && NoLHSWrapProblem)
+      if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
+                                      Constant::getNullValue(RHS->getType()),
+                                      TD, DT, MaxRecurse-1))
+        return V;
+
+    // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+    if ((C == LHS || D == LHS) && NoRHSWrapProblem)
+      if (Value *V = SimplifyICmpInst(Pred,
+                                      Constant::getNullValue(LHS->getType()),
+                                      C == LHS ? D : C, TD, DT, MaxRecurse-1))
+        return V;
+
+    // icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
+    if (A && C && (A == C || A == D || B == C || B == D) &&
+        NoLHSWrapProblem && NoRHSWrapProblem) {
+      // Determine Y and Z in the form icmp (X+Y), (X+Z).
+      Value *Y = (A == C || A == D) ? B : A;
+      Value *Z = (C == A || C == B) ? D : C;
+      if (Value *V = SimplifyICmpInst(Pred, Y, Z, TD, DT, MaxRecurse-1))
+        return V;
+    }
+  }
+
+  // The result of 'urem x, RHS' is always <u RHS (division by zero being
+  // undefined), so several comparisons of the remainder against RHS fold.
+  Value *V;
+  if (LBO && match(LBO, m_URem(m_Value(V), m_Specific(RHS)))) {
+    bool KnownNonNegative, KnownNegative;
+    switch (Pred) {
+    default:
+      break;
+    case ICmpInst::ICMP_SGT:
+    case ICmpInst::ICMP_SGE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_EQ:
+    case ICmpInst::ICMP_UGT:
+    case ICmpInst::ICMP_UGE:
+      return ConstantInt::getFalse(RHS->getContext());
+    case ICmpInst::ICMP_SLT:
+    case ICmpInst::ICMP_SLE:
+      ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD);
+      if (!KnownNonNegative)
+        break;
+      // fall-through
+    case ICmpInst::ICMP_NE:
+    case ICmpInst::ICMP_ULT:
+    case ICmpInst::ICMP_ULE:
+      return ConstantInt::getTrue(RHS->getContext());
+    }
+  }
+
+  if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
+      LBO->getOperand(1) == RBO->getOperand(1)) {
+    switch (LBO->getOpcode()) {
+    default: break;
+    case Instruction::UDiv:
+    case Instruction::LShr:
+      if (ICmpInst::isSigned(Pred))
+        break;
+      // fall-through
+    case Instruction::SDiv:
+    case Instruction::AShr:
+      if (!LBO->isExact() || !RBO->isExact())
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    case Instruction::Shl: {
+      bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap();
+      bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap();
+      if (!NUW && !NSW)
+        break;
+      if (!NSW && ICmpInst::isSigned(Pred))
+        break;
+      if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
+                                      RBO->getOperand(0), TD, DT, MaxRecurse-1))
+        return V;
+      break;
+    }
+    }
+  }
+
+  // If the comparison is with the result of a select instruction, check whether
+  // comparing with either branch of the select always yields the same value.
+  if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  // If the comparison is with the result of a phi instruction, check whether
+  // doing the compare with each incoming phi value yields a common result.
+  if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
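+// Illustrative use of the entry point above (a sketch; a real caller would
+// supply its own operands, TargetData and DominatorTree, and take care with
+// iterator invalidation):
+//   if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_ULT, X, Y, TD, DT))
+//     ICI->replaceAllUsesWith(V);   // 'ICI' is the icmp being visited.
+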
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                               const TargetData *TD, const DominatorTree *DT,
+                               unsigned MaxRecurse) {
+  CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+  assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+  if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+    if (Constant *CRHS = dyn_cast<Constant>(RHS))
+      return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+    // If we have a constant, make sure it is on the RHS.
+    std::swap(LHS, RHS);
+    Pred = CmpInst::getSwappedPredicate(Pred);
+  }
+
+  // Fold trivial predicates.
+  if (Pred == FCmpInst::FCMP_FALSE)
+    return ConstantInt::get(GetCompareTy(LHS), 0);
+  if (Pred == FCmpInst::FCMP_TRUE)
+    return ConstantInt::get(GetCompareTy(LHS), 1);
+
+  if (isa<UndefValue>(RHS))                  // fcmp pred X, undef -> undef
+    return UndefValue::get(GetCompareTy(LHS));
+
+  // fcmp x,x -> true/false.  Not all compares are foldable.
+  if (LHS == RHS) {
+    if (CmpInst::isTrueWhenEqual(Pred))
+      return ConstantInt::get(GetCompareTy(LHS), 1);
+    if (CmpInst::isFalseWhenEqual(Pred))
+      return ConstantInt::get(GetCompareTy(LHS), 0);
+  }
+
+  // Handle fcmp with constant RHS
+  if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+    // If the constant is a nan, see if we can fold the comparison based on it.
+    if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+      if (CFP->getValueAPF().isNaN()) {
+        if (FCmpInst::isOrdered(Pred))   // True "if ordered and foo"
+          return ConstantInt::getFalse(CFP->getContext());
+        assert(FCmpInst::isUnordered(Pred) &&
+               "Comparison must be either ordered or unordered!");
+        // True if unordered.
+        return ConstantInt::getTrue(CFP->getContext());
+      }
+      // Check whether the constant is an infinity.
+      if (CFP->getValueAPF().isInfinity()) {
+        if (CFP->getValueAPF().isNegative()) {
+          switch (Pred) {
+          case FCmpInst::FCMP_OLT:
+            // No value is ordered and less than negative infinity.
+            return ConstantInt::getFalse(CFP->getContext());
+          case FCmpInst::FCMP_UGE:
+            // All values are unordered with or at least negative infinity.
+            return ConstantInt::getTrue(CFP->getContext());
+          default:
+            break;
+          }
+        } else {
+          switch (Pred) {
+          case FCmpInst::FCMP_OGT:
+            // No value is ordered and greater than infinity.
+            return ConstantInt::getFalse(CFP->getContext());
+          case FCmpInst::FCMP_ULE:
+            // All values are unordered with and at most infinity.
+            return ConstantInt::getTrue(CFP->getContext());
+          default:
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // If the comparison is with the result of a select instruction, check whether
+  // comparing with either branch of the select always yields the same value.
+  if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+    if (Value *V = ThreadCmpOverSelect(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  // If the comparison is with the result of a phi instruction, check whether
+  // doing the compare with each incoming phi value yields a common result.
+  if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+    if (Value *V = ThreadCmpOverPHI(Pred, LHS, RHS, TD, DT, MaxRecurse))
+      return V;
+
+  return 0;
+}
+
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
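+// Example of what the NaN and infinity logic above catches (illustrative):
+//   fcmp uge double %x, 0xFFF0000000000000  ; unordered-or->= -infinity
+// is true for every %x, while
+//   fcmp ogt double %x, 0x7FF0000000000000  ; ordered-greater than +infinity
+// is always false.
+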
+/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
+/// the result.  If not, this returns null.
+Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
+                                const TargetData *TD, const DominatorTree *) {
+  // select true, X, Y  -> X
+  // select false, X, Y -> Y
+  if (ConstantInt *CB = dyn_cast<ConstantInt>(CondVal))
+    return CB->getZExtValue() ? TrueVal : FalseVal;
+
+  // select C, X, X -> X
+  if (TrueVal == FalseVal)
+    return TrueVal;
+
+  if (isa<UndefValue>(TrueVal))   // select C, undef, X -> X
+    return FalseVal;
+  if (isa<UndefValue>(FalseVal))  // select C, X, undef -> X
+    return TrueVal;
+  if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
+    if (isa<Constant>(TrueVal))
+      return TrueVal;
+    return FalseVal;
+  }
+
+  return 0;
+}
+
+/// SimplifyGEPInst - Given operands for a GetElementPtrInst, see if we can
+/// fold the result.  If not, this returns null.
+Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps,
+                             const TargetData *TD, const DominatorTree *) {
+  // The type of the GEP pointer operand.
+  const PointerType *PtrTy = cast<PointerType>(Ops[0]->getType());
+
+  // getelementptr P -> P.
+  if (NumOps == 1)
+    return Ops[0];
+
+  if (isa<UndefValue>(Ops[0])) {
+    // Compute the (pointer) type returned by the GEP instruction.
+    const Type *LastType = GetElementPtrInst::getIndexedType(PtrTy, &Ops[1],
+                                                             NumOps-1);
+    const Type *GEPTy = PointerType::get(LastType, PtrTy->getAddressSpace());
+    return UndefValue::get(GEPTy);
+  }
+
+  if (NumOps == 2) {
+    // getelementptr P, 0 -> P.
+    if (ConstantInt *C = dyn_cast<ConstantInt>(Ops[1]))
+      if (C->isZero())
+        return Ops[0];
+    // getelementptr P, N -> P if P points to a type of zero size.
+    if (TD) {
+      const Type *Ty = PtrTy->getElementType();
+      if (Ty->isSized() && TD->getTypeAllocSize(Ty) == 0)
+        return Ops[0];
+    }
+  }
+
+  // Check to see if this is constant foldable.
+  for (unsigned i = 0; i != NumOps; ++i)
+    if (!isa<Constant>(Ops[i]))
+      return 0;
+
+  return ConstantExpr::getGetElementPtr(cast<Constant>(Ops[0]),
+                                        (Constant *const*)Ops+1, NumOps-1);
+}
+
+/// SimplifyPHINode - See if we can fold the given phi.  If not, returns null.
+static Value *SimplifyPHINode(PHINode *PN, const DominatorTree *DT) {
+  // If all of the PHI's incoming values are the same then replace the PHI node
+  // with the common value.
+  Value *CommonValue = 0;
+  bool HasUndefInput = false;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *Incoming = PN->getIncomingValue(i);
+    // If the incoming value is the phi node itself, it can safely be skipped.
+    if (Incoming == PN) continue;
+    if (isa<UndefValue>(Incoming)) {
+      // Remember that we saw an undef value, but otherwise ignore them.
+      HasUndefInput = true;
+      continue;
+    }
+    if (CommonValue && Incoming != CommonValue)
+      return 0;  // Not the same, bail out.
+    CommonValue = Incoming;
+  }
+
+  // If CommonValue is null then all of the incoming values were either undef or
+  // equal to the phi node itself.
+  if (!CommonValue)
+    return UndefValue::get(PN->getType());
+
+  // If we have a PHI node like phi(X, undef, X), where X is defined by some
+  // instruction, we cannot return X as the result of the PHI node unless it
+  // dominates the PHI block.
+  if (HasUndefInput)
+    return ValueDominatesPHI(CommonValue, PN, DT) ? CommonValue : 0;
+
+  return CommonValue;
+}
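+// Illustrative case for the PHI logic above: given
+//   %p = phi i32 [ %x, %bb1 ], [ undef, %bb2 ], [ %x, %bb3 ]
+// the common value is %x, but it is only a valid result if %x dominates the
+// block containing the phi, hence the ValueDominatesPHI check.
+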
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result.  If not, this returns null.
+static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                            const TargetData *TD, const DominatorTree *DT,
+                            unsigned MaxRecurse) {
+  switch (Opcode) {
+  case Instruction::Add:
+    return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::Sub:
+    return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::Mul:  return SimplifyMulInst (LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Shl:
+    return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false,
+                           TD, DT, MaxRecurse);
+  case Instruction::LShr:
+    return SimplifyLShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+  case Instruction::AShr:
+    return SimplifyAShrInst(LHS, RHS, /*isExact*/false, TD, DT, MaxRecurse);
+  case Instruction::And: return SimplifyAndInst(LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Or:  return SimplifyOrInst (LHS, RHS, TD, DT, MaxRecurse);
+  case Instruction::Xor: return SimplifyXorInst(LHS, RHS, TD, DT, MaxRecurse);
+  default:
+    if (Constant *CLHS = dyn_cast<Constant>(LHS))
+      if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+        Constant *COps[] = {CLHS, CRHS};
+        return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+      }
+
+    // If the operation is associative, try some generic simplifications.
+    if (Instruction::isAssociative(Opcode))
+      if (Value *V = SimplifyAssociativeBinOp(Opcode, LHS, RHS, TD, DT,
+                                              MaxRecurse))
+        return V;
+
+    // If the operation is with the result of a select instruction, check whether
+    // operating on either branch of the select always yields the same value.
+    if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS))
+      if (Value *V = ThreadBinOpOverSelect(Opcode, LHS, RHS, TD, DT,
+                                           MaxRecurse))
+        return V;
+
+    // If the operation is with the result of a phi instruction, check whether
+    // operating on all incoming values of the phi always yields the same value.
+    if (isa<PHINode>(LHS) || isa<PHINode>(RHS))
+      if (Value *V = ThreadBinOpOverPHI(Opcode, LHS, RHS, TD, DT, MaxRecurse))
+        return V;
+
+    return 0;
+  }
+}
+
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+                           const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyBinOp(Opcode, LHS, RHS, TD, DT, RecursionLimit);
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                              const TargetData *TD, const DominatorTree *DT,
+                              unsigned MaxRecurse) {
+  if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+    return SimplifyICmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+  return SimplifyFCmpInst(Predicate, LHS, RHS, TD, DT, MaxRecurse);
+}
+
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+                             const TargetData *TD, const DominatorTree *DT) {
+  return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit);
+}
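+// Sketch of how a client might drive the generic entry points above
+// (hypothetical surrounding code; X and Y are arbitrary Value*s):
+//   if (Value *V = SimplifyBinOp(Instruction::Add, X, Y, TD, DT))
+//     ... use V in place of the add ...
+//   if (Value *V = SimplifyCmpInst(ICmpInst::ICMP_EQ, X, Y, TD, DT))
+//     ... use V in place of the compare ...
+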
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction.  If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD,
+                                 const DominatorTree *DT) {
+  Value *Result;
+
+  switch (I->getOpcode()) {
+  default:
+    Result = ConstantFoldInstruction(I, TD);
+    break;
+  case Instruction::Add:
+    Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::Sub:
+    Result = SimplifySubInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::Mul:
+    Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::SDiv:
+    Result = SimplifySDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::UDiv:
+    Result = SimplifyUDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::FDiv:
+    Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Shl:
+    Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1),
+                             cast<BinaryOperator>(I)->hasNoSignedWrap(),
+                             cast<BinaryOperator>(I)->hasNoUnsignedWrap(),
+                             TD, DT);
+    break;
+  case Instruction::LShr:
+    Result = SimplifyLShrInst(I->getOperand(0), I->getOperand(1),
+                              cast<BinaryOperator>(I)->isExact(),
+                              TD, DT);
+    break;
+  case Instruction::AShr:
+    Result = SimplifyAShrInst(I->getOperand(0), I->getOperand(1),
+                              cast<BinaryOperator>(I)->isExact(),
+                              TD, DT);
+    break;
+  case Instruction::And:
+    Result = SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Or:
+    Result = SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Xor:
+    Result = SimplifyXorInst(I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::ICmp:
+    Result = SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+                              I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::FCmp:
+    Result = SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+                              I->getOperand(0), I->getOperand(1), TD, DT);
+    break;
+  case Instruction::Select:
+    Result = SimplifySelectInst(I->getOperand(0), I->getOperand(1),
+                                I->getOperand(2), TD, DT);
+    break;
+  case Instruction::GetElementPtr: {
+    SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
+    Result = SimplifyGEPInst(&Ops[0], Ops.size(), TD, DT);
+    break;
+  }
+  case Instruction::PHI:
+    Result = SimplifyPHINode(cast<PHINode>(I), DT);
+    break;
+  }
+
+  /// If called on unreachable code, the above logic may report that the
+  /// instruction simplified to itself.  Make life easier for users by
+  /// detecting that case here, returning a safe value instead.
+  return Result == I ? UndefValue::get(I->getType()) : Result;
+}
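+// Typical use of SimplifyInstruction (a sketch; real callers must be careful
+// not to invalidate any iterator that points at I):
+//   if (Value *V = SimplifyInstruction(I, TD, DT)) {
+//     I->replaceAllUsesWith(V);
+//     I->eraseFromParent();
+//   }
+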
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction.  In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions.  This catches
+/// things where one simplification exposes other opportunities.  This only
+/// simplifies and deletes scalar operations, it does not change the CFG.
+///
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+                                     const TargetData *TD,
+                                     const DominatorTree *DT) {
+  assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+  // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
+  // we can know if it gets deleted out from under us or replaced in a
+  // recursive simplification.
+  WeakVH FromHandle(From);
+  WeakVH ToHandle(To);
+
+  while (!From->use_empty()) {
+    // Update the instruction to use the new value.
+    Use &TheUse = From->use_begin().getUse();
+    Instruction *User = cast<Instruction>(TheUse.getUser());
+    TheUse = To;
+
+    // Check to see if the instruction can be folded due to the operand
+    // replacement.  For example changing (or X, Y) into (or X, -1) can replace
+    // the 'or' with -1.
+    Value *SimplifiedVal;
+    {
+      // Sanity check to make sure 'User' doesn't dangle across
+      // SimplifyInstruction.
+      AssertingVH<> UserHandle(User);
+
+      SimplifiedVal = SimplifyInstruction(User, TD, DT);
+      if (SimplifiedVal == 0) continue;
+    }
+
+    // Recursively simplify this user to the new value.
+    ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD, DT);
+    From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+    To = ToHandle;
+
+    assert(ToHandle && "To value deleted by recursive simplification?");
+
+    // If the recursive simplification ended up revisiting and deleting
+    // 'From' then we're done.
+    if (From == 0)
+      return;
+  }
+
+  // If 'From' has value handles referring to it, do a real RAUW to update them.
+  From->replaceAllUsesWith(To);
+
+  From->eraseFromParent();
+}
diff --git a/final/lib/Analysis/Interval.cpp b/final/lib/Analysis/Interval.cpp
new file mode 100644
index 00000000000..ca9cdcaf246
--- /dev/null
+++ b/final/lib/Analysis/Interval.cpp
@@ -0,0 +1,58 @@
+//===- Interval.cpp - Interval class code ---------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the Interval class, which represents a
+// partition of a control flow graph of some kind.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Interval.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Interval Implementation
+//===----------------------------------------------------------------------===//
+
+// isLoop - Find out if there is a back edge in this interval...
+//
+bool Interval::isLoop() const {
+  // There is a loop in this interval iff one of the predecessors of the header
+  // node lives in the interval.
+  for (::pred_iterator I = ::pred_begin(HeaderNode), E = ::pred_end(HeaderNode);
+       I != E; ++I)
+    if (contains(*I))
+      return true;
+  return false;
+}
+
+
+void Interval::print(raw_ostream &OS) const {
+  OS << "-------------------------------------------------------------\n"
+     << "Interval Contents:\n";
+
+  // Print out all of the basic blocks in the interval...
+  for (std::vector<BasicBlock*>::const_iterator I = Nodes.begin(),
+         E = Nodes.end(); I != E; ++I)
+    OS << **I << "\n";
+
+  OS << "Interval Predecessors:\n";
+  for (std::vector<BasicBlock*>::const_iterator I = Predecessors.begin(),
+         E = Predecessors.end(); I != E; ++I)
+    OS << **I << "\n";
+
+  OS << "Interval Successors:\n";
+  for (std::vector<BasicBlock*>::const_iterator I = Successors.begin(),
+         E = Successors.end(); I != E; ++I)
+    OS << **I << "\n";
+}
diff --git a/final/lib/Analysis/IntervalPartition.cpp b/final/lib/Analysis/IntervalPartition.cpp
new file mode 100644
index 00000000000..2e259b147b8
--- /dev/null
+++ b/final/lib/Analysis/IntervalPartition.cpp
@@ -0,0 +1,114 @@
+//===- IntervalPartition.cpp - Interval Partition module code -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definition of the IntervalPartition class, which
+// calculates and represents the interval partition of a function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/IntervalIterator.h"
+using namespace llvm;
+
+char IntervalPartition::ID = 0;
+INITIALIZE_PASS(IntervalPartition, "intervals",
+                "Interval Partition Construction", true, true)
+
+//===----------------------------------------------------------------------===//
+// IntervalPartition Implementation
+//===----------------------------------------------------------------------===//
+
+// releaseMemory - Reset state back to before function was analyzed
+void IntervalPartition::releaseMemory() {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    delete Intervals[i];
+  IntervalMap.clear();
+  Intervals.clear();
+  RootInterval = 0;
+}
+
+void IntervalPartition::print(raw_ostream &O, const Module*) const {
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    Intervals[i]->print(O);
+}
+
+// addIntervalToPartition - Add an interval to the internal list of intervals,
+// and then add mappings from all of the basic blocks in the interval to the
+// interval itself (in the IntervalMap).
+//
+void IntervalPartition::addIntervalToPartition(Interval *I) {
+  Intervals.push_back(I);
+
+  // Add mappings for all of the basic blocks in I to the IntervalPartition
+  for (Interval::node_iterator It = I->Nodes.begin(), End = I->Nodes.end();
+       It != End; ++It)
+    IntervalMap.insert(std::make_pair(*It, I));
+}
+
+// updatePredecessors - Interval generation only sets the successor fields of
+// the interval data structures.  After interval generation is complete,
+// run through all of the intervals and propagate successor info as
+// predecessor info.
+//
+void IntervalPartition::updatePredecessors(Interval *Int) {
+  BasicBlock *Header = Int->getHeaderNode();
+  for (Interval::succ_iterator I = Int->Successors.begin(),
+       E = Int->Successors.end(); I != E; ++I)
+    getBlockInterval(*I)->Predecessors.push_back(Header);
+}
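+// Illustrative picture for updatePredecessors above: if interval I1 lists
+// the header block of interval I2 as a successor, then after propagation I2
+// lists I1's header among its Predecessors.  Only header blocks are
+// recorded, since interval edges always enter through the header node.
+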
+// IntervalPartition ctor - Build the first level interval partition for the
+// specified function...
+//
+bool IntervalPartition::runOnFunction(Function &F) {
+  // Pass false to intervals_begin because we take ownership of its memory
+  function_interval_iterator I = intervals_begin(&F, false);
+  assert(I != intervals_end(&F) && "No intervals in function!?!?!");
+
+  addIntervalToPartition(RootInterval = *I);
+
+  ++I;  // After the first one...
+
+  // Add the rest of the intervals to the partition.
+  for (function_interval_iterator E = intervals_end(&F); I != E; ++I)
+    addIntervalToPartition(*I);
+
+  // Now that we know all of the successor information, propagate this to the
+  // predecessors for each block.
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    updatePredecessors(Intervals[i]);
+  return false;
+}
+
+
+// IntervalPartition ctor - Build a reduced interval partition from an
+// existing interval graph.  This takes an additional boolean parameter to
+// distinguish it from a copy constructor.  Always pass in false for now.
+//
+IntervalPartition::IntervalPartition(IntervalPartition &IP, bool)
+  : FunctionPass(ID) {
+  assert(IP.getRootInterval() && "Cannot operate on empty IntervalPartitions!");
+
+  // Pass false to intervals_begin because we take ownership of its memory
+  interval_part_interval_iterator I = intervals_begin(IP, false);
+  assert(I != intervals_end(IP) && "No intervals in interval partition!?!?!");
+
+  addIntervalToPartition(RootInterval = *I);
+
+  ++I;  // After the first one...
+
+  // Add the rest of the intervals to the partition.
+  for (interval_part_interval_iterator E = intervals_end(IP); I != E; ++I)
+    addIntervalToPartition(*I);
+
+  // Now that we know all of the successor information, propagate this to the
+  // predecessors for each block.
+  for (unsigned i = 0, e = Intervals.size(); i != e; ++i)
+    updatePredecessors(Intervals[i]);
+}
+
diff --git a/final/lib/Analysis/LazyValueInfo.cpp b/final/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 00000000000..9e7da6ce2de
--- /dev/null
+++ b/final/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,1125 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/ConstantRange.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <map>
+#include <set>
+#include <stack>
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+INITIALIZE_PASS(LazyValueInfo, "lazy-value-info",
+                "Lazy Value Information Analysis", false, true)
+
+namespace llvm {
+  FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+//===----------------------------------------------------------------------===//
+//                               LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup, this can be made a lot more rich
+/// in the future.
+///
+namespace {
+class LVILatticeVal {
+  enum LatticeValueTy {
+    /// undefined - This Value has no known value yet.
+    undefined,
+
+    /// constant - This Value has a specific constant value.
+    constant,
+    /// notconstant - This Value is known to not have the specified value.
+    notconstant,
+
+    /// constantrange - The Value falls within this range.
+    constantrange,
+
+    /// overdefined - This value is not known to be constant, and we know that
+    /// it has a value.
+    overdefined
+  };
+
+  /// Val: This stores the current lattice value along with the Constant* for
+  /// the constant if this is a 'constant' or 'notconstant' value.
+  LatticeValueTy Tag;
+  Constant *Val;
+  ConstantRange Range;
+
+public:
+  LVILatticeVal() : Tag(undefined), Val(0), Range(1, true) {}
+
+  static LVILatticeVal get(Constant *C) {
+    LVILatticeVal Res;
+    if (!isa<UndefValue>(C))
+      Res.markConstant(C);
+    return Res;
+  }
+  static LVILatticeVal getNot(Constant *C) {
+    LVILatticeVal Res;
+    if (!isa<UndefValue>(C))
+      Res.markNotConstant(C);
+    return Res;
+  }
+  static LVILatticeVal getRange(ConstantRange CR) {
+    LVILatticeVal Res;
+    Res.markConstantRange(CR);
+    return Res;
+  }
+
+  bool isUndefined() const     { return Tag == undefined; }
+  bool isConstant() const      { return Tag == constant; }
+  bool isNotConstant() const   { return Tag == notconstant; }
+  bool isConstantRange() const { return Tag == constantrange; }
+  bool isOverdefined() const   { return Tag == overdefined; }
+
+  Constant *getConstant() const {
+    assert(isConstant() && "Cannot get the constant of a non-constant!");
+    return Val;
+  }
+
+  Constant *getNotConstant() const {
+    assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+    return Val;
+  }
+
+  ConstantRange getConstantRange() const {
+    assert(isConstantRange() &&
+           "Cannot get the constant-range of a non-constant-range!");
+    return Range;
+  }
+
+  /// markOverdefined - Return true if this is a change in status.
+  bool markOverdefined() {
+    if (isOverdefined())
+      return false;
+    Tag = overdefined;
+    return true;
+  }
+
+  /// markConstant - Return true if this is a change in status.
+  bool markConstant(Constant *V) {
+    assert(V && "Marking constant with NULL");
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return markConstantRange(ConstantRange(CI->getValue()));
+    if (isa<UndefValue>(V))
+      return false;
+
+    assert((!isConstant() || getConstant() == V) &&
+           "Marking constant with different value");
+    assert(isUndefined());
+    Tag = constant;
+    Val = V;
+    return true;
+  }
+
+  /// markNotConstant - Return true if this is a change in status.
+  bool markNotConstant(Constant *V) {
+    assert(V && "Marking constant with NULL");
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+      return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+    if (isa<UndefValue>(V))
+      return false;
+
+    assert((!isConstant() || getConstant() != V) &&
+           "Marking constant !constant with same value");
+    assert((!isNotConstant() || getNotConstant() == V) &&
+           "Marking !constant with different value");
+    assert(isUndefined() || isConstant());
+    Tag = notconstant;
+    Val = V;
+    return true;
+  }
+
+  /// markConstantRange - Return true if this is a change in status.
+  bool markConstantRange(const ConstantRange NewR) {
+    if (isConstantRange()) {
+      if (NewR.isEmptySet())
+        return markOverdefined();
+
+      bool changed = Range != NewR;
+      Range = NewR;
+      return changed;
+    }
+
+    assert(isUndefined());
+    if (NewR.isEmptySet())
+      return markOverdefined();
+
+    Tag = constantrange;
+    Range = NewR;
+    return true;
+  }
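+
+  // Lattice transitions performed by the methods above (illustrative):
+  //   undefined --markConstant(i32 7)--> constantrange<7, 8>
+  //   undefined --markNotConstant(@g)--> notconstant<@g>
+  // and any conflicting merge falls all the way to 'overdefined'.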
+
+  /// mergeIn - Merge the specified lattice value into this one, updating this
+  /// one and returning true if anything changed.
+  bool mergeIn(const LVILatticeVal &RHS) {
+    if (RHS.isUndefined() || isOverdefined()) return false;
+    if (RHS.isOverdefined()) return markOverdefined();
+
+    if (isUndefined()) {
+      Tag = RHS.Tag;
+      Val = RHS.Val;
+      Range = RHS.Range;
+      return true;
+    }
+
+    if (isConstant()) {
+      if (RHS.isConstant()) {
+        if (Val == RHS.Val)
+          return false;
+        return markOverdefined();
+      }
+
+      if (RHS.isNotConstant()) {
+        if (Val == RHS.Val)
+          return markOverdefined();
+
+        // Unless we can prove that the two Constants are different, we must
+        // move to overdefined.
+        // FIXME: use TargetData for smarter constant folding.
+        if (ConstantInt *Res = dyn_cast<ConstantInt>(
+                ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+                                                getConstant(),
+                                                RHS.getNotConstant())))
+          if (Res->isOne())
+            return markNotConstant(RHS.getNotConstant());
+
+        return markOverdefined();
+      }
+
+      // RHS is a ConstantRange, LHS is a non-integer Constant.
+
+      // FIXME: consider the case where RHS is a range [1, 0) and LHS is
+      // a function. The correct result is to pick up RHS.
+
+      return markOverdefined();
+    }
+
+    if (isNotConstant()) {
+      if (RHS.isConstant()) {
+        if (Val == RHS.Val)
+          return markOverdefined();
+
+        // Unless we can prove that the two Constants are different, we must
+        // move to overdefined.
+        // FIXME: use TargetData for smarter constant folding.
+        if (ConstantInt *Res = dyn_cast<ConstantInt>(
+                ConstantFoldCompareInstOperands(CmpInst::ICMP_NE,
+                                                getNotConstant(),
+                                                RHS.getConstant())))
+          if (Res->isOne())
+            return false;
+
+        return markOverdefined();
+      }
+
+      if (RHS.isNotConstant()) {
+        if (Val == RHS.Val)
+          return false;
+        return markOverdefined();
+      }
+
+      return markOverdefined();
+    }
+
+    assert(isConstantRange() && "New LVILattice type?");
+    if (!RHS.isConstantRange())
+      return markOverdefined();
+
+    ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
+    if (NewR.isFullSet())
+      return markOverdefined();
+    return markConstantRange(NewR);
+  }
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+  if (Val.isUndefined())
+    return OS << "undefined";
+  if (Val.isOverdefined())
+    return OS << "overdefined";
+
+  if (Val.isNotConstant())
+    return OS << "notconstant<" << *Val.getNotConstant() << '>';
+  else if (Val.isConstantRange())
+    return OS << "constantrange<" << Val.getConstantRange().getLower() << ", "
+              << Val.getConstantRange().getUpper() << '>';
+  return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
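+
+// Sample debug output produced by the printer above (illustrative):
+//   constantrange<0, 256>    notconstant<i8* null>    overdefined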
+
+//===----------------------------------------------------------------------===//
+//                          LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// LVIValueHandle - A callback value handle that updates the cache when
+  /// values are erased.
+  class LazyValueInfoCache;
+  struct LVIValueHandle : public CallbackVH {
+    LazyValueInfoCache *Parent;
+
+    LVIValueHandle(Value *V, LazyValueInfoCache *P)
+      : CallbackVH(V), Parent(P) { }
+
+    void deleted();
+    void allUsesReplacedWith(Value *V) {
+      deleted();
+    }
+  };
+}
+
+namespace llvm {
+  template<>
+  struct DenseMapInfo<LVIValueHandle> {
+    typedef DenseMapInfo<Value*> PointerInfo;
+    static inline LVIValueHandle getEmptyKey() {
+      return LVIValueHandle(PointerInfo::getEmptyKey(),
+                            static_cast<LazyValueInfoCache*>(0));
+    }
+    static inline LVIValueHandle getTombstoneKey() {
+      return LVIValueHandle(PointerInfo::getTombstoneKey(),
+                            static_cast<LazyValueInfoCache*>(0));
+    }
+    static unsigned getHashValue(const LVIValueHandle &Val) {
+      return PointerInfo::getHashValue(Val);
+    }
+    static bool isEqual(const LVIValueHandle &LHS, const LVIValueHandle &RHS) {
+      return LHS == RHS;
+    }
+  };
+
+  template<>
+  struct DenseMapInfo<std::pair<AssertingVH<BasicBlock>, Value*> > {
+    typedef std::pair<AssertingVH<BasicBlock>, Value*> PairTy;
+    typedef DenseMapInfo<AssertingVH<BasicBlock> > APointerInfo;
+    typedef DenseMapInfo<Value*> BPointerInfo;
+    static inline PairTy getEmptyKey() {
+      return std::make_pair(APointerInfo::getEmptyKey(),
+                            BPointerInfo::getEmptyKey());
+    }
+    static inline PairTy getTombstoneKey() {
+      return std::make_pair(APointerInfo::getTombstoneKey(),
+                            BPointerInfo::getTombstoneKey());
+    }
+    static unsigned getHashValue(const PairTy &Val) {
+      return APointerInfo::getHashValue(Val.first) ^
+             BPointerInfo::getHashValue(Val.second);
+    }
+    static bool isEqual(const PairTy &LHS, const PairTy &RHS) {
+      return APointerInfo::isEqual(LHS.first, RHS.first) &&
+             BPointerInfo::isEqual(LHS.second, RHS.second);
+    }
+  };
+}
+
+namespace {
+  /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+  /// maintains information about queries across the clients' queries.
+  class LazyValueInfoCache {
+    /// ValueCacheEntryTy - This is all of the cached block information for
+    /// exactly one Value*.  The entries are sorted by the BasicBlock* of the
+    /// entries, allowing us to do a lookup with a binary search.
+    typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy;
+
+    /// ValueCache - This is all of the cached information for all values,
+    /// mapped from Value* to key information.
+    DenseMap<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+
+    /// OverDefinedCache - This tracks, on a per-block basis, the set of
+    /// values that are over-defined at the end of that block.  This is required
+    /// for cache updating.
+    typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+    DenseSet<OverDefinedPairTy> OverDefinedCache;
+
+    /// BlockValueStack - This stack holds the state of the value solver
+    /// during a query.  It basically emulates the callstack of the naive
+    /// recursive value lookup process.
+    std::stack<std::pair<BasicBlock*, Value*> > BlockValueStack;
+
+    friend struct LVIValueHandle;
+
+    /// OverDefinedCacheUpdater - A helper object that ensures that the
+    /// OverDefinedCache is updated whenever solveBlockValue returns.
+    struct OverDefinedCacheUpdater {
+      LazyValueInfoCache *Parent;
+      Value *Val;
+      BasicBlock *BB;
+      LVILatticeVal &BBLV;
+
+      OverDefinedCacheUpdater(Value *V, BasicBlock *B, LVILatticeVal &LV,
+                              LazyValueInfoCache *P)
+        : Parent(P), Val(V), BB(B), BBLV(LV) { }
+
+      bool markResult(bool changed) {
+        if (changed && BBLV.isOverdefined())
+          Parent->OverDefinedCache.insert(std::make_pair(BB, Val));
+        return changed;
+      }
+    };
+
+    LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
+    bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
+                      LVILatticeVal &Result);
+    bool hasBlockValue(Value *Val, BasicBlock *BB);
+
+    // These methods process one work item and may add more.  A false value
+    // returned means that the work item was not completely processed and must
+    // be revisited after going through the new items.
+    bool solveBlockValue(Value *Val, BasicBlock *BB);
+    bool solveBlockValueNonLocal(LVILatticeVal &BBLV,
+                                 Value *Val, BasicBlock *BB);
+    bool solveBlockValuePHINode(LVILatticeVal &BBLV,
+                                PHINode *PN, BasicBlock *BB);
+    bool solveBlockValueConstantRange(LVILatticeVal &BBLV,
+                                      Instruction *BBI, BasicBlock *BB);
+
+    void solve();
+
+    ValueCacheEntryTy &lookup(Value *V) {
+      return ValueCache[LVIValueHandle(V, this)];
+    }
+
+  public:
+    /// getValueInBlock - This is the query interface to determine the lattice
+    /// value for the specified Value* at the end of the specified block.
+    LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+    /// getValueOnEdge - This is the query interface to determine the lattice
+    /// value for the specified Value* that is true on the specified edge.
+    LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+
+    /// threadEdge - This is the update interface to inform the cache that an
+    /// edge from PredBB to OldSucc has been threaded to be from PredBB to
+    /// NewSucc.
+    void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+    /// eraseBlock - This is part of the update interface to inform the cache
+    /// that a block has been deleted.
+    void eraseBlock(BasicBlock *BB);
+
+    /// clear - Empty the cache.
+    void clear() {
+      ValueCache.clear();
+      OverDefinedCache.clear();
+    }
+  };
+} // end anonymous namespace
+
+void LVIValueHandle::deleted() {
+  typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy;
+
+  SmallVector<OverDefinedPairTy, 4> ToErase;
+  for (DenseSet<OverDefinedPairTy>::iterator
+       I = Parent->OverDefinedCache.begin(),
+       E = Parent->OverDefinedCache.end();
+       I != E; ++I) {
+    if (I->second == getValPtr())
+      ToErase.push_back(*I);
+  }
+
+  for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+       E = ToErase.end(); I != E; ++I)
+    Parent->OverDefinedCache.erase(*I);
+
+  // This erasure deallocates *this, so it MUST happen after we're done
+  // using any and all members of *this.
+  Parent->ValueCache.erase(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+  SmallVector<OverDefinedPairTy, 4> ToErase;
+  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+       E = OverDefinedCache.end(); I != E; ++I) {
+    if (I->first == BB)
+      ToErase.push_back(*I);
+  }
+
+  for (SmallVector<OverDefinedPairTy, 4>::iterator I = ToErase.begin(),
+       E = ToErase.end(); I != E; ++I)
+    OverDefinedCache.erase(*I);
+
+  for (DenseMap<LVIValueHandle, ValueCacheEntryTy>::iterator
+       I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)
+    I->second.erase(BB);
+}
+
+void LazyValueInfoCache::solve() {
+  while (!BlockValueStack.empty()) {
+    std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
+    if (solveBlockValue(e.second, e.first))
+      BlockValueStack.pop();
+  }
+}
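+
+// Note on the work-list protocol above (illustrative): solveBlockValue
+// returns false when it has pushed unsolved operands onto BlockValueStack;
+// the item is left on the stack and is retried once those are solved.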
+
+bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+  // If already a constant, there is nothing to compute.
+  if (isa<Constant>(Val))
+    return true;
+
+  LVIValueHandle ValHandle(Val, this);
+  if (!ValueCache.count(ValHandle)) return false;
+  return ValueCache[ValHandle].count(BB);
+}
+
+LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+  // If already a constant, there is nothing to compute.
+  if (Constant *VC = dyn_cast<Constant>(Val))
+    return LVILatticeVal::get(VC);
+
+  return lookup(Val)[BB];
+}
+
+bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+  if (isa<Constant>(Val))
+    return true;
+
+  ValueCacheEntryTy &Cache = lookup(Val);
+  LVILatticeVal &BBLV = Cache[BB];
+
+  // OverDefinedCacheUpdater is a helper object that will update
+  // the OverDefinedCache for us when this method exits.  Make sure to
+  // call markResult on it as we exit, passing a bool to indicate if the
+  // cache needs updating, i.e. if we have solved a new value or not.
+  OverDefinedCacheUpdater ODCacheUpdater(Val, BB, BBLV, this);
+
+  // If we've already computed this block's value, return it.
+  if (!BBLV.isUndefined()) {
+    DEBUG(dbgs() << "  reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+
+    // Since we're reusing a cached value here, we don't need to update the
+    // OverDefinedCache.  The cache will have been properly updated
+    // whenever the cached value was inserted.
+    ODCacheUpdater.markResult(false);
+    return true;
+  }
+
+  // Otherwise, this is the first time we're seeing this block.  Reset the
+  // lattice value to overdefined, so that cycles will terminate and be
+  // conservatively correct.
+  BBLV.markOverdefined();
+
+  Instruction *BBI = dyn_cast<Instruction>(Val);
+  if (BBI == 0 || BBI->getParent() != BB) {
+    return ODCacheUpdater.markResult(solveBlockValueNonLocal(BBLV, Val, BB));
+  }
+
+  if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+    return ODCacheUpdater.markResult(solveBlockValuePHINode(BBLV, PN, BB));
+  }
+
+  if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) {
+    BBLV = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType()));
+    return ODCacheUpdater.markResult(true);
+  }
+
+  // We can only analyze the definitions of certain classes of instructions
+  // (integral binops and casts at the moment), so bail if this isn't one.
+  LVILatticeVal Result;
+  if ((!isa<BinaryOperator>(BBI) && !isa<CastInst>(BBI)) ||
+      !BBI->getType()->isIntegerTy()) {
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+    BBLV.markOverdefined();
+    return ODCacheUpdater.markResult(true);
+  }
+
+  // FIXME: We're currently limited to binops with a constant RHS.  This should
+  // be improved.
+  BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
+  if (BO && !isa<ConstantInt>(BO->getOperand(1))) {
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+
+    BBLV.markOverdefined();
+    return ODCacheUpdater.markResult(true);
+  }
+
+  return ODCacheUpdater.markResult(solveBlockValueConstantRange(BBLV, BBI, BB));
+}
+
+static bool InstructionDereferencesPointer(Instruction *I, Value *Ptr) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I)) {
+    return L->getPointerAddressSpace() == 0 &&
+           GetUnderlyingObject(L->getPointerOperand()) ==
+           GetUnderlyingObject(Ptr);
+  }
+  if (StoreInst *S = dyn_cast<StoreInst>(I)) {
+    return S->getPointerAddressSpace() == 0 &&
+           GetUnderlyingObject(S->getPointerOperand()) ==
+           GetUnderlyingObject(Ptr);
+  }
+  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+    if (MI->isVolatile()) return false;
+    if (MI->getAddressSpace() != 0) return false;
+
+    // FIXME: check whether it has a valuerange that excludes zero?
+    ConstantInt *Len = dyn_cast<ConstantInt>(MI->getLength());
+    if (!Len || Len->isZero()) return false;
+
+    if (MI->getRawDest() == Ptr || MI->getDest() == Ptr)
+      return true;
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
+      return MTI->getRawSource() == Ptr || MTI->getSource() == Ptr;
+  }
+  return false;
+}
+
+bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+                                                 Value *Val, BasicBlock *BB) {
+  LVILatticeVal Result;  // Start Undefined.
+
+  // If this is a pointer, and there's a load from that pointer in this BB,
+  // then we know that the pointer can't be NULL.
+  bool NotNull = false;
+  if (Val->getType()->isPointerTy()) {
+    if (isa<AllocaInst>(Val)) {
+      NotNull = true;
+    } else {
+      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();BI != BE;++BI){
+        if (InstructionDereferencesPointer(BI, Val)) {
+          NotNull = true;
+          break;
+        }
+      }
+    }
+  }
+
+  // If this is the entry block, we must be asking about an argument.  The
+  // value is overdefined.
+  if (BB == &BB->getParent()->getEntryBlock()) {
+    assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+    if (NotNull) {
+      const PointerType *PTy = cast<PointerType>(Val->getType());
+      Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+    } else {
+      Result.markOverdefined();
+    }
+    BBLV = Result;
+    return true;
+  }
+
+  // Loop over all of our predecessors, merging what we know from them into
+  // result.
+  bool EdgesMissing = false;
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    LVILatticeVal EdgeResult;
+    EdgesMissing |= !getEdgeValue(Val, *PI, BB, EdgeResult);
+    if (EdgesMissing)
+      continue;
+
+    Result.mergeIn(EdgeResult);
+
+    // If we hit overdefined, exit early.  The BlockVals entry is already set
+    // to overdefined.
+    if (Result.isOverdefined()) {
+      DEBUG(dbgs() << " compute BB '" << BB->getName()
+                   << "' - overdefined because of pred.\n");
+      // If we previously determined that this is a pointer that can't be null
+      // then return that rather than giving up entirely.
+      if (NotNull) {
+        const PointerType *PTy = cast<PointerType>(Val->getType());
+        Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
+      }
+
+      BBLV = Result;
+      return true;
+    }
+  }
+  if (EdgesMissing)
+    return false;
+
+  // Return the merged value, which is more precise than 'overdefined'.
+  assert(!Result.isOverdefined());
+  BBLV = Result;
+  return true;
+}
+
+bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+                                                PHINode *PN, BasicBlock *BB) {
+  LVILatticeVal Result;  // Start Undefined.
+
+  // Loop over all of our predecessors, merging what we know from them into
+  // result.
+  bool EdgesMissing = false;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    BasicBlock *PhiBB = PN->getIncomingBlock(i);
+    Value *PhiVal = PN->getIncomingValue(i);
+    LVILatticeVal EdgeResult;
+    EdgesMissing |= !getEdgeValue(PhiVal, PhiBB, BB, EdgeResult);
+    if (EdgesMissing)
+      continue;
+
+    Result.mergeIn(EdgeResult);
+
+    // If we hit overdefined, exit early.  The BlockVals entry is already set
+    // to overdefined.
+    if (Result.isOverdefined()) {
+      DEBUG(dbgs() << " compute BB '" << BB->getName()
+                   << "' - overdefined because of pred.\n");
+
+      BBLV = Result;
+      return true;
+    }
+  }
+  if (EdgesMissing)
+    return false;
+
+  // Return the merged value, which is more precise than 'overdefined'.
+  assert(!Result.isOverdefined() && "Possible PHI in entry block?");
+  BBLV = Result;
+  return true;
+}
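+
+// Range arithmetic example for the routine below (illustrative): if %x has
+// the lattice value constantrange<0, 10> and the instruction is
+//   %y = add i32 %x, 5
+// then %y receives constantrange<5, 15> via ConstantRange::add.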
+
+bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,
+                                                      Instruction *BBI,
+                                                      BasicBlock *BB) {
+  // Figure out the range of the LHS.  If that fails, bail.
+  if (!hasBlockValue(BBI->getOperand(0), BB)) {
+    BlockValueStack.push(std::make_pair(BB, BBI->getOperand(0)));
+    return false;
+  }
+
+  LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
+  if (!LHSVal.isConstantRange()) {
+    BBLV.markOverdefined();
+    return true;
+  }
+
+  ConstantRange LHSRange = LHSVal.getConstantRange();
+  ConstantRange RHSRange(1);
+  const IntegerType *ResultTy = cast<IntegerType>(BBI->getType());
+  if (isa<BinaryOperator>(BBI)) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(BBI->getOperand(1))) {
+      RHSRange = ConstantRange(RHS->getValue());
+    } else {
+      BBLV.markOverdefined();
+      return true;
+    }
+  }
+
+  // NOTE: We're currently limited by the set of operations that ConstantRange
+  // can evaluate symbolically.  Enhancing that set will allow us to analyze
+  // more definitions.
+  LVILatticeVal Result;
+  switch (BBI->getOpcode()) {
+  case Instruction::Add:
+    Result.markConstantRange(LHSRange.add(RHSRange));
+    break;
+  case Instruction::Sub:
+    Result.markConstantRange(LHSRange.sub(RHSRange));
+    break;
+  case Instruction::Mul:
+    Result.markConstantRange(LHSRange.multiply(RHSRange));
+    break;
+  case Instruction::UDiv:
+    Result.markConstantRange(LHSRange.udiv(RHSRange));
+    break;
+  case Instruction::Shl:
+    Result.markConstantRange(LHSRange.shl(RHSRange));
+    break;
+  case Instruction::LShr:
+    Result.markConstantRange(LHSRange.lshr(RHSRange));
+    break;
+  case Instruction::Trunc:
+    Result.markConstantRange(LHSRange.truncate(ResultTy->getBitWidth()));
+    break;
+  case Instruction::SExt:
+    Result.markConstantRange(LHSRange.signExtend(ResultTy->getBitWidth()));
+    break;
+  case Instruction::ZExt:
+    Result.markConstantRange(LHSRange.zeroExtend(ResultTy->getBitWidth()));
+    break;
+  case Instruction::BitCast:
+    Result.markConstantRange(LHSRange);
+    break;
+  case Instruction::And:
+    Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
+    break;
+  case Instruction::Or:
+    Result.markConstantRange(LHSRange.binaryOr(RHSRange));
+    break;
+
+  // Unhandled instructions are overdefined.
+  default:
+    DEBUG(dbgs() << " compute BB '" << BB->getName()
+                 << "' - overdefined because inst def found.\n");
+    Result.markOverdefined();
+    break;
+  }
+
+  BBLV = Result;
+  return true;
+}
+
+/// getEdgeValue - This method attempts to infer the lattice value of Val on
+/// the edge from BBFrom to BBTo, using any branch or switch that terminates
+/// BBFrom.
+bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+                                      BasicBlock *BBTo, LVILatticeVal &Result) {
+  // If already a constant, there is nothing to compute.
+  if (Constant *VC = dyn_cast<Constant>(Val)) {
+    Result = LVILatticeVal::get(VC);
+    return true;
+  }
+
+  // TODO: Handle more complex conditionals.  If (v == 0 || v2 < 1) is false, we
+  // know that v != 0.
+  if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+    // If this is a conditional branch and only one successor goes to BBTo, then
+    // we may be able to infer something from the condition.
+    if (BI->isConditional() &&
+        BI->getSuccessor(0) != BI->getSuccessor(1)) {
+      bool isTrueDest = BI->getSuccessor(0) == BBTo;
+      assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+             "BBTo isn't a successor of BBFrom");
+
+      // If V is the condition of the branch itself, then we know exactly what
+      // it is.
+      if (BI->getCondition() == Val) {
+        Result = LVILatticeVal::get(ConstantInt::get(
+                              Type::getInt1Ty(Val->getContext()), isTrueDest));
+        return true;
+      }
+
+      // If the condition of the branch is an equality comparison, we may be
+      // able to infer the value.
+      ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition());
+      if (ICI && ICI->getOperand(0) == Val &&
+          isa<Constant>(ICI->getOperand(1))) {
+        if (ICI->isEquality()) {
+          // We know that V has the RHS constant if this is a true SETEQ or
+          // false SETNE.
+          if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+            Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+          else
+            Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+          return true;
+        }
+
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
+          // Calculate the range of values that would satisfy the comparison.
+          ConstantRange CmpRange(CI->getValue(), CI->getValue()+1);
+          ConstantRange TrueValues =
+            ConstantRange::makeICmpRegion(ICI->getPredicate(), CmpRange);
+
+          // If we're interested in the false dest, invert the condition.
+          if (!isTrueDest) TrueValues = TrueValues.inverse();
+
+          // Figure out the possible values of the query BEFORE this branch.
+          if (!hasBlockValue(Val, BBFrom)) {
+            BlockValueStack.push(std::make_pair(BBFrom, Val));
+            return false;
+          }
+
+          LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
+          if (!InBlock.isConstantRange()) {
+            Result = LVILatticeVal::getRange(TrueValues);
+            return true;
+          }
+
+          // Find all potential values that satisfy both the input and output
+          // conditions.
+          ConstantRange PossibleValues =
+            TrueValues.intersectWith(InBlock.getConstantRange());
+
+          Result = LVILatticeVal::getRange(PossibleValues);
+          return true;
+        }
+      }
+    }
+  }
+
+  // If the edge was formed by a switch on the value, then we may know exactly
+  // what it is.
+  if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+    if (SI->getCondition() == Val) {
+      // We don't know anything in the default case.
+      if (SI->getDefaultDest() == BBTo) {
+        Result.markOverdefined();
+        return true;
+      }
+
+      // We only know something if there is exactly one value that goes from
+      // BBFrom to BBTo.
+      unsigned NumEdges = 0;
+      ConstantInt *EdgeVal = 0;
+      for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+        if (SI->getSuccessor(i) != BBTo) continue;
+        if (NumEdges++) break;
+        EdgeVal = SI->getCaseValue(i);
+      }
+      assert(EdgeVal && "Missing successor?");
+      if (NumEdges == 1) {
+        Result = LVILatticeVal::get(EdgeVal);
+        return true;
+      }
+    }
+  }
+
+  // Otherwise see if the value is known in the block.
+  if (hasBlockValue(Val, BBFrom)) {
+    Result = getBlockValue(Val, BBFrom);
+    return true;
+  }
+  BlockValueStack.push(std::make_pair(BBFrom, Val));
+  return false;
+}
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+  DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
+               << BB->getName() << "'\n");
+
+  BlockValueStack.push(std::make_pair(BB, V));
+  solve();
+  LVILatticeVal Result = getBlockValue(V, BB);
+
+  DEBUG(dbgs() << "  Result = " << Result << "\n");
+  return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+  DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
+               << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+
+  LVILatticeVal Result;
+  if (!getEdgeValue(V, FromBB, ToBB, Result)) {
+    solve();
+    bool WasFastQuery = getEdgeValue(V, FromBB, ToBB, Result);
+    (void)WasFastQuery;
+    assert(WasFastQuery && "More work to do after problem solved?");
+  }
+
+  DEBUG(dbgs() << "  Result = " << Result << "\n");
+  return Result;
+}
+
+void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                                    BasicBlock *NewSucc) {
+  // When an edge in the graph has been threaded, values that we could not
+  // determine a value for before (i.e. were marked overdefined) may be possible
+  // to solve now.  We do NOT try to proactively update these values.  Instead,
+  // we clear their entries from the cache, and allow lazy updating to recompute
+  // them when needed.
+
+  // The updating process is fairly simple: we need to drop cached info
+  // for all values that were marked overdefined in OldSucc, and for those same
+  // values in any successor of OldSucc (except NewSucc) in which they were
+  // also marked overdefined.
+  std::vector<BasicBlock*> worklist;
+  worklist.push_back(OldSucc);
+
+  DenseSet<Value*> ClearSet;
+  for (DenseSet<OverDefinedPairTy>::iterator I = OverDefinedCache.begin(),
+       E = OverDefinedCache.end(); I != E; ++I) {
+    if (I->first == OldSucc)
+      ClearSet.insert(I->second);
+  }
+
+  // Use a worklist to perform a depth-first search of OldSucc's successors.
+  // NOTE: We do not need a visited list since any blocks we have already
+  // visited will have had their overdefined markers cleared already, and we
+  // thus won't loop to their successors.
+  while (!worklist.empty()) {
+    BasicBlock *ToUpdate = worklist.back();
+    worklist.pop_back();
+
+    // Skip blocks only accessible through NewSucc.
+    if (ToUpdate == NewSucc) continue;
+
+    bool changed = false;
+    for (DenseSet<Value*>::iterator I = ClearSet.begin(), E = ClearSet.end();
+         I != E; ++I) {
+      // If a value was marked overdefined in OldSucc, and is here too...
+      DenseSet<OverDefinedPairTy>::iterator OI =
+        OverDefinedCache.find(std::make_pair(ToUpdate, *I));
+      if (OI == OverDefinedCache.end()) continue;
+
+      // Remove it from the caches.
+      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(*I, this)];
+      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate);
+
+      assert(CI != Entry.end() && "Couldn't find entry to update?");
+      Entry.erase(CI);
+      OverDefinedCache.erase(OI);
+
+      // If we removed anything, then we potentially need to update
+      // blocks successors too.
+      changed = true;
+    }
+
+    if (!changed) continue;
+
+    worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+  }
+}
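+
+// Cache-invalidation example for threadEdge above (illustrative): if the
+// edge BB1->BB2 is threaded to become BB1->BB3, values recorded as
+// overdefined in BB2 may now be solvable, so their stale entries are purged
+// from BB2 and its transitive successors before anything is recomputed
+// lazily on the next query.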
+
+//===----------------------------------------------------------------------===//
+//                            LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+  if (!PImpl)
+    PImpl = new LazyValueInfoCache();
+  return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+  if (PImpl)
+    getCache(PImpl).clear();
+
+  TD = getAnalysisIfAvailable<TargetData>();
+  // Fully lazy.
+  return false;
+}
+
+void LazyValueInfo::releaseMemory() {
+  // If the cache was allocated, free it.
+  if (PImpl) {
+    delete &getCache(PImpl);
+    PImpl = 0;
+  }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+  LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+  if (Result.isConstant())
+    return Result.getConstant();
+  if (Result.isConstantRange()) {
+    ConstantRange CR = Result.getConstantRange();
+    if (const APInt *SingleVal = CR.getSingleElement())
+      return ConstantInt::get(V->getContext(), *SingleVal);
+  }
+  return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge.  Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+                                           BasicBlock *ToBB) {
+  LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+  if (Result.isConstant())
+    return Result.getConstant();
+  if (Result.isConstantRange()) {
+    ConstantRange CR = Result.getConstantRange();
+    if (const APInt *SingleVal = CR.getSingleElement())
+      return ConstantInt::get(V->getContext(), *SingleVal);
+  }
+  return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+                                  BasicBlock *FromBB, BasicBlock *ToBB) {
+  LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+  // If we know the value is a constant, evaluate the conditional.
+  Constant *Res = 0;
+  if (Result.isConstant()) {
+    Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+    if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+      return ResCI->isZero() ? False : True;
+    return Unknown;
+  }
+
+  if (Result.isConstantRange()) {
+    ConstantInt *CI = dyn_cast<ConstantInt>(C);
+    if (!CI) return Unknown;
+
+    ConstantRange CR = Result.getConstantRange();
+    if (Pred == ICmpInst::ICMP_EQ) {
+      if (!CR.contains(CI->getValue()))
+        return False;
+
+      if (CR.isSingleElement() && CR.contains(CI->getValue()))
+        return True;
+    } else if (Pred == ICmpInst::ICMP_NE) {
+      if (!CR.contains(CI->getValue()))
+        return True;
+
+      if (CR.isSingleElement() && CR.contains(CI->getValue()))
+        return False;
+    }
+
+    // Handle more complex predicates.
+    ConstantRange TrueValues =
+      ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+    if (TrueValues.contains(CR))
+      return True;
+    if (TrueValues.inverse().contains(CR))
+      return False;
+    return Unknown;
+  }
+
+  if (Result.isNotConstant()) {
+    // If this is an equality comparison, we can try to fold it knowing that
+    // "V != C1".
+    if (Pred == ICmpInst::ICMP_EQ) {
+      // !C1 == C -> false iff C1 == C.
+      Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+                                            Result.getNotConstant(), C, TD);
+      if (Res->isNullValue())
+        return False;
+    } else if (Pred == ICmpInst::ICMP_NE) {
+      // !C1 != C -> true iff C1 == C.
+      Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+                                            Result.getNotConstant(), C, TD);
+      if (Res->isNullValue())
+        return True;
+    }
+    return Unknown;
+  }
+
+  return Unknown;
+}
+
+void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+                               BasicBlock *NewSucc) {
+  if (PImpl) getCache(PImpl).threadEdge(PredBB, OldSucc, NewSucc);
+}
+
+void LazyValueInfo::eraseBlock(BasicBlock *BB) {
+  if (PImpl) getCache(PImpl).eraseBlock(BB);
+}
diff --git a/final/lib/Analysis/LibCallAliasAnalysis.cpp b/final/lib/Analysis/LibCallAliasAnalysis.cpp
new file mode 100644
index 00000000000..efb722bb97c
--- /dev/null
+++ b/final/lib/Analysis/LibCallAliasAnalysis.cpp
@@ -0,0 +1,137 @@
+//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LibCallAliasAnalysis class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallAliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/Function.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+// Register this pass...
+char LibCallAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa",
+                   "LibCall Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) {
+  return new LibCallAliasAnalysis(LCI);
+}
+
+LibCallAliasAnalysis::~LibCallAliasAnalysis() {
+  delete LCI;
+}
+
+void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AliasAnalysis::getAnalysisUsage(AU);
+  AU.setPreservesAll();                         // Does not transform code
+}
+
+
+
+/// AnalyzeLibCallDetails - Given a call to a function with the specified
+/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the
+/// call vs the specified pointer/size.
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI,
+                                            ImmutableCallSite CS,
+                                            const Location &Loc) {
+  // If we have a function, check to see what kind of mod/ref effects it
+  // has. Start by including any info globally known about the function.
+  AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior;
+  if (MRInfo == NoModRef) return MRInfo;
+
+  // If that didn't tell us that the function is 'readnone', check to see
+  // if we have detailed info and if 'P' is any of the locations we know
+  // about.
+  const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails;
+  if (Details == 0)
+    return MRInfo;
+
+  // If the details array is of the 'DoesNot' kind, we only know something if
+  // the pointer is a match for one of the locations in 'Details'. If we find
+  // a match, we can prove some interactions cannot happen.
+  //
+  if (FI->DetailsType == LibCallFunctionInfo::DoesNot) {
+    // Find out if the pointer refers to a known location.
+    for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+      const LibCallLocationInfo &LocInfo =
+        LCI->getLocationInfo(Details[i].LocationID);
+      LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+      if (Res != LibCallLocationInfo::Yes) continue;
+
+      // If we find a match against a location that we 'do not' interact
+      // with, learn this info into MRInfo.
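+      // For example, if MRInfo is ModRef and this 'DoesNot' entry records
+      // Mod (the call does not modify the matched location), masking with
+      // ~Mod leaves Ref: the call may still read the location, but stores
+      // to it can safely be considered unclobbered by the call.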
+      return ModRefResult(MRInfo & ~Details[i].MRInfo);
+    }
+    return MRInfo;
+  }
+
+  // If the details are of the 'DoesOnly' sort, we know something if the
+  // pointer is a match for one of the locations in 'Details'. Also, if we
+  // can prove that the pointer is *not* one of the locations in 'Details',
+  // we know that the call is NoModRef.
+  assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly);
+
+  // Find out if the pointer refers to a known location.
+  bool NoneMatch = true;
+  for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) {
+    const LibCallLocationInfo &LocInfo =
+      LCI->getLocationInfo(Details[i].LocationID);
+    LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc);
+    if (Res == LibCallLocationInfo::No) continue;
+
+    // If we don't know if this pointer points to the location, then we have
+    // to assume it might alias in some case.
+    if (Res == LibCallLocationInfo::Unknown) {
+      NoneMatch = false;
+      continue;
+    }
+
+    // If we know that this pointer definitely is pointing into the location,
+    // merge in this information.
+    return ModRefResult(MRInfo & Details[i].MRInfo);
+  }
+
+  // If we found that the pointer is guaranteed to not match any of the
+  // locations in our 'DoesOnly' rule, then we know that the pointer must
+  // point to some other location. Since the libcall doesn't mod/ref any
+  // other locations, return NoModRef.
+  if (NoneMatch)
+    return NoModRef;
+
+  // Otherwise, return any other info gained so far.
+  return MRInfo;
+}
+
+// getModRefInfo - Check to see if the specified callsite can clobber the
+// specified memory object.
+//
+AliasAnalysis::ModRefResult
+LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                                    const Location &Loc) {
+  ModRefResult MRInfo = ModRef;
+
+  // If this is a direct call to a function that LCI knows about, get the
+  // information about the runtime function.
+  if (LCI) {
+    if (const Function *F = CS.getCalledFunction()) {
+      if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) {
+        MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc));
+        if (MRInfo == NoModRef) return NoModRef;
+      }
+    }
+  }
+
+  // The AliasAnalysis base class has some smarts; let's use them.
+  return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc));
+}
diff --git a/final/lib/Analysis/LibCallSemantics.cpp b/final/lib/Analysis/LibCallSemantics.cpp
new file mode 100644
index 00000000000..81b0f46f374
--- /dev/null
+++ b/final/lib/Analysis/LibCallSemantics.cpp
@@ -0,0 +1,63 @@
+//===- LibCallSemantics.cpp - Describe library semantics ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements interfaces that can be used to describe language
+// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM
+// optimizers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LibCallSemantics.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Function.h"
+using namespace llvm;
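+
+// A minimal sketch of the intended use (hypothetical subclass; everything
+// except the LibCallInfo hook names is illustrative):
+//
+//   struct MyLibcallInfo : public LibCallInfo {
+//     virtual const LibCallFunctionInfo *getFunctionInfoArray() const {
+//       // A table terminated by an entry with a null Name; each entry
+//       // gives a function's UniversalBehavior plus optional
+//       // LocationDetails describing the locations it touches.
+//       static const LibCallFunctionInfo Table[] = { /* ... */ };
+//       return Table;
+//     }
+//   };
+//
+// An instance can then be handed to createLibCallAliasAnalysisPass()
+// (see LibCallAliasAnalysis.cpp).
+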
+/// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This
+/// helper does the cast.
+static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) {
+  return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr);
+}
+
+LibCallInfo::~LibCallInfo() {
+  delete getMap(Impl);
+}
+
+const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const {
+  // Get location info on the first call.
+  if (NumLocations == 0)
+    NumLocations = getLocationInfo(Locations);
+
+  assert(LocID < NumLocations && "Invalid location ID!");
+  return Locations[LocID];
+}
+
+
+/// getFunctionInfo - Return the LibCallFunctionInfo object corresponding to
+/// the specified function if we have it. If not, return null.
+const LibCallFunctionInfo *
+LibCallInfo::getFunctionInfo(const Function *F) const {
+  StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl);
+
+  /// If this is the first time we are querying for this info, lazily
+  /// construct the StringMap to index it.
+  if (Map == 0) {
+    Impl = Map = new StringMap<const LibCallFunctionInfo*>();
+
+    const LibCallFunctionInfo *Array = getFunctionInfoArray();
+    if (Array == 0) return 0;
+
+    // We now have the array of entries. Populate the StringMap.
+    for (unsigned i = 0; Array[i].Name; ++i)
+      (*Map)[Array[i].Name] = Array+i;
+  }
+
+  // Look up this function in the string map.
+  return Map->lookup(F->getName());
+}
+
diff --git a/final/lib/Analysis/Lint.cpp b/final/lib/Analysis/Lint.cpp
new file mode 100644
index 00000000000..fc7edc0525f
--- /dev/null
+++ b/final/lib/Analysis/Lint.cpp
@@ -0,0 +1,658 @@
+//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass statically checks for common and easily-identified constructs
+// which produce undefined or likely unintended behavior in LLVM IR.
+//
+// It is not a guarantee of correctness, in two ways. First, it isn't
+// comprehensive. There are checks which could be done statically which are
+// not yet implemented. Some of these are indicated by TODO comments, but
+// those aren't comprehensive either. Second, many conditions cannot be
+// checked statically. This pass does no dynamic instrumentation, so it
+// can't check for all possible problems.
+//
+// Another limitation is that it assumes all code will be executed. A store
+// through a null pointer in a basic block which is never reached is harmless,
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
+//
+// Optimization passes may make conditions that this pass checks for more or
+// less obvious. If an optimization pass appears to be introducing a warning,
+// it may be that the optimization pass is merely exposing an existing
+// condition in the code.
+//
+// This code may be run before instcombine. In many cases, instcombine checks
+// for the same kinds of things and turns instructions with undefined behavior
+// into unreachable (or equivalent). Because of this, this pass makes some
+// effort to look through bitcasts and so on.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Pass.h"
+#include "llvm/PassManager.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+namespace {
+  namespace MemRef {
+    static unsigned Read     = 1;
+    static unsigned Write    = 2;
+    static unsigned Callee   = 4;
+    static unsigned Branchee = 8;
+  }
+
+  class Lint : public FunctionPass, public InstVisitor<Lint> {
+    friend class InstVisitor<Lint>;
+
+    void visitFunction(Function &F);
+
+    void visitCallSite(CallSite CS);
+    void visitMemoryReference(Instruction &I, Value *Ptr,
+                              uint64_t Size, unsigned Align,
+                              const Type *Ty, unsigned Flags);
+
+    void visitCallInst(CallInst &I);
+    void visitInvokeInst(InvokeInst &I);
+    void visitReturnInst(ReturnInst &I);
+    void visitLoadInst(LoadInst &I);
+    void visitStoreInst(StoreInst &I);
+    void visitXor(BinaryOperator &I);
+    void visitSub(BinaryOperator &I);
+    void visitLShr(BinaryOperator &I);
+    void visitAShr(BinaryOperator &I);
+    void visitShl(BinaryOperator &I);
+    void visitSDiv(BinaryOperator &I);
+    void visitUDiv(BinaryOperator &I);
+    void visitSRem(BinaryOperator &I);
+    void visitURem(BinaryOperator &I);
+    void visitAllocaInst(AllocaInst &I);
+    void visitVAArgInst(VAArgInst &I);
+    void visitIndirectBrInst(IndirectBrInst &I);
+    void visitExtractElementInst(ExtractElementInst &I);
+    void visitInsertElementInst(InsertElementInst &I);
+    void visitUnreachableInst(UnreachableInst &I);
+
+    Value *findValue(Value *V, bool OffsetOk) const;
+    Value *findValueImpl(Value *V, bool OffsetOk,
+                         SmallPtrSet<Value *, 4> &Visited) const;
+
+  public:
+    Module *Mod;
+    AliasAnalysis *AA;
+    DominatorTree *DT;
+    TargetData *TD;
+
+    std::string Messages;
+    raw_string_ostream MessagesStr;
+
+    static char ID; // Pass identification, replacement for typeid
+    Lint() : FunctionPass(ID), MessagesStr(Messages) {
+      initializeLintPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<DominatorTree>();
+    }
+    virtual void print(raw_ostream &O, const Module *M) const {}
+
+    void WriteValue(const Value *V) {
+      if (!V) return;
+      if (isa<Instruction>(V)) {
+        MessagesStr << *V << '\n';
+      } else {
+        WriteAsOperand(MessagesStr, V, true, Mod);
+        MessagesStr << '\n';
+      }
+    }
+
+    // CheckFailed - A check failed, so print out the condition and the
+    // message that failed. This provides a nice place to put a breakpoint
+    // if you want to see why something is not correct.
+    void CheckFailed(const Twine &Message,
+                     const Value *V1 = 0, const Value *V2 = 0,
+                     const Value *V3 = 0, const Value *V4 = 0) {
+      MessagesStr << Message.str() << "\n";
+      WriteValue(V1);
+      WriteValue(V2);
+      WriteValue(V3);
+      WriteValue(V4);
+    }
+  };
+}
+
+char Lint::ID = 0;
+INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
+                      false, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
+                    false, true)
+
+// Assert - We know that cond should be true; if not, print an error message.
+#define Assert(C, M) \
+  do { if (!(C)) { CheckFailed(M); return; } } while (0)
+#define Assert1(C, M, V1) \
+  do { if (!(C)) { CheckFailed(M, V1); return; } } while (0)
+#define Assert2(C, M, V1, V2) \
+  do { if (!(C)) { CheckFailed(M, V1, V2); return; } } while (0)
+#define Assert3(C, M, V1, V2, V3) \
+  do { if (!(C)) { CheckFailed(M, V1, V2, V3); return; } } while (0)
+#define Assert4(C, M, V1, V2, V3, V4) \
+  do { if (!(C)) { CheckFailed(M, V1, V2, V3, V4); return; } } while (0)
+
+// Lint::runOnFunction - This is the main Analysis entry point for a
+// function.
+//
+bool Lint::runOnFunction(Function &F) {
+  Mod = F.getParent();
+  AA = &getAnalysis<AliasAnalysis>();
+  DT = &getAnalysis<DominatorTree>();
+  TD = getAnalysisIfAvailable<TargetData>();
+  visit(F);
+  dbgs() << MessagesStr.str();
+  Messages.clear();
+  return false;
+}
+
+void Lint::visitFunction(Function &F) {
+  // This isn't undefined behavior, it's just a little unusual, and it's a
+  // fairly common mistake to neglect to name a function.
+  Assert1(F.hasName() || F.hasLocalLinkage(),
+          "Unusual: Unnamed function with non-local linkage", &F);
+
+  // TODO: Check for irreducible control flow.
+}
+
+void Lint::visitCallSite(CallSite CS) {
+  Instruction &I = *CS.getInstruction();
+  Value *Callee = CS.getCalledValue();
+
+  visitMemoryReference(I, Callee, AliasAnalysis::UnknownSize,
+                       0, 0, MemRef::Callee);
+
+  if (Function *F = dyn_cast<Function>(findValue(Callee,
+                                                 /*OffsetOk=*/false))) {
+    Assert1(CS.getCallingConv() == F->getCallingConv(),
+            "Undefined behavior: Caller and callee calling convention differ",
+            &I);
+
+    const FunctionType *FT = F->getFunctionType();
+    unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin());
+
+    Assert1(FT->isVarArg() ?
+              FT->getNumParams() <= NumActualArgs :
+              FT->getNumParams() == NumActualArgs,
+            "Undefined behavior: Call argument count mismatches callee "
+            "argument count", &I);
+
+    Assert1(FT->getReturnType() == I.getType(),
+            "Undefined behavior: Call return type mismatches "
+            "callee return type", &I);
+
+    // Check argument types (in case the callee was casted) and attributes.
+    // TODO: Verify that caller and callee attributes are compatible.
+    Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+    CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+    for (; AI != AE; ++AI) {
+      Value *Actual = *AI;
+      if (PI != PE) {
+        Argument *Formal = PI++;
+        Assert1(Formal->getType() == Actual->getType(),
+                "Undefined behavior: Call argument type mismatches "
+                "callee parameter type", &I);
+
+        // Check that noalias arguments don't alias other arguments. This is
+        // not fully precise because we don't know the sizes of the
+        // dereferenced memory regions.
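+        // For example, IR along these lines (illustrative) is flagged,
+        // because the second argument must-aliases the noalias first one:
+        //   declare void @f(i8* noalias, i8*)
+        //   call void @f(i8* %buf, i8* %buf)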
+        if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+          for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI)
+            if (AI != BI && (*BI)->getType()->isPointerTy()) {
+              AliasAnalysis::AliasResult Result = AA->alias(*AI, *BI);
+              Assert1(Result != AliasAnalysis::MustAlias &&
+                      Result != AliasAnalysis::PartialAlias,
+                      "Unusual: noalias argument aliases another argument",
+                      &I);
+            }
+
+        // Check that an sret argument points to valid memory.
+        if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+          const Type *Ty =
+            cast<PointerType>(Formal->getType())->getElementType();
+          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+                               TD ? TD->getABITypeAlignment(Ty) : 0,
+                               Ty, MemRef::Read | MemRef::Write);
+        }
+      }
+    }
+  }
+
+  if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+    for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+         AI != AE; ++AI) {
+      Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+      Assert1(!isa<AllocaInst>(Obj),
+              "Undefined behavior: Call with \"tail\" keyword references "
+              "alloca", &I);
+    }
+
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
+    switch (II->getIntrinsicID()) {
+    default: break;
+
+    // TODO: Check more intrinsics
+
+    case Intrinsic::memcpy: {
+      MemCpyInst *MCI = cast<MemCpyInst>(&I);
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MCI->getDest(), AliasAnalysis::UnknownSize,
+                           MCI->getAlignment(), 0,
+                           MemRef::Write);
+      visitMemoryReference(I, MCI->getSource(), AliasAnalysis::UnknownSize,
+                           MCI->getAlignment(), 0,
+                           MemRef::Read);
+
+      // Check that the memcpy arguments don't overlap. The AliasAnalysis API
+      // isn't expressive enough for what we really want to do. Known partial
+      // overlap is not distinguished from the case where nothing is known.
+      uint64_t Size = 0;
+      if (const ConstantInt *Len =
+            dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+                                            /*OffsetOk=*/false)))
+        if (Len->getValue().isIntN(32))
+          Size = Len->getValue().getZExtValue();
+      Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
+              AliasAnalysis::MustAlias,
+              "Undefined behavior: memcpy source and destination overlap",
+              &I);
+      break;
+    }
+    case Intrinsic::memmove: {
+      MemMoveInst *MMI = cast<MemMoveInst>(&I);
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MMI->getDest(), AliasAnalysis::UnknownSize,
+                           MMI->getAlignment(), 0,
+                           MemRef::Write);
+      visitMemoryReference(I, MMI->getSource(), AliasAnalysis::UnknownSize,
+                           MMI->getAlignment(), 0,
+                           MemRef::Read);
+      break;
+    }
+    case Intrinsic::memset: {
+      MemSetInst *MSI = cast<MemSetInst>(&I);
+      // TODO: If the size is known, use it.
+      visitMemoryReference(I, MSI->getDest(), AliasAnalysis::UnknownSize,
+                           MSI->getAlignment(), 0,
+                           MemRef::Write);
+      break;
+    }
+
+    case Intrinsic::vastart:
+      Assert1(I.getParent()->getParent()->isVarArg(),
+              "Undefined behavior: va_start called in a non-varargs function",
+              &I);
+
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
+      break;
+    case Intrinsic::vacopy:
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Write);
+      visitMemoryReference(I, CS.getArgument(1), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read);
+      break;
+    case Intrinsic::vaend:
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
+      break;
+
+    case Intrinsic::stackrestore:
+      // Stackrestore doesn't read or write memory, but it sets the
+      // stack pointer, which the compiler may read from or write to
+      // at any time, so check it for both readability and writeability.
+      visitMemoryReference(I, CS.getArgument(0), AliasAnalysis::UnknownSize,
+                           0, 0, MemRef::Read | MemRef::Write);
+      break;
+    }
+}
+
+void Lint::visitCallInst(CallInst &I) {
+  return visitCallSite(&I);
+}
+
+void Lint::visitInvokeInst(InvokeInst &I) {
+  return visitCallSite(&I);
+}
+
+void Lint::visitReturnInst(ReturnInst &I) {
+  Function *F = I.getParent()->getParent();
+  Assert1(!F->doesNotReturn(),
+          "Unusual: Return statement in function with noreturn attribute",
+          &I);
+
+  if (Value *V = I.getReturnValue()) {
+    Value *Obj = findValue(V, /*OffsetOk=*/true);
+    Assert1(!isa<AllocaInst>(Obj),
+            "Unusual: Returning alloca value", &I);
+  }
+}
+
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
+void Lint::visitMemoryReference(Instruction &I,
+                                Value *Ptr, uint64_t Size, unsigned Align,
+                                const Type *Ty, unsigned Flags) {
+  // If no memory is being referenced, it doesn't matter if the pointer
+  // is valid.
+  if (Size == 0)
+    return;
+
+  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
+  Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
+          "Undefined behavior: Null pointer dereference", &I);
+  Assert1(!isa<UndefValue>(UnderlyingObject),
+          "Undefined behavior: Undef pointer dereference", &I);
+  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+          !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+          "Unusual: All-ones pointer dereference", &I);
+  Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+          !cast<ConstantInt>(UnderlyingObject)->isOne(),
+          "Unusual: Address one pointer dereference", &I);
+
+  if (Flags & MemRef::Write) {
+    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
+      Assert1(!GV->isConstant(),
+              "Undefined behavior: Write to read-only memory", &I);
+    Assert1(!isa<Function>(UnderlyingObject) &&
+            !isa<BlockAddress>(UnderlyingObject),
+            "Undefined behavior: Write to text section", &I);
+  }
+  if (Flags & MemRef::Read) {
+    Assert1(!isa<Function>(UnderlyingObject),
+            "Unusual: Load from function body", &I);
+    Assert1(!isa<BlockAddress>(UnderlyingObject),
+            "Undefined behavior: Load from block address", &I);
+  }
+  if (Flags & MemRef::Callee) {
+    Assert1(!isa<BlockAddress>(UnderlyingObject),
+            "Undefined behavior: Call to block address", &I);
+  }
+  if (Flags & MemRef::Branchee) {
+    Assert1(!isa<Constant>(UnderlyingObject) ||
+            isa<BlockAddress>(UnderlyingObject),
+            "Undefined behavior: Branch to non-blockaddress", &I);
+  }
+
+  if (TD) {
+    if (Align == 0 && Ty) Align = TD->getABITypeAlignment(Ty);
+
+    if (Align != 0) {
+      unsigned BitWidth = TD->getTypeSizeInBits(Ptr->getType());
+      APInt Mask = APInt::getAllOnesValue(BitWidth),
+            KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      ComputeMaskedBits(Ptr, Mask, KnownZero, KnownOne, TD);
+      Assert1(!(KnownOne & APInt::getLowBitsSet(BitWidth, Log2_32(Align))),
+              "Undefined behavior: Memory reference address is misaligned",
+              &I);
+    }
+  }
+}
+
+void Lint::visitLoadInst(LoadInst &I) {
+  visitMemoryReference(I, I.getPointerOperand(),
+                       AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+                       I.getType(), MemRef::Read);
+}
+
+void Lint::visitStoreInst(StoreInst &I) {
+  visitMemoryReference(I, I.getPointerOperand(),
+                       AA->getTypeStoreSize(I.getOperand(0)->getType()),
+                       I.getAlignment(),
+                       I.getOperand(0)->getType(), MemRef::Write);
+}
+
+void Lint::visitXor(BinaryOperator &I) {
+  Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+          !isa<UndefValue>(I.getOperand(1)),
+          "Undefined result: xor(undef, undef)", &I);
+}
+
+void Lint::visitSub(BinaryOperator &I) {
+  Assert1(!isa<UndefValue>(I.getOperand(0)) ||
+          !isa<UndefValue>(I.getOperand(1)),
+          "Undefined result: sub(undef, undef)", &I);
+}
+
+void Lint::visitLShr(BinaryOperator &I) {
+  if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+            "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitAShr(BinaryOperator &I) {
+  if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+            "Undefined result: Shift count out of range", &I);
+}
+
+void Lint::visitShl(BinaryOperator &I) {
+  if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
+    Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
+            "Undefined result: Shift count out of range", &I);
+}
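+
+// For instance (illustrative IR), both of the following divisions are
+// flagged by the checks below: the first has a literal zero divisor, and
+// the second has a divisor that ComputeMaskedBits proves to be zero.
+//   %q1 = sdiv i32 %n, 0
+//   %z  = and i32 %m, 0
+//   %q2 = udiv i32 %n, %z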
+
+static bool isZero(Value *V, TargetData *TD) {
+  // Assume undef could be zero.
+  if (isa<UndefValue>(V)) return true;
+
+  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
+  APInt Mask = APInt::getAllOnesValue(BitWidth),
+        KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD);
+  return KnownZero.isAllOnesValue();
+}
+
+void Lint::visitSDiv(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD),
+          "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitUDiv(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD),
+          "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitSRem(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD),
+          "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitURem(BinaryOperator &I) {
+  Assert1(!isZero(I.getOperand(1), TD),
+          "Undefined behavior: Division by zero", &I);
+}
+
+void Lint::visitAllocaInst(AllocaInst &I) {
+  if (isa<ConstantInt>(I.getArraySize()))
+    // This isn't undefined behavior, it's just an obvious pessimization.
+    Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
+            "Pessimization: Static alloca outside of entry block", &I);
+
+  // TODO: Check for an unusual size (MSB set?)
+}
+
+void Lint::visitVAArgInst(VAArgInst &I) {
+  visitMemoryReference(I, I.getOperand(0), AliasAnalysis::UnknownSize, 0, 0,
+                       MemRef::Read | MemRef::Write);
+}
+
+void Lint::visitIndirectBrInst(IndirectBrInst &I) {
+  visitMemoryReference(I, I.getAddress(), AliasAnalysis::UnknownSize, 0, 0,
+                       MemRef::Branchee);
+
+  Assert1(I.getNumDestinations() != 0,
+          "Undefined behavior: indirectbr with no destinations", &I);
+}
+
+void Lint::visitExtractElementInst(ExtractElementInst &I) {
+  if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+                                        /*OffsetOk=*/false)))
+    Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
+            "Undefined result: extractelement index out of range", &I);
+}
+
+void Lint::visitInsertElementInst(InsertElementInst &I) {
+  if (ConstantInt *CI =
+        dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+                                        /*OffsetOk=*/false)))
+    Assert1(CI->getValue().ult(I.getType()->getNumElements()),
+            "Undefined result: insertelement index out of range", &I);
+}
+
+void Lint::visitUnreachableInst(UnreachableInst &I) {
+  // This isn't undefined behavior, it's merely suspicious.
+  Assert1(&I == I.getParent()->begin() ||
+          prior(BasicBlock::iterator(&I))->mayHaveSideEffects(),
+          "Unusual: unreachable immediately preceded by instruction without "
+          "side effects", &I);
+}
+
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+  SmallPtrSet<Value *, 4> Visited;
+  return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+                           SmallPtrSet<Value *, 4> &Visited) const {
+  // Detect self-referential values.
+  if (!Visited.insert(V))
+    return UndefValue::get(V->getType());
+
+  // TODO: Look through sext or zext cast, when the result is known to
+  // be interpreted as signed or unsigned, respectively.
+  // TODO: Look through eliminable cast pairs.
+  // TODO: Look through calls with unique return values.
+  // TODO: Look through vector insert/extract/shuffle.
+  V = OffsetOk ? GetUnderlyingObject(V, TD) : V->stripPointerCasts();
+  if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+    BasicBlock::iterator BBI = L;
+    BasicBlock *BB = L->getParent();
+    SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+    for (;;) {
+      if (!VisitedBlocks.insert(BB)) break;
+      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+                                              BB, BBI, 6, AA))
+        return findValueImpl(U, OffsetOk, Visited);
+      if (BBI != BB->begin()) break;
+      BB = BB->getUniquePredecessor();
+      if (!BB) break;
+      BBI = BB->end();
+    }
+  } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (Value *W = PN->hasConstantValue())
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
+  } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+    if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+                            Type::getInt64Ty(V->getContext())))
+      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+  } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+    if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+                                     Ex->idx_begin(),
+                                     Ex->idx_end()))
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // Same as above, but for ConstantExpr instead of Instruction.
+    if (Instruction::isCast(CE->getOpcode())) {
+      if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+                               CE->getOperand(0)->getType(),
+                               CE->getType(),
+                               TD ? TD->getIntPtrType(V->getContext()) :
+                                    Type::getInt64Ty(V->getContext())))
+        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+    } else if (CE->getOpcode() == Instruction::ExtractValue) {
+      const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+      if (Value *W = FindInsertedValue(CE->getOperand(0),
+                                       Indices.begin(),
+                                       Indices.end()))
+        if (W != V)
+          return findValueImpl(W, OffsetOk, Visited);
+    }
+  }
+
+  // As a last resort, try SimplifyInstruction or constant folding.
+  if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+    if (Value *W = SimplifyInstruction(Inst, TD, DT))
+      return findValueImpl(W, OffsetOk, Visited);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (Value *W = ConstantFoldConstantExpression(CE, TD))
+      if (W != V)
+        return findValueImpl(W, OffsetOk, Visited);
+  }
+
+  return V;
+}
+
+//===----------------------------------------------------------------------===//
+//  Implement the public interfaces to this file...
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createLintPass() {
+  return new Lint();
+}
+
+/// lintFunction - Check a function for errors, printing messages on stderr.
+///
+void llvm::lintFunction(const Function &f) {
+  Function &F = const_cast<Function&>(f);
+  assert(!F.isDeclaration() && "Cannot lint external functions");
+
+  FunctionPassManager FPM(F.getParent());
+  Lint *V = new Lint();
+  FPM.add(V);
+  FPM.run(F);
+}
+
+/// lintModule - Check a module for errors, printing messages on stderr.
+///
+void llvm::lintModule(const Module &M) {
+  PassManager PM;
+  Lint *V = new Lint();
+  PM.add(V);
+  PM.run(const_cast<Module&>(M));
+}
diff --git a/final/lib/Analysis/Loads.cpp b/final/lib/Analysis/Loads.cpp
new file mode 100644
index 00000000000..2ea27fb62fc
--- /dev/null
+++ b/final/lib/Analysis/Loads.cpp
@@ -0,0 +1,235 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+///   %t0 = getelementptr @a, 0, 3
+///   store i32 0, i32* %t0
+///   %t1 = getelementptr @a, 0, 3
+///   %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+  // Test if the values are trivially equivalent.
+  if (A == B) return true;
+
+  // Test if the values come from identical arithmetic instructions.
+  // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+  // this function is only used when one address use dominates the
+  // other, which means that they'll always either have the same
+  // value or one of them will have an undefined value.
+  if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+      isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+    if (const Instruction *BI = dyn_cast<Instruction>(B))
+      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+        return true;
+
+  // Otherwise they may not be equivalent.
+  return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to GetUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+                                            uint64_t &ByteOffset,
+                                            unsigned MaxLookup = 6) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      if (!GEP->hasAllConstantIndices())
+        return V;
+      SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+      ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+                                         &Indices[0], Indices.size());
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+                                       unsigned Align, const TargetData *TD) {
+  uint64_t ByteOffset = 0;
+  Value *Base = V;
+  if (TD)
+    Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+  const Type *BaseType = 0;
+  unsigned BaseAlign = 0;
+  if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+    // An alloca is safe to load from as long as it is suitably aligned.
+    BaseType = AI->getAllocatedType();
+    BaseAlign = AI->getAlignment();
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+    // Global variables are safe to load from but their size cannot be
+    // guaranteed if they are overridden.
+    if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+      BaseType = GV->getType()->getElementType();
+      BaseAlign = GV->getAlignment();
+    }
+  }
+
+  if (BaseType && BaseType->isSized()) {
+    if (TD && BaseAlign == 0)
+      BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+    if (Align <= BaseAlign) {
+      if (!TD)
+        return true; // Loading directly from an alloca or global is OK.
+
+      // Check if the load is within the bounds of the underlying object.
+      const PointerType *AddrTy = cast<PointerType>(V->getType());
+      uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+      if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+          (Align == 0 || (ByteOffset % Align) == 0))
+        return true;
+    }
+  }
+
+  // Otherwise, be a little bit aggressive by scanning the local block where
+  // we want to check to see if the pointer is already being loaded or stored
+  // from/to. If so, the previous load or store would have already trapped,
+  // so there is no harm doing an extra load (also, CSE will later eliminate
+  // the load entirely).
+  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+  while (BBI != E) {
+    --BBI;
+
+    // If we see a free or a call which may write to memory (i.e. which might
+    // do a free) the pointer could be marked invalid.
+    if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+        !isa<DbgInfoIntrinsic>(BBI))
+      return false;
+
+    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+      if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+      if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+    }
+  }
+  return false;
+}
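+
+// A minimal caller sketch (illustrative; 'LI' and 'InsertPt' are
+// hypothetical names): a pass that wants to speculate a load above a branch
+// could guard the transform with this query, which proves the load cannot
+// trap at the insertion point:
+//
+//   if (isSafeToLoadUnconditionally(LI->getPointerOperand(), InsertPt,
+//                                   LI->getAlignment(), TD)) {
+//     // Safe to hoist the load to InsertPt.
+//   }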
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
+/// the instruction before ScanFrom) checking to see if we have the value at
+/// the memory address *Ptr locally available within a small number of
+/// instructions. If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't
+/// find something that invalidates *Ptr or provides it, ScanFrom is left at
+/// begin() and this returns null.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block.
+/// If it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+                                      BasicBlock::iterator &ScanFrom,
+                                      unsigned MaxInstsToScan,
+                                      AliasAnalysis *AA) {
+  if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+  // If we're using alias analysis to disambiguate, get the size of *Ptr.
+  uint64_t AccessSize = 0;
+  if (AA) {
+    const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+    AccessSize = AA->getTypeStoreSize(AccessTy);
+  }
+
+  while (ScanFrom != ScanBB->begin()) {
+    // We must ignore debug info directives when counting (otherwise they
+    // would affect codegen).
+    Instruction *Inst = --ScanFrom;
+    if (isa<DbgInfoIntrinsic>(Inst))
+      continue;
+
+    // Restore ScanFrom to expected value in case next test succeeds
+    ScanFrom++;
+
+    // Don't scan huge blocks.
+    if (MaxInstsToScan-- == 0) return 0;
+
+    --ScanFrom;
+    // If this is a load of Ptr, the loaded value is available.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+      if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+        return LI;
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // If this is a store through Ptr, the value is available!
+      if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+        return SI->getOperand(0);
+
+      // If Ptr is an alloca and this is a store to a different alloca, ignore
+      // the store. This is a trivial form of alias analysis that is important
+      // for reg2mem'd code.
+      if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+          (isa<AllocaInst>(SI->getOperand(1)) ||
+           isa<GlobalVariable>(SI->getOperand(1))))
+        continue;
+
+      // If we have alias analysis and it says the store won't modify the
+      // loaded value, ignore the store.
+      if (AA &&
+          (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // Otherwise the store may or may not alias the pointer; bail out.
+      ++ScanFrom;
+      return 0;
+    }
+
+    // If this is some other instruction that may clobber Ptr, bail out.
+    if (Inst->mayWriteToMemory()) {
+      // If alias analysis claims that it really won't modify the load,
+      // ignore it.
+      if (AA &&
+          (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+        continue;
+
+      // May modify the pointer, bail out.
+      ++ScanFrom;
+      return 0;
+    }
+  }
+
+  // Got to the start of the block, we didn't find it, but are done for this
+  // block.
+  return 0;
+}
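+
+// Typical use (illustrative; real callers such as redundancy-elimination
+// passes also verify types and domination): starting at a load, scan
+// backwards for an already-available copy of the loaded value:
+//
+//   BasicBlock::iterator ScanFrom = LI;
+//   if (Value *Avail = FindAvailableLoadedValue(LI->getPointerOperand(),
+//                                               LI->getParent(), ScanFrom,
+//                                               /*MaxInstsToScan=*/6, AA)) {
+//     // LI is redundant; its users can use Avail instead.
+//   }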
diff --git a/final/lib/Analysis/LoopDependenceAnalysis.cpp b/final/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 00000000000..c1afe8fbd61
--- /dev/null
+++ b/final/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,358 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the (beginning) of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+// TODO: document lingo (pair, subscript, index)
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Instructions.h"
+#include "llvm/Operator.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumAnswered,    "Number of dependence queries answered");
+STATISTIC(NumAnalysed,    "Number of distinct dependence pairs analysed");
+STATISTIC(NumDependent,   "Number of pairs with dependent accesses");
+STATISTIC(NumIndependent, "Number of pairs with independent accesses");
+STATISTIC(NumUnknown,     "Number of pairs with unknown accesses");
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+  return new LoopDependenceAnalysis();
+}
+
+INITIALIZE_PASS_BEGIN(LoopDependenceAnalysis, "lda",
+                      "Loop Dependence Analysis", false, true)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LoopDependenceAnalysis, "lda",
+                    "Loop Dependence Analysis", false, true)
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+//                             Utility Functions
+//===----------------------------------------------------------------------===//
+
+static inline bool IsMemRefInstr(const Value *V) {
+  const Instruction *I = dyn_cast<Instruction>(V);
+  return I && (I->mayReadFromMemory() || I->mayWriteToMemory());
+}
+
+static void GetMemRefInstrs(const Loop *L,
+                            SmallVectorImpl<Instruction*> &Memrefs) {
+  for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+       b != be; ++b)
+    for (BasicBlock::iterator i = (*b)->begin(), ie = (*b)->end();
+         i != ie; ++i)
+      if (IsMemRefInstr(i))
+        Memrefs.push_back(i);
+}
+
+static bool IsLoadOrStoreInst(Value *I) {
+  return isa<LoadInst>(I) || isa<StoreInst>(I);
+}
+
+static Value *GetPointerOperand(Value *I) {
+  if (LoadInst *i = dyn_cast<LoadInst>(I))
+    return i->getPointerOperand();
+  if (StoreInst *i = dyn_cast<StoreInst>(I))
+    return i->getPointerOperand();
+  llvm_unreachable("Value is not a load or store instruction!");
+  // Never reached.
+  return 0;
+}
+
+static AliasAnalysis::AliasResult UnderlyingObjectsAlias(AliasAnalysis *AA,
+                                                         const Value *A,
+                                                         const Value *B) {
+  const Value *aObj = GetUnderlyingObject(A);
+  const Value *bObj = GetUnderlyingObject(B);
+  return AA->alias(aObj, AA->getTypeStoreSize(aObj->getType()),
+                   bObj, AA->getTypeStoreSize(bObj->getType()));
+}
+
+static inline const SCEV *GetZeroSCEV(ScalarEvolution *SE) {
+  return SE->getConstant(Type::getInt32Ty(SE->getContext()), 0L);
+}
+
+//===----------------------------------------------------------------------===//
+//                             Dependence Testing
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::isDependencePair(const Value *A,
+                                              const Value *B) const {
+  return IsMemRefInstr(A) &&
+         IsMemRefInstr(B) &&
+         (cast<Instruction>(A)->mayWriteToMemory() ||
+          cast<Instruction>(B)->mayWriteToMemory());
+}
+
+bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A,
+                                                        Value *B,
+                                                        DependencePair *&P) {
+  void *insertPos = 0;
+  FoldingSetNodeID id;
+  id.AddPointer(A);
+  id.AddPointer(B);
+
+  P = Pairs.FindNodeOrInsertPos(id, insertPos);
+  if (P) return true;
+
+  P = new (PairAllocator) DependencePair(id, A, B);
+  Pairs.InsertNode(P, insertPos);
+  return false;
+}
+
+void LoopDependenceAnalysis::getLoops(const SCEV *S,
+                                      DenseSet<const Loop*>* Loops) const {
+  // Refactor this into an SCEVVisitor, if efficiency becomes a concern.
+  for (const Loop *L = this->L; L != 0; L = L->getParentLoop())
+    if (!SE->isLoopInvariant(S, L))
+      Loops->insert(L);
+}
+
+bool LoopDependenceAnalysis::isLoopInvariant(const SCEV *S) const {
+  DenseSet<const Loop*> loops;
+  getLoops(S, &loops);
+  return loops.empty();
+}
+
+bool LoopDependenceAnalysis::isAffine(const SCEV *S) const {
+  const SCEVAddRecExpr *rec = dyn_cast<SCEVAddRecExpr>(S);
+  return isLoopInvariant(S) || (rec && rec->isAffine());
+}
+
+bool LoopDependenceAnalysis::isZIVPair(const SCEV *A, const SCEV *B) const {
+  return isLoopInvariant(A) && isLoopInvariant(B);
+}
+
+bool LoopDependenceAnalysis::isSIVPair(const SCEV *A, const SCEV *B) const {
+  DenseSet<const Loop*> loops;
+  getLoops(A, &loops);
+  getLoops(B, &loops);
+  return loops.size() == 1;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseZIV(const SCEV *A,
+                                   const SCEV *B,
+                                   Subscript *S) const {
+  assert(isZIVPair(A, B) && "Attempted to ZIV-test non-ZIV SCEVs!");
+  return A == B ? Dependent : Independent;
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSIV(const SCEV *A,
+                                   const SCEV *B,
+                                   Subscript *S) const {
+  return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseMIV(const SCEV *A,
+                                   const SCEV *B,
+                                   Subscript *S) const {
+  return Unknown; // TODO: Implement.
+}
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analyseSubscript(const SCEV *A,
+                                         const SCEV *B,
+                                         Subscript *S) const {
+  DEBUG(dbgs() << "  Testing subscript: " << *A << ", " << *B << "\n");
+
+  if (A == B) {
+    DEBUG(dbgs() << "  -> [D] same SCEV\n");
+    return Dependent;
+  }
+
+  if (!isAffine(A) || !isAffine(B)) {
+    DEBUG(dbgs() << "  -> [?] not affine\n");
+    return Unknown;
+  }
+
+  if (isZIVPair(A, B))
+    return analyseZIV(A, B, S);
+
+  if (isSIVPair(A, B))
+    return analyseSIV(A, B, S);
+
+  return analyseMIV(A, B, S);
+}
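+
+// Terminology note (informal, following the classic dependence-testing
+// literature; this expands the "document lingo" TODO above): a subscript
+// pair is "ZIV" (zero index variables) when both SCEVs are loop-invariant,
+// "SIV" (single index variable) when exactly one loop's induction variable
+// occurs in them, and "MIV" (multiple index variables) otherwise. ZIV pairs
+// are decided above by a simple equality test; the SIV and MIV tests are
+// still TODOs.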
+
+LoopDependenceAnalysis::DependenceResult
+LoopDependenceAnalysis::analysePair(DependencePair *P) const {
+  DEBUG(dbgs() << "Analysing:\n" << *P->A << "\n" << *P->B << "\n");
+
+  // We only analyse loads and stores, not possible memory accesses by e.g.
+  // free, call, or invoke instructions.
+  if (!IsLoadOrStoreInst(P->A) || !IsLoadOrStoreInst(P->B)) {
+    DEBUG(dbgs() << "--> [?] no load/store\n");
+    return Unknown;
+  }
+
+  Value *aPtr = GetPointerOperand(P->A);
+  Value *bPtr = GetPointerOperand(P->B);
+
+  switch (UnderlyingObjectsAlias(AA, aPtr, bPtr)) {
+  case AliasAnalysis::MayAlias:
+  case AliasAnalysis::PartialAlias:
+    // We cannot analyse objects if we do not know about their aliasing.
+    DEBUG(dbgs() << "---> [?] may alias\n");
+    return Unknown;
+
+  case AliasAnalysis::NoAlias:
+    // If the objects noalias, they are distinct, accesses are independent.
+    DEBUG(dbgs() << "---> [I] no alias\n");
+    return Independent;
+
+  case AliasAnalysis::MustAlias:
+    break; // The underlying objects alias, test accesses for dependence.
+  }
+
+  const GEPOperator *aGEP = dyn_cast<GEPOperator>(aPtr);
+  const GEPOperator *bGEP = dyn_cast<GEPOperator>(bPtr);
+
+  if (!aGEP || !bGEP)
+    return Unknown;
+
+  // FIXME: Is filtering coupled subscripts necessary?
+
+  // Collect GEP operand pairs (FIXME: use GetGEPOperands from BasicAA),
+  // adding trailing zeroes to the smaller GEP, if needed.
+  typedef SmallVector<std::pair<const SCEV*, const SCEV*>, 4> GEPOpdPairsTy;
+  GEPOpdPairsTy opds;
+  for(GEPOperator::const_op_iterator aIdx = aGEP->idx_begin(),
+                                     aEnd = aGEP->idx_end(),
+                                     bIdx = bGEP->idx_begin(),
+                                     bEnd = bGEP->idx_end();
+      aIdx != aEnd || bIdx != bEnd;
+      aIdx += (aIdx != aEnd), bIdx += (bIdx != bEnd)) {
+    const SCEV* aSCEV = (aIdx != aEnd) ? SE->getSCEV(*aIdx) : GetZeroSCEV(SE);
+    const SCEV* bSCEV = (bIdx != bEnd) ? SE->getSCEV(*bIdx) : GetZeroSCEV(SE);
+    opds.push_back(std::make_pair(aSCEV, bSCEV));
+  }
+
+  if (!opds.empty() && opds[0].first != opds[0].second) {
+    // We cannot (yet) handle arbitrary GEP pointer offsets. Pairs whose
+    // leading (pointer-offset) subscripts differ are treated as unknown.
+    //
+    // TODO: this could be relaxed by adding the size of the underlying
+    // object to the first subscript. If we have e.g. (GEP x,0,i; GEP x,2,-i)
+    // and we know that x is a [100 x i8]*, we could modify the first
+    // subscript to be (i, 200-i) instead of (i, -i).
+    return Unknown;
+  }
+
+  // Now analyse the collected operand pairs (skipping the GEP ptr offsets).
+  for (GEPOpdPairsTy::const_iterator i = opds.begin() + 1, end = opds.end();
+       i != end; ++i) {
+    Subscript subscript;
+    DependenceResult result = analyseSubscript(i->first, i->second,
+                                               &subscript);
+    if (result != Dependent) {
+      // We either proved independence or failed to analyse this subscript.
+      // Further subscripts will not improve the situation, so abort early.
+      return result;
+    }
+    P->Subscripts.push_back(subscript);
+  }
+  // We successfully analysed all subscripts but failed to prove independence.
+  return Dependent;
+}
+
+bool LoopDependenceAnalysis::depends(Value *A, Value *B) {
+  assert(isDependencePair(A, B) && "Values form no dependence pair!");
+  ++NumAnswered;
+
+  DependencePair *p;
+  if (!findOrInsertDependencePair(A, B, p)) {
+    // The pair is not cached, so analyse it.
+    ++NumAnalysed;
+    switch (p->Result = analysePair(p)) {
+    case Dependent:   ++NumDependent;   break;
+    case Independent: ++NumIndependent; break;
+    case Unknown:     ++NumUnknown;     break;
+    }
+  }
+  return p->Result != Independent;
+}
+
+//===----------------------------------------------------------------------===//
+//                   LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+  this->L = L;
+  AA = &getAnalysis<AliasAnalysis>();
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+void LoopDependenceAnalysis::releaseMemory() {
+  Pairs.clear();
+  PairAllocator.Reset();
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+  AU.addRequiredTransitive<ScalarEvolution>();
+}
+
+static void PrintLoopInfo(raw_ostream &OS,
+                          LoopDependenceAnalysis *LDA, const Loop *L) {
+  if (!L->empty()) return; // ignore non-innermost loops
+
+  SmallVector<Instruction*, 8> memrefs;
+  GetMemRefInstrs(L, memrefs);
+
+  OS << "Loop at depth " << L->getLoopDepth() << ", header block: ";
+  WriteAsOperand(OS, L->getHeader(), false);
+  OS << "\n";
+
+  OS << "  Load/store instructions: " << memrefs.size() << "\n";
+  for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+       end = memrefs.end(); x != end; ++x)
+    OS << "\t" << (x - memrefs.begin()) << ": " << **x << "\n";
+
+  OS << "  Pairwise dependence results:\n";
+  for (SmallVector<Instruction*, 8>::const_iterator x = memrefs.begin(),
+       end = memrefs.end(); x != end; ++x)
+    for (SmallVector<Instruction*, 8>::const_iterator y = x + 1;
+         y != end; ++y)
+      if (LDA->isDependencePair(*x, *y))
+        OS << "\t" << (x - memrefs.begin()) << "," << (y - memrefs.begin())
+           << ": " << (LDA->depends(*x, *y) ? "dependent" : "independent")
+           << "\n";
}
+
+void LoopDependenceAnalysis::print(raw_ostream &OS, const Module*) const {
+  // TODO: doc why const_cast is safe
+  PrintLoopInfo(OS, const_cast<LoopDependenceAnalysis*>(this), this->L);
+}
diff --git a/final/lib/Analysis/LoopInfo.cpp b/final/lib/Analysis/LoopInfo.cpp
new file mode 100644
index 00000000000..05831402f40
--- /dev/null
+++ b/final/lib/Analysis/LoopInfo.cpp
@@ -0,0 +1,419 @@
+//===- LoopInfo.cpp - Natural Loop Calculator -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfo class that is used to identify natural loops
+// and determine the loop depth of various nodes of the CFG. Note that the
+// loops identified may actually be several natural loops that share the same
+// header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <algorithm>
+using namespace llvm;
+
+// Always verify loopinfo if expensive checking is enabled.
+#ifdef XDEBUG
+static bool VerifyLoopInfo = true;
+#else
+static bool VerifyLoopInfo = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopInfoX("verify-loop-info", cl::location(VerifyLoopInfo),
+                cl::desc("Verify loop info (time consuming)"));
+
+char LoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information",
+                      true, true)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true)
+
+//===----------------------------------------------------------------------===//
+// Loop implementation
+//
+
+/// isLoopInvariant - Return true if the specified value is loop invariant.
+///
+bool Loop::isLoopInvariant(Value *V) const {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    return !contains(I);
+  return true;  // All non-instructions are loop invariant
+}
+
+/// hasLoopInvariantOperands - Return true if all the operands of the
+/// specified instruction are loop invariant.
+bool Loop::hasLoopInvariantOperands(Instruction *I) const {
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!isLoopInvariant(I->getOperand(i)))
+      return false;
+
+  return true;
+}
+
+/// makeLoopInvariant - If the given value is an instruction inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the value after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Value *V, bool &Changed,
+                             Instruction *InsertPt) const {
+  if (Instruction *I = dyn_cast<Instruction>(V))
+    return makeLoopInvariant(I, Changed, InsertPt);
+  return true;  // All non-instructions are loop-invariant.
+}
+
+/// makeLoopInvariant - If the given instruction is inside of the
+/// loop and it can be hoisted, do so to make it trivially loop-invariant.
+/// Return true if the instruction after any hoisting is loop invariant. This
+/// function can be used as a slightly more aggressive replacement for
+/// isLoopInvariant.
+///
+/// If InsertPt is specified, it is the point to hoist instructions to.
+/// If null, the terminator of the loop preheader is used.
+///
+bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,
+                             Instruction *InsertPt) const {
+  // Test if the value is already loop-invariant.
+  if (isLoopInvariant(I))
+    return true;
+  if (!I->isSafeToSpeculativelyExecute())
+    return false;
+  if (I->mayReadFromMemory())
+    return false;
+  // Determine the insertion point, unless one was given.
+  if (!InsertPt) {
+    BasicBlock *Preheader = getLoopPreheader();
+    // Without a preheader, hoisting is not feasible.
+    if (!Preheader)
+      return false;
+    InsertPt = Preheader->getTerminator();
+  }
+  // Don't hoist instructions with loop-variant operands.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!makeLoopInvariant(I->getOperand(i), Changed, InsertPt))
+      return false;
+
+  // Hoist.
+  I->moveBefore(InsertPt);
+  Changed = true;
+  return true;
+}
+
+/// getCanonicalInductionVariable - Check to see if the loop has a canonical
+/// induction variable: an integer recurrence that starts at 0 and increments
+/// by one each time through the loop. If so, return the phi node that
+/// corresponds to it.
+///
+/// The IndVarSimplify pass transforms loops to have a canonical induction
+/// variable.
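+///
+/// For example (illustrative), after IndVarSimplify a loop written as
+///   for (i = 0; i != n; ++i) ...
+/// has a phi for i that starts at 0 and is incremented by 1 on the
+/// backedge; that phi is what this function returns.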
+///
+PHINode *Loop::getCanonicalInductionVariable() const {
+  BasicBlock *H = getHeader();
+
+  BasicBlock *Incoming = 0, *Backedge = 0;
+  pred_iterator PI = pred_begin(H);
+  assert(PI != pred_end(H) &&
+         "Loop must have at least one backedge!");
+  Backedge = *PI++;
+  if (PI == pred_end(H)) return 0;  // dead loop
+  Incoming = *PI++;
+  if (PI != pred_end(H)) return 0;  // multiple backedges?
+
+  if (contains(Incoming)) {
+    if (contains(Backedge))
+      return 0;
+    std::swap(Incoming, Backedge);
+  } else if (!contains(Backedge))
+    return 0;
+
+  // Loop over all of the PHI nodes, looking for a canonical indvar.
+  for (BasicBlock::iterator I = H->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    if (ConstantInt *CI =
+          dyn_cast<ConstantInt>(PN->getIncomingValueForBlock(Incoming)))
+      if (CI->isNullValue())
+        if (Instruction *Inc =
+              dyn_cast<Instruction>(PN->getIncomingValueForBlock(Backedge)))
+          if (Inc->getOpcode() == Instruction::Add &&
+              Inc->getOperand(0) == PN)
+            if (ConstantInt *CI = dyn_cast<ConstantInt>(Inc->getOperand(1)))
+              if (CI->equalsInt(1))
+                return PN;
+  }
+  return 0;
+}
+
+/// getTripCount - Return a loop-invariant LLVM value indicating the number of
+/// times the loop will be executed. Note that this means that the backedge
+/// of the loop executes N-1 times. If the trip-count cannot be determined,
+/// this returns null.
+///
+/// The IndVarSimplify pass transforms loops to have a form that this
+/// function easily understands.
+///
+Value *Loop::getTripCount() const {
+  // Canonical loops will end with a 'cmp ne I, V', where I is the incremented
+  // canonical induction variable and V is the trip count of the loop.
+  PHINode *IV = getCanonicalInductionVariable();
+  if (IV == 0 || IV->getNumIncomingValues() != 2) return 0;
+
+  bool P0InLoop = contains(IV->getIncomingBlock(0));
+  Value *Inc = IV->getIncomingValue(!P0InLoop);
+  BasicBlock *BackedgeBlock = IV->getIncomingBlock(!P0InLoop);
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(BackedgeBlock->getTerminator()))
+    if (BI->isConditional()) {
+      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
+        if (ICI->getOperand(0) == Inc) {
+          if (BI->getSuccessor(0) == getHeader()) {
+            if (ICI->getPredicate() == ICmpInst::ICMP_NE)
+              return ICI->getOperand(1);
+          } else if (ICI->getPredicate() == ICmpInst::ICMP_EQ) {
+            return ICI->getOperand(1);
+          }
+        }
+      }
+    }
+
+  return 0;
+}
+
+/// getSmallConstantTripCount - Returns the trip count of this loop as a
+/// normal unsigned value, if possible. Returns 0 if the trip count is
+/// unknown or not constant. Will also return 0 if the trip count is very
+/// large (>= 2^32).
+unsigned Loop::getSmallConstantTripCount() const {
+  Value* TripCount = this->getTripCount();
+  if (TripCount) {
+    if (ConstantInt *TripCountC = dyn_cast<ConstantInt>(TripCount)) {
+      // Guard against huge trip counts.
+      if (TripCountC->getValue().getActiveBits() <= 32) {
+        return (unsigned)TripCountC->getZExtValue();
+      }
+    }
+  }
+  return 0;
+}
+
+/// getSmallConstantTripMultiple - Returns the largest constant divisor of
+/// the trip count of this loop as a normal unsigned value, if possible. This
+/// means that the actual trip count is always a multiple of the returned
+/// value (don't forget the trip count could very well be zero as well!).
+///
+/// Returns 1 if the trip count is unknown or not guaranteed to be a
+/// multiple of a constant (which is also the case if the trip count is
+/// simply constant; use getSmallConstantTripCount for that case). Will also
+/// return 1 if the trip count is very large (>= 2^32).
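+///
+/// For example, a trip count of 'mul i32 %n, 4' yields 4, and a trip count
+/// of 'shl i32 %n, 3' yields 8.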
+unsigned Loop::getSmallConstantTripMultiple() const {
+  Value *TripCount = this->getTripCount();
+  // This will hold the ConstantInt result, if any
+  ConstantInt *Result = NULL;
+  if (TripCount) {
+    // See if the trip count is constant itself
+    Result = dyn_cast<ConstantInt>(TripCount);
+    // if not, see if it is a multiplication
+    if (!Result)
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TripCount)) {
+        switch (BO->getOpcode()) {
+        case BinaryOperator::Mul:
+          Result = dyn_cast<ConstantInt>(BO->getOperand(1));
+          break;
+        case BinaryOperator::Shl:
+          if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+            if (CI->getValue().getActiveBits() <= 5)
+              return 1u << CI->getZExtValue();
+          break;
+        default:
+          break;
+        }
+      }
+  }
+  // Guard against huge trip counts.
+  if (Result && Result->getValue().getActiveBits() <= 32) {
+    return (unsigned)Result->getZExtValue();
+  } else {
+    return 1;
+  }
+}
+
+/// isLCSSAForm - Return true if the Loop is in LCSSA form
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+  // Collect the loop blocks in a set for quick membership tests.
+  SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+
+  for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
+    BasicBlock *BB = *BI;
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
+           ++UI) {
+        User *U = *UI;
+        BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+        if (PHINode *P = dyn_cast<PHINode>(U))
+          UserBB = P->getIncomingBlock(UI);
+
+        // Check the current block, as a fast-path, before checking whether
+        // the use is anywhere in the loop.  Most values are used in the same
+        // block they are defined in.  Also, blocks not reachable from the
+        // entry are special; uses in them don't need to go through PHIs.
+        if (UserBB != BB &&
+            !LoopBBs.count(UserBB) &&
+            DT.isReachableFromEntry(UserBB))
+          return false;
+      }
+  }
+
+  return true;
+}
+
+/// isLoopSimplifyForm - Return true if the Loop is in the form that
+/// the LoopSimplify form transforms loops to, which is sometimes called
+/// normal form.
+bool Loop::isLoopSimplifyForm() const {
+  // Normal-form loops have a preheader, a single backedge, and all of their
+  // exits have all their predecessors inside the loop.
+  return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
+  // Collect the loop blocks in a set for quick membership tests.
+  SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
+  // Each predecessor of each exit block of a normal loop is contained
+  // within the loop.
+  SmallVector<BasicBlock*, 4> ExitBlocks;
+  getExitBlocks(ExitBlocks);
+  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
+    for (pred_iterator PI = pred_begin(ExitBlocks[i]),
+         PE = pred_end(ExitBlocks[i]); PI != PE; ++PI)
+      if (!LoopBBs.count(*PI))
+        return false;
+  // All the requirements are met.
+  return true;
+}
+
+/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+/// These are the blocks _outside of the current loop_ which are branched to.
+/// This assumes that loop exits are in canonical form.
+///
+void
+Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock*> &ExitBlocks) const {
+  assert(hasDedicatedExits() &&
+         "getUniqueExitBlocks assumes the loop has canonical form exits!");
+
+  // Sort the blocks vector so that we can use binary search to do quick
+  // lookups.
+  SmallVector<BasicBlock*, 128> LoopBBs(block_begin(), block_end());
+  std::sort(LoopBBs.begin(), LoopBBs.end());
+
+  SmallVector<BasicBlock*, 4> switchExitBlocks;
+
+  for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) {
+
+    BasicBlock *current = *BI;
+    switchExitBlocks.clear();
+
+    for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) {
+      // If block is inside the loop then it is not an exit block.
+      if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I))
+        continue;
+
+      pred_iterator PI = pred_begin(*I);
+      BasicBlock *firstPred = *PI;
+
+      // Insert the exit block into the output ExitBlocks vector only if the
+      // current basic block is the exit block's first predecessor.  This
+      // ensures that the same exit block is not inserted twice into the
+      // ExitBlocks vector.
+      if (current != firstPred)
+        continue;
+
+      // If a terminator has more than two successors, for example SwitchInst,
+      // then it is possible that there are multiple edges from current block
+      // to one exit block.
+      if (std::distance(succ_begin(current), succ_end(current)) <= 2) {
+        ExitBlocks.push_back(*I);
+        continue;
+      }
+
+      // In case of multiple edges from current block to exit block, collect
+      // only one edge in ExitBlocks.  Use switchExitBlocks to keep track of
+      // duplicate edges.
+      if (std::find(switchExitBlocks.begin(), switchExitBlocks.end(), *I)
+          == switchExitBlocks.end()) {
+        switchExitBlocks.push_back(*I);
+        ExitBlocks.push_back(*I);
+      }
+    }
+  }
+}
+
+/// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
+/// block, return that block. Otherwise return null.
+BasicBlock *Loop::getUniqueExitBlock() const {
+  SmallVector<BasicBlock*, 8> UniqueExitBlocks;
+  getUniqueExitBlocks(UniqueExitBlocks);
+  if (UniqueExitBlocks.size() == 1)
+    return UniqueExitBlocks[0];
+  return 0;
+}
+
+void Loop::dump() const {
+  print(dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+// LoopInfo implementation
+//
+bool LoopInfo::runOnFunction(Function &) {
+  releaseMemory();
+  LI.Calculate(getAnalysis<DominatorTree>().getBase());    // Update
+  return false;
+}
+
+void LoopInfo::verifyAnalysis() const {
+  // LoopInfo is a FunctionPass, but verifying every loop in the function
+  // each time verifyAnalysis is called is very expensive. The
+  // -verify-loop-info option can enable this. In order to perform some
+  // checking by default, LoopPass has been taught to call verifyLoop
+  // manually during loop pass sequences.
+
+  if (!VerifyLoopInfo) return;
+
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
+    (*I)->verifyLoopNest();
+  }
+
+  // TODO: check BBMap consistency.
+}
+
+void LoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<DominatorTree>();
+}
+
+void LoopInfo::print(raw_ostream &OS, const Module*) const {
+  LI.print(OS);
+}
+
diff --git a/final/lib/Analysis/LoopPass.cpp b/final/lib/Analysis/LoopPass.cpp
new file mode 100644
index 00000000000..8e1a7bfef69
--- /dev/null
+++ b/final/lib/Analysis/LoopPass.cpp
@@ -0,0 +1,403 @@
+//===- LoopPass.cpp - Loop Pass and Loop Pass Manager ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements LoopPass and LPPassManager. All loop optimization
+// and transformation passes are derived from LoopPass.
+// LPPassManager is responsible for managing LoopPasses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Assembly/PrintModulePass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+namespace {
+
+/// PrintLoopPass - Print a Function corresponding to a Loop.
+///
+class PrintLoopPass : public LoopPass {
+private:
+  std::string Banner;
+  raw_ostream &Out;       // raw_ostream to print on.
+
+public:
+  static char ID;
+  PrintLoopPass(const std::string &B, raw_ostream &o)
+      : LoopPass(ID), Banner(B), Out(o) {}
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+  }
+
+  bool runOnLoop(Loop *L, LPPassManager &) {
+    Out << Banner;
+    for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
+         b != be;
+         ++b) {
+      (*b)->print(Out);
+    }
+    return false;
+  }
+};
+
+char PrintLoopPass::ID = 0;
+}
+
+//===----------------------------------------------------------------------===//
+// LPPassManager
+//
+
+char LPPassManager::ID = 0;
+
+LPPassManager::LPPassManager(int Depth)
+  : FunctionPass(ID), PMDataManager(Depth) {
+  skipThisLoop = false;
+  redoThisLoop = false;
+  LI = NULL;
+  CurrentLoop = NULL;
+}
+
+/// Delete loop from the loop queue and loop hierarchy (LoopInfo).
+void LPPassManager::deleteLoopFromQueue(Loop *L) {
+
+  if (Loop *ParentLoop = L->getParentLoop()) { // Not a top-level loop.
+    // Reparent all of the blocks in this loop.  Since the loop had a parent,
+    // its blocks now all belong to that parent.
+    for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+         I != E; ++I)
+      if (LI->getLoopFor(*I) == L)    // Don't change blocks in subloops.
+        LI->changeLoopFor(*I, ParentLoop);
+
+    // Remove the loop from its parent loop.
+    for (Loop::iterator I = ParentLoop->begin(), E = ParentLoop->end();;
+         ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        ParentLoop->removeChildLoop(I);
+        break;
+      }
+    }
+
+    // Move all subloops into the parent loop.
+    while (!L->empty())
+      ParentLoop->addChildLoop(L->removeChildLoop(L->end()-1));
+  } else {
+    // Reparent all of the blocks in this loop.  Since the loop had no parent,
+    // its blocks are no longer in a loop at all.
+
+    for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+      // Don't change blocks in subloops.
+      if (LI->getLoopFor(L->getBlocks()[i]) == L) {
+        LI->removeBlock(L->getBlocks()[i]);
+        --i;
+      }
+    }
+
+    // Remove the loop from the top-level LoopInfo object.
+    for (LoopInfo::iterator I = LI->begin(), E = LI->end();; ++I) {
+      assert(I != E && "Couldn't find loop");
+      if (*I == L) {
+        LI->removeLoop(I);
+        break;
+      }
+    }
+
+    // Move all of the subloops to the top-level.
+    while (!L->empty())
+      LI->addTopLevelLoop(L->removeChildLoop(L->end()-1));
+  }
+
+  delete L;
+
+  // If L is current loop then skip rest of the passes and let
+  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now
+  // and continue applying other passes on CurrentLoop.
+  if (CurrentLoop == L) {
+    skipThisLoop = true;
+    return;
+  }
+
+  for (std::deque<Loop *>::iterator I = LQ.begin(),
+       E = LQ.end(); I != E; ++I) {
+    if (*I == L) {
+      LQ.erase(I);
+      break;
+    }
+  }
+}
+
+// Insert loop into loop nest (LoopInfo) and loop queue (LQ).
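+// For example, a transformation that has just created a sibling loop NewL of
+// the current loop L might call (a sketch; L and NewL are illustrative):
+//   LPM.insertLoop(NewL, L->getParentLoop());
+// which registers NewL in LoopInfo and queues it for the remaining passes.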
+void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) {
+
+  assert (CurrentLoop != L && "Cannot insert CurrentLoop");
+
+  // Insert into loop nest
+  if (ParentLoop)
+    ParentLoop->addChildLoop(L);
+  else
+    LI->addTopLevelLoop(L);
+
+  insertLoopIntoQueue(L);
+}
+
+void LPPassManager::insertLoopIntoQueue(Loop *L) {
+  // Insert L into loop queue
+  if (L == CurrentLoop)
+    redoLoop(L);
+  else if (!L->getParentLoop())
+    // This is a top-level loop.
+    LQ.push_front(L);
+  else {
+    // Insert L after the parent loop.
+    for (std::deque<Loop *>::iterator I = LQ.begin(),
+         E = LQ.end(); I != E; ++I) {
+      if (*I == L->getParentLoop()) {
+        // deque does not support insert after.
+        ++I;
+        LQ.insert(I, 1, L);
+        break;
+      }
+    }
+  }
+}
+
+// Reoptimize this loop. LPPassManager will re-insert this loop into the
+// queue. This allows LoopPass to change loop nest for the loop. This
+// utility may send LPPassManager into infinite loops so use caution.
+void LPPassManager::redoLoop(Loop *L) {
+  assert (CurrentLoop == L && "Can redo only CurrentLoop");
+  redoThisLoop = true;
+}
+
+/// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for
+/// all loop passes.
+void LPPassManager::cloneBasicBlockSimpleAnalysis(BasicBlock *From,
+                                                  BasicBlock *To, Loop *L) {
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *LP = getContainedPass(Index);
+    LP->cloneBasicBlockAnalysis(From, To, L);
+  }
+}
+
+/// deleteSimpleAnalysisValue - Invoke deleteAnalysisValue hook for all passes.
+void LPPassManager::deleteSimpleAnalysisValue(Value *V, Loop *L) {
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
+    for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;
+         ++BI) {
+      Instruction &I = *BI;
+      deleteSimpleAnalysisValue(&I, L);
+    }
+  }
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *LP = getContainedPass(Index);
+    LP->deleteAnalysisValue(V, L);
+  }
+}
+
+
+// Add the loop and, recursively, all of its subloops into LQ.
+static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
+  LQ.push_back(L);
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    addLoopIntoQueue(*I, LQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+  // LPPassManager needs LoopInfo. In the long term LoopInfo class will
+  // become part of LPPassManager.
+  Info.addRequired<LoopInfo>();
+  Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool LPPassManager::runOnFunction(Function &F) {
+  LI = &getAnalysis<LoopInfo>();
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  // Populate Loop Queue
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    addLoopIntoQueue(*I, LQ);
+
+  if (LQ.empty()) // No loops, skip calling finalizers
+    return false;
+
+  // Initialization
+  for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
+       I != E; ++I) {
+    Loop *L = *I;
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      LoopPass *P = getContainedPass(Index);
+      Changed |= P->doInitialization(L, *this);
+    }
+  }
+
+  // Walk Loops
+  while (!LQ.empty()) {
+
+    CurrentLoop = LQ.back();
+    skipThisLoop = false;
+    redoThisLoop = false;
+
+    // Run all passes on the current Loop.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      LoopPass *P = getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
+                   CurrentLoop->getHeader()->getName());
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      {
+        PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
+        TimeRegion PassTimer(getPassTimer(P));
+
+        Changed |= P->runOnLoop(CurrentLoop, *this);
+      }
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
+                     skipThisLoop ? "<deleted>" :
+                                    CurrentLoop->getHeader()->getName());
+      dumpPreservedSet(P);
+
+      if (!skipThisLoop) {
+        // Manually check that this loop is still healthy. This is done
+        // instead of relying on LoopInfo::verifyLoop since LoopInfo
+        // is a function pass and it's really expensive to verify every
+        // loop in the function every time. That level of checking can be
+        // enabled with the -verify-loop-info option.
+        {
+          TimeRegion PassTimer(getPassTimer(LI));
+          CurrentLoop->verifyLoop();
+        }
+
+        // Then call the regular verifyAnalysis functions.
+        verifyPreservedAnalysis(P);
+      }
+
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P,
+                       skipThisLoop ? "<deleted>" :
+                                      CurrentLoop->getHeader()->getName(),
+                       ON_LOOP_MSG);
+
+      if (skipThisLoop)
+        // Do not run other passes on this loop.
+        break;
+    }
+
+    // If the loop was deleted, release all the loop passes. This frees up
+    // some memory, and avoids trouble with the pass manager trying to call
+    // verifyAnalysis on them.
+    if (skipThisLoop)
+      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+        Pass *P = getContainedPass(Index);
+        freePass(P, "<deleted>", ON_LOOP_MSG);
+      }
+
+    // Pop the loop from queue after running all passes.
+    LQ.pop_back();
+
+    if (redoThisLoop)
+      LQ.push_back(CurrentLoop);
+  }
+
+  // Finalization
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    LoopPass *P = getContainedPass(Index);
+    Changed |= P->doFinalization();
+  }
+
+  return Changed;
+}
+
+/// Print passes managed by this manager
+void LPPassManager::dumpPassStructure(unsigned Offset) {
+  errs().indent(Offset*2) << "Loop Pass Manager\n";
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    Pass *P = getContainedPass(Index);
+    P->dumpPassStructure(Offset + 1);
+    dumpLastUses(P, Offset+1);
+  }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LoopPass
+
+Pass *LoopPass::createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const {
+  return new PrintLoopPass(Banner, O);
+}
+
+// Check if this pass is suitable for the current LPPassManager, if
+// available. This pass P is not suitable for a LPPassManager if P
+// is not preserving higher level analysis info used by other
+// LPPassManager passes. In such case, pop LPPassManager from the
+// stack. This will force assignPassManager() to create a new
+// LPPassManager as expected.
+void LoopPass::preparePassManager(PMStack &PMS) {
+
+  // Find LPPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+    PMS.pop();
+
+  // If this pass is destroying high level information that is used
+  // by other passes that are managed by LPM then do not insert
+  // this pass in current LPM. Use new LPPassManager.
+  if (PMS.top()->getPassManagerType() == PMT_LoopPassManager &&
+      !PMS.top()->preserveHigherLevelAnalysis(this))
+    PMS.pop();
+}
+
+/// Assign pass manager to manage this pass.
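+/// For example, in a sketch like
+///   PassManager PM;
+///   PM.add(createLICMPass());
+///   PM.add(createLoopUnrollPass());
+/// both loop passes land in a single LPPassManager, so each queued loop is
+/// run through both passes before the next loop is visited.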
+void LoopPass::assignPassManager(PMStack &PMS,
+                                 PassManagerType PreferredType) {
+  // Find LPPassManager
+  while (!PMS.empty() &&
+         PMS.top()->getPassManagerType() > PMT_LoopPassManager)
+    PMS.pop();
+
+  LPPassManager *LPPM;
+  if (PMS.top()->getPassManagerType() == PMT_LoopPassManager)
+    LPPM = (LPPassManager*)PMS.top();
+  else {
+    // Create new Loop Pass Manager if it does not exist.
+    assert (!PMS.empty() && "Unable to create Loop Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Loop Pass Manager
+    LPPM = new LPPassManager(PMD->getDepth() + 1);
+    LPPM->populateInheritedAnalysis(PMS);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(LPPM);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    Pass *P = LPPM->getAsPass();
+    TPM->schedulePass(P);
+
+    // [4] Push new manager into PMS
+    PMS.push(LPPM);
+  }
+
+  LPPM->add(this);
+}
diff --git a/final/lib/Analysis/Makefile b/final/lib/Analysis/Makefile
new file mode 100644
index 00000000000..4af6d350a64
--- /dev/null
+++ b/final/lib/Analysis/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Analysis/Makefile -------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMAnalysis
+DIRS = IPA
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/Analysis/MemDepPrinter.cpp b/final/lib/Analysis/MemDepPrinter.cpp
new file mode 100644
index 00000000000..64d215c37cc
--- /dev/null
+++ b/final/lib/Analysis/MemDepPrinter.cpp
@@ -0,0 +1,167 @@
+//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SetVector.h"
+using namespace llvm;
+
+namespace {
+  struct MemDepPrinter : public FunctionPass {
+    const Function *F;
+
+    typedef PointerIntPair<const Instruction *, 1> InstAndClobberFlag;
+    typedef std::pair<InstAndClobberFlag, const BasicBlock *> Dep;
+    typedef SmallSetVector<Dep, 4> DepSet;
+    typedef DenseMap<const Instruction *, DepSet> DepSetMap;
+    DepSetMap Deps;
+
+    static char ID; // Pass identification, replacement for typeid
+    MemDepPrinter() : FunctionPass(ID) {
+      initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnFunction(Function &F);
+
+    void print(raw_ostream &OS, const Module * = 0) const;
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequiredTransitive<AliasAnalysis>();
+      AU.addRequiredTransitive<MemoryDependenceAnalysis>();
+      AU.setPreservesAll();
+    }
+
+    virtual void releaseMemory() {
+      Deps.clear();
+      F = 0;
+    }
+  };
+}
+
+char MemDepPrinter::ID = 0;
+INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
+                      "Print MemDeps of function", false, true)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
+INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
+                    "Print MemDeps of function", false, true)
+
+FunctionPass *llvm::createMemDepPrinter() {
+  return new MemDepPrinter();
+}
+
+bool MemDepPrinter::runOnFunction(Function &F) {
+  this->F = &F;
+  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+  MemoryDependenceAnalysis &MDA = getAnalysis<MemoryDependenceAnalysis>();
+
+  // All this code uses non-const interfaces because MemDep is not
+  // const-friendly, though nothing is actually modified.
+  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+    Instruction *Inst = &*I;
+
+    if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
+      continue;
+
+    MemDepResult Res = MDA.getDependency(Inst);
+    if (!Res.isNonLocal()) {
+      assert(Res.isClobber() != Res.isDef() &&
+             "Local dep should be def or clobber!");
+      Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       static_cast<BasicBlock *>(0)));
+    } else if (CallSite CS = cast<Instruction>(Inst)) {
+      const MemoryDependenceAnalysis::NonLocalDepInfo &NLDI =
+        MDA.getNonLocalCallDependency(CS);
+
+      DepSet &InstDeps = Deps[Inst];
+      for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
+           I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+        const MemDepResult &Res = I->getResult();
+        assert(Res.isClobber() != Res.isDef() &&
+               "Resolved non-local call dep should be def or clobber!");
+        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       I->getBB()));
+      }
+    } else {
+      SmallVector<NonLocalDepResult, 4> NLDI;
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        // FIXME: Volatile is not handled properly here.
+        AliasAnalysis::Location Loc = AA.getLocation(LI);
+        MDA.getNonLocalPointerDependency(Loc, !LI->isVolatile(),
+                                         LI->getParent(), NLDI);
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        // FIXME: Volatile is not handled properly here.
+        AliasAnalysis::Location Loc = AA.getLocation(SI);
+        MDA.getNonLocalPointerDependency(Loc, false, SI->getParent(), NLDI);
+      } else if (VAArgInst *VI = dyn_cast<VAArgInst>(Inst)) {
+        AliasAnalysis::Location Loc = AA.getLocation(VI);
+        MDA.getNonLocalPointerDependency(Loc, false, VI->getParent(), NLDI);
+      } else {
+        llvm_unreachable("Unknown memory instruction!");
+      }
+
+      DepSet &InstDeps = Deps[Inst];
+      for (SmallVectorImpl<NonLocalDepResult>::const_iterator
+           I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
+        const MemDepResult &Res = I->getResult();
+        assert(Res.isClobber() != Res.isDef() &&
+               "Resolved non-local pointer dep should be def or clobber!");
+        InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
+                                                          Res.isClobber()),
+                                       I->getBB()));
+      }
+    }
+  }
+
+  return false;
+}
+
+void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
+  for (const_inst_iterator I = inst_begin(*F), E = inst_end(*F); I != E; ++I) {
+    const Instruction *Inst = &*I;
+
+    DepSetMap::const_iterator DI = Deps.find(Inst);
+    if (DI == Deps.end())
+      continue;
+
+    const DepSet &InstDeps = DI->second;
+
+    for (DepSet::const_iterator I = InstDeps.begin(), E = InstDeps.end();
+         I != E; ++I) {
+      const Instruction *DepInst = I->first.getPointer();
+      bool isClobber = I->first.getInt();
+      const BasicBlock *DepBB = I->second;
+
+      OS << "    " << (isClobber ? "Clobber" : "    Def");
+      if (DepBB) {
+        OS << " in block ";
+        WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
+      }
+      OS << " from: ";
+      if (DepInst == Inst)
+        OS << "<unspecified>";
+      else
+        DepInst->print(OS);
+      OS << "\n";
+    }
+
+    Inst->print(OS);
+    OS << "\n\n";
+  }
+}
diff --git a/final/lib/Analysis/MemoryBuiltins.cpp b/final/lib/Analysis/MemoryBuiltins.cpp
new file mode 100644
index 00000000000..1ab18ca054a
--- /dev/null
+++ b/final/lib/Analysis/MemoryBuiltins.cpp
@@ -0,0 +1,207 @@
+//===------ MemoryBuiltins.cpp - Identify calls to memory builtins --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions identifies calls to builtin functions that allocate
+// or free memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  malloc Call Utility Functions.
+//
+
+/// isMalloc - Returns true if the value is either a malloc call or a
+/// bitcast of the result of a malloc call.
+bool llvm::isMalloc(const Value *I) {
+  return extractMallocCall(I) || extractMallocCallFromBitCast(I);
+}
+
+static bool isMallocCall(const CallInst *CI) {
+  if (!CI)
+    return false;
+
+  Function *Callee = CI->getCalledFunction();
+  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc")
+    return false;
+
+  // Check malloc prototype.
+  // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+  // attribute will exist.
+  const FunctionType *FTy = Callee->getFunctionType();
+  if (FTy->getNumParams() != 1)
+    return false;
+  if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
+    if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
+/// extractMallocCall - Returns the corresponding CallInst if the instruction
+/// is a malloc call.  Since CallInst::CreateMalloc() only creates calls, we
+/// ignore InvokeInst here.
+const CallInst *llvm::extractMallocCall(const Value *I) {
+  const CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
+
+CallInst *llvm::extractMallocCall(Value *I) {
+  CallInst *CI = dyn_cast<CallInst>(I);
+  return (isMallocCall(CI)) ? CI : NULL;
+}
+
+static bool isBitCastOfMallocCall(const BitCastInst *BCI) {
+  if (!BCI)
+    return false;
+
+  return isMallocCall(dyn_cast<CallInst>(BCI->getOperand(0)));
+}
+
+/// extractMallocCallFromBitCast - Returns the corresponding CallInst if the
+/// instruction is a bitcast of the result of a malloc call.
+CallInst *llvm::extractMallocCallFromBitCast(Value *I) {
+  BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+  return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+                                      : NULL;
+}
+
+const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
+  const BitCastInst *BCI = dyn_cast<BitCastInst>(I);
+  return (isBitCastOfMallocCall(BCI)) ? cast<CallInst>(BCI->getOperand(0))
+                                      : NULL;
+}
+
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+                               bool LookThroughSExt = false) {
+  if (!CI)
+    return NULL;
+
+  // The size of the malloc's result type must be known to determine array size.
+  const Type *T = getMallocAllocatedType(CI);
+  if (!T || !T->isSized() || !TD)
+    return NULL;
+
+  unsigned ElementSize = TD->getTypeAllocSize(T);
+  if (const StructType *ST = dyn_cast<StructType>(T))
+    ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
+
+  // If malloc call's arg can be determined to be a multiple of ElementSize,
+  // return the multiple.  Otherwise, return NULL.
+  Value *MallocArg = CI->getArgOperand(0);
+  Value *Multiple = NULL;
+  if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+                      LookThroughSExt))
+    return Multiple;
+
+  return NULL;
+}
+
+/// isArrayMalloc - Returns the corresponding CallInst if the instruction
+/// is a call to malloc whose array size can be determined and the array size
+/// is not constant 1.  Otherwise, return NULL.
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
+  const CallInst *CI = extractMallocCall(I);
+  Value *ArraySize = computeArraySize(CI, TD);
+
+  if (ArraySize &&
+      ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
+    return CI;
+
+  // CI is a non-array malloc or we can't figure out that it is an array malloc.
+  return NULL;
+}
+
+/// getMallocType - Returns the PointerType resulting from the malloc call.
+/// The PointerType depends on the number of bitcast uses of the malloc call:
+///   0: PointerType is the calls' return type.
+///   1: PointerType is the bitcast's result type.
+///  >1: Unique PointerType cannot be determined, return NULL.
+const PointerType *llvm::getMallocType(const CallInst *CI) {
+  assert(isMalloc(CI) && "getMallocType and not malloc call");
+
+  const PointerType *MallocType = NULL;
+  unsigned NumOfBitCastUses = 0;
+
+  // Determine if CallInst has a bitcast use.
+  for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
+       UI != E; )
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
+      MallocType = cast<PointerType>(BCI->getDestTy());
+      NumOfBitCastUses++;
+    }
+
+  // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
+  if (NumOfBitCastUses == 1)
+    return MallocType;
+
+  // Malloc call was not bitcast, so type is the malloc function's return type.
+  if (NumOfBitCastUses == 0)
+    return cast<PointerType>(CI->getType());
+
+  // Type could not be determined.
+  return NULL;
+}
+
+/// getMallocAllocatedType - Returns the Type allocated by malloc call.
+/// The Type depends on the number of bitcast uses of the malloc call:
+///   0: PointerType is the malloc calls' return type.
+///   1: PointerType is the bitcast's result type.
+///  >1: Unique PointerType cannot be determined, return NULL.
+const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
+  const PointerType *PT = getMallocType(CI);
+  return PT ? PT->getElementType() : NULL;
+}
+
+/// getMallocArraySize - Returns the array size of a malloc call.  If the
+/// argument passed to malloc is a multiple of the size of the malloced type,
+/// then return that multiple.  For non-array mallocs, the multiple is
+/// constant 1.  Otherwise, return NULL for mallocs whose array size cannot be
+/// determined.
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+                                bool LookThroughSExt) {
+  assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+  return computeArraySize(CI, TD, LookThroughSExt);
+}
+
+//===----------------------------------------------------------------------===//
+//  free Call Utility Functions.
+//
+
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
+  const CallInst *CI = dyn_cast<CallInst>(I);
+  if (!CI)
+    return 0;
+  Function *Callee = CI->getCalledFunction();
+  if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free")
+    return 0;
+
+  // Check free prototype.
+  // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
+  // attribute will exist.
+  const FunctionType *FTy = Callee->getFunctionType();
+  if (!FTy->getReturnType()->isVoidTy())
+    return 0;
+  if (FTy->getNumParams() != 1)
+    return 0;
+  if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+    return 0;
+
+  return CI;
+}
diff --git a/final/lib/Analysis/MemoryDependenceAnalysis.cpp b/final/lib/Analysis/MemoryDependenceAnalysis.cpp
new file mode 100644
index 00000000000..35043bddfaf
--- /dev/null
+++ b/final/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -0,0 +1,1324 @@
+//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation  --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an analysis that determines, for a given memory
+// operation, what preceding memory operations it depends on.  It builds on
+// alias analysis information, and tries to provide a lazy, caching interface
+// to a common kind of alias information query.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "memdep"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+using namespace llvm;
+
+STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses");
+STATISTIC(NumCacheDirtyNonLocal, "Number of dirty cached non-local responses");
+STATISTIC(NumUncacheNonLocal, "Number of uncached non-local responses");
+
+STATISTIC(NumCacheNonLocalPtr,
+          "Number of fully cached non-local ptr responses");
+STATISTIC(NumCacheDirtyNonLocalPtr,
+          "Number of cached, but dirty, non-local ptr responses");
+STATISTIC(NumUncacheNonLocalPtr,
+          "Number of uncached non-local ptr responses");
+STATISTIC(NumCacheCompleteNonLocalPtr,
+          "Number of block queries that were completely cached");
+
+char MemoryDependenceAnalysis::ID = 0;
+
+// Register this pass...
+INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",
+                      "Memory Dependence Analysis", false, true)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",
+                    "Memory Dependence Analysis", false, true)
+
+MemoryDependenceAnalysis::MemoryDependenceAnalysis()
+: FunctionPass(ID), PredCache(0) {
+  initializeMemoryDependenceAnalysisPass(*PassRegistry::getPassRegistry());
+}
+MemoryDependenceAnalysis::~MemoryDependenceAnalysis() {
+}
+
+/// Clean up memory in between runs
+void MemoryDependenceAnalysis::releaseMemory() {
+  LocalDeps.clear();
+  NonLocalDeps.clear();
+  NonLocalPointerDeps.clear();
+  ReverseLocalDeps.clear();
+  ReverseNonLocalDeps.clear();
+  ReverseNonLocalPtrDeps.clear();
+  PredCache->clear();
+}
+
+
+
+/// getAnalysisUsage - Does not modify anything.  It uses Alias Analysis.
+///
+void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<AliasAnalysis>();
+}
+
+bool MemoryDependenceAnalysis::runOnFunction(Function &) {
+  AA = &getAnalysis<AliasAnalysis>();
+  TD = getAnalysisIfAvailable<TargetData>();
+  if (PredCache == 0)
+    PredCache.reset(new PredIteratorCache());
+  return false;
+}
+
+/// RemoveFromReverseMap - This is a helper function that removes Val from
+/// 'Inst's set in ReverseMap.  If the set becomes empty, remove Inst's entry.
+template <typename KeyTy>
+static void RemoveFromReverseMap(DenseMap<Instruction*,
+                                 SmallPtrSet<KeyTy, 4> > &ReverseMap,
+                                 Instruction *Inst, KeyTy Val) {
+  typename DenseMap<Instruction*, SmallPtrSet<KeyTy, 4> >::iterator
+  InstIt = ReverseMap.find(Inst);
+  assert(InstIt != ReverseMap.end() && "Reverse map out of sync?");
+  bool Found = InstIt->second.erase(Val);
+  assert(Found && "Invalid reverse map!"); (void)Found;
+  if (InstIt->second.empty())
+    ReverseMap.erase(InstIt);
+}
+
+/// GetLocation - If the given instruction references a specific memory
+/// location, fill in Loc with the details, otherwise set Loc.Ptr to null.
+/// Return a ModRefInfo value describing the general behavior of the
+/// instruction.
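+/// For example, for a non-volatile 'store i32 %v, i32* %p' (an illustrative
+/// instruction) Loc is filled in with %p plus the store's size and TBAA tag,
+/// and the function returns Mod; the corresponding non-volatile load would
+/// return Ref.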
+static
+AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
+                                        AliasAnalysis::Location &Loc,
+                                        AliasAnalysis *AA) {
+  if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+    if (LI->isVolatile()) {
+      Loc = AliasAnalysis::Location();
+      return AliasAnalysis::ModRef;
+    }
+    Loc = AA->getLocation(LI);
+    return AliasAnalysis::Ref;
+  }
+
+  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+    if (SI->isVolatile()) {
+      Loc = AliasAnalysis::Location();
+      return AliasAnalysis::ModRef;
+    }
+    Loc = AA->getLocation(SI);
+    return AliasAnalysis::Mod;
+  }
+
+  if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
+    Loc = AA->getLocation(V);
+    return AliasAnalysis::ModRef;
+  }
+
+  if (const CallInst *CI = isFreeCall(Inst)) {
+    // calls to free() deallocate the entire structure
+    Loc = AliasAnalysis::Location(CI->getArgOperand(0));
+    return AliasAnalysis::Mod;
+  }
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start:
+      Loc = AliasAnalysis::Location(II->getArgOperand(1),
+                                    cast<ConstantInt>(II->getArgOperand(0))
+                                      ->getZExtValue(),
+                                    II->getMetadata(LLVMContext::MD_tbaa));
+      // These intrinsics don't really modify the memory, but returning Mod
+      // will allow them to be handled conservatively.
+      return AliasAnalysis::Mod;
+    case Intrinsic::invariant_end:
+      Loc = AliasAnalysis::Location(II->getArgOperand(2),
+                                    cast<ConstantInt>(II->getArgOperand(1))
+                                      ->getZExtValue(),
+                                    II->getMetadata(LLVMContext::MD_tbaa));
+      // These intrinsics don't really modify the memory, but returning Mod
+      // will allow them to be handled conservatively.
+      return AliasAnalysis::Mod;
+    default:
+      break;
+    }
+
+  // Otherwise, just do the coarse-grained thing that always works.
+  if (Inst->mayWriteToMemory())
+    return AliasAnalysis::ModRef;
+  if (Inst->mayReadFromMemory())
+    return AliasAnalysis::Ref;
+  return AliasAnalysis::NoModRef;
+}
+
+/// getCallSiteDependencyFrom - Private helper for finding the local
+/// dependencies of a call site.
+MemDepResult MemoryDependenceAnalysis::
+getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
+                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
+  // Walk backwards through the block, looking for dependencies
+  while (ScanIt != BB->begin()) {
+    Instruction *Inst = --ScanIt;
+
+    // If this inst is a memory op, get the pointer it accessed
+    AliasAnalysis::Location Loc;
+    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA);
+    if (Loc.Ptr) {
+      // A simple instruction.
+      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef)
+        return MemDepResult::getClobber(Inst);
+      continue;
+    }
+
+    if (CallSite InstCS = cast<Instruction>(Inst)) {
+      // Debug intrinsics don't cause dependences.
+      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+      // If these two calls do not interfere, look past it.
+      switch (AA->getModRefInfo(CS, InstCS)) {
+      case AliasAnalysis::NoModRef:
+        // If the two calls are the same, return InstCS as a Def, so that
+        // CS can be found redundant and eliminated.
+        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) &&
+            CS.getInstruction()->isIdenticalToWhenDefined(Inst))
+          return MemDepResult::getDef(Inst);
+
+        // Otherwise if the two calls don't interact (e.g. InstCS is readnone)
+        // keep scanning.
+        break;
+      default:
+        return MemDepResult::getClobber(Inst);
+      }
+    }
+  }
+
+  // No dependence found.  If this is the entry block of the function, it is a
+  // clobber, otherwise it is non-local.
+  if (BB != &BB->getParent()->getEntryBlock())
+    return MemDepResult::getNonLocal();
+  return MemDepResult::getClobber(ScanIt);
+}
+
+/// getPointerDependencyFrom - Return the instruction on which a memory
+/// location depends.  If isLoad is true, this routine ignores may-aliases with
+/// read-only operations.  If isLoad is false, this routine ignores may-aliases
+/// with reads from read-only locations.
+MemDepResult MemoryDependenceAnalysis::
+getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
+                         BasicBlock::iterator ScanIt, BasicBlock *BB) {
+
+  Value *InvariantTag = 0;
+
+  // Walk backwards through the basic block, looking for dependencies.
+  while (ScanIt != BB->begin()) {
+    Instruction *Inst = --ScanIt;
+
+    // If we're in an invariant region, no dependencies can be found before
+    // we pass an invariant-begin marker.
+    if (InvariantTag == Inst) {
+      InvariantTag = 0;
+      continue;
+    }
+
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      // Debug intrinsics don't (and can't) cause dependences.
+      if (isa<DbgInfoIntrinsic>(II)) continue;
+
+      // If we pass an invariant-end marker, then we've just entered an
+      // invariant region and can start ignoring dependencies.
+      if (II->getIntrinsicID() == Intrinsic::invariant_end) {
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
+        AliasAnalysis::AliasResult R =
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc);
+        if (R == AliasAnalysis::MustAlias)
+          InvariantTag = II->getArgOperand(0);
+
+        continue;
+      }
+
+      // If we reach a lifetime begin or end marker, then the query ends here
+      // because the value is undefined.
+      if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
+        AliasAnalysis::AliasResult R =
+          AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc);
+        if (R == AliasAnalysis::MustAlias)
+          return MemDepResult::getDef(II);
+        continue;
+      }
+    }
+
+    // If we're querying on a load and we're in an invariant region, we're done
+    // at this point. Nothing a load depends on can live in an invariant region.
+    //
+    // FIXME: this will prevent us from returning load/load must-aliases, so GVN
+    // won't remove redundant loads.
+    if (isLoad && InvariantTag) continue;
+
+    // Values depend on loads if the pointers are must aliased.  This means that
+    // a load depends on another must aliased load from the same value.
+    if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+      AliasAnalysis::Location LoadLoc = AA->getLocation(LI);
+
+      // If we found a pointer, check if it could be the same as our pointer.
+      AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc);
+      if (R == AliasAnalysis::NoAlias)
+        continue;
+
+      // May-alias loads don't depend on each other without a dependence.
+      if (isLoad && R != AliasAnalysis::MustAlias)
+        continue;
+
+      // Stores don't alias loads from read-only memory.
+      if (!isLoad && AA->pointsToConstantMemory(LoadLoc))
+        continue;
+
+      // Stores depend on may and must aliased loads, loads depend on must-alias
+      // loads.
+      return MemDepResult::getDef(Inst);
+    }
+
+    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+      // There can't be stores to the value we care about inside an
+      // invariant region.
+      if (InvariantTag) continue;
+
+      // If alias analysis can tell that this store is guaranteed to not modify
+      // the query pointer, ignore it.  Use getModRefInfo to handle cases where
+      // the query pointer points to constant memory etc.
+      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef)
+        continue;
+
+      // Ok, this store might clobber the query pointer.  Check to see if it is
+      // a must alias: in this case, we want to return this as a def.
+      AliasAnalysis::Location StoreLoc = AA->getLocation(SI);
+
+      // If we found a pointer, check if it could be the same as our pointer.
+      AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc);
+
+      if (R == AliasAnalysis::NoAlias)
+        continue;
+      if (R == AliasAnalysis::MustAlias)
+        return MemDepResult::getDef(Inst);
+      return MemDepResult::getClobber(Inst);
+    }
+
+    // If this is an allocation, and if we know that the accessed pointer is to
+    // the allocation, return Def.  This means that there is no dependence and
+    // the access can be optimized based on that.  For example, a load could
+    // turn into undef.
+    // Note: Only determine this to be a malloc if Inst is the malloc call, not
+    // a subsequent bitcast of the malloc call result.  There can be stores to
+    // the malloced memory between the malloc call and its bitcast uses, and we
+    // need to continue scanning until the malloc call.
+    if (isa<AllocaInst>(Inst) ||
+        (isa<CallInst>(Inst) && extractMallocCall(Inst))) {
+      const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD);
+
+      if (AccessPtr == Inst ||
+          AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias)
+        return MemDepResult::getDef(Inst);
+      continue;
+    }
+
+    // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
+    switch (AA->getModRefInfo(Inst, MemLoc)) {
+    case AliasAnalysis::NoModRef:
+      // If the call has no effect on the queried pointer, just ignore it.
+      continue;
+    case AliasAnalysis::Mod:
+      // If we're in an invariant region, we can ignore calls that ONLY
+      // modify the pointer.
+      if (InvariantTag) continue;
+      return MemDepResult::getClobber(Inst);
+    case AliasAnalysis::Ref:
+      // If the call is known to never store to the pointer, and if this is a
+      // load query, we can safely ignore it (scan past it).
+      if (isLoad)
+        continue;
+    default:
+      // Otherwise, there is a potential dependence.  Return a clobber.
+      return MemDepResult::getClobber(Inst);
+    }
+  }
+
+  // No dependence found.  If this is the entry block of the function, it is a
+  // clobber, otherwise it is non-local.
+  if (BB != &BB->getParent()->getEntryBlock())
+    return MemDepResult::getNonLocal();
+  return MemDepResult::getClobber(ScanIt);
+}
+
+/// getDependency - Return the instruction on which a memory operation
+/// depends.
+MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
+  Instruction *ScanPos = QueryInst;
+
+  // Check for a cached result
+  MemDepResult &LocalCache = LocalDeps[QueryInst];
+
+  // If the cached entry is non-dirty, just return it.  Note that this depends
+  // on MemDepResult's default constructing to 'dirty'.
+  if (!LocalCache.isDirty())
+    return LocalCache;
+
+  // Otherwise, if we have a dirty entry, we know we can start the scan at that
+  // instruction, which may save us some work.
+  if (Instruction *Inst = LocalCache.getInst()) {
+    ScanPos = Inst;
+
+    RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst);
+  }
+
+  BasicBlock *QueryParent = QueryInst->getParent();
+
+  // Do the scan.
+  if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
+    // No dependence found.  If this is the entry block of the function, it is
+    // a clobber, otherwise it is non-local.
+    if (QueryParent != &QueryParent->getParent()->getEntryBlock())
+      LocalCache = MemDepResult::getNonLocal();
+    else
+      LocalCache = MemDepResult::getClobber(QueryInst);
+  } else {
+    AliasAnalysis::Location MemLoc;
+    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
+    if (MemLoc.Ptr) {
+      // If we can do a pointer scan, make it happen.
+      bool isLoad = !(MR & AliasAnalysis::Mod);
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+        isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end;
+
+      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos,
+                                            QueryParent);
+    } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
+      CallSite QueryCS(QueryInst);
+      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+                                             QueryParent);
+    } else
+      // Non-memory instruction.
+      LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+  }
+
+  // Remember the result!
+  if (Instruction *I = LocalCache.getInst())
+    ReverseLocalDeps[I].insert(QueryInst);
+
+  return LocalCache;
+}
+
+#ifndef NDEBUG
+/// AssertSorted - This method is used when -debug is specified to verify that
+/// cache arrays are properly kept sorted.
+static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+                         int Count = -1) {
+  if (Count == -1) Count = Cache.size();
+  if (Count == 0) return;
+
+  for (unsigned i = 1; i != unsigned(Count); ++i)
+    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
+}
+#endif
+
+/// getNonLocalCallDependency - Perform a full dependency query for the
+/// specified call, returning the set of blocks that the value is
+/// potentially live across.  The returned set of results will include a
+/// "NonLocal" result for all blocks where the value is live across.
+///
+/// This method assumes the instruction returns a "NonLocal" dependency
+/// within its own block.
+///
+/// This returns a reference to an internal data structure that may be
+/// invalidated on the next non-local query or when an instruction is
+/// removed.  Clients must copy this data if they want it around longer than
+/// that.
+const MemoryDependenceAnalysis::NonLocalDepInfo &
+MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
+  assert(getDependency(QueryCS.getInstruction()).isNonLocal() &&
+         "getNonLocalCallDependency should only be used on calls with non-local deps!");
+  PerInstNLInfo &CacheP = NonLocalDeps[QueryCS.getInstruction()];
+  NonLocalDepInfo &Cache = CacheP.first;
+
+  /// DirtyBlocks - This is the set of blocks that need to be recomputed.  In
+  /// the cached case, this can happen due to instructions being deleted etc.
+  /// In the uncached case, this starts out as the set of predecessors we care
+  /// about.
+  SmallVector<BasicBlock*, 32> DirtyBlocks;
+
+  if (!Cache.empty()) {
+    // Okay, we have a cache entry.  If we know it is not dirty, just return it
+    // with no computation.
+    if (!CacheP.second) {
+      ++NumCacheNonLocal;
+      return Cache;
+    }
+
+    // If we already have a partially computed set of results, scan them to
+    // determine what is dirty, seeding our initial DirtyBlocks worklist.
+    for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
+         I != E; ++I)
+      if (I->getResult().isDirty())
+        DirtyBlocks.push_back(I->getBB());
+
+    // Sort the cache so that we can do fast binary search lookups below.
+    std::sort(Cache.begin(), Cache.end());
+
+    ++NumCacheDirtyNonLocal;
+    //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: "
+    //     << Cache.size() << " cached: " << *QueryInst;
+  } else {
+    // Seed DirtyBlocks with each of the preds of QueryInst's block.
+    BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
+    for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
+      DirtyBlocks.push_back(*PI);
+    ++NumUncacheNonLocal;
+  }
+
+  // isReadonlyCall - If this is a read-only call, we can be more aggressive.
+  bool isReadonlyCall = AA->onlyReadsMemory(QueryCS);
+
+  SmallPtrSet<BasicBlock*, 64> Visited;
+
+  unsigned NumSortedEntries = Cache.size();
+  DEBUG(AssertSorted(Cache));
+
+  // Iterate while we still have blocks to update.
+  while (!DirtyBlocks.empty()) {
+    BasicBlock *DirtyBB = DirtyBlocks.back();
+    DirtyBlocks.pop_back();
+
+    // Already processed this block?
+    if (!Visited.insert(DirtyBB))
+      continue;
+
+    // Do a binary search to see if we already have an entry for this block in
+    // the cache set.  If so, find it.
+    DEBUG(AssertSorted(Cache, NumSortedEntries));
+    NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
+                       NonLocalDepEntry(DirtyBB));
+    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
+      --Entry;
+
+    NonLocalDepEntry *ExistingResult = 0;
+    if (Entry != Cache.begin()+NumSortedEntries &&
+        Entry->getBB() == DirtyBB) {
+      // If we already have an entry, and if it isn't already dirty, the block
+      // is done.
+      if (!Entry->getResult().isDirty())
+        continue;
+
+      // Otherwise, remember this slot so we can update the value.
+      ExistingResult = &*Entry;
+    }
+
+    // If the dirty entry has a pointer, start scanning from it so we don't
+    // have to rescan the entire block.
+    BasicBlock::iterator ScanPos = DirtyBB->end();
+    if (ExistingResult) {
+      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
+        ScanPos = Inst;
+        // We're removing QueryInst's use of Inst.
+        RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
+                             QueryCS.getInstruction());
+      }
+    }
+
+    // Find out if this block has a local dependency for QueryInst.
+    MemDepResult Dep;
+
+    if (ScanPos != DirtyBB->begin()) {
+      Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
+    } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
+      // No dependence found.  If this is the entry block of the function, it is
+      // a clobber, otherwise it is non-local.
+      Dep = MemDepResult::getNonLocal();
+    } else {
+      Dep = MemDepResult::getClobber(ScanPos);
+    }
+
+    // If we had a dirty entry for the block, update it.  Otherwise, just add
+    // a new entry.
+    if (ExistingResult)
+      ExistingResult->setResult(Dep);
+    else
+      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep));
+
+    // If the block has a dependency (i.e. it isn't completely transparent to
+    // the value), remember the association!
+    if (!Dep.isNonLocal()) {
+      // Keep the ReverseNonLocalDeps map up to date so we can efficiently
+      // update this when we remove instructions.
+      if (Instruction *Inst = Dep.getInst())
+        ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction());
+    } else {
+
+      // If the block *is* completely transparent to the load, we need to check
+      // the predecessors of this block.  Add them to our worklist.
+      for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI)
+        DirtyBlocks.push_back(*PI);
+    }
+  }
+
+  return Cache;
+}
+
+/// getNonLocalPointerDependency - Perform a full dependency query for an
+/// access to the specified (non-volatile) memory location, returning the
+/// set of instructions that either define or clobber the value.
+///
+/// This method assumes the pointer has a "NonLocal" dependency within its
+/// own block.
+///
+void MemoryDependenceAnalysis::
+getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
+                             BasicBlock *FromBB,
+                             SmallVectorImpl<NonLocalDepResult> &Result) {
+  assert(Loc.Ptr->getType()->isPointerTy() &&
+         "Can't get pointer deps of a non-pointer!");
+  Result.clear();
+
+  PHITransAddr Address(const_cast<Value *>(Loc.Ptr), TD);
+
+  // This is the set of blocks we've inspected, and the pointer we consider in
+  // each block.  Because of critical edges, we currently bail out if querying
+  // a block with multiple different pointers.  This can happen during PHI
+  // translation.
+  DenseMap<BasicBlock*, Value*> Visited;
+  if (!getNonLocalPointerDepFromBB(Address, Loc, isLoad, FromBB,
+                                   Result, Visited, true))
+    return;
+  Result.clear();
+  Result.push_back(NonLocalDepResult(FromBB,
+                                     MemDepResult::getClobber(FromBB->begin()),
+                                     const_cast<Value *>(Loc.Ptr)));
+}
+
+/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
+/// Pointer/PointeeSize using either cached information in Cache or by doing a
+/// lookup (which may use dirty cache info if available).  If we do a lookup,
+/// add the result to the cache.
+MemDepResult MemoryDependenceAnalysis::
+GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
+                        bool isLoad, BasicBlock *BB,
+                        NonLocalDepInfo *Cache, unsigned NumSortedEntries) {
+
+  // Do a binary search to see if we already have an entry for this block in
+  // the cache set.  If so, find it.
+  NonLocalDepInfo::iterator Entry =
+    std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
+                     NonLocalDepEntry(BB));
+  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
+    --Entry;
+
+  NonLocalDepEntry *ExistingResult = 0;
+  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+    ExistingResult = &*Entry;
+
+  // If we have a cached entry, and it is non-dirty, use it as the value for
+  // this dependency.
+  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
+    ++NumCacheNonLocalPtr;
+    return ExistingResult->getResult();
+  }
+
+  // Otherwise, we have to scan for the value.  If we have a dirty cache
+  // entry, start scanning from its position, otherwise we scan from the end
+  // of the block.
+  BasicBlock::iterator ScanPos = BB->end();
+  if (ExistingResult && ExistingResult->getResult().getInst()) {
+    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
+           "Instruction invalidated?");
+    ++NumCacheDirtyNonLocalPtr;
+    ScanPos = ExistingResult->getResult().getInst();
+
+    // Eliminating the dirty entry from 'Cache', so update the reverse info.
+    ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey);
+  } else {
+    ++NumUncacheNonLocalPtr;
+  }
+
+  // Scan the block for the dependency.
+  MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB);
+
+  // If we had a dirty entry for the block, update it.  Otherwise, just add
+  // a new entry.
+  if (ExistingResult)
+    ExistingResult->setResult(Dep);
+  else
+    Cache->push_back(NonLocalDepEntry(BB, Dep));
+
+  // If the block has a dependency (i.e. it isn't completely transparent to
+  // the value), remember the reverse association because we just added it
+  // to Cache!
+  if (Dep.isNonLocal())
+    return Dep;
+
+  // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
+  // update MemDep when we remove instructions.
+  Instruction *Inst = Dep.getInst();
+  assert(Inst && "Didn't depend on anything?");
+  ValueIsLoadPair CacheKey(Loc.Ptr, isLoad);
+  ReverseNonLocalPtrDeps[Inst].insert(CacheKey);
+  return Dep;
+}
+
+/// SortNonLocalDepInfoCache - Sort a NonLocalDepInfo cache, given a certain
+/// number of elements in the array that are already properly ordered.  This is
+/// optimized for the case when only a few entries are added.
+static void
+SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
+                         unsigned NumSortedEntries) {
+  switch (Cache.size() - NumSortedEntries) {
+  case 0:
+    // done, no new entries.
+    break;
+  case 2: {
+    // Two new entries, insert the last one into place.
+    NonLocalDepEntry Val = Cache.back();
+    Cache.pop_back();
+    MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+      std::upper_bound(Cache.begin(), Cache.end()-1, Val);
+    Cache.insert(Entry, Val);
+    // FALL THROUGH.
+  }
+  case 1:
+    // One new entry, just insert the new value at the appropriate position.
+    if (Cache.size() != 1) {
+      NonLocalDepEntry Val = Cache.back();
+      Cache.pop_back();
+      MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
+        std::upper_bound(Cache.begin(), Cache.end(), Val);
+      Cache.insert(Entry, Val);
+    }
+    break;
+  default:
+    // Added many values, do a full scale sort.
+    std::sort(Cache.begin(), Cache.end());
+    break;
+  }
+}
+
+/// getNonLocalPointerDepFromBB - Perform a dependency query based on
+/// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
+/// results to the results vector and keep track of which blocks are visited in
+/// 'Visited'.
+///
+/// This has special behavior for the first block queries (when SkipFirstBlock
+/// is true).  In this special case, it ignores the contents of the specified
+/// block and starts returning dependence info for its predecessors.
+///
+/// This function returns false on success, or true to indicate that it could
+/// not compute dependence information for some reason.  This should be treated
+/// as a clobber dependence on the first instruction in the predecessor block.
+bool MemoryDependenceAnalysis::
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
+                            const AliasAnalysis::Location &Loc,
+                            bool isLoad, BasicBlock *StartBB,
+                            SmallVectorImpl<NonLocalDepResult> &Result,
+                            DenseMap<BasicBlock*, Value*> &Visited,
+                            bool SkipFirstBlock) {
+
+  // Look up the cached info for Pointer.
+  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
+
+  // Set up a temporary NLPI value.  If the map doesn't yet have an entry for
+  // CacheKey, this value will be inserted as the associated value.  Otherwise,
+  // it'll be ignored, and we'll have to check to see if the cached size and
+  // tbaa tag are consistent with the current query.
+  NonLocalPointerInfo InitialNLPI;
+  InitialNLPI.Size = Loc.Size;
+  InitialNLPI.TBAATag = Loc.TBAATag;
+
+  // Get the NLPI for CacheKey, inserting one into the map if it doesn't
+  // already have one.
+  std::pair<CachedNonLocalPointerInfo::iterator, bool> Pair =
+    NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI));
+  NonLocalPointerInfo *CacheInfo = &Pair.first->second;
+
+  // If we already have a cache entry for this CacheKey, we may need to do some
+  // work to reconcile the cache entry and the current query.
+  if (!Pair.second) {
+    if (CacheInfo->Size < Loc.Size) {
+      // The query's Size is greater than the cached one.  Throw out the
+      // cached data and proceed with the query at the greater size.
+      CacheInfo->Pair = BBSkipFirstBlockPair();
+      CacheInfo->Size = Loc.Size;
+      for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+           DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+        if (Instruction *Inst = DI->getResult().getInst())
+          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+      CacheInfo->NonLocalDeps.clear();
+    } else if (CacheInfo->Size > Loc.Size) {
+      // This query's Size is less than the cached one.  Conservatively restart
+      // the query using the greater size.
+      return getNonLocalPointerDepFromBB(Pointer,
+                                         Loc.getWithNewSize(CacheInfo->Size),
+                                         isLoad, StartBB, Result, Visited,
+                                         SkipFirstBlock);
+    }
+
+    // If the query's TBAATag is inconsistent with the cached one,
+    // conservatively throw out the cached data and restart the query with
+    // no tag if needed.
+    if (CacheInfo->TBAATag != Loc.TBAATag) {
+      if (CacheInfo->TBAATag) {
+        CacheInfo->Pair = BBSkipFirstBlockPair();
+        CacheInfo->TBAATag = 0;
+        for (NonLocalDepInfo::iterator DI = CacheInfo->NonLocalDeps.begin(),
+             DE = CacheInfo->NonLocalDeps.end(); DI != DE; ++DI)
+          if (Instruction *Inst = DI->getResult().getInst())
+            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+        CacheInfo->NonLocalDeps.clear();
+      }
+      if (Loc.TBAATag)
+        return getNonLocalPointerDepFromBB(Pointer, Loc.getWithoutTBAATag(),
+                                           isLoad, StartBB, Result, Visited,
+                                           SkipFirstBlock);
+    }
+  }
+
+  NonLocalDepInfo *Cache = &CacheInfo->NonLocalDeps;
+
+  // If we have valid cached information for exactly the block we are
+  // investigating, just return it with no recomputation.
+  if (CacheInfo->Pair == BBSkipFirstBlockPair(StartBB, SkipFirstBlock)) {
+    // If we have a fully cached result for this query, we can just return the
+    // cached results and populate the visited set.  However, we have to verify
+    // that we don't already have conflicting results for these blocks.  Check
+    // to ensure that if a block in the results set is in the visited set that
+    // it was for the same pointer query.
+    if (!Visited.empty()) {
+      for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+           I != E; ++I) {
+        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+        if (VI == Visited.end() || VI->second == Pointer.getAddr())
+          continue;
+
+        // We have a pointer mismatch in a block.  Just return clobber, saying
+        // that something was clobbered in this result.  We could also do a
+        // non-fully cached query, but there is little point in doing this.
+        return true;
+      }
+    }
+
+    Value *Addr = Pointer.getAddr();
+    for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
+         I != E; ++I) {
+      Visited.insert(std::make_pair(I->getBB(), Addr));
+      if (!I->getResult().isNonLocal())
+        Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(), Addr));
+    }
+    ++NumCacheCompleteNonLocalPtr;
+    return false;
+  }
+
+  // Otherwise, either this is a new block, a block with an invalid cache
+  // pointer or one that we're about to invalidate by putting more info into it
+  // than its valid cache info.  If empty, the result will be valid cache info,
+  // otherwise it isn't.
+  if (Cache->empty())
+    CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock);
+  else
+    CacheInfo->Pair = BBSkipFirstBlockPair();
+
+  SmallVector<BasicBlock*, 32> Worklist;
+  Worklist.push_back(StartBB);
+
+  // Keep track of the entries that we know are sorted.
Previously cached + // entries will all be sorted. The entries we add we only sort on demand (we + // don't insert every element into its sorted position). We know that we + // won't get any reuse from currently inserted values, because we don't + // revisit blocks after we insert info for them. + unsigned NumSortedEntries = Cache->size(); + DEBUG(AssertSorted(*Cache)); + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + + // Skip the first block if we have it. + if (!SkipFirstBlock) { + // Analyze the dependency of *Pointer in FromBB. See if we already have + // been here. + assert(Visited.count(BB) && "Should check 'visited' before adding to WL"); + + // Get the dependency info for Pointer in BB. If we have cached + // information, we will use it, otherwise we compute it. + DEBUG(AssertSorted(*Cache, NumSortedEntries)); + MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, + NumSortedEntries); + + // If we got a Def or Clobber, add this to the list of results. + if (!Dep.isNonLocal()) { + Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); + continue; + } + } + + // If 'Pointer' is an instruction defined in this block, then we need to do + // phi translation to change it into a value live in the predecessor block. + // If not, we just add the predecessors to the worklist and scan them with + // the same Pointer. + if (!Pointer.NeedsPHITranslationFromBlock(BB)) { + SkipFirstBlock = false; + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + // Verify that we haven't looked at this block yet. + std::pair::iterator, bool> + InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr())); + if (InsertRes.second) { + // First time we've looked at *PI. + Worklist.push_back(*PI); + continue; + } + + // If we have seen this block before, but it was with a different + // pointer then we have a phi translation failure and we have to treat + // this as a clobber. + if (InsertRes.first->second != Pointer.getAddr()) + goto PredTranslationFailure; + } + continue; + } + + // We do need to do phi translation, if we know ahead of time we can't phi + // translate this value, don't even try. + if (!Pointer.IsPotentiallyPHITranslatable()) + goto PredTranslationFailure; + + // We may have added values to the cache list before this PHI translation. + // If so, we haven't done anything to ensure that the cache remains sorted. + // Sort it now (if needed) so that recursive invocations of + // getNonLocalPointerDepFromBB and other routines that could reuse the cache + // value will only see properly sorted cache arrays. + if (Cache && NumSortedEntries != Cache->size()) { + SortNonLocalDepInfoCache(*Cache, NumSortedEntries); + NumSortedEntries = Cache->size(); + } + Cache = 0; + + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + + // Get the PHI translated pointer in this predecessor. This can fail if + // not translatable, in which case the getAddr() returns null. + PHITransAddr PredPointer(Pointer); + PredPointer.PHITranslateValue(BB, Pred, 0); + + Value *PredPtrVal = PredPointer.getAddr(); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. 
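The Visited map above encodes the invariant that each block is examined with at most one pointer per top-level query. Roughly, the check reduces to the following, shown here with std::unordered_map standing in for LLVM's DenseMap (types are illustrative, not the pass's own):

    #include <unordered_map>

    struct Block;
    struct Pointer;

    // Returns false when 'BB' was already visited with a different pointer;
    // the pass treats that case as a PHI-translation conflict (a clobber).
    bool markVisited(std::unordered_map<Block*, Pointer*> &Visited,
                     Block *BB, Pointer *Ptr) {
      auto InsertRes = Visited.insert({BB, Ptr});
      if (InsertRes.second)
        return true;                         // first visit to this block
      return InsertRes.first->second == Ptr; // ok only if same pointer
    }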
+ std::pair::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal)); + + if (!InsertRes.second) { + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtrVal) + continue; + + // Otherwise, the block was previously analyzed with a different + // pointer. We can't represent the result of this case, so we just + // treat this as a phi translation failure. + goto PredTranslationFailure; + } + + // If PHI translation was unable to find an available pointer in this + // predecessor, then we have to assume that the pointer is clobbered in + // that predecessor. We can still do PRE of the load, which would insert + // a computation of the pointer in this predecessor. + if (PredPtrVal == 0) { + // Add the entry to the Result list. + NonLocalDepResult Entry(Pred, + MemDepResult::getClobber(Pred->getTerminator()), + PredPtrVal); + Result.push_back(Entry); + + // Since we had a phi translation failure, the cache for CacheKey won't + // include all of the entries that we need to immediately satisfy future + // queries. Mark this in NonLocalPointerDeps by setting the + // BBSkipFirstBlockPair pointer to null. This requires reuse of the + // cached value to do more work but not miss the phi trans failure. + NonLocalPointerInfo &NLPI = NonLocalPointerDeps[CacheKey]; + NLPI.Pair = BBSkipFirstBlockPair(); + continue; + } + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. + + // If we have a problem phi translating, fall through to the code below + // to handle the failure condition. + if (getNonLocalPointerDepFromBB(PredPointer, + Loc.getWithNewPtr(PredPointer.getAddr()), + isLoad, Pred, + Result, Visited)) + goto PredTranslationFailure; + } + + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + + PredTranslationFailure: + + if (Cache == 0) { + // Refresh the CacheInfo/Cache pointer if it got invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->NonLocalDeps; + NumSortedEntries = Cache->size(); + } + + // Since we failed phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set". Clear out the indicator for this. + CacheInfo->Pair = BBSkipFirstBlockPair(); + + // If *nothing* works, mark the pointer as being clobbered by the first + // instruction in this block. + // + // If this is the magic first block, return this as a clobber of the whole + // incoming value. Since we can't phi translate to one of the predecessors, + // we have to bail out. 
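For intuition about the recursion above: PHI translation rewrites the queried address into a value meaningful in the predecessor, and the base case is exactly the FIXME's example, where X = phi [x, bb1], [y, bb2] translates to x along the edge from bb1. A toy sketch of that base case, with illustrative types rather than the real PHITransAddr interface:

    #include <map>

    struct BB;
    struct Val {
      // Incoming value per predecessor if this value is a phi; empty otherwise.
      std::map<BB*, Val*> PhiIncoming;
    };

    // Translate V into predecessor Pred.  A null result models the failure
    // case, which the pass records as a clobber at Pred's terminator.
    Val *phiTranslate(Val *V, BB *Pred) {
      if (V->PhiIncoming.empty())
        return V;                    // not a phi: already valid in Pred
      auto It = V->PhiIncoming.find(Pred);
      return It == V->PhiIncoming.end() ? nullptr : It->second;
    }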
+  if (SkipFirstBlock)
+    return true;
+
+  for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
+    assert(I != Cache->rend() && "Didn't find current block??");
+    if (I->getBB() != BB)
+      continue;
+
+    assert(I->getResult().isNonLocal() &&
+           "Should only be here with transparent block");
+    I->setResult(MemDepResult::getClobber(BB->begin()));
+    ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
+    Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
+                                       Pointer.getAddr()));
+    break;
+  }
+  }
+
+  // Okay, we're done now.  If we added new values to the cache, re-sort it.
+  SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
+  DEBUG(AssertSorted(*Cache));
+  return false;
+}
+
+/// RemoveCachedNonLocalPointerDependencies - If P exists in
+/// CachedNonLocalPointerInfo, remove it.
+void MemoryDependenceAnalysis::
+RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
+  CachedNonLocalPointerInfo::iterator It =
+    NonLocalPointerDeps.find(P);
+  if (It == NonLocalPointerDeps.end()) return;
+
+  // Remove all of the entries in the BB->val map.  This involves removing
+  // instructions from the reverse map.
+  NonLocalDepInfo &PInfo = It->second.NonLocalDeps;
+
+  for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
+    Instruction *Target = PInfo[i].getResult().getInst();
+    if (Target == 0) continue;  // Ignore non-local dep results.
+    assert(Target->getParent() == PInfo[i].getBB());
+
+    // Eliminating the dirty entry from 'Cache', so update the reverse info.
+    RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
+  }
+
+  // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo).
+  NonLocalPointerDeps.erase(It);
+}
+
+
+/// invalidateCachedPointerInfo - This method is used to invalidate cached
+/// information about the specified pointer, because it may be too
+/// conservative in memdep.  This is an optional call that can be used when
+/// the client detects an equivalence between the pointer and some other
+/// value and replaces the other value with Ptr.  This can make Ptr available
+/// in more places than the cached info would otherwise reflect.
+void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
+  // If Ptr isn't really a pointer, just ignore it.
+  if (!Ptr->getType()->isPointerTy()) return;
+  // Flush store info for the pointer.
+  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
+  // Flush load info for the pointer.
+  RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
+}
+
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+  PredCache->clear();
+}
+
+/// removeInstruction - Remove an instruction from the dependence analysis,
+/// updating the dependence of instructions that previously depended on it.
+/// This method attempts to keep the cache coherent using the reverse map.
+void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
+  // Walk through the Non-local dependencies, removing this one as the value
+  // for any cached queries.
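RemoveFromReverseMap, used repeatedly in this file, is ordinary multimap cleanup: erase one value from the set stored under the instruction, and drop the key once its set is empty. A generic sketch of the pattern, under assumed container types (the pass itself uses DenseMap and SmallPtrSet):

    #include <cassert>
    #include <map>
    #include <set>

    // Erase 'Val' from the set stored under 'Key', removing the key entirely
    // when its set becomes empty, as the reverse-map bookkeeping above does.
    template <typename K, typename V>
    void removeFromReverseMap(std::map<K, std::set<V> > &ReverseMap,
                              const K &Key, const V &Val) {
      typename std::map<K, std::set<V> >::iterator It = ReverseMap.find(Key);
      assert(It != ReverseMap.end() && "reverse map out of sync");
      It->second.erase(Val);
      if (It->second.empty())
        ReverseMap.erase(It);
    }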
+ NonLocalDepMapType::iterator NLDI = NonLocalDeps.find(RemInst); + if (NLDI != NonLocalDeps.end()) { + NonLocalDepInfo &BlockMap = NLDI->second.first; + for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end(); + DI != DE; ++DI) + if (Instruction *Inst = DI->getResult().getInst()) + RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst); + NonLocalDeps.erase(NLDI); + } + + // If we have a cached local dependence query for this instruction, remove it. + // + LocalDepMapType::iterator LocalDepEntry = LocalDeps.find(RemInst); + if (LocalDepEntry != LocalDeps.end()) { + // Remove us from DepInst's reverse set now that the local dep info is gone. + if (Instruction *Inst = LocalDepEntry->second.getInst()) + RemoveFromReverseMap(ReverseLocalDeps, Inst, RemInst); + + // Remove this local dependency info. + LocalDeps.erase(LocalDepEntry); + } + + // If we have any cached pointer dependencies on this instruction, remove + // them. If the instruction has non-pointer type, then it can't be a pointer + // base. + + // Remove it from both the load info and the store info. The instruction + // can't be in either of these maps if it is non-pointer. + if (RemInst->getType()->isPointerTy()) { + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); + RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); + } + + // Loop over all of the things that depend on the instruction we're removing. + // + SmallVector, 8> ReverseDepsToAdd; + + // If we find RemInst as a clobber or Def in any of the maps for other values, + // we need to replace its entry with a dirty version of the instruction after + // it. If RemInst is a terminator, we use a null dirty value. + // + // Using a dirty version of the instruction after RemInst saves having to scan + // the entire block to get to this point. + MemDepResult NewDirtyVal; + if (!RemInst->isTerminator()) + NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); + + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseLocalDeps.end()) { + SmallPtrSet &ReverseDeps = ReverseDepIt->second; + // RemInst can't be the terminator if it has local stuff depending on it. + assert(!ReverseDeps.empty() && !isa(RemInst) && + "Nothing can locally depend on a terminator"); + + for (SmallPtrSet::iterator I = ReverseDeps.begin(), + E = ReverseDeps.end(); I != E; ++I) { + Instruction *InstDependingOnRemInst = *I; + assert(InstDependingOnRemInst != RemInst && + "Already removed our local dep info"); + + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; + + // Make sure to remember that new things depend on NewDepInst. + assert(NewDirtyVal.getInst() && "There is no way something else can have " + "a local dep on this if it is a terminator!"); + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + InstDependingOnRemInst)); + } + + ReverseLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating the + // 'ReverseDeps' reference. 
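The ReverseDepsToAdd buffer exists because inserting into ReverseLocalDeps while iterating a set borrowed from it could invalidate that reference (the map may grow and rehash). The deferred-mutation pattern, in miniature and with illustrative types:

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    // Sketch: never insert into 'M' while holding a reference to one of its
    // buckets; queue the insertions and apply them after the loop.
    void retargetDeps(std::map<int, std::set<int> > &M, int OldKey, int NewKey) {
      std::vector<std::pair<int, int> > ToAdd;
      std::map<int, std::set<int> >::iterator It = M.find(OldKey);
      if (It == M.end()) return;
      for (std::set<int>::iterator I = It->second.begin(),
           E = It->second.end(); I != E; ++I)
        ToAdd.push_back(std::make_pair(NewKey, *I)); // record, don't insert
      M.erase(It);                                   // done with the bucket
      for (unsigned i = 0, e = ToAdd.size(); i != e; ++i)
        M[ToAdd[i].first].insert(ToAdd[i].second);   // safe: loop is over
    }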
+ while (!ReverseDepsToAdd.empty()) { + ReverseLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); + if (ReverseDepIt != ReverseNonLocalDeps.end()) { + SmallPtrSet &Set = ReverseDepIt->second; + for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); + I != E; ++I) { + assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); + + PerInstNLInfo &INLD = NonLocalDeps[*I]; + // The information is now dirty! + INLD.second = true; + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + DE = INLD.first.end(); DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NextI = NewDirtyVal.getInst()) + ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); + } + } + + ReverseNonLocalDeps.erase(ReverseDepIt); + + // Add new reverse deps after scanning the set, to avoid invalidating 'Set' + while (!ReverseDepsToAdd.empty()) { + ReverseNonLocalDeps[ReverseDepsToAdd.back().first] + .insert(ReverseDepsToAdd.back().second); + ReverseDepsToAdd.pop_back(); + } + } + + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a + // value in the NonLocalPointerDeps info. + ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = + ReverseNonLocalPtrDeps.find(RemInst); + if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { + SmallPtrSet &Set = ReversePtrDepIt->second; + SmallVector,8> ReversePtrDepsToAdd; + + for (SmallPtrSet::iterator I = Set.begin(), + E = Set.end(); I != E; ++I) { + ValueIsLoadPair P = *I; + assert(P.getPointer() != RemInst && + "Already removed NonLocalPointerDeps info for RemInst"); + + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; + + // The cache is not valid for any specific block anymore. + NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); + + // Update any entries for RemInst to use the instruction after it. + for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); + DI != DE; ++DI) { + if (DI->getResult().getInst() != RemInst) continue; + + // Convert to a dirty entry for the subsequent instruction. + DI->setResult(NewDirtyVal); + + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) + ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); + } + + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its + // subsequent value may invalidate the sortedness. + std::sort(NLPDI.begin(), NLPDI.end()); + } + + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); + + while (!ReversePtrDepsToAdd.empty()) { + ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] + .insert(ReversePtrDepsToAdd.back().second); + ReversePtrDepsToAdd.pop_back(); + } + } + + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); + AA->deleteValue(RemInst); + DEBUG(verifyRemoved(RemInst)); +} +/// verifyRemoved - Verify that the specified instruction does not occur +/// in our internal data structures. 
+void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
+  for (LocalDepMapType::const_iterator I = LocalDeps.begin(),
+       E = LocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    assert(I->second.getInst() != D &&
+           "Inst occurs in data structures");
+  }
+
+  for (CachedNonLocalPointerInfo::const_iterator
+       I = NonLocalPointerDeps.begin(),
+       E = NonLocalPointerDeps.end(); I != E; ++I) {
+    assert(I->first.getPointer() != D && "Inst occurs in NLPD map key");
+    const NonLocalDepInfo &Val = I->second.NonLocalDeps;
+    for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
+         II != E; ++II)
+      assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
+  }
+
+  for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
+       E = NonLocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    const PerInstNLInfo &INLD = I->second;
+    for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
+         EE = INLD.first.end(); II != EE; ++II)
+      assert(II->getResult().getInst() != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
+       E = ReverseLocalDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+         EE = I->second.end(); II != EE; ++II)
+      assert(*II != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(),
+       E = ReverseNonLocalDeps.end();
+       I != E; ++I) {
+    assert(I->first != D && "Inst occurs in data structures");
+    for (SmallPtrSet<Instruction*, 4>::const_iterator II = I->second.begin(),
+         EE = I->second.end(); II != EE; ++II)
+      assert(*II != D && "Inst occurs in data structures");
+  }
+
+  for (ReverseNonLocalPtrDepTy::const_iterator
+       I = ReverseNonLocalPtrDeps.begin(),
+       E = ReverseNonLocalPtrDeps.end(); I != E; ++I) {
+    assert(I->first != D && "Inst occurs in rev NLPD map");
+
+    for (SmallPtrSet<ValueIsLoadPair, 4>::const_iterator II = I->second.begin(),
+         E = I->second.end(); II != E; ++II)
+      assert(*II != ValueIsLoadPair(D, false) &&
+             *II != ValueIsLoadPair(D, true) &&
+             "Inst occurs in ReverseNonLocalPtrDeps map");
+  }
+
+}
diff --git a/final/lib/Analysis/ModuleDebugInfoPrinter.cpp b/final/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 00000000000..e7e999cebeb
--- /dev/null
+++ b/final/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,87 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints it in a
+// (sufficiently prepared) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +namespace { + class ModuleDebugInfoPrinter : public ModulePass { + DebugInfoFinder Finder; + public: + static char ID; // Pass identification, replacement for typeid + ModuleDebugInfoPrinter() : ModulePass(ID) { + initializeModuleDebugInfoPrinterPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + virtual void print(raw_ostream &O, const Module *M) const; + }; +} + +char ModuleDebugInfoPrinter::ID = 0; +INITIALIZE_PASS(ModuleDebugInfoPrinter, "module-debuginfo", + "Decodes module-level debug info", false, true) + +ModulePass *llvm::createModuleDebugInfoPrinterPass() { + return new ModuleDebugInfoPrinter(); +} + +bool ModuleDebugInfoPrinter::runOnModule(Module &M) { + Finder.processModule(M); + return false; +} + +void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) { + O << "Compile Unit: "; + DICompileUnit(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) { + O << "Subprogram: "; + DISubprogram(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) { + O << "GlobalVariable: "; + DIGlobalVariable(*I).print(O); + O << '\n'; + } + + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) { + O << "Type: "; + DIType(*I).print(O); + O << '\n'; + } +} diff --git a/final/lib/Analysis/NoAliasAnalysis.cpp b/final/lib/Analysis/NoAliasAnalysis.cpp new file mode 100644 index 00000000000..101c2d5b028 --- /dev/null +++ b/final/lib/Analysis/NoAliasAnalysis.cpp @@ -0,0 +1,88 @@ +//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the default implementation of the Alias Analysis interface +// that simply returns "I don't know" for all queries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetData.h" +using namespace llvm; + +namespace { + /// NoAA - This class implements the -no-aa pass, which always returns "I + /// don't know" for alias queries. NoAA is unlike other alias analysis + /// implementations, in that it does not chain to a previous analysis. As + /// such it doesn't follow many of the rules that other alias analyses must. 
+ /// + struct NoAA : public ImmutablePass, public AliasAnalysis { + static char ID; // Class identification, replacement for typeinfo + NoAA() : ImmutablePass(ID) { + initializeNoAAPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + + virtual void initializePass() { + // Note: NoAA does not call InitializeAliasAnalysis because it's + // special and does not support chaining. + TD = getAnalysisIfAvailable(); + } + + virtual AliasResult alias(const Location &LocA, const Location &LocB) { + return MayAlias; + } + + virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS) { + return UnknownModRefBehavior; + } + virtual ModRefBehavior getModRefBehavior(const Function *F) { + return UnknownModRefBehavior; + } + + virtual bool pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + return false; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS, + const Location &Loc) { + return ModRef; + } + virtual ModRefResult getModRefInfo(ImmutableCallSite CS1, + ImmutableCallSite CS2) { + return ModRef; + } + + virtual void deleteValue(Value *V) {} + virtual void copyValue(Value *From, Value *To) {} + virtual void addEscapingUse(Use &U) {} + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &AliasAnalysis::ID) + return (AliasAnalysis*)this; + return this; + } + }; +} // End of anonymous namespace + +// Register this pass... +char NoAA::ID = 0; +INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", + "No Alias Analysis (always returns 'may' alias)", + true, true, true) + +ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/final/lib/Analysis/PHITransAddr.cpp b/final/lib/Analysis/PHITransAddr.cpp new file mode 100644 index 00000000000..93da5a48518 --- /dev/null +++ b/final/lib/Analysis/PHITransAddr.cpp @@ -0,0 +1,441 @@ +//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PHITransAddr class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Instructions.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +static bool CanPHITrans(Instruction *Inst) { + if (isa(Inst) || + isa(Inst)) + return true; + + if (isa(Inst) && + Inst->isSafeToSpeculativelyExecute()) + return true; + + if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) + return true; + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa(PtrInst) || isa(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + return false; +} + +void PHITransAddr::dump() const { + if (Addr == 0) { + dbgs() << "PHITransAddr: null\n"; + return; + } + dbgs() << "PHITransAddr: " << *Addr << "\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + dbgs() << " Input #" << i << " is " << *InstInputs[i] << "\n"; +} + + +static bool VerifySubExpr(Value *Expr, + SmallVectorImpl &InstInputs) { + // If this is a non-instruction value, there is nothing to do. + Instruction *I = dyn_cast(Expr); + if (I == 0) return true; + + // If it's an instruction, it is either in Tmp or its operands recursively + // are. + SmallVectorImpl::iterator Entry = + std::find(InstInputs.begin(), InstInputs.end(), I); + if (Entry != InstInputs.end()) { + InstInputs.erase(Entry); + return true; + } + + // If it isn't in the InstInputs list it is a subexpr incorporated into the + // address. Sanity check that it is phi translatable. + if (!CanPHITrans(I)) { + errs() << "Non phi translatable instruction found in PHITransAddr:\n"; + errs() << *I << '\n'; + llvm_unreachable("Either something is missing from InstInputs or " + "CanPHITrans is wrong."); + return false; + } + + // Validate the operands of the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (!VerifySubExpr(I->getOperand(i), InstInputs)) + return false; + + return true; +} + +/// Verify - Check internal consistency of this data structure. If the +/// structure is valid, it returns true. If invalid, it prints errors and +/// returns false. +bool PHITransAddr::Verify() const { + if (Addr == 0) return true; + + SmallVector Tmp(InstInputs.begin(), InstInputs.end()); + + if (!VerifySubExpr(Addr, Tmp)) + return false; + + if (!Tmp.empty()) { + errs() << "PHITransAddr contains extra instructions:\n"; + for (unsigned i = 0, e = InstInputs.size(); i != e; ++i) + errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n"; + llvm_unreachable("This is unexpected."); + return false; + } + + // a-ok. + return true; +} + + +/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true +/// if we have some hope of doing it. This should be used as a filter to +/// avoid calling PHITranslateValue in hopeless situations. +bool PHITransAddr::IsPotentiallyPHITranslatable() const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + Instruction *Inst = dyn_cast(Addr); + return Inst == 0 || CanPHITrans(Inst); +} + + +static void RemoveInstInputs(Value *V, + SmallVectorImpl &InstInputs) { + Instruction *I = dyn_cast(V); + if (I == 0) return; + + // If the instruction is in the InstInputs list, remove it. 
+  SmallVectorImpl<Instruction*>::iterator Entry =
+    std::find(InstInputs.begin(), InstInputs.end(), I);
+  if (Entry != InstInputs.end()) {
+    InstInputs.erase(Entry);
+    return;
+  }
+
+  assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+  // Otherwise, it must have instruction inputs itself.  Zap them recursively.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+      RemoveInstInputs(Op, InstInputs);
+  }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+                                         BasicBlock *PredBB,
+                                         const DominatorTree *DT) {
+  // If this is a non-instruction value, it can't require PHI translation.
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (Inst == 0) return V;
+
+  // Determine whether 'Inst' is an input to our PHI translatable expression.
+  bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+  // Handle input instructions if needed.
+  if (isInput) {
+    if (Inst->getParent() != CurBB) {
+      // If it is an input defined in a different block, then it remains an
+      // input.
+      return Inst;
+    }
+
+    // If 'Inst' is defined in this block and is an input that needs to be phi
+    // translated, we need to incorporate the value into the expression or fail.
+
+    // In either case, the instruction itself isn't an input any longer.
+    InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+    // If this is a PHI, go ahead and translate it.
+    if (PHINode *PN = dyn_cast<PHINode>(Inst))
+      return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+    // If this is a non-phi value, and it is analyzable, we can incorporate it
+    // into the expression by making all instruction operands be inputs.
+    if (!CanPHITrans(Inst))
+      return 0;
+
+    // All instruction operands are now inputs (and of course, they may also be
+    // defined in this block, so they may need to be phi translated themselves).
+    for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+      if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+        InstInputs.push_back(Op);
+  }
+
+  // Ok, it must be an intermediate result (either because it started that way
+  // or because we just incorporated it into the expression).  See if its
+  // operands need to be phi translated, and if so, reconstruct it.
+
+  if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
+    if (!Cast->isSafeToSpeculativelyExecute()) return 0;
+    Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+    if (PHIIn == 0) return 0;
+    if (PHIIn == Cast->getOperand(0))
+      return Cast;
+
+    // Find an available version of this cast.
+
+    // Constants are trivial to find.
+    if (Constant *C = dyn_cast<Constant>(PHIIn))
+      return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
+                                              C, Cast->getType()));
+
+    // Otherwise we have to see if a casted version of the incoming pointer
+    // is available.  If so, we can use it, otherwise we have to fail.
+    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+         UI != E; ++UI) {
+      if (CastInst *CastI = dyn_cast<CastInst>(*UI))
+        if (CastI->getOpcode() == Cast->getOpcode() &&
+            CastI->getType() == Cast->getType() &&
+            (!DT || DT->dominates(CastI->getParent(), PredBB)))
+          return CastI;
+    }
+    return 0;
+  }
+
+  // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + SmallVector GEPOps; + bool AnyChanged = false; + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT); + if (GEPOp == 0) return 0; + + AnyChanged |= GEPOp != GEP->getOperand(i); + GEPOps.push_back(GEPOp); + } + + if (!AnyChanged) + return GEP; + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD, DT)) { + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + RemoveInstInputs(GEPOps[i], InstInputs); + + return AddAsInput(V); + } + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(GEPI->getParent(), PredBB))) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) { + // PHI translate the LHS. + Constant *RHS = cast(Inst->getOperand(1)); + bool isNSW = cast(Inst)->hasNoSignedWrap(); + bool isNUW = cast(Inst)->hasNoUnsignedWrap(); + + Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT); + if (LHS == 0) return 0; + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + + // If the old 'LHS' was an input, add the new 'LHS' as an input. + if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) { + RemoveInstInputs(BOp, InstInputs); + AddAsInput(LHS); + } + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD, DT)) { + // If we simplified the operands, the LHS is no longer an input, but Res + // is. + RemoveInstInputs(LHS, InstInputs); + return AddAsInput(Res); + } + + // If we didn't modify the add, just return it. + if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1)) + return Inst; + + // Otherwise, see if we have this add available somewhere. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast(*UI)) + if (BO->getOpcode() == Instruction::Add && + BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent() && + (!DT || DT->dominates(BO->getParent(), PredBB))) + return BO; + } + + return 0; + } + + // Otherwise, we failed. + return 0; +} + + +/// PHITranslateValue - PHI translate the current address up the CFG from +/// CurBB to Pred, updating our state to reflect any needed changes. If the +/// dominator tree DT is non-null, the translated value must dominate +/// PredBB. This returns true on failure and sets Addr to null. 
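// A hedged usage sketch (not part of this file): note the inverted return
// convention documented above -- the call answers "did translation fail?".
//
//   PHITransAddr PredAddr(Addr);                  // copy per predecessor
//   if (PredAddr.PHITranslateValue(BB, Pred, DT)) {
//     // Failure: PredAddr.getAddr() is now null; callers such as
//     // getNonLocalPointerDepFromBB treat the value as clobbered in Pred.
//   } else {
//     // Success: PredAddr.getAddr() is the translated pointer and, when a
//     // dominator tree was supplied, it dominates PredBB.
//   }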
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree *DT) { + assert(Verify() && "Invalid PHITransAddr!"); + Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT); + assert(Verify() && "Invalid PHITransAddr!"); + + if (DT) { + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null(Addr)) + if (!DT->dominates(Inst->getParent(), PredBB)) + Addr = 0; + } + + return Addr == 0; +} + +/// PHITranslateWithInsertion - PHI translate this value into the specified +/// predecessor block, inserting a computation of the value if it is +/// unavailable. +/// +/// All newly created instructions are added to the NewInsts list. This +/// returns null on failure. +/// +Value *PHITransAddr:: +PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB, + const DominatorTree &DT, + SmallVectorImpl &NewInsts) { + unsigned NISize = NewInsts.size(); + + // Attempt to PHI translate with insertion. + Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts); + + // If successful, return the new value. + if (Addr) return Addr; + + // If not, destroy any intermediate instructions inserted. + while (NewInsts.size() != NISize) + NewInsts.pop_back_val()->eraseFromParent(); + return 0; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. All newly created instructions are added to the NewInsts list. +/// This returns null on failure. +/// +Value *PHITransAddr:: +InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const DominatorTree &DT, + SmallVectorImpl &NewInsts) { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new instance of it. + PHITransAddr Tmp(InVal, TD); + if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT)) + return Tmp.getAddr(); + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast(InVal); + + // Handle cast of PHI translatable value. + if (CastInst *Cast = dyn_cast(Inst)) { + if (!Cast->isSafeToSpeculativelyExecute()) return 0; + Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a cast at the end of PredBB. + CastInst *New = CastInst::Create(Cast->getOpcode(), + OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. + if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + SmallVector GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. + + // Handle add with a constant RHS. 
+ if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0), + CurBB, PredBB, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} diff --git a/final/lib/Analysis/PathNumbering.cpp b/final/lib/Analysis/PathNumbering.cpp new file mode 100644 index 00000000000..5d3f6bbc7b6 --- /dev/null +++ b/final/lib/Analysis/PathNumbering.cpp @@ -0,0 +1,525 @@ +//===- PathNumbering.cpp --------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Ball-Larus path numbers uniquely identify paths through a directed acyclic +// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony +// edges to obtain a DAG, and thus the unique path numbers [Ball96]. +// +// The purpose of this analysis is to enumerate the edges in a CFG in order +// to obtain paths from path numbers in a convenient manner. As described in +// [Ball96] edges can be enumerated such that given a path number by following +// the CFG and updating the path number, the path is obtained. +// +// [Ball96] +// T. Ball and J. R. Larus. "Efficient Path Profiling." +// International Symposium on Microarchitecture, pages 46-57, 1996. +// http://portal.acm.org/citation.cfm?id=243857 +// +//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ball-larus-numbering" + +#include "llvm/Analysis/PathNumbering.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InstrTypes.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; + +// Are we enabling early termination +static cl::opt ProcessEarlyTermination( + "path-profile-early-termination", cl::Hidden, + cl::desc("In path profiling, insert extra instrumentation to account for " + "unexpected function termination.")); + +// Returns the basic block for the BallLarusNode +BasicBlock* BallLarusNode::getBlock() { + return(_basicBlock); +} + +// Returns the number of paths to the exit starting at the node. +unsigned BallLarusNode::getNumberPaths() { + return(_numberPaths); +} + +// Sets the number of paths to the exit starting at the node. +void BallLarusNode::setNumberPaths(unsigned numberPaths) { + _numberPaths = numberPaths; +} + +// Gets the NodeColor used in graph algorithms. +BallLarusNode::NodeColor BallLarusNode::getColor() { + return(_color); +} + +// Sets the NodeColor used in graph algorithms. +void BallLarusNode::setColor(BallLarusNode::NodeColor color) { + _color = color; +} + +// Returns an iterator over predecessor edges. Includes phony and +// backedges. 
+BLEdgeIterator BallLarusNode::predBegin() {
+  return(_predEdges.begin());
+}
+
+// Returns the end sentinel for the predecessor iterator.
+BLEdgeIterator BallLarusNode::predEnd() {
+  return(_predEdges.end());
+}
+
+// Returns the number of predecessor edges.  Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberPredEdges() {
+  return(_predEdges.size());
+}
+
+// Returns an iterator over successor edges.  Includes phony and
+// backedges.
+BLEdgeIterator BallLarusNode::succBegin() {
+  return(_succEdges.begin());
+}
+
+// Returns the end sentinel for the successor iterator.
+BLEdgeIterator BallLarusNode::succEnd() {
+  return(_succEdges.end());
+}
+
+// Returns the number of successor edges.  Includes phony and
+// backedges.
+unsigned BallLarusNode::getNumberSuccEdges() {
+  return(_succEdges.size());
+}
+
+// Add an edge to the predecessor list.
+void BallLarusNode::addPredEdge(BallLarusEdge* edge) {
+  _predEdges.push_back(edge);
+}
+
+// Remove an edge from the predecessor list.
+void BallLarusNode::removePredEdge(BallLarusEdge* edge) {
+  removeEdge(_predEdges, edge);
+}
+
+// Add an edge to the successor list.
+void BallLarusNode::addSuccEdge(BallLarusEdge* edge) {
+  _succEdges.push_back(edge);
+}
+
+// Remove an edge from the successor list.
+void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) {
+  removeEdge(_succEdges, edge);
+}
+
+// Returns the name of the BasicBlock being represented.  If BasicBlock
+// is null then returns "<null>".  If BasicBlock has no name, then
+// "<unnamed>" is returned.  Intended for use with debug output.
+std::string BallLarusNode::getName() {
+  std::stringstream name;
+
+  if(getBlock() != NULL) {
+    if(getBlock()->hasName()) {
+      std::string tempName(getBlock()->getName());
+      name << tempName.c_str() << " (" << _uid << ")";
+    } else
+      name << "<unnamed> (" << _uid << ")";
+  } else
+    name << "<null> (" << _uid << ")";
+
+  return name.str();
+}
+
+// Removes an edge from an edgeVector.  Used by removePredEdge and
+// removeSuccEdge.
+void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) {
+  // TODO: Avoid linear scan by using a set instead
+  for(BLEdgeIterator i = v.begin(),
+      end = v.end();
+      i != end;
+      ++i) {
+    if((*i) == e) {
+      v.erase(i);
+      break;
+    }
+  }
+}
+
+// Returns the source node of this edge.
+BallLarusNode* BallLarusEdge::getSource() const {
+  return(_source);
+}
+
+// Returns the target node of this edge.
+BallLarusNode* BallLarusEdge::getTarget() const {
+  return(_target);
+}
+
+// Gets the type of the edge.
+BallLarusEdge::EdgeType BallLarusEdge::getType() const {
+  return _edgeType;
+}
+
+// Sets the type of the edge.
+void BallLarusEdge::setType(EdgeType type) {
+  _edgeType = type;
+}
+
+// Returns the weight of this edge.  Used to decode path numbers to sequences
+// of basic blocks.
+unsigned BallLarusEdge::getWeight() {
+  return(_weight);
+}
+
+// Sets the weight of the edge.  Used during path numbering.
+void BallLarusEdge::setWeight(unsigned weight) {
+  _weight = weight;
+}
+
+// Gets the phony edge originating at the root.
+BallLarusEdge* BallLarusEdge::getPhonyRoot() {
+  return _phonyRoot;
+}
+
+// Sets the phony edge originating at the root.
+void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) {
+  _phonyRoot = phonyRoot;
+}
+
+// Gets the phony edge terminating at the exit.
+BallLarusEdge* BallLarusEdge::getPhonyExit() {
+  return _phonyExit;
+}
+
+// Sets the phony edge terminating at the exit.
+void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) {
+  _phonyExit = phonyExit;
+}
+
+// Gets the associated real edge if this is a phony edge.
+BallLarusEdge* BallLarusEdge::getRealEdge() {
+  return _realEdge;
+}
+
+// Sets the associated real edge if this is a phony edge.
+void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) {
+  _realEdge = realEdge;
+}
+
+// Returns the duplicate number of the edge.
+unsigned BallLarusEdge::getDuplicateNumber() {
+  return(_duplicateNumber);
+}
+
+// Initialization that requires virtual functions which are not fully
+// functional in the constructor.
+void BallLarusDag::init() {
+  BLBlockNodeMap inDag;
+  std::stack<BallLarusNode*> dfsStack;
+
+  _root = addNode(&(_function.getEntryBlock()));
+  _exit = addNode(NULL);
+
+  // start search from root
+  dfsStack.push(getRoot());
+
+  // dfs to add each bb into the dag
+  while(dfsStack.size())
+    buildNode(inDag, dfsStack);
+
+  // put in the final edge
+  addEdge(getExit(),getRoot(),0);
+}
+
+// Frees all memory associated with the DAG.
+BallLarusDag::~BallLarusDag() {
+  for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end;
+      ++edge)
+    delete (*edge);
+
+  for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end;
+      ++node)
+    delete (*node);
+}
+
+// Calculate the path numbers by assigning edge increments as prescribed
+// in Ball-Larus path profiling.
+void BallLarusDag::calculatePathNumbers() {
+  BallLarusNode* node;
+  std::queue<BallLarusNode*> bfsQueue;
+  bfsQueue.push(getExit());
+
+  while(bfsQueue.size() > 0) {
+    node = bfsQueue.front();
+
+    DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n");
+
+    bfsQueue.pop();
+    unsigned prevPathNumber = node->getNumberPaths();
+    calculatePathNumbersFrom(node);
+
+    // Check for DAG splitting
+    if( node->getNumberPaths() > 100000000 && node != getRoot() ) {
+      // Add new phony edge from the split-node to the DAG's exit
+      BallLarusEdge* exitEdge = addEdge(node, getExit(), 0);
+      exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+
+      // Counters to handle the possibility of a multi-graph
+      BasicBlock* oldTarget = 0;
+      unsigned duplicateNumber = 0;
+
+      // Iterate through each successor edge, adding phony edges
+      for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd();
+           succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) {
+
+        if( (*succ)->getType() == BallLarusEdge::NORMAL ) {
+          // is this edge a duplicate?
+          if( oldTarget != (*succ)->getTarget()->getBlock() )
+            duplicateNumber = 0;
+
+          // create the new phony edge: root -> succ
+          BallLarusEdge* rootEdge =
+            addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++);
+          rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY);
+          rootEdge->setRealEdge(*succ);
+
+          // split on this edge and reference its exit/root phony edges
+          (*succ)->setType(BallLarusEdge::SPLITEDGE);
+          (*succ)->setPhonyRoot(rootEdge);
+          (*succ)->setPhonyExit(exitEdge);
+          (*succ)->setWeight(0);
+        }
+      }
+
+      calculatePathNumbersFrom(node);
+    }
+
+    DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", "
+          << node->getNumberPaths() << ".\n");
+
+    if(prevPathNumber == 0 && node->getNumberPaths() != 0) {
+      DEBUG(dbgs() << "node ready : " << node->getName() << "\n");
+      for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd();
+          pred != end; pred++) {
+        if( (*pred)->getType() == BallLarusEdge::BACKEDGE ||
+            (*pred)->getType() == BallLarusEdge::SPLITEDGE )
+          continue;
+
+        BallLarusNode* nextNode = (*pred)->getSource();
+        // not yet visited?
+        if(nextNode->getNumberPaths() == 0)
+          bfsQueue.push(nextNode);
+      }
+    }
+  }
+
+  DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n");
+}
+
+// Returns the number of paths for the Dag.
+unsigned BallLarusDag::getNumberOfPaths() {
+  return(getRoot()->getNumberPaths());
+}
+
+// Returns the root (i.e. entry) node for the DAG.
+BallLarusNode* BallLarusDag::getRoot() {
+  return _root;
+}
+
+// Returns the exit node for the DAG.
+BallLarusNode* BallLarusDag::getExit() {
+  return _exit;
+}
+
+// Returns the function for the DAG.
+Function& BallLarusDag::getFunction() {
+  return(_function);
+}
+
+// Clears the node colors.
+void BallLarusDag::clearColors(BallLarusNode::NodeColor color) {
+  for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++)
+    (*nodeIt)->setColor(color);
+}
+
+// Processes one node and its immediate edges for building the DAG.
+void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) {
+  BallLarusNode* currentNode = dfsStack.top();
+  BasicBlock* currentBlock = currentNode->getBlock();
+
+  if(currentNode->getColor() != BallLarusNode::WHITE) {
+    // we have already visited this node
+    dfsStack.pop();
+    currentNode->setColor(BallLarusNode::BLACK);
+  } else {
+    // are there any external procedure calls?
+    if( ProcessEarlyTermination ) {
+      for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(),
+           bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd;
+           bbCurrent++ ) {
+        Instruction& instr = *bbCurrent;
+        if( instr.getOpcode() == Instruction::Call ) {
+          BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0);
+          callEdge->setType(BallLarusEdge::CALLEDGE_PHONY);
+          break;
+        }
+      }
+    }
+
+    TerminatorInst* terminator = currentNode->getBlock()->getTerminator();
+    if(isa<ReturnInst>(terminator) || isa<UnreachableInst>(terminator)
+       || isa<UnwindInst>(terminator))
+      addEdge(currentNode, getExit(),0);
+
+    currentNode->setColor(BallLarusNode::GRAY);
+    inDag[currentBlock] = currentNode;
+
+    BasicBlock* oldSuccessor = 0;
+    unsigned duplicateNumber = 0;
+
+    // iterate through this node's successors
+    for(succ_iterator successor = succ_begin(currentBlock),
+        succEnd = succ_end(currentBlock); successor != succEnd;
+        oldSuccessor = *successor, ++successor ) {
+      BasicBlock* succBB = *successor;
+
+      // is this edge a duplicate?
+      if (oldSuccessor == succBB)
+        duplicateNumber++;
+      else
+        duplicateNumber = 0;
+
+      buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber);
+    }
+  }
+}
+
+// Process an edge in the CFG for DAG building.
+void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack<BallLarusNode*>&
+                             dfsStack, BallLarusNode* currentNode,
+                             BasicBlock* succBB, unsigned duplicateCount) {
+  BallLarusNode* succNode = inDag[succBB];
+
+  if(succNode && succNode->getColor() == BallLarusNode::BLACK) {
+    // visited node and forward edge
+    addEdge(currentNode, succNode, duplicateCount);
+  } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) {
+    // visited node and back edge
+    DEBUG(dbgs() << "Backedge detected.\n");
+    addBackedge(currentNode, succNode, duplicateCount);
+  } else {
+    BallLarusNode* childNode;
+    // not visited node and forward edge
+    if(succNode) // an unvisited node that is child of a gray node
+      childNode = succNode;
+    else { // an unvisited node that is a child of an unvisited node
+      childNode = addNode(succBB);
+      inDag[succBB] = childNode;
+    }
+    addEdge(currentNode, childNode, duplicateCount);
+    dfsStack.push(childNode);
+  }
+}
+
+// The weight on each edge is the increment required along any path that
+// contains that edge.
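// A worked illustration (not from the source): for a diamond CFG
// A -> {B, C} -> D with exit D, numberPaths(D) = 1 and
// numberPaths(B) = numberPaths(C) = 1.  Processing A's successor edges in
// order, the running sum starts at 0, so weight(A->B) = 0, then
// weight(A->C) = 1, and numberPaths(A) = 2.  Summing edge weights along a
// path yields its unique number: A->B->D is path 0 and A->C->D is path 1.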
+void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) { + if(node == getExit()) + // The Exit node must be base case + node->setNumberPaths(1); + else { + unsigned sumPaths = 0; + BallLarusNode* succNode; + + for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); + succ != end; succ++) { + if( (*succ)->getType() == BallLarusEdge::BACKEDGE || + (*succ)->getType() == BallLarusEdge::SPLITEDGE ) + continue; + + (*succ)->setWeight(sumPaths); + succNode = (*succ)->getTarget(); + + if( !succNode->getNumberPaths() ) + return; + sumPaths += succNode->getNumberPaths(); + } + + node->setNumberPaths(sumPaths); + } +} + +// Allows subclasses to determine which type of Node is created. +// Override this method to produce subclasses of BallLarusNode if +// necessary. The destructor of BallLarusDag will call free on each +// pointer created. +BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) { + return( new BallLarusNode(BB) ); +} + +// Allows subclasses to determine which type of Edge is created. +// Override this method to produce subclasses of BallLarusEdge if +// necessary. The destructor of BallLarusDag will call free on each +// pointer created. +BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source, + BallLarusNode* target, + unsigned duplicateCount) { + return( new BallLarusEdge(source, target, duplicateCount) ); +} + +// Proxy to node's constructor. Updates the DAG state. +BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) { + BallLarusNode* newNode = createNode(BB); + _nodes.push_back(newNode); + return( newNode ); +} + +// Proxy to edge's constructor. Updates the DAG state. +BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source, + BallLarusNode* target, + unsigned duplicateCount) { + BallLarusEdge* newEdge = createEdge(source, target, duplicateCount); + _edges.push_back(newEdge); + source->addSuccEdge(newEdge); + target->addPredEdge(newEdge); + return(newEdge); +} + +// Adds a backedge with its phony edges. Updates the DAG state. +void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target, + unsigned duplicateCount) { + BallLarusEdge* childEdge = addEdge(source, target, duplicateCount); + childEdge->setType(BallLarusEdge::BACKEDGE); + + childEdge->setPhonyRoot(addEdge(getRoot(), target,0)); + childEdge->setPhonyExit(addEdge(source, getExit(),0)); + + childEdge->getPhonyRoot()->setRealEdge(childEdge); + childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY); + + childEdge->getPhonyExit()->setRealEdge(childEdge); + childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY); + _backEdges.push_back(childEdge); +} diff --git a/final/lib/Analysis/PathProfileInfo.cpp b/final/lib/Analysis/PathProfileInfo.cpp new file mode 100644 index 00000000000..b361d3f4fa9 --- /dev/null +++ b/final/lib/Analysis/PathProfileInfo.cpp @@ -0,0 +1,434 @@ +//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface used by optimizers to load path profiles, +// and provides a loader pass which reads a path profile file. 
diff --git a/final/lib/Analysis/PathProfileInfo.cpp b/final/lib/Analysis/PathProfileInfo.cpp
new file mode 100644
index 00000000000..b361d3f4fa9
--- /dev/null
+++ b/final/lib/Analysis/PathProfileInfo.cpp
@@ -0,0 +1,434 @@
+//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface used by optimizers to load path profiles,
+// and provides a loader pass which reads a path profile file.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-info"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+// command line option for loading path profiles
+static cl::opt<std::string>
+PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
+  cl::value_desc("filename"),
+  cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
+
+namespace {
+  class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
+  public:
+    PathProfileLoaderPass() : ModulePass(ID) { }
+    ~PathProfileLoaderPass();
+
+    // this pass doesn't change anything (only loads information)
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    // the full name of the loader pass
+    virtual const char* getPassName() const {
+      return "Path Profiling Information Loader";
+    }
+
+    // required since this pass implements multiple inheritance
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &PathProfileInfo::ID)
+        return (PathProfileInfo*)this;
+      return this;
+    }
+
+    // entry point to run the pass
+    bool runOnModule(Module &M);
+
+    // pass identification
+    static char ID;
+
+  private:
+    // make a reference table to refer to function by number
+    void buildFunctionRefs(Module &M);
+
+    // process argument info of a program from the input file
+    void handleArgumentInfo();
+
+    // process path number information from the input file
+    void handlePathInfo();
+
+    // array of references to the functions in the module
+    std::vector<Function*> _functions;
+
+    // path profile file handle
+    FILE* _file;
+
+    // path profile file name
+    std::string _filename;
+  };
+}
+
+// register PathLoader
+char PathProfileLoaderPass::ID = 0;
+
+INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
+                          NoPathProfileInfo)
+INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
+                   "path-profile-loader",
+                   "Load path profile information from file",
+                   false, true, false)
+
+char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
+
+// link PathLoader as a pass, and make it available as an optimisation
+ModulePass *llvm::createPathProfileLoaderPass() {
+  return new PathProfileLoaderPass;
+}
+
+// ----------------------------------------------------------------------------
+// PathEdge implementation
+//
+ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
+                                  unsigned duplicateNumber)
+  : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
+
+// ----------------------------------------------------------------------------
+// Path implementation
+//
+
+ProfilePath::ProfilePath (unsigned int number, unsigned int count,
+                          double countStdDev, PathProfileInfo* ppi)
+  : _number(number) , _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
+
+double ProfilePath::getFrequency() const {
+  return 100 * double(_count) /
+    double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
+}
+
+static BallLarusEdge* getNextEdge (BallLarusNode* node,
+                                   unsigned int pathNumber) {
+  BallLarusEdge* best = 0;
+
+  for( BLEdgeIterator next = node->succBegin(),
+         end = node->succEnd(); next != end; next++ ) {
+    if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
+        (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
+        (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
+        (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
+      best = *next;
+  }
+
+  return best;
+}
+
+ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
+  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+  unsigned int increment = _number;
+  ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
+
+  while (currentNode != _ppi->_currentDag->getExit()) {
+    BallLarusEdge* next = getNextEdge(currentNode, increment);
+
+    increment -= next->getWeight();
+
+    if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
+        next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
+        next->getTarget() != _ppi->_currentDag->getExit() )
+      pev->push_back(ProfilePathEdge(
+                       next->getSource()->getBlock(),
+                       next->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
+        next->getTarget() == _ppi->_currentDag->getExit() )
+      pev->push_back(ProfilePathEdge(
+                       next->getRealEdge()->getSource()->getBlock(),
+                       next->getRealEdge()->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
+        next->getSource() == _ppi->_currentDag->getRoot() )
+      pev->push_back(ProfilePathEdge(
+                       next->getRealEdge()->getSource()->getBlock(),
+                       next->getRealEdge()->getTarget()->getBlock(),
+                       next->getDuplicateNumber()));
+
+    // set the new node
+    currentNode = next->getTarget();
+  }
+
+  return pev;
+}
+
+ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
+  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
+  unsigned int increment = _number;
+  ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
+
+  while (currentNode != _ppi->_currentDag->getExit()) {
+    BallLarusEdge* next = getNextEdge(currentNode, increment);
+    increment -= next->getWeight();
+
+    // add block to the block list if it is a real edge
+    if( next->getType() == BallLarusEdge::NORMAL)
+      pbv->push_back (currentNode->getBlock());
+    // make the back edge the last edge since we are at the end
+    else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
+      pbv->push_back (currentNode->getBlock());
+      pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
+    }
+
+    // set the new node
+    currentNode = next->getTarget();
+  }
+
+  return pbv;
+}
+
+BasicBlock* ProfilePath::getFirstBlockInPath() const {
+  BallLarusNode* root = _ppi->_currentDag->getRoot();
+  BallLarusEdge* edge = getNextEdge(root, _number);
+
+  if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
+               edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
+    return edge->getTarget()->getBlock();
+
+  return root->getBlock();
+}
+
+// ----------------------------------------------------------------------------
+// PathProfileInfo implementation
+//
+
+// Pass identification
+char llvm::PathProfileInfo::ID = 0;
+
+PathProfileInfo::PathProfileInfo () : _currentDag(0) , _currentFunction(0) {
+}
+
+PathProfileInfo::~PathProfileInfo() {
+  if (_currentDag)
+    delete _currentDag;
+}
+
+// set the function for which paths are currently being processed
+void PathProfileInfo::setCurrentFunction(Function* F) {
+  // Make sure it exists
+  if (!F) return;
+
+  if (_currentDag)
+    delete _currentDag;
+
+  _currentFunction = F;
+  _currentDag = new BallLarusDag(*F);
+  _currentDag->init();
+  _currentDag->calculatePathNumbers();
+}
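For orientation, here is how a client might drive this interface once the loader pass has populated it. This is an editorial sketch compiled against the same LLVM 2.9 headers this file includes; dumpPathsForFunction is an invented name, and PPI is assumed to be the PathProfileInfo produced by -path-profile-loader:

    #include "llvm/Analysis/PathProfileInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Walk every executed path of one function and report its count.
    void dumpPathsForFunction(PathProfileInfo &PPI, Function *F) {
      PPI.setCurrentFunction(F);   // builds the BallLarusDag for F
      if (!PPI.pathsRun())
        return;                    // no executed paths recorded for F

      for (ProfilePathIterator I = PPI.pathBegin(), E = PPI.pathEnd();
           I != E; ++I) {
        ProfilePath *P = I->second;
        ProfilePathEdgeVector *Edges = P->getPathEdges();
        errs() << "path #" << P->getNumber() << " ran " << P->getCount()
               << " times over " << Edges->size() << " edges\n";
        delete Edges;              // getPathEdges() allocates a fresh vector
      }
    }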
+// get the function for which paths are currently being processed
+Function* PathProfileInfo::getCurrentFunction() const {
+  return _currentFunction;
+}
+
+// get the entry block of the function
+BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
+  return _currentDag->getRoot()->getBlock();
+}
+
+// return the path based on its number
+ProfilePath* PathProfileInfo::getPath(unsigned int number) {
+  return _functionPaths[_currentFunction][number];
+}
+
+// return the number of paths which a function may potentially execute
+unsigned int PathProfileInfo::getPotentialPathCount() {
+  return _currentDag ? _currentDag->getNumberOfPaths() : 0;
+}
+
+// return an iterator for the beginning of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathBegin() {
+  return _functionPaths[_currentFunction].begin();
+}
+
+// return an iterator for the end of a function's executed paths
+ProfilePathIterator PathProfileInfo::pathEnd() {
+  return _functionPaths[_currentFunction].end();
+}
+
+// returns the total number of paths run in the function
+unsigned int PathProfileInfo::pathsRun() {
+  return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
+}
+
+// ----------------------------------------------------------------------------
+// PathLoader implementation
+//
+
+// remove all generated paths
+PathProfileLoaderPass::~PathProfileLoaderPass() {
+  for( FunctionPathIterator funcNext = _functionPaths.begin(),
+         funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
+    for( ProfilePathIterator pathNext = funcNext->second.begin(),
+           pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
+      delete pathNext->second;
+}
+
+// entry point of the pass; this loads and parses a file
+bool PathProfileLoaderPass::runOnModule(Module &M) {
+  // get the filename and setup the module's function references
+  _filename = PathProfileInfoFilename;
+  buildFunctionRefs (M);
+
+  if (!(_file = fopen(_filename.c_str(), "rb"))) {
+    errs () << "error: input file '" << _filename << "' does not exist.\n";
+    return false;
+  }
+
+  ProfilingType profType;
+
+  while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
+    switch (profType) {
+    case ArgumentInfo:
+      handleArgumentInfo ();
+      break;
+    case PathInfo:
+      handlePathInfo ();
+      break;
+    default:
+      errs () << "error: bad path profiling file syntax, " << profType << "\n";
+      fclose (_file);
+      return false;
+    }
+  }
+
+  fclose (_file);
+
+  return true;
+}
+
+// create a reference table for functions defined in the path profile file
+void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
+  _functions.push_back(0); // make the 0 index a null pointer
+
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
+    if (F->isDeclaration())
+      continue;
+    _functions.push_back(F);
+  }
+}
+
+// handle command line argument info in the output file
+void PathProfileLoaderPass::handleArgumentInfo() {
+  // get the argument list's length
+  unsigned savedArgsLength;
+  if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
+    errs() << "warning: argument info header/data mismatch\n";
+    return;
+  }
+
+  // allocate a buffer, and get the arguments
+  char* args = new char[savedArgsLength+1];
+  if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
+    errs() << "warning: argument info header/data mismatch\n";
+
+  args[savedArgsLength] = '\0';
+  argList = std::string(args);
+  delete [] args; // cleanup dynamic string
+
+  // byte alignment
+  if (savedArgsLength & 3)
+    fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
+}
+// Handle path profile information in the output file
+void PathProfileLoaderPass::handlePathInfo () {
+  // get the number of functions in this profile
+  unsigned functionCount;
+  if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) {
+    errs() << "warning: path info header/data mismatch\n";
+    return;
+  }
+
+  // gather path information for each function
+  for (unsigned i = 0; i < functionCount; i++) {
+    PathProfileHeader pathHeader;
+    if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) {
+      errs() << "warning: bad header for path function info\n";
+      break;
+    }
+
+    Function* f = _functions[pathHeader.fnNumber];
+
+    // dynamically allocate a table to store path numbers
+    PathProfileTableEntry* pathTable =
+      new PathProfileTableEntry[pathHeader.numEntries];
+
+    if( fread(pathTable, sizeof(PathProfileTableEntry),
+              pathHeader.numEntries, _file) != pathHeader.numEntries) {
+      delete [] pathTable;
+      errs() << "warning: path function info header/data mismatch\n";
+      return;
+    }
+
+    // Build a new path for the current function
+    unsigned int totalPaths = 0;
+    for (unsigned int j = 0; j < pathHeader.numEntries; j++) {
+      totalPaths += pathTable[j].pathCounter;
+      _functionPaths[f][pathTable[j].pathNumber]
+        = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter,
+                          0, this);
+    }
+
+    _functionPathCounts[f] = totalPaths;
+
+    delete [] pathTable;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// NoProfile PathProfileInfo implementation
+//
+
+namespace {
+  struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo {
+    static char ID; // Class identification, replacement for typeinfo
+    NoPathProfileInfo() : ImmutablePass(ID) {
+      initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry());
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &PathProfileInfo::ID)
+        return (PathProfileInfo*)this;
+      return this;
+    }
+
+    virtual const char *getPassName() const {
+      return "NoPathProfileInfo";
+    }
+  };
+}  // End of anonymous namespace
+
+char NoPathProfileInfo::ID = 0;
+// Register this pass...
+INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile",
+                   "No Path Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); }
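A reading aid, not part of the patch: the fread calls in handlePathInfo() above imply the record sequence of a PathInfo section. The sketch below mirrors the field accesses in the code (fnNumber, numEntries, pathNumber, pathCounter); the authoritative struct definitions live in the headers, not in this hunk, so these names are illustrative:

    // One PathInfo section, as the loader consumes it:
    //   unsigned functionCount;            // number of per-function records
    // then, functionCount times:
    struct PathHeaderSketch {              // read as PathProfileHeader
      unsigned fnNumber;                   // index into _functions (0 = null)
      unsigned numEntries;                 // table entries that follow
    };
    struct PathTableEntrySketch {          // read as PathProfileTableEntry
      unsigned pathNumber;                 // Ball-Larus path number
      unsigned pathCounter;                // times this path executed
    };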
diff --git a/final/lib/Analysis/PathProfileVerifier.cpp b/final/lib/Analysis/PathProfileVerifier.cpp
new file mode 100644
index 00000000000..c5497731420
--- /dev/null
+++ b/final/lib/Analysis/PathProfileVerifier.cpp
@@ -0,0 +1,207 @@
+//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This verifier derives an edge profile file from the current path profile
+// information.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "path-profile-verifier"
+
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Analysis/PathProfileInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <stdio.h>
+
+using namespace llvm;
+
+namespace {
+  class PathProfileVerifier : public ModulePass {
+  private:
+    bool runOnModule(Module &M);
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PathProfileVerifier() : ModulePass(ID) {
+      initializePathProfileVerifierPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual const char *getPassName() const {
+      return "Path Profiler Verifier";
+    }
+
+    // The verifier requires the path profile and edge profile.
+    virtual void getAnalysisUsage(AnalysisUsage& AU) const;
+  };
+}
+
+static cl::opt<std::string>
+EdgeProfileFilename("path-profile-verifier-file",
+  cl::init("edgefrompath.llvmprof.out"),
+  cl::value_desc("filename"),
+  cl::desc("Edge profile file generated by -path-profile-verifier"),
+  cl::Hidden);
+
+char PathProfileVerifier::ID = 0;
+INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier",
+                "Compare the path profile derived edge profile against the "
+                "edge profile.", true, true)
+
+ModulePass *llvm::createPathProfileVerifierPass() {
+  return new PathProfileVerifier();
+}
+
+// The verifier requires the path profile and edge profile.
+void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const {
+  AU.addRequired<PathProfileInfo>();
+  AU.addPreserved<PathProfileInfo>();
+}
+
+typedef std::map<unsigned, unsigned> DuplicateToIndexMap;
+typedef std::map<BasicBlock*,DuplicateToIndexMap> BlockToDuplicateMap;
+typedef std::map<BasicBlock*,BlockToDuplicateMap> NestedBlockToIndexMap;
+
+// the verifier iterates through each path to gather the total
+// number of edge frequencies
+bool PathProfileVerifier::runOnModule (Module &M) {
+  PathProfileInfo& pathProfileInfo = getAnalysis<PathProfileInfo>();
+
+  // set up a data structure mapping path edges to indices into an
+  // array of edge counters
+  NestedBlockToIndexMap arrayMap;
+  unsigned i = 0;
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    arrayMap[0][F->begin()][0] = i++;
+
+    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+      TerminatorInst *TI = BB->getTerminator();
+
+      unsigned duplicate = 0;
+      BasicBlock* prev = 0;
+      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e;
+           prev = TI->getSuccessor(s), ++s) {
+        if (prev == TI->getSuccessor(s))
+          duplicate++;
+        else duplicate = 0;
+
+        arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++;
+      }
+    }
+  }
+
+  std::vector<unsigned> edgeArray(i);
+
+  // iterate through each path and increment the edge counters as needed
+  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+    if (F->isDeclaration()) continue;
+
+    pathProfileInfo.setCurrentFunction(F);
+
+    DEBUG(dbgs() << "function '" << F->getName() << "' ran "
+          << pathProfileInfo.pathsRun()
+          << "/" << pathProfileInfo.getPotentialPathCount()
+          << " potential paths\n");
+
+    for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(),
+           endPath = pathProfileInfo.pathEnd();
+         nextPath != endPath; nextPath++ ) {
+      ProfilePath* currentPath = nextPath->second;
+
+      ProfilePathEdgeVector* pev = currentPath->getPathEdges();
+      DEBUG(dbgs() << "path #" << currentPath->getNumber() << ": "
"path #" << currentPath->getNumber() << ": " + << currentPath->getCount() << "\n"); + // setup the entry edge (normally path profiling doens't care about this) + if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) + edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + += currentPath->getCount(); + + for( ProfilePathEdgeIterator nextEdge = pev->begin(), + endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) { + if (nextEdge != pev->begin()) + DEBUG(dbgs() << " :: "); + + BasicBlock* source = nextEdge->getSource(); + BasicBlock* target = nextEdge->getTarget(); + unsigned duplicateNumber = nextEdge->getDuplicateNumber(); + DEBUG(dbgs () << source->getNameStr() << " --{" << duplicateNumber + << "}--> " << target->getNameStr()); + + // Ensure all the referenced edges exist + // TODO: make this a separate function + if( !arrayMap.count(source) ) { + errs() << " error [" << F->getNameStr() << "()]: source '" + << source->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source].count(target) ) { + errs() << " error [" << F->getNameStr() << "()]: target '" + << target->getNameStr() + << "' does not exist in the array map.\n"; + } else if( !arrayMap[source][target].count(duplicateNumber) ) { + errs() << " error [" << F->getNameStr() << "()]: edge " + << source->getNameStr() << " -> " << target->getNameStr() + << " duplicate number " << duplicateNumber + << " does not exist in the array map.\n"; + } else { + edgeArray[arrayMap[source][target][duplicateNumber]] + += currentPath->getCount(); + } + } + + DEBUG(errs() << "\n"); + + delete pev; + } + } + + std::string errorInfo; + std::string filename = EdgeProfileFilename; + + // Open a handle to the file + FILE* edgeFile = fopen(filename.c_str(),"wb"); + + if (!edgeFile) { + errs() << "error: unable to open file '" << filename << "' for output.\n"; + return false; + } + + errs() << "Generating edge profile '" << filename << "' ...\n"; + + // write argument info + unsigned type = ArgumentInfo; + unsigned num = pathProfileInfo.argList.size(); + int zeros = 0; + + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile); + if (num&3) + fwrite(&zeros, 1, 4-(num&3), edgeFile); + + type = EdgeInfo; + num = edgeArray.size(); + fwrite(&type,sizeof(unsigned),1,edgeFile); + fwrite(&num,sizeof(unsigned),1,edgeFile); + + // write each edge to the file + for( std::vector::iterator s = edgeArray.begin(), + e = edgeArray.end(); s != e; s++) + fwrite(&*s, sizeof (unsigned), 1, edgeFile); + + fclose (edgeFile); + + return true; +} diff --git a/final/lib/Analysis/PostDominators.cpp b/final/lib/Analysis/PostDominators.cpp new file mode 100644 index 00000000000..3f0deab9ea8 --- /dev/null +++ b/final/lib/Analysis/PostDominators.cpp @@ -0,0 +1,102 @@ +//===- PostDominators.cpp - Post-Dominator Calculation --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the post-dominator construction algorithms. 
diff --git a/final/lib/Analysis/PostDominators.cpp b/final/lib/Analysis/PostDominators.cpp
new file mode 100644
index 00000000000..3f0deab9ea8
--- /dev/null
+++ b/final/lib/Analysis/PostDominators.cpp
@@ -0,0 +1,102 @@
+//===- PostDominators.cpp - Post-Dominator Calculation --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the post-dominator construction algorithms.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "postdomtree"
+
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Analysis/DominatorInternals.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  PostDominatorTree Implementation
+//===----------------------------------------------------------------------===//
+
+char PostDominatorTree::ID = 0;
+char PostDominanceFrontier::ID = 0;
+INITIALIZE_PASS(PostDominatorTree, "postdomtree",
+                "Post-Dominator Tree Construction", true, true)
+
+bool PostDominatorTree::runOnFunction(Function &F) {
+  DT->recalculate(F);
+  return false;
+}
+
+PostDominatorTree::~PostDominatorTree() {
+  delete DT;
+}
+
+void PostDominatorTree::print(raw_ostream &OS, const Module *) const {
+  DT->print(OS);
+}
+
+
+FunctionPass* llvm::createPostDomTree() {
+  return new PostDominatorTree();
+}
+
+//===----------------------------------------------------------------------===//
+//  PostDominanceFrontier Implementation
+//===----------------------------------------------------------------------===//
+
+INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier",
+                "Post-Dominance Frontier Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
+INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier",
+                "Post-Dominance Frontier Construction", true, true)
+
+const DominanceFrontier::DomSetType &
+PostDominanceFrontier::calculate(const PostDominatorTree &DT,
+                                 const DomTreeNode *Node) {
+  // Loop over CFG successors to calculate DFlocal[Node]
+  BasicBlock *BB = Node->getBlock();
+  DomSetType &S = Frontiers[BB];       // The new set to fill in...
+  if (getRoots().empty()) return S;
+
+  if (BB)
+    for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB);
+         SI != SE; ++SI) {
+      BasicBlock *P = *SI;
+      // Does Node immediately dominate this predecessor?
+      DomTreeNode *SINode = DT[P];
+      if (SINode && SINode->getIDom() != Node)
+        S.insert(P);
+    }
+
+  // At this point, S is DFlocal.  Now we union in DFup's of our children...
+  // Loop through and visit the nodes that Node immediately dominates (Node's
+  // children in the IDomTree)
+  //
+  for (DomTreeNode::const_iterator
+         NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) {
+    DomTreeNode *IDominee = *NI;
+    const DomSetType &ChildDF = calculate(DT, IDominee);
+
+    DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end();
+    for (; CDFI != CDFE; ++CDFI) {
+      if (!DT.properlyDominates(Node, DT[*CDFI]))
+        S.insert(*CDFI);
+    }
+  }
+
+  return S;
+}
+
+FunctionPass* llvm::createPostDomFrontier() {
+  return new PostDominanceFrontier();
+}
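Editorial aside, not part of the patch: calculate() above is the classic two-part dominance-frontier recurrence run against the post-dominator tree, PDF(X) = DFlocal(X) united with the suitable parts of the children's frontiers. A hand-checkable standalone sketch on a diamond CFG A -> {B, C}, B -> D, C -> D, where the immediate post-dominator of A, B and C is D (all names and values are hand-derived for this example, not computed by LLVM):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
      typedef std::string N;
      // CFG predecessors, mirroring the pred_iterator loop in calculate().
      std::map<N, std::vector<N>> preds = {
          {"A", {}}, {"B", {"A"}}, {"C", {"A"}}, {"D", {"B", "C"}}};
      // Immediate post-dominators (post-dom tree rooted at D).
      std::map<N, N> ipdom = {{"A", "D"}, {"B", "D"}, {"C", "D"}};

      // DFlocal[X]: predecessors of X whose immediate post-dominator is
      // not X.  (The DFup propagation step is a no-op on this tiny tree,
      // since D properly post-dominates everything its children report.)
      for (auto& kv : preds) {
        std::set<N> DF;
        for (const N& p : kv.second)
          if (ipdom[p] != kv.first)
            DF.insert(p);
        std::printf("PDF(%s) = {", kv.first.c_str());
        for (const N& x : DF) std::printf(" %s", x.c_str());
        std::printf(" }\n");
      }
    }

The result, PDF(B) = PDF(C) = {A} and empty frontiers for A and D, matches the usual reading of post-dominance frontiers as control dependence: A is the branch that decides whether B or C executes.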
diff --git a/final/lib/Analysis/ProfileEstimatorPass.cpp b/final/lib/Analysis/ProfileEstimatorPass.cpp
new file mode 100644
index 00000000000..667ee1cc348
--- /dev/null
+++ b/final/lib/Analysis/ProfileEstimatorPass.cpp
@@ -0,0 +1,426 @@
+//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a concrete implementation of profiling information that
+// estimates the profiling information in a very crude and unimaginative way.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-estimator"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+using namespace llvm;
+
+static cl::opt<double>
+LoopWeight(
+    "profile-estimator-loop-weight", cl::init(10),
+    cl::value_desc("loop-weight"),
+    cl::desc("Number of loop executions used for profile-estimator")
+);
+
+namespace {
+  class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
+    double ExecCount;
+    LoopInfo *LI;
+    std::set<BasicBlock*> BBToVisit;
+    std::map<Loop*,double> LoopExitWeights;
+    std::map<Edge,double> MinimalWeight;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit ProfileEstimatorPass(const double execcount = 0)
+        : FunctionPass(ID), ExecCount(execcount) {
+      initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
+      if (execcount == 0) ExecCount = LoopWeight;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      AU.addRequired<LoopInfo>();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profiling information estimator";
+    }
+
+    /// run - Estimate the profile information from the specified file.
+    virtual bool runOnFunction(Function &F);
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance.  If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &ProfileInfo::ID)
+        return (ProfileInfo*)this;
+      return this;
+    }
+
+    virtual void recurseBasicBlock(BasicBlock *BB);
+
+    void inline printEdgeWeight(Edge);
+  };
+}  // End of anonymous namespace
+
+char ProfileEstimatorPass::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+                "Estimate profiling information", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
+                "Estimate profiling information", false, true, false)
+
+namespace llvm {
+  char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
+
+  FunctionPass *createProfileEstimatorPass() {
+    return new ProfileEstimatorPass();
+  }
+
+  /// createProfileEstimatorPass - This function returns a Pass that estimates
+  /// profiling information using the given loop execution count.
+  Pass *createProfileEstimatorPass(const unsigned execcount) {
+    return new ProfileEstimatorPass(execcount);
+  }
+}
+
+static double ignoreMissing(double w) {
+  if (w == ProfileInfo::MissingValue) return 0;
+  return w;
+}
+
+static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
+  DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
+}
+
+void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
+  DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
+        << format("%20.20g", getEdgeWeight(E)) << "\n");
+}
+
+// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
+// single block and then recurses into the successors.
+// The algorithm preserves the flow condition, meaning that the sum of the
+// weights of the incoming edges must be equal to the block weight, which must
+// in turn be equal to the sum of the weights of the outgoing edges.
+// Since the flow of a block is determined from the current state of the
+// flow, once an edge has a flow assigned this flow is never changed again;
+// otherwise it would be possible to violate the flow condition in another
+// block.
+void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
+
+  // Break the recursion if this BasicBlock was already visited.
+  if (BBToVisit.find(BB) == BBToVisit.end()) return;
+
+  // Read the LoopInfo for this block.
+  bool  BBisHeader = LI->isLoopHeader(BB);
+  Loop* BBLoop     = LI->getLoopFor(BB);
+
+  // To get the block weight, read all incoming edges.
+  double BBWeight = 0;
+  std::set<BasicBlock*> ProcessedPreds;
+  for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+        bbi != bbe; ++bbi ) {
+    // If this block was not considered already, add weight.
+    Edge edge = getEdge(*bbi,BB);
+    double w = getEdgeWeight(edge);
+    if (ProcessedPreds.insert(*bbi).second) {
+      BBWeight += ignoreMissing(w);
+    }
+    // If this block is a loop header and the predecessor is contained in this
+    // loop, then the edge is a backedge: continue and do not check if the
+    // value is valid.
+    if (BBisHeader && BBLoop->contains(*bbi)) {
+      printEdgeError(edge, "but is backedge, continuing");
+      continue;
+    }
+    // If the edge's value is missing (and this is no loop header, and this is
+    // no backedge) return; this block cannot be estimated yet.
+    if (w == MissingValue) {
+      printEdgeError(edge, "returning");
+      return;
+    }
+  }
+  if (getExecutionCount(BB) != MissingValue) {
+    BBWeight = getExecutionCount(BB);
+  }
+
+  // Fetch all necessary information for current block.
+  SmallVector<Edge, 8> ExitEdges;
+  SmallVector<Edge, 8> Edges;
+  if (BBLoop) {
+    BBLoop->getExitEdges(ExitEdges);
+  }
+
+  // If this is a loop header, consider the following:
+  // Exactly the flow that is entering this block, must exit this block too. So
+  // do the following:
+  // *) get all the exit edges, read the flow that is already leaving this
+  // loop, remember the edges that do not have any flow on them right now.
+  // (The edges that have already flow on them are most likely exiting edges of
+  // other loops, do not touch those flows because the previously calculated
+  // loop headers would not be exact anymore.)
+  // *) In case there is not a single exiting edge left, create one at the loop
+  // latch to prevent the flow from building up in the loop.
+  // *) Take the flow that is not leaving the loop already and distribute it on
+  // the remaining exiting edges.
+  // (This ensures that all flow that enters the loop also leaves it.)
+  // *) Increase the flow into the loop by increasing the weight of this block.
+  // There is at least one incoming backedge that will bring us this flow later
+  // on. (So that the flow condition in this node is valid again.)
+  if (BBisHeader) {
+    double incoming = BBWeight;
+    // Subtract the flow leaving the loop.
+    std::set<Edge> ProcessedExits;
+    for (SmallVector<Edge, 8>::iterator ei = ExitEdges.begin(),
+         ee = ExitEdges.end(); ei != ee; ++ei) {
+      if (ProcessedExits.insert(*ei).second) {
+        double w = getEdgeWeight(*ei);
+        if (w == MissingValue) {
+          Edges.push_back(*ei);
+          // Check if there is a necessary minimal weight, if yes, subtract it
+          // from weight.
+          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+            incoming -= MinimalWeight[*ei];
+            DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+          }
+        } else {
+          incoming -= w;
+        }
+      }
+    }
+    // If no exit edges, create one:
+    if (Edges.size() == 0) {
+      BasicBlock *Latch = BBLoop->getLoopLatch();
+      if (Latch) {
+        Edge edge = getEdge(Latch,0);
+        EdgeInformation[BB->getParent()][edge] = BBWeight;
+        printEdgeWeight(edge);
+        edge = getEdge(Latch, BB);
+        EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount;
+        printEdgeWeight(edge);
+      }
+    }
+
+    // Distribute remaining weight to the exiting edges. To prevent fractions
+    // from building up and provoking precision problems, the weight to be
+    // distributed is split and then rounded; the last edge gets a somewhat
+    // bigger value, but we are close enough for an estimation.
+    double fraction = floor(incoming/Edges.size());
+    for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+         ei != ee; ++ei) {
+      double w = 0;
+      if (ei != (ee-1)) {
+        w = fraction;
+        incoming -= fraction;
+      } else {
+        w = incoming;
+      }
+      EdgeInformation[BB->getParent()][*ei] += w;
+      // Read necessary minimal weight.
+      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+        DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+      }
+      printEdgeWeight(*ei);
+
+      // Add minimal weight to paths to all exit edges; this is used to ensure
+      // that enough flow reaches these edges.
+      Path p;
+      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+      while (Dest != BB) {
+        const BasicBlock *Parent = p.find(Dest)->second;
+        Edge e = getEdge(Parent, Dest);
+        if (MinimalWeight.find(e) == MinimalWeight.end()) {
+          MinimalWeight[e] = 0;
+        }
+        MinimalWeight[e] += w;
+        DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+        Dest = Parent;
+      }
+    }
+    // Increase flow into the loop.
+    BBWeight *= (ExecCount+1);
+  }
+
+  BlockInformation[BB->getParent()][BB] = BBWeight;
+  // Up until now we considered only the loop exiting edges; now we have a
+  // definite block weight and must distribute this onto the outgoing edges.
+  // Since there may already be flow attached to some of the edges, read this
+  // flow first and remember the edges that still have no flow attached.
+  Edges.clear();
+  std::set<BasicBlock*> ProcessedSuccs;
+
+  succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+  // Also check for (BB,0) edges that may already contain some flow. (But only
+  // in case there are no successors.)
+  if (bbi == bbe) {
+    Edge edge = getEdge(BB,0);
+    EdgeInformation[BB->getParent()][edge] = BBWeight;
+    printEdgeWeight(edge);
+  }
+  for ( ; bbi != bbe; ++bbi ) {
+    if (ProcessedSuccs.insert(*bbi).second) {
+      Edge edge = getEdge(BB,*bbi);
+      double w = getEdgeWeight(edge);
+      if (w != MissingValue) {
+        BBWeight -= getEdgeWeight(edge);
+      } else {
+        Edges.push_back(edge);
+        // If minimal weight is necessary, reserve weight by subtracting it
+        // from the block weight; it is re-added later.
+        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+          BBWeight -= MinimalWeight[edge];
+          DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+        }
+      }
+    }
+  }
+
+  double fraction = floor(BBWeight/Edges.size());
+  // Finally we know what flow is still not leaving the block; distribute this
+  // flow onto the empty edges.
+  for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
+       ei != ee; ++ei) {
+    if (ei != (ee-1)) {
+      EdgeInformation[BB->getParent()][*ei] += fraction;
+      BBWeight -= fraction;
+    } else {
+      EdgeInformation[BB->getParent()][*ei] += BBWeight;
+    }
+    // Re-add the minimal necessary weight.
+    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+      DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+    }
+    printEdgeWeight(*ei);
+  }
+
+  // This block is visited, mark this before the recursion.
+  BBToVisit.erase(BB);
+
+  // Recurse into successors.
+  for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+}
+
+bool ProfileEstimatorPass::runOnFunction(Function &F) {
+  if (F.isDeclaration()) return false;
+
+  // Fetch LoopInfo and clear ProfileInfo for this function.
+  LI = &getAnalysis<LoopInfo>();
+  FunctionInformation.erase(&F);
+  BlockInformation[&F].clear();
+  EdgeInformation[&F].clear();
+  BBToVisit.clear();
+
+  // Mark all blocks as to visit.
+  for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
+    BBToVisit.insert(bi);
+
+  // Clear Minimal Edges.
+  MinimalWeight.clear();
+
+  DEBUG(dbgs() << "Working on function " << F.getNameStr() << "\n");
+
+  // Since the entry block is the first one and has no predecessors, the edge
+  // (0,entry) is inserted with the starting weight of 1.
+  BasicBlock *entry = &F.getEntryBlock();
+  BlockInformation[&F][entry] = pow(2.0, 32.0);
+  Edge edge = getEdge(0,entry);
+  EdgeInformation[&F][edge] = BlockInformation[&F][entry];
+  printEdgeWeight(edge);
+
+  // Since recurseBasicBlock() may return with a block which was not fully
+  // estimated, use recurseBasicBlock() until everything is calculated.
+  bool cleanup = false;
+  recurseBasicBlock(entry);
+  while (BBToVisit.size() > 0 && !cleanup) {
+    // Remember number of open blocks, this is later used to check if progress
+    // was made.
+    unsigned size = BBToVisit.size();
+
+    // Try to calculate all blocks in turn.
+    for (std::set<BasicBlock*>::iterator bi = BBToVisit.begin(),
+         be = BBToVisit.end(); bi != be; ++bi) {
+      recurseBasicBlock(*bi);
+      // If at least one block was finished, break because iterator may be
+      // invalid.
+      if (BBToVisit.size() < size) break;
+    }
+
+    // If there was not a single block resolved, make some assumptions.
+    if (BBToVisit.size() == size) {
+      bool found = false;
+      for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(),
+           BBE = BBToVisit.end(); (BBI != BBE) && (!found); ++BBI) {
+        BasicBlock *BB = *BBI;
+        // Try each predecessor if it can be assumed.
+        for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+             (bbi != bbe) && (!found); ++bbi) {
+          Edge e = getEdge(*bbi,BB);
+          double w = getEdgeWeight(e);
+          // Check that edge from predecessor is still free.
+          if (w == MissingValue) {
+            // Check if there is a circle from this block to predecessor.
+            Path P;
+            const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+            if (Dest != *bbi) {
+              // If there is no circle, just set edge weight to 0
+              EdgeInformation[&F][e] = 0;
+              DEBUG(dbgs() << "Assuming edge weight: ");
+              printEdgeWeight(e);
+              found = true;
+            }
+          }
+        }
+      }
+      if (!found) {
+        cleanup = true;
+        DEBUG(dbgs() << "No assumption possible in Function " << F.getNameStr() << ", setting all to zero\n");
+      }
+    }
+  }
+  // If progress stalled completely, fall back to setting every block and
+  // edge of the function to zero.
+  if (cleanup) {
+    FunctionInformation[&F] = 0;
+    BlockInformation[&F].clear();
+    EdgeInformation[&F].clear();
+    for (Function::const_iterator FI = F.begin(), FE = F.end();
+         FI != FE; ++FI) {
+      const BasicBlock *BB = &(*FI);
+      BlockInformation[&F][BB] = 0;
+      const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
+      if (predi == prede) {
+        Edge e = getEdge(0,BB);
+        setEdgeWeight(e,0);
+      }
+      for (;predi != prede; ++predi) {
+        Edge e = getEdge(*predi,BB);
+        setEdgeWeight(e,0);
+      }
+      succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+      if (succi == succe) {
+        Edge e = getEdge(BB,0);
+        setEdgeWeight(e,0);
+      }
+      for (;succi != succe; ++succi) {
+        Edge e = getEdge(BB,*succi);
+        setEdgeWeight(e,0);
+      }
+    }
+  }
+
+  return false;
+}
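Editorial aside, not part of the patch: the estimator's flow condition is easiest to see with concrete numbers. A standalone arithmetic sketch (it mirrors the logic above but uses no LLVM API; all values are made up):

    #include <cstdio>
    #include <cmath>

    int main() {
      double entry = std::pow(2.0, 32.0);  // weight injected at (0,entry)
      double execCount = 10;               // the default LoopWeight

      // A two-way branch splits the block weight; floor() keeps
      // fractions from accumulating and the last edge absorbs the
      // remainder, so incoming flow still equals outgoing flow.
      double e1 = std::floor(entry / 2);
      double e2 = entry - e1;
      std::printf("branch edges: %.0f + %.0f = %.0f\n", e1, e2, e1 + e2);

      // A loop header multiplies its incoming flow by (ExecCount+1),
      // so the loop body carries ExecCount times the preheader flow
      // and the exit edges carry exactly what entered the loop.
      double header = e1 * (execCount + 1);
      std::printf("loop header weight: %.0f\n", header);
    }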
diff --git a/final/lib/Analysis/ProfileInfo.cpp b/final/lib/Analysis/ProfileInfo.cpp
new file mode 100644
--- /dev/null
+++ b/final/lib/Analysis/ProfileInfo.cpp
+//===- ProfileInfo.cpp - Profile Info Interface ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract ProfileInfo interface, and the default
+// "no profile" implementation.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-info"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+#include <queue>
+using namespace llvm;
+
+namespace llvm {
+  template<> char ProfileInfoT<Function,BasicBlock>::ID = 0;
+}
+
+// Register the ProfileInfo interface, providing a nice name to refer to.
+INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo)
+
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+  MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+  if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
+
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
+
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
+  std::map<const Function*, BlockCounts>::iterator J =
+    BlockInformation.find(BB->getParent());
+  if (J != BlockInformation.end()) {
+    BlockCounts::iterator I = J->second.find(BB);
+    if (I != J->second.end())
+      return I->second;
+  }
+
+  double Count = MissingValue;
+
+  const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
+
+  // Are there zero predecessors of this block?
+  if (PI == PE) {
+    Edge e = getEdge(0, BB);
+    Count = getEdgeWeight(e);
+  } else {
+    // Otherwise, if there are predecessors, the execution count of this block
+    // is the sum of the edge frequencies from the incoming edges.
+    std::set<const BasicBlock*> ProcessedPreds;
+    Count = 0;
+    for (; PI != PE; ++PI) {
+      const BasicBlock *P = *PI;
+      if (ProcessedPreds.insert(P).second) {
+        double w = getEdgeWeight(getEdge(P, BB));
+        if (w == MissingValue) {
+          Count = MissingValue;
+          break;
+        }
+        Count += w;
+      }
+    }
+  }
+
+  // If the predecessors did not suffice to get block weight, try successors.
+  if (Count == MissingValue) {
+
+    succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+    // Are there zero successors of this block?
+    if (SI == SE) {
+      Edge e = getEdge(BB,0);
+      Count = getEdgeWeight(e);
+    } else {
+      std::set<const BasicBlock*> ProcessedSuccs;
+      Count = 0;
+      for (; SI != SE; ++SI)
+        if (ProcessedSuccs.insert(*SI).second) {
+          double w = getEdgeWeight(getEdge(BB, *SI));
+          if (w == MissingValue) {
+            Count = MissingValue;
+            break;
+          }
+          Count += w;
+        }
+    }
+  }
+
+  if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
+  return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+  getExecutionCount(const MachineBasicBlock *MBB) {
+  std::map<const MachineFunction*, BlockCounts>::iterator J =
+    BlockInformation.find(MBB->getParent());
+  if (J != BlockInformation.end()) {
+    BlockCounts::iterator I = J->second.find(MBB);
+    if (I != J->second.end())
+      return I->second;
+  }
+
+  return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
+  std::map<const Function*, double>::iterator J =
+    FunctionInformation.find(F);
+  if (J != FunctionInformation.end())
+    return J->second;
+
+  // isDeclaration() is checked here and not at start of function to allow
+  // functions without a body still to have an execution count.
+  if (F->isDeclaration()) return MissingValue;
+
+  double Count = getExecutionCount(&F->getEntryBlock());
+  if (Count != MissingValue) FunctionInformation[F] = Count;
+  return Count;
+}
+
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+  getExecutionCount(const MachineFunction *MF) {
+  std::map<const MachineFunction*, double>::iterator J =
+    FunctionInformation.find(MF);
+  if (J != FunctionInformation.end())
+    return J->second;
+
+  double Count = getExecutionCount(&MF->front());
+  if (Count != MissingValue) FunctionInformation[MF] = Count;
+  return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+  setExecutionCount(const BasicBlock *BB, double w) {
+  DEBUG(dbgs() << "Creating Block " << BB->getName()
+        << " (weight: " << format("%.20g",w) << ")\n");
+  BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+  setExecutionCount(const MachineBasicBlock *MBB, double w) {
+  DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName()
+        << " (weight: " << format("%.20g",w) << ")\n");
+  BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+  double oldw = getEdgeWeight(e);
+  assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+  DEBUG(dbgs() << "Adding to Edge " << e
+        << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+  EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+  addExecutionCount(const BasicBlock *BB, double w) {
+  double oldw = getExecutionCount(BB);
+  assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+  DEBUG(dbgs() << "Adding to Block " << BB->getName()
+        << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+  BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+  std::map<const Function*, BlockCounts>::iterator J =
+    BlockInformation.find(BB->getParent());
+  if (J == BlockInformation.end()) return;
+
+  DEBUG(dbgs() << "Deleting " << BB->getName() << "\n");
+  J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(getFunction(e));
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(dbgs() << "Deleting " << e << "\n");
+  J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+  replaceEdge(const Edge &oldedge, const Edge &newedge) {
+  double w;
+  if ((w = getEdgeWeight(newedge)) == MissingValue) {
+    w = getEdgeWeight(oldedge);
+    DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n");
+  } else {
+    w += getEdgeWeight(oldedge);
+    DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n");
+  }
+  setEdgeWeight(newedge,w);
+  removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+  GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+          Path &P, unsigned Mode) {
+  const BasicBlock *BB = 0;
+  bool hasFoundPath = false;
+
+  std::queue<const BasicBlock *> BFS;
+  BFS.push(Src);
+
+  while(BFS.size() && !hasFoundPath) {
+    BB = BFS.front();
+    BFS.pop();
+
+    succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+    if (Succ == End) {
+      P[0] = BB;
+      if (Mode & GetPathToExit) {
+        hasFoundPath = true;
+        BB = 0;
+      }
+    }
+    for(;Succ != End; ++Succ) {
+      if (P.find(*Succ) != P.end()) continue;
+      Edge e = getEdge(BB,*Succ);
+      if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+      P[*Succ] = BB;
+      BFS.push(*Succ);
+      if ((Mode & GetPathToDest) && *Succ == Dest) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+      if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+    }
+  }
+
+  return BB;
+}
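Editorial sketch, not part of the patch: the Mode bits of GetPath() combine as a bitmask, and all of the flag names below appear in the callers in this file and in ProfileEstimatorPass.cpp. The fragment assumes it runs inside a ProfileInfo-derived pass (as ProfileEstimatorPass does), with BB and Pred being blocks of the current function:

    // Nearest exit reachable from BB, following only edges that have no
    // weight assigned yet:
    Path P;
    GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
    // On success P[0] holds the exit block that was reached.

    // Cycle check, as repair() uses it: can BB reach Pred?
    Path Q;
    const BasicBlock *Dest = GetPath(BB, Pred, Q, GetPathToDest);
    bool onCycle = (Dest == Pred);

    // Each Path maps a reached block to its BFS parent, so a found
    // route can be walked backwards from Dest (or from P[0]) to BB.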
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+  divertFlow(const Edge &oldedge, const Edge &newedge) {
+  DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
+
+  // First check if the old edge was taken, if not, just delete it...
+  if (getEdgeWeight(oldedge) == 0) {
+    removeEdge(oldedge);
+    return;
+  }
+
+  Path P;
+  P[newedge.first] = 0;
+  P[newedge.second] = newedge.first;
+  const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,
+                                 GetPathToExit | GetPathToDest);
+
+  double w = getEdgeWeight (oldedge);
+  DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
+  do {
+    const BasicBlock *Parent = P.find(BB)->second;
+    Edge e = getEdge(Parent,BB);
+    double oldw = getEdgeWeight(e);
+    double oldc = getExecutionCount(e.first);
+    setEdgeWeight(e, w+oldw);
+    if (Parent != oldedge.first) {
+      setExecutionCount(e.first, w+oldc);
+    }
+    BB = Parent;
+  } while (BB != newedge.first);
+  removeEdge(oldedge);
+}
+
+/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
+/// This checks all edges of the function the blocks reside in and replaces the
+/// occurrences of RmBB with DestBB.
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+  replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+  DEBUG(dbgs() << "Replacing " << RmBB->getName()
+        << " with " << DestBB->getName() << "\n");
+  const Function *F = DestBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  Edge e, newedge;
+  bool erasededge = false;
+  EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+  while(I != E) {
+    e = (I++)->first;
+    bool foundedge = false; bool eraseedge = false;
+    if (e.first == RmBB) {
+      if (e.second == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(DestBB, e.second);
+        foundedge = true;
+      }
+    }
+    if (e.second == RmBB) {
+      if (e.first == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(e.first, DestBB);
+        foundedge = true;
+      }
+    }
+    if (foundedge) {
+      replaceEdge(e, newedge);
+    }
+    if (eraseedge) {
+      if (erasededge) {
+        Edge newedge = getEdge(DestBB, DestBB);
+        replaceEdge(e, newedge);
+      } else {
+        removeEdge(e);
+        erasededge = true;
+      }
+    }
+  }
+}
+
+/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
+/// Since it is possible that there is more than one edge in the CFG from
+/// FirstBB to SecondBB, it is necessary to redirect the flow proportionally.
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+                                                  const BasicBlock *SecondBB,
+                                                  const BasicBlock *NewBB,
+                                                  bool MergeIdenticalEdges) {
+  const Function *F = FirstBB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  // Generate edges and read current weight.
+  Edge e  = getEdge(FirstBB, SecondBB);
+  Edge n1 = getEdge(FirstBB, NewBB);
+  Edge n2 = getEdge(NewBB, SecondBB);
+  EdgeWeights &ECs = J->second;
+  double w = ECs[e];
+
+  int succ_count = 0;
+  if (!MergeIdenticalEdges) {
+    // First count the edges from FirstBB to SecondBB; if there is more than
+    // one, only slice out a proportional part for NewBB.
+    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
+        BBI != BBE; ++BBI) {
+      if (*BBI == SecondBB) succ_count++;
+    }
+    // When the NewBB is completely new, increment the count by one so that
+    // the counts are properly distributed.
+    if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++;
+  } else {
+    // When the edges are merged anyway, then redirect all flow.
+    succ_count = 1;
+  }
+
+  // We know now how many edges there are from FirstBB to SecondBB, reroute a
+  // proportional part of the edge weight over NewBB.
+  double neww = floor(w / succ_count);
+  ECs[n1] += neww;
+  ECs[n2] += neww;
+  BlockInformation[F][NewBB] += neww;
+  if (succ_count == 1) {
+    ECs.erase(e);
+  } else {
+    ECs[e] -= neww;
+  }
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+                                                   const BasicBlock* New) {
+  const Function *F = Old->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+  std::set<Edge> Edges;
+  for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+       ewi != ewe; ++ewi) {
+    Edge old = ewi->first;
+    if (old.first == Old) {
+      Edges.insert(old);
+    }
+  }
+  for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+       EI != EE; ++EI) {
+    Edge newedge = getEdge(New, EI->second);
+    replaceEdge(*EI, newedge);
+  }
+
+  double w = getExecutionCount(Old);
+  setEdgeWeight(getEdge(Old, New), w);
+  setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+                                                   const BasicBlock* NewBB,
+                                                   BasicBlock *const *Preds,
+                                                   unsigned NumPreds) {
+  const Function *F = BB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+        << " to " << NewBB->getName() << "\n");
+
+  // Collect weight that was redirected over NewBB.
+  double newweight = 0;
+
+  std::set<const BasicBlock *> ProcessedPreds;
+  // For all requested predecessors.
+  for (unsigned pred = 0; pred < NumPreds; ++pred) {
+    const BasicBlock * Pred = Preds[pred];
+    if (ProcessedPreds.insert(Pred).second) {
+      // Create edges and read old weight.
+      Edge oldedge = getEdge(Pred, BB);
+      Edge newedge = getEdge(Pred, NewBB);
+
+      // Remember how much weight was redirected.
+      newweight += getEdgeWeight(oldedge);
+
+      replaceEdge(oldedge,newedge);
+    }
+  }
+
+  Edge newedge = getEdge(NewBB,BB);
+  setEdgeWeight(newedge, newweight);
+  setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+                                                 const Function *New) {
+  DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with "
+        << New->getName() << "\n");
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(Old);
+  if(J != EdgeInformation.end()) {
+    EdgeInformation[New] = J->second;
+  }
+  EdgeInformation.erase(Old);
+  BlockInformation.erase(Old);
+  FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+                                 ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+  if (w == ProfileInfo::MissingValue) {
+    tocalc = edge;
+    uncalc++;
+    return 0;
+  } else {
+    return w;
+  }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+  CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+                       bool assumeEmptySelf) {
+  Edge edgetocalc;
+  unsigned uncalculated = 0;
+
+  // collect weights of all incoming and outgoing edges, remember edges that
+  // have no value
+  double incount = 0;
+  SmallSet<const BasicBlock*,8> pred_visited;
+  const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+  if (bbi==bbe) {
+    Edge e = getEdge(0,BB);
+    incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+  }
+  for (;bbi != bbe; ++bbi) {
+    if (pred_visited.insert(*bbi)) {
+      Edge e = getEdge(*bbi,BB);
+      incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+    }
+  }
+
+  double outcount = 0;
+  SmallSet<const BasicBlock*,8> succ_visited;
+  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+  if (sbbi==sbbe) {
+    Edge e = getEdge(BB,0);
+    if (getEdgeWeight(e) == MissingValue) {
+      double w = getExecutionCount(BB);
+      if (w != MissingValue) {
+        setEdgeWeight(e,w);
+        removed = e;
+      }
+    }
+    outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+  }
+  for (;sbbi != sbbe; ++sbbi) {
+    if (succ_visited.insert(*sbbi)) {
+      Edge e = getEdge(BB,*sbbi);
+      outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+    }
+  }
+
+  // if exactly one edge weight was missing, calculate it and remove it from
+  // spanning tree
+  if (uncalculated == 0 ) {
+    return true;
+  } else
+  if (uncalculated == 1) {
+    if (incount < outcount) {
+      EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+    } else {
+      EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+    }
+    DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": "
+          << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+    removed = edgetocalc;
+    return true;
+  } else
+  if (uncalculated == 2 && assumeEmptySelf &&
+      edgetocalc.first == edgetocalc.second && incount == outcount) {
+    setEdgeWeight(edgetocalc, incount * 10);
+    removed = edgetocalc;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw,
+                     std::set<ProfileInfo::Edge> &misscount) {
+  double w = PI->getEdgeWeight(e);
+  if (w != ProfileInfo::MissingValue) {
+    calcw += w;
+  } else {
+    misscount.insert(e);
+  }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+  double inWeight = 0;
+  std::set<Edge> inMissing;
+  std::set<const BasicBlock*> ProcessedPreds;
+  const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+  if (bbi == bbe) {
+    readEdge(this,getEdge(0,BB),inWeight,inMissing);
+  }
+  for( ; bbi != bbe; ++bbi ) {
+    if (ProcessedPreds.insert(*bbi).second) {
+      readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+    }
+  }
+
+  double outWeight = 0;
+  std::set<Edge> outMissing;
+  std::set<const BasicBlock*> ProcessedSuccs;
+  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+  if (sbbi == sbbe)
+    readEdge(this,getEdge(BB,0),outWeight,outMissing);
+  for ( ; sbbi != sbbe; ++sbbi ) {
+    if (ProcessedSuccs.insert(*sbbi).second) {
+      readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+    }
+  }
+
+  double share;
+  std::set<Edge>::iterator ei,ee;
+  if (inMissing.size() == 0 && outMissing.size() > 0) {
+    ei = outMissing.begin();
+    ee = outMissing.end();
+    share = inWeight/outMissing.size();
+    setExecutionCount(BB,inWeight);
+  } else
+  if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+    ei = inMissing.begin();
+    ee = inMissing.end();
+    share = 0;
+    setExecutionCount(BB,0);
+  } else
+  if (inMissing.size() == 0 && outMissing.size() == 0) {
+    setExecutionCount(BB,outWeight);
+    return true;
+  } else {
+    return false;
+  }
+  for ( ; ei != ee; ++ei ) {
+    setEdgeWeight(*ei,share);
+  }
+  return true;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+//  if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+//    for (Function::const_iterator FI = F->begin(), FE = F->end();
+//         FI != FE; ++FI) {
+//      const BasicBlock* BB = &(*FI);
+//      {
+//        const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+//        if (NBB == End) {
+//          setEdgeWeight(getEdge(0,BB),0);
+//        }
+//        for(;NBB != End; ++NBB) {
+//          setEdgeWeight(getEdge(*NBB,BB),0);
+//        }
+//      }
+//      {
+//        succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+//        if (NBB == End) {
+//          setEdgeWeight(getEdge(0,BB),0);
+//        }
+//        for(;NBB != End; ++NBB) {
+//          setEdgeWeight(getEdge(*NBB,BB),0);
+//        }
+//      }
+//    }
+//    return;
+//  }
+  // The set of BasicBlocks that are still unvisited.
+  std::set<const BasicBlock*> Unvisited;
+
+  // The set of return edges (Edges with no successors).
+  std::set<Edge> ReturnEdges;
+  double ReturnWeight = 0;
+
+  // First iterate over the whole function and collect:
+  // 1) The blocks in this function in the Unvisited set.
+  // 2) The return edges in the ReturnEdges set.
+  // 3) The flow that is leaving the function already via return edges.
+
+  // Data structure for searching the function.
+  std::queue<const BasicBlock *> BFS;
+  const BasicBlock *BB = &(F->getEntryBlock());
+  BFS.push(BB);
+  Unvisited.insert(BB);
+
+  while (BFS.size()) {
+    BB = BFS.front(); BFS.pop();
+    succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+    if (NBB == End) {
+      Edge e = getEdge(BB,0);
+      double w = getEdgeWeight(e);
+      if (w == MissingValue) {
+        // If the return edge has no value, try to read value from block.
+        double bw = getExecutionCount(BB);
+        if (bw != MissingValue) {
+          setEdgeWeight(e,bw);
+          ReturnWeight += bw;
+        } else {
+          // If both return edge and block provide no value, collect edge.
+          ReturnEdges.insert(e);
+        }
+      } else {
+        // If the return edge has a proper value, collect it.
+        ReturnWeight += w;
+      }
+    }
+    for (;NBB != End; ++NBB) {
+      if (Unvisited.insert(*NBB).second) {
+        BFS.push(*NBB);
+      }
+    }
+  }
+
+  while (Unvisited.size() > 0) {
+    unsigned oldUnvisitedCount = Unvisited.size();
+    bool FoundPath = false;
+
+    // If there is only one edge left, calculate it.
+    if (ReturnEdges.size() == 1) {
+      ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+      Edge e = *ReturnEdges.begin();
+      setEdgeWeight(e,ReturnWeight);
+      setExecutionCount(e.first,ReturnWeight);
+
+      Unvisited.erase(e.first);
+      ReturnEdges.erase(e);
+      continue;
+    }
+
+    // Calculate all blocks where only one edge is missing; this may also
+    // resolve further return edges.
+    std::set<const BasicBlock *>::iterator FI = Unvisited.begin(),
+                                           FE = Unvisited.end();
+    while(FI != FE) {
+      const BasicBlock *BB = *FI; ++FI;
+      Edge e;
+      if(CalculateMissingEdge(BB,e,true)) {
+        if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+          setExecutionCount(BB,getExecutionCount(BB));
+        }
+        Unvisited.erase(BB);
+        if (e.first != 0 && e.second == 0) {
+          ReturnEdges.erase(e);
+          ReturnWeight += getEdgeWeight(e);
+        }
+      }
+    }
+    if (oldUnvisitedCount > Unvisited.size()) continue;
+
+    // Estimate edge weights by dividing the flow proportionally.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE) {
+      const BasicBlock *BB = *FI; ++FI;
+      const BasicBlock *Dest = 0;
+      bool AllEdgesHaveSameReturn = true;
+      // Check each successor; these must all end up in the same or an empty
+      // return block, otherwise it is dangerous to do an estimation on them.
+      for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+           Succ != End; ++Succ) {
+        Path P;
+        GetPath(*Succ, 0, P, GetPathToExit);
+        if (Dest && Dest != P[0]) {
+          AllEdgesHaveSameReturn = false;
+        }
+        Dest = P[0];
+      }
+      if (AllEdgesHaveSameReturn) {
+        if(EstimateMissingEdges(BB)) {
+          Unvisited.erase(BB);
+          break;
+        }
+      }
+    }
+    if (oldUnvisitedCount > Unvisited.size()) continue;
+
+    // Check if there is a path to a block that has a known value and redirect
+    // flow accordingly.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      // Fetch path.
+      const BasicBlock *BB = *FI; ++FI;
+      Path P;
+      const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+      // Calculate incoming flow.
+      double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+      std::set<const BasicBlock *> Processed;
+      for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(*NBB, BB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            iw += ew;
+            invalid++;
+          } else {
+            // If the path contains the successor, this means it is a backedge;
+            // do not count it as missing.
+            if (P.find(*NBB) == P.end())
+              inmissing++;
+          }
+          incount++;
+        }
+      }
+      if (inmissing == incount) continue;
+      if (invalid == 0) continue;
+
+      // Subtract (already) outgoing flow.
+      Processed.clear();
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(BB, *NBB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            iw -= ew;
+          }
+        }
+      }
+      if (iw < 0) continue;
+
+      // Check the receiving end of the path if it can handle the flow.
+      double ow = getExecutionCount(Dest);
+      Processed.clear();
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(BB, *NBB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            ow -= ew;
+          }
+        }
+      }
+      if (ow < 0) continue;
+
+      // Determine how much flow shall be used.
+      double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+      if (ew != MissingValue) {
+        ew = ew < iw ? ew : iw;
+        ew = ew < ow ? ew : ow;
+      } else {
+        if (inmissing == 0)
+          ew = iw < ow ? iw : ow;
+      }
+
+      // Create flow.
+      if (ew != MissingValue) {
+        do {
+          Edge e = getEdge(P[Dest],Dest);
+          if (getEdgeWeight(e) == MissingValue) {
+            setEdgeWeight(e,ew);
+            FoundPath = true;
+          }
+          Dest = P[Dest];
+        } while (Dest != BB);
+      }
+    }
+    if (FoundPath) continue;
+
+    // Calculate a block with a self loop.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      const BasicBlock *BB = *FI; ++FI;
+      bool SelfEdgeFound = false;
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (*NBB == BB) {
+          SelfEdgeFound = true;
+          break;
+        }
+      }
+      if (SelfEdgeFound) {
+        Edge e = getEdge(BB,BB);
+        if (getEdgeWeight(e) == MissingValue) {
+          double iw = 0;
+          std::set<const BasicBlock *> Processed;
+          for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
+               NBB != End; ++NBB) {
+            if (Processed.insert(*NBB).second) {
+              Edge e = getEdge(*NBB, BB);
+              double ew = getEdgeWeight(e);
+              if (ew != MissingValue) {
+                iw += ew;
+              }
+            }
+          }
+          setEdgeWeight(e,iw * 10);
+          FoundPath = true;
+        }
+      }
+    }
+    if (FoundPath) continue;
+
+    // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + const BasicBlock *Dest = 0; + Path P; + bool BackEdgeFound = false; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); + if (Dest == *NBB) { + BackEdgeFound = true; + break; + } + } + if (BackEdgeFound) { + Edge e = getEdge(Dest,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Channel flow to return block. + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + Path P; + const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); + Dest = P[0]; + if (!Dest) continue; + + if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { + // Calculate incoming flow. + double iw = 0; + std::set Processed; + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + if (Processed.insert(*NBB).second) { + Edge e = getEdge(*NBB, BB); + double ew = getEdgeWeight(e); + if (ew != MissingValue) { + iw += ew; + } + } + } + do { + Edge e = getEdge(P[Dest], Dest); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,iw); + FoundPath = true; + } else { + assert(0 && "Edge should not have value already!"); + } + Dest = P[Dest]; + } while (Dest != BB); + } + } + if (FoundPath) continue; + + // Speculatively set edges to zero. 
+ FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE && !FoundPath) { + const BasicBlock *BB = *FI; ++FI; + + for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); + NBB != End; ++NBB) { + Edge e = getEdge(*NBB,BB); + double w = getEdgeWeight(e); + if (w == MissingValue) { + setEdgeWeight(e,0); + FoundPath = true; + break; + } + } + } + if (FoundPath) continue; + + errs() << "{"; + FI = Unvisited.begin(), FE = Unvisited.end(); + while(FI != FE) { + const BasicBlock *BB = *FI; ++FI; + dbgs() << BB->getName(); + if (FI != FE) + dbgs() << ","; + } + errs() << "}"; + + errs() << "ASSERT: could not repair function"; + assert(0 && "could not repair function"); + } + + EdgeWeights J = EdgeInformation[F]; + for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { + Edge e = EI->first; + + bool SuccFound = false; + if (e.first != 0) { + succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); + if (NBB == End) { + if (0 == e.second) { + SuccFound = true; + } + } + for (;NBB != End; ++NBB) { + if (*NBB == e.second) { + SuccFound = true; + break; + } + } + if (!SuccFound) { + removeEdge(e); + } + } + } +} + +raw_ostream& operator<<(raw_ostream &O, const Function *F) { + return O << F->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { + return O << MF->getFunction()->getName() << "(MF)"; +} + +raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) { + return O << BB->getName(); +} + +raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { + return O << MBB->getBasicBlock()->getName() << "(MB)"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +raw_ostream& operator<<(raw_ostream &O, std::pair E) { + O << "("; + + if (E.first) + O << E.first; + else + O << "0"; + + O << ","; + + if (E.second) + O << E.second; + else + O << "0"; + + return O << ")"; +} + +} // namespace llvm + +//===----------------------------------------------------------------------===// +// NoProfile ProfileInfo implementation +// + +namespace { + struct NoProfileInfo : public ImmutablePass, public ProfileInfo { + static char ID; // Class identification, replacement for typeinfo + NoProfileInfo() : ImmutablePass(ID) { + initializeNoProfileInfoPass(*PassRegistry::getPassRegistry()); + } + + /// getAdjustedAnalysisPointer - This method is used when a pass implements + /// an analysis interface through multiple inheritance. If needed, it + /// should override this to adjust the this pointer as needed for the + /// specified pass info. + virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { + if (PI == &ProfileInfo::ID) + return (ProfileInfo*)this; + return this; + } + + virtual const char *getPassName() const { + return "NoProfileInfo"; + } + }; +} // End of anonymous namespace + +char NoProfileInfo::ID = 0; +// Register this pass... 
+INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile",
+                   "No Profile Information", false, true, true)
+
+ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); }
diff --git a/final/lib/Analysis/ProfileInfoLoader.cpp b/final/lib/Analysis/ProfileInfoLoader.cpp
new file mode 100644
index 00000000000..25481b2ee67
--- /dev/null
+++ b/final/lib/Analysis/ProfileInfoLoader.cpp
@@ -0,0 +1,158 @@
+//===- ProfileInfoLoader.cpp - Load profile information from disk ---------===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The ProfileInfoLoader class is used to load and represent profiling
+// information read in from the dump file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Analysis/ProfileInfoTypes.h"
+#include "llvm/Module.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdio>
+#include <cstdlib>
+#include <map>
+using namespace llvm;
+
+// ByteSwap - Byteswap 'Var' if 'Really' is true.
+//
+static inline unsigned ByteSwap(unsigned Var, bool Really) {
+  if (!Really) return Var;
+  return ((Var & (255U<< 0U)) << 24U) |
+         ((Var & (255U<< 8U)) <<  8U) |
+         ((Var & (255U<<16U)) >>  8U) |
+         ((Var & (255U<<24U)) >> 24U);
+}
+
+static unsigned AddCounts(unsigned A, unsigned B) {
+  // If either value is undefined, use the other.
+  if (A == ProfileInfoLoader::Uncounted) return B;
+  if (B == ProfileInfoLoader::Uncounted) return A;
+  return A + B;
+}
+
+static void ReadProfilingBlock(const char *ToolName, FILE *F,
+                               bool ShouldByteSwap,
+                               std::vector<unsigned> &Data) {
+  // Read the number of entries...
+  unsigned NumEntries;
+  if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
+    errs() << ToolName << ": data packet truncated!\n";
+    perror(0);
+    exit(1);
+  }
+  NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
+
+  // Read the counts...
+  std::vector<unsigned> TempSpace(NumEntries);
+
+  // Read in the block of data...
+  if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
+    errs() << ToolName << ": data packet truncated!\n";
+    perror(0);
+    exit(1);
+  }
+
+  // Make sure we have enough space... The space is initialised to -1 to
+  // facilitate the loading of missing values for OptimalEdgeProfiling.
+  if (Data.size() < NumEntries)
+    Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
+
+  // Accumulate the data we just read into the data.
+  if (!ShouldByteSwap) {
+    for (unsigned i = 0; i != NumEntries; ++i) {
+      Data[i] = AddCounts(TempSpace[i], Data[i]);
+    }
+  } else {
+    for (unsigned i = 0; i != NumEntries; ++i) {
+      Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
+    }
+  }
+}
+
+const unsigned ProfileInfoLoader::Uncounted = ~0U;
+
+// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
+// program if the file is invalid or broken.
+//
+ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
+                                     const std::string &Filename,
+                                     Module &TheModule) :
+  Filename(Filename),
+  M(TheModule), Warned(false) {
+  FILE *F = fopen(Filename.c_str(), "rb");
+  if (F == 0) {
+    errs() << ToolName << ": Error opening '" << Filename << "': ";
+    perror(0);
+    exit(1);
+  }
+
+  // Keep reading packets until we run out of them.
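+  // The file is a flat sequence of packets, each introduced by a 32-bit type
+  // tag. An illustrative layout of one FunctionInfo packet (host byte order;
+  // the field names here are descriptive only):
+  //
+  //   [FunctionInfo][NumEntries][count_0][count_1]...[count_NumEntries-1]
+  //
+  // A file written with the opposite endianness makes the low byte of the
+  // tag read as zero, which is how the byteswap case below is detected.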
+ unsigned PacketType; + while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { + // If the low eight bits of the packet are zero, we must be dealing with an + // endianness mismatch. Byteswap all words read from the profiling + // information. + bool ShouldByteSwap = (char)PacketType == 0; + PacketType = ByteSwap(PacketType, ShouldByteSwap); + + switch (PacketType) { + case ArgumentInfo: { + unsigned ArgLength; + if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + ArgLength = ByteSwap(ArgLength, ShouldByteSwap); + + // Read in the arguments... + std::vector Chars(ArgLength+4); + + if (ArgLength) + if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) { + errs() << ToolName << ": arguments packet truncated!\n"; + perror(0); + exit(1); + } + CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength])); + break; + } + + case FunctionInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts); + break; + + case BlockInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts); + break; + + case EdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); + break; + + case OptEdgeInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts); + break; + + case BBTraceInfo: + ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace); + break; + + default: + errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n"; + exit(1); + } + } + + fclose(F); +} + diff --git a/final/lib/Analysis/ProfileInfoLoaderPass.cpp b/final/lib/Analysis/ProfileInfoLoaderPass.cpp new file mode 100644 index 00000000000..098079bcffc --- /dev/null +++ b/final/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -0,0 +1,267 @@ +//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a concrete implementation of profiling information that +// loads the information from a profile dump file. 
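+// The loader visits functions, blocks and edges in the same deterministic
+// order as the instrumentation that wrote the file, so the i-th counter in
+// a packet corresponds to the i-th edge (or block, or function) encountered
+// during the walk below.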
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-loader"
+#include "llvm/BasicBlock.h"
+#include "llvm/InstrTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Analysis/ProfileInfoLoader.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallSet.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumEdgesRead, "The # of edges read.");
+
+static cl::opt<std::string>
+ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
+                    cl::value_desc("filename"),
+                    cl::desc("Profile file loaded by -profile-loader"));
+
+namespace {
+  class LoaderPass : public ModulePass, public ProfileInfo {
+    std::string Filename;
+    std::set<Edge> SpanningTree;
+    std::set<const BasicBlock*> BBisUnvisited;
+    unsigned ReadCount;
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    explicit LoaderPass(const std::string &filename = "")
+      : ModulePass(ID), Filename(filename) {
+      initializeLoaderPassPass(*PassRegistry::getPassRegistry());
+      if (filename.empty()) Filename = ProfileInfoFilename;
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+
+    virtual const char *getPassName() const {
+      return "Profiling information loader";
+    }
+
+    // recurseBasicBlock() - Calculates the edge weights for as many basic
+    // blocks as possible.
+    virtual void recurseBasicBlock(const BasicBlock *BB);
+    virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
+    virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &ProfileInfo::ID)
+        return (ProfileInfo*)this;
+      return this;
+    }
+
+    /// run - Load the profile information from the specified file.
+    virtual bool runOnModule(Module &M);
+  };
}  // End of anonymous namespace
+
+char LoaderPass::ID = 0;
+INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
+              "Load profile information from llvmprof.out", false, true, false)
+
+char &llvm::ProfileLoaderPassID = LoaderPass::ID;
+
+ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
+
+/// createProfileLoaderPass - This function returns a Pass that loads the
+/// profiling information for the module from the specified filename, making it
+/// available to the optimizers.
+Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
+  return new LoaderPass(Filename);
+}
+
+void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
+                                    unsigned &uncalc, double &count) {
+  double w;
+  if ((w = getEdgeWeight(edge)) == MissingValue) {
+    tocalc = edge;
+    uncalc++;
+  } else {
+    count+=w;
+  }
+}
+
+// recurseBasicBlock - Visits all neighbours of a block and then tries to
+// calculate the missing edge values.
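+// With optimal edge profiling only edges that are not on a maximal spanning
+// tree of the CFG carry counters; the weights of the remaining edges are
+// reconstructed here through flow conservation. Illustrative example: in a
+// diamond A->B, A->C, B->D, C->D, once the count of A and the counter for
+// A->B are known, A->C follows as count(A) - count(A->B), and the two edges
+// into D are determined the same way.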
+void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
+
+  // break recursion if already visited
+  if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
+  BBisUnvisited.erase(BB);
+  if (!BB) return;
+
+  for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+  for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+       bbi != bbe; ++bbi) {
+    recurseBasicBlock(*bbi);
+  }
+
+  Edge tocalc;
+  if (CalculateMissingEdge(BB, tocalc)) {
+    SpanningTree.erase(tocalc);
+  }
+}
+
+void LoaderPass::readEdge(ProfileInfo::Edge e,
+                          std::vector<unsigned> &ECs) {
+  if (ReadCount < ECs.size()) {
+    double weight = ECs[ReadCount++];
+    if (weight != ProfileInfoLoader::Uncounted) {
+      // Here the data realm changes from the unsigned of the file to the
+      // double of the ProfileInfo. This conversion is safe because we know
+      // that everything that's representable in unsigned is also representable
+      // in double.
+      EdgeInformation[getFunction(e)][e] += (double)weight;
+
+      DEBUG(dbgs() << "--Read Edge Counter for " << e
+                   << " (# "<< (ReadCount-1) << "): "
+                   << (unsigned)getEdgeWeight(e) << "\n");
+    } else {
+      // This happens only if reading optimal profiling information, not when
+      // reading regular profiling information.
+      SpanningTree.insert(e);
+    }
+  }
+}
+
+bool LoaderPass::runOnModule(Module &M) {
+  ProfileInfoLoader PIL("profile-loader", Filename, M);
+
+  EdgeInformation.clear();
+  std::vector<unsigned> Counters = PIL.getRawEdgeCounts();
+  if (Counters.size() > 0) {
+    ReadCount = 0;
+    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+      if (F->isDeclaration()) continue;
+      DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+      readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        TerminatorInst *TI = BB->getTerminator();
+        for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+          readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+        }
+      }
+    }
+    if (ReadCount != Counters.size()) {
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
+    }
+    NumEdgesRead = ReadCount;
+  }
+
+  Counters = PIL.getRawOptimalEdgeCounts();
+  if (Counters.size() > 0) {
+    ReadCount = 0;
+    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+      if (F->isDeclaration()) continue;
+      DEBUG(dbgs()<<"Working on "<<F->getNameStr()<<"\n");
+      readEdge(getEdge(0,&F->getEntryBlock()), Counters);
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
+        TerminatorInst *TI = BB->getTerminator();
+        if (TI->getNumSuccessors() == 0) {
+          readEdge(getEdge(BB,0), Counters);
+        }
+        for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
+          readEdge(getEdge(BB,TI->getSuccessor(s)), Counters);
+        }
+      }
+      while (SpanningTree.size() > 0) {
+
+        unsigned size = SpanningTree.size();
+
+        BBisUnvisited.clear();
+        for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+             ee = SpanningTree.end(); ei != ee; ++ei) {
+          BBisUnvisited.insert(ei->first);
+          BBisUnvisited.insert(ei->second);
+        }
+        while (BBisUnvisited.size() > 0) {
+          recurseBasicBlock(*BBisUnvisited.begin());
+        }
+
+        if (SpanningTree.size() == size) {
+          DEBUG(dbgs()<<"{");
+          for (std::set<Edge>::iterator ei = SpanningTree.begin(),
+               ee = SpanningTree.end(); ei != ee; ++ei) {
+            DEBUG(dbgs()<< *ei <<",");
+          }
+          assert(0 && "No edge calculated!");
+        }
+
+      }
+    }
+    if (ReadCount != Counters.size()) {
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
+    }
+    NumEdgesRead = ReadCount;
+  }
+
+  BlockInformation.clear();
+  Counters = PIL.getRawBlockCounts();
+  if (Counters.size() > 0) {
+    ReadCount = 0;
+    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+      if (F->isDeclaration()) continue;
+      for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+        if (ReadCount < Counters.size())
+          // Here the data realm changes from the unsigned of the file to the
+          // double of the ProfileInfo. This conversion is safe because we know
+          // that everything that's representable in unsigned is also
+          // representable in double.
+          BlockInformation[F][BB] = (double)Counters[ReadCount++];
+    }
+    if (ReadCount != Counters.size()) {
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
+    }
+  }
+
+  FunctionInformation.clear();
+  Counters = PIL.getRawFunctionCounts();
+  if (Counters.size() > 0) {
+    ReadCount = 0;
+    for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
+      if (F->isDeclaration()) continue;
+      if (ReadCount < Counters.size())
+        // Here the data realm changes from the unsigned of the file to the
+        // double of the ProfileInfo. This conversion is safe because we know
+        // that everything that's representable in unsigned is also
+        // representable in double.
+        FunctionInformation[F] = (double)Counters[ReadCount++];
+    }
+    if (ReadCount != Counters.size()) {
+      errs() << "WARNING: profile information is inconsistent with "
+             << "the current program!\n";
+    }
+  }
+
+  return false;
+}
diff --git a/final/lib/Analysis/ProfileVerifierPass.cpp b/final/lib/Analysis/ProfileVerifierPass.cpp
new file mode 100644
index 00000000000..a01751849c5
--- /dev/null
+++ b/final/lib/Analysis/ProfileVerifierPass.cpp
@@ -0,0 +1,382 @@
+//===- ProfileVerifierPass.cpp - LLVM Pass to verify profile info ---------===//
+//
+//                      The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a pass that checks profiling information for
+// plausibility.
+//
+//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "profile-verifier"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Debug.h"
+#include <set>
+using namespace llvm;
+
+static cl::opt<bool>
+ProfileVerifierDisableAssertions("profile-verifier-noassert",
+     cl::desc("Disable assertions"));
+
+namespace llvm {
+  template<class FType, class BType>
+  class ProfileVerifierPassT : public FunctionPass {
+
+    struct DetailedBlockInfo {
+      const BType *BB;
+      double      BBWeight;
+      double      inWeight;
+      int         inCount;
+      double      outWeight;
+      int         outCount;
+    };
+
+    ProfileInfoT<FType, BType> *PI;
+    std::set<const BType*> BBisVisited;
+    std::set<const FType*> FisVisited;
+    bool DisableAssertions;
+
+    // When debugging is enabled, the verifier prints a whole slew of debug
+    // information, otherwise it's just the assert. These are all the helper
+    // functions.
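+    // The verified invariant is per-block flow conservation:
+    // inWeight == BBWeight == outWeight, modulo the special cases for blocks
+    // that can reach exit() and for setjmp targets handled below.
+    // Illustrative example: a block executed 10 times must have both its
+    // incoming and its outgoing edge weights summing to 10.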
+ bool PrintedDebugTree; + std::set BBisPrinted; + void debugEntry(DetailedBlockInfo*); + void printDebugInfo(const BType *BB); + + public: + static char ID; // Class identification, replacement for typeinfo + + explicit ProfileVerifierPassT () : FunctionPass(ID) { + initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); + DisableAssertions = ProfileVerifierDisableAssertions; + } + explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), + DisableAssertions(da) { + initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired >(); + } + + const char *getPassName() const { + return "Profiling information verifier"; + } + + /// run - Verify the profile information. + bool runOnFunction(FType &F); + void recurseBasicBlock(const BType*); + + bool exitReachable(const FType*); + double ReadOrAssert(typename ProfileInfoT::Edge); + void CheckValue(bool, const char*, DetailedBlockInfo*); + }; + + typedef ProfileVerifierPassT ProfileVerifierPass; + + template + void ProfileVerifierPassT::printDebugInfo(const BType *BB) { + + if (BBisPrinted.find(BB) != BBisPrinted.end()) return; + + double BBWeight = PI->getExecutionCount(BB); + if (BBWeight == ProfileInfoT::MissingValue) { BBWeight = 0; } + double inWeight = 0; + int inCount = 0; + std::set ProcessedPreds; + for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedPreds.insert(*bbi).second) { + typename ProfileInfoT::Edge E = PI->getEdge(*bbi,BB); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT::MissingValue) { EdgeWeight = 0; } + dbgs() << "calculated in-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + inWeight += EdgeWeight; + inCount++; + } + } + double outWeight = 0; + int outCount = 0; + std::set ProcessedSuccs; + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + if (ProcessedSuccs.insert(*bbi).second) { + typename ProfileInfoT::Edge E = PI->getEdge(BB,*bbi); + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT::MissingValue) { EdgeWeight = 0; } + dbgs() << "calculated out-edge " << E << ": " + << format("%20.20g",EdgeWeight) << "\n"; + outWeight += EdgeWeight; + outCount++; + } + } + dbgs() << "Block " << BB->getNameStr() << " in " + << BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",BBWeight) << "," + << "inWeight=" << format("%20.20g",inWeight) << "," + << "inCount=" << inCount << "," + << "outWeight=" << format("%20.20g",outWeight) << "," + << "outCount" << outCount << "\n"; + + // mark as visited and recurse into subnodes + BBisPrinted.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + printDebugInfo(*bbi); + } + } + + template + void ProfileVerifierPassT::debugEntry (DetailedBlockInfo *DI) { + dbgs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in " + << DI->BB->getParent()->getNameStr() << ":" + << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," + << "inWeight=" << format("%20.20g",DI->inWeight) << "," + << "inCount=" << DI->inCount << "," + << "outWeight=" << format("%20.20g",DI->outWeight) << "," + << "outCount=" << DI->outCount << "\n"; + if (!PrintedDebugTree) { + PrintedDebugTree = true; + printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); + } + } + + // This compares A and B for equality. 
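+  // Exact floating-point equality is intentional: the weights originate from
+  // integer counters, and every integer up to 2^53 is exactly representable
+  // in a double, so counts that should match compare exactly equal.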
+ static bool Equals(double A, double B) { + return A == B; + } + + // This checks if the function "exit" is reachable from an given function + // via calls, this is necessary to check if a profile is valid despite the + // counts not fitting exactly. + template + bool ProfileVerifierPassT::exitReachable(const FType *F) { + if (!F) return false; + + if (FisVisited.count(F)) return false; + + FType *Exit = F->getParent()->getFunction("exit"); + if (Exit == F) { + return true; + } + + FisVisited.insert(F); + bool exits = false; + for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (const CallInst *CI = dyn_cast(&*I)) { + FType *F = CI->getCalledFunction(); + if (F) { + exits |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + exits = true; + } + if (exits) break; + } + } + return exits; + } + + #define ASSERTMESSAGE(M) \ + { dbgs() << "ASSERT:" << (M) << "\n"; \ + if (!DisableAssertions) assert(0 && (M)); } + + template + double ProfileVerifierPassT::ReadOrAssert(typename ProfileInfoT::Edge E) { + double EdgeWeight = PI->getEdgeWeight(E); + if (EdgeWeight == ProfileInfoT::MissingValue) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has missing value"); + return 0; + } else { + if (EdgeWeight < 0) { + dbgs() << "Edge " << E << " in Function " + << ProfileInfoT::getFunction(E)->getNameStr() << ": "; + ASSERTMESSAGE("Edge has negative value"); + } + return EdgeWeight; + } + } + + template + void ProfileVerifierPassT::CheckValue(bool Error, + const char *Message, + DetailedBlockInfo *DI) { + if (Error) { + DEBUG(debugEntry(DI)); + dbgs() << "Block " << DI->BB->getNameStr() << " in Function " + << DI->BB->getParent()->getNameStr() << ": "; + ASSERTMESSAGE(Message); + } + return; + } + + // This calculates the Information for a block and then recurses into the + // successors. + template + void ProfileVerifierPassT::recurseBasicBlock(const BType *BB) { + + // Break the recursion by remembering all visited blocks. + if (BBisVisited.find(BB) != BBisVisited.end()) return; + + // Use a data structure to store all the information, this can then be handed + // to debug printers. + DetailedBlockInfo DI; + DI.BB = BB; + DI.outCount = DI.inCount = 0; + DI.inWeight = DI.outWeight = 0; + + // Read predecessors. + std::set ProcessedPreds; + const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); + // If there are none, check for (0,BB) edge. + if (bpi == bpe) { + DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); + DI.inCount++; + } + for (;bpi != bpe; ++bpi) { + if (ProcessedPreds.insert(*bpi).second) { + DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); + DI.inCount++; + } + } + + // Read successors. + std::set ProcessedSuccs; + succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + // If there is an (0,BB) edge, consider it too. (This is done not only when + // there are no successors, but every time; not every function contains + // return blocks with no successors (think loop latch as return block)). + double w = PI->getEdgeWeight(PI->getEdge(BB,0)); + if (w != ProfileInfoT::MissingValue) { + DI.outWeight += w; + DI.outCount++; + } + for (;bbi != bbe; ++bbi) { + if (ProcessedSuccs.insert(*bbi).second) { + DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); + DI.outCount++; + } + } + + // Read block weight. 
+ DI.BBWeight = PI->getExecutionCount(BB); + CheckValue(DI.BBWeight == ProfileInfoT::MissingValue, + "BasicBlock has missing value", &DI); + CheckValue(DI.BBWeight < 0, + "BasicBlock has negative value", &DI); + + // Check if this block is a setjmp target. + bool isSetJmpTarget = false; + if (DI.outWeight > DI.inWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast(&*i)) { + FType *F = CI->getCalledFunction(); + if (F && (F->getName() == "_setjmp")) { + isSetJmpTarget = true; break; + } + } + } + } + // Check if this block is eventually reaching exit. + bool isExitReachable = false; + if (DI.inWeight > DI.outWeight) { + for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); + i != ie; ++i) { + if (const CallInst *CI = dyn_cast(&*i)) { + FType *F = CI->getCalledFunction(); + if (F) { + FisVisited.clear(); + isExitReachable |= exitReachable(F); + } else { + // This is a call to a pointer, all bets are off... + isExitReachable = true; + } + if (isExitReachable) break; + } + } + } + + if (DI.inCount > 0 && DI.outCount == 0) { + // If this is a block with no successors. + if (!isSetJmpTarget) { + CheckValue(!Equals(DI.inWeight,DI.BBWeight), + "inWeight and BBWeight do not match", &DI); + } + } else if (DI.inCount == 0 && DI.outCount > 0) { + // If this is a block with no predecessors. + if (!isExitReachable) + CheckValue(!Equals(DI.BBWeight,DI.outWeight), + "BBWeight and outWeight do not match", &DI); + } else { + // If this block has successors and predecessors. + if (DI.inWeight > DI.outWeight && !isExitReachable) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + if (DI.inWeight < DI.outWeight && !isSetJmpTarget) + CheckValue(!Equals(DI.inWeight,DI.outWeight), + "inWeight and outWeight do not match", &DI); + } + + + // Mark this block as visited, rescurse into successors. + BBisVisited.insert(BB); + for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); + bbi != bbe; ++bbi ) { + recurseBasicBlock(*bbi); + } + } + + template + bool ProfileVerifierPassT::runOnFunction(FType &F) { + PI = getAnalysisIfAvailable >(); + if (!PI) + ASSERTMESSAGE("No ProfileInfo available"); + + // Prepare global variables. + PrintedDebugTree = false; + BBisVisited.clear(); + + // Fetch entry block and recurse into it. 
+ const BType *entry = &F.getEntryBlock(); + recurseBasicBlock(entry); + + if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) + ASSERTMESSAGE("Function count and entry block count do not match"); + + return false; + } + + template + char ProfileVerifierPassT::ID = 0; +} + +INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier", + "Verify profiling information", false, true) +INITIALIZE_AG_DEPENDENCY(ProfileInfo) +INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier", + "Verify profiling information", false, true) + +namespace llvm { + FunctionPass *createProfileVerifierPass() { + return new ProfileVerifierPass(ProfileVerifierDisableAssertions); + } +} + diff --git a/final/lib/Analysis/README.txt b/final/lib/Analysis/README.txt new file mode 100644 index 00000000000..0e96e4c950c --- /dev/null +++ b/final/lib/Analysis/README.txt @@ -0,0 +1,30 @@ +Analysis Opportunities: + +//===---------------------------------------------------------------------===// + +In test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll, the +ScalarEvolution expression for %r is this: + + {1,+,3,+,2} + +Outside the loop, this could be evaluated simply as (%n * %n), however +ScalarEvolution currently evaluates it as + + (-2 + (2 * (trunc i65 (((zext i64 (-2 + %n) to i65) * (zext i64 (-1 + %n) to i65)) /u 2) to i64)) + (3 * %n)) + +In addition to being much more complicated, it involves i65 arithmetic, +which is very inefficient when expanded into code. + +//===---------------------------------------------------------------------===// + +In formatValue in test/CodeGen/X86/lsr-delayed-fold.ll, + +ScalarEvolution is forming this expression: + +((trunc i64 (-1 * %arg5) to i32) + (trunc i64 %arg5 to i32) + (-1 * (trunc i64 undef to i32))) + +This could be folded to + +(-1 * (trunc i64 undef to i32)) + +//===---------------------------------------------------------------------===// diff --git a/final/lib/Analysis/RegionInfo.cpp b/final/lib/Analysis/RegionInfo.cpp new file mode 100644 index 00000000000..e2f6a8bf5d9 --- /dev/null +++ b/final/lib/Analysis/RegionInfo.cpp @@ -0,0 +1,851 @@ +//===- RegionInfo.cpp - SESE region detection analysis --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Detects single entry single exit regions in the control flow graph. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Assembly/Writer.h" + +#define DEBUG_TYPE "region" +#include "llvm/Support/Debug.h" + +#include +#include + +using namespace llvm; + +// Always verify if expensive checking is enabled. 
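+// Verification can also be requested at run time via the flag declared
+// below; an illustrative invocation (foo.bc is a placeholder input):
+//   opt -regions -verify-region-info -analyze foo.bc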
+#ifdef XDEBUG +static bool VerifyRegionInfo = true; +#else +static bool VerifyRegionInfo = false; +#endif + +static cl::opt +VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), + cl::desc("Verify region info (time consuming)")); + +STATISTIC(numRegions, "The # of regions"); +STATISTIC(numSimpleRegions, "The # of simple regions"); + +//===----------------------------------------------------------------------===// +/// PrintStyle - Print region in difference ways. +enum PrintStyle { PrintNone, PrintBB, PrintRN }; + +static cl::opt printStyle("print-region-style", cl::Hidden, + cl::desc("style of printing regions"), + cl::values( + clEnumValN(PrintNone, "none", "print no details"), + clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"), + clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"), + clEnumValEnd)); +//===----------------------------------------------------------------------===// +/// Region Implementation +Region::Region(BasicBlock *Entry, BasicBlock *Exit, RegionInfo* RInfo, + DominatorTree *dt, Region *Parent) + : RegionNode(Parent, Entry, 1), RI(RInfo), DT(dt), exit(Exit) {} + +Region::~Region() { + // Free the cached nodes. + for (BBNodeMapT::iterator it = BBNodeMap.begin(), + ie = BBNodeMap.end(); it != ie; ++it) + delete it->second; + + // Only clean the cache for this Region. Caches of child Regions will be + // cleaned when the child Regions are deleted. + BBNodeMap.clear(); + + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +void Region::replaceEntry(BasicBlock *BB) { + entry.setPointer(BB); +} + +void Region::replaceExit(BasicBlock *BB) { + assert(exit && "No exit to replace!"); + exit = BB; +} + +bool Region::contains(const BasicBlock *B) const { + BasicBlock *BB = const_cast(B); + + assert(DT->getNode(BB) && "BB not part of the dominance tree"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + // Toplevel region. + if (!exit) + return true; + + return (DT->dominates(entry, BB) + && !(DT->dominates(exit, BB) && DT->dominates(entry, exit))); +} + +bool Region::contains(const Loop *L) const { + // BBs that are not part of any loop are element of the Loop + // described by the NULL pointer. This loop is not part of any region, + // except if the region describes the whole function. 
+ if (L == 0) + return getExit() == 0; + + if (!contains(L->getHeader())) + return false; + + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + + for (SmallVectorImpl::iterator BI = ExitingBlocks.begin(), + BE = ExitingBlocks.end(); BI != BE; ++BI) + if (!contains(*BI)) + return false; + + return true; +} + +Loop *Region::outermostLoopInRegion(Loop *L) const { + if (!contains(L)) + return 0; + + while (L && contains(L->getParentLoop())) { + L = L->getParentLoop(); + } + + return L; +} + +Loop *Region::outermostLoopInRegion(LoopInfo *LI, BasicBlock* BB) const { + assert(LI && BB && "LI and BB cannot be null!"); + Loop *L = LI->getLoopFor(BB); + return outermostLoopInRegion(L); +} + +BasicBlock *Region::getEnteringBlock() const { + BasicBlock *entry = getEntry(); + BasicBlock *Pred; + BasicBlock *enteringBlock = 0; + + for (pred_iterator PI = pred_begin(entry), PE = pred_end(entry); PI != PE; + ++PI) { + Pred = *PI; + if (DT->getNode(Pred) && !contains(Pred)) { + if (enteringBlock) + return 0; + + enteringBlock = Pred; + } + } + + return enteringBlock; +} + +BasicBlock *Region::getExitingBlock() const { + BasicBlock *exit = getExit(); + BasicBlock *Pred; + BasicBlock *exitingBlock = 0; + + if (!exit) + return 0; + + for (pred_iterator PI = pred_begin(exit), PE = pred_end(exit); PI != PE; + ++PI) { + Pred = *PI; + if (contains(Pred)) { + if (exitingBlock) + return 0; + + exitingBlock = Pred; + } + } + + return exitingBlock; +} + +bool Region::isSimple() const { + return !isTopLevelRegion() && getEnteringBlock() && getExitingBlock(); +} + +std::string Region::getNameStr() const { + std::string exitName; + std::string entryName; + + if (getEntry()->getName().empty()) { + raw_string_ostream OS(entryName); + + WriteAsOperand(OS, getEntry(), false); + entryName = OS.str(); + } else + entryName = getEntry()->getNameStr(); + + if (getExit()) { + if (getExit()->getName().empty()) { + raw_string_ostream OS(exitName); + + WriteAsOperand(OS, getExit(), false); + exitName = OS.str(); + } else + exitName = getExit()->getNameStr(); + } else + exitName = ""; + + return entryName + " => " + exitName; +} + +void Region::verifyBBInRegion(BasicBlock *BB) const { + if (!contains(BB)) + llvm_unreachable("Broken region found!"); + + BasicBlock *entry = getEntry(), *exit = getExit(); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (!contains(*SI) && exit != *SI) + llvm_unreachable("Broken region found!"); + + if (entry != BB) + for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) + if (!contains(*SI)) + llvm_unreachable("Broken region found!"); +} + +void Region::verifyWalk(BasicBlock *BB, std::set *visited) const { + BasicBlock *exit = getExit(); + + visited->insert(BB); + + verifyBBInRegion(BB); + + for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI) + if (*SI != exit && visited->find(*SI) == visited->end()) + verifyWalk(*SI, visited); +} + +void Region::verifyRegion() const { + // Only do verification when user wants to, otherwise this expensive + // check will be invoked by PassManager. 
+ if (!VerifyRegionInfo) return; + + std::set visited; + verifyWalk(getEntry(), &visited); +} + +void Region::verifyRegionNest() const { + for (Region::const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->verifyRegionNest(); + + verifyRegion(); +} + +Region::block_iterator Region::block_begin() { + return GraphTraits >::nodes_begin(this); +} + +Region::block_iterator Region::block_end() { + return GraphTraits >::nodes_end(this); +} + +Region::const_block_iterator Region::block_begin() const { + return GraphTraits >::nodes_begin(this); +} + +Region::const_block_iterator Region::block_end() const { + return GraphTraits >::nodes_end(this); +} + +Region::element_iterator Region::element_begin() { + return GraphTraits::nodes_begin(this); +} + +Region::element_iterator Region::element_end() { + return GraphTraits::nodes_end(this); +} + +Region::const_element_iterator Region::element_begin() const { + return GraphTraits::nodes_begin(this); +} + +Region::const_element_iterator Region::element_end() const { + return GraphTraits::nodes_end(this); +} + +Region* Region::getSubRegionNode(BasicBlock *BB) const { + Region *R = RI->getRegionFor(BB); + + if (!R || R == this) + return 0; + + // If we pass the BB out of this region, that means our code is broken. + assert(contains(R) && "BB not in current region!"); + + while (contains(R->getParent()) && R->getParent() != this) + R = R->getParent(); + + if (R->getEntry() != BB) + return 0; + + return R; +} + +RegionNode* Region::getBBNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + + BBNodeMapT::const_iterator at = BBNodeMap.find(BB); + + if (at != BBNodeMap.end()) + return at->second; + + RegionNode *NewNode = new RegionNode(const_cast(this), BB); + BBNodeMap.insert(std::make_pair(BB, NewNode)); + return NewNode; +} + +RegionNode* Region::getNode(BasicBlock *BB) const { + assert(contains(BB) && "Can get BB node out of this region!"); + if (Region* Child = getSubRegionNode(BB)) + return Child->getNode(); + + return getBBNode(BB); +} + +void Region::transferChildrenTo(Region *To) { + for (iterator I = begin(), E = end(); I != E; ++I) { + (*I)->parent = To; + To->children.push_back(*I); + } + children.clear(); +} + +void Region::addSubRegion(Region *SubRegion, bool moveChildren) { + assert(SubRegion->parent == 0 && "SubRegion already has a parent!"); + assert(std::find(begin(), end(), SubRegion) == children.end() + && "Subregion already exists!"); + + SubRegion->parent = this; + children.push_back(SubRegion); + + if (!moveChildren) + return; + + assert(SubRegion->children.size() == 0 + && "SubRegions that contain children are not supported"); + + for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) + if (!(*I)->isSubRegion()) { + BasicBlock *BB = (*I)->getNodeAs(); + + if (SubRegion->contains(BB)) + RI->setRegionFor(BB, SubRegion); + } + + std::vector Keep; + for (iterator I = begin(), E = end(); I != E; ++I) + if (SubRegion->contains(*I) && *I != SubRegion) { + SubRegion->children.push_back(*I); + (*I)->parent = SubRegion; + } else + Keep.push_back(*I); + + children.clear(); + children.insert(children.begin(), Keep.begin(), Keep.end()); +} + + +Region *Region::removeSubRegion(Region *Child) { + assert(Child->parent == this && "Child is not a child of this region!"); + Child->parent = 0; + RegionSet::iterator I = std::find(children.begin(), children.end(), Child); + assert(I != children.end() && "Region does not exit. 
Unable to remove."); + children.erase(children.begin()+(I-begin())); + return Child; +} + +unsigned Region::getDepth() const { + unsigned Depth = 0; + + for (Region *R = parent; R != 0; R = R->parent) + ++Depth; + + return Depth; +} + +Region *Region::getExpandedRegion() const { + unsigned NumSuccessors = exit->getTerminator()->getNumSuccessors(); + + if (NumSuccessors == 0) + return NULL; + + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(getEntry(), *PI)) + return NULL; + + Region *R = RI->getRegionFor(exit); + + if (R->getEntry() != exit) { + if (exit->getTerminator()->getNumSuccessors() == 1) + return new Region(getEntry(), *succ_begin(exit), RI, DT); + else + return NULL; + } + + while (R->getParent() && R->getParent()->getEntry() == exit) + R = R->getParent(); + + if (!DT->dominates(getEntry(), R->getExit())) + for (pred_iterator PI = pred_begin(getExit()), PE = pred_end(getExit()); + PI != PE; ++PI) + if (!DT->dominates(R->getExit(), *PI)) + return NULL; + + return new Region(getEntry(), R->getExit(), RI, DT); +} + +void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { + if (print_tree) + OS.indent(level*2) << "[" << level << "] " << getNameStr(); + else + OS.indent(level*2) << getNameStr(); + + OS << "\n"; + + + if (printStyle != PrintNone) { + OS.indent(level*2) << "{\n"; + OS.indent(level*2 + 2); + + if (printStyle == PrintBB) { + for (const_block_iterator I = block_begin(), E = block_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last "," + } else if (printStyle == PrintRN) { + for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) + OS << **I << ", "; // TODO: remove the last ", + } + + OS << "\n"; + } + + if (print_tree) + for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->print(OS, print_tree, level+1); + + if (printStyle != PrintNone) + OS.indent(level*2) << "} \n"; +} + +void Region::dump() const { + print(dbgs(), true, getDepth()); +} + +void Region::clearNodeCache() { + // Free the cached nodes. + for (BBNodeMapT::iterator I = BBNodeMap.begin(), + IE = BBNodeMap.end(); I != IE; ++I) + delete I->second; + + BBNodeMap.clear(); + for (Region::iterator RI = begin(), RE = end(); RI != RE; ++RI) + (*RI)->clearNodeCache(); +} + +//===----------------------------------------------------------------------===// +// RegionInfo implementation +// + +bool RegionInfo::isCommonDomFrontier(BasicBlock *BB, BasicBlock *entry, + BasicBlock *exit) const { + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(entry, P) && !DT->dominates(exit, P)) + return false; + } + return true; +} + +bool RegionInfo::isRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + typedef DominanceFrontier::DomSetType DST; + + DST *entrySuccs = &DF->find(entry)->second; + + // Exit is the header of a loop that contains the entry. In this case, + // the dominance frontier must only contain the exit. + if (!DT->dominates(entry, exit)) { + for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) + if (*SI != exit && *SI != entry) + return false; + + return true; + } + + DST *exitSuccs = &DF->find(exit)->second; + + // Do not allow edges leaving the region. 
+ for (DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end(); + SI != SE; ++SI) { + if (*SI == exit || *SI == entry) + continue; + if (exitSuccs->find(*SI) == exitSuccs->end()) + return false; + if (!isCommonDomFrontier(*SI, entry, exit)) + return false; + } + + // Do not allow edges pointing into the region. + for (DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end(); + SI != SE; ++SI) + if (DT->properlyDominates(entry, *SI) && *SI != exit) + return false; + + + return true; +} + +void RegionInfo::insertShortCut(BasicBlock *entry, BasicBlock *exit, + BBtoBBMap *ShortCut) const { + assert(entry && exit && "entry and exit must not be null!"); + + BBtoBBMap::iterator e = ShortCut->find(exit); + + if (e == ShortCut->end()) + // No further region at exit available. + (*ShortCut)[entry] = exit; + else { + // We found a region e that starts at exit. Therefore (entry, e->second) + // is also a region, that is larger than (entry, exit). Insert the + // larger one. + BasicBlock *BB = e->second; + (*ShortCut)[entry] = BB; + } +} + +DomTreeNode* RegionInfo::getNextPostDom(DomTreeNode* N, + BBtoBBMap *ShortCut) const { + BBtoBBMap::iterator e = ShortCut->find(N->getBlock()); + + if (e == ShortCut->end()) + return N->getIDom(); + + return PDT->getNode(e->second)->getIDom(); +} + +bool RegionInfo::isTrivialRegion(BasicBlock *entry, BasicBlock *exit) const { + assert(entry && exit && "entry and exit must not be null!"); + + unsigned num_successors = succ_end(entry) - succ_begin(entry); + + if (num_successors <= 1 && exit == *(succ_begin(entry))) + return true; + + return false; +} + +void RegionInfo::updateStatistics(Region *R) { + ++numRegions; + + // TODO: Slow. Should only be enabled if -stats is used. + if (R->isSimple()) ++numSimpleRegions; +} + +Region *RegionInfo::createRegion(BasicBlock *entry, BasicBlock *exit) { + assert(entry && exit && "entry and exit must not be null!"); + + if (isTrivialRegion(entry, exit)) + return 0; + + Region *region = new Region(entry, exit, this, DT); + BBtoRegion.insert(std::make_pair(entry, region)); + + #ifdef XDEBUG + region->verifyRegion(); + #else + DEBUG(region->verifyRegion()); + #endif + + updateStatistics(region); + return region; +} + +void RegionInfo::findRegionsWithEntry(BasicBlock *entry, BBtoBBMap *ShortCut) { + assert(entry); + + DomTreeNode *N = PDT->getNode(entry); + + if (!N) + return; + + Region *lastRegion= 0; + BasicBlock *lastExit = entry; + + // As only a BasicBlock that postdominates entry can finish a region, walk the + // post dominance tree upwards. + while ((N = getNextPostDom(N, ShortCut))) { + BasicBlock *exit = N->getBlock(); + + if (!exit) + break; + + if (isRegion(entry, exit)) { + Region *newRegion = createRegion(entry, exit); + + if (lastRegion) + newRegion->addSubRegion(lastRegion); + + lastRegion = newRegion; + lastExit = exit; + } + + // This can never be a region, so stop the search. + if (!DT->dominates(entry, exit)) + break; + } + + // Tried to create regions from entry to lastExit. Next time take a + // shortcut from entry to lastExit. + if (lastExit != entry) + insertShortCut(entry, lastExit, ShortCut); +} + +void RegionInfo::scanForRegions(Function &F, BBtoBBMap *ShortCut) { + BasicBlock *entry = &(F.getEntryBlock()); + DomTreeNode *N = DT->getNode(entry); + + // Iterate over the dominance tree in post order to start with the small + // regions from the bottom of the dominance tree. 
If the small regions are + // detected first, detection of bigger regions is faster, as we can jump + // over the small regions. + for (po_iterator FI = po_begin(N), FE = po_end(N); FI != FE; + ++FI) { + findRegionsWithEntry(FI->getBlock(), ShortCut); + } +} + +Region *RegionInfo::getTopMostParent(Region *region) { + while (region->parent) + region = region->getParent(); + + return region; +} + +void RegionInfo::buildRegionsTree(DomTreeNode *N, Region *region) { + BasicBlock *BB = N->getBlock(); + + // Passed region exit + while (BB == region->getExit()) + region = region->getParent(); + + BBtoRegionMap::iterator it = BBtoRegion.find(BB); + + // This basic block is a start block of a region. It is already in the + // BBtoRegion relation. Only the child basic blocks have to be updated. + if (it != BBtoRegion.end()) { + Region *newRegion = it->second;; + region->addSubRegion(getTopMostParent(newRegion)); + region = newRegion; + } else { + BBtoRegion[BB] = region; + } + + for (DomTreeNode::iterator CI = N->begin(), CE = N->end(); CI != CE; ++CI) + buildRegionsTree(*CI, region); +} + +void RegionInfo::releaseMemory() { + BBtoRegion.clear(); + if (TopLevelRegion) + delete TopLevelRegion; + TopLevelRegion = 0; +} + +RegionInfo::RegionInfo() : FunctionPass(ID) { + initializeRegionInfoPass(*PassRegistry::getPassRegistry()); + TopLevelRegion = 0; +} + +RegionInfo::~RegionInfo() { + releaseMemory(); +} + +void RegionInfo::Calculate(Function &F) { + // ShortCut a function where for every BB the exit of the largest region + // starting with BB is stored. These regions can be threated as single BBS. + // This improves performance on linear CFGs. + BBtoBBMap ShortCut; + + scanForRegions(F, &ShortCut); + BasicBlock *BB = &F.getEntryBlock(); + buildRegionsTree(DT->getNode(BB), TopLevelRegion); +} + +bool RegionInfo::runOnFunction(Function &F) { + releaseMemory(); + + DT = &getAnalysis(); + PDT = &getAnalysis(); + DF = &getAnalysis(); + + TopLevelRegion = new Region(&F.getEntryBlock(), 0, this, DT, 0); + updateStatistics(TopLevelRegion); + + Calculate(F); + + return false; +} + +void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequired(); + AU.addRequired(); +} + +void RegionInfo::print(raw_ostream &OS, const Module *) const { + OS << "Region tree:\n"; + TopLevelRegion->print(OS, true, 0); + OS << "End region tree\n"; +} + +void RegionInfo::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. + if (!VerifyRegionInfo) return; + + TopLevelRegion->verifyRegionNest(); +} + +// Region pass manager support. +Region *RegionInfo::getRegionFor(BasicBlock *BB) const { + BBtoRegionMap::const_iterator I= + BBtoRegion.find(BB); + return I != BBtoRegion.end() ? I->second : 0; +} + +void RegionInfo::setRegionFor(BasicBlock *BB, Region *R) { + BBtoRegion[BB] = R; +} + +Region *RegionInfo::operator[](BasicBlock *BB) const { + return getRegionFor(BB); +} + +BasicBlock *RegionInfo::getMaxRegionExit(BasicBlock *BB) const { + BasicBlock *Exit = NULL; + + while (true) { + // Get largest region that starts at BB. + Region *R = getRegionFor(BB); + while (R && R->getParent() && R->getParent()->getEntry() == BB) + R = R->getParent(); + + // Get the single exit of BB. 
+ if (R && R->getEntry() == BB) + Exit = R->getExit(); + else if (++succ_begin(BB) == succ_end(BB)) + Exit = *succ_begin(BB); + else // No single exit exists. + return Exit; + + // Get largest region that starts at Exit. + Region *ExitR = getRegionFor(Exit); + while (ExitR && ExitR->getParent() + && ExitR->getParent()->getEntry() == Exit) + ExitR = ExitR->getParent(); + + for (pred_iterator PI = pred_begin(Exit), PE = pred_end(Exit); PI != PE; + ++PI) + if (!R->contains(*PI) && !ExitR->contains(*PI)) + break; + + // This stops infinite cycles. + if (DT->dominates(Exit, BB)) + break; + + BB = Exit; + } + + return Exit; +} + +Region* +RegionInfo::getCommonRegion(Region *A, Region *B) const { + assert (A && B && "One of the Regions is NULL"); + + if (A->contains(B)) return A; + + while (!B->contains(A)) + B = B->getParent(); + + return B; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl &Regions) const { + Region* ret = Regions.back(); + Regions.pop_back(); + + for (SmallVectorImpl::const_iterator I = Regions.begin(), + E = Regions.end(); I != E; ++I) + ret = getCommonRegion(ret, *I); + + return ret; +} + +Region* +RegionInfo::getCommonRegion(SmallVectorImpl &BBs) const { + Region* ret = getRegionFor(BBs.back()); + BBs.pop_back(); + + for (SmallVectorImpl::const_iterator I = BBs.begin(), + E = BBs.end(); I != E; ++I) + ret = getCommonRegion(ret, getRegionFor(*I)); + + return ret; +} + +void RegionInfo::splitBlock(BasicBlock* NewBB, BasicBlock *OldBB) +{ + Region *R = getRegionFor(OldBB); + + setRegionFor(NewBB, R); + + while (R->getEntry() == OldBB && !R->isTopLevelRegion()) { + R->replaceEntry(NewBB); + R = R->getParent(); + } + + setRegionFor(OldBB, R); +} + +char RegionInfo::ID = 0; +INITIALIZE_PASS_BEGIN(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(DominanceFrontier) +INITIALIZE_PASS_END(RegionInfo, "regions", + "Detect single entry single exit regions", true, true) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createRegionInfoPass() { + return new RegionInfo(); + } +} + diff --git a/final/lib/Analysis/RegionPass.cpp b/final/lib/Analysis/RegionPass.cpp new file mode 100644 index 00000000000..3269dcc63d5 --- /dev/null +++ b/final/lib/Analysis/RegionPass.cpp @@ -0,0 +1,275 @@ +//===- RegionPass.cpp - Region Pass and Region Pass Manager ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements RegionPass and RGPassManager. All region optimization +// and transformation passes are derived from RegionPass. RGPassManager is +// responsible for managing RegionPasses. 
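+//
+// A minimal client, shown as an illustrative sketch only (CountRegionBlocks
+// is a hypothetical name, not something defined in LLVM):
+//
+//   struct CountRegionBlocks : public RegionPass {
+//     static char ID;
+//     CountRegionBlocks() : RegionPass(ID) {}
+//     virtual bool runOnRegion(Region *R, RGPassManager &RGM) {
+//       unsigned N = 0;
+//       for (Region::block_iterator I = R->block_begin(),
+//            E = R->block_end(); I != E; ++I)
+//         ++N;
+//       return false; // analysis only; nothing is modified
+//     }
+//   };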
+// Most of this code is copied from LoopPass.cpp.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/Support/Timer.h"
+
+#define DEBUG_TYPE "regionpassmgr"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// RGPassManager
+//
+
+char RGPassManager::ID = 0;
+
+RGPassManager::RGPassManager(int Depth)
+  : FunctionPass(ID), PMDataManager(Depth) {
+  skipThisRegion = false;
+  redoThisRegion = false;
+  RI = NULL;
+  CurrentRegion = NULL;
+}
+
+// Recurse through all subregions and all regions into RQ.
+static void addRegionIntoQueue(Region *R, std::deque<Region*> &RQ) {
+  RQ.push_back(R);
+  for (Region::iterator I = R->begin(), E = R->end(); I != E; ++I)
+    addRegionIntoQueue(*I, RQ);
+}
+
+/// Pass Manager itself does not invalidate any analysis info.
+void RGPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
+  Info.addRequired<RegionInfo>();
+  Info.setPreservesAll();
+}
+
+/// run - Execute all of the passes scheduled for execution. Keep track of
+/// whether any of the passes modifies the function, and if so, return true.
+bool RGPassManager::runOnFunction(Function &F) {
+  RI = &getAnalysis<RegionInfo>();
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  addRegionIntoQueue(RI->getTopLevelRegion(), RQ);
+
+  if (RQ.empty()) // No regions, skip calling finalizers
+    return false;
+
+  // Initialization
+  for (std::deque<Region*>::const_iterator I = RQ.begin(), E = RQ.end();
+       I != E; ++I) {
+    Region *R = *I;
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *RP = (RegionPass *)getContainedPass(Index);
+      Changed |= RP->doInitialization(R, *this);
+    }
+  }
+
+  // Walk Regions
+  while (!RQ.empty()) {
+
+    CurrentRegion  = RQ.back();
+    skipThisRegion = false;
+    redoThisRegion = false;
+
+    // Run all passes on the current Region.
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      RegionPass *P = (RegionPass*)getContainedPass(Index);
+
+      dumpPassInfo(P, EXECUTION_MSG, ON_REGION_MSG,
+                   CurrentRegion->getNameStr());
+      dumpRequiredSet(P);
+
+      initializeAnalysisImpl(P);
+
+      {
+        PassManagerPrettyStackEntry X(P, *CurrentRegion->getEntry());
+
+        TimeRegion PassTimer(getPassTimer(P));
+        Changed |= P->runOnRegion(CurrentRegion, *this);
+      }
+
+      if (Changed)
+        dumpPassInfo(P, MODIFICATION_MSG, ON_REGION_MSG,
+                     skipThisRegion ? "<deleted>" :
+                                      CurrentRegion->getNameStr());
+      dumpPreservedSet(P);
+
+      if (!skipThisRegion) {
+        // Manually check that this region is still healthy. This is done
+        // instead of relying on RegionInfo::verifyRegion since RegionInfo
+        // is a function pass and it's really expensive to verify every
+        // Region in the function every time. That level of checking can be
+        // enabled with the -verify-region-info option.
+        {
+          TimeRegion PassTimer(getPassTimer(P));
+          CurrentRegion->verifyRegion();
+        }
+
+        // Then call the regular verifyAnalysis functions.
+        verifyPreservedAnalysis(P);
+      }
+
+      removeNotPreservedAnalysis(P);
+      recordAvailableAnalysis(P);
+      removeDeadPasses(P,
+                       skipThisRegion ? "<deleted>" :
+                                        CurrentRegion->getNameStr(),
+                       ON_REGION_MSG);
+
+      if (skipThisRegion)
+        // Do not run other passes on this region.
+        break;
+    }
+
+    // If the region was deleted, release all the region passes.
This frees up + // some memory, and avoids trouble with the pass manager trying to call + // verifyAnalysis on them. + if (skipThisRegion) + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + freePass(P, "", ON_REGION_MSG); + } + + // Pop the region from queue after running all passes. + RQ.pop_back(); + + if (redoThisRegion) + RQ.push_back(CurrentRegion); + + // Free all region nodes created in region passes. + RI->clearNodeCache(); + } + + // Finalization + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + RegionPass *P = (RegionPass*)getContainedPass(Index); + Changed |= P->doFinalization(); + } + + // Print the region tree after all pass. + DEBUG( + dbgs() << "\nRegion tree of function " << F.getName() + << " after all region Pass:\n"; + RI->dump(); + dbgs() << "\n"; + ); + + return Changed; +} + +/// Print passes managed by this manager +void RGPassManager::dumpPassStructure(unsigned Offset) { + errs().indent(Offset*2) << "Region Pass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + Pass *P = getContainedPass(Index); + P->dumpPassStructure(Offset + 1); + dumpLastUses(P, Offset+1); + } +} + +namespace { +//===----------------------------------------------------------------------===// +// PrintRegionPass +class PrintRegionPass : public RegionPass { +private: + std::string Banner; + raw_ostream &Out; // raw_ostream to print on. + +public: + static char ID; + PrintRegionPass() : RegionPass(ID), Out(dbgs()) {} + PrintRegionPass(const std::string &B, raw_ostream &o) + : RegionPass(ID), Banner(B), Out(o) {} + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + + virtual bool runOnRegion(Region *R, RGPassManager &RGM) { + Out << Banner; + for (Region::block_iterator I = R->block_begin(), E = R->block_end(); + I != E; ++I) + (*I)->getEntry()->print(Out); + + return false; + } +}; + +char PrintRegionPass::ID = 0; +} //end anonymous namespace + +//===----------------------------------------------------------------------===// +// RegionPass + +// Check if this pass is suitable for the current RGPassManager, if +// available. This pass P is not suitable for a RGPassManager if P +// is not preserving higher level analysis info used by other +// RGPassManager passes. In such case, pop RGPassManager from the +// stack. This will force assignPassManager() to create new +// LPPassManger as expected. +void RegionPass::preparePassManager(PMStack &PMS) { + + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + + // If this pass is destroying high level information that is used + // by other passes that are managed by LPM then do not insert + // this pass in current LPM. Use new RGPassManager. + if (PMS.top()->getPassManagerType() == PMT_RegionPassManager && + !PMS.top()->preserveHigherLevelAnalysis(this)) + PMS.pop(); +} + +/// Assign pass manager to manage this pass. +void RegionPass::assignPassManager(PMStack &PMS, + PassManagerType PreferredType) { + // Find RGPassManager + while (!PMS.empty() && + PMS.top()->getPassManagerType() > PMT_RegionPassManager) + PMS.pop(); + + RGPassManager *RGPM; + + // Create new Region Pass Manager if it does not exist. 
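+  // Illustrative note (the stack shapes here are assumed, not taken from
+  // this file): if PMS is [ModulePM, FunctionPM] after the pops above, the
+  // top manager is not a region pass manager, so the else branch below
+  // builds a new RGPassManager one level deeper, registers it with the top
+  // level manager, and pushes it onto PMS.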
+ if (PMS.top()->getPassManagerType() == PMT_RegionPassManager) + RGPM = (RGPassManager*)PMS.top(); + else { + + assert (!PMS.empty() && "Unable to create Region Pass Manager"); + PMDataManager *PMD = PMS.top(); + + // [1] Create new Call Graph Pass Manager + RGPM = new RGPassManager(PMD->getDepth() + 1); + RGPM->populateInheritedAnalysis(PMS); + + // [2] Set up new manager's top level manager + PMTopLevelManager *TPM = PMD->getTopLevelManager(); + TPM->addIndirectPassManager(RGPM); + + // [3] Assign manager to manage this new manager. This may create + // and push new managers into PMS + TPM->schedulePass(RGPM); + + // [4] Push new manager into PMS + PMS.push(RGPM); + } + + RGPM->add(this); +} + +/// Get the printer pass +Pass *RegionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return new PrintRegionPass(Banner, O); +} diff --git a/final/lib/Analysis/RegionPrinter.cpp b/final/lib/Analysis/RegionPrinter.cpp new file mode 100644 index 00000000000..a1730b0a3ca --- /dev/null +++ b/final/lib/Analysis/RegionPrinter.cpp @@ -0,0 +1,220 @@ +//===- RegionPrinter.cpp - Print regions tree pass ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// Print out the region tree of a function using dotty/graphviz. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/RegionIterator.h" +#include "llvm/Analysis/RegionPrinter.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +/// onlySimpleRegion - Show only the simple regions in the RegionViewer. +static cl::opt +onlySimpleRegions("only-simple-regions", + cl::desc("Show only simple regions in the graphviz viewer"), + cl::Hidden, + cl::init(false)); + +namespace llvm { +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(RegionNode *Node, RegionNode *Graph) { + + if (!Node->isSubRegion()) { + BasicBlock *BB = Node->getNodeAs(); + + if (isSimple()) + return DOTGraphTraits + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits + ::getCompleteNodeLabel(BB, BB->getParent()); + } + + return "Not implemented"; + } +}; + +template<> +struct DOTGraphTraits : public DOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits(isSimple) {} + + static std::string getGraphName(RegionInfo *DT) { + return "Region Graph"; + } + + std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { + return DOTGraphTraits::getNodeLabel(Node, + G->getTopLevelRegion()); + } + + std::string getEdgeAttributes(RegionNode *srcNode, + GraphTraits::ChildIteratorType CI, RegionInfo *RI) { + + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. 
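+    // Illustrative example (hypothetical block names): for a region entered
+    // at %header with a latch %latch inside it, the edge %latch -> %header
+    // is a back edge; returning "constraint=false" tells dot not to use
+    // that edge for ranking, so %header stays drawn above the loop body.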
+ BasicBlock *srcBB = srcNode->getNodeAs(); + BasicBlock *destBB = destNode->getNodeAs(); + + Region *R = RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + + // Print the cluster of the subregions. This groups the single basic blocks + // and adds a different background color for each group. + static void printRegionCluster(const Region *R, GraphWriter &GW, + unsigned depth = 0) { + raw_ostream &O = GW.getOStream(); + O.indent(2 * depth) << "subgraph cluster_" << static_cast(R) + << " {\n"; + O.indent(2 * (depth + 1)) << "label = \"\";\n"; + + if (!onlySimpleRegions || R->isSimple()) { + O.indent(2 * (depth + 1)) << "style = filled;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 1) << "\n"; + + } else { + O.indent(2 * (depth + 1)) << "style = solid;\n"; + O.indent(2 * (depth + 1)) << "color = " + << ((R->getDepth() * 2 % 12) + 2) << "\n"; + } + + for (Region::const_iterator RI = R->begin(), RE = R->end(); RI != RE; ++RI) + printRegionCluster(*RI, GW, depth + 1); + + RegionInfo *RI = R->getRegionInfo(); + + for (Region::const_block_iterator BI = R->block_begin(), + BE = R->block_end(); BI != BE; ++BI) { + BasicBlock *BB = (*BI)->getNodeAs(); + if (RI->getRegionFor(BB) == R) + O.indent(2 * (depth + 1)) << "Node" + << static_cast(RI->getTopLevelRegion()->getBBNode(BB)) + << ";\n"; + } + + O.indent(2 * depth) << "}\n"; + } + + static void addCustomGraphFeatures(const RegionInfo* RI, + GraphWriter &GW) { + raw_ostream &O = GW.getOStream(); + O << "\tcolorscheme = \"paired12\"\n"; + printRegionCluster(RI->getTopLevelRegion(), GW, 4); + } +}; +} //end namespace llvm + +namespace { + +struct RegionViewer + : public DOTGraphTraitsViewer { + static char ID; + RegionViewer() : DOTGraphTraitsViewer("reg", ID){ + initializeRegionViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionViewer::ID = 0; + +struct RegionOnlyViewer + : public DOTGraphTraitsViewer { + static char ID; + RegionOnlyViewer() : DOTGraphTraitsViewer("regonly", ID) { + initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionOnlyViewer::ID = 0; + +struct RegionPrinter + : public DOTGraphTraitsPrinter { + static char ID; + RegionPrinter() : + DOTGraphTraitsPrinter("reg", ID) { + initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); + } +}; +char RegionPrinter::ID = 0; +} //end anonymous namespace + +INITIALIZE_PASS(RegionPrinter, "dot-regions", + "Print regions of function to 'dot' file", true, true) + +INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function", + true, true) + +INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only", + "View regions of function (with no function bodies)", + true, true) + +namespace { + +struct RegionOnlyPrinter + : public DOTGraphTraitsPrinter { + static char ID; + RegionOnlyPrinter() : + DOTGraphTraitsPrinter("reg", ID) { + initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} + +char RegionOnlyPrinter::ID = 0; +INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", + "Print regions of function to 'dot' file " + "(with no function bodies)", + true, true) + +FunctionPass* llvm::createRegionViewerPass() { + return new RegionViewer(); +} + +FunctionPass* llvm::createRegionOnlyViewerPass() { + return new RegionOnlyViewer(); +} + +FunctionPass* llvm::createRegionPrinterPass() { + return new 
RegionPrinter(); +} + +FunctionPass* llvm::createRegionOnlyPrinterPass() { + return new RegionOnlyPrinter(); +} + diff --git a/final/lib/Analysis/ScalarEvolution.cpp b/final/lib/Analysis/ScalarEvolution.cpp new file mode 100644 index 00000000000..62244ccb3a0 --- /dev/null +++ b/final/lib/Analysis/ScalarEvolution.cpp @@ -0,0 +1,6317 @@ +//===- ScalarEvolution.cpp - Scalar Evolution Analysis ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the scalar evolution analysis +// engine, which is used primarily to analyze expressions involving induction +// variables in loops. +// +// There are several aspects to this library. First is the representation of +// scalar expressions, which are represented as subclasses of the SCEV class. +// These classes are used to represent certain types of subexpressions that we +// can handle. We only create one SCEV of a particular shape, so +// pointer-comparisons for equality are legal. +// +// One important aspect of the SCEV objects is that they are never cyclic, even +// if there is a cycle in the dataflow for an expression (ie, a PHI node). If +// the PHI node is one of the idioms that we can represent (e.g., a polynomial +// recurrence) then we represent it directly as a recurrence node, otherwise we +// represent it as a SCEVUnknown node. +// +// In addition to being able to represent expressions of various types, we also +// have folders that are used to build the *canonical* representation for a +// particular expression. These folders are capable of using a variety of +// rewrite rules to simplify the expressions. +// +// Once the folders are defined, we can implement the more interesting +// higher-level code, such as the code that recognizes PHI nodes of various +// types, computes the execution count of a loop, etc. +// +// TODO: We should use these routines and value representations to implement +// dependence analysis! +// +//===----------------------------------------------------------------------===// +// +// There are several good references for the techniques used in this analysis. +// +// Chains of recurrences -- a method to expedite the evaluation +// of closed-form functions +// Olaf Bachmann, Paul S. Wang, Eugene V. Zima +// +// On computational properties of chains of recurrences +// Eugene V. Zima +// +// Symbolic Evaluation of Chains of Recurrences for Loop Optimization +// Robert A. van Engelen +// +// Efficient Symbolic Analysis for Optimizing Compilers +// Robert A. 
van Engelen +// +// Using the chains of recurrences algebra for data dependence testing and +// induction variable substitution +// MS Thesis, Johnie Birch +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scalar-evolution" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/GlobalVariable.h" +#include "llvm/GlobalAlias.h" +#include "llvm/Instructions.h" +#include "llvm/LLVMContext.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include +using namespace llvm; + +STATISTIC(NumArrayLenItCounts, + "Number of trip counts computed with array length"); +STATISTIC(NumTripCountsComputed, + "Number of loops with predictable loop counts"); +STATISTIC(NumTripCountsNotComputed, + "Number of loops without predictable loop counts"); +STATISTIC(NumBruteForceTripCountsComputed, + "Number of loops with trip counts computed by force"); + +static cl::opt +MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, + cl::desc("Maximum number of iterations SCEV will " + "symbolically execute a constant " + "derived loop"), + cl::init(100)); + +INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(DominatorTree) +INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", + "Scalar Evolution Analysis", false, true) +char ScalarEvolution::ID = 0; + +//===----------------------------------------------------------------------===// +// SCEV class definitions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Implementation of the SCEV class. 
+// + +void SCEV::dump() const { + print(dbgs()); + dbgs() << '\n'; +} + +void SCEV::print(raw_ostream &OS) const { + switch (getSCEVType()) { + case scConstant: + WriteAsOperand(OS, cast(this)->getValue(), false); + return; + case scTruncate: { + const SCEVTruncateExpr *Trunc = cast(this); + const SCEV *Op = Trunc->getOperand(); + OS << "(trunc " << *Op->getType() << " " << *Op << " to " + << *Trunc->getType() << ")"; + return; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *ZExt = cast(this); + const SCEV *Op = ZExt->getOperand(); + OS << "(zext " << *Op->getType() << " " << *Op << " to " + << *ZExt->getType() << ")"; + return; + } + case scSignExtend: { + const SCEVSignExtendExpr *SExt = cast(this); + const SCEV *Op = SExt->getOperand(); + OS << "(sext " << *Op->getType() << " " << *Op << " to " + << *SExt->getType() << ")"; + return; + } + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast(this); + OS << "{" << *AR->getOperand(0); + for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) + OS << ",+," << *AR->getOperand(i); + OS << "}<"; + if (AR->hasNoUnsignedWrap()) + OS << "nuw><"; + if (AR->hasNoSignedWrap()) + OS << "nsw><"; + WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); + OS << ">"; + return; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast(this); + const char *OpStr = 0; + switch (NAry->getSCEVType()) { + case scAddExpr: OpStr = " + "; break; + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + OS << **I; + if (llvm::next(I) != E) + OS << OpStr; + } + OS << ")"; + return; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast(this); + OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; + return; + } + case scUnknown: { + const SCEVUnknown *U = cast(this); + const Type *AllocTy; + if (U->isSizeOf(AllocTy)) { + OS << "sizeof(" << *AllocTy << ")"; + return; + } + if (U->isAlignOf(AllocTy)) { + OS << "alignof(" << *AllocTy << ")"; + return; + } + + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo)) { + OS << "offsetof(" << *CTy << ", "; + WriteAsOperand(OS, FieldNo, false); + OS << ")"; + return; + } + + // Otherwise just print it normally. 
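+      // Illustrative examples of the output (the values are hypothetical):
+      // an argument %n prints as "%n", while the ConstantExpr idioms
+      // matched above print as "sizeof(i32)", "alignof(double)" or
+      // "offsetof(%struct.S, 1)" instead of their raw ptrtoint form.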
+ WriteAsOperand(OS, U->getValue(), false); + return; + } + case scCouldNotCompute: + OS << "***COULDNOTCOMPUTE***"; + return; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); +} + +const Type *SCEV::getType() const { + switch (getSCEVType()) { + case scConstant: + return cast(this)->getType(); + case scTruncate: + case scZeroExtend: + case scSignExtend: + return cast(this)->getType(); + case scAddRecExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + return cast(this)->getType(); + case scAddExpr: + return cast(this)->getType(); + case scUDivExpr: + return cast(this)->getType(); + case scUnknown: + return cast(this)->getType(); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return 0; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return 0; +} + +bool SCEV::isZero() const { + if (const SCEVConstant *SC = dyn_cast(this)) + return SC->getValue()->isZero(); + return false; +} + +bool SCEV::isOne() const { + if (const SCEVConstant *SC = dyn_cast(this)) + return SC->getValue()->isOne(); + return false; +} + +bool SCEV::isAllOnesValue() const { + if (const SCEVConstant *SC = dyn_cast(this)) + return SC->getValue()->isAllOnesValue(); + return false; +} + +SCEVCouldNotCompute::SCEVCouldNotCompute() : + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} + +bool SCEVCouldNotCompute::classof(const SCEV *S) { + return S->getSCEVType() == scCouldNotCompute; +} + +const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { + FoldingSetNodeID ID; + ID.AddInteger(scConstant); + ID.AddPointer(V); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getConstant(const APInt& Val) { + return getConstant(ConstantInt::get(getContext(), Val)); +} + +const SCEV * +ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) { + const IntegerType *ITy = cast(getEffectiveSCEVType(Ty)); + return getConstant(ConstantInt::get(ITy, V, isSigned)); +} + +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, + unsigned SCEVTy, const SCEV *op, const Type *ty) + : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} + +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scTruncate, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate non-integer value!"); +} + +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scZeroExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot zero extend non-integer value!"); +} + +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, + const SCEV *op, const Type *ty) + : SCEVCastExpr(ID, scSignExtend, op, ty) { + assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot sign extend non-integer value!"); +} + +void SCEVUnknown::deleted() { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Release the value. 
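+  // From this point on the SCEVUnknown no longer references the deleted
+  // Value; clients that still hold this SCEV observe a null value rather
+  // than a dangling pointer.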
+ setValPtr(0); +} + +void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from various maps. + SE->forgetMemoizedResults(this); + + // Remove this SCEVUnknown from the uniquing map. + SE->UniqueSCEVs.RemoveNode(this); + + // Update this SCEVUnknown to point to the new value. This is needed + // because there may still be outstanding SCEVs which still point to + // this SCEVUnknown. + setValPtr(New); +} + +bool SCEVUnknown::isSizeOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue() && + CE->getNumOperands() == 2) + if (ConstantInt *CI = dyn_cast(CE->getOperand(1))) + if (CI->isOne()) { + AllocTy = cast(CE->getOperand(0)->getType()) + ->getElementType(); + return true; + } + + return false; +} + +bool SCEVUnknown::isAlignOf(const Type *&AllocTy) const { + if (ConstantExpr *VCE = dyn_cast(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getOperand(0)->isNullValue()) { + const Type *Ty = + cast(CE->getOperand(0)->getType())->getElementType(); + if (const StructType *STy = dyn_cast(Ty)) + if (!STy->isPacked() && + CE->getNumOperands() == 3 && + CE->getOperand(1)->isNullValue()) { + if (ConstantInt *CI = dyn_cast(CE->getOperand(2))) + if (CI->isOne() && + STy->getNumElements() == 2 && + STy->getElementType(0)->isIntegerTy(1)) { + AllocTy = STy->getElementType(1); + return true; + } + } + } + + return false; +} + +bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const { + if (ConstantExpr *VCE = dyn_cast(getValue())) + if (VCE->getOpcode() == Instruction::PtrToInt) + if (ConstantExpr *CE = dyn_cast(VCE->getOperand(0))) + if (CE->getOpcode() == Instruction::GetElementPtr && + CE->getNumOperands() == 3 && + CE->getOperand(0)->isNullValue() && + CE->getOperand(1)->isNullValue()) { + const Type *Ty = + cast(CE->getOperand(0)->getType())->getElementType(); + // Ignore vector types here so that ScalarEvolutionExpander doesn't + // emit getelementptrs that index into vectors. + if (Ty->isStructTy() || Ty->isArrayTy()) { + CTy = Ty; + FieldNo = CE->getOperand(2); + return true; + } + } + + return false; +} + +//===----------------------------------------------------------------------===// +// SCEV Utilities +//===----------------------------------------------------------------------===// + +namespace { + /// SCEVComplexityCompare - Return true if the complexity of the LHS is less + /// than the complexity of the RHS. This comparator is used to canonicalize + /// expressions. + class SCEVComplexityCompare { + const LoopInfo *const LI; + public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} + + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } + + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). 
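+      // Illustrative example: given the operands {%a, 5, {0,+,1}<%loop>},
+      // this primary key orders the constant 5 first (scConstant is the
+      // smallest kind), then the addrec, then the unknown %a, independent
+      // of where the SCEV objects happen to live in memory.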
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (LType) { + case scUnknown: { + const SCEVUnknown *LU = cast(LHS); + const SCEVUnknown *RU = cast(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LV)) { + const Argument *RA = cast(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; + } + + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast(LV)) { + const Instruction *RInst = cast(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } + + return 0; + } + + case scConstant: { + const SCEVConstant *LC = cast(LHS); + const SCEVConstant *RC = cast(RHS); + + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast(LHS); + const SCEVAddRecExpr *RA = cast(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; + } + + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; + } + + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + + // Lexicographically compare n-ary expressions. 
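+      // Illustrative example: comparing (%a + %b) against (%a + %b + %c)
+      // finds no difference over the shared prefix, so the operand-count
+      // check below orders the shorter expression first.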
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); + if (X != 0) + return X; + } + return (int)LNumOps - (int)RNumOps; + } + + case scUDivExpr: { + const SCEVUDivExpr *LC = cast(LHS); + const SCEVUDivExpr *RC = cast(RHS); + + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } + + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast(LHS); + const SCEVCastExpr *RC = cast(RHS); + + // Compare cast expressions by operand. + return compare(LC->getOperand(), RC->getOperand()); + } + + default: + break; + } + + llvm_unreachable("Unknown SCEV kind!"); + return 0; + } + }; +} + +/// GroupByComplexity - Given a list of SCEV objects, order them by their +/// complexity, and group objects of the same complexity together by value. +/// When this routine is finished, we know that any duplicates in the vector are +/// consecutive and that complexity is monotonically increasing. +/// +/// Note that we go take special precautions to ensure that we get deterministic +/// results from this routine. In other words, we don't want the results of +/// this to depend on where the addresses of various SCEV objects happened to +/// land in memory. +/// +static void GroupByComplexity(SmallVectorImpl &Ops, + LoopInfo *LI) { + if (Ops.size() < 2) return; // Noop + if (Ops.size() == 2) { + // This is the common case, which also happens to be trivially simple. + // Special case it. + const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; + if (SCEVComplexityCompare(LI)(RHS, LHS)) + std::swap(LHS, RHS); + return; + } + + // Do the rough sort by complexity. + std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI)); + + // Now that we are sorted by complexity, group elements of the same + // complexity. Note that this is, at worst, N^2, but the vector is likely to + // be extremely short in practice. Note that we take this approach because we + // do not want to depend on the addresses of the objects we are grouping. + for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { + const SCEV *S = Ops[i]; + unsigned Complexity = S->getSCEVType(); + + // If there are any objects of the same complexity and same value as this + // one, group them. + for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { + if (Ops[j] == S) { // Found a duplicate. + // Move it to immediately after i'th element. + std::swap(Ops[i+1], Ops[j]); + ++i; // no need to rescan it. + if (i == e-2) return; // Done! + } + } + } +} + + + +//===----------------------------------------------------------------------===// +// Simple SCEV method implementations +//===----------------------------------------------------------------------===// + +/// BinomialCoefficient - Compute BC(It, K). The result has width W. +/// Assume, K > 0. +static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, + ScalarEvolution &SE, + const Type* ResultTy) { + // Handle the simplest case efficiently. + if (K == 1) + return SE.getTruncateOrZeroExtend(It, ResultTy); + + // We are using the following formula for BC(It, K): + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! + // + // Suppose, W is the bitwidth of the return value. We must be prepared for + // overflow. 
Hence, we must assure that the result of our computation is + // equal to the accurate one modulo 2^W. Unfortunately, division isn't + // safe in modular arithmetic. + // + // However, this code doesn't use exactly that formula; the formula it uses + // is something like the following, where T is the number of factors of 2 in + // K! (i.e. trailing zeros in the binary representation of K!), and ^ is + // exponentiation: + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! / 2^T) + // + // This formula is trivially equivalent to the previous formula. However, + // this formula can be implemented much more efficiently. The trick is that + // K! / 2^T is odd, and exact division by an odd number *is* safe in modular + // arithmetic. To do exact division in modular arithmetic, all we have + // to do is multiply by the inverse. Therefore, this step can be done at + // width W. + // + // The next issue is how to safely do the division by 2^T. The way this + // is done is by doing the multiplication step at a width of at least W + T + // bits. This way, the bottom W+T bits of the product are accurate. Then, + // when we perform the division by 2^T (which is equivalent to a right shift + // by T), the bottom W bits are accurate. Extra bits are okay; they'll get + // truncated out after the division by 2^T. + // + // In comparison to just directly using the first formula, this technique + // is much more efficient; using the first formula requires W * K bits, + // but this formula less than W + K bits. Also, the first formula requires + // a division step, whereas this formula only requires multiplies and shifts. + // + // It doesn't matter whether the subtraction step is done in the calculation + // width or the input iteration count's width; if the subtraction overflows, + // the result must be zero anyway. We prefer here to do it in the width of + // the induction variable because it helps a lot for certain cases; CodeGen + // isn't smart enough to ignore the overflow, which leads to much less + // efficient code if the width of the subtraction is wider than the native + // register width. + // + // (It's possible to not widen at all by pulling out factors of 2 before + // the multiplication; for example, K=2 can be calculated as + // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires + // extra arithmetic, so it's not an obvious win, and it gets + // much more complicated for K > 3.) + + // Protection from insane SCEVs; this bound is conservative, + // but it probably doesn't matter. + if (K > 1000) + return SE.getCouldNotCompute(); + + unsigned W = SE.getTypeSizeInBits(ResultTy); + + // Calculate K! / 2^T and T; we divide out the factors of two before + // multiplying for calculating K! / 2^T to avoid overflow. + // Other overflow doesn't matter because we only care about the bottom + // W bits of the result. + APInt OddFactorial(W, 1); + unsigned T = 1; + for (unsigned i = 3; i <= K; ++i) { + APInt Mult(W, i); + unsigned TwoFactors = Mult.countTrailingZeros(); + T += TwoFactors; + Mult = Mult.lshr(TwoFactors); + OddFactorial *= Mult; + } + + // We need at least W + T bits for the multiplication step + unsigned CalculationBits = W + T; + + // Calculate 2^T, at width T+W. + APInt DivFactor = APInt(CalculationBits, 1).shl(T); + + // Calculate the multiplicative inverse of K! / 2^T; + // this multiplication factor will perform the exact division by + // K! / 2^T. 
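+  // Worked example (for illustration): for K = 3, K! = 6 = 2^1 * 3, so
+  // T = 1 and OddFactorial = 3. At W = 8, the inverse of 3 modulo 2^8 is
+  // 171 (3 * 171 = 513 = 2 * 256 + 1), so the exact division by 3 is
+  // performed as a multiplication by 171.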
+ APInt Mod = APInt::getSignedMinValue(W+1); + APInt MultiplyFactor = OddFactorial.zext(W+1); + MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); + MultiplyFactor = MultiplyFactor.trunc(W); + + // Calculate the product, at width T+W + const IntegerType *CalculationTy = IntegerType::get(SE.getContext(), + CalculationBits); + const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); + for (unsigned i = 1; i != K; ++i) { + const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); + Dividend = SE.getMulExpr(Dividend, + SE.getTruncateOrZeroExtend(S, CalculationTy)); + } + + // Divide by 2^T + const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); + + // Truncate the result, and divide by K! / 2^T. + + return SE.getMulExpr(SE.getConstant(MultiplyFactor), + SE.getTruncateOrZeroExtend(DivResult, ResultTy)); +} + +/// evaluateAtIteration - Return the value of this chain of recurrences at +/// the specified iteration number. We can evaluate this recurrence by +/// multiplying each element in the chain by the binomial coefficient +/// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as: +/// +/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) +/// +/// where BC(It, k) stands for binomial coefficient. +/// +const SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, + ScalarEvolution &SE) const { + const SCEV *Result = getStart(); + for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + // The computation is correct in the face of overflow provided that the + // multiplication is performed _after_ the evaluation of the binomial + // coefficient. + const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); + if (isa(Coeff)) + return Coeff; + + Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); + } + return Result; +} + +//===----------------------------------------------------------------------===// +// SCEV Expression folder implementations +//===----------------------------------------------------------------------===// + +const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && + "This is not a truncating conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + FoldingSetNodeID ID; + ID.AddInteger(scTruncate); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast(Op)) + return getConstant( + cast(ConstantExpr::getTrunc(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // trunc(trunc(x)) --> trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) + return getTruncateExpr(ST->getOperand(), Ty); + + // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) + return getTruncateOrSignExtend(SS->getOperand(), Ty); + + // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) + return getTruncateOrZeroExtend(SZ->getOperand(), Ty); + + // trunc(x1+x2+...+xN) --> trunc(x1)+trunc(x2)+...+trunc(xN) if we can + // eliminate all the truncates. 
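+  // Illustrative example: trunc i64 ((zext i32 %a to i64) + 4) to i32
+  // folds to (%a + 4): the zext cancels against the truncate and the
+  // constant truncates for free. If any operand would retain an explicit
+  // truncate, the fold is abandoned.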
+ if (const SCEVAddExpr *SA = dyn_cast(Op)) { + SmallVector Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SA->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SA->getOperand(i), Ty); + hasTrunc = isa(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getAddExpr(Operands, false, false); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + + // trunc(x1*x2*...*xN) --> trunc(x1)*trunc(x2)*...*trunc(xN) if we can + // eliminate all the truncates. + if (const SCEVMulExpr *SM = dyn_cast(Op)) { + SmallVector Operands; + bool hasTrunc = false; + for (unsigned i = 0, e = SM->getNumOperands(); i != e && !hasTrunc; ++i) { + const SCEV *S = getTruncateExpr(SM->getOperand(i), Ty); + hasTrunc = isa(S); + Operands.push_back(S); + } + if (!hasTrunc) + return getMulExpr(Operands, false, false); + UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. + } + + // If the input value is a chrec scev, truncate the chrec's operands. + if (const SCEVAddRecExpr *AddRec = dyn_cast(Op)) { + SmallVector Operands; + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); + return getAddRecExpr(Operands, AddRec->getLoop()); + } + + // As a special case, fold trunc(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast(Op)) + if (isa(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // The cast wasn't folded; create an explicit cast node. We can reuse + // the existing insert position since if we get here, we won't have + // made any changes which would invalidate it. + SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast(Op)) + return getConstant( + cast(ConstantExpr::getZExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // zext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scZeroExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // zext(trunc(x)) --> zext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { + // It's possible the bits taken off by the truncate were all zero bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getUnsignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( + CR.zextOrTrunc(NewBits))) + return getTruncateOrZeroExtend(X, Ty); + } + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can zero extend all of the + // operands (often constants). 
This allows analysis of something like + // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoUnsignedWrap()) + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. + // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no unsigned overflow. + const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, ZMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + + // Similar to above, only this time treat the step value as signed. + // This covers loops that count down. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, SMul); + OperandExtendedAdd = + getAddExpr(getZeroExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - + getUnsignedRange(Step).getUnsignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. 
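+            // Illustrative example: for the i32 addrec {0,+,1}<%L> whose
+            // backedge is guarded by an unsigned (i < %n) comparison, the
+            // zext to i64 becomes the i64 addrec {0,+,1}<%L>, hoisting the
+            // extension out of the loop.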
+ return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getZeroExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Fold if the operand is constant. + if (const SCEVConstant *SC = dyn_cast(Op)) + return getConstant( + cast(ConstantExpr::getSExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); + + // sext(sext(x)) --> sext(x) + if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) + return getSignExtendExpr(SS->getOperand(), Ty); + + // sext(zext(x)) --> zext(x) + if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) + return getZeroExtendExpr(SZ->getOperand(), Ty); + + // Before doing any expensive analysis, check to see if we've already + // computed a SCEV for this Op and Ty. + FoldingSetNodeID ID; + ID.AddInteger(scSignExtend); + ID.AddPointer(Op); + ID.AddPointer(Ty); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + + // If the input value is provably positive, build a zext instead. + if (isKnownNonNegative(Op)) + return getZeroExtendExpr(Op, Ty); + + // sext(trunc(x)) --> sext(x) or x or trunc(x) + if (const SCEVTruncateExpr *ST = dyn_cast(Op)) { + // It's possible the bits taken off by the truncate were all sign bits. If + // so, we should be able to simplify this further. + const SCEV *X = ST->getOperand(); + ConstantRange CR = getSignedRange(X); + unsigned TruncBits = getTypeSizeInBits(ST->getType()); + unsigned NewBits = getTypeSizeInBits(Ty); + if (CR.truncate(TruncBits).signExtend(NewBits).contains( + CR.sextOrTrunc(NewBits))) + return getTruncateOrSignExtend(X, Ty); + } + + // If the input value is a chrec scev, and we can prove that the value + // did not overflow the old, smaller, value, we can sign extend all of the + // operands (often constants). This allows analysis of something like + // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } + if (const SCEVAddRecExpr *AR = dyn_cast(Op)) + if (AR->isAffine()) { + const SCEV *Start = AR->getStart(); + const SCEV *Step = AR->getStepRecurrence(*this); + unsigned BitWidth = getTypeSizeInBits(AR->getType()); + const Loop *L = AR->getLoop(); + + // If we have special knowledge that this addrec won't overflow, + // we don't need to do any further analysis. + if (AR->hasNoSignedWrap()) + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + + // Check whether the backedge-taken count is SCEVCouldNotCompute. 
+ // Note that this serves two purposes: It filters out loops that are + // simply not analyzable, and it covers the case where this code is + // being called from within backedge-taken count analysis, such that + // attempting to ask for the backedge-taken count would likely result + // in infinite recursion. In the later case, the analysis code will + // cope with a conservative value, and it will take care to purge + // that value once it has finished. + const SCEV *MaxBECount = getMaxBackedgeTakenCount(L); + if (!isa(MaxBECount)) { + // Manually compute the final value for AR, checking for + // overflow. + + // Check whether the backedge-taken count can be losslessly casted to + // the addrec's type. The count is always unsigned. + const SCEV *CastedMaxBECount = + getTruncateOrZeroExtend(MaxBECount, Start->getType()); + const SCEV *RecastedMaxBECount = + getTruncateOrZeroExtend(CastedMaxBECount, MaxBECount->getType()); + if (MaxBECount == RecastedMaxBECount) { + const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); + // Check whether Start+Step*MaxBECount has no signed overflow. + const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); + const SCEV *Add = getAddExpr(Start, SMul); + const SCEV *OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getSignExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + + // Similar to above, only this time treat the step value as unsigned. + // This covers loops that count up with an unsigned step. + const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); + Add = getAddExpr(Start, UMul); + OperandExtendedAdd = + getAddExpr(getSignExtendExpr(Start, WideTy), + getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), + getZeroExtendExpr(Step, WideTy))); + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getZeroExtendExpr(Step, Ty), + L); + } + + // If the backedge is guarded by a comparison with the pre-inc value + // the addrec is safe. Also, if the entry is guarded by a comparison + // with the start value and the backedge is guarded by a comparison + // with the post-inc value, the addrec is safe. + if (isKnownPositive(Step)) { + const SCEV *N = getConstant(APInt::getSignedMinValue(BitWidth) - + getSignedRange(Step).getSignedMax()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. + return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } else if (isKnownNegative(Step)) { + const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - + getSignedRange(Step).getSignedMin()); + if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || + (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && + isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, + AR->getPostIncExpr(*this), N))) + // Return the expression with the addrec on the outside. 
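+            // Illustrative example: an i8 addrec counting down, such as
+            // {100,+,-1}<%L> guarded by a signed (i > 0) comparison, sign
+            // extends to the i16 addrec {100,+,-1}<%L> instead of to
+            // sext({100,+,-1}).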
+ return getAddRecExpr(getSignExtendExpr(Start, Ty), + getSignExtendExpr(Step, Ty), + L); + } + } + } + + // The cast wasn't folded; create an explicit cast node. + // Recompute the insert position, as it may have been invalidated. + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +/// getAnyExtendExpr - Return a SCEV for the given operand extended with +/// unspecified bits out to the given type. +/// +const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Sign-extend negative constants. + if (const SCEVConstant *SC = dyn_cast(Op)) + if (SC->getValue()->getValue().isNegative()) + return getSignExtendExpr(Op, Ty); + + // Peel off a truncate cast. + if (const SCEVTruncateExpr *T = dyn_cast(Op)) { + const SCEV *NewOp = T->getOperand(); + if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) + return getAnyExtendExpr(NewOp, Ty); + return getTruncateOrNoop(NewOp, Ty); + } + + // Next try a zext cast. If the cast is folded, use it. + const SCEV *ZExt = getZeroExtendExpr(Op, Ty); + if (!isa(ZExt)) + return ZExt; + + // Next try a sext cast. If the cast is folded, use it. + const SCEV *SExt = getSignExtendExpr(Op, Ty); + if (!isa(SExt)) + return SExt; + + // Force the cast to be folded into the operands of an addrec. + if (const SCEVAddRecExpr *AR = dyn_cast(Op)) { + SmallVector Ops; + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + Ops.push_back(getAnyExtendExpr(*I, Ty)); + return getAddRecExpr(Ops, AR->getLoop()); + } + + // As a special case, fold anyext(undef) to undef. We don't want to + // know too much about SCEVUnknowns, but this special case is handy + // and harmless. + if (const SCEVUnknown *U = dyn_cast(Op)) + if (isa(U->getValue())) + return getSCEV(UndefValue::get(Ty)); + + // If the expression is obviously signed, use the sext cast value. + if (isa(Op)) + return SExt; + + // Absent any other information, use the zext cast value. + return ZExt; +} + +/// CollectAddOperandsWithScales - Process the given Ops list, which is +/// a list of operands to be added under the given scale, update the given +/// map. This is a helper function for getAddRecExpr. As an example of +/// what it does, given a sequence of operands that would form an add +/// expression like this: +/// +/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r) +/// +/// where A and B are constants, update the map with these values: +/// +/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) +/// +/// and add 13 + A*B*29 to AccumulatedConstant. +/// This will allow getAddRecExpr to produce this: +/// +/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) +/// +/// This form often exposes folding opportunities that are hidden in +/// the original operand list. +/// +/// Return true iff it appears that any interesting folding opportunities +/// may be exposed. This helps getAddRecExpr short-circuit extra work in +/// the common case where no interesting opportunities are present, and +/// is also used as a check to avoid infinite recursion. 
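+/// (Illustrative note: the caller is getAddExpr's constant-scale folding
+/// step; each map entry pairs an operand with its accumulated scale, so in
+/// the example above r receives 1 + (-1) = 0 and drops out entirely.)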
+/// +static bool +CollectAddOperandsWithScales(DenseMap &M, + SmallVector &NewOps, + APInt &AccumulatedConstant, + const SCEV *const *Ops, size_t NumOperands, + const APInt &Scale, + ScalarEvolution &SE) { + bool Interesting = false; + + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { + const SCEVMulExpr *Mul = dyn_cast(Ops[i]); + if (Mul && isa(Mul->getOperand(0))) { + APInt NewScale = + Scale * cast(Mul->getOperand(0))->getValue()->getValue(); + if (Mul->getNumOperands() == 2 && isa(Mul->getOperand(1))) { + // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast(Mul->getOperand(1)); + Interesting |= + CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Add->op_begin(), Add->getNumOperands(), + NewScale, SE); + } else { + // A multiplication of a constant with some other value. Update + // the map. + SmallVector MulOps(Mul->op_begin()+1, Mul->op_end()); + const SCEV *Key = SE.getMulExpr(MulOps); + std::pair::iterator, bool> Pair = + M.insert(std::make_pair(Key, NewScale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += NewScale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } else { + // An ordinary operand. Update the map. + std::pair::iterator, bool> Pair = + M.insert(std::make_pair(Ops[i], Scale)); + if (Pair.second) { + NewOps.push_back(Pair.first->first); + } else { + Pair.first->second += Scale; + // The map already had an entry for this value, which may indicate + // a folding opportunity. + Interesting = true; + } + } + } + + return Interesting; +} + +namespace { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; +} + +/// getAddExpr - Get a canonical add expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, + bool HasNUW, bool HasNSW) { + assert(!Ops.empty() && "Cannot get empty add!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVAddExpr operand types don't match!"); +#endif + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! 
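+      // Illustrative example: (3 + %x + 5) is sorted to (3 + 5 + %x) by
+      // GroupByComplexity; this loop then collapses it to (8 + %x) in
+      // place.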
+ Ops[0] = getConstant(LHSC->getValue()->getValue() + + RHSC->getValue()->getValue()); + if (Ops.size() == 2) return Ops[0]; + Ops.erase(Ops.begin()+1); // Erase the folded element + LHSC = cast(Ops[0]); + } + + // If we are left with a constant zero being added, strip it off. + if (LHSC->getValue()->isZero()) { + Ops.erase(Ops.begin()); + --Idx; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Okay, check to see if the same value occurs in the operand list more than + // once. If so, merge them together into an multiply expression. Since we + // sorted the list, these values are required to be adjacent. + const Type *Ty = Ops[0]->getType(); + bool FoundMatch = false; + for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) + if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 + // Scan ahead to count how many equal operands there are. + unsigned Count = 2; + while (i+Count != e && Ops[i+Count] == Ops[i]) + ++Count; + // Merge the values into a multiply. + const SCEV *Scale = getConstant(Ty, Count); + const SCEV *Mul = getMulExpr(Scale, Ops[i]); + if (Ops.size() == Count) + return Mul; + Ops[i] = Mul; + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); + --i; e -= Count - 1; + FoundMatch = true; + } + if (FoundMatch) + return getAddExpr(Ops, HasNUW, HasNSW); + + // Check for truncates. If all the operands are truncated from the same + // type, see if factoring out the truncate would permit the result to be + // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n) + // if the contents of the resulting outer trunc fold to something simple. + for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { + const SCEVTruncateExpr *Trunc = cast(Ops[Idx]); + const Type *DstType = Trunc->getType(); + const Type *SrcType = Trunc->getOperand()->getType(); + SmallVector LargeOps; + bool Ok = true; + // Check all the operands to see if they can be represented in the + // source type of the truncate. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + if (const SCEVTruncateExpr *T = dyn_cast(Ops[i])) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = dyn_cast(Ops[i])) { + LargeOps.push_back(getAnyExtendExpr(C, SrcType)); + } else if (const SCEVMulExpr *M = dyn_cast(Ops[i])) { + SmallVector LargeMulOps; + for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { + if (const SCEVTruncateExpr *T = + dyn_cast(M->getOperand(j))) { + if (T->getOperand()->getType() != SrcType) { + Ok = false; + break; + } + LargeMulOps.push_back(T->getOperand()); + } else if (const SCEVConstant *C = + dyn_cast(M->getOperand(j))) { + LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); + } else { + Ok = false; + break; + } + } + if (Ok) + LargeOps.push_back(getMulExpr(LargeMulOps)); + } else { + Ok = false; + break; + } + } + if (Ok) { + // Evaluate the expression in the larger type. + const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW); + // If it folds to something simple, use it. Otherwise, don't. + if (isa(Fold) || isa(Fold)) + return getTruncateExpr(Fold, DstType); + } + } + + // Skip past any other cast SCEVs. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) + ++Idx; + + // If there are add operands they would be next. + if (Idx < Ops.size()) { + bool DeletedAdd = false; + while (const SCEVAddExpr *Add = dyn_cast(Ops[Idx])) { + // If we have an add, expand the add operands onto the end of the operands + // list. 
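+      // e.g. ((a + b) + c) flattens to (a + b + c); the recursion below
+      // re-sorts and re-simplifies the enlarged list.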
+ Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); + DeletedAdd = true; + } + + // If we deleted at least one add, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedAdd) + return getAddExpr(Ops); + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // Check to see if there are any folding opportunities present with + // operands multiplied by constant values. + if (Idx < Ops.size() && isa(Ops[Idx])) { + uint64_t BitWidth = getTypeSizeInBits(Ty); + DenseMap M; + SmallVector NewOps; + APInt AccumulatedConstant(BitWidth, 0); + if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { + // Some interesting folding opportunity is present, so its worthwhile to + // re-generate the operands list. Group the operands by constant scale, + // to avoid multiplying by the same constant scale multiple times. + std::map, APIntCompare> MulOpLists; + for (SmallVector::const_iterator I = NewOps.begin(), + E = NewOps.end(); I != E; ++I) + MulOpLists[M.find(*I)->second].push_back(*I); + // Re-generate the operands list. + Ops.clear(); + if (AccumulatedConstant != 0) + Ops.push_back(getConstant(AccumulatedConstant)); + for (std::map, APIntCompare>::iterator + I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) + if (I->first != 0) + Ops.push_back(getMulExpr(getConstant(I->first), + getAddExpr(I->second))); + if (Ops.empty()) + return getConstant(Ty, 0); + if (Ops.size() == 1) + return Ops[0]; + return getAddExpr(Ops); + } + } + + // If we are adding something to a multiply expression, make sure the + // something is not already an operand of the multiply. If so, merge it into + // the multiply. + for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { + const SCEVMulExpr *Mul = cast(Ops[Idx]); + for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { + const SCEV *MulOpSCEV = Mul->getOperand(MulOp); + if (isa(MulOpSCEV)) + continue; + for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) + if (MulOpSCEV == Ops[AddOp]) { + // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) + const SCEV *InnerMul = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + // If the multiply has more than two operands, we must get the + // Y*Z term. + SmallVector MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul = getMulExpr(MulOps); + } + const SCEV *One = getConstant(Ty, 1); + const SCEV *AddOne = getAddExpr(One, InnerMul); + const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV); + if (Ops.size() == 2) return OuterMul; + if (AddOp < Idx) { + Ops.erase(Ops.begin()+AddOp); + Ops.erase(Ops.begin()+Idx-1); + } else { + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+AddOp-1); + } + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + + // Check this multiply against other multiplies being added together. + for (unsigned OtherMulIdx = Idx+1; + OtherMulIdx < Ops.size() && isa(Ops[OtherMulIdx]); + ++OtherMulIdx) { + const SCEVMulExpr *OtherMul = cast(Ops[OtherMulIdx]); + // If MulOp occurs in OtherMul, we can fold the two multiplies + // together. 
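+        // e.g. with MulOpSCEV == A:  (A*B) + (A*C) --> A*(B + C).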
+ for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); + OMulOp != e; ++OMulOp) + if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { + // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) + const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); + if (Mul->getNumOperands() != 2) { + SmallVector MulOps(Mul->op_begin(), + Mul->op_begin()+MulOp); + MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); + InnerMul1 = getMulExpr(MulOps); + } + const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); + if (OtherMul->getNumOperands() != 2) { + SmallVector MulOps(OtherMul->op_begin(), + OtherMul->op_begin()+OMulOp); + MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); + InnerMul2 = getMulExpr(MulOps); + } + const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2); + const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum); + if (Ops.size() == 2) return OuterMul; + Ops.erase(Ops.begin()+Idx); + Ops.erase(Ops.begin()+OtherMulIdx-1); + Ops.push_back(OuterMul); + return getAddExpr(Ops); + } + } + } + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this add and add them to the vector if + // they are loop invariant w.r.t. the recurrence. + SmallVector LIOps; + const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (isLoopInvariant(Ops[i], AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} + LIOps.push_back(AddRec->getStart()); + + SmallVector AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + AddRecOps[0] = getAddExpr(LIOps); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); + + // If all of the other operands were loop invariant, we are done. + if (Ops.size() == 1) return NewRec; + + // Otherwise, add the folded AddRec by the non-liv parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getAddExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // added together. If so, we can fold them. 
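+    // e.g. {1,+,2}<L> + {3,+,4}<L> --> {4,+,6}<L>.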
+ for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { + // Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D} + SmallVector AddRecOps(AddRec->op_begin(), + AddRec->op_end()); + for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + for (unsigned i = 0, e = OtherAddRec->getNumOperands(); + i != e; ++i) { + if (i >= AddRecOps.size()) { + AddRecOps.append(OtherAddRec->op_begin()+i, + OtherAddRec->op_end()); + break; + } + AddRecOps[i] = getAddExpr(AddRecOps[i], + OtherAddRec->getOperand(i)); + } + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop); + return getAddExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an add expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scAddExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVAddExpr *S = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +/// getMulExpr - Get a canonical multiply expression, or something simpler if +/// possible. +const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, + bool HasNUW, bool HasNSW) { + assert(!Ops.empty() && "Cannot get empty mul!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVMulExpr operand types don't match!"); +#endif + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl::const_iterator I = Ops.begin(), + E = Ops.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + + // C1*(C2+V) -> C1*C2 + C1*V + if (Ops.size() == 2) + if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) + if (Add->getNumOperands() == 2 && + isa(Add->getOperand(0))) + return getAddExpr(getMulExpr(LHSC, Add->getOperand(0)), + getMulExpr(LHSC, Add->getOperand(1))); + + ++Idx; + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + LHSC->getValue()->getValue() * + RHSC->getValue()->getValue()); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + + // If we are left with a constant one being multiplied, strip it off. 
+ if (cast(Ops[0])->getValue()->equalsInt(1)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast(Ops[0])->getValue()->isZero()) { + // If we have a multiply of zero, it will always be zero. + return Ops[0]; + } else if (Ops[0]->isAllOnesValue()) { + // If we have a mul by -1 of an add, try distributing the -1 among the + // add operands. + if (Ops.size() == 2) + if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { + SmallVector NewOps; + bool AnyFolded = false; + for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); + I != E; ++I) { + const SCEV *Mul = getMulExpr(Ops[0], *I); + if (!isa(Mul)) AnyFolded = true; + NewOps.push_back(Mul); + } + if (AnyFolded) + return getAddExpr(NewOps); + } + } + + if (Ops.size() == 1) + return Ops[0]; + } + + // Skip over the add expression until we get to a multiply. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) + ++Idx; + + // If there are mul operands inline them all into this expression. + if (Idx < Ops.size()) { + bool DeletedMul = false; + while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { + // If we have an mul, expand the mul operands onto the end of the operands + // list. + Ops.erase(Ops.begin()+Idx); + Ops.append(Mul->op_begin(), Mul->op_end()); + DeletedMul = true; + } + + // If we deleted at least one mul, we added operands to the end of the list, + // and they are not necessarily sorted. Recurse to resort and resimplify + // any operands we just acquired. + if (DeletedMul) + return getMulExpr(Ops); + } + + // If there are any add recurrences in the operands list, see if any other + // added values are loop invariant. If so, we can fold them into the + // recurrence. + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) + ++Idx; + + // Scan over all recurrences, trying to fold loop invariants into them. + for (; Idx < Ops.size() && isa(Ops[Idx]); ++Idx) { + // Scan all of the other operands to this mul and add them to the vector if + // they are loop invariant w.r.t. the recurrence. + SmallVector LIOps; + const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); + const Loop *AddRecLoop = AddRec->getLoop(); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (isLoopInvariant(Ops[i], AddRecLoop)) { + LIOps.push_back(Ops[i]); + Ops.erase(Ops.begin()+i); + --i; --e; + } + + // If we found some loop invariants, fold them into the recurrence. + if (!LIOps.empty()) { + // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} + SmallVector NewOps; + NewOps.reserve(AddRec->getNumOperands()); + const SCEV *Scale = getMulExpr(LIOps); + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); + + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer mul and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); + + // If all of the other operands were loop invariant, we are done. + if (Ops.size() == 1) return NewRec; + + // Otherwise, multiply the folded AddRec by the non-liv parts. + for (unsigned i = 0;; ++i) + if (Ops[i] == AddRec) { + Ops[i] = NewRec; + break; + } + return getMulExpr(Ops); + } + + // Okay, if there weren't any loop invariants to be folded, check to see if + // there are multiple AddRec's with the same loop induction variable being + // multiplied together. If so, we can fold them. 
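+    // e.g. F = {A,+,B}<L> and G = {C,+,D}<L> combine into one recurrence
+    // over L whose start is A*C.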
+ for (unsigned OtherIdx = Idx+1; + OtherIdx < Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) + if (AddRecLoop == cast(Ops[OtherIdx])->getLoop()) { + // F * G, where F = {A,+,B} and G = {C,+,D} --> + // {A*C,+,F*D + G*B + B*D} + for (; OtherIdx != Ops.size() && isa(Ops[OtherIdx]); + ++OtherIdx) + if (const SCEVAddRecExpr *OtherAddRec = + dyn_cast(Ops[OtherIdx])) + if (OtherAddRec->getLoop() == AddRecLoop) { + const SCEVAddRecExpr *F = AddRec, *G = OtherAddRec; + const SCEV *NewStart = getMulExpr(F->getStart(), G->getStart()); + const SCEV *B = F->getStepRecurrence(*this); + const SCEV *D = G->getStepRecurrence(*this); + const SCEV *NewStep = getAddExpr(getMulExpr(F, D), + getMulExpr(G, B), + getMulExpr(B, D)); + const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, + F->getLoop()); + if (Ops.size() == 2) return NewAddRec; + Ops[Idx] = AddRec = cast(NewAddRec); + Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; + } + return getMulExpr(Ops); + } + + // Otherwise couldn't fold anything into this recurrence. Move onto the + // next one. + } + + // Okay, it looks like we really DO need an mul expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scMulExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + SCEVMulExpr *S = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +/// getUDivExpr - Get a canonical unsigned division expression, or something +/// simpler if possible. +const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, + const SCEV *RHS) { + assert(getEffectiveSCEVType(LHS->getType()) == + getEffectiveSCEVType(RHS->getType()) && + "SCEVUDivExpr operand types don't match!"); + + if (const SCEVConstant *RHSC = dyn_cast(RHS)) { + if (RHSC->getValue()->equalsInt(1)) + return LHS; // X udiv 1 --> x + // If the denominator is zero, the result of the udiv is undefined. Don't + // try to analyze it, because the resolution chosen here may differ from + // the resolution chosen in other parts of the compiler. + if (!RHSC->getValue()->isZero()) { + // Determine if the division can be folded into the operands of + // its operands. + // TODO: Generalize this to non-constants by using known-bits information. + const Type *Ty = LHS->getType(); + unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); + unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; + // For non-power-of-two values, effectively round the value up to the + // nearest power of two. + if (!RHSC->getValue()->getValue().isPowerOf2()) + ++MaxShiftAmt; + const IntegerType *ExtTy = + IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); + // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. 
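+    // e.g. {0,+,8}<L> udiv 4 --> {0,+,2}<L>, provided the zero-extend
+    // comparison below shows the division loses no bits.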
+ if (const SCEVAddRecExpr *AR = dyn_cast(LHS)) + if (const SCEVConstant *Step = + dyn_cast(AR->getStepRecurrence(*this))) + if (!Step->getValue()->getValue() + .urem(RHSC->getValue()->getValue()) && + getZeroExtendExpr(AR, ExtTy) == + getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), + getZeroExtendExpr(Step, ExtTy), + AR->getLoop())) { + SmallVector Operands; + for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) + Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); + return getAddRecExpr(Operands, AR->getLoop()); + } + // (A*B)/C --> A*(B/C) if safe and B/C can be folded. + if (const SCEVMulExpr *M = dyn_cast(LHS)) { + SmallVector Operands; + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); + if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) + // Find an operand that's safely divisible. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *Op = M->getOperand(i); + const SCEV *Div = getUDivExpr(Op, RHSC); + if (!isa(Div) && getMulExpr(Div, RHSC) == Op) { + Operands = SmallVector(M->op_begin(), + M->op_end()); + Operands[i] = Div; + return getMulExpr(Operands); + } + } + } + // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. + if (const SCEVAddRecExpr *A = dyn_cast(LHS)) { + SmallVector Operands; + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) + Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); + if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { + Operands.clear(); + for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { + const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); + if (isa(Op) || + getMulExpr(Op, RHS) != A->getOperand(i)) + break; + Operands.push_back(Op); + } + if (Operands.size() == A->getNumOperands()) + return getAddExpr(Operands); + } + } + + // Fold if both operands are constant. + if (const SCEVConstant *LHSC = dyn_cast(LHS)) { + Constant *LHSCV = LHSC->getValue(); + Constant *RHSCV = RHSC->getValue(); + return getConstant(cast(ConstantExpr::getUDiv(LHSCV, + RHSCV))); + } + } + } + + FoldingSetNodeID ID; + ID.AddInteger(scUDivExpr); + ID.AddPointer(LHS); + ID.AddPointer(RHS); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), + LHS, RHS); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, + const SCEV *Step, const Loop *L, + bool HasNUW, bool HasNSW) { + SmallVector Operands; + Operands.push_back(Start); + if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) + if (StepChrec->getLoop() == L) { + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); + return getAddRecExpr(Operands, L); + } + + Operands.push_back(Step); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); +} + +/// getAddRecExpr - Get an add recurrence expression for the specified loop. +/// Simplify the expression as much as possible. 
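+/// For example, a counter that starts at 0 and is incremented by 2 on each
+/// iteration of loop L is the affine recurrence {0,+,2}<L>.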
+const SCEV * +ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, + const Loop *L, + bool HasNUW, bool HasNSW) { + if (Operands.size() == 1) return Operands[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); + for (unsigned i = 1, e = Operands.size(); i != e; ++i) + assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && + "SCEVAddRecExpr operand types don't match!"); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + assert(isLoopInvariant(Operands[i], L) && + "SCEVAddRecExpr operand is not loop-invariant!"); +#endif + + if (Operands.back()->isZero()) { + Operands.pop_back(); + return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X + } + + // It's tempting to want to call getMaxBackedgeTakenCount count here and + // use that information to infer NUW and NSW flags. However, computing a + // BE count requires calling getAddRecExpr, so we may not yet have a + // meaningful BE count at this point (and if we don't, we'd be stuck + // with a SCEVCouldNotCompute as the cached BE count). + + // If HasNSW is true and all the operands are non-negative, infer HasNUW. + if (!HasNUW && HasNSW) { + bool All = true; + for (SmallVectorImpl::const_iterator I = Operands.begin(), + E = Operands.end(); I != E; ++I) + if (!isKnownNonNegative(*I)) { + All = false; + break; + } + if (All) HasNUW = true; + } + + // Canonicalize nested AddRecs in by nesting them in order of loop depth. + if (const SCEVAddRecExpr *NestedAR = dyn_cast(Operands[0])) { + const Loop *NestedLoop = NestedAR->getLoop(); + if (L->contains(NestedLoop) ? + (L->getLoopDepth() < NestedLoop->getLoopDepth()) : + (!NestedLoop->contains(L) && + DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { + SmallVector NestedOperands(NestedAR->op_begin(), + NestedAR->op_end()); + Operands[0] = NestedAR->getStart(); + // AddRecs require their operands be loop-invariant with respect to their + // loops. Don't perform this transformation if it would break this + // requirement. + bool AllInvariant = true; + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + if (!isLoopInvariant(Operands[i], L)) { + AllInvariant = false; + break; + } + if (AllInvariant) { + NestedOperands[0] = getAddRecExpr(Operands, L); + AllInvariant = true; + for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) + if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { + AllInvariant = false; + break; + } + if (AllInvariant) + // Ok, both add recurrences are valid after the transformation. + return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); + } + // Reset Operands to its original state. + Operands[0] = NestedAR; + } + } + + // Okay, it looks like we really DO need an addrec expr. Check to see if we + // already have one, otherwise create a new one. 
+ FoldingSetNodeID ID; + ID.AddInteger(scAddRecExpr); + for (unsigned i = 0, e = Operands.size(); i != e; ++i) + ID.AddPointer(Operands[i]); + ID.AddPointer(L); + void *IP = 0; + SCEVAddRecExpr *S = + static_cast(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); + if (!S) { + const SCEV **O = SCEVAllocator.Allocate(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); + UniqueSCEVs.InsertNode(S, IP); + } + if (HasNUW) S->setHasNoUnsignedWrap(true); + if (HasNSW) S->setHasNoSignedWrap(true); + return S; +} + +const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getSMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { + assert(!Ops.empty() && "Cannot get empty smax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVSMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::smax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast(Ops[0])->getValue()->isMinValue(true)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { + // If we have an smax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first SMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedSMax = false; + while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); + DeletedSMax = true; + } + + if (DeletedSMax) + return getSMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. + for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X smax Y smax Y --> X smax Y + // X smax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced smax down to nothing!"); + + // Okay, it looks like we really DO need an smax expr. 
Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; + ID.AddInteger(scSMaxExpr); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = 0; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); + UniqueSCEVs.InsertNode(S, IP); + return S; +} + +const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, + const SCEV *RHS) { + SmallVector Ops; + Ops.push_back(LHS); + Ops.push_back(RHS); + return getUMaxExpr(Ops); +} + +const SCEV * +ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { + assert(!Ops.empty() && "Cannot get empty umax!"); + if (Ops.size() == 1) return Ops[0]; +#ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && + "SCEVUMaxExpr operand types don't match!"); +#endif + + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, LI); + + // If there are any constants, fold them together. + unsigned Idx = 0; + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get(getContext(), + APIntOps::umax(LHSC->getValue()->getValue(), + RHSC->getValue()->getValue())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + + // If we are left with a constant minimum-int, strip it off. + if (cast(Ops[0])->getValue()->isMinValue(false)) { + Ops.erase(Ops.begin()); + --Idx; + } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { + // If we have an umax with a constant maximum-int, it will always be + // maximum-int. + return Ops[0]; + } + + if (Ops.size() == 1) return Ops[0]; + } + + // Find the first UMax + while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) + ++Idx; + + // Check to see if one of the operands is a UMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { + bool DeletedUMax = false; + while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { + Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); + DeletedUMax = true; + } + + if (DeletedUMax) + return getUMaxExpr(Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. + for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) + // X umax Y umax Y --> X umax Y + // X umax Y --> X, if X is always greater than Y + if (Ops[i] == Ops[i+1] || + isKnownPredicate(ICmpInst::ICMP_UGE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); + --i; --e; + } else if (isKnownPredicate(ICmpInst::ICMP_ULE, Ops[i], Ops[i+1])) { + Ops.erase(Ops.begin()+i, Ops.begin()+i+1); + --i; --e; + } + + if (Ops.size() == 1) return Ops[0]; + + assert(!Ops.empty() && "Reduced umax down to nothing!"); + + // Okay, it looks like we really DO need a umax expr. Check to see if we + // already have one, otherwise create a new one. 
+  FoldingSetNodeID ID;
+  ID.AddInteger(scUMaxExpr);
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+    ID.AddPointer(Ops[i]);
+  void *IP = 0;
+  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+                                             O, Ops.size());
+  UniqueSCEVs.InsertNode(S, IP);
+  return S;
+}
+
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
+  // ~smax(~x, ~y) == smin(x, y).
+  return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
+  // ~umax(~x, ~y) == umin(x, y)
+  return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
+  // If we have TargetData, we can bypass creating a target-independent
+  // constant expression and then folding it back into a ConstantInt.
+  // This is just a compile-time optimization.
+  if (TD)
+    return getConstant(TD->getIntPtrType(getContext()),
+                       TD->getTypeAllocSize(AllocTy));
+
+  Constant *C = ConstantExpr::getSizeOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
+  Constant *C = ConstantExpr::getAlignOf(AllocTy);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
+                                             unsigned FieldNo) {
+  // If we have TargetData, we can bypass creating a target-independent
+  // constant expression and then folding it back into a ConstantInt.
+  // This is just a compile-time optimization.
+  if (TD)
+    return getConstant(TD->getIntPtrType(getContext()),
+                       TD->getStructLayout(STy)->getElementOffset(FieldNo));
+
+  Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
+                                             Constant *FieldNo) {
+  Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+    if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+      C = Folded;
+  const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
+  return getTruncateOrZeroExtend(getSCEV(C), Ty);
+}
+
+const SCEV *ScalarEvolution::getUnknown(Value *V) {
+  // Don't attempt to do anything other than create a SCEVUnknown object
+  // here. createSCEV only calls getUnknown after checking for all other
+  // interesting possibilities, and any other code that calls getUnknown
+  // is doing so in order to hide a value from SCEV canonicalization.
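+  // As with every other SCEV kind, SCEVUnknowns are uniqued in the
+  // UniqueSCEVs folding set, so repeated queries for the same Value
+  // return the same node.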
+
+  FoldingSetNodeID ID;
+  ID.AddInteger(scUnknown);
+  ID.AddPointer(V);
+  void *IP = 0;
+  if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+    assert(cast<SCEVUnknown>(S)->getValue() == V &&
+           "Stale SCEVUnknown in uniquing map!");
+    return S;
+  }
+  SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
+                                            FirstUnknown);
+  FirstUnknown = cast<SCEVUnknown>(S);
+  UniqueSCEVs.InsertNode(S, IP);
+  return S;
+}
+
+//===----------------------------------------------------------------------===//
+//            Basic SCEV Analysis and PHI Idiom Recognition Code
+//
+
+/// isSCEVable - Test if values of the given type are analyzable within
+/// the SCEV framework. This primarily includes integer types, and it
+/// can optionally include pointer types if the ScalarEvolution class
+/// has access to target-specific information.
+bool ScalarEvolution::isSCEVable(const Type *Ty) const {
+  // Integers and pointers are always SCEVable.
+  return Ty->isIntegerTy() || Ty->isPointerTy();
+}
+
+/// getTypeSizeInBits - Return the size in bits of the specified type,
+/// for which isSCEVable must return true.
+uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  // If we have a TargetData, use it!
+  if (TD)
+    return TD->getTypeSizeInBits(Ty);
+
+  // Integer types have fixed sizes.
+  if (Ty->isIntegerTy())
+    return Ty->getPrimitiveSizeInBits();
+
+  // The only other supported type is pointer. Without TargetData,
+  // conservatively assume pointers are 64-bit.
+  assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
+  return 64;
+}
+
+/// getEffectiveSCEVType - Return a type with the same bitwidth as
+/// the given type and which represents how SCEV will treat the given
+/// type, for which isSCEVable must return true. For pointer types,
+/// this is the pointer-sized integer type.
+const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
+  assert(isSCEVable(Ty) && "Type is not SCEVable!");
+
+  if (Ty->isIntegerTy())
+    return Ty;
+
+  // The only other supported type is pointer.
+  assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
+  if (TD) return TD->getIntPtrType(getContext());
+
+  // Without TargetData, conservatively assume pointers are 64-bit.
+  return Type::getInt64Ty(getContext());
+}
+
+const SCEV *ScalarEvolution::getCouldNotCompute() {
+  return &CouldNotCompute;
+}
+
+/// getSCEV - Return an existing SCEV if it exists, otherwise analyze the
+/// expression and create a new one.
+const SCEV *ScalarEvolution::getSCEV(Value *V) {
+  assert(isSCEVable(V->getType()) && "Value is not SCEVable!");
+
+  ValueExprMapType::const_iterator I = ValueExprMap.find(V);
+  if (I != ValueExprMap.end()) return I->second;
+  const SCEV *S = createSCEV(V);
+
+  // The process of creating a SCEV for V may have caused other SCEVs
+  // to have been created, so it's necessary to insert the new entry
+  // from scratch, rather than trying to remember the insert position
+  // above.
+  ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S));
+  return S;
+}
+
+/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
+///
+const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getConstant(
+               cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  return getMulExpr(V,
+                  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))));
+}
+
+/// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
+const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
+  if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
+    return getConstant(
+                cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
+
+  const Type *Ty = V->getType();
+  Ty = getEffectiveSCEVType(Ty);
+  const SCEV *AllOnes =
+                   getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)));
+  return getMinusSCEV(AllOnes, V);
+}
+
+/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1,
+/// and thus the HasNUW and HasNSW bits apply to the resultant add, not
+/// whether the sub would have overflowed.
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+                                          bool HasNUW, bool HasNSW) {
+  // Fast path: X - X --> 0.
+  if (LHS == RHS)
+    return getConstant(LHS->getType(), 0);
+
+  // X - Y --> X + -Y
+  return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW);
+}
+
+/// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended, it is
+/// zero extended.
+const SCEV *
+ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or zero extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getZeroExtendExpr(V, Ty);
+}
+
+/// getTruncateOrSignExtend - Return a SCEV corresponding to a conversion of
+/// the input value to the specified type. If the type must be extended, it is
+/// sign extended.
+const SCEV *
+ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
+                                         const Type *Ty) {
+  const Type *SrcTy = V->getType();
+  assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+         (Ty->isIntegerTy() || Ty->isPointerTy()) &&
+         "Cannot truncate or sign extend with non-integer arguments!");
+  if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
+    return V;  // No conversion
+  if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty))
+    return getTruncateExpr(V, Ty);
+  return getSignExtendExpr(V, Ty);
+}
+
+/// getNoopOrZeroExtend - Return a SCEV corresponding to a conversion of the
+/// input value to the specified type. If the type must be extended, it is
+/// zero extended. The conversion must not be narrowing.
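+/// For example, widening an i8 expression to i32 yields a zero-extend node,
+/// while an i32-to-i32 request returns the input unchanged.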
+const SCEV * +ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or zero extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrZeroExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getZeroExtendExpr(V, Ty); +} + +/// getNoopOrSignExtend - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. If the type must be extended, it is sign +/// extended. The conversion must not be narrowing. +const SCEV * +ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or sign extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrSignExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getSignExtendExpr(V, Ty); +} + +/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of +/// the input value to the specified type. If the type must be extended, +/// it is extended with unspecified bits. The conversion must not be +/// narrowing. +const SCEV * +ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot noop or any extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrAnyExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getAnyExtendExpr(V, Ty); +} + +/// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the +/// input value to the specified type. The conversion must not be widening. +const SCEV * +ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + (Ty->isIntegerTy() || Ty->isPointerTy()) && + "Cannot truncate or noop with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && + "getTruncateOrNoop cannot extend!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getTruncateExpr(V, Ty); +} + +/// getUMaxFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umax operation +/// with them. +const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMaxExpr(PromotedLHS, PromotedRHS); +} + +/// getUMinFromMismatchedTypes - Promote the operands to the wider of +/// the types using zero-extension, and then perform a umin operation +/// with them. 
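+/// For example, given an i32 LHS and an i64 RHS, LHS is first zero-extended
+/// to i64 and the umin is computed at the wider type.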
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, + const SCEV *RHS) { + const SCEV *PromotedLHS = LHS; + const SCEV *PromotedRHS = RHS; + + if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType())) + PromotedRHS = getZeroExtendExpr(RHS, LHS->getType()); + else + PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType()); + + return getUMinExpr(PromotedLHS, PromotedRHS); +} + +/// PushDefUseChildren - Push users of the given Instruction +/// onto the given Worklist. +static void +PushDefUseChildren(Instruction *I, + SmallVectorImpl &Worklist) { + // Push the def-use children onto the Worklist stack. + for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); + UI != UE; ++UI) + Worklist.push_back(cast(*UI)); +} + +/// ForgetSymbolicValue - This looks up computed SCEV values for all +/// instructions that depend on the given instruction and removes them from +/// the ValueExprMapType map if they reference SymName. This is used during PHI +/// resolution. +void +ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) { + SmallVector Worklist; + PushDefUseChildren(PN, Worklist); + + SmallPtrSet Visited; + Visited.insert(PN); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I)) continue; + + ValueExprMapType::iterator It = + ValueExprMap.find(static_cast(I)); + if (It != ValueExprMap.end()) { + const SCEV *Old = It->second; + + // Short-circuit the def-use traversal if the symbolic name + // ceases to appear in expressions. + if (Old != SymName && !hasOperand(Old, SymName)) + continue; + + // SCEVUnknown for a PHI either means that it has an unrecognized + // structure, it's a PHI that's in the progress of being computed + // by createNodeForPHI, or it's a single-value PHI. In the first case, + // additional loop trip count information isn't going to change anything. + // In the second case, createNodeForPHI will perform the necessary + // updates on its own when it gets to that point. In the third, we do + // want to forget the SCEVUnknown. + if (!isa(I) || + !isa(Old) || + (I != PN && Old == SymName)) { + forgetMemoizedResults(Old); + ValueExprMap.erase(It); + } + } + + PushDefUseChildren(I, Worklist); + } +} + +/// createNodeForPHI - PHI nodes have two cases. Either the PHI node exists in +/// a loop header, making it a potential recurrence, or it doesn't. +/// +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { + if (const Loop *L = LI->getLoopFor(PN->getParent())) + if (L->getHeader() == PN->getParent()) { + // The loop may have multiple entrances or multiple exits; we can analyze + // this phi as an addrec if it has a unique entry value and a unique + // backedge value. + Value *BEValueV = 0, *StartValueV = 0; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (L->contains(PN->getIncomingBlock(i))) { + if (!BEValueV) { + BEValueV = V; + } else if (BEValueV != V) { + BEValueV = 0; + break; + } + } else if (!StartValueV) { + StartValueV = V; + } else if (StartValueV != V) { + StartValueV = 0; + break; + } + } + if (BEValueV && StartValueV) { + // While we are analyzing this PHI node, handle its value symbolically. 
+ const SCEV *SymbolicName = getUnknown(PN); + assert(ValueExprMap.find(PN) == ValueExprMap.end() && + "PHI node already processed?"); + ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + + // Using this symbolic name for the PHI, analyze the value coming around + // the back-edge. + const SCEV *BEValue = getSCEV(BEValueV); + + // NOTE: If BEValue is loop invariant, we know that the PHI node just + // has a special value for the first iteration of the loop. + + // If the value coming around the backedge is an add with the symbolic + // value we just inserted, then we found a simple induction variable! + if (const SCEVAddExpr *Add = dyn_cast(BEValue)) { + // If there is a single occurrence of the symbolic value, replace it + // with a recurrence. + unsigned FoundIndex = Add->getNumOperands(); + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (Add->getOperand(i) == SymbolicName) + if (FoundIndex == e) { + FoundIndex = i; + break; + } + + if (FoundIndex != Add->getNumOperands()) { + // Create an add with everything but the specified operand. + SmallVector Ops; + for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) + if (i != FoundIndex) + Ops.push_back(Add->getOperand(i)); + const SCEV *Accum = getAddExpr(Ops); + + // This is not a valid addrec if the step amount is varying each + // loop iteration, but is not itself an addrec in this loop. + if (isLoopInvariant(Accum, L) || + (isa(Accum) && + cast(Accum)->getLoop() == L)) { + bool HasNUW = false; + bool HasNSW = false; + + // If the increment doesn't overflow, then neither the addrec nor + // the post-increment will overflow. + if (const AddOperator *OBO = dyn_cast(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + HasNUW = true; + if (OBO->hasNoSignedWrap()) + HasNSW = true; + } else if (const GEPOperator *GEP = + dyn_cast(BEValueV)) { + // If the increment is a GEP, then we know it won't perform a + // signed overflow, because the address space cannot be + // wrapped around. + // + // NOTE: This isn't strictly true, because you could have an + // object straddling the 2G address boundary in a 32-bit address + // space (for example). We really want to model this as a "has + // no signed/unsigned wrap" where the base pointer is treated as + // unsigned and the increment is known to not have signed + // wrapping. + // + // This is a highly theoretical concern though, and this is good + // enough for all cases we know of at this point. :) + // + HasNSW |= GEP->isInBounds(); + } + + const SCEV *StartVal = getSCEV(StartValueV); + const SCEV *PHISCEV = + getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW); + + // Since the no-wrap flags are on the increment, they apply to the + // post-incremented value as well. + if (isLoopInvariant(Accum, L)) + (void)getAddRecExpr(getAddExpr(StartVal, Accum), + Accum, L, HasNUW, HasNSW); + + // Okay, for the entire analysis of this edge we assumed the PHI + // to be symbolic. We now need to go back and purge all of the + // entries for the scalars that use the symbolic expression. + ForgetSymbolicName(PN, SymbolicName); + ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; + return PHISCEV; + } + } + } else if (const SCEVAddRecExpr *AddRec = + dyn_cast(BEValue)) { + // Otherwise, this could be a loop like this: + // i = 0; for (j = 1; ..; ++j) { .... i = j; } + // In this case, j = {1,+,1} and BEValue is j. + // Because the other in-value of i (0) fits the evolution of BEValue + // i really is an addrec evolution. 
+          if (AddRec->getLoop() == L && AddRec->isAffine()) {
+            const SCEV *StartVal = getSCEV(StartValueV);
+
+            // If StartVal = j.start - j.stride, we can use StartVal as the
+            // initial step of the addrec evolution.
+            if (StartVal == getMinusSCEV(AddRec->getOperand(0),
+                                         AddRec->getOperand(1))) {
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, AddRec->getOperand(1), L);
+
+              // Okay, for the entire analysis of this edge we assumed the PHI
+              // to be symbolic. We now need to go back and purge all of the
+              // entries for the scalars that use the symbolic expression.
+              ForgetSymbolicName(PN, SymbolicName);
+              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+              return PHISCEV;
+            }
+          }
+        }
+      }
+    }
+
+  // If the PHI has a single incoming value, follow that value, unless the
+  // PHI's incoming blocks are in a different loop, in which case doing so
+  // risks breaking LCSSA form. Instcombine would normally zap these, but
+  // it doesn't have DominatorTree information, so it may miss cases.
+  if (Value *V = SimplifyInstruction(PN, TD, DT))
+    if (LI->replacementPreservesLCSSAForm(PN, V))
+      return getSCEV(V);
+
+  // If it's not a loop phi, we can't handle it yet.
+  return getUnknown(PN);
+}
+
+/// createNodeForGEP - Expand GEP instructions into add and multiply
+/// operations. This allows them to be analyzed by regular SCEV code.
+///
+const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
+
+  // Don't blindly transfer the inbounds flag from the GEP instruction to the
+  // Add expression, because the Instruction may be guarded by control flow
+  // and the no-overflow bits may not be valid for the expression in any
+  // context.
+  bool isInBounds = GEP->isInBounds();
+
+  const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
+  Value *Base = GEP->getOperand(0);
+  // Don't attempt to analyze GEPs over unsized objects.
+  if (!cast<PointerType>(Base->getType())->getElementType()->isSized())
+    return getUnknown(GEP);
+  const SCEV *TotalOffset = getConstant(IntPtrTy, 0);
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()),
+                                      E = GEP->op_end();
+       I != E; ++I) {
+    Value *Index = *I;
+    // Compute the (potentially symbolic) offset in bytes for this index.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
+      // For a struct, add the member offset.
+      unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
+      const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+      // Add the field offset to the running total offset.
+      TotalOffset = getAddExpr(TotalOffset, FieldOffset);
+    } else {
+      // For an array, add the element offset, explicitly scaled.
+      const SCEV *ElementSize = getSizeOfExpr(*GTI);
+      const SCEV *IndexS = getSCEV(Index);
+      // Getelementptr indices are signed.
+      IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+      // Multiply the index by the element size to compute the element offset.
+      const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false,
+                                           /*NSW*/ isInBounds);
+
+      // Add the element offset to the running total offset.
+      TotalOffset = getAddExpr(TotalOffset, LocalOffset);
+    }
+  }
+
+  // Get the SCEV for the GEP base.
+  const SCEV *BaseS = getSCEV(Base);
+
+  // Add the total offset from all the GEP indices to the base.
+  return getAddExpr(BaseS, TotalOffset, /*NUW*/ false,
+                    /*NSW*/ isInBounds);
+}
+
+/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
+/// guaranteed to end in (at every loop iteration). It is, at the same time,
+/// the minimum number of times S is divisible by 2.
+/// For example, given {4,+,8} it returns 2. If S is guaranteed to be 0, it
+/// returns the bitwidth of S.
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+    return C->getValue()->getValue().countTrailingZeros();
+
+  if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
+    return std::min(GetMinTrailingZeros(T->getOperand()),
+                    (uint32_t)getTypeSizeInBits(T->getType()));
+
+  if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
+    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+             getTypeSizeInBits(E->getType()) : OpRes;
+  }
+
+  if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
+    uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+    return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+             getTypeSizeInBits(E->getType()) : OpRes;
+  }
+
+  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+    return MinOpRes;
+  }
+
+  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
+    // The result is the sum of all operands results.
+    uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+    uint32_t BitWidth = getTypeSizeInBits(M->getType());
+    for (unsigned i = 1, e = M->getNumOperands();
+         SumOpRes != BitWidth && i != e; ++i)
+      SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
+                          BitWidth);
+    return SumOpRes;
+  }
+
+  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
+    for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
+    return MinOpRes;
+  }
+
+  if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+    return MinOpRes;
+  }
+
+  if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
+    // The result is the min of all operands results.
+    uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
+    for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
+      MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
+    return MinOpRes;
+  }
+
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // For a SCEVUnknown, ask ValueTracking.
+    unsigned BitWidth = getTypeSizeInBits(U->getType());
+    APInt Mask = APInt::getAllOnesValue(BitWidth);
+    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+    return Zeros.countTrailingOnes();
+  }
+
+  // SCEVUDivExpr
+  return 0;
+}
+
+/// getUnsignedRange - Determine the unsigned range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getUnsignedRange(const SCEV *S) {
+  // See if we've computed this range already.
+  DenseMap<const SCEV *, ConstantRange>::iterator I = UnsignedRanges.find(S);
+  if (I != UnsignedRanges.end())
+    return I->second;
+
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+    return setUnsignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+  unsigned BitWidth = getTypeSizeInBits(S->getType());
+  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+  // If the value has known zeros, the maximum unsigned value will have those
+  // known zeros as well.
+  uint32_t TZ = GetMinTrailingZeros(S);
+  if (TZ != 0)
+    ConservativeResult =
+      ConstantRange(APInt::getMinValue(BitWidth),
+                    APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
+
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    ConstantRange X = getUnsignedRange(Add->getOperand(0));
+    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+      X = X.add(getUnsignedRange(Add->getOperand(i)));
+    return setUnsignedRange(Add, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+    ConstantRange X = getUnsignedRange(Mul->getOperand(0));
+    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+      X = X.multiply(getUnsignedRange(Mul->getOperand(i)));
+    return setUnsignedRange(Mul, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+    ConstantRange X = getUnsignedRange(SMax->getOperand(0));
+    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+      X = X.smax(getUnsignedRange(SMax->getOperand(i)));
+    return setUnsignedRange(SMax, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+    ConstantRange X = getUnsignedRange(UMax->getOperand(0));
+    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+      X = X.umax(getUnsignedRange(UMax->getOperand(i)));
+    return setUnsignedRange(UMax, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+    ConstantRange X = getUnsignedRange(UDiv->getLHS());
+    ConstantRange Y = getUnsignedRange(UDiv->getRHS());
+    return setUnsignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+  }
+
+  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+    ConstantRange X = getUnsignedRange(ZExt->getOperand());
+    return setUnsignedRange(ZExt,
+      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+  }
+
+  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+    ConstantRange X = getUnsignedRange(SExt->getOperand());
+    return setUnsignedRange(SExt,
+      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+  }
+
+  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+    ConstantRange X = getUnsignedRange(Trunc->getOperand());
+    return setUnsignedRange(Trunc,
+      ConservativeResult.intersectWith(X.truncate(BitWidth)));
+  }
+
+  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+    // If there's no unsigned wrap, the value will never be less than its
+    // initial value.
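+    // Illustrative example (not in the original comment): {4,+,1}<nuw> can
+    // never drop below its start, so intersecting with the wrapped range
+    // [4, 0) below excludes only the values 0..3.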
+    if (AddRec->hasNoUnsignedWrap())
+      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
+        if (!C->getValue()->isZero())
+          ConservativeResult =
+            ConservativeResult.intersectWith(
+              ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
+
+    // TODO: non-affine addrec
+    if (AddRec->isAffine()) {
+      const Type *Ty = AddRec->getType();
+      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+        const SCEV *Start = AddRec->getStart();
+        const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+        ConstantRange StartRange = getUnsignedRange(Start);
+        ConstantRange StepRange = getSignedRange(Step);
+        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+        ConstantRange EndRange =
+          StartRange.add(MaxBECountRange.multiply(StepRange));
+
+        // Check for overflow. This must be done with ConstantRange arithmetic
+        // because we could be called from within the ScalarEvolution overflow
+        // checking code.
+        ConstantRange ExtStartRange = StartRange.zextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtMaxBECountRange =
+          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtEndRange = EndRange.zextOrTrunc(BitWidth*2+1);
+        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+            ExtEndRange)
+          return setUnsignedRange(AddRec, ConservativeResult);
+
+        APInt Min = APIntOps::umin(StartRange.getUnsignedMin(),
+                                   EndRange.getUnsignedMin());
+        APInt Max = APIntOps::umax(StartRange.getUnsignedMax(),
+                                   EndRange.getUnsignedMax());
+        if (Min.isMinValue() && Max.isMaxValue())
+          return setUnsignedRange(AddRec, ConservativeResult);
+        return setUnsignedRange(AddRec,
+          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+      }
+    }
+
+    return setUnsignedRange(AddRec, ConservativeResult);
+  }
+
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // For a SCEVUnknown, ask ValueTracking.
+    APInt Mask = APInt::getAllOnesValue(BitWidth);
+    APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+    ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+    if (Ones == ~Zeros + 1)
+      return setUnsignedRange(U, ConservativeResult);
+    return setUnsignedRange(U,
+      ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1)));
+  }
+
+  return setUnsignedRange(S, ConservativeResult);
+}
+
+/// getSignedRange - Determine the signed range for a particular SCEV.
+///
+ConstantRange
+ScalarEvolution::getSignedRange(const SCEV *S) {
+  // See if we've computed this range already.
+  DenseMap<const SCEV *, ConstantRange>::iterator I = SignedRanges.find(S);
+  if (I != SignedRanges.end())
+    return I->second;
+
+  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+    return setSignedRange(C, ConstantRange(C->getValue()->getValue()));
+
+  unsigned BitWidth = getTypeSizeInBits(S->getType());
+  ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true);
+
+  // If the value has known zeros, the maximum signed value will have those
+  // known zeros as well.
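+  // Worked example (illustrative, not from the original source): with
+  // TZ == 2 in an 8-bit type, the range below is [-128, 125), i.e. a
+  // signed maximum of 124, whose low two bits are clear.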
+  uint32_t TZ = GetMinTrailingZeros(S);
+  if (TZ != 0)
+    ConservativeResult =
+      ConstantRange(APInt::getSignedMinValue(BitWidth),
+                    APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+
+  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
+    ConstantRange X = getSignedRange(Add->getOperand(0));
+    for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i)
+      X = X.add(getSignedRange(Add->getOperand(i)));
+    return setSignedRange(Add, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
+    ConstantRange X = getSignedRange(Mul->getOperand(0));
+    for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i)
+      X = X.multiply(getSignedRange(Mul->getOperand(i)));
+    return setSignedRange(Mul, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) {
+    ConstantRange X = getSignedRange(SMax->getOperand(0));
+    for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i)
+      X = X.smax(getSignedRange(SMax->getOperand(i)));
+    return setSignedRange(SMax, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) {
+    ConstantRange X = getSignedRange(UMax->getOperand(0));
+    for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i)
+      X = X.umax(getSignedRange(UMax->getOperand(i)));
+    return setSignedRange(UMax, ConservativeResult.intersectWith(X));
+  }
+
+  if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
+    ConstantRange X = getSignedRange(UDiv->getLHS());
+    ConstantRange Y = getSignedRange(UDiv->getRHS());
+    return setSignedRange(UDiv, ConservativeResult.intersectWith(X.udiv(Y)));
+  }
+
+  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S)) {
+    ConstantRange X = getSignedRange(ZExt->getOperand());
+    return setSignedRange(ZExt,
+      ConservativeResult.intersectWith(X.zeroExtend(BitWidth)));
+  }
+
+  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) {
+    ConstantRange X = getSignedRange(SExt->getOperand());
+    return setSignedRange(SExt,
+      ConservativeResult.intersectWith(X.signExtend(BitWidth)));
+  }
+
+  if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) {
+    ConstantRange X = getSignedRange(Trunc->getOperand());
+    return setSignedRange(Trunc,
+      ConservativeResult.intersectWith(X.truncate(BitWidth)));
+  }
+
+  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) {
+    // If there's no signed wrap, and all the operands have the same sign or
+    // zero, the value won't ever change sign.
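+    // For example (illustrative): {1,+,2}<nsw> has all-non-negative operands
+    // and so stays non-negative forever; {-1,+,-2}<nsw> likewise stays
+    // non-positive.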
+    if (AddRec->hasNoSignedWrap()) {
+      bool AllNonNeg = true;
+      bool AllNonPos = true;
+      for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+        if (!isKnownNonNegative(AddRec->getOperand(i))) AllNonNeg = false;
+        if (!isKnownNonPositive(AddRec->getOperand(i))) AllNonPos = false;
+      }
+      if (AllNonNeg)
+        ConservativeResult = ConservativeResult.intersectWith(
+          ConstantRange(APInt(BitWidth, 0),
+                        APInt::getSignedMinValue(BitWidth)));
+      else if (AllNonPos)
+        ConservativeResult = ConservativeResult.intersectWith(
+          ConstantRange(APInt::getSignedMinValue(BitWidth),
+                        APInt(BitWidth, 1)));
+    }
+
+    // TODO: non-affine addrec
+    if (AddRec->isAffine()) {
+      const Type *Ty = AddRec->getType();
+      const SCEV *MaxBECount = getMaxBackedgeTakenCount(AddRec->getLoop());
+      if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+          getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
+        MaxBECount = getNoopOrZeroExtend(MaxBECount, Ty);
+
+        const SCEV *Start = AddRec->getStart();
+        const SCEV *Step = AddRec->getStepRecurrence(*this);
+
+        ConstantRange StartRange = getSignedRange(Start);
+        ConstantRange StepRange = getSignedRange(Step);
+        ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
+        ConstantRange EndRange =
+          StartRange.add(MaxBECountRange.multiply(StepRange));
+
+        // Check for overflow. This must be done with ConstantRange arithmetic
+        // because we could be called from within the ScalarEvolution overflow
+        // checking code.
+        ConstantRange ExtStartRange = StartRange.sextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtStepRange = StepRange.sextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtMaxBECountRange =
+          MaxBECountRange.zextOrTrunc(BitWidth*2+1);
+        ConstantRange ExtEndRange = EndRange.sextOrTrunc(BitWidth*2+1);
+        if (ExtStartRange.add(ExtMaxBECountRange.multiply(ExtStepRange)) !=
+            ExtEndRange)
+          return setSignedRange(AddRec, ConservativeResult);
+
+        APInt Min = APIntOps::smin(StartRange.getSignedMin(),
+                                   EndRange.getSignedMin());
+        APInt Max = APIntOps::smax(StartRange.getSignedMax(),
+                                   EndRange.getSignedMax());
+        if (Min.isMinSignedValue() && Max.isMaxSignedValue())
+          return setSignedRange(AddRec, ConservativeResult);
+        return setSignedRange(AddRec,
+          ConservativeResult.intersectWith(ConstantRange(Min, Max+1)));
+      }
+    }
+
+    return setSignedRange(AddRec, ConservativeResult);
+  }
+
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // For a SCEVUnknown, ask ValueTracking.
+    if (!U->getValue()->getType()->isIntegerTy() && !TD)
+      return setSignedRange(U, ConservativeResult);
+    unsigned NS = ComputeNumSignBits(U->getValue(), TD);
+    if (NS == 1)
+      return setSignedRange(U, ConservativeResult);
+    return setSignedRange(U, ConservativeResult.intersectWith(
+      ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1),
+                    APInt::getSignedMaxValue(BitWidth).ashr(NS - 1)+1)));
+  }
+
+  return setSignedRange(S, ConservativeResult);
+}
+
+/// createSCEV - We know that there is no SCEV for the specified value.
+/// Analyze the expression.
+///
+const SCEV *ScalarEvolution::createSCEV(Value *V) {
+  if (!isSCEVable(V->getType()))
+    return getUnknown(V);
+
+  unsigned Opcode = Instruction::UserOp1;
+  if (Instruction *I = dyn_cast<Instruction>(V)) {
+    Opcode = I->getOpcode();
+
+    // Don't attempt to analyze instructions in blocks that aren't
+    // reachable. Such instructions don't matter, and they aren't required
+    // to obey basic rules for definitions dominating uses which this
+    // analysis depends on.
+    if (!DT->isReachableFromEntry(I->getParent()))
+      return getUnknown(V);
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+    Opcode = CE->getOpcode();
+  else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return getConstant(CI);
+  else if (isa<ConstantPointerNull>(V))
+    return getConstant(V->getType(), 0);
+  else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+    return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());
+  else
+    return getUnknown(V);
+
+  Operator *U = cast<Operator>(V);
+  switch (Opcode) {
+  case Instruction::Add: {
+    // The simple thing to do would be to just call getSCEV on both operands
+    // and call getAddExpr with the result. However if we're looking at a
+    // bunch of things all added together, this can be quite inefficient,
+    // because it leads to N-1 getAddExpr calls for N ultimate operands.
+    // Instead, gather up all the operands and make a single getAddExpr call.
+    // LLVM IR canonical form means we need only traverse the left operands.
+    SmallVector<const SCEV *, 4> AddOps;
+    AddOps.push_back(getSCEV(U->getOperand(1)));
+    for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) {
+      unsigned Opcode = Op->getValueID() - Value::InstructionVal;
+      if (Opcode != Instruction::Add && Opcode != Instruction::Sub)
+        break;
+      U = cast<Operator>(Op);
+      const SCEV *Op1 = getSCEV(U->getOperand(1));
+      if (Opcode == Instruction::Sub)
+        AddOps.push_back(getNegativeSCEV(Op1));
+      else
+        AddOps.push_back(Op1);
+    }
+    AddOps.push_back(getSCEV(U->getOperand(0)));
+    return getAddExpr(AddOps);
+  }
+  case Instruction::Mul: {
+    // See the Add code above.
+    SmallVector<const SCEV *, 4> MulOps;
+    MulOps.push_back(getSCEV(U->getOperand(1)));
+    for (Value *Op = U->getOperand(0);
+         Op->getValueID() == Instruction::Mul + Value::InstructionVal;
+         Op = U->getOperand(0)) {
+      U = cast<Operator>(Op);
+      MulOps.push_back(getSCEV(U->getOperand(1)));
+    }
+    MulOps.push_back(getSCEV(U->getOperand(0)));
+    return getMulExpr(MulOps);
+  }
+  case Instruction::UDiv:
+    return getUDivExpr(getSCEV(U->getOperand(0)),
+                       getSCEV(U->getOperand(1)));
+  case Instruction::Sub:
+    return getMinusSCEV(getSCEV(U->getOperand(0)),
+                        getSCEV(U->getOperand(1)));
+  case Instruction::And:
+    // For an expression like x&255 that merely masks off the high bits,
+    // use zext(trunc(x)) as the SCEV expression.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      if (CI->isNullValue())
+        return getSCEV(U->getOperand(1));
+      if (CI->isAllOnesValue())
+        return getSCEV(U->getOperand(0));
+      const APInt &A = CI->getValue();
+
+      // Instcombine's ShrinkDemandedConstant may strip bits out of
+      // constants, obscuring what would otherwise be a low-bits mask.
+      // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+      // knew about to reconstruct a low-bits mask value.
+      unsigned LZ = A.countLeadingZeros();
+      unsigned BitWidth = A.getBitWidth();
+      APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+      APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+      ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+      APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+      if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
+        return
+          getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
+                                IntegerType::get(getContext(), BitWidth - LZ)),
+                            U->getType());
+    }
+    break;
+
+  case Instruction::Or:
+    // If the RHS of the Or is a constant, we may have something like:
+    // X*4+1 which got turned into X*4|1. Handle this as an Add so loop
+    // optimizations will transparently handle this case.
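+    // e.g. (X*4)|1 can be treated as (X*4)+1, since the two operands can
+    // never have a set bit in common.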
+    //
+    // In order for this transformation to be safe, the LHS must be of the
+    // form X*(2^n) and the Or constant must be less than 2^n.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      const SCEV *LHS = getSCEV(U->getOperand(0));
+      const APInt &CIVal = CI->getValue();
+      if (GetMinTrailingZeros(LHS) >=
+          (CIVal.getBitWidth() - CIVal.countLeadingZeros())) {
+        // Build a plain add SCEV.
+        const SCEV *S = getAddExpr(LHS, getSCEV(CI));
+        // If the LHS of the add was an addrec and it has no-wrap flags,
+        // transfer the no-wrap flags, since an or won't introduce a wrap.
+        if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) {
+          const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS);
+          if (OldAR->hasNoUnsignedWrap())
+            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoUnsignedWrap(true);
+          if (OldAR->hasNoSignedWrap())
+            const_cast<SCEVAddRecExpr *>(NewAR)->setHasNoSignedWrap(true);
+        }
+        return S;
+      }
+    }
+    break;
+  case Instruction::Xor:
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      // If the RHS of the xor is a signbit, then this is just an add.
+      // Instcombine turns add of signbit into xor as a strength reduction
+      // step.
+      if (CI->getValue().isSignBit())
+        return getAddExpr(getSCEV(U->getOperand(0)),
+                          getSCEV(U->getOperand(1)));
+
+      // If the RHS of xor is -1, then this is a not operation.
+      if (CI->isAllOnesValue())
+        return getNotSCEV(getSCEV(U->getOperand(0)));
+
+      // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask.
+      // This is a variant of the check for xor with -1, and it handles
+      // the case where instcombine has trimmed non-demanded bits out
+      // of an xor with -1.
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0)))
+        if (ConstantInt *LCI = dyn_cast<ConstantInt>(BO->getOperand(1)))
+          if (BO->getOpcode() == Instruction::And &&
+              LCI->getValue() == CI->getValue())
+            if (const SCEVZeroExtendExpr *Z =
+                  dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+              const Type *UTy = U->getType();
+              const SCEV *Z0 = Z->getOperand();
+              const Type *Z0Ty = Z0->getType();
+              unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+              // If C is a low-bits mask, the zero extend is serving to
+              // mask off the high bits. Complement the operand and
+              // re-apply the zext.
+              if (APIntOps::isMask(Z0TySize, CI->getValue()))
+                return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+              // If C is a single bit, it may be in the sign-bit position
+              // before the zero-extend. In this case, represent the xor
+              // using an add, which is equivalent, and re-apply the zext.
+              APInt Trunc = CI->getValue().trunc(Z0TySize);
+              if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+                  Trunc.isSignBit())
+                return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+                                         UTy);
+            }
+    }
+    break;
+
+  case Instruction::Shl:
+    // Turn shift left of a constant amount into a multiply.
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+      // If the shift count is not less than the bitwidth, the result of
+      // the shift is undefined. Don't try to analyze it, because the
+      // resolution chosen here may differ from the resolution chosen in
+      // other parts of the compiler.
+      if (SA->getValue().uge(BitWidth))
+        break;
+
+      Constant *X = ConstantInt::get(getContext(),
+        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::LShr:
+    // Turn logical shift right of a constant into an unsigned divide.
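+    // For example, "X lshr 3" is modeled as "X /u 8"; the power of two is
+    // built below as APInt(BitWidth, 1) shifted left by the shift amount.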
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      uint32_t BitWidth = cast<IntegerType>(U->getType())->getBitWidth();
+
+      // If the shift count is not less than the bitwidth, the result of
+      // the shift is undefined. Don't try to analyze it, because the
+      // resolution chosen here may differ from the resolution chosen in
+      // other parts of the compiler.
+      if (SA->getValue().uge(BitWidth))
+        break;
+
+      Constant *X = ConstantInt::get(getContext(),
+        APInt(BitWidth, 1).shl(SA->getZExtValue()));
+      return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X));
+    }
+    break;
+
+  case Instruction::AShr:
+    // For a two-shift sext-inreg, use sext(trunc(x)) as the SCEV expression.
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1)))
+      if (Operator *L = dyn_cast<Operator>(U->getOperand(0)))
+        if (L->getOpcode() == Instruction::Shl &&
+            L->getOperand(1) == U->getOperand(1)) {
+          uint64_t BitWidth = getTypeSizeInBits(U->getType());
+
+          // If the shift count is not less than the bitwidth, the result of
+          // the shift is undefined. Don't try to analyze it, because the
+          // resolution chosen here may differ from the resolution chosen in
+          // other parts of the compiler.
+          if (CI->getValue().uge(BitWidth))
+            break;
+
+          uint64_t Amt = BitWidth - CI->getZExtValue();
+          if (Amt == BitWidth)
+            return getSCEV(L->getOperand(0));       // shift by zero --> noop
+          return
+            getSignExtendExpr(getTruncateExpr(getSCEV(L->getOperand(0)),
+                                              IntegerType::get(getContext(),
+                                                               Amt)),
+                              U->getType());
+        }
+    break;
+
+  case Instruction::Trunc:
+    return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::ZExt:
+    return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::SExt:
+    return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType());
+
+  case Instruction::BitCast:
+    // BitCasts are no-op casts so we just eliminate the cast.
+    if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType()))
+      return getSCEV(U->getOperand(0));
+    break;
+
+  // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can
+  // lead to pointer expressions which cannot safely be expanded to GEPs,
+  // because ScalarEvolution doesn't respect the GEP aliasing rules when
+  // simplifying integer expressions.
+
+  case Instruction::GetElementPtr:
+    return createNodeForGEP(cast<GEPOperator>(U));
+
+  case Instruction::PHI:
+    return createNodeForPHI(cast<PHINode>(U));
+
+  case Instruction::Select:
+    // This could be a smax or umax that was lowered earlier.
+    // Try to recover it.
+    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) {
+      Value *LHS = ICI->getOperand(0);
+      Value *RHS = ICI->getOperand(1);
+      switch (ICI->getPredicate()) {
+      case ICmpInst::ICMP_SLT:
+      case ICmpInst::ICMP_SLE:
+        std::swap(LHS, RHS);
+        // fall through
+      case ICmpInst::ICMP_SGT:
+      case ICmpInst::ICMP_SGE:
+        // a >s b ? a+x : b+x  ->  smax(a, b)+x
+        // a >s b ? b+x : a+x  ->  smin(a, b)+x
+        if (LHS->getType() == U->getType()) {
+          const SCEV *LS = getSCEV(LHS);
+          const SCEV *RS = getSCEV(RHS);
+          const SCEV *LA = getSCEV(U->getOperand(1));
+          const SCEV *RA = getSCEV(U->getOperand(2));
+          const SCEV *LDiff = getMinusSCEV(LA, LS);
+          const SCEV *RDiff = getMinusSCEV(RA, RS);
+          if (LDiff == RDiff)
+            return getAddExpr(getSMaxExpr(LS, RS), LDiff);
+          LDiff = getMinusSCEV(LA, RS);
+          RDiff = getMinusSCEV(RA, LS);
+          if (LDiff == RDiff)
+            return getAddExpr(getSMinExpr(LS, RS), LDiff);
+        }
+        break;
+      case ICmpInst::ICMP_ULT:
+      case ICmpInst::ICMP_ULE:
+        std::swap(LHS, RHS);
+        // fall through
+      case ICmpInst::ICMP_UGT:
+      case ICmpInst::ICMP_UGE:
+        // a >u b ? a+x : b+x  ->  umax(a, b)+x
+        // a >u b ? b+x : a+x  ->  umin(a, b)+x
+        if (LHS->getType() == U->getType()) {
+          const SCEV *LS = getSCEV(LHS);
+          const SCEV *RS = getSCEV(RHS);
+          const SCEV *LA = getSCEV(U->getOperand(1));
+          const SCEV *RA = getSCEV(U->getOperand(2));
+          const SCEV *LDiff = getMinusSCEV(LA, LS);
+          const SCEV *RDiff = getMinusSCEV(RA, RS);
+          if (LDiff == RDiff)
+            return getAddExpr(getUMaxExpr(LS, RS), LDiff);
+          LDiff = getMinusSCEV(LA, RS);
+          RDiff = getMinusSCEV(RA, LS);
+          if (LDiff == RDiff)
+            return getAddExpr(getUMinExpr(LS, RS), LDiff);
+        }
+        break;
+      case ICmpInst::ICMP_NE:
+        // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
+        if (LHS->getType() == U->getType() &&
+            isa<ConstantInt>(RHS) &&
+            cast<ConstantInt>(RHS)->isZero()) {
+          const SCEV *One = getConstant(LHS->getType(), 1);
+          const SCEV *LS = getSCEV(LHS);
+          const SCEV *LA = getSCEV(U->getOperand(1));
+          const SCEV *RA = getSCEV(U->getOperand(2));
+          const SCEV *LDiff = getMinusSCEV(LA, LS);
+          const SCEV *RDiff = getMinusSCEV(RA, One);
+          if (LDiff == RDiff)
+            return getAddExpr(getUMaxExpr(One, LS), LDiff);
+        }
+        break;
+      case ICmpInst::ICMP_EQ:
+        // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
+        if (LHS->getType() == U->getType() &&
+            isa<ConstantInt>(RHS) &&
+            cast<ConstantInt>(RHS)->isZero()) {
+          const SCEV *One = getConstant(LHS->getType(), 1);
+          const SCEV *LS = getSCEV(LHS);
+          const SCEV *LA = getSCEV(U->getOperand(1));
+          const SCEV *RA = getSCEV(U->getOperand(2));
+          const SCEV *LDiff = getMinusSCEV(LA, One);
+          const SCEV *RDiff = getMinusSCEV(RA, LS);
+          if (LDiff == RDiff)
+            return getAddExpr(getUMaxExpr(One, LS), LDiff);
+        }
+        break;
+      default:
+        break;
+      }
+    }
+
+  default: // We cannot analyze this expression.
+    break;
+  }
+
+  return getUnknown(V);
+}
+
+
+
+//===----------------------------------------------------------------------===//
+//                   Iteration Count Computation Code
+//
+
+/// getBackedgeTakenCount - If the specified loop has a predictable
+/// backedge-taken count, return it, otherwise return a SCEVCouldNotCompute
+/// object. The backedge-taken count is the number of times the loop header
+/// will be branched to from within the loop. This is one less than the
+/// trip count of the loop, since it doesn't count the first iteration,
+/// when the header is branched to from outside the loop.
+///
+/// Note that it is not valid to call this method on a loop without a
+/// loop-invariant backedge-taken count (see
+/// hasLoopInvariantBackedgeTakenCount).
+///
+const SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Exact;
+}
+
+/// getMaxBackedgeTakenCount - Similar to getBackedgeTakenCount, except
+/// return the least SCEV value that is known never to be less than the
+/// actual backedge taken count.
+const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
+  return getBackedgeTakenInfo(L).Max;
+}
+
+/// PushLoopPHIs - Push PHI nodes in the header of the given loop
+/// onto the given Worklist.
+static void
+PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
+  BasicBlock *Header = L->getHeader();
+
+  // Push all Loop-header PHIs onto the Worklist stack.
+  for (BasicBlock::iterator I = Header->begin();
+       PHINode *PN = dyn_cast<PHINode>(I); ++I)
+    Worklist.push_back(PN);
+}
+
+const ScalarEvolution::BackedgeTakenInfo &
+ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
+  // Initially insert a CouldNotCompute for this loop. If the insertion
+  // succeeds, proceed to actually compute a backedge-taken count and
+  // update the value.
+  // The temporary CouldNotCompute value tells SCEV code elsewhere that it
+  // shouldn't attempt to request a new backedge-taken count, which could
+  // result in infinite recursion.
+  std::pair<std::map<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
+    BackedgeTakenCounts.insert(std::make_pair(L, getCouldNotCompute()));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L);
+  if (BECount.Exact != getCouldNotCompute()) {
+    assert(isLoopInvariant(BECount.Exact, L) &&
+           isLoopInvariant(BECount.Max, L) &&
+           "Computed backedge-taken count isn't loop invariant for loop!");
+    ++NumTripCountsComputed;
+
+    // Update the value in the map.
+    Pair.first->second = BECount;
+  } else {
+    if (BECount.Max != getCouldNotCompute())
+      // Update the value in the map.
+      Pair.first->second = BECount;
+    if (isa<PHINode>(L->getHeader()->begin()))
+      // Only count loops that have phi nodes as not being computable.
+      ++NumTripCountsNotComputed;
+  }
+
+  // Now that we know more about the trip count for this loop, forget any
+  // existing SCEV values for PHI nodes in this loop since they are only
+  // conservative estimates made without the benefit of trip count
+  // information. This is similar to the code in forgetLoop, except that
+  // it handles SCEVUnknown PHI nodes specially.
+  if (BECount.hasAnyInfo()) {
+    SmallVector<Instruction *, 16> Worklist;
+    PushLoopPHIs(L, Worklist);
+
+    SmallPtrSet<Instruction *, 8> Visited;
+    while (!Worklist.empty()) {
+      Instruction *I = Worklist.pop_back_val();
+      if (!Visited.insert(I)) continue;
+
+      ValueExprMapType::iterator It =
+        ValueExprMap.find(static_cast<Value *>(I));
+      if (It != ValueExprMap.end()) {
+        const SCEV *Old = It->second;
+
+        // SCEVUnknown for a PHI either means that it has an unrecognized
+        // structure, or it's a PHI that's in the progress of being computed
+        // by createNodeForPHI. In the former case, additional loop trip
+        // count information isn't going to change anything. In the latter
+        // case, createNodeForPHI will perform the necessary updates on its
+        // own when it gets to that point.
+        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+          forgetMemoizedResults(Old);
+          ValueExprMap.erase(It);
+        }
+        if (PHINode *PN = dyn_cast<PHINode>(I))
+          ConstantEvolutionLoopExitValue.erase(PN);
+      }
+
+      PushDefUseChildren(I, Worklist);
+    }
+  }
+  return Pair.first->second;
+}
+
+/// forgetLoop - This method should be called by the client when it has
+/// changed a loop in a way that may affect ScalarEvolution's ability to
+/// compute a trip count, or if the loop is deleted.
+void ScalarEvolution::forgetLoop(const Loop *L) {
+  // Drop any stored trip count value.
+  BackedgeTakenCounts.erase(L);
+
+  // Drop information about expressions based on loop-header PHIs.
+  SmallVector<Instruction *, 16> Worklist;
+  PushLoopPHIs(L, Worklist);
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  while (!Worklist.empty()) {
+    Instruction *I = Worklist.pop_back_val();
+    if (!Visited.insert(I)) continue;
+
+    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    if (It != ValueExprMap.end()) {
+      forgetMemoizedResults(It->second);
+      ValueExprMap.erase(It);
+      if (PHINode *PN = dyn_cast<PHINode>(I))
+        ConstantEvolutionLoopExitValue.erase(PN);
+    }
+
+    PushDefUseChildren(I, Worklist);
+  }
+
+  // Forget all contained loops too, to avoid dangling entries in the
+  // ValuesAtScopes map.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    forgetLoop(*I);
+}
+
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its computed value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
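+///
+/// Usage sketch (illustrative, not from the original source): a pass that
+/// rewrites the operands of an instruction I in place would call
+/// forgetValue(I) so that stale memoized SCEVs for I and its users are
+/// recomputed on the next query.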
+void ScalarEvolution::forgetValue(Value *V) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (!I) return;
+
+  // Drop information about expressions based on loop-header PHIs.
+  SmallVector<Instruction *, 16> Worklist;
+  Worklist.push_back(I);
+
+  SmallPtrSet<Instruction *, 8> Visited;
+  while (!Worklist.empty()) {
+    I = Worklist.pop_back_val();
+    if (!Visited.insert(I)) continue;
+
+    ValueExprMapType::iterator It = ValueExprMap.find(static_cast<Value *>(I));
+    if (It != ValueExprMap.end()) {
+      forgetMemoizedResults(It->second);
+      ValueExprMap.erase(It);
+      if (PHINode *PN = dyn_cast<PHINode>(I))
+        ConstantEvolutionLoopExitValue.erase(PN);
+    }
+
+    PushDefUseChildren(I, Worklist);
+  }
+}
+
+/// ComputeBackedgeTakenCount - Compute the number of times the backedge
+/// of the specified loop will execute.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
+  SmallVector<BasicBlock *, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+
+  // Examine all exits and pick the most conservative values.
+  const SCEV *BECount = getCouldNotCompute();
+  const SCEV *MaxBECount = getCouldNotCompute();
+  bool CouldNotComputeBECount = false;
+  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+    BackedgeTakenInfo NewBTI =
+      ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+    if (NewBTI.Exact == getCouldNotCompute()) {
+      // We couldn't compute an exact value for this exit, so
+      // we won't be able to compute an exact value for the loop.
+      CouldNotComputeBECount = true;
+      BECount = getCouldNotCompute();
+    } else if (!CouldNotComputeBECount) {
+      if (BECount == getCouldNotCompute())
+        BECount = NewBTI.Exact;
+      else
+        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
+    }
+    if (MaxBECount == getCouldNotCompute())
+      MaxBECount = NewBTI.Max;
+    else if (NewBTI.Max != getCouldNotCompute())
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
+  }
+
+  return BackedgeTakenInfo(BECount, MaxBECount);
+}
+
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the
+/// backedge of the specified loop will execute if it exits via the specified
+/// block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+                                                   BasicBlock *ExitingBlock) {
+
+  // Okay, we've chosen an exiting block. See what condition causes us to
+  // exit at this block.
+  //
+  // FIXME: we should be able to handle switch instructions (with a single
+  // exit).
+  BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+  if (ExitBr == 0) return getCouldNotCompute();
+  assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
+
+  // At this point, we know we have a conditional branch that determines
+  // whether the loop is exited. However, we don't know if the branch is
+  // executed each time through the loop. If not, then the execution count of
+  // the branch will not be equal to the trip count of the loop.
+  //
+  // Currently we check for this by checking to see if the Exit branch goes to
+  // the loop header. If so, we know it will always execute the same number of
+  // times as the loop. We also handle the case where the exit block *is* the
+  // loop header. This is common for un-rotated loops.
+  //
+  // If both of those tests fail, walk up the unique predecessor chain to the
+  // header, stopping if there is an edge that doesn't exit the loop. If the
+  // header is reached, the execution count of the branch will be equal to the
+  // trip count of the loop.
+  //
+  // More extensive analysis could be done to handle more cases here.
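+  // (Illustration, not in the original comment: in a rotated loop whose
+  // latch block conditionally branches either back to the header or out of
+  // the loop, the direct successor test below succeeds immediately; the
+  // predecessor walk only matters for exit branches buried in multi-block
+  // exit conditions.)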
+  //
+  if (ExitBr->getSuccessor(0) != L->getHeader() &&
+      ExitBr->getSuccessor(1) != L->getHeader() &&
+      ExitBr->getParent() != L->getHeader()) {
+    // The simple checks failed, try climbing the unique predecessor chain
+    // up to the header.
+    bool Ok = false;
+    for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+      BasicBlock *Pred = BB->getUniquePredecessor();
+      if (!Pred)
+        return getCouldNotCompute();
+      TerminatorInst *PredTerm = Pred->getTerminator();
+      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+        BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+        if (PredSucc == BB)
+          continue;
+        // If the predecessor has a successor that isn't BB and isn't
+        // outside the loop, assume the worst.
+        if (L->contains(PredSucc))
+          return getCouldNotCompute();
+      }
+      if (Pred == L->getHeader()) {
+        Ok = true;
+        break;
+      }
+      BB = Pred;
+    }
+    if (!Ok)
+      return getCouldNotCompute();
+  }
+
+  // Proceed to the next level to examine the exit condition expression.
+  return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+                                               ExitBr->getSuccessor(0),
+                                               ExitBr->getSuccessor(1));
+}
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+                                                       Value *ExitCond,
+                                                       BasicBlock *TBB,
+                                                       BasicBlock *FBB) {
+  // Check if the controlling expression for this loop is an And or Or.
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+    if (BO->getOpcode() == Instruction::And) {
+      // Recurse on the operands of the and.
+      BackedgeTakenInfo BTI0 =
+        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+      BackedgeTakenInfo BTI1 =
+        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      const SCEV *BECount = getCouldNotCompute();
+      const SCEV *MaxBECount = getCouldNotCompute();
+      if (L->contains(TBB)) {
+        // Both conditions must be true for the loop to continue executing.
+        // Choose the less conservative count.
+        if (BTI0.Exact == getCouldNotCompute() ||
+            BTI1.Exact == getCouldNotCompute())
+          BECount = getCouldNotCompute();
+        else
+          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+        if (BTI0.Max == getCouldNotCompute())
+          MaxBECount = BTI1.Max;
+        else if (BTI1.Max == getCouldNotCompute())
+          MaxBECount = BTI0.Max;
+        else
+          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+      } else {
+        // Both conditions must be true at the same time for the loop to exit.
+        // For now, be conservative.
+        assert(L->contains(FBB) && "Loop block has no successor in loop!");
+        if (BTI0.Max == BTI1.Max)
+          MaxBECount = BTI0.Max;
+        if (BTI0.Exact == BTI1.Exact)
+          BECount = BTI0.Exact;
+      }
+
+      return BackedgeTakenInfo(BECount, MaxBECount);
+    }
+    if (BO->getOpcode() == Instruction::Or) {
+      // Recurse on the operands of the or.
+      BackedgeTakenInfo BTI0 =
+        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+      BackedgeTakenInfo BTI1 =
+        ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+      const SCEV *BECount = getCouldNotCompute();
+      const SCEV *MaxBECount = getCouldNotCompute();
+      if (L->contains(FBB)) {
+        // Both conditions must be false for the loop to continue executing.
+        // Choose the less conservative count.
+        if (BTI0.Exact == getCouldNotCompute() ||
+            BTI1.Exact == getCouldNotCompute())
+          BECount = getCouldNotCompute();
+        else
+          BECount = getUMinFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+        if (BTI0.Max == getCouldNotCompute())
+          MaxBECount = BTI1.Max;
+        else if (BTI1.Max == getCouldNotCompute())
+          MaxBECount = BTI0.Max;
+        else
+          MaxBECount = getUMinFromMismatchedTypes(BTI0.Max, BTI1.Max);
+      } else {
+        // Both conditions must be false at the same time for the loop to exit.
+        // For now, be conservative.
+        assert(L->contains(TBB) && "Loop block has no successor in loop!");
+        if (BTI0.Max == BTI1.Max)
+          MaxBECount = BTI0.Max;
+        if (BTI0.Exact == BTI1.Exact)
+          BECount = BTI0.Exact;
+      }
+
+      return BackedgeTakenInfo(BECount, MaxBECount);
+    }
+  }
+
+  // With an icmp, it may be feasible to compute an exact backedge-taken
+  // count. Proceed to the next level to examine the icmp.
+  if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+    return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+
+  // Check for a constant condition. These are normally stripped out by
+  // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+  // preserve the CFG and is temporarily leaving constant conditions
+  // in place.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+    if (L->contains(FBB) == !CI->getZExtValue())
+      // The backedge is always taken.
+      return getCouldNotCompute();
+    else
+      // The backedge is never taken.
+      return getConstant(CI->getType(), 0);
+  }
+
+  // If it's not an integer or pointer comparison then compute it the hard way.
+  return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static const SCEVAddRecExpr *
+isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) {
+  const SCEVAddRecExpr *SA = dyn_cast<SCEVAddRecExpr>(S);
+
+  // The SCEV must be an addrec of this loop.
+  if (!SA || SA->getLoop() != L || !SA->isAffine())
+    return 0;
+
+  // The SCEV must be known to not wrap in some way to be interesting.
+  if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap())
+    return 0;
+
+  // The stride must be a constant so that we know if it is striding up or
+  // down.
+  if (!isa<SCEVConstant>(SA->getOperand(1)))
+    return 0;
+  return SA;
+}
+
+/// getMinusSCEVForExitTest - When considering a loop with an "x != y" exit
+/// test, we turn this into a computation that evaluates x-y != 0, and this
+/// function returns the expression to use for x-y. We know and take advantage
+/// of the fact that this subtraction is only being used in a comparison by
+/// zero context.
+///
+static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS,
+                                           const Loop *L, ScalarEvolution &SE) {
+  // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not
+  // wrap (either NSW or NUW), then we know that the value will either become
+  // the other one (and thus the loop terminates), that the loop will terminate
+  // through some other exit condition first, or that the loop has undefined
+  // behavior. This information is useful when the addrec has a stride that is
+  // != 1 or -1, because it means we can't "miss" the exit value.
+  //
+  // In any of these three cases, it is safe to turn the exit condition into a
+  // "counting down" AddRec (to zero) by subtracting the two inputs as normal,
+  // but since we know that the "end cannot be missed" we can force the
+  // resulting AddRec to be a NUW addrec. Since it is counting down, this means
+  // that the AddRec *cannot* pass zero.
+
+  // See if LHS and RHS are addrec's we can handle.
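+  // Illustrative example (assumed, not from the original comment): for an
+  // exit test "i != n" where i is {0,+,2}<nuw> and n is loop invariant, the
+  // code below returns n - {0,+,2} = {n,+,-2} marked NUW: a count-down to
+  // zero that cannot step past it.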
+  const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L);
+  const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L);
+
+  // If neither addrec is interesting, just return a minus.
+  if (RHSA == 0 && LHSA == 0)
+    return SE.getMinusSCEV(LHS, RHS);
+
+  // If only one of LHS and RHS is an AddRec of this loop, make sure it is LHS.
+  if (RHSA && LHSA == 0) {
+    // Safe because a-b === b-a for comparisons against zero.
+    std::swap(LHS, RHS);
+    std::swap(LHSA, RHSA);
+  }
+
+  // Handle the case when only one is advancing in a non-overflowing way.
+  if (RHSA == 0) {
+    // If RHS is loop varying, then we can't predict when LHS will cross it.
+    if (!SE.isLoopInvariant(RHS, L))
+      return SE.getMinusSCEV(LHS, RHS);
+
+    // If LHS has a positive stride, then we compute RHS-LHS, because the loop
+    // is counting up until it crosses RHS (which must be larger than LHS). If
+    // it is negative, we compute LHS-RHS because we're counting down to RHS.
+    const ConstantInt *Stride =
+      cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+    if (Stride->getValue().isNegative())
+      std::swap(LHS, RHS);
+
+    return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/);
+  }
+
+  // If both LHS and RHS are interesting, we have something like:
+  //   a+i*4 != b+i*8.
+  const ConstantInt *LHSStride =
+    cast<SCEVConstant>(LHSA->getOperand(1))->getValue();
+  const ConstantInt *RHSStride =
+    cast<SCEVConstant>(RHSA->getOperand(1))->getValue();
+
+  // If the strides are equal, then this is just a (complex) loop invariant
+  // comparison of a and b.
+  if (LHSStride == RHSStride)
+    return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart());
+
+  // If the signs of the strides differ, then the negative stride is counting
+  // down to the positive stride.
+  if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){
+    if (RHSStride->getValue().isNegative())
+      std::swap(LHS, RHS);
+  } else {
+    // If LHS's stride is smaller than RHS's stride, then "b" must be less than
+    // "a", and RHS is counting up (catching up) to LHS. This is true whether
+    // the strides are positive or negative.
+    if (RHSStride->getValue().slt(LHSStride->getValue()))
+      std::swap(LHS, RHS);
+  }
+
+  return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/);
+}
+
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times
+/// the backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+                                                           ICmpInst *ExitCond,
+                                                           BasicBlock *TBB,
+                                                           BasicBlock *FBB) {
+
+  // If the condition was exit on true, convert the condition to exit on false.
+  ICmpInst::Predicate Cond;
+  if (!L->contains(FBB))
+    Cond = ExitCond->getPredicate();
+  else
+    Cond = ExitCond->getInversePredicate();
+
+  // Handle common loops like: for (X = "string"; *X; ++X)
+  if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
+    if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
+      BackedgeTakenInfo ItCnt =
+        ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
+      if (ItCnt.hasAnyInfo())
+        return ItCnt;
+    }
+
+  const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
+  const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
+
+  // Try to evaluate any dependencies out of the loop.
+  LHS = getSCEVAtScope(LHS, L);
+  RHS = getSCEVAtScope(RHS, L);
+
+  // At this point, we would like to compute how many iterations of the
+  // loop the predicate will return true for these inputs.
+  if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) {
+    // If there is a loop-invariant, force it into the RHS.
+    std::swap(LHS, RHS);
+    Cond = ICmpInst::getSwappedPredicate(Cond);
+  }
+
+  // Simplify the operands before analyzing them.
+  (void)SimplifyICmpOperands(Cond, LHS, RHS);
+
+  // If we have a comparison of a chrec against a constant, try to use value
+  // ranges to answer this query.
+  if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS))
+    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
+      if (AddRec->getLoop() == L) {
+        // Form the constant range.
+        ConstantRange CompRange(
+            ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue()));
+
+        const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
+        if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
+      }
+
+  switch (Cond) {
+  case ICmpInst::ICMP_NE: {                     // while (X != Y)
+    // Convert to: while (X-Y != 0)
+    BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L,
+                                                                 *this), L);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_EQ: {                     // while (X == Y)
+    // Convert to: while (X-Y == 0)
+    BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_SLT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, true);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_SGT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+                                             getNotSCEV(RHS), L, true);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_ULT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(LHS, RHS, L, false);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  case ICmpInst::ICMP_UGT: {
+    BackedgeTakenInfo BTI = HowManyLessThans(getNotSCEV(LHS),
+                                             getNotSCEV(RHS), L, false);
+    if (BTI.hasAnyInfo()) return BTI;
+    break;
+  }
+  default:
+#if 0
+    dbgs() << "ComputeBackedgeTakenCount ";
+    if (ExitCond->getOperand(0)->getType()->isUnsigned())
+      dbgs() << "[unsigned] ";
+    dbgs() << *LHS << " "
+           << Instruction::getOpcodeName(Instruction::ICmp)
+           << " " << *RHS << "\n";
+#endif
+    break;
+  }
+  return
+    ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
+
+static ConstantInt *
+EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,
+                                ScalarEvolution &SE) {
+  const SCEV *InVal = SE.getConstant(C);
+  const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE);
+  assert(isa<SCEVConstant>(Val) &&
+         "Evaluation of SCEV at constant didn't fold correctly?");
+  return cast<SCEVConstant>(Val)->getValue();
+}
+
+/// GetAddressedElementFromGlobal - Given a global variable with an initializer
+/// and a GEP expression (missing the pointer index) indexing into it, return
+/// the addressed element of the initializer or null if the index expression is
+/// invalid.
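+///
+/// For example (illustrative): given "@G = constant [4 x i32]" and
+/// Indices = {2}, this returns the third element of G's initializer.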
+static Constant *
+GetAddressedElementFromGlobal(GlobalVariable *GV,
+                              const std::vector<ConstantInt*> &Indices) {
+  Constant *Init = GV->getInitializer();
+  for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+    uint64_t Idx = Indices[i]->getZExtValue();
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Init)) {
+      assert(Idx < CS->getNumOperands() && "Bad struct index!");
+      Init = cast<Constant>(CS->getOperand(Idx));
+    } else if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+      if (Idx >= CA->getNumOperands()) return 0;  // Bogus program
+      Init = cast<Constant>(CA->getOperand(Idx));
+    } else if (isa<ConstantAggregateZero>(Init)) {
+      if (const StructType *STy = dyn_cast<StructType>(Init->getType())) {
+        assert(Idx < STy->getNumElements() && "Bad struct index!");
+        Init = Constant::getNullValue(STy->getElementType(Idx));
+      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Init->getType())) {
+        if (Idx >= ATy->getNumElements()) return 0;  // Bogus program
+        Init = Constant::getNullValue(ATy->getElementType());
+      } else {
+        llvm_unreachable("Unknown constant aggregate type!");
+      }
+      return 0;
+    } else {
+      return 0; // Unknown initializer type
+    }
+  }
+  return Init;
+}
+
+/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
+/// 'icmp op load X, cst', try to see if we can compute the backedge
+/// execution count.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+                                                LoadInst *LI,
+                                                Constant *RHS,
+                                                const Loop *L,
+                                                ICmpInst::Predicate predicate) {
+  if (LI->isVolatile()) return getCouldNotCompute();
+
+  // Check to see if the loaded pointer is a getelementptr of a global.
+  // TODO: Use SCEV instead of manually grubbing with GEPs.
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+  if (!GEP) return getCouldNotCompute();
+
+  // Make sure that it is really a constant global we are gepping, with an
+  // initializer, and make sure the first IDX is really 0.
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
+      GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
+      !cast<ConstantInt>(GEP->getOperand(1))->isNullValue())
+    return getCouldNotCompute();
+
+  // Okay, we allow one non-constant index into the GEP instruction.
+  Value *VarIdx = 0;
+  std::vector<ConstantInt*> Indexes;
+  unsigned VarIdxNum = 0;
+  for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i)
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+      Indexes.push_back(CI);
+    } else if (!isa<ConstantInt>(GEP->getOperand(i))) {
+      if (VarIdx) return getCouldNotCompute();  // Multiple non-constant idx's.
+      VarIdx = GEP->getOperand(i);
+      VarIdxNum = i-2;
+      Indexes.push_back(0);
+    }
+
+  // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant.
+  // Check to see if X is a loop variant variable value now.
+  const SCEV *Idx = getSCEV(VarIdx);
+  Idx = getSCEVAtScope(Idx, L);
+
+  // We can only recognize very limited forms of loop index expressions, in
+  // particular, only affine AddRec's like {C1,+,C2}.
+  const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx);
+  if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) ||
+      !isa<SCEVConstant>(IdxExpr->getOperand(0)) ||
+      !isa<SCEVConstant>(IdxExpr->getOperand(1)))
+    return getCouldNotCompute();
+
+  unsigned MaxSteps = MaxBruteForceIterations;
+  for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
+    ConstantInt *ItCst = ConstantInt::get(
+                          cast<IntegerType>(IdxExpr->getType()), IterationNum);
+    ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
+
+    // Form the GEP offset.
+    Indexes[VarIdxNum] = Val;
+
+    Constant *Result = GetAddressedElementFromGlobal(GV, Indexes);
+    if (Result == 0) break;  // Cannot compute!
+
+    // Evaluate the condition for this iteration.
+    Result = ConstantExpr::getICmp(predicate, Result, RHS);
+    if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure
+    if (cast<ConstantInt>(Result)->getValue().isMinValue()) {
+#if 0
+      dbgs() << "\n***\n*** Computed loop count " << *ItCst
+             << "\n*** From global " << *GV << "*** BB: " << *L->getHeader()
+             << "***\n";
+#endif
+      ++NumArrayLenItCounts;
+      return getConstant(ItCst);   // Found terminating iteration!
+    }
+  }
+  return getCouldNotCompute();
+}
+
+
+/// CanConstantFold - Return true if we can constant fold an instruction of the
+/// specified type, assuming that all operands were constants.
+static bool CanConstantFold(const Instruction *I) {
+  if (isa<BinaryOperator>(I) || isa<CmpInst>(I) ||
+      isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I))
+    return true;
+
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (const Function *F = CI->getCalledFunction())
+      return canConstantFoldCallTo(F);
+  return false;
+}
+
+/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node
+/// in the loop that V is derived from. We allow arbitrary operations along the
+/// way, but the operands of an operation must either be constants or a value
+/// derived from a constant PHI. If this expression does not fit with these
+/// constraints, return null.
+static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
+  // If this is not an instruction, or if this is an instruction outside of the
+  // loop, it can't be derived from a loop PHI.
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0 || !L->contains(I)) return 0;
+
+  if (PHINode *PN = dyn_cast<PHINode>(I)) {
+    if (L->getHeader() == I->getParent())
+      return PN;
+    else
+      // We don't currently keep track of the control flow needed to evaluate
+      // PHIs, so we cannot handle PHIs inside of loops.
+      return 0;
+  }
+
+  // If we won't be able to constant fold this expression even if the operands
+  // are constants, return early.
+  if (!CanConstantFold(I)) return 0;
+
+  // Otherwise, we can evaluate this instruction if all of its operands are
+  // constant or derived from a PHI node themselves.
+  PHINode *PHI = 0;
+  for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
+    if (!isa<Constant>(I->getOperand(Op))) {
+      PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
+      if (P == 0) return 0;  // Not evolving from PHI
+      if (PHI == 0)
+        PHI = P;
+      else if (PHI != P)
+        return 0;  // Evolving from multiple different PHIs.
+    }
+
+  // This is an expression evolving from a constant PHI!
+  return PHI;
+}
+
+/// EvaluateExpression - Given an expression that passes the
+/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
+/// in the loop has the value PHIVal. If we can't fold this expression for some
+/// reason, return null.
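+///
+/// For example (illustrative): for V = "add %phi, 1" with PHIVal = i32 7,
+/// the recursion below constant-folds the expression to i32 8.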
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+                                    const TargetData *TD) {
+  if (isa<PHINode>(V)) return PHIVal;
+  if (Constant *C = dyn_cast<Constant>(V)) return C;
+  Instruction *I = cast<Instruction>(V);
+
+  std::vector<Constant*> Operands(I->getNumOperands());
+
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
+    if (Operands[i] == 0) return 0;
+  }
+
+  if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+    return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+                                           Operands[1], TD);
+  return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+                                  &Operands[0], Operands.size(), TD);
+}
+
+/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
+/// in the header of its containing loop, we know the loop executes a
+/// constant number of times, and the PHI node is just a recurrence
+/// involving constants, fold it.
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+                                                   const APInt &BEs,
+                                                   const Loop *L) {
+  std::map<PHINode*, Constant*>::const_iterator I =
+    ConstantEvolutionLoopExitValue.find(PN);
+  if (I != ConstantEvolutionLoopExitValue.end())
+    return I->second;
+
+  if (BEs.ugt(MaxBruteForceIterations))
+    return ConstantEvolutionLoopExitValue[PN] = 0;  // Not going to evaluate it.
+
+  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];
+
+  // Since the loop is canonicalized, the PHI node must have two entries. One
+  // entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
+  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+  Constant *StartCST =
+    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+  if (StartCST == 0)
+    return RetVal = 0;  // Must be a constant.
+
+  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+  if (getConstantEvolvingPHI(BEValue, L) != PN &&
+      !isa<Constant>(BEValue))
+    return RetVal = 0;  // Not derived from same PHI.
+
+  // Execute the loop symbolically to determine the exit value.
+  if (BEs.getActiveBits() >= 32)
+    return RetVal = 0; // More than 2^32-1 iterations?? Not doing it!
+
+  unsigned NumIterations = BEs.getZExtValue(); // must be in range
+  unsigned IterationNum = 0;
+  for (Constant *PHIVal = StartCST; ; ++IterationNum) {
+    if (IterationNum == NumIterations)
+      return RetVal = PHIVal;  // Got exit value!
+
+    // Compute the value of the PHI node for the next iteration.
+    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    if (NextPHI == PHIVal)
+      return RetVal = NextPHI;  // Stopped evolving!
+    if (NextPHI == 0)
+      return 0;        // Couldn't evaluate!
+    PHIVal = NextPHI;
+  }
+}
+
+/// ComputeBackedgeTakenCountExhaustively - If the loop is known to execute a
+/// constant number of times (the condition evolves only from constants),
+/// try to evaluate a few iterations of the loop until the exit condition
+/// gets a value of ExitWhen (true or false). If we cannot evaluate the
+/// trip count of the loop, return getCouldNotCompute().
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                       Value *Cond,
+                                                       bool ExitWhen) {
+  PHINode *PN = getConstantEvolvingPHI(Cond, L);
+  if (PN == 0) return getCouldNotCompute();
+
+  // If the loop is canonicalized, the PHI will have exactly two entries.
+  // That's the only form we support here.
+  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+  // One entry must be a constant (coming in from outside of the loop), and the
+  // second must be derived from the same PHI.
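+  // (Illustration, not in the original comment: for
+  //   %i = phi i32 [ 0, %entry ], [ %i.next, %latch ]
+  // the constant entry is 0 and %i.next is the value evolving from the PHI.)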
+  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
+  Constant *StartCST =
+    dyn_cast<Constant>(PN->getIncomingValue(!SecondIsBackedge));
+  if (StartCST == 0) return getCouldNotCompute();  // Must be a constant.
+
+  Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
+  if (getConstantEvolvingPHI(BEValue, L) != PN &&
+      !isa<Constant>(BEValue))
+    return getCouldNotCompute();  // Not derived from same PHI.
+
+  // Okay, we found a PHI node that defines the trip count of this loop.
+  // Execute the loop symbolically to determine when the condition gets a
+  // value of "ExitWhen".
+  unsigned IterationNum = 0;
+  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
+  for (Constant *PHIVal = StartCST;
+       IterationNum != MaxIterations; ++IterationNum) {
+    ConstantInt *CondVal =
+      dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
+
+    // Couldn't symbolically evaluate.
+    if (!CondVal) return getCouldNotCompute();
+
+    if (CondVal->getValue() == uint64_t(ExitWhen)) {
+      ++NumBruteForceTripCountsComputed;
+      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
+    }
+
+    // Compute the value of the PHI node for the next iteration.
+    Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
+    if (NextPHI == 0 || NextPHI == PHIVal)
+      return getCouldNotCompute(); // Couldn't evaluate or not making progress.
+    PHIVal = NextPHI;
+  }
+
+  // Too many iterations were needed to evaluate.
+  return getCouldNotCompute();
+}
+
+/// getSCEVAtScope - Return a SCEV expression for the specified value
+/// at the specified scope in the program. The L value specifies a loop
+/// nest to evaluate the expression at, where null means the top-level
+/// scope, and a specified loop means evaluating the expression immediately
+/// inside that loop.
+///
+/// This method can be used to compute the exit value for a variable defined
+/// in a loop by querying what the value will hold in the parent loop.
+///
+/// In the case that a relevant loop exit value cannot be computed, the
+/// original value V is returned.
+const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
+  // Check to see if we've folded this expression at this loop before.
+  std::map<const Loop *, const SCEV *> &Values = ValuesAtScopes[V];
+  std::pair<std::map<const Loop *, const SCEV *>::iterator, bool> Pair =
+    Values.insert(std::make_pair(L, static_cast<const SCEV *>(0)));
+  if (!Pair.second)
+    return Pair.first->second ? Pair.first->second : V;
+
+  // Otherwise compute it.
+  const SCEV *C = computeSCEVAtScope(V, L);
+  ValuesAtScopes[V][L] = C;
+  return C;
+}
+
+const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+  if (isa<SCEVConstant>(V)) return V;
+
+  // If this instruction is evolved from a constant-evolving PHI, compute the
+  // exit value from the loop without using SCEVs.
+  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
+    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
+      const Loop *LI = (*this->LI)[I->getParent()];
+      if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.
+        if (PHINode *PN = dyn_cast<PHINode>(I))
+          if (PN->getParent() == LI->getHeader()) {
+            // Okay, there is no closed form solution for the PHI node. Check
+            // to see if the loop that contains it has a known backedge-taken
+            // count. If so, we may be able to force computation of the exit
+            // value.
+            const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
+            if (const SCEVConstant *BTCC =
+                  dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
+              // Okay, we know how many times the containing loop executes. If
+              // this is a constant evolving PHI node, get the final value at
+              // the specified iteration number.
+ Constant *RV = getConstantEvolutionLoopExitValue(PN, + BTCC->getValue()->getValue(), + LI); + if (RV) return getSCEV(RV); + } + } + + // Okay, this is an expression that we cannot symbolically evaluate + // into a SCEV. Check to see if it's possible to symbolically evaluate + // the arguments into constants, and if so, try to constant propagate the + // result. This is particularly useful for computing loop exit values. + if (CanConstantFold(I)) { + SmallVector Operands; + bool MadeImprovement = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *Op = I->getOperand(i); + if (Constant *C = dyn_cast(Op)) { + Operands.push_back(C); + continue; + } + + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + const SCEV *OrigV = getSCEV(Op); + const SCEV *OpV = getSCEVAtScope(OrigV, L); + MadeImprovement |= OrigV != OpV; + + Constant *C = 0; + if (const SCEVConstant *SC = dyn_cast(OpV)) + C = SC->getValue(); + if (const SCEVUnknown *SU = dyn_cast(OpV)) + C = dyn_cast(SU->getValue()); + if (!C) return V; + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); + } + + // Check to see if getSCEVAtScope actually made an improvement. + if (MadeImprovement) { + Constant *C = 0; + if (const CmpInst *CI = dyn_cast(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + Operands[0], Operands[1], TD); + else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); + if (!C) return V; + return getSCEV(C); + } + } + } + + // This is some other type of SCEVUnknown, just return it. + return V; + } + + if (const SCEVCommutativeExpr *Comm = dyn_cast(V)) { + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. + for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + if (OpAtScope != Comm->getOperand(i)) { + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector NewOps(Comm->op_begin(), + Comm->op_begin()+i); + NewOps.push_back(OpAtScope); + + for (++i; i != e; ++i) { + OpAtScope = getSCEVAtScope(Comm->getOperand(i), L); + NewOps.push_back(OpAtScope); + } + if (isa(Comm)) + return getAddExpr(NewOps); + if (isa(Comm)) + return getMulExpr(NewOps); + if (isa(Comm)) + return getSMaxExpr(NewOps); + if (isa(Comm)) + return getUMaxExpr(NewOps); + llvm_unreachable("Unknown commutative SCEV type!"); + } + } + // If we got here, all operands are loop invariant. + return Comm; + } + + if (const SCEVUDivExpr *Div = dyn_cast(V)) { + const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L); + const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L); + if (LHS == Div->getLHS() && RHS == Div->getRHS()) + return Div; // must be loop invariant + return getUDivExpr(LHS, RHS); + } + + // If this is a loop recurrence for a loop that does not contain L, then we + // are dealing with the final value computed by the loop. + if (const SCEVAddRecExpr *AddRec = dyn_cast(V)) { + // First, attempt to evaluate each operand. + // Avoid performing the look-up in the common case where the specified + // expression has no loop-variant portions. 
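The rebuild in the commutative case above is a copy-on-write scan: nothing is allocated until the first operand that folds to something new, at which point the unchanged prefix is copied and the remaining operands are folded into the new vector. The same shape over a plain std::vector<int>, with an arbitrary stand-in fold:

#include <vector>

static int foldAtScope(int Op) { return Op < 0 ? -Op : Op; } // stand-in fold

// Returns the input unchanged when no operand folds differently, mirroring
// the "return Comm;" fast path above.
static std::vector<int> rebuildIfChanged(const std::vector<int> &Ops) {
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    int OpAtScope = foldAtScope(Ops[i]);
    if (OpAtScope != Ops[i]) {
      // Copy the already-scanned, unchanged prefix, then fold the rest.
      std::vector<int> NewOps(Ops.begin(), Ops.begin() + i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(foldAtScope(Ops[i]));
      return NewOps;
    }
  }
  return Ops; // All operands were already at-scope.
}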
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { + const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L); + if (OpAtScope == AddRec->getOperand(i)) + continue; + + // Okay, at least one of these operands is loop variant but might be + // foldable. Build a new instance of the folded commutative expression. + SmallVector NewOps(AddRec->op_begin(), + AddRec->op_begin()+i); + NewOps.push_back(OpAtScope); + for (++i; i != e; ++i) + NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); + + AddRec = cast(getAddRecExpr(NewOps, AddRec->getLoop())); + break; + } + + // If the scope is outside the addrec's loop, evaluate it by using the + // loop exit value of the addrec. + if (!AddRec->getLoop()->contains(L)) { + // To evaluate this recurrence, we need to know how many times the AddRec + // loop iterates. Compute this now. + const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop()); + if (BackedgeTakenCount == getCouldNotCompute()) return AddRec; + + // Then, evaluate the AddRec. + return AddRec->evaluateAtIteration(BackedgeTakenCount, *this); + } + + return AddRec; + } + + if (const SCEVZeroExtendExpr *Cast = dyn_cast(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getZeroExtendExpr(Op, Cast->getType()); + } + + if (const SCEVSignExtendExpr *Cast = dyn_cast(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getSignExtendExpr(Op, Cast->getType()); + } + + if (const SCEVTruncateExpr *Cast = dyn_cast(V)) { + const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L); + if (Op == Cast->getOperand()) + return Cast; // must be loop invariant + return getTruncateExpr(Op, Cast->getType()); + } + + llvm_unreachable("Unknown SCEV type!"); + return 0; +} + +/// getSCEVAtScope - This is a convenience function which does +/// getSCEVAtScope(getSCEV(V), L). +const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) { + return getSCEVAtScope(getSCEV(V), L); +} + +/// SolveLinEquationWithOverflow - Finds the minimum unsigned root of the +/// following equation: +/// +/// A * X = B (mod N) +/// +/// where N = 2^BW and BW is the common bit width of A and B. The signedness of +/// A and B isn't important. +/// +/// If the equation does not have a solution, SCEVCouldNotCompute is returned. +static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const APInt &B, + ScalarEvolution &SE) { + uint32_t BW = A.getBitWidth(); + assert(BW == B.getBitWidth() && "Bit widths must be the same."); + assert(A != 0 && "A must be non-zero."); + + // 1. D = gcd(A, N) + // + // The gcd of A and N may have only one prime factor: 2. The number of + // trailing zeros in A is its multiplicity + uint32_t Mult2 = A.countTrailingZeros(); + // D = 2^Mult2 + + // 2. Check if B is divisible by D. + // + // B is divisible by D if and only if the multiplicity of prime factor 2 for B + // is not less than multiplicity of this prime factor for D. + if (B.countTrailingZeros() < Mult2) + return SE.getCouldNotCompute(); + + // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic + // modulo (N / D). + // + // (N / D) may need BW+1 bits in its representation. Hence, we'll use this + // bit width during computations. + APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D + APInt Mod(BW + 1, 0); + Mod.setBit(BW - Mult2); // Mod = N / D + APInt I = AD.multiplicativeInverse(Mod); + + // 4. 
Compute the minimum unsigned root of the equation: + // I * (B / D) mod (N / D) + APInt Result = (I * B.lshr(Mult2).zext(BW + 1)).urem(Mod); + + // The result is guaranteed to be less than 2^BW so we may truncate it to BW + // bits. + return SE.getConstant(Result.trunc(BW)); +} + +/// SolveQuadraticEquation - Find the roots of the quadratic equation for the +/// given quadratic chrec {L,+,M,+,N}. This returns either the two roots (which +/// might be the same) or two SCEVCouldNotCompute objects. +/// +static std::pair +SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); + const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); + const SCEVConstant *MC = dyn_cast(AddRec->getOperand(1)); + const SCEVConstant *NC = dyn_cast(AddRec->getOperand(2)); + + // We currently can only solve this if the coefficients are constants. + if (!LC || !MC || !NC) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); + const APInt &L = LC->getValue()->getValue(); + const APInt &M = MC->getValue()->getValue(); + const APInt &N = NC->getValue()->getValue(); + APInt Two(BitWidth, 2); + APInt Four(BitWidth, 4); + + { + using namespace APIntOps; + const APInt& C = L; + // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The B coefficient is M-N/2 + APInt B(M); + B -= sdiv(N,Two); + + // The A coefficient is N/2 + APInt A(N.sdiv(Two)); + + // Compute the B^2-4ac term. + APInt SqrtTerm(B); + SqrtTerm *= B; + SqrtTerm -= Four * (A * C); + + // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest + // integer value or else APInt::sqrt() will assert. + APInt SqrtVal(SqrtTerm.sqrt()); + + // Compute the two solutions for the quadratic formula. + // The divisions must be performed as signed divisions. + APInt NegB(-B); + APInt TwoA( A << 1 ); + if (TwoA.isMinValue()) { + const SCEV *CNC = SE.getCouldNotCompute(); + return std::make_pair(CNC, CNC); + } + + LLVMContext &Context = SE.getContext(); + + ConstantInt *Solution1 = + ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); + ConstantInt *Solution2 = + ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + + return std::make_pair(SE.getConstant(Solution1), + SE.getConstant(Solution2)); + } // end APIntOps namespace +} + +/// HowFarToZero - Return the number of times a backedge comparing the specified +/// value to zero will execute. If not computable, return CouldNotCompute. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { + // If the value is a constant + if (const SCEVConstant *C = dyn_cast(V)) { + // If the value is already zero, the branch will execute zero times. + if (C->getValue()->isZero()) return C; + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + const SCEVAddRecExpr *AddRec = dyn_cast(V); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of + // the quadratic equation to solve it. 
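The coefficients used by SolveQuadraticEquation above come from expanding the chrec: {L,+,M,+,N} evaluated at iteration x is L + M*x + N*(x*(x-1)/2), which regroups to (N/2)*x^2 + (M - N/2)*x + L. A small standalone check, using a chrec whose root is exact:

#include <cassert>

static long chrecAt(long L, long M, long N, long x) {
  return L + M * x + N * (x * (x - 1) / 2);
}

int main() {
  // {-9,+,1,+,2} is x^2 - 9: A = 2/2 = 1, B = 1 - 1 = 0, C = -9.
  // The quadratic formula gives roots +3 and -3; HowFarToZero keeps the
  // smaller root that actually evaluates to zero, i.e. iteration 3.
  assert(chrecAt(-9, 1, 2, 3) == 0);
  assert(chrecAt(-9, 1, 2, 2) == -5);  // 4 - 9: not yet zero
  return 0;
}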
+ if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { + std::pair Roots = + SolveQuadraticEquation(AddRec, *this); + const SCEVConstant *R1 = dyn_cast(Roots.first); + const SCEVConstant *R2 = dyn_cast(Roots.second); + if (R1 && R2) { +#if 0 + dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 + << " sol#2: " << *R2 << "\n"; +#endif + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast(ConstantExpr::getICmp(CmpInst::ICMP_ULT, + R1->getValue(), + R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); + if (Val->isZero()) + return R1; // We found a quadratic root! + } + } + return getCouldNotCompute(); + } + + // Otherwise we can only handle this if it is affine. + if (!AddRec->isAffine()) + return getCouldNotCompute(); + + // If this is an affine expression, the execution count of this branch is + // the minimum unsigned root of the following equation: + // + // Start + Step*N = 0 (mod 2^BW) + // + // equivalent to: + // + // Step*N = -Start (mod 2^BW) + // + // where BW is the common bit width of Start and Step. + + // Get the initial value for the loop. + const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); + const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); + + // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up + // to wrap to 0, it must be counting down to equal 0. Also, while counting + // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what + // the stride is. As such, NUW addrec's will always become zero in + // "start / -stride" steps, and we know that the division is exact. + if (AddRec->hasNoUnsignedWrap()) + // FIXME: We really want an "isexact" bit for udiv. + return getUDivExpr(Start, getNegativeSCEV(Step)); + + // For now we handle only constant steps. + const SCEVConstant *StepC = dyn_cast(Step); + if (StepC == 0) + return getCouldNotCompute(); + + // First, handle unitary steps. + if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: + return getNegativeSCEV(Start); // N = -Start (as unsigned) + + if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: + return Start; // N = Start (as unsigned) + + // Then, try to solve the above equation provided that Start is constant. + if (const SCEVConstant *StartC = dyn_cast(Start)) + return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), + -StartC->getValue()->getValue(), + *this); + return getCouldNotCompute(); +} + +/// HowFarToNonZero - Return the number of times a backedge checking the +/// specified value for nonzero will execute. If not computable, return +/// CouldNotCompute +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) { + // Loops that look like: while (X == 0) are very strange indeed. We don't + // handle them yet except for the trivial case. This could be expanded in the + // future as needed. + + // If the value is a constant, check to see if it is known to be non-zero + // already. If so, the backedge will execute zero times. 
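Backing up to the affine case of HowFarToZero above, the four steps of SolveLinEquationWithOverflow are easy to trace on a concrete instance. A standalone check with plain unsigned arithmetic standing in for APInt, solving 6*X == 10 (mod 16):

#include <cassert>

int main() {
  const unsigned BW = 4, N = 1u << BW;   // N = 2^BW = 16
  const unsigned A = 6, B = 10;
  const unsigned Mult2 = 1;              // A has one trailing zero, so D = 2
  assert(B % 2 == 0);                    // B must be divisible by D
  const unsigned Mod = N >> Mult2;       // N / D = 8
  const unsigned I = 3;                  // (A/D)^-1 mod 8, since 3*3 == 1 (mod 8)
  const unsigned X = (I * (B >> Mult2)) % Mod;  // minimum unsigned root
  assert(X == 7);
  assert((A * X) % N == B);              // 6*7 == 42 == 10 (mod 16)
  return 0;
}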
+ if (const SCEVConstant *C = dyn_cast(V)) { + if (!C->getValue()->isNullValue()) + return getConstant(C->getType(), 0); + return getCouldNotCompute(); // Otherwise it will loop infinitely. + } + + // We could implement others, but I really doubt anyone writes loops like + // this, and if they did, they would already be constant folded. + return getCouldNotCompute(); +} + +/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB +/// (which may not be an immediate predecessor) which has exactly one +/// successor from which BB is reachable, or null if no such block is +/// found. +/// +std::pair +ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) { + // If the block has a unique predecessor, then there is no path from the + // predecessor to the block that does not go through the direct edge + // from the predecessor to the block. + if (BasicBlock *Pred = BB->getSinglePredecessor()) + return std::make_pair(Pred, BB); + + // A loop's header is defined to be a block that dominates the loop. + // If the header has a unique predecessor outside the loop, it must be + // a block that has exactly one successor that can reach the loop. + if (Loop *L = LI->getLoopFor(BB)) + return std::make_pair(L->getLoopPredecessor(), L->getHeader()); + + return std::pair(); +} + +/// HasSameValue - SCEV structural equivalence is usually sufficient for +/// testing whether two expressions are equal, however for the purposes of +/// looking for a condition guarding a loop, it can be useful to be a little +/// more general, since a front-end may have replicated the controlling +/// expression. +/// +static bool HasSameValue(const SCEV *A, const SCEV *B) { + // Quick check to see if they are the same SCEV. + if (A == B) return true; + + // Otherwise, if they're both SCEVUnknown, it's possible that they hold + // two different instructions with the same value. Check for this case. + if (const SCEVUnknown *AU = dyn_cast(A)) + if (const SCEVUnknown *BU = dyn_cast(B)) + if (const Instruction *AI = dyn_cast(AU->getValue())) + if (const Instruction *BI = dyn_cast(BU->getValue())) + if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) + return true; + + // Otherwise assume they may have a different value. + return false; +} + +/// SimplifyICmpOperands - Simplify LHS and RHS in a comparison with +/// predicate Pred. Return true iff any changes were made. +/// +bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, + const SCEV *&LHS, const SCEV *&RHS) { + bool Changed = false; + + // Canonicalize a constant to the right side. + if (const SCEVConstant *LHSC = dyn_cast(LHS)) { + // Check for both operands constant. + if (const SCEVConstant *RHSC = dyn_cast(RHS)) { + if (ConstantExpr::getICmp(Pred, + LHSC->getValue(), + RHSC->getValue())->isNullValue()) + goto trivially_false; + else + goto trivially_true; + } + // Otherwise swap the operands to put the constant on the right. + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + + // If we're comparing an addrec with a value which is loop-invariant in the + // addrec's loop, put the addrec on the left. Also make a dominance check, + // as both operands could be addrecs loop-invariant in each other's loop. 
+ if (const SCEVAddRecExpr *AR = dyn_cast(RHS)) { + const Loop *L = AR->getLoop(); + if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + Changed = true; + } + } + + // If there's a constant operand, canonicalize comparisons with boundary + // cases, and canonicalize *-or-equal comparisons to regular comparisons. + if (const SCEVConstant *RC = dyn_cast(RHS)) { + const APInt &RA = RC->getValue()->getValue(); + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + break; + case ICmpInst::ICMP_UGE: + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_UGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_ULE: + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_ULT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_SGE: + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SGT; + RHS = getConstant(RA - 1); + Changed = true; + break; + case ICmpInst::ICMP_SLE: + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_true; + + Pred = ICmpInst::ICMP_SLT; + RHS = getConstant(RA + 1); + Changed = true; + break; + case ICmpInst::ICMP_UGT: + if (RA.isMinValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxValue()) goto trivially_false; + break; + case ICmpInst::ICMP_ULT: + if (RA.isMaxValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SGT: + if (RA.isMinSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA + 1).isMaxSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA + 1); + Changed = true; + break; + } + if (RA.isMaxSignedValue()) goto trivially_false; + break; + case ICmpInst::ICMP_SLT: + if (RA.isMaxSignedValue()) { + Pred = ICmpInst::ICMP_NE; + Changed = true; + break; + } + if ((RA - 1).isMinSignedValue()) { + Pred = ICmpInst::ICMP_EQ; + RHS = getConstant(RA - 1); + Changed = true; + break; + } + if (RA.isMinSignedValue()) goto trivially_false; + break; + } + } + + // Check for obvious equality. 
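Each *-or-equal case above follows one template: rewrite toward the strict predicate by bumping the constant, unless the constant sits at a boundary, in which case the comparison degenerates to !=, ==, or a trivial result. The unsigned >= case, sketched with uint8_t standing in for APInt:

#include <cstdint>

enum Pred { UGE, UGT, NE, EQ, AlwaysTrue };

// Canonicalize "x >=u C" the way SimplifyICmpOperands does above.
static Pred canonUGE(uint8_t &C) {
  if (C == 1)   { C = 0; return NE; } // C-1 is min:  x >=u 1   <=>  x != 0
  if (C == 255) return EQ;            // C is max:    x >=u 255 <=>  x == 255
  if (C == 0)   return AlwaysTrue;    // everything is >=u 0
  C = C - 1;                          // otherwise:   x >=u C   <=>  x >u C-1
  return UGT;
}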
+ if (HasSameValue(LHS, RHS)) { + if (ICmpInst::isTrueWhenEqual(Pred)) + goto trivially_true; + if (ICmpInst::isFalseWhenEqual(Pred)) + goto trivially_false; + } + + // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by + // adding or subtracting 1 from one of the operands. + switch (Pred) { + case ICmpInst::ICMP_SLE: + if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SLT; + Changed = true; + } + break; + case ICmpInst::ICMP_SGE: + if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/false, /*HasNSW=*/true); + Pred = ICmpInst::ICMP_SGT; + Changed = true; + } + break; + case ICmpInst::ICMP_ULE: + if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_ULT; + Changed = true; + } + break; + case ICmpInst::ICMP_UGE: + if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { + RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { + LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, + /*HasNUW=*/true, /*HasNSW=*/false); + Pred = ICmpInst::ICMP_UGT; + Changed = true; + } + break; + default: + break; + } + + // TODO: More simplifications are possible here. + + return Changed; + +trivially_true: + // Return 0 == 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_EQ; + return true; + +trivially_false: + // Return 0 != 0. + LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); + Pred = ICmpInst::ICMP_NE; + return true; +} + +bool ScalarEvolution::isKnownNegative(const SCEV *S) { + return getSignedRange(S).getSignedMax().isNegative(); +} + +bool ScalarEvolution::isKnownPositive(const SCEV *S) { + return getSignedRange(S).getSignedMin().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonNegative(const SCEV *S) { + return !getSignedRange(S).getSignedMin().isNegative(); +} + +bool ScalarEvolution::isKnownNonPositive(const SCEV *S) { + return !getSignedRange(S).getSignedMax().isStrictlyPositive(); +} + +bool ScalarEvolution::isKnownNonZero(const SCEV *S) { + return isKnownNegative(S) || isKnownPositive(S); +} + +bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Canonicalize the inputs first. + (void)SimplifyICmpOperands(Pred, LHS, RHS); + + // If LHS or RHS is an addrec, check to see if the condition is true in + // every iteration of the loop. 
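The isKnown* helpers above, and the range tests in isKnownPredicateWithRanges below, all reduce to endpoint checks on a conservative interval. A sketch with an explicit signed range type:

#include <cstdint>

// A conservative signed range [Min, Max]: the value is known to lie inside.
struct SignedRange { int64_t Min, Max; };

static bool isKnownNegative(SignedRange R)    { return R.Max < 0; }
static bool isKnownPositive(SignedRange R)    { return R.Min > 0; }
static bool isKnownNonNegative(SignedRange R) { return !(R.Min < 0); }

// "LHS <s RHS always holds" when even the extreme points cannot overlap.
static bool isKnownSLT(SignedRange L, SignedRange R) { return L.Max < R.Min; }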
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS))
+    if (isLoopEntryGuardedByCond(
+          AR->getLoop(), Pred, AR->getStart(), RHS) &&
+        isLoopBackedgeGuardedByCond(
+          AR->getLoop(), Pred, AR->getPostIncExpr(*this), RHS))
+      return true;
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS))
+    if (isLoopEntryGuardedByCond(
+          AR->getLoop(), Pred, LHS, AR->getStart()) &&
+        isLoopBackedgeGuardedByCond(
+          AR->getLoop(), Pred, LHS, AR->getPostIncExpr(*this)))
+      return true;
+
+  // Otherwise see what can be done with known constant ranges.
+  return isKnownPredicateWithRanges(Pred, LHS, RHS);
+}
+
+bool
+ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,
+                                            const SCEV *LHS, const SCEV *RHS) {
+  if (HasSameValue(LHS, RHS))
+    return ICmpInst::isTrueWhenEqual(Pred);
+
+  // This code is split out from isKnownPredicate because it is called from
+  // within isLoopEntryGuardedByCond.
+  switch (Pred) {
+  default:
+    llvm_unreachable("Unexpected ICmpInst::Predicate value!");
+    break;
+  case ICmpInst::ICMP_SGT:
+    Pred = ICmpInst::ICMP_SLT;
+    std::swap(LHS, RHS);
+  case ICmpInst::ICMP_SLT: {
+    ConstantRange LHSRange = getSignedRange(LHS);
+    ConstantRange RHSRange = getSignedRange(RHS);
+    if (LHSRange.getSignedMax().slt(RHSRange.getSignedMin()))
+      return true;
+    if (LHSRange.getSignedMin().sge(RHSRange.getSignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_SGE:
+    Pred = ICmpInst::ICMP_SLE;
+    std::swap(LHS, RHS);
+  case ICmpInst::ICMP_SLE: {
+    ConstantRange LHSRange = getSignedRange(LHS);
+    ConstantRange RHSRange = getSignedRange(RHS);
+    if (LHSRange.getSignedMax().sle(RHSRange.getSignedMin()))
+      return true;
+    if (LHSRange.getSignedMin().sgt(RHSRange.getSignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_UGT:
+    Pred = ICmpInst::ICMP_ULT;
+    std::swap(LHS, RHS);
+  case ICmpInst::ICMP_ULT: {
+    ConstantRange LHSRange = getUnsignedRange(LHS);
+    ConstantRange RHSRange = getUnsignedRange(RHS);
+    if (LHSRange.getUnsignedMax().ult(RHSRange.getUnsignedMin()))
+      return true;
+    if (LHSRange.getUnsignedMin().uge(RHSRange.getUnsignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_UGE:
+    Pred = ICmpInst::ICMP_ULE;
+    std::swap(LHS, RHS);
+  case ICmpInst::ICMP_ULE: {
+    ConstantRange LHSRange = getUnsignedRange(LHS);
+    ConstantRange RHSRange = getUnsignedRange(RHS);
+    if (LHSRange.getUnsignedMax().ule(RHSRange.getUnsignedMin()))
+      return true;
+    if (LHSRange.getUnsignedMin().ugt(RHSRange.getUnsignedMax()))
+      return false;
+    break;
+  }
+  case ICmpInst::ICMP_NE: {
+    if (getUnsignedRange(LHS).intersectWith(getUnsignedRange(RHS)).isEmptySet())
+      return true;
+    if (getSignedRange(LHS).intersectWith(getSignedRange(RHS)).isEmptySet())
+      return true;
+
+    const SCEV *Diff = getMinusSCEV(LHS, RHS);
+    if (isKnownNonZero(Diff))
+      return true;
+    break;
+  }
+  case ICmpInst::ICMP_EQ:
+    // The check at the top of the function catches the case where
+    // the values are known to be equal.
+    break;
+  }
+  return false;
+}
+
+/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
+/// protected by a conditional between LHS and RHS.  This is used to
+/// eliminate casts.
+bool
+ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
+                                             ICmpInst::Predicate Pred,
+                                             const SCEV *LHS, const SCEV *RHS) {
+  // Interpret a null as meaning no loop, where there is obviously no guard
+  // (interprocedural conditions notwithstanding).
+ if (!L) return true; + + BasicBlock *Latch = L->getLoopLatch(); + if (!Latch) + return false; + + BranchInst *LoopContinuePredicate = + dyn_cast(Latch->getTerminator()); + if (!LoopContinuePredicate || + LoopContinuePredicate->isUnconditional()) + return false; + + return isImpliedCond(Pred, LHS, RHS, + LoopContinuePredicate->getCondition(), + LoopContinuePredicate->getSuccessor(0) != L->getHeader()); +} + +/// isLoopEntryGuardedByCond - Test whether entry to the loop is protected +/// by a conditional between LHS and RHS. This is used to help avoid max +/// expressions in loop trip counts, and to eliminate casts. +bool +ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L, + ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) { + // Interpret a null as meaning no loop, where there is obviously no guard + // (interprocedural conditions notwithstanding). + if (!L) return false; + + // Starting at the loop predecessor, climb up the predecessor chain, as long + // as there are predecessors that can be found that have unique successors + // leading to the original header. + for (std::pair + Pair(L->getLoopPredecessor(), L->getHeader()); + Pair.first; + Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) { + + BranchInst *LoopEntryPredicate = + dyn_cast(Pair.first->getTerminator()); + if (!LoopEntryPredicate || + LoopEntryPredicate->isUnconditional()) + continue; + + if (isImpliedCond(Pred, LHS, RHS, + LoopEntryPredicate->getCondition(), + LoopEntryPredicate->getSuccessor(0) != Pair.second)) + return true; + } + + return false; +} + +/// isImpliedCond - Test whether the condition described by Pred, LHS, +/// and RHS is true whenever the given Cond value evaluates to true. +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + Value *FoundCondValue, + bool Inverse) { + // Recursively handle And and Or conditions. + if (BinaryOperator *BO = dyn_cast(FoundCondValue)) { + if (BO->getOpcode() == Instruction::And) { + if (!Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } else if (BO->getOpcode() == Instruction::Or) { + if (Inverse) + return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) || + isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse); + } + } + + ICmpInst *ICI = dyn_cast(FoundCondValue); + if (!ICI) return false; + + // Bail if the ICmp's operands' types are wider than the needed type + // before attempting to call getSCEV on them. This avoids infinite + // recursion, since the analysis of widening casts can require loop + // exit condition information for overflow checking, which would + // lead back here. + if (getTypeSizeInBits(LHS->getType()) < + getTypeSizeInBits(ICI->getOperand(0)->getType())) + return false; + + // Now that we found a conditional branch that dominates the loop, check to + // see if it is the comparison we are looking for. + ICmpInst::Predicate FoundPred; + if (Inverse) + FoundPred = ICI->getInversePredicate(); + else + FoundPred = ICI->getPredicate(); + + const SCEV *FoundLHS = getSCEV(ICI->getOperand(0)); + const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); + + // Balance the types. The case where FoundLHS' type is wider than + // LHS' type is checked for above. 
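The And/Or recursion in isImpliedCond above encodes two dual facts: a branch taken when (a && b) is true establishes both conjuncts, and a branch not taken (the Inverse case) on (a || b) refutes both disjuncts. A toy condition tree makes the shape visible; the tree and implies() here are hypothetical stand-ins, not LLVM API:

// Does `C` evaluating to true (Inverse == false) or false (Inverse == true)
// force leaf `Goal` to take that same truth value?
struct Cond {
  enum Kind { Leaf, And, Or } K;
  const Cond *L, *R;
  int Id; // leaf identity
};

static bool implies(const Cond *C, int Goal, bool Inverse) {
  if (C->K == Cond::And && !Inverse)   // whole And true: both children true
    return implies(C->L, Goal, Inverse) || implies(C->R, Goal, Inverse);
  if (C->K == Cond::Or && Inverse)     // whole Or false: both children false
    return implies(C->L, Goal, Inverse) || implies(C->R, Goal, Inverse);
  return C->K == Cond::Leaf && C->Id == Goal;
}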
+ if (getTypeSizeInBits(LHS->getType()) > + getTypeSizeInBits(FoundLHS->getType())) { + if (CmpInst::isSigned(Pred)) { + FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType()); + } else { + FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType()); + FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType()); + } + } + + // Canonicalize the query to match the way instcombine will have + // canonicalized the comparison. + if (SimplifyICmpOperands(Pred, LHS, RHS)) + if (LHS == RHS) + return CmpInst::isTrueWhenEqual(Pred); + if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS)) + if (FoundLHS == FoundRHS) + return CmpInst::isFalseWhenEqual(Pred); + + // Check to see if we can make the LHS or RHS match. + if (LHS == FoundRHS || RHS == FoundLHS) { + if (isa(RHS)) { + std::swap(FoundLHS, FoundRHS); + FoundPred = ICmpInst::getSwappedPredicate(FoundPred); + } else { + std::swap(LHS, RHS); + Pred = ICmpInst::getSwappedPredicate(Pred); + } + } + + // Check whether the found predicate is the same as the desired predicate. + if (FoundPred == Pred) + return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); + + // Check whether swapping the found predicate makes it the same as the + // desired predicate. + if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) { + if (isa(RHS)) + return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS); + else + return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred), + RHS, LHS, FoundLHS, FoundRHS); + } + + // Check whether the actual condition is beyond sufficient. + if (FoundPred == ICmpInst::ICMP_EQ) + if (ICmpInst::isTrueWhenEqual(Pred)) + if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + if (Pred == ICmpInst::ICMP_NE) + if (!ICmpInst::isTrueWhenEqual(FoundPred)) + if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS)) + return true; + + // Otherwise assume the worst. + return false; +} + +/// isImpliedCondOperands - Test whether the condition described by Pred, +/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS, +/// and FoundRHS is true. +bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + return isImpliedCondOperandsHelper(Pred, LHS, RHS, + FoundLHS, FoundRHS) || + // ~x < ~y --> x > y + isImpliedCondOperandsHelper(Pred, LHS, RHS, + getNotSCEV(FoundRHS), + getNotSCEV(FoundLHS)); +} + +/// isImpliedCondOperandsHelper - Test whether the condition described by +/// Pred, LHS, and RHS is true whenever the condition described by Pred, +/// FoundLHS, and FoundRHS is true. 
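The getNotSCEV trick in isImpliedCondOperands above rests on the identity that bitwise NOT reverses unsigned order: ~x <u ~y exactly when x >u y, because ~x is UINT_MAX - x. An exhaustive 8-bit check:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint8_t nx = ~uint8_t(x), ny = ~uint8_t(y);  // 255 - x, 255 - y
      assert((nx < ny) == (x > y));
    }
  return 0;
}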
+bool +ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS, + const SCEV *FoundLHS, + const SCEV *FoundRHS) { + switch (Pred) { + default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_NE: + if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_SGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_SLE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, RHS, FoundRHS)) + return true; + break; + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + if (isKnownPredicateWithRanges(ICmpInst::ICMP_UGE, LHS, FoundLHS) && + isKnownPredicateWithRanges(ICmpInst::ICMP_ULE, RHS, FoundRHS)) + return true; + break; + } + + return false; +} + +/// getBECount - Subtract the end and start values and divide by the step, +/// rounding up, to get the number of times the backedge is executed. Return +/// CouldNotCompute if an intermediate computation overflows. +const SCEV *ScalarEvolution::getBECount(const SCEV *Start, + const SCEV *End, + const SCEV *Step, + bool NoWrap) { + assert(!isKnownNegative(Step) && + "This code doesn't handle negative strides yet!"); + + const Type *Ty = Start->getType(); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); + const SCEV *Diff = getMinusSCEV(End, Start); + const SCEV *RoundUp = getAddExpr(Step, NegOne); + + // Add an adjustment to the difference between End and Start so that + // the division will effectively round up. + const SCEV *Add = getAddExpr(Diff, RoundUp); + + if (!NoWrap) { + // Check Add for unsigned overflow. + // TODO: More sophisticated things could be done here. + const Type *WideTy = IntegerType::get(getContext(), + getTypeSizeInBits(Ty) + 1); + const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); + const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); + const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); + if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) + return getCouldNotCompute(); + } + + return getUDivExpr(Add, Step); +} + +/// HowManyLessThans - Return the number of times a backedge containing the +/// specified less-than comparison will execute. If not computable, return +/// CouldNotCompute. +ScalarEvolution::BackedgeTakenInfo +ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool isSigned) { + // Only handle: "ADDREC < LoopInvariant". + if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); + + const SCEVAddRecExpr *AddRec = dyn_cast(LHS); + if (!AddRec || AddRec->getLoop() != L) + return getCouldNotCompute(); + + // Check to see if we have a flag which makes analysis easy. + bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() : + AddRec->hasNoUnsignedWrap(); + + if (AddRec->isAffine()) { + unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); + const SCEV *Step = AddRec->getStepRecurrence(*this); + + if (Step->isZero()) + return getCouldNotCompute(); + if (Step->isOne()) { + // With unit stride, the iteration never steps past the limit value. 
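getBECount above is ceiling division in disguise: (End - Start + (Step - 1)) / Step, with the intermediate sum redone one bit wider to detect wrap. A sketch with uint8_t standing in for the SCEV expressions:

#include <cstdint>

// Returns false when the intermediate add wraps (CouldNotCompute above).
static bool beCount(uint8_t Start, uint8_t End, uint8_t Step, uint8_t &Count) {
  uint8_t Diff = End - Start;                   // modular, like getMinusSCEV
  uint8_t RoundUp = Step - 1;                   // getAddExpr(Step, NegOne)
  uint8_t Add = Diff + RoundUp;
  uint16_t WideAdd = uint16_t(Diff) + RoundUp;  // zero-extended operands
  if (WideAdd != Add)
    return false;                               // the narrow add overflowed
  Count = Add / Step;
  return true;
}
// beCount(2, 11, 3, N) sets N = (9 + 2) / 3 = 3: for {2,+,3} <u 11 the
// iterate takes the values 2, 5, 8, and the backedge executes three times.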
+ } else if (isKnownPositive(Step)) { + // Test whether a positive iteration can step past the limit + // value and past the maximum value for its type in a single step. + // Note that it's not sufficient to check NoWrap here, because even + // though the value after a wrap is undefined, it's not undefined + // behavior, so if wrap does occur, the loop could either terminate or + // loop infinitely, but in either case, the loop is guaranteed to + // iterate at least until the iteration where the wrapping occurs. + const SCEV *One = getConstant(Step->getType(), 1); + if (isSigned) { + APInt Max = APInt::getSignedMaxValue(BitWidth); + if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) + .slt(getSignedRange(RHS).getSignedMax())) + return getCouldNotCompute(); + } else { + APInt Max = APInt::getMaxValue(BitWidth); + if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) + .ult(getUnsignedRange(RHS).getUnsignedMax())) + return getCouldNotCompute(); + } + } else + // TODO: Handle negative strides here and below. + return getCouldNotCompute(); + + // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant + // m. So, we count the number of iterations in which {n,+,s} < m is true. + // Note that we cannot simply return max(m-n,0)/s because it's not safe to + // treat m-n as signed nor unsigned due to overflow possibility. + + // First, we get the value of the LHS in the first iteration: n + const SCEV *Start = AddRec->getOperand(0); + + // Determine the minimum constant start value. + const SCEV *MinStart = getConstant(isSigned ? + getSignedRange(Start).getSignedMin() : + getUnsignedRange(Start).getUnsignedMin()); + + // If we know that the condition is true in order to enter the loop, + // then we know that it will run exactly (m-n)/s times. Otherwise, we + // only know that it will execute (max(m,n)-n)/s times. In both cases, + // the division must round up. + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, + isSigned ? ICmpInst::ICMP_SLT : + ICmpInst::ICMP_ULT, + getMinusSCEV(Start, Step), RHS)) + End = isSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + // Determine the maximum constant end value. + const SCEV *MaxEnd = getConstant(isSigned ? + getSignedRange(End).getSignedMax() : + getUnsignedRange(End).getUnsignedMax()); + + // If MaxEnd is within a step of the maximum integer value in its type, + // adjust it down to the minimum value which would produce the same effect. + // This allows the subsequent ceiling division of (N+(step-1))/step to + // compute the correct value. + const SCEV *StepMinusOne = getMinusSCEV(Step, + getConstant(Step->getType(), 1)); + MaxEnd = isSigned ? + getSMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), + StepMinusOne)) : + getUMinExpr(MaxEnd, + getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), + StepMinusOne)); + + // Finally, we subtract these two values and divide, rounding up, to get + // the number of times the backedge is executed. + const SCEV *BECount = getBECount(Start, End, Step, NoWrap); + + // The maximum backedge count is similar, except using the minimum start + // value and the maximum end value. + const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap); + + return BackedgeTakenInfo(BECount, MaxBECount); + } + + return getCouldNotCompute(); +} + +/// getNumIterationsInRange - Return the number of iterations of this loop that +/// produce values in the specified constant range. 
Another way of looking at +/// this is that it returns the first iteration number where the value is not in +/// the condition, thus computing the exit count. If the iteration count can't +/// be computed, an instance of SCEVCouldNotCompute is returned. +const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, + ScalarEvolution &SE) const { + if (Range.isFullSet()) // Infinite loop. + return SE.getCouldNotCompute(); + + // If the start is a non-zero constant, shift the range to simplify things. + if (const SCEVConstant *SC = dyn_cast(getStart())) + if (!SC->getValue()->isZero()) { + SmallVector Operands(op_begin(), op_end()); + Operands[0] = SE.getConstant(SC->getType(), 0); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); + if (const SCEVAddRecExpr *ShiftedAddRec = + dyn_cast(Shifted)) + return ShiftedAddRec->getNumIterationsInRange( + Range.subtract(SC->getValue()->getValue()), SE); + // This is strange and shouldn't happen. + return SE.getCouldNotCompute(); + } + + // The only time we can solve this is when we have all constant indices. + // Otherwise, we cannot determine the overflow conditions. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!isa(getOperand(i))) + return SE.getCouldNotCompute(); + + + // Okay at this point we know that all elements of the chrec are constants and + // that the start element is zero. + + // First check to see if the range contains zero. If not, the first + // iteration exits. + unsigned BitWidth = SE.getTypeSizeInBits(getType()); + if (!Range.contains(APInt(BitWidth, 0))) + return SE.getConstant(getType(), 0); + + if (isAffine()) { + // If this is an affine expression then we have this situation: + // Solve {0,+,A} in Range === Ax in Range + + // We know that zero is in the range. If A is positive then we know that + // the upper value of the range must be the first possible exit value. + // If A is negative then the lower of the range is the last possible loop + // value. Also note that we already checked for a full range. + APInt One(BitWidth,1); + APInt A = cast(getOperand(1))->getValue()->getValue(); + APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower(); + + // The exit value should be (End+A)/A. + APInt ExitVal = (End + A).udiv(A); + ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); + + // Evaluate at the exit value. If we really did fall out of the valid + // range, then we computed our trip count, otherwise wrap around or other + // things must have happened. + ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); + if (Range.contains(Val->getValue())) + return SE.getCouldNotCompute(); // Something strange happened + + // Ensure that the previous value is in the range. This is a sanity check. + assert(Range.contains( + EvaluateConstantChrecAtConstant(this, + ConstantInt::get(SE.getContext(), ExitVal - One), SE)->getValue()) && + "Linear scev computation is off in a bad way!"); + return SE.getConstant(ExitValue); + } else if (isQuadratic()) { + // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the + // quadratic equation to solve it. To do this, we must frame our problem in + // terms of figuring out when zero is crossed, instead of when + // Range.getUpper() is crossed. 
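Before the quadratic case, the affine computation above can be checked by hand: for {0,+,A} against a range with zero inside, End is Range.getUpper() - 1 when A is positive, and the first out-of-range iteration is (End + A) / A. With A = 3 and the range [0, 10):

#include <cassert>

int main() {
  long A = 3;
  long End = 9;                    // Range.getUpper() - 1
  long ExitVal = (End + A) / A;    // first iteration outside the range
  assert(ExitVal == 4);
  assert(A * ExitVal == 12);       // value 12 is outside [0, 10)
  assert(A * (ExitVal - 1) == 9);  // value 9 is still inside: sanity check
  return 0;
}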
+ SmallVector NewOps(op_begin(), op_end()); + NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + + // Next, solve the constructed addrec + std::pair Roots = + SolveQuadraticEquation(cast(NewAddRec), SE); + const SCEVConstant *R1 = dyn_cast(Roots.first); + const SCEVConstant *R2 = dyn_cast(Roots.second); + if (R1) { + // Pick the smallest positive root value. + if (ConstantInt *CB = + dyn_cast(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, + R1->getValue(), R2->getValue()))) { + if (CB->getZExtValue() == false) + std::swap(R1, R2); // R1 is the minimum root now. + + // Make sure the root is not off by one. The returned iteration should + // not be in the range, but the previous one should be. When solving + // for "X*X < 5", for example, we should not return a root of 2. + ConstantInt *R1Val = EvaluateConstantChrecAtConstant(this, + R1->getValue(), + SE); + if (Range.contains(R1Val->getValue())) { + // The next iteration must be out of the range... + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); + + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (!Range.contains(R1Val->getValue())) + return SE.getConstant(NextVal); + return SE.getCouldNotCompute(); // Something strange happened + } + + // If R1 was not in the range, then it is a good return value. Make + // sure that R1-1 WAS in the range though, just in case. + ConstantInt *NextVal = + ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); + R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(R1Val->getValue())) + return R1; + return SE.getCouldNotCompute(); // Something strange happened + } + } + } + + return SE.getCouldNotCompute(); +} + + + +//===----------------------------------------------------------------------===// +// SCEVCallbackVH Class Implementation +//===----------------------------------------------------------------------===// + +void ScalarEvolution::SCEVCallbackVH::deleted() { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + if (PHINode *PN = dyn_cast(getValPtr())) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(getValPtr()); + // this now dangles! +} + +void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) { + assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!"); + + // Forget all the expressions associated with users of the old value, + // so that future queries will recompute the expressions using the new + // value. + Value *Old = getValPtr(); + SmallVector Worklist; + SmallPtrSet Visited; + for (Value::use_iterator UI = Old->use_begin(), UE = Old->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + while (!Worklist.empty()) { + User *U = Worklist.pop_back_val(); + // Deleting the Old value will cause this to dangle. Postpone + // that until everything else is done. + if (U == Old) + continue; + if (!Visited.insert(U)) + continue; + if (PHINode *PN = dyn_cast(U)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(U); + for (Value::use_iterator UI = U->use_begin(), UE = U->use_end(); + UI != UE; ++UI) + Worklist.push_back(*UI); + } + // Delete the Old value. + if (PHINode *PN = dyn_cast(Old)) + SE->ConstantEvolutionLoopExitValue.erase(PN); + SE->ValueExprMap.erase(Old); + // this now dangles! 
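The walk above is a standard worklist traversal of the transitive users, with a visited set so a value reachable along several use chains is processed only once, and with the erased value itself postponed to the end. The skeleton, with a hypothetical Node standing in for llvm::User and a std::set standing in for the analysis caches:

#include <set>
#include <vector>

struct Node { std::vector<Node*> Users; };

static void invalidateTransitiveUsers(Node *Old,
                                      std::set<Node*> &CachedResults) {
  std::vector<Node*> Worklist(Old->Users.begin(), Old->Users.end());
  std::set<Node*> Visited;
  while (!Worklist.empty()) {
    Node *U = Worklist.back();
    Worklist.pop_back();
    if (U == Old || !Visited.insert(U).second)
      continue;                       // self is postponed; repeats are skipped
    CachedResults.erase(U);           // drop memoized entries for U
    Worklist.insert(Worklist.end(), U->Users.begin(), U->Users.end());
  }
  CachedResults.erase(Old);           // finally drop the old value itself
}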
+} + +ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se) + : CallbackVH(V), SE(se) {} + +//===----------------------------------------------------------------------===// +// ScalarEvolution Class Implementation +//===----------------------------------------------------------------------===// + +ScalarEvolution::ScalarEvolution() + : FunctionPass(ID), FirstUnknown(0) { + initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); +} + +bool ScalarEvolution::runOnFunction(Function &F) { + this->F = &F; + LI = &getAnalysis(); + TD = getAnalysisIfAvailable(); + DT = &getAnalysis(); + return false; +} + +void ScalarEvolution::releaseMemory() { + // Iterate through all the SCEVUnknown instances and call their + // destructors, so that they release their references to their values. + for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) + U->~SCEVUnknown(); + FirstUnknown = 0; + + ValueExprMap.clear(); + BackedgeTakenCounts.clear(); + ConstantEvolutionLoopExitValue.clear(); + ValuesAtScopes.clear(); + LoopDispositions.clear(); + BlockDispositions.clear(); + UnsignedRanges.clear(); + SignedRanges.clear(); + UniqueSCEVs.clear(); + SCEVAllocator.Reset(); +} + +void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); +} + +bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { + return !isa(getBackedgeTakenCount(L)); +} + +static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, + const Loop *L) { + // Print all inner loops first + for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) + PrintLoopInfo(OS, SE, *I); + + OS << "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + if (ExitBlocks.size() != 1) + OS << " "; + + if (SE->hasLoopInvariantBackedgeTakenCount(L)) { + OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L); + } else { + OS << "Unpredictable backedge-taken count. "; + } + + OS << "\n" + "Loop "; + WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); + OS << ": "; + + if (!isa(SE->getMaxBackedgeTakenCount(L))) { + OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L); + } else { + OS << "Unpredictable max backedge-taken count. "; + } + + OS << "\n"; +} + +void ScalarEvolution::print(raw_ostream &OS, const Module *) const { + // ScalarEvolution's implementation of the print method is to print + // out SCEV values of all instructions that are interesting. Doing + // this potentially causes it to create new SCEV objects though, + // which technically conflicts with the const qualifier. This isn't + // observable from outside the class though, so casting away the + // const isn't dangerous. 
+ ScalarEvolution &SE = *const_cast(this); + + OS << "Classifying expressions for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + if (isSCEVable(I->getType()) && !isa(*I)) { + OS << *I << '\n'; + OS << " --> "; + const SCEV *SV = SE.getSCEV(&*I); + SV->print(OS); + + const Loop *L = LI->getLoopFor((*I).getParent()); + + const SCEV *AtUse = SE.getSCEVAtScope(SV, L); + if (AtUse != SV) { + OS << " --> "; + AtUse->print(OS); + } + + if (L) { + OS << "\t\t" "Exits: "; + const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop()); + if (!SE.isLoopInvariant(ExitValue, L)) { + OS << "<>"; + } else { + OS << *ExitValue; + } + } + + OS << "\n"; + } + + OS << "Determining loop execution counts for: "; + WriteAsOperand(OS, F, /*PrintType=*/false); + OS << "\n"; + for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) + PrintLoopInfo(OS, &SE, *I); +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) { + std::map &Values = LoopDispositions[S]; + std::pair::iterator, bool> Pair = + Values.insert(std::make_pair(L, LoopVariant)); + if (!Pair.second) + return Pair.first->second; + + LoopDisposition D = computeLoopDisposition(S, L); + return LoopDispositions[S][L] = D; +} + +ScalarEvolution::LoopDisposition +ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { + switch (S->getSCEVType()) { + case scConstant: + return LoopInvariant; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getLoopDisposition(cast(S)->getOperand(), L); + case scAddRecExpr: { + const SCEVAddRecExpr *AR = cast(S); + + // If L is the addrec's loop, it's computable. + if (AR->getLoop() == L) + return LoopComputable; + + // Add recurrences are never invariant in the function-body (null loop). + if (!L) + return LoopVariant; + + // This recurrence is variant w.r.t. L if L contains AR's loop. + if (L->contains(AR->getLoop())) + return LoopVariant; + + // This recurrence is invariant w.r.t. L if AR's loop contains L. + if (AR->getLoop()->contains(L)) + return LoopInvariant; + + // This recurrence is variant w.r.t. L if any of its operands + // are variant. + for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); + I != E; ++I) + if (!isLoopInvariant(*I, L)) + return LoopVariant; + + // Otherwise it's loop-invariant. + return LoopInvariant; + } + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast(S); + bool HasVarying = false; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + LoopDisposition D = getLoopDisposition(*I, L); + if (D == LoopVariant) + return LoopVariant; + if (D == LoopComputable) + HasVarying = true; + } + return HasVarying ? LoopComputable : LoopInvariant; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast(S); + LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L); + if (LD == LoopVariant) + return LoopVariant; + LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L); + if (RD == LoopVariant) + return LoopVariant; + return (LD == LoopInvariant && RD == LoopInvariant) ? + LoopInvariant : LoopComputable; + } + case scUnknown: + // All non-instruction values are loop invariant. All instructions are loop + // invariant if they are not contained in the specified loop. + // Instructions are never considered invariant in the function body + // (null loop) because they are defined within the "loop". 
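The n-ary cases above fold a three-valued verdict: any LoopVariant operand forces LoopVariant, otherwise any LoopComputable operand forces LoopComputable, and only an all-invariant operand list stays LoopInvariant. The fold in isolation:

#include <vector>

enum LoopDisposition { LoopVariant, LoopInvariant, LoopComputable };

static LoopDisposition combine(const std::vector<LoopDisposition> &Ops) {
  bool HasVarying = false;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    if (Ops[i] == LoopVariant)
      return LoopVariant;       // one variant operand poisons the expression
    if (Ops[i] == LoopComputable)
      HasVarying = true;
  }
  return HasVarying ? LoopComputable : LoopInvariant;
}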
+ if (Instruction *I = dyn_cast(cast(S)->getValue())) + return (L && !L->contains(I)) ? LoopInvariant : LoopVariant; + return LoopInvariant; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return LoopVariant; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return LoopVariant; +} + +bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopInvariant; +} + +bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) { + return getLoopDisposition(S, L) == LoopComputable; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) { + std::map &Values = BlockDispositions[S]; + std::pair::iterator, bool> + Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock)); + if (!Pair.second) + return Pair.first->second; + + BlockDisposition D = computeBlockDisposition(S, BB); + return BlockDispositions[S][BB] = D; +} + +ScalarEvolution::BlockDisposition +ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { + switch (S->getSCEVType()) { + case scConstant: + return ProperlyDominatesBlock; + case scTruncate: + case scZeroExtend: + case scSignExtend: + return getBlockDisposition(cast(S)->getOperand(), BB); + case scAddRecExpr: { + // This uses a "dominates" query instead of "properly dominates" query + // to test for proper dominance too, because the instruction which + // produces the addrec's value is a PHI, and a PHI effectively properly + // dominates its entire containing block. + const SCEVAddRecExpr *AR = cast(S); + if (!DT->dominates(AR->getLoop()->getHeader(), BB)) + return DoesNotDominateBlock; + } + // FALL THROUGH into SCEVNAryExpr handling. + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast(S); + bool Proper = true; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + BlockDisposition D = getBlockDisposition(*I, BB); + if (D == DoesNotDominateBlock) + return DoesNotDominateBlock; + if (D == DominatesBlock) + Proper = false; + } + return Proper ? ProperlyDominatesBlock : DominatesBlock; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + BlockDisposition LD = getBlockDisposition(LHS, BB); + if (LD == DoesNotDominateBlock) + return DoesNotDominateBlock; + BlockDisposition RD = getBlockDisposition(RHS, BB); + if (RD == DoesNotDominateBlock) + return DoesNotDominateBlock; + return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ? 
+ ProperlyDominatesBlock : DominatesBlock; + } + case scUnknown: + if (Instruction *I = + dyn_cast(cast(S)->getValue())) { + if (I->getParent() == BB) + return DominatesBlock; + if (DT->properlyDominates(I->getParent(), BB)) + return ProperlyDominatesBlock; + return DoesNotDominateBlock; + } + return ProperlyDominatesBlock; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return DoesNotDominateBlock; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return DoesNotDominateBlock; +} + +bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) >= DominatesBlock; +} + +bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { + return getBlockDisposition(S, BB) == ProperlyDominatesBlock; +} + +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *Cast = cast(S); + const SCEV *CastOp = Cast->getOperand(); + return Op == CastOp || hasOperand(CastOp, Op); + } + case scAddRecExpr: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: { + const SCEVNAryExpr *NAry = cast(S); + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + const SCEV *NAryOp = *I; + if (NAryOp == Op || hasOperand(NAryOp, Op)) + return true; + } + return false; + } + case scUDivExpr: { + const SCEVUDivExpr *UDiv = cast(S); + const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS(); + return LHS == Op || hasOperand(LHS, Op) || + RHS == Op || hasOperand(RHS, Op); + } + case scUnknown: + return false; + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + return false; + default: break; + } + llvm_unreachable("Unknown SCEV kind!"); + return false; +} + +void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { + ValuesAtScopes.erase(S); + LoopDispositions.erase(S); + BlockDispositions.erase(S); + UnsignedRanges.erase(S); + SignedRanges.erase(S); +} diff --git a/final/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/final/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp new file mode 100644 index 00000000000..e9edb3e083d --- /dev/null +++ b/final/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -0,0 +1,173 @@ +//===- ScalarEvolutionAliasAnalysis.cpp - SCEV-based Alias Analysis -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a +// simple alias analysis implemented in terms of ScalarEvolution queries. +// +// This differs from traditional loop dependence analysis in that it tests +// for dependencies within a single iteration of a loop, rather than +// dependencies between different iterations. +// +// ScalarEvolution has a more complete understanding of pointer arithmetic +// than BasicAliasAnalysis' collection of ad-hoc analyses. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+  /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis
+  /// implementation that uses ScalarEvolution to answer queries.
+  class ScalarEvolutionAliasAnalysis : public FunctionPass,
+                                       public AliasAnalysis {
+    ScalarEvolution *SE;
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(0) {
+      initializeScalarEvolutionAliasAnalysisPass(
+        *PassRegistry::getPassRegistry());
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+  private:
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual bool runOnFunction(Function &F);
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+
+    Value *GetBaseValue(const SCEV *S);
+  };
+} // End of anonymous namespace
+
+// Register this pass...
+char ScalarEvolutionAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                         "ScalarEvolution-based Alias Analysis", false, true, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa",
+                       "ScalarEvolution-based Alias Analysis", false, true, false)
+
+FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() {
+  return new ScalarEvolutionAliasAnalysis();
+}
+
+void
+ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredTransitive<ScalarEvolution>();
+  AU.setPreservesAll();
+  AliasAnalysis::getAnalysisUsage(AU);
+}
+
+bool
+ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) {
+  InitializeAliasAnalysis(this);
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+/// GetBaseValue - Given an expression, try to find a
+/// base value. Return null if none was found.
+Value *
+ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // In an addrec, assume that the base will be in the start, rather
+    // than the step.
+    return GetBaseValue(AR->getStart());
+  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    // If there's a pointer operand, it'll be sorted at the end of the list.
+    const SCEV *Last = A->getOperand(A->getNumOperands()-1);
+    if (Last->getType()->isPointerTy())
+      return GetBaseValue(Last);
+  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    // This is a leaf node.
+    return U->getValue();
+  }
+  // No identified object found.
+  return 0;
+}
+
+AliasAnalysis::AliasResult
+ScalarEvolutionAliasAnalysis::alias(const Location &LocA,
+                                    const Location &LocB) {
+  // If either of the memory references is empty, it doesn't matter what the
+  // pointer values are. This allows the code below to ignore this special
+  // case.
+  if (LocA.Size == 0 || LocB.Size == 0)
+    return NoAlias;
+
+  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
+  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr));
+  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr));
+
+  // If they evaluate to the same expression, it's a MustAlias.
+  if (AS == BS) return MustAlias;
+
+  // If something is known about the difference between the two addresses,
+  // see if it's enough to prove a NoAlias.
+  if (SE->getEffectiveSCEVType(AS->getType()) ==
+      SE->getEffectiveSCEVType(BS->getType())) {
+    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
+    APInt ASizeInt(BitWidth, LocA.Size);
+    APInt BSizeInt(BitWidth, LocB.Size);
+
+    // Compute the difference between the two pointers.
+    const SCEV *BA = SE->getMinusSCEV(BS, AS);
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes doesn't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+      return NoAlias;
+
+    // Folding the subtraction while preserving range information can be
+    // tricky (because of INT_MIN, etc.); if the prior test failed, swap AS
+    // and BS and try again to see if things fold better that way.
+
+    // Compute the difference between the two pointers.
+    const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes doesn't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+      return NoAlias;
+  }
+
+  // If ScalarEvolution can find an underlying object, form a new query.
+  // The correctness of this depends on ScalarEvolution not recognizing
+  // inttoptr and ptrtoint operators.
+  Value *AO = GetBaseValue(AS);
+  Value *BO = GetBaseValue(BS);
+  if ((AO && AO != LocA.Ptr) || (BO && BO != LocB.Ptr))
+    if (alias(Location(AO ? AO : LocA.Ptr,
+                       AO ? +UnknownSize : LocA.Size,
+                       AO ? 0 : LocA.TBAATag),
+              Location(BO ? BO : LocB.Ptr,
+                       BO ? +UnknownSize : LocB.Size,
+                       BO ? 0 : LocB.TBAATag)) == NoAlias)
+      return NoAlias;
+
+  // Forward the query to the next analysis.
+  return AliasAnalysis::alias(LocA, LocB);
+}
diff --git a/final/lib/Analysis/ScalarEvolutionExpander.cpp b/final/lib/Analysis/ScalarEvolutionExpander.cpp
new file mode 100644
index 00000000000..76a94ea2746
--- /dev/null
+++ b/final/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -0,0 +1,1376 @@
+//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the scalar evolution expander,
+// which is used to generate the code corresponding to a given scalar evolution
+// expression.
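+//
+// A rough usage sketch (caller-side names are hypothetical; expandCodeFor
+// is defined later in this file):
+//
+//   SCEVExpander Expander(SE);
+//   Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);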
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+                                       Instruction::CastOps Op,
+                                       BasicBlock::iterator IP) {
+  // Check to see if there is already a cast!
+  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    User *U = *UI;
+    if (U->getType() == Ty)
+      if (CastInst *CI = dyn_cast<CastInst>(U))
+        if (CI->getOpcode() == Op) {
+          // If the cast isn't where we want it, fix it.
+          if (BasicBlock::iterator(CI) != IP) {
+            // Create a new cast, and leave the old cast in place in case
+            // it is being used as an insert point. Clear its operand
+            // so that it doesn't hold anything live.
+            Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
+            NewCI->takeName(CI);
+            CI->replaceAllUsesWith(NewCI);
+            CI->setOperand(0, UndefValue::get(V->getType()));
+            rememberInstruction(NewCI);
+            return NewCI;
+          }
+          rememberInstruction(CI);
+          return CI;
+        }
+  }
+
+  // Create a new cast.
+  Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
+  rememberInstruction(I);
+  return I;
+}
+
+/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
+/// which must be possible with a noop cast, doing what we can to share
+/// the casts.
+Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
+  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
+  assert((Op == Instruction::BitCast ||
+          Op == Instruction::PtrToInt ||
+          Op == Instruction::IntToPtr) &&
+         "InsertNoopCastOfTo cannot perform non-noop casts!");
+  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
+         "InsertNoopCastOfTo cannot change sizes!");
+
+  // Short-circuit unnecessary bitcasts.
+  if (Op == Instruction::BitCast && V->getType() == Ty)
+    return V;
+
+  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
+  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
+      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
+    if (CastInst *CI = dyn_cast<CastInst>(V))
+      if ((CI->getOpcode() == Instruction::PtrToInt ||
+           CI->getOpcode() == Instruction::IntToPtr) &&
+          SE.getTypeSizeInBits(CI->getType()) ==
+          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
+        return CI->getOperand(0);
+    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
+      if ((CE->getOpcode() == Instruction::PtrToInt ||
+           CE->getOpcode() == Instruction::IntToPtr) &&
+          SE.getTypeSizeInBits(CE->getType()) ==
+          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
+        return CE->getOperand(0);
+  }
+
+  // Fold a cast of a constant.
+  if (Constant *C = dyn_cast<Constant>(V))
+    return ConstantExpr::getCast(Op, C, Ty);
+
+  // Cast the argument at the beginning of the entry block, after
+  // any bitcasts of other arguments.
+  if (Argument *A = dyn_cast<Argument>(V)) {
+    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+    while ((isa<BitCastInst>(IP) &&
+            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+            cast<BitCastInst>(IP)->getOperand(0) != A) ||
+           isa<DbgInfoIntrinsic>(IP))
+      ++IP;
+    return ReuseOrCreateCast(A, Ty, Op, IP);
+  }
+
+  // Cast the instruction immediately after the instruction.
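+  // (For an invoke, which terminates its own block, "immediately after"
+  // means the front of its normal destination block.)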
+ Instruction *I = cast(V); + BasicBlock::iterator IP = I; ++IP; + if (InvokeInst *II = dyn_cast(I)) + IP = II->getNormalDest()->begin(); + while (isa(IP) || isa(IP)) ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); +} + +/// InsertBinop - Insert the specified binary operator, doing a small amount +/// of work to avoid inserting an obviously redundant operation. +Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, + Value *LHS, Value *RHS) { + // Fold a binop with constant operands. + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + return ConstantExpr::get(Opcode, CLHS, CRHS); + + // Do a quick scan to see if we have this binop nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. + BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa(IP)) + ScanLimit++; + if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && + IP->getOperand(1) == RHS) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // If we haven't found this binop, insert it. + Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"); + rememberInstruction(BO); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return BO; +} + +/// FactorOutConstant - Test if S is divisible by Factor, using signed +/// division. If so, update S with Factor divided out and return true. +/// S need not be evenly divisible if a reasonable remainder can be +/// computed. +/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made +/// unnecessary; in its place, just signed-divide Ops[i] by the scale and +/// check to see if the divide was folded. +static bool FactorOutConstant(const SCEV *&S, + const SCEV *&Remainder, + const SCEV *Factor, + ScalarEvolution &SE, + const TargetData *TD) { + // Everything is divisible by one. + if (Factor->isOne()) + return true; + + // x/x == 1. + if (S == Factor) { + S = SE.getConstant(S->getType(), 1); + return true; + } + + // For a Constant, check for a multiple of the given factor. + if (const SCEVConstant *C = dyn_cast(S)) { + // 0/x == 0. + if (C->isZero()) + return true; + // Check for divisibility. + if (const SCEVConstant *FC = dyn_cast(Factor)) { + ConstantInt *CI = + ConstantInt::get(SE.getContext(), + C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + // If the quotient is zero and the remainder is non-zero, reject + // the value at this scale. It will be considered for subsequent + // smaller scales. 
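+      // For example, with Factor == 4, the constant 6 divides to quotient 1
+      // with remainder 2 and is accepted; with Factor == 8 the quotient is 0,
+      // so 6 is left alone here and reconsidered at a smaller scale.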
+ if (!CI->isZero()) { + const SCEV *Div = SE.getConstant(CI); + S = Div; + Remainder = + SE.getAddExpr(Remainder, + SE.getConstant(C->getValue()->getValue().srem( + FC->getValue()->getValue()))); + return true; + } + } + } + + // In a Mul, check if there is a constant operand which is a multiple + // of the given factor. + if (const SCEVMulExpr *M = dyn_cast(S)) { + if (TD) { + // With TargetData, the size is known. Check if there is a constant + // operand which is a multiple of the given factor. If so, we can + // factor it. + const SCEVConstant *FC = cast(Factor); + if (const SCEVConstant *C = dyn_cast(M->getOperand(0))) + if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { + SmallVector NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[0] = + SE.getConstant(C->getValue()->getValue().sdiv( + FC->getValue()->getValue())); + S = SE.getMulExpr(NewMulOps); + return true; + } + } else { + // Without TargetData, check if Factor can be factored out of any of the + // Mul's operands. If so, we can just remove it. + for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { + const SCEV *SOp = M->getOperand(i); + const SCEV *Remainder = SE.getConstant(SOp->getType(), 0); + if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && + Remainder->isZero()) { + SmallVector NewMulOps(M->op_begin(), M->op_end()); + NewMulOps[i] = SOp; + S = SE.getMulExpr(NewMulOps); + return true; + } + } + } + } + + // In an AddRec, check if both start and step are divisible. + if (const SCEVAddRecExpr *A = dyn_cast(S)) { + const SCEV *Step = A->getStepRecurrence(SE); + const SCEV *StepRem = SE.getConstant(Step->getType(), 0); + if (!FactorOutConstant(Step, StepRem, Factor, SE, TD)) + return false; + if (!StepRem->isZero()) + return false; + const SCEV *Start = A->getStart(); + if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) + return false; + S = SE.getAddRecExpr(Start, Step, A->getLoop()); + return true; + } + + return false; +} + +/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs +/// is the number of SCEVAddRecExprs present, which are kept at the end of +/// the list. +/// +static void SimplifyAddOperands(SmallVectorImpl &Ops, + const Type *Ty, + ScalarEvolution &SE) { + unsigned NumAddRecs = 0; + for (unsigned i = Ops.size(); i > 0 && isa(Ops[i-1]); --i) + ++NumAddRecs; + // Group Ops into non-addrecs and addrecs. + SmallVector NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs); + SmallVector AddRecs(Ops.end() - NumAddRecs, Ops.end()); + // Let ScalarEvolution sort and simplify the non-addrecs list. + const SCEV *Sum = NoAddRecs.empty() ? + SE.getConstant(Ty, 0) : + SE.getAddExpr(NoAddRecs); + // If it returned an add, use the operands. Otherwise it simplified + // the sum into a single value, so just use that. + Ops.clear(); + if (const SCEVAddExpr *Add = dyn_cast(Sum)) + Ops.append(Add->op_begin(), Add->op_end()); + else if (!Sum->isZero()) + Ops.push_back(Sum); + // Then append the addrecs. + Ops.append(AddRecs.begin(), AddRecs.end()); +} + +/// SplitAddRecs - Flatten a list of add operands, moving addrec start values +/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}. +/// This helps expose more opportunities for folding parts of the expressions +/// into GEP indices. +/// +static void SplitAddRecs(SmallVectorImpl &Ops, + const Type *Ty, + ScalarEvolution &SE) { + // Find the addrecs. 
+ SmallVector AddRecs; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + while (const SCEVAddRecExpr *A = dyn_cast(Ops[i])) { + const SCEV *Start = A->getStart(); + if (Start->isZero()) break; + const SCEV *Zero = SE.getConstant(Ty, 0); + AddRecs.push_back(SE.getAddRecExpr(Zero, + A->getStepRecurrence(SE), + A->getLoop())); + if (const SCEVAddExpr *Add = dyn_cast(Start)) { + Ops[i] = Zero; + Ops.append(Add->op_begin(), Add->op_end()); + e += Add->getNumOperands(); + } else { + Ops[i] = Start; + } + } + if (!AddRecs.empty()) { + // Add the addrecs onto the end of the list. + Ops.append(AddRecs.begin(), AddRecs.end()); + // Resort the operand list, moving any constants to the front. + SimplifyAddOperands(Ops, Ty, SE); + } +} + +/// expandAddToGEP - Expand an addition expression with a pointer type into +/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps +/// BasicAliasAnalysis and other passes analyze the result. See the rules +/// for getelementptr vs. inttoptr in +/// http://llvm.org/docs/LangRef.html#pointeraliasing +/// for details. +/// +/// Design note: The correctness of using getelementptr here depends on +/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as +/// they may introduce pointer arithmetic which may not be safely converted +/// into getelementptr. +/// +/// Design note: It might seem desirable for this function to be more +/// loop-aware. If some of the indices are loop-invariant while others +/// aren't, it might seem desirable to emit multiple GEPs, keeping the +/// loop-invariant portions of the overall computation outside the loop. +/// However, there are a few reasons this is not done here. Hoisting simple +/// arithmetic is a low-level optimization that often isn't very +/// important until late in the optimization process. In fact, passes +/// like InstructionCombining will combine GEPs, even if it means +/// pushing loop-invariant computation down into loops, so even if the +/// GEPs were split here, the work would quickly be undone. The +/// LoopStrengthReduction pass, which is usually run quite late (and +/// after the last InstructionCombining pass), takes care of hoisting +/// loop-invariant portions of expressions, after considering what +/// can be folded using target addressing modes. +/// +Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, + const SCEV *const *op_end, + const PointerType *PTy, + const Type *Ty, + Value *V) { + const Type *ElTy = PTy->getElementType(); + SmallVector GepIndices; + SmallVector Ops(op_begin, op_end); + bool AnyNonZeroIndices = false; + + // Split AddRecs up into parts as either of the parts may be usable + // without the other. + SplitAddRecs(Ops, Ty, SE); + + // Descend down the pointer's type and attempt to convert the other + // operands into GEP indices, at each level. The first index in a GEP + // indexes into the array implied by the pointer operand; the rest of + // the indices index into the element or field type selected by the + // preceding index. + for (;;) { + // If the scale size is not 0, attempt to factor out a scale for + // array indexing. + SmallVector ScaledOps; + if (ElTy->isSized()) { + const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + if (!ElSize->isZero()) { + SmallVector NewOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + const SCEV *Op = Ops[i]; + const SCEV *Remainder = SE.getConstant(Ty, 0); + if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) { + // Op now has ElSize factored out. 
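+            // e.g. with ElSize == 4, an operand {8,+,4} becomes {2,+,1}
+            // and contributes to the scaled GEP index.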
+ ScaledOps.push_back(Op); + if (!Remainder->isZero()) + NewOps.push_back(Remainder); + AnyNonZeroIndices = true; + } else { + // The operand was not divisible, so add it to the list of operands + // we'll scan next iteration. + NewOps.push_back(Ops[i]); + } + } + // If we made any changes, update Ops. + if (!ScaledOps.empty()) { + Ops = NewOps; + SimplifyAddOperands(Ops, Ty, SE); + } + } + } + + // Record the scaled array index for this level of the type. If + // we didn't find any operands that could be factored, tentatively + // assume that element zero was selected (since the zero offset + // would obviously be folded away). + Value *Scaled = ScaledOps.empty() ? + Constant::getNullValue(Ty) : + expandCodeFor(SE.getAddExpr(ScaledOps), Ty); + GepIndices.push_back(Scaled); + + // Collect struct field index operands. + while (const StructType *STy = dyn_cast(ElTy)) { + bool FoundFieldNo = false; + // An empty struct has no fields. + if (STy->getNumElements() == 0) break; + if (SE.TD) { + // With TargetData, field offsets are known. See if a constant offset + // falls within any of the struct fields. + if (Ops.empty()) break; + if (const SCEVConstant *C = dyn_cast(Ops[0])) + if (SE.getTypeSizeInBits(C->getType()) <= 64) { + const StructLayout &SL = *SE.TD->getStructLayout(STy); + uint64_t FullOffset = C->getValue()->getZExtValue(); + if (FullOffset < SL.getSizeInBytes()) { + unsigned ElIdx = SL.getElementContainingOffset(FullOffset); + GepIndices.push_back( + ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); + ElTy = STy->getTypeAtIndex(ElIdx); + Ops[0] = + SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); + AnyNonZeroIndices = true; + FoundFieldNo = true; + } + } + } else { + // Without TargetData, just check for an offsetof expression of the + // appropriate struct type. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (const SCEVUnknown *U = dyn_cast(Ops[i])) { + const Type *CTy; + Constant *FieldNo; + if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) { + GepIndices.push_back(FieldNo); + ElTy = + STy->getTypeAtIndex(cast(FieldNo)->getZExtValue()); + Ops[i] = SE.getConstant(Ty, 0); + AnyNonZeroIndices = true; + FoundFieldNo = true; + break; + } + } + } + // If no struct field offsets were found, tentatively assume that + // field zero was selected (since the zero offset would obviously + // be folded away). + if (!FoundFieldNo) { + ElTy = STy->getTypeAtIndex(0u); + GepIndices.push_back( + Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); + } + } + + if (const ArrayType *ATy = dyn_cast(ElTy)) + ElTy = ATy->getElementType(); + else + break; + } + + // If none of the operands were convertible to proper GEP indices, cast + // the base to i8* and do an ugly getelementptr with that. It's still + // better than ptrtoint+arithmetic+inttoptr at least. + if (!AnyNonZeroIndices) { + // Cast the base to i8*. + V = InsertNoopCastOfTo(V, + Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); + + // Expand the operands for a plain byte offset. + Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); + + // Fold a GEP with constant operands. + if (Constant *CLHS = dyn_cast(V)) + if (Constant *CRHS = dyn_cast(Idx)) + return ConstantExpr::getGetElementPtr(CLHS, &CRHS, 1); + + // Do a quick scan to see if we have this GEP nearby. If so, reuse it. + unsigned ScanLimit = 6; + BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); + // Scanning starts from the last instruction before the insertion point. 
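+    // The small fixed limit keeps this reuse scan cheap; if no identical GEP
+    // is found within a few instructions, a fresh one is emitted below.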
+ BasicBlock::iterator IP = Builder.GetInsertPoint(); + if (IP != BlockBegin) { + --IP; + for (; ScanLimit; --IP, --ScanLimit) { + // Don't count dbg.value against the ScanLimit, to avoid perturbing the + // generated code. + if (isa(IP)) + ScanLimit++; + if (IP->getOpcode() == Instruction::GetElementPtr && + IP->getOperand(0) == V && IP->getOperand(1) == Idx) + return IP; + if (IP == BlockBegin) break; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Emit a GEP. + Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return GEP; + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Move the insertion point out of as many loops as we can. + while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { + if (!L->isLoopInvariant(V)) break; + + bool AnyIndexNotLoopInvariant = false; + for (SmallVectorImpl::const_iterator I = GepIndices.begin(), + E = GepIndices.end(); I != E; ++I) + if (!L->isLoopInvariant(*I)) { + AnyIndexNotLoopInvariant = true; + break; + } + if (AnyIndexNotLoopInvariant) + break; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) break; + + // Ok, move up a level. + Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); + } + + // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, + // because ScalarEvolution may have changed the address arithmetic to + // compute a value which is beyond the end of the allocated object. + Value *Casted = V; + if (V->getType() != PTy) + Casted = InsertNoopCastOfTo(Casted, PTy); + Value *GEP = Builder.CreateGEP(Casted, + GepIndices.begin(), + GepIndices.end(), + "scevgep"); + Ops.push_back(SE.getUnknown(GEP)); + rememberInstruction(GEP); + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return expand(SE.getAddExpr(Ops)); +} + +/// isNonConstantNegative - Return true if the specified scev is negated, but +/// not a constant. +static bool isNonConstantNegative(const SCEV *F) { + const SCEVMulExpr *Mul = dyn_cast(F); + if (!Mul) return false; + + // If there is a constant factor, it will be first. + const SCEVConstant *SC = dyn_cast(Mul->getOperand(0)); + if (!SC) return false; + + // Return true if the value is negative, this matches things like (-42 * V). + return SC->getValue()->getValue().isNegative(); +} + +/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for +/// SCEV expansion. If they are nested, this is the most nested. If they are +/// neighboring, pick the later. 
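+/// For example, given sibling loops where A's header dominates B's header,
+/// B is the later loop and is the one returned.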
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+                                        DominatorTree &DT) {
+  if (!A) return B;
+  if (!B) return A;
+  if (A->contains(B)) return B;
+  if (B->contains(A)) return A;
+  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+  return A; // Arbitrarily break the tie.
+}
+
+/// getRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
+  // Test whether we've already computed the most relevant loop for this SCEV.
+  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
+    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
+  if (!Pair.second)
+    return Pair.first->second;
+
+  if (isa<SCEVConstant>(S))
+    // A constant has no relevant loops.
+    return 0;
+  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
+    // A non-instruction has no relevant loops.
+    return 0;
+  }
+  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+    const Loop *L = 0;
+    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+      L = AR->getLoop();
+    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+         I != E; ++I)
+      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
+    return RelevantLoops[N] = L;
+  }
+  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
+    const Loop *Result = getRelevantLoop(C->getOperand());
+    return RelevantLoops[C] = Result;
+  }
+  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
+    const Loop *Result =
+      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
+                           getRelevantLoop(D->getRHS()),
+                           *SE.DT);
+    return RelevantLoops[D] = Result;
+  }
+  llvm_unreachable("Unexpected SCEV type!");
+  return 0;
+}
+
+namespace {
+
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+  DominatorTree &DT;
+public:
+  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+                  std::pair<const Loop *, const SCEV *> RHS) const {
+    // Keep pointer operands sorted at the end.
+    if (LHS.second->getType()->isPointerTy() !=
+        RHS.second->getType()->isPointerTy())
+      return LHS.second->getType()->isPointerTy();
+
+    // Compare loops with PickMostRelevantLoop.
+    if (LHS.first != RHS.first)
+      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+    // If one operand is a non-constant negative and the other is not,
+    // put the non-constant negative on the right so that a sub can
+    // be used instead of a negate and add.
+    if (isNonConstantNegative(LHS.second)) {
+      if (!isNonConstantNegative(RHS.second))
+        return false;
+    } else if (isNonConstantNegative(RHS.second))
+      return true;
+
+    // Otherwise they are equivalent according to this comparison.
+    return false;
+  }
+};
+
+}
+
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+  const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+
+  // Collect all the add operands in a loop, along with their associated loops.
+  // Iterate in reverse so that constants are emitted last, all else equal, and
+  // so that pointer operands are inserted first, which the code below relies on
+  // to form more involved GEPs.
+  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+       E(S->op_begin()); I != E; ++I)
+    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
+
+  // Sort by loop. Use a stable sort so that constants follow non-constants and
+  // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to add all the operands. Hoist as much as possible + // out of loops, and form meaningful getelementptrs where possible. + Value *Sum = 0; + for (SmallVectorImpl >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const Loop *CurLoop = I->first; + const SCEV *Op = I->second; + if (!Sum) { + // This is the first operand. Just expand it. + Sum = expand(Op); + ++I; + } else if (const PointerType *PTy = dyn_cast(Sum->getType())) { + // The running sum expression is a pointer. Try to form a getelementptr + // at this level with that as the base. + SmallVector NewOps; + for (; I != E && I->first == CurLoop; ++I) { + // If the operand is SCEVUnknown and not instructions, peek through + // it, to enable more of it to be folded into the GEP. + const SCEV *X = I->second; + if (const SCEVUnknown *U = dyn_cast(X)) + if (!isa(U->getValue())) + X = SE.getSCEV(U->getValue()); + NewOps.push_back(X); + } + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); + } else if (const PointerType *PTy = dyn_cast(Op->getType())) { + // The running sum is an integer, and there's a pointer at this level. + // Try to form a getelementptr. If the running sum is instructions, + // use a SCEVUnknown to avoid re-analyzing them. + SmallVector NewOps; + NewOps.push_back(isa(Sum) ? SE.getUnknown(Sum) : + SE.getSCEV(Sum)); + for (++I; I != E && I->first == CurLoop; ++I) + NewOps.push_back(I->second); + Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); + } else if (isNonConstantNegative(Op)) { + // Instead of doing a negate and add, just do a subtract. + Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + Sum = InsertBinop(Instruction::Sub, Sum, W); + ++I; + } else { + // A simple add. + Value *W = expandCodeFor(Op, Ty); + Sum = InsertNoopCastOfTo(Sum, Ty); + // Canonicalize a constant to the RHS. + if (isa(Sum)) std::swap(Sum, W); + Sum = InsertBinop(Instruction::Add, Sum, W); + ++I; + } + } + + return Sum; +} + +Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + // Collect all the mul operands in a loop, along with their associated loops. + // Iterate in reverse so that constants are emitted last, all else equal. + SmallVector, 8> OpsAndLoops; + for (std::reverse_iterator I(S->op_end()), + E(S->op_begin()); I != E; ++I) + OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); + + // Sort by loop. Use a stable sort so that constants follow non-constants. + std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); + + // Emit instructions to mul all the operands. Hoist as much as possible + // out of loops. + Value *Prod = 0; + for (SmallVectorImpl >::iterator + I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { + const SCEV *Op = I->second; + if (!Prod) { + // This is the first operand. Just expand it. + Prod = expand(Op); + ++I; + } else if (Op->isAllOnesValue()) { + // Instead of doing a multiply by negative one, just do a negate. + Prod = InsertNoopCastOfTo(Prod, Ty); + Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod); + ++I; + } else { + // A simple mul. + Value *W = expandCodeFor(Op, Ty); + Prod = InsertNoopCastOfTo(Prod, Ty); + // Canonicalize a constant to the RHS. 
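+      // (This mirrors the usual IR canonical form for commutative operations,
+      // which keeps constants on the right where later folding expects them.)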
+ if (isa(Prod)) std::swap(Prod, W); + Prod = InsertBinop(Instruction::Mul, Prod, W); + ++I; + } + } + + return Prod; +} + +Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + + Value *LHS = expandCodeFor(S->getLHS(), Ty); + if (const SCEVConstant *SC = dyn_cast(S->getRHS())) { + const APInt &RHS = SC->getValue()->getValue(); + if (RHS.isPowerOf2()) + return InsertBinop(Instruction::LShr, LHS, + ConstantInt::get(Ty, RHS.logBase2())); + } + + Value *RHS = expandCodeFor(S->getRHS(), Ty); + return InsertBinop(Instruction::UDiv, LHS, RHS); +} + +/// Move parts of Base into Rest to leave Base with the minimal +/// expression that provides a pointer operand suitable for a +/// GEP expansion. +static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, + ScalarEvolution &SE) { + while (const SCEVAddRecExpr *A = dyn_cast(Base)) { + Base = A->getStart(); + Rest = SE.getAddExpr(Rest, + SE.getAddRecExpr(SE.getConstant(A->getType(), 0), + A->getStepRecurrence(SE), + A->getLoop())); + } + if (const SCEVAddExpr *A = dyn_cast(Base)) { + Base = A->getOperand(A->getNumOperands()-1); + SmallVector NewAddOps(A->op_begin(), A->op_end()); + NewAddOps.back() = Rest; + Rest = SE.getAddExpr(NewAddOps); + ExposePointerBase(Base, Rest, SE); + } +} + +/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand +/// the base addrec, which is the addrec without any non-loop-dominating +/// values, and return the PHI. +PHINode * +SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, + const Loop *L, + const Type *ExpandTy, + const Type *IntTy) { + // Reuse a previously-inserted PHI, if present. + for (BasicBlock::iterator I = L->getHeader()->begin(); + PHINode *PN = dyn_cast(I); ++I) + if (SE.isSCEVable(PN->getType()) && + (SE.getEffectiveSCEVType(PN->getType()) == + SE.getEffectiveSCEVType(Normalized->getType())) && + SE.getSCEV(PN) == Normalized) + if (BasicBlock *LatchBlock = L->getLoopLatch()) { + Instruction *IncV = + cast(PN->getIncomingValueForBlock(LatchBlock)); + + // Determine if this is a well-behaved chain of instructions leading + // back to the PHI. It probably will be, if we're scanning an inner + // loop already visited by LSR for example, but it wouldn't have + // to be. + do { + if (IncV->getNumOperands() == 0 || isa(IncV) || + (isa(IncV) && !isa(IncV))) { + IncV = 0; + break; + } + // If any of the operands don't dominate the insert position, bail. + // Addrec operands are always loop-invariant, so this can only happen + // if there are instructions which haven't been hoisted. + for (User::op_iterator OI = IncV->op_begin()+1, + OE = IncV->op_end(); OI != OE; ++OI) + if (Instruction *OInst = dyn_cast(OI)) + if (!SE.DT->dominates(OInst, IVIncInsertPos)) { + IncV = 0; + break; + } + if (!IncV) + break; + // Advance to the next instruction. + IncV = dyn_cast(IncV->getOperand(0)); + if (!IncV) + break; + if (IncV->mayHaveSideEffects()) { + IncV = 0; + break; + } + } while (IncV != PN); + + if (IncV) { + // Ok, the add recurrence looks usable. + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + // Remember the increment. + IncV = cast(PN->getIncomingValueForBlock(LatchBlock)); + rememberInstruction(IncV); + if (L == IVIncInsertLoop) + do { + if (SE.DT->dominates(IncV, IVIncInsertPos)) + break; + // Make sure the increment is where we want it. But don't move it + // down past a potential existing post-inc user. 
+ IncV->moveBefore(IVIncInsertPos); + IVIncInsertPos = IncV; + IncV = cast(IncV->getOperand(0)); + } while (IncV != PN); + return PN; + } + } + + // Save the original insertion point so we can restore it when we're done. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + + // Expand code for the start value. + Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, + L->getHeader()->begin()); + + // Expand code for the step value. Insert instructions right before the + // terminator corresponding to the back-edge. Do this before creating the PHI + // so that PHI reuse code doesn't see an incomplete PHI. If the stride is + // negative, insert a sub instead of an add for the increment (unless it's a + // constant, because subtracts of constants are canonicalized to adds). + const SCEV *Step = Normalized->getStepRecurrence(SE); + bool isPointer = ExpandTy->isPointerTy(); + bool isNegative = !isPointer && isNonConstantNegative(Step); + if (isNegative) + Step = SE.getNegativeSCEV(Step); + Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + + // Create the PHI. + Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin()); + PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv"); + rememberInstruction(PN); + + // Create the step instructions and populate the PHI. + BasicBlock *Header = L->getHeader(); + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) { + BasicBlock *Pred = *HPI; + + // Add a start value. + if (!L->contains(Pred)) { + PN->addIncoming(StartV, Pred); + continue; + } + + // Create a step value and add it to the PHI. If IVIncInsertLoop is + // non-null and equal to the addrec's loop, insert the instructions + // at IVIncInsertPos. + Instruction *InsertPos = L == IVIncInsertLoop ? + IVIncInsertPos : Pred->getTerminator(); + Builder.SetInsertPoint(InsertPos->getParent(), InsertPos); + Value *IncV; + // If the PHI is a pointer, use a GEP, otherwise use an add or sub. + if (isPointer) { + const PointerType *GEPPtrTy = cast(ExpandTy); + // If the step isn't constant, don't use an implicitly scaled GEP, because + // that would require a multiply inside the loop. + if (!isa(StepV)) + GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), + GEPPtrTy->getAddressSpace()); + const SCEV *const StepArray[1] = { SE.getSCEV(StepV) }; + IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN); + if (IncV->getType() != PN->getType()) { + IncV = Builder.CreateBitCast(IncV, PN->getType(), "tmp"); + rememberInstruction(IncV); + } + } else { + IncV = isNegative ? + Builder.CreateSub(PN, StepV, "lsr.iv.next") : + Builder.CreateAdd(PN, StepV, "lsr.iv.next"); + rememberInstruction(IncV); + } + PN->addIncoming(IncV, Pred); + } + + // Restore the original insert point. + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + // Remember this PHI, even in post-inc mode. + InsertedValues.insert(PN); + + return PN; +} + +Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { + const Type *STy = S->getType(); + const Type *IntTy = SE.getEffectiveSCEVType(STy); + const Loop *L = S->getLoop(); + + // Determine a normalized form of this expression, which is the expression + // before any post-inc adjustment is made. 
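+  // e.g. for a use of {1,+,1}<%L> in post-inc mode, normalization subtracts
+  // the step, and the pre-increment form {0,+,1}<%L> is expanded instead.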
+ const SCEVAddRecExpr *Normalized = S; + if (PostIncLoops.count(L)) { + PostIncLoopSet Loops; + Loops.insert(L); + Normalized = + cast(TransformForPostIncUse(Normalize, S, 0, 0, + Loops, SE, *SE.DT)); + } + + // Strip off any non-loop-dominating component from the addrec start. + const SCEV *Start = Normalized->getStart(); + const SCEV *PostLoopOffset = 0; + if (!SE.properlyDominates(Start, L->getHeader())) { + PostLoopOffset = Start; + Start = SE.getConstant(Normalized->getType(), 0); + Normalized = + cast(SE.getAddRecExpr(Start, + Normalized->getStepRecurrence(SE), + Normalized->getLoop())); + } + + // Strip off any non-loop-dominating component from the addrec step. + const SCEV *Step = Normalized->getStepRecurrence(SE); + const SCEV *PostLoopScale = 0; + if (!SE.dominates(Step, L->getHeader())) { + PostLoopScale = Step; + Step = SE.getConstant(Normalized->getType(), 1); + Normalized = + cast(SE.getAddRecExpr(Start, Step, + Normalized->getLoop())); + } + + // Expand the core addrec. If we need post-loop scaling, force it to + // expand to an integer type to avoid the need for additional casting. + const Type *ExpandTy = PostLoopScale ? IntTy : STy; + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy); + + // Accommodate post-inc mode, if necessary. + Value *Result; + if (!PostIncLoops.count(L)) + Result = PN; + else { + // In PostInc mode, use the post-incremented value. + BasicBlock *LatchBlock = L->getLoopLatch(); + assert(LatchBlock && "PostInc mode requires a unique loop latch!"); + Result = PN->getIncomingValueForBlock(LatchBlock); + } + + // Re-apply any non-loop-dominating scale. + if (PostLoopScale) { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateMul(Result, + expandCodeFor(PostLoopScale, IntTy)); + rememberInstruction(Result); + } + + // Re-apply any non-loop-dominating offset. + if (PostLoopOffset) { + if (const PointerType *PTy = dyn_cast(ExpandTy)) { + const SCEV *const OffsetArray[1] = { PostLoopOffset }; + Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + } else { + Result = InsertNoopCastOfTo(Result, IntTy); + Result = Builder.CreateAdd(Result, + expandCodeFor(PostLoopOffset, IntTy)); + rememberInstruction(Result); + } + } + + return Result; +} + +Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { + if (!CanonicalMode) return expandAddRecExprLiterally(S); + + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + const Loop *L = S->getLoop(); + + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = 0; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow. 
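+  // e.g. with an i64 canonical IV and an i32 addrec, the addrec's operands
+  // are any-extended to i64, the wider expression is expanded, and the
+  // result is truncated back to i32.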
+ if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > + SE.getTypeSizeInBits(Ty)) { + SmallVector NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BasicBlock::iterator NewInsertPt = + llvm::next(BasicBlock::iterator(cast(V))); + while (isa(NewInsertPt) || isa(NewInsertPt)) + ++NewInsertPt; + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + NewInsertPt); + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + return V; + } + + // {X,+,F} --> X + {0,+,F} + if (!S->getStart()->isZero()) { + SmallVector NewOps(S->op_begin(), S->op_end()); + NewOps[0] = SE.getConstant(Ty, 0); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L); + + // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the + // comments on expandAddToGEP for details. + const SCEV *Base = S->getStart(); + const SCEV *RestArray[1] = { Rest }; + // Dig into the expression to find the pointer base for a GEP. + ExposePointerBase(Base, RestArray[0], SE); + // If we found a pointer, expand the AddRec with a GEP. + if (const PointerType *PTy = dyn_cast(Base->getType())) { + // Make sure the Base isn't something exotic, such as a multiplied + // or divided pointer value. In those cases, the result type isn't + // actually a pointer type. + if (!isa(Base) && !isa(Base)) { + Value *StartV = expand(Base); + assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); + return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV); + } + } + + // Just do a normal add. Pre-expand the operands to suppress folding. + return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())), + SE.getUnknown(expand(Rest)))); + } + + // If we don't yet have a canonical IV, create one. + if (!CanonicalIV) { + // Create and insert the PHI node for the induction variable in the + // specified loop. + BasicBlock *Header = L->getHeader(); + CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin()); + rememberInstruction(CanonicalIV); + + Constant *One = ConstantInt::get(Ty, 1); + for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); + HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (L->contains(HP)) { + // Insert a unit add instruction right before the terminator + // corresponding to the back-edge. + Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, + "indvar.next", + HP->getTerminator()); + rememberInstruction(Add); + CanonicalIV->addIncoming(Add, HP); + } else { + CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); + } + } + } + + // {0,+,1} --> Insert a canonical induction variable into the loop! + if (S->isAffine() && S->getOperand(1)->isOne()) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + + // {0,+,F} --> {0,+,1} * F + + // If this is a simple linear addrec, emit it now as a special case. + if (S->isAffine()) // {0,+,F} --> i*F + return + expand(SE.getTruncateOrNoop( + SE.getMulExpr(SE.getUnknown(CanonicalIV), + SE.getNoopOrAnyExtend(S->getOperand(1), + CanonicalIV->getType())), + Ty)); + + // If this is a chain of recurrences, turn it into a closed form, using the + // folders, then expandCodeFor the closed form. 
This allows the folders to + // simplify the expression without having to build a bunch of special code + // into this folder. + const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. + + // Promote S up to the canonical IV type, if the cast is foldable. + const SCEV *NewS = S; + const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); + if (isa(Ext)) + NewS = Ext; + + const SCEV *V = cast(NewS)->evaluateAtIteration(IH, SE); + //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + + // Truncate the result down to the original type, if needed. + const SCEV *T = SE.getTruncateOrNoop(V, Ty); + return expand(T); +} + +Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateTrunc(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateZExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { + const Type *Ty = SE.getEffectiveSCEVType(S->getType()); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); + Value *I = Builder.CreateSExt(V, Ty, "tmp"); + rememberInstruction(I); + return I; +} + +Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. + if (LHS->getType() != S->getType()) + LHS = InsertNoopCastOfTo(LHS, S->getType()); + return LHS; +} + +Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); + const Type *Ty = LHS->getType(); + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. + if (S->getOperand(i)->getType() != Ty) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS, "tmp"); + rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; + } + // In the case of mixed integer and pointer types, cast the + // final result back to the pointer type. 
+  if (LHS->getType() != S->getType())
+    LHS = InsertNoopCastOfTo(LHS, S->getType());
+  return LHS;
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty,
+                                   Instruction *I) {
+  BasicBlock::iterator IP = I;
+  while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP))
+    ++IP;
+  Builder.SetInsertPoint(IP->getParent(), IP);
+  return expandCodeFor(SH, Ty);
+}
+
+Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) {
+  // Expand the code for this SCEV.
+  Value *V = expand(SH);
+  if (Ty) {
+    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
+           "non-trivial casts should be done with the SCEVs directly!");
+    V = InsertNoopCastOfTo(V, Ty);
+  }
+  return V;
+}
+
+Value *SCEVExpander::expand(const SCEV *S) {
+  // Compute an insertion point for this SCEV object. Hoist the instructions
+  // as far out in the loop nest as possible.
+  Instruction *InsertPt = Builder.GetInsertPoint();
+  for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
+       L = L->getParentLoop())
+    if (SE.isLoopInvariant(S, L)) {
+      if (!L) break;
+      if (BasicBlock *Preheader = L->getLoopPreheader())
+        InsertPt = Preheader->getTerminator();
+    } else {
+      // If the SCEV is computable at this level, insert it into the header
+      // after the PHIs (and after any other instructions that we've inserted
+      // there) so that it is guaranteed to dominate any user inside the loop.
+      if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
+        InsertPt = L->getHeader()->getFirstNonPHI();
+      while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt))
+        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
+      break;
+    }
+
+  // Check to see if we already expanded this here.
+  std::map<std::pair<const SCEV *, Instruction *>,
+           AssertingVH<Value> >::iterator I =
+    InsertedExpressions.find(std::make_pair(S, InsertPt));
+  if (I != InsertedExpressions.end())
+    return I->second;
+
+  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
+
+  // Expand the expression into instructions.
+  Value *V = visit(S);
+
+  // Remember the expanded value for this SCEV at this location.
+  if (PostIncLoops.empty())
+    InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+  restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+  return V;
+}
+
+void SCEVExpander::rememberInstruction(Value *I) {
+  if (!PostIncLoops.empty())
+    InsertedPostIncValues.insert(I);
+  else
+    InsertedValues.insert(I);
+
+  // If we just claimed an existing instruction and that instruction had
+  // been the insert point, adjust the insert point forward so that
+  // subsequently inserted code will be dominated.
+  if (Builder.GetInsertPoint() == I) {
+    BasicBlock::iterator It = cast<Instruction>(I);
+    do { ++It; } while (isInsertedInstruction(It) ||
+                        isa<DbgInfoIntrinsic>(It));
+    Builder.SetInsertPoint(Builder.GetInsertBlock(), It);
+  }
+}
+
+void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
+  // If we acquired more instructions since the old insert point was saved,
+  // advance past them.
+  while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I;
+
+  Builder.SetInsertPoint(BB, I);
+}
+
+/// getOrInsertCanonicalInductionVariable - This method returns the
+/// canonical induction variable of the specified type for the specified
+/// loop (inserting one if there is none). A canonical induction variable
+/// starts at zero and steps by one on each iteration.
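+///
+/// A typical call, with hypothetical caller-side names:
+///   PHINode *IV = Expander.getOrInsertCanonicalInductionVariable(L, Ty);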
+PHINode * +SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, + const Type *Ty) { + assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); + + // Build a SCEV for {0,+,1}. + const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), + SE.getConstant(Ty, 1), L); + + // Emit code for it. + BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); + BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + PHINode *V = cast(expandCodeFor(H, 0, L->getHeader()->begin())); + if (SaveInsertBB) + restoreInsertPoint(SaveInsertBB, SaveInsertPt); + + return V; +} diff --git a/final/lib/Analysis/ScalarEvolutionNormalization.cpp b/final/lib/Analysis/ScalarEvolutionNormalization.cpp new file mode 100644 index 00000000000..ac36cef89eb --- /dev/null +++ b/final/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -0,0 +1,183 @@ +//===- ScalarEvolutionNormalization.cpp - See below -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for working with "normalized" expressions. +// See the comments at the top of ScalarEvolutionNormalization.h for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ScalarEvolutionNormalization.h" +using namespace llvm; + +/// IVUseShouldUsePostIncValue - We have discovered a "User" of an IV expression +/// and now we need to decide whether the user should use the preinc or post-inc +/// value. If this user should use the post-inc version of the IV, return true. +/// +/// Choosing wrong here can break dominance properties (if we choose to use the +/// post-inc value when we cannot) or it can end up adding extra live-ranges to +/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we +/// should use the post-inc value). +static bool IVUseShouldUsePostIncValue(Instruction *User, Value *Operand, + const Loop *L, DominatorTree *DT) { + // If the user is in the loop, use the preinc value. + if (L->contains(User)) return false; + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) + return false; + + // Ok, the user is outside of the loop. If it is dominated by the latch + // block, use the post-inc value. + if (DT->dominates(LatchBlock, User->getParent())) + return true; + + // There is one case we have to be careful of: PHI nodes. These little guys + // can live in blocks that are not dominated by the latch block, but (since + // their uses occur in the predecessor block, not the block the PHI lives in) + // should still use the post-inc value. Check for this case now. + PHINode *PN = dyn_cast(User); + if (!PN || !Operand) return false; // not a phi, not dominated by latch block. + + // Look at all of the uses of Operand by the PHI node. If any use corresponds + // to a block that is not dominated by the latch block, give up and use the + // preincremented value. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Operand && + !DT->dominates(LatchBlock, PN->getIncomingBlock(i))) + return false; + + // Okay, all uses of Operand by PN are in predecessor blocks that really are + // dominated by the latch block. 
+
+const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
+                                         const SCEV *S,
+                                         Instruction *User,
+                                         Value *OperandValToReplace,
+                                         PostIncLoopSet &Loops,
+                                         ScalarEvolution &SE,
+                                         DominatorTree &DT) {
+  if (isa<SCEVConstant>(S) || isa<SCEVUnknown>(S))
+    return S;
+
+  if (const SCEVCastExpr *X = dyn_cast<SCEVCastExpr>(S)) {
+    const SCEV *O = X->getOperand();
+    const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+                                           Loops, SE, DT);
+    if (O != N)
+      switch (S->getSCEVType()) {
+      case scZeroExtend: return SE.getZeroExtendExpr(N, S->getType());
+      case scSignExtend: return SE.getSignExtendExpr(N, S->getType());
+      case scTruncate: return SE.getTruncateExpr(N, S->getType());
+      default: llvm_unreachable("Unexpected SCEVCastExpr kind!");
+      }
+    return S;
+  }
+
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
+    // An addrec. This is the interesting part.
+    SmallVector<const SCEV *, 8> Operands;
+    const Loop *L = AR->getLoop();
+    // The addrec conceptually uses its operands at loop entry.
+    Instruction *LUser = L->getHeader()->begin();
+    // Transform each operand.
+    for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();
+         I != E; ++I) {
+      const SCEV *O = *I;
+      const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT);
+      Operands.push_back(N);
+    }
+    const SCEV *Result = SE.getAddRecExpr(Operands, L);
+    switch (Kind) {
+    default: llvm_unreachable("Unexpected transform name!");
+    case NormalizeAutodetect:
+      if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) {
+        const SCEV *TransformedStep =
+          TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+                                 User, OperandValToReplace, Loops, SE, DT);
+        Result = SE.getMinusSCEV(Result, TransformedStep);
+        Loops.insert(L);
+      }
+#if 0
+      // This assert is conceptually correct, but ScalarEvolution currently
+      // sometimes fails to canonicalize two equal SCEVs to exactly the same
+      // form. It's possibly a pessimization when this happens, but it isn't a
+      // correctness problem, so disable this assert for now.
+      assert(S == TransformForPostIncUse(Denormalize, Result,
+                                         User, OperandValToReplace,
+                                         Loops, SE, DT) &&
+             "SCEV normalization is not invertible!");
+#endif
+      break;
+    case Normalize:
+      if (Loops.count(L)) {
+        const SCEV *TransformedStep =
+          TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+                                 User, OperandValToReplace, Loops, SE, DT);
+        Result = SE.getMinusSCEV(Result, TransformedStep);
+      }
+#if 0
+      // See the comment on the assert above.
+      assert(S == TransformForPostIncUse(Denormalize, Result,
+                                         User, OperandValToReplace,
+                                         Loops, SE, DT) &&
+             "SCEV normalization is not invertible!");
+#endif
+      break;
+    case Denormalize:
+      if (Loops.count(L))
+        Result = cast<SCEVAddRecExpr>(Result)->getPostIncExpr(SE);
+      break;
+    }
+    return Result;
+  }
+
+  if (const SCEVNAryExpr *X = dyn_cast<SCEVNAryExpr>(S)) {
+    SmallVector<const SCEV *, 8> Operands;
+    bool Changed = false;
+    // Transform each operand.
+    for (SCEVNAryExpr::op_iterator I = X->op_begin(), E = X->op_end();
+         I != E; ++I) {
+      const SCEV *O = *I;
+      const SCEV *N = TransformForPostIncUse(Kind, O, User, OperandValToReplace,
+                                             Loops, SE, DT);
+      Changed |= N != O;
+      Operands.push_back(N);
+    }
+    // If any operand actually changed, return a transformed result.
+    if (Changed)
+      switch (S->getSCEVType()) {
+      case scAddExpr: return SE.getAddExpr(Operands);
+      case scMulExpr: return SE.getMulExpr(Operands);
+      case scSMaxExpr: return SE.getSMaxExpr(Operands);
+      case scUMaxExpr: return SE.getUMaxExpr(Operands);
+      default: llvm_unreachable("Unexpected SCEVNAryExpr kind!");
+      }
+    return S;
+  }
+
+  if (const SCEVUDivExpr *X = dyn_cast<SCEVUDivExpr>(S)) {
+    const SCEV *LO = X->getLHS();
+    const SCEV *RO = X->getRHS();
+    const SCEV *LN = TransformForPostIncUse(Kind, LO, User, OperandValToReplace,
+                                            Loops, SE, DT);
+    const SCEV *RN = TransformForPostIncUse(Kind, RO, User, OperandValToReplace,
+                                            Loops, SE, DT);
+    if (LO != LN || RO != RN)
+      return SE.getUDivExpr(LN, RN);
+    return S;
+  }
+
+  llvm_unreachable("Unexpected SCEV kind!");
+  return 0;
+}
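
A usage sketch of the entry point above (roundTrip is a hypothetical helper; User and Op stand for a real IV user and the operand being replaced):

    #include "llvm/Analysis/ScalarEvolutionNormalization.h"

    using namespace llvm;

    static const SCEV *roundTrip(ScalarEvolution &SE, DominatorTree &DT,
                                 Instruction *User, Value *Op, const SCEV *S) {
      PostIncLoopSet Loops;  // Records loops whose post-inc value was chosen.
      const SCEV *N = TransformForPostIncUse(NormalizeAutodetect, S, User, Op,
                                             Loops, SE, DT);
      // For a post-inc user of {0,+,1}<L>, N is {-1,+,1}<L>: the step has
      // been subtracted out and L has been added to Loops.
      const SCEV *D = TransformForPostIncUse(Denormalize, N, User, Op,
                                             Loops, SE, DT);
      // Denormalize maps {-1,+,1}<L> back to its post-increment form
      // {0,+,1}<L>, normally recovering S (see the disabled asserts above).
      return D;
    }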
diff --git a/final/lib/Analysis/SparsePropagation.cpp b/final/lib/Analysis/SparsePropagation.cpp
new file mode 100644
index 00000000000..d8c207b4bd4
--- /dev/null
+++ b/final/lib/Analysis/SparsePropagation.cpp
@@ -0,0 +1,347 @@
+//===- SparsePropagation.cpp - Sparse Conditional Property Propagation ----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an abstract sparse conditional propagation algorithm,
+// modeled after SCCP, but with a customizable lattice function.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sparseprop"
+#include "llvm/Analysis/SparsePropagation.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//                  AbstractLatticeFunction Implementation
+//===----------------------------------------------------------------------===//
+
+AbstractLatticeFunction::~AbstractLatticeFunction() {}
+
+/// PrintValue - Render the specified lattice value to the specified stream.
+void AbstractLatticeFunction::PrintValue(LatticeVal V, raw_ostream &OS) {
+  if (V == UndefVal)
+    OS << "undefined";
+  else if (V == OverdefinedVal)
+    OS << "overdefined";
+  else if (V == UntrackedVal)
+    OS << "untracked";
+  else
+    OS << "unknown lattice value";
+}
+
+//===----------------------------------------------------------------------===//
+//                          SparseSolver Implementation
+//===----------------------------------------------------------------------===//
+
+/// getOrInitValueState - Return the LatticeVal object that corresponds to the
+/// value, initializing the value's state if it hasn't been entered into the
+/// map yet. This function is necessary because not all values should start
+/// out in the underdefined state... Arguments should be overdefined, and
+/// constants should be marked as constants.
+///
+SparseSolver::LatticeVal SparseSolver::getOrInitValueState(Value *V) {
+  DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(V);
+  if (I != ValueState.end()) return I->second;  // Common case, in the map
+
+  LatticeVal LV;
+  if (LatticeFunc->IsUntrackedValue(V))
+    return LatticeFunc->getUntrackedVal();
+  else if (Constant *C = dyn_cast<Constant>(V))
+    LV = LatticeFunc->ComputeConstant(C);
+  else if (Argument *A = dyn_cast<Argument>(V))
+    LV = LatticeFunc->ComputeArgument(A);
+  else if (!isa<Instruction>(V))
+    // All other non-instructions are overdefined.
+    LV = LatticeFunc->getOverdefinedVal();
+  else
+    // All instructions are underdefined by default.
+    LV = LatticeFunc->getUndefVal();
+
+  // If this value is untracked, don't add it to the map.
+  if (LV == LatticeFunc->getUntrackedVal())
+    return LV;
+  return ValueState[V] = LV;
+}
+
+/// UpdateState - When the state for some instruction is potentially updated,
+/// this function notices and adds I to the worklist if needed.
+void SparseSolver::UpdateState(Instruction &Inst, LatticeVal V) {
+  DenseMap<Value*, LatticeVal>::iterator I = ValueState.find(&Inst);
+  if (I != ValueState.end() && I->second == V)
+    return;  // No change.
+
+  // An update. Visit uses of I.
+  ValueState[&Inst] = V;
+  InstWorkList.push_back(&Inst);
+}
+
+/// MarkBlockExecutable - This method can be used by clients to mark all of
+/// the blocks that are known to be intrinsically live in the processed unit.
+void SparseSolver::MarkBlockExecutable(BasicBlock *BB) {
+  DEBUG(dbgs() << "Marking Block Executable: " << BB->getName() << "\n");
+  BBExecutable.insert(BB);   // Basic block is executable!
+  BBWorkList.push_back(BB);  // Add the block to the work list!
+}
+
+/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
+/// work list if it is not already executable...
+void SparseSolver::markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest) {
+  if (!KnownFeasibleEdges.insert(Edge(Source, Dest)).second)
+    return;  // This edge is already known to be executable!
+
+  DEBUG(dbgs() << "Marking Edge Executable: " << Source->getName()
+               << " -> " << Dest->getName() << "\n");
+
+  if (BBExecutable.count(Dest)) {
+    // The destination is already executable, but we just made an edge
+    // feasible that wasn't before. Revisit the PHI nodes in the block
+    // because they have potentially new operands.
+    for (BasicBlock::iterator I = Dest->begin(); isa<PHINode>(I); ++I)
+      visitPHINode(*cast<PHINode>(I));
+
+  } else {
+    MarkBlockExecutable(Dest);
+  }
+}
+
+
+/// getFeasibleSuccessors - Return a vector of booleans to indicate which
+/// successors are reachable from a given terminator instruction.
+void SparseSolver::getFeasibleSuccessors(TerminatorInst &TI,
+                                         SmallVectorImpl<bool> &Succs,
+                                         bool AggressiveUndef) {
+  Succs.resize(TI.getNumSuccessors());
+  if (TI.getNumSuccessors() == 0) return;
+
+  if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+    if (BI->isUnconditional()) {
+      Succs[0] = true;
+      return;
+    }
+
+    LatticeVal BCValue;
+    if (AggressiveUndef)
+      BCValue = getOrInitValueState(BI->getCondition());
+    else
+      BCValue = getLatticeState(BI->getCondition());
+
+    if (BCValue == LatticeFunc->getOverdefinedVal() ||
+        BCValue == LatticeFunc->getUntrackedVal()) {
+      // Overdefined condition variables can branch either way.
+      Succs[0] = Succs[1] = true;
+      return;
+    }
+
+    // If undefined, neither is feasible yet.
+    if (BCValue == LatticeFunc->getUndefVal())
+      return;
+
+    Constant *C = LatticeFunc->GetConstant(BCValue, BI->getCondition(), *this);
+    if (C == 0 || !isa<ConstantInt>(C)) {
+      // Non-constant values can go either way.
+      Succs[0] = Succs[1] = true;
+      return;
+    }
+
+    // Constant condition variables mean the branch can only go a single way.
+    Succs[C->isNullValue()] = true;
+    return;
+  }
+
+  if (isa<InvokeInst>(TI)) {
+    // Invoke instructions' successors are always executable.
+    // TODO: Could ask the lattice function if the value can throw.
+    Succs[0] = Succs[1] = true;
+    return;
+  }
+
+  if (isa<IndirectBrInst>(TI)) {
+    Succs.assign(Succs.size(), true);
+    return;
+  }
+
+  SwitchInst &SI = cast<SwitchInst>(TI);
+  LatticeVal SCValue;
+  if (AggressiveUndef)
+    SCValue = getOrInitValueState(SI.getCondition());
+  else
+    SCValue = getLatticeState(SI.getCondition());
+
+  if (SCValue == LatticeFunc->getOverdefinedVal() ||
+      SCValue == LatticeFunc->getUntrackedVal()) {
+    // All destinations are executable!
+    Succs.assign(TI.getNumSuccessors(), true);
+    return;
+  }
+
+  // If undefined, neither is feasible yet.
+  if (SCValue == LatticeFunc->getUndefVal())
+    return;
+
+  Constant *C = LatticeFunc->GetConstant(SCValue, SI.getCondition(), *this);
+  if (C == 0 || !isa<ConstantInt>(C)) {
+    // All destinations are executable!
+    Succs.assign(TI.getNumSuccessors(), true);
+    return;
+  }
+
+  Succs[SI.findCaseValue(cast<ConstantInt>(C))] = true;
+}
+
+
+/// isEdgeFeasible - Return true if the control flow edge from the 'From'
+/// basic block to the 'To' basic block is currently feasible...
+bool SparseSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To,
+                                  bool AggressiveUndef) {
+  SmallVector<bool, 16> SuccFeasible;
+  TerminatorInst *TI = From->getTerminator();
+  getFeasibleSuccessors(*TI, SuccFeasible, AggressiveUndef);
+
+  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
+    if (TI->getSuccessor(i) == To && SuccFeasible[i])
+      return true;
+
+  return false;
+}
+
+void SparseSolver::visitTerminatorInst(TerminatorInst &TI) {
+  SmallVector<bool, 16> SuccFeasible;
+  getFeasibleSuccessors(TI, SuccFeasible, true);
+
+  BasicBlock *BB = TI.getParent();
+
+  // Mark all feasible successors executable...
+  for (unsigned i = 0, e = SuccFeasible.size(); i != e; ++i)
+    if (SuccFeasible[i])
+      markEdgeExecutable(BB, TI.getSuccessor(i));
+}
+
+void SparseSolver::visitPHINode(PHINode &PN) {
+  // The lattice function may store more information on a PHINode than could be
+  // computed from its incoming values. For example, SSI form stores its sigma
+  // functions as PHINodes with a single incoming value.
+  if (LatticeFunc->IsSpecialCasedPHI(&PN)) {
+    LatticeVal IV = LatticeFunc->ComputeInstructionState(PN, *this);
+    if (IV != LatticeFunc->getUntrackedVal())
+      UpdateState(PN, IV);
+    return;
+  }
+
+  LatticeVal PNIV = getOrInitValueState(&PN);
+  LatticeVal Overdefined = LatticeFunc->getOverdefinedVal();
+
+  // If this value is already overdefined (common) just return.
+  if (PNIV == Overdefined || PNIV == LatticeFunc->getUntrackedVal())
+    return;  // Quick exit
+
+  // Super-extra-high-degree PHI nodes are unlikely to ever be interesting,
+  // and slow us down a lot. Just mark them overdefined.
+  if (PN.getNumIncomingValues() > 64) {
+    UpdateState(PN, Overdefined);
+    return;
+  }
+
+  // Look at all of the executable operands of the PHI node. If any of them
+  // are overdefined, the PHI becomes overdefined as well. Otherwise, ask the
+  // transfer function to give us the merge of the incoming values.
+  for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+    // If the edge is not yet known to be feasible, it doesn't impact the PHI.
+    if (!isEdgeFeasible(PN.getIncomingBlock(i), PN.getParent(), true))
+      continue;
+
+    // Merge in this value.
+    LatticeVal OpVal = getOrInitValueState(PN.getIncomingValue(i));
+    if (OpVal != PNIV)
+      PNIV = LatticeFunc->MergeValues(PNIV, OpVal);
+
+    if (PNIV == Overdefined)
+      break;  // Rest of input values don't matter.
+  }
+
+  // Update the PHI with the computed value, which is the merge of the inputs.
+  UpdateState(PN, PNIV);
+}
+
+
+void SparseSolver::visitInst(Instruction &I) {
+  // PHIs are handled by the propagation logic, they are never passed into the
+  // transfer functions.
+  if (PHINode *PN = dyn_cast<PHINode>(&I))
+    return visitPHINode(*PN);
+
+  // Otherwise, ask the transfer function what the result is. If this is
+  // something that we care about, remember it.
+  LatticeVal IV = LatticeFunc->ComputeInstructionState(I, *this);
+  if (IV != LatticeFunc->getUntrackedVal())
+    UpdateState(I, IV);
+
+  if (TerminatorInst *TI = dyn_cast<TerminatorInst>(&I))
+    visitTerminatorInst(*TI);
+}
+
+void SparseSolver::Solve(Function &F) {
+  MarkBlockExecutable(&F.getEntryBlock());
+
+  // Process the work lists until they are empty!
+  while (!BBWorkList.empty() || !InstWorkList.empty()) {
+    // Process the instruction work list.
+    while (!InstWorkList.empty()) {
+      Instruction *I = InstWorkList.back();
+      InstWorkList.pop_back();
+
+      DEBUG(dbgs() << "\nPopped off I-WL: " << *I << "\n");
+
+      // "I" got into the work list because it made a transition. See if any
+      // users are both live and in need of updating.
+      for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
+           UI != E; ++UI) {
+        Instruction *U = cast<Instruction>(*UI);
+        if (BBExecutable.count(U->getParent()))   // Inst is executable?
+          visitInst(*U);
+      }
+    }
+
+    // Process the basic block work list.
+    while (!BBWorkList.empty()) {
+      BasicBlock *BB = BBWorkList.back();
+      BBWorkList.pop_back();
+
+      DEBUG(dbgs() << "\nPopped off BBWL: " << *BB);
+
+      // Notify all instructions in this basic block that they are newly
+      // executable.
+      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+        visitInst(*I);
+    }
+  }
+}
+
+void SparseSolver::Print(Function &F, raw_ostream &OS) const {
+  OS << "\nFUNCTION: " << F.getNameStr() << "\n";
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (!BBExecutable.count(BB))
+      OS << "INFEASIBLE: ";
+    OS << "\t";
+    if (BB->hasName())
+      OS << BB->getNameStr() << ":\n";
+    else
+      OS << "; anon bb\n";
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      LatticeFunc->PrintValue(getLatticeState(I), OS);
+      OS << *I << "\n";
+    }
+
+    OS << "\n";
+  }
+}
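
A compile-only sketch of a client of this solver (the lattice here is hypothetical, and it assumes the 2.9-era SparseSolver constructor that takes ownership of the lattice function; every transfer function falls back to the conservative defaults declared in SparsePropagation.h):

    #include "llvm/Analysis/SparsePropagation.h"
    #include "llvm/Function.h"
    #include "llvm/Support/Debug.h"

    using namespace llvm;

    namespace {
      // LatticeVal is an opaque void*, so distinct tag addresses serve as
      // the undefined / overdefined / untracked sentinels.
      char UndefTag, OverdefTag, UntrackedTag;

      struct DemoLattice : public AbstractLatticeFunction {
        DemoLattice()
          : AbstractLatticeFunction(&UndefTag, &OverdefTag, &UntrackedTag) {}
      };
    }

    static void solveDemo(Function &F) {
      // SparseSolver deletes the lattice function in its destructor, so it
      // must be heap-allocated.
      SparseSolver Solver(new DemoLattice());
      Solver.Solve(F);
      Solver.Print(F, dbgs());
    }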
diff --git a/final/lib/Analysis/Trace.cpp b/final/lib/Analysis/Trace.cpp
new file mode 100644
index 00000000000..68a39cd581f
--- /dev/null
+++ b/final/lib/Analysis/Trace.cpp
@@ -0,0 +1,51 @@
+//===- Trace.cpp - Implementation of Trace class --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a single trace of LLVM basic blocks. A trace is a
+// single entry, multiple exit, region of code that is often hot. Trace-based
+// optimizations treat traces almost like they are a large, strange, basic
+// block: because the trace path is assumed to be hot, optimizations for the
+// fall-through path are made at the expense of the non-fall-through paths.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Trace.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+Function *Trace::getFunction() const {
+  return getEntryBasicBlock()->getParent();
+}
+
+Module *Trace::getModule() const {
+  return getFunction()->getParent();
+}
+
+/// print - Write trace to output stream.
+///
+void Trace::print(raw_ostream &O) const {
+  Function *F = getFunction();
+  O << "; Trace from function " << F->getNameStr() << ", blocks:\n";
+  for (const_iterator i = begin(), e = end(); i != e; ++i) {
+    O << "; ";
+    WriteAsOperand(O, *i, true, getModule());
+    O << "\n";
+  }
+  O << "; Trace parent function: \n" << *F;
+}
+
+/// dump - Debugger convenience method; writes trace to standard error
+/// output stream.
+///
+void Trace::dump() const {
+  print(dbgs());
+}
diff --git a/final/lib/Analysis/TypeBasedAliasAnalysis.cpp b/final/lib/Analysis/TypeBasedAliasAnalysis.cpp
new file mode 100644
index 00000000000..40e18ab2fbf
--- /dev/null
+++ b/final/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -0,0 +1,299 @@
+//===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TypeBasedAliasAnalysis pass, which implements
+// metadata-based TBAA.
+//
+// In LLVM IR, memory does not have types, so LLVM's own type system is not
+// suitable for doing TBAA. Instead, metadata is added to the IR to describe
+// a type system of a higher level language. This can be used to implement
+// typical C/C++ TBAA, but it can also be used to implement custom alias
+// analysis behavior for other languages.
+//
+// The current metadata format is very simple. TBAA MDNodes have up to
+// three fields, e.g.:
+//   !0 = metadata !{ metadata !"an example type tree" }
+//   !1 = metadata !{ metadata !"int", metadata !0 }
+//   !2 = metadata !{ metadata !"float", metadata !0 }
+//   !3 = metadata !{ metadata !"const float", metadata !2, i64 1 }
+//
+// The first field is an identity field. It can be any value, usually
+// an MDString, which uniquely identifies the type. The most important
+// name in the tree is the name of the root node. Two trees with
+// different root node names are entirely disjoint, even if they
+// have leaves with common names.
+//
+// The second field identifies the type's parent node in the tree, or
+// is null or omitted for a root node. A type is considered to alias
+// all of its descendants and all of its ancestors in the tree. Also,
+// a type is considered to alias all types in other trees, so that
+// bitcode produced from multiple front-ends is handled conservatively.
+//
+// If the third field is present, it's an integer which if equal to 1
+// indicates that the type is "constant" (meaning pointsToConstantMemory
+// should return true; see
+// http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
+//
+// TODO: The current metadata format doesn't support struct
+// fields. For example:
+//   struct X {
+//     double d;
+//     int i;
+//   };
+//   void foo(struct X *x, struct X *y, double *p) {
+//     *x = *y;
+//     *p = 0.0;
+//   }
+// Struct X has a double member, so the store to *x can alias the store to *p.
+// Currently it's not possible to precisely describe all the things struct X
+// aliases, so struct assignments must use conservative TBAA nodes. There's
+// no scheme for attaching metadata to @llvm.memcpy yet either.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+// A handy option for disabling TBAA functionality. The same effect can also be
+// achieved by stripping the !tbaa tags from IR, but this option is sometimes
+// more convenient.
+static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
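
For front-end authors, a sketch of producing such metadata with the 2.9-era MDNode::get(Context, Vals, NumVals) signature and attaching it to a load (tagLoadAsInt is a hypothetical helper):

    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    #include "llvm/Instructions.h"

    using namespace llvm;

    static void tagLoadAsInt(LoadInst *LI) {
      LLVMContext &Ctx = LI->getContext();
      // Root of the tree, then an "int" node whose parent is the root.
      Value *RootOps[] = { MDString::get(Ctx, "an example type tree") };
      MDNode *Root = MDNode::get(Ctx, RootOps, 1);
      Value *IntOps[] = { MDString::get(Ctx, "int"), Root };
      MDNode *IntTy = MDNode::get(Ctx, IntOps, 2);
      LI->setMetadata(LLVMContext::MD_tbaa, IntTy);
    }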
+
+namespace {
+  /// TBAANode - This is a simple wrapper around an MDNode which provides a
+  /// higher-level interface by hiding the details of how alias analysis
+  /// information is encoded in its operands.
+  class TBAANode {
+    const MDNode *Node;
+
+  public:
+    TBAANode() : Node(0) {}
+    explicit TBAANode(const MDNode *N) : Node(N) {}
+
+    /// getNode - Get the MDNode for this TBAANode.
+    const MDNode *getNode() const { return Node; }
+
+    /// getParent - Get this TBAANode's Alias tree parent.
+    TBAANode getParent() const {
+      if (Node->getNumOperands() < 2)
+        return TBAANode();
+      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+      if (!P)
+        return TBAANode();
+      // Ok, this node has a valid parent. Return it.
+      return TBAANode(P);
+    }
+
+    /// TypeIsImmutable - Test if this TBAANode represents a type for objects
+    /// which are not modified (by any means) in the context where this
+    /// AliasAnalysis is relevant.
+    bool TypeIsImmutable() const {
+      if (Node->getNumOperands() < 3)
+        return false;
+      ConstantInt *CI = dyn_cast<ConstantInt>(Node->getOperand(2));
+      if (!CI)
+        return false;
+      return CI->getValue()[0];
+    }
+  };
+}
+
+namespace {
+  /// TypeBasedAliasAnalysis - This is a simple alias analysis
+  /// implementation that uses TBAA metadata to answer queries.
+  class TypeBasedAliasAnalysis : public ImmutablePass,
+                                 public AliasAnalysis {
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    TypeBasedAliasAnalysis() : ImmutablePass(ID) {
+      initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void initializePass() {
+      InitializeAliasAnalysis(this);
+    }
+
+    /// getAdjustedAnalysisPointer - This method is used when a pass implements
+    /// an analysis interface through multiple inheritance. If needed, it
+    /// should override this to adjust the this pointer as needed for the
+    /// specified pass info.
+    virtual void *getAdjustedAnalysisPointer(const void *PI) {
+      if (PI == &AliasAnalysis::ID)
+        return (AliasAnalysis*)this;
+      return this;
+    }
+
+    bool Aliases(const MDNode *A, const MDNode *B) const;
+
+  private:
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual AliasResult alias(const Location &LocA, const Location &LocB);
+    virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+    virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+    virtual ModRefBehavior getModRefBehavior(const Function *F);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+                                       const Location &Loc);
+    virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+                                       ImmutableCallSite CS2);
+  };
+}  // End of anonymous namespace
+
+// Register this pass...
+char TypeBasedAliasAnalysis::ID = 0; +INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", + "Type-Based Alias Analysis", false, true, false) + +ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { + return new TypeBasedAliasAnalysis(); +} + +void +TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AliasAnalysis::getAnalysisUsage(AU); +} + +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. +bool +TypeBasedAliasAnalysis::Aliases(const MDNode *A, + const MDNode *B) const { + // Keep track of the root node for A and B. + TBAANode RootA, RootB; + + // Climb the tree from A to see if we reach B. + for (TBAANode T(A); ; ) { + if (T.getNode() == B) + // B is an ancestor of A. + return true; + + RootA = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Climb the tree from B to see if we reach A. + for (TBAANode T(B); ; ) { + if (T.getNode() == A) + // A is an ancestor of B. + return true; + + RootB = T; + T = T.getParent(); + if (!T.getNode()) + break; + } + + // Neither node is an ancestor of the other. + + // If they have different roots, they're part of different potentially + // unrelated type systems, so we must be conservative. + if (RootA.getNode() != RootB.getNode()) + return true; + + // If they have the same root, then we've proved there's no alias. + return false; +} + +AliasAnalysis::AliasResult +TypeBasedAliasAnalysis::alias(const Location &LocA, + const Location &LocB) { + if (!EnableTBAA) + return AliasAnalysis::alias(LocA, LocB); + + // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must + // be conservative. + const MDNode *AM = LocA.TBAATag; + if (!AM) return AliasAnalysis::alias(LocA, LocB); + const MDNode *BM = LocB.TBAATag; + if (!BM) return AliasAnalysis::alias(LocA, LocB); + + // If they may alias, chain to the next AliasAnalysis. + if (Aliases(AM, BM)) + return AliasAnalysis::alias(LocA, LocB); + + // Otherwise return a definitive result. + return NoAlias; +} + +bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, + bool OrLocal) { + if (!EnableTBAA) + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + const MDNode *M = Loc.TBAATag; + if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); + + // If this is an "immutable" type, we can assume the pointer is pointing + // to constant memory. + if (TBAANode(M).TypeIsImmutable()) + return true; + + return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { + if (!EnableTBAA) + return AliasAnalysis::getModRefBehavior(CS); + + ModRefBehavior Min = UnknownModRefBehavior; + + // If this is an "immutable" type, we can assume the call doesn't write + // to memory. + if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) + if (TBAANode(M).TypeIsImmutable()) + Min = OnlyReadsMemory; + + return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +} + +AliasAnalysis::ModRefBehavior +TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { + // Functions don't have metadata. Just chain to the next implementation. 
+  return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
+                                      const Location &Loc) {
+  if (!EnableTBAA)
+    return AliasAnalysis::getModRefInfo(CS, Loc);
+
+  if (const MDNode *L = Loc.TBAATag)
+    if (const MDNode *M =
+          CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+      if (!Aliases(L, M))
+        return NoModRef;
+
+  return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+                                      ImmutableCallSite CS2) {
+  if (!EnableTBAA)
+    return AliasAnalysis::getModRefInfo(CS1, CS2);
+
+  if (const MDNode *M1 =
+        CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+    if (const MDNode *M2 =
+          CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
+      if (!Aliases(M1, M2))
+        return NoModRef;
+
+  return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
diff --git a/final/lib/Analysis/ValueTracking.cpp b/final/lib/Analysis/ValueTracking.cpp
new file mode 100644
index 00000000000..231b95b618f
--- /dev/null
+++ b/final/lib/Analysis/ValueTracking.cpp
@@ -0,0 +1,1743 @@
+//===- ValueTracking.cpp - Walk computations to compute properties --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains routines that help analyze properties that chains of
+// computations have.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/PatternMatch.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <cstring>
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+const unsigned MaxDepth = 6;
+
+/// getBitWidth - Returns the bitwidth of the given scalar or pointer type (if
+/// unknown returns 0). For vector types, returns the element type's bitwidth.
+static unsigned getBitWidth(const Type *Ty, const TargetData *TD) {
+  if (unsigned BitWidth = Ty->getScalarSizeInBits())
+    return BitWidth;
+  assert(isa<PointerType>(Ty) && "Expected a pointer type!");
+  return TD ? TD->getPointerSizeInBits() : 0;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bit sets. This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+/// NOTE: we cannot consider 'undef' to be "IsZero" here. The problem is that
+/// we cannot optimize based on the assumption that it is zero without changing
+/// it to be an explicit zero. If we don't change it to zero, other code could
+/// be optimized based on the contradictory assumption that it is non-zero.
+/// Because instcombine aggressively folds operations with undef args anyway,
+/// this won't lose us code quality.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers.
+/// In the case where V is a vector, the mask, known zero, and known one values
+/// are the same width as the vector element, and the bit is set only if it is
+/// true for all of the elements in the vector.
+void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
+                             APInt &KnownZero, APInt &KnownOne,
+                             const TargetData *TD, unsigned Depth) {
+  assert(V && "No Value?");
+  assert(Depth <= MaxDepth && "Limit Search Depth");
+  unsigned BitWidth = Mask.getBitWidth();
+  assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
+         && "Not integer or pointer type!");
+  assert((!TD ||
+          TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+         (!V->getType()->isIntOrIntVectorTy() ||
+          V->getType()->getScalarSizeInBits() == BitWidth) &&
+         KnownZero.getBitWidth() == BitWidth &&
+         KnownOne.getBitWidth() == BitWidth &&
+         "V, Mask, KnownOne and KnownZero should have same BitWidth");
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    // We know all of the bits for a constant!
+    KnownOne = CI->getValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  }
+  // Null and aggregate-zero are all-zeros.
+  if (isa<ConstantPointerNull>(V) ||
+      isa<ConstantAggregateZero>(V)) {
+    KnownOne.clearAllBits();
+    KnownZero = Mask;
+    return;
+  }
+  // Handle a constant vector by taking the intersection of the known bits of
+  // each element.
+  if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+    KnownZero.setAllBits(); KnownOne.setAllBits();
+    for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+      APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+      ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+                        TD, Depth);
+      KnownZero &= KnownZero2;
+      KnownOne &= KnownOne2;
+    }
+    return;
+  }
+  // The address of an aligned GlobalValue has trailing zeros.
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+    unsigned Align = GV->getAlignment();
+    if (Align == 0 && TD && GV->getType()->getElementType()->isSized()) {
+      const Type *ObjectType = GV->getType()->getElementType();
+      // If the object is defined in the current Module, we'll be giving
+      // it the preferred alignment. Otherwise, we have to assume that it
+      // may only have the minimum ABI alignment.
+      if (!GV->isDeclaration() && !GV->mayBeOverridden())
+        Align = TD->getPrefTypeAlignment(ObjectType);
+      else
+        Align = TD->getABITypeAlignment(ObjectType);
+    }
+    if (Align > 0)
+      KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+                                              CountTrailingZeros_32(Align));
+    else
+      KnownZero.clearAllBits();
+    KnownOne.clearAllBits();
+    return;
+  }
+  // A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
+  // the bits of its aliasee.
+  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+    if (GA->mayBeOverridden()) {
+      KnownZero.clearAllBits(); KnownOne.clearAllBits();
+    } else {
+      ComputeMaskedBits(GA->getAliasee(), Mask, KnownZero, KnownOne,
+                        TD, Depth+1);
+    }
+    return;
+  }
+
+  KnownZero.clearAllBits(); KnownOne.clearAllBits(); // Don't know anything.
+
+  if (Depth == MaxDepth || Mask == 0)
+    return;  // Limit search depth.
+
+  Operator *I = dyn_cast<Operator>(V);
+  if (!I) return;
+
+  APInt KnownZero2(KnownZero), KnownOne2(KnownOne);
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::And: {
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    APInt Mask2(Mask & ~KnownZero);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  }
+  case Instruction::Or: {
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    APInt Mask2(Mask & ~KnownOne);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  }
+  case Instruction::Xor: {
+    ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, TD, Depth+1);
+    ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case Instruction::Mul: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
+    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clearAllBits();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                               KnownZero2.countLeadingOnes(),
+                               BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case Instruction::UDiv: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
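+    // For example, in an 8-bit udiv where the numerator has two known
+    // leading zeros and the denominator has bit 4 known set (so it is at
+    // least 16 == 2^4), the quotient is less than 64/16 == 4 and therefore
+    // has at least six leading zeros, matching the LeadZ computation below.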
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, TD, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } + case Instruction::Select: + ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, TD, Depth+1); + ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + return; + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::SIToFP: + case Instruction::UIToFP: + return; // Can't work with floating point. + case Instruction::PtrToInt: + case Instruction::IntToPtr: + // We can't handle these if we don't know the pointer size. + if (!TD) return; + // FALL THROUGH and handle them the same as zext/trunc. + case Instruction::ZExt: + case Instruction::Trunc: { + const Type *SrcTy = I->getOperand(0)->getType(); + + unsigned SrcBitWidth; + // Note that we handle pointer operands here because of inttoptr/ptrtoint + // which fall through here. + if (SrcTy->isPointerTy()) + SrcBitWidth = TD->getTypeSizeInBits(SrcTy); + else + SrcBitWidth = SrcTy->getScalarSizeInBits(); + + APInt MaskIn = Mask.zextOrTrunc(SrcBitWidth); + KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); + KnownOne = KnownOne.zextOrTrunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + KnownZero = KnownZero.zextOrTrunc(BitWidth); + KnownOne = KnownOne.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth); + return; + } + case Instruction::BitCast: { + const Type *SrcTy = I->getOperand(0)->getType(); + if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && + // TODO: For now, not handling conversions like: + // (bitcast i64 %x to <2 x i32>) + !I->getType()->isVectorTy()) { + ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD, + Depth+1); + return; + } + break; + } + case Instruction::SExt: { + // Compute the bits in the result that are not present in the input. + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt MaskIn = Mask.trunc(SrcBitWidth); + KnownZero = KnownZero.trunc(SrcBitWidth); + KnownOne = KnownOne.trunc(SrcBitWidth); + ComputeMaskedBits(I->getOperand(0), MaskIn, KnownZero, KnownOne, TD, + Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
+    if (KnownZero[SrcBitWidth-1])      // Input sign bit known zero
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+    else if (KnownOne[SrcBitWidth-1])  // Input sign bit known set
+      KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);
+    return;
+  }
+  case Instruction::Shl:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+      APInt Mask2(Mask.lshr(ShiftAmt));
+      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero <<= ShiftAmt;
+      KnownOne  <<= ShiftAmt;
+      KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0
+      return;
+    }
+    break;
+  case Instruction::LShr:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      // Compute the new bits that are at the top now.
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth);
+
+      // Unsigned shift right.
+      APInt Mask2(Mask.shl(ShiftAmt));
+      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
+      // high bits known zero.
+      KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt);
+      return;
+    }
+    break;
+  case Instruction::AShr:
+    // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      // Compute the new bits that are at the top now.
+      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
+
+      // Signed shift right.
+      APInt Mask2(Mask.shl(ShiftAmt));
+      ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt);
+      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt);
+
+      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt));
+      if (KnownZero[BitWidth-ShiftAmt-1])      // New bits are known zero.
+        KnownZero |= HighBits;
+      else if (KnownOne[BitWidth-ShiftAmt-1])  // New bits are known one.
+        KnownOne |= HighBits;
+      return;
+    }
+    break;
+  case Instruction::Sub: {
+    if (ConstantInt *CLHS = dyn_cast<ConstantInt>(I->getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains less bits
+      // than C (i.e. no wrap-around can happen). For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+      if (!CLHS->getValue().isNegative()) {
+        unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2,
+                          TD, Depth+1);
+
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+  // fall through
+  case Instruction::Add: {
+    // If one of the operands has trailing zeros, then the bits that the
+    // other operand has in those bit positions will be preserved in the
+    // result. For an add, this works with either operand. For a subtract,
+    // this only works if the known zeros are in the right operand.
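+    // For example, if one operand is known to end in three zero bits, no
+    // carry can propagate out of those positions, so the low three known
+    // bits of the other operand appear unchanged in the sum.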
+    APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+    APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+                                       BitWidth - Mask.countLeadingZeros());
+    ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+                      Depth+1);
+    assert((LHSKnownZero & LHSKnownOne) == 0 &&
+           "Bits known to be one AND zero?");
+    unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes();
+
+    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, TD,
+                      Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes();
+
+    // Determine which operand has more trailing zeros, and use that
+    // many bits from the other operand.
+    if (LHSKnownZeroOut > RHSKnownZeroOut) {
+      if (I->getOpcode() == Instruction::Add) {
+        APInt Mask = APInt::getLowBitsSet(BitWidth, LHSKnownZeroOut);
+        KnownZero |= KnownZero2 & Mask;
+        KnownOne  |= KnownOne2 & Mask;
+      } else {
+        // If the known zeros are in the left operand for a subtract,
+        // fall back to the minimum known zeros in both operands.
+        KnownZero |= APInt::getLowBitsSet(BitWidth,
+                                          std::min(LHSKnownZeroOut,
+                                                   RHSKnownZeroOut));
+      }
+    } else if (RHSKnownZeroOut >= LHSKnownZeroOut) {
+      APInt Mask = APInt::getLowBitsSet(BitWidth, RHSKnownZeroOut);
+      KnownZero |= LHSKnownZero & Mask;
+      KnownOne  |= LHSKnownOne & Mask;
+    }
+    return;
+  }
+  case Instruction::SRem:
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
+                          Depth+1);
+
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
+        if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+          KnownZero |= ~LowBits;
+
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+        if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+          KnownOne |= ~LowBits;
+
+        KnownZero &= Mask;
+        KnownOne &= Mask;
+
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      }
+    }
+
+    // The sign bit is the LHS's sign bit, except when the result of the
+    // remainder is zero.
+    if (Mask.isNegative() && KnownZero.isNonNegative()) {
+      APInt Mask2 = APInt::getSignBit(BitWidth);
+      APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0);
+      ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD,
+                        Depth+1);
+      // If it's known zero, our sign bit is also zero.
+      if (LHSKnownZero.isNegative())
+        KnownZero |= LHSKnownZero;
+    }
+
+    break;
+  case Instruction::URem: {
+    if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
+      APInt RA = Rem->getValue();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = (RA - 1);
+        APInt Mask2 = LowBits & Mask;
+        KnownZero |= ~LowBits & Mask;
+        ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD,
+                          Depth+1);
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+        break;
+      }
+    }
+
+    // Since the result is less than or equal to either operand, any leading
+    // zero bits in either operand must also exist in the result.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne,
+                      TD, Depth+1);
+    ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2,
+                      TD, Depth+1);
+
+    unsigned Leaders = std::max(KnownZero.countLeadingOnes(),
+                                KnownZero2.countLeadingOnes());
+    KnownOne.clearAllBits();
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+    break;
+  }
+
+  case Instruction::Alloca: {
+    AllocaInst *AI = cast<AllocaInst>(V);
+    unsigned Align = AI->getAlignment();
+    if (Align == 0 && TD)
+      Align = TD->getABITypeAlignment(AI->getType()->getElementType());
+
+    if (Align > 0)
+      KnownZero = Mask & APInt::getLowBitsSet(BitWidth,
+                                              CountTrailingZeros_32(Align));
+    break;
+  }
+  case Instruction::GetElementPtr: {
+    // Analyze all of the subscripts of this getelementptr instruction
+    // to determine if we can prove known low zero bits.
+    APInt LocalMask = APInt::getAllOnesValue(BitWidth);
+    APInt LocalKnownZero(BitWidth, 0), LocalKnownOne(BitWidth, 0);
+    ComputeMaskedBits(I->getOperand(0), LocalMask,
+                      LocalKnownZero, LocalKnownOne, TD, Depth+1);
+    unsigned TrailZ = LocalKnownZero.countTrailingOnes();
+
+    gep_type_iterator GTI = gep_type_begin(I);
+    for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
+      Value *Index = I->getOperand(i);
+      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+        // Handle struct member offset arithmetic.
+        if (!TD) return;
+        const StructLayout *SL = TD->getStructLayout(STy);
+        unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
+        uint64_t Offset = SL->getElementOffset(Idx);
+        TrailZ = std::min(TrailZ,
+                          CountTrailingZeros_64(Offset));
+      } else {
+        // Handle array index arithmetic.
+        const Type *IndexedTy = GTI.getIndexedType();
+        if (!IndexedTy->isSized()) return;
+        unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
+        uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
+        LocalMask = APInt::getAllOnesValue(GEPOpiBits);
+        LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
+        ComputeMaskedBits(Index, LocalMask,
+                          LocalKnownZero, LocalKnownOne, TD, Depth+1);
+        TrailZ = std::min(TrailZ,
+                          unsigned(CountTrailingZeros_64(TypeSize) +
+                                   LocalKnownZero.countTrailingOnes()));
+      }
+    }
+
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) & Mask;
+    break;
+  }
+  case Instruction::PHI: {
+    PHINode *P = cast<PHINode>(I);
+    // Handle the case of a simple two-predecessor recurrence PHI.
+    // There's a lot more that could theoretically be done here, but
+    // this is sufficient to catch some interesting cases.
+    if (P->getNumIncomingValues() == 2) {
+      for (unsigned i = 0; i != 2; ++i) {
+        Value *L = P->getIncomingValue(i);
+        Value *R = P->getIncomingValue(!i);
+        Operator *LU = dyn_cast<Operator>(L);
+        if (!LU)
+          continue;
+        unsigned Opcode = LU->getOpcode();
+        // Check for operations that have the property that if
+        // both their operands have low zero bits, the result
+        // will have low zero bits.
+        if (Opcode == Instruction::Add ||
+            Opcode == Instruction::Sub ||
+            Opcode == Instruction::And ||
+            Opcode == Instruction::Or ||
+            Opcode == Instruction::Mul) {
+          Value *LL = LU->getOperand(0);
+          Value *LR = LU->getOperand(1);
+          // Find a recurrence.
+          if (LL == I)
+            L = LR;
+          else if (LR == I)
+            L = LL;
+          else
+            break;
+          // Ok, we have a PHI of the form L op= R. Check for low
+          // zero bits.
+          APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+          ComputeMaskedBits(R, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+          Mask2 = APInt::getLowBitsSet(BitWidth,
+                                       KnownZero2.countTrailingOnes());
+
+          // We need to take the minimum number of known bits
+          APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
+          ComputeMaskedBits(L, Mask2, KnownZero3, KnownOne3, TD, Depth+1);
+
+          KnownZero = Mask &
+                      APInt::getLowBitsSet(BitWidth,
+                                           std::min(KnownZero2.countTrailingOnes(),
+                                                    KnownZero3.countTrailingOnes()));
+          break;
+        }
+      }
+    }
+
+    // Unreachable blocks may have zero-operand PHI nodes.
+    if (P->getNumIncomingValues() == 0)
+      return;
+
+    // Otherwise take the unions of the known bit sets of the operands,
+    // taking conservative care to avoid excessive recursion.
+    if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) {
+      KnownZero = APInt::getAllOnesValue(BitWidth);
+      KnownOne = APInt::getAllOnesValue(BitWidth);
+      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
+        // Skip direct self references.
+        if (P->getIncomingValue(i) == P) continue;
+
+        KnownZero2 = APInt(BitWidth, 0);
+        KnownOne2 = APInt(BitWidth, 0);
+        // Recurse, but cap the recursion to one level, because we don't
+        // want to waste time spinning around in loops.
+        ComputeMaskedBits(P->getIncomingValue(i), KnownZero | KnownOne,
+                          KnownZero2, KnownOne2, TD, MaxDepth-1);
+        KnownZero &= KnownZero2;
+        KnownOne &= KnownOne2;
+        // If all bits have been ruled out, there's no need to check
+        // more operands.
+        if (!KnownZero && !KnownOne)
+          break;
+      }
+    }
+    break;
+  }
+  case Instruction::Call:
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      switch (II->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::ctpop:
+      case Intrinsic::ctlz:
+      case Intrinsic::cttz: {
+        unsigned LowBits = Log2_32(BitWidth)+1;
+        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        break;
+      }
+      }
+    }
+    break;
+  }
+}
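
A small caller sketch (lowBitKnownZero is a hypothetical helper; it only handles integer-typed values, using the mask-based signature defined above):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Value.h"
    #include "llvm/Type.h"
    #include "llvm/ADT/APInt.h"

    using namespace llvm;

    // Returns true if V is provably even, by asking only about bit 0.
    static bool lowBitKnownZero(Value *V, const TargetData *TD) {
      unsigned BW = V->getType()->getScalarSizeInBits();
      if (!BW) return false;  // Not an integer (or vector of integer) type.
      APInt KnownZero(BW, 0), KnownOne(BW, 0);
      ComputeMaskedBits(V, APInt(BW, 1), KnownZero, KnownOne, TD);
      return KnownZero[0];
    }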
+
+/// ComputeSignBit - Determine whether the sign bit is known to be zero or
+/// one. Convenience wrapper around ComputeMaskedBits.
+void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+                          const TargetData *TD, unsigned Depth) {
+  unsigned BitWidth = getBitWidth(V->getType(), TD);
+  if (!BitWidth) {
+    KnownZero = false;
+    KnownOne = false;
+    return;
+  }
+  APInt ZeroBits(BitWidth, 0);
+  APInt OneBits(BitWidth, 0);
+  ComputeMaskedBits(V, APInt::getSignBit(BitWidth), ZeroBits, OneBits, TD,
+                    Depth);
+  KnownOne = OneBits[BitWidth - 1];
+  KnownZero = ZeroBits[BitWidth - 1];
+}
+
+/// isPowerOfTwo - Return true if the given value is known to have exactly one
+/// bit set when defined. For vectors return true if every element is known to
+/// be a power of two when defined. Supports values with integer or pointer
+/// types and vectors of integers.
+bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) {
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return CI->getValue().isPowerOf2();
+  // TODO: Handle vector constants.
+
+  // 1 << X is clearly a power of two if the one is not shifted off the end. If
+  // it is shifted off the end then the result is undefined.
+  if (match(V, m_Shl(m_One(), m_Value())))
+    return true;
+
+  // (signbit) >>l X is clearly a power of two if the one is not shifted off
+  // the bottom. If it is shifted off the bottom then the result is undefined.
+  if (match(V, m_LShr(m_SignBit(), m_Value())))
+    return true;
+
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return false;
+
+  if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+    return isPowerOfTwo(ZI->getOperand(0), TD, Depth);
+
+  if (SelectInst *SI = dyn_cast<SelectInst>(V))
+    return isPowerOfTwo(SI->getTrueValue(), TD, Depth) &&
+           isPowerOfTwo(SI->getFalseValue(), TD, Depth);
+
+  // An exact divide or right shift can only shift off zero bits, so the result
+  // is a power of two only if the first operand is a power of two.
+  if (match(V, m_Shr(m_Value(), m_Value())) ||
+      match(V, m_IDiv(m_Value(), m_Value()))) {
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->isExact())
+      return isPowerOfTwo(BO->getOperand(0), TD, Depth);
+  }
+
+  return false;
+}
+
+/// isKnownNonZero - Return true if the given value is known to be non-zero
+/// when defined. For vectors return true if every element is known to be
+/// non-zero when defined. Supports values with integer or pointer type and
+/// vectors of integers.
+bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) {
+  if (Constant *C = dyn_cast<Constant>(V)) {
+    if (C->isNullValue())
+      return false;
+    if (isa<ConstantInt>(C))
+      // Must be non-zero due to null test above.
+      return true;
+    // TODO: Handle vectors
+    return false;
+  }
+
+  // The remaining tests are all recursive, so bail out if we hit the limit.
+  if (Depth++ == MaxDepth)
+    return false;
+
+  unsigned BitWidth = getBitWidth(V->getType(), TD);
+
+  // X | Y != 0 if X != 0 or Y != 0.
+  Value *X = 0, *Y = 0;
+  if (match(V, m_Or(m_Value(X), m_Value(Y))))
+    return isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth);
+
+  // ext X != 0 if X != 0.
+  if (isa<SExtInst>(V) || isa<ZExtInst>(V))
+    return isKnownNonZero(cast<Instruction>(V)->getOperand(0), TD, Depth);
+
+  // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
+  // if the lowest bit is shifted off the end.
+  if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
+    // shl nuw can't remove any non-zero bits.
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->hasNoUnsignedWrap())
+      return isKnownNonZero(X, TD, Depth);
+
+    APInt KnownZero(BitWidth, 0);
+    APInt KnownOne(BitWidth, 0);
+    ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth);
+    if (KnownOne[0])
+      return true;
+  }
+  // shr X, Y != 0 if X is negative. Note that the value of the shift is not
+  // defined if the sign bit is shifted off the end.
+  else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
+    // shr exact can only shift out zero bits.
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->isExact())
+      return isKnownNonZero(X, TD, Depth);
+
+    bool XKnownNonNegative, XKnownNegative;
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+    if (XKnownNegative)
+      return true;
+  }
+  // div exact can only produce a zero if the dividend is zero.
+  else if (match(V, m_IDiv(m_Value(X), m_Value()))) {
+    BinaryOperator *BO = cast<BinaryOperator>(V);
+    if (BO->isExact())
+      return isKnownNonZero(X, TD, Depth);
+  }
+  // X + Y.
+  else if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
+    bool XKnownNonNegative, XKnownNegative;
+    bool YKnownNonNegative, YKnownNegative;
+    ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth);
+    ComputeSignBit(Y, YKnownNonNegative, YKnownNegative, TD, Depth);
+
+    // If X and Y are both non-negative (as signed values) then their sum is
+    // not zero unless both X and Y are zero.
+    if (XKnownNonNegative && YKnownNonNegative)
+      if (isKnownNonZero(X, TD, Depth) || isKnownNonZero(Y, TD, Depth))
+        return true;
+
+    // If X and Y are both negative (as signed values) then their sum is not
+    // zero unless both X and Y equal INT_MIN.
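+    // (In i8 arithmetic, for instance, (-128) + (-128) wraps around to 0,
+    // which is why both operands must also be shown to differ from INT_MIN
+    // below.)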
+    if (BitWidth && XKnownNegative && YKnownNegative) {
+      APInt KnownZero(BitWidth, 0);
+      APInt KnownOne(BitWidth, 0);
+      APInt Mask = APInt::getSignedMaxValue(BitWidth);
+      // The sign bit of X is set.  If some other bit is set then X is not equal
+      // to INT_MIN.
+      ComputeMaskedBits(X, Mask, KnownZero, KnownOne, TD, Depth);
+      if ((KnownOne & Mask) != 0)
+        return true;
+      // The sign bit of Y is set.  If some other bit is set then Y is not equal
+      // to INT_MIN.
+      ComputeMaskedBits(Y, Mask, KnownZero, KnownOne, TD, Depth);
+      if ((KnownOne & Mask) != 0)
+        return true;
+    }
+
+    // The sum of a non-negative number and a power of two is not zero.
+    if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth))
+      return true;
+    if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth))
+      return true;
+  }
+  // (C ? X : Y) != 0 if X != 0 and Y != 0.
+  else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    if (isKnownNonZero(SI->getTrueValue(), TD, Depth) &&
+        isKnownNonZero(SI->getFalseValue(), TD, Depth))
+      return true;
+  }
+
+  if (!BitWidth) return false;
+  APInt KnownZero(BitWidth, 0);
+  APInt KnownOne(BitWidth, 0);
+  ComputeMaskedBits(V, APInt::getAllOnesValue(BitWidth), KnownZero, KnownOne,
+                    TD, Depth);
+  return KnownOne != 0;
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
+/// this predicate to simplify operations downstream.  Mask is known to be zero
+/// for bits that V cannot have.
+///
+/// This function is defined on values with integer type, values with pointer
+/// type (but only if TD is non-null), and vectors of integers.  In the case
+/// where V is a vector, the mask, known zero, and known one values are the
+/// same width as the vector element, and the bit is set only if it is true
+/// for all of the elements in the vector.
+bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
+                             const TargetData *TD, unsigned Depth) {
+  APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  return (KnownZero & Mask) == Mask;
+}
+
+
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "ashr X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+///
+/// 'Op' must have a scalar integer type.
+///
+unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
+                                  unsigned Depth) {
+  assert((TD || V->getType()->isIntOrIntVectorTy()) &&
+         "ComputeNumSignBits requires a TargetData object to operate "
+         "on non-integer values!");
+  const Type *Ty = V->getType();
+  unsigned TyBits = TD ? TD->getTypeSizeInBits(V->getType()->getScalarType()) :
+                         Ty->getScalarSizeInBits();
+  unsigned Tmp, Tmp2;
+  unsigned FirstAnswer = 1;
+
+  // Note that ConstantInt is handled by the general ComputeMaskedBits case
+  // below.
+
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  Operator *U = dyn_cast<Operator>(V);
+  switch (Operator::getOpcode(V)) {
+  default: break;
+  case Instruction::SExt:
+    Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
+    return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
+
+  case Instruction::AShr:
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    // ashr X, C -> adds C sign bits.
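+    // Worked example (illustrative, not from the original source): the i8
+    // value 0b11100000 has 3 sign bits; (ashr i8 0b11100000, 2) yields
+    // 0b11111000, which has 5 -- the shift count is added, capped at TyBits.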
+    if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      Tmp += C->getZExtValue();
+      if (Tmp > TyBits) Tmp = TyBits;
+    }
+    // vector ashr X, <C, C, C, C>  -> adds C sign bits
+    if (ConstantVector *C = dyn_cast<ConstantVector>(U->getOperand(1))) {
+      if (ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue())) {
+        Tmp += CI->getZExtValue();
+        if (Tmp > TyBits) Tmp = TyBits;
+      }
+    }
+    return Tmp;
+  case Instruction::Shl:
+    if (ConstantInt *C = dyn_cast<ConstantInt>(U->getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+      if (C->getZExtValue() >= TyBits ||      // Bad shift.
+          C->getZExtValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getZExtValue();
+    }
+    break;
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits at the worst.
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    if (Tmp != 1) {
+      Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+      FirstAnswer = std::min(Tmp, Tmp2);
+      // We computed what we know about the sign bits as our first
+      // answer.  Now proceed to the generic code that uses
+      // ComputeMaskedBits, and pick whichever answer is better.
+    }
+    break;
+
+  case Instruction::Select:
+    Tmp = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case Instruction::Add:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantInt *CRHS = dyn_cast<ConstantInt>(U->getOperand(1)))
+      if (CRHS->isAllOnesValue()) {
+        APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+        APInt Mask = APInt::getAllOnesValue(TyBits);
+        ComputeMaskedBits(U->getOperand(0), Mask, KnownZero, KnownOne, TD,
+                          Depth+1);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(TyBits, 1)) == Mask)
+          return TyBits;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero.isNegative())
+          return Tmp;
+      }
+
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+
+  case Instruction::Sub:
+    Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1);
+    if (Tmp2 == 1) return 1;
+
+    // Handle NEG.
+    if (ConstantInt *CLHS = dyn_cast<ConstantInt>(U->getOperand(0)))
+      if (CLHS->isNullValue()) {
+        APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+        APInt Mask = APInt::getAllOnesValue(TyBits);
+        ComputeMaskedBits(U->getOperand(1), Mask, KnownZero, KnownOne,
+                          TD, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(TyBits, 1)) == Mask)
+          return TyBits;
+
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero.isNegative())
+          return Tmp2;
+
+        // Otherwise, we treat this like a SUB.
+      }
+
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
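+    // Worked example (illustrative, not from the original source): in i8,
+    // 32 - (-32) = 64 = 0b01000000.  The inputs have 2 and 3 sign bits, but
+    // the borrow leaves the result with only min(2, 3) - 1 = 1, which is
+    // exactly the bound returned below.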
+    return std::min(Tmp, Tmp2)-1;
+
+  case Instruction::PHI: {
+    PHINode *PN = cast<PHINode>(U);
+    // Don't analyze large in-degree PHIs.
+    if (PN->getNumIncomingValues() > 4) break;
+
+    // Take the minimum of all incoming values.  This can't infinitely loop
+    // because of our depth threshold.
+    Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1);
+    for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+      if (Tmp == 1) return Tmp;
+      Tmp = std::min(Tmp,
+                     ComputeNumSignBits(PN->getIncomingValue(i), TD, Depth+1));
+    }
+    return Tmp;
+  }
+
+  case Instruction::Trunc:
+    // FIXME: it's tricky to do anything useful for this, but it is an important
+    // case for targets like X86.
+    break;
+  }
+
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0);
+  APInt Mask = APInt::getAllOnesValue(TyBits);
+  ComputeMaskedBits(V, Mask, KnownZero, KnownOne, TD, Depth);
+
+  if (KnownZero.isNegative()) {        // sign bit is 0
+    Mask = KnownZero;
+  } else if (KnownOne.isNegative()) {  // sign bit is 1
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return FirstAnswer;
+  }
+
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask = ~Mask;
+  Mask <<= Mask.getBitWidth()-TyBits;
+  // Return # leading zeros.  We use 'min' here in case Val was zero before
+  // shifting.  We don't want to return '64' as for an i32 "0".
+  return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
+}
+
+/// ComputeMultiple - This function computes the integer multiple of Base that
+/// equals V.  If successful, it returns true and stores the multiple in
+/// Multiple.  If unsuccessful, it returns false.  It looks through SExt
+/// instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+                           bool LookThroughSExt, unsigned Depth) {
+  const unsigned MaxDepth = 6;
+
+  assert(V && "No Value?");
+  assert(Depth <= MaxDepth && "Limit Search Depth");
+  assert(V->getType()->isIntegerTy() && "Not integer or pointer type!");
+
+  const Type *T = V->getType();
+
+  ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+  if (Base == 0)
+    return false;
+
+  if (Base == 1) {
+    Multiple = V;
+    return true;
+  }
+
+  ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+  Constant *BaseVal = ConstantInt::get(T, Base);
+  if (CO && CO == BaseVal) {
+    // Multiple is 1.
+    Multiple = ConstantInt::get(T, 1);
+    return true;
+  }
+
+  if (CI && CI->getZExtValue() % Base == 0) {
+    Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+    return true;
+  }
+
+  if (Depth == MaxDepth) return false;  // Limit search depth.
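+  // Worked example (illustrative, not from the original source): for
+  //   %v = mul i32 %x, 3
+  // ComputeMultiple(%v, /*Base=*/3, Multiple) succeeds with Multiple = %x:
+  // the recursion below reduces the constant operand 3 to a multiple of 1,
+  // and the "V == Base * Op0" case then returns Op0.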
+
+  Operator *I = dyn_cast<Operator>(V);
+  if (!I) return false;
+
+  switch (I->getOpcode()) {
+  default: break;
+  case Instruction::SExt:
+    if (!LookThroughSExt) return false;
+    // otherwise fall through to ZExt
+  case Instruction::ZExt:
+    return ComputeMultiple(I->getOperand(0), Base, Multiple,
+                           LookThroughSExt, Depth+1);
+  case Instruction::Shl:
+  case Instruction::Mul: {
+    Value *Op0 = I->getOperand(0);
+    Value *Op1 = I->getOperand(1);
+
+    if (I->getOpcode() == Instruction::Shl) {
+      ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+      if (!Op1CI) return false;
+      // Turn Op0 << Op1 into Op0 * 2^Op1
+      APInt Op1Int = Op1CI->getValue();
+      uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+      APInt API(Op1Int.getBitWidth(), 0);
+      API.setBit(BitToSet);
+      Op1 = ConstantInt::get(V->getContext(), API);
+    }
+
+    Value *Mul0 = NULL;
+    if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) {
+      if (Constant *Op1C = dyn_cast<Constant>(Op1))
+        if (Constant *MulC = dyn_cast<Constant>(Mul0)) {
+          if (Op1C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op1C = ConstantExpr::getZExt(Op1C, MulC->getType());
+          if (Op1C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op1C->getType());
+
+          // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+          Multiple = ConstantExpr::getMul(MulC, Op1C);
+          return true;
+        }
+
+      if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+        if (Mul0CI->getValue() == 1) {
+          // V == Base * Op1, so return Op1
+          Multiple = Op1;
+          return true;
+        }
+    }
+
+    Value *Mul1 = NULL;
+    if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) {
+      if (Constant *Op0C = dyn_cast<Constant>(Op0))
+        if (Constant *MulC = dyn_cast<Constant>(Mul1)) {
+          if (Op0C->getType()->getPrimitiveSizeInBits() <
+              MulC->getType()->getPrimitiveSizeInBits())
+            Op0C = ConstantExpr::getZExt(Op0C, MulC->getType());
+          if (Op0C->getType()->getPrimitiveSizeInBits() >
+              MulC->getType()->getPrimitiveSizeInBits())
+            MulC = ConstantExpr::getZExt(MulC, Op0C->getType());
+
+          // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+          Multiple = ConstantExpr::getMul(MulC, Op0C);
+          return true;
+        }
+
+      if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+        if (Mul1CI->getValue() == 1) {
+          // V == Base * Op0, so return Op0
+          Multiple = Op0;
+          return true;
+        }
+    }
+  }
+  }
+
+  // We could not determine if V is a multiple of Base.
+  return false;
+}
+
+/// CannotBeNegativeZero - Return true if we can prove that the specified FP
+/// value is never equal to -0.0.
+///
+/// NOTE: this function will need to be revisited when we support non-default
+/// rounding modes!
+///
+bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(V))
+    return !CFP->getValueAPF().isNegZero();
+
+  if (Depth == 6)
+    return true;  // Limit search depth.
+
+  const Operator *I = dyn_cast<Operator>(V);
+  if (I == 0) return false;
+
+  // (add x, 0.0) is guaranteed to return +0.0, not -0.0.
+  if (I->getOpcode() == Instruction::FAdd &&
+      isa<ConstantFP>(I->getOperand(1)) &&
+      cast<ConstantFP>(I->getOperand(1))->isNullValue())
+    return true;
+
+  // sitofp and uitofp turn into +0.0 for zero.
+  if (isa<SIToFPInst>(I) || isa<UIToFPInst>(I))
+    return true;
+
+  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+    // sqrt(-0.0) = -0.0, no other negative results are possible.
+    if (II->getIntrinsicID() == Intrinsic::sqrt)
+      return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
+
+  if (const CallInst *CI = dyn_cast<CallInst>(I))
+    if (const Function *F = CI->getCalledFunction()) {
+      if (F->isDeclaration()) {
+        // abs(x) != -0.0
+        if (F->getName() == "abs") return true;
+        // fabs[lf](x) != -0.0
+        if (F->getName() == "fabs") return true;
+        if (F->getName() == "fabsf") return true;
+        if (F->getName() == "fabsl") return true;
+        if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
+            F->getName() == "sqrtl")
+          return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
+      }
+    }
+
+  return false;
+}
+
+/// isBytewiseValue - If the specified value can be set by repeating the same
+/// byte in memory, return the i8 value that it is represented with.  This is
+/// true for all i8 values obviously, but is also true for i32 0, i32 -1,
+/// i16 0xF0F0, double 0.0 etc.  If the value can't be handled with a repeated
+/// byte store (e.g. i16 0x1234), return null.
+Value *llvm::isBytewiseValue(Value *V) {
+  // All byte-wide stores are splatable, even of arbitrary variables.
+  if (V->getType()->isIntegerTy(8)) return V;
+
+  // Handle 'null' ConstantArrayZero etc.
+  if (Constant *C = dyn_cast<Constant>(V))
+    if (C->isNullValue())
+      return Constant::getNullValue(Type::getInt8Ty(V->getContext()));
+
+  // Constant float and double values can be handled as integer values if the
+  // corresponding integer value is "byteable".  An important case is 0.0.
+  if (ConstantFP *CFP = dyn_cast<ConstantFP>(V)) {
+    if (CFP->getType()->isFloatTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext()));
+    if (CFP->getType()->isDoubleTy())
+      V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext()));
+    // Don't handle long double formats, which have strange constraints.
+  }
+
+  // We can handle constant integers that are a power of two in size and a
+  // multiple of 8 bits.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    unsigned Width = CI->getBitWidth();
+    if (isPowerOf2_32(Width) && Width > 8) {
+      // We can handle this value if the recursive binary decomposition is the
+      // same at all levels.
+      APInt Val = CI->getValue();
+      APInt Val2;
+      while (Val.getBitWidth() != 8) {
+        unsigned NextWidth = Val.getBitWidth()/2;
+        Val2 = Val.lshr(NextWidth);
+        Val2 = Val2.trunc(Val.getBitWidth()/2);
+        Val = Val.trunc(Val.getBitWidth()/2);
+
+        // If the top/bottom halves aren't the same, reject it.
+        if (Val != Val2)
+          return 0;
+      }
+      return ConstantInt::get(V->getContext(), Val);
+    }
+  }
+
+  // A ConstantArray is splatable if all its members are equal and also
+  // splatable.
+  if (ConstantArray *CA = dyn_cast<ConstantArray>(V)) {
+    if (CA->getNumOperands() == 0)
+      return 0;
+
+    Value *Val = isBytewiseValue(CA->getOperand(0));
+    if (!Val)
+      return 0;
+
+    for (unsigned I = 1, E = CA->getNumOperands(); I != E; ++I)
+      if (CA->getOperand(I-1) != CA->getOperand(I))
+        return 0;
+
+    return Val;
+  }
+
+  // Conceptually, we could handle things like:
+  //   %a = zext i8 %X to i16
+  //   %b = shl i16 %a, 8
+  //   %c = or i16 %a, %b
+  // but until there is an example that actually needs this, it doesn't seem
+  // worth worrying about.
+  return 0;
+}
+
+
+// This is the recursive version of BuildSubAggregate. It takes a few different
+// arguments. Idxs is the index within the nested struct From that we are
+// looking at now (which is of type IndexedType). IdxSkip is the number of
+// indices from Idxs that should be left out when inserting into the resulting
+// struct. To is the result struct built so far, new insertvalue instructions
+// build on that.
+static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
+                                SmallVector<unsigned, 10> &Idxs,
+                                unsigned IdxSkip,
+                                Instruction *InsertBefore) {
+  const llvm::StructType *STy = llvm::dyn_cast<llvm::StructType>(IndexedType);
+  if (STy) {
+    // Save the original To argument so we can modify it
+    Value *OrigTo = To;
+    // General case, the type indexed by Idxs is a struct
+    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+      // Process each struct element recursively
+      Idxs.push_back(i);
+      Value *PrevTo = To;
+      To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip,
+                             InsertBefore);
+      Idxs.pop_back();
+      if (!To) {
+        // Couldn't find any inserted value for this index? Cleanup
+        while (PrevTo != OrigTo) {
+          InsertValueInst* Del = cast<InsertValueInst>(PrevTo);
+          PrevTo = Del->getAggregateOperand();
+          Del->eraseFromParent();
+        }
+        // Stop processing elements
+        break;
+      }
+    }
+    // If we successfully found a value for each of our subaggregates
+    if (To)
+      return To;
+  }
+  // Base case, the type indexed by SourceIdxs is not a struct, or not all of
+  // the struct's elements had a value that was inserted directly. In the latter
+  // case, perhaps we can't determine each of the subelements individually, but
+  // we might be able to find the complete struct somewhere.
+
+  // Find the value that is at that particular spot
+  Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+
+  if (!V)
+    return NULL;
+
+  // Insert the value in the new (sub) aggregate
+  return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
+                                       Idxs.end(), "tmp", InsertBefore);
+}
+
+// This helper takes a nested struct and extracts a part of it (which is again a
+// struct) into a new value. For example, given the struct:
+// { a, { b, { c, d }, e } }
+// and the indices "1, 1" this returns
+// { c, d }.
+//
+// It does this by inserting an insertvalue for each element in the resulting
+// struct, as opposed to just inserting a single struct. This will only work if
+// each of the elements of the substruct are known (ie, inserted into From by an
+// insertvalue instruction somewhere).
+//
+// All inserted insertvalue instructions are inserted before InsertBefore
+static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
+                                const unsigned *idx_end,
+                                Instruction *InsertBefore) {
+  assert(InsertBefore && "Must have someplace to insert!");
+  const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
+                                                             idx_begin,
+                                                             idx_end);
+  Value *To = UndefValue::get(IndexedType);
+  SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+  unsigned IdxSkip = Idxs.size();
+
+  return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
+}
+
+/// FindInsertedValue - Given an aggregate and a sequence of indices, see if
+/// the scalar value indexed is already around as a register, for example if it
+/// were inserted directly into the aggregate.
+///
+/// If InsertBefore is not null, this function will duplicate (modified)
+/// insertvalues when a part of a nested struct is extracted.
+Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
+                               const unsigned *idx_end, Instruction *InsertBefore) {
+  // Nothing to index? Just return V then (this is useful at the end of our
+  // recursion)
+  if (idx_begin == idx_end)
+    return V;
+  // We have indices, so V should have an indexable type
+  assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
+         && "Not looking at a struct or array?");
+  assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+         && "Invalid indices for type?");
+  const CompositeType *PTy = cast<CompositeType>(V->getType());
+
+  if (isa<UndefValue>(V))
+    return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
+                                                            idx_begin,
+                                                            idx_end));
+  else if (isa<ConstantAggregateZero>(V))
+    return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
+                                                                   idx_begin,
+                                                                   idx_end));
+  else if (Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
+      // Recursively process this constant
+      return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
+                               idx_end, InsertBefore);
+  } else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
+    // Loop the indices for the insertvalue instruction in parallel with the
+    // requested indices
+    const unsigned *req_idx = idx_begin;
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i, ++req_idx) {
+      if (req_idx == idx_end) {
+        if (InsertBefore)
+          // The requested index identifies a part of a nested aggregate. Handle
+          // this specially. For example,
+          // %A = insertvalue { i32, {i32, i32 } } undef, i32 10, 1, 0
+          // %B = insertvalue { i32, {i32, i32 } } %A, i32 11, 1, 1
+          // %C = extractvalue {i32, { i32, i32 } } %B, 1
+          // This can be changed into
+          // %A = insertvalue {i32, i32 } undef, i32 10, 0
+          // %C = insertvalue {i32, i32 } %A, i32 11, 1
+          // which allows the unused 0,0 element from the nested struct to be
+          // removed.
+          return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+        else
+          // We can't handle this without inserting insertvalues
+          return 0;
+      }
+
+      // This insert value inserts something other than what we are looking for.
+      // See if the (aggregate) value inserted into has the value we are
+      // looking for, then.
+      if (*req_idx != *i)
+        return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+                                 InsertBefore);
+    }
+    // If we end up here, the indices of the insertvalue match with those
+    // requested (though possibly only partially). Now we recursively look at
+    // the inserted value, passing any remaining indices.
+    return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+                             InsertBefore);
+  } else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
+    // If we're extracting a value from an aggregate that was extracted from
+    // something else, we can extract from that something else directly instead.
+    // However, we will need to chain I's indices with the requested indices.
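+    // Worked example (illustrative, not from the original source): if I is
+    //   %I = extractvalue { i32, { i32, i32 } } %agg, 1
+    // and the caller asked for index 0 of %I, the combined request is
+    // equivalent to extracting %agg at indices [1, 0]; the concatenation
+    // below builds exactly that index list.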
+
+    // Calculate the number of indices required
+    unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+    // Allocate some space to put the new indices in
+    SmallVector<unsigned, 5> Idxs;
+    Idxs.reserve(size);
+    // Add indices from the extract value instruction
+    for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
+         i != e; ++i)
+      Idxs.push_back(*i);
+
+    // Add requested indices
+    for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
+      Idxs.push_back(*i);
+
+    assert(Idxs.size() == size
+           && "Number of indices added not correct?");
+
+    return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
+                             InsertBefore);
+  }
+  // Otherwise, we don't know (such as, extracting from a function return value
+  // or load instruction)
+  return 0;
+}
+
+/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
+/// it can be expressed as a base pointer plus a constant offset. Return the
+/// base and offset to the caller.
+Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+                                              const TargetData &TD) {
+  Operator *PtrOp = dyn_cast<Operator>(Ptr);
+  if (PtrOp == 0) return Ptr;
+
+  // Just look through bitcasts.
+  if (PtrOp->getOpcode() == Instruction::BitCast)
+    return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+  // If this is a GEP with constant indices, we can look through it.
+  GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+  if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+  gep_type_iterator GTI = gep_type_begin(GEP);
+  for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+       ++I, ++GTI) {
+    ConstantInt *OpC = cast<ConstantInt>(*I);
+    if (OpC->isZero()) continue;
+
+    // Handle a struct and array indices which add their offset to the pointer.
+    if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+      Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+    } else {
+      uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType());
+      Offset += OpC->getSExtValue()*Size;
+    }
+  }
+
+  // Re-sign extend from the pointer size if needed to get overflow edge cases
+  // right.
+  unsigned PtrSize = TD.getPointerSizeInBits();
+  if (PtrSize < 64)
+    Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+  return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// GetConstantStringInfo - This function extracts the bytes of the
+/// null-terminated C string pointed to by V.  If successful, it returns true
+/// and returns the string in Str.  If unsuccessful, it returns false.
+bool llvm::GetConstantStringInfo(const Value *V, std::string &Str,
+                                 uint64_t Offset,
+                                 bool StopAtNul) {
+  // If V is NULL then return false;
+  if (V == NULL) return false;
+
+  // Look through bitcast instructions.
+  if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetConstantStringInfo(BCI->getOperand(0), Str, Offset, StopAtNul);
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return false because ConstantArray can't occur
+  // any other way
+  const User *GEP = 0;
+  if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() == Instruction::BitCast)
+      return GetConstantStringInfo(CE->getOperand(0), Str, Offset, StopAtNul);
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return false;
+    GEP = CE;
+  }
+
+  if (GEP) {
+    // Make sure the GEP has exactly three arguments.
+    if (GEP->getNumOperands() != 3)
+      return false;
+
+    // Make sure the index-ee is a pointer to array of i8.
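+    // The shape being matched is the usual constant-string access pattern
+    // (illustrative IR, not from the original source):
+    //   @.str = constant [6 x i8] c"hello\00"
+    //   %p = getelementptr [6 x i8]* @.str, i64 0, i64 0
+    // i.e. a two-index GEP off a pointer to an array of i8.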
+    const PointerType *PT = cast<PointerType>(GEP->getOperand(0)->getType());
+    const ArrayType *AT = dyn_cast<ArrayType>(PT->getElementType());
+    if (AT == 0 || !AT->getElementType()->isIntegerTy(8))
+      return false;
+
+    // Check to make sure that the first operand of the GEP is an integer and
+    // has value 0 so that we are sure we're indexing into the initializer.
+    const ConstantInt *FirstIdx = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    if (FirstIdx == 0 || !FirstIdx->isZero())
+      return false;
+
+    // If the second index isn't a ConstantInt, then this is a variable index
+    // into the array.  If this occurs, we can't say anything meaningful about
+    // the string.
+    uint64_t StartIdx = 0;
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+      StartIdx = CI->getZExtValue();
+    else
+      return false;
+    return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset,
+                                 StopAtNul);
+  }
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized. The referenced constant
+  // initializer is the array that we'll use for optimization.
+  const GlobalVariable* GV = dyn_cast<GlobalVariable>(V);
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+  const Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case
+  if (isa<ConstantAggregateZero>(GlobalInit)) {
+    // This is a degenerate case. The initializer is constant zero so the
+    // length of the string must be zero.
+    Str.clear();
+    return true;
+  }
+
+  // Must be a Constant Array
+  const ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (Array == 0 || !Array->getType()->getElementType()->isIntegerTy(8))
+    return false;
+
+  // Get the number of elements in the array
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  if (Offset > NumElts)
+    return false;
+
+  // Traverse the constant array from 'Offset' which is the place the GEP refers
+  // to in the array.
+  Str.reserve(NumElts-Offset);
+  for (unsigned i = Offset; i != NumElts; ++i) {
+    const Constant *Elt = Array->getOperand(i);
+    const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return false;
+    if (StopAtNul && CI->isZero())
+      return true; // we found end of string, success!
+    Str += (char)CI->getZExtValue();
+  }
+
+  // The array isn't null terminated, but maybe this is a memcpy, not a strcpy.
+  return true;
+}
+
+// These next two are very similar to the above, but also look through PHI
+// nodes.
+// TODO: See if we can integrate these two together.
+
+/// GetStringLengthH - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
+  // Look through noop bitcast instructions.
+  if (BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+    return GetStringLengthH(BCI->getOperand(0), PHIs);
+
+  // If this is a PHI node, there are two cases: either we have already seen it
+  // or we haven't.
+  if (PHINode *PN = dyn_cast<PHINode>(V)) {
+    if (!PHIs.insert(PN))
+      return ~0ULL;  // already in the set.
+
+    // If it was new, see if all the input strings are the same length.
+    uint64_t LenSoFar = ~0ULL;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      uint64_t Len = GetStringLengthH(PN->getIncomingValue(i), PHIs);
+      if (Len == 0) return 0; // Unknown length -> unknown.
+
+      if (Len == ~0ULL) continue;
+
+      if (Len != LenSoFar && LenSoFar != ~0ULL)
+        return 0;    // Disagree -> unknown.
+      LenSoFar = Len;
+    }
+
+    // Success, all agree.
+    return LenSoFar;
+  }
+
+  // strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
+  if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+    uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
+    if (Len1 == 0) return 0;
+    uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
+    if (Len2 == 0) return 0;
+    if (Len1 == ~0ULL) return Len2;
+    if (Len2 == ~0ULL) return Len1;
+    if (Len1 != Len2) return 0;
+    return Len1;
+  }
+
+  // If the value is not a GEP instruction nor a constant expression with a
+  // GEP instruction, then return unknown.
+  User *GEP = 0;
+  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(V)) {
+    GEP = GEPI;
+  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    if (CE->getOpcode() != Instruction::GetElementPtr)
+      return 0;
+    GEP = CE;
+  } else {
+    return 0;
+  }
+
+  // Make sure the GEP has exactly three arguments.
+  if (GEP->getNumOperands() != 3)
+    return 0;
+
+  // Check to make sure that the first operand of the GEP is an integer and
+  // has value 0 so that we are sure we're indexing into the initializer.
+  if (ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(1))) {
+    if (!Idx->isZero())
+      return 0;
+  } else
+    return 0;
+
+  // If the second index isn't a ConstantInt, then this is a variable index
+  // into the array.  If this occurs, we can't say anything meaningful about
+  // the string.
+  uint64_t StartIdx = 0;
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(2)))
+    StartIdx = CI->getZExtValue();
+  else
+    return 0;
+
+  // The GEP instruction, constant or instruction, must reference a global
+  // variable that is a constant and is initialized. The referenced constant
+  // initializer is the array that we'll use for optimization.
+  GlobalVariable* GV = dyn_cast<GlobalVariable>(GEP->getOperand(0));
+  if (!GV || !GV->isConstant() || !GV->hasInitializer() ||
+      GV->mayBeOverridden())
+    return 0;
+  Constant *GlobalInit = GV->getInitializer();
+
+  // Handle the ConstantAggregateZero case, which is a degenerate case. The
+  // initializer is constant zero so the length of the string must be zero.
+  if (isa<ConstantAggregateZero>(GlobalInit))
+    return 1;  // Len = 0 offset by 1.
+
+  // Must be a Constant Array
+  ConstantArray *Array = dyn_cast<ConstantArray>(GlobalInit);
+  if (!Array || !Array->getType()->getElementType()->isIntegerTy(8))
+    return 0;
+
+  // Get the number of elements in the array
+  uint64_t NumElts = Array->getType()->getNumElements();
+
+  // Traverse the constant array from StartIdx (derived above) which is
+  // the place the GEP refers to in the array.
+  for (unsigned i = StartIdx; i != NumElts; ++i) {
+    Constant *Elt = Array->getOperand(i);
+    ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
+    if (!CI) // This array isn't suitable, non-int initializer.
+      return 0;
+    if (CI->isZero())
+      return i-StartIdx+1; // We found end of string, success!
+  }
+
+  return 0; // The array isn't null terminated, conservatively return 'unknown'.
+}
+
+/// GetStringLength - If we can compute the length of the string pointed to by
+/// the specified pointer, return 'len+1'.  If we can't, return 0.
+uint64_t llvm::GetStringLength(Value *V) {
+  if (!V->getType()->isPointerTy()) return 0;
+
+  SmallPtrSet<PHINode*, 32> PHIs;
+  uint64_t Len = GetStringLengthH(V, PHIs);
+  // If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
+  // an empty string as a length.
+  return Len == ~0ULL ? 1 : Len;
+}
+
+Value *
+llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
+  if (!V->getType()->isPointerTy())
+    return V;
+  for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+      V = GEP->getPointerOperand();
+    } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+      V = cast<Operator>(V)->getOperand(0);
+    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+      if (GA->mayBeOverridden())
+        return V;
+      V = GA->getAliasee();
+    } else {
+      // See if InstructionSimplify knows any relevant tricks.
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        // TODO: Acquire a DominatorTree and use it.
+        if (Value *Simplified = SimplifyInstruction(I, TD, 0)) {
+          V = Simplified;
+          continue;
+        }
+
+      return V;
+    }
+    assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+  }
+  return V;
+}
diff --git a/final/lib/Archive/Archive.cpp b/final/lib/Archive/Archive.cpp
new file mode 100644
index 00000000000..1eab27d3eba
--- /dev/null
+++ b/final/lib/Archive/Archive.cpp
@@ -0,0 +1,261 @@
+//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the Archive and ArchiveMember
+// classes that are common to both reading and writing archives.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Module.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/system_error.h"
+#include <memory>
+#include <cstring>
+using namespace llvm;
+
+// getMemberSize - compute the actual physical size of the file member as seen
+// on disk. This isn't the size of member's payload. Use getSize() for that.
+unsigned
+ArchiveMember::getMemberSize() const {
+  // Basically it's the file size plus the header size
+  unsigned result = info.fileSize + sizeof(ArchiveMemberHeader);
+
+  // If it has a long filename, include the name length
+  if (hasLongFilename())
+    result += path.str().length() + 1;
+
+  // If it is now of odd length, include the padding byte
+  if (result % 2 != 0)
+    result++;
+
+  return result;
+}
+
+// This default constructor is only used by the ilist when it creates its
+// sentry node. We give it specific static values to make it stand out a bit.
+ArchiveMember::ArchiveMember()
+  : parent(0), path("--invalid--"), flags(0), data(0)
+{
+  info.user = sys::Process::GetCurrentUserId();
+  info.group = sys::Process::GetCurrentGroupId();
+  info.mode = 0777;
+  info.fileSize = 0;
+  info.modTime = sys::TimeValue::now();
+}
+
+// This is the constructor that the Archive class uses when it is building or
+// reading an archive. It just defaults a few things and ensures the parent is
+// set for the iplist. The Archive class fills in the ArchiveMember's data.
+// This is required because correctly setting the data may depend on other
+// things in the Archive.
+ArchiveMember::ArchiveMember(Archive* PAR)
+  : parent(PAR), path(), flags(0), data(0)
+{
+}
+
+// This method allows an ArchiveMember to be replaced with the data for a
+// different file, presumably as an update to the member. It also makes sure
+// the flags are reset correctly.
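+// A hypothetical use (illustrative only, not part of the original code):
+//   std::string err;
+//   if (member->replaceWith(sys::Path("updated.bc"), &err))
+//     /* handle err; replaceWith returns true on failure */;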
+bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) {
+  bool Exists;
+  if (sys::fs::exists(newFile.str(), Exists) || !Exists) {
+    if (ErrMsg)
+      *ErrMsg = "Can not replace an archive member with a non-existent file";
+    return true;
+  }
+
+  data = 0;
+  path = newFile;
+
+  // SVR4 symbol tables have an empty name
+  if (path.str() == ARFILE_SVR4_SYMTAB_NAME)
+    flags |= SVR4SymbolTableFlag;
+  else
+    flags &= ~SVR4SymbolTableFlag;
+
+  // BSD4.4 symbol tables have a special name
+  if (path.str() == ARFILE_BSD4_SYMTAB_NAME)
+    flags |= BSD4SymbolTableFlag;
+  else
+    flags &= ~BSD4SymbolTableFlag;
+
+  // LLVM symbol tables have a very specific name
+  if (path.str() == ARFILE_LLVM_SYMTAB_NAME)
+    flags |= LLVMSymbolTableFlag;
+  else
+    flags &= ~LLVMSymbolTableFlag;
+
+  // String table name
+  if (path.str() == ARFILE_STRTAB_NAME)
+    flags |= StringTableFlag;
+  else
+    flags &= ~StringTableFlag;
+
+  // If it has a slash then it has a path
+  bool hasSlash = path.str().find('/') != std::string::npos;
+  if (hasSlash)
+    flags |= HasPathFlag;
+  else
+    flags &= ~HasPathFlag;
+
+  // If it has a slash or it's over 15 chars then it's a long filename format
+  if (hasSlash || path.str().length() > 15)
+    flags |= HasLongFilenameFlag;
+  else
+    flags &= ~HasLongFilenameFlag;
+
+  // Get the signature and status info
+  const char* signature = (const char*) data;
+  SmallString<4> magic;
+  if (!signature) {
+    sys::fs::get_magic(path.str(), magic.capacity(), magic);
+    signature = magic.c_str();
+    const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg);
+    if (FSinfo)
+      info = *FSinfo;
+    else
+      return true;
+  }
+
+  // Determine what kind of file it is.
+  switch (sys::IdentifyFileType(signature,4)) {
+    case sys::Bitcode_FileType:
+      flags |= BitcodeFlag;
+      break;
+    default:
+      flags &= ~BitcodeFlag;
+      break;
+  }
+  return false;
+}
+
+// Archive constructor - this is the only constructor that gets used for the
+// Archive class. Everything else (default,copy) is deprecated. This just
+// initializes and maps the file into memory, if requested.
+Archive::Archive(const sys::Path& filename, LLVMContext& C)
+  : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(),
+    symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) {
+}
+
+bool
+Archive::mapToMemory(std::string* ErrMsg) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) {
+    if (ErrMsg)
+      *ErrMsg = ec.message();
+    return true;
+  }
+  mapfile = File.take();
+  base = mapfile->getBufferStart();
+  return false;
+}
+
+void Archive::cleanUpMemory() {
+  // Shutdown the file mapping
+  delete mapfile;
+  mapfile = 0;
+  base = 0;
+
+  // Forget the entire symbol table
+  symTab.clear();
+  symTabSize = 0;
+
+  firstFileOffset = 0;
+
+  // Free the foreign symbol table member
+  if (foreignST) {
+    delete foreignST;
+    foreignST = 0;
+  }
+
+  // Delete any Modules and ArchiveMember's we've allocated as a result of
+  // symbol table searches.
+  for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I) {
+    delete I->second.first;
+    delete I->second.second;
+  }
+}
+
+// Archive destructor - just clean up memory
+Archive::~Archive() {
+  cleanUpMemory();
+}
+
+
+
+static void getSymbols(Module *M, std::vector<std::string>& symbols) {
+  // Loop over global variables
+  for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI)
+    if (!GI->isDeclaration() && !GI->hasLocalLinkage())
+      if (!GI->getName().empty())
+        symbols.push_back(GI->getName());
+
+  // Loop over functions
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+    if (!FI->isDeclaration() && !FI->hasLocalLinkage())
+      if (!FI->getName().empty())
+        symbols.push_back(FI->getName());
+
+  // Loop over aliases
+  for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end();
+       AI != AE; ++AI) {
+    if (AI->hasName())
+      symbols.push_back(AI->getName());
+  }
+}
+
+// Get just the externally visible defined symbols from the bitcode
+bool llvm::GetBitcodeSymbols(const sys::Path& fName,
+                             LLVMContext& Context,
+                             std::vector<std::string>& symbols,
+                             std::string* ErrMsg) {
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) {
+    if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": "
+                        + ec.message();
+    return true;
+  }
+
+  Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+  if (!M)
+    return true;
+
+  // Get the symbols
+  getSymbols(M, symbols);
+
+  // Done with the module.
+  delete M;
+  return true;
+}
+
+Module*
+llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length,
+                        const std::string& ModuleID,
+                        LLVMContext& Context,
+                        std::vector<std::string>& symbols,
+                        std::string* ErrMsg) {
+  // Get the module.
+  OwningPtr<MemoryBuffer> Buffer(
+    MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length), ModuleID.c_str()));
+
+  Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg);
+  if (!M)
+    return 0;
+
+  // Get the symbols
+  getSymbols(M, symbols);
+
+  // Done with the module. Note that it's the caller's responsibility to delete
+  // the Module.
+  return M;
+}
diff --git a/final/lib/Archive/ArchiveInternals.h b/final/lib/Archive/ArchiveInternals.h
new file mode 100644
index 00000000000..55684f7023d
--- /dev/null
+++ b/final/lib/Archive/ArchiveInternals.h
@@ -0,0 +1,89 @@
+//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Internal implementation header for LLVM Archive files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H
+#define LIB_ARCHIVE_ARCHIVEINTERNALS_H
+
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Support/TimeValue.h"
+#include "llvm/ADT/StringExtras.h"
+
+#include <cstring>
+
+#define ARFILE_MAGIC "!<arch>\n"                   ///< magic string
+#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1)  ///< length of magic string
+#define ARFILE_SVR4_SYMTAB_NAME "/               " ///< SVR4 symtab entry name
+#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name
+#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name
+#define ARFILE_STRTAB_NAME      "//              " ///< Name of string table
+#define ARFILE_PAD "\n"                            ///< inter-file align padding
+#define ARFILE_MEMBER_MAGIC "`\n"                  ///< fmag field magic #
+
+namespace llvm {
+
+  class LLVMContext;
+
+  /// The ArchiveMemberHeader structure is used internally for bitcode
+  /// archives.
+  /// The header precedes each file member in the archive. This structure is
+  /// defined using character arrays for direct and correct interpretation
+  /// regardless of the endianness of the machine that produced it.
+  /// @brief Archive File Member Header
+  class ArchiveMemberHeader {
+    /// @name Data
+    /// @{
+    public:
+      char name[16];  ///< Name of the file member.
+      char date[12];  ///< File date, decimal seconds since Epoch
+      char uid[6];    ///< user id in ASCII decimal
+      char gid[6];    ///< group id in ASCII decimal
+      char mode[8];   ///< file mode in ASCII octal
+      char size[10];  ///< file size in ASCII decimal
+      char fmag[2];   ///< Always contains ARFILE_MAGIC_TERMINATOR
+
+    /// @}
+    /// @name Methods
+    /// @{
+    public:
+    void init() {
+      memset(name,' ',16);
+      memset(date,' ',12);
+      memset(uid,' ',6);
+      memset(gid,' ',6);
+      memset(mode,' ',8);
+      memset(size,' ',10);
+      fmag[0] = '`';
+      fmag[1] = '\n';
+    }
+
+    bool checkSignature() {
+      return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2);
+    }
+  };
+
+  // Get just the externally visible defined symbols from the bitcode
+  bool GetBitcodeSymbols(const sys::Path& fName,
+                         LLVMContext& Context,
+                         std::vector<std::string>& symbols,
+                         std::string* ErrMsg);
+
+  Module* GetBitcodeSymbols(const char *Buffer, unsigned Length,
+                            const std::string& ModuleID,
+                            LLVMContext& Context,
+                            std::vector<std::string>& symbols,
+                            std::string* ErrMsg);
+}
+
+#endif
+
+// vim: sw=2 ai
diff --git a/final/lib/Archive/ArchiveReader.cpp b/final/lib/Archive/ArchiveReader.cpp
new file mode 100644
index 00000000000..eef6fe0b1c1
--- /dev/null
+++ b/final/lib/Archive/ArchiveReader.cpp
@@ -0,0 +1,630 @@
+//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Builds up standard unix archive files (.a) containing LLVM bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ArchiveInternals.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Module.h"
+#include <cstdlib>
+#include <memory>
+using namespace llvm;
+
+/// Read a variable-bit-rate encoded unsigned integer
+static inline unsigned readInteger(const char*&At, const char*End) {
+  unsigned Shift = 0;
+  unsigned Result = 0;
+
+  do {
+    if (At == End)
+      return Result;
+    Result |= (unsigned)((*At++) & 0x7F) << Shift;
+    Shift += 7;
+  } while (At[-1] & 0x80);
+  return Result;
+}
+
+// Completely parse the Archive's symbol table and populate symTab member var.
+bool
+Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) {
+  const char* At = (const char*) data;
+  const char* End = At + size;
+  while (At < End) {
+    unsigned offset = readInteger(At, End);
+    if (At == End) {
+      if (error)
+        *error = "Ran out of data reading vbr_uint for symtab offset!";
+      return false;
+    }
+    unsigned length = readInteger(At, End);
+    if (At == End) {
+      if (error)
+        *error = "Ran out of data reading vbr_uint for symtab length!";
+      return false;
+    }
+    if (At + length > End) {
+      if (error)
+        *error = "Malformed symbol table: length not consistent with size";
+      return false;
+    }
+    // we don't care if it can't be inserted (duplicate entry)
+    symTab.insert(std::make_pair(std::string(At, length), offset));
+    At += length;
+  }
+  symTabSize = size;
+  return true;
+}
+
+// This member parses an ArchiveMemberHeader that is presumed to be pointed to
+// by At. The At pointer is updated to the byte just after the header, which
+// can be variable in size.
+ArchiveMember*
+Archive::parseMemberHeader(const char*& At, const char* End, std::string* error)
+{
+  if (At + sizeof(ArchiveMemberHeader) >= End) {
+    if (error)
+      *error = "Unexpected end of file";
+    return 0;
+  }
+
+  // Cast archive member header
+  ArchiveMemberHeader* Hdr = (ArchiveMemberHeader*)At;
+  At += sizeof(ArchiveMemberHeader);
+
+  // Extract the size and determine if the file is
+  // compressed or not (negative length).
+  int flags = 0;
+  int MemberSize = atoi(Hdr->size);
+  if (MemberSize < 0) {
+    flags |= ArchiveMember::CompressedFlag;
+    MemberSize = -MemberSize;
+  }
+
+  // Check the size of the member for sanity
+  if (At + MemberSize > End) {
+    if (error)
+      *error = "invalid member length in archive file";
+    return 0;
+  }
+
+  // Check the member signature
+  if (!Hdr->checkSignature()) {
+    if (error)
+      *error = "invalid file member signature";
+    return 0;
+  }
+
+  // Convert and check the member name
+  // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol
+  // table. The special name "//" and 14 blanks is for a string table, used
+  // for long file names. This library doesn't generate either of those but
+  // it will accept them. If the name starts with #1/ and the remainder is
+  // digits, then those digits specify the length of the name that is
+  // stored immediately following the header. The special name
+  // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode.
+  // Anything else is a regular, short filename that is terminated with
+  // a '/' and blanks.
+
+  std::string pathname;
+  switch (Hdr->name[0]) {
+    case '#':
+      if (Hdr->name[1] == '1' && Hdr->name[2] == '/') {
+        if (isdigit(Hdr->name[3])) {
+          unsigned len = atoi(&Hdr->name[3]);
+          const char *nulp = (const char *)memchr(At, '\0', len);
+          pathname.assign(At, nulp != 0 ? (uintptr_t)(nulp - At) : len);
+          At += len;
+          MemberSize -= len;
+          flags |= ArchiveMember::HasLongFilenameFlag;
+        } else {
+          if (error)
+            *error = "invalid long filename";
+          return 0;
+        }
+      } else if (Hdr->name[1] == '_' &&
+                 (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) {
+        // The member is using a long file name (>15 chars) format.
+        // This format is standard for 4.4BSD and Mac OSX operating
+        // systems. LLVM uses it similarly. In this format, the
+        // remainder of the name field (after #1/) specifies the
+        // length of the file name which occupy the first bytes of
+        // the member's data. The pathname already has the #1/ stripped.
+        pathname.assign(ARFILE_LLVM_SYMTAB_NAME);
+        flags |= ArchiveMember::LLVMSymbolTableFlag;
+      }
+      break;
+    case '/':
+      if (Hdr->name[1]== '/') {
+        if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) {
+          pathname.assign(ARFILE_STRTAB_NAME);
+          flags |= ArchiveMember::StringTableFlag;
+        } else {
+          if (error)
+            *error = "invalid string table name";
+          return 0;
+        }
+      } else if (Hdr->name[1] == ' ') {
+        if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) {
+          pathname.assign(ARFILE_SVR4_SYMTAB_NAME);
+          flags |= ArchiveMember::SVR4SymbolTableFlag;
+        } else {
+          if (error)
+            *error = "invalid SVR4 symbol table name";
+          return 0;
+        }
+      } else if (isdigit(Hdr->name[1])) {
+        unsigned index = atoi(&Hdr->name[1]);
+        if (index < strtab.length()) {
+          const char* namep = strtab.c_str() + index;
+          const char* endp = strtab.c_str() + strtab.length();
+          const char* p = namep;
+          const char* last_p = p;
+          while (p < endp) {
+            if (*p == '\n' && *last_p == '/') {
+              pathname.assign(namep, last_p - namep);
+              flags |= ArchiveMember::HasLongFilenameFlag;
+              break;
+            }
+            last_p = p;
+            p++;
+          }
+          if (p >= endp) {
+            if (error)
+              *error = "missing name terminator in string table";
+            return 0;
+          }
+        } else {
+          if (error)
+            *error = "name index beyond string table";
+          return 0;
+        }
+      }
+      break;
+    case '_':
+      if (Hdr->name[1] == '_' &&
+          (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) {
+        pathname.assign(ARFILE_BSD4_SYMTAB_NAME);
+        flags |= ArchiveMember::BSD4SymbolTableFlag;
+        break;
+      }
+      /* FALL THROUGH */
+
+    default:
+      char* slash = (char*) memchr(Hdr->name, '/', 16);
+      if (slash == 0)
+        slash = Hdr->name + 16;
+      pathname.assign(Hdr->name, slash - Hdr->name);
+      break;
+  }
+
+  // Determine if this is a bitcode file
+  switch (sys::IdentifyFileType(At, 4)) {
+    case sys::Bitcode_FileType:
+      flags |= ArchiveMember::BitcodeFlag;
+      break;
+    default:
+      flags &= ~ArchiveMember::BitcodeFlag;
+      break;
+  }
+
+  // Instantiate the ArchiveMember to be filled
+  ArchiveMember* member = new ArchiveMember(this);
+
+  // Fill in fields of the ArchiveMember
+  member->parent = this;
+  member->path.set(pathname);
+  member->info.fileSize = MemberSize;
+  member->info.modTime.fromEpochTime(atoi(Hdr->date));
+  unsigned int mode;
+  sscanf(Hdr->mode, "%o", &mode);
+  member->info.mode = mode;
+  member->info.user = atoi(Hdr->uid);
+  member->info.group = atoi(Hdr->gid);
+  member->flags = flags;
+  member->data = At;
+
+  return member;
+}
+
+bool
+Archive::checkSignature(std::string* error) {
+  // Check the magic string at file's header
+  if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) {
+    if (error)
+      *error = "invalid signature for an archive file";
+    return false;
+  }
+  return true;
+}
+
+// This function loads the entire archive and fully populates its ilist with
+// the members of the archive file. This is typically used in preparation for
+// editing the contents of the archive.
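+// On-disk layout being parsed (sketch, illustrative only):
+//
+//   "!<arch>\n"                                     8-byte magic
+//   [60-byte ArchiveMemberHeader][member data]['\n' pad to even offset]
+//   [60-byte ArchiveMemberHeader][member data]['\n' pad to even offset]
+//   ...
+//
+// The "(intptr_t(At) & 1)" checks below implement the even-offset padding
+// skip after each member.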
+bool
+Archive::loadArchive(std::string* error) {
+
+  // Set up parsing
+  members.clear();
+  symTab.clear();
+  const char *At = base;
+  const char *End = mapfile->getBufferEnd();
+
+  if (!checkSignature(error))
+    return false;
+
+  At += 8;  // Skip the magic string.
+
+  bool seenSymbolTable = false;
+  bool foundFirstFile = false;
+  while (At < End) {
+    // parse the member header
+    const char* Save = At;
+    ArchiveMember* mbr = parseMemberHeader(At, End, error);
+    if (!mbr)
+      return false;
+
+    // check if this is the foreign symbol table
+    if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) {
+      // We just save this but don't do anything special
+      // with it. It doesn't count as the "first file".
+      if (foreignST) {
+        // What? Multiple foreign symbol tables? Just chuck it
+        // and retain the last one found.
+        delete foreignST;
+      }
+      foreignST = mbr;
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+    } else if (mbr->isStringTable()) {
+      // Simply suck the entire string table into a string
+      // variable. This will be used to get the names of the
+      // members that use the "/ddd" format for their names
+      // (SVR4 style long names).
+      strtab.assign(At, mbr->getSize());
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+      delete mbr;
+    } else if (mbr->isLLVMSymbolTable()) {
+      // This is the LLVM symbol table for the archive. If we've seen it
+      // already, it's an error. Otherwise, parse the symbol table and move on.
+      if (seenSymbolTable) {
+        if (error)
+          *error = "invalid archive: multiple symbol tables";
+        return false;
+      }
+      if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error))
+        return false;
+      seenSymbolTable = true;
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+      delete mbr; // We don't need this member in the list of members.
+    } else {
+      // This is just a regular file. If it's the first one, save its offset.
+      // Otherwise just push it on the list and move on to the next file.
+      if (!foundFirstFile) {
+        firstFileOffset = Save - base;
+        foundFirstFile = true;
+      }
+      members.push_back(mbr);
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+    }
+  }
+  return true;
+}
+
+// Open and completely load the archive file.
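+// Typical call (illustrative only, not part of the original code):
+//   std::string err;
+//   Archive *A = Archive::OpenAndLoad(sys::Path("libfoo.a"), Context, &err);
+//   if (!A) { /* err describes the failure */ }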
+Archive* +Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C, + std::string* ErrorMessage) { + std::auto_ptr result ( new Archive(file, C)); + if (result->mapToMemory(ErrorMessage)) + return 0; + if (!result->loadArchive(ErrorMessage)) + return 0; + return result.release(); +} + +// Get all the bitcode modules from the archive +bool +Archive::getAllModules(std::vector& Modules, + std::string* ErrMessage) { + + for (iterator I=begin(), E=end(); I != E; ++I) { + if (I->isBitcode()) { + std::string FullMemberName = archPath.str() + + "(" + I->getPath().str() + ")"; + MemoryBuffer *Buffer = + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); + + Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); + delete Buffer; + if (!M) + return true; + + Modules.push_back(M); + } + } + return false; +} + +// Load just the symbol table from the archive file +bool +Archive::loadSymbolTable(std::string* ErrorMsg) { + + // Set up parsing + members.clear(); + symTab.clear(); + const char *At = base; + const char *End = mapfile->getBufferEnd(); + + // Make sure we're dealing with an archive + if (!checkSignature(ErrorMsg)) + return false; + + At += 8; // Skip signature + + // Parse the first file member header + const char* FirstFile = At; + ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); + if (!mbr) + return false; + + if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { + // Skip the foreign symbol table, we don't do anything with it + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + delete mbr; + + // Read the next one + FirstFile = At; + mbr = parseMemberHeader(At, End, ErrorMsg); + if (!mbr) { + delete mbr; + return false; + } + } + + if (mbr->isStringTable()) { + // Process the string table entry + strtab.assign((const char*)mbr->getData(), mbr->getSize()); + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + delete mbr; + // Get the next one + FirstFile = At; + mbr = parseMemberHeader(At, End, ErrorMsg); + if (!mbr) { + delete mbr; + return false; + } + } + + // See if its the symbol table + if (mbr->isLLVMSymbolTable()) { + if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { + delete mbr; + return false; + } + + At += mbr->getSize(); + if ((intptr_t(At) & 1) == 1) + At++; + delete mbr; + // Can't be any more symtab headers so just advance + FirstFile = At; + } else { + // There's no symbol table in the file. We have to rebuild it from scratch + // because the intent of this method is to get the symbol table loaded so + // it can be searched efficiently. + // Add the member to the members list + members.push_back(mbr); + } + + firstFileOffset = FirstFile - base; + return true; +} + +// Open the archive and load just the symbol tables +Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, + LLVMContext& C, + std::string* ErrorMessage) { + std::auto_ptr result ( new Archive(file, C) ); + if (result->mapToMemory(ErrorMessage)) + return 0; + if (!result->loadSymbolTable(ErrorMessage)) + return 0; + return result.release(); +} + +// Look up one symbol in the symbol table and return the module that defines +// that symbol. +Module* +Archive::findModuleDefiningSymbol(const std::string& symbol, + std::string* ErrMsg) { + SymTabType::iterator SI = symTab.find(symbol); + if (SI == symTab.end()) + return 0; + + // The symbol table was previously constructed assuming that the members were + // written without the symbol table header. 
+  // values could not be adjusted to account for the offset of the symbol table
+  // because that could affect the size of the symbol table due to VBR encoding.
+  // We now have to account for this by adjusting the offset by the size of the
+  // symbol table and its header.
+  unsigned fileOffset =
+    SI->second +      // offset in symbol-table-less file
+    firstFileOffset;  // add offset to first "real" file in archive
+
+  // See if the module is already loaded
+  ModuleMap::iterator MI = modules.find(fileOffset);
+  if (MI != modules.end())
+    return MI->second.first;
+
+  // Module hasn't been loaded yet, we need to load it
+  const char* modptr = base + fileOffset;
+  ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(),
+                                         ErrMsg);
+  if (!mbr)
+    return 0;
+
+  // Now, load the bitcode module to get the Module.
+  std::string FullMemberName = archPath.str() + "(" +
+    mbr->getPath().str() + ")";
+  MemoryBuffer *Buffer =
+    MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()),
+                                   FullMemberName.c_str());
+
+  Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg);
+  if (!m)
+    return 0;
+
+  modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr)));
+
+  return m;
+}
+
+// Look up multiple symbols in the symbol table and return a set of
+// Modules that define those symbols.
+bool
+Archive::findModulesDefiningSymbols(std::set<std::string>& symbols,
+                                    std::set<Module*>& result,
+                                    std::string* error) {
+  if (!mapfile || !base) {
+    if (error)
+      *error = "Empty archive invalid for finding modules defining symbols";
+    return false;
+  }
+
+  if (symTab.empty()) {
+    // We don't have a symbol table, so we must build it now, but let's also
+    // make sure that we populate the modules table as we do this to ensure
+    // that we don't load them twice when findModuleDefiningSymbol is called
+    // below.
+
+    // Get a pointer to the first file
+    const char* At = base + firstFileOffset;
+    const char* End = mapfile->getBufferEnd();
+
+    while (At < End) {
+      // Compute the offset to be put in the symbol table
+      unsigned offset = At - base - firstFileOffset;
+
+      // Parse the file's header
+      ArchiveMember* mbr = parseMemberHeader(At, End, error);
+      if (!mbr)
+        return false;
+
+      // If it contains symbols
+      if (mbr->isBitcode()) {
+        // Get the symbols
+        std::vector<std::string> symbols;
+        std::string FullMemberName = archPath.str() + "(" +
+          mbr->getPath().str() + ")";
+        Module* M =
+          GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context,
+                            symbols, error);
+
+        if (M) {
+          // Insert the module's symbols into the symbol table
+          for (std::vector<std::string>::iterator I = symbols.begin(),
+               E = symbols.end(); I != E; ++I) {
+            symTab.insert(std::make_pair(*I, offset));
+          }
+          // Insert the Module and the ArchiveMember into the table of
+          // modules.
+          modules.insert(std::make_pair(offset, std::make_pair(M, mbr)));
+        } else {
+          if (error)
+            *error = "Can't parse bitcode member: " +
+              mbr->getPath().str() + ": " + *error;
+          delete mbr;
+          return false;
+        }
+      }
+
+      // Go to the next file location
+      At += mbr->getSize();
+      if ((intptr_t(At) & 1) == 1)
+        At++;
+    }
+  }
+
+  // At this point we have a valid symbol table (one way or another) so we
+  // just use it to quickly find the symbols requested.
+
+  for (std::set<std::string>::iterator I = symbols.begin(),
+       E = symbols.end(); I != E;) {
+    // See if this symbol exists
+    Module* m = findModuleDefiningSymbol(*I, error);
+    if (m) {
+      // The symbol exists; insert the Module into our result. Duplicates will
+      // be ignored.
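A couple of lines below, the loop prunes the set it is iterating over with symbols.erase(I++). That is the classic pre-C++11 idiom for erasing from a std::set mid-walk; a minimal standalone sketch (names hypothetical):

#include <set>
#include <string>

// Standalone sketch of the erase-during-iteration idiom used below:
// I++ is evaluated before erase() runs, so the iterator has already
// moved off the element whose node is about to be destroyed.
static void dropMatching(std::set<std::string> &Syms,
                         bool (*Pred)(const std::string &)) {
  for (std::set<std::string>::iterator I = Syms.begin(), E = Syms.end();
       I != E;) {
    if (Pred(*I))
      Syms.erase(I++);   // copy iterator, advance it, then erase the copy
    else
      ++I;
  }
}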
+ result.insert(m); + + // Remove the symbol now that its been resolved, being careful to + // post-increment the iterator. + symbols.erase(I++); + } else { + ++I; + } + } + return true; +} + +bool Archive::isBitcodeArchive() { + // Make sure the symTab has been loaded. In most cases this should have been + // done when the archive was constructed, but still, this is just in case. + if (symTab.empty()) + if (!loadSymbolTable(0)) + return false; + + // Now that we know it's been loaded, return true + // if it has a size + if (symTab.size()) return true; + + // We still can't be sure it isn't a bitcode archive + if (!loadArchive(0)) + return false; + + std::vector Modules; + std::string ErrorMessage; + + // Scan the archive, trying to load a bitcode member. We only load one to + // see if this works. + for (iterator I = begin(), E = end(); I != E; ++I) { + if (!I->isBitcode()) + continue; + + std::string FullMemberName = + archPath.str() + "(" + I->getPath().str() + ")"; + + MemoryBuffer *Buffer = + MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), + FullMemberName.c_str()); + Module *M = ParseBitcodeFile(Buffer, Context); + delete Buffer; + if (!M) + return false; // Couldn't parse bitcode, not a bitcode archive. + delete M; + return true; + } + + return false; +} diff --git a/final/lib/Archive/ArchiveWriter.cpp b/final/lib/Archive/ArchiveWriter.cpp new file mode 100644 index 00000000000..8fcc7aa29cc --- /dev/null +++ b/final/lib/Archive/ArchiveWriter.cpp @@ -0,0 +1,489 @@ +//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Builds up an LLVM archive file (.a) containing LLVM bitcode. +// +//===----------------------------------------------------------------------===// + +#include "ArchiveInternals.h" +#include "llvm/Module.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/system_error.h" +#include +#include +#include +using namespace llvm; + +// Write an integer using variable bit rate encoding. This saves a few bytes +// per entry in the symbol table. +static inline void writeInteger(unsigned num, std::ofstream& ARFile) { + while (1) { + if (num < 0x80) { // done? + ARFile << (unsigned char)num; + return; + } + + // Nope, we are bigger than a character, output the next 7 bits and set the + // high bit to say that there is more coming... + ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); + num >>= 7; // Shift out 7 bits now... + } +} + +// Compute how many bytes are taken by a given VBR encoded value. This is needed +// to pre-compute the size of the symbol table. +static inline unsigned numVbrBytes(unsigned num) { + + // Note that the following nested ifs are somewhat equivalent to a binary + // search. We split it in half by comparing against 2^14 first. This allows + // most reasonable values to be done in 2 comparisons instead of 1 for + // small ones and four for large ones. We expect this to access file offsets + // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, + // so this approach is reasonable. 
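The symbol table reader has to invert this encoding; the real decoder lives in the reader's parseSymbolTable (not part of this hunk), so the following is only a sketch of the format writeInteger() produces: least-significant seven bits first, with the high bit set on every byte except the last.

// Sketch of decoding writeInteger()'s output. Each byte carries seven
// payload bits, least-significant group first; a set high bit means
// another byte follows.
static unsigned readIntegerSketch(const char *&At, const char *End) {
  unsigned Result = 0;
  unsigned Shift = 0;
  while (At != End) {
    unsigned char B = (unsigned char)*At++;
    Result |= (unsigned)(B & 0x7F) << Shift;
    if ((B & 0x80) == 0)   // high bit clear: that was the final byte
      break;
    Shift += 7;
  }
  return Result;
}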
+ if (num < 1<<14) { + if (num < 1<<7) + return 1; + else + return 2; + } + if (num < 1<<21) + return 3; + + if (num < 1<<28) + return 4; + return 5; // anything >= 2^28 takes 5 bytes +} + +// Create an empty archive. +Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { + Archive* result = new Archive(FilePath, C); + return result; +} + +// Fill the ArchiveMemberHeader with the information from a member. If +// TruncateNames is true, names are flattened to 15 chars or less. The sz field +// is provided here instead of coming from the mbr because the member might be +// stored compressed and the compressed size is not the ArchiveMember's size. +// Furthermore compressed files have negative size fields to identify them as +// compressed. +bool +Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, + int sz, bool TruncateNames) const { + + // Set the permissions mode, uid and gid + hdr.init(); + char buffer[32]; + sprintf(buffer, "%-8o", mbr.getMode()); + memcpy(hdr.mode,buffer,8); + sprintf(buffer, "%-6u", mbr.getUser()); + memcpy(hdr.uid,buffer,6); + sprintf(buffer, "%-6u", mbr.getGroup()); + memcpy(hdr.gid,buffer,6); + + // Set the last modification date + uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); + sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); + memcpy(hdr.date,buffer,12); + + // Get rid of trailing blanks in the name + std::string mbrPath = mbr.getPath().str(); + size_t mbrLen = mbrPath.length(); + while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { + mbrPath.erase(mbrLen-1,1); + mbrLen--; + } + + // Set the name field in one of its various flavors. + bool writeLongName = false; + if (mbr.isStringTable()) { + memcpy(hdr.name,ARFILE_STRTAB_NAME,16); + } else if (mbr.isSVR4SymbolTable()) { + memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); + } else if (mbr.isBSD4SymbolTable()) { + memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); + } else if (mbr.isLLVMSymbolTable()) { + memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); + } else if (TruncateNames) { + const char* nm = mbrPath.c_str(); + unsigned len = mbrPath.length(); + size_t slashpos = mbrPath.rfind('/'); + if (slashpos != std::string::npos) { + nm += slashpos + 1; + len -= slashpos +1; + } + if (len > 15) + len = 15; + memcpy(hdr.name,nm,len); + hdr.name[len] = '/'; + } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { + memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); + hdr.name[mbrPath.length()] = '/'; + } else { + std::string nm = "#1/"; + nm += utostr(mbrPath.length()); + memcpy(hdr.name,nm.data(),nm.length()); + if (sz < 0) + sz -= mbrPath.length(); + else + sz += mbrPath.length(); + writeLongName = true; + } + + // Set the size field + if (sz < 0) { + buffer[0] = '-'; + sprintf(&buffer[1],"%-9u",(unsigned)-sz); + } else { + sprintf(buffer, "%-10u", (unsigned)sz); + } + memcpy(hdr.size,buffer,10); + + return writeLongName; +} + +// Insert a file into the archive before some other member. This also takes care +// of extracting the necessary flags and information from the file. 
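fillHeader() above fills the classic 60-byte ar(1) member header one fixed-width ASCII field at a time. The layout it writes into is defined in ArchiveInternals.h, which is not part of this hunk; in sketch form it looks like this:

// Sketch of the 60-byte ar(1) member header that fillHeader() targets;
// the real definition is ArchiveMemberHeader in ArchiveInternals.h.
// Every field is fixed-width ASCII, blank padded, with no NUL bytes.
struct ArHeaderSketch {
  char name[16]; // member name, or "#1/<len>" when the real name is long
  char date[12]; // decimal modification time, seconds since the epoch
  char uid[6];   // decimal user id
  char gid[6];   // decimal group id
  char mode[8];  // octal permission bits
  char size[10]; // decimal payload size in bytes
  char fmag[2];  // header terminator, conventionally a backquote + newline
};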
+bool
+Archive::addFileBefore(const sys::Path& filePath, iterator where,
+                       std::string* ErrMsg) {
+  bool Exists;
+  if (sys::fs::exists(filePath.str(), Exists) || !Exists) {
+    if (ErrMsg)
+      *ErrMsg = "Can not add a non-existent file to archive";
+    return true;
+  }
+
+  ArchiveMember* mbr = new ArchiveMember(this);
+
+  mbr->data = 0;
+  mbr->path = filePath;
+  const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg);
+  if (!FSInfo) {
+    delete mbr;
+    return true;
+  }
+  mbr->info = *FSInfo;
+
+  unsigned flags = 0;
+  bool hasSlash = filePath.str().find('/') != std::string::npos;
+  if (hasSlash)
+    flags |= ArchiveMember::HasPathFlag;
+  if (hasSlash || filePath.str().length() > 15)
+    flags |= ArchiveMember::HasLongFilenameFlag;
+
+  sys::LLVMFileType type;
+  if (sys::fs::identify_magic(mbr->path.str(), type))
+    type = sys::Unknown_FileType;
+  switch (type) {
+  case sys::Bitcode_FileType:
+    flags |= ArchiveMember::BitcodeFlag;
+    break;
+  default:
+    break;
+  }
+  mbr->flags = flags;
+  members.insert(where, mbr);
+  return false;
+}
+
+// Write one member out to the file.
+bool
+Archive::writeMember(
+  const ArchiveMember& member,
+  std::ofstream& ARFile,
+  bool CreateSymbolTable,
+  bool TruncateNames,
+  bool ShouldCompress,
+  std::string* ErrMsg
+) {
+
+  unsigned filepos = ARFile.tellp();
+  filepos -= 8;
+
+  // Get the data and its size either from the
+  // member's in-memory data or directly from the file.
+  size_t fSize = member.getSize();
+  const char *data = (const char*)member.getData();
+  MemoryBuffer *mFile = 0;
+  if (!data) {
+    OwningPtr<MemoryBuffer> File;
+    if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) {
+      if (ErrMsg)
+        *ErrMsg = ec.message();
+      return true;
+    }
+    mFile = File.take();
+    data = mFile->getBufferStart();
+    fSize = mFile->getBufferSize();
+  }
+
+  // Now that we have the data in memory, update the
+  // symbol table if it's a bitcode file.
+  if (CreateSymbolTable && member.isBitcode()) {
+    std::vector<std::string> symbols;
+    std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
+      + ")";
+    Module* M =
+      GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg);
+
+    // If the bitcode parsed successfully
+    if (M) {
+      for (std::vector<std::string>::iterator SI = symbols.begin(),
+           SE = symbols.end(); SI != SE; ++SI) {
+
+        std::pair<SymTabType::iterator, bool> Res =
+          symTab.insert(std::make_pair(*SI, filepos));
+
+        if (Res.second) {
+          symTabSize += SI->length() +
+                        numVbrBytes(SI->length()) +
+                        numVbrBytes(filepos);
+        }
+      }
+      // We don't need this module any more.
+      delete M;
+    } else {
+      delete mFile;
+      if (ErrMsg)
+        *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() +
+          ": " + *ErrMsg;
+      return true;
+    }
+  }
+
+  int hdrSize = fSize;
+
+  // Compute the fields of the header
+  ArchiveMemberHeader Hdr;
+  bool writeLongName = fillHeader(member, Hdr, hdrSize, TruncateNames);
+
+  // Write header to archive file
+  ARFile.write((char*)&Hdr, sizeof(Hdr));
+
+  // Write the long filename if it's long
+  if (writeLongName) {
+    ARFile.write(member.getPath().str().data(),
+                 member.getPath().str().length());
+  }
+
+  // Write the (possibly compressed) member's content to the file.
+  ARFile.write(data, fSize);
+
+  // Make sure the member is an even length
+  if ((ARFile.tellp() & 1) == 1)
+    ARFile << ARFILE_PAD;
+
+  // Close the mapped file if it was opened
+  delete mFile;
+  return false;
+}
+
+// Write out the LLVM symbol table as an archive member to the file.
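The "#1/" branch of fillHeader() above uses the BSD long-name convention: the name field stores only the length, writeMember() emits the real name as the first bytes of the payload, and sz is biased by that length. A small sketch of the name-field encoding (helper name hypothetical):

#include <sstream>
#include <string>

// Sketch: the 16-byte name field for a long member name holds "#1/<len>",
// e.g. "lib/Support/APInt.cpp" (21 bytes) becomes "#1/21"; the 21 name
// bytes are then written immediately after the header, before the data.
static std::string longNameFieldSketch(const std::string &Path) {
  std::ostringstream OS;
  OS << "#1/" << Path.length();
  return OS.str();
}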
+void +Archive::writeSymbolTable(std::ofstream& ARFile) { + + // Construct the symbol table's header + ArchiveMemberHeader Hdr; + Hdr.init(); + memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); + uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); + char buffer[32]; + sprintf(buffer, "%-8o", 0644); + memcpy(Hdr.mode,buffer,8); + sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); + memcpy(Hdr.uid,buffer,6); + sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); + memcpy(Hdr.gid,buffer,6); + sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); + memcpy(Hdr.date,buffer,12); + sprintf(buffer,"%-10u",symTabSize); + memcpy(Hdr.size,buffer,10); + + // Write the header + ARFile.write((char*)&Hdr, sizeof(Hdr)); + +#ifndef NDEBUG + // Save the starting position of the symbol tables data content. + unsigned startpos = ARFile.tellp(); +#endif + + // Write out the symbols sequentially + for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); + I != E; ++I) + { + // Write out the file index + writeInteger(I->second, ARFile); + // Write out the length of the symbol + writeInteger(I->first.length(), ARFile); + // Write out the symbol + ARFile.write(I->first.data(), I->first.length()); + } + +#ifndef NDEBUG + // Now that we're done with the symbol table, get the ending file position + unsigned endpos = ARFile.tellp(); +#endif + + // Make sure that the amount we wrote is what we pre-computed. This is + // critical for file integrity purposes. + assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); + + // Make sure the symbol table is even sized + if (symTabSize % 2 != 0 ) + ARFile << ARFILE_PAD; +} + +// Write the entire archive to the file specified when the archive was created. +// This writes to a temporary file first. Options are for creating a symbol +// table, flattening the file names (no directories, 15 chars max) and +// compressing each archive member. +bool +Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, + std::string* ErrMsg) +{ + // Make sure they haven't opened up the file, not loaded it, + // but are now trying to write it which would wipe out the file. + if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { + if (ErrMsg) + *ErrMsg = "Can't write an archive not opened for writing"; + return true; + } + + // Create a temporary file to store the archive in + sys::Path TmpArchive = archPath; + if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) + return true; + + // Make sure the temporary gets removed if we crash + sys::RemoveFileOnSignal(TmpArchive); + + // Create archive file for output. + std::ios::openmode io_mode = std::ios::out | std::ios::trunc | + std::ios::binary; + std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); + + // Check for errors opening or creating archive file. + if (!ArchiveFile.is_open() || ArchiveFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + archPath.str(); + return true; + } + + // If we're creating a symbol table, reset it now + if (CreateSymbolTable) { + symTabSize = 0; + symTab.clear(); + } + + // Write magic string to archive. + ArchiveFile << ARFILE_MAGIC; + + // Loop over all member files, and write them out. Note that this also + // builds the symbol table, symTab. 
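writeSymbolTable() above emits one (offset, name-length, name) record per symbol, both integers in the VBR form sketched earlier. Reading the blob back is the mirror image; this is only a sketch, since the real work is done by parseSymbolTable (not in this hunk):

#include <map>
#include <string>

unsigned readIntegerSketch(const char *&At, const char *End); // sketched above

// Sketch of walking the symbol table member's payload: records repeat
// until the blob is exhausted, and each maps a symbol name to the file
// offset of the member that defines it.
static void readSymTabSketch(const char *At, const char *End,
                             std::map<std::string, unsigned> &SymTab) {
  while (At < End) {
    unsigned Offset = readIntegerSketch(At, End); // member offset
    unsigned Length = readIntegerSketch(At, End); // symbol name length
    SymTab.insert(std::make_pair(std::string(At, At + Length), Offset));
    At += Length;
  }
}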
+ for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { + if (writeMember(*I, ArchiveFile, CreateSymbolTable, + TruncateNames, Compress, ErrMsg)) { + TmpArchive.eraseFromDisk(); + ArchiveFile.close(); + return true; + } + } + + // Close archive file. + ArchiveFile.close(); + + // Write the symbol table + if (CreateSymbolTable) { + // At this point we have written a file that is a legal archive but it + // doesn't have a symbol table in it. To aid in faster reading and to + // ensure compatibility with other archivers we need to put the symbol + // table first in the file. Unfortunately, this means mapping the file + // we just wrote back in and copying it to the destination file. + sys::Path FinalFilePath = archPath; + + // Map in the archive we just wrote. + { + OwningPtr arch; + if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { + if (ErrMsg) + *ErrMsg = ec.message(); + return true; + } + const char* base = arch->getBufferStart(); + + // Open another temporary file in order to avoid invalidating the + // mmapped data + if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) + return true; + sys::RemoveFileOnSignal(FinalFilePath); + + std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); + if (!FinalFile.is_open() || FinalFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); + return true; + } + + // Write the file magic number + FinalFile << ARFILE_MAGIC; + + // If there is a foreign symbol table, put it into the file now. Most + // ar(1) implementations require the symbol table to be first but llvm-ar + // can deal with it being after a foreign symbol table. This ensures + // compatibility with other ar(1) implementations as well as allowing the + // archive to store both native .o and LLVM .bc files, both indexed. + if (foreignST) { + if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { + FinalFile.close(); + TmpArchive.eraseFromDisk(); + return true; + } + } + + // Put out the LLVM symbol table now. + writeSymbolTable(FinalFile); + + // Copy the temporary file contents being sure to skip the file's magic + // number. + FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, + arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); + + // Close up shop + FinalFile.close(); + } // free arch. + + // Move the final file over top of TmpArchive + if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) + return true; + } + + // Before we replace the actual archive, we need to forget all the + // members, since they point to data in that old archive. We need to do + // this because we cannot replace an open file on Windows. + cleanUpMemory(); + + if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) + return true; + + // Set correct read and write permissions after temporary file is moved + // to final destination path. 
+ if (archPath.makeReadableOnDisk(ErrMsg)) + return true; + if (archPath.makeWriteableOnDisk(ErrMsg)) + return true; + + return false; +} diff --git a/final/lib/Archive/CMakeLists.txt b/final/lib/Archive/CMakeLists.txt new file mode 100644 index 00000000000..7ff478a41a5 --- /dev/null +++ b/final/lib/Archive/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMArchive + Archive.cpp + ArchiveReader.cpp + ArchiveWriter.cpp + ) diff --git a/final/lib/Archive/Makefile b/final/lib/Archive/Makefile new file mode 100644 index 00000000000..da9780403a0 --- /dev/null +++ b/final/lib/Archive/Makefile @@ -0,0 +1,17 @@ +##===- lib/Archive/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMArchive + +# We only want an archive so only those modules actually used by a tool are +# included. +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/final/lib/AsmParser/CMakeLists.txt b/final/lib/AsmParser/CMakeLists.txt new file mode 100644 index 00000000000..985ebe20098 --- /dev/null +++ b/final/lib/AsmParser/CMakeLists.txt @@ -0,0 +1,6 @@ +# AsmParser +add_llvm_library(LLVMAsmParser + LLLexer.cpp + LLParser.cpp + Parser.cpp + ) diff --git a/final/lib/AsmParser/LLLexer.cpp b/final/lib/AsmParser/LLLexer.cpp new file mode 100644 index 00000000000..857fa1ef626 --- /dev/null +++ b/final/lib/AsmParser/LLLexer.cpp @@ -0,0 +1,874 @@ +//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Lexer for .ll files. +// +//===----------------------------------------------------------------------===// + +#include "LLLexer.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instruction.h" +#include "llvm/LLVMContext.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Assembly/Parser.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +using namespace llvm; + +bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const { + ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error"); + return true; +} + +//===----------------------------------------------------------------------===// +// Helper functions. +//===----------------------------------------------------------------------===// + +// atoull - Convert an ascii string of decimal digits into the unsigned long +// long representation... this does not have to do input error checking, +// because we know that the input will be matched by a suitable regex... +// +uint64_t LLLexer::atoull(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; Buffer++) { + uint64_t OldRes = Result; + Result *= 10; + Result += *Buffer-'0'; + if (Result < OldRes) { // Uh, oh, overflow detected!!! 
+ Error("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { + uint64_t Result = 0; + for (; Buffer != End; ++Buffer) { + uint64_t OldRes = Result; + Result *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Result += C-'0'; + else if (C >= 'A' && C <= 'F') + Result += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Result += C-'a'+10; + + if (Result < OldRes) { // Uh, oh, overflow detected!!! + Error("constant bigger than 64 bits detected!"); + return 0; + } + } + return Result; +} + +void LLLexer::HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + Pair[1] = 0; + for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); +} + +/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into +/// { low64, high16 } as usual for an APInt. +void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, + uint64_t Pair[2]) { + Pair[1] = 0; + for (int i=0; i<4 && Buffer != End; i++, Buffer++) { + assert(Buffer != End); + Pair[1] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[1] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[1] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[1] += C-'a'+10; + } + Pair[0] = 0; + for (int i=0; i<16; i++, Buffer++) { + Pair[0] *= 16; + char C = *Buffer; + if (C >= '0' && C <= '9') + Pair[0] += C-'0'; + else if (C >= 'A' && C <= 'F') + Pair[0] += C-'A'+10; + else if (C >= 'a' && C <= 'f') + Pair[0] += C-'a'+10; + } + if (Buffer != End) + Error("constant bigger than 128 bits detected!"); +} + +// UnEscapeLexed - Run through the specified buffer and change \xx codes to the +// appropriate character. +static void UnEscapeLexed(std::string &Str) { + if (Str.empty()) return; + + char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size(); + char *BOut = Buffer; + for (char *BIn = Buffer; BIn != EndBuffer; ) { + if (BIn[0] == '\\') { + if (BIn < EndBuffer-1 && BIn[1] == '\\') { + *BOut++ = '\\'; // Two \ becomes one + BIn += 2; + } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { + char Tmp = BIn[3]; BIn[3] = 0; // Terminate string + *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number + BIn[3] = Tmp; // Restore character + BIn += 3; // Skip over handled chars + ++BOut; + } else { + *BOut++ = *BIn++; + } + } else { + *BOut++ = *BIn++; + } + } + Str.resize(BOut-Buffer); +} + +/// isLabelChar - Return true for [-a-zA-Z$._0-9]. +static bool isLabelChar(char C) { + return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; +} + + +/// isLabelTail - Return true if this pointer points to a valid end of a label. +static const char *isLabelTail(const char *CurPtr) { + while (1) { + if (CurPtr[0] == ':') return CurPtr+1; + if (!isLabelChar(CurPtr[0])) return 0; + ++CurPtr; + } +} + + + +//===----------------------------------------------------------------------===// +// Lexer definition. 
+//===----------------------------------------------------------------------===// + +LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err, + LLVMContext &C) + : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) { + CurPtr = CurBuf->getBufferStart(); +} + +std::string LLLexer::getFilename() const { + return CurBuf->getBufferIdentifier(); +} + +int LLLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: return (unsigned char)CurChar; + case 0: + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } +} + + +lltok::Kind LLLexer::LexToken() { + TokStart = CurPtr; + + int CurChar = getNextChar(); + switch (CurChar) { + default: + // Handle letters: [a-zA-Z_] + if (isalpha(CurChar) || CurChar == '_') + return LexIdentifier(); + + return lltok::Error; + case EOF: return lltok::Eof; + case 0: + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return LexToken(); + case '+': return LexPositive(); + case '@': return LexAt(); + case '%': return LexPercent(); + case '"': return LexQuote(); + case '.': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; + } + if (CurPtr[0] == '.' && CurPtr[1] == '.') { + CurPtr += 2; + return lltok::dotdotdot; + } + return lltok::Error; + case '$': + if (const char *Ptr = isLabelTail(CurPtr)) { + CurPtr = Ptr; + StrVal.assign(TokStart, CurPtr-1); + return lltok::LabelStr; + } + return lltok::Error; + case ';': + SkipLineComment(); + return LexToken(); + case '!': return LexExclaim(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case '-': + return LexDigitOrNegative(); + case '=': return lltok::equal; + case '[': return lltok::lsquare; + case ']': return lltok::rsquare; + case '{': return lltok::lbrace; + case '}': return lltok::rbrace; + case '<': return lltok::less; + case '>': return lltok::greater; + case '(': return lltok::lparen; + case ')': return lltok::rparen; + case ',': return lltok::comma; + case '*': return lltok::star; + case '\\': return lltok::backslash; + } +} + +void LLLexer::SkipLineComment() { + while (1) { + if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF) + return; + } +} + +/// LexAt - Lex all tokens that start with an @ character: +/// GlobalVar @\"[^\"]*\" +/// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// GlobalVarID @[0-9]+ +lltok::Kind LLLexer::LexAt() { + // Handle AtStringConstant: @\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::GlobalVar; + } + } + } + + // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' 
|| CurPtr[0] == '_') + ++CurPtr; + + StrVal.assign(TokStart+1, CurPtr); // Skip @ + return lltok::GlobalVar; + } + + // Handle GlobalVarID: @[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::GlobalID; + } + + return lltok::Error; +} + + +/// LexPercent - Lex all tokens that start with a % character: +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + // Handle LocalVarName: %\"[^\"]*\" + if (CurPtr[0] == '"') { + ++CurPtr; + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in string constant"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::LocalVar; + } + } + } + + // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + StrVal.assign(TokStart+1, CurPtr); // Skip % + return lltok::LocalVar; + } + + // Handle LocalVarID: %[0-9]+ + if (isdigit(CurPtr[0])) { + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::LocalVarID; + } + + return lltok::Error; +} + +/// LexQuote - Lex all tokens that start with a " character: +/// QuoteLabel "[^"]+": +/// StringConstant "[^"]*" +lltok::Kind LLLexer::LexQuote() { + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in quoted string"); + return lltok::Error; + } + + if (CurChar != '"') continue; + + if (CurPtr[0] != ':') { + StrVal.assign(TokStart+1, CurPtr-1); + UnEscapeLexed(StrVal); + return lltok::StringConstant; + } + + ++CurPtr; + StrVal.assign(TokStart+1, CurPtr-2); + UnEscapeLexed(StrVal); + return lltok::LabelStr; + } +} + +static bool JustWhitespaceNewLine(const char *&Ptr) { + const char *ThisPtr = Ptr; + while (*ThisPtr == ' ' || *ThisPtr == '\t') + ++ThisPtr; + if (*ThisPtr == '\n' || *ThisPtr == '\r') { + Ptr = ThisPtr; + return true; + } + return false; +} + +/// LexExclaim: +/// !foo +/// ! +lltok::Kind LLLexer::LexExclaim() { + // Lex a metadata name as a MetadataVar. + if (isalpha(CurPtr[0])) { + ++CurPtr; + while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + CurPtr[0] == '.' || CurPtr[0] == '_') + ++CurPtr; + + StrVal.assign(TokStart+1, CurPtr); // Skip ! + return lltok::MetadataVar; + } + return lltok::exclaim; +} + +/// LexIdentifier: Handle several related productions: +/// Label [-a-zA-Z$._0-9]+: +/// IntegerType i[0-9]+ +/// Keyword sdiv, float, ... +/// HexIntConstant [us]0x[0-9A-Fa-f]+ +lltok::Kind LLLexer::LexIdentifier() { + const char *StartChar = CurPtr; + const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar; + const char *KeywordEnd = 0; + + for (; isLabelChar(*CurPtr); ++CurPtr) { + // If we decide this is an integer, remember the end of the sequence. 
+ if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; + if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; + } + + // If we stopped due to a colon, this really is a label. + if (*CurPtr == ':') { + StrVal.assign(StartChar-1, CurPtr++); + return lltok::LabelStr; + } + + // Otherwise, this wasn't a label. If this was valid as an integer type, + // return it. + if (IntEnd == 0) IntEnd = CurPtr; + if (IntEnd != StartChar) { + CurPtr = IntEnd; + uint64_t NumBits = atoull(StartChar, CurPtr); + if (NumBits < IntegerType::MIN_INT_BITS || + NumBits > IntegerType::MAX_INT_BITS) { + Error("bitwidth for integer type out of range!"); + return lltok::Error; + } + TyVal = IntegerType::get(Context, NumBits); + return lltok::Type; + } + + // Otherwise, this was a letter sequence. See which keyword this is. + if (KeywordEnd == 0) KeywordEnd = CurPtr; + CurPtr = KeywordEnd; + --StartChar; + unsigned Len = CurPtr-StartChar; +#define KEYWORD(STR) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; + + KEYWORD(begin); KEYWORD(end); + KEYWORD(true); KEYWORD(false); + KEYWORD(declare); KEYWORD(define); + KEYWORD(global); KEYWORD(constant); + + KEYWORD(private); + KEYWORD(linker_private); + KEYWORD(linker_private_weak); + KEYWORD(linker_private_weak_def_auto); + KEYWORD(internal); + KEYWORD(available_externally); + KEYWORD(linkonce); + KEYWORD(linkonce_odr); + KEYWORD(weak); + KEYWORD(weak_odr); + KEYWORD(appending); + KEYWORD(dllimport); + KEYWORD(dllexport); + KEYWORD(common); + KEYWORD(default); + KEYWORD(hidden); + KEYWORD(protected); + KEYWORD(unnamed_addr); + KEYWORD(extern_weak); + KEYWORD(external); + KEYWORD(thread_local); + KEYWORD(zeroinitializer); + KEYWORD(undef); + KEYWORD(null); + KEYWORD(to); + KEYWORD(tail); + KEYWORD(target); + KEYWORD(triple); + KEYWORD(deplibs); + KEYWORD(datalayout); + KEYWORD(volatile); + KEYWORD(nuw); + KEYWORD(nsw); + KEYWORD(exact); + KEYWORD(inbounds); + KEYWORD(align); + KEYWORD(addrspace); + KEYWORD(section); + KEYWORD(alias); + KEYWORD(module); + KEYWORD(asm); + KEYWORD(sideeffect); + KEYWORD(alignstack); + KEYWORD(gc); + + KEYWORD(ccc); + KEYWORD(fastcc); + KEYWORD(coldcc); + KEYWORD(x86_stdcallcc); + KEYWORD(x86_fastcallcc); + KEYWORD(x86_thiscallcc); + KEYWORD(arm_apcscc); + KEYWORD(arm_aapcscc); + KEYWORD(arm_aapcs_vfpcc); + KEYWORD(msp430_intrcc); + KEYWORD(ptx_kernel); + KEYWORD(ptx_device); + + KEYWORD(cc); + KEYWORD(c); + + KEYWORD(signext); + KEYWORD(zeroext); + KEYWORD(inreg); + KEYWORD(sret); + KEYWORD(nounwind); + KEYWORD(noreturn); + KEYWORD(noalias); + KEYWORD(nocapture); + KEYWORD(byval); + KEYWORD(nest); + KEYWORD(readnone); + KEYWORD(readonly); + + KEYWORD(inlinehint); + KEYWORD(noinline); + KEYWORD(alwaysinline); + KEYWORD(optsize); + KEYWORD(ssp); + KEYWORD(sspreq); + KEYWORD(noredzone); + KEYWORD(noimplicitfloat); + KEYWORD(naked); + KEYWORD(hotpatch); + + KEYWORD(type); + KEYWORD(opaque); + + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); + KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); + KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); + KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une); + + KEYWORD(x); + KEYWORD(blockaddress); +#undef KEYWORD + + // Keywords for types. 
+#define TYPEKEYWORD(STR, LLVMTY) \ + if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ + TyVal = LLVMTY; return lltok::Type; } + TYPEKEYWORD("void", Type::getVoidTy(Context)); + TYPEKEYWORD("float", Type::getFloatTy(Context)); + TYPEKEYWORD("double", Type::getDoubleTy(Context)); + TYPEKEYWORD("x86_fp80", Type::getX86_FP80Ty(Context)); + TYPEKEYWORD("fp128", Type::getFP128Ty(Context)); + TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context)); + TYPEKEYWORD("label", Type::getLabelTy(Context)); + TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); + TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); +#undef TYPEKEYWORD + + // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is + // to avoid conflicting with the sext/zext instructions, below. + if (Len == 4 && !memcmp(StartChar, "sext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return lltok::kw_signext; + } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) { + // Scan CurPtr ahead, seeing if there is just whitespace before the newline. + if (JustWhitespaceNewLine(CurPtr)) + return lltok::kw_zeroext; + } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_malloc; + } else if (Len == 4 && !memcmp(StartChar, "free", 4)) { + // FIXME: Remove in LLVM 3.0. + // Autoupgrade malloc instruction. + return lltok::kw_free; + } + + // Keywords for instructions. +#define INSTKEYWORD(STR, Enum) \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ + UIntVal = Instruction::Enum; return lltok::kw_##STR; } + + INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); + INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); + INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); + INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); + INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); + INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); + INSTKEYWORD(and, And); INSTKEYWORD(or, Or); INSTKEYWORD(xor, Xor); + INSTKEYWORD(icmp, ICmp); INSTKEYWORD(fcmp, FCmp); + + INSTKEYWORD(phi, PHI); + INSTKEYWORD(call, Call); + INSTKEYWORD(trunc, Trunc); + INSTKEYWORD(zext, ZExt); + INSTKEYWORD(sext, SExt); + INSTKEYWORD(fptrunc, FPTrunc); + INSTKEYWORD(fpext, FPExt); + INSTKEYWORD(uitofp, UIToFP); + INSTKEYWORD(sitofp, SIToFP); + INSTKEYWORD(fptoui, FPToUI); + INSTKEYWORD(fptosi, FPToSI); + INSTKEYWORD(inttoptr, IntToPtr); + INSTKEYWORD(ptrtoint, PtrToInt); + INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(select, Select); + INSTKEYWORD(va_arg, VAArg); + INSTKEYWORD(ret, Ret); + INSTKEYWORD(br, Br); + INSTKEYWORD(switch, Switch); + INSTKEYWORD(indirectbr, IndirectBr); + INSTKEYWORD(invoke, Invoke); + INSTKEYWORD(unwind, Unwind); + INSTKEYWORD(unreachable, Unreachable); + + INSTKEYWORD(alloca, Alloca); + INSTKEYWORD(load, Load); + INSTKEYWORD(store, Store); + INSTKEYWORD(getelementptr, GetElementPtr); + + INSTKEYWORD(extractelement, ExtractElement); + INSTKEYWORD(insertelement, InsertElement); + INSTKEYWORD(shufflevector, ShuffleVector); + INSTKEYWORD(getresult, ExtractValue); + INSTKEYWORD(extractvalue, ExtractValue); + INSTKEYWORD(insertvalue, InsertValue); +#undef INSTKEYWORD + + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by + // the CFE to avoid forcing it to deal with 64-bit numbers. 
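As a worked example of the branch below: "u0x00FF" has four hexits, so the lexer builds a 16-bit APInt, finds only eight active bits, truncates to i8, and wraps the result as an unsigned APSInt with value 255. Using the same APInt calls:

#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Worked example of the [us]0x lexing path for the input "u0x00FF".
static APSInt lexHexExample() {
  APInt Tmp(16, StringRef("00FF", 4), 16); // 4 hexits -> 16-bit start
  unsigned Active = Tmp.getActiveBits();   // 8 bits are actually used
  if (Active > 0 && Active < Tmp.getBitWidth())
    Tmp = Tmp.trunc(Active);               // now i8, value 255
  return APSInt(Tmp, /*isUnsigned=*/true); // 'u' prefix -> unsigned
}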
+ if ((TokStart[0] == 'u' || TokStart[0] == 's') && + TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { + int len = CurPtr-TokStart-3; + uint32_t bits = len * 4; + APInt Tmp(bits, StringRef(TokStart+3, len), 16); + uint32_t activeBits = Tmp.getActiveBits(); + if (activeBits > 0 && activeBits < bits) + Tmp = Tmp.trunc(activeBits); + APSIntVal = APSInt(Tmp, TokStart[0] == 'u'); + return lltok::APSInt; + } + + // If this is "cc1234", return this as just "cc". + if (TokStart[0] == 'c' && TokStart[1] == 'c') { + CurPtr = TokStart+2; + return lltok::kw_cc; + } + + // If this starts with "call", return it as CALL. This is to support old + // broken .ll files. FIXME: remove this with LLVM 3.0. + if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) { + CurPtr = TokStart+4; + UIntVal = Instruction::Call; + return lltok::kw_call; + } + + // Finally, if this isn't known, return an error. + CurPtr = TokStart+1; + return lltok::Error; +} + + +/// Lex0x: Handle productions that start with 0x, knowing that it matches and +/// that this is not a label: +/// HexFPConstant 0x[0-9A-Fa-f]+ +/// HexFP80Constant 0xK[0-9A-Fa-f]+ +/// HexFP128Constant 0xL[0-9A-Fa-f]+ +/// HexPPC128Constant 0xM[0-9A-Fa-f]+ +lltok::Kind LLLexer::Lex0x() { + CurPtr = TokStart + 2; + + char Kind; + if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') { + Kind = *CurPtr++; + } else { + Kind = 'J'; + } + + if (!isxdigit(CurPtr[0])) { + // Bad token, return it as an error. + CurPtr = TokStart+1; + return lltok::Error; + } + + while (isxdigit(CurPtr[0])) + ++CurPtr; + + if (Kind == 'J') { + // HexFPConstant - Floating point constant represented in IEEE format as a + // hexadecimal number for when exponential notation is not precise enough. + // Float and double only. + APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr))); + return lltok::APFloat; + } + + uint64_t Pair[2]; + switch (Kind) { + default: llvm_unreachable("Unknown kind!"); + case 'K': + // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) + FP80HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(80, 2, Pair)); + return lltok::APFloat; + case 'L': + // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair), true); + return lltok::APFloat; + case 'M': + // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) + HexToIntPair(TokStart+3, CurPtr, Pair); + APFloatVal = APFloat(APInt(128, 2, Pair)); + return lltok::APFloat; + } +} + +/// LexIdentifier: Handle several related productions: +/// Label [-a-zA-Z$._0-9]+: +/// NInteger -[0-9]+ +/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? +/// PInteger [0-9]+ +/// HexFPConstant 0x[0-9A-Fa-f]+ +/// HexFP80Constant 0xK[0-9A-Fa-f]+ +/// HexFP128Constant 0xL[0-9A-Fa-f]+ +/// HexPPC128Constant 0xM[0-9A-Fa-f]+ +lltok::Kind LLLexer::LexDigitOrNegative() { + // If the letter after the negative is a number, this is probably a label. + if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { + // Okay, this is not a number after the -, it's probably a label. + if (const char *End = isLabelTail(CurPtr)) { + StrVal.assign(TokStart, End-1); + CurPtr = End; + return lltok::LabelStr; + } + + return lltok::Error; + } + + // At this point, it is either a label, int or fp constant. + + // Skip digits, we have at least one. + for (; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + // Check to see if this really is a label afterall, e.g. "-1:". 
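Concrete cases for the check that follows (illustrative inputs, not taken from the patch):

//   "-1:"   -> LabelStr  (isLabelTail() consumes through the ':')
//   "-1"    -> APSInt    (negative integer, handled just below)
//   "-1.5"  -> APFloat   (falls through to the '.'/exponent scan)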
+ if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') { + if (const char *End = isLabelTail(CurPtr)) { + StrVal.assign(TokStart, End-1); + CurPtr = End; + return lltok::LabelStr; + } + } + + // If the next character is a '.', then it is a fp value, otherwise its + // integer. + if (CurPtr[0] != '.') { + if (TokStart[0] == '0' && TokStart[1] == 'x') + return Lex0x(); + unsigned Len = CurPtr-TokStart; + uint32_t numBits = ((Len * 64) / 19) + 2; + APInt Tmp(numBits, StringRef(TokStart, Len), 10); + if (TokStart[0] == '-') { + uint32_t minBits = Tmp.getMinSignedBits(); + if (minBits > 0 && minBits < numBits) + Tmp = Tmp.trunc(minBits); + APSIntVal = APSInt(Tmp, false); + } else { + uint32_t activeBits = Tmp.getActiveBits(); + if (activeBits > 0 && activeBits < numBits) + Tmp = Tmp.trunc(activeBits); + APSIntVal = APSInt(Tmp, true); + } + return lltok::APSInt; + } + + ++CurPtr; + + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(CurPtr[0])) ++CurPtr; + + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { + if (isdigit(CurPtr[1]) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + CurPtr += 2; + while (isdigit(CurPtr[0])) ++CurPtr; + } + } + + APFloatVal = APFloat(std::atof(TokStart)); + return lltok::APFloat; +} + +/// FPConstant [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)? +lltok::Kind LLLexer::LexPositive() { + // If the letter after the negative is a number, this is probably not a + // label. + if (!isdigit(CurPtr[0])) + return lltok::Error; + + // Skip digits. + for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + /*empty*/; + + // At this point, we need a '.'. + if (CurPtr[0] != '.') { + CurPtr = TokStart+1; + return lltok::Error; + } + + ++CurPtr; + + // Skip over [0-9]*([eE][-+]?[0-9]+)? + while (isdigit(CurPtr[0])) ++CurPtr; + + if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { + if (isdigit(CurPtr[1]) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + CurPtr += 2; + while (isdigit(CurPtr[0])) ++CurPtr; + } + } + + APFloatVal = APFloat(std::atof(TokStart)); + return lltok::APFloat; +} diff --git a/final/lib/AsmParser/LLLexer.h b/final/lib/AsmParser/LLLexer.h new file mode 100644 index 00000000000..09ae8017f40 --- /dev/null +++ b/final/lib/AsmParser/LLLexer.h @@ -0,0 +1,90 @@ +//===- LLLexer.h - Lexer for LLVM Assembly Files ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class represents the Lexer for .ll files. +// +//===----------------------------------------------------------------------===// + +#ifndef LIB_ASMPARSER_LLLEXER_H +#define LIB_ASMPARSER_LLLEXER_H + +#include "LLToken.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/SourceMgr.h" +#include + +namespace llvm { + class MemoryBuffer; + class Type; + class SMDiagnostic; + class LLVMContext; + + class LLLexer { + const char *CurPtr; + MemoryBuffer *CurBuf; + SMDiagnostic &ErrorInfo; + SourceMgr &SM; + LLVMContext &Context; + + // Information about the current token. 
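// (For orientation: the parser in LLParser.cpp drives this class by calling
//  Lex() to advance, then reading the token's value out of the fields below
//  through the accessors. A hypothetical driver loop:
//
//    LLLexer L(Buf, SM, Err, Ctx);
//    while (L.Lex() != lltok::Eof) {
//      if (L.getKind() == lltok::GlobalVar)
//        errs() << "global: " << L.getStrVal() << "\n";
//    }
// )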
+    const char *TokStart;
+    lltok::Kind CurKind;
+    std::string StrVal;
+    unsigned UIntVal;
+    const Type *TyVal;
+    APFloat APFloatVal;
+    APSInt APSIntVal;
+
+    std::string TheError;
+  public:
+    explicit LLLexer(MemoryBuffer *StartBuf, SourceMgr &SM, SMDiagnostic &,
+                     LLVMContext &C);
+    ~LLLexer() {}
+
+    lltok::Kind Lex() {
+      return CurKind = LexToken();
+    }
+
+    typedef SMLoc LocTy;
+    LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
+    lltok::Kind getKind() const { return CurKind; }
+    const std::string &getStrVal() const { return StrVal; }
+    const Type *getTyVal() const { return TyVal; }
+    unsigned getUIntVal() const { return UIntVal; }
+    const APSInt &getAPSIntVal() const { return APSIntVal; }
+    const APFloat &getAPFloatVal() const { return APFloatVal; }
+
+
+    bool Error(LocTy L, const Twine &Msg) const;
+    bool Error(const Twine &Msg) const { return Error(getLoc(), Msg); }
+    std::string getFilename() const;
+
+  private:
+    lltok::Kind LexToken();
+
+    int getNextChar();
+    void SkipLineComment();
+    lltok::Kind LexIdentifier();
+    lltok::Kind LexDigitOrNegative();
+    lltok::Kind LexPositive();
+    lltok::Kind LexAt();
+    lltok::Kind LexExclaim();
+    lltok::Kind LexPercent();
+    lltok::Kind LexQuote();
+    lltok::Kind Lex0x();
+
+    uint64_t atoull(const char *Buffer, const char *End);
+    uint64_t HexIntToVal(const char *Buffer, const char *End);
+    void HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]);
+    void FP80HexToIntPair(const char *Buff, const char *End, uint64_t Pair[2]);
+  };
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/AsmParser/LLParser.cpp b/final/lib/AsmParser/LLParser.cpp
new file mode 100644
index 00000000000..fc10c040547
--- /dev/null
+++ b/final/lib/AsmParser/LLParser.cpp
@@ -0,0 +1,3976 @@
+//===-- LLParser.cpp - Parser Class ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the parser class for .ll files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLParser.h"
+#include "llvm/AutoUpgrade.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+/// Run: module ::= toplevelentity*
+bool LLParser::Run() {
+  // Prime the lexer.
+  Lex.Lex();
+
+  return ParseTopLevelEntities() ||
+         ValidateEndOfModule();
+}
+
+/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
+/// module.
+bool LLParser::ValidateEndOfModule() {
+  // Handle any instruction metadata forward references.
+  if (!ForwardRefInstMetadata.empty()) {
+    for (DenseMap<Instruction*, std::vector<MDRef> >::iterator
+         I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end();
+         I != E; ++I) {
+      Instruction *Inst = I->first;
+      const std::vector<MDRef> &MDList = I->second;
+
+      for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
+        unsigned SlotNo = MDList[i].MDSlot;
+
+        if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+          return Error(MDList[i].Loc, "use of undefined metadata '!"
+                       Twine(SlotNo) + "'");
+        Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
+      }
+    }
+    ForwardRefInstMetadata.clear();
+  }
+
+
+  // Update auto-upgraded malloc calls to "malloc".
+  // FIXME: Remove in LLVM 3.0.
+  if (MallocF) {
+    MallocF->setName("malloc");
+    // If setName() does not set the name to "malloc", then there is already a
+    // declaration of "malloc". In that case, iterate over all calls to MallocF
+    // and get them to call the declared "malloc" instead.
+    if (MallocF->getName() != "malloc") {
+      Constant *RealMallocF = M->getFunction("malloc");
+      if (RealMallocF->getType() != MallocF->getType())
+        RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType());
+      MallocF->replaceAllUsesWith(RealMallocF);
+      MallocF->eraseFromParent();
+      MallocF = NULL;
+    }
+  }
+
+
+  // If there are entries in ForwardRefBlockAddresses at this point, they are
+  // references after the function was defined. Resolve those now.
+  while (!ForwardRefBlockAddresses.empty()) {
+    // Okay, we are referencing an already-parsed function, resolve them now.
+    Function *TheFn = 0;
+    const ValID &Fn = ForwardRefBlockAddresses.begin()->first;
+    if (Fn.Kind == ValID::t_GlobalName)
+      TheFn = M->getFunction(Fn.StrVal);
+    else if (Fn.UIntVal < NumberedVals.size())
+      TheFn = dyn_cast<Function>(NumberedVals[Fn.UIntVal]);
+
+    if (TheFn == 0)
+      return Error(Fn.Loc, "unknown function referenced by blockaddress");
+
+    // Resolve all these references.
+    if (ResolveForwardRefBlockAddresses(TheFn,
+                                      ForwardRefBlockAddresses.begin()->second,
+                                        0))
+      return true;
+
+    ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
+  }
+
+
+  if (!ForwardRefTypes.empty())
+    return Error(ForwardRefTypes.begin()->second.second,
+                 "use of undefined type named '" +
+                 ForwardRefTypes.begin()->first + "'");
+  if (!ForwardRefTypeIDs.empty())
+    return Error(ForwardRefTypeIDs.begin()->second.second,
+                 "use of undefined type '%" +
+                 Twine(ForwardRefTypeIDs.begin()->first) + "'");
+
+  if (!ForwardRefVals.empty())
+    return Error(ForwardRefVals.begin()->second.second,
+                 "use of undefined value '@" + ForwardRefVals.begin()->first +
+                 "'");
+
+  if (!ForwardRefValIDs.empty())
+    return Error(ForwardRefValIDs.begin()->second.second,
+                 "use of undefined value '@" +
+                 Twine(ForwardRefValIDs.begin()->first) + "'");
+
+  if (!ForwardRefMDNodes.empty())
+    return Error(ForwardRefMDNodes.begin()->second.second,
+                 "use of undefined metadata '!" +
+                 Twine(ForwardRefMDNodes.begin()->first) + "'");
+
+
+  // Look for intrinsic functions and CallInst that need to be upgraded
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
+    UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove
+
+  // Check debug info intrinsics.
+  CheckDebugInfoIntrinsics(M);
+  return false;
+}
+
+bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn,
+                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
+                                               PerFunctionState *PFS) {
+  // Loop over all the references, resolving them.
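// (The situation being repaired, as an illustrative .ll input: a blockaddress
//  use can be parsed before the labelled block exists, so the parser hands
//  out a placeholder and patches it here.
//
//    @ptr = global i8* blockaddress(@f, %bb)  ; %bb not parsed yet
//    define void @f() {
//    bb:
//      ret void
//    }
//
//  Each recorded placeholder is swapped for the real BlockAddress via
//  replaceAllUsesWith in the loop below.)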
+ for (unsigned i = 0, e = Refs.size(); i != e; ++i) { + BasicBlock *Res; + if (PFS) { + if (Refs[i].first.Kind == ValID::t_LocalName) + Res = PFS->GetBB(Refs[i].first.StrVal, Refs[i].first.Loc); + else + Res = PFS->GetBB(Refs[i].first.UIntVal, Refs[i].first.Loc); + } else if (Refs[i].first.Kind == ValID::t_LocalID) { + return Error(Refs[i].first.Loc, + "cannot take address of numeric label after the function is defined"); + } else { + Res = dyn_cast_or_null( + TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal)); + } + + if (Res == 0) + return Error(Refs[i].first.Loc, + "referenced value is not a basic block"); + + // Get the BlockAddress for this and update references to use it. + BlockAddress *BA = BlockAddress::get(TheFn, Res); + Refs[i].second->replaceAllUsesWith(BA); + Refs[i].second->eraseFromParent(); + } + return false; +} + + +//===----------------------------------------------------------------------===// +// Top-Level Entities +//===----------------------------------------------------------------------===// + +bool LLParser::ParseTopLevelEntities() { + while (1) { + switch (Lex.getKind()) { + default: return TokError("expected top-level entity"); + case lltok::Eof: return false; + //case lltok::kw_define: + case lltok::kw_declare: if (ParseDeclare()) return true; break; + case lltok::kw_define: if (ParseDefine()) return true; break; + case lltok::kw_module: if (ParseModuleAsm()) return true; break; + case lltok::kw_target: if (ParseTargetDefinition()) return true; break; + case lltok::kw_deplibs: if (ParseDepLibs()) return true; break; + case lltok::kw_type: if (ParseUnnamedType()) return true; break; + case lltok::LocalVarID: if (ParseUnnamedType()) return true; break; + case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0 + case lltok::LocalVar: if (ParseNamedType()) return true; break; + case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; + case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; + case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; + case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break; + + // The Global variable production with no name can have many different + // optional leading prefixes, the production is: + // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal + // OptionalAddrSpace OptionalUnNammedAddr + // ('constant'|'global') ... 
+ case lltok::kw_private: // OptionalLinkage + case lltok::kw_linker_private: // OptionalLinkage + case lltok::kw_linker_private_weak: // OptionalLinkage + case lltok::kw_linker_private_weak_def_auto: // OptionalLinkage + case lltok::kw_internal: // OptionalLinkage + case lltok::kw_weak: // OptionalLinkage + case lltok::kw_weak_odr: // OptionalLinkage + case lltok::kw_linkonce: // OptionalLinkage + case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_appending: // OptionalLinkage + case lltok::kw_dllexport: // OptionalLinkage + case lltok::kw_common: // OptionalLinkage + case lltok::kw_dllimport: // OptionalLinkage + case lltok::kw_extern_weak: // OptionalLinkage + case lltok::kw_external: { // OptionalLinkage + unsigned Linkage, Visibility; + if (ParseOptionalLinkage(Linkage) || + ParseOptionalVisibility(Visibility) || + ParseGlobal("", SMLoc(), Linkage, true, Visibility)) + return true; + break; + } + case lltok::kw_default: // OptionalVisibility + case lltok::kw_hidden: // OptionalVisibility + case lltok::kw_protected: { // OptionalVisibility + unsigned Visibility; + if (ParseOptionalVisibility(Visibility) || + ParseGlobal("", SMLoc(), 0, false, Visibility)) + return true; + break; + } + + case lltok::kw_thread_local: // OptionalThreadLocal + case lltok::kw_addrspace: // OptionalAddrSpace + case lltok::kw_constant: // GlobalType + case lltok::kw_global: // GlobalType + if (ParseGlobal("", SMLoc(), 0, false, 0)) return true; + break; + } + } +} + + +/// toplevelentity +/// ::= 'module' 'asm' STRINGCONSTANT +bool LLParser::ParseModuleAsm() { + assert(Lex.getKind() == lltok::kw_module); + Lex.Lex(); + + std::string AsmStr; + if (ParseToken(lltok::kw_asm, "expected 'module asm'") || + ParseStringConstant(AsmStr)) return true; + + M->appendModuleInlineAsm(AsmStr); + return false; +} + +/// toplevelentity +/// ::= 'target' 'triple' '=' STRINGCONSTANT +/// ::= 'target' 'datalayout' '=' STRINGCONSTANT +bool LLParser::ParseTargetDefinition() { + assert(Lex.getKind() == lltok::kw_target); + std::string Str; + switch (Lex.Lex()) { + default: return TokError("unknown target property"); + case lltok::kw_triple: + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after target triple") || + ParseStringConstant(Str)) + return true; + M->setTargetTriple(Str); + return false; + case lltok::kw_datalayout: + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after target datalayout") || + ParseStringConstant(Str)) + return true; + M->setDataLayout(Str); + return false; + } +} + +/// toplevelentity +/// ::= 'deplibs' '=' '[' ']' +/// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']' +bool LLParser::ParseDepLibs() { + assert(Lex.getKind() == lltok::kw_deplibs); + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' after deplibs") || + ParseToken(lltok::lsquare, "expected '=' after deplibs")) + return true; + + if (EatIfPresent(lltok::rsquare)) + return false; + + std::string Str; + if (ParseStringConstant(Str)) return true; + M->addLibrary(Str); + + while (EatIfPresent(lltok::comma)) { + if (ParseStringConstant(Str)) return true; + M->addLibrary(Str); + } + + return ParseToken(lltok::rsquare, "expected ']' at end of list"); +} + +/// ParseUnnamedType: +/// ::= 'type' type +/// ::= LocalVarID '=' 'type' type +bool LLParser::ParseUnnamedType() { + unsigned TypeID = NumberedTypes.size(); + + // Handle the LocalVarID form. 
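// (For illustration, the numbered-type form handled here must appear in
//  sequence:
//
//    %0 = type { i32, i32 }   ; LocalVarID form: must be %0, then %1, ...
//    %1 = type opaque
//
//  A first entry such as "%5 = type i32" trips the "type expected to be
//  numbered" error just below.)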
+  if (Lex.getKind() == lltok::LocalVarID) {
+    if (Lex.getUIntVal() != TypeID)
+      return Error(Lex.getLoc(), "type expected to be numbered '%" +
+                   Twine(TypeID) + "'");
+    Lex.Lex(); // eat LocalVarID;
+
+    if (ParseToken(lltok::equal, "expected '=' after name"))
+      return true;
+  }
+
+  LocTy TypeLoc = Lex.getLoc();
+  if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
+
+  PATypeHolder Ty(Type::getVoidTy(Context));
+  if (ParseType(Ty)) return true;
+
+  // See if this type was previously referenced.
+  std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
+    FI = ForwardRefTypeIDs.find(TypeID);
+  if (FI != ForwardRefTypeIDs.end()) {
+    if (FI->second.first.get() == Ty)
+      return Error(TypeLoc, "self referential type is invalid");
+
+    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
+    Ty = FI->second.first.get();
+    ForwardRefTypeIDs.erase(FI);
+  }
+
+  NumberedTypes.push_back(Ty);
+
+  return false;
+}
+
+/// toplevelentity
+///   ::= LocalVar '=' 'type' type
+bool LLParser::ParseNamedType() {
+  std::string Name = Lex.getStrVal();
+  LocTy NameLoc = Lex.getLoc();
+  Lex.Lex();  // eat LocalVar.
+
+  PATypeHolder Ty(Type::getVoidTy(Context));
+
+  if (ParseToken(lltok::equal, "expected '=' after name") ||
+      ParseToken(lltok::kw_type, "expected 'type' after name") ||
+      ParseType(Ty))
+    return true;
+
+  // Set the type name, checking for conflicts as we do so.
+  bool AlreadyExists = M->addTypeName(Name, Ty);
+  if (!AlreadyExists) return false;
+
+  // See if this type is a forward reference.  We need to eagerly resolve
+  // types to allow recursive type redefinitions below.
+  std::map<std::string, std::pair<PATypeHolder, LocTy> >::iterator
+    FI = ForwardRefTypes.find(Name);
+  if (FI != ForwardRefTypes.end()) {
+    if (FI->second.first.get() == Ty)
+      return Error(NameLoc, "self referential type is invalid");
+
+    cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
+    Ty = FI->second.first.get();
+    ForwardRefTypes.erase(FI);
+  }
+
+  // Inserting a name that is already defined, get the existing name.
+  const Type *Existing = M->getTypeByName(Name);
+  assert(Existing && "Conflict but no matching type?!");
+
+  // Otherwise, this is an attempt to redefine a type. That's okay if
+  // the redefinition is identical to the original.
+  // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
+  if (Existing == Ty) return false;
+
+  // Any other kind of (non-equivalent) redefinition is an error.
+  return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
+               Ty->getDescription() + "'");
+}
+
+
+/// toplevelentity
+///   ::= 'declare' FunctionHeader
+bool LLParser::ParseDeclare() {
+  assert(Lex.getKind() == lltok::kw_declare);
+  Lex.Lex();
+
+  Function *F;
+  return ParseFunctionHeader(F, false);
+}
+
+/// toplevelentity
+///   ::= 'define' FunctionHeader '{' ...
+bool LLParser::ParseDefine() {
+  assert(Lex.getKind() == lltok::kw_define);
+  Lex.Lex();
+
+  Function *F;
+  return ParseFunctionHeader(F, true) ||
+         ParseFunctionBody(*F);
+}
+
+/// ParseGlobalType
+///   ::= 'constant'
+///   ::= 'global'
+bool LLParser::ParseGlobalType(bool &IsConstant) {
+  if (Lex.getKind() == lltok::kw_constant)
+    IsConstant = true;
+  else if (Lex.getKind() == lltok::kw_global)
+    IsConstant = false;
+  else {
+    IsConstant = false;
+    return TokError("expected 'global' or 'constant'");
+  }
+  Lex.Lex();
+  return false;
+}
+
+/// ParseUnnamedGlobal:
+///   OptionalVisibility ALIAS ...
+///   OptionalLinkage OptionalVisibility ...   -> global variable
+///   GlobalID '=' OptionalVisibility ALIAS ...
+///   GlobalID '=' OptionalLinkage OptionalVisibility ...   -> global variable
+bool LLParser::ParseUnnamedGlobal() {
+  unsigned VarID = NumberedVals.size();
+  std::string Name;
+  LocTy NameLoc = Lex.getLoc();
+
+  // Handle the GlobalID form.
+  if (Lex.getKind() == lltok::GlobalID) {
+    if (Lex.getUIntVal() != VarID)
+      return Error(Lex.getLoc(), "variable expected to be numbered '%" +
+                   Twine(VarID) + "'");
+    Lex.Lex(); // eat GlobalID;
+
+    if (ParseToken(lltok::equal, "expected '=' after name"))
+      return true;
+  }
+
+  bool HasLinkage;
+  unsigned Linkage, Visibility;
+  if (ParseOptionalLinkage(Linkage, HasLinkage) ||
+      ParseOptionalVisibility(Visibility))
+    return true;
+
+  if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+    return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+  return ParseAlias(Name, NameLoc, Visibility);
+}
+
+/// ParseNamedGlobal:
+///   GlobalVar '=' OptionalVisibility ALIAS ...
+///   GlobalVar '=' OptionalLinkage OptionalVisibility ...   -> global variable
+bool LLParser::ParseNamedGlobal() {
+  assert(Lex.getKind() == lltok::GlobalVar);
+  LocTy NameLoc = Lex.getLoc();
+  std::string Name = Lex.getStrVal();
+  Lex.Lex();
+
+  bool HasLinkage;
+  unsigned Linkage, Visibility;
+  if (ParseToken(lltok::equal, "expected '=' in global variable") ||
+      ParseOptionalLinkage(Linkage, HasLinkage) ||
+      ParseOptionalVisibility(Visibility))
+    return true;
+
+  if (HasLinkage || Lex.getKind() != lltok::kw_alias)
+    return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility);
+  return ParseAlias(Name, NameLoc, Visibility);
+}
+
+// MDString:
+//   ::= '!' STRINGCONSTANT
+bool LLParser::ParseMDString(MDString *&Result) {
+  std::string Str;
+  if (ParseStringConstant(Str)) return true;
+  Result = MDString::get(Context, Str);
+  return false;
+}
+
+// MDNode:
+//   ::= '!' MDNodeNumber
+//
+/// This version of ParseMDNodeID returns the slot number and null in the case
+/// of a forward reference.
+bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
+  // !{ ..., !42, ... }
+  if (ParseUInt32(SlotNo)) return true;
+
+  // Check existing MDNode.
+  if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+    Result = NumberedMetadata[SlotNo];
+  else
+    Result = 0;
+  return false;
+}
+
+bool LLParser::ParseMDNodeID(MDNode *&Result) {
+  // !{ ..., !42, ... }
+  unsigned MID = 0;
+  if (ParseMDNodeID(Result, MID)) return true;
+
+  // If not a forward reference, just return it now.
+  if (Result) return false;
+
+  // Otherwise, create MDNode forward reference.
+  MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0);
+  ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc());
+
+  if (NumberedMetadata.size() <= MID)
+    NumberedMetadata.resize(MID+1);
+  NumberedMetadata[MID] = FwdNode;
+  Result = FwdNode;
+  return false;
+}
+
+/// ParseNamedMetadata:
+///   !foo = !{ !1, !2 }
+bool LLParser::ParseNamedMetadata() {
+  assert(Lex.getKind() == lltok::MetadataVar);
+  std::string Name = Lex.getStrVal();
+  Lex.Lex();
+
+  if (ParseToken(lltok::equal, "expected '=' here") ||
+      ParseToken(lltok::exclaim, "expected '!' here") ||
+      ParseToken(lltok::lbrace, "expected '{' here"))
+    return true;
+
+  NamedMDNode *NMD = M->getOrInsertNamedMetadata(Name);
+  if (Lex.getKind() != lltok::rbrace)
+    do {
+      if (ParseToken(lltok::exclaim, "expected '!' here"))
+        return true;
+
+      MDNode *N = 0;
+      if (ParseMDNodeID(N)) return true;
+      NMD->addOperand(N);
+    } while (EatIfPresent(lltok::comma));
+
+  if (ParseToken(lltok::rbrace, "expected end of metadata node"))
+    return true;
+
+  return false;
+}
+
+/// ParseStandaloneMetadata:
+///   !42 = !{...}
+bool LLParser::ParseStandaloneMetadata() {
+  assert(Lex.getKind() == lltok::exclaim);
+  Lex.Lex();
+  unsigned MetadataID = 0;
+
+  LocTy TyLoc;
+  PATypeHolder Ty(Type::getVoidTy(Context));
+  SmallVector<Value*, 16> Elts;
+  if (ParseUInt32(MetadataID) ||
+      ParseToken(lltok::equal, "expected '=' here") ||
+      ParseType(Ty, TyLoc) ||
+      ParseToken(lltok::exclaim, "expected '!' here") ||
+      ParseToken(lltok::lbrace, "expected '{' here") ||
+      ParseMDNodeVector(Elts, NULL) ||
+      ParseToken(lltok::rbrace, "expected end of metadata node"))
+    return true;
+
+  MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size());
+
+  // See if this was forward referenced, if so, handle it.
+  std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> >::iterator
+    FI = ForwardRefMDNodes.find(MetadataID);
+  if (FI != ForwardRefMDNodes.end()) {
+    MDNode *Temp = FI->second.first;
+    Temp->replaceAllUsesWith(Init);
+    MDNode::deleteTemporary(Temp);
+    ForwardRefMDNodes.erase(FI);
+
+    assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work");
+  } else {
+    if (MetadataID >= NumberedMetadata.size())
+      NumberedMetadata.resize(MetadataID+1);
+
+    if (NumberedMetadata[MetadataID] != 0)
+      return TokError("metadata id is already used");
+    NumberedMetadata[MetadataID] = Init;
+  }
+
+  return false;
+}
+
+/// ParseAlias:
+///   ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
+/// Aliasee
+///   ::= TypeAndValue
+///   ::= 'bitcast' '(' TypeAndValue 'to' Type ')'
+///   ::= 'getelementptr' 'inbounds'? '(' ... ')'
+///
+/// Everything through visibility has already been parsed.
+///
+bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
+                          unsigned Visibility) {
+  assert(Lex.getKind() == lltok::kw_alias);
+  Lex.Lex();
+  unsigned Linkage;
+  LocTy LinkageLoc = Lex.getLoc();
+  if (ParseOptionalLinkage(Linkage))
+    return true;
+
+  if (Linkage != GlobalValue::ExternalLinkage &&
+      Linkage != GlobalValue::WeakAnyLinkage &&
+      Linkage != GlobalValue::WeakODRLinkage &&
+      Linkage != GlobalValue::InternalLinkage &&
+      Linkage != GlobalValue::PrivateLinkage &&
+      Linkage != GlobalValue::LinkerPrivateLinkage &&
+      Linkage != GlobalValue::LinkerPrivateWeakLinkage &&
+      Linkage != GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+    return Error(LinkageLoc, "invalid linkage type for alias");
+
+  Constant *Aliasee;
+  LocTy AliaseeLoc = Lex.getLoc();
+  if (Lex.getKind() != lltok::kw_bitcast &&
+      Lex.getKind() != lltok::kw_getelementptr) {
+    if (ParseGlobalTypeAndValue(Aliasee)) return true;
+  } else {
+    // The bitcast dest type is not present, it is implied by the dest type.
+    ValID ID;
+    if (ParseValID(ID)) return true;
+    if (ID.Kind != ValID::t_Constant)
+      return Error(AliaseeLoc, "invalid aliasee");
+    Aliasee = ID.ConstantVal;
+  }
+
+  if (!Aliasee->getType()->isPointerTy())
+    return Error(AliaseeLoc, "alias must have pointer type");
+
+  // Okay, create the alias but do not insert it into the module yet.
+  GlobalAlias* GA = new GlobalAlias(Aliasee->getType(),
+                                    (GlobalValue::LinkageTypes)Linkage, Name,
+                                    Aliasee);
+  GA->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+
+  // See if this value already exists in the symbol table.  If so, it is either
+  // a redefinition or a definition of a forward reference.
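+  // e.g. (hypothetical) "@a = alias i32* @g" where "@a" was already used by
+  // an earlier constantexpr: the placeholder is RAUW'd with the alias below.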
+  if (GlobalValue *Val = M->getNamedValue(Name)) {
+    // See if this was a redefinition.  If so, there is no entry in
+    // ForwardRefVals.
+    std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+      I = ForwardRefVals.find(Name);
+    if (I == ForwardRefVals.end())
+      return Error(NameLoc, "redefinition of global named '@" + Name + "'");
+
+    // Otherwise, this was a definition of forward ref.  Verify that types
+    // agree.
+    if (Val->getType() != GA->getType())
+      return Error(NameLoc,
+              "forward reference and definition of alias have different types");
+
+    // If they agree, just RAUW the old value with the alias and remove the
+    // forward ref info.
+    Val->replaceAllUsesWith(GA);
+    Val->eraseFromParent();
+    ForwardRefVals.erase(I);
+  }
+
+  // Insert into the module, we know its name won't collide now.
+  M->getAliasList().push_back(GA);
+  assert(GA->getName() == Name && "Should not be a name conflict!");
+
+  return false;
+}
+
+/// ParseGlobal
+///   ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal
+///       OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+///   ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
+///       OptionalAddrSpace OptionalUnnamedAddr GlobalType Type Const
+///
+/// Everything through visibility has been parsed already.
+///
+bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
+                           unsigned Linkage, bool HasLinkage,
+                           unsigned Visibility) {
+  unsigned AddrSpace;
+  bool ThreadLocal, IsConstant, UnnamedAddr;
+  LocTy UnnamedAddrLoc;
+  LocTy TyLoc;
+
+  PATypeHolder Ty(Type::getVoidTy(Context));
+  if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
+      ParseOptionalAddrSpace(AddrSpace) ||
+      ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+                         &UnnamedAddrLoc) ||
+      ParseGlobalType(IsConstant) ||
+      ParseType(Ty, TyLoc))
+    return true;
+
+  // If the linkage is specified and is external, then no initializer is
+  // present.
+  Constant *Init = 0;
+  if (!HasLinkage || (Linkage != GlobalValue::DLLImportLinkage &&
+                      Linkage != GlobalValue::ExternalWeakLinkage &&
+                      Linkage != GlobalValue::ExternalLinkage)) {
+    if (ParseGlobalValue(Ty, Init))
+      return true;
+  }
+
+  if (Ty->isFunctionTy() || Ty->isLabelTy())
+    return Error(TyLoc, "invalid type for global variable");
+
+  GlobalVariable *GV = 0;
+
+  // See if the global was forward referenced, if so, use the global.
+  if (!Name.empty()) {
+    if (GlobalValue *GVal = M->getNamedValue(Name)) {
+      if (!ForwardRefVals.erase(Name) || !isa<GlobalVariable>(GVal))
+        return Error(NameLoc, "redefinition of global '@" + Name + "'");
+      GV = cast<GlobalVariable>(GVal);
+    }
+  } else {
+    std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+      I = ForwardRefValIDs.find(NumberedVals.size());
+    if (I != ForwardRefValIDs.end()) {
+      GV = cast<GlobalVariable>(I->second.first);
+      ForwardRefValIDs.erase(I);
+    }
+  }
+
+  if (GV == 0) {
+    GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage, 0,
+                            Name, 0, false, AddrSpace);
+  } else {
+    if (GV->getType()->getElementType() != Ty)
+      return Error(TyLoc,
+            "forward reference and definition of global have different types");
+
+    // Move the forward-reference to the correct spot in the module.
+    M->getGlobalList().splice(M->global_end(), M->getGlobalList(), GV);
+  }
+
+  if (Name.empty())
+    NumberedVals.push_back(GV);
+
+  // Set the parsed properties on the global.
+  if (Init)
+    GV->setInitializer(Init);
+  GV->setConstant(IsConstant);
+  GV->setLinkage((GlobalValue::LinkageTypes)Linkage);
+  GV->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+  GV->setThreadLocal(ThreadLocal);
+  GV->setUnnamedAddr(UnnamedAddr);
+
+  // Parse attributes on the global.
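+  // e.g. trailing properties on a (hypothetical) definition:
+  //   @g = global i32 0, section "mydata", align 8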
+  while (Lex.getKind() == lltok::comma) {
+    Lex.Lex();
+
+    if (Lex.getKind() == lltok::kw_section) {
+      Lex.Lex();
+      GV->setSection(Lex.getStrVal());
+      if (ParseToken(lltok::StringConstant, "expected global section string"))
+        return true;
+    } else if (Lex.getKind() == lltok::kw_align) {
+      unsigned Alignment;
+      if (ParseOptionalAlignment(Alignment)) return true;
+      GV->setAlignment(Alignment);
+    } else {
+      return TokError("unknown global variable property!");
+    }
+  }
+
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// GlobalValue Reference/Resolution Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetGlobalVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed.  This can return null if the value
+/// exists but does not have the right type.
+GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
+                                    LocTy Loc) {
+  const PointerType *PTy = dyn_cast<PointerType>(Ty);
+  if (PTy == 0) {
+    Error(Loc, "global variable reference must have pointer type");
+    return 0;
+  }
+
+  // Look this name up in the normal function symbol table.
+  GlobalValue *Val =
+    cast_or_null<GlobalValue>(M->getValueSymbolTable().lookup(Name));
+
+  // If this is a forward reference for the value, see if we already created a
+  // forward ref record.
+  if (Val == 0) {
+    std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator
+      I = ForwardRefVals.find(Name);
+    if (I != ForwardRefVals.end())
+      Val = I->second.first;
+  }
+
+  // If we have the value in the symbol table or fwd-ref table, return it.
+  if (Val) {
+    if (Val->getType() == Ty) return Val;
+    Error(Loc, "'@" + Name + "' defined with type '" +
+          Val->getType()->getDescription() + "'");
+    return 0;
+  }
+
+  // Otherwise, create a new forward reference for this value and remember it.
+  GlobalValue *FwdVal;
+  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
+    // Function types can return opaque but functions can't.
+    if (FT->getReturnType()->isOpaqueTy()) {
+      Error(Loc, "function may not return opaque type");
+      return 0;
+    }
+
+    FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
+  } else {
+    FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+                                GlobalValue::ExternalWeakLinkage, 0, Name);
+  }
+
+  ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+  return FwdVal;
+}
+
+GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
+  const PointerType *PTy = dyn_cast<PointerType>(Ty);
+  if (PTy == 0) {
+    Error(Loc, "global variable reference must have pointer type");
+    return 0;
+  }
+
+  GlobalValue *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+  // If this is a forward reference for the value, see if we already created a
+  // forward ref record.
+  if (Val == 0) {
+    std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator
+      I = ForwardRefValIDs.find(ID);
+    if (I != ForwardRefValIDs.end())
+      Val = I->second.first;
+  }
+
+  // If we have the value in the symbol table or fwd-ref table, return it.
+  if (Val) {
+    if (Val->getType() == Ty) return Val;
+    Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
+          Val->getType()->getDescription() + "'");
+    return 0;
+  }
+
+  // Otherwise, create a new forward reference for this value and remember it.
+  GlobalValue *FwdVal;
+  if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
+    // Function types can return opaque but functions can't.
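+    // (Opaque is the 2.x placeholder type, e.g. "%t = type opaque"; opaque*
+    // is a fine return type, but a bare opaque return is rejected.)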
+    if (FT->getReturnType()->isOpaqueTy()) {
+      Error(Loc, "function may not return opaque type");
+      return 0;
+    }
+    FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
+  } else {
+    FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
+                                GlobalValue::ExternalWeakLinkage, 0, "");
+  }
+
+  ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+  return FwdVal;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Helper Routines.
+//===----------------------------------------------------------------------===//
+
+/// ParseToken - If the current token has the specified kind, eat it and return
+/// success.  Otherwise, emit the specified error and return failure.
+bool LLParser::ParseToken(lltok::Kind T, const char *ErrMsg) {
+  if (Lex.getKind() != T)
+    return TokError(ErrMsg);
+  Lex.Lex();
+  return false;
+}
+
+/// ParseStringConstant
+///   ::= StringConstant
+bool LLParser::ParseStringConstant(std::string &Result) {
+  if (Lex.getKind() != lltok::StringConstant)
+    return TokError("expected string constant");
+  Result = Lex.getStrVal();
+  Lex.Lex();
+  return false;
+}
+
+/// ParseUInt32
+///   ::= uint32
+bool LLParser::ParseUInt32(unsigned &Val) {
+  if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
+    return TokError("expected integer");
+  uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
+  if (Val64 != unsigned(Val64))
+    return TokError("expected 32-bit integer (too large)");
+  Val = Val64;
+  Lex.Lex();
+  return false;
+}
+
+
+/// ParseOptionalAddrSpace
+///   := /*empty*/
+///   := 'addrspace' '(' uint32 ')'
+bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
+  AddrSpace = 0;
+  if (!EatIfPresent(lltok::kw_addrspace))
+    return false;
+  return ParseToken(lltok::lparen, "expected '(' in address space") ||
+         ParseUInt32(AddrSpace) ||
+         ParseToken(lltok::rparen, "expected ')' in address space");
+}
+
+/// ParseOptionalAttrs - Parse a potentially empty attribute list.  AttrKind
+/// indicates what kind of attribute list this is: 0: function arg, 1: result,
+/// 2: function attr.
+/// 3: function arg after value: FIXME: REMOVE IN LLVM 3.0
+bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
+  Attrs = Attribute::None;
+  LocTy AttrLoc = Lex.getLoc();
+
+  while (1) {
+    switch (Lex.getKind()) {
+    case lltok::kw_sext:
+    case lltok::kw_zext:
+      // Treat these as signext/zeroext if they occur in the argument list after
+      // the value, as in "call i8 @foo(i8 10 sext)".  If they occur before the
+      // value, as in "call i8 @foo(i8 sext (" then it is part of a constant
+      // expr.
+      // FIXME: REMOVE THIS IN LLVM 3.0
+      if (AttrKind == 3) {
+        if (Lex.getKind() == lltok::kw_sext)
+          Attrs |= Attribute::SExt;
+        else
+          Attrs |= Attribute::ZExt;
+        break;
+      }
+      // FALL THROUGH.
+    default:  // End of attributes.
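+      // e.g. a (hypothetical) list "nounwind readonly align 8" is consumed
+      // by the cases below; the first non-attribute token ends the list here.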
+      if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
+        return Error(AttrLoc, "invalid use of function-only attribute");
+
+      if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
+        return Error(AttrLoc, "invalid use of parameter-only attribute");
+
+      return false;
+    case lltok::kw_zeroext:         Attrs |= Attribute::ZExt; break;
+    case lltok::kw_signext:         Attrs |= Attribute::SExt; break;
+    case lltok::kw_inreg:           Attrs |= Attribute::InReg; break;
+    case lltok::kw_sret:            Attrs |= Attribute::StructRet; break;
+    case lltok::kw_noalias:         Attrs |= Attribute::NoAlias; break;
+    case lltok::kw_nocapture:       Attrs |= Attribute::NoCapture; break;
+    case lltok::kw_byval:           Attrs |= Attribute::ByVal; break;
+    case lltok::kw_nest:            Attrs |= Attribute::Nest; break;
+
+    case lltok::kw_noreturn:        Attrs |= Attribute::NoReturn; break;
+    case lltok::kw_nounwind:        Attrs |= Attribute::NoUnwind; break;
+    case lltok::kw_noinline:        Attrs |= Attribute::NoInline; break;
+    case lltok::kw_readnone:        Attrs |= Attribute::ReadNone; break;
+    case lltok::kw_readonly:        Attrs |= Attribute::ReadOnly; break;
+    case lltok::kw_inlinehint:      Attrs |= Attribute::InlineHint; break;
+    case lltok::kw_alwaysinline:    Attrs |= Attribute::AlwaysInline; break;
+    case lltok::kw_optsize:         Attrs |= Attribute::OptimizeForSize; break;
+    case lltok::kw_ssp:             Attrs |= Attribute::StackProtect; break;
+    case lltok::kw_sspreq:          Attrs |= Attribute::StackProtectReq; break;
+    case lltok::kw_noredzone:       Attrs |= Attribute::NoRedZone; break;
+    case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
+    case lltok::kw_naked:           Attrs |= Attribute::Naked; break;
+    case lltok::kw_hotpatch:        Attrs |= Attribute::Hotpatch; break;
+
+    case lltok::kw_alignstack: {
+      unsigned Alignment;
+      if (ParseOptionalStackAlignment(Alignment))
+        return true;
+      Attrs |= Attribute::constructStackAlignmentFromInt(Alignment);
+      continue;
+    }
+
+    case lltok::kw_align: {
+      unsigned Alignment;
+      if (ParseOptionalAlignment(Alignment))
+        return true;
+      Attrs |= Attribute::constructAlignmentFromInt(Alignment);
+      continue;
+    }
+
+    }
+    Lex.Lex();
+  }
+}
+
+/// ParseOptionalLinkage
+///   ::= /*empty*/
+///   ::= 'private'
+///   ::= 'linker_private'
+///   ::= 'linker_private_weak'
+///   ::= 'linker_private_weak_def_auto'
+///   ::= 'internal'
+///   ::= 'weak'
+///   ::= 'weak_odr'
+///   ::= 'linkonce'
+///   ::= 'linkonce_odr'
+///   ::= 'available_externally'
+///   ::= 'appending'
+///   ::= 'dllexport'
+///   ::= 'common'
+///   ::= 'dllimport'
+///   ::= 'extern_weak'
+///   ::= 'external'
+bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
+  HasLinkage = false;
+  switch (Lex.getKind()) {
+  default:                     Res = GlobalValue::ExternalLinkage; return false;
+  case lltok::kw_private:      Res = GlobalValue::PrivateLinkage; break;
+  case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+  case lltok::kw_linker_private_weak:
+    Res = GlobalValue::LinkerPrivateWeakLinkage;
+    break;
+  case lltok::kw_linker_private_weak_def_auto:
+    Res = GlobalValue::LinkerPrivateWeakDefAutoLinkage;
+    break;
+  case lltok::kw_internal:     Res = GlobalValue::InternalLinkage; break;
+  case lltok::kw_weak:         Res = GlobalValue::WeakAnyLinkage; break;
+  case lltok::kw_weak_odr:     Res = GlobalValue::WeakODRLinkage; break;
+  case lltok::kw_linkonce:     Res = GlobalValue::LinkOnceAnyLinkage; break;
+  case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break;
+  case lltok::kw_available_externally:
+    Res = GlobalValue::AvailableExternallyLinkage;
+    break;
+  case lltok::kw_appending:    Res = GlobalValue::AppendingLinkage; break;
+  case lltok::kw_dllexport:    Res = GlobalValue::DLLExportLinkage; break;
+  case lltok::kw_common:       Res = GlobalValue::CommonLinkage; break;
+  case lltok::kw_dllimport:    Res = GlobalValue::DLLImportLinkage; break;
+  case lltok::kw_extern_weak:  Res = GlobalValue::ExternalWeakLinkage; break;
+  case lltok::kw_external:     Res = GlobalValue::ExternalLinkage; break;
+  }
+  Lex.Lex();
+  HasLinkage = true;
+  return false;
+}
+
+/// ParseOptionalVisibility
+///   ::= /*empty*/
+///   ::= 'default'
+///   ::= 'hidden'
+///   ::= 'protected'
+///
+bool LLParser::ParseOptionalVisibility(unsigned &Res) {
+  switch (Lex.getKind()) {
+  default:                  Res = GlobalValue::DefaultVisibility; return false;
+  case lltok::kw_default:   Res = GlobalValue::DefaultVisibility; break;
+  case lltok::kw_hidden:    Res = GlobalValue::HiddenVisibility; break;
+  case lltok::kw_protected: Res = GlobalValue::ProtectedVisibility; break;
+  }
+  Lex.Lex();
+  return false;
+}
+
+/// ParseOptionalCallingConv
+///   ::= /*empty*/
+///   ::= 'ccc'
+///   ::= 'fastcc'
+///   ::= 'coldcc'
+///   ::= 'x86_stdcallcc'
+///   ::= 'x86_fastcallcc'
+///   ::= 'x86_thiscallcc'
+///   ::= 'arm_apcscc'
+///   ::= 'arm_aapcscc'
+///   ::= 'arm_aapcs_vfpcc'
+///   ::= 'msp430_intrcc'
+///   ::= 'ptx_kernel'
+///   ::= 'ptx_device'
+///   ::= 'cc' UINT
+///
+bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
+  switch (Lex.getKind()) {
+  default:                       CC = CallingConv::C; return false;
+  case lltok::kw_ccc:            CC = CallingConv::C; break;
+  case lltok::kw_fastcc:         CC = CallingConv::Fast; break;
+  case lltok::kw_coldcc:         CC = CallingConv::Cold; break;
+  case lltok::kw_x86_stdcallcc:  CC = CallingConv::X86_StdCall; break;
+  case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+  case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
+  case lltok::kw_arm_apcscc:     CC = CallingConv::ARM_APCS; break;
+  case lltok::kw_arm_aapcscc:    CC = CallingConv::ARM_AAPCS; break;
+  case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+  case lltok::kw_msp430_intrcc:  CC = CallingConv::MSP430_INTR; break;
+  case lltok::kw_ptx_kernel:     CC = CallingConv::PTX_Kernel; break;
+  case lltok::kw_ptx_device:     CC = CallingConv::PTX_Device; break;
+  case lltok::kw_cc: {
+    unsigned ArbitraryCC;
+    Lex.Lex();
+    if (ParseUInt32(ArbitraryCC))
+      return true;
+    CC = static_cast<CallingConv::ID>(ArbitraryCC);
+    return false;
+  }
+  }
+
+  Lex.Lex();
+  return false;
+}
+
+/// ParseInstructionMetadata
+///   ::= !dbg !42 (',' !dbg !57)*
+bool LLParser::ParseInstructionMetadata(Instruction *Inst,
+                                        PerFunctionState *PFS) {
+  do {
+    if (Lex.getKind() != lltok::MetadataVar)
+      return TokError("expected metadata after comma");
+
+    std::string Name = Lex.getStrVal();
+    unsigned MDK = M->getMDKindID(Name.c_str());
+    Lex.Lex();
+
+    MDNode *Node;
+    SMLoc Loc = Lex.getLoc();
+
+    if (ParseToken(lltok::exclaim, "expected '!' here"))
+      return true;
+
+    // This code is similar to that of ParseMetadataValue, however it needs to
+    // have special-case code for a forward reference; see the comments on
+    // ForwardRefInstMetadata for details.  Also, MDStrings are not supported
+    // at the top level here.
+    if (Lex.getKind() == lltok::lbrace) {
+      ValID ID;
+      if (ParseMetadataListValue(ID, PFS))
+        return true;
+      assert(ID.Kind == ValID::t_MDNode);
+      Inst->setMetadata(MDK, ID.MDNodeVal);
+    } else {
+      unsigned NodeID = 0;
+      if (ParseMDNodeID(Node, NodeID))
+        return true;
+      if (Node) {
+        // If we got the node, add it to the instruction.
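+        // e.g. for (hypothetical) input "store i32 0, i32* %p, !dbg !7",
+        // MDK names the "dbg" kind and Node is metadata slot !7.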
+        Inst->setMetadata(MDK, Node);
+      } else {
+        MDRef R = { Loc, MDK, NodeID };
+        // Otherwise, remember that this should be resolved later.
+        ForwardRefInstMetadata[Inst].push_back(R);
+      }
+    }
+
+    // If this is the end of the list, we're done.
+  } while (EatIfPresent(lltok::comma));
+  return false;
+}
+
+/// ParseOptionalAlignment
+///   ::= /* empty */
+///   ::= 'align' 4
+bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
+  Alignment = 0;
+  if (!EatIfPresent(lltok::kw_align))
+    return false;
+  LocTy AlignLoc = Lex.getLoc();
+  if (ParseUInt32(Alignment)) return true;
+  if (!isPowerOf2_32(Alignment))
+    return Error(AlignLoc, "alignment is not a power of two");
+  if (Alignment > Value::MaximumAlignment)
+    return Error(AlignLoc, "huge alignments are not supported yet");
+  return false;
+}
+
+/// ParseOptionalCommaAlign
+///   ::=
+///   ::= ',' align 4
+///
+/// This returns with AteExtraComma set to true if it ate an excess comma at
+/// the end.
+bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment,
+                                       bool &AteExtraComma) {
+  AteExtraComma = false;
+  while (EatIfPresent(lltok::comma)) {
+    // Metadata at the end is an early exit.
+    if (Lex.getKind() == lltok::MetadataVar) {
+      AteExtraComma = true;
+      return false;
+    }
+
+    if (Lex.getKind() != lltok::kw_align)
+      return Error(Lex.getLoc(), "expected metadata or 'align'");
+
+    if (ParseOptionalAlignment(Alignment)) return true;
+  }
+
+  return false;
+}
+
+/// ParseOptionalStackAlignment
+///   ::= /* empty */
+///   ::= 'alignstack' '(' 4 ')'
+bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) {
+  Alignment = 0;
+  if (!EatIfPresent(lltok::kw_alignstack))
+    return false;
+  LocTy ParenLoc = Lex.getLoc();
+  if (!EatIfPresent(lltok::lparen))
+    return Error(ParenLoc, "expected '('");
+  LocTy AlignLoc = Lex.getLoc();
+  if (ParseUInt32(Alignment)) return true;
+  ParenLoc = Lex.getLoc();
+  if (!EatIfPresent(lltok::rparen))
+    return Error(ParenLoc, "expected ')'");
+  if (!isPowerOf2_32(Alignment))
+    return Error(AlignLoc, "stack alignment is not a power of two");
+  return false;
+}
+
+/// ParseIndexList - This parses the index list for an insert/extractvalue
+/// instruction.  This sets AteExtraComma in the case where we eat an extra
+/// comma at the end of the line and find that it is followed by metadata.
+/// Clients that don't allow metadata can call the version of this function
+/// that only takes one argument.
+///
+/// ParseIndexList
+///    ::=  (',' uint32)+
+///
+bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
+                              bool &AteExtraComma) {
+  AteExtraComma = false;
+
+  if (Lex.getKind() != lltok::comma)
+    return TokError("expected ',' as start of index list");
+
+  while (EatIfPresent(lltok::comma)) {
+    if (Lex.getKind() == lltok::MetadataVar) {
+      AteExtraComma = true;
+      return false;
+    }
+    unsigned Idx = 0;
+    if (ParseUInt32(Idx)) return true;
+    Indices.push_back(Idx);
+  }
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Type Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseType - Parse and resolve a full type.
+bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
+  LocTy TypeLoc = Lex.getLoc();
+  if (ParseTypeRec(Result)) return true;
+
+  // Verify no unresolved uprefs.
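+  // (Up-references use the old '\N' syntax, e.g. the recursive struct
+  // "{ \2* }"; any '\N' still pending at this point cannot be resolved.)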
+  if (!UpRefs.empty())
+    return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
+
+  if (!AllowVoid && Result.get()->isVoidTy())
+    return Error(TypeLoc, "void type only allowed for function results");
+
+  return false;
+}
+
+/// HandleUpRefs - Every time we finish a new layer of types, this function is
+/// called.  It loops through the UpRefs vector, which is a list of the
+/// currently active types.  For each type, if the up-reference is contained in
+/// the newly completed type, we decrement the level count.  When the level
+/// count reaches zero, the up-referenced type is the type that is passed in:
+/// thus we can complete the cycle.
+///
+PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
+  // If Ty isn't abstract, or if there are no up-references in it, then there
+  // is nothing to resolve here.
+  if (!ty->isAbstract() || UpRefs.empty()) return ty;
+
+  PATypeHolder Ty(ty);
+#if 0
+  dbgs() << "Type '" << Ty->getDescription()
+         << "' newly formed.  Resolving upreferences.\n"
+         << UpRefs.size() << " upreferences active!\n";
+#endif
+
+  // If we find any resolvable upreferences (i.e., those whose NestingLevel goes
+  // to zero), we resolve them all together before we resolve them to Ty.  At
+  // the end of the loop, if there is anything to resolve to Ty, it will be in
+  // this variable.
+  OpaqueType *TypeToResolve = 0;
+
+  for (unsigned i = 0; i != UpRefs.size(); ++i) {
+    // Determine if 'Ty' directly contains this up-reference's
+    // 'LastContainedTy'.
+    bool ContainsType =
+      std::find(Ty->subtype_begin(), Ty->subtype_end(),
+                UpRefs[i].LastContainedTy) != Ty->subtype_end();
+
+#if 0
+    dbgs() << "  UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
+           << UpRefs[i].LastContainedTy->getDescription() << ") = "
+           << (ContainsType ? "true" : "false")
+           << " level=" << UpRefs[i].NestingLevel << "\n";
+#endif
+    if (!ContainsType)
+      continue;
+
+    // Decrement level of upreference
+    unsigned Level = --UpRefs[i].NestingLevel;
+    UpRefs[i].LastContainedTy = Ty;
+
+    // If the Up-reference has a non-zero level, it shouldn't be resolved yet.
+    if (Level != 0)
+      continue;
+
+#if 0
+    dbgs() << "  * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
+#endif
+    if (!TypeToResolve)
+      TypeToResolve = UpRefs[i].UpRefTy;
+    else
+      UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve);
+    UpRefs.erase(UpRefs.begin()+i);   // Remove from upreference list.
+    --i;                              // Do not skip the next element.
+  }
+
+  if (TypeToResolve)
+    TypeToResolve->refineAbstractTypeTo(Ty);
+
+  return Ty;
+}
+
+
+/// ParseTypeRec - The recursive function used to process the internal
+/// implementation details of types.
+bool LLParser::ParseTypeRec(PATypeHolder &Result) {
+  switch (Lex.getKind()) {
+  default:
+    return TokError("expected type");
+  case lltok::Type:
+    // TypeRec ::= 'float' | 'void' (etc)
+    Result = Lex.getTyVal();
+    Lex.Lex();
+    break;
+  case lltok::kw_opaque:
+    // TypeRec ::= 'opaque'
+    Result = OpaqueType::get(Context);
+    Lex.Lex();
+    break;
+  case lltok::lbrace:
+    // TypeRec ::= '{' ... '}'
+    if (ParseStructType(Result, false))
+      return true;
+    break;
+  case lltok::lsquare:
+    // TypeRec ::= '[' ... ']'
+    Lex.Lex(); // eat the lsquare.
+    if (ParseArrayVectorType(Result, false))
+      return true;
+    break;
+  case lltok::less: // Either vector or packed struct.
+    // TypeRec ::= '<' ... '>'
+    Lex.Lex();
+    if (Lex.getKind() == lltok::lbrace) {
+      if (ParseStructType(Result, true) ||
+          ParseToken(lltok::greater, "expected '>' at end of packed struct"))
+        return true;
+    } else if (ParseArrayVectorType(Result, true))
+      return true;
+    break;
+  case lltok::LocalVar:
+  case lltok::StringConstant:  // FIXME: REMOVE IN LLVM 3.0
+    // TypeRec ::= %foo
+    if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
+      Result = T;
+    } else {
+      Result = OpaqueType::get(Context);
+      ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
+                                            std::make_pair(Result,
+                                                           Lex.getLoc())));
+      M->addTypeName(Lex.getStrVal(), Result.get());
+    }
+    Lex.Lex();
+    break;
+
+  case lltok::LocalVarID:
+    // TypeRec ::= %4
+    if (Lex.getUIntVal() < NumberedTypes.size())
+      Result = NumberedTypes[Lex.getUIntVal()];
+    else {
+      std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
+        I = ForwardRefTypeIDs.find(Lex.getUIntVal());
+      if (I != ForwardRefTypeIDs.end())
+        Result = I->second.first;
+      else {
+        Result = OpaqueType::get(Context);
+        ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(),
+                                                std::make_pair(Result,
+                                                               Lex.getLoc())));
+      }
+    }
+    Lex.Lex();
+    break;
+  case lltok::backslash: {
+    // TypeRec ::= '\' 4
+    Lex.Lex();
+    unsigned Val;
+    if (ParseUInt32(Val)) return true;
+    OpaqueType *OT = OpaqueType::get(Context); // Use temporary placeholder.
+    UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT));
+    Result = OT;
+    break;
+  }
+  }
+
+  // Parse the type suffixes.
+  while (1) {
+    switch (Lex.getKind()) {
+    // End of type.
+    default: return false;
+
+    // TypeRec ::= TypeRec '*'
+    case lltok::star:
+      if (Result.get()->isLabelTy())
+        return TokError("basic block pointers are invalid");
+      if (Result.get()->isVoidTy())
+        return TokError("pointers to void are invalid; use i8* instead");
+      if (!PointerType::isValidElementType(Result.get()))
+        return TokError("pointer to this type is invalid");
+      Result = HandleUpRefs(PointerType::getUnqual(Result.get()));
+      Lex.Lex();
+      break;
+
+    // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*'
+    case lltok::kw_addrspace: {
+      if (Result.get()->isLabelTy())
+        return TokError("basic block pointers are invalid");
+      if (Result.get()->isVoidTy())
+        return TokError("pointers to void are invalid; use i8* instead");
+      if (!PointerType::isValidElementType(Result.get()))
+        return TokError("pointer to this type is invalid");
+      unsigned AddrSpace;
+      if (ParseOptionalAddrSpace(AddrSpace) ||
+          ParseToken(lltok::star, "expected '*' in address space"))
+        return true;
+
+      Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace));
+      break;
+    }
+
+    /// Types '(' ArgTypeListI ')' OptFuncAttrs
+    case lltok::lparen:
+      if (ParseFunctionType(Result))
+        return true;
+      break;
+    }
+  }
+}
+
+/// ParseParameterList
+///    ::= '(' ')'
+///    ::= '(' Arg (',' Arg)* ')'
+///  Arg
+///    ::= Type OptionalAttributes Value OptionalAttributes
+bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
+                                  PerFunctionState &PFS) {
+  if (ParseToken(lltok::lparen, "expected '(' in call"))
+    return true;
+
+  while (Lex.getKind() != lltok::rparen) {
+    // If this isn't the first argument, we need a comma.
+    if (!ArgList.empty() &&
+        ParseToken(lltok::comma, "expected ',' in argument list"))
+      return true;
+
+    // Parse the argument.
+    LocTy ArgLoc;
+    PATypeHolder ArgTy(Type::getVoidTy(Context));
+    unsigned ArgAttrs1 = Attribute::None;
+    unsigned ArgAttrs2 = Attribute::None;
+    Value *V;
+    if (ParseType(ArgTy, ArgLoc))
+      return true;
+
+    // Otherwise, handle normal operands.
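+    // e.g. each operand of a (hypothetical) call "@f(i32 7, i8* null)" takes
+    // this path: optional attrs, the value, then legacy trailing attrs.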
+    if (ParseOptionalAttrs(ArgAttrs1, 0) ||
+        ParseValue(ArgTy, V, PFS) ||
+        // FIXME: Should not allow attributes after the argument, remove this
+        // in LLVM 3.0.
+        ParseOptionalAttrs(ArgAttrs2, 3))
+      return true;
+    ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
+  }
+
+  Lex.Lex();  // Lex the ')'.
+  return false;
+}
+
+
+
+/// ParseArgumentList - Parse the argument list for a function type or function
+/// prototype.  If 'inType' is true then we are parsing a FunctionType.
+///   ::= '(' ArgTypeListI ')'
+/// ArgTypeListI
+///   ::= /*empty*/
+///   ::= '...'
+///   ::= ArgTypeList ',' '...'
+///   ::= ArgType (',' ArgType)*
+///
+bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
+                                 bool &isVarArg, bool inType) {
+  isVarArg = false;
+  assert(Lex.getKind() == lltok::lparen);
+  Lex.Lex(); // eat the (.
+
+  if (Lex.getKind() == lltok::rparen) {
+    // empty
+  } else if (Lex.getKind() == lltok::dotdotdot) {
+    isVarArg = true;
+    Lex.Lex();
+  } else {
+    LocTy TypeLoc = Lex.getLoc();
+    PATypeHolder ArgTy(Type::getVoidTy(Context));
+    unsigned Attrs;
+    std::string Name;
+
+    // If we're parsing a type, use ParseTypeRec, because we allow recursive
+    // types (such as a function returning a pointer to itself).  If parsing a
+    // function prototype, we require fully resolved types.
+    if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+        ParseOptionalAttrs(Attrs, 0)) return true;
+
+    if (ArgTy->isVoidTy())
+      return Error(TypeLoc, "argument can not have void type");
+
+    if (Lex.getKind() == lltok::LocalVar ||
+        Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+      Name = Lex.getStrVal();
+      Lex.Lex();
+    }
+
+    if (!FunctionType::isValidArgumentType(ArgTy))
+      return Error(TypeLoc, "invalid type for function argument");
+
+    ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+
+    while (EatIfPresent(lltok::comma)) {
+      // Handle ... at end of arg list.
+      if (EatIfPresent(lltok::dotdotdot)) {
+        isVarArg = true;
+        break;
+      }
+
+      // Otherwise must be an argument type.
+      TypeLoc = Lex.getLoc();
+      if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+          ParseOptionalAttrs(Attrs, 0)) return true;
+
+      if (ArgTy->isVoidTy())
+        return Error(TypeLoc, "argument can not have void type");
+
+      if (Lex.getKind() == lltok::LocalVar ||
+          Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+        Name = Lex.getStrVal();
+        Lex.Lex();
+      } else {
+        Name = "";
+      }
+
+      if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy())
+        return Error(TypeLoc, "invalid type for function argument");
+
+      ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
+    }
+  }
+
+  return ParseToken(lltok::rparen, "expected ')' at end of argument list");
+}
+
+/// ParseFunctionType
+///  ::= Type ArgumentList OptionalAttrs
+bool LLParser::ParseFunctionType(PATypeHolder &Result) {
+  assert(Lex.getKind() == lltok::lparen);
+
+  if (!FunctionType::isValidReturnType(Result))
+    return TokError("invalid function return type");
+
+  std::vector<ArgInfo> ArgList;
+  bool isVarArg;
+  unsigned Attrs;
+  if (ParseArgumentList(ArgList, isVarArg, true) ||
+      // FIXME: Allow, but ignore attributes on function types!
+      // FIXME: Remove in LLVM 3.0
+      ParseOptionalAttrs(Attrs, 2))
+    return true;
+
+  // Reject names on the arguments lists.
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    if (!ArgList[i].Name.empty())
+      return Error(ArgList[i].Loc, "argument name invalid in function type");
+    if (ArgList[i].Attrs != 0) {
+      // Allow but ignore attributes on function types; this permits
+      // auto-upgrade.
+      // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
+    }
+  }
+
+  std::vector<const Type*> ArgListTy;
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+    ArgListTy.push_back(ArgList[i].Type);
+
+  Result = HandleUpRefs(FunctionType::get(Result.get(),
+                                          ArgListTy, isVarArg));
+  return false;
+}
+
+/// ParseStructType: Handles packed and unpacked types.  '<' and '>' are
+/// parsed elsewhere.
+///   TypeRec
+///     ::= '{' '}'
+///     ::= '{' TypeRec (',' TypeRec)* '}'
+///     ::= '<' '{' '}' '>'
+///     ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
+bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+  assert(Lex.getKind() == lltok::lbrace);
+  Lex.Lex(); // Consume the '{'
+
+  if (EatIfPresent(lltok::rbrace)) {
+    Result = StructType::get(Context, Packed);
+    return false;
+  }
+
+  std::vector<PATypeHolder> ParamsList;
+  LocTy EltTyLoc = Lex.getLoc();
+  if (ParseTypeRec(Result)) return true;
+  ParamsList.push_back(Result);
+
+  if (Result->isVoidTy())
+    return Error(EltTyLoc, "struct element can not have void type");
+  if (!StructType::isValidElementType(Result))
+    return Error(EltTyLoc, "invalid element type for struct");
+
+  while (EatIfPresent(lltok::comma)) {
+    EltTyLoc = Lex.getLoc();
+    if (ParseTypeRec(Result)) return true;
+
+    if (Result->isVoidTy())
+      return Error(EltTyLoc, "struct element can not have void type");
+    if (!StructType::isValidElementType(Result))
+      return Error(EltTyLoc, "invalid element type for struct");
+
+    ParamsList.push_back(Result);
+  }
+
+  if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
+    return true;
+
+  std::vector<const Type*> ParamsListTy;
+  for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
+    ParamsListTy.push_back(ParamsList[i].get());
+  Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed));
+  return false;
+}
+
+/// ParseArrayVectorType - Parse an array or vector type, assuming the first
+/// token has already been consumed.
+///   TypeRec
+///     ::= '[' APSINTVAL 'x' Types ']'
+///     ::= '<' APSINTVAL 'x' Types '>'
+bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
+  if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
+      Lex.getAPSIntVal().getBitWidth() > 64)
+    return TokError("expected number in sequential type");
+
+  LocTy SizeLoc = Lex.getLoc();
+  uint64_t Size = Lex.getAPSIntVal().getZExtValue();
+  Lex.Lex();
+
+  if (ParseToken(lltok::kw_x, "expected 'x' after element count"))
+    return true;
+
+  LocTy TypeLoc = Lex.getLoc();
+  PATypeHolder EltTy(Type::getVoidTy(Context));
+  if (ParseTypeRec(EltTy)) return true;
+
+  if (EltTy->isVoidTy())
+    return Error(TypeLoc, "array and vector element type cannot be void");
+
+  if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
+                 "expected end of sequential type"))
+    return true;
+
+  if (isVector) {
+    if (Size == 0)
+      return Error(SizeLoc, "zero element vector is illegal");
+    if ((unsigned)Size != Size)
+      return Error(SizeLoc, "size too large for vector");
+    if (!VectorType::isValidElementType(EltTy))
+      return Error(TypeLoc, "vector element type must be fp or integer");
+    Result = VectorType::get(EltTy, unsigned(Size));
+  } else {
+    if (!ArrayType::isValidElementType(EltTy))
+      return Error(TypeLoc, "invalid array element type");
+    Result = HandleUpRefs(ArrayType::get(EltTy, Size));
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Function Semantic Analysis.
+//===----------------------------------------------------------------------===//
+
+LLParser::PerFunctionState::PerFunctionState(LLParser &p, Function &f,
+                                             int functionNumber)
+  : P(p), F(f), FunctionNumber(functionNumber) {
+
+  // Insert unnamed arguments into the NumberedVals list.
+  for (Function::arg_iterator AI = F.arg_begin(), E = F.arg_end();
+       AI != E; ++AI)
+    if (!AI->hasName())
+      NumberedVals.push_back(AI);
+}
+
+LLParser::PerFunctionState::~PerFunctionState() {
+  // If there were any forward referenced non-basicblock values, delete them.
+  for (std::map<std::string, std::pair<Value*, LocTy> >::iterator
+       I = ForwardRefVals.begin(), E = ForwardRefVals.end(); I != E; ++I)
+    if (!isa<BasicBlock>(I->second.first)) {
+      I->second.first->replaceAllUsesWith(
+                           UndefValue::get(I->second.first->getType()));
+      delete I->second.first;
+      I->second.first = 0;
+    }
+
+  for (std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+       I = ForwardRefValIDs.begin(), E = ForwardRefValIDs.end(); I != E; ++I)
+    if (!isa<BasicBlock>(I->second.first)) {
+      I->second.first->replaceAllUsesWith(
+                           UndefValue::get(I->second.first->getType()));
+      delete I->second.first;
+      I->second.first = 0;
+    }
+}
+
+bool LLParser::PerFunctionState::FinishFunction() {
+  // Check to see if someone took the address of labels in this block.
+  if (!P.ForwardRefBlockAddresses.empty()) {
+    ValID FunctionID;
+    if (!F.getName().empty()) {
+      FunctionID.Kind = ValID::t_GlobalName;
+      FunctionID.StrVal = F.getName();
+    } else {
+      FunctionID.Kind = ValID::t_GlobalID;
+      FunctionID.UIntVal = FunctionNumber;
+    }
+
+    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >::iterator
+      FRBAI = P.ForwardRefBlockAddresses.find(FunctionID);
+    if (FRBAI != P.ForwardRefBlockAddresses.end()) {
+      // Resolve all these references.
+      if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this))
+        return true;
+
+      P.ForwardRefBlockAddresses.erase(FRBAI);
+    }
+  }
+
+  if (!ForwardRefVals.empty())
+    return P.Error(ForwardRefVals.begin()->second.second,
+                   "use of undefined value '%" + ForwardRefVals.begin()->first +
+                   "'");
+  if (!ForwardRefValIDs.empty())
+    return P.Error(ForwardRefValIDs.begin()->second.second,
+                   "use of undefined value '%" +
+                   Twine(ForwardRefValIDs.begin()->first) + "'");
+  return false;
+}
+
+
+/// GetVal - Get a value with the specified name or ID, creating a
+/// forward reference record if needed.  This can return null if the value
+/// exists but does not have the right type.
+Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
+                                          const Type *Ty, LocTy Loc) {
+  // Look this name up in the normal function symbol table.
+  Value *Val = F.getValueSymbolTable().lookup(Name);
+
+  // If this is a forward reference for the value, see if we already created a
+  // forward ref record.
+  if (Val == 0) {
+    std::map<std::string, std::pair<Value*, LocTy> >::iterator
+      I = ForwardRefVals.find(Name);
+    if (I != ForwardRefVals.end())
+      Val = I->second.first;
+  }
+
+  // If we have the value in the symbol table or fwd-ref table, return it.
+  if (Val) {
+    if (Val->getType() == Ty) return Val;
+    if (Ty->isLabelTy())
+      P.Error(Loc, "'%" + Name + "' is not a basic block");
+    else
+      P.Error(Loc, "'%" + Name + "' defined with type '" +
+              Val->getType()->getDescription() + "'");
+    return 0;
+  }
+
+  // Don't make placeholders with invalid type.
+  if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+    P.Error(Loc, "invalid use of a non-first-class type");
+    return 0;
+  }
+
+  // Otherwise, create a new forward reference for this value and remember it.
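+  // e.g. a (hypothetical) "br label %exit" seen before "exit:" creates a
+  // placeholder BasicBlock; a non-label use creates a placeholder Argument.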
+  Value *FwdVal;
+  if (Ty->isLabelTy())
+    FwdVal = BasicBlock::Create(F.getContext(), Name, &F);
+  else
+    FwdVal = new Argument(Ty, Name);
+
+  ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
+  return FwdVal;
+}
+
+Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
+                                          LocTy Loc) {
+  // Look this name up in the normal function symbol table.
+  Value *Val = ID < NumberedVals.size() ? NumberedVals[ID] : 0;
+
+  // If this is a forward reference for the value, see if we already created a
+  // forward ref record.
+  if (Val == 0) {
+    std::map<unsigned, std::pair<Value*, LocTy> >::iterator
+      I = ForwardRefValIDs.find(ID);
+    if (I != ForwardRefValIDs.end())
+      Val = I->second.first;
+  }
+
+  // If we have the value in the symbol table or fwd-ref table, return it.
+  if (Val) {
+    if (Val->getType() == Ty) return Val;
+    if (Ty->isLabelTy())
+      P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
+    else
+      P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
+              Val->getType()->getDescription() + "'");
+    return 0;
+  }
+
+  if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+    P.Error(Loc, "invalid use of a non-first-class type");
+    return 0;
+  }
+
+  // Otherwise, create a new forward reference for this value and remember it.
+  Value *FwdVal;
+  if (Ty->isLabelTy())
+    FwdVal = BasicBlock::Create(F.getContext(), "", &F);
+  else
+    FwdVal = new Argument(Ty);
+
+  ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
+  return FwdVal;
+}
+
+/// SetInstName - After an instruction is parsed and inserted into its
+/// basic block, this installs its name.
+bool LLParser::PerFunctionState::SetInstName(int NameID,
+                                             const std::string &NameStr,
+                                             LocTy NameLoc, Instruction *Inst) {
+  // If this instruction has void type, it cannot have a name or ID specified.
+  if (Inst->getType()->isVoidTy()) {
+    if (NameID != -1 || !NameStr.empty())
+      return P.Error(NameLoc, "instructions returning void cannot have a name");
+    return false;
+  }
+
+  // If this was a numbered instruction, verify that the instruction is the
+  // expected value and resolve any forward references.
+  if (NameStr.empty()) {
+    // If neither a name nor an ID was specified, just use the next ID.
+    if (NameID == -1)
+      NameID = NumberedVals.size();
+
+    if (unsigned(NameID) != NumberedVals.size())
+      return P.Error(NameLoc, "instruction expected to be numbered '%" +
+                     Twine(NumberedVals.size()) + "'");
+
+    std::map<unsigned, std::pair<Value*, LocTy> >::iterator FI =
+      ForwardRefValIDs.find(NameID);
+    if (FI != ForwardRefValIDs.end()) {
+      if (FI->second.first->getType() != Inst->getType())
+        return P.Error(NameLoc, "instruction forward referenced with type '" +
+                       FI->second.first->getType()->getDescription() + "'");
+      FI->second.first->replaceAllUsesWith(Inst);
+      delete FI->second.first;
+      ForwardRefValIDs.erase(FI);
+    }
+
+    NumberedVals.push_back(Inst);
+    return false;
+  }
+
+  // Otherwise, the instruction had a name.  Resolve forward refs and set it.
+  std::map<std::string, std::pair<Value*, LocTy> >::iterator
+    FI = ForwardRefVals.find(NameStr);
+  if (FI != ForwardRefVals.end()) {
+    if (FI->second.first->getType() != Inst->getType())
+      return P.Error(NameLoc, "instruction forward referenced with type '" +
+                     FI->second.first->getType()->getDescription() + "'");
+    FI->second.first->replaceAllUsesWith(Inst);
+    delete FI->second.first;
+    ForwardRefVals.erase(FI);
+  }
+
+  // Set the name on the instruction.
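+  // Note: setName auto-renames on collision ("%x" becomes "%x1"), so the
+  // mismatch check below is what detects a duplicate local definition.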
+  Inst->setName(NameStr);
+
+  if (Inst->getName() != NameStr)
+    return P.Error(NameLoc, "multiple definition of local value named '" +
+                   NameStr + "'");
+  return false;
+}
+
+/// GetBB - Get a basic block with the specified name or ID, creating a
+/// forward reference record if needed.
+BasicBlock *LLParser::PerFunctionState::GetBB(const std::string &Name,
+                                              LocTy Loc) {
+  return cast_or_null<BasicBlock>(GetVal(Name,
+                                        Type::getLabelTy(F.getContext()), Loc));
+}
+
+BasicBlock *LLParser::PerFunctionState::GetBB(unsigned ID, LocTy Loc) {
+  return cast_or_null<BasicBlock>(GetVal(ID,
+                                        Type::getLabelTy(F.getContext()), Loc));
+}
+
+/// DefineBB - Define the specified basic block, which is either named or
+/// unnamed.  If there is an error, this returns null otherwise it returns
+/// the block being defined.
+BasicBlock *LLParser::PerFunctionState::DefineBB(const std::string &Name,
+                                                 LocTy Loc) {
+  BasicBlock *BB;
+  if (Name.empty())
+    BB = GetBB(NumberedVals.size(), Loc);
+  else
+    BB = GetBB(Name, Loc);
+  if (BB == 0) return 0; // Already diagnosed error.
+
+  // Move the block to the end of the function.  Forward ref'd blocks are
+  // inserted wherever they happen to be referenced.
+  F.getBasicBlockList().splice(F.end(), F.getBasicBlockList(), BB);
+
+  // Remove the block from forward ref sets.
+  if (Name.empty()) {
+    ForwardRefValIDs.erase(NumberedVals.size());
+    NumberedVals.push_back(BB);
+  } else {
+    // BB forward references are already in the function symbol table.
+    ForwardRefVals.erase(Name);
+  }
+
+  return BB;
+}
+
+//===----------------------------------------------------------------------===//
+// Constants.
+//===----------------------------------------------------------------------===//
+
+/// ParseValID - Parse an abstract value that doesn't necessarily have a
+/// type implied.  For example, if we parse "4" we don't know what integer type
+/// it has.  The value will later be combined with its type and checked for
+/// sanity.  PFS is used to convert function-local operands of metadata (since
+/// metadata operands are not just parsed here but also converted to values).
+/// PFS can be null when we are not parsing metadata values inside a function.
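+///
+/// For reference, some (hypothetical) inputs and the kinds they produce:
+/// "42" -> t_APSInt, "3.5" -> t_APFloat, "@g" -> t_GlobalName,
+/// "%tmp" -> t_LocalName, "null" -> t_Null, "zeroinitializer" -> t_Zero.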
+bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
+  ID.Loc = Lex.getLoc();
+  switch (Lex.getKind()) {
+  default: return TokError("expected value token");
+  case lltok::GlobalID:  // @42
+    ID.UIntVal = Lex.getUIntVal();
+    ID.Kind = ValID::t_GlobalID;
+    break;
+  case lltok::GlobalVar:  // @foo
+    ID.StrVal = Lex.getStrVal();
+    ID.Kind = ValID::t_GlobalName;
+    break;
+  case lltok::LocalVarID:  // %42
+    ID.UIntVal = Lex.getUIntVal();
+    ID.Kind = ValID::t_LocalID;
+    break;
+  case lltok::LocalVar:  // %foo
+  case lltok::StringConstant:  // "foo" - FIXME: REMOVE IN LLVM 3.0
+    ID.StrVal = Lex.getStrVal();
+    ID.Kind = ValID::t_LocalName;
+    break;
+  case lltok::exclaim:   // !42, !{...}, or !"foo"
+    return ParseMetadataValue(ID, PFS);
+  case lltok::APSInt:
+    ID.APSIntVal = Lex.getAPSIntVal();
+    ID.Kind = ValID::t_APSInt;
+    break;
+  case lltok::APFloat:
+    ID.APFloatVal = Lex.getAPFloatVal();
+    ID.Kind = ValID::t_APFloat;
+    break;
+  case lltok::kw_true:
+    ID.ConstantVal = ConstantInt::getTrue(Context);
+    ID.Kind = ValID::t_Constant;
+    break;
+  case lltok::kw_false:
+    ID.ConstantVal = ConstantInt::getFalse(Context);
+    ID.Kind = ValID::t_Constant;
+    break;
+  case lltok::kw_null: ID.Kind = ValID::t_Null; break;
+  case lltok::kw_undef: ID.Kind = ValID::t_Undef; break;
+  case lltok::kw_zeroinitializer: ID.Kind = ValID::t_Zero; break;
+
+  case lltok::lbrace: {
+    // ValID ::= '{' ConstVector '}'
+    Lex.Lex();
+    SmallVector<Constant*, 16> Elts;
+    if (ParseGlobalValueVector(Elts) ||
+        ParseToken(lltok::rbrace, "expected end of struct constant"))
+      return true;
+
+    ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
+                                         Elts.size(), false);
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::less: {
+    // ValID ::= '<' ConstVector '>'         --> Vector.
+    // ValID ::= '<' '{' ConstVector '}' '>' --> Packed Struct.
+    Lex.Lex();
+    bool isPackedStruct = EatIfPresent(lltok::lbrace);
+
+    SmallVector<Constant*, 16> Elts;
+    LocTy FirstEltLoc = Lex.getLoc();
+    if (ParseGlobalValueVector(Elts) ||
+        (isPackedStruct &&
+         ParseToken(lltok::rbrace, "expected end of packed struct")) ||
+        ParseToken(lltok::greater, "expected end of constant"))
+      return true;
+
+    if (isPackedStruct) {
+      ID.ConstantVal =
+        ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
+      ID.Kind = ValID::t_Constant;
+      return false;
+    }
+
+    if (Elts.empty())
+      return Error(ID.Loc, "constant vector must not be empty");
+
+    if (!Elts[0]->getType()->isIntegerTy() &&
+        !Elts[0]->getType()->isFloatingPointTy())
+      return Error(FirstEltLoc,
+                   "vector elements must have integer or floating point type");
+
+    // Verify that all the vector elements have the same type.
+    for (unsigned i = 1, e = Elts.size(); i != e; ++i)
+      if (Elts[i]->getType() != Elts[0]->getType())
+        return Error(FirstEltLoc,
+                     "vector element #" + Twine(i) +
+                     " is not of type '" +
+                     Elts[0]->getType()->getDescription() + "'");
+
+    ID.ConstantVal = ConstantVector::get(Elts);
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::lsquare: {   // Array Constant
+    Lex.Lex();
+    SmallVector<Constant*, 16> Elts;
+    LocTy FirstEltLoc = Lex.getLoc();
+    if (ParseGlobalValueVector(Elts) ||
+        ParseToken(lltok::rsquare, "expected end of array constant"))
+      return true;
+
+    // Handle empty element.
+    if (Elts.empty()) {
+      // Use undef instead of an array because it's inconvenient to determine
+      // the element type at this point, there being no elements to examine.
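+      // (e.g. a (hypothetical) "[ ]" initializer reaches this point with no
+      // elements from which to infer an element type.)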
+      ID.Kind = ValID::t_EmptyArray;
+      return false;
+    }
+
+    if (!Elts[0]->getType()->isFirstClassType())
+      return Error(FirstEltLoc, "invalid array element type: " +
+                   Elts[0]->getType()->getDescription());
+
+    ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
+
+    // Verify all elements are correct type!
+    for (unsigned i = 0, e = Elts.size(); i != e; ++i) {
+      if (Elts[i]->getType() != Elts[0]->getType())
+        return Error(FirstEltLoc,
+                     "array element #" + Twine(i) +
+                     " is not of type '" +
+                     Elts[0]->getType()->getDescription() + "'");
+    }
+
+    ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_c:  // c "foo"
+    Lex.Lex();
+    ID.ConstantVal = ConstantArray::get(Context, Lex.getStrVal(), false);
+    if (ParseToken(lltok::StringConstant, "expected string")) return true;
+    ID.Kind = ValID::t_Constant;
+    return false;
+
+  case lltok::kw_asm: {
+    // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT
+    bool HasSideEffect, AlignStack;
+    Lex.Lex();
+    if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) ||
+        ParseOptionalToken(lltok::kw_alignstack, AlignStack) ||
+        ParseStringConstant(ID.StrVal) ||
+        ParseToken(lltok::comma, "expected comma in inline asm expression") ||
+        ParseToken(lltok::StringConstant, "expected constraint string"))
+      return true;
+    ID.StrVal2 = Lex.getStrVal();
+    ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1);
+    ID.Kind = ValID::t_InlineAsm;
+    return false;
+  }
+
+  case lltok::kw_blockaddress: {
+    // ValID ::= 'blockaddress' '(' @foo ',' %bar ')'
+    Lex.Lex();
+
+    ValID Fn, Label;
+    LocTy FnLoc, LabelLoc;
+
+    if (ParseToken(lltok::lparen, "expected '(' in block address expression") ||
+        ParseValID(Fn) ||
+        ParseToken(lltok::comma, "expected comma in block address expression")||
+        ParseValID(Label) ||
+        ParseToken(lltok::rparen, "expected ')' in block address expression"))
+      return true;
+
+    if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName)
+      return Error(Fn.Loc, "expected function name in blockaddress");
+    if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName)
+      return Error(Label.Loc, "expected basic block name in blockaddress");
+
+    // Make a global variable as a placeholder for this reference.
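+    // e.g. (hypothetical) "blockaddress(@f, %bb)" seen before @f's body is
+    // parsed; the dummy i8 global below is RAUW'd once @f is finished.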
+    GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context),
+                                                false, GlobalValue::InternalLinkage,
+                                                0, "");
+    ForwardRefBlockAddresses[Fn].push_back(std::make_pair(Label, FwdRef));
+    ID.ConstantVal = FwdRef;
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+
+  case lltok::kw_trunc:
+  case lltok::kw_zext:
+  case lltok::kw_sext:
+  case lltok::kw_fptrunc:
+  case lltok::kw_fpext:
+  case lltok::kw_bitcast:
+  case lltok::kw_uitofp:
+  case lltok::kw_sitofp:
+  case lltok::kw_fptoui:
+  case lltok::kw_fptosi:
+  case lltok::kw_inttoptr:
+  case lltok::kw_ptrtoint: {
+    unsigned Opc = Lex.getUIntVal();
+    PATypeHolder DestTy(Type::getVoidTy(Context));
+    Constant *SrcVal;
+    Lex.Lex();
+    if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
+        ParseGlobalTypeAndValue(SrcVal) ||
+        ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
+        ParseType(DestTy) ||
+        ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
+      return true;
+    if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
+      return Error(ID.Loc, "invalid cast opcode for cast from '" +
+                   SrcVal->getType()->getDescription() + "' to '" +
+                   DestTy->getDescription() + "'");
+    ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
+                                           SrcVal, DestTy);
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_extractvalue: {
+    Lex.Lex();
+    Constant *Val;
+    SmallVector<unsigned, 4> Indices;
+    if (ParseToken(lltok::lparen, "expected '(' in extractvalue constantexpr") ||
+        ParseGlobalTypeAndValue(Val) ||
+        ParseIndexList(Indices) ||
+        ParseToken(lltok::rparen, "expected ')' in extractvalue constantexpr"))
+      return true;
+
+    if (!Val->getType()->isAggregateType())
+      return Error(ID.Loc, "extractvalue operand must be aggregate type");
+    if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
+                                          Indices.end()))
+      return Error(ID.Loc, "invalid indices for extractvalue");
+    ID.ConstantVal =
+      ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_insertvalue: {
+    Lex.Lex();
+    Constant *Val0, *Val1;
+    SmallVector<unsigned, 4> Indices;
+    if (ParseToken(lltok::lparen, "expected '(' in insertvalue constantexpr") ||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in insertvalue constantexpr") ||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseIndexList(Indices) ||
+        ParseToken(lltok::rparen, "expected ')' in insertvalue constantexpr"))
+      return true;
+    if (!Val0->getType()->isAggregateType())
+      return Error(ID.Loc, "insertvalue operand must be aggregate type");
+    if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
+                                          Indices.end()))
+      return Error(ID.Loc, "invalid indices for insertvalue");
+    ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1,
+                                                  Indices.data(), Indices.size());
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  case lltok::kw_icmp:
+  case lltok::kw_fcmp: {
+    unsigned PredVal, Opc = Lex.getUIntVal();
+    Constant *Val0, *Val1;
+    Lex.Lex();
+    if (ParseCmpPredicate(PredVal, Opc) ||
+        ParseToken(lltok::lparen, "expected '(' in compare constantexpr") ||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in compare constantexpr") ||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseToken(lltok::rparen, "expected ')' in compare constantexpr"))
+      return true;
+
+    if (Val0->getType() != Val1->getType())
+      return Error(ID.Loc, "compare operands must have the same type");
+
+    CmpInst::Predicate Pred = (CmpInst::Predicate)PredVal;
+
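+    // Illustrative inputs handled here (not from the original source):
+    //   icmp eq (i32 ptrtoint (i32* @g to i32), i32 0)
+    //   fcmp olt (double 1.0, double 2.0)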
+    if (Opc == Instruction::FCmp) {
+      if (!Val0->getType()->isFPOrFPVectorTy())
+        return Error(ID.Loc, "fcmp requires floating point operands");
+      ID.ConstantVal = ConstantExpr::getFCmp(Pred, Val0, Val1);
+    } else {
+      assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
+      if (!Val0->getType()->isIntOrIntVectorTy() &&
+          !Val0->getType()->isPointerTy())
+        return Error(ID.Loc, "icmp requires pointer or integer operands");
+      ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
+    }
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+
+  // Binary Operators.
+  case lltok::kw_add:
+  case lltok::kw_fadd:
+  case lltok::kw_sub:
+  case lltok::kw_fsub:
+  case lltok::kw_mul:
+  case lltok::kw_fmul:
+  case lltok::kw_udiv:
+  case lltok::kw_sdiv:
+  case lltok::kw_fdiv:
+  case lltok::kw_urem:
+  case lltok::kw_srem:
+  case lltok::kw_frem:
+  case lltok::kw_shl:
+  case lltok::kw_lshr:
+  case lltok::kw_ashr: {
+    bool NUW = false;
+    bool NSW = false;
+    bool Exact = false;
+    unsigned Opc = Lex.getUIntVal();
+    Constant *Val0, *Val1;
+    Lex.Lex();
+    LocTy ModifierLoc = Lex.getLoc();
+    if (Opc == Instruction::Add || Opc == Instruction::Sub ||
+        Opc == Instruction::Mul || Opc == Instruction::Shl) {
+      if (EatIfPresent(lltok::kw_nuw))
+        NUW = true;
+      if (EatIfPresent(lltok::kw_nsw)) {
+        NSW = true;
+        if (EatIfPresent(lltok::kw_nuw))
+          NUW = true;
+      }
+    } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv ||
+               Opc == Instruction::LShr || Opc == Instruction::AShr) {
+      if (EatIfPresent(lltok::kw_exact))
+        Exact = true;
+    }
+    if (ParseToken(lltok::lparen, "expected '(' in binary constantexpr") ||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in binary constantexpr") ||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseToken(lltok::rparen, "expected ')' in binary constantexpr"))
+      return true;
+    if (Val0->getType() != Val1->getType())
+      return Error(ID.Loc, "operands of constexpr must have same type");
+    if (!Val0->getType()->isIntOrIntVectorTy()) {
+      if (NUW)
+        return Error(ModifierLoc, "nuw only applies to integer operations");
+      if (NSW)
+        return Error(ModifierLoc, "nsw only applies to integer operations");
+    }
+    // Check that the type is valid for the operator.
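+    // (Illustrative input reaching this check, not from the original source:
+    //  add nuw (i32 ptrtoint (i32* @g to i32), i32 4).)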
+    switch (Opc) {
+    case Instruction::Add:
+    case Instruction::Sub:
+    case Instruction::Mul:
+    case Instruction::UDiv:
+    case Instruction::SDiv:
+    case Instruction::URem:
+    case Instruction::SRem:
+    case Instruction::Shl:
+    case Instruction::AShr:
+    case Instruction::LShr:
+      if (!Val0->getType()->isIntOrIntVectorTy())
+        return Error(ID.Loc, "constexpr requires integer operands");
+      break;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+    case Instruction::FMul:
+    case Instruction::FDiv:
+    case Instruction::FRem:
+      if (!Val0->getType()->isFPOrFPVectorTy())
+        return Error(ID.Loc, "constexpr requires fp operands");
+      break;
+    default: llvm_unreachable("Unknown binary operator!");
+    }
+    unsigned Flags = 0;
+    if (NUW)   Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+    if (NSW)   Flags |= OverflowingBinaryOperator::NoSignedWrap;
+    if (Exact) Flags |= PossiblyExactOperator::IsExact;
+    Constant *C = ConstantExpr::get(Opc, Val0, Val1, Flags);
+    ID.ConstantVal = C;
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+
+  // Logical Operations
+  case lltok::kw_and:
+  case lltok::kw_or:
+  case lltok::kw_xor: {
+    unsigned Opc = Lex.getUIntVal();
+    Constant *Val0, *Val1;
+    Lex.Lex();
+    if (ParseToken(lltok::lparen, "expected '(' in logical constantexpr") ||
+        ParseGlobalTypeAndValue(Val0) ||
+        ParseToken(lltok::comma, "expected comma in logical constantexpr") ||
+        ParseGlobalTypeAndValue(Val1) ||
+        ParseToken(lltok::rparen, "expected ')' in logical constantexpr"))
+      return true;
+    if (Val0->getType() != Val1->getType())
+      return Error(ID.Loc, "operands of constexpr must have same type");
+    if (!Val0->getType()->isIntOrIntVectorTy())
+      return Error(ID.Loc,
+                   "constexpr requires integer or integer vector operands");
+    ID.ConstantVal = ConstantExpr::get(Opc, Val0, Val1);
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+
+  case lltok::kw_getelementptr:
+  case lltok::kw_shufflevector:
+  case lltok::kw_insertelement:
+  case lltok::kw_extractelement:
+  case lltok::kw_select: {
+    unsigned Opc = Lex.getUIntVal();
+    SmallVector<Constant*, 16> Elts;
+    bool InBounds = false;
+    Lex.Lex();
+    if (Opc == Instruction::GetElementPtr)
+      InBounds = EatIfPresent(lltok::kw_inbounds);
+    if (ParseToken(lltok::lparen, "expected '(' in constantexpr") ||
+        ParseGlobalValueVector(Elts) ||
+        ParseToken(lltok::rparen, "expected ')' in constantexpr"))
+      return true;
+
+    if (Opc == Instruction::GetElementPtr) {
+      if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
+        return Error(ID.Loc, "getelementptr requires pointer operand");
+
+      if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
+                                             (Value**)(Elts.data() + 1),
+                                             Elts.size() - 1))
+        return Error(ID.Loc, "invalid indices for getelementptr");
+      ID.ConstantVal = InBounds ?
+        ConstantExpr::getInBoundsGetElementPtr(Elts[0],
+                                               Elts.data() + 1,
+                                               Elts.size() - 1) :
+        ConstantExpr::getGetElementPtr(Elts[0],
+                                       Elts.data() + 1, Elts.size() - 1);
+    } else if (Opc == Instruction::Select) {
+      if (Elts.size() != 3)
+        return Error(ID.Loc, "expected three operands to select");
+      if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
+                                                              Elts[2]))
+        return Error(ID.Loc, Reason);
+      ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
+    } else if (Opc == Instruction::ShuffleVector) {
+      if (Elts.size() != 3)
+        return Error(ID.Loc, "expected three operands to shufflevector");
+      if (!ShuffleVectorInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+        return Error(ID.Loc, "invalid operands to shufflevector");
+      ID.ConstantVal =
+        ConstantExpr::getShuffleVector(Elts[0], Elts[1], Elts[2]);
+    } else if (Opc == Instruction::ExtractElement) {
+      if (Elts.size() != 2)
+        return Error(ID.Loc, "expected two operands to extractelement");
+      if (!ExtractElementInst::isValidOperands(Elts[0], Elts[1]))
+        return Error(ID.Loc, "invalid extractelement operands");
+      ID.ConstantVal = ConstantExpr::getExtractElement(Elts[0], Elts[1]);
+    } else {
+      assert(Opc == Instruction::InsertElement && "Unknown opcode");
+      if (Elts.size() != 3)
+        return Error(ID.Loc, "expected three operands to insertelement");
+      if (!InsertElementInst::isValidOperands(Elts[0], Elts[1], Elts[2]))
+        return Error(ID.Loc, "invalid insertelement operands");
+      ID.ConstantVal =
+        ConstantExpr::getInsertElement(Elts[0], Elts[1], Elts[2]);
+    }
+
+    ID.Kind = ValID::t_Constant;
+    return false;
+  }
+  }
+
+  Lex.Lex();
+  return false;
+}
+
+/// ParseGlobalValue - Parse a global value with the specified type.
+bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
+  C = 0;
+  ValID ID;
+  Value *V = NULL;
+  bool Parsed = ParseValID(ID) ||
+                ConvertValIDToValue(Ty, ID, V, NULL);
+  if (V && !(C = dyn_cast<Constant>(V)))
+    return Error(ID.Loc, "global values must be constants");
+  return Parsed;
+}
+
+bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
+  PATypeHolder Type(Type::getVoidTy(Context));
+  return ParseType(Type) ||
+         ParseGlobalValue(Type, V);
+}
+
+/// ParseGlobalValueVector
+///   ::= /*empty*/
+///   ::= TypeAndValue (',' TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
+  // Empty list.
+  if (Lex.getKind() == lltok::rbrace ||
+      Lex.getKind() == lltok::rsquare ||
+      Lex.getKind() == lltok::greater ||
+      Lex.getKind() == lltok::rparen)
+    return false;
+
+  Constant *C;
+  if (ParseGlobalTypeAndValue(C)) return true;
+  Elts.push_back(C);
+
+  while (EatIfPresent(lltok::comma)) {
+    if (ParseGlobalTypeAndValue(C)) return true;
+    Elts.push_back(C);
+  }
+
+  return false;
+}
+
+bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) {
+  assert(Lex.getKind() == lltok::lbrace);
+  Lex.Lex();
+
+  SmallVector<Value*, 16> Elts;
+  if (ParseMDNodeVector(Elts, PFS) ||
+      ParseToken(lltok::rbrace, "expected end of metadata node"))
+    return true;
+
+  ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+  ID.Kind = ValID::t_MDNode;
+  return false;
+}
+
+/// ParseMetadataValue
+///  ::= !42
+///  ::= !{...}
+///  ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+  assert(Lex.getKind() == lltok::exclaim);
+  Lex.Lex();
+
+  // MDNode:
+  // !{ ... }
+  if (Lex.getKind() == lltok::lbrace)
+    return ParseMetadataListValue(ID, PFS);
+
+  // Standalone metadata reference
+  // !42
+  if (Lex.getKind() == lltok::APSInt) {
+    if (ParseMDNodeID(ID.MDNodeVal)) return true;
+    ID.Kind = ValID::t_MDNode;
+    return false;
+  }
+
+  // MDString:
+  //   ::= '!' STRINGCONSTANT
+  if (ParseMDString(ID.MDStringVal)) return true;
+  ID.Kind = ValID::t_MDString;
+  return false;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Function Parsing.
+//===----------------------------------------------------------------------===//
+
+bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
+                                   PerFunctionState *PFS) {
+  if (Ty->isFunctionTy())
+    return Error(ID.Loc, "functions are not values, refer to them as pointers");
+
+  switch (ID.Kind) {
+  default: llvm_unreachable("Unknown ValID!");
+  case ValID::t_LocalID:
+    if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+    V = PFS->GetVal(ID.UIntVal, Ty, ID.Loc);
+    return (V == 0);
+  case ValID::t_LocalName:
+    if (!PFS) return Error(ID.Loc, "invalid use of function-local name");
+    V = PFS->GetVal(ID.StrVal, Ty, ID.Loc);
+    return (V == 0);
+  case ValID::t_InlineAsm: {
+    const PointerType *PTy = dyn_cast<PointerType>(Ty);
+    const FunctionType *FTy =
+      PTy ? dyn_cast<FunctionType>(PTy->getElementType()) : 0;
+    if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2))
+      return Error(ID.Loc, "invalid type for inline asm constraint string");
+    V = InlineAsm::get(FTy, ID.StrVal, ID.StrVal2, ID.UIntVal&1, ID.UIntVal>>1);
+    return false;
+  }
+  case ValID::t_MDNode:
+    if (!Ty->isMetadataTy())
+      return Error(ID.Loc, "metadata value must have metadata type");
+    V = ID.MDNodeVal;
+    return false;
+  case ValID::t_MDString:
+    if (!Ty->isMetadataTy())
+      return Error(ID.Loc, "metadata value must have metadata type");
+    V = ID.MDStringVal;
+    return false;
+  case ValID::t_GlobalName:
+    V = GetGlobalVal(ID.StrVal, Ty, ID.Loc);
+    return V == 0;
+  case ValID::t_GlobalID:
+    V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
+    return V == 0;
+  case ValID::t_APSInt:
+    if (!Ty->isIntegerTy())
+      return Error(ID.Loc, "integer constant must have integer type");
+    ID.APSIntVal = ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
+    V = ConstantInt::get(Context, ID.APSIntVal);
+    return false;
+  case ValID::t_APFloat:
+    if (!Ty->isFloatingPointTy() ||
+        !ConstantFP::isValueValidForType(Ty, ID.APFloatVal))
+      return Error(ID.Loc, "floating point constant invalid for type");
+
+    // The lexer has no type info, so builds all float and double FP constants
+    // as double.  Fix this here.  Long double does not need this.
+    if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble &&
+        Ty->isFloatTy()) {
+      bool Ignored;
+      ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+                            &Ignored);
+    }
+    V = ConstantFP::get(Context, ID.APFloatVal);
+
+    if (V->getType() != Ty)
+      return Error(ID.Loc, "floating point constant does not have type '" +
+                   Ty->getDescription() + "'");
+
+    return false;
+  case ValID::t_Null:
+    if (!Ty->isPointerTy())
+      return Error(ID.Loc, "null must be a pointer type");
+    V = ConstantPointerNull::get(cast<PointerType>(Ty));
+    return false;
+  case ValID::t_Undef:
+    // FIXME: LabelTy should not be a first-class type.
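+    // e.g. (illustrative, not from the original source): '@g = global i32 undef'
+    // resolves here once the i32 type is known.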
+    if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
+        !Ty->isOpaqueTy())
+      return Error(ID.Loc, "invalid type for undef constant");
+    V = UndefValue::get(Ty);
+    return false;
+  case ValID::t_EmptyArray:
+    if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
+      return Error(ID.Loc, "invalid empty array initializer");
+    V = UndefValue::get(Ty);
+    return false;
+  case ValID::t_Zero:
+    // FIXME: LabelTy should not be a first-class type.
+    if (!Ty->isFirstClassType() || Ty->isLabelTy())
+      return Error(ID.Loc, "invalid type for null constant");
+    V = Constant::getNullValue(Ty);
+    return false;
+  case ValID::t_Constant:
+    if (ID.ConstantVal->getType() != Ty)
+      return Error(ID.Loc, "constant expression type mismatch");
+
+    V = ID.ConstantVal;
+    return false;
+  }
+}
+
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+  V = 0;
+  ValID ID;
+  return ParseValID(ID, &PFS) ||
+         ConvertValIDToValue(Ty, ID, V, &PFS);
+}
+
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+  PATypeHolder T(Type::getVoidTy(Context));
+  return ParseType(T) ||
+         ParseValue(T, V, PFS);
+}
+
+bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
+                                      PerFunctionState &PFS) {
+  Value *V;
+  Loc = Lex.getLoc();
+  if (ParseTypeAndValue(V, PFS)) return true;
+  if (!isa<BasicBlock>(V))
+    return Error(Loc, "expected a basic block");
+  BB = cast<BasicBlock>(V);
+  return false;
+}
+
+
+/// FunctionHeader
+///   ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
+///       OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
+///       OptionalAlign OptGC
+bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
+  // Parse the linkage.
+  LocTy LinkageLoc = Lex.getLoc();
+  unsigned Linkage;
+
+  unsigned Visibility, RetAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
+  LocTy RetTypeLoc = Lex.getLoc();
+  if (ParseOptionalLinkage(Linkage) ||
+      ParseOptionalVisibility(Visibility) ||
+      ParseOptionalCallingConv(CC) ||
+      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseType(RetType, RetTypeLoc, true /*void allowed*/))
+    return true;
+
+  // Verify that the linkage is ok.
+  switch ((GlobalValue::LinkageTypes)Linkage) {
+  case GlobalValue::ExternalLinkage:
+    break; // always ok.
+  case GlobalValue::DLLImportLinkage:
+  case GlobalValue::ExternalWeakLinkage:
+    if (isDefine)
+      return Error(LinkageLoc, "invalid linkage for function definition");
+    break;
+  case GlobalValue::PrivateLinkage:
+  case GlobalValue::LinkerPrivateLinkage:
+  case GlobalValue::LinkerPrivateWeakLinkage:
+  case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+  case GlobalValue::InternalLinkage:
+  case GlobalValue::AvailableExternallyLinkage:
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::DLLExportLinkage:
+    if (!isDefine)
+      return Error(LinkageLoc, "invalid linkage for function declaration");
+    break;
+  case GlobalValue::AppendingLinkage:
+  case GlobalValue::CommonLinkage:
+    return Error(LinkageLoc, "invalid function linkage type");
+  }
+
+  if (!FunctionType::isValidReturnType(RetType) ||
+      RetType->isOpaqueTy())
+    return Error(RetTypeLoc, "invalid function return type");
+
+  LocTy NameLoc = Lex.getLoc();
+
+  std::string FunctionName;
+  if (Lex.getKind() == lltok::GlobalVar) {
+    FunctionName = Lex.getStrVal();
+  } else if (Lex.getKind() == lltok::GlobalID) {     // @42 is ok.
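+    // e.g. (illustrative): unnamed functions such as 'define void @0()' must
+    // appear in sequential numeric order.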
+    unsigned NameID = Lex.getUIntVal();
+
+    if (NameID != NumberedVals.size())
+      return TokError("function expected to be numbered '%" +
+                      Twine(NumberedVals.size()) + "'");
+  } else {
+    return TokError("expected function name");
+  }
+
+  Lex.Lex();
+
+  if (Lex.getKind() != lltok::lparen)
+    return TokError("expected '(' in function argument list");
+
+  std::vector<ArgInfo> ArgList;
+  bool isVarArg;
+  unsigned FuncAttrs;
+  std::string Section;
+  unsigned Alignment;
+  std::string GC;
+  bool UnnamedAddr;
+  LocTy UnnamedAddrLoc;
+
+  if (ParseArgumentList(ArgList, isVarArg, false) ||
+      ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
+                         &UnnamedAddrLoc) ||
+      ParseOptionalAttrs(FuncAttrs, 2) ||
+      (EatIfPresent(lltok::kw_section) &&
+       ParseStringConstant(Section)) ||
+      ParseOptionalAlignment(Alignment) ||
+      (EatIfPresent(lltok::kw_gc) &&
+       ParseStringConstant(GC)))
+    return true;
+
+  // If the alignment was parsed as an attribute, move to the alignment field.
+  if (FuncAttrs & Attribute::Alignment) {
+    Alignment = Attribute::getAlignmentFromAttrs(FuncAttrs);
+    FuncAttrs &= ~Attribute::Alignment;
+  }
+
+  // Okay, if we got here, the function is syntactically valid.  Convert types
+  // and do semantic checks.
+  std::vector<const Type*> ParamTypeList;
+  SmallVector<AttributeWithIndex, 8> Attrs;
+  // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
+  // attributes.
+  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+  if (FuncAttrs & ObsoleteFuncAttrs) {
+    RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
+    FuncAttrs &= ~ObsoleteFuncAttrs;
+  }
+
+  if (RetAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    ParamTypeList.push_back(ArgList[i].Type);
+    if (ArgList[i].Attrs != Attribute::None)
+      Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+  }
+
+  if (FuncAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(~0, FuncAttrs));
+
+  AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+  if (PAL.paramHasAttr(1, Attribute::StructRet) && !RetType->isVoidTy())
+    return Error(RetTypeLoc, "functions with 'sret' argument must return void");
+
+  const FunctionType *FT =
+    FunctionType::get(RetType, ParamTypeList, isVarArg);
+  const PointerType *PFT = PointerType::getUnqual(FT);
+
+  Fn = 0;
+  if (!FunctionName.empty()) {
+    // If this was a definition of a forward reference, remove the definition
+    // from the forward reference table and fill in the forward ref.
+    std::map<std::string, std::pair<GlobalValue*, LocTy> >::iterator FRVI =
+      ForwardRefVals.find(FunctionName);
+    if (FRVI != ForwardRefVals.end()) {
+      Fn = M->getFunction(FunctionName);
+      if (Fn->getType() != PFT)
+        return Error(FRVI->second.second, "invalid forward reference to "
+                     "function '" + FunctionName + "' with wrong type!");
+
+      ForwardRefVals.erase(FRVI);
+    } else if ((Fn = M->getFunction(FunctionName))) {
+      // If this function already exists in the symbol table, then it is
+      // multiply defined.  We accept a few cases for old backwards compat.
+      // FIXME: Remove this stuff for LLVM 3.0.
+      if (Fn->getType() != PFT || Fn->getAttributes() != PAL ||
+          (!Fn->isDeclaration() && isDefine)) {
+        // If the redefinition has different type or different attributes,
+        // reject it.  If both have bodies, reject it.
+        return Error(NameLoc, "invalid redefinition of function '" +
+                     FunctionName + "'");
+      } else if (Fn->isDeclaration()) {
+        // Make sure to strip off any argument names so we can't get conflicts.
+        for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+             AI != AE; ++AI)
+          AI->setName("");
+      }
+    } else if (M->getNamedValue(FunctionName)) {
+      return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
+    }
+
+  } else {
+    // If this is a definition of a forward referenced function, make sure the
+    // types agree.
+    std::map<unsigned, std::pair<GlobalValue*, LocTy> >::iterator I
+      = ForwardRefValIDs.find(NumberedVals.size());
+    if (I != ForwardRefValIDs.end()) {
+      Fn = cast<Function>(I->second.first);
+      if (Fn->getType() != PFT)
+        return Error(NameLoc, "type of definition and forward reference of '@" +
+                     Twine(NumberedVals.size()) + "' disagree");
+      ForwardRefValIDs.erase(I);
+    }
+  }
+
+  if (Fn == 0)
+    Fn = Function::Create(FT, GlobalValue::ExternalLinkage, FunctionName, M);
+  else // Move the forward-reference to the correct spot in the module.
+    M->getFunctionList().splice(M->end(), M->getFunctionList(), Fn);
+
+  if (FunctionName.empty())
+    NumberedVals.push_back(Fn);
+
+  Fn->setLinkage((GlobalValue::LinkageTypes)Linkage);
+  Fn->setVisibility((GlobalValue::VisibilityTypes)Visibility);
+  Fn->setCallingConv(CC);
+  Fn->setAttributes(PAL);
+  Fn->setUnnamedAddr(UnnamedAddr);
+  Fn->setAlignment(Alignment);
+  Fn->setSection(Section);
+  if (!GC.empty()) Fn->setGC(GC.c_str());
+
+  // Add all of the arguments we parsed to the function.
+  Function::arg_iterator ArgIt = Fn->arg_begin();
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
+    // If we run out of arguments in the Function prototype, exit early.
+    // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above.
+    if (ArgIt == Fn->arg_end()) break;
+
+    // If the argument has a name, insert it into the argument symbol table.
+    if (ArgList[i].Name.empty()) continue;
+
+    // Set the name, if it conflicted, it will be auto-renamed.
+    ArgIt->setName(ArgList[i].Name);
+
+    if (ArgIt->getName() != ArgList[i].Name)
+      return Error(ArgList[i].Loc, "redefinition of argument '%" +
+                   ArgList[i].Name + "'");
+  }
+
+  return false;
+}
+
+
+/// ParseFunctionBody
+///   ::= '{' BasicBlock+ '}'
+///   ::= 'begin' BasicBlock+ 'end'  // FIXME: remove in LLVM 3.0
+///
+bool LLParser::ParseFunctionBody(Function &Fn) {
+  if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
+    return TokError("expected '{' in function body");
+  Lex.Lex();  // eat the {.
+
+  int FunctionNumber = -1;
+  if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1;
+
+  PerFunctionState PFS(*this, Fn, FunctionNumber);
+
+  // We need at least one basic block.
+  if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+    return TokError("function body requires at least one basic block");
+
+  while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
+    if (ParseBasicBlock(PFS)) return true;
+
+  // Eat the }.
+  Lex.Lex();
+
+  // Verify function is ok.
+  return PFS.FinishFunction();
+}
+
+/// ParseBasicBlock
+///   ::= LabelStr? Instruction*
+bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
+  // If this basic block starts out with a name, remember it.
+  std::string Name;
+  LocTy NameLoc = Lex.getLoc();
+  if (Lex.getKind() == lltok::LabelStr) {
+    Name = Lex.getStrVal();
+    Lex.Lex();
+  }
+
+  BasicBlock *BB = PFS.DefineBB(Name, NameLoc);
+  if (BB == 0) return true;
+
+  std::string NameStr;
+
+  // Parse the instructions in this block until we get a terminator.
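+  // Illustrative block (not from the original source):
+  //   entry:
+  //     %sum = add i32 %a, %b   ; named instruction
+  //     %0 = mul i32 %sum, 2    ; numbered instruction
+  //     ret i32 %0              ; terminator stops the loop below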
+  Instruction *Inst;
+  SmallVector<std::pair<unsigned, MDNode*>, 4> MetadataOnInst;
+  do {
+    // This instruction may have three possibilities for a name: a) none
+    // specified, b) name specified "%foo =", c) number specified: "%4 =".
+    LocTy NameLoc = Lex.getLoc();
+    int NameID = -1;
+    NameStr = "";
+
+    if (Lex.getKind() == lltok::LocalVarID) {
+      NameID = Lex.getUIntVal();
+      Lex.Lex();
+      if (ParseToken(lltok::equal, "expected '=' after instruction id"))
+        return true;
+    } else if (Lex.getKind() == lltok::LocalVar ||
+               // FIXME: REMOVE IN LLVM 3.0
+               Lex.getKind() == lltok::StringConstant) {
+      NameStr = Lex.getStrVal();
+      Lex.Lex();
+      if (ParseToken(lltok::equal, "expected '=' after instruction name"))
+        return true;
+    }
+
+    switch (ParseInstruction(Inst, BB, PFS)) {
+    default: assert(0 && "Unknown ParseInstruction result!");
+    case InstError: return true;
+    case InstNormal:
+      BB->getInstList().push_back(Inst);
+
+      // With a normal result, we check to see if the instruction is followed
+      // by a comma and metadata.
+      if (EatIfPresent(lltok::comma))
+        if (ParseInstructionMetadata(Inst, &PFS))
+          return true;
+      break;
+    case InstExtraComma:
+      BB->getInstList().push_back(Inst);
+
+      // If the instruction parser ate an extra comma at the end of it, it
+      // *must* be followed by metadata.
+      if (ParseInstructionMetadata(Inst, &PFS))
+        return true;
+      break;
+    }
+
+    // Set the name on the instruction.
+    if (PFS.SetInstName(NameID, NameStr, NameLoc, Inst)) return true;
+  } while (!isa<TerminatorInst>(Inst));
+
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Parsing.
+//===----------------------------------------------------------------------===//
+
+/// ParseInstruction - Parse one of the many different instructions.
+///
+int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
+                               PerFunctionState &PFS) {
+  lltok::Kind Token = Lex.getKind();
+  if (Token == lltok::Eof)
+    return TokError("found end of file when expecting more instructions");
+  LocTy Loc = Lex.getLoc();
+  unsigned KeywordVal = Lex.getUIntVal();
+  Lex.Lex();  // Eat the keyword.
+
+  switch (Token) {
+  default: return Error(Loc, "expected instruction opcode");
+  // Terminator Instructions.
+  case lltok::kw_unwind:      Inst = new UnwindInst(Context); return false;
+  case lltok::kw_unreachable: Inst = new UnreachableInst(Context); return false;
+  case lltok::kw_ret:         return ParseRet(Inst, BB, PFS);
+  case lltok::kw_br:          return ParseBr(Inst, PFS);
+  case lltok::kw_switch:      return ParseSwitch(Inst, PFS);
+  case lltok::kw_indirectbr:  return ParseIndirectBr(Inst, PFS);
+  case lltok::kw_invoke:      return ParseInvoke(Inst, PFS);
+  // Binary Operators.
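+  // e.g. (illustrative): 'add nuw nsw i32 %a, %b' or 'udiv exact i32 %a, %b';
+  // the optional nuw/nsw/exact keywords are consumed per-opcode below.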
+  case lltok::kw_add:
+  case lltok::kw_sub:
+  case lltok::kw_mul:
+  case lltok::kw_shl: {
+    LocTy ModifierLoc = Lex.getLoc();
+    bool NUW = EatIfPresent(lltok::kw_nuw);
+    bool NSW = EatIfPresent(lltok::kw_nsw);
+    if (!NUW) NUW = EatIfPresent(lltok::kw_nuw);
+
+    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+
+    if (NUW) cast<BinaryOperator>(Inst)->setHasNoUnsignedWrap(true);
+    if (NSW) cast<BinaryOperator>(Inst)->setHasNoSignedWrap(true);
+    return false;
+  }
+  case lltok::kw_fadd:
+  case lltok::kw_fsub:
+  case lltok::kw_fmul:    return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+
+  case lltok::kw_sdiv:
+  case lltok::kw_udiv:
+  case lltok::kw_lshr:
+  case lltok::kw_ashr: {
+    bool Exact = EatIfPresent(lltok::kw_exact);
+
+    if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true;
+    if (Exact) cast<BinaryOperator>(Inst)->setIsExact(true);
+    return false;
+  }
+
+  case lltok::kw_urem:
+  case lltok::kw_srem:   return ParseArithmetic(Inst, PFS, KeywordVal, 1);
+  case lltok::kw_fdiv:
+  case lltok::kw_frem:   return ParseArithmetic(Inst, PFS, KeywordVal, 2);
+  case lltok::kw_and:
+  case lltok::kw_or:
+  case lltok::kw_xor:    return ParseLogical(Inst, PFS, KeywordVal);
+  case lltok::kw_icmp:
+  case lltok::kw_fcmp:   return ParseCompare(Inst, PFS, KeywordVal);
+  // Casts.
+  case lltok::kw_trunc:
+  case lltok::kw_zext:
+  case lltok::kw_sext:
+  case lltok::kw_fptrunc:
+  case lltok::kw_fpext:
+  case lltok::kw_bitcast:
+  case lltok::kw_uitofp:
+  case lltok::kw_sitofp:
+  case lltok::kw_fptoui:
+  case lltok::kw_fptosi:
+  case lltok::kw_inttoptr:
+  case lltok::kw_ptrtoint:       return ParseCast(Inst, PFS, KeywordVal);
+  // Other.
+  case lltok::kw_select:         return ParseSelect(Inst, PFS);
+  case lltok::kw_va_arg:         return ParseVA_Arg(Inst, PFS);
+  case lltok::kw_extractelement: return ParseExtractElement(Inst, PFS);
+  case lltok::kw_insertelement:  return ParseInsertElement(Inst, PFS);
+  case lltok::kw_shufflevector:  return ParseShuffleVector(Inst, PFS);
+  case lltok::kw_phi:            return ParsePHI(Inst, PFS);
+  case lltok::kw_call:           return ParseCall(Inst, PFS, false);
+  case lltok::kw_tail:           return ParseCall(Inst, PFS, true);
+  // Memory.
+  case lltok::kw_alloca:         return ParseAlloc(Inst, PFS);
+  case lltok::kw_malloc:         return ParseAlloc(Inst, PFS, BB, false);
+  case lltok::kw_free:           return ParseFree(Inst, PFS, BB);
+  case lltok::kw_load:           return ParseLoad(Inst, PFS, false);
+  case lltok::kw_store:          return ParseStore(Inst, PFS, false);
+  case lltok::kw_volatile:
+    if (EatIfPresent(lltok::kw_load))
+      return ParseLoad(Inst, PFS, true);
+    else if (EatIfPresent(lltok::kw_store))
+      return ParseStore(Inst, PFS, true);
+    else
+      return TokError("expected 'load' or 'store'");
+  case lltok::kw_getresult:      return ParseGetResult(Inst, PFS);
+  case lltok::kw_getelementptr:  return ParseGetElementPtr(Inst, PFS);
+  case lltok::kw_extractvalue:   return ParseExtractValue(Inst, PFS);
+  case lltok::kw_insertvalue:    return ParseInsertValue(Inst, PFS);
+  }
+}
+
+/// ParseCmpPredicate - Parse an integer or fp predicate, based on Kind.
+bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
+  if (Opc == Instruction::FCmp) {
+    switch (Lex.getKind()) {
+    default: return TokError("expected fcmp predicate (e.g. 'oeq')");
'oeq')"); + case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break; + case lltok::kw_one: P = CmpInst::FCMP_ONE; break; + case lltok::kw_olt: P = CmpInst::FCMP_OLT; break; + case lltok::kw_ogt: P = CmpInst::FCMP_OGT; break; + case lltok::kw_ole: P = CmpInst::FCMP_OLE; break; + case lltok::kw_oge: P = CmpInst::FCMP_OGE; break; + case lltok::kw_ord: P = CmpInst::FCMP_ORD; break; + case lltok::kw_uno: P = CmpInst::FCMP_UNO; break; + case lltok::kw_ueq: P = CmpInst::FCMP_UEQ; break; + case lltok::kw_une: P = CmpInst::FCMP_UNE; break; + case lltok::kw_ult: P = CmpInst::FCMP_ULT; break; + case lltok::kw_ugt: P = CmpInst::FCMP_UGT; break; + case lltok::kw_ule: P = CmpInst::FCMP_ULE; break; + case lltok::kw_uge: P = CmpInst::FCMP_UGE; break; + case lltok::kw_true: P = CmpInst::FCMP_TRUE; break; + case lltok::kw_false: P = CmpInst::FCMP_FALSE; break; + } + } else { + switch (Lex.getKind()) { + default: TokError("expected icmp predicate (e.g. 'eq')"); + case lltok::kw_eq: P = CmpInst::ICMP_EQ; break; + case lltok::kw_ne: P = CmpInst::ICMP_NE; break; + case lltok::kw_slt: P = CmpInst::ICMP_SLT; break; + case lltok::kw_sgt: P = CmpInst::ICMP_SGT; break; + case lltok::kw_sle: P = CmpInst::ICMP_SLE; break; + case lltok::kw_sge: P = CmpInst::ICMP_SGE; break; + case lltok::kw_ult: P = CmpInst::ICMP_ULT; break; + case lltok::kw_ugt: P = CmpInst::ICMP_UGT; break; + case lltok::kw_ule: P = CmpInst::ICMP_ULE; break; + case lltok::kw_uge: P = CmpInst::ICMP_UGE; break; + } + } + Lex.Lex(); + return false; +} + +//===----------------------------------------------------------------------===// +// Terminator Instructions. +//===----------------------------------------------------------------------===// + +/// ParseRet - Parse a return instruction. +/// ::= 'ret' void (',' !dbg, !1)* +/// ::= 'ret' TypeAndValue (',' !dbg, !1)* +/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ (',' !dbg, !1)* +/// [[obsolete: LLVM 3.0]] +int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, + PerFunctionState &PFS) { + PATypeHolder Ty(Type::getVoidTy(Context)); + if (ParseType(Ty, true /*void allowed*/)) return true; + + if (Ty->isVoidTy()) { + Inst = ReturnInst::Create(Context); + return false; + } + + Value *RV; + if (ParseValue(Ty, RV, PFS)) return true; + + bool ExtraComma = false; + if (EatIfPresent(lltok::comma)) { + // Parse optional custom metadata, e.g. !dbg + if (Lex.getKind() == lltok::MetadataVar) { + ExtraComma = true; + } else { + // The normal case is one return value. + // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring + // use of 'ret {i32,i32} {i32 1, i32 2}' + SmallVector RVs; + RVs.push_back(RV); + + do { + // If optional custom metadata, e.g. !dbg is seen then this is the + // end of MRV. + if (Lex.getKind() == lltok::MetadataVar) + break; + if (ParseTypeAndValue(RV, PFS)) return true; + RVs.push_back(RV); + } while (EatIfPresent(lltok::comma)); + + RV = UndefValue::get(PFS.getFunction().getReturnType()); + for (unsigned i = 0, e = RVs.size(); i != e; ++i) { + Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv"); + BB->getInstList().push_back(I); + RV = I; + } + } + } + + Inst = ReturnInst::Create(Context, RV); + return ExtraComma ? 
+}
+
+
+/// ParseBr
+///   ::= 'br' TypeAndValue
+///   ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseBr(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc, Loc2;
+  Value *Op0;
+  BasicBlock *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS)) return true;
+
+  if (BasicBlock *BB = dyn_cast<BasicBlock>(Op0)) {
+    Inst = BranchInst::Create(BB);
+    return false;
+  }
+
+  if (Op0->getType() != Type::getInt1Ty(Context))
+    return Error(Loc, "branch condition must have 'i1' type");
+
+  if (ParseToken(lltok::comma, "expected ',' after branch condition") ||
+      ParseTypeAndBasicBlock(Op1, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after true destination") ||
+      ParseTypeAndBasicBlock(Op2, Loc2, PFS))
+    return true;
+
+  Inst = BranchInst::Create(Op1, Op2, Op0);
+  return false;
+}
+
+/// ParseSwitch
+///  Instruction
+///    ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']'
+///  JumpTable
+///    ::= (TypeAndValue ',' TypeAndValue)*
+bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy CondLoc, BBLoc;
+  Value *Cond;
+  BasicBlock *DefaultBB;
+  if (ParseTypeAndValue(Cond, CondLoc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after switch condition") ||
+      ParseTypeAndBasicBlock(DefaultBB, BBLoc, PFS) ||
+      ParseToken(lltok::lsquare, "expected '[' with switch table"))
+    return true;
+
+  if (!Cond->getType()->isIntegerTy())
+    return Error(CondLoc, "switch condition must have integer type");
+
+  // Parse the jump table pairs.
+  SmallPtrSet<Value*, 32> SeenCases;
+  SmallVector<std::pair<ConstantInt*, BasicBlock*>, 32> Table;
+  while (Lex.getKind() != lltok::rsquare) {
+    Value *Constant;
+    BasicBlock *DestBB;
+
+    if (ParseTypeAndValue(Constant, CondLoc, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after case value") ||
+        ParseTypeAndBasicBlock(DestBB, PFS))
+      return true;
+
+    if (!SeenCases.insert(Constant))
+      return Error(CondLoc, "duplicate case value in switch");
+    if (!isa<ConstantInt>(Constant))
+      return Error(CondLoc, "case value is not a constant integer");
+
+    Table.push_back(std::make_pair(cast<ConstantInt>(Constant), DestBB));
+  }
+
+  Lex.Lex();  // Eat the ']'.
+
+  SwitchInst *SI = SwitchInst::Create(Cond, DefaultBB, Table.size());
+  for (unsigned i = 0, e = Table.size(); i != e; ++i)
+    SI->addCase(Table[i].first, Table[i].second);
+  Inst = SI;
+  return false;
+}
+
+/// ParseIndirectBr
+///  Instruction
+///    ::= 'indirectbr' TypeAndValue ',' '[' LabelList ']'
+bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy AddrLoc;
+  Value *Address;
+  if (ParseTypeAndValue(Address, AddrLoc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after indirectbr address") ||
+      ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
+    return true;
+
+  if (!Address->getType()->isPointerTy())
+    return Error(AddrLoc, "indirectbr address must have pointer type");
+
+  // Parse the destination list.
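+  // e.g. (illustrative): indirectbr i8* %addr, [label %bb1, label %bb2]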
+  SmallVector<BasicBlock*, 16> DestList;
+
+  if (Lex.getKind() != lltok::rsquare) {
+    BasicBlock *DestBB;
+    if (ParseTypeAndBasicBlock(DestBB, PFS))
+      return true;
+    DestList.push_back(DestBB);
+
+    while (EatIfPresent(lltok::comma)) {
+      if (ParseTypeAndBasicBlock(DestBB, PFS))
+        return true;
+      DestList.push_back(DestBB);
+    }
+  }
+
+  if (ParseToken(lltok::rsquare, "expected ']' at end of block list"))
+    return true;
+
+  IndirectBrInst *IBI = IndirectBrInst::Create(Address, DestList.size());
+  for (unsigned i = 0, e = DestList.size(); i != e; ++i)
+    IBI->addDestination(DestList[i]);
+  Inst = IBI;
+  return false;
+}
+
+
+/// ParseInvoke
+///   ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
+///       OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
+bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy CallLoc = Lex.getLoc();
+  unsigned RetAttrs, FnAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
+  LocTy RetTypeLoc;
+  ValID CalleeID;
+  SmallVector<ParamInfo, 16> ArgList;
+
+  BasicBlock *NormalBB, *UnwindBB;
+  if (ParseOptionalCallingConv(CC) ||
+      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+      ParseValID(CalleeID) ||
+      ParseParameterList(ArgList, PFS) ||
+      ParseOptionalAttrs(FnAttrs, 2) ||
+      ParseToken(lltok::kw_to, "expected 'to' in invoke") ||
+      ParseTypeAndBasicBlock(NormalBB, PFS) ||
+      ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") ||
+      ParseTypeAndBasicBlock(UnwindBB, PFS))
+    return true;
+
+  // If RetType is a non-function pointer type, then this is the short syntax
+  // for the call, which means that RetType is just the return type.  Infer the
+  // rest of the function argument types from the arguments that are present.
+  const PointerType *PFTy = 0;
+  const FunctionType *Ty = 0;
+  if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+      !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+    // Pull out the types of all of the arguments...
+    std::vector<const Type*> ParamTypes;
+    for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+      ParamTypes.push_back(ArgList[i].V->getType());
+
+    if (!FunctionType::isValidReturnType(RetType))
+      return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+    Ty = FunctionType::get(RetType, ParamTypes, false);
+    PFTy = PointerType::getUnqual(Ty);
+  }
+
+  // Look up the callee.
+  Value *Callee;
+  if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+  // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
+  // function attributes.
+  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+  if (FnAttrs & ObsoleteFuncAttrs) {
+    RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
+    FnAttrs &= ~ObsoleteFuncAttrs;
+  }
+
+  // Set up the Attributes for the function.
+  SmallVector<AttributeWithIndex, 8> Attrs;
+  if (RetAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+  SmallVector<Value*, 8> Args;
+
+  // Loop through FunctionType's arguments and ensure they are specified
+  // correctly.  Also, gather any parameter attributes.
+  FunctionType::param_iterator I = Ty->param_begin();
+  FunctionType::param_iterator E = Ty->param_end();
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    const Type *ExpectedTy = 0;
+    if (I != E) {
+      ExpectedTy = *I++;
+    } else if (!Ty->isVarArg()) {
+      return Error(ArgList[i].Loc, "too many arguments specified");
+    }
+
+    if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+      return Error(ArgList[i].Loc, "argument is not of expected type '" +
+                   ExpectedTy->getDescription() + "'");
+    Args.push_back(ArgList[i].V);
+    if (ArgList[i].Attrs != Attribute::None)
+      Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+  }
+
+  if (I != E)
+    return Error(CallLoc, "not enough parameters specified for call");
+
+  if (FnAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+  // Finish off the Attributes and check them
+  AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+  InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
+                                      Args.begin(), Args.end());
+  II->setCallingConv(CC);
+  II->setAttributes(PAL);
+  Inst = II;
+  return false;
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Binary Operators.
+//===----------------------------------------------------------------------===//
+
+/// ParseArithmetic
+///  ::= ArithmeticOps TypeAndValue ',' Value
+///
+/// If OperandType is 0, then any FP or integer operand is allowed.  If it is 1,
+/// then any integer operand is allowed, if it is 2, any fp operand is allowed.
+bool LLParser::ParseArithmetic(Instruction *&Inst, PerFunctionState &PFS,
+                               unsigned Opc, unsigned OperandType) {
+  LocTy Loc; Value *LHS, *RHS;
+  if (ParseTypeAndValue(LHS, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' in arithmetic operation") ||
+      ParseValue(LHS->getType(), RHS, PFS))
+    return true;
+
+  bool Valid;
+  switch (OperandType) {
+  default: llvm_unreachable("Unknown operand type!");
+  case 0: // int or FP.
+    Valid = LHS->getType()->isIntOrIntVectorTy() ||
+            LHS->getType()->isFPOrFPVectorTy();
+    break;
+  case 1: Valid = LHS->getType()->isIntOrIntVectorTy(); break;
+  case 2: Valid = LHS->getType()->isFPOrFPVectorTy(); break;
+  }
+
+  if (!Valid)
+    return Error(Loc, "invalid operand type for instruction");
+
+  Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+  return false;
+}
+
+/// ParseLogical
+///  ::= ArithmeticOps TypeAndValue ',' Value
+bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
+                            unsigned Opc) {
+  LocTy Loc; Value *LHS, *RHS;
+  if (ParseTypeAndValue(LHS, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' in logical operation") ||
+      ParseValue(LHS->getType(), RHS, PFS))
+    return true;
+
+  if (!LHS->getType()->isIntOrIntVectorTy())
+    return Error(Loc,"instruction requires integer or integer vector operands");
+
+  Inst = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
+  return false;
+}
+
+
+/// ParseCompare
+///  ::= 'icmp' IPredicates TypeAndValue ',' Value
+///  ::= 'fcmp' FPredicates TypeAndValue ',' Value
+bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
+                            unsigned Opc) {
+  // Parse the integer/fp comparison predicate.
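+  // e.g. (illustrative): 'icmp ult i32 %a, %b' or 'fcmp olt double %x, %y'.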
+  LocTy Loc;
+  unsigned Pred;
+  Value *LHS, *RHS;
+  if (ParseCmpPredicate(Pred, Opc) ||
+      ParseTypeAndValue(LHS, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after compare value") ||
+      ParseValue(LHS->getType(), RHS, PFS))
+    return true;
+
+  if (Opc == Instruction::FCmp) {
+    if (!LHS->getType()->isFPOrFPVectorTy())
+      return Error(Loc, "fcmp requires floating point operands");
+    Inst = new FCmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  } else {
+    assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
+    if (!LHS->getType()->isIntOrIntVectorTy() &&
+        !LHS->getType()->isPointerTy())
+      return Error(Loc, "icmp requires integer or pointer operands");
+    Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Instructions.
+//===----------------------------------------------------------------------===//
+
+
+/// ParseCast
+///  ::= CastOpc TypeAndValue 'to' Type
+bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
+                         unsigned Opc) {
+  LocTy Loc;  Value *Op;
+  PATypeHolder DestTy(Type::getVoidTy(Context));
+  if (ParseTypeAndValue(Op, Loc, PFS) ||
+      ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
+      ParseType(DestTy))
+    return true;
+
+  if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy))
+    return Error(Loc, "invalid cast opcode for cast from '" +
+                 Op->getType()->getDescription() + "' to '" +
+                 DestTy->getDescription() + "'");
+  Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
+  return false;
+}
+
+/// ParseSelect
+///  ::= 'select' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after select condition") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after select value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (const char *Reason = SelectInst::areInvalidOperands(Op0, Op1, Op2))
+    return Error(Loc, Reason);
+
+  Inst = SelectInst::Create(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParseVA_Arg
+///  ::= 'va_arg' TypeAndValue ',' Type
+bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
+  Value *Op;
+  PATypeHolder EltTy(Type::getVoidTy(Context));
+  LocTy TypeLoc;
+  if (ParseTypeAndValue(Op, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
+      ParseType(EltTy, TypeLoc))
+    return true;
+
+  if (!EltTy->isFirstClassType())
+    return Error(TypeLoc, "va_arg requires operand with first class type");
+
+  Inst = new VAArgInst(Op, EltTy);
+  return false;
+}
+
+/// ParseExtractElement
+///  ::= 'extractelement' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseExtractElement(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after extract value") ||
+      ParseTypeAndValue(Op1, PFS))
+    return true;
+
+  if (!ExtractElementInst::isValidOperands(Op0, Op1))
+    return Error(Loc, "invalid extractelement operands");
+
+  Inst = ExtractElementInst::Create(Op0, Op1);
+  return false;
+}
+
+/// ParseInsertElement
+///  ::= 'insertelement' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseInsertElement(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after insertelement value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (!InsertElementInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid insertelement operands");
+
+  Inst = InsertElementInst::Create(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParseShuffleVector
+///  ::= 'shufflevector' TypeAndValue ',' TypeAndValue ',' TypeAndValue
+bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
+  LocTy Loc;
+  Value *Op0, *Op1, *Op2;
+  if (ParseTypeAndValue(Op0, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after shuffle mask") ||
+      ParseTypeAndValue(Op1, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after shuffle value") ||
+      ParseTypeAndValue(Op2, PFS))
+    return true;
+
+  if (!ShuffleVectorInst::isValidOperands(Op0, Op1, Op2))
+    return Error(Loc, "invalid shufflevector operands");
+
+  Inst = new ShuffleVectorInst(Op0, Op1, Op2);
+  return false;
+}
+
+/// ParsePHI
+///  ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
+int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
+  PATypeHolder Ty(Type::getVoidTy(Context));
+  Value *Op0, *Op1;
+  LocTy TypeLoc = Lex.getLoc();
+
+  if (ParseType(Ty) ||
+      ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+      ParseValue(Ty, Op0, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after phi value") ||
+      ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+      ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+    return true;
+
+  bool AteExtraComma = false;
+  SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
+  while (1) {
+    PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
+
+    if (!EatIfPresent(lltok::comma))
+      break;
+
+    if (Lex.getKind() == lltok::MetadataVar) {
+      AteExtraComma = true;
+      break;
+    }
+
+    if (ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
+        ParseValue(Ty, Op0, PFS) ||
+        ParseToken(lltok::comma, "expected ',' after phi value") ||
+        ParseValue(Type::getLabelTy(Context), Op1, PFS) ||
+        ParseToken(lltok::rsquare, "expected ']' in phi value list"))
+      return true;
+  }
+
+  if (!Ty->isFirstClassType())
+    return Error(TypeLoc, "phi node must have first class type");
+
+  PHINode *PN = PHINode::Create(Ty);
+  PN->reserveOperandSpace(PHIVals.size());
+  for (unsigned i = 0, e = PHIVals.size(); i != e; ++i)
+    PN->addIncoming(PHIVals[i].first, PHIVals[i].second);
+  Inst = PN;
+  return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseCall
+///   ::= 'tail'? 'call' OptionalCallingConv OptionalAttrs Type Value
+///       ParameterList OptionalAttrs
+bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
+                         bool isTail) {
+  unsigned RetAttrs, FnAttrs;
+  CallingConv::ID CC;
+  PATypeHolder RetType(Type::getVoidTy(Context));
+  LocTy RetTypeLoc;
+  ValID CalleeID;
+  SmallVector<ParamInfo, 16> ArgList;
+  LocTy CallLoc = Lex.getLoc();
+
+  if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) ||
+      ParseOptionalCallingConv(CC) ||
+      ParseOptionalAttrs(RetAttrs, 1) ||
+      ParseType(RetType, RetTypeLoc, true /*void allowed*/) ||
+      ParseValID(CalleeID) ||
+      ParseParameterList(ArgList, PFS) ||
+      ParseOptionalAttrs(FnAttrs, 2))
+    return true;
+
+  // If RetType is a non-function pointer type, then this is the short syntax
+  // for the call, which means that RetType is just the return type.  Infer the
+  // rest of the function argument types from the arguments that are present.
+  const PointerType *PFTy = 0;
+  const FunctionType *Ty = 0;
+  if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
+      !(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
+    // Pull out the types of all of the arguments...
+    std::vector<const Type*> ParamTypes;
+    for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
+      ParamTypes.push_back(ArgList[i].V->getType());
+
+    if (!FunctionType::isValidReturnType(RetType))
+      return Error(RetTypeLoc, "Invalid result type for LLVM function");
+
+    Ty = FunctionType::get(RetType, ParamTypes, false);
+    PFTy = PointerType::getUnqual(Ty);
+  }
+
+  // Look up the callee.
+  Value *Callee;
+  if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
+
+  // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
+  // function attributes.
+  unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
+  if (FnAttrs & ObsoleteFuncAttrs) {
+    RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
+    FnAttrs &= ~ObsoleteFuncAttrs;
+  }
+
+  // Set up the Attributes for the function.
+  SmallVector<AttributeWithIndex, 8> Attrs;
+  if (RetAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
+
+  SmallVector<Value*, 8> Args;
+
+  // Loop through FunctionType's arguments and ensure they are specified
+  // correctly.  Also, gather any parameter attributes.
+  FunctionType::param_iterator I = Ty->param_begin();
+  FunctionType::param_iterator E = Ty->param_end();
+  for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
+    const Type *ExpectedTy = 0;
+    if (I != E) {
+      ExpectedTy = *I++;
+    } else if (!Ty->isVarArg()) {
+      return Error(ArgList[i].Loc, "too many arguments specified");
+    }
+
+    if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
+      return Error(ArgList[i].Loc, "argument is not of expected type '" +
+                   ExpectedTy->getDescription() + "'");
+    Args.push_back(ArgList[i].V);
+    if (ArgList[i].Attrs != Attribute::None)
+      Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
+  }
+
+  if (I != E)
+    return Error(CallLoc, "not enough parameters specified for call");
+
+  if (FnAttrs != Attribute::None)
+    Attrs.push_back(AttributeWithIndex::get(~0, FnAttrs));
+
+  // Finish off the Attributes and check them
+  AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
+
+  CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
+  CI->setTailCall(isTail);
+  CI->setCallingConv(CC);
+  CI->setAttributes(PAL);
+  Inst = CI;
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions.
+//===----------------------------------------------------------------------===//
+
+/// ParseAlloc
+///   ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)?
+///   ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
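+/// e.g. (illustrative): 'alloca i32, i32 4, align 8', or the obsolete
+/// 'malloc i32, i32 %n' form, which is auto-upgraded to a malloc call below.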
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
+                         BasicBlock* BB, bool isAlloca) {
+  PATypeHolder Ty(Type::getVoidTy(Context));
+  Value *Size = 0;
+  LocTy SizeLoc;
+  unsigned Alignment = 0;
+  if (ParseType(Ty)) return true;
+
+  bool AteExtraComma = false;
+  if (EatIfPresent(lltok::comma)) {
+    if (Lex.getKind() == lltok::kw_align) {
+      if (ParseOptionalAlignment(Alignment)) return true;
+    } else if (Lex.getKind() == lltok::MetadataVar) {
+      AteExtraComma = true;
+    } else {
+      if (ParseTypeAndValue(Size, SizeLoc, PFS) ||
+          ParseOptionalCommaAlign(Alignment, AteExtraComma))
+        return true;
+    }
+  }
+
+  if (Size && !Size->getType()->isIntegerTy())
+    return Error(SizeLoc, "element count must have integer type");
+
+  if (isAlloca) {
+    Inst = new AllocaInst(Ty, Size, Alignment);
+    return AteExtraComma ? InstExtraComma : InstNormal;
+  }
+
+  // Autoupgrade old malloc instruction to malloc call.
+  // FIXME: Remove in LLVM 3.0.
+  if (Size && !Size->getType()->isIntegerTy(32))
+    return Error(SizeLoc, "element count must be i32");
+  const Type *IntPtrTy = Type::getInt32Ty(Context);
+  Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
+  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
+  if (!MallocF)
+    // Prototype malloc as "void *(int32)".
+    // This function is renamed as "malloc" in ValidateEndOfModule().
+    MallocF = cast<Function>(
+      M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
+  Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
+  return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseFree
+///   ::= 'free' TypeAndValue
+bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
+                         BasicBlock* BB) {
+  Value *Val; LocTy Loc;
+  if (ParseTypeAndValue(Val, Loc, PFS)) return true;
+  if (!Val->getType()->isPointerTy())
+    return Error(Loc, "operand to free must be a pointer");
+  Inst = CallInst::CreateFree(Val, BB);
+  return false;
+}
+
+/// ParseLoad
+///   ::= 'volatile'? 'load' TypeAndValue (',' OptionalInfo)?
+int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
+                        bool isVolatile) {
+  Value *Val; LocTy Loc;
+  unsigned Alignment = 0;
+  bool AteExtraComma = false;
+  if (ParseTypeAndValue(Val, Loc, PFS) ||
+      ParseOptionalCommaAlign(Alignment, AteExtraComma))
+    return true;
+
+  if (!Val->getType()->isPointerTy() ||
+      !cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
+    return Error(Loc, "load operand must be a pointer to a first class type");
+
+  Inst = new LoadInst(Val, "", isVolatile, Alignment);
+  return AteExtraComma ? InstExtraComma : InstNormal;
+}
+
+/// ParseStore
+///   ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
+int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
+                         bool isVolatile) {
+  Value *Val, *Ptr; LocTy Loc, PtrLoc;
+  unsigned Alignment = 0;
+  bool AteExtraComma = false;
+  if (ParseTypeAndValue(Val, Loc, PFS) ||
+      ParseToken(lltok::comma, "expected ',' after store operand") ||
+      ParseTypeAndValue(Ptr, PtrLoc, PFS) ||
+      ParseOptionalCommaAlign(Alignment, AteExtraComma))
+    return true;
+
+  if (!Ptr->getType()->isPointerTy())
+    return Error(PtrLoc, "store operand must be a pointer");
+  if (!Val->getType()->isFirstClassType())
+    return Error(Loc, "store operand must be a first class value");
+  if (cast<PointerType>(Ptr->getType())->getElementType() != Val->getType())
+    return Error(Loc, "stored value and pointer type do not match");
+
+  Inst = new StoreInst(Val, Ptr, isVolatile, Alignment);
+  return AteExtraComma ? InstExtraComma : InstNormal;
+}
InstExtraComma : InstNormal; +} + +/// ParseGetResult +/// ::= 'getresult' TypeAndValue ',' i32 +/// FIXME: Remove support for getresult in LLVM 3.0 +bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) { + Value *Val; LocTy ValLoc, EltLoc; + unsigned Element; + if (ParseTypeAndValue(Val, ValLoc, PFS) || + ParseToken(lltok::comma, "expected ',' after getresult operand") || + ParseUInt32(Element, EltLoc)) + return true; + + if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy()) + return Error(ValLoc, "getresult inst requires an aggregate operand"); + if (!ExtractValueInst::getIndexedType(Val->getType(), Element)) + return Error(EltLoc, "invalid getresult index for value"); + Inst = ExtractValueInst::Create(Val, Element); + return false; +} + +/// ParseGetElementPtr +/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)* +int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { + Value *Ptr, *Val; LocTy Loc, EltLoc; + + bool InBounds = EatIfPresent(lltok::kw_inbounds); + + if (ParseTypeAndValue(Ptr, Loc, PFS)) return true; + + if (!Ptr->getType()->isPointerTy()) + return Error(Loc, "base of getelementptr must be a pointer"); + + SmallVector Indices; + bool AteExtraComma = false; + while (EatIfPresent(lltok::comma)) { + if (Lex.getKind() == lltok::MetadataVar) { + AteExtraComma = true; + break; + } + if (ParseTypeAndValue(Val, EltLoc, PFS)) return true; + if (!Val->getType()->isIntegerTy()) + return Error(EltLoc, "getelementptr index must be an integer"); + Indices.push_back(Val); + } + + if (!GetElementPtrInst::getIndexedType(Ptr->getType(), + Indices.begin(), Indices.end())) + return Error(Loc, "invalid getelementptr indices"); + Inst = GetElementPtrInst::Create(Ptr, Indices.begin(), Indices.end()); + if (InBounds) + cast(Inst)->setIsInBounds(true); + return AteExtraComma ? InstExtraComma : InstNormal; +} + +/// ParseExtractValue +/// ::= 'extractvalue' TypeAndValue (',' uint32)+ +int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) { + Value *Val; LocTy Loc; + SmallVector Indices; + bool AteExtraComma; + if (ParseTypeAndValue(Val, Loc, PFS) || + ParseIndexList(Indices, AteExtraComma)) + return true; + + if (!Val->getType()->isAggregateType()) + return Error(Loc, "extractvalue operand must be aggregate type"); + + if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(), + Indices.end())) + return Error(Loc, "invalid indices for extractvalue"); + Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end()); + return AteExtraComma ? InstExtraComma : InstNormal; +} + +/// ParseInsertValue +/// ::= 'insertvalue' TypeAndValue ',' TypeAndValue (',' uint32)+ +int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { + Value *Val0, *Val1; LocTy Loc0, Loc1; + SmallVector Indices; + bool AteExtraComma; + if (ParseTypeAndValue(Val0, Loc0, PFS) || + ParseToken(lltok::comma, "expected comma after insertvalue operand") || + ParseTypeAndValue(Val1, Loc1, PFS) || + ParseIndexList(Indices, AteExtraComma)) + return true; + + if (!Val0->getType()->isAggregateType()) + return Error(Loc0, "insertvalue operand must be aggregate type"); + + if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(), + Indices.end())) + return Error(Loc0, "invalid indices for insertvalue"); + Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end()); + return AteExtraComma ? 
InstExtraComma : InstNormal; +} + +//===----------------------------------------------------------------------===// +// Embedded metadata. +//===----------------------------------------------------------------------===// + +/// ParseMDNodeVector +/// ::= Element (',' Element)* +/// Element +/// ::= 'null' | TypeAndValue +bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts, + PerFunctionState *PFS) { + // Check for an empty list. + if (Lex.getKind() == lltok::rbrace) + return false; + + do { + // Null is a special case since it is typeless. + if (EatIfPresent(lltok::kw_null)) { + Elts.push_back(0); + continue; + } + + Value *V = 0; + PATypeHolder Ty(Type::getVoidTy(Context)); + ValID ID; + if (ParseType(Ty) || ParseValID(ID, PFS) || + ConvertValIDToValue(Ty, ID, V, PFS)) + return true; + + Elts.push_back(V); + } while (EatIfPresent(lltok::comma)); + + return false; +} diff --git a/final/lib/AsmParser/LLParser.h b/final/lib/AsmParser/LLParser.h new file mode 100644 index 00000000000..93e7f778ebc --- /dev/null +++ b/final/lib/AsmParser/LLParser.h @@ -0,0 +1,378 @@ +//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the parser class for .ll files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ASMPARSER_LLPARSER_H +#define LLVM_ASMPARSER_LLPARSER_H + +#include "LLLexer.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/ValueHandle.h" +#include + +namespace llvm { + class Module; + class OpaqueType; + class Function; + class Value; + class BasicBlock; + class Instruction; + class Constant; + class GlobalValue; + class MDString; + class MDNode; + + /// ValID - Represents a reference of a definition of some sort with no type. + /// There are several cases where we have to parse the value but where the + /// type can depend on later context. This may either be a numeric reference + /// or a symbolic (%var) reference. This is just a discriminated union. + struct ValID { + enum { + t_LocalID, t_GlobalID, // ID in UIntVal. + t_LocalName, t_GlobalName, // Name in StrVal. + t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal. + t_Null, t_Undef, t_Zero, // No value. + t_EmptyArray, // No value: [] + t_Constant, // Value in ConstantVal. + t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. + t_MDNode, // Value in MDNodeVal. + t_MDString // Value in MDStringVal. + } Kind; + + LLLexer::LocTy Loc; + unsigned UIntVal; + std::string StrVal, StrVal2; + APSInt APSIntVal; + APFloat APFloatVal; + Constant *ConstantVal; + MDNode *MDNodeVal; + MDString *MDStringVal; + ValID() : APFloatVal(0.0) {} + + bool operator<(const ValID &RHS) const { + if (Kind == t_LocalID || Kind == t_GlobalID) + return UIntVal < RHS.UIntVal; + assert((Kind == t_LocalName || Kind == t_GlobalName) && + "Ordering not defined for this ValID kind yet"); + return StrVal < RHS.StrVal; + } + }; + + class LLParser { + public: + typedef LLLexer::LocTy LocTy; + private: + LLVMContext &Context; + LLLexer Lex; + Module *M; + + // Instruction metadata resolution. Each instruction can have a list of + // MDRef info associated with them. 
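// ----------------------------------------------------------------------------
// Aside: ValID above is a classic discriminated union -- one Kind tag selects
// which member is meaningful, and operator< is defined only for the ID and
// name kinds (the assert enforces this). A reduced, self-contained model;
// names in 'sketch' are illustrative, not LLVM API.
#include <cassert>
#include <string>

namespace sketch {
struct ValIDModel {
  enum KindTy { LocalID, GlobalID, LocalName, GlobalName } Kind;
  unsigned UIntVal;   // meaningful for LocalID/GlobalID
  std::string StrVal; // meaningful for LocalName/GlobalName

  bool operator<(const ValIDModel &RHS) const {
    if (Kind == LocalID || Kind == GlobalID)
      return UIntVal < RHS.UIntVal;
    assert((Kind == LocalName || Kind == GlobalName) &&
           "Ordering not defined for this kind");
    return StrVal < RHS.StrVal;
  }
};
} // namespace sketch

int main() {
  sketch::ValIDModel A; A.Kind = sketch::ValIDModel::LocalID; A.UIntVal = 3;
  sketch::ValIDModel B; B.Kind = sketch::ValIDModel::LocalID; B.UIntVal = 7;
  assert(A < B); // %3 orders before %7, just as in the parser's maps
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------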
+ // + // The simpler approach of just creating temporary MDNodes and then calling + // RAUW on them when the definition is processed doesn't work because some + // instruction metadata kinds, such as dbg, get stored in the IR in an + // "optimized" format which doesn't participate in the normal value use + // lists. This means that RAUW doesn't work, even on temporary MDNodes + // which otherwise support RAUW. Instead, we defer resolving MDNode + // references until the definitions have been processed. + struct MDRef { + SMLoc Loc; + unsigned MDKind, MDSlot; + }; + DenseMap > ForwardRefInstMetadata; + + // Type resolution handling data structures. + std::map > ForwardRefTypes; + std::map > ForwardRefTypeIDs; + std::vector NumberedTypes; + std::vector > NumberedMetadata; + std::map, LocTy> > ForwardRefMDNodes; + struct UpRefRecord { + /// Loc - This is the location of the upref. + LocTy Loc; + + /// NestingLevel - The number of nesting levels that need to be popped + /// before this type is resolved. + unsigned NestingLevel; + + /// LastContainedTy - This is the type at the current binding level for + /// the type. Every time we reduce the nesting level, this gets updated. + const Type *LastContainedTy; + + /// UpRefTy - This is the actual opaque type that the upreference is + /// represented with. + OpaqueType *UpRefTy; + + UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy) + : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy), + UpRefTy(URTy) {} + }; + std::vector UpRefs; + + // Global Value reference information. + std::map > ForwardRefVals; + std::map > ForwardRefValIDs; + std::vector NumberedVals; + + // References to blockaddress. The key is the function ValID, the value is + // a list of references to blocks in that function. + std::map > > + ForwardRefBlockAddresses; + + Function *MallocF; + public: + LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : + Context(m->getContext()), Lex(F, SM, Err, m->getContext()), + M(m), MallocF(NULL) {} + bool Run(); + + LLVMContext& getContext() { return Context; } + + private: + + bool Error(LocTy L, const Twine &Msg) const { + return Lex.Error(L, Msg); + } + bool TokError(const Twine &Msg) const { + return Error(Lex.getLoc(), Msg); + } + + /// GetGlobalVal - Get a value with the specified name or ID, creating a + /// forward reference record if needed. This can return null if the value + /// exists but does not have the right type. + GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc); + GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc); + + // Helper Routines. 
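// ----------------------------------------------------------------------------
// Aside: the ParseToken/EatIfPresent helpers declared below follow the usual
// recursive-descent idiom -- advance the lexer only when the lookahead
// matches, so optional syntax costs a single comparison. A generic,
// self-contained sketch (names in 'sketch' are illustrative, not LLVM API):
#include <cassert>
#include <cstddef>
#include <vector>

namespace sketch {
enum Kind { Comma, Align, Eof };

class Cursor {
  std::vector<Kind> Toks;
  size_t Pos;
public:
  explicit Cursor(const std::vector<Kind> &T) : Toks(T), Pos(0) {}
  Kind peek() const { return Pos < Toks.size() ? Toks[Pos] : Eof; }
  // Consume the token only if it matches; report whether it did.
  bool eatIfPresent(Kind K) {
    if (peek() != K) return false;
    ++Pos;
    return true;
  }
};
} // namespace sketch

int main() {
  std::vector<sketch::Kind> Toks;
  Toks.push_back(sketch::Comma);
  Toks.push_back(sketch::Align);
  sketch::Cursor C(Toks);
  assert(C.eatIfPresent(sketch::Comma));  // optional comma was present
  assert(!C.eatIfPresent(sketch::Comma)); // lookahead is 'align'; not eaten
  assert(C.eatIfPresent(sketch::Align));
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------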
+ bool ParseToken(lltok::Kind T, const char *ErrMsg); + bool EatIfPresent(lltok::Kind T) { + if (Lex.getKind() != T) return false; + Lex.Lex(); + return true; + } + bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) { + if (Lex.getKind() != T) { + Present = false; + } else { + if (Loc) + *Loc = Lex.getLoc(); + Lex.Lex(); + Present = true; + } + return false; + } + bool ParseStringConstant(std::string &Result); + bool ParseUInt32(unsigned &Val); + bool ParseUInt32(unsigned &Val, LocTy &Loc) { + Loc = Lex.getLoc(); + return ParseUInt32(Val); + } + bool ParseOptionalAddrSpace(unsigned &AddrSpace); + bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind); + bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); + bool ParseOptionalLinkage(unsigned &Linkage) { + bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); + } + bool ParseOptionalVisibility(unsigned &Visibility); + bool ParseOptionalCallingConv(CallingConv::ID &CC); + bool ParseOptionalAlignment(unsigned &Alignment); + bool ParseOptionalStackAlignment(unsigned &Alignment); + bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); + bool ParseIndexList(SmallVectorImpl &Indices,bool &AteExtraComma); + bool ParseIndexList(SmallVectorImpl &Indices) { + bool AteExtraComma; + if (ParseIndexList(Indices, AteExtraComma)) return true; + if (AteExtraComma) + return TokError("expected index"); + return false; + } + + // Top-Level Entities + bool ParseTopLevelEntities(); + bool ValidateEndOfModule(); + bool ParseTargetDefinition(); + bool ParseDepLibs(); + bool ParseModuleAsm(); + bool ParseUnnamedType(); + bool ParseNamedType(); + bool ParseDeclare(); + bool ParseDefine(); + + bool ParseGlobalType(bool &IsConstant); + bool ParseUnnamedGlobal(); + bool ParseNamedGlobal(); + bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, + bool HasLinkage, unsigned Visibility); + bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); + bool ParseStandaloneMetadata(); + bool ParseNamedMetadata(); + bool ParseMDString(MDString *&Result); + bool ParseMDNodeID(MDNode *&Result); + bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); + + // Type Parsing. + bool ParseType(PATypeHolder &Result, bool AllowVoid = false); + bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) { + Loc = Lex.getLoc(); + return ParseType(Result, AllowVoid); + } + bool ParseTypeRec(PATypeHolder &H); + bool ParseStructType(PATypeHolder &H, bool Packed); + bool ParseArrayVectorType(PATypeHolder &H, bool isVector); + bool ParseFunctionType(PATypeHolder &Result); + PATypeHolder HandleUpRefs(const Type *Ty); + + // Function Semantic Analysis. + class PerFunctionState { + LLParser &P; + Function &F; + std::map > ForwardRefVals; + std::map > ForwardRefValIDs; + std::vector NumberedVals; + + /// FunctionNumber - If this is an unnamed function, this is the slot + /// number of it, otherwise it is -1. + int FunctionNumber; + public: + PerFunctionState(LLParser &p, Function &f, int FunctionNumber); + ~PerFunctionState(); + + Function &getFunction() const { return F; } + + bool FinishFunction(); + + /// GetVal - Get a value with the specified name or ID, creating a + /// forward reference record if needed. This can return null if the value + /// exists but does not have the right type. 
+ Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc); + Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc); + + /// SetInstName - After an instruction is parsed and inserted into its + /// basic block, this installs its name. + bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc, + Instruction *Inst); + + /// GetBB - Get a basic block with the specified name or ID, creating a + /// forward reference record if needed. This can return null if the value + /// is not a BasicBlock. + BasicBlock *GetBB(const std::string &Name, LocTy Loc); + BasicBlock *GetBB(unsigned ID, LocTy Loc); + + /// DefineBB - Define the specified basic block, which is either named or + /// unnamed. If there is an error, this returns null otherwise it returns + /// the block being defined. + BasicBlock *DefineBB(const std::string &Name, LocTy Loc); + }; + + bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, + PerFunctionState *PFS); + + bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS); + bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, + PerFunctionState &PFS) { + Loc = Lex.getLoc(); + return ParseValue(Ty, V, PFS); + } + + bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS); + bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) { + Loc = Lex.getLoc(); + return ParseTypeAndValue(V, PFS); + } + bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, + PerFunctionState &PFS); + bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) { + LocTy Loc; + return ParseTypeAndBasicBlock(BB, Loc, PFS); + } + + + struct ParamInfo { + LocTy Loc; + Value *V; + unsigned Attrs; + ParamInfo(LocTy loc, Value *v, unsigned attrs) + : Loc(loc), V(v), Attrs(attrs) {} + }; + bool ParseParameterList(SmallVectorImpl &ArgList, + PerFunctionState &PFS); + + // Constant Parsing. + bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); + bool ParseGlobalValue(const Type *Ty, Constant *&V); + bool ParseGlobalTypeAndValue(Constant *&V); + bool ParseGlobalValueVector(SmallVectorImpl &Elts); + bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); + bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); + bool ParseMDNodeVector(SmallVectorImpl &, PerFunctionState *PFS); + bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS); + + // Function Parsing. + struct ArgInfo { + LocTy Loc; + PATypeHolder Type; + unsigned Attrs; + std::string Name; + ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N) + : Loc(L), Type(Ty), Attrs(Attr), Name(N) {} + }; + bool ParseArgumentList(std::vector &ArgList, + bool &isVarArg, bool inType); + bool ParseFunctionHeader(Function *&Fn, bool isDefine); + bool ParseFunctionBody(Function &Fn); + bool ParseBasicBlock(PerFunctionState &PFS); + + // Instruction Parsing. Each instruction parsing routine can return with a + // normal result, an error result, or return having eaten an extra comma. 
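// ----------------------------------------------------------------------------
// Aside: the instruction parsers return int rather than bool so that, besides
// success and failure, they can report "parsed, but consumed a trailing
// comma" -- the cue that instruction metadata such as !dbg follows. A
// self-contained model of the InstResult convention declared just below
// (names in 'sketch' are illustrative, not LLVM API):
#include <cassert>

namespace sketch {
enum InstResult { Normal = 0, Error = 1, ExtraComma = 2 };

InstResult parseSomething(bool Fail, bool AteComma) {
  if (Fail) return Error;
  return AteComma ? ExtraComma : Normal;
}
} // namespace sketch

int main() {
  // Callers first check for hard failure, then branch on ExtraComma to
  // decide whether trailing metadata must be parsed.
  int R = sketch::parseSomething(false, true);
  assert(R != sketch::Error);
  bool ExpectMetadata = (R == sketch::ExtraComma);
  assert(ExpectMetadata);
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------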
+ enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 }; + int ParseInstruction(Instruction *&Inst, BasicBlock *BB, + PerFunctionState &PFS); + bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); + + int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); + bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); + bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); + bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); + bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); + + bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, + unsigned OperandType); + bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc); + bool ParseSelect(Instruction *&I, PerFunctionState &PFS); + bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS); + bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS); + bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); + bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); + int ParsePHI(Instruction *&I, PerFunctionState &PFS); + bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); + int ParseAlloc(Instruction *&I, PerFunctionState &PFS, + BasicBlock *BB = 0, bool isAlloca = true); + bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB); + int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); + bool ParseGetResult(Instruction *&I, PerFunctionState &PFS); + int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); + int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); + int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); + + bool ResolveForwardRefBlockAddresses(Function *TheFn, + std::vector > &Refs, + PerFunctionState *PFS); + }; +} // End llvm namespace + +#endif diff --git a/final/lib/AsmParser/LLToken.h b/final/lib/AsmParser/LLToken.h new file mode 100644 index 00000000000..576da191aec --- /dev/null +++ b/final/lib/AsmParser/LLToken.h @@ -0,0 +1,148 @@ +//===- LLToken.h - Token Codes for LLVM Assembly Files ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the enums for the .ll lexer. +// +//===----------------------------------------------------------------------===// + +#ifndef LIBS_ASMPARSER_LLTOKEN_H +#define LIBS_ASMPARSER_LLTOKEN_H + +namespace llvm { +namespace lltok { + enum Kind { + // Markers + Eof, Error, + + // Tokens with no info. + dotdotdot, // ... + equal, comma, // = , + star, // * + lsquare, rsquare, // [ ] + lbrace, rbrace, // { } + less, greater, // < > + lparen, rparen, // ( ) + backslash, // \ (not /) + exclaim, // ! 
+ + kw_x, + kw_begin, kw_end, + kw_true, kw_false, + kw_declare, kw_define, + kw_global, kw_constant, + + kw_private, kw_linker_private, kw_linker_private_weak, + kw_linker_private_weak_def_auto, kw_internal, + kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_dllimport, kw_dllexport, kw_common, kw_available_externally, + kw_default, kw_hidden, kw_protected, + kw_unnamed_addr, + kw_extern_weak, + kw_external, kw_thread_local, + kw_zeroinitializer, + kw_undef, kw_null, + kw_to, + kw_tail, + kw_target, + kw_triple, + kw_deplibs, + kw_datalayout, + kw_volatile, + kw_nuw, + kw_nsw, + kw_exact, + kw_inbounds, + kw_align, + kw_addrspace, + kw_section, + kw_alias, + kw_module, + kw_asm, + kw_sideeffect, + kw_alignstack, + kw_gc, + kw_c, + + kw_cc, kw_ccc, kw_fastcc, kw_coldcc, + kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc, + kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc, + kw_msp430_intrcc, + kw_ptx_kernel, kw_ptx_device, + + kw_signext, + kw_zeroext, + kw_inreg, + kw_sret, + kw_nounwind, + kw_noreturn, + kw_noalias, + kw_nocapture, + kw_byval, + kw_nest, + kw_readnone, + kw_readonly, + + kw_inlinehint, + kw_noinline, + kw_alwaysinline, + kw_optsize, + kw_ssp, + kw_sspreq, + kw_noredzone, + kw_noimplicitfloat, + kw_naked, + kw_hotpatch, + + kw_type, + kw_opaque, + + kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, + kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, + kw_ueq, kw_une, + + // Instruction Opcodes (Opcode in UIntVal). + kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul, + kw_udiv, kw_sdiv, kw_fdiv, + kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr, + kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, + + kw_phi, kw_call, + kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp, + kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast, + kw_select, kw_va_arg, + + kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind, + kw_unreachable, + + kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr, + + kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult, + kw_extractvalue, kw_insertvalue, kw_blockaddress, + + // Unsigned Valued tokens (UIntVal). + GlobalID, // @42 + LocalVarID, // %42 + + // String valued tokens (StrVal). + LabelStr, // foo: + GlobalVar, // @foo @"foo" + LocalVar, // %foo %"foo" + MetadataVar, // !foo + StringConstant, // "foo" + + // Type valued tokens (TyVal). + Type, + + APFloat, // APFloatVal + APSInt // APSInt + }; +} // end namespace lltok +} // end namespace llvm + +#endif diff --git a/final/lib/AsmParser/Makefile b/final/lib/AsmParser/Makefile new file mode 100644 index 00000000000..995bb0e130e --- /dev/null +++ b/final/lib/AsmParser/Makefile @@ -0,0 +1,14 @@ +##===- lib/AsmParser/Makefile ------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. 
+LIBRARYNAME := LLVMAsmParser
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
diff --git a/final/lib/AsmParser/Parser.cpp b/final/lib/AsmParser/Parser.cpp
new file mode 100644
index 00000000000..59fb471f2b9
--- /dev/null
+++ b/final/lib/AsmParser/Parser.cpp
@@ -0,0 +1,62 @@
+//===- Parser.cpp - Main dispatch module for the Parser library -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This library implements the functionality defined in llvm/Assembly/Parser.h
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Assembly/Parser.h"
+#include "LLParser.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/system_error.h"
+#include <cstring>
+using namespace llvm;
+
+Module *llvm::ParseAssembly(MemoryBuffer *F,
+                            Module *M,
+                            SMDiagnostic &Err,
+                            LLVMContext &Context) {
+  SourceMgr SM;
+  SM.AddNewSourceBuffer(F, SMLoc());
+
+  // If we are parsing into an existing module, do it.
+  if (M)
+    return LLParser(F, SM, Err, M).Run() ? 0 : M;
+
+  // Otherwise create a new module.
+  OwningPtr<Module> M2(new Module(F->getBufferIdentifier(), Context));
+  if (LLParser(F, SM, Err, M2.get()).Run())
+    return 0;
+  return M2.take();
+}
+
+Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err,
+                                LLVMContext &Context) {
+  OwningPtr<MemoryBuffer> File;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) {
+    Err = SMDiagnostic(Filename,
+                       "Could not open input file: " + ec.message());
+    return 0;
+  }
+
+  return ParseAssembly(File.take(), 0, Err, Context);
+}
+
+Module *llvm::ParseAssemblyString(const char *AsmString, Module *M,
+                                  SMDiagnostic &Err, LLVMContext &Context) {
+  MemoryBuffer *F =
+    MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)),
+                               "<string>");
+
+  return ParseAssembly(F, M, Err, Context);
+}
diff --git a/final/lib/Bitcode/CMakeLists.txt b/final/lib/Bitcode/CMakeLists.txt
new file mode 100644
index 00000000000..ff7e290cad1
--- /dev/null
+++ b/final/lib/Bitcode/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(Reader)
+add_subdirectory(Writer)
diff --git a/final/lib/Bitcode/Makefile b/final/lib/Bitcode/Makefile
new file mode 100644
index 00000000000..2d6b5ad1fe8
--- /dev/null
+++ b/final/lib/Bitcode/Makefile
@@ -0,0 +1,14 @@
+##===- lib/Bitcode/Makefile --------------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+PARALLEL_DIRS = Reader Writer
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/Bitcode/Reader/BitReader.cpp b/final/lib/Bitcode/Reader/BitReader.cpp
new file mode 100644
index 00000000000..15844c0041c
--- /dev/null
+++ b/final/lib/Bitcode/Reader/BitReader.cpp
@@ -0,0 +1,88 @@
+//===-- BitReader.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
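// ----------------------------------------------------------------------------
// Aside: a minimal driver for the Parser.cpp entry points above, assuming the
// LLVM 2.9-era headers this patch introduces. Passing a null Module asks
// ParseAssemblyString to allocate a fresh one; on failure it returns null and
// Err carries the diagnostic. "llparse-demo" is a hypothetical program name.
#include "llvm/Assembly/Parser.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::SMDiagnostic Err;
  llvm::Module *M = llvm::ParseAssemblyString(
      "define i32 @id(i32 %x) {\n  ret i32 %x\n}\n",
      0, Err, llvm::getGlobalContext());
  if (!M) {
    Err.Print("llparse-demo", llvm::errs());
    return 1;
  }
  delete M;
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------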
+// +//===----------------------------------------------------------------------===// + +#include "llvm-c/BitReader.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/LLVMContext.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include + +using namespace llvm; + +/* Builds a module from the bitcode in the specified memory buffer, returning a + reference to the module via the OutModule parameter. Returns 0 on success. + Optionally returns a human-readable error message via OutMessage. */ +LLVMBool LLVMParseBitcode(LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, char **OutMessage) { + return LLVMParseBitcodeInContext(wrap(&getGlobalContext()), MemBuf, OutModule, + OutMessage); +} + +LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutModule, + char **OutMessage) { + std::string Message; + + *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); + if (!*OutModule) { + if (OutMessage) + *OutMessage = strdup(Message.c_str()); + return 1; + } + + return 0; +} + +/* Reads a module from the specified path, returning via the OutModule parameter + a module provider which performs lazy deserialization. Returns 0 on success. + Optionally returns a human-readable error message via OutMessage. */ +LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleRef *OutM, + char **OutMessage) { + std::string Message; + + *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), + &Message)); + if (!*OutM) { + if (OutMessage) + *OutMessage = strdup(Message.c_str()); + return 1; + } + + return 0; + +} + +LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, + char **OutMessage) { + return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM, + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */ +LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { + return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf, + reinterpret_cast(OutMP), + OutMessage); +} + +/* Deprecated: Use LLVMGetBitcodeModule instead. */ +LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf, + LLVMModuleProviderRef *OutMP, + char **OutMessage) { + return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf, + OutMP, OutMessage); +} diff --git a/final/lib/Bitcode/Reader/BitcodeReader.cpp b/final/lib/Bitcode/Reader/BitcodeReader.cpp new file mode 100644 index 00000000000..dbf8da02799 --- /dev/null +++ b/final/lib/Bitcode/Reader/BitcodeReader.cpp @@ -0,0 +1,2690 @@ +//===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitcodeReader class. 
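// ----------------------------------------------------------------------------
// Aside: driving the C API wrappers defined in BitReader.cpp above, assuming
// this release's llvm-c headers. "input.bc" is a hypothetical path; error
// strings are strdup'd by the wrappers, so they are released with
// LLVMDisposeMessage.
#include "llvm-c/BitReader.h"
#include "llvm-c/Core.h"
#include <stdio.h>

int main() {
  LLVMMemoryBufferRef Buf = 0;
  char *Msg = 0;
  if (LLVMCreateMemoryBufferWithContentsOfFile("input.bc", &Buf, &Msg)) {
    fprintf(stderr, "open failed: %s\n", Msg);
    LLVMDisposeMessage(Msg);
    return 1;
  }
  LLVMModuleRef M = 0;
  if (LLVMParseBitcode(Buf, &M, &Msg)) { // returns nonzero on error
    fprintf(stderr, "parse failed: %s\n", Msg);
    LLVMDisposeMessage(Msg);
    // Buffer ownership on the error path varies by release; deliberately
    // not disposed here to avoid a double free in this sketch.
    return 1;
  }
  LLVMDisposeModule(M);
  LLVMDisposeMemoryBuffer(Buf); // on success the parser leaves it to us
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------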
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/ReaderWriter.h" +#include "BitcodeReader.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/InlineAsm.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Operator.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/OperandTraits.h" +using namespace llvm; + +void BitcodeReader::FreeState() { + if (BufferOwned) + delete Buffer; + Buffer = 0; + std::vector().swap(TypeList); + ValueList.clear(); + MDValueList.clear(); + + std::vector().swap(MAttributes); + std::vector().swap(FunctionBBs); + std::vector().swap(FunctionsWithBodies); + DeferredFunctionInfo.clear(); + MDKindMap.clear(); +} + +//===----------------------------------------------------------------------===// +// Helper functions to implement forward reference resolution, etc. +//===----------------------------------------------------------------------===// + +/// ConvertToString - Convert a string from a record into an std::string, return +/// true on failure. +template +static bool ConvertToString(SmallVector &Record, unsigned Idx, + StrTy &Result) { + if (Idx > Record.size()) + return true; + + for (unsigned i = Idx, e = Record.size(); i != e; ++i) + Result += (char)Record[i]; + return false; +} + +static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { + switch (Val) { + default: // Map unknown/new linkages to external + case 0: return GlobalValue::ExternalLinkage; + case 1: return GlobalValue::WeakAnyLinkage; + case 2: return GlobalValue::AppendingLinkage; + case 3: return GlobalValue::InternalLinkage; + case 4: return GlobalValue::LinkOnceAnyLinkage; + case 5: return GlobalValue::DLLImportLinkage; + case 6: return GlobalValue::DLLExportLinkage; + case 7: return GlobalValue::ExternalWeakLinkage; + case 8: return GlobalValue::CommonLinkage; + case 9: return GlobalValue::PrivateLinkage; + case 10: return GlobalValue::WeakODRLinkage; + case 11: return GlobalValue::LinkOnceODRLinkage; + case 12: return GlobalValue::AvailableExternallyLinkage; + case 13: return GlobalValue::LinkerPrivateLinkage; + case 14: return GlobalValue::LinkerPrivateWeakLinkage; + case 15: return GlobalValue::LinkerPrivateWeakDefAutoLinkage; + } +} + +static GlobalValue::VisibilityTypes GetDecodedVisibility(unsigned Val) { + switch (Val) { + default: // Map unknown visibilities to default. 
+ case 0: return GlobalValue::DefaultVisibility; + case 1: return GlobalValue::HiddenVisibility; + case 2: return GlobalValue::ProtectedVisibility; + } +} + +static int GetDecodedCastOpcode(unsigned Val) { + switch (Val) { + default: return -1; + case bitc::CAST_TRUNC : return Instruction::Trunc; + case bitc::CAST_ZEXT : return Instruction::ZExt; + case bitc::CAST_SEXT : return Instruction::SExt; + case bitc::CAST_FPTOUI : return Instruction::FPToUI; + case bitc::CAST_FPTOSI : return Instruction::FPToSI; + case bitc::CAST_UITOFP : return Instruction::UIToFP; + case bitc::CAST_SITOFP : return Instruction::SIToFP; + case bitc::CAST_FPTRUNC : return Instruction::FPTrunc; + case bitc::CAST_FPEXT : return Instruction::FPExt; + case bitc::CAST_PTRTOINT: return Instruction::PtrToInt; + case bitc::CAST_INTTOPTR: return Instruction::IntToPtr; + case bitc::CAST_BITCAST : return Instruction::BitCast; + } +} +static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { + switch (Val) { + default: return -1; + case bitc::BINOP_ADD: + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; + case bitc::BINOP_SUB: + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; + case bitc::BINOP_MUL: + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; + case bitc::BINOP_UDIV: return Instruction::UDiv; + case bitc::BINOP_SDIV: + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; + case bitc::BINOP_UREM: return Instruction::URem; + case bitc::BINOP_SREM: + return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem; + case bitc::BINOP_SHL: return Instruction::Shl; + case bitc::BINOP_LSHR: return Instruction::LShr; + case bitc::BINOP_ASHR: return Instruction::AShr; + case bitc::BINOP_AND: return Instruction::And; + case bitc::BINOP_OR: return Instruction::Or; + case bitc::BINOP_XOR: return Instruction::Xor; + } +} + +namespace llvm { +namespace { + /// @brief A class for maintaining the slot number definition + /// as a placeholder for the actual definition for forward constants defs. + class ConstantPlaceHolder : public ConstantExpr { + void operator=(const ConstantPlaceHolder &); // DO NOT IMPLEMENT + public: + // allocate space for exactly one operand + void *operator new(size_t s) { + return User::operator new(s, 1); + } + explicit ConstantPlaceHolder(const Type *Ty, LLVMContext& Context) + : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) { + Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); + } + + /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + //static inline bool classof(const ConstantPlaceHolder *) { return true; } + static bool classof(const Value *V) { + return isa(V) && + cast(V)->getOpcode() == Instruction::UserOp1; + } + + + /// Provide fast operand accessors + //DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + }; +} + +// FIXME: can we inherit this from ConstantExpr? +template <> +struct OperandTraits : + public FixedNumOperandTraits { +}; +} + + +void BitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { + if (Idx == size()) { + push_back(V); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = ValuePtrs[Idx]; + if (OldV == 0) { + OldV = V; + return; + } + + // Handle constants and non-constants (e.g. instrs) differently for + // efficiency. + if (Constant *PHC = dyn_cast(&*OldV)) { + ResolveConstants.push_back(std::make_pair(PHC, Idx)); + OldV = V; + } else { + // If there was a forward reference to this value, replace it. 
+ Value *PrevVal = OldV; + OldV->replaceAllUsesWith(V); + delete PrevVal; + } +} + + +Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, + const Type *Ty) { + if (Idx >= size()) + resize(Idx + 1); + + if (Value *V = ValuePtrs[Idx]) { + assert(Ty == V->getType() && "Type mismatch in constant table!"); + return cast(V); + } + + // Create and return a placeholder, which will later be RAUW'd. + Constant *C = new ConstantPlaceHolder(Ty, Context); + ValuePtrs[Idx] = C; + return C; +} + +Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, const Type *Ty) { + if (Idx >= size()) + resize(Idx + 1); + + if (Value *V = ValuePtrs[Idx]) { + assert((Ty == 0 || Ty == V->getType()) && "Type mismatch in value table!"); + return V; + } + + // No type specified, must be invalid reference. + if (Ty == 0) return 0; + + // Create and return a placeholder, which will later be RAUW'd. + Value *V = new Argument(Ty); + ValuePtrs[Idx] = V; + return V; +} + +/// ResolveConstantForwardRefs - Once all constants are read, this method bulk +/// resolves any forward references. The idea behind this is that we sometimes +/// get constants (such as large arrays) which reference *many* forward ref +/// constants. Replacing each of these causes a lot of thrashing when +/// building/reuniquing the constant. Instead of doing this, we look at all the +/// uses and rewrite all the place holders at once for any constant that uses +/// a placeholder. +void BitcodeReaderValueList::ResolveConstantForwardRefs() { + // Sort the values by-pointer so that they are efficient to look up with a + // binary search. + std::sort(ResolveConstants.begin(), ResolveConstants.end()); + + SmallVector NewOps; + + while (!ResolveConstants.empty()) { + Value *RealVal = operator[](ResolveConstants.back().second); + Constant *Placeholder = ResolveConstants.back().first; + ResolveConstants.pop_back(); + + // Loop over all users of the placeholder, updating them to reference the + // new value. If they reference more than one placeholder, update them all + // at once. + while (!Placeholder->use_empty()) { + Value::use_iterator UI = Placeholder->use_begin(); + User *U = *UI; + + // If the using object isn't uniqued, just update the operands. This + // handles instructions and initializers for global variables. + if (!isa(U) || isa(U)) { + UI.getUse().set(RealVal); + continue; + } + + // Otherwise, we have a constant that uses the placeholder. Replace that + // constant with a new constant that has *all* placeholder uses updated. + Constant *UserC = cast(U); + for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); + I != E; ++I) { + Value *NewOp; + if (!isa(*I)) { + // Not a placeholder reference. + NewOp = *I; + } else if (*I == Placeholder) { + // Common case is that it just references this one placeholder. + NewOp = RealVal; + } else { + // Otherwise, look up the placeholder in ResolveConstants. + ResolveConstantsTy::iterator It = + std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(), + std::pair(cast(*I), + 0)); + assert(It != ResolveConstants.end() && It->first == *I); + NewOp = operator[](It->second); + } + + NewOps.push_back(cast(NewOp)); + } + + // Make the new constant. 
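// ----------------------------------------------------------------------------
// Aside: BitcodeReaderValueList above implements the standard placeholder
// scheme -- a use of slot i that arrives before its definition gets a
// placeholder, and the later definition is swapped in (for real LLVM Values
// this is done with replaceAllUsesWith). A generic, self-contained model;
// names in 'sketch' are illustrative, not LLVM API.
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

namespace sketch {
struct Slot { bool IsPlaceholder; std::string Val; };

class SlotTable {
  std::vector<Slot> Slots;
public:
  // A use may precede the definition: hand out a placeholder slot.
  std::string &use(size_t Idx) {
    if (Idx >= Slots.size()) {
      Slot P; P.IsPlaceholder = true; P.Val = "<fwd>";
      Slots.resize(Idx + 1, P);
    }
    return Slots[Idx].Val;
  }
  // The definition arrives: overwrite the placeholder in place so every
  // earlier use observes the real value.
  void define(size_t Idx, const std::string &V) {
    use(Idx); // ensure the slot exists
    Slots[Idx].IsPlaceholder = false;
    Slots[Idx].Val = V;
  }
};
} // namespace sketch

int main() {
  sketch::SlotTable T;
  std::string &EarlyUse = T.use(2); // forward reference to slot 2
  assert(EarlyUse == "<fwd>");
  T.define(2, "real value");        // no reallocation happens here
  assert(EarlyUse == "real value"); // the old use now sees the definition
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------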
+ Constant *NewC; + if (ConstantArray *UserCA = dyn_cast(UserC)) { + NewC = ConstantArray::get(UserCA->getType(), &NewOps[0], + NewOps.size()); + } else if (ConstantStruct *UserCS = dyn_cast(UserC)) { + NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), + UserCS->getType()->isPacked()); + } else if (isa(UserC)) { + NewC = ConstantVector::get(NewOps); + } else { + assert(isa(UserC) && "Must be a ConstantExpr."); + NewC = cast(UserC)->getWithOperands(&NewOps[0], + NewOps.size()); + } + + UserC->replaceAllUsesWith(NewC); + UserC->destroyConstant(); + NewOps.clear(); + } + + // Update all ValueHandles, they should be the only users at this point. + Placeholder->replaceAllUsesWith(RealVal); + delete Placeholder; + } +} + +void BitcodeReaderMDValueList::AssignValue(Value *V, unsigned Idx) { + if (Idx == size()) { + push_back(V); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = MDValuePtrs[Idx]; + if (OldV == 0) { + OldV = V; + return; + } + + // If there was a forward reference to this value, replace it. + MDNode *PrevVal = cast(OldV); + OldV->replaceAllUsesWith(V); + MDNode::deleteTemporary(PrevVal); + // Deleting PrevVal sets Idx value in MDValuePtrs to null. Set new + // value for Idx. + MDValuePtrs[Idx] = V; +} + +Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { + if (Idx >= size()) + resize(Idx + 1); + + if (Value *V = MDValuePtrs[Idx]) { + assert(V->getType()->isMetadataTy() && "Type mismatch in value table!"); + return V; + } + + // Create and return a placeholder, which will later be RAUW'd. + Value *V = MDNode::getTemporary(Context, 0, 0); + MDValuePtrs[Idx] = V; + return V; +} + +const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) { + // If the TypeID is in range, return it. + if (ID < TypeList.size()) + return TypeList[ID].get(); + if (!isTypeTable) return 0; + + // The type table allows forward references. Push as many Opaque types as + // needed to get up to ID. + while (TypeList.size() <= ID) + TypeList.push_back(OpaqueType::get(Context)); + return TypeList.back().get(); +} + +//===----------------------------------------------------------------------===// +// Functions for parsing blocks from the bitcode file +//===----------------------------------------------------------------------===// + +bool BitcodeReader::ParseAttributeBlock() { + if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) + return Error("Malformed block record"); + + if (!MAttributes.empty()) + return Error("Multiple PARAMATTR blocks found!"); + + SmallVector Record; + + SmallVector Attrs; + + // Read all the records. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of PARAMATTR block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: ignore. + break; + case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...] + if (Record.size() & 1) + return Error("Invalid ENTRY record"); + + // FIXME : Remove this autoupgrade code in LLVM 3.0. + // If Function attributes are using index 0 then transfer them + // to index ~0. 
Index 0 is used for return value attributes but used to be + // used for function attributes. + Attributes RetAttribute = Attribute::None; + Attributes FnAttribute = Attribute::None; + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + // FIXME: remove in LLVM 3.0 + // The alignment is stored as a 16-bit raw value from bits 31--16. + // We shift the bits above 31 down by 11 bits. + + unsigned Alignment = (Record[i+1] & (0xffffull << 16)) >> 16; + if (Alignment && !isPowerOf2_32(Alignment)) + return Error("Alignment is not a power of two."); + + Attributes ReconstitutedAttr = Record[i+1] & 0xffff; + if (Alignment) + ReconstitutedAttr |= Attribute::constructAlignmentFromInt(Alignment); + ReconstitutedAttr |= (Record[i+1] & (0xffffull << 32)) >> 11; + Record[i+1] = ReconstitutedAttr; + + if (Record[i] == 0) + RetAttribute = Record[i+1]; + else if (Record[i] == ~0U) + FnAttribute = Record[i+1]; + } + + unsigned OldRetAttrs = (Attribute::NoUnwind|Attribute::NoReturn| + Attribute::ReadOnly|Attribute::ReadNone); + + if (FnAttribute == Attribute::None && RetAttribute != Attribute::None && + (RetAttribute & OldRetAttrs) != 0) { + if (FnAttribute == Attribute::None) { // add a slot so they get added. + Record.push_back(~0U); + Record.push_back(0); + } + + FnAttribute |= RetAttribute & OldRetAttrs; + RetAttribute &= ~OldRetAttrs; + } + + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + if (Record[i] == 0) { + if (RetAttribute != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(0, RetAttribute)); + } else if (Record[i] == ~0U) { + if (FnAttribute != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(~0U, FnAttribute)); + } else if (Record[i+1] != Attribute::None) + Attrs.push_back(AttributeWithIndex::get(Record[i], Record[i+1])); + } + + MAttributes.push_back(AttrListPtr::get(Attrs.begin(), Attrs.end())); + Attrs.clear(); + break; + } + } + } +} + + +bool BitcodeReader::ParseTypeTable() { + if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID)) + return Error("Malformed block record"); + + if (!TypeList.empty()) + return Error("Multiple TYPE_BLOCKs found!"); + + SmallVector Record; + unsigned NumRecords = 0; + + // Read all the records for this type table. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (NumRecords != TypeList.size()) + return Error("Invalid type forward reference in TYPE_BLOCK"); + if (Stream.ReadBlockEnd()) + return Error("Error at end of type table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + const Type *ResultTy = 0; + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + ResultTy = 0; + break; + case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] + // TYPE_CODE_NUMENTRY contains a count of the number of types in the + // type list. This allows us to reserve space. 
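// ----------------------------------------------------------------------------
// Aside: a standalone model of the bit surgery ParseAttributeBlock performs
// above -- the low 16 bits of the on-disk word are attribute bits, bits
// 31..16 hold a raw power-of-two alignment, and bits 47..32 are shifted down
// by 11. This sketch only reproduces the shifts; folding the alignment back
// into an attribute is left to Attribute::constructAlignmentFromInt in the
// real code. Names in 'sketch' are illustrative, not LLVM API.
#include <cassert>
#include <stdint.h>

namespace sketch {
inline bool isPow2(uint32_t V) { return V && (V & (V - 1)) == 0; }

inline uint64_t reconstitute(uint64_t Word, bool &Bad) {
  uint32_t Alignment = (uint32_t)((Word & (0xffffULL << 16)) >> 16);
  Bad = Alignment != 0 && !isPow2(Alignment);
  uint64_t Attrs = Word & 0xffff;            // low attribute bits
  Attrs |= (Word & (0xffffULL << 32)) >> 11; // high bits move down by 11
  return Attrs;
}
} // namespace sketch

int main() {
  bool Bad = false;
  // A word with attribute bit 0x4, alignment 8, and one high bit set.
  uint64_t Word = 0x4ULL | (8ULL << 16) | (1ULL << 33);
  uint64_t Attrs = sketch::reconstitute(Word, Bad);
  assert(!Bad);                 // 8 is a power of two
  assert(Attrs & 0x4);          // low bits survive unchanged
  assert(Attrs & (1ULL << 22)); // bit 33 lands at 33 - 11 = 22
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------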
+ if (Record.size() < 1) + return Error("Invalid TYPE_CODE_NUMENTRY record"); + TypeList.reserve(Record[0]); + continue; + case bitc::TYPE_CODE_VOID: // VOID + ResultTy = Type::getVoidTy(Context); + break; + case bitc::TYPE_CODE_FLOAT: // FLOAT + ResultTy = Type::getFloatTy(Context); + break; + case bitc::TYPE_CODE_DOUBLE: // DOUBLE + ResultTy = Type::getDoubleTy(Context); + break; + case bitc::TYPE_CODE_X86_FP80: // X86_FP80 + ResultTy = Type::getX86_FP80Ty(Context); + break; + case bitc::TYPE_CODE_FP128: // FP128 + ResultTy = Type::getFP128Ty(Context); + break; + case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 + ResultTy = Type::getPPC_FP128Ty(Context); + break; + case bitc::TYPE_CODE_LABEL: // LABEL + ResultTy = Type::getLabelTy(Context); + break; + case bitc::TYPE_CODE_OPAQUE: // OPAQUE + ResultTy = 0; + break; + case bitc::TYPE_CODE_METADATA: // METADATA + ResultTy = Type::getMetadataTy(Context); + break; + case bitc::TYPE_CODE_X86_MMX: // X86_MMX + ResultTy = Type::getX86_MMXTy(Context); + break; + case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] + if (Record.size() < 1) + return Error("Invalid Integer type record"); + + ResultTy = IntegerType::get(Context, Record[0]); + break; + case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or + // [pointee type, address space] + if (Record.size() < 1) + return Error("Invalid POINTER type record"); + unsigned AddressSpace = 0; + if (Record.size() == 2) + AddressSpace = Record[1]; + ResultTy = PointerType::get(getTypeByID(Record[0], true), + AddressSpace); + break; + } + case bitc::TYPE_CODE_FUNCTION: { + // FIXME: attrid is dead, remove it in LLVM 3.0 + // FUNCTION: [vararg, attrid, retty, paramty x N] + if (Record.size() < 3) + return Error("Invalid FUNCTION type record"); + std::vector ArgTys; + for (unsigned i = 3, e = Record.size(); i != e; ++i) + ArgTys.push_back(getTypeByID(Record[i], true)); + + ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys, + Record[0]); + break; + } + case bitc::TYPE_CODE_STRUCT: { // STRUCT: [ispacked, eltty x N] + if (Record.size() < 1) + return Error("Invalid STRUCT type record"); + std::vector EltTys; + for (unsigned i = 1, e = Record.size(); i != e; ++i) + EltTys.push_back(getTypeByID(Record[i], true)); + ResultTy = StructType::get(Context, EltTys, Record[0]); + break; + } + case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid ARRAY type record"); + ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]); + break; + case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] + if (Record.size() < 2) + return Error("Invalid VECTOR type record"); + ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]); + break; + } + + if (NumRecords == TypeList.size()) { + // If this is a new type slot, just append it. + TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context)); + ++NumRecords; + } else if (ResultTy == 0) { + // Otherwise, this was forward referenced, so an opaque type was created, + // but the result type is actually just an opaque. Leave the one we + // created previously. + ++NumRecords; + } else { + // Otherwise, this was forward referenced, so an opaque type was created. + // Resolve the opaque type to the real type now. + assert(NumRecords < TypeList.size() && "Typelist imbalance"); + const OpaqueType *OldTy = cast(TypeList[NumRecords++].get()); + + // Don't directly push the new type on the Tab. Instead we want to replace + // the opaque type we previously inserted with the new concrete value. 
The + // refinement from the abstract (opaque) type to the new type causes all + // uses of the abstract type to use the concrete type (NewTy). This will + // also cause the opaque type to be deleted. + const_cast(OldTy)->refineAbstractTypeTo(ResultTy); + + // This should have replaced the old opaque type with the new type in the + // value table... or with a preexisting type that was already in the + // system. Let's just make sure it did. + assert(TypeList[NumRecords-1].get() != OldTy && + "refineAbstractType didn't work!"); + } + } +} + + +bool BitcodeReader::ParseTypeSymbolTable() { + if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this type table. + std::string TypeName; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of type symbol table block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. + break; + case bitc::TST_CODE_ENTRY: // TST_ENTRY: [typeid, namechar x N] + if (ConvertToString(Record, 1, TypeName)) + return Error("Invalid TST_ENTRY record"); + unsigned TypeID = Record[0]; + if (TypeID >= TypeList.size()) + return Error("Invalid Type ID in TST_ENTRY record"); + + TheModule->addTypeName(TypeName, TypeList[TypeID].get()); + TypeName.clear(); + break; + } + } +} + +bool BitcodeReader::ParseValueSymbolTable() { + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of value symbol table block"); + return false; + } + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: unknown type. 
+ break; + case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_ENTRY record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return Error("Invalid Value ID in VST_ENTRY record"); + Value *V = ValueList[ValueID]; + + V->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + case bitc::VST_CODE_BBENTRY: { + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_BBENTRY record"); + BasicBlock *BB = getBasicBlock(Record[0]); + if (BB == 0) + return Error("Invalid BB ID in VST_BBENTRY record"); + + BB->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + } + } +} + +bool BitcodeReader::ParseMetadata() { + unsigned NextMDValueNo = MDValueList.size(); + + if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records. + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of PARAMATTR block"); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + bool IsFunctionLocal = false; + // Read a record. + Record.clear(); + Code = Stream.ReadRecord(Code, Record); + switch (Code) { + default: // Default behavior: ignore. + break; + case bitc::METADATA_NAME: { + // Read named of the named metadata. + unsigned NameLength = Record.size(); + SmallString<8> Name; + Name.resize(NameLength); + for (unsigned i = 0; i != NameLength; ++i) + Name[i] = Record[i]; + Record.clear(); + Code = Stream.ReadCode(); + + // METADATA_NAME is always followed by METADATA_NAMED_NODE2. + // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0. + unsigned NextBitCode = Stream.ReadRecord(Code, Record); + if (NextBitCode == bitc::METADATA_NAMED_NODE) { + LLVM2_7MetadataDetected = true; + } else if (NextBitCode != bitc::METADATA_NAMED_NODE2) + assert ( 0 && "Invalid Named Metadata record"); + + // Read named metadata elements. + unsigned Size = Record.size(); + NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); + for (unsigned i = 0; i != Size; ++i) { + MDNode *MD = dyn_cast(MDValueList.getValueFwdRef(Record[i])); + if (MD == 0) + return Error("Malformed metadata record"); + NMD->addOperand(MD); + } + // Backwards compatibility hack: NamedMDValues used to be Values, + // and they got their own slots in the value numbering. They are no + // longer Values, however we still need to account for them in the + // numbering in order to be able to read old bitcode files. + // FIXME: Remove this in LLVM 3.0. + if (LLVM2_7MetadataDetected) + MDValueList.AssignValue(0, NextMDValueNo++); + break; + } + case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_FN_NODE2: + IsFunctionLocal = true; + // fall-through + case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_NODE2: { + + // Detect 2.7-era metadata. + // FIXME: Remove in LLVM 3.0. 
+ if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE) + LLVM2_7MetadataDetected = true; + + if (Record.size() % 2 == 1) + return Error("Invalid METADATA_NODE2 record"); + + unsigned Size = Record.size(); + SmallVector Elts; + for (unsigned i = 0; i != Size; i += 2) { + const Type *Ty = getTypeByID(Record[i]); + if (!Ty) return Error("Invalid METADATA_NODE2 record"); + if (Ty->isMetadataTy()) + Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); + else if (!Ty->isVoidTy()) + Elts.push_back(ValueList.getValueFwdRef(Record[i+1], Ty)); + else + Elts.push_back(NULL); + } + Value *V = MDNode::getWhenValsUnresolved(Context, + Elts.data(), Elts.size(), + IsFunctionLocal); + IsFunctionLocal = false; + MDValueList.AssignValue(V, NextMDValueNo++); + break; + } + case bitc::METADATA_STRING: { + unsigned MDStringLength = Record.size(); + SmallString<8> String; + String.resize(MDStringLength); + for (unsigned i = 0; i != MDStringLength; ++i) + String[i] = Record[i]; + Value *V = MDString::get(Context, + StringRef(String.data(), String.size())); + MDValueList.AssignValue(V, NextMDValueNo++); + break; + } + case bitc::METADATA_KIND: { + unsigned RecordLength = Record.size(); + if (Record.empty() || RecordLength < 2) + return Error("Invalid METADATA_KIND record"); + SmallString<8> Name; + Name.resize(RecordLength-1); + unsigned Kind = Record[0]; + for (unsigned i = 1; i != RecordLength; ++i) + Name[i-1] = Record[i]; + + unsigned NewKind = TheModule->getMDKindID(Name.str()); + if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) + return Error("Conflicting METADATA_KIND records"); + break; + } + } + } +} + +/// DecodeSignRotatedValue - Decode a signed value stored with the sign bit in +/// the LSB for dense VBR encoding. +static uint64_t DecodeSignRotatedValue(uint64_t V) { + if ((V & 1) == 0) + return V >> 1; + if (V != 1) + return -(V >> 1); + // There is no such thing as -0 with integers. "-0" really means MININT. + return 1ULL << 63; +} + +/// ResolveGlobalAndAliasInits - Resolve all of the initializers for global +/// values and aliases that we can. +bool BitcodeReader::ResolveGlobalAndAliasInits() { + std::vector > GlobalInitWorklist; + std::vector > AliasInitWorklist; + + GlobalInitWorklist.swap(GlobalInits); + AliasInitWorklist.swap(AliasInits); + + while (!GlobalInitWorklist.empty()) { + unsigned ValID = GlobalInitWorklist.back().second; + if (ValID >= ValueList.size()) { + // Not ready to resolve this yet, it requires something later in the file. + GlobalInits.push_back(GlobalInitWorklist.back()); + } else { + if (Constant *C = dyn_cast(ValueList[ValID])) + GlobalInitWorklist.back().first->setInitializer(C); + else + return Error("Global variable initializer is not a constant!"); + } + GlobalInitWorklist.pop_back(); + } + + while (!AliasInitWorklist.empty()) { + unsigned ValID = AliasInitWorklist.back().second; + if (ValID >= ValueList.size()) { + AliasInits.push_back(AliasInitWorklist.back()); + } else { + if (Constant *C = dyn_cast(ValueList[ValID])) + AliasInitWorklist.back().first->setAliasee(C); + else + return Error("Alias initializer is not a constant!"); + } + AliasInitWorklist.pop_back(); + } + return false; +} + +bool BitcodeReader::ParseConstants() { + if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this value table. 
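// ----------------------------------------------------------------------------
// Aside: a round trip for the sign-rotated encoding that
// DecodeSignRotatedValue above inverts. The sign is kept in the LSB so that
// small negative numbers stay small for VBR; INT64_MIN has no positive twin
// and encodes as the otherwise-unused pattern 1. The decoder below mirrors
// the patch; the encoder is the implied inverse (names in 'sketch' are
// illustrative, not LLVM API).
#include <cassert>
#include <stdint.h>

namespace sketch {
inline uint64_t encodeSignRotated(int64_t V) {
  if (V >= 0)         return (uint64_t)V << 1; // sign bit 0
  if (V == INT64_MIN) return 1;                // special case
  return ((uint64_t)(-V) << 1) | 1;            // sign bit 1
}

inline uint64_t decodeSignRotated(uint64_t V) { // as in the patch
  if ((V & 1) == 0) return V >> 1;
  if (V != 1)       return -(V >> 1);
  return 1ULL << 63;                            // MININT
}
} // namespace sketch

int main() {
  const int64_t Samples[] = { 0, 1, -1, 42, -42, INT64_MIN };
  for (unsigned i = 0; i != sizeof(Samples) / sizeof(Samples[0]); ++i) {
    uint64_t Enc = sketch::encodeSignRotated(Samples[i]);
    assert((int64_t)sketch::decodeSignRotated(Enc) == Samples[i]);
  }
  return 0;
}
// End of aside.
// ----------------------------------------------------------------------------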
+ const Type *CurTy = Type::getInt32Ty(Context); + unsigned NextCstNo = ValueList.size(); + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) + break; + + if (Code == bitc::ENTER_SUBBLOCK) { + // No known subblocks, always skip them. + Stream.ReadSubBlockID(); + if (Stream.SkipBlock()) + return Error("Malformed block record"); + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + Value *V = 0; + unsigned BitCode = Stream.ReadRecord(Code, Record); + switch (BitCode) { + default: // Default behavior: unknown constant + case bitc::CST_CODE_UNDEF: // UNDEF + V = UndefValue::get(CurTy); + break; + case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] + if (Record.empty()) + return Error("Malformed CST_SETTYPE record"); + if (Record[0] >= TypeList.size()) + return Error("Invalid Type ID in CST_SETTYPE record"); + CurTy = TypeList[Record[0]]; + continue; // Skip the ValueList manipulation. + case bitc::CST_CODE_NULL: // NULL + V = Constant::getNullValue(CurTy); + break; + case bitc::CST_CODE_INTEGER: // INTEGER: [intval] + if (!CurTy->isIntegerTy() || Record.empty()) + return Error("Invalid CST_INTEGER record"); + V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0])); + break; + case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] + if (!CurTy->isIntegerTy() || Record.empty()) + return Error("Invalid WIDE_INTEGER record"); + + unsigned NumWords = Record.size(); + SmallVector Words; + Words.resize(NumWords); + for (unsigned i = 0; i != NumWords; ++i) + Words[i] = DecodeSignRotatedValue(Record[i]); + V = ConstantInt::get(Context, + APInt(cast(CurTy)->getBitWidth(), + NumWords, &Words[0])); + break; + } + case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] + if (Record.empty()) + return Error("Invalid FLOAT record"); + if (CurTy->isFloatTy()) + V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); + else if (CurTy->isDoubleTy()) + V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); + else if (CurTy->isX86_FP80Ty()) { + // Bits are not stored the same way as a normal i80 APInt, compensate. 
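+        // Record[0] holds bits [79:16] of the constant and the low 16 bits
+        // of Record[1] hold bits [15:0]; APInt wants (low 64 bits, high 16
+        // bits), hence the rearrangement below.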
+        uint64_t Rearrange[2];
+        Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
+        Rearrange[1] = Record[0] >> 48;
+        V = ConstantFP::get(Context, APFloat(APInt(80, 2, Rearrange)));
+      } else if (CurTy->isFP128Ty())
+        V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0]), true));
+      else if (CurTy->isPPC_FP128Ty())
+        V = ConstantFP::get(Context, APFloat(APInt(128, 2, &Record[0])));
+      else
+        V = UndefValue::get(CurTy);
+      break;
+    }
+
+    case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number]
+      if (Record.empty())
+        return Error("Invalid CST_AGGREGATE record");
+
+      unsigned Size = Record.size();
+      std::vector<Constant*> Elts;
+
+      if (const StructType *STy = dyn_cast<StructType>(CurTy)) {
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i],
+                                                     STy->getElementType(i)));
+        V = ConstantStruct::get(STy, Elts);
+      } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) {
+        const Type *EltTy = ATy->getElementType();
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+        V = ConstantArray::get(ATy, Elts);
+      } else if (const VectorType *VTy = dyn_cast<VectorType>(CurTy)) {
+        const Type *EltTy = VTy->getElementType();
+        for (unsigned i = 0; i != Size; ++i)
+          Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy));
+        V = ConstantVector::get(Elts);
+      } else {
+        V = UndefValue::get(CurTy);
+      }
+      break;
+    }
+    case bitc::CST_CODE_STRING: { // STRING: [values]
+      if (Record.empty())
+        return Error("Invalid CST_STRING record");
+
+      const ArrayType *ATy = cast<ArrayType>(CurTy);
+      const Type *EltTy = ATy->getElementType();
+
+      unsigned Size = Record.size();
+      std::vector<Constant*> Elts;
+      for (unsigned i = 0; i != Size; ++i)
+        Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+      V = ConstantArray::get(ATy, Elts);
+      break;
+    }
+    case bitc::CST_CODE_CSTRING: { // CSTRING: [values]
+      if (Record.empty())
+        return Error("Invalid CST_CSTRING record");
+
+      const ArrayType *ATy = cast<ArrayType>(CurTy);
+      const Type *EltTy = ATy->getElementType();
+
+      unsigned Size = Record.size();
+      std::vector<Constant*> Elts;
+      for (unsigned i = 0; i != Size; ++i)
+        Elts.push_back(ConstantInt::get(EltTy, Record[i]));
+      Elts.push_back(Constant::getNullValue(EltTy));
+      V = ConstantArray::get(ATy, Elts);
+      break;
+    }
+    case bitc::CST_CODE_CE_BINOP: {  // CE_BINOP: [opcode, opval, opval]
+      if (Record.size() < 3) return Error("Invalid CE_BINOP record");
+      int Opc = GetDecodedBinaryOpcode(Record[0], CurTy);
+      if (Opc < 0) {
+        V = UndefValue::get(CurTy);  // Unknown binop.
+      } else {
+        Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy);
+        Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy);
+        unsigned Flags = 0;
+        if (Record.size() >= 4) {
+          if (Opc == Instruction::Add ||
+              Opc == Instruction::Sub ||
+              Opc == Instruction::Mul ||
+              Opc == Instruction::Shl) {
+            if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP))
+              Flags |= OverflowingBinaryOperator::NoSignedWrap;
+            if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP))
+              Flags |= OverflowingBinaryOperator::NoUnsignedWrap;
+          } else if (Opc == Instruction::SDiv ||
+                     Opc == Instruction::UDiv ||
+                     Opc == Instruction::LShr ||
+                     Opc == Instruction::AShr) {
+            if (Record[3] & (1 << bitc::PEO_EXACT))
+              Flags |= SDivOperator::IsExact;
+          }
+        }
+        V = ConstantExpr::get(Opc, LHS, RHS, Flags);
+      }
+      break;
+    }
+    case bitc::CST_CODE_CE_CAST: {  // CE_CAST: [opcode, opty, opval]
+      if (Record.size() < 3) return Error("Invalid CE_CAST record");
+      int Opc = GetDecodedCastOpcode(Record[0]);
+      if (Opc < 0) {
+        V = UndefValue::get(CurTy);  // Unknown cast.
+ } else { + const Type *OpTy = getTypeByID(Record[1]); + if (!OpTy) return Error("Invalid CE_CAST record"); + Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); + V = ConstantExpr::getCast(Opc, Op, CurTy); + } + break; + } + case bitc::CST_CODE_CE_INBOUNDS_GEP: + case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] + if (Record.size() & 1) return Error("Invalid CE_GEP record"); + SmallVector Elts; + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + const Type *ElTy = getTypeByID(Record[i]); + if (!ElTy) return Error("Invalid CE_GEP record"); + Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); + } + if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP) + V = ConstantExpr::getInBoundsGetElementPtr(Elts[0], &Elts[1], + Elts.size()-1); + else + V = ConstantExpr::getGetElementPtr(Elts[0], &Elts[1], + Elts.size()-1); + break; + } + case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] + if (Record.size() < 3) return Error("Invalid CE_SELECT record"); + V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], + Type::getInt1Ty(Context)), + ValueList.getConstantFwdRef(Record[1],CurTy), + ValueList.getConstantFwdRef(Record[2],CurTy)); + break; + case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] + if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); + const VectorType *OpTy = + dyn_cast_or_null(getTypeByID(Record[0])); + if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + V = ConstantExpr::getExtractElement(Op0, Op1); + break; + } + case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval] + const VectorType *OpTy = dyn_cast(CurTy); + if (Record.size() < 3 || OpTy == 0) + return Error("Invalid CE_INSERTELT record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[1], + OpTy->getElementType()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + V = ConstantExpr::getInsertElement(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] + const VectorType *OpTy = dyn_cast(CurTy); + if (Record.size() < 3 || OpTy == 0) + return Error("Invalid CE_SHUFFLEVEC record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); + const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + OpTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] + const VectorType *RTy = dyn_cast(CurTy); + const VectorType *OpTy = + dyn_cast_or_null(getTypeByID(Record[0])); + if (Record.size() < 4 || RTy == 0 || OpTy == 0) + return Error("Invalid CE_SHUFVEC_EX record"); + Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); + const Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), + RTy->getNumElements()); + Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); + V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); + break; + } + case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] + if (Record.size() < 4) return Error("Invalid CE_CMP record"); + const Type 
*OpTy = getTypeByID(Record[0]);
+      if (OpTy == 0) return Error("Invalid CE_CMP record");
+      Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy);
+      Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy);
+
+      if (OpTy->isFPOrFPVectorTy())
+        V = ConstantExpr::getFCmp(Record[3], Op0, Op1);
+      else
+        V = ConstantExpr::getICmp(Record[3], Op0, Op1);
+      break;
+    }
+    case bitc::CST_CODE_INLINEASM: {
+      if (Record.size() < 2) return Error("Invalid INLINEASM record");
+      std::string AsmStr, ConstrStr;
+      bool HasSideEffects = Record[0] & 1;
+      bool IsAlignStack = Record[0] >> 1;
+      unsigned AsmStrSize = Record[1];
+      if (2+AsmStrSize >= Record.size())
+        return Error("Invalid INLINEASM record");
+      unsigned ConstStrSize = Record[2+AsmStrSize];
+      if (3+AsmStrSize+ConstStrSize > Record.size())
+        return Error("Invalid INLINEASM record");
+
+      for (unsigned i = 0; i != AsmStrSize; ++i)
+        AsmStr += (char)Record[2+i];
+      for (unsigned i = 0; i != ConstStrSize; ++i)
+        ConstrStr += (char)Record[3+AsmStrSize+i];
+      const PointerType *PTy = cast<PointerType>(CurTy);
+      V = InlineAsm::get(cast<FunctionType>(PTy->getElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+      break;
+    }
+    case bitc::CST_CODE_BLOCKADDRESS:{
+      if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record");
+      const Type *FnTy = getTypeByID(Record[0]);
+      if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record");
+      Function *Fn =
+        dyn_cast_or_null<Function>(ValueList.getConstantFwdRef(Record[1],FnTy));
+      if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record");
+
+      GlobalVariable *FwdRef = new GlobalVariable(*Fn->getParent(),
+                                                  Type::getInt8Ty(Context),
+                                            false, GlobalValue::InternalLinkage,
+                                                  0, "");
+      BlockAddrFwdRefs[Fn].push_back(std::make_pair(Record[2], FwdRef));
+      V = FwdRef;
+      break;
+    }
+    }
+
+    ValueList.AssignValue(V, NextCstNo);
+    ++NextCstNo;
+  }
+
+  if (NextCstNo != ValueList.size())
+    return Error("Invalid constant reference!");
+
+  if (Stream.ReadBlockEnd())
+    return Error("Error at end of constants block");
+
+  // Once all the constants have been read, go through and resolve forward
+  // references.
+  ValueList.ResolveConstantForwardRefs();
+  return false;
+}
+
+/// RememberAndSkipFunctionBody - When we see the block for a function body,
+/// remember where it is and then skip it.  This lets us lazily deserialize the
+/// functions.
+bool BitcodeReader::RememberAndSkipFunctionBody() {
+  // Get the function we are talking about.
+  if (FunctionsWithBodies.empty())
+    return Error("Insufficient function protos");
+
+  Function *Fn = FunctionsWithBodies.back();
+  FunctionsWithBodies.pop_back();
+
+  // Save the current stream state.
+  uint64_t CurBit = Stream.GetCurrentBitNo();
+  DeferredFunctionInfo[Fn] = CurBit;
+
+  // Skip over the function block for now.
+  if (Stream.SkipBlock())
+    return Error("Malformed block record");
+  return false;
+}
+
+bool BitcodeReader::ParseModule() {
+  if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+    return Error("Malformed block record");
+
+  SmallVector<uint64_t, 64> Record;
+  std::vector<std::string> SectionTable;
+  std::vector<std::string> GCTable;
+
+  // Read all the records for this module.
+  while (!Stream.AtEndOfStream()) {
+    unsigned Code = Stream.ReadCode();
+    if (Code == bitc::END_BLOCK) {
+      if (Stream.ReadBlockEnd())
+        return Error("Error at end of module block");
+
+      // Patch the initializers for globals and aliases up.
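+      // Initializers may forward-reference constants from a later CONSTANTS
+      // block; ResolveGlobalAndAliasInits requeues such entries, so anything
+      // still pending once the module block ends really is malformed.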
+ ResolveGlobalAndAliasInits(); + if (!GlobalInits.empty() || !AliasInits.empty()) + return Error("Malformed global initializer set"); + if (!FunctionsWithBodies.empty()) + return Error("Too few function bodies found"); + + // Look for intrinsic functions which need to be upgraded at some point + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + Function* NewFn; + if (UpgradeIntrinsicFunction(FI, NewFn)) + UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); + } + + // Look for global variables which need to be renamed. + for (Module::global_iterator + GI = TheModule->global_begin(), GE = TheModule->global_end(); + GI != GE; ++GI) + UpgradeGlobalVariable(GI); + + // Force deallocation of memory for these vectors to favor the client that + // want lazy deserialization. + std::vector >().swap(GlobalInits); + std::vector >().swap(AliasInits); + std::vector().swap(FunctionsWithBodies); + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + switch (Stream.ReadSubBlockID()) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::PARAMATTR_BLOCK_ID: + if (ParseAttributeBlock()) + return true; + break; + case bitc::TYPE_BLOCK_ID: + if (ParseTypeTable()) + return true; + break; + case bitc::TYPE_SYMTAB_BLOCK_ID: + if (ParseTypeSymbolTable()) + return true; + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + break; + case bitc::CONSTANTS_BLOCK_ID: + if (ParseConstants() || ResolveGlobalAndAliasInits()) + return true; + break; + case bitc::METADATA_BLOCK_ID: + if (ParseMetadata()) + return true; + break; + case bitc::FUNCTION_BLOCK_ID: + // If this is the first function body we've seen, reverse the + // FunctionsWithBodies list. + if (!HasReversedFunctionsWithBodies) { + std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); + HasReversedFunctionsWithBodies = true; + } + + if (RememberAndSkipFunctionBody()) + return true; + break; + } + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + switch (Stream.ReadRecord(Code, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: // VERSION: [version#] + if (Record.size() < 1) + return Error("Malformed MODULE_CODE_VERSION"); + // Only version #0 is supported so far. 
+ if (Record[0] != 0) + return Error("Unknown bitstream version!"); + break; + case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_TRIPLE record"); + TheModule->setTargetTriple(S); + break; + } + case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_DATALAYOUT record"); + TheModule->setDataLayout(S); + break; + } + case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_ASM record"); + TheModule->setModuleInlineAsm(S); + break; + } + case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_DEPLIB record"); + TheModule->addLibrary(S); + break; + } + case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_SECTIONNAME record"); + SectionTable.push_back(S); + break; + } + case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_GCNAME record"); + GCTable.push_back(S); + break; + } + // GLOBALVAR: [pointer type, isconst, initid, + // linkage, alignment, section, visibility, threadlocal, + // unnamed_addr] + case bitc::MODULE_CODE_GLOBALVAR: { + if (Record.size() < 6) + return Error("Invalid MODULE_CODE_GLOBALVAR record"); + const Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record"); + if (!Ty->isPointerTy()) + return Error("Global not a pointer type!"); + unsigned AddressSpace = cast(Ty)->getAddressSpace(); + Ty = cast(Ty)->getElementType(); + + bool isConstant = Record[1]; + GlobalValue::LinkageTypes Linkage = GetDecodedLinkage(Record[3]); + unsigned Alignment = (1 << Record[4]) >> 1; + std::string Section; + if (Record[5]) { + if (Record[5]-1 >= SectionTable.size()) + return Error("Invalid section ID"); + Section = SectionTable[Record[5]-1]; + } + GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; + if (Record.size() > 6) + Visibility = GetDecodedVisibility(Record[6]); + bool isThreadLocal = false; + if (Record.size() > 7) + isThreadLocal = Record[7]; + + bool UnnamedAddr = false; + if (Record.size() > 8) + UnnamedAddr = Record[8]; + + GlobalVariable *NewGV = + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, + isThreadLocal, AddressSpace); + NewGV->setAlignment(Alignment); + if (!Section.empty()) + NewGV->setSection(Section); + NewGV->setVisibility(Visibility); + NewGV->setThreadLocal(isThreadLocal); + NewGV->setUnnamedAddr(UnnamedAddr); + + ValueList.push_back(NewGV); + + // Remember which value to use for the global initializer. 
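+      // The initid field is biased by one so that 0 can mean "no
+      // initializer" (i.e. a declaration), hence the InitID-1 below.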
+ if (unsigned InitID = Record[2]) + GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); + break; + } + // FUNCTION: [type, callingconv, isproto, linkage, paramattr, + // alignment, section, visibility, gc, unnamed_addr] + case bitc::MODULE_CODE_FUNCTION: { + if (Record.size() < 8) + return Error("Invalid MODULE_CODE_FUNCTION record"); + const Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record"); + if (!Ty->isPointerTy()) + return Error("Function not a pointer type!"); + const FunctionType *FTy = + dyn_cast(cast(Ty)->getElementType()); + if (!FTy) + return Error("Function not a pointer to function type!"); + + Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, + "", TheModule); + + Func->setCallingConv(static_cast(Record[1])); + bool isProto = Record[2]; + Func->setLinkage(GetDecodedLinkage(Record[3])); + Func->setAttributes(getAttributes(Record[4])); + + Func->setAlignment((1 << Record[5]) >> 1); + if (Record[6]) { + if (Record[6]-1 >= SectionTable.size()) + return Error("Invalid section ID"); + Func->setSection(SectionTable[Record[6]-1]); + } + Func->setVisibility(GetDecodedVisibility(Record[7])); + if (Record.size() > 8 && Record[8]) { + if (Record[8]-1 > GCTable.size()) + return Error("Invalid GC ID"); + Func->setGC(GCTable[Record[8]-1].c_str()); + } + bool UnnamedAddr = false; + if (Record.size() > 9) + UnnamedAddr = Record[9]; + Func->setUnnamedAddr(UnnamedAddr); + ValueList.push_back(Func); + + // If this is a function with a body, remember the prototype we are + // creating now, so that we can match up the body with them later. + if (!isProto) + FunctionsWithBodies.push_back(Func); + break; + } + // ALIAS: [alias type, aliasee val#, linkage] + // ALIAS: [alias type, aliasee val#, linkage, visibility] + case bitc::MODULE_CODE_ALIAS: { + if (Record.size() < 3) + return Error("Invalid MODULE_ALIAS record"); + const Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_ALIAS record"); + if (!Ty->isPointerTy()) + return Error("Function not a pointer type!"); + + GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), + "", 0, TheModule); + // Old bitcode files didn't have visibility field. + if (Record.size() > 3) + NewGA->setVisibility(GetDecodedVisibility(Record[3])); + ValueList.push_back(NewGA); + AliasInits.push_back(std::make_pair(NewGA, Record[1])); + break; + } + /// MODULE_CODE_PURGEVALS: [numvals] + case bitc::MODULE_CODE_PURGEVALS: + // Trim down the value list to the specified size. + if (Record.size() < 1 || Record[0] > ValueList.size()) + return Error("Invalid MODULE_PURGEVALS record"); + ValueList.shrinkTo(Record[0]); + break; + } + Record.clear(); + } + + return Error("Premature end of bitstream"); +} + +bool BitcodeReader::ParseBitcodeInto(Module *M) { + TheModule = 0; + + unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) { + if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) + return Error("Invalid bitcode signature"); + else + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + } + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. 
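+  // After the optional wrapper comes the raw bitcode magic sniffed below:
+  // the bytes 'B', 'C' followed by the nibbles 0x0, 0xC, 0xE, 0xD, i.e. the
+  // stream begins with "BC" 0xC0DE.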
+ if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) + return Error("Invalid bitcode wrapper header"); + + StreamFile.init(BufPtr, BufEnd); + Stream.init(StreamFile); + + // Sniff for the signature. + if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return Error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + + if (Code != bitc::ENTER_SUBBLOCK) + return Error("Invalid record at top-level"); + + unsigned BlockID = Stream.ReadSubBlockID(); + + // We only know the MODULE subblock ID. + switch (BlockID) { + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::MODULE_BLOCK_ID: + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule()) + return true; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + } + + return false; +} + +bool BitcodeReader::ParseModuleTriple(std::string &Triple) { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this module. + while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of module block"); + + return false; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + switch (Stream.ReadSubBlockID()) { + default: // Skip unknown content. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + switch (Stream.ReadRecord(Code, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: // VERSION: [version#] + if (Record.size() < 1) + return Error("Malformed MODULE_CODE_VERSION"); + // Only version #0 is supported so far. + if (Record[0] != 0) + return Error("Unknown bitstream version!"); + break; + case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] + std::string S; + if (ConvertToString(Record, 0, S)) + return Error("Invalid MODULE_CODE_TRIPLE record"); + Triple = S; + break; + } + } + Record.clear(); + } + + return Error("Premature end of bitstream"); +} + +bool BitcodeReader::ParseTriple(std::string &Triple) { + if (Buffer->getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart(); + unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + // If we have a wrapper header, parse it and ignore the non-bc file contents. + // The magic number is 0x0B17C0DE stored in little endian. + if (isBitcodeWrapper(BufPtr, BufEnd)) + if (SkipBitcodeWrapperHeader(BufPtr, BufEnd)) + return Error("Invalid bitcode wrapper header"); + + StreamFile.init(BufPtr, BufEnd); + Stream.init(StreamFile); + + // Sniff for the signature. 
+ if (Stream.Read(8) != 'B' || + Stream.Read(8) != 'C' || + Stream.Read(4) != 0x0 || + Stream.Read(4) != 0xC || + Stream.Read(4) != 0xE || + Stream.Read(4) != 0xD) + return Error("Invalid bitcode signature"); + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (!Stream.AtEndOfStream()) { + unsigned Code = Stream.ReadCode(); + + if (Code != bitc::ENTER_SUBBLOCK) + return Error("Invalid record at top-level"); + + unsigned BlockID = Stream.ReadSubBlockID(); + + // We only know the MODULE subblock ID. + switch (BlockID) { + case bitc::MODULE_BLOCK_ID: + if (ParseModuleTriple(Triple)) + return true; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + } + + return false; +} + +/// ParseMetadataAttachment - Parse metadata attachments. +bool BitcodeReader::ParseMetadataAttachment() { + if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) + return Error("Malformed block record"); + + SmallVector Record; + while(1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of PARAMATTR block"); + break; + } + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + // Read a metadata attachment record. + Record.clear(); + switch (Stream.ReadRecord(Code, Record)) { + default: // Default behavior: ignore. + break; + // FIXME: Remove in LLVM 3.0. + case bitc::METADATA_ATTACHMENT: + LLVM2_7MetadataDetected = true; + case bitc::METADATA_ATTACHMENT2: { + unsigned RecordLength = Record.size(); + if (Record.empty() || (RecordLength - 1) % 2 == 1) + return Error ("Invalid METADATA_ATTACHMENT reader!"); + Instruction *Inst = InstructionList[Record[0]]; + for (unsigned i = 1; i != RecordLength; i = i+2) { + unsigned Kind = Record[i]; + DenseMap::iterator I = + MDKindMap.find(Kind); + if (I == MDKindMap.end()) + return Error("Invalid metadata kind ID"); + Value *Node = MDValueList.getValueFwdRef(Record[i+1]); + Inst->setMetadata(I->second, cast(Node)); + } + break; + } + } + } + return false; +} + +/// ParseFunctionBody - Lazily parse the specified function body block. +bool BitcodeReader::ParseFunctionBody(Function *F) { + if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) + return Error("Malformed block record"); + + InstructionList.clear(); + unsigned ModuleValueListSize = ValueList.size(); + unsigned ModuleMDValueListSize = MDValueList.size(); + + // Add all the function arguments to the value table. + for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ValueList.push_back(I); + + unsigned NextValueNo = ValueList.size(); + BasicBlock *CurBB = 0; + unsigned CurBBNo = 0; + + DebugLoc LastLoc; + + // Read all the records. + SmallVector Record; + while (1) { + unsigned Code = Stream.ReadCode(); + if (Code == bitc::END_BLOCK) { + if (Stream.ReadBlockEnd()) + return Error("Error at end of function block"); + break; + } + + if (Code == bitc::ENTER_SUBBLOCK) { + switch (Stream.ReadSubBlockID()) { + default: // Skip unknown content. 
+ if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case bitc::CONSTANTS_BLOCK_ID: + if (ParseConstants()) return true; + NextValueNo = ValueList.size(); + break; + case bitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) return true; + break; + case bitc::METADATA_ATTACHMENT_ID: + if (ParseMetadataAttachment()) return true; + break; + case bitc::METADATA_BLOCK_ID: + if (ParseMetadata()) return true; + break; + } + continue; + } + + if (Code == bitc::DEFINE_ABBREV) { + Stream.ReadAbbrevRecord(); + continue; + } + + // Read a record. + Record.clear(); + Instruction *I = 0; + unsigned BitCode = Stream.ReadRecord(Code, Record); + switch (BitCode) { + default: // Default behavior: reject + return Error("Unknown instruction"); + case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] + if (Record.size() < 1 || Record[0] == 0) + return Error("Invalid DECLAREBLOCKS record"); + // Create all the basic blocks for the function. + FunctionBBs.resize(Record[0]); + for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) + FunctionBBs[i] = BasicBlock::Create(Context, "", F); + CurBB = FunctionBBs[0]; + continue; + + + case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN + // This record indicates that the last instruction is at the same + // location as the previous instruction with a location. + I = 0; + + // Get the last instruction emitted. + if (CurBB && !CurBB->empty()) + I = &CurBB->back(); + else if (CurBBNo && FunctionBBs[CurBBNo-1] && + !FunctionBBs[CurBBNo-1]->empty()) + I = &FunctionBBs[CurBBNo-1]->back(); + + if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record"); + I->setDebugLoc(LastLoc); + I = 0; + continue; + + // FIXME: Remove this in LLVM 3.0. + case bitc::FUNC_CODE_DEBUG_LOC: + LLVM2_7MetadataDetected = true; + case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia] + I = 0; // Get the last instruction emitted. 
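+      // A DEBUG_LOC record is [line, col, scope-id, inlined-at-id]; the two
+      // metadata IDs are biased by one so that 0 can mean "none", hence the
+      // ScopeID-1/IAID-1 adjustments below.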
+ if (CurBB && !CurBB->empty()) + I = &CurBB->back(); + else if (CurBBNo && FunctionBBs[CurBBNo-1] && + !FunctionBBs[CurBBNo-1]->empty()) + I = &FunctionBBs[CurBBNo-1]->back(); + if (I == 0 || Record.size() < 4) + return Error("Invalid FUNC_CODE_DEBUG_LOC record"); + + unsigned Line = Record[0], Col = Record[1]; + unsigned ScopeID = Record[2], IAID = Record[3]; + + MDNode *Scope = 0, *IA = 0; + if (ScopeID) Scope = cast(MDValueList.getValueFwdRef(ScopeID-1)); + if (IAID) IA = cast(MDValueList.getValueFwdRef(IAID-1)); + LastLoc = DebugLoc::get(Line, Col, Scope, IA); + I->setDebugLoc(LastLoc); + I = 0; + continue; + } + + case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] + unsigned OpNum = 0; + Value *LHS, *RHS; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + getValue(Record, OpNum, LHS->getType(), RHS) || + OpNum+1 > Record.size()) + return Error("Invalid BINOP record"); + + int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); + if (Opc == -1) return Error("Invalid BINOP record"); + I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + InstructionList.push_back(I); + if (OpNum < Record.size()) { + if (Opc == Instruction::Add || + Opc == Instruction::Sub || + Opc == Instruction::Mul || + Opc == Instruction::Shl) { + if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) + cast(I)->setHasNoSignedWrap(true); + if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) + cast(I)->setHasNoUnsignedWrap(true); + } else if (Opc == Instruction::SDiv || + Opc == Instruction::UDiv || + Opc == Instruction::LShr || + Opc == Instruction::AShr) { + if (Record[OpNum] & (1 << bitc::PEO_EXACT)) + cast(I)->setIsExact(true); + } + } + break; + } + case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum+2 != Record.size()) + return Error("Invalid CAST record"); + + const Type *ResTy = getTypeByID(Record[OpNum]); + int Opc = GetDecodedCastOpcode(Record[OpNum+1]); + if (Opc == -1 || ResTy == 0) + return Error("Invalid CAST record"); + I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_INBOUNDS_GEP: + case bitc::FUNC_CODE_INST_GEP: { // GEP: [n x operands] + unsigned OpNum = 0; + Value *BasePtr; + if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) + return Error("Invalid GEP record"); + + SmallVector GEPIdx; + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid GEP record"); + GEPIdx.push_back(Op); + } + + I = GetElementPtrInst::Create(BasePtr, GEPIdx.begin(), GEPIdx.end()); + InstructionList.push_back(I); + if (BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP) + cast(I)->setIsInBounds(true); + break; + } + + case bitc::FUNC_CODE_INST_EXTRACTVAL: { + // EXTRACTVAL: [opty, opval, n x indices] + unsigned OpNum = 0; + Value *Agg; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + return Error("Invalid EXTRACTVAL record"); + + SmallVector EXTRACTVALIdx; + for (unsigned RecSize = Record.size(); + OpNum != RecSize; ++OpNum) { + uint64_t Index = Record[OpNum]; + if ((unsigned)Index != Index) + return Error("Invalid EXTRACTVAL index"); + EXTRACTVALIdx.push_back((unsigned)Index); + } + + I = ExtractValueInst::Create(Agg, + EXTRACTVALIdx.begin(), EXTRACTVALIdx.end()); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_INSERTVAL: { + // INSERTVAL: [opty, opval, 
opty, opval, n x indices] + unsigned OpNum = 0; + Value *Agg; + if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) + return Error("Invalid INSERTVAL record"); + Value *Val; + if (getValueTypePair(Record, OpNum, NextValueNo, Val)) + return Error("Invalid INSERTVAL record"); + + SmallVector INSERTVALIdx; + for (unsigned RecSize = Record.size(); + OpNum != RecSize; ++OpNum) { + uint64_t Index = Record[OpNum]; + if ((unsigned)Index != Index) + return Error("Invalid INSERTVAL index"); + INSERTVALIdx.push_back((unsigned)Index); + } + + I = InsertValueInst::Create(Agg, Val, + INSERTVALIdx.begin(), INSERTVALIdx.end()); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] + // obsolete form of select + // handles select i1 ... in old bitcode + unsigned OpNum = 0; + Value *TrueVal, *FalseVal, *Cond; + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || + getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + getValue(Record, OpNum, Type::getInt1Ty(Context), Cond)) + return Error("Invalid SELECT record"); + + I = SelectInst::Create(Cond, TrueVal, FalseVal); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] + // new form of select + // handles select i1 or select [N x i1] + unsigned OpNum = 0; + Value *TrueVal, *FalseVal, *Cond; + if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || + getValue(Record, OpNum, TrueVal->getType(), FalseVal) || + getValueTypePair(Record, OpNum, NextValueNo, Cond)) + return Error("Invalid SELECT record"); + + // select condition can be either i1 or [N x i1] + if (const VectorType* vector_type = + dyn_cast(Cond->getType())) { + // expect + if (vector_type->getElementType() != Type::getInt1Ty(Context)) + return Error("Invalid SELECT condition type"); + } else { + // expect i1 + if (Cond->getType() != Type::getInt1Ty(Context)) + return Error("Invalid SELECT condition type"); + } + + I = SelectInst::Create(Cond, TrueVal, FalseVal); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] + unsigned OpNum = 0; + Value *Vec, *Idx; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || + getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + return Error("Invalid EXTRACTELT record"); + I = ExtractElementInst::Create(Vec, Idx); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] + unsigned OpNum = 0; + Value *Vec, *Elt, *Idx; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || + getValue(Record, OpNum, + cast(Vec->getType())->getElementType(), Elt) || + getValue(Record, OpNum, Type::getInt32Ty(Context), Idx)) + return Error("Invalid INSERTELT record"); + I = InsertElementInst::Create(Vec, Elt, Idx); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] + unsigned OpNum = 0; + Value *Vec1, *Vec2, *Mask; + if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || + getValue(Record, OpNum, Vec1->getType(), Vec2)) + return Error("Invalid SHUFFLEVEC record"); + + if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) + return Error("Invalid SHUFFLEVEC record"); + I = new ShuffleVectorInst(Vec1, Vec2, Mask); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred] + // Old form of ICmp/FCmp returning bool + // Existed to differentiate 
between icmp/fcmp and vicmp/vfcmp which were + // both legal on vectors but had different behaviour. + case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] + // FCmp/ICmp returning bool or vector of bool + + unsigned OpNum = 0; + Value *LHS, *RHS; + if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || + getValue(Record, OpNum, LHS->getType(), RHS) || + OpNum+1 != Record.size()) + return Error("Invalid CMP record"); + + if (LHS->getType()->isFPOrFPVectorTy()) + I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); + else + I = new ICmpInst((ICmpInst::Predicate)Record[OpNum], LHS, RHS); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n] + if (Record.size() != 2) + return Error("Invalid GETRESULT record"); + unsigned OpNum = 0; + Value *Op; + getValueTypePair(Record, OpNum, NextValueNo, Op); + unsigned Index = Record[1]; + I = ExtractValueInst::Create(Op, Index); + InstructionList.push_back(I); + break; + } + + case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] + { + unsigned Size = Record.size(); + if (Size == 0) { + I = ReturnInst::Create(Context); + InstructionList.push_back(I); + break; + } + + unsigned OpNum = 0; + SmallVector Vs; + do { + Value *Op = NULL; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid RET record"); + Vs.push_back(Op); + } while(OpNum != Record.size()); + + const Type *ReturnType = F->getReturnType(); + // Handle multiple return values. FIXME: Remove in LLVM 3.0. + if (Vs.size() > 1 || + (ReturnType->isStructTy() && + (Vs.empty() || Vs[0]->getType() != ReturnType))) { + Value *RV = UndefValue::get(ReturnType); + for (unsigned i = 0, e = Vs.size(); i != e; ++i) { + I = InsertValueInst::Create(RV, Vs[i], i, "mrv"); + InstructionList.push_back(I); + CurBB->getInstList().push_back(I); + ValueList.AssignValue(I, NextValueNo++); + RV = I; + } + I = ReturnInst::Create(Context, RV); + InstructionList.push_back(I); + break; + } + + I = ReturnInst::Create(Context, Vs[0]); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] + if (Record.size() != 1 && Record.size() != 3) + return Error("Invalid BR record"); + BasicBlock *TrueDest = getBasicBlock(Record[0]); + if (TrueDest == 0) + return Error("Invalid BR record"); + + if (Record.size() == 1) { + I = BranchInst::Create(TrueDest); + InstructionList.push_back(I); + } + else { + BasicBlock *FalseDest = getBasicBlock(Record[1]); + Value *Cond = getFnValueByID(Record[2], Type::getInt1Ty(Context)); + if (FalseDest == 0 || Cond == 0) + return Error("Invalid BR record"); + I = BranchInst::Create(TrueDest, FalseDest, Cond); + InstructionList.push_back(I); + } + break; + } + case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] 
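+      // Concretely the record is [opty, cond, default-bb#, then NumCases
+      // (caseval#, dest-bb#) pairs], so a well-formed record has odd length
+      // of at least 3.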
+ if (Record.size() < 3 || (Record.size() & 1) == 0) + return Error("Invalid SWITCH record"); + const Type *OpTy = getTypeByID(Record[0]); + Value *Cond = getFnValueByID(Record[1], OpTy); + BasicBlock *Default = getBasicBlock(Record[2]); + if (OpTy == 0 || Cond == 0 || Default == 0) + return Error("Invalid SWITCH record"); + unsigned NumCases = (Record.size()-3)/2; + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + InstructionList.push_back(SI); + for (unsigned i = 0, e = NumCases; i != e; ++i) { + ConstantInt *CaseVal = + dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); + BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); + if (CaseVal == 0 || DestBB == 0) { + delete SI; + return Error("Invalid SWITCH record!"); + } + SI->addCase(CaseVal, DestBB); + } + I = SI; + break; + } + case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] + if (Record.size() < 2) + return Error("Invalid INDIRECTBR record"); + const Type *OpTy = getTypeByID(Record[0]); + Value *Address = getFnValueByID(Record[1], OpTy); + if (OpTy == 0 || Address == 0) + return Error("Invalid INDIRECTBR record"); + unsigned NumDests = Record.size()-2; + IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); + InstructionList.push_back(IBI); + for (unsigned i = 0, e = NumDests; i != e; ++i) { + if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) { + IBI->addDestination(DestBB); + } else { + delete IBI; + return Error("Invalid INDIRECTBR record!"); + } + } + I = IBI; + break; + } + + case bitc::FUNC_CODE_INST_INVOKE: { + // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] + if (Record.size() < 4) return Error("Invalid INVOKE record"); + AttrListPtr PAL = getAttributes(Record[0]); + unsigned CCInfo = Record[1]; + BasicBlock *NormalBB = getBasicBlock(Record[2]); + BasicBlock *UnwindBB = getBasicBlock(Record[3]); + + unsigned OpNum = 4; + Value *Callee; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + return Error("Invalid INVOKE record"); + + const PointerType *CalleeTy = dyn_cast(Callee->getType()); + const FunctionType *FTy = !CalleeTy ? 0 : + dyn_cast(CalleeTy->getElementType()); + + // Check that the right number of fixed parameters are here. + if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 || + Record.size() < OpNum+FTy->getNumParams()) + return Error("Invalid INVOKE record"); + + SmallVector Ops; + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { + Ops.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + if (Ops.back() == 0) return Error("Invalid INVOKE record"); + } + + if (!FTy->isVarArg()) { + if (Record.size() != OpNum) + return Error("Invalid INVOKE record"); + } else { + // Read type/value pairs for varargs params. + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid INVOKE record"); + Ops.push_back(Op); + } + } + + I = InvokeInst::Create(Callee, NormalBB, UnwindBB, + Ops.begin(), Ops.end()); + InstructionList.push_back(I); + cast(I)->setCallingConv( + static_cast(CCInfo)); + cast(I)->setAttributes(PAL); + break; + } + case bitc::FUNC_CODE_INST_UNWIND: // UNWIND + I = new UnwindInst(Context); + InstructionList.push_back(I); + break; + case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE + I = new UnreachableInst(Context); + InstructionList.push_back(I); + break; + case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] 
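+      // The record is the result type followed by (value#, bb#) pairs, so an
+      // even total length (type plus an odd number of operands) is malformed.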
+ if (Record.size() < 1 || ((Record.size()-1)&1)) + return Error("Invalid PHI record"); + const Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid PHI record"); + + PHINode *PN = PHINode::Create(Ty); + InstructionList.push_back(PN); + PN->reserveOperandSpace((Record.size()-1)/2); + + for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + Value *V = getFnValueByID(Record[1+i], Ty); + BasicBlock *BB = getBasicBlock(Record[2+i]); + if (!V || !BB) return Error("Invalid PHI record"); + PN->addIncoming(V, BB); + } + I = PN; + break; + } + + case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align] + // Autoupgrade malloc instruction to malloc call. + // FIXME: Remove in LLVM 3.0. + if (Record.size() < 3) + return Error("Invalid MALLOC record"); + const PointerType *Ty = + dyn_cast_or_null(getTypeByID(Record[0])); + Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); + if (!Ty || !Size) return Error("Invalid MALLOC record"); + if (!CurBB) return Error("Invalid malloc instruction with no BB"); + const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext()); + Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType()); + AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty); + I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(), + AllocSize, Size, NULL); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum != Record.size()) + return Error("Invalid FREE record"); + if (!CurBB) return Error("Invalid free instruction with no BB"); + I = CallInst::CreateFree(Op, CurBB); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] + // For backward compatibility, tolerate a lack of an opty, and use i32. + // Remove this in LLVM 3.0. + if (Record.size() < 3 || Record.size() > 4) + return Error("Invalid ALLOCA record"); + unsigned OpNum = 0; + const PointerType *Ty = + dyn_cast_or_null(getTypeByID(Record[OpNum++])); + const Type *OpTy = Record.size() == 4 ? getTypeByID(Record[OpNum++]) : + Type::getInt32Ty(Context); + Value *Size = getFnValueByID(Record[OpNum++], OpTy); + unsigned Align = Record[OpNum++]; + if (!Ty || !Size) return Error("Invalid ALLOCA record"); + I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] + unsigned OpNum = 0; + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op) || + OpNum+2 != Record.size()) + return Error("Invalid LOAD record"); + + I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol] + unsigned OpNum = 0; + Value *Val, *Ptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || + getValue(Record, OpNum, + cast(Ptr->getType())->getElementType(), Val) || + OpNum+2 != Record.size()) + return Error("Invalid STORE record"); + + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol] + // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0. 
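+      // The legacy record carries the value operand first and derives the
+      // pointer type from it, whereas STORE2 above carries the pointer first
+      // and derives the value type from its pointee type.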
+ unsigned OpNum = 0; + Value *Val, *Ptr; + if (getValueTypePair(Record, OpNum, NextValueNo, Val) || + getValue(Record, OpNum, + PointerType::getUnqual(Val->getType()), Ptr)|| + OpNum+2 != Record.size()) + return Error("Invalid STORE record"); + + I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); + InstructionList.push_back(I); + break; + } + // FIXME: Remove this in LLVM 3.0. + case bitc::FUNC_CODE_INST_CALL: + LLVM2_7MetadataDetected = true; + case bitc::FUNC_CODE_INST_CALL2: { + // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] + if (Record.size() < 3) + return Error("Invalid CALL record"); + + AttrListPtr PAL = getAttributes(Record[0]); + unsigned CCInfo = Record[1]; + + unsigned OpNum = 2; + Value *Callee; + if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) + return Error("Invalid CALL record"); + + const PointerType *OpTy = dyn_cast(Callee->getType()); + const FunctionType *FTy = 0; + if (OpTy) FTy = dyn_cast(OpTy->getElementType()); + if (!FTy || Record.size() < FTy->getNumParams()+OpNum) + return Error("Invalid CALL record"); + + SmallVector Args; + // Read the fixed params. + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { + if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID) + Args.push_back(getBasicBlock(Record[OpNum])); + else + Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i))); + if (Args.back() == 0) return Error("Invalid CALL record"); + } + + // Read type/value pairs for varargs params. + if (!FTy->isVarArg()) { + if (OpNum != Record.size()) + return Error("Invalid CALL record"); + } else { + while (OpNum != Record.size()) { + Value *Op; + if (getValueTypePair(Record, OpNum, NextValueNo, Op)) + return Error("Invalid CALL record"); + Args.push_back(Op); + } + } + + I = CallInst::Create(Callee, Args.begin(), Args.end()); + InstructionList.push_back(I); + cast(I)->setCallingConv( + static_cast(CCInfo>>1)); + cast(I)->setTailCall(CCInfo & 1); + cast(I)->setAttributes(PAL); + break; + } + case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] + if (Record.size() < 3) + return Error("Invalid VAARG record"); + const Type *OpTy = getTypeByID(Record[0]); + Value *Op = getFnValueByID(Record[1], OpTy); + const Type *ResTy = getTypeByID(Record[2]); + if (!OpTy || !Op || !ResTy) + return Error("Invalid VAARG record"); + I = new VAArgInst(Op, ResTy); + InstructionList.push_back(I); + break; + } + } + + // Add instruction to end of current BB. If there is no current BB, reject + // this file. + if (CurBB == 0) { + delete I; + return Error("Invalid instruction with no BB"); + } + CurBB->getInstList().push_back(I); + + // If this was a terminator instruction, move to the next block. + if (isa(I)) { + ++CurBBNo; + CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : 0; + } + + // Non-void values get registered in the value table for future use. + if (I && !I->getType()->isVoidTy()) + ValueList.AssignValue(I, NextValueNo++); + } + + // Check the function list for unresolved values. + if (Argument *A = dyn_cast(ValueList.back())) { + if (A->getParent() == 0) { + // We found at least one unresolved value. Nuke them all to avoid leaks. 
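+      // Forward references were materialized by the value list as
+      // placeholder Arguments with no parent function, which is what the
+      // getParent() == 0 test above identifies.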
+ for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ + if ((A = dyn_cast(ValueList[i])) && A->getParent() == 0) { + A->replaceAllUsesWith(UndefValue::get(A->getType())); + delete A; + } + } + return Error("Never resolved value found in function!"); + } + } + + // FIXME: Check for unresolved forward-declared metadata references + // and clean up leaks. + + // See if anything took the address of blocks in this function. If so, + // resolve them now. + DenseMap >::iterator BAFRI = + BlockAddrFwdRefs.find(F); + if (BAFRI != BlockAddrFwdRefs.end()) { + std::vector &RefList = BAFRI->second; + for (unsigned i = 0, e = RefList.size(); i != e; ++i) { + unsigned BlockIdx = RefList[i].first; + if (BlockIdx >= FunctionBBs.size()) + return Error("Invalid blockaddress block #"); + + GlobalVariable *FwdRef = RefList[i].second; + FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx])); + FwdRef->eraseFromParent(); + } + + BlockAddrFwdRefs.erase(BAFRI); + } + + // FIXME: Remove this in LLVM 3.0. + unsigned NewMDValueListSize = MDValueList.size(); + + // Trim the value list down to the size it was before we parsed this function. + ValueList.shrinkTo(ModuleValueListSize); + MDValueList.shrinkTo(ModuleMDValueListSize); + + // Backwards compatibility hack: Function-local metadata numbers + // were previously not reset between functions. This is now fixed, + // however we still need to understand the old numbering in order + // to be able to read old bitcode files. + // FIXME: Remove this in LLVM 3.0. + if (LLVM2_7MetadataDetected) + MDValueList.resize(NewMDValueListSize); + + std::vector().swap(FunctionBBs); + + return false; +} + +//===----------------------------------------------------------------------===// +// GVMaterializer implementation +//===----------------------------------------------------------------------===// + + +bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { + if (const Function *F = dyn_cast(GV)) { + return F->isDeclaration() && + DeferredFunctionInfo.count(const_cast(F)); + } + return false; +} + +bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { + Function *F = dyn_cast(GV); + // If it's not a function or is already material, ignore the request. + if (!F || !F->isMaterializable()) return false; + + DenseMap::iterator DFII = DeferredFunctionInfo.find(F); + assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); + + // Move the bit stream to the saved position of the deferred function body. + Stream.JumpToBit(DFII->second); + + if (ParseFunctionBody(F)) { + if (ErrInfo) *ErrInfo = ErrorString; + return true; + } + + // Upgrade any old intrinsic calls in the function. + for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), + E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + } + } + + return false; +} + +bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { + const Function *F = dyn_cast(GV); + if (!F || F->isDeclaration()) + return false; + return DeferredFunctionInfo.count(const_cast(F)); +} + +void BitcodeReader::Dematerialize(GlobalValue *GV) { + Function *F = dyn_cast(GV); + // If this function isn't dematerializable, this is a noop. 
+ if (!F || !isDematerializable(F)) + return; + + assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); + + // Just forget the function body, we can remat it later. + F->deleteBody(); +} + + +bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { + assert(M == TheModule && + "Can only Materialize the Module this BitcodeReader is attached to."); + // Iterate over the module, deserializing any functions that are still on + // disk. + for (Module::iterator F = TheModule->begin(), E = TheModule->end(); + F != E; ++F) + if (F->isMaterializable() && + Materialize(F, ErrInfo)) + return true; + + // Upgrade any intrinsic calls that slipped through (should not happen!) and + // delete the old functions to clean up. We can't do this unless the entire + // module is materialized because there could always be another function body + // with calls to the old function. + for (std::vector >::iterator I = + UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + if (!I->first->use_empty()) + I->first->replaceAllUsesWith(I->second); + I->first->eraseFromParent(); + } + } + std::vector >().swap(UpgradedIntrinsics); + + // Check debug info intrinsics. + CheckDebugInfoIntrinsics(TheModule); + + return false; +} + + +//===----------------------------------------------------------------------===// +// External interface +//===----------------------------------------------------------------------===// + +/// getLazyBitcodeModule - lazy function-at-a-time loading from a file. +/// +Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg) { + Module *M = new Module(Buffer->getBufferIdentifier(), Context); + BitcodeReader *R = new BitcodeReader(Buffer, Context); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + + delete M; // Also deletes R. + return 0; + } + // Have the BitcodeReader dtor delete 'Buffer'. + R->setBufferOwned(true); + return M; +} + +/// ParseBitcodeFile - Read the specified bitcode file, returning the module. +/// If an error occurs, return null and fill in *ErrMsg if non-null. +Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + std::string *ErrMsg){ + Module *M = getLazyBitcodeModule(Buffer, Context, ErrMsg); + if (!M) return 0; + + // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether + // there was an error. + static_cast(M->getMaterializer())->setBufferOwned(false); + + // Read in the entire module, and destroy the BitcodeReader. + if (M->MaterializeAllPermanently(ErrMsg)) { + delete M; + return 0; + } + + return M; +} + +std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg) { + BitcodeReader *R = new BitcodeReader(Buffer, Context); + // Don't let the BitcodeReader dtor delete 'Buffer'. 
+  R->setBufferOwned(false);
+
+  std::string Triple("");
+  if (R->ParseTriple(Triple))
+    if (ErrMsg)
+      *ErrMsg = R->getErrorString();
+
+  delete R;
+  return Triple;
+}
diff --git a/final/lib/Bitcode/Reader/BitcodeReader.h b/final/lib/Bitcode/Reader/BitcodeReader.h
new file mode 100644
index 00000000000..f8fc079c73d
--- /dev/null
+++ b/final/lib/Bitcode/Reader/BitcodeReader.h
@@ -0,0 +1,282 @@
+//===- BitcodeReader.h - Internal BitcodeReader impl ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines the BitcodeReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BITCODE_READER_H
+#define BITCODE_READER_H
+
+#include "llvm/GVMaterializer.h"
+#include "llvm/Attributes.h"
+#include "llvm/Type.h"
+#include "llvm/OperandTraits.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+  class MemoryBuffer;
+  class LLVMContext;
+
+//===----------------------------------------------------------------------===//
+//                          BitcodeReaderValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderValueList {
+  std::vector<WeakVH> ValuePtrs;
+
+  /// ResolveConstants - As we resolve forward-referenced constants, we add
+  /// information about them to this vector.  This allows us to resolve them in
+  /// bulk instead of resolving each reference at a time.  See the code in
+  /// ResolveConstantForwardRefs for more information about this.
+  ///
+  /// The key of this vector is the placeholder constant, the value is the slot
+  /// number that holds the resolved value.
+  typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
+  ResolveConstantsTy ResolveConstants;
+  LLVMContext& Context;
+public:
+  BitcodeReaderValueList(LLVMContext& C) : Context(C) {}
+  ~BitcodeReaderValueList() {
+    assert(ResolveConstants.empty() && "Constants not resolved?");
+  }
+
+  // vector compatibility methods
+  unsigned size() const { return ValuePtrs.size(); }
+  void resize(unsigned N) { ValuePtrs.resize(N); }
+  void push_back(Value *V) {
+    ValuePtrs.push_back(V);
+  }
+
+  void clear() {
+    assert(ResolveConstants.empty() && "Constants not resolved?");
+    ValuePtrs.clear();
+  }
+
+  Value *operator[](unsigned i) const {
+    assert(i < ValuePtrs.size());
+    return ValuePtrs[i];
+  }
+
+  Value *back() const { return ValuePtrs.back(); }
+  void pop_back() { ValuePtrs.pop_back(); }
+  bool empty() const { return ValuePtrs.empty(); }
+  void shrinkTo(unsigned N) {
+    assert(N <= size() && "Invalid shrinkTo request!");
+    ValuePtrs.resize(N);
+  }
+
+  Constant *getConstantFwdRef(unsigned Idx, const Type *Ty);
+  Value *getValueFwdRef(unsigned Idx, const Type *Ty);
+
+  void AssignValue(Value *V, unsigned Idx);
+
+  /// ResolveConstantForwardRefs - Once all constants are read, this method bulk
+  /// resolves any forward references.
+  void ResolveConstantForwardRefs();
+};
+
+
+//===----------------------------------------------------------------------===//
+// BitcodeReaderMDValueList Class
+//===----------------------------------------------------------------------===//
+
+class BitcodeReaderMDValueList {
+  std::vector<WeakVH> MDValuePtrs;
+
+  LLVMContext &Context;
+public:
+  BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {}
+
+  // vector compatibility methods
+  unsigned size() const       { return MDValuePtrs.size(); }
+  void resize(unsigned N)     { MDValuePtrs.resize(N); }
+  void push_back(Value *V)    { MDValuePtrs.push_back(V); }
+  void clear()                { MDValuePtrs.clear(); }
+  Value *back() const         { return MDValuePtrs.back(); }
+  void pop_back()             { MDValuePtrs.pop_back(); }
+  bool empty() const          { return MDValuePtrs.empty(); }
+
+  Value *operator[](unsigned i) const {
+    assert(i < MDValuePtrs.size());
+    return MDValuePtrs[i];
+  }
+
+  void shrinkTo(unsigned N) {
+    assert(N <= size() && "Invalid shrinkTo request!");
+    MDValuePtrs.resize(N);
+  }
+
+  Value *getValueFwdRef(unsigned Idx);
+  void AssignValue(Value *V, unsigned Idx);
+};
+
+class BitcodeReader : public GVMaterializer {
+  LLVMContext &Context;
+  Module *TheModule;
+  MemoryBuffer *Buffer;
+  bool BufferOwned;
+  BitstreamReader StreamFile;
+  BitstreamCursor Stream;
+
+  const char *ErrorString;
+
+  std::vector<PATypeHolder> TypeList;
+  BitcodeReaderValueList ValueList;
+  BitcodeReaderMDValueList MDValueList;
+  SmallVector<Instruction*, 64> InstructionList;
+
+  std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInits;
+  std::vector<std::pair<GlobalAlias*, unsigned> > AliasInits;
+
+  /// MAttributes - The set of attributes by index.  Index zero in the
+  /// file is for null, and is thus not represented here.  As such all indices
+  /// are off by one.
+  std::vector<AttrListPtr> MAttributes;
+
+  /// FunctionBBs - While parsing a function body, this is a list of the basic
+  /// blocks for the function.
+  std::vector<BasicBlock*> FunctionBBs;
+
+  // When reading the module header, this list is populated with functions that
+  // have bodies later in the file.
+  std::vector<Function*> FunctionsWithBodies;
+
+  // When intrinsic functions are encountered which require upgrading they are
+  // stored here with their replacement function.
+  typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+  UpgradedIntrinsicMap UpgradedIntrinsics;
+
+  // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+  DenseMap<unsigned, unsigned> MDKindMap;
+
+  // After the module header has been read, the FunctionsWithBodies list is
+  // reversed.  This keeps track of whether we've done this yet.
+  bool HasReversedFunctionsWithBodies;
+
+  /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find each deferred function body in the
+  /// stream.
+  DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+  /// BlockAddrFwdRefs - These are blockaddr references to basic blocks.  These
+  /// are resolved lazily when functions are loaded.
+  typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
+  DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
+
+  /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
+  /// earlier was detected, in which case we behave slightly differently,
+  /// for compatibility.
+  /// FIXME: Remove in LLVM 3.0.
+  bool LLVM2_7MetadataDetected;
+
+public:
+  explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
+    : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
+      ErrorString(0), ValueList(C), MDValueList(C),
+      LLVM2_7MetadataDetected(false) {
+    HasReversedFunctionsWithBodies = false;
+  }
+  ~BitcodeReader() {
+    FreeState();
+  }
+
+  void FreeState();
+
+  /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
+  /// when the reader is destroyed.
+  void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+
+  virtual bool isMaterializable(const GlobalValue *GV) const;
+  virtual bool isDematerializable(const GlobalValue *GV) const;
+  virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0);
+  virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0);
+  virtual void Dematerialize(GlobalValue *GV);
+
+  bool Error(const char *Str) {
+    ErrorString = Str;
+    return true;
+  }
+  const char *getErrorString() const { return ErrorString; }
+
+  /// @brief Main interface to parsing a bitcode buffer.
+  /// @returns true if an error occurred.
+  bool ParseBitcodeInto(Module *M);
+
+  /// @brief Cheap mechanism to just extract the module triple
+  /// @returns true if an error occurred.
+  bool ParseTriple(std::string &Triple);
+private:
+  const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
+  Value *getFnValueByID(unsigned ID, const Type *Ty) {
+    if (Ty == Type::getMetadataTy(Context))
+      return MDValueList.getValueFwdRef(ID);
+    else
+      return ValueList.getValueFwdRef(ID, Ty);
+  }
+  BasicBlock *getBasicBlock(unsigned ID) const {
+    if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+    return FunctionBBs[ID];
+  }
+  AttrListPtr getAttributes(unsigned i) const {
+    if (i-1 < MAttributes.size())
+      return MAttributes[i-1];
+    return AttrListPtr();
+  }
+
+  /// getValueTypePair - Read a value/type pair out of the specified record from
+  /// slot 'Slot'.  Increment Slot past the number of slots used in the record.
+  /// Return true on failure.
+  bool getValueTypePair(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+                        unsigned InstNum, Value *&ResVal) {
+    if (Slot == Record.size()) return true;
+    unsigned ValNo = (unsigned)Record[Slot++];
+    if (ValNo < InstNum) {
+      // If this is not a forward reference, just return the value we already
+      // have.
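+      // (Illustrative note: with InstNum == 7, a slot value of 3 names the
+      // value already assigned ID 3 and needs no type field, while a slot
+      // value of 9 would be a forward reference followed by its type ID.)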
+      ResVal = getFnValueByID(ValNo, 0);
+      return ResVal == 0;
+    } else if (Slot == Record.size()) {
+      return true;
+    }
+
+    unsigned TypeNo = (unsigned)Record[Slot++];
+    ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
+    return ResVal == 0;
+  }
+  bool getValue(SmallVector<uint64_t, 64> &Record, unsigned &Slot,
+                const Type *Ty, Value *&ResVal) {
+    if (Slot == Record.size()) return true;
+    unsigned ValNo = (unsigned)Record[Slot++];
+    ResVal = getFnValueByID(ValNo, Ty);
+    return ResVal == 0;
+  }
+
+
+  bool ParseModule();
+  bool ParseAttributeBlock();
+  bool ParseTypeTable();
+  bool ParseTypeSymbolTable();
+  bool ParseValueSymbolTable();
+  bool ParseConstants();
+  bool RememberAndSkipFunctionBody();
+  bool ParseFunctionBody(Function *F);
+  bool ResolveGlobalAndAliasInits();
+  bool ParseMetadata();
+  bool ParseMetadataAttachment();
+  bool ParseModuleTriple(std::string &Triple);
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/Bitcode/Reader/CMakeLists.txt b/final/lib/Bitcode/Reader/CMakeLists.txt
new file mode 100644
index 00000000000..693d4310b83
--- /dev/null
+++ b/final/lib/Bitcode/Reader/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMBitReader
+  BitReader.cpp
+  BitcodeReader.cpp
+  )
diff --git a/final/lib/Bitcode/Reader/Makefile b/final/lib/Bitcode/Reader/Makefile
new file mode 100644
index 00000000000..59af8d53a73
--- /dev/null
+++ b/final/lib/Bitcode/Reader/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Reader/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitReader
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/Bitcode/Writer/BitWriter.cpp b/final/lib/Bitcode/Writer/BitWriter.cpp
new file mode 100644
index 00000000000..42884224633
--- /dev/null
+++ b/final/lib/Bitcode/Writer/BitWriter.cpp
@@ -0,0 +1,40 @@
+//===-- BitWriter.cpp -----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/BitWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+
+/*===-- Operations on modules ---------------------------------------------===*/
+
+int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) {
+  std::string ErrorInfo;
+  raw_fd_ostream OS(Path, ErrorInfo,
+                    raw_fd_ostream::F_Binary);
+
+  if (!ErrorInfo.empty())
+    return -1;
+
+  WriteBitcodeToFile(unwrap(M), OS);
+  return 0;
+}
+
+int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose,
+                         int Unbuffered) {
+  raw_fd_ostream OS(FD, ShouldClose, Unbuffered);
+
+  WriteBitcodeToFile(unwrap(M), OS);
+  return 0;
+}
+
+int LLVMWriteBitcodeToFileHandle(LLVMModuleRef M, int FileHandle) {
+  return LLVMWriteBitcodeToFD(M, FileHandle, true, false);
+}
diff --git a/final/lib/Bitcode/Writer/BitcodeWriter.cpp b/final/lib/Bitcode/Writer/BitcodeWriter.cpp
new file mode 100644
index 00000000000..f8ef8c668c4
--- /dev/null
+++ b/final/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -0,0 +1,1672 @@
+//===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Bitcode writer implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitstreamWriter.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/Operator.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Program.h"
+#include <cctype>
+using namespace llvm;
+
+/// These are manifest constants used by the bitcode writer. They do not need to
+/// be kept in sync with the reader, but need to be consistent within this file.
+enum {
+  CurVersion = 0,
+
+  // VALUE_SYMTAB_BLOCK abbrev id's.
+  VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+  VST_ENTRY_7_ABBREV,
+  VST_ENTRY_6_ABBREV,
+  VST_BBENTRY_6_ABBREV,
+
+  // CONSTANTS_BLOCK abbrev id's.
+  CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+  CONSTANTS_INTEGER_ABBREV,
+  CONSTANTS_CE_CAST_Abbrev,
+  CONSTANTS_NULL_Abbrev,
+
+  // FUNCTION_BLOCK abbrev id's.
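+  // (Abbrev IDs 0-3 are reserved by the bitstream format itself, so each
+  // block's first custom abbreviation starts at FIRST_APPLICATION_ABBREV;
+  // WriteBlockInfo must register abbrevs in exactly this order for these
+  // enumerators to stay valid.)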
+  FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV,
+  FUNCTION_INST_BINOP_ABBREV,
+  FUNCTION_INST_BINOP_FLAGS_ABBREV,
+  FUNCTION_INST_CAST_ABBREV,
+  FUNCTION_INST_RET_VOID_ABBREV,
+  FUNCTION_INST_RET_VAL_ABBREV,
+  FUNCTION_INST_UNREACHABLE_ABBREV
+};
+
+
+static unsigned GetEncodedCastOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Unknown cast instruction!");
+  case Instruction::Trunc   : return bitc::CAST_TRUNC;
+  case Instruction::ZExt    : return bitc::CAST_ZEXT;
+  case Instruction::SExt    : return bitc::CAST_SEXT;
+  case Instruction::FPToUI  : return bitc::CAST_FPTOUI;
+  case Instruction::FPToSI  : return bitc::CAST_FPTOSI;
+  case Instruction::UIToFP  : return bitc::CAST_UITOFP;
+  case Instruction::SIToFP  : return bitc::CAST_SITOFP;
+  case Instruction::FPTrunc : return bitc::CAST_FPTRUNC;
+  case Instruction::FPExt   : return bitc::CAST_FPEXT;
+  case Instruction::PtrToInt: return bitc::CAST_PTRTOINT;
+  case Instruction::IntToPtr: return bitc::CAST_INTTOPTR;
+  case Instruction::BitCast : return bitc::CAST_BITCAST;
+  }
+}
+
+static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default: llvm_unreachable("Unknown binary instruction!");
+  case Instruction::Add:
+  case Instruction::FAdd: return bitc::BINOP_ADD;
+  case Instruction::Sub:
+  case Instruction::FSub: return bitc::BINOP_SUB;
+  case Instruction::Mul:
+  case Instruction::FMul: return bitc::BINOP_MUL;
+  case Instruction::UDiv: return bitc::BINOP_UDIV;
+  case Instruction::FDiv:
+  case Instruction::SDiv: return bitc::BINOP_SDIV;
+  case Instruction::URem: return bitc::BINOP_UREM;
+  case Instruction::FRem:
+  case Instruction::SRem: return bitc::BINOP_SREM;
+  case Instruction::Shl:  return bitc::BINOP_SHL;
+  case Instruction::LShr: return bitc::BINOP_LSHR;
+  case Instruction::AShr: return bitc::BINOP_ASHR;
+  case Instruction::And:  return bitc::BINOP_AND;
+  case Instruction::Or:   return bitc::BINOP_OR;
+  case Instruction::Xor:  return bitc::BINOP_XOR;
+  }
+}
+
+
+
+static void WriteStringRecord(unsigned Code, const std::string &Str,
+                              unsigned AbbrevToUse, BitstreamWriter &Stream) {
+  SmallVector<unsigned, 64> Vals;
+
+  // Code: [strchar x N]
+  for (unsigned i = 0, e = Str.size(); i != e; ++i)
+    Vals.push_back(Str[i]);
+
+  // Emit the finished record.
+  Stream.EmitRecord(Code, Vals, AbbrevToUse);
+}
+
+// Emit information about parameter attributes.
+static void WriteAttributeTable(const ValueEnumerator &VE,
+                                BitstreamWriter &Stream) {
+  const std::vector<AttrListPtr> &Attrs = VE.getAttributes();
+  if (Attrs.empty()) return;
+
+  Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3);
+
+  SmallVector<uint64_t, 64> Record;
+  for (unsigned i = 0, e = Attrs.size(); i != e; ++i) {
+    const AttrListPtr &A = Attrs[i];
+    for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) {
+      const AttributeWithIndex &PAWI = A.getSlot(i);
+      Record.push_back(PAWI.Index);
+
+      // FIXME: remove in LLVM 3.0
+      // Store the alignment in the bitcode as a 16-bit raw value instead of a
+      // 5-bit log2 encoded value. Shift the bits above the alignment up by
+      // 11 bits.
+      uint64_t FauxAttr = PAWI.Attrs & 0xffff;
+      if (PAWI.Attrs & Attribute::Alignment)
+        FauxAttr |= (1ull<<16)<<(((PAWI.Attrs & Attribute::Alignment)-1) >> 16);
+      FauxAttr |= (PAWI.Attrs & (0x3FFull << 21)) << 11;
+
+      Record.push_back(FauxAttr);
+    }
+
+    Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+/// WriteTypeTable - Write out the type table for a module.
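+/// Each type becomes one record in the TYPE_BLOCK; pointer, function, struct
+/// and array types get abbreviations whose type-index fields are sized to
+/// Log2_32_Ceil(#types + 1) bits, since any operand may name another type.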
+static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+  const ValueEnumerator::TypeList &TypeList = VE.getTypes();
+
+  Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
+  SmallVector<uint64_t, 64> TypeVals;
+
+  // Abbrev for TYPE_CODE_POINTER.
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  Abbv->Add(BitCodeAbbrevOp(0));  // Addrspace = 0
+  unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_FUNCTION.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // isvararg
+  Abbv->Add(BitCodeAbbrevOp(0));  // FIXME: DEAD value, remove in LLVM 3.0
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_STRUCT.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // ispacked
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Abbrev for TYPE_CODE_ARRAY.
+  Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));  // size
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv);
+
+  // Emit an entry count so the reader can reserve space.
+  TypeVals.push_back(TypeList.size());
+  Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals);
+  TypeVals.clear();
+
+  // Loop over all of the types, emitting each in turn.
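+  // (For example, a module with 10 distinct types encodes type indices in
+  // the abbrevs above as 4-bit fixed fields, since Log2_32_Ceil(10+1) == 4.)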
+  for (unsigned i = 0, e = TypeList.size(); i != e; ++i) {
+    const Type *T = TypeList[i].first;
+    int AbbrevToUse = 0;
+    unsigned Code = 0;
+
+    switch (T->getTypeID()) {
+    default: llvm_unreachable("Unknown type!");
+    case Type::VoidTyID:      Code = bitc::TYPE_CODE_VOID;      break;
+    case Type::FloatTyID:     Code = bitc::TYPE_CODE_FLOAT;     break;
+    case Type::DoubleTyID:    Code = bitc::TYPE_CODE_DOUBLE;    break;
+    case Type::X86_FP80TyID:  Code = bitc::TYPE_CODE_X86_FP80;  break;
+    case Type::FP128TyID:     Code = bitc::TYPE_CODE_FP128;     break;
+    case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
+    case Type::LabelTyID:     Code = bitc::TYPE_CODE_LABEL;     break;
+    case Type::OpaqueTyID:    Code = bitc::TYPE_CODE_OPAQUE;    break;
+    case Type::MetadataTyID:  Code = bitc::TYPE_CODE_METADATA;  break;
+    case Type::X86_MMXTyID:   Code = bitc::TYPE_CODE_X86_MMX;   break;
+    case Type::IntegerTyID:
+      // INTEGER: [width]
+      Code = bitc::TYPE_CODE_INTEGER;
+      TypeVals.push_back(cast<IntegerType>(T)->getBitWidth());
+      break;
+    case Type::PointerTyID: {
+      const PointerType *PTy = cast<PointerType>(T);
+      // POINTER: [pointee type, address space]
+      Code = bitc::TYPE_CODE_POINTER;
+      TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+      unsigned AddressSpace = PTy->getAddressSpace();
+      TypeVals.push_back(AddressSpace);
+      if (AddressSpace == 0) AbbrevToUse = PtrAbbrev;
+      break;
+    }
+    case Type::FunctionTyID: {
+      const FunctionType *FT = cast<FunctionType>(T);
+      // FUNCTION: [isvararg, attrid, retty, paramty x N]
+      Code = bitc::TYPE_CODE_FUNCTION;
+      TypeVals.push_back(FT->isVarArg());
+      TypeVals.push_back(0);  // FIXME: DEAD: remove in llvm 3.0
+      TypeVals.push_back(VE.getTypeID(FT->getReturnType()));
+      for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
+        TypeVals.push_back(VE.getTypeID(FT->getParamType(i)));
+      AbbrevToUse = FunctionAbbrev;
+      break;
+    }
+    case Type::StructTyID: {
+      const StructType *ST = cast<StructType>(T);
+      // STRUCT: [ispacked, eltty x N]
+      Code = bitc::TYPE_CODE_STRUCT;
+      TypeVals.push_back(ST->isPacked());
+      // Output all of the element types.
+      for (StructType::element_iterator I = ST->element_begin(),
+           E = ST->element_end(); I != E; ++I)
+        TypeVals.push_back(VE.getTypeID(*I));
+      AbbrevToUse = StructAbbrev;
+      break;
+    }
+    case Type::ArrayTyID: {
+      const ArrayType *AT = cast<ArrayType>(T);
+      // ARRAY: [numelts, eltty]
+      Code = bitc::TYPE_CODE_ARRAY;
+      TypeVals.push_back(AT->getNumElements());
+      TypeVals.push_back(VE.getTypeID(AT->getElementType()));
+      AbbrevToUse = ArrayAbbrev;
+      break;
+    }
+    case Type::VectorTyID: {
+      const VectorType *VT = cast<VectorType>(T);
+      // VECTOR [numelts, eltty]
+      Code = bitc::TYPE_CODE_VECTOR;
+      TypeVals.push_back(VT->getNumElements());
+      TypeVals.push_back(VE.getTypeID(VT->getElementType()));
+      break;
+    }
+    }
+
+    // Emit the finished record.
+    Stream.EmitRecord(Code, TypeVals, AbbrevToUse);
+    TypeVals.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+static unsigned getEncodedLinkage(const GlobalValue *GV) {
+  switch (GV->getLinkage()) {
+  default: llvm_unreachable("Invalid linkage!");
+  case GlobalValue::ExternalLinkage:                 return 0;
+  case GlobalValue::WeakAnyLinkage:                  return 1;
+  case GlobalValue::AppendingLinkage:                return 2;
+  case GlobalValue::InternalLinkage:                 return 3;
+  case GlobalValue::LinkOnceAnyLinkage:              return 4;
+  case GlobalValue::DLLImportLinkage:                return 5;
+  case GlobalValue::DLLExportLinkage:                return 6;
+  case GlobalValue::ExternalWeakLinkage:             return 7;
+  case GlobalValue::CommonLinkage:                   return 8;
+  case GlobalValue::PrivateLinkage:                  return 9;
+  case GlobalValue::WeakODRLinkage:                  return 10;
+  case GlobalValue::LinkOnceODRLinkage:              return 11;
+  case GlobalValue::AvailableExternallyLinkage:      return 12;
+  case GlobalValue::LinkerPrivateLinkage:            return 13;
+  case GlobalValue::LinkerPrivateWeakLinkage:        return 14;
+  case GlobalValue::LinkerPrivateWeakDefAutoLinkage: return 15;
+  }
+}
+
+static unsigned getEncodedVisibility(const GlobalValue *GV) {
+  switch (GV->getVisibility()) {
+  default: llvm_unreachable("Invalid visibility!");
+  case GlobalValue::DefaultVisibility:   return 0;
+  case GlobalValue::HiddenVisibility:    return 1;
+  case GlobalValue::ProtectedVisibility: return 2;
+  }
+}
+
+// Emit top-level description of module, including target triple, inline asm,
+// descriptors for global variables, and function prototype info.
+static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
+                            BitstreamWriter &Stream) {
+  // Emit the list of dependent libraries for the Module.
+  for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
+    WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream);
+
+  // Emit various pieces of data attached to a module.
+  if (!M->getTargetTriple().empty())
+    WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(),
+                      0/*TODO*/, Stream);
+  if (!M->getDataLayout().empty())
+    WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, M->getDataLayout(),
+                      0/*TODO*/, Stream);
+  if (!M->getModuleInlineAsm().empty())
+    WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(),
+                      0/*TODO*/, Stream);
+
+  // Emit information about sections and GC, computing how many there are. Also
+  // compute the maximum alignment value.
+  std::map<std::string, unsigned> SectionMap;
+  std::map<std::string, unsigned> GCMap;
+  unsigned MaxAlignment = 0;
+  unsigned MaxGlobalType = 0;
+  for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+       GV != E; ++GV) {
+    MaxAlignment = std::max(MaxAlignment, GV->getAlignment());
+    MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV->getType()));
+
+    if (!GV->hasSection()) continue;
+    // Give section names unique ID's.
+    unsigned &Entry = SectionMap[GV->getSection()];
+    if (Entry != 0) continue;
+    WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV->getSection(),
+                      0/*TODO*/, Stream);
+    Entry = SectionMap.size();
+  }
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+    MaxAlignment = std::max(MaxAlignment, F->getAlignment());
+    if (F->hasSection()) {
+      // Give section names unique ID's.
+      unsigned &Entry = SectionMap[F->getSection()];
+      if (!Entry) {
+        WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F->getSection(),
+                          0/*TODO*/, Stream);
+        Entry = SectionMap.size();
+      }
+    }
+    if (F->hasGC()) {
+      // Same for GC names.
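+      // (The map's operator[] default-constructs missing entries to 0, so 0
+      // doubles as "not yet emitted"; assigned IDs are 1-based because ID 0
+      // means "no GC"/"no section" in the GLOBALVAR and FUNCTION records.)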
+      unsigned &Entry = GCMap[F->getGC()];
+      if (!Entry) {
+        WriteStringRecord(bitc::MODULE_CODE_GCNAME, F->getGC(),
+                          0/*TODO*/, Stream);
+        Entry = GCMap.size();
+      }
+    }
+  }
+
+  // Emit abbrev for globals, now that we know # sections and max alignment.
+  unsigned SimpleGVarAbbrev = 0;
+  if (!M->global_empty()) {
+    // Add an abbrev for common globals with no visibility or thread localness.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              Log2_32_Ceil(MaxGlobalType+1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1));  // Constant.
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));    // Initializer.
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // Linkage.
+    if (MaxAlignment == 0)                                  // Alignment.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else {
+      unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1;
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(MaxEncAlignment+1)));
+    }
+    if (SectionMap.empty())                                 // Section.
+      Abbv->Add(BitCodeAbbrevOp(0));
+    else
+      Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                                Log2_32_Ceil(SectionMap.size()+1)));
+    // Don't bother emitting vis + thread local.
+    SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv);
+  }
+
+  // Emit the global variable information.
+  SmallVector<unsigned, 64> Vals;
+  for (Module::const_global_iterator GV = M->global_begin(),E = M->global_end();
+       GV != E; ++GV) {
+    unsigned AbbrevToUse = 0;
+
+    // GLOBALVAR: [type, isconst, initid,
+    //             linkage, alignment, section, visibility, threadlocal,
+    //             unnamed_addr]
+    Vals.push_back(VE.getTypeID(GV->getType()));
+    Vals.push_back(GV->isConstant());
+    Vals.push_back(GV->isDeclaration() ? 0 :
+                   (VE.getValueID(GV->getInitializer()) + 1));
+    Vals.push_back(getEncodedLinkage(GV));
+    Vals.push_back(Log2_32(GV->getAlignment())+1);
+    Vals.push_back(GV->hasSection() ? SectionMap[GV->getSection()] : 0);
+    if (GV->isThreadLocal() ||
+        GV->getVisibility() != GlobalValue::DefaultVisibility ||
+        GV->hasUnnamedAddr()) {
+      Vals.push_back(getEncodedVisibility(GV));
+      Vals.push_back(GV->isThreadLocal());
+      Vals.push_back(GV->hasUnnamedAddr());
+    } else {
+      AbbrevToUse = SimpleGVarAbbrev;
+    }
+
+    Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+  // Emit the function proto information.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+    // FUNCTION: [type, callingconv, isproto, linkage, paramattr,
+    //            alignment, section, visibility, gc, unnamed_addr]
+    Vals.push_back(VE.getTypeID(F->getType()));
+    Vals.push_back(F->getCallingConv());
+    Vals.push_back(F->isDeclaration());
+    Vals.push_back(getEncodedLinkage(F));
+    Vals.push_back(VE.getAttributeID(F->getAttributes()));
+    Vals.push_back(Log2_32(F->getAlignment())+1);
+    Vals.push_back(F->hasSection() ? SectionMap[F->getSection()] : 0);
+    Vals.push_back(getEncodedVisibility(F));
+    Vals.push_back(F->hasGC() ? GCMap[F->getGC()] : 0);
+    Vals.push_back(F->hasUnnamedAddr());
+
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+
+
+  // Emit the alias information.
+  for (Module::const_alias_iterator AI = M->alias_begin(), E = M->alias_end();
+       AI != E; ++AI) {
+    Vals.push_back(VE.getTypeID(AI->getType()));
+    Vals.push_back(VE.getValueID(AI->getAliasee()));
+    Vals.push_back(getEncodedLinkage(AI));
+    Vals.push_back(getEncodedVisibility(AI));
+    unsigned AbbrevToUse = 0;
+    Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse);
+    Vals.clear();
+  }
+}
+
+static uint64_t GetOptimizationFlags(const Value *V) {
+  uint64_t Flags = 0;
+
+  if (const OverflowingBinaryOperator *OBO =
+        dyn_cast<OverflowingBinaryOperator>(V)) {
+    if (OBO->hasNoSignedWrap())
+      Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP;
+    if (OBO->hasNoUnsignedWrap())
+      Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP;
+  } else if (const PossiblyExactOperator *PEO =
+               dyn_cast<PossiblyExactOperator>(V)) {
+    if (PEO->isExact())
+      Flags |= 1 << bitc::PEO_EXACT;
+  }
+
+  return Flags;
+}
+
+static void WriteMDNode(const MDNode *N,
+                        const ValueEnumerator &VE,
+                        BitstreamWriter &Stream,
+                        SmallVector<uint64_t, 64> &Record) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (N->getOperand(i)) {
+      Record.push_back(VE.getTypeID(N->getOperand(i)->getType()));
+      Record.push_back(VE.getValueID(N->getOperand(i)));
+    } else {
+      Record.push_back(VE.getTypeID(Type::getVoidTy(N->getContext())));
+      Record.push_back(0);
+    }
+  }
+  unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
+                                           bitc::METADATA_NODE2;
+  Stream.EmitRecord(MDCode, Record, 0);
+  Record.clear();
+}
+
+static void WriteModuleMetadata(const Module *M,
+                                const ValueEnumerator &VE,
+                                BitstreamWriter &Stream) {
+  const ValueEnumerator::ValueList &Vals = VE.getMDValues();
+  bool StartedMetadataBlock = false;
+  unsigned MDSAbbrev = 0;
+  SmallVector<uint64_t, 64> Record;
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+
+    if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first)) {
+      if (!N->isFunctionLocal() || !N->getFunction()) {
+        if (!StartedMetadataBlock) {
+          Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+          StartedMetadataBlock = true;
+        }
+        WriteMDNode(N, VE, Stream, Record);
+      }
+    } else if (const MDString *MDS = dyn_cast<MDString>(Vals[i].first)) {
+      if (!StartedMetadataBlock) {
+        Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+        // Abbrev for METADATA_STRING.
+        BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+        Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRING));
+        Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+        Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+        MDSAbbrev = Stream.EmitAbbrev(Abbv);
+        StartedMetadataBlock = true;
+      }
+
+      // Code: [strchar x N]
+      Record.append(MDS->begin(), MDS->end());
+
+      // Emit the finished record.
+      Stream.EmitRecord(bitc::METADATA_STRING, Record, MDSAbbrev);
+      Record.clear();
+    }
+  }
+
+  // Write named metadata.
+  for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+       E = M->named_metadata_end(); I != E; ++I) {
+    const NamedMDNode *NMD = I;
+    if (!StartedMetadataBlock) {
+      Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+      StartedMetadataBlock = true;
+    }
+
+    // Write name.
+    StringRef Str = NMD->getName();
+    for (unsigned i = 0, e = Str.size(); i != e; ++i)
+      Record.push_back(Str[i]);
+    Stream.EmitRecord(bitc::METADATA_NAME, Record, 0/*TODO*/);
+    Record.clear();
+
+    // Write named metadata operands.
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      Record.push_back(VE.getValueID(NMD->getOperand(i)));
+    Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+    Record.clear();
+  }
+
+  if (StartedMetadataBlock)
+    Stream.ExitBlock();
+}
+
+static void WriteFunctionLocalMetadata(const Function &F,
+                                       const ValueEnumerator &VE,
+                                       BitstreamWriter &Stream) {
+  bool StartedMetadataBlock = false;
+  SmallVector<uint64_t, 64> Record;
+  const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues();
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i)
+    if (const MDNode *N = Vals[i])
+      if (N->isFunctionLocal() && N->getFunction() == &F) {
+        if (!StartedMetadataBlock) {
+          Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+          StartedMetadataBlock = true;
+        }
+        WriteMDNode(N, VE, Stream, Record);
+      }
+
+  if (StartedMetadataBlock)
+    Stream.ExitBlock();
+}
+
+static void WriteMetadataAttachment(const Function &F,
+                                    const ValueEnumerator &VE,
+                                    BitstreamWriter &Stream) {
+  Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3);
+
+  SmallVector<uint64_t, 64> Record;
+
+  // Write metadata attachments
+  // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
+  SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
+
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      MDs.clear();
+      I->getAllMetadataOtherThanDebugLoc(MDs);
+
+      // If no metadata, ignore instruction.
+      if (MDs.empty()) continue;
+
+      Record.push_back(VE.getInstructionID(I));
+
+      for (unsigned i = 0, e = MDs.size(); i != e; ++i) {
+        Record.push_back(MDs[i].first);
+        Record.push_back(VE.getValueID(MDs[i].second));
+      }
+      Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
+      Record.clear();
+    }
+
+  Stream.ExitBlock();
+}
+
+static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) {
+  SmallVector<uint64_t, 64> Record;
+
+  // Write metadata kinds
+  // METADATA_KIND - [n x [id, name]]
+  SmallVector<StringRef, 8> Names;
+  M->getMDKindNames(Names);
+
+  if (Names.empty()) return;
+
+  Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+
+  for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) {
+    Record.push_back(MDKindID);
+    StringRef KName = Names[MDKindID];
+    Record.append(KName.begin(), KName.end());
+
+    Stream.EmitRecord(bitc::METADATA_KIND, Record, 0);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+static void WriteConstants(unsigned FirstVal, unsigned LastVal,
+                           const ValueEnumerator &VE,
+                           BitstreamWriter &Stream, bool isGlobal) {
+  if (FirstVal == LastVal) return;
+
+  Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4);
+
+  unsigned AggregateAbbrev = 0;
+  unsigned String8Abbrev = 0;
+  unsigned CString7Abbrev = 0;
+  unsigned CString6Abbrev = 0;
+  // If this is a constant pool for the module, emit module-specific abbrevs.
+  if (isGlobal) {
+    // Abbrev for CST_CODE_AGGREGATE.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1)));
+    AggregateAbbrev = Stream.EmitAbbrev(Abbv);
+
+    // Abbrev for CST_CODE_STRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    String8Abbrev = Stream.EmitAbbrev(Abbv);
+    // Abbrev for CST_CODE_CSTRING.
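+    // (Two CSTRING abbrevs are registered: this 7-bit form, then a denser
+    // char6 form below; the emission loop picks per string based on its
+    // bytes.)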
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    CString7Abbrev = Stream.EmitAbbrev(Abbv);
+    // Abbrev for CST_CODE_CSTRING.
+    Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    CString6Abbrev = Stream.EmitAbbrev(Abbv);
+  }
+
+  SmallVector<uint64_t, 64> Record;
+
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+  const Type *LastTy = 0;
+  for (unsigned i = FirstVal; i != LastVal; ++i) {
+    const Value *V = Vals[i].first;
+    // If we need to switch types, do so now.
+    if (V->getType() != LastTy) {
+      LastTy = V->getType();
+      Record.push_back(VE.getTypeID(LastTy));
+      Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record,
+                        CONSTANTS_SETTYPE_ABBREV);
+      Record.clear();
+    }
+
+    if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+      Record.push_back(unsigned(IA->hasSideEffects()) |
+                       unsigned(IA->isAlignStack()) << 1);
+
+      // Add the asm string.
+      const std::string &AsmStr = IA->getAsmString();
+      Record.push_back(AsmStr.size());
+      for (unsigned i = 0, e = AsmStr.size(); i != e; ++i)
+        Record.push_back(AsmStr[i]);
+
+      // Add the constraint string.
+      const std::string &ConstraintStr = IA->getConstraintString();
+      Record.push_back(ConstraintStr.size());
+      for (unsigned i = 0, e = ConstraintStr.size(); i != e; ++i)
+        Record.push_back(ConstraintStr[i]);
+      Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record);
+      Record.clear();
+      continue;
+    }
+    const Constant *C = cast<Constant>(V);
+    unsigned Code = -1U;
+    unsigned AbbrevToUse = 0;
+    if (C->isNullValue()) {
+      Code = bitc::CST_CODE_NULL;
+    } else if (isa<UndefValue>(C)) {
+      Code = bitc::CST_CODE_UNDEF;
+    } else if (const ConstantInt *IV = dyn_cast<ConstantInt>(C)) {
+      if (IV->getBitWidth() <= 64) {
+        uint64_t V = IV->getSExtValue();
+        if ((int64_t)V >= 0)
+          Record.push_back(V << 1);
+        else
+          Record.push_back((-V << 1) | 1);
+        Code = bitc::CST_CODE_INTEGER;
+        AbbrevToUse = CONSTANTS_INTEGER_ABBREV;
+      } else {                             // Wide integers, > 64 bits in size.
+        // We have an arbitrary precision integer value to write whose
+        // bit width is > 64. However, in canonical unsigned integer
+        // format it is likely that the high bits are going to be zero.
+        // So, we only write the number of active words.
+        unsigned NWords = IV->getValue().getActiveWords();
+        const uint64_t *RawWords = IV->getValue().getRawData();
+        for (unsigned i = 0; i != NWords; ++i) {
+          int64_t V = RawWords[i];
+          if (V >= 0)
+            Record.push_back(V << 1);
+          else
+            Record.push_back((-V << 1) | 1);
+        }
+        Code = bitc::CST_CODE_WIDE_INTEGER;
+      }
+    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+      Code = bitc::CST_CODE_FLOAT;
+      const Type *Ty = CFP->getType();
+      if (Ty->isFloatTy() || Ty->isDoubleTy()) {
+        Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+      } else if (Ty->isX86_FP80Ty()) {
+        // api needed to prevent premature destruction
+        // bits are not in the same order as a normal i80 APInt, compensate.
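+        // (Layout note: p[0] holds the low 64 bits of the 80-bit value and
+        // p[1] the top 16, so the record stores the high 64 bits first,
+        // then the remaining low 16 bits.)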
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back((p[1] << 48) | (p[0] >> 16));
+        Record.push_back(p[0] & 0xffffLL);
+      } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) {
+        APInt api = CFP->getValueAPF().bitcastToAPInt();
+        const uint64_t *p = api.getRawData();
+        Record.push_back(p[0]);
+        Record.push_back(p[1]);
+      } else {
+        assert(0 && "Unknown FP type!");
+      }
+    } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+      const ConstantArray *CA = cast<ConstantArray>(C);
+      // Emit constant strings specially.
+      unsigned NumOps = CA->getNumOperands();
+      // If this is a null-terminated string, use the denser CSTRING encoding.
+      if (CA->getOperand(NumOps-1)->isNullValue()) {
+        Code = bitc::CST_CODE_CSTRING;
+        --NumOps;  // Don't encode the null, which isn't allowed by char6.
+      } else {
+        Code = bitc::CST_CODE_STRING;
+        AbbrevToUse = String8Abbrev;
+      }
+      bool isCStr7 = Code == bitc::CST_CODE_CSTRING;
+      bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING;
+      for (unsigned i = 0; i != NumOps; ++i) {
+        unsigned char V = cast<ConstantInt>(CA->getOperand(i))->getZExtValue();
+        Record.push_back(V);
+        isCStr7 &= (V & 128) == 0;
+        if (isCStrChar6)
+          isCStrChar6 = BitCodeAbbrevOp::isChar6(V);
+      }
+
+      if (isCStrChar6)
+        AbbrevToUse = CString6Abbrev;
+      else if (isCStr7)
+        AbbrevToUse = CString7Abbrev;
+    } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) ||
+               isa<ConstantVector>(V)) {
+      Code = bitc::CST_CODE_AGGREGATE;
+      for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
+        Record.push_back(VE.getValueID(C->getOperand(i)));
+      AbbrevToUse = AggregateAbbrev;
+    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+      switch (CE->getOpcode()) {
+      default:
+        if (Instruction::isCast(CE->getOpcode())) {
+          Code = bitc::CST_CODE_CE_CAST;
+          Record.push_back(GetEncodedCastOpcode(CE->getOpcode()));
+          Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          AbbrevToUse = CONSTANTS_CE_CAST_Abbrev;
+        } else {
+          assert(CE->getNumOperands() == 2 && "Unknown constant expr!");
+          Code = bitc::CST_CODE_CE_BINOP;
+          Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode()));
+          Record.push_back(VE.getValueID(C->getOperand(0)));
+          Record.push_back(VE.getValueID(C->getOperand(1)));
+          uint64_t Flags = GetOptimizationFlags(CE);
+          if (Flags != 0)
+            Record.push_back(Flags);
+        }
+        break;
+      case Instruction::GetElementPtr:
+        Code = bitc::CST_CODE_CE_GEP;
+        if (cast<GEPOperator>(C)->isInBounds())
+          Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
+        for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+          Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
+          Record.push_back(VE.getValueID(C->getOperand(i)));
+        }
+        break;
+      case Instruction::Select:
+        Code = bitc::CST_CODE_CE_SELECT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ExtractElement:
+        Code = bitc::CST_CODE_CE_EXTRACTELT;
+        Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        break;
+      case Instruction::InsertElement:
+        Code = bitc::CST_CODE_CE_INSERTELT;
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ShuffleVector:
+        // If the return type and argument types are the same, this is a
+        // standard shufflevector instruction.  If the types are different,
+        // then the shuffle is widening or truncating the input vectors, and
+        // the argument type must also be encoded.
+        if (C->getType() == C->getOperand(0)->getType()) {
+          Code = bitc::CST_CODE_CE_SHUFFLEVEC;
+        } else {
+          Code = bitc::CST_CODE_CE_SHUFVEC_EX;
+          Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        }
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(VE.getValueID(C->getOperand(2)));
+        break;
+      case Instruction::ICmp:
+      case Instruction::FCmp:
+        Code = bitc::CST_CODE_CE_CMP;
+        Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
+        Record.push_back(VE.getValueID(C->getOperand(0)));
+        Record.push_back(VE.getValueID(C->getOperand(1)));
+        Record.push_back(CE->getPredicate());
+        break;
+      }
+    } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+      assert(BA->getFunction() == BA->getBasicBlock()->getParent() &&
+             "Malformed blockaddress");
+      Code = bitc::CST_CODE_BLOCKADDRESS;
+      Record.push_back(VE.getTypeID(BA->getFunction()->getType()));
+      Record.push_back(VE.getValueID(BA->getFunction()));
+      Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock()));
+    } else {
+#ifndef NDEBUG
+      C->dump();
+#endif
+      llvm_unreachable("Unknown constant!");
+    }
+    Stream.EmitRecord(Code, Record, AbbrevToUse);
+    Record.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+static void WriteModuleConstants(const ValueEnumerator &VE,
+                                 BitstreamWriter &Stream) {
+  const ValueEnumerator::ValueList &Vals = VE.getValues();
+
+  // Find the first constant to emit, which is the first non-globalvalue value.
+  // We know globalvalues have been emitted by WriteModuleInfo.
+  for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
+    if (!isa<GlobalValue>(Vals[i].first)) {
+      WriteConstants(i, Vals.size(), VE, Stream, true);
+      return;
+    }
+  }
+}
+
+/// PushValueAndType - The file has to encode both the value and type id for
+/// many values, because we need to know what type to create for forward
+/// references.  However, most operands are not forward references, so this
+/// type field is not needed.
+///
+/// This function adds V's value ID to Vals.  If the value ID is at or above
+/// the instruction ID, then it is a forward reference, and it also includes
+/// the type ID.
+static bool PushValueAndType(const Value *V, unsigned InstID,
+                             SmallVector<unsigned, 64> &Vals,
+                             ValueEnumerator &VE) {
+  unsigned ValID = VE.getValueID(V);
+  Vals.push_back(ValID);
+  if (ValID >= InstID) {
+    Vals.push_back(VE.getTypeID(V->getType()));
+    return true;
+  }
+  return false;
+}
+
+/// WriteInstruction - Emit an instruction to the specified stream.
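+/// Operands are emitted as absolute value IDs; an operand whose ID has not
+/// been defined yet (>= InstID) is a forward reference and also carries its
+/// type, as described above PushValueAndType.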
+static void WriteInstruction(const Instruction &I, unsigned InstID,
+                             ValueEnumerator &VE, BitstreamWriter &Stream,
+                             SmallVector<unsigned, 64> &Vals) {
+  unsigned Code = 0;
+  unsigned AbbrevToUse = 0;
+  VE.setInstructionID(&I);
+  switch (I.getOpcode()) {
+  default:
+    if (Instruction::isCast(I.getOpcode())) {
+      Code = bitc::FUNC_CODE_INST_CAST;
+      if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+        AbbrevToUse = FUNCTION_INST_CAST_ABBREV;
+      Vals.push_back(VE.getTypeID(I.getType()));
+      Vals.push_back(GetEncodedCastOpcode(I.getOpcode()));
+    } else {
+      assert(isa<BinaryOperator>(I) && "Unknown instruction!");
+      Code = bitc::FUNC_CODE_INST_BINOP;
+      if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+        AbbrevToUse = FUNCTION_INST_BINOP_ABBREV;
+      Vals.push_back(VE.getValueID(I.getOperand(1)));
+      Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode()));
+      uint64_t Flags = GetOptimizationFlags(&I);
+      if (Flags != 0) {
+        if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV)
+          AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV;
+        Vals.push_back(Flags);
+      }
+    }
+    break;
+
+  case Instruction::GetElementPtr:
+    Code = bitc::FUNC_CODE_INST_GEP;
+    if (cast<GEPOperator>(&I)->isInBounds())
+      Code = bitc::FUNC_CODE_INST_INBOUNDS_GEP;
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+    break;
+  case Instruction::ExtractValue: {
+    Code = bitc::FUNC_CODE_INST_EXTRACTVAL;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    const ExtractValueInst *EVI = cast<ExtractValueInst>(&I);
+    for (const unsigned *i = EVI->idx_begin(), *e = EVI->idx_end(); i != e; ++i)
+      Vals.push_back(*i);
+    break;
+  }
+  case Instruction::InsertValue: {
+    Code = bitc::FUNC_CODE_INST_INSERTVAL;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+    const InsertValueInst *IVI = cast<InsertValueInst>(&I);
+    for (const unsigned *i = IVI->idx_begin(), *e = IVI->idx_end(); i != e; ++i)
+      Vals.push_back(*i);
+    break;
+  }
+  case Instruction::Select:
+    Code = bitc::FUNC_CODE_INST_VSELECT;
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    break;
+  case Instruction::ExtractElement:
+    Code = bitc::FUNC_CODE_INST_EXTRACTELT;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    break;
+  case Instruction::InsertElement:
+    Code = bitc::FUNC_CODE_INST_INSERTELT;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    break;
+  case Instruction::ShuffleVector:
+    Code = bitc::FUNC_CODE_INST_SHUFFLEVEC;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(VE.getValueID(I.getOperand(2)));
+    break;
+  case Instruction::ICmp:
+  case Instruction::FCmp:
+    // compare returning Int1Ty or vector of Int1Ty
+    Code = bitc::FUNC_CODE_INST_CMP2;
+    PushValueAndType(I.getOperand(0), InstID, Vals, VE);
+    Vals.push_back(VE.getValueID(I.getOperand(1)));
+    Vals.push_back(cast<CmpInst>(I).getPredicate());
+    break;
+
+  case Instruction::Ret:
+    {
+      Code = bitc::FUNC_CODE_INST_RET;
+      unsigned NumOperands = I.getNumOperands();
+      if (NumOperands == 0)
+        AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV;
+      else if (NumOperands == 1) {
+        if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))
+          AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV;
+      } else {
+        for (unsigned i = 0, e = NumOperands; i != e; ++i)
+          PushValueAndType(I.getOperand(i), InstID, Vals, VE);
+      }
+    }
+    break;
+  case Instruction::Br:
+    {
+      Code = bitc::FUNC_CODE_INST_BR;
+      const BranchInst &II = cast<BranchInst>(I);
+      Vals.push_back(VE.getValueID(II.getSuccessor(0)));
+      if (II.isConditional()) {
+        Vals.push_back(VE.getValueID(II.getSuccessor(1)));
+        Vals.push_back(VE.getValueID(II.getCondition()));
+      }
+    }
+    break;
+  case Instruction::Switch:
+    Code = bitc::FUNC_CODE_INST_SWITCH;
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+  case Instruction::IndirectBr:
+    Code = bitc::FUNC_CODE_INST_INDIRECTBR;
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+
+  case Instruction::Invoke: {
+    const InvokeInst *II = cast<InvokeInst>(&I);
+    const Value *Callee(II->getCalledValue());
+    const PointerType *PTy = cast<PointerType>(Callee->getType());
+    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+    Code = bitc::FUNC_CODE_INST_INVOKE;
+
+    Vals.push_back(VE.getAttributeID(II->getAttributes()));
+    Vals.push_back(II->getCallingConv());
+    Vals.push_back(VE.getValueID(II->getNormalDest()));
+    Vals.push_back(VE.getValueID(II->getUnwindDest()));
+    PushValueAndType(Callee, InstID, Vals, VE);
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));  // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
+           i != e; ++i)
+        PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
+    }
+    break;
+  }
+  case Instruction::Unwind:
+    Code = bitc::FUNC_CODE_INST_UNWIND;
+    break;
+  case Instruction::Unreachable:
+    Code = bitc::FUNC_CODE_INST_UNREACHABLE;
+    AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
+    break;
+
+  case Instruction::PHI:
+    Code = bitc::FUNC_CODE_INST_PHI;
+    Vals.push_back(VE.getTypeID(I.getType()));
+    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
+      Vals.push_back(VE.getValueID(I.getOperand(i)));
+    break;
+
+  case Instruction::Alloca:
+    Code = bitc::FUNC_CODE_INST_ALLOCA;
+    Vals.push_back(VE.getTypeID(I.getType()));
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
+    Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
+    Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
+    break;
+
+  case Instruction::Load:
+    Code = bitc::FUNC_CODE_INST_LOAD;
+    if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE))  // ptr
+      AbbrevToUse = FUNCTION_INST_LOAD_ABBREV;
+
+    Vals.push_back(Log2_32(cast<LoadInst>(I).getAlignment())+1);
+    Vals.push_back(cast<LoadInst>(I).isVolatile());
+    break;
+  case Instruction::Store:
+    Code = bitc::FUNC_CODE_INST_STORE2;
+    PushValueAndType(I.getOperand(1), InstID, Vals, VE);  // ptrty + ptr
+    Vals.push_back(VE.getValueID(I.getOperand(0)));       // val.
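+    // (Alignment is encoded as Log2_32(A)+1 so that 0 can mean "unspecified";
+    // e.g. an align-8 store is recorded as 4. The same scheme is used for
+    // loads, allocas, globals and functions above.)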
+    Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
+    Vals.push_back(cast<StoreInst>(I).isVolatile());
+    break;
+  case Instruction::Call: {
+    const CallInst &CI = cast<CallInst>(I);
+    const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
+    const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+
+    Code = bitc::FUNC_CODE_INST_CALL2;
+
+    Vals.push_back(VE.getAttributeID(CI.getAttributes()));
+    Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+    PushValueAndType(CI.getCalledValue(), InstID, Vals, VE);  // Callee
+
+    // Emit value #'s for the fixed parameters.
+    for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
+      Vals.push_back(VE.getValueID(CI.getArgOperand(i)));  // fixed param.
+
+    // Emit type/value pairs for varargs params.
+    if (FTy->isVarArg()) {
+      for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
+           i != e; ++i)
+        PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE);  // varargs
+    }
+    break;
+  }
+  case Instruction::VAArg:
+    Code = bitc::FUNC_CODE_INST_VAARG;
+    Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty
+    Vals.push_back(VE.getValueID(I.getOperand(0)));           // valist.
+    Vals.push_back(VE.getTypeID(I.getType()));                // restype.
+    break;
+  }
+
+  Stream.EmitRecord(Code, Vals, AbbrevToUse);
+  Vals.clear();
+}
+
+// Emit names for globals/functions etc.
+static void WriteValueSymbolTable(const ValueSymbolTable &VST,
+                                  const ValueEnumerator &VE,
+                                  BitstreamWriter &Stream) {
+  if (VST.empty()) return;
+  Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
+
+  // FIXME: Set up the abbrev, we know how many values there are!
+  // FIXME: We know if the type names can use 7-bit ascii.
+  SmallVector<unsigned, 64> NameVals;
+
+  for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end();
+       SI != SE; ++SI) {
+
+    const ValueName &Name = *SI;
+
+    // Figure out the encoding to use for the name.
+    bool is7Bit = true;
+    bool isChar6 = true;
+    for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength();
+         C != E; ++C) {
+      if (isChar6)
+        isChar6 = BitCodeAbbrevOp::isChar6(*C);
+      if ((unsigned char)*C & 128) {
+        is7Bit = false;
+        break; // don't bother scanning the rest.
+      }
+    }
+
+    unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
+
+    // VST_ENTRY:   [valueid, namechar x N]
+    // VST_BBENTRY: [bbid, namechar x N]
+    unsigned Code;
+    if (isa<BasicBlock>(SI->getValue())) {
+      Code = bitc::VST_CODE_BBENTRY;
+      if (isChar6)
+        AbbrevToUse = VST_BBENTRY_6_ABBREV;
+    } else {
+      Code = bitc::VST_CODE_ENTRY;
+      if (isChar6)
+        AbbrevToUse = VST_ENTRY_6_ABBREV;
+      else if (is7Bit)
+        AbbrevToUse = VST_ENTRY_7_ABBREV;
+    }
+
+    NameVals.push_back(VE.getValueID(SI->getValue()));
+    for (const char *P = Name.getKeyData(),
+         *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P)
+      NameVals.push_back((unsigned char)*P);
+
+    // Emit the finished record.
+    Stream.EmitRecord(Code, NameVals, AbbrevToUse);
+    NameVals.clear();
+  }
+  Stream.ExitBlock();
+}
+
+/// WriteFunction - Emit a function body to the module stream.
+static void WriteFunction(const Function &F, ValueEnumerator &VE,
+                          BitstreamWriter &Stream) {
+  Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4);
+  VE.incorporateFunction(F);
+
+  SmallVector<unsigned, 64> Vals;
+
+  // Emit the number of basic blocks, so the reader can create them ahead of
+  // time.
+  Vals.push_back(VE.getBasicBlocks().size());
+  Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals);
+  Vals.clear();
+
+  // If there are function-local constants, emit them now.
+  unsigned CstStart, CstEnd;
+  VE.getFunctionConstantRange(CstStart, CstEnd);
+  WriteConstants(CstStart, CstEnd, VE, Stream, false);
+
+  // If there is function-local metadata, emit it now.
+  WriteFunctionLocalMetadata(F, VE, Stream);
+
+  // Keep a running idea of what the instruction ID is.
+  unsigned InstID = CstEnd;
+
+  bool NeedsMetadataAttachment = false;
+
+  DebugLoc LastDL;
+
+  // Finally, emit all the instructions, in order.
+  for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      WriteInstruction(*I, InstID, VE, Stream, Vals);
+
+      if (!I->getType()->isVoidTy())
+        ++InstID;
+
+      // If the instruction has metadata, write a metadata attachment later.
+      NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc();
+
+      // If the instruction has a debug location, emit it.
+      DebugLoc DL = I->getDebugLoc();
+      if (DL.isUnknown()) {
+        // nothing to do.
+      } else if (DL == LastDL) {
+        // Just repeat the same debug loc as last time.
+        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals);
+      } else {
+        MDNode *Scope, *IA;
+        DL.getScopeAndInlinedAt(Scope, IA, I->getContext());
+
+        Vals.push_back(DL.getLine());
+        Vals.push_back(DL.getCol());
+        Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
+        Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
+        Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
+        Vals.clear();
+
+        LastDL = DL;
+      }
+    }
+
+  // Emit names for all the instructions etc.
+  WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream);
+
+  if (NeedsMetadataAttachment)
+    WriteMetadataAttachment(F, VE, Stream);
+  VE.purgeFunction();
+  Stream.ExitBlock();
+}
+
+/// WriteTypeSymbolTable - Emit a block for the specified type symtab.
+static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
+                                 const ValueEnumerator &VE,
+                                 BitstreamWriter &Stream) {
+  if (TST.empty()) return;
+
+  Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
+
+  // 7-bit fixed width VST_CODE_ENTRY strings.
+  BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+  Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                            Log2_32_Ceil(VE.getTypes().size()+1)));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+  unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
+
+  SmallVector<unsigned, 64> NameVals;
+
+  for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
+       TI != TE; ++TI) {
+    // TST_ENTRY: [typeid, namechar x N]
+    NameVals.push_back(VE.getTypeID(TI->second));
+
+    const std::string &Str = TI->first;
+    bool is7Bit = true;
+    for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+      NameVals.push_back((unsigned char)Str[i]);
+      if (Str[i] & 128)
+        is7Bit = false;
+    }
+
+    // Emit the finished record.
+    Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
+    NameVals.clear();
+  }
+
+  Stream.ExitBlock();
+}
+
+// Emit blockinfo, which defines the standard abbreviations etc.
+static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
+  // We only want to emit block info records for blocks that have multiple
+  // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.  Other
+  // blocks can define their abbrevs inline.
+  Stream.EnterBlockInfoBlock(2);
+
+  { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings.
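+    // (BlockInfo abbrevs are numbered sequentially as they are emitted, so
+    // each EmitBlockInfoAbbrev result is checked against the enum at the top
+    // of this file; a mismatch means the two have drifted apart.)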
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_8_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+
+  { // 7-bit fixed width VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_7_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_ENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_ENTRY_6_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+  { // 6-bit char6 VST_BBENTRY strings.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+    if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID,
+                                   Abbv) != VST_BBENTRY_6_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+
+
+
+  { // SETTYPE abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+                              Log2_32_Ceil(VE.getTypes().size()+1)));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_SETTYPE_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+
+  { // INTEGER abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_INTEGER_ABBREV)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+
+  { // CE_CAST abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4));  // cast opc
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,       // typeid
+                              Log2_32_Ceil(VE.getTypes().size()+1)));
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));    // value id
+
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_CE_CAST_Abbrev)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+  { // NULL abbrev for CONSTANTS_BLOCK.
+    BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+    Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL));
+    if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID,
+                                   Abbv) != CONSTANTS_NULL_Abbrev)
+      llvm_unreachable("Unexpected abbrev ordering!");
+  }
+
+  // FIXME: This should only use space for first class types!
+
+  { // INST_LOAD abbrev for FUNCTION_BLOCK.
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_LOAD_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_CAST abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty + Log2_32_Ceil(VE.getTypes().size()+1))); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_CAST_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // INST_RET abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VOID_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_RET abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VAL_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE)); + if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + Stream.ExitBlock(); +} + + +/// WriteModule - Emit the specified module to the bitstream. +static void WriteModule(const Module *M, BitstreamWriter &Stream) { + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + // Emit the version number if it is non-zero. + if (CurVersion) { + SmallVector Vals; + Vals.push_back(CurVersion); + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + } + + // Analyze the module, enumerating globals, functions, etc. 
+ ValueEnumerator VE(M); + + // Emit blockinfo, which defines the standard abbreviations etc. + WriteBlockInfo(VE, Stream); + + // Emit information about parameter attributes. + WriteAttributeTable(VE, Stream); + + // Emit information describing all of the types in the module. + WriteTypeTable(VE, Stream); + + // Emit top-level description of module, including target triple, inline asm, + // descriptors for global variables, and function prototype info. + WriteModuleInfo(M, VE, Stream); + + // Emit constants. + WriteModuleConstants(VE, Stream); + + // Emit metadata. + WriteModuleMetadata(M, VE, Stream); + + // Emit function bodies. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + if (!I->isDeclaration()) + WriteFunction(*I, VE, Stream); + + // Emit metadata. + WriteModuleMetadataStore(M, Stream); + + // Emit the type symbol table information. + WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream); + + // Emit names for globals/functions etc. + WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); + + Stream.ExitBlock(); +} + +/// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a +/// header and trailer to make it compatible with the system archiver. To do +/// this we emit the following header, and then emit a trailer that pads the +/// file out to be a multiple of 16 bytes. +/// +/// struct bc_header { +/// uint32_t Magic; // 0x0B17C0DE +/// uint32_t Version; // Version, currently always 0. +/// uint32_t BitcodeOffset; // Offset to traditional bitcode file. +/// uint32_t BitcodeSize; // Size of traditional bitcode file. +/// uint32_t CPUType; // CPU specifier. +/// ... potentially more later ... +/// }; +enum { + DarwinBCSizeFieldOffset = 3*4, // Offset to bitcode_size. + DarwinBCHeaderSize = 5*4 +}; + +/// isARMTriplet - Return true if the triplet looks like: +/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. +static bool isARMTriplet(const std::string &TT) { + size_t Pos = 0; + size_t Size = TT.size(); + if (Size >= 6 && + TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' && + TT[3] == 'm' && TT[4] == 'b') + Pos = 5; + else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm') + Pos = 3; + else + return false; + + if (TT[Pos] == '-') + return true; + else if (TT[Pos] == 'v') { + if (Size >= Pos+4 && + TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2') + return true; + else if (Size >= Pos+4 && + TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e') + return true; + } else + return false; + while (++Pos < Size && TT[Pos] != '-') { + if (!isdigit(TT[Pos])) + return false; + } + return true; +} + +static void EmitDarwinBCHeader(BitstreamWriter &Stream, + const std::string &TT) { + unsigned CPUType = ~0U; + + // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, + // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic + // number from /usr/include/mach/machine.h. It is ok to reproduce the + // specific constants here because they are implicitly part of the Darwin ABI. 
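+  // Illustrative check, a hypothetical helper not used here: since the
+  // bitstream writes 32-bit words little-endian, the wrapper magic
+  // 0x0B17C0DE appears on disk as the bytes DE C0 17 0B, and a consumer can
+  // recognize it before looking for the raw 'BC' signature.
+  //   static bool looksLikeWrapper(const unsigned char *Buf, size_t Len) {
+  //     return Len >= 4 && Buf[0] == 0xDE && Buf[1] == 0xC0 &&
+  //            Buf[2] == 0x17 && Buf[3] == 0x0B;
+  //   }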
+  enum {
+    DARWIN_CPU_ARCH_ABI64   = 0x01000000,
+    DARWIN_CPU_TYPE_X86     = 7,
+    DARWIN_CPU_TYPE_ARM     = 12,
+    DARWIN_CPU_TYPE_POWERPC = 18
+  };
+
+  if (TT.find("x86_64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
+  else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
+           TT[4] == '-' && TT[1] - '3' < 6)
+    CPUType = DARWIN_CPU_TYPE_X86;
+  else if (TT.find("powerpc-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC;
+  else if (TT.find("powerpc64-") == 0)
+    CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
+  else if (isARMTriplet(TT))
+    CPUType = DARWIN_CPU_TYPE_ARM;
+
+  // Traditional bitcode starts after the header.
+  unsigned BCOffset = DarwinBCHeaderSize;
+
+  Stream.Emit(0x0B17C0DE, 32);
+  Stream.Emit(0         , 32);  // Version.
+  Stream.Emit(BCOffset  , 32);
+  Stream.Emit(0         , 32);  // Filled in later.
+  Stream.Emit(CPUType   , 32);
+}
+
+/// EmitDarwinBCTrailer - Emit the darwin epilog after the bitcode file and
+/// finalize the header.
+static void EmitDarwinBCTrailer(BitstreamWriter &Stream, unsigned BufferSize) {
+  // Update the size field in the header.
+  Stream.BackpatchWord(DarwinBCSizeFieldOffset, BufferSize-DarwinBCHeaderSize);
+
+  // If the file is not a multiple of 16 bytes, insert dummy padding.
+  while (BufferSize & 15) {
+    Stream.Emit(0, 8);
+    ++BufferSize;
+  }
+}
+
+
+/// WriteBitcodeToFile - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
+  std::vector<unsigned char> Buffer;
+  BitstreamWriter Stream(Buffer);
+
+  Buffer.reserve(256*1024);
+
+  WriteBitcodeToStream(M, Stream);
+
+  // Write the generated bitstream to "Out".
+  Out.write((char*)&Buffer.front(), Buffer.size());
+}
+
+/// WriteBitcodeToStream - Write the specified module to the specified output
+/// stream.
+void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
+  // If this is darwin or another generic macho target, emit a file header and
+  // trailer if needed.
+  bool isMacho =
+    M->getTargetTriple().find("-darwin") != std::string::npos ||
+    M->getTargetTriple().find("-macho") != std::string::npos;
+  if (isMacho)
+    EmitDarwinBCHeader(Stream, M->getTargetTriple());
+
+  // Emit the file magic: 'B', 'C', 0xC0, 0xDE.
+  Stream.Emit((unsigned)'B', 8);
+  Stream.Emit((unsigned)'C', 8);
+  Stream.Emit(0x0, 4);
+  Stream.Emit(0xC, 4);
+  Stream.Emit(0xE, 4);
+  Stream.Emit(0xD, 4);
+
+  // Emit the module.
+  WriteModule(M, Stream);
+
+  if (isMacho)
+    EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
+}
diff --git a/final/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/final/lib/Bitcode/Writer/BitcodeWriterPass.cpp
new file mode 100644
index 00000000000..91e115cba6c
--- /dev/null
+++ b/final/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -0,0 +1,41 @@
+//===--- Bitcode/Writer/BitcodeWriterPass.cpp - Bitcode Writer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BitcodeWriterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+  class WriteBitcodePass : public ModulePass {
+    raw_ostream &OS; // raw_ostream to print on
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit WriteBitcodePass(raw_ostream &o)
+      : ModulePass(ID), OS(o) {}
+
+    const char *getPassName() const { return "Bitcode Writer"; }
+
+    bool runOnModule(Module &M) {
+      WriteBitcodeToFile(&M, OS);
+      return false;
+    }
+  };
+}
+
+char WriteBitcodePass::ID = 0;
+
+/// createBitcodeWriterPass - Create and return a pass that writes the module
+/// to the specified ostream.
+ModulePass *llvm::createBitcodeWriterPass(raw_ostream &Str) {
+  return new WriteBitcodePass(Str);
+}
diff --git a/final/lib/Bitcode/Writer/CMakeLists.txt b/final/lib/Bitcode/Writer/CMakeLists.txt
new file mode 100644
index 00000000000..f097b097c33
--- /dev/null
+++ b/final/lib/Bitcode/Writer/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMBitWriter
+  BitWriter.cpp
+  BitcodeWriter.cpp
+  BitcodeWriterPass.cpp
+  ValueEnumerator.cpp
+  )
diff --git a/final/lib/Bitcode/Writer/Makefile b/final/lib/Bitcode/Writer/Makefile
new file mode 100644
index 00000000000..7b0bd72159a
--- /dev/null
+++ b/final/lib/Bitcode/Writer/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Bitcode/Writer/Makefile -------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMBitWriter
+BUILD_ARCHIVE = 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/Bitcode/Writer/ValueEnumerator.cpp b/final/lib/Bitcode/Writer/ValueEnumerator.cpp
new file mode 100644
index 00000000000..2f02262c36a
--- /dev/null
+++ b/final/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -0,0 +1,511 @@
+//===-- ValueEnumerator.cpp - Number values and types for bitcode writer --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the ValueEnumerator class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueEnumerator.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/TypeSymbolTable.h"
+#include "llvm/ValueSymbolTable.h"
+#include "llvm/Instructions.h"
+#include <algorithm>
+using namespace llvm;
+
+static bool isSingleValueType(const std::pair<const Type*, unsigned> &P) {
+  return P.first->isSingleValueType();
+}
+
+static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
+  return V.first->getType()->isIntegerTy();
+}
+
+static bool CompareByFrequency(const std::pair<const Type*, unsigned> &P1,
+                               const std::pair<const Type*, unsigned> &P2) {
+  return P1.second > P2.second;
+}
+
+/// ValueEnumerator - Enumerate module-level information.
+ValueEnumerator::ValueEnumerator(const Module *M) {
+  // Enumerate the global variables.
+  for (Module::const_global_iterator I = M->global_begin(),
+         E = M->global_end(); I != E; ++I)
+    EnumerateValue(I);
+
+  // Enumerate the functions.
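+  // (Illustrative note: the enumeration order below fixes the value IDs the
+  // writer emits: globals, then functions, then aliases, then the constants
+  // enumerated later, so a reader can resolve a reference to any GlobalValue
+  // before it has parsed a single initializer.  For example, given
+  //   @g = global i32 0
+  //   define void @f() { ... }
+  // getValueID(@g) == 0 and getValueID(@f) == 1.)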
+  for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
+    EnumerateValue(I);
+    EnumerateAttributes(cast<Function>(I)->getAttributes());
+  }
+
+  // Enumerate the aliases.
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    EnumerateValue(I);
+
+  // Remember the cutoff between global values and other constants.
+  unsigned FirstConstant = Values.size();
+
+  // Enumerate the global variable initializers.
+  for (Module::const_global_iterator I = M->global_begin(),
+         E = M->global_end(); I != E; ++I)
+    if (I->hasInitializer())
+      EnumerateValue(I->getInitializer());
+
+  // Enumerate the aliasees.
+  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    EnumerateValue(I->getAliasee());
+
+  // Enumerate types used by the type symbol table.
+  EnumerateTypeSymbolTable(M->getTypeSymbolTable());
+
+  // Insert constants and metadata that are named at module level into the slot
+  // pool so that the module symbol table can refer to them.
+  EnumerateValueSymbolTable(M->getValueSymbolTable());
+  EnumerateNamedMetadata(M);
+
+  SmallVector<std::pair<unsigned, MDNode*>, 8> MDs;
+
+  // Enumerate types used by function bodies and argument lists.
+  for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
+
+    for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
+         I != E; ++I)
+      EnumerateType(I->getType());
+
+    for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){
+        for (User::const_op_iterator OI = I->op_begin(), E = I->op_end();
+             OI != E; ++OI) {
+          if (MDNode *MD = dyn_cast<MDNode>(*OI))
+            if (MD->isFunctionLocal() && MD->getFunction())
+              // These will get enumerated during function-incorporation.
+              continue;
+          EnumerateOperandType(*OI);
+        }
+        EnumerateType(I->getType());
+        if (const CallInst *CI = dyn_cast<CallInst>(I))
+          EnumerateAttributes(CI->getAttributes());
+        else if (const InvokeInst *II = dyn_cast<InvokeInst>(I))
+          EnumerateAttributes(II->getAttributes());
+
+        // Enumerate metadata attached to this instruction.
+        MDs.clear();
+        I->getAllMetadataOtherThanDebugLoc(MDs);
+        for (unsigned i = 0, e = MDs.size(); i != e; ++i)
+          EnumerateMetadata(MDs[i].second);
+
+        if (!I->getDebugLoc().isUnknown()) {
+          MDNode *Scope, *IA;
+          I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext());
+          if (Scope) EnumerateMetadata(Scope);
+          if (IA) EnumerateMetadata(IA);
+        }
+      }
+  }
+
+  // Optimize constant ordering.
+  OptimizeConstants(FirstConstant, Values.size());
+
+  // Sort the type table by frequency so that the most commonly used types come
+  // early in the table and get the low type IDs.
+  std::stable_sort(Types.begin(), Types.end(), CompareByFrequency);
+
+  // Partition the type IDs so that the single-value types occur before the
+  // aggregate types.  This allows the aggregate types to be dropped from the
+  // type table after parsing the global variable initializers.
+  std::partition(Types.begin(), Types.end(), isSingleValueType);
+
+  // Now that we rearranged the type table, rebuild TypeMap.
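+  // (Hypothetical example of the effect: with use counts {i32: 40, i8*: 3},
+  // i32 sorts first and receives the smaller type ID, so the many records
+  // that encode type indices with VBR spend fewer bits on the common types.)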
+ for (unsigned i = 0, e = Types.size(); i != e; ++i) + TypeMap[Types[i].first] = i+1; +} + +unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { + InstructionMapType::const_iterator I = InstructionMap.find(Inst); + assert (I != InstructionMap.end() && "Instruction is not mapped!"); + return I->second; +} + +void ValueEnumerator::setInstructionID(const Instruction *I) { + InstructionMap[I] = InstructionCount++; +} + +unsigned ValueEnumerator::getValueID(const Value *V) const { + if (isa(V) || isa(V)) { + ValueMapType::const_iterator I = MDValueMap.find(V); + assert(I != MDValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; + } + + ValueMapType::const_iterator I = ValueMap.find(V); + assert(I != ValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; +} + +// Optimize constant ordering. +namespace { + struct CstSortPredicate { + ValueEnumerator &VE; + explicit CstSortPredicate(ValueEnumerator &ve) : VE(ve) {} + bool operator()(const std::pair &LHS, + const std::pair &RHS) { + // Sort by plane. + if (LHS.first->getType() != RHS.first->getType()) + return VE.getTypeID(LHS.first->getType()) < + VE.getTypeID(RHS.first->getType()); + // Then by frequency. + return LHS.second > RHS.second; + } + }; +} + +/// OptimizeConstants - Reorder constant pool for denser encoding. +void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { + if (CstStart == CstEnd || CstStart+1 == CstEnd) return; + + CstSortPredicate P(*this); + std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); + + // Ensure that integer constants are at the start of the constant pool. This + // is important so that GEP structure indices come before gep constant exprs. + std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, + isIntegerValue); + + // Rebuild the modified portion of ValueMap. + for (; CstStart != CstEnd; ++CstStart) + ValueMap[Values[CstStart].first] = CstStart+1; +} + + +/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol +/// table. +void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) { + for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end(); + TI != TE; ++TI) + EnumerateType(TI->second); +} + +/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol +/// table into the values table. +void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { + for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); + VI != VE; ++VI) + EnumerateValue(VI->getValue()); +} + +/// EnumerateNamedMetadata - Insert all of the values referenced by +/// named metadata in the specified module. +void ValueEnumerator::EnumerateNamedMetadata(const Module *M) { + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) + EnumerateNamedMDNode(I); +} + +void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode *MD) { + for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) + EnumerateMetadata(MD->getOperand(i)); +} + +/// EnumerateMDNodeOperands - Enumerate all non-function-local values +/// and types referenced by the given MDNode. 
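+/// For example (illustrative), given the node !{i32 7, !"name", null} this
+/// enumerates the MDString via EnumerateMetadata, the i32 constant via
+/// EnumerateValue, and records the void type for the null operand.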
+void ValueEnumerator::EnumerateMDNodeOperands(const MDNode *N) {
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    if (Value *V = N->getOperand(i)) {
+      if (isa<MDNode>(V) || isa<MDString>(V))
+        EnumerateMetadata(V);
+      else if (!isa<Instruction>(V) && !isa<Argument>(V))
+        EnumerateValue(V);
+    } else
+      EnumerateType(Type::getVoidTy(N->getContext()));
+  }
+}
+
+void ValueEnumerator::EnumerateMetadata(const Value *MD) {
+  assert((isa<MDNode>(MD) || isa<MDString>(MD)) && "Invalid metadata kind");
+
+  // Enumerate the type of this value.
+  EnumerateType(MD->getType());
+
+  const MDNode *N = dyn_cast<MDNode>(MD);
+
+  // In the module-level pass, skip function-local nodes themselves, but
+  // do walk their operands.
+  if (N && N->isFunctionLocal() && N->getFunction()) {
+    EnumerateMDNodeOperands(N);
+    return;
+  }
+
+  // Check to see if it's already in!
+  unsigned &MDValueID = MDValueMap[MD];
+  if (MDValueID) {
+    // Increment use count.
+    MDValues[MDValueID-1].second++;
+    return;
+  }
+  MDValues.push_back(std::make_pair(MD, 1U));
+  MDValueID = MDValues.size();
+
+  // Enumerate all non-function-local operands.
+  if (N)
+    EnumerateMDNodeOperands(N);
+}
+
+/// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
+/// information reachable from the given MDNode.
+void ValueEnumerator::EnumerateFunctionLocalMetadata(const MDNode *N) {
+  assert(N->isFunctionLocal() && N->getFunction() &&
+         "EnumerateFunctionLocalMetadata called on non-function-local mdnode!");
+
+  // Enumerate the type of this value.
+  EnumerateType(N->getType());
+
+  // Check to see if it's already in!
+  unsigned &MDValueID = MDValueMap[N];
+  if (MDValueID) {
+    // Increment use count.
+    MDValues[MDValueID-1].second++;
+    return;
+  }
+  MDValues.push_back(std::make_pair(N, 1U));
+  MDValueID = MDValues.size();
+
+  // To incorporate function-local information, visit all function-local
+  // MDNodes and all function-local values they reference.
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (Value *V = N->getOperand(i)) {
+      if (MDNode *O = dyn_cast<MDNode>(V)) {
+        if (O->isFunctionLocal() && O->getFunction())
+          EnumerateFunctionLocalMetadata(O);
+      } else if (isa<Instruction>(V) || isa<Argument>(V))
+        EnumerateValue(V);
+    }
+
+  // Also, collect all function-local MDNodes for easy access.
+  FunctionLocalMDs.push_back(N);
+}
+
+void ValueEnumerator::EnumerateValue(const Value *V) {
+  assert(!V->getType()->isVoidTy() && "Can't insert void values!");
+  assert(!isa<MDNode>(V) && !isa<MDString>(V) &&
+         "EnumerateValue doesn't handle Metadata!");
+
+  // Check to see if it's already in!
+  unsigned &ValueID = ValueMap[V];
+  if (ValueID) {
+    // Increment use count.
+    Values[ValueID-1].second++;
+    return;
+  }
+
+  // Enumerate the type of this value.
+  EnumerateType(V->getType());
+
+  if (const Constant *C = dyn_cast<Constant>(V)) {
+    if (isa<GlobalValue>(C)) {
+      // Initializers for globals are handled explicitly elsewhere.
+    } else if (isa<ConstantArray>(C) && cast<ConstantArray>(C)->isString()) {
+      // Do not enumerate the initializers for an array of simple characters.
+      // The initializers just pollute the value table, and we emit the strings
+      // specially.
+    } else if (C->getNumOperands()) {
+      // If a constant has operands, enumerate them.  This makes sure that if a
+      // constant has uses (for example an array of const ints), they are also
+      // inserted.
+
+      // We prefer to enumerate the operands before we enumerate the user
+      // itself.  This makes it more likely that we can avoid forward references
+      // in the reader.  We know that there can be no cycles in the constants
+      // graph that don't go through a global variable.
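+      // (Illustrative case: for [2 x i32] [i32 1, i32 2], the i32 element
+      // constants are enumerated by the loop below before the array itself,
+      // so the reader never sees the aggregate before its elements.)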
+ for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); + I != E; ++I) + if (!isa(*I)) // Don't enumerate BB operand to BlockAddress. + EnumerateValue(*I); + + // Finally, add the value. Doing this could make the ValueID reference be + // dangling, don't reuse it. + Values.push_back(std::make_pair(V, 1U)); + ValueMap[V] = Values.size(); + return; + } + } + + // Add the value. + Values.push_back(std::make_pair(V, 1U)); + ValueID = Values.size(); +} + + +void ValueEnumerator::EnumerateType(const Type *Ty) { + unsigned &TypeID = TypeMap[Ty]; + + if (TypeID) { + // If we've already seen this type, just increase its occurrence count. + Types[TypeID-1].second++; + return; + } + + // First time we saw this type, add it. + Types.push_back(std::make_pair(Ty, 1U)); + TypeID = Types.size(); + + // Enumerate subtypes. + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) + EnumerateType(*I); +} + +// Enumerate the types for the specified value. If the value is a constant, +// walk through it, enumerating the types of the constant. +void ValueEnumerator::EnumerateOperandType(const Value *V) { + EnumerateType(V->getType()); + + if (const Constant *C = dyn_cast(V)) { + // If this constant is already enumerated, ignore it, we know its type must + // be enumerated. + if (ValueMap.count(V)) return; + + // This constant may have operands, make sure to enumerate the types in + // them. + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + const User *Op = C->getOperand(i); + + // Don't enumerate basic blocks here, this happens as operands to + // blockaddress. + if (isa(Op)) continue; + + EnumerateOperandType(Op); + } + + if (const MDNode *N = dyn_cast(V)) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (Value *Elem = N->getOperand(i)) + EnumerateOperandType(Elem); + } + } else if (isa(V) || isa(V)) + EnumerateMetadata(V); +} + +void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { + if (PAL.isEmpty()) return; // null is always 0. + // Do a lookup. + unsigned &Entry = AttributeMap[PAL.getRawPointer()]; + if (Entry == 0) { + // Never saw this before, add it. + Attributes.push_back(PAL); + Entry = Attributes.size(); + } +} + + +void ValueEnumerator::incorporateFunction(const Function &F) { + InstructionCount = 0; + NumModuleValues = Values.size(); + NumModuleMDValues = MDValues.size(); + + // Adding function arguments to the value table. + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I) + EnumerateValue(I); + + FirstFuncConstantID = Values.size(); + + // Add all function-level constants to the value table. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + if ((isa(*OI) && !isa(*OI)) || + isa(*OI)) + EnumerateValue(*OI); + } + BasicBlocks.push_back(BB); + ValueMap[BB] = BasicBlocks.size(); + } + + // Optimize the constant layout. + OptimizeConstants(FirstFuncConstantID, Values.size()); + + // Add the function's parameter attributes so they are available for use in + // the function's instruction. + EnumerateAttributes(F.getAttributes()); + + FirstInstID = Values.size(); + + SmallVector FnLocalMDVector; + // Add all of the instructions. 
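+  // (Illustrative numbering rule used below: only instructions that produce
+  // a value get IDs, e.g.
+  //   %x = add i32 %a, %b    ; enumerated, gets an ID
+  //   store i32 %x, i32* %p  ; void result, no ID
+  // which keeps operand references in the function block dense.)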
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + if (MDNode *MD = dyn_cast(*OI)) + if (MD->isFunctionLocal() && MD->getFunction()) + // Enumerate metadata after the instructions they might refer to. + FnLocalMDVector.push_back(MD); + } + + SmallVector, 8> MDs; + I->getAllMetadataOtherThanDebugLoc(MDs); + for (unsigned i = 0, e = MDs.size(); i != e; ++i) { + MDNode *N = MDs[i].second; + if (N->isFunctionLocal() && N->getFunction()) + FnLocalMDVector.push_back(N); + } + + if (!I->getType()->isVoidTy()) + EnumerateValue(I); + } + } + + // Add all of the function-local metadata. + for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) + EnumerateFunctionLocalMetadata(FnLocalMDVector[i]); +} + +void ValueEnumerator::purgeFunction() { + /// Remove purged values from the ValueMap. + for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) + ValueMap.erase(Values[i].first); + for (unsigned i = NumModuleMDValues, e = MDValues.size(); i != e; ++i) + MDValueMap.erase(MDValues[i].first); + for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) + ValueMap.erase(BasicBlocks[i]); + + Values.resize(NumModuleValues); + MDValues.resize(NumModuleMDValues); + BasicBlocks.clear(); + FunctionLocalMDs.clear(); +} + +static void IncorporateFunctionInfoGlobalBBIDs(const Function *F, + DenseMap &IDMap) { + unsigned Counter = 0; + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + IDMap[BB] = ++Counter; +} + +/// getGlobalBasicBlockID - This returns the function-specific ID for the +/// specified basic block. This is relatively expensive information, so it +/// should only be used by rare constructs such as address-of-label. +unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock *BB) const { + unsigned &Idx = GlobalBasicBlockIDs[BB]; + if (Idx != 0) + return Idx-1; + + IncorporateFunctionInfoGlobalBBIDs(BB->getParent(), GlobalBasicBlockIDs); + return getGlobalBasicBlockID(BB); +} + diff --git a/final/lib/Bitcode/Writer/ValueEnumerator.h b/final/lib/Bitcode/Writer/ValueEnumerator.h new file mode 100644 index 00000000000..cd1d2371b70 --- /dev/null +++ b/final/lib/Bitcode/Writer/ValueEnumerator.h @@ -0,0 +1,156 @@ +//===-- Bitcode/Writer/ValueEnumerator.h - Number values --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class gives values and types Unique ID's. +// +//===----------------------------------------------------------------------===// + +#ifndef VALUE_ENUMERATOR_H +#define VALUE_ENUMERATOR_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Attributes.h" +#include + +namespace llvm { + +class Type; +class Value; +class Instruction; +class BasicBlock; +class Function; +class Module; +class MDNode; +class NamedMDNode; +class AttrListPtr; +class TypeSymbolTable; +class ValueSymbolTable; +class MDSymbolTable; + +class ValueEnumerator { +public: + // For each type, we remember its Type* and occurrence frequency. + typedef std::vector > TypeList; + + // For each value, we remember its Value* and occurrence frequency. 
+  typedef std::vector<std::pair<const Value*, unsigned> > ValueList;
+private:
+  typedef DenseMap<const Type*, unsigned> TypeMapType;
+  TypeMapType TypeMap;
+  TypeList Types;
+
+  typedef DenseMap<const Value*, unsigned> ValueMapType;
+  ValueMapType ValueMap;
+  ValueList Values;
+  ValueList MDValues;
+  SmallVector<const MDNode *, 8> FunctionLocalMDs;
+  ValueMapType MDValueMap;
+
+  typedef DenseMap<void*, unsigned> AttributeMapType;
+  AttributeMapType AttributeMap;
+  std::vector<AttrListPtr> Attributes;
+
+  /// GlobalBasicBlockIDs - This map memoizes the basic block IDs referenced by
+  /// the "getGlobalBasicBlockID" method.
+  mutable DenseMap<const BasicBlock*, unsigned> GlobalBasicBlockIDs;
+
+  typedef DenseMap<const Instruction*, unsigned> InstructionMapType;
+  InstructionMapType InstructionMap;
+  unsigned InstructionCount;
+
+  /// BasicBlocks - This contains all the basic blocks for the currently
+  /// incorporated function.  Their reverse mapping is stored in ValueMap.
+  std::vector<const BasicBlock*> BasicBlocks;
+
+  /// When a function is incorporated, this is the size of the Values list
+  /// before incorporation.
+  unsigned NumModuleValues;
+
+  /// When a function is incorporated, this is the size of the MDValues list
+  /// before incorporation.
+  unsigned NumModuleMDValues;
+
+  unsigned FirstFuncConstantID;
+  unsigned FirstInstID;
+
+  ValueEnumerator(const ValueEnumerator &);  // DO NOT IMPLEMENT
+  void operator=(const ValueEnumerator &);   // DO NOT IMPLEMENT
+public:
+  ValueEnumerator(const Module *M);
+
+  unsigned getValueID(const Value *V) const;
+
+  unsigned getTypeID(const Type *T) const {
+    TypeMapType::const_iterator I = TypeMap.find(T);
+    assert(I != TypeMap.end() && "Type not in ValueEnumerator!");
+    return I->second-1;
+  }
+
+  unsigned getInstructionID(const Instruction *I) const;
+  void setInstructionID(const Instruction *I);
+
+  unsigned getAttributeID(const AttrListPtr &PAL) const {
+    if (PAL.isEmpty()) return 0;  // Null maps to zero.
+    AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer());
+    assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!");
+    return I->second;
+  }
+
+  /// getFunctionConstantRange - Return the range of values that corresponds to
+  /// function-local constants.
+  void getFunctionConstantRange(unsigned &Start, unsigned &End) const {
+    Start = FirstFuncConstantID;
+    End = FirstInstID;
+  }
+
+  const ValueList &getValues() const { return Values; }
+  const ValueList &getMDValues() const { return MDValues; }
+  const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+    return FunctionLocalMDs;
+  }
+  const TypeList &getTypes() const { return Types; }
+  const std::vector<const BasicBlock*> &getBasicBlocks() const {
+    return BasicBlocks;
+  }
+  const std::vector<AttrListPtr> &getAttributes() const {
+    return Attributes;
+  }
+
+  /// getGlobalBasicBlockID - This returns the function-specific ID for the
+  /// specified basic block.  This is relatively expensive information, so it
+  /// should only be used by rare constructs such as address-of-label.
+  unsigned getGlobalBasicBlockID(const BasicBlock *BB) const;
+
+  /// incorporateFunction/purgeFunction - If you'd like to deal with a function,
+  /// use these two methods to get its data into the ValueEnumerator!
+ /// + void incorporateFunction(const Function &F); + void purgeFunction(); + +private: + void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + + void EnumerateMDNodeOperands(const MDNode *N); + void EnumerateMetadata(const Value *MD); + void EnumerateFunctionLocalMetadata(const MDNode *N); + void EnumerateNamedMDNode(const NamedMDNode *NMD); + void EnumerateValue(const Value *V); + void EnumerateType(const Type *T); + void EnumerateOperandType(const Value *V); + void EnumerateAttributes(const AttrListPtr &PAL); + + void EnumerateTypeSymbolTable(const TypeSymbolTable &ST); + void EnumerateValueSymbolTable(const ValueSymbolTable &ST); + void EnumerateNamedMetadata(const Module *M); +}; + +} // End llvm namespace + +#endif diff --git a/final/lib/CMakeLists.txt b/final/lib/CMakeLists.txt new file mode 100644 index 00000000000..e2838c373a3 --- /dev/null +++ b/final/lib/CMakeLists.txt @@ -0,0 +1,14 @@ +# `Support' library is added on the top-level CMakeLists.txt + +add_subdirectory(VMCore) +add_subdirectory(CodeGen) +add_subdirectory(Bitcode) +add_subdirectory(Transforms) +add_subdirectory(Linker) +add_subdirectory(Analysis) +add_subdirectory(MC) +add_subdirectory(Object) +add_subdirectory(ExecutionEngine) +add_subdirectory(Target) +add_subdirectory(AsmParser) +add_subdirectory(Archive) diff --git a/final/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/final/lib/CodeGen/AggressiveAntiDepBreaker.cpp new file mode 100644 index 00000000000..b520d8fcedc --- /dev/null +++ b/final/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -0,0 +1,963 @@ +//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AggressiveAntiDepBreaker class, which +// implements register anti-dependence breaking during post-RA +// scheduling. It attempts to break all anti-dependencies within a +// block. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "AggressiveAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod +static cl::opt +DebugDiv("agg-antidep-debugdiv", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); +static cl::opt +DebugMod("agg-antidep-debugmod", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); + +AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, + MachineBasicBlock *BB) : + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), + KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) +{ + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < NumTargetRegs; ++i) { + // Initialize all registers to be in their own group. 
Initially we + // assign the register to the same-indexed GroupNode. + GroupNodeIndices[i] = i; + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } +} + +unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) { + unsigned Node = GroupNodeIndices[Reg]; + while (GroupNodes[Node] != Node) + Node = GroupNodes[Node]; + + return Node; +} + +void AggressiveAntiDepState::GetGroupRegs( + unsigned Group, + std::vector &Regs, + std::multimap *RegRefs) +{ + for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { + if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) + Regs.push_back(Reg); + } +} + +unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) +{ + assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); + assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); + + // find group for each register + unsigned Group1 = GetGroup(Reg1); + unsigned Group2 = GetGroup(Reg2); + + // if either group is 0, then that must become the parent + unsigned Parent = (Group1 == 0) ? Group1 : Group2; + unsigned Other = (Parent == Group1) ? Group2 : Group1; + GroupNodes.at(Other) = Parent; + return Parent; +} + +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) +{ + // Create a new GroupNode for Reg. Reg's existing GroupNode must + // stay as is because there could be other GroupNodes referring to + // it. + unsigned idx = GroupNodes.size(); + GroupNodes.push_back(idx); + GroupNodeIndices[Reg] = idx; + return idx; +} + +bool AggressiveAntiDepState::IsLive(unsigned Reg) +{ + // KillIndex must be defined and DefIndex not defined for a register + // to be live. + return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); +} + + + +AggressiveAntiDepBreaker:: +AggressiveAntiDepBreaker(MachineFunction& MFi, + TargetSubtarget::RegClassVector& CriticalPathRCs) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), + TRI(MF.getTarget().getRegisterInfo()), + AllocatableSet(TRI->getAllocatableSet(MF)), + State(NULL) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << '\n'); +} + +AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { + delete State; +} + +void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + assert(State == NULL); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + std::vector &KillIndices = State->GetKillIndices(); + std::vector &DefIndices = State->GetDefIndices(); + + // Determine the live-out physregs for this block. + if (IsReturnBlock) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + } + } + } + + // In a non-return block, examine the live-in regs of all successors. 
+ // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + for (const unsigned *Alias = TRI->getOverlaps(*I); + unsigned Reg = *Alias; ++Alias) { + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) { + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void AggressiveAntiDepBreaker::FinishBlock() { + delete State; + State = NULL; +} + +void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + std::set PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + PrescanInstruction(MI, Count, PassthruRegs); + ScanInstruction(MI, Count); + + DEBUG(dbgs() << "Observe: "); + DEBUG(MI->dump()); + DEBUG(dbgs() << "\tRegs:"); + + std::vector &DefIndices = State->GetDefIndices(); + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + // If Reg is current live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). If it is not live but was defined in the + // previous schedule region, then set its def index to the most + // conservative location (i.e. the beginning of the previous + // schedule region). + if (State->IsLive(Reg)) { + DEBUG(if (State->GetGroup(Reg) != 0) + dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg) << "->g0(region live-out)"); + State->UnionGroups(Reg, 0); + } else if ((DefIndices[Reg] < InsertPosIndex) + && (DefIndices[Reg] >= Count)) { + DefIndices[Reg] = Count; + } + } + DEBUG(dbgs() << '\n'); +} + +bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, + MachineOperand& MO) +{ + if (!MO.isReg() || !MO.isImplicit()) + return false; + + unsigned Reg = MO.getReg(); + if (Reg == 0) + return false; + + MachineOperand *Op = NULL; + if (MO.isDef()) + Op = MI->findRegisterUseOperand(Reg, true); + else + Op = MI->findRegisterDefOperand(Reg); + + return((Op != NULL) && Op->isImplicit()); +} + +void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, + std::set& PassthruRegs) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || + IsImplicitDefUse(MI, MO)) { + const unsigned Reg = MO.getReg(); + PassthruRegs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + PassthruRegs.insert(*Subreg); + } + } + } +} + +/// AntiDepEdges - Return in Edges the anti- and output- dependencies +/// in SU that we want to consider for breaking. 
+static void AntiDepEdges(const SUnit *SU, std::vector& Edges) { + SmallSet RegSet; + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { + unsigned Reg = P->getReg(); + if (RegSet.count(Reg) == 0) { + Edges.push_back(&*P); + RegSet.insert(Reg); + } + } + } +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + if (SU != 0) { + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + } + + return (Next) ? Next->getSUnit() : 0; +} + +void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, + const char *tag, + const char *header, + const char *footer) { + std::vector &KillIndices = State->GetKillIndices(); + std::vector &DefIndices = State->GetDefIndices(); + std::multimap& + RegRefs = State->GetRegRefs(); + + if (!State->IsLive(Reg)) { + KillIndices[Reg] = KillIdx; + DefIndices[Reg] = ~0u; + RegRefs.erase(Reg); + State->LeaveGroup(Reg); + DEBUG(if (header != NULL) { + dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); + } + // Repeat for subregisters. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + if (!State->IsLive(SubregReg)) { + KillIndices[SubregReg] = KillIdx; + DefIndices[SubregReg] = ~0u; + RegRefs.erase(SubregReg); + State->LeaveGroup(SubregReg); + DEBUG(if (header != NULL) { + dbgs() << header << TRI->getName(Reg); header = NULL; }); + DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + State->GetGroup(SubregReg) << tag); + } + } + + DEBUG(if ((header == NULL) && (footer != NULL)) dbgs() << footer); +} + +void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, + unsigned Count, + std::set& PassthruRegs) { + std::vector &DefIndices = State->GetDefIndices(); + std::multimap& + RegRefs = State->GetRegRefs(); + + // Handle dead defs by simulating a last-use of the register just + // after the def. A dead def can occur because the def is truely + // dead, or because only a subregister is live at the def. If we + // don't do this the dead def will be incorrectly merged into the + // previous def. 
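+  // (Illustrative case, in pseudo machine code:
+  //    r1<def,dead> = ADD r2, r3   // truly dead, or only a subreg is live
+  //    ...
+  //    r1 = MOV r4                 // unrelated fresh def of r1
+  //  the simulated last-use keeps these two defs in separate rename groups.)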
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); + } + + DEBUG(dbgs() << "\tDef Groups:"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Any aliased that are live at this point are completely or + // partially defined here, so group those aliases with Reg. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (State->IsLive(AliasReg)) { + State->UnionGroups(Reg, AliasReg); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << + TRI->getName(AliasReg) << ")"); + } + } + + // Note register reference... + const TargetRegisterClass *RC = NULL; + if (i < MI->getDesc().getNumOperands()) + RC = MI->getDesc().OpInfo[i].getRegClass(TRI); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Scan the register defs for this instruction and update + // live-ranges. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + // Ignore KILLs and passthru registers for liveness... + if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) + continue; + + // Update def for Reg and aliases. + for (const unsigned *Alias = TRI->getOverlaps(Reg); + unsigned AliasReg = *Alias; ++Alias) + DefIndices[AliasReg] = Count; + } +} + +void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + DEBUG(dbgs() << "\tUse Groups:"); + std::multimap& + RegRefs = State->GetRegRefs(); + + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + + // Scan the register uses for this instruction and update + // live-ranges, groups and RegRefs. 
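+  // (The group bookkeeping used here is a small union-find over the names
+  // above: GetGroup chases GroupNodes links to a root, UnionGroups links two
+  // roots, LeaveGroup makes a fresh singleton, and group 0 is the
+  // distinguished "never rename" group.  Hypothetical trace:
+  //   State->UnionGroups(Reg, 0);   // pin Reg: GetGroup(Reg) == 0
+  //   State->LeaveGroup(Reg);       // detach Reg into a new group
+  // )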
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg)); + + // It wasn't previously live but now it is, this is a kill. Forget + // the previous live-range information and start a new live-range + // for the register. + HandleLastUse(Reg, Count, "(last-use)"); + + if (Special) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Note register reference... + const TargetRegisterClass *RC = NULL; + if (i < MI->getDesc().getNumOperands()) + RC = MI->getDesc().OpInfo[i].getRegClass(TRI); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Form a group of all defs and uses of a KILL instruction to ensure + // that all registers are renamed as a group. + if (MI->isKill()) { + DEBUG(dbgs() << "\tKill Group:"); + + unsigned FirstReg = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (FirstReg != 0) { + DEBUG(dbgs() << "=" << TRI->getName(Reg)); + State->UnionGroups(FirstReg, Reg); + } else { + DEBUG(dbgs() << " " << TRI->getName(Reg)); + FirstReg = Reg; + } + } + + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); + } +} + +BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { + BitVector BV(TRI->getNumRegs(), false); + bool first = true; + + // Check all references that need rewriting for Reg. For each, use + // the corresponding register class to narrow the set of registers + // that are appropriate for renaming. + std::pair::iterator, + std::multimap::iterator> + Range = State->GetRegRefs().equal_range(Reg); + for (std::multimap::iterator Q = Range.first, + QE = Range.second; Q != QE; ++Q) { + const TargetRegisterClass *RC = Q->second.RC; + if (RC == NULL) continue; + + BitVector RCBV = TRI->getAllocatableSet(MF, RC); + if (first) { + BV |= RCBV; + first = false; + } else { + BV &= RCBV; + } + + DEBUG(dbgs() << " " << RC->getName()); + } + + return BV; +} + +bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( + unsigned AntiDepGroupIndex, + RenameOrderType& RenameOrder, + std::map &RenameMap) { + std::vector &KillIndices = State->GetKillIndices(); + std::vector &DefIndices = State->GetDefIndices(); + std::multimap& + RegRefs = State->GetRegRefs(); + + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. + std::vector Regs; + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); + assert(Regs.size() > 0 && "Empty register group!"); + if (Regs.size() == 0) + return false; + + // Find the "superest" register in the group. At the same time, + // collect the BitVector of registers that can be used to rename + // each register. 
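+  // (Illustrative x86 flavor of the invariant enforced below: if the group
+  // references EAX and AX together, SuperReg becomes EAX, and a candidate
+  // such as ECX is only usable if its matching subregister CX is also free
+  // for every AX reference.)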
+ DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); + std::map RenameRegisterMap; + unsigned SuperReg = 0; + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg)) + SuperReg = Reg; + + // If Reg has any references, then collect possible rename regs + if (RegRefs.count(Reg) > 0) { + DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + + BitVector BV = GetRenameRegisters(Reg); + RenameRegisterMap.insert(std::pair(Reg, BV)); + + DEBUG(dbgs() << " ::"); + DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << "\n"); + } + } + + // All group registers should be a subreg of SuperReg. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if (Reg == SuperReg) continue; + bool IsSub = TRI->isSubRegister(SuperReg, Reg); + assert(IsSub && "Expecting group subregister"); + if (!IsSub) + return false; + } + +#ifndef NDEBUG + // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int renamecnt = 0; + if (renamecnt++ % DebugDiv != DebugMod) + return false; + + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << + " for debug ***\n"; + } +#endif + + // Check each possible rename register for SuperReg in round-robin + // order. If that register is available, and the corresponding + // registers are available for the other group subregisters, then we + // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. + const TargetRegisterClass *SuperRC = + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); + + const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); + const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); + if (RB == RE) { + DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); + return false; + } + + DEBUG(dbgs() << "\tFind Registers:"); + + if (RenameOrder.count(SuperRC) == 0) + RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); + + const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; + const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); + TargetRegisterClass::iterator R = OrigR; + do { + if (R == RB) R = RE; + --R; + const unsigned NewSuperReg = *R; + // Don't consider non-allocatable registers + if (!AllocatableSet.test(NewSuperReg)) continue; + // Don't replace a register with itself. + if (NewSuperReg == SuperReg) continue; + + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + RenameMap.clear(); + + // For each referenced group register (which must be a SuperReg or + // a subregister of SuperReg), find the corresponding subregister + // of NewSuperReg and make sure it is free to be renamed. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + unsigned NewReg = 0; + if (Reg == SuperReg) { + NewReg = NewSuperReg; + } else { + unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg); + if (NewSubRegIdx != 0) + NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); + } + + DEBUG(dbgs() << " " << TRI->getName(NewReg)); + + // Check if Reg can be renamed to NewReg. 
+ BitVector BV = RenameRegisterMap[Reg]; + if (!BV.test(NewReg)) { + DEBUG(dbgs() << "(no rename)"); + goto next_super_reg; + } + + // If NewReg is dead and NewReg's most recent def is not before + // Regs's kill, it's safe to replace Reg with NewReg. We + // must also check all aliases of NewReg, because we can't define a + // register when any sub or super is already live. + if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { + DEBUG(dbgs() << "(live)"); + goto next_super_reg; + } else { + bool found = false; + for (const unsigned *Alias = TRI->getAliasSet(NewReg); + *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (State->IsLive(AliasReg) || + (KillIndices[Reg] > DefIndices[AliasReg])) { + DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); + found = true; + break; + } + } + if (found) + goto next_super_reg; + } + + // Record that 'Reg' can be renamed to 'NewReg'. + RenameMap.insert(std::pair(Reg, NewReg)); + } + + // If we fall-out here, then every register in the group can be + // renamed, as recorded in RenameMap. + RenameOrder.erase(SuperRC); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); + DEBUG(dbgs() << "]\n"); + return true; + + next_super_reg: + DEBUG(dbgs() << ']'); + } while (R != EndR); + + DEBUG(dbgs() << '\n'); + + // No registers are free and available! + return false; +} + +/// BreakAntiDependencies - Identifiy anti-dependencies within the +/// ScheduleDAG and break them by renaming registers. +/// +unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( + const std::vector& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex) { + std::vector &KillIndices = State->GetKillIndices(); + std::vector &DefIndices = State->GetDefIndices(); + std::multimap& + RegRefs = State->GetRegRefs(); + + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // For each regclass the next register to use for renaming. + RenameOrderType RenameOrder; + + // ...need a map from MI to SUnit. + std::map MISUnitMap; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair(SU->getInstr(), + SU)); + } + + // Track progress along the critical path through the SUnit graph as + // we walk the instructions. This is needed for regclasses that only + // break critical-path anti-dependencies. + const SUnit *CriticalPathSU = 0; + MachineInstr *CriticalPathMI = 0; + if (CriticalPathSet.any()) { + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > + (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { + CriticalPathSU = SU; + } + } + + CriticalPathMI = CriticalPathSU->getInstr(); + } + +#ifndef NDEBUG + DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (!State->IsLive(Reg)) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); +#endif + + // Attempt to break anti-dependence edges. Walk the instructions + // from the bottom up, tracking information about liveness as we go + // to help determine which registers are available. 
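+  // (Terminology used below, on a two-line pseudo example:
+  //    ... = r2 + 1    // last use of r2
+  //    r2  = r3 * 2    // anti-dependence (write-after-read) on r2
+  //  renaming the second r2 and its later uses to a free register removes
+  //  the hazard, letting post-RA scheduling reorder the pair.)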
+  unsigned Broken = 0;
+  unsigned Count = InsertPosIndex - 1;
+  for (MachineBasicBlock::iterator I = End, E = Begin;
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+
+    DEBUG(dbgs() << "Anti: ");
+    DEBUG(MI->dump());
+
+    std::set<unsigned> PassthruRegs;
+    GetPassthruRegs(MI, PassthruRegs);
+
+    // Process the defs in MI...
+    PrescanInstruction(MI, Count, PassthruRegs);
+
+    // The dependence edges that represent anti- and output-
+    // dependencies that are candidates for breaking.
+    std::vector<const SDep *> Edges;
+    const SUnit *PathSU = MISUnitMap[MI];
+    AntiDepEdges(PathSU, Edges);
+
+    // If MI is not on the critical path, then we don't rename
+    // registers in the CriticalPathSet.
+    BitVector *ExcludeRegs = NULL;
+    if (MI == CriticalPathMI) {
+      CriticalPathSU = CriticalPathStep(CriticalPathSU);
+      CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+    } else {
+      ExcludeRegs = &CriticalPathSet;
+    }
+
+    // Ignore KILL instructions (they form a group in ScanInstruction
+    // but don't cause any anti-dependence breaking themselves)
+    if (!MI->isKill()) {
+      // Attempt to break each anti-dependency...
+      for (unsigned i = 0, e = Edges.size(); i != e; ++i) {
+        const SDep *Edge = Edges[i];
+        SUnit *NextSU = Edge->getSUnit();
+
+        if ((Edge->getKind() != SDep::Anti) &&
+            (Edge->getKind() != SDep::Output)) continue;
+
+        unsigned AntiDepReg = Edge->getReg();
+        DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
+        assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+
+        if (!AllocatableSet.test(AntiDepReg)) {
+          // Don't break anti-dependencies on non-allocatable registers.
+          DEBUG(dbgs() << " (non-allocatable)\n");
+          continue;
+        } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+          // Don't break anti-dependencies for critical path registers
+          // if not on the critical path
+          DEBUG(dbgs() << " (not critical-path)\n");
+          continue;
+        } else if (PassthruRegs.count(AntiDepReg) != 0) {
+          // If the anti-dep register liveness "passes-thru", then
+          // don't try to change it. It will be changed along with
+          // the use if required to break an earlier antidep.
+          DEBUG(dbgs() << " (passthru)\n");
+          continue;
+        } else {
+          // No anti-dep breaking for implicit deps
+          MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg);
+          assert(AntiDepOp != NULL &&
+                 "Can't find index for defined register operand");
+          if ((AntiDepOp == NULL) || AntiDepOp->isImplicit()) {
+            DEBUG(dbgs() << " (implicit)\n");
+            continue;
+          }
+
+          // If the SUnit has other dependencies on the SUnit that
+          // it anti-depends on, don't bother breaking the
+          // anti-dependency since those edges would prevent such
+          // units from being scheduled past each other
+          // regardless.
+          //
+          // Also, if there are dependencies on other SUnits with the
+          // same register as the anti-dependency, don't attempt to
+          // break it.
+          for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+                 PE = PathSU->Preds.end(); P != PE; ++P) {
+            if (P->getSUnit() == NextSU ?
+                (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+                (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+              AntiDepReg = 0;
+              break;
+            }
+          }
+          for (SUnit::const_pred_iterator P = PathSU->Preds.begin(),
+                 PE = PathSU->Preds.end(); P != PE; ++P) {
+            if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) &&
+                (P->getKind() != SDep::Output)) {
+              DEBUG(dbgs() << " (real dependency)\n");
+              AntiDepReg = 0;
+              break;
+            } else if ((P->getSUnit() != NextSU) &&
+                       (P->getKind() == SDep::Data) &&
+                       (P->getReg() == AntiDepReg)) {
+              DEBUG(dbgs() << " (other dependency)\n");
+              AntiDepReg = 0;
+              break;
+            }
+          }
+
+          if (AntiDepReg == 0) continue;
+        }
+
+        assert(AntiDepReg != 0);
+        if (AntiDepReg == 0) continue;
+
+        // Determine AntiDepReg's register group.
+        const unsigned GroupIndex = State->GetGroup(AntiDepReg);
+        if (GroupIndex == 0) {
+          DEBUG(dbgs() << " (zero group)\n");
+          continue;
+        }
+
+        DEBUG(dbgs() << '\n');
+
+        // Look for a suitable register to use to break the anti-dependence.
+        std::map<unsigned, unsigned> RenameMap;
+        if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) {
+          DEBUG(dbgs() << "\tBreaking anti-dependence edge on "
+                << TRI->getName(AntiDepReg) << ":");
+
+          // Handle each group register...
+          for (std::map<unsigned, unsigned>::iterator
+                 S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) {
+            unsigned CurrReg = S->first;
+            unsigned NewReg = S->second;
+
+            DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" <<
+                  TRI->getName(NewReg) << "(" <<
+                  RegRefs.count(CurrReg) << " refs)");
+
+            // Update the references to the old register CurrReg to
+            // refer to the new register NewReg.
+            std::pair<std::multimap<unsigned,
+                        AggressiveAntiDepState::RegisterReference>::iterator,
+                      std::multimap<unsigned,
+                        AggressiveAntiDepState::RegisterReference>::iterator>
+              Range = RegRefs.equal_range(CurrReg);
+            for (std::multimap<unsigned,
+                   AggressiveAntiDepState::RegisterReference>::iterator
+                   Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+              Q->second.Operand->setReg(NewReg);
+              // If the SU for the instruction being updated has debug
+              // information related to the anti-dependency register, make
+              // sure to update that as well.
+              const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+              if (!SU) continue;
+              for (unsigned i = 0, e = SU->DbgInstrList.size(); i < e; ++i) {
+                MachineInstr *DI = SU->DbgInstrList[i];
+                assert(DI->getNumOperands()==3 && DI->getOperand(0).isReg() &&
+                       DI->getOperand(0).getReg()
+                       && "Non register dbg_value attached to SUnit!");
+                if (DI->getOperand(0).getReg() == AntiDepReg)
+                  DI->getOperand(0).setReg(NewReg);
+              }
+            }
+
+            // We just went back in time and modified history; the
+            // liveness information for CurrReg is now inconsistent. Set
+            // the state as if it were dead.
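+            // (Below, NewReg inherits CurrReg's def/kill indices, while
+            //  CurrReg is re-encoded as dead: its DefIndices entry takes the
+            //  old kill index and its KillIndices entry becomes ~0u.)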
+            State->UnionGroups(NewReg, 0);
+            RegRefs.erase(NewReg);
+            DefIndices[NewReg] = DefIndices[CurrReg];
+            KillIndices[NewReg] = KillIndices[CurrReg];
+
+            State->UnionGroups(CurrReg, 0);
+            RegRefs.erase(CurrReg);
+            DefIndices[CurrReg] = KillIndices[CurrReg];
+            KillIndices[CurrReg] = ~0u;
+            assert(((KillIndices[CurrReg] == ~0u) !=
+                    (DefIndices[CurrReg] == ~0u)) &&
+                   "Kill and Def maps aren't consistent for AntiDepReg!");
+          }
+
+          ++Broken;
+          DEBUG(dbgs() << '\n');
+        }
+      }
+    }
+
+    ScanInstruction(MI, Count);
+  }
+
+  return Broken;
+}
diff --git a/final/lib/CodeGen/AggressiveAntiDepBreaker.h b/final/lib/CodeGen/AggressiveAntiDepBreaker.h
new file mode 100644
index 00000000000..9d715ccf79f
--- /dev/null
+++ b/final/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -0,0 +1,184 @@
+//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AggressiveAntiDepBreaker class, which
+// implements register anti-dependence breaking during post-RA
+// scheduling. It attempts to break all anti-dependencies within a
+// block.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+#define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H
+
+#include "AntiDepBreaker.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include <map>
+
+namespace llvm {
+  /// Class AggressiveAntiDepState
+  /// Contains all the state necessary for anti-dep breaking.
+  class AggressiveAntiDepState {
+  public:
+    /// RegisterReference - Information about a register reference
+    /// within a live range.
+    typedef struct {
+      /// Operand - The register's operand
+      MachineOperand *Operand;
+      /// RC - The register class
+      const TargetRegisterClass *RC;
+    } RegisterReference;
+
+  private:
+    /// NumTargetRegs - Number of non-virtual target registers
+    /// (i.e. TRI->getNumRegs()).
+    const unsigned NumTargetRegs;
+
+    /// GroupNodes - Implements a disjoint-union data structure to
+    /// form register groups. A node is represented by an index into
+    /// the vector. A node can "point to" itself to indicate that it
+    /// is the parent of a group, or point to another node to indicate
+    /// that it is a member of the same group as that node.
+    std::vector<unsigned> GroupNodes;
+
+    /// GroupNodeIndices - For each register, the index of the GroupNode
+    /// currently representing the group that the register belongs to.
+    /// Register 0 is always represented by the 0 group, a group
+    /// composed of registers that are not eligible for anti-dependency
+    /// breaking.
+    std::vector<unsigned> GroupNodeIndices;
+
+    /// RegRefs - Map registers to all their references within a live range.
+    std::multimap<unsigned, RegisterReference> RegRefs;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
+    std::vector<unsigned> DefIndices;
+
+  public:
+    AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
+
+    /// GetKillIndices - Return the kill indices.
+    std::vector<unsigned> &GetKillIndices() { return KillIndices; }
+
+    /// GetDefIndices - Return the define indices.
+    std::vector<unsigned> &GetDefIndices() { return DefIndices; }
+
+    /// GetRegRefs - Return the RegRefs map.
+    std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; }
+
+    // GetGroup - Get the group for a register. The returned value is
+    // the index of the GroupNode representing the group.
+    unsigned GetGroup(unsigned Reg);
+
+    // GetGroupRegs - Return a vector of the registers belonging to a
+    // group. If RegRefs is non-NULL then only include referenced registers.
+    void GetGroupRegs(
+       unsigned Group,
+       std::vector<unsigned> &Regs,
+       std::multimap<unsigned,
+         AggressiveAntiDepState::RegisterReference> *RegRefs);
+
+    // UnionGroups - Union Reg1's and Reg2's groups to form a new
+    // group. Return the index of the GroupNode representing the
+    // group.
+    unsigned UnionGroups(unsigned Reg1, unsigned Reg2);
+
+    // LeaveGroup - Remove a register from its current group and place
+    // it alone in its own group. Return the index of the GroupNode
+    // representing the register's new group.
+    unsigned LeaveGroup(unsigned Reg);
+
+    /// IsLive - Return true if Reg is live.
+    bool IsLive(unsigned Reg);
+  };
+
+
+  /// Class AggressiveAntiDepBreaker
+  class AggressiveAntiDepBreaker : public AntiDepBreaker {
+    MachineFunction& MF;
+    MachineRegisterInfo &MRI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+
+    /// AllocatableSet - The set of allocatable registers.
+    /// We'll be ignoring anti-dependencies on non-allocatable registers,
+    /// because they may not be safe to break.
+    const BitVector AllocatableSet;
+
+    /// CriticalPathSet - The set of registers that should only be
+    /// renamed if they are on the critical path.
+    BitVector CriticalPathSet;
+
+    /// State - The state used to identify and rename anti-dependence
+    /// registers.
+    AggressiveAntiDepState *State;
+
+  public:
+    AggressiveAntiDepBreaker(MachineFunction& MFi,
+                             TargetSubtarget::RegClassVector& CriticalPathRCs);
+    ~AggressiveAntiDepBreaker();
+
+    /// Start - Initialize anti-dep breaking for a new basic block.
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path of the ScheduleDAG and break them by renaming registers.
+    ///
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
+                                   unsigned InsertPosIndex);
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+    /// Finish - Finish anti-dep breaking for a basic block.
+    void FinishBlock();
+
+  private:
+    typedef std::map<const TargetRegisterClass *,
+                     TargetRegisterClass::iterator> RenameOrderType;
+
+    /// IsImplicitDefUse - Return true if MO represents a register
+    /// that is both implicitly used and defined in MI.
+    bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO);
+
+    /// GetPassthruRegs - If MI implicitly def/uses a register, then
+    /// return that register and all subregisters.
+    void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs);
+
+    void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag,
+                       const char *header =NULL, const char *footer =NULL);
+
+    void PrescanInstruction(MachineInstr *MI, unsigned Count,
+                            std::set<unsigned>& PassthruRegs);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    BitVector GetRenameRegisters(unsigned Reg);
+    bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex,
+                                   RenameOrderType& RenameOrder,
+                                   std::map<unsigned, unsigned> &RenameMap);
+  };
+}
+
+#endif
diff --git a/final/lib/CodeGen/AllocationOrder.cpp b/final/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 00000000000..20c7625f325
--- /dev/null
+++ b/final/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,68 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+                                 const VirtRegMap &VRM,
+                                 const BitVector &ReservedRegs)
+  : Pos(0), Reserved(ReservedRegs) {
+  const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+  std::pair<unsigned, unsigned> HintPair =
+    VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+  // HintPair.second is a register, phys or virt.
+  Hint = HintPair.second;
+
+  // Translate to physreg, or 0 if not assigned yet.
+  if (TargetRegisterInfo::isVirtualRegister(Hint))
+    Hint = VRM.getPhys(Hint);
+
+  // The remaining allocation order may depend on the hint.
+  tie(Begin, End) = VRM.getTargetRegInfo()
+        .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction());
+
+  // Target-dependent hints require resolution.
+  if (HintPair.first)
+    Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint,
+                                                      VRM.getMachineFunction());
+
+  // The hint must be a valid physreg for allocation.
+  if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+               !RC->contains(Hint) || ReservedRegs.test(Hint)))
+    Hint = 0;
+}
+
+unsigned AllocationOrder::next() {
+  // First take the hint.
+  if (!Pos) {
+    Pos = Begin;
+    if (Hint)
+      return Hint;
+  }
+  // Then look at the order from TRI.
+  while (Pos != End) {
+    unsigned Reg = *Pos++;
+    if (Reg != Hint && !Reserved.test(Reg))
+      return Reg;
+  }
+  return 0;
+}
diff --git a/final/lib/CodeGen/AllocationOrder.h b/final/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 00000000000..61fd8f881a8
--- /dev/null
+++ b/final/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,56 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class BitVector;
+class VirtRegMap;
+
+class AllocationOrder {
+  const unsigned *Begin;
+  const unsigned *End;
+  const unsigned *Pos;
+  const BitVector &Reserved;
+  unsigned Hint;
+public:
+
+  /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+  /// @param VirtReg      Virtual register to allocate for.
+  /// @param VRM          Virtual register map for function.
+  /// @param ReservedRegs Set of reserved registers as returned by
+  ///        TargetRegisterInfo::getReservedRegs().
+  AllocationOrder(unsigned VirtReg,
+                  const VirtRegMap &VRM,
+                  const BitVector &ReservedRegs);
+
+  /// next - Return the next physical register in the allocation order, or 0.
+  /// It is safe to call next again after it returned 0.
+  /// It will keep returning 0 until rewind() is called.
+  unsigned next();
+
+  /// rewind - Start over from the beginning.
+  void rewind() { Pos = 0; }
+
+  /// isHint - Return true if PhysReg is a preferred register.
+  bool isHint(unsigned PhysReg) const { return PhysReg == Hint; }
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/CodeGen/Analysis.cpp b/final/lib/CodeGen/Analysis.cpp
new file mode 100644
index 00000000000..36638c36de6
--- /dev/null
+++ b/final/lib/CodeGen/Analysis.cpp
@@ -0,0 +1,303 @@
+//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities --*- C++ ------*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines several CodeGen-specific LLVM IR analysis utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence
+/// of insertvalue or extractvalue indices that identify a member, return
+/// the linearized index of the start of the member.
+///
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
+                                  const unsigned *Indices,
+                                  const unsigned *IndicesEnd,
+                                  unsigned CurIndex) {
+  // Base case: We're done.
+  if (Indices && Indices == IndicesEnd)
+    return CurIndex;
+
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI) {
+      if (Indices && *Indices == unsigned(EI - EB))
+        return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // Given an array type, recursively traverse the elements.
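+  // (Illustrative example: for the type {i32, [2 x float], double} with the
+  //  single index {2}, the linearized index of the double is 3, since one
+  //  i32 leaf and two float leaves precede it.)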
+  else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
+      if (Indices && *Indices == i)
+        return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+      CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
+    }
+    return CurIndex;
+  }
+  // We haven't found the type we're looking for, so keep searching.
+  return CurIndex + 1;
+}
+
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty,
+                           SmallVectorImpl<EVT> &ValueVTs,
+                           SmallVectorImpl<uint64_t> *Offsets,
+                           uint64_t StartingOffset) {
+  // Given a struct type, recursively traverse the elements.
+  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+    const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy);
+    for (StructType::element_iterator EB = STy->element_begin(),
+                                      EI = EB,
+                                      EE = STy->element_end();
+         EI != EE; ++EI)
+      ComputeValueVTs(TLI, *EI, ValueVTs, Offsets,
+                      StartingOffset + SL->getElementOffset(EI - EB));
+    return;
+  }
+  // Given an array type, recursively traverse the elements.
+  if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+    const Type *EltTy = ATy->getElementType();
+    uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy);
+    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+      ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets,
+                      StartingOffset + i * EltSize);
+    return;
+  }
+  // Interpret void as zero return values.
+  if (Ty->isVoidTy())
+    return;
+  // Base case: we can get an EVT for this LLVM IR type.
+  ValueVTs.push_back(TLI.getValueType(Ty));
+  if (Offsets)
+    Offsets->push_back(StartingOffset);
+}
+
+/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
+GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
+  V = V->stripPointerCasts();
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
+
+  if (GV && GV->getName() == "llvm.eh.catch.all.value") {
+    assert(GV->hasInitializer() &&
+           "The EH catch-all value must have an initializer");
+    Value *Init = GV->getInitializer();
+    GV = dyn_cast<GlobalVariable>(Init);
+    if (!GV) V = cast<ConstantPointerNull>(Init);
+  }
+
+  assert((GV || isa<ConstantPointerNull>(V)) &&
+         "TypeInfo must be a global variable or NULL");
+  return GV;
+}
+
+/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
+/// processed uses a memory 'm' constraint.
+bool
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
+                                const TargetLowering &TLI) {
+  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
+    InlineAsm::ConstraintInfo &CI = CInfos[i];
+    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
+      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
+      if (CType == TargetLowering::C_Memory)
+        return true;
+    }
+
+    // Indirect operand accesses access memory.
+    if (CI.isIndirect)
+      return true;
+  }
+
+  return false;
+}
+
+/// getFCmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR floating-point condition code. This includes
+/// consideration of global floating-point math flags.
+///
+ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) {
+  ISD::CondCode FPC, FOC;
+  switch (Pred) {
+  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
+  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
+  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
+  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
+  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
+  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
+  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
+  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
+  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
+  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
+  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
+  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
+  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
+  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
+  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
+  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
+  default:
+    llvm_unreachable("Invalid FCmp predicate opcode!");
+    FOC = FPC = ISD::SETFALSE;
+    break;
+  }
+  if (NoNaNsFPMath)
+    return FOC;
+  else
+    return FPC;
+}
+
+/// getICmpCondCode - Return the ISD condition code corresponding to
+/// the given LLVM IR integer condition code.
+///
+ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) {
+  switch (Pred) {
+  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
+  case ICmpInst::ICMP_NE:  return ISD::SETNE;
+  case ICmpInst::ICMP_SLE: return ISD::SETLE;
+  case ICmpInst::ICMP_ULE: return ISD::SETULE;
+  case ICmpInst::ICMP_SGE: return ISD::SETGE;
+  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
+  case ICmpInst::ICMP_SLT: return ISD::SETLT;
+  case ICmpInst::ICMP_ULT: return ISD::SETULT;
+  case ICmpInst::ICMP_SGT: return ISD::SETGT;
+  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
+  default:
+    llvm_unreachable("Invalid ICmp predicate opcode!");
+    return ISD::SETNE;
+  }
+}
+
+/// Test if the given instruction is in a position to be optimized
+/// with a tail-call. This roughly means that it's in a block with
+/// a return and there's nothing that needs to be scheduled
+/// between it and the return.
+///
+/// This function only tests target-independent requirements.
+bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
+                                const TargetLowering &TLI) {
+  const Instruction *I = CS.getInstruction();
+  const BasicBlock *ExitBB = I->getParent();
+  const TerminatorInst *Term = ExitBB->getTerminator();
+  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
+  const Function *F = ExitBB->getParent();
+
+  // The block must end in a return statement or unreachable.
+  //
+  // FIXME: Decline tailcall if it's not guaranteed and if the block ends in
+  // an unreachable, for now. The way tailcall optimization is currently
+  // implemented means it will add an epilogue followed by a jump. That is
+  // not profitable. Also, if the callee is a special function (e.g.
+  // longjmp on x86), it can end up causing miscompilation that has not
+  // been fully understood.
+  if (!Ret &&
+      (!GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false;
+
+  // If I will have a chain, make sure no other instruction that will have a
+  // chain interposes between I and the return.
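+  // (prior(prior(ExitBB->end())) is the instruction immediately before the
+  //  terminator; the scan below walks backward from there until it reaches I,
+  //  skipping debug intrinsics along the way.)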
+  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
+      !I->isSafeToSpeculativelyExecute())
+    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
+         --BBI) {
+      if (&*BBI == I)
+        break;
+      // Debug info intrinsics do not get in the way of tail call optimization.
+      if (isa<DbgInfoIntrinsic>(BBI))
+        continue;
+      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
+          !BBI->isSafeToSpeculativelyExecute())
+        return false;
+    }
+
+  // If the block ends with a void return or unreachable, it doesn't matter
+  // what the call's return type is.
+  if (!Ret || Ret->getNumOperands() == 0) return true;
+
+  // If the return value is undef, it doesn't matter what the call's
+  // return type is.
+  if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Otherwise, make sure the unmodified return value of I is the return value.
+  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
+       U = dyn_cast<Instruction>(U->getOperand(0))) {
+    if (!U)
+      return false;
+    if (!U->hasOneUse())
+      return false;
+    if (U == I)
+      break;
+    // Check for a truly no-op truncate.
+    if (isa<TruncInst>(U) &&
+        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
+      continue;
+    // Check for a truly no-op bitcast.
+    if (isa<BitCastInst>(U) &&
+        (U->getOperand(0)->getType() == U->getType() ||
+         (U->getOperand(0)->getType()->isPointerTy() &&
+          U->getType()->isPointerTy())))
+      continue;
+    // Otherwise it's not a true no-op.
+    return false;
+  }
+
+  return true;
+}
+
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+                                const TargetLowering &TLI) {
+  const Function *F = DAG.getMachineFunction().getFunction();
+
+  // Conservatively require the attributes of the call to match those of
+  // the return. Ignore noalias because it doesn't affect the call sequence.
+  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+  if (CallerRetAttr & ~Attribute::NoAlias)
+    return false;
+
+  // It's not safe to eliminate the sign / zero extension of the return value.
+  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+    return false;
+
+  // Check if the only use is a function return node.
+  return TLI.isUsedByReturnOnly(Node);
+}
diff --git a/final/lib/CodeGen/AntiDepBreaker.h b/final/lib/CodeGen/AntiDepBreaker.h
new file mode 100644
index 00000000000..086b7579563
--- /dev/null
+++ b/final/lib/CodeGen/AntiDepBreaker.h
@@ -0,0 +1,59 @@
+//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AntiDepBreaker class, which implements
+// anti-dependence breaking heuristics for post-register-allocation scheduling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ANTIDEPBREAKER_H
+#define LLVM_CODEGEN_ANTIDEPBREAKER_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <vector>
+
+namespace llvm {
+
+/// AntiDepBreaker - This class works in conjunction with the
+/// post-RA scheduler to rename registers to break register
+/// anti-dependencies.
+class AntiDepBreaker {
+public:
+  virtual ~AntiDepBreaker();
+
+  /// Start - Initialize anti-dep breaking for a new basic block.
+  virtual void StartBlock(MachineBasicBlock *BB) =0;
+
+  /// BreakAntiDependencies - Identify anti-dependencies within a
+  /// basic-block region and break them by renaming registers. Return
+  /// the number of anti-dependencies broken.
+  ///
+  virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                         MachineBasicBlock::iterator Begin,
+                                         MachineBasicBlock::iterator End,
+                                         unsigned InsertPosIndex) =0;
+
+  /// Observe - Update liveness information to account for the current
+  /// instruction, which will not be scheduled.
+  ///
+  virtual void Observe(MachineInstr *MI, unsigned Count,
+                       unsigned InsertPosIndex) =0;
+
+  /// Finish - Finish anti-dep breaking for a basic block.
+  virtual void FinishBlock() =0;
+};
+
+}
+
+#endif
diff --git a/final/lib/CodeGen/AsmPrinter/ARMException.cpp b/final/lib/CodeGen/AsmPrinter/ARMException.cpp
new file mode 100644
index 00000000000..0db28a636ad
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -0,0 +1,87 @@
+//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+ARMException::ARMException(AsmPrinter *A)
+  : DwarfException(A),
+    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+    {}
+
+ARMException::~ARMException() {}
+
+void ARMException::EndModule() {
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void ARMException::BeginFunction(const MachineFunction *MF) {
+  Asm->OutStreamer.EmitFnStart();
+  if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory)
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void ARMException::EndFunction() {
+  if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory)
+    Asm->OutStreamer.EmitCantUnwind();
+  else {
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                  Asm->getFunctionNumber()));
+
+    // Emit references to personality.
+    if (const Function *Personality =
+        MMI->getPersonalities()[MMI->getPersonalityIndex()]) {
+      MCSymbol *PerSym = Asm->Mang->getSymbol(Personality);
+      Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global);
+      Asm->OutStreamer.EmitPersonality(PerSym);
+    }
+
+    // Map all labels and get rid of any dead landing pads.
+    MMI->TidyLandingPads();
+
+    Asm->OutStreamer.EmitHandlerData();
+
+    // Emit actual exception table
+    EmitExceptionTable();
+  }
+
+  Asm->OutStreamer.EmitFnEnd();
+}
diff --git a/final/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/final/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
new file mode 100644
index 00000000000..f740c7400bc
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -0,0 +1,1904 @@
+//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "DwarfDebug.h"
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Timer.h"
+using namespace llvm;
+
+static const char *DWARFGroupName = "DWARF Emission";
+static const char *DbgTimerName = "DWARF Debug Writer";
+static const char *EHTimerName = "DWARF Exception Writer";
+
+STATISTIC(EmittedInsts, "Number of machine instrs printed");
+
+char AsmPrinter::ID = 0;
+
+typedef DenseMap<GCStrategy*, GCMetadataPrinter*> gcp_map_type;
+static gcp_map_type &getGCMap(void *&P) {
+  if (P == 0)
+    P = new gcp_map_type();
+  return *(gcp_map_type*)P;
+}
+
+
+/// getGVAlignmentLog2 - Return the alignment to use for the specified global
+/// value in log2 form. This rounds up to the preferred alignment if possible
+/// and legal.
+static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD,
+                                   unsigned InBits = 0) {
+  unsigned NumBits = 0;
+  if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+    NumBits = TD.getPreferredAlignmentLog(GVar);
+
+  // If InBits is specified, round up to it.
+  if (InBits > NumBits)
+    NumBits = InBits;
+
+  // If the GV has a specified alignment, take it into account.
+  if (GV->getAlignment() == 0)
+    return NumBits;
+
+  unsigned GVAlign = Log2_32(GV->getAlignment());
+
+  // If the GVAlign is larger than NumBits, or if we are required to obey
+  // NumBits because the GV has an assigned section, obey it.
+  if (GVAlign > NumBits || GV->hasSection())
+    NumBits = GVAlign;
+  return NumBits;
+}
+
+
+
+
+AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer)
+  : MachineFunctionPass(ID),
+    TM(tm), MAI(tm.getMCAsmInfo()),
+    OutContext(Streamer.getContext()),
+    OutStreamer(Streamer),
+    LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
+  DD = 0; DE = 0; MMI = 0; LI = 0;
+  GCMetadataPrinters = 0;
+  VerboseAsm = Streamer.isVerboseAsm();
+}
+
+AsmPrinter::~AsmPrinter() {
+  assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized");
+
+  if (GCMetadataPrinters != 0) {
+    gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+
+    for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I)
+      delete I->second;
+    delete &GCMap;
+    GCMetadataPrinters = 0;
+  }
+
+  delete &OutStreamer;
+}
+
+/// getFunctionNumber - Return a unique ID for the current function.
+///
+unsigned AsmPrinter::getFunctionNumber() const {
+  return MF->getFunctionNumber();
+}
+
+const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const {
+  return TM.getTargetLowering()->getObjFileLowering();
+}
+
+
+/// getTargetData - Return information about data layout.
+const TargetData &AsmPrinter::getTargetData() const {
+  return *TM.getTargetData();
+}
+
+/// getCurrentSection() - Return the current section we are emitting to.
+const MCSection *AsmPrinter::getCurrentSection() const {
+  return OutStreamer.getCurrentSection();
+}
+
+
+
+void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<MachineModuleInfo>();
+  AU.addRequired<GCModuleInfo>();
+  if (isVerbose())
+    AU.addRequired<MachineLoopInfo>();
+}
+
+bool AsmPrinter::doInitialization(Module &M) {
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MMI->AnalyzeModule(M);
+
+  // Initialize TargetLoweringObjectFile.
+  const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
+    .Initialize(OutContext, TM);
+
+  Mang = new Mangler(OutContext, *TM.getTargetData());
+
+  // Allow the target to emit any magic that it wants at the start of the file.
+  EmitStartOfAsmFile(M);
+
+  // Very minimal debug info. It is ignored if we emit actual debug info. If we
+  // don't, this at least helps the user find where a global came from.
+  if (MAI->hasSingleParameterDotFile()) {
+    // .file "foo.c"
+    OutStreamer.EmitFileDirective(M.getModuleIdentifier());
+  }
+
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+  for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+    if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I))
+      MP->beginAssembly(*this);
+
+  // Emit module-level inline asm if it exists.
+  if (!M.getModuleInlineAsm().empty()) {
+    OutStreamer.AddComment("Start of file scope inline assembly");
+    OutStreamer.AddBlankLine();
+    EmitInlineAsm(M.getModuleInlineAsm()+"\n");
+    OutStreamer.AddComment("End of file scope inline assembly");
+    OutStreamer.AddBlankLine();
+  }
+
+  if (MAI->doesSupportDebugInformation())
+    DD = new DwarfDebug(this, &M);
+
+  if (MAI->doesSupportExceptionHandling())
+    switch (MAI->getExceptionHandlingType()) {
+    default:
+    case ExceptionHandling::DwarfTable:
+      DE = new DwarfTableException(this);
+      break;
+    case ExceptionHandling::DwarfCFI:
+      DE = new DwarfCFIException(this);
+      break;
+    case ExceptionHandling::ARM:
+      DE = new ARMException(this);
+      break;
+    }
+
+  return false;
+}
+
+void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
+  switch ((GlobalValue::LinkageTypes)Linkage) {
+  case GlobalValue::CommonLinkage:
+  case GlobalValue::LinkOnceAnyLinkage:
+  case GlobalValue::LinkOnceODRLinkage:
+  case GlobalValue::WeakAnyLinkage:
+  case GlobalValue::WeakODRLinkage:
+  case GlobalValue::LinkerPrivateWeakLinkage:
+  case GlobalValue::LinkerPrivateWeakDefAutoLinkage:
+    if (MAI->getWeakDefDirective() != 0) {
+      // .globl _foo
+      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+
+      if ((GlobalValue::LinkageTypes)Linkage !=
+          GlobalValue::LinkerPrivateWeakDefAutoLinkage)
+        // .weak_definition _foo
+        OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
+      else
+        OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate);
+    } else if (MAI->getLinkOnceDirective() != 0) {
+      // .globl _foo
+      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+      // NOTE: linkonce is handled by the section the symbol was assigned to.
+    } else {
+      // .weak _foo
+      OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
+    }
+    break;
+  case GlobalValue::DLLExportLinkage:
+  case GlobalValue::AppendingLinkage:
+    // FIXME: appending linkage variables should go into a section of
+    // their name or something. For now, just emit them as external.
+  case GlobalValue::ExternalLinkage:
+    // If external or appending, declare as a global symbol.
+    // .globl _foo
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+    break;
+  case GlobalValue::PrivateLinkage:
+  case GlobalValue::InternalLinkage:
+  case GlobalValue::LinkerPrivateLinkage:
+    break;
+  default:
+    llvm_unreachable("Unknown linkage type!");
+  }
+}
+
+
+/// EmitGlobalVariable - Emit the specified global variable to the .s file.
+void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+  if (!GV->hasInitializer())   // External globals require no code.
+    return;
+
+  // Check to see if this is a special global used by LLVM, if so, emit it.
+  if (EmitSpecialLLVMGlobal(GV))
+    return;
+
+  if (isVerbose()) {
+    WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+                   /*PrintType=*/false, GV->getParent());
+    OutStreamer.GetCommentOS() << '\n';
+  }
+
+  MCSymbol *GVSym = Mang->getSymbol(GV);
+  EmitVisibility(GVSym, GV->getVisibility());
+
+  if (MAI->hasDotTypeDotSizeDirective())
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
+
+  SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
+
+  const TargetData *TD = TM.getTargetData();
+  uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
+
+  // If the alignment is specified, we *must* obey it. Overaligning a global
+  // with a specified alignment is a prompt way to break globals emitted to
+  // sections and expected to be contiguous (e.g. ObjC metadata).
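+  // (Per getGVAlignmentLog2 above: when the global has an assigned section,
+  //  an explicitly specified alignment is obeyed even if it is smaller than
+  //  the preferred one, which is what keeps such globals contiguous.)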
+  unsigned AlignLog = getGVAlignmentLog2(GV, *TD);
+
+  // Handle common and BSS local symbols (.lcomm).
+  if (GVKind.isCommon() || GVKind.isBSSLocal()) {
+    if (Size == 0) Size = 1;   // .comm Foo, 0 is undefined, avoid it.
+
+    if (isVerbose()) {
+      WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+                     /*PrintType=*/false, GV->getParent());
+      OutStreamer.GetCommentOS() << '\n';
+    }
+
+    // Handle common symbols.
+    if (GVKind.isCommon()) {
+      unsigned Align = 1 << AlignLog;
+      if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+        Align = 0;
+
+      // .comm _foo, 42, 4
+      OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+      return;
+    }
+
+    // Handle local BSS symbols.
+    if (MAI->hasMachoZeroFillDirective()) {
+      const MCSection *TheSection =
+        getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+      // .zerofill __DATA, __bss, _foo, 400, 5
+      OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+      return;
+    }
+
+    if (MAI->hasLCOMMDirective()) {
+      // .lcomm _foo, 42
+      OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
+      return;
+    }
+
+    unsigned Align = 1 << AlignLog;
+    if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+      Align = 0;
+
+    // .local _foo
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
+    // .comm _foo, 42, 4
+    OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
+    return;
+  }
+
+  const MCSection *TheSection =
+    getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM);
+
+  // Handle the zerofill directive on darwin, which is a special form of BSS
+  // emission.
+  if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) {
+    if (Size == 0) Size = 1;  // zerofill of 0 bytes is undefined.
+
+    // .globl _foo
+    OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
+    // .zerofill __DATA, __common, _foo, 400, 5
+    OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
+    return;
+  }
+
+  // Handle thread local data for mach-o which requires us to output an
+  // additional structure of data and mangle the original symbol so that we
+  // can reference it later.
+  //
+  // TODO: This should become an "emit thread local global" method on TLOF.
+  // All of this macho specific stuff should be sunk down into TLOFMachO and
+  // stuff like "TLSExtraDataSection" should no longer be part of the parent
+  // TLOF class. This will also make it more obvious that stuff like
+  // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+  // specific code.
+  if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+    // Emit the .tbss symbol
+    MCSymbol *MangSym =
+      OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+    if (GVKind.isThreadBSS())
+      OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+    else if (GVKind.isThreadData()) {
+      OutStreamer.SwitchSection(TheSection);
+
+      EmitAlignment(AlignLog, GV);
+      OutStreamer.EmitLabel(MangSym);
+
+      EmitGlobalConstant(GV->getInitializer());
+    }
+
+    OutStreamer.AddBlankLine();
+
+    // Emit the variable struct for the runtime.
+    const MCSection *TLVSect
+      = getObjFileLowering().getTLSExtraDataSection();
+
+    OutStreamer.SwitchSection(TLVSect);
+    // Emit the linkage here.
+    EmitLinkage(GV->getLinkage(), GVSym);
+    OutStreamer.EmitLabel(GVSym);
+
+    // Three pointers in size:
+    //   - __tlv_bootstrap - used to make sure support exists
+    //   - spare pointer, used when mapped by the runtime
+    //   - pointer to mangled symbol above with initializer
+    unsigned PtrSize = TD->getPointerSizeInBits()/8;
+    OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
+                                PtrSize, 0);
+    OutStreamer.EmitIntValue(0, PtrSize, 0);
+    OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+
+    OutStreamer.AddBlankLine();
+    return;
+  }
+
+  OutStreamer.SwitchSection(TheSection);
+
+  EmitLinkage(GV->getLinkage(), GVSym);
+  EmitAlignment(AlignLog, GV);
+
+  OutStreamer.EmitLabel(GVSym);
+
+  EmitGlobalConstant(GV->getInitializer());
+
+  if (MAI->hasDotTypeDotSizeDirective())
+    // .size foo, 42
+    OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext));
+
+  OutStreamer.AddBlankLine();
+}
+
+/// EmitFunctionHeader - This method emits the header for the current
+/// function.
+void AsmPrinter::EmitFunctionHeader() {
+  // Print out constants referenced by the function
+  EmitConstantPool();
+
+  // Print the 'header' of function.
+  const Function *F = MF->getFunction();
+
+  OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
+  EmitVisibility(CurrentFnSym, F->getVisibility());
+
+  EmitLinkage(F->getLinkage(), CurrentFnSym);
+  EmitAlignment(MF->getAlignment(), F);
+
+  if (MAI->hasDotTypeDotSizeDirective())
+    OutStreamer.EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+
+  if (isVerbose()) {
+    WriteAsOperand(OutStreamer.GetCommentOS(), F,
+                   /*PrintType=*/false, F->getParent());
+    OutStreamer.GetCommentOS() << '\n';
+  }
+
+  // Emit the CurrentFnSym. This is a virtual function to allow targets to
+  // do their wild and crazy things as required.
+  EmitFunctionEntryLabel();
+
+  // If the function had address-taken blocks that got deleted, then we have
+  // references to the dangling symbols. Emit them at the start of the function
+  // so that we don't get references to undefined symbols.
+  std::vector<MCSymbol*> DeadBlockSyms;
+  MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms);
+  for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) {
+    OutStreamer.AddComment("Address taken block that was later removed");
+    OutStreamer.EmitLabel(DeadBlockSyms[i]);
+  }
+
+  // Add some workaround for linkonce linkage on Cygwin/MinGW.
+  if (MAI->getLinkOnceDirective() != 0 &&
+      (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) {
+    // FIXME: What is this?
+    MCSymbol *FakeStub =
+      OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+
+                                   CurrentFnSym->getName());
+    OutStreamer.EmitLabel(FakeStub);
+  }
+
+  // Emit pre-function debug and/or EH information.
+  if (DE) {
+    NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+    DE->BeginFunction(MF);
+  }
+  if (DD) {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    DD->beginFunction(MF);
+  }
+}
+
+/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
+/// function. This can be overridden by targets as required to do custom stuff.
+void AsmPrinter::EmitFunctionEntryLabel() {
+  // The function label could have already been emitted if two symbols end up
+  // conflicting due to asm renaming. Detect this and emit an error.
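+  // (An MCSymbol stays "undefined" until a label for it is emitted, so a
+  //  defined CurrentFnSym at this point means the label was already printed
+  //  once.)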
+  if (CurrentFnSym->isUndefined())
+    return OutStreamer.EmitLabel(CurrentFnSym);
+
+  report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+                     "' label emitted multiple times to assembly file");
+}
+
+
+static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF,
+                         raw_ostream &CommentOS) {
+  const LLVMContext &Ctx = MF->getFunction()->getContext();
+  if (!DL.isUnknown()) {          // Print source line info.
+    DIScope Scope(DL.getScope(Ctx));
+    // Omit the directory, because it's likely to be long and uninteresting.
+    if (Scope.Verify())
+      CommentOS << Scope.getFilename();
+    else
+      CommentOS << "<unknown>";
+    CommentOS << ':' << DL.getLine();
+    if (DL.getCol() != 0)
+      CommentOS << ':' << DL.getCol();
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+    if (!InlinedAtDL.isUnknown()) {
+      CommentOS << "[ ";
+      EmitDebugLoc(InlinedAtDL, MF, CommentOS);
+      CommentOS << " ]";
+    }
+  }
+}
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+  const MachineFunction *MF = MI.getParent()->getParent();
+  const TargetMachine &TM = MF->getTarget();
+
+  DebugLoc DL = MI.getDebugLoc();
+  if (!DL.isUnknown()) {          // Print source line info.
+    EmitDebugLoc(DL, MF, CommentOS);
+    CommentOS << '\n';
+  }
+
+  // Check for spills and reloads
+  int FI;
+
+  const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+
+  // We assume a single instruction only has a spill or reload, not
+  // both.
+  const MachineMemOperand *MMO;
+  if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+    if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+      MMO = *MI.memoperands_begin();
+      CommentOS << MMO->getSize() << "-byte Reload\n";
+    }
+  } else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
+    if (FrameInfo->isSpillSlotObjectIndex(FI))
+      CommentOS << MMO->getSize() << "-byte Folded Reload\n";
+  } else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+    if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+      MMO = *MI.memoperands_begin();
+      CommentOS << MMO->getSize() << "-byte Spill\n";
+    }
+  } else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
+    if (FrameInfo->isSpillSlotObjectIndex(FI))
+      CommentOS << MMO->getSize() << "-byte Folded Spill\n";
+  }
+
+  // Check for spill-induced copies
+  if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+    CommentOS << " Reload Reuse\n";
+}
+
+/// EmitImplicitDef - This method emits the specified machine instruction
+/// that is an implicit def.
+static void EmitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) {
+  unsigned RegNo = MI->getOperand(0).getReg();
+  AP.OutStreamer.AddComment(Twine("implicit-def: ") +
+                            AP.TM.getRegisterInfo()->getName(RegNo));
+  AP.OutStreamer.AddBlankLine();
+}
+
+static void EmitKill(const MachineInstr *MI, AsmPrinter &AP) {
+  std::string Str = "kill:";
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &Op = MI->getOperand(i);
+    assert(Op.isReg() && "KILL instruction must have only register operands");
+    Str += ' ';
+    Str += AP.TM.getRegisterInfo()->getName(Op.getReg());
+    Str += (Op.isDef() ? "<def>" : "<kill>");
+  }
+  AP.OutStreamer.AddComment(Str);
+  AP.OutStreamer.AddBlankLine();
+}
+
+/// EmitDebugValueComment - This method handles the target-independent form
+/// of DBG_VALUE, returning true if it was able to do so. A false return
+/// means the target will need to handle MI in EmitInstruction.
+static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
+  // This code handles only the 3-operand target-independent form.
+  if (MI->getNumOperands() != 3)
+    return false;
+
+  SmallString<128> Str;
+  raw_svector_ostream OS(Str);
+  OS << '\t' << AP.MAI->getCommentString() << "DEBUG_VALUE: ";
+
+  // cast away const; DI etc. do not take const operands for some reason.
+  DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
+  if (V.getContext().isSubprogram())
+    OS << DISubprogram(V.getContext()).getDisplayName() << ":";
+  OS << V.getName() << " <- ";
+
+  // Register or immediate value. Register 0 means undef.
+  if (MI->getOperand(0).isFPImm()) {
+    APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF());
+    if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) {
+      OS << (double)APF.convertToFloat();
+    } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) {
+      OS << APF.convertToDouble();
+    } else {
+      // There is no good way to print long double. Convert a copy to
+      // double. Ah well, it's only a comment.
+      bool ignored;
+      APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+                  &ignored);
+      OS << "(long double) " << APF.convertToDouble();
+    }
+  } else if (MI->getOperand(0).isImm()) {
+    OS << MI->getOperand(0).getImm();
+  } else {
+    assert(MI->getOperand(0).isReg() && "Unknown operand type");
+    if (MI->getOperand(0).getReg() == 0) {
+      // Suppress offset, it is not meaningful here.
+      OS << "undef";
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      AP.OutStreamer.EmitRawText(OS.str());
+      return true;
+    }
+    OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg());
+  }
+
+  OS << '+' << MI->getOperand(1).getImm();
+  // NOTE: Want this comment at start of line, don't emit with AddComment.
+  AP.OutStreamer.EmitRawText(OS.str());
+  return true;
+}
+
+/// EmitFunctionBody - This method emits the body and trailer for a
+/// function.
+void AsmPrinter::EmitFunctionBody() {
+  // Emit target-specific gunk before the function body.
+  EmitFunctionBodyStart();
+
+  bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo();
+
+  // Print out code for the function.
+  bool HasAnyRealCode = false;
+  const MachineInstr *LastMI = 0;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    // Print a label for the basic block.
+    EmitBasicBlockStart(I);
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      LastMI = II;
+
+      // Print the assembly for the instruction.
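+      // (Labels, IMPLICIT_DEF, KILL and DBG_VALUE do not count as real code
+      //  here; HasAnyRealCode feeds the empty-function noop logic after the
+      //  loop.)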
+      if (!II->isLabel() && !II->isImplicitDef() && !II->isKill() &&
+          !II->isDebugValue()) {
+        HasAnyRealCode = true;
+        ++EmittedInsts;
+      }
+
+      if (ShouldPrintDebugScopes) {
+        NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+        DD->beginInstruction(II);
+      }
+
+      if (isVerbose())
+        EmitComments(*II, OutStreamer.GetCommentOS());
+
+      switch (II->getOpcode()) {
+      case TargetOpcode::PROLOG_LABEL:
+      case TargetOpcode::EH_LABEL:
+      case TargetOpcode::GC_LABEL:
+        OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol());
+        break;
+      case TargetOpcode::INLINEASM:
+        EmitInlineAsm(II);
+        break;
+      case TargetOpcode::DBG_VALUE:
+        if (isVerbose()) {
+          if (!EmitDebugValueComment(II, *this))
+            EmitInstruction(II);
+        }
+        break;
+      case TargetOpcode::IMPLICIT_DEF:
+        if (isVerbose()) EmitImplicitDef(II, *this);
+        break;
+      case TargetOpcode::KILL:
+        if (isVerbose()) EmitKill(II, *this);
+        break;
+      default:
+        EmitInstruction(II);
+        break;
+      }
+
+      if (ShouldPrintDebugScopes) {
+        NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+        DD->endInstruction(II);
+      }
+    }
+  }
+
+  // If the last instruction was a prolog label, then we have a situation where
+  // we emitted a prolog but no function body. This results in the ending prolog
+  // label equaling the end of function label and an invalid "row" in the
+  // FDE. We need to emit a noop in this situation so that the FDE's rows are
+  // valid.
+  bool RequiresNoop = LastMI && LastMI->isPrologLabel();
+
+  // If the function is empty and the object file uses .subsections_via_symbols,
+  // then we need to emit *something* to the function body to prevent the
+  // labels from collapsing together. Just emit a noop.
+  if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode) || RequiresNoop) {
+    MCInst Noop;
+    TM.getInstrInfo()->getNoopForMachoTarget(Noop);
+    if (Noop.getOpcode()) {
+      OutStreamer.AddComment("avoids zero-length function");
+      OutStreamer.EmitInstruction(Noop);
+    } else  // Target not mc-ized yet.
+      OutStreamer.EmitRawText(StringRef("\tnop\n"));
+  }
+
+  // Emit target-specific gunk after the function body.
+  EmitFunctionBodyEnd();
+
+  // If the target wants a .size directive for the size of the function, emit
+  // it.
+  if (MAI->hasDotTypeDotSizeDirective()) {
+    // Create a symbol for the end of function, so we can get the size as
+    // difference between the function label and the temp label.
+    MCSymbol *FnEndLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(FnEndLabel);
+
+    const MCExpr *SizeExp =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext),
+                              MCSymbolRefExpr::Create(CurrentFnSym, OutContext),
+                              OutContext);
+    OutStreamer.EmitELFSize(CurrentFnSym, SizeExp);
+  }
+
+  // Emit post-function debug information.
+  if (DD) {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    DD->endFunction(MF);
+  }
+  if (DE) {
+    NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+    DE->EndFunction();
+  }
+  MMI->EndFunction();
+
+  // Print out jump tables referenced by the function.
+  EmitJumpTableInfo();
+
+  OutStreamer.AddBlankLine();
+}
+
+/// getDebugValueLocation - Get location information encoded by DBG_VALUE
+/// operands.
+MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+  // Target specific DBG_VALUE instructions are handled by each target.
+  return MachineLocation();
+}
+
+bool AsmPrinter::doFinalization(Module &M) {
+  // Emit global variables.
+  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobalVariable(I);
+
+  // Emit visibility info for declarations.
+  for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+    const Function &F = *I;
+    if (!F.isDeclaration())
+      continue;
+    GlobalValue::VisibilityTypes V = F.getVisibility();
+    if (V == GlobalValue::DefaultVisibility)
+      continue;
+
+    MCSymbol *Name = Mang->getSymbol(&F);
+    EmitVisibility(Name, V, false);
+  }
+
+  // Finalize debug and EH information.
+  if (DE) {
+    {
+      NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+      DE->EndModule();
+    }
+    delete DE; DE = 0;
+  }
+  if (DD) {
+    {
+      NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+      DD->endModule();
+    }
+    delete DD; DD = 0;
+  }
+
+  // If the target wants to know about weak references, print them all.
+  if (MAI->getWeakRefDirective()) {
+    // FIXME: This is not lazy, it would be nice to only print weak references
+    // to stuff that is actually used.  Note that doing so would require targets
+    // to notice uses in operands (due to constant exprs etc).  This should
+    // happen with the MC stuff eventually.
+
+    // Print out module-level global variables here.
+    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+         I != E; ++I) {
+      if (!I->hasExternalWeakLinkage()) continue;
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+    }
+
+    for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+      if (!I->hasExternalWeakLinkage()) continue;
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference);
+    }
+  }
+
+  if (MAI->hasSetDirective()) {
+    OutStreamer.AddBlankLine();
+    for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
+         I != E; ++I) {
+      MCSymbol *Name = Mang->getSymbol(I);
+
+      const GlobalValue *GV = cast<GlobalValue>(I->getAliasedGlobal());
+      MCSymbol *Target = Mang->getSymbol(GV);
+
+      if (I->hasExternalLinkage() || !MAI->getWeakRefDirective())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_Global);
+      else if (I->hasWeakLinkage())
+        OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference);
+      else
+        assert(I->hasLocalLinkage() && "Invalid alias linkage");
+
+      EmitVisibility(Name, I->getVisibility());
+
+      // Emit the directives as assignments aka .set:
+      OutStreamer.EmitAssignment(Name,
+                                 MCSymbolRefExpr::Create(Target, OutContext));
+    }
+  }
+
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "AsmPrinter didn't require GCModuleInfo?");
+  for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; )
+    if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I))
+      MP->finishAssembly(*this);
+
+  // If we don't have any trampolines, then we don't require stack memory
+  // to be executable.  Some targets have a directive to declare this.
+  Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+  if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
+    if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext))
+      OutStreamer.SwitchSection(S);
+
+  // Allow the target to emit any magic that it wants at the end of the file,
+  // after everything else has gone out.
+  EmitEndOfAsmFile(M);
+
+  delete Mang; Mang = 0;
+  MMI = 0;
+
+  OutStreamer.Finish();
+  return false;
+}
+
+void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
+  this->MF = &MF;
+  // Get the function symbol.
+  CurrentFnSym = Mang->getSymbol(MF.getFunction());
+
+  if (isVerbose())
+    LI = &getAnalysis<MachineLoopInfo>();
+}
+
+namespace {
+  // SectionCPs - Keep track of the alignment and constant pool entries for
+  // each section.
+  struct SectionCPs {
+    const MCSection *S;
+    unsigned Alignment;
+    SmallVector<unsigned, 4> CPEs;
+    SectionCPs(const MCSection *s, unsigned a) : S(s), Alignment(a) {}
+  };
+}
+
+/// EmitConstantPool - Print to the current output stream assembly
+/// representations of the constants in the constant pool MCP. This is
+/// used to print out constants which have been "spilled to memory" by
+/// the code generator.
+///
+void AsmPrinter::EmitConstantPool() {
+  const MachineConstantPool *MCP = MF->getConstantPool();
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // Calculate sections for constant pool entries. We collect entries that go
+  // into the same section together, to reduce the number of section switches.
+  SmallVector<SectionCPs, 4> CPSections;
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    const MachineConstantPoolEntry &CPE = CP[i];
+    unsigned Align = CPE.getAlignment();
+
+    SectionKind Kind;
+    switch (CPE.getRelocationInfo()) {
+    default: llvm_unreachable("Unknown section kind");
+    case 2: Kind = SectionKind::getReadOnlyWithRel(); break;
+    case 1:
+      Kind = SectionKind::getReadOnlyWithRelLocal();
+      break;
+    case 0:
+      switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) {
+      case 4:  Kind = SectionKind::getMergeableConst4(); break;
+      case 8:  Kind = SectionKind::getMergeableConst8(); break;
+      case 16: Kind = SectionKind::getMergeableConst16(); break;
+      default: Kind = SectionKind::getMergeableConst(); break;
+      }
+    }
+
+    const MCSection *S = getObjFileLowering().getSectionForConstant(Kind);
+
+    // The number of sections is small, so just do a linear search from the
+    // last section to the first.
+    bool Found = false;
+    unsigned SecIdx = CPSections.size();
+    while (SecIdx != 0) {
+      if (CPSections[--SecIdx].S == S) {
+        Found = true;
+        break;
+      }
+    }
+    if (!Found) {
+      SecIdx = CPSections.size();
+      CPSections.push_back(SectionCPs(S, Align));
+    }
+
+    if (Align > CPSections[SecIdx].Alignment)
+      CPSections[SecIdx].Alignment = Align;
+    CPSections[SecIdx].CPEs.push_back(i);
+  }
+
+  // Now print stuff into the calculated sections.
+  for (unsigned i = 0, e = CPSections.size(); i != e; ++i) {
+    OutStreamer.SwitchSection(CPSections[i].S);
+    EmitAlignment(Log2_32(CPSections[i].Alignment));
+
+    unsigned Offset = 0;
+    for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) {
+      unsigned CPI = CPSections[i].CPEs[j];
+      MachineConstantPoolEntry CPE = CP[CPI];
+
+      // Emit inter-object padding for alignment.
+      unsigned AlignMask = CPE.getAlignment() - 1;
+      unsigned NewOffset = (Offset + AlignMask) & ~AlignMask;
+      OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/);
+
+      const Type *Ty = CPE.getType();
+      Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
+      OutStreamer.EmitLabel(GetCPISymbol(CPI));
+
+      if (CPE.isMachineConstantPoolEntry())
+        EmitMachineConstantPoolValue(CPE.Val.MachineCPVal);
+      else
+        EmitGlobalConstant(CPE.Val.ConstVal);
+    }
+  }
+}
+
+/// EmitJumpTableInfo - Print assembly representations of the jump tables used
+/// by the current function to the current output stream.
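+/// For illustration only (exact syntax and label names are target-dependent),
+/// a table of three blocks on an ELF target comes out roughly as:
+///   .LJTI0_0:
+///     .long .LBB0_2
+///     .long .LBB0_3
+///     .long .LBB0_4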
+///
+void AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (MJTI == 0) return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return;
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // Pick the directive to use to print the jump table entries, and switch to
+  // the appropriate section.
+  const Function *F = MF->getFunction();
+  bool JTInDiffSection = false;
+  if (// In PIC mode, we need to emit the jump table to the same section as the
+      // function body itself, otherwise the label differences won't make sense.
+      // FIXME: Need a better predicate for this: what about custom entries?
+      MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 ||
+      // We should also do this if the section name is NULL or the function is
+      // declared in a discardable section.
+      // FIXME: this isn't the right predicate, should be based on the MCSection
+      // for the function.
+      F->isWeakForLinker()) {
+    OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM));
+  } else {
+    // Otherwise, drop it in the readonly section.
+    const MCSection *ReadOnlySection =
+      getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly());
+    OutStreamer.SwitchSection(ReadOnlySection);
+    JTInDiffSection = true;
+  }
+
+  EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData())));
+
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    // For the EK_LabelDifference32 entry, if the target supports .set, emit a
+    // .set directive for each unique entry. This reduces the number of
+    // relocations the assembler will generate for the jump table.
+    if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 &&
+        MAI->hasSetDirective()) {
+      SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets;
+      const TargetLowering *TLI = TM.getTargetLowering();
+      const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext);
+      for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) {
+        const MachineBasicBlock *MBB = JTBBs[ii];
+        if (!EmittedSets.insert(MBB)) continue;
+
+        // .set LJTSet, LBB32-base
+        const MCExpr *LHS =
+          MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+        OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()),
+                                   MCBinaryExpr::CreateSub(LHS, Base,
+                                                           OutContext));
+      }
+    }
+
+    // On some targets (e.g. Darwin) we want to emit two consecutive labels
+    // before each jump table.  The first label is never referenced, but tells
+    // the assembler and linker the extents of the jump table object.  The
+    // second label is actually referenced by the code.
+    if (JTInDiffSection && MAI->getLinkerPrivateGlobalPrefix()[0])
+      // FIXME: This doesn't have to have any specific name, just any randomly
+      // named and numbered 'l' label would work.  Simplify GetJTISymbol.
+      OutStreamer.EmitLabel(GetJTISymbol(JTI, true));
+
+    OutStreamer.EmitLabel(GetJTISymbol(JTI));
+
+    for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii)
+      EmitJumpTableEntry(MJTI, JTBBs[ii], JTI);
+  }
+}
+
+/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the
+/// current stream.
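+/// The form of each entry depends on the entry kind; illustratively,
+/// EK_BlockAddress yields ".word LBB0_2" while EK_LabelDifference32 yields
+/// ".word LBB0_2-LJTI0_0" (or a .set-based equivalent when .set is available).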
+void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                    const MachineBasicBlock *MBB,
+                                    unsigned UID) const {
+  assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
+  const MCExpr *Value = 0;
+  switch (MJTI->getEntryKind()) {
+  case MachineJumpTableInfo::EK_Inline:
+    llvm_unreachable("Cannot emit EK_Inline jump table entry"); break;
+  case MachineJumpTableInfo::EK_Custom32:
+    Value = TM.getTargetLowering()->LowerCustomJumpTableEntry(MJTI, MBB, UID,
+                                                              OutContext);
+    break;
+  case MachineJumpTableInfo::EK_BlockAddress:
+    // EK_BlockAddress - Each entry is a plain address of block, e.g.:
+    //     .word LBB123
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+    break;
+  case MachineJumpTableInfo::EK_GPRel32BlockAddress: {
+    // EK_GPRel32BlockAddress - Each entry is an address of block, encoded
+    // with a relocation as gp-relative, e.g.:
+    //     .gprel32 LBB123
+    MCSymbol *MBBSym = MBB->getSymbol();
+    OutStreamer.EmitGPRel32Value(MCSymbolRefExpr::Create(MBBSym, OutContext));
+    return;
+  }
+
+  case MachineJumpTableInfo::EK_LabelDifference32: {
+    // EK_LabelDifference32 - Each entry is the address of the block minus
+    // the address of the jump table.  This is used for PIC jump tables where
+    // gprel32 is not supported.  e.g.:
+    //     .word LBB123 - LJTI1_2
+    // If the .set directive is supported, this is emitted as:
+    //     .set L4_5_set_123, LBB123 - LJTI1_2
+    //     .word L4_5_set_123
+
+    // If we have emitted set directives for the jump table entries, print
+    // them rather than the entries themselves.  If we're emitting PIC, then
+    // emit the table entries as differences between two text section labels.
+    if (MAI->hasSetDirective()) {
+      // If we used .set, reference the .set's symbol.
+      Value = MCSymbolRefExpr::Create(GetJTSetSymbol(UID, MBB->getNumber()),
+                                      OutContext);
+      break;
+    }
+    // Otherwise, use the difference as the jump table entry.
+    Value = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext);
+    const MCExpr *JTI = MCSymbolRefExpr::Create(GetJTISymbol(UID), OutContext);
+    Value = MCBinaryExpr::CreateSub(Value, JTI, OutContext);
+    break;
+  }
+  }
+
+  assert(Value && "Unknown entry kind!");
+
+  unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData());
+  OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0);
+}
+
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM.  If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+  if (GV->getName() == "llvm.used") {
+    if (MAI->hasNoDeadStrip())    // No need to emit this at all.
+      EmitLLVMUsedList(GV->getInitializer());
+    return true;
+  }
+
+  // Ignore debug and non-emitted data.  This handles llvm.compiler.used.
+  if (GV->getSection() == "llvm.metadata" ||
+      GV->hasAvailableExternallyLinkage())
+    return true;
+
+  if (!GV->hasAppendingLinkage()) return false;
+
+  assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+  const TargetData *TD = TM.getTargetData();
+  unsigned Align = Log2_32(TD->getPointerPrefAlignment());
+  if (GV->getName() == "llvm.global_ctors") {
+    OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection());
+    EmitAlignment(Align);
+    EmitXXStructorList(GV->getInitializer());
+
+    if (TM.getRelocationModel() == Reloc::Static &&
+        MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+      StringRef Sym(".constructors_used");
+      OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+                                      MCSA_Reference);
+    }
+    return true;
+  }
+
+  if (GV->getName() == "llvm.global_dtors") {
+    OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection());
+    EmitAlignment(Align);
+    EmitXXStructorList(GV->getInitializer());
+
+    if (TM.getRelocationModel() == Reloc::Static &&
+        MAI->hasStaticCtorDtorReferenceInStaticMode()) {
+      StringRef Sym(".destructors_used");
+      OutStreamer.EmitSymbolAttribute(OutContext.GetOrCreateSymbol(Sym),
+                                      MCSA_Reference);
+    }
+    return true;
+  }
+
+  return false;
+}
+
+/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
+/// global in the specified llvm.used list for which emitUsedDirectiveFor
+/// is true, as being used with this directive.
+void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+  // Should be an array of 'i8*'.
+  ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
+    const GlobalValue *GV =
+      dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts());
+    if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang))
+      OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip);
+  }
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list.  This just prints out the
+/// function pointers, ignoring the init priority.
+void AsmPrinter::EmitXXStructorList(Constant *List) {
+  // Should be an array of '{ int, void ()* }' structs.  The first value is the
+  // init priority, which we ignore.
+  if (!isa<ConstantArray>(List)) return;
+  ConstantArray *InitList = cast<ConstantArray>(List);
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+      if (CS->getNumOperands() != 2) return;  // Not array of 2-element structs.
+
+      if (CS->getOperand(1)->isNullValue())
+        return;  // Found a null terminator, exit printing.
+      // Emit the function pointer.
+      EmitGlobalConstant(CS->getOperand(1));
+    }
+}
+
+//===--------------------------------------------------------------------===//
+// Emission and print routines
+//
+
+/// EmitInt8 - Emit a byte directive and value.
+///
+void AsmPrinter::EmitInt8(int Value) const {
+  OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/);
+}
+
+/// EmitInt16 - Emit a short directive and value.
+///
+void AsmPrinter::EmitInt16(int Value) const {
+  OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/);
+}
+
+/// EmitInt32 - Emit a long directive and value.
+///
+void AsmPrinter::EmitInt32(int Value) const {
+  OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/);
+}
+
+/// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size
+/// in bytes of the directive is specified by Size and Hi/Lo specify the
+/// labels.  This implicitly uses .set if it is available.
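+/// With .set support the output is roughly (names illustrative):
+///     .set Lset0, Lhi-Llo
+///     .long Lset0
+/// and without it simply ".long Lhi-Llo".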
+void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+                                     unsigned Size) const {
+  // Get the Hi-Lo expression.
+  const MCExpr *Diff =
+    MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext),
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+
+  if (!MAI->hasSetDirective()) {
+    OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/);
+    return;
+  }
+
+  // Otherwise, emit with .set (aka assignment).
+  MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+  OutStreamer.EmitAssignment(SetLabel, Diff);
+  OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
+}
+
+/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo"
+/// where the size in bytes of the directive is specified by Size and Hi/Lo
+/// specify the labels.  This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset,
+                                           const MCSymbol *Lo, unsigned Size)
+  const {
+
+  // Emit Hi+Offset - Lo
+  // Get the Hi+Offset expression.
+  const MCExpr *Plus =
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext),
+                            MCConstantExpr::Create(Offset, OutContext),
+                            OutContext);
+
+  // Get the Hi+Offset-Lo expression.
+  const MCExpr *Diff =
+    MCBinaryExpr::CreateSub(Plus,
+                            MCSymbolRefExpr::Create(Lo, OutContext),
+                            OutContext);
+
+  if (!MAI->hasSetDirective())
+    OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/);
+  else {
+    // Otherwise, emit with .set (aka assignment).
+    MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++);
+    OutStreamer.EmitAssignment(SetLabel, Diff);
+    OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/);
+  }
+}
+
+/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
+/// where the size in bytes of the directive is specified by Size and Label
+/// specifies the label.  This implicitly uses .set if it is available.
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
+                                     unsigned Size)
+  const {
+
+  // Emit Label+Offset
+  const MCExpr *Plus =
+    MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext),
+                            MCConstantExpr::Create(Offset, OutContext),
+                            OutContext);
+
+  OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/);
+}
+
+
+//===----------------------------------------------------------------------===//
+
+// EmitAlignment - Emit an alignment directive to the specified power of
+// two boundary.  For example, if you pass in 3 here, you will get an 8
+// byte alignment.  If a global value is specified, and if that global has
+// an explicit alignment requested, it will override the alignment request
+// if required for correctness.
+//
+void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const {
+  if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits);
+
+  if (NumBits == 0) return;   // 1-byte aligned: no need to emit alignment.
+
+  if (getCurrentSection()->getKind().isText())
+    OutStreamer.EmitCodeAlignment(1 << NumBits);
+  else
+    OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
+}
+
+//===----------------------------------------------------------------------===//
+// Constant emission.
+//===----------------------------------------------------------------------===//
+
+/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
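+/// For example, an initializer like "getelementptr (i8* @buf, i64 16)" lowers
+/// to the expression "buf+16" (modulo name mangling); see the GEP case below.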
+///
+static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) {
+  MCContext &Ctx = AP.OutContext;
+
+  if (CV->isNullValue() || isa<UndefValue>(CV))
+    return MCConstantExpr::Create(0, Ctx);
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
+    return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+    return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx);
+
+  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV))
+    return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx);
+
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+  if (CE == 0) {
+    llvm_unreachable("Unknown constant value to lower!");
+    return MCConstantExpr::Create(0, Ctx);
+  }
+
+  switch (CE->getOpcode()) {
+  default:
+    // If the code isn't optimized, there may be outstanding folding
+    // opportunities. Attempt to fold the expression using TargetData as a
+    // last resort before giving up.
+    if (Constant *C =
+          ConstantFoldConstantExpression(CE, AP.TM.getTargetData()))
+      if (C != CE)
+        return LowerConstant(C, AP);
+
+    // Otherwise report the problem to the user.
+    {
+      std::string S;
+      raw_string_ostream OS(S);
+      OS << "Unsupported expression in static initializer: ";
+      WriteAsOperand(OS, CE, /*PrintType=*/false,
+                     !AP.MF ? 0 : AP.MF->getFunction()->getParent());
+      report_fatal_error(OS.str());
+    }
+    return MCConstantExpr::Create(0, Ctx);
+  case Instruction::GetElementPtr: {
+    const TargetData &TD = *AP.TM.getTargetData();
+    // Generate a symbolic expression for the byte address
+    const Constant *PtrVal = CE->getOperand(0);
+    SmallVector<Value*, 8> IdxVec(CE->op_begin()+1, CE->op_end());
+    int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0],
+                                         IdxVec.size());
+
+    const MCExpr *Base = LowerConstant(CE->getOperand(0), AP);
+    if (Offset == 0)
+      return Base;
+
+    // Truncate/sext the offset to the pointer size.
+    if (TD.getPointerSizeInBits() != 64) {
+      int SExtAmount = 64-TD.getPointerSizeInBits();
+      Offset = (Offset << SExtAmount) >> SExtAmount;
+    }
+
+    return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+                                   Ctx);
+  }
+
+  case Instruction::Trunc:
+    // We emit the value and depend on the assembler to truncate the generated
+    // expression properly.  This is important for differences between
+    // blockaddress labels.  Since the two labels are in the same function, it
+    // is reasonable to treat their delta as a 32-bit value.
+    // FALL THROUGH.
+  case Instruction::BitCast:
+    return LowerConstant(CE->getOperand(0), AP);
+
+  case Instruction::IntToPtr: {
+    const TargetData &TD = *AP.TM.getTargetData();
+    // Handle casts to pointers by changing them into casts to the appropriate
+    // integer type.  This promotes constant folding and simplifies this code.
+    Constant *Op = CE->getOperand(0);
+    Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()),
+                                      false/*ZExt*/);
+    return LowerConstant(Op, AP);
+  }
+
+  case Instruction::PtrToInt: {
+    const TargetData &TD = *AP.TM.getTargetData();
+    // Support only foldable casts to/from pointers that can be eliminated by
+    // changing the pointer to the appropriately sized integer type.
+    Constant *Op = CE->getOperand(0);
+    const Type *Ty = CE->getType();
+
+    const MCExpr *OpExpr = LowerConstant(Op, AP);
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot equal to the size of the pointer.
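+    // (e.g. "ptrtoint i8* @g to i32" on a 32-bit target emits the symbol
+    // directly, since the source and destination have the same size.)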
+    if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType()))
+      return OpExpr;
+
+    // Otherwise the pointer is smaller than the resultant integer, mask off
+    // the high bits so we are sure to get a proper truncation if the input is
+    // a constant expr.
+    unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType());
+    const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
+    return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+  }
+
+  // The MC library also has a right-shift operator, but it isn't consistently
+  // signed or unsigned between different targets.
+  case Instruction::Add:
+  case Instruction::Sub:
+  case Instruction::Mul:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::Shl:
+  case Instruction::And:
+  case Instruction::Or:
+  case Instruction::Xor: {
+    const MCExpr *LHS = LowerConstant(CE->getOperand(0), AP);
+    const MCExpr *RHS = LowerConstant(CE->getOperand(1), AP);
+    switch (CE->getOpcode()) {
+    default: llvm_unreachable("Unknown binary operator constant cast expr");
+    case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+    case Instruction::Sub: return MCBinaryExpr::CreateSub(LHS, RHS, Ctx);
+    case Instruction::Mul: return MCBinaryExpr::CreateMul(LHS, RHS, Ctx);
+    case Instruction::SDiv: return MCBinaryExpr::CreateDiv(LHS, RHS, Ctx);
+    case Instruction::SRem: return MCBinaryExpr::CreateMod(LHS, RHS, Ctx);
+    case Instruction::Shl: return MCBinaryExpr::CreateShl(LHS, RHS, Ctx);
+    case Instruction::And: return MCBinaryExpr::CreateAnd(LHS, RHS, Ctx);
+    case Instruction::Or:  return MCBinaryExpr::CreateOr (LHS, RHS, Ctx);
+    case Instruction::Xor: return MCBinaryExpr::CreateXor(LHS, RHS, Ctx);
+    }
+  }
+}
+
+static void EmitGlobalConstantImpl(const Constant *C, unsigned AddrSpace,
+                                   AsmPrinter &AP);
+
+static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace,
+                                    AsmPrinter &AP) {
+  if (AddrSpace != 0 || !CA->isString()) {
+    // Not a string.  Print the values in successive locations.
+    for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+      EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP);
+    return;
+  }
+
+  // Otherwise, it can be emitted as .ascii.
+  SmallVector<char, 128> TmpVec;
+  TmpVec.reserve(CA->getNumOperands());
+  for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i)
+    TmpVec.push_back(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+  AP.OutStreamer.EmitBytes(StringRef(TmpVec.data(), TmpVec.size()), AddrSpace);
+}
+
+static void EmitGlobalConstantVector(const ConstantVector *CV,
+                                     unsigned AddrSpace, AsmPrinter &AP) {
+  for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+    EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+}
+
+static void EmitGlobalConstantStruct(const ConstantStruct *CS,
+                                     unsigned AddrSpace, AsmPrinter &AP) {
+  // Print the fields in successive locations. Pad to align if needed!
+  const TargetData *TD = AP.TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CS->getType());
+  const StructLayout *Layout = TD->getStructLayout(CS->getType());
+  uint64_t SizeSoFar = 0;
+  for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
+    const Constant *Field = CS->getOperand(i);
+
+    // Check if padding is needed and insert one or more 0s.
+    uint64_t FieldSize = TD->getTypeAllocSize(Field->getType());
+    uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1))
+                        - Layout->getElementOffset(i)) - FieldSize;
+    SizeSoFar += FieldSize + PadSize;
+
+    // Now print the actual field value.
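+    // (For a struct like { i8, i32 } the computation above gives FieldSize=1
+    // and PadSize=3 for the first field, so the i32 lands at byte offset 4.)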
+    EmitGlobalConstantImpl(Field, AddrSpace, AP);
+
+    // Insert padding - this may include padding to increase the size of the
+    // current field up to the ABI size (if the struct is not packed) as well
+    // as padding to ensure that the next field starts at the right offset.
+    AP.OutStreamer.EmitZeros(PadSize, AddrSpace);
+  }
+  assert(SizeSoFar == Layout->getSizeInBytes() &&
+         "Layout of constant struct may be incorrect!");
+}
+
+static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace,
+                                 AsmPrinter &AP) {
+  // FP Constants are printed as integer constants to avoid losing
+  // precision.
+  if (CFP->getType()->isDoubleTy()) {
+    if (AP.isVerbose()) {
+      double Val = CFP->getValueAPF().convertToDouble();
+      AP.OutStreamer.GetCommentOS() << "double " << Val << '\n';
+    }
+
+    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+    AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+    return;
+  }
+
+  if (CFP->getType()->isFloatTy()) {
+    if (AP.isVerbose()) {
+      float Val = CFP->getValueAPF().convertToFloat();
+      AP.OutStreamer.GetCommentOS() << "float " << Val << '\n';
+    }
+    uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+    AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace);
+    return;
+  }
+
+  if (CFP->getType()->isX86_FP80Ty()) {
+    // All long double variants are printed as hex.
+    // API needed to prevent premature destruction.
+    APInt API = CFP->getValueAPF().bitcastToAPInt();
+    const uint64_t *p = API.getRawData();
+    if (AP.isVerbose()) {
+      // Convert to double so we can print the approximate val as a comment.
+      APFloat DoubleVal = CFP->getValueAPF();
+      bool ignored;
+      DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+                        &ignored);
+      AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= "
+                                    << DoubleVal.convertToDouble() << '\n';
+    }
+
+    if (AP.TM.getTargetData()->isBigEndian()) {
+      AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+    } else {
+      AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+      AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace);
+    }
+
+    // Emit the tail padding for the long double.
+    const TargetData &TD = *AP.TM.getTargetData();
+    AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) -
+                             TD.getTypeStoreSize(CFP->getType()), AddrSpace);
+    return;
+  }
+
+  assert(CFP->getType()->isPPC_FP128Ty() &&
+         "Floating point constant type not handled");
+  // All long double variants are printed as hex.
+  // API needed to prevent premature destruction.
+  APInt API = CFP->getValueAPF().bitcastToAPInt();
+  const uint64_t *p = API.getRawData();
+  if (AP.TM.getTargetData()->isBigEndian()) {
+    AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+    AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+  } else {
+    AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace);
+    AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace);
+  }
+}
+
+static void EmitGlobalConstantLargeInt(const ConstantInt *CI,
+                                       unsigned AddrSpace, AsmPrinter &AP) {
+  const TargetData *TD = AP.TM.getTargetData();
+  unsigned BitWidth = CI->getBitWidth();
+  assert((BitWidth & 63) == 0 && "only support multiples of 64-bits");
+
+  // We don't expect assemblers to support integer data directives
+  // for more than 64 bits, so we emit the data in at most 64-bit
+  // quantities at a time.
+  const uint64_t *RawData = CI->getValue().getRawData();
+  for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+    uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i];
+    AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace);
+  }
+}
+
+static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace,
+                                   AsmPrinter &AP) {
+  if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) {
+    uint64_t Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    return AP.OutStreamer.EmitZeros(Size, AddrSpace);
+  }
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    switch (Size) {
+    case 1:
+    case 2:
+    case 4:
+    case 8:
+      if (AP.isVerbose())
+        AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue());
+      AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace);
+      return;
+    default:
+      EmitGlobalConstantLargeInt(CI, AddrSpace, AP);
+      return;
+    }
+  }
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV))
+    return EmitGlobalConstantArray(CVA, AddrSpace, AP);
+
+  if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV))
+    return EmitGlobalConstantStruct(CVS, AddrSpace, AP);
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV))
+    return EmitGlobalConstantFP(CFP, AddrSpace, AP);
+
+  if (isa<ConstantPointerNull>(CV)) {
+    unsigned Size = AP.TM.getTargetData()->getTypeAllocSize(CV->getType());
+    AP.OutStreamer.EmitIntValue(0, Size, AddrSpace);
+    return;
+  }
+
+  if (const ConstantVector *V = dyn_cast<ConstantVector>(CV))
+    return EmitGlobalConstantVector(V, AddrSpace, AP);
+
+  // Otherwise, it must be a ConstantExpr.  Lower it to an MCExpr, then emit it
+  // through the streamer with EmitValue.
+  AP.OutStreamer.EmitValue(LowerConstant(CV, AP),
+                        AP.TM.getTargetData()->getTypeAllocSize(CV->getType()),
+                           AddrSpace);
+}
+
+/// EmitGlobalConstant - Print a general LLVM constant to the .s file.
+void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) {
+  uint64_t Size = TM.getTargetData()->getTypeAllocSize(CV->getType());
+  if (Size)
+    EmitGlobalConstantImpl(CV, AddrSpace, *this);
+  else if (MAI->hasSubsectionsViaSymbols()) {
+    // If the global has zero size, emit a single byte so that two labels don't
+    // look like they are at the same location.
+    OutStreamer.EmitIntValue(0, 1, AddrSpace);
+  }
+}
+
+void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  // Target doesn't support this yet!
+  llvm_unreachable("Target does not support EmitMachineConstantPoolValue");
+}
+
+void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const {
+  if (Offset > 0)
+    OS << '+' << Offset;
+  else if (Offset < 0)
+    OS << Offset;
+}
+
+//===----------------------------------------------------------------------===//
+// Symbol Lowering Routines.
+//===----------------------------------------------------------------------===//
+
+/// GetTempSymbol - Return the MCSymbol corresponding to the assembler
+/// temporary label with the specified stem and unique ID.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name, unsigned ID) const {
+  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+                                      Name + Twine(ID));
+}
+
+/// GetTempSymbol - Return an assembler temporary label with the specified
+/// stem.
+MCSymbol *AsmPrinter::GetTempSymbol(StringRef Name) const {
+  return OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix())+
+                                      Name);
+}
+
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const {
+  return MMI->getAddrLabelSymbol(BA->getBasicBlock());
+}
+
+MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
+  return MMI->getAddrLabelSymbol(BB);
+}
+
+/// GetCPISymbol - Return the symbol for the specified constant pool entry.
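+/// e.g. with a private prefix of ".L", entry 3 of function 2 is ".LCPI2_3".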
+MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
+  return OutContext.GetOrCreateSymbol
+    (Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+     + "_" + Twine(CPID));
+}
+
+/// GetJTISymbol - Return the symbol for the specified jump table entry.
+MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
+  return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate);
+}
+
+/// GetJTSetSymbol - Return the symbol for the specified jump table .set
+/// FIXME: privatize to AsmPrinter.
+MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
+  return OutContext.GetOrCreateSymbol
+    (Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
+     Twine(UID) + "_set_" + Twine(MBBID));
+}
+
+/// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with
+/// global value name as its base, with the specified suffix, and where the
+/// symbol is forced to have private linkage if ForcePrivate is true.
+MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
+                                                   StringRef Suffix,
+                                                   bool ForcePrivate) const {
+  SmallString<60> NameStr;
+  Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
+  NameStr.append(Suffix.begin(), Suffix.end());
+  return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
+/// ExternalSymbol.
+MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const {
+  SmallString<60> NameStr;
+  Mang->getNameWithPrefix(NameStr, Sym);
+  return OutContext.GetOrCreateSymbol(NameStr.str());
+}
+
+
+
+/// PrintParentLoopComment - Print comments about parent loops of this one.
+static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+                                   unsigned FunctionNumber) {
+  if (Loop == 0) return;
+  PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber);
+  OS.indent(Loop->getLoopDepth()*2)
+    << "Parent Loop BB" << FunctionNumber << "_"
+    << Loop->getHeader()->getNumber()
+    << " Depth=" << Loop->getLoopDepth() << '\n';
+}
+
+
+/// PrintChildLoopComment - Print comments about child loops within
+/// the loop for this basic block, with nesting.
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop,
+                                  unsigned FunctionNumber) {
+  // Add child loop information
+  for (MachineLoop::iterator CL = Loop->begin(), E = Loop->end();CL != E; ++CL){
+    OS.indent((*CL)->getLoopDepth()*2)
+      << "Child Loop BB" << FunctionNumber << "_"
+      << (*CL)->getHeader()->getNumber() << " Depth " << (*CL)->getLoopDepth()
+      << '\n';
+    PrintChildLoopComment(OS, *CL, FunctionNumber);
+  }
+}
+
+/// EmitBasicBlockLoopComments - Pretty-print comments for basic blocks.
+static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB,
+                                       const MachineLoopInfo *LI,
+                                       const AsmPrinter &AP) {
+  // Add loop depth information
+  const MachineLoop *Loop = LI->getLoopFor(&MBB);
+  if (Loop == 0) return;
+
+  MachineBasicBlock *Header = Loop->getHeader();
+  assert(Header && "No header for loop");
+
+  // If this block is not a loop header, just print out which block the loop
+  // header is and return.
+  if (Header != &MBB) {
+    AP.OutStreamer.AddComment("  in Loop: Header=BB" +
+                              Twine(AP.getFunctionNumber())+"_" +
+                              Twine(Loop->getHeader()->getNumber())+
+                              " Depth="+Twine(Loop->getLoopDepth()));
+    return;
+  }
+
+  // Otherwise, it is a loop header.  Print out information about child and
+  // parent loops.
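+  // The resulting comment block looks roughly like:
+  //   Parent Loop BB0_1 Depth=1
+  //   => This Inner Loop Header: Depth=2
+  //     Child Loop BB0_4 Depth 3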
+  raw_ostream &OS = AP.OutStreamer.GetCommentOS();
+
+  PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber());
+
+  OS << "=>";
+  OS.indent(Loop->getLoopDepth()*2-2);
+
+  OS << "This ";
+  if (Loop->empty())
+    OS << "Inner ";
+  OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n';
+
+  PrintChildLoopComment(OS, Loop, AP.getFunctionNumber());
+}
+
+
+/// EmitBasicBlockStart - This method prints the label for the specified
+/// MachineBasicBlock, an alignment (if present) and a comment describing
+/// it if appropriate.
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
+  // Emit an alignment directive for this block, if needed.
+  if (unsigned Align = MBB->getAlignment())
+    EmitAlignment(Log2_32(Align));
+
+  // If the block has its address taken, emit any labels that were used to
+  // reference the block.  It is possible that there is more than one label
+  // here, because multiple LLVM BB's may have been RAUW'd to this block after
+  // the references were generated.
+  if (MBB->hasAddressTaken()) {
+    const BasicBlock *BB = MBB->getBasicBlock();
+    if (isVerbose())
+      OutStreamer.AddComment("Block address taken");
+
+    std::vector<MCSymbol*> Syms = MMI->getAddrLabelSymbolToEmit(BB);
+
+    for (unsigned i = 0, e = Syms.size(); i != e; ++i)
+      OutStreamer.EmitLabel(Syms[i]);
+  }
+
+  // Print the main label for the block.
+  if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
+    if (isVerbose() && OutStreamer.hasRawTextSupport()) {
+      if (const BasicBlock *BB = MBB->getBasicBlock())
+        if (BB->hasName())
+          OutStreamer.AddComment("%" + BB->getName());
+
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
+
+      // NOTE: Want this comment at start of line, don't emit with AddComment.
+      OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" +
+                              Twine(MBB->getNumber()) + ":");
+    }
+  } else {
+    if (isVerbose()) {
+      if (const BasicBlock *BB = MBB->getBasicBlock())
+        if (BB->hasName())
+          OutStreamer.AddComment("%" + BB->getName());
+      EmitBasicBlockLoopComments(*MBB, LI, *this);
+    }
+
+    OutStreamer.EmitLabel(MBB->getSymbol());
+  }
+}
+
+void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility,
+                                bool IsDefinition) const {
+  MCSymbolAttr Attr = MCSA_Invalid;
+
+  switch (Visibility) {
+  default: break;
+  case GlobalValue::HiddenVisibility:
+    if (IsDefinition)
+      Attr = MAI->getHiddenVisibilityAttr();
+    else
+      Attr = MAI->getHiddenDeclarationVisibilityAttr();
+    break;
+  case GlobalValue::ProtectedVisibility:
+    Attr = MAI->getProtectedVisibilityAttr();
+    break;
+  }
+
+  if (Attr != MCSA_Invalid)
+    OutStreamer.EmitSymbolAttribute(Sym, Attr);
+}
+
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::
+isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
+  // If this is a landing pad, it isn't a fall through.  If it has no preds,
+  // then nothing falls through to it.
+  if (MBB->isLandingPad() || MBB->pred_empty())
+    return false;
+
+  // If there isn't exactly one predecessor, it can't be a fall through.
+  MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+  ++PI2;
+  if (PI2 != MBB->pred_end())
+    return false;
+
+  // The predecessor has to be immediately before this block.
+  const MachineBasicBlock *Pred = *PI;
+
+  if (!Pred->isLayoutSuccessor(MBB))
+    return false;
+
+  // If the block is completely empty, then it definitely does fall through.
+  if (Pred->empty())
+    return true;
+
+  // Otherwise, check the last instruction.
+  const MachineInstr &LastInst = Pred->back();
+  return !LastInst.getDesc().isBarrier();
+}
+
+
+
+GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
+  if (!S->usesMetadata())
+    return 0;
+
+  gcp_map_type &GCMap = getGCMap(GCMetadataPrinters);
+  gcp_map_type::iterator GCPI = GCMap.find(S);
+  if (GCPI != GCMap.end())
+    return GCPI->second;
+
+  const char *Name = S->getName().c_str();
+
+  for (GCMetadataPrinterRegistry::iterator
+         I = GCMetadataPrinterRegistry::begin(),
+         E = GCMetadataPrinterRegistry::end(); I != E; ++I)
+    if (strcmp(Name, I->getName()) == 0) {
+      GCMetadataPrinter *GMP = I->instantiate();
+      GMP->S = S;
+      GCMap.insert(std::make_pair(S, GMP));
+      return GMP;
+    }
+
+  report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name));
+  return 0;
+}
+
diff --git a/final/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/final/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
new file mode 100644
index 00000000000..98a1bf2f1ce
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -0,0 +1,318 @@
+//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Dwarf emissions parts of AsmPrinter.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Dwarf.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Dwarf Emission Helper Routines
+//===----------------------------------------------------------------------===//
+
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+
+  if (MAI->hasLEB128()) {
+    OutStreamer.EmitSLEB128IntValue(Value);
+    return;
+  }
+
+  // If we don't have .sleb128, emit as .bytes.
+  int Sign = Value >> (8 * sizeof(Value) - 1);
+  bool IsMore;
+
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
+    if (IsMore) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (IsMore);
+}
+
+/// EmitULEB128 - emit the specified unsigned leb128 value.
+void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer.AddComment(Desc);
+
+  // FIXME: Should we add a PadTo option to the streamer?
+  if (MAI->hasLEB128() && PadTo == 0) {
+    OutStreamer.EmitULEB128IntValue(Value);
+    return;
+  }
+
+  // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
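+  // (e.g. 624485 = 0x98765 encodes as the bytes 0xE5 0x8E 0x26: seven bits
+  // per byte, least-significant group first, high bit set on all but the
+  // last byte.)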
+  do {
+    unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
+    Value >>= 7;
+    if (Value || PadTo != 0) Byte |= 0x80;
+    OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
+  } while (Value);
+
+  if (PadTo) {
+    if (PadTo > 1)
+      OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+    OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+  }
+}
+
+/// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value.
+void AsmPrinter::EmitCFAByte(unsigned Val) const {
+  if (isVerbose()) {
+    if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64)
+      OutStreamer.AddComment("DW_CFA_offset + Reg (" +
+                             Twine(Val-dwarf::DW_CFA_offset) + ")");
+    else
+      OutStreamer.AddComment(dwarf::CallFrameString(Val));
+  }
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+static const char *DecodeDWARFEncoding(unsigned Encoding) {
+  switch (Encoding) {
+  case dwarf::DW_EH_PE_absptr: return "absptr";
+  case dwarf::DW_EH_PE_omit:   return "omit";
+  case dwarf::DW_EH_PE_pcrel:  return "pcrel";
+  case dwarf::DW_EH_PE_udata4: return "udata4";
+  case dwarf::DW_EH_PE_udata8: return "udata8";
+  case dwarf::DW_EH_PE_sdata4: return "sdata4";
+  case dwarf::DW_EH_PE_sdata8: return "sdata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: return "pcrel udata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: return "pcrel sdata4";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: return "pcrel udata8";
+  case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: return "pcrel sdata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata4:
+    return "indirect pcrel udata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata4:
+    return "indirect pcrel sdata4";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_udata8:
+    return "indirect pcrel udata8";
+  case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8:
+    return "indirect pcrel sdata8";
+  }
+
+  return "<unknown encoding>";
+}
+
+
+/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an
+/// encoding.  If verbose assembly output is enabled, we output comments
+/// describing the encoding.  Desc is an optional string saying what the
+/// encoding is specifying (e.g. "LSDA").
+void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const {
+  if (isVerbose()) {
+    if (Desc != 0)
+      OutStreamer.AddComment(Twine(Desc)+" Encoding = " +
+                             Twine(DecodeDWARFEncoding(Val)));
+    else
+      OutStreamer.AddComment(Twine("Encoding = ") +
+                             DecodeDWARFEncoding(Val));
+  }
+
+  OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/);
+}
+
+/// GetSizeOfEncodedValue - Return the size of the encoding in bytes.
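+/// e.g. DW_EH_PE_udata4 is 4 bytes and DW_EH_PE_absptr is the target pointer
+/// size; only the low three bits of the encoding determine the size.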
+unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
+  if (Encoding == dwarf::DW_EH_PE_omit)
+    return 0;
+
+  switch (Encoding & 0x07) {
+  default: assert(0 && "Invalid encoded value.");
+  case dwarf::DW_EH_PE_absptr: return TM.getTargetData()->getPointerSize();
+  case dwarf::DW_EH_PE_udata2: return 2;
+  case dwarf::DW_EH_PE_udata4: return 4;
+  case dwarf::DW_EH_PE_udata8: return 8;
+  }
+}
+
+void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+  const MCExpr *Exp =
+    TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
+  OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+
+  const MCExpr *Exp =
+    TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer);
+  OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+}
+
+/// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its
+/// section.  This can be done with a special directive if the target supports
+/// it (e.g. cygwin) or by emitting it as an offset from a label at the start
+/// of the section.
+///
+/// SectionLabel is a temporary label emitted at the start of the section that
+/// Label lives in.
+void AsmPrinter::EmitSectionOffset(const MCSymbol *Label,
+                                   const MCSymbol *SectionLabel) const {
+  // On COFF targets, we have to emit the special .secrel32 directive.
+  if (const char *SecOffDir = MAI->getDwarfSectionOffsetDirective()) {
+    // FIXME: MCize.
+    OutStreamer.EmitRawText(SecOffDir + Twine(Label->getName()));
+    return;
+  }
+
+  // Get the section that we're referring to, based on SectionLabel.
+  const MCSection &Section = SectionLabel->getSection();
+
+  // If Label has already been emitted, verify that it is in the same section
+  // as the section label, for sanity.
+  assert((!Label->isInSection() || &Label->getSection() == &Section) &&
+         "Section offset using wrong section base for label");
+
+  // If the section in question will end up with an address of 0 anyway, we can
+  // just emit an absolute reference to save a relocation.
+  if (Section.isBaseAddressKnownZero()) {
+    OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/);
+    return;
+  }
+
+  // Otherwise, emit it as a label difference from the start of the section.
+  EmitLabelDifference(Label, SectionLabel, 4);
+}
+
+//===----------------------------------------------------------------------===//
+// Dwarf Lowering Routines
+//===----------------------------------------------------------------------===//
+
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
+                                MCSymbol *BaseLabel, bool isEH) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+  int stackGrowth = TM.getTargetData()->getPointerSize();
+  if (TM.getFrameLowering()->getStackGrowthDirection() !=
+      TargetFrameLowering::StackGrowsUp)
+    stackGrowth *= -1;
+
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    MCSymbol *Label = Move.getLabel();
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+
+    // Advance row if new location.
+    if (BaseLabel && Label) {
+      MCSymbol *ThisSym = Label;
+      if (ThisSym != BaseLabel) {
+        EmitCFAByte(dwarf::DW_CFA_advance_loc4);
+        EmitLabelDifference(ThisSym, BaseLabel, 4);
+        BaseLabel = ThisSym;
+      }
+    }
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      assert(!Src.isReg() && "Machine move not supported yet.");
+
+      if (Src.getReg() == MachineLocation::VirtualFP) {
+        EmitCFAByte(dwarf::DW_CFA_def_cfa_offset);
+      } else {
+        EmitCFAByte(dwarf::DW_CFA_def_cfa);
+        EmitULEB128(RI->getDwarfRegNum(Src.getReg(), isEH), "Register");
+      }
+
+      EmitULEB128(-Src.getOffset(), "Offset");
+      continue;
+    }
+
+    if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      assert(Dst.isReg() && "Machine move not supported yet.");
+      EmitCFAByte(dwarf::DW_CFA_def_cfa_register);
+      EmitULEB128(RI->getDwarfRegNum(Dst.getReg(), isEH), "Register");
+      continue;
+    }
+
+    unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
+    int Offset = Dst.getOffset() / stackGrowth;
+
+    if (Offset < 0) {
+      EmitCFAByte(dwarf::DW_CFA_offset_extended_sf);
+      EmitULEB128(Reg, "Reg");
+      EmitSLEB128(Offset, "Offset");
+    } else if (Reg < 64) {
+      EmitCFAByte(dwarf::DW_CFA_offset + Reg);
+      EmitULEB128(Offset, "Offset");
+    } else {
+      EmitCFAByte(dwarf::DW_CFA_offset_extended);
+      EmitULEB128(Reg, "Reg");
+      EmitULEB128(Offset, "Offset");
+    }
+  }
+}
+
+/// EmitCFIFrameMoves - Emit frame instructions to describe the layout of the
+/// frame, using the streamer's CFI directives.
+void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+  int stackGrowth = TM.getTargetData()->getPointerSize();
+  if (TM.getFrameLowering()->getStackGrowthDirection() !=
+      TargetFrameLowering::StackGrowsUp)
+    stackGrowth *= -1;
+
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    MCSymbol *Label = Move.getLabel();
+    // Throw out move if the label is invalid.
+    if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      assert(!Src.isReg() && "Machine move not supported yet.");
+
+      if (Src.getReg() == MachineLocation::VirtualFP) {
+        OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+      } else {
+        assert(0 && "Machine move not supported yet.");
+        // Reg + Offset
+      }
+    } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+      assert(Dst.isReg() && "Machine move not supported yet.");
+      OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+    } else {
+      assert(!Dst.isReg() && "Machine move not supported yet.");
+      OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+                                Dst.getOffset());
+    }
+  }
+}
diff --git a/final/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/final/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
new file mode 100644
index 00000000000..c6166e2365a
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -0,0 +1,409 @@
+//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the inline assembler pieces of the AsmPrinter class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Constants.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  struct SrcMgrDiagInfo {
+    const MDNode *LocInfo;
+    LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+    void *DiagContext;
+  };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it.  diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+  SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+  assert(DiagInfo && "Diagnostic context not passed down?");
+
+  // If the inline asm had metadata associated with it, pull out a location
+  // cookie corresponding to which line the error occurred on.
+  unsigned LocCookie = 0;
+  if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+    unsigned ErrorLine = Diag.getLineNo()-1;
+    if (ErrorLine >= LocInfo->getNumOperands())
+      ErrorLine = 0;
+
+    if (LocInfo->getNumOperands() != 0)
+      if (const ConstantInt *CI =
+          dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+        LocCookie = CI->getZExtValue();
+  }
+
+  DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
+/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
+  assert(!Str.empty() && "Can't emit empty inline asm block");
+
+  // Remember if the buffer is nul terminated or not so we can avoid a copy.
+  bool isNullTerminated = Str.back() == 0;
+  if (isNullTerminated)
+    Str = Str.substr(0, Str.size()-1);
+
+  // If the output streamer is actually a .s file, just emit the blob textually.
+  // This is useful in case the asm parser doesn't handle something but the
+  // system assembler does.
+  if (OutStreamer.hasRawTextSupport()) {
+    OutStreamer.EmitRawText(Str);
+    return;
+  }
+
+  SourceMgr SrcMgr;
+  SrcMgrDiagInfo DiagInfo;
+
+  // If the current LLVMContext has an inline asm handler, set it in SourceMgr.
+  LLVMContext &LLVMCtx = MMI->getModule()->getContext();
+  bool HasDiagHandler = false;
+  if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+    // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+    // to be invoked, getting DiagInfo passed into it.
+    DiagInfo.LocInfo = LocMDNode;
+    DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+    DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+    SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
+    HasDiagHandler = true;
+  }
+
+  MemoryBuffer *Buffer;
+  if (isNullTerminated)
+    Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
+  else
+    Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>");
+
+  // Tell SrcMgr about this buffer, it takes ownership of the buffer.
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+
+  OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
+                                                  OutContext, OutStreamer,
+                                                  *MAI));
+  OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+  if (!TAP)
+    report_fatal_error("Inline asm not supported by this streamer because"
+                       " we don't have an asm parser for this target\n");
+  Parser->setTargetParser(*TAP.get());
+
+  // Don't implicitly switch to the text section before the asm.
+  int Res = Parser->Run(/*NoInitialTextSection*/ true,
+                        /*NoFinalize*/ true);
+  if (Res && !HasDiagHandler)
+    report_fatal_error("Error parsing inline asm\n");
+}
+
+
+/// EmitInlineAsm - This method formats and emits the specified machine
+/// instruction that is an inline asm.
+void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
+  assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
+
+  unsigned NumOperands = MI->getNumOperands();
+
+  // Count the number of register definitions to find the asm string.
+  unsigned NumDefs = 0;
+  for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+       ++NumDefs)
+    assert(NumDefs != NumOperands-2 && "No asm string?");
+
+  assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+
+  // Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
+  const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+
+  // If this asmstr is empty, just print the #APP/#NOAPP markers.
+  // These are useful to see where empty asm's wound up.
+  if (AsmStr[0] == 0) {
+    // Don't emit the comments if writing to a .o file.
+    if (!OutStreamer.hasRawTextSupport()) return;
+
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmEnd());
+    return;
+  }
+
+  // Emit the #APP start marker.  This has to happen even if verbose-asm isn't
+  // enabled, so we use EmitRawText.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmStart());
+
+  // Get the !srcloc metadata node if we have it, and decode the loc cookie from
+  // it.
+  unsigned LocCookie = 0;
+  const MDNode *LocMD = 0;
+  for (unsigned i = MI->getNumOperands(); i != 0; --i) {
+    if (MI->getOperand(i-1).isMetadata() &&
+        (LocMD = MI->getOperand(i-1).getMetadata()) &&
+        LocMD->getNumOperands() != 0) {
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+        LocCookie = CI->getZExtValue();
+        break;
+      }
+    }
+  }
+
+  // Emit the inline asm to a temporary string so we can emit it through
+  // EmitInlineAsm.
+  SmallString<256> StringData;
+  raw_svector_ostream OS(StringData);
+
+  OS << '\t';
+
+  // The variant of the current asmprinter.
+  int AsmPrinterVariant = MAI->getAssemblerDialect();
+
+  int CurVariant = -1;            // The number of the {.|.|.} region we are in.
+  const char *LastEmitted = AsmStr; // One past the last character emitted.
+
+  while (*LastEmitted) {
+    switch (*LastEmitted) {
+    default: {
+      // Not a special case, emit the string section literally.
+      const char *LiteralEnd = LastEmitted+1;
+      while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' &&
+             *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n')
+        ++LiteralEnd;
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+        OS.write(LastEmitted, LiteralEnd-LastEmitted);
+      LastEmitted = LiteralEnd;
+      break;
+    }
+    case '\n':
+      ++LastEmitted;   // Consume newline character.
+      OS << '\n';      // Indent code with newline.
+      break;
+    case '$': {
+      ++LastEmitted;   // Consume '$' character.
+      bool Done = true;
+
+      // Handle escapes.
+      switch (*LastEmitted) {
+      default: Done = false; break;
+      case '$':     // $$ -> $
+        if (CurVariant == -1 || CurVariant == AsmPrinterVariant)
+          OS << '$';
+        ++LastEmitted;  // Consume second '$' character.
+        break;
+      case '(':             // $( -> same as GCC's { character.
+        ++LastEmitted;      // Consume '(' character.
+        if (CurVariant != -1)
+          report_fatal_error("Nested variants found in inline asm string: '" +
+                             Twine(AsmStr) + "'");
+        CurVariant = 0;     // We're in the first variant now.
+        break;
+      case '|':
+        ++LastEmitted;  // consume '|' character.
+        if (CurVariant == -1)
+          OS << '|';    // this is gcc's behavior for | outside a variant
+        else
+          ++CurVariant; // We're in the next variant.
+        break;
+      case ')':         // $) -> same as GCC's } char.
+        ++LastEmitted;  // consume ')' character.
+        if (CurVariant == -1)
+          OS << '}';    // this is gcc's behavior for } outside a variant
+        else
+          CurVariant = -1;
+        break;
+      }
+      if (Done) break;
+
+      bool HasCurlyBraces = false;
+      if (*LastEmitted == '{') {     // ${variable}
+        ++LastEmitted;               // Consume '{' character.
+        HasCurlyBraces = true;
+      }
+
+      // If we have ${:foo}, then this is not a real operand reference, it is a
+      // "magic" string reference, just like in .td files. Arrange to call
+      // PrintSpecial.
+      if (HasCurlyBraces && *LastEmitted == ':') {
+        ++LastEmitted;
+        const char *StrStart = LastEmitted;
+        const char *StrEnd = strchr(StrStart, '}');
+        if (StrEnd == 0)
+          report_fatal_error("Unterminated ${:foo} operand in inline asm"
+                             " string: '" + Twine(AsmStr) + "'");
+
+        std::string Val(StrStart, StrEnd);
+        PrintSpecial(MI, OS, Val.c_str());
+        LastEmitted = StrEnd+1;
+        break;
+      }
+
+      const char *IDStart = LastEmitted;
+      const char *IDEnd = IDStart;
+      while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
+      unsigned Val;
+      if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
+        report_fatal_error("Bad $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+      LastEmitted = IDEnd;
+
+      char Modifier[2] = { 0, 0 };
+
+      if (HasCurlyBraces) {
+        // If we have curly braces, check for a modifier character. This
+        // supports syntax like ${0:u}, which corresponds to "%u0" in GCC asm.
+        if (*LastEmitted == ':') {
+          ++LastEmitted;    // Consume ':' character.
+          if (*LastEmitted == 0)
+            report_fatal_error("Bad ${:} expression in inline asm string: '" +
+                               Twine(AsmStr) + "'");
+
+          Modifier[0] = *LastEmitted;
+          ++LastEmitted;    // Consume modifier character.
+        }
+
+        if (*LastEmitted != '}')
+          report_fatal_error("Bad ${} expression in inline asm string: '" +
+                             Twine(AsmStr) + "'");
+        ++LastEmitted;    // Consume '}' character.
+      }
+
+      if (Val >= NumOperands-1)
+        report_fatal_error("Invalid $ operand number in inline asm string: '" +
+                           Twine(AsmStr) + "'");
+
+      // Okay, we finally have a value number. Ask the target to print this
+      // operand!
+      if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
+        unsigned OpNo = InlineAsm::MIOp_FirstOperand;
+
+        bool Error = false;
+
+        // Scan to find the machine operand number for the operand.
+        for (; Val; --Val) {
+          if (OpNo >= MI->getNumOperands()) break;
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1;
+        }
+
+        if (OpNo >= MI->getNumOperands()) {
+          Error = true;
+        } else {
+          unsigned OpFlags = MI->getOperand(OpNo).getImm();
+          ++OpNo;  // Skip over the ID number.
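+          // OpNo now addresses the first machine operand of the requested
+          // group; e.g. a reference like "$1" has skipped over operand
+          // group 0 (its flag word plus its operands) to land here.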
+
+          if (Modifier[0] == 'l')  // labels are target independent
+            // FIXME: What if the operand isn't an MBB, report error?
+            OS << *MI->getOperand(OpNo).getMBB()->getSymbol();
+          else {
+            AsmPrinter *AP = const_cast<AsmPrinter*>(this);
+            if (InlineAsm::isMemKind(OpFlags)) {
+              Error = AP->PrintAsmMemoryOperand(MI, OpNo, AsmPrinterVariant,
+                                                Modifier[0] ? Modifier : 0,
+                                                OS);
+            } else {
+              Error = AP->PrintAsmOperand(MI, OpNo, AsmPrinterVariant,
+                                          Modifier[0] ? Modifier : 0, OS);
+            }
+          }
+        }
+        if (Error) {
+          std::string msg;
+          raw_string_ostream Msg(msg);
+          Msg << "invalid operand in inline asm: '" << AsmStr << "'";
+          MMI->getModule()->getContext().emitError(LocCookie, Msg.str());
+        }
+      }
+      break;
+    }
+    }
+  }
+  OS << '\n' << (char)0;  // null terminate string.
+  EmitInlineAsm(OS.str(), LocMD);
+
+  // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
+  // enabled, so we use EmitRawText.
+  if (OutStreamer.hasRawTextSupport())
+    OutStreamer.EmitRawText(Twine("\t")+MAI->getCommentString()+
+                            MAI->getInlineAsmEnd());
+}
+
+
+/// PrintSpecial - Print information related to the specified machine instr
+/// that is independent of the operand, and may be independent of the instr
+/// itself. This can be useful for portably encoding the comment character
+/// or other bits of target-specific knowledge into the asmstrings. The
+/// syntax used is ${:comment}. Targets can override this to add support
+/// for their own strange codes.
+void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
+                              const char *Code) const {
+  if (!strcmp(Code, "private")) {
+    OS << MAI->getPrivateGlobalPrefix();
+  } else if (!strcmp(Code, "comment")) {
+    OS << MAI->getCommentString();
+  } else if (!strcmp(Code, "uid")) {
+    // Comparing the address of MI isn't sufficient, because machineinstrs may
+    // be allocated to the same address across functions.
+
+    // If this is a new LastFn instruction, bump the counter.
+    if (LastMI != MI || LastFn != getFunctionNumber()) {
+      ++Counter;
+      LastMI = MI;
+      LastFn = getFunctionNumber();
+    }
+    OS << Counter;
+  } else {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Unknown special formatter '" << Code
+        << "' for machine instr: " << *MI;
+    report_fatal_error(Msg.str());
+  }
+}
+
+/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
+/// instruction, using the specified assembler variant. Targets should
+/// override this to format as appropriate.
+bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+                                 unsigned AsmVariant, const char *ExtraCode,
+                                 raw_ostream &O) {
+  // Target doesn't support this yet!
+  return true;
+}
+
+bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+                                       unsigned AsmVariant,
+                                       const char *ExtraCode, raw_ostream &O) {
+  // Target doesn't support this yet!
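+  // Returning true signals an error; EmitInlineAsm reports it as an
+  // "invalid operand in inline asm" error against the !srcloc cookie.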
+ return true; +} + diff --git a/final/lib/CodeGen/AsmPrinter/CMakeLists.txt b/final/lib/CodeGen/AsmPrinter/CMakeLists.txt new file mode 100644 index 00000000000..1377e4dd73e --- /dev/null +++ b/final/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -0,0 +1,13 @@ +add_llvm_library(LLVMAsmPrinter + ARMException.cpp + AsmPrinter.cpp + AsmPrinterDwarf.cpp + AsmPrinterInlineAsm.cpp + DIE.cpp + DwarfCFIException.cpp + DwarfDebug.cpp + DwarfException.cpp + DwarfTableException.cpp + OcamlGCPrinter.cpp + ) + diff --git a/final/lib/CodeGen/AsmPrinter/DIE.cpp b/final/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 00000000000..21396ca37f0 --- /dev/null +++ b/final/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,368 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "DIE.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Attribute); + ID.AddInteger(Form); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(Tag); + ID.AddInteger(ChildrenFlag); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(AsmPrinter *AP) const { + // Emit its Dwarf tag type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + + // Emit whether it has children DIEs. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(AttrData.getAttribute(), + dwarf::AttributeString(AttrData.getAttribute())); + + // Emit form type. + // FIXME: Doing work even in non-asm-verbose runs. + AP->EmitULEB128(AttrData.getForm(), + dwarf::FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. 
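+  // Per DWARF, an abbreviation's attribute specifications are terminated
+  // by a (0, 0) attribute/form pair, hence the two zero ULEB128s below.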
+  AP->EmitULEB128(0, "EOM(1)");
+  AP->EmitULEB128(0, "EOM(2)");
+}
+
+#ifndef NDEBUG
+void DIEAbbrev::print(raw_ostream &O) {
+  O << "Abbreviation @"
+    << format("0x%lx", (long)(intptr_t)this)
+    << " "
+    << dwarf::TagString(Tag)
+    << " "
+    << dwarf::ChildrenString(ChildrenFlag)
+    << '\n';
+
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << " "
+      << dwarf::AttributeString(Data[i].getAttribute())
+      << " "
+      << dwarf::FormEncodingString(Data[i].getForm())
+      << '\n';
+  }
+}
+void DIEAbbrev::dump() { print(dbgs()); }
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE::~DIE() {
+  for (unsigned i = 0, N = Children.size(); i < N; ++i)
+    delete Children[i];
+}
+
+/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+///
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+  DIEInteger *DI = new (A) DIEInteger(0);
+  Values.insert(Values.begin(), DI);
+  Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+  return DI;
+}
+
+#ifndef NDEBUG
+void DIE::print(raw_ostream &O, unsigned IncIndent) {
+  IndentCount += IncIndent;
+  const std::string Indent(IndentCount, ' ');
+  bool isBlock = Abbrev.getTag() == 0;
+
+  if (!isBlock) {
+    O << Indent
+      << "Die: "
+      << format("0x%lx", (long)(intptr_t)this)
+      << ", Offset: " << Offset
+      << ", Size: " << Size << "\n";
+
+    O << Indent
+      << dwarf::TagString(Abbrev.getTag())
+      << " "
+      << dwarf::ChildrenString(Abbrev.getChildrenFlag()) << "\n";
+  } else {
+    O << "Size: " << Size << "\n";
+  }
+
+  const SmallVector<DIEAbbrevData, 8> &Data = Abbrev.getData();
+
+  IndentCount += 2;
+  for (unsigned i = 0, N = Data.size(); i < N; ++i) {
+    O << Indent;
+
+    if (!isBlock)
+      O << dwarf::AttributeString(Data[i].getAttribute());
+    else
+      O << "Blk[" << i << "]";
+
+    O << " "
+      << dwarf::FormEncodingString(Data[i].getForm())
+      << " ";
+    Values[i]->print(O);
+    O << "\n";
+  }
+  IndentCount -= 2;
+
+  for (unsigned j = 0, M = Children.size(); j < M; ++j) {
+    Children[j]->print(O, 4);
+  }
+
+  if (!isBlock) O << "\n";
+  IndentCount -= IncIndent;
+}
+
+void DIE::dump() {
+  print(dbgs());
+}
+#endif
+
+
+#ifndef NDEBUG
+void DIEValue::dump() {
+  print(dbgs());
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEInteger Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit integer of appropriate size.
+///
+void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+  unsigned Size = ~0U;
+  switch (Form) {
+  case dwarf::DW_FORM_flag:  // Fall thru
+  case dwarf::DW_FORM_ref1:  // Fall thru
+  case dwarf::DW_FORM_data1: Size = 1; break;
+  case dwarf::DW_FORM_ref2:  // Fall thru
+  case dwarf::DW_FORM_data2: Size = 2; break;
+  case dwarf::DW_FORM_ref4:  // Fall thru
+  case dwarf::DW_FORM_data4: Size = 4; break;
+  case dwarf::DW_FORM_ref8:  // Fall thru
+  case dwarf::DW_FORM_data8: Size = 8; break;
+  case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
+  case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+  case dwarf::DW_FORM_addr:  Size = Asm->getTargetData().getPointerSize(); break;
+  default: llvm_unreachable("DIE Value form not supported yet");
+  }
+  Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
+}
+
+/// SizeOf - Determine size of integer value in bytes.
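+/// Fixed forms map directly to 1/2/4/8 bytes; the LEB128 forms are
+/// variable-length, e.g. 127 fits in a single ULEB128 byte while 128
+/// needs two.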
+/// +unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { + switch (Form) { + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); + default: llvm_unreachable("DIE Value form not supported yet"); break; + } + return 0; +} + +#ifndef NDEBUG +void DIEInteger::print(raw_ostream &O) { + O << "Int: " << (int64_t)Integer + << format(" 0x%llx", (unsigned long long)Integer); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); + // Emit nul terminator. + AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); +} + +#ifndef NDEBUG +void DIEString::print(raw_ostream &O) { + O << "Str: \"" << Str << "\""; +} +#endif + +//===----------------------------------------------------------------------===// +// DIELabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return AP->getTargetData().getPointerSize(); +} + +#ifndef NDEBUG +void DIELabel::print(raw_ostream &O) { + O << "Lbl: " << Label->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { + AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + return AP->getTargetData().getPointerSize(); +} + +#ifndef NDEBUG +void DIEDelta::print(raw_ostream &O) { + O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); +} +#endif + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. 
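+/// The entry is written as a fixed 4-byte (DW_FORM_ref4 style) offset of
+/// the referenced DIE within its compile unit.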
+///
+void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+  AP->EmitInt32(Entry->getOffset());
+}
+
+#ifndef NDEBUG
+void DIEEntry::print(raw_ostream &O) {
+  O << format("Die: 0x%lx", (long)(intptr_t)Entry);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
+// DIEBlock Implementation
+//===----------------------------------------------------------------------===//
+
+/// ComputeSize - calculate the size of the block.
+///
+unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
+  if (!Size) {
+    const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+    for (unsigned i = 0, N = Values.size(); i < N; ++i)
+      Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm());
+  }
+
+  return Size;
+}
+
+/// EmitValue - Emit block data.
+///
+void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+  switch (Form) {
+  default: assert(0 && "Improper form for block"); break;
+  case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
+  case dwarf::DW_FORM_block2: Asm->EmitInt16(Size);   break;
+  case dwarf::DW_FORM_block4: Asm->EmitInt32(Size);   break;
+  case dwarf::DW_FORM_block:  Asm->EmitULEB128(Size); break;
+  }
+
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev.getData();
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    Values[i]->EmitValue(Asm, AbbrevData[i].getForm());
+}
+
+/// SizeOf - Determine size of block data in bytes.
+///
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+  switch (Form) {
+  case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
+  case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
+  case dwarf::DW_FORM_block4: return Size + sizeof(int32_t);
+  case dwarf::DW_FORM_block:  return Size + MCAsmInfo::getULEB128Size(Size);
+  default: llvm_unreachable("Improper form for block"); break;
+  }
+  return 0;
+}
+
+#ifndef NDEBUG
+void DIEBlock::print(raw_ostream &O) {
+  O << "Blk: ";
+  DIE::print(O, 5);
+}
+#endif
diff --git a/final/lib/CodeGen/AsmPrinter/DIE.h b/final/lib/CodeGen/AsmPrinter/DIE.h
new file mode 100644
index 00000000000..d56c0947795
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DIE.h
@@ -0,0 +1,434 @@
+//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Data structures for DWARF info entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DIE_H__
+#define CODEGEN_ASMPRINTER_DIE_H__
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+  class AsmPrinter;
+  class MCSymbol;
+  class raw_ostream;
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+  /// Dwarf abbreviation.
+  class DIEAbbrevData {
+    /// Attribute - Dwarf attribute code.
+    ///
+    unsigned Attribute;
+
+    /// Form - Dwarf form code.
+    ///
+    unsigned Form;
+  public:
+    DIEAbbrevData(unsigned A, unsigned F) : Attribute(A), Form(F) {}
+
+    // Accessors.
+    unsigned getAttribute() const { return Attribute; }
+    unsigned getForm() const { return Form; }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
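+    /// Two DIEAbbrevData entries with the same attribute and form hash
+    /// identically, which lets equivalent abbreviations be shared.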
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
+  /// information object.
+  class DIEAbbrev : public FoldingSetNode {
+    /// Tag - Dwarf tag code.
+    ///
+    unsigned Tag;
+
+    /// Unique number for node.
+    ///
+    unsigned Number;
+
+    /// ChildrenFlag - Dwarf children flag.
+    ///
+    unsigned ChildrenFlag;
+
+    /// Data - Raw data bytes for abbreviation.
+    ///
+    SmallVector<DIEAbbrevData, 8> Data;
+
+  public:
+    DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {}
+    virtual ~DIEAbbrev() {}
+
+    // Accessors.
+    unsigned getTag() const { return Tag; }
+    unsigned getNumber() const { return Number; }
+    unsigned getChildrenFlag() const { return ChildrenFlag; }
+    const SmallVector<DIEAbbrevData, 8> &getData() const { return Data; }
+    void setTag(unsigned T) { Tag = T; }
+    void setChildrenFlag(unsigned CF) { ChildrenFlag = CF; }
+    void setNumber(unsigned N) { Number = N; }
+
+    /// AddAttribute - Adds another set of attribute information to the
+    /// abbreviation.
+    void AddAttribute(unsigned Attribute, unsigned Form) {
+      Data.push_back(DIEAbbrevData(Attribute, Form));
+    }
+
+    /// AddFirstAttribute - Adds a set of attribute information to the front
+    /// of the abbreviation.
+    void AddFirstAttribute(unsigned Attribute, unsigned Form) {
+      Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
+    }
+
+    /// Profile - Used to gather unique data for the abbreviation folding set.
+    ///
+    void Profile(FoldingSetNodeID &ID) const;
+
+    /// Emit - Print the abbreviation using the specified asm printer.
+    ///
+    void Emit(AsmPrinter *AP) const;
+
+#ifndef NDEBUG
+    void print(raw_ostream &O);
+    void dump();
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIE - A structured debug information entry. Has an abbreviation which
+  /// describes its organization.
+  class DIEValue;
+
+  class DIE {
+  protected:
+    /// Abbrev - Buffer for constructing abbreviation.
+    ///
+    DIEAbbrev Abbrev;
+
+    /// Offset - Offset in debug info section.
+    ///
+    unsigned Offset;
+
+    /// Size - Size of instance + children.
+    ///
+    unsigned Size;
+
+    /// Children DIEs.
+    ///
+    std::vector<DIE *> Children;
+
+    DIE *Parent;
+
+    /// Attributes values.
+    ///
+    SmallVector<DIEValue*, 32> Values;
+
+    // Private data for print()
+    mutable unsigned IndentCount;
+  public:
+    explicit DIE(unsigned Tag)
+      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+        Size(0), Parent(0), IndentCount(0) {}
+    virtual ~DIE();
+
+    // Accessors.
+    DIEAbbrev &getAbbrev() { return Abbrev; }
+    unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
+    unsigned getTag() const { return Abbrev.getTag(); }
+    unsigned getOffset() const { return Offset; }
+    unsigned getSize() const { return Size; }
+    const std::vector<DIE *> &getChildren() const { return Children; }
+    const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
+    DIE *getParent() const { return Parent; }
+    void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+    void setOffset(unsigned O) { Offset = O; }
+    void setSize(unsigned S) { Size = S; }
+
+    /// addValue - Add a value and attributes to a DIE.
+    ///
+    void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+      Abbrev.AddAttribute(Attribute, Form);
+      Values.push_back(Value);
+    }
+
+    /// SiblingOffset - Return the offset of the debug information entry's
+    /// sibling.
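+    /// Size covers this entry and all of its children, so the next
+    /// sibling begins at Offset + Size.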
+ unsigned getSiblingOffset() const { return Offset + Size; } + + /// addSiblingOffset - Add a sibling offset field to the front of the DIE. + /// The caller is responsible for deleting the return value at or after the + /// same time it destroys this DIE. + /// + DIEValue *addSiblingOffset(BumpPtrAllocator &A); + + /// addChild - Add a child to the DIE. + /// + void addChild(DIE *Child) { + if (Child->getParent()) { + assert (Child->getParent() == this && "Unexpected DIE Parent!"); + return; + } + Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); + Children.push_back(Child); + Child->Parent = this; + } + +#ifndef NDEBUG + void print(raw_ostream &O, unsigned IncIndent = 0); + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEValue - A debug information entry value. + /// + class DIEValue { + public: + enum { + isInteger, + isString, + isLabel, + isSectionOffset, + isDelta, + isEntry, + isBlock + }; + protected: + /// Type - Type of data stored in the value. + /// + unsigned Type; + public: + explicit DIEValue(unsigned T) : Type(T) {} + virtual ~DIEValue() {} + + // Accessors + unsigned getType() const { return Type; } + + /// EmitValue - Emit value via the Dwarf writer. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0; + + /// SizeOf - Return the size of a value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0; + + // Implement isa/cast/dyncast. + static bool classof(const DIEValue *) { return true; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O) = 0; + void dump(); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEInteger - An integer value DIE. + /// + class DIEInteger : public DIEValue { + uint64_t Integer; + public: + explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {} + + /// BestForm - Choose the best form for integer. + /// + static unsigned BestForm(bool IsSigned, uint64_t Int) { + if (IsSigned) { + if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; + if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; + if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + } else { + if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; + if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; + if ((unsigned int)Int == Int) return dwarf::DW_FORM_data4; + } + return dwarf::DW_FORM_data8; + } + + /// EmitValue - Emit integer of appropriate size. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + uint64_t getValue() const { return Integer; } + + /// SizeOf - Determine size of integer value in bytes. + /// + virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const; + + // Implement isa/cast/dyncast. + static bool classof(const DIEInteger *) { return true; } + static bool classof(const DIEValue *I) { return I->getType() == isInteger; } + +#ifndef NDEBUG + virtual void print(raw_ostream &O); +#endif + }; + + //===--------------------------------------------------------------------===// + /// DIEString - A string value DIE. This DIE keeps string reference only. + /// + class DIEString : public DIEValue { + const StringRef Str; + public: + explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} + + /// EmitValue - Emit string value. + /// + virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; + + /// SizeOf - Determine size of string value in bytes. 
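+    /// The string is emitted inline with a trailing NUL, so its size is
+    /// the string length plus one byte for the terminator.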
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const {
+      return Str.size() + sizeof(char); // sizeof('\0');
+    }
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEString *) { return true; }
+    static bool classof(const DIEValue *S) { return S->getType() == isString; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIELabel - A label expression DIE.
+  //
+  class DIELabel : public DIEValue {
+    const MCSymbol *Label;
+  public:
+    explicit DIELabel(const MCSymbol *L) : DIEValue(isLabel), Label(L) {}
+
+    /// EmitValue - Emit label value.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// getValue - Get MCSymbol.
+    ///
+    const MCSymbol *getValue() const { return Label; }
+
+    /// SizeOf - Determine size of label value in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIELabel *) { return true; }
+    static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEDelta - A simple label difference DIE.
+  ///
+  class DIEDelta : public DIEValue {
+    const MCSymbol *LabelHi;
+    const MCSymbol *LabelLo;
+  public:
+    DIEDelta(const MCSymbol *Hi, const MCSymbol *Lo)
+      : DIEValue(isDelta), LabelHi(Hi), LabelLo(Lo) {}
+
+    /// EmitValue - Emit delta value.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// SizeOf - Determine size of delta value in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEDelta *) { return true; }
+    static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEEntry - A pointer to another debug information entry. An instance of
+  /// this class can also be used as a proxy for a debug information entry not
+  /// yet defined (i.e. types.)
+  class DIEEntry : public DIEValue {
+    DIE *const Entry;
+  public:
+    explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+
+    DIE *getEntry() const { return Entry; }
+
+    /// EmitValue - Emit debug information entry offset.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// SizeOf - Determine size of debug information entry in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
+      return sizeof(int32_t);
+    }
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEEntry *) { return true; }
+    static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIEBlock - A block of values. Primarily used for location expressions.
+  //
+  class DIEBlock : public DIEValue, public DIE {
+    unsigned Size;                // Size in bytes excluding size header.
+  public:
+    DIEBlock()
+      : DIEValue(isBlock), DIE(0), Size(0) {}
+    virtual ~DIEBlock() {}
+
+    /// ComputeSize - calculate the size of the block.
+    ///
+    unsigned ComputeSize(AsmPrinter *AP);
+
+    /// BestForm - Choose the best form for data.
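+    /// Picks the smallest block form whose length prefix can represent
+    /// Size, e.g. a 300-byte block overflows DW_FORM_block1's one-byte
+    /// length and gets DW_FORM_block2 instead.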
+    ///
+    unsigned BestForm() const {
+      if ((unsigned char)Size == Size)  return dwarf::DW_FORM_block1;
+      if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
+      if ((unsigned int)Size == Size)   return dwarf::DW_FORM_block4;
+      return dwarf::DW_FORM_block;
+    }
+
+    /// EmitValue - Emit block data.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+
+    /// SizeOf - Determine size of block data in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEBlock *) { return true; }
+    static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O);
+#endif
+  };
+
+} // end llvm namespace
+
+#endif
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/final/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 00000000000..68be2eed8f0
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,138 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+  : DwarfException(A),
+    shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+    {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+  if (!Asm->MAI->isExceptionHandlingDwarf())
+    return;
+
+  if (!shouldEmitTableModule)
+    return;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+  // Begin eh frame section.
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  // Emit references to all used personality functions
+  const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+  for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i));
+    Asm->EmitReference(Personalities[i], PerEncoding);
+  }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+  shouldEmitTable = shouldEmitMoves = false;
+
+  // If any landing pads survive, we need an EH table.
+  shouldEmitTable = !MMI->getLandingPads().empty();
+
+  // See if we need frame move info.
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+  if (shouldEmitMoves || shouldEmitTable)
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+
+  shouldEmitTableModule |= shouldEmitTable;
+
+  if (shouldEmitMoves) {
+    const TargetFrameLowering *TFL = Asm->TM.getFrameLowering();
+    Asm->OutStreamer.EmitCFIStartProc();
+
+    // Indicate locations of general callee saved registers in frame.
+    std::vector<MachineMove> Moves;
+    TFL->getInitialFrameState(Moves);
+    Asm->EmitCFIFrameMoves(Moves);
+    Asm->EmitCFIFrameMoves(MMI->getFrameMoves());
+  }
+
+  if (!shouldEmitTable)
+    return;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Provide LSDA information.
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  if (LSDAEncoding != dwarf::DW_EH_PE_omit)
+    Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+                                                    Asm->getFunctionNumber()),
+                                 LSDAEncoding);
+
+  // Indicate personality routine, if any.
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+  if (PerEncoding != dwarf::DW_EH_PE_omit &&
+      MMI->getPersonalities()[MMI->getPersonalityIndex()])
+    Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality",
+                                                   MMI->getPersonalityIndex()),
+                                        PerEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+  if (!shouldEmitMoves && !shouldEmitTable) return;
+
+  if (shouldEmitMoves)
+    Asm->OutStreamer.EmitCFIEndProc();
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (shouldEmitTable)
+    EmitExceptionTable();
+}
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/final/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
new file mode 100644
index 00000000000..15330c28d2c
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -0,0 +1,3792 @@
+//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+#include "DwarfDebug.h"
+#include "DIE.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/DIBuilder.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/Path.h"
+using namespace llvm;
+
+static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
+     cl::desc("Print DbgScope information for each machine instruction"));
+
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+                                              cl::Hidden,
+     cl::desc("Disable debug info printing"));
+
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+     cl::desc("Make an absence of debug location information explicit."),
+     cl::init(false));
+
+#ifndef NDEBUG
+STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number");
+#endif
+
+namespace {
+  const char *DWARFGroupName = "DWARF Emission";
+  const char *DbgTimerName = "DWARF Debug Writer";
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+/// Configuration values for initial hash set sizes (log2).
+///
+static const unsigned InitAbbreviationsSetSize = 9; // log2(512)
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// CompileUnit - This dwarf writer support class manages information
+/// associated with a source file.
+class CompileUnit {
+  /// ID - File identifier for source.
+  ///
+  unsigned ID;
+
+  /// Die - Compile unit debug information entry.
+  ///
+  const OwningPtr<DIE> CUDie;
+
+  /// IndexTyDie - An anonymous type for index type. Owned by CUDie.
+  DIE *IndexTyDie;
+
+  /// MDNodeToDieMap - Tracks the mapping of unit level debug information
+  /// variables to debug information entries.
+  DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
+
+  /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
+  /// descriptors to debug information entries using a DIEEntry proxy.
+  DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
+
+  /// Globals - A map of globally visible named entities for this unit.
+  ///
+  StringMap<DIE *> Globals;
+
+  /// GlobalTypes - A map of globally visible types for this unit.
+  ///
+  StringMap<DIE *> GlobalTypes;
+
+public:
+  CompileUnit(unsigned I, DIE *D)
+    : ID(I), CUDie(D), IndexTyDie(0) {}
+
+  // Accessors.
+  unsigned getID() const { return ID; }
+  DIE* getCUDie() const { return CUDie.get(); }
+  const StringMap<DIE *> &getGlobals() const { return Globals; }
+  const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; }
+
+  /// hasContent - Return true if this compile unit has something to write out.
+  ///
+  bool hasContent() const { return !CUDie->getChildren().empty(); }
+
+  /// addGlobal - Add a new global entity to the compile unit.
+  ///
+  void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
+
+  /// addGlobalType - Add a new global type to the compile unit.
+  ///
+  void addGlobalType(StringRef Name, DIE *Die) {
+    GlobalTypes[Name] = Die;
+  }
+
+  /// getDIE - Returns the debug information entry map slot for the
+  /// specified debug variable.
+  DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
+
+  /// insertDIE - Insert DIE into the map.
+  void insertDIE(const MDNode *N, DIE *D) {
+    MDNodeToDieMap.insert(std::make_pair(N, D));
+  }
+
+  /// getDIEEntry - Returns the debug information entry for the specified
+  /// debug variable.
+  DIEEntry *getDIEEntry(const MDNode *N) {
+    DenseMap<const MDNode *, DIEEntry *>::iterator I =
+      MDNodeToDIEEntryMap.find(N);
+    if (I == MDNodeToDIEEntryMap.end())
+      return NULL;
+    return I->second;
+  }
+
+  /// insertDIEEntry - Insert debug information entry into the map.
+  void insertDIEEntry(const MDNode *N, DIEEntry *E) {
+    MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
+  }
+
+  /// addDie - Adds or interns the DIE to the compile unit.
+  ///
+  void addDie(DIE *Buffer) {
+    this->CUDie->addChild(Buffer);
+  }
+
+  // getIndexTyDie - Get an anonymous type for index type.
+  DIE *getIndexTyDie() {
+    return IndexTyDie;
+  }
+
+  // setIndexTyDie - Set D as anonymous type for index which can be reused
+  // later.
+  void setIndexTyDie(DIE *D) {
+    IndexTyDie = D;
+  }
+
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgVariable - This class is used to track local variable information.
+///
+class DbgVariable {
+  DIVariable Var;             // Variable Descriptor.
+  DIE *TheDIE;                // Variable DIE.
+  unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
+public:
+  // AbsVar may be NULL.
+  DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
+
+  // Accessors.
+  DIVariable getVariable() const { return Var; }
+  void setDIE(DIE *D) { TheDIE = D; }
+  DIE *getDIE() const { return TheDIE; }
+  void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+  unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
+  StringRef getName() const { return Var.getName(); }
+  unsigned getTag() const { return Var.getTag(); }
+  bool variableHasComplexAddress() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.hasComplexAddress();
+  }
+  bool isBlockByrefVariable() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.isBlockByrefVariable();
+  }
+  unsigned getNumAddrElements() const {
+    assert(Var.Verify() && "Invalid complex DbgVariable!");
+    return Var.getNumAddrElements();
+  }
+  uint64_t getAddrElement(unsigned i) const {
+    return Var.getAddrElement(i);
+  }
+  DIType getType() const {
+    DIType Ty = Var.getType();
+    // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+    // addresses instead.
+    if (Var.isBlockByrefVariable()) {
+      /* Byref variables, in Blocks, are declared by the programmer as
+         "SomeType VarName;", but the compiler creates a
+         __Block_byref_x_VarName struct, and gives the variable VarName
+         either the struct, or a pointer to the struct, as its type. This
+         is necessary for various behind-the-scenes things the compiler
+         needs to do with by-reference variables in blocks.
+
+         However, as far as the original *programmer* is concerned, the
+         variable should still have type 'SomeType', as originally declared.
+
+         The following function dives into the __Block_byref_x_VarName
+         struct to find the original type of the variable. This will be
+         passed back to the code generating the type for the Debug
+         Information Entry for the variable 'VarName'. 'VarName' will then
+         have the original type 'SomeType' in its debug information.
+
+         The original type 'SomeType' will be the type of the field named
+         'VarName' inside the __Block_byref_x_VarName struct.
+
+         NOTE: In order for this to not completely fail on the debugger
+         side, the Debug Information Entry for the variable VarName needs to
+         have a DW_AT_location that tells the debugger how to unwind through
+         the pointers and __Block_byref_x_VarName struct to find the actual
+         value of the variable. The function addBlockByrefType does this. */
+      DIType subType = Ty;
+      unsigned tag = Ty.getTag();
+
+      if (tag == dwarf::DW_TAG_pointer_type) {
+        DIDerivedType DTy = DIDerivedType(Ty);
+        subType = DTy.getTypeDerivedFrom();
+      }
+
+      DICompositeType blockStruct = DICompositeType(subType);
+      DIArray Elements = blockStruct.getTypeArray();
+
+      for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+        DIDescriptor Element = Elements.getElement(i);
+        DIDerivedType DT = DIDerivedType(Element);
+        if (getName() == DT.getName())
+          return (DT.getTypeDerivedFrom());
+      }
+      return Ty;
+    }
+    return Ty;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// DbgRange - This is used to track range of instructions with identical
+/// debug info scope.
+///
+typedef std::pair<const MachineInstr *, const MachineInstr *> DbgRange;
+
+//===----------------------------------------------------------------------===//
+/// DbgScope - This class is used to track scope information.
+///
+class DbgScope {
+  DbgScope *Parent;           // Parent to this scope.
+  DIDescriptor Desc;          // Debug info descriptor for scope.
+  // Location at which this scope is inlined.
+  AssertingVH<const MDNode> InlinedAtLocation;
+  bool AbstractScope;         // Abstract Scope
+  const MachineInstr *LastInsn;  // Last instruction of this scope.
+  const MachineInstr *FirstInsn; // First instruction of this scope.
+  unsigned DFSIn, DFSOut;
+  // Scopes defined in scope. Contents not owned.
+  SmallVector<DbgScope *, 4> Scopes;
+  // Variables declared in scope. Contents owned.
+  SmallVector<DbgVariable *, 8> Variables;
+  SmallVector<DbgRange, 4> Ranges;
+  // Private state for dump()
+  mutable unsigned IndentLevel;
+public:
+  DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
+    : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+      LastInsn(0), FirstInsn(0),
+      DFSIn(0), DFSOut(0), IndentLevel(0) {}
+  virtual ~DbgScope();
+
+  // Accessors.
+  DbgScope *getParent() const { return Parent; }
+  void setParent(DbgScope *P) { Parent = P; }
+  DIDescriptor getDesc() const { return Desc; }
+  const MDNode *getInlinedAt() const { return InlinedAtLocation; }
+  const MDNode *getScopeNode() const { return Desc; }
+  const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
+  const SmallVector<DbgVariable *, 8> &getDbgVariables() { return Variables; }
+  const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
+
+  /// openInsnRange - This scope covers instruction range starting from MI.
+  void openInsnRange(const MachineInstr *MI) {
+    if (!FirstInsn)
+      FirstInsn = MI;
+
+    if (Parent)
+      Parent->openInsnRange(MI);
+  }
+
+  /// extendInsnRange - Extend the current instruction range covered by
+  /// this scope.
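+  /// The extension is propagated to every parent scope as well, so an
+  /// enclosing scope always covers at least its children's instructions.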
+  void extendInsnRange(const MachineInstr *MI) {
+    assert (FirstInsn && "MI Range is not open!");
+    LastInsn = MI;
+    if (Parent)
+      Parent->extendInsnRange(MI);
+  }
+
+  /// closeInsnRange - Create a range based on FirstInsn and LastInsn collected
+  /// until now. This is used when a new scope is encountered while walking
+  /// machine instructions.
+  void closeInsnRange(DbgScope *NewScope = NULL) {
+    assert (LastInsn && "Last insn missing!");
+    Ranges.push_back(DbgRange(FirstInsn, LastInsn));
+    FirstInsn = NULL;
+    LastInsn = NULL;
+    // If Parent dominates NewScope then do not close Parent's instruction
+    // range.
+    if (Parent && (!NewScope || !Parent->dominates(NewScope)))
+      Parent->closeInsnRange(NewScope);
+  }
+
+  void setAbstractScope() { AbstractScope = true; }
+  bool isAbstractScope() const { return AbstractScope; }
+
+  // Depth First Search support to walk and manipulate DbgScope hierarchy.
+  unsigned getDFSOut() const { return DFSOut; }
+  void setDFSOut(unsigned O) { DFSOut = O; }
+  unsigned getDFSIn() const { return DFSIn; }
+  void setDFSIn(unsigned I) { DFSIn = I; }
+  bool dominates(const DbgScope *S) {
+    if (S == this)
+      return true;
+    if (DFSIn < S->getDFSIn() && DFSOut > S->getDFSOut())
+      return true;
+    return false;
+  }
+
+  /// addScope - Add a scope to the scope.
+  ///
+  void addScope(DbgScope *S) { Scopes.push_back(S); }
+
+  /// addVariable - Add a variable to the scope.
+  ///
+  void addVariable(DbgVariable *V) { Variables.push_back(V); }
+
+#ifndef NDEBUG
+  void dump() const;
+#endif
+};
+
+} // end llvm namespace
+
+#ifndef NDEBUG
+void DbgScope::dump() const {
+  raw_ostream &err = dbgs();
+  err.indent(IndentLevel);
+  const MDNode *N = Desc;
+  N->dump();
+  if (AbstractScope)
+    err << "Abstract Scope\n";
+
+  IndentLevel += 2;
+  if (!Scopes.empty())
+    err << "Children ...\n";
+  for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
+    if (Scopes[i] != this)
+      Scopes[i]->dump();
+
+  IndentLevel -= 2;
+}
+#endif
+
+DbgScope::~DbgScope() {
+  for (unsigned j = 0, M = Variables.size(); j < M; ++j)
+    delete Variables[j];
+}
+
+DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
+  : Asm(A), MMI(Asm->MMI), FirstCU(0),
+    AbbreviationsSet(InitAbbreviationsSetSize),
+    CurrentFnDbgScope(0), PrevLabel(NULL) {
+  NextStringPoolNumber = 0;
+
+  DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
+  DwarfStrSectionSym = TextSectionSym = 0;
+  DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+  FunctionBeginSym = FunctionEndSym = 0;
+  DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+  {
+    NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+    beginModule(M);
+  }
+}
+DwarfDebug::~DwarfDebug() {
+  for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+    DIEBlocks[j]->~DIEBlock();
+}
+
+MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) {
+  std::pair<MCSymbol*, unsigned> &Entry = StringPool[Str];
+  if (Entry.first) return Entry.first;
+
+  Entry.second = NextStringPoolNumber++;
+  return Entry.first = Asm->GetTempSymbol("string", Entry.second);
+}
+
+
+/// assignAbbrevNumber - Define a unique number for the abbreviation.
+///
+void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
+  // Profile the node so that we can make it unique.
+  FoldingSetNodeID ID;
+  Abbrev.Profile(ID);
+
+  // Check the set for priors.
+  DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev);
+
+  // If it's newly added.
+  if (InSet == &Abbrev) {
+    // Add to abbreviation list.
+    Abbreviations.push_back(&Abbrev);
+
+    // Assign the vector position + 1 as its number.
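+    // (Abbreviation numbers are 1-based; DWARF reserves code 0 for the
+    // null/terminator entry, so position + 1 is used.)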
+    Abbrev.setNumber(Abbreviations.size());
+  } else {
+    // Assign existing abbreviation number.
+    Abbrev.setNumber(InSet->getNumber());
+  }
+}
+
+/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+/// information entry.
+DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
+  DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
+  return Value;
+}
+
+/// addUInt - Add an unsigned integer attribute data and value.
+///
+void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, uint64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(false, Integer);
+  DIEValue *Value = Integer == 1 ?
+    DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addSInt - Add a signed integer attribute data and value.
+///
+void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
+                         unsigned Form, int64_t Integer) {
+  if (!Form) Form = DIEInteger::BestForm(true, Integer);
+  DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps string reference.
+void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
+                           StringRef String) {
+  DIEValue *Value = new (DIEValueAllocator) DIEString(String);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addLabel - Add a Dwarf label attribute data and value.
+///
+void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                          const MCSymbol *Label) {
+  DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addDelta - Add a label delta attribute data and value.
+///
+void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                          const MCSymbol *Hi, const MCSymbol *Lo) {
+  DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
+  Die->addValue(Attribute, Form, Value);
+}
+
+/// addDIEEntry - Add a DIE attribute data and value.
+///
+void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form,
+                             DIE *Entry) {
+  Die->addValue(Attribute, Form, createDIEEntry(Entry));
+}
+
+
+/// addBlock - Add block data.
+///
+void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
+                          DIEBlock *Block) {
+  Block->ComputeSize(Asm);
+  DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+  Die->addValue(Attribute, Block->BestForm(), Block);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
+  // Verify variable.
+  if (!V.Verify())
+    return;
+
+  unsigned Line = V.getLineNumber();
+  if (Line == 0)
+    return;
+  unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
+void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
+  // Verify global variable.
+  if (!G.Verify())
+    return;
+
+  unsigned Line = G.getLineNumber();
+  if (Line == 0)
+    return;
+  unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename());
+  assert(FileID && "Invalid file id");
+  addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
+  addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
+}
+
+/// addSourceLine - Add location information to specified debug information
+/// entry.
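+/// For a subprogram this emits DW_AT_decl_file and DW_AT_decl_line, just
+/// as the overloads above do for variables and globals.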
+void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = GetOrCreateSourceID(SP.getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. + if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = GetOrCreateSourceID(FN); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based +/// on provided frame index. +void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { + MachineLocation Location; + unsigned FrameReg; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + Location.set(FrameReg, Offset); + + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + + // Decode the original location, and use that as the start of the byref + // variable's location. 
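+  // DWARF has single-byte opcodes for registers 0-31 (DW_OP_reg0 + N and
+  // DW_OP_breg0 + N); higher register numbers need the DW_OP_regx /
+  // DW_OP_bregx forms with a ULEB128 register operand, which is what the
+  // Reg < 32 checks below choose between.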
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + } else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... + struct __Block_byref_x_VarName *forwarding; + ... + SomeType VarName; + ... + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst + DW_OP_deref + DW_OP_plus_uconst + + That is what this function does. 
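+
+   For illustration only (the offsets here are made up): starting from a
+   pointer to the struct, with the forwarding field at offset 8 and the
+   VarName field at offset 24, the ops appended below would be:
+
+     DW_OP_deref; DW_OP_plus_uconst 8; DW_OP_deref; DW_OP_plus_uconst 24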
+*/
+
+/// addBlockByrefAddress - Start with the address based on the location
+/// provided, and generate the DWARF information necessary to find the
+/// actual Block variable (navigating the Block struct) based on the
+/// starting location. Add the DWARF information to the die. For
+/// more information, read the large comment just above here.
+///
+void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
+                                      unsigned Attribute,
+                                      const MachineLocation &Location) {
+  DIType Ty = DV->getType();
+  DIType TmpTy = Ty;
+  unsigned Tag = Ty.getTag();
+  bool isPointer = false;
+
+  StringRef varName = DV->getName();
+
+  if (Tag == dwarf::DW_TAG_pointer_type) {
+    DIDerivedType DTy = DIDerivedType(Ty);
+    TmpTy = DTy.getTypeDerivedFrom();
+    isPointer = true;
+  }
+
+  DICompositeType blockStruct = DICompositeType(TmpTy);
+
+  // Find the __forwarding field and the variable field in the __Block_byref
+  // struct.
+  DIArray Fields = blockStruct.getTypeArray();
+  DIDescriptor varField = DIDescriptor();
+  DIDescriptor forwardingField = DIDescriptor();
+
+  for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Fields.getElement(i);
+    DIDerivedType DT = DIDerivedType(Element);
+    StringRef fieldName = DT.getName();
+    if (fieldName == "__forwarding")
+      forwardingField = Element;
+    else if (fieldName == varName)
+      varField = Element;
+  }
+
+  // Get the offsets for the forwarding field and the variable field.
+  unsigned forwardingFieldOffset =
+    DIDerivedType(forwardingField).getOffsetInBits() >> 3;
+  unsigned varFieldOffset =
+    DIDerivedType(varField).getOffsetInBits() >> 3;
+
+  // Decode the original location, and use that as the start of the byref
+  // variable's location.
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (Location.isReg()) {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32)
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  // If we started with a pointer to the __Block_byref... struct, then
+  // the first thing we need to do is dereference the pointer (DW_OP_deref).
+  if (isPointer)
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Next add the offset for the '__forwarding' field:
+  // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
+  // adding the offset if it's 0.
+  if (forwardingFieldOffset > 0) {
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset);
+  }
+
+  // Now dereference the __forwarding field to get to the real __Block_byref
+  // struct: DW_OP_deref.
+  addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+
+  // Now that we've got the real __Block_byref... struct, add the offset
+  // for the variable's field to get to the location of the actual variable:
+  // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
+  if (varFieldOffset > 0) {
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset);
+  }
+
+  // Now attach the location information to the DIE.
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/// addAddress - Add an address attribute to a die based on the location
+/// provided.
+void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location) {
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  if (RI->getFrameRegister(*Asm->MF) == Location.getReg()
+      && Location.getOffset()) {
+    // If variable offset is based in frame register then use fbreg.
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+    addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+    addBlock(Die, Attribute, 0, Block);
+    return;
+  }
+
+  if (Location.isReg()) {
+    if (Reg < 32) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
+    } else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+  } else {
+    if (Reg < 32) {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+    } else {
+      addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx);
+      addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
+    }
+
+    addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+  }
+
+  addBlock(Die, Attribute, 0, Block);
+}
+
+/// addRegisterAddress - Add register location entry in variable DIE.
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) {
+  assert (MO.isReg() && "Invalid machine operand!");
+  if (!MO.getReg())
+    return false;
+  MachineLocation Location;
+  Location.set(MO.getReg());
+  addAddress(Die, dwarf::DW_AT_location, Location);
+  return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) {
+  assert (MO.isImm() && "Invalid machine operand!");
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  unsigned Imm = MO.getImm();
+  addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addConstantFPValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
+  assert (MO.isFPImm() && "Invalid machine operand!");
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+  APFloat FPImm = MO.getFPImm()->getValueAPF();
+
+  // Get the raw data form of the floating point.
+  const APInt FltVal = FPImm.bitcastToAPInt();
+  const char *FltPtr = (const char*)FltVal.getRawData();
+
+  int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  int Incr = (LittleEndian ? 1 : -1);
+  int Start = (LittleEndian ? 0 : NumBytes - 1);
+  int Stop = (LittleEndian ? NumBytes : -1);
+
+  // Output the constant to DWARF one byte at a time.
+  for (; Start != Stop; Start += Incr)
+    addUInt(Block, 0, dwarf::DW_FORM_data1,
+            (unsigned char)0xFF & FltPtr[Start]);
+
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI,
+                                  bool Unsigned) {
+  if (CI->getBitWidth() <= 64) {
+    if (Unsigned)
+      addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+              CI->getZExtValue());
+    else
+      addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+              CI->getSExtValue());
+    return true;
+  }
+
+  DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+  // Get the raw data form of the large APInt.
+  const APInt Val = CI->getValue();
+  const char *Ptr = (const char*)Val.getRawData();
+
+  int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+  bool LittleEndian = Asm->getTargetData().isLittleEndian();
+  int Incr = (LittleEndian ? 1 : -1);
+  int Start = (LittleEndian ? 0 : NumBytes - 1);
+  int Stop = (LittleEndian ? NumBytes : -1);
+
+  // Output the constant to DWARF one byte at a time.
+  for (; Start != Stop; Start += Incr)
+    addUInt(Block, 0, dwarf::DW_FORM_data1,
+            (unsigned char)0xFF & Ptr[Start]);
+
+  addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+  return true;
+}
+
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
+  if (Context.isType()) {
+    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
+    ContextDIE->addChild(Die);
+  } else if (Context.isNameSpace()) {
+    DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
+    ContextDIE->addChild(Die);
+  } else if (Context.isSubprogram()) {
+    DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context));
+    ContextDIE->addChild(Die);
+  } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
+    ContextDIE->addChild(Die);
+  else
+    getCompileUnit(Context)->addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
+  CompileUnit *TypeCU = getCompileUnit(Ty);
+  DIE *TyDIE = TypeCU->getDIE(Ty);
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = new DIE(dwarf::DW_TAG_base_type);
+  TypeCU->insertDIE(Ty, TyDIE);
+  if (Ty.isBasicType())
+    constructTypeDIE(*TyDIE, DIBasicType(Ty));
+  else if (Ty.isCompositeType())
+    constructTypeDIE(*TyDIE, DICompositeType(Ty));
+  else {
+    assert(Ty.isDerivedType() && "Unknown kind of DIType");
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty));
+  }
+
+  addToContextOwner(TyDIE, Ty.getContext());
+  return TyDIE;
+}
+
+/// addType - Add a new type attribute to the specified entity.
+void DwarfDebug::addType(DIE *Entity, DIType Ty) {
+  if (!Ty.Verify())
+    return;
+
+  // Check for pre-existence.
+  CompileUnit *TypeCU = getCompileUnit(Ty);
+  DIEEntry *Entry = TypeCU->getDIEEntry(Ty);
+  // If it exists then use the existing value.
+  if (Entry) {
+    Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+    return;
+  }
+
+  // Construct type.
+  DIE *Buffer = getOrCreateTypeDIE(Ty);
+
+  // Set up proxy.
+  Entry = createDIEEntry(Buffer);
+  TypeCU->insertDIEEntry(Ty, Entry);
+
+  Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
+}
+
+/// constructTypeDIE - Construct basic type die from DIBasicType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
+  // Get core information.
+  StringRef Name = BTy.getName();
+  Buffer.setTag(dwarf::DW_TAG_base_type);
+  addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+          BTy.getEncoding());
+
+  // Add name if not anonymous or intermediate type.
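+  // (Illustrative: for a C "int" this DIE would typically carry DW_AT_name
+  // "int", DW_AT_encoding DW_ATE_signed, and DW_AT_byte_size 4.)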
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  uint64_t Size = BTy.getSizeInBits() >> 3;
+  addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+}
+
+/// constructTypeDIE - Construct derived type die from DIDerivedType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
+  // Get core information.
+  StringRef Name = DTy.getName();
+  uint64_t Size = DTy.getSizeInBits() >> 3;
+  unsigned Tag = DTy.getTag();
+
+  // FIXME - Workaround for templates.
+  if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type;
+
+  Buffer.setTag(Tag);
+
+  // Map to the main type; void will not have a type.
+  DIType FromTy = DTy.getTypeDerivedFrom();
+  addType(&Buffer, FromTy);
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  // Add size if non-zero (derived types might be zero-sized.)
+  if (Size)
+    addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+
+  // Add source line info if available and DTy is not a forward declaration.
+  if (!DTy.isForwardDecl())
+    addSourceLine(&Buffer, DTy);
+}
+
+/// constructTypeDIE - Construct type DIE from DICompositeType.
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
+  // Get core information.
+  StringRef Name = CTy.getName();
+
+  uint64_t Size = CTy.getSizeInBits() >> 3;
+  unsigned Tag = CTy.getTag();
+  Buffer.setTag(Tag);
+
+  switch (Tag) {
+  case dwarf::DW_TAG_vector_type:
+  case dwarf::DW_TAG_array_type:
+    constructArrayTypeDIE(Buffer, &CTy);
+    break;
+  case dwarf::DW_TAG_enumeration_type: {
+    DIArray Elements = CTy.getTypeArray();
+
+    // Add enumerators to enumeration type.
+    for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+      DIE *ElemDie = NULL;
+      DIDescriptor Enum(Elements.getElement(i));
+      if (Enum.isEnumerator()) {
+        ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
+        Buffer.addChild(ElemDie);
+      }
+    }
+  }
+    break;
+  case dwarf::DW_TAG_subroutine_type: {
+    // Add return type.
+    DIArray Elements = CTy.getTypeArray();
+    DIDescriptor RTy = Elements.getElement(0);
+    addType(&Buffer, DIType(RTy));
+
+    bool isPrototyped = true;
+    // Add arguments.
+    for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
+      DIDescriptor Ty = Elements.getElement(i);
+      if (Ty.isUnspecifiedParameter()) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+        Buffer.addChild(Arg);
+        isPrototyped = false;
+      } else {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        addType(Arg, DIType(Ty));
+        Buffer.addChild(Arg);
+      }
+    }
+    // Add prototype flag.
+    if (isPrototyped)
+      addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+  }
+    break;
+  case dwarf::DW_TAG_structure_type:
+  case dwarf::DW_TAG_union_type:
+  case dwarf::DW_TAG_class_type: {
+    // Add elements to structure type.
+    DIArray Elements = CTy.getTypeArray();
+
+    // A forward-declared struct type may not have elements available.
+    unsigned N = Elements.getNumElements();
+    if (N == 0)
+      break;
+
+    // Add elements to structure type.
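+    // For instance (illustrative): given "class A { int i; void foo(); };",
+    // the loop below adds a DW_TAG_member child for "i" and a
+    // DW_TAG_subprogram child for "foo", each tagged with its accessibility.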
+    for (unsigned i = 0; i < N; ++i) {
+      DIDescriptor Element = Elements.getElement(i);
+      DIE *ElemDie = NULL;
+      if (Element.isSubprogram()) {
+        DISubprogram SP(Element);
+        ElemDie = createSubprogramDIE(DISubprogram(Element));
+        if (SP.isProtected())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_protected);
+        else if (SP.isPrivate())
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_private);
+        else
+          addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+                  dwarf::DW_ACCESS_public);
+        if (SP.isExplicit())
+          addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+      }
+      else if (Element.isVariable()) {
+        DIVariable DV(Element);
+        ElemDie = new DIE(dwarf::DW_TAG_variable);
+        addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+                  DV.getName());
+        addType(ElemDie, DV.getType());
+        addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+        addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+        addSourceLine(ElemDie, DV);
+      } else if (Element.isDerivedType())
+        ElemDie = createMemberDIE(DIDerivedType(Element));
+      else
+        continue;
+      Buffer.addChild(ElemDie);
+    }
+
+    if (CTy.isAppleBlockExtension())
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1);
+
+    unsigned RLang = CTy.getRunTimeLang();
+    if (RLang)
+      addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class,
+              dwarf::DW_FORM_data1, RLang);
+
+    DICompositeType ContainingType = CTy.getContainingType();
+    if (DIDescriptor(ContainingType).isCompositeType())
+      addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
+                  getOrCreateTypeDIE(DIType(ContainingType)));
+    else {
+      DIDescriptor Context = CTy.getContext();
+      addToContextOwner(&Buffer, Context);
+    }
+
+    if (Tag == dwarf::DW_TAG_class_type) {
+      DIArray TParams = CTy.getTemplateParams();
+      unsigned N = TParams.getNumElements();
+      // Add template parameters.
+      for (unsigned i = 0; i < N; ++i) {
+        DIDescriptor Element = TParams.getElement(i);
+        if (Element.isTemplateTypeParameter())
+          Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+                            DITemplateTypeParameter(Element)));
+        else if (Element.isTemplateValueParameter())
+          Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+                            DITemplateValueParameter(Element)));
+      }
+    }
+    break;
+  }
+  default:
+    break;
+  }
+
+  // Add name if not anonymous or intermediate type.
+  if (!Name.empty())
+    addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type
+      || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type)
+    {
+    // Add size if non-zero (derived types might be zero-sized.)
+    if (Size)
+      addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
+    else {
+      // Add zero size if it is not a forward declaration.
+      if (CTy.isForwardDecl())
+        addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      else
+        addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0);
+    }
+
+    // Add source line info if available.
+    if (!CTy.isForwardDecl())
+      addSourceLine(&Buffer, CTy);
+  }
+}
+
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+  CompileUnit *TypeCU = getCompileUnit(TP);
+  DIE *ParamDIE = TypeCU->getDIE(TP);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+  addType(ParamDIE, TP.getType());
+  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+  return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+  CompileUnit *TVCU = getCompileUnit(TPV);
+  DIE *ParamDIE = TVCU->getDIE(TPV);
+  if (ParamDIE)
+    return ParamDIE;
+
+  ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+  addType(ParamDIE, TPV.getType());
+  addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+  addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+          TPV.getValue());
+  return ParamDIE;
+}
+
+/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
+  int64_t L = SR.getLo();
+  int64_t H = SR.getHi();
+  DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type);
+
+  addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
+  if (L)
+    addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
+  addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+
+  Buffer.addChild(DW_Subrange);
+}
+
+/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
+                                       DICompositeType *CTy) {
+  Buffer.setTag(dwarf::DW_TAG_array_type);
+  if (CTy->getTag() == dwarf::DW_TAG_vector_type)
+    addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
+
+  // Emit derived type.
+  addType(&Buffer, CTy->getTypeDerivedFrom());
+  DIArray Elements = CTy->getTypeArray();
+
+  // Get an anonymous type for index type.
+  CompileUnit *TheCU = getCompileUnit(*CTy);
+  DIE *IdxTy = TheCU->getIndexTyDie();
+  if (!IdxTy) {
+    // Construct an anonymous type for index type.
+    IdxTy = new DIE(dwarf::DW_TAG_base_type);
+    addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
+    addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
+            dwarf::DW_ATE_signed);
+    TheCU->addDie(IdxTy);
+    TheCU->setIndexTyDie(IdxTy);
+  }
+
+  // Add subranges to array type.
+  for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+    DIDescriptor Element = Elements.getElement(i);
+    if (Element.getTag() == dwarf::DW_TAG_subrange_type)
+      constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
+  }
+}
+
+/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) {
+  DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
+  StringRef Name = ETy.getName();
+  addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+  int64_t Value = ETy.getEnumValue();
+  addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value);
+  return Enumerator;
+}
+
+/// getRealLinkageName - If a special LLVM prefix is used to inform the asm
+/// printer not to emit the usual symbol prefix before the symbol name, then
+/// return the linkage name after skipping this special LLVM prefix.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+  char One = '\1';
+  if (LinkageName.startswith(StringRef(&One, 1)))
+    return LinkageName.substr(1);
+  return LinkageName;
+}
+
+/// createMemberDIE - Create new member DIE.
+DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
+  DIE *MemberDie = new DIE(DT.getTag());
+  StringRef Name = DT.getName();
+  if (!Name.empty())
+    addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+
+  addType(MemberDie, DT.getTypeDerivedFrom());
+
+  addSourceLine(MemberDie, DT);
+
+  DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
+  addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
+
+  uint64_t Size = DT.getSizeInBits();
+  uint64_t FieldSize = DT.getOriginalTypeSize();
+
+  if (Size != FieldSize) {
+    // Handle bitfield.
+    addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3);
+    addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits());
+
+    uint64_t Offset = DT.getOffsetInBits();
+    uint64_t AlignMask = ~(DT.getAlignInBits() - 1);
+    uint64_t HiMark = (Offset + FieldSize) & AlignMask;
+    uint64_t FieldOffset = (HiMark - FieldSize);
+    Offset -= FieldOffset;
+
+    // Maybe we need to work from the other end.
+    if (Asm->getTargetData().isLittleEndian())
+      Offset = FieldSize - (Offset + Size);
+    addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset);
+
+    // Here DW_AT_data_member_location points to the anonymous
+    // field that includes this bit field.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3);
+
+  } else
+    // This is not a bitfield.
+    addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3);
+
+  if (DT.getTag() == dwarf::DW_TAG_inheritance
+      && DT.isVirtual()) {
+
+    // For C++, virtual base classes are not at a fixed offset. Use the
+    // following expression to extract the appropriate offset from the vtable:
+    // BaseAddr = ObAddr + *((*ObAddr) - Offset)
+
+    DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits());
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+    addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0,
+             VBaseLocationDie);
+  } else
+    addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
+
+  if (DT.isProtected())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_protected);
+  else if (DT.isPrivate())
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_private);
+  // Otherwise C++ member and base classes are considered public.
+  else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+            dwarf::DW_VIRTUALITY_virtual);
+  return MemberDie;
+}
+
+/// createSubprogramDIE - Create new DIE using SP.
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
+  CompileUnit *SPCU = getCompileUnit(SP);
+  DIE *SPDie = SPCU->getDIE(SP);
+  if (SPDie)
+    return SPDie;
+
+  SPDie = new DIE(dwarf::DW_TAG_subprogram);
+  // Constructors and operators for anonymous aggregates do not have names.
+  if (!SP.getName().empty())
+    addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
+
+  StringRef LinkageName = SP.getLinkageName();
+  if (!LinkageName.empty())
+    addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+              getRealLinkageName(LinkageName));
+
+  addSourceLine(SPDie, SP);
+
+  if (SP.isPrototyped())
+    addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
+
+  // Add Return Type.
+  DICompositeType SPTy = SP.getType();
+  DIArray Args = SPTy.getTypeArray();
+  unsigned SPTag = SPTy.getTag();
+
+  if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
+    addType(SPDie, SPTy);
+  else
+    addType(SPDie, DIType(Args.getElement(0)));
+
+  unsigned VK = SP.getVirtuality();
+  if (VK) {
+    addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
+    addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+    ContainingTypeMap.insert(std::make_pair(SPDie,
+                                            SP.getContainingType()));
+  }
+
+  if (!SP.isDefinition()) {
+    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+    // Add arguments. Do not add arguments for subprogram definition. They will
+    // be handled while processing variables.
+    DICompositeType SPTy = SP.getType();
+    DIArray Args = SPTy.getTypeArray();
+    unsigned SPTag = SPTy.getTag();
+
+    if (SPTag == dwarf::DW_TAG_subroutine_type)
+      for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        DIType ATy = DIType(DIType(Args.getElement(i)));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+        SPDie->addChild(Arg);
+      }
+  }
+
+  if (SP.isArtificial())
+    addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+  if (!SP.isLocalToUnit())
+    addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+  if (SP.isOptimized())
+    addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+  if (unsigned isa = Asm->getISAEncoding()) {
+    addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa);
+  }
+
+  // DW_TAG_inlined_subroutine may refer to this DIE.
+  SPCU->insertDIE(SP, SPDie);
+
+  // Add to context owner.
+  addToContextOwner(SPDie, SP.getContext());
+
+  return SPDie;
+}
+
+DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
+  assert(N && "Invalid Scope encoding!");
+
+  DbgScope *AScope = AbstractScopes.lookup(N);
+  if (AScope)
+    return AScope;
+
+  DbgScope *Parent = NULL;
+
+  DIDescriptor Scope(N);
+  if (Scope.isLexicalBlock()) {
+    DILexicalBlock DB(N);
+    DIDescriptor ParentDesc = DB.getContext();
+    Parent = getOrCreateAbstractScope(ParentDesc);
+  }
+
+  AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
+
+  if (Parent)
+    Parent->addScope(AScope);
+  AScope->setAbstractScope();
+  AbstractScopes[N] = AScope;
+  if (DIDescriptor(N).isSubprogram())
+    AbstractScopesList.push_back(AScope);
+  return AScope;
+}
+
+/// isSubprogramContext - Return true if Context is either a subprogram
+/// or another context nested inside a subprogram.
+static bool isSubprogramContext(const MDNode *Context) {
+  if (!Context)
+    return false;
+  DIDescriptor D(Context);
+  if (D.isSubprogram())
+    return true;
+  if (D.isType())
+    return isSubprogramContext(DIType(Context).getContext());
+  return false;
+}
+
+/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+/// If there are global variables in this scope then create and insert
+/// DIEs for these variables.
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+  CompileUnit *SPCU = getCompileUnit(SPNode);
+  DIE *SPDie = SPCU->getDIE(SPNode);
+
+  assert(SPDie && "Unable to find subprogram DIE!");
+  DISubprogram SP(SPNode);
+
+  // There is no need to generate a specification DIE for a function
+  // defined at compile unit level. If a function is defined inside another
+  // function then gdb prefers the definition at the top level and does not
+  // expect a specification DIE in the parent function. So avoid creating
+  // a specification DIE for a function defined inside a function.
+  if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
+      !SP.getContext().isFile() &&
+      !isSubprogramContext(SP.getContext())) {
+    addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+
+    // Add arguments.
+    DICompositeType SPTy = SP.getType();
+    DIArray Args = SPTy.getTypeArray();
+    unsigned SPTag = SPTy.getTag();
+    if (SPTag == dwarf::DW_TAG_subroutine_type)
+      for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+        DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+        DIType ATy = DIType(DIType(Args.getElement(i)));
+        addType(Arg, ATy);
+        if (ATy.isArtificial())
+          addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+        SPDie->addChild(Arg);
+      }
+    DIE *SPDeclDie = SPDie;
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+                SPDeclDie);
+    SPCU->addDie(SPDie);
+  }
+
+  // Pick up abstract subprogram DIE.
+  if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+    SPDie = new DIE(dwarf::DW_TAG_subprogram);
+    addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+                dwarf::DW_FORM_ref4, AbsSPDIE);
+    SPCU->addDie(SPDie);
+  }
+
+  addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
+  addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+           Asm->GetTempSymbol("func_end", Asm->getFunctionNumber()));
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  MachineLocation Location(RI->getFrameRegister(*Asm->MF));
+  addAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+  return SPDie;
+}
+
+/// constructLexicalScopeDIE - Construct new DW_TAG_lexical_block
+/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
+
+  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+  if (Scope->isAbstractScope())
+    return ScopeDIE;
+
+  const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+  if (Ranges.empty())
+    return 0;
+
+  SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+  if (Ranges.size() > 1) {
+    // .debug_range section has not been laid out yet. Emit offset in
+    // .debug_range as a uint, size 4, for now. emitDIE will handle
+    // DW_AT_ranges appropriately.
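+    // (Illustrative: a scope split across two ranges pushes two begin/end
+    // symbol pairs below, followed by a NULL pair that becomes the
+    // terminating 0,0 entry of the .debug_ranges list.)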
+    addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4,
+            DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
+    for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+         RE = Ranges.end(); RI != RE; ++RI) {
+      DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+      DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
+    }
+    DebugRangeSymbols.push_back(NULL);
+    DebugRangeSymbols.push_back(NULL);
+    return ScopeDIE;
+  }
+
+  const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+  const MCSymbol *End = getLabelAfterInsn(RI->second);
+
+  if (End == 0) return 0;
+
+  assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
+  assert(End->isDefined() && "Invalid end label for an inlined scope!");
+
+  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start);
+  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End);
+
+  return ScopeDIE;
+}
+
+/// constructInlinedScopeDIE - This scope represents inlined body of
+/// a function. Construct DIE to represent this concrete inlined copy
+/// of the function.
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
+
+  const SmallVector<DbgRange, 4> &Ranges = Scope->getRanges();
+  assert (Ranges.empty() == false
+          && "DbgScope does not have instruction markers!");
+
+  // FIXME : .debug_inlined section specification does not clearly state how
+  // to emit inlined scopes that are split into multiple instruction ranges.
+  // For now, use the first instruction range and emit the low_pc/high_pc pair
+  // and the corresponding .debug_inlined section entry for this pair.
+  SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
+  const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+  const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
+
+  if (StartLabel == 0 || EndLabel == 0) {
+    assert (0 && "Unexpected Start and End labels for an inlined scope!");
+    return 0;
+  }
+  assert(StartLabel->isDefined() &&
+         "Invalid starting label for an inlined scope!");
+  assert(EndLabel->isDefined() &&
+         "Invalid end label for an inlined scope!");
+
+  if (!Scope->getScopeNode())
+    return NULL;
+  DIScope DS(Scope->getScopeNode());
+  DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
+
+  DISubprogram InlinedSP = getDISubprogram(DS);
+  CompileUnit *TheCU = getCompileUnit(InlinedSP);
+  DIE *OriginDIE = TheCU->getDIE(InlinedSP);
+  assert(OriginDIE && "Unable to find Origin DIE!");
+  addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+              dwarf::DW_FORM_ref4, OriginDIE);
+
+  addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel);
+  addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel);
+
+  InlinedSubprogramDIEs.insert(OriginDIE);
+
+  // Track the start label for this inlined function.
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+    I = InlineInfo.find(InlinedSP);
+
+  if (I == InlineInfo.end()) {
+    InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
+                                                   ScopeDIE));
+    InlinedSPNodes.push_back(InlinedSP);
+  } else
+    I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
+
+  DILocation DL(Scope->getInlinedAt());
+  addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
+  addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+  return ScopeDIE;
+}
+
+
+/// constructVariableDIE - Construct a DIE for the given DbgVariable.
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
+  StringRef Name = DV->getName();
+  if (Name.empty())
+    return NULL;
+
+  // Translate tag to proper Dwarf tag. The result variable is dropped for
+  // now.
+  unsigned Tag;
+  switch (DV->getTag()) {
+  case dwarf::DW_TAG_return_variable:
+    return NULL;
+  case dwarf::DW_TAG_arg_variable:
+    Tag = dwarf::DW_TAG_formal_parameter;
+    break;
+  case dwarf::DW_TAG_auto_variable:    // fall thru
+  default:
+    Tag = dwarf::DW_TAG_variable;
+    break;
+  }
+
+  // Define variable debug information entry.
+  DIE *VariableDie = new DIE(Tag);
+
+  DIE *AbsDIE = NULL;
+  DenseMap<const DbgVariable *, const DbgVariable *>::iterator
+    V2AVI = VarToAbstractVarMap.find(DV);
+  if (V2AVI != VarToAbstractVarMap.end())
+    AbsDIE = V2AVI->second->getDIE();
+
+  if (AbsDIE)
+    addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+                dwarf::DW_FORM_ref4, AbsDIE);
+  else {
+    addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+    addSourceLine(VariableDie, DV->getVariable());
+
+    // Add variable type.
+    addType(VariableDie, DV->getType());
+  }
+
+  if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+  else if (DIVariable(DV->getVariable()).isArtificial())
+    addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+  if (Scope->isAbstractScope()) {
+    DV->setDIE(VariableDie);
+    return VariableDie;
+  }
+
+  // Add variable address.
+
+  unsigned Offset = DV->getDotDebugLocOffset();
+  if (Offset != ~0U) {
+    addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+             Asm->GetTempSymbol("debug_loc", Offset));
+    DV->setDIE(VariableDie);
+    UseDotDebugLocEntry.insert(VariableDie);
+    return VariableDie;
+  }
+
+  // Check if variable is described by a DBG_VALUE instruction.
+  DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
+    DbgVariableToDbgInstMap.find(DV);
+  if (DVI != DbgVariableToDbgInstMap.end()) {
+    const MachineInstr *DVInsn = DVI->second;
+    bool updated = false;
+    // FIXME : Handle getNumOperands != 3
+    if (DVInsn->getNumOperands() == 3) {
+      if (DVInsn->getOperand(0).isReg()) {
+        const MachineOperand RegOp = DVInsn->getOperand(0);
+        const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
+        if (DVInsn->getOperand(1).isImm() &&
+            TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) {
+          addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
+          updated = true;
+        } else
+          updated = addRegisterAddress(VariableDie, RegOp);
+      }
+      else if (DVInsn->getOperand(0).isImm())
+        updated = addConstantValue(VariableDie, DVInsn->getOperand(0));
+      else if (DVInsn->getOperand(0).isFPImm())
+        updated =
+          addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+    } else {
+      MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
+      if (Location.getReg()) {
+        addAddress(VariableDie, dwarf::DW_AT_location, Location);
+        updated = true;
+      }
+    }
+    if (!updated) {
+      // If VariableDie is not updated then the DBG_VALUE instruction does not
+      // have valid variable info.
+      delete VariableDie;
+      return NULL;
+    }
+    DV->setDIE(VariableDie);
+    return VariableDie;
+  }
+
+  // .. else use frame index, if available.
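+  // (Illustrative: a spilled local at frame index FI typically resolves,
+  // via addVariableAddress and the frame register, to a DW_OP_fbreg block
+  // such as DW_OP_fbreg -8.)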
+  int FI = 0;
+  if (findVariableFrameIndex(DV, &FI))
+    addVariableAddress(DV, VariableDie, FI);
+
+  DV->setDIE(VariableDie);
+  return VariableDie;
+
+}
+
+void DwarfDebug::addPubTypes(DISubprogram SP) {
+  DICompositeType SPTy = SP.getType();
+  unsigned SPTag = SPTy.getTag();
+  if (SPTag != dwarf::DW_TAG_subroutine_type)
+    return;
+
+  DIArray Args = SPTy.getTypeArray();
+  for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
+    DIType ATy(Args.getElement(i));
+    if (!ATy.Verify())
+      continue;
+    DICompositeType CATy = getDICompositeType(ATy);
+    if (DIDescriptor(CATy).Verify() && !CATy.getName().empty()
+        && !CATy.isForwardDecl()) {
+      CompileUnit *TheCU = getCompileUnit(CATy);
+      if (DIEEntry *Entry = TheCU->getDIEEntry(CATy))
+        TheCU->addGlobalType(CATy.getName(), Entry->getEntry());
+    }
+  }
+}
+
+/// constructScopeDIE - Construct a DIE for this scope.
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
+  if (!Scope || !Scope->getScopeNode())
+    return NULL;
+
+  SmallVector<DIE *, 8> Children;
+
+  // Collect arguments for current function.
+  if (Scope == CurrentFnDbgScope)
+    for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i)
+      if (DbgVariable *ArgDV = CurrentFnArguments[i])
+        if (DIE *Arg = constructVariableDIE(ArgDV, Scope))
+          Children.push_back(Arg);
+
+  // Collect lexical scope children first.
+  const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+  for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+    if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+      Children.push_back(Variable);
+  const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+  for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+    if (DIE *Nested = constructScopeDIE(Scopes[j]))
+      Children.push_back(Nested);
+  DIScope DS(Scope->getScopeNode());
+  DIE *ScopeDIE = NULL;
+  if (Scope->getInlinedAt())
+    ScopeDIE = constructInlinedScopeDIE(Scope);
+  else if (DS.isSubprogram()) {
+    ProcessedSPNodes.insert(DS);
+    if (Scope->isAbstractScope()) {
+      ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+      // Note down abstract DIE.
+      if (ScopeDIE)
+        AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+    }
+    else
+      ScopeDIE = updateSubprogramScopeDIE(DS);
+  }
+  else {
+    // There is no need to emit empty lexical block DIE.
+    if (Children.empty())
+      return NULL;
+    ScopeDIE = constructLexicalScopeDIE(Scope);
+  }
+
+  if (!ScopeDIE) return NULL;
+
+  // Add children.
+  for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+       E = Children.end(); I != E; ++I)
+    ScopeDIE->addChild(*I);
+
+  if (DS.isSubprogram())
+    addPubTypes(DISubprogram(DS));
+
+  return ScopeDIE;
+}
+
+/// GetOrCreateSourceID - Look up the source id with the given directory and
+/// source file names. If none currently exists, create a new id and insert it
+/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
+/// maps as well.
+
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
+  // If FE did not provide a file name, then assume stdin.
+  if (FileName.empty())
+    return GetOrCreateSourceID("<stdin>");
+
+  StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+  if (Entry.getValue())
+    return Entry.getValue();
+
+  unsigned SrcId = SourceIdMap.size();
+  Entry.setValue(SrcId);
+
+  // Print out a .file directive to specify files for .loc directives.
+  Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName);
+
+  return SrcId;
+}
+
+/// getOrCreateNameSpace - Create a DIE for DINameSpace.
+DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
+  CompileUnit *TheCU = getCompileUnit(NS);
+  DIE *NDie = TheCU->getDIE(NS);
+  if (NDie)
+    return NDie;
+  NDie = new DIE(dwarf::DW_TAG_namespace);
+  TheCU->insertDIE(NS, NDie);
+  if (!NS.getName().empty())
+    addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
+  addSourceLine(NDie, NS);
+  addToContextOwner(NDie, NS.getContext());
+  return NDie;
+}
+
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
+  DICompileUnit DIUnit(N);
+  StringRef FN = DIUnit.getFilename();
+  StringRef Dir = DIUnit.getDirectory();
+  unsigned ID = GetOrCreateSourceID(FN);
+
+  DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
+  addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
+            DIUnit.getProducer());
+  addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
+          DIUnit.getLanguage());
+  addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+  // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
+  // simplifies debug range entries.
+  addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
+  // DW_AT_stmt_list is an offset of line number information for this
+  // compile unit in the debug_line section.
+  if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList())
+    addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr,
+             Asm->GetTempSymbol("section_line"));
+  else
+    addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+
+  if (!Dir.empty())
+    addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
+  if (DIUnit.isOptimized())
+    addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
+
+  StringRef Flags = DIUnit.getFlags();
+  if (!Flags.empty())
+    addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags);
+
+  unsigned RVer = DIUnit.getRunTimeVersion();
+  if (RVer)
+    addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
+            dwarf::DW_FORM_data1, RVer);
+
+  CompileUnit *NewCU = new CompileUnit(ID, Die);
+  if (!FirstCU)
+    FirstCU = NewCU;
+  CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get CompileUnit DIE.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+  assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+  DIDescriptor D(N);
+  const MDNode *CUNode = NULL;
+  if (D.isCompileUnit())
+    CUNode = N;
+  else if (D.isSubprogram())
+    CUNode = DISubprogram(N).getCompileUnit();
+  else if (D.isType())
+    CUNode = DIType(N).getCompileUnit();
+  else if (D.isGlobalVariable())
+    CUNode = DIGlobalVariable(N).getCompileUnit();
+  else if (D.isVariable())
+    CUNode = DIVariable(N).getCompileUnit();
+  else if (D.isNameSpace())
+    CUNode = DINameSpace(N).getCompileUnit();
+  else if (D.isFile())
+    CUNode = DIFile(N).getCompileUnit();
+  else
+    return FirstCU;
+
+  DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+    = CUMap.find(CUNode);
+  if (I == CUMap.end())
+    return FirstCU;
+  return I->second;
+}
+
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+  DIDerivedType DTy(Ty);
+  if (DTy.Verify())
+    return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+  DIBasicType BTy(Ty);
+  if (BTy.Verify()) {
+    unsigned Encoding = BTy.getEncoding();
+    if (Encoding == dwarf::DW_ATE_unsigned ||
+        Encoding == dwarf::DW_ATE_unsigned_char)
+      return true;
+  }
+  return false;
+}
+
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+  const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+  if (!CE || CE->getNumOperands() != 3 ||
+      CE->getOpcode() != Instruction::GetElementPtr)
+    return NULL;
+
+  // First operand points to a global value.
+  if (!isa<GlobalValue>(CE->getOperand(0)))
+    return NULL;
+
+  // Second operand is zero.
+  const ConstantInt *CI =
+    dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+  if (!CI || !CI->isZero())
+    return NULL;
+
+  // Third operand is offset.
+  if (!isa<ConstantInt>(CE->getOperand(2)))
+    return NULL;
+
+  return CE;
+}
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
+  DIGlobalVariable GV(N);
+
+  // If debug information is malformed then ignore it.
+  if (GV.Verify() == false)
+    return;
+
+  // Check for pre-existence.
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(GV))
+    return;
+
+  DIType GTy = GV.getType();
+  DIE *VariableDIE = new DIE(GV.getTag());
+
+  bool isGlobalVariable = GV.getGlobal() != NULL;
+
+  // Add name.
+  addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string,
+            GV.getDisplayName());
+  StringRef LinkageName = GV.getLinkageName();
+  if (!LinkageName.empty() && isGlobalVariable)
+    addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
+              getRealLinkageName(LinkageName));
+  // Add type.
+  addType(VariableDIE, GTy);
+  if (GTy.isCompositeType() && !GTy.getName().empty()
+      && !GTy.isForwardDecl()) {
+    DIEEntry *Entry = TheCU->getDIEEntry(GTy);
+    assert(Entry && "Missing global type!");
+    TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
+  }
+  // Add scoping info.
+  if (!GV.isLocalToUnit()) {
+    addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+    // Expose as global.
+    TheCU->addGlobal(GV.getName(), VariableDIE);
+  }
+  // Add line number info.
+  addSourceLine(VariableDIE, GV);
+  // Add to map.
+  TheCU->insertDIE(N, VariableDIE);
+  // Add to context owner.
+  DIDescriptor GVContext = GV.getContext();
+  addToContextOwner(VariableDIE, GVContext);
+  // Add location.
+  if (isGlobalVariable) {
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(GV.getGlobal()));
+    // Do not create specification DIE if context is either compile unit
+    // or a subprogram.
+    if (GV.isDefinition() && !GVContext.isCompileUnit() &&
+        !GVContext.isFile() && !isSubprogramContext(GVContext)) {
+      // Create specification DIE.
+      DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
+      addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
+                  dwarf::DW_FORM_ref4, VariableDIE);
+      addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
+      addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+      TheCU->addDie(VariableSpecDIE);
+    } else {
+      addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+    }
+  } else if (ConstantInt *CI =
+             dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+    addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+  else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+    // GV is a merged global.
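+    // For example (illustrative): with the GEP shown above and an offset
+    // operand of 2, the block built below encodes
+    //   DW_OP_addr _MergedGlobals, DW_OP_constu 2, DW_OP_plus
+    // so the debugger computes this variable's address inside the merged
+    // blob.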
+    DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+    addLabel(Block, 0, dwarf::DW_FORM_udata,
+             Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+    ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+    addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
+  }
+
+  return;
+}
+
+/// constructSubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
+  DISubprogram SP(N);
+
+  // Check for pre-existence.
+  CompileUnit *TheCU = getCompileUnit(N);
+  if (TheCU->getDIE(N))
+    return;
+
+  if (!SP.isDefinition())
+    // This is a method declaration which will be handled while constructing
+    // class type.
+    return;
+
+  DIE *SubprogramDie = createSubprogramDIE(SP);
+
+  // Add to map.
+  TheCU->insertDIE(N, SubprogramDie);
+
+  // Add to context owner.
+  addToContextOwner(SubprogramDie, SP.getContext());
+
+  // Expose as global.
+  TheCU->addGlobal(SP.getName(), SubprogramDie);
+
+  return;
+}
+
+/// beginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::beginModule(Module *M) {
+  if (DisableDebugInfoPrinting)
+    return;
+
+  DebugInfoFinder DbgFinder;
+  DbgFinder.processModule(*M);
+
+  bool HasDebugInfo = false;
+
+  // Scan all the compile-units to see if there are any marked as the main
+  // unit. If not, we do not generate debug info.
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I) {
+    if (DICompileUnit(*I).isMain()) {
+      HasDebugInfo = true;
+      break;
+    }
+  }
+
+  if (!HasDebugInfo) return;
+
+  // Tell MMI that we have debug info.
+  MMI->setDebugInfoAvailability(true);
+
+  // Emit initial sections.
+  EmitSectionLabels();
+
+  // Create all the compile unit DIEs.
+  for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+       E = DbgFinder.compile_unit_end(); I != E; ++I)
+    constructCompileUnit(*I);
+
+  // Create DIEs for each subprogram.
+  for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
+       E = DbgFinder.subprogram_end(); I != E; ++I)
+    constructSubprogramDIE(*I);
+
+  // Create DIEs for each global variable.
+  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+       E = DbgFinder.global_variable_end(); I != E; ++I)
+    constructGlobalVariableDIE(*I);
+
+  // Create type DIEs for retained enums and types.
+  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
+  if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
+  // Prime section data.
+  SectionMap.insert(Asm->getObjFileLowering().getTextSection());
+}
+
+/// endModule - Emit all Dwarf sections that should come after the content.
+///
+void DwarfDebug::endModule() {
+  if (!FirstCU) return;
+  const Module *M = MMI->getModule();
+  DenseMap<const MDNode *, DbgScope *> DeadFnScopeMap;
+  if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+    for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+      if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+      DISubprogram SP(AllSPs->getOperand(SI));
+      if (!SP.Verify()) continue;
+
+      // Collect info for variables that were optimized out.
+      if (!SP.isDefinition()) continue;
+      StringRef FName = SP.getLinkageName();
+      if (FName.empty())
+        FName = SP.getName();
+      NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
+      if (!NMD) continue;
+      unsigned E = NMD->getNumOperands();
+      if (!E) continue;
+      DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+      DeadFnScopeMap[SP] = Scope;
+      for (unsigned I = 0; I != E; ++I) {
+        DIVariable DV(NMD->getOperand(I));
+        if (!DV.Verify()) continue;
+        Scope->addVariable(new DbgVariable(DV));
+      }
+
+      // Construct subprogram DIE and add variables DIEs.
+      constructSubprogramDIE(SP);
+      DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+      const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+      for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+        DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+        if (VariableDIE)
+          ScopeDIE->addChild(VariableDIE);
+      }
+    }
+  }
+
+  // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+  for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+       AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+    DIE *ISP = *AI;
+    addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+  }
+
+  for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
+       CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+    DIE *SPDie = CI->first;
+    const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+    if (!N) continue;
+    DIE *NDie = getCompileUnit(N)->getDIE(N);
+    if (!NDie) continue;
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+  }
+
+  // Standard sections final addresses.
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end"));
+  Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection());
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end"));
+
+  // End text sections.
+  for (unsigned i = 1, N = SectionMap.size(); i <= N; ++i) {
+    Asm->OutStreamer.SwitchSection(SectionMap[i]);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", i));
+  }
+
+  // Emit common frame information.
+  emitCommonDebugFrame();
+
+  // Emit function debug frame information.
+  for (std::vector<FunctionDebugFrameInfo>::iterator I = DebugFrames.begin(),
+       E = DebugFrames.end(); I != E; ++I)
+    emitFunctionDebugFrame(*I);
+
+  // Compute DIE offsets and sizes.
+  computeSizeAndOffsets();
+
+  // Emit all the DIEs into a debug info section.
+  emitDebugInfo();
+
+  // Corresponding abbreviations into an abbrev section.
+  emitAbbreviations();
+
+  // Emit info into a debug pubnames section.
+  emitDebugPubNames();
+
+  // Emit info into a debug pubtypes section.
+  emitDebugPubTypes();
+
+  // Emit info into a debug loc section.
+  emitDebugLoc();
+
+  // Emit info into a debug aranges section.
+  EmitDebugARanges();
+
+  // Emit info into a debug ranges section.
+  emitDebugRanges();
+
+  // Emit info into a debug macinfo section.
+  emitDebugMacInfo();
+
+  // Emit inline info.
+  emitDebugInlineInfo();
+
+  // Emit info into a debug str section.
+  emitDebugStr();
+
+  // Clean up.
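+  // (DeadFnScopeMap owns the DbgScopes created above for functions that were
+  // optimized away; DeleteContainerSeconds frees those values here.)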
+  DeleteContainerSeconds(DeadFnScopeMap);
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+       E = CUMap.end(); I != E; ++I)
+    delete I->second;
+  FirstCU = NULL;  // Reset for the next Module, if any.
+}
+
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
+                                              DebugLoc ScopeLoc) {
+
+  DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
+  if (AbsDbgVariable)
+    return AbsDbgVariable;
+
+  LLVMContext &Ctx = Var->getContext();
+  DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
+  if (!Scope)
+    return NULL;
+
+  AbsDbgVariable = new DbgVariable(Var);
+  Scope->addVariable(AbsDbgVariable);
+  AbstractVariables[Var] = AbsDbgVariable;
+  return AbsDbgVariable;
+}
+
+/// addCurrentFnArgument - If Var is a current function argument then add
+/// it to the CurrentFnArguments list.
+bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
+                                      DbgVariable *Var, DbgScope *Scope) {
+  if (Scope != CurrentFnDbgScope)
+    return false;
+  DIVariable DV = Var->getVariable();
+  if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+    return false;
+  unsigned ArgNo = DV.getArgNumber();
+  if (ArgNo == 0)
+    return false;
+
+  size_t Size = CurrentFnArguments.size();
+  if (Size == 0)
+    CurrentFnArguments.resize(MF->getFunction()->arg_size());
+  // llvm::Function's argument count is not a good indicator of how many
+  // arguments the function has at the source level.
+  if (ArgNo > Size)
+    CurrentFnArguments.resize(ArgNo * 2);
+  CurrentFnArguments[ArgNo - 1] = Var;
+  return true;
+}
+
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF,
+                                   SmallPtrSet<const MDNode *, 16> &Processed) {
+  const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
+  for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
+       VE = VMap.end(); VI != VE; ++VI) {
+    const MDNode *Var = VI->first;
+    if (!Var) continue;
+    Processed.insert(Var);
+    DIVariable DV(Var);
+    const std::pair<unsigned, DebugLoc> &VP = VI->second;
+
+    DbgScope *Scope = 0;
+    if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
+      Scope = ConcreteScopes.lookup(IA);
+    if (Scope == 0)
+      Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+
+    // If variable scope is not found then skip this variable.
+    if (Scope == 0)
+      continue;
+
+    DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    recordVariableFrameIndex(RegVar, VP.first);
+    if (!addCurrentFnArgument(MF, RegVar, Scope))
+      Scope->addVariable(RegVar);
+    if (AbsDbgVariable) {
+      recordVariableFrameIndex(AbsDbgVariable, VP.first);
+      VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+    }
+  }
+}
+
+/// isDbgValueInDefinedReg - Return true if debug value, encoded by
+/// DBG_VALUE instruction, is in a defined reg.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+  assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+  if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+    return true;
+  return false;
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+                                SmallPtrSet<const MDNode *, 16> &Processed) {
+
+  // Collect info from the MMI table.
+  collectVariableInfoFromMMITable(MF, Processed);
+
+  SmallVector<const MachineInstr *, 8> DbgValues;
+  // Collect variable information from DBG_VALUE machine instructions.
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      if (!MInsn->isDebugValue())
+        continue;
+      DbgValues.push_back(MInsn);
+    }
+
+  // This is a collection of DBG_VALUE instructions describing the same
+  // variable.
+  SmallVector<const MachineInstr *, 4> MultipleValues;
+  for (SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
+       E = DbgValues.end(); I != E; ++I) {
+    const MachineInstr *MInsn = *I;
+    MultipleValues.clear();
+    if (isDbgValueInDefinedReg(MInsn))
+      MultipleValues.push_back(MInsn);
+    DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
+    if (Processed.count(DV) != 0)
+      continue;
+
+    const MachineInstr *PrevMI = MInsn;
+    for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+         ME = DbgValues.end(); MI != ME; ++MI) {
+      const MDNode *Var =
+        (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
+      if (Var == DV &&
+          !PrevMI->isIdenticalTo(*MI))
+        MultipleValues.push_back(*MI);
+      PrevMI = *MI;
+    }
+
+    DbgScope *Scope = NULL;
+    if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+        DISubprogram(DV.getContext()).describes(MF->getFunction()))
+      Scope = CurrentFnDbgScope;
+    else
+      Scope = findDbgScope(MInsn);
+    // If variable scope is not found then skip this variable.
+    if (!Scope)
+      continue;
+
+    Processed.insert(DV);
+    DbgVariable *RegVar = new DbgVariable(DV);
+    if (!addCurrentFnArgument(MF, RegVar, Scope))
+      Scope->addVariable(RegVar);
+    if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+      DbgVariableToDbgInstMap[AbsVar] = MInsn;
+      VarToAbstractVarMap[RegVar] = AbsVar;
+    }
+    if (MultipleValues.size() <= 1) {
+      DbgVariableToDbgInstMap[RegVar] = MInsn;
+      continue;
+    }
+
+    // Handle multiple DBG_VALUE instructions describing one variable.
+    if (DotDebugLocEntries.empty())
+      RegVar->setDotDebugLocOffset(0);
+    else
+      RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+    const MachineInstr *Begin = NULL;
+    const MachineInstr *End = NULL;
+    for (SmallVector<const MachineInstr *, 4>::iterator
+         MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+         MVI != MVE; ++MVI) {
+      if (!Begin) {
+        Begin = *MVI;
+        continue;
+      }
+      End = *MVI;
+      MachineLocation MLoc;
+      if (Begin->getNumOperands() == 3) {
+        if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm())
+          MLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm());
+      } else
+        MLoc = Asm->getDebugValueLocation(Begin);
+
+      const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+      const MCSymbol *SLabel = getLabelBeforeInsn(End);
+      if (MLoc.getReg())
+        DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+
+      Begin = End;
+      if (MVI + 1 == MVE) {
+        // If End is the last instruction then its value is valid
+        // until the end of the function.
+        MachineLocation EMLoc;
+        if (End->getNumOperands() == 3) {
+          if (End->getOperand(0).isReg() && End->getOperand(1).isImm())
+            EMLoc.set(End->getOperand(0).getReg(), End->getOperand(1).getImm());
+        } else
+          EMLoc = Asm->getDebugValueLocation(End);
+        if (EMLoc.getReg())
+          DotDebugLocEntries.
+            push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc));
+      }
+    }
+    DotDebugLocEntries.push_back(DotDebugLocEntry());
+  }
+
+  // Collect info for variables that were optimized out.
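+  // (Illustrative: a local that never received a DBG_VALUE or an MMI table
+  // entry may still be listed in the function-specific named metadata; the
+  // loop below recovers such variables so they at least get a DIE.)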
+
+  // Collect info for variables that were optimized out.
+  const Function *F = MF->getFunction();
+  if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+      DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+      if (!DV || !Processed.insert(DV))
+        continue;
+      DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+      if (Scope)
+        Scope->addVariable(new DbgVariable(DV));
+    }
+  }
+}
+
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsBeforeInsn.find(MI);
+  if (I == LabelsBeforeInsn.end())
+    // FunctionBeginSym always precedes all the instructions in the current
+    // function.
+    return FunctionBeginSym;
+  return I->second;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+  DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+    LabelsAfterInsn.find(MI);
+  if (I == LabelsAfterInsn.end())
+    return NULL;
+  return I->second;
+}
+
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
+  if (InsnNeedsLabel.count(MI) == 0) {
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  // Check location.
+  DebugLoc DL = MI->getDebugLoc();
+  if (!DL.isUnknown()) {
+    const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+    PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+    PrevInstLoc = DL;
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  // If the location is unknown then use a temp label for this DBG_VALUE
+  // instruction.
+  if (MI->isDebugValue()) {
+    PrevLabel = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(PrevLabel);
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  if (UnknownLocations) {
+    PrevLabel = recordSourceLine(0, 0, 0);
+    LabelsBeforeInsn[MI] = PrevLabel;
+    return;
+  }
+
+  assert(0 && "Instruction is not processed!");
+}
+
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
+  if (InsnsEndScopeSet.count(MI) != 0) {
+    // Emit a label if this instruction ends a scope.
+    MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+    Asm->OutStreamer.EmitLabel(Label);
+    LabelsAfterInsn[MI] = Label;
+  }
+}
+
+/// getOrCreateDbgScope - Create DbgScope for the scope.
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+                                          const MDNode *InlinedAt) {
+  if (!InlinedAt) {
+    DbgScope *WScope = DbgScopeMap.lookup(Scope);
+    if (WScope)
+      return WScope;
+    WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+    DbgScopeMap.insert(std::make_pair(Scope, WScope));
+    if (DIDescriptor(Scope).isLexicalBlock()) {
+      DbgScope *Parent =
+        getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+      WScope->setParent(Parent);
+      Parent->addScope(WScope);
+    }
+
+    if (!WScope->getParent()) {
+      StringRef SPName = DISubprogram(Scope).getLinkageName();
+      // We used to check only for a linkage name, but that fails
+      // since we began omitting the linkage name for private
+      // functions. The new way is to check for the name in metadata,
+      // but that's not supported in old .ll test cases. Ergo, we
+      // check both.
+      if (SPName == Asm->MF->getFunction()->getName() ||
+          DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
+        CurrentFnDbgScope = WScope;
+    }
+
+    return WScope;
+  }
+
+  getOrCreateAbstractScope(Scope);
+  DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+  if (WScope)
+    return WScope;
+
+  WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+  DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+  DILocation DL(InlinedAt);
+  DbgScope *Parent =
+    getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+  WScope->setParent(Parent);
+  Parent->addScope(WScope);
+
+  ConcreteScopes[InlinedAt] = WScope;
+
+  return WScope;
+}
+
+/// hasValidLocation - Return true if the debug location entry attached to the
+/// machine instruction encodes valid location info.
+static bool hasValidLocation(LLVMContext &Ctx,
+                             const MachineInstr *MInsn,
+                             const MDNode *&Scope, const MDNode *&InlinedAt) {
+  DebugLoc DL = MInsn->getDebugLoc();
+  if (DL.isUnknown()) return false;
+
+  const MDNode *S = DL.getScope(Ctx);
+
+  // There is no need to create another DIE for the compile unit. For all
+  // other scopes, create one DbgScope now. This will be translated
+  // into a scope DIE at the end.
+  if (DIScope(S).isCompileUnit()) return false;
+
+  Scope = S;
+  InlinedAt = DL.getInlinedAt(Ctx);
+  return true;
+}
+
+/// calculateDominanceGraph - Calculate dominance graph for DbgScope
+/// hierarchy.
+static void calculateDominanceGraph(DbgScope *Scope) {
+  assert(Scope && "Unable to calculate scope dominance graph!");
+  SmallVector<DbgScope *, 4> WorkStack;
+  WorkStack.push_back(Scope);
+  unsigned Counter = 0;
+  while (!WorkStack.empty()) {
+    DbgScope *WS = WorkStack.back();
+    const SmallVector<DbgScope *, 4> &Children = WS->getScopes();
+    bool visitedChildren = false;
+    for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+           SE = Children.end(); SI != SE; ++SI) {
+      DbgScope *ChildScope = *SI;
+      if (!ChildScope->getDFSOut()) {
+        WorkStack.push_back(ChildScope);
+        visitedChildren = true;
+        ChildScope->setDFSIn(++Counter);
+        break;
+      }
+    }
+    if (!visitedChildren) {
+      WorkStack.pop_back();
+      WS->setDFSOut(++Counter);
+    }
+  }
+}
+
+/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
+static
+void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+                       DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
+{
+#ifndef NDEBUG
+  unsigned PrevDFSIn = 0;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
+
+      // Check if instruction has valid location information.
+      if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        dbgs() << " [ ";
+        if (InlinedAt)
+          dbgs() << "*";
+        DenseMap<const MachineInstr *, DbgScope *>::iterator DI =
+          MI2ScopeMap.find(MInsn);
+        if (DI != MI2ScopeMap.end()) {
+          DbgScope *S = DI->second;
+          dbgs() << S->getDFSIn();
+          PrevDFSIn = S->getDFSIn();
+        } else
+          dbgs() << PrevDFSIn;
+      } else
+        dbgs() << " [ x" << PrevDFSIn;
+      dbgs() << " ]";
+      MInsn->dump();
+    }
+    dbgs() << "\n";
+  }
+#endif
+}
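+
+// Editor's sketch (not part of the original patch): with the DFS numbers
+// assigned in calculateDominanceGraph, scope ancestry reduces to interval
+// nesting. A hypothetical predicate equivalent in spirit to
+// DbgScope::dominates:
+static bool dominatesByDFS(const DbgScope *A, const DbgScope *B) {
+  if (A == B) return true;
+  // A dominates B exactly when B's [DFSIn, DFSOut] interval nests inside A's.
+  return A->getDFSIn() < B->getDFSIn() && A->getDFSOut() > B->getDFSOut();
+}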
+
+/// extractScopeInformation - Scan machine instructions in this function
+/// and collect DbgScopes. Return true if at least one scope was found.
+bool DwarfDebug::extractScopeInformation() {
+  // If scope information was extracted using .dbg intrinsics then there is
+  // no need to extract this information again by scanning each instruction.
+  if (!DbgScopeMap.empty())
+    return false;
+
+  // Scan each instruction and create scopes. First build a working set of
+  // scopes.
+  LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+  SmallVector<DbgRange, 4> MIRanges;
+  DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
+  const MDNode *PrevScope = NULL;
+  const MDNode *PrevInlinedAt = NULL;
+  const MachineInstr *RangeBeginMI = NULL;
+  const MachineInstr *PrevMI = NULL;
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MInsn = II;
+      const MDNode *Scope = NULL;
+      const MDNode *InlinedAt = NULL;
+
+      // Check if instruction has valid location information.
+      if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+        PrevMI = MInsn;
+        continue;
+      }
+
+      // If scope has not changed then skip this instruction.
+      if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+        PrevMI = MInsn;
+        continue;
+      }
+
+      // Ignore DBG_VALUE. It does not contribute any instruction in output.
+      if (MInsn->isDebugValue())
+        continue;
+
+      if (RangeBeginMI) {
+        // If we have already seen the beginning of an instruction range and
+        // the current instruction's scope does not match the scope of the
+        // first instruction in this range then create a new instruction range.
+        DbgRange R(RangeBeginMI, PrevMI);
+        MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+                                                        PrevInlinedAt);
+        MIRanges.push_back(R);
+      }
+
+      // This is the beginning of a new instruction range.
+      RangeBeginMI = MInsn;
+
+      // Reset previous markers.
+      PrevMI = MInsn;
+      PrevScope = Scope;
+      PrevInlinedAt = InlinedAt;
+    }
+  }
+
+  // Create the last instruction range.
+  if (RangeBeginMI && PrevMI && PrevScope) {
+    DbgRange R(RangeBeginMI, PrevMI);
+    MIRanges.push_back(R);
+    MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+  }
+
+  if (!CurrentFnDbgScope)
+    return false;
+
+  calculateDominanceGraph(CurrentFnDbgScope);
+  if (PrintDbgScope)
+    printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+
+  // Find ranges of instructions covered by each DbgScope.
+  DbgScope *PrevDbgScope = NULL;
+  for (SmallVector<DbgRange, 4>::const_iterator RI = MIRanges.begin(),
+         RE = MIRanges.end(); RI != RE; ++RI) {
+    const DbgRange &R = *RI;
+    DbgScope *S = MI2ScopeMap.lookup(R.first);
+    assert(S && "Lost DbgScope for a machine instruction!");
+    if (PrevDbgScope && !PrevDbgScope->dominates(S))
+      PrevDbgScope->closeInsnRange(S);
+    S->openInsnRange(R.first);
+    S->extendInsnRange(R.second);
+    PrevDbgScope = S;
+  }
+
+  if (PrevDbgScope)
+    PrevDbgScope->closeInsnRange();
+
+  identifyScopeMarkers();
+
+  return !DbgScopeMap.empty();
+}
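+
+// Editor's sketch: for instructions whose scopes run S1 S1 S2 S2 S1, the loop
+// above records MIRanges [i1,i2] for S1, [i3,i4] for S2, and [i5,i5] for S1
+// again. Because S1 dominates S2, S1's range is kept open across S2's nested
+// range and the two S1 pieces extend a single covering range, while S2 keeps
+// its own nested range.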
+
+/// identifyScopeMarkers() -
+/// Each DbgScope has a first instruction and a last instruction that mark the
+/// beginning and end of the scope respectively. Create an inverse map that
+/// lists the scopes that start (and end) with an instruction. One instruction
+/// may start (or end) multiple scopes. Ignore scopes that are not reachable.
+void DwarfDebug::identifyScopeMarkers() {
+  SmallVector<DbgScope *, 4> WorkList;
+  WorkList.push_back(CurrentFnDbgScope);
+  while (!WorkList.empty()) {
+    DbgScope *S = WorkList.pop_back_val();
+
+    const SmallVector<DbgScope *, 4> &Children = S->getScopes();
+    if (!Children.empty())
+      for (SmallVector<DbgScope *, 4>::const_iterator SI = Children.begin(),
+             SE = Children.end(); SI != SE; ++SI)
+        WorkList.push_back(*SI);
+
+    if (S->isAbstractScope())
+      continue;
+
+    const SmallVector<DbgRange, 4> &Ranges = S->getRanges();
+    if (Ranges.empty())
+      continue;
+    for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
+           RE = Ranges.end(); RI != RE; ++RI) {
+      assert(RI->first && "DbgRange does not have first instruction!");
+      assert(RI->second && "DbgRange does not have second instruction!");
+      InsnsEndScopeSet.insert(RI->second);
+    }
+  }
+}
+
+/// FindFirstDebugLoc - Find the first debug location in the function. This
+/// is intended to be an approximation for the source position of the
+/// beginning of the function.
+static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator MBBI = I->begin(), MBBE = I->end();
+         MBBI != MBBE; ++MBBI) {
+      DebugLoc DL = MBBI->getDebugLoc();
+      if (!DL.isUnknown())
+        return DL;
+    }
+  return DebugLoc();
+}
+
+#ifndef NDEBUG
+/// CheckLineNumbers - Count basic blocks whose instructions do not have any
+/// line number information.
+static void CheckLineNumbers(const MachineFunction *MF) {
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    bool FoundLineNo = false;
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      if (!MI->getDebugLoc().isUnknown()) {
+        FoundLineNo = true;
+        break;
+      }
+    }
+    if (!FoundLineNo && I->size())
+      ++BlocksWithoutLineNo;
+  }
+}
+#endif
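+
+// Editor's sketch: the beginFunction implementation below decides which
+// instructions get a label. Condensed (an informal reading of the code, not
+// a specification):
+//   - a DBG_VALUE of a local (non-argument) variable        -> needs a label
+//   - a DBG_VALUE of an inlined function's argument         -> needs a label
+//   - a DBG_VALUE re-describing an already-seen argument    -> needs a label
+//   - any instruction that starts a new source location     -> needs a label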
+
+/// beginFunction - Gather pre-function debug information. Assumes being
+/// emitted immediately after the function entry point.
+void DwarfDebug::beginFunction(const MachineFunction *MF) {
+  if (!MMI->hasDebugInfo()) return;
+  if (!extractScopeInformation()) return;
+
+#ifndef NDEBUG
+  CheckLineNumbers(MF);
+#endif
+
+  FunctionBeginSym = Asm->GetTempSymbol("func_begin",
+                                        Asm->getFunctionNumber());
+  // Assumes in correct section after the entry point.
+  Asm->OutStreamer.EmitLabel(FunctionBeginSym);
+
+  // Emit a label for the implicitly defined dbg.stoppoint at the start of the
+  // function.
+  DebugLoc FDL = FindFirstDebugLoc(MF);
+  if (FDL.isUnknown()) return;
+
+  const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+  const MDNode *TheScope = 0;
+
+  DISubprogram SP = getDISubprogram(Scope);
+  unsigned Line, Col;
+  if (SP.Verify()) {
+    Line = SP.getLineNumber();
+    Col = 0;
+    TheScope = SP;
+  } else {
+    Line = FDL.getLine();
+    Col = FDL.getCol();
+    TheScope = Scope;
+  }
+
+  recordSourceLine(Line, Col, TheScope);
+
+  /// ProcessedArgs - Collection of arguments already processed.
+  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
+  DebugLoc PrevLoc;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+         II != IE; ++II) {
+      const MachineInstr *MI = II;
+      DebugLoc DL = MI->getDebugLoc();
+      if (MI->isDebugValue()) {
+        assert(MI->getNumOperands() > 1 && "Invalid machine instruction!");
+        DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
+        if (!DV.Verify()) continue;
+        // If the DBG_VALUE is for a local variable then it needs a label.
+        if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+          InsnNeedsLabel.insert(MI);
+        // A DBG_VALUE for an inlined function's argument needs a label.
+        else if (!DISubprogram(getDISubprogram(DV.getContext())).
+                 describes(MF->getFunction()))
+          InsnNeedsLabel.insert(MI);
+        // A DBG_VALUE indicating an argument location change needs a label.
+        else if (!ProcessedArgs.insert(DV))
+          InsnNeedsLabel.insert(MI);
+      } else {
+        // If the location is unknown then the instruction needs a location
+        // only if the UnknownLocations flag is set.
+        if (DL.isUnknown()) {
+          if (UnknownLocations && !PrevLoc.isUnknown())
+            InsnNeedsLabel.insert(MI);
+        } else if (DL != PrevLoc)
+          // Otherwise, the instruction needs a label only if it is at a new
+          // location.
+          InsnNeedsLabel.insert(MI);
+      }
+
+      if (!DL.isUnknown() || UnknownLocations)
+        PrevLoc = DL;
+    }
+
+  PrevLabel = FunctionBeginSym;
+}
+
+/// endFunction - Gather and emit post-function debug information.
+///
+void DwarfDebug::endFunction(const MachineFunction *MF) {
+  if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
+
+  if (CurrentFnDbgScope) {
+
+    // Define end label for subprogram.
+    FunctionEndSym = Asm->GetTempSymbol("func_end",
+                                        Asm->getFunctionNumber());
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(FunctionEndSym);
+
+    SmallPtrSet<const MDNode *, 16> ProcessedVars;
+    collectVariableInfo(MF, ProcessedVars);
+
+    // Construct abstract scopes.
+    for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+           AE = AbstractScopesList.end(); AI != AE; ++AI) {
+      DISubprogram SP((*AI)->getScopeNode());
+      if (SP.Verify()) {
+        // Collect info for variables that were optimized out.
+        StringRef FName = SP.getLinkageName();
+        if (FName.empty())
+          FName = SP.getName();
+        if (NamedMDNode *NMD =
+            getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
+          for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+            DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
+            if (!DV || !ProcessedVars.insert(DV))
+              continue;
+            DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+            if (Scope)
+              Scope->addVariable(new DbgVariable(DV));
+          }
+        }
+      }
+      if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+        constructScopeDIE(*AI);
+    }
+
+    DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
+
+    if (!DisableFramePointerElim(*MF))
+      addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr,
+              dwarf::DW_FORM_flag, 1);
+
+    DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(),
+                                                 MMI->getFrameMoves()));
+  }
+
+  // Clear debug info.
+  CurrentFnDbgScope = NULL;
+  CurrentFnArguments.clear();
+  InsnNeedsLabel.clear();
+  DbgVariableToFrameIndexMap.clear();
+  VarToAbstractVarMap.clear();
+  DbgVariableToDbgInstMap.clear();
+  DeleteContainerSeconds(DbgScopeMap);
+  InsnsEndScopeSet.clear();
+  ConcreteScopes.clear();
+  DeleteContainerSeconds(AbstractScopes);
+  AbstractScopesList.clear();
+  AbstractVariables.clear();
+  LabelsBeforeInsn.clear();
+  LabelsAfterInsn.clear();
+  PrevLabel = NULL;
+}
+
+/// recordVariableFrameIndex - Record a variable's frame index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+  assert(V && "Invalid DbgVariable!");
+  DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if a frame index for the variable
+/// is found. Update FI to hold the value of the index.
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+  assert(V && "Invalid DbgVariable!");
+  DenseMap<const DbgVariable *, int>::iterator I =
+    DbgVariableToFrameIndexMap.find(V);
+  if (I == DbgVariableToFrameIndexMap.end())
+    return false;
+  *FI = I->second;
+  return true;
+}
+
+/// findDbgScope - Find the DbgScope for the debug loc attached to an
+/// instruction.
+DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
+  DbgScope *Scope = NULL;
+  LLVMContext &Ctx =
+    MInsn->getParent()->getParent()->getFunction()->getContext();
+  DebugLoc DL = MInsn->getDebugLoc();
+
+  if (DL.isUnknown())
+    return Scope;
+
+  if (const MDNode *IA = DL.getInlinedAt(Ctx))
+    Scope = ConcreteScopes.lookup(IA);
+  if (Scope == 0)
+    Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+
+  return Scope;
+}
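+
+// Editor's sketch (hypothetical caller, for illustration only): a typical use
+// of the frame-index lookup above when lowering a variable's DW_AT_location:
+//
+//   int FI;
+//   if (findVariableFrameIndex(RegVar, &FI)) {
+//     // Translate FI into a frame-register + offset MachineLocation here.
+//   }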
+
+/// recordSourceLine - Register a source line with debug info. Returns the
+/// unique label that was emitted and which provides correspondence to
+/// the source line list.
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+                                       const MDNode *S) {
+  StringRef Fn;
+
+  unsigned Src = 1;
+  if (S) {
+    DIDescriptor Scope(S);
+
+    if (Scope.isCompileUnit()) {
+      DICompileUnit CU(S);
+      Fn = CU.getFilename();
+    } else if (Scope.isFile()) {
+      DIFile F(S);
+      Fn = F.getFilename();
+    } else if (Scope.isSubprogram()) {
+      DISubprogram SP(S);
+      Fn = SP.getFilename();
+    } else if (Scope.isLexicalBlock()) {
+      DILexicalBlock DB(S);
+      Fn = DB.getFilename();
+    } else
+      assert(0 && "Unexpected scope info");
+
+    Src = GetOrCreateSourceID(Fn);
+  }
+
+  Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT,
+                                         0, 0);
+
+  MCSymbol *Label = MMI->getContext().CreateTempSymbol();
+  Asm->OutStreamer.EmitLabel(Label);
+  return Label;
+}
+
+//===----------------------------------------------------------------------===//
+// Emit Methods
+//===----------------------------------------------------------------------===//
+
+/// computeSizeAndOffset - Compute the size and offset of a DIE.
+///
+unsigned
+DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
+  // Get the children.
+  const std::vector<DIE *> &Children = Die->getChildren();
+
+  // If not the last sibling and it has children then add a sibling offset
+  // attribute.
+  if (!Last && !Children.empty())
+    Die->addSiblingOffset(DIEValueAllocator);
+
+  // Record the abbreviation.
+  assignAbbrevNumber(Die->getAbbrev());
+
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Set DIE offset.
+  Die->setOffset(Offset);
+
+  // Start the size with the size of the abbreviation code.
+  Offset += MCAsmInfo::getULEB128Size(AbbrevNumber);
+
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Size the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i)
+    // Size attribute value.
+    Offset += Values[i]->SizeOf(Asm, AbbrevData[i].getForm());
+
+  // Size the DIE children if any.
+  if (!Children.empty()) {
+    assert(Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes &&
+           "Children flag not set");
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M);
+
+    // End of children marker.
+    Offset += sizeof(int8_t);
+  }
+
+  Die->setSize(Offset - Die->getOffset());
+  return Offset;
+}
+
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+///
+void DwarfDebug::computeSizeAndOffsets() {
+  unsigned PrevOffset = 0;
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    // Compute the size of the compile unit header. Note: Offset must not be
+    // 'static' here; it has to be recomputed for every compile unit.
+    unsigned Offset = PrevOffset +
+      sizeof(int32_t) + // Length of Compilation Unit Info
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t);   // Pointer Size (in bytes)
+    computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+    PrevOffset = Offset;
+  }
+}
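+
+// Editor's sketch: a worked example of the sizing scheme above. Assuming an
+// 11-byte CU header and abbreviation codes that each take one ULEB128 byte:
+//   CU DIE:    offset 11, size = 1 (abbrev code) + attribute bytes
+//                          + child sizes + 1 (end-of-children marker)
+//   child DIE: offset = 11 + 1 + CU attribute bytes, size = 1 + attr bytes
+// Each DIE's size is simply the running Offset delta measured on the way out.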
+
+/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
+/// temporary label to it if SymbolStem is specified.
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section,
+                                const char *SymbolStem = 0) {
+  Asm->OutStreamer.SwitchSection(Section);
+  if (!SymbolStem) return 0;
+
+  MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem);
+  Asm->OutStreamer.EmitLabel(TmpSym);
+  return TmpSym;
+}
+
+/// EmitSectionLabels - Emit initial Dwarf sections with a label at
+/// the start of each one.
+void DwarfDebug::EmitSectionLabels() {
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Dwarf sections base addresses.
+  if (Asm->MAI->doesDwarfRequireFrameSection()) {
+    DwarfFrameSectionSym =
+      EmitSectionSym(Asm, TLOF.getDwarfFrameSection(), "section_debug_frame");
+  }
+
+  DwarfInfoSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info");
+  DwarfAbbrevSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev");
+  EmitSectionSym(Asm, TLOF.getDwarfARangesSection());
+
+  if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
+    EmitSectionSym(Asm, MacroInfo);
+
+  EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
+  EmitSectionSym(Asm, TLOF.getDwarfLocSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
+  EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
+  DwarfStrSectionSym =
+    EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str");
+  DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
+                                             "debug_range");
+
+  DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+                                           "section_debug_loc");
+
+  TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
+  EmitSectionSym(Asm, TLOF.getDataSection());
+}
+
+/// emitDIE - Recursively emits a debug information entry.
+///
+void DwarfDebug::emitDIE(DIE *Die) {
+  // Get the abbreviation for this DIE.
+  unsigned AbbrevNumber = Die->getAbbrevNumber();
+  const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1];
+
+  // Emit the code (index) for the abbreviation.
+  if (Asm->isVerbose())
+    Asm->OutStreamer.AddComment("Abbrev [" + Twine(AbbrevNumber) + "] 0x" +
+                                Twine::utohexstr(Die->getOffset()) + ":0x" +
+                                Twine::utohexstr(Die->getSize()) + " " +
+                                dwarf::TagString(Abbrev->getTag()));
+  Asm->EmitULEB128(AbbrevNumber);
+
+  const SmallVector<DIEValue*, 32> &Values = Die->getValues();
+  const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
+
+  // Emit the DIE attribute values.
+  for (unsigned i = 0, N = Values.size(); i < N; ++i) {
+    unsigned Attr = AbbrevData[i].getAttribute();
+    unsigned Form = AbbrevData[i].getForm();
+    assert(Form && "Too many attributes for DIE (check abbreviation)");
+
+    if (Asm->isVerbose())
+      Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr));
+
+    switch (Attr) {
+    case dwarf::DW_AT_sibling:
+      Asm->EmitInt32(Die->getSiblingOffset());
+      break;
+    case dwarf::DW_AT_abstract_origin: {
+      DIEEntry *E = cast<DIEEntry>(Values[i]);
+      DIE *Origin = E->getEntry();
+      unsigned Addr = Origin->getOffset();
+      Asm->EmitInt32(Addr);
+      break;
+    }
+    case dwarf::DW_AT_ranges: {
+      // The DW_AT_ranges value encodes an offset in the debug_range section.
+      DIEInteger *V = cast<DIEInteger>(Values[i]);
+
+      if (Asm->MAI->doesDwarfUsesLabelOffsetForRanges()) {
+        Asm->EmitLabelPlusOffset(DwarfDebugRangeSectionSym,
+                                 V->getValue(),
+                                 4);
+      } else {
+        Asm->EmitLabelOffsetDifference(DwarfDebugRangeSectionSym,
+                                       V->getValue(),
+                                       DwarfDebugRangeSectionSym,
+                                       4);
+      }
+      break;
+    }
+    case dwarf::DW_AT_location: {
+      if (UseDotDebugLocEntry.count(Die) != 0) {
+        DIELabel *L = cast<DIELabel>(Values[i]);
+        Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+      } else
+        Values[i]->EmitValue(Asm, Form);
+      break;
+    }
+    case dwarf::DW_AT_accessibility: {
+      if (Asm->isVerbose()) {
+        DIEInteger *V = cast<DIEInteger>(Values[i]);
+        Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+      }
+      Values[i]->EmitValue(Asm, Form);
+      break;
+    }
+    default:
+      // Emit an attribute using the defined form.
+      Values[i]->EmitValue(Asm, Form);
+      break;
+    }
+  }
+
+  // Emit the DIE children if any.
+  if (Abbrev->getChildrenFlag() == dwarf::DW_CHILDREN_yes) {
+    const std::vector<DIE *> &Children = Die->getChildren();
+
+    for (unsigned j = 0, M = Children.size(); j < M; ++j)
+      emitDIE(Children[j]);
+
+    if (Asm->isVerbose())
+      Asm->OutStreamer.AddComment("End Of Children Mark");
+    Asm->EmitInt8(0);
+  }
+}
+
+/// emitDebugInfo - Emit the debug info section.
+///
+void DwarfDebug::emitDebugInfo() {
+  // Start the debug info section.
+  Asm->OutStreamer.SwitchSection(
+                            Asm->getObjFileLowering().getDwarfInfoSection());
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    DIE *Die = TheCU->getCUDie();
+
+    // Emit the compile unit header.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+                                                  TheCU->getID()));
+
+    // Emit the size of the content, not including the length field itself.
+    unsigned ContentSize = Die->getSize() +
+      sizeof(int16_t) + // DWARF version number
+      sizeof(int32_t) + // Offset Into Abbrev. Section
+      sizeof(int8_t) +  // Pointer Size (in bytes)
+      sizeof(int32_t);  // FIXME - extra pad for gdb bug.
+
+    Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+    Asm->EmitInt32(ContentSize);
+    Asm->OutStreamer.AddComment("DWARF version number");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+    Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+                           DwarfAbbrevSectionSym);
+    Asm->OutStreamer.AddComment("Address Size (in bytes)");
+    Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+    emitDIE(Die);
+    // FIXME - extra padding for gdb bug.
+    Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->EmitInt8(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+  }
+}
+
+/// emitAbbreviations - Emit the abbreviation section.
+///
+void DwarfDebug::emitAbbreviations() const {
+  // Check to see if it is worth the effort.
+  if (!Abbreviations.empty()) {
+    // Start the debug abbrev section.
+    Asm->OutStreamer.SwitchSection(
+                            Asm->getObjFileLowering().getDwarfAbbrevSection());
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin"));
+
+    // For each abbreviation.
+    for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) {
+      // Get abbreviation data.
+      const DIEAbbrev *Abbrev = Abbreviations[i];
+
+      // Emit the abbreviation's code (base 1 index).
+      Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code");
+
+      // Emit the abbreviation's data.
+      Abbrev->Emit(Asm);
+    }
+
+    // Mark end of abbreviations.
+    Asm->EmitULEB128(0, "EOM(3)");
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end"));
+  }
+}
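+
+// Editor's sketch: each record in the .debug_abbrev stream produced above has
+// the shape
+//   <code> <tag> <has_children> (<attribute> <form>)* 0 0
+// and the table ends with a single 0 code. For example (values illustrative):
+//   1, DW_TAG_compile_unit, DW_CHILDREN_yes,
+//      DW_AT_producer, DW_FORM_string, 0, 0
+//   0                                        // end of abbreviations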
+
+/// emitEndOfLineMatrix - Emit the last address of the section and the end of
+/// the line matrix.
+///
+void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
+  // Define the last address of the section.
+  Asm->OutStreamer.AddComment("Extended Op");
+  Asm->EmitInt8(0);
+
+  Asm->OutStreamer.AddComment("Op size");
+  Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
+  Asm->OutStreamer.AddComment("DW_LNE_set_address");
+  Asm->EmitInt8(dwarf::DW_LNE_set_address);
+
+  Asm->OutStreamer.AddComment("Section end label");
+
+  Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd),
+                                   Asm->getTargetData().getPointerSize(),
+                                   0/*AddrSpace*/);
+
+  // Mark end of matrix.
+  Asm->OutStreamer.AddComment("DW_LNE_end_sequence");
+  Asm->EmitInt8(0);
+  Asm->EmitInt8(1);
+  Asm->EmitInt8(1);
+}
+
+/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
+///
+void DwarfDebug::emitCommonDebugFrame() {
+  if (!Asm->MAI->doesDwarfRequireFrameSection())
+    return;
+
+  int stackGrowth = Asm->getTargetData().getPointerSize();
+  if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+      TargetFrameLowering::StackGrowsDown)
+    stackGrowth *= -1;
+
+  // Start the dwarf frame section.
+  Asm->OutStreamer.SwitchSection(
+                              Asm->getObjFileLowering().getDwarfFrameSection());
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common"));
+  Asm->OutStreamer.AddComment("Length of Common Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_frame_common_end"),
+                           Asm->GetTempSymbol("debug_frame_common_begin"), 4);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_begin"));
+  Asm->OutStreamer.AddComment("CIE Identifier Tag");
+  Asm->EmitInt32((int)dwarf::DW_CIE_ID);
+  Asm->OutStreamer.AddComment("CIE Version");
+  Asm->EmitInt8(dwarf::DW_CIE_VERSION);
+  Asm->OutStreamer.AddComment("CIE Augmentation");
+  Asm->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); // nul terminator.
+  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->OutStreamer.AddComment("CIE RA Column");
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+  Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
+
+  std::vector<MachineMove> Moves;
+  TFI->getInitialFrameState(Moves);
+
+  Asm->EmitFrameMoves(Moves, 0, false);
+
+  Asm->EmitAlignment(2);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_frame_common_end"));
+}
+
+/// emitFunctionDebugFrame - Emit per function frame info into a debug frame
+/// section.
+void DwarfDebug::
+emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
+  if (!Asm->MAI->doesDwarfRequireFrameSection())
+    return;
+
+  // Start the dwarf frame section.
+  Asm->OutStreamer.SwitchSection(
+                              Asm->getObjFileLowering().getDwarfFrameSection());
+
+  Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+  MCSymbol *DebugFrameBegin =
+    Asm->GetTempSymbol("debug_frame_begin", DebugFrameInfo.Number);
+  MCSymbol *DebugFrameEnd =
+    Asm->GetTempSymbol("debug_frame_end", DebugFrameInfo.Number);
+  Asm->EmitLabelDifference(DebugFrameEnd, DebugFrameBegin, 4);
+
+  Asm->OutStreamer.EmitLabel(DebugFrameBegin);
+
+  Asm->OutStreamer.AddComment("FDE CIE offset");
+  Asm->EmitSectionOffset(Asm->GetTempSymbol("debug_frame_common"),
+                         DwarfFrameSectionSym);
+
+  Asm->OutStreamer.AddComment("FDE initial location");
+  MCSymbol *FuncBeginSym =
+    Asm->GetTempSymbol("func_begin", DebugFrameInfo.Number);
+  Asm->OutStreamer.EmitSymbolValue(FuncBeginSym,
+                                   Asm->getTargetData().getPointerSize(),
+                                   0/*AddrSpace*/);
+
+  Asm->OutStreamer.AddComment("FDE address range");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("func_end",DebugFrameInfo.Number),
+                           FuncBeginSym, Asm->getTargetData().getPointerSize());
+
+  Asm->EmitFrameMoves(DebugFrameInfo.Moves, FuncBeginSym, false);
+
+  Asm->EmitAlignment(2);
+  Asm->OutStreamer.EmitLabel(DebugFrameEnd);
+}
+
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    // Start the dwarf pubnames section.
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+    Asm->OutStreamer.AddComment("Length of Public Names Info");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+      Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+                                                  TheCU->getID()));
+
+    Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobals();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+
+      Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose())
+        Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+                                                  TheCU->getID()));
+  }
+}
+
+/// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+void DwarfDebug::emitDebugPubTypes() {
+  for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+         E = CUMap.end(); I != E; ++I) {
+    CompileUnit *TheCU = I->second;
+    // Start the dwarf pubtypes section.
+    Asm->OutStreamer.SwitchSection(
+      Asm->getObjFileLowering().getDwarfPubTypesSection());
+    Asm->OutStreamer.AddComment("Length of Public Types Info");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+      Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+                                                  TheCU->getID()));
+
+    if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+    Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+    Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+    Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                           DwarfInfoSectionSym);
+
+    Asm->OutStreamer.AddComment("Compilation Unit Length");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+                             Asm->GetTempSymbol("info_begin", TheCU->getID()),
+                             4);
+
+    const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+    for (StringMap<DIE*>::const_iterator
+           GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+      const char *Name = GI->getKeyData();
+      DIE *Entity = GI->second;
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(Entity->getOffset());
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+      Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+    }
+
+    Asm->OutStreamer.AddComment("End Mark");
+    Asm->EmitInt32(0);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+                                                  TheCU->getID()));
+  }
+}
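+
+// Editor's sketch: both pub sections above share one record shape (lengths
+// are resolved later via the label differences):
+//   header:  unit_length, version, cu_offset, cu_length
+//   repeat:  4-byte DIE offset, nul-terminated name
+//   end:     4-byte zero "End Mark"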
+
+/// emitDebugStr - Emit visible names into a debug str section.
+///
+void DwarfDebug::emitDebugStr() {
+  // Check to see if it is worth the effort.
+  if (StringPool.empty()) return;
+
+  // Start the dwarf str section.
+  Asm->OutStreamer.SwitchSection(
+                                Asm->getObjFileLowering().getDwarfStrSection());
+
+  // Get all of the string pool entries and put them in an array by their ID so
+  // we can sort them.
+  SmallVector<std::pair<unsigned,
+      StringMapEntry<std::pair<MCSymbol*, unsigned> >*>, 64> Entries;
+
+  for (StringMap<std::pair<MCSymbol*, unsigned> >::iterator
+         I = StringPool.begin(), E = StringPool.end(); I != E; ++I)
+    Entries.push_back(std::make_pair(I->second.second, &*I));
+
+  array_pod_sort(Entries.begin(), Entries.end());
+
+  for (unsigned i = 0, e = Entries.size(); i != e; ++i) {
+    // Emit a label for reference from debug information entries.
+    Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first);
+
+    // Emit the string itself.
+    Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/);
+  }
+}
+
+/// emitDebugLoc - Emit visible names into a debug loc section.
+///
+void DwarfDebug::emitDebugLoc() {
+  if (DotDebugLocEntries.empty())
+    return;
+
+  for (SmallVector<DotDebugLocEntry, 4>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I) {
+    DotDebugLocEntry &Entry = *I;
+    if (I + 1 != DotDebugLocEntries.end())
+      Entry.Merge(I+1);
+  }
+
+  // Start the dwarf loc section.
+  Asm->OutStreamer.SwitchSection(
+                                Asm->getObjFileLowering().getDwarfLocSection());
+  unsigned char Size = Asm->getTargetData().getPointerSize();
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+  unsigned index = 1;
+  for (SmallVector<DotDebugLocEntry, 4>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I, ++index) {
+    DotDebugLocEntry &Entry = *I;
+    if (Entry.isMerged()) continue;
+    if (Entry.isEmpty()) {
+      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+      Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+    } else {
+      Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+      Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+      const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+      unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
+      if (int Offset = Entry.Loc.getOffset()) {
+        // If the value is at a certain offset from the frame register then
+        // use DW_OP_fbreg.
+        unsigned OffsetSize = Offset ? MCAsmInfo::getSLEB128Size(Offset) : 1;
+        Asm->OutStreamer.AddComment("Loc expr size");
+        Asm->EmitInt16(1 + OffsetSize);
+        Asm->OutStreamer.AddComment(
+          dwarf::OperationEncodingString(dwarf::DW_OP_fbreg));
+        Asm->EmitInt8(dwarf::DW_OP_fbreg);
+        Asm->OutStreamer.AddComment("Offset");
+        Asm->EmitSLEB128(Offset);
+      } else {
+        if (Reg < 32) {
+          Asm->OutStreamer.AddComment("Loc expr size");
+          Asm->EmitInt16(1);
+          Asm->OutStreamer.AddComment(
+            dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg));
+          Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+        } else {
+          Asm->OutStreamer.AddComment("Loc expr size");
+          Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg));
+          Asm->EmitInt8(dwarf::DW_OP_regx);
+          Asm->EmitULEB128(Reg);
+        }
+      }
+    }
+  }
+}
+
+/// EmitDebugARanges - Emit visible names into a debug aranges section.
+///
+void DwarfDebug::EmitDebugARanges() {
+  // Start the dwarf aranges section.
+  Asm->OutStreamer.SwitchSection(
+                          Asm->getObjFileLowering().getDwarfARangesSection());
+}
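+
+// Editor's sketch: one variable's .debug_loc stream as emitted above, for a
+// value living at frame offset -8 (addresses illustrative):
+//   .quad  Lbegin ; .quad Lend            // half-open address range
+//   .short 2 ; .byte DW_OP_fbreg ; .sleb128 -8
+//   .quad  0 ; .quad 0                    // terminates this variable's list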
+
+/// emitDebugRanges - Emit visible names into a debug ranges section.
+///
+void DwarfDebug::emitDebugRanges() {
+  // Start the dwarf ranges section.
+  Asm->OutStreamer.SwitchSection(
+                            Asm->getObjFileLowering().getDwarfRangesSection());
+  unsigned char Size = Asm->getTargetData().getPointerSize();
+  for (SmallVector<const MCSymbol *, 8>::iterator
+         I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end();
+       I != E; ++I) {
+    if (*I)
+      Asm->OutStreamer.EmitSymbolValue(const_cast<MCSymbol*>(*I), Size, 0);
+    else
+      Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+  }
+}
+
+/// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+///
+void DwarfDebug::emitDebugMacInfo() {
+  if (const MCSection *LineInfo =
+      Asm->getObjFileLowering().getDwarfMacroInfoSection()) {
+    // Start the dwarf macinfo section.
+    Asm->OutStreamer.SwitchSection(LineInfo);
+  }
+}
+
+/// emitDebugInlineInfo - Emit inline info using the following format.
+/// Section Header:
+/// 1. length of section
+/// 2. Dwarf version number
+/// 3. address size.
+///
+/// Entries (one "entry" for each function that was inlined):
+///
+/// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+///    otherwise offset into __debug_str for regular function name.
+/// 2. offset into __debug_str section for regular function name.
+/// 3. an unsigned LEB128 number indicating the number of distinct inlining
+///    instances for the function.
+///
+/// The rest of the entry consists of a {die_offset, low_pc} pair for each
+/// inlined instance; the die_offset points to the inlined_subroutine die in the
+/// __debug_info section, and the low_pc is the starting address for the
+/// inlining instance.
+void DwarfDebug::emitDebugInlineInfo() {
+  if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
+    return;
+
+  if (!FirstCU)
+    return;
+
+  Asm->OutStreamer.SwitchSection(
+                        Asm->getObjFileLowering().getDwarfDebugInlineSection());
+
+  Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1),
+                           Asm->GetTempSymbol("debug_inlined_begin", 1), 4);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1));
+
+  Asm->OutStreamer.AddComment("Dwarf Version");
+  Asm->EmitInt16(dwarf::DWARF_VERSION);
+  Asm->OutStreamer.AddComment("Address Size (in bytes)");
+  Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+  for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+         E = InlinedSPNodes.end(); I != E; ++I) {
+
+    const MDNode *Node = *I;
+    DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+      = InlineInfo.find(Node);
+    SmallVector<InlineInfoLabels, 4> &Labels = II->second;
+    DISubprogram SP(Node);
+    StringRef LName = SP.getLinkageName();
+    StringRef Name = SP.getName();
+
+    Asm->OutStreamer.AddComment("MIPS linkage name");
+    if (LName.empty()) {
+      Asm->OutStreamer.EmitBytes(Name, 0);
+      Asm->OutStreamer.EmitIntValue(0, 1, 0); // nul terminator.
+    } else
+      Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)),
+                             DwarfStrSectionSym);
+
+    Asm->OutStreamer.AddComment("Function name");
+    Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym);
+    Asm->EmitULEB128(Labels.size(), "Inline count");
+
+    for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
+           LE = Labels.end(); LI != LE; ++LI) {
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+      Asm->EmitInt32(LI->second->getOffset());
+
+      if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc");
+      Asm->OutStreamer.EmitSymbolValue(LI->first,
+                                       Asm->getTargetData().getPointerSize(),0);
+    }
+  }
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1));
+}
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfDebug.h b/final/lib/CodeGen/AsmPrinter/DwarfDebug.h
new file mode 100644
index 00000000000..2863ea38060
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -0,0 +1,602 @@
+//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf debug info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/UniqueVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
+
+namespace llvm {
+
+class CompileUnit;
+class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
+class MachineFrameInfo;
+class MachineModuleInfo;
+class MachineOperand;
+class MCAsmInfo;
+class DIEAbbrev;
+class DIE;
+class DIEBlock;
+class DIEEntry;
+
+class DIEnumerator;
+class DIDescriptor;
+class DIVariable;
+class DIGlobal;
+class DIGlobalVariable;
+class DISubprogram;
+class DIBasicType;
+class DIDerivedType;
+class DIType;
+class DINameSpace;
+class DISubrange;
+class DICompositeType;
+class DITemplateTypeParameter;
+class DITemplateValueParameter;
+
+//===----------------------------------------------------------------------===//
+/// SrcLineInfo - This class is used to record source line correspondence.
+///
+class SrcLineInfo {
+  unsigned Line;     // Source line number.
+  unsigned Column;   // Source column.
+  unsigned SourceID; // Source ID number.
+  MCSymbol *Label;   // Label in code ID number.
+public:
+  SrcLineInfo(unsigned L, unsigned C, unsigned S, MCSymbol *label)
+    : Line(L), Column(C), SourceID(S), Label(label) {}
+
+  // Accessors
+  unsigned getLine() const { return Line; }
+  unsigned getColumn() const { return Column; }
+  unsigned getSourceID() const { return SourceID; }
+  MCSymbol *getLabel() const { return Label; }
+};
+
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// the .debug_loc section.
+typedef struct DotDebugLocEntry {
+  const MCSymbol *Begin;
+  const MCSymbol *End;
+  MachineLocation Loc;
+  bool Merged;
+  DotDebugLocEntry() : Begin(0), End(0), Merged(false) {}
+  DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L)
+    : Begin(B), End(E), Loc(L), Merged(false) {}
+  /// Empty entries are also used as a trigger to emit a temp label. Such
+  /// labels are referenced to find the debug_loc offset for a given DIE.
+  bool isEmpty() { return Begin == 0 && End == 0; }
+  bool isMerged() { return Merged; }
+  void Merge(DotDebugLocEntry *Next) {
+    if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+      return;
+    Next->Begin = Begin;
+    Merged = true;
+  }
+} DotDebugLocEntry;
+
+class DwarfDebug {
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  /// MMI - Collected machine module information.
+  MachineModuleInfo *MMI;
+
+  //===--------------------------------------------------------------------===//
+  // Attributes used to construct specific Dwarf sections.
+  //
+
+  CompileUnit *FirstCU;
+  DenseMap<const MDNode *, CompileUnit *> CUMap;
+
+  /// AbbreviationsSet - Used to uniquely define abbreviations.
+  ///
+  FoldingSet<DIEAbbrev> AbbreviationsSet;
+
+  /// Abbreviations - A list of all the unique abbreviations in use.
+  ///
+  std::vector<DIEAbbrev *> Abbreviations;
+
+  /// SourceIdMap - Source id map, i.e. pair of directory id and source file
+  /// id mapped to a unique id.
+  StringMap<unsigned> SourceIdMap;
+
+  /// DIEBlocks - A list of all the DIEBlocks in use.
+  std::vector<DIEBlock *> DIEBlocks;
+
+  // DIEValueAllocator - All DIEValues are allocated through this allocator.
+  BumpPtrAllocator DIEValueAllocator;
+
+  /// StringPool - A String->Symbol mapping of strings used by indirect
+  /// references.
+  StringMap<std::pair<MCSymbol*, unsigned> > StringPool;
+  unsigned NextStringPoolNumber;
+
+  MCSymbol *getStringPoolEntry(StringRef Str);
+
+  /// SectionMap - Provides a unique id per text section.
+  ///
+  UniqueVector<const MCSection*> SectionMap;
+
+  /// CurrentFnDbgScope - Top level scope for the current function.
+  ///
+  DbgScope *CurrentFnDbgScope;
+
+  /// CurrentFnArguments - List of Arguments (DbgValues) for current function.
+  SmallVector<DbgVariable *, 8> CurrentFnArguments;
+
+  /// DbgScopeMap - Tracks the scopes in the current function. Owns the
+  /// contained DbgScope*s.
+  ///
+  DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
+
+  /// ConcreteScopes - Tracks the concrete scopes in the current function.
+  /// These scopes are also included in DbgScopeMap.
+  DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+
+  /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
+  /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
+  DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+
+  /// AbstractSPDies - Collection of abstract subprogram DIEs.
+  DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
+  /// AbstractScopesList - Tracks abstract scopes constructed while processing
+  /// a function. This list is cleared during endFunction().
+  SmallVector<DbgScope *, 4> AbstractScopesList;
+
+  /// AbstractVariables - Collection of abstract variables. Owned by the
+  /// DbgScopes in AbstractScopes.
+  DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+  /// DbgVariableToFrameIndexMap - Tracks the frame index used to find a
+  /// variable's value.
+  DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+  /// DbgVariableToDbgInstMap - Maps a DbgVariable to the corresponding
+  /// DBG_VALUE machine instruction.
+  DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+  /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+  SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+  /// UseDotDebugLocEntry - The DW_AT_location attribute for the DIEs in this
+  /// set identifies the corresponding .debug_loc entry offset.
+  SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+  /// VarToAbstractVarMap - Maps a DbgVariable to its corresponding abstract
+  /// DbgVariable, if any.
+  DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
+
+  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+  /// (at the end of the module) as DW_AT_inline.
+  SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+  /// ContainingTypeMap - This map is used to keep track of subprogram DIEs
+  /// that need the DW_AT_containing_type attribute. This attribute points to
+  /// a DIE that corresponds to the MDNode mapped with the subprogram DIE.
+  DenseMap<DIE *, const MDNode *> ContainingTypeMap;
+
+  typedef SmallVector<DbgScope *, 2> ScopeVector;
+
+  SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
+
+  /// InlineInfo - Keep track of inlined functions and their location. This
+  /// information is used to populate the debug_inlined section.
+  typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+  DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+  SmallVector<const MDNode *, 4> InlinedSPNodes;
+
+  // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+  // are processed to create DIEs.
+  SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
+  /// LabelsBeforeInsn - Maps an instruction to the label emitted before
+  /// the instruction.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
+
+  /// LabelsAfterInsn - Maps an instruction to the label emitted after
+  /// the instruction.
+  DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+
+  /// InsnNeedsLabel - Collection of instructions that need a label to mark
+  /// a debugging information entity.
+  SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+
+  SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
+
+  /// Previous instruction's location information. This is used to determine
+  /// the label location to indicate scope boundaries in dwarf debug info.
+  DebugLoc PrevInstLoc;
+  MCSymbol *PrevLabel;
+
+  struct FunctionDebugFrameInfo {
+    unsigned Number;
+    std::vector<MachineMove> Moves;
+
+    FunctionDebugFrameInfo(unsigned Num, const std::vector<MachineMove> &M)
+      : Number(Num), Moves(M) {}
+  };
+
+  std::vector<FunctionDebugFrameInfo> DebugFrames;
+
+  // Section Symbols: these are assembler temporary labels that are emitted at
+  // the beginning of each supported dwarf section. These are used to form
+  // section offsets and are created by EmitSectionLabels.
+  MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
+  MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
+  MCSymbol *DwarfDebugLocSectionSym;
+  MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+  DIEInteger *DIEIntegerOne;
+private:
+
+  /// getNumSourceIds - Return the number of unique source ids.
+  unsigned getNumSourceIds() const {
+    return SourceIdMap.size();
+  }
+
+  /// assignAbbrevNumber - Define a unique number for the abbreviation.
+  ///
+  void assignAbbrevNumber(DIEAbbrev &Abbrev);
+
+  /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
+  /// information entry.
+  DIEEntry *createDIEEntry(DIE *Entry);
+
+  /// addUInt - Add an unsigned integer attribute data and value.
+  ///
+  void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer);
+
+  /// addSInt - Add a signed integer attribute data and value.
+  ///
+  void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer);
+
+  /// addString - Add a string attribute data and value.
+  ///
+  void addString(DIE *Die, unsigned Attribute, unsigned Form,
+                 const StringRef Str);
+
+  /// addLabel - Add a Dwarf label attribute data and value.
+  ///
+  void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
+                const MCSymbol *Label);
+
+  /// addDelta - Add a label delta attribute data and value.
+  ///
+  void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
+                const MCSymbol *Hi, const MCSymbol *Lo);
+
+  /// addDIEEntry - Add a DIE attribute data and value.
+  ///
+  void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry);
+
+  /// addBlock - Add block data.
+  ///
+  void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block);
+
+  /// addSourceLine - Add location information to specified debug information
+  /// entry.
+  void addSourceLine(DIE *Die, DIVariable V);
+  void addSourceLine(DIE *Die, DIGlobalVariable G);
+  void addSourceLine(DIE *Die, DISubprogram SP);
+  void addSourceLine(DIE *Die, DIType Ty);
+  void addSourceLine(DIE *Die, DINameSpace NS);
+
+  /// addAddress - Add an address attribute to a die based on the location
+  /// provided.
+  void addAddress(DIE *Die, unsigned Attribute,
+                  const MachineLocation &Location);
+
+  /// addRegisterAddress - Add register location entry in variable DIE.
+  bool addRegisterAddress(DIE *Die, const MachineOperand &MO);
+
+  /// addConstantValue - Add constant value entry in variable DIE.
+  bool addConstantValue(DIE *Die, const MachineOperand &MO);
+  bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+
+  /// addConstantFPValue - Add constant value entry in variable DIE.
+  bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
+
+  /// addComplexAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual variable (navigating the extra location information encoded in
+  /// the type) based on the starting location. Add the DWARF information to
+  /// the die.
+  ///
+  void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+                         const MachineLocation &Location);
+
+  // FIXME: Should be reformulated in terms of addComplexAddress.
+  /// addBlockByrefAddress - Start with the address based on the location
+  /// provided, and generate the DWARF information necessary to find the
+  /// actual Block variable (navigating the Block struct) based on the
+  /// starting location. Add the DWARF information to the die. Obsolete,
+  /// please use addComplexAddress instead.
+  ///
+  void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
+                            const MachineLocation &Location);
+
+  /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable
+  /// based on the provided frame index.
+  void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI);
+
+  /// addToContextOwner - Add Die into the list of its context owner's
+  /// children.
+  void addToContextOwner(DIE *Die, DIDescriptor Context);
+
+  /// addType - Add a new type attribute to the specified entity.
+  void addType(DIE *Entity, DIType Ty);
+
+  /// getOrCreateNameSpace - Create a DIE for DINameSpace.
+  DIE *getOrCreateNameSpace(DINameSpace NS);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(DIType Ty);
+
+  /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new
+  /// DIE for the given DITemplateTypeParameter.
+  DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+  /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new
+  /// DIE for the given DITemplateValueParameter.
+  DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
+  void addPubTypes(DISubprogram SP);
+
+  /// constructTypeDIE - Construct basic type die from DIBasicType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIBasicType BTy);
+
+  /// constructTypeDIE - Construct derived type die from DIDerivedType.
+  void constructTypeDIE(DIE &Buffer,
+                        DIDerivedType DTy);
+
+  /// constructTypeDIE - Construct type DIE from DICompositeType.
+  void constructTypeDIE(DIE &Buffer,
+                        DICompositeType CTy);
+
+  /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
+  void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
+
+  /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
+  void constructArrayTypeDIE(DIE &Buffer,
+                             DICompositeType *CTy);
+
+  /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
+  DIE *constructEnumTypeDIE(DIEnumerator ETy);
+
+  /// createMemberDIE - Create new member DIE.
+  DIE *createMemberDIE(DIDerivedType DT);
+
+  /// createSubprogramDIE - Create new DIE using SP.
+  DIE *createSubprogramDIE(DISubprogram SP);
+
+  /// getOrCreateDbgScope - Create DbgScope for the scope.
+  DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+
+  DbgScope *getOrCreateAbstractScope(const MDNode *N);
+
+  /// findAbstractVariable - Find abstract variable associated with Var.
+  DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
+
+  /// updateSubprogramScopeDIE - Find DIE for the given subprogram and
+  /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
+  /// If there are global variables in this scope then create and insert
+  /// DIEs for these variables.
+  DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
+
+  /// constructLexicalScopeDIE - Construct a new DW_TAG_lexical_block
+  /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
+  DIE *constructLexicalScopeDIE(DbgScope *Scope);
+
+  /// constructInlinedScopeDIE - This scope represents inlined body of
+  /// a function. Construct DIE to represent this concrete inlined copy
+  /// of the function.
+  DIE *constructInlinedScopeDIE(DbgScope *Scope);
+
+  /// constructVariableDIE - Construct a DIE for the given DbgVariable.
+  DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
+
+  /// constructScopeDIE - Construct a DIE for this scope.
+  DIE *constructScopeDIE(DbgScope *Scope);
+
+  /// EmitSectionLabels - Emit initial Dwarf sections with a label at
+  /// the start of each one.
+  void EmitSectionLabels();
+
+  /// emitDIE - Recursively emits a debug information entry.
+  ///
+  void emitDIE(DIE *Die);
+
+  /// computeSizeAndOffset - Compute the size and offset of a DIE.
+  ///
+  unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last);
+
+  /// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
+  ///
+  void computeSizeAndOffsets();
+
+  /// emitDebugInfo - Emit the debug info section.
+  ///
+  void emitDebugInfo();
+
+  /// emitAbbreviations - Emit the abbreviation section.
+  ///
+  void emitAbbreviations() const;
+
+  /// emitEndOfLineMatrix - Emit the last address of the section and the end
+  /// of the line matrix.
+  ///
+  void emitEndOfLineMatrix(unsigned SectionEnd);
+
+  /// emitCommonDebugFrame - Emit common frame info into a debug frame
+  /// section.
+  ///
+  void emitCommonDebugFrame();
+
+  /// emitFunctionDebugFrame - Emit per function frame info into a debug frame
+  /// section.
+  void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
+
+  /// emitDebugPubNames - Emit visible names into a debug pubnames section.
+  ///
+  void emitDebugPubNames();
+
+  /// emitDebugPubTypes - Emit visible types into a debug pubtypes section.
+  ///
+  void emitDebugPubTypes();
+
+  /// emitDebugStr - Emit visible names into a debug str section.
+  ///
+  void emitDebugStr();
+
+  /// emitDebugLoc - Emit visible names into a debug loc section.
+  ///
+  void emitDebugLoc();
+
+  /// EmitDebugARanges - Emit visible names into a debug aranges section.
+  ///
+  void EmitDebugARanges();
+
+  /// emitDebugRanges - Emit visible names into a debug ranges section.
+  ///
+  void emitDebugRanges();
+
+  /// emitDebugMacInfo - Emit visible names into a debug macinfo section.
+  ///
+  void emitDebugMacInfo();
+
+  /// emitDebugInlineInfo - Emit inline info using the following format.
+  /// Section Header:
+  /// 1. length of section
+  /// 2. Dwarf version number
+  /// 3. address size.
+  ///
+  /// Entries (one "entry" for each function that was inlined):
+  ///
+  /// 1. offset into __debug_str section for MIPS linkage name, if it exists;
+  ///    otherwise offset into __debug_str for regular function name.
+  /// 2. offset into __debug_str section for regular function name.
+  /// 3. an unsigned LEB128 number indicating the number of distinct inlining
+  ///    instances for the function.
+  ///
+  /// The rest of the entry consists of a {die_offset, low_pc} pair for each
+  /// inlined instance; the die_offset points to the inlined_subroutine die in
+  /// the __debug_info section, and the low_pc is the starting address for the
+  /// inlining instance.
+  void emitDebugInlineInfo();
+
+  /// GetOrCreateSourceID - Look up the source id for the given directory and
+  /// source file names. If none currently exists, create a new id and insert
+  /// it in the SourceIds map.
+  unsigned GetOrCreateSourceID(StringRef FullName);
+
+  /// constructCompileUnit - Create new CompileUnit for the given
+  /// metadata node with tag DW_TAG_compile_unit.
+  void constructCompileUnit(const MDNode *N);
+
+  /// getCompileUnit - Get CompileUnit DIE.
+  CompileUnit *getCompileUnit(const MDNode *N) const;
+
+  /// constructGlobalVariableDIE - Construct global variable DIE.
+  void constructGlobalVariableDIE(const MDNode *N);
+
+  /// constructSubprogramDIE - Construct subprogram DIE.
+  void constructSubprogramDIE(const MDNode *N);
+
+  /// recordSourceLine - Register a source line with debug info. Returns the
+  /// unique label that was emitted and which provides correspondence to
+  /// the source line list.
+  MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
+
+  /// recordVariableFrameIndex - Record a variable's frame index.
+  void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+  /// findVariableFrameIndex - Return true if a frame index for the variable
+  /// is found. Update FI to hold the value of the index.
+  bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+  /// findDbgScope - Find the DbgScope for the debug loc attached to an
+  /// instruction.
+  DbgScope *findDbgScope(const MachineInstr *MI);
+
+  /// identifyScopeMarkers - Identify instructions that mark the
+  /// beginning or end of a scope.
+  void identifyScopeMarkers();
+
+  /// extractScopeInformation - Scan machine instructions in this function
+  /// and collect DbgScopes. Return true if at least one scope was found.
+  bool extractScopeInformation();
+
+  /// addCurrentFnArgument - If Var is a current function argument, add it
+  /// to the CurrentFnArguments list.
+  bool addCurrentFnArgument(const MachineFunction *MF,
+                            DbgVariable *Var, DbgScope *Scope);
+
+  /// collectVariableInfo - Populate DbgScope entries with variables' info.
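+  /// Variable information comes from two places: the frame-index side table
+  /// maintained by MachineModuleInfo (see collectVariableInfoFromMMITable
+  /// below) and DBG_VALUE machine instructions.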
+  void collectVariableInfo(const MachineFunction *,
+                           SmallPtrSet<const MDNode *, 16> &ProcessedVars);
+
+  /// collectVariableInfoFromMMITable - Collect variable information from the
+  /// side table maintained by MMI.
+  void collectVariableInfoFromMMITable(const MachineFunction *MF,
+                                       SmallPtrSet<const MDNode *, 16> &P);
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfDebug(AsmPrinter *A, Module *M);
+  ~DwarfDebug();
+
+  /// beginModule - Emit all Dwarf sections that should come prior to the
+  /// content.
+  void beginModule(Module *M);
+
+  /// endModule - Emit all Dwarf sections that should come after the content.
+  ///
+  void endModule();
+
+  /// beginFunction - Gather pre-function debug information. Assumes being
+  /// emitted immediately after the function entry point.
+  void beginFunction(const MachineFunction *MF);
+
+  /// endFunction - Gather and emit post-function debug information.
+  ///
+  void endFunction(const MachineFunction *MF);
+
+  /// getLabelBeforeInsn - Return Label preceding the instruction.
+  const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+  /// getLabelAfterInsn - Return Label immediately following the instruction.
+  const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
+  /// beginInstruction - Process beginning of an instruction.
+  void beginInstruction(const MachineInstr *MI);
+
+  /// endInstruction - Process end of an instruction.
+  void endInstruction(const MachineInstr *MI);
+};
+} // End of namespace llvm
+
+#endif
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfException.cpp b/final/lib/CodeGen/AsmPrinter/DwarfException.cpp
new file mode 100644
index 00000000000..967a2783da1
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -0,0 +1,676 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfException::DwarfException(AsmPrinter *A)
+  : Asm(A), MMI(Asm->MMI) {}
+
+DwarfException::~DwarfException() {}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
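+/// For example, pads with type id lists {1, 2, 3} and {1, 2, 7} share two
+/// leading ids, so the result is 2.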
+unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
+                                       const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+  unsigned Count = 0;
+
+  for (; Count != MinSize; ++Count)
+    if (LIds[Count] != RIds[Count])
+      return Count;
+
+  return Count;
+}
+
+/// PadLT - Order landing pads lexicographically by type id.
+bool DwarfException::PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+  for (unsigned i = 0; i != MinSize; ++i)
+    if (LIds[i] != RIds[i])
+      return LIds[i] < RIds[i];
+
+  return LSize < RSize;
+}
+
+/// ComputeActionsTable - Compute the actions table and gather the first action
+/// index for each landing pad site.
+unsigned DwarfException::
+ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+                    SmallVectorImpl<ActionEntry> &Actions,
+                    SmallVectorImpl<unsigned> &FirstActions) {
+
+  // The action table follows the call-site table in the LSDA. The individual
+  // records are of two types:
+  //
+  //   * Catch clause
+  //   * Exception specification
+  //
+  // The two record kinds have the same format, with only small differences.
+  // They are distinguished by the "switch value" field: Catch clauses
+  // (TypeInfos) have strictly positive switch values, and exception
+  // specifications (FilterIds) have strictly negative switch values. Value 0
+  // indicates a catch-all clause.
+  //
+  // Negative type IDs index into FilterIds. Positive type IDs index into
+  // TypeInfos. The value written for a positive type ID is just the type ID
+  // itself. For a negative type ID, however, the value written is the
+  // (negative) byte offset of the corresponding FilterIds entry. The byte
+  // offset is usually equal to the type ID (because the FilterIds entries are
+  // written using a variable width encoding, which outputs one byte per entry
+  // as long as the value written is not too large) but can differ. This kind
+  // of complication does not occur for positive type IDs because type infos
+  // are output using a fixed width encoding. FilterOffsets[i] holds the byte
+  // offset corresponding to FilterIds[i].
+
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  SmallVector<int, 16> FilterOffsets;
+  FilterOffsets.reserve(FilterIds.size());
+  int Offset = -1;
+
+  for (std::vector<unsigned>::const_iterator
+         I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) {
+    FilterOffsets.push_back(Offset);
+    Offset -= MCAsmInfo::getULEB128Size(*I);
+  }
+
+  FirstActions.reserve(LandingPads.size());
+
+  int FirstAction = 0;
+  unsigned SizeActions = 0;
+  const LandingPadInfo *PrevLPI = 0;
+
+  for (SmallVectorImpl<const LandingPadInfo *>::const_iterator
+         I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) {
+    const LandingPadInfo *LPI = *I;
+    const std::vector<int> &TypeIds = LPI->TypeIds;
+    unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0;
+    unsigned SizeSiteActions = 0;
+
+    if (NumShared < TypeIds.size()) {
+      unsigned SizeAction = 0;
+      unsigned PrevAction = (unsigned)-1;
+
+      if (NumShared) {
+        unsigned SizePrevIds = PrevLPI->TypeIds.size();
+        assert(Actions.size());
+        PrevAction = Actions.size() - 1;
+        SizeAction =
+          MCAsmInfo::getSLEB128Size(Actions[PrevAction].NextAction) +
+          MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+
+        for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+          assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!");
+          SizeAction -=
+            MCAsmInfo::getSLEB128Size(Actions[PrevAction].ValueForTypeID);
+          SizeAction += -Actions[PrevAction].NextAction;
+          PrevAction = Actions[PrevAction].Previous;
+        }
+      }
+
+      // Compute the actions.
+      for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) {
+        int TypeID = TypeIds[J];
+        assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+        unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+        int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+        SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+        SizeSiteActions += SizeAction;
+
+        ActionEntry Action = { ValueForTypeID, NextAction, PrevAction };
+        Actions.push_back(Action);
+        PrevAction = Actions.size() - 1;
+      }
+
+      // Record the first action of the landing pad site.
+      FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+    } // else identical - re-use previous FirstAction
+
+    // Information used when creating the call-site table. The action record
+    // field of the call site record is the offset of the first associated
+    // action record, relative to the start of the actions table. This value is
+    // biased by 1 (1 indicating the start of the actions table), and 0
+    // indicates that there are no actions.
+    FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+    SizeActions += SizeSiteActions;
+
+    PrevLPI = LPI;
+  }
+
+  return SizeActions;
+}
+
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+  assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+  bool MarkedNoUnwind = false;
+  bool SawFunc = false;
+
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+
+    if (!MO.isGlobal()) continue;
+
+    const Function *F = dyn_cast<Function>(MO.getGlobal());
+    if (F == 0) continue;
+
+    if (SawFunc) {
+      // Be conservative. If we have more than one function operand for this
+      // call, then we can't make the assumption that it's the callee and
+      // not a parameter to the call.
+      //
+      // FIXME: Determine if there's a way to say that `F' is the callee or
+      // parameter.
+      MarkedNoUnwind = false;
+      break;
+    }
+
+    MarkedNoUnwind = F->doesNotThrow();
+    SawFunc = true;
+  }
+
+  return MarkedNoUnwind;
+}
+
+/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
+/// has a try-range containing the call, a non-zero landing pad, and an
+/// appropriate action. The entry for an ordinary call has a try-range
+/// containing the call and zero for the landing pad and the action. Calls
+/// marked 'nounwind' have no entry and must not be contained in the try-range
+/// of any entry - they form gaps in the table. Entries must be ordered by
+/// try-range address.
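+/// As an illustration: for the sequence "call a; invoke b; nounwind call c;
+/// invoke d" the table gets a no-landing-pad entry covering a, one entry per
+/// invoke try-range, and a gap over c.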
+void DwarfException::
+ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                     const RangeMapType &PadMap,
+                     const SmallVectorImpl<const LandingPadInfo *> &LandingPads,
+                     const SmallVectorImpl<unsigned> &FirstActions) {
+  // The end label of the previous invoke or nounwind try-range.
+  MCSymbol *LastLabel = 0;
+
+  // Whether there is a potentially throwing instruction (currently this means
+  // an ordinary call) between the end of the previous try-range and now.
+  bool SawPotentiallyThrowing = false;
+
+  // Whether the last CallSite entry was for an invoke.
+  bool PreviousIsInvoke = false;
+
+  // Visit all instructions in order of address.
+  for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+         MI != E; ++MI) {
+      if (!MI->isLabel()) {
+        if (MI->getDesc().isCall())
+          SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+        continue;
+      }
+
+      // End of the previous try-range?
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+      if (BeginLabel == LastLabel)
+        SawPotentiallyThrowing = false;
+
+      // Beginning of a new try-range?
+      RangeMapType::const_iterator L = PadMap.find(BeginLabel);
+      if (L == PadMap.end())
+        // Nope, it was just some random label.
+        continue;
+
+      const PadRange &P = L->second;
+      const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+      assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+             "Inconsistent landing pad map!");
+
+      // This is for Dwarf exception handling (SjLj handling doesn't use this).
+      // If some instruction between the previous try-range and this one may
+      // throw, create a call-site entry with no landing pad for the region
+      // between the try-ranges.
+      if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+        CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
+        CallSites.push_back(Site);
+        PreviousIsInvoke = false;
+      }
+
+      LastLabel = LandingPad->EndLabels[P.RangeIndex];
+      assert(BeginLabel && LastLabel && "Invalid landing pad!");
+
+      if (!LandingPad->LandingPadLabel) {
+        // Create a gap.
+        PreviousIsInvoke = false;
+      } else {
+        // This try-range is for an invoke.
+        CallSiteEntry Site = {
+          BeginLabel,
+          LastLabel,
+          LandingPad->LandingPadLabel,
+          FirstActions[P.PadIndex]
+        };
+
+        // Try to merge with the previous call-site. SJLJ doesn't do this.
+        if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
+          CallSiteEntry &Prev = CallSites.back();
+          if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+            // Extend the range of the previous entry.
+            Prev.EndLabel = Site.EndLabel;
+            continue;
+          }
+        }
+
+        // Otherwise, create a new call-site.
+        if (Asm->MAI->isExceptionHandlingDwarf())
+          CallSites.push_back(Site);
+        else {
+          // SjLj EH must maintain the call sites in the order assigned
+          // to them by the SjLjPrepare pass.
+          unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+          if (CallSites.size() < SiteNo)
+            CallSites.resize(SiteNo);
+          CallSites[SiteNo - 1] = Site;
+        }
+        PreviousIsInvoke = true;
+      }
+    }
+  }
+
+  // If some instruction between the previous try-range and the end of the
+  // function may throw, create a call-site entry with no landing pad for the
+  // region following the try-range.
+  if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
+    CallSiteEntry Site = { LastLabel, 0, 0, 0 };
+    CallSites.push_back(Site);
+  }
+}
+
+/// EmitExceptionTable - Emit landing pads and actions.
+///
+/// The general organization of the table is complex, but the basic concepts
+/// are easy. First there is a header which describes the location and
+/// organization of the three components that follow.
+///
+/// 1. The landing pad site information describes the range of code covered by
+///    the try. In our case it's an accumulation of the ranges covered by the
+///    invokes in the try. There is also a reference to the landing pad that
+///    handles the exception once processed. Finally an index into the actions
+///    table.
+/// 2. The action table, in our case, is composed of pairs of type IDs and next
+///    action offset. Starting with the action index from the landing pad
+///    site, each type ID is checked for a match to the current exception. If
+///    it matches then the exception and type id are passed on to the landing
+///    pad. Otherwise the next action is looked up. This chain is terminated
+///    with a next action of zero. If no type id is found then the frame is
+///    unwound and handling continues.
+/// 3. The type ID table contains references to all the C++ typeinfo for all
+///    catches in the function. This table is reverse indexed base 1.
+void DwarfException::EmitExceptionTable() {
+  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+
+  // Sort the landing pads in order of their type ids. This is used to fold
+  // duplicate actions.
+  SmallVector<const LandingPadInfo *, 64> LandingPads;
+  LandingPads.reserve(PadInfos.size());
+
+  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+    LandingPads.push_back(&PadInfos[i]);
+
+  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+  // Compute the actions table and gather the first action index for each
+  // landing pad site.
+  SmallVector<ActionEntry, 32> Actions;
+  SmallVector<unsigned, 64> FirstActions;
+  unsigned SizeActions = ComputeActionsTable(LandingPads, Actions,
+                                             FirstActions);
+
+  // Invokes and nounwind calls have entries in PadMap (due to being bracketed
+  // by try-range labels when lowered). Ordinary calls do not, so appropriate
+  // try-ranges for them need to be deduced when using DWARF exception handling.
+  RangeMapType PadMap;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LandingPad = LandingPads[i];
+    for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+      PadRange P = { i, j };
+      PadMap[BeginLabel] = P;
+    }
+  }
+
+  // Compute the call-site table.
+  SmallVector<CallSiteEntry, 64> CallSites;
+  ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions);
+
+  // Final tallies.
+
+  // Call sites.
+  bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+  bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
+
+  unsigned CallSiteTableLength;
+  if (IsSJLJ)
+    CallSiteTableLength = 0;
+  else {
+    unsigned SiteStartSize  = 4; // dwarf::DW_EH_PE_udata4
+    unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
+    unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
+    CallSiteTableLength =
+      CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
+  }
+
+  for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
+    CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+    if (IsSJLJ)
+      CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
+  }
+
+  // Type infos.
+  const MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection();
+  unsigned TTypeEncoding;
+  unsigned TypeFormatSize;
+
+  if (!HaveTTData) {
+    // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say
+    // that we're omitting that bit.
+    TTypeEncoding = dwarf::DW_EH_PE_omit;
+    // dwarf::DW_EH_PE_absptr
+    TypeFormatSize = Asm->getTargetData().getPointerSize();
+  } else {
+    // Okay, we have actual filters or typeinfos to emit. As such, we need to
+    // pick a type encoding for them. We're about to emit a list of pointers to
+    // typeinfo objects at the end of the LSDA. However, unless we're in static
+    // mode, this reference will require a relocation by the dynamic linker.
+    //
+    // Because of this, we have a couple of options:
+    //
+    //   1) If we are in -static mode, we can always use an absolute reference
+    //      from the LSDA, because the static linker will resolve it.
+    //
+    //   2) Otherwise, if the LSDA section is writable, we can output the direct
+    //      reference to the typeinfo and allow the dynamic linker to relocate
+    //      it. Since it is in a writable section, the dynamic linker won't
+    //      have a problem.
+    //
+    //   3) Finally, if we're in PIC mode and the LSDA section isn't writable,
+    //      we need to use some form of indirection. For example, on Darwin,
+    //      we can output a statically-relocatable reference to a dyld stub. The
+    //      offset to the stub is constant, but the contents are in a section
+    //      that is updated by the dynamic linker. This is easy enough, but we
+    //      need to tell the personality function of the unwinder to indirect
+    //      through the dyld stub.
+    //
+    // FIXME: When (3) is actually implemented, we'll have to emit the stubs
+    // somewhere. This predicate should be moved to a shared location that is
+    // in target-independent code.
+    //
+    TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding();
+    TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding);
+  }
+
+  // Begin the exception table.
+  // Sometimes we do not want to emit the data into a separate section (e.g.
+  // ARM EHABI). In this case LSDASection will be NULL.
+  if (LSDASection)
+    Asm->OutStreamer.SwitchSection(LSDASection);
+  Asm->EmitAlignment(2);
+
+  // Emit the LSDA.
+  MCSymbol *GCCETSym =
+    Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
+                                      Twine(Asm->getFunctionNumber()));
+  Asm->OutStreamer.EmitLabel(GCCETSym);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("exception",
+                                                Asm->getFunctionNumber()));
+
+  if (IsSJLJ)
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("_LSDA_",
+                                                  Asm->getFunctionNumber()));
+
+  // Emit the LSDA header.
+  Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+  Asm->EmitEncodingByte(TTypeEncoding, "@TType");
+
+  // The type infos need to be aligned. GCC does this by inserting padding just
+  // before the type infos. However, this changes the size of the exception
+  // table, so you need to take this into account when you output the exception
+  // table size. However, the size is output using a variable length encoding.
+  // So by increasing the size by inserting padding, you may increase the number
+  // of bytes used for writing the size. If it increases, say by one byte, then
+  // you now need to output one less byte of padding to get the type infos
+  // aligned. However this decreases the size of the exception table. This
+  // changes the value you have to output for the exception table size. Due to
+  // the variable length encoding, the number of bytes used for writing the
+  // length may decrease. If so, you then have to increase the amount of
+  // padding. And so on. If you look carefully at the GCC code you will see that
+  // it indeed does this in a loop, going on and on until the values stabilize.
+  // We chose another solution: don't output padding inside the table like GCC
+  // does, instead output it before the table.
+  unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
+  unsigned CallSiteTableLengthSize =
+    MCAsmInfo::getULEB128Size(CallSiteTableLength);
+  unsigned TTypeBaseOffset =
+    sizeof(int8_t) +                         // Call site format
+    CallSiteTableLengthSize +                // Call site table length size
+    CallSiteTableLength +                    // Call site table length
+    SizeActions +                            // Actions size
+    SizeTypes;
+  unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+  unsigned TotalSize =
+    sizeof(int8_t) +                         // LPStart format
+    sizeof(int8_t) +                         // TType format
+    (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+    TTypeBaseOffset;                         // TType base offset
+  unsigned SizeAlign = (4 - TotalSize) & 3;
+
+  if (HaveTTData) {
+    // Account for any extra padding that will be added to the call site table
+    // length.
+    Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+    SizeAlign = 0;
+  }
+
+  // SjLj Exception handling
+  if (IsSJLJ) {
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+    // Add extra padding if it wasn't added to the TType base offset.
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+    // Emit the landing pad site information.
+    unsigned idx = 0;
+    for (SmallVectorImpl<CallSiteEntry>::const_iterator
+           I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
+      const CallSiteEntry &S = *I;
+
+      // Offset of the landing pad, counted in 16-byte bundles relative to the
+      // @LPStart address.
+      Asm->EmitULEB128(idx, "Landing pad");
+
+      // Offset of the first associated action record, relative to the start of
+      // the action table. This value is biased by 1 (1 indicates the start of
+      // the action table), and 0 indicates that there are no actions.
+      Asm->EmitULEB128(S.Action, "Action");
+    }
+  } else {
+    // DWARF Exception handling
+    assert(Asm->MAI->isExceptionHandlingDwarf());
+
+    // The call-site table is a list of all call sites that may throw an
+    // exception (including C++ 'throw' statements) in the procedure
+    // fragment. It immediately follows the LSDA header. Each entry indicates,
+    // for a given call, the first corresponding action record and corresponding
+    // landing pad.
+    //
+    // The table begins with the number of bytes, stored as an unsigned LEB128
+    // compressed integer. The records immediately follow the record count.
+    // They are sorted in increasing call-site address. Each record indicates:
+    //
+    //   * The position of the call-site.
+    //   * The position of the landing pad.
+    //   * The first action record for that call site.
+    //
+    // A missing entry in the call-site table indicates that a call is not
+    // supposed to throw.
+
+    // Emit the landing pad call site table.
+    Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
+
+    // Add extra padding if it wasn't added to the TType base offset.
+    Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+
+    for (SmallVectorImpl<CallSiteEntry>::const_iterator
+           I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
+      const CallSiteEntry &S = *I;
+
+      MCSymbol *EHFuncBeginSym =
+        Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
+
+      MCSymbol *BeginLabel = S.BeginLabel;
+      if (BeginLabel == 0)
+        BeginLabel = EHFuncBeginSym;
+      MCSymbol *EndLabel = S.EndLabel;
+      if (EndLabel == 0)
+        EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+
+      // Offset of the call site relative to the previous call site, counted in
+      // number of 16-byte bundles. The first call site is counted relative to
+      // the start of the procedure fragment.
+      Asm->OutStreamer.AddComment("Region start");
+      Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
+
+      Asm->OutStreamer.AddComment("Region length");
+      Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
+
+      // Offset of the landing pad, counted in 16-byte bundles relative to the
+      // @LPStart address.
+      Asm->OutStreamer.AddComment("Landing pad");
+      if (!S.PadLabel)
+        Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+      else
+        Asm->EmitLabelDifference(S.PadLabel, EHFuncBeginSym, 4);
+
+      // Offset of the first associated action record, relative to the start of
+      // the action table. This value is biased by 1 (1 indicates the start of
+      // the action table), and 0 indicates that there are no actions.
+      Asm->EmitULEB128(S.Action, "Action");
+    }
+  }
+
+  // Emit the Action Table.
+  if (Actions.size() != 0) {
+    Asm->OutStreamer.AddComment("-- Action Record Table --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+
+  for (SmallVectorImpl<ActionEntry>::const_iterator
+         I = Actions.begin(), E = Actions.end(); I != E; ++I) {
+    const ActionEntry &Action = *I;
+    Asm->OutStreamer.AddComment("Action Record");
+    Asm->OutStreamer.AddBlankLine();
+
+    // Type Filter
+    //
+    //   Used by the runtime to match the type of the thrown exception to the
+    //   type of the catch clauses or the types in the exception specification.
+    Asm->EmitSLEB128(Action.ValueForTypeID, "  TypeInfo index");
+
+    // Action Record
+    //
+    //   Self-relative signed displacement in bytes of the next action record,
+    //   or 0 if there is no next action record.
+    Asm->EmitSLEB128(Action.NextAction, "  Next action");
+  }
+
+  // Emit the Catch TypeInfos.
+  if (!TypeInfos.empty()) {
+    Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  for (std::vector<const GlobalVariable *>::const_reverse_iterator
+         I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
+    const GlobalVariable *GV = *I;
+
+    Asm->OutStreamer.AddComment("TypeInfo");
+    if (GV)
+      Asm->EmitReference(GV, TTypeEncoding);
+    else
+      Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding),
+                                    0);
+  }
+
+  // Emit the Exception Specifications.
+  if (!FilterIds.empty()) {
+    Asm->OutStreamer.AddComment("-- Filter IDs --");
+    Asm->OutStreamer.AddBlankLine();
+  }
+  for (std::vector<unsigned>::const_iterator
+         I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
+    unsigned TypeID = *I;
+    Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+  }
+
+  Asm->EmitAlignment(2);
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfException::EndModule() {
+  assert(0 && "Should be implemented");
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfException::BeginFunction(const MachineFunction *MF) {
+  assert(0 && "Should be implemented");
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfException::EndFunction() {
+  assert(0 && "Should be implemented");
+}
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfException.h b/final/lib/CodeGen/AsmPrinter/DwarfException.h
new file mode 100644
index 00000000000..06b1de62fbd
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -0,0 +1,274 @@
+//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing dwarf exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+#define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class SmallVectorImpl;
+struct LandingPadInfo;
+class MachineModuleInfo;
+class MachineMove;
+class MachineInstr;
+class MachineFunction;
+class MCAsmInfo;
+class MCExpr;
+class MCSymbol;
+class Function;
+class AsmPrinter;
+
+//===----------------------------------------------------------------------===//
+/// DwarfException - Emits Dwarf exception handling directives.
+///
+class DwarfException {
+protected:
+  /// Asm - Target of Dwarf emission.
+  AsmPrinter *Asm;
+
+  /// MMI - Collected machine module information.
+  MachineModuleInfo *MMI;
+
+  /// EmitExceptionTable - Emit landing pads and actions.
+  ///
+  /// The general organization of the table is complex, but the basic concepts
+  /// are easy. First there is a header which describes the location and
+  /// organization of the three components that follow.
+  /// 1. The landing pad site information describes the range of code covered
+  ///    by the try. In our case it's an accumulation of the ranges covered
+  ///    by the invokes in the try. There is also a reference to the landing
+  ///    pad that handles the exception once processed. Finally an index into
+  ///    the actions table.
+  /// 2. The action table, in our case, is composed of pairs of type ids
+  ///    and next action offset. Starting with the action index from the
+  ///    landing pad site, each type id is checked for a match to the current
+  ///    exception. If it matches then the exception and type id are passed
+  ///    on to the landing pad. Otherwise the next action is looked up. This
+  ///    chain is terminated with a next action of zero. If no type id is
+  ///    found the frame is unwound and handling continues.
+  /// 3. The type id table contains references to all the C++ typeinfo for all
+  ///    catches in the function. This table is reverse indexed base 1.
+
+  /// SharedTypeIds - How many leading type ids two landing pads have in common.
+  static unsigned SharedTypeIds(const LandingPadInfo *L,
+                                const LandingPadInfo *R);
+
+  /// PadLT - Order landing pads lexicographically by type id.
+  static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R);
+
+  /// PadRange - Structure holding a try-range and the associated landing pad.
+  struct PadRange {
+    // The index of the landing pad.
+    unsigned PadIndex;
+    // The index of the begin and end labels in the landing pad's label lists.
+    unsigned RangeIndex;
+  };
+
+  typedef DenseMap<MCSymbol *, PadRange> RangeMapType;
+
+  /// ActionEntry - Structure describing an entry in the actions table.
+  struct ActionEntry {
+    int ValueForTypeID; // The value to write - may not be equal to the type id.
+    int NextAction;
+    unsigned Previous;
+  };
+
+  /// CallSiteEntry - Structure describing an entry in the call-site table.
+  struct CallSiteEntry {
+    // The 'try-range' is BeginLabel .. EndLabel.
+    MCSymbol *BeginLabel; // zero indicates the start of the function.
+    MCSymbol *EndLabel;   // zero indicates the end of the function.
+
+    // The landing pad starts at PadLabel.
+    MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+    unsigned Action;
+  };
+
+  /// ComputeActionsTable - Compute the actions table and gather the first
+  /// action index for each landing pad site.
+  unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                               SmallVectorImpl<ActionEntry> &Actions,
+                               SmallVectorImpl<unsigned> &FirstActions);
+
+  /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+  /// marked `nounwind'. Return `false' otherwise.
+  bool CallToNoUnwindFunction(const MachineInstr *MI);
+
+  /// ComputeCallSiteTable - Compute the call-site table. The entry for an
+  /// invoke has a try-range containing the call, a non-zero landing pad and an
+  /// appropriate action. The entry for an ordinary call has a try-range
+  /// containing the call and zero for the landing pad and the action. Calls
+  /// marked 'nounwind' have no entry and must not be contained in the try-range
+  /// of any entry - they form gaps in the table. Entries must be ordered by
+  /// try-range address.
+  void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
+                            const RangeMapType &PadMap,
+                            const SmallVectorImpl<const LandingPadInfo *> &LPs,
+                            const SmallVectorImpl<unsigned> &FirstActions);
+  void EmitExceptionTable();
+
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfException(AsmPrinter *A);
+  virtual ~DwarfException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfCFIException(AsmPrinter *A);
+  virtual ~DwarfCFIException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+class DwarfTableException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+
+  /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+  /// should be emitted.
+  bool shouldEmitMovesModule;
+
+  struct FunctionEHFrameInfo {
+    MCSymbol *FunctionEHSym; // L_foo.eh
+    unsigned Number;
+    unsigned PersonalityIndex;
+    bool adjustsStack;
+    bool hasLandingPads;
+    std::vector<MachineMove> Moves;
+    const Function *function;
+
+    FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+                        bool hC, bool hL,
+                        const std::vector<MachineMove> &M,
+                        const Function *f):
+      FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+      adjustsStack(hC), hasLandingPads(hL), Moves(M), function(f) { }
+  };
+
+  std::vector<FunctionEHFrameInfo> EHFrames;
+
+  /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+  /// uses an LSDA. If so, then we need to encode that information in the CIE's
+  /// augmentation.
+  DenseMap<unsigned, bool> UsesLSDA;
+
+  /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+  /// that is shared among many Frame Description Entries. There is at least
+  /// one CIE in every non-empty .debug_frame section.
+  void EmitCIE(const Function *Personality, unsigned Index);
+
+  /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+  void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  DwarfTableException(AsmPrinter *A);
+  virtual ~DwarfTableException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+
+class ARMException : public DwarfException {
+  /// shouldEmitTable - Per-function flag to indicate if EH tables should
+  /// be emitted.
+  bool shouldEmitTable;
+
+  /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+  /// should be emitted.
+  bool shouldEmitMoves;
+
+  /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+  /// should be emitted.
+  bool shouldEmitTableModule;
+public:
+  //===--------------------------------------------------------------------===//
+  // Main entry points.
+  //
+  ARMException(AsmPrinter *A);
+  virtual ~ARMException();
+
+  /// EndModule - Emit all exception information that should come after the
+  /// content.
+  virtual void EndModule();
+
+  /// BeginFunction - Gather pre-function exception information. Assumes being
+  /// emitted immediately after the function entry point.
+  virtual void BeginFunction(const MachineFunction *MF);
+
+  /// EndFunction - Gather and emit post-function exception information.
+  virtual void EndFunction();
+};
+
+} // End of namespace llvm
+
+#endif
diff --git a/final/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/final/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
new file mode 100644
index 00000000000..751901183cd
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
@@ -0,0 +1,349 @@
+//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+// The implementation emits all the necessary tables "by hand".
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfTableException::DwarfTableException(AsmPrinter *A)
+  : DwarfException(A),
+    shouldEmitTable(false), shouldEmitMoves(false),
+    shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+
+DwarfTableException::~DwarfTableException() {}
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+/// that is shared among many Frame Description Entries. There is at least one
+/// CIE in every non-empty .debug_frame section.
+void DwarfTableException::EmitCIE(const Function *PersonalityFn,
+                                  unsigned Index) {
+  // Size and sign of stack growth.
+  int stackGrowth = Asm->getTargetData().getPointerSize();
+  if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+      TargetFrameLowering::StackGrowsDown)
+    stackGrowth *= -1;
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  // Begin eh frame section.
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  MCSymbol *EHFrameSym;
+  if (TLOF.isFunctionEHFrameSymbolPrivate())
+    EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
+  else
+    EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
+                                                   Twine(Index));
+  Asm->OutStreamer.EmitLabel(EHFrameSym);
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
+
+  // Define base labels.
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
+
+  // Define the eh frame length.
+  Asm->OutStreamer.AddComment("Length of Common Information Entry");
+  Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
+                           Asm->GetTempSymbol("eh_frame_common_begin", Index),
+                           4);
+
+  // EH frame header.
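+  // (What follows is the standard CIE layout: a CIE id of 0, the version
+  // byte, the augmentation string, the code and data alignment factors and
+  // the return address column, then the augmentation data.)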
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
+  Asm->OutStreamer.AddComment("CIE Identifier Tag");
+  Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+  Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+  Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
+
+  // The personality presence indicates that language specific information will
+  // show up in the eh frame. Find out how we are supposed to lower the
+  // personality function reference:
+
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding();
+  unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+  char Augmentation[6] = { 0 };
+  unsigned AugmentationSize = 0;
+  char *APtr = Augmentation + 1;
+
+  if (PersonalityFn) {
+    // There is a personality function.
+    *APtr++ = 'P';
+    AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
+  }
+
+  if (UsesLSDA[Index]) {
+    // An LSDA pointer is in the FDE augmentation.
+    *APtr++ = 'L';
+    ++AugmentationSize;
+  }
+
+  if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+    // A non-default pointer encoding for the FDE.
+    *APtr++ = 'R';
+    ++AugmentationSize;
+  }
+
+  if (APtr != Augmentation + 1)
+    Augmentation[0] = 'z';
+
+  Asm->OutStreamer.AddComment("CIE Augmentation");
+  Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
+
+  // Round out reader.
+  Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+  Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+  Asm->OutStreamer.AddComment("CIE Return Address Column");
+
+  const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+  const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+  Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+  if (Augmentation[0]) {
+    Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
+
+    // If there is a personality, we need to indicate the function's location.
+    if (PersonalityFn) {
+      Asm->EmitEncodingByte(PerEncoding, "Personality");
+      Asm->OutStreamer.AddComment("Personality");
+      Asm->EmitReference(PersonalityFn, PerEncoding);
+    }
+    if (UsesLSDA[Index])
+      Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
+    if (FDEEncoding != dwarf::DW_EH_PE_absptr)
+      Asm->EmitEncodingByte(FDEEncoding, "FDE");
+  }
+
+  // Indicate locations of general callee saved registers in frame.
+  std::vector<MachineMove> Moves;
+  TFI->getInitialFrameState(Moves);
+  Asm->EmitFrameMoves(Moves, 0, true);
+
+  // On Darwin the linker honors the alignment of eh_frame, which means it must
+  // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+  // holes which confuse readers of eh_frame.
+  Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
+}
+
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+  assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+         "Should not emit 'available externally' functions at all");
+
+  const Function *TheFunc = EHFrameInfo.function;
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+  unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+  unsigned FDEEncoding = TLOF.getFDEEncoding();
+
+  Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+  // Externally visible entry into the function's eh frame info. If the
+  // corresponding function is static, this should not be externally visible.
+  if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
+
+  // If corresponding function is weak definition, this should be too.
+  if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+    Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                         MCSA_WeakDefinition);
+
+  // If corresponding function is hidden, this should be too.
+  if (TheFunc->hasHiddenVisibility())
+    if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
+      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                           HiddenAttr);
+
+  // If there are no calls then you can't unwind. This may mean we can omit the
+  // EH Frame, but some environments do not handle weak absolute symbols. If
+  // UnwindTablesMandatory is set we cannot do this optimization; the unwind
+  // info is to be available for non-EH uses.
+  if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
+      (!TheFunc->isWeakForLinker() ||
+       !Asm->MAI->getWeakDefDirective() ||
+       TLOF.getSupportsWeakOmittedEHFrame())) {
+    Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+                                    MCConstantExpr::Create(0, Asm->OutContext));
+    // This name has no connection to the function, so it might get
+    // dead-stripped when the function is not, erroneously. Prohibit
+    // dead-stripping unconditionally.
+    if (Asm->MAI->hasNoDeadStrip())
+      Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                           MCSA_NoDeadStrip);
+  } else {
+    Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
+
+    // EH frame header.
+    Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+      Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
+
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+                                                  EHFrameInfo.Number));
+
+    Asm->OutStreamer.AddComment("FDE CIE offset");
+    Asm->EmitLabelDifference(
+      Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+      Asm->GetTempSymbol("eh_frame_common",
+                         EHFrameInfo.PersonalityIndex), 4);
+
+    MCSymbol *EHFuncBeginSym =
+      Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
+
+    Asm->OutStreamer.AddComment("FDE initial location");
+    Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+
+    Asm->OutStreamer.AddComment("FDE address range");
+    Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+                                                EHFrameInfo.Number),
+                             EHFuncBeginSym,
+                             Asm->GetSizeOfEncodedValue(FDEEncoding));
+
+    // If there is a personality and landing pads then point to the language
+    // specific data area in the exception table.
+    if (MMI->getPersonalities()[0] != NULL) {
+      unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
+
+      Asm->EmitULEB128(Size, "Augmentation size");
+      Asm->OutStreamer.AddComment("Language Specific Data Area");
+      if (EHFrameInfo.hasLandingPads)
+        Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+                           LSDAEncoding);
+      else
+        Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
+
+    } else {
+      Asm->EmitULEB128(0, "Augmentation size");
+    }
+
+    // Indicate locations of function specific callee saved registers in frame.
+    Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
+
+    // On Darwin the linker honors the alignment of eh_frame, which means it
+    // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+    // get holes which confuse readers of eh_frame.
+    Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+                                                  EHFrameInfo.Number));
+
+    // If the function is marked used, this table should be also. We cannot
+    // make the mark unconditional in this case, since retaining the table also
+    // retains the function in this case, and there is code around that depends
+    // on unused functions (calling undefined externals) being dead-stripped to
+    // link correctly. Yes, there really is.
+    if (MMI->isUsedFunction(EHFrameInfo.function))
+      if (Asm->MAI->hasNoDeadStrip())
+        Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+                                             MCSA_NoDeadStrip);
+  }
+  Asm->OutStreamer.AddBlankLine();
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfTableException::EndModule() {
+  if (!Asm->MAI->isExceptionHandlingDwarf())
+    return;
+
+  if (!shouldEmitMovesModule && !shouldEmitTableModule)
+    return;
+
+  const std::vector<const Function *> &Personalities = MMI->getPersonalities();
+
+  for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+    EmitCIE(Personalities[I], I);
+
+  for (std::vector<FunctionEHFrameInfo>::iterator
+         I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+    EmitFDE(*I);
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfTableException::BeginFunction(const MachineFunction *MF) {
+  shouldEmitTable = shouldEmitMoves = false;
+
+  // If any landing pads survive, we need an EH table.
+  shouldEmitTable = !MMI->getLandingPads().empty();
+
+  // See if we need frame move info.
+  shouldEmitMoves =
+    !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+  if (shouldEmitMoves || shouldEmitTable)
+    // Assumes in correct section after the entry point.
+    Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+                                                  Asm->getFunctionNumber()));
+
+  shouldEmitTableModule |= shouldEmitTable;
+  shouldEmitMovesModule |= shouldEmitMoves;
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfTableException::EndFunction() {
+  if (!shouldEmitMoves && !shouldEmitTable) return;
+
+  Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+                                                Asm->getFunctionNumber()));
+
+  // Record if this personality index uses a landing pad.
+  bool HasLandingPad = !MMI->getLandingPads().empty();
+  UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads();
+
+  if (HasLandingPad)
+    EmitExceptionTable();
+
+  const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+  MCSymbol *FunctionEHSym =
+    Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+                                      TLOF.isFunctionEHFrameSymbolPrivate());
+
+  // Save EH frame information.
+  EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym,
+                                         Asm->getFunctionNumber(),
+                                         MMI->getPersonalityIndex(),
+                                         Asm->MF->getFrameInfo()->adjustsStack(),
+                                         !MMI->getLandingPads().empty(),
+                                         MMI->getFrameMoves(),
+                                         Asm->MF->getFunction()));
}
diff --git a/final/lib/CodeGen/AsmPrinter/Makefile b/final/lib/CodeGen/AsmPrinter/Makefile
new file mode 100644
index 00000000000..60aa6cbcf6f
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/AsmPrinter/Makefile ---------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMAsmPrinter
+
+include $(LEVEL)/Makefile.common
diff --git a/final/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/final/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
new file mode 100644
index 00000000000..11538176775
--- /dev/null
+++ b/final/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -0,0 +1,166 @@
+//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements printing the assembly code for an Ocaml frametable.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+#include <ctype.h>
+using namespace llvm;
+
+namespace {
+
+  class OcamlGCMetadataPrinter : public GCMetadataPrinter {
+  public:
+    void beginAssembly(AsmPrinter &AP);
+    void finishAssembly(AsmPrinter &AP);
+  };
+
+}
+
+static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter>
+Y("ocaml", "ocaml 3.10-compatible collector");
+
+void llvm::linkOcamlGCPrinter() { }
+
+static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
+  const std::string &MId = M.getModuleIdentifier();
+
+  std::string SymName;
+  SymName += "caml";
+  size_t Letter = SymName.size();
+  SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  SymName += "__";
+  SymName += Id;
+
+  // Capitalize the first letter of the module name.
+  SymName[Letter] = toupper(SymName[Letter]);
+
+  SmallString<128> TmpStr;
+  AP.Mang->getNameWithPrefix(TmpStr, SymName);
+
+  MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
+
+  AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
+  AP.OutStreamer.EmitLabel(Sym);
+}
+
+void OcamlGCMetadataPrinter::beginAssembly(AsmPrinter &AP) {
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+  EmitCamlGlobal(getModule(), AP, "code_begin");
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "data_begin");
+}
+
+/// finishAssembly - Print the frametable. The ocaml frametable format is thus:
+///
+///   extern "C" struct align(sizeof(intptr_t)) {
+///     uint16_t NumDescriptors;
+///     struct align(sizeof(intptr_t)) {
+///       void *ReturnAddress;
+///       uint16_t FrameSize;
+///       uint16_t NumLiveOffsets;
+///       uint16_t LiveOffsets[NumLiveOffsets];
+///     } Descriptors[NumDescriptors];
+///   } caml${module}__frametable;
+///
+/// Note that this precludes programs from stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if
+/// either condition is detected in a function which uses the GC.
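+///
+/// For example, a module "foo.ml" with one GC-using function containing two
+/// safe points yields a camlFoo__frametable symbol with NumDescriptors == 2,
+/// one descriptor per safe point.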
+///
+void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
+  unsigned IntPtrSize = AP.TM.getTargetData()->getPointerSize();
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getTextSection());
+  EmitCamlGlobal(getModule(), AP, "code_end");
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "data_end");
+
+  // FIXME: Why does ocaml emit this??
+  AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0);
+
+  AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
+  EmitCamlGlobal(getModule(), AP, "frametable");
+
+  int NumDescriptors = 0;
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      NumDescriptors++;
+    }
+  }
+
+  if (NumDescriptors >= 1<<16) {
+    // Very rude!
+    report_fatal_error("Too many descriptors for ocaml GC");
+  }
+  AP.EmitInt16(NumDescriptors);
+  AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
+  for (iterator I = begin(), IE = end(); I != IE; ++I) {
+    GCFunctionInfo &FI = **I;
+
+    uint64_t FrameSize = FI.getFrameSize();
+    if (FrameSize >= 1<<16) {
+      // Very rude!
+      report_fatal_error("Function '" + FI.getFunction().getName() +
+                         "' is too large for the ocaml GC! "
+                         "Frame size " + Twine(FrameSize) + " >= 65536.\n"
+                         "(" + Twine(uintptr_t(&FI)) + ")");
+    }
+
+    AP.OutStreamer.AddComment("live roots for " +
+                              Twine(FI.getFunction().getName()));
+    AP.OutStreamer.AddBlankLine();
+
+    for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+      size_t LiveCount = FI.live_size(J);
+      if (LiveCount >= 1<<16) {
+        // Very rude!
+        report_fatal_error("Function '" + FI.getFunction().getName() +
                           "' is too large for the ocaml GC! "
+                           "Live root count " + Twine(LiveCount) + " >= 65536.");
+      }
+
+      AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0);
+      AP.EmitInt16(FrameSize);
+      AP.EmitInt16(LiveCount);
+
+      for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
+                                         KE = FI.live_end(J); K != KE; ++K) {
+        if (K->StackOffset >= 1<<16) {
+          // Very rude!
+          report_fatal_error(
+            "GC root stack offset is outside of fixed stack frame and out "
+            "of range for ocaml GC!");
+        }
+        AP.EmitInt16(K->StackOffset);
+      }
+
+      AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+    }
+  }
+}
diff --git a/final/lib/CodeGen/BranchFolding.cpp b/final/lib/CodeGen/BranchFolding.cpp
new file mode 100644
index 00000000000..78a87431fea
--- /dev/null
+++ b/final/lib/CodeGen/BranchFolding.cpp
@@ -0,0 +1,1341 @@
+//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass forwards branches to unconditional branches to make them branch
+// directly to the target block. This pass often results in dead MBBs, which
+// it then removes.
+//
+// Note that this pass must be run after register allocation; it cannot handle
+// SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "branchfolding"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+STATISTIC(NumBranchOpts, "Number of branches optimized");
+STATISTIC(NumTailMerge,  "Number of block tails merged");
+
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+                              cl::init(cl::BOU_UNSET), cl::Hidden);
+
+// Throttle for huge numbers of predecessors (compile speed problems)
+static cl::opt<unsigned>
+TailMergeThreshold("tail-merge-threshold",
+          cl::desc("Max number of predecessors to consider tail merging"),
+          cl::init(150), cl::Hidden);
+
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+          cl::desc("Min number of instructions to consider tail merging"),
+          cl::init(3), cl::Hidden);
+
+namespace {
+  /// BranchFolderPass - Wrap branch folder in a machine function pass.
+  class BranchFolderPass : public MachineFunctionPass,
+                           public BranchFolder {
+  public:
+    static char ID;
+    explicit BranchFolderPass(bool defaultEnableTailMerge)
+      : MachineFunctionPass(ID), BranchFolder(defaultEnableTailMerge) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+  };
+}
+
+char BranchFolderPass::ID = 0;
+
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+  return new BranchFolderPass(DefaultEnableTailMerge);
+}
+
+bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+  return OptimizeFunction(MF,
+                          MF.getTarget().getInstrInfo(),
+                          MF.getTarget().getRegisterInfo(),
+                          getAnalysisIfAvailable<MachineModuleInfo>());
+}
+
+
+BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
+  switch (FlagEnableTailMerge) {
+  case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
+  case cl::BOU_TRUE:  EnableTailMerge = true;  break;
+  case cl::BOU_FALSE: EnableTailMerge = false; break;
+  }
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+  MachineFunction *MF = MBB->getParent();
+  // Drop all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // Remove the block.
+  MF->erase(MBB);
+}
+
+/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
+/// followed by terminators, and if the implicitly defined registers are not
+/// used by the terminators, remove those implicit_def's. e.g.
+/// BB1:
+///   r0 = implicit_def
+///   r1 = implicit_def
+///   br
+/// This block can be optimized away later if the implicit instructions are
+/// removed.
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
+  SmallSet<unsigned, 4> ImpDefRegs;
+  MachineBasicBlock::iterator I = MBB->begin();
+  while (I != MBB->end()) {
+    if (!I->isImplicitDef())
+      break;
+    unsigned Reg = I->getOperand(0).getReg();
+    ImpDefRegs.insert(Reg);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs)
+      ImpDefRegs.insert(SubReg);
+    ++I;
+  }
+  if (ImpDefRegs.empty())
+    return false;
+
+  MachineBasicBlock::iterator FirstTerm = I;
+  while (I != MBB->end()) {
+    if (!TII->isUnpredicatedTerminator(I))
+      return false;
+    // See if it uses any of the implicitly defined registers.
+    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = I->getOperand(i);
+      if (!MO.isReg() || !MO.isUse())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (ImpDefRegs.count(Reg))
+        return false;
+    }
+    ++I;
+  }
+
+  I = MBB->begin();
+  while (I != FirstTerm) {
+    MachineInstr *ImpDefMI = &*I;
+    ++I;
+    MBB->erase(ImpDefMI);
+  }
+
+  return true;
+}
+
+/// OptimizeFunction - Perform branch folding, tail merging and other
+/// CFG optimizations on the given function.
+bool BranchFolder::OptimizeFunction(MachineFunction &MF,
+                                    const TargetInstrInfo *tii,
+                                    const TargetRegisterInfo *tri,
+                                    MachineModuleInfo *mmi) {
+  if (!tii) return false;
+
+  TII = tii;
+  TRI = tri;
+  MMI = mmi;
+
+  RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : NULL;
+
+  // Fix CFG. The later algorithms expect it to be right.
+  bool MadeChange = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; I++) {
+    MachineBasicBlock *MBB = I, *TBB = 0, *FBB = 0;
+    SmallVector<MachineOperand, 4> Cond;
+    if (!TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true))
+      MadeChange |= MBB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+    MadeChange |= OptimizeImpDefsBlock(MBB);
+  }
+
+  bool MadeChangeThisIteration = true;
+  while (MadeChangeThisIteration) {
+    MadeChangeThisIteration = false;
+    MadeChangeThisIteration |= TailMergeBlocks(MF);
+    MadeChangeThisIteration |= OptimizeBranches(MF);
+    MadeChange |= MadeChangeThisIteration;
+  }
+
+  // See if any jump tables have become dead as the code generator
+  // did its thing.
+  MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
+  if (JTI == 0) {
+    delete RS;
+    return MadeChange;
+  }
+
+  // Walk the function to find jump tables that are live.
+  BitVector JTIsLive(JTI->getJumpTables().size());
+  for (MachineFunction::iterator BB = MF.begin(), E = MF.end();
+       BB != E; ++BB) {
+    for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end();
+         I != E; ++I)
+      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+        MachineOperand &Op = I->getOperand(op);
+        if (!Op.isJTI()) continue;
+
+        // Remember that this JT is live.
+        JTIsLive.set(Op.getIndex());
+      }
+  }
+
+  // Finally, remove dead jump tables. This happens when the
+  // indirect jump was unreachable (and thus deleted).
+  for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i)
+    if (!JTIsLive.test(i)) {
+      JTI->RemoveJumpTable(i);
+      MadeChange = true;
+    }
+
+  delete RS;
+  return MadeChange;
+}
+
+//===----------------------------------------------------------------------===//
+//  Tail Merging of Blocks
+//===----------------------------------------------------------------------===//
+
+/// HashMachineInstr - Compute a hash value for MI and its operands.
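+/// The hash only needs to be good enough to bucket candidate blocks: blocks
+/// whose hashes collide are still compared instruction-by-instruction (see
+/// ComputeCommonTailLength below), so a collision costs compile time, not
+/// correctness.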
+static unsigned HashMachineInstr(const MachineInstr *MI) { + unsigned Hash = MI->getOpcode(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + + // Merge in bits from the operand if easy. + unsigned OperandHash = 0; + switch (Op.getType()) { + case MachineOperand::MO_Register: OperandHash = Op.getReg(); break; + case MachineOperand::MO_Immediate: OperandHash = Op.getImm(); break; + case MachineOperand::MO_MachineBasicBlock: + OperandHash = Op.getMBB()->getNumber(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + OperandHash = Op.getIndex(); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + // Global address / external symbol are too hard, don't bother, but do + // pull in the offset. + OperandHash = Op.getOffset(); + break; + default: break; + } + + Hash += ((OperandHash << 3) | Op.getType()) << (i&31); + } + return Hash; +} + +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { + MachineBasicBlock::const_iterator I = MBB->end(); + if (I == MBB->begin()) + return 0; // Empty MBB. + + --I; + // Skip debug info so it will not affect codegen. + while (I->isDebugValue()) { + if (I==MBB->begin()) + return 0; // MBB empty except for debug info. + --I; + } + + return HashMachineInstr(I); +} + +/// ComputeCommonTailLength - Given two machine basic blocks, compute the number +/// of instructions they actually have in common together at their end. Return +/// iterators for the first shared instruction in each block. +static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2) { + I1 = MBB1->end(); + I2 = MBB2->end(); + + unsigned TailLen = 0; + while (I1 != MBB1->begin() && I2 != MBB2->begin()) { + --I1; --I2; + // Skip debugging pseudos; necessary to avoid changing the code. + while (I1->isDebugValue()) { + if (I1==MBB1->begin()) { + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) + // I1==DBG at begin; I2==DBG at begin + return TailLen; + --I2; + } + ++I2; + // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin + return TailLen; + } + --I1; + } + // I1==first (untested) non-DBG preceding known match + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) { + ++I1; + // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin + return TailLen; + } + --I2; + } + // I1, I2==first (untested) non-DBGs preceding known match + if (!I1->isIdenticalTo(I2) || + // FIXME: This check is dubious. It's used to get around a problem where + // people incorrectly expect inline asm directives to remain in the same + // relative order. This is untenable because normal compiler + // optimizations (like this one) may reorder and/or merge these + // directives. + I1->isInlineAsm()) { + ++I1; ++I2; + break; + } + ++TailLen; + } + // Back past possible debugging pseudos at beginning of block. This matters + // when one block differs from the other only by whether debugging pseudos + // are present at the beginning. (This way, the various checks later for + // I1==MBB1->begin() work as expected.) 
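+  // For example, with MBB1 = [A, B] and MBB2 = [DBG_VALUE, A, B], the loop
+  // above stops with I1 == MBB1->begin() while I2 still points just past the
+  // leading DBG_VALUE; the code below walks the iterator back across such
+  // debug pseudos so the begin() checks behave consistently.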
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) { + --I2; + while (I2->isDebugValue()) { + if (I2 == MBB2->begin()) { + return TailLen; + } + --I2; + } + ++I2; + } + if (I2 == MBB2->begin() && I1 != MBB1->begin()) { + --I1; + while (I1->isDebugValue()) { + if (I1 == MBB1->begin()) + return TailLen; + --I1; + } + ++I1; + } + return TailLen; +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) { + TII->ReplaceTailWithBranchTo(OldInst, NewDest); + ++NumTailMerge; +} + +/// SplitMBBAt - Given a machine basic block and an iterator into it, split the +/// MBB so that the part before the iterator falls into the part starting at the +/// iterator. This returns the new MBB. +MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + + MachineFunction &MF = *CurMBB.getParent(); + + // Create the fall-through block. + MachineFunction::iterator MBBI = &CurMBB; + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + CurMBB.getParent()->insert(++MBBI, NewMBB); + + // Move all the successors of this block to the specified block. + NewMBB->transferSuccessors(&CurMBB); + + // Add an edge from CurMBB to NewMBB for the fall-through. + CurMBB.addSuccessor(NewMBB); + + // Splice the code over. + NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + + // For targets that use the register scavenger, we must maintain LiveIns. + if (RS) { + RS->enterBasicBlock(&CurMBB); + if (!CurMBB.empty()) + RS->forward(prior(CurMBB.end())); + BitVector RegsLiveAtExit(TRI->getNumRegs()); + RS->getRegsUsed(RegsLiveAtExit, false); + for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++) + if (RegsLiveAtExit[i]) + NewMBB->addLiveIn(i); + } + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + unsigned Time = 0; + for (; I != E; ++I) { + if (I->isDebugValue()) + continue; + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isCall()) + Time += 10; + else if (TID.mayLoad() || TID.mayStore()) + Time += 2; + else + ++Time; + } + return Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. 
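+//
+// E.g. instead of appending "b SuccBB" after "beq NextBB", emit the single
+// instruction "bne SuccBB" and let CurMBB fall through to NextBB.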
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + MachineBasicBlock *NextBB = I; + if (TBB == NextBB && !Cond.empty() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector(), dl); +} + +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + else if (getHash() > o.getHash()) + return false; + else if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + else if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + else { + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + llvm_unreachable("Predecessor appears twice"); +#endif + return false; + } +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. +static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; + } + --I; + if (!I->getDesc().isTerminator()) break; + ++NumTerms; + } + return NumTerms; +} + +/// ProfitableToMerge - Check if two machine basic blocks have a common tail +/// and decide if it would be profitable to merge those tails. Return the +/// length of the common tail and iterators to the first common instruction +/// in each block. +static bool ProfitableToMerge(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + unsigned minCommonTailLength, + unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); + if (CommonTailLen == 0) + return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); + + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. + if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. 
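+  // (Each of the two blocks had its own branch to SuccBB stripped; after
+  // merging, a single branch in the shared tail replaces both, so it behaves
+  // like one more instruction in common.)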
+ unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().getDesc().isBarrier() && + !MBB2->back().getDesc().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + MachineFunction *MF = MBB1->getParent(); + if (EffectiveTailLen >= 2 && + MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && + (I1 == MBB1->begin() || I2 == MBB2->begin())) + return true; + + return false; +} + +/// ComputeSameTails - Look through all the blocks in MergePotentials that have +/// hash CurHash (guaranteed to match the last element). Build the vector +/// SameTails of all those that have the (same) largest number of instructions +/// in common of any pair of these blocks. SameTails entries contain an +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// instruction where the matching code sequence begins. +/// Order of elements in SameTails is the reverse of the order in which +/// those blocks appear in MergePotentials (where they are not necessarily +/// consecutive). +unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + unsigned maxCommonTailLength = 0U; + SameTails.clear(); + MachineBasicBlock::iterator TrialBBI1, TrialBBI2; + MPIterator HighestMPIter = prior(MergePotentials.end()); + for (MPIterator CurMPIter = prior(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; + --CurMPIter) { + for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) { + unsigned CommonTailLen; + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB)) { + if (CommonTailLen > maxCommonTailLength) { + SameTails.clear(); + maxCommonTailLength = CommonTailLen; + HighestMPIter = CurMPIter; + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); + } + if (HighestMPIter == CurMPIter && + CommonTailLen == maxCommonTailLength) + SameTails.push_back(SameTailElt(I, TrialBBI2)); + } + if (I == B) + break; + } + } + return maxCommonTailLength; +} + +/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from +/// MergePotentials, restoring branches at ends of blocks as appropriate. +void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + MPIterator CurMPIter, B; + for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; + --CurMPIter) { + // Put the unconditional branch back, if we need one. + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + if (CurMPIter == B) + break; + } + if (CurMPIter->getHash() != CurHash) + CurMPIter++; + MergePotentials.erase(CurMPIter, MergePotentials.end()); +} + +/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist +/// only of the common tail. Create a block that does by splitting one. 
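+/// E.g. if the shared tail is [C, D] and the chosen block is [A, B, C, D],
+/// the block is split into [A, B] -> [C, D], and the new [C, D] block becomes
+/// the merge target.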
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; + unsigned TimeEstimate = ~0U; + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + // Use PredBB if possible; that doesn't require a new branch. + if (SameTails[i].getBlock() == PredBB) { + commonTailIndex = i; + break; + } + // Otherwise, make a (fairly bogus) choice based on estimate of + // how long it will take the various blocks to execute. + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { + TimeEstimate = t; + commonTailIndex = i; + } + } + + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // If the common tail includes any debug info we will take it pretty + // randomly from one of the inputs. Might be better to remove it? + DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + << maxCommonTailLength); + + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + + // If we split PredBB, newMBB is the new predecessor. + if (PredBB == MBB) + PredBB = newMBB; + + return true; +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + bool MadeChange = false; + + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + dbgs() << "\n"; + if (SuccBB) { + dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + dbgs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + dbgs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? "" : "s") << '\n'; + ); + + // Sort by hash value so that blocks with identical end sequences sort + // together. + std::stable_sort(MergePotentials.begin(), MergePotentials.end()); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = MergePotentials.back().getHash(); + + // Build SameTails, identifying the set of blocks with this hash code + // and with the maximum number of instructions in common. + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); + + // If we didn't find any pair that has at least minCommonTailLength + // instructions in common, remove all blocks with this hash code and retry. 
+ if (SameTails.empty()) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + + // If one of the blocks is the entire common tail (and not the entry + // block, which we can't jump to), we can treat all blocks with this same + // tail at once. Use PredBB if that is one of the possibilities, as that + // will not introduce any extra branches. + MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()-> + getParent()->begin(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; + break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; + } + } + + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { + // None of the blocks consist entirely of the common tail. + // Split a block so that one does. + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + } + + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + // MBB is common tail. Adjust all other BB's to jump to this one. + // Traversal must be forwards so erases work. + DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) + continue; + DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); + // Hack the end off BB i, making it jump to BB commonTailIndex instead. + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + // BB i is no longer a predecessor of SuccBB; remove it from the worklist. + MergePotentials.erase(SameTails[i].getMPIter()); + } + DEBUG(dbgs() << "\n"); + // We leave commonTailIndex in the worklist in case there are other blocks + // that match it with a smaller number of instructions. + MadeChange = true; + } + return MadeChange; +} + +bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { + + if (!EnableTailMerge) return false; + + bool MadeChange = false; + + // First find blocks with no successors. + MergePotentials.clear(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + if (I->succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I)); + } + + // See if we can do any tail merging on those. + if (MergePotentials.size() < TailMergeThreshold && + MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(NULL, NULL); + + // Look at blocks (IBB) with multiple predecessors (PBB). 
+ // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) + + for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); + I != E; ++I) { + if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) { + SmallPtrSet UniquePreds; + MachineBasicBlock *IBB = I; + MachineBasicBlock *PredBB = prior(I); + MergePotentials.clear(); + for (MachineBasicBlock::pred_iterator P = I->pred_begin(), + E2 = I->pred_end(); + P != E2; ++P) { + MachineBasicBlock *PBB = *P; + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB)) + continue; + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and + // we cannot reverse the branch. + SmallVector NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) + FBB = llvm::next(MachineFunction::iterator(PBB)); + } + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice + // to have a bit in the edge so we didn't have to do all this. + if (IBB->isLandingPad()) { + MachineFunction::iterator IP = PBB; IP++; + MachineBasicBlock *PredNextBB = NULL; + if (IP != MF.end()) + PredNextBB = IP; + if (TBB == NULL) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr + continue; + } + } + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); + } + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + } + } + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + // Reinsert an unconditional branch if needed. + // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks. 
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); + } + } + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Branch Optimization +//===----------------------------------------------------------------------===// + +bool BranchFolder::OptimizeBranches(MachineFunction &MF) { + bool MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + MadeChange |= OptimizeBlock(MBB); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + +// Blocks should be considered empty if they contain only debug info; +// else the debug info would affect codegen. +static bool IsEmptyBlock(MachineBasicBlock *MBB) { + if (MBB->empty()) + return true; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI!=MBBE; ++MBBI) { + if (!MBBI->isDebugValue()) + return false; + } + return true; +} + +// Blocks with only debug info and branches should be considered the same +// as blocks with only branches. +static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { + MachineBasicBlock::iterator MBBI, MBBE; + for (MBBI = MBB->begin(), MBBE = MBB->end(); MBBI!=MBBE; ++MBBI) { + if (!MBBI->isDebugValue()) + break; + } + return (MBBI->getDesc().isBranch()); +} + +/// IsBetterFallthrough - Return true if it would be clearly better to +/// fall-through to MBB1 than to fall through into MBB2. This has to return +/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will +/// result in infinite loops. +static bool IsBetterFallthrough(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { + // Right now, we use a simple heuristic. If MBB2 ends with a call, and + // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to + // optimize branches that branch to either a return block or an assert block + // into a fallthrough to the return. + if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false; + + // If there is a clear successor ordering we make sure that one block + // will fall through to the next + if (MBB1->isSuccessor(MBB2)) return true; + if (MBB2->isSuccessor(MBB1)) return false; + + // Neither block consists entirely of debug info (per IsEmptyBlock check), + // so we needn't test for falling off the beginning here. + MachineBasicBlock::iterator MBB1I = --MBB1->end(); + while (MBB1I->isDebugValue()) + --MBB1I; + MachineBasicBlock::iterator MBB2I = --MBB2->end(); + while (MBB2I->isDebugValue()) + --MBB2I; + return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); +} + +/// OptimizeBlock - Analyze and optimize control flow related to the specified +/// block. This is never called on the entry block. +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction &MF = *MBB->getParent(); + DebugLoc dl; // FIXME: this is nowhere +ReoptimizeBlock: + + MachineFunction::iterator FallThrough = MBB; + ++FallThrough; + + // If this block is empty, make everyone use its fall-through, not the block + // explicitly. Landing pads should not do this since the landing-pad table + // points to this block. Blocks with their addresses taken shouldn't be + // optimized away. 
+ if (IsEmptyBlock(MBB) && !MBB->isLandingPad() && !MBB->hasAddressTaken()) { + // Dead block? Leave for cleanup later. + if (MBB->pred_empty()) return MadeChange; + + if (FallThrough == MF.end()) { + // TODO: Simplify preds to not branch here if possible! + } else { + // Rewrite all predecessors of the old block to go to the fallthrough + // instead. + while (!MBB->pred_empty()) { + MachineBasicBlock *Pred = *(MBB->pred_end()-1); + Pred->ReplaceUsesOfBlockWith(MBB, FallThrough); + } + // If MBB was the target of a jump table, update jump tables to go to the + // fallthrough instead. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, FallThrough); + MadeChange = true; + } + return MadeChange; + } + + // Check to see if we can simplify the terminator of the block before this + // one. + MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(MBB)); + + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + if (!PriorUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, + !PriorCond.empty()); + + // If the previous branch is conditional and both conditions go to the same + // destination, remove the branch, replacing it with an unconditional one or + // a fall-through. + if (PriorTBB && PriorTBB == PriorFBB) { + TII->RemoveBranch(PrevBB); + PriorCond.clear(); + if (PriorTBB != MBB) + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the previous block unconditionally falls through to this block and + // this block has no other predecessors, move the contents of this block + // into the prior block. This doesn't usually happen when SimplifyCFG + // has been used, but it can happen if tail merging splits a fall-through + // predecessor of a block. + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && + PrevBB.succ_size() == 1 && + !MBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); + PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(MBB); + MadeChange = true; + return MadeChange; + } + + // If the previous branch *only* branches to *this* block (conditional or + // not) remove the branch. + if (PriorTBB == MBB && PriorFBB == 0) { + TII->RemoveBranch(PrevBB); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches somewhere else on the condition and here if + // the condition is false, remove the uncond second branch. + if (PriorFBB == MBB) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches here on true and somewhere else on false, and + // if the branch condition is reversible, reverse the branch to create a + // fall-through. 
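+    // E.g. "beq MBB; b Other" at the end of the prior block becomes
+    // "bne Other", with execution falling through into MBB.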
+ if (PriorTBB == MBB) { + SmallVector NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this block has no successors (e.g. it is a return block or ends with + // a call to a no-return function like abort or __cxa_throw) and if the pred + // falls through into this block, and if it would otherwise fall through + // into the block after this, move this block to the end of the function. + // + // We consider it more likely that execution will stay in the function (e.g. + // due to loops) than it is to exit it. This asserts in loops etc, moving + // the assert condition out of the loop body. + if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && + MachineFunction::iterator(PriorTBB) == FallThrough && + !MBB->canFallThrough()) { + bool DoTransform = true; + + // We have to be careful that the succs of PredBB aren't both no-successor + // blocks. If neither have successors and if PredBB is the second from + // last block in the function, we'd just keep swapping the two blocks for + // last. Only do the swap if one is clearly better to fall through than + // the other. + if (FallThrough == --MF.end() && + !IsBetterFallthrough(PriorTBB, MBB)) + DoTransform = false; + + if (DoTransform) { + // Reverse the branch so we will fall through on the previous true cond. + SmallVector NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DEBUG(dbgs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); + + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); + + // Move this block to the end of the function. + MBB->moveAfter(--MF.end()); + MadeChange = true; + ++NumBranchOpts; + return MadeChange; + } + } + } + } + + // Analyze the branch in the current block. + MachineBasicBlock *CurTBB = 0, *CurFBB = 0; + SmallVector CurCond; + bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); + if (!CurUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); + + // If this is a two-way branch, and the FBB branches to this block, reverse + // the condition so the single-basic-block loop is faster. Instead of: + // Loop: xxx; jcc Out; jmp Loop + // we want: + // Loop: xxx; jncc Loop; jmp Out + if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { + SmallVector NewCond(CurCond); + if (!TII->ReverseBranchCondition(NewCond)) { + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this branch is the only thing in its block, see if we can forward + // other blocks across it. + if (CurTBB && CurCond.empty() && CurFBB == 0 && + IsBranchOnlyBlock(MBB) && CurTBB != MBB && + !MBB->hasAddressTaken()) { + // This block may contain just an unconditional branch. Because there can + // be 'non-branch terminators' in the block, try removing the branch and + // then seeing if the block is empty. + TII->RemoveBranch(*MBB); + // If the only things remaining in the block are debug info, remove these + // as well, so this will behave the same as an empty block in non-debug + // mode. 
+ if (!MBB->empty()) { + bool NonDebugInfoFound = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (!I->isDebugValue()) { + NonDebugInfoFound = true; + break; + } + } + if (!NonDebugInfoFound) + // Make the block empty, losing the debug info (we could probably + // improve this in some cases.) + MBB->erase(MBB->begin(), MBB->end()); + } + // If this block is just an unconditional branch to CurTBB, we can + // usually completely eliminate the block. The only case we cannot + // completely eliminate the block is when the block before this one + // falls through into MBB and we can't understand the prior block's branch + // condition. + if (MBB->empty()) { + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); + if (PredHasNoFallThrough || !PriorUnAnalyzable || + !PrevBB.isSuccessor(MBB)) { + // If the prior block falls through into us, turn it into an + // explicit branch to us to make updates simpler. + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + PriorTBB != MBB && PriorFBB != MBB) { + if (PriorTBB == 0) { + assert(PriorCond.empty() && PriorFBB == 0 && + "Bad branch analysis"); + PriorTBB = MBB; + } else { + assert(PriorFBB == 0 && "Machine CFG out of date!"); + PriorFBB = MBB; + } + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); + } + + // Iterate through all the predecessors, revectoring each in-turn. + size_t PI = 0; + bool DidChange = false; + bool HasBranchToSelf = false; + while(PI != MBB->pred_size()) { + MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI); + if (PMBB == MBB) { + // If this block has an uncond branch to itself, leave it. + ++PI; + HasBranchToSelf = true; + } else { + DidChange = true; + PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + // If this change resulted in PMBB ending in a conditional + // branch where both conditions go to the same destination, + // change this to an unconditional branch (and fix the CFG). + MachineBasicBlock *NewCurTBB = 0, *NewCurFBB = 0; + SmallVector NewCurCond; + bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, + NewCurFBB, NewCurCond, true); + if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + TII->RemoveBranch(*PMBB); + NewCurCond.clear(); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); + MadeChange = true; + ++NumBranchOpts; + PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); + } + } + } + + // Change any jumptables to go to the new MBB. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); + if (DidChange) { + ++NumBranchOpts; + MadeChange = true; + if (!HasBranchToSelf) return MadeChange; + } + } + } + + // Add the branch back if the block is more than just an uncond branch. + TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); + } + } + + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!PrevBB.canFallThrough()) { + + // Now we know that there was no fall-through into this block, check to + // see if it has a fall-through into its successor. + bool CurFallsThru = MBB->canFallThrough(); + + if (!MBB->isLandingPad()) { + // Check all the predecessors of this block. If one of them has no fall + // throughs, move this block right after it. 
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + E = MBB->pred_end(); PI != E; ++PI) { + // Analyze the branch at the end of the pred. + MachineBasicBlock *PredBB = *PI; + MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; + MachineBasicBlock *PredTBB = 0, *PredFBB = 0; + SmallVector PredCond; + if (PredBB != MBB && !PredBB->canFallThrough() && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) + && (!CurFallsThru || !CurTBB || !CurFBB) + && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. + // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); + } + MBB->moveAfter(PredBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + E = MBB->succ_end(); SI != E; ++SI) { + // Analyze the branch at the end of the block before the succ. + MachineBasicBlock *SuccBB = *SI; + MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev; + + // If this block doesn't already fall-through to that successor, and if + // the succ doesn't already have a block that can fall through into it, + // and if the successor isn't an EH destination, we can arrange for the + // fallthrough to happen. + if (SuccBB != MBB && &*SuccPrev != MBB && + !SuccPrev->canFallThrough() && !CurUnAnalyzable && + !SuccBB->isLandingPad()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + + // Okay, there is no really great place to put this block. If, however, + // the block before this one would be a fall-through if this block were + // removed, move this block to the end of the function. + MachineBasicBlock *PrevTBB = 0, *PrevFBB = 0; + SmallVector PrevCond; + if (FallThrough != MF.end() && + !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && + PrevBB.isSuccessor(FallThrough)) { + MBB->moveAfter(--MF.end()); + MadeChange = true; + return MadeChange; + } + } + } + + return MadeChange; +} diff --git a/final/lib/CodeGen/BranchFolding.h b/final/lib/CodeGen/BranchFolding.h new file mode 100644 index 00000000000..15dfa7f6bee --- /dev/null +++ b/final/lib/CodeGen/BranchFolding.h @@ -0,0 +1,116 @@ +//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
+#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include <vector>
+
+namespace llvm {
+  class MachineFunction;
+  class MachineModuleInfo;
+  class RegScavenger;
+  class TargetInstrInfo;
+  class TargetRegisterInfo;
+  template<typename T> class SmallVectorImpl;
+
+  class BranchFolder {
+  public:
+    explicit BranchFolder(bool defaultEnableTailMerge);
+
+    bool OptimizeFunction(MachineFunction &MF,
+                          const TargetInstrInfo *tii,
+                          const TargetRegisterInfo *tri,
+                          MachineModuleInfo *mmi);
+  private:
+    class MergePotentialsElt {
+      unsigned Hash;
+      MachineBasicBlock *Block;
+    public:
+      MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+        : Hash(h), Block(b) {}
+
+      unsigned getHash() const { return Hash; }
+      MachineBasicBlock *getBlock() const { return Block; }
+
+      void setBlock(MachineBasicBlock *MBB) {
+        Block = MBB;
+      }
+
+      bool operator<(const MergePotentialsElt &) const;
+    };
+    typedef std::vector<MergePotentialsElt>::iterator MPIterator;
+    std::vector<MergePotentialsElt> MergePotentials;
+
+    class SameTailElt {
+      MPIterator MPIter;
+      MachineBasicBlock::iterator TailStartPos;
+    public:
+      SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+        : MPIter(mp), TailStartPos(tsp) {}
+
+      MPIterator getMPIter() const {
+        return MPIter;
+      }
+      MergePotentialsElt &getMergePotentialsElt() const {
+        return *getMPIter();
+      }
+      MachineBasicBlock::iterator getTailStartPos() const {
+        return TailStartPos;
+      }
+      unsigned getHash() const {
+        return getMergePotentialsElt().getHash();
+      }
+      MachineBasicBlock *getBlock() const {
+        return getMergePotentialsElt().getBlock();
+      }
+      bool tailIsWholeBlock() const {
+        return TailStartPos == getBlock()->begin();
+      }
+
+      void setBlock(MachineBasicBlock *MBB) {
+        getMergePotentialsElt().setBlock(MBB);
+      }
+      void setTailStartPos(MachineBasicBlock::iterator Pos) {
+        TailStartPos = Pos;
+      }
+    };
+    std::vector<SameTailElt> SameTails;
+
+    bool EnableTailMerge;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineModuleInfo *MMI;
+    RegScavenger *RS;
+
+    bool TailMergeBlocks(MachineFunction &MF);
+    bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+                            MachineBasicBlock* PredBB);
+    void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
+                                 MachineBasicBlock *NewDest);
+    MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
+                                  MachineBasicBlock::iterator BBI1);
+    unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+                              MachineBasicBlock *SuccBB,
+                              MachineBasicBlock *PredBB);
+    void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
+                              MachineBasicBlock* PredBB);
+    bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                   unsigned maxCommonTailLength,
+                                   unsigned &commonTailIndex);
+
+    bool OptimizeBranches(MachineFunction &MF);
+    bool OptimizeBlock(MachineBasicBlock *MBB);
+    void RemoveDeadBlock(MachineBasicBlock *MBB);
+    bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
+  };
+}
+
+#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
diff --git a/final/lib/CodeGen/CMakeLists.txt b/final/lib/CodeGen/CMakeLists.txt
new file mode 100644
index 00000000000..d7d0e1b3812
--- /dev/null
+++ b/final/lib/CodeGen/CMakeLists.txt
@@ -0,0 +1,99 @@
+add_llvm_library(LLVMCodeGen
+  AggressiveAntiDepBreaker.cpp
+  AllocationOrder.cpp
+  Analysis.cpp
+  BranchFolding.cpp
+  CalcSpillWeights.cpp
+  CallingConvLower.cpp
+  CodeGen.cpp
+  CodePlacementOpt.cpp
+  CriticalAntiDepBreaker.cpp
+  DeadMachineInstructionElim.cpp
+  DwarfEHPrepare.cpp
+  EdgeBundles.cpp
+
ELFCodeEmitter.cpp + ELFWriter.cpp + ExpandISelPseudos.cpp + GCMetadata.cpp + GCMetadataPrinter.cpp + GCStrategy.cpp + IfConversion.cpp + InlineSpiller.cpp + IntrinsicLowering.cpp + LLVMTargetMachine.cpp + LatencyPriorityQueue.cpp + LiveDebugVariables.cpp + LiveInterval.cpp + LiveIntervalAnalysis.cpp + LiveIntervalUnion.cpp + LiveStackAnalysis.cpp + LiveVariables.cpp + LiveRangeEdit.cpp + LocalStackSlotAllocation.cpp + LowerSubregs.cpp + MachineBasicBlock.cpp + MachineCSE.cpp + MachineDominators.cpp + MachineFunction.cpp + MachineFunctionAnalysis.cpp + MachineFunctionPass.cpp + MachineFunctionPrinterPass.cpp + MachineInstr.cpp + MachineLICM.cpp + MachineLoopInfo.cpp + MachineLoopRanges.cpp + MachineModuleInfo.cpp + MachineModuleInfoImpls.cpp + MachinePassRegistry.cpp + MachineRegisterInfo.cpp + MachineSSAUpdater.cpp + MachineSink.cpp + MachineVerifier.cpp + ObjectCodeEmitter.cpp + OcamlGC.cpp + OptimizePHIs.cpp + PHIElimination.cpp + PHIEliminationUtils.cpp + Passes.cpp + PeepholeOptimizer.cpp + PostRASchedulerList.cpp + PreAllocSplitting.cpp + ProcessImplicitDefs.cpp + PrologEpilogInserter.cpp + PseudoSourceValue.cpp + RegAllocBasic.cpp + RegAllocFast.cpp + RegAllocGreedy.cpp + RegAllocLinearScan.cpp + RegAllocPBQP.cpp + RegisterCoalescer.cpp + RegisterScavenging.cpp + RenderMachineFunction.cpp + ScheduleDAG.cpp + ScheduleDAGEmit.cpp + ScheduleDAGInstrs.cpp + ScheduleDAGPrinter.cpp + ScoreboardHazardRecognizer.cpp + ShadowStackGC.cpp + ShrinkWrapping.cpp + SimpleRegisterCoalescing.cpp + SjLjEHPrepare.cpp + SlotIndexes.cpp + Spiller.cpp + SpillPlacement.cpp + SplitKit.cpp + Splitter.cpp + StackProtector.cpp + StackSlotColoring.cpp + StrongPHIElimination.cpp + TailDuplication.cpp + TargetInstrInfoImpl.cpp + TargetLoweringObjectFileImpl.cpp + TwoAddressInstructionPass.cpp + UnreachableBlockElim.cpp + VirtRegMap.cpp + VirtRegRewriter.cpp + ) + +add_subdirectory(SelectionDAG) +add_subdirectory(AsmPrinter) diff --git a/final/lib/CodeGen/CalcSpillWeights.cpp b/final/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 00000000000..76bb3d148b0 --- /dev/null +++ b/final/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,227 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+                "Calculate spill weights", false, false)
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+  au.addRequired<LiveIntervals>();
+  au.addRequired<MachineLoopInfo>();
+  au.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+  DEBUG(dbgs() << "********** Compute Spill Weights **********\n"
+               << "********** Function: "
+               << fn.getFunction()->getName() << '\n');
+
+  LiveIntervals &lis = getAnalysis<LiveIntervals>();
+  VirtRegAuxInfo vrai(fn, lis, getAnalysis<MachineLoopInfo>());
+  for (LiveIntervals::iterator I = lis.begin(), E = lis.end(); I != E; ++I) {
+    LiveInterval &li = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(li.reg))
+      vrai.CalculateWeightAndHint(li);
+  }
+  return false;
+}
+
+// Return the preferred allocation register for reg, given a COPY instruction.
+static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+                         const TargetRegisterInfo &tri,
+                         const MachineRegisterInfo &mri) {
+  unsigned sub, hreg, hsub;
+  if (mi->getOperand(0).getReg() == reg) {
+    sub = mi->getOperand(0).getSubReg();
+    hreg = mi->getOperand(1).getReg();
+    hsub = mi->getOperand(1).getSubReg();
+  } else {
+    sub = mi->getOperand(1).getSubReg();
+    hreg = mi->getOperand(0).getReg();
+    hsub = mi->getOperand(0).getSubReg();
+  }
+
+  if (!hreg)
+    return 0;
+
+  if (TargetRegisterInfo::isVirtualRegister(hreg))
+    return sub == hsub ? hreg : 0;
+
+  const TargetRegisterClass *rc = mri.getRegClass(reg);
+
+  // Only allow physreg hints in rc.
+  if (sub == 0)
+    return rc->contains(hreg) ? hreg : 0;
+
+  // reg:sub should match the physreg hreg.
+  return tri.getMatchingSuperReg(hreg, sub, rc);
+}
+
+void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo();
+  MachineBasicBlock *mbb = 0;
+  MachineLoop *loop = 0;
+  unsigned loopDepth = 0;
+  bool isExiting = false;
+  float totalWeight = 0;
+  SmallPtrSet<MachineInstr*, 8> visited;
+
+  // Find the best physreg hint and the best virtreg hint.
+  float bestPhys = 0, bestVirt = 0;
+  unsigned hintPhys = 0, hintVirt = 0;
+
+  // Don't recompute a target specific hint.
+  bool noHint = mri.getRegAllocationHint(li.reg).first != 0;
+
+  for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg);
+       MachineInstr *mi = I.skipInstruction();) {
+    if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue())
+      continue;
+    if (!visited.insert(mi))
+      continue;
+
+    // Get loop info for mi.
+    if (mi->getParent() != mbb) {
+      mbb = mi->getParent();
+      loop = loops_.getLoopFor(mbb);
+      loopDepth = loop ? loop->getLoopDepth() : 0;
+      isExiting = loop ? loop->isLoopExiting(mbb) : false;
+    }
+
+    // Calculate instr weight.
+    bool reads, writes;
+    tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg);
+    float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth);
+
+    // Give extra weight to what looks like a loop induction variable update.
+    if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb))
+      weight *= 3;
+
+    totalWeight += weight;
+
+    // Get allocation hints from copies.
+    if (noHint || !mi->isCopy())
+      continue;
+    unsigned hint = copyHint(mi, li.reg, tri, mri);
+    if (!hint)
+      continue;
+    float hweight = hint_[hint] += weight;
+    if (TargetRegisterInfo::isPhysicalRegister(hint)) {
+      if (hweight > bestPhys && lis_.isAllocatable(hint))
+        bestPhys = hweight, hintPhys = hint;
+    } else {
+      if (hweight > bestVirt)
+        bestVirt = hweight, hintVirt = hint;
+    }
+  }
+
+  hint_.clear();
+
+  // Always prefer the physreg hint.
+  if (unsigned hint = hintPhys ? hintPhys : hintVirt) {
+    mri.setRegAllocationHint(li.reg, 0, hint);
+    // Weakly boost the spill weight of hinted registers.
+    totalWeight *= 1.01F;
+  }
+
+  // Mark li as unspillable if all live ranges are tiny.
+  if (li.isZeroLength()) {
+    li.markNotSpillable();
+    return;
+  }
+
+  // If all of the definitions of the interval are re-materializable,
+  // it is a preferred candidate for spilling. If none of the defs are
+  // loads, then it's potentially very cheap to re-materialize.
+  // FIXME: this gets much more complicated once we support non-trivial
+  // re-materialization.
+  bool isLoad = false;
+  SmallVector<LiveInterval*, 4> spillIs;
+  if (lis_.isReMaterializable(li, spillIs, isLoad)) {
+    if (isLoad)
+      totalWeight *= 0.9F;
+    else
+      totalWeight *= 0.5F;
+  }
+
+  li.weight = normalizeSpillWeight(totalWeight, li.getSize());
+}
+
+void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
+  MachineRegisterInfo &mri = mf_.getRegInfo();
+  const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo();
+  const TargetRegisterClass *orc = mri.getRegClass(reg);
+  SmallPtrSet<const TargetRegisterClass*, 8> rcs;
+
+  for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg),
+       E = mri.reg_nodbg_end(); I != E; ++I) {
+    // The targets don't have accurate enough regclass descriptions that we can
+    // handle subregs. We need something similar to
+    // TRI::getMatchingSuperRegClass, but returning a super class instead of a
+    // sub class.
+    if (I.getOperand().getSubReg()) {
+      DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n');
+      return;
+    }
+    if (const TargetRegisterClass *rc =
+                              I->getDesc().getRegClass(I.getOperandNo(), tri))
+      rcs.insert(rc);
+  }
+
+  // If we found no regclass constraints, just leave reg as is.
+  // In theory, we could inflate to the largest superclass of reg's existing
+  // class, but that might not be legal for the current cpu setting.
+  // This could happen if reg is only used by COPY instructions, so we may need
+  // to improve on this.
+  if (rcs.empty()) {
+    return;
+  }
+
+  // Compute the intersection of all classes in rcs.
+  // This ought to be independent of iteration order, but if the target register
+  // classes don't form a proper algebra, it is possible to get different
+  // results. The solution is to make sure the intersection of any two register
+  // classes is also a register class or the null set.
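As a rough, self-contained model of the arithmetic above: the exact scaling inside LiveIntervals::getSpillWeight and normalizeSpillWeight is not shown in this hunk, so the 10^depth base and the size normalization below are assumptions; the x3, x1.01, and x0.9/x0.5 factors mirror the code.

#include <cmath>

// Hypothetical model of the per-interval spill weight computed by
// CalculateWeightAndHint: uses in deeper loops count exponentially more,
// loop-exiting writes (likely induction-variable updates) count triple,
// hinted intervals get a weak boost, and rematerializable intervals are
// discounted because they are cheap to recompute.
float modelWeight(unsigned reads, unsigned writes, unsigned loopDepth,
                  bool exitingWrite, bool hinted, bool remat, bool rematLoad,
                  unsigned intervalSize /* > 0 */) {
  float w = (reads + writes) * std::pow(10.0f, (float)loopDepth); // assumed base
  if (exitingWrite) w *= 3.0f;   // likely an induction variable update
  if (hinted)       w *= 1.01f;  // weakly prefer hinted registers
  if (remat)        w *= rematLoad ? 0.9f : 0.5f;
  return w / intervalSize;       // stand-in for normalizeSpillWeight
}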
+ const TargetRegisterClass *rc = 0; + for (SmallPtrSet::iterator I = rcs.begin(), + E = rcs.end(); I != E; ++I) { + rc = rc ? getCommonSubClass(rc, *I) : *I; + assert(rc && "Incompatible regclass constraints found"); + } + + if (rc == orc) + return; + DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) + << " to " << rc->getName() <<".\n"); + mri.setRegClass(reg, rc); +} diff --git a/final/lib/CodeGen/CallingConvLower.cpp b/final/lib/CodeGen/CallingConvLower.cpp new file mode 100644 index 00000000000..ecd69a08e86 --- /dev/null +++ b/final/lib/CodeGen/CallingConvLower.cpp @@ -0,0 +1,177 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + TM.getTargetLowering()->HandleByVal(const_cast(this)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + for (const unsigned *Alias = TRI.getOverlaps(Reg); + unsigned Reg = *Alias; ++Alias) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an array of argument values, +/// incorporating info about the formals into this state. +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. 
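HandleByVal above clamps the argument's size and alignment and then reserves an aligned stack slot through AllocateStack. A sketch of the usual alignment arithmetic behind such an allocator, shown for clarity (this is an assumed implementation, not code from this patch):

#include <cassert>

// Round the running offset up to the requested alignment, then bump it
// past the object. 'StackOffset' models CCState's running offset.
unsigned allocateStack(unsigned &StackOffset, unsigned Size, unsigned Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of 2");
  unsigned Offset = (StackOffset + Align - 1) & ~(Align - 1);
  StackOffset = Offset + Size;
  return Offset; // the byval argument lives at [Offset, Offset + Size)
}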
+bool CCState::CheckReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// AnalyzeReturn - Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << EVT(VT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, + SmallVectorImpl &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + MVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << EVT(VT).getEVTString() << "\n"; +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << EVT(VT).getEVTString(); +#endif + llvm_unreachable(0); + } +} diff --git a/final/lib/CodeGen/CodeGen.cpp b/final/lib/CodeGen/CodeGen.cpp new file mode 100644 index 00000000000..515e6f9fde8 --- /dev/null +++ b/final/lib/CodeGen/CodeGen.cpp @@ -0,0 +1,61 @@ +//===-- CodeGen.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
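Each Analyze* routine above delegates the actual location choice to a target-supplied CCAssignFn, and treats a true return value as "unhandled type". A hypothetical callback showing the shape of that contract (the register numbers and the four-register budget are invented for illustration):

#include "llvm/CodeGen/CallingConvLower.h"
using namespace llvm;

// Hypothetical CCAssignFn: first four i32 values go in registers, the rest
// on the stack. Returns false once the value has been given a location.
static bool CC_Sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                      CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                      CCState &State) {
  (void)ArgFlags;
  static const unsigned ArgRegs[] = { 1, 2, 3, 4 }; // made-up physregs
  if (ValVT != MVT::i32)
    return true; // unhandled type; the Analyze* caller reports and aborts
  if (unsigned Reg = State.AllocateReg(ArgRegs, 4)) {
    State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
    return false;
  }
  unsigned Offset = State.AllocateStack(4, 4); // 4 bytes, 4-byte aligned
  State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
  return false;
}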
+// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the +// CodeGen library. +// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" + +using namespace llvm; + +/// initializeCodeGen - Initialize all passes linked into the CodeGen library. +void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeCalculateSpillWeightsPass(Registry); + initializeDeadMachineInstructionElimPass(Registry); + initializeGCModuleInfoPass(Registry); + initializeIfConverterPass(Registry); + initializeLiveDebugVariablesPass(Registry); + initializeLiveIntervalsPass(Registry); + initializeLiveStacksPass(Registry); + initializeLiveVariablesPass(Registry); + initializeMachineCSEPass(Registry); + initializeMachineDominatorTreePass(Registry); + initializeMachineLICMPass(Registry); + initializeMachineLoopInfoPass(Registry); + initializeMachineModuleInfoPass(Registry); + initializeMachineSinkingPass(Registry); + initializeMachineVerifierPassPass(Registry); + initializeOptimizePHIsPass(Registry); + initializePHIEliminationPass(Registry); + initializePeepholeOptimizerPass(Registry); + initializePreAllocSplittingPass(Registry); + initializeProcessImplicitDefsPass(Registry); + initializePEIPass(Registry); + initializeRALinScanPass(Registry); + initializeRegisterCoalescerAnalysisGroup(Registry); + initializeRenderMachineFunctionPass(Registry); + initializeSimpleRegisterCoalescingPass(Registry); + initializeSlotIndexesPass(Registry); + initializeLoopSplitterPass(Registry); + initializeStackProtectorPass(Registry); + initializeStackSlotColoringPass(Registry); + initializeStrongPHIEliminationPass(Registry); + initializeTwoAddressInstructionPassPass(Registry); + initializeUnreachableBlockElimPass(Registry); + initializeUnreachableMachineBlockElimPass(Registry); + initializeVirtRegMapPass(Registry); + initializeLowerIntrinsicsPass(Registry); +} + +void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { + initializeCodeGen(*unwrap(R)); +} diff --git a/final/lib/CodeGen/CodePlacementOpt.cpp b/final/lib/CodeGen/CodePlacementOpt.cpp new file mode 100644 index 00000000000..e37356a60cf --- /dev/null +++ b/final/lib/CodeGen/CodePlacementOpt.cpp @@ -0,0 +1,425 @@ +//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the pass that optimizes code placement and aligns loop +// headers to target-specific alignment boundaries. 
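Tools link initializeCodeGen in so that name-based pass lookup works before any CodeGen pass is constructed; a minimal usage sketch:

#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"

int main() {
  // Register the CodeGen passes with the global registry so that lookup by
  // pass name (plugins, -debug-pass style interfaces) can find them.
  llvm::PassRegistry &Registry = *llvm::PassRegistry::getPassRegistry();
  llvm::initializeCodeGen(Registry);
  return 0;
}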
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "code-placement" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumLoopsAligned, "Number of loops aligned"); +STATISTIC(NumIntraElim, "Number of intra loop branches eliminated"); +STATISTIC(NumIntraMoved, "Number of intra loop branches moved"); + +namespace { + class CodePlacementOpt : public MachineFunctionPass { + const MachineLoopInfo *MLI; + const TargetInstrInfo *TII; + const TargetLowering *TLI; + + public: + static char ID; + CodePlacementOpt() : MachineFunctionPass(ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { + return "Code Placement Optimizer"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + bool HasFallthrough(MachineBasicBlock *MBB); + bool HasAnalyzableTerminator(MachineBasicBlock *MBB); + void Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End); + bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L); + bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, + MachineLoop *L); + bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); + bool OptimizeIntraLoopEdges(MachineFunction &MF); + bool AlignLoops(MachineFunction &MF); + bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); + }; + + char CodePlacementOpt::ID = 0; +} // end anonymous namespace + +FunctionPass *llvm::createCodePlacementOptPass() { + return new CodePlacementOpt(); +} + +/// HasFallthrough - Test whether the given branch has a fallthrough, either as +/// a plain fallthrough or as a fallthrough case of a conditional branch. +/// +bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // This conditional branch has no fallthrough. + if (FBB) + return false; + // An unconditional branch has no fallthrough. + if (Cond.empty() && TBB) + return false; + // It has a fallthrough. + return true; +} + +/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. +/// This is called before major changes are begun to test whether it will be +/// possible to complete the changes. +/// +/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed +/// whenever possible. +/// +bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { + // Conservatively ignore EH landing pads. + if (MBB->isLandingPad()) return false; + + // Aggressively handle return blocks and similar constructs. + if (MBB->succ_empty()) return true; + + // Ask the target's AnalyzeBranch if it can handle this block. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + // Make sure the terminator is understood. + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) + return false; + // Ignore blocks which look like they might have EH-related control flow. 
+ // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't + // recognize the possibility of a control transfer through an unwind. + // Such blocks contain EH_LABEL instructions, however they may be in the + // middle of the block. Instead of searching for them, just check to see + // if the CFG disagrees with AnalyzeBranch. + if (1u + !Cond.empty() != MBB->succ_size()) + return false; + // Make sure we have the option of reversing the condition. + if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) + return false; + return true; +} + +/// Splice - Move the sequence of instructions [Begin,End) to just before +/// InsertPt. Update branch instructions as needed to account for broken +/// fallthrough edges and to take advantage of newly exposed fallthrough +/// opportunities. +/// +void CodePlacementOpt::Splice(MachineFunction &MF, + MachineFunction::iterator InsertPt, + MachineFunction::iterator Begin, + MachineFunction::iterator End) { + assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && + "Splice can't change the entry block!"); + MachineFunction::iterator OldBeginPrior = prior(Begin); + MachineFunction::iterator OldEndPrior = prior(End); + + MF.splice(InsertPt, Begin, End); + + prior(Begin)->updateTerminator(); + OldBeginPrior->updateTerminator(); + OldEndPrior->updateTerminator(); +} + +/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump +/// to the loop top to the top of the loop so that they have a fall through. +/// This can introduce a branch on entry to the loop, but it can eliminate a +/// branch within the loop. See the @simple case in +/// test/CodeGen/X86/loop_blocks.ll for an example of this. +bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + MachineBasicBlock *TopMBB = L->getTopBlock(); + + bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); + + if (TopMBB == MF.begin() || + HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { + new_top: + for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), + PE = TopMBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + if (Pred == TopMBB) continue; + if (HasFallthrough(Pred)) continue; + if (!L->contains(Pred)) continue; + + // Verify that we can analyze all the loop entry edges before beginning + // any changes which will require us to be able to analyze them. + if (Pred == MF.begin()) + continue; + if (!HasAnalyzableTerminator(Pred)) + continue; + if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) + continue; + + // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() + << " to top of loop.\n"); + Changed = true; + + // Move it and all the blocks that can reach it via fallthrough edges + // exclusively, to keep existing fallthrough edges intact. + MachineFunction::iterator Begin = Pred; + MachineFunction::iterator End = llvm::next(Begin); + while (Begin != MF.begin()) { + MachineFunction::iterator Prior = prior(Begin); + if (Prior == MF.begin()) + break; + // Stop when a non-fallthrough edge is found. + if (!HasFallthrough(Prior)) + break; + // Stop if a block which could fall-through out of the loop is found. + if (Prior->isSuccessor(End)) + break; + // If we've reached the top, stop scanning. 
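The Splice helper above is plain ilist surgery plus terminator repair; the movement itself behaves like std::list::splice, as in this self-contained illustration (generic C++, not LLVM code):

#include <cstdio>
#include <list>

int main() {
  // Blocks in layout order 1 2 3 4 5; move the range [3,5) to just before
  // block 2, i.e. Splice(MF, InsertPt=2, Begin=3, End=5) in the pass's terms.
  std::list<int> layout;
  for (int b = 1; b <= 5; ++b) layout.push_back(b);
  std::list<int>::iterator insertPt = layout.begin(); ++insertPt; // -> 2
  std::list<int>::iterator begin = insertPt; ++begin;             // -> 3
  std::list<int>::iterator end = begin; ++end; ++end;             // -> 5
  layout.splice(insertPt, layout, begin, end);
  // Prints 1 3 4 2 5: blocks 1, 2, and 4 each gained or lost a layout
  // neighbor, which is why Splice updates exactly three terminators.
  for (std::list<int>::iterator I = layout.begin(); I != layout.end(); ++I)
    std::printf("%d ", *I);
  std::printf("\n");
  return 0;
}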
+        if (Prior == MachineFunction::iterator(TopMBB)) {
+          // We know top currently has a fall through (because we just checked
+          // it) which would be lost if we do the transformation, so it isn't
+          // worthwhile to do the transformation unless it would expose a new
+          // fallthrough edge.
+          if (!Prior->isSuccessor(End))
+            goto next_pred;
+          // Otherwise we can stop scanning and proceed to move the blocks.
+          break;
+        }
+        // If we hit a switch or something complicated, don't move anything
+        // for this predecessor.
+        if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior))))
+          break;
+        // Ok, the block prior to Begin will be moved along with the rest.
+        // Extend the range to include it.
+        Begin = Prior;
+        ++NumIntraMoved;
+      }
+
+      // Move the blocks.
+      Splice(MF, TopMBB, Begin, End);
+
+      // Update TopMBB.
+      TopMBB = L->getTopBlock();
+
+      // We have a new loop top. Iterate on it. We shouldn't have to do this
+      // too many times if BranchFolding has done a reasonable job.
+      goto new_top;
+    next_pred:;
+    }
+  }
+
+  // If the loop previously didn't exit with a fall-through and it now does,
+  // we eliminated a branch.
+  if (Changed &&
+      !BotHasFallthrough &&
+      HasFallthrough(L->getBottomBlock())) {
+    ++NumIntraElim;
+  }
+
+  return Changed;
+}
+
+/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the
+/// portion of the loop contiguous with the header. This usually makes the loop
+/// contiguous, provided that AnalyzeBranch can handle all the relevant
+/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll
+/// for an example of this.
+bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
+                                                   MachineLoop *L) {
+  bool Changed = false;
+  MachineBasicBlock *TopMBB = L->getTopBlock();
+  MachineBasicBlock *BotMBB = L->getBottomBlock();
+
+  // Determine a position to move orphaned loop blocks to. If TopMBB is not
+  // entered via fallthrough and BotMBB is exited via fallthrough, prepend them
+  // to the top of the loop to avoid losing that fallthrough. Otherwise append
+  // them to the bottom, even if it previously had a fallthrough, on the theory
+  // that it's worth an extra branch to keep the loop contiguous.
+  MachineFunction::iterator InsertPt =
+    llvm::next(MachineFunction::iterator(BotMBB));
+  bool InsertAtTop = false;
+  if (TopMBB != MF.begin() &&
+      !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
+      HasFallthrough(BotMBB)) {
+    InsertPt = TopMBB;
+    InsertAtTop = true;
+  }
+
+  // Keep a record of which blocks are in the portion of the loop contiguous
+  // with the loop header.
+  SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
+  for (MachineFunction::iterator I = TopMBB,
+       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+    ContiguousBlocks.insert(I);
+
+  // Find non-contiguous blocks and fix them.
+  if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt)))
+    for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end();
+         BI != BE; ++BI) {
+      MachineBasicBlock *BB = *BI;
+
+      // Verify that we can analyze all the loop entry edges before beginning
+      // any changes which will require us to be able to analyze them.
+      if (!HasAnalyzableTerminator(BB))
+        continue;
+      if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB))))
+        continue;
+
+      // If the layout predecessor is part of the loop, this block will be
+      // processed along with it. This keeps them in their relative order.
+ if (BB != MF.begin() && + L->contains(prior(MachineFunction::iterator(BB)))) + continue; + + // Check to see if this block is already contiguous with the main + // portion of the loop. + if (!ContiguousBlocks.insert(BB)) + continue; + + // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() + << " to be contiguous with loop.\n"); + Changed = true; + + // Process this block and all loop blocks contiguous with it, to keep + // them in their relative order. + MachineFunction::iterator Begin = BB; + MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); + for (; End != MF.end(); ++End) { + if (!L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + ContiguousBlocks.insert(End); + ++NumIntraMoved; + } + + // If we're inserting at the bottom of the loop, and the code we're + // moving originally had fall-through successors, bring the sucessors + // up with the loop blocks to preserve the fall-through edges. + if (!InsertAtTop) + for (; End != MF.end(); ++End) { + if (L->contains(End)) break; + if (!HasAnalyzableTerminator(End)) break; + if (!HasFallthrough(prior(End))) break; + } + + // Move the blocks. This may invalidate TopMBB and/or BotMBB, but + // we don't need them anymore at this point. + Splice(MF, InsertPt, Begin, End); + } + + return Changed; +} + +/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +/// This code takes the approach of making minor changes to the existing +/// layout to fix specific loop-oriented problems. Also, it depends on +/// AnalyzeBranch, which can't understand complex control instructions. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, + MachineLoop *L) { + bool Changed = false; + + // Do optimization for nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + + // Do optimization for this loop. + Changed |= EliminateUnconditionalJumpsToTop(MF, L); + Changed |= MoveDiscontiguousLoopBlocks(MF, L); + + return Changed; +} + +/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize +/// intra-loop branching and to form contiguous loops. +/// +bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { + bool Changed = false; + + if (!TLI->shouldOptimizeCodePlacement()) + return Changed; + + // Do optimization for each loop in the function. + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + I != E; ++I) + if (!(*I)->getParentLoop()) + Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); + + return Changed; +} + +/// AlignLoops - Align loop headers to target preferred alignments. +/// +bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { + const Function *F = MF.getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + return false; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return false; // Don't care about loop alignment. + + bool Changed = false; + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); + I != E; ++I) + Changed |= AlignLoop(MF, *I, Align); + + return Changed; +} + +/// AlignLoop - Align loop headers to target preferred alignments. +/// +bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, + unsigned Align) { + bool Changed = false; + + // Do alignment for nested loops. 
+ for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + Changed |= AlignLoop(MF, *I, Align); + + L->getTopBlock()->setAlignment(Align); + Changed = true; + ++NumLoopsAligned; + + return Changed; +} + +bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { + MLI = &getAnalysis(); + if (MLI->empty()) + return false; // No loops. + + TLI = MF.getTarget().getTargetLowering(); + TII = MF.getTarget().getInstrInfo(); + + bool Changed = OptimizeIntraLoopEdges(MF); + + Changed |= AlignLoops(MF); + + return Changed; +} diff --git a/final/lib/CodeGen/CriticalAntiDepBreaker.cpp b/final/lib/CodeGen/CriticalAntiDepBreaker.cpp new file mode 100644 index 00000000000..f79598de1d9 --- /dev/null +++ b/final/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -0,0 +1,668 @@ +//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "CriticalAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +CriticalAntiDepBreaker:: +CriticalAntiDepBreaker(MachineFunction& MFi) : + AntiDepBreaker(), MF(MFi), + MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), + TRI(MF.getTarget().getRegisterInfo()), + AllocatableSet(TRI->getAllocatableSet(MF)), + Classes(TRI->getNumRegs(), static_cast(0)), + KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0) {} + +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { +} + +void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + const unsigned BBSize = BB->size(); + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + // Clear out the register class data. + Classes[i] = static_cast(0); + + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } + + // Clear "do not change" set. + KeepRegs.clear(); + + bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn()); + + // Determine the live-out physregs for this block. + if (IsReturnBlock) { + // In a return block, examine the function live-out regs. + for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), + E = MRI.liveout_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. 
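For orientation, the hazard this file breaks is a write-after-read (anti) dependence: a pure register-reuse artifact with no data flowing between the two chains. A scalar analogy in plain C++ (not LLVM code):

// The second chain below must wait only because it reuses r0, not because
// it consumes the first chain's result.
int warExample(int a, int b, int c, int d) {
  int r0 = a + b;   // def of r0
  int x  = r0 * 2;  // last use (kill) of r0
  r0     = c + d;   // WAR hazard: cannot be hoisted above the kill of r0
  int y  = r0 - 1;
  // Renaming the second live range to a fresh register breaks the hazard:
  int r1 = c + d;   // now independent; a scheduler may move this chain up
  int y1 = r1 - 1;
  return x + y + y1;
}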
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(BB); + for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void CriticalAntiDepBreaker::FinishBlock() { + RegRefs.clear(); + KeepRegs.clear(); +} + +void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + if (MI->isDebugValue()) + return; + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] != ~0u) { + // If Reg is currently live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = Count; + } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + Classes[Reg] = reinterpret_cast(-1); + + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = 0; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. 
+    if (NextDepth < PredTotalLatency ||
+        (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+      NextDepth = PredTotalLatency;
+      Next = &*P;
+    }
+  }
+  return Next;
+}
+
+void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+  // It's not safe to change register allocation for source operands of
+  // instructions that have special allocation requirements. Also assume all
+  // registers used in a call must not be changed (ABI).
+  // FIXME: The issue with predicated instruction is more complex. We are being
+  // conservative here because the kill markers cannot be trusted after
+  // if-conversion:
+  // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+  // ...
+  // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+  // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+  // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+  //
+  // The first R6 kill is not really a kill since it's killed by a predicated
+  // instruction which may not be executed. The second R6 def may or may not
+  // re-define R6 so it's not safe to change it since the last R6 use cannot be
+  // changed.
+  bool Special = MI->getDesc().isCall() ||
+                 MI->getDesc().hasExtraSrcRegAllocReq() ||
+                 TII->isPredicated(MI);
+
+  // Scan the register operands for this instruction and update
+  // Classes and RegRefs.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    const TargetRegisterClass *NewRC = 0;
+
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    // Now check for aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      // If an alias of the reg is used during the live range, give up.
+      // Note that this allows us to skip checking if AntiDepReg
+      // overlaps with any of the aliases, among other things.
+      unsigned AliasReg = *Alias;
+      if (Classes[AliasReg]) {
+        Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+        Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
+    }
+
+    // If we're still willing to consider this register, note the reference.
+    if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
+      RegRefs.insert(std::make_pair(Reg, &MO));
+
+    if (MO.isUse() && Special) {
+      if (KeepRegs.insert(Reg)) {
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg)
+          KeepRegs.insert(*Subreg);
+      }
+    }
+  }
+}
+
+void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
+                                             unsigned Count) {
+  // Update liveness.
+  // Proceeding upwards, registers that are defined but not used in this
+  // instruction are now dead.
+
+  if (!TII->isPredicated(MI)) {
+    // Predicated defs are modeled as read + write, i.e. similar to two
+    // address updates.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (!MO.isDef()) continue;
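PrescanInstruction and ScanInstruction maintain Classes[Reg] as a small three-state lattice: null (no constraint seen yet), one consistent register class, or -1 (conflicting constraints, so the register may never be renamed). A standalone model of that merge rule, with integer stand-ins for the class pointers:

#include <stdint.h>

// 0 = no constraint seen, a nonzero "pointer" = one consistent class,
// all-ones = conflict (the register cannot be renamed).
static const uintptr_t kConflict = ~uintptr_t(0);

uintptr_t mergeClass(uintptr_t current, uintptr_t newRC) {
  if (current == 0 && newRC != 0)
    return newRC;      // first constraint wins
  if (newRC == 0 || current != newRC)
    return kConflict;  // missing or inconsistent constraint: give up
  return current;      // same class again: still renamable
}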
+      // Ignore two-addr defs.
+      if (MI->isRegTiedToUseOperand(i)) continue;
+
+      DefIndices[Reg] = Count;
+      KillIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+      KeepRegs.erase(Reg);
+      Classes[Reg] = 0;
+      RegRefs.erase(Reg);
+      // Repeat, for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        unsigned SubregReg = *Subreg;
+        DefIndices[SubregReg] = Count;
+        KillIndices[SubregReg] = ~0u;
+        KeepRegs.erase(SubregReg);
+        Classes[SubregReg] = 0;
+        RegRefs.erase(SubregReg);
+      }
+      // Conservatively mark super-registers as unusable.
+      for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+           *Super; ++Super) {
+        unsigned SuperReg = *Super;
+        Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+      }
+    }
+  }
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    if (!MO.isUse()) continue;
+
+    const TargetRegisterClass *NewRC = 0;
+    if (i < MI->getDesc().getNumOperands())
+      NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+
+    // For now, only allow the register to be changed if its register
+    // class is consistent across all uses.
+    if (!Classes[Reg] && NewRC)
+      Classes[Reg] = NewRC;
+    else if (!NewRC || Classes[Reg] != NewRC)
+      Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+    RegRefs.insert(std::make_pair(Reg, &MO));
+
+    // It wasn't previously live but now it is, this is a kill.
+    if (KillIndices[Reg] == ~0u) {
+      KillIndices[Reg] = Count;
+      DefIndices[Reg] = ~0u;
+      assert(((KillIndices[Reg] == ~0u) !=
+              (DefIndices[Reg] == ~0u)) &&
+             "Kill and Def maps aren't consistent for Reg!");
+    }
+    // Repeat, for all aliases.
+    for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+      unsigned AliasReg = *Alias;
+      if (KillIndices[AliasReg] == ~0u) {
+        KillIndices[AliasReg] = Count;
+        DefIndices[AliasReg] = ~0u;
+      }
+    }
+  }
+}
+
+// Check all machine operands that reference the anti-dependent register which
+// must be replaced by NewReg. Return true if any of their parent instructions
+// may clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+                                                RegRefIter RegRefEnd,
+                                                unsigned NewReg)
+{
+  for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I) {
+    MachineOperand *RefOper = I->second;
+
+    // Don't allow the instruction defining AntiDepReg to earlyclobber its
+    // operands, in case they may be assigned to NewReg. In this case antidep
+    // breaking must fail, but it's too rare to bother optimizing.
+    if (RefOper->isDef() && RefOper->isEarlyClobber())
+      return true;
+
+    // Handle cases in which this instruction defines NewReg.
+    MachineInstr *MI = RefOper->getParent();
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &CheckOper = MI->getOperand(i);
+
+      if (!CheckOper.isReg() || !CheckOper.isDef() ||
+          CheckOper.getReg() != NewReg)
+        continue;
+
+      // Don't allow the instruction to define NewReg and AntiDepReg.
+      // When AntiDepReg is renamed it will be an illegal op.
+      if (RefOper->isDef())
+        return true;
+
+      // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+      // NewReg.
+      if (CheckOper.isEarlyClobber())
+        return true;
+
+      // Don't allow inline asm to define NewReg at all. Who knows what it's
+      // doing with it.
+      if (MI->isInlineAsm())
+        return true;
+    }
+  }
+  return false;
+}
+
+unsigned
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+                                                 RegRefIter RegRefEnd,
+                                                 unsigned AntiDepReg,
+                                                 unsigned LastNewReg,
+                                                 const TargetRegisterClass *RC)
+{
+  for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
+       RE = RC->allocation_order_end(MF); R != RE; ++R) {
+    unsigned NewReg = *R;
+    // Don't consider non-allocatable registers.
+    if (!AllocatableSet.test(NewReg)) continue;
+    // Don't replace a register with itself.
+    if (NewReg == AntiDepReg) continue;
+    // Don't replace a register with one that was recently used to repair
+    // an anti-dependence with this AntiDepReg, because that would
+    // re-introduce that anti-dependence.
+    if (NewReg == LastNewReg) continue;
+    // If any instructions that define AntiDepReg also define the NewReg, it's
+    // not suitable. For example, an instruction with multiple definitions can
+    // cause this condition.
+    if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
+    // If NewReg is dead and NewReg's most recent def is not before
+    // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
+    assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
+           && "Kill and Def maps aren't consistent for AntiDepReg!");
+    assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u))
+           && "Kill and Def maps aren't consistent for NewReg!");
+    if (KillIndices[NewReg] != ~0u ||
+        Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) ||
+        KillIndices[AntiDepReg] > DefIndices[NewReg])
+      continue;
+    return NewReg;
+  }
+
+  // No registers are free and available!
+  return 0;
+}
+
+unsigned CriticalAntiDepBreaker::
+BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                      MachineBasicBlock::iterator Begin,
+                      MachineBasicBlock::iterator End,
+                      unsigned InsertPosIndex) {
+  // The code below assumes that there is at least one instruction,
+  // so just duck out immediately if the block is empty.
+  if (SUnits.empty()) return 0;
+
+  // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+  // This is used for updating debug information.
+  DenseMap<MachineInstr*, const SUnit*> MISUnitMap;
+
+  // Find the node at the bottom of the critical path.
+ const SUnit *Max = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) + Max = SU; + } + +#ifndef NDEBUG + { + DEBUG(dbgs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); + } +#endif + + // Track progress along the critical path through the SUnit graph as we walk + // the instructions. + const SUnit *CriticalPathSU = Max; + MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); + + // Consider this pattern: + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // There are three anti-dependencies here, and without special care, + // we'd break all of them using the same register: + // A = ... + // ... = A + // B = ... + // ... = B + // B = ... + // ... = B + // B = ... + // ... = B + // because at each anti-dependence, B is the first register that + // isn't A which is free. This re-introduces anti-dependencies + // at all but one of the original anti-dependencies that we were + // trying to break. To avoid this, keep track of the most recent + // register that each register was replaced with, avoid + // using it to repair an anti-dependence on the same register. + // This lets us produce this: + // A = ... + // ... = A + // B = ... + // ... = B + // C = ... + // ... = C + // B = ... + // ... = B + // This still has an anti-dependence on B, but at least it isn't on the + // original critical path. + // + // TODO: If we tracked more than one register here, we could potentially + // fix that remaining critical edge too. This is a little more involved, + // because unlike the most recent register, less recent registers should + // still be considered, though only if no other registers are available. + std::vector LastNewReg(TRI->getNumRegs(), 0); + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Check if this instruction has a dependence on the critical path that + // is an anti-dependence that we may be able to break. If it is, set + // AntiDepReg to the non-zero register associated with the anti-dependence. + // + // We limit our attention to the critical path as a heuristic to avoid + // breaking anti-dependence edges that aren't going to significantly + // impact the overall schedule. There are a limited number of registers + // and we want to save them for the important edges. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + unsigned AntiDepReg = 0; + if (MI == CriticalPathMI) { + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); + + // Only consider anti-dependence edges. 
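The LastNewReg vector introduced in the long comment above is a one-element rename memory per register; the acceptance test it implies is tiny (standalone sketch):

#include <vector>

// Never repair an anti-dependence on 'antiDepReg' with the register most
// recently used to repair it, or the same WAR hazard is immediately
// re-created on the new register, as the comment above works through.
bool allowedRepair(const std::vector<unsigned> &LastNewReg,
                   unsigned antiDepReg, unsigned candidate) {
  return candidate != antiDepReg && candidate != LastNewReg[antiDepReg];
}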
+        if (Edge->getKind() == SDep::Anti) {
+          AntiDepReg = Edge->getReg();
+          assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
+          if (!AllocatableSet.test(AntiDepReg))
+            // Don't break anti-dependencies on non-allocatable registers.
+            AntiDepReg = 0;
+          else if (KeepRegs.count(AntiDepReg))
+            // Don't break anti-dependencies if a use down below requires
+            // this exact register.
+            AntiDepReg = 0;
+          else {
+            // If the SUnit has other dependencies on the SUnit that it
+            // anti-depends on, don't bother breaking the anti-dependency
+            // since those edges would prevent such units from being
+            // scheduled past each other regardless.
+            //
+            // Also, if there are dependencies on other SUnits with the
+            // same register as the anti-dependency, don't attempt to
+            // break it.
+            for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(),
+                 PE = CriticalPathSU->Preds.end(); P != PE; ++P)
+              if (P->getSUnit() == NextSU ?
+                    (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) :
+                    (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) {
+                AntiDepReg = 0;
+                break;
+              }
+          }
+        }
+        CriticalPathSU = NextSU;
+        CriticalPathMI = CriticalPathSU->getInstr();
+      } else {
+        // We've reached the end of the critical path.
+        CriticalPathSU = 0;
+        CriticalPathMI = 0;
+      }
+    }
+
+    PrescanInstruction(MI);
+
+    // If MI's defs have a special allocation requirement, don't allow
+    // any def registers to be changed. Also assume all registers
+    // defined in a call must not be changed (ABI).
+    if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+        TII->isPredicated(MI))
+      // If this instruction's defs have special allocation requirement, don't
+      // break this anti-dependency.
+      AntiDepReg = 0;
+    else if (AntiDepReg) {
+      // If this instruction has a use of AntiDepReg, breaking it
+      // is invalid.
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg()) continue;
+        unsigned Reg = MO.getReg();
+        if (Reg == 0) continue;
+        if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
+          AntiDepReg = 0;
+          break;
+        }
+      }
+    }
+
+    // Determine AntiDepReg's register class, if it is live and is
+    // consistently used within a single class.
+    const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] : 0;
+    assert((AntiDepReg == 0 || RC != NULL) &&
+           "Register should be live if it's causing an anti-dependence!");
+    if (RC == reinterpret_cast<TargetRegisterClass *>(-1))
+      AntiDepReg = 0;
+
+    // Look for a suitable register to use to break the anti-dependence.
+    //
+    // TODO: Instead of picking the first free register, consider which might
+    // be the best.
+    if (AntiDepReg != 0) {
+      std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+                std::multimap<unsigned, MachineOperand *>::iterator>
+        Range = RegRefs.equal_range(AntiDepReg);
+      if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+                                                     AntiDepReg,
+                                                     LastNewReg[AntiDepReg],
+                                                     RC)) {
+        DEBUG(dbgs() << "Breaking anti-dependence edge on "
+              << TRI->getName(AntiDepReg)
+              << " with " << RegRefs.count(AntiDepReg) << " references"
+              << " using " << TRI->getName(NewReg) << "!\n");
+
+        // Update the references to the old register to refer to the new
+        // register.
+        for (std::multimap<unsigned, MachineOperand *>::iterator
+             Q = Range.first, QE = Range.second; Q != QE; ++Q) {
+          Q->second->setReg(NewReg);
+          // If the SU for the instruction being updated has debug information
+          // related to the anti-dependency register, make sure to update that
+          // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } + } + + // We just went back in time and modified history; the + // liveness information for the anti-dependence reg is now + // inconsistent. Set the state as if it were dead. + Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = 0; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + LastNewReg[AntiDepReg] = NewReg; + ++Broken; + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/final/lib/CodeGen/CriticalAntiDepBreaker.h b/final/lib/CodeGen/CriticalAntiDepBreaker.h new file mode 100644 index 00000000000..0daaef27344 --- /dev/null +++ b/final/lib/CodeGen/CriticalAntiDepBreaker.h @@ -0,0 +1,106 @@ +//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H + +#include "AntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include + +namespace llvm { +class TargetInstrInfo; +class TargetRegisterInfo; + + class CriticalAntiDepBreaker : public AntiDepBreaker { + MachineFunction& MF; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + + /// AllocatableSet - The set of allocatable registers. + /// We'll be ignoring anti-dependencies on non-allocatable registers, + /// because they may not be safe to break. + const BitVector AllocatableSet; + + /// Classes - For live regs that are only used in one register class in a + /// live range, the register class. If the register is not live, the + /// corresponding value is null. If the register is live but used in + /// multiple register classes, the corresponding value is -1 casted to a + /// pointer. + std::vector Classes; + + /// RegRefs - Map registers to all their references within a live range. 
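The interface declared here is driven by the post-RA scheduler in a fixed protocol: one StartBlock/FinishBlock pair per basic block, BreakAntiDependencies on each scheduling region, and Observe for instructions the scheduler skips. A hedged sketch of that call sequence (the driver function and variable names are invented):

#include "AntiDepBreaker.h"
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

static void processRegion(AntiDepBreaker &ADB, MachineBasicBlock *MBB,
                          const std::vector<SUnit> &SUnits,
                          MachineBasicBlock::iterator RegionBegin,
                          MachineBasicBlock::iterator RegionEnd,
                          unsigned EndIndex) {
  ADB.StartBlock(MBB);               // seed liveness from the block's live-outs
  unsigned Broken =
    ADB.BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, EndIndex);
  (void)Broken;                      // nonzero: registers were renamed, so the
                                     // scheduler must rebuild its dependence DAG
  ADB.FinishBlock();                 // drop per-block state
}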
+    std::multimap<unsigned, MachineOperand *> RegRefs;
+    typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+      RegRefIter;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+    /// DefIndices - The index of the most recent complete def (proceeding
+    /// bottom up), or ~0u if the register is live.
+    std::vector<unsigned> DefIndices;
+
+    /// KeepRegs - A set of registers which are live and cannot be changed to
+    /// break anti-dependencies.
+    SmallSet<unsigned, 4> KeepRegs;
+
+  public:
+    CriticalAntiDepBreaker(MachineFunction& MFi);
+    ~CriticalAntiDepBreaker();
+
+    /// Start - Initialize anti-dep breaking for a new basic block.
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// BreakAntiDependencies - Identify anti-dependencies along the critical
+    /// path of the ScheduleDAG and break them by renaming registers.
+    ///
+    unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits,
+                                   MachineBasicBlock::iterator Begin,
+                                   MachineBasicBlock::iterator End,
+                                   unsigned InsertPosIndex);
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count, unsigned InsertPosIndex);
+
+    /// Finish - Finish anti-dep breaking for a basic block.
+    void FinishBlock();
+
+  private:
+    void PrescanInstruction(MachineInstr *MI);
+    void ScanInstruction(MachineInstr *MI, unsigned Count);
+    bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+                                 RegRefIter RegRefEnd,
+                                 unsigned NewReg);
+    unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+                                      RegRefIter RegRefEnd,
+                                      unsigned AntiDepReg,
+                                      unsigned LastNewReg,
+                                      const TargetRegisterClass *RC);
+  };
+}
+
+#endif
diff --git a/final/lib/CodeGen/DeadMachineInstructionElim.cpp b/final/lib/CodeGen/DeadMachineInstructionElim.cpp
new file mode 100644
index 00000000000..fdc1d914214
--- /dev/null
+++ b/final/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -0,0 +1,196 @@
+//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an extremely simple MachineInstr-level dead-code-elimination pass.
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "codegen-dce" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumDeletes, "Number of dead instructions deleted"); + +namespace { + class DeadMachineInstructionElim : public MachineFunctionPass { + virtual bool runOnMachineFunction(MachineFunction &MF); + + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + BitVector LivePhysRegs; + + public: + static char ID; // Pass identification, replacement for typeid + DeadMachineInstructionElim() : MachineFunctionPass(ID) { + initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); + } + + private: + bool isDead(const MachineInstr *MI) const; + }; +} +char DeadMachineInstructionElim::ID = 0; + +INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", + "Remove dead machine instructions", false, false) + +FunctionPass *llvm::createDeadMachineInstructionElimPass() { + return new DeadMachineInstructionElim(); +} + +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI->isInlineAsm()) + return false; + + // Don't delete instructions with side effects. + bool SawStore = false; + if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI()) + return false; + + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) ? + LivePhysRegs[Reg] : !MRI->use_nodbg_empty(Reg)) { + // This def has a non-debug use. Don't delete the instruction! + return false; + } + } + } + + // If there are no defs with uses, the instruction is dead. + return true; +} + +bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + bool AnyChanges = false; + MRI = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + // Treat reserved registers as always live. + BitVector ReservedRegs = TRI->getReservedRegs(MF); + + // Loop over all instructions in all blocks, from bottom to top, so that it's + // more likely that chains of dependent but ultimately dead instructions will + // be cleaned up. + for (MachineFunction::reverse_iterator I = MF.rbegin(), E = MF.rend(); + I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + // Start out assuming that reserved registers are live out of this block. + LivePhysRegs = ReservedRegs; + + // Also add any explicit live-out physregs for this block. + if (!MBB->empty() && MBB->back().getDesc().isReturn()) + for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), + LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { + unsigned Reg = *LOI; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + LivePhysRegs.set(Reg); + } + + // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs + // are not live across blocks, but some targets (x86) can have flags live + // out of a block. 
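The factory above is the pass's only public entry point; a pipeline would schedule it roughly like this (illustrative only, not the exact call site used in this release):

#include "llvm/CodeGen/Passes.h"
#include "llvm/PassManager.h"

// Sketch: schedule machine-level DCE after a phase that is known to leave
// dead instructions behind; the real codegen pipeline wires this up
// internally.
static void addMachineDCE(llvm::PassManagerBase &PM) {
  PM.add(llvm::createDeadMachineInstructionElimPass());
}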
+ + // Now scan the instructions and delete dead ones, tracking physreg + // liveness as we go. + for (MachineBasicBlock::reverse_iterator MII = MBB->rbegin(), + MIE = MBB->rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! + if (isDead(MI)) { + DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. Examine each def operand for such references; + // if found, mark the DBG_VALUE as undef (but don't delete it). + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineRegisterInfo::use_iterator nextI; + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg), + E = MRI->use_end(); I!=E; I=nextI) { + nextI = llvm::next(I); // I is invalidated by the setReg + MachineOperand& Use = I.getOperand(); + MachineInstr *UseMI = Use.getParent(); + if (UseMI==MI) + continue; + assert(Use.isDebug()); + UseMI->getOperand(0).setReg(0U); + } + } + AnyChanges = true; + MI->eraseFromParent(); + ++NumDeletes; + MIE = MBB->rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.reset(Reg); + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + *SubRegs; ++SubRegs) + LivePhysRegs.reset(*SubRegs); + } + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + LivePhysRegs.set(Reg); + for (const unsigned *AliasSet = TRI->getAliasSet(Reg); + *AliasSet; ++AliasSet) + LivePhysRegs.set(*AliasSet); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/final/lib/CodeGen/DwarfEHPrepare.cpp b/final/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 00000000000..0ebb5b0db70 --- /dev/null +++ b/final/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,670 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. 
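+// Concretely, it normalizes landing pads so that only unwind edges reach
+// them, lowers unwind instructions into _Unwind_Resume calls, moves
+// eh.exception calls into landing pads, and converts clean-up eh.selector
+// calls that can reach _Unwind_Resume_or_Rethrow into catch-all selectors.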
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfehprepare"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+using namespace llvm;
+
+STATISTIC(NumLandingPadsSplit,     "Number of landing pads split");
+STATISTIC(NumUnwindsLowered,       "Number of unwind instructions lowered");
+STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved");
+
+namespace {
+  class DwarfEHPrepare : public FunctionPass {
+    const TargetMachine *TM;
+    const TargetLowering *TLI;
+
+    // The eh.exception intrinsic.
+    Function *ExceptionValueIntrinsic;
+
+    // The eh.selector intrinsic.
+    Function *SelectorIntrinsic;
+
+    // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
+    Constant *URoR;
+
+    // The EH language-specific catch-all type.
+    GlobalVariable *EHCatchAllValue;
+
+    // _Unwind_Resume or the target equivalent.
+    Constant *RewindFunction;
+
+    // We both use and preserve dominator info.
+    DominatorTree *DT;
+
+    // The function we are running on.
+    Function *F;
+
+    // The landing pads for this function.
+    typedef SmallPtrSet<BasicBlock*, 8> BBSet;
+    BBSet LandingPads;
+
+    bool NormalizeLandingPads();
+    bool LowerUnwinds();
+    bool MoveExceptionValueCalls();
+
+    Instruction *CreateExceptionValueCall(BasicBlock *BB);
+
+    /// CleanupSelectors - Any remaining eh.selector intrinsic calls which
+    /// still use the "llvm.eh.catch.all.value" call need to convert to using
+    /// its initializer instead.
+    bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+    bool HasCatchAllInSelector(IntrinsicInst *);
+
+    /// FindAllCleanupSelectors - Find all eh.selector calls that are
+    /// clean-ups.
+    void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+                                 SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
+
+    /// FindAllURoRInvokes - Find all URoR invokes in the function.
+    void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+    /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+    /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jumps
+    /// to a landing pad within the current function. This is a candidate to
+    /// merge the selector associated with the URoR invoke with the one from
+    /// the URoR's landing pad.
+    bool HandleURoRInvokes();
+
+    /// FindSelectorAndURoR - Find the eh.selector call and URoR call
+    /// associated with the eh.exception call. This recursively looks past
+    /// instructions which don't change the EH pointer value, like casts or
+    /// PHI nodes.
+    bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                             SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
+
+  public:
+    static char ID; // Pass identification, replacement for typeid.
+    DwarfEHPrepare(const TargetMachine *tm) :
+      FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
+      ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+      URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+        initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+      }
+
+    virtual bool runOnFunction(Function &Fn);
+
+    // getAnalysisUsage - We need the dominator tree for handling URoR.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<DominatorTree>();
+      AU.addPreserved<DominatorTree>();
+    }
+
+    const char *getPassName() const {
+      return "Exception handling preparation";
+    }
+
+  };
+} // end anonymous namespace
+
+char DwarfEHPrepare::ID = 0;
+
+FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) {
+  return new DwarfEHPrepare(tm);
+}
+
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+  if (!EHCatchAllValue) return false;
+
+  unsigned ArgIdx = II->getNumArgOperands() - 1;
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+  return GV == EHCatchAllValue;
+}
+
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+                        SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
+  for (Value::use_iterator
+         I = SelectorIntrinsic->use_begin(),
+         E = SelectorIntrinsic->use_end(); I != E; ++I) {
+    IntrinsicInst *II = cast<IntrinsicInst>(*I);
+
+    if (II->getParent()->getParent() != F)
+      continue;
+
+    if (!HasCatchAllInSelector(II))
+      Sels.insert(II);
+    else
+      CatchAllSels.insert(II);
+  }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+  for (Value::use_iterator
+         I = URoR->use_begin(),
+         E = URoR->use_end(); I != E; ++I) {
+    if (InvokeInst *II = dyn_cast<InvokeInst>(*I))
+      URoRInvokes.insert(II);
+  }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+/// use the "llvm.eh.catch.all.value" call need to convert to using its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+  if (!EHCatchAllValue) return false;
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  bool Changed = false;
+  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+         I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+    IntrinsicInst *Sel = *I;
+
+    // Index of the "llvm.eh.catch.all.value" variable.
+    unsigned OpIdx = Sel->getNumArgOperands() - 1;
+    GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
+    if (GV != EHCatchAllValue) continue;
+    Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call. And indicate if there is a URoR "invoke" associated
+/// with the eh.exception call. This recursively looks past instructions
+/// which don't change the EH pointer value, like casts or PHI nodes.
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+                                    SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
+  SmallPtrSet<PHINode*, 32> SeenPHIs;
+  bool Changed = false;
+
+  for (Value::use_iterator
+         I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+    Instruction *II = dyn_cast<Instruction>(*I);
+    if (!II || II->getParent()->getParent() != F) continue;
+
+    if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+      if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+        SelCalls.insert(Sel);
+    } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+      if (Invoke->getCalledFunction() == URoR)
+        URoRInvoke = true;
+    } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+      Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
+    } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+      if (SeenPHIs.insert(PN))
+        // Don't process a PHI node more than once.
+        Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
+    }
+  }
+
+  return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jumps to a
+/// landing pad within the current function. This is a candidate to merge the
+/// selector associated with the URoR invoke with the one from the URoR's
+/// landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+  if (!EHCatchAllValue) {
+    EHCatchAllValue =
+      F->getParent()->getNamedGlobal("llvm.eh.catch.all.value");
+    if (!EHCatchAllValue) return false;
+  }
+
+  if (!SelectorIntrinsic) {
+    SelectorIntrinsic =
+      Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+    if (!SelectorIntrinsic) return false;
+  }
+
+  SmallPtrSet<IntrinsicInst*, 32> Sels;
+  SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+  FindAllCleanupSelectors(Sels, CatchAllSels);
+
+  if (!URoR) {
+    URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+    if (!URoR) {
+      URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume");
+      if (!URoR) return CleanupSelectors(CatchAllSels);
+    }
+  }
+
+  SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+  FindAllURoRInvokes(URoRInvokes);
+
+  SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+  for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+         SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+    const BasicBlock *SelBB = (*SI)->getParent();
+    for (SmallPtrSet<InvokeInst*, 32>::iterator
+           UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+      const BasicBlock *URoRBB = (*UI)->getParent();
+      if (DT->dominates(SelBB, URoRBB)) {
+        SelsToConvert.insert(*SI);
+        break;
+      }
+    }
+  }
+
+  bool Changed = false;
+
+  if (Sels.size() != SelsToConvert.size()) {
+    // If we haven't been able to convert all of the clean-up selectors, then
+    // loop through the slow way to see if they still need to be converted.
+    if (!ExceptionValueIntrinsic) {
+      ExceptionValueIntrinsic =
+        Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+      if (!ExceptionValueIntrinsic)
+        return CleanupSelectors(CatchAllSels);
+    }
+
+    for (Value::use_iterator
+           I = ExceptionValueIntrinsic->use_begin(),
+           E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+      IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(*I);
+      if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+      bool URoRInvoke = false;
+      SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+      Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
+
+      if (URoRInvoke) {
+        // This EH pointer is being used by an invoke of a URoR instruction
+        // and an eh.selector intrinsic call. If the eh.selector is a
+        // 'clean-up', we need to convert it to a 'catch-all'.
+        for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+               SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+          if (!HasCatchAllInSelector(*SI))
+            SelsToConvert.insert(*SI);
+      }
+    }
+  }
+
+  if (!SelsToConvert.empty()) {
+    // Convert all clean-up eh.selectors, which are associated with "invokes"
+    // of URoR calls, into catch-all eh.selectors.
+    Changed = true;
+
+    for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+           SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+         SI != SE; ++SI) {
+      IntrinsicInst *II = *SI;
+
+      // Use the exception object pointer and the personality function
+      // from the original selector.
+      CallSite CS(II);
+      IntrinsicInst::op_iterator I = CS.arg_begin();
+      IntrinsicInst::op_iterator E = CS.arg_end();
+      IntrinsicInst::op_iterator B = prior(E);
+
+      // Exclude last argument if it is an integer.
+      if (isa<ConstantInt>(B)) E = B;
+
+      // Add exception object pointer (front).
+      // Add personality function (next).
+      // Add in any filter IDs (rest).
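+      //
+      // Illustrative IR (names hypothetical): a clean-up selector such as
+      //   call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* %pers, i32 0)
+      // drops its trailing i32 0 here and is rebuilt below as the catch-all
+      //   call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %exn, i8* %pers, i8* %catchall)
+      // where %catchall is llvm.eh.catch.all.value's initializer.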
+      SmallVector<Value*, 8> Args(I, E);
+
+      Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+      CallInst *NewSelector =
+        CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
+                         "eh.sel.catch.all", II);
+
+      NewSelector->setTailCall(II->isTailCall());
+      NewSelector->setAttributes(II->getAttributes());
+      NewSelector->setCallingConv(II->getCallingConv());
+
+      II->replaceAllUsesWith(NewSelector);
+      II->eraseFromParent();
+    }
+  }
+
+  Changed |= CleanupSelectors(CatchAllSels);
+  return Changed;
+}
+
+/// NormalizeLandingPads - Normalize and discover landing pads, noting them
+/// in the LandingPads set. A landing pad is normal if the only CFG edges
+/// that end at it are unwind edges from invoke instructions. If we inlined
+/// through an invoke we could have a normal branch from the previous
+/// unwind block through to the landing pad for the original invoke.
+/// Abnormal landing pads are fixed up by redirecting all unwind edges to
+/// a new basic block which falls through to the original.
+bool DwarfEHPrepare::NormalizeLandingPads() {
+  bool Changed = false;
+
+  const MCAsmInfo *MAI = TM->getMCAsmInfo();
+  bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
+
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    TerminatorInst *TI = I->getTerminator();
+    if (!isa<InvokeInst>(TI))
+      continue;
+    BasicBlock *LPad = TI->getSuccessor(1);
+    // Skip landing pads that have already been normalized.
+    if (LandingPads.count(LPad))
+      continue;
+
+    // Check that only invoke unwind edges end at the landing pad.
+    bool OnlyUnwoundTo = true;
+    bool SwitchOK = usingSjLjEH;
+    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad);
+         PI != PE; ++PI) {
+      TerminatorInst *PT = (*PI)->getTerminator();
+      // The SjLj dispatch block uses a switch instruction. This is effectively
+      // an unwind edge, so we can disregard it here. There will only ever
+      // be one dispatch, however, so if there are multiple switches, one
+      // of them truly is a normal edge, not an unwind edge.
+      if (SwitchOK && isa<SwitchInst>(PT)) {
+        SwitchOK = false;
+        continue;
+      }
+      if (!isa<InvokeInst>(PT) || LPad == PT->getSuccessor(0)) {
+        OnlyUnwoundTo = false;
+        break;
+      }
+    }
+
+    if (OnlyUnwoundTo) {
+      // Only unwind edges lead to the landing pad. Remember the landing pad.
+      LandingPads.insert(LPad);
+      continue;
+    }
+
+    // At least one normal edge ends at the landing pad. Redirect the unwind
+    // edges to a new basic block which falls through into this one.
+
+    // Create the new basic block.
+    BasicBlock *NewBB = BasicBlock::Create(F->getContext(),
+                                           LPad->getName() + "_unwind_edge");
+
+    // Insert it into the function right before the original landing pad.
+    LPad->getParent()->getBasicBlockList().insert(LPad, NewBB);
+
+    // Redirect unwind edges from the original landing pad to NewBB.
+    for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) {
+      TerminatorInst *PT = (*PI++)->getTerminator();
+      if (isa<InvokeInst>(PT) && PT->getSuccessor(1) == LPad)
+        // Unwind to the new block.
+        PT->setSuccessor(1, NewBB);
+    }
+
+    // If there are any PHI nodes in LPad, we need to update them so that they
+    // merge incoming values from NewBB instead.
+    for (BasicBlock::iterator II = LPad->begin(); isa<PHINode>(II); ++II) {
+      PHINode *PN = cast<PHINode>(II);
+      pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB);
+
+      // Check to see if all of the values coming in via unwind edges are the
+      // same. If so, we don't need to create a new PHI node.
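+      // For instance (names illustrative), if LPad had
+      //   %p = phi [%a, %inv1], [%b, %inv2], [%c, %normal]
+      // and the unwind edges from %inv1/%inv2 now enter NewBB, this loop
+      // ends with %p = phi [%c, %normal], [%m, NewBB], where %m is either
+      // the common incoming value or a new PHI merging %a and %b in NewBB.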
+      Value *InVal = PN->getIncomingValueForBlock(*PB);
+      for (pred_iterator PI = PB; PI != PE; ++PI) {
+        if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) {
+          InVal = 0;
+          break;
+        }
+      }
+
+      if (InVal == 0) {
+        // Different unwind edges have different values. Create a new PHI node
+        // in NewBB.
+        PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind",
+                                         NewBB);
+        // Add an entry for each unwind edge, using the value from the old PHI.
+        for (pred_iterator PI = PB; PI != PE; ++PI)
+          NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI);
+
+        // Now use this new PHI as the common incoming value for NewBB in PN.
+        InVal = NewPN;
+      }
+
+      // Revector exactly one entry in the PHI node to come from NewBB
+      // and delete all other entries that come from unwind edges. If
+      // there are both normal and unwind edges from the same predecessor,
+      // this leaves an entry for the normal edge.
+      for (pred_iterator PI = PB; PI != PE; ++PI)
+        PN->removeIncomingValue(*PI);
+      PN->addIncoming(InVal, NewBB);
+    }
+
+    // Add a fallthrough from NewBB to the original landing pad.
+    BranchInst::Create(LPad, NewBB);
+
+    // Now update DominatorTree analysis information.
+    DT->splitBlock(NewBB);
+
+    // Remember the newly constructed landing pad. The original landing pad
+    // LPad is no longer a landing pad now that all unwind edges have been
+    // revectored to NewBB.
+    LandingPads.insert(NewBB);
+    ++NumLandingPadsSplit;
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume,
+/// rethrowing any previously caught exception. This will crash horribly
+/// at runtime if there is no such exception: using unwind to throw a new
+/// exception is currently not supported.
+bool DwarfEHPrepare::LowerUnwinds() {
+  SmallVector<TerminatorInst*, 16> UnwindInsts;
+
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    TerminatorInst *TI = I->getTerminator();
+    if (isa<UnwindInst>(TI))
+      UnwindInsts.push_back(TI);
+  }
+
+  if (UnwindInsts.empty()) return false;
+
+  // Find the rewind function if we didn't already.
+  if (!RewindFunction) {
+    LLVMContext &Ctx = UnwindInsts[0]->getContext();
+    std::vector<const Type*>
+      Params(1, Type::getInt8PtrTy(Ctx));
+    FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
+                                          Params, false);
+    const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
+    RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
+  }
+
+  bool Changed = false;
+
+  for (SmallVectorImpl<TerminatorInst*>::iterator
+         I = UnwindInsts.begin(), E = UnwindInsts.end(); I != E; ++I) {
+    TerminatorInst *TI = *I;
+
+    // Replace the unwind instruction with a call to _Unwind_Resume (or the
+    // appropriate target equivalent) followed by an UnreachableInst.
+
+    // Create the call...
+    CallInst *CI = CallInst::Create(RewindFunction,
+                                    CreateExceptionValueCall(TI->getParent()),
+                                    "", TI);
+    CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME));
+    // ...followed by an UnreachableInst.
+    new UnreachableInst(TI->getContext(), TI);
+
+    // Nuke the unwind instruction.
+    TI->eraseFromParent();
+    ++NumUnwindsLowered;
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from
+/// landing pads by replacing calls outside of landing pads with direct use of
+/// a register holding the appropriate value; this requires adding calls inside
+/// all landing pads to initialize the register.
+/// Also, move eh.exception calls inside landing pads to the start of the
+/// landing pad (optional, but may make things simpler for later passes).
+bool DwarfEHPrepare::MoveExceptionValueCalls() {
+  // If the eh.exception intrinsic is not declared in the module then there is
+  // nothing to do. Speed up compilation by checking for this common case.
+  if (!ExceptionValueIntrinsic &&
+      !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception)))
+    return false;
+
+  bool Changed = false;
+
+  // Move calls to eh.exception that are inside a landing pad to the start of
+  // the landing pad.
+  for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end();
+       LI != LE; ++LI) {
+    BasicBlock *LP = *LI;
+    for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end();
+         II != IE;)
+      if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+        // Found a call to eh.exception.
+        if (!EI->use_empty()) {
+          // If there is already a call to eh.exception at the start of the
+          // landing pad, then get hold of it; otherwise create such a call.
+          Value *CallAtStart = CreateExceptionValueCall(LP);
+
+          // If the call was at the start of a landing pad then leave it alone.
+          if (EI == CallAtStart)
+            continue;
+          EI->replaceAllUsesWith(CallAtStart);
+        }
+        EI->eraseFromParent();
+        ++NumExceptionValuesMoved;
+        Changed = true;
+      }
+  }
+
+  // Look for calls to eh.exception that are not in a landing pad. If one is
+  // found, then a register that holds the exception value will be created in
+  // each landing pad, and the SSAUpdater will be used to compute the values
+  // returned by eh.exception calls outside of landing pads.
+  SSAUpdater SSA;
+
+  // Remember where we found the eh.exception call, to avoid rescanning earlier
+  // basic blocks which we already know contain no eh.exception calls.
+  bool FoundCallOutsideLandingPad = false;
+  Function::iterator BB = F->begin();
+  for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+    // Skip over landing pads.
+    if (LandingPads.count(BB))
+      continue;
+
+    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+         II != IE; ++II)
+      if (isa<EHExceptionInst>(II)) {
+        SSA.Initialize(II->getType(), II->getName());
+        FoundCallOutsideLandingPad = true;
+        break;
+      }
+
+    if (FoundCallOutsideLandingPad)
+      break;
+  }
+
+  // If all calls to eh.exception are in landing pads then we are done.
+  if (!FoundCallOutsideLandingPad)
+    return Changed;
+
+  // Add a call to eh.exception at the start of each landing pad, and tell the
+  // SSAUpdater that this is the value produced by the landing pad.
+  for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end();
+       LI != LE; ++LI)
+    SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI));
+
+  // Now turn all calls to eh.exception that are not in a landing pad into a
+  // use of the appropriate register.
+  for (Function::iterator BE = F->end(); BB != BE; ++BB) {
+    // Skip over landing pads.
+    if (LandingPads.count(BB))
+      continue;
+
+    for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end();
+         II != IE;)
+      if (EHExceptionInst *EI = dyn_cast<EHExceptionInst>(II++)) {
+        // Found a call to eh.exception, replace it with the value from any
+        // upstream landing pad(s).
+        EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB));
+        EI->eraseFromParent();
+        ++NumExceptionValuesMoved;
+      }
+  }
+
+  return true;
+}
+
+/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at
+/// the start of the basic block (unless there already is one, in which case
+/// the existing call is returned).
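+/// The insertion point is getFirstNonPHIOrDbg(), so the call always lands
+/// ahead of any user code in the block.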
+Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) {
+  Instruction *Start = BB->getFirstNonPHIOrDbg();
+  // Is this a call to eh.exception?
+  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Start))
+    if (CI->getIntrinsicID() == Intrinsic::eh_exception)
+      // Reuse the existing call.
+      return Start;
+
+  // Find the eh.exception intrinsic if we didn't already.
+  if (!ExceptionValueIntrinsic)
+    ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(),
+                                                       Intrinsic::eh_exception);
+
+  // Create the call.
+  return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start);
+}
+
+bool DwarfEHPrepare::runOnFunction(Function &Fn) {
+  bool Changed = false;
+
+  // Initialize internal state.
+  DT = &getAnalysis<DominatorTree>();
+  F = &Fn;
+
+  // Ensure that only unwind edges end at landing pads (a landing pad is a
+  // basic block where an invoke unwind edge ends).
+  Changed |= NormalizeLandingPads();
+
+  // Turn unwind instructions into libcalls.
+  Changed |= LowerUnwinds();
+
+  // TODO: Move eh.selector calls to landing pads and combine them.
+
+  // Move eh.exception calls to landing pads.
+  Changed |= MoveExceptionValueCalls();
+
+  Changed |= HandleURoRInvokes();
+
+  LandingPads.clear();
+
+  return Changed;
+}
diff --git a/final/lib/CodeGen/ELF.h b/final/lib/CodeGen/ELF.h
new file mode 100644
index 00000000000..e08feeb2753
--- /dev/null
+++ b/final/lib/CodeGen/ELF.h
@@ -0,0 +1,227 @@
+//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header contains common, non-processor-specific data structures and
+// constants for the ELF file format.
+//
+// The details of the ELF32 bits in this file are largely based on the Tool
+// Interface Standard (TIS) Executable and Linking Format (ELF) Specification
+// Version 1.2, May 1995. The ELF64 is based on the HP/Intel definition of the
+// ELF-64 object file format document, Version 1.5 Draft 2, May 27, 1998.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef CODEGEN_ELF_H
+#define CODEGEN_ELF_H
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  class GlobalValue;
+
+  /// ELFSym - This struct contains information about each symbol that is
+  /// added to the logical symbol table for the module. This is eventually
+  /// turned into a real symbol table in the file.
+  struct ELFSym {
+
+    // ELF symbols are related to llvm ones by being one of the two llvm
+    // types; for the other ones (section, file, func) a null pointer is
+    // assumed by default.
+    union {
+      const GlobalValue *GV;  // If this is a pointer to a GV
+      const char *Ext;        // If this is a pointer to a named symbol
+    } Source;
+
+    // Describes which source type this ELF symbol comes from;
+    // it can be a GlobalValue, an ExternalSymbol, or neither.
+    enum {
+      isGV,      // The Source.GV field is valid.
+      isExtSym,  // The Source.ExtSym field is valid.
+ isOther // Not a GlobalValue or External Symbol + }; + unsigned SourceType; + + bool isGlobalValue() const { return SourceType == isGV; } + bool isExternalSym() const { return SourceType == isExtSym; } + + // getGlobalValue - If this is a global value which originated the + // elf symbol, return a reference to it. + const GlobalValue *getGlobalValue() const { + assert(SourceType == isGV && "This is not a global value"); + return Source.GV; + } + + // getExternalSym - If this is an external symbol which originated the + // elf symbol, return a reference to it. + const char *getExternalSymbol() const { + assert(SourceType == isExtSym && "This is not an external symbol"); + return Source.Ext; + } + + // getGV - From a global value return a elf symbol to represent it + static ELFSym *getGV(const GlobalValue *GV, unsigned Bind, + unsigned Type, unsigned Visibility) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(Type); + Sym->setVisibility(Visibility); + Sym->SourceType = isGV; + return Sym; + } + + // getExtSym - Create and return an elf symbol to represent an + // external symbol + static ELFSym *getExtSym(const char *Ext) { + ELFSym *Sym = new ELFSym(); + Sym->Source.Ext = Ext; + Sym->setBind(ELF::STB_GLOBAL); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); + Sym->SourceType = isExtSym; + return Sym; + } + + // getSectionSym - Returns a elf symbol to represent an elf section + static ELFSym *getSectionSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_SECTION); + Sym->setVisibility(ELF::STV_DEFAULT); + Sym->SourceType = isOther; + return Sym; + } + + // getFileSym - Returns a elf symbol to represent the module identifier + static ELFSym *getFileSym() { + ELFSym *Sym = new ELFSym(); + Sym->setBind(ELF::STB_LOCAL); + Sym->setType(ELF::STT_FILE); + Sym->setVisibility(ELF::STV_DEFAULT); + Sym->SectionIdx = 0xfff1; // ELFSection::SHN_ABS; + Sym->SourceType = isOther; + return Sym; + } + + // getUndefGV - Returns a STT_NOTYPE symbol + static ELFSym *getUndefGV(const GlobalValue *GV, unsigned Bind) { + ELFSym *Sym = new ELFSym(); + Sym->Source.GV = GV; + Sym->setBind(Bind); + Sym->setType(ELF::STT_NOTYPE); + Sym->setVisibility(ELF::STV_DEFAULT); + Sym->SectionIdx = 0; //ELFSection::SHN_UNDEF; + Sym->SourceType = isGV; + return Sym; + } + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. + uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + // Symbol index into the Symbol table + unsigned SymTabIdx; + + ELFSym() : SourceType(isOther), NameIdx(0), Value(0), + Size(0), Info(0), Other(ELF::STV_DEFAULT), SectionIdx(0), + SymTabIdx(0) {} + + unsigned getBind() const { return (Info >> 4) & 0xf; } + unsigned getType() const { return Info & 0xf; } + bool isLocalBind() const { return getBind() == ELF::STB_LOCAL; } + bool isFileType() const { return getType() == ELF::STT_FILE; } + + void setBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + + void setType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + + void setVisibility(unsigned V) { + assert(V == (V & 0x3) && "Visibility value out of range!"); + Other = V; + } + }; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. 
This is eventually turned into the section header + /// table at the end of the file. + class ELFSection : public BinaryObject { + public: + // ELF specific fields + unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted. + unsigned Type; // sh_type - Section contents & semantics + unsigned Flags; // sh_flags - Section flags. + uint64_t Addr; // sh_addr - The mem addr this section is in. + unsigned Offset; // sh_offset - Offset from the file start + unsigned Size; // sh_size - The section size. + unsigned Link; // sh_link - Section header table index link. + unsigned Info; // sh_info - Auxillary information. + unsigned Align; // sh_addralign - Alignment of section. + unsigned EntSize; // sh_entsize - Size of entries in the section e + + /// SectionIdx - The number of the section in the Section Table. + unsigned short SectionIdx; + + /// Sym - The symbol to represent this section if it has one. + ELFSym *Sym; + + /// getSymIndex - Returns the symbol table index of the symbol + /// representing this section. + unsigned getSymbolTableIndex() const { + assert(Sym && "section not present in the symbol table"); + return Sym->SymTabIdx; + } + + ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) + : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), + Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0), Sym(0) {} + }; + + /// ELFRelocation - This class contains all the information necessary to + /// to generate any 32-bit or 64-bit ELF relocation entry. + class ELFRelocation { + uint64_t r_offset; // offset in the section of the object this applies to + uint32_t r_symidx; // symbol table index of the symbol to use + uint32_t r_type; // machine specific relocation type + int64_t r_add; // explicit relocation addend + bool r_rela; // if true then the addend is part of the entry + // otherwise the addend is at the location specified + // by r_offset + public: + uint64_t getInfo(bool is64Bit) const { + if (is64Bit) + return ((uint64_t)r_symidx << 32) + ((uint64_t)r_type & 0xFFFFFFFFL); + else + return (r_symidx << 8) + (r_type & 0xFFL); + } + + uint64_t getOffset() const { return r_offset; } + int64_t getAddend() const { return r_add; } + + ELFRelocation(uint64_t off, uint32_t sym, uint32_t type, + bool rela = true, int64_t addend = 0) : + r_offset(off), r_symidx(sym), r_type(type), + r_add(addend), r_rela(rela) {} + }; + +} // end namespace llvm + +#endif diff --git a/final/lib/CodeGen/ELFCodeEmitter.cpp b/final/lib/CodeGen/ELFCodeEmitter.cpp new file mode 100644 index 00000000000..3fb087c5ea8 --- /dev/null +++ b/final/lib/CodeGen/ELFCodeEmitter.cpp @@ -0,0 +1,205 @@ +//===-- lib/CodeGen/ELFCodeEmitter.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "elfce" + +#include "ELF.h" +#include "ELFWriter.h" +#include "ELFCodeEmitter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/BinaryObject.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +//===----------------------------------------------------------------------===// +// ELFCodeEmitter Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { + +/// startFunction - This callback is invoked when a new machine function is +/// about to be emitted. +void ELFCodeEmitter::startFunction(MachineFunction &MF) { + DEBUG(dbgs() << "processing function: " + << MF.getFunction()->getName() << "\n"); + + // Get the ELF Section that this function belongs in. + ES = &EW.getTextSection(MF.getFunction()); + + // Set the desired binary object to be used by the code emitters + setBinaryObject(ES); + + // Get the function alignment in bytes + unsigned Align = (1 << MF.getAlignment()); + + // The function must start on its required alignment + ES->emitAlignment(Align); + + // Update the section alignment if needed. + ES->Align = std::max(ES->Align, Align); + + // Record the function start offset + FnStartOff = ES->getCurrentPCOffset(); + + // Emit constant pool and jump tables to their appropriate sections. + // They need to be emitted before the function because in some targets + // the later may reference JT or CP entry address. + emitConstantPool(MF.getConstantPool()); + if (MF.getJumpTableInfo()) + emitJumpTables(MF.getJumpTableInfo()); +} + +/// finishFunction - This callback is invoked after the function is completely +/// finished. +bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { + // Add a symbol to represent the function. + const Function *F = MF.getFunction(); + ELFSym *FnSym = ELFSym::getGV(F, EW.getGlobalELFBinding(F), ELF::STT_FUNC, + EW.getGlobalELFVisibility(F)); + FnSym->SectionIdx = ES->SectionIdx; + FnSym->Size = ES->getCurrentPCOffset()-FnStartOff; + EW.AddPendingGlobalSymbol(F, true); + + // Offset from start of Section + FnSym->Value = FnStartOff; + + if (!F->hasPrivateLinkage()) + EW.SymbolList.push_back(FnSym); + + // Patch up Jump Table Section relocations to use the real MBBs offsets + // now that the MBB label offsets inside the function are known. 
+  if (MF.getJumpTableInfo()) {
+    ELFSection &JTSection = EW.getJumpTableSection();
+    for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
+           MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
+      MachineRelocation &MR = *MRI;
+      uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setResultPointer((void*)MBBOffset);
+      MR.setConstantVal(ES->SectionIdx);
+      JTSection.addRelocation(MR);
+    }
+  }
+
+  // If we have emitted any relocations to function-specific objects such as
+  // basic blocks, constant pool entries, or jump tables, record their
+  // addresses now so that we can rewrite them with the correct addresses
+  // later.
+  for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
+    MachineRelocation &MR = Relocations[i];
+    intptr_t Addr;
+    if (MR.isGlobalValue()) {
+      EW.AddPendingGlobalSymbol(MR.getGlobalValue());
+    } else if (MR.isExternalSymbol()) {
+      EW.AddPendingExternalSymbol(MR.getExternalSymbol());
+    } else if (MR.isBasicBlock()) {
+      Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
+      MR.setConstantVal(ES->SectionIdx);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
+    } else if (MR.isJumpTableIndex()) {
+      ELFSection &JTSection = EW.getJumpTableSection();
+      Addr = getJumpTableEntryAddress(MR.getJumpTableIndex());
+      MR.setConstantVal(JTSection.SectionIdx);
+      MR.setResultPointer((void*)Addr);
+    } else {
+      llvm_unreachable("Unhandled relocation type");
+    }
+    ES->addRelocation(MR);
+  }
+
+  // Clear per-function data structures.
+  JTRelocations.clear();
+  Relocations.clear();
+  CPLocations.clear();
+  CPSections.clear();
+  JTLocations.clear();
+  MBBLocations.clear();
+  return false;
+}
+
+/// emitConstantPool - For each constant pool entry, figure out which section
+/// the constant should live in and emit the constant.
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // TODO: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf constant pools!");
+
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = CP[i];
+
+    // Record the constant pool location and the section index.
+    ELFSection &CstPool = EW.getConstantPoolSection(CPE);
+    CPLocations.push_back(CstPool.size());
+    CPSections.push_back(CstPool.SectionIdx);
+
+    if (CPE.isMachineConstantPoolEntry())
+      assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
+
+    // Emit the constant to the constant pool section.
+    EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPool);
+  }
+}
+
+/// emitJumpTables - Emit all the jump tables for a given jump table info
+/// record to the appropriate section.
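+/// For a table with N target blocks this emits N zeroed entry-sized slots,
+/// each paired with an absolute-label relocation that finishFunction above
+/// resolves to the target MBB's offset once block layout is known.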
+void ELFCodeEmitter::emitJumpTables(MachineJumpTableInfo *MJTI) {
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  // FIXME: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf jump tables!");
+
+  const TargetELFWriterInfo *TEW = TM.getELFWriterInfo();
+  unsigned EntrySize = 4; //MJTI->getEntrySize();
+
+  // Get the ELF Section to emit the jump table.
+  ELFSection &JTSection = EW.getJumpTableSection();
+
+  // For each JT, record its offset from the start of the section.
+  for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+    const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+
+    // Record JT 'i' offset in the JT section.
+    JTLocations.push_back(JTSection.size());
+
+    // Each MBB entry in the Jump table section has a relocation entry
+    // against the current text section.
+    for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+      unsigned MachineRelTy = TEW->getAbsoluteLabelMachineRelTy();
+      MachineRelocation MR =
+        MachineRelocation::getBB(JTSection.size(), MachineRelTy, MBBs[mi]);
+
+      // Add the relocation to the Jump Table section.
+      JTRelocations.push_back(MR);
+
+      // Output placeholder for MBB in the JT section.
+      for (unsigned s = 0; s < EntrySize; ++s)
+        JTSection.emitByte(0);
+    }
+  }
+}
+
+} // end namespace llvm
diff --git a/final/lib/CodeGen/ELFCodeEmitter.h b/final/lib/CodeGen/ELFCodeEmitter.h
new file mode 100644
index 00000000000..2ec1f6e873d
--- /dev/null
+++ b/final/lib/CodeGen/ELFCodeEmitter.h
@@ -0,0 +1,78 @@
+//===-- lib/CodeGen/ELFCodeEmitter.h ----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFCODEEMITTER_H
+#define ELFCODEEMITTER_H
+
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include <vector>
+
+namespace llvm {
+  class ELFWriter;
+  class ELFSection;
+
+  /// ELFCodeEmitter - This class is used by the ELFWriter to
+  /// emit the code for functions to the ELF file.
+  class ELFCodeEmitter : public ObjectCodeEmitter {
+    ELFWriter &EW;
+
+    /// Target machine description
+    TargetMachine &TM;
+
+    /// Section containing code for functions
+    ELFSection *ES;
+
+    /// Relocations - Record relocations needed by the current function
+    std::vector<MachineRelocation> Relocations;
+
+    /// JTRelocations - Record relocations needed by the relocation
+    /// section.
+    std::vector<MachineRelocation> JTRelocations;
+
+    /// FnStartOff - Function offset from the beginning of ELFSection 'ES'
+    uintptr_t FnStartOff;
+  public:
+    explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {}
+
+    /// addRelocation - Register new relocations for this function
+    void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    /// emitConstantPool - For each constant pool entry, figure out which
+    /// section the constant should live in and emit data to it
+    void emitConstantPool(MachineConstantPool *MCP);
+
+    /// emitJumpTables - Emit all the jump tables for a given jump table
+    /// info and record them to the appropriate section.
+    void emitJumpTables(MachineJumpTableInfo *MJTI);
+
+    void startFunction(MachineFunction &F);
+    bool finishFunction(MachineFunction &F);
+
+    /// emitLabel - Emits a label
+    virtual void emitLabel(MCSymbol *Label) {
+      assert(0 && "emitLabel not implemented");
+    }
+
+    /// getLabelAddress - Return the address of the specified LabelID,
+    /// only usable after the LabelID has been emitted.
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+      assert(0 && "getLabelAddress not implemented");
+      return 0;
+    }
+
+    virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) {}
+
+}; // end class ELFCodeEmitter
+
+} // end namespace llvm
+
+#endif
+
diff --git a/final/lib/CodeGen/ELFWriter.cpp b/final/lib/CodeGen/ELFWriter.cpp
new file mode 100644
index 00000000000..0fd1e8e83bd
--- /dev/null
+++ b/final/lib/CodeGen/ELFWriter.cpp
@@ -0,0 +1,1103 @@
+//===-- ELFWriter.cpp - Target-independent ELF Writer code ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the target-independent ELF writer. This file writes
+// out the ELF file in the following order:
+//
+//  #1. ELF Header
+//  #2. '.text' section
+//  #3. '.data' section
+//  #4. '.bss' section (conceptual position in file)
+//  ...
+//  #X. '.shstrtab' section
+//  #Y. Section Table
+//
+// The entries in the section table are laid out as:
+//  #0. Null entry [required]
+//  #1. ".text" entry - the program code
+//  #2. ".data" entry - global variables with initializers. [ if needed ]
+//  #3. ".bss" entry - global variables without initializers. [ if needed ]
+//  ...
+//  #N. ".shstrtab" entry - String table for the section names.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "elfwriter"
+#include "ELF.h"
+#include "ELFWriter.h"
+#include "ELFCodeEmitter.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+using namespace llvm;
+
+char ELFWriter::ID = 0;
+
+//===----------------------------------------------------------------------===//
+//                          ELFWriter Implementation
+//===----------------------------------------------------------------------===//
+
+ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
+  : MachineFunctionPass(ID), O(o), TM(tm),
+    OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
+    TLOF(TM.getTargetLowering()->getObjFileLowering()),
+    is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
+    isLittleEndian(TM.getTargetData()->isLittleEndian()),
+    ElfHdr(isLittleEndian, is64Bit) {
+
+  MAI = TM.getMCAsmInfo();
+  TEW = TM.getELFWriterInfo();
+
+  // Create the object code emitter object for this target.
+  ElfCE = new ELFCodeEmitter(*this);
+
+  // Initial number of sections.
+  NumSections = 0;
+}
+
+ELFWriter::~ELFWriter() {
+  delete ElfCE;
+  delete &OutContext;
+
+  while(!SymbolList.empty()) {
+    delete SymbolList.back();
+    SymbolList.pop_back();
+  }
+
+  while(!PrivateSyms.empty()) {
+    delete PrivateSyms.back();
+    PrivateSyms.pop_back();
+  }
+
+  while(!SectionList.empty()) {
+    delete SectionList.back();
+    SectionList.pop_back();
+  }
+
+  // Release the name mangler object.
+  delete Mang; Mang = 0;
+}
+
+// doInitialization - Emit the file header and all of the global variables
+// for the module to the ELF file.
+bool ELFWriter::doInitialization(Module &M) {
+  // Initialize TargetLoweringObjectFile.
+  const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(OutContext, TM);
+
+  Mang = new Mangler(OutContext, *TM.getTargetData());
+
+  // ELF Header
+  // ----------
+  // Fields e_shnum and e_shstrndx are only known after all sections have
+  // been emitted. Their locations in the output buffer are recorded so
+  // they can be patched up later.
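+  // (A sketch of that patch-up, assuming BinaryObject's fixWord16(value,
+  // offset) helper: once the final count is known, the placeholder emitted
+  // below is overwritten in place with
+  //   ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset);
+  // and likewise for e_shstrndx.)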
+  //
+  // Note
+  // ----
+  // The emitWord method behaves differently for ELF32 and ELF64, writing
+  // 4 bytes in the former and 8 in the latter for *_off and *_addr elf types.
+
+  ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0]
+  ElfHdr.emitByte('E');  // e_ident[EI_MAG1]
+  ElfHdr.emitByte('L');  // e_ident[EI_MAG2]
+  ElfHdr.emitByte('F');  // e_ident[EI_MAG3]
+
+  ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS]
+  ElfHdr.emitByte(TEW->getEIData());  // e_ident[EI_DATA]
+  ElfHdr.emitByte(ELF::EV_CURRENT);   // e_ident[EI_VERSION]
+  ElfHdr.emitAlignment(16);           // e_ident[EI_NIDENT-EI_PAD]
+
+  ElfHdr.emitWord16(ELF::ET_REL);        // e_type
+  ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target
+  ElfHdr.emitWord32(ELF::EV_CURRENT);    // e_version
+  ElfHdr.emitWord(0);                    // e_entry, no entry point in .o file
+  ElfHdr.emitWord(0);                    // e_phoff, no program header for .o
+  ELFHdr_e_shoff_Offset = ElfHdr.size();
+  ElfHdr.emitWord(0);                    // e_shoff = sec hdr table off in bytes
+  ElfHdr.emitWord32(TEW->getEFlags());   // e_flags = whatever the target wants
+  ElfHdr.emitWord16(TEW->getHdrSize());  // e_ehsize = ELF header size
+  ElfHdr.emitWord16(0);                  // e_phentsize = prog header entry size
+  ElfHdr.emitWord16(0);                  // e_phnum = # prog header entries = 0
+
+  // e_shentsize = Section header entry size
+  ElfHdr.emitWord16(TEW->getSHdrSize());
+
+  // e_shnum = # of section header ents
+  ELFHdr_e_shnum_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
+
+  // e_shstrndx = Section # of '.shstrtab'
+  ELFHdr_e_shstrndx_Offset = ElfHdr.size();
+  ElfHdr.emitWord16(0); // Placeholder
+
+  // Add the null section, which is required to be first in the file.
+  getNullSection();
+
+  // The first entry in the symtab is the null symbol, and the second
+  // is a local symbol containing the module/file name.
+  SymbolList.push_back(new ELFSym());
+  SymbolList.push_back(ELFSym::getFileSym());
+
+  return false;
+}
+
+// AddPendingGlobalSymbol - Add a global to be processed and to
+// the global symbol lookup; use a zero index because the table
+// index will be determined later.
+void ELFWriter::AddPendingGlobalSymbol(const GlobalValue *GV,
+                                       bool AddToLookup /* = false */) {
+  PendingGlobals.insert(GV);
+  if (AddToLookup)
+    GblSymLookup[GV] = 0;
+}
+
+// AddPendingExternalSymbol - Add the external to be processed
+// and to the external symbol lookup; use a zero index because
+// the symbol table index will be determined later.
+void ELFWriter::AddPendingExternalSymbol(const char *External) { + PendingExternals.insert(External); + ExtSymLookup[External] = 0; +} + +ELFSection &ELFWriter::getDataSection() { + const MCSectionELF *Data = (const MCSectionELF *)TLOF.getDataSection(); + return getSection(Data->getSectionName(), Data->getType(), + Data->getFlags(), 4); +} + +ELFSection &ELFWriter::getBSSSection() { + const MCSectionELF *BSS = (const MCSectionELF *)TLOF.getBSSSection(); + return getSection(BSS->getSectionName(), BSS->getType(), BSS->getFlags(), 4); +} + +// getCtorSection - Get the static constructor section +ELFSection &ELFWriter::getCtorSection() { + const MCSectionELF *Ctor = (const MCSectionELF *)TLOF.getStaticCtorSection(); + return getSection(Ctor->getSectionName(), Ctor->getType(), Ctor->getFlags()); +} + +// getDtorSection - Get the static destructor section +ELFSection &ELFWriter::getDtorSection() { + const MCSectionELF *Dtor = (const MCSectionELF *)TLOF.getStaticDtorSection(); + return getSection(Dtor->getSectionName(), Dtor->getType(), Dtor->getFlags()); +} + +// getTextSection - Get the text section for the specified function +ELFSection &ELFWriter::getTextSection(const Function *F) { + const MCSectionELF *Text = + (const MCSectionELF *)TLOF.SectionForGlobal(F, Mang, TM); + return getSection(Text->getSectionName(), Text->getType(), Text->getFlags()); +} + +// getJumpTableSection - Get a read only section for constants when +// emitting jump tables. TODO: add PIC support +ELFSection &ELFWriter::getJumpTableSection() { + const MCSectionELF *JT = + (const MCSectionELF *)TLOF.getSectionForConstant(SectionKind::getReadOnly()); + return getSection(JT->getSectionName(), JT->getType(), JT->getFlags(), + TM.getTargetData()->getPointerABIAlignment()); +} + +// getConstantPoolSection - Get a constant pool section based on the machine +// constant pool entry type and relocation info. +ELFSection &ELFWriter::getConstantPoolSection(MachineConstantPoolEntry &CPE) { + SectionKind Kind; + switch (CPE.getRelocationInfo()) { + default: llvm_unreachable("Unknown section kind"); + case 2: Kind = SectionKind::getReadOnlyWithRel(); break; + case 1: + Kind = SectionKind::getReadOnlyWithRelLocal(); + break; + case 0: + switch (TM.getTargetData()->getTypeAllocSize(CPE.getType())) { + case 4: Kind = SectionKind::getMergeableConst4(); break; + case 8: Kind = SectionKind::getMergeableConst8(); break; + case 16: Kind = SectionKind::getMergeableConst16(); break; + default: Kind = SectionKind::getMergeableConst(); break; + } + } + + const MCSectionELF *CPSect = + (const MCSectionELF *)TLOF.getSectionForConstant(Kind); + return getSection(CPSect->getSectionName(), CPSect->getType(), + CPSect->getFlags(), CPE.getAlignment()); +} + +// getRelocSection - Return the relocation section of section 'S'. 'RelA' +// is true if the relocation section contains entries with addends. +ELFSection &ELFWriter::getRelocSection(ELFSection &S) { + unsigned SectionType = TEW->hasRelocationAddend() ? 
+                            ELF::SHT_RELA : ELF::SHT_REL;
+
+  std::string SectionName(".rel");
+  if (TEW->hasRelocationAddend())
+    SectionName.append("a");
+  SectionName.append(S.getName());
+
+  return getSection(SectionName, SectionType, 0, TEW->getPrefELFAlignment());
+}
+
+// getGlobalELFVisibility - Returns the ELF specific visibility type
+unsigned ELFWriter::getGlobalELFVisibility(const GlobalValue *GV) {
+  switch (GV->getVisibility()) {
+  default:
+    llvm_unreachable("unknown visibility type");
+  case GlobalValue::DefaultVisibility:
+    return ELF::STV_DEFAULT;
+  case GlobalValue::HiddenVisibility:
+    return ELF::STV_HIDDEN;
+  case GlobalValue::ProtectedVisibility:
+    return ELF::STV_PROTECTED;
+  }
+  return 0;
+}
+
+// getGlobalELFBinding - Returns the ELF specific binding type
+unsigned ELFWriter::getGlobalELFBinding(const GlobalValue *GV) {
+  if (GV->hasInternalLinkage())
+    return ELF::STB_LOCAL;
+
+  if (GV->isWeakForLinker() && !GV->hasCommonLinkage())
+    return ELF::STB_WEAK;
+
+  return ELF::STB_GLOBAL;
+}
+
+// getGlobalELFType - Returns the ELF specific type for a global
+unsigned ELFWriter::getGlobalELFType(const GlobalValue *GV) {
+  if (GV->isDeclaration())
+    return ELF::STT_NOTYPE;
+
+  if (isa<Function>(GV))
+    return ELF::STT_FUNC;
+
+  return ELF::STT_OBJECT;
+}
+
+// IsELFUndefSym - True if the global value must be marked as a symbol
+// which points to a SHN_UNDEF section. This means that the symbol has
+// no definition in the module.
+static bool IsELFUndefSym(const GlobalValue *GV) {
+  return GV->isDeclaration() || (isa<Function>(GV));
+}
+
+// AddToSymbolList - Update the symbol lookup and, if the symbol is
+// private, add it to the PrivateSyms list, otherwise to SymbolList.
+void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
+  assert(GblSym->isGlobalValue() && "Symbol must be a global value");
+
+  const GlobalValue *GV = GblSym->getGlobalValue();
+  if (GV->hasPrivateLinkage()) {
+    // For private symbols, keep track of the index inside
+    // the private list since it will never go to the symbol
+    // table and won't be patched up later.
+    PrivateSyms.push_back(GblSym);
+    GblSymLookup[GV] = PrivateSyms.size()-1;
+  } else {
+    // Non-private symbols are left with zero indices until
+    // they are patched up during the symbol table emission
+    // (where the indices are created).
+    SymbolList.push_back(GblSym);
+    GblSymLookup[GV] = 0;
+  }
+}
+
+/// HasCommonSymbols - True if this section holds common symbols; this is
+/// indicated on the ELF object file by a symbol with SHN_COMMON section
+/// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+  // FIXME: this is wrong, a common symbol can be in .data for example.
+  if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+    return true;
+
+  return false;
+}
+
+
+// EmitGlobal - Choose the right section for the global and emit it.
+void ELFWriter::EmitGlobal(const GlobalValue *GV) {
+
+  // Check if the referenced symbol is already emitted.
+  if (GblSymLookup.find(GV) != GblSymLookup.end())
+    return;
+
+  // Handle ELF Bind, Visibility and Type for the current symbol.
+  unsigned SymBind = getGlobalELFBinding(GV);
+  unsigned SymType = getGlobalELFType(GV);
+  bool IsUndefSym = IsELFUndefSym(GV);
+
+  ELFSym *GblSym = IsUndefSym ? ELFSym::getUndefGV(GV, SymBind)
+    : ELFSym::getGV(GV, SymBind, SymType, getGlobalELFVisibility(GV));
+
+  if (!IsUndefSym) {
+    assert(isa<GlobalVariable>(GV) && "GV not a global variable!");
+    const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+    // Handle special llvm globals.
+    if (EmitSpecialLLVMGlobal(GVar))
+      return;
+
+    // Get the ELF section where this global belongs from TLOF.
+    const MCSectionELF *S =
+      (const MCSectionELF *)TLOF.SectionForGlobal(GV, Mang, TM);
+    ELFSection &ES =
+      getSection(S->getSectionName(), S->getType(), S->getFlags());
+    SectionKind Kind = S->getKind();
+
+    // The symbol align should update the section alignment if needed.
+    const TargetData *TD = TM.getTargetData();
+    unsigned Align = TD->getPreferredAlignment(GVar);
+    unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
+    GblSym->Size = Size;
+
+    if (HasCommonSymbols(*S)) { // Symbol must go to a common section.
+      GblSym->SectionIdx = ELF::SHN_COMMON;
+
+      // A new linkonce section is created for each global in the
+      // common section; the default alignment is 1 and the symbol
+      // value contains its alignment.
+      ES.Align = 1;
+      GblSym->Value = Align;
+
+    } else if (Kind.isBSS() || Kind.isThreadBSS()) { // Symbol goes to BSS.
+      GblSym->SectionIdx = ES.SectionIdx;
+
+      // Update the size with alignment so that the next object can
+      // start at the right offset in the section.
+      if (Align) ES.Size = (ES.Size + Align-1) & ~(Align-1);
+      ES.Align = std::max(ES.Align, Align);
+
+      // GblSym->Value should contain the virtual offset inside the section.
+      // Virtual because the BSS space is not allocated on ELF objects.
+      GblSym->Value = ES.Size;
+      ES.Size += Size;
+
+    } else { // The symbol must go to some kind of data section.
+      GblSym->SectionIdx = ES.SectionIdx;
+
+      // GblSym->Value should contain the symbol offset inside the section,
+      // and all symbols should start on their required alignment boundary.
+      ES.Align = std::max(ES.Align, Align);
+      ES.emitAlignment(Align);
+      GblSym->Value = ES.size();
+
+      // Emit the global to the data section 'ES'.
+      EmitGlobalConstant(GVar->getInitializer(), ES);
+    }
+  }
+
+  AddToSymbolList(GblSym);
+}
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                         ELFSection &GblS) {
+
+  // Print the fields in successive locations. Pad to align if needed!
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CVS->getType());
+  const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+  uint64_t sizeSoFar = 0;
+  for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+    const Constant* field = CVS->getOperand(i);
+
+    // Check if padding is needed and insert one or more 0s.
+    uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+    uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+                        - cvsLayout->getElementOffset(i)) - fieldSize;
+    sizeSoFar += fieldSize + padSize;
+
+    // Now print the actual field value.
+    EmitGlobalConstant(field, GblS);
+
+    // Insert padding - this may include padding to increase the size of the
+    // current field up to the ABI size (if the struct is not packed) as well
+    // as padding to ensure that the next field starts at the right offset.
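+    // Worked example (hypothetical layout): for { i8, i32 } with the i32 at
+    // offset 4, field 0 gets fieldSize 1 and padSize (4 - 0) - 1 = 3, so
+    // three zero bytes follow the i8 before the i32 is emitted.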
+
+void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                         ELFSection &GblS) {
+
+  // Print the fields in successive locations. Pad to align if needed!
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CVS->getType());
+  const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType());
+  uint64_t sizeSoFar = 0;
+  for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) {
+    const Constant* field = CVS->getOperand(i);
+
+    // Check if padding is needed and insert one or more 0s.
+    uint64_t fieldSize = TD->getTypeAllocSize(field->getType());
+    uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1))
+                        - cvsLayout->getElementOffset(i)) - fieldSize;
+    sizeSoFar += fieldSize + padSize;
+
+    // Now print the actual field value.
+    EmitGlobalConstant(field, GblS);
+
+    // Insert padding - this may include padding to increase the size of the
+    // current field up to the ABI size (if the struct is not packed) as well
+    // as padding to ensure that the next field starts at the right offset.
+    GblS.emitZeros(padSize);
+  }
+  assert(sizeSoFar == cvsLayout->getSizeInBytes() &&
+         "Layout of constant struct may be incorrect!");
+}
+
+void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) {
+  const TargetData *TD = TM.getTargetData();
+  unsigned Size = TD->getTypeAllocSize(CV->getType());
+
+  if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) {
+    for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i)
+      EmitGlobalConstant(CVA->getOperand(i), GblS);
+    return;
+  } else if (isa<ConstantAggregateZero>(CV)) {
+    GblS.emitZeros(Size);
+    return;
+  } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) {
+    EmitGlobalConstantStruct(CVS, GblS);
+    return;
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    APInt Val = CFP->getValueAPF().bitcastToAPInt();
+    if (CFP->getType()->isDoubleTy())
+      GblS.emitWord64(Val.getZExtValue());
+    else if (CFP->getType()->isFloatTy())
+      GblS.emitWord32(Val.getZExtValue());
+    else if (CFP->getType()->isX86_FP80Ty()) {
+      unsigned PadSize = TD->getTypeAllocSize(CFP->getType())-
+                         TD->getTypeStoreSize(CFP->getType());
+      GblS.emitWordFP80(Val.getRawData(), PadSize);
+    } else if (CFP->getType()->isPPC_FP128Ty())
+      llvm_unreachable("PPC_FP128Ty global emission not implemented");
+    return;
+  } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    if (Size == 1)
+      GblS.emitByte(CI->getZExtValue());
+    else if (Size == 2)
+      GblS.emitWord16(CI->getZExtValue());
+    else if (Size == 4)
+      GblS.emitWord32(CI->getZExtValue());
+    else
+      EmitGlobalConstantLargeInt(CI, GblS);
+    return;
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    const VectorType *PTy = CP->getType();
+    for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I)
+      EmitGlobalConstant(CP->getOperand(I), GblS);
+    return;
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    // Resolve a constant expression which returns a (Constant, Offset)
+    // pair. If 'Res.first' is a GlobalValue, emit a relocation with
+    // the offset 'Res.second'; otherwise emit a global constant as is
+    // always done for non-constant-expression types.
+    CstExprResTy Res = ResolveConstantExpr(CE);
+    const Constant *Op = Res.first;
+
+    if (isa<GlobalValue>(Op))
+      EmitGlobalDataRelocation(cast<GlobalValue>(Op),
+                               TD->getTypeAllocSize(Op->getType()),
+                               GblS, Res.second);
+    else
+      EmitGlobalConstant(Op, GblS);
+
+    return;
+  } else if (CV->getType()->getTypeID() == Type::PointerTyID) {
+    // Fill the data entry with zeros or emit a relocation entry
+    if (isa<ConstantPointerNull>(CV))
+      GblS.emitZeros(Size);
+    else
+      EmitGlobalDataRelocation(cast<GlobalValue>(CV),
+                               Size, GblS);
+    return;
+  } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+    // This is a constant address for a global variable or function and
+    // therefore must be referenced using a relocation entry.
+    EmitGlobalDataRelocation(GV, Size, GblS);
+    return;
+  }
+
+  std::string msg;
+  raw_string_ostream ErrorMsg(msg);
+  ErrorMsg << "Constant unimp for type: " << *CV->getType();
+  report_fatal_error(ErrorMsg.str());
+}
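+
+// Worked example (editorial, assuming a 32-bit x86-style TargetData): for
+// the constant struct { i8, i32 }, getStructLayout() gives element offsets
+// {0, 4}, so EmitGlobalConstantStruct emits one byte for the i8 field,
+// then GblS.emitZeros(3) of padding, then the four bytes of the i32.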
+
+// ResolveConstantExpr - Resolve the constant expression until it stops
+// yielding other constant expressions.
+CstExprResTy ELFWriter::ResolveConstantExpr(const Constant *CV) {
+  const TargetData *TD = TM.getTargetData();
+
+  // There isn't a constant expression nested inside another one anymore.
+  if (!isa<ConstantExpr>(CV))
+    return std::make_pair(CV, 0);
+
+  const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV);
+  switch (CE->getOpcode()) {
+  case Instruction::BitCast:
+    return ResolveConstantExpr(CE->getOperand(0));
+
+  case Instruction::GetElementPtr: {
+    const Constant *ptrVal = CE->getOperand(0);
+    SmallVector<Value*, 8> idxVec(CE->op_begin()+1, CE->op_end());
+    int64_t Offset = TD->getIndexedOffset(ptrVal->getType(), &idxVec[0],
+                                          idxVec.size());
+    return std::make_pair(ptrVal, Offset);
+  }
+  case Instruction::IntToPtr: {
+    Constant *Op = CE->getOperand(0);
+    Op = ConstantExpr::getIntegerCast(Op, TD->getIntPtrType(CV->getContext()),
+                                      false/*ZExt*/);
+    return ResolveConstantExpr(Op);
+  }
+  case Instruction::PtrToInt: {
+    Constant *Op = CE->getOperand(0);
+    const Type *Ty = CE->getType();
+
+    // We can emit the pointer value into this slot if the slot is an
+    // integer slot greater than or equal to the size of the pointer.
+    if (TD->getTypeAllocSize(Ty) == TD->getTypeAllocSize(Op->getType()))
+      return ResolveConstantExpr(Op);
+
+    llvm_unreachable("Integer size less than pointer size");
+  }
+  case Instruction::Add:
+  case Instruction::Sub: {
+    // Only handle cases where there's a constant expression with GlobalValue
+    // as first operand and ConstantInt as second, which are the cases we can
+    // solve directly using a relocation entry. GlobalValue=Op0, CstInt=Op1
+    //  1) Instruction::Add => (global) + CstInt
+    //  2) Instruction::Sub => (global) + -CstInt
+    const Constant *Op0 = CE->getOperand(0);
+    const Constant *Op1 = CE->getOperand(1);
+    assert(isa<ConstantInt>(Op1) && "Op1 must be a ConstantInt");
+
+    CstExprResTy Res = ResolveConstantExpr(Op0);
+    assert(isa<GlobalValue>(Res.first) && "Op0 must be a GlobalValue");
+
+    const APInt &RHS = cast<ConstantInt>(Op1)->getValue();
+    switch (CE->getOpcode()) {
+    case Instruction::Add:
+      return std::make_pair(Res.first, RHS.getSExtValue());
+    case Instruction::Sub:
+      return std::make_pair(Res.first, (-RHS).getSExtValue());
+    }
+  }
+  }
+
+  report_fatal_error(CE->getOpcodeName() +
+                     StringRef(": Unsupported ConstantExpr type"));
+
+  return std::make_pair(CV, 0); // silence warning
+}
+
+void ELFWriter::EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+                                         ELFSection &GblS, int64_t Offset) {
+  // Create the relocation entry for the global value
+  MachineRelocation MR =
+    MachineRelocation::getGV(GblS.getCurrentPCOffset(),
+                             TEW->getAbsoluteLabelMachineRelTy(),
+                             const_cast<GlobalValue*>(GV),
+                             Offset);
+
+  // Fill the data entry with zeros
+  GblS.emitZeros(Size);
+
+  // Add the relocation entry for the current data section
+  GblS.addRelocation(MR);
+}
+
+void ELFWriter::EmitGlobalConstantLargeInt(const ConstantInt *CI,
+                                           ELFSection &S) {
+  const TargetData *TD = TM.getTargetData();
+  unsigned BitWidth = CI->getBitWidth();
+  assert(isPowerOf2_32(BitWidth) &&
+         "Non-power-of-2-sized integers not handled!");
+
+  const uint64_t *RawData = CI->getValue().getRawData();
+  uint64_t Val = 0;
+  for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) {
+    Val = (TD->isBigEndian()) ? RawData[e - i - 1] : RawData[i];
+    S.emitWord64(Val);
+  }
+}
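+
+// Worked example (editorial): an i128 constant is held by APInt as two
+// uint64_t words with RawData[0] the least significant; on a little-endian
+// target the loop above emits RawData[0] then RawData[1], while on a
+// big-endian target it emits RawData[1] first.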
+
+/// EmitSpecialLLVMGlobal - Check to see if the specified global is a
+/// special global used by LLVM. If so, emit it and return true, otherwise
+/// do nothing and return false.
+bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
+  if (GV->getName() == "llvm.used")
+    llvm_unreachable("not implemented yet");
+
+  // Ignore debug and non-emitted data. This handles llvm.compiler.used.
+  if (GV->getSection() == "llvm.metadata" ||
+      GV->hasAvailableExternallyLinkage())
+    return true;
+
+  if (!GV->hasAppendingLinkage()) return false;
+
+  assert(GV->hasInitializer() && "Not a special LLVM global!");
+
+  const TargetData *TD = TM.getTargetData();
+  unsigned Align = TD->getPointerPrefAlignment();
+  if (GV->getName() == "llvm.global_ctors") {
+    ELFSection &Ctor = getCtorSection();
+    Ctor.emitAlignment(Align);
+    EmitXXStructorList(GV->getInitializer(), Ctor);
+    return true;
+  }
+
+  if (GV->getName() == "llvm.global_dtors") {
+    ELFSection &Dtor = getDtorSection();
+    Dtor.emitAlignment(Align);
+    EmitXXStructorList(GV->getInitializer(), Dtor);
+    return true;
+  }
+
+  return false;
+}
+
+/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the
+/// function pointers, ignoring the init priority.
+void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+  // Should be an array of '{ int, void ()* }' structs. The first value is the
+  // init priority, which we ignore.
+  if (!isa<ConstantArray>(List)) return;
+  ConstantArray *InitList = cast<ConstantArray>(List);
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
+      if (CS->getNumOperands() != 2) return; // Not an array of 2-element structs.
+
+      if (CS->getOperand(1)->isNullValue())
+        return; // Found a null terminator, exit printing.
+      // Emit the function pointer.
+      EmitGlobalConstant(CS->getOperand(1), Xtor);
+    }
+}
+
+bool ELFWriter::runOnMachineFunction(MachineFunction &MF) {
+  // Nothing to do here, this is all done through the ElfCE object above.
+  return false;
+}
+
+/// doFinalization - Now that the module has been completely processed, emit
+/// the ELF file to 'O'.
+bool ELFWriter::doFinalization(Module &M) {
+  // Emit .data section placeholder
+  getDataSection();
+
+  // Emit .bss section placeholder
+  getBSSSection();
+
+  // Build and emit data, bss and "common" sections.
+  for (Module::global_iterator I = M.global_begin(), E = M.global_end();
+       I != E; ++I)
+    EmitGlobal(I);
+
+  // Emit all pending globals
+  for (PendingGblsIter I = PendingGlobals.begin(), E = PendingGlobals.end();
+       I != E; ++I)
+    EmitGlobal(*I);
+
+  // Emit all pending externals
+  for (PendingExtsIter I = PendingExternals.begin(), E = PendingExternals.end();
+       I != E; ++I)
+    SymbolList.push_back(ELFSym::getExtSym(*I));
+
+  // Emit a symbol for each section created until now, skip null section
+  for (unsigned i = 1, e = SectionList.size(); i < e; ++i) {
+    ELFSection &ES = *SectionList[i];
+    ELFSym *SectionSym = ELFSym::getSectionSym();
+    SectionSym->SectionIdx = ES.SectionIdx;
+    SymbolList.push_back(SectionSym);
+    ES.Sym = SymbolList.back();
+  }
+
+  // Emit string table
+  EmitStringTable(M.getModuleIdentifier());
+
+  // Emit the symbol table now, if non-empty.
+  EmitSymbolTable();
+
+  // Emit the relocation sections.
+  EmitRelocations();
+
+  // Emit the sections string table.
+  EmitSectionTableStringTable();
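+
+  // Editorial note: after the steps above, the file image produced by the
+  // call below is simply
+  //   ELF header | section data (incl. .symtab/.strtab/.shstrtab) |
+  //   section header table
+  // with e_shoff/e_shnum patched back into the header once the final
+  // offsets are known.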
+
+  // Dump the sections and section table to the .o file.
+  OutputSectionsAndSectionTable();
+
+  return false;
+}
+
+// RelocateField - Patch the relocatable field at 'Offset' in 'BO'
+// using a 'Value' of known 'Size'
+void ELFWriter::RelocateField(BinaryObject &BO, uint32_t Offset,
+                              int64_t Value, unsigned Size) {
+  if (Size == 32)
+    BO.fixWord32(Value, Offset);
+  else if (Size == 64)
+    BO.fixWord64(Value, Offset);
+  else
+    llvm_unreachable("don't know how to patch relocatable field");
+}
+
+/// EmitRelocations - Emit relocations
+void ELFWriter::EmitRelocations() {
+
+  // True if the target uses the relocation entry to hold the addend,
+  // otherwise the addend is written directly to the relocatable field.
+  bool HasRelA = TEW->hasRelocationAddend();
+
+  // Create Relocation sections for each section which needs it.
+  for (unsigned i=0, e=SectionList.size(); i != e; ++i) {
+    ELFSection &S = *SectionList[i];
+
+    // This section does not have relocations
+    if (!S.hasRelocations()) continue;
+    ELFSection &RelSec = getRelocSection(S);
+
+    // 'Link' - Section hdr idx of the associated symbol table
+    // 'Info' - Section hdr idx of the section to which the relocation applies
+    ELFSection &SymTab = getSymbolTableSection();
+    RelSec.Link = SymTab.SectionIdx;
+    RelSec.Info = S.SectionIdx;
+    RelSec.EntSize = TEW->getRelocationEntrySize();
+
+    // Get the relocations from the section
+    std::vector<MachineRelocation> Relos = S.getRelocations();
+    for (std::vector<MachineRelocation>::iterator MRI = Relos.begin(),
+         MRE = Relos.end(); MRI != MRE; ++MRI) {
+      MachineRelocation &MR = *MRI;
+
+      // Relocatable field offset from the section start
+      unsigned RelOffset = MR.getMachineCodeOffset();
+
+      // Symbol index in the symbol table
+      unsigned SymIdx = 0;
+
+      // Target specific relocation field type and size
+      unsigned RelType = TEW->getRelocationType(MR.getRelocationType());
+      unsigned RelTySize = TEW->getRelocationTySize(RelType);
+      int64_t Addend = 0;
+
+      // There are several machine relocation types, and each one of
+      // them needs a different approach to retrieve the symbol table index.
+      if (MR.isGlobalValue()) {
+        const GlobalValue *G = MR.getGlobalValue();
+        int64_t GlobalOffset = MR.getConstantVal();
+        SymIdx = GblSymLookup[G];
+        if (G->hasPrivateLinkage()) {
+          // If the target uses a section offset in the relocation:
+          // SymIdx + Addend = section sym for global + section offset
+          unsigned SectionIdx = PrivateSyms[SymIdx]->SectionIdx;
+          Addend = PrivateSyms[SymIdx]->Value + GlobalOffset;
+          SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+        } else {
+          Addend = TEW->getDefaultAddendForRelTy(RelType, GlobalOffset);
+        }
+      } else if (MR.isExternalSymbol()) {
+        const char *ExtSym = MR.getExternalSymbol();
+        SymIdx = ExtSymLookup[ExtSym];
+        Addend = TEW->getDefaultAddendForRelTy(RelType);
+      } else {
+        // Get the symbol index for the section symbol
+        unsigned SectionIdx = MR.getConstantVal();
+        SymIdx = SectionList[SectionIdx]->getSymbolTableIndex();
+
+        // The symbol offset inside the section
+        int64_t SymOffset = (int64_t)MR.getResultPointer();
+
+        // For pc-relative relocations where symbols are defined in the same
+        // section they are referenced, ignore the relocation entry and patch
+        // the relocatable field with the symbol offset directly.
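+        // (Editorial example: a pc-relative branch at offset 0x10 to a
+        //  local symbol at offset 0x40 of the same section needs no
+        //  relocation entry; computeRelocation() yields the 0x40 - 0x10
+        //  style displacement, possibly adjusted by a target-specific
+        //  bias, and it is patched in directly below.)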
+ if (S.SectionIdx == SectionIdx && TEW->isPCRelativeRel(RelType)) { + int64_t Value = TEW->computeRelocation(SymOffset, RelOffset, RelType); + RelocateField(S, RelOffset, Value, RelTySize); + continue; + } + + Addend = TEW->getDefaultAddendForRelTy(RelType, SymOffset); + } + + // The target without addend on the relocation symbol must be + // patched in the relocation place itself to contain the addend + // otherwise write zeros to make sure there is no garbage there + RelocateField(S, RelOffset, HasRelA ? 0 : Addend, RelTySize); + + // Get the relocation entry and emit to the relocation section + ELFRelocation Rel(RelOffset, SymIdx, RelType, HasRelA, Addend); + EmitRelocation(RelSec, Rel, HasRelA); + } + } +} + +/// EmitRelocation - Write relocation 'Rel' to the relocation section 'Rel' +void ELFWriter::EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, + bool HasRelA) { + RelSec.emitWord(Rel.getOffset()); + RelSec.emitWord(Rel.getInfo(is64Bit)); + if (HasRelA) + RelSec.emitWord(Rel.getAddend()); +} + +/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' +void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { + if (is64Bit) { + SymbolTable.emitWord32(Sym.NameIdx); + SymbolTable.emitByte(Sym.Info); + SymbolTable.emitByte(Sym.Other); + SymbolTable.emitWord16(Sym.SectionIdx); + SymbolTable.emitWord64(Sym.Value); + SymbolTable.emitWord64(Sym.Size); + } else { + SymbolTable.emitWord32(Sym.NameIdx); + SymbolTable.emitWord32(Sym.Value); + SymbolTable.emitWord32(Sym.Size); + SymbolTable.emitByte(Sym.Info); + SymbolTable.emitByte(Sym.Other); + SymbolTable.emitWord16(Sym.SectionIdx); + } +} + +/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' +/// Section Header Table +void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, + const ELFSection &SHdr) { + SHdrTab.emitWord32(SHdr.NameIdx); + SHdrTab.emitWord32(SHdr.Type); + if (is64Bit) { + SHdrTab.emitWord64(SHdr.Flags); + SHdrTab.emitWord(SHdr.Addr); + SHdrTab.emitWord(SHdr.Offset); + SHdrTab.emitWord64(SHdr.Size); + SHdrTab.emitWord32(SHdr.Link); + SHdrTab.emitWord32(SHdr.Info); + SHdrTab.emitWord64(SHdr.Align); + SHdrTab.emitWord64(SHdr.EntSize); + } else { + SHdrTab.emitWord32(SHdr.Flags); + SHdrTab.emitWord(SHdr.Addr); + SHdrTab.emitWord(SHdr.Offset); + SHdrTab.emitWord32(SHdr.Size); + SHdrTab.emitWord32(SHdr.Link); + SHdrTab.emitWord32(SHdr.Info); + SHdrTab.emitWord32(SHdr.Align); + SHdrTab.emitWord32(SHdr.EntSize); + } +} + +/// EmitStringTable - If the current symbol table is non-empty, emit the string +/// table for it +void ELFWriter::EmitStringTable(const std::string &ModuleName) { + if (!SymbolList.size()) return; // Empty symbol table. + ELFSection &StrTab = getStringTableSection(); + + // Set the zero'th symbol to a null byte, as required. + StrTab.emitByte(0); + + // Walk on the symbol list and write symbol names into the string table. + unsigned Index = 1; + for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) { + ELFSym &Sym = *(*I); + + std::string Name; + if (Sym.isGlobalValue()) { + SmallString<40> NameStr; + Mang->getNameWithPrefix(NameStr, Sym.getGlobalValue(), false); + Name.append(NameStr.begin(), NameStr.end()); + } else if (Sym.isExternalSym()) + Name.append(Sym.getExternalSymbol()); + else if (Sym.isFileType()) + Name.append(ModuleName); + + if (Name.empty()) { + Sym.NameIdx = 0; + } else { + Sym.NameIdx = Index; + StrTab.emitString(Name); + + // Keep track of the number of bytes emitted to this section. 
+      Index += Name.size()+1;
+    }
+  }
+  assert(Index == StrTab.size());
+  StrTab.Size = Index;
+}
+
+// SortSymbols - In the symbol table, local symbols must come before
+// all symbols with non-local bindings. The return value is
+// the position of the first non-local symbol.
+unsigned ELFWriter::SortSymbols() {
+  unsigned FirstNonLocalSymbol;
+  std::vector<ELFSym*> LocalSyms, OtherSyms;
+
+  for (ELFSymIter I=SymbolList.begin(), E=SymbolList.end(); I != E; ++I) {
+    if ((*I)->isLocalBind())
+      LocalSyms.push_back(*I);
+    else
+      OtherSyms.push_back(*I);
+  }
+  SymbolList.clear();
+  FirstNonLocalSymbol = LocalSyms.size();
+
+  for (unsigned i = 0; i < FirstNonLocalSymbol; ++i)
+    SymbolList.push_back(LocalSyms[i]);
+
+  for (ELFSymIter I=OtherSyms.begin(), E=OtherSyms.end(); I != E; ++I)
+    SymbolList.push_back(*I);
+
+  LocalSyms.clear();
+  OtherSyms.clear();
+
+  return FirstNonLocalSymbol;
+}
+
+/// EmitSymbolTable - Emit the symbol table itself.
+void ELFWriter::EmitSymbolTable() {
+  if (!SymbolList.size()) return; // Empty symbol table.
+
+  // Now that we have emitted the string table and know the offset into the
+  // string table of each symbol, emit the symbol table itself.
+  ELFSection &SymTab = getSymbolTableSection();
+  SymTab.Align = TEW->getPrefELFAlignment();
+
+  // Section Index of .strtab.
+  SymTab.Link = getStringTableSection().SectionIdx;
+
+  // Size of each symtab entry.
+  SymTab.EntSize = TEW->getSymTabEntrySize();
+
+  // Reorder the symbol table with local symbols first!
+  unsigned FirstNonLocalSymbol = SortSymbols();
+
+  // Emit all the symbols to the symbol table.
+  for (unsigned i = 0, e = SymbolList.size(); i < e; ++i) {
+    ELFSym &Sym = *SymbolList[i];
+
+    // Emit symbol to the symbol table
+    EmitSymbol(SymTab, Sym);
+
+    // Record the symbol table index for each symbol
+    if (Sym.isGlobalValue())
+      GblSymLookup[Sym.getGlobalValue()] = i;
+    else if (Sym.isExternalSym())
+      ExtSymLookup[Sym.getExternalSymbol()] = i;
+
+    // Keep track of the symbol index into the symbol table
+    Sym.SymTabIdx = i;
+  }
+
+  // One greater than the symbol table index of the last local symbol
+  SymTab.Info = FirstNonLocalSymbol;
+  SymTab.Size = SymTab.size();
+}
+
+/// EmitSectionTableStringTable - This method adds and emits a section for the
+/// ELF Section Table string table: the string table that holds all of the
+/// section names.
+void ELFWriter::EmitSectionTableStringTable() {
+  // First step: add the section for the string table to the list of sections:
+  ELFSection &SHStrTab = getSectionHeaderStringTableSection();
+
+  // Now that we know which section number is the .shstrtab section, update the
+  // e_shstrndx entry in the ELF header.
+  ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset);
+
+  // Set the NameIdx of each section in the string table and emit the bytes for
+  // the string table.
+  unsigned Index = 0;
+
+  for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) {
+    ELFSection &S = *(*I);
+    // Set the index into the table. Note if we have lots of entries with
+    // common suffixes, we could memoize them here if we cared.
+    S.NameIdx = Index;
+    SHStrTab.emitString(S.getName());
+
+    // Keep track of the number of bytes emitted to this section.
+    Index += S.getName().size()+1;
+  }
+
+  // Set the size of .shstrtab now that we know what it is.
+ assert(Index == SHStrTab.size()); + SHStrTab.Size = Index; +} + +/// OutputSectionsAndSectionTable - Now that we have constructed the file header +/// and all of the sections, emit these to the ostream destination and emit the +/// SectionTable. +void ELFWriter::OutputSectionsAndSectionTable() { + // Pass #1: Compute the file offset for each section. + size_t FileOff = ElfHdr.size(); // File header first. + + // Adjust alignment of all section if needed, skip the null section. + for (unsigned i=1, e=SectionList.size(); i < e; ++i) { + ELFSection &ES = *SectionList[i]; + if (!ES.size()) { + ES.Offset = FileOff; + continue; + } + + // Update Section size + if (!ES.Size) + ES.Size = ES.size(); + + // Align FileOff to whatever the alignment restrictions of the section are. + if (ES.Align) + FileOff = (FileOff+ES.Align-1) & ~(ES.Align-1); + + ES.Offset = FileOff; + FileOff += ES.Size; + } + + // Align Section Header. + unsigned TableAlign = TEW->getPrefELFAlignment(); + FileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + + // Now that we know where all of the sections will be emitted, set the e_shnum + // entry in the ELF header. + ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset); + + // Now that we know the offset in the file of the section table, update the + // e_shoff address in the ELF header. + ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset); + + // Now that we know all of the data in the file header, emit it and all of the + // sections! + O.write((char *)&ElfHdr.getData()[0], ElfHdr.size()); + FileOff = ElfHdr.size(); + + // Section Header Table blob + BinaryObject SHdrTable(isLittleEndian, is64Bit); + + // Emit all of sections to the file and build the section header table. + for (ELFSectionIter I=SectionList.begin(), E=SectionList.end(); I != E; ++I) { + ELFSection &S = *(*I); + DEBUG(dbgs() << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + << ", Size: " << S.Size << ", Offset: " << S.Offset + << ", SectionData Size: " << S.size() << "\n"); + + // Align FileOff to whatever the alignment restrictions of the section are. + if (S.size()) { + if (S.Align) { + for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + } + O.write((char *)&S.getData()[0], S.Size); + FileOff += S.Size; + } + + EmitSectionHeader(SHdrTable, S); + } + + // Align output for the section table. + for (size_t NewFileOff = (FileOff+TableAlign-1) & ~(TableAlign-1); + FileOff != NewFileOff; ++FileOff) + O << (char)0xAB; + + // Emit the section table itself. + O.write((char *)&SHdrTable.getData()[0], SHdrTable.size()); +} diff --git a/final/lib/CodeGen/ELFWriter.h b/final/lib/CodeGen/ELFWriter.h new file mode 100644 index 00000000000..b8bac5598ec --- /dev/null +++ b/final/lib/CodeGen/ELFWriter.h @@ -0,0 +1,251 @@ +//===-- ELFWriter.h - Target-independent ELF writer support -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ELFWriter class. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ELFWRITER_H
+#define ELFWRITER_H
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <map>
+
+namespace llvm {
+  class BinaryObject;
+  class Constant;
+  class ConstantInt;
+  class ConstantStruct;
+  class ELFCodeEmitter;
+  class ELFRelocation;
+  class ELFSection;
+  struct ELFSym;
+  class GlobalVariable;
+  class JITDebugRegisterer;
+  class Mangler;
+  class MachineCodeEmitter;
+  class MachineConstantPoolEntry;
+  class ObjectCodeEmitter;
+  class MCAsmInfo;
+  class TargetELFWriterInfo;
+  class TargetLoweringObjectFile;
+  class raw_ostream;
+  class SectionKind;
+  class MCContext;
+  class TargetMachine;
+
+  typedef std::vector<ELFSym*>::iterator ELFSymIter;
+  typedef std::vector<ELFSection*>::iterator ELFSectionIter;
+  typedef SetVector<const GlobalValue*>::const_iterator PendingGblsIter;
+  typedef SetVector<const char *>::const_iterator PendingExtsIter;
+  typedef std::pair<const Constant *, int64_t> CstExprResTy;
+
+  /// ELFWriter - This class implements the common target-independent code for
+  /// writing ELF files. Targets should derive a class from this to
+  /// parameterize the output format.
+  ///
+  class ELFWriter : public MachineFunctionPass {
+    friend class ELFCodeEmitter;
+    friend class JITDebugRegisterer;
+  public:
+    static char ID;
+
+    /// Return the ELFCodeEmitter as an instance of ObjectCodeEmitter
+    ObjectCodeEmitter *getObjectCodeEmitter() {
+      return reinterpret_cast<ObjectCodeEmitter*>(ElfCE);
+    }
+
+    ELFWriter(raw_ostream &O, TargetMachine &TM);
+    ~ELFWriter();
+
+  protected:
+    /// Output stream to send the resultant object file to.
+    raw_ostream &O;
+
+    /// Target machine description.
+    TargetMachine &TM;
+
+    /// Context object for machine code objects.
+    MCContext &OutContext;
+
+    /// Target Elf Writer description.
+    const TargetELFWriterInfo *TEW;
+
+    /// Mang - The object used to perform name mangling for this module.
+    Mangler *Mang;
+
+    /// ElfCE - The ELFCodeEmitter object that we are exposing to emit machine
+    /// code for functions to the .o file.
+    ELFCodeEmitter *ElfCE;
+
+    /// TLOF - Target Lowering Object File, provides section names for globals
+    /// and other object file specific stuff.
+    const TargetLoweringObjectFile &TLOF;
+
+    /// MAI - Target Asm Info, provides information about section names for
+    /// globals and other target specific stuff.
+    const MCAsmInfo *MAI;
+
+    //===------------------------------------------------------------------===//
+    // Properties inferred automatically from the target machine.
+    //===------------------------------------------------------------------===//
+
+    /// is64Bit/isLittleEndian - This information is inferred from the target
+    /// machine directly, indicating whether to emit a 32- or 64-bit ELF file.
+    bool is64Bit, isLittleEndian;
+
+    /// doInitialization - Emit the file header and all of the global variables
+    /// for the module to the ELF file.
+    bool doInitialization(Module &M);
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    /// doFinalization - Now that the module has been completely processed, emit
+    /// the ELF file to 'O'.
+    bool doFinalization(Module &M);
+
+  private:
+    /// Blob containing the Elf header
+    BinaryObject ElfHdr;
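+    // Editorial note: this blob is patched in place after emission via
+    // fixWord16()/fixWord() using the ELFHdr_e_*_Offset bookmarks declared
+    // further below, once section offsets and counts are known.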
+
+    /// SectionList - This is the list of sections that we have emitted to the
+    /// file. Once the file has been completely built, the section header table
+    /// is constructed from this info.
+    std::vector<ELFSection*> SectionList;
+    unsigned NumSections;   // Always = SectionList.size()
+
+    /// SectionLookup - This is a mapping from section name to section number in
+    /// the SectionList. Used to quickly gather the section index from MAI names.
+    std::map<std::string, ELFSection*> SectionLookup;
+
+    /// PendingGlobals - Globals not processed as symbols yet.
+    SetVector<const GlobalValue*> PendingGlobals;
+
+    /// GblSymLookup - This is a mapping from global value to a symbol index
+    /// in the symbol table or private symbols list. This is useful since reloc
+    /// symbol references must be quickly mapped to their indices on the lists.
+    std::map<const GlobalValue*, uint32_t> GblSymLookup;
+
+    /// PendingExternals - Externals not processed as symbols yet.
+    SetVector<const char *> PendingExternals;
+
+    /// ExtSymLookup - This is a mapping from externals to a symbol index
+    /// in the symbol table list. This is useful since reloc symbol references
+    /// must be quickly mapped to their symbol table indices.
+    std::map<const char *, uint32_t> ExtSymLookup;
+
+    /// SymbolList - This is the list of symbols emitted to the symbol table.
+    /// When the SymbolList is finally built, local symbols must be placed at
+    /// the beginning and non-locals at the end.
+    std::vector<ELFSym*> SymbolList;
+
+    /// PrivateSyms - Record private symbols; a symbol here must never be
+    /// present in the SymbolList.
+    std::vector<ELFSym*> PrivateSyms;
+
+    /// getSection - Return the section with the specified name, creating a new
+    /// section if one does not already exist.
+    ELFSection &getSection(const std::string &Name, unsigned Type,
+                           unsigned Flags = 0, unsigned Align = 0) {
+      ELFSection *&SN = SectionLookup[Name];
+      if (SN) return *SN;
+
+      SectionList.push_back(new ELFSection(Name, isLittleEndian, is64Bit));
+      SN = SectionList.back();
+      SN->SectionIdx = NumSections++;
+      SN->Type = Type;
+      SN->Flags = Flags;
+      SN->Link = ELF::SHN_UNDEF;
+      SN->Align = Align;
+      return *SN;
+    }
+
+    ELFSection &getNonExecStackSection() {
+      return getSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, 1);
+    }
+
+    ELFSection &getSymbolTableSection() {
+      return getSection(".symtab", ELF::SHT_SYMTAB, 0);
+    }
+
+    ELFSection &getStringTableSection() {
+      return getSection(".strtab", ELF::SHT_STRTAB, 0, 1);
+    }
+
+    ELFSection &getSectionHeaderStringTableSection() {
+      return getSection(".shstrtab", ELF::SHT_STRTAB, 0, 1);
+    }
+
+    ELFSection &getNullSection() {
+      return getSection("", ELF::SHT_NULL, 0);
+    }
+
+    ELFSection &getDataSection();
+    ELFSection &getBSSSection();
+    ELFSection &getCtorSection();
+    ELFSection &getDtorSection();
+    ELFSection &getJumpTableSection();
+    ELFSection &getConstantPoolSection(MachineConstantPoolEntry &CPE);
+    ELFSection &getTextSection(const Function *F);
+    ELFSection &getRelocSection(ELFSection &S);
+
+    // Helpers for obtaining ELF specific info.
+    unsigned getGlobalELFBinding(const GlobalValue *GV);
+    unsigned getGlobalELFType(const GlobalValue *GV);
+    unsigned getGlobalELFVisibility(const GlobalValue *GV);
+
+    // AddPendingGlobalSymbol - Add a global to be processed and to
+    // the global symbol lookup; use a zero index because the table
+    // index will be determined later.
+    void AddPendingGlobalSymbol(const GlobalValue *GV,
+                                bool AddToLookup = false);
+
+    // AddPendingExternalSymbol - Add the external to be processed
+    // and to the external symbol lookup; use a zero index because
+    // the symbol table index will be determined later.
+    void AddPendingExternalSymbol(const char *External);
+
+    // AddToSymbolList - Update the symbol lookup and, if the symbol is
+    // private, add it to the PrivateSyms list, otherwise to SymbolList.
+    void AddToSymbolList(ELFSym *GblSym);
+
+    // As we complete the ELF file, we need to update fields in the ELF header
+    // (e.g. the location of the section table). These members keep track of
+    // the offset in ELFHeader of these various pieces to update and other
+    // locations in the file.
+    unsigned ELFHdr_e_shoff_Offset;     // e_shoff in ELF header.
+    unsigned ELFHdr_e_shstrndx_Offset;  // e_shstrndx in ELF header.
+    unsigned ELFHdr_e_shnum_Offset;     // e_shnum in ELF header.
+
+  private:
+    void EmitGlobal(const GlobalValue *GV);
+    void EmitGlobalConstant(const Constant *C, ELFSection &GblS);
+    void EmitGlobalConstantStruct(const ConstantStruct *CVS,
+                                  ELFSection &GblS);
+    void EmitGlobalConstantLargeInt(const ConstantInt *CI, ELFSection &S);
+    void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
+                                  ELFSection &GblS, int64_t Offset = 0);
+    bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
+    void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+    void EmitRelocations();
+    void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
+    void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
+    void EmitSectionTableStringTable();
+    void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym);
+    void EmitSymbolTable();
+    void EmitStringTable(const std::string &ModuleName);
+    void OutputSectionsAndSectionTable();
+    void RelocateField(BinaryObject &BO, uint32_t Offset, int64_t Value,
+                       unsigned Size);
+    unsigned SortSymbols();
+    CstExprResTy ResolveConstantExpr(const Constant *CV);
+  };
+}
+
+#endif
diff --git a/final/lib/CodeGen/EdgeBundles.cpp b/final/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 00000000000..aed8bc94799
--- /dev/null
+++ b/final/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,86 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+                cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+                /* cfg = */ true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  EC.clear();
+  EC.grow(2 * MF->size());
+
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+       ++I) {
+    const MachineBasicBlock &MBB = *I;
+    unsigned OutE = 2 * MBB.getNumber() + 1;
+    // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + EC.join(OutE, 2 * (*SI)->getNumber()); + } + EC.compress(); + if (ViewEdgeBundles) + view(); + return false; +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. +void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} + +/// Specialize WriteGraph, the standard implementation won't work. +raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const std::string &Title) { + const MachineFunction *MF = G.getMachineFunction(); + + O << "digraph {\n"; + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); + I != E; ++I) { + unsigned BB = I->getNumber(); + O << "\t\"BB#" << BB << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" + << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); SI != SE; ++SI) + O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() + << "\" [ color=lightgray ]\n"; + } + O << "}\n"; + return O; +} + + diff --git a/final/lib/CodeGen/ExpandISelPseudos.cpp b/final/lib/CodeGen/ExpandISelPseudos.cpp new file mode 100644 index 00000000000..b5ec303f5d9 --- /dev/null +++ b/final/lib/CodeGen/ExpandISelPseudos.cpp @@ -0,0 +1,82 @@ +//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand Psuedo-instructions produced by ISel. These are usually to allow +// the expansion to contain control flow, such as a conditional move +// implemented with a conditional branch and a phi, or an atomic operation +// implemented with a loop. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "expand-isel-pseudos" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +using namespace llvm; + +namespace { + class ExpandISelPseudos : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandISelPseudos() : MachineFunctionPass(ID) {} + + private: + virtual bool runOnMachineFunction(MachineFunction &MF); + + const char *getPassName() const { + return "Expand ISel Pseudo-instructions"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char ExpandISelPseudos::ID = 0; +INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", + "Expand CodeGen Pseudo-instructions", false, false) + +FunctionPass *llvm::createExpandISelPseudosPass() { + return new ExpandISelPseudos(); +} + +bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetLowering *TLI = MF.getTarget().getTargetLowering(); + + // Iterate through each instruction in the function, looking for pseudos. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE; ) { + MachineInstr *MI = MBBI++; + + // If MI is a pseudo, expand it. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.usesCustomInsertionHook()) { + Changed = true; + MachineBasicBlock *NewMBB = + TLI->EmitInstrWithCustomInserter(MI, MBB); + // The expansion may involve new basic blocks. + if (NewMBB != MBB) { + MBB = NewMBB; + I = NewMBB; + MBBI = NewMBB->begin(); + MBBE = NewMBB->end(); + } + } + } + } + + return Changed; +} diff --git a/final/lib/CodeGen/GCMetadata.cpp b/final/lib/CodeGen/GCMetadata.cpp new file mode 100644 index 00000000000..d757cf409d5 --- /dev/null +++ b/final/lib/CodeGen/GCMetadata.cpp @@ -0,0 +1,213 @@ +//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the GCFunctionInfo class and GCModuleInfo pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + + class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; + + public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} + + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + }; + + class Deleter : public FunctionPass { + static char ID; + + public: + Deleter(); + + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool runOnFunction(Function &F); + bool doFinalization(Module &M); + }; + +} + +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false) + +// ----------------------------------------------------------------------------- + +GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) + : F(F), S(S), FrameSize(~0LL) {} + +GCFunctionInfo::~GCFunctionInfo() {} + +// ----------------------------------------------------------------------------- + +char GCModuleInfo::ID = 0; + +GCModuleInfo::GCModuleInfo() + : ImmutablePass(ID) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +GCModuleInfo::~GCModuleInfo() { + clear(); +} + +GCStrategy *GCModuleInfo::getOrCreateStrategy(const Module *M, + const std::string &Name) { + strategy_map_type::iterator NMI = StrategyMap.find(Name); + if (NMI != StrategyMap.end()) + return NMI->getValue(); + + for (GCRegistry::iterator I = GCRegistry::begin(), + E = GCRegistry::end(); I != E; ++I) { + if (Name == I->getName()) { + GCStrategy *S = I->instantiate(); + S->M = M; + S->Name = Name; + StrategyMap.GetOrCreateValue(Name).setValue(S); + StrategyList.push_back(S); + return S; + } + } + + dbgs() << "unsupported GC: " << Name << "\n"; + llvm_unreachable(0); +} + +GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { + 
assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!");
+  assert(F.hasGC());
+
+  finfo_map_type::iterator I = FInfoMap.find(&F);
+  if (I != FInfoMap.end())
+    return *I->second;
+
+  GCStrategy *S = getOrCreateStrategy(F.getParent(), F.getGC());
+  GCFunctionInfo *GFI = S->insertFunctionInfo(F);
+  FInfoMap[&F] = GFI;
+  return *GFI;
+}
+
+void GCModuleInfo::clear() {
+  FInfoMap.clear();
+  StrategyMap.clear();
+
+  for (iterator I = begin(), E = end(); I != E; ++I)
+    delete *I;
+  StrategyList.clear();
+}
+
+// -----------------------------------------------------------------------------
+
+char Printer::ID = 0;
+
+FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
+  return new Printer(OS);
+}
+
+
+const char *Printer::getPassName() const {
+  return "Print Garbage Collector Information";
+}
+
+void Printer::getAnalysisUsage(AnalysisUsage &AU) const {
+  FunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+static const char *DescKind(GC::PointKind Kind) {
+  switch (Kind) {
+    default: llvm_unreachable("Unknown GC point kind");
+    case GC::Loop:     return "loop";
+    case GC::Return:   return "return";
+    case GC::PreCall:  return "pre-call";
+    case GC::PostCall: return "post-call";
+  }
+}
+
+bool Printer::runOnFunction(Function &F) {
+  if (!F.hasGC()) return false;
+
+  GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+
+  OS << "GC roots for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(),
+                                      RE = FD->roots_end(); RI != RE; ++RI)
+    OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n";
+
+  OS << "GC safe points for " << FD->getFunction().getNameStr() << ":\n";
+  for (GCFunctionInfo::iterator PI = FD->begin(),
+                                PE = FD->end(); PI != PE; ++PI) {
+
+    OS << "\t" << PI->Label->getName() << ": "
+       << DescKind(PI->Kind) << ", live = {";
+
+    for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI),
+                                       RE = FD->live_end(PI);;) {
+      OS << " " << RI->Num;
+      if (++RI == RE)
+        break;
+      OS << ",";
+    }
+
+    OS << " }\n";
+  }
+
+  return false;
+}
+
+// -----------------------------------------------------------------------------
+
+char Deleter::ID = 0;
+
+FunctionPass *llvm::createGCInfoDeleter() {
+  return new Deleter();
+}
+
+Deleter::Deleter() : FunctionPass(ID) {}
+
+const char *Deleter::getPassName() const {
+  return "Delete Garbage Collector Information";
+}
+
+void Deleter::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<GCModuleInfo>();
+}
+
+bool Deleter::runOnFunction(Function &MF) {
+  return false;
+}
+
+bool Deleter::doFinalization(Module &M) {
+  GCModuleInfo *GMI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(GMI && "Deleter didn't require GCModuleInfo?!");
+  GMI->clear();
+  return false;
+}
diff --git a/final/lib/CodeGen/GCMetadataPrinter.cpp b/final/lib/CodeGen/GCMetadataPrinter.cpp
new file mode 100644
index 00000000000..f80e9ced0bc
--- /dev/null
+++ b/final/lib/CodeGen/GCMetadataPrinter.cpp
@@ -0,0 +1,27 @@
+//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the abstract base class GCMetadataPrinter.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadataPrinter.h" +using namespace llvm; + +GCMetadataPrinter::GCMetadataPrinter() { } + +GCMetadataPrinter::~GCMetadataPrinter() { } + +void GCMetadataPrinter::beginAssembly(AsmPrinter &AP) { + // Default is no action. +} + +void GCMetadataPrinter::finishAssembly(AsmPrinter &AP) { + // Default is no action. +} diff --git a/final/lib/CodeGen/GCStrategy.cpp b/final/lib/CodeGen/GCStrategy.cpp new file mode 100644 index 00000000000..766c6ee542a --- /dev/null +++ b/final/lib/CodeGen/GCStrategy.cpp @@ -0,0 +1,416 @@ +//===-- GCStrategy.cpp - Garbage collection infrastructure -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements target- and collector-independent garbage collection +// infrastructure. +// +// MachineCodeAnalysis identifies the GC safe points in the machine code. Roots +// are identified in SelectionDAGISel. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Module.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + + /// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or + /// llvm.gcwrite intrinsics, replacing them with simple loads and stores as + /// directed by the GCStrategy. It also performs automatic root initialization + /// and custom intrinsic lowering. + class LowerIntrinsics : public FunctionPass { + static bool NeedsDefaultLoweringPass(const GCStrategy &C); + static bool NeedsCustomLoweringPass(const GCStrategy &C); + static bool CouldBecomeSafePoint(Instruction *I); + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + static bool InsertRootInitializers(Function &F, + AllocaInst **Roots, unsigned Count); + + public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const; + void getAnalysisUsage(AnalysisUsage &AU) const; + + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + }; + + + /// MachineCodeAnalysis - This is a target-independent pass over the machine + /// function representation to identify safe points for the garbage collector + /// in the machine code. It inserts labels at safe points and populates a + /// GCMetadata record for each function. 
+  class MachineCodeAnalysis : public MachineFunctionPass {
+    const TargetMachine *TM;
+    GCFunctionInfo *FI;
+    MachineModuleInfo *MMI;
+    const TargetInstrInfo *TII;
+
+    void FindSafePoints(MachineFunction &MF);
+    void VisitCallPoint(MachineBasicBlock::iterator MI);
+    MCSymbol *InsertLabel(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          DebugLoc DL) const;
+
+    void FindStackOffsets(MachineFunction &MF);
+
+  public:
+    static char ID;
+
+    MachineCodeAnalysis();
+    const char *getPassName() const;
+    void getAnalysisUsage(AnalysisUsage &AU) const;
+
+    bool runOnMachineFunction(MachineFunction &MF);
+  };
+
+}
+
+// -----------------------------------------------------------------------------
+
+GCStrategy::GCStrategy() :
+  NeededSafePoints(0),
+  CustomReadBarriers(false),
+  CustomWriteBarriers(false),
+  CustomRoots(false),
+  InitRoots(true),
+  UsesMetadata(false)
+{}
+
+GCStrategy::~GCStrategy() {
+  for (iterator I = begin(), E = end(); I != E; ++I)
+    delete *I;
+
+  Functions.clear();
+}
+
+bool GCStrategy::initializeCustomLowering(Module &M) { return false; }
+
+bool GCStrategy::performCustomLowering(Function &F) {
+  dbgs() << "gc " << getName() << " must override performCustomLowering.\n";
+  llvm_unreachable(0);
+  return 0;
+}
+
+GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
+  GCFunctionInfo *FI = new GCFunctionInfo(F, *this);
+  Functions.push_back(FI);
+  return FI;
+}
+
+// -----------------------------------------------------------------------------
+
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
+FunctionPass *llvm::createGCLoweringPass() {
+  return new LowerIntrinsics();
+}
+
+char LowerIntrinsics::ID = 0;
+
+LowerIntrinsics::LowerIntrinsics()
+  : FunctionPass(ID) {
+    initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+  }
+
+const char *LowerIntrinsics::getPassName() const {
+  return "Lower Garbage Collection Instructions";
+}
+
+void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
+  FunctionPass::getAnalysisUsage(AU);
+  AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<DominatorTree>();
+}
+
+/// doInitialization - If this module uses the GC intrinsics, find them now.
+bool LowerIntrinsics::doInitialization(Module &M) {
+  // FIXME: This is rather antisocial in the context of a JIT since it performs
+  //        work against the entire module. But this cannot be done at
+  //        runFunction time (initializeCustomLowering likely needs to change
+  //        the module).
+  GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
+  assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?");
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (!I->isDeclaration() && I->hasGC())
+      MI->getFunctionInfo(*I); // Instantiate the GC strategy.
+
+  bool MadeChange = false;
+  for (GCModuleInfo::iterator I = MI->begin(), E = MI->end(); I != E; ++I)
+    if (NeedsCustomLoweringPass(**I))
+      if ((*I)->initializeCustomLowering(M))
+        MadeChange = true;
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::InsertRootInitializers(Function &F, AllocaInst **Roots,
+                                             unsigned Count) {
+  // Scroll past alloca instructions.
+  BasicBlock::iterator IP = F.getEntryBlock().begin();
+  while (isa<AllocaInst>(IP)) ++IP;
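+
+  // Editorial sketch (not in the original source) of what the pass below
+  // produces for a gcroot'ed stack slot such as:
+  //   %obj = alloca i8*
+  // a null store is inserted right after the alloca unless one already
+  // exists in the entry block:
+  //   store i8* null, i8** %obj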
+
+  // Search for initializers in the initial BB.
+  SmallPtrSet<AllocaInst*, 16> InitedRoots;
+  for (; !CouldBecomeSafePoint(IP); ++IP)
+    if (StoreInst *SI = dyn_cast<StoreInst>(IP))
+      if (AllocaInst *AI =
+            dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts()))
+        InitedRoots.insert(AI);
+
+  // Add root initializers.
+  bool MadeChange = false;
+
+  for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I)
+    if (!InitedRoots.count(*I)) {
+      StoreInst* SI = new StoreInst(ConstantPointerNull::get(cast<PointerType>(
+                        cast<PointerType>((*I)->getType())->getElementType())),
+                        *I);
+      SI->insertAfter(*I);
+      MadeChange = true;
+    }
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::NeedsDefaultLoweringPass(const GCStrategy &C) {
+  // Default lowering is necessary only if read or write barriers have a
+  // default action. The default for roots is no action.
+  return !C.customWriteBarrier()
+      || !C.customReadBarrier()
+      || C.initializeRoots();
+}
+
+bool LowerIntrinsics::NeedsCustomLoweringPass(const GCStrategy &C) {
+  // Custom lowering is only necessary if enabled for some action.
+  return C.customWriteBarrier()
+      || C.customReadBarrier()
+      || C.customRoots();
+}
+
+/// CouldBecomeSafePoint - Predicate to conservatively determine whether the
+/// instruction could introduce a safe point.
+bool LowerIntrinsics::CouldBecomeSafePoint(Instruction *I) {
+  // The natural definition of instructions which could introduce safe points
+  // are:
+  //
+  //   - call, invoke (AfterCall, BeforeCall)
+  //   - phis (Loops)
+  //   - invoke, ret, unwind (Exit)
+  //
+  // However, instructions as seemingly innocuous as arithmetic can become
+  // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead
+  // it is necessary to take a conservative approach.
+
+  if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) ||
+      isa<StoreInst>(I) || isa<LoadInst>(I))
+    return false;
+
+  // llvm.gcroot is safe because it doesn't do anything at runtime.
+  if (CallInst *CI = dyn_cast<CallInst>(I))
+    if (Function *F = CI->getCalledFunction())
+      if (unsigned IID = F->getIntrinsicID())
+        if (IID == Intrinsic::gcroot)
+          return false;
+
+  return true;
+}
+
+/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores.
+/// Leave gcroot intrinsics; the code generator needs to see those.
+bool LowerIntrinsics::runOnFunction(Function &F) {
+  // Quick exit for functions that do not use GC.
+  if (!F.hasGC())
+    return false;
+
+  GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F);
+  GCStrategy &S = FI.getStrategy();
+
+  bool MadeChange = false;
+
+  if (NeedsDefaultLoweringPass(S))
+    MadeChange |= PerformDefaultLowering(F, S);
+
+  bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+  if (UseCustomLoweringPass)
+    MadeChange |= S.performCustomLowering(F);
+
+  // Custom lowering may modify the CFG, so dominators must be recomputed.
+  if (UseCustomLoweringPass) {
+    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+      DT->DT->recalculate(F);
+  }
+
+  return MadeChange;
+}
+
+bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
+  bool LowerWr = !S.customWriteBarrier();
+  bool LowerRd = !S.customReadBarrier();
+  bool InitRoots = S.initializeRoots();
+
+  SmallVector<AllocaInst*, 32> Roots;
+
+  bool MadeChange = false;
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) {
+      if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) {
+        Function *F = CI->getCalledFunction();
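+        // Editorial summary of the default lowerings performed below:
+        //   llvm.gcwrite(v, obj, slot) -> store v, slot     (if LowerWr)
+        //   llvm.gcread(obj, slot)     -> load slot         (if LowerRd)
+        //   llvm.gcroot(slot, md)      -> recorded in Roots (if InitRoots)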
+        switch (F->getIntrinsicID()) {
+        case Intrinsic::gcwrite:
+          if (LowerWr) {
+            // Replace a write barrier with a simple store.
+            Value *St = new StoreInst(CI->getArgOperand(0),
+                                      CI->getArgOperand(2), CI);
+            CI->replaceAllUsesWith(St);
+            CI->eraseFromParent();
+          }
+          break;
+        case Intrinsic::gcread:
+          if (LowerRd) {
+            // Replace a read barrier with a simple load.
+            Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+            Ld->takeName(CI);
+            CI->replaceAllUsesWith(Ld);
+            CI->eraseFromParent();
+          }
+          break;
+        case Intrinsic::gcroot:
+          if (InitRoots) {
+            // Initialize the GC root, but do not delete the intrinsic. The
+            // backend needs the intrinsic to flag the stack slot.
+            Roots.push_back(cast<AllocaInst>(
+                              CI->getArgOperand(0)->stripPointerCasts()));
+          }
+          break;
+        default:
+          continue;
+        }
+
+        MadeChange = true;
+      }
+    }
+  }
+
+  if (Roots.size())
+    MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size());
+
+  return MadeChange;
+}
+
+// -----------------------------------------------------------------------------
+
+FunctionPass *llvm::createGCMachineCodeAnalysisPass() {
+  return new MachineCodeAnalysis();
+}
+
+char MachineCodeAnalysis::ID = 0;
+
+MachineCodeAnalysis::MachineCodeAnalysis()
+  : MachineFunctionPass(ID) {}
+
+const char *MachineCodeAnalysis::getPassName() const {
+  return "Analyze Machine Code For Garbage Collection";
+}
+
+void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  MachineFunctionPass::getAnalysisUsage(AU);
+  AU.setPreservesAll();
+  AU.addRequired<MachineModuleInfo>();
+  AU.addRequired<GCModuleInfo>();
+}
+
+MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                           DebugLoc DL) const {
+  MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol();
+  BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label);
+  return Label;
+}
+
+void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
+  // Find the return address (next instruction), too, so as to bracket the call
+  // instruction.
+  MachineBasicBlock::iterator RAI = CI;
+  ++RAI;
+
+  if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+    MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+    FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+  }
+
+  if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+    MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+    FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+  }
+}
+
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+  for (MachineFunction::iterator BBI = MF.begin(),
+                                 BBE = MF.end(); BBI != BBE; ++BBI)
+    for (MachineBasicBlock::iterator MI = BBI->begin(),
+                                     ME = BBI->end(); MI != ME; ++MI)
+      if (MI->getDesc().isCall())
+        VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+  const TargetFrameLowering *TFI = TM->getFrameLowering();
+  assert(TFI && "TargetFrameLowering not available!");
+
+  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+                                      RE = FI->roots_end(); RI != RE; ++RI)
+    RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+  // Quick exit for functions that do not use GC.
+  if (!MF.getFunction()->hasGC())
+    return false;
+
+  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+  if (!FI->getStrategy().needsSafePoints())
+    return false;
+
+  TM = &MF.getTarget();
+  MMI = &getAnalysis<MachineModuleInfo>();
+  TII = TM->getInstrInfo();
+
+  // Find the size of the stack frame.
+  FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+  // Find all safe points.
+  FindSafePoints(MF);
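+  // Editorial illustration: FindSafePoints() brackets each call with
+  // GC_LABELs according to the strategy, conceptually turning
+  //   CALL foo
+  // into
+  //   Lpre:  GC_LABEL   ; GC::PreCall safe point
+  //   CALL foo
+  //   Lpost: GC_LABEL   ; GC::PostCall safe point (the return address)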
+void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
+  for (MachineFunction::iterator BBI = MF.begin(),
+         BBE = MF.end(); BBI != BBE; ++BBI)
+    for (MachineBasicBlock::iterator MI = BBI->begin(),
+           ME = BBI->end(); MI != ME; ++MI)
+      if (MI->getDesc().isCall())
+        VisitCallPoint(MI);
+}
+
+void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
+  const TargetFrameLowering *TFI = TM->getFrameLowering();
+  assert(TFI && "TargetFrameLowering not available!");
+
+  for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
+         RE = FI->roots_end(); RI != RE; ++RI)
+    RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
+}
+
+bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
+  // Quick exit for functions that do not use GC.
+  if (!MF.getFunction()->hasGC())
+    return false;
+
+  FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction());
+  if (!FI->getStrategy().needsSafePoints())
+    return false;
+
+  TM = &MF.getTarget();
+  MMI = &getAnalysis<MachineModuleInfo>();
+  TII = TM->getInstrInfo();
+
+  // Find the size of the stack frame.
+  FI->setFrameSize(MF.getFrameInfo()->getStackSize());
+
+  // Find all safe points.
+  FindSafePoints(MF);
+
+  // Find the stack offsets for all roots.
+  FindStackOffsets(MF);
+
+  return false;
+}
diff --git a/final/lib/CodeGen/IfConversion.cpp b/final/lib/CodeGen/IfConversion.cpp
new file mode 100644
index 00000000000..db53b0473a9
--- /dev/null
+++ b/final/lib/CodeGen/IfConversion.cpp
@@ -0,0 +1,1528 @@
+//===-- IfConversion.cpp - Machine code if conversion pass. --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level if-conversion pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ifcvt"
+#include "BranchFolding.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// Hidden options to help debugging.
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
+static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+                                   cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+                                    cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+                                     cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+                                      cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+                                      cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+                                       cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+                                    cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+                                     cl::init(true), cl::Hidden);
+
+STATISTIC(NumSimple,       "Number of simple if-conversions performed");
+STATISTIC(NumSimpleFalse,  "Number of simple (F) if-conversions performed");
+STATISTIC(NumTriangle,     "Number of triangle if-conversions performed");
+STATISTIC(NumTriangleRev,  "Number of triangle (R) if-conversions performed");
+STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
+STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
+STATISTIC(NumDiamonds,     "Number of diamond if-conversions performed");
+STATISTIC(NumIfConvBBs,    "Number of if-converted blocks");
+STATISTIC(NumDupBBs,       "Number of duplicated blocks");
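+// The hidden flags above are intended for bisecting if-conversion problems.
+// For example, a hypothetical invocation such as
+//
+//   llc -ifcvt-fn-start=7 -ifcvt-fn-stop=7 -ifcvt-limit=3 input.bc
+//
+// restricts the pass to the 7th function processed and to at most three
+// if-conversions in total.
+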
+namespace {
+  class IfConverter : public MachineFunctionPass {
+    enum IfcvtKind {
+      ICNotClassfied,  // BB data valid, but not classified.
+      ICSimpleFalse,   // Same as ICSimple, but on the false path.
+      ICSimple,        // BB is entry of a one-split, no-rejoin sub-CFG.
+      ICTriangleFRev,  // Same as ICTriangleFalse, but false path rev condition.
+      ICTriangleRev,   // Same as ICTriangle, but true path rev condition.
+      ICTriangleFalse, // Same as ICTriangle, but on the false path.
+      ICTriangle,      // BB is entry of a triangle sub-CFG.
+      ICDiamond        // BB is entry of a diamond sub-CFG.
+    };
+
+    /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+    /// of if-conversion feasibility analysis. This includes results from
+    /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), its
+    /// classification, the common tail block of its successors (if it's a
+    /// diamond shape), its size, whether it's predicable, and whether any
+    /// instruction can clobber the 'would-be' predicate.
+    ///
+    /// IsDone          - True if BB is not to be considered for ifcvt.
+    /// IsBeingAnalyzed - True if BB is currently being analyzed.
+    /// IsAnalyzed      - True if BB has been analyzed (info is still valid).
+    /// IsEnqueued      - True if BB has been enqueued to be ifcvt'ed.
+    /// IsBrAnalyzable  - True if AnalyzeBranch() returns false.
+    /// HasFallThrough  - True if BB may fallthrough to the following BB.
+    /// IsUnpredicable  - True if BB is known to be unpredicable.
+    /// ClobbersPred    - True if BB could modify predicates (e.g. has
+    ///                   cmp, call, etc.)
+    /// NonPredSize     - Number of non-predicated instructions.
+    /// ExtraCost       - Extra cost for multi-cycle instructions.
+    /// ExtraCost2      - Some instructions are slower when predicated.
+    /// BB              - Corresponding MachineBasicBlock.
+    /// TrueBB / FalseBB- See AnalyzeBranch().
+    /// BrCond          - Conditions for end of block conditional branches.
+    /// Predicate       - Predicate used in the BB.
+    struct BBInfo {
+      bool IsDone          : 1;
+      bool IsBeingAnalyzed : 1;
+      bool IsAnalyzed      : 1;
+      bool IsEnqueued      : 1;
+      bool IsBrAnalyzable  : 1;
+      bool HasFallThrough  : 1;
+      bool IsUnpredicable  : 1;
+      bool CannotBeCopied  : 1;
+      bool ClobbersPred    : 1;
+      unsigned NonPredSize;
+      unsigned ExtraCost;
+      unsigned ExtraCost2;
+      MachineBasicBlock *BB;
+      MachineBasicBlock *TrueBB;
+      MachineBasicBlock *FalseBB;
+      SmallVector<MachineOperand, 4> BrCond;
+      SmallVector<MachineOperand, 4> Predicate;
+      BBInfo() : IsDone(false), IsBeingAnalyzed(false),
+                 IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
+                 HasFallThrough(false), IsUnpredicable(false),
+                 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
+                 ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
+    };
+
+    /// IfcvtToken - Record information about pending if-conversions to
+    /// attempt:
+    /// BBI             - Corresponding BBInfo.
+    /// Kind            - Type of block. See IfcvtKind.
+    /// NeedSubsumption - True if the to-be-predicated BB has already been
+    ///                   predicated.
+    /// NumDups         - Number of instructions that would be duplicated due
+    ///                   to this if-conversion. (For diamonds, the number of
+    ///                   identical instructions at the beginnings of both
+    ///                   paths).
+    /// NumDups2        - For diamonds, the number of identical instructions
+    ///                   at the ends of both paths.
+    struct IfcvtToken {
+      BBInfo &BBI;
+      IfcvtKind Kind;
+      bool NeedSubsumption;
+      unsigned NumDups;
+      unsigned NumDups2;
+      IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
+        : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+    };
+
+    /// Roots - Basic blocks that do not have successors. These are the
+    /// starting points of graph traversal.
+    std::vector<MachineBasicBlock*> Roots;
+
+    /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
+    /// basic block number.
+    std::vector<BBInfo> BBAnalysis;
+
+    const TargetLowering *TLI;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    const InstrItineraryData *InstrItins;
+    const MachineLoopInfo *MLI;
+    bool MadeChange;
+    int FnNum;
+  public:
+    static char ID;
+    IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+      initializeIfConverterPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char *getPassName() const { return "If Converter"; }
+
+  private:
+    bool ReverseBranchCondition(BBInfo &BBI);
+    bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+                     float Prediction, float Confidence) const;
+    bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                       bool FalseBranch, unsigned &Dups,
+                       float Prediction, float Confidence) const;
+    bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+                      unsigned &Dups1, unsigned &Dups2) const;
+    void ScanInstructions(BBInfo &BBI);
+    BBInfo &AnalyzeBlock(MachineBasicBlock *BB,
+                         std::vector<IfcvtToken*> &Tokens);
+    bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
+                             bool isTriangle = false, bool RevBranch = false);
+    void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
+    void InvalidatePreds(MachineBasicBlock *BB);
+    void RemoveExtraEdges(BBInfo &BBI);
+    bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
+    bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+    bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+                          unsigned NumDups1, unsigned NumDups2);
+    void PredicateBlock(BBInfo &BBI,
+                        MachineBasicBlock::iterator E,
+                        SmallVectorImpl<MachineOperand> &Cond,
+                        SmallSet<unsigned, 4> &Redefs);
+    void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+                               SmallVectorImpl<MachineOperand> &Cond,
+                               SmallSet<unsigned, 4> &Redefs,
+                               bool IgnoreBr = false);
+    void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
+
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+                            unsigned Cycle, unsigned Extra,
+                            float Prediction, float Confidence) const {
+      return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+                                                   Prediction, Confidence);
+    }
+
+    bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+                            unsigned TCycle, unsigned TExtra,
+                            MachineBasicBlock &FBB,
+                            unsigned FCycle, unsigned FExtra,
+                            float Prediction, float Confidence) const {
+      return TCycle > 0 && FCycle > 0 &&
+        TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+                                 Prediction, Confidence);
+    }
+
+    // blockAlwaysFallThrough - Block ends without a terminator.
+    bool blockAlwaysFallThrough(BBInfo &BBI) const {
+      return BBI.IsBrAnalyzable && BBI.TrueBB == NULL;
+    }
+
+    // IfcvtTokenCmp - Used to sort if-conversion candidates.
+    static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) {
+      int Incr1 = (C1->Kind == ICDiamond)
+        ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups;
+      int Incr2 = (C2->Kind == ICDiamond)
+        ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups;
+      if (Incr1 > Incr2)
+        return true;
+      else if (Incr1 == Incr2) {
+        // Favors subsumption.
+        if (C1->NeedSubsumption == false && C2->NeedSubsumption == true)
+          return true;
+        else if (C1->NeedSubsumption == C2->NeedSubsumption) {
+          // Favors diamond over triangle, etc.
+          if ((unsigned)C1->Kind < (unsigned)C2->Kind)
+            return true;
+          else if (C1->Kind == C2->Kind)
+            return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber();
+        }
+      }
+      return false;
+    }
+  };
+
+  char IfConverter::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
+
+FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
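+// A minimal sketch of how a codegen pipeline might schedule this pass
+// (hypothetical; the actual scheduling lives in LLVMTargetMachine):
+//
+//   PM.add(createIfConverterPass());
+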
+bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
+  TLI = MF.getTarget().getTargetLowering();
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  MLI = &getAnalysis<MachineLoopInfo>();
+  InstrItins = MF.getTarget().getInstrItineraryData();
+  if (!TII) return false;
+
+  // Tail merging tends to expose more if-conversion opportunities.
+  BranchFolder BF(true);
+  bool BFChange = BF.OptimizeFunction(MF, TII,
+                                      MF.getTarget().getRegisterInfo(),
+                                      getAnalysisIfAvailable<MachineModuleInfo>());
+
+  DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
+               << MF.getFunction()->getName() << "\'");
+
+  if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) {
+    DEBUG(dbgs() << " skipped\n");
+    return false;
+  }
+  DEBUG(dbgs() << "\n");
+
+  MF.RenumberBlocks();
+  BBAnalysis.resize(MF.getNumBlockIDs());
+
+  // Look for root nodes, i.e. blocks without successors.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    if (I->succ_empty())
+      Roots.push_back(I);
+
+  std::vector<IfcvtToken*> Tokens;
+  MadeChange = false;
+  unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle +
+    NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds;
+  while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
+    // Do an initial analysis for each basic block and find all the potential
+    // candidates to perform if-conversion.
+    bool Change = false;
+    AnalyzeBlocks(MF, Tokens);
+    while (!Tokens.empty()) {
+      IfcvtToken *Token = Tokens.back();
+      Tokens.pop_back();
+      BBInfo &BBI = Token->BBI;
+      IfcvtKind Kind = Token->Kind;
+      unsigned NumDups = Token->NumDups;
+      unsigned NumDups2 = Token->NumDups2;
+
+      delete Token;
+
+      // If the block has been evicted out of the queue or it has already been
+      // marked dead (due to it being predicated), then skip it.
+      if (BBI.IsDone)
+        BBI.IsEnqueued = false;
+      if (!BBI.IsEnqueued)
+        continue;
+
+      BBI.IsEnqueued = false;
+
+      bool RetVal = false;
+      switch (Kind) {
+      default: assert(false && "Unexpected!");
+        break;
+      case ICSimple:
+      case ICSimpleFalse: {
+        bool isFalse = Kind == ICSimpleFalse;
+        if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
+        DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+                                            " false" : "")
+                     << "): BB#" << BBI.BB->getNumber() << " ("
+                     << ((Kind == ICSimpleFalse)
+                         ? BBI.FalseBB->getNumber()
+                         : BBI.TrueBB->getNumber()) << ") ");
+        RetVal = IfConvertSimple(BBI, Kind);
+        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
: "failed!") << "\n"); + if (RetVal) { + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DEBUG(dbgs() << "Ifcvt (Triangle"); + if (isFalse) + DEBUG(dbgs() << " false"); + if (isRev) + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertTriangle(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) { + if (isFalse) { + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; + } else { + if (isRev) ++NumTriangleRev; + else ++NumTriangle; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumDiamonds; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + Roots.clear(); + BBAnalysis.clear(); + + if (MadeChange && IfCvtBranchFold) { + BranchFolder BF(false); + BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); + } + + MadeChange |= BFChange; + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return NULL; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branch. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB; + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return NULL; + return I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. 
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + float Prediction, float Confidence) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction, Confidence)) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the 'false' block rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups, + float Prediction, float Confidence) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.empty()) + // Ends with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, + Prediction, Confidence)) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB; + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = I; + } + return TExit && TExit == FalseBBI.BB; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (TT == NULL && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. 
+ if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(FIB)) + break; + ++Dups1; + ++TIB; + ++FIB; + } + + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. + while (TIE != TIB) { + --TIE; + if (!TIE->getDesc().isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->getDesc().isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { + // Skip dbg_value instructions. These do not count. + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) + break; + } + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) + break; + } + if (!TIE->isIdenticalTo(FIE)) + break; + ++Dups2; + --TIE; + --FIE; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = BBI.Predicate.size() > 0; + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = NULL; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == NULL; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + if (!BBI.FalseBB) { + // Malformed bcc? True and false blocks are the same? + BBI.IsUnpredicable = true; + return; + } + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); + + if (!isCondBr) { + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = 0; + unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, + &ExtraPredCost); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). 
+      if (isCondBr) {
+        // A conditional branch is not predicable, but it may be eliminated.
+        continue;
+      }
+
+      // Predicate may have been modified, the subsequent (currently)
+      // unpredicated instructions cannot be correctly predicated.
+      BBI.IsUnpredicable = true;
+      return;
+    }
+
+    // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are
+    // still potentially predicable.
+    std::vector<MachineOperand> PredDefs;
+    if (TII->DefinesPredicate(I, PredDefs))
+      BBI.ClobbersPred = true;
+
+    if (!TII->isPredicable(I)) {
+      BBI.IsUnpredicable = true;
+      return;
+    }
+  }
+}
+
+/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
+/// predicated by the specified predicate.
+bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
+                                      SmallVectorImpl<MachineOperand> &Pred,
+                                      bool isTriangle, bool RevBranch) {
+  // If the block is dead or unpredicable, then it cannot be predicated.
+  if (BBI.IsDone || BBI.IsUnpredicable)
+    return false;
+
+  // If it is already predicated, check if its predicate subsumes the new
+  // predicate.
+  if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred))
+    return false;
+
+  if (BBI.BrCond.size()) {
+    if (!isTriangle)
+      return false;
+
+    // Test predicate subsumption.
+    SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
+    SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+    if (RevBranch) {
+      if (TII->ReverseBranchCondition(Cond))
+        return false;
+    }
+    if (TII->ReverseBranchCondition(RevPred) ||
+        !TII->SubsumesPredicate(Cond, RevPred))
+      return false;
+  }
+
+  return true;
+}
+
+/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
+/// the specified block. Record its successors and whether it looks like an
+/// if-conversion candidate.
+IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
+                                               std::vector<IfcvtToken*> &Tokens) {
+  BBInfo &BBI = BBAnalysis[BB->getNumber()];
+
+  if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed)
+    return BBI;
+
+  BBI.BB = BB;
+  BBI.IsBeingAnalyzed = true;
+
+  ScanInstructions(BBI);
+
+  // Unanalyzable or ends with fallthrough or unconditional branch.
+  if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  // Do not ifcvt if either path is a back edge to the entry block.
+  if (BBI.TrueBB == BB || BBI.FalseBB == BB) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  // Do not ifcvt if true and false fallthrough blocks are the same.
+  if (!BBI.FalseBB) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  BBInfo &TrueBBI  = AnalyzeBlock(BBI.TrueBB, Tokens);
+  BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
+
+  if (TrueBBI.IsDone && FalseBBI.IsDone) {
+    BBI.IsBeingAnalyzed = false;
+    BBI.IsAnalyzed = true;
+    return BBI;
+  }
+
+  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+  bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+
+  unsigned Dups = 0;
+  unsigned Dups2 = 0;
+  bool TNeedSub = TrueBBI.Predicate.size() > 0;
+  bool FNeedSub = FalseBBI.Predicate.size() > 0;
+  bool Enqueued = false;
+
+  // Try to predict the branch, using loop info to guide us.
+ // General heuristics are: + // - backedge -> 90% taken + // - early exit -> 20% taken + // - branch predictor confidence -> 90% + float Prediction = 0.5f; + float Confidence = 0.9f; + MachineLoop *Loop = MLI->getLoopFor(BB); + if (Loop) { + if (TrueBBI.BB == Loop->getHeader()) + Prediction = 0.9f; + else if (FalseBBI.BB == Loop->getHeader()) + Prediction = 0.1f; + + MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB); + MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB); + if (!TrueLoop || TrueLoop->getParentLoop() == Loop) + Prediction = 0.2f; + else if (!FalseLoop || FalseLoop->getParentLoop() == Loop) + Prediction = 0.8f; + } + + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction, Confidence) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { + // Triangle: + // EBB + // | \_ + // | | + // | TBB + // | / + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction, Confidence) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { + // Simple (split, no rejoin): + // EBB + // | \_ + // | | + // | TBB---> exit + // | + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Enqueued = true; + } + + if (CanRevCond) { + // Try the other path... 
+    if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+                      1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+                      1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+        FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
+      Enqueued = true;
+    }
+
+    if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+        MeetIfcvtSizeLimit(*FalseBBI.BB,
+                           FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+                           FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+        FeasibilityAnalysis(FalseBBI, RevCond)) {
+      Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
+      Enqueued = true;
+    }
+  }
+
+  BBI.IsEnqueued = Enqueued;
+  BBI.IsBeingAnalyzed = false;
+  BBI.IsAnalyzed = true;
+  return BBI;
+}
+
+/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
+                                std::vector<IfcvtToken*> &Tokens) {
+  std::set<MachineBasicBlock*> Visited;
+  for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
+    for (idf_ext_iterator<MachineBasicBlock*> I = idf_ext_begin(Roots[i], Visited),
+           E = idf_ext_end(Roots[i], Visited); I != E; ++I) {
+      MachineBasicBlock *BB = *I;
+      AnalyzeBlock(BB, Tokens);
+    }
+  }
+
+  // Sort to favor more complex ifcvt schemes.
+  std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+}
+
+/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
+/// all the intervening blocks are empty (given that BB can fall through to
+/// its next block).
+static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
+  MachineFunction::iterator PI = BB;
+  MachineFunction::iterator I = llvm::next(PI);
+  MachineFunction::iterator TI = ToBB;
+  MachineFunction::iterator E = BB->getParent()->end();
+  while (I != TI) {
+    // Check isSuccessor to avoid case where the next block is empty, but
+    // it's not a successor.
+    if (I == E || !I->empty() || !PI->isSuccessor(I))
+      return false;
+    PI = I++;
+  }
+  return true;
+}
+
+/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
+/// to determine if it can be if-converted. If predecessor is already enqueued,
+/// dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    BBInfo &PBBI = BBAnalysis[(*PI)->getNumber()];
+    if (PBBI.IsDone || PBBI.BB == BB)
+      continue;
+    PBBI.IsAnalyzed = false;
+    PBBI.IsEnqueued = false;
+  }
+}
+
+/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
+///
+static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+                               const TargetInstrInfo *TII) {
+  DebugLoc dl;  // FIXME: this is nowhere
+  SmallVector<MachineOperand, 0> NoCond;
+  TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
+}
+
+/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
+/// successors.
+void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
+  MachineBasicBlock *TBB = NULL, *FBB = NULL;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond))
+    BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
+}
+
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+                           const TargetRegisterInfo *TRI) {
+  for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+         E = BB->livein_end(); I != E; ++I) {
+    unsigned Reg = *I;
+    Redefs.insert(Reg);
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg)
+      Redefs.insert(*Subreg);
+  }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+                             const TargetRegisterInfo *TRI,
+                             bool AddImpUse = false) {
+  SmallVector<unsigned, 4> Defs;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    if (MO.isDef())
+      Defs.push_back(Reg);
+    else if (MO.isKill()) {
+      Redefs.erase(Reg);
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+        Redefs.erase(*SR);
+    }
+  }
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    unsigned Reg = Defs[i];
+    if (Redefs.count(Reg)) {
+      if (AddImpUse)
+        // Treat predicated update as read + write.
+        MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+                                                 true/*IsImp*/,false/*IsKill*/));
+    } else {
+      Redefs.insert(Reg);
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+        Redefs.insert(*SR);
+    }
+  }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+                             MachineBasicBlock::iterator E,
+                             SmallSet<unsigned,4> &Redefs,
+                             const TargetRegisterInfo *TRI) {
+  while (I != E) {
+    UpdatePredRedefs(I, Redefs, TRI);
+    ++I;
+  }
+}
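+// Illustration of the read + write modeling above (hypothetical ARM-style
+// code): a predicated def such as
+//
+//   MOVEQ r0, #1
+//
+// writes r0 only when EQ holds, so if r0 was live before it, UpdatePredRedefs
+// records the redefinition and the operand list gains an "implicit r0" use.
+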
+/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
+///
+bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+
+  SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (Kind == ICSimpleFalse)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICSimpleFalse)
+    if (TII->ReverseBranchCondition(Cond))
+      assert(false && "Unable to reverse branch condition!");
+
+  // Initialize liveins to the first BB. These are potentially redefined by
+  // predicated instructions.
+  SmallSet<unsigned, 4> Redefs;
+  InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+  InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+  if (CvtBBI->BB->pred_size() > 1) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
+  } else {
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+    // Merge converted block into entry block.
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    MergeBlocks(BBI, *CvtBBI);
+  }
+
+  bool IterIfcvt = true;
+  if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
+    InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+    BBI.HasFallThrough = false;
+    // Now ifcvt'd block will look like this:
+    //   BB:
+    //   ...
+    //   t, f = cmp
+    //   if t op
+    //   b BBf
+    //
+    // We cannot further ifcvt this block because the unconditional branch
+    // will have to be predicated on the new condition, that will not be
+    // available if cmp executes.
+    IterIfcvt = false;
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info. BB can be iteratively if-converted.
+  if (!IterIfcvt)
+    BBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+  CvtBBI->IsDone = true;
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
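+// Shape sketch for the simple case (hypothetical blocks): TBB's instructions
+// are predicated on the branch condition and folded into the entry block.
+//
+//   BB:  ...                BB:  ...
+//        bcc TBB     =>          <TBB body, predicated on cc>
+//        b   FBB                 b FBB
+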
+/// IfConvertTriangle - If convert a triangle sub-CFG.
+///
+bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
+  BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  BBInfo *CvtBBI = &TrueBBI;
+  BBInfo *NextBBI = &FalseBBI;
+  DebugLoc dl;  // FIXME: this is nowhere
+
+  SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+    std::swap(CvtBBI, NextBBI);
+
+  if (CvtBBI->IsDone ||
+      (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+    // Something has changed. It's no longer safe to predicate this block.
+    BBI.IsAnalyzed = false;
+    CvtBBI->IsAnalyzed = false;
+    return false;
+  }
+
+  if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
+    if (TII->ReverseBranchCondition(Cond))
+      assert(false && "Unable to reverse branch condition!");
+
+  if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
+    if (ReverseBranchCondition(*CvtBBI)) {
+      // BB has been changed, modify its predecessors (except for this
+      // one) so they don't get ifcvt'ed based on bad intel.
+      for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
+             E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
+        MachineBasicBlock *PBB = *PI;
+        if (PBB == BBI.BB)
+          continue;
+        BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
+        if (PBBI.IsEnqueued) {
+          PBBI.IsAnalyzed = false;
+          PBBI.IsEnqueued = false;
+        }
+      }
+    }
+  }
+
+  // Initialize liveins to the first BB. These are potentially redefined by
+  // predicated instructions.
+  SmallSet<unsigned, 4> Redefs;
+  InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+  InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
+  bool HasEarlyExit = CvtBBI->FalseBB != NULL;
+  if (CvtBBI->BB->pred_size() > 1) {
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    // Copy instructions in the true block, predicate them, and add them to
+    // the entry block.
+    CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
+  } else {
+    // Predicate the 'true' block after removing its branch.
+    CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
+    PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
+
+    // Now merge the entry of the triangle with the true block.
+    BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+    MergeBlocks(BBI, *CvtBBI, false);
+  }
+
+  // If 'true' block has a 'false' successor, add an exit branch to it.
+  if (HasEarlyExit) {
+    SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
+                                           CvtBBI->BrCond.end());
+    if (TII->ReverseBranchCondition(RevCond))
+      assert(false && "Unable to reverse branch condition!");
+    TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
+    BBI.BB->addSuccessor(CvtBBI->FalseBB);
+  }
+
+  // Merge in the 'false' block if the 'false' block has no other
+  // predecessors. Otherwise, add an unconditional branch to 'false'.
+  bool FalseBBDead = false;
+  bool IterIfcvt = true;
+  bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+  if (!isFallThrough) {
+    // Only merge them if the true block does not fallthrough to the false
+    // block. By not merging them, we make it possible to iteratively
+    // ifcvt the blocks.
+    if (!HasEarlyExit &&
+        NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough) {
+      MergeBlocks(BBI, *NextBBI);
+      FalseBBDead = true;
+    } else {
+      InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+      BBI.HasFallThrough = false;
+    }
+    // Mixed predicated and unpredicated code. This cannot be iteratively
+    // predicated.
+    IterIfcvt = false;
+  }
+
+  RemoveExtraEdges(BBI);
+
+  // Update block info. BB can be iteratively if-converted.
+  if (!IterIfcvt)
+    BBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+  CvtBBI->IsDone = true;
+  if (FalseBBDead)
+    NextBBI->IsDone = true;
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
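+// Shape sketch for one triangle variant (hypothetical blocks): the 'true'
+// block is predicated and merged, and control then continues into 'false'.
+//
+//   BB:   bcc FBB            BB:  <TBB body, predicated on !cc>
+//   TBB:  ...          =>         (fallthrough or branch to FBB)
+//   FBB:  ...                FBB: ...
+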
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+                                   unsigned NumDups1, unsigned NumDups2) {
+  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
+  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+  MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+  // True block must fall through or end with an unanalyzable terminator.
+  if (!TailBB) {
+    if (blockAlwaysFallThrough(TrueBBI))
+      TailBB = FalseBBI.TrueBB;
+    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+  }
+
+  if (TrueBBI.IsDone || FalseBBI.IsDone ||
+      TrueBBI.BB->pred_size() > 1 ||
+      FalseBBI.BB->pred_size() > 1) {
+    // Something has changed. It's no longer safe to predicate these blocks.
+    BBI.IsAnalyzed = false;
+    TrueBBI.IsAnalyzed = false;
+    FalseBBI.IsAnalyzed = false;
+    return false;
+  }
+
+  // Put the predicated instructions from the 'true' block before the
+  // instructions from the 'false' block, unless the true block would clobber
+  // the predicate, in which case, do the opposite.
+  BBInfo *BBI1 = &TrueBBI;
+  BBInfo *BBI2 = &FalseBBI;
+  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+  if (TII->ReverseBranchCondition(RevCond))
+    assert(false && "Unable to reverse branch condition!");
+  SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
+  SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
+
+  // Figure out the more profitable ordering.
+  bool DoSwap = false;
+  if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+    DoSwap = true;
+  else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+    if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
+      DoSwap = true;
+  }
+  if (DoSwap) {
+    std::swap(BBI1, BBI2);
+    std::swap(Cond1, Cond2);
+  }
+
+  // Remove the conditional branch from entry to the blocks.
+  BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+
+  // Initialize liveins to the first BB. These are potentially redefined by
+  // predicated instructions.
+  SmallSet<unsigned, 4> Redefs;
+  InitPredRedefs(BBI1->BB, Redefs, TRI);
+
+  // Remove the duplicated instructions at the beginnings of both paths.
+  MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
+  MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+  MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+  MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+  // Skip dbg_value instructions.
+  while (DI1 != DIE1 && DI1->isDebugValue())
+    ++DI1;
+  while (DI2 != DIE2 && DI2->isDebugValue())
+    ++DI2;
+  BBI1->NonPredSize -= NumDups1;
+  BBI2->NonPredSize -= NumDups1;
+
+  // Skip past the dups on each side separately since there may be
+  // differing dbg_value entries.
+  for (unsigned i = 0; i < NumDups1; ++DI1) {
+    if (!DI1->isDebugValue())
+      ++i;
+  }
+  while (NumDups1 != 0) {
+    ++DI2;
+    if (!DI2->isDebugValue())
+      --NumDups1;
+  }
+
+  UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
+  BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
+  BBI2->BB->erase(BBI2->BB->begin(), DI2);
+
+  // Predicate the 'true' block after removing its branch.
+  BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+  DI1 = BBI1->BB->end();
+  for (unsigned i = 0; i != NumDups2; ) {
+    // NumDups2 only counted non-dbg_value instructions, so this won't
+    // run off the head of the list.
+    assert(DI1 != BBI1->BB->begin());
+    --DI1;
+    // Skip dbg_value instructions.
+    if (!DI1->isDebugValue())
+      ++i;
+  }
+  BBI1->BB->erase(DI1, BBI1->BB->end());
+  PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
+
+  // Predicate the 'false' block.
+  BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
+  DI2 = BBI2->BB->end();
+  while (NumDups2 != 0) {
+    // NumDups2 only counted non-dbg_value instructions, so this won't
+    // run off the head of the list.
+    assert(DI2 != BBI2->BB->begin());
+    --DI2;
+    // Skip dbg_value instructions.
+    if (!DI2->isDebugValue())
+      --NumDups2;
+  }
+  PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
+
+  // Merge the true block into the entry of the diamond.
+  MergeBlocks(BBI, *BBI1, TailBB == 0);
+  MergeBlocks(BBI, *BBI2, TailBB == 0);
+
+  // If the if-converted block falls through or unconditionally branches into
+  // the tail block, and the tail block does not have other predecessors, then
+  // fold the tail block in as well. Otherwise, unless it falls through to the
+  // tail, add an unconditional branch to it.
+  if (TailBB) {
+    BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
+    bool CanMergeTail = !TailBBI.HasFallThrough;
+    // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+    // check if there are any other predecessors besides those.
+    unsigned NumPreds = TailBB->pred_size();
+    if (NumPreds > 1)
+      CanMergeTail = false;
+    else if (NumPreds == 1 && CanMergeTail) {
+      MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+      if (*PI != BBI1->BB && *PI != BBI2->BB)
+        CanMergeTail = false;
+    }
+    if (CanMergeTail) {
+      MergeBlocks(BBI, TailBBI);
+      TailBBI.IsDone = true;
+    } else {
+      BBI.BB->addSuccessor(TailBB);
+      InsertUncondBranch(BBI.BB, TailBB, TII);
+      BBI.HasFallThrough = false;
+    }
+  }
+
+  // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+  // which can happen here if TailBB is unanalyzable and is merged, so
+  // explicitly remove BBI1 and BBI2 as successors.
+  BBI.BB->removeSuccessor(BBI1->BB);
+  BBI.BB->removeSuccessor(BBI2->BB);
+  RemoveExtraEdges(BBI);
+
+  // Update block info.
+  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+  InvalidatePreds(BBI.BB);
+
+  // FIXME: Must maintain LiveIns.
+  return true;
+}
+
+/// PredicateBlock - Predicate instructions from the start of the block to the
+/// specified end with the specified condition.
+void IfConverter::PredicateBlock(BBInfo &BBI,
+                                 MachineBasicBlock::iterator E,
+                                 SmallVectorImpl<MachineOperand> &Cond,
+                                 SmallSet<unsigned, 4> &Redefs) {
+  for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
+    if (I->isDebugValue() || TII->isPredicated(I))
+      continue;
+    if (!TII->PredicateInstruction(I, Cond)) {
+#ifndef NDEBUG
+      dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+      llvm_unreachable(0);
+    }
+
+    // If the predicated instruction now redefines a register as the result of
+    // if-conversion, add an implicit kill.
+    UpdatePredRedefs(I, Redefs, TRI, true);
+  }
+
+  std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
+
+  BBI.IsAnalyzed = false;
+  BBI.NonPredSize = 0;
+
+  ++NumIfConvBBs;
+}
+
+/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
+/// the destination block. Skip end of block branches if IgnoreBr is true.
+void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
+                                        SmallVectorImpl<MachineOperand> &Cond,
+                                        SmallSet<unsigned, 4> &Redefs,
+                                        bool IgnoreBr) {
+  MachineFunction &MF = *ToBBI.BB->getParent();
+
+  for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
+         E = FromBBI.BB->end(); I != E; ++I) {
+    const TargetInstrDesc &TID = I->getDesc();
+    // Do not copy the end of the block branches.
+    if (IgnoreBr && TID.isBranch())
+      break;
+
+    MachineInstr *MI = MF.CloneMachineInstr(I);
+    ToBBI.BB->insert(ToBBI.BB->end(), MI);
+    ToBBI.NonPredSize++;
+    unsigned ExtraPredCost = 0;
+    unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+    if (NumCycles > 1)
+      ToBBI.ExtraCost += NumCycles-1;
+    ToBBI.ExtraCost2 += ExtraPredCost;
+
+    if (!TII->isPredicated(I) && !MI->isDebugValue()) {
+      if (!TII->PredicateInstruction(MI, Cond)) {
+#ifndef NDEBUG
+        dbgs() << "Unable to predicate " << *I << "!\n";
+#endif
+        llvm_unreachable(0);
+      }
+    }
+
+    // If the predicated instruction now redefines a register as the result of
+    // if-conversion, add an implicit kill.
+    UpdatePredRedefs(MI, Redefs, TRI, true);
+  }
+
+  if (!IgnoreBr) {
+    std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+                                           FromBBI.BB->succ_end());
+    MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+    MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+    for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+      MachineBasicBlock *Succ = Succs[i];
+      // Fallthrough edge can't be transferred.
+      if (Succ == FallThrough)
+        continue;
+      ToBBI.BB->addSuccessor(Succ);
+    }
+  }
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  std::copy(Cond.begin(), Cond.end(), std::back_inserter(ToBBI.Predicate));
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.IsAnalyzed = false;
+
+  ++NumDupBBs;
+}
+
+/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
+  ToBBI.BB->splice(ToBBI.BB->end(),
+                   FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
+
+  std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+                                         FromBBI.BB->succ_end());
+  MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+  MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+
+  for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+    MachineBasicBlock *Succ = Succs[i];
+    // Fallthrough edge can't be transferred.
+    if (Succ == FallThrough)
+      continue;
+    FromBBI.BB->removeSuccessor(Succ);
+    if (AddEdges)
+      ToBBI.BB->addSuccessor(Succ);
+  }
+
+  // Now FromBBI always falls through to the next block!
+  if (NBB && !FromBBI.BB->isSuccessor(NBB))
+    FromBBI.BB->addSuccessor(NBB);
+
+  std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
+            std::back_inserter(ToBBI.Predicate));
+  FromBBI.Predicate.clear();
+
+  ToBBI.NonPredSize += FromBBI.NonPredSize;
+  ToBBI.ExtraCost += FromBBI.ExtraCost;
+  ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
+  FromBBI.NonPredSize = 0;
+  FromBBI.ExtraCost = 0;
+  FromBBI.ExtraCost2 = 0;
+
+  ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
+  ToBBI.HasFallThrough = FromBBI.HasFallThrough;
+  ToBBI.IsAnalyzed = false;
+  FromBBI.IsAnalyzed = false;
+}
diff --git a/final/lib/CodeGen/InlineSpiller.cpp b/final/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 00000000000..34ae3ec2f3e
--- /dev/null
+++ b/final/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,439 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "Spiller.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+VerifySpills("verify-spills", cl::desc("Verify after each spill/split"));
+
+namespace {
+class InlineSpiller : public Spiller {
+  MachineFunctionPass &pass_;
+  MachineFunction &mf_;
+  LiveIntervals &lis_;
+  LiveStacks &lss_;
+  AliasAnalysis *aa_;
+  VirtRegMap &vrm_;
+  MachineFrameInfo &mfi_;
+  MachineRegisterInfo &mri_;
+  const TargetInstrInfo &tii_;
+  const TargetRegisterInfo &tri_;
+  const BitVector reserved_;
+
+  // Variables that are valid during spill(), but used by multiple methods.
+  LiveRangeEdit *edit_;
+  const TargetRegisterClass *rc_;
+  int stackSlot_;
+
+  // Values that failed to remat at some point.
+  SmallPtrSet<VNInfo*, 8> usedValues_;
+
+  ~InlineSpiller() {}
+
+public:
+  InlineSpiller(MachineFunctionPass &pass,
+                MachineFunction &mf,
+                VirtRegMap &vrm)
+    : pass_(pass),
+      mf_(mf),
+      lis_(pass.getAnalysis<LiveIntervals>()),
+      lss_(pass.getAnalysis<LiveStacks>()),
+      aa_(&pass.getAnalysis<AliasAnalysis>()),
+      vrm_(vrm),
+      mfi_(*mf.getFrameInfo()),
+      mri_(mf.getRegInfo()),
+      tii_(*mf.getTarget().getInstrInfo()),
+      tri_(*mf.getTarget().getRegisterInfo()),
+      reserved_(tri_.getReservedRegs(mf_)) {}
+
+  void spill(LiveInterval *li,
+             SmallVectorImpl<LiveInterval*> &newIntervals,
+             const SmallVectorImpl<LiveInterval*> &spillIs);
+
+  void spill(LiveRangeEdit &);
+
+private:
+  bool reMaterializeFor(MachineBasicBlock::iterator MI);
+  void reMaterializeAll();
+
+  bool coalesceStackAccess(MachineInstr *MI);
+  bool foldMemoryOperand(MachineBasicBlock::iterator MI,
+                         const SmallVectorImpl<unsigned> &Ops,
+                         MachineInstr *LoadMI = 0);
+  void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+  void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunctionPass &pass,
+                             MachineFunction &mf,
+                             VirtRegMap &vrm) {
+  if (VerifySpills)
+    mf.verify(&pass, "When creating inline spiller");
+  return new InlineSpiller(pass, mf, vrm);
+}
+}
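+// Typical use (an illustrative sketch, not part of this file): a register
+// allocator constructs one spiller per machine function and delegates to it,
+// e.g.
+//
+//   std::auto_ptr<Spiller> spiller(createInlineSpiller(Pass, MF, VRM));
+//   spiller->spill(&LI, NewIntervals, SpillIs);
+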
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
+  SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
+  VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
+
+  if (!OrigVNI) {
+    DEBUG(dbgs() << "\tadding <undef> flags: ");
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg())
+        MO.setIsUndef();
+    }
+    DEBUG(dbgs() << UseIdx << '\t' << *MI);
+    return true;
+  }
+
+  LiveRangeEdit::Remat RM(OrigVNI);
+  if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) {
+    usedValues_.insert(OrigVNI);
+    DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+    return false;
+  }
+
+  // If the instruction also writes edit_->getReg(), it had better not require
+  // the same register for uses and defs.
+  bool Reads, Writes;
+  SmallVector<unsigned, 8> Ops;
+  tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops);
+  if (Writes) {
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(Ops[i]);
+      if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+        usedValues_.insert(OrigVNI);
+        DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+        return false;
+      }
+    }
+  }
+
+  // Before rematerializing into a register for a single instruction, try to
+  // fold a load into the instruction. That avoids allocating a new register.
+  if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+      foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+    edit_->markRematerialized(RM.ParentVNI);
+    return true;
+  }
+
+  // Allocate a new register for the remat.
+  LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_);
+  NewLI.markNotSpillable();
+
+  // Rematting for a copy: Set allocation hint to be the destination register.
+  if (MI->isCopy())
+    mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg());
+
+  // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + lis_, tii_, tri_); + DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' + << *lis_.getInstructionFromIndex(DefIdx)); + + // Replace operands + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + MO.setReg(NewLI.reg); + MO.setIsKill(); + } + } + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + return true; +} + +/// reMaterializeAll - Try to rematerialize as many uses as possible, +/// and trim the live ranges after. +void InlineSpiller::reMaterializeAll() { + // Do a quick scan of the interval values to find if any are remattable. + if (!edit_->anyRematerializable(lis_, tii_, aa_)) + return; + + usedValues_.clear(); + + // Try to remat before all uses of edit_->getReg(). + bool anyRemat = false; + for (MachineRegisterInfo::use_nodbg_iterator + RI = mri_.use_nodbg_begin(edit_->getReg()); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(MI); + + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + bool anyRemoved = false; + for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), + E = edit_->getParent().vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || + usedValues_.count(VNI)) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); + lis_.RemoveMachineInstrFromMaps(DefMI); + vrm_.RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + VNI->def = SlotIndex(); + anyRemoved = true; + } + + if (!anyRemoved) + return; + + // Removing values may cause debug uses where parent is not live. + for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); + MachineInstr *MI = RI.skipInstruction();) { + if (!MI->isDebugValue()) + continue; + // Try to preserve the debug value if parent is live immediately after it. + MachineBasicBlock::iterator NextMI = MI; + ++NextMI; + if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { + SlotIndex Idx = lis_.getInstructionIndex(NextMI); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); + if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) + continue; + } + DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); + MI->eraseFromParent(); + } +} + +/// If MI is a load or store of stackSlot_, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + int FI = 0; + unsigned reg; + if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && + !(reg = tii_.isStoreToStackSlot(MI, FI))) + return false; + + // We have a stack access. Is it the right register and slot? + if (reg != edit_->getReg() || FI != stackSlot_) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + lis_.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + return true; +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into MI. +/// @param MI Instruction using or defining the current register. +/// @param Ops Operand indices from readsWritesVirtualRegister(). +/// @param LoadMI Load instruction to use instead of stack slot when non-null. 
+/// @return True on success, and MI will be erased. +bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops, + MachineInstr *LoadMI) { + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i]; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) + continue; + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // We cannot fold a load instruction into a def. + if (LoadMI && MO.isDef()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = + LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) + : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + if (!FoldMI) + return false; + lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + if (!LoadMI) + vrm_.addSpillSlotUse(stackSlot_, FoldMI); + MI->eraseFromParent(); + DEBUG(dbgs() << "\tfolded: " << *FoldMI); + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + --MI; // Point to load instruction. + SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + + // Get the defined value. It could be an early clobber so keep the def index. + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); + assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); + Idx = VNI->def; + + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + --MI; // Point to store instruction. + SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); +} + +void InlineSpiller::spill(LiveInterval *li, + SmallVectorImpl &newIntervals, + const SmallVectorImpl &spillIs) { + LiveRangeEdit edit(*li, newIntervals, &spillIs); + spill(edit); + if (VerifySpills) + mf_.verify(&pass_, "After inline spill"); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + edit_ = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + DEBUG(dbgs() << "Inline spilling " + << mri_.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\nFrom original " + << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); + + reMaterializeAll(); + + // Remat may handle everything. 
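// [Editorial sketch] InlineSpiller::foldMemoryOperand above filters operands
// before asking the target to fold: implicit operands are skipped,
// sub-register operands and (when folding a load) defs make folding illegal,
// and tied uses stay out of the fold list. The standalone C++ below replays
// that filter; Op and filterFoldOps are illustrative names, not LLVM API.
#include <cstdio>
#include <vector>

struct Op { bool isImplicit, hasSubReg, isDef, isTiedUse; unsigned idx; };

// Returns false when folding is illegal; otherwise fills FoldOps with the
// explicit, untied operand indices.
static bool filterFoldOps(const std::vector<Op> &Ops, bool FoldingLoad,
                          std::vector<unsigned> &FoldOps) {
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    const Op &O = Ops[i];
    if (O.isImplicit)
      continue;                   // implicit operands are skipped
    if (O.hasSubReg)
      return false;               // targets cannot fold sub-registers
    if (FoldingLoad && O.isDef)
      return false;               // a load cannot be folded into a def
    if (!O.isTiedUse)
      FoldOps.push_back(O.idx);   // tied uses are never passed along
  }
  return true;
}

int main() {
  std::vector<Op> Ops(1);         // one explicit, untied use at index 0
  std::vector<unsigned> FoldOps;
  const bool Legal = filterFoldOps(Ops, false, FoldOps);
  std::printf("legal=%d foldable=%u\n", (int)Legal, (unsigned)FoldOps.size());
  return 0;                       // prints "legal=1 foldable=1"
}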
+ if (edit_->getParent().empty()) + return; + + rc_ = mri_.getRegClass(edit.getReg()); + + // Share a stack slot among all descendants of Orig. + unsigned Orig = vrm_.getOriginal(edit.getReg()); + stackSlot_ = vrm_.getStackSlot(Orig); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(Orig); + + if (Orig != edit.getReg()) + vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); + + // Update LiveStacks now that we are committed to spilling. + LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); + if (!stacklvr.hasAtLeastOneValue()) + stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); + stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); + + // Iterate over instructions using register. + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); + MachineInstr *MI = RI.skipInstruction();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI)) + continue; + + // Analyze instruction. + bool Reads, Writes; + SmallVector Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); + + // Attempt to fold memory ops. + if (foldMemoryOperand(MI, Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); + NewLI.markNotSpillable(); + + if (Reads) + insertReload(NewLI, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + MO.setReg(NewLI.reg); + if (MO.isUse()) { + if (!MI->isRegTiedToDefOperand(Ops[i])) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + + // FIXME: Use a second vreg if instruction has no tied ops. + if (Writes && hasLiveDef) + insertSpill(NewLI, MI); + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + } +} diff --git a/final/lib/CodeGen/IntrinsicLowering.cpp b/final/lib/CodeGen/IntrinsicLowering.cpp new file mode 100644 index 00000000000..3861ddadf65 --- /dev/null +++ b/final/lib/CodeGen/IntrinsicLowering.cpp @@ -0,0 +1,564 @@ +//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IntrinsicLowering class. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SmallVector.h"
+using namespace llvm;
+
+template <class ArgIt>
+static void EnsureFunctionExists(Module &M, const char *Name,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy) {
+  // Insert a correctly-typed definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back(I->getType());
+  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
+}
+
+static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
+                                    const char *FName,
+                                    const char *DName, const char *LDName) {
+  // Insert definitions for all the floating point types.
+  switch((int)Fn->arg_begin()->getType()->getTypeID()) {
+  case Type::FloatTyID:
+    EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::getFloatTy(M.getContext()));
+    break;
+  case Type::DoubleTyID:
+    EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
+                         Type::getDoubleTy(M.getContext()));
+    break;
+  case Type::X86_FP80TyID:
+  case Type::FP128TyID:
+  case Type::PPC_FP128TyID:
+    EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
+                         Fn->arg_begin()->getType());
+    break;
+  }
+}
+
+/// ReplaceCallWith - This function is used when we want to lower an intrinsic
+/// call to a call of an external function. This handles hard cases such as
+/// when there was already a prototype for the external function, and if that
+/// prototype doesn't match the arguments we expect to pass in.
+template <class ArgIt>
+static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
+                                 ArgIt ArgBegin, ArgIt ArgEnd,
+                                 const Type *RetTy) {
+  // If we haven't already looked up this function, check to see if the
+  // program already contains a function with this name.
+  Module *M = CI->getParent()->getParent()->getParent();
+  // Get or insert the definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back((*I)->getType());
+  Constant* FCache = M->getOrInsertFunction(NewFn,
+                                 FunctionType::get(RetTy, ParamTys, false));
+
+  IRBuilder<> Builder(CI->getParent(), CI);
+  SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
+  CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+  NewCI->setName(CI->getName());
+  if (!CI->use_empty())
+    CI->replaceAllUsesWith(NewCI);
+  return NewCI;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+    !defined(setjmp_undefined_for_msvc)
+#  pragma push_macro("setjmp")
+#  undef setjmp
+#  define setjmp_undefined_for_msvc
+#endif
+
+void IntrinsicLowering::AddPrototypes(Module &M) {
+  LLVMContext &Context = M.getContext();
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    if (I->isDeclaration() && !I->use_empty())
+      switch (I->getIntrinsicID()) {
+      default: break;
+      case Intrinsic::setjmp:
+        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
+                             Type::getInt32Ty(M.getContext()));
+        break;
+      case Intrinsic::longjmp:
+        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
+                             Type::getVoidTy(M.getContext()));
+        break;
+      case Intrinsic::siglongjmp:
+        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
+                             Type::getVoidTy(M.getContext()));
+        break;
+      case Intrinsic::memcpy:
+        M.getOrInsertFunction("memcpy",
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt8PtrTy(Context),
+                              TD.getIntPtrType(Context), (Type *)0);
+        break;
+      case Intrinsic::memmove:
+        M.getOrInsertFunction("memmove",
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt8PtrTy(Context),
+                              TD.getIntPtrType(Context), (Type *)0);
+        break;
+      case Intrinsic::memset:
+        M.getOrInsertFunction("memset",
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt8PtrTy(Context),
+                              Type::getInt32Ty(M.getContext()),
+                              TD.getIntPtrType(Context), (Type *)0);
+        break;
+      case Intrinsic::sqrt:
+        EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
+        break;
+      case Intrinsic::sin:
+        EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
+        break;
+      case Intrinsic::cos:
+        EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
+        break;
+      case Intrinsic::pow:
+        EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
+        break;
+      case Intrinsic::log:
+        EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
+        break;
+      case Intrinsic::log2:
+        EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
+        break;
+      case Intrinsic::log10:
+        EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
+        break;
+      case Intrinsic::exp:
+        EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
+        break;
+      case Intrinsic::exp2:
+        EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
+        break;
+      }
+}
+
+/// LowerBSWAP - Emit the code to lower bswap of V before the specified
+/// instruction IP.
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP->getParent(), IP); + + switch(BitSize) { + default: llvm_unreachable("Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6"); + V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64"); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
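// [Editorial sketch] The 32-bit case of LowerBSWAP above builds the byte swap
// from two left shifts, two right shifts, and two masks. The same sequence in
// plain C++, as a quick way to check the masks and shift amounts:
#include <stdint.h>
#include <cstdio>

static uint32_t bswap32(uint32_t V) {
  uint32_t Tmp4 = V << 24;                 // byte 0 -> byte 3
  uint32_t Tmp3 = (V << 8) & 0x00FF0000u;  // byte 1 -> byte 2
  uint32_t Tmp2 = (V >> 8) & 0x0000FF00u;  // byte 2 -> byte 1
  uint32_t Tmp1 = V >> 24;                 // byte 3 -> byte 0
  return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
}

int main() {
  std::printf("%08x\n", bswap32(0x12345678u)); // prints "78563412"
  return 0;
}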
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); + + static const uint64_t MaskValues[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + + IRBuilder<> Builder(IP->getParent(), IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + unsigned WordSize = (BitSize + 63) / 64; + Value *Count = ConstantInt::get(V->getType(), 0); + + for (unsigned n = 0; n < WordSize; ++n) { + Value *PartValue = V; + for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); + i <<= 1, ++ct) { + Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); + Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); + Value *VShift = Builder.CreateLShr(PartValue, + ConstantInt::get(V->getType(), i), + "ctpop.sh"); + Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); + PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); + } + Count = Builder.CreateAdd(PartValue, Count, "ctpop.part"); + if (BitSize > 64) { + V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64), + "ctpop.part.sh"); + BitSize -= 64; + } + } + + return Count; +} + +/// LowerCTLZ - Emit the code to lower ctlz of V before the specified +/// instruction IP. +static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { + + IRBuilder<> Builder(IP->getParent(), IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + for (unsigned i = 1; i < BitSize; i <<= 1) { + Value *ShVal = ConstantInt::get(V->getType(), i); + ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh"); + V = Builder.CreateOr(V, ShVal, "ctlz.step"); + } + + V = Builder.CreateNot(V); + return LowerCTPOP(Context, V, IP); +} + +static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, + const char *Dname, + const char *LDname) { + CallSite CS(CI); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { + default: llvm_unreachable("Invalid type in intrinsic"); + case Type::FloatTyID: + ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), + Type::getFloatTy(CI->getContext())); + break; + case Type::DoubleTyID: + ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), + Type::getDoubleTy(CI->getContext())); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), + CI->getArgOperand(0)->getType()); + break; + } +} + +void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { + IRBuilder<> Builder(CI->getParent(), CI); + LLVMContext &Context = CI->getContext(); + + const Function *Callee = CI->getCalledFunction(); + assert(Callee && "Cannot lower an indirect call!"); + + CallSite CS(CI); + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ + Callee->getName() + "'!"); + default: + report_fatal_error("Code generator does not support intrinsic function '"+ + Callee->getName()+"'!"); + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. 
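// [Editorial sketch] LowerCTPOP above is the classic parallel popcount: each
// round adds neighbouring 1-, 2-, 4-, ... bit fields using the six masks, and
// LowerCTLZ reduces to it by smearing the highest set bit rightward and
// counting the complement. A 64-bit check in plain C++ (not the IR builder
// code above):
#include <stdint.h>
#include <cstdio>

static uint64_t ctpop64(uint64_t V) {
  static const uint64_t Mask[6] = {
    0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
    0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
  };
  for (unsigned i = 1, ct = 0; i < 64; i <<= 1, ++ct)
    V = (V & Mask[ct]) + ((V >> i) & Mask[ct]); // add adjacent bit fields
  return V;
}

static uint64_t ctlz64(uint64_t V) {
  for (unsigned i = 1; i < 64; i <<= 1)
    V |= V >> i;        // smear the highest set bit into all lower positions
  return ctpop64(~V);   // leading zeros = popcount of the zero prefix
}

int main() {
  std::printf("%u %u\n", (unsigned)ctpop64(0xF0F0ULL),
              (unsigned)ctlz64(1ULL << 40)); // prints "8 23"
  return 0;
}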
+ case Intrinsic::setjmp: { + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getInt32Ty(Context)); + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getArgOperand(0); + Value *NotSrc = Builder.CreateNot(Src); + NotSrc->setName(Src->getName() + ".not"); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = Builder.CreateSub(Src, SrcM1); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + if (!Warned) + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } + + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_exception: + case Intrinsic::eh_selector: + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::eh_typeid_for: + // Return something different to eh_selector. 
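// [Editorial sketch] The cttz lowering above relies on the identity
// cttz(x) == ctpop(~x & (x - 1)): subtracting one turns the trailing zeros
// into ones and clears the lowest set bit, and masking with ~x keeps exactly
// those bits. A quick check in plain C++:
#include <stdint.h>
#include <cstdio>

// Loop popcount, only to keep the sketch self-contained.
static unsigned pop(uint64_t v) {
  unsigned c = 0;
  for (; v; v &= v - 1)
    ++c;
  return c;
}

static unsigned cttz(uint64_t x) { return pop(~x & (x - 1)); }

int main() {
  std::printf("%u\n", cttz(0x50ULL)); // 0b1010000 -> prints "4"
  return 0;
}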
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::var_annotation: + break; // Strip out annotate intrinsic + + case Intrinsic::memcpy: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memmove: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memset: { + const IntegerType *IntPtr = TD.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::sqrt: { + ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. + break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} + +bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { + // Verify this is a simple bswap. + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || + !CI->getType()->isIntegerTy()) + return false; + + const IntegerType *Ty = dyn_cast(CI->getType()); + if (!Ty) + return false; + + // Okay, we can do this xform, do so now. 
+ const Type *Tys[] = { Ty }; + Module *M = CI->getParent()->getParent()->getParent(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); + + Value *Op = CI->getArgOperand(0); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} diff --git a/final/lib/CodeGen/LLVMTargetMachine.cpp b/final/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 index 00000000000..377f80db7bf --- /dev/null +++ b/final/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,453 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachine.h" +#include "llvm/PassManager.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/StandardPasses.h" +using namespace llvm; + +namespace llvm { + bool EnableFastISel; +} + +static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, + cl::desc("Disable code placement")); +static cl::opt DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt ShowMCEncoding("show-mc-encoding", cl::Hidden, + 
cl::desc("Show encoding in .s output")); +static cl::opt ShowMCInst("show-mc-inst", cl::Hidden, + cl::desc("Show instruction structure in .s output")); +static cl::opt EnableMCLogging("enable-mc-api-logging", cl::Hidden, + cl::desc("Enable MC API logging")); +static cl::opt VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL)); + +static cl::opt +AsmVerbose("asm-verbose", cl::desc("Add comments to directives."), + cl::init(cl::BOU_UNSET)); + +static bool getVerboseAsm() { + switch (AsmVerbose) { + default: + case cl::BOU_UNSET: return TargetMachine::getAsmVerbosityDefault(); + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } +} + +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + const std::string &Triple) + : TargetMachine(T), TargetTriple(Triple) { + AsmInfo = T.createAsmInfo(TargetTriple); +} + +// Set the default code model for the JIT for a generic target. +// FIXME: Is small right here? or .is64Bit() ? Large : Small? +void LLVMTargetMachine::setCodeModelForJIT() { + setCodeModel(CodeModel::Small); +} + +// Set the default code model for static compilation for a generic target. +void LLVMTargetMachine::setCodeModelForStatic() { + setCodeModel(CodeModel::Small); +} + +bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, + formatted_raw_ostream &Out, + CodeGenFileType FileType, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Add common CodeGen passes. + MCContext *Context = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) + return true; + assert(Context != 0 && "Failed to get MCContext"); + + const MCAsmInfo &MAI = *getMCAsmInfo(); + OwningPtr AsmStreamer; + + switch (FileType) { + default: return true; + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = + getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = 0; + TargetAsmBackend *TAB = 0; + if (ShowMCEncoding) { + MCE = getTarget().createCodeEmitter(*this, *Context); + TAB = getTarget().createAsmBackend(TargetTriple); + } + + MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, + getVerboseAsm(), + hasMCUseLoc(), + InstPrinter, + MCE, TAB, + ShowMCInst); + AsmStreamer.reset(S); + break; + } + case CGFT_ObjectFile: { + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context); + TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + if (MCE == 0 || TAB == 0) + return true; + + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context, + *TAB, Out, MCE, + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); + break; + } + case CGFT_Null: + // The Null output is intended for use for performance analysis and testing, + // not real users. + AsmStreamer.reset(createNullStreamer(*Context)); + break; + } + + if (EnableMCLogging) + AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 
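// [Editorial sketch] getVerboseAsm above shows the tri-state option pattern:
// an explicit -asm-verbose=true/false wins, while the unset state defers to a
// default chosen elsewhere. A minimal version of that resolution logic
// (BoolOrDefault and resolve are illustrative names, not the cl::opt API):
#include <cstdio>

enum BoolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE };

static bool resolve(BoolOrDefault Opt, bool Default) {
  switch (Opt) {
  case BOU_TRUE:  return true;     // explicitly enabled on the command line
  case BOU_FALSE: return false;    // explicitly disabled
  default:        return Default;  // unset: fall back to the caller's policy
  }
}

int main() {
  std::printf("%d %d %d\n", (int)resolve(BOU_UNSET, true),
              (int)resolve(BOU_TRUE, false), (int)resolve(BOU_FALSE, true));
  return 0;                        // prints "1 1 0"
}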
+ FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + + // Make sure the code model is set. + setCodeModelForStatic(); + PM.add(createGCInfoDeleter()); + return false; +} + +/// addPassesToEmitMachineCode - Add passes to the specified pass manager to +/// get machine code emitted. This uses a JITCodeEmitter object to handle +/// actually outputting the machine code and resolving things like the address +/// of functions. This method should returns true if machine code emission is +/// not supported. +/// +bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, + JITCodeEmitter &JCE, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Make sure the code model is set. + setCodeModelForJIT(); + + // Add common CodeGen passes. + MCContext *Ctx = 0; + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + return true; + + addCodeEmitter(PM, OptLevel, JCE); + PM.add(createGCInfoDeleter()); + + return false; // success! +} + +/// addPassesToEmitMC - Add passes to the specified pass manager to get +/// machine code emitted with the MCJIT. This method returns true if machine +/// code is not supported. It fills the MCContext Ctx pointer which can be +/// used to build custom MCStreamer. +/// +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, + MCContext *&Ctx, + CodeGenOpt::Level OptLevel, + bool DisableVerify) { + // Add common CodeGen passes. + if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) + return true; + // Make sure the code model is set. + setCodeModelForJIT(); + + return false; // success! +} + +static void printNoVerify(PassManagerBase &PM, const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} + +static void printAndVerify(PassManagerBase &PM, + const char *Banner) { + if (PrintMachineCode) + PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); + + if (VerifyMachineCode) + PM.add(createMachineVerifierPass(Banner)); +} + +/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both +/// emitting to assembly files or machine code output. +/// +bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, + CodeGenOpt::Level OptLevel, + bool DisableVerify, + MCContext *&OutContext) { + // Standard LLVM-Level Passes. + + // Basic AliasAnalysis support. + createStandardAliasAnalysisPasses(&PM); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Run loop strength reduction before anything else. + if (OptLevel != CodeGenOpt::None && !DisableLSR) { + PM.add(createLoopStrengthReducePass(getTargetLowering())); + if (PrintLSR) + PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); + } + + PM.add(createGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + PM.add(createUnreachableBlockEliminationPass()); + + // Turn exception handling constructs into something the code generators can + // handle. + switch (getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. 
Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + PM.add(createSjLjEHPass(getTargetLowering())); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::DwarfTable: + case ExceptionHandling::ARM: + PM.add(createDwarfEHPass(this)); + break; + case ExceptionHandling::None: + PM.add(createLowerInvokePass(getTargetLowering())); + + // The lower invoke pass may create unreachable code. Remove it. + PM.add(createUnreachableBlockEliminationPass()); + break; + } + + if (OptLevel != CodeGenOpt::None && !DisableCGP) + PM.add(createCodeGenPreparePass(getTargetLowering())); + + PM.add(createStackProtectorPass(getTargetLowering())); + + addPreISel(PM, OptLevel); + + if (PrintISelInput) + PM.add(createPrintFunctionPass("\n\n" + "*** Final LLVM Code input to ISel ***\n", + &dbgs())); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + PM.add(createVerifierPass()); + + // Standard Lower-Level Passes. + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + TargetAsmInfo *TAI = new TargetAsmInfo(*this); + MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI); + PM.add(MMI); + OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*this, OptLevel)); + + // Enable FastISel with -fast, but allow that to be overridden. + if (EnableFastISelOption == cl::BOU_TRUE || + (OptLevel == CodeGenOpt::None && EnableFastISelOption != cl::BOU_FALSE)) + EnableFastISel = true; + + // Ask the target for an isel. + if (addInstSelector(PM, OptLevel)) + return true; + + // Print the instruction selected machine code... + printAndVerify(PM, "After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + PM.add(createExpandISelPseudosPass()); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + if (OptLevel != CodeGenOpt::None) + PM.add(createOptimizePHIsPass()); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + PM.add(createLocalStackSlotAllocationPass()); + + if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass"); + + if (!DisableMachineLICM) + PM.add(createMachineLICMPass()); + PM.add(createMachineCSEPass()); + if (!DisableMachineSink) + PM.add(createMachineSinkingPass()); + printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); + + PM.add(createPeepholeOptimizerPass()); + printAndVerify(PM, "After codegen peephole optimization pass"); + } + + // Pre-ra tail duplication. 
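// [Editorial sketch] addCommonCodeGenPasses builds its pipeline by appending
// passes in a fixed order, gating the optimization-only ones on OptLevel and
// the Disable* flags. The toy below condenses that shape; the pass names are
// illustrative labels, not the real factory functions.
#include <cstdio>
#include <string>
#include <vector>

int main() {
  const bool Optimize = true;      // stands in for OptLevel != None
  std::vector<std::string> PM;
  PM.push_back("verifier");
  if (Optimize) PM.push_back("loop-strength-reduce");
  PM.push_back("gc-lowering");
  PM.push_back("unreachable-block-elim");
  if (Optimize) PM.push_back("codegen-prepare");
  PM.push_back("stack-protector");
  PM.push_back("instruction-selection");
  for (unsigned i = 0, e = PM.size(); i != e; ++i)
    std::printf("%u: %s\n", i, PM[i].c_str());
  return 0;
}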
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) { + PM.add(createTailDuplicatePass(true)); + printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); + } + + // Run pre-ra passes. + if (addPreRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PreRegAlloc passes"); + + // Perform register allocation. + PM.add(createRegisterAllocator(OptLevel)); + printAndVerify(PM, "After Register Allocation"); + + // Perform stack slot coloring and post-ra machine LICM. + if (OptLevel != CodeGenOpt::None) { + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + if (!DisableSSC) + PM.add(createStackSlotColoringPass(false)); + + // Run post-ra machine LICM to hoist reloads / remats. + if (!DisablePostRAMachineLICM) + PM.add(createMachineLICMPass(false)); + + printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); + } + + // Run post-ra passes. + if (addPostRegAlloc(PM, OptLevel)) + printAndVerify(PM, "After PostRegAlloc passes"); + + PM.add(createLowerSubregsPass()); + printAndVerify(PM, "After LowerSubregs"); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + PM.add(createPrologEpilogCodeInserter()); + printAndVerify(PM, "After PrologEpilogCodeInserter"); + + // Run pre-sched2 passes. + if (addPreSched2(PM, OptLevel)) + printAndVerify(PM, "After PreSched2 passes"); + + // Second pass scheduler. + if (OptLevel != CodeGenOpt::None && !DisablePostRA) { + PM.add(createPostRAScheduler(OptLevel)); + printAndVerify(PM, "After PostRAScheduler"); + } + + // Branch folding must be run after regalloc and prolog/epilog insertion. + if (OptLevel != CodeGenOpt::None && !DisableBranchFold) { + PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); + printNoVerify(PM, "After BranchFolding"); + } + + // Tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { + PM.add(createTailDuplicatePass(false)); + printNoVerify(PM, "After TailDuplicate"); + } + + PM.add(createGCMachineCodeAnalysisPass()); + + if (PrintGCInfo) + PM.add(createGCInfoPrinter(dbgs())); + + if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } + + if (addPreEmitPass(PM, OptLevel)) + printNoVerify(PM, "After PreEmit passes"); + + return false; +} diff --git a/final/lib/CodeGen/LatencyPriorityQueue.cpp b/final/lib/CodeGen/LatencyPriorityQueue.cpp new file mode 100644 index 00000000000..0eb009ddac2 --- /dev/null +++ b/final/lib/CodeGen/LatencyPriorityQueue.cpp @@ -0,0 +1,152 @@ +//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LatencyPriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "scheduler" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return 0; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push_back(SU); +} + + +// ScheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); + } +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. 
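// [Editorial sketch] latency_sort above is a multi-key comparator: the
// schedule-high flag dominates, then critical-path latency, then how many
// nodes a unit solely blocks, with the node number as a final stable
// tie-break. The same cascade on a plain struct (Node and lowerPriority are
// illustrative names, not LLVM API):
#include <cstdio>

struct Node { bool schedHigh; unsigned latency, solelyBlocked, num; };

// Returns true when A has lower scheduling priority than B.
static bool lowerPriority(const Node &A, const Node &B) {
  if (A.schedHigh != B.schedHigh)
    return B.schedHigh;                       // schedule-high wins outright
  if (A.latency != B.latency)
    return A.latency < B.latency;             // longer critical path first
  if (A.solelyBlocked != B.solelyBlocked)
    return A.solelyBlocked < B.solelyBlocked; // unblock more nodes first
  return A.num < B.num;                       // stable ordering
}

int main() {
  const Node A = { false, 5, 0, 0 };
  const Node B = { false, 7, 0, 1 };
  const Node &Best = lowerPriority(A, B) ? B : A;
  std::printf("pick node %u\n", Best.num);    // prints "pick node 1"
  return 0;
}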
+void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + +SUnit *LatencyPriorityQueue::pop() { + if (empty()) return NULL; + std::vector::iterator Best = Queue.begin(); + for (std::vector::iterator I = llvm::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); + return V; +} + +void LatencyPriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); +} + +#ifdef NDEBUG +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {} +#else +void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const { + LatencyPriorityQueue q = *this; + while (!q.empty()) { + SUnit *su = q.pop(); + dbgs() << "Height " << su->getHeight() << ": "; + su->dump(DAG); + } +} +#endif diff --git a/final/lib/CodeGen/LiveDebugVariables.cpp b/final/lib/CodeGen/LiveDebugVariables.cpp new file mode 100644 index 00000000000..853ec1ac7c1 --- /dev/null +++ b/final/lib/CodeGen/LiveDebugVariables.cpp @@ -0,0 +1,711 @@ +//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveDebugVariables analysis. +// +// Remove all DBG_VALUE instructions referencing virtual registers and replace +// them with a data structure tracking where live user variables are kept - in a +// virtual register or in a stack slot. +// +// Allow the data structure to be updated during register allocation when values +// are moved between registers and stack slots. Finally emit new DBG_VALUE +// instructions after register allocation is complete. 
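// [Editorial sketch] pop() and remove() above share one trick: in an
// unordered vector, an element can be erased in O(1) by swapping it with the
// last element and popping the back. A generic version (swapRemove is an
// illustrative name; the iterator must point at an existing element):
#include <algorithm>
#include <cstdio>
#include <vector>

template <class T>
static void swapRemove(std::vector<T> &Q,
                       typename std::vector<T>::iterator I) {
  if (I != Q.end() - 1)
    std::swap(*I, Q.back()); // move the victim to the back...
  Q.pop_back();              // ...and drop it without shifting elements
}

int main() {
  std::vector<int> Q;
  for (int v = 1; v <= 4; ++v)
    Q.push_back(v);
  swapRemove(Q, std::find(Q.begin(), Q.end(), 2));
  for (unsigned i = 0, e = Q.size(); i != e; ++i)
    std::printf("%d ", Q[i]);  // prints "1 4 3": order is not preserved
  std::printf("\n");
  return 0;
}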
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "livedebug" +#include "LiveDebugVariables.h" +#include "VirtRegMap.h" +#include "llvm/Constants.h" +#include "llvm/Metadata.h" +#include "llvm/Value.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +static cl::opt +EnableLDV("live-debug-variables", cl::init(true), + cl::desc("Enable the live debug variables pass"), cl::Hidden); + +char LiveDebugVariables::ID = 0; + +INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) + +void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequiredTransitive(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); +} + +/// LocMap - Map of where a user value is live, and its location. +typedef IntervalMap LocMap; + +/// UserValue - A user value is a part of a debug info user variable. +/// +/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register +/// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. +namespace { +class UserValue { + const MDNode *variable; ///< The debug info variable we are part of. + unsigned offset; ///< Byte offset into variable. + DebugLoc dl; ///< The debug location for the variable. This is + ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next; ///< Next value in equivalence class, or null. + + /// Numbered locations referenced by locmap. + SmallVector locations; + + /// Map of slot indices where this value is live. + LocMap locInts; + + /// coalesceLocation - After LocNo was changed, check if it has become + /// identical to another location, and coalesce them. This may cause LocNo or + /// a later location to be erased, but no earlier location will be erased. + void coalesceLocation(unsigned LocNo); + + /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, + LiveIntervals &LIS, const TargetInstrInfo &TII); + + /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. + void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS, const TargetInstrInfo &TII); + +public: + /// UserValue - Create a new UserValue. 
+  UserValue(const MDNode *var, unsigned o, DebugLoc L,
+            LocMap::Allocator &alloc)
+    : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+  {}
+
+  /// getLeader - Get the leader of this value's equivalence class.
+  UserValue *getLeader() {
+    UserValue *l = leader;
+    while (l != l->leader)
+      l = l->leader;
+    return leader = l;
+  }
+
+  /// getNext - Return the next UserValue in the equivalence class.
+  UserValue *getNext() const { return next; }
+
+  /// match - Does this UserValue match the parameters?
+  bool match(const MDNode *Var, unsigned Offset) const {
+    return Var == variable && Offset == offset;
+  }
+
+  /// merge - Merge equivalence classes.
+  static UserValue *merge(UserValue *L1, UserValue *L2) {
+    L2 = L2->getLeader();
+    if (!L1)
+      return L2;
+    L1 = L1->getLeader();
+    if (L1 == L2)
+      return L1;
+    // Splice L2 before L1's members.
+    UserValue *End = L2;
+    while (End->next)
+      End->leader = L1, End = End->next;
+    End->leader = L1;
+    End->next = L1->next;
+    L1->next = L2;
+    return L1;
+  }
+
+  /// getLocationNo - Return the location number that matches Loc.
+  unsigned getLocationNo(const MachineOperand &LocMO) {
+    if (LocMO.isReg() && LocMO.getReg() == 0)
+      return ~0u;
+    for (unsigned i = 0, e = locations.size(); i != e; ++i)
+      if (LocMO.isIdenticalTo(locations[i]))
+        return i;
+    locations.push_back(LocMO);
+    // We are storing a MachineOperand outside a MachineInstr.
+    locations.back().clearParent();
+    return locations.size() - 1;
+  }
+
+  /// addDef - Add a definition point to this value.
+  void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+    // Add a singular (Idx,Idx) -> Loc mapping.
+    LocMap::iterator I = locInts.find(Idx);
+    if (!I.valid() || I.start() != Idx)
+      I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+  }
+
+  /// extendDef - Extend the current definition as far as possible down the
+  /// dominator tree. Stop when meeting an existing def or when leaving the
+  /// live range of VNI.
+  /// @param Idx   Starting point for the definition.
+  /// @param LocNo Location number to propagate.
+  /// @param LI    Restrict liveness to where LI has the value VNI. May be
+  ///              null.
+  /// @param VNI   When LI is not null, this is the value to restrict to.
+  /// @param LIS   Live intervals analysis.
+  /// @param MDT   Dominator tree.
+  void extendDef(SlotIndex Idx, unsigned LocNo,
+                 LiveInterval *LI, const VNInfo *VNI,
+                 LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// computeIntervals - Compute the live intervals of all locations after
+  /// collecting all their def points.
+  void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+  /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+                      const TargetRegisterInfo *TRI);
+
+  /// rewriteLocations - Rewrite virtual register locations according to the
+  /// provided virtual register map.
+  void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from the data
+  /// structures.
+  void emitDebugValues(VirtRegMap *VRM,
+                       LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
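// [Editorial sketch] getLeader() and merge() above implement a small
// union-find over the equivalence classes: getLeader chases the leader chain
// (and path-compresses on the way out), while merge links one class's leader
// under the other's. The same structure on integer ids (EqClasses is an
// illustrative name, not LLVM API):
#include <cstdio>
#include <vector>

struct EqClasses {
  std::vector<unsigned> leader;
  explicit EqClasses(unsigned n) : leader(n) {
    for (unsigned i = 0; i != n; ++i)
      leader[i] = i;              // every id starts as its own leader
  }
  unsigned getLeader(unsigned v) {
    while (leader[v] != v)
      v = leader[v];              // chase the chain to the root
    return v;
  }
  void merge(unsigned a, unsigned b) {
    leader[getLeader(b)] = getLeader(a); // link b's root under a's root
  }
};

int main() {
  EqClasses EC(4);
  EC.merge(0, 1); // {0,1}
  EC.merge(2, 3); // {2,3}
  EC.merge(1, 3); // the two classes collapse into one, led by 0
  std::printf("%u %u\n", EC.getLeader(2), EC.getLeader(3)); // prints "0 0"
  return 0;
}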
+  /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction.
+  /// A variable may have more than one corresponding DBG_VALUE instruction;
+  /// only the first one needs a DebugLoc to identify the variable's lexical
+  /// scope in the source file.
+  DebugLoc findDebugLoc();
+  void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+  LiveDebugVariables &pass;
+  LocMap::Allocator allocator;
+  MachineFunction *MF;
+  LiveIntervals *LIS;
+  MachineDominatorTree *MDT;
+  const TargetRegisterInfo *TRI;
+
+  /// userValues - All allocated UserValue instances.
+  SmallVector<UserValue*, 8> userValues;
+
+  /// Map virtual register to eq class leader.
+  typedef DenseMap<unsigned, UserValue*> VRMap;
+  VRMap virtRegToEqClass;
+
+  /// Map user variable to eq class leader.
+  typedef DenseMap<const MDNode*, UserValue*> UVMap;
+  UVMap userVarMap;
+
+  /// getUserValue - Find or create a UserValue.
+  UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+  /// lookupVirtReg - Find the EC leader for VirtReg or null.
+  UserValue *lookupVirtReg(unsigned VirtReg);
+
+  /// mapVirtReg - Map virtual register to an equivalence class.
+  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+  /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+  /// @param MI  DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before instruction.
+  /// @return    True if the DBG_VALUE instruction should be deleted.
+  bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+  /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+  /// a UserValue def for each instruction.
+  /// @param mf MachineFunction to be scanned.
+  /// @return   True if any debug values were found.
+  bool collectDebugValues(MachineFunction &mf);
+
+  /// computeIntervals - Compute the live intervals of all user values after
+  /// collecting all their def points.
+  void computeIntervals();
+
+public:
+  LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+  bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+  void clear() {
+    DeleteContainerPointers(userValues);
+    userValues.clear();
+    virtRegToEqClass.clear();
+    userVarMap.clear();
+  }
+
+  /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// emitDebugValues - Recreate DBG_VALUE instructions from the data
+  /// structures.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+  if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+    OS << "!\"" << MDS->getString() << "\"\t";
+  if (offset)
+    OS << '+' << offset;
+  for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+    OS << " [" << I.start() << ';' << I.stop() << "):";
+    if (I.value() == ~0u)
+      OS << "undef";
+    else
+      OS << I.value();
+  }
+  for (unsigned i = 0, e = locations.size(); i != e; ++i)
+    OS << " Loc" << i << '=' << locations[i];
+  OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+  OS << "********** DEBUG VARIABLES **********\n";
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+    userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+  unsigned KeepLoc = 0;
+  for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+    if (KeepLoc == LocNo)
+      continue;
+    if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+      break;
+  }
+  // No matches.
+  if (KeepLoc == locations.size())
+    return;
+
+  // Keep the smaller location, erase the larger one.
+  unsigned EraseLoc = LocNo;
+  if (KeepLoc > EraseLoc)
+    std::swap(KeepLoc, EraseLoc);
+  locations.erase(locations.begin() + EraseLoc);
+
+  // Rewrite values.
+  for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+    unsigned v = I.value();
+    if (v == EraseLoc)
+      I.setValue(KeepLoc);      // Coalesce when possible.
+    else if (v > EraseLoc)
+      I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+  }
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+                                 DebugLoc DL) {
+  UserValue *&Leader = userVarMap[Var];
+  if (Leader) {
+    UserValue *UV = Leader->getLeader();
+    Leader = UV;
+    for (; UV; UV = UV->getNext())
+      if (UV->match(Var, Offset))
+        return UV;
+  }
+
+  UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+  userValues.push_back(UV);
+  Leader = UserValue::merge(Leader, UV);
+  return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+  UserValue *&Leader = virtRegToEqClass[VirtReg];
+  Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+  if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+    return UV->getLeader();
+  return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+  // DBG_VALUE loc, offset, variable
+  if (MI->getNumOperands() != 3 ||
+      !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+    DEBUG(dbgs() << "Can't handle " << *MI);
+    return false;
+  }
+
+  // Get or create the UserValue for (variable,offset).
+  unsigned Offset = MI->getOperand(1).getImm();
+  const MDNode *Var = MI->getOperand(2).getMetadata();
+  UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+
+  // If the location is a virtual register, make sure it is mapped.
+  if (MI->getOperand(0).isReg()) {
+    unsigned Reg = MI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      mapVirtReg(Reg, UV);
+  }
+
+  UV->addDef(Idx, MI->getOperand(0));
+  return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+  bool Changed = false;
+  for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+       ++MFI) {
+    MachineBasicBlock *MBB = MFI;
+    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+         MBBI != MBBE;) {
+      if (!MBBI->isDebugValue()) {
+        ++MBBI;
+        continue;
+      }
+      // DBG_VALUE has no slot index, use the previous instruction instead.
+      SlotIndex Idx = MBBI == MBB->begin() ?
+        LIS->getMBBStartIdx(MBB) :
+        LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+      // Handle consecutive DBG_VALUE instructions with the same slot index.
+      do {
+        if (handleDebugValue(MBBI, Idx)) {
+          MBBI = MBB->erase(MBBI);
+          Changed = true;
+        } else
+          ++MBBI;
+      } while (MBBI != MBBE && MBBI->isDebugValue());
+    }
+  }
+  return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+                          LiveInterval *LI, const VNInfo *VNI,
+                          LiveIntervals &LIS, MachineDominatorTree &MDT) {
+  SmallVector<SlotIndex, 16> Todo;
+  Todo.push_back(Idx);
+
+  do {
+    SlotIndex Start = Todo.pop_back_val();
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+    SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+    LocMap::iterator I = locInts.find(Start);
+
+    // Limit to VNI's live range.
+    bool ToEnd = true;
+    if (LI && VNI) {
+      LiveRange *Range = LI->getLiveRangeContaining(Start);
+      if (!Range || Range->valno != VNI)
+        continue;
+      if (Range->end < Stop)
+        Stop = Range->end, ToEnd = false;
+    }
+
+    // There could already be a short def at Start.
+    if (I.valid() && I.start() <= Start) {
+      // Stop when meeting a different location or an already extended
+      // interval.
+      Start = Start.getNextSlot();
+      if (I.value() != LocNo || I.stop() != Start)
+        continue;
+      // This is a one-slot placeholder. Just skip it.
+      ++I;
+    }
+
+    // Limited by the next def.
+    if (I.valid() && I.start() < Stop)
+      Stop = I.start(), ToEnd = false;
+
+    if (Start >= Stop)
+      continue;
+
+    I.insert(Start, Stop, LocNo);
+
+    // If we extended to the MBB end, propagate down the dominator tree.
+    if (!ToEnd)
+      continue;
+    const std::vector<MachineDomTreeNode*> &Children =
+      MDT.getNode(MBB)->getChildren();
+    for (unsigned i = 0, e = Children.size(); i != e; ++i)
+      Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+  } while (!Todo.empty());
+}
+
+void
+UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
+  SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+  // Collect all defs to be extended (skipping undefs).
+  for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+    if (I.value() != ~0u)
+      Defs.push_back(std::make_pair(I.start(), I.value()));
+
+  for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+    SlotIndex Idx = Defs[i].first;
+    unsigned LocNo = Defs[i].second;
+    const MachineOperand &Loc = locations[LocNo];
+
+    // Register locations are constrained to where the register value is live.
+    if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+      LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+      const VNInfo *VNI = LI->getVNInfoAt(Idx);
+      extendDef(Idx, LocNo, LI, VNI, LIS, MDT);
+    } else
+      extendDef(Idx, LocNo, 0, 0, LIS, MDT);
+  }
+
+  // Finally, erase all the undefs.
+  for (LocMap::iterator I = locInts.begin(); I.valid();)
+    if (I.value() == ~0u)
+      I.erase();
+    else
+      ++I;
+}
+
+void LDVImpl::computeIntervals() {
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+    userValues[i]->computeIntervals(*LIS, *MDT);
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  LIS = &pass.getAnalysis<LiveIntervals>();
+  MDT = &pass.getAnalysis<MachineDominatorTree>();
+  TRI = mf.getTarget().getRegisterInfo();
+  clear();
+  DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+               << ((Value*)mf.getFunction())->getName()
+               << " **********\n");
+
+  bool Changed = collectDebugValues(mf);
+  computeIntervals();
+  DEBUG(print(dbgs()));
+  return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+  if (!EnableLDV)
+    return false;
+  if (!pImpl)
+    pImpl = new LDVImpl(this);
+  return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+  if (pImpl)
+    delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+               const TargetRegisterInfo *TRI) {
+  for (unsigned i = locations.size(); i; --i) {
+    unsigned LocNo = i - 1;
+    MachineOperand &Loc = locations[LocNo];
+    if (!Loc.isReg() || Loc.getReg() != OldReg)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+      Loc.substPhysReg(NewReg, *TRI);
+    else
+      Loc.substVirtReg(NewReg, SubIdx, *TRI);
+    coalesceLocation(LocNo);
+  }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+  UserValue *UV = lookupVirtReg(OldReg);
+  if (!UV)
+    return;
+
+  if (TargetRegisterInfo::isVirtualRegister(NewReg))
+    mapVirtReg(NewReg, UV);
+  virtRegToEqClass.erase(OldReg);
+
+  do {
+    UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+    UV = UV->getNext();
+  } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+  // Iterating over locations in reverse order makes it easier to handle
+  // coalescing.
+  for (unsigned i = locations.size(); i; --i) {
+    unsigned LocNo = i-1;
+    MachineOperand &Loc = locations[LocNo];
+    // Only virtual registers are rewritten.
+    if (!Loc.isReg() || !Loc.getReg() ||
+        !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+      continue;
+    unsigned VirtReg = Loc.getReg();
+    if (VRM.isAssignedReg(VirtReg) &&
+        TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+      Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+    } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+               VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+      // FIXME: Translate SubIdx to a stackslot offset.
+      Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+    } else {
+      Loc.setReg(0);
+      Loc.setSubReg(0);
+    }
+    coalesceLocation(LocNo);
+  }
+  DEBUG(print(dbgs(), &TRI));
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+                   LiveIntervals &LIS) {
+  SlotIndex Start = LIS.getMBBStartIdx(MBB);
+  Idx = Idx.getBaseIndex();
+
+  // Try to find an insert location by going backwards from Idx.
+  MachineInstr *MI;
+  while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+    // We've reached the beginning of MBB.
+    if (Idx == Start) {
+      MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+      return I;
+    }
+    Idx = Idx.getPrevIndex();
+  }
+
+  // Don't insert anything after the first terminator, though.
+  return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+                                  llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+  DebugLoc D = dl;
+  dl = DebugLoc();
+  return D;
+}
+
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+                                 unsigned LocNo,
+                                 LiveIntervals &LIS,
+                                 const TargetInstrInfo &TII) {
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+  MachineOperand &Loc = locations[LocNo];
+
+  // Frame index locations may require a target callback.
+  if (Loc.isFI()) {
+    MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+                                                    Loc.getIndex(), offset,
+                                                    variable, findDebugLoc());
+    if (MI) {
+      MBB->insert(I, MI);
+      return;
+    }
+  }
+  // This is not a frame index, or the target is happy with a standard FI.
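+  // NOTE (editorial sketch, not from the original sources): the standard
+  // encoding built below is DBG_VALUE {location, offset, variable}. Printed,
+  // it looks roughly like
+  //   DBG_VALUE %reg, <offset>, !metadata    ; register location
+  //   DBG_VALUE <fi#n>, <offset>, !metadata  ; frame-index fallback
+  // where <offset> and !metadata come from this UserValue's offset and
+  // variable members. The printed syntax above is illustrative only.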
+  BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+    .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+                                LiveIntervals &LIS,
+                                const TargetInstrInfo &TII) {
+  MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+  BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0)
+    .addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+                                const TargetInstrInfo &TII) {
+  MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+  for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+    SlotIndex Start = I.start();
+    SlotIndex Stop = I.stop();
+    unsigned LocNo = I.value();
+    DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+    MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+    SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+    DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+    insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+    // This interval may span multiple basic blocks.
+    // Insert a DBG_VALUE into each one.
+    while (Stop > MBBEnd) {
+      // Move to the next block.
+      Start = MBBEnd;
+      if (++MBB == MFEnd)
+        break;
+      MBBEnd = LIS.getMBBEndIdx(MBB);
+      DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+      insertDebugValue(MBB, Start, LocNo, LIS, TII);
+    }
+    DEBUG(dbgs() << '\n');
+    if (MBB == MFEnd)
+      break;
+
+    ++I;
+    if (Stop == MBBEnd)
+      continue;
+    // The current interval ends before MBB.
+    // Insert a kill if there is a gap.
+    if (!I.valid() || I.start() > Stop)
+      insertDebugKill(MBB, Stop, LIS, TII);
+  }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+  DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+    userValues[i]->rewriteLocations(*VRM, *TRI);
+    userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+  }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+  if (pImpl)
+    static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
diff --git a/final/lib/CodeGen/LiveDebugVariables.h b/final/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 00000000000..a6e40a19845
--- /dev/null
+++ b/final/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,63 @@
+//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+  void *pImpl;
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  LiveDebugVariables();
+  ~LiveDebugVariables();
+
+  /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+  /// @param OldReg Old virtual register that is going away.
+  /// @param NewReg New register holding the user variables.
+  /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+  ///               register.
+  void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+  /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+  /// that happened during register allocation.
+  /// @param VRM Rename virtual registers according to map.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  /// dump - Print data structures to dbgs().
+  void dump();
+
+private:
+
+  virtual bool runOnMachineFunction(MachineFunction &);
+  virtual void releaseMemory();
+  virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
diff --git a/final/lib/CodeGen/LiveInterval.cpp b/final/lib/CodeGen/LiveInterval.cpp
new file mode 100644
index 00000000000..585e3a2dc24
--- /dev/null
+++ b/final/lib/CodeGen/LiveInterval.cpp
@@ -0,0 +1,801 @@
+//===-- LiveInterval.cpp - Live Interval Representation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveRange and LiveInterval classes. Given some
+// numbering of each of the machine instructions, an interval [i, j) is said
+// to be a live interval for register v if there is no instruction with
+// number j' > j such that v is live at j' and there is no instruction with
+// number i' < i such that v is live at i'. In this implementation intervals
+// can have holes, i.e. an interval might look like [1,20), [50,65),
+// [1000,1001). Each individual range is represented as an instance of
+// LiveRange, and the whole interval is represented as an instance of
+// LiveInterval.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+using namespace llvm;
+
+// CompEnd - Compare LiveRange ends.
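+// NOTE (editorial sketch, not from the original sources): CompEnd below is a
+// heterogeneous strict weak ordering, so the binary searches in this file can
+// compare a bare SlotIndex key directly against LiveRange elements without
+// constructing a dummy LiveRange. The same pattern in miniature, with
+// hypothetical types:
+//
+//   struct Range { int end; };
+//   struct CmpEnd {
+//     bool operator()(int A, const Range &B) const { return A < B.end; }
+//     bool operator()(const Range &A, int B) const { return A.end < B; }
+//     bool operator()(const Range &A, const Range &B) const {
+//       return A.end < B.end;
+//     }
+//   };
+//   // std::upper_bound(v.begin(), v.end(), key, CmpEnd()) returns the first
+//   // range whose end compares greater than key.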
+namespace { +struct CompEnd { + bool operator()(SlotIndex A, const LiveRange &B) const { + return A < B.end; + } + bool operator()(const LiveRange &A, SlotIndex B) const { + return A.end < B; + } + bool operator()(const LiveRange &A, const LiveRange &B) const { + return A.end < B.end; + } +}; +} + +LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { + assert(Pos.isValid() && "Cannot search for an invalid index"); + return std::upper_bound(begin(), end(), Pos, CompEnd()); +} + +/// killedInRange - Return true if the interval has kills in [Start,End). +bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { + Ranges::const_iterator r = + std::lower_bound(ranges.begin(), ranges.end(), End); + + // Now r points to the first interval with start >= End, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= End. + // r->end is the kill point. + return r->end >= Start && r->end < End; +} + +// overlaps - Return true if the intersection of the two live intervals is +// not empty. +// +// An example for overlaps(): +// +// 0: A = ... +// 4: B = ... +// 8: C = A + B ;; last use of A +// +// The live intervals should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. +// +bool LiveInterval::overlapsFrom(const LiveInterval& other, + const_iterator StartPos) const { + assert(!empty() && "empty interval"); + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != ranges.begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.ranges.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +/// overlaps - Return true if the live interval overlaps a range specified +/// by [Start, End). +bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { + assert(Start < End && "Invalid range"); + const_iterator I = std::lower_bound(begin(), end(), End); + return I != begin() && (--I)->end > Start; +} + + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->setIsUnused(true); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. 
+void LiveInterval::RenumberValues(LiveIntervals &lis) {
+  SmallPtrSet<VNInfo*, 8> Seen;
+  bool seenPHIDef = false;
+  valnos.clear();
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    VNInfo *VNI = I->valno;
+    if (!Seen.insert(VNI))
+      continue;
+    assert(!VNI->isUnused() && "Unused valno used by live range");
+    VNI->id = (unsigned)valnos.size();
+    valnos.push_back(VNI);
+    VNI->setHasPHIKill(false);
+    if (VNI->isPHIDef())
+      seenPHIDef = true;
+  }
+
+  // Recompute phi kill flags.
+  if (!seenPHIDef)
+    return;
+  for (const_vni_iterator I = vni_begin(), E = vni_end(); I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (!VNI->isPHIDef())
+      continue;
+    const MachineBasicBlock *PHIBB = lis.getMBBFromIndex(VNI->def);
+    assert(PHIBB && "No basic block for phi-def");
+    for (MachineBasicBlock::const_pred_iterator PI = PHIBB->pred_begin(),
+         PE = PHIBB->pred_end(); PI != PE; ++PI) {
+      VNInfo *KVNI = getVNInfoAt(lis.getMBBEndIdx(*PI).getPrevSlot());
+      if (KVNI)
+        KVNI->setHasPHIKill(true);
+    }
+  }
+}
+
+/// extendIntervalEndTo - This method is used when we want to extend the range
+/// specified by I to end at the specified endpoint. To do this, we should
+/// merge and eliminate all ranges that this will overlap with. The iterator
+/// is not invalidated.
+void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
+  assert(I != ranges.end() && "Not a valid interval!");
+  VNInfo *ValNo = I->valno;
+
+  // Search for the first interval that we can't merge with.
+  Ranges::iterator MergeTo = llvm::next(I);
+  for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) {
+    assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+  }
+
+  // If NewEnd was in the middle of an interval, make sure to get its endpoint.
+  I->end = std::max(NewEnd, prior(MergeTo)->end);
+
+  // Erase any dead ranges.
+  ranges.erase(llvm::next(I), MergeTo);
+
+  // If the newly formed range now touches the range after it and if they have
+  // the same value number, merge the two ranges into one range.
+  Ranges::iterator Next = llvm::next(I);
+  if (Next != ranges.end() && Next->start <= I->end && Next->valno == ValNo) {
+    I->end = Next->end;
+    ranges.erase(Next);
+  }
+}
+
+
+/// extendIntervalStartTo - This method is used when we want to extend the
+/// range specified by I to start at the specified endpoint. To do this, we
+/// should merge and eliminate all ranges that this will overlap with.
+LiveInterval::Ranges::iterator
+LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) {
+  assert(I != ranges.end() && "Not a valid interval!");
+  VNInfo *ValNo = I->valno;
+
+  // Search for the first interval that we can't merge with.
+  Ranges::iterator MergeTo = I;
+  do {
+    if (MergeTo == ranges.begin()) {
+      I->start = NewStart;
+      ranges.erase(MergeTo, I);
+      return I;
+    }
+    assert(MergeTo->valno == ValNo && "Cannot merge with differing values!");
+    --MergeTo;
+  } while (NewStart <= MergeTo->start);
+
+  // If we start in the middle of another interval, just delete a range and
+  // extend that interval.
+  if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) {
+    MergeTo->end = I->end;
+  } else {
+    // Otherwise, extend the interval right after.
+ ++MergeTo; + MergeTo->start = NewStart; + MergeTo->end = I->end; + } + + ranges.erase(llvm::next(MergeTo), llvm::next(I)); + return MergeTo; +} + +LiveInterval::iterator +LiveInterval::addRangeFrom(LiveRange LR, iterator From) { + SlotIndex Start = LR.start, End = LR.end; + iterator it = std::upper_bound(From, ranges.end(), Start); + + // If the inserted interval starts in the middle or right at the end of + // another interval, just extend that interval to contain the range of LR. + if (it != ranges.begin()) { + iterator B = prior(it); + if (LR.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendIntervalEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two LiveRanges with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this range ends in the middle of, or right next to, another + // interval, merge it into that interval. + if (it != ranges.end()) { + if (LR.valno == it->valno) { + if (it->start <= End) { + it = extendIntervalStartTo(it, Start); + + // If LR is a complete superset of an interval, we may need to grow its + // endpoint as well. + if (End > it->end) + extendIntervalEndTo(it, End); + return it; + } + } else { + // Check to make sure that we are not overlapping two live ranges with + // different valno's. + assert(it->start >= End && + "Cannot overlap two LiveRanges with differing ValID's"); + } + } + + // Otherwise, this is just a new range that doesn't interact with anything. + // Insert it. + return ranges.insert(it, LR); +} + +/// extendInBlock - If this interval is live before UseIdx in the basic +/// block that starts at StartIdx, extend it to be live at UseIdx and return +/// the value. If there is no live range before UseIdx, return NULL. +VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) { + if (empty()) + return 0; + iterator I = std::upper_bound(begin(), end(), UseIdx); + if (I == begin()) + return 0; + --I; + if (I->end <= StartIdx) + return 0; + if (I->end <= UseIdx) + extendIntervalEndTo(I, UseIdx.getNextSlot()); + return I->valno; +} + +/// removeRange - Remove the specified range from this interval. Note that +/// the range must be in a single LiveRange in its entirety. +void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the LiveRange containing this span. + Ranges::iterator I = find(Start); + assert(I != ranges.end() && "Range is not in interval!"); + assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); + + // If the span we are removing is at the start of the LiveRange, adjust it. + VNInfo *ValNo = I->valno; + if (I->start == Start) { + if (I->end == End) { + if (RemoveDeadValNo) { + // Check if val# is dead. + bool isDead = true; + for (const_iterator II = begin(), EE = end(); II != EE; ++II) + if (II != I && II->valno == ValNo) { + isDead = false; + break; + } + if (isDead) { + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); + } + } + + ranges.erase(I); // Removed the whole LiveRange. + } else + I->start = End; + return; + } + + // Otherwise if the span we are removing is at the end of the LiveRange, + // adjust the other way. + if (I->end == End) { + I->end = Start; + return; + } + + // Otherwise, we are splitting the LiveRange into two pieces. + SlotIndex OldEnd = I->end; + I->end = Start; // Trim the old interval. 
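+  // NOTE (editorial worked example, not from the original sources): removing
+  // [Start,End) from the middle of a single range splits it in two. E.g.
+  // removing [20,30) from [10,50):V leaves
+  //   before: [10,50):V
+  //   after : [10,20):V  [30,50):V
+  // The trim above produced the [10,20) piece; the insertion below adds the
+  // [30,50) piece with the same value number.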
+
+  // Insert the new one.
+  ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo));
+}
+
+/// removeValNo - Remove all the ranges defined by the specified value#.
+/// Also remove the value# from value# list.
+void LiveInterval::removeValNo(VNInfo *ValNo) {
+  if (empty()) return;
+  Ranges::iterator I = ranges.end();
+  Ranges::iterator E = ranges.begin();
+  do {
+    --I;
+    if (I->valno == ValNo)
+      ranges.erase(I);
+  } while (I != E);
+  // Now that ValNo is dead, remove it.
+  markValNoForDeletion(ValNo);
+}
+
+/// findDefinedVNInfoForRegInt - Find the VNInfo defined by the specified
+/// index (register interval).
+VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
+  for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
+       i != e; ++i) {
+    if ((*i)->def == Idx)
+      return *i;
+  }
+
+  return 0;
+}
+
+/// join - Join two live intervals (this, and other) together. This applies
+/// mappings to the value numbers in the LHS/RHS intervals as specified. If
+/// the intervals are not joinable, this aborts.
+void LiveInterval::join(LiveInterval &Other,
+                        const int *LHSValNoAssignments,
+                        const int *RHSValNoAssignments,
+                        SmallVector<VNInfo*, 16> &NewVNInfo,
+                        MachineRegisterInfo *MRI) {
+  // Determine if any of our live range values are mapped. This is uncommon,
+  // so we want to avoid the interval scan if not.
+  bool MustMapCurValNos = false;
+  unsigned NumVals = getNumValNums();
+  unsigned NumNewVals = NewVNInfo.size();
+  for (unsigned i = 0; i != NumVals; ++i) {
+    unsigned LHSValID = LHSValNoAssignments[i];
+    if (i != LHSValID ||
+        (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i)))
+      MustMapCurValNos = true;
+  }
+
+  // If we have to apply a mapping to our base interval assignment, rewrite it
+  // now.
+  if (MustMapCurValNos) {
+    // Map the first live range.
+    iterator OutIt = begin();
+    OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]];
+    ++OutIt;
+    for (iterator I = OutIt, E = end(); I != E; ++I) {
+      OutIt->valno = NewVNInfo[LHSValNoAssignments[I->valno->id]];
+
+      // If this live range has the same value # as its immediate predecessor,
+      // and if they are neighbors, remove one LiveRange. This happens when we
+      // have [0,3:0)[4,7:1) and map 0/1 onto the same value #.
+      if (OutIt->valno == (OutIt-1)->valno && (OutIt-1)->end == OutIt->start) {
+        (OutIt-1)->end = OutIt->end;
+      } else {
+        if (I != OutIt) {
+          OutIt->start = I->start;
+          OutIt->end = I->end;
+        }
+
+        // Didn't merge, on to the next one.
+        ++OutIt;
+      }
+    }
+
+    // If we merge some live ranges, chop off the end.
+    ranges.erase(OutIt, end());
+  }
+
+  // Remember assignments because val# ids are changing.
+  SmallVector<unsigned, 16> OtherAssignments;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I)
+    OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]);
+
+  // Update val# info. Renumber them and make sure they all belong to this
+  // LiveInterval now. Also remove dead val#'s.
+  unsigned NumValNos = 0;
+  for (unsigned i = 0; i < NumNewVals; ++i) {
+    VNInfo *VNI = NewVNInfo[i];
+    if (VNI) {
+      if (NumValNos >= NumVals)
+        valnos.push_back(VNI);
+      else
+        valnos[NumValNos] = VNI;
+      VNI->id = NumValNos++; // Renumber val#.
+    }
+  }
+  if (NumNewVals < NumVals)
+    valnos.resize(NumNewVals); // shrinkify
+
+  // Okay, now insert the RHS live ranges into the LHS.
+  iterator InsertPos = begin();
+  unsigned RangeNo = 0;
+  for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) {
+    // Map the valno in the other live range to the current live range.
+    I->valno = NewVNInfo[OtherAssignments[RangeNo]];
+    assert(I->valno && "Adding a dead range?");
+    InsertPos = addRangeFrom(*I, InsertPos);
+  }
+
+  ComputeJoinedWeight(Other);
+}
+
+/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
+/// interval as the specified value number. The LiveRanges in RHS are
+/// allowed to overlap with LiveRanges in the current interval, but only if
+/// the overlapping LiveRanges have the specified value number.
+void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS,
+                                        VNInfo *LHSValNo) {
+  // TODO: Make this more efficient.
+  iterator InsertPos = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    // Map the valno in the other live range to the current live range.
+    LiveRange Tmp = *I;
+    Tmp.valno = LHSValNo;
+    InsertPos = addRangeFrom(Tmp, InsertPos);
+  }
+}
+
+
+/// MergeValueInAsValue - Merge all of the live ranges of a specific val#
+/// in RHS into this live interval as the specified value number.
+/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the
+/// current interval; the value numbers of the overlapped live ranges are
+/// replaced with the specified value number.
+void LiveInterval::MergeValueInAsValue(
+                                    const LiveInterval &RHS,
+                                    const VNInfo *RHSValNo, VNInfo *LHSValNo) {
+  SmallVector<VNInfo*, 4> ReplacedValNos;
+  iterator IP = begin();
+  for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+    assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
+    if (I->valno != RHSValNo)
+      continue;
+    SlotIndex Start = I->start, End = I->end;
+    IP = std::upper_bound(IP, end(), Start);
+    // If the start of this range overlaps with an existing liverange, trim it.
+    if (IP != begin() && IP[-1].end > Start) {
+      if (IP[-1].valno != LHSValNo) {
+        ReplacedValNos.push_back(IP[-1].valno);
+        IP[-1].valno = LHSValNo; // Update val#.
+      }
+      Start = IP[-1].end;
+      // Trimmed away the whole range?
+      if (Start >= End) continue;
+    }
+    // If the end of this range overlaps with an existing liverange, trim it.
+    if (IP != end() && End > IP->start) {
+      if (IP->valno != LHSValNo) {
+        ReplacedValNos.push_back(IP->valno);
+        IP->valno = LHSValNo; // Update val#.
+      }
+      End = IP->start;
+      // If this trimmed away the whole range, ignore it.
+      if (Start == End) continue;
+    }
+
+    // Map the valno in the other live range to the current live range.
+    IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP);
+  }
+
+
+  SmallSet<VNInfo*, 4> Seen;
+  for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) {
+    VNInfo *V1 = ReplacedValNos[i];
+    if (Seen.insert(V1)) {
+      bool isDead = true;
+      for (const_iterator I = begin(), E = end(); I != E; ++I)
+        if (I->valno == V1) {
+          isDead = false;
+          break;
+        }
+      if (isDead) {
+        // Now that V1 is dead, remove it.
+        markValNoForDeletion(V1);
+      }
+    }
+  }
+}
+
+
+
+/// MergeValueNumberInto - This method is called when two value numbers
+/// are found to be equivalent. This eliminates V1, replacing all
+/// LiveRanges with the V1 value number with the V2 value number. This can
+/// cause merging of V1/V2 value numbers and compaction of the value space.
+VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
+  assert(V1 != V2 && "Identical value#'s are always equivalent!");
+
+  // This code actually merges the (numerically) larger value number into the
+  // smaller value number, which is likely to allow us to compactify the value
+  // space. The only thing we have to be careful of is to preserve the
+  // instruction that defines the result value.
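+  // NOTE (editorial worked example, not from the original sources): given
+  // ranges [0,8):V2 [8,16):V1 [16,24):V2, relabeling the V1 range to V2 lets
+  // the loop below fuse it with both touching neighbors, yielding [0,24):V2;
+  // V1 is then marked for deletion and V2 is returned as the survivor.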
+
+  // Make sure V2 is smaller than V1.
+  if (V1->id < V2->id) {
+    V1->copyFrom(*V2);
+    std::swap(V1, V2);
+  }
+
+  // Merge V1 live ranges into V2.
+  for (iterator I = begin(); I != end(); ) {
+    iterator LR = I++;
+    if (LR->valno != V1) continue;  // Not a V1 LiveRange.
+
+    // Okay, we found a V1 live range. If it had a previous, touching, V2 live
+    // range, extend it.
+    if (LR != begin()) {
+      iterator Prev = LR-1;
+      if (Prev->valno == V2 && Prev->end == LR->start) {
+        Prev->end = LR->end;
+
+        // Erase this live-range.
+        ranges.erase(LR);
+        I = Prev+1;
+        LR = Prev;
+      }
+    }
+
+    // Okay, now we have a V1 or V2 live range that is maximally merged
+    // forward. Ensure that it is a V2 live-range.
+    LR->valno = V2;
+
+    // If we can merge it into later V2 live ranges, do so now. We ignore any
+    // following V1 live ranges, as they will be merged in subsequent
+    // iterations of the loop.
+    if (I != end()) {
+      if (I->start == LR->end && I->valno == V2) {
+        LR->end = I->end;
+        ranges.erase(I);
+        I = LR+1;
+      }
+    }
+  }
+
+  // Merge the relevant flags.
+  V2->mergeFlags(V1);
+
+  // Now that V1 is dead, remove it.
+  markValNoForDeletion(V1);
+
+  return V2;
+}
+
+void LiveInterval::Copy(const LiveInterval &RHS,
+                        MachineRegisterInfo *MRI,
+                        VNInfo::Allocator &VNInfoAllocator) {
+  ranges.clear();
+  valnos.clear();
+  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+  MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
+  weight = RHS.weight;
+  for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
+    const VNInfo *VNI = RHS.getValNumInfo(i);
+    createValueCopy(VNI, VNInfoAllocator);
+  }
+  for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
+    const LiveRange &LR = RHS.ranges[i];
+    addRange(LiveRange(LR.start, LR.end, getValNumInfo(LR.valno->id)));
+  }
+}
+
+unsigned LiveInterval::getSize() const {
+  unsigned Sum = 0;
+  for (const_iterator I = begin(), E = end(); I != E; ++I)
+    Sum += I->start.distance(I->end);
+  return Sum;
+}
+
+/// ComputeJoinedWeight - Set the weight of a live interval Joined
+/// after Other has been merged into it.
+void LiveInterval::ComputeJoinedWeight(const LiveInterval &Other) {
+  // If either of these intervals was spilled, the weight is the
+  // weight of the non-spilled interval. This can only happen with
+  // iterative coalescers.
+
+  if (Other.weight != HUGE_VALF) {
+    weight += Other.weight;
+  }
+  else if (weight == HUGE_VALF &&
+           !TargetRegisterInfo::isPhysicalRegister(reg)) {
+    // Remove this assert if you have an iterative coalescer
+    assert(0 && "Joining to spilled interval");
+    weight = Other.weight;
+  }
+  else {
+    // Otherwise the weight stays the same
+    // Remove this assert if you have an iterative coalescer
+    assert(0 && "Joining from spilled interval");
+  }
+}
+
+raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) {
+  return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")";
+}
+
+void LiveRange::dump() const {
+  dbgs() << *this << "\n";
+}
+
+void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+  OS << PrintReg(reg, TRI);
+  if (weight != 0)
+    OS << ',' << weight;
+
+  if (empty())
+    OS << " EMPTY";
+  else {
+    OS << " = ";
+    for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
+         E = ranges.end(); I != E; ++I) {
+      OS << *I;
+      assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+    }
+  }
+
+  // Print value number info.
+ if (getNumValNums()) { + OS << " "; + unsigned vnum = 0; + for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; + ++i, ++vnum) { + const VNInfo *vni = *i; + if (vnum) OS << " "; + OS << vnum << "@"; + if (vni->isUnused()) { + OS << "x"; + } else { + OS << vni->def; + if (vni->isPHIDef()) + OS << "-phidef"; + if (vni->hasPHIKill()) + OS << "-phikill"; + if (vni->hasRedefByEC()) + OS << "-ec"; + } + } + } +} + +void LiveInterval::dump() const { + dbgs() << *this << "\n"; +} + + +void LiveRange::print(raw_ostream &os) const { + os << *this; +} + +unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { + // Create initial equivalence classes. + eqClass_.clear(); + eqClass_.grow(LI->getNumValNums()); + + const VNInfo *used = 0, *unused = 0; + + // Determine connections. + for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + // Group all unused values into one class. + if (VNI->isUnused()) { + if (unused) + eqClass_.join(unused->id, VNI->id); + unused = VNI; + continue; + } + used = VNI; + if (VNI->isPHIDef()) { + const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + assert(MBB && "Phi-def has no defining MBB"); + // Connect to values live out of predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) + if (const VNInfo *PVNI = + LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) + eqClass_.join(VNI->id, PVNI->id); + } else { + // Normal value defined by an instruction. Check for two-addr redef. + // FIXME: This could be coincidental. Should we really check for a tied + // operand constraint? + // Note that VNI->def may be a use slot for an early clobber def. + if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + eqClass_.join(VNI->id, UVNI->id); + } + } + + // Lump all the unused values in with the last used value. + if (used && unused) + eqClass_.join(used->id, unused->id); + + eqClass_.compress(); + return eqClass_.getNumClasses(); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { + assert(LIV[0] && "LIV[0] must be set"); + LiveInterval &LI = *LIV[0]; + + // First move runs to new intervals. + LiveInterval::iterator J = LI.begin(), E = LI.end(); + while (J != E && eqClass_[J->valno->id] == 0) + ++J; + for (LiveInterval::iterator I = J; I != E; ++I) { + if (unsigned eq = eqClass_[I->valno->id]) { + assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && + "New intervals should be empty"); + LIV[eq]->ranges.push_back(*I); + } else + *J++ = *I; + } + LI.ranges.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. + unsigned j = 0, e = LI.getNumValNums(); + while (j != e && eqClass_[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LI.getValNumInfo(i); + if (unsigned eq = eqClass_[i]) { + VNI->id = LIV[eq]->getNumValNums(); + LIV[eq]->valnos.push_back(VNI); + } else { + VNI->id = j; + LI.valnos[j++] = VNI; + } + } + LI.valnos.resize(j); +} diff --git a/final/lib/CodeGen/LiveIntervalAnalysis.cpp b/final/lib/CodeGen/LiveIntervalAnalysis.cpp new file mode 100644 index 00000000000..8a2db3e1a38 --- /dev/null +++ b/final/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -0,0 +1,2162 @@ +//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveInterval analysis pass which is used
+// by the Linear Scan Register allocator. This pass linearizes the
+// basic blocks of the function in DFS order and uses the
+// LiveVariables pass to conservatively compute live intervals for
+// each virtual and physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "liveintervals"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "VirtRegMap.h"
+#include "llvm/Value.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/ProcessImplicitDefs.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <limits>
+#include <cmath>
+using namespace llvm;
+
+// Hidden options for help debugging.
+static cl::opt<bool> DisableReMat("disable-rematerialization",
+                                  cl::init(false), cl::Hidden);
+
+STATISTIC(numIntervals , "Number of original intervals");
+STATISTIC(numFolds     , "Number of loads/stores folded into instructions");
+STATISTIC(numSplits    , "Number of intervals split");
+
+char LiveIntervals::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+                "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+                "Live Interval Analysis", false, false)
+
+void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<LiveVariables>();
+  AU.addPreserved<LiveVariables>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreservedID(MachineDominatorsID);
+
+  if (!StrongPHIElim) {
+    AU.addPreservedID(PHIEliminationID);
+    AU.addRequiredID(PHIEliminationID);
+  }
+
+  AU.addRequiredID(TwoAddressInstructionPassID);
+  AU.addPreserved<ProcessImplicitDefs>();
+  AU.addRequired<ProcessImplicitDefs>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequiredTransitive<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveIntervals::releaseMemory() {
+  // Free the live intervals themselves.
+  for (DenseMap<unsigned, LiveInterval*>::iterator I = r2iMap_.begin(),
+       E = r2iMap_.end(); I != E; ++I)
+    delete I->second;
+
+  r2iMap_.clear();
+
+  // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+  VNInfoAllocator.Reset();
+  while (!CloneMIs.empty()) {
+    MachineInstr *MI = CloneMIs.back();
+    CloneMIs.pop_back();
+    mf_->DeleteMachineInstr(MI);
+  }
+}
+
+/// runOnMachineFunction - Compute live intervals for the whole function.
+///
+bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &mf_->getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  aa_ = &getAnalysis<AliasAnalysis>();
+  lv_ = &getAnalysis<LiveVariables>();
+  indexes_ = &getAnalysis<SlotIndexes>();
+  allocatableRegs_ = tri_->getAllocatableSet(fn);
+
+  computeIntervals();
+
+  numIntervals += getNumIntervals();
+
+  DEBUG(dump());
+  return true;
+}
+
+/// print - Implement the dump method.
+void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
+  OS << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second->print(OS, tri_);
+    OS << "\n";
+  }
+
+  printInstrs(OS);
+}
+
+void LiveIntervals::printInstrs(raw_ostream &OS) const {
+  OS << "********** MACHINEINSTRS **********\n";
+  mf_->print(OS, indexes_);
+}
+
+void LiveIntervals::dumpInstrs() const {
+  printInstrs(dbgs());
+}
+
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+                                         VirtRegMap &vrm, unsigned reg) {
+  // We don't handle fancy stuff crossing basic block boundaries
+  if (li.ranges.size() != 1)
+    return true;
+  const LiveRange &range = li.ranges.front();
+  SlotIndex idx = range.start.getBaseIndex();
+  SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+  // Skip deleted instructions
+  MachineInstr *firstMI = getInstructionFromIndex(idx);
+  while (!firstMI && idx != end) {
+    idx = idx.getNextIndex();
+    firstMI = getInstructionFromIndex(idx);
+  }
+  if (!firstMI)
+    return false;
+
+  // Find last instruction in range
+  SlotIndex lastIdx = end.getPrevIndex();
+  MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+  while (!lastMI && lastIdx != idx) {
+    lastIdx = lastIdx.getPrevIndex();
+    lastMI = getInstructionFromIndex(lastIdx);
+  }
+  if (!lastMI)
+    return false;
+
+  // Range cannot cross basic block boundaries or terminators
+  MachineBasicBlock *MBB = firstMI->getParent();
+  if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+    return true;
+
+  MachineBasicBlock::const_iterator E = lastMI;
+  ++E;
+  for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+    const MachineInstr &MI = *I;
+
+    // Allow copies to and from li.reg
+    if (MI.isCopy())
+      if (MI.getOperand(0).getReg() == li.reg ||
+          MI.getOperand(1).getReg() == li.reg)
+        continue;
+
+    // Check for operands using reg
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      const MachineOperand& mop = MI.getOperand(i);
+      if (!mop.isReg())
+        continue;
+      unsigned PhysReg = mop.getReg();
+      if (PhysReg == 0 || PhysReg == li.reg)
+        continue;
+      if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+        if (!vrm.hasPhys(PhysReg))
+          continue;
+        PhysReg = vrm.getPhys(PhysReg);
+      }
+      if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+        return true;
+    }
+  }
+
+  // No conflicts found.
+  return false;
+}
+
+bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
+                                SmallPtrSet<MachineInstr*, 32> &JoinedCopies) {
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    for (SlotIndex index = I->start.getBaseIndex(),
+           end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+         index != end;
+         index = index.getNextIndex()) {
+      MachineInstr *MI = getInstructionFromIndex(index);
+      if (!MI)
+        continue; // skip deleted instructions
+
+      if (JoinedCopies.count(MI))
+        continue;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand& MO = MI->getOperand(i);
+        if (!MO.isReg())
+          continue;
+        unsigned PhysReg = MO.getReg();
+        if (PhysReg == 0 || PhysReg == Reg ||
+            TargetRegisterInfo::isVirtualRegister(PhysReg))
+          continue;
+        if (tri_->regsOverlap(Reg, PhysReg))
+          return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+static
+bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
+  unsigned Reg = MI.getOperand(MOIdx).getReg();
+  for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (MO.getReg() == Reg && MO.isDef()) {
+      assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
+             MI.getOperand(MOIdx).getSubReg() &&
+             (MO.getSubReg() || MO.isImplicit()));
+      return true;
+    }
+  }
+  return false;
+}
+
+/// isPartialRedef - Return true if the specified def at the specific index is
+/// partially re-defining the specified live interval. A common case of this
+/// is a definition of the sub-register.
+bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+                                   LiveInterval &interval) {
+  if (!MO.getSubReg() || MO.isEarlyClobber())
+    return false;
+
+  SlotIndex RedefIndex = MIIdx.getDefIndex();
+  const LiveRange *OldLR =
+    interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+  MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+  if (DefMI != 0) {
+    return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
+  }
+  return false;
+}
+
+void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
+                                             MachineBasicBlock::iterator mi,
+                                             SlotIndex MIIdx,
+                                             MachineOperand& MO,
+                                             unsigned MOIdx,
+                                             LiveInterval &interval) {
+  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+  // Virtual registers may be defined multiple times (due to phi
+  // elimination and 2-addr elimination). Much of what we do only has to be
+  // done once for the vreg. We use an empty interval to detect the first
+  // time we see a vreg.
+  LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg);
+  if (interval.empty()) {
+    // Get the Idx of the defining instructions.
+    SlotIndex defIndex = MIIdx.getDefIndex();
+    // Earlyclobbers move back one, so that they overlap the live range
+    // of inputs.
+    if (MO.isEarlyClobber())
+      defIndex = MIIdx.getUseIndex();
+
+    // Make sure the first definition is not a partial redefinition. Add an
+    // <imp-def> of the full register.
+    if (MO.getSubReg())
+      mi->addRegisterDefined(interval.reg);
+
+    MachineInstr *CopyMI = NULL;
+    if (mi->isCopyLike()) {
+      CopyMI = mi;
+    }
+
+    VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+    assert(ValNo->id == 0 && "First value in interval is not 0?");
+
+    // Loop over all of the blocks that the vreg is defined in. There are
+    // two cases we have to handle here. The most common case is a vreg
+    // whose lifetime is contained within a basic block.
+    // In this case there will be a single kill, in MBB, which comes after
+    // the definition.
+    if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) {
+      // FIXME: what about dead vars?
+      SlotIndex killIdx;
+      if (vi.Kills[0] != mi)
+        killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex();
+      else
+        killIdx = defIndex.getStoreIndex();
+
+      // If the kill happens after the definition, we have an intra-block
+      // live range.
+      if (killIdx > defIndex) {
+        assert(vi.AliveBlocks.empty() &&
+               "Shouldn't be alive across any blocks!");
+        LiveRange LR(defIndex, killIdx, ValNo);
+        interval.addRange(LR);
+        DEBUG(dbgs() << " +" << LR << "\n");
+        return;
+      }
+    }
+
+    // The other case we handle is when a virtual register lives to the end
+    // of the defining block, potentially live across some blocks, then is
+    // live into some number of blocks, but gets killed. Start by adding a
+    // range that goes from this definition to the end of the defining block.
+    LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo);
+    DEBUG(dbgs() << " +" << NewLR);
+    interval.addRange(NewLR);
+
+    bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+    if (PHIJoin) {
+      // A phi join register is killed at the end of the MBB and revived as a
+      // new valno in the killing blocks.
+      assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+      DEBUG(dbgs() << " phi-join");
+      ValNo->setHasPHIKill(true);
+    } else {
+      // Iterate over all of the blocks that the variable is completely
+      // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+      // live interval.
+      for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+           E = vi.AliveBlocks.end(); I != E; ++I) {
+        MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+        LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock),
+                     ValNo);
+        interval.addRange(LR);
+        DEBUG(dbgs() << " +" << LR);
+      }
+    }
+
+    // Finally, this virtual register is live from the start of any killing
+    // block to the 'use' slot of the killing instruction.
+    for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
+      MachineInstr *Kill = vi.Kills[i];
+      SlotIndex Start = getMBBStartIdx(Kill->getParent());
+      SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+      // Create an interval with a NEW value number. Note that this value
+      // number isn't actually defined by an instruction, weird huh? :)
+      if (PHIJoin) {
+        assert(getInstructionFromIndex(Start) == 0 &&
+               "PHI def index points at actual instruction.");
+        ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
+        ValNo->setIsPHIDef(true);
+      }
+      LiveRange LR(Start, killIdx, ValNo);
+      interval.addRange(LR);
+      DEBUG(dbgs() << " +" << LR);
+    }
+
+  } else {
+    if (MultipleDefsBySameMI(*mi, MOIdx))
+      // Multiple defs of the same virtual register by the same instruction.
+      // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+      // This is likely due to elimination of REG_SEQUENCE instructions.
+      // Return here since there is nothing to do.
+      return;
+
+    // If this is the second time we see a virtual register definition, it
+    // must be due to phi elimination or two addr elimination. If this is
+    // the result of two address elimination, then the vreg is one of the
+    // def-and-use register operand.
+
+    // It may also be a partial redef like this:
+    // 80  %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+    // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+    bool PartReDef = isPartialRedef(MIIdx, MO, interval);
+    if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
+      // If this is a two-address definition, then we have already processed
+      // the live range. The only problem is that we didn't realize there
+      // are actually two values in the live interval. Because of this we
+      // need to take the LiveRange that defines this register and split it
+      // into two values.
+      SlotIndex RedefIndex = MIIdx.getDefIndex();
+      if (MO.isEarlyClobber())
+        RedefIndex = MIIdx.getUseIndex();
+
+      const LiveRange *OldLR =
+        interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+      VNInfo *OldValNo = OldLR->valno;
+      SlotIndex DefIndex = OldValNo->def.getDefIndex();
+
+      // Delete the previous value, which should be short and continuous,
+      // because the 2-addr copy must be in the same MBB as the redef.
+      interval.removeRange(DefIndex, RedefIndex);
+
+      // The new value number (#1) is defined by the instruction we claimed
+      // defined value #0.
+      VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
+
+      // Value#0 is now defined by the 2-addr instruction.
+      OldValNo->def = RedefIndex;
+      OldValNo->setCopy(0);
+
+      // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
+      if (PartReDef && mi->isCopyLike())
+        OldValNo->setCopy(&*mi);
+
+      // Add the new live interval which replaces the range for the input
+      // copy.
+      LiveRange LR(DefIndex, RedefIndex, ValNo);
+      DEBUG(dbgs() << " replace range with " << LR);
+      interval.addRange(LR);
+
+      // If this redefinition is dead, we need to add a dummy unit live
+      // range covering the def slot.
+      if (MO.isDead())
+        interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(),
+                                    OldValNo));
+
+      DEBUG({
+          dbgs() << " RESULT: ";
+          interval.print(dbgs(), tri_);
+        });
+    } else if (lv_->isPHIJoin(interval.reg)) {
+      // In the case of PHI elimination, each variable definition is only
+      // live until the end of the block. We've already taken care of the
+      // rest of the live range.
+
+      SlotIndex defIndex = MIIdx.getDefIndex();
+      if (MO.isEarlyClobber())
+        defIndex = MIIdx.getUseIndex();
+
+      VNInfo *ValNo;
+      MachineInstr *CopyMI = NULL;
+      if (mi->isCopyLike())
+        CopyMI = mi;
+      ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+
+      SlotIndex killIndex = getMBBEndIdx(mbb);
+      LiveRange LR(defIndex, killIndex, ValNo);
+      interval.addRange(LR);
+      ValNo->setHasPHIKill(true);
+      DEBUG(dbgs() << " phi-join +" << LR);
+    } else {
+      llvm_unreachable("Multiply defined register");
+    }
+  }
+
+  DEBUG(dbgs() << '\n');
+}
+
+void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
+                                              MachineBasicBlock::iterator mi,
+                                              SlotIndex MIIdx,
+                                              MachineOperand& MO,
+                                              LiveInterval &interval,
+                                              MachineInstr *CopyMI) {
+  // A physical register cannot be live across basic blocks, so its
+  // lifetime must end somewhere in its defining basic block.
+  DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
+
+  SlotIndex baseIndex = MIIdx;
+  SlotIndex start = baseIndex.getDefIndex();
+  // Earlyclobbers move back one.
+  if (MO.isEarlyClobber())
+    start = MIIdx.getUseIndex();
+  SlotIndex end = start;
+
+  // If it is not used after definition, it is considered dead at
+  // the instruction defining it. Hence its interval is:
+  // [defSlot(def), defSlot(def)+1)
+  // For earlyclobbers, the defSlot was pushed back one; the extra
+  // advance below compensates.
+  if (MO.isDead()) {
+    DEBUG(dbgs() << " dead");
+    end = start.getStoreIndex();
+    goto exit;
+  }
+
+  // If it is not dead on definition, it must be killed by a
+  // subsequent instruction. Hence its interval is:
+  // [defSlot(def), useSlot(kill)+1)
+  baseIndex = baseIndex.getNextIndex();
+  while (++mi != MBB->end()) {
+
+    if (mi->isDebugValue())
+      continue;
+    if (getInstructionFromIndex(baseIndex) == 0)
+      baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+    if (mi->killsRegister(interval.reg, tri_)) {
+      DEBUG(dbgs() << " killed");
+      end = baseIndex.getDefIndex();
+      goto exit;
+    } else {
+      int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, false,
+                                                 tri_);
+      if (DefIdx != -1) {
+        if (mi->isRegTiedToUseOperand(DefIdx)) {
+          // Two-address instruction.
+          end = baseIndex.getDefIndex();
+        } else {
+          // Another instruction redefines the register before it is ever
+          // read. Then the register is essentially dead at the instruction
+          // that defines it. Hence its interval is:
+          // [defSlot(def), defSlot(def)+1)
+          DEBUG(dbgs() << " dead");
+          end = start.getStoreIndex();
+        }
+        goto exit;
+      }
+    }
+
+    baseIndex = baseIndex.getNextIndex();
+  }
+
+  // The only case where we should reach this point with a dead physreg and
+  // no killing instruction is when the register is live-in to the function
+  // and never used. Another possible case is that the implicit use of the
+  // physical register has been deleted by the two-address pass.
+  end = start.getStoreIndex();
+
+exit:
+  assert(start < end && "did not find end of interval?");
+
+  // Already exists? Extend old live interval.
+  VNInfo *ValNo = interval.getVNInfoAt(start);
+  bool Extend = ValNo != 0;
+  if (!Extend)
+    ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+  if (Extend && MO.isEarlyClobber())
+    ValNo->setHasRedefByEC(true);
+  LiveRange LR(start, end, ValNo);
+  interval.addRange(LR);
+  DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
+                                      MachineBasicBlock::iterator MI,
+                                      SlotIndex MIIdx,
+                                      MachineOperand& MO,
+                                      unsigned MOIdx) {
+  if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+    handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx,
+                             getOrCreateInterval(MO.getReg()));
+  else if (allocatableRegs_[MO.getReg()]) {
+    MachineInstr *CopyMI = NULL;
+    if (MI->isCopyLike())
+      CopyMI = MI;
+    handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+                              getOrCreateInterval(MO.getReg()), CopyMI);
+    // Def of a register also defines its sub-registers.
+    for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
+      // If MI also modifies the sub-register explicitly, avoid processing it
+      // more than once. Do not pass in TRI here so it checks for exact match.
+      if (!MI->definesRegister(*AS))
+        handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
+                                  getOrCreateInterval(*AS), 0);
+  }
+}
+
+void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
+                                         SlotIndex MIIdx,
+                                         LiveInterval &interval,
+                                         bool isAlias) {
+  DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
+
+  // Look for kills, if it reaches a def before it's killed, then it shouldn't
+  // be considered a livein.
+  MachineBasicBlock::iterator mi = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+  // Skip over DBG_VALUE at the start of the MBB.
+  if (mi != E && mi->isDebugValue()) {
+    while (++mi != E && mi->isDebugValue())
+      ;
+    if (mi == E)
+      // MBB is empty except for DBG_VALUE's.
+      return;
+  }
+
+  SlotIndex baseIndex = MIIdx;
+  SlotIndex start = baseIndex;
+  if (getInstructionFromIndex(baseIndex) == 0)
+    baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+
+  SlotIndex end = baseIndex;
+  bool SeenDefUse = false;
+
+  while (mi != E) {
+    if (mi->killsRegister(interval.reg, tri_)) {
+      DEBUG(dbgs() << " killed");
+      end = baseIndex.getDefIndex();
+      SeenDefUse = true;
+      break;
+    } else if (mi->definesRegister(interval.reg, tri_)) {
+      // Another instruction redefines the register before it is ever read.
+      // Then the register is essentially dead at the instruction that defines
+      // it. Hence its interval is:
+      // [defSlot(def), defSlot(def)+1)
+      DEBUG(dbgs() << " dead");
+      end = start.getStoreIndex();
+      SeenDefUse = true;
+      break;
+    }
+
+    while (++mi != E && mi->isDebugValue())
+      // Skip over DBG_VALUE.
+      ;
+    if (mi != E)
+      baseIndex = indexes_->getNextNonNullIndex(baseIndex);
+  }
+
+  // Live-in register might not be used at all.
+  if (!SeenDefUse) {
+    if (isAlias) {
+      DEBUG(dbgs() << " dead");
+      end = MIIdx.getStoreIndex();
+    } else {
+      DEBUG(dbgs() << " live through");
+      end = baseIndex;
+    }
+  }
+
+  SlotIndex defIdx = getMBBStartIdx(MBB);
+  assert(getInstructionFromIndex(defIdx) == 0 &&
+         "PHI def index points at actual instruction.");
+  VNInfo *vni =
+    interval.getNextValue(defIdx, 0, VNInfoAllocator);
+  vni->setIsPHIDef(true);
+  LiveRange LR(start, end, vni);
+
+  interval.addRange(LR);
+  DEBUG(dbgs() << " +" << LR << '\n');
+}
+
+/// computeIntervals - Computes the live intervals for virtual
+/// registers. For some ordering of the machine instructions [1,N], a
+/// live interval is an interval [i, j) where 1 <= i <= j < N for
+/// which a variable is live.
+void LiveIntervals::computeIntervals() {
+  DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n"
+               << "********** Function: "
+               << ((Value*)mf_->getFunction())->getName() << '\n');
+
+  SmallVector<unsigned, 8> UndefUses;
+  for (MachineFunction::iterator MBBI = mf_->begin(), E = mf_->end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = MBBI;
+    if (MBB->empty())
+      continue;
+
+    // Track the index of the current machine instr.
+    SlotIndex MIIndex = getMBBStartIdx(MBB);
+    DEBUG(dbgs() << "BB#" << MBB->getNumber()
+          << ":\t\t# derived from " << MBB->getName() << "\n");
+
+    // Create intervals for live-ins to this BB first.
+    for (MachineBasicBlock::livein_iterator LI = MBB->livein_begin(),
+           LE = MBB->livein_end(); LI != LE; ++LI) {
+      handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*LI));
+      // Multiple live-ins can alias the same register.
+      for (const unsigned* AS = tri_->getSubRegisters(*LI); *AS; ++AS)
+        if (!hasInterval(*AS))
+          handleLiveInRegister(MBB, MIIndex, getOrCreateInterval(*AS),
+                               true);
+    }
+
+    // Skip over empty initial indices.
+    if (getInstructionFromIndex(MIIndex) == 0)
+      MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+
+    for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end();
+         MI != miEnd; ++MI) {
+      DEBUG(dbgs() << MIIndex << "\t" << *MI);
+      if (MI->isDebugValue())
+        continue;
+
+      // Handle defs.
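+      // Only two operand kinds matter below: a def starts a new live range
+      // at this instruction's slot, while a use marked <undef> (for example,
+      // an operand reading an IMPLICIT_DEF value) is merely recorded so an
+      // empty interval can be created for it once the scan finishes.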
+      for (int i = MI->getNumOperands() - 1; i >= 0; --i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || !MO.getReg())
+          continue;
+
+        // handle register defs - build intervals
+        if (MO.isDef())
+          handleRegisterDef(MBB, MI, MIIndex, MO, i);
+        else if (MO.isUndef())
+          UndefUses.push_back(MO.getReg());
+      }
+
+      // Move to the next instr slot.
+      MIIndex = indexes_->getNextNonNullIndex(MIIndex);
+    }
+  }
+
+  // Create empty intervals for registers defined by implicit_def's (except
+  // for those implicit_def that define values which are liveout of their
+  // blocks).
+  for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) {
+    unsigned UndefReg = UndefUses[i];
+    (void)getOrCreateInterval(UndefReg);
+  }
+}
+
+LiveInterval* LiveIntervals::createInterval(unsigned reg) {
+  float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F;
+  return new LiveInterval(reg, Weight);
+}
+
+/// dupInterval - Duplicate a live interval. The caller is responsible for
+/// managing the allocated memory.
+LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
+  LiveInterval *NewLI = createInterval(li->reg);
+  NewLI->Copy(*li, mri_, getVNInfoAllocator());
+  return NewLI;
+}
+
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+void LiveIntervals::shrinkToUses(LiveInterval *li,
+                                 SmallVectorImpl<MachineInstr*> *dead) {
+  DEBUG(dbgs() << "Shrink: " << *li << '\n');
+  assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+         && "Can only shrink virtual registers");
+  // Find all the values used, including PHI kills.
+  SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+  // Visit all instructions reading li->reg.
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+       MachineInstr *UseMI = I.skipInstruction();) {
+    if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+      continue;
+    SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+    VNInfo *VNI = li->getVNInfoAt(Idx);
+    assert(VNI && "Live interval not live into reading instruction");
+    if (VNI->def == Idx) {
+      // Special case: An early-clobber tied operand reads and writes the
+      // register one slot early.
+      Idx = Idx.getPrevSlot();
+      VNI = li->getVNInfoAt(Idx);
+      assert(VNI && "Early-clobber tied value not available");
+    }
+    WorkList.push_back(std::make_pair(Idx, VNI));
+  }
+
+  // Create a new live interval with only minimal live segments per def.
+  LiveInterval NewLI(li->reg, 0);
+  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+       I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+
+    // A use tied to an early-clobber def ends at the load slot and isn't caught
+    // above. Catch it here instead. This probably only ever happens for inline
+    // assembly.
+    if (VNI->def.isUse())
+      if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex()))
+        WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI));
+  }
+
+  // Keep track of the PHIs that are in use.
+  SmallPtrSet<VNInfo*, 8> UsedPHIs;
+
+  // Extend intervals to reach all uses in WorkList.
+  while (!WorkList.empty()) {
+    SlotIndex Idx = WorkList.back().first;
+    VNInfo *VNI = WorkList.back().second;
+    WorkList.pop_back();
+    const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+    SlotIndex BlockStart = getMBBStartIdx(MBB);
+
+    // Extend the live range for VNI to be live at Idx.
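+    // Two outcomes are possible below: either VNI is already live within
+    // this block and extendInBlock() stretches the existing segment up to
+    // Idx, or VNI must be live-in, in which case a [BlockStart, Idx) range
+    // is added and all predecessors are queued so the value gets extended
+    // to their block ends as well.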
+    if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) {
+      (void)ExtVNI;
+      assert(ExtVNI == VNI && "Unexpected existing value number");
+      // Is this a PHIDef we haven't seen before?
+      if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI))
+        continue;
+      // The PHI is live, make sure the predecessors are live-out.
+      for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+           PE = MBB->pred_end(); PI != PE; ++PI) {
+        SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+        VNInfo *PVNI = li->getVNInfoAt(Stop);
+        // A predecessor is not required to have a live-out value for a PHI.
+        if (PVNI) {
+          assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
+          WorkList.push_back(std::make_pair(Stop, PVNI));
+        }
+      }
+      continue;
+    }
+
+    // VNI is live-in to MBB.
+    DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+    NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+    // Make sure VNI is live-out from the predecessors.
+    for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+      assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+      WorkList.push_back(std::make_pair(Stop, VNI));
+    }
+  }
+
+  // Handle dead values.
+  for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+       I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+    assert(LII != NewLI.end() && "Missing live range for PHI");
+    if (LII->end != VNI->def.getNextSlot())
+      continue;
+    if (VNI->isPHIDef()) {
+      // This is a dead PHI. Remove it.
+      VNI->setIsUnused(true);
+      NewLI.removeRange(*LII);
+    } else {
+      // This is a dead def. Make sure the instruction knows.
+      MachineInstr *MI = getInstructionFromIndex(VNI->def);
+      assert(MI && "No instruction defining live value");
+      MI->addRegisterDead(li->reg, tri_);
+      if (dead && MI->allDefsAreDead()) {
+        DEBUG(dbgs() << "All defs dead: " << *MI);
+        dead->push_back(MI);
+      }
+    }
+  }
+
+  // Move the trimmed ranges back.
+  li->ranges.swap(NewLI.ranges);
+  DEBUG(dbgs() << "Shrink: " << *li << '\n');
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register allocator hooks.
+//
+
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+                                 MachineBasicBlock *mbb) const {
+  const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+  // If li is not live into a landing pad, we can insert spill code before the
+  // first terminator.
+  if (!lpad || !isLiveInToMBB(li, lpad))
+    return mbb->getFirstTerminator();
+
+  // When there is a landing pad, spill code must go before the call
+  // instruction that can throw.
+  MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+  while (I != B) {
+    --I;
+    if (I->getDesc().isCall())
+      return I;
+  }
+  // The block contains no calls that can throw, so use the first terminator.
+  return mbb->getFirstTerminator();
+}
+
+void LiveIntervals::addKillFlags() {
+  for (iterator I = begin(), E = end(); I != E; ++I) {
+    unsigned Reg = I->first;
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (mri_->reg_nodbg_empty(Reg))
+      continue;
+    LiveInterval *LI = I->second;
+
+    // Every instruction that kills Reg corresponds to a live range end point.
+    for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+         ++RI) {
+      // A LOAD index indicates an MBB edge.
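+      // Each indexed instruction owns four consecutive slots, ordered
+      // LOAD < USE < DEF < STORE, so a range ending on a LOAD slot stops at
+      // a block boundary rather than at an instruction; there is no killing
+      // instruction to flag in that case.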
+      if (RI->end.isLoad())
+        continue;
+      MachineInstr *MI = getInstructionFromIndex(RI->end);
+      if (!MI)
+        continue;
+      MI->addRegisterKilled(Reg, NULL);
+    }
+  }
+}
+
+/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
+/// allow one) virtual register operand, then its uses are implicitly using
+/// the register. Returns the virtual register.
+unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
+                                            MachineInstr *MI) const {
+  unsigned RegOp = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || Reg == li.reg)
+      continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+        !allocatableRegs_[Reg])
+      continue;
+    // FIXME: For now, only remat MI with at most one register operand.
+    assert(!RegOp &&
+           "Can't rematerialize instruction with multiple register operands!");
+    RegOp = MO.getReg();
+#ifndef NDEBUG
+    break;
+#endif
+  }
+  return RegOp;
+}
+
+/// isValNoAvailableAt - Return true if the val# of the specified interval
+/// which reaches the given instruction also reaches the specified use index.
+bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
+                                       SlotIndex UseIdx) const {
+  VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+  return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                  const VNInfo *ValNo, MachineInstr *MI,
+                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  bool &isLoad) {
+  if (DisableReMat)
+    return false;
+
+  if (!tii_->isTriviallyReMaterializable(MI, aa_))
+    return false;
+
+  // Target-specific code can mark an instruction as being rematerializable
+  // if it has one virtual reg use, though it had better be something like
+  // a PIC base register which is likely to be live everywhere.
+  unsigned ImpUse = getReMatImplicitUse(li, MI);
+  if (ImpUse) {
+    const LiveInterval &ImpLi = getInterval(ImpUse);
+    for (MachineRegisterInfo::use_nodbg_iterator
+           ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+         ri != re; ++ri) {
+      MachineInstr *UseMI = &*ri;
+      SlotIndex UseIdx = getInstructionIndex(UseMI);
+      if (li.getVNInfoAt(UseIdx) != ValNo)
+        continue;
+      if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
+        return false;
+    }
+
+    // If a register operand of the re-materialized instruction is going to
+    // be spilled next, then it's not legal to re-materialize this instruction.
+    for (unsigned i = 0, e = SpillIs.size(); i != e; ++i)
+      if (ImpUse == SpillIs[i]->reg)
+        return false;
+  }
+  return true;
+}
+
+/// isReMaterializable - Returns true if the definition MI of the specified
+/// val# of the specified interval is re-materializable.
+bool LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                       const VNInfo *ValNo, MachineInstr *MI) {
+  SmallVector<LiveInterval*, 4> Dummy1;
+  bool Dummy2;
+  return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2);
+}
+
+/// isReMaterializable - Returns true if every definition of MI of every
+/// val# of the specified interval is re-materializable.
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+                                  const SmallVectorImpl<LiveInterval*> &SpillIs,
+                                  bool &isLoad) {
+  isLoad = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    if (VNI->isUnused())
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+    if (!ReMatDefMI)
+      return false;
+    bool DefIsLoad = false;
+    if (!ReMatDefMI ||
+        !isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
+      return false;
+    isLoad |= DefIsLoad;
+  }
+  return true;
+}
+
+/// FilterFoldedOps - Filter out two-address use operands. Return
+/// true if it finds any issue with the operands that ought to prevent
+/// folding.
+static bool FilterFoldedOps(MachineInstr *MI,
+                            SmallVector<unsigned, 2> &Ops,
+                            unsigned &MRInfo,
+                            SmallVector<unsigned, 2> &FoldOps) {
+  MRInfo = 0;
+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+    unsigned OpIdx = Ops[i];
+    MachineOperand &MO = MI->getOperand(OpIdx);
+    // FIXME: fold subreg use.
+    if (MO.getSubReg())
+      return true;
+    if (MO.isDef())
+      MRInfo |= (unsigned)VirtRegMap::isMod;
+    else {
+      // Filter out two-address use operand(s).
+      if (MI->isRegTiedToDefOperand(OpIdx)) {
+        MRInfo = VirtRegMap::isModRef;
+        continue;
+      }
+      MRInfo |= (unsigned)VirtRegMap::isRef;
+    }
+    FoldOps.push_back(OpIdx);
+  }
+  return false;
+}
+
+
+/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from
+/// slot / to reg or any rematerialized load into ith operand of specified
+/// MI. If it is successful, MI is updated with the newly created MI and
+/// returns true.
+bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
+                                         VirtRegMap &vrm, MachineInstr *DefMI,
+                                         SlotIndex InstrIdx,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool isSS, int Slot, unsigned Reg) {
+  // If it is an implicit def instruction, just delete it.
+  if (MI->isImplicitDef()) {
+    RemoveMachineInstrFromMaps(MI);
+    vrm.RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+    ++numFolds;
+    return true;
+  }
+
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // The only time it's safe to fold into a two address instruction is when
+  // it's folding reload and spill from / into a spill stack slot.
+  if (DefMI && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+                           : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
+  if (fmi) {
+    // Remember this instruction uses the spill slot.
+    if (isSS) vrm.addSpillSlotUse(Slot, fmi);
+
+    // Attempt to fold the memory reference into the instruction. If
+    // we can do this, we don't need to insert spill code.
+    if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
+      vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
+    vrm.transferSpillPts(MI, fmi);
+    vrm.transferRestorePts(MI, fmi);
+    vrm.transferEmergencySpills(MI, fmi);
+    ReplaceMachineInstrInMaps(MI, fmi);
+    MI->eraseFromParent();
+    MI = fmi;
+    ++numFolds;
+    return true;
+  }
+  return false;
+}
+
+/// canFoldMemoryOperand - Returns true if the specified load / store
+/// folding is possible.
+bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI,
+                                         SmallVector<unsigned, 2> &Ops,
+                                         bool ReMat) const {
+  // Filter the list of operand indexes that are to be folded. Abort if
+  // any operand will prevent folding.
+  unsigned MRInfo = 0;
+  SmallVector<unsigned, 2> FoldOps;
+  if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps))
+    return false;
+
+  // It's only legal to remat for a use, not a def.
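+  // That is, a rematerialized value can stand in for a reloaded use, but an
+  // operand list that writes the register (the isMod bit in MRInfo) would
+  // also need a store, which rematerialization cannot replace.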
+  if (ReMat && (MRInfo & VirtRegMap::isMod))
+    return false;
+
+  return tii_->canFoldMemoryOperand(MI, FoldOps);
+}
+
+bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const {
+  LiveInterval::Ranges::const_iterator itr = li.ranges.begin();
+
+  MachineBasicBlock *mbb = indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+  if (mbb == 0)
+    return false;
+
+  for (++itr; itr != li.ranges.end(); ++itr) {
+    MachineBasicBlock *mbb2 =
+      indexes_->getMBBCoveringRange(itr->start, itr->end);
+
+    if (mbb2 != mbb)
+      return false;
+  }
+
+  return true;
+}
+
+/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. uses of
+/// interval on to-be re-materialized operands of MI) with new register.
+void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
+                                       MachineInstr *MI, unsigned NewVReg,
+                                       VirtRegMap &vrm) {
+  // There is an implicit use. That means one of the other operands is
+  // being remat'ed and the remat'ed instruction has li.reg as a
+  // use operand. Make sure we rewrite that as well.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (!vrm.isReMaterialized(Reg))
+      continue;
+    MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg);
+    MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg);
+    if (UseMO)
+      UseMO->setReg(NewVReg);
+  }
+}
+
+/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions
+/// for addIntervalsForSpills to rewrite uses / defs for the given live range.
+bool LiveIntervals::
+rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
+                 bool TrySplit, SlotIndex index, SlotIndex end,
+                 MachineInstr *MI,
+                 MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                 unsigned Slot, int LdSlot,
+                 bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                 VirtRegMap &vrm,
+                 const TargetRegisterClass* rc,
+                 SmallVector<int, 4> &ReMatIds,
+                 const MachineLoopInfo *loopInfo,
+                 unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse,
+                 DenseMap<unsigned, unsigned> &MBBVRegsMap,
+                 std::vector<LiveInterval*> &NewLIs) {
+  bool CanFold = false;
+ RestartInstruction:
+  for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
+    MachineOperand& mop = MI->getOperand(i);
+    if (!mop.isReg())
+      continue;
+    unsigned Reg = mop.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (Reg != li.reg)
+      continue;
+
+    bool TryFold = !DefIsReMat;
+    bool FoldSS = true; // Default behavior unless it's a remat.
+    int FoldSlot = Slot;
+    if (DefIsReMat) {
+      // If this is the rematerializable definition MI itself and
+      // all of its uses are rematerialized, simply delete it.
+      if (MI == ReMatOrigDefMI && CanDelete) {
+        DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
+                     << *MI << '\n');
+        RemoveMachineInstrFromMaps(MI);
+        vrm.RemoveMachineInstrFromMaps(MI);
+        MI->eraseFromParent();
+        break;
+      }
+
+      // If def for this use can't be rematerialized, then try folding.
+      // If def is rematerializable and it's a load, also try folding.
+      TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad));
+      if (isLoad) {
+        // Try fold loads (from stack slot, constant pool, etc.) into uses.
+        FoldSS = isLoadSS;
+        FoldSlot = LdSlot;
+      }
+    }
+
+    // Scan all of the operands of this instruction rewriting operands
+    // to use NewVReg instead of li.reg as appropriate.  We do this for
+    // two reasons:
+    //
+    //   1. If the instr reads the same spilled vreg multiple times, we
+    //      want to reuse the NewVReg.
+    //   2. If the instr is a two-addr instruction, we are required to
+    //      keep the src/dst regs pinned.
+    //
+    // Keep track of whether we replace a use and/or def so that we can
+    // create the spill interval with the appropriate range.
+    SmallVector<unsigned, 2> Ops;
+    tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
+
+    // Create a new virtual register for the spill interval.
+    // Create the new register now so we can map the fold instruction
+    // to the new register so when it is unfolded we get the correct
+    // answer.
+    bool CreatedNewVReg = false;
+    if (NewVReg == 0) {
+      NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      CreatedNewVReg = true;
+
+      // The new virtual register should get the same allocation hints as the
+      // old one.
+      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(Reg);
+      if (Hint.first || Hint.second)
+        mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second);
+    }
+
+    if (!TryFold)
+      CanFold = false;
+    else {
+      // Do not fold load / store here if we are splitting. We'll find an
+      // optimal point to insert a load / store later.
+      if (!TrySplit) {
+        if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+                                 Ops, FoldSS, FoldSlot, NewVReg)) {
+          // Folding the load/store can completely change the instruction in
+          // unpredictable ways, rescan it from the beginning.
+
+          if (FoldSS) {
+            // We need to give the new vreg the same stack slot as the
+            // spilled interval.
+            vrm.assignVirt2StackSlot(NewVReg, FoldSlot);
+          }
+
+          HasUse = false;
+          HasDef = false;
+          CanFold = false;
+          if (isNotInMIMap(MI))
+            break;
+          goto RestartInstruction;
+        }
+      } else {
+        // We'll try to fold it later if it's profitable.
+        CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat);
+      }
+    }
+
+    mop.setReg(NewVReg);
+    if (mop.isImplicit())
+      rewriteImplicitOps(li, MI, NewVReg, vrm);
+
+    // Reuse NewVReg for other reads.
+    bool HasEarlyClobber = false;
+    for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
+      MachineOperand &mopj = MI->getOperand(Ops[j]);
+      mopj.setReg(NewVReg);
+      if (mopj.isImplicit())
+        rewriteImplicitOps(li, MI, NewVReg, vrm);
+      if (mopj.isEarlyClobber())
+        HasEarlyClobber = true;
+    }
+
+    if (CreatedNewVReg) {
+      if (DefIsReMat) {
+        vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI);
+        if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) {
+          // Each valnum may have its own remat id.
+          ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg);
+        } else {
+          vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]);
+        }
+        if (!CanDelete || (HasUse && HasDef)) {
+          // If this is a two-addr instruction then its use operands are
+          // rematerializable but its def is not. It should be assigned a
+          // stack slot.
+          vrm.assignVirt2StackSlot(NewVReg, Slot);
+        }
+      } else {
+        vrm.assignVirt2StackSlot(NewVReg, Slot);
+      }
+    } else if (HasUse && HasDef &&
+               vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) {
+      // If this interval hasn't been assigned a stack slot (because earlier
+      // def is a deleted remat def), do it now.
+      assert(Slot != VirtRegMap::NO_STACK_SLOT);
+      vrm.assignVirt2StackSlot(NewVReg, Slot);
+    }
+
+    // Re-matting an instruction with virtual register use. Add the
+    // register as an implicit use on the use MI.
+    if (DefIsReMat && ImpUse)
+      MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+
+    // Create a new register interval for this spill / remat.
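+    // The ranges added below are intentionally tiny: a reloaded use covers
+    // only [load-slot, def-slot) of this instruction, and a spilled def only
+    // [def-slot, store-slot), so each new vreg is live just across the
+    // instruction it feeds.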
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (CreatedNewVReg) {
+      NewLIs.push_back(&nI);
+      MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg));
+      if (TrySplit)
+        vrm.setIsSplitFromReg(NewVReg, li.reg);
+    }
+
+    if (HasUse) {
+      if (CreatedNewVReg) {
+        LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
+                     nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+        DEBUG(dbgs() << " +" << LR);
+        nI.addRange(LR);
+      } else {
+        // Extend the split live interval to this def / use.
+        SlotIndex End = index.getDefIndex();
+        LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End,
+                     nI.getValNumInfo(nI.getNumValNums()-1));
+        DEBUG(dbgs() << " +" << LR);
+        nI.addRange(LR);
+      }
+    }
+    if (HasDef) {
+      // An early clobber starts at the use slot, except for an early clobber
+      // tied to a use operand (yes, that is a thing).
+      LiveRange LR(HasEarlyClobber && !HasUse ?
+                   index.getUseIndex() : index.getDefIndex(),
+                   index.getStoreIndex(),
+                   nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
+      DEBUG(dbgs() << " +" << LR);
+      nI.addRange(LR);
+    }
+
+    DEBUG({
+        dbgs() << "\t\t\t\tAdded new interval: ";
+        nI.print(dbgs(), tri_);
+        dbgs() << '\n';
+      });
+  }
+  return CanFold;
+}
+
+bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
+                                         const VNInfo *VNI,
+                                         MachineBasicBlock *MBB,
+                                         SlotIndex Idx) const {
+  return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
+}
+
+/// RewriteInfo - Keep track of machine instrs that will be rewritten
+/// during spilling.
+namespace {
+  struct RewriteInfo {
+    SlotIndex Index;
+    MachineInstr *MI;
+    RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
+  };
+
+  struct RewriteInfoCompare {
+    bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const {
+      return LHS.Index < RHS.Index;
+    }
+  };
+}
+
+void LiveIntervals::
+rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
+                    LiveInterval::Ranges::const_iterator &I,
+                    MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI,
+                    unsigned Slot, int LdSlot,
+                    bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete,
+                    VirtRegMap &vrm,
+                    const TargetRegisterClass* rc,
+                    SmallVector<int, 4> &ReMatIds,
+                    const MachineLoopInfo *loopInfo,
+                    BitVector &SpillMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &SpillIdxes,
+                    BitVector &RestoreMBBs,
+                    DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes,
+                    DenseMap<unsigned, unsigned> &MBBVRegsMap,
+                    std::vector<LiveInterval*> &NewLIs) {
+  bool AllCanFold = true;
+  unsigned NewVReg = 0;
+  SlotIndex start = I->start.getBaseIndex();
+  SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
+
+  // First collect all the def / use in this live range that will be rewritten.
+  // Make sure they are sorted according to instruction index.
+  std::vector<RewriteInfo> RewriteMIs;
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineInstr *MI = &*ri;
+    MachineOperand &O = ri.getOperand();
+    ++ri;
+    if (MI->isDebugValue()) {
+      // Modify DBG_VALUE now that the value is in a spill slot.
+      if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) {
+        uint64_t Offset = MI->getOperand(1).getImm();
+        const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+        DebugLoc DL = MI->getDebugLoc();
+        int FI = isLoadSS ? LdSlot : (int)Slot;
+        if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI,
+                                                           Offset, MDPtr, DL)) {
+          DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+          ReplaceMachineInstrInMaps(MI, NewDV);
+          MachineBasicBlock *MBB = MI->getParent();
+          MBB->insert(MBB->erase(MI), NewDV);
+          continue;
+        }
+      }
+
+      DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+      continue;
+    }
+    assert(!(O.isImplicit() && O.isUse()) &&
+           "Spilling register that's used as implicit use?");
+    SlotIndex index = getInstructionIndex(MI);
+    if (index < start || index >= end)
+      continue;
+
+    if (O.isUndef())
+      // Must be defined by an implicit def. It should not be spilled. Note,
+      // this is for correctness reasons, e.g.
+      // 8   %reg1024<def> = IMPLICIT_DEF
+      // 12  %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+      // The live range [12, 14) is not part of the r1024 live interval since
+      // it's defined by an implicit def. It will not conflict with the live
+      // interval of r1025. Now suppose both registers are spilled; you can
+      // easily see a situation where both registers are reloaded before
+      // the INSERT_SUBREG and both target registers that would overlap.
+      continue;
+    RewriteMIs.push_back(RewriteInfo(index, MI));
+  }
+  std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
+
+  unsigned ImpUse = DefIsReMat ? getReMatImplicitUse(li, ReMatDefMI) : 0;
+  // Now rewrite the defs and uses.
+  for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) {
+    RewriteInfo &rwi = RewriteMIs[i];
+    ++i;
+    SlotIndex index = rwi.Index;
+    MachineInstr *MI = rwi.MI;
+    // If MI defs and/or uses the same register multiple times, then there
+    // are multiple entries.
+    while (i != e && RewriteMIs[i].MI == MI) {
+      assert(RewriteMIs[i].Index == index);
+      ++i;
+    }
+    MachineBasicBlock *MBB = MI->getParent();
+
+    if (ImpUse && MI != ReMatDefMI) {
+      // Re-matting an instruction with virtual register use. Prevent interval
+      // from being spilled.
+      getInterval(ImpUse).markNotSpillable();
+    }
+
+    unsigned MBBId = MBB->getNumber();
+    unsigned ThisVReg = 0;
+    if (TrySplit) {
+      DenseMap<unsigned, unsigned>::iterator NVI = MBBVRegsMap.find(MBBId);
+      if (NVI != MBBVRegsMap.end()) {
+        ThisVReg = NVI->second;
+        // One common case:
+        // x = use
+        // ...
+        // ...
+        // def = ...
+        //     = use
+        // It's better to start a new interval to avoid artificially
+        // extending the new interval.
+        if (MI->readsWritesVirtualRegister(li.reg) ==
+            std::make_pair(false, true)) {
+          MBBVRegsMap.erase(MBB->getNumber());
+          ThisVReg = 0;
+        }
+      }
+    }
+
+    bool IsNew = ThisVReg == 0;
+    if (IsNew) {
+      // This ends the previous live interval. If all of its def / use
+      // can be folded, give it a low spill weight.
+      if (NewVReg && TrySplit && AllCanFold) {
+        LiveInterval &nI = getOrCreateInterval(NewVReg);
+        nI.weight /= 10.0F;
+      }
+      AllCanFold = true;
+    }
+    NewVReg = ThisVReg;
+
+    bool HasDef = false;
+    bool HasUse = false;
+    bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit,
+                         index, end, MI, ReMatOrigDefMI, ReMatDefMI,
+                         Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                         CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg,
+                         ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs);
+    if (!HasDef && !HasUse)
+      continue;
+
+    AllCanFold &= CanFold;
+
+    // Update weight of spill interval.
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    if (!TrySplit) {
+      // The spill weight is now infinity as it cannot be spilled again.
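+      // markNotSpillable() pins the interval by giving it an infinite
+      // weight, so the allocator can never pick it as a spill candidate
+      // again.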
+      nI.markNotSpillable();
+      continue;
+    }
+
+    // Keep track of the last def and first use in each MBB.
+    if (HasDef) {
+      if (MI != ReMatOrigDefMI || !CanDelete) {
+        bool HasKill = false;
+        if (!HasUse)
+          HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex());
+        else {
+          // If this is a two-address instruction, then this index starts a
+          // new VNInfo.
+          const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex());
+          if (VNI)
+            HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex());
+        }
+        DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+          SpillIdxes.find(MBBId);
+        if (!HasKill) {
+          if (SII == SpillIdxes.end()) {
+            std::vector<SRInfo> S;
+            S.push_back(SRInfo(index, NewVReg, true));
+            SpillIdxes.insert(std::make_pair(MBBId, S));
+          } else if (SII->second.back().vreg != NewVReg) {
+            SII->second.push_back(SRInfo(index, NewVReg, true));
+          } else if (index > SII->second.back().index) {
+            // If there is an earlier def and this is a two-address
+            // instruction, then it's not possible to fold the store (which
+            // would also fold the load).
+            SRInfo &Info = SII->second.back();
+            Info.index = index;
+            Info.canFold = !HasUse;
+          }
+          SpillMBBs.set(MBBId);
+        } else if (SII != SpillIdxes.end() &&
+                   SII->second.back().vreg == NewVReg &&
+                   index > SII->second.back().index) {
+          // There is an earlier def that's not killed (must be two-address).
+          // The spill is no longer needed.
+          SII->second.pop_back();
+          if (SII->second.empty()) {
+            SpillIdxes.erase(MBBId);
+            SpillMBBs.reset(MBBId);
+          }
+        }
+      }
+    }
+
+    if (HasUse) {
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator SII =
+        SpillIdxes.find(MBBId);
+      if (SII != SpillIdxes.end() &&
+          SII->second.back().vreg == NewVReg &&
+          index > SII->second.back().index)
+        // Use(s) following the last def, it's not safe to fold the spill.
+        SII->second.back().canFold = false;
+      DenseMap<unsigned, std::vector<SRInfo> >::iterator RII =
+        RestoreIdxes.find(MBBId);
+      if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg)
+        // If we are splitting live intervals, only fold if it's the first
+        // use and there isn't another use later in the MBB.
+        RII->second.back().canFold = false;
+      else if (IsNew) {
+        // Only need a reload if there isn't an earlier def / use.
+        if (RII == RestoreIdxes.end()) {
+          std::vector<SRInfo> Infos;
+          Infos.push_back(SRInfo(index, NewVReg, true));
+          RestoreIdxes.insert(std::make_pair(MBBId, Infos));
+        } else {
+          RII->second.push_back(SRInfo(index, NewVReg, true));
+        }
+        RestoreMBBs.set(MBBId);
+      }
+    }
+
+    // Update spill weight.
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    nI.weight += getSpillWeight(HasDef, HasUse, loopDepth);
+  }
+
+  if (NewVReg && TrySplit && AllCanFold) {
+    // If all of its def / use can be folded, give it a low spill weight.
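+    // Dividing the weight by 10 biases the allocator toward spilling an
+    // interval whose every access can be folded into the using instruction,
+    // since spilling it then adds no separate load or store instructions.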
+    LiveInterval &nI = getOrCreateInterval(NewVReg);
+    nI.weight /= 10.0F;
+  }
+}
+
+bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index,
+                        unsigned vr, BitVector &RestoreMBBs,
+                        DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return false;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index &&
+        Restores[i].vreg == vr &&
+        Restores[i].canFold)
+      return true;
+  return false;
+}
+
+void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index,
+                        unsigned vr, BitVector &RestoreMBBs,
+                        DenseMap<unsigned, std::vector<SRInfo> > &RestoreIdxes) {
+  if (!RestoreMBBs[Id])
+    return;
+  std::vector<SRInfo> &Restores = RestoreIdxes[Id];
+  for (unsigned i = 0, e = Restores.size(); i != e; ++i)
+    if (Restores[i].index == index && Restores[i].vreg == vr)
+      Restores[i].index = SlotIndex();
+}
+
+/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being
+/// spilled and create empty intervals for their uses.
+void
+LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
+                                    const TargetRegisterClass* rc,
+                                    std::vector<LiveInterval*> &NewLIs) {
+  for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg),
+         re = mri_->reg_end(); ri != re; ) {
+    MachineOperand &O = ri.getOperand();
+    MachineInstr *MI = &*ri;
+    ++ri;
+    if (MI->isDebugValue()) {
+      // Remove debug info for now.
+      O.setReg(0U);
+      DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+      continue;
+    }
+    if (O.isDef()) {
+      assert(MI->isImplicitDef() &&
+             "Register def was not rewritten?");
+      RemoveMachineInstrFromMaps(MI);
+      vrm.RemoveMachineInstrFromMaps(MI);
+      MI->eraseFromParent();
+    } else {
+      // This must be a use of an implicit_def so it's not part of the live
+      // interval. Create a new empty live interval for it.
+      // FIXME: Can we simply erase some of the instructions? e.g. Stores?
+      unsigned NewVReg = mri_->createVirtualRegister(rc);
+      vrm.grow();
+      vrm.setIsImplicitlyDefined(NewVReg);
+      NewLIs.push_back(&getOrCreateInterval(NewVReg));
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.isReg() && MO.getReg() == li.reg) {
+          MO.setReg(NewVReg);
+          MO.setIsUndef();
+        }
+      }
+    }
+  }
+}
+
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+  // Limit the loop depth ridiculousness.
+  if (loopDepth > 200)
+    loopDepth = 200;
+
+  // The loop depth is used to roughly estimate the number of times the
+  // instruction is executed. Something like 10^d is simple, but will quickly
+  // overflow a float. This expression behaves like 10^d for small d, but is
+  // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+  // headroom before overflow.
+  float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+
+  return (isDef + isUse) * lc;
+}
+
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+  for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+    NewLIs[i]->weight =
+      normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
+}
+
+std::vector<LiveInterval*> LiveIntervals::
+addIntervalsForSpills(const LiveInterval &li,
+                      const SmallVectorImpl<LiveInterval*> &SpillIs,
+                      const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
+  assert(li.isSpillable() && "attempt to spill already spilled interval!");
+
+  DEBUG({
+      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
+      li.print(dbgs(), tri_);
+      dbgs() << '\n';
+    });
+
+  // Each bit specifies whether a spill is required in the MBB.
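+  // Bookkeeping for the splitting loop below: SpillMBBs/RestoreMBBs mark the
+  // blocks that need a store or a reload at all, while SpillIdxes/RestoreIdxes
+  // record per block the slot index, the new vreg, and whether the memory
+  // access can still be folded into the instruction (SRInfo::canFold).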
+  BitVector SpillMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > SpillIdxes;
+  BitVector RestoreMBBs(mf_->getNumBlockIDs());
+  DenseMap<unsigned, std::vector<SRInfo> > RestoreIdxes;
+  DenseMap<unsigned, unsigned> MBBVRegsMap;
+  std::vector<LiveInterval*> NewLIs;
+  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
+
+  unsigned NumValNums = li.getNumValNums();
+  SmallVector<MachineInstr*, 4> ReMatDefs;
+  ReMatDefs.resize(NumValNums, NULL);
+  SmallVector<MachineInstr*, 4> ReMatOrigDefs;
+  ReMatOrigDefs.resize(NumValNums, NULL);
+  SmallVector<int, 4> ReMatIds;
+  ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT);
+  BitVector ReMatDelete(NumValNums);
+  unsigned Slot = VirtRegMap::MAX_STACK_SLOT;
+
+  // Spilling a split live interval. It cannot be split any further. Also,
+  // it's guaranteed to be a single val# / range interval.
+  if (vrm.getPreSplitReg(li.reg)) {
+    vrm.setIsSplitFromReg(li.reg, 0);
+    // Unset the split kill marker on the last use.
+    SlotIndex KillIdx = vrm.getKillPoint(li.reg);
+    if (KillIdx != SlotIndex()) {
+      MachineInstr *KillMI = getInstructionFromIndex(KillIdx);
+      assert(KillMI && "Last use disappeared?");
+      int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true);
+      assert(KillOp != -1 && "Last use disappeared?");
+      KillMI->getOperand(KillOp).setIsKill(false);
+    }
+    vrm.removeKillPoint(li.reg);
+    bool DefIsReMat = vrm.isReMaterialized(li.reg);
+    Slot = vrm.getStackSlot(li.reg);
+    assert(Slot != VirtRegMap::MAX_STACK_SLOT);
+    MachineInstr *ReMatDefMI = DefIsReMat ?
+      vrm.getReMaterializedMI(li.reg) : NULL;
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad()));
+    bool IsFirstRange = true;
+    for (LiveInterval::Ranges::const_iterator
+           I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+      // If this is a split live interval with multiple ranges, it means there
+      // are two-address instructions that re-defined the value. Only the
+      // first def can be rematerialized!
+      if (IsFirstRange) {
+        // Note ReMatOrigDefMI has already been deleted.
+        rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI,
+                             Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      } else {
+        rewriteInstructionsForSpills(li, false, I, NULL, 0,
+                             Slot, 0, false, false, false,
+                             false, vrm, rc, ReMatIds, loopInfo,
+                             SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                             MBBVRegsMap, NewLIs);
+      }
+      IsFirstRange = false;
+    }
+
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    normalizeSpillWeights(NewLIs);
+    return NewLIs;
+  }
+
+  bool TrySplit = !intervalIsInOneMBB(li);
+  if (TrySplit)
+    ++numSplits;
+  bool NeedStackSlot = false;
+  for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
+       i != e; ++i) {
+    const VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (VNI->isUnused())
+      continue; // Dead val#.
+    // Is the def for the val# rematerializable?
+    MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+    bool dummy;
+    if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
+      // Remember how to remat the def of this val#.
+      ReMatOrigDefs[VN] = ReMatDefMI;
+      // Original def may be modified so we have to make a copy here.
+      MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI);
+      CloneMIs.push_back(Clone);
+      ReMatDefs[VN] = Clone;
+
+      bool CanDelete = true;
+      if (VNI->hasPHIKill()) {
+        // A kill is a phi node, so not all of its uses can be rematerialized.
+        // It must not be deleted.
+        CanDelete = false;
+        // Need a stack slot if there is any live range where uses cannot be
+        // rematerialized.
+        NeedStackSlot = true;
+      }
+      if (CanDelete)
+        ReMatDelete.set(VN);
+    } else {
+      // Need a stack slot if there is any live range where uses cannot be
+      // rematerialized.
+      NeedStackSlot = true;
+    }
+  }
+
+  // One stack slot per live interval.
+  if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) {
+    if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT)
+      Slot = vrm.assignVirt2StackSlot(li.reg);
+
+    // This case only occurs when the prealloc splitter has already assigned
+    // a stack slot to this vreg.
+    else
+      Slot = vrm.getStackSlot(li.reg);
+  }
+
+  // Create new intervals and rewrite defs and uses.
+  for (LiveInterval::Ranges::const_iterator
+         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
+    MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id];
+    MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id];
+    bool DefIsReMat = ReMatDefMI != NULL;
+    bool CanDelete = ReMatDelete[I->valno->id];
+    int LdSlot = 0;
+    bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+    bool isLoad = isLoadSS ||
+      (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad());
+    rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI,
+                         Slot, LdSlot, isLoad, isLoadSS, DefIsReMat,
+                         CanDelete, vrm, rc, ReMatIds, loopInfo,
+                         SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes,
+                         MBBVRegsMap, NewLIs);
+  }
+
+  // Insert spills / restores if we are splitting.
+  if (!TrySplit) {
+    handleSpilledImpDefs(li, vrm, rc, NewLIs);
+    normalizeSpillWeights(NewLIs);
+    return NewLIs;
+  }
+
+  SmallPtrSet<LiveInterval*, 4> AddedKill;
+  SmallVector<unsigned, 2> Ops;
+  if (NeedStackSlot) {
+    int Id = SpillMBBs.find_first();
+    while (Id != -1) {
+      std::vector<SRInfo> &spills = SpillIdxes[Id];
+      for (unsigned i = 0, e = spills.size(); i != e; ++i) {
+        SlotIndex index = spills[i].index;
+        unsigned VReg = spills[i].vreg;
+        LiveInterval &nI = getOrCreateInterval(VReg);
+        bool isReMat = vrm.isReMaterialized(VReg);
+        MachineInstr *MI = getInstructionFromIndex(index);
+        bool CanFold = false;
+        bool FoundUse = false;
+        Ops.clear();
+        if (spills[i].canFold) {
+          CanFold = true;
+          for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &MO = MI->getOperand(j);
+            if (!MO.isReg() || MO.getReg() != VReg)
+              continue;
+
+            Ops.push_back(j);
+            if (MO.isDef())
+              continue;
+            if (isReMat ||
+                (!FoundUse && !alsoFoldARestore(Id, index, VReg,
+                                                RestoreMBBs, RestoreIdxes))) {
+              // MI has two-address uses of the same register. If the use
+              // isn't the first and only use in the BB, then we can't fold
+              // it. FIXME: Move this to rewriteInstructionsForSpills.
+              CanFold = false;
+              break;
+            }
+            FoundUse = true;
+          }
+        }
+        // Fold the store into the def if possible.
+        bool Folded = false;
+        if (CanFold && !Ops.empty()) {
+          if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){
+            Folded = true;
+            if (FoundUse) {
+              // Also folded uses, do not issue a load.
+              eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
+              nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+            }
+            nI.removeRange(index.getDefIndex(), index.getStoreIndex());
+          }
+        }
+
+        // Otherwise tell the spiller to issue a spill.
+        if (!Folded) {
+          LiveRange *LR = &nI.ranges[nI.ranges.size()-1];
+          bool isKill = LR->end == index.getStoreIndex();
+          if (!MI->registerDefIsDead(nI.reg))
+            // No need to spill a dead def.
+            vrm.addSpillPoint(VReg, isKill, MI);
+          if (isKill)
+            AddedKill.insert(&nI);
+        }
+      }
+      Id = SpillMBBs.find_next(Id);
+    }
+  }
+
+  int Id = RestoreMBBs.find_first();
+  while (Id != -1) {
+    std::vector<SRInfo> &restores = RestoreIdxes[Id];
+    for (unsigned i = 0, e = restores.size(); i != e; ++i) {
+      SlotIndex index = restores[i].index;
+      if (index == SlotIndex())
+        continue;
+      unsigned VReg = restores[i].vreg;
+      LiveInterval &nI = getOrCreateInterval(VReg);
+      bool isReMat = vrm.isReMaterialized(VReg);
+      MachineInstr *MI = getInstructionFromIndex(index);
+      bool CanFold = false;
+      Ops.clear();
+      if (restores[i].canFold) {
+        CanFold = true;
+        for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+          MachineOperand &MO = MI->getOperand(j);
+          if (!MO.isReg() || MO.getReg() != VReg)
+            continue;
+
+          if (MO.isDef()) {
+            // If this restore were to be folded, it would have been folded
+            // already.
+            CanFold = false;
+            break;
+          }
+          Ops.push_back(j);
+        }
+      }
+
+      // Fold the load into the use if possible.
+      bool Folded = false;
+      if (CanFold && !Ops.empty()) {
+        if (!isReMat)
+          Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg);
+        else {
+          MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
+          int LdSlot = 0;
+          bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot);
+          // If the rematerializable def is a load, also try to fold it.
+          if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad())
+            Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index,
+                                          Ops, isLoadSS, LdSlot, VReg);
+          if (!Folded) {
+            unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
+            if (ImpUse) {
+              // Re-matting an instruction with virtual register use. Add the
+              // register as an implicit use on the use MI and mark the register
+              // interval as unspillable.
+              LiveInterval &ImpLi = getInterval(ImpUse);
+              ImpLi.markNotSpillable();
+              MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
+            }
+          }
+        }
+      }
+      // If folding is not possible / failed, then tell the spiller to issue a
+      // load / rematerialization for us.
+      if (Folded)
+        nI.removeRange(index.getLoadIndex(), index.getDefIndex());
+      else
+        vrm.addRestorePoint(VReg, MI);
+    }
+    Id = RestoreMBBs.find_next(Id);
+  }
+
+  // Finalize intervals: add kills, finalize spill weights, and filter out
+  // dead intervals.
+  std::vector<LiveInterval*> RetNewLIs;
+  for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) {
+    LiveInterval *LI = NewLIs[i];
+    if (!LI->empty()) {
+      if (!AddedKill.count(LI)) {
+        LiveRange *LR = &LI->ranges[LI->ranges.size()-1];
+        SlotIndex LastUseIdx = LR->end.getBaseIndex();
+        MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx);
+        int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false);
+        assert(UseIdx != -1);
+        if (!LastUse->isRegTiedToDefOperand(UseIdx)) {
+          LastUse->getOperand(UseIdx).setIsKill();
+          vrm.addKillPoint(LI->reg, LastUseIdx);
+        }
+      }
+      RetNewLIs.push_back(LI);
+    }
+  }
+
+  handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+  normalizeSpillWeights(RetNewLIs);
+  return RetNewLIs;
+}
+
+/// hasAllocatableSuperReg - Return true if the specified physical register has
+/// any super register that's allocatable.
+bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const {
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS)
+    if (allocatableRegs_[*AS] && hasInterval(*AS))
+      return true;
+  return false;
+}
+
+/// getRepresentativeReg - Find the largest super register of the specified
+/// physical register.
+unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const {
+  // Find the largest super-register that is allocatable.
+  unsigned BestReg = Reg;
+  for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) {
+    unsigned SuperReg = *AS;
+    if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) {
+      BestReg = SuperReg;
+      break;
+    }
+  }
+  return BestReg;
+}
+
+/// getNumConflictsWithPhysReg - Return the number of uses and defs of the
+/// specified interval that conflict with the specified physical register.
+unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
+                                                   unsigned PhysReg) const {
+  unsigned NumConflicts = 0;
+  const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg));
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (MI->isDebugValue())
+      continue;
+    SlotIndex Index = getInstructionIndex(MI);
+    if (pli.liveAt(Index))
+      ++NumConflicts;
+  }
+  return NumConflicts;
+}
+
+/// spillPhysRegAroundRegDefsUses - Spill the specified physical register
+/// around all defs and uses of the specified interval. Return true if it
+/// was able to cut its interval.
+bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
+                                            unsigned PhysReg, VirtRegMap &vrm) {
+  unsigned SpillReg = getRepresentativeReg(PhysReg);
+
+  DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+               << " represented by " << tri_->getName(SpillReg) << '\n');
+
+  for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
+    // If there is a register which aliases PhysReg but is not a sub-register
+    // of the chosen representative super register, assert, since we can't
+    // handle it yet.
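+    // Hypothetical x86 illustration: spilling around %AX with representative
+    // %EAX is fine because the aliases %AL and %AH are sub-registers of
+    // %EAX; an alias outside the chosen super-register is the case we cannot
+    // handle yet.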
+    assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) ||
+           tri_->isSuperRegister(*AS, SpillReg));
+
+  bool Cut = false;
+  SmallVector<unsigned, 4> PRegs;
+  if (hasInterval(SpillReg))
+    PRegs.push_back(SpillReg);
+  for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+    if (hasInterval(*SR))
+      PRegs.push_back(*SR);
+
+  DEBUG({
+      dbgs() << "Trying to spill:";
+      for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+        dbgs() << ' ' << tri_->getName(PRegs[i]);
+      dbgs() << '\n';
+    });
+
+  SmallPtrSet<MachineInstr*, 8> SeenMIs;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineOperand &O = I.getOperand();
+    MachineInstr *MI = O.getParent();
+    if (MI->isDebugValue() || SeenMIs.count(MI))
+      continue;
+    SeenMIs.insert(MI);
+    SlotIndex Index = getInstructionIndex(MI);
+    bool LiveReg = false;
+    for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
+      unsigned PReg = PRegs[i];
+      LiveInterval &pli = getInterval(PReg);
+      if (!pli.liveAt(Index))
+        continue;
+      LiveReg = true;
+      SlotIndex StartIdx = Index.getLoadIndex();
+      SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
+      if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
+        std::string msg;
+        raw_string_ostream Msg(msg);
+        Msg << "Ran out of registers during register allocation!";
+        if (MI->isInlineAsm()) {
+          Msg << "\nPlease check your inline asm statement for invalid "
+              << "constraints:\n";
+          MI->print(Msg, tm_);
+        }
+        report_fatal_error(Msg.str());
+      }
+      pli.removeRange(StartIdx, EndIdx);
+      LiveReg = true;
+    }
+    if (!LiveReg)
+      continue;
+    DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+    vrm.addEmergencySpill(SpillReg, MI);
+    Cut = true;
+  }
+  return Cut;
+}
+
+LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
+                                                  MachineInstr* startInst) {
+  LiveInterval& Interval = getOrCreateInterval(reg);
+  VNInfo* VN = Interval.getNextValue(
+    SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+    startInst, getVNInfoAllocator());
+  VN->setHasPHIKill(true);
+  LiveRange LR(
+     SlotIndex(getInstructionIndex(startInst).getDefIndex()),
+     getMBBEndIdx(startInst->getParent()), VN);
+  Interval.addRange(LR);
+
+  return LR;
+}
+
diff --git a/final/lib/CodeGen/LiveIntervalUnion.cpp b/final/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 00000000000..205f28a0d65
--- /dev/null
+++ b/final/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,315 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
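+//
+// A minimal usage sketch (names hypothetical; a per-physreg array like this
+// is what the basic register allocator keeps):
+//   LiveIntervalUnion &LIU = PhysReg2LiveUnion[PhysReg];
+//   LIU.unify(VirtRegLI);    // assign VirtRegLI to PhysReg
+//   ...
+//   LIU.extract(VirtRegLI);  // undo the assignment on eviction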
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+  if (VirtReg.empty())
+    return;
+  ++Tag;
+
+  // Insert each of the virtual register's live segments into the map.
+  LiveInterval::iterator RegPos = VirtReg.begin();
+  LiveInterval::iterator RegEnd = VirtReg.end();
+  SegmentIter SegPos = Segments.find(RegPos->start);
+
+  for (;;) {
+    SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+    if (++RegPos == RegEnd)
+      return;
+    SegPos.advanceTo(RegPos->start);
+  }
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+  if (VirtReg.empty())
+    return;
+  ++Tag;
+
+  // Remove each of the virtual register's live segments from the map.
+  LiveInterval::iterator RegPos = VirtReg.begin();
+  LiveInterval::iterator RegEnd = VirtReg.end();
+  SegmentIter SegPos = Segments.find(RegPos->start);
+
+  for (;;) {
+    assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+    SegPos.erase();
+    if (!SegPos.valid())
+      return;
+
+    // Skip all segments that may have been coalesced.
+    RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+    if (RegPos == RegEnd)
+      return;
+
+    SegPos.advanceTo(RegPos->start);
+  }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+  OS << "LIU " << PrintReg(RepReg, TRI);
+  if (empty()) {
+    OS << " empty\n";
+    return;
+  }
+  for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+    OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+       << PrintReg(SI.value()->reg, TRI);
+  }
+  OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+                                          const TargetRegisterInfo *TRI) const {
+  OS << '[' << start() << ';' << stop() << "):"
+     << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+                                     const TargetRegisterInfo *TRI) {
+  OS << "Interferences with ";
+  LiveUnion->print(OS, TRI);
+  InterferenceResult IR = firstInterference();
+  while (isInterference(IR)) {
+    OS << "  ";
+    IR.print(OS, TRI);
+    OS << '\n';
+    nextInterference(IR);
+  }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+  for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+    VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments. This may
+// be ok since most virtual registers have very few segments. If we had a data
+// structure that optimized MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+  // Search until reaching the end of the LiveUnion segments.
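+  // The loop below leapfrogs the two sorted segment lists: advance VirtRegI
+  // to the first segment ending after LiveUnionI's start, then advance
+  // LiveUnionI likewise, until a pair overlaps or an iterator runs out.
+  // Hypothetical indexes:
+  //   VirtReg:   [4,8) [16,20)
+  //   LiveUnion: [10,12) [18,24)
+  // converges on the overlapping pair [16,20) / [18,24).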
+  LiveInterval::iterator VirtRegEnd = VirtReg->end();
+  if (IR.VirtRegI == VirtRegEnd)
+    return;
+  while (IR.LiveUnionI.valid()) {
+    // Slowly advance the live virtual reg iterator until we surpass the next
+    // segment in LiveUnion.
+    //
+    // Note: If this is ever used for coalescing of fixed registers and we have
+    // a live vreg with thousands of segments, then change this code to use
+    // upperBound instead.
+    IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+    if (IR.VirtRegI == VirtRegEnd)
+      break; // Retain current (nonoverlapping) LiveUnionI
+
+    // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+    IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+    // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+    if (!IR.LiveUnionI.valid())
+      break;
+    if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+      assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+             "upperBound postcondition");
+      break;
+    }
+  }
+  if (!IR.LiveUnionI.valid())
+    IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+  if (CheckedFirstInterference)
+    return FirstInterference;
+  CheckedFirstInterference = true;
+  InterferenceResult &IR = FirstInterference;
+
+  // Quickly skip interference check for empty sets.
+  if (VirtReg->empty() || LiveUnion->empty()) {
+    IR.VirtRegI = VirtReg->end();
+  } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+    // VirtReg starts first, perform double binary search.
+    IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+    if (IR.VirtRegI != VirtReg->end())
+      IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start);
+  } else {
+    // LiveUnion starts first, perform double binary search.
+    IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex());
+    if (IR.LiveUnionI.valid())
+      IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+    else
+      IR.VirtRegI = VirtReg->end();
+  }
+  findIntersection(FirstInterference);
+  assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+         && "Uninitialized iterator");
+  return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+  assert(isInterference(IR) && "iteration past end of interferences");
+
+  // Advance either the VirtReg or LiveUnion segment to ensure that we visit
+  // all unique overlapping pairs.
+  if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+    if (++IR.VirtRegI == VirtReg->end())
+      return false;
+  }
+  else {
+    if (!(++IR.LiveUnionI).valid()) {
+      IR.VirtRegI = VirtReg->end();
+      return false;
+    }
+  }
+  // Short-circuit findIntersection() if possible.
+  if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+    return true;
+
+  // Find the next intersection.
+  findIntersection(IR);
+  return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+  SmallVectorImpl<LiveInterval*>::const_iterator I =
+    std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+  return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+  InterferenceResult IR = firstInterference();
+  LiveInterval::iterator VirtRegEnd = VirtReg->end();
+  LiveInterval *RecentInterferingVReg = NULL;
+  if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+    // Advance the union's iterator to reach an unseen interfering vreg.
+    do {
+      if (IR.LiveUnionI.value() == RecentInterferingVReg)
+        continue;
+
+      if (!isSeenInterference(IR.LiveUnionI.value()))
+        break;
+
+      // Cache the most recent interfering vreg to bypass isSeenInterference.
+      RecentInterferingVReg = IR.LiveUnionI.value();
+
+    } while ((++IR.LiveUnionI).valid());
+    if (!IR.LiveUnionI.valid())
+      break;
+
+    // Advance the VirtReg iterator until surpassing the next segment in
+    // LiveUnion.
+    IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+    if (IR.VirtRegI == VirtRegEnd)
+      break;
+
+    // Check for intersection with the union's segment.
+    if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+      if (!IR.LiveUnionI.value()->isSpillable())
+        SeenUnspillableVReg = true;
+
+      if (InterferingVRegs.size() == MaxInterferingRegs)
+        // Leave SeenAllInterferences set to false to indicate that at least one
+        // interference exists beyond those we collected.
+        return MaxInterferingRegs;
+
+      InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+      // Cache the most recent interfering vreg to bypass isSeenInterference.
+      RecentInterferingVReg = IR.LiveUnionI.value();
+      ++IR.LiveUnionI;
+      continue;
+    }
+    // VirtRegI may have advanced far beyond LiveUnionI,
+    // do a fast intersection test to "catch up"
+    IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+  }
+  SeenAllInterferences = true;
+  return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+  // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+  // overlaps.
+  IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+    Overlaps(LiveUnion->getMap(), Loop->getMap());
+  if (!Overlaps.valid())
+    return false;
+
+  // The loop is overlapping an LIU assignment. Check VirtReg as well.
+  LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+  for (;;) {
+    if (VRI == VirtReg->end())
+      return false;
+    if (VRI->start < Overlaps.stop())
+      return true;
+
+    Overlaps.advanceTo(VRI->start);
+    if (!Overlaps.valid())
+      return false;
+    if (Overlaps.start() < VRI->end)
+      return true;
+
+    VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+  }
+}
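findIntersection() and checkLoopInterference() above are both instances of the classic tandem walk over two start-sorted segment sequences: test the current pair, then advance whichever side ends first. Reduced to plain vectors (a self-contained sketch, not LLVM code):

#include <vector>

struct Seg { int Start, End; };  // half-open [Start, End)

// Returns true if any segment of A overlaps any segment of B. Both inputs
// must be sorted by Start, mirroring LiveInterval and LiveSegments.
bool anyOverlap(const std::vector<Seg> &A, const std::vector<Seg> &B) {
  unsigned i = 0, j = 0;
  while (i != A.size() && j != B.size()) {
    if (A[i].Start < B[j].End && B[j].Start < A[i].End)
      return true;                 // same test as overlap() in the header
    // The segment that ends first cannot overlap anything later on the
    // other side, so advance it.
    if (A[i].End <= B[j].End)
      ++i;
    else
      ++j;
  }
  return false;
}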
diff --git a/final/lib/CodeGen/LiveIntervalUnion.h b/final/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 00000000000..6f9c5f4455e
--- /dev/null
+++ b/final/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,258 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+#include <climits>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+        const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+  return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+  // A set of live virtual register segments that supports fast insertion,
+  // intersection, and removal.
+  // Mapping SlotIndex intervals to virtual register numbers.
+  typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+  // SegmentIter can advance to the next segment ordered by starting position
+  // which may belong to a different live virtual register. We also must be able
+  // to reach the current segment's containing virtual register.
+  typedef LiveSegments::iterator SegmentIter;
+
+  // LiveIntervalUnions share an external allocator.
+  typedef LiveSegments::Allocator Allocator;
+
+  class InterferenceResult;
+  class Query;
+
+private:
+  const unsigned RepReg;  // representative register number
+  unsigned Tag;           // unique tag for current contents.
+  LiveSegments Segments;  // union of virtual reg segments
+
+public:
+  LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+    {}
+
+  // Iterate over all segments in the union of live virtual registers ordered
+  // by their starting position.
+  SegmentIter begin() { return Segments.begin(); }
+  SegmentIter end() { return Segments.end(); }
+  SegmentIter find(SlotIndex x) { return Segments.find(x); }
+  bool empty() const { return Segments.empty(); }
+  SlotIndex startIndex() const { return Segments.start(); }
+
+  // Provide public access to the underlying map to allow overlap iteration.
+  typedef LiveSegments Map;
+  const Map &getMap() { return Segments; }
+
+  /// getTag - Return an opaque tag representing the current state of the union.
+  unsigned getTag() const { return Tag; }
+
+  /// changedSince - Return true if the union has changed since getTag returned
+  /// tag.
+  bool changedSince(unsigned tag) const { return tag != Tag; }
+
+  // Add a live virtual register to this union and merge its segments.
+  void unify(LiveInterval &VirtReg);
+
+  // Remove a live virtual register's segments from this union.
+  void extract(LiveInterval &VirtReg);
+
+  // Print union, using TRI to translate register names
+  void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+  // Verify the live intervals in this union and add them to the visited set.
+  void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+  /// Cache a single interference test result in the form of two intersecting
+  /// segments. This allows efficiently iterating over the interferences. The
+  /// iteration logic is handled by LiveIntervalUnion::Query which may
+  /// filter interferences depending on the type of query.
+  class InterferenceResult {
+    friend class Query;
+
+    LiveInterval::iterator VirtRegI; // current position in VirtReg
+    SegmentIter LiveUnionI;          // current position in LiveUnion
+
+    // Internal ctor.
+    InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+      : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+  public:
+    // Public default ctor.
+    InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+    /// start - Return the start of the current overlap.
+    SlotIndex start() const {
+      return std::max(VirtRegI->start, LiveUnionI.start());
+    }
+
+    /// stop - Return the end of the current overlap.
+    SlotIndex stop() const {
+      return std::min(VirtRegI->end, LiveUnionI.stop());
+    }
+
+    /// interference - Return the register that is interfering here.
+    LiveInterval *interference() const { return LiveUnionI.value(); }
+
+    // Note: this interface provides raw access to the iterators because the
+    // result has no way to tell if it's valid to dereference them.
+
+    // Access the VirtReg segment.
+    LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+    // Access the LiveUnion segment.
+    const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+    bool operator==(const InterferenceResult &IR) const {
+      return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+    }
+    bool operator!=(const InterferenceResult &IR) const {
+      return !operator==(IR);
+    }
+
+    void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+  };
+
+  /// Query interferences between a single live virtual register and a live
+  /// interval union.
+  class Query {
+    LiveIntervalUnion *LiveUnion;
+    LiveInterval *VirtReg;
+    InterferenceResult FirstInterference;
+    SmallVector<LiveInterval*,4> InterferingVRegs;
+    bool CheckedFirstInterference;
+    bool SeenAllInterferences;
+    bool SeenUnspillableVReg;
+    unsigned Tag;
+
+  public:
+    Query(): LiveUnion(), VirtReg() {}
+
+    Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+      LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+      SeenAllInterferences(false), SeenUnspillableVReg(false)
+    {}
+
+    void clear() {
+      LiveUnion = NULL;
+      VirtReg = NULL;
+      InterferingVRegs.clear();
+      CheckedFirstInterference = false;
+      SeenAllInterferences = false;
+      SeenUnspillableVReg = false;
+      Tag = 0;
+    }
+
+    void init(LiveInterval *VReg, LiveIntervalUnion *LIU) {
+      assert(VReg && LIU && "Invalid arguments");
+      if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) {
+        // Retain cached results, e.g. firstInterference.
+        return;
+      }
+      clear();
+      LiveUnion = LIU;
+      VirtReg = VReg;
+      Tag = LIU->getTag();
+    }
+
+    LiveInterval &virtReg() const {
+      assert(VirtReg && "uninitialized");
+      return *VirtReg;
+    }
+
+    bool isInterference(const InterferenceResult &IR) const {
+      if (IR.VirtRegI != VirtReg->end()) {
+        assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+               "invalid segment iterators");
+        return true;
+      }
+      return false;
+    }
+
+    // Does this live virtual register interfere with the union?
+    bool checkInterference() { return isInterference(firstInterference()); }
+
+    // Get the first pair of interfering segments, or a noninterfering result.
+    // This initializes the firstInterference_ cache.
+    const InterferenceResult &firstInterference();
+
+    // Treat the result as an iterator and advance to the next interfering pair
+    // of segments. Visiting each unique interfering pair means that the same
+    // VirtReg or LiveUnion segment may be visited multiple times.
+    bool nextInterference(InterferenceResult &IR) const;
+
+    // Count the virtual registers in this union that interfere with this
+    // query's live virtual register, up to maxInterferingRegs.
+    unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+    // Was this virtual register visited during collectInterferingVRegs?
+    bool isSeenInterference(LiveInterval *VReg) const;
+
+    // Did collectInterferingVRegs collect all interferences?
+    bool seenAllInterferences() const { return SeenAllInterferences; }
+
+    // Did collectInterferingVRegs encounter an unspillable vreg?
+    bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+    // Vector generated by collectInterferingVRegs.
+    const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+      return InterferingVRegs;
+    }
+
+    /// checkLoopInterference - Return true if there is interference overlapping
+    /// Loop.
+    bool checkLoopInterference(MachineLoopRange*);
+
+    void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+  private:
+    Query(const Query&);          // DO NOT IMPLEMENT
+    void operator=(const Query&); // DO NOT IMPLEMENT
+
+    // Private interface for queries
+    void findIntersection(InterferenceResult &IR) const;
+  };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
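For orientation, this is roughly how an allocator is expected to drive the Query class declared above. The helper below is a hypothetical caller, not code from this tree; only Query, checkInterference() and collectInterferingVRegs() come from LiveIntervalUnion.h:

// Hypothetical probe of one physical register's union (assumes the LLVM
// headers above and using namespace llvm).
// Returns 0 if VirtReg can be assigned without conflict, otherwise the
// number of interfering virtual registers found (capped at 8).
unsigned probeInterference(LiveInterval &VirtReg,
                           LiveIntervalUnion &PhysRegUnion) {
  LiveIntervalUnion::Query Q(&VirtReg, &PhysRegUnion);
  if (!Q.checkInterference())
    return 0;
  // Bounded scan; Q.seenAllInterferences() reports whether the cap was hit.
  return Q.collectInterferingVRegs(8);
}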
diff --git a/final/lib/CodeGen/LiveRangeEdit.cpp b/final/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 00000000000..7de1284cb97
--- /dev/null
+++ b/final/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,130 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri,
+                                    LiveIntervals &lis,
+                                    VirtRegMap &vrm) {
+  const TargetRegisterClass *RC = mri.getRegClass(getReg());
+  unsigned VReg = mri.createVirtualRegister(RC);
+  vrm.grow();
+  vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg()));
+  LiveInterval &li = lis.getOrCreateInterval(VReg);
+  newRegs_.push_back(&li);
+  return li;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+                                   const TargetInstrInfo &tii,
+                                   AliasAnalysis *aa) {
+  for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+       E = parent_.vni_end(); I != E; ++I) {
+    VNInfo *VNI = *I;
+    if (VNI->isUnused())
+      continue;
+    MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+    if (!DefMI)
+      continue;
+    if (tii.isTriviallyReMaterializable(DefMI, aa))
+      remattable_.insert(VNI);
+  }
+  scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+                                        const TargetInstrInfo &tii,
+                                        AliasAnalysis *aa) {
+  if (!scannedRemattable_)
+    scanRemattable(lis, tii, aa);
+  return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+                                       SlotIndex OrigIdx,
+                                       SlotIndex UseIdx,
+                                       LiveIntervals &lis) {
+  OrigIdx = OrigIdx.getUseIndex();
+  UseIdx = UseIdx.getUseIndex();
+  for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = OrigMI->getOperand(i);
+    if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg())
+      continue;
+    // Reserved registers are OK.
+    if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+      continue;
+    // We don't want to move any defs.
+    if (MO.isDef())
+      return false;
+    // We cannot depend on virtual registers in uselessRegs_.
+    if (uselessRegs_)
+      for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui)
+        if ((*uselessRegs_)[ui]->reg == MO.getReg())
+          return false;
+
+    LiveInterval &li = lis.getInterval(MO.getReg());
+    const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+    if (!OVNI)
+      continue;
+    if (OVNI != li.getVNInfoAt(UseIdx))
+      return false;
+  }
+  return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+                                       SlotIndex UseIdx,
+                                       bool cheapAsAMove,
+                                       LiveIntervals &lis) {
+  assert(scannedRemattable_ && "Call anyRematerializable first");
+
+  // Use scanRemattable info.
+  if (!remattable_.count(RM.ParentVNI))
+    return false;
+
+  // No defining instruction.
+  RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def);
+  assert(RM.OrigMI && "Defining instruction for remattable value disappeared");
+
+  // If only cheap remats were requested, bail out early.
+  if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+    return false;
+
+  // Verify that all used registers are available with the same values.
+  if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis))
+    return false;
+
+  return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         unsigned DestReg,
+                                         const Remat &RM,
+                                         LiveIntervals &lis,
+                                         const TargetInstrInfo &tii,
+                                         const TargetRegisterInfo &tri) {
+  assert(RM.OrigMI && "Invalid remat");
+  tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+  rematted_.insert(RM.ParentVNI);
+  return lis.InsertMachineInstrInMaps(--MI).getDefIndex();
+}
+
diff --git a/final/lib/CodeGen/LiveRangeEdit.h b/final/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 00000000000..95181305600
--- /dev/null
+++ b/final/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,135 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H

+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+  LiveInterval &parent_;
+  SmallVectorImpl<LiveInterval*> &newRegs_;
+  const SmallVectorImpl<LiveInterval*> *uselessRegs_;
+
+  /// firstNew_ - Index of the first register added to newRegs_.
+  const unsigned firstNew_;
+
+  /// scannedRemattable_ - true when remattable values have been identified.
+  bool scannedRemattable_;
+
+  /// remattable_ - Values defined by remattable instructions as identified by
+  /// tii.isTriviallyReMaterializable().
+  SmallPtrSet<VNInfo*,4> remattable_;
+
+  /// rematted_ - Values that were actually rematted, and so need to have their
+  /// live range trimmed or entirely removed.
+  SmallPtrSet<const VNInfo*,4> rematted_;
+
+  /// scanRemattable - Identify the parent_ values that may rematerialize.
+  void scanRemattable(LiveIntervals &lis,
+                      const TargetInstrInfo &tii,
+                      AliasAnalysis *aa);
+
+  /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+  /// OrigIdx are also available with the same value at UseIdx.
+  bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+                          SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+  /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+  /// @param parent The register being spilled or split.
+  /// @param newRegs List to receive any new registers created. This needn't be
+  ///                empty initially, any existing registers are ignored.
+  /// @param uselessRegs List of registers that can't be used when
+  ///        rematerializing values because they are about to be removed.
+  LiveRangeEdit(LiveInterval &parent,
+                SmallVectorImpl<LiveInterval*> &newRegs,
+                const SmallVectorImpl<LiveInterval*> *uselessRegs = 0)
+    : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+      firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+  LiveInterval &getParent() const { return parent_; }
+  unsigned getReg() const { return parent_.reg; }
+
+  /// Iterator for accessing the new registers added by this edit.
+  typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+  iterator begin() const { return newRegs_.begin()+firstNew_; }
+  iterator end() const { return newRegs_.end(); }
+  unsigned size() const { return newRegs_.size()-firstNew_; }
+  bool empty() const { return size() == 0; }
+  LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+  /// create - Create a new register with the same class and stack slot as
+  /// parent.
+  LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+  /// anyRematerializable - Return true if any parent values may be
+  /// rematerializable.
+  /// This function must be called before any rematerialization is attempted.
+  bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+                           AliasAnalysis*);
+
+  /// Remat - Information needed to rematerialize at a specific location.
+  struct Remat {
+    VNInfo *ParentVNI;      // parent_'s value at the remat location.
+    MachineInstr *OrigMI;   // Instruction defining ParentVNI.
+    explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+  };
+
+  /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+  /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+  /// When cheapAsAMove is set, only cheap remats are allowed.
+  bool canRematerializeAt(Remat &RM,
+                          SlotIndex UseIdx,
+                          bool cheapAsAMove,
+                          LiveIntervals &lis);
+
+  /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+  /// instruction into MBB before MI. The new instruction is mapped, but
+  /// liveness is not updated.
+  /// Return the SlotIndex of the new instruction.
+  SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            unsigned DestReg,
+                            const Remat &RM,
+                            LiveIntervals&,
+                            const TargetInstrInfo&,
+                            const TargetRegisterInfo&);
+
+  /// markRematerialized - explicitly mark a value as rematerialized after doing
+  /// it manually.
+  void markRematerialized(const VNInfo *ParentVNI) {
+    rematted_.insert(ParentVNI);
+  }
+
+  /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+  bool didRematerialize(const VNInfo *ParentVNI) const {
+    return rematted_.count(ParentVNI);
+  }
+};
+
+}
+
+#endif
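The rematerialization entry points in LiveRangeEdit are meant to be called in a fixed order: anyRematerializable() first, then canRematerializeAt() per value, then rematerializeAt(). A hypothetical driver showing that protocol (the spiller context, insertion point and analysis references are assumed; only the LiveRangeEdit calls come from the files above):

// Illustrative only: tries to rematerialize ParentVNI's defining instruction
// before MI instead of reloading from a stack slot. Assumes the LLVM headers
// above and using namespace llvm.
bool tryRemat(LiveRangeEdit &Edit, VNInfo *ParentVNI, SlotIndex UseIdx,
              MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
              unsigned DestReg, LiveIntervals &lis,
              const TargetInstrInfo &tii, const TargetRegisterInfo &tri,
              AliasAnalysis *aa) {
  if (!Edit.anyRematerializable(lis, tii, aa))
    return false;                     // must precede canRematerializeAt()
  LiveRangeEdit::Remat RM(ParentVNI);
  if (!Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/false, lis))
    return false;
  Edit.rematerializeAt(MBB, MI, DestReg, RM, lis, tii, tri);
  return true;
}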
diff --git a/final/lib/CodeGen/LiveStackAnalysis.cpp b/final/lib/CodeGen/LiveStackAnalysis.cpp
new file mode 100644
index 00000000000..c75196a4721
--- /dev/null
+++ b/final/lib/CodeGen/LiveStackAnalysis.cpp
@@ -0,0 +1,82 @@
+//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the live stack slot analysis pass. It is analogous to
+// live interval analysis except it's analyzing liveness of stack slots rather
+// than registers.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livestacks"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+#include <limits>
+using namespace llvm;
+
+char LiveStacks::ID = 0;
+INITIALIZE_PASS(LiveStacks, "livestacks",
+                "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
+
+void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequiredTransitive<SlotIndexes>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void LiveStacks::releaseMemory() {
+  // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+  VNInfoAllocator.Reset();
+  S2IMap.clear();
+  S2RCMap.clear();
+}
+
+bool LiveStacks::runOnMachineFunction(MachineFunction &) {
+  // FIXME: No analysis is being done right now. We are relying on the
+  // register allocators to provide the information.
+  return false;
+}
+
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+  assert(Slot >= 0 && "Spill slot index must be >= 0");
+  SS2IntervalMap::iterator I = S2IMap.find(Slot);
+  if (I == S2IMap.end()) {
+    I = S2IMap.insert(I, std::make_pair(Slot,
+        LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+    S2RCMap.insert(std::make_pair(Slot, RC));
+  } else {
+    // Use the largest common subclass register class.
+    const TargetRegisterClass *OldRC = S2RCMap[Slot];
+    S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+  }
+  return I->second;
+}
+
+/// print - Implement the dump method.
+void LiveStacks::print(raw_ostream &OS, const Module*) const {
+
+  OS << "********** INTERVALS **********\n";
+  for (const_iterator I = begin(), E = end(); I != E; ++I) {
+    I->second.print(OS);
+    int Slot = I->first;
+    const TargetRegisterClass *RC = getIntervalRegClass(Slot);
+    if (RC)
+      OS << " [" << RC->getName() << "]\n";
+    else
+      OS << " [Unknown]\n";
+  }
+}
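A consumer such as the spiller registers each spill slot it creates so the slot's liveness can be tracked; a minimal, hypothetical call site (the surrounding spill code is assumed, and noteSpillSlot is an invented name):

// Illustrative only: record a freshly allocated spill slot with LiveStacks.
// Assumes the LLVM headers above and using namespace llvm.
void noteSpillSlot(LiveStacks &LS, int Slot, const TargetRegisterClass *RC) {
  // The first call creates the interval; later calls with other classes
  // narrow the recorded class via getCommonSubClass, as shown above.
  LiveInterval &SI = LS.getOrCreateInterval(Slot, RC);
  (void)SI; // a real caller would now extend SI with the slot's live ranges
}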
diff --git a/final/lib/CodeGen/LiveVariables.cpp b/final/lib/CodeGen/LiveVariables.cpp
new file mode 100644
index 00000000000..dd43ef2530c
--- /dev/null
+++ b/final/lib/CodeGen/LiveVariables.cpp
@@ -0,0 +1,772 @@
+//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveVariable analysis pass. For each machine
+// instruction in the function, this pass calculates the set of registers that
+// are immediately dead after the instruction (i.e., the instruction calculates
+// the value, but it is never used) and the set of registers that are used by
+// the instruction, but are never used after the instruction (i.e., they are
+// killed).
+//
+// This class computes live variables using a sparse implementation based on
+// the machine code SSA form. This class computes live variable information for
+// each virtual and _register allocatable_ physical register in a function. It
+// uses the dominance properties of SSA form to efficiently compute live
+// variables for virtual registers, and assumes that physical registers are only
+// live within a single basic block (allowing it to do a single local analysis
+// to resolve physical register lifetimes in each basic block). If a physical
+// register is not register allocatable, it is not tracked. This is useful for
+// things like the stack pointer and condition codes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+char LiveVariables::ID = 0;
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+                "Live Variable Analysis", false, false)
+
+
+void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequiredID(UnreachableMachineBlockElimID);
+  AU.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+  for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+    if (Kills[i]->getParent() == MBB)
+      return Kills[i];
+  return NULL;
+}
+
+void LiveVariables::VarInfo::dump() const {
+  dbgs() << "  Alive in blocks: ";
+  for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
+       E = AliveBlocks.end(); I != E; ++I)
+    dbgs() << *I << ", ";
+  dbgs() << "\n  Killed by:";
+  if (Kills.empty())
+    dbgs() << " No instructions.\n";
+  else {
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+      dbgs() << "\n    #" << i << ": " << *Kills[i];
+    dbgs() << "\n";
+  }
+}
+
+/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
+LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
+  assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+         "getVarInfo: not a virtual register!");
+  VirtRegInfo.grow(RegIdx);
+  return VirtRegInfo[RegIdx];
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB,
+                                    std::vector<MachineBasicBlock*> &WorkList) {
+  unsigned BBNum = MBB->getNumber();
+
+  // Check to see if this basic block is one of the killing blocks. If so,
+  // remove it.
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    if (VRInfo.Kills[i]->getParent() == MBB) {
+      VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry
+      break;
+    }
+
+  if (MBB == DefBlock) return; // Terminate recursion
+
+  if (VRInfo.AliveBlocks.test(BBNum))
+    return; // We already know the block is live
+
+  // Mark the variable known alive in this bb
+  VRInfo.AliveBlocks.set(BBNum);
+
+  for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(),
+       E = MBB->pred_rend(); PI != E; ++PI)
+    WorkList.push_back(*PI);
+}
+
+void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo,
+                                            MachineBasicBlock *DefBlock,
+                                            MachineBasicBlock *MBB) {
+  std::vector<MachineBasicBlock*> WorkList;
+  MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *Pred = WorkList.back();
+    WorkList.pop_back();
+    MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList);
+  }
+}
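The two overloads above split the usual recursive mark-alive step into a worklist: the recursive case only pushes predecessors. The invariant, reduced to a dependency-free sketch (plain ints stand in for block numbers; Preds is a hypothetical predecessor table):

#include <vector>

// Mark every block between a use and its reaching def as live-through,
// stopping at the defining block and at blocks already marked.
void markAliveUpward(int DefBlock, int UseBlock, std::vector<bool> &Alive,
                     const std::vector<std::vector<int> > &Preds) {
  std::vector<int> WorkList(Preds[UseBlock]);
  while (!WorkList.empty()) {
    int BB = WorkList.back();
    WorkList.pop_back();
    if (BB == DefBlock || Alive[BB])
      continue;                        // same termination tests as above
    Alive[BB] = true;
    for (unsigned i = 0, e = Preds[BB].size(); i != e; ++i)
      WorkList.push_back(Preds[BB][i]);
  }
}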
+
+void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB,
+                                     MachineInstr *MI) {
+  assert(MRI->getVRegDef(reg) && "Register use before def!");
+
+  unsigned BBNum = MBB->getNumber();
+
+  VarInfo& VRInfo = getVarInfo(reg);
+  VRInfo.NumUses++;
+
+  // Check to see if this basic block is already a kill block.
+  if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) {
+    // Yes, this register is killed in this basic block already. Increase the
+    // live range by updating the kill instruction.
+    VRInfo.Kills.back() = MI;
+    return;
+  }
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i)
+    assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!");
+#endif
+
+  // This situation can occur:
+  //
+  //     ,------.
+  //     |      |
+  //     |      v
+  //     |   t2 = phi ... t1 ...
+  //     |      |
+  //     |      v
+  //     |   t1 = ...
+  //     |  ... = ... t1 ...
+  //     |      |
+  //     `------'
+  //
+  // where there is a use in a PHI node that's a predecessor to the defining
+  // block. We don't want to mark all predecessors as having the value "alive"
+  // in this case.
+  if (MBB == MRI->getVRegDef(reg)->getParent()) return;
+
+  // Add a new kill entry for this basic block. If this virtual register is
+  // already marked as alive in this basic block, that means it is alive in at
+  // least one of the successor blocks, it's not a kill.
+  if (!VRInfo.AliveBlocks.test(BBNum))
+    VRInfo.Kills.push_back(MI);
+
+  // Update all dominating blocks to mark them as "known live".
+  for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+       E = MBB->pred_end(); PI != E; ++PI)
+    MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI);
+}
+
+void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) {
+  VarInfo &VRInfo = getVarInfo(Reg);
+
+  if (VRInfo.AliveBlocks.empty())
+    // If vr is not alive in any block, then defaults to dead.
+    VRInfo.Kills.push_back(MI);
+}
+
+/// FindLastPartialDef - Return the last partial def of the specified register.
+/// Also returns the sub-registers that are defined by the instruction.
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
+                                            SmallSet<unsigned,4> &PartDefRegs) {
+  unsigned LastDefReg = 0;
+  unsigned LastDefDist = 0;
+  MachineInstr *LastDef = NULL;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (!Def)
+      continue;
+    unsigned Dist = DistanceMap[Def];
+    if (Dist > LastDefDist) {
+      LastDefReg = SubReg;
+      LastDef = Def;
+      LastDefDist = Dist;
+    }
+  }
+
+  if (!LastDef)
+    return 0;
+
+  PartDefRegs.insert(LastDefReg);
+  for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = LastDef->getOperand(i);
+    if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+      continue;
+    unsigned DefReg = MO.getReg();
+    if (TRI->isSubRegister(Reg, DefReg)) {
+      PartDefRegs.insert(DefReg);
+      for (const unsigned *SubRegs = TRI->getSubRegisters(DefReg);
+           unsigned SubReg = *SubRegs; ++SubRegs)
+        PartDefRegs.insert(SubReg);
+    }
+  }
+  return LastDef;
+}
+
+/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add
+/// implicit defs to a machine instruction if there was an earlier def of its
+/// super-register.
+void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  // If there was a previous use or a "full" def all is well.
+  if (!LastDef && !PhysRegUse[Reg]) {
+    // Otherwise, the last sub-register def implicitly defines this register.
+    // e.g.
+    // AH =
+    // AL = ... <imp-def EAX>, <imp-kill AH>
+    //    = AH
+    // ...
+    //    = EAX
+    // All of the sub-registers must have been defined before the use of Reg!
+    SmallSet<unsigned, 4> PartDefRegs;
+    MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs);
+    // If LastPartialDef is NULL, it must be using a livein register.
+    if (LastPartialDef) {
+      LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/,
+                                                           true/*IsImp*/));
+      PhysRegDef[Reg] = LastPartialDef;
+      SmallSet<unsigned, 8> Processed;
+      for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+           unsigned SubReg = *SubRegs; ++SubRegs) {
+        if (Processed.count(SubReg))
+          continue;
+        if (PartDefRegs.count(SubReg))
+          continue;
+        // This part of Reg was defined before the last partial def. It's killed
+        // here.
+        LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg,
+                                                             false/*IsDef*/,
+                                                             true/*IsImp*/));
+        PhysRegDef[SubReg] = LastPartialDef;
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Processed.insert(*SS);
+      }
+    }
+  }
+  else if (LastDef && !PhysRegUse[Reg] &&
+           !LastDef->findRegisterDefOperand(Reg))
+    // Last def defines the super register, add an implicit def of reg.
+    LastDef->addOperand(MachineOperand::CreateReg(Reg,
+                                                 true/*IsDef*/, true/*IsImp*/));
+
+  // Remember this use.
+  PhysRegUse[Reg] = MI;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs)
+    PhysRegUse[SubReg] = MI;
+}
+
+/// FindLastRefOrPartRef - Return the last reference or partial reference of
+/// the specified register.
+MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return 0;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  unsigned LastPartDefDist = 0;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist)
+        LastPartDefDist = Dist;
+    } else if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  return LastRefOrPartRef;
+}
+
+bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
+  MachineInstr *LastDef = PhysRegDef[Reg];
+  MachineInstr *LastUse = PhysRegUse[Reg];
+  if (!LastDef && !LastUse)
+    return false;
+
+  MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
+  unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
+  // The whole register is used.
+  // AL =
+  // AH =
+  //
+  //    = AX
+  //    = AL, AX<imp-use, kill>
+  // AX =
+  //
+  // Or whole register is defined, but not used at all.
+  // AX<dead> =
+  // ...
+  // AX =
+  //
+  // Or whole register is defined, but only partly used.
+  // AX<dead> = AL<imp-def>
+  //    = AL<kill>
+  // AX =
+  MachineInstr *LastPartDef = 0;
+  unsigned LastPartDefDist = 0;
+  SmallSet<unsigned, 8> PartUses;
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    MachineInstr *Def = PhysRegDef[SubReg];
+    if (Def && Def != LastDef) {
+      // There was a def of this sub-register in between. This is a partial
+      // def, keep track of the last one.
+      unsigned Dist = DistanceMap[Def];
+      if (Dist > LastPartDefDist) {
+        LastPartDefDist = Dist;
+        LastPartDef = Def;
+      }
+      continue;
+    }
+    if (MachineInstr *Use = PhysRegUse[SubReg]) {
+      PartUses.insert(SubReg);
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.insert(*SS);
+      unsigned Dist = DistanceMap[Use];
+      if (Dist > LastRefOrPartRefDist) {
+        LastRefOrPartRefDist = Dist;
+        LastRefOrPartRef = Use;
+      }
+    }
+  }
+
+  if (!PhysRegUse[Reg]) {
+    // Partial uses. Mark register def dead and add implicit def of
+    // sub-registers which are used.
+    // EAX<dead> = op AL<imp-def>
+    // That is, EAX def is dead but AL def extends past it.
+    PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      if (!PartUses.count(SubReg))
+        continue;
+      bool NeedDef = true;
+      if (PhysRegDef[Reg] == PhysRegDef[SubReg]) {
+        MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg);
+        if (MO) {
+          NeedDef = false;
+          assert(!MO->isDead());
+        }
+      }
+      if (NeedDef)
+        PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg,
+                                                true/*IsDef*/, true/*IsImp*/));
+      MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg);
+      if (LastSubRef)
+        LastSubRef->addRegisterKilled(SubReg, TRI, true);
+      else {
+        LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
+        PhysRegUse[SubReg] = LastRefOrPartRef;
+        for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg);
+             unsigned SSReg = *SSRegs; ++SSRegs)
+          PhysRegUse[SSReg] = LastRefOrPartRef;
+      }
+      for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+        PartUses.erase(*SS);
+    }
+  } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
+    if (LastPartDef)
+      // The last partial def kills the register.
+      LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+                                                true/*IsImp*/, true/*IsKill*/));
+    else {
+      MachineOperand *MO =
+        LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+      bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
+      // If the last reference is the last def, then it's not used at all.
+      // That is, unless we are currently processing the last reference itself.
+      LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
+      if (NeedEC) {
+        // If we are adding a subreg def and the superreg def is marked early
+        // clobber, add an early clobber marker to the subreg def.
+        MO = LastRefOrPartRef->findRegisterDefOperand(Reg);
+        if (MO)
+          MO->setIsEarlyClobber();
+      }
+    }
+  } else
+    LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
+  return true;
+}
+
+void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI,
+                                     SmallVector<unsigned, 4> &Defs) {
+  // What parts of the register are previously defined?
+  SmallSet<unsigned, 32> Live;
+  if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
+    Live.insert(Reg);
+    for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS)
+      Live.insert(*SS);
+  } else {
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      // If a register isn't itself defined, but all of the parts that make it
+      // up are defined, then consider it also defined.
+      // e.g.
+      // AL =
+      // AH =
+      //    = AX
+      if (Live.count(SubReg))
+        continue;
+      if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
+        Live.insert(SubReg);
+        for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS)
+          Live.insert(*SS);
+      }
+    }
+  }
+
+  // Start from the largest piece, find the last time any part of the register
+  // is referenced.
+  HandlePhysRegKill(Reg, MI);
+  // Only some of the sub-registers are used.
+  for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+       unsigned SubReg = *SubRegs; ++SubRegs) {
+    if (!Live.count(SubReg))
+      // Skip if this sub-register isn't defined.
+      continue;
+    HandlePhysRegKill(SubReg, MI);
+  }
+
+  if (MI)
+    Defs.push_back(Reg); // Remember this def.
+}
+
+void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI,
+                                      SmallVector<unsigned, 4> &Defs) {
+  while (!Defs.empty()) {
+    unsigned Reg = Defs.back();
+    Defs.pop_back();
+    PhysRegDef[Reg] = MI;
+    PhysRegUse[Reg] = NULL;
+    for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+         unsigned SubReg = *SubRegs; ++SubRegs) {
+      PhysRegDef[SubReg] = MI;
+      PhysRegUse[SubReg] = NULL;
+    }
+  }
+}
+
+bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  MRI = &mf.getRegInfo();
+  TRI = MF->getTarget().getRegisterInfo();
+
+  ReservedRegisters = TRI->getReservedRegs(mf);
+
+  unsigned NumRegs = TRI->getNumRegs();
+  PhysRegDef = new MachineInstr*[NumRegs];
+  PhysRegUse = new MachineInstr*[NumRegs];
+  PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
+  std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+  std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+  PHIJoins.clear();
+
+  analyzePHINodes(mf);
+
+  // Calculate live variable information in depth first order on the CFG of the
+  // function. This guarantees that we will see the definition of a virtual
+  // register before its uses due to dominance properties of SSA (except for PHI
+  // nodes, which are treated as a special case).
+  MachineBasicBlock *Entry = MF->begin();
+  SmallPtrSet<MachineBasicBlock*,16> Visited;
+
+  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MachineBasicBlock *MBB = *DFI;
+
+    // Mark live-in registers as live-in.
+    SmallVector<unsigned, 4> Defs;
+    for (MachineBasicBlock::livein_iterator II = MBB->livein_begin(),
+         EE = MBB->livein_end(); II != EE; ++II) {
+      assert(TargetRegisterInfo::isPhysicalRegister(*II) &&
+             "Cannot have a live-in virtual register!");
+      HandlePhysRegDef(*II, 0, Defs);
+    }
+
+    // Loop over all of the instructions, processing them.
+    DistanceMap.clear();
+    unsigned Dist = 0;
+    for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+         I != E; ++I) {
+      MachineInstr *MI = I;
+      if (MI->isDebugValue())
+        continue;
+      DistanceMap.insert(std::make_pair(MI, Dist++));
+
+      // Process all of the operands of the instruction...
+      unsigned NumOperandsToProcess = MI->getNumOperands();
+
+      // Unless it is a PHI node. In this case, ONLY process the DEF, not any
+      // of the uses. They will be handled in other basic blocks.
+      if (MI->isPHI())
+        NumOperandsToProcess = 1;
+
+      // Clear kill and dead markers. LV will recompute them.
+      SmallVector<unsigned, 4> UseRegs;
+      SmallVector<unsigned, 4> DefRegs;
+      for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isReg() || MO.getReg() == 0)
+          continue;
+        unsigned MOReg = MO.getReg();
+        if (MO.isUse()) {
+          MO.setIsKill(false);
+          UseRegs.push_back(MOReg);
+        } else /*MO.isDef()*/ {
+          MO.setIsDead(false);
+          DefRegs.push_back(MOReg);
+        }
+      }
+
+      // Process all uses.
+      for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
+        unsigned MOReg = UseRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegUse(MOReg, MBB, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegUse(MOReg, MI);
+      }
+
+      // Process all defs.
+      for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
+        unsigned MOReg = DefRegs[i];
+        if (TargetRegisterInfo::isVirtualRegister(MOReg))
+          HandleVirtRegDef(MOReg, MI);
+        else if (!ReservedRegisters[MOReg])
+          HandlePhysRegDef(MOReg, MI, Defs);
+      }
+      UpdatePhysRegDefs(MI, Defs);
+    }
+
+    // Handle any virtual assignments from PHI nodes which might be at the
+    // bottom of this basic block. We check all of our successor blocks to see
+    // if they have PHI nodes, and if so, we simulate an assignment at the end
+    // of the current block.
+    if (!PHIVarInfo[MBB->getNumber()].empty()) {
+      SmallVector<unsigned, 4>& VarInfoVec = PHIVarInfo[MBB->getNumber()];
+
+      for (SmallVector<unsigned, 4>::iterator I = VarInfoVec.begin(),
+           E = VarInfoVec.end(); I != E; ++I)
+        // Mark it alive only in the block we are representing.
+        MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(),
+                                MBB);
+    }
+
+    // Finally, if the last instruction in the block is a return, make sure to
+    // mark it as using all of the live-out values in the function.
+    // Things marked both call and return are tail calls; do not do this for
+    // them. The tail callee need not take the same registers as input
+    // that it produces as output, and there are dependencies for its input
+    // registers elsewhere.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn()
+        && !MBB->back().getDesc().isCall()) {
+      MachineInstr *Ret = &MBB->back();
+
+      for (MachineRegisterInfo::liveout_iterator
+           I = MF->getRegInfo().liveout_begin(),
+           E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+        assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+               "Cannot have a live-out virtual register!");
+        HandlePhysRegUse(*I, Ret);
+
+        // Add live-out registers as implicit uses.
+        if (!Ret->readsRegister(*I))
+          Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
+      }
+    }
+
+    // Loop over PhysRegDef / PhysRegUse, killing any registers that are
+    // available at the end of the basic block.
+    for (unsigned i = 0; i != NumRegs; ++i)
+      if (PhysRegDef[i] || PhysRegUse[i])
+        HandlePhysRegDef(i, 0, Defs);
+
+    std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
+    std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+  }
+
+  // Convert and transfer the dead / killed information we have gathered into
+  // VirtRegInfo onto MI's.
+  for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+    const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+      if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+        VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
+      else
+        VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+  }
+
+  // Check to make sure there are no unreachable blocks in the MC CFG for the
+  // function. If so, it is due to a bug in the instruction selector or some
+  // other part of the code generator if this happens.
+#ifndef NDEBUG
+  for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i)
+    assert(Visited.count(&*i) != 0 && "unreachable basic block found");
+#endif
+
+  delete[] PhysRegDef;
+  delete[] PhysRegUse;
+  delete[] PHIVarInfo;
+
+  return false;
+}
+
+/// replaceKillInstruction - Update register kill info by replacing a kill
+/// instruction with a new one.
+void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI,
+                                           MachineInstr *NewMI) {
+  VarInfo &VI = getVarInfo(Reg);
+  std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI);
+}
+
+/// removeVirtualRegistersKilled - Remove all killed info for the specified
+/// instruction.
+void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isKill()) {
+      MO.setIsKill(false);
+      unsigned Reg = MO.getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        bool removed = getVarInfo(Reg).removeKill(MI);
+        assert(removed && "kill not in register's VarInfo?");
+        removed = true; // Keep release builds from warning about unused vars.
+      }
+    }
+  }
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in here. In
+/// particular, we want to map the variable information of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+///
+void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
+  for (MachineFunction::const_iterator I = Fn.begin(), E = Fn.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
+          .push_back(BBI->getOperand(i).getReg());
+}
+
+bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
+                                      unsigned Reg,
+                                      MachineRegisterInfo &MRI) {
+  unsigned Num = MBB.getNumber();
+
+  // Reg is live-through.
+  if (AliveBlocks.test(Num))
+    return true;
+
+  // Registers defined in MBB cannot be live in.
+  const MachineInstr *Def = MRI.getVRegDef(Reg);
+  if (Def && Def->getParent() == &MBB)
+    return false;
+
+  // Reg was not defined in MBB, was it killed here?
+  return findKill(&MBB);
+}
+
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+  LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+  // Loop over all of the successors of the basic block, checking to see if
+  // the value is either live in the block, or if it is killed in the block.
+  std::vector<MachineBasicBlock*> OpSuccBlocks;
+  for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+       E = MBB.succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccMBB = *SI;
+
+    // Is it alive in this successor?
+    unsigned SuccIdx = SuccMBB->getNumber();
+    if (VI.AliveBlocks.test(SuccIdx))
+      return true;
+    OpSuccBlocks.push_back(SuccMBB);
+  }
+
+  // Check to see if this value is live because there is a use in a successor
+  // that kills it.
+  switch (OpSuccBlocks.size()) {
+  case 1: {
+    MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB)
+        return true;
+    break;
+  }
+  case 2: {
+    MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB1 ||
+          VI.Kills[i]->getParent() == SuccMBB2)
+        return true;
+    break;
+  }
+  default:
+    std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                             VI.Kills[i]->getParent()))
+        return true;
+  }
+  return false;
+}
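addNewBlock, defined next, is the hook CFG-restructuring passes use to keep this analysis valid. A hypothetical call site after splitting the edge from MBB to SuccMBB (the splitting itself is assumed; real callers include PHI elimination):

// Illustrative only (assumes the LLVM headers above and using namespace llvm).
void updateAfterEdgeSplit(LiveVariables &LV, MachineBasicBlock *NewBB,
                          MachineBasicBlock *MBB, MachineBasicBlock *SuccMBB) {
  // Everything live from MBB into SuccMBB now passes through NewBB.
  LV.addNewBlock(NewBB, MBB, SuccMBB);
}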
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+                                MachineBasicBlock *DomBB,
+                                MachineBasicBlock *SuccBB) {
+  const unsigned NumNew = BB->getNumber();
+
+  // All registers used by PHI nodes in SuccBB must be live through BB.
+  for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(),
+       BBE = SuccBB->end(); BBI != BBE && BBI->isPHI(); ++BBI)
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+      if (BBI->getOperand(i+1).getMBB() == BB)
+        getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
+
+  // Update info for all live variables
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    VarInfo &VI = getVarInfo(Reg);
+    if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
+      VI.AliveBlocks.set(NumNew);
+  }
+}
diff --git a/final/lib/CodeGen/LocalStackSlotAllocation.cpp b/final/lib/CodeGen/LocalStackSlotAllocation.cpp
new file mode 100644
index 00000000000..1318d621249
--- /dev/null
+++ b/final/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -0,0 +1,359 @@
+//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass assigns local frame indices to stack slots relative to one another
+// and allocates additional base registers to access them when the target
+// estimates they are likely to be out of range of stack pointer and frame
+// pointer relative addressing.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "localstackalloc"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+using namespace llvm;
+
+STATISTIC(NumAllocations, "Number of frame indices allocated into local block");
+STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated");
+STATISTIC(NumReplacements, "Number of frame index references replaced");
+
+namespace {
+  class FrameRef {
+    MachineBasicBlock::iterator MI; // Instr referencing the frame
+    int64_t LocalOffset;            // Local offset of the frame idx referenced
+  public:
+    FrameRef(MachineBasicBlock::iterator I, int64_t Offset) :
+      MI(I), LocalOffset(Offset) {}
+    bool operator<(const FrameRef &RHS) const {
+      return LocalOffset < RHS.LocalOffset;
+    }
+    MachineBasicBlock::iterator getMachineInstr() { return MI; }
+  };
+
+  class LocalStackSlotPass: public MachineFunctionPass {
+    SmallVector<int64_t,16> LocalOffsets;
+
+    void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+                           bool StackGrowsDown, unsigned &MaxAlign);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    bool insertFrameReferenceRegisters(MachineFunction &Fn);
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit LocalStackSlotPass() : MachineFunctionPass(ID) { }
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+    const char *getPassName() const {
+      return "Local Stack Slot Allocation";
+    }
+
+  private:
+  };
+} // end anonymous namespace
+
+char LocalStackSlotPass::ID = 0;
+
+FunctionPass *llvm::createLocalStackSlotAllocationPass() {
+  return new LocalStackSlotPass();
+}
+
+bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+  unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+
+  // If the target doesn't want/need this pass, or if there are no locals
+  // to consider, early exit.
+  if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0)
+    return true;
+
+  // Make sure we have enough space to store the local offsets.
+  LocalOffsets.resize(MFI->getObjectIndexEnd());
+
+  // Lay out the local blob.
+  calculateFrameObjectOffsets(MF);
+
+  // Insert virtual base registers to resolve frame index references.
+  bool UsedBaseRegs = insertFrameReferenceRegisters(MF);
+
+  // Tell MFI whether any base registers were allocated. PEI will only
+  // want to use the local block allocations from this pass if there were any.
+  // Otherwise, PEI can do a bit better job of getting the alignment right
+  // without a hole at the start since it knows the alignment of the stack
+  // at the start of local allocation, and this pass doesn't.
+  MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+
+  return true;
+}
+
+/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+                                           int FrameIdx, int64_t &Offset,
+                                           bool StackGrowsDown,
+                                           unsigned &MaxAlign) {
+  // If the stack grows down, add the object size to find the lowest address.
+  if (StackGrowsDown)
+    Offset += MFI->getObjectSize(FrameIdx);
+
+  unsigned Align = MFI->getObjectAlignment(FrameIdx);
+
+  // If the alignment of this object is greater than that of the stack, then
+  // increase the stack alignment to match.
+  MaxAlign = std::max(MaxAlign, Align);
+
+  // Adjust to alignment boundary.
+  Offset = (Offset + Align - 1) / Align * Align;
+
+  int64_t LocalOffset = StackGrowsDown ? -Offset : Offset;
+  DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset "
+        << LocalOffset << "\n");
+  // Keep the offset available for base register allocation
+  LocalOffsets[FrameIdx] = LocalOffset;
+  // And tell MFI about it for PEI to use later
+  MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+
+  if (!StackGrowsDown)
+    Offset += MFI->getObjectSize(FrameIdx);
+
+  ++NumAllocations;
+}
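The rounding step above, Offset = (Offset + Align - 1) / Align * Align, is the standard round-up-to-multiple idiom for the non-negative offsets handled here. Spelled out with concrete numbers (a self-contained, runnable check):

#include <cassert>

// Round Offset up to the next multiple of Align (Align > 0, Offset >= 0),
// exactly as AdjustStackOffset does above.
long long alignTo(long long Offset, unsigned Align) {
  return (Offset + Align - 1) / Align * Align;
}

int main() {
  assert(alignTo(13, 8) == 16);  // 13 + 7 = 20; 20 / 8 * 8 = 16
  assert(alignTo(16, 8) == 16);  // already aligned values are unchanged
  assert(alignTo(1, 16) == 16);
  return 0;
}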
+ SmallSet LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + StackGrowsDown, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + } + + // Remember how big this blob of stack space is + MFI->setLocalFrameSize(Offset); + MFI->setLocalFrameMaxAlign(MaxAlign); +} + +static inline bool +lookupCandidateBaseReg(const SmallVector, 8> &Regs, + std::pair &RegOffset, + int64_t FrameSizeAdjust, + int64_t LocalFrameOffset, + const MachineInstr *MI, + const TargetRegisterInfo *TRI) { + unsigned e = Regs.size(); + for (unsigned i = 0; i < e; ++i) { + RegOffset = Regs[i]; + // Check if the relative offset from the where the base register references + // to the target address is in range for the instruction. + int64_t Offset = FrameSizeAdjust + LocalFrameOffset - RegOffset.second; + if (TRI->isFrameOffsetLegal(MI, Offset)) + return true; + } + return false; +} + +bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { + // Scan the function's instructions looking for frame index references. + // For each, ask the target if it wants a virtual base register for it + // based on what we can tell it about where the local will end up in the + // stack frame. If it wants one, re-use a suitable one we've previously + // allocated, or if there isn't one that fits the bill, allocate a new one + // and ask the target to create a defining instruction for it. + bool UsedBaseReg = false; + + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Collect all of the instructions in the block that reference + // a frame index. Also store the frame index referenced to ease later + // lookup. (For any insn that has more than one FI reference, we arbitrarily + // choose the first one). + SmallVector FrameReferenceInsns; + + // A base register definition is a register + offset pair. + SmallVector, 8> BaseRegisters; + + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + + // Debug value instructions can't be out of range, so they don't need + // any updates. + if (MI->isDebugValue()) + continue; + + // For now, allocate the base register(s) within the basic block + // where they're used, and don't try to keep them around outside + // of that. It may be beneficial to try sharing them more broadly + // than that, but the increased register pressure makes that a + // tricky thing to balance. Investigate if re-materializing these + // becomes an issue. 
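+      // For illustration: after this scan FrameReferenceInsns holds one
+      // (instruction, local offset) pair per frame-index reference, e.g.
+      //   { LDRi fi#2 ... , -8 }, { STRi fi#5 ... , -24 }
+      // (opcodes and offsets here are hypothetical).  Sorting these by local
+      // offset below lets nearby slots share a single base register.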
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(i).isFI()) { + // Don't try this with values not in the local block. + if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) + break; + FrameReferenceInsns. + push_back(FrameRef(MI, LocalOffsets[MI->getOperand(i).getIndex()])); + break; + } + } + } + } + + // Sort the frame references by local offset + array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + + MachineBasicBlock *Entry = Fn.begin(); + + // Loop through the frame references and allocate for them as necessary. + for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { + MachineBasicBlock::iterator I = + FrameReferenceInsns[ref].getMachineInstr(); + MachineInstr *MI = I; + for (unsigned idx = 0, e = MI->getNumOperands(); idx != e; ++idx) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(idx).isFI()) { + int FrameIdx = MI->getOperand(idx).getIndex(); + + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + if (TRI->needsFrameBaseReg(MI, LocalOffsets[FrameIdx])) { + unsigned BaseReg = 0; + int64_t Offset = 0; + int64_t FrameSizeAdjust = + StackGrowsDown ? MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + std::pair RegOffset; + if (lookupCandidateBaseReg(BaseRegisters, RegOffset, + FrameSizeAdjust, + LocalOffsets[FrameIdx], + MI, TRI)) { + DEBUG(dbgs() << " Reusing base register " << + RegOffset.first << "\n"); + // We found a register to reuse. + BaseReg = RegOffset.first; + Offset = FrameSizeAdjust + LocalOffsets[FrameIdx] - + RegOffset.second; + } else { + // No previously defined register was in range, so create a + // new one. + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + const TargetRegisterClass *RC = TRI->getPointerRegClass(); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << + LocalOffsets[FrameIdx] + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + int64_t BaseOffset = FrameSizeAdjust + LocalOffsets[FrameIdx] + + InstrOffset; + BaseRegisters.push_back( + std::pair(BaseReg, BaseOffset)); + ++NumBaseRegisters; + UsedBaseReg = true; + } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. 
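+          // Worked example with hypothetical numbers: for a 64-byte local
+          // block on a downward-growing stack, FrameSizeAdjust is 64; a slot
+          // at local offset -24 addressed through a base register defined at
+          // base offset 32 resolves to
+          //   Offset = 64 + (-24) - 32 = 8
+          // i.e. the operand becomes [BaseReg + 8].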
+ TRI->resolveFrameIndex(I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + } + } + } + return UsedBaseReg; +} diff --git a/final/lib/CodeGen/LowerSubregs.cpp b/final/lib/CodeGen/LowerSubregs.cpp new file mode 100644 index 00000000000..7871ba9c17e --- /dev/null +++ b/final/lib/CodeGen/LowerSubregs.cpp @@ -0,0 +1,223 @@ +//===-- LowerSubregs.cpp - Subregister Lowering instruction pass ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a MachineFunction pass which runs after register +// allocation that turns subreg insert/extract instructions into register +// copies, as needed. This ensures correct codegen even if the coalescer +// isn't able to remove all subreg instructions. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lowersubregs" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + struct LowerSubregsInstructionPass : public MachineFunctionPass { + private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification, replacement for typeid + LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const { + return "Subregister lowering instruction pass"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&); + + private: + bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); + + void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, + const TargetRegisterInfo *TRI); + void TransferImplicitDefs(MachineInstr *MI); + }; + + char LowerSubregsInstructionPass::ID = 0; +} + +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); +} + +/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, +/// and the lowered replacement instructions immediately precede it. +/// Mark the replacement instructions with the dead flag. +void +LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, + unsigned DstReg, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::iterator MII = + prior(MachineBasicBlock::iterator(MI)); ; --MII) { + if (MII->addRegisterDead(DstReg, TRI)) + break; + assert(MII != MI->getParent()->begin() && + "copyPhysReg output doesn't reference destination register!"); + } +} + +/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. Copy any implicit-def +/// operands from MI to the replacement instruction. 
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+  MachineBasicBlock::iterator CopyMI = MI;
+  --CopyMI;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+      continue;
+    CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
+  }
+}
+
+bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
+  MachineBasicBlock *MBB = MI->getParent();
+  assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
+         MI->getOperand(1).isImm() &&
+         (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
+         MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
+
+  unsigned DstReg = MI->getOperand(0).getReg();
+  unsigned InsReg = MI->getOperand(2).getReg();
+  assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
+  unsigned SubIdx = MI->getOperand(3).getImm();
+
+  assert(SubIdx != 0 && "Invalid index for insert_subreg");
+  unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+         "Insert destination must be in a physical register");
+  assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+         "Inserted value must be in a physical register");
+
+  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
+
+  if (DstSubReg == InsReg) {
+    // No need to insert an identity copy instruction.
+    // Watch out for cases like this:
+    // %RAX = SUBREG_TO_REG 0, %EAX, 3
+    // We must leave %RAX live.
+    if (DstReg != InsReg) {
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      MI->RemoveOperand(3);     // SubIdx
+      MI->RemoveOperand(1);     // Imm
+      DEBUG(dbgs() << "subreg: replace by: " << *MI);
+      return true;
+    }
+    DEBUG(dbgs() << "subreg: eliminated!");
+  } else {
+    TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+                     MI->getOperand(2).isKill());
+    // Transfer the kill/dead flags, if needed.
+    if (MI->getOperand(0).isDead())
+      TransferDeadFlag(MI, DstSubReg, TRI);
+    DEBUG({
+      MachineBasicBlock::iterator dMI = MI;
+      dbgs() << "subreg: " << *(--dMI);
+    });
+  }
+
+  DEBUG(dbgs() << '\n');
+  MBB->erase(MI);
+  return true;
+}
+
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+  MachineOperand &DstMO = MI->getOperand(0);
+  MachineOperand &SrcMO = MI->getOperand(1);
+
+  if (SrcMO.getReg() == DstMO.getReg()) {
+    DEBUG(dbgs() << "identity copy: " << *MI);
+    // No need to insert an identity copy instruction, but replace with a KILL
+    // if liveness is changed.
+    if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+      // We must make sure the super-register gets killed. Replace the
+      // instruction with KILL.
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      DEBUG(dbgs() << "replaced by: " << *MI);
+      return true;
+    }
+    // Vanilla identity copy.
+    MI->eraseFromParent();
+    return true;
+  }
+
+  DEBUG(dbgs() << "real copy: " << *MI);
+  TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+                   DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
+
+  if (DstMO.isDead())
+    TransferDeadFlag(MI, DstMO.getReg(), TRI);
+  if (MI->getNumOperands() > 2)
+    TransferImplicitDefs(MI);
+  DEBUG({
+    MachineBasicBlock::iterator dMI = MI;
+    dbgs() << "replaced by: " << *(--dMI);
+  });
+  MI->eraseFromParent();
+  return true;
+}
+
+/// runOnMachineFunction - Reduce subregister inserts and extracts to register
+/// copies.
+/// +bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n" + << "********** LOWERING SUBREG INSTRS **********\n" + << "********** Function: " + << MF.getFunction()->getName() << '\n'); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + + bool MadeChange = false; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineBasicBlock::iterator nmi = llvm::next(mi); + MachineInstr *MI = mi; + assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear"); + assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG && + "EXTRACT_SUBREG should no longer appear"); + if (MI->isSubregToReg()) { + MadeChange |= LowerSubregToReg(MI); + } else if (MI->isCopy()) { + MadeChange |= LowerCopy(MI); + } + mi = nmi; + } + } + + return MadeChange; +} diff --git a/final/lib/CodeGen/MachineBasicBlock.cpp b/final/lib/CodeGen/MachineBasicBlock.cpp new file mode 100644 index 00000000000..ccbff0af5b2 --- /dev/null +++ b/final/lib/CodeGen/MachineBasicBlock.cpp @@ -0,0 +1,692 @@ +//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect the sequence of machine instructions for a basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +MachineBasicBlock::MachineBasicBlock(MachineFunction &mf, const BasicBlock *bb) + : BB(bb), Number(-1), xParent(&mf), Alignment(0), IsLandingPad(false), + AddressTaken(false) { + Insts.Parent = this; +} + +MachineBasicBlock::~MachineBasicBlock() { + LeakDetector::removeGarbageObject(this); +} + +/// getSymbol - Return the MCSymbol for this basic block. +/// +MCSymbol *MachineBasicBlock::getSymbol() const { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + "_" + + Twine(getNumber())); +} + + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { + MBB.print(OS); + return OS; +} + +/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the +/// parent pointer of the MBB, the MBB numbering, and any instructions in the +/// MBB to be on the right operand list for registers. +/// +/// MBBs start out as #-1. 
When a MBB is added to a MachineFunction, it +/// gets the next available unique MBB number. If it is removed from a +/// MachineFunction, it goes back to being #-1. +void ilist_traits::addNodeToList(MachineBasicBlock *N) { + MachineFunction &MF = *N->getParent(); + N->Number = MF.addToMBBNumbering(N); + + // Make sure the instructions have their operands in the reginfo lists. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (MachineBasicBlock::iterator I = N->begin(), E = N->end(); I != E; ++I) + I->AddRegOperandsToUseLists(RegInfo); + + LeakDetector::removeGarbageObject(N); +} + +void ilist_traits::removeNodeFromList(MachineBasicBlock *N) { + N->getParent()->removeFromMBBNumbering(N->Number); + N->Number = -1; + LeakDetector::addGarbageObject(N); +} + + +/// addNodeToList (MI) - When we add an instruction to a basic block +/// list, we update its parent pointer and add its operands from reg use/def +/// lists if appropriate. +void ilist_traits::addNodeToList(MachineInstr *N) { + assert(N->getParent() == 0 && "machine instruction already in a basic block"); + N->setParent(Parent); + + // Add the instruction's register operands to their corresponding + // use/def lists. + MachineFunction *MF = Parent->getParent(); + N->AddRegOperandsToUseLists(MF->getRegInfo()); + + LeakDetector::removeGarbageObject(N); +} + +/// removeNodeFromList (MI) - When we remove an instruction from a basic block +/// list, we update its parent pointer and remove its operands from reg use/def +/// lists if appropriate. +void ilist_traits::removeNodeFromList(MachineInstr *N) { + assert(N->getParent() != 0 && "machine instruction not in a basic block"); + + // Remove from the use/def lists. + N->RemoveRegOperandsFromUseLists(); + + N->setParent(0); + + LeakDetector::addGarbageObject(N); +} + +/// transferNodesFromList (MI) - When moving a range of instructions from one +/// MBB list to another, we need to update the parent pointers and the use/def +/// lists. +void ilist_traits:: +transferNodesFromList(ilist_traits &fromList, + MachineBasicBlock::iterator first, + MachineBasicBlock::iterator last) { + assert(Parent->getParent() == fromList.Parent->getParent() && + "MachineInstr parent mismatch!"); + + // Splice within the same MBB -> no change. + if (Parent == fromList.Parent) return; + + // If splicing between two blocks within the same function, just update the + // parent pointers. + for (; first != last; ++first) + first->setParent(Parent); +} + +void ilist_traits::deleteNode(MachineInstr* MI) { + assert(!MI->getParent() && "MI is still in a block!"); + Parent->getParent()->DeleteMachineInstr(MI); +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + iterator I = begin(); + while (I != end() && I->isPHI()) + ++I; + return I; +} + +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue())) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + iterator I = end(); + while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != end() && !I->getDesc().isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { + iterator B = begin(), I = end(); + while (I != B) { + --I; + if (I->isDebugValue()) + continue; + return I; + } + // The block is all debug values. 
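+  // Illustrative (hypothetical) caller; the result must be checked against
+  // end() before use:
+  //   MachineBasicBlock::iterator I = MBB->getLastNonDebugInstr();
+  //   if (I != MBB->end() && I->getDesc().isTerminator())
+  //     ...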
+ return end(); +} + +const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { + // A block with a landing pad successor only has one other successor. + if (succ_size() > 2) + return 0; + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isLandingPad()) + return *I; + return 0; +} + +void MachineBasicBlock::dump() const { + print(dbgs()); +} + +StringRef MachineBasicBlock::getName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->getName(); + else + return "(null)"; +} + +void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { + const MachineFunction *MF = getParent(); + if (!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + if (Alignment) { OS << "Alignment " << Alignment << "\n"; } + + if (Indexes) + OS << Indexes->getMBBStartIdx(this) << '\t'; + + OS << "BB#" << getNumber() << ": "; + + const char *Comma = ""; + if (const BasicBlock *LBB = getBasicBlock()) { + OS << Comma << "derived from LLVM BB "; + WriteAsOperand(OS, LBB, /*PrintType=*/false); + Comma = ", "; + } + if (isLandingPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } + if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + OS << '\n'; + + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (!livein_empty()) { + if (Indexes) OS << '\t'; + OS << " Live Ins:"; + for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I) + OS << ' ' << PrintReg(*I, TRI); + OS << '\n'; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + if (Indexes) OS << '\t'; + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " BB#" << (*PI)->getNumber(); + OS << '\n'; + } + + for (const_iterator I = begin(); I != end(); ++I) { + if (Indexes) { + if (Indexes->hasIndex(I)) + OS << Indexes->getInstructionIndex(I); + OS << '\t'; + } + OS << '\t'; + I->print(OS, &getParent()->getTarget()); + } + + // Print the successors of this block according to the CFG. + if (!succ_empty()) { + if (Indexes) OS << '\t'; + OS << " Successors according to CFG:"; + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) + OS << " BB#" << (*SI)->getNumber(); + OS << '\n'; + } +} + +void MachineBasicBlock::removeLiveIn(unsigned Reg) { + std::vector::iterator I = + std::find(LiveIns.begin(), LiveIns.end(), Reg); + assert(I != LiveIns.end() && "Not a live in!"); + LiveIns.erase(I); +} + +bool MachineBasicBlock::isLiveIn(unsigned Reg) const { + livein_iterator I = std::find(livein_begin(), livein_end(), Reg); + return I != livein_end(); +} + +void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { + getParent()->splice(NewAfter, this); +} + +void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { + MachineFunction::iterator BBI = NewBefore; + getParent()->splice(++BBI, this); +} + +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. + if (this->succ_empty()) return; + + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. 
If its successor is now + // its layout successor, delete the branch. + if (isLayoutSuccessor(TBB)) + TII->RemoveBranch(*this); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. + TBB = *succ_begin(); + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, 0, Cond, dl); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) + return; + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FBB, 0, Cond, dl); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, 0, Cond, dl); + } + } else { + // The block has a fallthrough conditional branch. + MachineBasicBlock *MBBA = *succ_begin(); + MachineBasicBlock *MBBB = *llvm::next(succ_begin()); + if (MBBA == TBB) std::swap(MBBB, MBBA); + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) { + // We can't reverse the condition, add an unconditional branch. + Cond.clear(); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); + return; + } + TII->RemoveBranch(*this); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); + } else if (!isLayoutSuccessor(MBBA)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, MBBA, Cond, dl); + } + } + } +} + +void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) { + Successors.push_back(succ); + succ->addPredecessor(this); +} + +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) { + succ->removePredecessor(this); + succ_iterator I = std::find(Successors.begin(), Successors.end(), succ); + assert(I != Successors.end() && "Not a current successor!"); + Successors.erase(I); +} + +MachineBasicBlock::succ_iterator +MachineBasicBlock::removeSuccessor(succ_iterator I) { + assert(I != Successors.end() && "Not a current successor!"); + (*I)->removePredecessor(this); + return Successors.erase(I); +} + +void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { + Predecessors.push_back(pred); +} + +void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { + std::vector::iterator I = + std::find(Predecessors.begin(), Predecessors.end(), pred); + assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); + Predecessors.erase(I); +} + +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; + + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + } +} + +void +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; + + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + + // Fix up any PHI nodes in the successor. 
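+    // A machine-level PHI has the shape
+    //   %v0 = PHI %v1, <BB#1>, %v2, <BB#2>, ...
+    // where operand 0 is the def and (value, predecessor) pairs follow, so
+    // the block operands sit at indices 2, 4, 6, ... -- exactly what the
+    // stride-2 loop below visits.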
+    for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+         MI != ME && MI->isPHI(); ++MI)
+      for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.getMBB() == fromMBB)
+          MO.setMBB(this);
+      }
+  }
+}
+
+bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
+  std::vector<MachineBasicBlock *>::const_iterator I =
+    std::find(Successors.begin(), Successors.end(), MBB);
+  return I != Successors.end();
+}
+
+bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
+  MachineFunction::const_iterator I(this);
+  return llvm::next(I) == MachineFunction::const_iterator(MBB);
+}
+
+bool MachineBasicBlock::canFallThrough() {
+  MachineFunction::iterator Fallthrough = this;
+  ++Fallthrough;
+  // If FallthroughBlock is off the end of the function, it can't fall through.
+  if (Fallthrough == getParent()->end())
+    return false;
+
+  // If FallthroughBlock isn't a successor, no fallthrough is possible.
+  if (!isSuccessor(Fallthrough))
+    return false;
+
+  // Analyze the branches, if any, at the end of the block.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) {
+    // If we couldn't analyze the branch, examine the last instruction.
+    // If the block doesn't end in a known control barrier, assume fallthrough
+    // is possible. The isPredicable check is needed because this code can be
+    // called during IfConversion, where an instruction which is normally a
+    // Barrier is predicated and thus no longer an actual control barrier. This
+    // is over-conservative though, because if an instruction isn't actually
+    // predicated we could still treat it like a barrier.
+    return empty() || !back().getDesc().isBarrier() ||
+           back().getDesc().isPredicable();
+  }
+
+  // If there is no branch, control always falls through.
+  if (TBB == 0) return true;
+
+  // If there is some explicit branch to the fallthrough block, it can obviously
+  // reach, even though the branch should get folded to fall through implicitly.
+  if (MachineFunction::iterator(TBB) == Fallthrough ||
+      MachineFunction::iterator(FBB) == Fallthrough)
+    return true;
+
+  // If it's an unconditional branch to some block not the fall through, it
+  // doesn't fall through.
+  if (Cond.empty()) return false;
+
+  // Otherwise, if it is conditional and has no explicit false block, it falls
+  // through.
+  return FBB == 0;
+}
+
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+  MachineFunction *MF = getParent();
+  DebugLoc dl;  // FIXME: this is nowhere
+
+  // We may need to update this's terminator, but we can't do that if
+  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+    return NULL;
+
+  // Avoid bugpoint weirdness: A block may end with a conditional branch that
+  // jumps to the same MBB in either case. We have duplicate CFG edges in that
+  // case that we can't handle. Since this never happens in properly optimized
+  // code, just skip those edges.
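+  // For illustration, the degenerate shape being skipped is (opcodes are
+  // hypothetical):
+  //   Bcc <BB#7>   ; conditional branch
+  //   B   <BB#7>   ; unconditional branch to the same block
+  // for which AnalyzeBranch reports TBB == FBB == BB#7.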
+ if (TBB && TBB == FBB) { + DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" + << getNumber() << '\n'); + return NULL; + } + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "Splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + ReplaceUsesOfBlockWith(Succ, NMBB); + updateTerminator(); + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. + NMBB->addSuccessor(Succ); + if (!NMBB->isLayoutSuccessor(Succ)) { + Cond.clear(); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + } + + // Fix PHI nodes in Succ so they refer to NMBB instead of this + for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end(); + i != e && i->isPHI(); ++i) + for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) + if (i->getOperand(ni+1).getMBB() == this) + i->getOperand(ni+1).setMBB(NMBB); + + if (LiveVariables *LV = + P->getAnalysisIfAvailable()) + LV->addNewBlock(NMBB, this, Succ); + + if (MachineDominatorTree *MDT = + P->getAnalysisIfAvailable()) { + // Update dominator information. + MachineDomTreeNode *SucccDTNode = MDT->getNode(Succ); + + bool IsNewIDom = true; + for (const_pred_iterator PI = Succ->pred_begin(), E = Succ->pred_end(); + PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (PredBB == NMBB) + continue; + if (!MDT->dominates(SucccDTNode, MDT->getNode(PredBB))) { + IsNewIDom = false; + break; + } + } + + // We know "this" dominates the newly created basic block. + MachineDomTreeNode *NewDTNode = MDT->addNewBlock(NMBB, this); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything. + if (IsNewIDom) + MDT->changeImmediateDominator(SucccDTNode, NewDTNode); + } + + if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable()) + if (MachineLoop *TIL = MLI->getLoopFor(this)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) { + if (TIL == DestLoop) { + // Both in the same loop, the NMBB joins loop. + DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == Succ && + "Should not create irreducible loops!"); + if (MachineLoop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NMBB, MLI->getBase()); + } + } + } + + return NMBB; +} + +/// removeFromParent - This method unlinks 'this' from the containing function, +/// and returns it, but does not delete it. 
+MachineBasicBlock *MachineBasicBlock::removeFromParent() { + assert(getParent() && "Not embedded in a function!"); + getParent()->remove(this); + return this; +} + + +/// eraseFromParent - This method unlinks 'this' from the containing function, +/// and deletes it. +void MachineBasicBlock::eraseFromParent() { + assert(getParent() && "Not embedded in a function!"); + getParent()->erase(this); +} + + +/// ReplaceUsesOfBlockWith - Given a machine basic block that branched to +/// 'Old', change the code and CFG so that it branches to 'New' instead. +void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Cannot replace self with self!"); + + MachineBasicBlock::iterator I = end(); + while (I != begin()) { + --I; + if (!I->getDesc().isTerminator()) break; + + // Scan the operands of this machine instruction, replacing any uses of Old + // with New. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i).isMBB() && + I->getOperand(i).getMBB() == Old) + I->getOperand(i).setMBB(New); + } + + // Update the successor information. + removeSuccessor(Old); + addSuccessor(New); +} + +/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the +/// CFG to be inserted. If we have proven that MBB can only branch to DestA and +/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be +/// null. +/// +/// Besides DestA and DestB, retain other edges leading to LandingPads +/// (currently there can be only one; we don't check or require that here). +/// Note it is possible that DestA and/or DestB are LandingPads. +bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, + MachineBasicBlock *DestB, + bool isCond) { + // The values of DestA and DestB frequently come from a call to the + // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial + // values from there. + // + // 1. If both DestA and DestB are null, then the block ends with no branches + // (it falls through to its successor). + // 2. If DestA is set, DestB is null, and isCond is false, then the block ends + // with only an unconditional branch. + // 3. If DestA is set, DestB is null, and isCond is true, then the block ends + // with a conditional branch that falls through to a successor (DestB). + // 4. If DestA and DestB is set and isCond is true, then the block ends with a + // conditional branch followed by an unconditional branch. DestA is the + // 'true' destination and DestB is the 'false' destination. + + bool Changed = false; + + MachineFunction::iterator FallThru = + llvm::next(MachineFunction::iterator(this)); + + if (DestA == 0 && DestB == 0) { + // Block falls through to successor. + DestA = FallThru; + DestB = FallThru; + } else if (DestA != 0 && DestB == 0) { + if (isCond) + // Block ends in conditional jump that falls through to successor. + DestB = FallThru; + } else { + assert(DestA && DestB && isCond && + "CFG in a bad state. Cannot correct CFG edges"); + } + + // Remove superfluous edges. I.e., those which aren't destinations of this + // basic block, duplicate edges, or landing pads. + SmallPtrSet SeenMBBs; + MachineBasicBlock::succ_iterator SI = succ_begin(); + while (SI != succ_end()) { + const MachineBasicBlock *MBB = *SI; + if (!SeenMBBs.insert(MBB) || + (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) { + // This is a superfluous edge, remove it. 
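+      // Note the erase-while-iterating pattern: removeSuccessor(SI) returns
+      // the iterator following the removed edge (the same convention as
+      // Successors.erase(SI)), so this path must not also increment SI.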
+ SI = removeSuccessor(SI); + Changed = true; + } else { + ++SI; + } + } + + return Changed; +} + +/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping +/// any DBG_VALUE instructions. Return UnknownLoc if there is none. +DebugLoc +MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) { + DebugLoc DL; + MachineBasicBlock::iterator E = end(); + if (MBBI != E) { + // Skip debug declarations, we don't want a DebugLoc from them. + MachineBasicBlock::iterator MBBI2 = MBBI; + while (MBBI2 != E && MBBI2->isDebugValue()) + MBBI2++; + if (MBBI2 != E) + DL = MBBI2->getDebugLoc(); + } + return DL; +} + +void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB, + bool t) { + OS << "BB#" << MBB->getNumber(); +} + diff --git a/final/lib/CodeGen/MachineCSE.cpp b/final/lib/CodeGen/MachineCSE.cpp new file mode 100644 index 00000000000..07a7d27b019 --- /dev/null +++ b/final/lib/CodeGen/MachineCSE.cpp @@ -0,0 +1,531 @@ +//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global common subexpression elimination on machine +// instructions using a scoped hash table based value numbering scheme. It +// must be run while the machine function is still in SSA form. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-cse" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" + +using namespace llvm; + +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); +STATISTIC(NumPhysCSEs, + "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); + +namespace { + class MachineCSE : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + AliasAnalysis *AA; + MachineDominatorTree *DT; + MachineRegisterInfo *MRI; + public: + static char ID; // Pass identification + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) { + initializeMachineCSEPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired(); + AU.addPreservedID(MachineLoopInfoID); + AU.addRequired(); + AU.addPreserved(); + } + + virtual void releaseMemory() { + ScopeMap.clear(); + Exps.clear(); + } + + private: + const unsigned LookAheadLimit; + typedef RecyclingAllocator > AllocatorTy; + typedef ScopedHashTable ScopedHTType; + typedef ScopedHTType::ScopeTy ScopeType; + DenseMap ScopeMap; + ScopedHTType VNT; + SmallVector Exps; + unsigned CurrVN; + + bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + 
MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const ; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet &PhysRefs) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet &PhysRefs) const; + bool isCSECandidate(MachineInstr *MI); + bool isProfitableToCSE(unsigned CSReg, unsigned Reg, + MachineInstr *CSMI, MachineInstr *MI); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlock(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap); + bool PerformCSE(MachineDomTreeNode *Node); + }; +} // end anonymous namespace + +char MachineCSE::ID = 0; +INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) + +FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); } + +bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + bool Changed = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (!MRI->hasOneNonDBGUse(Reg)) + // Only coalesce single use copies. This ensure the copy will be + // deleted. + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() != MBB) + continue; + if (!DefMI->isCopy()) + continue; + unsigned SrcReg = DefMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + continue; + if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + continue; + if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg))) + continue; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; + } + + return Changed; +} + +bool +MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const { + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + bool SeenDef = false; + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + // Found a use! + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + + --LookAheadLeft; + ++I; + } + return false; +} + +/// hasLivePhysRegDefUses - Return true if the specified instruction read/write +/// physical registers (except for dead defs of physical registers). It also +/// returns the physical register def by reference if it's the only one and the +/// instruction does not uses a physical register. 
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet &PhysRefs) const { + MachineBasicBlock::const_iterator I = MI; I = llvm::next(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // If the def is dead, it's ok. But the def may not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (MO.isDef() && + (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end()))) + continue; + PhysRefs.insert(Reg); + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + PhysRefs.insert(*Alias); + } + + return !PhysRefs.empty(); +} + +bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet &PhysRefs) const { + // For now conservatively returns false if the common subexpression is + // not in the same basic block as the given instruction. + MachineBasicBlock *MBB = MI->getParent(); + if (CSMI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I); + MachineBasicBlock::const_iterator E = MI; + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + + if (I == E) + return true; + + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + continue; + if (PhysRefs.count(MOReg)) + return false; + } + + --LookAheadLeft; + ++I; + } + + return false; +} + +bool MachineCSE::isCSECandidate(MachineInstr *MI) { + if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() || + MI->isKill() || MI->isInlineAsm() || MI->isDebugValue()) + return false; + + // Ignore copies. + if (MI->isCopyLike()) + return false; + + // Ignore stuff that we obviously can't move. + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.mayStore() || TID.isCall() || TID.isTerminator() || + MI->hasUnmodeledSideEffects()) + return false; + + if (TID.mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!MI->isInvariantLoad(AA)) + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. + return false; + } + return true; +} + +/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a +/// common expression that defines Reg. +bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, + MachineInstr *CSMI, MachineInstr *MI) { + // FIXME: Heuristics that works around the lack the live range splitting. + + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in + // an immediate predecessor. We don't want to increase register pressure and + // end up causing other computation to be spilled. 
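+  // For illustration (hypothetical opcode and blocks): if BB#0 contains
+  //   %v1 = MOVi 42
+  // and the reuse site is in a distant BB#5, CSE would keep %v1 live across
+  // the whole region, so the cheap def is better rematerialized and the CSE
+  // is rejected unless BB#0 is BB#5 itself or an immediate predecessor.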
+  if (MI->getDesc().isAsCheapAsAMove()) {
+    MachineBasicBlock *CSBB = CSMI->getParent();
+    MachineBasicBlock *BB = MI->getParent();
+    if (CSBB != BB && !CSBB->isSuccessor(BB))
+      return false;
+  }
+
+  // Heuristics #2: If the expression doesn't use a virtual register and the
+  // only uses of the redundant computation are copies, do not cse.
+  bool HasVRegUse = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isUse() &&
+        TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+      HasVRegUse = true;
+      break;
+    }
+  }
+  if (!HasVRegUse) {
+    bool HasNonCopyUse = false;
+    for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+         E = MRI->use_nodbg_end(); I != E; ++I) {
+      MachineInstr *Use = &*I;
+      // Ignore copies.
+      if (!Use->isCopyLike()) {
+        HasNonCopyUse = true;
+        break;
+      }
+    }
+    if (!HasNonCopyUse)
+      return false;
+  }
+
+  // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
+  // it unless the defined value is already used in the BB of the new use.
+  bool HasPHI = false;
+  SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
+  for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(CSReg),
+       E = MRI->use_nodbg_end(); I != E; ++I) {
+    MachineInstr *Use = &*I;
+    HasPHI |= Use->isPHI();
+    CSBBs.insert(Use->getParent());
+  }
+
+  if (!HasPHI)
+    return true;
+  return CSBBs.count(MI->getParent());
+}
+
+void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
+  ScopeType *Scope = new ScopeType(VNT);
+  ScopeMap[MBB] = Scope;
+}
+
+void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
+  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
+  assert(SI != ScopeMap.end());
+  // Delete the scope before erasing the map entry; the iterator must not be
+  // dereferenced once it has been erased.
+  delete SI->second;
+  ScopeMap.erase(SI);
+}
+
+bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+  bool Changed = false;
+
+  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
+    MachineInstr *MI = &*I;
+    ++I;
+
+    if (!isCSECandidate(MI))
+      continue;
+
+    bool FoundCSE = VNT.count(MI);
+    if (!FoundCSE) {
+      // Look for trivial copy coalescing opportunities.
+      if (PerformTrivialCoalescing(MI, MBB)) {
+        // After coalescing MI itself may become a copy.
+        if (MI->isCopyLike())
+          continue;
+        FoundCSE = VNT.count(MI);
+      }
+    }
+
+    // Commute commutable instructions.
+    bool Commuted = false;
+    if (!FoundCSE && MI->getDesc().isCommutable()) {
+      MachineInstr *NewMI = TII->commuteInstruction(MI);
+      if (NewMI) {
+        Commuted = true;
+        FoundCSE = VNT.count(NewMI);
+        if (NewMI != MI)
+          // New instruction. It doesn't need to be kept.
+          NewMI->eraseFromParent();
+        else if (!FoundCSE)
+          // MI was changed but it didn't help, commute it back!
+          (void)TII->commuteInstruction(MI);
+      }
+    }
+
+    // If the instruction defines physical registers and the values *may* be
+    // used, then it's not safe to replace it with a common subexpression.
+    // It's also not safe if the instruction uses physical registers.
+    SmallSet<unsigned, 8> PhysRefs;
+    if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
+      FoundCSE = false;
+
+      // ... Unless the CS is local and it also defines the physical register
+      // which is not clobbered in between and the physical register uses
+      // were not clobbered.
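+      // VNT maps an instruction's expression to a value number and Exps maps
+      // the value number back to the defining instruction, maintaining the
+      // invariant sketched below:
+      //   VNT.lookup(MI) == VN  ==>  Exps[VN] computes the same value as MI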
+ unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs)) + FoundCSE = true; + } + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + + // Found a common subexpression, eliminate it. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + + // Check if it's profitable to perform this CSE. + bool DoCSE = true; + unsigned NumDefs = MI->getDesc().getNumDefs(); + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned OldReg = MO.getReg(); + unsigned NewReg = CSMI->getOperand(i).getReg(); + if (OldReg == NewReg) + continue; + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && + TargetRegisterInfo::isVirtualRegister(NewReg) && + "Do not CSE physical register defs!"); + if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DoCSE = false; + break; + } + CSEPairs.push_back(std::make_pair(OldReg, NewReg)); + --NumDefs; + } + + // Actually perform the elimination. + if (DoCSE) { + for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) { + MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second); + MRI->clearKillFlags(CSEPairs[i].second); + } + MI->eraseFromParent(); + ++NumCSEs; + if (!PhysRefs.empty()) + ++NumPhysCSEs; + if (Commuted) + ++NumCommutes; + } else { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + } + CSEPairs.clear(); + } + + return Changed; +} + +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void +MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap) { + if (OpenChildren[Node]) + return; + + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { + SmallVector Scopes; + SmallVector WorkList; + DenseMap ParentMap; + DenseMap OpenChildren; + + CurrVN = 0; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + OpenChildren[Node] = NumChildren; + for (unsigned i = 0; i != NumChildren; ++i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } while (!WorkList.empty()); + + // Now perform CSE. + bool Changed = false; + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. 
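+    // For illustration, on a dominator tree A -> {B, C}: A's scope is opened
+    // first and closed last, while a leaf's scope is closed as soon as the
+    // leaf is processed.  Value numbers created in A thus remain visible in
+    // both B and C, but numbers created in B are popped before C is visited.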
+ ExitScopeIfDone(Node, OpenChildren, ParentMap); + } + + return Changed; +} + +bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + AA = &getAnalysis(); + DT = &getAnalysis(); + return PerformCSE(DT->getRootNode()); +} diff --git a/final/lib/CodeGen/MachineDominators.cpp b/final/lib/CodeGen/MachineDominators.cpp new file mode 100644 index 00000000000..04c8ecbf9bd --- /dev/null +++ b/final/lib/CodeGen/MachineDominators.cpp @@ -0,0 +1,59 @@ +//===- MachineDominators.cpp - Machine Dominator Calculation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// forward dominators on machine functions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" + +using namespace llvm; + +namespace llvm { +TEMPLATE_INSTANTIATION(class DomTreeNodeBase); +TEMPLATE_INSTANTIATION(class DominatorTreeBase); +} + +char MachineDominatorTree::ID = 0; + +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true) + +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; + +void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + + return false; +} + +MachineDominatorTree::MachineDominatorTree() + : MachineFunctionPass(ID) { + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase(false); +} + +MachineDominatorTree::~MachineDominatorTree() { + delete DT; +} + +void MachineDominatorTree::releaseMemory() { + DT->releaseMemory(); +} + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} diff --git a/final/lib/CodeGen/MachineFunction.cpp b/final/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 00000000000..d81e4a1d015 --- /dev/null +++ b/final/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,752 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Config/config.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineFunction implementation
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+MachineFunctionInfo::~MachineFunctionInfo() {}
+
+void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+  MBB->getParent()->DeleteMachineBasicBlock(MBB);
+}
+
+MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
+                                 unsigned FunctionNum, MachineModuleInfo &mmi,
+                                 GCModuleInfo* gmi)
+  : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
+  if (TM.getRegisterInfo())
+    RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
+  else
+    RegInfo = 0;
+  MFInfo = 0;
+  FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
+  if (Fn->hasFnAttr(Attribute::StackAlignment))
+    FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+        Fn->getAttributes().getFnAttributes()));
+  ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData());
+  Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
+  FunctionNumber = FunctionNum;
+  JumpTableInfo = 0;
+}
+
+MachineFunction::~MachineFunction() {
+  BasicBlocks.clear();
+  InstructionRecycler.clear(Allocator);
+  BasicBlockRecycler.clear(Allocator);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
+  if (MFInfo) {
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
+  }
+  FrameInfo->~MachineFrameInfo(); Allocator.Deallocate(FrameInfo);
+  ConstantPool->~MachineConstantPool(); Allocator.Deallocate(ConstantPool);
+
+  if (JumpTableInfo) {
+    JumpTableInfo->~MachineJumpTableInfo();
+    Allocator.Deallocate(JumpTableInfo);
+  }
+}
+
+/// getOrCreateJumpTableInfo - Get the JumpTableInfo for this function; if it
+/// does not already exist, allocate one.
+MachineJumpTableInfo *MachineFunction::
+getOrCreateJumpTableInfo(unsigned EntryKind) {
+  if (JumpTableInfo) return JumpTableInfo;
+
+  JumpTableInfo = new (Allocator)
+    MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind);
+  return JumpTableInfo;
+}
+
+/// RenumberBlocks - This discards all of the MachineBasicBlock numbers and
+/// recomputes them. This guarantees that the MBB numbers are sequential,
+/// dense, and match the ordering of the blocks within the function. If a
+/// specific MachineBasicBlock is specified, only that block and those after
+/// it are renumbered.
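+/// For example, if block #2 of five blocks numbered 0-4 has been deleted,
+/// RenumberBlocks() renumbers the trailing blocks so the function is numbered
+/// 0-3 again and MBBNumbering has no holes.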
+void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
+  if (empty()) { MBBNumbering.clear(); return; }
+  MachineFunction::iterator MBBI, E = end();
+  if (MBB == 0)
+    MBBI = begin();
+  else
+    MBBI = MBB;
+
+  // Figure out the block number this should have.
+  unsigned BlockNo = 0;
+  if (MBBI != begin())
+    BlockNo = prior(MBBI)->getNumber()+1;
+
+  for (; MBBI != E; ++MBBI, ++BlockNo) {
+    if (MBBI->getNumber() != (int)BlockNo) {
+      // Remove use of the old number.
+      if (MBBI->getNumber() != -1) {
+        assert(MBBNumbering[MBBI->getNumber()] == &*MBBI &&
+               "MBB number mismatch!");
+        MBBNumbering[MBBI->getNumber()] = 0;
+      }
+
+      // If BlockNo is already taken, set that block's number to -1.
+      if (MBBNumbering[BlockNo])
+        MBBNumbering[BlockNo]->setNumber(-1);
+
+      MBBNumbering[BlockNo] = MBBI;
+      MBBI->setNumber(BlockNo);
+    }
+  }
+
+  // Okay, all the blocks are renumbered. If we have compactified the block
+  // numbering, shrink MBBNumbering now.
+  assert(BlockNo <= MBBNumbering.size() && "Mismatch!");
+  MBBNumbering.resize(BlockNo);
+}
+
+/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
+/// of `new MachineInstr'.
+///
+MachineInstr *
+MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+                                    DebugLoc DL, bool NoImp) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+    MachineInstr(TID, DL, NoImp);
+}
+
+/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
+/// 'Orig' instruction, identical in all ways except the instruction
+/// has no parent, prev, or next.
+///
+MachineInstr *
+MachineFunction::CloneMachineInstr(const MachineInstr *Orig) {
+  return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
+             MachineInstr(*this, *Orig);
+}
+
+/// DeleteMachineInstr - Delete the given MachineInstr.
+///
+void
+MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+  MI->~MachineInstr();
+  InstructionRecycler.Deallocate(Allocator, MI);
+}
+
+/// CreateMachineBasicBlock - Allocate a new MachineBasicBlock. Use this
+/// instead of `new MachineBasicBlock'.
+///
+MachineBasicBlock *
+MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) {
+  return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
+             MachineBasicBlock(*this, bb);
+}
+
+/// DeleteMachineBasicBlock - Delete the given MachineBasicBlock.
+///
+void
+MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
+  assert(MBB->getParent() == this && "MBB parent mismatch!");
+  MBB->~MachineBasicBlock();
+  BasicBlockRecycler.Deallocate(Allocator, MBB);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+                                      uint64_t s, unsigned base_alignment,
+                                      const MDNode *TBAAInfo) {
+  return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+                                           TBAAInfo);
+}
+
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+                                      int64_t Offset, uint64_t Size) {
+  return new (Allocator)
+             MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+                                                  MMO->getOffset()+Offset),
+                               MMO->getFlags(), Size,
+                               MMO->getBaseAlignment(), 0);
+}
+
+MachineInstr::mmo_iterator
+MachineFunction::allocateMemRefsArray(unsigned long Num) {
+  return Allocator.Allocate<MachineMemOperand *>(Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
+                                    MachineInstr::mmo_iterator End) {
+  // Count the number of load mem refs.
+  unsigned Num = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+    if ((*I)->isLoad())
+      ++Num;
+
+  // Allocate a new array and populate it with the load information.
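+  // (An MMO that is both a load and a store cannot be handed out as-is,
+  // since callers expect pure loads here; such operands are cloned below
+  // with the MOStore flag cleared.)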
+  MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+  unsigned Index = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+    if ((*I)->isLoad()) {
+      if (!(*I)->isStore())
+        // Reuse the MMO.
+        Result[Index] = *I;
+      else {
+        // Clone the MMO and unset the store flag.
+        MachineMemOperand *JustLoad =
+          getMachineMemOperand((*I)->getPointerInfo(),
+                               (*I)->getFlags() & ~MachineMemOperand::MOStore,
+                               (*I)->getSize(), (*I)->getBaseAlignment(),
+                               (*I)->getTBAAInfo());
+        Result[Index] = JustLoad;
+      }
+      ++Index;
+    }
+  }
+  return std::make_pair(Result, Result + Num);
+}
+
+std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator>
+MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
+                                     MachineInstr::mmo_iterator End) {
+  // Count the number of store mem refs.
+  unsigned Num = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I)
+    if ((*I)->isStore())
+      ++Num;
+
+  // Allocate a new array and populate it with the store information.
+  MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num);
+  unsigned Index = 0;
+  for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) {
+    if ((*I)->isStore()) {
+      if (!(*I)->isLoad())
+        // Reuse the MMO.
+        Result[Index] = *I;
+      else {
+        // Clone the MMO and unset the load flag.
+        MachineMemOperand *JustStore =
+          getMachineMemOperand((*I)->getPointerInfo(),
+                               (*I)->getFlags() & ~MachineMemOperand::MOLoad,
+                               (*I)->getSize(), (*I)->getBaseAlignment(),
+                               (*I)->getTBAAInfo());
+        Result[Index] = JustStore;
+      }
+      ++Index;
+    }
+  }
+  return std::make_pair(Result, Result + Num);
+}
+
+void MachineFunction::dump() const {
+  print(dbgs());
+}
+
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
+  OS << "# Machine code for function " << Fn->getName() << ":\n";
+
+  // Print Frame Information
+  FrameInfo->print(*this, OS);
+
+  // Print JumpTable Information
+  if (JumpTableInfo)
+    JumpTableInfo->print(OS);
+
+  // Print Constant Pool
+  ConstantPool->print(OS);
+
+  const TargetRegisterInfo *TRI = getTarget().getRegisterInfo();
+
+  if (RegInfo && !RegInfo->livein_empty()) {
+    OS << "Function Live Ins: ";
+    for (MachineRegisterInfo::livein_iterator
+         I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) {
+      if (TRI)
+        OS << "%" << TRI->getName(I->first);
+      else
+        OS << " %physreg" << I->first;
+
+      if (I->second)
+        OS << " in reg%" << I->second;
+
+      if (llvm::next(I) != E)
+        OS << ", ";
+    }
+    OS << '\n';
+  }
+  if (RegInfo && !RegInfo->liveout_empty()) {
+    OS << "Function Live Outs: ";
+    for (MachineRegisterInfo::liveout_iterator
+         I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I){
+      if (TRI)
+        OS << '%' << TRI->getName(*I);
+      else
+        OS << "%physreg" << *I;
+
+      if (llvm::next(I) != E)
+        OS << " ";
+    }
+    OS << '\n';
+  }
+
+  for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
+    OS << '\n';
+    BB->print(OS, Indexes);
+  }
+
+  OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
+}
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
+
+    DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+    static std::string getGraphName(const MachineFunction *F) {
+      return "CFG for '" + F->getFunction()->getNameStr() + "' function";
+    }
+
+    std::string getNodeLabel(const MachineBasicBlock *Node,
+                             const MachineFunction *Graph) {
+      std::string OutStr;
+      {
+        raw_string_ostream OSS(OutStr);
+
+        if (isSimple()) {
+          OSS << "BB#" << Node->getNumber();
+          if (const BasicBlock *BB = Node->getBasicBlock())
+            OSS << ": " << BB->getName();
+        } else
Node->print(OSS); + } + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + return OutStr; + } + }; +} + +void MachineFunction::viewCFG() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getFunction()->getNameStr()); +#else + errs() << "MachineFunction::viewCFG is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +void MachineFunction::viewCFGOnly() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getFunction()->getNameStr(), true); +#else + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// addLiveIn - Add the specified physical register as a live-in value and +/// create a corresponding virtual register for it. +unsigned MachineFunction::addLiveIn(unsigned PReg, + const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = getRegInfo(); + unsigned VReg = MRI.getLiveInVirtReg(PReg); + if (VReg) { + assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!"); + return VReg; + } + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(PReg, VReg); + return VReg; +} + +/// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + assert(JumpTableInfo && "No jump tables"); + + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + const MCAsmInfo &MAI = *getTarget().getMCAsmInfo(); + + const char *Prefix = isLinkerPrivate ? MAI.getLinkerPrivateGlobalPrefix() : + MAI.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.GetOrCreateSymbol(Name.str()); +} + +/// getPICBaseSymbol - Return a function-local symbol to represent the PIC +/// base. +MCSymbol *MachineFunction::getPICBaseSymbol() const { + const MCAsmInfo &MAI = *Target.getMCAsmInfo(); + return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+ + Twine(getFunctionNumber())+"$pb"); +} + +//===----------------------------------------------------------------------===// +// MachineFrameInfo implementation +//===----------------------------------------------------------------------===// + +/// CreateFixedObject - Create a new object at a fixed location on the stack. +/// All fixed objects should be created before other objects are created for +/// efficiency. By default, fixed objects are immutable. This returns an +/// index with a negative value. +/// +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. 
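+  // For example, with a 16-byte aligned stack, a fixed object at SPOffset 40
+  // can only be assumed 8-byte aligned: MinAlign(40, 16) == 8.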
+  unsigned StackAlign = TFI.getStackAlignment();
+  unsigned Align = MinAlign(SPOffset, StackAlign);
+  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+                                              /*isSS*/false, false));
+  return -++NumFixedObjects;
+}
+
+
+BitVector
+MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
+  assert(MBB && "MBB must be valid");
+  const MachineFunction *MF = MBB->getParent();
+  assert(MF && "MBB must be part of a MachineFunction");
+  const TargetMachine &TM = MF->getTarget();
+  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+  BitVector BV(TRI->getNumRegs());
+
+  // Before CSI is calculated, no registers are considered pristine. They can be
+  // freely used and PEI will make sure they are saved.
+  if (!isCalleeSavedInfoValid())
+    return BV;
+
+  for (const unsigned *CSR = TRI->getCalleeSavedRegs(MF); CSR && *CSR; ++CSR)
+    BV.set(*CSR);
+
+  // The entry MBB always has all CSRs pristine.
+  if (MBB == &MF->front())
+    return BV;
+
+  // On other MBBs the saved CSRs are not pristine.
+  const std::vector<CalleeSavedInfo> &CSI = getCalleeSavedInfo();
+  for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
+         E = CSI.end(); I != E; ++I)
+    BV.reset(I->getReg());
+
+  return BV;
+}
+
+
+void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
+  if (Objects.empty()) return;
+
+  const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
+  int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
+
+  OS << "Frame Objects:\n";
+
+  for (unsigned i = 0, e = Objects.size(); i != e; ++i) {
+    const StackObject &SO = Objects[i];
+    OS << "  fi#" << (int)(i-NumFixedObjects) << ": ";
+    if (SO.Size == ~0ULL) {
+      OS << "dead\n";
+      continue;
+    }
+    if (SO.Size == 0)
+      OS << "variable sized";
+    else
+      OS << "size=" << SO.Size;
+    OS << ", align=" << SO.Alignment;
+
+    if (i < NumFixedObjects)
+      OS << ", fixed";
+    if (i < NumFixedObjects || SO.SPOffset != -1) {
+      int64_t Off = SO.SPOffset - ValOffset;
+      OS << ", at location [SP";
+      if (Off > 0)
+        OS << "+" << Off;
+      else if (Off < 0)
+        OS << Off;
+      OS << "]";
+    }
+    OS << "\n";
+  }
+}
+
+void MachineFrameInfo::dump(const MachineFunction &MF) const {
+  print(MF, dbgs());
+}
+
+//===----------------------------------------------------------------------===//
+//  MachineJumpTableInfo implementation
+//===----------------------------------------------------------------------===//
+
+/// getEntrySize - Return the size of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntrySize(const TargetData &TD) const {
+  // The size of a jump table entry is 4 bytes unless the entry is just the
+  // address of a block, in which case it is the pointer size.
+  switch (getEntryKind()) {
+  case MachineJumpTableInfo::EK_BlockAddress:
+    return TD.getPointerSize();
+  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+  case MachineJumpTableInfo::EK_LabelDifference32:
+  case MachineJumpTableInfo::EK_Custom32:
+    return 4;
+  case MachineJumpTableInfo::EK_Inline:
+    return 0;
+  }
+  assert(0 && "Unknown jump table encoding!");
+  return ~0;
+}
+
+/// getEntryAlignment - Return the alignment of each entry in the jump table.
+unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const {
+  // The alignment of a jump table entry is the alignment of int32 unless the
+  // entry is just the address of a block, in which case it is the pointer
+  // alignment.
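+  // (So EK_BlockAddress entries get pointer alignment, e.g. 8 bytes on a
+  // typical 64-bit target, while the 32-bit entry kinds get the target's
+  // i32 ABI alignment, typically 4.)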
+  switch (getEntryKind()) {
+  case MachineJumpTableInfo::EK_BlockAddress:
+    return TD.getPointerABIAlignment();
+  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+  case MachineJumpTableInfo::EK_LabelDifference32:
+  case MachineJumpTableInfo::EK_Custom32:
+    return TD.getABIIntegerTypeAlignment(32);
+  case MachineJumpTableInfo::EK_Inline:
+    return 1;
+  }
+  assert(0 && "Unknown jump table encoding!");
+  return ~0;
+}
+
+/// createJumpTableIndex - Create a new jump table entry in the jump table info.
+///
+unsigned MachineJumpTableInfo::createJumpTableIndex(
+                               const std::vector<MachineBasicBlock*> &DestBBs) {
+  assert(!DestBBs.empty() && "Cannot create an empty jump table!");
+  JumpTables.push_back(MachineJumpTableEntry(DestBBs));
+  return JumpTables.size()-1;
+}
+
+/// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update
+/// the jump tables to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
+                                                  MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+    MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+  return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+                                                 MachineBasicBlock *Old,
+                                                 MachineBasicBlock *New) {
+  assert(Old != New && "Not making a change?");
+  bool MadeChange = false;
+  MachineJumpTableEntry &JTE = JumpTables[Idx];
+  for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+    if (JTE.MBBs[j] == Old) {
+      JTE.MBBs[j] = New;
+      MadeChange = true;
+    }
+  return MadeChange;
+}
+
+void MachineJumpTableInfo::print(raw_ostream &OS) const {
+  if (JumpTables.empty()) return;
+
+  OS << "Jump Tables:\n";
+
+  for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
+    OS << "  jt#" << i << ": ";
+    for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
+      OS << " BB#" << JumpTables[i].MBBs[j]->getNumber();
+  }
+
+  OS << '\n';
+}
+
+void MachineJumpTableInfo::dump() const { print(dbgs()); }
+
+
+//===----------------------------------------------------------------------===//
+//  MachineConstantPool implementation
+//===----------------------------------------------------------------------===//
+
+const Type *MachineConstantPoolEntry::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+
+unsigned MachineConstantPoolEntry::getRelocationInfo() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getRelocationInfo();
+  return Val.ConstVal->getRelocationInfo();
+}
+
+MachineConstantPool::~MachineConstantPool() {
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (Constants[i].isMachineConstantPoolEntry())
+      delete Constants[i].Val.MachineCPVal;
+  for (DenseSet<MachineConstantPoolValue*>::iterator I =
+       MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
+       I != E; ++I)
+    delete *I;
+}
+
+/// CanShareConstantPoolEntry - Test whether the given two constants
+/// can be allocated the same constant pool entry.
+static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
+                                      const TargetData *TD) {
+  // Handle the trivial case quickly.
+  if (A == B) return true;
+
+  // If they have the same type but weren't the same constant, quickly
+  // reject them.
+  if (A->getType() == B->getType()) return false;
+
+  // For now, only support constants with the same size.
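+  // (This size check plus the bit-pattern checks below are what let, e.g.,
+  // the float 1.0f and an i32 holding 0x3F800000 share a single pool entry.)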
+  if (TD->getTypeStoreSize(A->getType()) != TD->getTypeStoreSize(B->getType()))
+    return false;
+
+  // If a floating-point value and an integer value have the same encoding,
+  // they can share a constant-pool entry.
+  if (const ConstantFP *AFP = dyn_cast<ConstantFP>(A))
+    if (const ConstantInt *BI = dyn_cast<ConstantInt>(B))
+      return AFP->getValueAPF().bitcastToAPInt() == BI->getValue();
+  if (const ConstantFP *BFP = dyn_cast<ConstantFP>(B))
+    if (const ConstantInt *AI = dyn_cast<ConstantInt>(A))
+      return BFP->getValueAPF().bitcastToAPInt() == AI->getValue();
+
+  // Two vectors can share an entry if each pair of corresponding
+  // elements could.
+  if (const ConstantVector *AV = dyn_cast<ConstantVector>(A))
+    if (const ConstantVector *BV = dyn_cast<ConstantVector>(B)) {
+      if (AV->getType()->getNumElements() != BV->getType()->getNumElements())
+        return false;
+      for (unsigned i = 0, e = AV->getType()->getNumElements(); i != e; ++i)
+        if (!CanShareConstantPoolEntry(AV->getOperand(i),
+                                       BV->getOperand(i), TD))
+          return false;
+      return true;
+    }
+
+  // TODO: Handle other cases.
+
+  return false;
+}
+
+/// getConstantPoolIndex - Create a new entry in the constant pool or return
+/// an existing one. The user must specify the minimum required alignment
+/// (in bytes) for the object.
+///
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i)
+    if (!Constants[i].isMachineConstantPoolEntry() &&
+        CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) {
+      if ((unsigned)Constants[i].getAlignment() < Alignment)
+        Constants[i].Alignment = Alignment;
+      return i;
+    }
+
+  Constants.push_back(MachineConstantPoolEntry(C, Alignment));
+  return Constants.size()-1;
+}
+
+unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V,
+                                                   unsigned Alignment) {
+  assert(Alignment && "Alignment must be specified!");
+  if (Alignment > PoolAlignment) PoolAlignment = Alignment;
+
+  // Check to see if we already have this constant.
+  //
+  // FIXME, this could be made much more efficient for large constant pools.
+  int Idx = V->getExistingMachineCPValue(this, Alignment);
+  if (Idx != -1) {
+    MachineCPVsSharingEntries.insert(V);
+    return (unsigned)Idx;
+  }
+
+  Constants.push_back(MachineConstantPoolEntry(V, Alignment));
+  return Constants.size()-1;
+}
+
+void MachineConstantPool::print(raw_ostream &OS) const {
+  if (Constants.empty()) return;
+
+  OS << "Constant Pool:\n";
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    OS << "  cp#" << i << ": ";
+    if (Constants[i].isMachineConstantPoolEntry())
+      Constants[i].Val.MachineCPVal->print(OS);
+    else
+      OS << *(Value*)Constants[i].Val.ConstVal;
+    OS << ", align=" << Constants[i].getAlignment();
+    OS << "\n";
+  }
+}
+
+void MachineConstantPool::dump() const { print(dbgs()); }
diff --git a/final/lib/CodeGen/MachineFunctionAnalysis.cpp b/final/lib/CodeGen/MachineFunctionAnalysis.cpp
new file mode 100644
index 00000000000..054c750c9f2
--- /dev/null
+++ b/final/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -0,0 +1,58 @@
+//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionAnalysis members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+using namespace llvm;
+
+char MachineFunctionAnalysis::ID = 0;
+
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
+                                                 CodeGenOpt::Level OL) :
+  FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+  initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+}
+
+MachineFunctionAnalysis::~MachineFunctionAnalysis() {
+  releaseMemory();
+  assert(!MF && "MachineFunctionAnalysis left initialized!");
+}
+
+void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineModuleInfo>();
+}
+
+bool MachineFunctionAnalysis::doInitialization(Module &M) {
+  MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  assert(MMI && "MMI not around yet??");
+  MMI->setModule(&M);
+  NextFnNum = 0;
+  return false;
+}
+
+
+bool MachineFunctionAnalysis::runOnFunction(Function &F) {
+  assert(!MF && "MachineFunctionAnalysis already initialized!");
+  MF = new MachineFunction(&F, TM, NextFnNum++,
+                           getAnalysis<MachineModuleInfo>(),
+                           getAnalysisIfAvailable<GCModuleInfo>());
+  return false;
+}
+
+void MachineFunctionAnalysis::releaseMemory() {
+  delete MF;
+  MF = 0;
+}
diff --git a/final/lib/CodeGen/MachineFunctionPass.cpp b/final/lib/CodeGen/MachineFunctionPass.cpp
new file mode 100644
index 00000000000..e5a491270a8
--- /dev/null
+++ b/final/lib/CodeGen/MachineFunctionPass.cpp
@@ -0,0 +1,56 @@
+//===-- MachineFunctionPass.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the definitions of the MachineFunctionPass members.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+using namespace llvm;
+
+Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O,
+                                             const std::string &Banner) const {
+  return createMachineFunctionPrinterPass(O, Banner);
+}
+
+bool MachineFunctionPass::runOnFunction(Function &F) {
+  // Do not codegen any 'available_externally' functions at all, they have
+  // definitions outside the translation unit.
+  if (F.hasAvailableExternallyLinkage())
+    return false;
+
+  MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+  return runOnMachineFunction(MF);
+}
+
+void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<MachineFunctionAnalysis>();
+  AU.addPreserved<MachineFunctionAnalysis>();
+
+  // MachineFunctionPass preserves all LLVM IR passes, but there's no
+  // high-level way to express this. Instead, just list a bunch of
+  // passes explicitly. This does not include setPreservesCFG,
+  // because CodeGen overloads that to mean preserving the MachineBasicBlock
+  // CFG in addition to the LLVM IR CFG.
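+  // (The string forms below name passes by their registered PassInfo
+  // argument, which marks them preserved without a header dependency on
+  // each pass.)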
+  AU.addPreserved<AliasAnalysis>();
+  AU.addPreserved("scalar-evolution");
+  AU.addPreserved("iv-users");
+  AU.addPreserved("memdep");
+  AU.addPreserved("live-values");
+  AU.addPreserved("domtree");
+  AU.addPreserved("domfrontier");
+  AU.addPreserved("loops");
+  AU.addPreserved("lda");
+
+  FunctionPass::getAnalysisUsage(AU);
+}
diff --git a/final/lib/CodeGen/MachineFunctionPrinterPass.cpp b/final/lib/CodeGen/MachineFunctionPrinterPass.cpp
new file mode 100644
index 00000000000..2aaa798a02c
--- /dev/null
+++ b/final/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -0,0 +1,60 @@
+//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// MachineFunctionPrinterPass implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+/// MachineFunctionPrinterPass - This is a pass to dump the IR of a
+/// MachineFunction.
+///
+struct MachineFunctionPrinterPass : public MachineFunctionPass {
+  static char ID;
+
+  raw_ostream &OS;
+  const std::string Banner;
+
+  MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+      : MachineFunctionPass(ID), OS(os), Banner(banner) {}
+
+  const char *getPassName() const { return "MachineFunction Printer"; }
+
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+    AU.setPreservesAll();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) {
+    OS << "# " << Banner << ":\n";
+    MF.print(OS);
+    return false;
+  }
+};
+
+char MachineFunctionPrinterPass::ID = 0;
+}
+
+namespace llvm {
+/// Returns a newly-created MachineFunction Printer pass. The
+/// default banner is empty.
+///
+MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS,
+                                                      const std::string &Banner){
+  return new MachineFunctionPrinterPass(OS, Banner);
+}
+
+}
diff --git a/final/lib/CodeGen/MachineInstr.cpp b/final/lib/CodeGen/MachineInstr.cpp
new file mode 100644
index 00000000000..dce05d3e3f4
--- /dev/null
+++ b/final/lib/CodeGen/MachineInstr.cpp
@@ -0,0 +1,1671 @@
+//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Methods common to all machine instructions.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/InlineAsm.h" +#include "llvm/Metadata.h" +#include "llvm/Type.h" +#include "llvm/Value.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrDesc.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LeakDetector.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/FoldingSet.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineOperand Implementation +//===----------------------------------------------------------------------===// + +/// AddRegOperandToRegInfo - Add this register operand to the specified +/// MachineRegisterInfo. If it is null, then the next/prev fields should be +/// explicitly nulled out. +void MachineOperand::AddRegOperandToRegInfo(MachineRegisterInfo *RegInfo) { + assert(isReg() && "Can only add reg operand to use lists"); + + // If the reginfo pointer is null, just explicitly null out or next/prev + // pointers, to ensure they are not garbage. + if (RegInfo == 0) { + Contents.Reg.Prev = 0; + Contents.Reg.Next = 0; + return; + } + + // Otherwise, add this operand to the head of the registers use/def list. + MachineOperand **Head = &RegInfo->getRegUseDefListHead(getReg()); + + // For SSA values, we prefer to keep the definition at the start of the list. + // we do this by skipping over the definition if it is at the head of the + // list. + if (*Head && (*Head)->isDef()) + Head = &(*Head)->Contents.Reg.Next; + + Contents.Reg.Next = *Head; + if (Contents.Reg.Next) { + assert(getReg() == Contents.Reg.Next->getReg() && + "Different regs on the same list!"); + Contents.Reg.Next->Contents.Reg.Prev = &Contents.Reg.Next; + } + + Contents.Reg.Prev = Head; + *Head = this; +} + +/// RemoveRegOperandFromRegInfo - Remove this register operand from the +/// MachineRegisterInfo it is linked with. +void MachineOperand::RemoveRegOperandFromRegInfo() { + assert(isOnRegUseList() && "Reg operand is not on a use list"); + // Unlink this from the doubly linked list of operands. + MachineOperand *NextOp = Contents.Reg.Next; + *Contents.Reg.Prev = NextOp; + if (NextOp) { + assert(NextOp->getReg() == getReg() && "Corrupt reg use/def chain!"); + NextOp->Contents.Reg.Prev = Contents.Reg.Prev; + } + Contents.Reg.Prev = 0; + Contents.Reg.Next = 0; +} + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. 
+ if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + RemoveRegOperandFromRegInfo(); + SmallContents.RegNo = Reg; + AddRegOperandToRegInfo(&MF->getRegInfo()); + return; + } + + // Otherwise, just change the register, no problem. :) + SmallContents.RegNo = Reg; +} + +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + assert(Reg && "Invalid SubReg for physical register"); + setSubReg(0); + } + setReg(Reg); +} + +/// ChangeToImmediate - Replace this operand with a new immediate operand of +/// the specified value. If an operand is known to be an immediate already, +/// the setImm method should be used. +void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + // If this operand is currently a register operand, and if this is in a + // function, deregister the operand from the register's use/def list. + if (isReg() && getParent() && getParent()->getParent() && + getParent()->getParent()->getParent()) + RemoveRegOperandFromRegInfo(); + + OpKind = MO_Immediate; + Contents.ImmVal = ImmVal; +} + +/// ChangeToRegister - Replace this operand with a new register operand of +/// the specified value. If an operand is known to be an register already, +/// the setReg method should be used. +void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, + bool isKill, bool isDead, bool isUndef, + bool isDebug) { + // If this operand is already a register operand, use setReg to update the + // register's use/def lists. + if (isReg()) { + assert(!isEarlyClobber()); + setReg(Reg); + } else { + // Otherwise, change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + AddRegOperandToRegInfo(&MF->getRegInfo()); + } + + IsDef = isDef; + IsImp = isImp; + IsKill = isKill; + IsDead = isDead; + IsUndef = isUndef; + IsEarlyClobber = false; + IsDebug = isDebug; + SubReg = 0; +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. 
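+/// Note that for register operands only the register, the def/use sense and
+/// the subregister index are compared; kill, dead and undef flags are
+/// deliberately ignored.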
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + default: llvm_unreachable("Unrecognized operand type"); + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return !strcmp(getSymbolName(), Other.getSymbolName()) && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress(); + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + } +} + +/// print - Print the specified machine operand. +/// +void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const { + // If the instruction is embedded into a basic block, we can find the + // target info for the instruction. + if (!TM) + if (const MachineInstr *MI = getParent()) + if (const MachineBasicBlock *MBB = MI->getParent()) + if (const MachineFunction *MF = MBB->getParent()) + TM = &MF->getTarget(); + const TargetRegisterInfo *TRI = TM ? 
TM->getRegisterInfo() : 0;
+
+  switch (getType()) {
+  case MachineOperand::MO_Register:
+    OS << PrintReg(getReg(), TRI, getSubReg());
+
+    if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
+        isEarlyClobber()) {
+      OS << '<';
+      bool NeedComma = false;
+      if (isDef()) {
+        if (NeedComma) OS << ',';
+        if (isEarlyClobber())
+          OS << "earlyclobber,";
+        if (isImplicit())
+          OS << "imp-";
+        OS << "def";
+        NeedComma = true;
+      } else if (isImplicit()) {
+        OS << "imp-use";
+        NeedComma = true;
+      }
+
+      if (isKill() || isDead() || isUndef()) {
+        if (NeedComma) OS << ',';
+        if (isKill()) OS << "kill";
+        if (isDead()) OS << "dead";
+        if (isUndef()) {
+          if (isKill() || isDead())
+            OS << ',';
+          OS << "undef";
+        }
+      }
+      OS << '>';
+    }
+    break;
+  case MachineOperand::MO_Immediate:
+    OS << getImm();
+    break;
+  case MachineOperand::MO_FPImmediate:
+    if (getFPImm()->getType()->isFloatTy())
+      OS << getFPImm()->getValueAPF().convertToFloat();
+    else
+      OS << getFPImm()->getValueAPF().convertToDouble();
+    break;
+  case MachineOperand::MO_MachineBasicBlock:
+    OS << "<BB#" << getMBB()->getNumber() << ">";
+    break;
+  case MachineOperand::MO_FrameIndex:
+    OS << "<fi#" << getIndex() << '>';
+    break;
+  case MachineOperand::MO_ConstantPoolIndex:
+    OS << "<cp#" << getIndex();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_JumpTableIndex:
+    OS << "<jt#" << getIndex() << '>';
+    break;
+  case MachineOperand::MO_GlobalAddress:
+    OS << "<ga:";
+    WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_ExternalSymbol:
+    OS << "<es:" << getSymbolName();
+    if (getOffset()) OS << "+" << getOffset();
+    OS << '>';
+    break;
+  case MachineOperand::MO_BlockAddress:
+    OS << '<';
+    WriteAsOperand(OS, getBlockAddress(), /*PrintType=*/false);
+    OS << '>';
+    break;
+  case MachineOperand::MO_Metadata:
+    OS << '<';
+    WriteAsOperand(OS, getMetadata(), /*PrintType=*/false);
+    OS << '>';
+    break;
+  case MachineOperand::MO_MCSymbol:
+    OS << "<MCSym=" << *getMCSymbol() << '>';
+    break;
+  default:
+    llvm_unreachable("Unrecognized operand type");
+  }
+
+  if (unsigned TF = getTargetFlags())
+    OS << "[TF=" << TF << ']';
+}
+
+//===----------------------------------------------------------------------===//
+// MachineMemOperand Implementation
+//===----------------------------------------------------------------------===//
+
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+  if (V == 0) return 0;
+  return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+  return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+  return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+  return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+  return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+  return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+                                     uint64_t s, unsigned int a,
+                                     const MDNode *TBAAInfo)
+  : PtrInfo(ptrinfo), Size(s),
+    Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+    TBAAInfo(TBAAInfo) {
+  assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+         "invalid pointer value");
+  assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+  assert((isLoad() || isStore()) && "Not a load/store!");
+}
+
+/// Profile - Gather unique data for the object.
+///
+void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
+  ID.AddInteger(getOffset());
+  ID.AddInteger(Size);
+  ID.AddPointer(getValue());
+  ID.AddInteger(Flags);
+}
+
+void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
+  // The Value and Offset may differ due to CSE. But the flags and size
+  // should be the same.
+  assert(MMO->getFlags() == getFlags() && "Flags mismatch!");
+  assert(MMO->getSize() == getSize() && "Size mismatch!");
+
+  if (MMO->getBaseAlignment() >= getBaseAlignment()) {
+    // Update the alignment value.
+    Flags = (Flags & ((1 << MOMaxBits) - 1)) |
+      ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
+    // Also update the base and offset, because the new alignment may
+    // not be applicable with the old ones.
+    PtrInfo = MMO->PtrInfo;
+  }
+}
+
+/// getAlignment - Return the minimum known alignment in bytes of the
+/// actual memory reference.
+uint64_t MachineMemOperand::getAlignment() const {
+  return MinAlign(getBaseAlignment(), getOffset());
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
+  assert((MMO.isLoad() || MMO.isStore()) &&
+         "SV has to be a load, store or both.");
+
+  if (MMO.isVolatile())
+    OS << "Volatile ";
+
+  if (MMO.isLoad())
+    OS << "LD";
+  if (MMO.isStore())
+    OS << "ST";
+  OS << MMO.getSize();
+
+  // Print the address information.
+  OS << "[";
+  if (!MMO.getValue())
+    OS << "<unknown>";
+  else
+    WriteAsOperand(OS, MMO.getValue(), /*PrintType=*/false);
+
+  // If the alignment of the memory reference itself differs from the alignment
+  // of the base pointer, print the base alignment explicitly, next to the base
+  // pointer.
+  if (MMO.getBaseAlignment() != MMO.getAlignment())
+    OS << "(align=" << MMO.getBaseAlignment() << ")";
+
+  if (MMO.getOffset() != 0)
+    OS << "+" << MMO.getOffset();
+  OS << "]";
+
+  // Print the alignment of the reference.
+  if (MMO.getBaseAlignment() != MMO.getAlignment() ||
+      MMO.getBaseAlignment() != MMO.getSize())
+    OS << "(align=" << MMO.getAlignment() << ")";
+
+  // Print TBAA info.
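+  // (Only the first operand of the TBAA node, its name string, is printed;
+  // that is enough to identify the access type in a dump.)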
+  if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+    OS << "(tbaa=";
+    if (TBAAInfo->getNumOperands() > 0)
+      WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+    else
+      OS << "<unknown>";
+    OS << ")";
+  }
+
+  return OS;
+}
+
+//===----------------------------------------------------------------------===//
+// MachineInstr Implementation
+//===----------------------------------------------------------------------===//
+
+/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
+/// TID NULL and no operands.
+MachineInstr::MachineInstr()
+  : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0),
+    Parent(0) {
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+void MachineInstr::addImplicitDefUseOperands() {
+  if (TID->ImplicitDefs)
+    for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+      addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
+  if (TID->ImplicitUses)
+    for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+      addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
+}
+
+/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
+/// implicit operands. It reserves space for the number of operands specified
+/// by the TargetInstrDesc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0), Parent(0) {
+  if (!NoImp)
+    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  if (!NoImp)
+    addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+                           bool NoImp)
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
+  if (!NoImp)
+    NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  if (!NoImp)
+    addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+}
+
+/// MachineInstr ctor - Work exactly the same as the ctor two above, except
+/// that the MachineInstr is created and added to the end of the specified
+/// basic block.
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
+  : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+    MemRefs(0), MemRefsEnd(0), Parent(0) {
+  assert(MBB && "Cannot use inserting ctor with null basic block!");
+  NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
+  Operands.reserve(NumImplicitOps + TID->getNumOperands());
+  addImplicitDefUseOperands();
+  // Make sure that we get added to a machine basicblock
+  LeakDetector::addGarbageObject(this);
+  MBB->push_back(this); // Add instruction to end of basic block!
+}
+
+/// MachineInstr ctor - As above, but with a DebugLoc.
+/// +MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, + const TargetInstrDesc &tid) + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { + assert(MBB && "Cannot use inserting ctor with null basic block!"); + NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); + Operands.reserve(NumImplicitOps + TID->getNumOperands()); + addImplicitDefUseOperands(); + // Make sure that we get added to a machine basicblock + LeakDetector::addGarbageObject(this); + MBB->push_back(this); // Add instruction to end of basic block! +} + +/// MachineInstr ctor - Copies MachineInstr arg exactly +/// +MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) + : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), + Parent(0), debugLoc(MI.getDebugLoc()) { + Operands.reserve(MI.getNumOperands()); + + // Add operands + for (unsigned i = 0; i != MI.getNumOperands(); ++i) + addOperand(MI.getOperand(i)); + NumImplicitOps = MI.NumImplicitOps; + + // Copy all the flags. + Flags = MI.Flags; + + // Set parent to null. + Parent = 0; + + LeakDetector::addGarbageObject(this); +} + +MachineInstr::~MachineInstr() { + LeakDetector::removeGarbageObject(this); +#ifndef NDEBUG + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); + assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) && + "Reg operand def/use list corrupted"); + } +#endif +} + +/// getRegInfo - If this instruction is embedded into a MachineFunction, +/// return the MachineRegisterInfo object for the current function, otherwise +/// return null. +MachineRegisterInfo *MachineInstr::getRegInfo() { + if (MachineBasicBlock *MBB = getParent()) + return &MBB->getParent()->getRegInfo(); + return 0; +} + +/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands already be on their use lists. +void MachineInstr::RemoveRegOperandsFromUseLists() { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } +} + +/// AddRegOperandsToUseLists - Add all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands not be on their use lists yet. +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &RegInfo) { + for (unsigned i = 0, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(&RegInfo); + } +} + + +/// addOperand - Add the specified operand to the instruction. If it is an +/// implicit operand, it is added to the end of the operand list. If it is +/// an explicit operand it is added at the end of the explicit operand list +/// (before the first implicit operand). +void MachineInstr::addOperand(const MachineOperand &Op) { + bool isImpReg = Op.isReg() && Op.isImplicit(); + assert((isImpReg || !OperandsComplete()) && + "Trying to add an operand to a machine instr that is already done!"); + + MachineRegisterInfo *RegInfo = getRegInfo(); + + // If we are adding the operand to the end of the list, our job is simpler. + // This is true most of the time, so this is a reasonable optimization. + if (isImpReg || NumImplicitOps == 0) { + // We can only do this optimization if we know that the operand list won't + // reallocate. 
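+    // (A reallocation would leave every register operand's use-list neighbors
+    // pointing at freed memory, since use lists link MachineOperands by
+    // address; the fallback paths below unlink and relink them instead.)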
+ if (Operands.empty() || Operands.size()+1 <= Operands.capacity()) { + Operands.push_back(Op); + + // Set the parent of the operand. + Operands.back().ParentMI = this; + + // If the operand is a register, update the operand's use list. + if (Op.isReg()) { + Operands.back().AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + unsigned OpNo = Operands.size() - 1; + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + return; + } + } + + // Otherwise, we have to insert a real operand before any implicit ones. + unsigned OpNo = Operands.size()-NumImplicitOps; + + // If this instruction isn't embedded into a function, then we don't need to + // update any operand lists. + if (RegInfo == 0) { + // Simple insertion, no reginfo update needed for other register operands. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Do explicitly set the reginfo for this operand though, to ensure the + // next/prev fields are properly nulled out. + if (Operands[OpNo].isReg()) { + Operands[OpNo].AddRegOperandToRegInfo(0); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + + } else if (Operands.size()+1 <= Operands.capacity()) { + // Otherwise, we have to remove register operands from their register use + // list, add the operand, then add the register operands back to their use + // list. This also must handle the case when the operand list reallocates + // to somewhere else. + + // If insertion of this operand won't cause reallocation of the operand + // list, just remove the implicit operands, add the operand, then re-add all + // the rest of the operands. + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].RemoveRegOperandFromRegInfo(); + } + + // Add the operand. If it is a register, add it to the reg list. + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + if (Operands[OpNo].isReg()) { + Operands[OpNo].AddRegOperandToRegInfo(RegInfo); + // If the register operand is flagged as early, mark the operand as such + if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } + + // Re-add all the implicit ops. + for (unsigned i = OpNo+1, e = Operands.size(); i != e; ++i) { + assert(Operands[i].isReg() && "Should only be an implicit reg!"); + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } else { + // Otherwise, we will be reallocating the operand list. Remove all reg + // operands from their list, then readd them after the operand list is + // reallocated. + RemoveRegOperandsFromUseLists(); + + Operands.insert(Operands.begin()+OpNo, Op); + Operands[OpNo].ParentMI = this; + + // Re-add all the operands. + AddRegOperandsToUseLists(*RegInfo); + + // If the register operand is flagged as early, mark the operand as such + if (Operands[OpNo].isReg() + && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1) + Operands[OpNo].setIsEarlyClobber(true); + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. +/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < Operands.size() && "Invalid operand number"); + + // Special case removing the last one. 
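+  // (Removing the last operand shifts nothing, so only that operand needs to
+  // be unlinked from its use list.)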
+ if (OpNo == Operands.size()-1) { + // If needed, remove from the reg def/use list. + if (Operands.back().isReg() && Operands.back().isOnRegUseList()) + Operands.back().RemoveRegOperandFromRegInfo(); + + Operands.pop_back(); + return; + } + + // Otherwise, we are removing an interior operand. If we have reginfo to + // update, remove all operands that will be shifted down from their reg lists, + // move everything down, then re-add them. + MachineRegisterInfo *RegInfo = getRegInfo(); + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].RemoveRegOperandFromRegInfo(); + } + } + + Operands.erase(Operands.begin()+OpNo); + + if (RegInfo) { + for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { + if (Operands[i].isReg()) + Operands[i].AddRegOperandToRegInfo(RegInfo); + } + } +} + +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. +void MachineInstr::addMemOperand(MachineFunction &MF, + MachineMemOperand *MO) { + mmo_iterator OldMemRefs = MemRefs; + mmo_iterator OldMemRefsEnd = MemRefsEnd; + + size_t NewNum = (MemRefsEnd - MemRefs) + 1; + mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); + mmo_iterator NewMemRefsEnd = NewMemRefs + NewNum; + + std::copy(OldMemRefs, OldMemRefsEnd, NewMemRefs); + NewMemRefs[NewNum - 1] = MO; + + MemRefs = NewMemRefs; + MemRefsEnd = NewMemRefsEnd; +} + +bool MachineInstr::isIdenticalTo(const MachineInstr *Other, + MICheckType Check) const { + // If opcodes or number of operands are not the same then the two + // instructions are obviously not identical. + if (Other->getOpcode() != getOpcode() || + Other->getNumOperands() != getNumOperands()) + return false; + + // Check operands to make sure they match. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + // Clients may or may not want to ignore defs when testing for equality. + // For example, machine CSE pass only cares about finding common + // subexpressions, so it's safe to ignore virtual register defs. + if (Check != CheckDefs && MO.isReg() && MO.isDef()) { + if (Check == IgnoreDefs) + continue; + // Check == IgnoreVRegDefs + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) + if (MO.getReg() != OMO.getReg()) + return false; + } else if (!MO.isIdenticalTo(OMO)) + return false; + } + return true; +} + +/// removeFromParent - This method unlinks 'this' from the containing basic +/// block, and returns it, but does not delete it. +MachineInstr *MachineInstr::removeFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->remove(this); + return this; +} + + +/// eraseFromParent - This method unlinks 'this' from the containing basic +/// block, and deletes it. +void MachineInstr::eraseFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase(this); +} + + +/// OperandComplete - Return true if it's illegal to add a new operand +/// +bool MachineInstr::OperandsComplete() const { + unsigned short NumOperands = TID->getNumOperands(); + if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands) + return true; // Broken: we have all the operands of this instruction! 
+  return false;
+}
+
+/// getNumExplicitOperands - Returns the number of non-implicit operands.
+///
+unsigned MachineInstr::getNumExplicitOperands() const {
+  unsigned NumOperands = TID->getNumOperands();
+  if (!TID->isVariadic())
+    return NumOperands;
+
+  for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit())
+      NumOperands++;
+  }
+  return NumOperands;
+}
+
+bool MachineInstr::isStackAligningInlineAsm() const {
+  if (isInlineAsm()) {
+    unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+    if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+      return true;
+  }
+  return false;
+}
+
+/// findRegisterUseOperandIdx() - Returns the operand index of a use of the
+/// specified register, or -1 if it is not found. It further tightens the
+/// search criteria to a use that kills the register if isKill is true.
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
+                                            const TargetRegisterInfo *TRI) const {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MOReg == Reg ||
+        (TRI &&
+         TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+         TargetRegisterInfo::isPhysicalRegister(Reg) &&
+         TRI->isSubRegister(MOReg, Reg)))
+      if (!isKill || MO.isKill())
+        return i;
+  }
+  return -1;
+}
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+                                         SmallVectorImpl<unsigned> *Ops) const {
+  bool PartDef = false; // Partial redefine.
+  bool FullDef = false; // Full define.
+  bool Use = false;
+
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.getReg() != Reg)
+      continue;
+    if (Ops)
+      Ops->push_back(i);
+    if (MO.isUse())
+      Use |= !MO.isUndef();
+    else if (MO.getSubReg())
+      PartDef = true;
+    else
+      FullDef = true;
+  }
+  // A partial redefine uses Reg unless there is also a full define.
+  return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
+/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
+/// the specified register or -1 if it is not found. If isDead is true, defs
+/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
+/// also checks if there is a def of a super-register.
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+                                        const TargetRegisterInfo *TRI) const {
+  bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    unsigned MOReg = MO.getReg();
+    bool Found = (MOReg == Reg);
+    if (!Found && TRI && isPhys &&
+        TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+      if (Overlap)
+        Found = TRI->regsOverlap(MOReg, Reg);
+      else
+        Found = TRI->isSubRegister(MOReg, Reg);
+    }
+    if (Found && (!isDead || MO.isDead()))
+      return i;
+  }
+  return -1;
+}
+
+/// findFirstPredOperandIdx() - Find the index of the first operand in the
+/// operand list that is used to represent the predicate. It returns -1 if
+/// none is found.
+int MachineInstr::findFirstPredOperandIdx() const { + const TargetInstrDesc &TID = getDesc(); + if (TID.isPredicable()) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (TID.OpInfo[i].isPredicate()) + return i; + } + + return -1; +} + +/// isRegTiedToUseOperand - Given the index of a register def operand, +/// check if the register def is tied to a source operand, due to either +/// two-address elimination or inline assembly constraints. Returns the +/// first tied use operand index by reference is UseOpIdx is not null. +bool MachineInstr:: +isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { + if (isInlineAsm()) { + assert(DefOpIdx > InlineAsm::MIOp_FirstOperand); + const MachineOperand &MO = getOperand(DefOpIdx); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + return false; + // Determine the actual operand index that corresponds to this index. + unsigned DefNo = 0; + unsigned DefPart = 0; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i < e; ) { + const MachineOperand &FMO = getOperand(i); + // After the normal asm operands there may be additional imp-def regs. + if (!FMO.isImm()) + return false; + // Skip over this def. + unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm()); + unsigned PrevDef = i + 1; + i = PrevDef + NumOps; + if (i > DefOpIdx) { + DefPart = DefOpIdx - PrevDef; + break; + } + ++DefNo; + } + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); + i != e; ++i) { + const MachineOperand &FMO = getOperand(i); + if (!FMO.isImm()) + continue; + if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse()) + continue; + unsigned Idx; + if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && + Idx == DefNo) { + if (UseOpIdx) + *UseOpIdx = (unsigned)i + 1 + DefPart; + return true; + } + } + return false; + } + + assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!"); + const TargetInstrDesc &TID = getDesc(); + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse() && + TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) { + if (UseOpIdx) + *UseOpIdx = (unsigned)i; + return true; + } + } + return false; +} + +/// isRegTiedToDefOperand - Return true if the operand of the specified index +/// is a register use and it is tied to an def operand. It also returns the def +/// operand index by reference. +bool MachineInstr:: +isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { + if (isInlineAsm()) { + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0) + return false; + + // Find the flag operand corresponding to UseOpIdx + unsigned FlagIdx, NumOps=0; + for (FlagIdx = InlineAsm::MIOp_FirstOperand; + FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + const MachineOperand &UFMO = getOperand(FlagIdx); + // After the normal asm operands there may be additional imp-def regs. + if (!UFMO.isImm()) + return false; + NumOps = InlineAsm::getNumOperandRegisters(UFMO.getImm()); + assert(NumOps < getNumOperands() && "Invalid inline asm flag"); + if (UseOpIdx < FlagIdx+NumOps+1) + break; + } + if (FlagIdx >= UseOpIdx) + return false; + const MachineOperand &UFMO = getOperand(FlagIdx); + unsigned DefNo; + if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) { + if (!DefOpIdx) + return true; + + unsigned DefIdx = InlineAsm::MIOp_FirstOperand; + // Remember to adjust the index. 
First operand is asm string, second is + // the HasSideEffects and AlignStack bits, then there is a flag for each. + while (DefNo) { + const MachineOperand &FMO = getOperand(DefIdx); + assert(FMO.isImm()); + // Skip over this def. + DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1; + --DefNo; + } + *DefOpIdx = DefIdx + UseOpIdx - FlagIdx; + return true; + } + return false; + } + + const TargetInstrDesc &TID = getDesc(); + if (UseOpIdx >= TID.getNumOperands()) + return false; + const MachineOperand &MO = getOperand(UseOpIdx); + if (!MO.isReg() || !MO.isUse()) + return false; + int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO); + if (DefIdx == -1) + return false; + if (DefOpIdx) + *DefOpIdx = (unsigned)DefIdx; + return true; +} + +/// clearKillInfo - Clears kill flags on all operands. +/// +void MachineInstr::clearKillInfo() { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } +} + +/// copyKillDeadInfo - Copies kill / dead operand properties from MI. +/// +void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (!MO.isKill() && !MO.isDead())) + continue; + for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) { + MachineOperand &MOp = getOperand(j); + if (!MOp.isIdenticalTo(MO)) + continue; + if (MO.isKill()) + MOp.setIsKill(); + else + MOp.setIsDead(); + break; + } + } +} + +/// copyPredicates - Copies predicate operand(s) from MI. +void MachineInstr::copyPredicates(const MachineInstr *MI) { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (TID.OpInfo[i].isPredicate()) { + // Predicated operands must be last operands. + addOperand(MI->getOperand(i)); + } + } +} + +void MachineInstr::substituteRegister(unsigned FromReg, + unsigned ToReg, + unsigned SubIdx, + const TargetRegisterInfo &RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (SubIdx) + ToReg = RegInfo.getSubReg(ToReg, SubIdx); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substPhysReg(ToReg, RegInfo); + } + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substVirtReg(ToReg, SubIdx, RegInfo); + } + } +} + +/// isSafeToMove - Return true if it is safe to move this instruction. If +/// SawStore is set to true, it means that there is a store (or call) between +/// the instruction's location and its intended destination. +bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, + AliasAnalysis *AA, + bool &SawStore) const { + // Ignore stuff that we obviously can't move. + if (TID->mayStore() || TID->isCall()) { + SawStore = true; + return false; + } + + if (isLabel() || isDebugValue() || + TID->isTerminator() || hasUnmodeledSideEffects()) + return false; + + // See if this instruction does a load. If so, we have to guarantee that the + // loaded value doesn't change between the load and the its intended + // destination. The check for isInvariantLoad gives the targe the chance to + // classify the load as always returning a constant, e.g. a constant pool + // load. 
+  if (TID->mayLoad() && !isInvariantLoad(AA))
+    // Otherwise, this is a real load.  If there is a store between the load
+    // and the end of the block, or if the load is volatile, we can't move it.
+    return !SawStore && !hasVolatileMemoryRef();
+
+  return true;
+}
+
+/// isSafeToReMat - Return true if it's safe to rematerialize the specified
+/// instruction which defined the specified register instead of copying it.
+bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
+                                 AliasAnalysis *AA,
+                                 unsigned DstReg) const {
+  bool SawStore = false;
+  if (!TII->isTriviallyReMaterializable(this, AA) ||
+      !isSafeToMove(TII, AA, SawStore))
+    return false;
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg())
+      continue;
+    // FIXME: For now, do not remat any instruction with register operands.
+    // Later on, we can loosen the restriction if the register operands have
+    // not been modified between the def and use. Note, this is different from
+    // MachineSink because the code is no longer in two-address form (at least
+    // partially).
+    if (MO.isUse())
+      return false;
+    else if (!MO.isDead() && MO.getReg() != DstReg)
+      return false;
+  }
+  return true;
+}
+
+/// hasVolatileMemoryRef - Return true if this instruction may have a
+/// volatile memory reference, or if the information describing the
+/// memory reference is not available. Return false if it is known to
+/// have no volatile memory references.
+bool MachineInstr::hasVolatileMemoryRef() const {
+  // An instruction known never to access memory won't have a volatile access.
+  if (!TID->mayStore() &&
+      !TID->mayLoad() &&
+      !TID->isCall() &&
+      !hasUnmodeledSideEffects())
+    return false;
+
+  // Otherwise, if the instruction has no memory reference information,
+  // conservatively assume it wasn't preserved.
+  if (memoperands_empty())
+    return true;
+
+  // Check the memory reference information for volatile references.
+  for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I)
+    if ((*I)->isVolatile())
+      return true;
+
+  return false;
+}
+
+/// isInvariantLoad - Return true if this instruction is loading from a
+/// location whose value is invariant across the function.  For example,
+/// loading a value from the constant pool or from the argument area
+/// of a function if it does not change.  This should only return true if
+/// *all* loads the instruction does are invariant (if it does multiple loads).
+bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+  // If the instruction doesn't load at all, it isn't an invariant load.
+  if (!TID->mayLoad())
+    return false;
+
+  // If the instruction has lost its memoperands, conservatively assume that
+  // it may not be an invariant load.
+  if (memoperands_empty())
+    return false;
+
+  const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+
+  for (mmo_iterator I = memoperands_begin(),
+       E = memoperands_end(); I != E; ++I) {
+    if ((*I)->isVolatile()) return false;
+    if ((*I)->isStore()) return false;
+
+    if (const Value *V = (*I)->getValue()) {
+      // A load from a constant PseudoSourceValue is invariant.
+      if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
+        if (PSV->isConstant(MFI))
+          continue;
+      // If we have an AliasAnalysis, ask it whether the memory is constant.
+      if (AA && AA->pointsToConstantMemory(
+                      AliasAnalysis::Location(V, (*I)->getSize(),
+                                              (*I)->getTBAAInfo())))
+        continue;
+    }
+
+    // Otherwise assume conservatively.
+    return false;
+  }
+
+  // Everything checks out.
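+  // (Illustrative aside, with nothing here taken from the original patch and
+  // the usual TII and AA handles assumed: MachineLICM gates hoisting on
+  //   bool DontMoveAcrossStore = true;
+  //   if (MI->isSafeToMove(TII, AA, DontMoveAcrossStore)) ...
+  // and isSafeToMove in turn consults this predicate for loads.)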
+  return true;
+}
+
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+  if (!isPHI())
+    return 0;
+  assert(getNumOperands() >= 3 &&
+         "It's illegal to have a PHI without source operands");
+
+  unsigned Reg = getOperand(1).getReg();
+  for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+    if (getOperand(i).getReg() != Reg)
+      return 0;
+  return Reg;
+}
+
+bool MachineInstr::hasUnmodeledSideEffects() const {
+  if (getDesc().hasUnmodeledSideEffects())
+    return true;
+  if (isInlineAsm()) {
+    unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+    if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+      return true;
+  }
+
+  return false;
+}
+
+/// allDefsAreDead - Return true if all the defs of this instruction are dead.
+///
+bool MachineInstr::allDefsAreDead() const {
+  for (unsigned i = 0, e = getNumOperands(); i < e; ++i) {
+    const MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || MO.isUse())
+      continue;
+    if (!MO.isDead())
+      return false;
+  }
+  return true;
+}
+
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+  for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.isImplicit())
+      addOperand(MO);
+  }
+}
+
+void MachineInstr::dump() const {
+  dbgs() << "  " << *this;
+}
+
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+                          raw_ostream &CommentOS) {
+  const LLVMContext &Ctx = MF->getFunction()->getContext();
+  if (!DL.isUnknown()) {          // Print source line info.
+    DIScope Scope(DL.getScope(Ctx));
+    // Omit the directory, because it's likely to be long and uninteresting.
+    if (Scope.Verify())
+      CommentOS << Scope.getFilename();
+    else
+      CommentOS << "<unknown>";
+    CommentOS << ':' << DL.getLine();
+    if (DL.getCol() != 0)
+      CommentOS << ':' << DL.getCol();
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+    if (!InlinedAtDL.isUnknown()) {
+      CommentOS << " @[ ";
+      printDebugLoc(InlinedAtDL, MF, CommentOS);
+      CommentOS << " ]";
+    }
+  }
+}
+
+void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
+  // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+  const MachineFunction *MF = 0;
+  const MachineRegisterInfo *MRI = 0;
+  if (const MachineBasicBlock *MBB = getParent()) {
+    MF = MBB->getParent();
+    if (!TM && MF)
+      TM = &MF->getTarget();
+    if (MF)
+      MRI = &MF->getRegInfo();
+  }
+
+  // Save a list of virtual registers.
+  SmallVector<unsigned, 8> VirtRegs;
+
+  // Print explicitly defined operands on the left of an assignment syntax.
+  unsigned StartOp = 0, e = getNumOperands();
+  for (; StartOp < e && getOperand(StartOp).isReg() &&
+         getOperand(StartOp).isDef() &&
+         !getOperand(StartOp).isImplicit();
+       ++StartOp) {
+    if (StartOp != 0) OS << ", ";
+    getOperand(StartOp).print(OS, TM);
+    unsigned Reg = getOperand(StartOp).getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      VirtRegs.push_back(Reg);
+  }
+
+  if (StartOp != 0)
+    OS << " = ";
+
+  // Print the opcode name.
+  OS << getDesc().getName();
+
+  // Print the rest of the operands.
+  bool OmittedAnyCallClobbers = false;
+  bool FirstOp = true;
+
+  if (isInlineAsm()) {
+    // Print asm string.
+ OS << " "; + getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); + + // Print HasSideEffects, IsAlignStack + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + OS << " [alignstack]"; + + StartOp = InlineAsm::MIOp_FirstOperand; + FirstOp = false; + } + + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. + if (MF && getDesc().isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + const MachineRegisterInfo &MRI = MF->getRegInfo(); + if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + bool HasAliasLive = false; + for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg); + unsigned AliasReg = *Alias; ++Alias) + if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + HasAliasLive = true; + break; + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; + OS << " "; + if (i < getDesc().NumOperands) { + const TargetOperandInfo &TOI = getDesc().OpInfo[i]; + if (TOI.isPredicate()) + OS << "pred:"; + if (TOI.isOptionalDef()) + OS << "opt:"; + } + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + const MDNode *MD = MO.getMetadata(); + if (const MDString *MDS = dyn_cast(MD->getOperand(2))) + OS << "!\"" << MDS->getString() << '\"'; + else + MO.print(OS, TM); + } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); + } else + MO.print(OS, TM); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (!FirstOp) OS << ","; + OS << " ..."; + } + + bool HaveSemi = false; + if (Flags) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + } + + if (!memoperands_empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + + OS << " mem:"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + OS << **i; + if (llvm::next(i) != e) + OS << " "; + } + } + + // Print the regclass of any virtual registers encountered. + if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]); + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << PrintReg(VirtRegs[j]); + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + + // Print debug location information. 
+ if (!debugLoc.isUnknown() && MF) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " dbg:"; + printDebugLoc(debugLoc, MF, OS); + } + + OS << '\n'; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. + return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +bool MachineInstr::addRegisterDead(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && RegInfo->getAliasSet(IncomingReg); + bool Found = false; + SmallVector DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isDead()) + // The register is already marked dead. + return true; + MO.setIsDead(); + Found = true; + } + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // There exists a super-register that's marked dead. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->getSubRegisters(IncomingReg) && + RegInfo->getSuperRegisters(Reg) && + RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. 
+  if (Found || !AddIfNotFound)
+    return Found;
+
+  addOperand(MachineOperand::CreateReg(IncomingReg,
+                                       true  /*IsDef*/,
+                                       true  /*IsImp*/,
+                                       false /*IsKill*/,
+                                       true  /*IsDead*/));
+  return true;
+}
+
+void MachineInstr::addRegisterDefined(unsigned IncomingReg,
+                                      const TargetRegisterInfo *RegInfo) {
+  if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+    MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+    if (MO)
+      return;
+  } else {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = getOperand(i);
+      if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+          MO.getSubReg() == 0)
+        return;
+    }
+  }
+  addOperand(MachineOperand::CreateReg(IncomingReg,
+                                       true  /*IsDef*/,
+                                       true  /*IsImp*/));
+}
+
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+                                         const TargetRegisterInfo &TRI) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    bool Dead = true;
+    for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+         E = UsedRegs.end(); I != E; ++I)
+      if (TRI.regsOverlap(*I, Reg)) {
+        Dead = false;
+        break;
+      }
+    // If there are no uses, including partial uses, the def is dead.
+    if (Dead) MO.setIsDead();
+  }
+}
+
+unsigned
+MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
+  unsigned Hash = MI->getOpcode() * 37;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    uint64_t Key = (uint64_t)MO.getType() << 32;
+    switch (MO.getType()) {
+    default: break;
+    case MachineOperand::MO_Register:
+      if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+        continue; // Skip virtual register defs.
+      Key |= MO.getReg();
+      break;
+    case MachineOperand::MO_Immediate:
+      Key |= MO.getImm();
+      break;
+    case MachineOperand::MO_FrameIndex:
+    case MachineOperand::MO_ConstantPoolIndex:
+    case MachineOperand::MO_JumpTableIndex:
+      Key |= MO.getIndex();
+      break;
+    case MachineOperand::MO_MachineBasicBlock:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+      break;
+    case MachineOperand::MO_GlobalAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+      break;
+    case MachineOperand::MO_BlockAddress:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+      break;
+    case MachineOperand::MO_MCSymbol:
+      Key |= DenseMapInfo<void*>::getHashValue(MO.getMCSymbol());
+      break;
+    }
+    Key += ~(Key << 32);
+    Key ^= (Key >> 22);
+    Key += ~(Key << 13);
+    Key ^= (Key >> 8);
+    Key += (Key << 3);
+    Key ^= (Key >> 15);
+    Key += ~(Key << 27);
+    Key ^= (Key >> 31);
+    Hash = (unsigned)Key + Hash * 37;
+  }
+  return Hash;
+}
diff --git a/final/lib/CodeGen/MachineLICM.cpp b/final/lib/CodeGen/MachineLICM.cpp
new file mode 100644
index 00000000000..1c0f6ade856
--- /dev/null
+++ b/final/lib/CodeGen/MachineLICM.cpp
@@ -0,0 +1,1202 @@
+//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs loop invariant code motion on machine instructions. We
+// attempt to remove as much code from the body of a loop as possible.
+//
+// This pass does not attempt to throttle itself to limit register pressure.
+// The register allocation phases are expected to perform rematerialization
+// to recover when register pressure is high.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
+// constructs that are not exposed before lowering and instruction selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-licm"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+STATISTIC(NumHoisted,
+          "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+          "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+          "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+          "Number of hoisted machine instructions CSEed");
+STATISTIC(NumPostRAHoisted,
+          "Number of machine instructions hoisted out of loops post regalloc");
+
+namespace {
+  class MachineLICM : public MachineFunctionPass {
+    bool PreRegAlloc;
+
+    const TargetMachine      *TM;
+    const TargetInstrInfo    *TII;
+    const TargetLowering     *TLI;
+    const TargetRegisterInfo *TRI;
+    const MachineFrameInfo   *MFI;
+    MachineRegisterInfo      *MRI;
+    const InstrItineraryData *InstrItins;
+
+    // Various analyses that we use...
+    AliasAnalysis        *AA;      // Alias analysis info.
+    MachineLoopInfo      *MLI;     // Current MachineLoopInfo
+    MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
+
+    // State that is updated as we process loops
+    bool         Changed;          // True if a loop is changed.
+    bool         FirstInLoop;      // True if it's the first LICM in the loop.
+    MachineLoop *CurLoop;          // The current loop we are working on.
+    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+
+    BitVector AllocatableSet;
+
+    // Track 'estimated' register pressure.
+    SmallSet<unsigned, 32> RegSeen;
+    SmallVector<unsigned, 8> RegPressure;
+
+    // Register pressure "limit" per register class. If the pressure
+    // is higher than the limit, then it's considered high.
+    SmallVector<unsigned, 8> RegLimit;
+
+    // Register pressure on path leading from loop preheader to current BB.
+    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
+    // For each opcode, keep a list of potential CSE instructions.
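+    // (Illustrative shape only, with made-up virtual register names: after
+    // two hoistable adds with a shared opcode are seen, the bucket holds
+    //   CSEMap[Opc] == { %vreg1 = ADD %vreg2, %vreg3,
+    //                    %vreg4 = ADD %vreg5, %vreg6 }
+    // and LookForDuplicate() scans just that bucket for a match.)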
+    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    MachineLICM() :
+      MachineFunctionPass(ID), PreRegAlloc(true) {
+        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+      }
+
+    explicit MachineLICM(bool PreRA) :
+      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+      }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    const char *getPassName() const { return "Machine Instruction LICM"; }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      RegSeen.clear();
+      RegPressure.clear();
+      RegLimit.clear();
+      BackTrace.clear();
+      for (DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+             CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+        CI->second.clear();
+      CSEMap.clear();
+    }
+
+  private:
+    /// CandidateInfo - Keep track of information about hoisting candidates.
+    struct CandidateInfo {
+      MachineInstr *MI;
+      unsigned      Def;
+      int           FI;
+      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
+        : MI(mi), Def(def), FI(fi) {}
+    };
+
+    /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+    /// invariants out to the preheader.
+    void HoistRegionPostRA();
+
+    /// HoistPostRA - When an instruction is found to only use loop invariant
+    /// operands that are safe to hoist, this function is called to do the
+    /// dirty work.
+    void HoistPostRA(MachineInstr *MI, unsigned Def);
+
+    /// ProcessMI - Examine the instruction as a potential LICM candidate. Also
+    /// gather register def and frame object update information.
+    void ProcessMI(MachineInstr *MI, unsigned *PhysRegDefs,
+                   SmallSet<int, 32> &StoredFIs,
+                   SmallVector<CandidateInfo, 32> &Candidates);
+
+    /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
+    /// current loop.
+    void AddToLiveIns(unsigned Reg);
+
+    /// IsLICMCandidate - Returns true if the instruction may be a suitable
+    /// candidate for LICM. e.g. If the instruction is a call, then it's
+    /// obviously not safe to hoist it.
+    bool IsLICMCandidate(MachineInstr &I);
+
+    /// IsLoopInvariantInst - Returns true if the instruction is loop
+    /// invariant. I.e., all virtual register operands are defined outside of
+    /// the loop, physical registers aren't accessed (explicitly or implicitly),
+    /// and the instruction is hoistable.
+    ///
+    bool IsLoopInvariantInst(MachineInstr &I);
+
+    /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+    /// and a use in the current loop, return true if the target considered
+    /// it 'high'.
+    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+                               unsigned Reg) const;
+
+    bool IsCheapInstruction(MachineInstr &MI) const;
+
+    /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+    /// if hoisting an instruction with the given cost matrix can cause high
+    /// register pressure.
+    bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+    /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+    /// the current block and update their register pressures to reflect the
+    /// effect of hoisting MI from the current block to the preheader.
+    void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
+    /// IsProfitableToHoist - Return true if it is potentially profitable to
+    /// hoist the given loop invariant.
+    bool IsProfitableToHoist(MachineInstr &MI);
+
+    /// HoistRegion - Walk the specified region of the CFG (defined by all
+    /// blocks dominated by the specified block, and that are in the current
+    /// loop) in depth first order w.r.t the DominatorTree. This allows us to
+    /// visit definitions before uses, allowing us to hoist a loop body in one
+    /// pass without iteration.
+    ///
+    void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+    /// InitRegPressure - Find all virtual register references that are liveout
+    /// of the preheader to initialize the starting "register pressure". Note
+    /// this does not count live through (livein but not used) registers.
+    void InitRegPressure(MachineBasicBlock *BB);
+
+    /// UpdateRegPressure - Update estimate of register pressure after the
+    /// specified instruction.
+    void UpdateRegPressure(const MachineInstr *MI);
+
+    /// ExtractHoistableLoad - Unfold a load from the given MachineInstr if
+    /// the load itself could be hoisted. Return the unfolded and hoistable
+    /// load, or null if the load couldn't be unfolded or if it wouldn't
+    /// be hoistable.
+    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+
+    /// LookForDuplicate - Find an instruction among PrevMIs that is a
+    /// duplicate of MI. Return this instruction if it's found.
+    const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+                                         std::vector<const MachineInstr*> &PrevMIs);
+
+    /// EliminateCSE - Given a LICM'ed instruction, look for an instruction in
+    /// the preheader that computes the same value. If one is found, replace
+    /// all uses of the instruction's definitions with the existing
+    /// instruction's definitions rather than hoisting it to the preheader.
+    bool EliminateCSE(MachineInstr *MI,
+              DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
+
+    /// Hoist - When an instruction is found to only use loop invariant
+    /// operands that are safe to hoist, this function is called to do the
+    /// dirty work. It returns true if the instruction is hoisted.
+    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
+
+    /// InitCSEMap - Initialize the CSE map with instructions that are in the
+    /// current loop preheader that may become duplicates of instructions that
+    /// are hoisted out of the loop.
+    void InitCSEMap(MachineBasicBlock *BB);
+
+    /// getCurPreheader - Get the preheader for the current loop, splitting
+    /// a critical edge if needed.
+    MachineBasicBlock *getCurPreheader();
+  };
+} // end anonymous namespace
+
+char MachineLICM::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+                "Machine Loop Invariant Code Motion", false, false)
+
+FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
+  return new MachineLICM(PreRegAlloc);
+}
+
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+  // Check whether this loop even has a unique predecessor.
+  if (!CurLoop->getLoopPredecessor())
+    return false;
+  // Ok, now check to see if any of its outer loops do.
+  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
+    if (L->getLoopPredecessor())
+      return false;
+  // None of them did, so this is the outermost with a unique predecessor.
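+  // (Illustration, assuming a two-deep nest  for(i){ for(j){...} } : if the
+  // i-loop has a unique predecessor it is handled whole, body and j-loop
+  // included; otherwise runOnMachineFunction falls back to queueing the
+  // j-loop on its worklist.)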
+ return true; +} + +bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n"); + + Changed = FirstInLoop = false; + TM = &MF.getTarget(); + TII = TM->getInstrInfo(); + TLI = TM->getTargetLowering(); + TRI = TM->getRegisterInfo(); + MFI = MF.getFrameInfo(); + MRI = &MF.getRegInfo(); + InstrItins = TM->getInstrItineraryData(); + AllocatableSet = TRI->getAllocatableSet(MF); + + if (PreRegAlloc) { + // Estimate register pressure during pre-regalloc pass. + unsigned NumRC = TRI->getNumRegClasses(); + RegPressure.resize(NumRC); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + RegLimit.resize(NumRC); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); + } + + // Get our Loop information... + MLI = &getAnalysis(); + DT = &getAnalysis(); + AA = &getAnalysis(); + + SmallVector Worklist(MLI->begin(), MLI->end()); + while (!Worklist.empty()) { + CurLoop = Worklist.pop_back_val(); + CurPreheader = 0; + + // If this is done before regalloc, only visit outer-most preheader-sporting + // loops. + if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { + Worklist.append(CurLoop->begin(), CurLoop->end()); + continue; + } + + if (!PreRegAlloc) + HoistRegionPostRA(); + else { + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + FirstInLoop = true; + HoistRegion(N, true); + CSEMap.clear(); + } + } + + return Changed; +} + +/// InstructionStoresToFI - Return true if instruction stores to the +/// specified frame. +static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); o != oe; ++o) { + if (!(*o)->isStore() || !(*o)->getValue()) + continue; + if (const FixedStackPseudoSourceValue *Value = + dyn_cast((*o)->getValue())) { + if (Value->getFrameIndex() == FI) + return true; + } + } + return false; +} + +/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// gather register def and frame object update information. +void MachineLICM::ProcessMI(MachineInstr *MI, + unsigned *PhysRegDefs, + SmallSet &StoredFIs, + SmallVector &Candidates) { + bool RuledOut = false; + bool HasNonInvariantUse = false; + unsigned Def = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isFI()) { + // Remember if the instruction stores to the frame index. + int FI = MO.getIndex(); + if (!StoredFIs.count(FI) && + MFI->isSpillSlotObjectIndex(FI) && + InstructionStoresToFI(MI, FI)) + StoredFIs.insert(FI); + HasNonInvariantUse = true; + continue; + } + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Not expecting virtual register!"); + + if (!MO.isDef()) { + if (Reg && PhysRegDefs[Reg]) + // If it's using a non-loop-invariant register, then it's obviously not + // safe to hoist. + HasNonInvariantUse = true; + continue; + } + + if (MO.isImplicit()) { + ++PhysRegDefs[Reg]; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + ++PhysRegDefs[*AS]; + if (!MO.isDead()) + // Non-dead implicit def? 
+        // This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe.
+    if (++PhysRegDefs[Reg] > 1)
+      // MI defined register is seen defined by another instruction in
+      // the loop, it cannot be a LICM candidate.
+      RuledOut = true;
+    for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+      if (++PhysRegDefs[*AS] > 1)
+        RuledOut = true;
+  }
+
+  // Only consider reloads for now and remats which do not have register
+  // operands. FIXME: Consider unfolding load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
+/// invariants out to the preheader.
+void MachineLICM::HoistRegionPostRA() {
+  unsigned NumRegs = TRI->getNumRegs();
+  unsigned *PhysRegDefs = new unsigned[NumRegs];
+  std::fill(PhysRegDefs, PhysRegDefs + NumRegs, 0);
+
+  SmallVector<CandidateInfo, 32> Candidates;
+  SmallSet<int, 32> StoredFIs;
+
+  // Walk the entire region, count number of defs for each register, and
+  // collect potential LICM candidates.
+  const std::vector<MachineBasicBlock*> Blocks = CurLoop->getBlocks();
+  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+    MachineBasicBlock *BB = Blocks[i];
+    // Conservatively treat live-in's as an external def.
+    // FIXME: That means a reload that's reused in successor block(s) will not
+    // be LICM'ed.
+    for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+           E = BB->livein_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      ++PhysRegDefs[Reg];
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        ++PhysRegDefs[*AS];
+    }
+
+    for (MachineBasicBlock::iterator
+           MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
+      MachineInstr *MI = &*MII;
+      ProcessMI(MI, PhysRegDefs, StoredFIs, Candidates);
+    }
+  }
+
+  // Now evaluate whether the potential candidates qualify.
+  // 1. Check if the candidate defined register is defined by another
+  //    instruction in the loop.
+  // 2. If the candidate is a load from stack slot (always true for now),
+  //    check if the slot is stored anywhere in the loop.
+  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
+    if (Candidates[i].FI != INT_MIN &&
+        StoredFIs.count(Candidates[i].FI))
+      continue;
+
+    if (PhysRegDefs[Candidates[i].Def] == 1) {
+      bool Safe = true;
+      MachineInstr *MI = Candidates[i].MI;
+      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+        const MachineOperand &MO = MI->getOperand(j);
+        if (!MO.isReg() || MO.isDef() || !MO.getReg())
+          continue;
+        if (PhysRegDefs[MO.getReg()]) {
+          // If it's using a non-loop-invariant register, then it's obviously
+          // not safe to hoist.
+          Safe = false;
+          break;
+        }
+      }
+      if (Safe)
+        HoistPostRA(MI, Candidates[i].Def);
+    }
+  }
+
+  delete[] PhysRegDefs;
+}
+
+/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
+/// loop, and make sure it is not killed by any instructions in the loop.
+void MachineLICM::AddToLiveIns(unsigned Reg) { + const std::vector Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (!BB->isLiveIn(Reg)) + BB->addLiveIn(Reg); + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; + if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) + MO.setIsKill(false); + } + } + } +} + +/// HoistPostRA - When an instruction is found to only use loop invariant +/// operands that is safe to hoist, this instruction is called to do the +/// dirty work. +void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) return; + + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + dbgs() << "Hoisting " << *MI; + if (Preheader->getBasicBlock()) + dbgs() << " to MachineBasicBlock " + << Preheader->getName(); + if (MI->getParent()->getBasicBlock()) + dbgs() << " from MachineBasicBlock " + << MI->getParent()->getName(); + dbgs() << "\n"; + }); + + // Splice the instruction to the preheader. + MachineBasicBlock *MBB = MI->getParent(); + Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); + + // Add register to livein list to all the BBs in the current loop since a + // loop invariant must be kept live throughout the whole loop. This is + // important to ensure later passes do not scavenge the def register. + AddToLiveIns(Def); + + ++NumPostRAHoisted; + Changed = true; +} + +/// HoistRegion - Walk the specified region of the CFG (defined by all blocks +/// dominated by the specified block, and that are in the current loop) in depth +/// first order w.r.t the DominatorTree. This allows us to visit definitions +/// before uses, allowing us to hoist a loop body in one pass without iteration. +/// +void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) { + assert(N != 0 && "Null dominator tree node?"); + MachineBasicBlock *BB = N->getBlock(); + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) return; + + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + if (IsHeader) { + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + } + + // Remember livein register pressure. + BackTrace.push_back(RegPressure); + + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. 
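+  // (Heuristic note: the succ_size() < 25 cutoff below treats a block with
+  // that many successors, typically a switch lowered to a jump table, as
+  // "large"; the constant is a tuning value, not a hard architectural limit.)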
+ if (BB->succ_size() < 25) { + const std::vector &Children = N->getChildren(); + for (unsigned I = 0, E = Children.size(); I != E; ++I) + HoistRegion(Children[I]); + } + + BackTrace.pop_back(); +} + +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + +/// InitRegPressure - Find all virtual register references that are liveout of +/// the preheader to initialize the starting "register pressure". Note this +/// does not count live through (livein but not used) registers. +void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { + std::fill(RegPressure.begin(), RegPressure.end(), 0); + + // If the preheader has only a single predecessor and it ends with a + // fallthrough or an unconditional branch, then scan its predecessor for live + // defs as well. This happens whenever the preheader is created by splitting + // the critical edge from the loop predecessor to the loop header. + if (BB->pred_size() == 1) { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) + InitRegPressure(*BB->pred_begin()); + } + + for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); + MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (MO.isDef()) + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill) + // Haven't seen this, it must be a livein. + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + else if (!isNew && isKill) + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + } +} + +/// UpdateRegPressure - Update estimate of register pressure after the +/// specified instruction. +void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + SmallVector Defs; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + bool isNew = RegSeen.insert(Reg); + if (MO.isDef()) + Defs.push_back(Reg); + else if (!isNew && isOperandKill(MO, MRI)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + + if (RCCost > RegPressure[RCId]) + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= RCCost; + } + } + + while (!Defs.empty()) { + unsigned Reg = Defs.pop_back_val(); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + RegPressure[RCId] += RCCost; + } +} + +/// IsLICMCandidate - Returns true if the instruction may be a suitable +/// candidate for LICM. e.g. If the instruction is a call, then it's obviously +/// not safe to hoist it. 
+bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + // Check if it's safe to move the instruction. + bool DontMoveAcrossStore = true; + if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) + return false; + + return true; +} + +/// IsLoopInvariantInst - Returns true if the instruction is loop +/// invariant. I.e., all virtual register operands are defined outside of the +/// loop, physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + if (!IsLICMCandidate(I)) + return false; + + // The instruction is loop invariant if all of its operands are. + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I.getOperand(i); + + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + // Don't hoist an instruction that uses or defines a physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI->def_empty(Reg)) + return false; + if (AllocatableSet.test(Reg)) + return false; + // Check for a def among the register's aliases too. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (!MRI->def_empty(AliasReg)) + return false; + if (AllocatableSet.test(AliasReg)) + return false; + } + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } else if (CurLoop->getHeader()->isLiveIn(Reg)) { + // If the reg is live into the loop, we can't hoist an instruction + // which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && + "Machine instr not mapped for this vreg?!"); + + // If the loop contains the definition of an operand, then the instruction + // isn't loop invariant. + if (CurLoop->contains(MRI->getVRegDef(Reg))) + return false; + } + + // If we got this far, the instruction is loop invariant! + return true; +} + + +/// HasPHIUses - Return true if the specified register has any PHI use. +static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->isPHI()) + return true; + } + return false; +} + + +/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' +/// and an use in the current loop, return true if the target considered +/// it 'high'. 
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) const { + if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg)) + return false; + + for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg), + E = MRI->use_nodbg_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (UseMI->isCopyLike()) + continue; + if (!CurLoop->contains(UseMI->getParent())) + continue; + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i)) + return true; + } + + // Only look at the first in loop use. + break; + } + + return false; +} + +/// IsCheapInstruction - Return true if the instruction is marked "cheap" or +/// the operand latency between its def and a use is one or less. +bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { + if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike()) + return true; + if (!InstrItins || InstrItins->isEmpty()) + return false; + + bool isCheap = false; + unsigned NumDefs = MI.getDesc().getNumDefs(); + for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &DefMO = MI.getOperand(i); + if (!DefMO.isReg() || !DefMO.isDef()) + continue; + --NumDefs; + unsigned Reg = DefMO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + if (!TII->hasLowDefLatency(InstrItins, &MI, i)) + return false; + isCheap = true; + } + + return isCheap; +} + +/// CanCauseHighRegPressure - Visit BBs from header to current BB, check +/// if hoisting an instruction of the given cost matrix can cause high +/// register pressure. +bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost) { + for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); + CI != CE; ++CI) { + if (CI->second <= 0) + continue; + + unsigned RCId = CI->first; + for (unsigned i = BackTrace.size(); i != 0; --i) { + SmallVector &RP = BackTrace[i-1]; + if (RP[RCId] + CI->second >= RegLimit[RCId]) + return true; + } + } + + return false; +} + +/// UpdateBackTraceRegPressure - Traverse the back trace from header to the +/// current block and update their register pressures to reflect the effect +/// of hoisting MI from the current block to the preheader. +void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { + if (MI->isImplicitDef()) + return; + + // First compute the 'cost' of the instruction, i.e. its contribution + // to register pressure. + DenseMap Cost; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + EVT VT = *RC->vt_begin(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned RCCost = TLI->getRepRegClassCostFor(VT); + if (MO.isDef()) { + DenseMap::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second += RCCost; + else + Cost.insert(std::make_pair(RCId, RCCost)); + } else if (isOperandKill(MO, MRI)) { + DenseMap::iterator CI = Cost.find(RCId); + if (CI != Cost.end()) + CI->second -= RCCost; + else + Cost.insert(std::make_pair(RCId, -RCCost)); + } + } + + // Update register pressure of blocks from loop header to current block. 
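+  // (Worked example with made-up class IDs and unit costs: one def in class
+  // GPR yields Cost == {GPR: +1}, so each BackTrace entry gets RP[GPR] += 1;
+  // a killed use would have contributed -1 instead.)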
+  for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+    SmallVector<unsigned, 8> &RP = BackTrace[i];
+    for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+         CI != CE; ++CI) {
+      unsigned RCId = CI->first;
+      RP[RCId] += CI->second;
+    }
+  }
+}
+
+/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
+/// the given loop invariant.
+bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
+  if (MI.isImplicitDef())
+    return true;
+
+  // If the instruction is cheap, only hoist if it is re-materializable. LICM
+  // will increase register pressure. It's probably not worth it if the
+  // instruction is cheap.
+  // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting
+  // these tends to help performance in low register pressure situations. The
+  // trade off is it may cause spills in high pressure situations. It will end
+  // up adding a store in the loop preheader. But the reload is no more
+  // expensive. The side benefit is these loads are frequently CSE'ed.
+  if (IsCheapInstruction(MI)) {
+    if (!TII->isTriviallyReMaterializable(&MI, AA))
+      return false;
+  } else {
+    // Estimate register pressure to determine whether to LICM the instruction.
+    // In low register pressure situations, we can be more aggressive about
+    // hoisting. Also, favor hoisting long latency instructions even in
+    // moderately high pressure situations.
+    // FIXME: If there are long latency loop-invariant instructions inside the
+    // loop at this point, why didn't the optimizer's LICM hoist them?
+    DenseMap<unsigned, int> Cost;
+    for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI.getOperand(i);
+      if (!MO.isReg() || MO.isImplicit())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg))
+        continue;
+      if (MO.isDef()) {
+        if (HasHighOperandLatency(MI, i, Reg)) {
+          ++NumHighLatency;
+          return true;
+        }
+
+        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+        EVT VT = *RC->vt_begin();
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+        if (CI != Cost.end())
+          CI->second += RCCost;
+        else
+          Cost.insert(std::make_pair(RCId, RCCost));
+      } else if (isOperandKill(MO, MRI)) {
+        // If a virtual register use is a kill, hoisting it out of the loop
+        // may actually reduce register pressure or be register pressure
+        // neutral.
+        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+        EVT VT = *RC->vt_begin();
+        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+        unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+        DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+        if (CI != Cost.end())
+          CI->second -= RCCost;
+        else
+          Cost.insert(std::make_pair(RCId, -RCCost));
+      }
+    }
+
+    // Visit BBs from header to current BB, if hoisting this doesn't cause
+    // high register pressure, then it's safe to proceed.
+    if (!CanCauseHighRegPressure(Cost)) {
+      ++NumLowRP;
+      return true;
+    }
+
+    // High register pressure situation, only hoist if the instruction is going
+    // to be remat'ed.
+    if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+        !MI.isInvariantLoad(AA))
+      return false;
+  }
+
+  // If result(s) of this instruction is used by PHIs, then don't hoist it.
+  // The presence of joins makes it difficult for current register allocator
+  // implementation to perform remat.
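+  // Illustrative example (made-up vregs): if a def produced by MI feeds
+  //   %reg1026 = PHI %reg1024, <BB#1>, %reg1025, <BB#2>
+  // the allocator would have to rematerialize across the join, so such
+  // instructions are rejected by the scan below.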
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isDef())
+      continue;
+    if (HasPHIUses(MO.getReg(), MRI))
+      return false;
+  }
+
+  return true;
+}
+
+MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+  // Don't unfold simple loads.
+  if (MI->getDesc().canFoldAsLoad())
+    return 0;
+
+  // If not, we may be able to unfold a load and hoist that.
+  // First test whether the instruction is loading from an amenable
+  // memory location.
+  if (!MI->isInvariantLoad(AA))
+    return 0;
+
+  // Next determine the register class for a temporary register.
+  unsigned LoadRegIndex;
+  unsigned NewOpc =
+    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
+                                    /*UnfoldLoad=*/true,
+                                    /*UnfoldStore=*/false,
+                                    &LoadRegIndex);
+  if (NewOpc == 0) return 0;
+  const TargetInstrDesc &TID = TII->get(NewOpc);
+  if (TID.getNumDefs() != 1) return 0;
+  const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+  // Ok, we're unfolding. Create a temporary register and do the unfold.
+  unsigned Reg = MRI->createVirtualRegister(RC);
+
+  MachineFunction &MF = *MI->getParent()->getParent();
+  SmallVector<MachineInstr *, 2> NewMIs;
+  bool Success =
+    TII->unfoldMemoryOperand(MF, MI, Reg,
+                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
+                             NewMIs);
+  (void)Success;
+  assert(Success &&
+         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
+         "succeeded!");
+  assert(NewMIs.size() == 2 &&
+         "Unfolded a load into multiple instructions!");
+  MachineBasicBlock *MBB = MI->getParent();
+  MBB->insert(MI, NewMIs[0]);
+  MBB->insert(MI, NewMIs[1]);
+  // If unfolding produced a load that wasn't loop-invariant or profitable to
+  // hoist, discard the new instructions and bail.
+  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
+    NewMIs[0]->eraseFromParent();
+    NewMIs[1]->eraseFromParent();
+    return 0;
+  }
+
+  // Update register pressure for the unfolded instruction.
+  UpdateRegPressure(NewMIs[1]);
+
+  // Otherwise we successfully unfolded a load that we can hoist.
+  MI->eraseFromParent();
+  return NewMIs[0];
+}
+
+void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
+  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
+    const MachineInstr *MI = &*I;
+    unsigned Opcode = MI->getOpcode();
+    DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+      CI = CSEMap.find(Opcode);
+    if (CI != CSEMap.end())
+      CI->second.push_back(MI);
+    else {
+      std::vector<const MachineInstr*> CSEMIs;
+      CSEMIs.push_back(MI);
+      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+    }
+  }
+}
+
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+                              std::vector<const MachineInstr*> &PrevMIs) {
+  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
+    const MachineInstr *PrevMI = PrevMIs[i];
+    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
+      return PrevMI;
+  }
+  return 0;
+}
+
+bool MachineLICM::EliminateCSE(MachineInstr *MI,
+         DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
+  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+  // the undef property onto uses.
+  if (CI == CSEMap.end() || MI->isImplicitDef())
+    return false;
+
+  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+    // Replace virtual registers defined by MI by their counterparts defined
+    // by Dup.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      const MachineOperand &MO = MI->getOperand(i);
+
+      // Physical registers may not differ here.
+      assert((!MO.isReg() || MO.getReg() == 0 ||
+              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+              MO.getReg() == Dup->getOperand(i).getReg()) &&
+             "Instructions with different phys regs are not identical!");
+
+      if (MO.isReg() && MO.isDef() &&
+          !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+        MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+        MRI->clearKillFlags(Dup->getOperand(i).getReg());
+      }
+    }
+    MI->eraseFromParent();
+    ++NumCSEed;
+    return true;
+  }
+  return false;
+}
+
+/// Hoist - When an instruction is found to use only loop invariant operands
+/// that are safe to hoist, this instruction is called to do the dirty work.
+///
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
+  // First check whether we should hoist this instruction.
+  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
+    // If not, try unfolding a hoistable load.
+    MI = ExtractHoistableLoad(MI);
+    if (!MI) return false;
+  }
+
+  // Now move the instruction to the predecessor, inserting it before any
+  // terminator instructions.
+  DEBUG({
+      dbgs() << "Hoisting " << *MI;
+      if (Preheader->getBasicBlock())
+        dbgs() << " to MachineBasicBlock "
+               << Preheader->getName();
+      if (MI->getParent()->getBasicBlock())
+        dbgs() << " from MachineBasicBlock "
+               << MI->getParent()->getName();
+      dbgs() << "\n";
+    });
+
+  // If this is the first instruction being hoisted to the preheader,
+  // initialize the CSE map with potential common expressions.
+  if (FirstInLoop) {
+    InitCSEMap(Preheader);
+    FirstInLoop = false;
+  }
+
+  // Look for opportunity to CSE the hoisted instruction.
+  unsigned Opcode = MI->getOpcode();
+  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+    CI = CSEMap.find(Opcode);
+  if (!EliminateCSE(MI, CI)) {
+    // Otherwise, splice the instruction to the preheader.
+    Preheader->splice(Preheader->getFirstTerminator(), MI->getParent(), MI);
+
+    // Update register pressure for BBs from header to this block.
+    UpdateBackTraceRegPressure(MI);
+
+    // Clear the kill flags of any register this instruction defines,
+    // since they may need to be live throughout the entire loop
+    // rather than just live for part of it.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (MO.isReg() && MO.isDef() && !MO.isDead())
+        MRI->clearKillFlags(MO.getReg());
+    }
+
+    // Add to the CSE map.
+    if (CI != CSEMap.end())
+      CI->second.push_back(MI);
+    else {
+      std::vector<const MachineInstr*> CSEMIs;
+      CSEMIs.push_back(MI);
+      CSEMap.insert(std::make_pair(Opcode, CSEMIs));
+    }
+  }
+
+  ++NumHoisted;
+  Changed = true;
+
+  return true;
+}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+  // Determine the block to which to hoist instructions. If we can't find a
+  // suitable loop predecessor, we can't do any hoisting.
+
+  // If we've tried to get a preheader and failed, don't try again.
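+  // CurPreheader is a tri-state cache: null means "not computed yet", the
+  // reinterpret_cast'ed -1 below is a sentinel meaning "computed, none
+  // available", and any other value is a usable preheader. The same idiom,
+  // sketched with hypothetical names:
+  //   if (Cache == SENTINEL) return 0;   // known failure; don't recompute
+  //   if (!Cache) Cache = compute();     // first request populates the cache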
+  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+    return 0;
+
+  if (!CurPreheader) {
+    CurPreheader = CurLoop->getLoopPreheader();
+    if (!CurPreheader) {
+      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+      if (!Pred) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+
+      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+      if (!CurPreheader) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+    }
+  }
+  return CurPreheader;
+}
diff --git a/final/lib/CodeGen/MachineLoopInfo.cpp b/final/lib/CodeGen/MachineLoopInfo.cpp
new file mode 100644
index 00000000000..189cb2ba5d1
--- /dev/null
+++ b/final/lib/CodeGen/MachineLoopInfo.cpp
@@ -0,0 +1,83 @@
+//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the MachineLoopInfo class that is used to identify natural
+// loops and determine the loop depth of various nodes of the CFG. Note that
+// the loops identified may actually be several natural loops that share the
+// same header node... not just a single natural loop.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace llvm {
+#define MLB class LoopBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLB);
+#undef MLB
+#define MLIB class LoopInfoBase<MachineBasicBlock, MachineLoop>
+TEMPLATE_INSTANTIATION(MLIB);
+#undef MLIB
+}
+
+char MachineLoopInfo::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+                "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+                "Machine Natural Loop Construction", true, true)
+
+char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
+
+bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) {
+  releaseMemory();
+  LI.Calculate(getAnalysis<MachineDominatorTree>().getBase());    // Update
+  return false;
+}
+
+void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+MachineBasicBlock *MachineLoop::getTopBlock() {
+  MachineBasicBlock *TopMBB = getHeader();
+  MachineFunction::iterator Begin = TopMBB->getParent()->begin();
+  if (TopMBB != Begin) {
+    MachineBasicBlock *PriorMBB = prior(MachineFunction::iterator(TopMBB));
+    while (contains(PriorMBB)) {
+      TopMBB = PriorMBB;
+      if (TopMBB == Begin) break;
+      PriorMBB = prior(MachineFunction::iterator(TopMBB));
+    }
+  }
+  return TopMBB;
+}
+
+MachineBasicBlock *MachineLoop::getBottomBlock() {
+  MachineBasicBlock *BotMBB = getHeader();
+  MachineFunction::iterator End = BotMBB->getParent()->end();
+  if (BotMBB != prior(End)) {
+    MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+    while (contains(NextMBB)) {
+      BotMBB = NextMBB;
+      if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+      NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
+    }
+  }
+  return BotMBB;
+}
+
+void MachineLoop::dump() const {
+  print(dbgs());
+}
diff --git a/final/lib/CodeGen/MachineLoopRanges.cpp b/final/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 00000000000..17fe67f6504
--- /dev/null
+++ b/final/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+                "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+                "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<SlotIndexes>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+  releaseMemory();
+  Indexes = &getAnalysis<SlotIndexes>();
+  return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+  DeleteContainerSeconds(Cache);
+  Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+  MachineLoopRange *&Range = Cache[Loop];
+  if (!Range)
+    Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+  return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+                                   MachineLoopRange::Allocator &alloc,
+                                   SlotIndexes &Indexes)
+  : Loop(loop), Intervals(alloc), Area(0) {
+  // Compute loop coverage.
+  for (MachineLoop::block_iterator I = Loop->block_begin(),
+         E = Loop->block_end(); I != E; ++I) {
+    const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+    Intervals.insert(Range.first, Range.second, 1u);
+    Area += Range.first.distance(Range.second);
+  }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+  Map::const_iterator I = Intervals.find(Start);
+  return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+  return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+  unsigned na = a->getNumber();
+  unsigned nb = b->getNumber();
+  if (na < nb)
+    return -1;
+  if (na > nb)
+    return 1;
+  return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+///  1. Descending area.
+///  2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+  const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+  const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+  if (a->getArea() != b->getArea())
+    return a->getArea() > b->getArea() ? -1 : 1;
+  return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+  OS << "Loop#" << getNumber() << " =";
+  for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+    OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+  MLR.print(OS);
+  return OS;
+}
diff --git a/final/lib/CodeGen/MachineModuleInfo.cpp b/final/lib/CodeGen/MachineModuleInfo.cpp
new file mode 100644
index 00000000000..fadc594efcb
--- /dev/null
+++ b/final/lib/CodeGen/MachineModuleInfo.cpp
@@ -0,0 +1,567 @@
+//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfo.h"
+
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+using namespace llvm;
+using namespace llvm::dwarf;
+
+// Handle the Pass registration stuff necessary to use TargetData's.
+INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
+                "Machine Module Information", false, false)
+char MachineModuleInfo::ID = 0;
+
+// Out of line virtual method.
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {}
+
+namespace llvm {
+class MMIAddrLabelMapCallbackPtr : CallbackVH {
+  MMIAddrLabelMap *Map;
+public:
+  MMIAddrLabelMapCallbackPtr() : Map(0) {}
+  MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+
+  void setPtr(BasicBlock *BB) {
+    ValueHandleBase::operator=(BB);
+  }
+
+  void setMap(MMIAddrLabelMap *map) { Map = map; }
+
+  virtual void deleted();
+  virtual void allUsesReplacedWith(Value *V2);
+};
+
+class MMIAddrLabelMap {
+  MCContext &Context;
+  struct AddrLabelSymEntry {
+    /// Symbols - The symbols for the label. This is a pointer union that is
+    /// either one symbol (the common case) or a list of symbols.
+    PointerUnion<MCSymbol*, std::vector<MCSymbol*>*> Symbols;
+
+    Function *Fn;   // The containing function of the BasicBlock.
+    unsigned Index; // The index in BBCallbacks for the BasicBlock.
+  };
+
+  DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
+
+  /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
+  /// use this so we get notified if a block is deleted or RAUWd.
+  std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
+
+  /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
+  /// whose corresponding BasicBlock got deleted. These symbols need to be
+  /// emitted at some point in the file, so AsmPrinter emits them after the
+  /// function body.
+  DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
+    DeletedAddrLabelsNeedingEmission;
+public:
+
+  MMIAddrLabelMap(MCContext &context) : Context(context) {}
+  ~MMIAddrLabelMap() {
+    assert(DeletedAddrLabelsNeedingEmission.empty() &&
+           "Some labels for deleted blocks never got emitted");
+
+    // Deallocate any of the 'list of symbols' case.
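+    // The PointerUnion holds either a lone MCSymbol* (no ownership) or a
+    // heap-allocated std::vector<MCSymbol*>*; only the vector alternative
+    // owns memory, which is why the loop below deletes just that case.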
+    for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
+         I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
+      if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
+        delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
+  }
+
+  MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
+  std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
+
+  void takeDeletedSymbolsForFunction(Function *F,
+                                     std::vector<MCSymbol*> &Result);
+
+  void UpdateForDeletedBlock(BasicBlock *BB);
+  void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New);
+};
+}
+
+MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+  // If we already had an entry for this block, just return it.
+  if (!Entry.Symbols.isNull()) {
+    assert(BB->getParent() == Entry.Fn && "Parent changed");
+    if (Entry.Symbols.is<MCSymbol*>())
+      return Entry.Symbols.get<MCSymbol*>();
+    return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
+  }
+
+  // Otherwise, this is a new entry, create a new symbol for it and add an
+  // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
+  BBCallbacks.push_back(BB);
+  BBCallbacks.back().setMap(this);
+  Entry.Index = BBCallbacks.size()-1;
+  Entry.Fn = BB->getParent();
+  MCSymbol *Result = Context.CreateTempSymbol();
+  Entry.Symbols = Result;
+  return Result;
+}
+
+std::vector<MCSymbol*>
+MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
+  assert(BB->hasAddressTaken() &&
+         "Shouldn't get label for block without address taken");
+  AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
+
+  std::vector<MCSymbol*> Result;
+
+  // If we already had an entry for this block, just return it.
+  if (Entry.Symbols.isNull())
+    Result.push_back(getAddrLabelSymbol(BB));
+  else if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>())
+    Result.push_back(Sym);
+  else
+    Result = *Entry.Symbols.get<std::vector<MCSymbol*>*>();
+  return Result;
+}
+
+
+/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F,
+/// return them.
+void MMIAddrLabelMap::
+takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
+  DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
+    DeletedAddrLabelsNeedingEmission.find(F);
+
+  // If there are no entries for the function, just return.
+  if (I == DeletedAddrLabelsNeedingEmission.end()) return;
+
+  // Otherwise, take the list.
+  std::swap(Result, I->second);
+  DeletedAddrLabelsNeedingEmission.erase(I);
+}
+
+
+void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
+  // If the block got deleted, there is no need for the symbol. If the symbol
+  // was already emitted, we can just forget about it, otherwise we need to
+  // queue it up for later emission when the function is output.
+  AddrLabelSymEntry Entry = AddrLabelSymbols[BB];
+  AddrLabelSymbols.erase(BB);
+  assert(!Entry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+  BBCallbacks[Entry.Index] = 0;  // Clear the callback.
+
+  assert((BB->getParent() == 0 || BB->getParent() == Entry.Fn) &&
+         "Block/parent mismatch");
+
+  // Handle both the single and the multiple symbols cases.
+  if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
+    if (Sym->isDefined())
+      return;
+
+    // If the block is not yet defined, we need to emit it at the end of the
+    // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+    // for the containing Function. Since the block is being deleted, its
+    // parent may already be removed, we have to get the function from 'Entry'.
+    DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+  } else {
+    std::vector<MCSymbol*> *Syms = Entry.Symbols.get<std::vector<MCSymbol*>*>();
+
+    for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
+      MCSymbol *Sym = (*Syms)[i];
+      if (Sym->isDefined()) continue;  // Ignore already emitted labels.
+
+      // If the block is not yet defined, we need to emit it at the end of the
+      // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
+      // for the containing Function. Since the block is being deleted, its
+      // parent may already be removed, we have to get the function from
+      // 'Entry'.
+      DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
+    }
+
+    // The entry is deleted, free the memory associated with the symbol list.
+    delete Syms;
+  }
+}
+
+void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
+  // Get the entry for the RAUW'd block and remove it from our map.
+  AddrLabelSymEntry OldEntry = AddrLabelSymbols[Old];
+  AddrLabelSymbols.erase(Old);
+  assert(!OldEntry.Symbols.isNull() && "Didn't have a symbol, why a callback?");
+
+  AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New];
+
+  // If New is not address taken, just move our symbol over to it.
+  if (NewEntry.Symbols.isNull()) {
+    BBCallbacks[OldEntry.Index].setPtr(New);  // Update the callback.
+    NewEntry = OldEntry;                      // Set New's entry.
+    return;
+  }
+
+  BBCallbacks[OldEntry.Index] = 0;  // Update the callback.
+
+  // Otherwise, we need to add the old symbol to the new block's set. If it is
+  // just a single entry, upgrade it to a symbol list.
+  if (MCSymbol *PrevSym = NewEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    std::vector<MCSymbol*> *SymList = new std::vector<MCSymbol*>();
+    SymList->push_back(PrevSym);
+    NewEntry.Symbols = SymList;
+  }
+
+  std::vector<MCSymbol*> *SymList =
+    NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
+
+  // If the old entry was a single symbol, add it.
+  if (MCSymbol *Sym = OldEntry.Symbols.dyn_cast<MCSymbol*>()) {
+    SymList->push_back(Sym);
+    return;
+  }
+
+  // Otherwise, concatenate the list.
+  std::vector<MCSymbol*> *Syms = OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
+  SymList->insert(SymList->end(), Syms->begin(), Syms->end());
+  delete Syms;
+}
+
+
+void MMIAddrLabelMapCallbackPtr::deleted() {
+  Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr()));
+}
+
+void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
+  Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
+}
+
+
+//===----------------------------------------------------------------------===//
+
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+                                     const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
+  ObjFileMMI(0),
+  CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+  CallsExternalVAFunctionWithFloatingPointArguments(false) {
+  initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
+  // Always emit some info, by default "no personality" info.
+  Personalities.push_back(NULL);
+  AddrLabelSymbols = 0;
+  TheModule = 0;
+}
+
+MachineModuleInfo::MachineModuleInfo()
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
+  assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
+         "should always be explicitly constructed by LLVMTargetMachine");
+  abort();
+}
+
+MachineModuleInfo::~MachineModuleInfo() {
+  delete ObjFileMMI;
+
+  // FIXME: Why isn't doFinalization being called??
+  //assert(AddrLabelSymbols == 0 && "doFinalization not called");
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
+}
+
+/// doInitialization - Initialize the state for a new module.
+///
+bool MachineModuleInfo::doInitialization() {
+  assert(AddrLabelSymbols == 0 && "Improperly initialized");
+  return false;
+}
+
+/// doFinalization - Tear down the state after completion of a module.
+///
+bool MachineModuleInfo::doFinalization() {
+  delete AddrLabelSymbols;
+  AddrLabelSymbols = 0;
+  return false;
+}
+
+/// EndFunction - Discard function meta information.
+///
+void MachineModuleInfo::EndFunction() {
+  // Clean up frame info.
+  FrameMoves.clear();
+
+  // Clean up exception info.
+  LandingPads.clear();
+  CallSiteMap.clear();
+  TypeInfos.clear();
+  FilterIds.clear();
+  FilterEnds.clear();
+  CallsEHReturn = 0;
+  CallsUnwindInit = 0;
+  VariableDbgInfo.clear();
+}
+
+/// AnalyzeModule - Scan the module for global debug information.
+///
+void MachineModuleInfo::AnalyzeModule(const Module &M) {
+  // Insert functions in the llvm.used array (but not llvm.compiler.used) into
+  // UsedFunctions.
+  const GlobalVariable *GV = M.getGlobalVariable("llvm.used");
+  if (!GV || !GV->hasInitializer()) return;
+
+  // Should be an array of 'i8*'.
+  const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
+  if (InitList == 0) return;
+
+  for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
+    if (const Function *F =
+          dyn_cast<Function>(InitList->getOperand(i)->stripPointerCasts()))
+      UsedFunctions.insert(F);
+}
+
+//===- Address of Block Management ----------------------------------------===//
+
+
+/// getAddrLabelSymbol - Return the symbol to be used for the specified basic
+/// block when its address is taken. This cannot be its normal LBB label
+/// because the block may be accessed outside its containing function.
+MCSymbol *MachineModuleInfo::getAddrLabelSymbol(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbol(const_cast<BasicBlock*>(BB));
+}
+
+/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
+/// basic block when its address is taken. If other blocks were RAUW'd to
+/// this one, we may have to emit them as well; return the whole set.
+std::vector<MCSymbol*> MachineModuleInfo::
+getAddrLabelSymbolToEmit(const BasicBlock *BB) {
+  // Lazily create AddrLabelSymbols.
+  if (AddrLabelSymbols == 0)
+    AddrLabelSymbols = new MMIAddrLabelMap(Context);
+  return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
+}
+
+
+/// takeDeletedSymbolsForFunction - If the specified function has had any
+/// references to address-taken blocks generated, but the block got deleted,
+/// return the symbol now so we can emit it. This prevents emitting a
+/// reference to a symbol that has no definition.
+void MachineModuleInfo::
+takeDeletedSymbolsForFunction(const Function *F,
+                              std::vector<MCSymbol*> &Result) {
+  // If no blocks have had their addresses taken, we're done.
+  if (AddrLabelSymbols == 0) return;
+  return AddrLabelSymbols->
+     takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
+}
+
+//===- EH -----------------------------------------------------------------===//
+
+/// getOrCreateLandingPadInfo - Find or create a LandingPadInfo for the
+/// specified MachineBasicBlock.
+LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
+    (MachineBasicBlock *LandingPad) {
+  unsigned N = LandingPads.size();
+  for (unsigned i = 0; i < N; ++i) {
+    LandingPadInfo &LP = LandingPads[i];
+    if (LP.LandingPadBlock == LandingPad)
+      return LP;
+  }
+
+  LandingPads.push_back(LandingPadInfo(LandingPad));
+  return LandingPads[N];
+}
+
+/// addInvoke - Provide the begin and end labels of an invoke style call and
+/// associate it with a try landing pad block.
+void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
+                                  MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.BeginLabels.push_back(BeginLabel);
+  LP.EndLabels.push_back(EndLabel);
+}
+
+/// addLandingPad - Provide the label of a try LandingPad block.
+///
+MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
+  MCSymbol *LandingPadLabel = Context.CreateTempSymbol();
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.LandingPadLabel = LandingPadLabel;
+  return LandingPadLabel;
+}
+
+/// addPersonality - Provide the personality function for the exception
+/// information.
+void MachineModuleInfo::addPersonality(MachineBasicBlock *LandingPad,
+                                       const Function *Personality) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.Personality = Personality;
+
+  for (unsigned i = 0; i < Personalities.size(); ++i)
+    if (Personalities[i] == Personality)
+      return;
+
+  // If this is the first personality we're adding, go
+  // ahead and add it at the beginning.
+  if (Personalities[0] == NULL)
+    Personalities[0] = Personality;
+  else
+    Personalities.push_back(Personality);
+}
+
+/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
+///
+void MachineModuleInfo::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+                                  std::vector<const GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  for (unsigned N = TyInfo.size(); N; --N)
+    LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
+///
+void MachineModuleInfo::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+                                  std::vector<const GlobalVariable *> &TyInfo) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  std::vector<unsigned> IdsInFilter(TyInfo.size());
+  for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+    IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+  LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+/// addCleanup - Add a cleanup action for a landing pad.
+///
+void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
+  LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+  LP.TypeIds.push_back(0);
+}
+
+/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
+/// pads.
+void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+  for (unsigned i = 0; i != LandingPads.size(); ) {
+    LandingPadInfo &LandingPad = LandingPads[i];
+    if (LandingPad.LandingPadLabel &&
+        !LandingPad.LandingPadLabel->isDefined() &&
+        (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+      LandingPad.LandingPadLabel = 0;
+
+    // Special case: we *should* emit LPs with null LP MBB. This indicates
+    // "nounwind" case.
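+    // Concretely: an entry that lost its label above but still names a block
+    // is a pad whose try-ranges all died, so it is erased below, while an
+    // entry with a null block survives to represent the "nounwind" case.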
+    if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+      MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+      MCSymbol *EndLabel = LandingPad.EndLabels[j];
+      if ((BeginLabel->isDefined() ||
+           (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+          (EndLabel->isDefined() ||
+           (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+      LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+      LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+      --j, --e;
+    }
+
+    // Remove landing pads with no try-ranges.
+    if (LandingPads[i].BeginLabels.empty()) {
+      LandingPads.erase(LandingPads.begin() + i);
+      continue;
+    }
+
+    // If there is no landing pad, ensure that the list of typeids is empty.
+    // If the only typeid is a cleanup, this is the same as having no typeids.
+    if (!LandingPad.LandingPadBlock ||
+        (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+      LandingPad.TypeIds.clear();
+    ++i;
+  }
+}
+
+/// getTypeIDFor - Return the type id for the specified typeinfo. This is
+/// function wide.
+unsigned MachineModuleInfo::getTypeIDFor(const GlobalVariable *TI) {
+  for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+    if (TypeInfos[i] == TI) return i + 1;
+
+  TypeInfos.push_back(TI);
+  return TypeInfos.size();
+}
+
+/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
+/// function wide.
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
+  // If the new filter coincides with the tail of an existing filter, then
+  // re-use the existing filter. Folding filters more than this requires
+  // re-ordering filters and/or their elements - probably not worth it.
+  for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+       E = FilterEnds.end(); I != E; ++I) {
+    unsigned i = *I, j = TyIds.size();
+
+    while (i && j)
+      if (FilterIds[--i] != TyIds[--j])
+        goto try_next;
+
+    if (!j)
+      // The new filter coincides with range [i, end) of the existing filter.
+      return -(1 + i);
+
+try_next:;
+  }
+
+  // Add the new filter.
+  int FilterID = -(1 + FilterIds.size());
+  FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+  for (unsigned I = 0, N = TyIds.size(); I != N; ++I)
+    FilterIds.push_back(TyIds[I]);
+  FilterEnds.push_back(FilterIds.size());
+  FilterIds.push_back(0); // terminator
+  return FilterID;
+}
+
+/// getPersonality - Return the personality function for the current function.
+const Function *MachineModuleInfo::getPersonality() const {
+  // FIXME: Until PR1414 is fixed, we're using 1 personality function per
+  // function.
+  return !LandingPads.empty() ? LandingPads[0].Personality : NULL;
+}
+
+/// getPersonalityIndex - Return unique index for current personality
+/// function. NULL/first personality function should always get zero index.
+unsigned MachineModuleInfo::getPersonalityIndex() const {
+  const Function* Personality = NULL;
+
+  // Scan landing pads. If there is at least one non-NULL personality - use it.
+  for (unsigned i = 0; i != LandingPads.size(); ++i)
+    if (LandingPads[i].Personality) {
+      Personality = LandingPads[i].Personality;
+      break;
+    }
+
+  for (unsigned i = 0; i < Personalities.size(); ++i) {
+    if (Personalities[i] == Personality)
+      return i;
+  }
+
+  // This will happen if the current personality function is
+  // in the zero index.
+  return 0;
+}
diff --git a/final/lib/CodeGen/MachineModuleInfoImpls.cpp b/final/lib/CodeGen/MachineModuleInfoImpls.cpp
new file mode 100644
index 00000000000..5ab56c09f5f
--- /dev/null
+++ b/final/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -0,0 +1,45 @@
+//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements object-file format specific implementations of
+// MachineModuleInfoImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MachineModuleInfoMachO
+//===----------------------------------------------------------------------===//
+
+// Out of line virtual method.
+void MachineModuleInfoMachO::Anchor() {}
+void MachineModuleInfoELF::Anchor() {}
+
+static int SortSymbolPair(const void *LHS, const void *RHS) {
+  typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy;
+  const MCSymbol *LHSS = ((const PairTy *)LHS)->first;
+  const MCSymbol *RHSS = ((const PairTy *)RHS)->first;
+  return LHSS->getName().compare(RHSS->getName());
+}
+
+/// GetSortedStubs - Return the entries from a DenseMap in a deterministic
+/// sorted order.
+MachineModuleInfoImpl::SymbolListTy
+MachineModuleInfoImpl::GetSortedStubs(const DenseMap<MCSymbol*,
+                          MachineModuleInfoImpl::StubValueTy> &Map) {
+  MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end());
+
+  if (!List.empty())
+    qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair);
+  return List;
+}
+
diff --git a/final/lib/CodeGen/MachinePassRegistry.cpp b/final/lib/CodeGen/MachinePassRegistry.cpp
new file mode 100644
index 00000000000..9f4ef128780
--- /dev/null
+++ b/final/lib/CodeGen/MachinePassRegistry.cpp
@@ -0,0 +1,41 @@
+//===-- CodeGen/MachinePassRegistry.cpp -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the machine function pass registry for register allocators
+// and instruction schedulers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachinePassRegistry.h"
+
+using namespace llvm;
+
+
+/// Add - Adds a function pass to the registration list.
+///
+void MachinePassRegistry::Add(MachinePassRegistryNode *Node) {
+  Node->setNext(List);
+  List = Node;
+  if (Listener) Listener->NotifyAdd(Node->getName(),
+                                    Node->getCtor(),
+                                    Node->getDescription());
+}
+
+
+/// Remove - Removes a function pass from the registration list.
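+/// The registry is a singly linked list threaded through each node's next
+/// pointer, so removal walks the chain of next-slots. The same idiom,
+/// sketched with hypothetical names:
+///   for (Node **I = &Head; *I; I = &(*I)->Next)
+///     if (*I == Target) { *I = Target->Next; break; }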
+///
+void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) {
+  for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) {
+    if (*I == Node) {
+      if (Listener) Listener->NotifyRemove(Node->getName());
+      *I = (*I)->getNext();
+      break;
+    }
+  }
+}
diff --git a/final/lib/CodeGen/MachineRegisterInfo.cpp b/final/lib/CodeGen/MachineRegisterInfo.cpp
new file mode 100644
index 00000000000..7244d5f03a9
--- /dev/null
+++ b/final/lib/CodeGen/MachineRegisterInfo.cpp
@@ -0,0 +1,241 @@
+//===-- lib/CodeGen/MachineRegisterInfo.cpp -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MachineRegisterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
+  VRegInfo.reserve(256);
+  RegAllocHints.reserve(256);
+  RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
+  UsedPhysRegs.resize(TRI.getNumRegs());
+
+  // Create the physreg use/def lists.
+  PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()];
+  memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs());
+}
+
+MachineRegisterInfo::~MachineRegisterInfo() {
+#ifndef NDEBUG
+  for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+    assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+           "Vreg use list non-empty still?");
+  for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
+    assert(!PhysRegUseDefLists[i] &&
+           "PhysRegUseDefLists has entries after all instructions are deleted");
+#endif
+  delete [] PhysRegUseDefLists;
+  delete [] RegClass2VRegMap;
+}
+
+/// setRegClass - Set the register class of the specified virtual register.
+///
+void
+MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
+  const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
+  VRegInfo[Reg].first = RC;
+
+  // Remove from old register class's vregs list. This may be slow but
+  // fortunately this operation is rarely needed.
+  std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
+  std::vector<unsigned>::iterator I =
+    std::find(VRegs.begin(), VRegs.end(), Reg);
+  VRegs.erase(I);
+
+  // Add to new register class's vregs list.
+  RegClass2VRegMap[RC->getID()].push_back(Reg);
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+                                       const TargetRegisterClass *RC) {
+  const TargetRegisterClass *OldRC = getRegClass(Reg);
+  if (OldRC == RC)
+    return RC;
+  const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+  if (!NewRC)
+    return 0;
+  if (NewRC != OldRC)
+    setRegClass(Reg, NewRC);
+  return NewRC;
+}
+
+/// createVirtualRegister - Create and return a new virtual register in the
+/// function with the specified register class.
+///
+unsigned
+MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
+  assert(RegClass && "Cannot create register without RegClass!");
+
+  // New virtual register number.
+  unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
+  // Add a reg, but keep track of whether the vector reallocated or not.
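+  // VRegInfo's underlying storage may move when it grows (say, doubling past
+  // its reserved 256 entries), and the per-vreg use/def list heads hold
+  // back-pointers into it; the old base address is snapshotted below and, if
+  // it changed, HandleVRegListReallocation() repairs every back-pointer.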
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0); + void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg]; + VRegInfo.grow(Reg); + VRegInfo[Reg].first = RegClass; + RegAllocHints.grow(Reg); + + if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase) + // The vector reallocated, handle this now. + HandleVRegListReallocation(); + RegClass2VRegMap[RegClass->getID()].push_back(Reg); + return Reg; +} + +/// HandleVRegListReallocation - We just added a virtual register to the +/// VRegInfo info list and it reallocated. Update the use/def lists info +/// pointers. +void MachineRegisterInfo::HandleVRegListReallocation() { + // The back pointers for the vreg lists point into the previous vector. + // Update them to point to their correct slots. + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + MachineOperand *List = VRegInfo[Reg].second; + if (!List) continue; + // Update the back-pointer to be accurate once more. + List->Contents.Reg.Prev = &VRegInfo[Reg].second; + } +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. +void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + + // TODO: This could be more efficient by bulk changing the operands. + for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { + MachineOperand &O = I.getOperand(); + ++I; + O.setReg(ToReg); + } +} + + +/// getVRegDef - Return the machine instr that defines the specified virtual +/// register or null if none is found. This assumes that the code is in SSA +/// form, so there should only be one definition. +MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { + // Since we are in SSA form, we can use the first definition. + if (!def_empty(Reg)) + return &*def_begin(Reg); + return 0; +} + +bool MachineRegisterInfo::hasOneUse(unsigned RegNo) const { + use_iterator UI = use_begin(RegNo); + if (UI == use_end()) + return false; + return ++UI == use_end(); +} + +bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const { + use_nodbg_iterator UI = use_nodbg_begin(RegNo); + if (UI == use_nodbg_end()) + return false; + return ++UI == use_nodbg_end(); +} + +/// clearKillFlags - Iterate over all the uses of the given register and +/// clear the kill flag from the MachineOperand. This function is used by +/// optimization passes which extend register lifetimes and need only +/// preserve conservative kill flag information. +void MachineRegisterInfo::clearKillFlags(unsigned Reg) const { + for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI) + UI.getOperand().setIsKill(false); +} + +bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { + for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I) + if (I->first == Reg || I->second == Reg) + return true; + return false; +} + +bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { + for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) + if (*I == Reg) + return true; + return false; +} + +/// getLiveInPhysReg - If VReg is a live-in virtual register, return the +/// corresponding live-in physical register. 
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->second == VReg)
+      return I->first;
+  return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == PReg)
+      return I->second;
+  return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII) {
+  // Emit the copies into the top of the block.
+  for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+    if (LiveIns[i].second) {
+      if (use_empty(LiveIns[i].second)) {
+        // The livein has no uses. Drop it.
+        //
+        // It would be preferable to have isel avoid creating live-in
+        // records for unused arguments in the first place, but it's
+        // complicated by the debug info code for arguments.
+        LiveIns.erase(LiveIns.begin() + i);
+        --i; --e;
+      } else {
+        // Emit a copy.
+        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+                TII.get(TargetOpcode::COPY), LiveIns[i].second)
+          .addReg(LiveIns[i].first);
+
+        // Add the register to the entry block live-in set.
+        EntryMBB->addLiveIn(LiveIns[i].first);
+      }
+    } else {
+      // Add the register to the entry block live-in set.
+      EntryMBB->addLiveIn(LiveIns[i].first);
+    }
+}
+
+void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
+  for (int i = UsedPhysRegs.find_first(); i >= 0;
+       i = UsedPhysRegs.find_next(i))
+    for (const unsigned *SS = TRI.getSubRegisters(i);
+         unsigned SubReg = *SS; ++SS)
+      if (SubReg > unsigned(i))
+        UsedPhysRegs.set(SubReg);
+}
+
+#ifndef NDEBUG
+void MachineRegisterInfo::dumpUses(unsigned Reg) const {
+  for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
+    I.getOperand().getParent()->dump();
+}
+#endif
diff --git a/final/lib/CodeGen/MachineSSAUpdater.cpp b/final/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 00000000000..84d6df25397
--- /dev/null
+++ b/final/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,372 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on the
+// SSAUpdater class in lib/Transforms/Utils.
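+//
+// Typical use, sketched (illustrative names; the entry points are the
+// methods defined below):
+//   MachineSSAUpdater SSA(MF);
+//   SSA.Initialize(OrigVReg);                // vreg being rewritten
+//   SSA.AddAvailableValue(SomeMBB, NewVReg); // tell it about new defs
+//   SSA.RewriteUse(UseOfOrigVReg);           // PHIs are inserted on demand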
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+                                     SmallVectorImpl<MachineInstr*> *NewPHI)
+  : AV(0), InsertedPHIs(NewPHI) {
+  TII = MF.getTarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+  delete &getAvailableVals(AV);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+
+  VR = V;
+  VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value
+/// for the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+  return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+         SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+  if (BB->empty())
+    return 0;
+
+  MachineBasicBlock::iterator I = BB->front();
+  if (!I->isPHI())
+    return 0;
+
+  AvailableValsTy AVals;
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    AVals[PredValues[i].first] = PredValues[i].second;
+  while (I != BB->end() && I->isPHI()) {
+    bool Same = true;
+    for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+      unsigned SrcReg = I->getOperand(i).getReg();
+      MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+      if (AVals[SrcBB] != SrcReg) {
+        Same = false;
+        break;
+      }
+    }
+    if (Same)
+      return I->getOperand(0).getReg();
+    ++I;
+  }
+  return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which defines
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+                           MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+                           const TargetRegisterClass *RC,
+                           MachineRegisterInfo *MRI,
+                           const TargetInstrInfo *TII) {
+  unsigned NewVR = MRI->createVirtualRegister(RC);
+  return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!HasValueForBlock(BB))
+    return GetValueAtEndOfBlockInternal(BB);
+
+  // If there are no predecessors, just return undef.
+  if (BB->pred_empty()) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        VRC, MRI, TII);
+    return NewDef->getOperand(0).getReg();
+  }
+
+  // Otherwise, we have the hard case. Get the live-in values for each
+  // predecessor.
+  SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+  unsigned SingularValue = 0;
+
+  bool isFirstPred = true;
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+    unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+    PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+    // Compute SingularValue.
+    if (isFirstPred) {
+      SingularValue = PredVal;
+      isFirstPred = false;
+    } else if (PredVal != SingularValue)
+      SingularValue = 0;
+  }
+
+  // Otherwise, if all the merged values are the same, just use it.
+  if (SingularValue != 0)
+    return SingularValue;
+
+  // If an identical PHI is already in BB, just reuse it.
+  unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+  if (DupPHI)
+    return DupPHI;
+
+  // Otherwise, we do need a PHI: insert one now.
+  MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+  MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB,
+                                           Loc, VRC, MRI, TII);
+
+  // Fill in all the predecessors of the PHI.
+  MachineInstrBuilder MIB(InsertedPHI);
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+  // See if the PHI node can be merged to a single value. This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+    InsertedPHI->eraseFromParent();
+    return ConstVal;
+  }
+
+  // If the client wants to know about all new instructions, tell it.
+  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+  DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+  return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+                                         MachineOperand *U) {
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+    if (&MI->getOperand(i) == U)
+      return MI->getOperand(i+1).getMBB();
+  }
+
+  llvm_unreachable("MachineOperand::getParent() failure?");
+  return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
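+///
+/// E.g. (illustrative): given
+///   %reg1026 = PHI %reg1024, <BB#1>, %reg1025, <BB#2>
+/// rewriting the %reg1024 operand asks for the value live out of BB#1,
+/// not for a value computed in the PHI's own block.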
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+  MachineInstr *UseMI = U.getParent();
+  unsigned NewVR = 0;
+  if (UseMI->isPHI()) {
+    MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+    NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+  } else {
+    NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+  }
+
+  U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+  MRI->replaceRegWith(OldReg, NewReg);
+
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+         I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+    if (I->second == OldReg)
+      I->second = NewReg;
+}
+
+/// MachinePHIiter - Iterator for PHI operands. This is used for the
+/// PHI_iterator in the SSAUpdaterImpl template.
+namespace {
+  class MachinePHIiter {
+  private:
+    MachineInstr *PHI;
+    unsigned idx;
+
+  public:
+    explicit MachinePHIiter(MachineInstr *P) // begin iterator
+      : PHI(P), idx(1) {}
+    MachinePHIiter(MachineInstr *P, bool)    // end iterator
+      : PHI(P), idx(PHI->getNumOperands()) {}
+
+    MachinePHIiter &operator++() { idx += 2; return *this; }
+    bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
+    bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
+    unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+    MachineBasicBlock *getIncomingBlock() {
+      return PHI->getOperand(idx+1).getMBB();
+    }
+  };
+}
+
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+  typedef MachineBasicBlock BlkT;
+  typedef unsigned ValT;
+  typedef MachineInstr PhiT;
+
+  typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+  static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+  static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+  typedef MachinePHIiter PHI_iterator;
+  static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+  static inline PHI_iterator PHI_end(PhiT *PHI) {
+    return PHI_iterator(PHI, true);
+  }
+
+  /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+  /// vector.
+  static void FindPredecessorBlocks(MachineBasicBlock *BB,
+                                    SmallVectorImpl<MachineBasicBlock*> *Preds){
+    for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+           E = BB->pred_end(); PI != E; ++PI)
+      Preds->push_back(*PI);
+  }
+
+  /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+  /// Add it into the specified block and return the register.
+  static unsigned GetUndefVal(MachineBasicBlock *BB,
+                              MachineSSAUpdater *Updater) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        Updater->VRC, Updater->MRI,
+                                        Updater->TII);
+    return NewDef->getOperand(0).getReg();
+  }
+
+  /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+  /// Add it into the specified block and return the register.
+  static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+                                 MachineSSAUpdater *Updater) {
+    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+    MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+                                     Updater->VRC, Updater->MRI,
+                                     Updater->TII);
+    return PHI->getOperand(0).getReg();
+  }
+
+  /// AddPHIOperand - Add the specified value as an operand of the PHI for
+  /// the specified predecessor block.
+
+  /// AddPHIOperand - Add the specified value as an operand of the PHI for
+  /// the specified predecessor block.
+  static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+                            MachineBasicBlock *Pred) {
+    PHI->addOperand(MachineOperand::CreateReg(Val, false));
+    PHI->addOperand(MachineOperand::CreateMBB(Pred));
+  }
+
+  /// InstrIsPHI - Check if an instruction is a PHI.
+  ///
+  static MachineInstr *InstrIsPHI(MachineInstr *I) {
+    if (I && I->isPHI())
+      return I;
+    return 0;
+  }
+
+  /// ValueIsPHI - Check if the instruction that defines the specified register
+  /// is a PHI instruction.
+  static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+    return InstrIsPHI(Updater->MRI->getVRegDef(Val));
+  }
+
+  /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+  /// operands, i.e., it was just added.
+  static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+    MachineInstr *PHI = ValueIsPHI(Val, Updater);
+    if (PHI && PHI->getNumOperands() <= 1)
+      return PHI;
+    return 0;
+  }
+
+  /// GetPHIValue - For the specified PHI instruction, return the register
+  /// that it defines.
+  static unsigned GetPHIValue(MachineInstr *PHI) {
+    return PHI->getOperand(0).getReg();
+  }
+};
+
+} // End llvm namespace
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  if (unsigned V = AvailableVals[BB])
+    return V;
+
+  SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+  return Impl.GetValue(BB);
+}
diff --git a/final/lib/CodeGen/MachineSink.cpp b/final/lib/CodeGen/MachineSink.cpp
new file mode 100644
index 00000000000..8a93a24287b
--- /dev/null
+++ b/final/lib/CodeGen/MachineSink.cpp
@@ -0,0 +1,607 @@
+//===-- MachineSink.cpp - Sinking for machine instructions ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+// This pass is not intended to be a replacement or a complete alternative
+// for an LLVM-IR-level sinking pass. It is only designed to sink simple
+// constructs that are not exposed before lowering and instruction selection.
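+//
+// For example (editorial illustration; the vregs and opcodes below are
+// hypothetical):
+//
+//   BB#0:                                 BB#0:
+//     %vreg1<def> = ADDri %vreg0, 1         ...
+//     JE <BB#2>                     =>      JE <BB#2>
+//   BB#1:                                 BB#1:
+//     use %vreg1                            %vreg1<def> = ADDri %vreg0, 1
+//                                           use %vreg1
+//
+// After sinking, the ADDri no longer executes on the BB#0 -> BB#2 path.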
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-sink"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+SplitEdges("machine-sink-split",
+           cl::desc("Split critical edges during machine sinking"),
+           cl::init(true), cl::Hidden);
+
+STATISTIC(NumSunk,      "Number of machine instructions sunk");
+STATISTIC(NumSplit,     "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
+
+namespace {
+  class MachineSinking : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo  *MRI;  // Machine register information
+    MachineDominatorTree *DT;   // Machine dominator tree
+    MachineLoopInfo *LI;
+    AliasAnalysis *AA;
+    BitVector AllocatableSet;   // Which physregs are allocatable?
+
+    // Remember which edges have been considered for breaking.
+    SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
+    CEBCandidates;
+
+  public:
+    static char ID; // Pass identification
+    MachineSinking() : MachineFunctionPass(ID) {
+      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+    }
+
+    virtual void releaseMemory() {
+      CEBCandidates.clear();
+    }
+
+  private:
+    bool ProcessBlock(MachineBasicBlock &MBB);
+    bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+                                     MachineBasicBlock *From,
+                                     MachineBasicBlock *To);
+    MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+                                         MachineBasicBlock *From,
+                                         MachineBasicBlock *To,
+                                         bool BreakPHIEdge);
+    bool SinkInstruction(MachineInstr *MI, bool &SawStore);
+    bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
+                                 MachineBasicBlock *DefMBB,
+                                 bool &BreakPHIEdge, bool &LocalUse) const;
+    bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+                                         MachineBasicBlock *MBB);
+  };
+} // end anonymous namespace
+
+char MachineSinking::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+                "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+                "Machine code sinking", false, false)
+
+FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
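+
+// Editorial note (not part of the original patch): the target-independent
+// code generator schedules this pass via the factory above, e.g.
+//
+//   PM.add(createMachineSinkingPass());  // as LLVMTargetMachine does when
+//                                        // optimization is enabled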
+
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+                                                     MachineBasicBlock *MBB) {
+  if (!MI->isCopy())
+    return false;
+
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  unsigned DstReg = MI->getOperand(0).getReg();
+  if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+      !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+      !MRI->hasOneNonDBGUse(SrcReg))
+    return false;
+
+  const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+  const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+  if (SRC != DRC)
+    return false;
+
+  MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+  if (DefMI->isCopyLike())
+    return false;
+  DEBUG(dbgs() << "Coalescing: " << *DefMI);
+  DEBUG(dbgs() << "*** to: " << *MI);
+  MRI->replaceRegWith(DstReg, SrcReg);
+  MI->eraseFromParent();
+  ++NumCoalesces;
+  return true;
+}
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified register
+/// occur in blocks dominated by the specified block.  If any use is in the
+/// definition block, then return false since it is never legal to move def
+/// after uses.
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+                                        MachineBasicBlock *MBB,
+                                        MachineBasicBlock *DefMBB,
+                                        bool &BreakPHIEdge,
+                                        bool &LocalUse) const {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Only makes sense for vregs");
+
+  if (MRI->use_nodbg_empty(Reg))
+    return true;
+
+  // Ignoring debug uses is necessary so debug info doesn't affect the code.
+  // This may leave a referencing dbg_value in the original block, before
+  // the definition of the vreg.  Dwarf generator handles this although the
+  // user might not get the right info at runtime.
+
+  // BreakPHIEdge is true if all the uses are in the successor MBB being sunk
+  // into and they are all PHI nodes.  In this case, machine-sink must break
+  // the critical edge first.  E.g.
+  //
+  // BB#1: derived from LLVM BB %bb4.preheader
+  //   Predecessors according to CFG: BB#0
+  //     ...
+  //     %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+  //     ...
+  //     JE_4 <BB#37>, %EFLAGS<imp-use>
+  //   Successors according to CFG: BB#37 BB#2
+  //
+  // BB#2: derived from LLVM BB %bb.nph
+  //   Predecessors according to CFG: BB#0 BB#1
+  //     %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+  BreakPHIEdge = true;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (!(UseBlock == MBB && UseInst->isPHI() &&
+          UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+      BreakPHIEdge = false;
+      break;
+    }
+  }
+  if (BreakPHIEdge)
+    return true;
+
+  for (MachineRegisterInfo::use_nodbg_iterator
+         I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+       I != E; ++I) {
+    // Determine the block of the use.
+    MachineInstr *UseInst = &*I;
+    MachineBasicBlock *UseBlock = UseInst->getParent();
+    if (UseInst->isPHI()) {
+      // PHI nodes use the operand in the predecessor block, not the block with
+      // the PHI.
+      UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
+    } else if (UseBlock == DefMBB) {
+      LocalUse = true;
+      return false;
+    }
+
+    // Check that it dominates.
+    if (!DT->dominates(MBB, UseBlock))
+      return false;
+  }
+
+  return true;
+}
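+
+// Editorial example (not part of the original patch): for the dominance test
+// above, a PHI use is attributed to the incoming predecessor rather than to
+// the PHI's own block.  E.g. in
+//   BB#2: %vreg5<def> = PHI %vreg4, <BB#0>, %vreg6, <BB#1>
+// the use of %vreg4 counts as occurring in BB#0, so sinking its definition
+// toward BB#0 can remain legal even though the PHI itself is in BB#2.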
+
+bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG(dbgs() << "******** Machine Sinking ********\n");
+
+  const TargetMachine &TM = MF.getTarget();
+  TII = TM.getInstrInfo();
+  TRI = TM.getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+  LI = &getAnalysis<MachineLoopInfo>();
+  AA = &getAnalysis<AliasAnalysis>();
+  AllocatableSet = TRI->getAllocatableSet(MF);
+
+  bool EverMadeChange = false;
+
+  while (1) {
+    bool MadeChange = false;
+
+    // Process all basic blocks.
+    CEBCandidates.clear();
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+         I != E; ++I)
+      MadeChange |= ProcessBlock(*I);
+
+    // If this iteration over the code changed anything, keep iterating.
+    if (!MadeChange) break;
+    EverMadeChange = true;
+  }
+  return EverMadeChange;
+}
+
+bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
+  // Can't sink anything out of a block that has less than two successors.
+  if (MBB.succ_size() <= 1 || MBB.empty()) return false;
+
+  // Don't bother sinking code out of unreachable blocks. In addition to being
+  // unprofitable, it can also lead to infinite looping, because in an
+  // unreachable loop there may be nowhere to stop.
+  if (!DT->isReachableFromEntry(&MBB)) return false;
+
+  bool MadeChange = false;
+
+  // Walk the basic block bottom-up.  Remember if we saw a store.
+  MachineBasicBlock::iterator I = MBB.end();
+  --I;
+  bool ProcessedBegin, SawStore = false;
+  do {
+    MachineInstr *MI = I;  // The instruction to sink.
+
+    // Predecrement I (if it's not begin) so that it isn't invalidated by
+    // sinking.
+    ProcessedBegin = I == MBB.begin();
+    if (!ProcessedBegin)
+      --I;
+
+    if (MI->isDebugValue())
+      continue;
+
+    if (PerformTrivialForwardCoalescing(MI, &MBB))
+      continue;
+
+    if (SinkInstruction(MI, SawStore))
+      ++NumSunk, MadeChange = true;
+
+    // If we just processed the first instruction in the block, we're done.
+  } while (!ProcessedBegin);
+
+  return MadeChange;
+}
+
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+                                                 MachineBasicBlock *From,
+                                                 MachineBasicBlock *To) {
+  // FIXME: Need much better heuristics.
+
+  // If the pass has already considered breaking this edge (during this pass
+  // through the function), then let's go ahead and break it.  This means
+  // sinking multiple "cheap" instructions into the same block.
+  if (!CEBCandidates.insert(std::make_pair(From, To)))
+    return true;
+
+  if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+    return true;
+
+  // MI is cheap, we probably don't want to break the critical edge for it.
+  // However, if this would allow some definitions of its source operands
+  // to be sunk then it's probably worth it.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (MRI->hasOneNonDBGUse(Reg))
+      return true;
+  }
+
+  return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+                                                     MachineBasicBlock *FromBB,
+                                                     MachineBasicBlock *ToBB,
+                                                     bool BreakPHIEdge) {
+  if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+    return 0;
+
+  // Avoid breaking back edge. From == To means backedge for single BB loop.
+  if (!SplitEdges || FromBB == ToBB)
+    return 0;
+
+  // Check for backedges of more "complex" loops.
+  if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+      LI->isLoopHeader(ToBB))
+    return 0;
+
+  // It's not always legal to break critical edges and sink the computation
+  // to the edge.
+  //
+  // BB#1:
+  // v1024
+  // Beq BB#3
+  //
+  // BB#2:
+  // ... no uses of v1024
+  //
+  // BB#3:
+  // ...
+  //    = v1024
+  //
+  // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+  //
+  // BB#1:
+  // ...
+  // Bne BB#2
+  // BB#4:
+  // v1024 =
+  // B BB#3
+  // BB#2:
+  // ... no uses of v1024
+  //
+  // BB#3:
+  // ...
+  //    = v1024
+  //
+  // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+  // flow.  We need to ensure the new basic block where the computation is
+  // sunk to dominates all the uses.
+  // It's only legal to break critical edge and sink the computation to the
+  // new block if all the predecessors of "To", except for "From", are
+  // not dominated by "From".  Given SSA property, this means these
+  // predecessors are dominated by "To".
+  //
+  // There is no need to do this check if all the uses are PHI nodes.  PHI
+  // sources are only defined on the specific predecessor edges.
+  if (!BreakPHIEdge) {
+    for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+           E = ToBB->pred_end(); PI != E; ++PI) {
+      if (*PI == FromBB)
+        continue;
+      if (!DT->dominates(ToBB, *PI))
+        return 0;
+    }
+  }
+
+  return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// machine instruction out of its current block into a successor.
+bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+  // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+  // be close to the source to make it easier to coalesce.
+  if (AvoidsSinking(MI, MRI))
+    return false;
+
+  // Check if it's safe to move the instruction.
+  if (!MI->isSafeToMove(TII, AA, SawStore))
+    return false;
+
+  // FIXME: This should include support for sinking instructions within the
+  // block they are currently in to shorten the live ranges.  We often get
+  // instructions sunk into the top of a large block, but it would be better
+  // to also sink them down before their first use in the block.  This xform
+  // has to be careful not to *increase* register pressure though, e.g. sinking
+  // "x = y + z" down if it kills y and z would increase the live ranges of y
+  // and z and only shrink the live range of x.
+
+  // Loop over all the operands of the specified instruction.  If there is
+  // anything we can't handle, bail out.
+  MachineBasicBlock *ParentBlock = MI->getParent();
+
+  // SuccToSinkTo - This is the successor to sink this instruction to, once we
+  // decide.
+  MachineBasicBlock *SuccToSinkTo = 0;
+
+  bool BreakPHIEdge = false;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;  // Ignore non-register operands.
+
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!MRI->def_empty(Reg))
+          return false;
+
+        if (AllocatableSet.test(Reg))
+          return false;
+
+        // Check for a def among the register's aliases too.
+        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (!MRI->def_empty(AliasReg))
+            return false;
+
+          if (AllocatableSet.test(AliasReg))
+            return false;
+        }
+      } else if (!MO.isDead()) {
+        // A def that isn't dead. We can't move it.
+        return false;
+      }
+    } else {
+      // Virtual register uses are always safe to sink.
+      if (MO.isUse()) continue;
+
+      // If it's not safe to move defs of the register class, then abort.
+      if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
+        return false;
+
+      // FIXME: This picks a successor to sink into based on having one
+      // successor that dominates all the uses.  However, there are cases where
+      // sinking can happen but where the sink point isn't a successor.  For
+      // example:
+      //
+      //   x = computation
+      //   if () {} else {}
+      //   use x
+      //
+      // the instruction could be sunk over the whole diamond for the
+      // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+      // after that.
+
+      // Virtual register defs can only be sunk if all their uses are in blocks
+      // dominated by one of the successors.
+      if (SuccToSinkTo) {
+        // If a previous operand picked a block to sink to, then this operand
+        // must be sinkable to the same block.
+        bool LocalUse = false;
+        if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+                                     BreakPHIEdge, LocalUse))
+          return false;
+
+        continue;
+      }
+
+      // Otherwise, we should look at all the successors and decide which one
+      // we should sink to.
+      for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
+             E = ParentBlock->succ_end(); SI != E; ++SI) {
+        bool LocalUse = false;
+        if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+                                    BreakPHIEdge, LocalUse)) {
+          SuccToSinkTo = *SI;
+          break;
+        }
+        if (LocalUse)
+          // Def is used locally, it's never safe to move this def.
+          return false;
+      }
+
+      // If we couldn't find a block to sink to, ignore this instruction.
+      if (SuccToSinkTo == 0)
+        return false;
+    }
+  }
+
+  // If there are no outputs, it must have side-effects.
+  if (SuccToSinkTo == 0)
+    return false;
+
+  // It's not safe to sink instructions to EH landing pad. Control flow into
+  // landing pad is implicitly defined.
+  if (SuccToSinkTo->isLandingPad())
+    return false;
+
+  // It is not possible to sink an instruction into its own block.  This can
+  // happen with loops.
+  if (MI->getParent() == SuccToSinkTo)
+    return false;
+
+  // If the instruction to move defines a dead physical register which is live
+  // when leaving the basic block, don't move it because it could turn into a
+  // "zombie" define of that preg. E.g., EFLAGS. ()
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (SuccToSinkTo->isLiveIn(Reg))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
+  // If the block has multiple predecessors, this would introduce computation
+  // on a path where it doesn't already exist.  We could split the critical
+  // edge, but for now we just punt.
+  if (SuccToSinkTo->pred_size() > 1) {
+    // We cannot sink a load across a critical edge - there may be stores in
+    // other code paths.
+    bool TryBreak = false;
+    bool store = true;
+    if (!MI->isSafeToMove(TII, AA, store)) {
+      DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n");
+      TryBreak = true;
+    }
+
+    // We don't want to sink across a critical edge if we don't dominate the
+    // successor. We could be introducing calculations to new code paths.
+    if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Critical edge found\n");
+      TryBreak = true;
+    }
+
+    // Don't sink instructions into a loop.
+    if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) {
+      DEBUG(dbgs() << " *** NOTE: Loop header found\n");
+      TryBreak = true;
+    }
+
+    // Otherwise we are OK with sinking along a critical edge.
+    if (!TryBreak)
+      DEBUG(dbgs() << "Sinking along critical edge.\n");
+    else {
+      MachineBasicBlock *NewSucc =
+        SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
+      if (!NewSucc) {
+        DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+                        "break critical edge\n");
+        return false;
+      } else {
+        DEBUG(dbgs() << " *** Splitting critical edge:"
+                        " BB#" << ParentBlock->getNumber()
+                     << " -- BB#" << NewSucc->getNumber()
+                     << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+        SuccToSinkTo = NewSucc;
+        ++NumSplit;
+        BreakPHIEdge = false;
+      }
+    }
+  }
+
+  if (BreakPHIEdge) {
+    // BreakPHIEdge is true if all the uses are in the successor MBB being
+    // sunk into and they are all PHI nodes. In this case, machine-sink must
+    // break the critical edge first.
+    MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+                                                   SuccToSinkTo, BreakPHIEdge);
+    if (!NewSucc) {
+      DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+                      "break critical edge\n");
+      return false;
+    }
+
+    DEBUG(dbgs() << " *** Splitting critical edge:"
+                    " BB#" << ParentBlock->getNumber()
+                 << " -- BB#" << NewSucc->getNumber()
+                 << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+    SuccToSinkTo = NewSucc;
+    ++NumSplit;
+  }
+
+  // Determine where to insert into. Skip phi nodes.
+  MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+  while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
+    ++InsertPos;
+
+  // Move the instruction.
+  SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
+                       ++MachineBasicBlock::iterator(MI));
+
+  // Conservatively, clear any kill flags, since it's possible that they are no
+  // longer correct.
+  MI->clearKillInfo();
+
+  return true;
+}
diff --git a/final/lib/CodeGen/MachineVerifier.cpp b/final/lib/CodeGen/MachineVerifier.cpp
new file mode 100644
index 00000000000..7351119f472
--- /dev/null
+++ b/final/lib/CodeGen/MachineVerifier.cpp
@@ -0,0 +1,1216 @@
+//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pass to verify generated machine code. The following is checked:
+//
+// Operand counts: All explicit operands must be present.
+//
+// Register classes: All physical and virtual register operands must be
+// compatible with the register class required by the instruction descriptor.
+//
+// Register live intervals: Registers must be defined only once, and must be
+// defined before use.
+//
+// The machine code verifier is enabled from LLVMTargetMachine.cpp with the
+// command-line option -verify-machineinstrs, or by defining the environment
+// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive
+// the verifier errors.
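+//
+// For example (editorial usage sketch; the file names are hypothetical):
+//
+//   llc -verify-machineinstrs foo.ll
+//   LLVM_VERIFY_MACHINEINSTRS=verify.log llc foo.ll
+//
+// The first form aborts with a fatal error when problems are found; the
+// second appends the reports to verify.log instead.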
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  struct MachineVerifier {
+
+    MachineVerifier(Pass *pass, const char *b) :
+      PASS(pass),
+      Banner(b),
+      OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
+      {}
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    Pass *const PASS;
+    const char *Banner;
+    const char *const OutFileName;
+    raw_ostream *OS;
+    const MachineFunction *MF;
+    const TargetMachine *TM;
+    const TargetRegisterInfo *TRI;
+    const MachineRegisterInfo *MRI;
+
+    unsigned foundErrors;
+
+    typedef SmallVector<unsigned, 16> RegVector;
+    typedef DenseSet<unsigned> RegSet;
+    typedef DenseMap<unsigned, const MachineInstr*> RegMap;
+
+    BitVector regsReserved;
+    RegSet regsLive;
+    RegVector regsDefined, regsDead, regsKilled;
+    RegSet regsLiveInButUnused;
+
+    SlotIndex lastIndex;
+
+    // Add Reg and any sub-registers to RV
+    void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
+      RV.push_back(Reg);
+      if (TargetRegisterInfo::isPhysicalRegister(Reg))
+        for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++)
+          RV.push_back(*R);
+    }
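+
+    // Editorial example (not part of the original patch; x86 names used for
+    // illustration): addRegWithSubRegs(RV, X86::EAX) appends EAX and then its
+    // sub-registers AX, AL and AH, so liveness updates cover all overlapping
+    // registers at once.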
+
+    struct BBInfo {
+      // Is this MBB reachable from the MF entry point?
+      bool reachable;
+
+      // Vregs that must be live in because they are used without being
+      // defined. Map value is the user.
+      RegMap vregsLiveIn;
+
+      // Regs killed in MBB. They may be defined again, and will then be in
+      // both regsKilled and regsLiveOut.
+      RegSet regsKilled;
+
+      // Regs defined in MBB and live out. Note that vregs passing through may
+      // be live out without being mentioned here.
+      RegSet regsLiveOut;
+
+      // Vregs that pass through MBB untouched. This set is disjoint from
+      // regsKilled and regsLiveOut.
+      RegSet vregsPassed;
+
+      // Vregs that must pass through MBB because they are needed by a
+      // successor block. This set is disjoint from regsLiveOut.
+      RegSet vregsRequired;
+
+      BBInfo() : reachable(false) {}
+
+      // Add register to vregsPassed if it belongs there. Return true if
+      // anything changed.
+      bool addPassed(unsigned Reg) {
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          return false;
+        if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
+          return false;
+        return vregsPassed.insert(Reg).second;
+      }
+
+      // Same for a full set.
+      bool addPassed(const RegSet &RS) {
+        bool changed = false;
+        for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+          if (addPassed(*I))
+            changed = true;
+        return changed;
+      }
+
+      // Add register to vregsRequired if it belongs there. Return true if
+      // anything changed.
+      bool addRequired(unsigned Reg) {
+        if (!TargetRegisterInfo::isVirtualRegister(Reg))
+          return false;
+        if (regsLiveOut.count(Reg))
+          return false;
+        return vregsRequired.insert(Reg).second;
+      }
+
+      // Same for a full set.
+      bool addRequired(const RegSet &RS) {
+        bool changed = false;
+        for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I)
+          if (addRequired(*I))
+            changed = true;
+        return changed;
+      }
+
+      // Same for a full map.
+      bool addRequired(const RegMap &RM) {
+        bool changed = false;
+        for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I)
+          if (addRequired(I->first))
+            changed = true;
+        return changed;
+      }
+
+      // Live-out registers are either in regsLiveOut or vregsPassed.
+      bool isLiveOut(unsigned Reg) const {
+        return regsLiveOut.count(Reg) || vregsPassed.count(Reg);
+      }
+    };
+
+    // Extra register info per MBB.
+    DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap;
+
+    bool isReserved(unsigned Reg) {
+      return Reg < regsReserved.size() && regsReserved.test(Reg);
+    }
+
+    // Analysis information if available
+    LiveVariables *LiveVars;
+    LiveIntervals *LiveInts;
+    LiveStacks *LiveStks;
+    SlotIndexes *Indexes;
+
+    void visitMachineFunctionBefore();
+    void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
+    void visitMachineInstrBefore(const MachineInstr *MI);
+    void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
+    void visitMachineInstrAfter(const MachineInstr *MI);
+    void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
+    void visitMachineFunctionAfter();
+
+    void report(const char *msg, const MachineFunction *MF);
+    void report(const char *msg, const MachineBasicBlock *MBB);
+    void report(const char *msg, const MachineInstr *MI);
+    void report(const char *msg, const MachineOperand *MO, unsigned MONum);
+
+    void markReachable(const MachineBasicBlock *MBB);
+    void calcRegsPassed();
+    void checkPHIOps(const MachineBasicBlock *MBB);
+
+    void calcRegsRequired();
+    void verifyLiveVariables();
+    void verifyLiveIntervals();
+  };
+
+  struct MachineVerifierPass : public MachineFunctionPass {
+    static char ID; // Pass ID, replacement for typeid
+    const char *const Banner;
+
+    MachineVerifierPass(const char *b = 0)
+      : MachineFunctionPass(ID), Banner(b) {
+        initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+      }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) {
+      MF.verify(this, Banner);
+      return false;
+    }
+  };
+
+}
+
+char MachineVerifierPass::ID = 0;
+INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
+                "Verify generated machine code", false, false)
+
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+  return new MachineVerifierPass(Banner);
+}
+
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+  MachineVerifier(p, Banner)
+    .runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
+bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
+  raw_ostream *OutFile = 0;
+  if (OutFileName) {
+    std::string ErrorInfo;
+    OutFile = new raw_fd_ostream(OutFileName, ErrorInfo,
+                                 raw_fd_ostream::F_Append);
+    if (!ErrorInfo.empty()) {
+      errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n';
+      exit(1);
+    }
+
+    OS = OutFile;
+  } else {
+    OS = &errs();
+  }
+
+  foundErrors = 0;
+
+  this->MF = &MF;
+  TM = &MF.getTarget();
+  TRI = TM->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+
+  LiveVars = NULL;
+  LiveInts = NULL;
+  LiveStks = NULL;
+  Indexes = NULL;
+  if (PASS) {
+    LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
+    // We don't want to verify LiveVariables if LiveIntervals is available.
+    if (!LiveInts)
+      LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+    LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+    Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
+  }
+
+  visitMachineFunctionBefore();
+  for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end();
+       MFI!=MFE; ++MFI) {
+    visitMachineBasicBlockBefore(MFI);
+    for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
+           MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      if (MBBI->getParent() != MFI) {
+        report("Bad instruction parent pointer", MFI);
+        *OS << "Instruction: " << *MBBI;
+        continue;
+      }
+      visitMachineInstrBefore(MBBI);
+      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
+        visitMachineOperand(&MBBI->getOperand(I), I);
+      visitMachineInstrAfter(MBBI);
+    }
+    visitMachineBasicBlockAfter(MFI);
+  }
+  visitMachineFunctionAfter();
+
+  if (OutFile)
+    delete OutFile;
+  else if (foundErrors)
+    report_fatal_error("Found "+Twine(foundErrors)+" machine code errors.");
+
+  // Clean up.
+  regsLive.clear();
+  regsDefined.clear();
+  regsDead.clear();
+  regsKilled.clear();
+  regsLiveInButUnused.clear();
+  MBBInfoMap.clear();
+
+  return false;                 // no changes
+}
+
+void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
+  assert(MF);
+  *OS << '\n';
+  if (!foundErrors++) {
+    if (Banner)
+      *OS << "# " << Banner << '\n';
+    MF->print(*OS, Indexes);
+  }
+  *OS << "*** Bad machine code: " << msg << " ***\n"
+      << "- function:    " << MF->getFunction()->getNameStr() << "\n";
+}
+
+void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
+  assert(MBB);
+  report(msg, MBB->getParent());
+  *OS << "- basic block: " << MBB->getName()
+      << " " << (void*)MBB
+      << " (BB#" << MBB->getNumber() << ")";
+  if (Indexes)
+    *OS << " [" << Indexes->getMBBStartIdx(MBB)
+        << ';' <<  Indexes->getMBBEndIdx(MBB) << ')';
+  *OS << '\n';
+}
+
+void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
+  assert(MI);
+  report(msg, MI->getParent());
+  *OS << "- instruction: ";
+  if (Indexes && Indexes->hasIndex(MI))
+    *OS << Indexes->getInstructionIndex(MI) << '\t';
+  MI->print(*OS, TM);
+}
+
+void MachineVerifier::report(const char *msg,
+                             const MachineOperand *MO, unsigned MONum) {
+  assert(MO);
+  report(msg, MO->getParent());
+  *OS << "- operand " << MONum << ":   ";
+  MO->print(*OS, TM);
+  *OS << "\n";
+}
+
+void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
+  BBInfo &MInfo = MBBInfoMap[MBB];
+  if (!MInfo.reachable) {
+    MInfo.reachable = true;
+    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+           SuE = MBB->succ_end(); SuI != SuE; ++SuI)
+      markReachable(*SuI);
+  }
+}
+
+void MachineVerifier::visitMachineFunctionBefore() {
+  lastIndex = SlotIndex();
+  regsReserved = TRI->getReservedRegs(*MF);
+
+  // A sub-register of a reserved register is also reserved
+  for (int Reg = regsReserved.find_first(); Reg>=0;
+       Reg = regsReserved.find_next(Reg)) {
+    for (const unsigned *Sub = TRI->getSubRegisters(Reg); *Sub; ++Sub) {
+      // FIXME: This should probably be:
+      // assert(regsReserved.test(*Sub) && "Non-reserved sub-register");
+      regsReserved.set(*Sub);
+    }
+  }
+  markReachable(&MF->front());
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+                      const MachineBasicBlock *a, const MachineBasicBlock *b) {
+  if (*i == a)
+    return *++i == b;
+  if (*i == b)
+    return *++i == a;
+  return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+  // Count the number of landing pad successors.
+  SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+         E = MBB->succ_end(); I != E; ++I) {
+    if ((*I)->isLandingPad())
+      LandingPadSuccs.insert(*I);
+  }
+  if (LandingPadSuccs.size() > 1)
+    report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to
+  // check.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+                          TBB, FBB, Cond)) {
+    // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+    // check whether its answers match up with reality.
+    if (!TBB && !FBB) {
+      // Block falls through to its successor.
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out the bottom of the function.
+      } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out of the block.
+      } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+        report("MBB exits via unconditional fall-through but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(MBBI)) {
+        report("MBB exits via unconditional fall-through but its successor "
+               "differs from its CFG successor!", MBB);
+      }
+      if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+          !TII->isPredicated(&MBB->back())) {
+        report("MBB exits via unconditional fall-through but ends with a "
+               "barrier instruction!", MBB);
+      }
+      if (!Cond.empty()) {
+        report("MBB exits via unconditional fall-through but has a condition!",
+               MBB);
+      }
+    } else if (TBB && !FBB && Cond.empty()) {
+      // Block unconditionally branches somewhere.
+      if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+        report("MBB exits via unconditional branch but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(TBB)) {
+        report("MBB exits via unconditional branch but the CFG "
+               "successor doesn't match the actual successor!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via unconditional branch but doesn't contain "
+               "any instructions!", MBB);
+      } else if (!MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via unconditional branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via unconditional branch but the branch isn't a "
+               "terminator instruction!", MBB);
+      }
+    } else if (TBB && !FBB && !Cond.empty()) {
+      // Block conditionally branches somewhere, otherwise falls through.
+      MachineFunction::const_iterator MBBI = MBB;
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/fall-through but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
+        report("MBB exits via conditional branch/fall-through but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/fall-through but doesn't "
+               "contain any instructions!", MBB);
+      } else if (MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via conditional branch/fall-through but ends with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via conditional branch/fall-through but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+    } else if (TBB && FBB) {
+      // Block conditionally branches somewhere, otherwise branches
+      // somewhere else.
+      if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/branch but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+        report("MBB exits via conditional branch/branch but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/branch but doesn't "
+               "contain any instructions!", MBB);
+      } else if (!MBB->back().getDesc().isBarrier()) {
+        report("MBB exits via conditional branch/branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().getDesc().isTerminator()) {
+        report("MBB exits via conditional branch/branch but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+      if (Cond.empty()) {
+        report("MBB exits via conditional branch/branch but there's no "
+               "condition!", MBB);
+      }
+    } else {
+      report("AnalyzeBranch returned invalid data!", MBB);
+    }
+  }
+
+  regsLive.clear();
+  for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+         E = MBB->livein_end(); I != E; ++I) {
+    if (!TargetRegisterInfo::isPhysicalRegister(*I)) {
+      report("MBB live-in list contains non-physical register", MBB);
+      continue;
+    }
+    regsLive.insert(*I);
+    for (const unsigned *R = TRI->getSubRegisters(*I); *R; R++)
+      regsLive.insert(*R);
+  }
+  regsLiveInButUnused = regsLive;
+
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  assert(MFI && "Function has no frame info");
+  BitVector PR = MFI->getPristineRegs(MBB);
+  for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+    regsLive.insert(I);
+    for (const unsigned *R = TRI->getSubRegisters(I); *R; R++)
+      regsLive.insert(*R);
+  }
+
+  regsKilled.clear();
+  regsDefined.clear();
+
+  if (Indexes)
+    lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+  const TargetInstrDesc &TI = MI->getDesc();
+  if (MI->getNumOperands() < TI.getNumOperands()) {
+    report("Too few operands", MI);
+    *OS << TI.getNumOperands() << " operands expected, but "
+        << MI->getNumExplicitOperands() << " given.\n";
+  }
+
+  // Check the MachineMemOperands for basic consistency.
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+         E = MI->memoperands_end(); I != E; ++I) {
+    if ((*I)->isLoad() && !TI.mayLoad())
+      report("Missing mayLoad flag", MI);
+    if ((*I)->isStore() && !TI.mayStore())
+      report("Missing mayStore flag", MI);
+  }
+
+  // Debug values must not have a slot index.
+  // Other instructions must have one.
+  if (LiveInts) {
+    bool mapped = !LiveInts->isNotInMIMap(MI);
+    if (MI->isDebugValue()) {
+      if (mapped)
+        report("Debug instruction has a slot index", MI);
+    } else {
+      if (!mapped)
+        report("Missing slot index", MI);
+    }
+  }
+
+}
+
+void
+MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
+  const MachineInstr *MI = MO->getParent();
+  const TargetInstrDesc &TI = MI->getDesc();
+  const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+
+  // The first TI.NumDefs operands must be explicit register defines
+  if (MONum < TI.getNumDefs()) {
+    if (!MO->isReg())
+      report("Explicit definition must be a register", MO, MONum);
+    else if (!MO->isDef())
+      report("Explicit definition marked as use", MO, MONum);
+    else if (MO->isImplicit())
+      report("Explicit definition marked as implicit", MO, MONum);
+  } else if (MONum < TI.getNumOperands()) {
+    // Don't check if it's the last operand in a variadic instruction. See,
+    // e.g., LDM_RET in the arm back end.
+    if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
+      if (MO->isDef() && !TOI.isOptionalDef())
+        report("Explicit operand marked as def", MO, MONum);
+      if (MO->isImplicit())
+        report("Explicit operand marked as implicit", MO, MONum);
+    }
+  } else {
+    // ARM adds %reg0 operands to indicate predicates. We'll allow that.
+    if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg())
+      report("Extra explicit operand on non-variadic instruction", MO, MONum);
+  }
+
+  switch (MO->getType()) {
+  case MachineOperand::MO_Register: {
+    const unsigned Reg = MO->getReg();
+    if (!Reg)
+      return;
+
+    // Check Live Variables.
+    if (MI->isDebugValue()) {
+      // Liveness checks are not valid for debug values.
+    } else if (MO->isUndef()) {
+      // An <undef> doesn't refer to any register, so just skip it.
+    } else if (MO->isUse()) {
+      regsLiveInButUnused.erase(Reg);
+
+      bool isKill = false;
+      unsigned defIdx;
+      if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+        // A two-addr use counts as a kill if use and def are the same.
+        unsigned DefReg = MI->getOperand(defIdx).getReg();
+        if (Reg == DefReg) {
+          isKill = true;
+          // And in that case an explicit kill flag is not allowed.
+          if (MO->isKill())
+            report("Illegal kill flag on two-address instruction operand",
+                   MO, MONum);
+        } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          report("Two-address instruction operands must be identical",
+                 MO, MONum);
+        }
+      } else
+        isKill = MO->isKill();
+
+      if (isKill)
+        addRegWithSubRegs(regsKilled, Reg);
+
+      // Check that LiveVars knows this kill.
+      if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
+          MO->isKill()) {
+        LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+        if (std::find(VI.Kills.begin(),
+                      VI.Kills.end(), MI) == VI.Kills.end())
+          report("Kill missing from LiveVariables", MO, MONum);
+      }
+
+      // Check LiveInts liveness and kill.
+      if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+          LiveInts && !LiveInts->isNotInMIMap(MI)) {
+        SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
+        if (LiveInts->hasInterval(Reg)) {
+          const LiveInterval &LI = LiveInts->getInterval(Reg);
+          if (!LI.liveAt(UseIdx)) {
+            report("No live range at use", MO, MONum);
+            *OS << UseIdx << " is not live in " << LI << '\n';
+          }
+          // Check for extra kill flags.
+          // Note that we allow missing kill flags for now.
+          if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+            report("Live range continues after kill flag", MO, MONum);
+            *OS << "Live range: " << LI << '\n';
+          }
+        } else {
+          report("Virtual register has no Live interval", MO, MONum);
+        }
+      }
+
+      // Use of a dead register.
+      if (!regsLive.count(Reg)) {
+        if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+          // Reserved registers may be used even when 'dead'.
+          if (!isReserved(Reg))
+            report("Using an undefined physical register", MO, MONum);
+        } else {
+          BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+          // We don't know which virtual registers are live in, so only complain
+          // if vreg was killed in this MBB. Otherwise keep track of vregs that
+          // must be live in. PHI instructions are handled separately.
+          if (MInfo.regsKilled.count(Reg))
+            report("Using a killed virtual register", MO, MONum);
+          else if (!MI->isPHI())
+            MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI));
+        }
+      }
+    } else {
+      assert(MO->isDef());
+      // Register defined.
+      // TODO: verify that earlyclobber ops are not used.
+      if (MO->isDead())
+        addRegWithSubRegs(regsDead, Reg);
+      else
+        addRegWithSubRegs(regsDefined, Reg);
+
+      // Check LiveInts for a live range, but only for virtual registers.
+      if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) &&
+          !LiveInts->isNotInMIMap(MI)) {
+        SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
+        if (LiveInts->hasInterval(Reg)) {
+          const LiveInterval &LI = LiveInts->getInterval(Reg);
+          if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+            assert(VNI && "NULL valno is not allowed");
+            if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
+              report("Inconsistent valno->def", MO, MONum);
+              *OS << "Valno " << VNI->id << " is not defined at "
+                  << DefIdx << " in " << LI << '\n';
+            }
+          } else {
+            report("No live range at def", MO, MONum);
+            *OS << DefIdx << " is not live in " << LI << '\n';
+          }
+        } else {
+          report("Virtual register has no Live interval", MO, MONum);
+        }
+      }
+    }
+
+    // Check register classes.
+    if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+      unsigned SubIdx = MO->getSubReg();
+
+      if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+        unsigned sr = Reg;
+        if (SubIdx) {
+          unsigned s = TRI->getSubReg(Reg, SubIdx);
+          if (!s) {
+            report("Invalid subregister index for physical register",
+                   MO, MONum);
+            return;
+          }
+          sr = s;
+        }
+        if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+          if (!DRC->contains(sr)) {
+            report("Illegal physical register for instruction", MO, MONum);
+            *OS << TRI->getName(sr) << " is not a "
+                << DRC->getName() << " register.\n";
+          }
+        }
+      } else {
+        // Virtual register.
+        const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+        if (SubIdx) {
+          const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+          if (!SRC) {
+            report("Invalid subregister index for virtual register", MO, MONum);
+            *OS << "Register class " << RC->getName()
+                << " does not support subreg index " << SubIdx << "\n";
+            return;
+          }
+          RC = SRC;
+        }
+        if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+          if (RC != DRC && !RC->hasSuperClass(DRC)) {
+            report("Illegal virtual register for instruction", MO, MONum);
+            *OS << "Expected a " << DRC->getName() << " register, but got a "
+                << RC->getName() << " register\n";
+          }
+        }
+      }
+    }
+    break;
+  }
+
+  case MachineOperand::MO_MachineBasicBlock:
+    if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent()))
+      report("PHI operand is not in the CFG", MO, MONum);
+    break;
+
+  case MachineOperand::MO_FrameIndex:
+    if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+        LiveInts && !LiveInts->isNotInMIMap(MI)) {
+      LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+      SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+      if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+        report("Instruction loads from dead spill slot", MO, MONum);
+        *OS << "Live stack: " << LI << '\n';
+      }
+      if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+        report("Instruction stores to dead spill slot", MO, MONum);
+        *OS << "Live stack: " << LI << '\n';
+      }
+    }
+    break;
+
+  default:
+    break;
+  }
+}
+
+void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
+  BBInfo &MInfo = MBBInfoMap[MI->getParent()];
+  set_union(MInfo.regsKilled, regsKilled);
+  set_subtract(regsLive, regsKilled); regsKilled.clear();
+  set_subtract(regsLive, regsDead);   regsDead.clear();
+  set_union(regsLive, regsDefined);   regsDefined.clear();
+
+  if (Indexes && Indexes->hasIndex(MI)) {
+    SlotIndex idx = Indexes->getInstructionIndex(MI);
+    if (!(idx > lastIndex)) {
+      report("Instruction index out of order", MI);
+      *OS << "Last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = idx;
+  }
+}
+
+void
+MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
+  MBBInfoMap[MBB].regsLiveOut = regsLive;
+  regsLive.clear();
+
+  if (Indexes) {
+    SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+    if (!(stop > lastIndex)) {
+      report("Block ends before last instruction index", MBB);
+      *OS << "Block ends at " << stop
+          << " last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = stop;
+  }
+}
+
+// Calculate the largest possible vregsPassed sets. These are the registers
+// that can pass through an MBB live, but may not be live every time. It is
+// assumed that all vregsPassed sets are empty before the call.
+void MachineVerifier::calcRegsPassed() {
+  // First push live-out regs to successors' vregsPassed. Remember the MBBs
+  // that have any vregsPassed.
+  DenseSet<const MachineBasicBlock*> todo;
+  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+       MFI != MFE; ++MFI) {
+    const MachineBasicBlock &MBB(*MFI);
+    BBInfo &MInfo = MBBInfoMap[&MBB];
+    if (!MInfo.reachable)
+      continue;
+    for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(),
+           SuE = MBB.succ_end(); SuI != SuE; ++SuI) {
+      BBInfo &SInfo = MBBInfoMap[*SuI];
+      if (SInfo.addPassed(MInfo.regsLiveOut))
+        todo.insert(*SuI);
+    }
+  }
+
+  // Iteratively push vregsPassed to successors. This will converge to the
+  // same final state regardless of DenseSet iteration order.
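+  //
+  // Editorial example (not part of the original patch): with a chain
+  // BB#0 -> BB#1 -> BB#2 where %vreg1 is defined in BB#0 and killed in BB#2:
+  //   regsLiveOut[BB#0] = { %vreg1 }
+  //   vregsPassed[BB#1] = { %vreg1 }   // pushed from BB#0, passes through
+  //   vregsPassed[BB#2] = { }          // rejected: %vreg1 is in regsKilled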
+  while (!todo.empty()) {
+    const MachineBasicBlock *MBB = *todo.begin();
+    todo.erase(MBB);
+    BBInfo &MInfo = MBBInfoMap[MBB];
+    for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(),
+           SuE = MBB->succ_end(); SuI != SuE; ++SuI) {
+      if (*SuI == MBB)
+        continue;
+      BBInfo &SInfo = MBBInfoMap[*SuI];
+      if (SInfo.addPassed(MInfo.vregsPassed))
+        todo.insert(*SuI);
+    }
+  }
+}
+
+// Calculate the set of virtual registers that must be passed through each
+// basic block in order to satisfy the requirements of successor blocks. This
+// is very similar to calcRegsPassed, only backwards.
+void MachineVerifier::calcRegsRequired() {
+  // First push live-in regs to predecessors' vregsRequired.
+  DenseSet<const MachineBasicBlock*> todo;
+  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+       MFI != MFE; ++MFI) {
+    const MachineBasicBlock &MBB(*MFI);
+    BBInfo &MInfo = MBBInfoMap[&MBB];
+    for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(),
+           PrE = MBB.pred_end(); PrI != PrE; ++PrI) {
+      BBInfo &PInfo = MBBInfoMap[*PrI];
+      if (PInfo.addRequired(MInfo.vregsLiveIn))
+        todo.insert(*PrI);
+    }
+  }
+
+  // Iteratively push vregsRequired to predecessors. This will converge to the
+  // same final state regardless of DenseSet iteration order.
+  while (!todo.empty()) {
+    const MachineBasicBlock *MBB = *todo.begin();
+    todo.erase(MBB);
+    BBInfo &MInfo = MBBInfoMap[MBB];
+    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+      if (*PrI == MBB)
+        continue;
+      BBInfo &SInfo = MBBInfoMap[*PrI];
+      if (SInfo.addRequired(MInfo.vregsRequired))
+        todo.insert(*PrI);
+    }
+  }
+}
+
+// Check PHI instructions at the beginning of MBB. It is assumed that
+// calcRegsPassed has been run so BBInfo::isLiveOut is valid.
+void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) {
+  for (MachineBasicBlock::const_iterator BBI = MBB->begin(), BBE = MBB->end();
+       BBI != BBE && BBI->isPHI(); ++BBI) {
+    DenseSet<const MachineBasicBlock*> seen;
+
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+      unsigned Reg = BBI->getOperand(i).getReg();
+      const MachineBasicBlock *Pre = BBI->getOperand(i + 1).getMBB();
+      if (!Pre->isSuccessor(MBB))
+        continue;
+      seen.insert(Pre);
+      BBInfo &PrInfo = MBBInfoMap[Pre];
+      if (PrInfo.reachable && !PrInfo.isLiveOut(Reg))
+        report("PHI operand is not live-out from predecessor",
+               &BBI->getOperand(i), i);
+    }
+
+    // Did we see all predecessors?
+    for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(),
+           PrE = MBB->pred_end(); PrI != PrE; ++PrI) {
+      if (!seen.count(*PrI)) {
+        report("Missing PHI operand", BBI);
+        *OS << "BB#" << (*PrI)->getNumber()
+            << " is a predecessor according to the CFG.\n";
+      }
+    }
+  }
+}
+
+void MachineVerifier::visitMachineFunctionAfter() {
+  calcRegsPassed();
+
+  for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+       MFI != MFE; ++MFI) {
+    BBInfo &MInfo = MBBInfoMap[MFI];
+
+    // Skip unreachable MBBs.
+    if (!MInfo.reachable)
+      continue;
+
+    checkPHIOps(MFI);
+  }
+
+  // Now check liveness info if available
+  if (LiveVars || LiveInts)
+    calcRegsRequired();
+  if (LiveVars)
+    verifyLiveVariables();
+  if (LiveInts)
+    verifyLiveIntervals();
+}
+
+void MachineVerifier::verifyLiveVariables() {
+  assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
+  for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+    LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
+    for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
+         MFI != MFE; ++MFI) {
+      BBInfo &MInfo = MBBInfoMap[MFI];
+
+      // Our vregsRequired should be identical to LiveVariables' AliveBlocks
+      if (MInfo.vregsRequired.count(Reg)) {
+        if (!VI.AliveBlocks.test(MFI->getNumber())) {
+          report("LiveVariables: Block missing from AliveBlocks", MFI);
+          *OS << "Virtual register " << PrintReg(Reg)
+              << " must be live through the block.\n";
+        }
+      } else {
+        if (VI.AliveBlocks.test(MFI->getNumber())) {
+          report("LiveVariables: Block should not be in AliveBlocks", MFI);
+          *OS << "Virtual register " << PrintReg(Reg)
+              << " is not needed live through the block.\n";
+        }
+      }
+    }
+  }
+}
+
+void MachineVerifier::verifyLiveIntervals() {
+  assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
+  for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
+         LVE = LiveInts->end(); LVI != LVE; ++LVI) {
+    const LiveInterval &LI = *LVI->second;
+
+    // Spilling and splitting may leave unused registers around. Skip them.
+    if (MRI->use_empty(LI.reg))
+      continue;
+
+    // Physical registers have much weirdness going on, mostly from coalescing.
+    // We should probably fix it, but for now just ignore them.
+    if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+      continue;
+
+    assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
+
+    for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
+         I!=E; ++I) {
+      VNInfo *VNI = *I;
+      const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
+
+      if (!DefVNI) {
+        if (!VNI->isUnused()) {
+          report("Valno not live at def and not marked unused", MF);
+          *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+        }
+        continue;
+      }
+
+      if (VNI->isUnused())
+        continue;
+
+      if (DefVNI != VNI) {
+        report("Live range at def has different valno", MF);
+        *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+            << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+        continue;
+      }
+
+      const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+      if (!MBB) {
+        report("Invalid definition index", MF);
+        *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+            << " in " << LI << '\n';
+        continue;
+      }
+
+      if (VNI->isPHIDef()) {
+        if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+          report("PHIDef value is not defined at MBB start", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << ", not at the beginning of BB#" << MBB->getNumber()
+              << " in " << LI << '\n';
+        }
+      } else {
+        // Non-PHI def.
+        const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+        if (!MI) {
+          report("No instruction at def index", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << " in " << LI << '\n';
+        } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+          report("Defining instruction does not modify register", MI);
+          *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+        }
+
+        bool isEarlyClobber = false;
+        if (MI) {
+          for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+                 MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+            if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+                MOI->isEarlyClobber()) {
+              isEarlyClobber = true;
+              break;
+            }
+          }
+        }
+
+        // Early clobber defs begin at USE slots, but other defs must begin at
+        // DEF slots.
+        if (isEarlyClobber) {
+          if (!VNI->def.isUse()) {
+            report("Early clobber def must be at a USE slot", MF);
+            *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+                << " in " << LI << '\n';
+          }
+        } else if (!VNI->def.isDef()) {
+          report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+          *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+              << " in " << LI << '\n';
+        }
+      }
+    }
+
+    for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
+      const VNInfo *VNI = I->valno;
+      assert(VNI && "Live range has no valno");
+
+      if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
+        report("Foreign valno in live range", MF);
+        I->print(*OS);
+        *OS << " has a valno not in " << LI << '\n';
+      }
+
+      if (VNI->isUnused()) {
+        report("Live range valno is marked unused", MF);
+        I->print(*OS);
+        *OS << " in " << LI << '\n';
+      }
+
+      const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+      if (!MBB) {
+        report("Bad start of live segment, no basic block", MF);
+        I->print(*OS);
+        *OS << " in " << LI << '\n';
+        continue;
+      }
+      SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+      if (I->start != MBBStartIdx && I->start != VNI->def) {
+        report("Live segment must begin at MBB entry or valno def", MBB);
+        I->print(*OS);
+        *OS << " in " << LI << '\n' << "Basic block starts at "
+            << MBBStartIdx << '\n';
+      }
+
+      const MachineBasicBlock *EndMBB =
+        LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+      if (!EndMBB) {
+        report("Bad end of live segment, no basic block", MF);
+        I->print(*OS);
+        *OS << " in " << LI << '\n';
+        continue;
+      }
+      if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+        // The live segment is ending inside EndMBB
+        const MachineInstr *MI =
+          LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+        if (!MI) {
+          report("Live segment doesn't end at a valid instruction", EndMBB);
+          I->print(*OS);
+          *OS << " in " << LI << '\n' << "Basic block starts at "
+              << MBBStartIdx << '\n';
+        } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+                   !MI->readsVirtualRegister(LI.reg)) {
+          // A live range can end with either a redefinition, a kill flag on a
+          // use, or a dead flag on a def.
+          // FIXME: Should we check for each of these?
+          bool hasDeadDef = false;
+          for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+                 MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+            if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+                MOI->isDead()) {
+              hasDeadDef = true;
+              break;
+            }
+          }
+
+          if (!hasDeadDef) {
+            report("Instruction killing live segment neither defines nor reads "
+                   "register", MI);
+            I->print(*OS);
+            *OS << " in " << LI << '\n';
+          }
+        }
+      }
+
+      // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB; + // Is this live range the beginning of a non-PHIDef VN? + if (I->start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + continue; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LI, MFI)); + // We don't know how to track physregs into a landing pad. + if (TargetRegisterInfo::isPhysicalRegister(LI.reg) && + MFI->isLandingPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); + const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + + if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) { + if (PVNI && !PVNI->hasPHIKill()) { + report("Value live out of predecessor doesn't have PHIKill", MF); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << " doesn't have PHIKill, but Valno #" << VNI->id + << " is PHIDef and defined at the beginning of BB#" + << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) + << " in " << LI << '\n'; + } + continue; + } + + if (!PVNI) { + report("Register not marked live out of predecessor", *PI); + *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << PEnd << " in " << LI << '\n'; + continue; + } + + if (PVNI != VNI) { + report("Different value live out of predecessor", *PI); + *OS << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd + << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n'; + } + } + if (&*MFI == EndMBB) + break; + ++MFI; + } + } + + // Check the LI only has one connected component. + if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF); + *OS << NumComp << " components in " << LI << '\n'; + for (unsigned comp = 0; comp != NumComp; ++comp) { + *OS << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + *OS << ' ' << (*I)->id; + *OS << '\n'; + } + } + } + } +} + diff --git a/final/lib/CodeGen/Makefile b/final/lib/CodeGen/Makefile new file mode 100644 index 00000000000..4ab3e3c0013 --- /dev/null +++ b/final/lib/CodeGen/Makefile @@ -0,0 +1,22 @@ +##===- lib/CodeGen/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMCodeGen +PARALLEL_DIRS = SelectionDAG AsmPrinter +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + +# Xcode prior to 2.4 generates an error in -pedantic mode with use of HUGE_VAL +# in this directory. Disable -pedantic for this broken compiler. 
+ifneq ($(HUGE_VAL_SANITY),yes)
+CompileCommonOpts := $(filter-out -pedantic, $(CompileCommonOpts))
+endif
+
diff --git a/final/lib/CodeGen/ObjectCodeEmitter.cpp b/final/lib/CodeGen/ObjectCodeEmitter.cpp
new file mode 100644
index 00000000000..cf05275d7a3
--- /dev/null
+++ b/final/lib/CodeGen/ObjectCodeEmitter.cpp
@@ -0,0 +1,141 @@
+//===-- llvm/CodeGen/ObjectCodeEmitter.cpp -------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/BinaryObject.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/CodeGen/ObjectCodeEmitter.h"
+
+//===----------------------------------------------------------------------===//
+//                       ObjectCodeEmitter Implementation
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+ObjectCodeEmitter::ObjectCodeEmitter() : BO(0) {}
+ObjectCodeEmitter::ObjectCodeEmitter(BinaryObject *bo) : BO(bo) {}
+ObjectCodeEmitter::~ObjectCodeEmitter() {}
+
+/// setBinaryObject - set the BinaryObject we are writing to
+void ObjectCodeEmitter::setBinaryObject(BinaryObject *bo) { BO = bo; }
+
+/// emitByte - This callback is invoked when a byte needs to be
+/// written to the data stream, without buffer overflow testing.
+void ObjectCodeEmitter::emitByte(uint8_t B) {
+  BO->emitByte(B);
+}
+
+/// emitWordLE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitWordLE(uint32_t W) {
+  BO->emitWordLE(W);
+}
+
+/// emitWordBE - This callback is invoked when a 32-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitWordBE(uint32_t W) {
+  BO->emitWordBE(W);
+}
+
+/// emitDWordLE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in little-endian format.
+void ObjectCodeEmitter::emitDWordLE(uint64_t W) {
+  BO->emitDWordLE(W);
+}
+
+/// emitDWordBE - This callback is invoked when a 64-bit word needs to be
+/// written to the data stream in big-endian format.
+void ObjectCodeEmitter::emitDWordBE(uint64_t W) {
+  BO->emitDWordBE(W);
+}
+
+/// emitAlignment - Align 'BO' to the necessary alignment boundary.
+void ObjectCodeEmitter::emitAlignment(unsigned Alignment /* 0 */,
+                                      uint8_t fill /* 0 */) {
+  BO->emitAlignment(Alignment, fill);
+}
+
+/// emitULEB128Bytes - This callback is invoked when a ULEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitULEB128Bytes(uint64_t Value) {
+  BO->emitULEB128Bytes(Value);
+}
+
+/// emitSLEB128Bytes - This callback is invoked when a SLEB128 needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitSLEB128Bytes(uint64_t Value) {
+  BO->emitSLEB128Bytes(Value);
+}
+
+/// emitString - This callback is invoked when a String needs to be
+/// written to the data stream.
+void ObjectCodeEmitter::emitString(const std::string &String) {
+  BO->emitString(String);
+}
+
+/// getCurrentPCValue - This returns the address that the next emitted byte
+/// will be output to.
+uintptr_t ObjectCodeEmitter::getCurrentPCValue() const {
+  return BO->getCurrentPCOffset();
+}
+
+/// getCurrentPCOffset - Return the offset from the start of the emitted
+/// buffer that we are currently writing to.
+uintptr_t ObjectCodeEmitter::getCurrentPCOffset() const { + return BO->getCurrentPCOffset(); +} + +/// addRelocation - Whenever a relocatable address is needed, it should be +/// noted with this interface. +void ObjectCodeEmitter::addRelocation(const MachineRelocation& relocation) { + BO->addRelocation(relocation); +} + +/// StartMachineBasicBlock - This should be called by the target when a new +/// basic block is about to be emitted. This way the MCE knows where the +/// start of the block is, and can implement getMachineBasicBlockAddress. +void ObjectCodeEmitter::StartMachineBasicBlock(MachineBasicBlock *MBB) { + if (MBBLocations.size() <= (unsigned)MBB->getNumber()) + MBBLocations.resize((MBB->getNumber()+1)*2); + MBBLocations[MBB->getNumber()] = getCurrentPCOffset(); +} + +/// getMachineBasicBlockAddress - Return the address of the specified +/// MachineBasicBlock, only usable after the label for the MBB has been +/// emitted. +uintptr_t +ObjectCodeEmitter::getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + assert(MBBLocations.size() > (unsigned)MBB->getNumber() && + MBBLocations[MBB->getNumber()] && "MBB not emitted!"); + return MBBLocations[MBB->getNumber()]; +} + +/// getJumpTableEntryAddress - Return the address of the jump table with index +/// 'Index' in the function that last called initJumpTableInfo. +uintptr_t ObjectCodeEmitter::getJumpTableEntryAddress(unsigned Index) const { + assert(JTLocations.size() > Index && "JT not emitted!"); + return JTLocations[Index]; +} + +/// getConstantPoolEntryAddress - Return the address of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntryAddress(unsigned Index) const { + assert(CPLocations.size() > Index && "CP not emitted!"); + return CPLocations[Index]; +} + +/// getConstantPoolEntrySection - Return the section of the 'Index' entry in +/// the constant pool that was last emitted with the emitConstantPool method. +uintptr_t ObjectCodeEmitter::getConstantPoolEntrySection(unsigned Index) const { + assert(CPSections.size() > Index && "CP not emitted!"); + return CPSections[Index]; +} + +} // end namespace llvm + diff --git a/final/lib/CodeGen/OcamlGC.cpp b/final/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 00000000000..48db200c513 --- /dev/null +++ b/final/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,37 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. +// +// The frametable emitter is in OcamlGCPrinter.cpp. 
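+//
+// A function opts in to this collector through its IR gc attribute; for
+// example (illustrative only, using the "ocaml" name registered below):
+//
+//   define i32 @f() gc "ocaml" { ... }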
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/CodeGen/GCStrategy.h"
+
+using namespace llvm;
+
+namespace {
+  class OcamlGC : public GCStrategy {
+  public:
+    OcamlGC();
+  };
+}
+
+static GCRegistry::Add<OcamlGC>
+X("ocaml", "ocaml 3.10-compatible GC");
+
+void llvm::linkOcamlGC() { }
+
+OcamlGC::OcamlGC() {
+  NeededSafePoints = 1 << GC::PostCall;
+  UsesMetadata = true;
+}
diff --git a/final/lib/CodeGen/OptimizePHIs.cpp b/final/lib/CodeGen/OptimizePHIs.cpp
new file mode 100644
index 00000000000..c05be130ec6
--- /dev/null
+++ b/final/lib/CodeGen/OptimizePHIs.cpp
@@ -0,0 +1,190 @@
+//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass optimizes machine instruction PHIs to take advantage of
+// opportunities created during DAG legalization.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phi-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumPHICycles, "Number of PHI cycles replaced");
+STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
+
+namespace {
+  class OptimizePHIs : public MachineFunctionPass {
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+
+  public:
+    static char ID; // Pass identification
+    OptimizePHIs() : MachineFunctionPass(ID) {
+      initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+  private:
+    typedef SmallPtrSet<MachineInstr*, 16> InstrSet;
+    typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator;
+
+    bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg,
+                               InstrSet &PHIsInCycle);
+    bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle);
+    bool OptimizeBB(MachineBasicBlock &MBB);
+  };
+}
+
+char OptimizePHIs::ID = 0;
+INITIALIZE_PASS(OptimizePHIs, "opt-phis",
+                "Optimize machine instruction PHIs", false, false)
+
+FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
+
+bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) {
+  MRI = &Fn.getRegInfo();
+  TII = Fn.getTarget().getInstrInfo();
+
+  // Find dead PHI cycles and PHI cycles that can be replaced by a single
+  // value. InstCombine does these optimizations, but DAG legalization may
+  // introduce new opportunities, e.g., when i64 values are split up for
+  // 32-bit targets.
+  bool Changed = false;
+  for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+    Changed |= OptimizeBB(*I);
+
+  return Changed;
+}
+
+/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands
+/// are copies of SingleValReg, possibly via copies through other PHIs. If
+/// SingleValReg is zero on entry, it is set to the register with the single
+/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that
+/// have been scanned.
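+///
+/// For illustration (machine-IR sketch; the virtual register numbers are
+/// hypothetical):
+///
+///   BB#1: %vr1 = PHI %vr0, <BB#0>, %vr2, <BB#2>
+///   BB#2: %vr2 = PHI %vr1, <BB#1>
+///
+/// Every non-PHI incoming value is %vr0, so SingleValReg is set to %vr0 and
+/// all uses of %vr1 and %vr2 can be rewritten to use %vr0 directly.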
+bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. + if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI)) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineRegisterInfo::use_iterator I = MRI->use_begin(DstReg), + E = MRI->use_end(); I != E; ++I) { + MachineInstr *UseMI = &*I; + if (!UseMI->isPHI() || !IsDeadPHICycle(UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + MRI->replaceRegWith(MI->getOperand(0).getReg(), SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. + PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/final/lib/CodeGen/PHIElimination.cpp b/final/lib/CodeGen/PHIElimination.cpp new file mode 100644 index 00000000000..5f7cf582c96 --- /dev/null +++ b/final/lib/CodeGen/PHIElimination.cpp @@ -0,0 +1,427 @@ +//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates machine instruction PHI nodes by inserting copy
+// instructions. This destroys SSA information, but is the desired input for
+// some register allocators.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "phielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+namespace {
+  class PHIElimination : public MachineFunctionPass {
+    MachineRegisterInfo *MRI; // Machine register information
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    PHIElimination() : MachineFunctionPass(ID) {
+      initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &Fn);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+  private:
+    /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+    /// in predecessor basic blocks.
+    ///
+    bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+    void LowerAtomicPHINode(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator AfterPHIsIt);
+
+    /// analyzePHINodes - Gather information about the PHI nodes in the
+    /// function. In particular, we want to map the number of uses of a
+    /// virtual register which is used in a PHI node. We map that to the BB
+    /// the vreg is coming from. This is used later to determine when the
+    /// vreg is killed in the BB.
+    ///
+    void analyzePHINodes(const MachineFunction& Fn);
+
+    /// Split critical edges where necessary for good coalescer performance.
+    bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+                       LiveVariables &LV, MachineLoopInfo *MLI);
+
+    typedef std::pair<unsigned, unsigned> BBVRegPair;
+    typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+    VRegPHIUse VRegPHIUseCount;
+
+    // Defs of PHI sources which are implicit_def.
+    SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+    // Map reusable lowered PHI node -> incoming join register.
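+    // (Identical PHIs show up when a PHI is duplicated along split critical
+    // edges, typically by tail duplication; reusing one join register for
+    // all of them avoids emitting duplicate copies in the predecessors.)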
+    typedef DenseMap<MachineInstr*, unsigned,
+                     MachineInstrExpressionTrait> LoweredPHIMap;
+    LoweredPHIMap LoweredPHIs;
+  };
+}
+
+STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
+STATISTIC(NumReused, "Number of reused lowered phis");
+
+char PHIElimination::ID = 0;
+INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
+                "Eliminate PHI nodes for register allocation", false, false)
+
+char& llvm::PHIEliminationID = PHIElimination::ID;
+
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<LiveVariables>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addPreserved<MachineLoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+
+  bool Changed = false;
+
+  // Split critical edges to help the coalescer
+  if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) {
+    MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      Changed |= SplitPHIEdges(MF, *I, *LV, MLI);
+  }
+
+  // Populate VRegPHIUseCount
+  analyzePHINodes(MF);
+
+  // Eliminate PHI instructions by inserting copies into predecessor blocks.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+    Changed |= EliminatePHINodes(MF, *I);
+
+  // Remove dead IMPLICIT_DEF instructions.
+  for (SmallPtrSet<MachineInstr*, 4>::iterator I = ImpDefs.begin(),
+         E = ImpDefs.end(); I != E; ++I) {
+    MachineInstr *DefMI = *I;
+    unsigned DefReg = DefMI->getOperand(0).getReg();
+    if (MRI->use_nodbg_empty(DefReg))
+      DefMI->eraseFromParent();
+  }
+
+  // Clean up the lowered PHI instructions.
+  for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end();
+       I != E; ++I)
+    MF.DeleteMachineInstr(I->first);
+
+  LoweredPHIs.clear();
+  ImpDefs.clear();
+  VRegPHIUseCount.clear();
+
+  return Changed;
+}
+
+/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
+/// predecessor basic blocks.
+///
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
+                                       MachineBasicBlock &MBB) {
+  if (MBB.empty() || !MBB.front().isPHI())
+    return false;   // Quick exit for basic blocks without PHIs.
+
+  // Get an iterator to the first instruction after the last PHI node (this may
+  // also be the end of the basic block).
+  MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
+
+  while (MBB.front().isPHI())
+    LowerAtomicPHINode(MBB, AfterPHIsIt);
+
+  return true;
+}
+
+/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node
+/// are implicit_def's.
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
+                                         const MachineRegisterInfo *MRI) {
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
+    unsigned SrcReg = MPhi->getOperand(i).getReg();
+    const MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (!DefMI || !DefMI->isImplicitDef())
+      return false;
+  }
+  return true;
+}
+
+
+
+/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
+/// under the assumption that it needs to be lowered in a way that supports
+/// atomic execution of PHIs. This lowering method is always correct.
+///
+void PHIElimination::LowerAtomicPHINode(
+                                      MachineBasicBlock &MBB,
+                                      MachineBasicBlock::iterator AfterPHIsIt) {
+  ++NumAtomic;
+  // Unlink the PHI node from the basic block, but don't delete the PHI yet.
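+  // Illustrative sketch of what this lowering produces (register numbers
+  // are hypothetical):
+  //   BB#2:  %vr3 = PHI %vr1, <BB#0>, %vr2, <BB#1>
+  // becomes, with a fresh join register %vr4:
+  //   BB#0:  %vr4 = COPY %vr1      ; before the first terminator
+  //   BB#1:  %vr4 = COPY %vr2
+  //   BB#2:  %vr3 = COPY %vr4      ; after any remaining PHIs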
+  MachineInstr *MPhi = MBB.remove(MBB.begin());
+
+  unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
+  unsigned DestReg = MPhi->getOperand(0).getReg();
+  assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
+  bool isDead = MPhi->getOperand(0).isDead();
+
+  // Create a new register for the incoming PHI arguments.
+  MachineFunction &MF = *MBB.getParent();
+  unsigned IncomingReg = 0;
+  bool reusedIncoming = false;  // Is IncomingReg reused from an earlier PHI?
+
+  // Insert a register to register copy at the top of the current block (but
+  // after any remaining phi nodes) which copies the new incoming register
+  // into the phi node destination.
+  const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+  if (isSourceDefinedByImplicitDef(MPhi, MRI))
+    // If all sources of a PHI node are implicit_def, just emit an
+    // implicit_def instead of a copy.
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+  else {
+    // Can we reuse an earlier PHI node? This only happens for critical edges,
+    // typically those created by tail duplication.
+    unsigned &entry = LoweredPHIs[MPhi];
+    if (entry) {
+      // An identical PHI node was already lowered. Reuse the incoming register.
+      IncomingReg = entry;
+      reusedIncoming = true;
+      ++NumReused;
+      DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
+    } else {
+      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
+      entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
+    }
+    BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+            TII->get(TargetOpcode::COPY), DestReg)
+      .addReg(IncomingReg);
+  }
+
+  // Update live variable information if there is any.
+  LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
+  if (LV) {
+    MachineInstr *PHICopy = prior(AfterPHIsIt);
+
+    if (IncomingReg) {
+      LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
+
+      // Increment use count of the newly created virtual register.
+      VI.NumUses++;
+      LV->setPHIJoin(IncomingReg);
+
+      // When we are reusing the incoming register, it may already have been
+      // killed in this block. The old kill will also have been inserted at
+      // AfterPHIsIt, so it appears before the current PHICopy.
+      if (reusedIncoming)
+        if (MachineInstr *OldKill = VI.findKill(&MBB)) {
+          DEBUG(dbgs() << "Remove old kill from " << *OldKill);
+          LV->removeVirtualRegisterKilled(IncomingReg, OldKill);
+          DEBUG(MBB.dump());
+        }
+
+      // Add information to LiveVariables to know that the incoming value is
+      // killed. Note that because the value is defined in several places (once
+      // for each incoming block), the "def" block and instruction fields for
+      // the VarInfo are not filled in.
+      LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+    }
+
+    // Since we are going to be deleting the PHI node, if it is the last use of
+    // any registers, or if the value itself is dead, we need to move this
+    // information over to the new copy we just inserted.
+    LV->removeVirtualRegistersKilled(MPhi);
+
+    // If the result is dead, update LV.
+    if (isDead) {
+      LV->addVirtualRegisterDead(DestReg, PHICopy);
+      LV->removeVirtualRegisterDead(DestReg, MPhi);
+    }
+  }
+
+  // Adjust the VRegPHIUseCount map to account for the removal of this PHI node.
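+  // (PHI operands form (register, predecessor MBB) pairs after the def, so
+  // operand i is an incoming register and operand i+1 the block it comes
+  // from; the loop below decrements one use count per pair.)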
+  for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2)
+    --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(),
+                                 MPhi->getOperand(i).getReg())];
+
+  // Now loop over all of the incoming arguments, changing them to copy into the
+  // IncomingReg register in the corresponding predecessor basic block.
+  SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
+  for (int i = NumSrcs - 1; i >= 0; --i) {
+    unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+    unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
+
+    assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+           "Machine PHI Operands must all be virtual registers!");
+
+    // Get the MachineBasicBlock equivalent of the BasicBlock that is the
+    // source path of the PHI.
+    MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
+
+    // If source is defined by an implicit def, there is no need to insert a
+    // copy.
+    MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+    if (DefMI->isImplicitDef()) {
+      ImpDefs.insert(DefMI);
+      continue;
+    }
+
+    // Check to make sure we haven't already emitted the copy for this block.
+    // This can happen because PHI nodes may have multiple entries for the same
+    // basic block.
+    if (!MBBsInsertedInto.insert(&opBlock))
+      continue;  // If the copy has already been emitted, we're done.
+
+    // Find a safe location to insert the copy, this may be the first terminator
+    // in the block (or end()).
+    MachineBasicBlock::iterator InsertPos =
+      findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
+
+    // Insert the copy.
+    if (!reusedIncoming && IncomingReg)
+      BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), IncomingReg)
+        .addReg(SrcReg, 0, SrcSubReg);
+
+    // Now update live variable information if we have it. Otherwise we're done
+    if (!LV) continue;
+
+    // We want to be able to insert a kill of the register if this PHI (aka, the
+    // copy we just inserted) is the last use of the source value. Live
+    // variable analysis conservatively handles this by saying that the value is
+    // live until the end of the block the PHI entry lives in. If the value
+    // really is dead at the PHI copy, there will be no successor blocks which
+    // have the value live-in.
+
+    // Also check to see if this register is in use by another PHI node which
+    // has not yet been eliminated. If so, it will be killed at an appropriate
+    // point later.
+
+    // Is it used by any PHI instructions in this block?
+    bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)];
+
+    // Okay, if we now know that the value is not live out of the block, we can
+    // add a kill marker in this block saying that it kills the incoming value!
+    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
+      // In our final twist, we have to decide which instruction kills the
+      // register. In most cases this is the copy, however, the first
+      // terminator instruction at the end of the block may also use the value.
+      // In this case, we should mark *it* as being the killing block, not the
+      // copy.
+      MachineBasicBlock::iterator KillInst;
+      MachineBasicBlock::iterator Term = opBlock.getFirstTerminator();
+      if (Term != opBlock.end() && Term->readsRegister(SrcReg)) {
+        KillInst = Term;
+
+        // Check that no other terminators use values.
+#ifndef NDEBUG
+        for (MachineBasicBlock::iterator TI = llvm::next(Term);
+             TI != opBlock.end(); ++TI) {
+          if (TI->isDebugValue())
+            continue;
+          assert(!TI->readsRegister(SrcReg) &&
+                 "Terminator instructions cannot use virtual registers unless "
+                 "they are the first terminator in a block!");
+        }
+#endif
+      } else if (reusedIncoming || !IncomingReg) {
+        // We may have to rewind a bit if we didn't insert a copy this time.
+        KillInst = Term;
+        while (KillInst != opBlock.begin()) {
+          --KillInst;
+          if (KillInst->isDebugValue())
+            continue;
+          if (KillInst->readsRegister(SrcReg))
+            break;
+        }
+      } else {
+        // We just inserted this copy.
+        KillInst = prior(InsertPos);
+      }
+      assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
+
+      // Finally, mark it killed.
+      LV->addVirtualRegisterKilled(SrcReg, KillInst);
+
+      // This vreg no longer lives all of the way through opBlock.
+      unsigned opBlockNum = opBlock.getNumber();
+      LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
+    }
+  }
+
+  // Really delete the PHI instruction now, if it is not in the LoweredPHIs map.
+  if (reusedIncoming || !IncomingReg)
+    MF.DeleteMachineInstr(MPhi);
+}
+
+/// analyzePHINodes - Gather information about the PHI nodes in the function.
+/// In particular, we want to map the number of uses of a virtual register
+/// which is used in a PHI node. We map that to the BB the vreg is coming from.
+/// This is used later to determine when the vreg is killed in the BB.
+///
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+  for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+       I != E; ++I)
+    for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI)
+      for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2)
+        ++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i+1).getMBB()->getNumber(),
+                                     BBI->getOperand(i).getReg())];
+}
+
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+                                   MachineBasicBlock &MBB,
+                                   LiveVariables &LV,
+                                   MachineLoopInfo *MLI) {
+  if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
+    return false;   // Quick exit for basic blocks without PHIs.
+
+  bool Changed = false;
+  for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+       BBI != BBE && BBI->isPHI(); ++BBI) {
+    for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+      unsigned Reg = BBI->getOperand(i).getReg();
+      MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+      // We break edges when registers are live out from the predecessor block
+      // (not considering PHI nodes). If the register is live in to this block
+      // anyway, we would gain nothing from splitting.
+      // Avoid splitting backedges of loops. It would introduce small
+      // out-of-line blocks into the loop which is very bad for code placement.
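+      // (Splitting the PreMBB -> MBB edge inserts a fresh block on that edge,
+      // giving the PHI copy a spot that is executed only on this path instead
+      // of at the end of a predecessor shared with other successors.)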
+      if (PreMBB != &MBB &&
+          !LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
+        if (!MLI ||
+            !(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
+              MLI->isLoopHeader(&MBB))) {
+          if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+            Changed = true;
+            ++NumCriticalEdgesSplit;
+          }
+        }
+      }
+    }
+  }
+  return Changed;
+}
diff --git a/final/lib/CodeGen/PHIEliminationUtils.cpp b/final/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 00000000000..10bfdcce676
--- /dev/null
+++ b/final/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+                             unsigned SrcReg) {
+  // Handle the trivial case trivially.
+  if (MBB->empty())
+    return MBB->begin();
+
+  // Usually, we just want to insert the copy before the first terminator
+  // instruction. However, for the edge going to a landing pad, we must insert
+  // the copy before the call/invoke instruction.
+  if (!SuccMBB->isLandingPad())
+    return MBB->getFirstTerminator();
+
+  // Discover any defs/uses in this basic block.
+  SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+  MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+         RE = MRI.reg_end(); RI != RE; ++RI) {
+    MachineInstr* DefUseMI = &*RI;
+    if (DefUseMI->getParent() == MBB)
+      DefUsesInMBB.insert(DefUseMI);
+  }
+
+  MachineBasicBlock::iterator InsertPoint;
+  if (DefUsesInMBB.empty()) {
+    // No defs. Insert the copy at the start of the basic block.
+    InsertPoint = MBB->begin();
+  } else if (DefUsesInMBB.size() == 1) {
+    // Insert the copy immediately after the def/use.
+    InsertPoint = *DefUsesInMBB.begin();
+    ++InsertPoint;
+  } else {
+    // Insert the copy immediately after the last def/use.
+    InsertPoint = MBB->end();
+    while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+    ++InsertPoint;
+  }
+
+  // Make sure the copy goes after any phi nodes however.
+  return MBB->SkipPHIsAndLabels(InsertPoint);
+}
diff --git a/final/lib/CodeGen/PHIEliminationUtils.h b/final/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 00000000000..9ac47fb4c50
--- /dev/null
+++ b/final/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+  /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+  /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+  /// any def of SrcReg, but before any subsequent point where control flow
+  /// might jump out of the basic block.
+  MachineBasicBlock::iterator
+  findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+                         unsigned SrcReg);
+}
+
+#endif
diff --git a/final/lib/CodeGen/Passes.cpp b/final/lib/CodeGen/Passes.cpp
new file mode 100644
index 00000000000..3489db2e9f4
--- /dev/null
+++ b/final/lib/CodeGen/Passes.cpp
@@ -0,0 +1,68 @@
+//===-- Passes.cpp - Target independent code generation passes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines interfaces to access the target independent code
+// generation passes provided by the LLVM backend.
+//
+//===---------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+//===---------------------------------------------------------------------===//
+///
+/// RegisterRegAlloc class - Track the registration of register allocators.
+///
+//===---------------------------------------------------------------------===//
+MachinePassRegistry RegisterRegAlloc::Registry;
+
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+                "pick register allocator based on -O option",
+                createDefaultRegisterAllocator);
+
+//===---------------------------------------------------------------------===//
+///
+/// RegAlloc command line options.
+///
+//===---------------------------------------------------------------------===//
+static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
+               RegisterPassParser<RegisterRegAlloc> >
+RegAlloc("regalloc",
+         cl::init(&createDefaultRegisterAllocator),
+         cl::desc("Register allocator to use"));
+
+
+//===---------------------------------------------------------------------===//
+///
+/// createRegisterAllocator - choose the appropriate register allocator.
+///
+//===---------------------------------------------------------------------===//
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
+  RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
+
+  if (!Ctor) {
+    Ctor = RegAlloc;
+    RegisterRegAlloc::setDefault(RegAlloc);
+  }
+
+  if (Ctor != createDefaultRegisterAllocator)
+    return Ctor();
+
+  // When the 'default' allocator is requested, pick one based on OptLevel.
+  switch (OptLevel) {
+  case CodeGenOpt::None:
+    return createFastRegisterAllocator();
+  default:
+    return createLinearScanRegisterAllocator();
+  }
+}
diff --git a/final/lib/CodeGen/PeepholeOptimizer.cpp b/final/lib/CodeGen/PeepholeOptimizer.cpp
new file mode 100644
index 00000000000..5d7123caa01
--- /dev/null
+++ b/final/lib/CodeGen/PeepholeOptimizer.cpp
@@ -0,0 +1,372 @@
+//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Perform peephole optimizations on the machine code:
+//
+// - Optimize Extensions
+//
+//     Optimization of sign / zero extension instructions. It may be extended
+//     to handle other instructions with similar properties.
+//
+//     On some targets, some instructions, e.g. X86 sign / zero extension, may
+//     leave the source value in the lower part of the result. This
+//     optimization will replace some uses of the pre-extension value with uses
+//     of the sub-register of the results.
+//
+// - Optimize Comparisons
+//
+//     Optimization of comparison instructions. For instance, in this code:
+//
+//       sub r1, 1
+//       cmp r1, 0
+//       bz  L1
+//
+//     If the "sub" instruction already sets (or could be modified to set) the
+//     same flag that the "cmp" instruction sets and that "bz" uses, then we
+//     can eliminate the "cmp" instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "peephole-opt"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+// Optimize Extensions
+static cl::opt<bool>
+Aggressive("aggressive-ext-opt", cl::Hidden,
+           cl::desc("Aggressive extension optimization"));
+
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+                cl::desc("Disable the peephole optimizer"));
+
+STATISTIC(NumReuse,      "Number of extension results reused");
+STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumImmFold,    "Number of move immediates folded");
+
+namespace {
+  class PeepholeOptimizer : public MachineFunctionPass {
+    const TargetMachine   *TM;
+    const TargetInstrInfo *TII;
+    MachineRegisterInfo   *MRI;
+    MachineDominatorTree  *DT;  // Machine dominator tree
+
+  public:
+    static char ID; // Pass identification
+    PeepholeOptimizer() : MachineFunctionPass(ID) {
+      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      if (Aggressive) {
+        AU.addRequired<MachineDominatorTree>();
+        AU.addPreserved<MachineDominatorTree>();
+      }
+    }
+
+  private:
+    bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
+    bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                          SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+    bool isMoveImmediate(MachineInstr *MI,
+                         SmallSet<unsigned, 4> &ImmDefRegs,
+                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+    bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+                       SmallSet<unsigned, 4> &ImmDefRegs,
+                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+  };
+}
+
+char PeepholeOptimizer::ID = 0;
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+                      "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+                    "Peephole Optimizations", false, false)
+
+FunctionPass *llvm::createPeepholeOptimizerPass() {
+  return new PeepholeOptimizer();
+}
+
+/// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
+/// a single register and writes a single register and it does not modify the
+/// source, and if the source value is preserved as a sub-register of the
+/// result, then replace all reachable uses of the source with the subreg of
+/// the result.
+///
+/// Do not generate an EXTRACT that is used only in a debug use, as this
+/// changes the code. Since this code does not currently share EXTRACTs, just
+/// ignore all debug uses.
+bool PeepholeOptimizer::
+OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
+                 SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
+  unsigned SrcReg, DstReg, SubIdx;
+  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
+    return false;
+
+  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg))
+    return false;
+
+  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+  if (++UI == MRI->use_nodbg_end())
+    // No other uses.
+    return false;
+
+  // The source has other uses. See if we can replace the other uses with use
+  // of the result of the extension.
+  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
+  UI = MRI->use_nodbg_begin(DstReg);
+  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+       UI != UE; ++UI)
+    ReachedBBs.insert(UI->getParent());
+
+  // Uses that are in the same BB of uses of the result of the instruction.
+  SmallVector<MachineOperand*, 8> Uses;
+
+  // Uses that the result of the instruction can reach.
+  SmallVector<MachineOperand*, 8> ExtendedUses;
+
+  bool ExtendLife = true;
+  UI = MRI->use_nodbg_begin(SrcReg);
+  for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+       UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = &*UI;
+    if (UseMI == MI)
+      continue;
+
+    if (UseMI->isPHI()) {
+      ExtendLife = false;
+      continue;
+    }
+
+    // It's an error to translate this:
+    //
+    //    %reg1025 = <sext> %reg1024
+    //     ...
+    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+    //
+    // into this:
+    //
+    //    %reg1025 = <sext> %reg1024
+    //     ...
+    //    %reg1027 = COPY %reg1025:4
+    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+    //
+    // The problem here is that SUBREG_TO_REG is there to assert that an
+    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+    // the COPY here, it will give us the value after the <sext>, not the
+    // original value of %reg1024 before <sext>.
+    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+      continue;
+
+    MachineBasicBlock *UseMBB = UseMI->getParent();
+    if (UseMBB == MBB) {
+      // Local uses that come after the extension.
+      if (!LocalMIs.count(UseMI))
+        Uses.push_back(&UseMO);
+    } else if (ReachedBBs.count(UseMBB)) {
+      // Non-local uses where the result of the extension is used. Always
+      // replace these unless it's a PHI.
+      Uses.push_back(&UseMO);
+    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
+      // We may want to extend the live range of the extension result in order
+      // to replace these uses.
+      ExtendedUses.push_back(&UseMO);
+    } else {
+      // Both will be live out of the def MBB anyway. Don't extend live range
+      // of the extension result.
+      ExtendLife = false;
+      break;
+    }
+  }
+
+  if (ExtendLife && !ExtendedUses.empty())
+    // Extend the liveness of the extension result.
+    std::copy(ExtendedUses.begin(), ExtendedUses.end(),
+              std::back_inserter(Uses));
+
+  // Now replace all uses.
+  bool Changed = false;
+  if (!Uses.empty()) {
+    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
+
+    // Look for PHI uses of the extended result, we don't want to extend the
+    // liveness of a PHI input. It breaks all kinds of assumptions down
+    // stream. A PHI use is expected to be the kill of its source values.
+    UI = MRI->use_nodbg_begin(DstReg);
+    for (MachineRegisterInfo::use_nodbg_iterator
+           UE = MRI->use_nodbg_end(); UI != UE; ++UI)
+      if (UI->isPHI())
+        PHIBBs.insert(UI->getParent());
+
+    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+      MachineOperand *UseMO = Uses[i];
+      MachineInstr *UseMI = UseMO->getParent();
+      MachineBasicBlock *UseMBB = UseMI->getParent();
+      if (PHIBBs.count(UseMBB))
+        continue;
+
+      unsigned NewVR = MRI->createVirtualRegister(RC);
+      BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), NewVR)
+        .addReg(DstReg, 0, SubIdx);
+
+      UseMO->setReg(NewVR);
+      ++NumReuse;
+      Changed = true;
+    }
+  }
+
+  return Changed;
+}
+
+/// OptimizeCmpInstr - If the instruction is a compare and the previous
+/// instruction it's comparing against already sets (or could be modified to
+/// set) the same flag as the compare, then we can remove the comparison and
+/// use the flag from the previous instruction.
+bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
+                                         MachineBasicBlock *MBB) {
+  // If this instruction is a comparison against zero and isn't comparing a
+  // physical register, we can try to optimize it.
+  unsigned SrcReg;
+  int CmpMask, CmpValue;
+  if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+      TargetRegisterInfo::isPhysicalRegister(SrcReg))
+    return false;
+
+  // Attempt to optimize the comparison instruction.
+  if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
+    ++NumEliminated;
+    return true;
+  }
+
+  return false;
+}
+
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+                                        SmallSet<unsigned, 4> &ImmDefRegs,
+                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isMoveImmediate())
+    return false;
+  if (TID.getNumDefs() != 1)
+    return false;
+  unsigned Reg = MI->getOperand(0).getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    ImmDefMIs.insert(std::make_pair(Reg, MI));
+    ImmDefRegs.insert(Reg);
+    return true;
+  }
+
+  return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+                                      SmallSet<unsigned, 4> &ImmDefRegs,
+                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (ImmDefRegs.count(Reg) == 0)
+      continue;
+    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+    assert(II != ImmDefMIs.end());
+    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+      ++NumImmFold;
+      return true;
+    }
+  }
+  return false;
+}
+
+bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+  if (DisablePeephole)
+    return false;
+
+  TM  = &MF.getTarget();
+  TII = TM->getInstrInfo();
+  MRI = &MF.getRegInfo();
+  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
+
+  bool Changed = false;
+
+  SmallPtrSet<MachineInstr*, 8> LocalMIs;
+  SmallSet<unsigned, 4> ImmDefRegs;
+  DenseMap<unsigned, MachineInstr*> ImmDefMIs;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = &*I;
+
+    bool SeenMoveImm = false;
+    LocalMIs.clear();
+    ImmDefRegs.clear();
+    ImmDefMIs.clear();
+
+    bool First = true;
+    MachineBasicBlock::iterator PMII;
+    for (MachineBasicBlock::iterator
+           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
+      MachineInstr *MI = &*MII;
+      LocalMIs.insert(MI);
+
+      if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+          MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+          MI->hasUnmodeledSideEffects()) {
+        ++MII;
+        continue;
+      }
+
+      if (MI->getDesc().isCompare()) {
+        if (OptimizeCmpInstr(MI, MBB)) {
+          // MI is deleted.
+          Changed = true;
+          MII = First ? I->begin() : llvm::next(PMII);
+          continue;
+        }
+      }
+
+      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+        SeenMoveImm = true;
+      } else {
+        Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
+        if (SeenMoveImm)
+          Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
+      }
+
+      First = false;
+      PMII = MII;
+      ++MII;
+    }
+  }
+
+  return Changed;
+}
diff --git a/final/lib/CodeGen/PostRASchedulerList.cpp b/final/lib/CodeGen/PostRASchedulerList.cpp
new file mode 100644
index 00000000000..60c24b71079
--- /dev/null
+++ b/final/lib/CodeGen/PostRASchedulerList.cpp
@@ -0,0 +1,694 @@
+//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a top-down list scheduler, using standard algorithms.
+// The basic approach uses a priority queue of available nodes to schedule.
+// One at a time, nodes are taken from the priority queue (thus in priority
+// order), checked for legality to schedule, and emitted if legal.
+//
+// Nodes may not be legal to schedule either due to structural hazards (e.g.
+// pipeline or resource constraints) or because an input to the instruction has
+// not completed execution.
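+//
+// Rough shape of the top-down loop implemented below (a simplified sketch,
+// not the literal code):
+//
+//   while AvailableQueue or PendingQueue is non-empty:
+//     move PendingQueue entries whose latency has elapsed to AvailableQueue
+//     pop the highest-priority node; if a hazard blocks it, advance the
+//     cycle (emitting a noop or stall); otherwise schedule it and release
+//     its successors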
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "post-RA-sched"
+#include "AntiDepBreaker.h"
+#include "AggressiveAntiDepBreaker.h"
+#include "CriticalAntiDepBreaker.h"
+#include "ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LatencyPriorityQueue.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumNoops, "Number of noops inserted");
+STATISTIC(NumStalls, "Number of pipeline stalls");
+STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
+
+// Post-RA scheduling is enabled with
+// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// override the target.
+static cl::opt<bool>
+EnablePostRAScheduler("post-RA-scheduler",
+                      cl::desc("Enable scheduling after register allocation"),
+                      cl::init(false), cl::Hidden);
+static cl::opt<std::string>
+EnableAntiDepBreaking("break-anti-dependencies",
+                      cl::desc("Break post-RA scheduling anti-dependencies: "
+                               "\"critical\", \"all\", or \"none\""),
+                      cl::init("none"), cl::Hidden);
+
+// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+static cl::opt<int>
+DebugDiv("postra-sched-debugdiv",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+static cl::opt<int>
+DebugMod("postra-sched-debugmod",
+         cl::desc("Debug control MBBs that are scheduled"),
+         cl::init(0), cl::Hidden);
+
+AntiDepBreaker::~AntiDepBreaker() { }
+
+namespace {
+  class PostRAScheduler : public MachineFunctionPass {
+    AliasAnalysis *AA;
+    const TargetInstrInfo *TII;
+    CodeGenOpt::Level OptLevel;
+
+  public:
+    static char ID;
+    PostRAScheduler(CodeGenOpt::Level ol) :
+      MachineFunctionPass(ID), OptLevel(ol) {}
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    const char *getPassName() const {
+      return "Post RA top-down list latency scheduler";
+    }
+
+    bool runOnMachineFunction(MachineFunction &Fn);
+  };
+  char PostRAScheduler::ID = 0;
+
+  class SchedulePostRATDList : public ScheduleDAGInstrs {
+    /// AvailableQueue - The priority queue to use for the available SUnits.
+    ///
+    LatencyPriorityQueue AvailableQueue;
+
+    /// PendingQueue - This contains all of the instructions whose operands
+    /// have been issued, but their results are not ready yet (due to the
+    /// latency of the operation). Once the operands become available, the
+    /// instruction is added to the AvailableQueue.
+    std::vector<SUnit*> PendingQueue;
+
+    /// Topo - A topological ordering for SUnits.
+    ScheduleDAGTopologicalSort Topo;
+
+    /// HazardRec - The hazard recognizer to use.
+    ScheduleHazardRecognizer *HazardRec;
+
+    /// AntiDepBreak - Anti-dependence breaking object, or NULL if none
+    AntiDepBreaker *AntiDepBreak;
+
+    /// AA - AliasAnalysis for making memory reference queries.
+    AliasAnalysis *AA;
+
+    /// KillIndices - The index of the most recent kill (proceeding bottom-up),
+    /// or ~0u if the register is not live.
+    std::vector<unsigned> KillIndices;
+
+  public:
+    SchedulePostRATDList(
+      MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+      AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+      SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+    ~SchedulePostRATDList();
+
+    /// StartBlock - Initialize register live-range state for scheduling in
+    /// this block.
+    ///
+    void StartBlock(MachineBasicBlock *BB);
+
+    /// Schedule - Schedule the instruction range using list scheduling.
+    ///
+    void Schedule();
+
+    /// Observe - Update liveness information to account for the current
+    /// instruction, which will not be scheduled.
+    ///
+    void Observe(MachineInstr *MI, unsigned Count);
+
+    /// FinishBlock - Clean up register live-range state.
+    ///
+    void FinishBlock();
+
+    /// FixupKills - Fix register kill flags that have been made
+    /// invalid due to scheduling
+    ///
+    void FixupKills(MachineBasicBlock *MBB);
+
+  private:
+    void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
+    void ReleaseSuccessors(SUnit *SU);
+    void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
+    void ListScheduleTopDown();
+    void StartBlockForKills(MachineBasicBlock *BB);
+
+    // ToggleKillFlag - Toggle a register operand kill flag. Other
+    // adjustments may be made to the instruction if necessary. Return
+    // true if the operand has been deleted, false if not.
+    bool ToggleKillFlag(MachineInstr *MI, MachineOperand &MO);
+  };
+}
+
+SchedulePostRATDList::SchedulePostRATDList(
+  MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+  AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+  SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+  : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+    KillIndices(TRI->getNumRegs())
+{
+  const TargetMachine &TM = MF.getTarget();
+  const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+  HazardRec =
+    TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+  AntiDepBreak =
+    ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+     (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
+     ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+      (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+  delete HazardRec;
+  delete AntiDepBreak;
+}
+
+bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
+  TII = Fn.getTarget().getInstrInfo();
+  MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+  AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+
+  // Check for explicit enable/disable of post-ra scheduling.
+  TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+  SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
+  if (EnablePostRAScheduler.getPosition() > 0) {
+    if (!EnablePostRAScheduler)
+      return false;
+  } else {
+    // Check that post-RA scheduling is enabled for this target.
+    // This may upgrade the AntiDepMode.
+    const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+    if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
+      return false;
+  }
+
+  // Check for antidep breaking override...
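+  // (For example, passing -break-anti-dependencies=critical on the command
+  // line selects the critical-path breaker regardless of the target default.)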
+  if (EnableAntiDepBreaking.getPosition() > 0) {
+    AntiDepMode = (EnableAntiDepBreaking == "all") ?
+      TargetSubtarget::ANTIDEP_ALL :
+      (EnableAntiDepBreaking == "critical")
+      ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+  }
+
+  DEBUG(dbgs() << "PostRAScheduler\n");
+
+  SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+                                 CriticalPathRCs);
+
+  // Loop over all of the basic blocks.
+  for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
+       MBB != MBBe; ++MBB) {
+#ifndef NDEBUG
+    // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
+    if (DebugDiv > 0) {
+      static int bbcnt = 0;
+      if (bbcnt++ % DebugDiv != DebugMod)
+        continue;
+      dbgs() << "*** DEBUG scheduling " << Fn.getFunction()->getNameStr() <<
+        ":BB#" << MBB->getNumber() << " ***\n";
+    }
+#endif
+
+    // Initialize register live-range state for scheduling in this block.
+    Scheduler.StartBlock(MBB);
+
+    // Schedule each sequence of instructions not interrupted by a label
+    // or anything else that effectively needs to shut down scheduling.
+    MachineBasicBlock::iterator Current = MBB->end();
+    unsigned Count = MBB->size(), CurrentCount = Count;
+    for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
+      MachineInstr *MI = llvm::prior(I);
+      if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
+        Scheduler.Run(MBB, I, Current, CurrentCount);
+        Scheduler.EmitSchedule();
+        Current = MI;
+        CurrentCount = Count - 1;
+        Scheduler.Observe(MI, CurrentCount);
+      }
+      I = MI;
+      --Count;
+    }
+    assert(Count == 0 && "Instruction count mismatch!");
+    assert((MBB->begin() == Current || CurrentCount != 0) &&
+           "Instruction count mismatch!");
+    Scheduler.Run(MBB, MBB->begin(), Current, CurrentCount);
+    Scheduler.EmitSchedule();
+
+    // Clean up register live-range state.
+    Scheduler.FinishBlock();
+
+    // Update register kills.
+    Scheduler.FixupKills(MBB);
+  }
+
+  return true;
+}
+
+/// StartBlock - Initialize register live-range state for scheduling in
+/// this block.
+///
+void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
+  // Call the superclass.
+  ScheduleDAGInstrs::StartBlock(BB);
+
+  // Reset the hazard recognizer and anti-dep breaker.
+  HazardRec->Reset();
+  if (AntiDepBreak != NULL)
+    AntiDepBreak->StartBlock(BB);
+}
+
+/// Schedule - Schedule the instruction range using list scheduling.
+///
+void SchedulePostRATDList::Schedule() {
+  // Build the scheduling graph.
+  BuildSchedGraph(AA);
+
+  if (AntiDepBreak != NULL) {
+    unsigned Broken =
+      AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
+                                          InsertPosIndex);
+
+    if (Broken != 0) {
+      // We made changes. Update the dependency graph.
+      // Theoretically we could update the graph in place:
+      // When a live range is changed to use a different register, remove
+      // the def's anti-dependence *and* output-dependence edges due to
+      // that register, and add new anti-dependence and output-dependence
+      // edges based on the next live range of the register.
+      SUnits.clear();
+      Sequence.clear();
+      EntrySU = SUnit();
+      ExitSU = SUnit();
+      BuildSchedGraph(AA);
+
+      NumFixedAnti += Broken;
+    }
+  }
+
+  DEBUG(dbgs() << "********** List Scheduling **********\n");
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+
+  AvailableQueue.initNodes(SUnits);
+  ListScheduleTopDown();
+  AvailableQueue.releaseState();
+}
+
+/// Observe - Update liveness information to account for the current
+/// instruction, which will not be scheduled.
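+/// (This is called for scheduling-boundary instructions, such as labels,
+/// which are left in place; see the isSchedulingBoundary check in
+/// runOnMachineFunction above.)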
+///
+void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) {
+  if (AntiDepBreak != NULL)
+    AntiDepBreak->Observe(MI, Count, InsertPosIndex);
+}
+
+/// FinishBlock - Clean up register live-range state.
+///
+void SchedulePostRATDList::FinishBlock() {
+  if (AntiDepBreak != NULL)
+    AntiDepBreak->FinishBlock();
+
+  // Call the superclass.
+  ScheduleDAGInstrs::FinishBlock();
+}
+
+/// StartBlockForKills - Initialize register live-range state for updating
+/// kills.
+///
+void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
+  // Initialize the indices to indicate that no registers are live.
+  for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+    KillIndices[i] = ~0u;
+
+  // Determine the live-out physregs for this block.
+  if (!BB->empty() && BB->back().getDesc().isReturn()) {
+    // In a return block, examine the function live-out regs.
+    for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
+           E = MRI.liveout_end(); I != E; ++I) {
+      unsigned Reg = *I;
+      KillIndices[Reg] = BB->size();
+      // Repeat, for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = BB->size();
+      }
+    }
+  }
+  else {
+    // In a non-return block, examine the live-in regs of all successors.
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+           SE = BB->succ_end(); SI != SE; ++SI) {
+      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+             E = (*SI)->livein_end(); I != E; ++I) {
+        unsigned Reg = *I;
+        KillIndices[Reg] = BB->size();
+        // Repeat, for all subregs.
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg) {
+          KillIndices[*Subreg] = BB->size();
+        }
+      }
+    }
+  }
+}
+
+bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
+                                          MachineOperand &MO) {
+  // Setting kill flag...
+  if (!MO.isKill()) {
+    MO.setIsKill(true);
+    return false;
+  }
+
+  // If MO itself is live, clear the kill flag...
+  if (KillIndices[MO.getReg()] != ~0u) {
+    MO.setIsKill(false);
+    return false;
+  }
+
+  // If any subreg of MO is live, then create an imp-def for that
+  // subreg and keep MO marked as killed.
+  MO.setIsKill(false);
+  bool AllDead = true;
+  const unsigned SuperReg = MO.getReg();
+  for (const unsigned *Subreg = TRI->getSubRegisters(SuperReg);
+       *Subreg; ++Subreg) {
+    if (KillIndices[*Subreg] != ~0u) {
+      MI->addOperand(MachineOperand::CreateReg(*Subreg,
+                                               true  /*IsDef*/,
+                                               true  /*IsImp*/,
+                                               false /*IsKill*/,
+                                               false /*IsDead*/));
+      AllDead = false;
+    }
+  }
+
+  if (AllDead)
+    MO.setIsKill(true);
+  return false;
+}
+
+/// FixupKills - Fix the register kill flags; they may have been made
+/// incorrect by instruction reordering.
+///
+void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n');
+
+  std::set<unsigned> killedRegs;
+  BitVector ReservedRegs = TRI->getReservedRegs(MF);
+
+  StartBlockForKills(MBB);
+
+  // Examine block from end to start...
+  unsigned Count = MBB->size();
+  for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
+       I != E; --Count) {
+    MachineInstr *MI = --I;
+    if (MI->isDebugValue())
+      continue;
+
+    // Update liveness. Registers that are def'd but not used in this
+    // instruction are now dead. Mark the register and all of its subregs,
+    // since they are completely defined.
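+    // (In KillIndices, ~0u means "not live", matching the convention set up
+    // in StartBlockForKills above.)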
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg()) continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0) continue;
+      if (!MO.isDef()) continue;
+      // Ignore two-addr defs.
+      if (MI->isRegTiedToUseOperand(i)) continue;
+
+      KillIndices[Reg] = ~0u;
+
+      // Repeat for all subregs.
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = ~0u;
+      }
+    }
+
+    // Examine all used registers and set/clear kill flag. When a
+    // register is used multiple times we only set the kill flag on
+    // the first use.
+    killedRegs.clear();
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+      bool kill = false;
+      if (killedRegs.find(Reg) == killedRegs.end()) {
+        kill = true;
+        // A register is not killed if any subregs are live...
+        for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+             *Subreg; ++Subreg) {
+          if (KillIndices[*Subreg] != ~0u) {
+            kill = false;
+            break;
+          }
+        }
+
+        // If subreg is not live, then register is killed if it became
+        // live in this instruction.
+        if (kill)
+          kill = (KillIndices[Reg] == ~0u);
+      }
+
+      if (MO.isKill() != kill) {
+        DEBUG(dbgs() << "Fixing " << MO << " in ");
+        // Warning: ToggleKillFlag may invalidate MO.
+        ToggleKillFlag(MI, MO);
+        DEBUG(MI->dump());
+      }
+
+      killedRegs.insert(Reg);
+    }
+
+    // Mark any used register (that is not using undef) and subregs as
+    // now live...
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI->getOperand(i);
+      if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
+      unsigned Reg = MO.getReg();
+      if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
+
+      KillIndices[Reg] = Count;
+
+      for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+           *Subreg; ++Subreg) {
+        KillIndices[*Subreg] = Count;
+      }
+    }
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Top-Down Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
+/// the PendingQueue if the count reaches zero. Also update its cycle bound.
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+  SUnit *SuccSU = SuccEdge->getSUnit();
+
+#ifndef NDEBUG
+  if (SuccSU->NumPredsLeft == 0) {
+    dbgs() << "*** Scheduling failed! ***\n";
+    SuccSU->dump(this);
+    dbgs() << " has been released too many times!\n";
+    llvm_unreachable(0);
+  }
+#endif
+  --SuccSU->NumPredsLeft;
+
+  // Compute how many cycles it will be before this actually becomes
+  // available. This is the max of the start time of all predecessors plus
+  // their latencies.
+  SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
+
+  // If all the node's predecessors are scheduled, this node is ready
+  // to be scheduled. Ignore the special ExitSU node.
+  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
+    PendingQueue.push_back(SuccSU);
+}
+
+/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors.
+void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    ReleaseSucc(SU, &*I);
+  }
+}
+
+/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
+/// count of its successors.
+/// If a successor pending count is zero, add it to the Available queue.
+void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+  Sequence.push_back(SU);
+  assert(CurCycle >= SU->getDepth() &&
+         "Node scheduled above its depth!");
+  SU->setDepthToAtLeast(CurCycle);
+
+  ReleaseSuccessors(SU);
+  SU->isScheduled = true;
+  AvailableQueue.ScheduledNode(SU);
+}
+
+/// ListScheduleTopDown - The main loop of list scheduling for top-down
+/// schedulers.
+void SchedulePostRATDList::ListScheduleTopDown() {
+  unsigned CurCycle = 0;
+
+  // We're scheduling top-down but we're visiting the regions in
+  // bottom-up order, so we don't know the hazards at the start of a
+  // region. So assume no hazards (this should usually be ok as most
+  // blocks are a single region).
+  HazardRec->Reset();
+
+  // Release any successors of the special Entry node.
+  ReleaseSuccessors(&EntrySU);
+
+  // Add all leaves to Available queue.
+  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+    // It is available if it has no predecessors.
+    bool available = SUnits[i].Preds.empty();
+    if (available) {
+      AvailableQueue.push(&SUnits[i]);
+      SUnits[i].isAvailable = true;
+    }
+  }
+
+  // In any cycle where we can't schedule any instructions, we must
+  // stall or emit a noop, depending on the target.
+  bool CycleHasInsts = false;
+
+  // While Available queue is not empty, grab the node with the highest
+  // priority. If it is not ready, put it back. Schedule the node.
+  std::vector<SUnit*> NotReady;
+  Sequence.reserve(SUnits.size());
+  while (!AvailableQueue.empty() || !PendingQueue.empty()) {
+    // Check to see if any of the pending instructions are ready to issue. If
+    // so, add them to the available queue.
+    unsigned MinDepth = ~0u;
+    for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+      if (PendingQueue[i]->getDepth() <= CurCycle) {
+        AvailableQueue.push(PendingQueue[i]);
+        PendingQueue[i]->isAvailable = true;
+        PendingQueue[i] = PendingQueue.back();
+        PendingQueue.pop_back();
+        --i; --e;
+      } else if (PendingQueue[i]->getDepth() < MinDepth)
+        MinDepth = PendingQueue[i]->getDepth();
+    }
+
+    DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
+
+    SUnit *FoundSUnit = 0;
+    bool HasNoopHazards = false;
+    while (!AvailableQueue.empty()) {
+      SUnit *CurSUnit = AvailableQueue.pop();
+
+      ScheduleHazardRecognizer::HazardType HT =
+        HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
+      if (HT == ScheduleHazardRecognizer::NoHazard) {
+        FoundSUnit = CurSUnit;
+        break;
+      }
+
+      // Remember if this is a noop hazard.
+      HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
+
+      NotReady.push_back(CurSUnit);
+    }
+
+    // Add the nodes that aren't ready back onto the available list.
+    if (!NotReady.empty()) {
+      AvailableQueue.push_all(NotReady);
+      NotReady.clear();
+    }
+
+    // If we found a node to schedule...
+    if (FoundSUnit) {
+      // ... schedule the node...
+      ScheduleNodeTopDown(FoundSUnit, CurCycle);
+      HazardRec->EmitInstruction(FoundSUnit);
+      CycleHasInsts = true;
+    } else {
+      if (CycleHasInsts) {
+        DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
+        HazardRec->AdvanceCycle();
+      } else if (!HasNoopHazards) {
+        // Otherwise, we have a pipeline stall, but no other problem,
+        // just advance the current cycle and try again.
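+        // (A stall consumes a cycle without emitting an instruction, unlike
+        // the noop case below.)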
+        DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n');
+        HazardRec->AdvanceCycle();
+        ++NumStalls;
+      } else {
+        // Otherwise, we have no instructions to issue and we have instructions
+        // that will fault if we don't do this right. This is the case for
+        // processors without pipeline interlocks and other cases.
+        DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n');
+        HazardRec->EmitNoop();
+        Sequence.push_back(0);   // NULL here means noop
+        ++NumNoops;
+      }
+
+      ++CurCycle;
+      CycleHasInsts = false;
+    }
+  }
+
+#ifndef NDEBUG
+  VerifySchedule(/*isBottomUp=*/false);
+#endif
+}
+
+//===----------------------------------------------------------------------===//
+//                         Public Constructor Functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createPostRAScheduler(CodeGenOpt::Level OptLevel) {
+  return new PostRAScheduler(OptLevel);
+}
diff --git a/final/lib/CodeGen/PreAllocSplitting.cpp b/final/lib/CodeGen/PreAllocSplitting.cpp
new file mode 100644
index 00000000000..d6e31dae9d1
--- /dev/null
+++ b/final/lib/CodeGen/PreAllocSplitting.cpp
@@ -0,0 +1,1430 @@
+//===-- PreAllocSplitting.cpp - Pre-allocation Interval Splitting Pass. ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the machine instruction level pre-register allocation
+// live interval splitting pass. It finds live interval barriers, i.e.
+// instructions which will kill all physical registers in certain register
+// classes, and splits all live intervals which cross the barrier.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-alloc-split"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+                                   cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+                                     cl::Hidden);
+
+STATISTIC(NumSplits, "Number of intervals split");
+STATISTIC(NumRemats, "Number of intervals split by rematerialization");
+STATISTIC(NumFolds, "Number of intervals split with spill folding");
+STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
+STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
+STATISTIC(NumDeadSpills, "Number of dead spills removed");
+
+namespace {
+  class PreAllocSplitting : public MachineFunctionPass {
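+    // (These members cache target hooks and analyses; they are initialized
+    // at the top of runOnMachineFunction below.)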
    MachineFunction       *CurrMF;
+    const TargetMachine   *TM;
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo* TRI;
+    MachineFrameInfo      *MFI;
+    MachineRegisterInfo   *MRI;
+    SlotIndexes           *SIs;
+    LiveIntervals         *LIs;
+    LiveStacks            *LSs;
+    VirtRegMap            *VRM;
+
+    // Barrier - Current barrier being processed.
+    MachineInstr          *Barrier;
+
+    // BarrierMBB - Basic block where the barrier resides.
+    MachineBasicBlock     *BarrierMBB;
+
+    // BarrierIdx - Current barrier index.
+    SlotIndex             BarrierIdx;
+
+    // CurrLI - Current live interval being split.
+    LiveInterval          *CurrLI;
+
+    // CurrSLI - Current stack slot live interval.
+    LiveInterval          *CurrSLI;
+
+    // CurrSValNo - Current val# for the stack slot live interval.
+    VNInfo                *CurrSValNo;
+
+    // IntervalSSMap - A map from live interval to spill slots.
+    DenseMap<unsigned, int> IntervalSSMap;
+
+    // Def2SpillMap - A map from a def instruction index to spill index.
+    DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
+
+  public:
+    static char ID;
+    PreAllocSplitting() : MachineFunctionPass(ID) {
+      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      AU.addRequired<LiveIntervals>();
+      AU.addPreserved<LiveIntervals>();
+      AU.addRequired<LiveStacks>();
+      AU.addPreserved<LiveStacks>();
+      AU.addPreserved<RegisterCoalescer>();
+      AU.addPreserved<CalculateSpillWeights>();
+      AU.addPreservedID(StrongPHIEliminationID);
+      AU.addPreservedID(PHIEliminationID);
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreserved<VirtRegMap>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual void releaseMemory() {
+      IntervalSSMap.clear();
+      Def2SpillMap.clear();
+    }
+
+    virtual const char *getPassName() const {
+      return "Pre-Register Allocation Live Interval Splitting";
+    }
+
+    /// print - Implement the dump method.
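+    /// (Simply forwards to the LiveIntervals analysis' print method.)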
+    virtual void print(raw_ostream &O, const Module* M = 0) const {
+      LIs->print(O, M);
+    }
+
+
+  private:
+
+    MachineBasicBlock::iterator
+      findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
+                     SmallPtrSet<MachineInstr*, 4>&);
+
+    MachineBasicBlock::iterator
+      findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
+                       SmallPtrSet<MachineInstr*, 4>&);
+
+    int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
+
+    bool IsAvailableInStack(MachineBasicBlock*, unsigned,
+                            SlotIndex, SlotIndex,
+                            SlotIndex&, int&) const;
+
+    void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
+
+    bool SplitRegLiveInterval(LiveInterval*);
+
+    bool SplitRegLiveIntervals(const TargetRegisterClass **,
+                               SmallPtrSet<LiveInterval*, 8>&);
+
+    bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
+                        MachineBasicBlock* BarrierMBB);
+    bool Rematerialize(unsigned vreg, VNInfo* ValNo,
+                       MachineInstr* DefMI,
+                       MachineBasicBlock::iterator RestorePt,
+                       SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
+                            MachineInstr* DefMI,
+                            MachineInstr* Barrier,
+                            MachineBasicBlock* MBB,
+                            int& SS,
+                            SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    MachineInstr* FoldRestore(unsigned vreg,
+                              const TargetRegisterClass* RC,
+                              MachineInstr* Barrier,
+                              MachineBasicBlock* MBB,
+                              int SS,
+                              SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
+    void RenumberValno(VNInfo* VN);
+    void ReconstructLiveInterval(LiveInterval* LI);
+    bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
+    unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
+                                  unsigned Reg, int FrameIndex, bool& TwoAddr);
+    VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
+                       MachineBasicBlock* MBB, LiveInterval* LI,
+                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                       bool IsTopLevel, bool IsIntraBlock);
+    VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
+                       MachineBasicBlock* MBB, LiveInterval* LI,
+                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                       bool IsTopLevel, bool IsIntraBlock);
+};
+} // end anonymous namespace
+
+char PreAllocSplitting::ID = 0;
+
+INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
+                      "Pre-Register Allocation Live Interval Splitting",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
+                    "Pre-Register Allocation Live Interval Splitting",
+                    false, false)
+
+char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
+
+/// findSpillPoint - Find a gap, as far away from the given MI as possible,
+/// that's suitable for spilling the current live interval. The index must be
+/// before any defs and uses of the live interval register in the mbb. Return
+/// begin() if none is found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
+                                  MachineInstr *DefMI,
+                                  SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
+  MachineBasicBlock::iterator Pt = MBB->begin();
+
+  MachineBasicBlock::iterator MII = MI;
+  MachineBasicBlock::iterator EndPt = DefMI
    ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
+
+  while (MII != EndPt && !RefsInMBB.count(MII) &&
+         MII->getOpcode() != TRI->getCallFrameSetupOpcode())
+    --MII;
+  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+
+  while (MII != EndPt && !RefsInMBB.count(MII)) {
+    // We can't insert the spill between the barrier (a call), and its
+    // corresponding call frame setup.
+    if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
+      while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
+        --MII;
+        if (MII == EndPt) {
+          return Pt;
+        }
+      }
+      continue;
+    } else {
+      Pt = MII;
+    }
+
+    if (RefsInMBB.count(MII))
+      return Pt;
+
+    --MII;
+  }
+
+  return Pt;
+}
+
+/// findRestorePoint - Find a gap in the instruction index map that's suitable
+/// for restoring the current live interval value. The index must be before any
+/// uses of the live interval register in the mbb. Return end() if none is
+/// found.
+MachineBasicBlock::iterator
+PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
+                                    SlotIndex LastIdx,
+                                    SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
+  // FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
+  // begin index accordingly.
+  MachineBasicBlock::iterator Pt = MBB->end();
+  MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
+
+  // We start at the call, so walk forward until we find the call frame teardown
+  // since we can't insert restores before that. Bail if we encounter a use
+  // during this time.
+  MachineBasicBlock::iterator MII = MI;
+  if (MII == EndPt) return Pt;
+
+  while (MII != EndPt && !RefsInMBB.count(MII) &&
+         MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
+    ++MII;
+  if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+  ++MII;
+
+  // FIXME: Limit the number of instructions to examine to reduce
+  // compile time?
+  while (MII != EndPt) {
+    SlotIndex Index = LIs->getInstructionIndex(MII);
+    if (Index > LastIdx)
+      break;
+
+    // We can't insert a restore between the barrier (a call) and its
+    // corresponding call frame teardown.
+    if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+      do {
+        if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
+        ++MII;
+      } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
+    } else {
+      Pt = MII;
+    }
+
+    if (RefsInMBB.count(MII))
+      return Pt;
+
+    ++MII;
+  }
+
+  return Pt;
+}
+
+/// CreateSpillStackSlot - Create a stack slot for the live interval being
+/// split. If the live interval was previously split, just reuse the same
+/// slot.
+int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
+                                            const TargetRegisterClass *RC) {
+  int SS;
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+  if (I != IntervalSSMap.end()) {
+    SS = I->second;
+  } else {
+    SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
+    IntervalSSMap[Reg] = SS;
+  }
+
+  // Create live interval for stack slot.
+  CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+  if (CurrSLI->hasAtLeastOneValue())
+    CurrSValNo = CurrSLI->getValNumInfo(0);
+  else
+    CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
+                                       LSs->getVNInfoAllocator());
+  return SS;
+}
+
+/// IsAvailableInStack - Return true if register is available in a split stack
+/// slot at the specified index.
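+/// (On success, SpillIndex and SS are set to the spill's index and stack
+/// slot.)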
+bool
+PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
+                                      unsigned Reg, SlotIndex DefIndex,
+                                      SlotIndex RestoreIndex,
+                                      SlotIndex &SpillIndex,
+                                      int& SS) const {
+  if (!DefMBB)
+    return false;
+
+  DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
+  if (I == IntervalSSMap.end())
+    return false;
+  DenseMap<SlotIndex, SlotIndex>::const_iterator
+    II = Def2SpillMap.find(DefIndex);
+  if (II == Def2SpillMap.end())
+    return false;
+
+  // If last spill of def is in the same mbb as barrier mbb (where restore will
+  // be), make sure it's not below the intended restore index.
+  // FIXME: Undo the previous spill?
+  assert(LIs->getMBBFromIndex(II->second) == DefMBB);
+  if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
+    return false;
+
+  SS = I->second;
+  SpillIndex = II->second;
+  return true;
+}
+
+/// UpdateSpillSlotInterval - Given the specified val# of the register live
+/// interval being split, and the spill and restore indices, update the live
+/// interval of the spill stack slot.
+void
+PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
+                                           SlotIndex RestoreIndex) {
+  assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
+         "Expect restore in the barrier mbb");
+
+  MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
+  if (MBB == BarrierMBB) {
+    // Intra-block spill + restore. We are done.
+    LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
+    CurrSLI->addRange(SLR);
+    return;
+  }
+
+  SmallPtrSet<MachineBasicBlock*, 4> Processed;
+  SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
+  LiveRange SLR(SpillIndex, EndIdx, CurrSValNo);
+  CurrSLI->addRange(SLR);
+  Processed.insert(MBB);
+
+  // Start from the spill mbb, figure out the extent of the spill slot's
+  // live interval.
+  SmallVector<MachineBasicBlock*, 4> WorkList;
+  const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
+  if (LR->end > EndIdx)
+    // If the live range extends beyond the end of the mbb, add successors to
+    // the work list.
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+           SE = MBB->succ_end(); SI != SE; ++SI)
+      WorkList.push_back(*SI);
+
+  while (!WorkList.empty()) {
+    MachineBasicBlock *MBB = WorkList.back();
+    WorkList.pop_back();
+    if (Processed.count(MBB))
+      continue;
+    SlotIndex Idx = LIs->getMBBStartIdx(MBB);
+    LR = CurrLI->getLiveRangeContaining(Idx);
+    if (LR && LR->valno == ValNo) {
+      EndIdx = LIs->getMBBEndIdx(MBB);
+      if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
+        // Spill slot live interval stops at the restore.
+        LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      } else if (LR->end > EndIdx) {
+        // Live range extends beyond end of mbb, process successors.
+        LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
+        CurrSLI->addRange(SLR);
+        for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+               SE = MBB->succ_end(); SI != SE; ++SI)
+          WorkList.push_back(*SI);
+      } else {
+        LiveRange SLR(Idx, LR->end, CurrSValNo);
+        CurrSLI->addRange(SLR);
+      }
+      Processed.insert(MBB);
+    }
+  }
+}
+
+/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
+/// construction algorithm to compute the ranges and valnos for an interval.
+VNInfo*
+PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
+                       MachineBasicBlock* MBB, LiveInterval* LI,
+                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                       bool IsTopLevel, bool IsIntraBlock) {
+  // Return memoized result if it's available.
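+  // (NewVNs memoizes per-instruction values and LiveOut per-block live-out
+  // values; both are filled in at the bottom of this function.)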
+  if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
+    return NewVNs[UseI];
+  else if (!IsIntraBlock && LiveOut.count(MBB))
+    return LiveOut[MBB];
+
+  // Check if our block contains any uses or defs.
+  bool ContainsDefs = Defs.count(MBB);
+  bool ContainsUses = Uses.count(MBB);
+
+  VNInfo* RetVNI = 0;
+
+  // Enumerate the cases of use/def containing blocks.
+  if (!ContainsDefs && !ContainsUses) {
+    return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
+                                          NewVNs, LiveOut, Phis,
+                                          IsTopLevel, IsIntraBlock);
+  } else if (ContainsDefs && !ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+
+    // Search for the def in this block. If we don't find it before the
+    // instruction we care about, go to the fallback case. Note that this
+    // should never happen: the case cannot be intrablock, so use should
+    // always be an end() iterator.
+    assert(UseI == MBB->end() && "No use marked in intrablock");
+
+    MachineBasicBlock::iterator Walker = UseI;
+    --Walker;
+    while (Walker != MBB->begin()) {
+      if (BlockDefs.count(Walker))
+        break;
+      --Walker;
+    }
+
+    // Once we've found it, extend its VNInfo to our instruction.
+    SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
+    DefIndex = DefIndex.getDefIndex();
+    SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
+
+    RetVNI = NewVNs[Walker];
+    LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI));
+  } else if (!ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // Search for the use in this block that precedes the instruction we care
+    // about, going to the fallback case if we don't find it.
+    MachineBasicBlock::iterator Walker = UseI;
+    bool found = false;
+    while (Walker != MBB->begin()) {
+      --Walker;
+      if (BlockUses.count(Walker)) {
+        found = true;
+        break;
+      }
+    }
+
+    if (!found)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
+    UseIndex = UseIndex.getUseIndex();
+    SlotIndex EndIndex;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    // Now, recursively phi-construct the VNInfo for the use we found,
+    // and then extend it to include the instruction we care about.
+    RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                    NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
+
+    // FIXME: Need to set kills properly for inter-block stuff.
+  } else if (ContainsDefs && ContainsUses) {
+    SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
+    SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
+
+    // This case is basically a merging of the two preceding cases, with the
+    // special note that checking for defs must take precedence over checking
+    // for uses, because of two-address instructions.
+    MachineBasicBlock::iterator Walker = UseI;
+    bool foundDef = false;
+    bool foundUse = false;
+    while (Walker != MBB->begin()) {
+      --Walker;
+      if (BlockDefs.count(Walker)) {
+        foundDef = true;
+        break;
+      } else if (BlockUses.count(Walker)) {
+        foundUse = true;
+        break;
+      }
+    }
+
+    if (!foundDef && !foundUse)
+      return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
+                                            Uses, NewVNs, LiveOut, Phis,
+                                            IsTopLevel, IsIntraBlock);
+
+    SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
+    StartIndex = foundDef ? StartIndex.getDefIndex() :
                            StartIndex.getUseIndex();
+    SlotIndex EndIndex;
+    if (IsIntraBlock) {
+      EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+    } else
+      EndIndex = LIs->getMBBEndIdx(MBB);
+
+    if (foundDef)
+      RetVNI = NewVNs[Walker];
+    else
+      RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
+                                      NewVNs, LiveOut, Phis, false, true);
+
+    LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+  }
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// PerformPHIConstructionFallBack - PerformPHIConstruction fall back path.
+///
+VNInfo*
+PreAllocSplitting::PerformPHIConstructionFallBack(
+                       MachineBasicBlock::iterator UseI,
+                       MachineBasicBlock* MBB, LiveInterval* LI,
+                       SmallPtrSet<MachineInstr*, 4>& Visited,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
+            DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
+                       DenseMap<MachineInstr*, VNInfo*>& NewVNs,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
+                       DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
+                       bool IsTopLevel, bool IsIntraBlock) {
+  // NOTE: Because this is the fallback case from other cases, we do NOT
+  // assume that we are not intrablock here.
+  if (Phis.count(MBB)) return Phis[MBB];
+
+  SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
+  VNInfo *RetVNI = Phis[MBB] =
+    LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
+                     LIs->getVNInfoAllocator());
+
+  if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
+
+  // If there are no uses or defs between our starting point and the
+  // beginning of the block, then recursively perform PHI construction
+  // on our predecessors.
+  DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
+                                              Visited, Defs, Uses, NewVNs,
+                                              LiveOut, Phis, false, false);
+    if (Incoming != 0)
+      IncomingVNs[*PI] = Incoming;
+  }
+
+  if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
+    VNInfo* OldVN = RetVNI;
+    VNInfo* NewVN = IncomingVNs.begin()->second;
+    VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
+    if (MergedVN == OldVN) std::swap(OldVN, NewVN);
+
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
+           LOE = LiveOut.end(); LOI != LOE; ++LOI)
+      if (LOI->second == OldVN)
+        LOI->second = MergedVN;
+    for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
+           NVE = NewVNs.end(); NVI != NVE; ++NVI)
+      if (NVI->second == OldVN)
+        NVI->second = MergedVN;
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
+           PE = Phis.end(); PI != PE; ++PI)
+      if (PI->second == OldVN)
+        PI->second = MergedVN;
+    RetVNI = MergedVN;
+  } else {
+    // Otherwise, merge the incoming VNInfos with a phi join. Create a new
+    // VNInfo to represent the joined value.
+    for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
+           IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
+      I->second->setHasPHIKill(true);
+    }
+  }
+
+  SlotIndex EndIndex;
+  if (IsIntraBlock) {
+    EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
+  } else
+    EndIndex = LIs->getMBBEndIdx(MBB);
+  LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
+
+  // Memoize results so we don't have to recompute them.
+  if (!IsIntraBlock)
+    LiveOut[MBB] = RetVNI;
+  else {
+    if (!NewVNs.count(UseI))
+      NewVNs[UseI] = RetVNI;
+    Visited.insert(UseI);
+  }
+
+  return RetVNI;
+}
+
+/// ReconstructLiveInterval - Recompute a live interval from scratch.
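+/// (Rebuilds the ranges and value numbers from the current def/use lists,
+/// using the PHI construction helpers above.)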
+void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
+  VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
+
+  // Clear the old ranges and valnos.
+  LI->clear();
+
+  // Cache the uses and defs of the register.
+  typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
+  RegMap Defs, Uses;
+
+  // Keep track of the new VNs we're creating.
+  DenseMap<MachineInstr*, VNInfo*> NewVNs;
+  SmallPtrSet<VNInfo*, 2> PhiVNs;
+
+  // Cache defs, and create a new VNInfo for each def.
+  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+         DE = MRI->def_end(); DI != DE; ++DI) {
+    Defs[(*DI).getParent()].insert(&*DI);
+
+    SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+    DefIdx = DefIdx.getDefIndex();
+
+    assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
+    VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
+
+    // If the def is a move, set the copy field.
+    if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+      NewVN->setCopy(&*DI);
+
+    NewVNs[&*DI] = NewVN;
+  }
+
+  // Cache uses as a separate pass from actually processing them.
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+         UE = MRI->use_end(); UI != UE; ++UI)
+    Uses[(*UI).getParent()].insert(&*UI);
+
+  // Now, actually process every use and use a phi construction algorithm
+  // to walk from it to its reaching definitions, building VNInfos along
+  // the way.
+  DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
+  DenseMap<MachineBasicBlock*, VNInfo*> Phis;
+  SmallPtrSet<MachineInstr*, 4> Visited;
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
+    PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
+                           Uses, NewVNs, LiveOut, Phis, true, true);
+  }
+
+  // Add ranges for dead defs.
+  for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
+         DE = MRI->def_end(); DI != DE; ++DI) {
+    SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
+    DefIdx = DefIdx.getDefIndex();
+
+    if (LI->liveAt(DefIdx)) continue;
+
+    VNInfo* DeadVN = NewVNs[&*DI];
+    LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
+  }
+}
+
+/// RenumberValno - Split the given valno out into a new vreg, allowing it to
+/// be allocated to a different register. This function creates a new vreg,
+/// copies the valno and its live ranges over to the new vreg's interval,
+/// removes them from the old interval, and rewrites all uses and defs of
+/// the original reg to the new vreg within those ranges.
+void PreAllocSplitting::RenumberValno(VNInfo* VN) {
+  SmallVector<VNInfo*, 4> Stack;
+  SmallVector<VNInfo*, 4> VNsToCopy;
+  Stack.push_back(VN);
+
+  // Walk through and copy the valno we care about, and any other valnos
+  // that are two-address redefinitions of the one we care about. These
+  // will need to be rewritten as well. We also check for safety of the
+  // renumbering here, by making sure that none of the valnos involved
+  // have PHI kills.
+  while (!Stack.empty()) {
+    VNInfo* OldVN = Stack.back();
+    Stack.pop_back();
+
+    // Bail out if we ever encounter a valno that has a PHI kill. We can't
+    // renumber these.
+    if (OldVN->hasPHIKill()) return;
+
+    VNsToCopy.push_back(OldVN);
+
+    // Locate two-address redefinitions.
+    for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg),
+           DE = MRI->def_end(); DI != DE; ++DI) {
+      if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue;
+      SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex();
+      VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx);
+      if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) !=
+          VNsToCopy.end())
+        Stack.push_back(NextVN);
+    }
+  }
+
+  // Create the new vreg.
+  unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
+
+  // Create the new live interval.
+  LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
+
+  for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
+         VNsToCopy.end(); OI != OE; ++OI) {
+    VNInfo* OldVN = *OI;
+
+    // Copy the valno over.
+    VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
+    NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
+
+    // Remove the valno from the old interval.
+    CurrLI->removeValNo(OldVN);
+  }
+
+  // Rewrite defs and uses. This is done in two stages to avoid invalidating
+  // the reg_iterator.
+  SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
+
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand& MO = I.getOperand();
+    SlotIndex InstrIdx = LIs->getInstructionIndex(&*I);
+
+    if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) ||
+        (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex())))
+      OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
+  }
+
+  for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
+         OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
+    MachineInstr* Inst = I->first;
+    unsigned OpIdx = I->second;
+    MachineOperand& MO = Inst->getOperand(OpIdx);
+    MO.setReg(NewVReg);
+  }
+
+  // Grow the VirtRegMap, since we've created a new vreg.
+  VRM->grow();
+
+  // The renumbered vreg shares a stack slot with the old register.
+  if (IntervalSSMap.count(CurrLI->reg))
+    IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
+
+  ++NumRenumbers;
+}
+
+bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
+                                      MachineInstr* DefMI,
+                                      MachineBasicBlock::iterator RestorePt,
+                                      SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  MachineBasicBlock& MBB = *RestorePt->getParent();
+
+  MachineBasicBlock::iterator KillPt = BarrierMBB->end();
+  if (!DefMI || DefMI->getParent() == BarrierMBB)
+    KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
+  else
+    KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
+
+  if (KillPt == DefMI->getParent()->end())
+    return false;
+
+  TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI);
+  SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
+
+  ReconstructLiveInterval(CurrLI);
+  RematIdx = RematIdx.getDefIndex();
+  RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
+
+  ++NumSplits;
+  ++NumRemats;
+  return true;
+}
+
+MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
+                                           const TargetRegisterClass* RC,
+                                           MachineInstr* DefMI,
+                                           MachineInstr* Barrier,
+                                           MachineBasicBlock* MBB,
+                                           int& SS,
+                                           SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  // Go top down if RefsInMBB is empty.
+  if (RefsInMBB.empty())
+    return 0;
+
+  MachineBasicBlock::iterator FoldPt = Barrier;
+  while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
+         !RefsInMBB.count(FoldPt))
+    --FoldPt;
+
+  int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg);
+  if (OpIdx == -1)
+    return 0;
+
+  SmallVector<unsigned, 1> Ops;
+  Ops.push_back(OpIdx);
+
+  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+    return 0;
+
+  DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
+  if (I != IntervalSSMap.end()) {
+    SS = I->second;
+  } else {
+    SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
+  }
+
+  MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
+
+  if (FMI) {
+    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+    FoldPt->eraseFromParent();
+    ++NumFolds;
+
+    IntervalSSMap[vreg] = SS;
+    CurrSLI = &LSs->getOrCreateInterval(SS, RC);
+    if (CurrSLI->hasAtLeastOneValue())
+      CurrSValNo = CurrSLI->getValNumInfo(0);
+    else
+      CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
+                                         LSs->getVNInfoAllocator());
+  }
+
+  return FMI;
+}
+
+MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
+                                             const TargetRegisterClass* RC,
+                                             MachineInstr* Barrier,
+                                             MachineBasicBlock* MBB,
+                                             int SS,
+                                             SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
+  if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
+    return 0;
+
+  // Go top down if RefsInMBB is empty.
+  if (RefsInMBB.empty())
+    return 0;
+
+  // Can't fold a restore between a call stack setup and teardown.
+  MachineBasicBlock::iterator FoldPt = Barrier;
+
+  // Advance from barrier to call frame teardown.
+  while (FoldPt != MBB->getFirstTerminator() &&
+         FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+    if (RefsInMBB.count(FoldPt))
+      return 0;
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+  else
+    ++FoldPt;
+
+  // Now find the restore point.
+  while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
+    if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
+      while (FoldPt != MBB->getFirstTerminator() &&
+             FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
+        if (RefsInMBB.count(FoldPt))
+          return 0;
+
+        ++FoldPt;
+      }
+
+      if (FoldPt == MBB->getFirstTerminator())
+        return 0;
+    }
+
+    ++FoldPt;
+  }
+
+  if (FoldPt == MBB->getFirstTerminator())
+    return 0;
+
+  int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
+  if (OpIdx == -1)
+    return 0;
+
+  SmallVector<unsigned, 1> Ops;
+  Ops.push_back(OpIdx);
+
+  if (!TII->canFoldMemoryOperand(FoldPt, Ops))
+    return 0;
+
+  MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
+
+  if (FMI) {
+    LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
+    FoldPt->eraseFromParent();
+    ++NumRestoreFolds;
+  }
+
+  return FMI;
+}
+
+/// SplitRegLiveInterval - Split (spill and restore) the given live interval
+/// so that it does not cross the barrier being processed. Shrink-wrap
+/// (minimize) the live interval down to its last uses.
+bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
+  DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
+               << " result: ");
+
+  CurrLI = LI;
+
+  // Find the live range where the current interval crosses the barrier.
+  LiveInterval::iterator LR =
+    CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
+  VNInfo *ValNo = LR->valno;
+
+  assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
+
+  MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
+
+  // If this would create a new join point, do not split.
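+  // (createsNewJoin, defined later in this file, walks the CFG to detect
+  // that case.)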
+  if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
+    DEBUG(dbgs() << "FAILED (would create a new join point).\n");
+    return false;
+  }
+
+  // Find all references in the barrier mbb.
+  SmallPtrSet<MachineInstr*, 4> RefsInMBB;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineInstr *RefMI = &*I;
+    if (RefMI->getParent() == BarrierMBB)
+      RefsInMBB.insert(RefMI);
+  }
+
+  // Find a point to restore the value after the barrier.
+  MachineBasicBlock::iterator RestorePt =
+    findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
+  if (RestorePt == BarrierMBB->end()) {
+    DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
+    return false;
+  }
+
+  if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
+    if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
+      DEBUG(dbgs() << "success (remat).\n");
+      return true;
+    }
+
+  // Add a spill either before the barrier or after the definition.
+  MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
+  const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
+  SlotIndex SpillIndex;
+  MachineInstr *SpillMI = NULL;
+  int SS = -1;
+  if (!DefMI) {
+    // If we don't know where the def is we must split just before the barrier.
+    if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
+                             BarrierMBB, SS, RefsInMBB))) {
+      SpillIndex = LIs->getInstructionIndex(SpillMI);
+    } else {
+      MachineBasicBlock::iterator SpillPt =
+        findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
+      if (SpillPt == BarrierMBB->begin()) {
+        DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+        return false; // No gap to insert spill.
+      }
+      // Add spill.
+
+      SS = CreateSpillStackSlot(CurrLI->reg, RC);
+      TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC,
+                               TRI);
+      SpillMI = prior(SpillPt);
+      SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
+    }
+  } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
+                                 LIs->getZeroIndex(), SpillIndex, SS)) {
+    // If it's already split, just restore the value. There is no need to spill
+    // the def again.
+    if (!DefMI) {
+      DEBUG(dbgs() << "FAILED (def is dead).\n");
+      return false; // Def is dead. Do nothing.
+    }
+
+    if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
+                             BarrierMBB, SS, RefsInMBB))) {
+      SpillIndex = LIs->getInstructionIndex(SpillMI);
+    } else {
+      // Check if it's possible to insert a spill after the def MI.
+      MachineBasicBlock::iterator SpillPt;
+      if (DefMBB == BarrierMBB) {
+        // Add spill after the def and the last use before the barrier.
+        SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
+                                 RefsInMBB);
+        if (SpillPt == DefMBB->begin()) {
+          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+          return false; // No gap to insert spill.
+        }
+      } else {
+        SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
+        if (SpillPt == DefMBB->end()) {
+          DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
+          return false; // No gap to insert spill.
+        }
+      }
+      // Add spill.
+      SS = CreateSpillStackSlot(CurrLI->reg, RC);
+      TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC,
+                               TRI);
+      SpillMI = prior(SpillPt);
+      SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
+    }
+  }
+
+  // Remember def instruction index to spill index mapping.
+  if (DefMI && SpillMI)
+    Def2SpillMap[ValNo->def] = SpillIndex;
+
+  // Add restore.
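+  // (Try to fold the reload into a nearby use first; otherwise emit an
+  // explicit load from the spill slot.)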
+  bool FoldedRestore = false;
+  SlotIndex RestoreIndex;
+  if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
+                                      BarrierMBB, SS, RefsInMBB)) {
+    RestorePt = LMI;
+    RestoreIndex = LIs->getInstructionIndex(RestorePt);
+    FoldedRestore = true;
+  } else {
+    TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI);
+    MachineInstr *LoadMI = prior(RestorePt);
+    RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
+  }
+
+  // Update spill stack slot live interval.
+  UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(),
+                          RestoreIndex.getDefIndex());
+
+  ReconstructLiveInterval(CurrLI);
+
+  if (!FoldedRestore) {
+    SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
+    RestoreIdx = RestoreIdx.getDefIndex();
+    RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
+  }
+
+  ++NumSplits;
+  DEBUG(dbgs() << "success.\n");
+  return true;
+}
+
+/// SplitRegLiveIntervals - Split all register live intervals that cross the
+/// barrier that's being processed.
+bool
+PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
+                                         SmallPtrSet<LiveInterval*, 8>& Split) {
+  // First find all the virtual registers whose live intervals are intercepted
+  // by the current barrier.
+  SmallVector<LiveInterval*, 8> Intervals;
+  for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
+    // FIXME: If it's not safe to move any instruction that defines the barrier
+    // register class, then it means there are some special dependencies which
+    // codegen is not modelling. Ignore these barriers for now.
+    if (!TII->isSafeToMoveRegClassDefs(*RC))
+      continue;
+    const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+    for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
+      unsigned Reg = VRs[i];
+      if (!LIs->hasInterval(Reg))
+        continue;
+      LiveInterval *LI = &LIs->getInterval(Reg);
+      if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
+        // Virtual register live interval is intercepted by the barrier. We
+        // should split and shrink wrap its interval if possible.
+        Intervals.push_back(LI);
+    }
+  }
+
+  // Process the affected live intervals.
+  bool Change = false;
+  while (!Intervals.empty()) {
+    if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
+      break;
+    LiveInterval *LI = Intervals.back();
+    Intervals.pop_back();
+    bool result = SplitRegLiveInterval(LI);
+    if (result) Split.insert(LI);
+    Change |= result;
+  }
+
+  return Change;
+}
+
+unsigned PreAllocSplitting::getNumberOfNonSpills(
+                                  SmallPtrSet<MachineInstr*, 4>& MIs,
+                                  unsigned Reg, int FrameIndex,
+                                  bool& FeedsTwoAddr) {
+  unsigned NonSpills = 0;
+  for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
+       UI != UE; ++UI) {
+    int StoreFrameIndex;
+    unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+    if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
+      ++NonSpills;
+
+    int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
+    if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
+      FeedsTwoAddr = true;
+  }
+
+  return NonSpills;
+}
+
+/// removeDeadSpills - After doing splitting, filter through all intervals we've
+/// split, and see if any of the spills are unnecessary. If so, remove them.
+bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
+  bool changed = false;
+
+  // Walk over all of the live intervals that were touched by the splitter,
+  // and see if we can do any DCE and/or folding.
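+  // (Three cases are handled below: defs with no uses are deleted, a single
+  // non-store use may allow the reload to be folded, and load/store-only
+  // values are removed entirely.)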
+  for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
+         LE = split.end(); LI != LE; ++LI) {
+    DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
+
+    // First, collect all the uses of the vreg, and sort them by their
+    // reaching definition (VNInfo).
+    for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
+           UE = MRI->use_end(); UI != UE; ++UI) {
+      SlotIndex index = LIs->getInstructionIndex(&*UI);
+      index = index.getUseIndex();
+
+      const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
+      VNUseCount[LR->valno].insert(&*UI);
+    }
+
+    // Now, take the definitions (VNInfo's) one at a time and try to DCE
+    // and/or fold them away.
+    for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
+           VE = (*LI)->vni_end(); VI != VE; ++VI) {
+
+      if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
+        return changed;
+
+      VNInfo* CurrVN = *VI;
+
+      // We don't currently try to handle definitions with PHI kills, because
+      // it would involve processing more than one VNInfo at once.
+      if (CurrVN->hasPHIKill()) continue;
+
+      // We also don't try to handle the results of PHI joins, since there's
+      // no defining instruction to analyze.
+      MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+      if (!DefMI || CurrVN->isUnused()) continue;
+
+      // We're only interested in eliminating cruft introduced by the splitter,
+      // which is of the form load-use or load-use-store. First, check that the
+      // definition is a load, and remember what stack slot we loaded it from.
+      int FrameIndex;
+      if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
+
+      // If the definition has no uses at all, just DCE it.
+      if (VNUseCount[CurrVN].size() == 0) {
+        LIs->RemoveMachineInstrFromMaps(DefMI);
+        (*LI)->removeValNo(CurrVN);
+        DefMI->eraseFromParent();
+        VNUseCount.erase(CurrVN);
+        ++NumDeadSpills;
+        changed = true;
+        continue;
+      }
+
+      // Second, get the number of non-store uses of the definition, as well as
+      // a flag indicating whether it feeds into a later two-address definition.
+      bool FeedsTwoAddr = false;
+      unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
+                                                    (*LI)->reg, FrameIndex,
+                                                    FeedsTwoAddr);
+
+      // If there's one non-store use and it doesn't feed a two-addr, then
+      // this is a load-use-store case that we can try to fold.
+      if (NonSpillCount == 1 && !FeedsTwoAddr) {
+        // Start by finding the non-store use MachineInstr.
+        SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
+        int StoreFrameIndex;
+        unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+        while (UI != VNUseCount[CurrVN].end() &&
+               (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
+          ++UI;
+          if (UI != VNUseCount[CurrVN].end())
+            StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
+        }
+        if (UI == VNUseCount[CurrVN].end()) continue;
+
+        MachineInstr* use = *UI;
+
+        // Attempt to fold it away!
+        int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
+        if (OpIdx == -1) continue;
+        SmallVector<unsigned, 1> Ops;
+        Ops.push_back(OpIdx);
+        if (!TII->canFoldMemoryOperand(use, Ops)) continue;
+
+        MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
+
+        if (!NewMI) continue;
+
+        // Update relevant analyses.
+        LIs->RemoveMachineInstrFromMaps(DefMI);
+        LIs->ReplaceMachineInstrInMaps(use, NewMI);
+        (*LI)->removeValNo(CurrVN);
+
+        DefMI->eraseFromParent();
+        use->eraseFromParent();
+        VNUseCount[CurrVN].erase(use);
+
+        // Remove deleted instructions. Note that we need to remove them from
+        // the VNInfo->use map as well, just to be safe.
+        for (SmallPtrSet<MachineInstr*, 4>::iterator II =
+               VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
+             II != IE; ++II) {
+          for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+                 VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
+               ++VNI)
+            if (VNI->first != CurrVN)
+              VNI->second.erase(*II);
+          LIs->RemoveMachineInstrFromMaps(*II);
+          (*II)->eraseFromParent();
+        }
+
+        VNUseCount.erase(CurrVN);
+
+        for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
+               VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
+          if (VI->second.erase(use))
+            VI->second.insert(NewMI);
+
+        ++NumDeadSpills;
+        changed = true;
+        continue;
+      }
+
+      // If there's more than one non-store instruction, we can't profitably
+      // fold it, so bail.
+      if (NonSpillCount) continue;
+
+      // Otherwise, this is a load-store case, so DCE them.
+      for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
+             VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
+           UI != UE; ++UI) {
+        LIs->RemoveMachineInstrFromMaps(*UI);
+        (*UI)->eraseFromParent();
+      }
+
+      VNUseCount.erase(CurrVN);
+
+      LIs->RemoveMachineInstrFromMaps(DefMI);
+      (*LI)->removeValNo(CurrVN);
+      DefMI->eraseFromParent();
+      ++NumDeadSpills;
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
+                                       MachineBasicBlock* DefMBB,
+                                       MachineBasicBlock* BarrierMBB) {
+  if (DefMBB == BarrierMBB)
+    return false;
+
+  if (LR->valno->hasPHIKill())
+    return false;
+
+  SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
+  if (LR->end < MBBEnd)
+    return false;
+
+  MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
+  if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
+    return true;
+
+  MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
+  SmallPtrSet<MachineBasicBlock*, 4> Visited;
+  typedef std::pair<MachineBasicBlock*,
+                    MachineBasicBlock::succ_iterator> ItPair;
+  SmallVector<ItPair, 4> Stack;
+  Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
+
+  while (!Stack.empty()) {
+    ItPair P = Stack.back();
+    Stack.pop_back();
+
+    MachineBasicBlock* PredMBB = P.first;
+    MachineBasicBlock::succ_iterator S = P.second;
+
+    if (S == PredMBB->succ_end())
+      continue;
+    else if (Visited.count(*S)) {
+      Stack.push_back(std::make_pair(PredMBB, ++S));
+      continue;
+    } else
+      Stack.push_back(std::make_pair(PredMBB, S+1));
+
+    MachineBasicBlock* MBB = *S;
+    Visited.insert(MBB);
+
+    if (MBB == BarrierMBB)
+      return true;
+
+    MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
+    MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
+    MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
+    while (MDTN) {
+      if (MDTN == DefMDTN)
+        return true;
+      else if (MDTN == BarrierMDTN)
+        break;
+      MDTN = MDTN->getIDom();
+    }
+
+    MBBEnd = LIs->getMBBEndIdx(MBB);
+    if (LR->end > MBBEnd)
+      Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
+  }
+
+  return false;
+}
+
+
+bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
+  CurrMF = &MF;
+  TM     = &MF.getTarget();
+  TRI    = TM->getRegisterInfo();
+  TII    = TM->getInstrInfo();
+  MFI    = MF.getFrameInfo();
+  MRI    = &MF.getRegInfo();
+  SIs    = &getAnalysis<SlotIndexes>();
+  LIs    = &getAnalysis<LiveIntervals>();
+  LSs    = &getAnalysis<LiveStacks>();
+  VRM    = &getAnalysis<VirtRegMap>();
+
+  bool MadeChange = false;
+
+  // Make sure blocks are numbered in order.
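+  // (RenumberBlocks recomputes the basic block numbering, since earlier
+  // passes may have added or removed blocks.)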
+ MF.RenumberBlocks(); + + MachineBasicBlock *Entry = MF.begin(); + SmallPtrSet Visited; + + SmallPtrSet Split; + + for (df_ext_iterator > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + BarrierMBB = *DFI; + for (MachineBasicBlock::iterator I = BarrierMBB->begin(), + E = BarrierMBB->end(); I != E; ++I) { + Barrier = &*I; + const TargetRegisterClass **BarrierRCs = + Barrier->getDesc().getRegClassBarriers(); + if (!BarrierRCs) + continue; + BarrierIdx = LIs->getInstructionIndex(Barrier); + MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split); + } + } + + MadeChange |= removeDeadSpills(Split); + + return MadeChange; +} diff --git a/final/lib/CodeGen/ProcessImplicitDefs.cpp b/final/lib/CodeGen/ProcessImplicitDefs.cpp new file mode 100644 index 00000000000..9cd9941e56b --- /dev/null +++ b/final/lib/CodeGen/ProcessImplicitDefs.cpp @@ -0,0 +1,298 @@ +//===---------------------- ProcessImplicitDefs.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "processimplicitdefs" + +#include "llvm/CodeGen/ProcessImplicitDefs.h" + +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" + + +using namespace llvm; + +char ProcessImplicitDefs::ID = 0; +INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) + +void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(TwoAddressInstructionPassID); + AU.addPreservedID(PHIEliminationID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool +ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, + unsigned Reg, unsigned OpIdx, + const TargetInstrInfo *tii_, + SmallSet &ImpDefRegs) { + switch(OpIdx) { + case 1: + return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + case 2: + return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 || + ImpDefRegs.count(MI->getOperand(0).getReg())); + default: return false; + } +} + +static bool isUndefCopy(MachineInstr *MI, unsigned Reg, + const TargetInstrInfo *tii_, + SmallSet &ImpDefRegs) { + if (MI->isCopy()) { + MachineOperand &MO0 = MI->getOperand(0); + MachineOperand &MO1 = MI->getOperand(1); + if (MO1.getReg() != Reg) + return false; + if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg())) + return true; + return false; + } + return false; +} + +/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure +/// there is one implicit_def for each use. Add isUndef marker to +/// implicit_def defs and their uses. 
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { + + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " + << ((Value*)fn.getFunction())->getName() << '\n'); + + bool Changed = false; + + const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo(); + const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo(); + MachineRegisterInfo *mri_ = &fn.getRegInfo(); + + LiveVariables *lv_ = &getAnalysis(); + + SmallSet ImpDefRegs; + SmallVector ImpDefMIs; + SmallVector RUses; + SmallPtrSet Visited; + SmallPtrSet ModInsts; + + MachineBasicBlock *Entry = fn.begin(); + for (df_ext_iterator > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ) { + MachineInstr *MI = &*I; + ++I; + if (MI->isImplicitDef()) { + if (MI->getOperand(0).getSubReg()) + continue; + unsigned Reg = MI->getOperand(0).getReg(); + ImpDefRegs.insert(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + ImpDefRegs.insert(*SS); + } + ImpDefMIs.push_back(MI); + continue; + } + + // Eliminate %reg1032:sub = COPY undef. + if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + MachineOperand &MO = MI->getOperand(1); + if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { + if (MO.isKill()) { + LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + vi.removeKill(MI); + } + MI->eraseFromParent(); + Changed = true; + continue; + } + } + + bool ChangedToImpDef = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (!ImpDefRegs.count(Reg)) + continue; + // Use is a copy, just turn it into an implicit_def. + if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) { + bool isKill = MO.isKill(); + MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + if (isKill) { + ImpDefRegs.erase(Reg); + LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + vi.removeKill(MI); + } + ChangedToImpDef = true; + Changed = true; + break; + } + + Changed = true; + MO.setIsUndef(); + // This is a partial register redef of an implicit def. + // Make sure the whole register is defined by the instruction. + if (MO.isDef()) { + MI->addRegisterDefined(Reg); + continue; + } + if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { + // Make sure other uses of + for (unsigned j = i+1; j != e; ++j) { + MachineOperand &MOJ = MI->getOperand(j); + if (MOJ.isReg() && MOJ.isUse() && MOJ.getReg() == Reg) + MOJ.setIsUndef(); + } + ImpDefRegs.erase(Reg); + } + } + + if (ChangedToImpDef) { + // Backtrack to process this new implicit_def. + --I; + } else { + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + ImpDefRegs.erase(MO.getReg()); + } + } + } + + // Any outstanding liveout implicit_def's? + for (unsigned i = 0, e = ImpDefMIs.size(); i != e; ++i) { + MachineInstr *MI = ImpDefMIs[i]; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !ImpDefRegs.count(Reg)) { + // Delete all "local" implicit_def's. 
That includes those which define
+        // physical registers, since they cannot be live-out.
+        MI->eraseFromParent();
+        Changed = true;
+        continue;
+      }
+
+      // If there are multiple defs of the same register and at least one
+      // is not an implicit_def, do not insert implicit_def's before the
+      // uses.
+      bool Skip = false;
+      SmallVector<MachineInstr*, 4> DeadImpDefs;
+      for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg),
+           DE = mri_->def_end(); DI != DE; ++DI) {
+        MachineInstr *DeadImpDef = &*DI;
+        if (!DeadImpDef->isImplicitDef()) {
+          Skip = true;
+          break;
+        }
+        DeadImpDefs.push_back(DeadImpDef);
+      }
+      if (Skip)
+        continue;
+
+      // The only implicit_defs we want to keep are those that are live
+      // out of their block.
+      for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j)
+        DeadImpDefs[j]->eraseFromParent();
+      Changed = true;
+
+      // Process each use instruction once.
+      for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg),
+           UE = mri_->use_end(); UI != UE; ++UI) {
+        if (UI.getOperand().isUndef())
+          continue;
+        MachineInstr *RMI = &*UI;
+        if (ModInsts.insert(RMI))
+          RUses.push_back(RMI);
+      }
+
+      for (unsigned i = 0, e = RUses.size(); i != e; ++i) {
+        MachineInstr *RMI = RUses[i];
+
+        // Turn a copy use into an implicit_def.
+        if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
+          RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
+
+          bool isKill = false;
+          SmallVector<unsigned, 4> Ops;
+          for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+            MachineOperand &RRMO = RMI->getOperand(j);
+            if (RRMO.isReg() && RRMO.getReg() == Reg) {
+              Ops.push_back(j);
+              if (RRMO.isKill())
+                isKill = true;
+            }
+          }
+          // Leave the other operands alone.
+          for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) {
+            unsigned OpIdx = Ops[j];
+            RMI->RemoveOperand(OpIdx-j);
+          }
+
+          // Update LiveVariables varinfo if the instruction is a kill.
+          if (isKill) {
+            LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+            vi.removeKill(RMI);
+          }
+          continue;
+        }
+
+        // Replace Reg with a new vreg that's marked implicit.
+        const TargetRegisterClass* RC = mri_->getRegClass(Reg);
+        unsigned NewVReg = mri_->createVirtualRegister(RC);
+        bool isKill = true;
+        for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) {
+          MachineOperand &RRMO = RMI->getOperand(j);
+          if (RRMO.isReg() && RRMO.getReg() == Reg) {
+            RRMO.setReg(NewVReg);
+            RRMO.setIsUndef();
+            if (isKill) {
+              // Only the first operand of NewVReg is marked kill.
+              RRMO.setIsKill();
+              isKill = false;
+            }
+          }
+        }
+      }
+      RUses.clear();
+      ModInsts.clear();
+    }
+    ImpDefRegs.clear();
+    ImpDefMIs.clear();
+  }
+
+  return Changed;
+}
+
diff --git a/final/lib/CodeGen/PrologEpilogInserter.cpp b/final/lib/CodeGen/PrologEpilogInserter.cpp
new file mode 100644
index 00000000000..92e25e150f5
--- /dev/null
+++ b/final/lib/CodeGen/PrologEpilogInserter.cpp
@@ -0,0 +1,850 @@
+//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is responsible for finalizing the function's frame layout, saving
+// callee saved registers, and for emitting prolog & epilog code for the
+// function.
+//
+// This pass must be run after register allocation. After this pass is
+// executed, it is illegal to construct MO_FrameIndex operands.
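Concretely, "illegal to construct MO_FrameIndex operands" means that by the end
of this pass every abstract frame reference has been rewritten to a real base
register plus a byte offset. A toy sketch of that rewrite, with entirely
hypothetical structures standing in for machine operands:

    #include <vector>

    struct ToyOperand { bool IsFI; int FrameIdx; int BaseReg; int Offset; };

    // Rewrite each abstract frame-index operand as [BaseReg + Offset], the
    // net effect this pass has on a function. ObjectOffsets stands in for
    // the per-index offsets computed during frame layout.
    static void lowerFrameIndices(std::vector<ToyOperand> &Ops,
                                  const std::vector<int> &ObjectOffsets,
                                  int FrameReg) {
      for (size_t i = 0, e = Ops.size(); i != e; ++i) {
        if (!Ops[i].IsFI)
          continue;
        Ops[i].BaseReg = FrameReg;
        Ops[i].Offset = ObjectOffsets[Ops[i].FrameIdx];
        Ops[i].IsFI = false; // no MO_FrameIndex survives the pass
      }
    }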
+// +// This pass provides an optional shrink wrapping variant of prolog/epilog +// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pei" +#include "PrologEpilogInserter.h" +#include "llvm/InlineAsm.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include + +using namespace llvm; + +char PEI::ID = 0; + +INITIALIZE_PASS_BEGIN(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) + +STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); + +/// createPrologEpilogCodeInserter - This function returns a pass that inserts +/// prolog and epilog code, and eliminates abstract frame references. +/// +FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); } + +/// runOnMachineFunction - Insert prolog/epilog code and replace abstract +/// frame indexes with appropriate references. +/// +bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + + RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. + calculateCallsInformation(Fn); + + // Allow the target machine to make some adjustments to the function + // e.g. UsedPhysRegs before calculateCalleeSavedRegisters. + TFI->processFunctionBeforeCalleeSavedScan(Fn, RS); + + // Scan the function for modified callee saved registers and insert spill code + // for any callee saved registers that are modified. + calculateCalleeSavedRegisters(Fn); + + // Determine placement of CSR spill/restore code: + // - With shrink wrapping, place spills and restores to tightly + // enclose regions in the Machine CFG of the function where + // they are used. + // - Without shink wrapping (default), place all spills in the + // entry block, all restores in return blocks. + placeCSRSpillsAndRestores(Fn); + + // Add the code to save and restore the callee saved registers + if (!F->hasFnAttr(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); + + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + TFI->processFunctionBeforeFrameFinalized(Fn); + + // Calculate actual frame offsets for all abstract stack objects... 
+ calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack + // and MaxCallFrameSize variables. + if (!F->hasFnAttr(Attribute::Naked)) + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimiation + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + + delete RS; + clearAllSets(); + return true; +} + +#if 0 +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + if (ShrinkWrapping || ShrinkWrapFunc != "") { + AU.addRequired(); + AU.addRequired(); + } + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); +} +#endif + +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + unsigned MaxCallFrameSize = 0; + bool AdjustsStack = MFI->adjustsStack(); + + // Get the function call frame set-up and tear-down instruction opcode + int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode(); + + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. + if (FrameSetupOpcode == -1 && FrameDestroyOpcode == -1) + return; + + std::vector FrameSDOps; + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImm(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + AdjustsStack = true; + FrameSDOps.push_back(I); + } else if (I->isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); + + for (std::vector::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. 
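At its core, the scan in calculateCallsInformation above is a single
max-reduction over the call frame setup/destroy pseudos. A self-contained
sketch under a hypothetical instruction model (the real pass also collects the
pseudos for later elimination):

    #include <algorithm>
    #include <vector>

    struct ToyInst { int Opcode; unsigned Imm; };
    enum { FrameSetup = 1, FrameDestroy = 2 }; // stand-ins for target opcodes

    // Largest call-frame size in the function, plus whether any call
    // adjusts the stack: the same reduction the pass performs above.
    static unsigned maxCallFrameSize(const std::vector<ToyInst> &Insts,
                                     bool &AdjustsStack) {
      unsigned Max = 0;
      for (size_t i = 0, e = Insts.size(); i != e; ++i)
        if (Insts[i].Opcode == FrameSetup || Insts[i].Opcode == FrameDestroy) {
          Max = std::max(Max, Insts[i].Imm);
          AdjustsStack = true;
        }
      return Max;
    }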
+ if (TFI->canSimplifyCallFramePseudos(Fn)) + RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } +} + + +/// calculateCalleeSavedRegisters - Scan the function for modified callee saved +/// registers. +void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Get the callee saved register list... + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + // Early exit for targets which have no callee saved registers. + if (CSRegs == 0 || CSRegs[0] == 0) + return; + + // In Naked functions we aren't going to save any registers. + if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) + return; + + std::vector CSI; + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + // If the reg is modified, save it! + CSI.push_back(CalleeSavedInfo(Reg)); + } else { + for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); + *AliasSet; ++AliasSet) { // Check alias registers too. + if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { + CSI.push_back(CalleeSavedInfo(Reg)); + break; + } + } + } + } + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector::iterator + I = CSI.begin(), E = CSI.end(); I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); + } + + I->setFrameIdx(FrameIdx); + } + + MFI->setCalleeSavedInfo(CSI); +} + +/// insertCSRSpillsAndRestores - Insert spill and restore code for +/// callee saved registers used in the function, handling shrink wrapping. +/// +void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { + // Get callee saved register information. 
+ MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + + MFI->setCalleeSavedInfoValid(true); + + // Early exit if no callee saved registers are modified! + if (CSI.empty()) + return; + + const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + MachineBasicBlock::iterator I; + + if (! ShrinkWrapThisFunction) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + EntryBlock->addLiveIn(CSI[i].getReg()); + + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); + } + } + + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock* MBB = ReturnBlocks[ri]; + I = MBB->end(); --I; + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that preceed it. + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + CSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } + } + return; + } + + // Insert spills. + std::vector blockCSI; + for (CSRegBlockMap::iterator BI = CSRSave.begin(), + BE = CSRSave.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet save = BI->second; + + if (save.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = save.begin(), + RE = save.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not collect callee saved register info"); + + I = MBB->begin(); + + // When shrink wrapping, use stack slot stores/loads. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Add the callee-saved register as live-in. + // It's killed at the spill. + MBB->addLiveIn(blockCSI[i].getReg()); + + // Insert the spill to the stack frame. 
+ unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, + true, + blockCSI[i].getFrameIdx(), + RC, TRI); + } + } + + for (CSRegBlockMap::iterator BI = CSRRestore.begin(), + BE = CSRRestore.end(); BI != BE; ++BI) { + MachineBasicBlock* MBB = BI->first; + CSRegSet restore = BI->second; + + if (restore.empty()) + continue; + + blockCSI.clear(); + for (CSRegSet::iterator RI = restore.begin(), + RE = restore.end(); RI != RE; ++RI) { + blockCSI.push_back(CSI[*RI]); + } + assert(blockCSI.size() > 0 && + "Could not find callee saved register info"); + + // If MBB is empty and needs restores, insert at the _beginning_. + if (MBB->empty()) { + I = MBB->begin(); + } else { + I = MBB->end(); + --I; + + // Skip over all terminator instructions, which are part of the + // return sequence. + if (! I->getDesc().isTerminator()) { + ++I; + } else { + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->getDesc().isTerminator()) + I = I2; + } + } + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that preceed it. + for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, + blockCSI[i].getFrameIdx(), + RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. 
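The layout code below repeatedly rounds offsets up with the same integer idiom
AdjustStackOffset uses above. As a tiny checked example (a hypothetical helper,
not part of the pass):

    #include <cassert>

    // Round Offset up to the next multiple of Align:
    // (Offset + Align - 1) / Align * Align, as in AdjustStackOffset above.
    static long alignTo(long Offset, unsigned Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    // An 8-byte-aligned object placed after 20 bytes of frame lands at 24.
    static void alignToExample() { assert(alignTo(20, 8) == 24); }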
+ int LocalAreaOffset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 + && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -MFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If the stack grows down, we need to add the size to find the lowest + // address of the object. + Offset += MFI->getObjectSize(i); + + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); + } + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Make sure the special register scavenging spill slot is closest to the + // frame pointer if a frame pointer is required. + const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); + if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? 
-Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet LargeStackObjs; + if (MFI->getStackProtectorIndex() >= 0) { + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (!MFI->MayNeedStackProtector(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + LargeStackObjs.insert(i); + } + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && (int)i == RS->getScavengingFrameIndex()) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (LargeStackObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || + !RegInfo->useFPForScavengingIndex(Fn))) { + int SFI = RS->getScavengingFrameIndex(); + if (SFI >= 0) + AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + } + + if (!TFI.targetHandlesStackFrameRounding()) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + } + + // Update frame info to pretend that this is part of the stack... + MFI->setStackSize(Offset - LocalAreaOffset); +} + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. 
+/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering(); + + // Add prologue to the function... + TFI.emitPrologue(Fn); + + // Add epilogue to restore the callee-save registers in each exiting block + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { + // If last instruction is a return instruction, add an epilogue + if (!I->empty() && I->back().getDesc().isReturn()) + TFI.emitEpilogue(Fn, *I); + } +} + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + + const TargetMachine &TM = Fn.getTarget(); + assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetFrameLowering *TFI = TM.getFrameLowering(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + int FrameSetupOpcode = TRI.getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode(); + + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif + int SPAdj = 0; // SP offset due to call frame setup / destroy. + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); + + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; + + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) + if (MI->getOperand(i).isFI()) { + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = 0; + break; + } + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. 
+ if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + } + + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); + } +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(BB); + + unsigned VirtReg = 0; + unsigned ScratchReg = 0; + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + MachineInstr *MI = I; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + ++NumVirtualFrameRegs; + + // Have we already allocated a scratch register for this virtual? + if (Reg != VirtReg) { + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + VirtReg = Reg; + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + ScratchReg = RS->scavengeRegister(RC, I, SPAdj); + ++NumScavengedRegs; + } + // Replace this reference to the virtual register with the + // scratch register. + assert (ScratchReg && "Missing scratch register!"); + MI->getOperand(i).setReg(ScratchReg); + + } + } + RS->forward(I); + ++I; + } + } +} diff --git a/final/lib/CodeGen/PrologEpilogInserter.h b/final/lib/CodeGen/PrologEpilogInserter.h new file mode 100644 index 00000000000..e2391591ad0 --- /dev/null +++ b/final/lib/CodeGen/PrologEpilogInserter.h @@ -0,0 +1,177 @@ +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. +// +// This pass also implements a shrink wrapping variant of prolog/epilog +// insertion. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PEI_H +#define LLVM_CODEGEN_PEI_H + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Target/TargetRegisterInfo.h" + +namespace llvm { + class RegScavenger; + class MachineBasicBlock; + + class PEI : public MachineFunctionPass { + public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const { + return "Prolog/Epilog Insertion & Frame Finalization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn); + + private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Analysis info for spill/restore placement. + // "CSR": "callee saved register". + + // CSRegSet contains indices into the Callee Saved Register Info + // vector built by calculateCalleeSavedRegisters() and accessed + // via MF.getFrameInfo()->getCalleeSavedInfo(). + typedef SparseBitVector<> CSRegSet; + + // CSRegBlockMap maps MachineBasicBlocks to sets of callee + // saved register indices. + typedef DenseMap CSRegBlockMap; + + // Set and maps for computing CSR spill/restore placement: + // used in function (UsedCSRegs) + // used in a basic block (CSRUsed) + // anticipatable in a basic block (Antic{In,Out}) + // available in a basic block (Avail{In,Out}) + // to be spilled at the entry to a basic block (CSRSave) + // to be restored at the end of a basic block (CSRRestore) + CSRegSet UsedCSRegs; + CSRegBlockMap CSRUsed; + CSRegBlockMap AnticIn, AnticOut; + CSRegBlockMap AvailIn, AvailOut; + CSRegBlockMap CSRSave; + CSRegBlockMap CSRRestore; + + // Entry and return blocks of the current function. + MachineBasicBlock* EntryBlock; + SmallVector ReturnBlocks; + + // Map of MBBs to top level MachineLoops. + DenseMap TLLoops; + + // Flag to control shrink wrapping per-function: + // may choose to skip shrink wrapping for certain + // functions. + bool ShrinkWrapThisFunction; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the curren function. + bool FrameIndexVirtualScavenging; + +#ifndef NDEBUG + // Machine function handle. + MachineFunction* MF; + + // Flag indicating that the current function + // has at least one "short" path in the machine + // CFG from the entry block to an exit block. 
+    bool HasFastExitPath;
+#endif
+
+    bool calculateSets(MachineFunction &Fn);
+    bool calcAnticInOut(MachineBasicBlock* MBB);
+    bool calcAvailInOut(MachineBasicBlock* MBB);
+    void calculateAnticAvail(MachineFunction &Fn);
+    bool addUsesForMEMERegion(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4>& blks);
+    bool addUsesForTopLevelLoops(SmallVector<MachineBasicBlock*, 4>& blks);
+    bool calcSpillPlacements(MachineBasicBlock* MBB,
+                             SmallVector<MachineBasicBlock*, 4> &blks,
+                             CSRegBlockMap &prevSpills);
+    bool calcRestorePlacements(MachineBasicBlock* MBB,
+                               SmallVector<MachineBasicBlock*, 4> &blks,
+                               CSRegBlockMap &prevRestores);
+    void placeSpillsAndRestores(MachineFunction &Fn);
+    void placeCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateCallsInformation(MachineFunction &Fn);
+    void calculateCalleeSavedRegisters(MachineFunction &Fn);
+    void insertCSRSpillsAndRestores(MachineFunction &Fn);
+    void calculateFrameObjectOffsets(MachineFunction &Fn);
+    void replaceFrameIndices(MachineFunction &Fn);
+    void scavengeFrameVirtualRegs(MachineFunction &Fn);
+    void insertPrologEpilogCode(MachineFunction &Fn);
+
+    // Initialize DFA sets, called before iterations.
+    void clearAnticAvailSets();
+    // Clear all sets constructed by shrink wrapping.
+    void clearAllSets();
+
+    // Initialize all shrink wrapping data.
+    void initShrinkWrappingInfo();
+
+    // Conveniences for dealing with machine loops.
+    MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP);
+    MachineLoop* getTopLevelLoopParent(MachineLoop *LP);
+
+    // Propagate CSRs used in MBB to all MBBs of loop LP.
+    void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP);
+
+    // Convenience for recognizing return blocks.
+    bool isReturnBlock(MachineBasicBlock* MBB);
+
+#ifndef NDEBUG
+    // Debugging methods.
+
+    // Mark this function as having fast exit paths.
+    void findFastExitPath();
+
+    // Verify placement of spills/restores.
+    void verifySpillRestorePlacement();
+
+    std::string getBasicBlockName(const MachineBasicBlock* MBB);
+    std::string stringifyCSRegSet(const CSRegSet& s);
+    void dumpSet(const CSRegSet& s);
+    void dumpUsed(MachineBasicBlock* MBB);
+    void dumpAllUsed();
+    void dumpSets(MachineBasicBlock* MBB);
+    void dumpSets1(MachineBasicBlock* MBB);
+    void dumpAllSets();
+    void dumpSRSets();
+#endif
+
+  };
+} // End llvm namespace
+#endif
diff --git a/final/lib/CodeGen/PseudoSourceValue.cpp b/final/lib/CodeGen/PseudoSourceValue.cpp
new file mode 100644
index 00000000000..73b66d868f3
--- /dev/null
+++ b/final/lib/CodeGen/PseudoSourceValue.cpp
@@ -0,0 +1,134 @@
+//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PseudoSourceValue class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <map>
+using namespace llvm;
+
+namespace {
+struct PSVGlobalsTy {
+  // PseudoSourceValues are immutable so don't need locking.
+  const PseudoSourceValue PSVs[4];
+  sys::Mutex Lock;  // Guards FSValues, but not the values inside it.
+ std::map FSValues; + + PSVGlobalsTy() : PSVs() {} + ~PSVGlobalsTy() { + for (std::map::iterator + I = FSValues.begin(), E = FSValues.end(); I != E; ++I) { + delete I->second; + } + } +}; + +static ManagedStatic PSVGlobals; + +} // anonymous namespace + +const PseudoSourceValue *PseudoSourceValue::getStack() +{ return &PSVGlobals->PSVs[0]; } +const PseudoSourceValue *PseudoSourceValue::getGOT() +{ return &PSVGlobals->PSVs[1]; } +const PseudoSourceValue *PseudoSourceValue::getJumpTable() +{ return &PSVGlobals->PSVs[2]; } +const PseudoSourceValue *PseudoSourceValue::getConstantPool() +{ return &PSVGlobals->PSVs[3]; } + +static const char *const PSVNames[] = { + "Stack", + "GOT", + "JumpTable", + "ConstantPool" +}; + +// FIXME: THIS IS A HACK!!!! +// Eventually these should be uniqued on LLVMContext rather than in a managed +// static. For now, we can safely use the global context for the time being to +// squeak by. +PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) : + Value(Type::getInt8PtrTy(getGlobalContext()), + Subclass) {} + +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[this - PSVGlobals->PSVs]; +} + +const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) { + PSVGlobalsTy &PG = *PSVGlobals; + sys::ScopedLock locked(PG.Lock); + const PseudoSourceValue *&V = PG.FSValues[FI]; + if (!V) + V = new FixedStackPseudoSourceValue(FI); + return V; +} + +bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { + if (this == getStack()) + return false; + if (this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return true; + llvm_unreachable("Unknown PseudoSourceValue!"); + return false; +} + +bool PseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + if (this == getStack() || + this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return false; + llvm_unreachable("Unknown PseudoSourceValue!"); + return true; +} + +bool PseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { + if (this == getGOT() || + this == getConstantPool() || + this == getJumpTable()) + return false; + return true; +} + +bool FixedStackPseudoSourceValue::isConstant(const MachineFrameInfo *MFI) const{ + return MFI && MFI->isImmutableObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + // Negative frame indices are used for special things that don't + // appear in LLVM IR. Non-negative indices may be used for things + // like static allocas. + if (!MFI) + return FI >= 0; + // Spill slots should not alias others. + return !MFI->isFixedObjectIndex(FI) && !MFI->isSpillSlotObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { + if (!MFI) + return true; + // Spill slots will not alias any LLVM IR value. 
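Stepping back, getFixedStack above is a common lazy-uniquing shape: index a map
by key, create the value only on first touch, and hand back the cached pointer
thereafter. A minimal sketch (the sys::ScopedLock is elided here; all names are
hypothetical):

    #include <map>

    struct ToyPSV { int FI; explicit ToyPSV(int fi) : FI(fi) {} };

    // One ToyPSV per frame index, created on demand. operator[] default-
    // inserts a null pointer slot, which we fill exactly once.
    static const ToyPSV *getFixedStackToy(int FI) {
      static std::map<int, const ToyPSV*> Cache;
      const ToyPSV *&V = Cache[FI];
      if (!V)
        V = new ToyPSV(FI);
      return V;
    }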
+  return !MFI->isSpillSlotObjectIndex(FI);
+}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+  OS << "FixedStack" << FI;
+}
diff --git a/final/lib/CodeGen/README.txt b/final/lib/CodeGen/README.txt
new file mode 100644
index 00000000000..b655dda4115
--- /dev/null
+++ b/final/lib/CodeGen/README.txt
@@ -0,0 +1,199 @@
+//===---------------------------------------------------------------------===//
+
+Common register allocation / spilling problem:
+
+        mul lr, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        ldr r4, [sp, #+52]
+        mla r4, r3, lr, r4
+
+can be:
+
+        mul lr, r4, lr
+        mov r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+and then "merge" mul and mov:
+
+        mul r4, r4, lr
+        str lr, [sp, #+52]
+        ldr lr, [r1, #+32]
+        sxth r3, r3
+        mla r4, r3, lr, r4
+
+It also increases the likelihood that the store may become dead.
+
+//===---------------------------------------------------------------------===//
+
+bb27 ...
+        ...
+        %reg1037 = ADDri %reg1039, 1
+        %reg1038 = ADDrs %reg1032, %reg1039, %NOREG, 10
+    Successors according to CFG: 0x8b03bf0 (#5)
+
+bb76 (0x8b03bf0, LLVM BB @0x8b032d0, ID#5):
+    Predecessors according to CFG: 0x8b0c5f0 (#3) 0x8b0a7c0 (#4)
+        %reg1039 = PHI %reg1070, mbb, %reg1037, mbb
+
+Note ADDri is not a two-address instruction. However, its result %reg1037 is an
+operand of the PHI node in bb76 and its operand %reg1039 is the result of the
+PHI node. We should treat it as a two-address code and make sure the ADDri is
+scheduled after any node that reads %reg1039.
+
+//===---------------------------------------------------------------------===//
+
+Use local info (i.e. register scavenger) to assign it a free register to allow
+reuse:
+        ldr r3, [sp, #+4]
+        add r3, r3, #3
+        ldr r2, [sp, #+8]
+        add r2, r2, #2
+        ldr r1, [sp, #+4]  <==
+        add r1, r1, #1
+        ldr r0, [sp, #+4]
+        add r0, r0, #2
+
+//===---------------------------------------------------------------------===//
+
+LLVM aggressively lifts CSE out of loops. Sometimes this can have negative
+side-effects:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+load [i + R1]
+...
+load [i + R2]
+...
+load [i + R3]
+
+Suppose there is high register pressure; R1, R2, and R3 can be spilled. We need
+to implement proper re-materialization to handle this:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+R1 = X + 4  @ re-materialized
+load [i + R1]
+...
+R2 = X + 7  @ re-materialized
+load [i + R2]
+...
+R3 = X + 15 @ re-materialized
+load [i + R3]
+
+Furthermore, with re-association, we can enable sharing:
+
+R1 = X + 4
+R2 = X + 7
+R3 = X + 15
+
+loop:
+T = i + X
+load [T + 4]
+...
+load [T + 7]
+...
+load [T + 15]
+//===---------------------------------------------------------------------===//
+
+It's not always a good idea to choose rematerialization over spilling. If all
+the load / store instructions would be folded then spilling is cheaper because
+it won't require new live intervals / registers. See 2003-05-31-LongShifts for
+an example.
+
+//===---------------------------------------------------------------------===//
+
+With a copying garbage collector, derived pointers must not be retained across
+collector safe points; the collector could move the objects and invalidate the
+derived pointer. This is bad enough in the first place, but safe points can
+crop up unpredictably.
Consider:
+
+        %array = load { i32, [0 x %obj] }** %array_addr
+        %nth_el = getelementptr { i32, [0 x %obj] }* %array, i32 0, i32 %n
+        %old = load %obj** %nth_el
+        %z = div i64 %x, %y
+        store %obj* %new, %obj** %nth_el
+
+If the i64 division is lowered to a libcall, then a safe point will (must)
+appear for the call site. If a collection occurs, %array and %nth_el no longer
+point into the correct object.
+
+The fix for this is to copy address calculations so that dependent pointers
+are never live across safe point boundaries. But the loads cannot be copied
+like this if there was an intervening store, so this may be hard to get right.
+
+Only a concurrent mutator can trigger a collection at the libcall safe point.
+So single-threaded programs do not have this requirement, even with a copying
+collector. Still, LLVM optimizations would probably undo a front-end's careful
+work.
+
+//===---------------------------------------------------------------------===//
+
+The ocaml frametable structure supports liveness information. It would be good
+to support it.
+
+//===---------------------------------------------------------------------===//
+
+The FIXME in ComputeCommonTailLength in BranchFolding.cpp needs to be
+revisited. The check is there to work around a misuse of directives in inline
+assembly.
+
+//===---------------------------------------------------------------------===//
+
+It would be good to detect collector/target compatibility instead of silently
+doing the wrong thing.
+
+//===---------------------------------------------------------------------===//
+
+It would be really nice to be able to write patterns in .td files for copies,
+which would eliminate a bunch of explicit predicates on them (e.g. no side
+effects). Once this is in place, it would be even better to have tblgen
+synthesize the various copy insertion/inspection methods in TargetInstrInfo.
+
+//===---------------------------------------------------------------------===//
+
+Stack coloring improvements:
+
+1. Do proper LiveStackAnalysis on all stack objects including those which are
+   not spill slots.
+2. Reorder objects to fill in gaps between objects.
+   e.g. 4, 1, <gap>, 4, 1, 1, 1, <gap>, 4 => 4, 1, 1, 1, 1, 4, 4
+   (a layout sketch for this appears after this list)
+
+//===---------------------------------------------------------------------===//
+
+The scheduler should be able to sort nearby instructions by their address. For
+example, in an expanded memset sequence it's not uncommon to see code like this:
+
+  movl $0, 4(%rdi)
+  movl $0, 8(%rdi)
+  movl $0, 12(%rdi)
+  movl $0, 0(%rdi)
+
+Each of the stores is independent, and the scheduler is currently making an
+arbitrary decision about the order.
+
+//===---------------------------------------------------------------------===//
+
+Another opportunity in this code is that the $0 could be moved to a register:
+
+  movl $0, 4(%rdi)
+  movl $0, 8(%rdi)
+  movl $0, 12(%rdi)
+  movl $0, 0(%rdi)
+
+This would save substantial code size, especially for longer sequences like
+this. It would be easy to have a rule telling isel to avoid matching MOV32mi
+if the immediate has more than some fixed number of uses. It's more involved
+to teach the register allocator how to do late folding to recover from
+excessive register pressure.
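For the reordering item in the stack-coloring note above, one simple way to
avoid such gaps is to lay objects out largest-first. A sketch assuming each
object's alignment equals its size (a heuristic illustration, not the note's
exact algorithm):

    #include <algorithm>
    #include <functional>
    #include <vector>

    // Total frame bytes used when objects are placed largest-first.
    // With sizes {4,1,4,1,1,1,4} this packs to 16 bytes with no padding,
    // versus 20 in the original order from the example above.
    static unsigned layoutLargestFirst(std::vector<unsigned> Sizes) {
      std::sort(Sizes.begin(), Sizes.end(), std::greater<unsigned>());
      unsigned Offset = 0;
      for (size_t i = 0, e = Sizes.size(); i != e; ++i) {
        unsigned Align = Sizes[i]; // assume size == alignment
        Offset = (Offset + Align - 1) / Align * Align;
        Offset += Sizes[i];
      }
      return Offset;
    }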
+ diff --git a/final/lib/CodeGen/RegAllocBase.h b/final/lib/CodeGen/RegAllocBase.h new file mode 100644 index 00000000000..5af0ce79acf --- /dev/null +++ b/final/lib/CodeGen/RegAllocBase.h @@ -0,0 +1,182 @@ +//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class, which is the skeleton of a basic +// register allocation algorithm and interface for extending it. It provides the +// building blocks on which to construct other experimental allocators and test +// the validity of two principles: +// +// - If virtual and physical register liveness is modeled using intervals, then +// on-the-fly interference checking is cheap. Furthermore, interferences can be +// lazily cached and reused. +// +// - Register allocation complexity, and generated code performance is +// determined by the effectiveness of live range splitting rather than optimal +// coloring. +// +// Following the first principle, interfering checking revolves around the +// LiveIntervalUnion data structure. +// +// To fulfill the second principle, the basic allocator provides a driver for +// incremental splitting. It essentially punts on the problem of register +// coloring, instead driving the assignment of virtual to physical registers by +// the cost of splitting. The basic allocator allows for heuristic reassignment +// of registers, if a more sophisticated allocator chooses to do that. +// +// This framework provides a way to engineer the compile time vs. code +// quality trade-off without relying on a particular theoretical solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_REGALLOCBASE +#define LLVM_CODEGEN_REGALLOCBASE + +#include "llvm/ADT/OwningPtr.h" +#include "LiveIntervalUnion.h" + +namespace llvm { + +template class SmallVectorImpl; +class TargetRegisterInfo; +class VirtRegMap; +class LiveIntervals; +class Spiller; + +// Forward declare a priority queue of live virtual registers. If an +// implementation needs to prioritize by anything other than spill weight, then +// this will become an abstract base class with virtual calls to push/get. +class LiveVirtRegQueue; + +/// RegAllocBase provides the register allocation driver and interface that can +/// be extended to add interesting heuristics. +/// +/// Register allocators must override the selectOrSplit() method to implement +/// live range splitting. They must also override enqueue/dequeue to provide an +/// assignment order. +class RegAllocBase { + LiveIntervalUnion::Allocator UnionAllocator; +protected: + // Array of LiveIntervalUnions indexed by physical register. + class LiveUnionArray { + unsigned NumRegs; + LiveIntervalUnion *Array; + public: + LiveUnionArray(): NumRegs(0), Array(0) {} + ~LiveUnionArray() { clear(); } + + unsigned numRegs() const { return NumRegs; } + + void init(LiveIntervalUnion::Allocator &, unsigned NRegs); + + void clear(); + + LiveIntervalUnion& operator[](unsigned PhysReg) { + assert(PhysReg < NumRegs && "physReg out of bounds"); + return Array[PhysReg]; + } + }; + + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + LiveUnionArray PhysReg2LiveUnion; + + // Current queries, one per physreg. 
They must be reinitialized each time we + // query on a new live virtual register. + OwningArrayPtr Queries; + + RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + + virtual ~RegAllocBase() {} + + // A RegAlloc pass should call this before allocatePhysRegs. + void init(VirtRegMap &vrm, LiveIntervals &lis); + + // Get an initialized query to check interferences between lvr and preg. Note + // that Query::init must be called at least once for each physical register + // before querying a new live virtual register. This ties Queries and + // PhysReg2LiveUnion together. + LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { + Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + return Queries[PhysReg]; + } + + // The top-level driver. The output is a VirtRegMap that us updated with + // physical register assignments. + // + // If an implementation wants to override the LiveInterval comparator, we + // should modify this interface to allow passing in an instance derived from + // LiveVirtRegQueue. + void allocatePhysRegs(); + + // Get a temporary reference to a Spiller instance. + virtual Spiller &spiller() = 0; + + /// enqueue - Add VirtReg to the priority queue of unassigned registers. + virtual void enqueue(LiveInterval *LI) = 0; + + /// dequeue - Return the next unassigned register, or NULL. + virtual LiveInterval *dequeue() = 0; + + // A RegAlloc pass should override this to provide the allocation heuristics. + // Each call must guarantee forward progess by returning an available PhysReg + // or new set of split live virtual registers. It is up to the splitter to + // converge quickly toward fully spilled live ranges. + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &splitLVRs) = 0; + + // A RegAlloc pass should call this when PassManager releases its memory. + virtual void releaseMemory(); + + // Helper for checking interference between a live virtual register and a + // physical register, including all its register aliases. If an interference + // exists, return the interfering register, which may be preg or an alias. + unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg); + + /// assign - Assign VirtReg to PhysReg. + /// This should not be called from selectOrSplit for the current register. + void assign(LiveInterval &VirtReg, unsigned PhysReg); + + /// unassign - Undo a previous assignment of VirtReg to PhysReg. + /// This can be invoked from selectOrSplit, but be careful to guarantee that + /// allocation is making progress. + void unassign(LiveInterval &VirtReg, unsigned PhysReg); + + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. + bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs); + + /// addMBBLiveIns - Add physreg liveins to basic blocks. + void addMBBLiveIns(MachineFunction *); + +#ifndef NDEBUG + // Verify each LiveIntervalUnion. + void verify(); +#endif + + // Use this group name for NamedRegionTimer. + static const char *TimerGroupName; + +public: + /// VerifyEnabled - True when -verify-regalloc is given. 
+ static bool VerifyEnabled; + +private: + void seedLiveRegs(); + + void spillReg(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs); +}; + +} // end namespace llvm + +#endif // !defined(LLVM_CODEGEN_REGALLOCBASE) diff --git a/final/lib/CodeGen/RegAllocBasic.cpp b/final/lib/CodeGen/RegAllocBasic.cpp new file mode 100644 index 00000000000..6923908a32d --- /dev/null +++ b/final/lib/CodeGen/RegAllocBasic.cpp @@ -0,0 +1,537 @@ +//===-- RegAllocBasic.cpp - basic register allocator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RABasic function pass, which provides a minimal +// implementation of the basic register allocator. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "LiveIntervalUnion.h" +#include "RegAllocBase.h" +#include "RenderMachineFunction.h" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Function.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +#include +#include + +using namespace llvm; + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", + createBasicRegisterAllocator); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char *RegAllocBase::TimerGroupName = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +namespace { + struct CompSpillWeight { + bool operator()(LiveInterval *A, LiveInterval *B) const { + return A->weight < B->weight; + } + }; +} + +namespace { +/// RABasic provides a minimal implementation of the basic register allocation +/// algorithm. It prioritizes live virtual registers by spill weight and spills +/// whenever a register is unavailable. This is not practical in production but +/// provides a useful baseline both for measuring other allocators and comparing +/// the speed of the basic algorithm against other styles of allocators. 
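+// Illustrative aside (not part of the original patch): with CompSpillWeight
+// as its comparator, std::priority_queue pops the interval with the largest
+// spill weight first, so RABasic hands the most expensive-to-spill ranges to
+// selectOrSplit before the cheap ones. A minimal sketch:
+//
+//   std::priority_queue<LiveInterval*, std::vector<LiveInterval*>,
+//                       CompSpillWeight> Q;
+//   Q.push(A);                    // A->weight == 2.0
+//   Q.push(B);                    // B->weight == 5.0
+//   LiveInterval *Next = Q.top(); // B: the highest weight comes out first
+//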
+class RABasic : public MachineFunctionPass, public RegAllocBase +{ + // context + MachineFunction *MF; + BitVector ReservedRegs; + + // analyses + LiveStacks *LS; + RenderMachineFunction *RMF; + + // state + std::auto_ptr SpillerInstance; + std::priority_queue, + CompSpillWeight> Queue; +public: + RABasic(); + + /// Return the pass name. + virtual const char* getPassName() const { + return "Basic Register Allocator"; + } + + /// RABasic analysis usage. + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + + virtual void releaseMemory(); + + virtual Spiller &spiller() { return *SpillerInstance; } + + virtual float getPriority(LiveInterval *LI) { return LI->weight; } + + virtual void enqueue(LiveInterval *LI) { + Queue.push(LI); + } + + virtual LiveInterval *dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = Queue.top(); + Queue.pop(); + return LI; + } + + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs); + + /// Perform register allocation. + virtual bool runOnMachineFunction(MachineFunction &mf); + + static char ID; +}; + +char RABasic::ID = 0; + +} // end anonymous namespace + +RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry()); + initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); +} + +void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + AU.addRequiredTransitive(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + DEBUG(AU.addRequired()); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RABasic::releaseMemory() { + SpillerInstance.reset(0); + RegAllocBase::releaseMemory(); +} + +#ifndef NDEBUG +// Verify each LiveIntervalUnion. +void RegAllocBase::verify() { + LiveVirtRegBitSet VisitedVRegs; + OwningArrayPtr + unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]); + + // Verify disjoint unions. + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI)); + LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg]; + PhysReg2LiveUnion[PhysReg].verify(VRegs); + // Union + intersection test could be done efficiently in one pass, but + // don't add a method to SparseBitVector unless we really need it. + assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions"); + VisitedVRegs |= VRegs; + } + + // Verify vreg coverage. + for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end(); + liItr != liEnd; ++liItr) { + unsigned reg = liItr->first; + if (TargetRegisterInfo::isPhysicalRegister(reg)) continue; + if (!VRM->hasPhys(reg)) continue; // spilled? 
+ unsigned PhysReg = VRM->getPhys(reg); + if (!unionVRegs[PhysReg].test(reg)) { + dbgs() << "LiveVirtReg " << reg << " not in union " << + TRI->getName(PhysReg) << "\n"; + llvm_unreachable("unallocated live vreg"); + } + } + // FIXME: I'm not sure how to verify spilled intervals. +} +#endif //!NDEBUG + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Instantiate a LiveIntervalUnion for each physical register. +void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator, + unsigned NRegs) { + NumRegs = NRegs; + Array = + static_cast(malloc(sizeof(LiveIntervalUnion)*NRegs)); + for (unsigned r = 0; r != NRegs; ++r) + new(Array + r) LiveIntervalUnion(r, allocator); +} + +void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { + NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled); + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); +} + +void RegAllocBase::LiveUnionArray::clear() { + if (!Array) + return; + for (unsigned r = 0; r != NumRegs; ++r) + Array[r].~LiveIntervalUnion(); + free(Array); + NumRegs = 0; + Array = 0; +} + +void RegAllocBase::releaseMemory() { + PhysReg2LiveUnion.clear(); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. +void RegAllocBase::seedLiveRegs() { + for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { + unsigned RegNum = I->first; + LiveInterval &VirtReg = *I->second; + if (TargetRegisterInfo::isPhysicalRegister(RegNum)) + PhysReg2LiveUnion[RegNum].unify(VirtReg); + else + enqueue(&VirtReg); + } +} + +void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << '\n'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + PhysReg2LiveUnion[PhysReg].unify(VirtReg); + ++NumAssigned; +} + +void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << '\n'); + assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign"); + PhysReg2LiveUnion[PhysReg].extract(VirtReg); + VRM->clearVirt(VirtReg.reg); + ++NumUnassigned; +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. 
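+    // In sketch form, each round of the loop body below behaves like:
+    //   unsigned PhysReg = selectOrSplit(*VirtReg, SplitVRegs);
+    //   if (PhysReg) assign(*VirtReg, PhysReg);
+    //   // otherwise VirtReg was spilled or split; the non-empty intervals
+    //   // in SplitVRegs are re-enqueued and handled in later rounds.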
+ DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); + typedef SmallVector VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg) + assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = *I; + if (SplitVirtReg->empty()) continue; + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} + +// Check if this live virtual register interferes with a physical register. If +// not, then check for interference on each register that aliases with the +// physical register. Return the interfering register. +unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + if (query(VirtReg, *AliasI).checkInterference()) + return *AliasI; + return 0; +} + +// Helper for spillInteferences() that spills all interfering vregs currently +// assigned to this physical register. +void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs) { + LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg); + assert(Q.seenAllInterferences() && "need collectInterferences()"); + const SmallVectorImpl &PendingSpills = Q.interferingVRegs(); + + for (SmallVectorImpl::const_iterator I = PendingSpills.begin(), + E = PendingSpills.end(); I != E; ++I) { + LiveInterval &SpilledVReg = **I; + DEBUG(dbgs() << "extracting from " << + TRI->getName(PhysReg) << " " << SpilledVReg << '\n'); + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + unassign(SpilledVReg, PhysReg); + + // Spill the extracted interval. + spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + } + // After extracting segments, the query's results are invalid. But keep the + // contents valid until we're done accessing pendingSpills. + Q.clear(); +} + +// Spill or split all live virtual registers currently unified under PhysReg +// that interfere with VirtReg. The newly spilled or split live intervals are +// returned by appending them to SplitVRegs. +bool +RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl &SplitVRegs) { + // Record each interference and determine if all are spillable before mutating + // either the union or live intervals. + unsigned NumInterferences = 0; + // Collect interferences assigned to any alias of the physical register. + for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) { + LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI); + NumInterferences += QAlias.collectInterferingVRegs(); + if (QAlias.seenUnspillableVReg()) { + return false; + } + } + DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << + " interferences with " << VirtReg << "\n"); + assert(NumInterferences > 0 && "expect interference"); + + // Spill each interfering vreg allocated to PhysReg or an alias. + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) + spillReg(VirtReg, *AliasI, SplitVRegs); + return true; +} + +// Add newly allocated physical registers to the MBB live in sets. 
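+// For example (EAX is a hypothetical target register here): if a segment of
+// the EAX LiveIntervalUnion spans the boundary into a non-entry block MBB,
+// the loop below records that with MBB->addLiveIn(EAX) so downstream passes
+// see EAX as live-in to the block.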
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { + NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); + typedef SmallVector MBBVec; + MBBVec liveInMBBs; + MachineBasicBlock &entryMBB = *MF->begin(); + + for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { + LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; + if (LiveUnion.empty()) + continue; + for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); + ++SI) { + + // Find the set of basic blocks which this range is live into... + liveInMBBs.clear(); + if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; + + // And add the physreg for this interval to their live-in sets. + for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); + I != E; ++I) { + MachineBasicBlock *MBB = *I; + if (MBB == &entryMBB) continue; + if (MBB->isLiveIn(PhysReg)) continue; + MBB->addLiveIn(PhysReg); + } + } + } +} + + +//===----------------------------------------------------------------------===// +// RABasic Implementation +//===----------------------------------------------------------------------===// + +// Driver for the register assignment and splitting heuristics. +// Manages iteration over the LiveIntervalUnions. +// +// This is a minimal implementation of register assignment and splitting that +// spills whenever we run out of registers. +// +// selectOrSplit can only be called once per live virtual register. We then do a +// single interference test for each register the correct class until we find an +// available register. So, the number of interference tests in the worst case is +// |vregs| * |machineregs|. And since the number of interference tests is +// minimal, there is no value in caching them outside the scope of +// selectOrSplit(). +unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &SplitVRegs) { + // Populate a list of physical register spill candidates. + SmallVector PhysRegSpillCands; + + // Check for an available register in this class. + const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg); + + for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF), + E = TRC->allocation_order_end(*MF); + I != E; ++I) { + + unsigned PhysReg = *I; + if (ReservedRegs.test(PhysReg)) continue; + + // Check interference and as a side effect, intialize queries for this + // VirtReg and its aliases. + unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg); + if (interfReg == 0) { + // Found an available register. + return PhysReg; + } + LiveInterval *interferingVirtReg = + Queries[interfReg].firstInterference().liveUnionPos().value(); + + // The current VirtReg must either be spillable, or one of its interferences + // must have less spill weight. + if (interferingVirtReg->weight < VirtReg.weight ) { + PhysRegSpillCands.push_back(PhysReg); + } + } + // Try to spill another interfering reg with less spill weight. + for (SmallVectorImpl::iterator PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue; + + assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 && + "Interference after spill."); + // Tell the caller to allocate to this newly freed physical register. + return *PhysRegI; + } + // No other spill candidates were found, so spill the current VirtReg. 
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); + SmallVector pendingSpills; + + spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RABasic::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + DEBUG(RMF = &getAnalysis()); + + RegAllocBase::init(getAnalysis(), getAnalysis()); + + ReservedRegs = TRI->getReservedRegs(*MF); + + SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + + allocatePhysRegs(); + + addMBBLiveIns(MF); + + // Diagnostic output before rewriting + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); + + // optional HTML output + DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM)); + + // FIXME: Verification currently must run before VirtRegRewriter. We should + // make the rewriter a separate pass and override verifyAnalysis instead. When + // that happens, verification naturally falls under VerifyMachineCode. +#ifndef NDEBUG + if (VerifyEnabled) { + // Verify accuracy of LiveIntervals. The standard machine code verifier + // ensures that each LiveIntervals covers all uses of the virtual reg. + + // FIXME: MachineVerifier is badly broken when using the standard + // spiller. Always use -spiller=inline with -verify-regalloc. Even with the + // inline spiller, some tests fail to verify because the coalescer does not + // always generate verifiable code. + MF->verify(this, "In RABasic::verify"); + + // Verify that LiveIntervals are partitioned into unions and disjoint within + // the unions. + verify(); + } +#endif // !NDEBUG + + // Run rewriter + VRM->rewrite(LIS->getSlotIndexes()); + + // The pass output is in VirtRegMap. Release all the transient data. + releaseMemory(); + + return true; +} + +FunctionPass* llvm::createBasicRegisterAllocator() +{ + return new RABasic(); +} diff --git a/final/lib/CodeGen/RegAllocFast.cpp b/final/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 00000000000..15036e38b89 --- /dev/null +++ b/final/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,1076 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +using namespace llvm; + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), + isBulkSpilling(false) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + private: + const TargetMachine *TM; + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + // Basic block currently being allocated. + MachineBasicBlock *MBB; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap StackSlotForVirtReg; + + // Everything we know about a live virtual register. + struct LiveReg { + MachineInstr *LastUse; // Last instr to use reg. + unsigned PhysReg; // Currently held here. + unsigned short LastOpNum; // OpNum on LastUse. + bool Dirty; // Register needs spill. + + LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0), + Dirty(false) {} + }; + + typedef DenseMap LiveRegMap; + typedef LiveRegMap::value_type LiveRegEntry; + + // LiveVirtRegs - This map contains entries for each virtual register + // that is currently available in a physical register. + LiveRegMap LiveVirtRegs; + + DenseMap LiveDbgValueMap; + + // RegState - Track the state of a physical register. + enum RegState { + // A disabled register is not available for allocation, but an alias may + // be in use. A register can only be moved out of the disabled state if + // all aliases are disabled. + regDisabled, + + // A free register is not currently in use and can be allocated + // immediately without checking aliases. + regFree, + + // A reserved register has been assigned expolicitly (e.g., setting up a + // call parameter), and it remains reserved until it is used. + regReserved + + // A register state may also be a virtual register number, indication that + // the physical register is currently allocated to a virtual register. In + // that case, LiveVirtRegs contains the inverse mapping. + }; + + // PhysRegState - One of the RegState enums, or a virtreg. + std::vector PhysRegState; + + // UsedInInstr - BitVector of physregs that are used in the current + // instruction, and so cannot be allocated. + BitVector UsedInInstr; + + // Allocatable - vector of allocatable physical registers. 
+ BitVector Allocatable; + + // SkippedInstrs - Descriptors of instructions whose clobber list was + // ignored because all registers were spilled. It is still necessary to + // mark all the clobbered registers as used by the function. + SmallPtrSet SkippedInstrs; + + // isBulkSpilling - This flag is set when LiveRegMap will be cleared + // completely after spilling all live registers. LiveRegMap entries should + // not be erased. + bool isBulkSpilling; + + enum { + spillClean = 1, + spillDirty = 100, + spillImpossible = ~0u + }; + public: + virtual const char *getPassName() const { + return "Fast Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(PHIEliminationID); + AU.addRequiredID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + bool runOnMachineFunction(MachineFunction &Fn); + void AllocateBasicBlock(); + void handleThroughOperands(MachineInstr *MI, + SmallVectorImpl &VirtDead); + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + bool isLastUseOfLocalReg(MachineOperand&); + + void addKillFlag(const LiveReg&); + void killVirtReg(LiveRegMap::iterator); + void killVirtReg(unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); + void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); + + void usePhysReg(MachineOperand&); + void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); + unsigned calcSpillCost(unsigned PhysReg) const; + void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg); + void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint); + LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + void spillAll(MachineInstr *MI); + bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + }; + char RAFast::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. +int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + if (SS != -1) + return SS; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + +/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to +/// its virtual register, and it is guaranteed to be a block-local register. +/// +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // Check for non-debug uses or defs following MO. + // This is the most likely way to fail - fast path it. + MachineOperand *Next = &MO; + while ((Next = Next->getNextOperandForReg())) + if (!Next->isDebug()) + return false; + + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; + + // Check that the use/def chain has exactly one operand - MO. + return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO; +} + +/// addKillFlag - Set kill flags on last use of a virtual register. 
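+/// For instance (sketch, hypothetical register): if the last recorded use of
+/// a virtreg held in ECX is an ordinary use operand, that operand gets a kill
+/// marker; when the operand was rewritten to a sub-register and no longer
+/// names ECX itself, an implicit kill of ECX is added to the instruction
+/// instead.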
+void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { + if (MO.getReg() == LR.PhysReg) + MO.setIsKill(); + else + LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + } +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(LRI->second); + const LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + PhysRegState[LR.PhysReg] = regFree; + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) + LiveVirtRegs.erase(LRI); +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); +} + +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); +} + +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { + LiveReg &LR = LRI->second; + assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping"); + + if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. + bool SpillKill = LR.LastUse != MI; + LR.Dirty = false; + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI) + << " in " << PrintReg(LR.PhysReg, TRI)); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->first); + int FI = getStackSpaceFor(LRI->first, RC); + DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); + ++NumStores; // Update statistics + + // If this register is used by DBG_VALUE then insert new DBG_VALUE to + // identify spilled location as the place to find corresponding variable's + // value. + if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) { + const MDNode *MDPtr = + DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); + int64_t Offset = 0; + if (DBG->getOperand(1).isImm()) + Offset = DBG->getOperand(1).getImm(); + DebugLoc DL; + if (MI == MBB->end()) { + // If MI is at basic block end then use last instruction's location. + MachineBasicBlock::iterator EI = MI; + DL = (--EI)->getDebugLoc(); + } + else + DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { + MachineBasicBlock *MBB = DBG->getParent(); + MBB->insert(MI, NewDV); + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); + LiveDbgValueMap[LRI->first] = NewDV; + } + } + if (SpillKill) + LR.LastUse = 0; // Don't kill register again + } + killVirtReg(LRI); +} + +/// spillAll - Spill all dirty virtregs without killing them. 
+void RAFast::spillAll(MachineInstr *MI) { + if (LiveVirtRegs.empty()) return; + isBulkSpilling = true; + // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order + // of spilling here is deterministic, if arbitrary. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + spillVirtReg(MI, i); + LiveVirtRegs.clear(); + isBulkSpilling = false; +} + +/// usePhysReg - Handle the direct use of a physical register. +/// Check that the register is not used by a virtreg. +/// Kill the physreg, marking it free. +/// This may add implicit kills to MO->getParent() and invalidate MO. +void RAFast::usePhysReg(MachineOperand &MO) { + unsigned PhysReg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Bad usePhysReg operand"); + + switch (PhysRegState[PhysReg]) { + case regDisabled: + break; + case regReserved: + PhysRegState[PhysReg] = regFree; + // Fall through + case regFree: + UsedInInstr.set(PhysReg); + MO.setIsKill(); + return; + default: + // The physreg was allocated to a virtual register. That means the value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); + } + + // Maybe a superregister is reserved? + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + switch (PhysRegState[Alias]) { + case regDisabled: + break; + case regReserved: + assert(TRI->isSuperRegister(PhysReg, Alias) && + "Instruction is not using a subregister of a reserved register"); + // Leave the superregister in the working set. + PhysRegState[Alias] = regFree; + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + UsedInInstr.set(Alias); + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + PhysRegState[Alias] = regDisabled; + break; + default: + llvm_unreachable("Instruction uses an alias of an allocated register"); + } + } + + // All aliases are disabled, bring register into working set. + PhysRegState[PhysReg] = regFree; + UsedInInstr.set(PhysReg); + MO.setIsKill(); +} + +/// definePhysReg - Mark PhysReg as reserved or free after spilling any +/// virtregs. This is very similar to defineVirtReg except the physreg is +/// reserved instead of allocated. +void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, + RegState NewState) { + UsedInInstr.set(PhysReg); + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. + case regFree: + case regReserved: + PhysRegState[PhysReg] = NewState; + return; + } + + // This is a disabled register, disable all aliases. + PhysRegState[PhysReg] = NewState; + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + UsedInInstr.set(Alias); + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. + case regFree: + case regReserved: + PhysRegState[Alias] = regDisabled; + if (TRI->isSuperRegister(PhysReg, Alias)) + return; + break; + } + } +} + + +// calcSpillCost - Return the cost of spilling clearing out PhysReg and +// aliases so it is free for allocation. +// Returns 0 when PhysReg is free or disabled with all aliases disabled - it +// can be allocated directly. 
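+// Otherwise the cost adds up per register: spillClean (1) for a clean value,
+// spillDirty (100) for one whose contents must be written back, so e.g.
+// evicting one dirty and one clean interfering virtreg costs 101 and
+// allocVirtReg prefers the cheaper candidate.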
+// Returns spillImpossible when PhysReg or an alias can't be spilled. +unsigned RAFast::calcSpillCost(unsigned PhysReg) const { + if (UsedInInstr.test(PhysReg)) + return spillImpossible; + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return 0; + case regReserved: + return spillImpossible; + default: + return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + } + + // This is a disabled register, add up const of aliases. + unsigned Cost = 0; + for (const unsigned *AS = TRI->getAliasSet(PhysReg); + unsigned Alias = *AS; ++AS) { + if (UsedInInstr.test(Alias)) + return spillImpossible; + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; + case regFree: + ++Cost; + break; + case regReserved: + return spillImpossible; + default: + Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; + break; + } + } + return Cost; +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to " + << PrintReg(PhysReg, TRI) << "\n"); + PhysRegState[PhysReg] = LRE.first; + assert(!LRE.second.PhysReg && "Already assigned a physreg"); + LRE.second.PhysReg = PhysReg; +} + +/// allocVirtReg - Allocate a physical register for VirtReg. +void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { + const unsigned VirtReg = LRE.first; + + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); + + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + + // Ignore invalid hints. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || !Allocatable.test(Hint))) + Hint = 0; + + // Take hint when possible. + if (Hint) { + switch(calcSpillCost(Hint)) { + default: + definePhysReg(MI, Hint, regFree); + // Fall through. + case 0: + return assignVirtToPhysReg(LRE, Hint); + case spillImpossible: + break; + } + } + + TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF); + TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF); + + // First try to find a completely free register. + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg) && + Allocatable.test(PhysReg)) + return assignVirtToPhysReg(LRE, PhysReg); + } + + DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " + << RC->getName() << "\n"); + + unsigned BestReg = 0, BestCost = spillImpossible; + for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { + if (!Allocatable.test(*I)) + continue; + unsigned Cost = calcSpillCost(*I); + // Cost is 0 when all aliases are already disabled. + if (Cost == 0) + return assignVirtToPhysReg(LRE, *I); + if (Cost < BestCost) + BestReg = *I, BestCost = Cost; + } + + if (BestReg) { + definePhysReg(MI, BestReg, regFree); + return assignVirtToPhysReg(LRE, BestReg); + } + + // Nothing we can do. 
+ std::string msg; + raw_string_ostream Msg(msg); + Msg << "Ran out of registers during register allocation!"; + if (MI->isInlineAsm()) { + Msg << "\nPlease check your inline asm statement for " + << "invalid constraints:\n"; + MI->print(Msg, TM); + } + report_fatal_error(Msg.str()); +} + +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; + if (New) { + // If there is no hint, peek at the only use of this register. + if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); + // It's a copy, use the destination register as a hint. + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + } + allocVirtReg(MI, *LRI, Hint); + } else if (LR.LastUse) { + // Redefining a live register - kill at the last use, unless it is this + // instruction defining VirtReg multiple times. + if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) + addKillFlag(LR); + } + assert(LR.PhysReg && "Register not assigned"); + LR.LastUse = MI; + LR.LastOpNum = OpNum; + LR.Dirty = true; + UsedInInstr.set(LR.PhysReg); + return LRI; +} + +/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. +RAFast::LiveRegMap::iterator +RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); + LiveReg &LR = LRI->second; + MachineOperand &MO = MI->getOperand(OpNum); + if (New) { + allocVirtReg(MI, *LRI, Hint); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " + << PrintReg(LR.PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI); + ++NumLoads; + } else if (LR.Dirty) { + if (isLastUseOfLocalReg(MO)) { + DEBUG(dbgs() << "Killing last use: " << MO << "\n"); + if (MO.isUse()) + MO.setIsKill(); + else + MO.setIsDead(); + } else if (MO.isKill()) { + DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); + MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + MO.setIsDead(false); + } + } else if (MO.isKill()) { + // We must remove kill flags from uses of reloaded registers because the + // register would be killed immediately, and there might be a second use: + // %foo = OR %x, %x + // This would cause a second reload of %x into a different register. + DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); + MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + MO.setIsDead(false); + } + assert(LR.PhysReg && "Register not assigned"); + LR.LastUse = MI; + LR.LastOpNum = OpNum; + UsedInInstr.set(LR.PhysReg); + return LRI; +} + +// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering +// subregs. This may invalidate any operand pointers. +// Return true if the operand kills its register. 
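+// Example (hypothetical registers): an operand reading sub_8bit of a virtreg
+// that was assigned EAX is rewritten via TRI->getSubReg() to name AL, the
+// subreg index is cleared, and an existing kill flag is promoted to a kill
+// of the full register EAX.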
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { + MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.getSubReg()) { + MO.setReg(PhysReg); + return MO.isKill() || MO.isDead(); + } + + // Handle subregister index. + MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setSubReg(0); + + // A kill flag implies killing the full register. Add corresponding super + // register kill. + if (MO.isKill()) { + MI->addRegisterKilled(PhysReg, TRI, true); + return true; + } + return MO.isDead(); +} + +// Handle special instruction operand like early clobbers and tied ops when +// there are additional physreg defines. +void RAFast::handleThroughOperands(MachineInstr *MI, + SmallVectorImpl &VirtDead) { + DEBUG(dbgs() << "Scanning for through registers:"); + SmallSet ThroughRegs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || + (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (ThroughRegs.insert(Reg)) + DEBUG(dbgs() << ' ' << PrintReg(Reg)); + } + } + + // If any physreg defines collide with preallocated through registers, + // we must spill and reallocate. + DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + if (ThroughRegs.count(PhysRegState[Reg])) + definePhysReg(MI, Reg, regFree); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + UsedInInstr.set(*AS); + if (ThroughRegs.count(PhysRegState[*AS])) + definePhysReg(MI, *AS, regFree); + } + } + + SmallVector PartialDefs; + DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + unsigned DefIdx = 0; + if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; + DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " + << DefIdx << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + setPhysReg(MI, i, PhysReg); + // Note: we don't update the def operand yet. That would cause the normal + // def-scan to attempt spilling. + } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + // Reload the register, but don't assign to the operand just yet. + // That would confuse the later phys-def processing pass. + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + PartialDefs.push_back(LRI->second.PhysReg); + } else if (MO.isEarlyClobber()) { + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + } + + // Restore UsedInInstr to a state usable for allocating normal virtual uses. 
+ UsedInInstr.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + UsedInInstr.set(PartialDefs[i]); +} + +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); + + // FIXME: This should probably be added by instruction selection instead? + // If the last instruction in the block is a return, make sure to mark it as + // using all of the live-out values in the function. Things marked both call + // and return are tail calls; do not do this for them. The tail callee need + // not take the same registers as input that it produces as output, and there + // are dependencies for its input registers elsewhere. + if (!MBB->empty() && MBB->back().getDesc().isReturn() && + !MBB->back().getDesc().isCall()) { + MachineInstr *Ret = &MBB->back(); + + for (MachineRegisterInfo::liveout_iterator + I = MF->getRegInfo().liveout_begin(), + E = MF->getRegInfo().liveout_end(); I != E; ++I) { + assert(TargetRegisterInfo::isPhysicalRegister(*I) && + "Cannot have a live-out virtual register."); + + // Add live-out registers as implicit uses. + Ret->addRegisterKilled(*I, TRI, true); + } + } + + PhysRegState.assign(TRI->getNumRegs(), regDisabled); + assert(LiveVirtRegs.empty() && "Mapping not cleared form last block?"); + + MachineBasicBlock::iterator MII = MBB->begin(); + + // Add live-in registers as live. + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + if (Allocatable.test(*I)) + definePhysReg(MII, *I, regReserved); + + SmallVector VirtDead; + SmallVector Coalesced; + + // Otherwise, sequentially allocate each instruction in the MBB. + while (MII != MBB->end()) { + MachineInstr *MI = MII++; + const TargetInstrDesc &TID = MI->getDesc(); + DEBUG({ + dbgs() << "\n>> " << *MI << "Regs:"; + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << TRI->getName(Reg); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "*"; + break; + default: + dbgs() << '=' << PrintReg(PhysRegState[Reg]); + if (LiveVirtRegs[PhysRegState[Reg]].Dirty) + dbgs() << "*"; + assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg && + "Bad inverse map"); + break; + } + } + dbgs() << '\n'; + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->first) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) && + "Bad map value"); + assert(PhysRegState[i->second.PhysReg] == i->first && + "Bad inverse map"); + } + }); + + // Debug values are not allowed to change codegen in any way. 
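+    // Consequently a DBG_VALUE never forces a reload or spill: its virtreg
+    // operand is rewritten to the current physreg if there is one, redirected
+    // at the spill slot via emitFrameIndexDebugValue() if the value lives on
+    // the stack, and cleared with setReg(0) when neither exists.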
+ if (MI->isDebugValue()) { + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + LiveDbgValueMap[Reg] = MI; + LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->second.PhysReg); + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) { + // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + else { + // Modify DBG_VALUE now that the value is in a spill slot. + int64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << + "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; + } else { + // We can't allocate a physreg for a DebugValue; sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + } + } + } + } + // Next instruction. + continue; + } + + // If this is a copy, we may be able to coalesce. + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } + + // Track registers used by instruction. + UsedInInstr.reset(); + + // First scan. + // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } + continue; + } + if (!Allocatable.test(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. 
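+    // Sketch of the problem case: in a two-address instruction such as
+    //   %v1 = ADD %v1<tied>, %v2
+    // the tied use and def must land in the same physreg, so %v1's register
+    // has to stay live through the instruction and must avoid any physreg
+    // defs on the same instruction.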
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; + } + + // Second scan. + // Allocate virtreg uses. + for (unsigned i = 0; i != VirtOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->second.PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; + if (setPhysReg(MI, i, PhysReg)) + killVirtReg(LRI); + } + } + + MRI->addPhysRegsUsed(UsedInInstr); + + // Track registers defined by instruction - early clobbers and tied uses at + // this point. + UsedInInstr.reset(); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + } + + unsigned DefOpEnd = MI->getNumOperands(); + if (TID.isCall()) { + // Spill all virtregs before a call. This serves two purposes: 1. If an + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots, and 2. we don't have to wade through + // all the operands on the call instruction. + DefOpEnd = VirtOpEnd; + DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); + + // The imp-defs are skipped below, but we still need to mark those + // registers as used by the function. + SkippedInstrs.insert(&TID); + } + + // Third scan. + // Allocate defs and collect dead defs. + for (unsigned i = 0; i != DefOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!Allocatable.test(Reg)) continue; + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + continue; + } + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) { + VirtDead.push_back(Reg); + CopyDst = 0; // cancel coalescing; + } else + CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; + } + + // Kill dead defs after the scan to ensure that multiple defs of the same + // register are allocated identically. We didn't need to do this for uses + // because we are crerating our own kill flags, and they are always at the + // last use. + for (unsigned i = 0, e = VirtDead.size(); i != e; ++i) + killVirtReg(VirtDead[i]); + VirtDead.clear(); + + MRI->addPhysRegsUsed(UsedInInstr); + + if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { + DEBUG(dbgs() << "-- coalescing: " << *MI); + Coalesced.push_back(MI); + } else { + DEBUG(dbgs() << "<< " << *MI); + } + } + + // Spill all physical registers holding virtual registers now. 
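+  // RAFast is purely local, so no virtual register may remain in a physreg
+  // across the block boundary; every dirty value is written back to its
+  // stack slot before the first terminator and successors reload from there.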
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n"); + spillAll(MBB->getFirstTerminator()); + + // Erase all the coalesced copies. We are delaying it until now because + // LiveVirtRegs might refer to the instrs. + for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) + MBB->erase(Coalesced[i]); + NumCopies += Coalesced.size(); + + DEBUG(MBB->dump()); +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)Fn.getFunction())->getName() << '\n'); + MF = &Fn; + MRI = &MF->getRegInfo(); + TM = &Fn.getTarget(); + TRI = TM->getRegisterInfo(); + TII = TM->getInstrInfo(); + + UsedInInstr.resize(TRI->getNumRegs()); + Allocatable = TRI->getAllocatableSet(*MF); + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); + MBBi != MBBe; ++MBBi) { + MBB = &*MBBi; + AllocateBasicBlock(); + } + + // Make sure the set of used physregs is closed under subreg operations. + MRI->closePhysRegsUsed(*TRI); + + // Add the clobber lists for all the instructions we skipped earlier. + for (SmallPtrSet::const_iterator + I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) + if (const unsigned *Defs = (*I)->getImplicitDefs()) + while (*Defs) + MRI->setPhysRegUsed(*Defs++); + + SkippedInstrs.clear(); + StackSlotForVirtReg.clear(); + LiveDbgValueMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/final/lib/CodeGen/RegAllocGreedy.cpp b/final/lib/CodeGen/RegAllocGreedy.cpp new file mode 100644 index 00000000000..917e64049c6 --- /dev/null +++ b/final/lib/CodeGen/RegAllocGreedy.cpp @@ -0,0 +1,1286 @@ +//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <queue>
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits,  "Number of split local live ranges");
+STATISTIC(NumReassigned,   "Number of interferences reassigned");
+STATISTIC(NumEvicted,      "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+                                       createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass, public RegAllocBase {
+  // context
+  MachineFunction *MF;
+  BitVector ReservedRegs;
+
+  // analyses
+  SlotIndexes *Indexes;
+  LiveStacks *LS;
+  MachineDominatorTree *DomTree;
+  MachineLoopInfo *Loops;
+  MachineLoopRanges *LoopRanges;
+  EdgeBundles *Bundles;
+  SpillPlacement *SpillPlacer;
+
+  // state
+  std::auto_ptr<Spiller> SpillerInstance;
+  std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+
+  // Live ranges pass through a number of stages as we try to allocate them.
+  // Some of the stages may also create new live ranges:
+  //
+  // - Region splitting.
+  // - Per-block splitting.
+  // - Local splitting.
+  // - Spilling.
+  //
+  // Ranges produced by one of the stages skip the previous stages when they
+  // are dequeued. This improves performance because we can skip interference
+  // checks that are unlikely to give any results. It also guarantees that the
+  // live range splitting algorithm terminates, something that is otherwise
+  // hard to ensure.
+  enum LiveRangeStage {
+    RS_Original, ///< Never seen before, never split.
+    RS_Second,   ///< Second time in the queue.
+    RS_Region,   ///< Produced by region splitting.
+    RS_Block,    ///< Produced by per-block splitting.
+    RS_Local,    ///< Produced by local splitting.
+    RS_Spill     ///< Produced by spilling.
+  };
+
+  IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;
+
+  LiveRangeStage getStage(const LiveInterval &VirtReg) const {
+    return LiveRangeStage(LRStage[VirtReg.reg]);
+  }
+
+  template<typename Iterator>
+  void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
+    LRStage.resize(MRI->getNumVirtRegs());
+    for (;Begin != End; ++Begin)
+      LRStage[(*Begin)->reg] = NewStage;
+  }
+
+  // splitting state.
+  std::auto_ptr<SplitAnalysis> SA;
+  std::auto_ptr<SplitEditor> SE;
+
+  /// All basic blocks where the current register is live.
+  SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints;
+
+  typedef std::pair<SlotIndex, SlotIndex> IndexPair;
+
+  /// Global live range splitting candidate info.
+  struct GlobalSplitCandidate {
+    unsigned PhysReg;
+    SmallVector<IndexPair, 8> Interference;
+    BitVector LiveBundles;
+  };
+
+  /// Candidate info for each PhysReg in AllocationOrder.
+  /// This vector never shrinks, but grows to the size of the largest register
+  /// class.
+  SmallVector<GlobalSplitCandidate, 32> GlobalCand;
+
+  /// For every instruction in SA->UseSlots, store the previous non-copy
+  /// instruction.
+  SmallVector<SlotIndex, 8> PrevSlot;
+
+public:
+  RAGreedy();
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "Greedy Register Allocator";
+  }
+
+  /// RAGreedy analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+  virtual void releaseMemory();
+  virtual Spiller &spiller() { return *SpillerInstance; }
+  virtual void enqueue(LiveInterval *LI);
+  virtual LiveInterval *dequeue();
+  virtual unsigned selectOrSplit(LiveInterval&,
+                                 SmallVectorImpl<LiveInterval*>&);
+
+  /// Perform register allocation.
+  virtual bool runOnMachineFunction(MachineFunction &mf);
+
+  static char ID;
+
+private:
+  bool checkUncachedInterference(LiveInterval&, unsigned);
+  LiveInterval *getSingleInterference(LiveInterval&, unsigned);
+  bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg);
+
+  void mapGlobalInterference(unsigned, SmallVectorImpl<IndexPair>&);
+  float calcSplitConstraints(const SmallVectorImpl<IndexPair>&);
+
+  float calcGlobalSplitCost(const BitVector&);
+  void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+                         SmallVectorImpl<LiveInterval*>&);
+  void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+  SlotIndex getPrevMappedIndex(const MachineInstr*);
+  void calcPrevSlots();
+  unsigned nextSplitPoint(unsigned);
+  bool canEvictInterference(LiveInterval&, unsigned, float&);
+
+  unsigned tryReassign(LiveInterval&, AllocationOrder&,
+                       SmallVectorImpl<LiveInterval*>&);
+  unsigned tryEvict(LiveInterval&, AllocationOrder&,
+                    SmallVectorImpl<LiveInterval*>&);
+  unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+                          SmallVectorImpl<LiveInterval*>&);
+  unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+                         SmallVectorImpl<LiveInterval*>&);
+  unsigned trySplit(LiveInterval&, AllocationOrder&,
+                    SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+  return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_Original) {
+  initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+  initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+  initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+  initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+  initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+  initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+  initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+  initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
+  initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+  initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+  initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<LiveIntervals>();
+  AU.addRequired<SlotIndexes>();
+  AU.addPreserved<SlotIndexes>();
+  if (StrongPHIElim)
+    AU.addRequiredID(StrongPHIEliminationID);
+  AU.addRequiredTransitive<RegisterCoalescer>();
+  AU.addRequired<CalculateSpillWeights>();
+  AU.addRequired<LiveStacks>();
+  AU.addPreserved<LiveStacks>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addPreserved<MachineDominatorTree>();
+  AU.addRequired<MachineLoopInfo>();
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addRequired<MachineLoopRanges>();
+  AU.addPreserved<MachineLoopRanges>();
+  AU.addRequired<VirtRegMap>();
+  AU.addPreserved<VirtRegMap>();
+  AU.addRequired<EdgeBundles>();
+  AU.addRequired<SpillPlacement>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RAGreedy::releaseMemory() {
+  SpillerInstance.reset(0);
+  LRStage.clear();
+  RegAllocBase::releaseMemory();
+}
+
+void RAGreedy::enqueue(LiveInterval *LI) {
+  // Prioritize live ranges by size, assigning larger ranges first.
+  // The queue holds (size, reg) pairs.
+  const unsigned Size = LI->getSize();
+  const unsigned Reg = LI->reg;
+  assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+         "Can only enqueue virtual registers");
+  unsigned Prio;
+
+  LRStage.grow(Reg);
+  if (LRStage[Reg] == RS_Original)
+    // 1st generation ranges are handled first, long -> short.
+    Prio = (1u << 31) + Size;
+  else
+    // Repeat offenders are handled second, short -> long.
+    Prio = (1u << 30) - Size;
+
+  // Boost ranges that have a physical register hint.
+  const unsigned Hint = VRM->getRegAllocPref(Reg);
+  if (TargetRegisterInfo::isPhysicalRegister(Hint))
+    Prio |= (1u << 30);
+
+  Queue.push(std::make_pair(Prio, Reg));
+}
+
+LiveInterval *RAGreedy::dequeue() {
+  if (Queue.empty())
+    return 0;
+  LiveInterval *LI = &LIS->getInterval(Queue.top().second);
+  Queue.pop();
+  return LI;
+}
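The priority word built in enqueue() above packs the whole policy into one unsigned: bit 31 marks first-generation ranges, bit 30 boosts hinted ranges, and the low bits order by size. A self-contained sketch of the same encoding (simplified; not from the sources):

  static unsigned priority(bool FirstTime, bool Hinted, unsigned Size) {
    // First-generation ranges go long -> short; repeat offenders go
    // short -> long; a physreg hint boosts either class.
    unsigned Prio = FirstTime ? (1u << 31) + Size : (1u << 30) - Size;
    if (Hinted)
      Prio |= (1u << 30);
    return Prio;
  }

Any fresh range therefore outranks every previously split one, which is part of what lets the stage machinery above terminate.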
+
+//===----------------------------------------------------------------------===//
+//                         Register Reassignment
+//===----------------------------------------------------------------------===//
+
+// Check interference without using the cache.
+bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg,
+                                         unsigned PhysReg) {
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]);
+    if (subQ.checkInterference())
+      return true;
+  }
+  return false;
+}
+
+/// getSingleInterference - Return the single interfering virtual register
+/// assigned to PhysReg. Return 0 if more than one virtual register is
+/// interfering.
+LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
+                                              unsigned PhysReg) {
+  // Check physreg and aliases.
+  LiveInterval *Interference = 0;
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    if (Q.checkInterference()) {
+      if (Interference)
+        return 0;
+      if (Q.collectInterferingVRegs(2) > 1)
+        return 0;
+      Interference = Q.interferingVRegs().front();
+    }
+  }
+  return Interference;
+}
+
+// Attempt to reassign this virtual register to a different physical register.
+//
+// FIXME: we are not yet caching these "second-level" interferences discovered
+// in the sub-queries. These interferences can change with each call to
+// selectOrSplit. However, we could implement a "may-interfere" cache that
+// could be conservatively dirtied when we reassign or split.
+//
+// FIXME: This may result in a lot of alias queries. We could summarize alias
+// live intervals in their parent register's live union, but it's messy.
+bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
+                            unsigned WantedPhysReg) {
+  assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) &&
+         "Can only reassign virtual registers");
+  assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) &&
+         "inconsistent phys reg assignment");
+
+  AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs);
+  while (unsigned PhysReg = Order.next()) {
+    // Don't reassign to a WantedPhysReg alias.
+    if (TRI->regsOverlap(PhysReg, WantedPhysReg))
+      continue;
+
+    if (checkUncachedInterference(InterferingVReg, PhysReg))
+      continue;
+
+    // Reassign the interfering virtual reg to this physical reg.
+    unsigned OldAssign = VRM->getPhys(InterferingVReg.reg);
+    DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " <<
+          TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n');
+    unassign(InterferingVReg, OldAssign);
+    assign(InterferingVReg, PhysReg);
+    ++NumReassigned;
+    return true;
+  }
+  return false;
+}
+
+/// tryReassign - Try to reassign a single interference to a different physreg.
+/// @param  VirtReg Currently unassigned virtual register.
+/// @param  Order   Physregs to try.
+/// @return         Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order,
+                               SmallVectorImpl<LiveInterval*> &NewVRegs){
+  NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
+    if (!InterferingVReg)
+      continue;
+    if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg))
+      continue;
+    if (reassignVReg(*InterferingVReg, PhysReg))
+      return PhysReg;
+  }
+  return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                         Interference eviction
+//===----------------------------------------------------------------------===//
+
+/// canEvict - Return true if all interferences between VirtReg and PhysReg can
+/// be evicted. Set maxWeight to the maximal spill weight of an interference.
+bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+                                    float &MaxWeight) {
+  float Weight = 0;
+  for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    // If there are 10 or more interferences, chances are one is smaller.
+    if (Q.collectInterferingVRegs(10) >= 10)
+      return false;
+
+    // Check if any interfering live range is heavier than VirtReg.
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
+        return false;
+      if (Intf->weight >= VirtReg.weight)
+        return false;
+      Weight = std::max(Weight, Intf->weight);
+    }
+  }
+  MaxWeight = Weight;
+  return true;
+}
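In plain terms, the test above admits an eviction only when every interfering range is strictly lighter than the candidate, and it reports the heaviest one so callers can rank candidates. A standalone rendering over bare floats (a sketch, not the LLVM types):

  #include <algorithm>
  #include <vector>

  static bool canEvict(float CandWeight, const std::vector<float> &Intf,
                       float &MaxWeight) {
    float Weight = 0;
    for (size_t i = 0; i != Intf.size(); ++i) {
      if (Intf[i] >= CandWeight)
        return false;            // one interference is too heavy to evict
      Weight = std::max(Weight, Intf[i]);
    }
    MaxWeight = Weight;          // heaviest evicted range, for ranking
    return true;
  }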
+
+/// tryEvict - Try to evict all interferences for a physreg.
+/// @param  VirtReg Currently unassigned virtual register.
+/// @param  Order   Physregs to try.
+/// @return         Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
+                            AllocationOrder &Order,
+                            SmallVectorImpl<LiveInterval*> &NewVRegs){
+  NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+
+  // Keep track of the lightest single interference seen so far.
+  float BestWeight = 0;
+  unsigned BestPhys = 0;
+
+  Order.rewind();
+  while (unsigned PhysReg = Order.next()) {
+    float Weight = 0;
+    if (!canEvictInterference(VirtReg, PhysReg, Weight))
+      continue;
+
+    // This is an eviction candidate.
+    DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = "
+                 << Weight << '\n');
+    if (BestPhys && Weight >= BestWeight)
+      continue;
+
+    // Best so far.
+    BestPhys = PhysReg;
+    BestWeight = Weight;
+    // Stop if the hint can be used.
+    if (Order.isHint(PhysReg))
+      break;
+  }
+
+  if (!BestPhys)
+    return 0;
+
+  DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n");
+  for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) {
+    LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+    assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+    for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+      LiveInterval *Intf = Q.interferingVRegs()[i];
+      unassign(*Intf, VRM->getPhys(Intf->reg));
+      ++NumEvicted;
+      NewVRegs.push_back(Intf);
+    }
+  }
+  return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+//                              Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// mapGlobalInterference - Compute a map of the interference from PhysReg and
+/// its aliases in each block in SA->LiveBlocks.
+/// If LiveBlocks[i] is live-in, Ranges[i].first is the first interference.
+/// If LiveBlocks[i] is live-out, Ranges[i].second is the last interference.
+void RAGreedy::mapGlobalInterference(unsigned PhysReg,
+                                     SmallVectorImpl<IndexPair> &Ranges) {
+  Ranges.assign(SA->LiveBlocks.size(), IndexPair());
+  LiveInterval &VirtReg = const_cast<LiveInterval&>(SA->getParent());
+  for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+    if (!query(VirtReg, *AI).checkInterference())
+      continue;
+    LiveIntervalUnion::SegmentIter IntI =
+      PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+    if (!IntI.valid())
+      continue;
+    for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+      const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+      IndexPair &IP = Ranges[i];
+
+      // Skip interference-free blocks.
+      if (IntI.start() >= BI.Stop)
+        continue;
+
+      // First interference in block.
+      if (BI.LiveIn) {
+        IntI.advanceTo(BI.Start);
+        if (!IntI.valid())
+          break;
+        if (IntI.start() >= BI.Stop)
+          continue;
+        if (!IP.first.isValid() || IntI.start() < IP.first)
+          IP.first = IntI.start();
+      }
+
+      // Last interference in block.
+      if (BI.LiveOut) {
+        IntI.advanceTo(BI.Stop);
+        if (!IntI.valid() || IntI.start() >= BI.Stop)
+          --IntI;
+        if (IntI.stop() <= BI.Start)
+          continue;
+        if (!IP.second.isValid() || IntI.stop() > IP.second)
+          IP.second = IntI.stop();
+      }
+    }
+  }
+}
+
+/// calcSplitConstraints - Fill out the SplitConstraints vector based on the
+/// interference pattern in Intf. Return the static cost of this split,
+/// assuming that all preferences in SplitConstraints are met.
+float RAGreedy::calcSplitConstraints(const SmallVectorImpl<IndexPair> &Intf) {
+  // Reset interference dependent info.
+  SplitConstraints.resize(SA->LiveBlocks.size());
+  float StaticCost = 0;
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+    IndexPair IP = Intf[i];
+
+    BC.Number = BI.MBB->getNumber();
+    BC.Entry = (BI.Uses && BI.LiveIn) ?
+      SpillPlacement::PrefReg : SpillPlacement::DontCare;
+    BC.Exit = (BI.Uses && BI.LiveOut) ?
+      SpillPlacement::PrefReg : SpillPlacement::DontCare;
+
+    // Number of spill code instructions to insert.
+    unsigned Ins = 0;
+
+    // Interference for the live-in value.
+    if (IP.first.isValid()) {
+      if (IP.first <= BI.Start)
+        BC.Entry = SpillPlacement::MustSpill, Ins += BI.Uses;
+      else if (!BI.Uses)
+        BC.Entry = SpillPlacement::PrefSpill;
+      else if (IP.first < BI.FirstUse)
+        BC.Entry = SpillPlacement::PrefSpill, ++Ins;
+      else if (IP.first < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+        ++Ins;
+    }
+
+    // Interference for the live-out value.
+    if (IP.second.isValid()) {
+      if (IP.second >= BI.LastSplitPoint)
+        BC.Exit = SpillPlacement::MustSpill, Ins += BI.Uses;
+      else if (!BI.Uses)
+        BC.Exit = SpillPlacement::PrefSpill;
+      else if (IP.second > BI.LastUse)
+        BC.Exit = SpillPlacement::PrefSpill, ++Ins;
+      else if (IP.second > (BI.LiveThrough ? BI.FirstUse : BI.Def))
+        ++Ins;
+    }
+
+    // Accumulate the total frequency of inserted spill code.
+    if (Ins)
+      StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+  }
+  return StaticCost;
+}
+
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SplitConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
+  float GlobalCost = 0;
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    SpillPlacement::BlockConstraint &BC = SplitConstraints[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BC.Number, 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)];
+    unsigned Ins = 0;
+
+    if (!BI.Uses)
+      Ins += RegIn != RegOut;
+    else {
+      if (BI.LiveIn)
+        Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg);
+      if (BI.LiveOut)
+        Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg);
+    }
+    if (Ins)
+      GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number);
+  }
+  return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed
+/// by SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
+                                 const BitVector &LiveBundles,
+                                 SmallVectorImpl<LiveInterval*> &NewVRegs) {
+  DEBUG({
+    dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+           << " with bundles";
+    for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+      dbgs() << " EB#" << i;
+    dbgs() << ".\n";
+  });
+
+  // First compute interference ranges in the live blocks.
+  SmallVector<IndexPair, 8> InterferenceRanges;
+  mapGlobalInterference(PhysReg, InterferenceRanges);
+
+  LiveRangeEdit LREdit(VirtReg, NewVRegs);
+  SE->reset(LREdit);
+
+  // Create the main cross-block interval.
+  SE->openIntv();
+
+  // First add all defs that are live out of a block.
+  for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+    SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+    bool RegIn  = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+    bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+    // Should the register be live out?
+    if (!BI.LiveOut || !RegOut)
+      continue;
+
+    IndexPair &IP = InterferenceRanges[i];
+    DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+                 << Bundles->getBundle(BI.MBB->getNumber(), 1)
+                 << " intf [" << IP.first << ';' << IP.second << ')');
+
+    // The interference interval should either be invalid or overlap MBB.
+    assert((!IP.first.isValid() || IP.first < BI.Stop) && "Bad interference");
+    assert((!IP.second.isValid() || IP.second > BI.Start)
+           && "Bad interference");
+
+    // Check interference leaving the block.
+    if (!IP.second.isValid()) {
+      // Block is interference-free.
+ DEBUG(dbgs() << ", no interference"); + if (!BI.Uses) { + assert(BI.LiveThrough && "No uses, but not live through block?"); + // Block is live-through without interference. + DEBUG(dbgs() << ", no uses" + << (RegIn ? ", live-through.\n" : ", stack in.\n")); + if (!RegIn) + SE->enterIntvAtEnd(*BI.MBB); + continue; + } + if (!BI.LiveThrough) { + DEBUG(dbgs() << ", not live-through.\n"); + SE->useIntv(SE->enterIntvBefore(BI.Def), BI.Stop); + continue; + } + if (!RegIn) { + // Block is live-through, but entry bundle is on the stack. + // Reload just before the first use. + DEBUG(dbgs() << ", not live-in, enter before first use.\n"); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), BI.Stop); + continue; + } + DEBUG(dbgs() << ", live-through.\n"); + continue; + } + + // Block has interference. + DEBUG(dbgs() << ", interference to " << IP.second); + + if (!BI.LiveThrough && IP.second <= BI.Def) { + // The interference doesn't reach the outgoing segment. + DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); + SE->useIntv(BI.Def, BI.Stop); + continue; + } + + + if (!BI.Uses) { + // No uses in block, avoid interference by reloading as late as possible. + DEBUG(dbgs() << ", no uses.\n"); + SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); + assert(SegStart >= IP.second && "Couldn't avoid interference"); + continue; + } + + if (IP.second.getBoundaryIndex() < BI.LastUse) { + // There are interference-free uses at the end of the block. + // Find the first use that can get the live-out register. + SmallVectorImpl::const_iterator UI = + std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), + IP.second.getBoundaryIndex()); + assert(UI != SA->UseSlots.end() && "Couldn't find last use"); + SlotIndex Use = *UI; + assert(Use <= BI.LastUse && "Couldn't find last use"); + // Only attempt a split befroe the last split point. + if (Use.getBaseIndex() <= BI.LastSplitPoint) { + DEBUG(dbgs() << ", free use at " << Use << ".\n"); + SlotIndex SegStart = SE->enterIntvBefore(Use); + assert(SegStart >= IP.second && "Couldn't avoid interference"); + assert(SegStart < BI.LastSplitPoint && "Impossible split point"); + SE->useIntv(SegStart, BI.Stop); + continue; + } + } + + // Interference is after the last use. + DEBUG(dbgs() << " after last use.\n"); + SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); + assert(SegStart >= IP.second && "Couldn't avoid interference"); + } + + // Now all defs leading to live bundles are handled, do everything else. + for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { + SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + + // Is the register live-in? + if (!BI.LiveIn || !RegIn) + continue; + + // We have an incoming register. Check for interference. + IndexPair &IP = InterferenceRanges[i]; + + DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) + << " -> BB#" << BI.MBB->getNumber()); + + // Check interference entering the block. + if (!IP.first.isValid()) { + // Block is interference-free. + DEBUG(dbgs() << ", no interference"); + if (!BI.Uses) { + assert(BI.LiveThrough && "No uses, but not live through block?"); + // Block is live-through without interference. 
+ if (RegOut) { + DEBUG(dbgs() << ", no uses, live-through.\n"); + SE->useIntv(BI.Start, BI.Stop); + } else { + DEBUG(dbgs() << ", no uses, stack-out.\n"); + SE->leaveIntvAtTop(*BI.MBB); + } + continue; + } + if (!BI.LiveThrough) { + DEBUG(dbgs() << ", killed in block.\n"); + SE->useIntv(BI.Start, SE->leaveIntvAfter(BI.Kill)); + continue; + } + if (!RegOut) { + // Block is live-through, but exit bundle is on the stack. + // Spill immediately after the last use. + if (BI.LastUse < BI.LastSplitPoint) { + DEBUG(dbgs() << ", uses, stack-out.\n"); + SE->useIntv(BI.Start, SE->leaveIntvAfter(BI.LastUse)); + continue; + } + // The last use is after the last split point, it is probably an + // indirect jump. + DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " + << BI.LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE->leaveIntvBefore(BI.LastSplitPoint); + SE->useIntv(BI.Start, SegEnd); + // Run a double interval from the split to the last use. + // This makes it possible to spill the complement without affecting the + // indirect branch. + SE->overlapIntv(SegEnd, BI.LastUse); + continue; + } + // Register is live-through. + DEBUG(dbgs() << ", uses, live-through.\n"); + SE->useIntv(BI.Start, BI.Stop); + continue; + } + + // Block has interference. + DEBUG(dbgs() << ", interference from " << IP.first); + + if (!BI.LiveThrough && IP.first >= BI.Kill) { + // The interference doesn't reach the outgoing segment. + DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); + SE->useIntv(BI.Start, BI.Kill); + continue; + } + + if (!BI.Uses) { + // No uses in block, avoid interference by spilling as soon as possible. + DEBUG(dbgs() << ", no uses.\n"); + SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + continue; + } + if (IP.first.getBaseIndex() > BI.FirstUse) { + // There are interference-free uses at the beginning of the block. + // Find the last use that can get the register. + SmallVectorImpl::const_iterator UI = + std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), + IP.first.getBaseIndex()); + assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); + SlotIndex Use = (--UI)->getBoundaryIndex(); + DEBUG(dbgs() << ", free use at " << *UI << ".\n"); + SlotIndex SegEnd = SE->leaveIntvAfter(Use); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SE->useIntv(BI.Start, SegEnd); + continue; + } + + // Interference is before the first use. + DEBUG(dbgs() << " before first use.\n"); + SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= IP.first && "Couldn't avoid interference"); + } + + SE->closeIntv(); + + // FIXME: Should we be more aggressive about splitting the stack region into + // per-block segments? The current approach allows the stack region to + // separate into connected components. Some components may be allocatable. + SE->finish(); + ++NumGlobalSplits; + + if (VerifyEnabled) { + MF->verify(this, "After splitting live range around region"); + +#ifndef NDEBUG + // Make sure that at least one of the new intervals can allocate to PhysReg. + // That was the whole point of splitting the live range. 
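The use-slot searches in the two region-splitting loops above lean on SA->UseSlots being sorted, so std::lower_bound finds the first use clear of the interference in logarithmic time. The same pattern over plain integers standing in for slot indexes (illustrative only, not the pass's types):

  #include <algorithm>
  #include <vector>

  int main() {
    unsigned Slots[] = {10, 24, 40};               // sorted use slots
    std::vector<unsigned> UseSlots(Slots, Slots + 3);
    unsigned LastIntf = 20;                        // interference ends here
    // First use at or after the interference; this and later uses can stay
    // in the register.
    std::vector<unsigned>::const_iterator UI =
        std::lower_bound(UseSlots.begin(), UseSlots.end(), LastIntf);
    return (UI != UseSlots.end() && *UI == 24) ? 0 : 1;
  }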
+ bool found = false; + for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; + ++I) + if (!checkUncachedInterference(**I, PhysReg)) { + found = true; + break; + } + assert(found && "No allocatable intervals after pointless splitting"); +#endif + } +} + +unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + BitVector LiveBundles, BestBundles; + float BestCost = 0; + unsigned BestReg = 0; + + Order.rewind(); + for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { + if (GlobalCand.size() <= Cand) + GlobalCand.resize(Cand+1); + GlobalCand[Cand].PhysReg = PhysReg; + + mapGlobalInterference(PhysReg, GlobalCand[Cand].Interference); + float Cost = calcSplitConstraints(GlobalCand[Cand].Interference); + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (BestReg && Cost >= BestCost) { + DEBUG(dbgs() << " higher.\n"); + continue; + } + + SpillPlacer->placeSpills(SplitConstraints, LiveBundles); + // No live bundles, defer to splitSingleBlocks(). + if (!LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); + continue; + } + + Cost += calcGlobalSplitCost(LiveBundles); + DEBUG({ + dbgs() << ", total = " << Cost << " with bundles"; + for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (!BestReg || Cost < BestCost) { + BestReg = PhysReg; + BestCost = 0.98f * Cost; // Prevent rounding effects. + BestBundles.swap(LiveBundles); + } + } + + if (!BestReg) + return 0; + + splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Region); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Local Splitting +//===----------------------------------------------------------------------===// + + +/// calcGapWeights - Compute the maximum spill weight that needs to be evicted +/// in order to use PhysReg between two entries in SA->UseSlots. +/// +/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// +void RAGreedy::calcGapWeights(unsigned PhysReg, + SmallVectorImpl &GapWeight) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + const SmallVectorImpl &Uses = SA->UseSlots; + const unsigned NumGaps = Uses.size()-1; + + // Start and end points for the interference check. + SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse; + SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse; + + GapWeight.assign(NumGaps, 0.0f); + + // Add interference from each overlapping register. + for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { + if (!query(const_cast(SA->getParent()), *AI) + .checkInterference()) + continue; + + // We know that VirtReg is a continuous interval from FirstUse to LastUse, + // so we don't need InterferenceQuery. + // + // Interference that overlaps an instruction is counted in both gaps + // surrounding the instruction. The exception is interference before + // StartIdx and after StopIdx. + // + LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx); + for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { + // Skip the gaps before IntI. + while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + // Update the gaps covered by IntI. 
+ const float weight = IntI.value()->weight; + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = std::max(GapWeight[Gap], weight); + if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) + break; + } + if (Gap == NumGaps) + break; + } + } +} + +/// getPrevMappedIndex - Return the slot index of the last non-copy instruction +/// before MI that has a slot index. If MI is the first mapped instruction in +/// its block, return the block start index instead. +/// +SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) { + assert(MI && "Missing MachineInstr"); + const MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::const_iterator B = MBB->begin(), I = MI; + while (I != B) + if (!(--I)->isDebugValue() && !I->isCopy()) + return Indexes->getInstructionIndex(I); + return Indexes->getMBBStartIdx(MBB); +} + +/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous +/// real non-copy instruction for each instruction in SA->UseSlots. +/// +void RAGreedy::calcPrevSlots() { + const SmallVectorImpl &Uses = SA->UseSlots; + PrevSlot.clear(); + PrevSlot.reserve(Uses.size()); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]); + PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex()); + } +} + +/// nextSplitPoint - Find the next index into SA->UseSlots > i such that it may +/// be beneficial to split before UseSlots[i]. +/// +/// 0 is always a valid split point +unsigned RAGreedy::nextSplitPoint(unsigned i) { + const SmallVectorImpl &Uses = SA->UseSlots; + const unsigned Size = Uses.size(); + assert(i != Size && "No split points after the end"); + // Allow split before i when Uses[i] is not adjacent to the previous use. + while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex()) + ; + return i; +} + +/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only +/// basic block. +/// +unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { + assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + + // Note that it is possible to have an interval that is live-in or live-out + // while only covering a single block - A phi-def can use undef values from + // predecessors, and the block could be a single-block loop. + // We don't bother doing anything clever about such a case, we simply assume + // that the interval is continuous from FirstUse to LastUse. We should make + // sure that we don't do anything illegal to such an interval, though. + + const SmallVectorImpl &Uses = SA->UseSlots; + if (Uses.size() <= 2) + return 0; + const unsigned NumGaps = Uses.size()-1; + + DEBUG({ + dbgs() << "tryLocalSplit: "; + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + dbgs() << ' ' << SA->UseSlots[i]; + dbgs() << '\n'; + }); + + // For every use, find the previous mapped non-copy instruction. + // We use this to detect valid split points, and to estimate new interval + // sizes. + calcPrevSlots(); + + unsigned BestBefore = NumGaps; + unsigned BestAfter = 0; + float BestDiff = 0; + + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); + SmallVector GapWeight; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + // Keep track of the largest spill weight that would need to be evicted in + // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. 
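Concretely, the bookkeeping set up by the calcGapWeights call below: GapWeight[i] is the heaviest interference between two consecutive uses, and a candidate split window is charged the maximum over the gaps it keeps in a register. A reduced sketch (simplified types, not the pass itself):

  #include <algorithm>
  #include <vector>

  // Max interference weight over the gaps [SplitBefore, SplitAfter).
  static float maxGapWeight(const std::vector<float> &GapWeight,
                            unsigned SplitBefore, unsigned SplitAfter) {
    float MaxGap = GapWeight[SplitBefore];
    for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
      MaxGap = std::max(MaxGap, GapWeight[i]);
    return MaxGap;
  }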
+ calcGapWeights(PhysReg, GapWeight); + + // Try to find the best sequence of gaps to close. + // The new spill weight must be larger than any gap interference. + + // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. + unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1; + + // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). + // It is the spill weight that needs to be evicted. + float MaxGap = GapWeight[0]; + for (unsigned i = 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + + for (;;) { + // Live before/after split? + const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; + const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + << Uses[SplitBefore] << '-' << Uses[SplitAfter] + << " i=" << MaxGap); + + // Stop before the interval gets so big we wouldn't be making progress. + if (!LiveBefore && !LiveAfter) { + DEBUG(dbgs() << " all\n"); + break; + } + // Should the interval be extended or shrunk? + bool Shrink = true; + if (MaxGap < HUGE_VALF) { + // Estimate the new spill weight. + // + // Each instruction reads and writes the register, except the first + // instr doesn't read when !FirstLive, and the last instr doesn't write + // when !LastLive. + // + // We will be inserting copies before and after, so the total number of + // reads and writes is 2 * EstUses. + // + const unsigned EstUses = 2*(SplitAfter - SplitBefore) + + 2*(LiveBefore + LiveAfter); + + // Try to guess the size of the new interval. This should be trivial, + // but the slot index of an inserted copy can be a lot smaller than the + // instruction it is inserted before if there are many dead indexes + // between them. + // + // We measure the distance from the instruction before SplitBefore to + // get a conservative estimate. + // + // The final distance can still be different if inserting copies + // triggers a slot index renumbering. + // + const float EstWeight = normalizeSpillWeight(blockFreq * EstUses, + PrevSlot[SplitBefore].distance(Uses[SplitAfter])); + // Would this split be possible to allocate? + // Never allocate all gaps, we wouldn't be making progress. + float Diff = EstWeight - MaxGap; + DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); + if (Diff > 0) { + Shrink = false; + if (Diff > BestDiff) { + DEBUG(dbgs() << " (best)"); + BestDiff = Diff; + BestBefore = SplitBefore; + BestAfter = SplitAfter; + } + } + } + + // Try to shrink. + if (Shrink) { + SplitBefore = nextSplitPoint(SplitBefore); + if (SplitBefore < SplitAfter) { + DEBUG(dbgs() << " shrink\n"); + // Recompute the max when necessary. + if (GapWeight[SplitBefore - 1] >= MaxGap) { + MaxGap = GapWeight[SplitBefore]; + for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + } + continue; + } + MaxGap = 0; + } + + // Try to extend the interval. + if (SplitAfter >= NumGaps) { + DEBUG(dbgs() << " end\n"); + break; + } + + DEBUG(dbgs() << " extend\n"); + for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1; + SplitAfter != e; ++SplitAfter) + MaxGap = std::max(MaxGap, GapWeight[SplitAfter]); + continue; + } + } + + // Didn't find any candidates? 
+ if (BestBefore == NumGaps) + return 0; + + DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] + << '-' << Uses[BestAfter] << ", " << BestDiff + << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + + LiveRangeEdit LREdit(VirtReg, NewVRegs); + SE->reset(LREdit); + + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SE->closeIntv(); + SE->finish(); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); + ++NumLocalSplits; + + return 0; +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +/// trySplit - Try to split VirtReg or one of its interferences, making it +/// assignable. +/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. +unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl&NewVRegs) { + // Local intervals are handled separately. + if (LIS->intervalIsInOneMBB(VirtReg)) { + NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); + return tryLocalSplit(VirtReg, Order, NewVRegs); + } + + NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + + // Don't iterate global splitting. + // Move straight to spilling if this range was produced by a global split. + LiveRangeStage Stage = getStage(VirtReg); + if (Stage >= RS_Block) + return 0; + + SA->analyze(&VirtReg); + + // First try to split around a region spanning multiple blocks. + if (Stage < RS_Region) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + } + + // Then isolate blocks with multiple uses. + if (Stage < RS_Block) { + SplitAnalysis::BlockPtrSet Blocks; + if (SA->getMultiUseBlocks(Blocks)) { + LiveRangeEdit LREdit(VirtReg, NewVRegs); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Block); + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + } + } + + // Don't assign any physregs. + return 0; +} + + +//===----------------------------------------------------------------------===// +// Main Entry Point +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs) { + LiveRangeStage Stage = getStage(VirtReg); + if (Stage == RS_Original) + LRStage[VirtReg.reg] = RS_Second; + + // First try assigning a free register. + AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); + while (unsigned PhysReg = Order.next()) { + if (!checkPhysRegInterference(VirtReg, PhysReg)) + return PhysReg; + } + + if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + return PhysReg; + + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) + return PhysReg; + + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + + // The first time we see a live range, don't try to split or spill. + // Wait until the second time, when all smaller ranges have been allocated. + // This gives a better picture of the interference to split around. + if (Stage == RS_Original) { + NewVRegs.push_back(&VirtReg); + return 0; + } + + assert(Stage < RS_Spill && "Cannot allocate after spilling"); + + // Try splitting VirtReg or interferences. 
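To recap the cascade that ends in the trySplit call below, a condensed stand-in driver; the stage functions here are placeholders for the real tryReassign/tryEvict/trySplit methods, and returning 0 means the stage failed:

  typedef unsigned (*Stage)();
  static unsigned stageAssignFree() { return 0; } // free physreg, if any
  static unsigned stageReassign()   { return 0; } // move one interference
  static unsigned stageEvict()      { return 0; } // evict lighter ranges
  static unsigned stageSplit()      { return 0; } // split, requeue pieces

  static unsigned cascade() {
    Stage Stages[] = { stageAssignFree, stageReassign, stageEvict,
                       stageSplit };
    for (unsigned i = 0; i != 4; ++i)
      if (unsigned PhysReg = Stages[i]())
        return PhysReg;
    return 0; // nothing worked: the caller spills VirtReg
  }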
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Finally spill VirtReg itself. + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + SmallVector pendingSpills; + spiller().spill(&VirtReg, NewVRegs, pendingSpills); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" + << "********** Function: " + << ((Value*)mf.getFunction())->getName() << '\n'); + + MF = &mf; + if (VerifyEnabled) + MF->verify(this, "Before greedy register allocator"); + + RegAllocBase::init(getAnalysis(), getAnalysis()); + Indexes = &getAnalysis(); + DomTree = &getAnalysis(); + ReservedRegs = TRI->getReservedRegs(*MF); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + Loops = &getAnalysis(); + LoopRanges = &getAnalysis(); + Bundles = &getAnalysis(); + SpillPlacer = &getAnalysis(); + + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + LRStage.clear(); + LRStage.resize(MRI->getNumVirtRegs()); + + allocatePhysRegs(); + addMBBLiveIns(MF); + LIS->addKillFlags(); + + // Run rewriter + { + NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled); + VRM->rewrite(Indexes); + } + + // The pass output is in VirtRegMap. Release all the transient data. + releaseMemory(); + + return true; +} diff --git a/final/lib/CodeGen/RegAllocLinearScan.cpp b/final/lib/CodeGen/RegAllocLinearScan.cpp new file mode 100644 index 00000000000..a24fc80cf5e --- /dev/null +++ b/final/lib/CodeGen/RegAllocLinearScan.cpp @@ -0,0 +1,1538 @@ +//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a linear scan register allocator. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <set>
+#include <queue>
+#include <memory>
+#include <cmath>
+
+using namespace llvm;
+
+STATISTIC(NumIters     , "Number of iterations performed");
+STATISTIC(NumBacktracks, "Number of times we had to backtrack");
+STATISTIC(NumCoalesce,   "Number of copies coalesced");
+STATISTIC(NumDowngrade,  "Number of registers downgraded");
+
+static cl::opt<bool>
+NewHeuristic("new-spilling-heuristic",
+             cl::desc("Use new spilling heuristic"),
+             cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+PreSplitIntervals("pre-alloc-split",
+                  cl::desc("Pre-register allocation live interval splitting"),
+                  cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+                 cl::desc("Attempt trivial coalescing of interval ends"),
+                 cl::init(false), cl::Hidden);
+
+static RegisterRegAlloc
+linearscanRegAlloc("linearscan", "linear scan register allocator",
+                   createLinearScanRegisterAllocator);
+
+namespace {
+  // When we allocate a register, add it to a fixed-size queue of
+  // registers to skip in subsequent allocations. This trades a small
+  // amount of register pressure and increased spills for flexibility in
+  // the post-pass scheduler.
+  //
+  // Note that the number of registers used for reloading spills
+  // will be one greater than the value of this option.
+  //
+  // One big limitation of this is that it doesn't differentiate between
+  // different register classes. So on x86-64, if there is xmm register
+  // pressure, it can cause fewer GPRs to be held in the queue.
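The skip-queue just described is nothing more than a fixed-size ring buffer with linear membership tests; a standalone sketch (simplified, not the pass's actual members):

  #include <algorithm>
  #include <vector>

  struct RecentQueue {
    std::vector<unsigned> Regs; // zero-initialized ring buffer
    size_t Next;
    explicit RecentQueue(size_t N) : Regs(N, 0), Next(0) {}
    void record(unsigned Reg) {   // remember the register we just picked
      if (Regs.empty()) return;
      Regs[Next] = Reg;
      Next = (Next + 1) % Regs.size();
    }
    bool isRecent(unsigned Reg) const { // should allocation skip Reg?
      return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end();
    }
  };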
+  static cl::opt<int>
+  NumRecentlyUsedRegs("linearscan-skip-count",
+                      cl::desc("Number of registers for linearscan to remember "
+                               "to skip."),
+                      cl::init(0),
+                      cl::Hidden);
+
+  struct RALinScan : public MachineFunctionPass {
+    static char ID;
+    RALinScan() : MachineFunctionPass(ID) {
+      initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+      initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+      initializeRegisterCoalescerAnalysisGroup(
+        *PassRegistry::getPassRegistry());
+      initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+      initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+      initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+      initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+      initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+      initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+
+      // Initialize the queue to record recently-used registers.
+      if (NumRecentlyUsedRegs > 0)
+        RecentRegs.resize(NumRecentlyUsedRegs, 0);
+      RecentNext = RecentRegs.begin();
+    }
+
+    typedef std::pair<LiveInterval*, LiveInterval::iterator> IntervalPtr;
+    typedef SmallVector<IntervalPtr, 32> IntervalPtrs;
+  private:
+    /// RelatedRegClasses - This structure is built the first time a function
+    /// is compiled, and keeps track of which register classes have registers
+    /// that belong to multiple classes or have aliases that are in other
+    /// classes.
+    EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
+    DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
+
+    // NextReloadMap - For each register in the map, it maps to another
+    // register which is defined by a reload from the same stack slot and
+    // both reloads are in the same basic block.
+    DenseMap<unsigned, unsigned> NextReloadMap;
+
+    // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+    // un-favored for allocation.
+    SmallSet<unsigned, 4> DowngradedRegs;
+
+    // DowngradeMap - A map from virtual registers to physical registers being
+    // downgraded for the virtual registers.
+    DenseMap<unsigned, unsigned> DowngradeMap;
+
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    BitVector allocatableRegs_;
+    BitVector reservedRegs_;
+    LiveIntervals* li_;
+    MachineLoopInfo *loopInfo;
+
+    /// handled_ - Intervals are added to the handled_ set in the order of
+    /// their start value. This is used for backtracking.
+    std::vector<LiveInterval*> handled_;
+
+    /// fixed_ - Intervals that correspond to machine registers.
+    ///
+    IntervalPtrs fixed_;
+
+    /// active_ - Intervals that are currently being processed, and which have
+    /// a live range active for the current point.
+    IntervalPtrs active_;
+
+    /// inactive_ - Intervals that are currently being processed, but which
+    /// have a hold at the current point.
+    IntervalPtrs inactive_;
+
+    typedef std::priority_queue<LiveInterval*,
+                                SmallVector<LiveInterval*, 64>,
+                                greater_ptr<LiveInterval> > IntervalHeap;
+    IntervalHeap unhandled_;
+
+    /// regUse_ - Tracks register usage.
+    SmallVector<unsigned, 32> regUse_;
+    SmallVector<unsigned, 32> regUseBackUp_;
+
+    /// vrm_ - Tracks register assignments.
+    VirtRegMap* vrm_;
+
+    std::auto_ptr<VirtRegRewriter> rewriter_;
+
+    std::auto_ptr<Spiller> spiller_;
+
+    // The queue of recently-used registers.
+    SmallVector<unsigned, 4> RecentRegs;
+    SmallVector<unsigned, 4>::iterator RecentNext;
+
+    // Record that we just picked this register.
+ void recordRecentlyUsed(unsigned reg) { + assert(reg != 0 && "Recently used register is NOREG!"); + if (!RecentRegs.empty()) { + *RecentNext++ = reg; + if (RecentNext == RecentRegs.end()) + RecentNext = RecentRegs.begin(); + } + } + + public: + virtual const char* getPassName() const { + return "Linear Scan Register Allocator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + if (StrongPHIElim) + AU.addRequiredID(StrongPHIEliminationID); + // Make sure PassManager knows which analyses to make available + // to coalescing and which analyses coalescing invalidates. + AU.addRequiredTransitive(); + AU.addRequired(); + if (PreSplitIntervals) + AU.addRequiredID(PreAllocSplittingID); + AU.addRequiredID(LiveStacksID); + AU.addPreservedID(LiveStacksID); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - register allocate the whole function + bool runOnMachineFunction(MachineFunction&); + + // Determine if we skip this register due to its being recently used. + bool isRecentlyUsed(unsigned reg) const { + return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != + RecentRegs.end(); + } + + private: + /// linearScan - the linear scan algorithm + void linearScan(); + + /// initIntervalSets - initialize the interval sets. + /// + void initIntervalSets(); + + /// processActiveIntervals - expire old intervals and move non-overlapping + /// ones to the inactive list. + void processActiveIntervals(SlotIndex CurPoint); + + /// processInactiveIntervals - expire old intervals and move overlapping + /// ones to the active list. + void processInactiveIntervals(SlotIndex CurPoint); + + /// hasNextReloadInterval - Return the next liveinterval that's being + /// defined by a reload from the same SS as the specified one. + LiveInterval *hasNextReloadInterval(LiveInterval *cur); + + /// DowngradeRegister - Downgrade a register for allocation. + void DowngradeRegister(LiveInterval *li, unsigned Reg); + + /// UpgradeRegister - Upgrade a register for allocation. + void UpgradeRegister(unsigned Reg); + + /// assignRegOrStackSlotAtInterval - assign a register if one + /// is available, or spill. + void assignRegOrStackSlotAtInterval(LiveInterval* cur); + + void updateSpillWeights(std::vector &Weights, + unsigned reg, float weight, + const TargetRegisterClass *RC); + + /// findIntervalsToSpill - Determine the intervals to spill for the + /// specified interval. It's passed the physical registers whose spill + /// weight is the lowest among all the registers whose live intervals + /// conflict with the interval. + void findIntervalsToSpill(LiveInterval *cur, + std::vector > &Candidates, + unsigned NumCands, + SmallVector &SpillIntervals); + + /// attemptTrivialCoalescing - If a simple interval is defined by a copy, + /// try to allocate the definition to the same register as the source, + /// if the register is not defined during the life time of the interval. + /// This eliminates a copy, and is used to coalesce copies which were not + /// coalesced away before allocation either due to dest and src being in + /// different register classes or because the coalescer was overly + /// conservative. 
+ unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); + + /// + /// Register usage / availability tracking helpers. + /// + + void initRegUses() { + regUse_.resize(tri_->getNumRegs(), 0); + regUseBackUp_.resize(tri_->getNumRegs(), 0); + } + + void finalizeRegUses() { +#ifndef NDEBUG + // Verify all the registers are "freed". + bool Error = false; + for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { + if (regUse_[i] != 0) { + dbgs() << tri_->getName(i) << " is still in use!\n"; + Error = true; + } + } + if (Error) + llvm_unreachable(0); +#endif + regUse_.clear(); + regUseBackUp_.clear(); + } + + void addRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + ++regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) + ++regUse_[*as]; + } + + void delRegUse(unsigned physReg) { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + assert(regUse_[physReg] != 0); + --regUse_[physReg]; + for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { + assert(regUse_[*as] != 0); + --regUse_[*as]; + } + } + + bool isRegAvail(unsigned physReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(physReg) && + "should be physical register!"); + return regUse_[physReg] == 0; + } + + void backUpRegUses() { + regUseBackUp_ = regUse_; + } + + void restoreRegUses() { + regUse_ = regUseBackUp_; + } + + /// + /// Register handling helpers. + /// + + /// getFreePhysReg - return a free physical register for this virtual + /// register interval if we have one, otherwise return 0. + unsigned getFreePhysReg(LiveInterval* cur); + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, + unsigned MaxInactiveCount, + SmallVector &inactiveCounts, + bool SkipDGRegs); + + /// getFirstNonReservedPhysReg - return the first non-reserved physical + /// register in the register class. + unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { + TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_); + while (i != aoe && reservedRegs_.test(*i)) + ++i; + assert(i != aoe && "All registers reserved?!"); + return *i; + } + + void ComputeRelatedRegClasses(); + + template + void printIntervals(const char* const str, ItTy i, ItTy e) const { + DEBUG({ + if (str) + dbgs() << str << " intervals:\n"; + + for (; i != e; ++i) { + dbgs() << "\t" << *i->first << " -> "; + + unsigned reg = i->first->reg; + if (TargetRegisterInfo::isVirtualRegister(reg)) + reg = vrm_->getPhys(reg); + + dbgs() << tri_->getName(reg) << '\n'; + } + }); + } + }; + char RALinScan::ID = 0; +} + +INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) +INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", + "Linear Scan Register Allocator", false, false) + +void RALinScan::ComputeRelatedRegClasses() { + // First pass, add all reg classes to the union, and determine at least one + // reg class that each register is in. 
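The grouping being built here is union-find over register classes. A toy use of the same llvm::EquivalenceClasses container with a simplified element type (illustrative; the pass itself uses TargetRegisterClass pointers):

  #include "llvm/ADT/EquivalenceClasses.h"

  static bool demoRelated() {
    llvm::EquivalenceClasses<int> EC;
    EC.unionSets(1, 2);   // classes 1 and 2 share a register
    EC.unionSets(2, 3);   // an alias links class 3 in as well
    // All three now report the same leader, so an update for one class can
    // consult the others.
    return EC.findLeader(1) == EC.findLeader(3);
  }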
+  bool HasAliases = false;
+  for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+       E = tri_->regclass_end(); RCI != E; ++RCI) {
+    RelatedRegClasses.insert(*RCI);
+    for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
+         I != E; ++I) {
+      HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
+
+      const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
+      if (PRC) {
+        // Already processed this register.  Just make sure we know that
+        // multiple register classes share a register.
+        RelatedRegClasses.unionSets(PRC, *RCI);
+      } else {
+        PRC = *RCI;
+      }
+    }
+  }
+
+  // Second pass, now that we know conservatively what register classes each
+  // reg belongs to, add info about aliases.  We don't need to do this for
+  // targets without register aliases.
+  if (HasAliases)
+    for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
+         I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
+         I != E; ++I)
+      for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+        const TargetRegisterClass *AliasClass =
+          OneClassForEachPhysReg.lookup(*AS);
+        if (AliasClass)
+          RelatedRegClasses.unionSets(I->second, AliasClass);
+      }
+}
+
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// to allocate the definition the same register as the source register if the
+/// register is not defined during the live time of the interval. If the
+/// interval is killed by a copy, try to use the destination register. This
+/// eliminates a copy. This is used to coalesce copies which were not
+/// coalesced away before allocation either due to dest and src being in
+/// different register classes or because the coalescer was overly
+/// conservative.
+unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
+  unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+  if ((Preference && Preference == Reg) || !cur.containsOneValue())
+    return Reg;
+
+  // We cannot handle complicated live ranges. Simple linear stuff only.
+  if (cur.ranges.size() != 1)
+    return Reg;
+
+  const LiveRange &range = cur.ranges.front();
+
+  VNInfo *vni = range.valno;
+  if (vni->isUnused() || !vni->def.isValid())
+    return Reg;
+
+  unsigned CandReg;
+  {
+    MachineInstr *CopyMI;
+    if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+      // Defined by a copy, try to extend SrcReg forward
+      CandReg = CopyMI->getOperand(1).getReg();
+    else if (TrivCoalesceEnds &&
+             (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+             CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg())
+      // Only used by a copy, try to extend DstReg backwards
+      CandReg = CopyMI->getOperand(0).getReg();
+    else
+      return Reg;
+
+    // If the target of the copy is a sub-register then don't coalesce.
+    if (CopyMI->getOperand(0).getSubReg())
+      return Reg;
+  }
+
+  if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+    if (!vrm_->isAssignedReg(CandReg))
+      return Reg;
+    CandReg = vrm_->getPhys(CandReg);
+  }
+  if (Reg == CandReg)
+    return Reg;
+
+  const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
+  if (!RC->contains(CandReg))
+    return Reg;
+
+  if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+    return Reg;
+
+  // Try to coalesce.
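+  // Reassigning cur directly in the VirtRegMap (below) makes the original
+  // copy an identity copy after rewriting, so it can be deleted later; this
+  // is what NumCoalesce counts.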
+  DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+               << '\n');
+  vrm_->clearVirt(cur.reg);
+  vrm_->assignVirt2Phys(cur.reg, CandReg);
+
+  ++NumCoalesce;
+  return CandReg;
+}
+
+bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &fn.getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  allocatableRegs_ = tri_->getAllocatableSet(fn);
+  reservedRegs_ = tri_->getReservedRegs(fn);
+  li_ = &getAnalysis<LiveIntervals>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  // We don't run the coalescer here because we have no reason to
+  // interact with it.  If the coalescer requires interaction, it
+  // won't do anything.  If it doesn't require interaction, we assume
+  // it was run as a separate pass.
+
+  // If this is the first function compiled, compute the related reg classes.
+  if (RelatedRegClasses.empty())
+    ComputeRelatedRegClasses();
+
+  // Also resize register usage trackers.
+  initRegUses();
+
+  vrm_ = &getAnalysis<VirtRegMap>();
+  if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter());
+
+  spiller_.reset(createSpiller(*this, *mf_, *vrm_));
+
+  initIntervalSets();
+
+  linearScan();
+
+  // Rewrite spill code and update the PhysRegsUsed set.
+  rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+
+  // Write out new DBG_VALUE instructions.
+  getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
+  assert(unhandled_.empty() && "Unhandled live intervals remain!");
+
+  finalizeRegUses();
+
+  fixed_.clear();
+  active_.clear();
+  inactive_.clear();
+  handled_.clear();
+  NextReloadMap.clear();
+  DowngradedRegs.clear();
+  DowngradeMap.clear();
+  spiller_.reset(0);
+
+  return true;
+}
+
+/// initIntervalSets - initialize the interval sets.
+///
+void RALinScan::initIntervalSets()
+{
+  assert(unhandled_.empty() && fixed_.empty() &&
+         active_.empty() && inactive_.empty() &&
+         "interval sets should be empty on initialization");
+
+  handled_.reserve(li_->getNumIntervals());
+
+  for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+    if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) {
+      if (!i->second->empty()) {
+        mri_->setPhysRegUsed(i->second->reg);
+        fixed_.push_back(std::make_pair(i->second, i->second->begin()));
+      }
+    } else {
+      if (i->second->empty()) {
+        assignRegOrStackSlotAtInterval(i->second);
+      }
+      else
+        unhandled_.push(i->second);
+    }
+  }
+}
+
+void RALinScan::linearScan() {
+  // linear scan algorithm
+  DEBUG({
+      dbgs() << "********** LINEAR SCAN **********\n"
+             << "********** Function: "
+             << mf_->getFunction()->getName() << '\n';
+      printIntervals("fixed", fixed_.begin(), fixed_.end());
+    });
+
+  while (!unhandled_.empty()) {
+    // pick the interval with the earliest start point
+    LiveInterval* cur = unhandled_.top();
+    unhandled_.pop();
+    ++NumIters;
+    DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n');
+
+    assert(!cur->empty() && "Empty interval in unhandled set.");
+
+    processActiveIntervals(cur->beginIndex());
+    processInactiveIntervals(cur->beginIndex());
+
+    assert(TargetRegisterInfo::isVirtualRegister(cur->reg) &&
+           "Can only allocate virtual registers!");
+
+    // Allocating a virtual register. Try to find a free
+    // physical register or spill an interval (possibly this one) in order to
+    // assign it one.
+    assignRegOrStackSlotAtInterval(cur);
+
+    DEBUG({
+        printIntervals("active", active_.begin(), active_.end());
+        printIntervals("inactive", inactive_.begin(), inactive_.end());
+      });
+  }
+
+  // Expire any remaining active intervals
+  while (!active_.empty()) {
+    IntervalPtr &IP = active_.back();
+    unsigned reg = IP.first->reg;
+    DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n");
+    assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    delRegUse(reg);
+    active_.pop_back();
+  }
+
+  // Expire any remaining inactive intervals
+  DEBUG({
+      for (IntervalPtrs::reverse_iterator
+             i = inactive_.rbegin(); i != inactive_.rend(); ++i)
+        dbgs() << "\tinterval " << *i->first << " expired\n";
+    });
+  inactive_.clear();
+
+  // Add live-ins to every BB except for entry. Also perform trivial coalescing.
+  MachineFunction::iterator EntryMBB = mf_->begin();
+  SmallVector<MachineBasicBlock*, 8> LiveInMBBs;
+  for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) {
+    LiveInterval &cur = *i->second;
+    unsigned Reg = 0;
+    bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg);
+    if (isPhys)
+      Reg = cur.reg;
+    else if (vrm_->isAssignedReg(cur.reg))
+      Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg));
+    if (!Reg)
+      continue;
+    // Ignore split live intervals.
+    if (!isPhys && vrm_->getPreSplitReg(cur.reg))
+      continue;
+
+    for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end();
+         I != E; ++I) {
+      const LiveRange &LR = *I;
+      if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) {
+        for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i)
+          if (LiveInMBBs[i] != EntryMBB) {
+            assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+                   "Adding a virtual register to livein set?");
+            LiveInMBBs[i]->addLiveIn(Reg);
+          }
+        LiveInMBBs.clear();
+      }
+    }
+  }
+
+  DEBUG(dbgs() << *vrm_);
+
+  // Look for physical registers that end up not being allocated even though
+  // the register allocator had to spill other registers in its register class.
+  if (!vrm_->FindUnusedRegisters(li_))
+    return;
+}
+
+/// processActiveIntervals - expire old intervals and move non-overlapping ones
+/// to the inactive list.
+void RALinScan::processActiveIntervals(SlotIndex CurPoint)
+{
+  DEBUG(dbgs() << "\tprocessing active intervals:\n");
+
+  for (unsigned i = 0, e = active_.size(); i != e; ++i) {
+    LiveInterval *Interval = active_[i].first;
+    LiveInterval::iterator IntervalPos = active_[i].second;
+    unsigned reg = Interval->reg;
+
+    IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+    if (IntervalPos == Interval->end()) {     // Remove expired intervals.
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+      assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      delRegUse(reg);
+
+      // Pop off the end of the list.
+      active_[i] = active_.back();
+      active_.pop_back();
+      --i; --e;
+
+    } else if (IntervalPos->start > CurPoint) {
+      // Move inactive intervals to inactive list.
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n");
+      assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      delRegUse(reg);
+      // add to inactive.
+      inactive_.push_back(std::make_pair(Interval, IntervalPos));
+
+      // Pop off the end of the list.
+      active_[i] = active_.back();
+      active_.pop_back();
+      --i; --e;
+    } else {
+      // Otherwise, just update the iterator position.
+      active_[i].second = IntervalPos;
+    }
+  }
+}
+
+/// processInactiveIntervals - expire old intervals and move overlapping
+/// ones to the active list.
+void RALinScan::processInactiveIntervals(SlotIndex CurPoint)
+{
+  DEBUG(dbgs() << "\tprocessing inactive intervals:\n");
+
+  for (unsigned i = 0, e = inactive_.size(); i != e; ++i) {
+    LiveInterval *Interval = inactive_[i].first;
+    LiveInterval::iterator IntervalPos = inactive_[i].second;
+    unsigned reg = Interval->reg;
+
+    IntervalPos = Interval->advanceTo(IntervalPos, CurPoint);
+
+    if (IntervalPos == Interval->end()) {       // remove expired intervals.
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n");
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else if (IntervalPos->start <= CurPoint) {
+      // move re-activated intervals in active list
+      DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n");
+      assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+             "Can only allocate virtual registers!");
+      reg = vrm_->getPhys(reg);
+      addRegUse(reg);
+      // add to active
+      active_.push_back(std::make_pair(Interval, IntervalPos));
+
+      // Pop off the end of the list.
+      inactive_[i] = inactive_.back();
+      inactive_.pop_back();
+      --i; --e;
+    } else {
+      // Otherwise, just update the iterator position.
+      inactive_[i].second = IntervalPos;
+    }
+  }
+}
+
+/// updateSpillWeights - updates the spill weights of the specified physical
+/// register and its weight.
+void RALinScan::updateSpillWeights(std::vector<float> &Weights,
+                                   unsigned reg, float weight,
+                                   const TargetRegisterClass *RC) {
+  SmallSet<unsigned, 4> Processed;
+  SmallSet<unsigned, 4> SuperAdded;
+  SmallVector<unsigned, 4> Supers;
+  Weights[reg] += weight;
+  Processed.insert(reg);
+  for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
+    Weights[*as] += weight;
+    Processed.insert(*as);
+    if (tri_->isSubRegister(*as, reg) &&
+        SuperAdded.insert(*as) &&
+        RC->contains(*as)) {
+      Supers.push_back(*as);
+    }
+  }
+
+  // If the alias is a super-register, and the super-register is in the
+  // register class we are trying to allocate, then add the weight to all
+  // sub-registers of the super-register even if they are not aliases.
+  // e.g. allocating for GR32, bh is not used, updating bl spill weight.
+  // bl should get the same spill weight otherwise it will be chosen
+  // as a spill candidate since spilling bh doesn't make ebx available.
+  for (unsigned i = 0, e = Supers.size(); i != e; ++i) {
+    for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr)
+      if (!Processed.count(*sr))
+        Weights[*sr] += weight;
+  }
+}
+
+static
+RALinScan::IntervalPtrs::iterator
+FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) {
+  for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end();
+       I != E; ++I)
+    if (I->first == LI) return I;
+  return IP.end();
+}
+
+static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
+                                    SlotIndex Point){
+  for (unsigned i = 0, e = V.size(); i != e; ++i) {
+    RALinScan::IntervalPtr &IP = V[i];
+    LiveInterval::iterator I = std::upper_bound(IP.first->begin(),
+                                                IP.second, Point);
+    if (I != IP.first->begin()) --I;
+    IP.second = I;
+  }
+}
+
+/// getConflictWeight - Return the number of conflicts between cur
+/// live interval and defs and uses of Reg weighted by loop depths.
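+/// For example, with the 10^depth weighting used below, a single conflicting
+/// use inside a doubly nested loop counts as 100 conflicts, so candidates
+/// that clash with cur only in straight-line code are strongly preferred.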
+static
+float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
+                        MachineRegisterInfo *mri_,
+                        MachineLoopInfo *loopInfo) {
+  float Conflicts = 0;
+  for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg),
+         E = mri_->reg_end(); I != E; ++I) {
+    MachineInstr *MI = &*I;
+    if (cur->liveAt(li_->getInstructionIndex(MI))) {
+      unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
+      Conflicts += std::pow(10.0f, (float)loopDepth);
+    }
+  }
+  return Conflicts;
+}
+
+/// findIntervalsToSpill - Determine the intervals to spill for the
+/// specified interval. It's passed the physical registers whose spill
+/// weight is the lowest among all the registers whose live intervals
+/// conflict with the interval.
+void RALinScan::findIntervalsToSpill(LiveInterval *cur,
+                            std::vector<std::pair<unsigned, float> > &Candidates,
+                            unsigned NumCands,
+                            SmallVector<LiveInterval*, 8> &SpillIntervals) {
+  // We have figured out the *best* register to spill. But there are other
+  // registers that are pretty good as well (spill weight within 3%). Spill
+  // the one that has the fewest defs and uses that conflict with cur.
+  float Conflicts[3] = { 0.0f, 0.0f, 0.0f };
+  SmallVector<LiveInterval*, 8> SLIs[3];
+
+  DEBUG({
+      dbgs() << "\tConsidering " << NumCands << " candidates: ";
+      for (unsigned i = 0; i != NumCands; ++i)
+        dbgs() << tri_->getName(Candidates[i].first) << " ";
+      dbgs() << "\n";
+    });
+
+  // Calculate the number of conflicts of each candidate.
+  for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) {
+    unsigned Reg = i->first->reg;
+    unsigned PhysReg = vrm_->getPhys(Reg);
+    if (!cur->overlapsFrom(*i->first, i->second))
+      continue;
+    for (unsigned j = 0; j < NumCands; ++j) {
+      unsigned Candidate = Candidates[j].first;
+      if (tri_->regsOverlap(PhysReg, Candidate)) {
+        if (NumCands > 1)
+          Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+        SLIs[j].push_back(i->first);
+      }
+    }
+  }
+
+  for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i) {
+    unsigned Reg = i->first->reg;
+    unsigned PhysReg = vrm_->getPhys(Reg);
+    if (!cur->overlapsFrom(*i->first, i->second-1))
+      continue;
+    for (unsigned j = 0; j < NumCands; ++j) {
+      unsigned Candidate = Candidates[j].first;
+      if (tri_->regsOverlap(PhysReg, Candidate)) {
+        if (NumCands > 1)
+          Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo);
+        SLIs[j].push_back(i->first);
+      }
+    }
+  }
+
+  // Which is the best candidate?
+  unsigned BestCandidate = 0;
+  float MinConflicts = Conflicts[0];
+  for (unsigned i = 1; i != NumCands; ++i) {
+    if (Conflicts[i] < MinConflicts) {
+      BestCandidate = i;
+      MinConflicts = Conflicts[i];
+    }
+  }
+
+  std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(),
+            std::back_inserter(SpillIntervals));
+}
+
+namespace {
+  struct WeightCompare {
+  private:
+    const RALinScan &Allocator;
+
+  public:
+    WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}
+
+    typedef std::pair<unsigned, float> RegWeightPair;
+    bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const {
+      return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first);
+    }
+  };
+}
+
+static bool weightsAreClose(float w1, float w2) {
+  if (!NewHeuristic)
+    return false;
+
+  float diff = w1 - w2;
+  if (diff <= 0.02f)  // Within 0.02f
+    return true;
+  return (diff / w2) <= 0.05f;  // Within 5%.
+}
+
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+  DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+  if (I == NextReloadMap.end())
+    return 0;
+  return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+  for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+    bool isNew = DowngradedRegs.insert(*AS);
+    (void)isNew; // Silence compiler warning.
+    assert(isNew && "Multiple reloads holding the same register?");
+    DowngradeMap.insert(std::make_pair(li->reg, *AS));
+  }
+  ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+  if (Reg) {
+    DowngradedRegs.erase(Reg);
+    for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+      DowngradedRegs.erase(*AS);
+  }
+}
+
+namespace {
+  struct LISorter {
+    bool operator()(LiveInterval* A, LiveInterval* B) {
+      return A->beginIndex() < B->beginIndex();
+    }
+  };
+}
+
+/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
+/// spill.
+void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
+  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+  DEBUG(dbgs() << "\tallocating current interval from "
+               << RC->getName() << ": ");
+
+  // This is an implicitly defined live interval, just assign any register.
+  if (cur->empty()) {
+    unsigned physReg = vrm_->getRegAllocPref(cur->reg);
+    if (!physReg)
+      physReg = getFirstNonReservedPhysReg(RC);
+    DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+    // Note the register is not really in use.
+    vrm_->assignVirt2Phys(cur->reg, physReg);
+    return;
+  }
+
+  backUpRegUses();
+
+  std::vector<std::pair<unsigned, float> > SpillWeightsToAdd;
+  SlotIndex StartPosition = cur->beginIndex();
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  // If start of this live interval is defined by a move instruction and its
+  // source is assigned a physical register that is compatible with the target
+  // register class, then we should try to assign it the same register.
+  // This can happen when the move is from a larger register class to a smaller
+  // one, e.g. X86::mov32to32_. These move instructions are not coalescable.
+  if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
+    VNInfo *vni = cur->begin()->valno;
+    if (!vni->isUnused() && vni->def.isValid()) {
+      MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
+      if (CopyMI && CopyMI->isCopy()) {
+        unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
+        unsigned SrcReg = CopyMI->getOperand(1).getReg();
+        unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg();
+        unsigned Reg = 0;
+        if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+          Reg = SrcReg;
+        else if (vrm_->isAssignedReg(SrcReg))
+          Reg = vrm_->getPhys(SrcReg);
+        if (Reg) {
+          if (SrcSubReg)
+            Reg = tri_->getSubReg(Reg, SrcSubReg);
+          if (DstSubReg)
+            Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+          if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+            mri_->setRegAllocationHint(cur->reg, 0, Reg);
+        }
+      }
+    }
+  }
+
+  // For every interval in inactive we overlap with, mark the
+  // register as not free and update spill weights.
+  for (IntervalPtrs::const_iterator i = inactive_.begin(),
+         e = inactive_.end(); i != e; ++i) {
+    unsigned Reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+           "Can only allocate virtual registers!");
+    const TargetRegisterClass *RegRC = mri_->getRegClass(Reg);
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
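+    // (Intervals in unrelated classes can never compete for the registers
+    // we are allocating from, so adding them to regUse_ would only create
+    // spurious conflicts.)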
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+        cur->overlapsFrom(*i->first, i->second-1)) {
+      Reg = vrm_->getPhys(Reg);
+      addRegUse(Reg);
+      SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight));
+    }
+  }
+
+  // Speculatively check to see if we can get a register right now. If not,
+  // we know we won't be able to by adding more constraints. If so, we can
+  // check to see if it is valid. Doing an exhaustive search of the fixed_ list
+  // is very bad (it contains all callee clobbered registers for any functions
+  // with a call), so we want to avoid doing that if possible.
+  unsigned physReg = getFreePhysReg(cur);
+  unsigned BestPhysReg = physReg;
+  if (physReg) {
+    // We got a register. However, if it's in the fixed_ list, we might
+    // conflict with it. Check to see if we conflict with it or any of its
+    // aliases.
+    SmallSet<unsigned, 8> RegAliases;
+    for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS)
+      RegAliases.insert(*AS);
+
+    bool ConflictsWithFixed = false;
+    for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+      IntervalPtr &IP = fixed_[i];
+      if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) {
+        // Okay, this reg is on the fixed list. Check to see if we actually
+        // conflict.
+        LiveInterval *I = IP.first;
+        if (I->endIndex() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            ConflictsWithFixed = true;
+            break;
+          }
+        }
+      }
+    }
+
+    // Okay, the register picked by our speculative getFreePhysReg call turned
+    // out to be in use. Actually add all of the conflicting fixed registers to
+    // regUse_ so we can do an accurate query.
+    if (ConflictsWithFixed) {
+      // For every interval in fixed we overlap with, mark the register as not
+      // free and update spill weights.
+      for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+        IntervalPtr &IP = fixed_[i];
+        LiveInterval *I = IP.first;
+
+        const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg];
+        if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader &&
+            I->endIndex() > StartPosition) {
+          LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition);
+          IP.second = II;
+          if (II != I->begin() && II->start > StartPosition)
+            --II;
+          if (cur->overlapsFrom(*I, II)) {
+            unsigned reg = I->reg;
+            addRegUse(reg);
+            SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight));
+          }
+        }
+      }
+
+      // Using the newly updated regUse_ object, which includes conflicts in the
+      // future, see if there are any registers available.
+      physReg = getFreePhysReg(cur);
+    }
+  }
+
+  // Restore the physical register tracker, removing information about the
+  // future.
+  restoreRegUses();
+
+  // If we find a free register, we are done: assign this virtual to
+  // the free physical register and add this interval to the active
+  // list.
+  if (physReg) {
+    DEBUG(dbgs() << tri_->getName(physReg) << '\n');
+    vrm_->assignVirt2Phys(cur->reg, physReg);
+    addRegUse(physReg);
+    active_.push_back(std::make_pair(cur, cur->begin()));
+    handled_.push_back(cur);
+
+    // "Upgrade" the physical register since it has been allocated.
+    UpgradeRegister(physReg);
+    if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+      // "Downgrade" physReg to try to keep physReg from being allocated until
+      // the next reload from the same SS is allocated.
+      mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
+      DowngradeRegister(cur, physReg);
+    }
+    return;
+  }
+  DEBUG(dbgs() << "no free registers\n");
+
+  // Compile the spill weights into an array that is better for scanning.
+  std::vector<float> SpillWeights(tri_->getNumRegs(), 0.0f);
+  for (std::vector<std::pair<unsigned, float> >::iterator
+         I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I)
+    updateSpillWeights(SpillWeights, I->first, I->second, RC);
+
+  // For each interval in active, update spill weights.
+  for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+    reg = vrm_->getPhys(reg);
+    updateSpillWeights(SpillWeights, reg, i->first->weight, RC);
+  }
+
+  DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n");
+
+  // Find a register to spill.
+  float minWeight = HUGE_VALF;
+  unsigned minReg = 0;
+
+  bool Found = false;
+  std::vector<std::pair<unsigned, float> > RegsWeights;
+  if (!minReg || SpillWeights[minReg] == HUGE_VALF)
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      float regWeight = SpillWeights[reg];
+      // Don't even consider reserved regs.
+      if (reservedRegs_.test(reg))
+        continue;
+      // Skip recently allocated registers and reserved registers.
+      if (minWeight > regWeight && !isRecentlyUsed(reg))
+        Found = true;
+      RegsWeights.push_back(std::make_pair(reg, regWeight));
+    }
+
+  // If we didn't find a register that is spillable, try aliases?
+  if (!Found) {
+    for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
+           e = RC->allocation_order_end(*mf_); i != e; ++i) {
+      unsigned reg = *i;
+      if (reservedRegs_.test(reg))
+        continue;
+      // No need to worry about if the alias register size < regsize of RC.
+      // We are going to spill all registers that alias it anyway.
+      for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
+        RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as]));
+    }
+  }
+
+  // Sort all potential spill candidates by weight.
+  std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this));
+  minReg = RegsWeights[0].first;
+  minWeight = RegsWeights[0].second;
+  if (minWeight == HUGE_VALF) {
+    // All registers must have inf weight. Just grab one!
+    minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC);
+    if (cur->weight == HUGE_VALF ||
+        li_->getApproximateInstructionCount(*cur) == 0) {
+      // Spill a physical register around defs and uses.
+      if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        // spillPhysRegAroundRegDefsUses may have invalidated iterators stored
+        // in fixed_. Reset them.
+        for (unsigned i = 0, e = fixed_.size(); i != e; ++i) {
+          IntervalPtr &IP = fixed_[i];
+          LiveInterval *I = IP.first;
+          if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg))
+            IP.second = I->advanceTo(I->begin(), StartPosition);
+        }
+
+        DowngradedRegs.clear();
+        assignRegOrStackSlotAtInterval(cur);
+      } else {
+        assert(false && "Ran out of registers during register allocation!");
+        report_fatal_error("Ran out of registers during register allocation!");
+      }
+      return;
+    }
+  }
+
+  // Find up to 3 registers to consider as spill candidates.
+  unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1;
+  while (LastCandidate > 1) {
+    if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight))
+      break;
+    --LastCandidate;
+  }
+
+  DEBUG({
+      dbgs() << "\t\tregister(s) with min weight(s): ";
+
+      for (unsigned i = 0; i != LastCandidate; ++i)
+        dbgs() << tri_->getName(RegsWeights[i].first)
+               << " (" << RegsWeights[i].second << ")\n";
+    });
+
+  // If the current has the minimum weight, we need to spill it and
+  // add any added intervals back to unhandled, and restart
+  // linearscan.
+  if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
+    DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
+    SmallVector<LiveInterval*, 8> spillIs, added;
+    spiller_->spill(cur, added, spillIs);
+
+    std::sort(added.begin(), added.end(), LISorter());
+    if (added.empty())
+      return;  // Early exit if all spills were folded.
+
+    // Merge added with unhandled.  Note that we have already sorted
+    // intervals returned by addIntervalsForSpills by their starting
+    // point.
+    // This also updates the NextReloadMap. That is, it adds mapping from a
+    // register defined by a reload from SS to the next reload from SS in the
+    // same basic block.
+    MachineBasicBlock *LastReloadMBB = 0;
+    LiveInterval *LastReload = 0;
+    int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+    for (unsigned i = 0, e = added.size(); i != e; ++i) {
+      LiveInterval *ReloadLi = added[i];
+      if (ReloadLi->weight == HUGE_VALF &&
+          li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+        SlotIndex ReloadIdx = ReloadLi->beginIndex();
+        MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+        int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+        if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+          // Last reload of same SS is in the same MBB. We want to try to
+          // allocate both reloads the same register and make sure the reg
+          // isn't clobbered in between if at all possible.
+          assert(LastReload->beginIndex() < ReloadIdx);
+          NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+        }
+        LastReloadMBB = ReloadMBB;
+        LastReload = ReloadLi;
+        LastReloadSS = ReloadSS;
+      }
+      unhandled_.push(ReloadLi);
+    }
+    return;
+  }
+
+  ++NumBacktracks;
+
+  // Push the current interval back to unhandled since we are going
+  // to re-run at least this iteration. Since we didn't modify it it
+  // should go back right in the front of the list
+  unhandled_.push(cur);
+
+  assert(TargetRegisterInfo::isPhysicalRegister(minReg) &&
+         "did not choose a register to spill?");
+
+  // We spill all intervals aliasing the register with
+  // minimum weight, rollback to the interval with the earliest
+  // start point and let the linear scan algorithm run again
+  SmallVector<LiveInterval*, 8> spillIs;
+
+  // Determine which intervals have to be spilled.
+  findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs);
+
+  // Set of spilled vregs (used later to rollback properly)
+  SmallSet<unsigned, 8> spilled;
+
+  // The earliest start of a spilled interval indicates up to where
+  // in handled we need to roll back
+  assert(!spillIs.empty() && "No spill intervals?");
+  SlotIndex earliestStart = spillIs[0]->beginIndex();
+
+  // Spill live intervals of virtual regs mapped to the physical register we
+  // want to clear (and its aliases). We only spill those that overlap with the
+  // current interval as the rest do not affect its allocation. We also keep
+  // track of the earliest start of all spilled live intervals since this will
+  // mark our rollback point.
+  SmallVector<LiveInterval*, 8> added;
+  while (!spillIs.empty()) {
+    LiveInterval *sli = spillIs.back();
+    spillIs.pop_back();
+    DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n');
+    if (sli->beginIndex() < earliestStart)
+      earliestStart = sli->beginIndex();
+    spiller_->spill(sli, added, spillIs);
+    spilled.insert(sli->reg);
+  }
+
+  // Include any added intervals in earliestStart.
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    SlotIndex SI = added[i]->beginIndex();
+    if (SI < earliestStart)
+      earliestStart = SI;
+  }
+
+  DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n');
+
+  // Scan handled in reverse order up to the earliest start of a
+  // spilled live interval and undo each one, restoring the state of
+  // unhandled.
+  while (!handled_.empty()) {
+    LiveInterval* i = handled_.back();
+    // If this interval starts before earliestStart we are done.
+    if (!i->empty() && i->beginIndex() < earliestStart)
+      break;
+    DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n');
+    handled_.pop_back();
+
+    // When undoing a live interval allocation we must know if it is active or
+    // inactive to properly update regUse_ and the VirtRegMap.
+    IntervalPtrs::iterator it;
+    if ((it = FindIntervalInVector(active_, i)) != active_.end()) {
+      active_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      delRegUse(vrm_->getPhys(i->reg));
+      vrm_->clearVirt(i->reg);
+    } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) {
+      inactive_.erase(it);
+      assert(!TargetRegisterInfo::isPhysicalRegister(i->reg));
+      if (!spilled.count(i->reg))
+        unhandled_.push(i);
+      vrm_->clearVirt(i->reg);
+    } else {
+      assert(TargetRegisterInfo::isVirtualRegister(i->reg) &&
+             "Can only allocate virtual registers!");
+      vrm_->clearVirt(i->reg);
+      unhandled_.push(i);
+    }
+
+    DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+    if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy. Clear
+      // the preference now since the source interval allocation may have been
+      // undone as well.
+      mri_->setRegAllocationHint(i->reg, 0, 0);
+    else {
+      UpgradeRegister(ii->second);
+    }
+  }
+
+  // Rewind the iterators in the active, inactive, and fixed lists back to the
+  // point we reverted to.
+  RevertVectorIteratorsTo(active_, earliestStart);
+  RevertVectorIteratorsTo(inactive_, earliestStart);
+  RevertVectorIteratorsTo(fixed_, earliestStart);
+
+  // Scan the rest and undo each interval that expired after earliestStart and
+  // insert it in active (the next iteration of the algorithm will
+  // put it in inactive if required)
+  for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
+    LiveInterval *HI = handled_[i];
+    if (!HI->expiredAt(earliestStart) &&
+        HI->expiredAt(cur->beginIndex())) {
+      DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n');
+      active_.push_back(std::make_pair(HI, HI->begin()));
+      assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg));
+      addRegUse(vrm_->getPhys(HI->reg));
+    }
+  }
+
+  // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds mapping from a
+  // register defined by a reload from SS to the next reload from SS in the
+  // same basic block.
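+  // For example (hypothetical vregs): if %reg2000 and %reg2001 are both
+  // reloads from stack slot #4 in one MBB, NextReloadMap[%reg2000] becomes
+  // %reg2001, and the allocator later hints both at the same physical
+  // register.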
+  MachineBasicBlock *LastReloadMBB = 0;
+  LiveInterval *LastReload = 0;
+  int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+  std::sort(added.begin(), added.end(), LISorter());
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    LiveInterval *ReloadLi = added[i];
+    if (ReloadLi->weight == HUGE_VALF &&
+        li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+      SlotIndex ReloadIdx = ReloadLi->beginIndex();
+      MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+      int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+      if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+        // Last reload of same SS is in the same MBB. We want to try to
+        // allocate both reloads the same register and make sure the reg
+        // isn't clobbered in between if at all possible.
+        assert(LastReload->beginIndex() < ReloadIdx);
+        NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+      }
+      LastReloadMBB = ReloadMBB;
+      LastReload = ReloadLi;
+      LastReloadSS = ReloadSS;
+    }
+    unhandled_.push(ReloadLi);
+  }
+}
+
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+                                   const TargetRegisterClass *RC,
+                                   unsigned MaxInactiveCount,
+                                   SmallVector<unsigned, 256> &inactiveCounts,
+                                   bool SkipDGRegs) {
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+  // Resolve second part of the hint (if possible) given the current allocation.
+  unsigned physReg = Hint.second;
+  if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+    physReg = vrm_->getPhys(physReg);
+
+  TargetRegisterClass::iterator I, E;
+  tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
+  assert(I != E && "No allocatable register in this register class!");
+
+  // Scan for the first available register.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    // Skip reserved registers.
+    if (reservedRegs_.test(Reg))
+      continue;
+    // Skip recently allocated registers.
+    if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) {
+      FreeReg = Reg;
+      if (FreeReg < inactiveCounts.size())
+        FreeRegInactiveCount = inactiveCounts[FreeReg];
+      else
+        FreeRegInactiveCount = 0;
+      break;
+    }
+  }
+
+  // If there are no free regs, or if this reg has the max inactive count,
+  // return this register.
+  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) {
+    // Remember what register we picked so we can skip it next time.
+    if (FreeReg != 0) recordRecentlyUsed(FreeReg);
+    return FreeReg;
+  }
+
+  // Continue scanning the registers, looking for the one with the highest
+  // inactive count.  Alkis found that this reduced register pressure very
+  // slightly on X86 (in rev 1.94 of this file), though this should probably be
+  // reevaluated now.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    // Skip reserved registers.
+    if (reservedRegs_.test(Reg))
+      continue;
+    if (isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+        FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) {
+      FreeReg = Reg;
+      FreeRegInactiveCount = inactiveCounts[Reg];
+      if (FreeRegInactiveCount == MaxInactiveCount)
+        break;    // We found the one with the max inactive count.
+    }
+  }
+
+  // Remember what register we picked so we can skip it next time.
+  recordRecentlyUsed(FreeReg);
+
+  return FreeReg;
+}
+
+/// getFreePhysReg - return a free physical register for this virtual register
+/// interval if we have one, otherwise return 0.
+unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
+  SmallVector<unsigned, 256> inactiveCounts;
+  unsigned MaxInactiveCount = 0;
+
+  const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+  const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC);
+
+  for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end();
+       i != e; ++i) {
+    unsigned reg = i->first->reg;
+    assert(TargetRegisterInfo::isVirtualRegister(reg) &&
+           "Can only allocate virtual registers!");
+
+    // If this is not in a related reg class to the register we're allocating,
+    // don't check it.
+    const TargetRegisterClass *RegRC = mri_->getRegClass(reg);
+    if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) {
+      reg = vrm_->getPhys(reg);
+      if (inactiveCounts.size() <= reg)
+        inactiveCounts.resize(reg+1);
+      ++inactiveCounts[reg];
+      MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]);
+    }
+  }
+
+  // If the copy coalescer has assigned a "preferred" register, check if it's
+  // available first.
+  unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+  if (Preference) {
+    DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") ");
+    if (isRegAvail(Preference) &&
+        RC->contains(Preference))
+      return Preference;
+  }
+
+  if (!DowngradedRegs.empty()) {
+    unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
+                                      true);
+    if (FreeReg)
+      return FreeReg;
+  }
+  return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
+}
+
+FunctionPass* llvm::createLinearScanRegisterAllocator() {
+  return new RALinScan();
+}
diff --git a/final/lib/CodeGen/RegAllocPBQP.cpp b/final/lib/CodeGen/RegAllocPBQP.cpp
new file mode 100644
index 00000000000..ea0d1fe0233
--- /dev/null
+++ b/final/lib/CodeGen/RegAllocPBQP.cpp
@@ -0,0 +1,720 @@
+//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based
+// register allocator for LLVM. This allocator works by constructing a PBQP
+// problem representing the register allocation problem under consideration,
+// solving this using a PBQP solver, and mapping the solution back to a
+// register assignment. If any variables are selected for spilling then spill
+// code is inserted and the process repeated.
+//
+// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned
+// for register allocation. For more information on PBQP for register
+// allocation, see the following papers:
+//
+//   (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with
+//   PBQP. In Proceedings of the 7th Joint Modular Languages Conference
+//   (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361.
+//
+//   (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular
+//   architectures. In Proceedings of the Joint Conference on Languages,
+//   Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York,
+//   NY, USA, 139-148.
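+//
+// As a rough sketch of the mapping (an illustrative example, not taken from
+// the papers): a virtual register with allowed physical registers {R0, R1}
+// becomes a node with cost vector [spillCost, 0, 0], index 0 being the spill
+// option. If two virtual registers interfere, the edge between their nodes
+// carries a matrix with infinite cost at every pair of options whose
+// registers overlap, ruling out simultaneous assignment to aliasing
+// registers.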
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+
+#include "RenderMachineFunction.h"
+#include "Splitter.h"
+#include "VirtRegMap.h"
+#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include <limits>
+#include <memory>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+static RegisterRegAlloc
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
+                     createDefaultPBQPRegisterAllocator);
+
+static cl::opt<bool>
+pbqpCoalescing("pbqp-coalescing",
+               cl::desc("Attempt coalescing during PBQP register allocation."),
+               cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+pbqpPreSplitting("pbqp-pre-splitting",
+                 cl::desc("Pre-split before PBQP register allocation."),
+                 cl::init(false), cl::Hidden);
+
+namespace {
+
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+  static char ID;
+
+  /// Construct a PBQP register allocator.
+  RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
+      : MachineFunctionPass(ID), builder(b) {
+    initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+    initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+    initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+    initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+    initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+    initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+    initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+    initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+    initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+  }
+
+  /// Return the pass name.
+  virtual const char* getPassName() const {
+    return "PBQP Register Allocator";
+  }
+
+  /// PBQP analysis usage.
+  virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+  /// Perform register allocation
+  virtual bool runOnMachineFunction(MachineFunction &MF);
+
+private:
+
+  typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+  typedef std::vector<const LiveInterval*> Node2LIMap;
+  typedef std::vector<unsigned> AllowedSet;
+  typedef std::vector<AllowedSet> AllowedSetMap;
+  typedef std::pair<unsigned, unsigned> RegPair;
+  typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+  typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+  typedef std::set<unsigned> RegSet;
+
+
+  std::auto_ptr<PBQPBuilder> builder;
+
+  MachineFunction *mf;
+  const TargetMachine *tm;
+  const TargetRegisterInfo *tri;
+  const TargetInstrInfo *tii;
+  const MachineLoopInfo *loopInfo;
+  MachineRegisterInfo *mri;
+  RenderMachineFunction *rmf;
+
+  LiveIntervals *lis;
+  LiveStacks *lss;
+  VirtRegMap *vrm;
+
+  RegSet vregsToAlloc, emptyIntervalVRegs;
+
+  /// \brief Finds the initial set of vreg intervals to allocate.
+  void findVRegIntervalsToAlloc();
+
+  /// \brief Adds a stack interval if the given live interval has been
+  /// spilled. Used to support stack slot coloring.
+  void addStackInterval(const LiveInterval *spilled,
+                        MachineRegisterInfo* mri);
+
+  /// \brief Given a solved PBQP problem maps this solution back to a register
+  /// assignment.
+  bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+                         const PBQP::Solution &solution);
+
+  /// \brief Postprocessing before final spilling. Sets basic block "live in"
+  /// variables.
+  void finalizeAlloc() const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+} // End anonymous namespace.
+
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+  Node2VReg::const_iterator vregItr = node2VReg.find(node);
+  assert(vregItr != node2VReg.end() && "No vreg for node.");
+  return vregItr->second;
+}
+
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+  VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+  assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+  return nodeItr->second;
+}
+
+const PBQPRAProblem::AllowedSet&
+  PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+  AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+  assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+  const AllowedSet &allowedSet = allowedSetItr->second;
+  return allowedSet;
+}
+
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+  assert(isPRegOption(vreg, option) && "Not a preg option.");
+
+  const AllowedSet& allowedSet = getAllowedSet(vreg);
+  assert(option <= allowedSet.size() && "Option outside allowed set.");
+  return allowedSet[option - 1];
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+                                                const LiveIntervals *lis,
+                                                const MachineLoopInfo *loopInfo,
+                                                const RegSet &vregs) {
+
+  typedef std::vector<const LiveInterval*> LIVector;
+
+  MachineRegisterInfo *mri = &mf->getRegInfo();
+  const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
+
+  std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+  PBQP::Graph &g = p->getGraph();
+  RegSet pregs;
+
+  // Collect the set of preg intervals, record that they're used in the MF.
+  for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+      pregs.insert(itr->first);
+      mri->setPhysRegUsed(itr->first);
+    }
+  }
+
+  BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+  // Iterate over vregs.
+  for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+       vregItr != vregEnd; ++vregItr) {
+    unsigned vreg = *vregItr;
+    const TargetRegisterClass *trc = mri->getRegClass(vreg);
+    const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+    // Compute an initial allowed set for the current vreg.
+    typedef std::vector<unsigned> VRAllowed;
+    VRAllowed vrAllowed;
+    for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
+         aoEnd = trc->allocation_order_end(*mf);
+         aoItr != aoEnd; ++aoItr) {
+      unsigned preg = *aoItr;
+      if (!reservedRegs.test(preg)) {
+        vrAllowed.push_back(preg);
+      }
+    }
+
+    // Remove any physical registers which overlap.
+    for (RegSet::const_iterator pregItr = pregs.begin(),
+         pregEnd = pregs.end();
+         pregItr != pregEnd; ++pregItr) {
+      unsigned preg = *pregItr;
+      const LiveInterval *pregLI = &lis->getInterval(preg);
+
+      if (pregLI->empty()) {
+        continue;
+      }
+
+      if (!vregLI->overlaps(*pregLI)) {
+        continue;
+      }
+
+      // Remove the register from the allowed set.
+      VRAllowed::iterator eraseItr =
+        std::find(vrAllowed.begin(), vrAllowed.end(), preg);
+
+      if (eraseItr != vrAllowed.end()) {
+        vrAllowed.erase(eraseItr);
+      }
+
+      // Also remove any aliases.
+      const unsigned *aliasItr = tri->getAliasSet(preg);
+      if (aliasItr != 0) {
+        for (; *aliasItr != 0; ++aliasItr) {
+          VRAllowed::iterator eraseItr =
+            std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
+
+          if (eraseItr != vrAllowed.end()) {
+            vrAllowed.erase(eraseItr);
+          }
+        }
+      }
+    }
+
+    // Construct the node.
+    PBQP::Graph::NodeItr node =
+      g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
+
+    // Record the mapping and allowed set in the problem.
+    p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
+
+    PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+        vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+
+    addSpillCosts(g.getNodeCosts(node), spillCost);
+  }
+
+  for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+       vr1Itr != vrEnd; ++vr1Itr) {
+    unsigned vr1 = *vr1Itr;
+    const LiveInterval &l1 = lis->getInterval(vr1);
+    const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+    for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+         vr2Itr != vrEnd; ++vr2Itr) {
+      unsigned vr2 = *vr2Itr;
+      const LiveInterval &l2 = lis->getInterval(vr2);
+      const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+      assert(!l2.empty() && "Empty interval in vreg set?");
+      if (l1.overlaps(l2)) {
+        PBQP::Graph::EdgeItr edge =
+          g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+                    PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+        addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
+      }
+    }
+  }
+
+  return p;
+}
+
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+                                PBQP::PBQPNum spillCost) {
+  costVec[0] = spillCost;
+}
+
+void PBQPBuilder::addInterferenceCosts(
+    PBQP::Matrix &costMat,
+    const PBQPRAProblem::AllowedSet &vr1Allowed,
+    const PBQPRAProblem::AllowedSet &vr2Allowed,
+    const TargetRegisterInfo *tri) {
+  assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+  assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
+
+  for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+    unsigned preg1 = vr1Allowed[i];
+
+    for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+      unsigned preg2 = vr2Allowed[j];
+
+      if (tri->regsOverlap(preg1, preg2)) {
+        costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+      }
+    }
+  }
+}
+
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+                                                MachineFunction *mf,
+                                                const LiveIntervals *lis,
+                                                const MachineLoopInfo *loopInfo,
+                                                const RegSet &vregs) {
+
+  std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+  PBQP::Graph &g = p->getGraph();
+
+  const TargetMachine &tm = mf->getTarget();
+  CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
+
+  // Scan the machine function and add a coalescing cost whenever CoalescerPair
+  // gives the OK.
+  for (MachineFunction::const_iterator mbbItr = mf->begin(),
+                                       mbbEnd = mf->end();
+       mbbItr != mbbEnd; ++mbbItr) {
+    const MachineBasicBlock *mbb = &*mbbItr;
+
+    for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+                                           miEnd = mbb->end();
+         miItr != miEnd; ++miItr) {
+      const MachineInstr *mi = &*miItr;
+
+      if (!cp.setRegisters(mi)) {
+        continue; // Not coalescable.
+      }
+
+      if (cp.getSrcReg() == cp.getDstReg()) {
+        continue; // Already coalesced.
+      }
+
+      unsigned dst = cp.getDstReg(),
+               src = cp.getSrcReg();
+
+      const float copyFactor = 0.5; // Cost of copy relative to load. Current
+                                    // value plucked randomly out of the air.
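+      // Illustrative numbers (not from the source): with copyFactor = 0.5,
+      // a copy at loop depth 2 yields a cBenefit of half the spill weight of
+      // a use at that depth, so the coalescing discount grows with loop
+      // nesting.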
+      PBQP::PBQPNum cBenefit =
+        copyFactor * LiveIntervals::getSpillWeight(false, true,
+                                                   loopInfo->getLoopDepth(mbb));
+
+      if (cp.isPhys()) {
+        if (!lis->isAllocatable(dst)) {
+          continue;
+        }
+
+        const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+        unsigned pregOpt = 0;
+        while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+          ++pregOpt;
+        }
+        if (pregOpt < allowed.size()) {
+          ++pregOpt; // +1 to account for spill option.
+          PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+          addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+        }
+      } else {
+        const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+        const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+        PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+        PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+        PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+        if (edge == g.edgesEnd()) {
+          edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+                                                      allowed2->size() + 1,
+                                                      0));
+        } else {
+          if (g.getEdgeNode1(edge) == node2) {
+            std::swap(node1, node2);
+            std::swap(allowed1, allowed2);
+          }
+        }
+
+        addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+                           cBenefit);
+      }
+    }
+  }
+
+  return p;
+}
+
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+                                                   unsigned pregOption,
+                                                   PBQP::PBQPNum benefit) {
+  costVec[pregOption] += -benefit;
+}
+
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+    PBQP::Matrix &costMat,
+    const PBQPRAProblem::AllowedSet &vr1Allowed,
+    const PBQPRAProblem::AllowedSet &vr2Allowed,
+    PBQP::PBQPNum benefit) {
+
+  assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+  assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+
+  for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+    unsigned preg1 = vr1Allowed[i];
+    for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+      unsigned preg2 = vr2Allowed[j];
+
+      if (preg1 == preg2) {
+        costMat[i + 1][j + 1] += -benefit;
+      }
+    }
+  }
+}
+
+
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+  au.addRequired<SlotIndexes>();
+  au.addPreserved<SlotIndexes>();
+  au.addRequired<LiveIntervals>();
+  //au.addRequiredID(SplitCriticalEdgesID);
+  au.addRequired<RegisterCoalescer>();
+  au.addRequired<CalculateSpillWeights>();
+  au.addRequired<LiveStacks>();
+  au.addPreserved<LiveStacks>();
+  au.addRequired<MachineLoopInfo>();
+  au.addPreserved<MachineLoopInfo>();
+  if (pbqpPreSplitting)
+    au.addRequired<LoopSplitter>();
+  au.addRequired<VirtRegMap>();
+  au.addRequired<RenderMachineFunction>();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
+
+  // Iterate over all live ranges.
+  for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+       itr != end; ++itr) {
+
+    // Ignore physical ones.
+    if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+      continue;
+
+    LiveInterval *li = itr->second;
+
+    // If this live interval is non-empty we will use pbqp to allocate it.
+    // Empty intervals we allocate in a simple post-processing stage in
+    // finalizeAlloc.
+    if (!li->empty()) {
+      vregsToAlloc.insert(li->reg);
+    } else {
+      emptyIntervalVRegs.insert(li->reg);
+    }
+  }
+}
+
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
+                                    MachineRegisterInfo* mri) {
+  int stackSlot = vrm->getStackSlot(spilled->reg);
+
+  if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
+    return;
+  }
+
+  const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
+  LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
+
+  VNInfo *vni;
+  if (stackInterval.getNumValNums() != 0) {
+    vni = stackInterval.getValNumInfo(0);
+  } else {
+    vni = stackInterval.getNextValue(
+        SlotIndex(), 0, lss->getVNInfoAllocator());
+  }
+
+  LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
+  stackInterval.MergeRangesInAsValue(rhsInterval, vni);
+}
+
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+                                     const PBQP::Solution &solution) {
+  // Set to true if we have any spills
+  bool anotherRoundNeeded = false;
+
+  // Clear the existing allocation.
+  vrm->clearAllVirt();
+
+  const PBQP::Graph &g = problem.getGraph();
+  // Iterate over the nodes mapping the PBQP solution to a register
+  // assignment.
+  for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+                                 nodeEnd = g.nodesEnd();
+       node != nodeEnd; ++node) {
+    unsigned vreg = problem.getVRegForNode(node);
+    unsigned alloc = solution.getSelection(node);
+
+    if (problem.isPRegOption(vreg, alloc)) {
+      unsigned preg = problem.getPRegForOption(vreg, alloc);
+      DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+      assert(preg != 0 && "Invalid preg selected.");
+      vrm->assignVirt2Phys(vreg, preg);
+    } else if (problem.isSpillOption(vreg, alloc)) {
+      vregsToAlloc.erase(vreg);
+      const LiveInterval* spillInterval = &lis->getInterval(vreg);
+      double oldWeight = spillInterval->weight;
+      SmallVector<LiveInterval*, 8> spillIs;
+      rmf->rememberUseDefs(spillInterval);
+      std::vector<LiveInterval*> newSpills =
+        lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm);
+      addStackInterval(spillInterval, mri);
+      rmf->rememberSpills(spillInterval, newSpills);
+
+      (void) oldWeight;
+      DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+                   << oldWeight << ", New vregs: ");
+
+      // Copy any newly inserted live intervals into the list of regs to
+      // allocate.
+      for (std::vector<LiveInterval*>::const_iterator
+           itr = newSpills.begin(), end = newSpills.end();
+           itr != end; ++itr) {
+        assert(!(*itr)->empty() && "Empty spill range.");
+        DEBUG(dbgs() << (*itr)->reg << " ");
+        vregsToAlloc.insert((*itr)->reg);
+      }
+
+      DEBUG(dbgs() << ")\n");
+
+      // We need another round if spill intervals were added.
+      anotherRoundNeeded |= !newSpills.empty();
+    } else {
+      assert(false && "Unknown allocation option.");
+    }
+  }
+
+  return !anotherRoundNeeded;
+}
+
+
+void RegAllocPBQP::finalizeAlloc() const {
+  typedef LiveIntervals::iterator LIIterator;
+  typedef LiveInterval::Ranges::const_iterator LRIterator;
+
+  // First allocate registers for the empty intervals.
+  for (RegSet::const_iterator
+       itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
+       itr != end; ++itr) {
+    LiveInterval *li = &lis->getInterval(*itr);
+
+    unsigned physReg = vrm->getRegAllocPref(li->reg);
+
+    if (physReg == 0) {
+      const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+      physReg = *liRC->allocation_order_begin(*mf);
+    }
+
+    vrm->assignVirt2Phys(li->reg, physReg);
+  }
+
+  // Finally iterate over the basic blocks to compute and set the live-in sets.
+ SmallVector liveInMBBs; + MachineBasicBlock *entryMBB = &*mf->begin(); + + for (LIIterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = liItr->second; + unsigned reg = 0; + + // Get the physical register for this interval + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) { + reg = li->reg; + } else if (vrm->isAssignedReg(li->reg)) { + reg = vrm->getPhys(li->reg); + } else { + // Ranges which are assigned a stack slot only are ignored. + continue; + } + + if (reg == 0) { + // Filter out zero regs - they're for intervals that were spilled. + continue; + } + + // Iterate over the ranges of the current interval... + for (LRIterator lrItr = li->begin(), lrEnd = li->end(); + lrItr != lrEnd; ++lrItr) { + + // Find the set of basic blocks which this range is live into... + if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) { + // And add the physreg for this interval to their live-in sets. + for (unsigned i = 0; i != liveInMBBs.size(); ++i) { + if (liveInMBBs[i] != entryMBB) { + if (!liveInMBBs[i]->isLiveIn(reg)) { + liveInMBBs[i]->addLiveIn(reg); + } + } + } + liveInMBBs.clear(); + } + } + } + +} + +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + + mf = &MF; + tm = &mf->getTarget(); + tri = tm->getRegisterInfo(); + tii = tm->getInstrInfo(); + mri = &mf->getRegInfo(); + + lis = &getAnalysis(); + lss = &getAnalysis(); + loopInfo = &getAnalysis(); + rmf = &getAnalysis(); + + vrm = &getAnalysis(); + + + DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); + + // Allocator main loop: + // + // * Map current regalloc problem to a PBQP problem + // * Solve the PBQP problem + // * Map the solution back to a register allocation + // * Spill if necessary + // + // This process is continued till no more spills are generated. + + // Find the vreg intervals in need of allocation. + findVRegIntervalsToAlloc(); + + // If there are non-empty intervals allocate them using pbqp. + if (!vregsToAlloc.empty()) { + + bool pbqpAllocComplete = false; + unsigned round = 0; + + while (!pbqpAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); + + std::auto_ptr problem = + builder->build(mf, lis, loopInfo, vregsToAlloc); + PBQP::Solution solution = + PBQP::HeuristicSolver::solve( + problem->getGraph()); + + pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution); + + ++round; + } + } + + // Finalise allocation, allocate empty ranges. 
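
// "Allocate empty ranges" below is the simple post-processing promised
// earlier: an empty interval interferes with nothing, so it can take its
// preferred register or, failing that, the first register in its class's
// allocation order. A standalone sketch (illustrative names; pref == 0
// means "no preference", and allocOrder is assumed non-empty):

#include <vector>

unsigned chooseRegForEmptyInterval(unsigned pref,
                                   const std::vector<unsigned> &allocOrder) {
  if (pref != 0)
    return pref;                 // honour the allocation hint
  return allocOrder.front();     // anything works; take the first
}
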
+ finalizeAlloc(); + + rmf->renderMachineFunction("After PBQP register allocation.", vrm); + + vregsToAlloc.clear(); + emptyIntervalVRegs.clear(); + + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); + + // Run rewriter + std::auto_ptr rewriter(createVirtRegRewriter()); + + rewriter->runOnMachineFunction(*mf, *vrm, lis); + + return true; +} + +FunctionPass* llvm::createPBQPRegisterAllocator( + std::auto_ptr builder) { + return new RegAllocPBQP(builder); +} + +FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { + if (pbqpCoalescing) { + return createPBQPRegisterAllocator( + std::auto_ptr(new PBQPBuilderWithCoalescing())); + } // else + return createPBQPRegisterAllocator( + std::auto_ptr(new PBQPBuilder())); +} + +#undef DEBUG_TYPE diff --git a/final/lib/CodeGen/RegisterCoalescer.cpp b/final/lib/CodeGen/RegisterCoalescer.cpp new file mode 100644 index 00000000000..407559a211a --- /dev/null +++ b/final/lib/CodeGen/RegisterCoalescer.cpp @@ -0,0 +1,197 @@ +//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic RegisterCoalescer interface which +// is used as the common interface used by all clients and +// implementations of register coalescing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" + +using namespace llvm; + +// Register the RegisterCoalescer interface, providing a nice name to refer to. +INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer", + SimpleRegisterCoalescing) +char RegisterCoalescer::ID = 0; + +// RegisterCoalescer destructor: DO NOT move this to the header file +// for RegisterCoalescer or else clients of the RegisterCoalescer +// class may not depend on the RegisterCoalescer.o file in the current +// .a file, causing alias analysis support to not be included in the +// tool correctly! +// +RegisterCoalescer::~RegisterCoalescer() {} + +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else + return false; + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = crossClass_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. 
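
// The rule in the comment above, in isolation: a copy is normalized so that
// a physical register, if present, is always Dst, and phys-to-phys copies
// are rejected. Standalone sketch; isPhys is an illustrative stand-in for
// TargetRegisterInfo::isPhysicalRegister.

#include <utility>

bool isPhys(unsigned reg) { return reg != 0 && reg < 1024; } // sketch only

bool canonicalizeCopy(unsigned &src, unsigned &dst,
                      unsigned &srcSub, unsigned &dstSub, bool &flipped) {
  if (isPhys(src)) {
    if (isPhys(dst))
      return false;              // phys -> phys: give up
    std::swap(src, dst);         // the physreg becomes Dst
    std::swap(srcSub, dstSub);
    flipped = true;
  }
  return true;
}
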
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // For now we only handle the case of identical indices in commensurate + // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg + // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. + if (SrcSub != DstSub) + return false; + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (!getCommonSubClass(DstRC, SrcRC)) + return false; + SrcSub = DstSub = 0; + } + + // There can be no SrcSub. + if (SrcSub) { + std::swap(Src, Dst); + DstSub = SrcSub; + SrcSub = 0; + assert(!flipped_ && "Unexpected flip"); + flipped_ = true; + } + + // Find the new register class. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (DstSub) + newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + else + newRC_ = getCommonSubClass(DstRC, SrcRC); + if (!newRC_) + return false; + crossClass_ = newRC_ != DstRC || newRC_ != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + srcReg_ = Src; + dstReg_ = Dst; + subIdx_ = DstSub; + return true; +} + +bool CoalescerPair::flip() { + if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + return false; + std::swap(srcReg_, dstReg_); + flipped_ = !flipped_; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is srcReg_. + if (Dst == srcReg_) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != srcReg_) { + return false; + } + + // Now check that Dst matches dstReg_. + if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!subIdx_ && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = tri_.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return dstReg_ == Dst; + // This is a partial register copy. Check that the parts match. + return tri_.getSubReg(dstReg_, SrcSub) == Dst; + } else { + // dstReg_ is virtual. + if (dstReg_ != Dst) + return false; + // Registers match, do the subregisters line up? + return compose(subIdx_, SrcSub) == DstSub; + } +} + +// Because of the way .a files work, we must force the SimpleRC +// implementation to be pulled in if the RegisterCoalescer classes are +// pulled in. 
Otherwise we run the risk of RegisterCoalescer being +// used, but the default implementation not being linked into the tool +// that uses it. +DEFINING_FILE_FOR(RegisterCoalescer) diff --git a/final/lib/CodeGen/RegisterScavenging.cpp b/final/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 00000000000..b9dd28e5c78 --- /dev/null +++ b/final/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,389 @@ +//===-- RegisterScavenging.cpp - Machine register scavenging --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "reg-scavenging" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" +using namespace llvm; + +/// setUsed - Set the register and its sub-registers as being used. +void RegScavenger::setUsed(unsigned Reg) { + RegsAvailable.reset(Reg); + + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + RegsAvailable.reset(SubReg); +} + +bool RegScavenger::isAliasUsed(unsigned Reg) const { + if (isUsed(Reg)) + return true; + for (const unsigned *R = TRI->getAliasSet(Reg); *R; ++R) + if (isUsed(*R)) + return true; + return false; +} + +void RegScavenger::initRegState() { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + + // All registers started out unused. + RegsAvailable.set(); + + // Reserved registers are always used. + RegsAvailable ^= ReservedRegs; + + if (!MBB) + return; + + // Live-in registers are in use. + for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), + E = MBB->livein_end(); I != E; ++I) + setUsed(*I); + + // Pristine CSRs are also unavailable. + BitVector PR = MBB->getParent()->getFrameInfo()->getPristineRegs(MBB); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setUsed(I); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + const TargetMachine &TM = MF.getTarget(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumPhysRegs == 0 || NumPhysRegs == TRI->getNumRegs()) && + "Target changed?"); + + // Self-initialize. + if (!MBB) { + NumPhysRegs = TRI->getNumRegs(); + RegsAvailable.resize(NumPhysRegs); + + // Create reserved registers bitvector. + ReservedRegs = TRI->getReservedRegs(MF); + + // Create callee-saved registers bitvector. 
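
// Building such a bitvector from the target's 0-terminated callee-saved
// list is a small self-contained pattern (sketch; std::bitset stands in
// for llvm::BitVector, and 64 for TRI->getNumRegs()):

#include <bitset>

std::bitset<64> fromZeroTerminatedList(const unsigned *regs) {
  std::bitset<64> bv;
  if (regs)
    for (unsigned i = 0; regs[i]; ++i)   // the list ends at the first 0
      bv.set(regs[i]);
  return bv;
}
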
+ CalleeSavedRegs.resize(NumPhysRegs); + const unsigned *CSRegs = TRI->getCalleeSavedRegs(); + if (CSRegs != NULL) + for (unsigned i = 0; CSRegs[i]; ++i) + CalleeSavedRegs.set(CSRegs[i]); + } + + MBB = mbb; + initRegState(); + + Tracking = false; +} + +void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getSubRegisters(Reg); *R; R++) + BV.set(*R); +} + +void RegScavenger::addRegWithAliases(BitVector &BV, unsigned Reg) { + BV.set(Reg); + for (const unsigned *R = TRI->getAliasSet(Reg); *R; R++) + BV.set(*R); +} + +void RegScavenger::forward() { + // Move ptr forward. + if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + MBBI = llvm::next(MBBI); + } + + MachineInstr *MI = MBBI; + + if (MI == ScavengeRestore) { + ScavengedReg = 0; + ScavengedRC = NULL; + ScavengeRestore = NULL; + } + + if (MI->isDebugValue()) + return; + + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); + BitVector EarlyClobberRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); + BitVector DefRegs(NumPhysRegs); + BitVector DeadRegs(NumPhysRegs); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || isReserved(Reg)) + continue; + + if (MO.isUse()) { + // Two-address operands implicitly kill. + if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) + addRegWithSubRegs(KillRegs, Reg); + } else { + assert(MO.isDef()); + if (!isPred && MO.isDead()) + addRegWithSubRegs(DeadRegs, Reg); + else + addRegWithSubRegs(DefRegs, Reg); + if (MO.isEarlyClobber()) + addRegWithAliases(EarlyClobberRegs, Reg); + } + } + + // Verify uses and defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || isReserved(Reg)) + continue; + if (MO.isUse()) { + if (!isUsed(Reg)) { + // Check if it's partial live: e.g. + // D0 = insert_subreg D0, S0 + // ... D0 + // The problem is the insert_subreg could be eliminated. The use of + // D0 is using a partially undef value. This is not *incorrect* since + // S1 is can be freely clobbered. + // Ideally we would like a way to model this, but leaving the + // insert_subreg around causes both correctness and performance issues. + bool SubUsed = false; + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) + if (isUsed(SubReg)) { + SubUsed = true; + break; + } + assert(SubUsed && "Using an undefined register!"); + } + assert((!EarlyClobberRegs.test(Reg) || MI->isRegTiedToDefOperand(i)) && + "Using an early clobbered register!"); + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } + } + + // Commit the changes. 
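
// The commit below applies the per-instruction sets in a deliberate order:
// kills and dead defs free their registers first, and live defs occupy
// theirs last, so a register that one instruction both kills and redefines
// ends up marked used. A standalone model (std::bitset stands in for the
// scavenger's BitVector; a set bit means "available"):

#include <bitset>

void commitChanges(std::bitset<64> &available,
                   const std::bitset<64> &kills,
                   const std::bitset<64> &deads,
                   const std::bitset<64> &defs) {
  available |= kills;    // setUnused(KillRegs)
  available |= deads;    // setUnused(DeadRegs)
  available &= ~defs;    // setUsed(DefRegs) -- applied last on purpose
}
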
+ setUnused(KillRegs); + setUnused(DeadRegs); + setUsed(DefRegs); +} + +void RegScavenger::getRegsUsed(BitVector &used, bool includeReserved) { + if (includeReserved) + used = ~RegsAvailable; + else + used = ~RegsAvailable & ~ReservedRegs; +} + +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + "\n"); + return *I; + } + return 0; +} + +/// getRegsAvailable - Return all available registers in the register class +/// in Mask. +BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { + BitVector Mask(TRI->getNumRegs()); + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) + Mask.set(*I); + return Mask; +} + +/// findSurvivorReg - Return the candidate register that is unused for the +/// longest after StargMII. UseMI is set to the instruction where the search +/// stopped. +/// +/// No more than InstrLimit instructions are inspected. +/// +unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI) { + int Survivor = Candidates.find_first(); + assert(Survivor > 0 && "No candidates for scavenging"); + + MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); + assert(StartMI != ME && "MI already at terminator"); + MachineBasicBlock::iterator RestorePointMI = StartMI; + MachineBasicBlock::iterator MI = StartMI; + + bool inVirtLiveRange = false; + for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + if (MI->isDebugValue()) { + ++InstrLimit; // Don't count debug instructions + continue; + } + bool isVirtKillInsn = false; + bool isVirtDefInsn = false; + // Remove any candidates touched by instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isUndef() || !MO.getReg()) + continue; + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isDef()) + isVirtDefInsn = true; + else if (MO.isKill()) + isVirtKillInsn = true; + continue; + } + Candidates.reset(MO.getReg()); + for (const unsigned *R = TRI->getAliasSet(MO.getReg()); *R; R++) + Candidates.reset(*R); + } + // If we're not in a virtual reg's live range, this is a valid + // restore point. + if (!inVirtLiveRange) RestorePointMI = MI; + + // Update whether we're in the live range of a virtual register + if (isVirtKillInsn) inVirtLiveRange = false; + if (isVirtDefInsn) inVirtLiveRange = true; + + // Was our survivor untouched by this instruction? + if (Candidates.test(Survivor)) + continue; + + // All candidates gone? + if (Candidates.none()) + break; + + Survivor = Candidates.find_first(); + } + // If we ran off the end, that's where we want to restore. + if (MI == ME) RestorePointMI = ME; + assert (RestorePointMI != StartMI && + "No available scavenger restore location!"); + + // We ran out of candidates, so stop the search. + UseMI = RestorePointMI; + return Survivor; +} + +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + // Consider all allocatable registers in the register class initially + BitVector Candidates = + TRI->getAllocatableSet(*I->getParent()->getParent(), RC); + + // Exclude all the registers being used by the instruction. 
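
// findSurvivorReg above, reduced to a standalone model (illustrative types:
// each instruction is just the set of registers it touches; 0 means "no
// register"). Walk forward, knock out any candidate an instruction touches,
// and keep the last candidate that was still standing:

#include <set>
#include <vector>

unsigned findSurvivor(const std::vector<std::set<unsigned> > &touchedPerInstr,
                      std::set<unsigned> candidates) {
  unsigned survivor = candidates.empty() ? 0 : *candidates.begin();
  for (const std::set<unsigned> &touched : touchedPerInstr) {
    for (unsigned r : touched)
      candidates.erase(r);
    if (candidates.empty())
      break;                           // keep the last known survivor
    if (!candidates.count(survivor))
      survivor = *candidates.begin();  // old survivor died; pick another
  }
  return survivor;
}
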
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + Candidates.reset(MO.getReg()); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. Search explicitly rather than masking out based on + // RegsAvailable, as RegsAvailable does not take aliases into account. + // That's what getRegsAvailable() is for. + BitVector Available = getRegsAvailable(RC); + + if ((Candidates & Available).any()) + Candidates &= Available; + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isAliasUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } + + assert(ScavengedReg == 0 && + "Scavenger slot is live, unable to scavenge another register!"); + + // Avoid infinite regress + ScavengedReg = SReg; + + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { + // Spill the scavenged register before I. + assert(ScavengingFrameIndex >= 0 && + "Cannot scavenge register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); + MachineBasicBlock::iterator II = prior(I); + TRI->eliminateFrameIndex(II, SPAdj, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); + II = prior(UseMI); + TRI->eliminateFrameIndex(II, SPAdj, this); + } + + ScavengeRestore = prior(UseMI); + + // Doing this here leads to infinite regress. + // ScavengedReg = SReg; + ScavengedRC = RC; + + DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << + "\n"); + + return SReg; +} diff --git a/final/lib/CodeGen/RenderMachineFunction.cpp b/final/lib/CodeGen/RenderMachineFunction.cpp new file mode 100644 index 00000000000..cbfd5a23d63 --- /dev/null +++ b/final/lib/CodeGen/RenderMachineFunction.cpp @@ -0,0 +1,1014 @@ +//===-- llvm/CodeGen/RenderMachineFunction.cpp - MF->HTML -----s-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "rendermf" + +#include "RenderMachineFunction.h" + +#include "VirtRegMap.h" + +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" + +#include + +using namespace llvm; + +char RenderMachineFunction::ID = 0; +INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf", + "Render machine functions (and related info) to HTML pages", + false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(RenderMachineFunction, "rendermf", + "Render machine functions (and related info) to HTML pages", + false, false) + +static cl::opt +outputFileSuffix("rmf-file-suffix", + cl::desc("Appended to function name to get output file name " + "(default: \".html\")"), + cl::init(".html"), cl::Hidden); + +static cl::opt +machineFuncsToRender("rmf-funcs", + cl::desc("Coma seperated list of functions to render" + ", or \"*\"."), + cl::init(""), cl::Hidden); + +static cl::opt +pressureClasses("rmf-classes", + cl::desc("Register classes to render pressure for."), + cl::init(""), cl::Hidden); + +static cl::opt +showIntervals("rmf-intervals", + cl::desc("Live intervals to show alongside code."), + cl::init(""), cl::Hidden); + +static cl::opt +filterEmpty("rmf-filter-empty-intervals", + cl::desc("Don't display empty intervals."), + cl::init(true), cl::Hidden); + +static cl::opt +showEmptyIndexes("rmf-empty-indexes", + cl::desc("Render indexes not associated with instructions or " + "MBB starts."), + cl::init(false), cl::Hidden); + +static cl::opt +useFancyVerticals("rmf-fancy-verts", + cl::desc("Use SVG for vertical text."), + cl::init(true), cl::Hidden); + +static cl::opt +prettyHTML("rmf-pretty-html", + cl::desc("Pretty print HTML. 
For debugging the renderer only.."), + cl::init(false), cl::Hidden); + + +namespace llvm { + + bool MFRenderingOptions::renderingOptionsProcessed; + std::set MFRenderingOptions::mfNamesToRender; + bool MFRenderingOptions::renderAllMFs = false; + + std::set MFRenderingOptions::classNamesToRender; + bool MFRenderingOptions::renderAllClasses = false; + + std::set > + MFRenderingOptions::intervalNumsToRender; + unsigned MFRenderingOptions::intervalTypesToRender = ExplicitOnly; + + template + void MFRenderingOptions::splitComaSeperatedList(const std::string &s, + OutputItr outItr) { + std::string::const_iterator curPos = s.begin(); + std::string::const_iterator nextComa = std::find(curPos, s.end(), ','); + while (nextComa != s.end()) { + std::string elem; + std::copy(curPos, nextComa, std::back_inserter(elem)); + *outItr = elem; + ++outItr; + curPos = llvm::next(nextComa); + nextComa = std::find(curPos, s.end(), ','); + } + + if (curPos != s.end()) { + std::string elem; + std::copy(curPos, s.end(), std::back_inserter(elem)); + *outItr = elem; + ++outItr; + } + } + + void MFRenderingOptions::processOptions() { + if (!renderingOptionsProcessed) { + processFuncNames(); + processRegClassNames(); + processIntervalNumbers(); + renderingOptionsProcessed = true; + } + } + + void MFRenderingOptions::processFuncNames() { + if (machineFuncsToRender == "*") { + renderAllMFs = true; + } else { + splitComaSeperatedList(machineFuncsToRender, + std::inserter(mfNamesToRender, + mfNamesToRender.begin())); + } + } + + void MFRenderingOptions::processRegClassNames() { + if (pressureClasses == "*") { + renderAllClasses = true; + } else { + splitComaSeperatedList(pressureClasses, + std::inserter(classNamesToRender, + classNamesToRender.begin())); + } + } + + void MFRenderingOptions::processIntervalNumbers() { + std::set intervalRanges; + splitComaSeperatedList(showIntervals, + std::inserter(intervalRanges, + intervalRanges.begin())); + std::for_each(intervalRanges.begin(), intervalRanges.end(), + processIntervalRange); + } + + void MFRenderingOptions::processIntervalRange( + const std::string &intervalRangeStr) { + if (intervalRangeStr == "*") { + intervalTypesToRender |= All; + } else if (intervalRangeStr == "virt-nospills*") { + intervalTypesToRender |= VirtNoSpills; + } else if (intervalRangeStr == "spills*") { + intervalTypesToRender |= VirtSpills; + } else if (intervalRangeStr == "virt*") { + intervalTypesToRender |= AllVirt; + } else if (intervalRangeStr == "phys*") { + intervalTypesToRender |= AllPhys; + } else { + std::istringstream iss(intervalRangeStr); + unsigned reg1, reg2; + if ((iss >> reg1 >> std::ws)) { + if (iss.eof()) { + intervalNumsToRender.insert(std::make_pair(reg1, reg1 + 1)); + } else { + char c; + iss >> c; + if (c == '-' && (iss >> reg2)) { + intervalNumsToRender.insert(std::make_pair(reg1, reg2 + 1)); + } else { + dbgs() << "Warning: Invalid interval range \"" + << intervalRangeStr << "\" in -rmf-intervals. Skipping.\n"; + } + } + } else { + dbgs() << "Warning: Invalid interval number \"" + << intervalRangeStr << "\" in -rmf-intervals. 
Skipping.\n"; + } + } + } + + void MFRenderingOptions::setup(MachineFunction *mf, + const TargetRegisterInfo *tri, + LiveIntervals *lis, + const RenderMachineFunction *rmf) { + this->mf = mf; + this->tri = tri; + this->lis = lis; + this->rmf = rmf; + + clear(); + } + + void MFRenderingOptions::clear() { + regClassesTranslatedToCurrentFunction = false; + regClassSet.clear(); + + intervalsTranslatedToCurrentFunction = false; + intervalSet.clear(); + } + + void MFRenderingOptions::resetRenderSpecificOptions() { + intervalSet.clear(); + intervalsTranslatedToCurrentFunction = false; + } + + bool MFRenderingOptions::shouldRenderCurrentMachineFunction() const { + processOptions(); + + return (renderAllMFs || + mfNamesToRender.find(mf->getFunction()->getName()) != + mfNamesToRender.end()); + } + + const MFRenderingOptions::RegClassSet& MFRenderingOptions::regClasses() const{ + translateRegClassNamesToCurrentFunction(); + return regClassSet; + } + + const MFRenderingOptions::IntervalSet& MFRenderingOptions::intervals() const { + translateIntervalNumbersToCurrentFunction(); + return intervalSet; + } + + bool MFRenderingOptions::renderEmptyIndexes() const { + return showEmptyIndexes; + } + + bool MFRenderingOptions::fancyVerticals() const { + return useFancyVerticals; + } + + void MFRenderingOptions::translateRegClassNamesToCurrentFunction() const { + if (!regClassesTranslatedToCurrentFunction) { + processOptions(); + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + if (renderAllClasses || + classNamesToRender.find(trc->getName()) != + classNamesToRender.end()) { + regClassSet.insert(trc); + } + } + regClassesTranslatedToCurrentFunction = true; + } + } + + void MFRenderingOptions::translateIntervalNumbersToCurrentFunction() const { + if (!intervalsTranslatedToCurrentFunction) { + processOptions(); + + // If we're not just doing explicit then do a copy over all matching + // types. + if (intervalTypesToRender != ExplicitOnly) { + for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (filterEmpty && li->empty()) + continue; + + if ((TargetRegisterInfo::isPhysicalRegister(li->reg) && + (intervalTypesToRender & AllPhys))) { + intervalSet.insert(li); + } else if (TargetRegisterInfo::isVirtualRegister(li->reg)) { + if (((intervalTypesToRender & VirtNoSpills) && !rmf->isSpill(li)) || + ((intervalTypesToRender & VirtSpills) && rmf->isSpill(li))) { + intervalSet.insert(li); + } + } + } + } + + // If we need to process the explicit list... 
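
// Entries in that explicit list were parsed above with an istringstream:
// either a single number ("5") or an inclusive range ("2-7"). The same
// pattern in isolation, returning a half-open pair (illustrative helper;
// {0, 0} signals malformed input):

#include <sstream>
#include <string>
#include <utility>

std::pair<unsigned, unsigned> parseIntervalRange(const std::string &s) {
  std::istringstream iss(s);
  unsigned lo, hi;
  if (!(iss >> lo >> std::ws))
    return std::make_pair(0u, 0u);
  if (iss.eof())
    return std::make_pair(lo, lo + 1);   // single number
  char c;
  if ((iss >> c) && c == '-' && (iss >> hi))
    return std::make_pair(lo, hi + 1);   // "lo-hi", inclusive on input
  return std::make_pair(0u, 0u);
}
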
+ if (intervalTypesToRender != All) { + for (std::set >::const_iterator + regRangeItr = intervalNumsToRender.begin(), + regRangeEnd = intervalNumsToRender.end(); + regRangeItr != regRangeEnd; ++regRangeItr) { + const std::pair &range = *regRangeItr; + for (unsigned reg = range.first; reg != range.second; ++reg) { + if (lis->hasInterval(reg)) { + intervalSet.insert(&lis->getInterval(reg)); + } + } + } + } + + intervalsTranslatedToCurrentFunction = true; + } + } + + // ---------- TargetRegisterExtraInformation implementation ---------- + + TargetRegisterExtraInfo::TargetRegisterExtraInfo() + : mapsPopulated(false) { + } + + void TargetRegisterExtraInfo::setup(MachineFunction *mf, + MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, + LiveIntervals *lis) { + this->mf = mf; + this->mri = mri; + this->tri = tri; + this->lis = lis; + } + + void TargetRegisterExtraInfo::reset() { + if (!mapsPopulated) { + initWorst(); + //initBounds(); + initCapacity(); + mapsPopulated = true; + } + + resetPressureAndLiveStates(); + } + + void TargetRegisterExtraInfo::clear() { + prWorst.clear(); + vrWorst.clear(); + capacityMap.clear(); + pressureMap.clear(); + //liveStatesMap.clear(); + mapsPopulated = false; + } + + void TargetRegisterExtraInfo::initWorst() { + assert(!mapsPopulated && prWorst.empty() && vrWorst.empty() && + "Worst map already initialised?"); + + // Start with the physical registers. + for (unsigned preg = 1; preg < tri->getNumRegs(); ++preg) { + WorstMapLine &pregLine = prWorst[preg]; + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + + unsigned numOverlaps = 0; + for (TargetRegisterClass::iterator rItr = trc->begin(), + rEnd = trc->end(); + rItr != rEnd; ++rItr) { + unsigned trcPReg = *rItr; + if (tri->regsOverlap(preg, trcPReg)) + ++numOverlaps; + } + + pregLine[trc] = numOverlaps; + } + } + + // Now the register classes. 
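
// Both halves of initWorst compute the same quantity: for a source register
// (or every register of a source class) count how many registers of the
// target class alias it, and keep the maximum. In isolation (illustrative;
// the predicate parameter stands in for tri->regsOverlap):

#include <algorithm>
#include <vector>

unsigned worstOverlap(const std::vector<unsigned> &srcRegs,
                      const std::vector<unsigned> &targetClassRegs,
                      bool (*regsOverlap)(unsigned, unsigned)) {
  unsigned worst = 0;
  for (unsigned s : srcRegs) {
    unsigned n = 0;
    for (unsigned t : targetClassRegs)
      if (regsOverlap(s, t))
        ++n;
    worst = std::max(worst, n);
  }
  return worst;
}
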
+ for (TargetRegisterInfo::regclass_iterator rc1Itr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rc1Itr != rcEnd; ++rc1Itr) { + const TargetRegisterClass *trc1 = *rc1Itr; + WorstMapLine &classLine = vrWorst[trc1]; + + for (TargetRegisterInfo::regclass_iterator rc2Itr = tri->regclass_begin(); + rc2Itr != rcEnd; ++rc2Itr) { + const TargetRegisterClass *trc2 = *rc2Itr; + + unsigned worst = 0; + + for (TargetRegisterClass::iterator trc1Itr = trc1->begin(), + trc1End = trc1->end(); + trc1Itr != trc1End; ++trc1Itr) { + unsigned trc1Reg = *trc1Itr; + unsigned trc1RegWorst = 0; + + for (TargetRegisterClass::iterator trc2Itr = trc2->begin(), + trc2End = trc2->end(); + trc2Itr != trc2End; ++trc2Itr) { + unsigned trc2Reg = *trc2Itr; + if (tri->regsOverlap(trc1Reg, trc2Reg)) + ++trc1RegWorst; + } + if (trc1RegWorst > worst) { + worst = trc1RegWorst; + } + } + + if (worst != 0) { + classLine[trc2] = worst; + } + } + } + } + + unsigned TargetRegisterExtraInfo::getWorst( + unsigned reg, + const TargetRegisterClass *trc) const { + const WorstMapLine *wml = 0; + if (TargetRegisterInfo::isPhysicalRegister(reg)) { + PRWorstMap::const_iterator prwItr = prWorst.find(reg); + assert(prwItr != prWorst.end() && "Missing prWorst entry."); + wml = &prwItr->second; + } else { + const TargetRegisterClass *regTRC = mri->getRegClass(reg); + VRWorstMap::const_iterator vrwItr = vrWorst.find(regTRC); + assert(vrwItr != vrWorst.end() && "Missing vrWorst entry."); + wml = &vrwItr->second; + } + + WorstMapLine::const_iterator wmlItr = wml->find(trc); + if (wmlItr == wml->end()) + return 0; + + return wmlItr->second; + } + + void TargetRegisterExtraInfo::initCapacity() { + assert(!mapsPopulated && capacityMap.empty() && + "Capacity map already initialised?"); + + for (TargetRegisterInfo::regclass_iterator rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + unsigned capacity = std::distance(trc->allocation_order_begin(*mf), + trc->allocation_order_end(*mf)); + + if (capacity != 0) + capacityMap[trc] = capacity; + } + } + + unsigned TargetRegisterExtraInfo::getCapacity( + const TargetRegisterClass *trc) const { + CapacityMap::const_iterator cmItr = capacityMap.find(trc); + assert(cmItr != capacityMap.end() && + "vreg with unallocable register class"); + return cmItr->second; + } + + void TargetRegisterExtraInfo::resetPressureAndLiveStates() { + pressureMap.clear(); + //liveStatesMap.clear(); + + // Iterate over all slots. + + + // Iterate over all live intervals. + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (TargetRegisterInfo::isPhysicalRegister(li->reg)) + continue; + + // For all ranges in the current interal. + for (LiveInterval::iterator lrItr = li->begin(), + lrEnd = li->end(); + lrItr != lrEnd; ++lrItr) { + LiveRange *lr = &*lrItr; + + // For all slots in the current range. + for (SlotIndex i = lr->start; i != lr->end; i = i.getNextSlot()) { + + // Record increased pressure at index for all overlapping classes. 
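
// The recording step below, modeled standalone: pressure at a (slot, class)
// pair is the sum of the worst-case contributions of every live range
// covering that slot (illustrative types; slots are plain integers here):

#include <map>

typedef unsigned Slot;
typedef unsigned ClassId;
typedef std::map<Slot, std::map<ClassId, unsigned> > PressureMap;

void addRangePressure(PressureMap &pressure, Slot begin, Slot end,
                      ClassId cls, unsigned worstContribution) {
  for (Slot s = begin; s != end; ++s)       // half-open range [begin, end)
    pressure[s][cls] += worstContribution;  // operator[] default-inits to 0
}
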
+ for (TargetRegisterInfo::regclass_iterator + rcItr = tri->regclass_begin(), + rcEnd = tri->regclass_end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + + if (trc->allocation_order_begin(*mf) == + trc->allocation_order_end(*mf)) + continue; + + unsigned worstAtI = getWorst(li->reg, trc); + + if (worstAtI != 0) { + pressureMap[i][trc] += worstAtI; + } + } + } + } + } + } + + unsigned TargetRegisterExtraInfo::getPressureAtSlot( + const TargetRegisterClass *trc, + SlotIndex i) const { + PressureMap::const_iterator pmItr = pressureMap.find(i); + if (pmItr == pressureMap.end()) + return 0; + const PressureMapLine &pmLine = pmItr->second; + PressureMapLine::const_iterator pmlItr = pmLine.find(trc); + if (pmlItr == pmLine.end()) + return 0; + return pmlItr->second; + } + + bool TargetRegisterExtraInfo::classOverCapacityAtSlot( + const TargetRegisterClass *trc, + SlotIndex i) const { + return (getPressureAtSlot(trc, i) > getCapacity(trc)); + } + + // ---------- MachineFunctionRenderer implementation ---------- + + void RenderMachineFunction::Spacer::print(raw_ostream &os) const { + if (!prettyHTML) + return; + for (unsigned i = 0; i < ns; ++i) { + os << " "; + } + } + + RenderMachineFunction::Spacer RenderMachineFunction::s(unsigned ns) const { + return Spacer(ns); + } + + raw_ostream& operator<<(raw_ostream &os, const RenderMachineFunction::Spacer &s) { + s.print(os); + return os; + } + + template + std::string RenderMachineFunction::escapeChars(Iterator sBegin, Iterator sEnd) const { + std::string r; + + for (Iterator sItr = sBegin; sItr != sEnd; ++sItr) { + char c = *sItr; + + switch (c) { + case '<': r.append("<"); break; + case '>': r.append(">"); break; + case '&': r.append("&"); break; + case ' ': r.append(" "); break; + case '\"': r.append("""); break; + default: r.push_back(c); break; + } + } + + return r; + } + + RenderMachineFunction::LiveState + RenderMachineFunction::getLiveStateAt(const LiveInterval *li, + SlotIndex i) const { + const MachineInstr *mi = sis->getInstructionFromIndex(i); + + // For uses/defs recorded use/def indexes override current liveness and + // instruction operands (Only for the interval which records the indexes). + if (i.isUse() || i.isDef()) { + UseDefs::const_iterator udItr = useDefs.find(li); + if (udItr != useDefs.end()) { + const SlotSet &slotSet = udItr->second; + if (slotSet.count(i)) { + if (i.isUse()) { + return Used; + } + // else + return Defined; + } + } + } + + // If the slot is a load/store, or there's no info in the use/def set then + // use liveness and instruction operand info. + if (li->liveAt(i)) { + + if (mi == 0) { + if (vrm == 0 || + (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { + return AliveReg; + } else { + return AliveStack; + } + } else { + if (i.isDef() && mi->definesRegister(li->reg, tri)) { + return Defined; + } else if (i.isUse() && mi->readsRegister(li->reg)) { + return Used; + } else { + if (vrm == 0 || + (vrm->getStackSlot(li->reg) == VirtRegMap::NO_STACK_SLOT)) { + return AliveReg; + } else { + return AliveStack; + } + } + } + } + return Dead; + } + + RenderMachineFunction::PressureState + RenderMachineFunction::getPressureStateAt(const TargetRegisterClass *trc, + SlotIndex i) const { + if (trei.getPressureAtSlot(trc, i) == 0) { + return Zero; + } else if (trei.classOverCapacityAtSlot(trc, i)){ + return High; + } + return Low; + } + + /// \brief Render a machine instruction. 
+ void RenderMachineFunction::renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const { + std::string s; + raw_string_ostream oss(s); + oss << *mi; + + os << escapeChars(oss.str()); + } + + template + void RenderMachineFunction::renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const { + if (ro.fancyVerticals()) { + os << indent << "\n" + << indent + s(6) << "" << t << "\n" + << indent + s(4) << "\">\n" + << indent << "\n"; + } else { + std::ostringstream oss; + oss << t; + std::string tStr(oss.str()); + + os << indent; + for (std::string::iterator tStrItr = tStr.begin(), tStrEnd = tStr.end(); + tStrItr != tStrEnd; ++tStrItr) { + os << *tStrItr << "
"; + } + os << "\n"; + } + } + + void RenderMachineFunction::insertCSS(const Spacer &indent, + raw_ostream &os) const { + os << indent << "\n"; + } + + void RenderMachineFunction::renderFunctionSummary( + const Spacer &indent, raw_ostream &os, + const char * const renderContextStr) const { + os << indent << "

Function: " << mf->getFunction()->getName() + << "

\n" + << indent << "

Rendering context: " << renderContextStr << "

\n"; + } + + + void RenderMachineFunction::renderPressureTableLegend( + const Spacer &indent, + raw_ostream &os) const { + os << indent << "

Rendering Pressure Legend:

\n" + << indent << "\n" + << indent + s(2) << "\n" + << indent + s(4) << "" + "\n" + << indent + s(2) << "\n" + << indent + s(2) << "\n" + << indent + s(4) << "" + "" + "\n" + << indent + s(2) << "\n" + << indent + s(2) << "\n" + << indent + s(4) << "" + "" + "\n" + << indent + s(2) << "\n" + << indent + s(2) << "\n" + << indent + s(4) << "" + "" + "\n" + << indent + s(2) << "\n" + << indent << "
PressureDescriptionAppearance
No PressureNo physical registers of this class requested.  
Low PressureSufficient physical registers to meet demand.  
High PressurePotentially insufficient physical registers to meet demand.  
\n"; + } + + template + void RenderMachineFunction::renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair &rleAccumulator, + const std::map &cellTypeStrs) const { + + if (rleAccumulator.second == 0) + return; + + typename std::map::const_iterator ctsItr = + cellTypeStrs.find(rleAccumulator.first); + + assert(ctsItr != cellTypeStrs.end() && "No string for given cell type."); + + os << indent + s(4) << "second << "\""; + if (rleAccumulator.second > 1) + os << " colspan=" << rleAccumulator.second; + os << ">\n"; + } + + + void RenderMachineFunction::renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const { + + std::map lsStrs; + lsStrs[Dead] = "l-n"; + lsStrs[Defined] = "l-d"; + lsStrs[Used] = "l-u"; + lsStrs[AliveReg] = "l-r"; + lsStrs[AliveStack] = "l-s"; + + std::map psStrs; + psStrs[Zero] = "p-z"; + psStrs[Low] = "p-l"; + psStrs[High] = "p-h"; + + // Open the table... + + os << indent << "\n" + << indent + s(2) << "\n"; + + // Render the header row... + + os << indent + s(4) << "\n" + << indent + s(4) << "\n"; + + // Render class names if necessary... + if (!ro.regClasses().empty()) { + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + os << indent + s(4) << "\n"; + } + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? + if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "\n"; + + // Render interval numbers if necessary... + if (!ro.intervals().empty()) { + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + + const LiveInterval *li = *liItr; + os << indent + s(4) << "\n"; + } + } + + os << indent + s(2) << "\n"; + + // End header row, start with the data rows... + + MachineInstr *mi = 0; + + // Data rows: + for (SlotIndex i = sis->getZeroIndex(); i != sis->getLastIndex(); + i = i.getNextSlot()) { + + // Render the slot column. + os << indent + s(2) << "\n"; + + // Render the code column. + if (i.isLoad()) { + MachineBasicBlock *mbb = sis->getMBBFromIndex(i); + mi = sis->getInstructionFromIndex(i); + + if (i == sis->getMBBStartIdx(mbb) || mi != 0 || + ro.renderEmptyIndexes()) { + os << indent + s(4) << "\n" + << indent + s(4) << "\n"; + } else { + i = i.getStoreIndex(); // <- Will be incremented to the next index. + continue; + } + } + + // Render the class columns. + if (!ro.regClasses().empty()) { + std::pair psRLEAccumulator(Zero, 0); + for (MFRenderingOptions::RegClassSet::const_iterator + rcItr = ro.regClasses().begin(), + rcEnd = ro.regClasses().end(); + rcItr != rcEnd; ++rcItr) { + const TargetRegisterClass *trc = *rcItr; + PressureState newPressure = getPressureStateAt(trc, i); + + if (newPressure == psRLEAccumulator.first) { + ++psRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + psRLEAccumulator.first = newPressure; + psRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, psRLEAccumulator, psStrs); + } + + // FIXME: Is there a nicer way to insert space between columns in HTML? 
+ if (!ro.regClasses().empty() && !ro.intervals().empty()) + os << indent + s(4) << "\n"; + + if (!ro.intervals().empty()) { + std::pair lsRLEAccumulator(Dead, 0); + for (MFRenderingOptions::IntervalSet::const_iterator + liItr = ro.intervals().begin(), + liEnd = ro.intervals().end(); + liItr != liEnd; ++liItr) { + const LiveInterval *li = *liItr; + LiveState newLiveness = getLiveStateAt(li, i); + + if (newLiveness == lsRLEAccumulator.first) { + ++lsRLEAccumulator.second; + } else { + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + lsRLEAccumulator.first = newLiveness; + lsRLEAccumulator.second = 1; + } + } + renderCellsWithRLE(indent + s(4), os, lsRLEAccumulator, lsStrs); + } + os << indent + s(2) << "\n"; + } + + os << indent << "
indexinstr\n"; + renderVertical(indent + s(6), os, trc->getName()); + os << indent + s(4) << "  \n"; + renderVertical(indent + s(6), os, li->reg); + os << indent + s(4) << "
" << i << " \n"; + + if (i == sis->getMBBStartIdx(mbb)) { + os << indent + s(6) << "BB#" << mbb->getNumber() << ": \n"; + } else if (mi != 0) { + os << indent + s(6) << "  "; + renderMachineInstr(os, mi); + } else { + // Empty interval - leave blank. + } + os << indent + s(4) << "
\n"; + + if (!ro.regClasses().empty()) + renderPressureTableLegend(indent, os); + } + + void RenderMachineFunction::renderFunctionPage( + raw_ostream &os, + const char * const renderContextStr) const { + os << "\n" + << s(2) << "\n" + << s(4) << "" << fqn << "\n"; + + insertCSS(s(4), os); + + os << s(2) << "\n" + << s(2) << "\n"; + + renderFunctionSummary(s(4), os, renderContextStr); + + os << s(4) << "
<br/><br/><br/>
\n"; + + //renderLiveIntervalInfoTable(" ", os); + + os << s(4) << "
<br/><br/><br/>
\n"; + + renderCodeTablePlusPI(s(4), os); + + os << s(2) << "\n" + << "\n"; + } + + void RenderMachineFunction::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired(); + au.addRequired(); + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool RenderMachineFunction::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tri = mf->getTarget().getRegisterInfo(); + lis = &getAnalysis(); + sis = &getAnalysis(); + + trei.setup(mf, mri, tri, lis); + ro.setup(mf, tri, lis, this); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + + mf->getFunction()->getName().str(); + + return false; + } + + void RenderMachineFunction::releaseMemory() { + trei.clear(); + ro.clear(); + spillIntervals.clear(); + spillFor.clear(); + useDefs.clear(); + } + + void RenderMachineFunction::rememberUseDefs(const LiveInterval *li) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (MachineRegisterInfo::reg_iterator rItr = mri->reg_begin(li->reg), + rEnd = mri->reg_end(); + rItr != rEnd; ++rItr) { + const MachineInstr *mi = &*rItr; + if (mi->readsRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + } + if (mi->definesRegister(li->reg)) { + useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + } + } + } + + void RenderMachineFunction::rememberSpills( + const LiveInterval *li, + const std::vector &spills) { + + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + for (std::vector::const_iterator siItr = spills.begin(), + siEnd = spills.end(); + siItr != siEnd; ++siItr) { + const LiveInterval *spill = *siItr; + spillIntervals[li].insert(spill); + spillFor[spill] = li; + } + } + + bool RenderMachineFunction::isSpill(const LiveInterval *li) const { + SpillForMap::const_iterator sfItr = spillFor.find(li); + if (sfItr == spillFor.end()) + return false; + return true; + } + + void RenderMachineFunction::renderMachineFunction( + const char *renderContextStr, + const VirtRegMap *vrm, + const char *renderSuffix) { + if (!ro.shouldRenderCurrentMachineFunction()) + return; + + this->vrm = vrm; + trei.reset(); + + std::string rpFileName(mf->getFunction()->getName().str() + + (renderSuffix ? renderSuffix : "") + + outputFileSuffix); + + std::string errMsg; + raw_fd_ostream outFile(rpFileName.c_str(), errMsg, raw_fd_ostream::F_Binary); + + renderFunctionPage(outFile, renderContextStr); + + ro.resetRenderSpecificOptions(); + } + + std::string RenderMachineFunction::escapeChars(const std::string &s) const { + return escapeChars(s.begin(), s.end()); + } + +} diff --git a/final/lib/CodeGen/RenderMachineFunction.h b/final/lib/CodeGen/RenderMachineFunction.h new file mode 100644 index 00000000000..85719923c0c --- /dev/null +++ b/final/lib/CodeGen/RenderMachineFunction.h @@ -0,0 +1,338 @@ +//===-- llvm/CodeGen/RenderMachineFunction.h - MF->HTML -*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_RENDERMACHINEFUNCTION_H +#define LLVM_CODEGEN_RENDERMACHINEFUNCTION_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#include +#include +#include +#include + +namespace llvm { + + class LiveInterval; + class LiveIntervals; + class MachineInstr; + class MachineRegisterInfo; + class RenderMachineFunction; + class TargetRegisterClass; + class TargetRegisterInfo; + class VirtRegMap; + class raw_ostream; + + /// \brief Helper class to process rendering options. Tries to be as lazy as + /// possible. + class MFRenderingOptions { + public: + + struct RegClassComp { + bool operator()(const TargetRegisterClass *trc1, + const TargetRegisterClass *trc2) const { + std::string trc1Name(trc1->getName()), trc2Name(trc2->getName()); + return std::lexicographical_compare(trc1Name.begin(), trc1Name.end(), + trc2Name.begin(), trc2Name.end()); + } + }; + + typedef std::set RegClassSet; + + struct IntervalComp { + bool operator()(const LiveInterval *li1, const LiveInterval *li2) const { + return li1->reg < li2->reg; + } + }; + + typedef std::set IntervalSet; + + /// Initialise the rendering options. + void setup(MachineFunction *mf, const TargetRegisterInfo *tri, + LiveIntervals *lis, const RenderMachineFunction *rmf); + + /// Clear translations of options to the current function. + void clear(); + + /// Reset any options computed for this specific rendering. + void resetRenderSpecificOptions(); + + /// Should we render the current function. + bool shouldRenderCurrentMachineFunction() const; + + /// Return the set of register classes to render pressure for. + const RegClassSet& regClasses() const; + + /// Return the set of live intervals to render liveness for. + const IntervalSet& intervals() const; + + /// Render indexes which are not associated with instructions / MBB starts. + bool renderEmptyIndexes() const; + + /// Return whether or not to render using SVG for fancy vertical text. + bool fancyVerticals() const; + + private: + + static bool renderingOptionsProcessed; + static std::set mfNamesToRender; + static bool renderAllMFs; + + static std::set classNamesToRender; + static bool renderAllClasses; + + + static std::set > intervalNumsToRender; + typedef enum { ExplicitOnly = 0, + AllPhys = 1, + VirtNoSpills = 2, + VirtSpills = 4, + AllVirt = 6, + All = 7 } + IntervalTypesToRender; + static unsigned intervalTypesToRender; + + template + static void splitComaSeperatedList(const std::string &s, OutputItr outItr); + + static void processOptions(); + + static void processFuncNames(); + static void processRegClassNames(); + static void processIntervalNumbers(); + + static void processIntervalRange(const std::string &intervalRangeStr); + + MachineFunction *mf; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + const RenderMachineFunction *rmf; + + mutable bool regClassesTranslatedToCurrentFunction; + mutable RegClassSet regClassSet; + + mutable bool intervalsTranslatedToCurrentFunction; + mutable IntervalSet intervalSet; + + void translateRegClassNamesToCurrentFunction() const; + + void translateIntervalNumbersToCurrentFunction() const; + }; + + /// \brief Provide extra information about the physical and virtual registers + /// in the function being compiled. 
+ class TargetRegisterExtraInfo { + public: + TargetRegisterExtraInfo(); + + /// \brief Set up TargetRegisterExtraInfo with pointers to necessary + /// sources of information. + void setup(MachineFunction *mf, MachineRegisterInfo *mri, + const TargetRegisterInfo *tri, LiveIntervals *lis); + + /// \brief Recompute tables for changed function. + void reset(); + + /// \brief Free all tables in TargetRegisterExtraInfo. + void clear(); + + /// \brief Maximum number of registers from trc which alias reg. + unsigned getWorst(unsigned reg, const TargetRegisterClass *trc) const; + + /// \brief Returns the number of allocable registers in trc. + unsigned getCapacity(const TargetRegisterClass *trc) const; + + /// \brief Return the number of registers of class trc that may be + /// needed at slot i. + unsigned getPressureAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + /// \brief Return true if the number of registers of type trc that may be + /// needed at slot i is greater than the capacity of trc. + bool classOverCapacityAtSlot(const TargetRegisterClass *trc, + SlotIndex i) const; + + private: + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + + typedef std::map WorstMapLine; + typedef std::map VRWorstMap; + VRWorstMap vrWorst; + + typedef std::map PRWorstMap; + PRWorstMap prWorst; + + typedef std::map CapacityMap; + CapacityMap capacityMap; + + typedef std::map PressureMapLine; + typedef std::map PressureMap; + PressureMap pressureMap; + + bool mapsPopulated; + + /// \brief Initialise the 'worst' table. + void initWorst(); + + /// \brief Initialise the 'capacity' table. + void initCapacity(); + + /// \brief Initialise/Reset the 'pressure' and live states tables. + void resetPressureAndLiveStates(); + }; + + /// \brief Render MachineFunction objects and related information to a HTML + /// page. + class RenderMachineFunction : public MachineFunctionPass { + public: + static char ID; + + RenderMachineFunction() : MachineFunctionPass(ID) { + initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &au) const; + + virtual bool runOnMachineFunction(MachineFunction &fn); + + virtual void releaseMemory(); + + void rememberUseDefs(const LiveInterval *li); + + void rememberSpills(const LiveInterval *li, + const std::vector &spills); + + bool isSpill(const LiveInterval *li) const; + + /// \brief Render this machine function to HTML. + /// + /// @param renderContextStr This parameter will be included in the top of + /// the html file to explain where (in the + /// codegen pipeline) this function was rendered + /// from. Set it to something like + /// "Pre-register-allocation". + /// @param vrm If non-null the VRM will be queried to determine + /// whether a virtual register was allocated to a + /// physical register or spilled. + /// @param renderFilePrefix This string will be appended to the function + /// name (before the output file suffix) to enable + /// multiple renderings from the same function. + void renderMachineFunction(const char *renderContextStr, + const VirtRegMap *vrm = 0, + const char *renderSuffix = 0); + + private: + class Spacer; + friend raw_ostream& operator<<(raw_ostream &os, const Spacer &s); + + std::string fqn; + + MachineFunction *mf; + MachineRegisterInfo *mri; + const TargetRegisterInfo *tri; + LiveIntervals *lis; + SlotIndexes *sis; + const VirtRegMap *vrm; + + TargetRegisterExtraInfo trei; + MFRenderingOptions ro; + + + + // Utilities. 
+ typedef enum { Dead, Defined, Used, AliveReg, AliveStack } LiveState; + LiveState getLiveStateAt(const LiveInterval *li, SlotIndex i) const; + + typedef enum { Zero, Low, High } PressureState; + PressureState getPressureStateAt(const TargetRegisterClass *trc, + SlotIndex i) const; + + typedef std::map > + SpillIntervals; + SpillIntervals spillIntervals; + + typedef std::map SpillForMap; + SpillForMap spillFor; + + typedef std::set SlotSet; + typedef std::map UseDefs; + UseDefs useDefs; + + // ---------- Rendering methods ---------- + + /// For inserting spaces when pretty printing. + class Spacer { + public: + explicit Spacer(unsigned numSpaces) : ns(numSpaces) {} + Spacer operator+(const Spacer &o) const { return Spacer(ns + o.ns); } + void print(raw_ostream &os) const; + private: + unsigned ns; + }; + + Spacer s(unsigned ns) const; + + template + std::string escapeChars(Iterator sBegin, Iterator sEnd) const; + + /// \brief Render a machine instruction. + void renderMachineInstr(raw_ostream &os, + const MachineInstr *mi) const; + + /// \brief Render vertical text. + template + void renderVertical(const Spacer &indent, + raw_ostream &os, + const T &t) const; + + /// \brief Insert CSS layout info. + void insertCSS(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a brief summary of the function (including rendering + /// context). + void renderFunctionSummary(const Spacer &indent, + raw_ostream &os, + const char * const renderContextStr) const; + + /// \brief Render a legend for the pressure table. + void renderPressureTableLegend(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render a consecutive set of HTML cells of the same class using + /// the colspan attribute for run-length encoding. + template + void renderCellsWithRLE( + const Spacer &indent, raw_ostream &os, + const std::pair &rleAccumulator, + const std::map &cellTypeStrs) const; + + /// \brief Render code listing, potentially with register pressure + /// and live intervals shown alongside. + void renderCodeTablePlusPI(const Spacer &indent, + raw_ostream &os) const; + + /// \brief Render the HTML page representing the MachineFunction. + void renderFunctionPage(raw_ostream &os, + const char * const renderContextStr) const; + + std::string escapeChars(const std::string &s) const; + }; +} + +#endif /* LLVM_CODEGEN_RENDERMACHINEFUNCTION_H */ diff --git a/final/lib/CodeGen/ScheduleDAG.cpp b/final/lib/CodeGen/ScheduleDAG.cpp new file mode 100644 index 00000000000..3388889c9e9 --- /dev/null +++ b/final/lib/CodeGen/ScheduleDAG.cpp @@ -0,0 +1,595 @@ +//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. 
diff --git a/final/lib/CodeGen/ScheduleDAG.cpp b/final/lib/CodeGen/ScheduleDAG.cpp
new file mode 100644
index 00000000000..3388889c9e9
--- /dev/null
+++ b/final/lib/CodeGen/ScheduleDAG.cpp
@@ -0,0 +1,595 @@
+//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+ScheduleDAG::ScheduleDAG(MachineFunction &mf)
+  : TM(mf.getTarget()),
+    TII(TM.getInstrInfo()),
+    TRI(TM.getRegisterInfo()),
+    MF(mf), MRI(mf.getRegInfo()),
+    EntrySU(), ExitSU() {
+}
+
+ScheduleDAG::~ScheduleDAG() {}
+
+/// getInstrDesc helper to handle SDNodes.
+const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+  if (!Node || !Node->isMachineOpcode()) return NULL;
+  return &TII->get(Node->getMachineOpcode());
+}
+
+/// dump - dump the schedule.
+void ScheduleDAG::dumpSchedule() const {
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    if (SUnit *SU = Sequence[i])
+      SU->dump(this);
+    else
+      dbgs() << "**** NOOP ****\n";
+  }
+}
+
+
+/// Run - perform scheduling.
+///
+void ScheduleDAG::Run(MachineBasicBlock *bb,
+                      MachineBasicBlock::iterator insertPos) {
+  BB = bb;
+  InsertPos = insertPos;
+
+  SUnits.clear();
+  Sequence.clear();
+  EntrySU = SUnit();
+  ExitSU = SUnit();
+
+  Schedule();
+
+  DEBUG({
+      dbgs() << "*** Final schedule ***\n";
+      dumpSchedule();
+      dbgs() << '\n';
+    });
+}
+
+/// addPred - This adds the specified edge as a pred of the current node if
+/// not already. It also adds the current node as a successor of the
+/// specified node.
+bool SUnit::addPred(const SDep &D) {
+  // If this node already has this dependence, don't add a redundant one.
+  for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I)
+    if (*I == D)
+      return false;
+  // Now add a corresponding succ to N.
+  SDep P = D;
+  P.setSUnit(this);
+  SUnit *N = D.getSUnit();
+  // Update the bookkeeping.
+  if (D.getKind() == SDep::Data) {
+    assert(NumPreds < UINT_MAX && "NumPreds will overflow!");
+    assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!");
+    ++NumPreds;
+    ++N->NumSuccs;
+  }
+  if (!N->isScheduled) {
+    assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!");
+    ++NumPredsLeft;
+  }
+  if (!isScheduled) {
+    assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+    ++N->NumSuccsLeft;
+  }
+  Preds.push_back(D);
+  N->Succs.push_back(P);
+  if (P.getLatency() != 0) {
+    this->setDepthDirty();
+    N->setHeightDirty();
+  }
+  return true;
+}
+
+/// removePred - This removes the specified edge as a pred of the current
+/// node if it exists. It also removes the current node as a successor of
+/// the specified node.
+void SUnit::removePred(const SDep &D) {
+  // Find the matching predecessor.
+  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I)
+    if (*I == D) {
+      bool FoundSucc = false;
+      // Find the corresponding successor in N.
+      SDep P = D;
+      P.setSUnit(this);
+      SUnit *N = D.getSUnit();
+      for (SmallVector<SDep, 4>::iterator II = N->Succs.begin(),
+             EE = N->Succs.end(); II != EE; ++II)
+        if (*II == P) {
+          FoundSucc = true;
+          N->Succs.erase(II);
+          break;
+        }
+      assert(FoundSucc && "Mismatching preds / succs lists!");
+      Preds.erase(I);
+      // Update the bookkeeping.
+ if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); + --NumPreds; + --N->NumSuccs; + } + if (!N->isScheduled) { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); + --NumPredsLeft; + } + if (!isScheduled) { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); + --N->NumSuccsLeft; + } + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } + return; + } +} + +void SUnit::setDepthDirty() { + if (!isDepthCurrent) return; + SmallVector WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isDepthCurrent) + WorkList.push_back(SuccSU); + } + } while (!WorkList.empty()); +} + +void SUnit::setHeightDirty() { + if (!isHeightCurrent) return; + SmallVector WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isHeightCurrent) + WorkList.push_back(PredSU); + } + } while (!WorkList.empty()); +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= getDepth()) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= getHeight()) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. +/// +void SUnit::ComputeDepth() { + SmallVector WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } while (!WorkList.empty()); +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } while (!WorkList.empty()); +} + +/// SUnit - Scheduling unit. 
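The dirty-flag walks above keep edge insertion cheap: addPred and removePred
only invalidate cached depths and heights, and ComputeDepth/ComputeHeight
repair them lazily on demand. A minimal sketch of wiring a data edge
(illustrative helper, not in the tree):

    // Make `use` depend on `def` through register `reg`. addPred mirrors the
    // edge into def.Succs, bumps the pred/succ counters, and, when the
    // latency is non-zero, dirties use's depth and def's height for lazy
    // recomputation.
    static void addDataEdge(SUnit &def, SUnit &use,
                            unsigned latency, unsigned reg) {
      use.addPred(SDep(&def, SDep::Data, latency, reg));
    }
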
It's an wrapper around either a single SDNode or +/// a group of nodes flagged together. +void SUnit::dump(const ScheduleDAG *G) const { + dbgs() << "SU(" << NodeNum << "): "; + G->dumpNode(this); +} + +void SUnit::dumpAll(const ScheduleDAG *G) const { + dump(G); + + dbgs() << " # preds left : " << NumPredsLeft << "\n"; + dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; + dbgs() << " Latency : " << Latency << "\n"; + dbgs() << " Depth : " << Depth << "\n"; + dbgs() << " Height : " << Height << "\n"; + + if (Preds.size() != 0) { + dbgs() << " Predecessors:\n"; + for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; + } + } + if (Succs.size() != 0) { + dbgs() << " Successors:\n"; + for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "#"; + dbgs() << I->getSUnit() << " - SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + dbgs() << "\n"; + } + } + dbgs() << "\n"; +} + +#ifndef NDEBUG +/// VerifySchedule - Verify that all SUnits were scheduled and that +/// their state is consistent. +/// +void ScheduleDAG::VerifySchedule(bool isBottomUp) { + bool AnyNotSched = false; + unsigned DeadNodes = 0; + unsigned Noops = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > + unsigned(INT_MAX)) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; + AnyNotSched = true; + } + if (isBottomUp) { + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has successors left!\n"; + AnyNotSched = true; + } + } else { + if (SUnits[i].NumPredsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has predecessors left!\n"; + AnyNotSched = true; + } + } + } + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(!AnyNotSched); + assert(Sequence.size() + DeadNodes - Noops == SUnits.size() && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif + +/// InitDAGTopologicalSorting - create the initial topological +/// ordering from the DAG to be scheduled. 
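The closing assertion in VerifySchedule above is a simple conservation law.
With assumed numbers: 10 SUnits of which 2 are dead (no preds or succs, never
emitted) and a 12-entry Sequence containing 4 null noop slots gives
12 + 2 - 4 == 10, matching Sequence.size() + DeadNodes - Noops == SUnits.size().
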
+/// +/// The idea of the algorithm is taken from +/// "Online algorithms for managing the topological order of +/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly +/// This is the MNR algorithm, which was first introduced by +/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in +/// "Maintaining a topological order under edge insertions". +/// +/// Short description of the algorithm: +/// +/// Topological ordering, ord, of a DAG maps each node to a topological +/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). +/// +/// This means that if there is a path from the node X to the node Z, +/// then ord(X) < ord(Z). +/// +/// This property can be used to check for reachability of nodes: +/// if Z is reachable from X, then an insertion of the edge Z->X would +/// create a cycle. +/// +/// The algorithm first computes a topological ordering for the DAG by +/// initializing the Index2Node and Node2Index arrays and then tries to keep +/// the ordering up-to-date after edge insertions by reordering the DAG. +/// +/// On insertion of the edge X->Y, the algorithm first marks by calling DFS +/// the nodes reachable from Y, and then shifts them using Shift to lie +/// immediately after X in Index2Node. +void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { + unsigned DAGSize = SUnits.size(); + std::vector WorkList; + WorkList.reserve(DAGSize); + + Index2Node.resize(DAGSize); + Node2Index.resize(DAGSize); + + // Initialize the data structures. + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + // Temporarily use the Node2Index array as scratch space for degree counts. + Node2Index[NodeNum] = Degree; + + // Is it a node without dependencies? + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + // Collect leaf nodes. + WorkList.push_back(SU); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + Allocate(SU->NodeNum, --Id); + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit *SU = I->getSUnit(); + if (!--Node2Index[SU->NodeNum]) + // If all dependencies of the node are processed already, + // then the node can be computed now. + WorkList.push_back(SU); + } + } + + Visited.resize(DAGSize); + +#ifndef NDEBUG + // Check correctness of the ordering + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + "Wrong topological sorting"); + } + } +#endif +} + +/// AddPred - Updates the topological ordering to accomodate an edge +/// to be added from SUnit X to SUnit Y. +void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { + int UpperBound, LowerBound; + LowerBound = Node2Index[Y->NodeNum]; + UpperBound = Node2Index[X->NodeNum]; + bool HasLoop = false; + // Is Ord(X) < Ord(Y) ? + if (LowerBound < UpperBound) { + // Update the topological order. + Visited.reset(); + DFS(Y, UpperBound, HasLoop); + assert(!HasLoop && "Inserted edge creates a loop!"); + // Recompute topological indexes. + Shift(Visited, LowerBound, UpperBound); + } +} + +/// RemovePred - Updates the topological ordering to accomodate an +/// an edge to be removed from the specified node N from the predecessors +/// of the current node M. 
+void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { + // InitDAGTopologicalSorting(); +} + +/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark +/// all nodes affected by the edge insertion. These nodes will later get new +/// topological indexes by means of the Shift method. +void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, + bool &HasLoop) { + std::vector WorkList; + WorkList.reserve(SUnits.size()); + + WorkList.push_back(SU); + do { + SU = WorkList.back(); + WorkList.pop_back(); + Visited.set(SU->NodeNum); + for (int I = SU->Succs.size()-1; I >= 0; --I) { + int s = SU->Succs[I].getSUnit()->NodeNum; + if (Node2Index[s] == UpperBound) { + HasLoop = true; + return; + } + // Visit successors if not already and in affected region. + if (!Visited.test(s) && Node2Index[s] < UpperBound) { + WorkList.push_back(SU->Succs[I].getSUnit()); + } + } + } while (!WorkList.empty()); +} + +/// Shift - Renumber the nodes so that the topological ordering is +/// preserved. +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, + int UpperBound) { + std::vector L; + int shift = 0; + int i; + + for (i = LowerBound; i <= UpperBound; ++i) { + // w is node at topological index i. + int w = Index2Node[i]; + if (Visited.test(w)) { + // Unmark. + Visited.reset(w); + L.push_back(w); + shift = shift + 1; + } else { + Allocate(w, i - shift); + } + } + + for (unsigned j = 0; j < L.size(); ++j) { + Allocate(L[j], i - shift); + i = i + 1; + } +} + + +/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will +/// create a cycle. +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + if (IsReachable(TargetSU, SU)) + return true; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isAssignedRegDep() && + IsReachable(TargetSU, I->getSUnit())) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, + const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. +void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector &sunits) : SUnits(sunits) {} + +ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/final/lib/CodeGen/ScheduleDAGEmit.cpp b/final/lib/CodeGen/ScheduleDAGEmit.cpp new file mode 100644 index 00000000000..6b7a8c6491b --- /dev/null +++ b/final/lib/CodeGen/ScheduleDAGEmit.cpp @@ -0,0 +1,67 @@ +//===---- ScheduleDAGEmit.cpp - Emit routines for the ScheduleDAG class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
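Putting the interface above together, a sketch of how a list scheduler can add
a speculative ordering edge only when the index array proves it acyclic
(hypothetical helper; the in-tree callers are the ScheduleDAGRRList
heuristics):

    // Add From as a predecessor of To (edge From -> To), keeping both the
    // SUnit pred/succ lists and the topological indexes consistent.
    static bool tryAddEdge(ScheduleDAGTopologicalSort &Topo,
                           SUnit *From, SUnit *To) {
      // From -> To closes a cycle exactly when From is already reachable
      // from To.
      if (Topo.WillCreateCycle(To, From))
        return false;
      To->addPred(SDep(From, SDep::Order, /*Latency=*/0));
      Topo.AddPred(To, From);  // reorders via DFS + Shift only if needed
      return true;
    }
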
+// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the ScheduleDAG class, which creates +// MachineInstrs according to the computed schedule. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +using namespace llvm; + +void ScheduleDAG::EmitNoop() { + TII->insertNoop(*BB, InsertPos); +} + +void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, + DenseMap &VRBaseMap) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} diff --git a/final/lib/CodeGen/ScheduleDAGInstrs.cpp b/final/lib/CodeGen/ScheduleDAGInstrs.cpp new file mode 100644 index 00000000000..f17023eabb7 --- /dev/null +++ b/final/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -0,0 +1,693 @@ +//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAGInstrs class, which implements re-scheduling +// of MachineInstrs. 
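EmitPhysRegCopy above handles two mirrored cases, summarized:

    // pred has CopyDstRC set -> the value lives in a virtual register
    //     recorded in VRBaseMap; emit COPY physreg <- vreg, taking the
    //     destination physreg from one of SU's successor edges.
    // otherwise              -> the pred defines a physical register; create
    //     a fresh vreg of SU->CopyDstRC, emit COPY vreg <- physreg, and
    //     record it in VRBaseMap for later users.
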
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "sched-instrs" +#include "ScheduleDAGInstrs.h" +#include "llvm/Operator.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtarget.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +using namespace llvm; + +ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), + InstrItins(mf.getTarget().getInstrItineraryData()), + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { + DbgValueVec.clear(); +} + +/// Run - perform scheduling. +/// +void ScheduleDAGInstrs::Run(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endcount) { + BB = bb; + Begin = begin; + InsertPosIndex = endcount; + + ScheduleDAG::Run(bb, end); +} + +/// getUnderlyingObjectFromInt - This is the function that does the work of +/// looking through basic ptrtoint+arithmetic+inttoptr sequences. +static const Value *getUnderlyingObjectFromInt(const Value *V) { + do { + if (const Operator *U = dyn_cast(V)) { + // If we find a ptrtoint, we can transfer control back to the + // regular getUnderlyingObjectFromInt. + if (U->getOpcode() == Instruction::PtrToInt) + return U->getOperand(0); + // If we find an add of a constant or a multiplied value, it's + // likely that the other operand will lead us to the base + // object. We don't have to worry about the case where the + // object address is somehow being computed by the multiply, + // because our callers only care when the result is an + // identifibale object. + if (U->getOpcode() != Instruction::Add || + (!isa(U->getOperand(1)) && + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) + return V; + V = U->getOperand(0); + } else { + return V; + } + assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); + } while (1); +} + +/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject +/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. +static const Value *getUnderlyingObject(const Value *V) { + // First just call Value::getUnderlyingObject to let it do what it does. + do { + V = GetUnderlyingObject(V); + // If it found an inttoptr, use special code to continue climing. + if (Operator::getOpcode(V) != Instruction::IntToPtr) + break; + const Value *O = getUnderlyingObjectFromInt(cast(V)->getOperand(0)); + // If that succeeded in finding a pointer, continue the search. + if (!O->getType()->isPointerTy()) + break; + V = O; + } while (1); + return V; +} + +/// getUnderlyingObjectForInstr - If this machine instr has memory reference +/// information and it can be tracked to a normal reference to a known +/// object, return the Value for that object. Otherwise return null. 
+static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, + const MachineFrameInfo *MFI, + bool &MayAlias) { + MayAlias = true; + if (!MI->hasOneMemOperand() || + !(*MI->memoperands_begin())->getValue() || + (*MI->memoperands_begin())->isVolatile()) + return 0; + + const Value *V = (*MI->memoperands_begin())->getValue(); + if (!V) + return 0; + + V = getUnderlyingObject(V); + if (const PseudoSourceValue *PSV = dyn_cast(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (PSV->isAliased(MFI)) + return 0; + + MayAlias = PSV->mayAlias(MFI); + return V; + } + + if (isIdentifiedObject(V)) + return V; + + return 0; +} + +void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) { + if (MachineLoop *ML = MLI.getLoopFor(BB)) + if (BB == ML->getLoopLatch()) { + MachineBasicBlock *Header = ML->getHeader(); + for (MachineBasicBlock::livein_iterator I = Header->livein_begin(), + E = Header->livein_end(); I != E; ++I) + LoopLiveInRegs.insert(*I); + LoopRegs.VisitLoop(ML); + } +} + +/// AddSchedBarrierDeps - Add dependencies from instructions in the current +/// list of instructions being scheduled to scheduling barrier by adding +/// the exit SU to the register defs and use list. This is because we want to +/// make sure instructions which define registers that are either used by +/// the terminator or are live-out are properly scheduled. This is +/// especially important when the definition latency of the return value(s) +/// are too high to be hidden by the branch or when the liveout registers +/// used by instructions in the fallthrough block. +void ScheduleDAGInstrs::AddSchedBarrierDeps() { + MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0; + ExitSU.setInstr(ExitMI); + bool AllDepKnown = ExitMI && + (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier()); + if (ExitMI && AllDepKnown) { + // If it's a call or a barrier, add dependencies on the defs and uses of + // instruction. + for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = ExitMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); + Uses[Reg].push_back(&ExitSU); + } + } else { + // For others, e.g. fallthrough, conditional branch, assume the exit + // uses all the registers that are livein to the successor blocks. + SmallSet Seen; + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { + unsigned Reg = *I; + if (Seen.insert(Reg)) + Uses[Reg].push_back(&ExitSU); + } + } +} + +void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { + // We'll be allocating one SUnit for each instruction, plus one for + // the region exit node. + SUnits.reserve(BB->size()); + + // We build scheduling units by walking a block's instruction list from bottom + // to top. + + // Remember where a generic side-effecting instruction is as we procede. + SUnit *BarrierChain = 0, *AliasChain = 0; + + // Memory references to specific known memory locations are tracked + // so that they can be given more precise dependencies. 
We track + // separately the known memory locations that may alias and those + // that are known not to alias + std::map AliasMemDefs, NonAliasMemDefs; + std::map > AliasMemUses, NonAliasMemUses; + + // Keep track of dangling debug references to registers. + std::vector > + DanglingDebugValue(TRI->getNumRegs(), + std::make_pair(static_cast(0), 0)); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = ForceUnitLatencies(); + + // Ask the target if address-backscheduling is desirable, and if so how much. + const TargetSubtarget &ST = TM.getSubtarget(); + unsigned SpecialAddressLatency = ST.getSpecialAddressLatency(); + + // Remove any stale debug info; sometimes BuildSchedGraph is called again + // without emitting the info from the previous call. + DbgValueVec.clear(); + + // Model data dependencies between instructions being scheduled and the + // ExitSU. + AddSchedBarrierDeps(); + + // Walk the list of instructions, from bottom moving up. + for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin; + MII != MIE; --MII) { + MachineInstr *MI = prior(MII); + // DBG_VALUE does not have SUnit's built, so just remember these for later + // reinsertion. + if (MI->isDebugValue()) { + if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() && + MI->getOperand(0).getReg()) + DanglingDebugValue[MI->getOperand(0).getReg()] = + std::make_pair(MI, DbgValueVec.size()); + DbgValueVec.push_back(MI); + continue; + } + const TargetInstrDesc &TID = MI->getDesc(); + assert(!TID.isTerminator() && !MI->isLabel() && + "Cannot schedule terminators or labels!"); + // Create the SUnit for this MI. + SUnit *SU = NewSUnit(MI); + SU->isCall = TID.isCall(); + SU->isCommutable = TID.isCommutable(); + + // Assign the Latency field of SU using target-provided information. + if (UnitLatencies) + SU->Latency = 1; + else + ComputeLatency(SU); + + // Add register-based dependencies (data, anti, and output). + for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!"); + + if (MO.isDef() && DanglingDebugValue[Reg].first!=0) { + SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first); + DbgValueVec[DanglingDebugValue[Reg].second] = 0; + DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0); + } + + std::vector &UseList = Uses[Reg]; + std::vector &DefList = Defs[Reg]; + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + unsigned AOLatency = (Kind == SDep::Anti) ? 
0 : 1; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(Reg))) + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg)); + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + std::vector &DefList = Defs[*Alias]; + for (unsigned i = 0, e = DefList.size(); i != e; ++i) { + SUnit *DefSU = DefList[i]; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) + DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias)); + } + } + + if (MO.isDef()) { + // Add any data dependencies. + unsigned DataLatency = SU->Latency; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + unsigned LDataLatency = DataLatency; + // Optionally add in a special extra latency for nodes that + // feed addresses. + // TODO: Do this for register aliases too. + // TODO: Perhaps we should get rid of + // SpecialAddressLatency and just move this into + // adjustSchedDependency for the targets that care about it. + if (SpecialAddressLatency != 0 && !UnitLatencies && + UseSU != &ExitSU) { + MachineInstr *UseMI = UseSU->getInstr(); + const TargetInstrDesc &UseTID = UseMI->getDesc(); + int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg); + assert(RegUseIndex >= 0 && "UseMI doesn's use register!"); + if (RegUseIndex >= 0 && + (UseTID.mayLoad() || UseTID.mayStore()) && + (unsigned)RegUseIndex < UseTID.getNumOperands() && + UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass()) + LDataLatency += SpecialAddressLatency; + } + // Adjust the dependence latency using operand def/use + // information (if any), and then allow the target to + // perform its own adjustments. + const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, const_cast(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); + } + UseSU->addPred(dep); + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + std::vector &UseList = Uses[*Alias]; + for (unsigned i = 0, e = UseList.size(); i != e; ++i) { + SUnit *UseSU = UseList[i]; + if (UseSU == SU) + continue; + const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias); + if (!UnitLatencies) { + ComputeOperandLatency(SU, UseSU, const_cast(dep)); + ST.adjustSchedDependency(SU, UseSU, const_cast(dep)); + } + UseSU->addPred(dep); + } + } + + // If a def is going to wrap back around to the top of the loop, + // backschedule it. + if (!UnitLatencies && DefList.empty()) { + LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg); + if (I != LoopRegs.Deps.end()) { + const MachineOperand *UseMO = I->second.first; + unsigned Count = I->second.second; + const MachineInstr *UseMI = UseMO->getParent(); + unsigned UseMOIdx = UseMO - &UseMI->getOperand(0); + const TargetInstrDesc &UseTID = UseMI->getDesc(); + // TODO: If we knew the total depth of the region here, we could + // handle the case where the whole loop is inside the region but + // is large enough that the isScheduleHigh trick isn't needed. + if (UseMOIdx < UseTID.getNumOperands()) { + // Currently, we only support scheduling regions consisting of + // single basic blocks. Check to see if the instruction is in + // the same region by checking to see if it has the same parent. 
+ if (UseMI->getParent() != MI->getParent()) { + unsigned Latency = SU->Latency; + if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) + Latency += SpecialAddressLatency; + // This is a wild guess as to the portion of the latency which + // will be overlapped by work done outside the current + // scheduling region. + Latency -= std::min(Latency, Count); + // Add the artifical edge. + ExitSU.addPred(SDep(SU, SDep::Order, Latency, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (SpecialAddressLatency > 0 && + UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) { + // The entire loop body is within the current scheduling region + // and the latency of this operation is assumed to be greater + // than the latency of the loop. + // TODO: Recursively mark data-edge predecessors as + // isScheduleHigh too. + SU->isScheduleHigh = true; + } + } + LoopRegs.Deps.erase(I); + } + } + + UseList.clear(); + if (!MO.isDead()) + DefList.clear(); + DefList.push_back(SU); + } else { + UseList.push_back(SU); + } + } + + // Add chain dependencies. + // Chain dependencies used to enforce memory order should have + // latency of 0 (except for true dependency of Store followed by + // aliased Load... we estimate that with a single cycle of latency + // assuming the hardware will bypass) + // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable + // after stack slots are lowered to actual addresses. + // TODO: Use an AliasAnalysis and do real alias-analysis queries, and + // produce more precise dependence information. +#define STORE_LOAD_LATENCY 1 + unsigned TrueMemOrderLatency = 0; + if (TID.isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasVolatileMemoryRef() && + (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) { + // Be conservative with these and add dependencies on all memory + // references, even those that are known to not alias. + for (std::map::iterator I = + NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + for (std::map >::iterator I = + NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + } + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); + // Add SU to the barrier chain. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + BarrierChain = SU; + + // fall-through + new_alias_chain: + // Chain all possibly aliasing memory references though SU. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + AliasChain = SU; + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + for (std::map::iterator I = AliasMemDefs.begin(), + E = AliasMemDefs.end(); I != E; ++I) { + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + for (std::map >::iterator I = + AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + } + PendingLoads.clear(); + AliasMemDefs.clear(); + AliasMemUses.clear(); + } else if (TID.mayStore()) { + bool MayAlias = true; + TrueMemOrderLatency = STORE_LOAD_LATENCY; + if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + // A store to a specific PseudoSourceValue. Add precise dependencies. 
+ // Record the def in MemDefs, first adding a dep if there is + // an existing def. + std::map::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) { + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + I->second = SU; + } else { + if (MayAlias) + AliasMemDefs[V] = SU; + else + NonAliasMemDefs[V] = SU; + } + // Handle the uses in MemUses, if there are any. + std::map >::iterator J = + ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + std::map >::iterator JE = + ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + if (J != JE) { + for (unsigned i = 0, e = J->second.size(); i != e; ++i) + J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency, + /*Reg=*/0, /*isNormalMemory=*/true)); + J->second.clear(); + } + if (MayAlias) { + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency)); + // Add dependence on alias chain, if needed. + if (AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + // Add dependence on barrier chain, if needed. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } else { + // Treat all other stores conservatively. + goto new_alias_chain; + } + + if (!ExitSU.isPred(SU)) + // Push store's up a bit to avoid them getting in between cmp + // and branches. + ExitSU.addPred(SDep(SU, SDep::Order, 0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } else if (TID.mayLoad()) { + bool MayAlias = true; + TrueMemOrderLatency = 0; + if (MI->isInvariantLoad(AA)) { + // Invariant load, no chain dependencies needed! + } else { + if (const Value *V = + getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + // A load from a specific PseudoSourceValue. Add precise dependencies. + std::map::iterator I = + ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + std::map::iterator IE = + ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0, + /*isNormalMemory=*/true)); + if (MayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); + } else { + // A load with no underlying object. Depend on all + // potentially aliasing stores. + for (std::map::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) + I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + + PendingLoads.push_back(SU); + MayAlias = true; + } + + // Add dependencies on alias and barrier chains, if needed. + if (MayAlias && AliasChain) + AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0)); + } + } + } + + for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) { + Defs[i].clear(); + Uses[i].clear(); + } + PendingLoads.clear(); +} + +void ScheduleDAGInstrs::FinishBlock() { + // Nothing to do. +} + +void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { + // Compute the latency for the node. + if (!InstrItins || InstrItins->isEmpty()) { + SU->Latency = 1; + + // Simplistic target-independent heuristic: assume that loads take + // extra time. 
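The chain-dependence policy built above, summarized case by case (order
latency 0 unless noted; STORE_LOAD_LATENCY models the store-then-aliased-load
bypass estimate):

    // call / unmodeled side effects / volatile -> full barrier: depend on
    //     every recorded def and use, then become the new barrier/alias chain
    // store to a known object V  -> order after the previous def of V and all
    //     recorded uses of V, plus PendingLoads and the alias chain when
    //     MayAlias, plus the barrier chain when present
    // store to an unknown object -> joins the conservative alias chain
    // invariant load             -> no chain edges at all
    // load from a known object V -> order after the previous def of V only,
    //     plus the alias/barrier chains when applicable
    // load from an unknown object -> order after every aliasing store seen,
    //     then parked in PendingLoads for the next aliasing store
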
+ if (SU->getInstr()->getDesc().mayLoad()) + SU->Latency += 2; + } else { + SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr()); + } +} + +void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { + if (!InstrItins || InstrItins->isEmpty()) + return; + + // For a data dependency with a known register... + if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0)) + return; + + const unsigned Reg = dep.getReg(); + + // ... find the definition of the register in the defining + // instruction + MachineInstr *DefMI = Def->getInstr(); + int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); + if (DefIdx != -1) { + const MachineOperand &MO = DefMI->getOperand(DefIdx); + if (MO.isReg() && MO.isImplicit() && + DefIdx >= (int)DefMI->getDesc().getNumOperands()) { + // This is an implicit def, getOperandLatency() won't return the correct + // latency. e.g. + // %D6, %D7 = VLD1q16 %R2, 0, ..., %Q3 + // %Q1 = VMULv8i16 %Q1, %Q3, ... + // What we want is to compute latency between def of %D6/%D7 and use of + // %Q3 instead. + DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI); + } + MachineInstr *UseMI = Use->getInstr(); + // For all uses of the register, calculate the maxmimum latency + int Latency = -1; + if (UseMI) { + for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx, + UseMI, i); + Latency = std::max(Latency, UseCycle); + } + } else { + // UseMI is null, then it must be a scheduling barrier. + if (!InstrItins || InstrItins->isEmpty()) + return; + unsigned DefClass = DefMI->getDesc().getSchedClass(); + Latency = InstrItins->getOperandCycle(DefClass, DefIdx); + } + + // If we found a latency, then replace the existing dependence latency. + if (Latency >= 0) + dep.setLatency(Latency); + } +} + +void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { + SU->getInstr()->dump(); +} + +std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream oss(s); + if (SU == &EntrySU) + oss << ""; + else if (SU == &ExitSU) + oss << ""; + else + SU->getInstr()->print(oss); + return oss.str(); +} + +// EmitSchedule - Emit the machine code in scheduled order. +MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() { + // For MachineInstr-based scheduling, we're rescheduling the instructions in + // the block, so start by removing them from the block. + while (Begin != InsertPos) { + MachineBasicBlock::iterator I = Begin; + ++Begin; + BB->remove(I); + } + + // First reinsert any remaining debug_values; these are either constants, + // or refer to live-in registers. The beginning of the block is the right + // place for the latter. The former might reasonably be placed elsewhere + // using some kind of ordering algorithm, but right now it doesn't matter. + for (int i = DbgValueVec.size()-1; i>=0; --i) + if (DbgValueVec[i]) + BB->insert(InsertPos, DbgValueVec[i]); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. 
+ EmitNoop(); + continue; + } + + BB->insert(InsertPos, SU->getInstr()); + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) + BB->insert(InsertPos, SU->DbgInstrList[i]); + } + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (!DbgValueVec.empty()) { + for (int i = DbgValueVec.size()-1; i>=0; --i) + if (DbgValueVec[i]!=0) { + Begin = DbgValueVec[DbgValueVec.size()-1]; + break; + } + } else if (!Sequence.empty()) + Begin = Sequence[0]->getInstr(); + + DbgValueVec.clear(); + return BB; +} diff --git a/final/lib/CodeGen/ScheduleDAGInstrs.h b/final/lib/CodeGen/ScheduleDAGInstrs.h new file mode 100644 index 00000000000..c878287d9c8 --- /dev/null +++ b/final/lib/CodeGen/ScheduleDAGInstrs.h @@ -0,0 +1,207 @@ +//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGInstrs class, which implements +// scheduling for a MachineInstr-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGINSTRS_H +#define SCHEDULEDAGINSTRS_H + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallSet.h" +#include + +namespace llvm { + class MachineLoopInfo; + class MachineDominatorTree; + + /// LoopDependencies - This class analyzes loop-oriented register + /// dependencies, which are used to guide scheduling decisions. + /// For example, loop induction variable increments should be + /// scheduled as soon as possible after the variable's last use. + /// + class LLVM_LIBRARY_VISIBILITY LoopDependencies { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + + public: + typedef std::map > + LoopDeps; + LoopDeps Deps; + + LoopDependencies(const MachineLoopInfo &mli, + const MachineDominatorTree &mdt) : + MLI(mli), MDT(mdt) {} + + /// VisitLoop - Clear out any previous state and analyze the given loop. 
+ /// + void VisitLoop(const MachineLoop *Loop) { + Deps.clear(); + MachineBasicBlock *Header = Loop->getHeader(); + SmallSet LoopLiveIns; + for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(), + LE = Header->livein_end(); LI != LE; ++LI) + LoopLiveIns.insert(*LI); + + const MachineDomTreeNode *Node = MDT.getNode(Header); + const MachineBasicBlock *MBB = Node->getBlock(); + assert(Loop->contains(MBB) && + "Loop does not contain header!"); + VisitRegion(Node, MBB, Loop, LoopLiveIns); + } + + private: + void VisitRegion(const MachineDomTreeNode *Node, + const MachineBasicBlock *MBB, + const MachineLoop *Loop, + const SmallSet &LoopLiveIns) { + unsigned Count = 0; + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (LoopLiveIns.count(MOReg)) + Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); + } + ++Count; // Not every iteration due to dbg_value above. + } + + const std::vector &Children = Node->getChildren(); + for (std::vector::const_iterator I = + Children.begin(), E = Children.end(); I != E; ++I) { + const MachineDomTreeNode *ChildNode = *I; + MachineBasicBlock *ChildBlock = ChildNode->getBlock(); + if (Loop->contains(ChildBlock)) + VisitRegion(ChildNode, ChildBlock, Loop, LoopLiveIns); + } + } + }; + + /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of + /// MachineInstrs. + class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG { + const MachineLoopInfo &MLI; + const MachineDominatorTree &MDT; + const MachineFrameInfo *MFI; + const InstrItineraryData *InstrItins; + + /// Defs, Uses - Remember where defs and uses of each physical register + /// are as we iterate upward through the instructions. This is allocated + /// here instead of inside BuildSchedGraph to avoid the need for it to be + /// initialized and destructed for each block. + std::vector > Defs; + std::vector > Uses; + + /// DbgValueVec - Remember DBG_VALUEs that refer to a particular + /// register. + std::vectorDbgValueVec; + + /// PendingLoads - Remember where unknown loads are after the most recent + /// unknown store, as we iterate. As with Defs and Uses, this is here + /// to minimize construction/destruction. + std::vector PendingLoads; + + /// LoopRegs - Track which registers are used for loop-carried dependencies. + /// + LoopDependencies LoopRegs; + + /// LoopLiveInRegs - Track which regs are live into a loop, to help guide + /// back-edge-aware scheduling. + /// + SmallSet LoopLiveInRegs; + + public: + MachineBasicBlock::iterator Begin; // The beginning of the range to + // be scheduled. The range extends + // to InsertPos. + unsigned InsertPosIndex; // The index in BB of InsertPos. + + explicit ScheduleDAGInstrs(MachineFunction &mf, + const MachineLoopInfo &mli, + const MachineDominatorTree &mdt); + + virtual ~ScheduleDAGInstrs() {} + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(MachineInstr *MI) { +#ifndef NDEBUG + const SUnit *Addr = SUnits.empty() ? 
0 : &SUnits[0]; +#endif + SUnits.push_back(SUnit(MI, (unsigned)SUnits.size())); + assert((Addr == 0 || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + return &SUnits.back(); + } + + /// Run - perform scheduling. + /// + void Run(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned endindex); + + /// BuildSchedGraph - Build SUnits from the MachineBasicBlock that we are + /// input. + virtual void BuildSchedGraph(AliasAnalysis *AA); + + /// AddSchedBarrierDeps - Add dependencies from instructions in the current + /// list of instructions being scheduled to scheduling barrier. We want to + /// make sure instructions which define registers that are either used by + /// the terminator or are live-out are properly scheduled. This is + /// especially important when the definition latency of the return value(s) + /// are too high to be hidden by the branch or when the liveout registers + /// used by instructions in the fallthrough block. + void AddSchedBarrierDeps(); + + /// ComputeLatency - Compute node latency. + /// + virtual void ComputeLatency(SUnit *SU); + + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const; + + virtual MachineBasicBlock *EmitSchedule(); + + /// StartBlock - Prepare to perform scheduling in the given block. + /// + virtual void StartBlock(MachineBasicBlock *BB); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + /// FinishBlock - Clean up after scheduling in the given block. + /// + virtual void FinishBlock(); + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + }; +} + +#endif diff --git a/final/lib/CodeGen/ScheduleDAGPrinter.cpp b/final/lib/CodeGen/ScheduleDAGPrinter.cpp new file mode 100644 index 00000000000..4b55a2284f8 --- /dev/null +++ b/final/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -0,0 +1,99 @@ +//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG::viewGraph method. 
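NewSUnit's NDEBUG check above encodes a pointer-stability contract: SUnit
pointers handed out earlier must stay valid, so the SUnits vector may never
reallocate while the graph is being built. Callers uphold this by reserving
capacity first, as BuildSchedGraph does; in sketch form:

    SUnits.reserve(BB->size());  // capacity up front, one slot per instr
    SUnit *SU = NewSUnit(MI);    // safe: push_back below capacity never
                                 // moves the vector's storage
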
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Config/config.h" +#include +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getFunction()->getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return true; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter &GW) { + return G->addCustomGraphFeatures(GW); + } + }; +} + +std::string DOTGraphTraits::getNodeLabel(const SUnit *SU, + const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); +} + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAG::viewGraph() { +// This code is only for debugging! +#ifndef NDEBUG + if (BB->getBasicBlock()) + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr() + + ":" + BB->getBasicBlock()->getNameStr()); + else + ViewGraph(this, "dag." + MF.getFunction()->getNameStr(), false, + "Scheduling-Units Graph for " + MF.getFunction()->getNameStr()); +#else + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} diff --git a/final/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/final/lib/CodeGen/ScoreboardHazardRecognizer.cpp new file mode 100644 index 00000000000..e6d7ded8a78 --- /dev/null +++ b/final/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -0,0 +1,243 @@ +//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScoreboardHazardRecognizer class, which +// encapsultes hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + +ScoreboardHazardRecognizer:: +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition. + unsigned ScoreboardDepth = 1; + if (ItinData && !ItinData->isEmpty()) { + IssueWidth = ItinData->IssueWidth; + + for (unsigned idx = 0; ; ++idx) { + if (ItinData->isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; + unsigned ItinDepth = 0; + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } + + // Find the next power-of-2 >= ItinDepth + while (ItinDepth > ScoreboardDepth) { + ScoreboardDepth *= 2; + } + } + MaxLookAhead = ScoreboardDepth; + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); +} + +void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +void ScoreboardHazardRecognizer::Scoreboard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} + +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + +ScheduleHazardRecognizer::HazardType +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (!ItinData || ItinData->isEmpty()) + return NoHazard; + + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + if (TID == NULL) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. 
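The depth computation above rounds the deepest itinerary up to a power of two.
Worked through for an assumed five-cycle itinerary: ItinDepth = 5 doubles
ScoreboardDepth 1 -> 2 -> 4 -> 8, so MaxLookAhead and both scoreboards get
depth 8; a power-of-two depth keeps the circular scoreboard's wrap-around
arithmetic cheap.
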
+ for (unsigned int i = 0; i < IS->getCycles(); ++i) { + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[StageCycle]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[StageCycle]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + if (!ItinData || ItinData->isEmpty()) + return; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + const TargetInstrDesc *TID = DAG->getInstrDesc(SU); + assert(TID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(TID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + + unsigned idx = TID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; + } + + // Advance the cycle to the next stage. 
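+    // Note that getNextCycles() may be smaller than getCycles(), in which
+    // case consecutive stages overlap in the scoreboard.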
+ cycle += IS->getNextCycles(); + } + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); +} + +void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); +} + +void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; + ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; + ReservedScoreboard.recede(); + RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; + RequiredScoreboard.recede(); +} diff --git a/final/lib/CodeGen/SelectionDAG/CMakeLists.txt b/final/lib/CodeGen/SelectionDAG/CMakeLists.txt new file mode 100644 index 00000000000..15932c03a19 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -0,0 +1,23 @@ +add_llvm_library(LLVMSelectionDAG + DAGCombiner.cpp + FastISel.cpp + FunctionLoweringInfo.cpp + InstrEmitter.cpp + LegalizeDAG.cpp + LegalizeFloatTypes.cpp + LegalizeIntegerTypes.cpp + LegalizeTypes.cpp + LegalizeTypesGeneric.cpp + LegalizeVectorOps.cpp + LegalizeVectorTypes.cpp + ScheduleDAGFast.cpp + ScheduleDAGList.cpp + ScheduleDAGRRList.cpp + ScheduleDAGSDNodes.cpp + SelectionDAG.cpp + SelectionDAGBuilder.cpp + SelectionDAGISel.cpp + SelectionDAGPrinter.cpp + TargetLowering.cpp + TargetSelectionDAGInfo.cpp + ) diff --git a/final/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/final/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 00000000000..4c70bc3e88b --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,7641 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. 
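+// For example, the combines below rewrite (add x, 0) to x and turn a
+// multiply by a power of two into a shift once the DAG has been built.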
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dagcombine"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NodesCombined   , "Number of dag nodes combined");
+STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
+STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
+
+namespace {
+  static cl::opt<bool>
+    CombinerAA("combiner-alias-analysis", cl::Hidden,
+               cl::desc("Turn on alias analysis during testing"));
+
+  static cl::opt<bool>
+    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+                     cl::desc("Include global information in alias analysis"));
+
+//------------------------------ DAGCombiner ---------------------------------//
+
+  class DAGCombiner {
+    SelectionDAG &DAG;
+    const TargetLowering &TLI;
+    CombineLevel Level;
+    CodeGenOpt::Level OptLevel;
+    bool LegalOperations;
+    bool LegalTypes;
+
+    // Worklist of all of the nodes that need to be simplified.
+    std::vector<SDNode*> WorkList;
+
+    // AA - Used for DAG load/store alias analysis.
+    AliasAnalysis &AA;
+
+    /// AddUsersToWorkList - When an instruction is simplified, add all users of
+    /// the instruction to the work lists because they might get more simplified
+    /// now.
+    ///
+    void AddUsersToWorkList(SDNode *N) {
+      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+           UI != UE; ++UI)
+        AddToWorkList(*UI);
+    }
+
+    /// visit - call the node-specific routine that knows how to fold each
+    /// particular type of node.
+    SDValue visit(SDNode *N);
+
+  public:
+    /// AddToWorkList - Add to the work list, making sure its instance is at
+    /// the back (next to be processed.)
+    void AddToWorkList(SDNode *N) {
+      removeFromWorkList(N);
+      WorkList.push_back(N);
+    }
+
+    /// removeFromWorkList - remove all instances of N from the worklist.
+    ///
+    void removeFromWorkList(SDNode *N) {
+      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
+                     WorkList.end());
+    }
+
+    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                      bool AddTo = true);
+
+    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
+      return CombineTo(N, &Res, 1, AddTo);
+    }
+
+    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
+                      bool AddTo = true) {
+      SDValue To[] = { Res0, Res1 };
+      return CombineTo(N, To, 2, AddTo);
+    }
+
+    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
+
+  private:
+
+    /// SimplifyDemandedBits - Check the specified integer node value to see if
+    /// it can be simplified or if things it uses can be simplified by bit
+    /// propagation. If so, return true.
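+    /// For instance, when only the low bits of a value are ever demanded,
+    /// high bits set by a mask or shift may be proven irrelevant and dropped.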
+ bool SimplifyDemandedBits(SDValue Op) { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); + + /// combine - call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. + // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitSREM(SDNode *N); + SDValue visitUREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSDIVREM(SDNode *N); + SDValue visitUDIVREM(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitOR(SDNode *N); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBITCAST(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue 
ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, unsigned Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + DebugLoc DL, bool foldBooleans = true); + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. + void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVector &Aliases); + + /// isAlias - Return true if there is any possibility that the two addresses + /// overlap. + bool isAlias(SDValue Ptr1, int64_t Size1, + const Value *SrcValue1, int SrcValueOffset1, + unsigned SrcValueAlign1, + const MDNode *TBAAInfo1, + SDValue Ptr2, int64_t Size2, + const Value *SrcValue2, int SrcValueOffset2, + unsigned SrcValueAlign2, + const MDNode *TBAAInfo2) const; + + /// FindAliasInfo - Extracts the relevant alias information from the memory + /// node. Returns true if the operand was a load. + bool FindAliasInfo(SDNode *N, + SDValue &Ptr, int64_t &Size, + const Value *&SrcValue, int &SrcValueOffset, + unsigned &SrcValueAlignment, + const MDNode *&TBAAInfo) const; + + /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, + /// looking for a better chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + + /// Run - runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// getShiftAmountTy - Returns a type large enough to hold any valid + /// shift amount - before type legalization these can be huge. + EVT getShiftAmountTy(EVT LHSTy) { + return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + } + + /// isTypeLegal - This method returns true if we are running before type + /// legalization or if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } + }; +} + + +namespace { +/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class WorkListRemover : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + DC.removeFromWorkList(N); + } + + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. 
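+    // An updated node keeps its identity, so any worklist entry that the
+    // DAGCombiner holds for it remains valid; only deletions matter here.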
+ } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorkList(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, const std::vector &To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +/// isNegatibleForFree - Return 1 if we can compute the negated form of the +/// specified expression for the same cost as the expression itself, or 2 if we +/// can compute the negated form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + unsigned Depth = 0) { + // No compile time optimizations on this type. + if (Op.getValueType() == MVT::ppcf128) + return 0; + + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!UnsafeFPMath) return 0; + + // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); + } +} + +/// GetNegatedExpression - If isNegatibleForFree returns true, this function +/// returns the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. 
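+  // isNegatibleForFree returns 0 for any multi-use expression, so reaching
+  // this assert with a shared node means the two routines fell out of sync.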
+  assert(Op.hasOneUse() && "Unknown reuse!");
+
+  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
+  switch (Op.getOpcode()) {
+  default: llvm_unreachable("Unknown code");
+  case ISD::ConstantFP: {
+    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+    V.changeSign();
+    return DAG.getConstantFP(V, Op.getValueType());
+  }
+  case ISD::FADD:
+    // FIXME: determine better conditions for this xform.
+    assert(UnsafeFPMath);
+
+    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, Depth+1),
+                       Op.getOperand(0));
+  case ISD::FSUB:
+    // We can't turn -(A-B) into B-A when we honor signed zeros.
+    assert(UnsafeFPMath);
+
+    // fold (fneg (fsub 0, B)) -> B
+    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+      if (N0CFP->getValueAPF().isZero())
+        return Op.getOperand(1);
+
+    // fold (fneg (fsub A, B)) -> (fsub B, A)
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(0));
+
+  case ISD::FMUL:
+  case ISD::FDIV:
+    assert(!HonorSignDependentRoundingFPMath());
+
+    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
+      return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                         GetNegatedExpression(Op.getOperand(0), DAG,
+                                              LegalOperations, Depth+1),
+                         Op.getOperand(1));
+
+    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                       Op.getOperand(0),
+                       GetNegatedExpression(Op.getOperand(1), DAG,
+                                            LegalOperations, Depth+1));
+
+  case ISD::FP_EXTEND:
+  case ISD::FSIN:
+    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, Depth+1));
+  case ISD::FP_ROUND:
+    return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
+                       GetNegatedExpression(Op.getOperand(0), DAG,
+                                            LegalOperations, Depth+1),
+                       Op.getOperand(1));
+  }
+}
+
+
+// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
+// that selects between the values 1 and 0, making it equivalent to a setcc.
+// Also, set the incoming LHS, RHS, and CC references to the appropriate
+// nodes based on the type of node we are checking. This simplifies life a
+// bit for the callers.
+static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
+                              SDValue &CC) {
+  if (N.getOpcode() == ISD::SETCC) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(2);
+    return true;
+  }
+  if (N.getOpcode() == ISD::SELECT_CC &&
+      N.getOperand(2).getOpcode() == ISD::Constant &&
+      N.getOperand(3).getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
+      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
+    LHS = N.getOperand(0);
+    RHS = N.getOperand(1);
+    CC  = N.getOperand(4);
+    return true;
+  }
+  return false;
+}
+
+// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
+// one use. If this is true, it allows the users to invert the operation for
+// free when it is profitable to do so.
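+// For example, a single-use (setcc a, b, cc) can be replaced by
+// (setcc a, b, !cc) without penalizing any other user of the comparison.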
+static bool isOneUseSetCC(SDValue N) {
+  SDValue N0, N1, N2;
+  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
+    return true;
+  return false;
+}
+
+SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
+                                    SDValue N0, SDValue N1) {
+  EVT VT = N0.getValueType();
+  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
+    if (isa<ConstantSDNode>(N1)) {
+      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
+      SDValue OpNode =
+        DAG.FoldConstantArithmetic(Opc, VT,
+                                   cast<ConstantSDNode>(N0.getOperand(1)),
+                                   cast<ConstantSDNode>(N1));
+      return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+    } else if (N0.hasOneUse()) {
+      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
+      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+                                   N0.getOperand(0), N1);
+      AddToWorkList(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
+    }
+  }
+
+  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
+    if (isa<ConstantSDNode>(N0)) {
+      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
+      SDValue OpNode =
+        DAG.FoldConstantArithmetic(Opc, VT,
+                                   cast<ConstantSDNode>(N1.getOperand(1)),
+                                   cast<ConstantSDNode>(N0));
+      return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+    } else if (N1.hasOneUse()) {
+      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
+      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
+                                   N1.getOperand(0), N0);
+      AddToWorkList(OpNode.getNode());
+      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
+                               bool AddTo) {
+  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
+  ++NodesCombined;
+  DEBUG(dbgs() << "\nReplacing.1 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        To[0].getNode()->dump(&DAG);
+        dbgs() << " and " << NumTo-1 << " other values\n";
+        for (unsigned i = 0, e = NumTo; i != e; ++i)
+          assert((!To[i].getNode() ||
+                  N->getValueType(i) == To[i].getValueType()) &&
+                 "Cannot combine value to value of different type!"));
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
+
+  if (AddTo) {
+    // Push the new nodes and any users onto the worklist
+    for (unsigned i = 0, e = NumTo; i != e; ++i) {
+      if (To[i].getNode()) {
+        AddToWorkList(To[i].getNode());
+        AddUsersToWorkList(To[i].getNode());
+      }
+    }
+  }
+
+  // Finally, if the node is now dead, remove it from the graph.  The node
+  // may not be dead if the replacement process recursively simplified to
+  // something else needing this node.
+  if (N->use_empty()) {
+    // Nodes can be reintroduced into the worklist.  Make sure we do not
+    // process a node that has been replaced.
+    removeFromWorkList(N);
+
+    // Finally, since the node is now dead, remove it from the graph.
+    DAG.DeleteNode(N);
+  }
+  return SDValue(N, 0);
+}
+
+void DAGCombiner::
+CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
+  // Replace all uses.  If any nodes become isomorphic to other nodes and
+  // are deleted, make sure to remove them from our worklist.
+  WorkListRemover DeadNodes(*this);
+  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
+
+  // Push the new node and any (possibly new) users onto the worklist.
+  AddToWorkList(TLO.New.getNode());
+  AddUsersToWorkList(TLO.New.getNode());
+
+  // Finally, if the node is now dead, remove it from the graph.  The node
+  // may not be dead if the replacement process recursively simplified to
+  // something else needing this node.
+ if (TLO.Old.getNode()->use_empty()) { + removeFromWorkList(TLO.Old.getNode()); + + // If the operands of this node are only used by the node, they will now + // be dead. Make sure to visit them first to delete dead nodes early. + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + + DAG.DeleteNode(TLO.Old.getNode()); + } +} + +/// SimplifyDemandedBits - Check the specified integer node value to see if +/// it can be simplified or if things it uses can be simplified by bit +/// propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorkList(Op.getNode()); + + // Replace the old value with the new one. + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + DebugLoc dl = Load->getDebugLoc(); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), + &DeadNodes); + removeFromWorkList(Load); + DAG.DeleteNode(Load); + AddToWorkList(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + DebugLoc dl = Op.getDebugLoc(); + if (LoadSDNode *LD = dyn_cast(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (NewOp.getNode() == 0) + return SDValue(); + AddToWorkList(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// PromoteIntBinOp - Promote the specified integer binary operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (NN0.getNode() == 0) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (NN1.getNode() == 0) + return SDValue(); + } + + AddToWorkList(NN0.getNode()); + if (NN1.getNode()) + AddToWorkList(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// PromoteIntShiftOp - Promote the specified integer shift operation if the +/// target indicates it is beneficial. e.g. On x86, it's usually better to +/// promote i16 operations to i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. 
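+  // For example, an i16 shift on x86 may be rewritten below as an i32 shift
+  // whose result is truncated back to i16, since the narrow form needs an
+  // extra operand-size prefix.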
+ unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (N0.getNode() == 0) + return SDValue(); + + AddToWorkList(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + DebugLoc dl = Op.getDebugLoc(); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + DebugLoc dl = Op.getDebugLoc(); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD + : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes); + removeFromWorkList(N); + DAG.DeleteNode(N); + AddToWorkList(Result.getNode()); + return true; + } + return false; +} + + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= NoIllegalOperations; + LegalTypes = Level >= NoIllegalTypes; + + // Add all the dag nodes to the worklist. + WorkList.reserve(DAG.allnodes_size()); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) + WorkList.push_back(I); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // The root of the dag may dangle to deleted nodes until the dag combiner is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // while the worklist isn't empty, inspect the node on the end of it and + // try and combine it. + while (!WorkList.empty()) { + SDNode *N = WorkList.back(); + WorkList.pop_back(); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (N->use_empty() && N != &Dummy) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + DAG.DeleteNode(N); + continue; + } + + SDValue RV = combine(N); + + if (RV.getNode() == 0) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. 
+ if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DEBUG(dbgs() << "\nReplacing.3 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + RV.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes); + } + + // Push the new node and any users onto the worklist + AddToWorkList(RV.getNode()); + AddUsersToWorkList(RV.getNode()); + + // Add any uses of the old node to the worklist in case this node is the + // last one that uses them. They may become dead after this node is + // deleted. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + AddToWorkList(N->getOperand(i).getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) { + // Nodes can be reintroduced into the worklist. Make sure we do not + // process a node that has been replaced. + removeFromWorkList(N); + + // Finally, since the node is now dead, remove it from the graph. + DAG.DeleteNode(N); + } + } + + // If the root changed (e.g. it was a dead load, update the root). + DAG.setRoot(Dummy.getValue()); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: return visitSREM(N); + case ISD::UREM: return visitUREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SDIVREM: return visitSDIVREM(N); + case ISD::UDIVREM: return visitUDIVREM(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BITCAST: return visitBITCAST(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case 
ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (RV.getNode() == 0) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If nothing happened still, try promoting the operation. + if (RV.getNode() == 0) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (RV.getNode() == 0 && + SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa(N0) || !isa(N1)) { + SDValue Ops[] = { N1, N0 }; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), + Ops, 2); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// getInputChainForNode - Given a node, return its input chain if it has one, +/// otherwise return a null sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. 
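+  // For example, in TokenFactor(A, B) where B's own input chain is A, every
+  // path through B already passes through A, so A adds no extra ordering.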
+  if (N->getNumOperands() == 2) {
+    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
+      return N->getOperand(0);
+    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
+      return N->getOperand(1);
+  }
+
+  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
+  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
+  SmallPtrSet<SDNode*, 16> SeenOps;
+  bool Changed = false;           // If we should replace this token factor.
+
+  // Start out with this token factor.
+  TFs.push_back(N);
+
+  // Iterate through token factors.  The TFs list grows when new token factors
+  // are encountered.
+  for (unsigned i = 0; i < TFs.size(); ++i) {
+    SDNode *TF = TFs[i];
+
+    // Check each of the operands.
+    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
+      SDValue Op = TF->getOperand(i);
+
+      switch (Op.getOpcode()) {
+      case ISD::EntryToken:
+        // Entry tokens don't need to be added to the list. They are
+        // redundant.
+        Changed = true;
+        break;
+
+      case ISD::TokenFactor:
+        if (Op.hasOneUse() &&
+            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+          // Queue up for processing.
+          TFs.push_back(Op.getNode());
+          // Clean up in case the token factor is removed.
+          AddToWorkList(Op.getNode());
+          Changed = true;
+          break;
+        }
+        // Fall thru
+
+      default:
+        // Only add if it isn't already in the list.
+        if (SeenOps.insert(Op.getNode()))
+          Ops.push_back(Op);
+        else
+          Changed = true;
+        break;
+      }
+    }
+  }
+
+  SDValue Result;
+
+  // If we've changed things around then replace token factor.
+  if (Changed) {
+    if (Ops.empty()) {
+      // The entry token is the only possible outcome.
+      Result = DAG.getEntryNode();
+    } else {
+      // New and improved token factor.
+      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                           MVT::Other, &Ops[0], Ops.size());
+    }
+
+    // Don't add users to work list.
+    return CombineTo(N, Result, false);
+  }
+
+  return Result;
+}
+
+/// MERGE_VALUES can always be eliminated.
+SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
+  WorkListRemover DeadNodes(*this);
+  // Replacing results may cause a different MERGE_VALUES to suddenly
+  // be CSE'd with N, and carry its uses with it. Iterate until no
+  // uses remain, to ensure that the node can be safely deleted.
+  do {
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
+                                    &DeadNodes);
+  } while (!N->use_empty());
+  removeFromWorkList(N);
+  DAG.DeleteNode(N);
+  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+}
+
+static
+SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
+                              SelectionDAG &DAG) {
+  EVT VT = N0.getValueType();
+  SDValue N00 = N0.getOperand(0);
+  SDValue N01 = N0.getOperand(1);
+  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
+
+  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N00.getOperand(1))) {
+    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
+                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
+                                 N00.getOperand(0), N01),
+                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
+                                 N00.getOperand(1), N01));
+    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+  }
+
+  return SDValue();
+}
+
+/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
+/// return 0 or 1 depending on the carry flag.
+static bool isCarryMaterialization(SDValue V) {
+  if (V.getOpcode() != ISD::ADDE)
+    return false;
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
+  return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (add x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (add c1, c2) -> c1+c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (add x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (add Sym, c) -> Sym+c
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
+        GA->getOpcode() == ISD::GlobalAddress)
+      return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+                                  GA->getOffset() +
+                                  (uint64_t)N1C->getSExtValue());
+  // fold ((c1-A)+c2) -> (c1+c2)-A
+  if (N1C && N0.getOpcode() == ISD::SUB)
+    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getConstant(N1C->getAPIntValue()+
+                                         N0C->getAPIntValue(), VT),
+                         N0.getOperand(1));
+  // reassociate add
+  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
+  if (RADD.getNode() != 0)
+    return RADD;
+  // fold ((0-A) + B) -> B-A
+  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
+      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
+  // fold (A + (0-B)) -> A-B
+  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
+      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
+  // fold (A+(B-A)) -> B
+  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
+    return N1.getOperand(0);
+  // fold ((B-A)+A) -> B
+  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
+    return N0.getOperand(0);
+  // fold (A+(B-(A+C))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(0))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(1));
+  // fold (A+(B-(C+A))) to (B-C)
+  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
+      N0 == N1.getOperand(1).getOperand(1))
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
+                       N1.getOperand(1).getOperand(0));
+  // fold (A+((B-A)+or-C)) to (B+or-C)
+  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
+      N1.getOperand(0).getOpcode() == ISD::SUB &&
+      N0 == N1.getOperand(0).getOperand(1))
+    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
+                       N1.getOperand(0).getOperand(0), N1.getOperand(1));
+
+  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
+  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    SDValue N10 = N1.getOperand(0);
+    SDValue N11 = N1.getOperand(1);
+
+    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
+      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
+                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
+  }
+
+  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (a+b) -> (a|b) iff a and b share no bits.
+  if (VT.isInteger() && !VT.isVector()) {
+    APInt LHSZero, LHSOne;
+    APInt RHSZero, RHSOne;
+    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
+
+    if (LHSZero.getBoolValue()) {
+      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
+
+      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
+      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
+      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
+          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
+        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
+    }
+  }
+
+  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
+  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
+    if (Result.getNode()) return Result;
+  }
+  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
+    if (Result.getNode()) return Result;
+  }
+
+  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
+  if (N1.getOpcode() == ISD::SHL &&
+      N1.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N1.getOperand(0).getOperand(1),
+                                       N1.getOperand(1)));
+  if (N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOpcode() == ISD::SUB)
+    if (ConstantSDNode *C =
+          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
+      if (C->getAPIntValue() == 0)
+        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
+                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                                       N0.getOperand(0).getOperand(1),
+                                       N0.getOperand(1)));
+
+  if (N1.getOpcode() == ISD::AND) {
+    SDValue AndOp0 = N1.getOperand(0);
+    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+    unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+    // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { + DebugLoc DL = N->getDebugLoc(); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + DebugLoc DL = N->getDebugLoc(); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + + // add (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), + N0.getOperand(2)); + + // add X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), + N1.getOperand(2)); + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (N->hasNUsesOfValue(0, 1)) + return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + + // canonicalize constant to RHS. + if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (N1C && N1C->isNullValue()) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); + DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. + if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) || + (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask)) + return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), MVT::Glue)); + } + + // addc (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, + DAG.getConstant(0, VT), N0.getOperand(2)); + + // addc X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, + DAG.getConstant(0, VT), N1.getOperand(2)); + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + ConstantSDNode *N0C = dyn_cast(N0); + ConstantSDNode *N1C = dyn_cast(N1); + + // If both operands are null we know that carry out will always be false. 
+ if (N0C && N0C->isNullValue() && N0 == N1) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), + MVT::Glue)); + + // canonicalize constant to RHS + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + + return SDValue(); +} + +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, bool LegalOperations) { + if (!VT.isVector()) { + return DAG.getConstant(0, VT); + } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { + // Produce a vector of zeros. + SDValue El = DAG.getConstant(0, VT.getVectorElementType()); + std::vector Ops(VT.getVectorNumElements(), El); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + &Ops[0], Ops.size()); + } + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantSDNode *N0C = dyn_cast(N0.getNode()); + ConstantSDNode *N1C = dyn_cast(N1.getNode()); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + // fold (sub c1, c2) -> c1-c2 + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), VT)); + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + if (N0C && N0C->isAllOnesValue()) + return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. 
+  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
+    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
+      // fold (sub Sym, c) -> Sym-c
+      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
+        return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
+                                    GA->getOffset() -
+                                      (uint64_t)N1C->getSExtValue());
+      // fold (sub Sym+c1, Sym+c2) -> c1-c2
+      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
+        if (GA->getGlobal() == GB->getGlobal())
+          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
+                                 VT);
+    }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (mul x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (mul c1, c2) -> c1*c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (mul x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mul x, -1) -> 0-x
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), N0);
+  // fold (mul x, (1 << c)) -> x << c
+  if (N1C && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy(N0.getValueType())));
+  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
+  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
+    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
+    // FIXME: If the input is something that is easily negated (e.g. a
+    // single-use add), we should put the negate there.
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT),
+                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                            DAG.getConstant(Log2Val,
+                                      getShiftAmountTy(N0.getValueType()))));
+  }
+  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                             N1, N0.getOperand(1));
+    AddToWorkList(C3.getNode());
+    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                       N0.getOperand(0), C3);
+  }
+
+  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
+  // use.
+  {
+    SDValue Sh(0,0), Y(0,0);
+    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
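+    // Whichever side the shl appears on, Sh is bound to the shift and Y to
+    // the other operand, so the single rewrite below handles both orders.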
+    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+        N0.getNode()->hasOneUse()) {
+      Sh = N0; Y = N1;
+    } else if (N1.getOpcode() == ISD::SHL &&
+               isa<ConstantSDNode>(N1.getOperand(1)) &&
+               N1.getNode()->hasOneUse()) {
+      Sh = N1; Y = N0;
+    }
+
+    if (Sh.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                Sh.getOperand(0), Y);
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
+                         Mul, Sh.getOperand(1));
+    }
+  }
+
+  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
+  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
+                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
+                                   N0.getOperand(0), N1),
+                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  // reassociate mul
+  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
+  if (RMUL.getNode() != 0)
+    return RMUL;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (sdiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
+  // fold (sdiv X, 1) -> X
+  if (N1C && N1C->getSExtValue() == 1LL)
+    return N0;
+  // fold (sdiv X, -1) -> 0-X
+  if (N1C && N1C->isAllOnesValue())
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), N0);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
+  if (!VT.isVector()) {
+    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
+                         N0, N1);
+  }
+  // fold (sdiv X, pow2) -> simple ops after legalize
+  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
+      (isPowerOf2_64(N1C->getSExtValue()) ||
+       isPowerOf2_64(-N1C->getSExtValue()))) {
+    // If dividing by powers of two is cheap, then don't perform the following
+    // fold.
+    if (TLI.isPow2DivCheap())
+      return SDValue();
+
+    int64_t pow2 = N1C->getSExtValue();
+    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
+    unsigned lg2 = Log2_64(abs2);
+
+    // Splat the sign bit into the register
+    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                              DAG.getConstant(VT.getSizeInBits()-1,
+                                       getShiftAmountTy(N0.getValueType())));
+    AddToWorkList(SGN.getNode());
+
+    // Add (N0 < 0) ? abs2 - 1 : 0;
+    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
+                              DAG.getConstant(VT.getSizeInBits() - lg2,
+                                       getShiftAmountTy(SGN.getValueType())));
+    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
+    AddToWorkList(SRL.getNode());
+    AddToWorkList(ADD.getNode());    // Divide by pow2
+    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
+                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));
+
+    // If we're dividing by a positive value, we're done.  Otherwise, we must
+    // negate the result.
+    if (pow2 > 0)
+      return SRA;
+
+    AddToWorkList(SRA.getNode());
+    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
+                       DAG.getConstant(0, VT), SRA);
+  }
+
+  // if integer divide is expensive and we satisfy the requirements, emit an
+  // alternate sequence.
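+  // BuildSDIV generally turns such a divide into a multiply by a precomputed
+  // "magic" constant plus shift/add fixups; e.g. an i32 divide by 7 can
+  // become, roughly, a mulhs and a couple of shifts (illustrative only; the
+  // exact sequence depends on the constant and the target).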
+  if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
+      !TLI.isIntDivCheap()) {
+    SDValue Op = BuildSDIV(N);
+    if (Op.getNode()) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (udiv c1, c2) -> c1/c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
+  // fold (udiv x, (1 << c)) -> x >>u c
+  if (N1C && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy(N0.getValueType())));
+  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (SHC->getAPIntValue().isPowerOf2()) {
+        EVT ADDVT = N1.getOperand(1).getValueType();
+        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
+                                  N1.getOperand(1),
+                                  DAG.getConstant(SHC->getAPIntValue()
+                                                                .logBase2(),
+                                                  ADDVT));
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
+      }
+    }
+  }
+  // fold (udiv x, c) -> alternate
+  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
+    SDValue Op = BuildUDIV(N);
+    if (Op.getNode()) return Op;
+  }
+
+  // undef / X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X / undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (srem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
+  // If we know the sign bits of both operands are zero, strength reduce to a
+  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
+  if (!VT.isVector()) {
+    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
+  }
+
+  // If X/C can be simplified by the division-by-constant logic, lower
+  // X%C to the equivalent of X-X/C*C.
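+  // e.g. for X %s 8 the SDIV folds to shifts above, so X - (X /s 8)*8 needs
+  // no divide at all; the combine below fires only if the materialized SDIV
+  // actually simplifies to something else.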
+  if (N1C && !N1C->isNullValue()) {
+    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
+    AddToWorkList(Div.getNode());
+    SDValue OptimizedDiv = combine(Div.getNode());
+    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                OptimizedDiv, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+      AddToWorkList(Mul.getNode());
+      return Sub;
+    }
+  }
+
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (urem c1, c2) -> c1%c2
+  if (N0C && N1C && !N1C->isNullValue())
+    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
+  // fold (urem x, pow2) -> (and x, pow2-1)
+  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
+                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
+  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
+  if (N1.getOpcode() == ISD::SHL) {
+    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
+      if (SHC->getAPIntValue().isPowerOf2()) {
+        SDValue Add =
+          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
+                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
+                                 VT));
+        AddToWorkList(Add.getNode());
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
+      }
+    }
+  }
+
+  // If X/C can be simplified by the division-by-constant logic, lower
+  // X%C to the equivalent of X-X/C*C.
+  if (N1C && !N1C->isNullValue()) {
+    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
+    AddToWorkList(Div.getNode());
+    SDValue OptimizedDiv = combine(Div.getNode());
+    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
+      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
+                                OptimizedDiv, N1);
+      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
+      AddToWorkList(Mul.getNode());
+      return Sub;
+    }
+  }
+
+  // undef % X -> 0
+  if (N0.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // X % undef -> undef
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  // fold (mulhs x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhs x, 1) -> (sra x, size(x)-1)
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
+                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
+                                       getShiftAmountTy(N0.getValueType())));
+  // fold (mulhs x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  // If the type twice as wide is legal, transform the mulhs to a wider multiply
+  // plus a shift.
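+  // e.g. for i32 mulhs on a target with a legal i64 multiply: sign-extend
+  // both operands to i64, multiply, and take bits 32..63.  A logical SRL is
+  // sufficient below because the truncate keeps only those bits.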
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMULHU(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  // fold (mulhu x, 0) -> 0
+  if (N1C && N1C->isNullValue())
+    return N1;
+  // fold (mulhu x, 1) -> 0
+  if (N1C && N1C->getAPIntValue() == 1)
+    return DAG.getConstant(0, N0.getValueType());
+  // fold (mulhu x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  // If the type twice as wide is legal, transform the mulhu to a wider multiply
+  // plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
+      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
+/// compute two values. LoOp and HiOp give the opcodes for the two computations
+/// that are being performed. Returns the simplified value, or a null SDValue
+/// if no simplification was made.
+///
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
+                                                unsigned HiOp) {
+  // If the high half is not needed, just compute the low half.
+  bool HiExists = N->hasAnyUseOfValue(1);
+  if (!HiExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
+    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If the low half is not needed, just compute the high half.
+  bool LoExists = N->hasAnyUseOfValue(0);
+  if (!LoExists &&
+      (!LegalOperations ||
+       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
+    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+                              N->op_begin(), N->getNumOperands());
+    return CombineTo(N, Res, Res);
+  }
+
+  // If both halves are used, return as is.
+  if (LoExists && HiExists)
+    return SDValue();
+
+  // If the two computed results can be simplified separately, separate them.
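+  // Re-materialize the used half as a standalone single-result node and let
+  // the normal combine logic try it in isolation; if it simplifies, both
+  // result values of N are replaced (the unused one harmlessly).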
+  if (LoExists) {
+    SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
+                             N->op_begin(), N->getNumOperands());
+    AddToWorkList(Lo.getNode());
+    SDValue LoOpt = combine(Lo.getNode());
+    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
+      return CombineTo(N, LoOpt, LoOpt);
+  }
+
+  if (HiExists) {
+    SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
+                             N->op_begin(), N->getNumOperands());
+    AddToWorkList(Hi.getNode());
+    SDValue HiOpt = combine(Hi.getNode());
+    if (HiOpt.getNode() && HiOpt != Hi &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
+      return CombineTo(N, HiOpt, HiOpt);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
+  if (Res.getNode()) return Res;
+
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the smul_lohi to a wider
+  // multiply plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
+  if (Res.getNode()) return Res;
+
+  EVT VT = N->getValueType(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the umul_lohi to a wider
+  // multiply plus a shift.
+  if (VT.isSimple() && !VT.isVector()) {
+    MVT Simple = VT.getSimpleVT();
+    unsigned SimpleSize = Simple.getSizeInBits();
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+      // Compute the high part as N1.
+      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
+      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+      // Compute the low part as N0.
+      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+      return CombineTo(N, Lo, Hi);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
+  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
+  if (Res.getNode()) return Res;
+
+  return SDValue();
+}
+
+/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
+/// two operands of the same opcode, try to simplify it.
+SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
+
+  // Bail early if none of these transforms apply.
+  if (N0.getNode()->getNumOperands() == 0) return SDValue();
+
+  // For each of OP in AND/OR/XOR:
+  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
+  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
+  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
+  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  //
+  // do not sink logical op inside of a vector extend, since it may combine
+  // into a vsetcc.
+  EVT Op0VT = N0.getOperand(0).getValueType();
+  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+       N0.getOpcode() == ISD::SIGN_EXTEND ||
+       // Avoid infinite looping with PromoteIntBinOp.
+       (N0.getOpcode() == ISD::ANY_EXTEND &&
+        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
+       (N0.getOpcode() == ISD::TRUNCATE &&
+        (!TLI.isZExtFree(VT, Op0VT) ||
+         !TLI.isTruncateFree(Op0VT, VT)) &&
+        TLI.isTypeLegal(Op0VT))) &&
+      !VT.isVector() &&
+      Op0VT == N1.getOperand(0).getValueType() &&
+      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
+    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+                                 N0.getOperand(0).getValueType(),
+                                 N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.getNode());
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
+  }
+
+  // For each of OP in SHL/SRL/SRA/AND...
+  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
+  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
+  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
+       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
+      N0.getOperand(1) == N1.getOperand(1)) {
+    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
+                                 N0.getOperand(0).getValueType(),
+                                 N0.getOperand(0), N1.getOperand(0));
+    AddToWorkList(ORNode.getNode());
+    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+                       ORNode, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitAND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N1.getValueType();
+  unsigned BitWidth = VT.getScalarType().getSizeInBits();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (and x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (and c1, c2) -> c1&c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
+  // fold (and x, -1) -> x
+  if (N1C && N1C->isAllOnesValue())
+    return N0;
+  // if (and x, c) is known to be zero, return 0
+  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+                                   APInt::getAllOnesValue(BitWidth)))
+    return DAG.getConstant(0, VT);
+  // reassociate and
+  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
+  if (RAND.getNode() != 0)
+    return RAND;
+  // fold (and (or x, C), D) -> D if (C & D) == D
+  if (N1C && N0.getOpcode() == ISD::OR)
+    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
+        return N1;
+  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
+  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+    SDValue N0Op0 = N0.getOperand(0);
+    APInt Mask = ~N1C->getAPIntValue();
+    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
+      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
+                                 N0.getValueType(), N0Op0);
+
+      // Replace uses of the AND with uses of the Zero extend node.
+      CombineTo(N, Zext);
+
+      // We actually want to replace all uses of the any_extend with the
+      // zero_extend, to avoid duplicating things.  This will later cause this
+      // AND to be folded.
+      CombineTo(N0.getNode(), Zext);
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        LL.getValueType().isInteger()) {
+      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
+      if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
+        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+      // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
+        SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
+                                      LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+      }
+      // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
+        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = LL.getValueType().isInteger();
+      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID &&
+          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                            LL, LR, Result);
+    }
+  }
+
+  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
+  // fold (and (sra)) -> (and (srl)) when possible.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (zext_inreg (extload x)) -> (zextload x)
+  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
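+    // e.g. for (and (extload i8 -> i32, x), 255): the mask zeroes bits 8..31,
+    // which are exactly the bits the extension produced, so a zextload of the
+    // i8 value computes the same result.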
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+                          BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+        ((!LegalOperations && !LN0->isVolatile()) ||
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+                                       LN0->getChain(), LN0->getBasePtr(),
+                                       LN0->getPointerInfo(), MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
+  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+      N0.hasOneUse()) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    EVT MemVT = LN0->getMemoryVT();
+    // If we zero all the possible extended bits, then we can turn this into
+    // a zextload if we are running before legalize or the operation is legal.
+    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
+                          BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+        ((!LegalOperations && !LN0->isVolatile()) ||
+         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
+      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+                                       LN0->getChain(),
+                                       LN0->getBasePtr(), LN0->getPointerInfo(),
+                                       MemVT,
+                                       LN0->isVolatile(), LN0->isNonTemporal(),
+                                       LN0->getAlignment());
+      AddToWorkList(N);
+      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+    }
+  }
+
+  // fold (and (load x), 255) -> (zextload x, i8)
+  // fold (and (extload x, i16), 255) -> (zextload x, i8)
+  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
+  if (N1C && (N0.getOpcode() == ISD::LOAD ||
+              (N0.getOpcode() == ISD::ANY_EXTEND &&
+               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
+    LoadSDNode *LN0 = HasAnyExt
+      ? cast<LoadSDNode>(N0.getOperand(0))
+      : cast<LoadSDNode>(N0);
+    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
+        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
+      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
+      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
+        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+        EVT LoadedVT = LN0->getMemoryVT();
+
+        if (ExtVT == LoadedVT &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+
+          SDValue NewLoad =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), LN0->getBasePtr(),
+                           LN0->getPointerInfo(),
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           LN0->getAlignment());
+          AddToWorkList(N);
+          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+
+        // Do not change the width of a volatile load.
+        // Do not generate loads of non-round integer types since these can
+        // be expensive (and would be wrong if the type is not byte sized).
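+        // e.g. narrowing (and (load i32 x), 255) to (zextload i8 x) is fine,
+        // but a mask of 0xFFFFFF would give ExtVT == i24, which is not a
+        // round type and is rejected below.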
+        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
+            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
+          EVT PtrType = LN0->getOperand(1).getValueType();
+
+          unsigned Alignment = LN0->getAlignment();
+          SDValue NewPtr = LN0->getBasePtr();
+
+          // For big endian targets, we need to add an offset to the pointer
+          // to load the correct bytes.  For little endian systems, we merely
+          // need to read fewer bytes from the same pointer.
+          if (TLI.isBigEndian()) {
+            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
+            unsigned EVTStoreBytes = ExtVT.getStoreSize();
+            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
+            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
+                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
+            Alignment = MinAlign(Alignment, PtrOff);
+          }
+
+          AddToWorkList(NewPtr.getNode());
+
+          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
+          SDValue Load =
+            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+                           LN0->getChain(), NewPtr,
+                           LN0->getPointerInfo(),
+                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+                           Alignment);
+          AddToWorkList(N);
+          CombineTo(LN0, Load, Load.getValue(1));
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LL, LR, RL, RR, CC0, CC1;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N1.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (or x, undef) -> -1
+  if (!LegalOperations &&
+      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
+    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  }
+  // fold (or c1, c2) -> c1|c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
+  // fold (or x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (or x, -1) -> -1
+  if (N1C && N1C->isAllOnesValue())
+    return N1;
+  // fold (or x, c) -> c iff (x & ~c) == 0
+  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
+    return N1;
+  // reassociate or
+  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
+  if (ROR.getNode() != 0)
+    return ROR;
+  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+  // iff (c1 & c2) == 0.
+  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
+      isa<ConstantSDNode>(N0.getOperand(1))) {
+    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
+    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                                     N0.getOperand(0), N1),
+                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+  }
+  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
+  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
+    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
+    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
+
+    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
+        LL.getValueType().isInteger()) {
+      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
+      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
+      if (cast<ConstantSDNode>(LR)->isNullValue() &&
+          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
+        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
+                                     LR.getValueType(), LL, RL);
+        AddToWorkList(ORNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
+      }
+      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
+      // fold (or (setgt X, -1), (setgt Y, -1)) -> (setgt (and X, Y), -1)
+      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
+          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
+        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
+                                      LR.getValueType(), LL, RL);
+        AddToWorkList(ANDNode.getNode());
+        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
+      }
+    }
+    // canonicalize equivalent to ll == rl
+    if (LL == RR && LR == RL) {
+      Op1 = ISD::getSetCCSwappedOperands(Op1);
+      std::swap(RL, RR);
+    }
+    if (LL == RL && LR == RR) {
+      bool isInteger = LL.getValueType().isInteger();
+      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
+      if (Result != ISD::SETCC_INVALID &&
+          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
+        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
+                            LL, LR, Result);
+    }
+  }
+
+  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
+  if (N0.getOpcode() == ISD::AND &&
+      N1.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      N1.getOperand(1).getOpcode() == ISD::Constant &&
+      // Don't increase # computations.
+      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
+    // We can only do this xform if we know that bits from X that are set in C2
+    // but not in C1 are already zero. Likewise for Y.
+    const APInt &LHSMask =
+      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    const APInt &RHSMask =
+      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
+
+    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
+        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
+      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1.getOperand(0));
+      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
+                         DAG.getConstant(LHSMask | RHSMask, VT));
+    }
+  }
+
+  // See if this is some rotate idiom.
+  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
+    return SDValue(Rot, 0);
+
+  // Simplify the operands using demanded-bits information.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
+static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
+  if (Op.getOpcode() == ISD::AND) {
+    if (isa<ConstantSDNode>(Op.getOperand(1))) {
+      Mask = Op.getOperand(1);
+      Op = Op.getOperand(0);
+    } else {
+      return false;
+    }
+  }
+
+  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
+    Shift = Op;
+    return true;
+  }
+
+  return false;
+}
+
+// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
+// idioms for rotate, and if the target supports rotation instructions,
+// generate a rot[lr].
+SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
+  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
+  EVT VT = LHS.getValueType();
+  if (!TLI.isTypeLegal(VT)) return 0;
+
+  // The target must have at least one rotate flavor.
+  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
+  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
+  if (!HasROTL && !HasROTR) return 0;
+
+  // Match "(X shl/srl V1) & V2" where V2 may not be present.
+  SDValue LHSShift;   // The shift.
+  SDValue LHSMask;    // AND value if any.
+  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
+    return 0; // Not part of a rotate.
+
+  SDValue RHSShift;   // The shift.
+  SDValue RHSMask;    // AND value if any.
+  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
+    return 0; // Not part of a rotate.
+
+  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
+    return 0;   // Not shifting the same value.
+
+  if (LHSShift.getOpcode() == RHSShift.getOpcode())
+    return 0;   // Shifts must disagree.
+
+  // Canonicalize shl to left side in a shl/srl pair.
+  if (RHSShift.getOpcode() == ISD::SHL) {
+    std::swap(LHS, RHS);
+    std::swap(LHSShift, RHSShift);
+    std::swap(LHSMask , RHSMask );
+  }
+
+  unsigned OpSizeInBits = VT.getSizeInBits();
+  SDValue LHSShiftArg = LHSShift.getOperand(0);
+  SDValue LHSShiftAmt = LHSShift.getOperand(1);
+  SDValue RHSShiftAmt = RHSShift.getOperand(1);
+
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
+  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
+  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
+      RHSShiftAmt.getOpcode() == ISD::Constant) {
+    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
+    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
+    if ((LShVal + RShVal) != OpSizeInBits)
+      return 0;
+
+    SDValue Rot;
+    if (HasROTL)
+      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
+    else
+      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
+
+    // If there is an AND of either shifted operand, apply it to the result.
+    if (LHSMask.getNode() || RHSMask.getNode()) {
+      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
+
+      if (LHSMask.getNode()) {
+        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
+        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
+      }
+      if (RHSMask.getNode()) {
+        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
+        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
+      }
+
+      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
+    }
+
+    return Rot.getNode();
+  }
+
+  // If there is a mask here, and we have a variable shift, we can't be sure
+  // that we're masking out the right stuff.
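+  // With constant shift amounts the AND masks were folded into the rotated
+  // result above; with a variable amount the cleared bits depend on a runtime
+  // value, so bail out rather than guess.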
+  if (LHSMask.getNode() || RHSMask.getNode())
+    return 0;
+
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
+  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
+  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
+      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
+      if (SUBC->getAPIntValue() == OpSizeInBits) {
+        if (HasROTL)
+          return DAG.getNode(ISD::ROTL, DL, VT,
+                             LHSShiftArg, LHSShiftAmt).getNode();
+        else
+          return DAG.getNode(ISD::ROTR, DL, VT,
+                             LHSShiftArg, RHSShiftAmt).getNode();
+      }
+    }
+  }
+
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
+  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
+  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
+      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
+    if (ConstantSDNode *SUBC =
+          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
+      if (SUBC->getAPIntValue() == OpSizeInBits) {
+        if (HasROTR)
+          return DAG.getNode(ISD::ROTR, DL, VT,
+                             LHSShiftArg, RHSShiftAmt).getNode();
+        else
+          return DAG.getNode(ISD::ROTL, DL, VT,
+                             LHSShiftArg, LHSShiftAmt).getNode();
+      }
+    }
+  }
+
+  // Look for sign/zext/any-extended or truncate cases:
+  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
+      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
+       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
+    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
+    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
+    if (RExtOp0.getOpcode() == ISD::SUB &&
+        RExtOp0.getOperand(1) == LExtOp0) {
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotl x, y)
+      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
+      //   (rotr x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
+          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
+                             LHSShiftArg,
+                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
+        }
+      }
+    } else if (LExtOp0.getOpcode() == ISD::SUB &&
+               RExtOp0 == LExtOp0.getOperand(1)) {
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotr x, y)
+      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
+      //   (rotl x, (sub 32, y))
+      if (ConstantSDNode *SUBC =
+            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
+        if (SUBC->getAPIntValue() == OpSizeInBits) {
+          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
+                             LHSShiftArg,
+                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
+SDValue DAGCombiner::visitXOR(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue LHS, RHS, CC;
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
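+  // Folding to 0 rather than propagating undef matches the common use of
+  // 'xor x, x' to produce zero even when x holds no well-defined value.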
+  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+  // fold (xor x, undef) -> undef
+  if (N0.getOpcode() == ISD::UNDEF)
+    return N0;
+  if (N1.getOpcode() == ISD::UNDEF)
+    return N1;
+  // fold (xor c1, c2) -> c1^c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
+  // canonicalize constant to RHS
+  if (N0C && !N1C)
+    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+  // fold (xor x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // reassociate xor
+  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
+  if (RXOR.getNode() != 0)
+    return RXOR;
+
+  // fold !(x cc y) -> (x !cc y)
+  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
+    bool isInt = LHS.getValueType().isInteger();
+    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               isInt);
+
+    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
+      switch (N0.getOpcode()) {
+      default:
+        llvm_unreachable("Unhandled SetCC Equivalent!");
+      case ISD::SETCC:
+        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
+      case ISD::SELECT_CC:
+        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
+                               N0.getOperand(3), NotCC);
+      }
+    }
+  }
+
+  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
+  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
+      N0.getNode()->hasOneUse() &&
+      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
+    SDValue V = N0.getOperand(0);
+    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
+                    DAG.getConstant(1, V.getValueType()));
+    AddToWorkList(V.getNode());
+    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
+  }
+
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
+  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
+  if (N1C && N1C->isAllOnesValue() &&
+      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
+    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
+    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
+      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
+      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
+      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
+      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
+    }
+  }
+  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
+  if (N1C && N0.getOpcode() == ISD::XOR) {
+    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N00C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N00C->getAPIntValue(), VT));
+    if (N01C)
+      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(N1C->getAPIntValue() ^
+                                         N01C->getAPIntValue(), VT));
+  }
+  // fold (xor x, x) -> 0
+  if (N0 == N1)
+    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
+
+  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
+  if (N0.getOpcode() == N1.getOpcode()) {
+    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
+    if (Tmp.getNode()) return Tmp;
+  }
+
+  // Simplify the expression using non-local knowledge.
+  if (!VT.isVector() &&
+      SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+  SDNode *LHS = N->getOperand(0).getNode();
+  if (!LHS->hasOneUse()) return SDValue();
+
+  // We want to pull some binops through shifts, so that we have (and (shift))
+  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
+  // thing happens with address calculations, so it's important to canonicalize
+  // it.
+  bool HighBitSet = false;  // Can we transform this if the high bit is set?
+
+  switch (LHS->getOpcode()) {
+  default: return SDValue();
+  case ISD::OR:
+  case ISD::XOR:
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  case ISD::AND:
+    HighBitSet = true;  // We can only transform sra if the high bit is set.
+    break;
+  case ISD::ADD:
+    if (N->getOpcode() != ISD::SHL)
+      return SDValue(); // only shl(add) not sr[al](add).
+    HighBitSet = false; // We can only transform sra if the high bit is clear.
+    break;
+  }
+
+  // We require the RHS of the binop to be a constant as well.
+  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+  if (!BinOpCst) return SDValue();
+
+  // FIXME: disable this unless the input to the binop is a shift by a constant.
+  // If it is not a shift, it pessimizes some common cases like:
+  //
+  //    void foo(int *X, int i) { X[i & 1235] = 1; }
+  //    int bar(int *X, int i) { return X[i & 255]; }
+  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
+  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
+       BinOpLHSVal->getOpcode() != ISD::SRA &&
+       BinOpLHSVal->getOpcode() != ISD::SRL) ||
+      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  // If this is a signed shift right, and the high bit is modified by the
+  // logical operation, do not perform the transformation. The highBitSet
+  // boolean indicates the value of the high bit of the constant which would
+  // cause it to be modified for this operation.
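+  //
+  // Concretely, the check below allows AND only when the constant's sign bit
+  // is set, and OR/XOR only when it is clear, so the binop cannot disturb the
+  // sign bit that an sra replicates.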
+  if (N->getOpcode() == ISD::SRA) {
+    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
+    if (BinOpRHSSignSet != HighBitSet)
+      return SDValue();
+  }
+
+  // Fold the constants, shifting the binop RHS by the shift amount.
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
+                               N->getValueType(0),
+                               LHS->getOperand(1), N->getOperand(1));
+
+  // Create the new shift.
+  SDValue NewShift = DAG.getNode(N->getOpcode(),
+                                 LHS->getOperand(0).getDebugLoc(),
+                                 VT, LHS->getOperand(0), N->getOperand(1));
+
+  // Create the new binop.
+  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
+}
+
+SDValue DAGCombiner::visitSHL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (shl c1, c2) -> c1<<c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
+  // fold (shl 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (shl x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (shl x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // if (shl x, c) is known to be zero, return 0
+  if (DAG.MaskedValueIsZero(SDValue(N, 0),
+                            APInt::getAllOnesValue(OpSizeInBits)))
+    return DAG.getConstant(0, VT);
+  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SHL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    if (c1 + c2 >= OpSizeInBits)
+      return DAG.getConstant(0, VT);
+    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getConstant(c1 + c2, N1.getValueType()));
+  }
+
+  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+  // For this to be valid, the second form must not preserve any of the bits
+  // that are shifted out by the inner shift in the first form.  This means
+  // the outer shift size must be >= the number of bits added by the ext.
+  // As a corollary, we don't care what kind of ext it is.
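+  // e.g. (shl (zext i16->i32 (shl x, 4)), 20): the outer shift amount (20) is
+  // >= the 16 bits added by the zext, so this becomes (shl (zext x), 24); in
+  // both forms only the bits of x below position 8 survive.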
+  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+              N0.getOpcode() == ISD::ANY_EXTEND ||
+              N0.getOpcode() == ISD::SIGN_EXTEND) &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+    uint64_t c1 =
+      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+    uint64_t c2 = N1C->getZExtValue();
+    EVT InnerShiftVT = N0.getOperand(0).getValueType();
+    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+    if (c2 >= OpSizeInBits - InnerShiftSize) {
+      if (c1 + c2 >= OpSizeInBits)
+        return DAG.getConstant(0, VT);
+      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+                                     N0.getOperand(0)->getOperand(0)),
+                         DAG.getConstant(c1 + c2, N1.getValueType()));
+    }
+  }
+
+  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
+  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRL &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+    if (c1 < VT.getSizeInBits()) {
+      uint64_t c2 = N1C->getZExtValue();
+      SDValue HiBitsMask =
+        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                              VT.getSizeInBits() - c1),
+                        VT);
+      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
+                                 N0.getOperand(0),
+                                 HiBitsMask);
+      if (c2 > c1)
+        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c2-c1, N1.getValueType()));
+      else
+        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
+                           DAG.getConstant(c1-c2, N1.getValueType()));
+    }
+  }
+  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
+  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+    SDValue HiBitsMask =
+      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
+                                            VT.getSizeInBits() -
+                                              N1C->getZExtValue()),
+                      VT);
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       HiBitsMask);
+  }
+
+  if (N1C) {
+    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
+    if (NewSHL.getNode())
+      return NewSHL;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSRA(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+  // fold (sra c1, c2) -> c1 >>s c2
+  if (N0C && N1C)
+    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
+  // fold (sra 0, x) -> 0
+  if (N0C && N0C->isNullValue())
+    return N0;
+  // fold (sra -1, x) -> -1
+  if (N0C && N0C->isAllOnesValue())
+    return N0;
+  // fold (sra x, c >= size(x)) -> undef
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+    return DAG.getUNDEF(VT);
+  // fold (sra x, 0) -> x
+  if (N1C && N1C->isNullValue())
+    return N0;
+  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
+  // sext_inreg.
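+  // e.g. (sra (shl x, 24), 24) on i32 sign-extends the low 8 bits of x in
+  // place, which is exactly (sign_extend_inreg x, i8).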
+  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
+    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
+    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
+    if (VT.isVector())
+      ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               ExtVT, VT.getVectorNumElements());
+    if ((!LegalOperations ||
+         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
+      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+                         N0.getOperand(0), DAG.getValueType(ExtVT));
+  }
+
+  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
+  if (N1C && N0.getOpcode() == ISD::SRA) {
+    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
+      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
+                         DAG.getConstant(Sum, N1C->getValueType(0)));
+    }
+  }
+
+  // fold (sra (shl X, m), (sub result_size, n))
+  //   -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
+  //      result_size - n != m.
+  // If truncate is free for the target sext(shl) is likely to result in better
+  // code.
+  if (N0.getOpcode() == ISD::SHL) {
+    // Get the two constants of the shifts, CN0 = m, CN = n.
+    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (N01C && N1C) {
+      // Determine what the truncate's result bitsize and type would be.
+      EVT TruncVT =
+        EVT::getIntegerVT(*DAG.getContext(),
+                          OpSizeInBits - N1C->getZExtValue());
+      // Determine the residual right-shift amount.
+      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
+
+      // If the shift is not a no-op (in which case this should be just a sign
+      // extend already), the truncated-to type is legal, sign_extend is legal
+      // on that type, and the truncate to that type is both legal and free,
+      // perform the transform.
+      if ((ShiftAmt > 0) &&
+          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
+          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
+          TLI.isTruncateFree(VT, TruncVT)) {
+
+        SDValue Amt = DAG.getConstant(ShiftAmt,
+            getShiftAmountTy(N0.getOperand(0).getValueType()));
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
+                                    N0.getOperand(0), Amt);
+        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
+                                    Shift);
+        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
+                           N->getValueType(0), Trunc);
+      }
+    }
+  }
+
+  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
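+  // Truncation distributes over AND, so both forms compute the same shift
+  // amount; the rewritten form exposes the AND to further combines.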
+  if (N1.getOpcode() == ISD::TRUNCATE &&
+      N1.getOperand(0).getOpcode() == ISD::AND &&
+      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+    SDValue N101 = N1.getOperand(0).getOperand(1);
+    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+      EVT TruncVT = N1.getValueType();
+      SDValue N100 = N1.getOperand(0).getOperand(0);
+      APInt TruncC = N101C->getAPIntValue();
+      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
+                         DAG.getNode(ISD::AND, N->getDebugLoc(),
+                                     TruncVT,
+                                     DAG.getNode(ISD::TRUNCATE,
+                                                 N->getDebugLoc(),
+                                                 TruncVT, N100),
+                                     DAG.getConstant(TruncC, TruncVT)));
+    }
+  }
+
+  // fold (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1+c2))
+  //      if c1 is equal to the number of bits the trunc removes
+  if (N0.getOpcode() == ISD::TRUNCATE &&
+      (N0.getOperand(0).getOpcode() == ISD::SRL ||
+       N0.getOperand(0).getOpcode() == ISD::SRA) &&
+      N0.getOperand(0).hasOneUse() &&
+      N0.getOperand(0).getOperand(1).hasOneUse() &&
+      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+    EVT LargeVT = N0.getOperand(0).getValueType();
+    ConstantSDNode *LargeShiftAmt =
+      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+        LargeShiftAmt->getZExtValue()) {
+      SDValue Amt =
+        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
+      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+                                N0.getOperand(0).getOperand(0), Amt);
+      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+    }
+  }
+
+  // Simplify, based on bits shifted out of the LHS.
+  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+    return SDValue(N, 0);
+
+
+  // If the sign bit is known to be zero, switch this to a SRL.
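+  // sra and srl differ only in the bit they shift in at the top; with a
+  // known-zero sign bit, both shift in zeros.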
+ if (DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
+
+ if (N1C) {
+ SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRA.getNode())
+ return NewSRA;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSRL(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+
+ // fold (srl c1, c2) -> c1 >>u c2
+ if (N0C && N1C)
+ return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
+ // fold (srl 0, x) -> 0
+ if (N0C && N0C->isNullValue())
+ return N0;
+ // fold (srl x, c >= size(x)) -> undef
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ return DAG.getUNDEF(VT);
+ // fold (srl x, 0) -> x
+ if (N1C && N1C->isNullValue())
+ return N0;
+ // if (srl x, c) is known to be zero, return 0
+ if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
+ APInt::getAllOnesValue(OpSizeInBits)))
+ return DAG.getConstant(0, VT);
+
+ // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
+ if (N1C && N0.getOpcode() == ISD::SRL &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ // This is only valid if OpSizeInBits + c1 equals the size of the inner
+ // shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
+ // fold (srl (shl x, c), c) -> (and x, cst2)
+ if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ N0.getValueSizeInBits() <= 64) {
+ uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
+ DAG.getConstant(~0ULL >> ShAmt, VT));
+ }
+
+ // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
+ // Shifting in all undef bits?
+ EVT SmallVT = N0.getOperand(0).getValueType();
+ if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
+ return DAG.getUNDEF(VT);
+
+ if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
+ SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
+ N0.getOperand(0), N1);
+ AddToWorkList(SmallShift.getNode());
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ }
+ }
+
+ // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
+ // bit, which is unmodified by sra.
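+ // e.g. on i32 (illustrative), bit 31 of (sra X, Y) is always X's original
+ // sign bit, since sra replicates it downward; extracting it with
+ // (srl ..., 31) therefore equals (srl X, 31).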
+ if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
+ if (N0.getOpcode() == ISD::SRA)
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
+ }
+
+ // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+ if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
+
+ // If any of the input bits are KnownOne, then the input couldn't be all
+ // zeros, thus the result of the srl will always be zero.
+ if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
+
+ // If all of the bits input to the ctlz node are known to be zero, then
+ // the result of the ctlz is "32" and the result of the shift is one.
+ APInt UnknownBits = ~KnownZero & Mask;
+ if (UnknownBits == 0) return DAG.getConstant(1, VT);
+
+ // Otherwise, check to see if there is exactly one bit input to the ctlz.
+ if ((UnknownBits & (UnknownBits - 1)) == 0) {
+ // Okay, we know that only the single bit specified by UnknownBits
+ // could be set on input to the CTLZ node. If this bit is set, the SRL
+ // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
+ // to an SRL/XOR pair, which is likely to simplify more.
+ unsigned ShAmt = UnknownBits.countTrailingZeros();
+ SDValue Op = N0.getOperand(0);
+
+ if (ShAmt) {
+ Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
+ DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
+ AddToWorkList(Op.getNode());
+ }
+
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ Op, DAG.getConstant(1, VT));
+ }
+ }
+
+ // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
+ if (N1.getOpcode() == ISD::TRUNCATE &&
+ N1.getOperand(0).getOpcode() == ISD::AND &&
+ N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
+ SDValue N101 = N1.getOperand(0).getOperand(1);
+ if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
+ EVT TruncVT = N1.getValueType();
+ SDValue N100 = N1.getOperand(0).getOperand(0);
+ APInt TruncC = N101C->getAPIntValue();
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
+ return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
+ DAG.getNode(ISD::AND, N->getDebugLoc(),
+ TruncVT,
+ DAG.getNode(ISD::TRUNCATE,
+ N->getDebugLoc(),
+ TruncVT, N100),
+ DAG.getConstant(TruncC, TruncVT)));
+ }
+ }
+
+ // fold operands of srl based on knowledge that the low bits are not
+ // demanded.
+ if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (N1C) {
+ SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
+ if (NewSRL.getNode())
+ return NewSRL;
+ }
+
+ // Attempt to convert an srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // Here is a common situation. We want to optimize:
+ //
+ // %a = ...
+ // %b = and i32 %a, 2
+ // %c = srl i32 %b, 1
+ // brcond i32 %c ...
+ //
+ // into
+ //
+ // %a = ...
+ // %b = and %a, 2
+ // %c = setcc eq %b, 0
+ // brcond %c ...
+ //
+ // However, after the source operand of SRL is optimized into AND, the SRL
+ // itself may not be optimized further. Look for it and add the BRCOND into
+ // the worklist.
+ if (N->hasOneUse()) {
+ SDNode *Use = *N->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
+ // Also look past the truncate.
+ Use = *Use->use_begin();
+ if (Use->getOpcode() == ISD::BRCOND)
+ AddToWorkList(Use);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTLZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctlz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTTZ(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (cttz c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitCTPOP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (ctpop c1) -> c2
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+
+ // fold (select C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+ // fold (select true, X, Y) -> X
+ if (N0C && !N0C->isNullValue())
+ return N1;
+ // fold (select false, X, Y) -> Y
+ if (N0C && N0C->isNullValue())
+ return N2;
+ // fold (select C, 1, X) -> (or C, X)
+ if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select C, 0, 1) -> (xor C, 1)
+ if (VT.isInteger() &&
+ (VT0 == MVT::i1 ||
+ (VT0.isInteger() &&
+ TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
+ N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
+ SDValue XORNode;
+ if (VT == VT0)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
+ N0, DAG.getConstant(1, VT0));
+ AddToWorkList(XORNode.getNode());
+ if (VT.bitsGT(VT0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
+ }
+ // fold (select C, 0, X) -> (and (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
+ }
+ // fold (select C, X, 1) -> (or (not C), X)
+ if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
+ SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
+ AddToWorkList(NOTNode.getNode());
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
+ }
+ // fold (select C, X, 0) -> (and C, X)
+ if (VT == MVT::i1 && N2C && N2C->isNullValue())
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+ // fold (select X, X, Y) -> (or X, Y)
+ // fold (select X, 1, Y) -> (or X, Y)
+ if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
+ // fold (select X, Y, X) -> (and X, Y)
+ // fold (select X, Y, 0) -> (and X, Y)
+ if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N1, N2))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold selects based on a setcc into other things, such as min/max/abs
+ if (N0.getOpcode() == ISD::SETCC) {
+ // FIXME:
+ // Check against MVT::Other for SELECT_CC, which is a workaround for targets
+ // having to say they don't support SELECT_CC on every type the DAG knows
+ // about, since there is no way to mark an opcode illegal at all value types
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1),
+ N1, N2, N0.getOperand(2));
+ return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ SDValue N3 = N->getOperand(3);
+ SDValue N4 = N->getOperand(4);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
+
+ // fold select_cc lhs, rhs, x, x, cc -> x
+ if (N2 == N3)
+ return N2;
+
+ // Determine if the condition we're dealing with is constant
+ SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+ N0, N1, CC, N->getDebugLoc(), false);
+ if (SCC.getNode()) AddToWorkList(SCC.getNode());
+
+ if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
+ if (!SCCC->isNullValue())
+ return N2; // cond always true -> true val
+ else
+ return N3; // cond always false -> false val
+ }
+
+ // Fold to a simpler select_cc
+ if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
+ return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
+ SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+ SCC.getOperand(2));
+
+ // If we can fold this based on the true/false value, do so.
+ if (SimplifySelectOps(N, N2, N3))
+ return SDValue(N, 0); // Don't revisit N.
+
+ // fold select_cc into other things, such as min/max/abs
+ return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
+}
+
+SDValue DAGCombiner::visitSETCC(SDNode *N) {
+ return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
+ cast<CondCodeSDNode>(N->getOperand(2))->get(),
+ N->getDebugLoc());
+}
+
+// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
+// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
+// transformation. Returns true if extensions are possible and the above
+// mentioned transformation is profitable.
+static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
+ unsigned ExtOpc,
+ SmallVector<SDNode*, 4> &ExtendNodes,
+ const TargetLowering &TLI) {
+ bool HasCopyToRegUses = false;
+ bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
+ for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
+ UE = N0.getNode()->use_end();
+ UI != UE; ++UI) {
+ SDNode *User = *UI;
+ if (User == N)
+ continue;
+ if (UI.getUse().getResNo() != N0.getResNo())
+ continue;
+ // FIXME: Only extend SETCC N, N and SETCC N, c for now.
+ if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
+ if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
+ // Sign bits will be lost after a zext.
+ return false;
+ bool Add = false;
+ for (unsigned i = 0; i != 2; ++i) {
+ SDValue UseOp = User->getOperand(i);
+ if (UseOp == N0)
+ continue;
+ if (!isa<ConstantSDNode>(UseOp))
+ return false;
+ Add = true;
+ }
+ if (Add)
+ ExtendNodes.push_back(User);
+ continue;
+ }
+ // If truncates aren't free and there are users we can't
+ // extend, it isn't worthwhile.
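+ // e.g. (illustrative) a non-SETCC user would have to read the narrow
+ // value through a (trunc (extload x)); if that truncate costs a real
+ // instruction on the target, widening the load is a net loss.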
+ if (!isTruncFree)
+ return false;
+ // Remember if this value is live-out.
+ if (User->getOpcode() == ISD::CopyToReg)
+ HasCopyToRegUses = true;
+ }
+
+ if (HasCopyToRegUses) {
+ bool BothLiveOut = false;
+ for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
+ BothLiveOut = true;
+ break;
+ }
+ }
+ if (BothLiveOut)
+ // Both unextended and extended values are live out. There had better be
+ // a good reason for the transformation.
+ return ExtendNodes.size();
+ }
+ return true;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (sext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
+
+ // fold (sext (sext x)) -> (sext x)
+ // fold (sext (aext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ // fold (sext (truncate (load x))) -> (sext (smaller load x))
+ // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ // See if the value being truncated is already sign extended. If so, just
+ // eliminate the trunc/sext pair.
+ SDValue Op = N0.getOperand(0);
+ unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
+
+ if (OpBits == DestBits) {
+ // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
+ // bits, it is already sign extended.
+ if (NumSignBits > DestBits-MidBits)
+ return Op;
+ } else if (OpBits < DestBits) {
+ // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
+ // bits, just sext from i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
+ } else {
+ // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
+ // bits, just truncate to i32.
+ if (NumSignBits > OpBits-MidBits)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+
+ // fold (sext (truncate x)) -> (sextinreg x).
+ if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
+ N0.getValueType())) {
+ if (OpBits < DestBits)
+ Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
+ else if (OpBits > DestBits)
+ Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
+ DAG.getValueType(N0.getValueType()));
+ }
+ }
+
+ // fold (sext (load x)) -> (sext (truncate (sextload x)))
+ // None of the supported targets knows how to perform load and sign extend
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
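+ // Illustrative sketch (hypothetical types): (sext:i64 (load:i32 p))
+ // becomes a sextload producing i64 from i32 memory; other users of the
+ // original load are redirected to (trunc:i32 ...) of the new load below.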
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
+ // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
+ if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
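+ // e.g. (sext:v4i32 (setcc v4f32, v4f32)) is 128 bits on both sides, so
+ // the vsetcc result can be used directly; the v4i32/v4f32 types here are
+ // only illustrative.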
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
+ unsigned ElementWidth = VT.getScalarType().getSizeInBits();
+ SDValue NegOne =
+ DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ NegOne, DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ if (!LegalOperations ||
+ TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
+ return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+ DAG.getSetCC(N->getDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ N0.getOperand(0), N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ NegOne, DAG.getConstant(0, VT));
+ }
+
+ // fold (sext x) -> (zext x) if the sign bit is known zero.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
+ DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (zext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (zext (zext x)) -> (zext x)
+ // fold (zext (aext x)) -> (zext x)
+ if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (zext (truncate x)) -> (and x, mask)
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ SDValue Op = N0.getOperand(0);
+ if (Op.getValueType().bitsLT(VT)) {
+ Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
+ } else if (Op.getValueType().bitsGT(VT)) {
+ Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
+ }
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
+ }
+
+ // Fold (zext (and (trunc x), cst)) -> (and x, cst),
+ // if either of the casts is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType()) ||
+ !TLI.isZExtFree(N0.getValueType(), VT))) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (zext (load x)) -> (zext (truncate (zextload x)))
+ // None of the supported targets knows how to perform load and vector_zext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
+ // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
+ if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ if ((!LegalOperations && !LN0->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
+ ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+ // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue, 8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the zext'd result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ } else {
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode()) return SCC;
+ }
+
+ // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
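+ // e.g. for (zext:i64 (shl:i32 (zext:i32 x:i8), C)) the inner zext leaves
+ // 24 known-zero high bits, so C <= 24 cannot drop set bits, while a
+ // larger C could, and widening the shl would then change the result.
+ // The types above are illustrative only.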
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
+ return SDValue();
+ }
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // fold (aext c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
+ // fold (aext (aext x)) -> (aext x)
+ // fold (aext (zext x)) -> (zext x)
+ // fold (aext (sext x)) -> (sext x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND)
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
+
+ // fold (aext (truncate (load x))) -> (aext (smaller load x))
+ // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
+ }
+ }
+
+ // fold (aext (truncate x))
+ if (N0.getOpcode() == ISD::TRUNCATE) {
+ SDValue TruncOp = N0.getOperand(0);
+ if (TruncOp.getValueType() == VT)
+ return TruncOp; // x iff x size == zext size.
+ if (TruncOp.getValueType().bitsGT(VT))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
+ return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
+ }
+
+ // Fold (aext (and (trunc x), cst)) -> (and x, cst)
+ // if the trunc is not free.
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
+ N0.getValueType())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ if (X.getValueType().bitsLT(VT)) {
+ X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
+ } else if (X.getValueType().bitsGT(VT)) {
+ X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
+ }
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ X, DAG.getConstant(Mask, VT));
+ }
+
+ // fold (aext (load x)) -> (aext (truncate (extload x)))
+ // None of the supported targets knows how to perform load and any_ext
+ // on vectors in one instruction. We only perform this transformation on
+ // scalars.
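+ // Illustrative sketch (hypothetical types): (aext:i64 (load:i32 p))
+ // becomes an i64 extload of i32 memory; the extra high bits are
+ // unspecified either way, which is exactly what any_extend permits.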
+ if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
+ if (DoXform) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ N0.getValueType(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad);
+ CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
+
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
+ N->getDebugLoc(), VT, SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
+ SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+
+ // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
+ // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
+ // fold (aext ( extload x)) -> (aext (truncate (extload x)))
+ if (N0.getOpcode() == ISD::LOAD &&
+ !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse()) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT MemVT = LN0->getMemoryVT();
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
+ N0.getValueType(), ExtLoad),
+ ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
+ if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+ // We know that the # elements of the results is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
+ SDValue SCC =
+ SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
+ DAG.getConstant(1, VT), DAG.getConstant(0, VT),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
+ if (SCC.getNode())
+ return SCC;
+ }
+
+ return SDValue();
+}
+
+/// GetDemandedBits - See if the specified operand can be simplified with the
+/// knowledge that only the bits specified by Mask are used. If so, return the
+/// simpler operand, otherwise return a null SDValue.
+SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth()) break;
+ APInt NewMask = Mask << Amt;
+ SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
+ if (SimplifyLHS.getNode())
+ return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
+ SimplifyLHS, V.getOperand(1));
+ }
+ }
+ return SDValue();
+}
+
+/// ReduceLoadWidth - If the result of a wider load is shifted right by N bits
+/// and then truncated to a narrower type, where N is a multiple of the number
+/// of bits of the narrower type, transform it to a narrower load from
+/// address + N / (number of bits of the new type). If the result is to be
+/// extended, also fold the extension to form an extending load.
+SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
+ unsigned Opc = N->getOpcode();
+
+ ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT ExtVT = VT;
+
+ // This transformation isn't valid for vector loads.
+ if (VT.isVector())
+ return SDValue();
+
+ // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
+ // extended to VT.
+ if (Opc == ISD::SIGN_EXTEND_INREG) {
+ ExtType = ISD::SEXTLOAD;
+ ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+ } else if (Opc == ISD::SRL) {
+ // Another special-case: SRL is basically zero-extending a narrower value.
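+ // e.g. (illustrative) (srl (load:i32 p), 16) keeps only the high 16 bits,
+ // zero-extended, which matches a zextload of an i16 at a suitably
+ // adjusted address; the offset and endianness handling happens below.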
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
+ }
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
+
+ unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
+ unsigned ShAmt = 0;
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShAmt = N01->getZExtValue();
+ // Is the shift amount a multiple of the size of ExtVT?
+ if ((ShAmt & (EVTBits-1)) == 0) {
+ N0 = N0.getOperand(0);
+ // Is the load width a multiple of the size of ExtVT?
+ if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ return SDValue();
+ }
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
+ }
+ }
+
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+ // multiple uses, this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),
+ NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
+
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy(Result.getValueType());
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
+ }
+
+ // Return the new loaded value.
+ return Result;
+}
+
+SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ EVT EVT = cast<VTSDNode>(N1)->getVT();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+
+ // fold (sext_in_reg c1) -> c1
+ if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+
+ // If the input is already sign extended, just drop the extension.
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
+ return N0;
+
+ // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ N0.getOperand(0), N1);
+ }
+
+ // fold (sext_in_reg (sext x)) -> (sext x)
+ // fold (sext_in_reg (aext x)) -> (sext x)
+ // if x is small enough.
+ if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
+ if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
+ return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+
+ // fold operands of sext_in_reg based on knowledge that the top bits are not
+ // demanded.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ // fold (sext_in_reg (load x)) -> (smaller sextload x)
+ // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
+ // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
+ // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
+ // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
+ if (N0.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ // We can turn this into an SRA iff the input to the SRL is already sign
+ // extended enough.
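+ // e.g. for (sext_in_reg (srl X:i32, 23), i8): the SRA fills from X's
+ // bit 31 while the original fills from bit 30, so X needs at least
+ // 2 sign bits (32-(23+8) < InSignBits) for the two to agree; hence the
+ // check below. The i32/i8 types are illustrative.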
+ unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
+ N0.getOperand(0), N0.getOperand(1));
+ }
+ }
+
+ // fold (sext_inreg (extload x)) -> (sextload x)
+ if (ISD::isEXTLoad(N0.getNode()) &&
+ ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
+ if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ N0.hasOneUse() &&
+ EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->getAlignment());
+ CombineTo(N, ExtLoad);
+ CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // noop truncate
+ if (N0.getValueType() == N->getValueType(0))
+ return N0;
+ // fold (truncate c1) -> c1
+ if (isa<ConstantSDNode>(N0))
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
+ // fold (truncate (truncate x)) -> (truncate x)
+ if (N0.getOpcode() == ISD::TRUNCATE)
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
+ if (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND) {
+ if (N0.getOperand(0).getValueType().bitsLT(VT))
+ // if the source is smaller than the dest, we still need an extend
+ return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ N0.getOperand(0));
+ else if (N0.getOperand(0).getValueType().bitsGT(VT))
+ // if the source is larger than the dest, then we just need the truncate
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
+ else
+ // if the source and dest are the same type, we can drop both the extend
+ // and the truncate.
+ return N0.getOperand(0);
+ }
+
+ // See if we can simplify the input to this truncate through knowledge that
+ // only the low bits are being used.
+ // For example "trunc (or (shl x, 8), y)" -> trunc y
+ // Currently we only perform this optimization on scalars because vectors
+ // may have different active low bits.
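+ // e.g. (trunc:i8 (or (shl x, 8), y)): only the low 8 bits are demanded
+ // and the shl contributes nothing to them, so GetDemandedBits can hand
+ // back y and the whole expression becomes (trunc:i8 y). The i8 width is
+ // illustrative.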
+ if (!VT.isVector()) {
+ SDValue Shorter =
+ GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
+ VT.getSizeInBits()));
+ if (Shorter.getNode())
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
+ }
+ // fold (truncate (load x)) -> (smaller load x)
+ // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
+static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
+ SDValue Elt = N->getOperand(i);
+ if (Elt.getOpcode() != ISD::MERGE_VALUES)
+ return Elt.getNode();
+ return Elt.getOperand(Elt.getResNo()).getNode();
+}
+
+/// CombineConsecutiveLoads - build_pair (load, load) -> load
+/// if load locations are consecutive.
+SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
+ assert(N->getOpcode() == ISD::BUILD_PAIR);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
+ return SDValue();
+ EVT LD1VT = LD1->getValueType(0);
+
+ if (ISD::isNON_EXTLoad(LD2) &&
+ LD2->hasOneUse() &&
+ // If both are volatile this would reduce the number of volatile loads.
+ // If one is volatile it might be ok, but play conservative and bail out.
+ !LD1->isVolatile() &&
+ !LD2->isVolatile() &&
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
+ unsigned Align = LD1->getAlignment();
+ unsigned NewAlign = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+
+ if (NewAlign <= Align &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
+ return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // If the input is a BUILD_VECTOR with all constant elements, fold this now.
+ // Only do this before legalize, since afterward the target may be depending
+ // on the bitconvert.
+ // First check to see if this is all constant.
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = true;
+ for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
+ if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
+ N0.getOperand(i).getOpcode() != ISD::Constant &&
+ N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
+ isSimple = false;
+ break;
+ }
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
+ if (Res.getNode() != N) {
+ if (!LegalOperations ||
+ TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
+ return Res;
+
+ // Folding it resulted in an illegal node, and it's too late to
+ // do that. Clean up the old node and forego the transformation.
+ // Ideally this won't happen very often, because instcombine
+ // and the earlier dagcombine runs (where illegal nodes are
+ // permitted) should have folded most of them already.
+ DAG.DeleteNode(Res.getNode());
+ }
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = TLI.getTargetData()->
+ getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ OrigAlign);
+ AddToWorkList(N);
+ CombineTo(N0.getNode(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ N0.getValueType(), Load),
+ Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ // This often reduces constant pool loads.
+ if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
+ N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
+ N0.getOperand(0));
+ AddToWorkList(NewConv.getNode());
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ if (N0.getOpcode() == ISD::FNEG)
+ return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(SignBit, VT));
+ assert(N0.getOpcode() == ISD::FABS);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ NewConv, DAG.getConstant(~SignBit, VT));
+ }
+
+ // fold (bitconvert (fcopysign cst, x)) ->
+ // (or (and (bitconvert x), sign), (and cst, (not sign)))
+ // Note that we don't handle (copysign x, cst) because this can always be
+ // folded to an fneg or fabs.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
+ isa<ConstantFPSDNode>(N0.getOperand(0)) &&
+ VT.isInteger() && !VT.isVector()) {
+ unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
+ if (isTypeLegal(IntXVT)) {
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ IntXVT, N0.getOperand(1));
+ AddToWorkList(X.getNode());
+
+ // If X has a different width than the result/lhs, sext it or truncate it.
+ unsigned VTWidth = VT.getSizeInBits();
+ if (OrigXWidth < VTWidth) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ } else if (OrigXWidth > VTWidth) {
+ // To get the sign bit in the right place, we have to shift it right
+ // before truncating.
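+ // e.g. bitcasting (fcopysign cst:f32, x:f64) to i32: x's sign bit lives
+ // at bit 63, so shift the i64 right by 64-32 = 32 to bring it to bit 31
+ // before truncating to i32 (the types here are illustrative).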
+ X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
+ X.getValueType(), X,
+ DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
+ AddToWorkList(X.getNode());
+ X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
+ AddToWorkList(X.getNode());
+ }
+
+ APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
+ X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
+ X, DAG.getConstant(SignBit, VT));
+ AddToWorkList(X.getNode());
+
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
+ VT, N0.getOperand(0));
+ Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
+ Cst, DAG.getConstant(~SignBit, VT));
+ AddToWorkList(Cst.getNode());
+
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
+ }
+ }
+
+ // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
+ if (N0.getOpcode() == ISD::BUILD_PAIR) {
+ SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
+ if (CombineLD.getNode())
+ return CombineLD;
+ }
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ return CombineConsecutiveLoads(N, VT);
+}
+
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
+/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
+/// destination element value type.
+SDValue DAGCombiner::
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
+
+ // If this is already the right type, we're done.
+ if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
+
+ unsigned SrcBitSize = SrcEltVT.getSizeInBits();
+ unsigned DstBitSize = DstEltVT.getSizeInBits();
+
+ // If this is a conversion of N elements of one type to N elements of another
+ // type, convert each element. This handles FP<->INT cases.
+ if (SrcBitSize == DstBitSize) {
+ EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+ BV->getValueType(0).getVectorNumElements());
+
+ // Due to the FP element handling below calling this routine recursively,
+ // we can end up with a scalar-to-vector node here.
+ if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, BV->getOperand(0)));
+
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ SDValue Op = BV->getOperand(i);
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (Op.getValueType() != SrcEltVT)
+ Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
+ DstEltVT, Op));
+ AddToWorkList(Ops.back().getNode());
+ }
+ return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+ &Ops[0], Ops.size());
+ }
+
+ // Otherwise, we're growing or shrinking the elements. To avoid having to
+ // handle annoying details of growing/shrinking FP values, we convert them to
+ // int first.
+ if (SrcEltVT.isFloatingPoint()) {
+ // Convert the input float vector to an int vector where the elements are
+ // the same size.
+ assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
+ SrcEltVT = IntVT;
+ }
+
+ // Now we know the input is an integer vector. If the output is a FP type,
+ // convert to integer first, then to FP of the right size.
+  if (DstEltVT.isFloatingPoint()) {
+    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
+    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
+    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
+
+    // Next, convert to FP elements of the same size.
+    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
+  }
+
+  // Okay, we know the src/dst types are both integers of differing types.
+  // Handle the growing case first.
+  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
+  if (SrcBitSize < DstBitSize) {
+    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
+
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
+         i += NumInputsPerOutput) {
+      bool isLE = TLI.isLittleEndian();
+      APInt NewBits = APInt(DstBitSize, 0);
+      bool EltIsUndef = true;
+      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
+        // Shift the previously computed bits over.
+        NewBits <<= SrcBitSize;
+        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
+        if (Op.getOpcode() == ISD::UNDEF) continue;
+        EltIsUndef = false;
+
+        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
+                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
+      }
+
+      if (EltIsUndef)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      else
+        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
+    }
+
+    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
+    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                       &Ops[0], Ops.size());
+  }
+
+  // Finally, this must be the case where we are shrinking elements: each input
+  // turns into multiple outputs.
+  bool isS2V = ISD::isScalarToVector(BV);
+  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
+  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
+                            NumOutputsPerInput*BV->getNumOperands());
+  SmallVector<SDValue, 8> Ops;
+
+  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
+      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
+        Ops.push_back(DAG.getUNDEF(DstEltVT));
+      continue;
+    }
+
+    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+                  getAPIntValue().zextOrTrunc(SrcBitSize);
+
+    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
+      APInt ThisVal = OpVal.trunc(DstBitSize);
+      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
+      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
+        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+                           Ops[0]);
+      OpVal = OpVal.lshr(DstBitSize);
+    }
+
+    // For big endian targets, swap the order of the pieces of each element.
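+    // For example, shrinking one i32 element 0xAABBCCDD into four i8
+    // elements pushes 0xDD, 0xCC, 0xBB, 0xAA (lowest piece first), which is
+    // little-endian memory order; big-endian targets reverse each group.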
+    if (TLI.isBigEndian())
+      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
+                     &Ops[0], Ops.size());
+}
+
+SDValue DAGCombiner::visitFADD(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fadd c1, c2) -> c1 + c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
+  // fold (fadd A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fadd A, (fneg B)) -> (fsub A, B)
+  if (isNegatibleForFree(N1, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+  // fold (fadd (fneg A), B) -> (fsub B, A)
+  if (isNegatibleForFree(N0, LegalOperations) == 2)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
+                       GetNegatedExpression(N0, DAG, LegalOperations));
+
+  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFSUB(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fsub c1, c2) -> c1-c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
+  // fold (fsub A, 0) -> A
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N0;
+  // fold (fsub 0, B) -> -B
+  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
+    if (isNegatibleForFree(N1, LegalOperations))
+      return GetNegatedExpression(N1, DAG, LegalOperations);
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
+  }
+  // fold (fsub A, (fneg B)) -> (fadd A, B)
+  if (isNegatibleForFree(N1, LegalOperations))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
+                       GetNegatedExpression(N1, DAG, LegalOperations));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFMUL(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fmul c1, c2) -> c1*c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
+  // canonicalize constant to RHS
+  if (N0CFP && !N1CFP)
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
+  // fold (fmul A, 0) -> 0
+  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
+    return N1;
+  // fold (fmul A, 0) -> 0, vector edition.
+  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
+    return N1;
+  // fold (fmul X, 2.0) -> (fadd X, X)
+  if (N1CFP && N1CFP->isExactlyValue(+2.0))
+    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
+  // fold (fmul X, -1.0) -> (fneg X)
+  if (N1CFP && N1CFP->isExactlyValue(-1.0))
+    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
+
+  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
+  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
+      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
+    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
+                                   N0.getOperand(1), N1));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFDIV(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold vector ops
+  if (VT.isVector()) {
+    SDValue FoldedVOp = SimplifyVBinOp(N);
+    if (FoldedVOp.getNode()) return FoldedVOp;
+  }
+
+  // fold (fdiv c1, c2) -> c1/c2
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
+
+  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
+  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
+    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
+      // Both can be negated for free, check to see if at least one is cheaper
+      // negated.
+      if (LHSNeg == 2 || RHSNeg == 2)
+        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
+                           GetNegatedExpression(N0, DAG, LegalOperations),
+                           GetNegatedExpression(N1, DAG, LegalOperations));
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFREM(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  // fold (frem c1, c2) -> fmod(c1,c2)
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+  EVT VT = N->getValueType(0);
+
+  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
+
+  if (N1CFP) {
+    const APFloat& V = N1CFP->getValueAPF();
+    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
+    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
+    if (!V.isNegative()) {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
+        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+    } else {
+      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
+                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
+    }
+  }
+
+  // copysign(fabs(x), y) -> copysign(x, y)
+  // copysign(fneg(x), y) -> copysign(x, y)
+  // copysign(copysign(x,z), y) -> copysign(x, y)
+  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
+      N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0.getOperand(0), N1);
+
+  // copysign(x, abs(y)) -> abs(x)
+  if (N1.getOpcode() == ISD::FABS)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+
+  // copysign(x, copysign(y,z)) -> copysign(x, z)
+  if (N1.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(1));
+
+  // copysign(x, fp_extend(y)) -> copysign(x, y)
+  // copysign(x, fp_round(y)) -> copysign(x, y)
+  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       N0, N1.getOperand(0));
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (sint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128 &&
+      // ...but only if the target supports immediate floating-point values
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
+  // but UINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
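+    // For example, (sint_to_fp (and x, 0x7F)) has a known-zero sign bit,
+    // so the unsigned conversion computes the same value and can be used
+    // when only UINT_TO_FP is legal.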
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+  EVT VT = N->getValueType(0);
+  EVT OpVT = N0.getValueType();
+
+  // fold (uint_to_fp c1) -> c1fp
+  if (N0C && OpVT != MVT::ppcf128 &&
+      // ...but only if the target supports immediate floating-point values
+      (Level == llvm::Unrestricted ||
+       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
+
+  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
+  // but SINT_TO_FP is legal on this target, try to convert.
+  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
+      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
+    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
+    if (DAG.SignBitIsZero(N0))
+      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_sint c1fp) -> c1
+  if (N0CFP)
+    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_to_uint c1fp) -> c1
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fp_round c1fp) -> c1fp
+  if (N0CFP && N0.getValueType() != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
+
+  // fold (fp_round (fp_extend x)) -> x
+  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
+    return N0.getOperand(0);
+
+  // fold (fp_round (fp_round x)) -> (fp_round x)
+  if (N0.getOpcode() == ISD::FP_ROUND) {
+    // This is a value preserving truncation if both rounds are.
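+    // For example, (f32 fp_round (f64 fp_round x, 1), 1) becomes
+    // (f32 fp_round x, 1); the combined flag is 1 only when both of the
+    // original rounds were value preserving.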
+    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
+                   N0.getNode()->getConstantOperandVal(1) == 1;
+    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
+                       DAG.getIntPtrConstant(IsTrunc));
+  }
+
+  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
+  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
+    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
+                              N0.getOperand(0), N1);
+    AddToWorkList(Tmp.getNode());
+    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
+                       Tmp, N0.getOperand(1));
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+
+  // fold (fp_round_inreg c1fp) -> c1fp
+  if (N0CFP && isTypeLegal(EVT)) {
+    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // If this is fp_round(fpextend), don't fold it, allow ourselves to be
+  // folded.
+  if (N->hasOneUse() &&
+      N->use_begin()->getOpcode() == ISD::FP_ROUND)
+    return SDValue();
+
+  // fold (fp_extend c1fp) -> c1fp
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
+
+  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
+  // value of X.
+  if (N0.getOpcode() == ISD::FP_ROUND
+      && N0.getNode()->getConstantOperandVal(1) == 1) {
+    SDValue In = N0.getOperand(0);
+    if (In.getValueType() == VT) return In;
+    if (VT.bitsLT(In.getValueType()))
+      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
+                         In, N0.getOperand(1));
+    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
+  }
+
+  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
+  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
+      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
+    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+                                     LN0->getChain(),
+                                     LN0->getBasePtr(), LN0->getPointerInfo(),
+                                     N0.getValueType(),
+                                     LN0->isVolatile(), LN0->isNonTemporal(),
+                                     LN0->getAlignment());
+    CombineTo(N, ExtLoad);
+    CombineTo(N0.getNode(),
+              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
+                          N0.getValueType(), ExtLoad,
+                          DAG.getIntPtrConstant(1)),
+              ExtLoad.getValue(1));
+    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFNEG(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+
+  if (isNegatibleForFree(N0, LegalOperations))
+    return GetNegatedExpression(N0, DAG, LegalOperations);
+
+  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BITCAST &&
+      !VT.isVector() &&
+      N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger()) {
+    SDValue Int = N0.getOperand(0);
+    EVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
+              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                         VT, Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitFABS(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
+  EVT VT = N->getValueType(0);
+
+  // fold (fabs c1) -> fabs(c1)
+  if (N0CFP && VT != MVT::ppcf128)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
+  // fold (fabs (fabs x)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FABS)
+    return N->getOperand(0);
+  // fold (fabs (fneg x)) -> (fabs x)
+  // fold (fabs (fcopysign x, y)) -> (fabs x)
+  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
+    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
+
+  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
+  // constant pool values.
+  if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
+      N0.getOperand(0).getValueType().isInteger() &&
+      !N0.getOperand(0).getValueType().isVector()) {
+    SDValue Int = N0.getOperand(0);
+    EVT IntVT = Int.getValueType();
+    if (IntVT.isInteger() && !IntVT.isVector()) {
+      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
+             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
+      AddToWorkList(Int.getNode());
+      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                         N->getValueType(0), Int);
+    }
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBRCOND(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue N2 = N->getOperand(2);
+
+  // If N is a constant we could fold this into a fallthrough or unconditional
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
+  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
+  // on the target.
+  if (N1.getOpcode() == ISD::SETCC &&
+      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       Chain, N1.getOperand(2),
+                       N1.getOperand(0), N1.getOperand(1), N2);
+  }
+
+  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+       (N1.getOperand(0).hasOneUse() &&
+        N1.getOperand(0).getOpcode() == ISD::SRL))) {
+    SDNode *Trunc = 0;
+    if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+      Trunc = N1.getNode();
+      N1 = N1.getOperand(0);
+    }
+
+    // Match this pattern so that we can generate simpler code:
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = srl i32 %b, 1
+    //   brcond i32 %c ...
+    //
+    // into
+    //
+    //   %a = ...
+    //   %b = and i32 %a, 2
+    //   %c = setcc eq %b, 0
+    //   brcond %c ...
+    //
+    // This applies only when the AND constant value has one bit set and the
+    // SRL constant is equal to the log2 of the AND constant. The back-end is
+    // smart enough to convert the result into a TEST/JMP sequence.
+    SDValue Op0 = N1.getOperand(0);
+    SDValue Op1 = N1.getOperand(1);
+
+    if (Op0.getOpcode() == ISD::AND &&
+        Op1.getOpcode() == ISD::Constant) {
+      SDValue AndOp1 = Op0.getOperand(1);
+
+      if (AndOp1.getOpcode() == ISD::Constant) {
+        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+
+        if (AndConst.isPowerOf2() &&
+            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
+          SDValue SetCC =
+            DAG.getSetCC(N->getDebugLoc(),
+                         TLI.getSetCCResultType(Op0.getValueType()),
+                         Op0, DAG.getConstant(0, Op0.getValueType()),
+                         ISD::SETNE);
+
+          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                                          MVT::Other, Chain, SetCC, N2);
+          // Don't add the new BRCond into the worklist or else SimplifySelectCC
+          // will convert it back to (X & C1) >> C2.
+          CombineTo(N, NewBRCond, false);
+          // Truncate is dead.
+          if (Trunc) {
+            removeFromWorkList(Trunc);
+            DAG.DeleteNode(Trunc);
+          }
+          // Replace the uses of SRL with SETCC
+          WorkListRemover DeadNodes(*this);
+          DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+          removeFromWorkList(N1.getNode());
+          DAG.DeleteNode(N1.getNode());
+          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+        }
+      }
+    }
+
+    if (Trunc)
+      // Restore N1 if the above transformation doesn't match.
+      N1 = N->getOperand(1);
+  }
+
+  // Transform br(xor(x, y)) -> br(x != y)
+  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+    SDNode *TheXor = N1.getNode();
+    SDValue Op0 = TheXor->getOperand(0);
+    SDValue Op1 = TheXor->getOperand(1);
+    if (Op0.getOpcode() == Op1.getOpcode()) {
+      // Avoid missing important xor optimizations.
+      SDValue Tmp = visitXOR(TheXor);
+      if (Tmp.getNode() && Tmp.getNode() != TheXor) {
+        DEBUG(dbgs() << "\nReplacing.8 ";
+              TheXor->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Tmp.getNode()->dump(&DAG);
+              dbgs() << '\n');
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+        removeFromWorkList(TheXor);
+        DAG.DeleteNode(TheXor);
+        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                           MVT::Other, Chain, Tmp, N2);
+      }
+    }
+
+    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+      bool Equal = false;
+      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+            Op0.getOpcode() == ISD::XOR) {
+          TheXor = Op0.getNode();
+          Equal = true;
+        }
+
+      EVT SetCCVT = N1.getValueType();
+      if (LegalTypes)
+        SetCCVT = TLI.getSetCCResultType(SetCCVT);
+      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+                                   SetCCVT,
+                                   Op0, Op1,
+                                   Equal ? ISD::SETEQ : ISD::SETNE);
+      // Replace the uses of XOR with SETCC
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+      removeFromWorkList(N1.getNode());
+      DAG.DeleteNode(N1.getNode());
+      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+                         MVT::Other, Chain, SetCC, N2);
+    }
+  }
+
+  return SDValue();
+}
+
+// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
+//
+SDValue DAGCombiner::visitBR_CC(SDNode *N) {
+  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
+  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+
+  // If N is a constant we could fold this into a fallthrough or unconditional
+  // branch. However that doesn't happen very often in normal code, because
+  // Instcombine/SimplifyCFG should have handled the available opportunities.
+  // If we did this folding here, it would be necessary to update the
+  // MachineBasicBlock CFG, which is awkward.
+
+  // Use SimplifySetCC to simplify SETCC's.
+  SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
+                               CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
+                               false);
+  if (Simp.getNode()) AddToWorkList(Simp.getNode());
+
+  // fold to a simpler setcc
+  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
+    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
+                       N->getOperand(0), Simp.getOperand(2),
+                       Simp.getOperand(0), Simp.getOperand(1),
+                       N->getOperand(4));
+
+  return SDValue();
+}
+
+/// CombineToPreIndexedLoadStore - Try turning a load / store into a
+/// pre-indexed load / store when the base pointer is an add or subtract
+/// and it has other uses besides the load / store.  After the
+/// transformation, the new indexed load / store has effectively folded
+/// the add / subtract in and all of its other uses are redirected to the
+/// new load / store.
+bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
+  if (!LegalOperations)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  EVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  // If the pointer is not an add/sub, or if it doesn't have multiple uses,
+  // bail out.  There is no reason to make this a preinc/predec.
+  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
+      Ptr.getNode()->hasOneUse())
+    return false;
+
+  // Ask the target to do addressing mode selection.
+  SDValue BasePtr;
+  SDValue Offset;
+  ISD::MemIndexedMode AM = ISD::UNINDEXED;
+  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
+    return false;
+  // Don't create an indexed load / store with zero offset.
+  if (isa<ConstantSDNode>(Offset) &&
+      cast<ConstantSDNode>(Offset)->isNullValue())
+    return false;
+
+  // Try turning it into a pre-indexed load / store except when:
+  // 1) The new base ptr is a frame index.
+  // 2) If N is a store and the new base ptr is either the same as or is a
+  //    predecessor of the value being stored.
+  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
+  //    that would create a cycle.
+  // 4) All uses are load / store ops that use it as old base ptr.
+
+  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
+  // (plus the implicit offset) to a register to preinc anyway.
+  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+    return false;
+
+  // Check #2.
+  if (!isLoad) {
+    SDValue Val = cast<StoreSDNode>(N)->getValue();
+    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+      return false;
+  }
+
+  // Now check for #3 and #4.
+  bool RealUse = false;
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Use = *I;
+    if (Use == N)
+      continue;
+    if (Use->isPredecessorOf(N))
+      return false;
+
+    if (!((Use->getOpcode() == ISD::LOAD &&
+           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
+          (Use->getOpcode() == ISD::STORE &&
+           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
+      RealUse = true;
+  }
+
+  if (!RealUse)
+    return false;
+
+  SDValue Result;
+  if (isLoad)
+    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+  else
+    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                 BasePtr, Offset, AM);
+  ++PreIndexedNodes;
+  ++NodesCombined;
+  DEBUG(dbgs() << "\nReplacing.4 ";
+        N->dump(&DAG);
+        dbgs() << "\nWith: ";
+        Result.getNode()->dump(&DAG);
+        dbgs() << '\n');
+  WorkListRemover DeadNodes(*this);
+  if (isLoad) {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                  &DeadNodes);
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                  &DeadNodes);
+  } else {
+    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                  &DeadNodes);
+  }
+
+  // Finally, since the node is now dead, remove it from the graph.
+  DAG.DeleteNode(N);
+
+  // Replace the uses of Ptr with uses of the updated base value.
+  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
+                                &DeadNodes);
+  removeFromWorkList(Ptr.getNode());
+  DAG.DeleteNode(Ptr.getNode());
+
+  return true;
+}
+
+/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
+/// add / sub of the base pointer node into a post-indexed load / store.
+/// The transformation effectively folds the add / subtract into the new
+/// indexed load / store, and all of the add / subtract's uses are
+/// redirected to the new load / store.
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
+  if (!LegalOperations)
+    return false;
+
+  bool isLoad = true;
+  SDValue Ptr;
+  EVT VT;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    if (LD->isIndexed())
+      return false;
+    VT = LD->getMemoryVT();
+    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = LD->getBasePtr();
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    if (ST->isIndexed())
+      return false;
+    VT = ST->getMemoryVT();
+    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
+        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
+      return false;
+    Ptr = ST->getBasePtr();
+    isLoad = false;
+  } else {
+    return false;
+  }
+
+  if (Ptr.getNode()->hasOneUse())
+    return false;
+
+  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
+         E = Ptr.getNode()->use_end(); I != E; ++I) {
+    SDNode *Op = *I;
+    if (Op == N ||
+        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
+      continue;
+
+    SDValue BasePtr;
+    SDValue Offset;
+    ISD::MemIndexedMode AM = ISD::UNINDEXED;
+    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
+      // Don't create an indexed load / store with zero offset.
+      if (isa<ConstantSDNode>(Offset) &&
+          cast<ConstantSDNode>(Offset)->isNullValue())
+        continue;
+
+      // Try turning it into a post-indexed load / store except when
+      // 1) All uses are load / store ops that use it as base ptr.
+      // 2) Op must be independent of N, i.e. Op is neither a predecessor
+      //    nor a successor of N. Otherwise, if Op is folded that would
+      //    create a cycle.
+
+      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
+        continue;
+
+      // Check for #1.
+      bool TryNext = false;
+      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
+             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
+        SDNode *Use = *II;
+        if (Use == Ptr.getNode())
+          continue;
+
+        // If all the uses are load / store addresses, then don't do the
+        // transformation.
+        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
+          bool RealUse = false;
+          for (SDNode::use_iterator III = Use->use_begin(),
+                 EEE = Use->use_end(); III != EEE; ++III) {
+            SDNode *UseUse = *III;
+            if (!((UseUse->getOpcode() == ISD::LOAD &&
+                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
+                  (UseUse->getOpcode() == ISD::STORE &&
+                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
+              RealUse = true;
+          }
+
+          if (!RealUse) {
+            TryNext = true;
+            break;
+          }
+        }
+      }
+
+      if (TryNext)
+        continue;
+
+      // Check for #2
+      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
+        SDValue Result = isLoad
+          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
+                               BasePtr, Offset, AM)
+          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
+                                BasePtr, Offset, AM);
+        ++PostIndexedNodes;
+        ++NodesCombined;
+        DEBUG(dbgs() << "\nReplacing.5 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Result.getNode()->dump(&DAG);
+              dbgs() << '\n');
+        WorkListRemover DeadNodes(*this);
+        if (isLoad) {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
+                                        &DeadNodes);
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
+                                        &DeadNodes);
+        } else {
+          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
+                                        &DeadNodes);
+        }
+
+        // Finally, since the node is now dead, remove it from the graph.
+        DAG.DeleteNode(N);
+
+        // Replace the uses of Use with uses of the updated base value.
+        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
+                                      Result.getValue(isLoad ? 1 : 0),
+                                      &DeadNodes);
+        removeFromWorkList(Op);
+        DAG.DeleteNode(Op);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+SDValue DAGCombiner::visitLOAD(SDNode *N) {
+  LoadSDNode *LD  = cast<LoadSDNode>(N);
+  SDValue Chain = LD->getChain();
+  SDValue Ptr   = LD->getBasePtr();
+
+  // If load is not volatile and there are no uses of the loaded value (and
+  // the updated indexed value in case of indexed loads), change uses of the
+  // chain value into uses of the chain input (i.e. delete the dead load).
+  if (!LD->isVolatile()) {
+    if (N->getValueType(1) == MVT::Other) {
+      // Unindexed loads.
+      if (N->hasNUsesOfValue(0, 0)) {
+        // It's not safe to use the two value CombineTo variant here. e.g.
+        // v1, chain2 = load chain1, loc
+        // v2, chain3 = load chain2, loc
+        // v3         = add v2, c
+        // Now we replace use of chain2 with chain1.  This makes the second
+        // load isomorphic to the one we are deleting, and thus makes this
+        // load live.
+        DEBUG(dbgs() << "\nReplacing.6 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith chain: ";
+              Chain.getNode()->dump(&DAG);
+              dbgs() << "\n");
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
+
+        if (N->use_empty()) {
+          removeFromWorkList(N);
+          DAG.DeleteNode(N);
+        }
+
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    } else {
+      // Indexed loads.
+      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
+      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
+        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
+        DEBUG(dbgs() << "\nReplacing.7 ";
+              N->dump(&DAG);
+              dbgs() << "\nWith: ";
+              Undef.getNode()->dump(&DAG);
+              dbgs() << " and 2 other values\n");
+        WorkListRemover DeadNodes(*this);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
+                                      DAG.getUNDEF(N->getValueType(1)),
+                                      &DeadNodes);
+        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
+        removeFromWorkList(N);
+        DAG.DeleteNode(N);
+        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
+      }
+    }
+  }
+
+  // If this load is directly stored, replace the load value with the stored
+  // value.
+  // TODO: Handle store large -> read small portion.
+  // TODO: Handle TRUNCSTORE/LOADEXT
+  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
+      !LD->isVolatile()) {
+    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
+      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
+      if (PrevST->getBasePtr() == Ptr &&
+          PrevST->getValue().getValueType() == N->getValueType(0))
+        return CombineTo(N, Chain.getOperand(1), Chain);
+    }
+  }
+
+  // Try to infer better alignment information than the load already has.
+  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > LD->getAlignment())
+        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+                              LD->getValueType(0),
+                              Chain, Ptr, LD->getPointerInfo(),
+                              LD->getMemoryVT(),
+                              LD->isVolatile(), LD->isNonTemporal(), Align);
+    }
+  }
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplLoad;
+
+      // Replace the chain to avoid dependency.
+      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
+        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
+                               BetterChain, Ptr, LD->getPointerInfo(),
+                               LD->isVolatile(), LD->isNonTemporal(),
+                               LD->getAlignment());
+      } else {
+        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+                                  LD->getValueType(0),
+                                  BetterChain, Ptr, LD->getPointerInfo(),
+                                  LD->getMemoryVT(),
+                                  LD->isVolatile(),
+                                  LD->isNonTemporal(),
+                                  LD->getAlignment());
+      }
+
+      // Create token factor to keep old chain connected.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplLoad.getValue(1));
+
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+
+      // Replace uses with load result and token factor. Don't add users
+      // to work list.
+      return CombineTo(N, ReplLoad.getValue(0), Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed load.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  return SDValue();
+}
+
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load has specific bytes cleared out.  If so, return the byte size being
+/// masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+
+  // Check for the structure we're looking for.
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+
+  // Check the chain and pointer.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
+
+  // The store should be chained directly to the load or be an operand of a
+  // tokenfactor.
+  if (LD == Chain.getNode())
+    ; // ok.
+  else if (Chain->getOpcode() != ISD::TokenFactor)
+    return Result; // Fail.
+  else {
+    bool isOk = false;
+    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
+      if (Chain->getOperand(i).getNode() == LD) {
+        isOk = true;
+        break;
+      }
+    if (!isOk) return Result;
+  }
+
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask.  Invert it so that the bits being masked out are
+  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+
+  // See if we have a contiguous run of bits.  If so, we have 0*1+0*
+  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1:
+  case 2:
+  case 4: break;
+  default: return Result; // All one mask, or 5-byte mask.
+  }
+
+  // Verify that the first bit starts at a multiple of mask so that the access
+  // is aligned the same as the access width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo.  If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+                                SDValue IVal, StoreSDNode *St,
+                                DAGCombiner *DC) {
+  unsigned NumBytes = MaskInfo.first;
+  unsigned ByteShift = MaskInfo.second;
+  SelectionDAG &DAG = DC->getDAG();
+
+  // Check to see if IVal is all zeros in the part being masked in by the 'or'
+  // that uses this.  If not, this is not a replacement.
+  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+                                  ByteShift*8, (ByteShift+NumBytes)*8);
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+  // Check that it is legal on the target to do this.  It is legal if the new
+  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+  // legalization.
+  MVT VT = MVT::getIntegerVT(NumBytes*8);
+  if (!DC->isTypeLegal(VT))
+    return 0;
+
+  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
+  // shifted by ByteShift and truncated down to NumBytes.
+  if (ByteShift)
+    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+                       DAG.getConstant(ByteShift*8,
+                                    DC->getShiftAmountTy(IVal.getValueType())));
+
+  // Figure out the offset for the store and the alignment of the access.
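+  // For example, masking out byte 1 of an i32 on a little-endian target
+  // yields NumBytes = 1 and ByteShift = 1, so the narrowed store writes the
+  // single replacement byte at Ptr + 1.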
+  unsigned StOffset;
+  unsigned NewAlign = St->getAlignment();
+
+  if (DAG.getTargetLoweringInfo().isLittleEndian())
+    StOffset = ByteShift;
+  else
+    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+  SDValue Ptr = St->getBasePtr();
+  if (StOffset) {
+    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+    NewAlign = MinAlign(NewAlign, StOffset);
+  }
+
+  // Truncate down to the new size.
+  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+  ++OpsNarrowed;
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+                      St->getPointerInfo().getWithOffset(StOffset),
+                      false, false, NewAlign).getNode();
+}
+
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  if (ST->isVolatile())
+    return SDValue();
+
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+  EVT VT = Value.getValueType();
+
+  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+    return SDValue();
+
+  unsigned Opc = Value.getOpcode();
+
+  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+  // is a byte mask indicating a consecutive number of bytes, check to see if
+  // Y is known to provide just those bytes.  If so, we try to replace the
+  // load + replace + store sequence with a single (narrower) store, which makes
+  // the load dead.
+  if (Opc == ISD::OR) {
+    std::pair<unsigned, unsigned> MaskedLoad;
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(1), ST,this))
+        return SDValue(NewST, 0);
+
+    // Or is commutative, so try swapping X and Y.
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(0), ST,this))
+        return SDValue(NewST, 0);
+  }
+
+  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+      Value.getOperand(1).getOpcode() != ISD::Constant)
+    return SDValue();
+
+  SDValue N0 = Value.getOperand(0);
+  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+      Chain == SDValue(N0.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(N0);
+    if (LD->getBasePtr() != Ptr ||
+        LD->getPointerInfo().getAddrSpace() !=
+        ST->getPointerInfo().getAddrSpace())
+      return SDValue();
+
+    // Find the type to narrow the load / op / store to.
+    SDValue N1 = Value.getOperand(1);
+    unsigned BitWidth = N1.getValueSizeInBits();
+    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+    if (Opc == ISD::AND)
+      Imm ^= APInt::getAllOnesValue(BitWidth);
+    if (Imm == 0 || Imm.isAllOnesValue())
+      return SDValue();
+    unsigned ShAmt = Imm.countTrailingZeros();
+    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    while (NewBW < BitWidth &&
+           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+             TLI.isNarrowingProfitable(VT, NewVT))) {
+      NewBW = NextPowerOf2(NewBW);
+      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
+    }
+    if (NewBW >= BitWidth)
+      return SDValue();
+
+    // If the changed lsb does not start at a type bitwidth boundary, start at
+    // the previous boundary.
+    if (ShAmt % NewBW)
+      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+    if ((Imm & Mask) == Imm) {
+      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+      if (Opc == ISD::AND)
+        NewImm ^= APInt::getAllOnesValue(NewBW);
+      uint64_t PtrOff = ShAmt / 8;
+      // For big endian targets, we need to adjust the offset to the pointer to
+      // load the correct bytes.
+      if (TLI.isBigEndian())
+        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
+
+      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+      const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
+        return SDValue();
+
+      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+                                   Ptr.getValueType(), Ptr,
+                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
+      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+                                  LD->getChain(), NewPtr,
+                                  LD->getPointerInfo().getWithOffset(PtrOff),
+                                  LD->isVolatile(), LD->isNonTemporal(),
+                                  NewAlign);
+      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+                                   DAG.getConstant(NewImm, NewVT));
+      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+                                   NewVal, NewPtr,
+                                   ST->getPointerInfo().getWithOffset(PtrOff),
+                                   false, false, NewAlign);
+
+      AddToWorkList(NewPtr.getNode());
+      AddToWorkList(NewLD.getNode());
+      AddToWorkList(NewVal.getNode());
+      WorkListRemover DeadNodes(*this);
+      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+                                    &DeadNodes);
+      ++OpsNarrowed;
+      return NewST;
+    }
+  }
+
+  return SDValue();
+}
+
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
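+/// For example, a copy such as (store (f64 (load p1)), p2) can become an
+/// i64 load / store pair when the target reports that the integer round
+/// trip avoids a more expensive trip through an FP register.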
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+      Value.hasOneUse() &&
+      Chain == SDValue(Value.getNode(), 1)) {
+    LoadSDNode *LD = cast<LoadSDNode>(Value);
+    EVT VT = LD->getMemoryVT();
+    if (!VT.isFloatingPoint() ||
+        VT != ST->getMemoryVT() ||
+        LD->isNonTemporal() ||
+        ST->isNonTemporal() ||
+        LD->getPointerInfo().getAddrSpace() != 0 ||
+        ST->getPointerInfo().getAddrSpace() != 0)
+      return SDValue();
+
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+      return SDValue();
+
+    unsigned LDAlign = LD->getAlignment();
+    unsigned STAlign = ST->getAlignment();
+    const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+    unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+    if (LDAlign < ABIAlign || STAlign < ABIAlign)
+      return SDValue();
+
+    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+                                LD->getChain(), LD->getBasePtr(),
+                                LD->getPointerInfo(),
+                                false, false, LDAlign);
+
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+                                 NewLD, ST->getBasePtr(),
+                                 ST->getPointerInfo(),
+                                 false, false, STAlign);
+
+    AddToWorkList(NewLD.getNode());
+    AddToWorkList(NewST.getNode());
+    WorkListRemover DeadNodes(*this);
+    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+                                  &DeadNodes);
+    ++LdStFP2Int;
+    return NewST;
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitSTORE(SDNode *N) {
+  StoreSDNode *ST  = cast<StoreSDNode>(N);
+  SDValue Chain = ST->getChain();
+  SDValue Value = ST->getValue();
+  SDValue Ptr   = ST->getBasePtr();
+
+  // If this is a store of a bit convert, store the input value if the
+  // resultant store does not need a higher alignment than the original.
+  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
+      ST->isUnindexed()) {
+    unsigned OrigAlign = ST->getAlignment();
+    EVT SVT = Value.getOperand(0).getValueType();
+    unsigned Align = TLI.getTargetData()->
+      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
+    if (Align <= OrigAlign &&
+        ((!LegalOperations && !ST->isVolatile()) ||
+         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
+      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
+                          ST->isNonTemporal(), OrigAlign);
+  }
+
+  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
+    // NOTE: If the original store is volatile, this transform must not
+    // increase the number of stores.  For example, on x86-32 an f64 can be
+    // stored in one processor operation but an i64 (which is not legal)
+    // requires two.  So the transform should not be done in this case.
+    if (Value.getOpcode() != ISD::TargetConstantFP) {
+      SDValue Tmp;
+      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+      default: llvm_unreachable("Unknown FP type");
+      case MVT::f80:    // We don't do this for these yet.
+      case MVT::f128:
+      case MVT::ppcf128:
+        break;
+      case MVT::f32:
+        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
+                                bitcastToAPInt().getZExtValue(), MVT::i32);
+          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
+                              ST->isNonTemporal(), ST->getAlignment());
+        }
+        break;
+      case MVT::f64:
+        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
+             !ST->isVolatile()) ||
+            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
+          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
+                                getZExtValue(), MVT::i64);
+          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
+                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
+                              ST->isNonTemporal(), ST->getAlignment());
+        } else if (!ST->isVolatile() &&
+                   TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
+          // Many FP stores are not made apparent until after legalize, e.g. for
+          // argument passing.  Since this is so common, custom legalize the
+          // 64-bit integer store into two 32-bit stores.
+          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
+          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
+          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
+          if (TLI.isBigEndian()) std::swap(Lo, Hi);
+
+          unsigned Alignment = ST->getAlignment();
+          bool isVolatile = ST->isVolatile();
+          bool isNonTemporal = ST->isNonTemporal();
+
+          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+                                     Ptr, ST->getPointerInfo(),
+                                     isVolatile, isNonTemporal,
+                                     ST->getAlignment());
+          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+                            DAG.getConstant(4, Ptr.getValueType()));
+          Alignment = MinAlign(Alignment, 4U);
+          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+                                     Ptr, ST->getPointerInfo().getWithOffset(4),
+                                     isVolatile, isNonTemporal,
+                                     Alignment);
+          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+                             St0, St1);
+        }
+
+        break;
+      }
+    }
+  }
+
+  // Try to infer better alignment information than the store already has.
+  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+      if (Align > ST->getAlignment())
+        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+    }
+  }
+
+  // Try transforming a pair of floating point load / store ops to integer
+  // load / store ops.
+  SDValue NewST = TransformFPLoadStorePair(N);
+  if (NewST.getNode())
+    return NewST;
+
+  if (CombinerAA) {
+    // Walk up chain skipping non-aliasing memory nodes.
+    SDValue BetterChain = FindBetterChain(N, Chain);
+
+    // If there is a better chain.
+    if (Chain != BetterChain) {
+      SDValue ReplStore;
+
+      // Replace the chain to avoid dependency.
+      if (ST->isTruncatingStore()) {
+        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+                                      ST->getPointerInfo(),
+                                      ST->getMemoryVT(), ST->isVolatile(),
+                                      ST->isNonTemporal(), ST->getAlignment());
+      } else {
+        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
+                                 ST->getPointerInfo(),
+                                 ST->isVolatile(), ST->isNonTemporal(),
+                                 ST->getAlignment());
+      }
+
+      // Create token to keep both nodes around.
+      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+                                  MVT::Other, Chain, ReplStore);
+
+      // Make sure the new and old chains are cleaned up.
+      AddToWorkList(Token.getNode());
+
+      // Don't add users to work list.
+      return CombineTo(N, Token, false);
+    }
+  }
+
+  // Try transforming N to an indexed store.
+  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
+    return SDValue(N, 0);
+
+  // FIXME: is there such a thing as a truncating indexed store?
+  if (ST->isTruncatingStore() && ST->isUnindexed() &&
+      Value.getValueType().isInteger()) {
+    // See if we can simplify the input to this truncstore with knowledge that
+    // only the low bits are being used.  For example:
+    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
+    SDValue Shorter =
+      GetDemandedBits(Value,
+                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
+                                           ST->getMemoryVT().getSizeInBits()));
+    AddToWorkList(Value.getNode());
+    if (Shorter.getNode())
+      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
+                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+                               ST->isVolatile(), ST->isNonTemporal(),
+                               ST->getAlignment());
+
+    // Otherwise, see if we can simplify the operation with
+    // SimplifyDemandedBits, which only works if the value has a single use.
+    if (SimplifyDemandedBits(Value,
+                        APInt::getLowBitsSet(
+                          Value.getValueType().getScalarType().getSizeInBits(),
+                          ST->getMemoryVT().getScalarType().getSizeInBits())))
+      return SDValue(N, 0);
+  }
+
+  // If this is a load followed by a store to the same location, then the store
+  // is dead/noop.
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
+        ST->isUnindexed() && !ST->isVolatile() &&
+        // There can't be any side effects between the load and store, such as
+        // a call or store.
+        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
+      // The store is dead, remove it.
+      return Chain;
+    }
+  }
+
+  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
+  // truncating store.  We can do this even if this is already a truncstore.
+  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
+      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
+      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
+                            ST->getMemoryVT())) {
+    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
+                             ST->isVolatile(), ST->isNonTemporal(),
+                             ST->getAlignment());
+  }
+
+  return ReduceLoadOpStoreWidth(N);
+}
+
+SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InVec = N->getOperand(0);
+  SDValue InVal = N->getOperand(1);
+  SDValue EltNo = N->getOperand(2);
+
+  // If the inserted element is an UNDEF, just use the input vector.
+  if (InVal.getOpcode() == ISD::UNDEF)
+    return InVec;
+
+  EVT VT = InVec.getValueType();
+
+  // If we can't generate a legal BUILD_VECTOR, exit
+  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+    return SDValue();
+
+  // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
+  // vector with the inserted element.
+  if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
+                                InVec.getNode()->op_end());
+    if (Elt < Ops.size())
+      Ops[Elt] = InVal;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                       VT, &Ops[0], Ops.size());
+  }
+  // If the invec is an UNDEF and if EltNo is a constant, create a new
+  // BUILD_VECTOR with undef elements and the inserted element.
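+  // For example, (insert_vector_elt (v4i32 undef), x, 2) becomes
+  // (build_vector undef, undef, x, undef).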
+  if (InVec.getOpcode() == ISD::UNDEF &&
+      isa<ConstantSDNode>(EltNo)) {
+    EVT EltVT = VT.getVectorElementType();
+    unsigned NElts = VT.getVectorNumElements();
+    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
+
+    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    if (Elt < Ops.size())
+      Ops[Elt] = InVal;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                       VT, &Ops[0], Ops.size());
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
+  // (vextract (scalar_to_vector val, 0) -> val
+  SDValue InVec = N->getOperand(0);
+
+  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+    // Check if the result type doesn't match the inserted element type. A
+    // SCALAR_TO_VECTOR may truncate the inserted element and the
+    // EXTRACT_VECTOR_ELT may widen the extracted vector.
+    SDValue InOp = InVec.getOperand(0);
+    EVT NVT = N->getValueType(0);
+    if (InOp.getValueType() != NVT) {
+      assert(InOp.getValueType().isInteger() && NVT.isInteger());
+      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+    }
+    return InOp;
+  }
+
+  // Perform only after legalization to ensure build_vector / vector_shuffle
+  // optimizations have already been done.
+  if (!LegalOperations) return SDValue();
+
+  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
+  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
+  SDValue EltNo = N->getOperand(1);
+
+  if (isa<ConstantSDNode>(EltNo)) {
+    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+    bool NewLoad = false;
+    bool BCNumEltsChanged = false;
+    EVT VT = InVec.getValueType();
+    EVT ExtVT = VT.getVectorElementType();
+    EVT LVT = ExtVT;
+
+    if (InVec.getOpcode() == ISD::BITCAST) {
+      EVT BCVT = InVec.getOperand(0).getValueType();
+      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
+        return SDValue();
+      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
+        BCNumEltsChanged = true;
+      InVec = InVec.getOperand(0);
+      ExtVT = BCVT.getVectorElementType();
+      NewLoad = true;
+    }
+
+    LoadSDNode *LN0 = NULL;
+    const ShuffleVectorSDNode *SVN = NULL;
+    if (ISD::isNormalLoad(InVec.getNode())) {
+      LN0 = cast<LoadSDNode>(InVec);
+    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+               InVec.getOperand(0).getValueType() == ExtVT &&
+               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
+      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
+    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
+      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
+      // =>
+      // (load $addr+1*size)
+
+      // If the bit convert changed the number of elements, it is unsafe
+      // to examine the mask.
+      if (BCNumEltsChanged)
+        return SDValue();
+
+      // Select the input vector, guarding against an out-of-range extract.
+      unsigned NumElems = VT.getVectorNumElements();
+      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
+      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
+
+      if (InVec.getOpcode() == ISD::BITCAST)
+        InVec = InVec.getOperand(0);
+      if (ISD::isNormalLoad(InVec.getNode())) {
+        LN0 = cast<LoadSDNode>(InVec);
+        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
+      }
+    }
+
+    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
+      return SDValue();
+
+    // If Idx was -1 above, Elt is going to be -1, so just return undef.
+    if (Elt == -1)
+      return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
+    unsigned Align = LN0->getAlignment();
+    if (NewLoad) {
+      // Check the resultant load doesn't need a higher alignment than the
+      // original load.
+      unsigned NewAlign =
+        TLI.getTargetData()
+           ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+
+      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
+        return SDValue();
+
+      Align = NewAlign;
+    }
+
+    SDValue NewPtr = LN0->getBasePtr();
+    unsigned PtrOff = 0;
+
+    if (Elt) {
+      PtrOff = LVT.getSizeInBits() * Elt / 8;
+      EVT PtrType = NewPtr.getValueType();
+      if (TLI.isBigEndian())
+        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
+      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+                           DAG.getConstant(PtrOff, PtrType));
+    }
+
+    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
+                       LN0->getPointerInfo().getWithOffset(PtrOff),
+                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+  unsigned NumInScalars = N->getNumOperands();
+  EVT VT = N->getValueType(0);
+
+  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
+  // at most two distinct vectors, turn this into a shuffle node.
+  SDValue VecIn1, VecIn2;
+  for (unsigned i = 0; i != NumInScalars; ++i) {
+    // Ignore undef inputs.
+    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    // If this input is something other than an EXTRACT_VECTOR_ELT with a
+    // constant index, bail out.
+    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // If the input vector type disagrees with the result of the build_vector,
+    // we can't make a shuffle.
+    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
+    if (ExtractedFromVec.getValueType() != VT) {
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+
+    // Otherwise, remember this.  We allow up to two distinct input vectors.
+    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
+      continue;
+
+    if (VecIn1.getNode() == 0) {
+      VecIn1 = ExtractedFromVec;
+    } else if (VecIn2.getNode() == 0) {
+      VecIn2 = ExtractedFromVec;
+    } else {
+      // Too many inputs.
+      VecIn1 = VecIn2 = SDValue(0, 0);
+      break;
+    }
+  }
+
+  // If everything is good, we can make a shuffle operation.
+  if (VecIn1.getNode()) {
+    SmallVector<int, 8> Mask;
+    for (unsigned i = 0; i != NumInScalars; ++i) {
+      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
+        Mask.push_back(-1);
+        continue;
+      }
+
+      // If extracting from the first vector, just use the index directly.
+      SDValue Extract = N->getOperand(i);
+      SDValue ExtVal = Extract.getOperand(1);
+      if (Extract.getOperand(0) == VecIn1) {
+        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+        if (ExtIndex > VT.getVectorNumElements())
+          return SDValue();
+
+        Mask.push_back(ExtIndex);
+        continue;
+      }
+
+      // Otherwise, use InIdx + VecSize
+      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
+      Mask.push_back(Idx+NumInScalars);
+    }
+
+    // Add count and size info.
+    if (!isTypeLegal(VT))
+      return SDValue();
+
+    // Return the new VECTOR_SHUFFLE node.
+    SDValue Ops[2];
+    Ops[0] = VecIn1;
+    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
+  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
+  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
+  // inputs come from at most two distinct vectors, turn this into a shuffle
+  // node.
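+  // For example, (concat_vectors (extract_subvector X, 0),
+  // (extract_subvector X, 2)) could then become a single shuffle of X.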
+
+  // If we only have one input vector, we don't need to do any concatenation.
+  if (N->getNumOperands() == 1)
+    return N->getOperand(0);
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode* N) {
+  EVT VT = N->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SDValue N0 = N->getOperand(0);
+
+  assert(N0.getValueType().getVectorNumElements() == NumElts &&
+         "Vector shuffle must be normalized in DAG");
+
+  // FIXME: implement canonicalizations from DAG.getVectorShuffle()
+
+  // If it is a splat, check if the argument vector is another splat or a
+  // build_vector with all scalar elements the same.
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
+    SDNode *V = N0.getNode();
+
+    // If this is a bit convert that changes the element type of the vector but
+    // not the number of vector elements, look through it.  Be careful not to
+    // look though conversions that change things like v4f32 to v2f64.
+    if (V->getOpcode() == ISD::BITCAST) {
+      SDValue ConvInput = V->getOperand(0);
+      if (ConvInput.getValueType().isVector() &&
+          ConvInput.getValueType().getVectorNumElements() == NumElts)
+        V = ConvInput.getNode();
+    }
+
+    if (V->getOpcode() == ISD::BUILD_VECTOR) {
+      assert(V->getNumOperands() == NumElts &&
+             "BUILD_VECTOR has wrong number of operands");
+      SDValue Base;
+      bool AllSame = true;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+          Base = V->getOperand(i);
+          break;
+        }
+      }
+      // Splat of <u, u, u, u>, return <u, u, u, u>
+      if (!Base.getNode())
+        return N0;
+      for (unsigned i = 0; i != NumElts; ++i) {
+        if (V->getOperand(i) != Base) {
+          AllSame = false;
+          break;
+        }
+      }
+      // Splat of <x, x, x, x>, return <x, x, x, x>
+      if (AllSame)
+        return N0;
+    }
+  }
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+  if (!TLI.getShouldFoldAtomicFences())
+    return SDValue();
+
+  SDValue atomic = N->getOperand(0);
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      break;
+    default:
+      return SDValue();
+  }
+
+  SDValue fence = atomic.getOperand(0);
+  if (fence.getOpcode() != ISD::MEMBARRIER)
+    return SDValue();
+
+  switch (atomic.getOpcode()) {
+    case ISD::ATOMIC_CMP_SWAP:
+      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+                                            fence.getOperand(0),
+                                            atomic.getOperand(1),
+                                            atomic.getOperand(2),
+                                            atomic.getOperand(3)),
+                     atomic.getResNo());
+    case ISD::ATOMIC_SWAP:
+    case ISD::ATOMIC_LOAD_ADD:
+    case ISD::ATOMIC_LOAD_SUB:
+    case ISD::ATOMIC_LOAD_AND:
+    case ISD::ATOMIC_LOAD_OR:
+    case ISD::ATOMIC_LOAD_XOR:
+    case ISD::ATOMIC_LOAD_NAND:
+    case ISD::ATOMIC_LOAD_MIN:
+    case ISD::ATOMIC_LOAD_MAX:
+    case ISD::ATOMIC_LOAD_UMIN:
+    case ISD::ATOMIC_LOAD_UMAX:
+      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+                                            fence.getOperand(0),
+                                            atomic.getOperand(1),
+                                            atomic.getOperand(2)),
+                     atomic.getResNo());
+    default:
+      return SDValue();
+  }
+}
+
+/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to transform
+/// an AND to a vector_shuffle with the destination vector and a zero vector.
+/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
+///      vector_shuffle V, Zero, <0, 4, 2, 4>
+SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  if (N->getOpcode() == ISD::AND) {
+    if (RHS.getOpcode() == ISD::BITCAST)
+      RHS = RHS.getOperand(0);
+    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<int, 8> Indices;
+      unsigned NumElts = RHS.getNumOperands();
+      for (unsigned i = 0; i != NumElts; ++i) {
+        SDValue Elt = RHS.getOperand(i);
+        if (!isa<ConstantSDNode>(Elt))
+          return SDValue();
+        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
+          Indices.push_back(i);
+        else if (cast<ConstantSDNode>(Elt)->isNullValue())
+          Indices.push_back(NumElts);
+        else
+          return SDValue();
+      }
+
+      // Let's see if the target supports this vector_shuffle.
+      EVT RVT = RHS.getValueType();
+      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
+        return SDValue();
+
+      // Return the new VECTOR_SHUFFLE node.
+      EVT EltVT = RVT.getVectorElementType();
+      SmallVector<SDValue, 8> ZeroOps(RVT.getVectorNumElements(),
+                                      DAG.getConstant(0, EltVT));
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                                 RVT, &ZeroOps[0], ZeroOps.size());
+      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
+      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
+      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
+SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
+  // After legalize, the target may be depending on adds and other
+  // binary ops to provide legal ways to construct constants or other
+  // things. Simplifying them may result in a loss of legality.
+  if (LegalOperations) return SDValue();
+
+  assert(N->getValueType(0).isVector() &&
+         "SimplifyVBinOp only works on vectors!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Shuffle = XformToShuffleWithZero(N);
+  if (Shuffle.getNode()) return Shuffle;
+
+  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
+  // this operation.
+  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
+      RHS.getOpcode() == ISD::BUILD_VECTOR) {
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+      SDValue LHSOp = LHS.getOperand(i);
+      SDValue RHSOp = RHS.getOperand(i);
+      // If these two elements can't be folded, bail out.
+      if ((LHSOp.getOpcode() != ISD::UNDEF &&
+           LHSOp.getOpcode() != ISD::Constant &&
+           LHSOp.getOpcode() != ISD::ConstantFP) ||
+          (RHSOp.getOpcode() != ISD::UNDEF &&
+           RHSOp.getOpcode() != ISD::Constant &&
+           RHSOp.getOpcode() != ISD::ConstantFP))
+        break;
+
+      // Can't fold divide by zero.
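+      // e.g. (build_vector 6, 8) udiv (build_vector 2, 0) must be left alone.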
+      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
+          N->getOpcode() == ISD::FDIV) {
+        if ((RHSOp.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
+            (RHSOp.getOpcode() == ISD::ConstantFP &&
+             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
+          break;
+      }
+
+      EVT VT = LHSOp.getValueType();
+      assert(RHSOp.getValueType() == VT &&
+             "SimplifyVBinOp with different BUILD_VECTOR element types");
+      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
+                                   LHSOp, RHSOp);
+      if (FoldOp.getOpcode() != ISD::UNDEF &&
+          FoldOp.getOpcode() != ISD::Constant &&
+          FoldOp.getOpcode() != ISD::ConstantFP)
+        break;
+      Ops.push_back(FoldOp);
+      AddToWorkList(FoldOp.getNode());
+    }
+
+    if (Ops.size() == LHS.getNumOperands())
+      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                         LHS.getValueType(), &Ops[0], Ops.size());
+  }
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
+                                    SDValue N1, SDValue N2) {
+  assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
+
+  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
+                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
+
+  // If we got a simplified select_cc node back from SimplifySelectCC, then
+  // break it down into a new SETCC node, and a new SELECT node, and then return
+  // the SELECT node, since we were called with a SELECT node.
+  if (SCC.getNode()) {
+    // Check to see if we got a select_cc back (to turn into setcc/select).
+    // Otherwise, just return whatever node we got back, like fabs.
+    if (SCC.getOpcode() == ISD::SELECT_CC) {
+      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
+                                  N0.getValueType(),
+                                  SCC.getOperand(0), SCC.getOperand(1),
+                                  SCC.getOperand(4));
+      AddToWorkList(SETCC.getNode());
+      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
+                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
+    }
+
+    return SCC;
+  }
+  return SDValue();
+}
+
+/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
+/// are the two values being selected between, see if we can simplify the
+/// select.  Callers of this should assume that TheSelect is deleted if this
+/// returns true.  As such, they should return the appropriate thing (e.g. the
+/// node) back to the top-level of the DAG combiner loop to avoid it being
+/// looked at.
+bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
+                                    SDValue RHS) {
+
+  // Cannot simplify select with vector condition
+  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
+  // If this is a select from two identical things, try to pull the operation
+  // through the select.
+  if (LHS.getOpcode() != RHS.getOpcode() ||
+      !LHS.hasOneUse() || !RHS.hasOneUse())
+    return false;
+
+  // If this is a load and the token chain is identical, replace the select
+  // of two loads with a load through a select of the address to load from.
+  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+  // constants have been dropped into the constant pool.
+  if (LHS.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+    LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+    // Token chains must be identical.
+    if (LHS.getOperand(0) != RHS.getOperand(0) ||
+        // Do not let this transformation reduce the number of volatile loads.
+        LLD->isVolatile() || RLD->isVolatile() ||
+        // If this is an EXTLOAD, the VT's must match.
+        LLD->getMemoryVT() != RLD->getMemoryVT() ||
+        // If this is an EXTLOAD, the kind of extension must match.
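+        // (e.g. selecting between a sextload and a zextload of the same slot
+        // would change the high bits of the chosen value)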
+        (LLD->getExtensionType() != RLD->getExtensionType() &&
+         // The only exception is if one of the extensions is anyext.
+         LLD->getExtensionType() != ISD::EXTLOAD &&
+         RLD->getExtensionType() != ISD::EXTLOAD) ||
+        // FIXME: this discards src value information.  This is
+        // over-conservative. It would be beneficial to be able to remember
+        // both potential memory locations.  Since we are discarding
+        // src value info, don't do the transformation if the memory
+        // locations are not in the default address space.
+        LLD->getPointerInfo().getAddrSpace() != 0 ||
+        RLD->getPointerInfo().getAddrSpace() != 0)
+      return false;
+
+    // Check that the select condition doesn't reach either load.  If so,
+    // folding this will induce a cycle into the DAG.  If not, this is safe to
+    // xform, so create a select of the addresses.
+    SDValue Addr;
+    if (TheSelect->getOpcode() == ISD::SELECT) {
+      SDNode *CondNode = TheSelect->getOperand(0).getNode();
+      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+        return false;
+      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0), LLD->getBasePtr(),
+                         RLD->getBasePtr());
+    } else {  // Otherwise SELECT_CC
+      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+      SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+      if ((LLD->hasAnyUseOfValue(1) &&
+           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+          (RLD->hasAnyUseOfValue(1) &&
+           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+        return false;
+
+      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+                         LLD->getBasePtr().getValueType(),
+                         TheSelect->getOperand(0),
+                         TheSelect->getOperand(1),
+                         LLD->getBasePtr(), RLD->getBasePtr(),
+                         TheSelect->getOperand(4));
+    }
+
+    SDValue Load;
+    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+      Load = DAG.getLoad(TheSelect->getValueType(0),
+                         TheSelect->getDebugLoc(),
+                         // FIXME: Discards pointer info.
+                         LLD->getChain(), Addr, MachinePointerInfo(),
+                         LLD->isVolatile(), LLD->isNonTemporal(),
+                         LLD->getAlignment());
+    } else {
+      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+                            RLD->getExtensionType() : LLD->getExtensionType(),
+                            TheSelect->getDebugLoc(),
+                            TheSelect->getValueType(0),
+                            // FIXME: Discards pointer info.
+                            LLD->getChain(), Addr, MachinePointerInfo(),
+                            LLD->getMemoryVT(), LLD->isVolatile(),
+                            LLD->isNonTemporal(), LLD->getAlignment());
+    }
+
+    // Users of the select now use the result of the load.
+    CombineTo(TheSelect, Load);
+
+    // Users of the old loads now use the new load's chain.  We know the
+    // old-load value is dead now.
+    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+    return true;
+  }
+
+  return false;
+}
+
+/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
+/// where 'cond' is the comparison specified by CC.
+SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
+                                      SDValue N2, SDValue N3,
+                                      ISD::CondCode CC, bool NotExtCompare) {
+  // (x ? y : y) -> y.
+  if (N2 == N3) return N2;
+
+  EVT VT = N2.getValueType();
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
+
+  // Determine if the condition we're dealing with is constant
+  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, DL, false);
+  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
+
+  // fold select_cc true, x, y -> x
+  if (SCCC && !SCCC->isNullValue())
+    return N2;
+  // fold select_cc false, x, y -> y
+  if (SCCC && SCCC->isNullValue())
+    return N3;
+
+  // Check to see if we can simplify the select into an fabs node
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
+    // Allow either -0.0 or 0.0
+    if (CFP->getValueAPF().isZero()) {
+      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
+      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
+          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
+          N2 == N3.getOperand(0))
+        return DAG.getNode(ISD::FABS, DL, VT, N0);
+
+      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
+      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
+          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
+          N2.getOperand(0) == N3)
+        return DAG.getNode(ISD::FABS, DL, VT, N3);
+    }
+  }
+
+  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
+  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
+  // in it.  This is a win when the constant is not otherwise available because
+  // it replaces two constant pool loads with one.  We only do this if the FP
+  // type is known to be legal, because if it isn't, then we are before legalize
+  // types and we want the other legalization to happen first (e.g. to avoid
+  // messing with soft float) and if the ConstantFP is not legal, because if
+  // it is legal, we may not need to store the FP constant in a constant pool.
+  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
+    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
+      if (TLI.isTypeLegal(N2.getValueType()) &&
+          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
+           TargetLowering::Legal) &&
+          // If both constants have multiple uses, then we won't need to do an
+          // extra load, they are likely around in registers for other users.
+          (TV->hasOneUse() || FV->hasOneUse())) {
+        Constant *Elts[] = {
+          const_cast<ConstantFP*>(FV->getConstantFPValue()),
+          const_cast<ConstantFP*>(TV->getConstantFPValue())
+        };
+        const Type *FPTy = Elts[0]->getType();
+        const TargetData &TD = *TLI.getTargetData();
+
+        // Create a ConstantArray of the two constants.
+        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
+                                            TD.getPrefTypeAlignment(FPTy));
+        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+
+        // Get the offsets to the 0 and 1 element of the array so that we can
+        // select between them.
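+        // The address computed below is effectively &CP[(a cond b) ? 1 : 0],
+        // so a single load yields whichever constant the condition picks.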
+        SDValue Zero = DAG.getIntPtrConstant(0);
+        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
+        SDValue One = DAG.getIntPtrConstant(EltSize);
+
+        SDValue Cond = DAG.getSetCC(DL,
+                                    TLI.getSetCCResultType(N0.getValueType()),
+                                    N0, N1, CC);
+        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
+                                        Cond, One, Zero);
+        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+                            CstOffset);
+        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
+                           MachinePointerInfo::getConstantPool(), false,
+                           false, Alignment);
+
+      }
+    }
+
+  // Check to see if we can perform the "gzip trick", transforming
+  // (select_cc setlt X, 0, A, 0) -> (and (sra X, size(X)-1), A)
+  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
+      N0.getValueType().isInteger() &&
+      N2.getValueType().isInteger() &&
+      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
+       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
+    EVT XType = N0.getValueType();
+    EVT AType = N2.getValueType();
+    if (XType.bitsGE(AType)) {
+      // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
+      // single-bit constant.
+      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
+        unsigned ShCtV = N2C->getAPIntValue().logBase2();
+        ShCtV = XType.getSizeInBits()-ShCtV-1;
+        SDValue ShCt = DAG.getConstant(ShCtV,
+                                       getShiftAmountTy(N0.getValueType()));
+        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+                                    XType, N0, ShCt);
+        AddToWorkList(Shift.getNode());
+
+        if (XType.bitsGT(AType)) {
+          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+          AddToWorkList(Shift.getNode());
+        }
+
+        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+      }
+
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+                                  XType, N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(N0.getValueType())));
+      AddToWorkList(Shift.getNode());
+
+      if (XType.bitsGT(AType)) {
+        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+        AddToWorkList(Shift.getNode());
+      }
+
+      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+    }
+  }
+
+  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // A plaintext description would be: we can turn the SELECT_CC into an AND
+  // when the condition can be materialized as an all-ones register.  Any
+  // single bit-test can be materialized as an all-ones register with
+  // shift-left and shift-right-arith.
+  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+      N0->getValueType(0) == VT &&
+      N1C && N1C->isNullValue() &&
+      N2C && N2C->isNullValue()) {
+    SDValue AndLHS = N0->getOperand(0);
+    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+      // Shift the tested bit over the sign bit.
+      APInt AndMask = ConstAndRHS->getAPIntValue();
+      SDValue ShlAmt =
+        DAG.getConstant(AndMask.countLeadingZeros(),
+                        getShiftAmountTy(AndLHS.getValueType()));
+      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+      // Now arithmetic right shift it all the way over, so the result is
+      // either all-ones, or zero.
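+      // For instance, testing bit 3 of an i32 shifts it left by 28 into the
+      // sign position; the shift right by 31 then yields 0 or -1 to mask A.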
+      SDValue ShrAmt =
+        DAG.getConstant(AndMask.getBitWidth()-1,
+                        getShiftAmountTy(Shl.getValueType()));
+      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+    }
+  }
+
+  // fold select C, 16, 0 -> shl C, 4
+  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
+      TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
+
+    // If the caller doesn't want us to simplify this into a zext of a compare,
+    // don't do it.
+    if (NotExtCompare && N2C->getAPIntValue() == 1)
+      return SDValue();
+
+    // Get a SetCC of the condition
+    // FIXME: Should probably make sure that setcc is legal if we ever have a
+    // target where it isn't.
+    SDValue Temp, SCC;
+    // cast from setcc result type to select result type
+    if (LegalTypes) {
+      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
+                          N0, N1, CC);
+      if (N2.getValueType().bitsLT(SCC.getValueType()))
+        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
+      else
+        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                           N2.getValueType(), SCC);
+    } else {
+      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
+      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
+                         N2.getValueType(), SCC);
+    }
+
+    AddToWorkList(SCC.getNode());
+    AddToWorkList(Temp.getNode());
+
+    if (N2C->getAPIntValue() == 1)
+      return Temp;
+
+    // shl setcc result by log2 n2c
+    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
+                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
+                                       getShiftAmountTy(Temp.getValueType())));
+  }
+
+  // Check to see if this is the equivalent of setcc
+  // FIXME: Turn all of these into setcc if setcc is legal;
+  // otherwise, go ahead with the folds.
+  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
+    EVT XType = N0.getValueType();
+    if (!LegalOperations ||
+        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
+      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
+      if (Res.getValueType() != VT)
+        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+      return Res;
+    }
+
+    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
+        (!LegalOperations ||
+         TLI.isOperationLegal(ISD::CTLZ, XType))) {
+      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
+      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
+                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
+                                       getShiftAmountTy(Ctlz.getValueType())));
+    }
+    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
+    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
+      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
+                                  XType, DAG.getConstant(0, XType), N0);
+      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
+      return DAG.getNode(ISD::SRL, DL, XType,
+                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
+                         DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(XType)));
+    }
+    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
+    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
+      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
+                                 DAG.getConstant(XType.getSizeInBits()-1,
+                                        getShiftAmountTy(N0.getValueType())));
+      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
+    }
+  }
+
+  // Check to see if this is an integer abs.
+  // select_cc setg[te] X,  0,  X, -X ->
+  // select_cc setgt    X, -1,  X, -X ->
+  // select_cc setl[te] X,  0, -X,  X ->
+  // select_cc setlt    X,  1, -X,  X ->
+  //   Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+  if (N1C) {
+    ConstantSDNode *SubC = NULL;
+    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+              (N1C->isOne() && CC == ISD::SETLT)) &&
+             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
+    EVT XType = N0.getValueType();
+    if (SubC && SubC->isNullValue() && XType.isInteger()) {
+      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+                                  N0,
+                                  DAG.getConstant(XType.getSizeInBits()-1,
+                                         getShiftAmountTy(N0.getValueType())));
+      SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+                                XType, N0, Shift);
+      AddToWorkList(Shift.getNode());
+      AddToWorkList(Add.getNode());
+      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
+    }
+  }
+
+  return SDValue();
+}
+
+/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
+SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
+                                   SDValue N1, ISD::CondCode Cond,
+                                   DebugLoc DL, bool foldBooleans) {
+  TargetLowering::DAGCombinerInfo
+    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
+  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildSDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildSDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue DAGCombiner::BuildUDIV(SDNode *N) {
+  std::vector<SDNode*> Built;
+  SDValue S = TLI.BuildUDIV(N, DAG, &Built);
+
+  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
+       ii != ee; ++ii)
+    AddToWorkList(*ii);
+  return S;
+}
+
+/// FindBaseOffset - Return true if base is a frame index, which is known not
+/// to alias with anything but itself.  Provides base object and offset as
+/// results.
+static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
+                           const GlobalValue *&GV, void *&CV) {
+  // Assume it is a primitive operation.
+  Base = Ptr; Offset = 0; GV = 0; CV = 0;
+
+  // If it's an add of a simple constant then integrate the offset.
+  if (Base.getOpcode() == ISD::ADD) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
+      Base = Base.getOperand(0);
+      Offset += C->getZExtValue();
+    }
+  }
+
+  // Return the underlying GlobalValue, and update the Offset.  Return false
+  // for GlobalAddressSDNode since the same GlobalAddress may be represented
+  // by multiple nodes with different offsets.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
+    GV = G->getGlobal();
+    Offset += G->getOffset();
+    return false;
+  }
+
+  // Return the underlying Constant value, and update the Offset.  Return false
+  // for ConstantSDNodes since the same constant pool entry may be represented
+  // by multiple nodes with different offsets.
+  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
+    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
+                                         : (void *)C->getConstVal();
+    Offset += C->getOffset();
+    return false;
+  }
+
+  // If it's any of the following then it can't alias with anything but itself.
+  return isa<FrameIndexSDNode>(Base);
+}
+
+/// isAlias - Return true if there is any possibility that the two addresses
+/// overlap.
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+                          const Value *SrcValue1, int SrcValueOffset1,
+                          unsigned SrcValueAlign1,
+                          const MDNode *TBAAInfo1,
+                          SDValue Ptr2, int64_t Size2,
+                          const Value *SrcValue2, int SrcValueOffset2,
+                          unsigned SrcValueAlign2,
+                          const MDNode *TBAAInfo2) const {
+  // If they are the same then they must be aliases.
+  if (Ptr1 == Ptr2) return true;
+
+  // Gather base node and offset information.
+  SDValue Base1, Base2;
+  int64_t Offset1, Offset2;
+  const GlobalValue *GV1, *GV2;
+  void *CV1, *CV2;
+  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
+  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
+
+  // If they have the same base address then check to see if they overlap.
+  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+
+  // It is possible for different frame indices to alias each other, mostly
+  // when tail call optimization reuses return address slots for arguments.
+  // To catch this case, look up the actual index of frame indices to compute
+  // the real alias relationship.
+  if (isFrameIndex1 && isFrameIndex2) {
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+  }
+
+  // Otherwise, if we know what the bases are, and they aren't identical, then
+  // we know they cannot alias.
+  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
+    return false;
+
+  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
+  // compared to the size and offset of the access, we may be able to prove they
+  // do not alias.  This check is conservative for now to catch cases created by
+  // splitting vector types.
+  if ((SrcValueAlign1 == SrcValueAlign2) &&
+      (SrcValueOffset1 != SrcValueOffset2) &&
+      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
+    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
+    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
+
+    // There is no overlap between these relatively aligned accesses of similar
+    // size, return no alias.
+    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
+      return false;
+  }
+
+  if (CombinerGlobalAA) {
+    // Use alias analysis information.
+    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
+    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
+    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
+    AliasAnalysis::AliasResult AAResult =
+      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
+    if (AAResult == AliasAnalysis::NoAlias)
+      return false;
+  }
+
+  // Otherwise we have to assume they alias.
+  return true;
+}
+
+/// FindAliasInfo - Extracts the relevant alias information from the memory
+/// node.  Returns true if the operand was a load.
+bool DAGCombiner::FindAliasInfo(SDNode *N,
+                                SDValue &Ptr, int64_t &Size,
+                                const Value *&SrcValue,
+                                int &SrcValueOffset,
+                                unsigned &SrcValueAlign,
+                                const MDNode *&TBAAInfo) const {
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    Size = LD->getMemoryVT().getSizeInBits() >> 3;
+    SrcValue = LD->getSrcValue();
+    SrcValueOffset = LD->getSrcValueOffset();
+    SrcValueAlign = LD->getOriginalAlignment();
+    TBAAInfo = LD->getTBAAInfo();
+    return true;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    Size = ST->getMemoryVT().getSizeInBits() >> 3;
+    SrcValue = ST->getSrcValue();
+    SrcValueOffset = ST->getSrcValueOffset();
+    SrcValueAlign = ST->getOriginalAlignment();
+    TBAAInfo = ST->getTBAAInfo();
+  } else {
+    llvm_unreachable("FindAliasInfo expected a memory operand");
+  }
+
+  return false;
+}
+
+/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
+/// looking for aliasing nodes and adding them to the Aliases vector.
+void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
+                                   SmallVector<SDValue, 8> &Aliases) {
+  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
+  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
+
+  // Get alias information for node.
+  SDValue Ptr;
+  int64_t Size;
+  const Value *SrcValue;
+  int SrcValueOffset;
+  unsigned SrcValueAlign;
+  const MDNode *SrcTBAAInfo;
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+                              SrcValueAlign, SrcTBAAInfo);
+
+  // Starting off.
+  Chains.push_back(OriginalChain);
+  unsigned Depth = 0;
+
+  // Look at each chain and determine if it is an alias.  If so, add it to the
+  // aliases list.  If not, then continue up the chain looking for the next
+  // candidate.
+  while (!Chains.empty()) {
+    SDValue Chain = Chains.back();
+    Chains.pop_back();
+
+    // For TokenFactor nodes, look at each operand and only continue up the
+    // chain until we find two aliases.  If we've seen two aliases, assume we'll
+    // find more and revert to original chain since the xform is unlikely to be
+    // profitable.
+    //
+    // FIXME: The depth check could be made to return the last non-aliasing
+    // chain we found before we hit a tokenfactor rather than the original
+    // chain.
+    if (Depth > 6 || Aliases.size() == 2) {
+      Aliases.clear();
+      Aliases.push_back(OriginalChain);
+      break;
+    }
+
+    // Don't bother if we've been here before.
+    if (!Visited.insert(Chain.getNode()))
+      continue;
+
+    switch (Chain.getOpcode()) {
+    case ISD::EntryToken:
+      // Entry token is ideal chain operand, but handled in FindBetterChain.
+      break;
+
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for Chain.
+      SDValue OpPtr;
+      int64_t OpSize;
+      const Value *OpSrcValue;
+      int OpSrcValueOffset;
+      unsigned OpSrcValueAlign;
+      const MDNode *OpSrcTBAAInfo;
+      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
+                                    OpSrcValue, OpSrcValueOffset,
+                                    OpSrcValueAlign,
+                                    OpSrcTBAAInfo);
+
+      // If chain is alias then stop here.
+      if (!(IsLoad && IsOpLoad) &&
+          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+                  SrcTBAAInfo,
+                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+                  OpSrcValueAlign, OpSrcTBAAInfo)) {
+        Aliases.push_back(Chain);
+      } else {
+        // Look further up the chain.
+        Chains.push_back(Chain.getOperand(0));
+        ++Depth;
+      }
+      break;
+    }
+
+    case ISD::TokenFactor:
+      // We have to check each of the operands of the token factor for "small"
+      // token factors, so we queue them up.  Adding the operands to the queue
+      // (stack) in reverse order maintains the original order and increases the
+      // likelihood that getNode will find a matching token factor (CSE.)
+      if (Chain.getNumOperands() > 16) {
+        Aliases.push_back(Chain);
+        break;
+      }
+      for (unsigned n = Chain.getNumOperands(); n;)
+        Chains.push_back(Chain.getOperand(--n));
+      ++Depth;
+      break;
+
+    default:
+      // For all other instructions we will just have to take what we can get.
+      Aliases.push_back(Chain);
+      break;
+    }
+  }
+}
+
+/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
+/// for a better chain (aliasing node.)
+SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
+  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
+
+  // Accumulate all the aliases to this node.
+  GatherAllAliases(N, OldChain, Aliases);
+
+  if (Aliases.size() == 0) {
+    // If no operands then chain to entry token.
+    return DAG.getEntryNode();
+  } else if (Aliases.size() == 1) {
+    // If a single operand then chain to it.  We don't need to revisit it.
+    return Aliases[0];
+  }
+
+  // Construct a custom tailored token factor.
+  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+                     &Aliases[0], Aliases.size());
+}
+
+// SelectionDAG::Combine - This is the entry point for the file.
+//
+void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
+                           CodeGenOpt::Level OptLevel) {
+  /// run - This is the main entry point to this class.
+  ///
+  DAGCombiner(*this, AA, OptLevel).Run(Level);
+}
diff --git a/final/lib/CodeGen/SelectionDAG/FastISel.cpp b/final/lib/CodeGen/SelectionDAG/FastISel.cpp
new file mode 100644
index 00000000000..490b857b0e9
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -0,0 +1,1251 @@
+//===-- FastISel.cpp - Implementation of the FastISel class --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the FastISel class.
+//
+// "Fast" instruction selection is designed to emit very poor code quickly.
+// Also, it is not designed to be able to do much lowering, so most illegal
+// types (e.g. i64 on 32-bit targets) and operations are not supported.  It is
+// also not intended to be able to do much optimization, except in a few cases
+// where doing optimizations reduces overall compile time.  For example, folding
+// constants into immediate fields is often done, because it's cheap and it
+// reduces the number of instructions later phases have to examine.
+//
+// "Fast" instruction selection is able to fail gracefully and transfer
+// control to the SelectionDAG selector for operations that it doesn't
+// support.  In many cases, this allows us to avoid duplicating a lot of
+// the complicated lowering logic that SelectionDAG currently has.
+//
+// The intended use for "fast" instruction selection is "-O0" mode
+// compilation, where the quality of the generated code is irrelevant when
+// weighed against the speed at which the code can be generated.  Also,
+// at -O0, the LLVM optimizers are not running, and this makes the
+// compile time of codegen a much higher portion of the overall compile
+// time.  Despite its limitations, "fast" instruction selection is able to
+// handle enough code on its own to provide noticeable overall speedups
+// in -O0 compiles.
+//
+// Basic operations are supported in a target-independent way, by reading
+// the same instruction descriptions that the SelectionDAG selector reads,
+// and identifying simple arithmetic operations that can be directly selected
+// from simple operators.  More complicated operations currently require
+// target-specific code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+  LocalValueMap.clear();
+
+  // Start out as null, meaning no local-value instructions have
+  // been emitted.
+  LastLocalValue = 0;
+
+  // Advance the last local value past any EH_LABEL instructions.
+  MachineBasicBlock::iterator
+    I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+  while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+    LastLocalValue = I;
+    ++I;
+  }
+}
+
+bool FastISel::hasTrivialKill(const Value *V) const {
+  // Don't consider constants or arguments to have trivial kills.
+  const Instruction *I = dyn_cast<Instruction>(V);
+  if (!I)
+    return false;
+
+  // No-op casts are trivially coalesced by fast-isel.
+  if (const CastInst *Cast = dyn_cast<CastInst>(I))
+    if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+        !hasTrivialKill(Cast->getOperand(0)))
+      return false;
+
+  // Only instructions with a single use in the same basic block are considered
+  // to have trivial kills.
+  return I->hasOneUse() &&
+         !(I->getOpcode() == Instruction::BitCast ||
+           I->getOpcode() == Instruction::PtrToInt ||
+           I->getOpcode() == Instruction::IntToPtr) &&
+         cast<Instruction>(*I->use_begin())->getParent() == I->getParent();
+}
+
+unsigned FastISel::getRegForValue(const Value *V) {
+  EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
+  // Don't handle non-simple values in FastISel.
+  if (!RealVT.isSimple())
+    return 0;
+
+  // Ignore illegal types. We must do this before looking up the value
+  // in ValueMap because Arguments are given virtual registers regardless
+  // of whether FastISel can handle them.
+  MVT VT = RealVT.getSimpleVT();
+  if (!TLI.isTypeLegal(VT)) {
+    // Promote MVT::i1 to a legal type though, because it's common and easy.
+    if (VT == MVT::i1)
+      VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT();
+    else
+      return 0;
+  }
+
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally.  This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+  if (I != FuncInfo.ValueMap.end()) {
+    unsigned Reg = I->second;
+    return Reg;
+  }
+  unsigned Reg = LocalValueMap[V];
+  if (Reg != 0)
+    return Reg;
+
+  // In bottom-up mode, just create the virtual register which will be used
+  // to hold the value. It will be materialized later.
+  if (isa<Instruction>(V) &&
+      (!isa<AllocaInst>(V) ||
+       !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+    return FuncInfo.InitializeRegForValue(V);
+
+  SavePoint SaveInsertPt = enterLocalValueArea();
+
+  // Materialize the value in a register. Emit any instructions in the
+  // local value area.
+  Reg = materializeRegForValue(V, VT);
+
+  leaveLocalValueArea(SaveInsertPt);
+
+  return Reg;
+}
+
+/// materializeRegForValue - Helper for getRegForValue. This function is
+/// called when the value isn't already available in a register and must
+/// be materialized with new instructions.
+unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
+  unsigned Reg = 0;
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    if (CI->getValue().getActiveBits() <= 64)
+      Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
+  } else if (isa<AllocaInst>(V)) {
+    Reg = TargetMaterializeAlloca(cast<AllocaInst>(V));
+  } else if (isa<ConstantPointerNull>(V)) {
+    // Translate this as an integer zero so that it can be
+    // local-CSE'd with actual integer zeros.
+    Reg =
+      getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext())));
+  } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+    // Try to emit the constant directly.
+    Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF);
+
+    if (!Reg) {
+      // Try to emit the constant by using an integer constant with a cast.
+      const APFloat &Flt = CF->getValueAPF();
+      EVT IntVT = TLI.getPointerTy();
+
+      uint64_t x[2];
+      uint32_t IntBitWidth = IntVT.getSizeInBits();
+      bool isExact;
+      (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true,
+                                  APFloat::rmTowardZero, &isExact);
+      if (isExact) {
+        APInt IntVal(IntBitWidth, 2, x);
+
+        unsigned IntegerReg =
+          getRegForValue(ConstantInt::get(V->getContext(), IntVal));
+        if (IntegerReg != 0)
+          Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+                           IntegerReg, /*Kill=*/false);
+      }
+    }
+  } else if (const Operator *Op = dyn_cast<Operator>(V)) {
+    if (!SelectOperator(Op, Op->getOpcode()))
+      if (!isa<Instruction>(Op) ||
+          !TargetSelectInstruction(cast<Instruction>(Op)))
+        return 0;
+    Reg = lookUpRegForValue(Op);
+  } else if (isa<UndefValue>(V)) {
+    Reg = createResultReg(TLI.getRegClassFor(VT));
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+  }
+
+  // If target-independent code couldn't handle the value, give target-specific
+  // code a try.
+  if (!Reg && isa<Constant>(V))
+    Reg = TargetMaterializeConstant(cast<Constant>(V));
+
+  // Don't cache constant materializations in the general ValueMap.
+  // To do so would require tracking what uses they dominate.
+  if (Reg != 0) {
+    LocalValueMap[V] = Reg;
+    LastLocalValue = MRI.getVRegDef(Reg);
+  }
+  return Reg;
+}
+
+unsigned FastISel::lookUpRegForValue(const Value *V) {
+  // Look up the value to see if we already have a register for it. We
+  // cache values defined by Instructions across blocks, and other values
+  // only locally. This is because Instructions already have the SSA
+  // def-dominates-use requirement enforced.
+  DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+  if (I != FuncInfo.ValueMap.end())
+    return I->second;
+  return LocalValueMap[V];
+}
+
+/// UpdateValueMap - Update the value map to include the new mapping for this
+/// instruction, or insert an extra copy to get the result in a previously
+/// determined register.
+/// NOTE: This is only necessary because we might select a block that uses
+/// a value before we select the block that defines the value.  It might be
+/// possible to fix this by selecting blocks in reverse postorder.
+unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
+  if (!isa<Instruction>(I)) {
+    LocalValueMap[I] = Reg;
+    return Reg;
+  }
+
+  unsigned &AssignedReg = FuncInfo.ValueMap[I];
+  if (AssignedReg == 0)
+    // Use the new register.
+    AssignedReg = Reg;
+  else if (Reg != AssignedReg) {
+    // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+    FuncInfo.RegFixups[AssignedReg] = Reg;
+
+    AssignedReg = Reg;
+  }
+
+  return AssignedReg;
+}
+
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
+  unsigned IdxN = getRegForValue(Idx);
+  if (IdxN == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return std::pair<unsigned, bool>(0, false);
+
+  bool IdxNIsKill = hasTrivialKill(Idx);
+
+  // If the index is smaller or larger than intptr_t, truncate or extend it.
+  MVT PtrVT = TLI.getPointerTy();
+  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
+  if (IdxVT.bitsLT(PtrVT)) {
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+                      IdxN, IdxNIsKill);
+    IdxNIsKill = true;
+  }
+  else if (IdxVT.bitsGT(PtrVT)) {
+    IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+                      IdxN, IdxNIsKill);
+    IdxNIsKill = true;
+  }
+  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
+}
+
+void FastISel::recomputeInsertPt() {
+  if (getLastLocalValue()) {
+    FuncInfo.InsertPt = getLastLocalValue();
+    FuncInfo.MBB = FuncInfo.InsertPt->getParent();
+    ++FuncInfo.InsertPt;
+  } else
+    FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+  // Now skip past any EH_LABELs, which must remain at the beginning.
+  while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+         FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+    ++FuncInfo.InsertPt;
+}
+
+FastISel::SavePoint FastISel::enterLocalValueArea() {
+  MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+  DebugLoc OldDL = DL;
+  recomputeInsertPt();
+  DL = DebugLoc();
+  SavePoint SP = { OldInsertPt, OldDL };
+  return SP;
+}
+
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
+  if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+    LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+  // Restore the previous insert position.
+  FuncInfo.InsertPt = OldInsertPt.InsertPt;
+  DL = OldInsertPt.DL;
+}
+
+/// SelectBinaryOp - Select and emit code for a binary operator instruction,
+/// which has an opcode which directly corresponds to the given ISD opcode.
+///
+bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
+  EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true);
+  if (VT == MVT::Other || !VT.isSimple())
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+
+  // We only handle legal types. For example, on x86-32 the instruction
+  // selector contains all of the 64-bit instructions from x86-64,
+  // under the assumption that i64 won't be used if the target doesn't
+  // support it.
+  if (!TLI.isTypeLegal(VT)) {
+    // MVT::i1 is special. Allow AND, OR, or XOR because they
+    // don't require additional zeroing, which makes them easy.
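+    // (both promoted operands already have zero high bits, and these ops
+    // cannot set them)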
+    if (VT == MVT::i1 &&
+        (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
+         ISDOpcode == ISD::XOR))
+      VT = TLI.getTypeToTransformTo(I->getContext(), VT);
+    else
+      return false;
+  }
+
+  unsigned Op0 = getRegForValue(I->getOperand(0));
+  if (Op0 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+  // Check if the second operand is a constant and handle it appropriately.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, Op0IsKill,
+                                     CI->getZExtValue());
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  // Check if the second operand is a constant float.
+  if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
+    unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
+                                     ISDOpcode, Op0, Op0IsKill, CF);
+    if (ResultReg != 0) {
+      // We successfully emitted code for the given LLVM Instruction.
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+  }
+
+  unsigned Op1 = getRegForValue(I->getOperand(1));
+  if (Op1 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
+  // Now we have both operands in registers. Emit the instruction.
+  unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
+                                   ISDOpcode,
+                                   Op0, Op0IsKill,
+                                   Op1, Op1IsKill);
+  if (ResultReg == 0)
+    // Target-specific code wasn't able to find a machine opcode for
+    // the given ISD opcode and type. Halt "fast" selection and bail.
+    return false;
+
+  // We successfully emitted code for the given LLVM Instruction.
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool FastISel::SelectGetElementPtr(const User *I) {
+  unsigned N = getRegForValue(I->getOperand(0));
+  if (N == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  bool NIsKill = hasTrivialKill(I->getOperand(0));
+
+  const Type *Ty = I->getOperand(0)->getType();
+  MVT VT = TLI.getPointerTy();
+  for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
+       E = I->op_end(); OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
+        // FIXME: This can be optimized by combining the add with a
+        // subsequent one.
+        N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+        if (N == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+        NIsKill = true;
+      }
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->isZero()) continue;
+        uint64_t Offs =
+          TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
+        if (N == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+        NIsKill = true;
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      uint64_t ElementSize = TD.getTypeAllocSize(Ty);
+      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+      unsigned IdxN = Pair.first;
+      bool IdxNIsKill = Pair.second;
+      if (IdxN == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+
+      if (ElementSize != 1) {
+        IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+        if (IdxN == 0)
+          // Unhandled operand. Halt "fast" selection and bail.
+          return false;
+        IdxNIsKill = true;
+      }
+      N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+      if (N == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+    }
+  }
+
+  // We successfully emitted code for the given LLVM Instruction.
+  UpdateValueMap(I, N);
+  return true;
+}
+
+bool FastISel::SelectCall(const User *I) {
+  const Function *F = cast<CallInst>(I)->getCalledFunction();
+  if (!F) return false;
+
+  // Handle selected intrinsic function calls.
+  unsigned IID = F->getIntrinsicID();
+  switch (IID) {
+  default: break;
+  case Intrinsic::dbg_declare: {
+    const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
+    if (!DIVariable(DI->getVariable()).Verify() ||
+        !FuncInfo.MF->getMMI().hasDebugInfo())
+      return true;
+
+    const Value *Address = DI->getAddress();
+    if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
+      return true;
+
+    unsigned Reg = 0;
+    unsigned Offset = 0;
+    if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+      if (Arg->hasByValAttr()) {
+        // Byval arguments' frame index is recorded during argument lowering.
+        // Use this info directly.
+        Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+        if (Offset)
+          Reg = TRI.getFrameRegister(*FuncInfo.MF);
+      }
+    }
+    if (!Reg)
+      Reg = getRegForValue(Address);
+
+    if (Reg)
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(Reg, RegState::Debug).addImm(Offset)
+        .addMetadata(DI->getVariable());
+    return true;
+  }
+  case Intrinsic::dbg_value: {
+    // This form of DBG_VALUE is target-independent.
+    const DbgValueInst *DI = cast<DbgValueInst>(I);
+    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    const Value *V = DI->getValue();
+    if (!V) {
+      // Currently the optimizer can produce this; insert an undef to
+      // help debugging.  Probably the optimizer should not do this.
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        .addReg(0U).addImm(DI->getOffset())
+        .addMetadata(DI->getVariable());
+    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+        .addMetadata(DI->getVariable());
+    } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        .addFPImm(CF).addImm(DI->getOffset())
+        .addMetadata(DI->getVariable());
+    } else if (unsigned Reg = lookUpRegForValue(V)) {
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+        .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+        .addMetadata(DI->getVariable());
+    } else {
+      // We can't yet handle anything else here because it would require
+      // generating code, thus altering codegen because of debug info.
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
+    }
+    return true;
+  }
+  case Intrinsic::eh_exception: {
+    EVT VT = TLI.getValueType(I->getType());
+    switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
+    default: break;
+    case TargetLowering::Expand: {
+      assert(FuncInfo.MBB->isLandingPad() &&
+             "Call to eh.exception not in landing pad!");
+      unsigned Reg = TLI.getExceptionAddressRegister();
+      const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              ResultReg).addReg(Reg);
+      UpdateValueMap(I, ResultReg);
+      return true;
+    }
+    }
+    break;
+  }
+  case Intrinsic::eh_selector: {
+    EVT VT = TLI.getValueType(I->getType());
+    switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
+    default: break;
+    case TargetLowering::Expand: {
+      if (FuncInfo.MBB->isLandingPad())
+        AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
+      else {
+#ifndef NDEBUG
+        FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
+#endif
+        // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+        unsigned Reg = TLI.getExceptionSelectorRegister();
+        if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+      }
+
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      EVT SrcVT = TLI.getPointerTy();
+      const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
+      unsigned ResultReg = createResultReg(RC);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              ResultReg).addReg(Reg);
+
+      bool ResultRegIsKill = hasTrivialKill(I);
+
+      // Cast the register to the type of the selector.
+      if (SrcVT.bitsGT(MVT::i32))
+        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
+                               ResultReg, ResultRegIsKill);
+      else if (SrcVT.bitsLT(MVT::i32))
+        ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
+                               ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
+      if (ResultReg == 0)
+        // Unhandled operand. Halt "fast" selection and bail.
+        return false;
+
+      UpdateValueMap(I, ResultReg);
+
+      return true;
+    }
+    }
+    break;
+  }
+  }
+
+  // An arbitrary call. Bail.
+  return false;
+}
+
+bool FastISel::SelectCast(const User *I, unsigned Opcode) {
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(I->getType());
+
+  if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+      DstVT == MVT::Other || !DstVT.isSimple())
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+
+  // Check if the destination type is legal. Or as a special case,
+  // it may be i1 if we're doing a truncate because that's
+  // easy and somewhat common.
+  if (!TLI.isTypeLegal(DstVT))
+    if (DstVT != MVT::i1 || Opcode != ISD::TRUNCATE)
+      // Unhandled type. Halt "fast" selection and bail.
+      return false;
+
+  // Check if the source operand is legal. Or as a special case,
+  // it may be i1 if we're doing zero-extension because that's
+  // easy and somewhat common.
+  if (!TLI.isTypeLegal(SrcVT))
+    if (SrcVT != MVT::i1 || Opcode != ISD::ZERO_EXTEND)
+      // Unhandled type. Halt "fast" selection and bail.
+      return false;
+
+  unsigned InputReg = getRegForValue(I->getOperand(0));
+  if (!InputReg)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
+  // If the operand is i1, arrange for the high bits in the register to be zero.
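+  // (the promoted register may otherwise hold garbage above bit 0)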
+  if (SrcVT == MVT::i1) {
+    SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
+    InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
+    if (!InputReg)
+      return false;
+    InputRegIsKill = true;
+  }
+  // If the result is i1, truncate to the target's type for i1 first.
+  if (DstVT == MVT::i1)
+    DstVT = TLI.getTypeToTransformTo(I->getContext(), DstVT);
+
+  unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
+                                  DstVT.getSimpleVT(),
+                                  Opcode,
+                                  InputReg, InputRegIsKill);
+  if (!ResultReg)
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool FastISel::SelectBitCast(const User *I) {
+  // If the bitcast doesn't change the type, just use the operand value.
+  if (I->getType() == I->getOperand(0)->getType()) {
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0)
+      return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  // Bitcasts of other values become reg-reg copies or BITCAST operators.
+  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+  EVT DstVT = TLI.getValueType(I->getType());
+
+  if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
+      DstVT == MVT::Other || !DstVT.isSimple() ||
+      !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
+    // Unhandled type. Halt "fast" selection and bail.
+    return false;
+
+  unsigned Op0 = getRegForValue(I->getOperand(0));
+  if (Op0 == 0)
+    // Unhandled operand. Halt "fast" selection and bail.
+    return false;
+
+  bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
+  // First, try to perform the bitcast by inserting a reg-reg copy.
+  unsigned ResultReg = 0;
+  if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
+    TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
+    TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
+    // Don't attempt a cross-class copy. It will likely fail.
+    if (SrcClass == DstClass) {
+      ResultReg = createResultReg(DstClass);
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+              ResultReg).addReg(Op0);
+    }
+  }
+
+  // If the reg-reg copy failed, select a BITCAST opcode.
+  if (!ResultReg)
+    ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
+                           ISD::BITCAST, Op0, Op0IsKill);
+
+  if (!ResultReg)
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool
+FastISel::SelectInstruction(const Instruction *I) {
+  // Just before the terminator instruction, insert instructions to
+  // feed PHI nodes in successor blocks.
+  if (isa<TerminatorInst>(I))
+    if (!HandlePHINodesInSuccessorBlocks(I->getParent()))
+      return false;
+
+  DL = I->getDebugLoc();
+
+  // First, try doing target-independent selection.
+  if (SelectOperator(I, I->getOpcode())) {
+    DL = DebugLoc();
+    return true;
+  }
+
+  // Next, try calling the target to attempt to handle the instruction.
+  if (TargetSelectInstruction(I)) {
+    DL = DebugLoc();
+    return true;
+  }
+
+  DL = DebugLoc();
+  return false;
+}
+
+/// FastEmitBranch - Emit an unconditional branch to the given block,
+/// unless it is the immediate (fall-through) successor, and update
+/// the CFG.
+void
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+  if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
+    // The unconditional fall-through case, which needs no instructions.
+  } else {
+    // The unconditional branch case.
+    TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+                     SmallVector<MachineOperand, 0>(), DL);
+  }
+  FuncInfo.MBB->addSuccessor(MSucc);
+}
+
+/// SelectFNeg - Emit an FNeg operation.
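+/// If the target has no FNEG instruction, the value is bitcast to an
+/// integer, the sign bit is flipped by XOR-ing with (1 << (bits - 1)),
+/// which is 0x8000000000000000 for an f64, and the result is bitcast
+/// back to floating-point.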
+///
+bool
+FastISel::SelectFNeg(const User *I) {
+  unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
+  if (OpReg == 0) return false;
+
+  bool OpRegIsKill = hasTrivialKill(I);
+
+  // If the target has ISD::FNEG, use it.
+  EVT VT = TLI.getValueType(I->getType());
+  unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
+                                  ISD::FNEG, OpReg, OpRegIsKill);
+  if (ResultReg != 0) {
+    UpdateValueMap(I, ResultReg);
+    return true;
+  }
+
+  // Bitcast the value to integer, twiddle the sign bit with xor,
+  // and then bitcast it back to floating-point.
+  if (VT.getSizeInBits() > 64) return false;
+  EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits());
+  if (!TLI.isTypeLegal(IntVT))
+    return false;
+
+  unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
+                               ISD::BITCAST, OpReg, OpRegIsKill);
+  if (IntReg == 0)
+    return false;
+
+  unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+                                       IntReg, /*Kill=*/true,
+                                       UINT64_C(1) << (VT.getSizeInBits()-1),
+                                       IntVT.getSimpleVT());
+  if (IntResultReg == 0)
+    return false;
+
+  ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
+                         ISD::BITCAST, IntResultReg, /*Kill=*/true);
+  if (ResultReg == 0)
+    return false;
+
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool
+FastISel::SelectOperator(const User *I, unsigned Opcode) {
+  switch (Opcode) {
+  case Instruction::Add:
+    return SelectBinaryOp(I, ISD::ADD);
+  case Instruction::FAdd:
+    return SelectBinaryOp(I, ISD::FADD);
+  case Instruction::Sub:
+    return SelectBinaryOp(I, ISD::SUB);
+  case Instruction::FSub:
+    // FNeg is currently represented in LLVM IR as a special case of FSub.
+    if (BinaryOperator::isFNeg(I))
+      return SelectFNeg(I);
+    return SelectBinaryOp(I, ISD::FSUB);
+  case Instruction::Mul:
+    return SelectBinaryOp(I, ISD::MUL);
+  case Instruction::FMul:
+    return SelectBinaryOp(I, ISD::FMUL);
+  case Instruction::SDiv:
+    return SelectBinaryOp(I, ISD::SDIV);
+  case Instruction::UDiv:
+    return SelectBinaryOp(I, ISD::UDIV);
+  case Instruction::FDiv:
+    return SelectBinaryOp(I, ISD::FDIV);
+  case Instruction::SRem:
+    return SelectBinaryOp(I, ISD::SREM);
+  case Instruction::URem:
+    return SelectBinaryOp(I, ISD::UREM);
+  case Instruction::FRem:
+    return SelectBinaryOp(I, ISD::FREM);
+  case Instruction::Shl:
+    return SelectBinaryOp(I, ISD::SHL);
+  case Instruction::LShr:
+    return SelectBinaryOp(I, ISD::SRL);
+  case Instruction::AShr:
+    return SelectBinaryOp(I, ISD::SRA);
+  case Instruction::And:
+    return SelectBinaryOp(I, ISD::AND);
+  case Instruction::Or:
+    return SelectBinaryOp(I, ISD::OR);
+  case Instruction::Xor:
+    return SelectBinaryOp(I, ISD::XOR);
+
+  case Instruction::GetElementPtr:
+    return SelectGetElementPtr(I);
+
+  case Instruction::Br: {
+    const BranchInst *BI = cast<BranchInst>(I);
+
+    if (BI->isUnconditional()) {
+      const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+      MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+      FastEmitBranch(MSucc, BI->getDebugLoc());
+      return true;
+    }
+
+    // Conditional branches are not handled yet.
+    // Halt "fast" selection and bail.
+    return false;
+  }
+
+  case Instruction::Unreachable:
+    // Nothing to emit.
+    return true;
+
+  case Instruction::Alloca:
+    // FunctionLowering has the static-sized case covered.
+    if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+      return true;
+
+    // Dynamic-sized alloca is not handled yet.
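+    // For example, "%buf = alloca [64 x i8]" in the entry block is static
+    // and already has a frame index, while "%p = alloca i8, i32 %n" is
+    // dynamic and falls through to the bail-out below.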
+    return false;
+
+  case Instruction::Call:
+    return SelectCall(I);
+
+  case Instruction::BitCast:
+    return SelectBitCast(I);
+
+  case Instruction::FPToSI:
+    return SelectCast(I, ISD::FP_TO_SINT);
+  case Instruction::ZExt:
+    return SelectCast(I, ISD::ZERO_EXTEND);
+  case Instruction::SExt:
+    return SelectCast(I, ISD::SIGN_EXTEND);
+  case Instruction::Trunc:
+    return SelectCast(I, ISD::TRUNCATE);
+  case Instruction::SIToFP:
+    return SelectCast(I, ISD::SINT_TO_FP);
+
+  case Instruction::IntToPtr: // Deliberate fall-through.
+  case Instruction::PtrToInt: {
+    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
+    EVT DstVT = TLI.getValueType(I->getType());
+    if (DstVT.bitsGT(SrcVT))
+      return SelectCast(I, ISD::ZERO_EXTEND);
+    if (DstVT.bitsLT(SrcVT))
+      return SelectCast(I, ISD::TRUNCATE);
+    unsigned Reg = getRegForValue(I->getOperand(0));
+    if (Reg == 0) return false;
+    UpdateValueMap(I, Reg);
+    return true;
+  }
+
+  case Instruction::PHI:
+    llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+  default:
+    // Unhandled instruction. Halt "fast" selection and bail.
+    return false;
+  }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+  : FuncInfo(funcInfo),
+    MRI(FuncInfo.MF->getRegInfo()),
+    MFI(*FuncInfo.MF->getFrameInfo()),
+    MCP(*FuncInfo.MF->getConstantPool()),
+    TM(FuncInfo.MF->getTarget()),
+    TD(*TM.getTargetData()),
+    TII(*TM.getInstrInfo()),
+    TLI(*TM.getTargetLowering()),
+    TRI(*TM.getRegisterInfo()) {
+}
+
+FastISel::~FastISel() {}
+
+unsigned FastISel::FastEmit_(MVT, MVT,
+                             unsigned) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_r(MVT, MVT,
+                              unsigned,
+                              unsigned /*Op0*/, bool /*Op0IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rr(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               unsigned /*Op1*/, bool /*Op1IsKill*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_f(MVT, MVT,
+                              unsigned, const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_ri(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               uint64_t /*Imm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rf(MVT, MVT,
+                               unsigned,
+                               unsigned /*Op0*/, bool /*Op0IsKill*/,
+                               const ConstantFP * /*FPImm*/) {
+  return 0;
+}
+
+unsigned FastISel::FastEmit_rri(MVT, MVT,
+                                unsigned,
+                                unsigned /*Op0*/, bool /*Op0IsKill*/,
+                                unsigned /*Op1*/, bool /*Op1IsKill*/,
+                                uint64_t /*Imm*/) {
+  return 0;
+}
+
+/// FastEmit_ri_ - This method is a wrapper of FastEmit_ri. It first tries
+/// to emit an instruction with an immediate operand using FastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// FastEmit_rr instead.
+unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
+                                unsigned Op0, bool Op0IsKill,
+                                uint64_t Imm, MVT ImmType) {
+  // First check if immediate type is legal. If not, we can't use the ri form.
+  unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+  if (ResultReg != 0)
+    return ResultReg;
+  unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+  if (MaterialReg == 0)
+    return 0;
+  return FastEmit_rr(VT, VT, Opcode,
+                     Op0, Op0IsKill,
+                     MaterialReg, /*Kill=*/true);
+}
+
+/// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries
+/// to emit an instruction with a floating-point immediate operand using
+/// FastEmit_rf. If that fails, it materializes the immediate into a register
+/// and tries FastEmit_rr instead.
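+/// For example, if a target cannot encode "fadd %x, 2.0" directly and
+/// cannot materialize 2.0 into a register, 2.0 is converted exactly to the
+/// integer 2, that integer is materialized, and SINT_TO_FP rebuilds the
+/// float before the FastEmit_rr retry (illustrative constant; only values
+/// that round-trip exactly through an integer are handled this way).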
+unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm, MVT ImmType) { + // First check if immediate type is legal. If not, we can't use the rf form. + unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); + if (ResultReg != 0) + return ResultReg; + + // Materialize the constant in a register. + unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); + if (MaterialReg == 0) { + // If the target doesn't have a way to directly enter a floating-point + // value into a register, use an alternate approach. + // TODO: The current approach only supports floating-point constants + // that can be constructed by conversion from integer values. This should + // be replaced by code that creates a load from a constant-pool entry, + // which will require some target-specific work. + const APFloat &Flt = FPImm->getValueAPF(); + EVT IntVT = TLI.getPointerTy(); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (!isExact) + return 0; + APInt IntVal(IntBitWidth, 2, x); + + unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::Constant, IntVal.getZExtValue()); + if (IntegerReg == 0) + return 0; + MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, + ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); + if (MaterialReg == 0) + return 0; + } + return FastEmit_rr(VT, VT, Opcode, + Op0, Op0IsKill, + MaterialReg, /*Kill=*/true); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + 
const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); + return ResultReg; +} + +/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op +/// with all but the least significant bit set to zero. +unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +} + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. 
As such, the start of the
+/// BB might correspond to a different MBB than the end.
+bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+  unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      // Only handle legal types. Two interesting things to note here. First,
+      // by bailing out early, we may leave behind some dead instructions,
+      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
+      // own moves. Second, this check is necessary because FastISel doesn't
+      // use CreateRegs to create registers, so it always creates
+      // exactly one register for each non-void instruction.
+      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
+      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
+        // Promote MVT::i1.
+        if (VT == MVT::i1)
+          VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
+        else {
+          FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+          return false;
+        }
+      }
+
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      // Set the DebugLoc for the copy. Prefer the location of the operand
+      // if there is one; use the location of the PHI otherwise.
+      DL = PN->getDebugLoc();
+      if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+        DL = Inst->getDebugLoc();
+
+      unsigned Reg = getRegForValue(PHIOp);
+      if (Reg == 0) {
+        FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+        return false;
+      }
+      FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+      DL = DebugLoc();
+    }
+  }
+
+  return true;
+}
diff --git a/final/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/final/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
new file mode 100644
index 00000000000..d8a5770d36c
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -0,0 +1,477 @@
+//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating functions from LLVM IR into
+// Machine IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "function-lowering-info"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+using namespace llvm;
+
+/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by
+/// PHI nodes or outside of the basic block that defines it, or used by a
+/// switch or atomic instruction, which may expand to multiple basic blocks.
+static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
+  if (I->use_empty()) return false;
+  if (isa<PHINode>(I)) return true;
+  const BasicBlock *BB = I->getParent();
+  for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
+       UI != E; ++UI) {
+    const User *U = *UI;
+    if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
+      return true;
+  }
+  return false;
+}
+
+/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
+/// entry block, return true. This includes arguments used by switches, since
+/// the switch may expand into multiple basic blocks.
+static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
+  // With FastISel active, we may be splitting blocks, so force creation
+  // of virtual registers for all non-dead arguments.
+  if (EnableFastISel)
+    return A->use_empty();
+
+  const BasicBlock *Entry = A->getParent()->begin();
+  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
+       UI != E; ++UI) {
+    const User *U = *UI;
+    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
+      return false;  // Use not in entry block.
+  }
+  return true;
+}
+
+FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
+  : TLI(tli) {
+}
+
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+  Fn = &fn;
+  MF = &mf;
+  RegInfo = &MF->getRegInfo();
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(Fn->getReturnType(),
+                Fn->getAttributes().getRetAttributes(), Outs, TLI);
+  CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+                                      Outs, Fn->getContext());
+
+  // Create a vreg for each argument register that is not dead and is used
+  // outside of the entry block for the function.
+  for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
+       AI != E; ++AI)
+    if (!isOnlyUsedInEntryBlock(AI, EnableFastISel))
+      InitializeRegForValue(AI);
+
+  // Initialize the mapping of values to registers. This is only set up for
+  // instruction values that are used outside of the block that defines
+  // them.
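+  // For example, if "%x = add i32 %a, %b" in one block is read by an
+  // instruction in another block, %x gets a virtual register here so the
+  // two machine basic blocks can communicate through it.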
+  Function::const_iterator BB = Fn->begin(), EB = Fn->end();
+  for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
+      if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
+        const Type *Ty = AI->getAllocatedType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align =
+          std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+                   AI->getAlignment());
+
+        TySize *= CUI->getZExtValue();   // Get total allocated size.
+        if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
+
+        // The object may need to be placed onto the stack near the stack
+        // protector if one exists. Determine here if this object is a suitable
+        // candidate. I.e., it would trigger the creation of a stack protector.
+        bool MayNeedSP =
+          (AI->isArrayAllocation() ||
+           (TySize > 8 && isa<ArrayType>(Ty) &&
+            cast<ArrayType>(Ty)->getElementType()->isIntegerTy(8)));
+        StaticAllocaMap[AI] =
+          MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP);
+      }
+
+  for (; BB != EB; ++BB)
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      // Mark values used outside their block as exported, by allocating
+      // a virtual register for them.
+      if (isUsedOutsideOfDefiningBlock(I))
+        if (!isa<AllocaInst>(I) ||
+            !StaticAllocaMap.count(cast<AllocaInst>(I)))
+          InitializeRegForValue(I);
+
+      // Collect llvm.dbg.declare information. This is done now instead of
+      // during the initial isel pass through the IR so that it is done
+      // in a predictable order.
+      if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+        MachineModuleInfo &MMI = MF->getMMI();
+        if (MMI.hasDebugInfo() &&
+            DIVariable(DI->getVariable()).Verify() &&
+            !DI->getDebugLoc().isUnknown()) {
+          // Don't handle byval struct arguments or VLAs, for example.
+          // Non-byval arguments are handled here (they refer to the stack
+          // temporary alloca at this point).
+          const Value *Address = DI->getAddress();
+          if (Address) {
+            if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+              Address = BCI->getOperand(0);
+            if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+              DenseMap<const AllocaInst *, int>::iterator SI =
+                StaticAllocaMap.find(AI);
+              if (SI != StaticAllocaMap.end()) { // Check for VLAs.
+                int FI = SI->second;
+                MMI.setVariableDbgInfo(DI->getVariable(),
+                                       FI, DI->getDebugLoc());
+              }
+            }
+          }
+        }
+      }
+    }
+
+  // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
+  // also creates the initial PHI MachineInstrs, though none of the input
+  // operands are populated.
+  for (BB = Fn->begin(); BB != EB; ++BB) {
+    MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB);
+    MBBMap[BB] = MBB;
+    MF->push_back(MBB);
+
+    // Transfer the address-taken flag. This is necessary because there could
+    // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
+    // the first one should be marked.
+    if (BB->hasAddressTaken())
+      MBB->setHasAddressTaken();
+
+    // Create Machine PHI nodes for LLVM PHI nodes, lowering them as
+    // appropriate.
+    for (BasicBlock::const_iterator I = BB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      if (PN->use_empty()) continue;
+
+      DebugLoc DL = PN->getDebugLoc();
+      unsigned PHIReg = ValueMap[PN];
+      assert(PHIReg && "PHI node does not have an assigned virtual register!");
+
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+        const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+        for (unsigned i = 0; i != NumRegisters; ++i)
+          BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
+        PHIReg += NumRegisters;
+      }
+    }
+  }
+
+  // Mark landing pad blocks.
+  for (BB = Fn->begin(); BB != EB; ++BB)
+    if (const InvokeInst *Invoke = dyn_cast<InvokeInst>(BB->getTerminator()))
+      MBBMap[Invoke->getSuccessor(1)]->setIsLandingPad();
+}
+
+/// clear - Clear out all the function-specific state. This returns this
+/// FunctionLoweringInfo to an empty state, ready to be used for a
+/// different function.
+void FunctionLoweringInfo::clear() {
+  assert(CatchInfoFound.size() == CatchInfoLost.size() &&
+         "Not all catch info was assigned to a landing pad!");
+
+  MBBMap.clear();
+  ValueMap.clear();
+  StaticAllocaMap.clear();
+#ifndef NDEBUG
+  CatchInfoLost.clear();
+  CatchInfoFound.clear();
+#endif
+  LiveOutRegInfo.clear();
+  VisitedBBs.clear();
+  ArgDbgValues.clear();
+  ByValArgFrameIndexMap.clear();
+  RegFixups.clear();
+}
+
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
+  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+}
+
+/// CreateRegs - Allocate the appropriate number of virtual registers of
+/// the correctly promoted or expanded types. Assign these registers
+/// consecutive vreg numbers and return the first assigned number.
+///
+/// In the case that the given value has struct or array type, this function
+/// will assign registers for each member or element.
+///
+unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, Ty, ValueVTs);
+
+  unsigned FirstReg = 0;
+  for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    EVT ValueVT = ValueVTs[Value];
+    EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+
+    unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      unsigned R = CreateReg(RegisterVT);
+      if (!FirstReg) FirstReg = R;
+    }
+  }
+  return FirstReg;
+}
+
+/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
+/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
+/// the register's LiveOutInfo is for a smaller bit width, it is extended to
+/// the larger bit width by zero extension. The bit width must be no smaller
+/// than the LiveOutInfo's existing bit width.
+const FunctionLoweringInfo::LiveOutInfo *
+FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
+  if (!LiveOutRegInfo.inBounds(Reg))
+    return NULL;
+
+  LiveOutInfo *LOI = &LiveOutRegInfo[Reg];
+  if (!LOI->IsValid)
+    return NULL;
+
+  if (BitWidth > LOI->KnownZero.getBitWidth()) {
+    LOI->NumSignBits = 1;
+    LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth);
+    LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth);
+  }
+
+  return LOI;
+}
+
+/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination
+/// register based on the LiveOutInfo of its operands.
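+/// For example, a PHI whose incoming values are the constants 1 and 3 ends
+/// up with KnownOne = 1 and KnownZero = ~3: the low bit is set in every
+/// incoming value, and all bits above bit 1 are clear in every incoming
+/// value.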
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
+  const Type *Ty = PN->getType();
+  if (!Ty->isIntegerTy() || Ty->isVectorTy())
+    return;
+
+  SmallVector<EVT, 1> ValueVTs;
+  ComputeValueVTs(TLI, Ty, ValueVTs);
+  assert(ValueVTs.size() == 1 &&
+         "PHIs with non-vector integer types should have a single VT.");
+  EVT IntVT = ValueVTs[0];
+
+  if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+    return;
+  IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+  unsigned BitWidth = IntVT.getSizeInBits();
+
+  unsigned DestReg = ValueMap[PN];
+  if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+    return;
+  LiveOutRegInfo.grow(DestReg);
+  LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
+
+  Value *V = PN->getIncomingValue(0);
+  if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+    DestLOI.NumSignBits = 1;
+    APInt Zero(BitWidth, 0);
+    DestLOI.KnownZero = Zero;
+    DestLOI.KnownOne = Zero;
+    return;
+  }
+
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+    APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+    DestLOI.NumSignBits = Val.getNumSignBits();
+    DestLOI.KnownZero = ~Val;
+    DestLOI.KnownOne = Val;
+  } else {
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+           "its CopyToReg node was created.");
+    unsigned SrcReg = ValueMap[V];
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+    if (!SrcLOI) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    DestLOI = *SrcLOI;
+  }
+
+  assert(DestLOI.KnownZero.getBitWidth() == BitWidth &&
+         DestLOI.KnownOne.getBitWidth() == BitWidth &&
+         "Masks should have the same bit width as the type.");
+
+  for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) {
+    Value *V = PN->getIncomingValue(i);
+    if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) {
+      DestLOI.NumSignBits = 1;
+      APInt Zero(BitWidth, 0);
+      DestLOI.KnownZero = Zero;
+      DestLOI.KnownOne = Zero;
+      return;
+    }
+
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+      APInt Val = CI->getValue().zextOrTrunc(BitWidth);
+      DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits());
+      DestLOI.KnownZero &= ~Val;
+      DestLOI.KnownOne &= Val;
+      continue;
+    }
+
+    assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
+           "its CopyToReg node was created.");
+    unsigned SrcReg = ValueMap[V];
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth);
+    if (!SrcLOI) {
+      DestLOI.IsValid = false;
+      return;
+    }
+    DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
+    DestLOI.KnownZero &= SrcLOI->KnownZero;
+    DestLOI.KnownOne &= SrcLOI->KnownOne;
+  }
+}
+
+/// setByValArgumentFrameIndex - Record frame index for the byval
+/// argument. This overrides previous frame index entry for this argument,
+/// if any.
+void FunctionLoweringInfo::setByValArgumentFrameIndex(const Argument *A,
+                                                      int FI) {
+  assert(A->hasByValAttr() && "Argument does not have byval attribute!");
+  ByValArgFrameIndexMap[A] = FI;
+}
+
+/// getByValArgumentFrameIndex - Get frame index for the byval argument.
+/// If the argument does not have any assigned frame index then 0 is
+/// returned.
+int FunctionLoweringInfo::getByValArgumentFrameIndex(const Argument *A) {
+  assert(A->hasByValAttr() && "Argument does not have byval attribute!");
+  DenseMap<const Argument *, int>::iterator I =
+    ByValArgFrameIndexMap.find(A);
+  if (I != ByValArgFrameIndexMap.end())
+    return I->second;
+  DEBUG(dbgs() << "Argument does not have assigned frame index!");
+  return 0;
+}
+
+/// AddCatchInfo - Extract the personality and type infos from an eh.selector
+/// call, and add them to the specified machine basic block.
+void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
+                        MachineBasicBlock *MBB) {
+  // Inform the MachineModuleInfo of the personality for this landing pad.
+  const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
+  assert(CE->getOpcode() == Instruction::BitCast &&
+         isa<Function>(CE->getOperand(0)) &&
+         "Personality should be a function");
+  MMI->addPersonality(MBB, cast<Function>(CE->getOperand(0)));
+
+  // Gather all the type infos for this landing pad and pass them along to
+  // MachineModuleInfo.
+  std::vector<const GlobalVariable *> TyInfo;
+  unsigned N = I.getNumArgOperands();
+
+  for (unsigned i = N - 1; i > 1; --i) {
+    if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
+      unsigned FilterLength = CI->getZExtValue();
+      unsigned FirstCatch = i + FilterLength + !FilterLength;
+      assert(FirstCatch <= N && "Invalid filter length");
+
+      if (FirstCatch < N) {
+        TyInfo.reserve(N - FirstCatch);
+        for (unsigned j = FirstCatch; j < N; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+        MMI->addCatchTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      if (!FilterLength) {
+        // Cleanup.
+        MMI->addCleanup(MBB);
+      } else {
+        // Filter.
+        TyInfo.reserve(FilterLength - 1);
+        for (unsigned j = i + 1; j < FirstCatch; ++j)
+          TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+        MMI->addFilterTypeInfo(MBB, TyInfo);
+        TyInfo.clear();
+      }
+
+      N = i;
+    }
+  }
+
+  if (N > 2) {
+    TyInfo.reserve(N - 2);
+    for (unsigned j = 2; j < N; ++j)
+      TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
+    MMI->addCatchTypeInfo(MBB, TyInfo);
+  }
+}
+
+void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad,
+                         MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) {
+  SmallPtrSet<const BasicBlock *, 4> Visited;
+
+  // The 'eh.selector' call may not be in the direct successor of a basic block,
+  // but could be several successors deeper. If we don't find it, try going one
+  // level further.
+  while (Visited.insert(SuccBB)) {
+    for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end();
+         I != E; ++I)
+      if (const EHSelectorInst *EHSel = dyn_cast<EHSelectorInst>(I)) {
+        // Apply the catch info to LPad.
+        AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]);
+#ifndef NDEBUG
+        if (!FLI.MBBMap[SuccBB]->isLandingPad())
+          FLI.CatchInfoFound.insert(EHSel);
+#endif
+        return;
+      }
+
+    const BranchInst *Br = dyn_cast<BranchInst>(SuccBB->getTerminator());
+    if (Br && Br->isUnconditional())
+      SuccBB = Br->getSuccessor(0);
+    else
+      break;
+  }
+}
diff --git a/final/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/final/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
new file mode 100644
index 00000000000..e309defba20
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -0,0 +1,897 @@
+//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the Emit routines for the SelectionDAG class, which creates
+// MachineInstrs based on the decisions of the SelectionDAG instruction
+// selection.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "instr-emitter"
+#include "InstrEmitter.h"
+#include "SDNodeDbgValue.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+using namespace llvm;
+
+/// CountResults - The results of target nodes have register or immediate
+/// operands first, then an optional chain, and optional glue operands (which do
+/// not go into the resulting MachineInstr).
+unsigned InstrEmitter::CountResults(SDNode *Node) {
+  unsigned N = Node->getNumValues();
+  while (N && Node->getValueType(N - 1) == MVT::Glue)
+    --N;
+  if (N && Node->getValueType(N - 1) == MVT::Other)
+    --N;    // Skip over chain result.
+  return N;
+}
+
+/// CountOperands - The inputs to target nodes have any actual inputs first,
+/// followed by an optional chain operand, then an optional glue operand.
+/// Compute the number of actual operands that will go into the resulting
+/// MachineInstr.
+unsigned InstrEmitter::CountOperands(SDNode *Node) {
+  unsigned N = Node->getNumOperands();
+  while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
+    --N;
+  if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
+    --N; // Ignore chain if it exists.
+  return N;
+}
+
+/// EmitCopyFromReg - Generate machine code for a CopyFromReg node or an
+/// implicit physical register output.
+void InstrEmitter::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VRBase = 0;
+  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+    // Just use the input register directly!
+    SDValue Op(Node, ResNo);
+    if (IsClone)
+      VRBaseMap.erase(Op);
+    bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
+    (void)isNew; // Silence compiler warning.
+    assert(isNew && "Node emitted out of order - early");
+    return;
+  }
+
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
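+  // For instance, if this node's sole user is "CopyToReg %vreg5", the value
+  // can be copied straight into %vreg5 (an illustrative vreg number) rather
+  // than into a brand-new virtual register.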
+  bool MatchReg = true;
+  const TargetRegisterClass *UseRC = NULL;
+  if (!IsClone && !IsCloned)
+    for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+         UI != E; ++UI) {
+      SDNode *User = *UI;
+      bool Match = true;
+      if (User->getOpcode() == ISD::CopyToReg &&
+          User->getOperand(2).getNode() == Node &&
+          User->getOperand(2).getResNo() == ResNo) {
+        unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+        if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+          VRBase = DestReg;
+          Match = false;
+        } else if (DestReg != SrcReg)
+          Match = false;
+      } else {
+        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+          SDValue Op = User->getOperand(i);
+          if (Op.getNode() != Node || Op.getResNo() != ResNo)
+            continue;
+          EVT VT = Node->getValueType(Op.getResNo());
+          if (VT == MVT::Other || VT == MVT::Glue)
+            continue;
+          Match = false;
+          if (User->isMachineOpcode()) {
+            const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+            const TargetRegisterClass *RC = 0;
+            if (i+II.getNumDefs() < II.getNumOperands())
+              RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+            if (!UseRC)
+              UseRC = RC;
+            else if (RC) {
+              const TargetRegisterClass *ComRC = getCommonSubClass(UseRC, RC);
+              // If multiple uses expect disjoint register classes, we emit
+              // copies in AddRegisterOperand.
+              if (ComRC)
+                UseRC = ComRC;
+            }
+          }
+        }
+      }
+      MatchReg &= Match;
+      if (VRBase)
+        break;
+    }
+
+  EVT VT = Node->getValueType(ResNo);
+  const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
+  SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
+
+  // Figure out the register class to create for the destreg.
+  if (VRBase) {
+    DstRC = MRI->getRegClass(VRBase);
+  } else if (UseRC) {
+    assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!");
+    DstRC = UseRC;
+  } else {
+    DstRC = TLI->getRegClassFor(VT);
+  }
+
+  // If all uses are reading from the src physical register and copying the
+  // register is either impossible or very expensive, then don't create a copy.
+  if (MatchReg && SrcRC->getCopyCost() < 0) {
+    VRBase = SrcReg;
+  } else {
+    // Create the reg, emit the copy.
+    VRBase = MRI->createVirtualRegister(DstRC);
+    BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+            VRBase).addReg(SrcReg);
+  }
+
+  SDValue Op(Node, ResNo);
+  if (IsClone)
+    VRBaseMap.erase(Op);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  (void)isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// getDstOfOnlyCopyToRegUse - If the only use of the specified result number of
+/// node is a CopyToReg, return its destination register. Return 0 otherwise.
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
+                                                unsigned ResNo) const {
+  if (!Node->hasOneUse())
+    return 0;
+
+  SDNode *User = *Node->use_begin();
+  if (User->getOpcode() == ISD::CopyToReg &&
+      User->getOperand(2).getNode() == Node &&
+      User->getOperand(2).getResNo() == ResNo) {
+    unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      return Reg;
+  }
+  return 0;
+}
+
+void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
+                                          const TargetInstrDesc &II,
+                                          bool IsClone, bool IsCloned,
+                                          DenseMap<SDValue, unsigned> &VRBaseMap) {
+  assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
+         "IMPLICIT_DEF should have been handled as a special case elsewhere!");
+
+  for (unsigned i = 0; i < II.getNumDefs(); ++i) {
+    // If the specific node value is only used by a CopyToReg and the dest reg
+    // is a vreg in the same register class, use the CopyToReg'd destination
+    // register instead of creating a new vreg.
+    unsigned VRBase = 0;
+    const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+    if (II.OpInfo[i].isOptionalDef()) {
+      // Optional def must be a physical register.
+      unsigned NumResults = CountResults(Node);
+      VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
+      assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+      MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    }
+
+    if (!VRBase && !IsClone && !IsCloned)
+      for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+           UI != E; ++UI) {
+        SDNode *User = *UI;
+        if (User->getOpcode() == ISD::CopyToReg &&
+            User->getOperand(2).getNode() == Node &&
+            User->getOperand(2).getResNo() == i) {
+          unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+            const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
+            if (RegRC == RC) {
+              VRBase = Reg;
+              MI->addOperand(MachineOperand::CreateReg(Reg, true));
+              break;
+            }
+          }
+        }
+      }
+
+    // Create the result registers for this node and add the result regs to
+    // the machine instruction.
+    if (VRBase == 0) {
+      assert(RC && "Isn't a register operand!");
+      VRBase = MRI->createVirtualRegister(RC);
+      MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+    }
+
+    SDValue Op(Node, i);
+    if (IsClone)
+      VRBaseMap.erase(Op);
+    bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+    (void)isNew; // Silence compiler warning.
+    assert(isNew && "Node emitted out of order - early");
+  }
+}
+
+/// getVR - Return the virtual register corresponding to the specified result
+/// of the specified node.
+unsigned InstrEmitter::getVR(SDValue Op,
+                             DenseMap<SDValue, unsigned> &VRBaseMap) {
+  if (Op.isMachineOpcode() &&
+      Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+    // Add an IMPLICIT_DEF instruction before every use.
+    unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
+    // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+    // does not include operand register class info.
+    if (!VReg) {
+      const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
+      VReg = MRI->createVirtualRegister(RC);
+    }
+    BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
+            TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+    return VReg;
+  }
+
+  DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op);
+  assert(I != VRBaseMap.end() && "Node emitted out of order - late");
+  return I->second;
+}
+
+
+/// AddRegisterOperand - Add the specified register as an operand to the
+/// specified machine instr. Insert register copies if the register is
+/// not in the required register class.
+void
+InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
+                                 unsigned IIOpNum,
+                                 const TargetInstrDesc *II,
+                                 DenseMap<SDValue, unsigned> &VRBaseMap,
+                                 bool IsDebug, bool IsClone, bool IsCloned) {
+  assert(Op.getValueType() != MVT::Other &&
+         Op.getValueType() != MVT::Glue &&
+         "Chain and glue operands should occur at end of operand list!");
+  // Get/emit the operand.
+  unsigned VReg = getVR(Op, VRBaseMap);
+  assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
+
+  const TargetInstrDesc &TID = MI->getDesc();
+  bool isOptDef = IIOpNum < TID.getNumOperands() &&
+                  TID.OpInfo[IIOpNum].isOptionalDef();
+
+  // If the instruction requires a register in a different class, create
+  // a new virtual register and copy the value into it.
+  if (II) {
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+    const TargetRegisterClass *DstRC = 0;
+    if (IIOpNum < II->getNumOperands())
+      DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
+    assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+           "Don't have operand info for this instruction!");
+    if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
+      unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+      BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+      VReg = NewVReg;
+    }
+  }
+
+  // If this value has only one use, that use is a kill. This is a
+  // conservative approximation. InstrEmitter does trivial coalescing
+  // with CopyFromReg nodes, so don't emit kill flags for them.
+  // Avoid kill flags on Schedule cloned nodes, since there will be
+  // multiple uses.
+  // Tied operands are never killed, so we need to check that. And that
+  // means we need to determine the index of the operand.
+  bool isKill = Op.hasOneUse() &&
+                Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+                !IsDebug &&
+                !(IsClone || IsCloned);
+  if (isKill) {
+    unsigned Idx = MI->getNumOperands();
+    while (Idx > 0 &&
+           MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+      --Idx;
+    bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+    if (isTied)
+      isKill = false;
+  }
+
+  MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
+                                           false/*isImp*/, isKill,
+                                           false/*isDead*/, false/*isUndef*/,
+                                           false/*isEarlyClobber*/,
+                                           0/*SubReg*/, IsDebug));
+}
+
+/// AddOperand - Add the specified operand to the specified machine instr. II
+/// specifies the instruction information for the node, and IIOpNum is the
+/// operand number (in the II) that we are adding. IIOpNum and II are used for
+/// assertions only.
+void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
+                              unsigned IIOpNum,
+                              const TargetInstrDesc *II,
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                              bool IsDebug, bool IsClone, bool IsCloned) {
+  if (Op.isMachineOpcode()) {
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
+  } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
+  } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
+    const ConstantFP *CFP = F->getConstantFPValue();
+    MI->addOperand(MachineOperand::CreateFPImm(CFP));
+  } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
+  } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+                                            TGA->getTargetFlags()));
+  } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
+  } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
+  } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+                                             JT->getTargetFlags()));
+  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
+    int Offset = CP->getOffset();
+    unsigned Align = CP->getAlignment();
+    const Type *Type = CP->getType();
+    // MachineConstantPool wants an explicit alignment.
+    if (Align == 0) {
+      Align = TM->getTargetData()->getPrefTypeAlignment(Type);
+      if (Align == 0) {
+        // Alignment of vector types. FIXME!
+        Align = TM->getTargetData()->getTypeAllocSize(Type);
+      }
+    }
+
+    unsigned Idx;
+    MachineConstantPool *MCP = MF->getConstantPool();
+    if (CP->isMachineConstantPoolEntry())
+      Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align);
+    else
+      Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align);
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+                                             CP->getTargetFlags()));
+  } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol(),
+                                            ES->getTargetFlags()));
+  } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) {
+    MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(),
+                                            BA->getTargetFlags()));
+  } else {
+    assert(Op.getValueType() != MVT::Other &&
+           Op.getValueType() != MVT::Glue &&
+           "Chain and glue operands should occur at end of operand list!");
+    AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+                       IsDebug, IsClone, IsCloned);
+  }
+}
+
+/// getSuperRegisterRegClass - Returns the register class of a superreg A whose
+/// "SubIdx"'th sub-register class is the specified register class and whose
+/// type matches the specified type.
+static const TargetRegisterClass*
+getSuperRegisterRegClass(const TargetRegisterClass *TRC,
+                         unsigned SubIdx, EVT VT) {
+  // Pick the register class of the superregister for this type
+  for (TargetRegisterInfo::regclass_iterator I = TRC->superregclasses_begin(),
+         E = TRC->superregclasses_end(); I != E; ++I)
+    if ((*I)->hasType(VT) && (*I)->getSubRegisterRegClass(SubIdx) == TRC)
+      return *I;
+  assert(false && "Couldn't find the register class");
+  return 0;
+}
+
+/// EmitSubregNode - Generate machine code for subreg nodes.
+///
+void InstrEmitter::EmitSubregNode(SDNode *Node,
+                                  DenseMap<SDValue, unsigned> &VRBaseMap,
+                                  bool IsClone, bool IsCloned) {
+  unsigned VRBase = 0;
+  unsigned Opc = Node->getMachineOpcode();
+
+  // If the node is only used by a CopyToReg and the dest reg is a vreg, use
+  // the CopyToReg'd destination register instead of creating a new vreg.
+  for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
+       UI != E; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() == ISD::CopyToReg &&
+        User->getOperand(2).getNode() == Node) {
+      unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+        VRBase = DestReg;
+        break;
+      }
+    }
+  }
+
+  if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+    // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
+    unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+
+    // Figure out the register class to create for the destreg.
+    unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+    MachineInstr *DefMI = MRI->getVRegDef(VReg);
+    unsigned SrcReg, DstReg, DefSubIdx;
+    if (DefMI &&
+        TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+        SubIdx == DefSubIdx) {
+      // Optimize these:
+      // r1025 = s/zext r1024, 4
+      // r1026 = extract_subreg r1025, 4
+      // to a copy
+      // r1026 = copy r1024
+      const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+      VRBase = MRI->createVirtualRegister(TRC);
+      BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+              TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+    } else {
+      const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+      const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+      assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+      // Figure out the register class to create for the destreg.
+      // Note that if we're going to directly use an existing register,
+      // it must be precisely the required class, and not a subclass
+      // thereof.
+      if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+        // Create the reg
+        assert(SRC && "Couldn't find source register class");
+        VRBase = MRI->createVirtualRegister(SRC);
+      }
+
+      // Create the extract_subreg machine instruction.
+      MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                                 TII->get(TargetOpcode::COPY), VRBase);
+
+      // Add source, and subreg index
+      AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
+      assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+             "Cannot yet extract from physregs");
+      MI->getOperand(1).setSubReg(SubIdx);
+      MBB->insert(InsertPos, MI);
+    }
+  } else if (Opc == TargetOpcode::INSERT_SUBREG ||
+             Opc == TargetOpcode::SUBREG_TO_REG) {
+    SDValue N0 = Node->getOperand(0);
+    SDValue N1 = Node->getOperand(1);
+    SDValue N2 = Node->getOperand(2);
+    unsigned SubReg = getVR(N1, VRBaseMap);
+    unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+    const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+    const TargetRegisterClass *SRC =
+      getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
+
+    // Figure out the register class to create for the destreg.
+    // Note that if we're going to directly use an existing register,
+    // it must be precisely the required class, and not a subclass
+    // thereof.
+    if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+      // Create the reg
+      assert(SRC && "Couldn't find source register class");
+      VRBase = MRI->createVirtualRegister(SRC);
+    }
+
+    // Create the insert_subreg or subreg_to_reg machine instruction.
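+    // For SUBREG_TO_REG the first operand is an immediate describing the
+    // undefined high bits, e.g. (illustrative operands):
+    //   r1025 = SUBREG_TO_REG 0, r1024, 4
+    // where the 0 asserts that the bits outside the subregister are zero.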
+    MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc));
+    MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+
+    // If creating a subreg_to_reg, then the first input operand
+    // is an implicit value immediate, otherwise it's a register
+    if (Opc == TargetOpcode::SUBREG_TO_REG) {
+      const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
+      MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
+    } else
+      AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+                 IsClone, IsCloned);
+    // Add the subregister being inserted
+    AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
+    MI->addOperand(MachineOperand::CreateImm(SubIdx));
+    MBB->insert(InsertPos, MI);
+  } else
+    llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg");
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
+  (void)isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
+/// COPY_TO_REGCLASS is just a normal copy, except that the destination
+/// register is constrained to be in a particular register class.
+///
+void
+InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
+                                     DenseMap<SDValue, unsigned> &VRBaseMap) {
+  unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
+
+  // Create the new VReg in the destination class and emit a copy.
+  unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
+  const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
+  unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+  BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+          NewVReg).addReg(VReg);
+
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  (void)isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
+///
+void InstrEmitter::EmitRegSequence(SDNode *Node,
+                                   DenseMap<SDValue, unsigned> &VRBaseMap,
+                                   bool IsClone, bool IsCloned) {
+  const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+  unsigned NewVReg = MRI->createVirtualRegister(RC);
+  MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+                             TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
+  unsigned NumOps = Node->getNumOperands();
+  assert((NumOps & 1) == 0 &&
+         "REG_SEQUENCE must have an even number of operands!");
+  const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+  for (unsigned i = 0; i != NumOps; ++i) {
+    SDValue Op = Node->getOperand(i);
+    if (i & 1) {
+      unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
+      unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
+      const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+      const TargetRegisterClass *SRC =
+        TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+      if (SRC && SRC != RC) {
+        MRI->setRegClass(NewVReg, SRC);
+        RC = SRC;
+      }
+    }
+    AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+               IsClone, IsCloned);
+  }
+
+  MBB->insert(InsertPos, MI);
+  SDValue Op(Node, 0);
+  bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
+  (void)isNew; // Silence compiler warning.
+  assert(isNew && "Node emitted out of order - early");
+}
+
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+/// +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap &VRBaseMap) { + uint64_t Offset = SD->getOffset(); + MDNode* MDPtr = SD->getMDPtr(); + DebugLoc DL = SD->getDebugLoc(); + + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + unsigned FrameIx = SD->getFrameIx(); + return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + } + // Otherwise, we're going to create an instruction here. + const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + if (SD->getKind() == SDDbgValue::SDNODE) { + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::CONST) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast(V)) { + // FIXME: SDDbgValue constants aren't updated with legalization, so it's + // possible to have i128 constants in them at this point. Dwarf writer + // does not handle i128 constants at the moment so, as a crude workaround, + // just drop the debug info if this happens. + if (!CI->getValue().isSignedIntN(64)) + MIB.addReg(0U); + else + MIB.addImm(CI->getSExtValue()); + } else if (const ConstantFP *CF = dyn_cast(V)) { + MIB.addFPImm(CF); + } else { + // Could be an Undef. In any case insert an Undef so we can see what we + // dropped. + MIB.addReg(0U); + } + } else { + // Insert an Undef so we can see what we dropped. + MIB.addReg(0U); + } + + MIB.addImm(Offset).addMetadata(MDPtr); + return &*MIB; +} + +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. +/// +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. 
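+    // (Each use materializes its own IMPLICIT_DEF through getVR, so nothing
+    // needs to be emitted here.)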
+ return; + + const TargetInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NodeOperands = CountOperands(Node); + bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + if (II.isVariadic()) + assert(NumMIOperands >= II.getNumOperands() && + "Too few operands for a variadic node!"); + else + assert(NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // The MachineInstr constructor adds implicit-def operands. Scan through + // these to determine which are dead. + if (MI->getNumOperands() != 0 && + Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + // First, collect all used registers. + SmallVector UsedRegs; + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) + if (F->getOpcode() == ISD::CopyFromReg) + UsedRegs.push_back(cast(F->getOperand(1))->getReg()); + else { + // Collect declared implicit uses. + const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); + UsedRegs.append(TID.getImplicitUses(), + TID.getImplicitUses() + TID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + // Then mark unused registers as dead. + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + } + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = II.getNumDefs() > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Transfer all of the memory reference descriptions of this instruction. + MI->setMemRefs(cast(Node)->memoperands_begin(), + cast(Node)->memoperands_end()); + + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MI); + + // Additional results must be physical register defs. + if (HasPhysRegOuts) { + for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()]; + if (Node->hasAnyUseOfValue(i)) + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + // If there are no uses, mark the register as dead now, so that + // MachineLICM/Sink can see that it's dead. Don't do this if the + // node has a Glue value, for the benefit of targets still using + // Glue for values in physregs. 
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) + MI->addRegisterDead(Reg, TRI); + } + } + + // If the instruction has implicit defs and the node doesn't, mark the + // implicit def as dead. If the node has any glue outputs, we don't do this + // because we don't know what implicit defs are being used by glued nodes. + if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue) + if (const unsigned *IDList = II.getImplicitDefs()) { + for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs(); + i != e; ++i) + MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI); + } +} + +/// EmitSpecialNode - Generate machine code for a target-independent node and +/// needed dependencies. +void InstrEmitter:: +EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + break; + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. + break; + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::EH_LABEL: { + MCSymbol *S = cast(Node)->getLabel(); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::EH_LABEL)).addSym(S); + break; + } + + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + // Create the inline asm machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); + + // Add the asm string as an external symbol operand. + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast(AsmStrV)->getSymbol(); + MI->addOperand(MachineOperand::CreateES(AsmStr)); + + // Add the HasSideEffect and isAlignStack bits. + int64_t ExtraInfo = + cast(Node->getOperand(InlineAsm::Op_ExtraInfo))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + + // Add all of the operand registers to the instruction. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + MI->addOperand(MachineOperand::CreateImm(Flags)); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. 
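+          // (Virtual register defs stay explicit; only physical register defs
+          // get the implicit flag, as the isImp argument below shows.)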
+ MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); + } + break; + case InlineAsm::Kind_RegDefEarlyClobber: + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); + } + break; + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (; NumVals; --NumVals, ++i) + AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + break; + } + } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast(MDV)->getMD(); + if (MD) + MI->addOperand(MachineOperand::CreateMetadata(MD)); + + MBB->insert(InsertPos, MI); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block. +InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), + MRI(&MF->getRegInfo()), + TM(&MF->getTarget()), + TII(TM->getInstrInfo()), + TRI(TM->getRegisterInfo()), + TLI(TM->getTargetLowering()), + MBB(mbb), InsertPos(insertpos) { +} diff --git a/final/lib/CodeGen/SelectionDAG/InstrEmitter.h b/final/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 00000000000..02c044c3f8f --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,142 @@ +//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTREMITTER_H +#define INSTREMITTER_H + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class TargetInstrDesc; +class SDDbgValue; + +class InstrEmitter { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertPos; + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const; + + void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + const TargetInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstr *MI, SDValue Op, + unsigned IIOpNum, + const TargetInstrDesc *II, + DenseMap &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap &VRBaseMap); + + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. + /// + void EmitRegSequence(SDNode *Node, DenseMap &VRBaseMap, + bool IsClone, bool IsCloned); +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// CountOperands - The inputs to target nodes have any actual inputs first, + /// followed by an optional chain operand, then flag operands. Compute + /// the number of actual operands that will go into the resulting + /// MachineInstr. + static unsigned CountOperands(SDNode *Node); + + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap &VRBaseMap); + + /// EmitNode - Generate machine code for a node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap &VRBaseMap) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. 
+  InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+  void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
+  void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+                       DenseMap<SDValue, unsigned> &VRBaseMap);
+};
+
+}
+
+#endif
diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
new file mode 100644
index 00000000000..94080a04e39
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -0,0 +1,3577 @@
+//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::Legalize method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and
+/// hacks on it until the target machine can handle it.  This involves
+/// eliminating value sizes the machine cannot handle (promoting small sizes to
+/// large sizes or splitting up large values into small values) as well as
+/// eliminating operations the machine cannot handle.
+///
+/// This code also does a small amount of optimization and recognition of
+/// idioms as part of its processing.  For example, if a target does not
+/// support the 'setcc' instruction efficiently, but does support the 'brcc'
+/// instruction, this code will attempt to merge the setcc and branch
+/// instructions into brcc's.
+///
+namespace {
+class SelectionDAGLegalize {
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+  CodeGenOpt::Level OptLevel;
+
+  // Libcall insertion helpers.
+
+  /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been
+  /// legalized.  We use this to ensure that calls are properly serialized
+  /// against each other, including inserted libcalls.
+  SDValue LastCALLSEQ_END;
+
+  enum LegalizeAction {
+    Legal,      // The target natively supports this operation.
+    Promote,    // This operation should be executed in a larger type.
+    Expand      // Try to expand this to other ops, otherwise use a libcall.
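+    // (No 'Custom' entry here: this local enum mirrors the per-type actions;
+    // custom lowering is a per-operation action queried separately.)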
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// value type, where the two bits correspond to the LegalizeAction enum.
+  /// This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// LegalizedNodes - For nodes that are of legal width, and that have more
+  /// than one use, this map indicates what regularized operand to use.  This
+  /// allows us to avoid legalizing the same thing more than once.
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+
+    // Transfer SDDbgValues.
+    DAG.TransferDbgValues(From, To);
+  }
+
+public:
+  SelectionDAGLegalize(SelectionDAG &DAG, CodeGenOpt::Level ol);
+
+  /// getTypeAction - Return how we should legalize values of this type:
+  /// either it is already legal, or we need to expand it into multiple
+  /// registers of smaller integer type, or we need to promote it to a
+  /// larger type.
+  LegalizeAction getTypeAction(EVT VT) const {
+    return (LegalizeAction)ValueTypeActions.getTypeAction(VT);
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  ///
+  bool isTypeLegal(EVT VT) const {
+    return getTypeAction(VT) == Legal;
+  }
+
+  void LegalizeDAG();
+
+private:
+  /// LegalizeOp - We know that the specified value has a legal type.
+  /// Recursively ensure that the operands have legal types, then return the
+  /// result.
+  SDValue LegalizeOp(SDValue O);
+
+  SDValue OptimizeFloatStore(StoreSDNode *ST);
+
+  /// PerformInsertVectorEltInMemory - Some targets cannot handle a variable
+  /// insertion index for the INSERT_VECTOR_ELT instruction.  In this case, it
+  /// is necessary to spill the vector being inserted into, to memory, perform
+  /// the insert there, and then read the result back.
+  SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val,
+                                         SDValue Idx, DebugLoc dl);
+  SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
+                                  SDValue Idx, DebugLoc dl);
+
+  /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+  /// performs the same shuffle in terms of order of result bytes, but on a
+  /// type whose vector element type is narrower than the original shuffle
+  /// type.
+  /// e.g. v4i32 <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+                                     SDValue N1, SDValue N2,
+                                     SmallVectorImpl<int> &Mask) const;
+
+  bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                    SmallPtrSet<SDNode*, 32> &NodesLeadingTo);
+
+  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+                             DebugLoc dl);
+
+  SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+  std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+                                                 SDNode *Node, bool isSigned);
+  SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
+                          RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
+                          RTLIB::Libcall Call_PPCF128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+                           RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16,
+                           RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64,
+                           RTLIB::Libcall Call_I128);
+
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+  SDValue ExpandBUILD_VECTOR(SDNode *Node);
+  SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
+  void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
+                                SmallVectorImpl<SDValue> &Results);
+  SDValue ExpandFCOPYSIGN(SDNode *Node);
+  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
+                               DebugLoc dl);
+  SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+  SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
+                                DebugLoc dl);
+
+  SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
+  SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+
+  SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+  SDValue ExpandInsertToVectorThroughStack(SDValue Op);
+  SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+
+  std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+  void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+  void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
+};
+}
+
+/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which
+/// performs the same shuffle in terms of order of result bytes, but on a type
+/// whose vector element type is narrower than the original shuffle type.
+/// e.g. v4i32
<0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, + SDValue N1, SDValue N2, + SmallVectorImpl &Mask) const { + unsigned NumMaskElts = VT.getVectorNumElements(); + unsigned NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + + SmallVector NewMask; + for (unsigned i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); + } + } + assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); +} + +SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, + CodeGenOpt::Level ol) + : TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + DAG(dag), OptLevel(ol), + ValueTypeActions(TLI.getValueTypeActions()) { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); +} + +void SelectionDAGLegalize::LegalizeDAG() { + LastCALLSEQ_END = DAG.getEntryNode(); + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + LegalizeOp(SDValue(I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. 
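+      // (A node with no chain result cannot be part of the call sequence, so
+      // the search stops here.)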
+      if (TheChain.getValueType() != MVT::Other)
+        return 0;
+    }
+  }
+
+  for (SDNode::use_iterator UI = Node->use_begin(),
+       E = Node->use_end(); UI != E; ++UI) {
+
+    // Make sure to only follow users of our token chain.
+    SDNode *User = *UI;
+    for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
+      if (User->getOperand(i) == TheChain)
+        if (SDNode *Result = FindCallEndFromCallStart(User, depth))
+          return Result;
+  }
+  return 0;
+}
+
+/// FindCallStartFromCallEnd - Given a chained node that is part of a call
+/// sequence, find the CALLSEQ_START node that initiates the call sequence.
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+  int nested = 0;
+  assert(Node && "Didn't find callseq_start for a call??");
+  while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+    Node = Node->getOperand(0).getNode();
+    assert(Node->getOperand(0).getValueType() == MVT::Other &&
+           "Node doesn't have a token chain argument!");
+    switch (Node->getOpcode()) {
+    default:
+      break;
+    case ISD::CALLSEQ_START:
+      if (!nested)
+        return Node;
+      nested--;
+      break;
+    case ISD::CALLSEQ_END:
+      nested++;
+      break;
+    }
+  }
+  return 0;
+}
+
+/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
+/// see if any of them can reach Dest.  If none of the operands can get to
+/// Dest, legalize them, legalize ourselves, and return false; otherwise,
+/// return true.
+///
+/// Keep track of the nodes we find that actually do lead to Dest in
+/// NodesLeadingTo.  This avoids retraversing them an exponential number of
+/// times.
+///
+bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
+                                 SmallPtrSet<SDNode*, 32> &NodesLeadingTo) {
+  if (N == Dest) return true;  // N certainly leads to Dest :)
+
+  // If we've already processed this node and it does lead to Dest, there is no
+  // need to reprocess it.
+  if (NodesLeadingTo.count(N)) return true;
+
+  // If the first result of this node has already been legalized, then it
+  // cannot reach Dest.
+  if (LegalizedNodes.count(SDValue(N, 0))) return false;
+
+  // Okay, this node has not already been legalized.  Check and legalize all
+  // operands.  If none lead to Dest, then we can legalize this node.
+  bool OperandsLeadToDest = false;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    OperandsLeadToDest |=   // If an operand leads to Dest, so do we.
+      LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+                                   NodesLeadingTo);
+
+  if (OperandsLeadToDest) {
+    NodesLeadingTo.insert(N);
+    return true;
+  }
+
+  // Okay, this node looks safe, legalize it and return false.
+  LegalizeOp(SDValue(N, 0));
+  return false;
+}
+
+/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or
+/// a load from the constant pool.
+static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
+                                SelectionDAG &DAG, const TargetLowering &TLI) {
+  bool Extend = false;
+  DebugLoc dl = CFP->getDebugLoc();
+
+  // If a FP immediate is precise when represented as a float and if the
+  // target can do an extending load from float to double, we put it into
+  // the constant pool as a float, even if it is statically typed as a
+  // double.  This shrinks FP constants and canonicalizes them for targets
+  // where an FP extending load is the same cost as a normal load (such as
+  // on the x87 fp stack or PPC FP unit).
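+  // For example (illustrative): a ConstantFP 1.0 of type f64 is exactly
+  // representable as a float, so such a target can emit a 4-byte f32
+  // constant-pool entry and read it back with an f64 EXTLOAD, rather than
+  // storing the full 8-byte double.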
+ EVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + EVT OrigVT = VT; + EVT SVT = VT; + while (SVT != MVT::f32) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + const Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + LLVMC = cast(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); + unsigned Alignment = cast(CPIdx)->getAlignment(); + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); +} + +/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + DebugLoc dl = ST->getDebugLoc(); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + } else { + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. + EVT StoredVT = ST->getMemoryVT(); + EVT RegVT = + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), + false, false, 0); + // Store it to the final location. Remember the store. 
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + ST->isVolatile(), ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, 0); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo() + .getWithOffset(Offset), + MemVT, ST->isVolatile(), + ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // The order of the stores doesn't matter - say it with a TokenFactor. + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + } + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Val.getValueType())); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr, + ST->getPointerInfo(), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), + Alignment); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); +} + +/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (VT.isFloatingPoint() && LoadedVT != VT) + Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); + + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); + } + + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. 
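+    // For example (illustrative): a misaligned f64 load on a target with only
+    // 32-bit integer registers becomes two misaligned i32 loads, two aligned
+    // i32 stores into an 8-byte stack slot, and one aligned f64 load from
+    // that slot.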
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy()); + SmallVector Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + MinAlign(LD->getAlignment(), Offset)); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false, false, 0); + + // Callers expect a MERGE_VALUES node. + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. 
+ if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (TLI.isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), Alignment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, TLI.getPointerTy())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); + } + + // aggregate the two parts + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Hi.getValueType())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + SDValue Ops[] = { Result, TF }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// PerformInsertVectorEltInMemory - Some target cannot handle a variable +/// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it +/// is necessary to spill the vector being inserted into to memory, perform +/// the insert there, and then read the result back. +SDValue SelectionDAGLegalize:: +PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + DebugLoc dl) { + SDValue Tmp1 = Vec; + SDValue Tmp2 = Val; + SDValue Tmp3 = Idx; + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. + EVT VT = Tmp1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Tmp3.getValueType(); + EVT PtrVT = TLI.getPointerTy(); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(SPFI), + false, false, 0); + + // Truncate or zero extend offset to target pointer type. + unsigned CastOpc = IdxVT.bitsGT(PtrVT) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + Tmp3 = DAG.getNode(CastOpc, dl, PtrVT, Tmp3); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, + false, false, 0); + // Load the updated vector. 
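+  // (Ch is chained through both stores above, so this load is ordered after
+  // the element update.)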
+ return DAG.getLoad(VT, dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(SPFI), false, false, 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + EVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3; + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + DebugLoc dl = ST->getDebugLoc(); + if (ConstantFPSDNode *CFP = dyn_cast(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + } + + if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. 
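+        // Split the 64-bit image into two 32-bit halves, stored low half
+        // first; on big-endian targets the halves are swapped below.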
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, + ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, MinAlign(Alignment, 4U)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(); +} + +/// LegalizeOp - We know that the specified value has a legal type, and +/// that its operands are legal. Now ensure that the operation itself +/// is legal, recursively ensuring that the operands' operations remain +/// legal. +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert(getTypeAction(Node->getValueType(i)) == Legal && + "Unexpected illegal type!"); + + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + assert((isTypeLegal(Node->getOperand(i).getValueType()) || + Node->getOperand(i).getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; + bool isCustom = false; + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action = TargetLowering::Legal; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::VAARG: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + EVT InnerType = cast(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + EVT OpVT = Node->getOperand(CompareOperand).getValueType(); + ISD::CondCode CCCode = + cast(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. 
+ SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::SADDO: + case ISD::SSUBO: + case ISD::UADDO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SmallVector Ops, ResultVals; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) + Ops.push_back(LegalizeOp(Node->getOperand(i))); + switch (Node->getOpcode()) { + default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); + break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. 
+ if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); + break; + } + + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); + switch (Action) { + case TargetLowering::Legal: + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; + case TargetLowering::Custom: + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) { + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { + if (e == 1) + ResultVals.push_back(Tmp1); + else + ResultVals.push_back(Tmp1.getValue(i)); + } + break; + } + + // FALL THROUGH + case TargetLowering::Expand: + ExpandNode(Result.getNode(), ResultVals); + break; + case TargetLowering::Promote: + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "NODE: "; + Node->dump( &DAG); + dbgs() << "\n"; +#endif + assert(0 && "Do not know how to legalize this operator!"); + + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.getNode()); + break; + } + break; + case ISD::CALLSEQ_START: { + static int depth = 0; + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, + NodesLeadingTo); + } + + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call to ensure that this call starts after the last + // call ended. + if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { + Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. 
+    // process, no libcalls can/will be inserted, guaranteeing that no calls
+    // can overlap.
+
+    SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END;
+    // Note that we are selecting this call!
+    LastCALLSEQ_END = SDValue(CallEnd, 0);
+
+    depth++;
+    // Legalize the call, starting from the CALLSEQ_END.
+    LegalizeOp(LastCALLSEQ_END);
+    depth--;
+    assert(depth >= 0 && "Un-matched CALLSEQ_START?");
+    if (depth > 0)
+      LastCALLSEQ_END = Saved_LastCALLSEQ_END;
+    return Result;
+  }
+  case ISD::CALLSEQ_END:
+    // If the CALLSEQ_START node hasn't been legalized first, legalize it.  This
+    // will cause this node to be legalized as well as handling libcalls right.
+    if (LastCALLSEQ_END.getNode() != Node) {
+      LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0));
+      DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+      assert(I != LegalizedNodes.end() &&
+             "Legalizing the call start should have legalized this node!");
+      return I->second;
+    }
+
+    // Otherwise, the call start has been legalized and everything is going
+    // according to plan.  Just legalize ourselves normally here.
+    Tmp1 = LegalizeOp(Node->getOperand(0));  // Legalize the chain.
+    // Do not try to legalize the target-specific arguments (#1+), except for
+    // an optional flag input.
+    if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
+      if (Tmp1 != Node->getOperand(0)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                &Ops[0], Ops.size()),
+                         Result.getResNo());
+      }
+    } else {
+      Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
+      if (Tmp1 != Node->getOperand(0) ||
+          Tmp2 != Node->getOperand(Node->getNumOperands()-1)) {
+        SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
+        Ops[0] = Tmp1;
+        Ops.back() = Tmp2;
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                &Ops[0], Ops.size()),
+                         Result.getResNo());
+      }
+    }
+    // This finishes up call legalization.
+    // If the CALLSEQ_END node has a flag, remember that we legalized it.
+    AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
+    if (Node->getNumValues() == 2)
+      AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1));
+    return Result.getValue(Op.getResNo());
+  case ISD::LOAD: {
+    LoadSDNode *LD = cast<LoadSDNode>(Node);
+    Tmp1 = LegalizeOp(LD->getChain());   // Legalize the chain.
+    Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer.
+
+    ISD::LoadExtType ExtType = LD->getExtensionType();
+    if (ExtType == ISD::NON_EXTLOAD) {
+      EVT VT = Node->getValueType(0);
+      Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                              Tmp1, Tmp2, LD->getOffset()),
+                       Result.getResNo());
+      Tmp3 = Result.getValue(0);
+      Tmp4 = Result.getValue(1);
+
+      switch (TLI.getOperationAction(Node->getOpcode(), VT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Legal:
+        // If this is an unaligned load and the target doesn't support it,
+        // expand it.
+        if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+          const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+          unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+          if (LD->getAlignment() < ABIAlignment){
+            Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+                                         DAG, TLI);
+            Tmp3 = Result.getOperand(0);
+            Tmp4 = Result.getOperand(1);
+            Tmp3 = LegalizeOp(Tmp3);
+            Tmp4 = LegalizeOp(Tmp4);
+          }
+        }
+        break;
+      case TargetLowering::Custom:
+        Tmp1 = TLI.LowerOperation(Tmp3, DAG);
+        if (Tmp1.getNode()) {
+          Tmp3 = LegalizeOp(Tmp1);
+          Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        }
+        break;
+      case TargetLowering::Promote: {
+        // Only promote a load of vector type to another.
+        assert(VT.isVector() && "Cannot promote this load!");
+        // Change base type to a different vector type.
+        EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
+
+        Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
+                           LD->isVolatile(), LD->isNonTemporal(),
+                           LD->getAlignment());
+        Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
+        Tmp4 = LegalizeOp(Tmp1.getValue(1));
+        break;
+      }
+      }
+      // Since loads produce two values, make sure to remember that we
+      // legalized both of them.
+      AddLegalizedOperand(SDValue(Node, 0), Tmp3);
+      AddLegalizedOperand(SDValue(Node, 1), Tmp4);
+      return Op.getResNo() ? Tmp4 : Tmp3;
+    }
+
+    EVT SrcVT = LD->getMemoryVT();
+    unsigned SrcWidth = SrcVT.getSizeInBits();
+    unsigned Alignment = LD->getAlignment();
+    bool isVolatile = LD->isVolatile();
+    bool isNonTemporal = LD->isNonTemporal();
+
+    if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+        // Some targets pretend to have an i1 loading operation, and actually
+        // load an i8.  This trick is correct for ZEXTLOAD because the top 7
+        // bits are guaranteed to be zero; it helps the optimizers understand
+        // that these bits are zero.  It is also useful for EXTLOAD, since it
+        // tells the optimizers that those bits are undefined.  It would be
+        // nice to have an effective generic way of getting these benefits...
+        // Until such a way is found, don't insist on promoting i1 here.
+        (SrcVT != MVT::i1 ||
+         TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+      // Promote to a byte-sized load if not loading an integral number of
+      // bytes.  For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+      unsigned NewWidth = SrcVT.getStoreSizeInBits();
+      EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+      SDValue Ch;
+
+      // The extra bits are guaranteed to be zero, since we stored them that
+      // way.  A zext load from NVT thus automatically gives zext from SrcVT.
+
+      ISD::LoadExtType NewExtType =
+        ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+      Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+                              Tmp1, Tmp2, LD->getPointerInfo(),
+                              NVT, isVolatile, isNonTemporal, Alignment);
+
+      Ch = Result.getValue(1); // The chain.
+
+      if (ExtType == ISD::SEXTLOAD)
+        // Having the top bits zero doesn't help when sign extending.
+        Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                             Result.getValueType(),
+                             Result, DAG.getValueType(SrcVT));
+      else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+        // All the top bits are guaranteed to be zero - inform the optimizers.
+        Result = DAG.getNode(ISD::AssertZext, dl,
+                             Result.getValueType(), Result,
+                             DAG.getValueType(SrcVT));
+
+      Tmp1 = LegalizeOp(Result);
+      Tmp2 = LegalizeOp(Ch);
+    } else if (SrcWidth & (SrcWidth - 1)) {
+      // If not loading a power-of-2 number of bits, expand as two loads.
+      assert(!SrcVT.isVector() && "Unsupported extload!");
+      unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+      assert(RoundWidth < SrcWidth);
+      unsigned ExtraWidth = SrcWidth - RoundWidth;
+      assert(ExtraWidth < RoundWidth);
+      assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+             "Load size not an integral number of bytes!");
+      EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+      EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+      SDValue Lo, Hi, Ch;
+      unsigned IncrementSize;
+
+      if (TLI.isLittleEndian()) {
+        // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+        // Load the bottom RoundWidth bits.
+        Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+                            Tmp1, Tmp2,
+                            LD->getPointerInfo(), RoundVT, isVolatile,
+                            isNonTemporal, Alignment);
+
+        // Load the remaining ExtraWidth bits.
+        IncrementSize = RoundWidth / 8;
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getIntPtrConstant(IncrementSize));
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo().getWithOffset(IncrementSize),
+                            ExtraVT, isVolatile, isNonTemporal,
+                            MinAlign(Alignment, IncrementSize));
+
+        // Build a factor node to remember that this load is independent of
+        // the other one.
+        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                         Hi.getValue(1));
+
+        // Move the top bits to the right place.
+        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                         DAG.getConstant(RoundWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
+
+        // Join the hi and lo parts.
+        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+      } else {
+        // Big endian - avoid unaligned loads.
+        // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+        // Load the top RoundWidth bits.
+        Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo(), RoundVT, isVolatile,
+                            isNonTemporal, Alignment);
+
+        // Load the remaining ExtraWidth bits.
+        IncrementSize = RoundWidth / 8;
+        Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                           DAG.getIntPtrConstant(IncrementSize));
+        Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+                            dl, Node->getValueType(0), Tmp1, Tmp2,
+                            LD->getPointerInfo().getWithOffset(IncrementSize),
+                            ExtraVT, isVolatile, isNonTemporal,
+                            MinAlign(Alignment, IncrementSize));
+
+        // Build a factor node to remember that this load is independent of
+        // the other one.
+        Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                         Hi.getValue(1));
+
+        // Move the top bits to the right place.
+        Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+                         DAG.getConstant(ExtraWidth,
+                                      TLI.getShiftAmountTy(Hi.getValueType())));
+
+        // Join the hi and lo parts.
+        Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+      }
+
+      Tmp1 = LegalizeOp(Result);
+      Tmp2 = LegalizeOp(Ch);
+    } else {
+      switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+      default: assert(0 && "This action is not supported yet!");
+      case TargetLowering::Custom:
+        isCustom = true;
+        // FALLTHROUGH
+      case TargetLowering::Legal:
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                Tmp1, Tmp2, LD->getOffset()),
+                         Result.getResNo());
+        Tmp1 = Result.getValue(0);
+        Tmp2 = Result.getValue(1);
+
+        if (isCustom) {
+          Tmp3 = TLI.LowerOperation(Result, DAG);
+          if (Tmp3.getNode()) {
+            Tmp1 = LegalizeOp(Tmp3);
+            Tmp2 = LegalizeOp(Tmp3.getValue(1));
+          }
+        } else {
+          // If this is an unaligned load and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+            const Type *Ty =
+              LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+            unsigned ABIAlignment =
+              TLI.getTargetData()->getABITypeAlignment(Ty);
+            if (LD->getAlignment() < ABIAlignment){
+              Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+                                           DAG, TLI);
+              Tmp1 = Result.getOperand(0);
+              Tmp2 = Result.getOperand(1);
+              Tmp1 = LegalizeOp(Tmp1);
+              Tmp2 = LegalizeOp(Tmp2);
+            }
+          }
+        }
+        break;
+      case TargetLowering::Expand:
+        if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+          SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+                                     LD->getPointerInfo(),
+                                     LD->isVolatile(), LD->isNonTemporal(),
+                                     LD->getAlignment());
+          unsigned ExtendOp;
+          switch (ExtType) {
+          case ISD::EXTLOAD:
+            ExtendOp = (SrcVT.isFloatingPoint() ?
+                        ISD::FP_EXTEND : ISD::ANY_EXTEND);
+            break;
+          case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+          case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+          default: llvm_unreachable("Unexpected extend load type!");
+          }
+          Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+          Tmp1 = LegalizeOp(Result);  // Relegalize new nodes.
+          Tmp2 = LegalizeOp(Load.getValue(1));
+          break;
+        }
+        // FIXME: This does not work for vectors on most targets.  Sign- and
+        // zero-extend operations are currently folded into extending loads,
+        // whether they are legal or not, and then we end up here without any
+        // support for legalizing them.
+        assert(ExtType != ISD::EXTLOAD &&
+               "EXTLOAD should always be supported!");
+        // Turn the unsupported load into an EXTLOAD followed by an explicit
+        // zero/sign extend inreg.
+        Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+                                Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+                                LD->isVolatile(), LD->isNonTemporal(),
+                                LD->getAlignment());
+        SDValue ValRes;
+        if (ExtType == ISD::SEXTLOAD)
+          ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+                               Result.getValueType(),
+                               Result, DAG.getValueType(SrcVT));
+        else
+          ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+        Tmp1 = LegalizeOp(ValRes);  // Relegalize new nodes.
+        Tmp2 = LegalizeOp(Result.getValue(1));  // Relegalize new nodes.
+        break;
+      }
+    }
+
+    // Since loads produce two values, make sure to remember that we legalized
+    // both of them.
+    AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+    AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+    return Op.getResNo() ? Tmp2 : Tmp1;
+  }
+  case ISD::STORE: {
+    StoreSDNode *ST = cast<StoreSDNode>(Node);
+    Tmp1 = LegalizeOp(ST->getChain());    // Legalize the chain.
+    Tmp2 = LegalizeOp(ST->getBasePtr());  // Legalize the pointer.
+    unsigned Alignment = ST->getAlignment();
+    bool isVolatile = ST->isVolatile();
+    bool isNonTemporal = ST->isNonTemporal();
+
+    if (!ST->isTruncatingStore()) {
+      if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+        Result = SDValue(OptStore, 0);
+        break;
+      }
+
+      {
+        Tmp3 = LegalizeOp(ST->getValue());
+        Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                Tmp1, Tmp3, Tmp2,
+                                                ST->getOffset()),
+                         Result.getResNo());
+
+        EVT VT = Tmp3.getValueType();
+        switch (TLI.getOperationAction(ISD::STORE, VT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:
+          // If this is an unaligned store and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+            const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+            unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+            if (ST->getAlignment() < ABIAlignment)
+              Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                            DAG, TLI);
+          }
+          break;
+        case TargetLowering::Custom:
+          Tmp1 = TLI.LowerOperation(Result, DAG);
+          if (Tmp1.getNode()) Result = Tmp1;
+          break;
+        case TargetLowering::Promote:
+          assert(VT.isVector() && "Unknown legal promote case!");
+          Tmp3 = DAG.getNode(ISD::BITCAST, dl,
+                             TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
+                                ST->getPointerInfo(), isVolatile,
+                                isNonTemporal, Alignment);
+          break;
+        }
+        break;
+      }
+    } else {
+      Tmp3 = LegalizeOp(ST->getValue());
+
+      EVT StVT = ST->getMemoryVT();
+      unsigned StWidth = StVT.getSizeInBits();
+
+      if (StWidth != StVT.getStoreSizeInBits()) {
+        // Promote to a byte-sized store with upper bits zero if not
+        // storing an integral number of bytes.  For example, promote
+        // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+        EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                                    StVT.getStoreSizeInBits());
+        Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
+        Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                   NVT, isVolatile, isNonTemporal, Alignment);
+      } else if (StWidth & (StWidth - 1)) {
+        // If not storing a power-of-2 number of bits, expand as two stores.
+        assert(!StVT.isVector() && "Unsupported truncstore!");
+        unsigned RoundWidth = 1 << Log2_32(StWidth);
+        assert(RoundWidth < StWidth);
+        unsigned ExtraWidth = StWidth - RoundWidth;
+        assert(ExtraWidth < RoundWidth);
+        assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+               "Store size not an integral number of bytes!");
+        EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+        EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+        SDValue Lo, Hi;
+        unsigned IncrementSize;
+
+        if (TLI.isLittleEndian()) {
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+          // Store the bottom RoundWidth bits.
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                 RoundVT,
+                                 isVolatile, isNonTemporal, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(RoundWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+                             ST->getPointerInfo().getWithOffset(IncrementSize),
+                                 ExtraVT, isVolatile, isNonTemporal,
+                                 MinAlign(Alignment, IncrementSize));
+        } else {
+          // Big endian - avoid unaligned stores.
+          // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+          // Store the top RoundWidth bits.
+          Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
+                           DAG.getConstant(ExtraWidth,
+                                    TLI.getShiftAmountTy(Tmp3.getValueType())));
+          Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+                                 RoundVT, isVolatile, isNonTemporal, Alignment);
+
+          // Store the remaining ExtraWidth bits.
+          IncrementSize = RoundWidth / 8;
+          Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+                             DAG.getIntPtrConstant(IncrementSize));
+          Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+                             ST->getPointerInfo().getWithOffset(IncrementSize),
+                                 ExtraVT, isVolatile, isNonTemporal,
+                                 MinAlign(Alignment, IncrementSize));
+        }
+
+        // The order of the stores doesn't matter.
+        Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+      } else {
+        if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
+            Tmp2 != ST->getBasePtr())
+          Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+                                                  Tmp1, Tmp3, Tmp2,
+                                                  ST->getOffset()),
+                           Result.getResNo());
+
+        switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+        default: assert(0 && "This action is not supported yet!");
+        case TargetLowering::Legal:
+          // If this is an unaligned store and the target doesn't support it,
+          // expand it.
+          if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
+            const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
+            unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
+            if (ST->getAlignment() < ABIAlignment)
+              Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
+                                            DAG, TLI);
+          }
+          break;
+        case TargetLowering::Custom:
+          Result = TLI.LowerOperation(Result, DAG);
+          break;
+        case TargetLowering::Expand:
+          // TRUNCSTORE:i16 i32 -> STORE i16
+          assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+          Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
+          Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+                                isVolatile, isNonTemporal, Alignment);
+          break;
+        }
+      }
+    }
+    break;
+  }
+  }
+  assert(Result.getValueType() == Op.getValueType() &&
+         "Bad legalization!");
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op)
+    Result = LegalizeOp(Result);
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
+  SDValue Vec = Op.getOperand(0);
+  SDValue Idx = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                            MachinePointerInfo(), false, false, 0);
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+  else
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
+
+  if (Op.getValueType().isVector())
+    return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,
+                       MachinePointerInfo(), false, false, 0);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+                        MachinePointerInfo(),
+                        Vec.getValueType().getVectorElementType(),
+                        false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+  SDValue Vec = Op.getOperand(0);
+  SDValue Part = Op.getOperand(1);
+  SDValue Idx = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Store the value to a temporary stack slot, then LOAD the returned part.
+
+  SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+  int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+  // First store the whole vector.
+  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+                            false, false, 0);
+
+  // Then store the inserted part.
+
+  // Add the offset to the index.
+  unsigned EltSize =
+      Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+  Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(EltSize, Idx.getValueType()));
+
+  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
+  else
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                                    StackPtr);
+
+  // Store the subvector.
+  Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+                    MachinePointerInfo(), false, false, 0);
+
+  // Finally, load the updated vector.
+  return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+                     false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
+  // We can't handle this case efficiently.  Allocate a sufficiently
+  // aligned object on the stack, store each element into it, then load
+  // the result as a vector.
+  // Create the stack frame object.
+  EVT VT = Node->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue FIPtr = DAG.CreateStackTemporary(VT);
+  int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+  // Emit a store of each element to the stack slot.
+  SmallVector<SDValue, 8> Stores;
+  unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+  // Store (in the right endianness) the elements to memory.
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    // Ignore undef elements.
+    if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue;
+
+    unsigned Offset = TypeByteSize*i;
+
+    SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
+    Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
+
+    // If the destination vector element type is narrower than the source
+    // element type, only store the bits necessary.
+    if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
+      Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
+                                         Node->getOperand(i), Idx,
+                                         PtrInfo.getWithOffset(Offset),
+                                         EltVT, false, false, 0));
+    } else
+      Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+                                    Node->getOperand(i), Idx,
+                                    PtrInfo.getWithOffset(Offset),
+                                    false, false, 0));
+  }
+
+  SDValue StoreChain;
+  if (!Stores.empty())    // Not all undef elements?
+    StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                             &Stores[0], Stores.size());
+  else
+    StoreChain = DAG.getEntryNode();
+
+  // Result is a load from the stack slot.
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  SDValue Tmp1 = Node->getOperand(0);
+  SDValue Tmp2 = Node->getOperand(1);
+
+  // Get the sign bit of the RHS.  First obtain a value that has the same
+  // sign as the sign bit, i.e. negative if and only if the sign bit is 1.
+  SDValue SignBit;
+  EVT FloatVT = Tmp2.getValueType();
+  EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
+  if (isTypeLegal(IVT)) {
+    // Convert to an integer with the same sign bit.
+    SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
+  } else {
+    // Store the float to memory, then load the sign part out as an integer.
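+    // e.g. for an f64 on a target where i64 is not legal: store the f64,
+    // then load the 32-bit word that holds bit 63, the IEEE sign bit (offset
+    // 4 on little-endian targets, offset 0 on big-endian ones), and test
+    // that word's sign below.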
+    MVT LoadTy = TLI.getPointerTy();
+    // First create a temporary that is aligned for both the load and store.
+    SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
+    // Then store the float to it.
+    SDValue Ch =
+      DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr,
+                   MachinePointerInfo(), false, false, 0);
+    if (TLI.isBigEndian()) {
+      assert(FloatVT.isByteSized() && "Unsupported floating point type!");
+      // Load out a legal integer with the same sign bit as the float.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+                            false, false, 0);
+    } else { // Little endian
+      SDValue LoadPtr = StackPtr;
+      // The float may be wider than the integer we are going to load.  Advance
+      // the pointer so that the loaded integer will contain the sign bit.
+      unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
+      unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
+      LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
+                            LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+      // Load a legal integer containing the sign bit.
+      SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+                            false, false, 0);
+      // Move the sign bit to the top bit of the loaded integer.
+      unsigned BitShift = LoadTy.getSizeInBits() -
+        (FloatVT.getSizeInBits() - 8 * ByteOffset);
+      assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?");
+      if (BitShift)
+        SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit,
+                              DAG.getConstant(BitShift,
+                                 TLI.getShiftAmountTy(SignBit.getValueType())));
+    }
+  }
+  // Now get the sign bit proper, by seeing whether the value is negative.
+  SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+                         SignBit, DAG.getConstant(0, SignBit.getValueType()),
+                         ISD::SETLT);
+  // Get the absolute value of the result.
+  SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
+  // Select between the nabs and abs value based on the sign bit of
+  // the input.
+  return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
+                     DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+                     AbsVal);
+}
+
+void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
+                                           SmallVectorImpl<SDValue> &Results) {
+  unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+  assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
+         " not tell us which reg is the stack pointer!");
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  SDValue Tmp1 = SDValue(Node, 0);
+  SDValue Tmp2 = SDValue(Node, 1);
+  SDValue Tmp3 = Node->getOperand(2);
+  SDValue Chain = Tmp1.getOperand(0);
+
+  // Chain the dynamic stack allocation so that it doesn't modify the stack
+  // pointer when other instructions are using the stack.
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+
+  SDValue Size = Tmp2.getOperand(1);
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
+  Chain = SP.getValue(1);
+  unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+  if (Align > StackAlign)
+    SP = DAG.getNode(ISD::AND, dl, VT, SP,
+                     DAG.getConstant(-(uint64_t)Align, VT));
+  Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size);       // Value
+  Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1);     // Output chain
+
+  Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
+                            DAG.getIntPtrConstant(0, true), SDValue());
+
+  Results.push_back(Tmp1);
+  Results.push_back(Tmp2);
+}
+
+/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
+/// condition code CC on the current target. This routine expands SETCC with
+/// illegal condition code into AND / OR of multiple SETCC values.
+void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+                                                 SDValue &LHS, SDValue &RHS,
+                                                 SDValue &CC,
+                                                 DebugLoc dl) {
+  EVT OpVT = LHS.getValueType();
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
+  default: assert(0 && "Unknown condition code action!");
+  case TargetLowering::Legal:
+    // Nothing to do.
+    break;
+  case TargetLowering::Expand: {
+    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
+    unsigned Opc = 0;
+    switch (CCCode) {
+    default: assert(0 && "Don't know how to expand this condition!");
+    case ISD::SETOEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGT: CC1 = ISD::SETGT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOGE: CC1 = ISD::SETGE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLT: CC1 = ISD::SETLT; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETOLE: CC1 = ISD::SETLE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETONE: CC1 = ISD::SETNE; CC2 = ISD::SETO;  Opc = ISD::AND; break;
+    case ISD::SETUEQ: CC1 = ISD::SETEQ; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGT: CC1 = ISD::SETGT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUGE: CC1 = ISD::SETGE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULT: CC1 = ISD::SETLT; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETULE: CC1 = ISD::SETLE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    case ISD::SETUNE: CC1 = ISD::SETNE; CC2 = ISD::SETUO; Opc = ISD::OR;  break;
+    // FIXME: Implement more expansions.
+    }
+
+    SDValue SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1);
+    SDValue SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2);
+    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
+    RHS = SDValue();
+    CC = SDValue();
+    break;
+  }
+  }
+}
+
+/// EmitStackConvert - Emit a store/load combination to the stack.  This stores
+/// SrcOp to a stack slot of type SlotVT, truncating it if needed.  It then does
+/// a load from the stack slot to DestVT, extending it if needed.
+/// The resultant code need not be legal.
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
+                                               EVT SlotVT,
+                                               EVT DestVT,
+                                               DebugLoc dl) {
+  // Create the stack frame object.
+  unsigned SrcAlign =
+    TLI.getTargetData()->getPrefTypeAlignment(SrcOp.getValueType().
+                                              getTypeForEVT(*DAG.getContext()));
+  SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign);
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
+  int SPFI = StackPtrFI->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+  unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+  unsigned SlotSize = SlotVT.getSizeInBits();
+  unsigned DestSize = DestVT.getSizeInBits();
+  const Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
+  unsigned DestAlign = TLI.getTargetData()->getPrefTypeAlignment(DestType);
+
+  // Emit a store to the stack slot.  Use a truncstore if the input value is
+  // larger than DestVT.
+  SDValue Store;
+
+  if (SrcSize > SlotSize)
+    Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                              PtrInfo, SlotVT, false, false, SrcAlign);
+  else {
+    assert(SrcSize == SlotSize && "Invalid store");
+    Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
+                         PtrInfo, false, false, SrcAlign);
+  }
+
+  // Result is a load from the stack slot.
+  if (SlotSize == DestSize)
+    return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+                       false, false, DestAlign);
+
+  assert(SlotSize < DestSize && "Unknown extension!");
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+                        PtrInfo, SlotVT, false, false, DestAlign);
+}
+
+SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
+  DebugLoc dl = Node->getDebugLoc();
+  // Create a vector sized/aligned stack slot, store the value to element #0,
+  // then load the whole vector back out.
+  SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
+
+  FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr);
+  int SPFI = StackPtrFI->getIndex();
+
+  SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
+                                 StackPtr,
+                                 MachinePointerInfo::getFixedStack(SPFI),
+                                 Node->getValueType(0).getVectorElementType(),
+                                 false, false, 0);
+  return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
+                     MachinePointerInfo::getFixedStack(SPFI),
+                     false, false, 0);
+}
+
+
+/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't
+/// support the operation, but do support the resultant vector type.
+SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
+  unsigned NumElems = Node->getNumOperands();
+  SDValue Value1, Value2;
+  DebugLoc dl = Node->getDebugLoc();
+  EVT VT = Node->getValueType(0);
+  EVT OpVT = Node->getOperand(0).getValueType();
+  EVT EltVT = VT.getVectorElementType();
+
+  // If the only non-undef value is the low element, turn this into a
+  // SCALAR_TO_VECTOR node.  If this is { X, X, X, X }, determine X.
+  bool isOnlyLowElement = true;
+  bool MoreThanTwoValues = false;
+  bool isConstant = true;
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue V = Node->getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    if (i > 0)
+      isOnlyLowElement = false;
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+      isConstant = false;
+
+    if (!Value1.getNode()) {
+      Value1 = V;
+    } else if (!Value2.getNode()) {
+      if (V != Value1)
+        Value2 = V;
+    } else if (V != Value1 && V != Value2) {
+      MoreThanTwoValues = true;
+    }
+  }
+
+  if (!Value1.getNode())
+    return DAG.getUNDEF(VT);
+
+  if (isOnlyLowElement)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0));
+
+  // If all elements are constants, create a load from the constant pool.
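+  // e.g. a constant BUILD_VECTOR <i32 1, i32 2, i32 3, i32 4> becomes a
+  // single v4i32 load from a 16-byte constant-pool entry holding the four
+  // element values.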
+  if (isConstant) {
+    std::vector<Constant*> CV;
+    for (unsigned i = 0, e = NumElems; i != e; ++i) {
+      if (ConstantFPSDNode *V =
+          dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) {
+        CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
+      } else if (ConstantSDNode *V =
+                 dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
+        if (OpVT==EltVT)
+          CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+        else {
+          // If OpVT and EltVT don't match, EltVT is not legal and the
+          // element values have been promoted/truncated earlier.  Undo this;
+          // we don't want a v16i8 to become a v16i32 for example.
+          const ConstantInt *CI = V->getConstantIntValue();
+          CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+                                        CI->getZExtValue()));
+        }
+      } else {
+        assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
+        const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
+        CV.push_back(UndefValue::get(OpNTy));
+      }
+    }
+    Constant *CP = ConstantVector::get(CV);
+    SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
+    unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+    return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
+                       MachinePointerInfo::getConstantPool(),
+                       false, false, Alignment);
+  }
+
+  if (!MoreThanTwoValues) {
+    SmallVector<int, 8> ShuffleVec(NumElems, -1);
+    for (unsigned i = 0; i < NumElems; ++i) {
+      SDValue V = Node->getOperand(i);
+      if (V.getOpcode() == ISD::UNDEF)
+        continue;
+      ShuffleVec[i] = V == Value1 ? 0 : NumElems;
+    }
+    if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) {
+      // Get the splatted value into the low element of a vector register.
+      SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1);
+      SDValue Vec2;
+      if (Value2.getNode())
+        Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2);
+      else
+        Vec2 = DAG.getUNDEF(VT);
+
+      // Return shuffle(LowValVec, undef, <0,0,0,0>)
+      return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data());
+    }
+  }
+
+  // Otherwise, we can't handle this case efficiently.
+  return ExpandVectorBuildThroughStack(Node);
+}
+
+// ExpandLibCall - Expand a node into a call to a libcall.  If the result value
+// does not fit into a register, return the lo part and set the hi part to the
+// by-reg argument.  If it does fit into a single register, return the result
+// and leave the Hi part unset.
+SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+                                            bool isSigned) {
+  // The input chain to this libcall is the entry node of the function.
+  // Legalizing the call will automatically add the previous call to the
+  // dependence.
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+  // isTailCall may be true since the callee does not reference caller stack
+  // frame. Check if it's in the right position.
+  bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), isTailCall,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, Node->getDebugLoc());
+
+  if (!CallInfo.second.getNode())
+    // It's a tailcall, return the chain (which is the DAG root).
+    return DAG.getRoot();
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+                                         SDNode *Node,
+                                         bool isSigned) {
+  SDValue InChain = Node->getOperand(0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, Node->getDebugLoc());
+
+  // Legalize the call sequence, starting with the chain.  This will advance
+  // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+  // was added by LowerCallTo (guaranteeing proper serialization of calls).
+  LegalizeOp(CallInfo.second);
+  return CallInfo;
+}
+
+SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
+                                              RTLIB::Libcall Call_F32,
+                                              RTLIB::Libcall Call_F64,
+                                              RTLIB::Libcall Call_F80,
+                                              RTLIB::Libcall Call_PPCF128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::f32: LC = Call_F32; break;
+  case MVT::f64: LC = Call_F64; break;
+  case MVT::f80: LC = Call_F80; break;
+  case MVT::ppcf128: LC = Call_PPCF128; break;
+  }
+  return ExpandLibCall(LC, Node, false);
+}
+
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+                                               RTLIB::Libcall Call_I8,
+                                               RTLIB::Libcall Call_I16,
+                                               RTLIB::Libcall Call_I32,
+                                               RTLIB::Libcall Call_I64,
+                                               RTLIB::Libcall Call_I128) {
+  RTLIB::Libcall LC;
+  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unexpected request for libcall!");
+  case MVT::i8:   LC = Call_I8; break;
+  case MVT::i16:  LC = Call_I16; break;
+  case MVT::i32:  LC = Call_I32; break;
+  case MVT::i64:  LC = Call_I64; break;
+  case MVT::i128: LC = Call_I128; break;
+  }
+  return ExpandLibCall(LC, Node, isSigned);
+}
+
+/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a
+/// INT_TO_FP operation of the specified operand when the target requests that
+/// we expand it.  At this point, we know that the result and operand types are
+/// legal for the target.
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
+                                                   SDValue Op0,
+                                                   EVT DestVT,
+                                                   DebugLoc dl) {
+  if (Op0.getValueType() == MVT::i32) {
+    // simple 32-bit [signed|unsigned] integer to float/double expansion
+
+    // Get the stack frame index of an 8-byte buffer.
+    SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
+
+    // word offset constant for Hi/Lo address computation
+    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    // set up Hi and Lo (into buffer) address based on endian
+    SDValue Hi = StackSlot;
+    SDValue Lo = DAG.getNode(ISD::ADD, dl,
+                             TLI.getPointerTy(), StackSlot, WordOff);
+    if (TLI.isLittleEndian())
+      std::swap(Hi, Lo);
+
+    // if signed map to unsigned space
+    SDValue Op0Mapped;
+    if (isSigned) {
+      // constant used to invert sign bit (signed to unsigned mapping)
+      SDValue SignBit = DAG.getConstant(0x80000000u, MVT::i32);
+      Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit);
+    } else {
+      Op0Mapped = Op0;
+    }
+    // store the lo of the constructed double - based on integer input
+    SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
+                                  Op0Mapped, Lo, MachinePointerInfo(),
+                                  false, false, 0);
+    // initial hi portion of constructed double
+    SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
+    // store the hi of the constructed double - biased exponent
+    SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+                                  MachinePointerInfo(),
+                                  false, false, 0);
+    // load the constructed double
+    SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+                               MachinePointerInfo(), false, false, 0);
+    // FP constant to bias correct the final result
+    SDValue Bias = DAG.getConstantFP(isSigned ?
+                                     BitsToDouble(0x4330000080000000ULL) :
+                                     BitsToDouble(0x4330000000000000ULL),
+                                     MVT::f64);
+    // subtract the bias
+    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    // final result
+    SDValue Result;
+    // handle final rounding
+    if (DestVT == MVT::f64) {
+      // do nothing
+      Result = Sub;
+    } else if (DestVT.bitsLT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
+                           DAG.getIntPtrConstant(0));
+    } else if (DestVT.bitsGT(MVT::f64)) {
+      Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
+    }
+    return Result;
+  }
+  assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+  // Code below here assumes !isSigned without checking again.
+
+  // Implementation of unsigned i64 to f64 following the algorithm in
+  // __floatundidf in compiler_rt.  This implementation has the advantage
+  // of performing rounding correctly, both in the default rounding mode
+  // and in all alternate rounding modes.
+  // TODO: Generalize this for use with other types.
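+  // Roughly: 0x4330000000000000 is the bit pattern of the double 2^52 and
+  // 0x4530000000000000 that of 2^84.  OR-ing a 32-bit value into the low
+  // mantissa bits yields, as a double, exactly 2^52 + Lo and 2^84 + Hi*2^32
+  // (both fit in the 52-bit mantissa).  Subtracting (2^84 + 2^52) from the
+  // high part then leaves Hi*2^32 - 2^52, and the final FADD with
+  // (2^52 + Lo) reconstructs Hi*2^32 + Lo using a single, correct rounding.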
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+    SDValue TwoP52 =
+      DAG.getConstant(UINT64_C(0x4330000000000000), MVT::i64);
+    SDValue TwoP84PlusTwoP52 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), MVT::f64);
+    SDValue TwoP84 =
+      DAG.getConstant(UINT64_C(0x4530000000000000), MVT::i64);
+
+    SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32);
+    SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0,
+                             DAG.getConstant(32, MVT::i64));
+    SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
+    SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
+    SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+    SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
+    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+                                TwoP84PlusTwoP52);
+    return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
+  }
+
+  // Implementation of unsigned i64 to f32.
+  // TODO: Generalize this for use with other types.
+  if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+    // For unsigned conversions, convert them to signed conversions using the
+    // algorithm from the x86_64 __floatundidf in compiler_rt.
+    if (!isSigned) {
+      SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+      SDValue ShiftConst =
+        DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType()));
+      SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+      SDValue AndConst = DAG.getConstant(1, MVT::i64);
+      SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+      SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+      SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+      // TODO: This really should be implemented using a branch rather than a
+      // select.  We happen to get lucky and machinesink does the right
+      // thing most of the time.  This would be a good candidate for a
+      // pseudo-op, or, even better, for whole-function isel.
+      SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+        Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+      return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+    }
+
+    // Otherwise, implement the fully general conversion.
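+    // In outline: values with more than 53 significant bits cannot pass
+    // through f64 exactly, so the AND/OR/SELECT sequence below first squeezes
+    // Op0 to 53 bits with a sticky bit (round-to-odd), then splits it as
+    // Hi*2^32 + Lo, converts both halves through f64, and rounds once to f32.
+    // 0x41f0000000000000 is the f64 constant 2^32 used to scale the high half.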
+
+    SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+         DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+    SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+         DAG.getConstant(UINT64_C(0x800), MVT::i64));
+    SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+         DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+    SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+         And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+    SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+    SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+         Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+         ISD::SETUGE);
+    SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+    EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());
+
+    SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+                             DAG.getConstant(32, SHVT));
+    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+    SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+    SDValue TwoP32 =
+      DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+    SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+    SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+    SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+    SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+    return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+                       DAG.getIntPtrConstant(0));
+  }
+
+  SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+
+  SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+                                 Op0, DAG.getConstant(0, Op0.getValueType()),
+                                 ISD::SETLT);
+  SDValue Zero = DAG.getIntPtrConstant(0), Four = DAG.getIntPtrConstant(4);
+  SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
+                                  SignSet, Four, Zero);
+
+  // If the sign bit of the integer is set, the large number will be treated
+  // as a negative number.  To counteract this, the dynamic code adds an
+  // offset depending on the data type.
+  uint64_t FF;
+  switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unsupported integer type!");
+  case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
+  case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
+  case MVT::i32: FF = 0x4F800000ULL; break;  // 2^32 (as a float)
+  case MVT::i64: FF = 0x5F800000ULL; break;  // 2^64 (as a float)
+  }
+  if (TLI.isLittleEndian()) FF <<= 32;
+  Constant *FudgeFactor = ConstantInt::get(
+                                       Type::getInt64Ty(*DAG.getContext()), FF);
+
+  SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
+  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
+  CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+  Alignment = std::min(Alignment, 4u);
+  SDValue FudgeInReg;
+  if (DestVT == MVT::f32)
+    FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
+                             MachinePointerInfo::getConstantPool(),
+                             false, false, Alignment);
+  else {
+    FudgeInReg =
+      LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+                                DAG.getEntryNode(), CPIdx,
+                                MachinePointerInfo::getConstantPool(),
+                                MVT::f32, false, false, Alignment));
+  }
+
+  return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+}
+
+/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a
+/// *INT_TO_FP operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP
+/// operation that takes a larger input.
+SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
+                                                    EVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate *INT_TO_FP operation to use.
+  EVT NewInTy = LegalOp.getValueType();
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
+    assert(NewInTy.isInteger() && "Ran out of possibilities!");
+
+    // If the target supports SINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::SINT_TO_FP;
+      break;
+    }
+    if (isSigned) continue;
+
+    // If the target supports UINT_TO_FP of this type, use it.
+    if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) {
+      OpToUse = ISD::UINT_TO_FP;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+  // Okay, we found the operation and type to use.  Zero extend our input to the
+  // desired type then run the operation on it.
+  return DAG.getNode(OpToUse, dl, DestVT,
+                     DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                                 dl, NewInTy, LegalOp));
+}
+
+/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a
+/// FP_TO_*INT operation of the specified operand when the target requests that
+/// we promote it.  At this point, we know that the result and operand types are
+/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT
+/// operation that returns a larger result.
+SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
+                                                    EVT DestVT,
+                                                    bool isSigned,
+                                                    DebugLoc dl) {
+  // First step, figure out the appropriate FP_TO*INT operation to use.
+  EVT NewOutTy = DestVT;
+
+  unsigned OpToUse = 0;
+
+  // Scan for the appropriate larger type to use.
+  while (1) {
+    NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
+    assert(NewOutTy.isInteger() && "Ran out of possibilities!");
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_SINT;
+      break;
+    }
+
+    if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) {
+      OpToUse = ISD::FP_TO_UINT;
+      break;
+    }
+
+    // Otherwise, try a larger type.
+  }
+
+
+  // Okay, we found the operation and type to use.
+  SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
+
+  // Truncate the result of the extended FP_TO_*INT operation to the desired
+  // size.
+  return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
+}
+
+/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
+///
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+  EVT VT = Op.getValueType();
+  EVT SHVT = TLI.getShiftAmountTy(VT);
+  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: assert(0 && "Unhandled Expand type in BSWAP!");
+  case MVT::i16:
+    Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+  case MVT::i32:
+    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(0xFF0000, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, VT));
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+  case MVT::i64:
+    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, SHVT));
+    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, SHVT));
+    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, SHVT));
+    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, SHVT));
+    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, DAG.getConstant(255ULL<<48, VT));
+    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, DAG.getConstant(255ULL<<40, VT));
+    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, DAG.getConstant(255ULL<<32, VT));
+    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, VT));
+    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, VT));
+    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(255ULL<<8 , VT));
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
+    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
+    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
+    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
+    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
+    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
+  }
+}
+
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+  APInt Val = APInt(NumBits, ByteVal);
+  unsigned Shift = 8;
+  for (unsigned i = NumBits; i > 8; i >>= 1) {
+    Val = (Val << Shift) | Val;
+    Shift <<= 1;
+  }
+  return Val;
+}
+
+/// ExpandBitCount - Expand the specified bitcount instruction into operations.
+///
+SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
+                                             DebugLoc dl) {
+  switch (Opc) {
+  default: assert(0 && "Cannot expand this yet!");
+  case ISD::CTPOP: {
+    EVT VT = Op.getValueType();
+    EVT ShVT = TLI.getShiftAmountTy(VT);
+    unsigned Len = VT.getSizeInBits();
+
+    assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+           "CTPOP not implemented for this type.");
+
+    // This is the "best" algorithm from
+    // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+    SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+    SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+    SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+    SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+    // v = v - ((v >> 1) & 0x55555555...)
+    Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(1, ShVT)),
+                                 Mask55));
+    // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+    Op = DAG.getNode(ISD::ADD, dl, VT,
+                     DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+                     DAG.getNode(ISD::AND, dl, VT,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(2, ShVT)),
+                                 Mask33));
+    // v = (v + (v >> 4)) & 0x0F0F0F0F...
+    Op = DAG.getNode(ISD::AND, dl, VT,
+                     DAG.getNode(ISD::ADD, dl, VT, Op,
+                                 DAG.getNode(ISD::SRL, dl, VT, Op,
+                                             DAG.getConstant(4, ShVT))),
+                     Mask0F);
+    // v = (v * 0x01010101...) >> (Len - 8)
+    Op = DAG.getNode(ISD::SRL, dl, VT,
+                     DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+                     DAG.getConstant(Len - 8, ShVT));
+
+    return Op;
+  }
+  case ISD::CTLZ: {
+    // for now, we do this:
+    // x = x | (x >> 1);
+    // x = x | (x >> 2);
+    // ...
+    // x = x | (x >>16);
+    // x = x | (x >>32); // for 64-bit input
+    // return popcount(~x);
+    //
+    // but see also: http://www.hackersdelight.org/HDcode/nlz.cc
+    EVT VT = Op.getValueType();
+    EVT ShVT = TLI.getShiftAmountTy(VT);
+    unsigned len = VT.getSizeInBits();
+    for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+      SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
+      Op = DAG.getNode(ISD::OR, dl, VT, Op,
+                       DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
+    }
+    Op = DAG.getNOT(dl, Op, VT);
+    return DAG.getNode(ISD::CTPOP, dl, VT, Op);
+  }
+  case ISD::CTTZ: {
+    // for now, we use: { return popcount(~x & (x - 1)); }
+    // unless the target has ctlz but not ctpop, in which case we use:
+    // { return 32 - nlz(~x & (x-1)); }
+    // see also http://www.hackersdelight.org/HDcode/ntz.cc
+    EVT VT = Op.getValueType();
+    SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
+                               DAG.getNOT(dl, Op, VT),
+                               DAG.getNode(ISD::SUB, dl, VT, Op,
+                                           DAG.getConstant(1, VT)));
+    // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
+    if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+      return DAG.getNode(ISD::SUB, dl, VT,
+                         DAG.getConstant(VT.getSizeInBits(), VT),
+                         DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));
+    return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3);
+  }
+  }
+}
+
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+  unsigned Opc = Node->getOpcode();
+  MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+  RTLIB::Libcall LC;
+
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unhandled atomic intrinsic Expand!");
+    break;
+  case ISD::ATOMIC_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+    }
+    break;
+  case ISD::ATOMIC_CMP_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_ADD:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_SUB:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_AND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_OR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_XOR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_NAND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+    }
+    break;
+  }
+
+  return ExpandChainLibCall(LC, Node, false);
false); +} + +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl &Results) { + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTTZ: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_DISPATCHSETUP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.push_back(DAG.getConstant(0, MVT::i32)); + Results.push_back(Node->getOperand(0)); + break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + TargetLowering::ArgListTy Args; + std::pair CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. 
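+  // For example, on a target where i32 ISD::ATOMIC_LOAD_ADD is Expand, the
+  // node becomes a call to the __sync_* helper chosen by ExpandAtomic above
+  // (RTLIB::SYNC_FETCH_AND_ADD_4 in that case). A sketch of the runtime
+  // contract, assuming the usual libgcc-style signature:
+  //
+  //   extern "C" int32_t __sync_fetch_and_add_4(int32_t *Ptr, int32_t Val);
+  //   int32_t Old = __sync_fetch_and_add_4(P, V); // returns the prior value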
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + EVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, VT)); + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); + Results.push_back(DAG.getConstantFP(0, VT)); + } + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + std::pair CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + case ISD::FP_ROUND: + case ISD::BITCAST: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + EVT ExtraVT = cast(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); + if (VT.isVector()) + ShiftAmountTy = VT; + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targetting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. 
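+    // A C++ model of the sequence built below (a sketch of the semantics,
+    // assuming an f64 rounded in-register to f32 precision):
+    //
+    //   double RoundInReg(double X) {
+    //     volatile float Slot = (float)X;  // the TRUNCSTORE to the stack slot
+    //     return (double)Slot;             // the EXTLOAD widening it back
+    //   }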
+ EVT ExtraVT = cast(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, NVT)); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: { + const Value *V = cast(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-(int64_t)Align, + TLI.getPointerTy())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(TLI.getTargetData()-> + getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), + TLI.getPointerTy())); + // Store the incremented VAList to the legalized pointer + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), + false, false, 0)); + Results.push_back(Results[0].getValue(1)); + break; + } + case ISD::VACOPY: { + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast(Node->getOperand(3))->getValue(); + const Value *VS = cast(Node->getOperand(4))->getValue(); + Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), + Node->getOperand(2), MachinePointerInfo(VS), + false, false, 0); + Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. 
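+      // (A <1 x T> -> T extract merely reinterprets the same bits, which is
+      //  why a plain BITCAST suffices below.)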
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + Results.push_back(ExpandVectorBuildThroughStack(Node)); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector Mask; + cast(Node)->getMask(Mask); + + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + if (getTypeAction(EltVT) == Promote) + EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + unsigned NumElems = VT.getVectorNumElements(); + SmallVector Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(0), + DAG.getIntPtrConstant(Idx))); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Node->getOperand(1), + DAG.getIntPtrConstant(Idx - NumElems))); + } + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + EVT OpTy = Node->getOperand(0).getValueType(); + if (cast(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits()/2, + TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, Node->getValueType(0)); + Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: { + // Expand Y = FABS(X) -> Y = (X >u 0.0) ? X : fneg(X). 
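+    // A C++ model of this expansion (a sketch; note it is not bit-exact for
+    // X == +0.0, where SETUGT is false and the FNEG arm yields -0.0):
+    //
+    //   double Fabs(double X) {
+    //     return (X > 0.0 || X != X) ? X : -X;  // X >u 0.0: greater-or-NaN
+    //   }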
+ EVT VT = Node->getValueType(0); + Tmp1 = Node->getOperand(0); + Tmp2 = DAG.getConstantFP(0.0, VT); + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, ISD::SETUGT); + Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1); + Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128)); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_PPCF128)); + break; + case ISD::FP16_TO_FP32: + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + break; + case ISD::FP32_TO_FP16: + Results.push_back(ExpandLibCall(RTLIB::FPROUND_F32_F16, Node, false)); + break; + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast(Node); + // Check to see if this FP immediate is already legal. + // If this is a legal constant, turn it into a TargetConstantFP node. 
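+    // (When the immediate is not natively encodable -- the target reports
+    //  this via TargetLowering::isFPImmLegal -- ExpandConstantFP typically
+    //  spills the value to the constant pool and loads it from there.)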
+    if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+      Results.push_back(SDValue(Node, 0));
+    else
+      Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI));
+    break;
+  }
+  case ISD::EHSELECTION: {
+    unsigned Reg = TLI.getExceptionSelectorRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::EXCEPTIONADDR: {
+    unsigned Reg = TLI.getExceptionAddressRegister();
+    assert(Reg && "Can't expand to unknown register!");
+    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
+                                         Node->getValueType(0)));
+    Results.push_back(Results[0].getValue(1));
+    break;
+  }
+  case ISD::SUB: {
+    EVT VT = Node->getValueType(0);
+    assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+           TLI.isOperationLegalOrCustom(ISD::XOR, VT) &&
+           "Don't know how to expand this subtraction!");
+    Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1),
+               DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT));
+    Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, VT));
+    Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1));
+    break;
+  }
+  case ISD::UREM:
+  case ISD::SREM: {
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    bool isSigned = Node->getOpcode() == ISD::SREM;
+    unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    Tmp2 = Node->getOperand(0);
+    Tmp3 = Node->getOperand(1);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) {
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1);
+    } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) {
+      // X % Y -> X-X/Y*Y
+      Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3);
+      Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
+      Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
+    } else if (isSigned) {
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SREM_I8,
+                              RTLIB::SREM_I16, RTLIB::SREM_I32,
+                              RTLIB::SREM_I64, RTLIB::SREM_I128);
+    } else {
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UREM_I8,
+                              RTLIB::UREM_I16, RTLIB::UREM_I32,
+                              RTLIB::UREM_I64, RTLIB::UREM_I128);
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::UDIV:
+  case ISD::SDIV: {
+    bool isSigned = Node->getOpcode() == ISD::SDIV;
+    unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
+    EVT VT = Node->getValueType(0);
+    SDVTList VTs = DAG.getVTList(VT, VT);
+    if (TLI.isOperationLegalOrCustom(DivRemOpc, VT))
+      Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
+                         Node->getOperand(1));
+    else if (isSigned)
+      Tmp1 = ExpandIntLibCall(Node, true,
+                              RTLIB::SDIV_I8,
+                              RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+                              RTLIB::SDIV_I64, RTLIB::SDIV_I128);
+    else
+      Tmp1 = ExpandIntLibCall(Node, false,
+                              RTLIB::UDIV_I8,
+                              RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+                              RTLIB::UDIV_I64, RTLIB::UDIV_I128);
+    Results.push_back(Tmp1);
+    break;
+  }
+  case ISD::MULHU:
+  case ISD::MULHS: {
+    unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ?
ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + Tmp1 = ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128); + Results.push_back(Tmp1); + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + EVT OType = Node->getValueType(1); + + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(Cmp); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, + Node->getOpcode () == ISD::UADDO ? 
+ ISD::SETULT : ISD::SETUGT)); + break; + } + case ISD::UMULO: + case ISD::SMULO: { + EVT VT = Node->getValueType(0); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue BottomHalf; + SDValue TopHalf; + static const unsigned Ops[2][3] = + { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, + { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; + bool isSigned = Node->getOpcode() == ISD::SMULO; + if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { + BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); + } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { + BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, + RHS); + TopHalf = BottomHalf.getValue(1); + } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() * 2))) { + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(1)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + SDValue Ret = ExpandLibCall(LC, Node, isSigned); + BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); + TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, + DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); + TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + } + if (isSigned) { + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + TLI.getShiftAmountTy(BottomHalf.getValueType())); + Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, + ISD::SETNE); + } else { + TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, + DAG.getConstant(0, VT), ISD::SETNE); + } + Results.push_back(BottomHalf); + Results.push_back(TopHalf); + break; + } + case ISD::BUILD_PAIR: { + EVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits()/2, + TLI.getShiftAmountTy(PairTy))); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + 
DAG.getConstant(0, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + EVT PTy = TLI.getPointerTy(); + + const TargetData &TD = *TLI.getTargetData(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, PTy, + Index, DAG.getConstant(EntrySize, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(), MemVT, + false, false, 0); + Addr = LD; + if (TM.getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp2, + DAG.getConstant(0, Tmp2.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl); + + // If we expanded the SETCC into an AND/OR, return the new node + if (Tmp2.getNode() == 0) { + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. 
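+    // A C++ model of this fallback (a sketch): the boolean result is
+    // materialized as a conditional select of the constants 1 and 0,
+    //
+    //   int SetCC(T A, T B) { return Cmp(A, B) ? 1 : 0; }
+    //
+    // which is exactly the SELECT_CC built below.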
+ EVT VT = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + SDValue CC = Node->getOperand(4); + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()), + Tmp1, Tmp2, CC, dl); + + assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!"); + Tmp2 = DAG.getConstant(0, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2, + Tmp3, Tmp4, CC); + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), + Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); + + assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); + Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2, + Tmp3, Node->getOperand(4)); + Results.push_back(Tmp1); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); + break; + } +} +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl &Results) { + EVT OVT = Node->getValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC) { + OVT = Node->getOperand(0).getValueType(); + } + EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + DebugLoc dl = Node->getDebugLoc(); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. 
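+    // Worked example (a sketch, promoting i8 to i32):
+    //   uint32_t Wide = ZeroExt(X);            // the 24 new high bits are 0
+    //   Ctlz8(X)  == Ctlz32(Wide) - (32 - 8);  // discount the 24 new zeros
+    //   Cttz8(X)  == (Cttz32(Wide) == 32 ? 8 : Cttz32(Wide)); // clamp X == 0
+    //   Ctpop8(X) == Ctpop32(Wide);            // zero bits add no population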
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + //if Tmp1 == sizeinbits(NVT) then Tmp1 = sizeinbits(Old VT) + Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), + ISD::SETEQ); + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), NVT)); + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); + break; + } + case ISD::SELECT: { + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. + Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0)); + Results.push_back(Tmp1); + break; + } + case ISD::VECTOR_SHUFFLE: { + SmallVector Mask; + cast(Node)->getMask(Mask); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. 
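+    // (Illustrative: if a v4f32 shuffle is handled as v16i8, each mask entry
+    //  i expands to the four byte indices 4*i .. 4*i+3, with undef entries
+    //  expanding to undef; ShuffleWithNarrowerEltType below does that.)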
+ Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + } +} + +// SelectionDAG::Legalize - This is the entry point for the file. +// +void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) { + /// run - This is the main entry point to this class. + /// + SelectionDAGLegalize(*this, OptLevel).LegalizeDAG(); +} + diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 00000000000..27a466b3a92 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,1428 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Result Float to Integer Conversion. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften the result of this operator!"); + + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: + R = SoftenFloatRes_ConstantFP(cast(N)); + break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FP16_TO_FP32:R = SoftenFloatRes_FP16_TO_FP32(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) + SetSoftenedFloat(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
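+  // (Illustrative: a ppcf128 built from two f64 halves becomes an i128
+  //  BUILD_PAIR of the two halves bitcast to i64.)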
+ return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { + return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size-1); + SDValue Mask = DAG.getConstant(API, NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + DebugLoc dl = N->getDebugLoc(); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), + DAG.getConstant(RSize - 1, + TLI.getShiftAmountTy(RVT))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), + DAG.getConstant(LSize - 1, + TLI.getShiftAmountTy(LVT))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. 
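+  // Worked example (f64 softened to i64; a sketch of the integer form this
+  // builds -- Mask clears the sign of LHS, SignBit keeps only RHS's sign):
+  //
+  //   uint64_t CopySign64(uint64_t L, uint64_t R) {
+  //     const uint64_t Sign = 1ULL << 63;
+  //     return (L & (Sign - 1)) | (R & Sign);
+  //   }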
+ return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special +// nodes? +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_PPCF128), + NVT, 
&Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, + L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), L->getAlignment()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
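+  // (Loads produce a value result and a chain result; redirecting users of
+  //  the old chain, SDValue(N, 1), to the new load's chain preserves the
+  //  memory-ordering dependencies across the replacement.)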
+ ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + return NewVAARG; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); + DebugLoc dl = N->getDebugLoc(); + + // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to + // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly + // match. Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE; + t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) { + NVT = (MVT::SimpleValueType)t; + // The source needs to big enough to hold the operand. + if (NVT.bitsGE(SVT)) + LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT):RTLIB::getUINTTOFP (NVT, RVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + // Sign/zero extend the argument if the libcall takes a larger type. + SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, N->getOperand(0)); + return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); +} + + +//===----------------------------------------------------------------------===// +// Operand Float to Integer Conversion.. 
+//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften this operator's operand!"); + + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; + case ISD::FP32_TO_FP16:Res = SoftenFloatOp_FP32_TO_FP16(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl) { + SDValue LHSInt = GetSoftenedFloat(NewLHS); + SDValue RHSInt = GetSoftenedFloat(NewRHS); + EVT VT = NewLHS.getValueType(); + + assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; + break; + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? 
RTLIB::OEQ_F32 : RTLIB::OEQ_F64;
+      break;
+    default: assert(false && "Do not know how to soften this setcc!");
+    }
+  }
+
+  // Use the target specific return value for comparison lib calls.
+  EVT RetVT = TLI.getCmpLibcallReturnType();
+  SDValue Ops[2] = { LHSInt, RHSInt };
+  NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewRHS = DAG.getConstant(0, RetVT);
+  CCCode = TLI.getCmpLibcallCC(LC1);
+  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT),
+                              NewLHS, NewRHS, DAG.getCondCode(CCCode));
+    NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+    NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS,
+                         NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2)));
+    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
+    NewRHS = SDValue();
+  }
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+                     GetSoftenedFloat(N->getOperand(0)));
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
+  EVT SVT = N->getOperand(0).getValueType();
+  EVT RVT = N->getValueType(0);
+
+  RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
+
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16;
+  SDValue Op = GetSoftenedFloat(N->getOperand(0));
+  return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
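+  // (A worked example, assuming the usual libgcc soft-float names: an f64
+  // SETOLT softens to a call to __ltdf2, whose i32 result is then tested
+  // against the zero constant created below using the condition supplied by
+  // TLI.getCmpLibcallCC, here SETLT.)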
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If SoftenSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only soften the stored value!");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+  SDValue Val = ST->getValue();
+  DebugLoc dl = N->getDebugLoc();
+
+  if (ST->isTruncatingStore())
+    // Do an FP_ROUND followed by a non-truncating store.
+    Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(),
+                                          Val, DAG.getIntPtrConstant(0)));
+  else
+    Val = GetSoftenedFloat(Val);
+
+  return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
+                      ST->getPointerInfo(),
+                      ST->isVolatile(), ST->isNonTemporal(),
+                      ST->getAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Float Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandFloatResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may
+/// also have invalid operands or may have other results that need promotion;
+/// we just know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandFloatResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+
+  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+  case ISD::FABS:       ExpandFloatRes_FABS(N, Lo, Hi); break;
+  case ISD::FADD:       ExpandFloatRes_FADD(N, Lo, Hi); break;
+  case ISD::FCEIL:      ExpandFloatRes_FCEIL(N, Lo, Hi); break;
+  case ISD::FCOPYSIGN:  ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break;
+  case ISD::FCOS:       ExpandFloatRes_FCOS(N, Lo, Hi); break;
+  case ISD::FDIV:       ExpandFloatRes_FDIV(N, Lo, Hi); break;
+  case ISD::FEXP:       ExpandFloatRes_FEXP(N, Lo, Hi); break;
+  case ISD::FEXP2:      ExpandFloatRes_FEXP2(N, Lo, Hi); break;
+  case ISD::FFLOOR:     ExpandFloatRes_FFLOOR(N, Lo, Hi); break;
+  case ISD::FLOG:       ExpandFloatRes_FLOG(N, Lo, Hi); break;
+  case ISD::FLOG2:      ExpandFloatRes_FLOG2(N, Lo, Hi); break;
+  case ISD::FLOG10:     ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+  case ISD::FMUL:       ExpandFloatRes_FMUL(N, Lo, Hi); break;
+  case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
+  case ISD::FNEG:       ExpandFloatRes_FNEG(N, Lo, Hi); break;
+  case ISD::FP_EXTEND:  ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break;
+  case ISD::FPOW:       ExpandFloatRes_FPOW(N, Lo, Hi); break;
+  case ISD::FPOWI:      ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+  case ISD::FRINT:      ExpandFloatRes_FRINT(N, Lo, Hi); break;
+  case ISD::FSIN:       ExpandFloatRes_FSIN(N, Lo, Hi); break;
+  case ISD::FSQRT:      ExpandFloatRes_FSQRT(N, Lo, Hi); break;
+  case ISD::FSUB:       ExpandFloatRes_FSUB(N, Lo, Hi); break;
+  case ISD::FTRUNC:     ExpandFloatRes_FTRUNC(N, Lo, Hi); break;
+  case ISD::LOAD:       ExpandFloatRes_LOAD(N, Lo, Hi); break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedFloat(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  assert(NVT.getSizeInBits() == integerPartWidth &&
+         "Do not know how to expand this float constant!");
+  APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt();
+  Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[1])), NVT);
+  Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, 1,
+                                       &C.getRawData()[0])), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  assert(N->getValueType(0) == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Tmp;
+  GetExpandedFloat(N->getOperand(0), Lo, Tmp);
+  Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp);
+  // Lo = Hi==fabs(Hi) ? 
Lo : -Lo; + Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + DAG.getCondCode(ISD::SETEQ)); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32,RTLIB::LOG10_F64, + RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + 
RTLIB::MUL_F80,
+                                          RTLIB::MUL_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N,
+                                                 SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::NEARBYINT_F32,
+                                         RTLIB::NEARBYINT_F64,
+                                         RTLIB::NEARBYINT_F80,
+                                         RTLIB::NEARBYINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0));
+  Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POW_F32, RTLIB::POW_F64,
+                                         RTLIB::POW_F80, RTLIB::POW_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::POWI_F32, RTLIB::POWI_F64,
+                                         RTLIB::POWI_F80, RTLIB::POWI_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::RINT_F32, RTLIB::RINT_F64,
+                                         RTLIB::RINT_F80, RTLIB::RINT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N,
+                                           SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SIN_F32, RTLIB::SIN_F64,
+                                         RTLIB::SIN_F80, RTLIB::SIN_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
+                                            SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::SQRT_F32, RTLIB::SQRT_F64,
+                                         RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+                                          RTLIB::SUB_F32,
+                                          RTLIB::SUB_F64,
+                                          RTLIB::SUB_F80,
+                                          RTLIB::SUB_PPCF128),
+                             N->getValueType(0), Ops, 2, false,
+                             N->getDebugLoc());
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0),
+                                         RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
+                                         RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128),
+                            N, false);
+  GetPairElements(Call, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  assert(NVT.isByteSized() && 
"Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + DebugLoc dl = N->getDebugLoc(); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. + static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + const uint64_t *Parts = 0; + + switch (SrcVT.getSimpleVT().SimpleTy) { + default: + assert(false && "Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APInt(128, 2, Parts)), + MVT::ppcf128)); + Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, DAG.getCondCode(ISD::SETLT)); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. 
+bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (TLI.getOperationAction(N->getOpcode(), N->getOperand(OpNo).getValueType())
+      == TargetLowering::Custom)
+    Res = TLI.LowerOperation(SDValue(N, 0), DAG);
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+  #ifndef NDEBUG
+      dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
+      N->dump(&DAG); dbgs() << "\n";
+  #endif
+      llvm_unreachable("Do not know how to expand this operator's operand!");
+
+    case ISD::BITCAST:         Res = ExpandOp_BITCAST(N); break;
+    case ISD::BUILD_VECTOR:    Res = ExpandOp_BUILD_VECTOR(N); break;
+    case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+
+    case ISD::BR_CC:      Res = ExpandFloatOp_BR_CC(N); break;
+    case ISD::FP_ROUND:   Res = ExpandFloatOp_FP_ROUND(N); break;
+    case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
+    case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+    case ISD::SELECT_CC:  Res = ExpandFloatOp_SELECT_CC(N); break;
+    case ISD::SETCC:      Res = ExpandFloatOp_SETCC(N); break;
+    case ISD::STORE:      Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
+                                                    OpNo); break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// FloatExpandSetCCOperands - Expand the operands of a comparison.  This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS,
+                                                SDValue &NewRHS,
+                                                ISD::CondCode &CCCode,
+                                                DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedFloat(NewLHS, LHSLo, LHSHi);
+  GetExpandedFloat(NewRHS, RHSLo, RHSHi);
+
+  EVT VT = NewLHS.getValueType();
+  assert(VT == MVT::ppcf128 && "Unsupported setcc type!");
+
+  // FIXME: This generated code sucks.  We want to generate
+  //          FCMPU crN, hi1, hi2
+  //          BNE crN, L:
+  //          FCMPU crN, lo1, lo2
+  // The following can be improved, but not that much.
+  SDValue Tmp1, Tmp2, Tmp3;
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETOEQ);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                      LHSLo, RHSLo, CCCode);
+  Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, ISD::SETUNE);
+  Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                      LHSHi, RHSHi, CCCode);
+  Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2);
+  NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3);
+  NewRHS = SDValue();   // LHS is the result, not a compare.
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
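+  // (For ppcf128 this is always the case: FloatExpandSetCCOperands above
+  // folds the comparison into a single boolean value and clears NewRHS.)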
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
+  assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+         "Logic only correct for ppcf128!");
+  SDValue Lo, Hi;
+  GetExpandedFloat(N->getOperand(0), Lo, Hi);
+  // Round it the rest of the way (e.g. to f32) if needed.
+  return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(),
+                     N->getValueType(0), Hi, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128,
+                              N->getOperand(0), DAG.getValueType(MVT::f64));
+    Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res,
+                      DAG.getIntPtrConstant(1));
+    return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
+  return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
+  EVT RVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
+  // PPC (the libcall is not available).  FIXME: Do this in a less hacky way.
+  if (RVT == MVT::i32) {
+    assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
+           "Logic only correct for ppcf128!");
+    const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
+    APFloat APF = APFloat(APInt(128, 2, TwoE31));
+    SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128);
+    //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+    // FIXME: generated code sucks.
+    return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp,
+                       DAG.getNode(ISD::ADD, dl, MVT::i32,
+                                   DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
+                                               DAG.getNode(ISD::FSUB, dl,
+                                                           MVT::ppcf128,
+                                                           N->getOperand(0),
+                                                           Tmp)),
+                                   DAG.getConstant(0x80000000, MVT::i32)),
+                       DAG.getNode(ISD::FP_TO_SINT, dl,
+                                   MVT::i32, N->getOperand(0)),
+                       DAG.getCondCode(ISD::SETGE));
+  }
+
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+  return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  StoreSDNode *ST = cast<StoreSDNode>(N);
+
+  SDValue Chain = ST->getChain();
+  SDValue Ptr = ST->getBasePtr();
+
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     ST->getValue().getValueType());
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?");
+
+  SDValue Lo, Hi;
+  GetExpandedOp(ST->getValue(), Lo, Hi);
+
+  return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
+                           ST->getPointerInfo(),
+                           ST->getMemoryVT(), ST->isVolatile(),
+                           ST->isNonTemporal(), ST->getAlignment());
+}
diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
new file mode 100644
index 00000000000..935aab0e59a
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -0,0 +1,2615 @@
+//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements integer type expansion and promotion for LegalizeTypes.
+// Promotion is the act of changing a computation in an illegal type into a
+// computation in a larger type.  For example, implementing i8 arithmetic in an
+// i32 register (often needed on powerpc).
+// Expansion is the act of changing a computation in an illegal type into a
+// computation in two identical registers of a smaller type.  For example,
+// implementing i64 arithmetic in two i32 registers (often needed on 32-bit
+// targets).
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerResult - This method is called when a result of a node is
+/// found to be in need of promotion to a larger type.  At this point, the node
+/// may also have invalid operands or may have other results that need
+/// expansion; we just know that (at least) one result needs promotion.
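+///
+/// For example, with only i32 legal, an i8 ADD is promoted by any-extending
+/// both operands to i32 and adding them at the wider width; only the low 8
+/// bits of the i32 result are meaningful (see PromoteIntRes_SimpleIntBinOp).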
+void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to promote this operator!");
+  case ISD::AssertSext:  Res = PromoteIntRes_AssertSext(N); break;
+  case ISD::AssertZext:  Res = PromoteIntRes_AssertZext(N); break;
+  case ISD::BITCAST:     Res = PromoteIntRes_BITCAST(N); break;
+  case ISD::BSWAP:       Res = PromoteIntRes_BSWAP(N); break;
+  case ISD::BUILD_PAIR:  Res = PromoteIntRes_BUILD_PAIR(N); break;
+  case ISD::Constant:    Res = PromoteIntRes_Constant(N); break;
+  case ISD::CONVERT_RNDSAT:
+                         Res = PromoteIntRes_CONVERT_RNDSAT(N); break;
+  case ISD::CTLZ:        Res = PromoteIntRes_CTLZ(N); break;
+  case ISD::CTPOP:       Res = PromoteIntRes_CTPOP(N); break;
+  case ISD::CTTZ:        Res = PromoteIntRes_CTTZ(N); break;
+  case ISD::EXTRACT_VECTOR_ELT:
+                         Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break;
+  case ISD::LOAD:        Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::SELECT:      Res = PromoteIntRes_SELECT(N); break;
+  case ISD::SELECT_CC:   Res = PromoteIntRes_SELECT_CC(N); break;
+  case ISD::SETCC:       Res = PromoteIntRes_SETCC(N); break;
+  case ISD::SHL:         Res = PromoteIntRes_SHL(N); break;
+  case ISD::SIGN_EXTEND_INREG:
+                         Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break;
+  case ISD::SRA:         Res = PromoteIntRes_SRA(N); break;
+  case ISD::SRL:         Res = PromoteIntRes_SRL(N); break;
+  case ISD::TRUNCATE:    Res = PromoteIntRes_TRUNCATE(N); break;
+  case ISD::UNDEF:       Res = PromoteIntRes_UNDEF(N); break;
+  case ISD::VAARG:       Res = PromoteIntRes_VAARG(N); break;
+
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:  Res = PromoteIntRes_INT_EXTEND(N); break;
+
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:  Res = PromoteIntRes_FP_TO_XINT(N); break;
+
+  case ISD::FP32_TO_FP16: Res = PromoteIntRes_FP32_TO_FP16(N); break;
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:         Res = PromoteIntRes_SimpleIntBinOp(N); break;
+
+  case ISD::SDIV:
+  case ISD::SREM:        Res = PromoteIntRes_SDIV(N); break;
+
+  case ISD::UDIV:
+  case ISD::UREM:        Res = PromoteIntRes_UDIV(N); break;
+
+  case ISD::SADDO:
+  case ISD::SSUBO:       Res = PromoteIntRes_SADDSUBO(N, ResNo); break;
+  case ISD::UADDO:
+  case ISD::USUBO:       Res = PromoteIntRes_UADDSUBO(N, ResNo); break;
+  case ISD::SMULO:
+  case ISD::UMULO:       Res = PromoteIntRes_XMULO(N, ResNo); break;
+
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP:
+    Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break;
+
+  case ISD::ATOMIC_CMP_SWAP:
+    Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break;
+  }
+
+  // If the result is null then the sub-method took care of registering it.
+  if (Res.getNode())
+    SetPromotedInteger(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) {
+  // Sign-extend the new bits, and continue the assertion.
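+  // For example, if an i16 value carrying an (AssertSext ..., i8) guarantee
+  // is promoted to i32, sign-extending the promoted value from i16 leaves it
+  // sign-extended from i8 as well, so the same assertion holds at i32.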
+ SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getMemOperand()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + N->getMemoryVT(), N->getChain(), N->getBasePtr(), + Op2, Op3, N->getMemOperand()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + DebugLoc dl = N->getDebugLoc(); + + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + break; + case Legal: + break; + case PromoteInteger: + if (NOutVT.bitsEq(NInVT)) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + break; + case SoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case ExpandInteger: + case ExpandFloat: + break; + case ScalarizeVector: + // Convert the element to an integer and promote it by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + case SplitVector: { + // For example, i32 = BITCAST v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + } + case WidenVector: + if (OutVT.bitsEq(NInVT)) + // The input is widened to the same size. Convert to the widened value. 
+      return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
+  }
+
+  return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+                     CreateStackStoreLoad(InOp, OutVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+
+  unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
+  return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+                     DAG.getConstant(DiffBits, TLI.getPointerTy()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
+  // The pair element type may be legal, or may not promote to the same type as
+  // the result, for example i14 = BUILD_PAIR (i7, i7).  Handle all cases.
+  return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(),
+                     TLI.getTypeToTransformTo(*DAG.getContext(),
+                     N->getValueType(0)), JoinIntegers(N->getOperand(0),
+                     N->getOperand(1)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  // FIXME there is no actual debug info here
+  DebugLoc dl = N->getDebugLoc();
+  // Zero extend things like i1, sign extend everything else.  It shouldn't
+  // matter in theory which one we pick, but this tends to give better code?
+  unsigned Opc = VT.isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+  SDValue Result = DAG.getNode(Opc, dl,
+                               TLI.getTypeToTransformTo(*DAG.getContext(), VT),
+                               SDValue(N, 0));
+  assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?");
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+           CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+           CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) &&
+          "can only promote integers");
+  EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0),
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  Op = DAG.getNode(ISD::CTLZ, dl, NVT, Op);
+  // Subtract off the extra leading bits in the bigger type.
+  return DAG.getNode(ISD::SUB, dl, NVT, Op,
+                     DAG.getConstant(NVT.getSizeInBits() -
+                                     OVT.getSizeInBits(), NVT));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) {
+  // Zero extend to the promoted type and do the count there.
+  SDValue Op = ZExtPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  EVT OVT = N->getValueType(0);
+  EVT NVT = Op.getValueType();
+  DebugLoc dl = N->getDebugLoc();
+  // The count is the same in the promoted type except if the original
+  // value was zero.  This can be handled by setting the bit just off
+  // the top of the original type.
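+  // For example, for i8 promoted to i32 this ORs in bit 8: cttz of a zero i8
+  // then yields 8 (the correct answer for i8) rather than 32, while nonzero
+  // values are unaffected because their lowest set bit is below bit 8.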
+ APInt TopBit(NVT.getSizeInBits(), 0); + TopBit.setBit(OVT.getSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); + return DAG.getNode(ISD::CTTZ, dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + DebugLoc dl = N->getDebugLoc(); + + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) + if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + ISD::AssertZext : ISD::AssertSext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + return DAG.getNode(ISD::AssertZext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + + if (getTypeAction(N->getOperand(0).getValueType()) == PromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType(); + DebugLoc dl = N->getDebugLoc(); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getPointerInfo(), + N->getMemoryVT(), N->isVolatile(), + N->isNonTemporal(), N->getAlignment()); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. +SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + DAG.getVTList(ValueVTs, 2), Ops, 2); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0),LHS,RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + assert(isTypeLegal(SVT) && "Illegal SetCC type!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(ISD::SETCC, dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); + + // Convert to the expected type. 
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + return DAG.getNode(ISD::SHL, N->getDebugLoc(), + TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), + GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + // The input value must be properly sign extended. + SDValue Res = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRA, N->getDebugLoc(), + Res.getValueType(), Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + // The input value must be properly zero extended. + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Res = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res; + + switch (getTypeAction(N->getOperand(0).getValueType())) { + default: llvm_unreachable("Unknown type action!"); + case Legal: + case ExpandInteger: + Res = N->getOperand(0); + break; + case PromoteInteger: + Res = GetPromotedInteger(N->getOperand(0)); + break; + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. 
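+  // The multiply result itself is handled below by multiplying in the wider
+  // type; e.g. for i8 SMULO, 100 * 3 is 300 in i32, and sign-extending the
+  // low 8 bits of 300 gives 44 != 300, so the overflow bit is set.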
+  if (ResNo == 1)
+    return PromoteIntRes_Overflow(N);
+
+  SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  EVT SmallVT = LHS.getValueType();
+
+  // To determine if the result overflowed in a larger type, we extend the input
+  // to the larger type, do the multiply, then check the high bits of the result
+  // to see if the overflow happened.
+  if (N->getOpcode() == ISD::SMULO) {
+    LHS = SExtPromotedInteger(LHS);
+    RHS = SExtPromotedInteger(RHS);
+  } else {
+    LHS = ZExtPromotedInteger(LHS);
+    RHS = ZExtPromotedInteger(RHS);
+  }
+  SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+  // Overflow occurred iff the high part of the result does not zero/sign-extend
+  // the low part.
+  SDValue Overflow;
+  if (N->getOpcode() == ISD::UMULO) {
+    // Unsigned overflow occurred iff the high part is non-zero.
+    SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+                             DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+    Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+                            DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+  } else {
+    // Signed overflow occurred iff the high part does not sign extend the low.
+    SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+                               Mul, DAG.getValueType(SmallVT));
+    Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+  }
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(N, 1), Overflow);
+  return Mul;
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
+  // Zero extend the input.
+  SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
+  SDValue RHS = ZExtPromotedInteger(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(),
+                                               N->getValueType(0)));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
+  SDValue Chain = N->getOperand(0); // Get the chain.
+  SDValue Ptr = N->getOperand(1); // Get the pointer.
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT);
+  unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT);
+  // The argument is passed as NumRegs registers of type RegVT.
+
+  SmallVector<SDValue, 8> Parts(NumRegs);
+  for (unsigned i = 0; i < NumRegs; ++i) {
+    Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+                            N->getConstantOperandVal(3));
+    Chain = Parts[i].getValue(1);
+  }
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::reverse(Parts.begin(), Parts.end());
+
+  // Assemble the parts in the promoted type.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]);
+  for (unsigned i = 1; i < NumRegs; ++i) {
+    SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]);
+    // Shift it to the right position and "or" it in.
+    Part = DAG.getNode(ISD::SHL, dl, NVT, Part,
+                       DAG.getConstant(i * RegVT.getSizeInBits(),
+                                       TLI.getPointerTy()));
+    Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part);
+  }
+
+  // Modified the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  return Res;
+}
+
+//===----------------------------------------------------------------------===//
+//  Integer Operand Promotion
+//===----------------------------------------------------------------------===//
+
+/// PromoteIntegerOperand - This method is called when the specified operand of
+/// the specified node is found to need promotion.  At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need promotion or expansion as well as the specified one.
+bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+  #ifndef NDEBUG
+    dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+  #endif
+    llvm_unreachable("Do not know how to promote this operator's operand!");
+
+  case ISD::ANY_EXTEND:   Res = PromoteIntOp_ANY_EXTEND(N); break;
+  case ISD::BITCAST:      Res = PromoteIntOp_BITCAST(N); break;
+  case ISD::BR_CC:        Res = PromoteIntOp_BR_CC(N, OpNo); break;
+  case ISD::BRCOND:       Res = PromoteIntOp_BRCOND(N, OpNo); break;
+  case ISD::BUILD_PAIR:   Res = PromoteIntOp_BUILD_PAIR(N); break;
+  case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break;
+  case ISD::CONVERT_RNDSAT:
+                          Res = PromoteIntOp_CONVERT_RNDSAT(N); break;
+  case ISD::INSERT_VECTOR_ELT:
+                          Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo); break;
+  case ISD::MEMBARRIER:   Res = PromoteIntOp_MEMBARRIER(N); break;
+  case ISD::SCALAR_TO_VECTOR:
+                          Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT:       Res = PromoteIntOp_SELECT(N, OpNo); break;
+  case ISD::SELECT_CC:    Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
+  case ISD::SETCC:        Res = PromoteIntOp_SETCC(N, OpNo); break;
+  case ISD::SIGN_EXTEND:  Res = PromoteIntOp_SIGN_EXTEND(N); break;
+  case ISD::SINT_TO_FP:   Res = PromoteIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE:        Res = PromoteIntOp_STORE(cast<StoreSDNode>(N),
+                                                   OpNo); break;
+  case ISD::TRUNCATE:     Res = PromoteIntOp_TRUNCATE(N); break;
+  case ISD::FP16_TO_FP32:
+  case ISD::UINT_TO_FP:   Res = PromoteIntOp_UINT_TO_FP(N); break;
+  case ISD::ZERO_EXTEND:  Res = PromoteIntOp_ZERO_EXTEND(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR: Res = PromoteIntOp_Shift(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// PromoteSetCCOperands - Promote the operands of a comparison.  This code is
+/// shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
+                                            ISD::CondCode CCCode) {
+  // We have to insert explicit sign or zero extends.  Note that we could
+  // insert sign extends for ALL conditions, but zero extend is cheaper on
+  // many machines (an AND instead of two shifts), so prefer it.
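+  // For example, with i16 operands promoted into i32 registers on a machine
+  // without sub-word extend instructions, the zero extend is a single
+  // "and reg, 0xffff" while the sign extend needs a shift-left/shift-right
+  // pair.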
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer comparison!");
+  case ISD::SETEQ:
+  case ISD::SETNE:
+  case ISD::SETUGE:
+  case ISD::SETUGT:
+  case ISD::SETULE:
+  case ISD::SETULT:
+    // ALL of these operations will work if we either sign or zero extend
+    // the operands (including the unsigned comparisons!). Zero extend is
+    // usually a simpler/cheaper operation, so prefer it.
+    NewLHS = ZExtPromotedInteger(NewLHS);
+    NewRHS = ZExtPromotedInteger(NewRHS);
+    break;
+  case ISD::SETGE:
+  case ISD::SETGT:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    NewLHS = SExtPromotedInteger(NewLHS);
+    NewRHS = SExtPromotedInteger(NewRHS);
+    break;
+  }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+  // This should only occur in unusual situations like bitcasting to an
+  // x86_fp80, so just turn it into a store+load.
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(2);
+  SDValue RHS = N->getOperand(3);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+  // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+  // legal types.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        N->getOperand(1), LHS, RHS,
+                                        N->getOperand(4)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  EVT SVT = TLI.getSetCCResultType(MVT::Other);
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
+
+  // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+                                        N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+  // Since the result type is legal, the operands must promote to it.
+  EVT OVT = N->getOperand(0).getValueType();
+  SDValue Lo = ZExtPromotedInteger(N->getOperand(0));
+  SDValue Hi = GetPromotedInteger(N->getOperand(1));
+  assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?");
+  DebugLoc dl = N->getDebugLoc();
+
+  Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi,
+                   DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy()));
+  return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi);
+}
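[Editor's aside — an illustrative sketch, not part of the original patch. PromoteIntOp_BUILD_PAIR above materializes the pair directly in the (legal) result type as zext(Lo) | (Hi << bits(Lo)); for an i64 built from two i32 halves:]

    #include <cstdint>

    // Build an i64 from its i32 halves the way the promoted BUILD_PAIR
    // does: zero-extend the low half, shift the high half into place,
    // and OR the two together.
    uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
      return (uint64_t)Lo | ((uint64_t)Hi << 32);
    }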
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
+  // The vector type is legal but the element type is not.  This implies
+  // that the vector is a power-of-two in length and that the element
+  // type does not have a strange size (eg: it is not i1).
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  assert(!(NumElts & 1) && "Legal vector of one illegal element?");
+
+  // Promote the inserted value.  The type does not need to match the
+  // vector element type.  Check that any extra bits introduced will be
+  // truncated away.
+  assert(N->getOperand(0).getValueType().getSizeInBits() >=
+         N->getValueType(0).getVectorElementType().getSizeInBits() &&
+         "Type of inserted value narrower than vector element type!");
+
+  SmallVector<SDValue, 16> NewOps;
+  for (unsigned i = 0; i < NumElts; ++i)
+    NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
+
+  return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+  assert((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU ||
+          CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU ||
+          CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) &&
+         "can only promote integer arguments");
+  SDValue InOp = GetPromotedInteger(N->getOperand(0));
+  return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp,
+                              N->getOperand(1), N->getOperand(2),
+                              N->getOperand(3), N->getOperand(4), CvtCode);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                         unsigned OpNo) {
+  if (OpNo == 1) {
+    // Promote the inserted value.  This is valid because the type does not
+    // have to match the vector element type.
+
+    // Check that any extra bits introduced will be truncated away.
+    assert(N->getOperand(1).getValueType().getSizeInBits() >=
+           N->getValueType(0).getVectorElementType().getSizeInBits() &&
+           "Type of inserted value narrower than vector element type!");
+    return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                  GetPromotedInteger(N->getOperand(1)),
+                                  N->getOperand(2)),
+                   0);
+  }
+
+  assert(OpNo == 2 && "Different operand and result vector types?");
+
+  // Promote the index.
+  SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        N->getOperand(1), Idx), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
+  SDValue NewOps[6];
+  DebugLoc dl = N->getDebugLoc();
+  NewOps[0] = N->getOperand(0);
+  for (unsigned i = 1; i < array_lengthof(NewOps); ++i) {
+    SDValue Flag = GetPromotedInteger(N->getOperand(i));
+    NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
+  }
+  return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
+  // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just
+  // promote the operand in place.
+  return SDValue(DAG.UpdateNodeOperands(N,
+                                GetPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
+
+  return SDValue(DAG.UpdateNodeOperands(N, Cond,
+                                        N->getOperand(1), N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
+
+  // The CC (#4) and the possible return values (#2 and #3) have legal types.
+  return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+                                        N->getOperand(3), N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 0 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
+
+  // The CC (#2) is always legal.
+  return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                  ZExtPromotedInteger(N->getOperand(1))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  DebugLoc dl = N->getDebugLoc();
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(),
+                     Op, DAG.getValueType(N->getOperand(0).getValueType()));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
+  return SDValue(DAG.UpdateNodeOperands(N,
+                                SExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = GetPromotedInteger(N->getValue());  // Get promoted value.
+
+  // Truncate the value and store the result.
+  return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+                           N->getMemoryVT(),
+                           isVolatile, isNonTemporal, Alignment);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
+  return SDValue(DAG.UpdateNodeOperands(N,
+                                ZExtPromotedInteger(N->getOperand(0))), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
+  return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Integer Result Expansion
+//===----------------------------------------------------------------------===//
+
+/// ExpandIntegerResult - This method is called when the specified result of the
+/// specified node is found to need expansion.  At this point, the node may also
+/// have invalid operands or may have other results that need promotion; we just
+/// know that (at least) one result needs expansion.
+void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Lo, Hi;
+  Lo = Hi = SDValue();
+
+  // See if the target wants to custom expand this node.
+  if (CustomLowerNode(N, N->getValueType(ResNo), true))
+    return;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to expand the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BITCAST:            ExpandRes_BITCAST(N, Lo, Hi); break;
+  case ISD::BUILD_PAIR:         ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
+  case ISD::EXTRACT_ELEMENT:    ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
+  case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::VAARG:              ExpandRes_VAARG(N, Lo, Hi); break;
+
+  case ISD::ANY_EXTEND:  ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break;
+  case ISD::AssertSext:  ExpandIntRes_AssertSext(N, Lo, Hi); break;
+  case ISD::AssertZext:  ExpandIntRes_AssertZext(N, Lo, Hi); break;
+  case ISD::BSWAP:       ExpandIntRes_BSWAP(N, Lo, Hi); break;
+  case ISD::Constant:    ExpandIntRes_Constant(N, Lo, Hi); break;
+  case ISD::CTLZ:        ExpandIntRes_CTLZ(N, Lo, Hi); break;
+  case ISD::CTPOP:       ExpandIntRes_CTPOP(N, Lo, Hi); break;
+  case ISD::CTTZ:        ExpandIntRes_CTTZ(N, Lo, Hi); break;
+  case ISD::FP_TO_SINT:  ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
+  case ISD::FP_TO_UINT:  ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+  case ISD::LOAD:        ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
+  case ISD::MUL:         ExpandIntRes_MUL(N, Lo, Hi); break;
+  case ISD::SDIV:        ExpandIntRes_SDIV(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
+  case ISD::SREM:        ExpandIntRes_SREM(N, Lo, Hi); break;
+  case ISD::TRUNCATE:    ExpandIntRes_TRUNCATE(N, Lo, Hi); break;
+  case ISD::UDIV:        ExpandIntRes_UDIV(N, Lo, Hi); break;
+  case ISD::UREM:        ExpandIntRes_UREM(N, Lo, Hi); break;
+  case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX:
+  case ISD::ATOMIC_SWAP: {
+    std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+    SplitInteger(Tmp.first, Lo, Hi);
+    ReplaceValueWith(SDValue(N, 1), Tmp.second);
+    break;
+  }
+
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
+
+  case ISD::ADD:
+  case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break;
+
+  case ISD::ADDC:
+  case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break;
+
+  case ISD::ADDE:
+  case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+  case ISD::SADDO:
+  case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+  case ISD::UADDO:
+  case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
+}
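[Editor's aside — an illustrative sketch, not part of the original patch. ExpandAtomic below only selects a runtime routine keyed on the operation and the width of the memory type; an i32 ATOMIC_LOAD_ADD, for instance, ends up as a call to __sync_fetch_and_add_4. The contract of that routine, modelled non-atomically in plain C++:]

    #include <cstdint>

    // What a SYNC_FETCH_AND_ADD_4 call is expected to compute: add Val
    // to *Ptr and return the value *Ptr held beforehand. (Sketch of the
    // contract only; the real runtime routine performs this atomically.)
    uint32_t fetchAndAdd4(uint32_t *Ptr, uint32_t Val) {
      uint32_t Old = *Ptr;
      *Ptr = Old + Val;
      return Old;
    }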
+
+/// Lower an atomic node to the appropriate builtin call.
+std::pair<SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+  unsigned Opc = Node->getOpcode();
+  MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+  RTLIB::Libcall LC;
+
+  switch (Opc) {
+  default:
+    llvm_unreachable("Unhandled atomic intrinsic Expand!");
+    break;
+  case ISD::ATOMIC_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+    }
+    break;
+  case ISD::ATOMIC_CMP_SWAP:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_ADD:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_SUB:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_AND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_OR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_XOR:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_NAND:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+    }
+    break;
+  }
+
+  return ExpandChainLibCall(LC, Node, false);
+}
+
+/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
+/// and the shift amount is a constant 'Amt'.  Expand the operation.
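[Editor's aside — an illustrative sketch, not part of the original patch. The SHL case of the function below can be modelled in plain C++ for a 64-bit value held as 32-bit halves; Amt is assumed to lie in (0, 64):]

    #include <cstdint>

    // Plain-C++ model of the constant SHL decomposition below for a
    // 64-bit value split into 32-bit halves.  Requires 0 < Amt < 64.
    void shl64Parts(uint32_t InL, uint32_t InH, unsigned Amt,
                    uint32_t &Lo, uint32_t &Hi) {
      if (Amt > 32) {          // all of Lo shifts past the boundary
        Lo = 0;
        Hi = InL << (Amt - 32);
      } else if (Amt == 32) {  // Lo becomes Hi verbatim
        Lo = 0;
        Hi = InL;
      } else {                 // bits of Lo cross into Hi
        Lo = InL << Amt;
        Hi = (InH << Amt) | (InL >> (32 - Amt));
      }
    }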
+void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi) { + DebugLoc DL = N->getDebugLoc(); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + EVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + EVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt > VTBits) { + Lo = Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + // Emit this X << 1 as X+X. + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(NVTBits-Amt, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt > VTBits) { + Lo = DAG.getConstant(0, NVT); + Hi = DAG.getConstant(0, NVT); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRL, DL, + NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy)); + Hi = DAG.getConstant(0, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, NVT); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt > VTBits) { + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt > NVTBits) { + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(Amt-NVTBits, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits-1, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(NVTBits-Amt, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. 
+bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.ComputeMaskedBits(N->getOperand(1), HighBitMask, KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, ShTy)); + + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits-1, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + +#if 0 + // FIXME: This code is broken for shifts with a zero amount! + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Compute 32-amt. + SDValue Amt2 = DAG.getNode(ISD::SUB, ShTy, + DAG.getConstant(NVTBits, ShTy), + Amt); + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + Lo = DAG.getNode(N->getOpcode(), NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, NVT, + DAG.getNode(Op1, NVT, InH, Amt), + DAG.getNode(Op2, NVT, InL, Amt2)); + return true; + } +#endif + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + DebugLoc dl = N->getDebugLoc(); + + // Get the incoming operand to be shifted. 
+ SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + + SDValue LoS, HiS, LoL, HiL; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRL: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRA: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits-1, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); + Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + return true; + } + + return false; +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + EVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Glue, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); + SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); + SDValue Cmp = + DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps, 2); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + DebugLoc dl = N->getDebugLoc(); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps, 3); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps, 3); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). 
+    Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op);
+    Hi = DAG.getUNDEF(NVT);   // The high part is undefined.
+  } else {
+    // For example, extension of an i48 to an i64.  The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand.  This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = EVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part replicates the sign bit of Lo, make it explicit.
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(NVTBits-1, TLI.getPointerTy()));
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N,
+                                               SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+  unsigned NVTBits = NVT.getSizeInBits();
+  unsigned EVTBits = EVT.getSizeInBits();
+
+  if (NVTBits < EVTBits) {
+    Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        EVTBits - NVTBits)));
+  } else {
+    Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT));
+    // The high part must be zero, make it explicit.
+    Hi = DAG.getConstant(0, NVT);
+  }
+}
+
+void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Hi, Lo);  // Note swapped operands.
+  Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo);
+  Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned NBitWidth = NVT.getSizeInBits();
+  const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
+  Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
+  Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32)
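[Editor's aside — a worked instance of the formula in the comment above, not part of the original patch. For the i64 value 0x0000000180000000, Hi = 0x00000001 is non-zero, so the count is ctlz(Hi) = 31; with Hi zero it would be 32 + ctlz(Lo). As plain C++:]

    #include <cstdint>

    // ctlz of a 64-bit value from the ctlz of its 32-bit halves,
    // mirroring the select built below.  Ctlz32 must return 32 for a
    // zero input so the Hi == 0 branch is correct.
    unsigned ctlz64(uint32_t Lo, uint32_t Hi,
                    unsigned (*Ctlz32)(uint32_t)) {
      return Hi != 0 ? Ctlz32(Hi) : 32 + Ctlz32(Lo);
    }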
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+
+  SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Lo);
+  SDValue HiLZ = DAG.getNode(ISD::CTLZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, LoLZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+  Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+                   DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  // cttz (HiLo) -> Lo != 0 ? cttz(Lo) : (cttz(Hi)+32)
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT NVT = Lo.getValueType();
+
+  SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo,
+                                   DAG.getConstant(0, NVT), ISD::SETNE);
+
+  SDValue LoLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Lo);
+  SDValue HiLZ = DAG.getNode(ISD::CTTZ, dl, NVT, Hi);
+
+  Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ,
+                   DAG.getNode(ISD::ADD, dl, NVT, HiLZ,
+                               DAG.getConstant(NVT.getSizeInBits(), NVT)));
+  Hi = DAG.getConstant(0, NVT);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
+                                               SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
+  SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  if (ISD::isNormalLoad(N)) {
+    ExpandRes_NormalLoad(N, Lo, Hi);
+    return;
+  }
+
+  assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!");
+
+  EVT VT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  ISD::LoadExtType ExtType = N->getExtensionType();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    EVT MemVT = N->getMemoryVT();
+
+    Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
+                        MemVT, isVolatile, isNonTemporal, Alignment);
+
+    // Remember the chain.
+    Ch = Lo.getValue(1);
+
+    if (ExtType == ISD::SEXTLOAD) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // lo part.
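[Editor's aside — an illustrative note, not part of the original patch. The high half of a sign-extended value is the low half's sign bit replicated into every position, which is exactly what the SRA by LoSize-1 below produces:]

    #include <cstdint>

    // High half of a sign-extended 64-bit value: an arithmetic shift
    // right by width-1 yields 0 for non-negative Lo and all ones for
    // negative Lo (assumes the usual arithmetic shift on signed types).
    int32_t highFromSignBit(int32_t Lo) {
      return Lo >> 31;
    }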
+ unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (TLI.isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), + isVolatile, isNonTemporal, Alignment); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getIntPtrConstant(IncrementSize)); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize)); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, + TLI.getPointerTy()))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, + NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, + TLI.getPointerTy())); + } + } + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = N->getDebugLoc(); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + DebugLoc dl = N->getDebugLoc(); + + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); + bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, NVT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, NVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = NVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + return; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(NVT, NVT), LL, RL); + Hi = SDValue(Lo.getNode(), 1); + return; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, NVT, LL, RL); + return; + } + } + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(NVT, NVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, NVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, NVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, NVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, NVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, LH); + return; + } + } + + // If nothing else, we can make a libcall. 
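[Editor's aside — an illustrative recap, not part of the original patch. The decomposition above uses the identity: with A = AH·2^n + AL and B = BH·2^n + BL, the low 2n bits of A·B are AL·BL + ((AL·BH + AH·BL) << n), since the AH·BH·2^2n term vanishes mod 2^2n. In plain C++ with n = 32, before the libcall fallback that follows:]

    #include <cstdint>

    // Low 64 bits of a product computed from 32-bit halves, mirroring
    // the umul_lohi/mulhu expansion above.
    uint64_t mulLow(uint64_t A, uint64_t B) {
      uint64_t AL = A & 0xffffffffu, AH = A >> 32;
      uint64_t BL = B & 0xffffffffu, BH = B >> 32;
      uint64_t LoFull = AL * BL;            // full 32x32->64 product
      uint64_t Cross  = AL * BH + AH * BL;  // only its low 32 bits matter
      return LoFull + (Cross << 32);        // == A * B (mod 2^64)
    }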
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::MUL_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::MUL_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::MUL_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::MUL_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+                                             SDValue &Lo, SDValue &Hi) {
+  SDValue LHS = Node->getOperand(0);
+  SDValue RHS = Node->getOperand(1);
+  DebugLoc dl = Node->getDebugLoc();
+
+  // Expand the result by simply replacing it with the equivalent
+  // non-overflow-checking operation.
+  SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+                            ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+                            LHS, RHS);
+  SplitInteger(Sum, Lo, Hi);
+
+  // Compute the overflow.
+  //
+  //   LHSSign -> LHS >= 0
+  //   RHSSign -> RHS >= 0
+  //   SumSign -> Sum >= 0
+  //
+  //   Add:
+  //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+  //   Sub:
+  //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+  //
+  EVT OType = Node->getValueType(1);
+  SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+                                    Node->getOpcode() == ISD::SADDO ?
+                                    ISD::SETEQ : ISD::SETNE);
+
+  SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+  // Use the calculated overflow everywhere.
+  ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
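[Editor's aside — a worked check of the sign rule above, not part of the original patch. Signed addition overflows exactly when the operands share a sign and the sum's sign differs: INT32_MAX + 1 has two non-negative operands and a negative sum. In plain C++:]

    #include <cstdint>

    // Signed-add overflow from the rule above:
    // overflow iff sign(L) == sign(R) and sign(L) != sign(Sum).
    bool saddOverflows(int32_t L, int32_t R) {
      int32_t Sum = (int32_t)((uint32_t)L + (uint32_t)R); // wrapping add
      bool LNeg = L < 0, RNeg = R < 0, SumNeg = Sum < 0;
      return LNeg == RNeg && LNeg != SumNeg;
    }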
+
+void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
+                                         SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  if (VT == MVT::i16)
+    LC = RTLIB::SDIV_I16;
+  else if (VT == MVT::i32)
+    LC = RTLIB::SDIV_I32;
+  else if (VT == MVT::i64)
+    LC = RTLIB::SDIV_I64;
+  else if (VT == MVT::i128)
+    LC = RTLIB::SDIV_I128;
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
+
+  SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+  SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // If we can emit an efficient shift operation, do so now.  Check to see if
+  // the RHS is a constant.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+    return ExpandShiftByConstant(N, CN->getZExtValue(), Lo, Hi);
+
+  // If we can determine that the high bit of the shift is zero or one, even if
+  // the low bits are variable, emit this shift in an optimized form.
+  if (ExpandShiftWithKnownAmountBit(N, Lo, Hi))
+    return;
+
+  // If this target supports shift_PARTS, use it.  First, map to the _PARTS opc.
+  unsigned PartsOpc;
+  if (N->getOpcode() == ISD::SHL) {
+    PartsOpc = ISD::SHL_PARTS;
+  } else if (N->getOpcode() == ISD::SRL) {
+    PartsOpc = ISD::SRL_PARTS;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    PartsOpc = ISD::SRA_PARTS;
+  }
+
+  // Next check to see if the target supports this SHL_PARTS operation or if it
+  // will custom expand it.
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
+  if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+      Action == TargetLowering::Custom) {
+    // Expand the subcomponents.
+    SDValue LHSL, LHSH;
+    GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
+
+    SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) };
+    EVT VT = LHSL.getValueType();
+    Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3);
+    Hi = Lo.getValue(1);
+    return;
+  }
+
+  // Otherwise, emit a libcall.
+  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+  bool isSigned;
+  if (N->getOpcode() == ISD::SHL) {
+    isSigned = false; /*sign irrelevant*/
+    if (VT == MVT::i16)
+      LC = RTLIB::SHL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SHL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SHL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SHL_I128;
+  } else if (N->getOpcode() == ISD::SRL) {
+    isSigned = false;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRL_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRL_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRL_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRL_I128;
+  } else {
+    assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
+    isSigned = true;
+    if (VT == MVT::i16)
+      LC = RTLIB::SRA_I16;
+    else if (VT == MVT::i32)
+      LC = RTLIB::SRA_I32;
+    else if (VT == MVT::i64)
+      LC = RTLIB::SRA_I64;
+    else if (VT == MVT::i128)
+      LC = RTLIB::SRA_I128;
+  }
+
+  if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi);
+    return;
+  }
+
+  if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi))
+    llvm_unreachable("Unsupported shift!");
+}
+
+void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
+                                                SDValue &Lo, SDValue &Hi) {
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Op = N->getOperand(0);
+  if (Op.getValueType().bitsLE(NVT)) {
+    // The low part is sign extension of the input (degenerates to a copy).
+    Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0));
+    // The high part is obtained by SRA'ing all but one of the bits of low part.
+    unsigned LoSize = NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+                     DAG.getConstant(LoSize-1, TLI.getPointerTy()));
+  } else {
+    // For example, extension of an i48 to an i64.  The operand type necessarily
+    // promotes to the result type, so will end up being expanded too.
+    assert(getTypeAction(Op.getValueType()) == PromoteInteger &&
+           "Only know how to promote this result!");
+    SDValue Res = GetPromotedInteger(Op);
+    assert(Res.getValueType() == N->getValueType(0) &&
+           "Operand over promoted?");
+    // Split the promoted operand.  This will simplify when it is expanded.
+    SplitInteger(Res, Lo, Hi);
+    unsigned ExcessBits =
+      Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+    Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
+                     DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
+                                                        ExcessBits)));
+  }
+}
+
+void DAGTypeLegalizer::
+ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  if (EVT.bitsLE(Lo.getValueType())) {
+    // sext_inreg the low part if needed.
+ Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits()-1, + TLI.getPointerTy())); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. + unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, + N->getOperand(0).getValueType(), N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. 
+ ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == PromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); + } +} + + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. 
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+    return false;
+
+  switch (N->getOpcode()) {
+  default:
+  #ifndef NDEBUG
+    dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
+    N->dump(&DAG); dbgs() << "\n";
+  #endif
+    llvm_unreachable("Do not know how to expand this operator's operand!");
+
+  case ISD::BITCAST:           Res = ExpandOp_BITCAST(N); break;
+  case ISD::BR_CC:             Res = ExpandIntOp_BR_CC(N); break;
+  case ISD::BUILD_VECTOR:      Res = ExpandOp_BUILD_VECTOR(N); break;
+  case ISD::EXTRACT_ELEMENT:   Res = ExpandOp_EXTRACT_ELEMENT(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = ExpandOp_SCALAR_TO_VECTOR(N); break;
+  case ISD::SELECT_CC:         Res = ExpandIntOp_SELECT_CC(N); break;
+  case ISD::SETCC:             Res = ExpandIntOp_SETCC(N); break;
+  case ISD::SINT_TO_FP:        Res = ExpandIntOp_SINT_TO_FP(N); break;
+  case ISD::STORE:   Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break;
+  case ISD::TRUNCATE:          Res = ExpandIntOp_TRUNCATE(N); break;
+  case ISD::UINT_TO_FP:        Res = ExpandIntOp_UINT_TO_FP(N); break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:              Res = ExpandIntOp_Shift(N); break;
+  case ISD::RETURNADDR:
+  case ISD::FRAMEADDR:         Res = ExpandIntOp_RETURNADDR(N); break;
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place.  Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// IntegerExpandSetCCOperands - Expand the operands of a comparison.  This code
+/// is shared among BR_CC, SELECT_CC, and SETCC handlers.
+void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
+                                                  SDValue &NewRHS,
+                                                  ISD::CondCode &CCCode,
+                                                  DebugLoc dl) {
+  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
+  GetExpandedInteger(NewLHS, LHSLo, LHSHi);
+  GetExpandedInteger(NewRHS, RHSLo, RHSHi);
+
+  if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) {
+    if (RHSLo == RHSHi) {
+      if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) {
+        if (RHSCST->isAllOnesValue()) {
+          // Equality comparison to -1.
+          NewLHS = DAG.getNode(ISD::AND, dl,
+                               LHSLo.getValueType(), LHSLo, LHSHi);
+          NewRHS = RHSLo;
+          return;
+        }
+      }
+    }
+
+    NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo);
+    NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi);
+    NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS);
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    return;
+  }
+
+  // If this is a comparison of the sign bit, just look at the top part.
+  // X > -1,  x < 0
+  if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS))
+    if ((CCCode == ISD::SETLT && CST->isNullValue()) ||     // X < 0
+        (CCCode == ISD::SETGT && CST->isAllOnesValue())) {  // X > -1
+      NewLHS = LHSHi;
+      NewRHS = RHSHi;
+      return;
+    }
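[Editor's aside — an illustrative sketch, not part of the original patch. The SETEQ/SETNE path above folds a two-word comparison into a single word-sized zero test, since two double-words are equal exactly when (LoA ^ LoB) | (HiA ^ HiB) is zero:]

    #include <cstdint>

    // Double-word equality as one zero test, as in the expansion above:
    // XOR exposes differing bits per half, OR merges the two halves.
    bool eq64(uint32_t LoA, uint32_t HiA, uint32_t LoB, uint32_t HiB) {
      return ((LoA ^ LoB) | (HiA ^ HiB)) == 0;
    }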
+  ISD::CondCode LowCC;
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer setcc!");
+  case ISD::SETLT:
+  case ISD::SETULT: LowCC = ISD::SETULT; break;
+  case ISD::SETGT:
+  case ISD::SETUGT: LowCC = ISD::SETUGT; break;
+  case ISD::SETLE:
+  case ISD::SETULE: LowCC = ISD::SETULE; break;
+  case ISD::SETGE:
+  case ISD::SETUGE: LowCC = ISD::SETUGE; break;
+  }
+
+  // Tmp1 = lo(op1) < lo(op2)   // Always unsigned comparison
+  // Tmp2 = hi(op1) < hi(op2)   // Signedness depends on operands
+  // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+
+  // NOTE: on targets without efficient SELECT of bools, we can always use
+  // this identity: (B1 ? B2 : B3) --> (B1 & B2) | (!B1 & B3)
+  TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL);
+  SDValue Tmp1, Tmp2;
+  Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()),
+                           LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
+  if (!Tmp1.getNode())
+    Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()),
+                        LHSLo, RHSLo, LowCC);
+  Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                           LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
+  if (!Tmp2.getNode())
+    Tmp2 = DAG.getNode(ISD::SETCC, dl,
+                       TLI.getSetCCResultType(LHSHi.getValueType()),
+                       LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+  ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
+  ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
+  if ((Tmp1C && Tmp1C->isNullValue()) ||
+      (Tmp2C && Tmp2C->isNullValue() &&
+       (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+        CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
+      (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
+       (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
+        CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
+    // The low part is known false; return the high part.
+    // For LE / GE, if the high part is known false, ignore the low part.
+    // For LT / GT, if the high part is known true, ignore the low part.
+    NewLHS = Tmp2;
+    NewRHS = SDValue();
+    return;
+  }
+
+  NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()),
+                             LHSHi, RHSHi, ISD::SETEQ, false,
+                             DagCombineInfo, dl);
+  if (!NewLHS.getNode())
+    NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()),
+                          LHSHi, RHSHi, ISD::SETEQ);
+  NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(),
+                       NewLHS, Tmp1, Tmp2);
+  NewRHS = SDValue();
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        DAG.getCondCode(CCCode), NewLHS, NewRHS,
+                                        N->getOperand(4)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, we need to compare the result
+  // against zero to select between true and false values.
+  if (NewRHS.getNode() == 0) {
+    NewRHS = DAG.getConstant(0, NewLHS.getValueType());
+    CCCode = ISD::SETNE;
+  }
+
+  // Update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        N->getOperand(2), N->getOperand(3),
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
+  SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
+  ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc());
+
+  // If ExpandSetCCOperands returned a scalar, use it.
+  if (NewRHS.getNode() == 0) {
+    assert(NewLHS.getValueType() == N->getValueType(0) &&
+           "Unexpected setcc expansion!");
+    return NewLHS;
+  }
+
+  // Otherwise, update N to have the operands specified.
+  return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+                                        DAG.getCondCode(CCCode)), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
+  // The value being shifted is legal, but the shift amount is too big.
+  // It follows that either the result of the shift is undefined, or the
+  // upper half of the shift amount is zero.  Just use the lower half.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(1), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant.
+  // This causes problems on 8/16-bit targets, so just truncate the constant
+  // to a valid type.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT DstVT = N->getValueType(0);
+  RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this SINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc());
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  if (ISD::isNormalStore(N))
+    return ExpandOp_NormalStore(N, OpNo);
+
+  assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+
+  EVT VT = N->getOperand(1).getValueType();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  SDValue Ch  = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  unsigned Alignment = N->getAlignment();
+  bool isVolatile = N->isVolatile();
+  bool isNonTemporal = N->isNonTemporal();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  if (N->getMemoryVT().bitsLE(NVT)) {
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+    return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+                             N->getMemoryVT(), isVolatile, isNonTemporal,
+                             Alignment);
+  }
+
+  if (TLI.isLittleEndian()) {
+    // Little-endian - low bits are at low addresses.
+    GetExpandedInteger(N->getValue(), Lo, Hi);
+
+    Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
+                      isVolatile, isNonTemporal, Alignment);
+
+    unsigned ExcessBits =
+      N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits();
+    EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits);
+
+    // Increment the pointer to the other half.
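+    // Editorial illustration (assuming a hypothetical i48 store with
+    // NVT = i32): the low i32 of the value was stored at [Ptr] above; the
+    // code below advances Ptr by IncrementSize = 4 bytes and emits an
+    // ExcessBits = 16 wide truncating store of Hi at [Ptr+4].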
+    unsigned IncrementSize = NVT.getSizeInBits()/8;
+    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                      DAG.getIntPtrConstant(IncrementSize));
+    Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
+                           NEVT, isVolatile, isNonTemporal,
+                           MinAlign(Alignment, IncrementSize));
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+  }
+
+  // Big-endian - high bits are at low addresses.  Favor aligned stores at
+  // the cost of some bit-fiddling.
+  GetExpandedInteger(N->getValue(), Lo, Hi);
+
+  EVT ExtVT = N->getMemoryVT();
+  unsigned EBytes = ExtVT.getStoreSize();
+  unsigned IncrementSize = NVT.getSizeInBits()/8;
+  unsigned ExcessBits = (EBytes - IncrementSize)*8;
+  EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+                               ExtVT.getSizeInBits() - ExcessBits);
+
+  if (ExcessBits < NVT.getSizeInBits()) {
+    // Transfer high bits from the top of Lo to the bottom of Hi.
+    Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+                     DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+                                     TLI.getPointerTy()));
+    Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+                     DAG.getNode(ISD::SRL, dl, NVT, Lo,
+                                 DAG.getConstant(ExcessBits,
+                                                 TLI.getPointerTy())));
+  }
+
+  // Store both the high bits and maybe some of the low bits.
+  Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+                         HiVT, isVolatile, isNonTemporal, Alignment);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  // Store the lowest ExcessBits bits in the second half.
+  Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+                         N->getPointerInfo().getWithOffset(IncrementSize),
+                         EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+                         isVolatile, isNonTemporal,
+                         MinAlign(Alignment, IncrementSize));
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
+  SDValue InL, InH;
+  GetExpandedInteger(N->getOperand(0), InL, InH);
+  // Just truncate the low part of the source.
+  return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
+}
+
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("Unknown FP format");
+  case MVT::f32:     return &APFloat::IEEEsingle;
+  case MVT::f64:     return &APFloat::IEEEdouble;
+  case MVT::f80:     return &APFloat::x87DoubleExtended;
+  case MVT::f128:    return &APFloat::IEEEquad;
+  case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+  }
+}
+
+SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
+  SDValue Op = N->getOperand(0);
+  EVT SrcVT = Op.getValueType();
+  EVT DstVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // The following optimization is valid only if every value in SrcVT (when
+  // treated as signed) is representable in DstVT.  Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus one.
+  const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+  if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+      TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+    // Do a signed conversion then adjust the result.
+    SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
+    SignedConv = TLI.LowerOperation(SignedConv, DAG);
+
+    // The result of the signed conversion needs adjusting if the 'sign bit' of
+    // the incoming integer was set.  To handle this, we dynamically test to see
+    // if it is set, and, if so, add a fudge factor.
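+    // An editorial note on the constants below: they are the IEEE-754
+    // single-precision bit patterns for powers of two (0x4F800000 has biased
+    // exponent 0x9F = 127 + 32, i.e. the value 1.0 * 2^32). When the sign bit
+    // of an iN source is set, the signed conversion yields Value - 2^N, so
+    // adding back 2^N produces the correct unsigned result.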
+
+    const uint64_t F32TwoE32  = 0x4F800000ULL;
+    const uint64_t F32TwoE64  = 0x5F800000ULL;
+    const uint64_t F32TwoE128 = 0x7F800000ULL;
+
+    APInt FF(32, 0);
+    if (SrcVT == MVT::i32)
+      FF = APInt(32, F32TwoE32);
+    else if (SrcVT == MVT::i64)
+      FF = APInt(32, F32TwoE64);
+    else if (SrcVT == MVT::i128)
+      FF = APInt(32, F32TwoE128);
+    else
+      assert(false && "Unsupported UINT_TO_FP!");
+
+    // Check whether the sign bit is set.
+    SDValue Lo, Hi;
+    GetExpandedInteger(Op, Lo, Hi);
+    SDValue SignSet = DAG.getSetCC(dl,
+                                   TLI.getSetCCResultType(Hi.getValueType()),
+                                   Hi, DAG.getConstant(0, Hi.getValueType()),
+                                   ISD::SETLT);
+
+    // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+    SDValue FudgePtr = DAG.getConstantPool(
+                               ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+                                           TLI.getPointerTy());
+
+    // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+    SDValue Zero = DAG.getIntPtrConstant(0);
+    SDValue Four = DAG.getIntPtrConstant(4);
+    if (TLI.isBigEndian()) std::swap(Zero, Four);
+    SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+                                 Zero, Four);
+    unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment();
+    FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset);
+    Alignment = std::min(Alignment, 4u);
+
+    // Load the value out, extending it from f32 to the destination float type.
+    // FIXME: Avoid the extend by constructing the right constant pool?
+    SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+                                   FudgePtr,
+                                   MachinePointerInfo::getConstantPool(),
+                                   MVT::f32,
+                                   false, false, Alignment);
+    return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
+  }
+
+  // Otherwise, use a libcall.
+  RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
+         "Don't know how to expand this UINT_TO_FP!");
+  return MakeLibCall(LC, DstVT, &Op, 1, true, dl);
+}
diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
new file mode 100644
index 00000000000..cedda7e7075
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -0,0 +1,1153 @@
+//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SelectionDAG::LegalizeTypes method.  It transforms
+// an arbitrary well-formed SelectionDAG to only consist of legal types.  This
+// is common code shared among the LegalizeTypes*.cpp files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden);
+
+/// PerformExpensiveChecks - Do extensive, expensive, sanity checking.
+void DAGTypeLegalizer::PerformExpensiveChecks() {
+  // If a node is not processed, then none of its values should be mapped by any
+  // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
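+  // As a concrete example of the invariant (editorial note): on a target
+  // where i64 is illegal, an expanded i64 value must be mapped by
+  // ExpandedIntegers and by no other map, while a value of legal type may at
+  // most appear in ReplacedValues. The loop below records each map hit as a
+  // bit in 'Mapped'; "exactly one map" then reduces to a power-of-two test,
+  // since Mapped & (Mapped - 1) is nonzero precisely when two or more bits
+  // are set.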
+
+  // If a node is processed, then each value with an illegal type must be mapped
+  // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues.
+  // Values with a legal type may be mapped by ReplacedValues, but not by any of
+  // the other maps.
+
+  // Note that these invariants may not hold momentarily when processing a node:
+  // the node being processed may be put in a map before being marked Processed.
+
+  // Note that it is possible to have nodes marked NewNode in the DAG.  This can
+  // occur in two ways.  Firstly, a node may be created during legalization but
+  // never passed to the legalization core.  This is usually due to the implicit
+  // folding that occurs when using the DAG.getNode operators.  Secondly, a new
+  // node may be passed to the legalization core, but when analyzed may morph
+  // into a different node, leaving the original node as a NewNode in the DAG.
+  // A node may morph if one of its operands changes during analysis.  Whether
+  // it actually morphs or not depends on whether, after updating its operands,
+  // it is equivalent to an existing node: if so, it morphs into that existing
+  // node (CSE).  An operand can change during analysis if the operand is a new
+  // node that morphs, or it is a processed value that was mapped to some other
+  // value (as recorded in ReplacedValues) in which case the operand is turned
+  // into that other value.  If a node morphs then the node it morphed into will
+  // be used instead of it for legalization, however the original node continues
+  // to live on in the DAG.
+  // The conclusion is that though there may be nodes marked NewNode in the DAG,
+  // all uses of such nodes are also marked NewNode: the result is a fungus of
+  // NewNodes growing on top of the useful nodes, and perhaps using them, but
+  // not used by them.
+
+  // If a value is mapped by ReplacedValues, then it must have no uses, except
+  // by nodes marked NewNode (see above).
+
+  // The final node obtained by mapping by ReplacedValues is not marked NewNode.
+  // Note that ReplacedValues should be applied iteratively.
+
+  // Note that the ReplacedValues map may also map deleted nodes (by iterating
+  // over the DAG we never dereference deleted nodes).  This means that it may
+  // also map nodes marked NewNode if the deallocated memory was reallocated as
+  // another node, and that new node was not seen by the LegalizeTypes machinery
+  // (for example because it was created but not used).  In general, we cannot
+  // distinguish between new nodes and deleted nodes.
+  SmallVector<SDNode*, 16> NewNodes;
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    // Remember nodes marked NewNode - they are subject to extra checking below.
+    if (I->getNodeId() == NewNode)
+      NewNodes.push_back(I);
+
+    for (unsigned i = 0, e = I->getNumValues(); i != e; ++i) {
+      SDValue Res(I, i);
+      bool Failed = false;
+
+      unsigned Mapped = 0;
+      if (ReplacedValues.find(Res) != ReplacedValues.end()) {
+        Mapped |= 1;
+        // Check that remapped values are only used by nodes marked NewNode.
+        for (SDNode::use_iterator UI = I->use_begin(), UE = I->use_end();
+             UI != UE; ++UI)
+          if (UI.getUse().getResNo() == i)
+            assert(UI->getNodeId() == NewNode &&
+                   "Remapped value has non-trivial use!");
+
+        // Check that the final result of applying ReplacedValues is not
+        // marked NewNode.
+        SDValue NewVal = ReplacedValues[Res];
+        DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal);
+        while (I != ReplacedValues.end()) {
+          NewVal = I->second;
+          I = ReplacedValues.find(NewVal);
+        }
+        assert(NewVal.getNode()->getNodeId() != NewNode &&
+               "ReplacedValues maps to a new node!");
+      }
+      if (PromotedIntegers.find(Res) != PromotedIntegers.end())
+        Mapped |= 2;
+      if (SoftenedFloats.find(Res) != SoftenedFloats.end())
+        Mapped |= 4;
+      if (ScalarizedVectors.find(Res) != ScalarizedVectors.end())
+        Mapped |= 8;
+      if (ExpandedIntegers.find(Res) != ExpandedIntegers.end())
+        Mapped |= 16;
+      if (ExpandedFloats.find(Res) != ExpandedFloats.end())
+        Mapped |= 32;
+      if (SplitVectors.find(Res) != SplitVectors.end())
+        Mapped |= 64;
+      if (WidenedVectors.find(Res) != WidenedVectors.end())
+        Mapped |= 128;
+
+      if (I->getNodeId() != Processed) {
+        // Since we allow ReplacedValues to map deleted nodes, it may map nodes
+        // marked NewNode too, since a deleted node may have been reallocated as
+        // another node that has not been seen by the LegalizeTypes machinery.
+        if ((I->getNodeId() == NewNode && Mapped > 1) ||
+            (I->getNodeId() != NewNode && Mapped != 0)) {
+          dbgs() << "Unprocessed value in a map!";
+          Failed = true;
+        }
+      } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(I)) {
+        if (Mapped > 1) {
+          dbgs() << "Value with legal type was transformed!";
+          Failed = true;
+        }
+      } else {
+        if (Mapped == 0) {
+          dbgs() << "Processed value not in any map!";
+          Failed = true;
+        } else if (Mapped & (Mapped - 1)) {
+          dbgs() << "Value in multiple maps!";
+          Failed = true;
+        }
+      }
+
+      if (Failed) {
+        if (Mapped & 1)
+          dbgs() << " ReplacedValues";
+        if (Mapped & 2)
+          dbgs() << " PromotedIntegers";
+        if (Mapped & 4)
+          dbgs() << " SoftenedFloats";
+        if (Mapped & 8)
+          dbgs() << " ScalarizedVectors";
+        if (Mapped & 16)
+          dbgs() << " ExpandedIntegers";
+        if (Mapped & 32)
+          dbgs() << " ExpandedFloats";
+        if (Mapped & 64)
+          dbgs() << " SplitVectors";
+        if (Mapped & 128)
+          dbgs() << " WidenedVectors";
+        dbgs() << "\n";
+        llvm_unreachable(0);
+      }
+    }
+  }
+
+  // Check that NewNodes are only used by other NewNodes.
+  for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) {
+    SDNode *N = NewNodes[i];
+    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+         UI != UE; ++UI)
+      assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!");
+  }
+}
+
+/// run - This is the main entry point for the type legalizer.  This does a
+/// top-down traversal of the dag, legalizing types as it goes.  Returns "true"
+/// if it made any changes.
+bool DAGTypeLegalizer::run() {
+  bool Changed = false;
+
+  // Create a dummy node (which is not added to allnodes), that adds a reference
+  // to the root node, preventing it from being deleted, and tracking any
+  // changes of the root.
+  HandleSDNode Dummy(DAG.getRoot());
+  Dummy.setNodeId(Unanalyzed);
+
+  // The root of the dag may dangle to deleted nodes until the type legalizer is
+  // done.  Set it to null to avoid confusion.
+  DAG.setRoot(SDValue());
+
+  // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess'
+  // (and remembering them) if they are leaves and assigning 'Unanalyzed' if
+  // non-leaves.
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = DAG.allnodes_end(); I != E; ++I) {
+    if (I->getNumOperands() == 0) {
+      I->setNodeId(ReadyToProcess);
+      Worklist.push_back(I);
+    } else {
+      I->setNodeId(Unanalyzed);
+    }
+  }
+
+  // Now that we have a set of nodes to process, handle them all.
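+  // A sketch of the NodeId protocol driving this loop (editorial summary of
+  // the NodeIdFlags enum in LegalizeTypes.h): a node starts Unanalyzed (-2),
+  // is given a positive count of its unprocessed operands once analyzed,
+  // counts down as operands complete, joins the worklist at ReadyToProcess
+  // (0), and ends at Processed (-3). NewNode (-1) marks nodes created during
+  // legalization that the core has not yet analyzed.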
+ while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + EVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case PromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case ExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case SoftenFloat: + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; + case ExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case ScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case SplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case WidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + EVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + default: + assert(false && "Unknown action!"); + case Legal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". + case PromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case ExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case SoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case ExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case ScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case SplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case WidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. 
+ continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). + assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + bool Failed = false; + + // Check that all result types are legal. + if (!IgnoreNodeResults(I)) + for (unsigned i = 0, NumVals = I->getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(I->getValueType(i))) { + dbgs() << "Result type " << i << " illegal!\n"; + Failed = true; + } + + // Check that all operand types are legal. 
+    for (unsigned i = 0, NumOps = I->getNumOperands(); i < NumOps; ++i)
+      if (!IgnoreNodeResults(I->getOperand(i).getNode()) &&
+          !isTypeLegal(I->getOperand(i).getValueType())) {
+        dbgs() << "Operand type " << i << " illegal!\n";
+        Failed = true;
+      }
+
+    if (I->getNodeId() != Processed) {
+       if (I->getNodeId() == NewNode)
+         dbgs() << "New node not analyzed?\n";
+       else if (I->getNodeId() == Unanalyzed)
+         dbgs() << "Unanalyzed node not noticed?\n";
+       else if (I->getNodeId() > 0)
+         dbgs() << "Operand not processed?\n";
+       else if (I->getNodeId() == ReadyToProcess)
+         dbgs() << "Not added to worklist?\n";
+       Failed = true;
+    }
+
+    if (Failed) {
+      I->dump(&DAG); dbgs() << "\n";
+      llvm_unreachable(0);
+    }
+  }
+#endif
+
+  return Changed;
+}
+
+/// AnalyzeNewNode - The specified node is the root of a subtree of potentially
+/// new nodes.  Correct any processed operands (this may change the node) and
+/// calculate the NodeId.  If the node itself changes to a processed node, it
+/// is not remapped - the caller needs to take care of this.
+/// Returns the potentially changed node.
+SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
+  // If this was an existing node that is already done, we're done.
+  if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed)
+    return N;
+
+  // Remove any stale map entries.
+  ExpungeNode(N);
+
+  // Okay, we know that this node is new.  Recursively walk all of its operands
+  // to see if they are new also.  The depth of this walk is bounded by the size
+  // of the new tree that was constructed (usually 2-3 nodes), so we don't worry
+  // about revisiting of nodes.
+  //
+  // As we walk the operands, keep track of the number of nodes that are
+  // processed.  If non-zero, this will become the new nodeid of this node.
+  // Operands may morph when they are analyzed.  If so, the node will be
+  // updated after all operands have been analyzed.  Since this is rare,
+  // the code tries to minimize overhead in the non-morphing case.
+
+  SmallVector<SDValue, 8> NewOps;
+  unsigned NumProcessed = 0;
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDValue OrigOp = N->getOperand(i);
+    SDValue Op = OrigOp;
+
+    AnalyzeNewValue(Op); // Op may morph.
+
+    if (Op.getNode()->getNodeId() == Processed)
+      ++NumProcessed;
+
+    if (!NewOps.empty()) {
+      // Some previous operand changed.  Add this one to the list.
+      NewOps.push_back(Op);
+    } else if (Op != OrigOp) {
+      // This is the first operand to change - add all operands so far.
+      NewOps.append(N->op_begin(), N->op_begin() + i);
+      NewOps.push_back(Op);
+    }
+  }
+
+  // Some operands changed - update the node.
+  if (!NewOps.empty()) {
+    SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
+    if (M != N) {
+      // The node morphed into a different node.  Normally for this to happen
+      // the original node would have to be marked NewNode.  However this can
+      // in theory momentarily not be the case while ReplaceValueWith is doing
+      // its stuff.  Mark the original node NewNode to help sanity checking.
+      N->setNodeId(NewNode);
+      if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed)
+        // It morphed into a previously analyzed node - nothing more to do.
+        return M;
+
+      // It morphed into a different new node.  Do the equivalent of passing
+      // it to AnalyzeNewNode: expunge it and calculate the NodeId.  No need
+      // to remap the operands, since they are the same as the operands we
+      // remapped above.
+      N = M;
+      ExpungeNode(N);
+    }
+  }
+
+  // Calculate the NodeId.
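+  // The NodeId is the number of operands still awaiting processing. As an
+  // editorial example: a node with three operands, two of them already
+  // Processed, gets NodeId 1, and reaches ReadyToProcess (0) - and hence the
+  // worklist - only when its last operand completes.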
+  N->setNodeId(N->getNumOperands() - NumProcessed);
+  if (N->getNodeId() == ReadyToProcess)
+    Worklist.push_back(N);
+
+  return N;
+}
+
+/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed.
+/// If the node changes to a processed node, then remap it.
+void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) {
+  Val.setNode(AnalyzeNewNode(Val.getNode()));
+  if (Val.getNode()->getNodeId() == Processed)
+    // We were passed a processed node, or it morphed into one - remap it.
+    RemapValue(Val);
+}
+
+/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it.
+/// This can occur when a node is deleted then reallocated as a new node -
+/// the mapping in ReplacedValues applies to the deleted node, not the new
+/// one.
+/// The only map that can have a deleted node as a source is ReplacedValues.
+/// Other maps can have deleted nodes as targets, but since their looked-up
+/// values are always immediately remapped using RemapValue, resulting in a
+/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue
+/// always performs correct mappings.  In order to keep the mapping correct,
+/// ExpungeNode should be called on any new nodes *before* adding them as
+/// either source or target to ReplacedValues (which typically means calling
+/// Expunge when a new node is first seen, since it may no longer be marked
+/// NewNode by the time it is added to ReplacedValues).
+void DAGTypeLegalizer::ExpungeNode(SDNode *N) {
+  if (N->getNodeId() != NewNode)
+    return;
+
+  // If N is not remapped by ReplacedValues then there is nothing to do.
+  unsigned i, e;
+  for (i = 0, e = N->getNumValues(); i != e; ++i)
+    if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end())
+      break;
+
+  if (i == e)
+    return;
+
+  // Remove N from all maps - this is expensive but rare.
+
+  for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(),
+       E = PromotedIntegers.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(),
+       E = SoftenedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(),
+       E = ScalarizedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(),
+       E = WidenedVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator
+       I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) {
+    assert(I->first.getNode() != N);
+    RemapValue(I->second.first);
+    RemapValue(I->second.second);
+  }
+
+  for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(),
+       E = ReplacedValues.end(); I != E; ++I)
+    RemapValue(I->second);
+
+  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
+    ReplacedValues.erase(SDValue(N, i));
+}
+
+/// RemapValue - If the specified value was already legalized to another value,
+/// replace it by that value.
+void DAGTypeLegalizer::RemapValue(SDValue &N) {
+  DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N);
+  if (I != ReplacedValues.end()) {
+    // Use path compression to speed up future lookups if values get multiply
+    // replaced with other values.
+    RemapValue(I->second);
+    N = I->second;
+    assert(N.getNode()->getNodeId() != NewNode && "Mapped to new node!");
+  }
+}
+
+namespace {
+  /// NodeUpdateListener - This class is a DAGUpdateListener that listens for
+  /// updates to nodes and recomputes their ready state.
+  class NodeUpdateListener : public SelectionDAG::DAGUpdateListener {
+    DAGTypeLegalizer &DTL;
+    SmallSetVector<SDNode*, 16> &NodesToAnalyze;
+  public:
+    explicit NodeUpdateListener(DAGTypeLegalizer &dtl,
+                                SmallSetVector<SDNode*, 16> &nta)
+      : DTL(dtl), NodesToAnalyze(nta) {}
+
+    virtual void NodeDeleted(SDNode *N, SDNode *E) {
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      // It is possible, though rare, for the deleted node N to occur as a
+      // target in a map, so note the replacement N -> E in ReplacedValues.
+      assert(E && "Node not replaced?");
+      DTL.NoteDeletion(N, E);
+
+      // In theory the deleted node could also have been scheduled for analysis.
+      // So remove it from the set of nodes which will be analyzed.
+      NodesToAnalyze.remove(N);
+
+      // In general nothing needs to be done for E, since it didn't change but
+      // only gained new uses.  However N -> E was just added to ReplacedValues,
+      // and the result of a ReplacedValues mapping is not allowed to be marked
+      // NewNode.  So if E is marked NewNode, then it needs to be analyzed.
+      if (E->getNodeId() == DAGTypeLegalizer::NewNode)
+        NodesToAnalyze.insert(E);
+    }
+
+    virtual void NodeUpdated(SDNode *N) {
+      // Node updates can mean pretty much anything.  It is possible that an
+      // operand was set to something already processed (f.e.) in which case
+      // this node could become ready.  Recompute its flags.
+      assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess &&
+             N->getNodeId() != DAGTypeLegalizer::Processed &&
+             "Invalid node ID for RAUW deletion!");
+      N->setNodeId(DAGTypeLegalizer::NewNode);
+      NodesToAnalyze.insert(N);
+    }
+  };
+}
+
+
+/// ReplaceValueWith - The specified value was legalized to the specified other
+/// value.  Update the DAG and NodeIds replacing any uses of From to use To
+/// instead.
+void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
+  assert(From.getNode() != To.getNode() && "Potential legalization loop!");
+
+  // If expansion produced new nodes, make sure they are properly marked.
+  ExpungeNode(From.getNode());
+  AnalyzeNewValue(To); // Expunges To.
+
+  // Anything that used the old node should now use the new one.  Note that this
+  // can potentially cause recursive merging.
+  SmallSetVector<SDNode*, 16> NodesToAnalyze;
+  NodeUpdateListener NUL(*this, NodesToAnalyze);
+  do {
+    DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+    // The old node may still be present in a map like ExpandedIntegers or
+    // PromotedIntegers.  Inform maps about the replacement.
+    ReplacedValues[From] = To;
+
+    // Process the list of nodes that need to be reanalyzed.
+    while (!NodesToAnalyze.empty()) {
+      SDNode *N = NodesToAnalyze.back();
+      NodesToAnalyze.pop_back();
+      if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+        // The node was analyzed while reanalyzing an earlier node - it is safe
+        // to skip.  Note that this is not a morphing node - otherwise it would
+        // still be marked NewNode.
+ continue; + + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // OldVal may be a target of the ReplacedValues map which was marked + // NewNode to force reanalysis because it was updated. Ensure that + // anything that ReplacedValues mapped to OldVal will now be mapped + // all the way to NewVal. + ReplacedValues[OldVal] = NewVal; + } + // The original node continues to exist in the DAG, marked NewNode. + } + } + // When recursively update nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. + } while (!From.use_empty()); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(OpEntry.getNode() == 0 && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for softened float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == Op.getValueType().getVectorElementType() && + "Invalid type for scalarized vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. 
+  std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo,
+                                        SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't expanded");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo,
+                                        SDValue Hi) {
+  assert(Lo.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for expanded float");
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op];
+  assert(Entry.first.getNode() == 0 && "Node already expanded");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo,
+                                      SDValue &Hi) {
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  RemapValue(Entry.first);
+  RemapValue(Entry.second);
+  assert(Entry.first.getNode() && "Operand isn't split");
+  Lo = Entry.first;
+  Hi = Entry.second;
+}
+
+void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo,
+                                      SDValue Hi) {
+  assert(Lo.getValueType().getVectorElementType() ==
+         Op.getValueType().getVectorElementType() &&
+         2*Lo.getValueType().getVectorNumElements() ==
+         Op.getValueType().getVectorNumElements() &&
+         Hi.getValueType() == Lo.getValueType() &&
+         "Invalid type for split vector");
+  // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant.
+  AnalyzeNewValue(Lo);
+  AnalyzeNewValue(Hi);
+
+  // Remember that this is the result of the node.
+  std::pair<SDValue, SDValue> &Entry = SplitVectors[Op];
+  assert(Entry.first.getNode() == 0 && "Node already split");
+  Entry.first = Lo;
+  Entry.second = Hi;
+}
+
+void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
+  assert(Result.getValueType() ==
+         TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
+         "Invalid type for widened vector");
+  AnalyzeNewValue(Result);
+
+  SDValue &OpEntry = WidenedVectors[Op];
+  assert(OpEntry.getNode() == 0 && "Node already widened!");
+  OpEntry = Result;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Utilities.
+//===----------------------------------------------------------------------===//
+
+/// BitConvertToInteger - Convert to an integer of the same size.
+SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
+  unsigned BitWidth = Op.getValueType().getSizeInBits();
+  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+                     EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
+}
+
+/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the
+/// same size.
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
+  assert(Op.getValueType().isVector() && "Only applies to vectors!");
+  unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+  EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
+  unsigned NumElts = Op.getValueType().getVectorNumElements();
+  return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
+                     EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
+}
+
+SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
+                                               EVT DestVT) {
+  DebugLoc dl = Op.getDebugLoc();
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and destination types.
+  SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+  // Result is a load from the stack slot.
+  return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+                     false, false, 0);
+}
+
+/// CustomLowerNode - Replace the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+/// The last parameter is FALSE if we are dealing with a node with legal
+/// result types and illegal operand.  The second parameter denotes the type of
+/// illegal OperandNo in that case.
+/// The last parameter being TRUE means we are dealing with a
+/// node with illegal result types.  The second parameter denotes the type of
+/// illegal ResNo in that case.
+bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  if (LegalizeResult)
+    TLI.ReplaceNodeResults(N, Results, DAG);
+  else
+    TLI.LowerOperationWrapper(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom lower it after all.
+    return false;
+
+  // Make everything that once used N's values now use those in Results instead.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    ReplaceValueWith(SDValue(N, i), Results[i]);
+  return true;
+}
+
+
+/// CustomWidenLowerNode - Widen the node's results with custom code provided
+/// by the target and return "true", or do nothing and return "false".
+bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
+  // See if the target wants to custom lower this node.
+  if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom)
+    return false;
+
+  SmallVector<SDValue, 8> Results;
+  TLI.ReplaceNodeResults(N, Results, DAG);
+
+  if (Results.empty())
+    // The target didn't want to custom widen lower its result after all.
+    return false;
+
+  // Update the widening map.
+  assert(Results.size() == N->getNumValues() &&
+         "Custom lowering returned the wrong number of results!");
+  for (unsigned i = 0, e = Results.size(); i != e; ++i)
+    SetWidenedVector(SDValue(N, i), Results[i]);
+  return true;
+}
+
+/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+/// which is split into two not necessarily identical pieces.
+void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) {
+  // Currently all types are split in half.
+  if (!InVT.isVector()) {
+    LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
+  } else {
+    unsigned NumElements = InVT.getVectorNumElements();
+    assert(!(NumElements & 1) && "Splitting vector, but not in half!");
+    LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(),
+                                   InVT.getVectorElementType(), NumElements/2);
+  }
+}
+
+/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+/// high parts of the given value.
+void DAGTypeLegalizer::GetPairElements(SDValue Pair,
+                                       SDValue &Lo, SDValue &Hi) {
+  DebugLoc dl = Pair.getDebugLoc();
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
+  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+                   DAG.getIntPtrConstant(0));
+  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
+                   DAG.getIntPtrConstant(1));
+}
+
+SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT,
+                                                  SDValue Index) {
+  DebugLoc dl = Index.getDebugLoc();
+  // Make sure the index type is big enough to compute in.
+  if (Index.getValueType().bitsGT(TLI.getPointerTy()))
+    Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index);
+  else
+    Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index);
+
+  // Calculate the element offset and add it to the pointer.
+  unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
+
+  Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index,
+                      DAG.getConstant(EltSize, Index.getValueType()));
+  return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr);
+}
+
+/// JoinIntegers - Build an integer with low bits Lo and high bits Hi.
+SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
+  // Arbitrarily use dlHi for result DebugLoc
+  DebugLoc dlHi = Hi.getDebugLoc();
+  DebugLoc dlLo = Lo.getDebugLoc();
+  EVT LVT = Lo.getValueType();
+  EVT HVT = Hi.getValueType();
+  EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+                              LVT.getSizeInBits() + HVT.getSizeInBits());
+
+  Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
+  Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
+  Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
+                   DAG.getConstant(LVT.getSizeInBits(), TLI.getPointerTy()));
+  return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi);
+}
+
+/// LibCallify - Convert the node into a libcall with the same prototype.
+SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
+                                     bool isSigned) {
+  unsigned NumOps = N->getNumOperands();
+  DebugLoc dl = N->getDebugLoc();
+  if (NumOps == 0) {
+    return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl);
+  } else if (NumOps == 1) {
+    SDValue Op = N->getOperand(0);
+    return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl);
+  } else if (NumOps == 2) {
+    SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+    return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl);
+  }
+  SmallVector<SDValue, 8> Ops(NumOps);
+  for (unsigned i = 0; i < NumOps; ++i)
+    Ops[i] = N->getOperand(i);
+
+  return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl);
+}
+
+/// MakeLibCall - Generate a libcall taking the given operands as arguments and
+/// returning a result of type RetVT.
+SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+                                      const SDValue *Ops, unsigned NumOps,
+                                      bool isSigned, DebugLoc dl) {
+  TargetLowering::ArgListTy Args;
+  Args.reserve(NumOps);
+
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 0; i != NumOps; ++i) {
+    Entry.Node = Ops[i];
+    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
+                    false, 0, TLI.getLibcallCallingConv(LC), false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, dl);
+  return CallInfo.first;
+}
+
+// ExpandChainLibCall - Expand a node into a call to a libcall.  Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+                                     SDNode *Node,
+                                     bool isSigned) {
+  SDValue InChain = Node->getOperand(0);
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+    EVT ArgVT = Node->getOperand(i).getValueType();
+    const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+    Entry.Node = Node->getOperand(i);
+    Entry.Ty = ArgTy;
+    Entry.isSExt = isSigned;
+    Entry.isZExt = !isSigned;
+    Args.push_back(Entry);
+  }
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy());
+
+  // Splice the libcall in wherever FindInputOutputChains tells us to.
+  const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+  std::pair<SDValue, SDValue> CallInfo =
+    TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+                    0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+                    /*isReturnValueUsed=*/true,
+                    Callee, Args, DAG, Node->getDebugLoc());
+
+  return CallInfo;
+}
+
+/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
+/// of the given type.  A target boolean is an integer value, not necessarily of
+/// type i1, the bits of which conform to getBooleanContents.
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) {
+  DebugLoc dl = Bool.getDebugLoc();
+  ISD::NodeType ExtendCode;
+  switch (TLI.getBooleanContents()) {
+  default:
+    assert(false && "Unknown BooleanContent!");
+  case TargetLowering::UndefinedBooleanContent:
+    // Extend to VT by adding rubbish bits.
+    ExtendCode = ISD::ANY_EXTEND;
+    break;
+  case TargetLowering::ZeroOrOneBooleanContent:
+    // Extend to VT by adding zero bits.
+    ExtendCode = ISD::ZERO_EXTEND;
+    break;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent: {
+    // Extend to VT by copying the sign bit.
+    ExtendCode = ISD::SIGN_EXTEND;
+    break;
+  }
+  }
+  return DAG.getNode(ExtendCode, dl, VT, Bool);
+}
+
+/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT
+/// bits in Hi.
+void DAGTypeLegalizer::SplitInteger(SDValue Op, + EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi) { + DebugLoc dl = Op.getDebugLoc(); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), TLI.getPointerTy())); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), + Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/final/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 00000000000..ed6c06d5558 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,747 @@ +//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. +// +//===----------------------------------------------------------------------===// + +#ifndef SELECTIONDAG_LEGALIZETYPES_H +#define SELECTIONDAG_LEGALIZETYPES_H + +#define DEBUG_TYPE "legalize-types" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { + const TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. 
+    Unanalyzed = -2,
+
+    /// Processed - This is a node that has already been processed.
+    Processed = -3
+
+    // 1+ - This is a node which has this many unprocessed operands.
+  };
+private:
+  enum LegalizeAction {
+    Legal,           // The target natively supports this type.
+    PromoteInteger,  // Replace this integer type with a larger one.
+    ExpandInteger,   // Split this integer type into two of half the size.
+    SoftenFloat,     // Convert this float type to a same size integer type.
+    ExpandFloat,     // Split this float type into two of half the size.
+    ScalarizeVector, // Replace this one-element vector with its element type.
+    SplitVector,     // Split this vector type into two of half the size.
+    WidenVector      // This vector type should be widened into a larger vector.
+  };
+
+  /// ValueTypeActions - This is a bitvector that contains two bits for each
+  /// simple value type, where the two bits correspond to the LegalizeAction
+  /// enum from TargetLowering.  This can be queried with "getTypeAction(VT)".
+  TargetLowering::ValueTypeActionImpl ValueTypeActions;
+
+  /// getTypeAction - Return how we should legalize values of this type.
+  LegalizeAction getTypeAction(EVT VT) const {
+    switch (ValueTypeActions.getTypeAction(VT)) {
+    default:
+      assert(false && "Unknown legalize action!");
+    case TargetLowering::Legal:
+      return Legal;
+    case TargetLowering::Promote:
+      // Promote can mean
+      //   1) For integers, use a larger integer type (e.g. i8 -> i32).
+      //   2) For vectors, use a wider vector type (e.g. v3i32 -> v4i32).
+      if (!VT.isVector())
+        return PromoteInteger;
+      return WidenVector;
+    case TargetLowering::Expand:
+      // Expand can mean
+      //   1) split scalar in half, 2) convert a float to an integer,
+      //   3) scalarize a single-element vector, 4) split a vector in two.
+      if (!VT.isVector()) {
+        if (VT.isInteger())
+          return ExpandInteger;
+        if (VT.getSizeInBits() ==
+            TLI.getTypeToTransformTo(*DAG.getContext(), VT).getSizeInBits())
+          return SoftenFloat;
+        return ExpandFloat;
+      }
+
+      if (VT.getVectorNumElements() == 1)
+        return ScalarizeVector;
+      return SplitVector;
+    }
+  }
+
+  /// isTypeLegal - Return true if this type is legal on this target.
+  bool isTypeLegal(EVT VT) const {
+    return ValueTypeActions.getTypeAction(VT) == TargetLowering::Legal;
+  }
+
+  /// IgnoreNodeResults - Pretend all of this node's results are legal.
+  bool IgnoreNodeResults(SDNode *N) const {
+    return N->getOpcode() == ISD::TargetConstant;
+  }
+
+  /// PromotedIntegers - For integer nodes that are below legal width, this map
+  /// indicates what promoted value to use.
+  DenseMap<SDValue, SDValue> PromotedIntegers;
+
+  /// ExpandedIntegers - For integer nodes that need to be expanded this map
+  /// indicates which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedIntegers;
+
+  /// SoftenedFloats - For floating point nodes converted to integers of
+  /// the same size, this map indicates the converted value to use.
+  DenseMap<SDValue, SDValue> SoftenedFloats;
+
+  /// ExpandedFloats - For float nodes that need to be expanded this map
+  /// indicates which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > ExpandedFloats;
+
+  /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the
+  /// scalar value of type 'ty' to use.
+  DenseMap<SDValue, SDValue> ScalarizedVectors;
+
+  /// SplitVectors - For nodes that need to be split this map indicates
+  /// which operands are the expanded version of the input.
+  DenseMap<SDValue, std::pair<SDValue, SDValue> > SplitVectors;
+
+  /// WidenedVectors - For vector nodes that need to be widened, indicates
+  /// the widened value to use.
+  DenseMap<SDValue, SDValue> WidenedVectors;
+
+  /// ReplacedValues - For values that have been replaced with another,
+  /// indicates the replacement value to use.
+  DenseMap<SDValue, SDValue> ReplacedValues;
+
+  /// Worklist - This defines a worklist of nodes to process.  In order to be
+  /// pushed onto this worklist, all operands of a node must have already been
+  /// processed.
+  SmallVector<SDNode*, 128> Worklist;
+
+public:
+  explicit DAGTypeLegalizer(SelectionDAG &dag)
+    : TLI(dag.getTargetLoweringInfo()), DAG(dag),
+    ValueTypeActions(TLI.getValueTypeActions()) {
+    assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
+           "Too many value types for ValueTypeActions to hold!");
+  }
+
+  /// run - This is the main entry point for the type legalizer.  This does a
+  /// top-down traversal of the dag, legalizing types as it goes.  Returns
+  /// "true" if it made any changes.
+  bool run();
+
+  void NoteDeletion(SDNode *Old, SDNode *New) {
+    ExpungeNode(Old);
+    ExpungeNode(New);
+    for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i)
+      ReplacedValues[SDValue(Old, i)] = SDValue(New, i);
+  }
+
+private:
+  SDNode *AnalyzeNewNode(SDNode *N);
+  void AnalyzeNewValue(SDValue &Val);
+  void ExpungeNode(SDNode *N);
+  void PerformExpensiveChecks();
+  void RemapValue(SDValue &N);
+
+  // Common routines.
+  SDValue BitConvertToInteger(SDValue Op);
+  SDValue BitConvertVectorToIntegerVector(SDValue Op);
+  SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT);
+  bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult);
+  bool CustomWidenLowerNode(SDNode *N, EVT VT);
+  SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index);
+  SDValue JoinIntegers(SDValue Lo, SDValue Hi);
+  SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned);
+  SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
+                      const SDValue *Ops, unsigned NumOps, bool isSigned,
+                      DebugLoc dl);
+  std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+                                                 SDNode *Node, bool isSigned);
+  std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
+  SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
+  void ReplaceValueWith(SDValue From, SDValue To);
+  void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
+  void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
+                    SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Integer Promotion Support: LegalizeIntegerTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetPromotedInteger - Given a processed operand Op which was promoted to a
+  /// larger integer type, this returns the promoted value.  The low bits of the
+  /// promoted value corresponding to the original type are exactly equal to Op.
+  /// The extra bits contain rubbish, so the promoted value may need to be zero-
+  /// or sign-extended from the original type before it is usable (the helpers
+  /// SExtPromotedInteger and ZExtPromotedInteger can do this for you).
+  /// For example, if Op is an i16 and was promoted to an i32, then this method
+  /// returns an i32, the lower 16 bits of which coincide with Op, and the upper
+  /// 16 bits of which contain rubbish.
+  SDValue GetPromotedInteger(SDValue Op) {
+    SDValue &PromotedOp = PromotedIntegers[Op];
+    RemapValue(PromotedOp);
+    assert(PromotedOp.getNode() && "Operand wasn't promoted?");
+    return PromotedOp;
+  }
+  void SetPromotedInteger(SDValue Op, SDValue Result);
+
+  /// SExtPromotedInteger - Get a promoted operand and sign extend it to the
+  /// final size.
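+  /// Illustrative: for an i16 operand promoted to i32, this replaces the
+  /// rubbish in bits 31..16 with copies of bit 15 (via SIGN_EXTEND_INREG),
+  /// making the value safe for signed arithmetic and comparisons.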
+ SDValue SExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. + SDValue ZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT); + } + + // Integer Result Promotion. + void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP32_TO_FP16(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SDIV(SDNode *N); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UDIV(SDNode *N); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. 
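+  // These routines handle nodes whose results are legal but which use a
+  // promoted operand.  Illustrative case: a store of an i8 value on a target
+  // without i8 registers keeps its narrow memory type while taking the stored
+  // value from the promoted i32 register (cf. PromoteIntOp_STORE below); the
+  // details live in LegalizeIntegerTypes.cpp.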
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MEMBARRIER(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. + void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. 
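+  // Typical example: an i64 ADD on a 32-bit target expands into an ADDC of
+  // the low halves followed by an ADDE of the high halves, threading the
+  // carry between the two i32 operations (cf. ExpandIntRes_ADDSUB below).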
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, unsigned Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BITCAST(SDNode *N); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N); + SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op. 
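+  /// Illustrative: an f64 FADD on a soft-float target is rewritten as a
+  /// runtime-library call (e.g. __adddf3) taking the two i64 bit patterns
+  /// that GetSoftenedFloat returns (cf. SoftenFloatRes_FADD and MakeLibCall).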
+ SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Result Float to Integer Conversion. + void SoftenFloatResult(SDNode *N, unsigned OpNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP32(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Operand Float to Integer Conversion. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N); + SDValue SoftenFloatOp_FP_TO_UINT(SDNode *N); + SDValue SoftenFloatOp_FP32_TO_FP16(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. 
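+  // For example, a ppcf128 value is expanded into two f64 halves (see
+  // GetExpandedFloat above); each ExpandFloatRes_* routine below produces
+  // the Lo/Hi parts for one operation.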
+ void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, DebugLoc dl); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
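+  // For example, an FADD of two <1 x f32> values becomes a plain f32 FADD of
+  // the scalarized operands (ScalarizeVecRes_BinOp), and users of the vector
+  // result are remapped to the scalar via SetScalarizedVector.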
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_InregOp(SDNode *N); + + SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. + void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
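+  // For example, a store of a split <128 x ty> value is emitted as two stores
+  // of the <64 x ty> halves at consecutive addresses (cf. SplitVecOp_STORE).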
+  bool SplitVectorOperand(SDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_UnaryOp(SDNode *N);
+
+  SDValue SplitVecOp_BITCAST(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue SplitVecOp_FP_ROUND(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GetWidenedVector - Given a processed vector Op which was widened into a
+  /// larger vector, this method returns the larger vector.  The elements of
+  /// the returned vector consist of the elements of Op followed by elements
+  /// containing rubbish.  For example, if Op is a v2i32 that was widened to a
+  /// v4i32, then this method returns a v4i32 for which the first two elements
+  /// are the same as those of Op, while the last two elements contain rubbish.
+  SDValue GetWidenedVector(SDValue Op) {
+    SDValue &WidenedOp = WidenedVectors[Op];
+    RemapValue(WidenedOp);
+    assert(WidenedOp.getNode() && "Operand wasn't widened?");
+    return WidenedOp;
+  }
+  void SetWidenedVector(SDValue Op, SDValue Result);
+
+  // Widen Vector Result Promotion.
+  void WidenVectorResult(SDNode *N, unsigned ResNo);
+  SDValue WidenVecRes_BITCAST(SDNode* N);
+  SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
+  SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
+  SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
+  SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N);
+  SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
+  SDValue WidenVecRes_LOAD(SDNode* N);
+  SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
+  SDValue WidenVecRes_SELECT(SDNode* N);
+  SDValue WidenVecRes_SELECT_CC(SDNode* N);
+  SDValue WidenVecRes_SETCC(SDNode* N);
+  SDValue WidenVecRes_UNDEF(SDNode *N);
+  SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
+  SDValue WidenVecRes_VSETCC(SDNode* N);
+
+  SDValue WidenVecRes_Binary(SDNode *N);
+  SDValue WidenVecRes_Convert(SDNode *N);
+  SDValue WidenVecRes_POWI(SDNode *N);
+  SDValue WidenVecRes_Shift(SDNode *N);
+  SDValue WidenVecRes_Unary(SDNode *N);
+  SDValue WidenVecRes_InregOp(SDNode *N);
+
+  // Widen Vector Operand.
+  bool WidenVectorOperand(SDNode *N, unsigned ResNo);
+  SDValue WidenVecOp_BITCAST(SDNode *N);
+  SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
+  SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
+  SDValue WidenVecOp_STORE(SDNode* N);
+
+  SDValue WidenVecOp_Convert(SDNode *N);
+
+  //===--------------------------------------------------------------------===//
+  // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
+  //===--------------------------------------------------------------------===//
+
+  /// GenWidenVectorLoads - Helper function to generate a set of loads to load
+  /// a vector with a resulting wider type.  It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   Ld:      load to widen
+  SDValue GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
+                              LoadSDNode *LD);
+
+  /// GenWidenVectorExtLoads - Helper function to generate a set of extension
+  /// loads to load a vector with a resulting wider type.  It takes:
+  ///   LdChain: list of chains for the loads to be generated.
+  ///   Ld:      load to widen
+  ///   ExtType: extension element type
+  SDValue GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
+                                 LoadSDNode *LD, ISD::LoadExtType ExtType);
+
+  /// GenWidenVectorStores - Helper function to generate a set of stores to
+  /// store a widened vector into non-widened memory.  It takes:
+  ///   StChain: list of chains for the stores we have generated
+  ///   ST:      store of a widened value
+  void GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
+                            StoreSDNode *ST);
+
+  /// GenWidenVectorTruncStores - Helper function to generate a set of stores
+  /// to truncate-store a widened vector into non-widened memory.  It takes:
+  ///   StChain: list of chains for the stores we have generated
+  ///   ST:      store of a widened value
+  void GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                 StoreSDNode *ST);
+
+  /// ModifyToType - Modifies a vector input (widens or narrows) to a vector
+  /// of WidenVT.  The input vector must have the same element type as WidenVT.
+  SDValue ModifyToType(SDValue InOp, EVT WidenVT);
+
+
+  //===--------------------------------------------------------------------===//
+  // Generic Splitting: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use the fact that the illegal type is
+  // split into two not necessarily identical types.  As such they can be used
+  // for splitting vectors and expanding integers and floats.
+
+  void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isVector())
+      GetSplitVector(Op, Lo, Hi);
+    else if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
+  /// which is split (or expanded) into two not necessarily identical pieces.
+  void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT);
+
+  /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and
+  /// high parts of the given value.
+  void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi);
+
+  // Generic Result Splitting.
+  void SplitRes_MERGE_VALUES(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT      (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_SELECT_CC   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitRes_UNDEF       (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  //===--------------------------------------------------------------------===//
+  // Generic Expansion: LegalizeTypesGeneric.cpp
+  //===--------------------------------------------------------------------===//
+
+  // Legalization methods which only use the fact that the illegal type is
+  // split into two identical types of half the size, and that the Lo/Hi part
+  // is stored first in memory on little/big-endian machines, followed by the
+  // Hi/Lo part.  As such they can be used for expanding integers and floats.
+
+  void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) {
+    if (Op.getValueType().isInteger())
+      GetExpandedInteger(Op, Lo, Hi);
+    else
+      GetExpandedFloat(Op, Lo, Hi);
+  }
+
+  // Generic Result Expansion.
+  void ExpandRes_BITCAST           (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_BUILD_PAIR        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_ELEMENT   (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_NormalLoad        (SDNode *N, SDValue &Lo, SDValue &Hi);
+  void ExpandRes_VAARG             (SDNode *N, SDValue &Lo, SDValue &Hi);
+
+  // Generic Operand Expansion.
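+  // For example, a normal store of an expanded i64 becomes two i32 stores of
+  // the Lo and Hi halves at offsets 0 and 4 (see ExpandOp_NormalStore in
+  // LegalizeTypesGeneric.cpp).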
+ SDValue ExpandOp_BITCAST (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. + +#endif diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 00000000000..a75ae87f3cb --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,480 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Target/TargetData.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. + +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + default: + assert(false && "Unknown type action!"); + case Legal: + case PromoteInteger: + break; + case SoftenFloat: + // Convert the integer operand instead. + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case ExpandInteger: + case ExpandFloat: + // Convert the expanded pieces of the input. + GetExpandedOp(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case SplitVector: + GetSplitVector(InOp, Lo, Hi); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case ScalarizeVector: + // Convert the element instead. 
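+    // Illustrative: a <1 x i64> input is reduced to its i64 element, bit
+    // converted to an integer if it is not one already, and the two halves
+    // are then split out below.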
+    SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  case WidenVector: {
+    assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
+    InOp = GetWidenedVector(InOp);
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 InVT.getVectorNumElements()/2);
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
+    return;
+  }
+  }
+
+  if (InVT.isVector() && OutVT.isInteger()) {
+    // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
+    // is legal but the result is not.
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
+
+    if (isTypeLegal(NVT)) {
+      SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
+      Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+                       DAG.getIntPtrConstant(0));
+      Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
+                       DAG.getIntPtrConstant(1));
+
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+
+      return;
+    }
+  }
+
+  // Lower the bit-convert to a store/load from the stack.
+  assert(NOutVT.isByteSized() && "Expanded type not byte sized!");
+
+  // Create the stack frame object.  Make sure it is aligned for both
+  // the source and expanded destination types.
+  unsigned Alignment =
+    TLI.getTargetData()->getPrefTypeAlignment(NOutVT.
+                                              getTypeForEVT(*DAG.getContext()));
+  SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
+
+  // Emit a store to the stack slot.
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
+                               false, false, 0);
+
+  // Load the first half from the stack slot.
+  Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getIntPtrConstant(IncrementSize));
+
+  // Load the second half from the stack slot.
+  Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+                   PtrInfo.getWithOffset(IncrementSize), false,
+                   false, MinAlign(Alignment, IncrementSize));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  // Return the operands.
+  Lo = N->getOperand(0);
+  Hi = N->getOperand(1);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo,
+                                                 SDValue &Hi) {
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ?
+                   Hi : Lo;
+
+  assert(Part.getValueType() == N->getValueType(0) &&
+         "Type twice as big as expanded type not itself expanded!");
+
+  GetPairElements(Part, Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  SDValue OldVec = N->getOperand(0);
+  unsigned OldElts = OldVec.getValueType().getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  // Convert to a vector of the expanded element type, for example
+  // <3 x i64> -> <6 x i32>.
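+  // The extract of element Idx then becomes extracts of elements 2*Idx (Lo)
+  // and 2*Idx+1 (Hi) from the bit-converted vector, as computed below.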
+  EVT OldVT = N->getValueType(0);
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+
+  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+                               EVT::getVectorVT(*DAG.getContext(),
+                                                NewVT, 2*OldElts),
+                               OldVec);
+
+  // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+  SDValue Idx = N->getOperand(1);
+
+  // Make sure the type of Idx is big enough to hold the new values.
+  if (Idx.getValueType().bitsLT(TLI.getPointerTy()))
+    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+                    DAG.getConstant(1, Idx.getValueType()));
+  Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
+                                            SDValue &Hi) {
+  assert(ISD::isNormalLoad(N) && "This routine only for normal loads!");
+  DebugLoc dl = N->getDebugLoc();
+
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
+  SDValue Chain = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  unsigned Alignment = LD->getAlignment();
+  bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+
+  Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
+                   isVolatile, isNonTemporal, Alignment);
+
+  // Increment the pointer to the other half.
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+                   LD->getPointerInfo().getWithOffset(IncrementSize),
+                   isVolatile, isNonTemporal,
+                   MinAlign(Alignment, IncrementSize));
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                      Hi.getValue(1));
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
+void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT OVT = N->getValueType(0);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+  const unsigned Align = N->getConstantOperandVal(3);
+
+  Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
+
+  // Handle endianness of the load.
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
+  ReplaceValueWith(SDValue(N, 1), Hi.getValue(1));
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Operand Expansion.
+//===--------------------------------------------------------------------===//
+
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  if (N->getValueType(0).isVector()) {
+    // An illegal expanding type is being converted to a legal vector type.
+    // Make a two element vector out of the expanded parts and convert that
+    // instead, but only if the new vector type is legal (otherwise there
+    // is no point, and it might create expansion loops).  For example, on
+    // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
+    EVT OVT = N->getOperand(0).getValueType();
+    EVT NVT = EVT::getVectorVT(*DAG.getContext(),
+                               TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
+                               2);
+
+    if (isTypeLegal(NVT)) {
+      SDValue Parts[2];
+      GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]);
+
+      if (TLI.isBigEndian())
+        std::swap(Parts[0], Parts[1]);
+
+      SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
+      return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
+    }
+  }
+
+  // Otherwise, store to a temporary and load out again as the new type.
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  EVT OldVT = N->getOperand(0).getValueType();
+  EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  assert(OldVT == VecVT.getVectorElementType() &&
+         "BUILD_VECTOR operand type doesn't match vector element type!");
+
+  // Build a vector of twice the length out of the expanded elements.
+  // For example <3 x i64> -> <6 x i32>.
+  std::vector<SDValue> NewElts;
+  NewElts.reserve(NumElts*2);
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Lo, Hi;
+    GetExpandedOp(N->getOperand(i), Lo, Hi);
+    if (TLI.isBigEndian())
+      std::swap(Lo, Hi);
+    NewElts.push_back(Lo);
+    NewElts.push_back(Hi);
+  }
+
+  SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                               EVT::getVectorVT(*DAG.getContext(),
+                                                NewVT, NewElts.size()),
+                               &NewElts[0], NewElts.size());
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
+  SDValue Lo, Hi;
+  GetExpandedOp(N->getOperand(0), Lo, Hi);
+  return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo;
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
+  // The vector type is legal but the element type needs expansion.
+  EVT VecVT = N->getValueType(0);
+  unsigned NumElts = VecVT.getVectorNumElements();
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue Val = N->getOperand(1);
+  EVT OldEVT = Val.getValueType();
+  EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT);
+
+  assert(OldEVT == VecVT.getVectorElementType() &&
+         "Inserted element type doesn't match vector element type!");
+
+  // Bitconvert to a vector of twice the length with elements of the expanded
+  // type, insert the expanded vector elements, and then convert back.
+  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
+  SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
+                               NewVecVT, N->getOperand(0));
+
+  SDValue Lo, Hi;
+  GetExpandedOp(Val, Lo, Hi);
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  SDValue Idx = N->getOperand(2);
+  Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx);
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx);
+  Idx = DAG.getNode(ISD::ADD, dl,
+                    Idx.getValueType(), Idx, DAG.getIntPtrConstant(1));
+  NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
+
+  // Convert the new vector to the old vector type.
+  return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  assert(VT.getVectorElementType() == N->getOperand(0).getValueType() &&
+         "SCALAR_TO_VECTOR operand type doesn't match vector element type!");
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 16> Ops(NumElts);
+  Ops[0] = N->getOperand(0);
+  SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType());
+  for (unsigned i = 1; i < NumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
+}
+
+SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
+  assert(ISD::isNormalStore(N) && "This routine only for normal stores!");
+  assert(OpNo == 1 && "Can only expand the stored value so far");
+  DebugLoc dl = N->getDebugLoc();
+
+  StoreSDNode *St = cast<StoreSDNode>(N);
+  EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(),
+                                     St->getValue().getValueType());
+  SDValue Chain = St->getChain();
+  SDValue Ptr = St->getBasePtr();
+  unsigned Alignment = St->getAlignment();
+  bool isVolatile = St->isVolatile();
+  bool isNonTemporal = St->isNonTemporal();
+
+  assert(NVT.isByteSized() && "Expanded type not byte sized!");
+  unsigned IncrementSize = NVT.getSizeInBits() / 8;
+
+  SDValue Lo, Hi;
+  GetExpandedOp(St->getValue(), Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
+                    isVolatile, isNonTemporal, Alignment);
+
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
+  Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+                    St->getPointerInfo().getWithOffset(IncrementSize),
+                    isVolatile, isNonTemporal,
+                    MinAlign(Alignment, IncrementSize));
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+}
+
+
+//===--------------------------------------------------------------------===//
+// Generic Result Splitting.
+//===--------------------------------------------------------------------===//
+
+// Be careful to make no assumptions about which of Lo/Hi is stored first in
+// memory (for vectors it is always Lo first followed by Hi in the following
+// bytes; for integers and floats it is Lo first if and only if the machine is
+// little-endian).
+
+void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N,
+                                             SDValue &Lo, SDValue &Hi) {
+  // A MERGE_VALUES node can produce any number of values.  We know that the
+  // first illegal one needs to be expanded into Lo/Hi.
+  unsigned i;
+
+  // The string of legal results gets turned into input operands, which have
+  // the same type.
+  for (i = 0; isTypeLegal(N->getValueType(i)); ++i)
+    ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i)));
+
+  // The first illegal result must be the one that needs to be expanded.
+  GetSplitOp(N->getOperand(i), Lo, Hi);
+
+  // Legalize the rest of the results into the input operands whether they are
+  // legal or not.
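+  // Illustrative: for results (i32, i64, i32) on a 32-bit target, result 0 is
+  // replaced by operand 0 above, the i64 is split into Lo/Hi, and the loop
+  // below replaces result 2 with operand 2.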
+ unsigned e = N->getNumValues(); + for (++i; i != e; ++i) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + Lo = DAG.getNode(ISD::SELECT, dl, LL.getValueType(), Cond, LL, RL); + Hi = DAG.getNode(ISD::SELECT, dl, LH.getValueType(), Cond, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + DebugLoc dl = N->getDebugLoc(); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 00000000000..167dbe0377b --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,290 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. +// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + const TargetLowering &TLI; + bool Changed; // Keep track of whether anything changed + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. 
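+  /// For example, if an unrolled VSETCC feeds several users, the expansion is
+  /// built once and every later request is answered from this cache.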
+  DenseMap<SDValue, SDValue> LegalizedNodes;
+
+  // Adds a node to the translation cache
+  void AddLegalizedOperand(SDValue From, SDValue To) {
+    LegalizedNodes.insert(std::make_pair(From, To));
+    // If someone requests legalization of the new node, return itself.
+    if (From != To)
+      LegalizedNodes.insert(std::make_pair(To, To));
+  }
+
+  // Legalizes the given node
+  SDValue LegalizeOp(SDValue Op);
+  // Assuming the node is legal, "legalize" the results
+  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
+  // Implements unrolling a VSETCC.
+  SDValue UnrollVSETCC(SDValue Op);
+  // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB
+  // isn't legal.
+  SDValue ExpandFNEG(SDValue Op);
+  // Implements vector promotion; this is essentially just bitcasting the
+  // operands to a different type and bitcasting the result back to the
+  // original type.
+  SDValue PromoteVectorOp(SDValue Op);
+
+  public:
+  bool Run();
+  VectorLegalizer(SelectionDAG& dag) :
+      DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
+};
+
+bool VectorLegalizer::Run() {
+  // The legalize process is inherently a bottom-up recursive process (users
+  // legalize their uses before themselves).  Given infinite stack space, we
+  // could just start legalizing on the root and traverse the whole graph.  In
+  // practice however, this causes us to run out of stack space on large basic
+  // blocks.  To avoid this problem, compute an ordering of the nodes where each
+  // node is only legalized after all of its operands are legalized.
+  DAG.AssignTopologicalOrder();
+  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
+    LegalizeOp(SDValue(I, 0));
+
+  // Finally, it's possible the root changed.  Get the new root.
+  SDValue OldRoot = DAG.getRoot();
+  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
+  DAG.setRoot(LegalizedNodes[OldRoot]);
+
+  LegalizedNodes.clear();
+
+  // Remove dead nodes now.
+  DAG.RemoveDeadNodes();
+
+  return Changed;
+}
+
+SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
+  // Generic legalization: just pass the operand through.
+  for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
+    AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
+  return Result.getValue(Op.getResNo());
+}
+
+SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
+  if (I != LegalizedNodes.end()) return I->second;
+
+  SDNode* Node = Op.getNode();
+
+  // Legalize the operands
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
+    Ops.push_back(LegalizeOp(Node->getOperand(i)));
+
+  SDValue Result =
+    SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
+
+  bool HasVectorValue = false;
+  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
+       J != E;
+       ++J)
+    HasVectorValue |= J->isVector();
+  if (!HasVectorValue)
+    return TranslateLegalizeResults(Op, Result);
+
+  EVT QueryType;
+  switch (Op.getOpcode()) {
+  default:
+    return TranslateLegalizeResults(Op, Result);
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::SREM:
+  case ISD::UREM:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::SELECT:
+  case ISD::SELECT_CC:
+  case ISD::VSETCC:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND:
+  case ISD::TRUNCATE:
+  case ISD::SIGN_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FNEG:
+  case ISD::FABS:
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FPOWI:
+  case ISD::FPOW:
+  case ISD::FLOG:
+  case ISD::FLOG2:
+  case ISD::FLOG10:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FFLOOR:
+    QueryType = Node->getValueType(0);
+    break;
+  case ISD::SIGN_EXTEND_INREG:
+  case ISD::FP_ROUND_INREG:
+    QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
+    break;
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    QueryType = Node->getOperand(0).getValueType();
+    break;
+  }
+
+  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
+  case TargetLowering::Promote:
+    // "Promote" the operation by bitcasting
+    Result = PromoteVectorOp(Op);
+    Changed = true;
+    break;
+  case TargetLowering::Legal: break;
+  case TargetLowering::Custom: {
+    SDValue Tmp1 = TLI.LowerOperation(Op, DAG);
+    if (Tmp1.getNode()) {
+      Result = Tmp1;
+      break;
+    }
+    // FALL THROUGH
+  }
+  case TargetLowering::Expand:
+    if (Node->getOpcode() == ISD::FNEG)
+      Result = ExpandFNEG(Op);
+    else if (Node->getOpcode() == ISD::VSETCC)
+      Result = UnrollVSETCC(Op);
+    else
+      Result = DAG.UnrollVectorOp(Op.getNode());
+    break;
+  }
+
+  // Make sure that the generated code is itself legal.
+  if (Result != Op) {
+    Result = LegalizeOp(Result);
+    Changed = true;
+  }
+
+  // Note that LegalizeOp may be reentered even from single-use nodes, which
+  // means that we always must cache transformed nodes.
+  AddLegalizedOperand(Op, Result);
+  return Result;
+}
+
+SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
+  // Vector "promotion" is basically just bitcasting and doing the operation
+  // in a different type.  For example, x86 promotes ISD::AND on v2i32 to
+  // v1i64.
+  EVT VT = Op.getValueType();
+  assert(Op.getNode()->getNumValues() == 1 &&
+         "Can't promote a vector with multiple results!");
+  EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 4> Operands(Op.getNumOperands());
+
+  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
+    if (Op.getOperand(j).getValueType().isVector())
+      Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
+    else
+      Operands[j] = Op.getOperand(j);
+  }
+
+  Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
+
+  return DAG.getNode(ISD::BITCAST, dl, VT, Op);
+}
+
+SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
+  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
+    SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType());
+    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
+                       Zero, Op.getOperand(0));
+  }
+  return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
+  EVT VT = Op.getValueType();
+  unsigned NumElems = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
+  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
+  DebugLoc dl = Op.getDebugLoc();
+  SmallVector<SDValue, 8> Ops(NumElems);
+  for (unsigned i = 0; i < NumElems; ++i) {
+    SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
+                                  DAG.getIntPtrConstant(i));
+    SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
+                                  DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT),
+                         LHSElem, RHSElem, CC);
+    Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i],
+                         DAG.getConstant(APInt::getAllOnesValue
+                                         (EltVT.getSizeInBits()), EltVT),
+                         DAG.getConstant(0, EltVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems);
+}
+
+}
+
+bool SelectionDAG::LegalizeVectors() {
+  return VectorLegalizer(*this).Run();
+}
diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 00000000000..be1620599b5
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2585 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type.  For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register.  This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size.  For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
diff --git a/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
new file mode 100644
index 00000000000..be1620599b5
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -0,0 +1,2585 @@
+//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file performs vector type splitting and scalarization for LegalizeTypes.
+// Scalarization is the act of changing a computation in an illegal one-element
+// vector type to be a computation in its scalar element type. For example,
+// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
+// as a base case when scalarizing vector arithmetic like <4 x f32>, which
+// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
+// types.
+// Splitting is the act of changing a computation in an invalid vector type to
+// be a computation in two vectors of half the size. For example, implementing
+// <128 x f32> operations in terms of two <64 x f32> operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LegalizeTypes.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Scalarization: <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue R = SDValue();
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to scalarize the result of this operator!");
+
+  case ISD::BITCAST:           R = ScalarizeVecRes_BITCAST(N); break;
+  case ISD::BUILD_VECTOR:      R = N->getOperand(0); break;
+  case ISD::CONVERT_RNDSAT:    R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    R = ScalarizeVecRes_InregOp(N); break;
+  case ISD::FPOWI:             R = ScalarizeVecRes_FPOWI(N); break;
+  case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:          R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N)); break;
+  case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
+  case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
+  case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
+  case ISD::UNDEF:             R = ScalarizeVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:    R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
+  case ISD::VSETCC:            R = ScalarizeVecRes_VSETCC(N); break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FRINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:
+  case ISD::ZERO_EXTEND:
+    R = ScalarizeVecRes_UnaryOp(N);
+    break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::FADD:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::SUB:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    R = ScalarizeVecRes_BinOp(N);
+    break;
+  }
+
+  // If R is null, the sub-method took care of registering the result.
+  if (R.getNode())
+    SetScalarizedVector(SDValue(N, ResNo), R);
+}
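The file header's description of scalarization is easy to see in miniature: a one-lane vector operation decays to the scalar operation on its element. A standalone C++ sketch of that idea (toy `V1f` type, hypothetical names; not LLVM code):

#include <cstdio>

// A one-lane vector and the scalar computation it decays to.
struct V1f { float x; };

static V1f fadd_v1f(V1f a, V1f b) {
  V1f r = { a.x + b.x };   // <1 x f32> FADD is just an f32 add
  return r;
}

int main() {
  V1f a = { 1.5f }, b = { 2.25f };
  std::printf("%g\n", fadd_v1f(a, b).x);
  return 0;
}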
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     LHS.getValueType(), LHS, RHS);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                     NewVT, N->getOperand(0));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) {
+  EVT NewVT = N->getValueType(0).getVectorElementType();
+  SDValue Op0 = GetScalarizedVector(N->getOperand(0));
+  return DAG.getConvertRndSat(NewVT, N->getDebugLoc(),
+                              Op0, DAG.getValueType(NewVT),
+                              DAG.getValueType(Op0.getValueType()),
+                              N->getOperand(3),
+                              N->getOperand(4),
+                              cast<CvtRndSatSDNode>(N)->getCvtCode());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(),
+                     N->getValueType(0).getVectorElementType(),
+                     N->getOperand(0), N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::FPOWI, N->getDebugLoc(),
+                     Op.getValueType(), Op, N->getOperand(1));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  // The value to insert may have a wider type than the vector element type,
+  // so be sure to truncate it to the element type if necessary.
+  SDValue Op = N->getOperand(1);
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  if (Op.getValueType() != EltVT)
+    // FIXME: Can this happen for floating point types?
+    Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op);
+  return Op;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
+  assert(N->isUnindexed() && "Indexed vector load?");
+
+  SDValue Result = DAG.getLoad(ISD::UNINDEXED,
+                               N->getExtensionType(),
+                               N->getValueType(0).getVectorElementType(),
+                               N->getDebugLoc(),
+                               N->getChain(), N->getBasePtr(),
+                               DAG.getUNDEF(N->getBasePtr().getValueType()),
+                               N->getPointerInfo(),
+                               N->getMemoryVT().getVectorElementType(),
+                               N->isVolatile(), N->isNonTemporal(),
+                               N->getOriginalAlignment());
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
+  return Result;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
+  // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
+  EVT DestVT = N->getValueType(0).getVectorElementType();
+  SDValue Op = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT,
+                     LHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
+  // If the operand is wider than the vector element type then it is implicitly
+  // truncated. Make that explicit here.
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue InOp = N->getOperand(0);
+  if (InOp.getValueType() != EltVT)
+    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp);
+  return InOp;
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+                     LHS.getValueType(), N->getOperand(0), LHS,
+                     GetScalarizedVector(N->getOperand(2)));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(2));
+  return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(),
+                     N->getOperand(0), N->getOperand(1),
+                     LHS, GetScalarizedVector(N->getOperand(3)),
+                     N->getOperand(4));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  DebugLoc DL = N->getDebugLoc();
+
+  // Turn it into a scalar SETCC.
+  return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
+  return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
+  // Figure out if the scalar is the LHS or RHS and return it.
+  SDValue Arg = N->getOperand(2).getOperand(0);
+  if (Arg.getOpcode() == ISD::UNDEF)
+    return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
+  unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue();
+  return GetScalarizedVector(N->getOperand(Op));
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  SDValue RHS = GetScalarizedVector(N->getOperand(1));
+  EVT NVT = N->getValueType(0).getVectorElementType();
+  EVT SVT = TLI.getSetCCResultType(LHS.getValueType());
+  DebugLoc DL = N->getDebugLoc();
+
+  // Turn it into a scalar SETCC.
+  SDValue Res = DAG.getNode(ISD::SETCC, DL, SVT, LHS, RHS, N->getOperand(2));
+
+  // VSETCC always returns a sign-extended value, while SETCC may not. The
+  // SETCC result type may not match the vector element type. Correct these.
+  if (NVT.bitsLE(SVT)) {
+    // The SETCC result type is bigger than the vector element type.
+    // Ensure the SETCC result is sign-extended.
+    if (TLI.getBooleanContents() !=
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SVT, Res,
+                        DAG.getValueType(MVT::i1));
+    // Truncate to the final type.
+    return DAG.getNode(ISD::TRUNCATE, DL, NVT, Res);
+  }
+
+  // The SETCC result type is smaller than the vector element type.
+  // If the SetCC result is not sign-extended, chop it down to MVT::i1.
+  if (TLI.getBooleanContents() !=
+      TargetLowering::ZeroOrNegativeOneBooleanContent)
+    Res = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Res);
+  // Sign extend to the final type.
+  return DAG.getNode(ISD::SIGN_EXTEND, DL, NVT, Res);
+}
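The two boolean conventions that ScalarizeVecRes_VSETCC reconciles can be sketched directly: a target whose SETCC yields 0/1 must be sign-extended from bit 0 to get the 0/-1 a vector compare promises, and the reverse direction is a truncation to bit 0. A standalone simplification in plain C++ (hypothetical helper names, not LLVM code):

#include <cstdio>

// Converting between the two boolean conventions mentioned above:
// 0/1 ("zero or one") and 0/-1 ("zero or negative one").
static int signExtendFromBit0(int b) { return -(b & 1); } // 1 -> all-ones
static int truncateToBit0(int m)     { return m & 1; }    // all-ones -> 1

int main() {
  std::printf("%d %d\n", signExtendFromBit0(1), truncateToBit0(-1)); // -1 1
  return 0;
}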
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Vector Scalarization <1 x ty> -> ty.
+//===----------------------------------------------------------------------===//
+
+bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      dbgs() << "\n";
+#endif
+      llvm_unreachable("Do not know how to scalarize this operator's operand!");
+    case ISD::BITCAST:
+      Res = ScalarizeVecOp_BITCAST(N);
+      break;
+    case ISD::CONCAT_VECTORS:
+      Res = ScalarizeVecOp_CONCAT_VECTORS(N);
+      break;
+    case ISD::EXTRACT_VECTOR_ELT:
+      Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
+      break;
+    case ISD::STORE:
+      Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place. Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
+/// to be scalarized, it must be <1 x ty>. Convert the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
+  SDValue Elt = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                     N->getValueType(0), Elt);
+}
+
+/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one -
+/// use a BUILD_VECTOR instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
+  SmallVector<SDValue, 8> Ops(N->getNumOperands());
+  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
+    Ops[i] = GetScalarizedVector(N->getOperand(i));
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0),
+                     &Ops[0], Ops.size());
+}
+
+/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to
+/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the
+/// index.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Res = GetScalarizedVector(N->getOperand(0));
+  if (Res.getValueType() != N->getValueType(0))
+    Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0),
+                      Res);
+  return Res;
+}
+
+/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be
+/// scalarized, it must be <1 x ty>. Just store the element.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of one-element vector?");
+  assert(OpNo == 1 && "Do not know how to scalarize this operand!");
+  DebugLoc dl = N->getDebugLoc();
+
+  if (N->isTruncatingStore())
+    return DAG.getTruncStore(N->getChain(), dl,
+                             GetScalarizedVector(N->getOperand(1)),
+                             N->getBasePtr(), N->getPointerInfo(),
+                             N->getMemoryVT().getVectorElementType(),
+                             N->isVolatile(), N->isNonTemporal(),
+                             N->getAlignment());
+
+  return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
+                      N->getBasePtr(), N->getPointerInfo(),
+                      N->isVolatile(), N->isNonTemporal(),
+                      N->getOriginalAlignment());
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorResult - This method is called when the specified result of the
+/// specified node is found to need vector splitting. At this point, the node
+/// may also have invalid operands or may have other results that need
+/// legalization, we just know that (at least) one result needs vector
+/// splitting.
+void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Split node result: ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Lo, Hi;
+
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "SplitVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to split the result of this operator!");
+
+  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, Lo, Hi); break;
+  case ISD::SELECT:       SplitRes_SELECT(N, Lo, Hi); break;
+  case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
+  case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
+
+  case ISD::BITCAST:           SplitVecRes_BITCAST(N, Lo, Hi); break;
+  case ISD::BUILD_VECTOR:      SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
+  case ISD::CONCAT_VECTORS:    SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
+  case ISD::CONVERT_RNDSAT:    SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
+  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
+  case ISD::FP_ROUND_INREG:    SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
+  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
+  case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
+  case ISD::LOAD:
+    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
+    break;
+  case ISD::SETCC:
+  case ISD::VSETCC:
+    SplitVecRes_SETCC(N, Lo, Hi);
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
+    break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FP_EXTEND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::FRINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:
+  case ISD::ZERO_EXTEND:
+    SplitVecRes_UnaryOp(N, Lo, Hi);
+    break;
+
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::SDIV:
+  case ISD::UDIV:
+  case ISD::FDIV:
+  case ISD::FPOW:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::UREM:
+  case ISD::SREM:
+  case ISD::FREM:
+    SplitVecRes_BinOp(N, Lo, Hi);
+    break;
+  }
+
+  // If Lo/Hi is null, the sub-method took care of registering results etc.
+  if (Lo.getNode())
+    SetSplitVector(SDValue(N, ResNo), Lo, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  SDValue RHSLo, RHSHi;
+  GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo);
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
+}
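SplitVecRes_BinOp is the simplest splitting case: both operands are split the same way, so the operation is applied independently to the two halves. A standalone C++ sketch of the same decomposition (plain arrays standing in for vector values; not LLVM code):

#include <cstdio>

// An illegal 8-lane add performed as two independent 4-lane adds.
static void add4(const float *a, const float *b, float *o) {
  for (int i = 0; i < 4; ++i)
    o[i] = a[i] + b[i];
}

int main() {
  float A[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  float B[8] = { 8, 8, 8, 8, 8, 8, 8, 8 };
  float O[8];
  add4(A,     B,     O);      // Lo half
  add4(A + 4, B + 4, O + 4);  // Hi half
  for (int i = 0; i < 8; ++i)
    std::printf("%g ", O[i]);
  std::printf("\n");
  return 0;
}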
+
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  // We know the result is a vector. The input may be either a vector or a
+  // scalar value.
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+
+  // Handle some special cases efficiently.
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+  case Legal:
+  case PromoteInteger:
+  case SoftenFloat:
+  case ScalarizeVector:
+    break;
+  case ExpandInteger:
+  case ExpandFloat:
+    // A scalar to vector conversion, where the scalar needs expansion.
+    // If the vector is being split in two then we can just convert the
+    // expanded pieces.
+    if (LoVT == HiVT) {
+      GetExpandedOp(InOp, Lo, Hi);
+      if (TLI.isBigEndian())
+        std::swap(Lo, Hi);
+      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+      return;
+    }
+    break;
+  case SplitVector:
+    // If the input is a vector that needs to be split, convert each split
+    // piece of the input now.
+    GetSplitVector(InOp, Lo, Hi);
+    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+    return;
+  }
+
+  // In the general case, convert the input to an integer and split it by hand.
+  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
+  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
+  if (TLI.isBigEndian())
+    std::swap(LoIntVT, HiIntVT);
+
+  SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+  Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+  Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
+                                                SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  unsigned LoNumElts = LoVT.getVectorNumElements();
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
+  Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
+  Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
+  DebugLoc dl = N->getDebugLoc();
+  unsigned NumSubvectors = N->getNumOperands() / 2;
+  if (NumSubvectors == 1) {
+    Lo = N->getOperand(0);
+    Hi = N->getOperand(1);
+    return;
+  }
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
+  Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size());
+
+  SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
+  Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size());
+}
+
+void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
+                                                  SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  SDValue DTyOpLo = DAG.getValueType(LoVT);
+  SDValue DTyOpHi = DAG.getValueType(HiVT);
+
+  SDValue RndOp = N->getOperand(3);
+  SDValue SatOp = N->getOperand(4);
+  ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode();
+
+  // Split the input.
+  SDValue VLo, VHi;
+  EVT InVT = N->getOperand(0).getValueType();
+  switch (getTypeAction(InVT)) {
+  default: llvm_unreachable("Unexpected type action!");
+  case Legal: {
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(0));
+    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  case SplitVector:
+    GetSplitVector(N->getOperand(0), VLo, VHi);
+    break;
+  case WidenVector: {
+    // If the result needs to be split and the input needs to be widened,
+    // the two types must have different lengths. Use the widened result
+    // and extract from it to do the split.
+    SDValue InOp = GetWidenedVector(N->getOperand(0));
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                      DAG.getIntPtrConstant(0));
+    VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                      DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  }
+
+  SDValue STyOpLo = DAG.getValueType(VLo.getValueType());
+  SDValue STyOpHi = DAG.getValueType(VHi.getValueType());
+
+  Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp,
+                            CvtCode);
+  Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp,
+                            CvtCode);
+}
+
+void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+                   DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
+}
+
+void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
+                                         SDValue &Hi) {
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
+  Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  EVT LoVT, HiVT;
+  GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT(), LoVT, HiVT);
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+                   DAG.getValueType(LoVT));
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+                   DAG.getValueType(HiVT));
+}
+
+void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Elt = N->getOperand(1);
+  SDValue Idx = N->getOperand(2);
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(Vec, Lo, Hi);
+
+  if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+    unsigned IdxVal = CIdx->getZExtValue();
+    unsigned LoNumElts = Lo.getValueType().getVectorNumElements();
+    if (IdxVal < LoNumElts)
+      Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
+                       Lo.getValueType(), Lo, Elt, Idx);
+    else
+      Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt,
+                       DAG.getIntPtrConstant(IdxVal - LoNumElts));
+    return;
+  }
+
+  // Spill the vector to the stack.
+  EVT VecVT = Vec.getValueType();
+  EVT EltVT = VecVT.getVectorElementType();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+  // Store the new element. This may be larger than the vector element type,
+  // so use a truncating store.
+  SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
+  unsigned Alignment =
+    TLI.getTargetData()->getPrefTypeAlignment(VecType);
+  Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
+                            false, false, 0);
+
+  // Load the Lo part from the stack slot.
+  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, 0);
+
+  // Increment the pointer to the other part.
+  unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+  StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+                         DAG.getIntPtrConstant(IncrementSize));
+
+  // Load the Hi part from the stack slot.
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+                   false, false, MinAlign(Alignment, IncrementSize));
+}
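The stack-spill fallback for a non-constant insert index is easiest to see in memory terms: write the whole vector to a slot, store the element at the run-time offset, and read the two halves back. A standalone C++ sketch of the same maneuver (plain arrays and memcpy; nothing here is LLVM API):

#include <cstdio>
#include <cstring>

int main(int argc, char **argv) {
  (void)argv;
  // A vector insert at a non-constant index: spill to a stack slot, store
  // the element, then reload the Lo and Hi halves (Hi at byte offset
  // sizeof(Lo), matching the IncrementSize computation above).
  float Vec[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  float Slot[8];
  std::memcpy(Slot, Vec, sizeof(Vec));        // spill the whole vector
  unsigned Idx = (unsigned)argc % 8u;         // index unknown at compile time
  Slot[Idx] = 99.0f;                          // element store into the slot
  float Lo[4], Hi[4];
  std::memcpy(Lo, Slot, sizeof(Lo));          // reload Lo part
  std::memcpy(Hi, (char *)Slot + sizeof(Lo), sizeof(Hi)); // reload Hi part
  std::printf("%g %g\n", Lo[1], Hi[3]);
  return 0;
}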
+
+void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
+                                                    SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+  Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0));
+  Hi = DAG.getUNDEF(HiVT);
+}
+
+void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
+                                        SDValue &Hi) {
+  assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
+  EVT LoVT, HiVT;
+  DebugLoc dl = LD->getDebugLoc();
+  GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT);
+
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+  SDValue Ch = LD->getChain();
+  SDValue Ptr = LD->getBasePtr();
+  SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
+  EVT MemoryVT = LD->getMemoryVT();
+  unsigned Alignment = LD->getOriginalAlignment();
+  bool isVolatile = LD->isVolatile();
+  bool isNonTemporal = LD->isNonTemporal();
+
+  EVT LoMemVT, HiMemVT;
+  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+  Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
+                   LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+                   Alignment);
+
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+  Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
+                   LD->getPointerInfo().getWithOffset(IncrementSize),
+                   HiMemVT, isVolatile, isNonTemporal, Alignment);
+
+  // Build a factor node to remember that this load is independent of the
+  // other one.
+  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+                   Hi.getValue(1));
+
+  // Legalized the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(LD, 1), Ch);
+}
+
+void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  EVT LoVT, HiVT;
+  DebugLoc DL = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  // Split the input.
+  EVT InVT = N->getOperand(0).getValueType();
+  SDValue LL, LH, RL, RH;
+  EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                               LoVT.getVectorNumElements());
+  LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+                   DAG.getIntPtrConstant(0));
+  LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0),
+                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+  RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+                   DAG.getIntPtrConstant(0));
+  RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1),
+                   DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+}
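The split load's address arithmetic is worth a concrete picture: the Hi half is loaded from the original pointer advanced by LoMemVT.getSizeInBits()/8 bytes. A standalone C++ sketch with fixed toy sizes (assumed <8 x i16> split into two <4 x i16> loads; not LLVM code):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // An <8 x i16> load split into two <4 x i16> loads: the Hi pointer is the
  // Lo pointer advanced by IncrementSize = LoMemVT.getSizeInBits()/8 bytes.
  uint16_t Mem[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  const unsigned IncrementSize = 4 * 16 / 8;  // 8 bytes
  uint16_t Lo[4], Hi[4];
  const unsigned char *Ptr = (const unsigned char *)Mem;
  std::memcpy(Lo, Ptr, sizeof(Lo));
  std::memcpy(Hi, Ptr + IncrementSize, sizeof(Hi));
  std::printf("%u %u\n", (unsigned)Lo[0], (unsigned)Hi[0]); // 1 5
  return 0;
}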
+
+void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
+                                           SDValue &Hi) {
+  // Get the dest types - they may not match the input types, e.g. int_to_fp.
+  EVT LoVT, HiVT;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
+
+  // Split the input.
+  EVT InVT = N->getOperand(0).getValueType();
+  switch (getTypeAction(InVT)) {
+  default: llvm_unreachable("Unexpected type action!");
+  case Legal: {
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0),
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  case SplitVector:
+    GetSplitVector(N->getOperand(0), Lo, Hi);
+    break;
+  case WidenVector: {
+    // If the result needs to be split and the input needs to be widened,
+    // the two types must have different lengths. Use the widened result
+    // and extract from it to do the split.
+    SDValue InOp = GetWidenedVector(N->getOperand(0));
+    EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
+                                 LoVT.getVectorNumElements());
+    Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(0));
+    Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
+                     DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+    break;
+  }
+  }
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
+                                                  SDValue &Lo, SDValue &Hi) {
+  // The low and high parts of the original input give four input vectors.
+  SDValue Inputs[4];
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
+  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
+  EVT NewVT = Inputs[0].getValueType();
+  unsigned NewElts = NewVT.getVectorNumElements();
+
+  // If Lo or Hi uses elements from at most two of the four input vectors, then
+  // express it as a vector shuffle of those two inputs. Otherwise extract the
+  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
+  SmallVector<int, 16> Ops;
+  for (unsigned High = 0; High < 2; ++High) {
+    SDValue &Output = High ? Hi : Lo;
+
+    // Build a shuffle mask for the output, discovering on the fly which
+    // input vectors to use as shuffle operands (recorded in InputUsed).
+    // If building a suitable shuffle vector proves too hard, then bail
+    // out with useBuildVector set.
+    unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered.
+    unsigned FirstMaskIdx = High * NewElts;
+    bool useBuildVector = false;
+    for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+      // The mask element. This indexes into the input.
+      int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+      // The input vector this mask element indexes into.
+      unsigned Input = (unsigned)Idx / NewElts;
+
+      if (Input >= array_lengthof(Inputs)) {
+        // The mask element does not index into any input vector.
+        Ops.push_back(-1);
+        continue;
+      }
+
+      // Turn the index into an offset from the start of the input vector.
+      Idx -= Input * NewElts;
+
+      // Find or create a shuffle vector operand to hold this input.
+      unsigned OpNo;
+      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+        if (InputUsed[OpNo] == Input) {
+          // This input vector is already an operand.
+          break;
+        } else if (InputUsed[OpNo] == -1U) {
+          // Create a new operand for this input vector.
+          InputUsed[OpNo] = Input;
+          break;
+        }
+      }
+
+      if (OpNo >= array_lengthof(InputUsed)) {
+        // More than two input vectors used! Give up on trying to create a
+        // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
+        useBuildVector = true;
+        break;
+      }
+
+      // Add the mask index for the new shuffle vector.
+      Ops.push_back(Idx + OpNo * NewElts);
+    }
+
+    if (useBuildVector) {
+      EVT EltVT = NewVT.getVectorElementType();
+      SmallVector<SDValue, 16> SVOps;
+
+      // Extract the input elements by hand.
+      for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
+        // The mask element. This indexes into the input.
+        int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset);
+
+        // The input vector this mask element indexes into.
+        unsigned Input = (unsigned)Idx / NewElts;
+
+        if (Input >= array_lengthof(Inputs)) {
+          // The mask element is "undef" or indexes off the end of the input.
+          SVOps.push_back(DAG.getUNDEF(EltVT));
+          continue;
+        }
+
+        // Turn the index into an offset from the start of the input vector.
+        Idx -= Input * NewElts;
+
+        // Extract the vector element by hand.
+        SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                    Inputs[Input], DAG.getIntPtrConstant(Idx)));
+      }
+
+      // Construct the Lo/Hi output using a BUILD_VECTOR.
+      Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, &SVOps[0], SVOps.size());
+    } else if (InputUsed[0] == -1U) {
+      // No input vectors were used! The result is undefined.
+      Output = DAG.getUNDEF(NewVT);
+    } else {
+      SDValue Op0 = Inputs[InputUsed[0]];
+      // If only one input was used, use an undefined vector for the other.
+      SDValue Op1 = InputUsed[1] == -1U ?
+        DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]];
+      // At least one input vector was used. Create a new shuffle vector.
+      Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]);
+    }
+
+    Ops.clear();
+  }
+}
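The InputUsed discovery loop above decides, per output half, whether two of the four split inputs suffice for a shuffle. The same logic runs on plain integers; a standalone C++ sketch (toy mask values, nothing LLVM-specific):

#include <cstdio>

int main() {
  // Can one output half be built as a shuffle of at most two of the four
  // split inputs? Same discovery loop as above, on plain ints.
  const unsigned NewElts = 4, NumInputs = 4;
  int Mask[NewElts] = { 0, 5, 1, 4 };     // indexes into inputs 0..3
  unsigned InputUsed[2] = { ~0u, ~0u };
  bool useBuildVector = false;
  for (unsigned m = 0; m < NewElts && !useBuildVector; ++m) {
    unsigned Input = (unsigned)Mask[m] / NewElts; // which input this lane reads
    if (Input >= NumInputs)
      continue;                                   // undef lane
    unsigned Op = 0;
    for (; Op < 2; ++Op) {
      if (InputUsed[Op] == Input) break;          // already an operand
      if (InputUsed[Op] == ~0u) { InputUsed[Op] = Input; break; }
    }
    if (Op == 2)
      useBuildVector = true;                      // a third input: bail out
  }
  if (useBuildVector)
    std::printf("fall back to BUILD_VECTOR\n");
  else
    std::printf("shuffle of inputs %u and %u\n", InputUsed[0], InputUsed[1]);
  return 0;
}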
+
+
+//===----------------------------------------------------------------------===//
+//  Operand Vector Splitting
+//===----------------------------------------------------------------------===//
+
+/// SplitVectorOperand - This method is called when the specified operand of the
+/// specified node is found to need vector splitting. At this point, all of the
+/// result types of the node are known to be legal, but other operands of the
+/// node may need legalization as well as the specified one.
+bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
+  DEBUG(dbgs() << "Split node operand: ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+  SDValue Res = SDValue();
+
+  if (Res.getNode() == 0) {
+    switch (N->getOpcode()) {
+    default:
+#ifndef NDEBUG
+      dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
+      N->dump(&DAG);
+      dbgs() << "\n";
+#endif
+      llvm_unreachable("Do not know how to split this operator's operand!");
+
+    case ISD::BITCAST:           Res = SplitVecOp_BITCAST(N); break;
+    case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
+    case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
+    case ISD::CONCAT_VECTORS:    Res = SplitVecOp_CONCAT_VECTORS(N); break;
+    case ISD::FP_ROUND:          Res = SplitVecOp_FP_ROUND(N); break;
+    case ISD::STORE:
+      Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
+      break;
+
+    case ISD::CTTZ:
+    case ISD::CTLZ:
+    case ISD::CTPOP:
+    case ISD::FP_EXTEND:
+    case ISD::FP_TO_SINT:
+    case ISD::FP_TO_UINT:
+    case ISD::SINT_TO_FP:
+    case ISD::UINT_TO_FP:
+    case ISD::FTRUNC:
+    case ISD::TRUNCATE:
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::ANY_EXTEND:
+      Res = SplitVecOp_UnaryOp(N);
+      break;
+    }
+  }
+
+  // If the result is null, the sub-method took care of registering results etc.
+  if (!Res.getNode()) return false;
+
+  // If the result is N, the sub-method updated N in place. Tell the legalizer
+  // core about this.
+  if (Res.getNode() == N)
+    return true;
+
+  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+         "Invalid operand expansion");
+
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return false;
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
+  // The result has a legal vector type, but the input needs splitting.
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  DebugLoc dl = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  EVT InVT = Lo.getValueType();
+
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+                               InVT.getVectorNumElements());
+
+  Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+  Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+  // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
+  // end up being split all the way down to individual components. Convert the
+  // split pieces into integers and reassemble.
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  Lo = BitConvertToInteger(Lo);
+  Hi = BitConvertToInteger(Hi);
+
+  if (TLI.isBigEndian())
+    std::swap(Lo, Hi);
+
+  return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
+                     JoinIntegers(Lo, Hi));
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
+  // We know that the extracted result type is legal.
+  EVT SubVT = N->getValueType(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+
+  uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+
+  if (IdxVal < LoElts) {
+    assert(IdxVal + SubVT.getVectorNumElements() <= LoElts &&
+           "Extracted subvector crosses vector split!");
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);
+  } else {
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi,
+                       DAG.getConstant(IdxVal - LoElts, Idx.getValueType()));
+  }
+}
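Extracting from a split vector reduces to an index comparison: lanes below LoElts come from Lo, the rest from Hi with the index rebased. A standalone C++ sketch of that arithmetic (plain arrays; not LLVM code):

#include <cstdio>

int main() {
  // Indexing into a split vector: lanes below LoElts live in Lo, the rest
  // live in Hi at index IdxVal - LoElts.
  int Lo[4] = { 0, 1, 2, 3 };
  int Hi[4] = { 4, 5, 6, 7 };
  const unsigned LoElts = 4;
  for (unsigned IdxVal = 0; IdxVal < 8; ++IdxVal)
    std::printf("%d ", IdxVal < LoElts ? Lo[IdxVal] : Hi[IdxVal - LoElts]);
  std::printf("\n"); // 0 1 2 3 4 5 6 7
  return 0;
}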
+
+SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
+  SDValue Vec = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  EVT VecVT = Vec.getValueType();
+
+  if (isa<ConstantSDNode>(Idx)) {
+    uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+    assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
+
+    SDValue Lo, Hi;
+    GetSplitVector(Vec, Lo, Hi);
+
+    uint64_t LoElts = Lo.getValueType().getVectorNumElements();
+
+    if (IdxVal < LoElts)
+      return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+    return SDValue(DAG.UpdateNodeOperands(N, Hi,
+                                  DAG.getConstant(IdxVal - LoElts,
+                                                  Idx.getValueType())), 0);
+  }
+
+  // Store the vector to the stack.
+  EVT EltVT = VecVT.getVectorElementType();
+  DebugLoc dl = N->getDebugLoc();
+  SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
+  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+                               MachinePointerInfo(), false, false, 0);
+
+  // Load back the required element.
+  StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
+  return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+                        MachinePointerInfo(), EltVT, false, false, 0);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
+  assert(N->isUnindexed() && "Indexed store of vector?");
+  assert(OpNo == 1 && "Can only split the stored value");
+  DebugLoc DL = N->getDebugLoc();
+
+  bool isTruncating = N->isTruncatingStore();
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  EVT MemoryVT = N->getMemoryVT();
+  unsigned Alignment = N->getOriginalAlignment();
+  bool isVol = N->isVolatile();
+  bool isNT = N->isNonTemporal();
+  SDValue Lo, Hi;
+  GetSplitVector(N->getOperand(1), Lo, Hi);
+
+  EVT LoMemVT, HiMemVT;
+  GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
+
+  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
+
+  if (isTruncating)
+    Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+                           LoMemVT, isVol, isNT, Alignment);
+  else
+    Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
+                      isVol, isNT, Alignment);
+
+  // Increment the pointer to the other half.
+  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+                    DAG.getIntPtrConstant(IncrementSize));
+
+  if (isTruncating)
+    Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+                           N->getPointerInfo().getWithOffset(IncrementSize),
+                           HiMemVT, isVol, isNT, Alignment);
+  else
+    Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+                      N->getPointerInfo().getWithOffset(IncrementSize),
+                      isVol, isNT, Alignment);
+
+  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
+  DebugLoc DL = N->getDebugLoc();
+
+  // The input operands all must have the same type, and we know the result
+  // type is valid. Convert this to a buildvector which extracts all the
+  // input elements.
+  // TODO: If the input elements are power-two vectors, we could convert this to
+  // a new CONCAT_VECTORS node with elements that are half-wide.
+  SmallVector<SDValue, 32> Elts;
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+    SDValue Op = N->getOperand(op);
+    for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
+         i != e; ++i) {
+      Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT,
+                                 Op, DAG.getIntPtrConstant(i)));
+    }
+  }
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
+                     &Elts[0], Elts.size());
+}
+
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+  // The result has a legal vector type, but the input needs splitting.
+  EVT ResVT = N->getValueType(0);
+  SDValue Lo, Hi;
+  DebugLoc DL = N->getDebugLoc();
+  GetSplitVector(N->getOperand(0), Lo, Hi);
+  EVT InVT = Lo.getValueType();
+
+  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+                               InVT.getVectorNumElements());
+
+  Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+  Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
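The BUILD_VECTOR-of-extracts fallback in SplitVecOp_CONCAT_VECTORS simply walks every operand in order and emits one element extract per lane. A standalone C++ sketch of that flattening (plain arrays, C++98; not LLVM code):

#include <cstdio>
#include <vector>

int main() {
  // Flattening a concat of small vectors into one element list, in operand
  // order -- one extract per input lane.
  int A[2] = { 1, 2 }, B[2] = { 3, 4 }, C[2] = { 5, 6 };
  const int *Opnds[3] = { A, B, C };
  std::vector<int> Elts;
  for (unsigned op = 0; op < 3; ++op)
    for (unsigned i = 0; i < 2; ++i)
      Elts.push_back(Opnds[op][i]);
  for (unsigned i = 0; i < Elts.size(); ++i)
    std::printf("%d ", Elts[i]);
  std::printf("\n"); // 1 2 3 4 5 6
  return 0;
}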
+
+
+//===----------------------------------------------------------------------===//
+//  Result Vector Widening
+//===----------------------------------------------------------------------===//
+
+void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
+  DEBUG(dbgs() << "Widen node result " << ResNo << ": ";
+        N->dump(&DAG);
+        dbgs() << "\n");
+
+  // See if the target wants to custom widen this node.
+  if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
+    return;
+
+  SDValue Res = SDValue();
+  switch (N->getOpcode()) {
+  default:
+#ifndef NDEBUG
+    dbgs() << "WidenVectorResult #" << ResNo << ": ";
+    N->dump(&DAG);
+    dbgs() << "\n";
+#endif
+    llvm_unreachable("Do not know how to widen the result of this operator!");
+
+  case ISD::BITCAST:           Res = WidenVecRes_BITCAST(N); break;
+  case ISD::BUILD_VECTOR:      Res = WidenVecRes_BUILD_VECTOR(N); break;
+  case ISD::CONCAT_VECTORS:    Res = WidenVecRes_CONCAT_VECTORS(N); break;
+  case ISD::CONVERT_RNDSAT:    Res = WidenVecRes_CONVERT_RNDSAT(N); break;
+  case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
+  case ISD::FP_ROUND_INREG:    Res = WidenVecRes_InregOp(N); break;
+  case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
+  case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
+  case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+  case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
+  case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
+  case ISD::SETCC:             Res = WidenVecRes_SETCC(N); break;
+  case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
+  case ISD::VECTOR_SHUFFLE:
+    Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
+    break;
+  case ISD::VSETCC:
+    Res = WidenVecRes_VSETCC(N);
+    break;
+
+  case ISD::ADD:
+  case ISD::AND:
+  case ISD::BSWAP:
+  case ISD::FADD:
+  case ISD::FCOPYSIGN:
+  case ISD::FDIV:
+  case ISD::FMUL:
+  case ISD::FPOW:
+  case ISD::FREM:
+  case ISD::FSUB:
+  case ISD::MUL:
+  case ISD::MULHS:
+  case ISD::MULHU:
+  case ISD::OR:
+  case ISD::SDIV:
+  case ISD::SREM:
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::SUB:
+  case ISD::XOR:
+    Res = WidenVecRes_Binary(N);
+    break;
+
+  case ISD::FPOWI:
+    Res = WidenVecRes_POWI(N);
+    break;
+
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    Res = WidenVecRes_Shift(N);
+    break;
+
+  case ISD::ANY_EXTEND:
+  case ISD::FP_EXTEND:
+  case ISD::FP_ROUND:
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:
+  case ISD::SIGN_EXTEND:
+  case ISD::SINT_TO_FP:
+  case ISD::TRUNCATE:
+  case ISD::UINT_TO_FP:
+  case ISD::ZERO_EXTEND:
+    Res = WidenVecRes_Convert(N);
+    break;
+
+  case ISD::CTLZ:
+  case ISD::CTPOP:
+  case ISD::CTTZ:
+  case ISD::FABS:
+  case ISD::FCEIL:
+  case ISD::FCOS:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FFLOOR:
+  case ISD::FLOG:
+  case ISD::FLOG10:
+  case ISD::FLOG2:
+  case ISD::FNEARBYINT:
+  case ISD::FNEG:
+  case ISD::FRINT:
+  case ISD::FSIN:
+  case ISD::FSQRT:
+  case ISD::FTRUNC:
+    Res = WidenVecRes_Unary(N);
+    break;
+  }
+
+  // If Res is null, the sub-method took care of registering the result.
+  if (Res.getNode())
+    SetWidenedVector(SDValue(N, ResNo), Res);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
+  // Binary op widening.
+  unsigned Opcode = N->getOpcode();
+  DebugLoc dl = N->getDebugLoc();
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT WidenEltVT = WidenVT.getVectorElementType();
+  EVT VT = WidenVT;
+  unsigned NumElts = VT.getVectorNumElements();
+  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+    NumElts = NumElts / 2;
+    VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+  }
+
+  if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
+    // Operation doesn't trap so just widen as normal.
+    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
+  }
+
+  // No legal vector version so unroll the vector operation and then widen.
+  if (NumElts == 1)
+    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+
+  // Since the operation can trap, apply operation on the original vector.
+  EVT MaxVT = VT;
+  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+  unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
+
+  SmallVector<SDValue, 16> ConcatOps(CurNumElts);
+  unsigned ConcatEnd = 0;  // Current ConcatOps index.
+  int Idx = 0;             // Current Idx into input vectors.
+
+  // NumElts := greatest legal vector size (at most WidenVT)
+  // while (orig. vector has unhandled elements) {
+  //   take munches of size NumElts from the beginning and add to ConcatOps
+  //   NumElts := next smaller supported vector size or 1
+  // }
+  while (CurNumElts != 0) {
+    while (CurNumElts >= NumElts) {
+      SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
+                                 DAG.getIntPtrConstant(Idx));
+      SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2,
+                                 DAG.getIntPtrConstant(Idx));
+      ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2);
+      Idx += NumElts;
+      CurNumElts -= NumElts;
+    }
+    do {
+      NumElts = NumElts / 2;
+      VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
+    } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+
+    if (NumElts == 1) {
+      for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
+        SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+                                   InOp1, DAG.getIntPtrConstant(Idx));
+        SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+                                   InOp2, DAG.getIntPtrConstant(Idx));
+        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+                                             EOp1, EOp2);
+      }
+      CurNumElts = 0;
+    }
+  }
+
+  // Check to see if we have a single operation with the widen type.
+  if (ConcatEnd == 1) {
+    VT = ConcatOps[0].getValueType();
+    if (VT == WidenVT)
+      return ConcatOps[0];
+  }
+
+  // while (Some element of ConcatOps is not of type MaxVT) {
+  //   From the end of ConcatOps, collect elements of the same type and put
+  //   them into an op of the next larger supported type
+  // }
+  while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+    Idx = ConcatEnd - 1;
+    VT = ConcatOps[Idx--].getValueType();
+    while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+      Idx--;
+
+    int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+    EVT NextVT;
+    do {
+      NextSize *= 2;
+      NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+    } while (!TLI.isTypeLegal(NextVT));
+
+    if (!VT.isVector()) {
+      // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
+      SDValue VecOp = DAG.getUNDEF(NextVT);
+      unsigned NumToInsert = ConcatEnd - Idx - 1;
+      for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+        VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+                            ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+      }
+      ConcatOps[Idx+1] = VecOp;
+      ConcatEnd = Idx + 2;
+    } else {
+      // Vector type, create a CONCAT_VECTORS of type NextVT
+      SDValue undefVec = DAG.getUNDEF(VT);
+      unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+      SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+      unsigned RealVals = ConcatEnd - Idx - 1;
+      unsigned SubConcatEnd = 0;
+      unsigned SubConcatIdx = Idx + 1;
+      while (SubConcatEnd < RealVals)
+        SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+      while (SubConcatEnd < OpsToConcat)
+        SubConcatOps[SubConcatEnd++] = undefVec;
+      ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                                            NextVT, &SubConcatOps[0],
+                                            OpsToConcat);
+      ConcatEnd = SubConcatIdx + 1;
+    }
+  }
+
+  // Check to see if we have a single operation with the widen type.
+  if (ConcatEnd == 1) {
+    VT = ConcatOps[0].getValueType();
+    if (VT == WidenVT)
+      return ConcatOps[0];
+  }
+
+  // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
+  unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
+  if (NumOps != ConcatEnd) {
+    SDValue UndefVal = DAG.getUNDEF(MaxVT);
+    for (unsigned j = ConcatEnd; j < NumOps; ++j)
+      ConcatOps[j] = UndefVal;
+  }
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], NumOps);
+}
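The "munch" schedule in the pseudocode comments above is simple once separated from the DAG plumbing: cover the original lane count with the largest legal chunk, then halve the chunk size for the remainder, down to single lanes. A standalone C++ sketch (assumed legal sizes 4, 2, 1; not LLVM code):

#include <cstdio>

int main() {
  // Cover 7 original lanes with the largest "legal" chunk (4 here), then
  // halve the chunk size for what remains.
  unsigned CurNumElts = 7, NumElts = 4, Idx = 0;
  while (CurNumElts != 0 && NumElts != 0) {
    while (CurNumElts >= NumElts) {
      std::printf("op on lanes [%u,%u)\n", Idx, Idx + NumElts);
      Idx += NumElts;
      CurNumElts -= NumElts;
    }
    NumElts /= 2;   // next smaller supported size (down to single lanes)
  }
  return 0;       // prints [0,4), [4,6), [6,7)
}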
+
+SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  EVT InVT = InOp.getValueType();
+  EVT InEltVT = InVT.getVectorElementType();
+  EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts);
+
+  unsigned Opcode = N->getOpcode();
+  unsigned InVTNumElts = InVT.getVectorNumElements();
+
+  if (getTypeAction(InVT) == WidenVector) {
+    InOp = GetWidenedVector(N->getOperand(0));
+    InVT = InOp.getValueType();
+    InVTNumElts = InVT.getVectorNumElements();
+    if (InVTNumElts == WidenNumElts) {
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InOp);
+      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+    }
+  }
+
+  if (TLI.isTypeLegal(InWidenVT)) {
+    // Because the result and the input are different vector types, widening
+    // the result could create a legal type but widening the input might make
+    // it an illegal type that might lead to repeatedly splitting the input
+    // and then widening it. To avoid this, we widen the input only if
+    // it results in a legal type.
+    if (WidenNumElts % InVTNumElts == 0) {
+      // Widen the input and call convert on the widened input vector.
+      unsigned NumConcat = WidenNumElts/InVTNumElts;
+      SmallVector<SDValue, 16> Ops(NumConcat);
+      Ops[0] = InOp;
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      for (unsigned i = 1; i != NumConcat; ++i)
+        Ops[i] = UndefVal;
+      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+                                  &Ops[0], NumConcat);
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVec);
+      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
+    }
+
+    if (InVTNumElts % WidenNumElts == 0) {
+      SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+                                  InOp, DAG.getIntPtrConstant(0));
+      // Extract the input and convert the shortened input vector.
+      if (N->getNumOperands() == 1)
+        return DAG.getNode(Opcode, DL, WidenVT, InVal);
+      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
+    }
+  }
+
+  // Otherwise unroll into some nasty scalar code and rebuild the vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i=0; i < MinElts; ++i) {
+    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+                              DAG.getIntPtrConstant(i));
+    if (N->getNumOperands() == 1)
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+    else
+      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  SDValue ShOp = N->getOperand(1);
+
+  EVT ShVT = ShOp.getValueType();
+  if (getTypeAction(ShVT) == WidenVector) {
+    ShOp = GetWidenedVector(ShOp);
+    ShVT = ShOp.getValueType();
+  }
+  EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(),
+                                   ShVT.getVectorElementType(),
+                                   WidenVT.getVectorNumElements());
+  if (ShVT != ShWidenVT)
+    ShOp = ModifyToType(ShOp, ShWidenVT);
+
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
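WidenVecRes_Convert's three strategies hinge on two divisibility tests between the widened result count and the input count. A standalone C++ sketch of just that decision (hypothetical `chooseStrategy` helper; not LLVM code):

#include <cstdio>

static void chooseStrategy(unsigned WidenNumElts, unsigned InVTNumElts) {
  // The three cases above: pad the input, take a prefix of it, or unroll.
  if (WidenNumElts % InVTNumElts == 0)
    std::printf("concat %u copies (undef-padded) and convert\n",
                WidenNumElts / InVTNumElts);
  else if (InVTNumElts % WidenNumElts == 0)
    std::printf("extract the leading %u lanes and convert\n", WidenNumElts);
  else
    std::printf("unroll per element\n");
}

int main() {
  chooseStrategy(8, 4);   // widened result is a multiple of the input
  chooseStrategy(4, 8);   // input is a multiple of the widened result
  chooseStrategy(6, 4);   // neither divides: scalarize
  return 0;
}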
+
+SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
+  // Unary op widening.
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
+                               cast<VTSDNode>(N->getOperand(1))->getVT()
+                                 .getVectorElementType(),
+                               WidenVT.getVectorNumElements());
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
+                     WidenVT, WidenLHS, DAG.getValueType(ExtVT));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
+  SDValue InOp = N->getOperand(0);
+  EVT InVT = InOp.getValueType();
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  DebugLoc dl = N->getDebugLoc();
+
+  switch (getTypeAction(InVT)) {
+  default:
+    assert(false && "Unknown type action!");
+    break;
+  case Legal:
+    break;
+  case PromoteInteger:
+    // If the InOp is promoted to the same size, convert it. Otherwise,
+    // fall out of the switch and widen the promoted input.
+    InOp = GetPromotedInteger(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+    break;
+  case SoftenFloat:
+  case ExpandInteger:
+  case ExpandFloat:
+  case ScalarizeVector:
+  case SplitVector:
+    break;
+  case WidenVector:
+    // If the InOp is widened to the same size, convert it. Otherwise, fall
+    // out of the switch and widen the widened input.
+    InOp = GetWidenedVector(InOp);
+    InVT = InOp.getValueType();
+    if (WidenVT.bitsEq(InVT))
+      // The input widens to the same size. Convert to the widen value.
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
+    break;
+  }
+
+  unsigned WidenSize = WidenVT.getSizeInBits();
+  unsigned InSize = InVT.getSizeInBits();
+  // x86mmx is not an acceptable vector element type, so don't try.
+  if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
+    // Determine new input vector type. The new input vector type will use
+    // the same element type (if it's a vector) or use the input type as a
+    // vector. It is the same size as the type to widen to.
+    EVT NewInVT;
+    unsigned NewNumElts = WidenSize / InSize;
+    if (InVT.isVector()) {
+      EVT InEltVT = InVT.getVectorElementType();
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
+                                 WidenSize / InEltVT.getSizeInBits());
+    } else {
+      NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
+    }
+
+    if (TLI.isTypeLegal(NewInVT)) {
+      // Because the result and the input are different vector types, widening
+      // the result could create a legal type but widening the input might make
+      // it an illegal type that might lead to repeatedly splitting the input
+      // and then widening it. To avoid this, we widen the input only if
+      // it results in a legal type.
+      SmallVector<SDValue, 16> Ops(NewNumElts);
+      SDValue UndefVal = DAG.getUNDEF(InVT);
+      Ops[0] = InOp;
+      for (unsigned i = 1; i < NewNumElts; ++i)
+        Ops[i] = UndefVal;
+
+      SDValue NewVec;
+      if (InVT.isVector())
+        NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      else
+        NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
+                             NewInVT, &Ops[0], NewNumElts);
+      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
+    }
+  }
+
+  return CreateStackStoreLoad(InOp, WidenVT);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
+  DebugLoc dl = N->getDebugLoc();
+  // Build a vector with undefined for the new nodes.
+  EVT VT = N->getValueType(0);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+
+  SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end());
+  NewOps.reserve(WidenNumElts);
+  for (unsigned i = NumElts; i < WidenNumElts; ++i)
+    NewOps.push_back(DAG.getUNDEF(EltVT));
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size());
+}
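Widening a BUILD_VECTOR is pure padding: the defined operands are kept and the tail lanes become undef. A standalone C++ sketch of that padding (sentinel value standing in for undef; not LLVM code):

#include <cstdio>
#include <vector>

int main() {
  // Widening a 3-operand BUILD_VECTOR to 4 lanes: keep the defined
  // operands and pad the tail with "undef" (a sentinel value here).
  const int Undef = -1;
  std::vector<int> Ops;
  Ops.push_back(10); Ops.push_back(20); Ops.push_back(30);
  const unsigned WidenNumElts = 4;
  while (Ops.size() < WidenNumElts)
    Ops.push_back(Undef);
  for (unsigned i = 0; i < Ops.size(); ++i)
    std::printf("%d ", Ops[i]);
  std::printf("\n"); // 10 20 30 -1
  return 0;
}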
+ EVT EltVT = WidenVT.getVectorElementType(); + unsigned NumInElts = InVT.getVectorNumElements(); + SmallVector Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == WidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, &Ops[0],NumConcat); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getIntPtrConstant(0)); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
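+  // For example, if the result widens to 4 elements but the input vector
+  // only has 3, we extract and convert the 3 available scalars one at a
+  // time and leave the last lane of the rebuilt vector undef.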
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = WidenVT.getVectorElementType();
+  DTyOp = DAG.getValueType(EltVT);
+  STyOp = DAG.getValueType(InEltVT);
+
+  unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
+  unsigned i;
+  for (i = 0; i < MinElts; ++i) {
+    SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+                                 DAG.getIntPtrConstant(i));
+    Ops[i] = DAG.getConvertRndSat(EltVT, dl, ExtVal, DTyOp, STyOp, RndOp,
+                                  SatOp, CvtCode);
+  }
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
+  EVT VT = N->getValueType(0);
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+  unsigned WidenNumElts = WidenVT.getVectorNumElements();
+  SDValue InOp = N->getOperand(0);
+  SDValue Idx = N->getOperand(1);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (getTypeAction(InOp.getValueType()) == WidenVector)
+    InOp = GetWidenedVector(InOp);
+
+  EVT InVT = InOp.getValueType();
+
+  // Check if we can just return the input vector after widening.
+  uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+  if (IdxVal == 0 && InVT == WidenVT)
+    return InOp;
+
+  // Check if we can extract from the vector.
+  unsigned InNumElts = InVT.getVectorNumElements();
+  if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
+
+  // We could try widening the input to the right length but for now, extract
+  // the original elements, fill the rest with undefs and build a vector.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned i;
+  for (i = 0; i < NumElts; ++i)
+    Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                         DAG.getIntPtrConstant(IdxVal+i));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for (; i < WidenNumElts; ++i)
+    Ops[i] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue InOp = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(),
+                     InOp.getValueType(), InOp,
+                     N->getOperand(1), N->getOperand(2));
+}
+
+SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  ISD::LoadExtType ExtType = LD->getExtensionType();
+
+  SDValue Result;
+  SmallVector<SDValue, 16> LdChain;  // Chain for the series of loads
+  if (ExtType != ISD::NON_EXTLOAD)
+    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+  else
+    Result = GenWidenVectorLoads(LdChain, LD);
+
+  // If we generate a single load, we can use that for the chain. Otherwise,
+  // build a factor node to remember that the multiple loads are independent
+  // and chain to that.
+  SDValue NewChain;
+  if (LdChain.size() == 1)
+    NewChain = LdChain[0];
+  else
+    NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other,
+                           &LdChain[0], LdChain.size());
+
+  // Modified the chain - switch anything that used the old chain to use
+  // the new one.
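+  // For example, a v3i32 load widened to v4i32 may have been split by
+  // GenWidenVectorLoads into an i64 load plus an i32 load (assuming those
+  // are legal); the TokenFactor above merges their chains so consumers of
+  // the original load's chain depend on both pieces.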
+ ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; +} + +SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), + WidenVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue Cond1 = N->getOperand(0); + EVT CondVT = Cond1.getValueType(); + if (CondVT.isVector()) { + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); + if (getTypeAction(CondVT) == WidenVector) + Cond1 = GetWidenedVector(Cond1); + + if (Cond1.getValueType() != CondWidenVT) + Cond1 = ModifyToType(Cond1, CondWidenVT); + } + + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(ISD::SELECT, N->getDebugLoc(), + WidenVT, Cond1, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { + SDValue InOp1 = GetWidenedVector(N->getOperand(2)); + SDValue InOp2 = GetWidenedVector(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + InOp1.getValueType(), N->getOperand(0), + N->getOperand(1), InOp1, InOp2, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getUNDEF(WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + DebugLoc dl = N->getDebugLoc(); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned NumElts = VT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Adjust mask based on new input vector length. + SmallVector NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = N->getMaskElt(i); + if (Idx < (int)NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned i = NumElts; i != WidenNumElts; ++i) + NewMask.push_back(-1); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = N->getOperand(0); + EVT InVT = InOp1.getValueType(); + assert(InVT.isVector() && "can not widen non vector type"); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), WidenNumElts); + InOp1 = GetWidenedVector(InOp1); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Assume that the input and output will be widen appropriately. If not, + // we will have to unroll it at some point. 
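+  // For example, a VSETCC of two v3i32 values whose result widens to
+  // v4i32 expects both inputs to have been widened to v4i32 as well; the
+  // compare then runs on all four lanes and the extra lane of the result
+  // is simply never read by users of the original v3i32 value.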
+ assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + return DAG.getNode(ISD::VSETCC, N->getDebugLoc(), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Widen node operand " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorOperand op #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen this operator's operand!"); + + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + Res = WidenVecOp_Convert(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getIntPtrConstant(i))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InWidenVT = InOp.getValueType(); + DebugLoc dl = N->getDebugLoc(); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. 
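+  // A sketch of this fast path: for (i64 (bitcast v2f32)) where the v2f32
+  // operand was widened to v4f32, InWidenSize is 128 and Size is 64, so
+  // (assuming v2i64 is legal) we bitcast the widened value to v2i64 and
+  // extract element 0 rather than spilling to the stack.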
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { + unsigned NewNumElts = InWidenSize / Size; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getIntPtrConstant(0)); + } + } + + return CreateStackStoreLoad(InOp, VT); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + // If the input vector is not legal, it is likely that we will not find a + // legal vector of the same size. Replace the concatenate vector with a + // nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector Ops(NumElts); + + EVT InVT = N->getOperand(0).getValueType(); + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + unsigned NumOperands = N->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (getTypeAction(InOp.getValueType()) == WidenVector) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getIntPtrConstant(j)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value but we want only to store the original + // vector type. + StoreSDNode *ST = cast(N); + + SmallVector StChain; + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + + if (StChain.size() == 1) + return StChain[0]; + else + return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(), + MVT::Other,&StChain[0],StChain.size()); +} + +//===----------------------------------------------------------------------===// +// Vector Widening Utilities +//===----------------------------------------------------------------------===// + +// Utility function to find the type to chop up a widen vector for load/store +// TLI: Target lowering used to determine legal types. +// Width: Width left need to load/store. +// WidenVT: The widen vector type to load to/store from +// Align: If 0, don't allow use of a wider type +// WidenEx: If Align is not 0, the amount additional we can load/store from. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { + EVT WidenEltVT = WidenVT.getVectorElementType(); + unsigned WidenWidth = WidenVT.getSizeInBits(); + unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); + unsigned AlignInBits = Align*8; + + // If we have one element to load/store, return it. 
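+  // Overall example (hypothetical target where i64, i32 and v4i32 are
+  // legal but no 96-bit type is): with Width = 96 left from a v3i32
+  // widened to v4i32, this returns i64; the follow-up call with
+  // Width = 32 then returns i32.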
+ EVT RetVT = WidenEltVT; + if (Width == WidenEltWidth) + return RetVT; + + // See if there is larger legal integer than the element type to load/store + unsigned VT; + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + RetVT = MemVT; + break; + } + } + + // See if there is a larger vector type to load/store that has the same vector + // element type and is evenly divisible with the WidenVT. + for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { + EVT MemVT = (MVT::SimpleValueType) VT; + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + (WidenWidth % MemVTWidth) == 0 && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + return MemVT; + } + } + + return RetVT; +} + +// Builds a vector type from scalar loads +// VecTy: Resulting Vector type +// LDOps: Load operators to build a vector type +// [Start,End) the list of loads to use. +static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVector& LdOps, + unsigned Start, unsigned End) { + DebugLoc dl = LdOps[Start].getDebugLoc(); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getIntPtrConstant(Idx++)); + } + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); +} + +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, + LoadSDNode *LD) { + // The strategy assumes that we can efficiently load powers of two widths. + // The routines chops the vector into the largest vector loads with the same + // element type or scalar loads and then recombines it to the widen vector + // type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; // Difference + unsigned LdAlign = (isVolatile) ? 
0 : Align; // Allow wider loads + + // Find the vector type that can load from. + EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + isVolatile, isNonTemporal, Align); + LdChain.push_back(LdOp.getValue(1)); + + // Check if we can load the element with one instruction + if (LdWidth <= NewVTWidth) { + if (!NewVT.isVector()) { + unsigned NumElts = WidenWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &ConcatOps[0], + NumConcat); + } + + // Load vector by using multiple loads from largest vector to scalar + SmallVector LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; + unsigned Offset = 0; + + while (LdWidth > 0) { + unsigned Increment = NewVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getIntPtrConstant(Increment)); + + if (LdWidth < NewVTWidth) { + // Our current type we are using is too large, find a better size + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); + } + + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), + isVolatile, + isNonTemporal, MinAlign(Align, Increment)); + LdChain.push_back(LdOp.getValue(1)); + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; + } + + // Build the vector from the loads operations + unsigned End = LdOps.size(); + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to loads are power of 2 and the scalars load + // can be combined to make a power of 2 vector. 
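+  // A sketch of the rebuild: if the pieces were, say, a v2i32 load
+  // followed by an i32 scalar load, the trailing scalar run is first
+  // packed into a small vector by BuildVectorFromScalar, and the pieces
+  // are then merged with CONCAT_VECTORS, padding with undef when they do
+  // not fill the whole widened type.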
+ SmallVector ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + &ConcatOps[Idx], End - Idx); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } + + if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + &ConcatOps[Idx], End - Idx); + + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, &WidenOps[0],NumOps); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector& LdChain, + LoadSDNode * LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extended it. Instead, we unroll the load and build a new vector. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + DebugLoc dl = LD->getDebugLoc(); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), + LdEltVT, isVolatile, isNonTemporal, Align); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, DAG.getIntPtrConstant(Offset)); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, Align); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], Ops.size()); +} + + +void DAGTypeLegalizer::GenWidenVectorStores(SmallVector& StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store powers of two widths. + // The routines chops the vector into the largest vector stores with the same + // element type or scalar stores. 
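+  // For example, storing a v3i32 value widened to v4i32 may be emitted
+  // (assuming i64 and i32 are legal) as an i64 store of the first two
+  // elements followed by an i32 store of the third, so no memory beyond
+  // the original 96 bits is written.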
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  unsigned StWidth = StVT.getSizeInBits();
+  EVT ValVT = ValOp.getValueType();
+  unsigned ValWidth = ValVT.getSizeInBits();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned ValEltWidth = ValEltVT.getSizeInBits();
+  assert(StVT.getVectorElementType() == ValEltVT);
+
+  int Idx = 0;          // current index to store
+  unsigned Offset = 0;  // offset from base to store
+  while (StWidth != 0) {
+    // Find the largest vector type we can store with.
+    EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT);
+    unsigned NewVTWidth = NewVT.getSizeInBits();
+    unsigned Increment = NewVTWidth / 8;
+    if (NewVT.isVector()) {
+      unsigned NumVTElts = NewVT.getVectorNumElements();
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
+                                  DAG.getIntPtrConstant(Idx));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                   ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        Idx += NumVTElts;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+    } else {
+      // Cast the vector to the scalar type we can store.
+      unsigned NumElts = ValWidth / NewVTWidth;
+      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
+      SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
+      // Readjust index position based on the new vector type.
+      Idx = Idx * ValEltWidth / NewVTWidth;
+      do {
+        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
+                                  DAG.getIntPtrConstant(Idx++));
+        StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+                                   ST->getPointerInfo().getWithOffset(Offset),
+                                       isVolatile, isNonTemporal,
+                                       MinAlign(Align, Offset)));
+        StWidth -= NewVTWidth;
+        Offset += Increment;
+        BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
+                              DAG.getIntPtrConstant(Increment));
+      } while (StWidth != 0 && StWidth >= NewVTWidth);
+      // Restore the index back to being relative to the original widened
+      // element type.
+      Idx = Idx * NewVTWidth / ValEltWidth;
+    }
+  }
+}
+
+void
+DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
+                                            StoreSDNode *ST) {
+  // It may not be more efficient to truncate the widened vector and then
+  // store it as a whole. Instead, we extract each element and store it
+  // individually.
+  SDValue Chain = ST->getChain();
+  SDValue BasePtr = ST->getBasePtr();
+  unsigned Align = ST->getAlignment();
+  bool isVolatile = ST->isVolatile();
+  bool isNonTemporal = ST->isNonTemporal();
+  SDValue ValOp = GetWidenedVector(ST->getValue());
+  DebugLoc dl = ST->getDebugLoc();
+
+  EVT StVT = ST->getMemoryVT();
+  EVT ValVT = ValOp.getValueType();
+
+  // It must be the case that the widened vector type is bigger than the
+  // vector type we need to store.
+  assert(StVT.isVector() && ValOp.getValueType().isVector());
+  assert(StVT.bitsLT(ValOp.getValueType()));
+
+  // For truncating stores, we cannot play the trick of chopping the value
+  // into legal vector types and bitcasting to the right type. Instead, we
+  // unroll the store.
+  EVT StEltVT = StVT.getVectorElementType();
+  EVT ValEltVT = ValVT.getVectorElementType();
+  unsigned Increment = ValEltVT.getSizeInBits() / 8;
+  unsigned NumElts = StVT.getVectorNumElements();
+  SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                            DAG.getIntPtrConstant(0));
+  StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+                                      ST->getPointerInfo(), StEltVT,
+                                      isVolatile, isNonTemporal, Align));
+  unsigned Offset = Increment;
+  for (unsigned i = 1; i < NumElts; ++i, Offset += Increment) {
+    SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
+                                     BasePtr, DAG.getIntPtrConstant(Offset));
+    SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
+                              DAG.getIntPtrConstant(i));
+    StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+                                  ST->getPointerInfo().getWithOffset(Offset),
+                                        StEltVT, isVolatile, isNonTemporal,
+                                        MinAlign(Align, Offset)));
+  }
+}
+
+/// Modifies a vector input (widens or narrows) to a vector of NVT. The
+/// input vector must have the same element type as NVT.
+SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) {
+  // Note that InOp might have been widened, so it might already have
+  // the right width or it might need to be narrowed.
+  EVT InVT = InOp.getValueType();
+  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
+         "input and widened element type must match");
+  DebugLoc dl = InOp.getDebugLoc();
+
+  // Check if InOp already has the right width.
+  if (InVT == NVT)
+    return InOp;
+
+  unsigned InNumElts = InVT.getVectorNumElements();
+  unsigned WidenNumElts = NVT.getVectorNumElements();
+  if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) {
+    unsigned NumConcat = WidenNumElts / InNumElts;
+    SmallVector<SDValue, 16> Ops(NumConcat);
+    SDValue UndefVal = DAG.getUNDEF(InVT);
+    Ops[0] = InOp;
+    for (unsigned i = 1; i != NumConcat; ++i)
+      Ops[i] = UndefVal;
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, &Ops[0], NumConcat);
+  }
+
+  if (WidenNumElts < InNumElts && InNumElts % WidenNumElts)
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
+                       DAG.getIntPtrConstant(0));
+
+  // Fall back to extract and build.
+  SmallVector<SDValue, 16> Ops(WidenNumElts);
+  EVT EltVT = NVT.getVectorElementType();
+  unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
+  unsigned Idx;
+  for (Idx = 0; Idx < MinNumElts; ++Idx)
+    Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+                           DAG.getIntPtrConstant(Idx));
+
+  SDValue UndefVal = DAG.getUNDEF(EltVT);
+  for ( ; Idx < WidenNumElts; ++Idx)
+    Ops[Idx] = UndefVal;
+  return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], WidenNumElts);
+}
diff --git a/final/lib/CodeGen/SelectionDAG/Makefile b/final/lib/CodeGen/SelectionDAG/Makefile
new file mode 100644
index 00000000000..ea716fdaabb
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/Makefile
@@ -0,0 +1,13 @@
+##===- lib/CodeGen/SelectionDAG/Makefile -------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMSelectionDAG + +include $(LEVEL)/Makefile.common diff --git a/final/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/final/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h new file mode 100644 index 00000000000..2dcb2295732 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -0,0 +1,114 @@ +//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H +#define LLVM_CODEGEN_SDNODEDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { +public: + enum DbgValueKind { + SDNODE = 0, // value is the result of an expression + CONST = 1, // value is a constant + FRAMEIX = 2 // value is contents of a stack location + }; +private: + enum DbgValueKind kind; + union { + struct { + SDNode *Node; // valid for expressions + unsigned ResNo; // valid for expressions + } s; + const Value *Const; // valid for constants + unsigned FrameIx; // valid for stack objects + } u; + MDNode *mdPtr; + uint64_t Offset; + DebugLoc DL; + unsigned Order; + bool Invalid; +public: + // Constructor for non-constants. + SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl, + unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O), + Invalid(false) { + kind = SDNODE; + u.s.Node = N; + u.s.ResNo = R; + } + + // Constructor for constants. + SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, + unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = CONST; + u.Const = C; + } + + // Constructor for frame indices. + SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : + mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) { + kind = FRAMEIX; + u.FrameIx = FI; + } + + // Returns the kind. + DbgValueKind getKind() { return kind; } + + // Returns the MDNode pointer. + MDNode *getMDPtr() { return mdPtr; } + + // Returns the SDNode* for a register ref + SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + + // Returns the ResNo for a register ref + unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + + // Returns the Value* for a constant + const Value *getConst() { assert (kind==CONST); return u.Const; } + + // Returns the FrameIx for a stack object + unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + + // Returns the offset. + uint64_t getOffset() { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() { return DL; } + + // Returns the SDNodeOrder. This is the order of the preceding node in the + // input. + unsigned getOrder() { return Order; } + + // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + // property. A SDDbgValue is invalid if the SDNode that produces the value is + // deleted. 
+ void setIsInvalidated() { Invalid = true; } + bool isInvalidated() { return Invalid; } +}; + +} // end llvm namespace + +#endif diff --git a/final/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/final/lib/CodeGen/SelectionDAG/SDNodeOrdering.h new file mode 100644 index 00000000000..f88b26d5c42 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the SDNodeOrdering class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_SDNODEORDERING_H +#define LLVM_CODEGEN_SDNODEORDERING_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +class SDNode; + +/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each +/// SDNode that roughly corresponds to the ordering of the original LLVM +/// instruction. This is used for turning off scheduling, because we'll forgo +/// the normal scheduling algorithms and output the instructions according to +/// this ordering. +class SDNodeOrdering { + DenseMap OrderMap; + + void operator=(const SDNodeOrdering&); // Do not implement. + SDNodeOrdering(const SDNodeOrdering&); // Do not implement. +public: + SDNodeOrdering() {} + + void add(const SDNode *Node, unsigned O) { + OrderMap[Node] = O; + } + void remove(const SDNode *Node) { + DenseMap::iterator Itr = OrderMap.find(Node); + if (Itr != OrderMap.end()) + OrderMap.erase(Itr); + } + void clear() { + OrderMap.clear(); + } + unsigned getOrder(const SDNode *Node) { + return OrderMap[Node]; + } +}; + +} // end llvm namespace + +#endif diff --git a/final/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/final/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 00000000000..e3da2084529 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,636 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/InlineAsm.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct FastPriorityQueue { + SmallVector Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return NULL; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector LiveRegDefs; + std::vector LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule(); + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void ListScheduleBottomUp(); + + /// ForceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool ForceUnitLatencies() const { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DEBUG(dbgs() << "********** List Scheduling **********\n"); + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(NULL); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. 
+ ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = NULL; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. 
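+/// This is used when all ready nodes are blocked by live physical-register
+/// dependencies: duplicating the defining node (for example, a def pinned
+/// to a specific register such as a condition-flags register) gives the
+/// scheduler a second definition to use below the clobbering node.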
+SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getGluedNode()) + return NULL; + + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Glue) + return NULL; + } + + if (TryUnfold) { + SmallVector NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = NewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = NewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + AddPred(NewSU, SDep(LoadSU, SDep::Order, LoadSU->Latency)); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector &Copies) { + SUnit *CopyFromSU = NewSUnit(static_cast(NULL)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = NewSUnit(static_cast(NULL)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector &LiveRegDefs, + SmallSet &RegAdded, + SmallVector &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) { + if (RegAdded.insert(Reg)) { + LRegs.push_back(Reg); + Added = true; + } + } + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) + if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) { + if (RegAdded.insert(*Alias)) { + LRegs.push_back(*Alias); + Added = true; + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVector &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. 
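+          // A sketch of the walk: each inline-asm operand group begins
+          // with an immediate flag word; InlineAsm::getNumOperandRegisters
+          // decodes how many register operands follow it, and the kind
+          // bits say whether those registers are defs (checked below) or
+          // uses (skipped).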
+ for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) { + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGFast::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue.push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector NotReady; + DenseMap > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue.pop(); + while (CurSU) { + SmallVector LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue.pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try code duplication or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + if (!CurSU) { + // Try duplicating the nodes that produces these + // "expensive to copy" values to break the dependency. In case even + // that doesn't work, insert cross class copies. + SUnit *TrySU = NotReady[0]; + SmallVector &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible copy + // the value directly. Do not try duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. 
+ SmallVector Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + llvm_unreachable("Unable to resolve live physical register dependencies!"); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order since it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/true); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} diff --git a/final/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/final/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp new file mode 100644 index 00000000000..430283d5eff --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp @@ -0,0 +1,265 @@ +//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "pre-RA-sched" +#include "ScheduleDAGSDNodes.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/Statistic.h" +#include +using namespace llvm; + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + tdListDAGScheduler("list-td", "Top-down list scheduler", + createTDListDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGList - The actual list scheduler implementation. This supports +/// top-down scheduling. +/// +class ScheduleDAGList : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + +public: + ScheduleDAGList(MachineFunction &mf, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { + + const TargetMachine &tm = mf.getTarget(); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + } + + ~ScheduleDAGList() { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule(); + +private: + void ReleaseSucc(SUnit *SU, const SDep &D); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGList::Schedule() { + DEBUG(dbgs() << "********** List Scheduling **********\n"); + + // Build the scheduling graph. + BuildSchedGraph(NULL); + + AvailableQueue->initNodes(SUnits); + + ListScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. 
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, *I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = 0; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. 
+ DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(0); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifySchedule(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createTDListDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); +} diff --git a/final/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/final/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 00000000000..42d096e0490 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,2715 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <climits>
+using namespace llvm;
+
+STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
+STATISTIC(NumUnfolds, "Number of nodes unfolded");
+STATISTIC(NumDups, "Number of duplicated nodes");
+STATISTIC(NumPRCopies, "Number of physical register copies");
+
+static RegisterScheduler
+  burrListDAGScheduler("list-burr",
+                       "Bottom-up register reduction list scheduling",
+                       createBURRListDAGScheduler);
+static RegisterScheduler
+  tdrListrDAGScheduler("list-tdrr",
+                       "Top-down register reduction list scheduling",
+                       createTDRRListDAGScheduler);
+static RegisterScheduler
+  sourceListDAGScheduler("source",
+                         "Similar to list-burr but schedules in source "
+                         "order when possible",
+                         createSourceListDAGScheduler);
+
+static RegisterScheduler
+  hybridListDAGScheduler("list-hybrid",
+                         "Bottom-up register pressure aware list scheduling "
+                         "which tries to balance latency and register pressure",
+                         createHybridListDAGScheduler);
+
+static RegisterScheduler
+  ILPListDAGScheduler("list-ilp",
+                      "Bottom-up register pressure aware list scheduling "
+                      "which tries to balance ILP and register pressure",
+                      createILPListDAGScheduler);
+
+static cl::opt<bool> DisableSchedCycles(
+  "disable-sched-cycles", cl::Hidden, cl::init(false),
+  cl::desc("Disable cycle-level precision during preRA scheduling"));
+
+// Temporary sched=list-ilp flags until the heuristics are robust.
+static cl::opt<bool> DisableSchedRegPressure(
+  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
+  cl::desc("Disable regpressure priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedLiveUses(
+  "disable-sched-live-uses", cl::Hidden, cl::init(true),
+  cl::desc("Disable live use priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedStalls(
+  "disable-sched-stalls", cl::Hidden, cl::init(true),
+  cl::desc("Disable no-stall priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedCriticalPath(
+  "disable-sched-critical-path", cl::Hidden, cl::init(false),
+  cl::desc("Disable critical path priority in sched=list-ilp"));
+static cl::opt<bool> DisableSchedHeight(
+  "disable-sched-height", cl::Hidden, cl::init(false),
+  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
+
+static cl::opt<int> MaxReorderWindow(
+  "max-sched-reorder", cl::Hidden, cl::init(6),
+  cl::desc("Number of instructions to allow ahead of the critical path "
+           "in sched=list-ilp"));
+
+static cl::opt<unsigned> AvgIPC(
+  "sched-avg-ipc", cl::Hidden, cl::init(1),
+  cl::desc("Average inst/cycle when no target itinerary exists."));
+
+#ifndef NDEBUG
+namespace {
+  // For sched=list-ilp, count the number of times each factor comes into play.
+  enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
+         FactOther, NumFactors };
+}
+static const char *FactorName[NumFactors] =
+{"PressureDiff", "RegUses", "Height", "Depth", "Static", "Other"};
+static int FactorCount[NumFactors];
+#endif //!NDEBUG
+
+namespace {
+//===----------------------------------------------------------------------===//
+/// ScheduleDAGRRList - The actual register reduction list scheduler
+/// implementation. This supports both top-down and bottom-up scheduling.
+///
+class ScheduleDAGRRList : public ScheduleDAGSDNodes {
+private:
+  /// isBottomUp - This is true if the scheduling problem is bottom-up, false
+  /// if it is top-down.
+  bool isBottomUp;
+
+  /// NeedLatency - True if the scheduler will make use of latency information.
+  ///
+  bool NeedLatency;
+
+  /// AvailableQueue - The priority queue to use for the available SUnits.
+  SchedulingPriorityQueue *AvailableQueue;
+
+  /// PendingQueue - This contains all of the instructions whose operands have
+  /// been issued, but their results are not ready yet (due to the latency of
+  /// the operation). Once the operands become available, the instruction is
+  /// added to the AvailableQueue.
+  std::vector<SUnit*> PendingQueue;
+
+  /// HazardRec - The hazard recognizer to use.
+  ScheduleHazardRecognizer *HazardRec;
+
+  /// CurCycle - The current scheduler state corresponds to this cycle.
+  unsigned CurCycle;
+
+  /// MinAvailableCycle - Cycle of the soonest available instruction.
+  unsigned MinAvailableCycle;
+
+  /// IssueCount - Count instructions issued in this cycle.
+  /// Currently valid only for bottom-up scheduling.
+  unsigned IssueCount;
+
+  /// LiveRegDefs - A set of physical registers and their defining nodes that
+  /// are "live". These nodes must be scheduled before any other nodes that
+  /// modify the registers can be scheduled.
+  unsigned NumLiveRegs;
+  std::vector<SUnit*> LiveRegDefs;
+  std::vector<SUnit*> LiveRegGens;
+
+  /// Topo - A topological ordering for SUnits which permits fast IsReachable
+  /// and similar queries.
+  ScheduleDAGTopologicalSort Topo;
+
+public:
+  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+                    SchedulingPriorityQueue *availqueue,
+                    CodeGenOpt::Level OptLevel)
+    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+      Topo(SUnits) {
+
+    const TargetMachine &tm = mf.getTarget();
+    if (DisableSchedCycles || !NeedLatency)
+      HazardRec = new ScheduleHazardRecognizer();
+    else
+      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+  }
+
+  ~ScheduleDAGRRList() {
+    delete HazardRec;
+    delete AvailableQueue;
+  }
+
+  void Schedule();
+
+  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
+  /// IsReachable - Checks if SU is reachable from TargetSU.
+  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
+    return Topo.IsReachable(SU, TargetSU);
+  }
+
+  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
+  /// create a cycle.
+  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
+    return Topo.WillCreateCycle(SU, TargetSU);
+  }
+
+  /// AddPred - Adds a predecessor edge to SUnit SU.
+  /// Updates the topological ordering if required.
+  void AddPred(SUnit *SU, const SDep &D) {
+    Topo.AddPred(SU, D.getSUnit());
+    SU->addPred(D);
+  }
+
+  /// RemovePred - Removes a predecessor edge from SUnit SU.
+  /// Updates the topological ordering if required.
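+  ///
+  /// A minimal usage sketch (hypothetical caller, for illustration only):
+  /// paired with AddPred above, this keeps Topo in sync when an edge is
+  /// added speculatively and later taken back:
+  ///
+  ///   SDep D(BtSU, SDep::Order, /*Latency=*/1);
+  ///   if (!WillCreateCycle(TrySU, BtSU)) {
+  ///     AddPred(TrySU, D);      // edge plus topological update
+  ///     // ... heuristic decides against it ...
+  ///     RemovePred(TrySU, D);   // restores both structures
+  ///   }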
+  void RemovePred(SUnit *SU, const SDep &D) {
+    Topo.RemovePred(SU, D.getSUnit());
+    SU->removePred(D);
+  }
+
+private:
+  bool isReady(SUnit *SU) {
+    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+      AvailableQueue->isReady(SU);
+  }
+
+  void ReleasePred(SUnit *SU, const SDep *PredEdge);
+  void ReleasePredecessors(SUnit *SU);
+  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
+  void ReleaseSuccessors(SUnit *SU);
+  void ReleasePending();
+  void AdvanceToCycle(unsigned NextCycle);
+  void AdvancePastStalls(SUnit *SU);
+  void EmitNode(SUnit *SU);
+  void ScheduleNodeBottomUp(SUnit*);
+  void CapturePred(SDep *PredEdge);
+  void UnscheduleNodeBottomUp(SUnit*);
+  void RestoreHazardCheckerBottomUp();
+  void BacktrackBottomUp(SUnit*, SUnit*);
+  SUnit *CopyAndMoveSuccessors(SUnit*);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
+  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
+
+  SUnit *PickNodeToScheduleBottomUp();
+  void ListScheduleBottomUp();
+
+  void ScheduleNodeTopDown(SUnit*);
+  void ListScheduleTopDown();
+
+
+  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
+  /// Updates the topological ordering if required.
+  SUnit *CreateNewSUnit(SDNode *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = NewSUnit(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// CreateClone - Creates a new SUnit from an existing one.
+  /// Updates the topological ordering if required.
+  SUnit *CreateClone(SUnit *N) {
+    unsigned NumSUnits = SUnits.size();
+    SUnit *NewNode = Clone(N);
+    // Update the topological ordering.
+    if (NewNode->NodeNum >= NumSUnits)
+      Topo.InitDAGTopologicalSorting();
+    return NewNode;
+  }
+
+  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+  /// need actual latency information but the hybrid scheduler does.
+  bool ForceUnitLatencies() const {
+    return !NeedLatency;
+  }
+};
+}  // end anonymous namespace
+
+
+/// Schedule - Schedule the DAG using list scheduling.
+void ScheduleDAGRRList::Schedule() {
+  DEBUG(dbgs()
+        << "********** List Scheduling BB#" << BB->getNumber()
+        << " '" << BB->getName() << "' **********\n");
+#ifndef NDEBUG
+  for (int i = 0; i < NumFactors; ++i) {
+    FactorCount[i] = 0;
+  }
+#endif //!NDEBUG
+
+  CurCycle = 0;
+  IssueCount = 0;
+  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+  NumLiveRegs = 0;
+  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+  LiveRegGens.resize(TRI->getNumRegs(), NULL);
+
+  // Build the scheduling graph.
+  BuildSchedGraph(NULL);
+
+  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+          SUnits[su].dumpAll(this));
+  Topo.InitDAGTopologicalSorting();
+
+  AvailableQueue->initNodes(SUnits);
+
+  HazardRec->Reset();
+
+  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
+  if (isBottomUp)
+    ListScheduleBottomUp();
+  else
+    ListScheduleTopDown();
+
+#ifndef NDEBUG
+  for (int i = 0; i < NumFactors; ++i) {
+    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
+  }
+#endif // !NDEBUG
+  AvailableQueue->releaseState();
+}
+
+//===----------------------------------------------------------------------===//
+//  Bottom-Up Scheduling
+//===----------------------------------------------------------------------===//
+
+/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
+/// the AvailableQueue if the count reaches zero.
Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --PredSU->NumSuccsLeft; + + if (!ForceUnitLatencies()) { + // Updating predecessor's height. This is now the cycle when the + // predecessor can be scheduled without causing a pipeline stall. + PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); + } + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(PredSU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } + } +} + +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + "interference on register dependence"); + LiveRegDefs[I->getReg()] = I->getSUnit(); + if (!LiveRegGens[I->getReg()]) { + ++NumLiveRegs; + LiveRegGens[I->getReg()] = SU; + } + } + } +} + +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. +void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = + isBottomUp ? 
PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + IssueCount = 0; + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + if (isBottomUp) + HazardRec->RecedeCycle(); + else + HazardRec->AdvanceCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (isBottomUp && SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + if (!HazardRec->isEnabled()) + return; + + // Check for phys reg copy. + if (!SU->getNode()) + return; + + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (isBottomUp && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); + + if (!isBottomUp && SU->isCall) { + HazardRec->Reset(); + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. 
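+///
+/// For orientation, a condensed sketch of the body below (not a substitute
+/// for it):
+///
+///   EmitNode(SU);                  // reserve hazard-recognizer resources
+///   Sequence.push_back(SU);        // record issue order (reversed at the end)
+///   ReleasePredecessors(SU);       // may make new SUnits available
+///   ...clear LiveRegDefs/LiveRegGens entries whose live def is SU...
+///   AdvanceToCycle(CurCycle + 1);  // when nothing more can issue this cycle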
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
+  DEBUG(SU->dump(this));
+
+#ifndef NDEBUG
+  if (CurCycle < SU->getHeight())
+    DEBUG(dbgs() << "   Height [" << SU->getHeight() << "] pipeline stall!\n");
+#endif
+
+  // FIXME: Do not modify node height. It may interfere with
+  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+  // node, its ready cycle can aid heuristics, and after scheduling it can
+  // indicate the scheduled cycle.
+  SU->setHeightToAtLeast(CurCycle);
+
+  // Reserve resources for the scheduled instruction.
+  EmitNode(SU);
+
+  Sequence.push_back(SU);
+
+  AvailableQueue->ScheduledNode(SU);
+
+  // If HazardRec is disabled, and each inst counts as one cycle, then
+  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
+  // PendingQueue for schedulers that implement HasReadyFilter.
+  if (!HazardRec->isEnabled() && AvgIPC < 2)
+    AdvanceToCycle(CurCycle + 1);
+
+  // Update liveness of predecessors before successors to avoid treating a
+  // two-address node as a live range def.
+  ReleasePredecessors(SU);
+
+  // Release all the implicit physical register defs that are live.
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
+    }
+  }
+
+  SU->isScheduled = true;
+
+  // Conditions under which the scheduler should eagerly advance the cycle:
+  // (1) No available instructions
+  // (2) All pipelines full, so available instructions must have hazards.
+  //
+  // If HazardRec is disabled, the cycle was advanced earlier.
+  //
+  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+  ++IssueCount;
+  if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+      || (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
+      || AvailableQueue->empty())
+    AdvanceToCycle(CurCycle + 1);
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the succ left count of its predecessors. Remove
+/// them from AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update its
+/// state and that of its predecessors to reflect the change.
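+///
+/// For example (an illustrative trace, not from a real run): if scheduling SU
+/// had dropped a predecessor's NumSuccsLeft to 0 and made it available,
+/// unscheduling SU reverses that via CapturePred:
+///
+///   before: PredSU->NumSuccsLeft == 0, PredSU in AvailableQueue
+///   after:  PredSU->NumSuccsLeft == 1, PredSU removed from the queue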
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(SU->dump(this));
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(&*I);
+    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+             "Physical register dependency violated?");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = NULL;
+      LiveRegGens[I->getReg()] = NULL;
+    }
+  }
+
+  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isAssignedRegDep()) {
+      if (!LiveRegDefs[I->getReg()])
+        ++NumLiveRegs;
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      LiveRegDefs[I->getReg()] = SU;
+      if (LiveRegGens[I->getReg()] == NULL ||
+          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+        LiveRegGens[I->getReg()] = I->getSUnit();
+    }
+  }
+  if (SU->getHeight() < MinAvailableCycle)
+    MinAvailableCycle = SU->getHeight();
+
+  SU->setHeightDirty();
+  SU->isScheduled = false;
+  SU->isAvailable = true;
+  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+    // Don't make available until backtracking is complete.
+    SU->isPending = true;
+    PendingQueue.push_back(SU);
+  }
+  else {
+    AvailableQueue->push(SU);
+  }
+  AvailableQueue->UnscheduledNode(SU);
+}
+
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+  HazardRec->Reset();
+
+  unsigned LookAhead = std::min((unsigned)Sequence.size(),
+                                HazardRec->getMaxLookAhead());
+  if (LookAhead == 0)
+    return;
+
+  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+  unsigned HazardCycle = (*I)->getHeight();
+  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+    SUnit *SU = *I;
+    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+      HazardRec->RecedeCycle();
+    }
+    EmitNode(SU);
+  }
+}
+
+/// BacktrackBottomUp - Backtrack scheduling to the cycle in which BtSU was
+/// scheduled, in order to make a specific node schedulable.
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+  SUnit *OldSU = Sequence.back();
+  while (true) {
+    Sequence.pop_back();
+    if (SU->isSucc(OldSU))
+      // Don't try to remove SU from AvailableQueue.
+      SU->isAvailable = false;
+    // FIXME: use ready cycle instead of height
+    CurCycle = OldSU->getHeight();
+    UnscheduleNodeBottomUp(OldSU);
+    AvailableQueue->setCurCycle(CurCycle);
+    if (OldSU == BtSU)
+      break;
+    OldSU = Sequence.back();
+  }
+
+  assert(!SU->isSucc(OldSU) && "Something is wrong!");
+
+  RestoreHazardCheckerBottomUp();
+
+  ReleasePending();
+
+  ++NumBacktracks;
+}
+
+static bool isOperandOf(const SUnit *SU, SDNode *N) {
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (SUNode->isOperandOf(N))
+      return true;
+  }
+  return false;
+}
+
+/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
+/// successors to the newly created node.
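+///
+/// Why cloning can pay off (an illustrative scenario, with hypothetical x86
+/// values): a single flags-producing compare feeding two already-scheduled
+/// users would otherwise pin EFLAGS live across unrelated instructions;
+/// duplicating the def gives each user its own producer:
+///
+///   t = CMP a, b                       t  = CMP a, b   (feeds use2)
+///   use1(t) ... use2(t)      ==>       t' = CMP a, b   (feeds use1)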
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + SDNode *N = SU->getNode(); + if (!N) + return NULL; + + if (SU->getNode()->getGluedNode()) + return NULL; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue) + return NULL; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + const SDValue &Op = N->getOperand(i); + EVT VT = Op.getNode()->getValueType(Op.getResNo()); + if (VT == MVT::Glue) + return NULL; + } + + if (TryUnfold) { + SmallVector NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return NULL; + + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. + bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + ComputeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != TID.getNumOperands(); ++i) { + if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (TID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + ComputeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector ChainPreds; + SmallVector ChainSuccs; + SmallVector LoadPreds; + SmallVector NodePreds; + SmallVector NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (isOperandOf(I->getSUnit(), LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. + for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. 
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency)); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVector &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(NULL); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(NULL); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); + AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? 
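+///
+/// Worked example (hypothetical instruction descriptor): with one explicit
+/// def and ImplicitDefs = { X0, X1, 0 }, querying Reg == X1 skips result 0
+/// (the explicit def) and result 1 (X0), so it returns N->getValueType(2).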
+static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + unsigned NumRes = TID.getNumDefs(); + for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + return N->getValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector &LiveRegDefs, + SmallSet &RegAdded, + SmallVector &LRegs, + const TargetRegisterInfo *TRI) { + for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) { + + // Check if Ref is live. + if (!LiveRegDefs[Reg]) continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[Reg] == SU) continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(Reg)) + LRegs.push_back(Reg); + } +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { + if (NumLiveRegs == 0) + return false; + + SmallSet RegAdded; + // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode()); + if (!TID.ImplicitDefs) + continue; + for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + + return !LRegs.empty(); +} + +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. 
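+///
+/// In outline (a condensed sketch of the strategy implemented below):
+///
+///   while ((SU = pop()) && DelayForLiveRegsBottomUp(SU, LRegs))
+///     stash SU on the Interferences list;
+///   if (SU) return SU;                  // the common, conflict-free case
+///   try BacktrackBottomUp(...);         // unschedule back to the live def
+///   else CopyAndMoveSuccessors(...) or cross-class copies as a last resort;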
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+  SmallVector<SUnit*, 4> Interferences;
+  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+  SUnit *CurSU = AvailableQueue->pop();
+  while (CurSU) {
+    SmallVector<unsigned, 4> LRegs;
+    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+      break;
+    LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
+    Interferences.push_back(CurSU);
+    CurSU = AvailableQueue->pop();
+  }
+  if (CurSU) {
+    // Add the nodes that aren't ready back onto the available list.
+    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+      Interferences[i]->isPending = false;
+      assert(Interferences[i]->isAvailable && "must still be available");
+      AvailableQueue->push(Interferences[i]);
+    }
+    return CurSU;
+  }
+
+  // All candidates are delayed due to live physical reg dependencies.
+  // Try backtracking, code duplication, or inserting cross class copies
+  // to resolve it.
+  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+    SUnit *TrySU = Interferences[i];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+    // Try unscheduling up to the point where it's safe to schedule
+    // this node.
+    SUnit *BtSU = NULL;
+    unsigned LiveCycle = UINT_MAX;
+    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+      unsigned Reg = LRegs[j];
+      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+        BtSU = LiveRegGens[Reg];
+        LiveCycle = BtSU->getHeight();
+      }
+    }
+    if (!WillCreateCycle(TrySU, BtSU)) {
+      BacktrackBottomUp(TrySU, BtSU);
+
+      // Force the current node to be scheduled before the node that
+      // requires the physical reg dep.
+      if (BtSU->isAvailable) {
+        BtSU->isAvailable = false;
+        if (!BtSU->isPending)
+          AvailableQueue->remove(BtSU);
+      }
+      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+                          /*Reg=*/0, /*isNormalMemory=*/false,
+                          /*isMustAlias=*/false, /*isArtificial=*/true));
+
+      // If one or more successors has been unscheduled, then the current
+      // node is no longer available. Schedule a successor that's now
+      // available instead.
+      if (!TrySU->isAvailable) {
+        CurSU = AvailableQueue->pop();
+      }
+      else {
+        CurSU = TrySU;
+        TrySU->isPending = false;
+        Interferences.erase(Interferences.begin()+i);
+      }
+      break;
+    }
+  }
+
+  if (!CurSU) {
+    // Can't backtrack. If it's too expensive to copy the value, then try
+    // duplicating the nodes that produce these "too expensive to copy"
+    // values to break the dependency. In case even that doesn't work,
+    // insert cross class copies.
+    // If it's not too expensive, i.e. cost != -1, issue copies.
+    SUnit *TrySU = Interferences[0];
+    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+    assert(LRegs.size() == 1 && "Can't handle this yet!");
+    unsigned Reg = LRegs[0];
+    SUnit *LRDef = LiveRegDefs[Reg];
+    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+    const TargetRegisterClass *RC =
+      TRI->getMinimalPhysRegClass(Reg, VT);
+    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+    // If the cross copy register class is null, then it must be possible to
+    // copy the value directly. Do not try to duplicate the def.
+    SUnit *NewDef = 0;
+    if (DestRC)
+      NewDef = CopyAndMoveSuccessors(LRDef);
+    else
+      DestRC = RC;
+    if (!NewDef) {
+      // Issue copies, these can be expensive cross register class copies.
+ SmallVector Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + assert(CurSU && "Unable to resolve live physical register dependencies!"); + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + Interferences[i]->isPending = false; + // May no longer be available due to backtracking. + if (Interferences[i]->isAvailable) { + AvailableQueue->push(Interferences[i]); + } + } + return CurSU; +} + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + DEBUG(dbgs() << "\n*** Examining Available\n"; + AvailableQueue->dump(this)); + + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); + + AdvancePastStalls(SU); + + ScheduleNodeBottomUp(SU); + + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); + } + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(0); + } +#endif + --SuccSU->NumPredsLeft; + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. 
+ if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + SuccSU->isAvailable = true; + AvailableQueue->push(SuccSU); + } +} + +void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { + // Top down: release successors + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-tdrr scheduler doesn't yet support physreg dependencies!"); + + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->ScheduledNode(SU); +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGRRList::ListScheduleTopDown() { + AvailableQueue->setCurCycle(CurCycle); + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // All leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty()) { + SUnit *CurSU = AvailableQueue->pop(); + + if (CurSU) + ScheduleNodeTopDown(CurSU); + ++CurCycle; + AvailableQueue->setCurCycle(CurCycle); + } + +#ifndef NDEBUG + VerifySchedule(isBottomUp); +#endif +} + + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// +namespace { +class RegReductionPQBase; + +struct queue_sort : public std::binary_function { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// td_ls_rr_sort - Priority function for top down register pressure reduction +// scheduler. +struct td_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = false, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} + + bool operator()(const SUnit* left, const SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. 
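+//
+// A note on the comparator convention shared by these functors (see
+// popFromQueue below): operator()(left, right) == true means right is
+// preferred, i.e. right is popped and scheduled first. For a hypothetical
+// picker P, the scan keeps the preferred element:
+//
+//   if (P(*Best, *I))   // true: *I beats *Best and becomes the candidate
+//     Best = I;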
+struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + src_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + src_ls_rr_sort(const src_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + hybrid_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS) + : SPQ(RHS.SPQ) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector Queue; + unsigned CurQueueId; + bool TracksRegPressure; + + // SUnits - The SUnits for the current graph. + std::vector *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
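+  ///
+  /// Illustrative numbers (not from a real target): with RegLimit[GPR] == 16
+  /// and RegPressure[GPR] == 15, HighRegPressure() answers true for any
+  /// candidate whose unscheduled predecessors would define one more GPR
+  /// value, steering the picker toward pressure-reducing nodes.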
+ std::vector RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector &sunits); + + void addNode(const SUnit *SU); + + void updateNode(const SUnit *SU); + + void releaseState() { + SUnits = 0; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + return scheduleDAG->DAG->GetOrdering(SU->getNode()); + } + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != prior(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; + + void ScheduledNode(SUnit *SU); + + void UnscheduledNode(SUnit *SU); + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template +class RegReductionPriorityQueue : public RegReductionPQBase { + static SUnit *popFromQueue(std::vector &Q, SF &Picker) { + std::vector::iterator Best = Q.begin(); + for (std::vector::iterator I = llvm::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != prior(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; + } + + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() { + if (Queue.empty()) return NULL; + + SUnit *V = popFromQueue(Queue, Picker); + V->NodeQueueId = 0; + return V; + } + + void dump(ScheduleDAG *DAG) const { + // Emulate pop() without clobbering NodeQueueIds. 
+  void dump(ScheduleDAG *DAG) const {
+    // Emulate pop() without clobbering NodeQueueIds.
+    std::vector<SUnit*> DumpQueue = Queue;
+    SF DumpPicker = Picker;
+    while (!DumpQueue.empty()) {
+      SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+      if (isBottomUp())
+        dbgs() << "Height " << SU->getHeight() << ": ";
+      else
+        dbgs() << "Depth " << SU->getDepth() << ": ";
+      SU->dump(DAG);
+    }
+  }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+//           Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
+/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
+/// Smaller number is the higher priority.
+static unsigned
+CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
+  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
+  if (SethiUllmanNumber != 0)
+    return SethiUllmanNumber;
+
+  unsigned Extra = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    SUnit *PredSU = I->getSUnit();
+    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
+    if (PredSethiUllman > SethiUllmanNumber) {
+      SethiUllmanNumber = PredSethiUllman;
+      Extra = 0;
+    } else if (PredSethiUllman == SethiUllmanNumber)
+      ++Extra;
+  }
+
+  SethiUllmanNumber += Extra;
+
+  if (SethiUllmanNumber == 0)
+    SethiUllmanNumber = 1;
+
+  return SethiUllmanNumber;
+}
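+// For intuition, a minimal sketch of the classic Sethi-Ullman recurrence on
+// a plain binary expression tree; ExprNode is hypothetical and assumes a
+// full binary tree (every node has zero or two children). A leaf needs one
+// register; an interior node needs the larger child demand when the children
+// differ, and one more register when they tie. CalcNodeSethiUllmanNumber
+// above generalizes this idea to DAG nodes with any number of non-chain
+// predecessors.
+struct ExprNode { ExprNode *Left, *Right; }; // null links denote a leaf
+static unsigned ExampleSethiUllman(const ExprNode *N) {
+  if (!N->Left && !N->Right)
+    return 1;                                 // leaf: one register
+  unsigned L = ExampleSethiUllman(N->Left);
+  unsigned R = ExampleSethiUllman(N->Right);
+  return (L == R) ? L + 1 : (L > R ? L : R);  // interior node
+}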
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+  SethiUllmanNumbers.assign(SUnits->size(), 0);
+
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+  SUnits = &sunits;
+  // Add pseudo dependency edges for two-address nodes.
+  AddPseudoTwoAddrDeps();
+  // Reroute edges to nodes with multiple uses.
+  if (!TracksRegPressure)
+    PrescheduleNodesWithMultipleUses();
+  // Calculate node priorities.
+  CalculateSethiUllmanNumbers();
+}
+
+void RegReductionPQBase::addNode(const SUnit *SU) {
+  unsigned SUSize = SethiUllmanNumbers.size();
+  if (SUnits->size() > SUSize)
+    SethiUllmanNumbers.resize(SUSize*2, 0);
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+  SethiUllmanNumbers[SU->NodeNum] = 0;
+  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
+
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+  assert(SU->NodeNum < SethiUllmanNumbers.size());
+  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+    // CopyToReg should be close to its uses to facilitate coalescing and
+    // avoid spilling.
+    return 0;
+  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+      Opc == TargetOpcode::SUBREG_TO_REG ||
+      Opc == TargetOpcode::INSERT_SUBREG)
+    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+    // close to their uses to facilitate coalescing.
+    return 0;
+  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+    // If SU does not have a register use, i.e. it doesn't produce a value
+    // that would be consumed (e.g. store), then it terminates a chain of
+    // computation. Give it a large SethiUllman number so it will be
+    // scheduled right before its predecessors, so that it doesn't lengthen
+    // their live ranges.
+    return 0xffff;
+  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+    // If SU does not have a register def, schedule it close to its uses
+    // because it does not lengthen any live ranges.
+    return 0;
+  return SethiUllmanNumbers[SU->NodeNum];
+}
+
+//===----------------------------------------------------------------------===//
+//                     Register Pressure Tracking
+//===----------------------------------------------------------------------===//
+
+void RegReductionPQBase::dumpRegPressure() const {
+  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+         E = TRI->regclass_end(); I != E; ++I) {
+    const TargetRegisterClass *RC = *I;
+    unsigned Id = RC->getID();
+    unsigned RP = RegPressure[Id];
+    if (!RP) continue;
+    DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+          << '\n');
+  }
+}
+
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+  if (!TLI)
+    return false;
+
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      unsigned Cost = TLI->getRepRegClassCostFor(VT);
+      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+        return true;
+    }
+  }
+  return false;
+}
+
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
+  const SDNode *N = SU->getNode();
+
+  if (!N->isMachineOpcode() || !SU->NumSuccs)
+    return false;
+
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    EVT VT = N->getValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      return true;
+  }
+  return false;
+}
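+// A minimal numeric sketch (hypothetical values) of the threshold test used
+// in HighRegPressure above: a class holding 7 live registers with a limit of
+// 8 is pushed into the high-pressure range by any def of cost 1, since
+// 7 + 1 >= 8.
+static bool ExampleExceedsLimit(unsigned CurPressure, unsigned Cost,
+                                unsigned Limit) {
+  return CurPressure + Cost >= Limit;
+}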
+// Compute the register pressure contribution by this instruction by counting
+// up for uses that are not live and down for defs. Only count register classes
+// that are already under high pressure. As a side effect, compute the number of
+// uses of registers that are already live.
+//
+// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
+// and could probably be factored.
+int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
+  LiveUses = 0;
+  int PDiff = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      if (PredSU->getNode()->isMachineOpcode())
+        ++LiveUses;
+      continue;
+    }
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance()) {
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      if (RegPressure[RCId] >= RegLimit[RCId])
+        ++PDiff;
+    }
+  }
+  const SDNode *N = SU->getNode();
+
+  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
+    return PDiff;
+
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  for (unsigned i = 0; i != NumDefs; ++i) {
+    EVT VT = N->getValueType(i);
+    if (!N->hasAnyUseOfValue(i))
+      continue;
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] >= RegLimit[RCId])
+      --PDiff;
+  }
+  return PDiff;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+  if (!TracksRegPressure)
+    return;
+
+  if (!SU->getNode())
+    return;
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl())
+      continue;
+    SUnit *PredSU = I->getSUnit();
+    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+    // to cover the number of registers defined (they are all live).
+    if (PredSU->NumRegDefsLeft == 0) {
+      continue;
+    }
+    // FIXME: The ScheduleDAG currently loses information about which of a
+    // node's values is consumed by each dependence. Consequently, if the node
+    // defines multiple register classes, we don't know which to pressurize
+    // here. Instead the following loop consumes the register defs in an
+    // arbitrary order. At least it handles the common case of clustered loads
+    // to the same class. For precise liveness, each SDep needs to indicate the
+    // result number. But that tightly couples the ScheduleDAG with the
+    // SelectionDAG making updates tricky. A simpler hack would be to attach a
+    // value type or register class to SDep.
+    //
+    // The most important aspect of register tracking is balancing the increase
+    // here with the reduction further below. Note that this SU may use multiple
+    // defs in PredSU. They can't be determined here, but we've already
+    // compensated by reducing NumRegDefsLeft in PredSU during
+    // ScheduleDAGSDNodes::AddSchedEdges.
+    --PredSU->NumRegDefsLeft;
+    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+      if (SkipRegDefs)
+        continue;
+      EVT VT = RegDefPos.GetValue();
+      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+      break;
+    }
+  }
+
+  // We should have this assert, but there may be dead SDNodes that never
+  // materialize as SUnits, so they don't appear to generate liveness.
+  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+  int SkipRegDefs = (int)SU->NumRegDefsLeft;
+  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+    if (SkipRegDefs > 0)
+      continue;
+    EVT VT = RegDefPos.GetValue();
+    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+    if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+      // Register pressure tracking is imprecise. This can happen. But we try
+      // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + dumpRegPressure(); +} + +void RegReductionPQBase::UnscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N) return; + + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG) { + EVT VT = PN->getOperand(0).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } else if (POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + EVT VT = PN->getValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. + RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); +} + +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +/// closestSucc - Returns the scheduled cycle of the successor which is +/// closest to the current cycle. 
+static unsigned closestSucc(const SUnit *SU) {
+  unsigned MaxHeight = 0;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain succs
+    unsigned Height = I->getSUnit()->getHeight();
+    // If there are a bunch of CopyToRegs stacked up, they should be considered
+    // to be at the same position.
+    if (I->getSUnit()->getNode() &&
+        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
+      Height = closestSucc(I->getSUnit())+1;
+    if (Height > MaxHeight)
+      MaxHeight = Height;
+  }
+  return MaxHeight;
+}
+
+/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
+/// for scratch registers, i.e. number of data dependencies.
+static unsigned calcMaxScratches(const SUnit *SU) {
+  unsigned Scratches = 0;
+  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Scratches++;
+  }
+  return Scratches;
+}
+
+/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is
+/// a CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+  bool RetVal = false;
+  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;
+    const SUnit *SuccSU = I->getSUnit();
+    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+      unsigned Reg =
+        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+        RetVal = true;
+        continue;
+      }
+    }
+    return false;
+  }
+  return RetVal;
+}
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+  SmallSet<const SUnit*, 4> Preds;
+  for (SUnit::const_pred_iterator I = left->Preds.begin(), E = left->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    Preds.insert(I->getSUnit());
+  }
+  for (SUnit::const_pred_iterator I = right->Preds.begin(), E = right->Preds.end();
+       I != E; ++I) {
+    if (I->isCtrl()) continue;  // ignore chain preds
+    if (Preds.count(I->getSUnit()))
+      return true;
+  }
+  return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+  if ((int)SPQ->getCurCycle() < Height) return true;
+  if (SPQ->getHazardRec()->getHazardType(SU, 0)
+      != ScheduleHazardRecognizer::NoHazard)
+    return true;
+  return false;
+}
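+// A minimal sketch (hypothetical caller) of how the three-way result of
+// BUCompareLatency below is folded into the boolean "schedule right first"
+// answer the queue pickers expect: positive favors right, negative favors
+// left, and zero defers to the next tie-breaker, exactly as BURRSort does
+// with "if (result != 0) return result > 0;".
+static bool ExampleFoldThreeWay(int Cmp, bool NextTieBreaker) {
+  if (Cmp != 0)
+    return Cmp > 0;
+  return NextTieBreaker;
+}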
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+                            RegReductionPQBase *SPQ) {
+  // If the two nodes share an operand and one of them has a single
+  // use that is a live out copy, favor the one that is live out. Otherwise
+  // it will be difficult to eliminate the copy if the instruction is a
+  // loop induction variable update. e.g.
+  // BB:
+  // sub r1, r3, #1
+  // str r0, [r2, r3]
+  // mov r3, r1
+  // cmp
+  // bne BB
+  bool SharePred = UnitsSharePred(left, right);
+  // FIXME: Only adjust if BB is a loop back edge.
+  // FIXME: What's the cost of a copy?
+  int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+  int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+  int LHeight = (int)left->getHeight() - LBonus;
+  int RHeight = (int)right->getHeight() - RBonus;
+
+  bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+    BUHasStall(left, LHeight, SPQ);
+  bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+    BUHasStall(right, RHeight, SPQ);
+
+  // If scheduling one of the nodes will cause a pipeline stall, delay it.
+  // If scheduling either one of the nodes will cause a pipeline stall, sort
+  // them according to their height.
+  if (LStall) {
+    if (!RStall)
+      return 1;
+    if (LHeight != RHeight)
+      return LHeight > RHeight ? 1 : -1;
+  } else if (RStall)
+    return -1;
+
+  // If either node is scheduling for latency, sort them by height/depth
+  // and latency.
+  if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+                     right->SchedulingPref == Sched::Latency)) {
+    if (DisableSchedCycles) {
+      if (LHeight != RHeight)
+        return LHeight > RHeight ? 1 : -1;
+    }
+    else {
+      // If neither instruction stalls (!LStall && !RStall) then its height
+      // is already covered so only its depth matters. We also reach this if
+      // both stall but have the same height.
+      unsigned LDepth = left->getDepth();
+      unsigned RDepth = right->getDepth();
+      if (LDepth != RDepth) {
+        DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
+              << ") depth " << LDepth << " vs SU (" << right->NodeNum
+              << ") depth " << RDepth << "\n");
+        return LDepth < RDepth ? 1 : -1;
+      }
+    }
+    if (left->Latency != right->Latency)
+      return left->Latency > right->Latency ? 1 : -1;
+  }
+  return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
+  unsigned LPriority = SPQ->getNodePriority(left);
+  unsigned RPriority = SPQ->getNodePriority(right);
+  if (LPriority != RPriority) {
+    DEBUG(++FactorCount[FactStatic]);
+    return LPriority > RPriority;
+  }
+  DEBUG(++FactorCount[FactOther]);
+
+  // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
+  // e.g.
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // and the following instructions are both ready.
+  // t2 = op c3
+  // t4 = op c4
+  //
+  // Then schedule t2 = op first.
+  // i.e.
+  // t4 = op c4
+  // t2 = op c3
+  // t1 = op t2, c1
+  // t3 = op t4, c2
+  //
+  // This creates more short live intervals.
+  unsigned LDist = closestSucc(left);
+  unsigned RDist = closestSucc(right);
+  if (LDist != RDist)
+    return LDist < RDist;
+
+  // How many registers become live when the node is scheduled.
+  unsigned LScratch = calcMaxScratches(left);
+  unsigned RScratch = calcMaxScratches(right);
+  if (LScratch != RScratch)
+    return LScratch > RScratch;
+
+  if (!DisableSchedCycles) {
+    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+    if (result != 0)
+      return result > 0;
+  }
+  else {
+    if (left->getHeight() != right->getHeight())
+      return left->getHeight() > right->getHeight();
+
+    if (left->getDepth() != right->getDepth())
+      return left->getDepth() < right->getDepth();
+  }
+
+  assert(left->NodeQueueId && right->NodeQueueId &&
+         "NodeQueueId cannot be zero");
+  return (left->NodeQueueId > right->NodeQueueId);
+}
+
+// Bottom up
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  return BURRSort(left, right, SPQ);
+}
+
+// Source order, otherwise bottom up.
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + + return BURRSort(left, right, SPQ); +} + +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. + if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); + return true; + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); + return false; + } + else if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + return BURRSort(left, right, SPQ); +} + +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +bool canEnableCoaelscing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (left->isCall || right->isCall) + // No way to compute latency of calls. 
+ return BURRSort(left, right, SPQ); + + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(++FactorCount[FactPressureDiff]); + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoaelscing(left); + bool RReduce = canEnableCoaelscing(right); + DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + DEBUG(++FactorCount[FactRegUses]); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum << "): " + << right->getDepth() << "\n"); + DEBUG(++FactorCount[FactDepth]); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } + } + + return BURRSort(left, right, SPQ); +} + +//===----------------------------------------------------------------------===// +// Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { + if (SU->isTwoAddress) { + unsigned Opc = SU->getNode()->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + unsigned NumRes = TID.getNumDefs(); + unsigned NumOps = TID.getNumOperands() - NumRes; + for (unsigned i = 0; i != NumOps; ++i) { + if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) { + SDNode *DU = SU->getNode()->getOperand(i).getNode(); + if (DU->getNodeId() != -1 && + Op->OrigNode == &(*SUnits)[DU->getNodeId()]) + return true; + } + } + } + return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. 
+static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
+                                  const TargetInstrInfo *TII,
+                                  const TargetRegisterInfo *TRI) {
+  SDNode *N = SuccSU->getNode();
+  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+  const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
+  assert(ImpDefs && "Caller should check hasPhysRegDefs");
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (!SUNode->isMachineOpcode())
+      continue;
+    const unsigned *SUImpDefs =
+      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
+    if (!SUImpDefs)
+      return false;
+    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+      EVT VT = N->getValueType(i);
+      if (VT == MVT::Glue || VT == MVT::Other)
+        continue;
+      if (!N->hasAnyUseOfValue(i))
+        continue;
+      unsigned Reg = ImpDefs[i - NumDefs];
+      for (;*SUImpDefs; ++SUImpDefs) {
+        unsigned SUReg = *SUImpDefs;
+        if (TRI->regsOverlap(Reg, SUReg))
+          return true;
+      }
+    }
+  }
+  return false;
+}
+
+/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
+/// are not handled well by the general register pressure reduction
+/// heuristics. When presented with code like this:
+///
+///      N
+///    / |
+///   /  |
+///  U  store
+///  |
+/// ...
+///
+/// the heuristics tend to push the store up, but since the
+/// operand of the store has another use (U), this would increase
+/// the length of that other use (the U->N edge).
+///
+/// This function transforms code like the above to route U's
+/// dependence through the store when possible, like this:
+///
+///     N
+///     ||
+///     ||
+///   store
+///     |
+///     U
+///     |
+///    ...
+///
+/// This results in the store being scheduled immediately
+/// after N, which shortens the U->N live range, reducing
+/// register pressure.
+///
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
+  // Visit all the nodes in topological order, working top-down.
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    // For now, only look at nodes with no data successors, such as stores.
+    // These are especially important, due to the heuristics in
+    // getNodePriority for nodes with no data successors.
+    if (SU->NumSuccs != 0)
+      continue;
+    // For now, only look at nodes with exactly one data predecessor.
+    if (SU->NumPreds != 1)
+      continue;
+    // Avoid prescheduling copies to virtual registers, which don't behave
+    // like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyToReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Locate the single data predecessor.
+    SUnit *PredSU = 0;
+    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
+         EE = SU->Preds.end(); II != EE; ++II)
+      if (!II->isCtrl()) {
+        PredSU = II->getSUnit();
+        break;
+      }
+    assert(PredSU);
+
+    // Don't rewrite edges that carry physregs, because that requires
+    // additional support infrastructure.
+    if (PredSU->hasPhysRegDefs)
+      continue;
+    // Short-circuit the case where SU is PredSU's only data successor.
+    if (PredSU->NumSuccs == 1)
+      continue;
+    // Avoid prescheduling to copies from virtual registers, which don't
+    // behave like other nodes from the perspective of scheduling heuristics.
+    if (SDNode *N = SU->getNode())
+      if (N->getOpcode() == ISD::CopyFromReg &&
+          TargetRegisterInfo::isVirtualRegister
+            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+        continue;
+
+    // Perform checks on the successors of PredSU.
+    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
+         EE = PredSU->Succs.end(); II != EE; ++II) {
+      SUnit *PredSuccSU = II->getSUnit();
+      if (PredSuccSU == SU) continue;
+      // If PredSU has another successor with no data successors, for
+      // now don't attempt to choose either over the other.
+      if (PredSuccSU->NumSuccs == 0)
+        goto outer_loop_continue;
+      // Don't break physical register dependencies.
+      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
+        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
+          goto outer_loop_continue;
+      // Don't introduce graph cycles.
+      if (scheduleDAG->IsReachable(SU, PredSuccSU))
+        goto outer_loop_continue;
+    }
+
+    // Ok, the transformation is safe and the heuristics suggest it is
+    // profitable. Update the graph.
+    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
+                 << " next to PredSU #" << PredSU->NodeNum
+                 << " to guide scheduling in the presence of multiple uses\n");
+    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
+      SDep Edge = PredSU->Succs[i];
+      assert(!Edge.isAssignedRegDep());
+      SUnit *SuccSU = Edge.getSUnit();
+      if (SuccSU != SU) {
+        Edge.setSUnit(PredSU);
+        scheduleDAG->RemovePred(SuccSU, Edge);
+        scheduleDAG->AddPred(SU, Edge);
+        Edge.setSUnit(SU);
+        scheduleDAG->AddPred(SuccSU, Edge);
+        --i;
+      }
+    }
+  outer_loop_continue:;
+  }
+}
+
+/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
+/// it as a def&use operand, add a pseudo control edge from it to the other
+/// node (if it won't create a cycle) so the two-address one will be scheduled
+/// first (lower in the schedule). If both nodes are two-address, favor the
+/// one that has a CopyToReg use (more likely to be a loop induction update).
+/// If both are two-address, but one is commutable while the other is not
+/// commutable, favor the one that's not commutable.
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
+  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
+    SUnit *SU = &(*SUnits)[i];
+    if (!SU->isTwoAddress)
+      continue;
+
+    SDNode *Node = SU->getNode();
+    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
+      continue;
+
+    bool isLiveOut = hasOnlyLiveOutUses(SU);
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    unsigned NumRes = TID.getNumDefs();
+    unsigned NumOps = TID.getNumOperands() - NumRes;
+    for (unsigned j = 0; j != NumOps; ++j) {
+      if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+        continue;
+      SDNode *DU = SU->getNode()->getOperand(j).getNode();
+      if (DU->getNodeId() == -1)
+        continue;
+      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
+      if (!DUSU) continue;
+      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
+           E = DUSU->Succs.end(); I != E; ++I) {
+        if (I->isCtrl()) continue;
+        SUnit *SuccSU = I->getSUnit();
+        if (SuccSU == SU)
+          continue;
+        // Be conservative. Ignore if nodes aren't at roughly the same
+        // depth and height.
+        if (SuccSU->getHeight() < SU->getHeight() &&
+            (SU->getHeight() - SuccSU->getHeight()) > 1)
+          continue;
+        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
+        // constrains whatever is using the copy, instead of the copy
+        // itself. In the case that the copy is coalesced, this
+        // preserves the intent of the pseudo two-address heuristics.
+        while (SuccSU->Succs.size() == 1 &&
+               SuccSU->getNode()->isMachineOpcode() &&
+               SuccSU->getNode()->getMachineOpcode() ==
+                 TargetOpcode::COPY_TO_REGCLASS)
+          SuccSU = SuccSU->Succs.front().getSUnit();
+        // Don't constrain non-instruction nodes.
+ if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can clobber them. + if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + continue; + } + // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; + // these may be coalesced away. We want them close to their uses. + unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) + continue; + if ((!canClobber(SuccSU, DUSU) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, + /*isArtificial=*/true)); + } + } + } + } +} + +/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled +/// predecessors of the successors of the SUnit SU. Stop when the provided +/// limit is exceeded. +static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, + unsigned Limit) { + unsigned Sum = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + const SUnit *SuccSU = I->getSUnit(); + for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), + EE = SuccSU->Preds.end(); II != EE; ++II) { + SUnit *PredSU = II->getSUnit(); + if (!PredSU->isScheduled) + if (++Sum > Limit) + return Sum; + } + } + return Sum; +} + + +// Top down +bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); + bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); + bool LIsFloater = LIsTarget && left->NumPreds == 0; + bool RIsFloater = RIsTarget && right->NumPreds == 0; + unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; + unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; + + if (left->NumSuccs == 0 && right->NumSuccs != 0) + return false; + else if (left->NumSuccs != 0 && right->NumSuccs == 0) + return true; + + if (LIsFloater) + LBonus -= 2; + if (RIsFloater) + RBonus -= 2; + if (left->NumSuccs == 1) + LBonus += 2; + if (right->NumSuccs == 1) + RBonus += 2; + + if (LPriority+LBonus != RPriority+RBonus) + return LPriority+LBonus < RPriority+RBonus; + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + + if (left->NumSuccsLeft != right->NumSuccsLeft) + return left->NumSuccsLeft > right->NumSuccsLeft; + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = + new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + TDRegReductionPriorityQueue *PQ = + new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + + SrcRegReductionPriorityQueue *PQ = + new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + HybridBURRPriorityQueue *PQ = + new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetMachine &TM = IS->TM; + const TargetInstrInfo *TII = TM.getInstrInfo(); + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetLowering *TLI = &IS->getTargetLowering(); + + ILPBURRPriorityQueue *PQ = + new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp 
b/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
new file mode 100644
index 00000000000..b762af5ca9b
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -0,0 +1,763 @@
+//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the ScheduleDAG class, which is a base class used by
+// scheduling implementation classes.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pre-RA-sched"
+#include "SDNodeDbgValue.h"
+#include "ScheduleDAGSDNodes.h"
+#include "InstrEmitter.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(LoadsClustered, "Number of loads clustered together");
+
+// This allows the latency-based scheduler to notice high latency instructions
+// without a target itinerary. The choice of number here has more to do with
+// balancing scheduler heuristics than with the actual machine latency.
+static cl::opt<int> HighLatencyCycles(
+  "sched-high-latency-cycles", cl::Hidden, cl::init(10),
+  cl::desc("Roughly estimate the number of cycles that 'long latency' "
+           "instructions take for targets with no itinerary"));
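+// Usage sketch (illustrative): as a hidden cl::opt, the knob above can be
+// set from any tool that links this scheduler, for example
+//   llc -sched-high-latency-cycles=25 foo.ll
+// to re-balance the latency heuristics for a target with no itinerary.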
+ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
+  : ScheduleDAG(mf),
+    InstrItins(mf.getTarget().getInstrItineraryData()) {}
+
+/// Run - perform scheduling.
+///
+void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
+                             MachineBasicBlock::iterator insertPos) {
+  DAG = dag;
+  ScheduleDAG::Run(bb, insertPos);
+}
+
+/// NewSUnit - Creates a new SUnit and return a ptr to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+  const SUnit *Addr = 0;
+  if (!SUnits.empty())
+    Addr = &SUnits[0];
+#endif
+  SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+  assert((Addr == 0 || Addr == &SUnits[0]) &&
+         "SUnits std::vector reallocated on the fly!");
+  SUnits.back().OrigNode = &SUnits.back();
+  SUnit *SU = &SUnits.back();
+  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+  if (!N ||
+      (N->isMachineOpcode() &&
+       N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF))
+    SU->SchedulingPref = Sched::None;
+  else
+    SU->SchedulingPref = TLI.getSchedulingPreference(N);
+  return SU;
+}
+
+SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
+  SUnit *SU = NewSUnit(Old->getNode());
+  SU->OrigNode = Old->OrigNode;
+  SU->Latency = Old->Latency;
+  SU->isCall = Old->isCall;
+  SU->isTwoAddress = Old->isTwoAddress;
+  SU->isCommutable = Old->isCommutable;
+  SU->hasPhysRegDefs = Old->hasPhysRegDefs;
+  SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+  SU->SchedulingPref = Old->SchedulingPref;
+  Old->isCloned = true;
+  return SU;
+}
+
+/// CheckForPhysRegDependency - Check if the dependency between def and use of
+/// a specified operand is a physical register dependency. If so, returns the
+/// register and the cost of copying the register.
+static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
+                                      const TargetRegisterInfo *TRI,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &PhysReg, int &Cost) {
+  if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
+    return;
+
+  unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
+  if (TargetRegisterInfo::isVirtualRegister(Reg))
+    return;
+
+  unsigned ResNo = User->getOperand(2).getResNo();
+  if (Def->isMachineOpcode()) {
+    const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+    if (ResNo >= II.getNumDefs() &&
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
+      PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
+  }
+}
+
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
+  SmallVector<EVT, 4> VTs;
+  SDNode *GlueDestNode = Glue.getNode();
+
+  // Don't add glue from a node to itself.
+  if (GlueDestNode == N) return;
+
+  // Don't add glue to something which already has glue.
+  if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
+
+  for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+    VTs.push_back(N->getValueType(I));
+
+  if (AddGlue)
+    VTs.push_back(MVT::Glue);
+
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+    Ops.push_back(N->getOperand(I));
+
+  if (GlueDestNode)
+    Ops.push_back(Glue);
+
+  SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+  MachineSDNode::mmo_iterator Begin = 0, End = 0;
+  MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+  // Store memory references.
+  if (MN) {
+    Begin = MN->memoperands_begin();
+    End = MN->memoperands_end();
+  }
+
+  DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+  // Reset the memory references.
+  if (MN)
+    MN->setMemRefs(Begin, End);
+}
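+// A minimal sketch restating the "already has glue" test from AddGlue above:
+// when a node carries a glue result, it is by construction the node's last
+// value, so checking the final value type is sufficient.
+static bool ExampleHasOutgoingGlue(const SDNode *N) {
+  return N->getValueType(N->getNumValues() - 1) == MVT::Glue;
+}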
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
+/// This function finds loads of the same base and different offsets. If the
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
+/// outputs to ensure they are scheduled together and in order. This
+/// optimization may benefit some targets by improving cache locality.
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+  SDNode *Chain = 0;
+  unsigned NumOps = Node->getNumOperands();
+  if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+    Chain = Node->getOperand(NumOps-1).getNode();
+  if (!Chain)
+    return;
+
+  // Look for other loads of the same chain. Find loads that are loading from
+  // the same base pointer and different offsets.
+  SmallPtrSet<SDNode*, 16> Visited;
+  SmallVector<int64_t, 4> Offsets;
+  DenseMap<long long, SDNode*> O2SMap;  // Map from offset to SDNode.
+  bool Cluster = false;
+  SDNode *Base = Node;
+  for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+       I != E; ++I) {
+    SDNode *User = *I;
+    if (User == Node || !Visited.insert(User))
+      continue;
+    int64_t Offset1, Offset2;
+    if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+        Offset1 == Offset2)
+      // FIXME: Should be ok if their addresses are identical. But earlier
+      // optimizations really should have eliminated one of the loads.
+      continue;
+    if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+      Offsets.push_back(Offset1);
+    O2SMap.insert(std::make_pair(Offset2, User));
+    Offsets.push_back(Offset2);
+    if (Offset2 < Offset1)
+      Base = User;
+    Cluster = true;
+  }
+
+  if (!Cluster)
+    return;
+
+  // Sort them in increasing order.
+  std::sort(Offsets.begin(), Offsets.end());
+
+  // Check if the loads are close enough.
+  SmallVector<SDNode*, 4> Loads;
+  unsigned NumLoads = 0;
+  int64_t BaseOff = Offsets[0];
+  SDNode *BaseLoad = O2SMap[BaseOff];
+  Loads.push_back(BaseLoad);
+  for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+    int64_t Offset = Offsets[i];
+    SDNode *Load = O2SMap[Offset];
+    if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, NumLoads))
+      break; // Stop right here. Ignore loads that are further away.
+    Loads.push_back(Load);
+    ++NumLoads;
+  }
+
+  if (NumLoads == 0)
+    return;
+
+  // Cluster loads by adding MVT::Glue outputs and inputs. This also
+  // ensures they are scheduled in order of increasing addresses.
+  SDNode *Lead = Loads[0];
+  AddGlue(Lead, SDValue(0, 0), true, DAG);
+
+  SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
+  for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+    bool OutGlue = I < E - 1;
+    SDNode *Load = Loads[I];
+
+    AddGlue(Load, InGlue, OutGlue, DAG);
+
+    if (OutGlue)
+      InGlue = SDValue(Load, Load->getNumValues() - 1);
+
+    ++LoadsClustered;
+  }
+}
+
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    SDNode *Node = &*NI;
+    if (!Node || !Node->isMachineOpcode())
+      continue;
+
+    unsigned Opc = Node->getMachineOpcode();
+    const TargetInstrDesc &TID = TII->get(Opc);
+    if (TID.mayLoad())
+      // Cluster loads from "near" addresses into combined SUnits.
+      ClusterNeighboringLoads(Node);
+  }
+}
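+// A minimal numeric sketch (hypothetical offsets and window, assuming a
+// non-empty sorted list) of the decision ClusterNeighboringLoads makes
+// above: offsets are visited in increasing order and clustered while the
+// target hook accepts them. Here a fixed byte window stands in for
+// shouldScheduleLoadsNear; with offsets {0, 8, 16, 4096} and a 64-byte
+// window, the first three loads cluster and the last does not.
+static unsigned ExampleCountClusterable(const SmallVectorImpl<int64_t> &Sorted,
+                                        int64_t Window) {
+  unsigned N = 1; // the base load itself
+  for (unsigned i = 1, e = Sorted.size(); i != e; ++i) {
+    if (Sorted[i] - Sorted[0] > Window)
+      break; // too far from the base load; stop clustering
+    ++N;
+  }
+  return N;
+}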
+void ScheduleDAGSDNodes::BuildSchedUnits() {
+  // During scheduling, the NodeId field of SDNode is used to map SDNodes
+  // to their associated SUnits by holding SUnits table indices. A value
+  // of -1 means the SDNode does not yet have an associated SUnit.
+  unsigned NumNodes = 0;
+  for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+       E = DAG->allnodes_end(); NI != E; ++NI) {
+    NI->setNodeId(-1);
+    ++NumNodes;
+  }
+
+  // Reserve entries in the vector for each of the SUnits we are creating.
+  // This ensures that reallocation of the vector won't happen, so SUnit*'s
+  // won't get invalidated.
+  // FIXME: Multiply by 2 because we may clone nodes during scheduling.
+  // This is a temporary workaround.
+  SUnits.reserve(NumNodes * 2);
+
+  // Add all nodes in depth first order.
+  SmallVector<SDNode*, 64> Worklist;
+  SmallPtrSet<SDNode*, 64> Visited;
+  Worklist.push_back(DAG->getRoot().getNode());
+  Visited.insert(DAG->getRoot().getNode());
+
+  while (!Worklist.empty()) {
+    SDNode *NI = Worklist.pop_back_val();
+
+    // Add all operands to the worklist unless they've already been added.
+    for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+      if (Visited.insert(NI->getOperand(i).getNode()))
+        Worklist.push_back(NI->getOperand(i).getNode());
+
+    if (isPassiveNode(NI))  // Leaf node, e.g. a TargetImmediate.
+      continue;
+
+    // If this node has already been processed, stop now.
+    if (NI->getNodeId() != -1) continue;
+
+    SUnit *NodeSUnit = NewSUnit(NI);
+
+    // See if anything is glued to this node, if so, add them to glued
+    // nodes. Nodes can have at most one glue input and one glue output. Glue
+    // is required to be the last operand and result of a node.
+
+    // Scan up to find glued preds.
+    SDNode *N = NI;
+    while (N->getNumOperands() &&
+           N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
+      N = N->getOperand(N->getNumOperands()-1).getNode();
+      assert(N->getNodeId() == -1 && "Node already inserted!");
+      N->setNodeId(NodeSUnit->NodeNum);
+      if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+        NodeSUnit->isCall = true;
+    }
+
+    // Scan down to find any glued succs.
+    N = NI;
+    while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+      SDValue GlueVal(N, N->getNumValues()-1);
+
+      // There are either zero or one users of the Glue result.
+      bool HasGlueUse = false;
+      for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+           UI != E; ++UI)
+        if (GlueVal.isOperandOf(*UI)) {
+          HasGlueUse = true;
+          assert(N->getNodeId() == -1 && "Node already inserted!");
+          N->setNodeId(NodeSUnit->NodeNum);
+          N = *UI;
+          if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+            NodeSUnit->isCall = true;
+          break;
+        }
+      if (!HasGlueUse) break;
+    }
+
+    // If there are glue operands involved, N is now the bottom-most node
+    // of the sequence of nodes that are glued together.
+    // Update the SUnit.
+    NodeSUnit->setNode(N);
+    assert(N->getNodeId() == -1 && "Node already inserted!");
+    N->setNodeId(NodeSUnit->NodeNum);
+
+    // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+    InitNumRegDefsLeft(NodeSUnit);
+
+    // Assign the Latency field of NodeSUnit using target-provided information.
+    ComputeLatency(NodeSUnit);
+  }
+}
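+// A minimal usage sketch of the glued-chain walk that BuildSchedUnits relies
+// on: one SUnit covers an entire glue sequence, and the sequence can be
+// counted from its bottom-most node via SDNode::getGluedNode().
+static unsigned ExampleGluedGroupSize(const SDNode *Bottom) {
+  unsigned Size = 0;
+  for (const SDNode *N = Bottom; N; N = N->getGluedNode())
+    ++Size; // each glued node contributes to the same SUnit
+  return Size;
+}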
+void ScheduleDAGSDNodes::AddSchedEdges() {
+  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+
+  // Check to see if the scheduler cares about latencies.
+  bool UnitLatencies = ForceUnitLatencies();
+
+  // Pass 2: add the preds, succs, etc.
+  for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
+    SUnit *SU = &SUnits[su];
+    SDNode *MainNode = SU->getNode();
+
+    if (MainNode->isMachineOpcode()) {
+      unsigned Opc = MainNode->getMachineOpcode();
+      const TargetInstrDesc &TID = TII->get(Opc);
+      for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
+        if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+          SU->isTwoAddress = true;
+          break;
+        }
+      }
+      if (TID.isCommutable())
+        SU->isCommutable = true;
+    }
+
+    // Find all predecessors and successors of the group.
+    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
+      if (N->isMachineOpcode() &&
+          TII->get(N->getMachineOpcode()).getImplicitDefs()) {
+        SU->hasPhysRegClobbers = true;
+        unsigned NumUsed = InstrEmitter::CountResults(N);
+        while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1))
+          --NumUsed;    // Skip over unused values at the end.
+        if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
+          SU->hasPhysRegDefs = true;
+      }
+
+      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+        SDNode *OpN = N->getOperand(i).getNode();
+        if (isPassiveNode(OpN)) continue;   // Not scheduled.
+        SUnit *OpSU = &SUnits[OpN->getNodeId()];
+        assert(OpSU && "Node has no SUnit!");
+        if (OpSU == SU) continue;           // In the same group.
+
+        EVT OpVT = N->getOperand(i).getValueType();
+        assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
+        bool isChain = OpVT == MVT::Other;
+
+        unsigned PhysReg = 0;
+        int Cost = 1;
+        // Determine if this is a physical register dependency.
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
+        assert((PhysReg == 0 || !isChain) &&
+               "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+        // emits a copy from the physical register to a virtual register unless
+        // it requires a cross class copy (cost < 0). That means we are only
+        // treating "expensive to copy" register dependency as physical register
+        // dependency. This may change in the future though.
+        if (Cost >= 0)
+          PhysReg = 0;
+
+        // If this is a ctrl dep, latency is 1.
+        unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+        const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+                               OpLatency, PhysReg);
+        if (!isChain && !UnitLatencies) {
+          ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
+          ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
+        }
+
+        if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+          // Multiple register uses are combined in the same SUnit. For example,
+          // we could have a set of glued nodes with all their defs consumed by
+          // another set of glued nodes. Register pressure tracking sees this as
+          // a single use, so to keep pressure balanced we reduce the defs.
+          --OpSU->NumRegDefsLeft;
+        }
+      }
+    }
+  }
+}
+
+/// BuildSchedGraph - Build the SUnit graph from the selection DAG that is
+/// the input. This SUnit graph is similar to the SelectionDAG, but
+/// excludes nodes that aren't interesting to scheduling, and represents
+/// glued together nodes with a single SUnit.
+void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+  // Cluster certain nodes which should be scheduled together.
+  ClusterNodes();
+  // Populate the SUnits array.
+  BuildSchedUnits();
+  // Compute all the scheduling dependencies between nodes.
+  AddSchedEdges();
+}
+
+// Initialize NumNodeDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+  // Check for phys reg copy.
+  if (!Node)
+    return;
+
+  if (!Node->isMachineOpcode()) {
+    if (Node->getOpcode() == ISD::CopyFromReg)
+      NodeNumDefs = 1;
+    else
+      NodeNumDefs = 0;
+    return;
+  }
+  unsigned POpc = Node->getMachineOpcode();
+  if (POpc == TargetOpcode::IMPLICIT_DEF) {
+    // No register need be allocated for this.
+    NodeNumDefs = 0;
+    return;
+  }
+  unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+  // Some instructions define regs that are not represented in the selection
+  // DAG (e.g. unused flags). See tMOVi8. Make sure we don't access past
+  // NumValues.
+  NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+  DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+                                           const ScheduleDAGSDNodes *SD)
+  : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+  InitNodeNumDefs();
+  Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+  for (;Node;) { // Visit all glued nodes.
+    for (;DefIdx < NodeNumDefs; ++DefIdx) {
+      if (!Node->hasAnyUseOfValue(DefIdx))
+        continue;
+      if (Node->isMachineOpcode() &&
+          Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+        // Propagate the incoming (full-register) type. I doubt it's needed.
+        ValueType = Node->getOperand(0).getValueType();
+      }
+      else {
+        ValueType = Node->getValueType(DefIdx);
+      }
+      ++DefIdx;
+      return; // Found a normal regdef.
+    }
+    Node = Node->getGluedNode();
+    if (Node == NULL) {
+      return; // No values left to visit.
+    }
+    InitNodeNumDefs();
+  }
+}
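+// A minimal usage sketch of RegDefIter (illustrative only): counting the
+// used register defs of a unit, the same walk that InitNumRegDefsLeft below
+// and the pressure-tracking code in ScheduleDAGRRList perform.
+static unsigned ExampleCountRegDefs(const SUnit *SU,
+                                    const ScheduleDAGSDNodes *SD) {
+  unsigned NumDefs = 0;
+  for (ScheduleDAGSDNodes::RegDefIter I(SU, SD); I.IsValid(); I.Advance())
+    ++NumDefs; // one per used register value defined by the glued group
+  return NumDefs;
+}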
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+  assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+  for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+    assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+    ++SU->NumRegDefsLeft;
+  }
+}
+
+void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies()) {
+    SU->Latency = 1;
+    return;
+  }
+
+  if (!InstrItins || InstrItins->isEmpty()) {
+    SDNode *N = SU->getNode();
+    if (N && N->isMachineOpcode() &&
+        TII->isHighLatencyDef(N->getMachineOpcode()))
+      SU->Latency = HighLatencyCycles;
+    else
+      SU->Latency = 1;
+    return;
+  }
+
+  // Compute the latency for the node. We use the sum of the latencies for
+  // all nodes glued together into this SUnit.
+  SU->Latency = 0;
+  for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+    if (N->isMachineOpcode())
+      SU->Latency += TII->getInstrLatency(InstrItins, N);
+}
+
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+                                               unsigned OpIdx, SDep& dep) const{
+  // Check to see if the scheduler cares about latencies.
+  if (ForceUnitLatencies())
+    return;
+
+  if (dep.getKind() != SDep::Data)
+    return;
+
+  unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+  if (Use->isMachineOpcode())
+    // Adjust the use operand index by num of defs.
+    OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+  int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+  if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+      !BB->succ_empty()) {
+    unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      // This copy is a liveout value. It is likely coalesced, so reduce the
+      // latency so as not to penalize the def.
+      // FIXME: need target specific adjustment here?
+      Latency = (Latency > 1) ? Latency - 1 : 1;
+  }
+  if (Latency >= 0)
+    dep.setLatency(Latency);
+}
+
+void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
+  if (!SU->getNode()) {
+    dbgs() << "PHYS REG COPY\n";
+    return;
+  }
+
+  SU->getNode()->dump(DAG);
+  dbgs() << "\n";
+  SmallVector<SDNode *, 4> GluedNodes;
+  for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+    GluedNodes.push_back(N);
+  while (!GluedNodes.empty()) {
+    dbgs() << "    ";
+    GluedNodes.back()->dump(DAG);
+    dbgs() << "\n";
+    GluedNodes.pop_back();
+  }
+}
+
+namespace {
+  struct OrderSorter {
+    bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+                    const std::pair<unsigned, MachineInstr*> &B) {
+      return A.first < B.first;
+    }
+  };
+}
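+// Usage sketch: EmitSchedule below collects (source order, instruction)
+// pairs and sorts them with this comparator so that dbg_value instructions
+// can be emitted in source order:
+//   std::sort(Orders.begin(), Orders.end(), OrderSorter());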
+  MachineBasicBlock *BB = Emitter.getBlock();
+  MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+  SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+  for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+    if (DVs[i]->isInvalidated())
+      continue;
+    unsigned DVOrder = DVs[i]->getOrder();
+    if (!Order || DVOrder == ++Order) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+      if (DbgMI) {
+        Orders.push_back(std::make_pair(DVOrder, DbgMI));
+        BB->insert(InsertPos, DbgMI);
+      }
+      DVs[i]->setIsInvalidated();
+    }
+  }
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+                              InstrEmitter &Emitter,
+                              DenseMap<SDValue, unsigned> &VRBaseMap,
+                              SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+                              SmallSet<unsigned, 8> &Seen) {
+  unsigned Order = DAG->GetOrdering(N);
+  if (!Order || !Seen.insert(Order)) {
+    // Process any valid SDDbgValues even if node does not have any order
+    // assigned.
+    ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+    return;
+  }
+
+  MachineBasicBlock *BB = Emitter.getBlock();
+  if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+    // Did not insert any instruction.
+    Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+    return;
+  }
+
+  Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+  ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
+
+/// EmitSchedule - Emit the machine code in scheduled order.
+MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
+  InstrEmitter Emitter(BB, InsertPos);
+  DenseMap<SDValue, unsigned> VRBaseMap;
+  DenseMap<SUnit*, unsigned> CopyVRBaseMap;
+  SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+  SmallSet<unsigned, 8> Seen;
+  bool HasDbg = DAG->hasDebugValues();
+
+  // If this is the first BB, emit byval parameter dbg_value's.
+  if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
+    SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
+    SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd();
+    for (; PDI != PDE; ++PDI) {
+      MachineInstr *DbgMI = Emitter.EmitDbgValue(*PDI, VRBaseMap);
+      if (DbgMI)
+        BB->insert(InsertPos, DbgMI);
+    }
+  }
+
+  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
+    SUnit *SU = Sequence[i];
+    if (!SU) {
+      // Null SUnit* is a noop.
+      EmitNoop();
+      continue;
+    }
+
+    // For pre-regalloc scheduling, create instructions corresponding to the
+    // SDNode and any glued SDNodes and append them to the block.
+    if (!SU->getNode()) {
+      // Emit a copy.
+      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      continue;
+    }
+
+    SmallVector<SDNode *, 4> GluedNodes;
+    for (SDNode *N = SU->getNode()->getGluedNode(); N;
+         N = N->getGluedNode())
+      GluedNodes.push_back(N);
+    while (!GluedNodes.empty()) {
+      SDNode *N = GluedNodes.back();
+      Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+                       VRBaseMap);
+      // Remember the source order of the inserted instruction.
+      if (HasDbg)
+        ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+      GluedNodes.pop_back();
+    }
+    Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
+                     VRBaseMap);
+    // Remember the source order of the inserted instruction.
+    if (HasDbg)
+      ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
+                        Seen);
+  }
+
+  // Insert all the dbg_values which have not already been inserted in source
+  // order sequence.
+  if (HasDbg) {
+    MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
+
+    // Sort the source order instructions and use the order to insert debug
+    // values.
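// Sketch of the interleaving that follows, with made-up orders: after
// sorting, Orders might hold (3, MI_a), (7, MI_b). Dbg_values numbered
// below the first entry land at the top of the block after any PHIs,
// those falling in [3, 7) are emitted next to the recorded instruction,
// and anything numbered past the last entry is appended in front of the
// terminator by the trailing loop.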
+ std::sort(Orders.begin(), Orders.end(), OrderSorter()); + + SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); + SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); + // Now emit the rest according to source order. + unsigned LastOrder = 0; + for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { + unsigned Order = Orders[i].first; + MachineInstr *MI = Orders[i].second; + // Insert all SDDbgValue's whose order(s) are before "Order". + if (!MI) + continue; + for (; DI != DE && + (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { + if ((*DI)->isInvalidated()) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(llvm::next(Pos), DbgMI); + } + } + } + LastOrder = Order; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? + while (DI != DE) { + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator(); + if (!(*DI)->isInvalidated()) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) + InsertBB->insert(Pos, DbgMI); + } + ++DI; + } + } + + BB = Emitter.getBlock(); + InsertPos = Emitter.getInsertPos(); + return BB; +} diff --git a/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 00000000000..cc7310e4ca4 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,151 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef SCHEDULEDAGSDNODES_H +#define SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. + /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Glue operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + virtual ~ScheduleDAGSDNodes() {} + + /// Run - perform scheduling. 
+ /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb, + MachineBasicBlock::iterator insertPos); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. + /// + static bool isPassiveNode(SDNode *Node) { + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (isa(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa(Node)) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *NewSUnit(SDNode *N); + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + virtual void BuildSchedGraph(AliasAnalysis *AA); + + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + + /// ComputeLatency - Compute node latency. + /// + virtual void ComputeLatency(SUnit *SU); + + /// ComputeOperandLatency - Override dependence edge latency using + /// operand use/def information + /// + virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use, + SDep& dep) const { } + + virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const; + + virtual MachineBasicBlock *EmitSchedule(); + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + virtual void dumpNode(const SUnit *SU) const; + + virtual std::string getGraphNodeLabel(const SUnit *SU) const; + + virtual void getCustomGraphFeatures(GraphWriter &GW) const; + + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + EVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != NULL; } + + EVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + + private: + /// ClusterNeighboringLoads - Cluster loads from "near" addresses into + /// combined SUnits. + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. 
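// Taken together: ClusterNodes groups loads from nearby addresses,
// BuildSchedUnits allocates one SUnit per glue-cluster of SDNodes, and
// AddSchedEdges wires up the data/chain dependencies (see BuildSchedGraph
// in the .cpp hunk above). A typical in-place walk over the register
// values an SUnit defines then looks like this sketch (cf.
// InitNumRegDefsLeft):
#if 0
for (RegDefIter I(SU, this); I.IsValid(); I.Advance())
  ++SU->NumRegDefsLeft;  // one slot per live register def, glued nodes included
#endif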
+ void BuildSchedUnits(); + void AddSchedEdges(); + }; +} + +#endif diff --git a/final/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/final/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 00000000000..8deaae935a6 --- /dev/null +++ b/final/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,6623 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "SDNodeOrdering.h" +#include "SDNodeDbgValue.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Function.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/Intrinsics.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CallingConv.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Mutex.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include +#include +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unknown FP format"); + case MVT::f32: return &APFloat::IEEEsingle; + case MVT::f64: return &APFloat::IEEEdouble; + case MVT::f80: return &APFloat::x87DoubleExtended; + case MVT::f128: return &APFloat::IEEEquad; + case MVT::ppcf128: return &APFloat::PPCDoubleDouble; + } +} + +SelectionDAG::DAGUpdateListener::~DAGUpdateListener() {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. 
+bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(EVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // PPC long double cannot be converted to any other type. + if (VT == MVT::ppcf128 || + &Val.getSemantics() == &APFloat::PPCDoubleDouble) + return false; + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. + SDValue NotZero = N->getOperand(i); + if (isa(NotZero)) { + if (!cast(NotZero)->isAllOnesValue()) + return false; + } else if (isa(NotZero)) { + if (!cast(NotZero)->getValueAPF(). + bitcastToAPInt().isAllOnesValue()) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. + SDValue Zero = N->getOperand(i); + if (isa(Zero)) { + if (!cast(Zero)->isNullValue()) + return false; + } else if (isa(Zero)) { + if (!cast(Zero)->getValueAPF().isPosZero()) + return false; + } else + return false; + + // Okay, we have at least one 0 value, check to see if the rest match or are + // undefs. + for (++i; i != e; ++i) + if (N->getOperand(i) != Zero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + +/// isScalarToVector - Return true if the specified node is a +/// ISD::SCALAR_TO_VECTOR node or a BUILD_VECTOR node where only the low +/// element is not an undef. 
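// For example, (BUILD_VECTOR X, undef, undef, undef) qualifies, while
// (BUILD_VECTOR X, Y, undef, undef) does not, and a single-element
// BUILD_VECTOR is rejected outright by the NumElems == 1 check.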
+bool ISD::isScalarToVector(const SDNode *N) { + if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) + return true; + + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + if (N->getOperand(0).getOpcode() == ISD::UNDEF) + return false; + unsigned NumElems = N->getNumOperands(); + if (NumElems == 1) + return false; + for (unsigned i = 1; i < NumElems; ++i) { + SDValue V = N->getOperand(i); + if (V.getOpcode() != ISD::UNDEF) + return false; + } + return true; +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +/// getSetCCInverse - Return the operation corresponding to !(X op Y), where +/// 'op' is a valid SetCC operation. +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + + +/// isSignedOp - For an integer comparison, return 1 if the comparison is a +/// signed operation and 2 if the result is an unsigned comparison. Return zero +/// if the operation does not depend on the sign of the input (setne and seteq). +static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: llvm_unreachable("Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +/// getSetCCOrOperation - Return the result of a logical OR between different +/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. 
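// Worked example, using the E/G/L/U bit layout of ISD::CondCode
// (E = bit 0, G = bit 1, L = bit 2, U = bit 3):
//   SETULT (0b01100) & SETNE (0b10110) == 0b00100 == SETOLT,
// which the integer canonicalization below rewrites back to SETULT.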
+ ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDValue *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + const SDUse *Ops, unsigned NumOps) { + for (; NumOps; --NumOps, ++Ops) { + ID.AddPointer(Ops->getNode()); + ID.AddInteger(Ops->getResNo()); + } +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, + unsigned short OpC, SDVTList VTList, + const SDValue *OpList, unsigned N) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList, N); +} + +/// AddNodeIDCustom - If this is an SDNode with special info, add this info to +/// the NodeID data. +static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + case ISD::TargetExternalSymbol: + case ISD::ExternalSymbol: + llvm_unreachable("Should only be used on nodes with operands"); + default: break; // Normal nodes don't need extra info. 
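// A sketch of what this profiling buys, with a hypothetical i32 constant:
// both calls below profile to the same ID bytes (opcode, interned VT list,
// ConstantInt pointer), so the second lookup hits the CSE map and returns
// the first node instead of allocating a twin.
#if 0
SDValue A = DAG.getConstant(42, MVT::i32);
SDValue B = DAG.getConstant(42, MVT::i32);
assert(A.getNode() == B.getNode() && "CSE'd to one node");
#endif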
+  case ISD::TargetConstant:
+  case ISD::Constant:
+    ID.AddPointer(cast<ConstantSDNode>(N)->getConstantIntValue());
+    break;
+  case ISD::TargetConstantFP:
+  case ISD::ConstantFP: {
+    ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
+    break;
+  }
+  case ISD::TargetGlobalAddress:
+  case ISD::GlobalAddress:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::GlobalTLSAddress: {
+    const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+    ID.AddPointer(GA->getGlobal());
+    ID.AddInteger(GA->getOffset());
+    ID.AddInteger(GA->getTargetFlags());
+    break;
+  }
+  case ISD::BasicBlock:
+    ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock());
+    break;
+  case ISD::Register:
+    ID.AddInteger(cast<RegisterSDNode>(N)->getReg());
+    break;
+
+  case ISD::SRCVALUE:
+    ID.AddPointer(cast<SrcValueSDNode>(N)->getValue());
+    break;
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
+    break;
+  case ISD::JumpTable:
+  case ISD::TargetJumpTable:
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
+    break;
+  case ISD::ConstantPool:
+  case ISD::TargetConstantPool: {
+    const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N);
+    ID.AddInteger(CP->getAlignment());
+    ID.AddInteger(CP->getOffset());
+    if (CP->isMachineConstantPoolEntry())
+      CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
+    else
+      ID.AddPointer(CP->getConstVal());
+    ID.AddInteger(CP->getTargetFlags());
+    break;
+  }
+  case ISD::LOAD: {
+    const LoadSDNode *LD = cast<LoadSDNode>(N);
+    ID.AddInteger(LD->getMemoryVT().getRawBits());
+    ID.AddInteger(LD->getRawSubclassData());
+    break;
+  }
+  case ISD::STORE: {
+    const StoreSDNode *ST = cast<StoreSDNode>(N);
+    ID.AddInteger(ST->getMemoryVT().getRawBits());
+    ID.AddInteger(ST->getRawSubclassData());
+    break;
+  }
+  case ISD::ATOMIC_CMP_SWAP:
+  case ISD::ATOMIC_SWAP:
+  case ISD::ATOMIC_LOAD_ADD:
+  case ISD::ATOMIC_LOAD_SUB:
+  case ISD::ATOMIC_LOAD_AND:
+  case ISD::ATOMIC_LOAD_OR:
+  case ISD::ATOMIC_LOAD_XOR:
+  case ISD::ATOMIC_LOAD_NAND:
+  case ISD::ATOMIC_LOAD_MIN:
+  case ISD::ATOMIC_LOAD_MAX:
+  case ISD::ATOMIC_LOAD_UMIN:
+  case ISD::ATOMIC_LOAD_UMAX: {
+    const AtomicSDNode *AT = cast<AtomicSDNode>(N);
+    ID.AddInteger(AT->getMemoryVT().getRawBits());
+    ID.AddInteger(AT->getRawSubclassData());
+    break;
+  }
+  case ISD::VECTOR_SHUFFLE: {
+    const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+    for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
+         i != e; ++i)
+      ID.AddInteger(SVN->getMaskElt(i));
+    break;
+  }
+  case ISD::TargetBlockAddress:
+  case ISD::BlockAddress: {
+    ID.AddPointer(cast<BlockAddressSDNode>(N)->getBlockAddress());
+    ID.AddInteger(cast<BlockAddressSDNode>(N)->getTargetFlags());
+    break;
+  }
+  } // end switch (N->getOpcode())
+}
+
+/// AddNodeIDNode - Generic routine for adding a node's info to the NodeID
+/// data.
+static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
+  AddNodeIDOpcode(ID, N->getOpcode());
+  // Add the return value info.
+  AddNodeIDValueTypes(ID, N->getVTList());
+  // Add the operand info.
+  AddNodeIDOperands(ID, N->op_begin(), N->getNumOperands());
+
+  // Handle SDNode leaves with special info.
+  AddNodeIDCustom(ID, N);
+}
+
+/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
+/// the CSE map that carries volatility, temporalness, indexing mode, and
+/// extension/truncation information.
+/// +static inline unsigned +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, + bool isNonTemporal) { + assert((ConvType & 3) == ConvType && + "ConvType may not require more than 2 bits!"); + assert((AM & 7) == AM && + "AM may not require more than 3 bits!"); + return ConvType | + (AM << 2) | + (isVolatile << 5) | + (isNonTemporal << 6); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// doNotCSE - Return true if CSE should not be performed for this node. +static bool doNotCSE(SDNode *N) { + if (N->getValueType(0) == MVT::Glue) + return true; // Never CSE anything that produces a flag. + + switch (N->getOpcode()) { + default: break; + case ISD::HANDLENODE: + case ISD::EH_LABEL: + return true; // Never CSE these nodes. + } + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Glue) + return true; // Never CSE anything that produces a flag. + + return false; +} + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. + for (allnodes_iterator I = allnodes_begin(), E = allnodes_end(); I != E; ++I) + if (I->use_empty()) + DeadNodes.push_back(I); + + RemoveDeadNodes(DeadNodes); + + // If the root changed (e.g. it was a dead load, update the root). + setRoot(Dummy.getValue()); +} + +/// RemoveDeadNodes - This method deletes the unreachable nodes in the +/// given list, and any nodes that become unreachable as a result. +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes, + DAGUpdateListener *UpdateListener) { + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.pop_back_val(); + + if (UpdateListener) + UpdateListener->NodeDeleted(N, 0); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ + SmallVector DeadNodes(1, N); + RemoveDeadNodes(DeadNodes, UpdateListener); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. 
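// Dropping operands can cascade: if this node held the last use of an
// operand, that operand's use list empties and it becomes a candidate for
// the RemoveDeadNodes worklist above.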
+ N->DropOperands(); + + DeallocateNode(N); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); + + // Remove the ordering of this node. + Ordering->remove(N); + + // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. + SmallVector &DbgVals = DbgInfo->getSDDbgValues(N); + for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) + DbgVals[i]->setIsInvalidated(); +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast(N)->get()] != 0; + CondCodeNodes[cast(N)->get()] = 0; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: { + ExternalSymbolSDNode *ESN = cast(N); + Erased = TargetExternalSymbols.erase( + std::pair(ESN->getSymbol(), + ESN->getTargetFlags())); + break; + } + case ISD::VALUETYPE: { + EVT VT = cast(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != 0; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = 0; + } + break; + } + default: + // Remove it from the CSE Map. + assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + dbgs() << "\n"; + llvm_unreachable("Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N, + DAGUpdateListener *UpdateListener) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing, UpdateListener); + + // N is now dead. Inform the listener if it exists and delete it. + if (UpdateListener) + UpdateListener->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform a listener if + // it exists. 
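// Example of that recursive merging: when a rewritten (add x, y) collides
// with an existing (add x, y), every user of the dead node is redirected to
// the survivor; a user such as (sub (add x, y), z) may then become
// structurally identical to another node and merge in turn.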
+ if (UpdateListener) + UpdateListener->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + const SDValue *Ops,unsigned NumOps, + void *&InsertPos) { + if (doNotCSE(N)) + return 0; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps); + AddNodeIDCustom(ID, N); + SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + return Node; +} + +#ifndef NDEBUG +/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid. +static void VerifyNodeCommon(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + break; + } + } +} + +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. 
+static void VerifySDNode(SDNode *N) {
+  // The SDNode allocators cannot be used to allocate nodes with fields that are
+  // not present in an SDNode!
+  assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+  assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+  assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+  assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+  assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+  assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+  assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+  assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+  assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+  assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+  assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+  assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+  assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+  assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+  assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+  assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+  assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+  assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+  assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+  VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode.  Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+  // The MachineNode allocators cannot be used to allocate nodes with fields
+  // that are not present in a MachineNode!
+  // Currently there are no such nodes.
+
+  VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
+/// getEVTAlignment - Compute the default alignment value for the
+/// given type.
+///
+unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
+  const Type *Ty = VT == MVT::iPTR ?
+    PointerType::get(Type::getInt8Ty(*getContext()), 0) :
+    VT.getTypeForEVT(*getContext());
+
+  return TLI.getTargetData()->getABITypeAlignment(Ty);
+}
+
+// EntryNode could meaningfully have debug info if we can find it...
+SelectionDAG::SelectionDAG(const TargetMachine &tm)
+  : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+    EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
+    Root(getEntryNode()), Ordering(0) {
+  AllNodes.push_back(&EntryNode);
+  Ordering = new SDNodeOrdering();
+  DbgInfo = new SDDbgInfo();
+}
+
+void SelectionDAG::init(MachineFunction &mf) {
+  MF = &mf;
+  Context = &mf.getFunction()->getContext();
+}
+
+SelectionDAG::~SelectionDAG() {
+  allnodes_clear();
+  delete Ordering;
+  delete DbgInfo;
+}
+
+void SelectionDAG::allnodes_clear() {
+  assert(&*AllNodes.begin() == &EntryNode);
+  AllNodes.remove(AllNodes.begin());
+  while (!AllNodes.empty())
+    DeallocateNode(AllNodes.begin());
+}
+
+void SelectionDAG::clear() {
+  allnodes_clear();
+  OperandAllocator.Reset();
+  CSEMap.clear();
+
+  ExtendedValueTypeNodes.clear();
+  ExternalSymbols.clear();
+  TargetExternalSymbols.clear();
+  std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
+            static_cast<CondCodeSDNode*>(0));
+  std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
+            static_cast<SDNode*>(0));
+
+  EntryNode.UseList = 0;
+  AllNodes.push_back(&EntryNode);
+  Root = getEntryNode();
+  Ordering->clear();
+  DbgInfo->clear();
+}
+
+SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType()) ?
+    getNode(ISD::SIGN_EXTEND, DL, VT, Op) :
+    getNode(ISD::TRUNCATE, DL, VT, Op);
+}
+
+SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
+  return VT.bitsGT(Op.getValueType()) ?
+ getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); + if (Op.getValueType() == VT) return Op; + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, Op.getValueType())); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) { + EVT EltVT = VT.getScalarType(); + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) { + return getConstant(*ConstantInt::get(*Context, Val), VT, isT); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + EVT EltVT = VT.getScalarType(); + assert(Val.getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&Val); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { + return getConstant(Val, TLI.getPointerTy(), isTarget); +} + + +SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + EVT EltVT = VT.getScalarType(); + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), 0, 0); + ID.AddPointer(&V); + void *IP = 0; + SDNode *N = NULL; + if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector Ops; + Ops.assign(VT.getVectorNumElements(), Result); + // FIXME DebugLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { + EVT EltVT = VT.getScalarType(); + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), VT, isTarget); + else if (EltVT==MVT::f64) + return getConstantFP(APFloat(Val), VT, isTarget); + else if (EltVT==MVT::f80 || EltVT==MVT::f128) { + bool ignored; + APFloat apf = APFloat(Val); + apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(apf, VT, isTarget); + } else { + assert(0 && "Unsupported type in getConstantFP"); + return SDValue(); + } +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, + EVT VT, int64_t Offset, + bool isTargetGA, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTargetGA) && + "Cannot set target flags on target-independent globals"); + + // Truncate (with sign-extension) the offset value to the pointer size. + EVT PTy = TLI.getPointerTy(); + unsigned BitWidth = PTy.getSizeInBits(); + if (BitWidth < 64) + Offset = (Offset << (64 - BitWidth) >> (64 - BitWidth)); + + const GlobalVariable *GVar = dyn_cast(GV); + if (!GVar) { + // If GV is an alias then use the aliasee for determining thread-localness. + if (const GlobalAlias *GA = dyn_cast(GV)) + GVar = dyn_cast_or_null(GA->resolveAliasedGlobal(false)); + } + + unsigned Opc; + if (GVar && GVar->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(GV); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(FI); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent jump tables"); + unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(JTI); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->AddSelectionDAGCSEId(ID); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MBB); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); + + SDNode *&N = VT.isExtended() ? 
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; + + if (N) return SDValue(N, 0); + N = new (NodeAllocator) VTSDNode(VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, + unsigned char TargetFlags) { + SDNode *&N = + TargetExternalSymbols[std::pair(Sym, + TargetFlags)]; + if (N) return SDValue(N, 0); + N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (CondCodeNodes[Cond] == 0) { + CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); + CondCodeNodes[Cond] = N; + AllNodes.push_back(N); + } + + return SDValue(CondCodeNodes[Cond], 0); +} + +// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in +// the shuffle mask M that point at N1 to point at N2, and indices that point +// N2 to point at N1. +static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl &M) { + std::swap(N1, N2); + int NElts = M.size(); + for (int i = 0; i != NElts; ++i) { + if (M[i] >= NElts) + M[i] -= NElts; + else if (M[i] >= 0) + M[i] += NElts; + } +} + +SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, + SDValue N2, const int *Mask) { + assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); + assert(VT.isVector() && N1.getValueType().isVector() && + "Vector Shuffle VTs must be a vectors"); + assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() + && "Vector Shuffle VTs must have same element type"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + unsigned NElts = VT.getVectorNumElements(); + SmallVector MaskVec; + for (unsigned i = 0; i != NElts; ++i) { + assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); + MaskVec.push_back(Mask[i]); + } + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (unsigned i = 0; i != NElts; ++i) + if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N1.getOpcode() == ISD::UNDEF) + commuteShuffle(N1, N2, MaskVec); + + // Canonicalize all index into lhs, -> shuffle lhs, undef + // Canonicalize all index into rhs, -> shuffle rhs, undef + bool AllLHS = true, AllRHS = true; + bool N2Undef = N2.getOpcode() == ISD::UNDEF; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= (int)NElts) { + if (N2Undef) + MaskVec[i] = -1; + else + AllLHS = false; + } else if (MaskVec[i] >= 0) { + AllRHS = false; + } + } + if (AllLHS && AllRHS) + return getUNDEF(VT); + if (AllLHS && !N2Undef) + N2 = getUNDEF(VT); + if (AllRHS) { + N1 = getUNDEF(VT); + commuteShuffle(N1, N2, MaskVec); + } + + // If Identity shuffle, or all shuffle in to undef, return that node. 
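// Worked example of the canonicalizations above, for v4i32 with a made-up
// mask:  shuffle V, V, <0, 5, 2, 7>.  Since N1 == N2, N2 becomes undef and
// indices >= 4 fold into the LHS, giving  shuffle V, undef, <0, 1, 2, 3>,
// which the identity check below collapses to plain V.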
+ bool AllUndef = true; + bool Identity = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] >= 0) AllUndef = false; + } + if (Identity && NElts == N1.getValueType().getVectorNumElements()) + return N1; + if (AllUndef) + return getUNDEF(VT); + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops, 2); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. + int *MaskAlloc = OperandAllocator.Allocate(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. + if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, + Code); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0); + ID.AddInteger(RegNo); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); + ID.AddPointer(Label); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + bool isTarget, + unsigned char TargetFlags) { + unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); + ID.AddPointer(BA); + ID.AddInteger(TargetFlags); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || V->getType()->isPointerTy()) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(V); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0); + ID.AddPointer(MD); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. +SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { + EVT OpTy = Op.getValueType(); + MVT ShTy = TLI.getShiftAmountTy(LHSTy); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; + return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSize(); + const Type *Ty = VT.getTypeForEVT(*getContext()); + unsigned StackAlign = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. +SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSizeInBits(), + VT2.getStoreSizeInBits())/8; + const Type *Ty1 = VT1.getTypeForEVT(*getContext()); + const Type *Ty2 = VT2.getTypeForEVT(*getContext()); + const TargetData *TD = TLI.getTargetData(); + unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), + TD->getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + return getFrameIndex(FrameIdx, TLI.getPointerTy()); +} + +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + // These setcc operations always fold. 
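// For instance, with hypothetical i32 constant operands 3 and 5,
// FoldSetCC(VT, C3, C5, ISD::SETULT) reaches the N1C/N2C case below and
// folds straight to getConstant(1, VT), since 3 < 5 unsigned.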
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return getConstant(1, VT);
+
+  case ISD::SETOEQ:
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETONE:
+  case ISD::SETO:
+  case ISD::SETUO:
+  case ISD::SETUEQ:
+  case ISD::SETUNE:
+    assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+    break;
+  }
+
+  if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode())) {
+    const APInt &C2 = N2C->getAPIntValue();
+    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+      const APInt &C1 = N1C->getAPIntValue();
+
+      switch (Cond) {
+      default: llvm_unreachable("Unknown integer setcc!");
+      case ISD::SETEQ:  return getConstant(C1 == C2, VT);
+      case ISD::SETNE:  return getConstant(C1 != C2, VT);
+      case ISD::SETULT: return getConstant(C1.ult(C2), VT);
+      case ISD::SETUGT: return getConstant(C1.ugt(C2), VT);
+      case ISD::SETULE: return getConstant(C1.ule(C2), VT);
+      case ISD::SETUGE: return getConstant(C1.uge(C2), VT);
+      case ISD::SETLT:  return getConstant(C1.slt(C2), VT);
+      case ISD::SETGT:  return getConstant(C1.sgt(C2), VT);
+      case ISD::SETLE:  return getConstant(C1.sle(C2), VT);
+      case ISD::SETGE:  return getConstant(C1.sge(C2), VT);
+      }
+    }
+  }
+  if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2.getNode())) {
+      // No compile time operations on this type yet.
+      if (N1C->getValueType(0) == MVT::ppcf128)
+        return SDValue();
+
+      APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
+      switch (Cond) {
+      default: break;
+      case ISD::SETEQ:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, VT);
+      case ISD::SETNE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpLessThan, VT);
+      case ISD::SETLT:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, VT);
+      case ISD::SETGT:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, VT);
+      case ISD::SETLE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETGE:  if (R==APFloat::cmpUnordered)
+                          return getUNDEF(VT);
+                        // fall through
+      case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETO:   return getConstant(R!=APFloat::cmpUnordered, VT);
+      case ISD::SETUO:  return getConstant(R==APFloat::cmpUnordered, VT);
+      case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered ||
+                                           R==APFloat::cmpEqual, VT);
+      case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, VT);
+      case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered ||
+                                           R==APFloat::cmpLessThan, VT);
+      case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan ||
+                                           R==APFloat::cmpUnordered, VT);
+      case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, VT);
+      case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, VT);
+      }
+    } else {
+      // Ensure that the constant occurs on the RHS.
+      return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+    }
+  }
+
+  // Could not fold it.
+  return SDValue();
+}
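+// Worked example for the floating-point folds above (illustrative, not from
+// the original file): when compare() reports cmpUnordered (a NaN operand),
+// the unordered predicates succeed while the ordered ones fail, so a
+// constant "setcc ult" folds to 1 and "setcc olt" folds to 0; the plain
+// signed forms such as SETLT fold to undef instead, since no ordered answer
+// exists.
+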
+
+/// SignBitIsZero - Return true if the sign bit of Op is known to be zero.  We
+/// use this predicate to simplify operations downstream.
+bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
+  // This predicate is not safe for vector operations.
+  if (Op.getValueType().isVector())
+    return false;
+
+  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+  return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero.  We use
+/// this predicate to simplify operations downstream.  Mask is known to be zero
+/// for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+                                     unsigned Depth) const {
+  APInt KnownZero, KnownOne;
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  return (KnownZero & Mask) == Mask;
+}
+
+/// ComputeMaskedBits - Determine which of the bits specified in Mask are
+/// known to be either zero or one and return them in the KnownZero/KnownOne
+/// bitsets.  This code only analyzes bits in Mask, in order to short-circuit
+/// processing.
+void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
+                                     APInt &KnownZero, APInt &KnownOne,
+                                     unsigned Depth) const {
+  unsigned BitWidth = Mask.getBitWidth();
+  assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
+         "Mask size mismatches value type size!");
+
+  KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
+  if (Depth == 6 || Mask == 0)
+    return;  // Limit search depth.
+
+  APInt KnownZero2, KnownOne2;
+
+  switch (Op.getOpcode()) {
+  case ISD::Constant:
+    // We know all of the bits for a constant!
+    KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & Mask;
+    KnownZero = ~KnownOne & Mask;
+    return;
+  case ISD::AND:
+    // If either the LHS or the RHS are Zero, the result is zero.
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownZero,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-1 bits are only known if set in both the LHS & RHS.
+    KnownOne &= KnownOne2;
+    // Output known-0 are known to be clear if zero in either the LHS | RHS.
+    KnownZero |= KnownZero2;
+    return;
+  case ISD::OR:
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask & ~KnownOne,
+                      KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are only known if clear in both the LHS & RHS.
+    KnownZero &= KnownZero2;
+    // Output known-1 are known to be set if set in either the LHS | RHS.
+    KnownOne |= KnownOne2;
+    return;
+  case ISD::XOR: {
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Output known-0 bits are known if clear or set in both the LHS & RHS.
+    APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
+    // Output known-1 are known to be set if set in only one of the LHS, RHS.
+    KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
+    KnownZero = KnownZeroOut;
+    return;
+  }
+  case ISD::MUL: {
+    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // If low bits are zero in either operand, output low known-0 bits.
+    // Also compute a conservative estimate for high known-0 bits.
+    // More trickiness is possible, but this is sufficient for the
+    // interesting case of alignment computation.
+    KnownOne.clearAllBits();
+    unsigned TrailZ = KnownZero.countTrailingOnes() +
+                      KnownZero2.countTrailingOnes();
+    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                               KnownZero2.countLeadingOnes(),
+                               BitWidth) - BitWidth;
+
+    TrailZ = std::min(TrailZ, BitWidth);
+    LeadZ = std::min(LeadZ, BitWidth);
+    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+                APInt::getHighBitsSet(BitWidth, LeadZ);
+    KnownZero &= Mask;
+    return;
+  }
+  case ISD::UDIV: {
+    // For the purposes of computing leading zeros we can conservatively
+    // treat a udiv as a logical right shift by the power of 2 known to
+    // be less than the denominator.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned LeadZ = KnownZero2.countLeadingOnes();
+
+    KnownOne2.clearAllBits();
+    KnownZero2.clearAllBits();
+    ComputeMaskedBits(Op.getOperand(1),
+                      AllOnes, KnownZero2, KnownOne2, Depth+1);
+    unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
+    if (RHSUnknownLeadingOnes != BitWidth)
+      LeadZ = std::min(BitWidth,
+                       LeadZ + BitWidth - RHSUnknownLeadingOnes - 1);
+
+    KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask;
+    return;
+  }
+  case ISD::SELECT:
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SELECT_CC:
+    ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1);
+    ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+    // Only known if known in both the LHS and RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    return;
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      return;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If we know the result of a setcc has the top bits zero, use this info.
+    if (TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent &&
+        BitWidth > 1)
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+  case ISD::SHL:
+    // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), Mask.lshr(ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero <<= ShAmt;
+      KnownOne  <<= ShAmt;
+      // low bits known zero.
+      KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+    }
+    return;
+  case ISD::SRL:
+    // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt),
+                        KnownZero, KnownOne, Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      KnownZero |= HighBits;  // High bits known zero.
+    }
+    return;
+  case ISD::SRA:
+    if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned ShAmt = SA->getZExtValue();
+
+      // If the shift count is an invalid immediate, don't do anything.
+      if (ShAmt >= BitWidth)
+        return;
+
+      APInt InDemandedMask = (Mask << ShAmt);
+      // If any of the demanded bits are produced by the sign extension, we
+      // also demand the input sign bit.
+      APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt) & Mask;
+      if (HighBits.getBoolValue())
+        InDemandedMask |= APInt::getSignBit(BitWidth);
+
+      ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne,
+                        Depth+1);
+      assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      KnownZero = KnownZero.lshr(ShAmt);
+      KnownOne  = KnownOne.lshr(ShAmt);
+
+      // Handle the sign bits.
+      APInt SignBit = APInt::getSignBit(BitWidth);
+      SignBit = SignBit.lshr(ShAmt);  // Adjust to where it is now in the mask.
+
+      if (KnownZero.intersects(SignBit)) {
+        KnownZero |= HighBits;  // New bits are known zero.
+      } else if (KnownOne.intersects(SignBit)) {
+        KnownOne  |= HighBits;  // New bits are known one.
+      }
+    }
+    return;
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    unsigned EBits = EVT.getScalarType().getSizeInBits();
+
+    // Sign extension.  Compute the demanded bits in the result that are not
+    // present in the input.
+    APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits) & Mask;
+
+    APInt InSignBit = APInt::getSignBit(EBits);
+    APInt InputDemandedBits = Mask & APInt::getLowBitsSet(BitWidth, EBits);
+
+    // If the sign extended bits are demanded, we know that the sign
+    // bit is demanded.
+    InSignBit = InSignBit.zext(BitWidth);
+    if (NewBits.getBoolValue())
+      InputDemandedBits |= InSignBit;
+
+    ComputeMaskedBits(Op.getOperand(0), InputDemandedBits,
+                      KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    // If the sign bit of the input is known set or clear, then we know the
+    // top bits of the result.
+    if (KnownZero.intersects(InSignBit)) {        // Input sign bit known clear
+      KnownZero |= NewBits;
+      KnownOne  &= ~NewBits;
+    } else if (KnownOne.intersects(InSignBit)) {  // Input sign bit known set
+      KnownOne  |= NewBits;
+      KnownZero &= ~NewBits;
+    } else {                                      // Input sign bit unknown
+      KnownZero &= ~NewBits;
+      KnownOne  &= ~NewBits;
+    }
+    return;
+  }
+  case ISD::CTTZ:
+  case ISD::CTLZ:
+  case ISD::CTPOP: {
+    unsigned LowBits = Log2_32(BitWidth)+1;
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+    KnownOne.clearAllBits();
+    return;
+  }
+  case ISD::LOAD: {
+    if (ISD::isZEXTLoad(Op.getNode())) {
+      LoadSDNode *LD = cast<LoadSDNode>(Op);
+      EVT VT = LD->getMemoryVT();
+      unsigned MemBits = VT.getScalarType().getSizeInBits();
+      KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) & Mask;
+    }
+    return;
+  }
+  case ISD::ZERO_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask    = Mask.trunc(InBits);
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
+    KnownZero |= NewBits;
+    return;
+  }
+  case ISD::SIGN_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InSignBit = APInt::getSignBit(InBits);
+    APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
+    APInt InMask = Mask.trunc(InBits);
+
+    // If any of the sign extended bits are demanded, we know that the sign
+    // bit is demanded. Temporarily set this bit in the mask for our callee.
+    if (NewBits.getBoolValue())
+      InMask |= InSignBit;
+
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+
+    // Note if the sign bit is known to be zero or one.
+    bool SignBitKnownZero = KnownZero.isNegative();
+    bool SignBitKnownOne  = KnownOne.isNegative();
+    assert(!(SignBitKnownZero && SignBitKnownOne) &&
+           "Sign bit can't be known to be both zero and one!");
+
+    // If the sign bit wasn't actually demanded by our caller, we don't
+    // want it set in the KnownZero and KnownOne result values. Reset the
+    // mask and reapply it to the result values.
+    InMask = Mask.trunc(InBits);
+    KnownZero &= InMask;
+    KnownOne  &= InMask;
+
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
+
+    // If the sign bit is known zero or one, the top bits match.
+    if (SignBitKnownZero)
+      KnownZero |= NewBits;
+    else if (SignBitKnownOne)
+      KnownOne  |= NewBits;
+    return;
+  }
+  case ISD::ANY_EXTEND: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InMask = Mask.trunc(InBits);
+    KnownZero = KnownZero.trunc(InBits);
+    KnownOne = KnownOne.trunc(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    KnownZero = KnownZero.zext(BitWidth);
+    KnownOne = KnownOne.zext(BitWidth);
+    return;
+  }
+  case ISD::TRUNCATE: {
+    EVT InVT = Op.getOperand(0).getValueType();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
+    APInt InMask = Mask.zext(InBits);
+    KnownZero = KnownZero.zext(InBits);
+    KnownOne = KnownOne.zext(InBits);
+    ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    KnownZero = KnownZero.trunc(BitWidth);
+    KnownOne = KnownOne.trunc(BitWidth);
+    break;
+  }
+  case ISD::AssertZext: {
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits());
+    ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero,
+                      KnownOne, Depth+1);
+    KnownZero |= (~InMask) & Mask;
+    return;
+  }
+  case ISD::FGETSIGN:
+    // All bits are zero except the low bit.
+    KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1);
+    return;
+
+  case ISD::SUB: {
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+      // We know that the top bits of C-X are clear if X contains less bits
+      // than C (i.e. no wrap-around can happen).  For example, 20-X is
+      // positive if we can prove that X is >= 0 and < 16.
+      if (CLHS->getAPIntValue().isNonNegative()) {
+        unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
+        // NLZ can't be BitWidth with no sign bit
+        APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
+        ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2,
+                          Depth+1);
+
+        // If all of the MaskV bits are known to be zero, then we know the
+        // output top bits are zero, because we now know that the output is
+        // from [0-C].
+        if ((KnownZero2 & MaskV) == MaskV) {
+          unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
+          // Top bits known zero.
+          KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask;
+        }
+      }
+    }
+  }
+  // fall through
+  case ISD::ADD:
+  case ISD::ADDE: {
+    // Output known-0 bits are known if clear or set in both the low clear bits
+    // common to both LHS & RHS.  For example, 8+(X<<3) is known to have the
+    // low 3 bits clear.
+    APInt Mask2 = APInt::getLowBitsSet(BitWidth,
+                                       BitWidth - Mask.countLeadingZeros());
+    ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    unsigned KnownZeroOut = KnownZero2.countTrailingOnes();
+
+    ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1);
+    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+    KnownZeroOut = std::min(KnownZeroOut,
+                            KnownZero2.countTrailingOnes());
+
+    if (Op.getOpcode() == ISD::ADD) {
+      KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+      return;
+    }
+
+    // With ADDE, a carry bit may be added in, so we can only use this
+    // information if we know (at least) that the low two bits are clear.  We
+    // then return to the caller that the low bit is unknown but that other
+    // bits are known zero.
+    if (KnownZeroOut >= 2) // ADDE
+      KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
+    return;
+  }
+  case ISD::SREM:
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue().abs();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = RA - 1;
+        APInt Mask2 = LowBits | APInt::getSignBit(BitWidth);
+        ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1);
+
+        // The low bits of the first operand are unchanged by the srem.
+        KnownZero = KnownZero2 & LowBits;
+        KnownOne = KnownOne2 & LowBits;
+
+        // If the first operand is non-negative or has all low bits zero, then
+        // the upper bits are all zero.
+        if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits))
+          KnownZero |= ~LowBits;
+
+        // If the first operand is negative and not all low bits are zero, then
+        // the upper bits are all one.
+        if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0))
+          KnownOne |= ~LowBits;
+
+        KnownZero &= Mask;
+        KnownOne &= Mask;
+
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+      }
+    }
+    return;
+  case ISD::UREM: {
+    if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      const APInt &RA = Rem->getAPIntValue();
+      if (RA.isPowerOf2()) {
+        APInt LowBits = (RA - 1);
+        APInt Mask2 = LowBits & Mask;
+        KnownZero |= ~LowBits & Mask;
+        ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1);
+        assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+        break;
+      }
+    }
+
+    // Since the result is less than or equal to either operand, any leading
+    // zero bits in either operand must also exist in the result.
+    APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+    ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne,
+                      Depth+1);
+    ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2,
+                      Depth+1);
+
+    uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
+                                KnownZero2.countLeadingOnes());
+    KnownOne.clearAllBits();
+    KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
+    return;
+  }
+  case ISD::FrameIndex:
+  case ISD::TargetFrameIndex:
+    if (unsigned Align = InferPtrAlignment(Op)) {
+      // The low bits are known zero if the pointer is aligned.
+      KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+      return;
+    }
+    break;
+
+  default:
+    // Allow the target to implement this method for its nodes.
+    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_VOID:
+      TLI.computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne, *this,
+                                         Depth);
+    }
+    return;
+  }
+}
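+
+// Worked example (illustrative, not from the original file): for
+// Op = (shl X, 4) on i32 with an all-ones Mask, the SHL case shifts X's
+// known bits left by four and then marks the four vacated low bits as known
+// zero, so MaskedValueIsZero(Op, APInt(32, 0xF)) returns true and
+// alignment-style queries on such values succeed.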
+
+/// ComputeNumSignBits - Return the number of times the sign bit of the
+/// register is replicated into the other bits.  We know that at least 1 bit
+/// is always equal to the sign bit (itself), but other cases can give us
+/// information.  For example, immediately after an "SRA X, 2", we know that
+/// the top 3 bits are all equal to each other, so we return 3.
+unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
+  EVT VT = Op.getValueType();
+  assert(VT.isInteger() && "Invalid VT!");
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
+  unsigned Tmp, Tmp2;
+  unsigned FirstAnswer = 1;
+
+  if (Depth == 6)
+    return 1;  // Limit search depth.
+
+  switch (Op.getOpcode()) {
+  default: break;
+  case ISD::AssertSext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp+1;
+  case ISD::AssertZext:
+    Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
+    return VTBits-Tmp;
+
+  case ISD::Constant: {
+    const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
+    return Val.getNumSignBits();
+  }
+
+  case ISD::SIGN_EXTEND:
+    Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+    return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
+
+  case ISD::SIGN_EXTEND_INREG:
+    // Max of the input and what this extends.
+    Tmp =
+      cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+    Tmp = VTBits-Tmp+1;
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    return std::max(Tmp, Tmp2);
+
+  case ISD::SRA:
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    // SRA X, C   -> adds C sign bits.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      Tmp += C->getZExtValue();
+      if (Tmp > VTBits) Tmp = VTBits;
+    }
+    return Tmp;
+  case ISD::SHL:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      // shl destroys sign bits.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (C->getZExtValue() >= VTBits ||      // Bad shift.
+          C->getZExtValue() >= Tmp) break;    // Shifted all sign bits out.
+      return Tmp - C->getZExtValue();
+    }
+    break;
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:    // NOT is handled here.
+    // Logical binary ops preserve the number of sign bits at the worst.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp != 1) {
+      Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+      FirstAnswer = std::min(Tmp, Tmp2);
+      // We computed what we know about the sign bits as our first
+      // answer. Now proceed to the generic code that uses
+      // ComputeMaskedBits, and pick whichever answer is better.
+    }
+    break;
+
+  case ISD::SELECT:
+    Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+    return std::min(Tmp, Tmp2);
+
+  case ISD::SADDO:
+  case ISD::UADDO:
+  case ISD::SSUBO:
+  case ISD::USUBO:
+  case ISD::SMULO:
+  case ISD::UMULO:
+    if (Op.getResNo() != 1)
+      break;
+    // The boolean result conforms to getBooleanContents.  Fall through.
+  case ISD::SETCC:
+    // If setcc returns 0/-1, all bits are sign bits.
+    if (TLI.getBooleanContents() ==
+        TargetLowering::ZeroOrNegativeOneBooleanContent)
+      return VTBits;
+    break;
+  case ISD::ROTL:
+  case ISD::ROTR:
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+      unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+
+      // Handle rotate right by N like a rotate left by 32-N.
+      if (Op.getOpcode() == ISD::ROTR)
+        RotAmt = (VTBits-RotAmt) & (VTBits-1);
+
+      // If we aren't rotating out all of the known-in sign bits, return the
+      // number that are left.  This handles rotl(sext(x), 1) for example.
+      Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+      if (Tmp > RotAmt+1) return Tmp-RotAmt;
+    }
+    break;
+  case ISD::ADD:
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+
+    // Special case decrementing a value (ADD X, -1):
+    if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
+      if (CRHS->isAllOnesValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (KnownZero.isNegative())
+          return Tmp;
+      }
+
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+    return std::min(Tmp, Tmp2)-1;
+    break;
+
+  case ISD::SUB:
+    Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+    if (Tmp2 == 1) return 1;
+
+    // Handle NEG.
+    if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+      if (CLHS->isNullValue()) {
+        APInt KnownZero, KnownOne;
+        APInt Mask = APInt::getAllOnesValue(VTBits);
+        ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1);
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if ((KnownZero | APInt(VTBits, 1)) == Mask)
+          return VTBits;
+
+        // If the input is known to be positive (the sign bit is known clear),
+        // the output of the NEG has the same number of sign bits as the input.
+        if (KnownZero.isNegative())
+          return Tmp2;
+
+        // Otherwise, we treat this like a SUB.
+      }
+
+    // Sub can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+    if (Tmp == 1) return 1;  // Early out.
+    return std::min(Tmp, Tmp2)-1;
+    break;
+  case ISD::TRUNCATE:
+    // FIXME: it's tricky to do anything useful for this, but it is an
+    // important case for targets like X86.
+    break;
+  }
+
+  // Handle LOADX separately here. EXTLOAD case will fallthrough.
+  if (Op.getOpcode() == ISD::LOAD) {
+    LoadSDNode *LD = cast<LoadSDNode>(Op);
+    unsigned ExtType = LD->getExtensionType();
+    switch (ExtType) {
+    default: break;
+    case ISD::SEXTLOAD:    // '17' bits known
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+      return VTBits-Tmp+1;
+    case ISD::ZEXTLOAD:    // '16' bits known
+      Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+      return VTBits-Tmp;
+    }
+  }
+
+  // Allow the target to implement this method for its nodes.
+  if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+      Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+      Op.getOpcode() == ISD::INTRINSIC_VOID) {
+    unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth);
+    if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits);
+  }
+
+  // Finally, if we can prove that the top bits of the result are 0's or 1's,
+  // use this information.
+  APInt KnownZero, KnownOne;
+  APInt Mask = APInt::getAllOnesValue(VTBits);
+  ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth);
+
+  if (KnownZero.isNegative()) {        // sign bit is 0
+    Mask = KnownZero;
+  } else if (KnownOne.isNegative()) {  // sign bit is 1
+    Mask = KnownOne;
+  } else {
+    // Nothing known.
+    return FirstAnswer;
+  }
+
+  // Okay, we know that the sign bit in Mask is set.  Use CLZ to determine
+  // the number of identical bits in the top of the input value.
+  Mask = ~Mask;
+  Mask <<= Mask.getBitWidth()-VTBits;
+  // Return # leading zeros.  We use 'min' here in case Val was zero before
+  // shifting.  We don't want to return '64' as for an i32 "0".
+  return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
+}
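+
+// Worked example (illustrative, not from the original file): for an i32
+// value, ComputeNumSignBits(sra X, 2) reports at least 3, since the SRA case
+// adds the constant shift amount to the input's count, and a sign_extend
+// from i16 to i32 reports input+16, i.e. at least 17.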
+
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD.  This handles the equivalence:
+///     X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+  if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+      !isa<ConstantSDNode>(Op.getOperand(1)))
+    return false;
+
+  if (Op.getOpcode() == ISD::OR &&
+      !MaskedValueIsZero(Op.getOperand(0),
+                     cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+    return false;
+
+  return true;
+}
+
+
+bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
+  // If we're told that NaNs won't happen, assume they won't.
+  if (NoNaNsFPMath)
+    return true;
+
+  // If the value is a constant, we can obviously see if it is a NaN or not.
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+    return !C->getValueAPF().isNaN();
+
+  // TODO: Recognize more cases here.
+
+  return false;
+}
+
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+  // If the value is a constant, we can obviously see if it is a zero or not.
+  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+    return !C->isZero();
+
+  // TODO: Recognize more cases here.
+
+  return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+  // Check the obvious case.
+  if (A == B) return true;
+
+  // Test for negative and positive zero.
+  if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+    if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+      if (CA->isZero() && CB->isZero()) return true;
+
+  // Otherwise they may not be equal.
+  return false;
+}
+
+bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
+  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+  if (!GA) return false;
+  if (GA->getOffset() != 0) return false;
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal());
+  if (!GV) return false;
+  return MF->getMMI().hasDebugInfo();
+}
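+
+// Worked example for isBaseWithConstantOffset above (illustrative, not from
+// the original file): if the low bits of X are known zero, e.g. X is 8-byte
+// aligned, then (or X, 4) equals (add X, 4) because X & 4 == 0, so the OR
+// can be treated as a base-plus-offset address.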
+
+
+/// getNode - Gets or creates the specified node.
+///
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT));
+  CSEMap.InsertNode(N, IP);
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              EVT VT, SDValue Operand) {
+  // Constant fold unary operations with an integer constant operand.
+  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand.getNode())) {
+    const APInt &Val = C->getAPIntValue();
+    switch (Opcode) {
+    default: break;
+    case ISD::SIGN_EXTEND:
+      return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
+    case ISD::ANY_EXTEND:
+    case ISD::ZERO_EXTEND:
+    case ISD::TRUNCATE:
+      return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
+    case ISD::UINT_TO_FP:
+    case ISD::SINT_TO_FP: {
+      // No compile time operations on ppcf128.
+      if (VT == MVT::ppcf128) break;
+      APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
+      (void)apf.convertFromAPInt(Val,
+                                 Opcode==ISD::SINT_TO_FP,
+                                 APFloat::rmNearestTiesToEven);
+      return getConstantFP(apf, VT);
+    }
+    case ISD::BITCAST:
+      if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
+        return getConstantFP(Val.bitsToFloat(), VT);
+      else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
+        return getConstantFP(Val.bitsToDouble(), VT);
+      break;
+    case ISD::BSWAP:
+      return getConstant(Val.byteSwap(), VT);
+    case ISD::CTPOP:
+      return getConstant(Val.countPopulation(), VT);
+    case ISD::CTLZ:
+      return getConstant(Val.countLeadingZeros(), VT);
+    case ISD::CTTZ:
+      return getConstant(Val.countTrailingZeros(), VT);
+    }
+  }
+
+  // Constant fold unary operations with a floating point constant operand.
+  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand.getNode())) {
+    APFloat V = C->getValueAPF();    // make copy
+    if (VT != MVT::ppcf128 && Operand.getValueType() != MVT::ppcf128) {
+      switch (Opcode) {
+      case ISD::FNEG:
+        V.changeSign();
+        return getConstantFP(V, VT);
+      case ISD::FABS:
+        V.clearSign();
+        return getConstantFP(V, VT);
+      case ISD::FP_ROUND:
+      case ISD::FP_EXTEND: {
+        bool ignored;
+        // This can return overflow, underflow, or inexact; we don't care.
+        // FIXME need to be more flexible about rounding mode.
+        (void)V.convert(*EVTToAPFloatSemantics(VT),
+                        APFloat::rmNearestTiesToEven, &ignored);
+        return getConstantFP(V, VT);
+      }
+      case ISD::FP_TO_SINT:
+      case ISD::FP_TO_UINT: {
+        integerPart x[2];
+        bool ignored;
+        assert(integerPartWidth >= 64);
+        // FIXME need to be more flexible about rounding mode.
+        APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(),
+                              Opcode==ISD::FP_TO_SINT,
+                              APFloat::rmTowardZero, &ignored);
+        if (s==APFloat::opInvalidOp)     // inexact is OK, in fact usual
+          break;
+        APInt api(VT.getSizeInBits(), 2, x);
+        return getConstant(api, VT);
+      }
+      case ISD::BITCAST:
+        if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
+          return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
+        else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
+          return getConstant(V.bitcastToAPInt().getZExtValue(), VT);
+        break;
+      }
+    }
+  }
+
+  unsigned OpOpcode = Operand.getNode()->getOpcode();
+  switch (Opcode) {
+  case ISD::TokenFactor:
+  case ISD::MERGE_VALUES:
+  case ISD::CONCAT_VECTORS:
+    return Operand;         // Factor, merge or concat of one node?  No need.
+  case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
+  case ISD::FP_EXTEND:
+    assert(VT.isFloatingPoint() &&
+           Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
+    if (Operand.getValueType() == VT) return Operand;  // noop conversion.
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (Operand.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SIGN_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid SIGN_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid sext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ZERO_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ZERO_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid zext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
+      return getNode(ISD::ZERO_EXTEND, DL, VT,
+                     Operand.getNode()->getOperand(0));
+    break;
+  case ISD::ANY_EXTEND:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid ANY_EXTEND!");
+    if (Operand.getValueType() == VT) return Operand;   // noop extension
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid anyext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+
+    if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+        OpOpcode == ISD::ANY_EXTEND)
+      // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
+      return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+
+    // (ext (trunc x)) -> x
+    if (OpOpcode == ISD::TRUNCATE) {
+      SDValue OpOp = Operand.getNode()->getOperand(0);
+      if (OpOp.getValueType() == VT)
+        return OpOp;
+    }
+    break;
+  case ISD::TRUNCATE:
+    assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+           "Invalid TRUNCATE!");
+    if (Operand.getValueType() == VT) return Operand;   // noop truncate
+    assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+           "Invalid truncate node, src < dst!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
+    if (OpOpcode == ISD::TRUNCATE)
+      return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+    else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+             OpOpcode == ISD::ANY_EXTEND) {
+      // If the source is smaller than the dest, we still need an extend.
+      if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+            .bitsLT(VT.getScalarType()))
+        return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+      else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
+        return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
+      else
+        return Operand.getNode()->getOperand(0);
+    }
+    break;
+  case ISD::BITCAST:
+    // Basic sanity checking.
+    assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() &&
+           "Cannot BITCAST between types of different sizes!");
+    if (VT == Operand.getValueType()) return Operand;  // noop conversion.
+    if (OpOpcode == ISD::BITCAST)  // bitconv(bitconv(x)) -> bitconv(x)
+      return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    break;
+  case ISD::SCALAR_TO_VECTOR:
+    assert(VT.isVector() && !Operand.getValueType().isVector() &&
+           (VT.getVectorElementType() == Operand.getValueType() ||
+            (VT.getVectorElementType().isInteger() &&
+             Operand.getValueType().isInteger() &&
+             VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+           "Illegal SCALAR_TO_VECTOR node!");
+    if (OpOpcode == ISD::UNDEF)
+      return getUNDEF(VT);
+    // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
+    if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
+        isa<ConstantSDNode>(Operand.getOperand(1)) &&
+        Operand.getConstantOperandVal(1) == 0 &&
+        Operand.getOperand(0).getValueType() == VT)
+      return Operand.getOperand(0);
+    break;
+  case ISD::FNEG:
+    // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
+    if (UnsafeFPMath && OpOpcode == ISD::FSUB)
+      return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1),
+                     Operand.getNode()->getOperand(0));
+    if (OpOpcode == ISD::FNEG)  // --X -> X
+      return Operand.getNode()->getOperand(0);
+    break;
+  case ISD::FABS:
+    if (OpOpcode == ISD::FNEG)  // abs(-X) -> abs(X)
+      return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0));
+    break;
+  }
+
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Glue) { // Don't CSE flag producing nodes
+    FoldingSetNodeID ID;
+    SDValue Ops[1] = { Operand };
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
+                                             EVT VT,
+                                             ConstantSDNode *Cst1,
+                                             ConstantSDNode *Cst2) {
+  const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue();
+
+  switch (Opcode) {
+  case ISD::ADD:  return getConstant(C1 + C2, VT);
+  case ISD::SUB:  return getConstant(C1 - C2, VT);
+  case ISD::MUL:  return getConstant(C1 * C2, VT);
+  case ISD::UDIV:
+    if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
+    break;
+  case ISD::UREM:
+    if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
+    break;
+  case ISD::SDIV:
+    if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
+    break;
+  case ISD::SREM:
+    if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
+    break;
+  case ISD::AND:  return getConstant(C1 & C2, VT);
+  case ISD::OR:   return getConstant(C1 | C2, VT);
+  case ISD::XOR:  return getConstant(C1 ^ C2, VT);
+  case ISD::SHL:  return getConstant(C1 << C2, VT);
+  case ISD::SRL:  return getConstant(C1.lshr(C2), VT);
+  case ISD::SRA:  return getConstant(C1.ashr(C2), VT);
+  case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
+  case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
+  default: break;
+  }
+
+  return SDValue();
+}
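+
+// Worked example (illustrative, not from the original file): the APInt
+// arithmetic above wraps modulo 2^BitWidth, so for i8 constants ADD of 200
+// and 100 folds to 44 (300 mod 256), while UDIV/UREM/SDIV/SREM by zero fold
+// to nothing and fall through to the SDValue() return.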
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2) {
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+  switch (Opcode) {
+  default: break;
+  case ISD::TokenFactor:
+    assert(VT == MVT::Other && N1.getValueType() == MVT::Other &&
+           N2.getValueType() == MVT::Other && "Invalid token factor!");
+    // Fold trivial token factors.
+    if (N1.getOpcode() == ISD::EntryToken) return N2;
+    if (N2.getOpcode() == ISD::EntryToken) return N1;
+    if (N1 == N2) return N1;
+    break;
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::AND:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
+    // worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N2;
+    if (N2C && N2C->isAllOnesValue())  // X & -1 -> X
+      return N1;
+    break;
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::ADD:
+  case ISD::SUB:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
+    // it's worth handling here.
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::UDIV:
+  case ISD::UREM:
+  case ISD::MULHU:
+  case ISD::MULHS:
+  case ISD::MUL:
+  case ISD::SDIV:
+  case ISD::SREM:
+    assert(VT.isInteger() && "This operator does not apply to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+    if (UnsafeFPMath) {
+      if (Opcode == ISD::FADD) {
+        // 0+x --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1))
+          if (CFP->getValueAPF().isZero())
+            return N2;
+        // x+0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      } else if (Opcode == ISD::FSUB) {
+        // x-0 --> x
+        if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N2))
+          if (CFP->getValueAPF().isZero())
+            return N1;
+      }
+    }
+    assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
+    assert(N1.getValueType() == N2.getValueType() &&
+           N1.getValueType() == VT && "Binary operator types must match!");
+    break;
+  case ISD::FCOPYSIGN:   // N1 and result must match.  N1/N2 need not match.
+    assert(N1.getValueType() == VT &&
+           N1.getValueType().isFloatingPoint() &&
+           N2.getValueType().isFloatingPoint() &&
+           "Invalid FCOPYSIGN!");
+    break;
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+  case ISD::ROTL:
+  case ISD::ROTR:
+    assert(VT == N1.getValueType() &&
+           "Shift operators return type must be the same as their first arg");
+    assert(VT.isInteger() && N2.getValueType().isInteger() &&
+           "Shifts only work on integers");
+    // Verify that the shift amount VT is big enough to hold valid shift
+    // amounts.  This catches things like trying to shift an i1024 value by an
+    // i8, which is easy to fall into in generic code that uses
+    // TLI.getShiftAmountTy().
+    assert(N2.getValueType().getSizeInBits() >=
+           Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+           "Invalid use of small shift amount with oversized value!");
+
+    // Always fold shifts of i1 values so the code generator doesn't need to
+    // handle them.  Since we know the size of the shift has to be less than
+    // the size of the value, the shift/rotate count is guaranteed to be zero.
+    if (VT == MVT::i1)
+      return N1;
+    if (N2C && N2C->isNullValue())
+      return N1;
+    break;
+  case ISD::FP_ROUND_INREG: {
+    EVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg round!");
+    assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
+           "Cannot FP_ROUND_INREG integer types");
+    assert(EVT.isVector() == VT.isVector() &&
+           "FP_ROUND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!EVT.isVector() ||
+            EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in FP_ROUND_INREG");
+    assert(EVT.bitsLE(VT) && "Not rounding down!");
+    if (cast<VTSDNode>(N2)->getVT() == VT) return N1;  // Not actually rounding.
+    break;
+  }
+  case ISD::FP_ROUND:
+    assert(VT.isFloatingPoint() &&
+           N1.getValueType().isFloatingPoint() &&
+           VT.bitsLE(N1.getValueType()) &&
+           isa<ConstantSDNode>(N2) && "Invalid FP_ROUND!");
+    if (N1.getValueType() == VT) return N1;  // noop conversion.
+    break;
+  case ISD::AssertSext:
+  case ISD::AssertZext: {
+    EVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && EVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(!EVT.isVector() &&
+           "AssertSExt/AssertZExt type should be the vector element type "
+           "rather than the vector type!");
+    assert(EVT.bitsLE(VT) && "Not extending!");
+    if (VT == EVT) return N1; // noop assertion.
+    break;
+  }
+  case ISD::SIGN_EXTEND_INREG: {
+    EVT EVT = cast<VTSDNode>(N2)->getVT();
+    assert(VT == N1.getValueType() && "Not an inreg extend!");
+    assert(VT.isInteger() && EVT.isInteger() &&
+           "Cannot *_EXTEND_INREG FP types");
+    assert(EVT.isVector() == VT.isVector() &&
+           "SIGN_EXTEND_INREG type should be vector iff the operand "
+           "type is vector!");
+    assert((!EVT.isVector() ||
+            EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
+           "Vector element counts must match in SIGN_EXTEND_INREG");
+    assert(EVT.bitsLE(VT) && "Not extending!");
+    if (EVT == VT) return N1;  // Not actually extending
+
+    if (N1C) {
+      APInt Val = N1C->getAPIntValue();
+      unsigned FromBits = EVT.getScalarType().getSizeInBits();
+      Val <<= Val.getBitWidth()-FromBits;
+      Val = Val.ashr(Val.getBitWidth()-FromBits);
+      return getConstant(Val, VT);
+    }
+    break;
+  }
+  case ISD::EXTRACT_VECTOR_ELT:
+    // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF.
+    if (N1.getOpcode() == ISD::UNDEF)
+      return getUNDEF(VT);
+
+    // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is
+    // expanding copies of large vectors from registers.
+    if (N2C &&
+        N1.getOpcode() == ISD::CONCAT_VECTORS &&
+        N1.getNumOperands() > 0) {
+      unsigned Factor =
+        N1.getOperand(0).getValueType().getVectorNumElements();
+      return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
+                     N1.getOperand(N2C->getZExtValue() / Factor),
+                     getConstant(N2C->getZExtValue() % Factor,
+                                 N2.getValueType()));
+    }
+
+    // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is
+    // expanding large vector constants.
+    if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Elt = N1.getOperand(N2C->getZExtValue());
+      EVT VEltTy = N1.getValueType().getVectorElementType();
+      if (Elt.getValueType() != VEltTy) {
+        // If the vector element type is not legal, the BUILD_VECTOR operands
+        // are promoted and implicitly truncated.  Make that explicit here.
+        Elt = getNode(ISD::TRUNCATE, DL, VEltTy, Elt);
+      }
+      if (VT != VEltTy) {
+        // If the vector element type is not legal, the EXTRACT_VECTOR_ELT
+        // result is implicitly extended.
+        Elt = getNode(ISD::ANY_EXTEND, DL, VT, Elt);
+      }
+      return Elt;
+    }
+
+    // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector
+    // operations are lowered to scalars.
+    if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      // If the indices are the same, return the inserted element else
+      // if the indices are known different, extract the element from
+      // the original vector.
+      SDValue N1Op2 = N1.getOperand(2);
+      ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2.getNode());
+
+      if (N1Op2C && N2C) {
+        if (N1Op2C->getZExtValue() == N2C->getZExtValue()) {
+          if (VT == N1.getOperand(1).getValueType())
+            return N1.getOperand(1);
+          else
+            return getSExtOrTrunc(N1.getOperand(1), DL, VT);
+        }
+
+        return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
+      }
+    }
+    break;
+  case ISD::EXTRACT_ELEMENT:
+    assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
+    assert(!N1.getValueType().isVector() && !VT.isVector() &&
+           (N1.getValueType().isInteger() == VT.isInteger()) &&
+           "Wrong types for EXTRACT_ELEMENT!");
+
+    // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is
+    // expanding 64-bit integers into 32-bit parts.  Instead of building the
+    // extract of the BUILD_PAIR, only to have legalize rip it apart, just do
+    // it now.
+    if (N1.getOpcode() == ISD::BUILD_PAIR)
+      return N1.getOperand(N2C->getZExtValue());
+
+    // EXTRACT_ELEMENT of a constant int is also very common.
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
+      unsigned ElementSize = VT.getSizeInBits();
+      unsigned Shift = ElementSize * N2C->getZExtValue();
+      APInt ShiftedVal = C->getAPIntValue().lshr(Shift);
+      return getConstant(ShiftedVal.trunc(ElementSize), VT);
+    }
+    break;
+  case ISD::EXTRACT_SUBVECTOR: {
+    SDValue Index = N2;
+    if (VT.isSimple() && N1.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             "Extract subvector VTs must be vectors!");
+      assert(VT.getVectorElementType() ==
+             N1.getValueType().getVectorElementType() &&
+             "Extract subvector VTs must have the same element type!");
+      assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+             "Extract subvector must be from larger vector to smaller vector!");
+
+      if (isa<ConstantSDNode>(Index.getNode())) {
+        assert((VT.getVectorNumElements() +
+                cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+                <= N1.getValueType().getVectorNumElements())
+               && "Extract subvector overflow!");
+      }
+
+      // Trivial extraction.
+      if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+        return N1;
+    }
+    break;
+  }
+  }
+
+  if (N1C) {
+    if (N2C) {
+      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
+      if (SV.getNode()) return SV;
+    } else {      // Canonicalize constant to RHS if commutative
+      if (isCommutativeBinOp(Opcode)) {
+        std::swap(N1C, N2C);
+        std::swap(N1, N2);
+      }
+    }
+  }
+
+  // Constant fold FP operations.
+  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+  if (N1CFP) {
+    if (!N2CFP && isCommutativeBinOp(Opcode)) {
+      // Canonicalize constant to RHS if commutative
+      std::swap(N1CFP, N2CFP);
+      std::swap(N1, N2);
+    } else if (N2CFP && VT != MVT::ppcf128) {
+      APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
+      APFloat::opStatus s;
+      switch (Opcode) {
+      case ISD::FADD:
+        s = V1.add(V2, APFloat::rmNearestTiesToEven);
+        if (s != APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FSUB:
+        s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FMUL:
+        s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FDIV:
+        s = V1.divide(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FREM :
+        s = V1.mod(V2, APFloat::rmNearestTiesToEven);
+        if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero)
+          return getConstantFP(V1, VT);
+        break;
+      case ISD::FCOPYSIGN:
+        V1.copySign(V2);
+        return getConstantFP(V1, VT);
+      default: break;
+      }
+    }
+  }
+
+  // Canonicalize an UNDEF to the RHS, even over a constant.
+  if (N1.getOpcode() == ISD::UNDEF) {
+    if (isCommutativeBinOp(Opcode)) {
+      std::swap(N1, N2);
+    } else {
+      switch (Opcode) {
+      case ISD::FP_ROUND_INREG:
+      case ISD::SIGN_EXTEND_INREG:
+      case ISD::SUB:
+      case ISD::FSUB:
+      case ISD::FDIV:
+      case ISD::FREM:
+      case ISD::SRA:
+        return N1;     // fold op(undef, arg2) -> undef
+      case ISD::UDIV:
+      case ISD::SDIV:
+      case ISD::UREM:
+      case ISD::SREM:
+      case ISD::SRL:
+      case ISD::SHL:
+        if (!VT.isVector())
+          return getConstant(0, VT);    // fold op(undef, arg2) -> 0
+        // For vectors, we can't easily build an all zero vector, just return
+        // the LHS.
+        return N2;
+      }
+    }
+  }
+
+  // Fold a bunch of operators when the RHS is undef.
+  if (N2.getOpcode() == ISD::UNDEF) {
+    switch (Opcode) {
+    case ISD::XOR:
+      if (N1.getOpcode() == ISD::UNDEF)
+        // Handle undef ^ undef -> 0 special case. This is a common
+        // idiom (misuse).
+        return getConstant(0, VT);
+      // fallthrough
+    case ISD::ADD:
+    case ISD::ADDC:
+    case ISD::ADDE:
+    case ISD::SUB:
+    case ISD::UDIV:
+    case ISD::SDIV:
+    case ISD::UREM:
+    case ISD::SREM:
+      return N2;       // fold op(arg1, undef) -> undef
+    case ISD::FADD:
+    case ISD::FSUB:
+    case ISD::FMUL:
+    case ISD::FDIV:
+    case ISD::FREM:
+      if (UnsafeFPMath)
+        return N2;
+      break;
+    case ISD::MUL:
+    case ISD::AND:
+    case ISD::SRL:
+    case ISD::SHL:
+      if (!VT.isVector())
+        return getConstant(0, VT);  // fold op(arg1, undef) -> 0
+      // For vectors, we can't easily build an all zero vector, just return
+      // the LHS.
+      return N1;
+    case ISD::OR:
+      if (!VT.isVector())
+        return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+      // For vectors, we can't easily build an all one vector, just return
+      // the LHS.
+      return N1;
+    case ISD::SRA:
+      return N1;
+    }
+  }
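+
+  // Note on the undef folds above (editorial, not from the original file):
+  // "xor undef, undef" becomes 0 rather than undef because the two undefs
+  // may be chosen independently, and the scalar cases that would fold to an
+  // all-zeros or all-ones constant instead return an operand for vector
+  // types, where such splat constants are not cheap to materialize here.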
+
+  // Memoize this node if possible.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Glue) {
+    SDValue Ops[] = { N1, N2 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              SDValue N1, SDValue N2, SDValue N3) {
+  // Perform various simplifications.
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+  switch (Opcode) {
+  case ISD::CONCAT_VECTORS:
+    // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
+    // one big BUILD_VECTOR.
+    if (N1.getOpcode() == ISD::BUILD_VECTOR &&
+        N2.getOpcode() == ISD::BUILD_VECTOR &&
+        N3.getOpcode() == ISD::BUILD_VECTOR) {
+      SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(),
+                                    N1.getNode()->op_end());
+      Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+      Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
+      return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
+    }
+    break;
+  case ISD::SETCC: {
+    // Use FoldSetCC to simplify SETCC's.
+    SDValue Simp = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL);
+    if (Simp.getNode()) return Simp;
+    break;
+  }
+  case ISD::SELECT:
+    if (N1C) {
+      if (N1C->getZExtValue())
+        return N2;             // select true, X, Y -> X
+      else
+        return N3;             // select false, X, Y -> Y
+    }
+
+    if (N2 == N3) return N2;   // select C, X, X -> X
+    break;
+  case ISD::VECTOR_SHUFFLE:
+    llvm_unreachable("should use getVectorShuffle constructor!");
+    break;
+  case ISD::INSERT_SUBVECTOR: {
+    SDValue Index = N3;
+    if (VT.isSimple() && N1.getValueType().isSimple()
+        && N2.getValueType().isSimple()) {
+      assert(VT.isVector() && N1.getValueType().isVector() &&
+             N2.getValueType().isVector() &&
+             "Insert subvector VTs must be vectors");
+      assert(VT == N1.getValueType() &&
+             "Dest and insert subvector source types must match!");
+      assert(N2.getValueType().getSimpleVT() <=
+             N1.getValueType().getSimpleVT() &&
+             "Insert subvector must be from smaller vector to larger vector!");
+      if (isa<ConstantSDNode>(Index.getNode())) {
+        assert((N2.getValueType().getVectorNumElements() +
+                cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+                <= VT.getVectorNumElements())
+               && "Insert subvector overflow!");
+      }
+
+      // Trivial insertion.
+      if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+        return N2;
+    }
+    break;
+  }
+  case ISD::BITCAST:
+    // Fold bit_convert nodes from a type to themselves.
+    if (N1.getValueType() == VT)
+      return N1;
+    break;
+  }
+
+  // Memoize node if it doesn't produce a flag.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+  if (VT != MVT::Glue) {
+    SDValue Ops[] = { N1, N2, N3 };
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
+    void *IP = 0;
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
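+
+// Note (editorial, not from the original file): in each getNode overload
+// above, results of type MVT::Glue are deliberately excluded from CSE.
+// Glue edges pin an ordering between adjacent nodes, so two structurally
+// identical glue-producing nodes must remain distinct rather than being
+// merged through CSEMap.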
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + } + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops, 5); +} + +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast(*U)) + if (FrameIndexSDNode *FI = dyn_cast(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + &ArgChains[0], ArgChains.size()); +} + +/// SplatByte - Distribute ByteVal over NumBits bits. +static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { + APInt Val = APInt(NumBits, ByteVal); + unsigned Shift = 8; + for (unsigned i = NumBits; i > 8; i >>= 1) { + Val = (Val << Shift) | Val; + Shift <<= 1; + } + return Val; +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, + DebugLoc dl) { + assert(Value.getOpcode() != ISD::UNDEF); + + unsigned NumBits = VT.getScalarType().getSizeInBits(); + if (ConstantSDNode *C = dyn_cast(Value)) { + APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); + if (VT.isInteger()) + return DAG.getConstant(Val, VT); + return DAG.getConstantFP(APFloat(Val), VT); + } + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = SplatByte(NumBits, 0x01); + Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); + } + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, + std::string &Str, unsigned Offset) { + // Handle vector with all elements zero. 
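+  // Worked example for the non-empty case below (little-endian): with
+  // Str = "abcd" and VT = MVT::i32, the byte loop folds in 'd','c','b','a'
+  // to build Val = 0x64636261, and storing that i32 writes the bytes
+  // "abcd" back in order.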
+  if (Str.empty()) {
+    if (VT.isInteger())
+      return DAG.getConstant(0, VT);
+    else if (VT == MVT::f32 || VT == MVT::f64)
+      return DAG.getConstantFP(0.0, VT);
+    else if (VT.isVector()) {
+      unsigned NumElts = VT.getVectorNumElements();
+      MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+      return DAG.getNode(ISD::BITCAST, dl, VT,
+                         DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+                                                             EltVT, NumElts)));
+    } else
+      llvm_unreachable("Expected type!");
+  }
+
+  assert(!VT.isVector() && "Can't handle vector type here!");
+  unsigned NumBits = VT.getSizeInBits();
+  unsigned MSB = NumBits / 8;
+  uint64_t Val = 0;
+  if (TLI.isLittleEndian())
+    Offset = Offset + MSB - 1;
+  for (unsigned i = 0; i != MSB; ++i) {
+    Val = (Val << 8) | (unsigned char)Str[Offset];
+    Offset += TLI.isLittleEndian() ? -1 : 1;
+  }
+  return DAG.getConstant(Val, VT);
+}
+
+/// getMemBasePlusOffset - Returns a node that is the given base pointer plus
+/// a constant byte offset.
+static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset,
+                                    SelectionDAG &DAG) {
+  EVT VT = Base.getValueType();
+  return DAG.getNode(ISD::ADD, Base.getDebugLoc(),
+                     VT, Base, DAG.getConstant(Offset, VT));
+}
+
+/// isMemSrcFromString - Returns true if memcpy source is a string constant.
+///
+static bool isMemSrcFromString(SDValue Src, std::string &Str) {
+  unsigned SrcDelta = 0;
+  GlobalAddressSDNode *G = NULL;
+  if (Src.getOpcode() == ISD::GlobalAddress)
+    G = cast<GlobalAddressSDNode>(Src);
+  else if (Src.getOpcode() == ISD::ADD &&
+           Src.getOperand(0).getOpcode() == ISD::GlobalAddress &&
+           Src.getOperand(1).getOpcode() == ISD::Constant) {
+    G = cast<GlobalAddressSDNode>(Src.getOperand(0));
+    SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue();
+  }
+  if (!G)
+    return false;
+
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G->getGlobal());
+  if (GV && GetConstantStringInfo(GV, Str, SrcDelta, false))
+    return true;
+
+  return false;
+}
+
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Return true if the number of memory ops
+/// is below the threshold. It returns the types of the sequence of
+/// memory ops to perform memset / memcpy by reference.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+                                     unsigned Limit, uint64_t Size,
+                                     unsigned DstAlign, unsigned SrcAlign,
+                                     bool NonScalarIntSafe,
+                                     bool MemcpyStrSrc,
+                                     SelectionDAG &DAG,
+                                     const TargetLowering &TLI) {
+  assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+         "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. memset or memcpy from constant string. Otherwise,
+  // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+  // is the specified alignment of the memory operation. If it is zero, that
+  // means it's possible to change the alignment of the destination.
+  // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+  // not need to be loaded.
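+  // Illustrative trace (assuming the code below settles on VT = i64): a
+  // 15-byte operation is covered greedily as { i64, i32, i16, i8 }, i.e.
+  // 8 + 4 + 2 + 1 bytes in four memory ops.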
+  EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
+                                   NonScalarIntSafe, MemcpyStrSrc,
+                                   DAG.getMachineFunction());
+
+  if (VT == MVT::Other) {
+    if (DstAlign >= TLI.getTargetData()->getPointerPrefAlignment() ||
+        TLI.allowsUnalignedMemoryAccesses(VT)) {
+      VT = TLI.getPointerTy();
+    } else {
+      switch (DstAlign & 7) {
+      case 0:  VT = MVT::i64; break;
+      case 4:  VT = MVT::i32; break;
+      case 2:  VT = MVT::i16; break;
+      default: VT = MVT::i8;  break;
+      }
+    }
+
+    MVT LVT = MVT::i64;
+    while (!TLI.isTypeLegal(LVT))
+      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+    assert(LVT.isInteger());
+
+    if (VT.bitsGT(LVT))
+      VT = LVT;
+  }
+
+  unsigned NumMemOps = 0;
+  while (Size != 0) {
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    while (VTSize > Size) {
+      // For now, only use non-vector loads / stores for the left-over pieces.
+      if (VT.isVector() || VT.isFloatingPoint()) {
+        VT = MVT::i64;
+        while (!TLI.isTypeLegal(VT))
+          VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize = VT.getSizeInBits() / 8;
+      } else {
+        // This can result in a type that is not legal on the target, e.g.
+        // 1 or 2 bytes on PPC.
+        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+        VTSize >>= 1;
+      }
+    }
+
+    if (++NumMemOps > Limit)
+      return false;
+    MemOps.push_back(VT);
+    Size -= VTSize;
+  }
+
+  return true;
+}
+
+static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
+                                       SDValue Chain, SDValue Dst,
+                                       SDValue Src, uint64_t Size,
+                                       unsigned Align, bool isVol,
+                                       bool AlwaysInline,
+                                       MachinePointerInfo DstPtrInfo,
+                                       MachinePointerInfo SrcPtrInfo) {
+  // Turn a memcpy of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
+
+  // Expand memcpy to a series of load and store ops if the size operand falls
+  // below a certain threshold.
+  // TODO: In the AlwaysInline case, if the size is big then generate a loop
+  // rather than maybe a humongous number of loads and stores.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+  if (Align > SrcAlign)
+    SrcAlign = Align;
+  std::string Str;
+  bool CopyFromStr = isMemSrcFromString(Src, Str);
+  bool isZeroStr = CopyFromStr && Str.empty();
+  unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
+  if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+                                (DstAlignCanChange ? 0 : Align),
+                                (isZeroStr ? 0 : SrcAlign),
+                                true, CopyFromStr, DAG, TLI))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
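+      // (Realigning is safe here: a non-fixed stack object has no assigned
+      // offset yet, so raising its alignment only constrains frame layout.)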
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (CopyFromStr && + (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We only handle zero vectors here. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff); + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } else { + // The type might not be legal for the target. This should only happen + // if the type is smaller than a legal type, as on PPC, so the right + // thing to do is generate a LoadExt/StoreTrunc pair. These simplify + // to Load/Store if NVT==VT. + // FIXME does the case above also need this? + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.bitsGE(VT)); + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + getMemBasePlusOffset(Src, SrcOff, DAG), + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, + MinAlign(SrcAlign, SrcOff)); + Store = DAG.getTruncStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memmove of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); + FrameIndexSDNode *FI = dyn_cast(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + SrcAlign, true, false, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. 
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  uint64_t SrcOff = 0, DstOff = 0;
+  SmallVector<SDValue, 8> LoadValues;
+  SmallVector<SDValue, 8> LoadChains;
+  SmallVector<SDValue, 8> OutChains;
+  unsigned NumMemOps = MemOps.size();
+  for (unsigned i = 0; i < NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Value;
+
+    Value = DAG.getLoad(VT, dl, Chain,
+                        getMemBasePlusOffset(Src, SrcOff, DAG),
+                        SrcPtrInfo.getWithOffset(SrcOff), isVol,
+                        false, SrcAlign);
+    LoadValues.push_back(Value);
+    LoadChains.push_back(Value.getValue(1));
+    SrcOff += VTSize;
+  }
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                      &LoadChains[0], LoadChains.size());
+  OutChains.clear();
+  for (unsigned i = 0; i < NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+    unsigned VTSize = VT.getSizeInBits() / 8;
+    SDValue Store;
+
+    Store = DAG.getStore(Chain, dl, LoadValues[i],
+                         getMemBasePlusOffset(Dst, DstOff, DAG),
+                         DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
+    OutChains.push_back(Store);
+    DstOff += VTSize;
+  }
+
+  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                     &OutChains[0], OutChains.size());
+}
+
+static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
+                               SDValue Chain, SDValue Dst,
+                               SDValue Src, uint64_t Size,
+                               unsigned Align, bool isVol,
+                               MachinePointerInfo DstPtrInfo) {
+  // Turn a memset of undef to nop.
+  if (Src.getOpcode() == ISD::UNDEF)
+    return Chain;
+
+  // Expand memset to a series of load/store ops if the size operand
+  // falls below a certain threshold.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  std::vector<EVT> MemOps;
+  bool DstAlignCanChange = false;
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+  if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+    DstAlignCanChange = true;
+  bool NonScalarIntSafe =
+    isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
+  if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
+                                Size, (DstAlignCanChange ? 0 : Align), 0,
+                                NonScalarIntSafe, false, DAG, TLI))
+    return SDValue();
+
+  if (DstAlignCanChange) {
+    const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+    unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+    if (NewAlign > Align) {
+      // Give the stack frame object a larger alignment if needed.
+      if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+        MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+      Align = NewAlign;
+    }
+  }
+
+  SmallVector<SDValue, 8> OutChains;
+  uint64_t DstOff = 0;
+  unsigned NumMemOps = MemOps.size();
+
+  // Find the largest store and generate the bit pattern for it.
+  EVT LargestVT = MemOps[0];
+  for (unsigned i = 1; i < NumMemOps; ++i)
+    if (MemOps[i].bitsGT(LargestVT))
+      LargestVT = MemOps[i];
+  SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
+  for (unsigned i = 0; i < NumMemOps; ++i) {
+    EVT VT = MemOps[i];
+
+    // If this store is smaller than the largest store, see whether we can get
+    // the smaller value for free with a truncate.
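+    // e.g. with LargestVT = i32 the splat value is 0xABABABAB; an i16 tail
+    // store can reuse it as TRUNCATE(i32 0xABABABAB) = 0xABAB when the
+    // target reports the truncate as free, instead of re-splatting.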
+ SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); + OutChains.push_back(Store); + DstOff += VT.getSizeInBits() / 8; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &OutChains[0], OutChains.size()); +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(),Align, + isVol, false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + true, DstPtrInfo, SrcPtrInfo); + } + + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc + // memcpy is not guaranteed to be safe. libc memcpys aren't required to + // respect volatile, so they may do things like read or write memory + // beyond the given memory regions. But fixing this isn't easy, and most + // people don't care. + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMCPY), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), + TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. 
+ if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = TLI.getTargetData()->getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMMOVE), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), + TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, + MachinePointerInfo DstPtrInfo) { + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, isVol, DstPtrInfo); + + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. + SDValue Result = + TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, + DstPtrInfo); + if (Result.getNode()) + return Result; + + // Emit a library call. + const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + // Extend or truncate the argument to be an i32 value for the call. 
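+  // C's memset takes the fill value as an 'int' (void *memset(void *, int,
+  // size_t)), so the value operand is normalized to i32 for the call below.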
+ if (Src.getValueType().bitsGT(MVT::i32)) + Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); + else + Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); + Entry.Node = Src; + Entry.Ty = Type::getInt32Ty(*getContext()); + Entry.isSExt = true; + Args.push_back(Entry); + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Entry.isSExt = false; + Args.push_back(Entry); + // FIXME: pass in DebugLoc + std::pair CallResult = + TLI.LowerCallTo(Chain, Type::getVoidTy(*getContext()), + false, false, false, false, 0, + TLI.getLibcallCallingConv(RTLIB::MEMSET), false, + /*isReturnValueUsed=*/false, + getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), + TLI.getPointerTy()), + Args, *this, dl); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, SDValue Ptr, SDValue Cmp, + SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. + Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Cmp, + SDValue Swp, MachineMemOperand *MMO) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + EVT VT = Cmp.getValueType(); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 4); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Cmp, Swp, MMO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; + + // For now, atomics are considered to be volatile always. 
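+  // Marking the memory operand volatile keeps the DAG from reordering,
+  // merging, or deleting the atomic operation; a dedicated atomic flag
+  // could relax this, hence the "for now" above.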
+ Flags |= MachineMemOperand::MOVolatile; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP) && + "Invalid Atomic Op"); + + EVT VT = Val.getValueType(); + + SDVTList VTs = getVTList(VT, MVT::Other); + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + SDValue Ops[] = {Chain, Ptr, Val}; + AddNodeIDNode(ID, Opcode, VTs, Ops, 3); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Val, MMO); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, + DebugLoc dl) { + if (NumOps == 1) + return Ops[0]; + + SmallVector VTs; + VTs.reserve(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(&VTs[0], NumOps), + Ops, NumOps); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, + const EVT *VTs, unsigned NumVTs, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps, + MemVT, PtrInfo, Align, Vol, + ReadMem, WriteMem); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, + const SDValue *Ops, unsigned NumOps, + EVT MemVT, MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + + // Memoize the node unless it returns a flag. 
+ MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); + } + AllNodes.push_back(N); + return SDValue(N, 0); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast(Ptr)) + return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa(Ptr.getOperand(1)) || + !isa(Ptr.getOperand(0))) + return MachinePointerInfo(); + + int FI = cast(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack(FI, Offset+ + cast(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast(OffsetOp)) + return InferPointerInfo(Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.getOpcode() == ISD::UNDEF) + return InferPointerInfo(Ptr); + return MachinePointerInfo(); +} + + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, + SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, + bool isVolatile, bool isNonTemporal, + unsigned Alignment, const MDNode *TBAAInfo) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(VT); + + unsigned Flags = MachineMemOperand::MOLoad; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V == 0) + PtrInfo = InferPointerInfo(Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + TBAAInfo); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == MemVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
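+    // e.g. an EXTLOAD whose MemVT is i8 but whose result VT is i32: the
+    // scalar memory type must be strictly narrower than the result type.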
+    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+           "Should only be an extending load, not truncating!");
+    assert(VT.isInteger() == MemVT.isInteger() &&
+           "Cannot convert from FP to Int or Int -> FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use trunc store to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use trunc store to change the number of vector elements!");
+  }
+
+  bool Indexed = AM != ISD::UNINDEXED;
+  assert((Indexed || Offset.getOpcode() == ISD::UNDEF) &&
+         "Unindexed load with an offset!");
+
+  SDVTList VTs = Indexed ?
+    getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other);
+  SDValue Ops[] = { Chain, Ptr, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<LoadSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType,
+                                             MemVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
+                              SDValue Chain, SDValue Ptr,
+                              MachinePointerInfo PtrInfo,
+                              bool isVolatile, bool isNonTemporal,
+                              unsigned Alignment, const MDNode *TBAAInfo) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
+                 PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
+}
+
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+                                 SDValue Chain, SDValue Ptr,
+                                 MachinePointerInfo PtrInfo, EVT MemVT,
+                                 bool isVolatile, bool isNonTemporal,
+                                 unsigned Alignment, const MDNode *TBAAInfo) {
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
+                 PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+                 TBAAInfo);
+}
+
+
+SDValue
+SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
+                             SDValue Offset, ISD::MemIndexedMode AM) {
+  LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
+  assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
+         "Load is already an indexed load!");
+  return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
+                 LD->getChain(), Base, Offset, LD->getPointerInfo(),
+                 LD->getMemoryVT(),
+                 LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachinePointerInfo PtrInfo,
+                               bool isVolatile, bool isNonTemporal,
+                               unsigned Alignment, const MDNode *TBAAInfo) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(Val.getValueType());
+
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrInfo, Flags,
+                            Val.getValueType().getStoreSize(), Alignment,
+                            TBAAInfo);
+
+  return getStore(Chain, dl, Val, Ptr, MMO);
+}
+
+SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                               SDValue Ptr, MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              false, VT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, MachinePointerInfo PtrInfo,
+                                    EVT SVT, bool isVolatile,
+                                    bool isNonTemporal, unsigned Alignment,
+                                    const MDNode *TBAAInfo) {
+  if (Alignment == 0)  // Ensure that codegen never sees alignment 0
+    Alignment = getEVTAlignment(SVT);
+
+  unsigned Flags = MachineMemOperand::MOStore;
+  if (isVolatile)
+    Flags |= MachineMemOperand::MOVolatile;
+  if (isNonTemporal)
+    Flags |= MachineMemOperand::MONonTemporal;
+
+  if (PtrInfo.V == 0)
+    PtrInfo = InferPointerInfo(Ptr);
+
+  MachineFunction &MF = getMachineFunction();
+  MachineMemOperand *MMO =
+    MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+                            TBAAInfo);
+
+  return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
+}
+
+SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
+                                    SDValue Ptr, EVT SVT,
+                                    MachineMemOperand *MMO) {
+  EVT VT = Val.getValueType();
+
+  if (VT == SVT)
+    return getStore(Chain, dl, Val, Ptr, MMO);
+
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
+  assert(VT.isInteger() == SVT.isInteger() &&
+         "Can't do FP-INT conversion!");
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
+
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Undef = getUNDEF(Ptr.getValueType());
+  SDValue Ops[] = { Chain, Val, Ptr, Undef };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(SVT.getRawBits());
+  ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+                                     MMO->isNonTemporal()));
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    cast<StoreSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED,
+                                              true, SVT, MMO);
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue
+SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
+                              SDValue Offset, ISD::MemIndexedMode AM) {
+  StoreSDNode *ST = cast<StoreSDNode>(OrigStore);
+  assert(ST->getOffset().getOpcode() == ISD::UNDEF &&
+         "Store is already an indexed store!");
+  SDVTList VTs = getVTList(Base.getValueType(), MVT::Other);
+  SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset };
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
+  ID.AddInteger(ST->getMemoryVT().getRawBits());
+  ID.AddInteger(ST->getRawSubclassData());
+  void *IP = 0;
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    return SDValue(E, 0);
+
+  SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM,
+                                              ST->isTruncatingStore(),
+                                              ST->getMemoryVT(),
+                                              ST->getMemOperand());
+  CSEMap.InsertNode(N, IP);
+  AllNodes.push_back(N);
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
+                               SDValue Chain, SDValue Ptr,
+                               SDValue SV,
+                               unsigned Align) {
+  SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+  return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDUse *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  // Copy from an SDUse array into an SDValue array for use with
+  // the regular getNode logic.
+  SmallVector<SDValue, 8> NewOps(Ops, Ops + NumOps);
+  return getNode(Opcode, DL, VT, &NewOps[0], NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
+                              const SDValue *Ops, unsigned NumOps) {
+  switch (NumOps) {
+  case 0: return getNode(Opcode, DL, VT);
+  case 1: return getNode(Opcode, DL, VT, Ops[0]);
+  case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]);
+  case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]);
+  default: break;
+  }
+
+  switch (Opcode) {
+  default: break;
+  case ISD::SELECT_CC: {
+    assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
+    assert(Ops[0].getValueType() == Ops[1].getValueType() &&
+           "LHS and RHS of condition must have same type!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "True and False arms of SelectCC must have same type!");
+    assert(Ops[2].getValueType() == VT &&
+           "select_cc node must be of same type as true and false value!");
+    break;
+  }
+  case ISD::BR_CC: {
+    assert(NumOps == 5 && "BR_CC takes 5 operands!");
+    assert(Ops[2].getValueType() == Ops[3].getValueType() &&
+           "LHS/RHS of comparison should match types!");
+    break;
+  }
+  }
+
+  // Memoize nodes.
+  SDNode *N;
+  SDVTList VTs = getVTList(VT);
+
+  if (VT != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
+    void *IP = 0;
+
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return SDValue(E, 0);
+
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+    CSEMap.InsertNode(N, IP);
+  } else {
+    N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps);
+  }
+
+  AllNodes.push_back(N);
+#ifndef NDEBUG
+  VerifySDNode(N);
+#endif
+  return SDValue(N, 0);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const std::vector<EVT> &ResultTys,
+                              const SDValue *Ops, unsigned NumOps) {
+  return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()),
+                 Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
+                              const EVT *VTs, unsigned NumVTs,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (NumVTs == 1)
+    return getNode(Opcode, DL, VTs[0], Ops, NumOps);
+  return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps);
+}
+
+SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
+                              const SDValue *Ops, unsigned NumOps) {
+  if (VTList.NumVTs == 1)
+    return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps);
+
+#if 0
+  switch (Opcode) {
+  // FIXME: figure out how to safely handle things like
+  // int foo(int x) { return 1 << (x & 255); }
+  // int bar() { return foo(256); }
+  case ISD::SRA_PARTS:
+  case ISD::SRL_PARTS:
+  case ISD::SHL_PARTS:
+    if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+        cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1)
+      return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+    else if (N3.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
+        // If the and is only masking out bits that cannot affect the shift,
+        // eliminate the and.
+        unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+        if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
+          return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
+      }
+    break;
+  }
+#endif
+
+  // Memoize the node unless it returns a flag.
+ SDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + } + } + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, 0, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops, 1); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops, 2); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops, 3); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops, 4); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops, 5); +} + +SDVTList SelectionDAG::getVTList(EVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) + return *I; + + EVT *Array = Allocator.Allocate(2); + Array[0] = VT1; + Array[1] = VT2; + SDVTList Result = makeVTList(Array, 2); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3) + return *I; + + EVT *Array = Allocator.Allocate(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + SDVTList Result = makeVTList(Array, 3); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) + if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && + I->VTs[2] == VT3 && I->VTs[3] == VT4) + return *I; + + EVT *Array = Allocator.Allocate(4); + 
Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + SDVTList Result = makeVTList(Array, 4); + VTList.push_back(Result); + return Result; +} + +SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { + switch (NumVTs) { + case 0: llvm_unreachable("Cannot have nodes without results!"); + case 1: return getVTList(VTs[0]); + case 2: return getVTList(VTs[0], VTs[1]); + case 3: return getVTList(VTs[0], VTs[1], VTs[2]); + case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); + default: break; + } + + for (std::vector::reverse_iterator I = VTList.rbegin(), + E = VTList.rend(); I != E; ++I) { + if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) + continue; + + bool NoMatch = false; + for (unsigned i = 2; i != NumVTs; ++i) + if (VTs[i] != I->VTs[i]) { + NoMatch = true; + break; + } + if (!NoMatch) + return *I; + } + + EVT *Array = Allocator.Allocate(NumVTs); + std::copy(VTs, VTs+NumVTs, Array); + SDVTList Result = makeVTList(Array, NumVTs); + VTList.push_back(Result); + return Result; +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. As a degenerate case, if you specify the same +/// input operands as the node already has, the input node is returned. +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { + assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op == N->getOperand(0)) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + N->OperandList[0].set(Op); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { + assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) + return N; // No operands changed, just return the input node. + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + if (N->OperandList[0] != Op1) + N->OperandList[0].set(Op1); + if (N->OperandList[1] != Op2) + N->OperandList[1].set(Op2); + + // If this gets put into a CSE map, add it. 
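+  // The node was pulled out of the CSE maps before mutation because its
+  // hash depends on the operand list; reinserting it here re-keys it under
+  // the new operands.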
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops, 3); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops, 4); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops, 5); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // Check to see if there is no change. + bool AnyChange = false; + for (unsigned i = 0; i != NumOps; ++i) { + if (Ops[i] != N->getOperand(i)) { + AnyChange = true; + break; + } + } + + // No operands changed, just return the input node. + if (!AnyChange) return N; + + // See if the modified node already exists. + void *InsertPos = 0; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = 0; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
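+/// For example, instruction selection can rewrite a node in place into a
+/// concrete machine instruction of matching arity (illustrative opcode):
+///   CurDAG->SelectNodeTo(N, X86::ADD32rr, MVT::i32, Op0, Op1);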
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, 0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, const SDValue *Ops, + unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, (SDValue *)0, 0); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops, NumOps); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 1); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 2); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops, 3); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs, const SDValue *Ops, + unsigned NumOps) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps); + // Reset the NodeID to -1. + N->setNodeId(-1); + return N; +} + +/// MorphNodeTo - This *mutates* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. 
Note that the DebugLoc need not be the same.
+///
+/// Using MorphNodeTo is faster than creating a new node and swapping it in
+/// with ReplaceAllUsesWith both because it often avoids allocating a new
+/// node, and because it doesn't require CSE recalculation for any of
+/// the node's users.
+///
+SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
+                                  SDVTList VTs, const SDValue *Ops,
+                                  unsigned NumOps) {
+  // If an identical node already exists, use it.
+  void *IP = 0;
+  if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
+    FoldingSetNodeID ID;
+    AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
+    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+      return ON;
+  }
+
+  if (!RemoveNodeFromCSEMaps(N))
+    IP = 0;
+
+  // Start the morphing.
+  N->NodeType = Opc;
+  N->ValueList = VTs.VTs;
+  N->NumValues = VTs.NumVTs;
+
+  // Clear the operands list, updating used nodes to remove this from their
+  // use list. Keep track of any operands that become dead as a result.
+  SmallPtrSet<SDNode*, 16> DeadNodeSet;
+  for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) {
+    SDUse &Use = *I++;
+    SDNode *Used = Use.getNode();
+    Use.set(SDValue());
+    if (Used->use_empty())
+      DeadNodeSet.insert(Used);
+  }
+
+  if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) {
+    // Initialize the memory references information.
+    MN->setMemRefs(0, 0);
+    // If NumOps is larger than the # of operands we can have in a
+    // MachineSDNode, reallocate the operand list.
+    if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) {
+      if (MN->OperandsNeedDelete)
+        delete[] MN->OperandList;
+      if (NumOps > array_lengthof(MN->LocalOperands))
+        // We're creating a final node that will live unmorphed for the
+        // remainder of the current SelectionDAG iteration, so we can allocate
+        // the operands directly out of a pool with no recycling metadata.
+        MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps),
+                         Ops, NumOps);
+      else
+        MN->InitOperands(MN->LocalOperands, Ops, NumOps);
+      MN->OperandsNeedDelete = false;
+    } else
+      MN->InitOperands(MN->OperandList, Ops, NumOps);
+  } else {
+    // If NumOps is larger than the # of operands we currently have, reallocate
+    // the operand list.
+    if (NumOps > N->NumOperands) {
+      if (N->OperandsNeedDelete)
+        delete[] N->OperandList;
+      N->InitOperands(new SDUse[NumOps], Ops, NumOps);
+      N->OperandsNeedDelete = true;
+    } else
+      N->InitOperands(N->OperandList, Ops, NumOps);
+  }
+
+  // Delete any nodes that are still dead after adding the uses for the
+  // new operands.
+  if (!DeadNodeSet.empty()) {
+    SmallVector<SDNode *, 16> DeadNodes;
+    for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+         E = DeadNodeSet.end(); I != E; ++I)
+      if ((*I)->use_empty())
+        DeadNodes.push_back(*I);
+    RemoveDeadNodes(DeadNodes);
+  }
+
+  if (IP)
+    CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  return N;
+}
+
+
+/// getMachineNode - These are used for target selectors to create a new node
+/// with specified return type(s), MachineInstr opcode, and operands.
+///
+/// Note that getMachineNode returns the resultant node. If there is already a
+/// node of the specified opcode and operands, it returns that node instead of
+/// the current one.
+MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, 0, 0); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops, array_lengthof(Ops)); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + EVT VT1, EVT VT2, EVT VT3, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, + 
const std::vector &ResultTys, + const SDValue *Ops, unsigned NumOps) { + SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); + return getMachineNode(Opcode, dl, VTs, Ops, NumOps); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, + const SDValue *Ops, unsigned NumOps) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; + MachineSDNode *N; + void *IP = 0; + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); + IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return cast(E); + } + + // Allocate a new MachineSDNode. + N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + AllNodes.push_back(N); +#ifndef NDEBUG + VerifyMachineNode(N); +#endif + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetOpcode::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetOpcode::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + const SDValue *Ops, unsigned NumOps) { + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return E; + } + return NULL; +} + +/// getDbgValue - Creates a SDDbgValue node. +/// +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +} + +SDDbgValue * +SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +} + +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +/// This class also manages a "downlink" DAGUpdateListener, to forward +/// messages to ReplaceAllUsesWith's callers. 
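+///
+/// Sketch of the hazard this guards against: ReplaceAllUsesWith walks
+/// From's use list with a use_iterator while CSE may delete nodes mid-walk.
+/// The NodeDeleted callback below steps the caller's iterator past every
+/// use record owned by the dying node, e.g.
+///
+///   while (UI != UE && N == *UI)
+///     ++UI;
+///
+/// so the iterator can never be left dangling into freed memory.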
+/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SelectionDAG::DAGUpdateListener *DownLink; + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + virtual void NodeDeleted(SDNode *N, SDNode *E) { + // Increment the iterator as needed. + while (UI != UE && N == *UI) + ++UI; + + // Then forward the message. + if (DownLink) DownLink->NodeDeleted(N, E); + } + + virtual void NodeUpdated(SDNode *N) { + // Just forward the message. + if (DownLink) DownLink->NodeUpdated(N); + } + +public: + RAUWUpdateListener(SelectionDAG::DAGUpdateListener *dl, + SDNode::use_iterator &ui, + SDNode::use_iterator &ue) + : DownLink(dl), UI(ui), UE(ue) {} +}; + +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From has a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, + DAGUpdateListener *UpdateListener) { + SDNode *From = FromN.getNode(); + assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && + "Cannot replace with this method!"); + assert(From != To.getNode() && "Cannot replace uses of with self"); + + // Iterate over all the existing uses of From. New uses will be added + // to the beginning of the use list, which we avoid visiting. + // This specifically avoids visiting uses of From that arise while the + // replacement is happening, because any such uses would be the result + // of CSE: If an existing node looks like From after one of its operands + // is replaced by To, we don't want to replace of all its users with To + // too. See PR3018 for more info. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes that for each value of From, there is a +/// corresponding value in To in the same position with the same type. +/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, + DAGUpdateListener *UpdateListener) { +#ifndef NDEBUG + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + assert((!From->hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && + "Cannot use this version of ReplaceAllUsesWith!"); +#endif + + // Handle the trivial case. + if (From == To) + return; + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. 
+ RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, + const SDValue *To, + DAGUpdateListener *UpdateListener) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0], UpdateListener); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, + DAGUpdateListener *UpdateListener){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To, UpdateListener); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(UpdateListener, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. 
+ if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, &Listener); + } +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num, + DAGUpdateListener *UpdateListener){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To, UpdateListener); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. + SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + unsigned i = Uses[UseIndex].Index; + SDUse &Use = *Uses[UseIndex].Use; + ++UseIndex; + + Use.set(To[i]); + } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User, UpdateListener); + } +} + +/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG +/// based on their topological order. It returns the maximum id and a vector +/// of the SDNodes* in assigned order by reference. +unsigned SelectionDAG::AssignTopologicalOrder() { + + unsigned DAGSize = 0; + + // SortedPos tracks the progress of the algorithm. Nodes before it are + // sorted, nodes after it are unsorted. When the algorithm completes + // it is at the end of the list. + allnodes_iterator SortedPos = allnodes_begin(); + + // Visit all the nodes. 
Move nodes with no operands to the front of + // the list immediately. Annotate nodes that do have operands with their + // operand count. Before we do this, the Node Id fields of the nodes + // may contain arbitrary values. After, the Node Id fields for nodes + // before SortedPos will contain the topological sort index, and the + // Node Id fields for nodes At SortedPos and after will contain the + // count of outstanding operands. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { + SDNode *N = I++; + checkForCycles(N); + unsigned Degree = N->getNumOperands(); + if (Degree == 0) { + // A node with no uses, add it to the result array immediately. + N->setNodeId(DAGSize++); + allnodes_iterator Q = N; + if (Q != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); + assert(SortedPos != AllNodes.end() && "Overran node list"); + ++SortedPos; + } else { + // Temporarily use the Node Id as scratch space for the degree count. + N->setNodeId(Degree); + } + } + + // Visit all the nodes. As we iterate, moves nodes into sorted order, + // such that by the time the end is reached all nodes will be sorted. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { + SDNode *N = I; + checkForCycles(N); + // N is in sorted position, so all its uses have one less operand + // that needs to be sorted. + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *P = *UI; + unsigned Degree = P->getNodeId(); + assert(Degree != 0 && "Invalid node degree"); + --Degree; + if (Degree == 0) { + // All of P's operands are sorted, so P may sorted now. + P->setNodeId(DAGSize++); + if (P != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); + assert(SortedPos != AllNodes.end() && "Overran node list"); + ++SortedPos; + } else { + // Update P's outstanding operand count. + P->setNodeId(Degree); + } + } + if (I == SortedPos) { +#ifndef NDEBUG + SDNode *S = ++I; + dbgs() << "Overran sorted position:\n"; + S->dumprFull(); +#endif + llvm_unreachable(0); + } + } + + assert(SortedPos == AllNodes.end() && + "Topological sort incomplete!"); + assert(AllNodes.front().getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token!"); + assert(AllNodes.front().getNodeId() == 0 && + "First node in topological sort has non-zero id!"); + assert(AllNodes.front().getNumOperands() == 0 && + "First node in topological sort has operands!"); + assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && + "Last node in topologic sort has unexpected id!"); + assert(AllNodes.back().use_empty() && + "Last node in topologic sort has users!"); + assert(DAGSize == allnodes_size() && "Node count mismatch!"); + return DAGSize; +} + +/// AssignOrdering - Assign an order to the SDNode. +void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { + assert(SD && "Trying to assign an order to a null node!"); + Ordering->add(SD, Order); +} + +/// GetOrdering - Get the order for the SDNode. +unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { + assert(SD && "Trying to get the order of a null node!"); + return Ordering->getOrder(SD); +} + +/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the +/// value is produced by SD. +void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { + DbgInfo->add(DB, SD, isParameter); + if (SD) + SD->setHasDebugValue(true); +} + +/// TransferDbgValues - Transfer SDDbgValues. 
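+/// An illustrative call sequence (OldVal and NewVal are hypothetical
+/// values, not names from this file):
+///
+///   DAG.TransferDbgValues(OldVal, NewVal);  // clone dbg_value records
+///   DAG.ReplaceAllUsesWith(OldVal, NewVal); // then rewrite the uses
+///
+/// keeps debug info attached to a value across a replacement instead of
+/// silently dropping it.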
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { + if (From == To || !From.getNode()->getHasDebugValue()) + return; + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + SmallVector &DVs = GetDbgValues(FromNode); + SmallVector ClonedDVs; + for (SmallVector::iterator I = DVs.begin(), E = DVs.end(); + I != E; ++I) { + SDDbgValue *Dbg = *I; + if (Dbg->getKind() == SDDbgValue::SDNODE) { + SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), + Dbg->getOffset(), Dbg->getDebugLoc(), + Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } + } + for (SmallVector::iterator I = ClonedDVs.begin(), + E = ClonedDVs.end(); I != E; ++I) + AddDbgValue(*I, ToNode, false); +} + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +HandleSDNode::~HandleSDNode() { + DropOperands(); +} + +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, + EVT VT, int64_t o, unsigned char TF) + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + TheGlobal = GA; +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(isNonTemporal() == MMO->isNonTemporal() && + "Non-temporal encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); +} + +MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, + const SDValue *Ops, unsigned NumOps, EVT memvt, + MachineMemOperand *mmo) + : SDNode(Opc, dl, VTs, Ops, NumOps), + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +namespace { + struct EVTArray { + std::vector VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic > EVTs; +static ManagedStatic SimpleVTArray; +static ManagedStatic > VTMutex; + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const EVT *SDNode::getValueTypeList(EVT VT) { + if (VT.isExtended()) { + sys::SmartScopedLock Lock(*VTMutex); + return &(*EVTs->insert(VT).first); + } else { + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && + "Value type out of range!"); + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? 
+ return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. +bool SDNode::hasAnyUseOfValue(unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) + if (UI.getUse().getResNo() == Value) + return true; + + return false; +} + + +/// isOnlyUserOf - Return true if this node is the only use of N. +/// +bool SDNode::isOnlyUserOf(SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. +/// +bool SDValue::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (*this == N->getOperand(i)) + return true; + return false; +} + +bool SDNode::isOperandOf(SDNode *N) const { + for (unsigned i = 0, e = N->NumOperands; i != e; ++i) + if (this == N->OperandList[i].getNode()) + return true; + return false; +} + +/// reachesChainWithoutSideEffects - Return true if this operand (which must +/// be a chain) reaches the specified operand without crossing any +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. +bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, + unsigned Depth) const { + if (*this == Dest) return true; + + // Don't search too deeply, we just want to be able to see through + // TokenFactor's etc. + if (Depth == 0) return false; + + // If this is a token factor, all inputs to the TF happen in parallel. If any + // of the operands of the TF does not reach dest, then we cannot do the xform. + if (getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; + } + + // Loads don't have side effects, look through them. + if (LoadSDNode *Ld = dyn_cast(*this)) { + if (!Ld->isVolatile()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + +/// isPredecessorOf - Return true if this node is a predecessor of N. This node +/// is either an operand of N or it can be reached by traversing up the operands. +/// NOTE: this is an expensive method. Use it carefully. 
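+///
+/// Illustrative use (Candidate is a hypothetical value): before making
+/// Candidate an operand of N, check that the new edge cannot close a cycle:
+///
+///   if (!N->isPredecessorOf(Candidate.getNode())) {
+///     // Safe: N does not transitively feed Candidate, so adding the
+///     // Candidate -> N edge cannot create a cycle in the DAG.
+///   }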
+bool SDNode::isPredecessorOf(SDNode *N) const {
+  SmallPtrSet<SDNode *, 32> Visited;
+  SmallVector<SDNode *, 16> Worklist;
+  Worklist.push_back(N);
+
+  do {
+    N = Worklist.pop_back_val();
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+      SDNode *Op = N->getOperand(i).getNode();
+      if (Op == this)
+        return true;
+      if (Visited.insert(Op))
+        Worklist.push_back(Op);
+    }
+  } while (!Worklist.empty());
+
+  return false;
+}
+
+uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
+  assert(Num < NumOperands && "Invalid child # of SDNode!");
+  return cast<ConstantSDNode>(OperandList[Num])->getZExtValue();
+}
+
+std::string SDNode::getOperationName(const SelectionDAG *G) const {
+  switch (getOpcode()) {
+  default:
+    if (getOpcode() < ISD::BUILTIN_OP_END)
+      return "<<Unknown DAG Node>>";
+    if (isMachineOpcode()) {
+      if (G)
+        if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
+          if (getMachineOpcode() < TII->getNumOpcodes())
+            return TII->get(getMachineOpcode()).getName();
+      return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
+    }
+    if (G) {
+      const TargetLowering &TLI = G->getTargetLoweringInfo();
+      const char *Name = TLI.getTargetNodeName(getOpcode());
+      if (Name) return Name;
+      return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
+    }
+    return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
+
+#ifndef NDEBUG
+  case ISD::DELETED_NODE:
+    return "<<Deleted Node!>>";
+#endif
+  case ISD::PREFETCH:      return "Prefetch";
+  case ISD::MEMBARRIER:    return "MemBarrier";
+  case ISD::ATOMIC_CMP_SWAP:    return "AtomicCmpSwap";
+  case ISD::ATOMIC_SWAP:        return "AtomicSwap";
+  case ISD::ATOMIC_LOAD_ADD:    return "AtomicLoadAdd";
+  case ISD::ATOMIC_LOAD_SUB:    return "AtomicLoadSub";
+  case ISD::ATOMIC_LOAD_AND:    return "AtomicLoadAnd";
+  case ISD::ATOMIC_LOAD_OR:     return "AtomicLoadOr";
+  case ISD::ATOMIC_LOAD_XOR:    return "AtomicLoadXor";
+  case ISD::ATOMIC_LOAD_NAND:   return "AtomicLoadNand";
+  case ISD::ATOMIC_LOAD_MIN:    return "AtomicLoadMin";
+  case ISD::ATOMIC_LOAD_MAX:    return "AtomicLoadMax";
+  case ISD::ATOMIC_LOAD_UMIN:   return "AtomicLoadUMin";
+  case ISD::ATOMIC_LOAD_UMAX:   return "AtomicLoadUMax";
+  case ISD::PCMARKER:      return "PCMarker";
+  case ISD::READCYCLECOUNTER: return "ReadCycleCounter";
+  case ISD::SRCVALUE:      return "SrcValue";
+  case ISD::MDNODE_SDNODE: return "MDNode";
+  case ISD::EntryToken:    return "EntryToken";
+  case ISD::TokenFactor:   return "TokenFactor";
+  case ISD::AssertSext:    return "AssertSext";
+  case ISD::AssertZext:    return "AssertZext";
+
+  case ISD::BasicBlock:    return "BasicBlock";
+  case ISD::VALUETYPE:     return "ValueType";
+  case ISD::Register:      return "Register";
+
+  case ISD::Constant:      return "Constant";
+  case ISD::ConstantFP:    return "ConstantFP";
+  case ISD::GlobalAddress: return "GlobalAddress";
+  case ISD::GlobalTLSAddress: return "GlobalTLSAddress";
+  case ISD::FrameIndex:    return "FrameIndex";
+  case ISD::JumpTable:     return "JumpTable";
+  case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
+  case ISD::RETURNADDR: return "RETURNADDR";
+  case ISD::FRAMEADDR: return "FRAMEADDR";
+  case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET";
+  case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR";
+  case ISD::LSDAADDR: return "LSDAADDR";
+  case ISD::EHSELECTION: return "EHSELECTION";
+  case ISD::EH_RETURN: return "EH_RETURN";
+  case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+  case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+  case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
+  case ISD::ConstantPool:  return "ConstantPool";
+  case ISD::ExternalSymbol: return "ExternalSymbol";
+  case ISD::BlockAddress:  return "BlockAddress";
+  case ISD::INTRINSIC_WO_CHAIN:
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN: {
+    unsigned OpNo = 
getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: return "TargetConstant"; + case ISD::TargetConstantFP:return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::VSETCC: return "vsetcc"; + case ISD::SELECT: return "select"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case 
ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. + case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bit_convert"; + case ISD::FP16_TO_FP32: return "fp16_to_fp32"; + case ISD::FP32_TO_FP16: return "fp32_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + + // Bit manipulation + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTLZ: return "ctlz"; + + // Trampolines + case ISD::TRAMPOLINE: return "trampoline"; + + case ISD::CONDCODE: + switch (cast(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setue"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + } + } +} + +const char 
*SDNode::getIndexedModeName(ISD::MemIndexedMode AM) {
+  switch (AM) {
+  default:
+    return "";
+  case ISD::PRE_INC:
+    return "<pre-inc>";
+  case ISD::PRE_DEC:
+    return "<pre-dec>";
+  case ISD::POST_INC:
+    return "<post-inc>";
+  case ISD::POST_DEC:
+    return "<post-dec>";
+  }
+}
+
+std::string ISD::ArgFlagsTy::getArgFlagsString() {
+  std::string S = "< ";
+
+  if (isZExt())
+    S += "zext ";
+  if (isSExt())
+    S += "sext ";
+  if (isInReg())
+    S += "inreg ";
+  if (isSRet())
+    S += "sret ";
+  if (isByVal())
+    S += "byval ";
+  if (isNest())
+    S += "nest ";
+  if (getByValAlign())
+    S += "byval-align:" + utostr(getByValAlign()) + " ";
+  if (getOrigAlign())
+    S += "orig-align:" + utostr(getOrigAlign()) + " ";
+  if (getByValSize())
+    S += "byval-size:" + utostr(getByValSize()) + " ";
+  return S + ">";
+}
+
+void SDNode::dump() const { dump(0); }
+void SDNode::dump(const SelectionDAG *G) const {
+  print(dbgs(), G);
+  dbgs() << '\n';
+}
+
+void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
+  OS << (void*)this << ": ";
+
+  for (unsigned i = 0, e = getNumValues(); i != e; ++i) {
+    if (i) OS << ",";
+    if (getValueType(i) == MVT::Other)
+      OS << "ch";
+    else
+      OS << getValueType(i).getEVTString();
+  }
+  OS << " = " << getOperationName(G);
+}
+
+void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+  if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
+    if (!MN->memoperands_empty()) {
+      OS << "<";
+      OS << "Mem:";
+      for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(),
+           e = MN->memoperands_end(); i != e; ++i) {
+        OS << **i;
+        if (llvm::next(i) != e)
+          OS << " ";
+      }
+      OS << ">";
+    }
+  } else if (const ShuffleVectorSDNode *SVN =
+               dyn_cast<ShuffleVectorSDNode>(this)) {
+    OS << "<";
+    for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) {
+      int Idx = SVN->getMaskElt(i);
+      if (i) OS << ",";
+      if (Idx < 0)
+        OS << "u";
+      else
+        OS << Idx;
+    }
+    OS << ">";
+  } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
+    OS << '<' << CSDN->getAPIntValue() << '>';
+  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
+    if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+      OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
+    else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+      OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
+    else {
+      OS << "<APFloat(";
+      CSDN->getValueAPF().bitcastToAPInt().dump();
+      OS << ")>";
+    }
+  } else if (const GlobalAddressSDNode *GADN =
+             dyn_cast<GlobalAddressSDNode>(this)) {
+    int64_t offset = GADN->getOffset();
+    OS << '<';
+    WriteAsOperand(OS, GADN->getGlobal());
+    OS << '>';
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = GADN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
+    OS << "<" << FIDN->getIndex() << ">";
+  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
+    OS << "<" << JTDN->getIndex() << ">";
+    if (unsigned int TF = JTDN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
+    int offset = CP->getOffset();
+    if (CP->isMachineConstantPoolEntry())
+      OS << "<" << *CP->getMachineCPVal() << ">";
+    else
+      OS << "<" << *CP->getConstVal() << ">";
+    if (offset > 0)
+      OS << " + " << offset;
+    else
+      OS << " " << offset;
+    if (unsigned int TF = CP->getTargetFlags())
+      OS << " [TF=" << TF << ']';
+  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
+    OS << "<";
+    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
+    if (LBB)
+      OS << LBB->getName() << " ";
+    OS << 
(const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast(this)) { + OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << ""; + } else if (const MDNodeSDNode *MD = dyn_cast(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << ""; + } else if (const VTSDNode *N = dyn_cast(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast(this)) { + OS << "<"; + WriteAsOperand(OS, BA->getBlockAddress()->getFunction(), false); + OS << ", "; + WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); + OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } + + if (G) + if (unsigned Order = G->GetOrdering(this)) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + DebugLoc dl = getDebugLoc(); + if (G && !dl.isUnknown()) { + DIScope + Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); + OS << " dbg:"; + // Omit the directory, since it's usually long and uninteresting. + if (Scope.Verify()) + OS << Scope.getFilename(); + else + OS << ""; + OS << ':' << dl.getLine(); + if (dl.getCol() != 0) + OS << ':' << dl.getCol(); + } +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + OS << (void*)getOperand(i).getNode(); + if (unsigned RN = getOperand(i).getResNo()) + OS << ":" << RN; + } + print_details(OS, G); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) +{ + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. 
+ if (N->getOperand(i).getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 10); +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getNode()->hasOneUse()) + DumpNodes(N->getOperand(i).getNode(), indent+2, G); + else + dbgs() << "\n" << std::string(indent+2, ' ') + << (void*)N->getOperand(i).getNode() << ": "; + + + dbgs() << "\n"; + dbgs().indent(indent); + N->dump(G); +} + +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + + SmallVector Scalars; + SmallVector Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. + Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); + break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + return getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*getContext(), EltVT, ResNE), + &Scalars[0], Scalars.size()); +} + + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. 
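+///
+/// Illustrative query (LD and Base are hypothetical loads): if Base loads
+/// 4 bytes from a stack slot and LD loads the next 4 bytes of that slot,
+///
+///   DAG.isConsecutiveLoad(LD, Base, 4, 1);  // true: one 4-byte unit apart
+///
+/// which is how callers recognize adjacent loads that can be merged into a
+/// single wider load.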
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast(Loc)->getIndex(); + int BFI = cast(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + // Handle X+C + if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && + cast(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) + return true; + + const GlobalValue *GV1 = NULL; + const GlobalValue *GV2 = NULL; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + const GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + // If GV has specified alignment, then use it. Otherwise, use the preferred + // alignment. + unsigned Align = GV->getAlignment(); + if (!Align) { + if (const GlobalVariable *GVar = dyn_cast(GV)) { + if (GVar->hasInitializer()) { + const TargetData *TD = TLI.getTargetData(); + Align = TD->getPreferredAlignment(GVar); + } + } + } + return MinAlign(Align, GVOffset); + } + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. + int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (isBaseWithConstantOffset(Ptr) && + isa(Ptr.getOperand(0))) { + // Handle FI+Cst + FrameIdx = cast(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return FIInfoAlign; + } + + return 0; +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = I; + if (!N->hasOneUse() && N != getRoot().getNode()) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + print_types(OS, G); + print_details(OS, G); +} + +typedef SmallPtrSet VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N)) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. 
+  OS << std::string(indent, ' ');
+  N->printr(OS, G);
+
+  // Having printed this SDNode, walk the children:
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+
+    if (i) OS << ",";
+    OS << " ";
+
+    if (child->getNumOperands() == 0) {
+      // This child has no grandchildren; print it inline right here.
+      child->printr(OS, G);
+      once.insert(child);
+    } else {         // Just the address. FIXME: also print the child's opcode.
+      OS << (void*)child;
+      if (unsigned RN = N->getOperand(i).getResNo())
+        OS << ":" << RN;
+    }
+  }
+
+  OS << "\n";
+
+  // Dump children that have grandchildren on their own line(s).
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    const SDNode *child = N->getOperand(i).getNode();
+    DumpNodesr(OS, child, indent+2, G, once);
+  }
+}
+
+void SDNode::dumpr() const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, 0, once);
+}
+
+void SDNode::dumpr(const SelectionDAG *G) const {
+  VisitedSDNodeSet once;
+  DumpNodesr(dbgs(), this, 0, G, once);
+}
+
+
+// getAddressSpace - Return the address space this GlobalAddress belongs to.
+unsigned GlobalAddressSDNode::getAddressSpace() const {
+  return getGlobal()->getType()->getAddressSpace();
+}
+
+
+const Type *ConstantPoolSDNode::getType() const {
+  if (isMachineConstantPoolEntry())
+    return Val.MachineCPVal->getType();
+  return Val.ConstVal->getType();
+}
+
+bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
+                                        APInt &SplatUndef,
+                                        unsigned &SplatBitSize,
+                                        bool &HasAnyUndefs,
+                                        unsigned MinSplatBits,
+                                        bool isBigEndian) {
+  EVT VT = getValueType(0);
+  assert(VT.isVector() && "Expected a vector type");
+  unsigned sz = VT.getSizeInBits();
+  if (MinSplatBits > sz)
+    return false;
+
+  SplatValue = APInt(sz, 0);
+  SplatUndef = APInt(sz, 0);
+
+  // Get the bits. Bits with undefined values (when the corresponding element
+  // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared
+  // in SplatValue. If any of the values are not constant, give up and return
+  // false.
+  unsigned int nOps = getNumOperands();
+  assert(nOps > 0 && "isConstantSplat has 0-size build vector");
+  unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+
+  for (unsigned j = 0; j < nOps; ++j) {
+    unsigned i = isBigEndian ? nOps-1-j : j;
+    SDValue OpVal = getOperand(i);
+    unsigned BitPos = j * EltBitSize;
+
+    if (OpVal.getOpcode() == ISD::UNDEF)
+      SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
+    else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
+      SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
+                    zextOrTrunc(sz) << BitPos;
+    else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
+      SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz)
+                    << BitPos;
+    else
+      return false;
+  }
+
+  // The build_vector is all constants or undefs. Find the smallest element
+  // size that splats the vector.
+
+  HasAnyUndefs = (SplatUndef != 0);
+  while (sz > 8) {
+
+    unsigned HalfSize = sz / 2;
+    APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+    APInt LowValue = SplatValue.trunc(HalfSize);
+    APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+    APInt LowUndef = SplatUndef.trunc(HalfSize);
+
+    // If the two halves do not match (ignoring undef bits), stop here.
+    if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
+        MinSplatBits > HalfSize)
+      break;
+
+    SplatValue = HighValue | LowValue;
+    SplatUndef = HighUndef & LowUndef;
+
+    sz = HalfSize;
+  }
+
+  SplatBitSize = sz;
+  return true;
+}
+
+bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
+  // Find the first non-undef value in the shuffle mask.
+  unsigned i, e;
+  for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
+    /* search */;
+
+  assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+
+  // Make sure all remaining elements are either undef or the same as the first
+  // non-undef value.
+  for (int Idx = Mask[i]; i != e; ++i)
+    if (Mask[i] >= 0 && Mask[i] != Idx)
+      return false;
+  return true;
+}
+
+#ifdef XDEBUG
+static void checkForCyclesHelper(const SDNode *N,
+                                 SmallPtrSet<const SDNode*, 32> &Visited,
+                                 SmallPtrSet<const SDNode*, 32> &Checked) {
+  // If this node has already been checked, don't check it again.
+  if (Checked.count(N))
+    return;
+
+  // If a node has already been visited on this depth-first walk, reject it as
+  // a cycle.
+  if (!Visited.insert(N)) {
+    dbgs() << "Offending node:\n";
+    N->dumprFull();
+    errs() << "Detected cycle in SelectionDAG\n";
+    abort();
+  }
+
+  for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+  Checked.insert(N);
+  Visited.erase(N);
+}
+#endif
+
+void llvm::checkForCycles(const llvm::SDNode *N) {
+#ifdef XDEBUG
+  assert(N && "Checking nonexistent SDNode");
+  SmallPtrSet<const SDNode*, 32> visited;
+  SmallPtrSet<const SDNode*, 32> checked;
+  checkForCyclesHelper(N, visited, checked);
+#endif
+}
+
+void llvm::checkForCycles(const llvm::SelectionDAG *DAG) {
+  checkForCycles(DAG->getRoot().getNode());
+}
diff --git a/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
new file mode 100644
index 00000000000..48d9bbb5132
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -0,0 +1,6484 @@
+//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "SDNodeDbgValue.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Constants.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+using namespace llvm;
+
+/// LimitFloatPrecision - Generate low-precision inline sequences for
+/// some float libcalls (6, 8 or 12 bits).
+static unsigned LimitFloatPrecision;
+
+static cl::opt<unsigned, true>
+LimitFPPrecision("limit-float-precision",
+                 cl::desc("Generate low-precision inline sequences "
+                          "for some float libcalls"),
+                 cl::location(LimitFloatPrecision),
+                 cl::init(0));
+
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+//   %buffer = alloca [4096 x i8]
+//   %data = load [4096 x i8]* %argPtr
+//   store [4096 x i8] %data, [4096 x i8]* %buffer
+static cl::opt<unsigned>
+MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
+                  cl::init(64), cl::Hidden);
+
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
+                                      const SDValue *Parts, unsigned NumParts,
+                                      EVT PartVT, EVT ValueVT);
+
+/// getCopyFromParts - Create a value that contains the specified legal parts
+/// combined into the value they represent.
If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, + unsigned NumParts, EVT PartVT, EVT ValueVT, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT); + + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); + SDValue Lo, Hi; + + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, + PartVT, HalfVT); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, + RoundParts / 2, PartVT, HalfVT); + } else { + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); + } + + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); + Hi = getCopyFromParts(DAG, DL, + Parts + RoundParts, OddParts, PartVT, OddVT); + + // Combine the round and odd parts. + Lo = Val; + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), + TLI.getPointerTy())); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); + } + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); + if (TLI.isBigEndian()) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isInteger() && ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. 
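+      // Illustrative example (values chosen for exposition): if ValueVT is i1
+      // and PartVT is i8, an AssertOp of ISD::AssertZext records that the top
+      // 7 bits of Val are already zero, letting later combines drop redundant
+      // masking around the TRUNCATE emitted below.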
+ if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, DL, PartVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); + } + + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + // FP_ROUND's are always exact here. + if (ValueVT.bitsLT(Val.getValueType())) + return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getIntPtrConstant(1)); + + return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); + } + + if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + llvm_unreachable("Unknown mismatch!"); + return SDValue(); +} + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, + const SDValue *Parts, unsigned NumParts, + EVT PartVT, EVT ValueVT) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT == Parts[0].getValueType() && + "Part type doesn't match part!"); + + // Assemble the parts into intermediate operands. + SmallVector Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? + ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, + ValueVT, &Ops[0], NumIntermediates); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + PartVT = Val.getValueType(); + + if (PartVT == ValueVT) + return Val; + + if (PartVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. 
+ if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getIntPtrConstant(0)); + } + + // Vector/Vector bitcast. + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } + + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial scalar-to-vector conversions should get here!"); + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + + + + +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT); + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); + + if (NumParts == 0) + return; + + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + if (PartVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartVT != ValueVT); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The value may have changed - recompute ValueVT. + ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + assert(PartVT == ValueVT && "Type conversion failed!"); + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. 
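+    // Illustrative example: splitting an i96 value into three i32 parts gives
+    // RoundParts = 2; the high 32 bits are shifted down with SRL and copied
+    // out as the odd part, and the remaining i64 falls through to the
+    // power-of-2 expansion below.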
+ assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT); + + if (TLI.isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BITCAST, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); + } + } + } + + if (TLI.isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} + + +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + EVT PartVT) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (NumParts == 1) { + if (PartVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartVT.getVectorElementType() == ValueVT.getVectorElementType()&& + PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. + SmallVector Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + ElementVT, Val, DAG.getIntPtrConstant(i))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size()); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else { + // Vector -> scalar conversion. + assert(ValueVT.getVectorElementType() == PartVT && + ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + PartVT, Val, DAG.getIntPtrConstant(0)); + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. 
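+  // Illustrative example: copying a <8 x i32> value on a target whose widest
+  // legal vector is <4 x i32> yields IntermediateVT = <4 x i32> and
+  // NumIntermediates = 2; the two halves are split out below with
+  // EXTRACT_SUBVECTOR at element offsets 0 and 4.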
+ EVT IntermediateVT, RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, + NumIntermediates, RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) { + if (IntermediateVT.isVector()) + Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, + IntermediateVT, Val, + DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + else + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + IntermediateVT, Val, DAG.getIntPtrConstant(i)); + } + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT); + } +} + + + + +namespace { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information + /// about the value. The most common situation is to represent one value at a + /// time, but struct or array values are handled element-wise as multiple + /// values. The splitting of aggregates is performed recursively, so that we + /// never have aggregate-typed registers. The values at this point do not + /// necessarily have legal types, so each value may require one or more + /// registers of some legal type. + /// + struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. 
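+    /// For example, an i64 value on a 32-bit target is described here as
+    /// ValueVTs = {i64} and RegVTs = {i32}, with Regs holding the two
+    /// registers for the low and high halves.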
+    ///
+    SmallVector<unsigned, 4> Regs;
+
+    RegsForValue() {}
+
+    RegsForValue(const SmallVector<unsigned, 4> &regs,
+                 EVT regvt, EVT valuevt)
+      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+                 unsigned Reg, const Type *Ty) {
+      ComputeValueVTs(tli, Ty, ValueVTs);
+
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT ValueVT = ValueVTs[Value];
+        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+        for (unsigned i = 0; i != NumRegs; ++i)
+          Regs.push_back(Reg + i);
+        RegVTs.push_back(RegisterVT);
+        Reg += NumRegs;
+      }
+    }
+
+    /// areValueTypesLegal - Return true if types of all the values are legal.
+    bool areValueTypesLegal(const TargetLowering &TLI) {
+      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+        EVT RegisterVT = RegVTs[Value];
+        if (!TLI.isTypeLegal(RegisterVT))
+          return false;
+      }
+      return true;
+    }
+
+    /// append - Add the specified values to this one.
+    void append(const RegsForValue &RHS) {
+      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+    }
+
+    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+    /// this value and returns the result as a ValueVTs value. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+                            DebugLoc dl,
+                            SDValue &Chain, SDValue *Flag) const;
+
+    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+    /// specified value into the registers specified by this object. This uses
+    /// Chain/Flag as the input and updates them for the output Chain/Flag.
+    /// If the Flag pointer is NULL, no flag is used.
+    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+                       SDValue &Chain, SDValue *Flag) const;
+
+    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+    /// operand list. This adds the code marker, matching input operand index
+    /// (if applicable), and includes the number of values added into it.
+    void AddInlineAsmOperands(unsigned Kind,
+                              bool HasMatching, unsigned MatchingIdx,
+                              SelectionDAG &DAG,
+                              std::vector<SDValue> &Ops) const;
+  };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+                                      FunctionLoweringInfo &FuncInfo,
+                                      DebugLoc dl,
+                                      SDValue &Chain, SDValue *Flag) const {
+  // A Value with type {} or [0 x %t] needs no registers.
+  if (ValueVTs.empty())
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
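+    // (Continuing the i64-on-a-32-bit-target example from above: two
+    // CopyFromReg nodes are emitted here, and getCopyFromParts reassembles
+    // them into a single i64 value.)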
+ EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + Parts[i] = P; + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + !RegisterVT.isInteger() || RegisterVT.isVector()) + continue; + + const FunctionLoweringInfo::LiveOutInfo *LOI = + FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); + if (!LOI) + continue; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI->NumSignBits; + unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; + + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. 
+ SmallVector Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} + +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { + AA = &aa; + GFI = gfi; + TD = DAG.getTarget().getTargetData(); +} + +/// clear - Clear out the current SelectionDAG and the associated +/// state and prepare this SelectionDAGBuilder object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGBuilder::clear() { + NodeMap.clear(); + UnusedArgNodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + DanglingDebugInfoMap.clear(); + CurDebugLoc = DebugLoc(); + HasTailCall = false; +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingLoads[0], PendingLoads.size()); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. 
+/// +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + &PendingExports[0], + PendingExports.size()); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { + if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. + DAG.AssignOrdering(Node, SDNodeOrder); + + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) + AssignOrderingToNode(Node->getOperand(I).getNode()); +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + CurDebugLoc = I.getDebugLoc(); + + visit(I.getOpcode(), I); + + if (!isa(&I) && !HasTailCall) + CopyToExportRegsIfNeeded(&I); + + CurDebugLoc = DebugLoc(); +} + +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break; +#include "llvm/Instruction.def" + } + + // Assign the ordering to the freshly created DAG nodes. + if (NodeMap.count(&I)) { + ++SDNodeOrder; + AssignOrderingToNode(getValue(&I).getNode()); + } +} + +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + MDNode *Variable = DI->getVariable(); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) { + SDV = DAG.getDbgValue(Variable, Val.getNode(), + Val.getResNo(), Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else + DEBUG(dbgs() << "Dropping debug info for " << DI); + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + +// getValue - Return an SDValue for the given Value. +SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. 
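+  // (This is how values defined in other blocks reach this one: the defining
+  // block exports them with CopyValueToVirtualRegister, and they are read
+  // back here with CopyFromReg through FuncInfo.ValueMap.)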
+ DenseMap::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL); + resolveDanglingDebugInfo(V, N); + return N; + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getValueImpl - Helper function for getValue and getNonRegisterValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + if (const Constant *C = dyn_cast(V)) { + EVT VT = TLI.getValueType(V->getType(), true); + + if (const ConstantInt *CI = dyn_cast(C)) + return DAG.getConstant(*CI, VT); + + if (const GlobalValue *GV = dyn_cast(C)) + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + + if (isa(C)) + return DAG.getConstant(0, TLI.getPointerTy()); + + if (const ConstantFP *CFP = dyn_cast(C)) + return DAG.getConstantFP(*CFP, VT); + + if (isa(C) && !V->getType()->isAggregateType()) + return DAG.getUNDEF(VT); + + if (const ConstantExpr *CE = dyn_cast(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } + + if (isa(C) || isa(C)) { + SmallVector Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + + return DAG.getMergeValues(&Constants[0], Constants.size(), + getCurDebugLoc()); + } + + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa(C) || isa(C)) && + "Unknown struct or array constant!"); + + SmallVector ValueVTs; + ComputeValueVTs(TLI, C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, EltVT); + else + Constants[i] = DAG.getConstant(0, EltVT); + } + + return DAG.getMergeValues(&Constants[0], NumElts, + getCurDebugLoc()); + } + + if (const BlockAddress *BA = dyn_cast(C)) + return DAG.getBlockAddress(BA, VT); + + const VectorType *VecTy = cast(V->getType()); + unsigned NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. 
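+    // Illustrative example: <4 x i32> <i32 1, i32 2, i32 3, i32 4> contributes
+    // one constant operand per element, while a zeroinitializer of the same
+    // type becomes four copies of the i32 zero constant.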
+ SmallVector Ops; + if (const ConstantVector *CP = dyn_cast(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CP->getOperand(i))); + } else { + assert(isa(C) && "Unknown vector constant!"); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, EltVT); + else + Op = DAG.getConstant(0, EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + VT, &Ops[0], Ops.size()); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast(V)) { + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + } + + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); +} + +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + SDValue Chain = getControlRoot(); + SmallVector Outs; + SmallVector OutVals; + + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector PtrValueVTs; + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + + SmallVector ValueVTs; + SmallVector Offsets; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + + SmallVector Chains(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i])); + Chains[i] = + DAG.getStore(Chain, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. + Add, MachinePointerInfo(), false, false, 0); + } + + Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + MVT::Other, &Chains[0], NumValues); + } else if (I.getNumOperands() != 0) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + const Function *F = I.getParent()->getParent(); + if (F->paramHasAttr(0, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->paramHasAttr(0, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted + // to at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
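+        // Illustrative example: a 'signext i8' return value is widened here
+        // to the 32-bit register type, and getCopyToParts below emits the
+        // copy with ISD::SIGN_EXTEND.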
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); + EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + SmallVector Parts(NumParts); + getCopyToParts(DAG, getCurDebugLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (F->paramHasAttr(0, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (F->paramHasAttr(0, Attribute::SExt)) + Flags.setSExt(); + else if (F->paramHasAttr(0, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } + } + } + } + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurDebugLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa(V) && !isa(V)) return; + + // Already exported? + if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast(V)) + return I->getParent() == BB; + return true; +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. 
+/// +void +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. + if (CurBB == SwitchBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (const ICmpInst *IC = dyn_cast(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else if (const FCmpInst *FC = dyn_cast(Cond)) { + Condition = getFCmpCondCode(FC->getPredicate()); + } else { + Condition = ISD::SETEQ; // silence warning. + llvm_unreachable("Unknown compare instruction"); + } + + CaseBlock CB(Condition, BOp->getOperand(0), + BOp->getOperand(1), NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + NULL, TBB, FBB, CurBB); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + unsigned Opc) { + // If this node is not part of the or/and tree, emit it as a branch. + const Instruction *BOp = dyn_cast(Cond); + if (!BOp || !(isa(BOp) || isa(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI = CurBB; + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc); + + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. 
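+/// For example, (X == 0) & (Y == 0) is better emitted as the single test
+/// (X|Y) == 0 than as two separate branch blocks, so this returns false for
+/// that pattern.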
+bool +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa(Cases[0].CmpRHS) && + cast(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Figure out which block is immediately after the current one. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = BrMBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + BrMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch, emit the branch. + if (Succ0MBB != NextBlock) + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. + // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (const BinaryOperator *BOp = dyn_cast(CondVal)) { + if (!TLI.isJumpExpensive() && + BOp->hasOneUse() && + (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + BOp->getOpcode()); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0], BrMBB); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. 
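+  // An ordinary conditional branch is modeled as the degenerate case block
+  // "CondVal == true"; visitSwitchCase folds the comparison against true
+  // away again when it emits the BRCOND.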
+ CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + NULL, Succ0MBB, Succ1MBB, BrMBB); + + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + DebugLoc dl = getCurDebugLoc(); + + // Build the setcc now. + if (CB.CmpMHS == NULL) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast(CB.CmpLHS)->getValue(); + const APInt& High = cast(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + EVT VT = CmpOp.getValueType(); + + if (cast(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, VT), ISD::SETULE); + } + } + + // Update successor info + SwitchBB->addSuccessor(CB.TrueBB); + SwitchBB->addSuccessor(CB.FalseBB); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); + + DAG.setRoot(BrCond); +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + EVT PTy = TLI.getPointerTy(); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(), + MVT::Other, Index.getValue(1), + Table, Index); + DAG.setRoot(BrJumpTable); +} + +/// visitJumpTableHeader - This function emits necessary code to produce index +/// in the JumpTable from switch case. 
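+/// For example, for case values 10..14 the header computes
+/// Index = SValue - 10 and branches to the default block if Index is
+/// (unsigned) greater than 4; otherwise Index is used to address the table.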
+void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { + // Subtract the lowest switch case value from the value being switched on and + // conditional branch to default mbb if the result is greater than the + // difference between smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(JTH.First, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncating. + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); + + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), Sub, + DAG.getConstant(JTH.Last-JTH.First,VT), + ISD::SETUGT); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + if (JT.MBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB)); + + DAG.setRoot(BrCond); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp, + DAG.getConstant(B.First, VT)); + + // Check range + SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(Sub.getValueType()), + Sub, DAG.getConstant(B.Range, VT), + ISD::SETUGT); + + // Determine the type of the test operands. + bool UsePtrType = false; + if (!TLI.isTypeLegal(VT)) + UsePtrType = true; + else { + for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) + if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + UsePtrType = true; + break; + } + } + if (UsePtrType) { + VT = TLI.getPointerTy(); + Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); + } + + B.RegVT = VT; + B.Reg = FuncInfo.CreateReg(VT); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), + B.Reg, Sub); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. 
+ MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + SwitchBB->addSuccessor(B.Default); + SwitchBB->addSuccessor(MBB); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + if (MBB != NextBlock) + BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo, + DAG.getBasicBlock(MBB)); + + DAG.setRoot(BrRange); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + EVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), + Reg, VT); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), VT), + ISD::SETEQ); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + DAG.getConstant(1, VT), ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + VT, SwitchVal, DAG.getConstant(B.Mask, VT)); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(VT), + AndOp, DAG.getConstant(0, VT), + ISD::SETNE); + } + + SwitchBB->addSuccessor(B.TargetBB); + SwitchBB->addSuccessor(NextMBB); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + MVT::Other, getControlRoot(), + Cmp, DAG.getBasicBlock(B.TargetBB)); + + // Set NextBlock to be the MBB immediately after the current one, if any. + // This is used to avoid emitting unnecessary branches to the next block. + MachineBasicBlock *NextBlock = 0; + MachineFunction::iterator BBI = SwitchBB; + if (++BBI != FuncInfo.MF->end()) + NextBlock = BBI; + + if (NextMBB != NextBlock) + BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB)); + + DAG.setRoot(BrAnd); +} + +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; + + // Retrieve successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; + + const Value *Callee(I.getCalledValue()); + if (isa(Callee)) + visitInlineAsm(&I); + else + LowerCallTo(&I, getValue(Callee), false, LandingPad); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + CopyToExportRegsIfNeeded(&I); + + // Update successor info + InvokeMBB->addSuccessor(Return); + InvokeMBB->addSuccessor(LandingPad); + + // Drop into normal successor. + DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) { +} + +/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for +/// small case ranges). +bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + const Value* SV, + MachineBasicBlock *Default, + MachineBasicBlock *SwitchBB) { + Case& BackCase = *(CR.Range.second-1); + + // Size is the number of Cases represented by this range. 
+  // Size is the number of Cases represented by this range.
+  size_t Size = CR.Range.second - CR.Range.first;
+  if (Size > 3)
+    return false;
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineFunction::iterator BBI = CR.CaseBB;
+
+  if (++BBI != FuncInfo.MF->end())
+    NextBlock = BBI;
+
+  // If any two of the cases have the same destination, and if one value
+  // is the same as the other, but has one bit unset that the other has set,
+  // use bit manipulation to do two compares at once.  For example:
+  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+  // TODO: Handle cases where CR.CaseBB != SwitchBB.
+  if (Size == 2 && CR.CaseBB == SwitchBB) {
+    Case &Small = *CR.Range.first;
+    Case &Big = *(CR.Range.second-1);
+
+    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+      // Check that there is only one bit different.
+      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+          (SmallValue | BigValue) == BigValue) {
+        // Isolate the common bit.
+        APInt CommonBit = BigValue & ~SmallValue;
+        assert((SmallValue | CommonBit) == BigValue &&
+               CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+        SDValue CondLHS = getValue(SV);
+        EVT VT = CondLHS.getValueType();
+        DebugLoc DL = getCurDebugLoc();
+
+        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+                                 DAG.getConstant(CommonBit, VT));
+        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+                                    Or, DAG.getConstant(BigValue, VT),
+                                    ISD::SETEQ);
+
+        // Update successor info.
+        SwitchBB->addSuccessor(Small.BB);
+        SwitchBB->addSuccessor(Default);
+
+        // Insert the true branch.
+        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+                                     getControlRoot(), Cond,
+                                     DAG.getBasicBlock(Small.BB));
+
+        // Insert the false branch.
+        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+                             DAG.getBasicBlock(Default));
+
+        DAG.setRoot(BrCond);
+        return true;
+      }
+    }
+  }
+
+  // Rearrange the case blocks so that the last one falls through if possible.
+  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
+    // The last case block won't fall through into 'NextBlock' if we emit the
+    // branches in this order. See if rearranging a case value would help.
+    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
+      if (I->BB == NextBlock) {
+        std::swap(*I, BackCase);
+        break;
+      }
+    }
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the Case's target mbb if the value being switched on SV is equal
+  // to C.
+  MachineBasicBlock *CurBlock = CR.CaseBB;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
+    MachineBasicBlock *FallThrough;
+    if (I != E-1) {
+      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
+      CurMF->insert(BBI, FallThrough);
+
+      // Put SV in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(SV);
+    } else {
+      // If the last case doesn't match, go to the default block.
+      FallThrough = Default;
+    }
+
+    const Value *RHS, *LHS, *MHS;
+    ISD::CondCode CC;
+    if (I->High == I->Low) {
+      // This is just a small case range that contains exactly one case.
+      CC = ISD::SETEQ;
+      LHS = SV; RHS = I->High; MHS = NULL;
+    } else {
+      CC = ISD::SETLE;
+      LHS = I->Low; MHS = SV; RHS = I->High;
+    }
+    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
+
+    // If emitting the first comparison, just call visitSwitchCase to emit the
+    // code into the current block. Otherwise, push the CaseBlock onto the
+    // vector to be later processed by SDISel, and insert the node's MBB
+    // before the next MBB.
+    if (CurBlock == SwitchBB)
+      visitSwitchCase(CB, SwitchBB);
+    else
+      SwitchCases.push_back(CB);
+
+    CurBlock = FallThrough;
+  }
+
+  return true;
+}
+
+static inline bool areJTsAllowed(const TargetLowering &TLI) {
+  return !DisableJumpTables &&
+         (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
+          TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
+}
+
+static APInt ComputeRange(const APInt &First, const APInt &Last) {
+  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
+  APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
+  return (LastExt - FirstExt + 1ULL);
+}
+
+/// handleJTSwitchCase - Emit a jump table for the current switch case range.
+bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
+                                             CaseRecVector& WorkList,
+                                             const Value* SV,
+                                             MachineBasicBlock* Default,
+                                             MachineBasicBlock *SwitchBB) {
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  if (!areJTsAllowed(TLI) || TSize.ult(4))
+    return false;
+
+  APInt Range = ComputeRange(First, Last);
+  double Density = TSize.roundToDouble() / Range.roundToDouble();
+  if (Density < 0.4)
+    return false;
+
+  DEBUG(dbgs() << "Lowering jump table\n"
+               << "First entry: " << First << ". Last entry: " << Last << '\n'
+               << "Range: " << Range
+               << ". Size: " << TSize << ". Density: " << Density << "\n\n");
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Create a new basic block to hold the code for loading the address
+  // of the jump table, and jumping to it. Update successor information;
+  // we will either branch to the default case for the switch, or the jump
+  // table.
+  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+  CurMF->insert(BBI, JumpTableBB);
+  CR.CaseBB->addSuccessor(Default);
+  CR.CaseBB->addSuccessor(JumpTableBB);
+
+  // Build a vector of destination BBs, corresponding to each target
+  // of the jump table. If the value of the jump table slot corresponds to
+  // a case statement, push the case's BB onto the vector; otherwise, push
+  // the default BB.
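+  // For illustration (hypothetical cases): with case values {1, 2, 3, 7}
+  // (TSize = 4, Range = 7, Density ~0.57) and default block Def, the table
+  // built below gets seven entries, one per value in [First, Last]:
+  //   [BB1, BB2, BB3, Def, Def, Def, BB7]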
+  std::vector<MachineBasicBlock*> DestBBs;
+  APInt TEI = First;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
+    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
+    const APInt &High = cast<ConstantInt>(I->High)->getValue();
+
+    if (Low.sle(TEI) && TEI.sle(High)) {
+      DestBBs.push_back(I->BB);
+      if (TEI==High)
+        ++I;
+    } else {
+      DestBBs.push_back(Default);
+    }
+  }
+
+  // Update successor info. Add one edge to each unique successor.
+  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
+  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
+         E = DestBBs.end(); I != E; ++I) {
+    if (!SuccsHandled[(*I)->getNumber()]) {
+      SuccsHandled[(*I)->getNumber()] = true;
+      JumpTableBB->addSuccessor(*I);
+    }
+  }
+
+  // Create a jump table index for this jump table.
+  unsigned JTEncoding = TLI.getJumpTableEncoding();
+  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
+                       ->createJumpTableIndex(DestBBs);
+
+  // Set the jump table information so that we can codegen it as a second
+  // MachineBasicBlock.
+  JumpTable JT(-1U, JTI, JumpTableBB, Default);
+  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
+  if (CR.CaseBB == SwitchBB)
+    visitJumpTableHeader(JT, JTH, SwitchBB);
+
+  JTCases.push_back(JumpTableBlock(JTH, JT));
+
+  return true;
+}
+
+/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
+/// tree into two subtrees.
+bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
+                                                  CaseRecVector& WorkList,
+                                                  const Value* SV,
+                                                  MachineBasicBlock *Default,
+                                                  MachineBasicBlock *SwitchBB) {
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  // Size is the number of Cases represented by this range.
+  unsigned Size = CR.Range.second - CR.Range.first;
+
+  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
+  double FMetric = 0;
+  CaseItr Pivot = CR.Range.first + Size/2;
+
+  // Select the optimal pivot, maximizing the summed density of the LHS and
+  // RHS. This will (heuristically) allow us to emit jump tables later.
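+  // For illustration (hypothetical values): with cases {0,1,2,3} and
+  // {100,101,102,103}, splitting at the gap gives LDensity = RDensity = 1.0
+  // and Range = 98 (floor log2 = 6), so Metric = 6 * 2.0 = 12.0, which beats
+  // any split inside either dense run; both four-case halves can then become
+  // jump tables.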
+  APInt TSize(First.getBitWidth(), 0);
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I)
+    TSize += I->size();
+
+  APInt LSize = FrontCase.size();
+  APInt RSize = TSize-LSize;
+  DEBUG(dbgs() << "Selecting best pivot: \n"
+               << "First: " << First << ", Last: " << Last <<'\n'
+               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
+  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
+       J!=E; ++I, ++J) {
+    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
+    APInt Range = ComputeRange(LEnd, RBegin);
+    assert((Range - 2ULL).isNonNegative() &&
+           "Invalid case distance");
+    double LDensity = (double)LSize.roundToDouble() /
+                      (LEnd - First + 1ULL).roundToDouble();
+    double RDensity = (double)RSize.roundToDouble() /
+                      (Last - RBegin + 1ULL).roundToDouble();
+    double Metric = Range.logBase2()*(LDensity+RDensity);
+    // Should always split in some non-trivial place.
+    DEBUG(dbgs() <<"=>Step\n"
+                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
+                 << "LDensity: " << LDensity
+                 << ", RDensity: " << RDensity << '\n'
+                 << "Metric: " << Metric << '\n');
+    if (FMetric < Metric) {
+      Pivot = J;
+      FMetric = Metric;
+      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
+    }
+
+    LSize += J->size();
+    RSize -= J->size();
+  }
+  if (areJTsAllowed(TLI)) {
+    // If our case is dense we *really* should handle it earlier!
+    assert((FMetric > 0) && "Should handle dense range earlier!");
+  } else {
+    Pivot = CR.Range.first + Size/2;
+  }
+
+  CaseRange LHSR(CR.Range.first, Pivot);
+  CaseRange RHSR(Pivot, CR.Range.second);
+  Constant *C = Pivot->Low;
+  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
+
+  // We know that we branch to the LHS if the Value being switched on is
+  // less than the Pivot value, C.  We use this to optimize our binary
+  // tree a bit, by recognizing that if SV is greater than or equal to the
+  // LHS's Case Value, and that Case Value is exactly one less than the
+  // Pivot's Value, then we can branch directly to the LHS's Target,
+  // rather than creating a leaf node for it.
+  if ((LHSR.second - LHSR.first) == 1 &&
+      LHSR.first->High == CR.GE &&
+      cast<ConstantInt>(C)->getValue() ==
+      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
+    TrueBB = LHSR.first->BB;
+  } else {
+    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, TrueBB);
+    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Similar to the optimization above, if the Value being switched on is
+  // known to be less than the Constant CR.LT, and the current Case Value
+  // is CR.LT - 1, then we can branch directly to the target block for
+  // the current Case Value, rather than emitting a RHS leaf node for it.
+  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
+      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
+      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
+    FalseBB = RHSR.first->BB;
+  } else {
+    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, FalseBB);
+    WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  // Create a CaseBlock record representing a conditional branch to
+  // the LHS node if the value being switched on SV is less than C.
+  // Otherwise, branch to RHS.
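+  // For a hypothetical pivot C = 8 over case runs {1..5} and {8..12}, the
+  // comparison below sends SV < 8 to TrueBB (the LHS subtree) and SV >= 8 to
+  // FalseBB (the RHS subtree).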
+  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
+
+  if (CR.CaseBB == SwitchBB)
+    visitSwitchCase(CB, SwitchBB);
+  else
+    SwitchCases.push_back(CB);
+
+  return true;
+}
+
+/// handleBitTestsSwitchCase - If the current case range has few destinations
+/// and the range spans less than the machine word bitwidth, encode the case
+/// range into a series of masks and emit bit tests with these masks.
+bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
+                                                   CaseRecVector& WorkList,
+                                                   const Value* SV,
+                                                   MachineBasicBlock* Default,
+                                                   MachineBasicBlock *SwitchBB){
+  EVT PTy = TLI.getPointerTy();
+  unsigned IntPtrBits = PTy.getSizeInBits();
+
+  Case& FrontCase = *CR.Range.first;
+  Case& BackCase  = *(CR.Range.second-1);
+
+  // Get the MachineFunction which holds the current MBB. This is used when
+  // inserting any additional MBBs necessary to represent the switch.
+  MachineFunction *CurMF = FuncInfo.MF;
+
+  // If the target does not have a legal shift left, do not emit bit tests
+  // at all.
+  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
+    return false;
+
+  size_t numCmps = 0;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second;
+       I!=E; ++I) {
+    // A single case counts as one compare; a case range as two.
+    numCmps += (I->Low == I->High ? 1 : 2);
+  }
+
+  // Count unique destinations.
+  SmallSet<MachineBasicBlock*, 4> Dests;
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    Dests.insert(I->BB);
+    if (Dests.size() > 3)
+      // Don't bother with the code below if there are too many unique
+      // destinations.
+      return false;
+  }
+  DEBUG(dbgs() << "Total number of unique destinations: "
+        << Dests.size() << '\n'
+        << "Total number of comparisons: " << numCmps << '\n');
+
+  // Compute span of values.
+  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
+  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
+  APInt cmpRange = maxValue - minValue;
+
+  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
+               << "Low bound: " << minValue << '\n'
+               << "High bound: " << maxValue << '\n');
+
+  if (cmpRange.uge(IntPtrBits) ||
+      (!(Dests.size() == 1 && numCmps >= 3) &&
+       !(Dests.size() == 2 && numCmps >= 5) &&
+       !(Dests.size() >= 3 && numCmps >= 6)))
+    return false;
+
+  DEBUG(dbgs() << "Emitting bit tests\n");
+  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
+
+  // Optimize the case where all the case values fit in a word without having
+  // to subtract minValue; in that case we can optimize away the subtraction.
+  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
+    cmpRange = maxValue;
+  } else {
+    lowBound = minValue;
+  }
+
+  CaseBitsVector CasesBits;
+  unsigned i, count = 0;
+
+  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
+    MachineBasicBlock* Dest = I->BB;
+    for (i = 0; i < count; ++i)
+      if (Dest == CasesBits[i].BB)
+        break;
+
+    if (i == count) {
+      assert((count < 3) && "Too many destinations to test!");
+      CasesBits.push_back(CaseBits(0, Dest, 0));
+      count++;
+    }
+
+    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
+    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
+
+    uint64_t lo = (lowValue - lowBound).getZExtValue();
+    uint64_t hi = (highValue - lowBound).getZExtValue();
+
+    for (uint64_t j = lo; j <= hi; j++) {
+      CasesBits[i].Mask |= 1ULL << j;
+      CasesBits[i].Bits++;
+    }
+
+  }
+  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
+
+  BitTestInfo BTC;
+
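+  // For illustration (hypothetical cases, lowBound = 0): values {0,3,5}
+  // going to BB_A and {1,4} going to BB_B produce Mask_A = 0x29 (0b101001)
+  // and Mask_B = 0x12 (0b010010); each emitted test is then
+  //   ((1 << (x - lowBound)) & Mask) != 0.
+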
+  // Figure out which block is immediately after the current one.
+  MachineFunction::iterator BBI = CR.CaseBB;
+  ++BBI;
+
+  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
+
+  DEBUG(dbgs() << "Cases:\n");
+  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
+    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
+                 << ", Bits: " << CasesBits[i].Bits
+                 << ", BB: " << CasesBits[i].BB << '\n');
+
+    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
+    CurMF->insert(BBI, CaseBB);
+    BTC.push_back(BitTestCase(CasesBits[i].Mask,
+                              CaseBB,
+                              CasesBits[i].BB));
+
+    // Put SV in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(SV);
+  }
+
+  BitTestBlock BTB(lowBound, cmpRange, SV,
+                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
+                   CR.CaseBB, Default, BTC);
+
+  if (CR.CaseBB == SwitchBB)
+    visitBitTestHeader(BTB, SwitchBB);
+
+  BitTestCases.push_back(BTB);
+
+  return true;
+}
+
+/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
+size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
+                                       const SwitchInst& SI) {
+  size_t numCmps = 0;
+
+  // Start with "simple" cases.
+  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
+    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
+    Cases.push_back(Case(SI.getSuccessorValue(i),
+                         SI.getSuccessorValue(i),
+                         SMBB));
+  }
+  std::sort(Cases.begin(), Cases.end(), CaseCmp());
+
+  // Merge cases into clusters.
+  if (Cases.size() >= 2)
+    // Must recompute end() each iteration because it may be
+    // invalidated by erase if we hold on to it.
+    for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+         J != Cases.end(); ) {
+      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
+      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
+      MachineBasicBlock* nextBB = J->BB;
+      MachineBasicBlock* currentBB = I->BB;
+
+      // If the two neighboring cases go to the same destination, merge them
+      // into a single case.
+      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
+        I->High = J->High;
+        J = Cases.erase(J);
+      } else {
+        I = J++;
+      }
+    }
+
+  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
+    if (I->Low != I->High)
+      // A range counts double, since it requires two compares.
+      ++numCmps;
+  }
+
+  return numCmps;
+}
+
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+                                           MachineBasicBlock *Last) {
+  // Update JTCases.
+  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+    if (JTCases[i].first.HeaderBB == First)
+      JTCases[i].first.HeaderBB = Last;
+
+  // Update BitTestCases.
+  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+    if (BitTestCases[i].Parent == First)
+      BitTestCases[i].Parent = Last;
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+
+  // Figure out which block is immediately after the current one.
+  MachineBasicBlock *NextBlock = 0;
+  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
+
+  // If there is only the default destination, branch to it if it is not the
+  // next basic block. Otherwise, just fall through.
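+  // Note: a SwitchInst's operands are the condition value, the default
+  // destination, and then (value, successor) pairs, so getNumOperands() == 2
+  // means the switch has no non-default cases.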
+  if (SI.getNumOperands() == 2) {
+    // Update machine-CFG edges.
+
+    // If this is not a fall-through branch, emit the branch.
+    SwitchMBB->addSuccessor(Default);
+    if (Default != NextBlock)
+      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
+                              MVT::Other, getControlRoot(),
+                              DAG.getBasicBlock(Default)));
+
+    return;
+  }
+
+  // If there are any non-default case statements, create a vector of Cases
+  // representing each one, and sort the vector so that we can efficiently
+  // create a binary search tree from them.
+  CaseVector Cases;
+  size_t numCmps = Clusterify(Cases, SI);
+  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
+               << ". Total compares: " << numCmps << '\n');
+  numCmps = 0;
+
+  // Get the Value to be switched on and default basic blocks, which will be
+  // inserted into CaseBlock records, representing basic blocks in the binary
+  // search tree.
+  const Value *SV = SI.getOperand(0);
+
+  // Push the initial CaseRec onto the worklist.
+  CaseRecVector WorkList;
+  WorkList.push_back(CaseRec(SwitchMBB,0,0,
+                             CaseRange(Cases.begin(),Cases.end())));
+
+  while (!WorkList.empty()) {
+    // Grab a record representing a case range to process off the worklist.
+    CaseRec CR = WorkList.back();
+    WorkList.pop_back();
+
+    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // If the range has few cases (three or fewer), emit a series of specific
+    // tests.
+    if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // If the switch has at least four case values, is at least 40% dense, and
+    // the target supports indirect branches, then emit a jump table rather
+    // than lowering the switch to a binary tree of conditional branches.
+    if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
+      continue;
+
+    // Emit a binary tree. We need to pick a pivot, and push left and right
+    // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange()
+    // calls.
+    handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
+  }
+}
+
+void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
+  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
+
+  // Update machine-CFG edges with unique successors.
+  SmallVector<BasicBlock*, 32> succs;
+  succs.reserve(I.getNumSuccessors());
+  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
+    succs.push_back(I.getSuccessor(i));
+  array_pod_sort(succs.begin(), succs.end());
+  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
+  for (unsigned i = 0, e = succs.size(); i != e; ++i)
+    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+
+  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
+                          MVT::Other, getControlRoot(),
+                          getValue(I.getAddress())));
+}
+
+void SelectionDAGBuilder::visitFSub(const User &I) {
+  // -0.0 - X --> fneg
+  const Type *Ty = I.getType();
+  if (isa<Constant>(I.getOperand(0)) &&
+      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+    SDValue Op2 = getValue(I.getOperand(1));
+    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+                             Op2.getValueType(), Op2));
+    return;
+  }
+
+  visitBinary(I, ISD::FSUB);
+}
+
+void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+
+  MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
+
+  // Coerce the shift amount to the right type if we can.
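+  // For illustration (assuming a target whose shift-amount type is i32): an
+  // i8 shift amount is ZERO_EXTENDed to i32, while an i64 shift amount is
+  // TRUNCATEd to i32, which is safe because any meaningful shift amount fits
+  // comfortably in 32 bits.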
+  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+    unsigned ShiftSize = ShiftTy.getSizeInBits();
+    unsigned Op2Size = Op2.getValueType().getSizeInBits();
+    DebugLoc DL = getCurDebugLoc();
+
+    // If the operand is smaller than the shift count type, promote it.
+    if (ShiftSize > Op2Size)
+      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
+    // If the operand is larger than the shift count type but the shift
+    // count type has enough bits to represent any shift value, truncate
+    // it now. This is a common case and it exposes the truncate to
+    // optimization early.
+    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+    // Otherwise we'll need to temporarily settle for some other convenient
+    // type. Type legalization will make adjustments once the shiftee is split.
+    else
+      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
+  }
+
+  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
+                           Op1.getValueType(), Op1, Op2));
+}
+
+void SelectionDAGBuilder::visitICmp(const User &I) {
+  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
+  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
+    predicate = IC->getPredicate();
+  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
+    predicate = ICmpInst::Predicate(IC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Opcode = getICmpCondCode(predicate);
+
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
+}
+
+void SelectionDAGBuilder::visitFCmp(const User &I) {
+  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
+  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
+    predicate = FC->getPredicate();
+  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
+    predicate = FCmpInst::Predicate(FC->getPredicate());
+  SDValue Op1 = getValue(I.getOperand(0));
+  SDValue Op2 = getValue(I.getOperand(1));
+  ISD::CondCode Condition = getFCmpCondCode(predicate);
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
+}
+
+void SelectionDAGBuilder::visitSelect(const User &I) {
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0) return;
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SDValue Cond     = getValue(I.getOperand(0));
+  SDValue TrueVal  = getValue(I.getOperand(1));
+  SDValue FalseVal = getValue(I.getOperand(2));
+
+  for (unsigned i = 0; i != NumValues; ++i)
+    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
+                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
+                            Cond,
+                            SDValue(TrueVal.getNode(),
+                                    TrueVal.getResNo() + i),
+                            SDValue(FalseVal.getNode(),
+                                    FalseVal.getResNo() + i));
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitTrunc(const User &I) {
+  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitZExt(const User &I) {
+  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // ZExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do here.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSExt(const User &I) {
+  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
+  // SExt also can't be a cast to bool for the same reason. So, nothing much
+  // to do here.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPTrunc(const User &I) {
+  // FPTrunc is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
+                           DestVT, N, DAG.getIntPtrConstant(0)));
+}
+
+void SelectionDAGBuilder::visitFPExt(const User &I){
+  // FPExt is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToUI(const User &I) {
+  // FPToUI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitFPToSI(const User &I) {
+  // FPToSI is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitUIToFP(const User &I) {
+  // UIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitSIToFP(const User &I){
+  // SIToFP is never a no-op cast, no need to check.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
+}
+
+void SelectionDAGBuilder::visitPtrToInt(const User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitIntToPtr(const User &I) {
+  // What to do depends on the size of the integer and the size of the pointer.
+  // We can either truncate, zero extend, or no-op, accordingly.
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
+}
+
+void SelectionDAGBuilder::visitBitCast(const User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  EVT DestVT = TLI.getValueType(I.getType());
+
+  // BitCast assures us that source and destination are the same size so this
+  // is either a BITCAST or a no-op.
+  if (DestVT != N.getValueType())
+    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+                             DestVT, N)); // convert types.
+  else
+    setValue(&I, N);                      // noop cast.
+}
+
+void SelectionDAGBuilder::visitInsertElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InVal = getValue(I.getOperand(1));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(2)));
+  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()),
+                           InVec, InVal, InIdx));
+}
+
+void SelectionDAGBuilder::visitExtractElement(const User &I) {
+  SDValue InVec = getValue(I.getOperand(0));
+  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
+                              TLI.getPointerTy(),
+                              getValue(I.getOperand(1)));
+  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                           TLI.getValueType(I.getType()), InVec, InIdx));
+}
+
+// Utility for visitShuffleVector - Returns true if the mask selects elements
+// sequentially starting from SIndx (undefs are allowed).
+static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
+  unsigned MaskNumElts = Mask.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i)
+    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
+      return false;
+  return true;
+}
+
+void SelectionDAGBuilder::visitShuffleVector(const User &I) {
+  SmallVector<int, 8> Mask;
+  SDValue Src1 = getValue(I.getOperand(0));
+  SDValue Src2 = getValue(I.getOperand(1));
+
+  // Convert the ConstantVector mask operand into an array of ints, with -1
+  // representing undef values.
+  SmallVector<Constant*, 4> MaskElts;
+  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
+  unsigned MaskNumElts = MaskElts.size();
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (isa<UndefValue>(MaskElts[i]))
+      Mask.push_back(-1);
+    else
+      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
+  }
+
+  EVT VT = TLI.getValueType(I.getType());
+  EVT SrcVT = Src1.getValueType();
+  unsigned SrcNumElts = SrcVT.getVectorNumElements();
+
+  if (SrcNumElts == MaskNumElts) {
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &Mask[0]));
+    return;
+  }
+
+  // Normalize the shuffle vector since mask and vector length don't match.
+  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
+    // The mask is longer than the source vectors, and its length is a
+    // multiple of the source vector length. We can use CONCAT_VECTORS to make
+    // the mask and vector lengths match.
+    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
+      // The shuffle is concatenating two vectors together.
+      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
+                               VT, Src1, Src2));
+      return;
+    }
+
+    // Pad both vectors with undefs to make them the same length as the mask.
+    unsigned NumConcat = MaskNumElts / SrcNumElts;
+    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
+    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
+    SDValue UndefVal = DAG.getUNDEF(SrcVT);
+
+    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
+    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
+    MOps1[0] = Src1;
+    MOps2[0] = Src2;
+
+    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps1[0], NumConcat);
+    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
+                                                  getCurDebugLoc(), VT,
+                                                  &MOps2[0], NumConcat);
+
+    // Readjust the mask for the new input vector length.
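+    // For illustration (hypothetical shuffle): with two <2 x float> sources
+    // padded to <4 x float>, a mask entry that referred to element 0 of Src2
+    // (Idx = 2 = SrcNumElts) is remapped to 2 + 4 - 2 = 4, i.e. element 0 of
+    // the widened Src2.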
+    SmallVector<int, 8> MappedOps;
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      if (Idx < (int)SrcNumElts)
+        MappedOps.push_back(Idx);
+      else
+        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
+    }
+
+    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                      &MappedOps[0]));
+    return;
+  }
+
+  if (SrcNumElts > MaskNumElts) {
+    // Analyze the access pattern of the vector to see if we can extract
+    // two subvectors and do the shuffle. The analysis is done by calculating
+    // the range of elements the mask accesses on both vectors.
+    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
+    int MaxRange[2] = {-1, -1};
+
+    for (unsigned i = 0; i != MaskNumElts; ++i) {
+      int Idx = Mask[i];
+      int Input = 0;
+      if (Idx < 0)
+        continue;
+
+      if (Idx >= (int)SrcNumElts) {
+        Input = 1;
+        Idx -= SrcNumElts;
+      }
+      if (Idx > MaxRange[Input])
+        MaxRange[Input] = Idx;
+      if (Idx < MinRange[Input])
+        MinRange[Input] = Idx;
+    }
+
+    // Check whether the access is smaller than the vector size, and whether
+    // we can find a reasonable extract index.
+    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not
+                                 // Extract.
+    int StartIdx[2];  // StartIdx to extract from
+    for (int Input=0; Input < 2; ++Input) {
+      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
+        RangeUse[Input] = 0; // Unused
+        StartIdx[Input] = 0;
+      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
+        // Fits within range but we should see if we can find a good
+        // start index that is a multiple of the mask length.
+        if (MaxRange[Input] < (int)MaskNumElts) {
+          RangeUse[Input] = 1; // Extract from beginning of the vector
+          StartIdx[Input] = 0;
+        } else {
+          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
+          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
+              StartIdx[Input] + MaskNumElts <= SrcNumElts)
+            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
+        }
+      }
+    }
+
+    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
+      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
+      return;
+    }
+    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
+      // Extract appropriate subvectors and generate a vector shuffle.
+      for (int Input=0; Input < 2; ++Input) {
+        SDValue &Src = Input == 0 ? Src1 : Src2;
+        if (RangeUse[Input] == 0)
+          Src = DAG.getUNDEF(VT);
+        else
+          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
+                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
+      }
+
+      // Calculate the new mask.
+      SmallVector<int, 8> MappedOps;
+      for (unsigned i = 0; i != MaskNumElts; ++i) {
+        int Idx = Mask[i];
+        if (Idx < 0)
+          MappedOps.push_back(Idx);
+        else if (Idx < (int)SrcNumElts)
+          MappedOps.push_back(Idx - StartIdx[0]);
+        else
+          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
+      }
+
+      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
+                                        &MappedOps[0]));
+      return;
+    }
+  }
+
+  // We can't use either concat vectors or extract subvectors, so fall back
+  // to replacing the shuffle with extract and build vector.
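+  // For illustration (hypothetical shuffle): a 2-element mask <0, 3> over
+  // <4 x i32> sources touches a 4-element span of Src1, too wide for a
+  // 2-element subvector extract, so the result is two EXTRACT_VECTOR_ELT
+  // nodes (elements 0 and 3 of Src1) feeding a single BUILD_VECTOR.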
+  EVT EltVT = VT.getVectorElementType();
+  EVT PtrVT = TLI.getPointerTy();
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0; i != MaskNumElts; ++i) {
+    if (Mask[i] < 0) {
+      Ops.push_back(DAG.getUNDEF(EltVT));
+    } else {
+      int Idx = Mask[i];
+      SDValue Res;
+
+      if (Idx < (int)SrcNumElts)
+        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                          EltVT, Src1, DAG.getConstant(Idx, PtrVT));
+      else
+        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
+                          EltVT, Src2,
+                          DAG.getConstant(Idx - SrcNumElts, PtrVT));
+
+      Ops.push_back(Res);
+    }
+  }
+
+  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
+                           VT, &Ops[0], Ops.size()));
+}
+
+void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Value *Op1 = I.getOperand(1);
+  const Type *AggTy = I.getType();
+  const Type *ValTy = Op1->getType();
+  bool IntoUndef = isa<UndefValue>(Op0);
+  bool FromUndef = isa<UndefValue>(Op1);
+
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> AggValueVTs;
+  ComputeValueVTs(TLI, AggTy, AggValueVTs);
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumAggValues = AggValueVTs.size();
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumAggValues);
+
+  SDValue Agg = getValue(Op0);
+  SDValue Val = getValue(Op1);
+  unsigned i = 0;
+  // Copy the beginning value(s) from the original aggregate.
+  for (; i != LinearIndex; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+  // Copy values from the inserted value(s).
+  for (; i != LinearIndex + NumValValues; ++i)
+    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
+  // Copy remaining value(s) from the original aggregate.
+  for (; i != NumAggValues; ++i)
+    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
+                SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
+                           &Values[0], NumAggValues));
+}
+
+void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
+  const Value *Op0 = I.getOperand(0);
+  const Type *AggTy = Op0->getType();
+  const Type *ValTy = I.getType();
+  bool OutOfUndef = isa<UndefValue>(Op0);
+
+  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+
+  SmallVector<EVT, 4> ValValueVTs;
+  ComputeValueVTs(TLI, ValTy, ValValueVTs);
+
+  unsigned NumValValues = ValValueVTs.size();
+  SmallVector<SDValue, 4> Values(NumValValues);
+
+  SDValue Agg = getValue(Op0);
+  // Copy out the selected value(s).
+  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
+    Values[i - LinearIndex] =
+      OutOfUndef ?
+        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
+        SDValue(Agg.getNode(), Agg.getResNo() + i);
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValValueVTs[0], NumValValues),
+                           &Values[0], NumValValues));
+}
+
+void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
+  SDValue N = getValue(I.getOperand(0));
+  const Type *Ty = I.getOperand(0)->getType();
+
+  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
+       OI != E; ++OI) {
+    const Value *Idx = *OI;
+    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
+      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
+      if (Field) {
+        // N = N + Offset
+        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        DAG.getIntPtrConstant(Offset));
+      }
+
+      Ty = StTy->getElementType(Field);
+    } else {
+      Ty = cast<SequentialType>(Ty)->getElementType();
+
+      // If this is a constant subscript, handle it quickly.
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
+        if (CI->isZero()) continue;
+        uint64_t Offs =
+            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
+        SDValue OffsVal;
+        EVT PTy = TLI.getPointerTy();
+        unsigned PtrBits = PTy.getSizeInBits();
+        if (PtrBits < 64)
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
+                                TLI.getPointerTy(),
+                                DAG.getConstant(Offs, MVT::i64));
+        else
+          OffsVal = DAG.getIntPtrConstant(Offs);
+
+        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
+                        OffsVal);
+        continue;
+      }
+
+      // N = N + Idx * ElementSize;
+      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
+                                TD->getTypeAllocSize(Ty));
+      SDValue IdxN = getValue(Idx);
+
+      // If the index is smaller or larger than intptr_t, truncate or extend
+      // it.
+      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
+
+      // If this is a multiply by a power of two, turn it into a shl
+      // immediately.  This is a very common case.
+      if (ElementSize != 1) {
+        if (ElementSize.isPowerOf2()) {
+          unsigned Amt = ElementSize.logBase2();
+          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+                             N.getValueType(), IdxN,
+                             DAG.getConstant(Amt, TLI.getPointerTy()));
+        } else {
+          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
+          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
+                             N.getValueType(), IdxN, Scale);
+        }
+      }
+
+      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                      N.getValueType(), N, IdxN);
+    }
+  }
+
+  setValue(&I, N);
+}
+
+void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
+  // If this is a fixed sized alloca in the entry block of the function,
+  // allocate it statically on the stack.
+  if (FuncInfo.StaticAllocaMap.count(&I))
+    return;   // getValue will auto-populate this.
+
+  const Type *Ty = I.getAllocatedType();
+  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+  unsigned Align =
+    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
+             I.getAlignment());
+
+  SDValue AllocSize = getValue(I.getArraySize());
+
+  EVT IntPtr = TLI.getPointerTy();
+  if (AllocSize.getValueType() != IntPtr)
+    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+                          AllocSize,
+                          DAG.getConstant(TySize, IntPtr));
+
+  // Handle alignment.  If the requested alignment is less than or equal to
+  // the stack alignment, ignore it.  If the size is greater than or equal to
+  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
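+  // For illustration (hypothetical values): with StackAlign = 16 and a
+  // dynamic size of 20 bytes, the rounding below computes
+  // (20 + 15) & ~15 = 32, keeping the allocation 16-byte aligned.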
+  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
+  if (Align <= StackAlign)
+    Align = 0;
+
+  // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size.
+  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(StackAlign-1));
+
+  // Mask out the low bits for alignment purposes.
+  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
+                          AllocSize.getValueType(), AllocSize,
+                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
+
+  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
+  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
+  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
+                            VTs, Ops, 3);
+  setValue(&I, DSA);
+  DAG.setRoot(DSA.getValue(1));
+
+  // Inform the Frame Information that we have just allocated a variable-sized
+  // object.
+  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
+}
+
+void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
+  const Value *SV = I.getOperand(0);
+  SDValue Ptr = getValue(SV);
+
+  const Type *Ty = I.getType();
+
+  bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  unsigned Alignment = I.getAlignment();
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  SDValue Root;
+  bool ConstantMemory = false;
+  if (I.isVolatile() || NumValues > MaxParallelChains)
+    // Serialize volatile loads with other side effects.
+    Root = getRoot();
+  else if (AA->pointsToConstantMemory(
+             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
+    // Do not serialize (non-volatile) loads of constant memory with anything.
+    Root = DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = DAG.getRoot();
+  }
+
+  SmallVector<SDValue, 4> Values(NumValues);
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
+  EVT PtrVT = Ptr.getValueType();
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // Serializing loads here may result in excessive register pressure, and
+    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+    // could recover a bit by hoisting nodes upward in the chain by recognizing
+    // they are side-effect free or do not alias. The optimizer should really
+    // avoid this case by converting large object/array copies to llvm.memcpy
+    // (MaxParallelChains should always remain as failsafe).
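+    // For illustration (hypothetical type): loading a first-class aggregate
+    // such as {i32, i32, i32} yields NumValues == 3, so three loads are built
+    // below; each time ChainI reaches MaxParallelChains, the chains gathered
+    // so far are folded into a TokenFactor that becomes the new Root.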
+    if (ChainI == MaxParallelChains) {
+      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
+    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
+                            PtrVT, Ptr,
+                            DAG.getConstant(Offsets[i], PtrVT));
+    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
+                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+                            isNonTemporal, Alignment, TBAAInfo);
+
+    Values[i] = L;
+    Chains[ChainI] = L.getValue(1);
+  }
+
+  if (!ConstantMemory) {
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], ChainI);
+    if (isVolatile)
+      DAG.setRoot(Chain);
+    else
+      PendingLoads.push_back(Chain);
+  }
+
+  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                           DAG.getVTList(&ValueVTs[0], NumValues),
+                           &Values[0], NumValues));
+}
+
+void SelectionDAGBuilder::visitStore(const StoreInst &I) {
+  const Value *SrcV = I.getOperand(0);
+  const Value *PtrV = I.getOperand(1);
+
+  SmallVector<EVT, 4> ValueVTs;
+  SmallVector<uint64_t, 4> Offsets;
+  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
+  unsigned NumValues = ValueVTs.size();
+  if (NumValues == 0)
+    return;
+
+  // Get the lowered operands. Note that we do this after
+  // checking if NumResults is zero, because with zero results
+  // the operands won't have values in the map.
+  SDValue Src = getValue(SrcV);
+  SDValue Ptr = getValue(PtrV);
+
+  SDValue Root = getRoot();
+  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+                                          NumValues));
+  EVT PtrVT = Ptr.getValueType();
+  bool isVolatile = I.isVolatile();
+  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
+  unsigned Alignment = I.getAlignment();
+  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+  unsigned ChainI = 0;
+  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+    // See visitLoad comments.
+    if (ChainI == MaxParallelChains) {
+      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+      Root = Chain;
+      ChainI = 0;
+    }
+    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
+                              DAG.getConstant(Offsets[i], PtrVT));
+    SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+                              SDValue(Src.getNode(), Src.getResNo() + i),
+                              Add, MachinePointerInfo(PtrV, Offsets[i]),
+                              isVolatile, isNonTemporal, Alignment, TBAAInfo);
+    Chains[ChainI] = St;
+  }
+
+  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                  MVT::Other, &Chains[0], ChainI);
+  ++SDNodeOrder;
+  AssignOrderingToNode(StoreNode.getNode());
+  DAG.setRoot(StoreNode);
+}
+
+/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
+/// node.
+void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
+                                               unsigned Intrinsic) {
+  bool HasChain = !I.doesNotAccessMemory();
+  bool OnlyLoad = HasChain && I.onlyReadsMemory();
+
+  // Build the operand list.
+  SmallVector<SDValue, 8> Ops;
+  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
+    if (OnlyLoad) {
+      // We don't need to serialize loads against other loads.
+      Ops.push_back(DAG.getRoot());
+    } else {
+      Ops.push_back(getRoot());
+    }
+  }
+
+  // Info is set by getTgtMemIntrinsic.
+  TargetLowering::IntrinsicInfo Info;
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+
+  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
+  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+      Info.opc == ISD::INTRINSIC_W_CHAIN)
+    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
+
+  // Add all operands of the call to the operand list.
+  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+    SDValue Op = getValue(I.getArgOperand(i));
+    assert(TLI.isTypeLegal(Op.getValueType()) &&
+           "Intrinsic uses a non-legal type?");
+    Ops.push_back(Op);
+  }
+
+  SmallVector<EVT, 4> ValueVTs;
+  ComputeValueVTs(TLI, I.getType(), ValueVTs);
+#ifndef NDEBUG
+  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
+    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
+           "Intrinsic uses a non-legal type?");
+  }
+#endif // NDEBUG
+
+  if (HasChain)
+    ValueVTs.push_back(MVT::Other);
+
+  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
+
+  // Create the node.
+  SDValue Result;
+  if (IsTgtIntrinsic) {
+    // This is a target intrinsic that touches memory.
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
+                                     VTs, &Ops[0], Ops.size(),
+                                     Info.memVT,
+                                   MachinePointerInfo(Info.ptrVal, Info.offset),
+                                     Info.align, Info.vol,
+                                     Info.readMem, Info.writeMem);
+  } else if (!HasChain) {
+    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else if (!I.getType()->isVoidTy()) {
+    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  } else {
+    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
+                         VTs, &Ops[0], Ops.size());
+  }
+
+  if (HasChain) {
+    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
+    if (OnlyLoad)
+      PendingLoads.push_back(Chain);
+    else
+      DAG.setRoot(Chain);
+  }
+
+  if (!I.getType()->isVoidTy()) {
+    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
+      EVT VT = TLI.getValueType(PTy);
+      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
+    }
+
+    setValue(&I, Result);
+  }
+}
+
+/// GetSignificand - Get the significand and build it into a floating-point
+/// number with an exponent of 1:
+///
+///   Op = (Op & 0x007fffff) | 0x3f800000;
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
+  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x007fffff, MVT::i32));
+  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
+                           DAG.getConstant(0x3f800000, MVT::i32));
+  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
+}
+
+/// GetExponent - Get the exponent:
+///
+///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
+///
+/// where Op is the hexadecimal representation of the floating-point value.
+static SDValue
+GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
+            DebugLoc dl) {
+  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
+                           DAG.getConstant(0x7f800000, MVT::i32));
+  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
+                           DAG.getConstant(23, TLI.getPointerTy()));
+  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
+                           DAG.getConstant(127, MVT::i32));
+  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
+}
+
+/// getF32Constant - Get a 32-bit floating-point constant.
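+/// For example, getF32Constant(DAG, 0x3f800000) yields 1.0f, since
+/// 0x3f800000 is the IEEE-754 single-precision bit pattern of 1.0.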
+static SDValue
+getF32Constant(SelectionDAG &DAG, unsigned Flt) {
+  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
+}
+
+/// Inlined utility function to implement binary input atomic intrinsics for
+/// visitIntrinsicCall: I is a call instruction,
+///                     Op is the associated NodeType for I.
+const char *
+SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
+                                           ISD::NodeType Op) {
+  SDValue Root = getRoot();
+  SDValue L =
+    DAG.getAtomic(Op, getCurDebugLoc(),
+                  getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+                  Root,
+                  getValue(I.getArgOperand(0)),
+                  getValue(I.getArgOperand(1)),
+                  I.getArgOperand(0));
+  setValue(&I, L);
+  DAG.setRoot(L.getValue(1));
+  return 0;
+}
+
+// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
+const char *
+SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
+  SDValue Op1 = getValue(I.getArgOperand(0));
+  SDValue Op2 = getValue(I.getArgOperand(1));
+
+  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+  setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
+  return 0;
+}
+
+/// visitExp - Lower an exp intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp(const CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getArgOperand(0));
+
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OFe 1.4426950f
+    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
+    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+                             getF32Constant(DAG, 0x3fb8aa3b));
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      //   error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+
+      // Add the exponent into the result in integer domain.
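+      // Since IntegerPartOfX was shifted into the exponent field (<< 23),
+      // this integer add scales the result by 2^IntegerPartOfX. For
+      // illustration: adding (1 << 23) to the bits of 1.5f (0x3FC00000)
+      // gives 0x40400000, which is 3.0f.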
+ SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); + } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // 0.000107046256 error, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); + + // Add the exponent into the result in integer domain. + SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); + } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); + SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, + MVT::i32, t13); + + // Add the exponent into the result in integer domain. + SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, + TwoToFracPartOfX, IntegerPartOfX); + + result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); + } + } else { + // No special expansion. + result = DAG.getNode(ISD::FEXP, dl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); + } + + setValue(&I, result); +} + +/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +void +SelectionDAGBuilder::visitLog(const CallInst &I) { + SDValue result; + DebugLoc dl = getCurDebugLoc(); + + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op = getValue(I.getArgOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. 
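+    // With x = 2^E * M and M in [1,2), ln(x) = E*ln(2) + ln(M); the code
+    // below computes E*ln(2) and approximates ln(M) with a minimax
+    // polynomial.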
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3f317218));
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl);
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   LogOfMantissa =
+      //     -1.1609546f +
+      //       (1.4034025f - 0.23903021f * x) * x;
+      //
+      //   error 0.0034276066, which is better than 8 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbe74c456));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3fb3a2b1));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                          getF32Constant(DAG, 0x3f949a29));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   LogOfMantissa =
+      //     -1.7417939f +
+      //       (2.8212026f +
+      //         (-1.4699568f +
+      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
+      //
+      //   error 0.000061011436, which is 14 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbd67b6d6));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3ee4f4b8));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3fbc278b));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40348e95));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                                          getF32Constant(DAG, 0x3fdef31a));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   LogOfMantissa =
+      //     -2.1072184f +
+      //       (4.2372794f +
+      //         (-3.7029485f +
+      //           (2.2781945f +
+      //             (-0.87823314f +
+      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
+      //
+      //   error 0.0000023660568, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbc91e5ac));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e4350aa));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f60d3e3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x4011cdf0));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x406cfd1c));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x408797cb));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+                                          getF32Constant(DAG, 0x4006dcab));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, LogOfMantissa);
+    }
+  } else {
+    // No special expansion.
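+    // (A generic ISD::FLOG node is emitted here; if the target cannot lower
+    // it natively, legalization typically expands it to a libcall such as
+    // logf.)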
+    result = DAG.getNode(ISD::FLOG, dl,
+                         getValue(I.getArgOperand(0)).getValueType(),
+                         getValue(I.getArgOperand(0)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog2(const CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getArgOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+    // Get the exponent.
+    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl);
+
+    // Different possible minimax approximations of significand in
+    // floating-point for various degrees of accuracy over [1,2].
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
+      //
+      //   error 0.0049451742, which is more than 7 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbeb08fe0));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x40019463));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                           getF32Constant(DAG, 0x3fd6633d));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   Log2ofMantissa =
+      //     -2.51285454f +
+      //       (4.07009056f +
+      //         (-2.12067489f +
+      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
+      //
+      //   error 0.0000876136000, which is better than 13 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbda7262e));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3f25280b));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x4007b923));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40823e2f));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                                           getF32Constant(DAG, 0x4020d29c));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   Log2ofMantissa =
+      //     -3.0400495f +
+      //       (6.1129976f +
+      //         (-5.3420409f +
+      //           (3.2865683f +
+      //             (-1.2669343f +
+      //               (0.27515199f -
+      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
+      //
+      //   error 0.0000018516, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbcd2769e));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e8ce0b9));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3fa22ae7));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x40525723));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x40aaf200));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x40c39dad));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
+                                           getF32Constant(DAG, 0x4042902c));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log2ofMantissa);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG2, dl,
+                         getValue(I.getArgOperand(0)).getValueType(),
+                         getValue(I.getArgOperand(0)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitLog10(const CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getArgOperand(0));
+    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
+
+    // Scale the exponent by log10(2) [0.30102999f].
+    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
+    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+                                        getF32Constant(DAG, 0x3e9a209a));
+
+    // Get the significand and build it into a floating-point number with
+    // exponent of 1.
+    SDValue X = GetSignificand(DAG, Op1, dl);
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   Log10ofMantissa =
+      //     -0.50419619f +
+      //       (0.60948995f - 0.10380950f * x) * x;
+      //
+      //   error 0.0014886165, which is 6 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0xbdd49a13));
+      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3f1c0789));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
+                                            getF32Constant(DAG, 0x3f011300));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   Log10ofMantissa =
+      //     -0.64831180f +
+      //       (0.91751397f +
+      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
+      //
+      //   error 0.00019228036, which is better than 12 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3d431f31));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3ea21fb2));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f6ae232));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                                            getF32Constant(DAG, 0x3f25f7c3));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   Log10ofMantissa =
+      //     -0.84299375f +
+      //       (1.5327582f +
+      //         (-1.0688956f +
+      //           (0.49102474f +
+      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
+      //
+      //   error 0.0000037995730, which is better than 18 bits
+      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3c5d51ce));
+      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
+                               getF32Constant(DAG, 0x3e00685a));
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3efb6798));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f88d192));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3fc4316c));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
+                                            getF32Constant(DAG, 0x3f57ce70));
+
+      result = DAG.getNode(ISD::FADD, dl,
+                           MVT::f32, LogOfExponent, Log10ofMantissa);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FLOG10, dl,
+                         getValue(I.getArgOperand(0)).getValueType(),
+                         getValue(I.getArgOperand(0)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
+/// limited-precision mode.
+void
+SelectionDAGBuilder::visitExp2(const CallInst &I) {
+  SDValue result;
+  DebugLoc dl = getCurDebugLoc();
+
+  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getArgOperand(0));
+
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
+
+    //   FractionalPartOfX = x - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      //   error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      //   error 0.000107046256, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3f7ff8fd));
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      //
+      //   error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                getF32Constant(DAG, 0x3f800000));
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FEXP2, dl,
+                         getValue(I.getArgOperand(0)).getValueType(),
+                         getValue(I.getArgOperand(0)));
+  }
+
+  setValue(&I, result);
+}
+
+/// visitPow - Lower a pow intrinsic. Handles the special sequences for
+/// limited-precision mode with x == 10.0f.
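+/// The special case rests on the identity pow(10.0, x) = 2**(x * log2(10)),
+/// with log2(10) ~= 3.3219281f (0x40549a78); the 2**fraction part is then
+/// approximated by the same polynomials used in visitExp2.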
+void
+SelectionDAGBuilder::visitPow(const CallInst &I) {
+  SDValue result;
+  const Value *Val = I.getArgOperand(0);
+  DebugLoc dl = getCurDebugLoc();
+  bool IsExp10 = false;
+
+  if (getValue(Val).getValueType() == MVT::f32 &&
+      getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
+      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    if (Constant *C = const_cast<Constant *>(dyn_cast<Constant>(Val))) {
+      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
+        APFloat Ten(10.0f);
+        IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
+      }
+    }
+  }
+
+  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
+    SDValue Op = getValue(I.getArgOperand(1));
+
+    // Put the exponent in the right bit position for later addition to the
+    // final result:
+    //
+    //   #define LOG2OF10 3.3219281f
+    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
+    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
+                             getF32Constant(DAG, 0x40549a78));
+    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
+
+    //   FractionalPartOfX = x - (float)IntegerPartOfX;
+    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
+    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
+
+    //   IntegerPartOfX <<= 23;
+    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
+                                 DAG.getConstant(23, TLI.getPointerTy()));
+
+    if (LimitFloatPrecision <= 6) {
+      // For floating-point precision of 6:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.997535578f +
+      //       (0.735607626f + 0.252464424f * x) * x;
+      //
+      //   error 0.0144103317, which is 6 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3e814304));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3f3c50c8));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f7f5e7e));
+      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
+      // For floating-point precision of 12:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999892986f +
+      //       (0.696457318f +
+      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
+      //
+      //   error 0.000107046256, which is 13 to 14 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3da235e3));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3e65b8f3));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3f324b07));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3f7ff8fd));
+      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
+      // For floating-point precision of 18:
+      //
+      //   TwoToFractionalPartOfX =
+      //     0.999999982f +
+      //       (0.693148872f +
+      //         (0.240227044f +
+      //           (0.554906021e-1f +
+      //             (0.961591928e-2f +
+      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
+      //
+      //   error 2.47208000*10^(-7), which is better than 18 bits
+      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
+                               getF32Constant(DAG, 0x3924b03e));
+      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
+                               getF32Constant(DAG, 0x3ab24b87));
+      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
+      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
+                               getF32Constant(DAG, 0x3c1d8c17));
+      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
+      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
+                               getF32Constant(DAG, 0x3d634a1d));
+      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
+      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
+                               getF32Constant(DAG, 0x3e75fe14));
+      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
+      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
+                                getF32Constant(DAG, 0x3f317234));
+      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
+      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
+                                getF32Constant(DAG, 0x3f800000));
+      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
+      SDValue TwoToFractionalPartOfX =
+        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
+
+      result = DAG.getNode(ISD::BITCAST, dl,
+                           MVT::f32, TwoToFractionalPartOfX);
+    }
+  } else {
+    // No special expansion.
+    result = DAG.getNode(ISD::FPOW, dl,
+                         getValue(I.getArgOperand(0)).getValueType(),
+                         getValue(I.getArgOperand(0)),
+                         getValue(I.getArgOperand(1)));
+  }
+
+  setValue(&I, result);
+}
+
+
+/// ExpandPowI - Expand a llvm.powi intrinsic.
+static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
+                          SelectionDAG &DAG) {
+  // If RHS is a constant, we can expand this out to a multiplication tree,
+  // otherwise we end up lowering to a call to __powidf2 (for example).  When
+  // optimizing for size, we only want to do this if the expansion would
+  // produce a small number of multiplies, otherwise we do the full expansion.
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    // Get the exponent as a positive value.
+    unsigned Val = RHSC->getSExtValue();
+    if ((int)Val < 0) Val = -Val;
+
+    // powi(x, 0) -> 1.0
+    if (Val == 0)
+      return DAG.getConstantFP(1.0, LHS.getValueType());
+
+    const Function *F = DAG.getMachineFunction().getFunction();
+    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
+        // If optimizing for size, don't insert too many multiplies.  This
+        // inserts up to 5 multiplies.
+        CountPopulation_32(Val)+Log2_32(Val) < 7) {
+      // We use the simple binary decomposition method to generate the multiply
+      // sequence.  There are more optimal ways to do this (for example,
+      // powi(x,15) generates one more multiply than it should), but this has
+      // the benefit of being both really simple and much better than a libcall.
+      SDValue Res;  // Logically starts equal to 1.0
+      SDValue CurSquare = LHS;
+      while (Val) {
+        if (Val & 1) {
+          if (Res.getNode())
+            Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(),
+                              Res, CurSquare);
+          else
+            Res = CurSquare;  // 1.0 * CurSquare.
+        }
+
+        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
+                                CurSquare, CurSquare);
+        Val >>= 1;
+      }
+
+      // If the original was negative, invert the result, producing 1/(x*x*x).
+      if (RHSC->getSExtValue() < 0)
+        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
+                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
+      return Res;
+    }
+  }
+
+  // Otherwise, expand to a libcall.
+  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
+}
+
+/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
+/// argument, create the corresponding DBG_VALUE machine instruction for it now.
+/// At the end of instruction selection, they will be inserted into the
+/// entry BB.
+bool
+SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                              int64_t Offset,
+                                              const SDValue &N) {
+  const Argument *Arg = dyn_cast<Argument>(V);
+  if (!Arg)
+    return false;
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
+  // Ignore inlined function arguments here.
+  DIVariable DV(Variable);
+  if (DV.isInlinedFnArgument(MF.getFunction()))
+    return false;
+
+  MachineBasicBlock *MBB = FuncInfo.MBB;
+  if (MBB != &MF.front())
+    return false;
+
+  unsigned Reg = 0;
+  if (Arg->hasByValAttr()) {
+    // Byval arguments' frame index is recorded during argument lowering.
+    // Use this info directly.
+    Reg = TRI->getFrameRegister(MF);
+    Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+    // If the byval argument's offset is not recorded, ignore it.
+    if (!Offset)
+      Reg = 0;
+  }
+
+  if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
+    Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      MachineRegisterInfo &RegInfo = MF.getRegInfo();
+      unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+      if (PR)
+        Reg = PR;
+    }
+  }
+
+  if (!Reg) {
+    // Check if ValueMap has reg number.
+    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+    if (VMI != FuncInfo.ValueMap.end())
+      Reg = VMI->second;
+  }
+
+  if (!Reg && N.getNode()) {
+    // Check if frame index is available.
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+      if (FrameIndexSDNode *FINode =
+          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+        Reg = TRI->getFrameRegister(MF);
+        Offset = FINode->getIndex();
+      }
+  }
+
+  if (!Reg)
+    return false;
+
+  MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
+                                    TII->get(TargetOpcode::DBG_VALUE))
+    .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
+  FuncInfo.ArgDbgValues.push_back(&*MIB);
+  return true;
+}
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+                         !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+/// visitIntrinsicCall - Lower the call to the specified intrinsic function.
+/// If we want to emit this as a call to a named external function, return the
+/// name; otherwise, lower it and return null.
+const char *
+SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+  DebugLoc dl = getCurDebugLoc();
+  SDValue Res;
+
+  switch (Intrinsic) {
+  default:
+    // By default, turn this into a target intrinsic node.
+    visitTargetIntrinsic(I, Intrinsic);
+    return 0;
+  case Intrinsic::vastart: visitVAStart(I); return 0;
+  case Intrinsic::vaend: visitVAEnd(I); return 0;
+  case Intrinsic::vacopy: visitVACopy(I); return 0;
+  case Intrinsic::returnaddress:
+    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::frameaddress:
+    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::setjmp:
+    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
+  case Intrinsic::longjmp:
+    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
+  case Intrinsic::memcpy: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
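+    // (Address spaces at or above 256 are reserved for target-specific uses;
+    // on X86, for example, 256 and 257 select the GS and FS segments, which
+    // this generic memcpy lowering cannot handle.)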
+    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
+                              MachinePointerInfo(I.getArgOperand(0)),
+                              MachinePointerInfo(I.getArgOperand(1))));
+    return 0;
+  }
+  case Intrinsic::memset: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+                              MachinePointerInfo(I.getArgOperand(0))));
+    return 0;
+  }
+  case Intrinsic::memmove: {
+    // Assert for address < 256 since we support only user defined address
+    // spaces.
+    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
+           < 256 &&
+           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
+           < 256 &&
+           "Unknown address space");
+    SDValue Op1 = getValue(I.getArgOperand(0));
+    SDValue Op2 = getValue(I.getArgOperand(1));
+    SDValue Op3 = getValue(I.getArgOperand(2));
+    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
+    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
+                               MachinePointerInfo(I.getArgOperand(0)),
+                               MachinePointerInfo(I.getArgOperand(1))));
+    return 0;
+  }
+  case Intrinsic::dbg_declare: {
+    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    MDNode *Variable = DI.getVariable();
+    const Value *Address = DI.getAddress();
+    if (!Address || !DIVariable(DI.getVariable()).Verify())
+      return 0;
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an
+    // SDNodeOrder but do not always have a corresponding SDNode built.
+    // The absolute, but not relative, SDNodeOrder values differ depending
+    // on whether debug info exists.
+    ++SDNodeOrder;
+
+    // Check if address has undef value.
+    if (isa<UndefValue>(Address) ||
+        (Address->use_empty() && !isa<Argument>(Address))) {
+      DEBUG(dbgs() << "Dropping debug info for " << DI);
+      return 0;
+    }
+
+    SDValue &N = NodeMap[Address];
+    if (!N.getNode() && isa<Argument>(Address))
+      // Check unused arguments map.
+      N = UnusedArgNodeMap[Address];
+    SDDbgValue *SDV;
+    if (N.getNode()) {
+      // Parameters are handled specially.
+      bool isParameter =
+        DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
+      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
+        Address = BCI->getOperand(0);
+      const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
+
+      if (isParameter && !AI) {
+        FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
+        if (FINode)
+          // Byval parameter.  We have a frame index at this point.
+          SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
+                                0, dl, SDNodeOrder);
+        else {
+          // Can't do anything with other non-AI cases yet.  This might be a
+          // parameter of a callee function that got inlined, for example.
+          DEBUG(dbgs() << "Dropping debug info for " << DI);
+          return 0;
+        }
+      } else if (AI)
+        SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
+                              0, dl, SDNodeOrder);
+      else {
+        // Can't do anything with other non-AI cases yet.
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
+        return 0;
+      }
+      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
+    } else {
+      // If Address is an argument then try to emit its dbg value using
+      // virtual register info from the FuncInfo.ValueMap.
+      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
+        // If the variable is pinned by an alloca in a dominating bb then
+        // use StaticAllocaMap.
+        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+          if (AI->getParent() != DI.getParent()) {
+            DenseMap<const AllocaInst*, int>::iterator SI =
+              FuncInfo.StaticAllocaMap.find(AI);
+            if (SI != FuncInfo.StaticAllocaMap.end()) {
+              SDV = DAG.getDbgValue(Variable, SI->second,
+                                    0, dl, SDNodeOrder);
+              DAG.AddDbgValue(SDV, 0, false);
+              return 0;
+            }
+          }
+        }
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
+      }
+    }
+    return 0;
+  }
+  case Intrinsic::dbg_value: {
+    const DbgValueInst &DI = cast<DbgValueInst>(I);
+    if (!DIVariable(DI.getVariable()).Verify())
+      return 0;
+
+    MDNode *Variable = DI.getVariable();
+    uint64_t Offset = DI.getOffset();
+    const Value *V = DI.getValue();
+    if (!V)
+      return 0;
+
+    // Build an entry in DbgOrdering.  Debug info input nodes get an
+    // SDNodeOrder but do not always have a corresponding SDNode built.
+    // The absolute, but not relative, SDNodeOrder values differ depending
+    // on whether debug info exists.
+    ++SDNodeOrder;
+    SDDbgValue *SDV;
+    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
+      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
+      DAG.AddDbgValue(SDV, 0, false);
+    } else {
+      // Do not use getValue() in here; we don't want to generate code at
+      // this point if it hasn't been done yet.
+      SDValue N = NodeMap[V];
+      if (!N.getNode() && isa<Argument>(V))
+        // Check unused arguments map.
+        N = UnusedArgNodeMap[V];
+      if (N.getNode()) {
+        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
+          SDV = DAG.getDbgValue(Variable, N.getNode(),
+                                N.getResNo(), Offset, dl, SDNodeOrder);
+          DAG.AddDbgValue(SDV, N.getNode(), false);
+        }
+      } else if (!V->use_empty()) {
+        // Do not call getValue(V) yet, as we don't want to generate code.
+        // Remember it for later.
+        DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
+        DanglingDebugInfoMap[V] = DDI;
+      } else {
+        // We may expand this to cover more cases.  One case where we have no
+        // data available is an unreferenced parameter.
+        DEBUG(dbgs() << "Dropping debug info for " << DI);
+      }
+    }
+
+    // Build a debug info table entry.
+    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
+      V = BCI->getOperand(0);
+    const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+    // Don't handle byval struct arguments or VLAs, for example.
+    if (!AI)
+      return 0;
+    DenseMap<const AllocaInst*, int>::iterator SI =
+      FuncInfo.StaticAllocaMap.find(AI);
+    if (SI == FuncInfo.StaticAllocaMap.end())
+      return 0; // VLAs.
+    int FI = SI->second;
+
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
+      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
+    return 0;
+  }
+  case Intrinsic::eh_exception: {
+    // Insert the EXCEPTIONADDR instruction.
+    assert(FuncInfo.MBB->isLandingPad() &&
+           "Call to eh.exception not in landing pad!");
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[1];
+    Ops[0] = DAG.getRoot();
+    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
+    setValue(&I, Op);
+    DAG.setRoot(Op.getValue(1));
+    return 0;
+  }
+
+  case Intrinsic::eh_selector: {
+    MachineBasicBlock *CallMBB = FuncInfo.MBB;
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    if (CallMBB->isLandingPad())
+      AddCatchInfo(I, &MMI, CallMBB);
+    else {
+#ifndef NDEBUG
+      FuncInfo.CatchInfoLost.insert(&I);
+#endif
+      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
+      unsigned Reg = TLI.getExceptionSelectorRegister();
+      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
+    }
+
+    // Insert the EHSELECTION instruction.
+    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
+    SDValue Ops[2];
+    Ops[0] = getValue(I.getArgOperand(0));
+    Ops[1] = getRoot();
+    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
+    DAG.setRoot(Op.getValue(1));
+    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
+    return 0;
+  }
+
+  case Intrinsic::eh_typeid_for: {
+    // Find the type id for the given typeinfo.
+    GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
+    unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+    Res = DAG.getConstant(TypeID, MVT::i32);
+    setValue(&I, Res);
+    return 0;
+  }
+
+  case Intrinsic::eh_return_i32:
+  case Intrinsic::eh_return_i64:
+    DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
+                            MVT::Other,
+                            getControlRoot(),
+                            getValue(I.getArgOperand(0)),
+                            getValue(I.getArgOperand(1))));
+    return 0;
+  case Intrinsic::eh_unwind_init:
+    DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+    return 0;
+  case Intrinsic::eh_dwarf_cfa: {
+    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
+                                        TLI.getPointerTy());
+    SDValue Offset = DAG.getNode(ISD::ADD, dl,
+                                 TLI.getPointerTy(),
+                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
+                                             TLI.getPointerTy()),
+                                 CfaArg);
+    SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
+                             TLI.getPointerTy(),
+                             DAG.getConstant(0, TLI.getPointerTy()));
+    setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
+                             FA, Offset));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_callsite: {
+    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
+    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
+    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
+
+    MMI.setCurrentCallSite(CI->getZExtValue());
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_setjmp: {
+    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_longjmp: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+                            getRoot(), getValue(I.getArgOperand(0))));
+    return 0;
+  }
+  case Intrinsic::eh_sjlj_dispatch_setup: {
+    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+                            getRoot(), getValue(I.getArgOperand(0))));
+    return 0;
+  }
+
+  case Intrinsic::x86_mmx_pslli_w:
+  case Intrinsic::x86_mmx_pslli_d:
+  case Intrinsic::x86_mmx_pslli_q:
+  case Intrinsic::x86_mmx_psrli_w:
+  case Intrinsic::x86_mmx_psrli_d:
+  case Intrinsic::x86_mmx_psrli_q:
+  case Intrinsic::x86_mmx_psrai_w:
+  case Intrinsic::x86_mmx_psrai_d: {
+    SDValue ShAmt = getValue(I.getArgOperand(1));
+    if (isa<ConstantSDNode>(ShAmt)) {
+      visitTargetIntrinsic(I, Intrinsic);
+      return 0;
+    }
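+    // (A non-constant shift amount cannot be encoded in the immediate-form
+    // intrinsic, so the code below rewrites it to the corresponding
+    // register-form intrinsic, e.g. x86_mmx_pslli_w -> x86_mmx_psll_w, and
+    // passes the amount as a vector operand instead.)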
+    unsigned NewIntrinsic = 0;
+    EVT ShAmtVT = MVT::v2i32;
+    switch (Intrinsic) {
+    case Intrinsic::x86_mmx_pslli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+      break;
+    case Intrinsic::x86_mmx_pslli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+      break;
+    case Intrinsic::x86_mmx_pslli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+      break;
+    case Intrinsic::x86_mmx_psrli_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+      break;
+    case Intrinsic::x86_mmx_psrli_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+      break;
+    case Intrinsic::x86_mmx_psrli_q:
+      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+      break;
+    case Intrinsic::x86_mmx_psrai_w:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+      break;
+    case Intrinsic::x86_mmx_psrai_d:
+      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+      break;
+    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
+    }
+
+    // The vector shift intrinsics with scalar shift amounts use 32-bit
+    // values, but the SSE2/MMX shift instructions read 64 bits.  Set the
+    // upper 32 bits to zero.
+    // We must do this early because v2i32 is not a legal type.
+    DebugLoc dl = getCurDebugLoc();
+    SDValue ShOps[2];
+    ShOps[0] = ShAmt;
+    ShOps[1] = DAG.getConstant(0, MVT::i32);
+    ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+    EVT DestVT = TLI.getValueType(I.getType());
+    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+                      DAG.getConstant(NewIntrinsic, MVT::i32),
+                      getValue(I.getArgOperand(0)), ShAmt);
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::convertff:
+  case Intrinsic::convertfsi:
+  case Intrinsic::convertfui:
+  case Intrinsic::convertsif:
+  case Intrinsic::convertuif:
+  case Intrinsic::convertss:
+  case Intrinsic::convertsu:
+  case Intrinsic::convertus:
+  case Intrinsic::convertuu: {
+    ISD::CvtCode Code = ISD::CVT_INVALID;
+    switch (Intrinsic) {
+    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
+    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
+    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
+    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
+    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
+    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
+    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
+    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
+    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
+    }
+    EVT DestVT = TLI.getValueType(I.getType());
+    const Value *Op1 = I.getArgOperand(0);
+    Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
+                               DAG.getValueType(DestVT),
+                               DAG.getValueType(getValue(Op1).getValueType()),
+                               getValue(I.getArgOperand(1)),
+                               getValue(I.getArgOperand(2)),
+                               Code);
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::sqrt:
+    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::powi:
+    setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+                            getValue(I.getArgOperand(1)), DAG));
+    return 0;
+  case Intrinsic::sin:
+    setValue(&I, DAG.getNode(ISD::FSIN, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::cos:
+    setValue(&I, DAG.getNode(ISD::FCOS, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::log:
+    visitLog(I);
+    return 0;
+  case Intrinsic::log2:
+    visitLog2(I);
+    return 0;
+  case Intrinsic::log10:
+    visitLog10(I);
+    return 0;
+  case Intrinsic::exp:
+    visitExp(I);
+    return 0;
+  case Intrinsic::exp2:
+    visitExp2(I);
+    return 0;
+  case Intrinsic::pow:
+    visitPow(I);
+    return 0;
+  case Intrinsic::convert_to_fp16:
+    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
+                             MVT::i16, getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::convert_from_fp16:
+    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
+                             MVT::f32, getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::pcmarker: {
+    SDValue Tmp = getValue(I.getArgOperand(0));
+    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
+    return 0;
+  }
+  case Intrinsic::readcyclecounter: {
+    SDValue Op = getRoot();
+    Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
+                      DAG.getVTList(MVT::i64, MVT::Other),
+                      &Op, 1);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::bswap:
+    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
+                             getValue(I.getArgOperand(0)).getValueType(),
+                             getValue(I.getArgOperand(0))));
+    return 0;
+  case Intrinsic::cttz: {
+    SDValue Arg = getValue(I.getArgOperand(0));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::ctlz: {
+    SDValue Arg = getValue(I.getArgOperand(0));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::ctpop: {
+    SDValue Arg = getValue(I.getArgOperand(0));
+    EVT Ty = Arg.getValueType();
+    setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
+    return 0;
+  }
+  case Intrinsic::stacksave: {
+    SDValue Op = getRoot();
+    Res = DAG.getNode(ISD::STACKSAVE, dl,
+                      DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::stackrestore: {
+    Res = getValue(I.getArgOperand(0));
+    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
+    return 0;
+  }
+  case Intrinsic::stackprotector: {
+    // Emit code into the DAG to store the stack guard onto the stack.
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    EVT PtrTy = TLI.getPointerTy();
+
+    SDValue Src = getValue(I.getArgOperand(0));   // The guard's value.
+    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
+
+    int FI = FuncInfo.StaticAllocaMap[Slot];
+    MFI->setStackProtectorIndex(FI);
+
+    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
+
+    // Store the stack protector onto the stack.
+    Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
+                       MachinePointerInfo::getFixedStack(FI),
+                       true, false, 0);
+    setValue(&I, Res);
+    DAG.setRoot(Res);
+    return 0;
+  }
+  case Intrinsic::objectsize: {
+    // If we don't know by now, we're never going to know.
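+    // (llvm.objectsize is folded to a conservative constant: all-ones when
+    // the second argument is false, i.e. the maximum size is requested, and
+    // zero when it is true and the minimum size is requested.)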
+    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+
+    assert(CI && "Non-constant type in __builtin_object_size?");
+
+    SDValue Arg = getValue(I.getCalledValue());
+    EVT Ty = Arg.getValueType();
+
+    if (CI->isZero())
+      Res = DAG.getConstant(-1ULL, Ty);
+    else
+      Res = DAG.getConstant(0, Ty);
+
+    setValue(&I, Res);
+    return 0;
+  }
+  case Intrinsic::var_annotation:
+    // Discard annotate attributes
+    return 0;
+
+  case Intrinsic::init_trampoline: {
+    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
+
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    Ops[2] = getValue(I.getArgOperand(1));
+    Ops[3] = getValue(I.getArgOperand(2));
+    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
+    Ops[5] = DAG.getSrcValue(F);
+
+    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
+                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
+                      Ops, 6);
+
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return 0;
+  }
+  case Intrinsic::gcroot:
+    if (GFI) {
+      const Value *Alloca = I.getArgOperand(0);
+      const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
+
+      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
+      GFI->addStackRoot(FI->getIndex(), TypeMap);
+    }
+    return 0;
+  case Intrinsic::gcread:
+  case Intrinsic::gcwrite:
+    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
+    return 0;
+  case Intrinsic::flt_rounds:
+    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
+    return 0;
+  case Intrinsic::trap:
+    DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
+    return 0;
+  case Intrinsic::uadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::UADDO);
+  case Intrinsic::sadd_with_overflow:
+    return implVisitAluOverflow(I, ISD::SADDO);
+  case Intrinsic::usub_with_overflow:
+    return implVisitAluOverflow(I, ISD::USUBO);
+  case Intrinsic::ssub_with_overflow:
+    return implVisitAluOverflow(I, ISD::SSUBO);
+  case Intrinsic::umul_with_overflow:
+    return implVisitAluOverflow(I, ISD::UMULO);
+  case Intrinsic::smul_with_overflow:
+    return implVisitAluOverflow(I, ISD::SMULO);
+
+  case Intrinsic::prefetch: {
+    SDValue Ops[4];
+    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    Ops[0] = getRoot();
+    Ops[1] = getValue(I.getArgOperand(0));
+    Ops[2] = getValue(I.getArgOperand(1));
+    Ops[3] = getValue(I.getArgOperand(2));
+    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+                                        DAG.getVTList(MVT::Other),
+                                        &Ops[0], 4,
+                                        EVT::getIntegerVT(*Context, 8),
+                                        MachinePointerInfo(I.getArgOperand(0)),
+                                        0, /* align */
+                                        false, /* volatile */
+                                        rw==0, /* read */
+                                        rw==1)); /* write */
+    return 0;
+  }
+  case Intrinsic::memory_barrier: {
+    SDValue Ops[6];
+    Ops[0] = getRoot();
+    for (int x = 1; x < 6; ++x)
+      Ops[x] = getValue(I.getArgOperand(x - 1));
+
+    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
+    return 0;
+  }
+  case Intrinsic::atomic_cmp_swap: {
+    SDValue Root = getRoot();
+    SDValue L =
+      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
+                    getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
+                    Root,
+                    getValue(I.getArgOperand(0)),
+                    getValue(I.getArgOperand(1)),
+                    getValue(I.getArgOperand(2)),
+                    MachinePointerInfo(I.getArgOperand(0)));
+    setValue(&I, L);
+    DAG.setRoot(L.getValue(1));
+    return 0;
+  }
+  case Intrinsic::atomic_load_add:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
+  case Intrinsic::atomic_load_sub:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
+  case Intrinsic::atomic_load_or:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
+  case Intrinsic::atomic_load_xor:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
+  case Intrinsic::atomic_load_and:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
+  case Intrinsic::atomic_load_nand:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
+  case Intrinsic::atomic_load_max:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
+  case Intrinsic::atomic_load_min:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
+  case Intrinsic::atomic_load_umin:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
+  case Intrinsic::atomic_load_umax:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
+  case Intrinsic::atomic_swap:
+    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+  case Intrinsic::invariant_start:
+  case Intrinsic::lifetime_start:
+    // Discard region information.
+    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+    return 0;
+  case Intrinsic::invariant_end:
+  case Intrinsic::lifetime_end:
+    // Discard region information.
+    return 0;
+  }
+}
+
+void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
+                                      bool isTailCall,
+                                      MachineBasicBlock *LandingPad) {
+  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+  const Type *RetTy = FTy->getReturnType();
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  MCSymbol *BeginLabel = 0;
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Args.reserve(CS.arg_size());
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<ISD::OutputArg, 4> Outs;
+  SmallVector<uint64_t, 4> Offsets;
+  GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+                Outs, TLI, &Offsets);
+
+  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+                                           FTy->isVarArg(), Outs,
+                                           FTy->getContext());
+
+  SDValue DemoteStackSlot;
+  int DemoteStackIdx = -100;
+
+  if (!CanLowerReturn) {
+    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+                      FTy->getReturnType());
+    unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+                     FTy->getReturnType());
+    MachineFunction &MF = DAG.getMachineFunction();
+    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
+    Entry.Node = DemoteStackSlot;
+    Entry.Ty = StackSlotPtrType;
+    Entry.isSExt = false;
+    Entry.isZExt = false;
+    Entry.isInReg = false;
+    Entry.isSRet = true;
+    Entry.isNest = false;
+    Entry.isByVal = false;
+    Entry.Alignment = Align;
+    Args.push_back(Entry);
+    RetTy = Type::getVoidTy(FTy->getContext());
+  }
+
+  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
+       i != e; ++i) {
+    SDValue ArgNode = getValue(*i);
+    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
+
+    unsigned attrInd = i - CS.arg_begin() + 1;
+    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
+    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
+    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
+    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
+    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
+    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
+    Entry.Alignment = CS.getParamAlignment(attrInd);
+    Args.push_back(Entry);
+  }
+
+  if (LandingPad) {
+    // Insert a label before the invoke call to mark the try range.  This can be
+    // used to detect deletion of the invoke via the MachineModuleInfo.
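+    // (BeginLabel, together with the matching EndLabel emitted after the
+    // call, brackets the range that the exception table maps to this
+    // landing pad.)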
+    BeginLabel = MMI.getContext().CreateTempSymbol();
+
+    // For SjLj, keep track of which landing pads go with which invokes
+    // so as to maintain the ordering of pads in the LSDA.
+    unsigned CallSiteIndex = MMI.getCurrentCallSite();
+    if (CallSiteIndex) {
+      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+      // Now that the call site is handled, stop tracking it.
+      MMI.setCurrentCallSite(0);
+    }
+
+    // Both PendingLoads and PendingExports must be flushed here;
+    // this call might not return.
+    (void)getRoot();
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
+  }
+
+  // Check if target-independent constraints permit a tail call here.
+  // Target-dependent constraints are checked within TLI.LowerCallTo.
+  if (isTailCall &&
+      !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
+    isTailCall = false;
+
+  // If there's a possibility that fast-isel has already selected some amount
+  // of the current basic block, don't emit a tail call.
+  if (isTailCall && EnableFastISel)
+    isTailCall = false;
+
+  std::pair<SDValue, SDValue> Result =
+    TLI.LowerCallTo(getRoot(), RetTy,
+                    CS.paramHasAttr(0, Attribute::SExt),
+                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
+                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
+                    CS.getCallingConv(),
+                    isTailCall,
+                    !CS.getInstruction()->use_empty(),
+                    Callee, Args, DAG, getCurDebugLoc());
+  assert((isTailCall || Result.second.getNode()) &&
+         "Non-null chain expected with non-tail call!");
+  assert((Result.second.getNode() || !Result.first.getNode()) &&
+         "Null value expected with tail call!");
+  if (Result.first.getNode()) {
+    setValue(CS.getInstruction(), Result.first);
+  } else if (!CanLowerReturn && Result.second.getNode()) {
+    // The instruction result is the result of loading from the
+    // hidden sret parameter.
+    SmallVector<EVT, 1> PVTs;
+    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+    ComputeValueVTs(TLI, PtrRetTy, PVTs);
+    assert(PVTs.size() == 1 && "Pointers should fit in one register");
+    EVT PtrVT = PVTs[0];
+    unsigned NumValues = Outs.size();
+    SmallVector<SDValue, 4> Values(NumValues);
+    SmallVector<SDValue, 4> Chains(NumValues);
+
+    for (unsigned i = 0; i < NumValues; ++i) {
+      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
+                                DemoteStackSlot,
+                                DAG.getConstant(Offsets[i], PtrVT));
+      SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
+                              Add,
+                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+                              false, false, 1);
+      Values[i] = L;
+      Chains[i] = L.getValue(1);
+    }
+
+    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+                                MVT::Other, &Chains[0], NumValues);
+    PendingLoads.push_back(Chain);
+
+    // Collect the legal value parts into potentially illegal values
+    // that correspond to the original function's return values.
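+    // (For example, an i128 return value on a 64-bit target comes back as
+    // two i64 parts, which the getCopyFromParts call below reassembles into
+    // a single value.)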
+    SmallVector<EVT, 4> RetTys;
+    RetTy = FTy->getReturnType();
+    ComputeValueVTs(TLI, RetTy, RetTys);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SmallVector<SDValue, 4> ReturnValues;
+    unsigned CurReg = 0;
+    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+      EVT VT = RetTys[I];
+      EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
+
+      SDValue ReturnValue =
+        getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
+                         RegisterVT, VT, AssertOp);
+      ReturnValues.push_back(ReturnValue);
+      CurReg += NumRegs;
+    }
+
+    setValue(CS.getInstruction(),
+             DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
+                         DAG.getVTList(&RetTys[0], RetTys.size()),
+                         &ReturnValues[0], ReturnValues.size()));
+
+  }
+
+  // As a special case, a null chain means that a tail call has been emitted and
+  // the DAG root is already updated.
+  if (Result.second.getNode())
+    DAG.setRoot(Result.second);
+  else
+    HasTailCall = true;
+
+  if (LandingPad) {
+    // Insert a label at the end of the invoke call to mark the try range.  This
+    // can be used to detect deletion of the invoke via the MachineModuleInfo.
+    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
+    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
+
+    // Inform MachineModuleInfo of range.
+    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
+  }
+}
+
+/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
+/// value is equal or not-equal to zero.
+static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
+  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
+       UI != E; ++UI) {
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+      if (IC->isEquality())
+        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
+          if (C->isNullValue())
+            continue;
+    // Unknown instruction.
+    return false;
+  }
+  return true;
+}
+
+static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
+                             const Type *LoadTy,
+                             SelectionDAGBuilder &Builder) {
+
+  // Check to see if this load can be trivially constant folded, e.g. if the
+  // input is from a string literal.
+  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
+    // Cast pointer to the type we really want to load.
+    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
+                                         PointerType::getUnqual(LoadTy));
+
+    if (const Constant *LoadCst =
+          ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
+                                       Builder.TD))
+      return Builder.getValue(LoadCst);
+  }
+
+  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
+  // still constant memory, the input chain can be the entry node.
+  SDValue Root;
+  bool ConstantMemory = false;
+
+  // Do not serialize (non-volatile) loads of constant memory with anything.
+  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
+    Root = Builder.DAG.getEntryNode();
+    ConstantMemory = true;
+  } else {
+    // Do not serialize non-volatile loads against each other.
+    Root = Builder.DAG.getRoot();
+  }
+
+  SDValue Ptr = Builder.getValue(PtrVal);
+  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
+                                        Ptr, MachinePointerInfo(PtrVal),
+                                        false /*volatile*/,
+                                        false /*nontemporal*/, 1 /* align=1 */);
+
+  if (!ConstantMemory)
+    Builder.PendingLoads.push_back(LoadVal.getValue(1));
+  return LoadVal;
+}
+
+
+/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
+/// If so, return true and lower it; otherwise return false and it will be
+/// lowered like a normal call.
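+/// For example, when the result is only compared against zero,
+/// memcmp(a, b, 4) can be lowered to two i32 loads and a SETNE instead of a
+/// libcall, provided i32 loads are legal and unaligned accesses are allowed.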
+bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
+  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
+  if (I.getNumArgOperands() != 3)
+    return false;
+
+  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
+  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+      !I.getArgOperand(2)->getType()->isIntegerTy() ||
+      !I.getType()->isIntegerTy())
+    return false;
+
+  const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
+
+  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
+  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
+  if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
+    bool ActuallyDoIt = true;
+    MVT LoadVT;
+    const Type *LoadTy;
+    switch (Size->getZExtValue()) {
+    default:
+      LoadVT = MVT::Other;
+      LoadTy = 0;
+      ActuallyDoIt = false;
+      break;
+    case 2:
+      LoadVT = MVT::i16;
+      LoadTy = Type::getInt16Ty(Size->getContext());
+      break;
+    case 4:
+      LoadVT = MVT::i32;
+      LoadTy = Type::getInt32Ty(Size->getContext());
+      break;
+    case 8:
+      LoadVT = MVT::i64;
+      LoadTy = Type::getInt64Ty(Size->getContext());
+      break;
+      /*
+    case 16:
+      LoadVT = MVT::v4i32;
+      LoadTy = Type::getInt32Ty(Size->getContext());
+      LoadTy = VectorType::get(LoadTy, 4);
+      break;
+      */
+    }
+
+    // This turns into unaligned loads.  We only do this if the target natively
+    // supports the MVT we'll be loading or if it is small enough (<= 4) that
+    // we'll only produce a small number of byte loads.
+
+    // Require that we can find a legal MVT, and only do this if the target
+    // supports unaligned loads of that type.  Expanding into byte loads would
+    // bloat the code.
+    if (ActuallyDoIt && Size->getZExtValue() > 4) {
+      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
+      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
+      if (!TLI.isTypeLegal(LoadVT) ||
+          !TLI.allowsUnalignedMemoryAccesses(LoadVT))
+        ActuallyDoIt = false;
+    }
+
+    if (ActuallyDoIt) {
+      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
+      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
+
+      SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
+                                 ISD::SETNE);
+      EVT CallVT = TLI.getValueType(I.getType(), true);
+      setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
+      return true;
+    }
+  }
+
+  return false;
+}
+
+
+void SelectionDAGBuilder::visitCall(const CallInst &I) {
+  // Handle inline assembly differently.
+  if (isa<InlineAsm>(I.getCalledValue())) {
+    visitInlineAsm(&I);
+    return;
+  }
+
+  // See if any floating point values are being passed to this function.  This
+  // is used to emit an undefined reference to fltused on Windows.
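+  // (Referencing _fltused is how the MSVC C runtime is told to pull in its
+  // floating-point support; the scan over vararg call arguments below decides
+  // whether that reference must be emitted.)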
+  const FunctionType *FT =
+    cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+  if (FT->isVarArg() &&
+      !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+    for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+      const Type* T = I.getArgOperand(i)->getType();
+      for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+           i != e; ++i) {
+        if (!i->isFloatingPointTy()) continue;
+        MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+        break;
+      }
+    }
+  }
+
+  const char *RenameFn = 0;
+  if (Function *F = I.getCalledFunction()) {
+    if (F->isDeclaration()) {
+      if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
+        if (unsigned IID = II->getIntrinsicID(F)) {
+          RenameFn = visitIntrinsicCall(I, IID);
+          if (!RenameFn)
+            return;
+        }
+      }
+      if (unsigned IID = F->getIntrinsicID()) {
+        RenameFn = visitIntrinsicCall(I, IID);
+        if (!RenameFn)
+          return;
+      }
+    }
+
+    // Check for well-known libc/libm calls. If the function is internal, it
+    // can't be a library call.
+    if (!F->hasLocalLinkage() && F->hasName()) {
+      StringRef Name = F->getName();
+      if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
+        if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.getType() == I.getArgOperand(1)->getType()) {
+          SDValue LHS = getValue(I.getArgOperand(0));
+          SDValue RHS = getValue(I.getArgOperand(1));
+          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
+                                   LHS.getValueType(), LHS, RHS));
+          return;
+        }
+      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
+        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
+            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+            I.getType() == I.getArgOperand(0)->getType() &&
+            I.onlyReadsMemory()) {
+          SDValue Tmp = getValue(I.getArgOperand(0));
+          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
+                                   Tmp.getValueType(), Tmp));
+          return;
+        }
+      } else if (Name == "memcmp") {
+        if (visitMemCmpCall(I))
+          return;
+      }
+    }
+  }
+
+  SDValue Callee;
+  if (!RenameFn)
+    Callee = getValue(I.getCalledValue());
+  else
+    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
+
+  // Check if we can potentially perform a tail call.
+  // More detailed checking is done within LowerCallTo, after more
+  // information about the call is known.
+  LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+namespace llvm {
+
+/// AsmOperandInfo - This contains information for each constraint that we are
+/// lowering.
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
+    public TargetLowering::AsmOperandInfo {
+public:
+  /// CallOperand - If this is the result output operand or a clobber
+  /// this is null, otherwise it is the incoming operand to the CallInst.
+  /// This gets modified as the asm is processed.
+  SDValue CallOperand;
+
+  /// AssignedRegs - If this is a register or register class operand, this
+  /// contains the set of registers corresponding to the operand.
+  RegsForValue AssignedRegs;
+
+  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
+    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
+  }
+
+  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
+  /// busy in OutputRegs/InputRegs.
+  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
+                         std::set<unsigned> &OutputRegs,
+                         std::set<unsigned> &InputRegs,
+                         const TargetRegisterInfo &TRI) const {
+    if (isOutReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
+    }
+    if (isInReg) {
+      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
+        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
+    }
+  }
+
+  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
+  /// corresponds to. If there is no Value* for this operand, it returns
+  /// MVT::Other.
+  EVT getCallOperandValEVT(LLVMContext &Context,
+                           const TargetLowering &TLI,
+                           const TargetData *TD) const {
+    if (CallOperandVal == 0) return MVT::Other;
+
+    if (isa<BasicBlock>(CallOperandVal))
+      return TLI.getPointerTy();
+
+    const llvm::Type *OpTy = CallOperandVal->getType();
+
+    // If this is an indirect operand, the operand is a pointer to the
+    // accessed type.
+    if (isIndirect) {
+      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      if (!PtrTy)
+        report_fatal_error("Indirect operand for inline asm not a pointer!");
+      OpTy = PtrTy->getElementType();
+    }
+
+    // If OpTy is not a single value, it may be a struct/union that we
+    // can tile with integers.
+    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+      switch (BitSize) {
+      default: break;
+      case 1:
+      case 8:
+      case 16:
+      case 32:
+      case 64:
+      case 128:
+        OpTy = IntegerType::get(Context, BitSize);
+        break;
+      }
+    }
+
+    return TLI.getValueType(OpTy, true);
+  }
+
+private:
+  /// MarkRegAndAliases - Mark the specified register and all aliases in the
+  /// specified set.
+  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
+                                const TargetRegisterInfo &TRI) {
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
+    Regs.insert(Reg);
+    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
+      for (; *Aliases; ++Aliases)
+        Regs.insert(*Aliases);
+  }
+};
+
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
+} // end llvm namespace.
+
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
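+/// (Illustrative note, an assumption not stated in the original sources: on
+/// x86 this is the check that keeps a register such as ESP from being handed
+/// out for a generic register constraint, since it does not appear in any
+/// register class's allocation order.)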
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+                      const TargetLowering &TLI,
+                      const TargetRegisterInfo *TRI) {
+  EVT FoundVT = MVT::Other;
+  const TargetRegisterClass *FoundRC = 0;
+  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+       E = TRI->regclass_end(); RCI != E; ++RCI) {
+    EVT ThisVT = MVT::Other;
+
+    const TargetRegisterClass *RC = *RCI;
+    // If none of the value types for this register class are valid, we
+    // can't use it. For example, 64-bit reg classes on 32-bit targets.
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (TLI.isTypeLegal(*I)) {
+        // If we have already found this register in a different register class,
+        // choose the one with the largest VT specified. For example, on
+        // PowerPC, we favor f64 register classes over f32.
+        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+          ThisVT = *I;
+          break;
+        }
+      }
+    }
+
+    if (ThisVT == MVT::Other) continue;
+
+    // NOTE: This isn't ideal. In particular, this might allocate the
+    // frame pointer in functions that need it (due to them not being taken
+    // out of allocation, because a variable sized allocation hasn't been seen
+    // yet). This is a slight code pessimization, but should still work.
+    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+         E = RC->allocation_order_end(MF); I != E; ++I)
+      if (*I == Reg) {
+        // We found a matching register class. Keep looking at others in case
+        // we find one with larger registers that this physreg is also in.
+        FoundRC = RC;
+        FoundVT = ThisVT;
+        break;
+      }
+  }
+  return FoundRC;
+}
+
+/// GetRegistersForValue - Assign registers (virtual or physical) for the
+/// specified operand. We prefer to assign virtual registers, to allow the
+/// register allocator to handle the assignment process. However, if the asm
+/// uses features that we can't model on machineinstrs, we have SDISel do the
+/// allocation. This produces generally horrible, but correct, code.
+///
+///   OpInfo describes the operand.
+///   Input and OutputRegs are the set of already allocated physical registers.
+///
+void SelectionDAGBuilder::
+GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                     std::set<unsigned> &OutputRegs,
+                     std::set<unsigned> &InputRegs) {
+  LLVMContext &Context = FuncInfo.Fn->getContext();
+
+  // Compute whether this value requires an input register, an output register,
+  // or both.
+  bool isOutReg = false;
+  bool isInReg = false;
+  switch (OpInfo.Type) {
+  case InlineAsm::isOutput:
+    isOutReg = true;
+
+    // If there is an input constraint that matches this, we need to reserve
+    // the input register so no other inputs allocate to it.
+    isInReg = OpInfo.hasMatchingInput();
+    break;
+  case InlineAsm::isInput:
+    isInReg = true;
+    isOutReg = false;
+    break;
+  case InlineAsm::isClobber:
+    isOutReg = true;
+    isInReg = true;
+    break;
+  }
+
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  SmallVector<unsigned, 4> Regs;
+
+  // If this is a constraint for a single physreg, or a constraint for a
+  // register class, find it.
+  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
+    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                     OpInfo.ConstraintVT);
+
+  unsigned NumRegs = 1;
+  if (OpInfo.ConstraintVT != MVT::Other) {
+    // If this is a FP input in an integer register (or vice versa) insert a bit
+    // cast of the input value. More generally, handle any case where the input
+    // value disagrees with the register class we plan to stick this in.
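+    // (Example for illustration only: on a 32-bit target, passing a double
+    // to an integer register constraint reaches this point with an f64 value
+    // headed for i32 registers; the code below bitcasts it to i64 so it can
+    // later be split into two i32 parts.)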
+    if (OpInfo.Type == InlineAsm::isInput &&
+        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
+      // Try to convert to the first EVT that the reg class contains. If the
+      // types are identical size, use a bitcast to convert (e.g. two differing
+      // vector types).
+      EVT RegVT = *PhysReg.second->vt_begin();
+      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
+        // If the input is a FP value and we want it in FP registers, do a
+        // bitcast to the corresponding integer type. This turns an f64 value
+        // into i64, which can be passed with two i32 values on a 32-bit
+        // machine.
+        RegVT = EVT::getIntegerVT(Context,
+                                  OpInfo.ConstraintVT.getSizeInBits());
+        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+                                         RegVT, OpInfo.CallOperand);
+        OpInfo.ConstraintVT = RegVT;
+      }
+    }
+
+    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
+  }
+
+  EVT RegVT;
+  EVT ValueVT = OpInfo.ConstraintVT;
+
+  // If this is a constraint for a specific physical register, like {r17},
+  // assign it now.
+  if (unsigned AssignedReg = PhysReg.first) {
+    const TargetRegisterClass *RC = PhysReg.second;
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = *RC->vt_begin();
+
+    // Get the actual register value type. This is important, because the user
+    // may have asked for (e.g.) the AX register in i32 type. We need to
+    // remember that AX is actually i16 to get the right extension.
+    RegVT = *RC->vt_begin();
+
+    // This is an explicit reference to a physical register.
+    Regs.push_back(AssignedReg);
+
+    // If this is an expanded reference, add the rest of the regs to Regs.
+    if (NumRegs != 1) {
+      TargetRegisterClass::iterator I = RC->begin();
+      for (; *I != AssignedReg; ++I)
+        assert(I != RC->end() && "Didn't find reg!");
+
+      // Already added the first reg.
+      --NumRegs; ++I;
+      for (; NumRegs; --NumRegs, ++I) {
+        assert(I != RC->end() && "Ran out of registers to allocate!");
+        Regs.push_back(*I);
+      }
+    }
+
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+    return;
+  }
+
+  // Otherwise, if this was a reference to an LLVM register class, create vregs
+  // for this reference.
+  if (const TargetRegisterClass *RC = PhysReg.second) {
+    RegVT = *RC->vt_begin();
+    if (OpInfo.ConstraintVT == MVT::Other)
+      ValueVT = RegVT;
+
+    // Create the appropriate number of virtual registers.
+    MachineRegisterInfo &RegInfo = MF.getRegInfo();
+    for (; NumRegs; --NumRegs)
+      Regs.push_back(RegInfo.createVirtualRegister(RC));
+
+    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
+    return;
+  }
+
+  // This is a reference to a register class that doesn't directly correspond
+  // to an LLVM register class. Allocate NumRegs consecutive, available,
+  // registers from the class.
+  std::vector<unsigned> RegClassRegs
+    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
+                                            OpInfo.ConstraintVT);
+
+  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+  unsigned NumAllocated = 0;
+  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
+    unsigned Reg = RegClassRegs[i];
+    // See if this register is available.
+    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
+        (isInReg  && InputRegs.count(Reg))) {    // Already used.
+      // Make sure we find consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Check to see if this register is allocatable (i.e. don't give out the
+    // stack pointer).
+    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
+    if (!RC) {        // Couldn't allocate this register.
+      // Reset NumAllocated to make sure we return consecutive registers.
+      NumAllocated = 0;
+      continue;
+    }
+
+    // Okay, this register is good, we can use it.
+    ++NumAllocated;
+
+    // If we allocated enough consecutive registers, succeed.
+    if (NumAllocated == NumRegs) {
+      unsigned RegStart = (i-NumAllocated)+1;
+      unsigned RegEnd   = i+1;
+      // Mark all of the allocated registers used.
+      for (unsigned i = RegStart; i != RegEnd; ++i)
+        Regs.push_back(RegClassRegs[i]);
+
+      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
+                                         OpInfo.ConstraintVT);
+      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
+      return;
+    }
+  }
+
+  // Otherwise, we couldn't allocate enough registers for this.
+}
+
+/// visitInlineAsm - Handle a call to an InlineAsm object.
+///
+void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
+  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+
+  /// ConstraintOperands - Information about all of the constraints.
+  SDISelAsmOperandInfoVector ConstraintOperands;
+
+  std::set<unsigned> OutputRegs, InputRegs;
+
+  TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+  bool hasMemory = false;
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    EVT OpVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+        break;
+      }
+
+      // The return value of the call is this value. As such, there is no
+      // corresponding argument.
+      assert(!CS.getType()->isVoidTy() &&
+             "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpVT = TLI.getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpVT = TLI.getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    // If this is an input or an indirect output, process the call argument.
+    // BasicBlocks are labels, currently appearing only in asm's.
+    if (OpInfo.CallOperandVal) {
+      if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+      } else {
+        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
+      }
+
+      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
+    }
+
+    OpInfo.ConstraintVT = OpVT;
+
+    // Indirect operand accesses access memory.
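+    // (Illustrative example, not from the patch: an indirect output such as
+    // "=m", or any operand whose constraint code is 'm', takes one of the two
+    // branches below and forces hasMemory to true, which in turn makes the
+    // asm node serialize against pending loads.)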
+    if (OpInfo.isIndirect)
+      hasMemory = true;
+    else {
+      for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+        TargetLowering::ConstraintType
+          CType = TLI.getConstraintType(OpInfo.Codes[j]);
+        if (CType == TargetLowering::C_Memory) {
+          hasMemory = true;
+          break;
+        }
+      }
+    }
+  }
+
+  SDValue Chain, Flag;
+
+  // We won't need to flush pending loads if this asm doesn't touch
+  // memory and is nonvolatile.
+  if (hasMemory || IA->hasSideEffects())
+    Chain = getRoot();
+  else
+    Chain = DAG.getRoot();
+
+  // Second pass over the constraints: compute which constraint option to use
+  // and assign registers to constraints that want a specific physreg.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input. If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
+        }
+        Input.ConstraintVT = OpInfo.ConstraintVT;
+      }
+    }
+
+    // Compute the constraint code and ConstraintType to use.
+    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+    // If this is a memory input, and if the operand is not indirect, do what
+    // we need to in order to provide an address for the memory input.
+    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
+        !OpInfo.isIndirect) {
+      assert((OpInfo.isMultipleAlternative ||
+              (OpInfo.Type == InlineAsm::isInput)) &&
+             "Can only indirectify direct input operands!");
+
+      // Memory operands really want the address of the value. If we don't have
+      // an indirect input, put it in the constpool if we can, otherwise spill
+      // it to a stack slot.
+
+      // If the operand is a float, integer, or vector constant, spill to a
+      // constant pool entry to get its address.
+      const Value *OpVal = OpInfo.CallOperandVal;
+      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+          isa<ConstantVector>(OpVal)) {
+        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
+                                                 TLI.getPointerTy());
+      } else {
+        // Otherwise, create a stack slot and emit a store to it before the
+        // asm.
+        const Type *Ty = OpVal->getType();
+        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
+        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
+        MachineFunction &MF = DAG.getMachineFunction();
+        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+        Chain = DAG.getStore(Chain, getCurDebugLoc(),
+                             OpInfo.CallOperand, StackSlot,
+                             MachinePointerInfo::getFixedStack(SSFI),
+                             false, false, 0);
+        OpInfo.CallOperand = StackSlot;
+      }
+
+      // There is no longer a Value* corresponding to this operand.
+      OpInfo.CallOperandVal = 0;
+
+      // It is now an indirect operand.
+      OpInfo.isIndirect = true;
+    }
+
+    // If this constraint is for a specific register, allocate it before
+    // anything else.
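+    // (For instance, and purely as illustration: a constraint that names one
+    // specific register, such as "{ax}" on x86, is TargetLowering::C_Register
+    // and must be reserved here so the later pass that assigns registers for
+    // generic class constraints like "r" cannot take it first.)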
+    if (OpInfo.ConstraintType == TargetLowering::C_Register)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // Third pass - Loop over all of the operands, assigning virtual or physregs
+  // to register class operands.
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    // C_Register operands have already been allocated, Other/Memory don't need
+    // to be.
+    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
+      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
+  }
+
+  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
+  std::vector<SDValue> AsmNodeOperands;
+  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
+  AsmNodeOperands.push_back(
+          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
+                                      TLI.getPointerTy()));
+
+  // If we have a !srcloc metadata node associated with it, we want to attach
+  // this to the ultimately generated inline asm machineinstr. To do this, we
+  // pass in the third operand as this (potentially null) inline asm MDNode.
+  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
+  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+
+  // Remember the HasSideEffect and AlignStack bits as operand 3.
+  unsigned ExtraInfo = 0;
+  if (IA->hasSideEffects())
+    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+  if (IA->isAlignStack())
+    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+  AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+                                                  TLI.getPointerTy()));
+
+  // Loop over all of the inputs, copying the operand values into the
+  // appropriate registers and processing the output regs.
+  RegsForValue RetValRegs;
+
+  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
+  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+
+  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
+    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
+
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput: {
+      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
+          OpInfo.ConstraintType != TargetLowering::C_Register) {
+        // Memory output, or 'other' output (e.g. 'X' constraint).
+        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
+
+        // Add information to the INLINEASM node to know about this output.
+        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(OpInfo.CallOperand);
+        break;
+      }
+
+      // Otherwise, this is a register or register class output.
+
+      // Copy the output from the appropriate register. Find a register that
+      // we can use.
+      if (OpInfo.AssignedRegs.Regs.empty())
+        report_fatal_error("Couldn't allocate output reg for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'!");
+
+      // If this is an indirect operand, store through the pointer after the
+      // asm.
+      if (OpInfo.isIndirect) {
+        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
+                                                      OpInfo.CallOperandVal));
+      } else {
+        // This is the result value of the call.
+        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+        // Concatenate this output onto the outputs list.
+        RetValRegs.append(OpInfo.AssignedRegs);
+      }
+
+      // Add information to the INLINEASM node to know that this register is
+      // set.
+      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
+                                               InlineAsm::Kind_RegDefEarlyClobber :
+                                               InlineAsm::Kind_RegDef,
+                                               false,
+                                               0,
+                                               DAG,
+                                               AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isInput: {
+      SDValue InOperandVal = OpInfo.CallOperand;
+
+      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
+        // If this is required to match an output register we have already set,
+        // just use its register.
+        unsigned OperandNo = OpInfo.getMatchedOperand();
+
+        // Scan until we find the definition we already emitted of this operand.
+        // When we find it, create a RegsForValue operand.
+        unsigned CurOp = InlineAsm::Op_FirstOperand;
+        for (; OperandNo; --OperandNo) {
+          // Advance to the next operand.
+          unsigned OpFlag =
+            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+          assert((InlineAsm::isRegDefKind(OpFlag) ||
+                  InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+                  InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
+          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
+        }
+
+        unsigned OpFlag =
+          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+        if (InlineAsm::isRegDefKind(OpFlag) ||
+            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
+          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
+          if (OpInfo.isIndirect) {
+            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
+            LLVMContext &Ctx = *DAG.getContext();
+            Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
+                          " don't know how to handle tied "
+                          "indirect register inputs");
+          }
+
+          RegsForValue MatchedRegs;
+          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
+          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
+          MatchedRegs.RegVTs.push_back(RegVT);
+          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
+               i != e; ++i)
+            MatchedRegs.Regs.push_back
+              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
+
+          // Use the produced MatchedRegs object to copy the input value into
+          // the newly created virtual registers.
+          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                    Chain, &Flag);
+          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
+                                           true, OpInfo.getMatchedOperand(),
+                                           DAG, AsmNodeOperands);
+          break;
+        }
+
+        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
+        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
+               "Unexpected number of operands");
+        // Add information to the INLINEASM node to know about this input.
+        // See InlineAsm.h isUseOperandTiedToDef.
+        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
+                                                     OpInfo.getMatchedOperand());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
+        break;
+      }
+
+      // Treat indirect 'X' constraint as memory.
+      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+          OpInfo.isIndirect)
+        OpInfo.ConstraintType = TargetLowering::C_Memory;
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+        std::vector<SDValue> Ops;
+        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
+                                         Ops, DAG);
+        if (Ops.empty())
+          report_fatal_error("Invalid operand for inline asm constraint '" +
+                             Twine(OpInfo.ConstraintCode) + "'!");
+
+        // Add information to the INLINEASM node to know about this input.
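+        // (Illustrative note: getFlagWord packs the operand kind together
+        // with the operand count, so Kind_Imm with Ops.size() == 2 encodes
+        // "two immediate operands follow"; the matching decoders are the
+        // InlineAsm::isMemKind/getNumOperandRegisters helpers used above.)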
+        unsigned ResOpType =
+          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
+        break;
+      }
+
+      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
+        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
+        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
+               "Memory operands expect pointer values");
+
+        // Add information to the INLINEASM node to know about this input.
+        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
+        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
+                                                        TLI.getPointerTy()));
+        AsmNodeOperands.push_back(InOperandVal);
+        break;
+      }
+
+      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+              OpInfo.ConstraintType == TargetLowering::C_Register) &&
+             "Unknown constraint type!");
+      assert(!OpInfo.isIndirect &&
+             "Don't know how to handle indirect register inputs yet!");
+
+      // Copy the input into the appropriate registers.
+      if (OpInfo.AssignedRegs.Regs.empty() ||
+          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+        report_fatal_error("Couldn't allocate input reg for constraint '" +
+                           Twine(OpInfo.ConstraintCode) + "'!");
+
+      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
+                                        Chain, &Flag);
+
+      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
+                                               DAG, AsmNodeOperands);
+      break;
+    }
+    case InlineAsm::isClobber: {
+      // Add the clobbered value to the operand list, so that the register
+      // allocator is aware that the physreg got clobbered.
+      if (!OpInfo.AssignedRegs.Regs.empty())
+        OpInfo.AssignedRegs.AddInlineAsmOperands(
+                                            InlineAsm::Kind_RegDefEarlyClobber,
+                                            false, 0, DAG,
+                                            AsmNodeOperands);
+      break;
+    }
+    }
+  }
+
+  // Finish up input operands. Set the input chain and add the flag last.
+  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
+  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+
+  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
+                      DAG.getVTList(MVT::Other, MVT::Glue),
+                      &AsmNodeOperands[0], AsmNodeOperands.size());
+  Flag = Chain.getValue(1);
+
+  // If this asm returns a register value, copy the result from that register
+  // and set it as the value of the call.
+  if (!RetValRegs.Regs.empty()) {
+    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+                                             Chain, &Flag);
+
+    // FIXME: Why don't we do this for inline asms with MRVs?
+    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
+      EVT ResultType = TLI.getValueType(CS.getType());
+
+      // If any of the results of the inline asm is a vector, it may have the
+      // wrong width/num elts. This can happen for register classes that can
+      // contain multiple different value types. The preg or vreg allocated may
+      // not have the same VT as was expected. Convert it to the right type
+      // with bit_convert.
+      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
+        Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
+                          ResultType, Val);
+
+      } else if (ResultType != Val.getValueType() &&
+                 ResultType.isInteger() && Val.getValueType().isInteger()) {
+        // If a result value was tied to an input value, the computed result
+        // may have a wider width than the expected result. Extract the
+        // relevant portion.
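+        // (Example, illustrative only: an i16 result tied to an i32 input
+        // leaves Val as an i32 at this point; the truncate below recovers
+        // the i16 the caller expects.)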
+        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
+      }
+
+      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
+    }
+
+    setValue(CS.getInstruction(), Val);
+    // Don't need to use this as a chain in this case.
+    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
+      return;
+  }
+
+  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+
+  // Process indirect outputs, first output all of the flagged copies out of
+  // physregs.
+  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
+    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
+    const Value *Ptr = IndirectStoresToEmit[i].second;
+    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
+                                             Chain, &Flag);
+    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+  }
+
+  // Emit the non-flagged stores from the physregs.
+  SmallVector<SDValue, 8> OutChains;
+  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
+    SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
+                               StoresToEmit[i].first,
+                               getValue(StoresToEmit[i].second),
+                               MachinePointerInfo(StoresToEmit[i].second),
+                               false, false, 0);
+    OutChains.push_back(Val);
+  }
+
+  if (!OutChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
+                        &OutChains[0], OutChains.size());
+
+  DAG.setRoot(Chain);
+}
+
+void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getArgOperand(0)),
+                          DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+  const TargetData &TD = *TLI.getTargetData();
+  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
+                           getRoot(), getValue(I.getOperand(0)),
+                           DAG.getSrcValue(I.getOperand(0)),
+                           TD.getABITypeAlignment(I.getType()));
+  setValue(&I, V);
+  DAG.setRoot(V.getValue(1));
+}
+
+void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getArgOperand(0)),
+                          DAG.getSrcValue(I.getArgOperand(0))));
+}
+
+void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
+  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
+                          MVT::Other, getRoot(),
+                          getValue(I.getArgOperand(0)),
+                          getValue(I.getArgOperand(1)),
+                          DAG.getSrcValue(I.getArgOperand(0)),
+                          DAG.getSrcValue(I.getArgOperand(1))));
+}
+
+/// TargetLowering::LowerCallTo - This is the default LowerCallTo
+/// implementation, which just calls LowerCall.
+/// FIXME: When all targets are
+/// migrated to using LowerCall, this hook should be integrated into SDISel.
+std::pair<SDValue, SDValue>
+TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
+                            bool RetSExt, bool RetZExt, bool isVarArg,
+                            bool isInreg, unsigned NumFixedArgs,
+                            CallingConv::ID CallConv, bool isTailCall,
+                            bool isReturnValueUsed,
+                            SDValue Callee,
+                            ArgListTy &Args, SelectionDAG &DAG,
+                            DebugLoc dl) const {
+  // Handle all of the outgoing arguments.
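+  // (Illustrative sketch of the loop below: an i64 argument on a 32-bit
+  // target is broken into two i32 parts, producing two ISD::OutputArg
+  // entries; the first part is tagged with setSplit() and later parts have
+  // their original alignment forced to 1.)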
+  SmallVector<ISD::OutputArg, 32> Outs;
+  SmallVector<SDValue, 32> OutVals;
+  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
+      SDValue Op = SDValue(Args[i].Node.getNode(),
+                           Args[i].Node.getResNo() + Value);
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        getTargetData()->getABITypeAlignment(ArgTy);
+
+      if (Args[i].isZExt)
+        Flags.setZExt();
+      if (Args[i].isSExt)
+        Flags.setSExt();
+      if (Args[i].isInReg)
+        Flags.setInReg();
+      if (Args[i].isSRet)
+        Flags.setSRet();
+      if (Args[i].isByVal) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should come from FE. BE will guess if this
+        // info is not there but there are cases it cannot get right.
+        if (Args[i].Alignment)
+          FrameAlign = Args[i].Alignment;
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (Args[i].isNest)
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
+      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
+      SmallVector<SDValue, 4> Parts(NumParts);
+      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+      if (Args[i].isSExt)
+        ExtendKind = ISD::SIGN_EXTEND;
+      else if (Args[i].isZExt)
+        ExtendKind = ISD::ZERO_EXTEND;
+
+      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
+                     PartVT, ExtendKind);
+
+      for (unsigned j = 0; j != NumParts; ++j) {
+        // if it isn't first piece, alignment must be 1
+        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+                               i < NumFixedArgs);
+        if (NumParts > 1 && j == 0)
+          MyFlags.Flags.setSplit();
+        else if (j != 0)
+          MyFlags.Flags.setOrigAlign(1);
+
+        Outs.push_back(MyFlags);
+        OutVals.push_back(Parts[j]);
+      }
+    }
+  }
+
+  // Handle the incoming return values from the call.
+  SmallVector<ISD::InputArg, 32> Ins;
+  SmallVector<EVT, 4> RetTys;
+  ComputeValueVTs(*this, RetTy, RetTys);
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      ISD::InputArg MyFlags;
+      MyFlags.VT = RegisterVT.getSimpleVT();
+      MyFlags.Used = isReturnValueUsed;
+      if (RetSExt)
+        MyFlags.Flags.setSExt();
+      if (RetZExt)
+        MyFlags.Flags.setZExt();
+      if (isInreg)
+        MyFlags.Flags.setInReg();
+      Ins.push_back(MyFlags);
+    }
+  }
+
+  SmallVector<SDValue, 4> InVals;
+  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
+                    Outs, OutVals, Ins, dl, DAG, InVals);
+
+  // Verify that the target's LowerCall behaved as expected.
+  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
+         "LowerCall didn't return a valid chain!");
+  assert((!isTailCall || InVals.empty()) &&
+         "LowerCall emitted a return value for a tail call!");
+  assert((isTailCall || InVals.size() == Ins.size()) &&
+         "LowerCall didn't emit the correct number of values!");
+
+  // For a tail call, the return value is merely live-out and there aren't
+  // any nodes in the DAG representing it. Return a special value to
+  // indicate that a tail call has been emitted and no more Instructions
+  // should be processed in the current block.
+  if (isTailCall) {
+    DAG.setRoot(Chain);
+    return std::make_pair(SDValue(), SDValue());
+  }
+
+  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+          assert(InVals[i].getNode() &&
+                 "LowerCall emitted a null value!");
+          assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+                 "LowerCall emitted a value with the wrong type!");
+        });
+
+  // Collect the legal value parts into potentially illegal values
+  // that correspond to the original function's return values.
+  ISD::NodeType AssertOp = ISD::DELETED_NODE;
+  if (RetSExt)
+    AssertOp = ISD::AssertSext;
+  else if (RetZExt)
+    AssertOp = ISD::AssertZext;
+  SmallVector<SDValue, 4> ReturnValues;
+  unsigned CurReg = 0;
+  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+    EVT VT = RetTys[I];
+    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
+    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
+
+    ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
+                                            NumRegs, RegisterVT, VT,
+                                            AssertOp));
+    CurReg += NumRegs;
+  }
+
+  // For a function returning void, there is no return value. We can't create
+  // such a node, so we just return a null return value in that case. In
+  // that case, nothing will actually look at the value.
+  if (ReturnValues.empty())
+    return std::make_pair(SDValue(), Chain);
+
+  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
+                            DAG.getVTList(&RetTys[0], RetTys.size()),
+                            &ReturnValues[0], ReturnValues.size());
+  return std::make_pair(Res, Chain);
+}
+
+void TargetLowering::LowerOperationWrapper(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) const {
+  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  llvm_unreachable("LowerOperation not implemented for this target!");
+  return SDValue();
+}
+
+void
+SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
+  SDValue Op = getNonRegisterValue(V);
+  assert((Op.getOpcode() != ISD::CopyFromReg ||
+          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
+         "Copy from a reg to the same reg!");
+  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+
+  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
+  SDValue Chain = DAG.getEntryNode();
+  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
+  PendingExports.push_back(Chain);
+}
+
+#include "llvm/CodeGen/SelectionDAGISel.h"
+
+void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
+  // If this is the entry block, emit arguments.
+  const Function &F = *LLVMBB->getParent();
+  SelectionDAG &DAG = SDB->DAG;
+  DebugLoc dl = SDB->getCurDebugLoc();
+  const TargetData *TD = TLI.getTargetData();
+  SmallVector<ISD::InputArg, 16> Ins;
+
+  // Check whether the function can return without sret-demotion.
+  SmallVector<ISD::OutputArg, 4> Outs;
+  GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+                Outs, TLI);
+
+  if (!FuncInfo->CanLowerReturn) {
+    // Put in an sret pointer parameter before all the other parameters.
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+    // NOTE: Assuming that a pointer will never break down to more than one VT
+    // or one register.
+    ISD::ArgFlagsTy Flags;
+    Flags.setSRet();
+    EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
+    ISD::InputArg RetArg(Flags, RegisterVT, true);
+    Ins.push_back(RetArg);
+  }
+
+  // Set up the incoming argument description vector.
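+  // (Note added for clarity: Idx starts at 1 because attribute indices are
+  // 1-based for parameters, with index 0 reserved for the return value, so
+  // paramHasAttr(Idx, ...) below lines up with the Idx-th IR parameter.)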
+  unsigned Idx = 1;
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
+       I != E; ++I, ++Idx) {
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    bool isArgValueUsed = !I->use_empty();
+    for (unsigned Value = 0, NumValues = ValueVTs.size();
+         Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+      ISD::ArgFlagsTy Flags;
+      unsigned OriginalAlignment =
+        TD->getABITypeAlignment(ArgTy);
+
+      if (F.paramHasAttr(Idx, Attribute::ZExt))
+        Flags.setZExt();
+      if (F.paramHasAttr(Idx, Attribute::SExt))
+        Flags.setSExt();
+      if (F.paramHasAttr(Idx, Attribute::InReg))
+        Flags.setInReg();
+      if (F.paramHasAttr(Idx, Attribute::StructRet))
+        Flags.setSRet();
+      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
+        Flags.setByVal();
+        const PointerType *Ty = cast<PointerType>(I->getType());
+        const Type *ElementTy = Ty->getElementType();
+        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
+        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
+        // For ByVal, alignment should be passed from FE. BE will guess if
+        // this info is not there but there are cases it cannot get right.
+        if (F.getParamAlignment(Idx))
+          FrameAlign = F.getParamAlignment(Idx);
+        Flags.setByValAlign(FrameAlign);
+        Flags.setByValSize(FrameSize);
+      }
+      if (F.paramHasAttr(Idx, Attribute::Nest))
+        Flags.setNest();
+      Flags.setOrigAlign(OriginalAlignment);
+
+      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+      for (unsigned i = 0; i != NumRegs; ++i) {
+        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
+        if (NumRegs > 1 && i == 0)
+          MyFlags.Flags.setSplit();
+        // if it isn't first piece, alignment must be 1
+        else if (i > 0)
+          MyFlags.Flags.setOrigAlign(1);
+        Ins.push_back(MyFlags);
+      }
+    }
+  }
+
+  // Call the target to set up the argument values.
+  SmallVector<SDValue, 8> InVals;
+  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
+                                             F.isVarArg(), Ins,
+                                             dl, DAG, InVals);
+
+  // Verify that the target's LowerFormalArguments behaved as expected.
+  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
+         "LowerFormalArguments didn't return a valid chain!");
+  assert(InVals.size() == Ins.size() &&
+         "LowerFormalArguments didn't emit the correct number of values!");
+  DEBUG({
+      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+        assert(InVals[i].getNode() &&
+               "LowerFormalArguments emitted a null value!");
+        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
+               "LowerFormalArguments emitted a value with the wrong type!");
+      }
+    });
+
+  // Update the DAG with the new chain value resulting from argument lowering.
+  DAG.setRoot(NewRoot);
+
+  // Set up the argument values.
+  unsigned i = 0;
+  Idx = 1;
+  if (!FuncInfo->CanLowerReturn) {
+    // Create a virtual register for the sret pointer, and put in a copy
+    // from the sret argument into it.
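+    // (Illustrative example: a function returning a large struct by value is
+    // "sret-demoted" into taking a hidden pointer parameter; the code below
+    // captures that hidden pointer in DemoteRegister so visitRet can store
+    // the return value through it.)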
+    SmallVector<EVT, 1> ValueVTs;
+    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+    EVT VT = ValueVTs[0];
+    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+    ISD::NodeType AssertOp = ISD::DELETED_NODE;
+    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
+                                        RegVT, VT, AssertOp);
+
+    MachineFunction& MF = SDB->DAG.getMachineFunction();
+    MachineRegisterInfo& RegInfo = MF.getRegInfo();
+    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+    FuncInfo->DemoteRegister = SRetReg;
+    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
+                                    SRetReg, ArgValue);
+    DAG.setRoot(NewRoot);
+
+    // i indexes lowered arguments. Bump it past the hidden sret argument.
+    // Idx indexes LLVM arguments. Don't touch it.
+    ++i;
+  }
+
+  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
+       ++I, ++Idx) {
+    SmallVector<SDValue, 4> ArgValues;
+    SmallVector<EVT, 4> ValueVTs;
+    ComputeValueVTs(TLI, I->getType(), ValueVTs);
+    unsigned NumValues = ValueVTs.size();
+
+    // If this argument is unused then remember its value. It is used to
+    // generate debugging information.
+    if (I->use_empty() && NumValues)
+      SDB->setUnusedArgValue(I, InVals[i]);
+
+    for (unsigned Value = 0; Value != NumValues; ++Value) {
+      EVT VT = ValueVTs[Value];
+      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
+
+      if (!I->use_empty()) {
+        ISD::NodeType AssertOp = ISD::DELETED_NODE;
+        if (F.paramHasAttr(Idx, Attribute::SExt))
+          AssertOp = ISD::AssertSext;
+        else if (F.paramHasAttr(Idx, Attribute::ZExt))
+          AssertOp = ISD::AssertZext;
+
+        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
+                                             NumParts, PartVT, VT,
+                                             AssertOp));
+      }
+
+      i += NumParts;
+    }
+
+    // Note down frame index for byval arguments.
+    if (I->hasByValAttr() && !ArgValues.empty())
+      if (FrameIndexSDNode *FI =
+          dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
+        FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
+
+    if (!I->use_empty()) {
+      SDValue Res;
+      if (!ArgValues.empty())
+        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+                                 SDB->getCurDebugLoc());
+      SDB->setValue(I, Res);
+
+      // If this argument is live outside of the entry block, insert a copy from
+      // wherever we got it to the vreg that other BB's will reference it as.
+      SDB->CopyToExportRegsIfNeeded(I);
+    }
+  }
+
+  assert(i == InVals.size() && "Argument register count mismatch!");
+
+  // Finally, if the target has anything special to do, allow it to do so.
+  // FIXME: this should insert code into the DAG!
+  EmitFunctionEntryCode();
+}
+
+/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to
+/// ensure constants are generated when needed. Remember the virtual registers
+/// that need to be added to the Machine PHI nodes as input. We cannot just
+/// directly add them, because expansion might result in multiple MBB's for one
+/// BB. As such, the start of the BB might correspond to a different MBB than
+/// the end.
+///
+void
+SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
+  const TerminatorInst *TI = LLVMBB->getTerminator();
+
+  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
+
+  // Check successor nodes' PHI nodes that expect a constant to be available
+  // from this block.
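+  // (Example for illustration: given a successor PHI such as
+  //    %x = phi i32 [ 7, %this_block ], [ %y, %other_block ]
+  //  the constant 7 must be materialized into a virtual register in this
+  //  block; ConstantsOut memoizes that register so multiple PHIs that use
+  //  the same constant share a single copy.)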
+  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
+    const BasicBlock *SuccBB = TI->getSuccessor(succ);
+    if (!isa<PHINode>(SuccBB->begin())) continue;
+    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
+
+    // If this terminator has multiple identical successors (common for
+    // switches), only handle each succ once.
+    if (!SuccsHandled.insert(SuccMBB)) continue;
+
+    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
+
+    // At this point we know that there is a 1-1 correspondence between LLVM PHI
+    // nodes and Machine PHI nodes, but the incoming operands have not been
+    // emitted yet.
+    for (BasicBlock::const_iterator I = SuccBB->begin();
+         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+      // Ignore dead phi's.
+      if (PN->use_empty()) continue;
+
+      unsigned Reg;
+      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+
+      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
+        unsigned &RegOut = ConstantsOut[C];
+        if (RegOut == 0) {
+          RegOut = FuncInfo.CreateRegs(C->getType());
+          CopyValueToVirtualRegister(C, RegOut);
+        }
+        Reg = RegOut;
+      } else {
+        DenseMap<const Value *, unsigned>::iterator I =
+          FuncInfo.ValueMap.find(PHIOp);
+        if (I != FuncInfo.ValueMap.end())
+          Reg = I->second;
+        else {
+          assert(isa<AllocaInst>(PHIOp) &&
+                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
+                 "Didn't codegen value into a register!??");
+          Reg = FuncInfo.CreateRegs(PHIOp->getType());
+          CopyValueToVirtualRegister(PHIOp, Reg);
+        }
+      }
+
+      // Remember that this register needs to be added to the machine PHI node
+      // as the input for this MBB.
+      SmallVector<EVT, 4> ValueVTs;
+      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
+        EVT VT = ValueVTs[vti];
+        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
+        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
+          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
+        Reg += NumRegisters;
+      }
+    }
+  }
+  ConstantsOut.clear();
+}
diff --git a/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
new file mode 100644
index 00000000000..8f466d913bb
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -0,0 +1,543 @@
+//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements routines for translating from LLVM IR into SelectionDAG IR.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SELECTIONDAGBUILDER_H
+#define SELECTIONDAGBUILDER_H
+
+#include "llvm/Constants.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+class AliasAnalysis;
+class AllocaInst;
+class BasicBlock;
+class BitCastInst;
+class BranchInst;
+class CallInst;
+class DbgValueInst;
+class ExtractElementInst;
+class ExtractValueInst;
+class FCmpInst;
+class FPExtInst;
+class FPToSIInst;
+class FPToUIInst;
+class FPTruncInst;
+class Function;
+class FunctionLoweringInfo;
+class GetElementPtrInst;
+class GCFunctionInfo;
+class ICmpInst;
+class IntToPtrInst;
+class IndirectBrInst;
+class InvokeInst;
+class InsertElementInst;
+class InsertValueInst;
+class Instruction;
+class LoadInst;
+class MachineBasicBlock;
+class MachineInstr;
+class MachineRegisterInfo;
+class MDNode;
+class PHINode;
+class PtrToIntInst;
+class ReturnInst;
+class SDISelAsmOperandInfo;
+class SDDbgValue;
+class SExtInst;
+class SelectInst;
+class ShuffleVectorInst;
+class SIToFPInst;
+class StoreInst;
+class SwitchInst;
+class TargetData;
+class TargetLowering;
+class TruncInst;
+class UIToFPInst;
+class UnreachableInst;
+class UnwindInst;
+class VAArgInst;
+class ZExtInst;
+
+//===----------------------------------------------------------------------===//
+/// SelectionDAGBuilder - This is the common target-independent lowering
+/// implementation that is parameterized by a TargetLowering object.
+///
+class SelectionDAGBuilder {
+  /// CurDebugLoc - current file + line number. Changes as we build the DAG.
+  DebugLoc CurDebugLoc;
+
+  DenseMap<const Value*, SDValue> NodeMap;
+
+  /// UnusedArgNodeMap - Maps argument values for unused arguments. This is used
+  /// to preserve debug information for incoming arguments.
+  DenseMap<const Value*, SDValue> UnusedArgNodeMap;
+
+  /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+  class DanglingDebugInfo {
+    const DbgValueInst* DI;
+    DebugLoc dl;
+    unsigned SDNodeOrder;
+  public:
+    DanglingDebugInfo() : DI(0), dl(DebugLoc()), SDNodeOrder(0) { }
+    DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) :
+      DI(di), dl(DL), SDNodeOrder(SDNO) { }
+    const DbgValueInst* getDI() { return DI; }
+    DebugLoc getdl() { return dl; }
+    unsigned getSDNodeOrder() { return SDNodeOrder; }
+  };
+
+  /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
+  /// yet seen the referent. We defer handling these until we do see it.
+  DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap;
+
+public:
+  /// PendingLoads - Loads are not emitted to the program immediately. We bunch
+  /// them up and then emit token factor nodes when possible. This allows us to
+  /// get simple disambiguation between loads without worrying about alias
+  /// analysis.
+  SmallVector<SDValue, 8> PendingLoads;
+private:
+
+  /// PendingExports - CopyToReg nodes that copy values to virtual registers
+  /// for export to other blocks need to be emitted before any terminator
+  /// instruction, but they have no other ordering requirements. We bunch them
+  /// up and then emit a single tokenfactor for them just before terminator
+  /// instructions.
+  SmallVector<SDValue, 8> PendingExports;
+
+  /// SDNodeOrder - A unique monotonically increasing number used to order the
+  /// SDNodes we create.
+  unsigned SDNodeOrder;
+
+  /// Case - A struct to record the Value for a switch case, and the
+  /// case's target basic block.
+  struct Case {
+    Constant* Low;
+    Constant* High;
+    MachineBasicBlock* BB;
+
+    Case() : Low(0), High(0), BB(0) { }
+    Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
+      Low(low), High(high), BB(bb) { }
+    APInt size() const {
+      const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+      const APInt &rLow  = cast<ConstantInt>(Low)->getValue();
+      return (rHigh - rLow + 1ULL);
+    }
+  };
+
+  struct CaseBits {
+    uint64_t Mask;
+    MachineBasicBlock* BB;
+    unsigned Bits;
+
+    CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits):
+      Mask(mask), BB(bb), Bits(bits) { }
+  };
+
+  typedef std::vector<Case>           CaseVector;
+  typedef std::vector<CaseBits>       CaseBitsVector;
+  typedef CaseVector::iterator        CaseItr;
+  typedef std::pair<CaseItr, CaseItr> CaseRange;
+
+  /// CaseRec - A struct with ctor used in lowering switches to a binary tree
+  /// of conditional branches.
+  struct CaseRec {
+    CaseRec(MachineBasicBlock *bb, const Constant *lt, const Constant *ge,
+            CaseRange r) :
+    CaseBB(bb), LT(lt), GE(ge), Range(r) {}
+
+    /// CaseBB - The MBB in which to emit the compare and branch
+    MachineBasicBlock *CaseBB;
+    /// LT, GE - If nonzero, we know the current case value must be less-than or
+    /// greater-than-or-equal-to these Constants.
+    const Constant *LT;
+    const Constant *GE;
+    /// Range - A pair of iterators representing the range of case values to be
+    /// processed at this point in the binary search tree.
+    CaseRange Range;
+  };
+
+  typedef std::vector<CaseRec> CaseRecVector;
+
+  /// The comparison function for sorting the switch case values in the vector.
+  /// WARNING: Case ranges should be disjoint!
+  struct CaseCmp {
+    bool operator()(const Case &C1, const Case &C2) {
+      assert(isa<ConstantInt>(C1.Low) && isa<ConstantInt>(C2.High));
+      const ConstantInt* CI1 = cast<ConstantInt>(C1.Low);
+      const ConstantInt* CI2 = cast<ConstantInt>(C2.High);
+      return CI1->getValue().slt(CI2->getValue());
+    }
+  };
+
+  struct CaseBitsCmp {
+    bool operator()(const CaseBits &C1, const CaseBits &C2) {
+      return C1.Bits > C2.Bits;
+    }
+  };
+
+  size_t Clusterify(CaseVector &Cases, const SwitchInst &SI);
+
+  /// CaseBlock - This structure is used to communicate between
+  /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+  /// blocks needed by multi-case switch statements.
+  struct CaseBlock {
+    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+              const Value *cmpmiddle,
+              MachineBasicBlock *truebb, MachineBasicBlock *falsebb,
+              MachineBasicBlock *me)
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me) {}
+    // CC - the condition code to use for the case block's setcc node
+    ISD::CondCode CC;
+    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+    // Emit by default LHS op RHS. MHS is used for range comparisons:
+    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+    const Value *CmpLHS, *CmpMHS, *CmpRHS;
+    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+    MachineBasicBlock *TrueBB, *FalseBB;
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+  };
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB. This is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    const Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr):
+      Mask(M), ThisBB(T), TargetBB(Tr) { }
+    uint64_t Mask;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, const Value* SV,
+                 unsigned Rg, EVT RgVT, bool E,
+                 MachineBasicBlock* P, MachineBasicBlock* D,
+                 const BitTestInfo& C):
+      First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+      Parent(P), Default(D), Cases(C) { }
+    APInt First;
+    APInt Range;
+    const Value *SValue;
+    unsigned Reg;
+    EVT RegVT;
+    bool Emitted;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+  };
+
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering. This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<const Constant *, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  ///
+  CodeGenOpt::Level OptLevel;
+
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  /// HasTailCall - This is set to true if a call in the current
+  /// block has been translated as a tail call. In this case,
+  /// no subsequent DAG nodes should be created.
+  ///
+  bool HasTailCall;
+
+  LLVMContext *Context;
+
+  SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+                      CodeGenOpt::Level ol)
+    : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+      DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+      HasTailCall(false), Context(dag.getContext()) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGBuilder object to be used
+  /// for a new block. This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items.
+public:
+  // TLI - This is information that describes the available target features we
+  // need for lowering.  This indicates when operations are unavailable,
+  // implemented with a libcall, etc.
+  const TargetMachine &TM;
+  const TargetLowering &TLI;
+  SelectionDAG &DAG;
+  const TargetData *TD;
+  AliasAnalysis *AA;
+
+  /// SwitchCases - Vector of CaseBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<CaseBlock> SwitchCases;
+  /// JTCases - Vector of JumpTable structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<JumpTableBlock> JTCases;
+  /// BitTestCases - Vector of BitTestBlock structures used to communicate
+  /// SwitchInst code generation information.
+  std::vector<BitTestBlock> BitTestCases;
+
+  // Emit PHI-node-operand constants only once even if used by multiple
+  // PHI nodes.
+  DenseMap<const Constant *, unsigned> ConstantsOut;
+
+  /// FuncInfo - Information about the function as a whole.
+  ///
+  FunctionLoweringInfo &FuncInfo;
+
+  /// OptLevel - What optimization level we're generating code for.
+  ///
+  CodeGenOpt::Level OptLevel;
+
+  /// GFI - Garbage collection metadata for the function.
+  GCFunctionInfo *GFI;
+
+  /// HasTailCall - This is set to true if a call in the current
+  /// block has been translated as a tail call.  In this case,
+  /// no subsequent DAG nodes should be created.
+  ///
+  bool HasTailCall;
+
+  LLVMContext *Context;
+
+  SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
+                      CodeGenOpt::Level ol)
+    : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
+      DAG(dag), FuncInfo(funcinfo), OptLevel(ol),
+      HasTailCall(false), Context(dag.getContext()) {
+  }
+
+  void init(GCFunctionInfo *gfi, AliasAnalysis &aa);
+
+  /// clear - Clear out the current SelectionDAG and the associated
+  /// state and prepare this SelectionDAGBuilder object to be used
+  /// for a new block.  This doesn't clear out information about
+  /// additional blocks that are needed to complete switch lowering
+  /// or PHI node updating; that information is cleared out as it is
+  /// consumed.
+  void clear();
+
+  /// getRoot - Return the current virtual root of the Selection DAG,
+  /// flushing any PendingLoad items.  This must be done before emitting
+  /// a store or any other node that may need to be ordered after any
+  /// prior load instructions.
+  ///
+  SDValue getRoot();
+
+  /// getControlRoot - Similar to getRoot, but instead of flushing all the
+  /// PendingLoad items, flush all the PendingExports items.  It is necessary
+  /// to do this before emitting a terminator instruction.
+  ///
+  SDValue getControlRoot();
+
+  DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
+
+  unsigned getSDNodeOrder() const { return SDNodeOrder; }
+
+  void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
+
+  /// AssignOrderingToNode - Assign an ordering to the node.  The order is
+  /// taken from how the code appeared in the source.  The ordering is used by
+  /// the scheduler to effectively turn off scheduling.
+  void AssignOrderingToNode(const SDNode *Node);
+
+  void visit(const Instruction &I);
+
+  void visit(unsigned Opcode, const User &I);
+
+  // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
+  // generate the debug data structures now that we've seen its definition.
+  void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+  SDValue getValue(const Value *V);
+  SDValue getNonRegisterValue(const Value *V);
+  SDValue getValueImpl(const Value *V);
+
+  void setValue(const Value *V, SDValue NewN) {
+    SDValue &N = NodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+
+  void setUnusedArgValue(const Value *V, SDValue NewN) {
+    SDValue &N = UnusedArgNodeMap[V];
+    assert(N.getNode() == 0 && "Already set a value for this node!");
+    N = NewN;
+  }
+
+  void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
+                            std::set<unsigned> &OutputRegs,
+                            std::set<unsigned> &InputRegs);
+
+  void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            MachineBasicBlock *SwitchBB, unsigned Opc);
+  void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB,
+                                    MachineBasicBlock *SwitchBB);
+  bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+  bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
+  void CopyToExportRegsIfNeeded(const Value *V);
+  void ExportFromCurrentBlock(const Value *V);
+  void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
+                   MachineBasicBlock *LandingPad = NULL);
+
+  /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+  /// references that need to refer to the last resulting block.
+  void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
+private:
+  // Terminator instructions.
+  void visitRet(const ReturnInst &I);
+  void visitBr(const BranchInst &I);
+  void visitSwitch(const SwitchInst &I);
+  void visitIndirectBr(const IndirectBrInst &I);
+  void visitUnreachable(const UnreachableInst &I) { /* noop */ }
+
+  // Helpers for visitSwitch
+  bool handleSmallSwitchRange(CaseRec& CR,
+                              CaseRecVector& WorkList,
+                              const Value* SV,
+                              MachineBasicBlock* Default,
+                              MachineBasicBlock *SwitchBB);
+  bool handleJTSwitchCase(CaseRec& CR,
+                          CaseRecVector& WorkList,
+                          const Value* SV,
+                          MachineBasicBlock* Default,
+                          MachineBasicBlock *SwitchBB);
+  bool handleBTSplitSwitchCase(CaseRec& CR,
+                               CaseRecVector& WorkList,
+                               const Value* SV,
+                               MachineBasicBlock* Default,
+                               MachineBasicBlock *SwitchBB);
+  bool handleBitTestsSwitchCase(CaseRec& CR,
+                                CaseRecVector& WorkList,
+                                const Value* SV,
+                                MachineBasicBlock* Default,
+                                MachineBasicBlock *SwitchBB);
+public:
+  void visitSwitchCase(CaseBlock &CB,
+                       MachineBasicBlock *SwitchBB);
+  void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
+  void visitBitTestCase(BitTestBlock &BB,
+                        MachineBasicBlock* NextMBB,
+                        unsigned Reg,
+                        BitTestCase &B,
+                        MachineBasicBlock *SwitchBB);
+  void visitJumpTable(JumpTable &JT);
+  void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+                            MachineBasicBlock *SwitchBB);
+
+private:
+  // These all get lowered before this pass.
+  void visitInvoke(const InvokeInst &I);
+  void visitUnwind(const UnwindInst &I);
+
+  void visitBinary(const User &I, unsigned OpCode);
+  void visitShift(const User &I, unsigned Opcode);
+  void visitAdd(const User &I)  { visitBinary(I, ISD::ADD); }
+  void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); }
+  void visitSub(const User &I)  { visitBinary(I, ISD::SUB); }
+  void visitFSub(const User &I);
+  void visitMul(const User &I)  { visitBinary(I, ISD::MUL); }
+  void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); }
+  void visitURem(const User &I) { visitBinary(I, ISD::UREM); }
+  void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
+  void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
+  void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
+  void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+  void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
+  void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
+  void visitOr  (const User &I) { visitBinary(I, ISD::OR); }
+  void visitXor (const User &I) { visitBinary(I, ISD::XOR); }
+  void visitShl (const User &I) { visitShift(I, ISD::SHL); }
+  void visitLShr(const User &I) { visitShift(I, ISD::SRL); }
+  void visitAShr(const User &I) { visitShift(I, ISD::SRA); }
+  void visitICmp(const User &I);
+  void visitFCmp(const User &I);
+  // Visit the conversion instructions
+  void visitTrunc(const User &I);
+  void visitZExt(const User &I);
+  void visitSExt(const User &I);
+  void visitFPTrunc(const User &I);
+  void visitFPExt(const User &I);
+  void visitFPToUI(const User &I);
+  void visitFPToSI(const User &I);
+  void visitUIToFP(const User &I);
+  void visitSIToFP(const User &I);
+  void visitPtrToInt(const User &I);
+  void visitIntToPtr(const User &I);
+  void visitBitCast(const User &I);
+
+  void visitExtractElement(const User &I);
+  void visitInsertElement(const User &I);
+  void visitShuffleVector(const User &I);
+
+  void visitExtractValue(const ExtractValueInst &I);
+  void visitInsertValue(const InsertValueInst &I);
+
+  void visitGetElementPtr(const User &I);
+  void visitSelect(const User &I);
+
+  void visitAlloca(const AllocaInst &I);
+  void visitLoad(const LoadInst &I);
+  void visitStore(const StoreInst &I);
+  void visitPHI(const PHINode &I);
+  void visitCall(const CallInst &I);
+  bool visitMemCmpCall(const CallInst &I);
+
+  void visitInlineAsm(ImmutableCallSite CS);
+  const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+  void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
+
+  void visitPow(const CallInst &I);
+  void visitExp2(const CallInst &I);
+  void visitExp(const CallInst &I);
+  void visitLog(const CallInst &I);
+  void visitLog2(const CallInst &I);
+  void visitLog10(const CallInst &I);
+
+  void visitVAStart(const CallInst &I);
+  void visitVAArg(const VAArgInst &I);
+  void visitVAEnd(const CallInst &I);
+  void visitVACopy(const CallInst &I);
+
+  void visitUserOp1(const Instruction &I) {
+    llvm_unreachable("UserOp1 should not exist at instruction selection time!");
+  }
+  void visitUserOp2(const Instruction &I) {
+    llvm_unreachable("UserOp2 should not exist at instruction selection time!");
+  }
+
+  const char *implVisitBinaryAtomic(const CallInst& I, ISD::NodeType Op);
+  const char *implVisitAluOverflow(const CallInst &I, ISD::NodeType Op);
+
+  void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB);
+
+  /// EmitFuncArgumentDbgValue - If V is a function argument then create
+  /// the corresponding DBG_VALUE machine instruction for it now.  At the end
+  /// of instruction selection, they will be inserted into the entry BB.
+  bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
+                                int64_t Offset, const SDValue &N);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/final/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
new file mode 100644
index 00000000000..8387bde46e3
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -0,0 +1,2783 @@
+//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAGISel class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "isel"
+#include "ScheduleDAGSDNodes.h"
+#include "SelectionDAGBuilder.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SchedulerRegistry.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Timer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
+STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+
+#ifndef NDEBUG
+STATISTIC(NumBBWithOutOfOrderLineInfo,
+          "Number of blocks with out of order line number info");
+STATISTIC(NumMBBWithOutOfOrderLineInfo,
+          "Number of machine blocks with out of order line number info");
+#endif
+
+static cl::opt<bool>
+EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
+          cl::desc("Enable verbose messages in the \"fast\" "
+                   "instruction selector"));
+static cl::opt<bool>
+EnableFastISelAbort("fast-isel-abort", cl::Hidden,
+          cl::desc("Enable abort calls when \"fast\" instruction fails"));
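[Editor's sketch: the declarations above and below use LLVM's cl::opt command-line machinery; each static object registers a flag at program start. A minimal standalone tool showing the same pattern (the flag name and tool here are illustrative, not part of this patch):

#include "llvm/Support/CommandLine.h"
#include <cstdio>
using namespace llvm;

// Registers -my-verbose; cl::Hidden keeps it out of -help, like the flags above.
static cl::opt<bool>
MyVerbose("my-verbose", cl::Hidden, cl::init(false),
          cl::desc("Enable verbose messages in the demo tool"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "cl::opt demo\n");
  if (MyVerbose)
    std::printf("verbose mode on\n");
  return 0;
}

The cl::opt<bool> object converts to bool, so the flag reads like a plain variable at use sites, as in the NDEBUG-guarded block that follows.]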
+ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. + ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering &TLI = IS->getTargetLowering(); + + if (OptLevel == CodeGenOpt::None) + return createSourceListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::Latency) + return createTDListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + if (TLI.getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + assert(TLI.getSchedulingPreference() == Sched::ILP && + "Unknown sched type!"); + return createILPListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and this method is called to expand it into a sequence of +// instructions, potentially also creating new basic blocks and control flow. +// When new basic blocks are inserted and the edges from MBB to its successors +// are modified, the method should insert pairs of into the +// DenseMap. 
+MachineBasicBlock *
+TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
+                                            MachineBasicBlock *MBB) const {
+#ifndef NDEBUG
+  dbgs() << "If a target marks an instruction with "
+            "'usesCustomInserter', it must implement "
+            "TargetLowering::EmitInstrWithCustomInserter!";
+#endif
+  llvm_unreachable(0);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// SelectionDAGISel code
+//===----------------------------------------------------------------------===//
+
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+                                   CodeGenOpt::Level OL) :
+  MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
+  FuncInfo(new FunctionLoweringInfo(TLI)),
+  CurDAG(new SelectionDAG(tm)),
+  SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+  GFI(),
+  OptLevel(OL),
+  DAGSize(0) {
+  initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+  initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+}
+
+SelectionDAGISel::~SelectionDAGISel() {
+  delete SDB;
+  delete CurDAG;
+  delete FuncInfo;
+}
+
+void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addRequired<AliasAnalysis>();
+  AU.addPreserved<AliasAnalysis>();
+  AU.addRequired<GCModuleInfo>();
+  AU.addPreserved<GCModuleInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
+/// another function that gcc recognizes as "returning twice".  This is used to
+/// limit code-gen optimizations on the machine function.
+///
+/// FIXME: Remove after the referenced bug is fixed.
+static bool FunctionCallsSetJmp(const Function *F) {
+  const Module *M = F->getParent();
+  static const char *ReturnsTwiceFns[] = {
+    "_setjmp",
+    "setjmp",
+    "sigsetjmp",
+    "setjmp_syscall",
+    "savectx",
+    "qsetjmp",
+    "vfork",
+    "getcontext"
+  };
+#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+
+  for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
+    if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+      if (!Callee->use_empty())
+        for (Value::const_use_iterator
+               I = Callee->use_begin(), E = Callee->use_end();
+             I != E; ++I)
+          if (const CallInst *CI = dyn_cast<CallInst>(*I))
+            if (CI->getParent()->getParent() == F)
+              return true;
+    }
+
+  return false;
+#undef NUM_RETURNS_TWICE_FNS
+}
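[Editor's sketch: the routine below splits critical edges whose PHI operand may trap. As a standalone illustration of the CFG property involved (toy structures, invented names): an edge is critical when its source has several successors and its destination has several predecessors, so code for one edge cannot be placed in either endpoint alone.

#include <cstdio>
#include <vector>

struct Block { std::vector<int> Succs, Preds; };

int main() {
  // bb0 -> bb1, bb0 -> bb2, bb1 -> bb2; the edge bb0 -> bb2 is critical.
  std::vector<Block> CFG(3);
  CFG[0].Succs.push_back(1); CFG[0].Succs.push_back(2);
  CFG[1].Succs.push_back(2); CFG[1].Preds.push_back(0);
  CFG[2].Preds.push_back(0); CFG[2].Preds.push_back(1);

  for (unsigned B = 0; B != CFG.size(); ++B)
    for (unsigned i = 0; i != CFG[B].Succs.size(); ++i) {
      int S = CFG[B].Succs[i];
      // Splitting would insert a fresh block on exactly this edge, giving a
      // safe home for the possibly trapping computation.
      if (CFG[B].Succs.size() > 1 && CFG[S].Preds.size() > 1)
        std::printf("bb%u -> bb%d is critical; insert a new block on it\n",
                    B, S);
    }
  return 0;
}]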
+/// SplitCriticalSideEffectEdges - Look for critical edges with a PHI value
+/// that may trap on it.  In this case we have to split the edge so that the
+/// path through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+  // Loop for blocks with phi nodes.
+  for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+    PHINode *PN = dyn_cast<PHINode>(BB->begin());
+    if (PN == 0) continue;
+
+  ReprocessBlock:
+    // For each block with a PHI node, check to see if any of the input values
+    // are potentially trapping constant expressions.  Constant expressions are
+    // the only potentially trapping value that can occur as the argument to a
+    // PHI.
+    for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+        ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+        if (CE == 0 || !CE->canTrap()) continue;
+
+        // The only case we have to worry about is when the edge is critical.
+        // Since this block has a PHI Node, we assume it has multiple input
+        // edges: check to see if the pred has multiple successors.
+        BasicBlock *Pred = PN->getIncomingBlock(i);
+        if (Pred->getTerminator()->getNumSuccessors() == 1)
+          continue;
+
+        // Okay, we have to split this edge.
+        SplitCriticalEdge(Pred->getTerminator(),
+                          GetSuccessorNumber(Pred, BB), SDISel, true);
+        goto ReprocessBlock;
+      }
+  }
+}
+
+bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+  // Do some sanity-checking on the command-line options.
+  assert((!EnableFastISelVerbose || EnableFastISel) &&
+         "-fast-isel-verbose requires -fast-isel");
+  assert((!EnableFastISelAbort || EnableFastISel) &&
+         "-fast-isel-abort requires -fast-isel");
+
+  const Function &Fn = *mf.getFunction();
+  const TargetInstrInfo &TII = *TM.getInstrInfo();
+  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
+
+  MF = &mf;
+  RegInfo = &MF->getRegInfo();
+  AA = &getAnalysis<AliasAnalysis>();
+  GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : 0;
+
+  DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+
+  SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
+  CurDAG->init(*MF);
+  FuncInfo->set(Fn, *MF);
+  SDB->init(GFI, *AA);
+
+  SelectAllBasicBlocks(Fn);
+
+  // If the first basic block in the function has live ins that need to be
+  // copied into vregs, emit the copies into the top of the block before
+  // emitting the code for the block.
+  MachineBasicBlock *EntryMBB = MF->begin();
+  RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+
+  DenseMap<unsigned, unsigned> LiveInMap;
+  if (!FuncInfo->ArgDbgValues.empty())
+    for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+           E = RegInfo->livein_end(); LI != E; ++LI)
+      if (LI->second)
+        LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
+  // Insert DBG_VALUE instructions for function arguments to the entry block.
+  for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
+    MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
+    unsigned Reg = MI->getOperand(0).getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      EntryMBB->insert(EntryMBB->begin(), MI);
+    else {
+      MachineInstr *Def = RegInfo->getVRegDef(Reg);
+      MachineBasicBlock::iterator InsertPos = Def;
+      // FIXME: VR def may not be in entry block.
+      Def->getParent()->insert(llvm::next(InsertPos), MI);
+    }
+
+    // If Reg is live-in then update debug info to track its copy in a vreg.
+    DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+    if (LDI != LiveInMap.end()) {
+      MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+      MachineBasicBlock::iterator InsertPos = Def;
+      const MDNode *Variable =
+        MI->getOperand(MI->getNumOperands()-1).getMetadata();
+      unsigned Offset = MI->getOperand(1).getImm();
+      // Def is never a terminator here, so it is ok to increment InsertPos.
+      BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+              TII.get(TargetOpcode::DBG_VALUE))
+        .addReg(LDI->second, RegState::Debug)
+        .addImm(Offset).addMetadata(Variable);
+
+      // If this vreg is directly copied into an exported register then
+      // that COPY instruction also needs a DBG_VALUE, if it is the only
+      // user of LDI->second.
+      MachineInstr *CopyUseMI = NULL;
+      for (MachineRegisterInfo::use_iterator
+             UI = RegInfo->use_begin(LDI->second);
+           MachineInstr *UseMI = UI.skipInstruction();) {
+        if (UseMI->isDebugValue()) continue;
+        if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+          CopyUseMI = UseMI; continue;
+        }
+        // Otherwise this is another use or second copy use.
+        CopyUseMI = NULL; break;
+      }
+      if (CopyUseMI) {
+        MachineInstr *NewMI =
+          BuildMI(*MF, CopyUseMI->getDebugLoc(),
+                  TII.get(TargetOpcode::DBG_VALUE))
+          .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+          .addImm(Offset).addMetadata(Variable);
+        EntryMBB->insertAfter(CopyUseMI, NewMI);
+      }
+    }
+  }
+
+  // Determine if there are any calls in this machine function.
+  MachineFrameInfo *MFI = MF->getFrameInfo();
+  if (!MFI->hasCalls()) {
+    for (MachineFunction::const_iterator
+           I = MF->begin(), E = MF->end(); I != E; ++I) {
+      const MachineBasicBlock *MBB = I;
+      for (MachineBasicBlock::const_iterator
+             II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+        const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+
+        if ((TID.isCall() && !TID.isReturn()) ||
+            II->isStackAligningInlineAsm()) {
+          MFI->setHasCalls(true);
+          goto done;
+        }
+      }
+    }
+  done:;
+  }
+
+  // Determine if there is a call to setjmp in the machine function.
+  MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+
+  // Replace forward-declared registers with the registers containing
+  // the desired value.
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+  for (DenseMap<unsigned, unsigned>::iterator
+         I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+       I != E; ++I) {
+    unsigned From = I->first;
+    unsigned To = I->second;
+    // If To is also scheduled to be replaced, find what its ultimate
+    // replacement is.
+    for (;;) {
+      DenseMap<unsigned, unsigned>::iterator J =
+        FuncInfo->RegFixups.find(To);
+      if (J == E) break;
+      To = J->second;
+    }
+    // Replace it.
+    MRI.replaceRegWith(From, To);
+  }
+
+  // Release function-specific state. SDB and CurDAG are already cleared
+  // at this point.
+  FuncInfo->clear();
+
+  return true;
+}
+
+void
+SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
+                                   BasicBlock::const_iterator End,
+                                   bool &HadTailCall) {
+  // Lower all of the non-terminator instructions.  If a call is emitted
+  // as a tail call, cease emitting nodes for this block.  Terminators
+  // are handled below.
+  for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I)
+    SDB->visit(*I);
+
+  // Make sure the root of the DAG is up-to-date.
+  CurDAG->setRoot(SDB->getControlRoot());
+  HadTailCall = SDB->HasTailCall;
+  SDB->clear();
+
+  // Final step, emit the lowered DAG as machine code.
+  CodeGenAndEmitDAG();
+}
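[Editor's sketch: the next function records, for each value copied out of a block, how many sign bits it has and which bits are known zero or one, so later blocks can skip redundant extensions. A plain-integer illustration of that kind of fact (invented values, not the SelectionDAG API):

#include <cstdint>
#include <cstdio>

int main() {
  // Suppose the live-out value is known to be (x & 0xFF) in a 32-bit vreg.
  uint32_t Mask = 0xFFu;
  uint32_t KnownZero = ~Mask;        // bits the AND forces to zero
  unsigned LeadingZeros = 0;
  for (uint32_t B = 0x80000000u; B && (KnownZero & B); B >>= 1)
    ++LeadingZeros;
  // 24 leading bits are known zero, so a zero-extend of this live-out
  // register in a successor block is a no-op and can be dropped.
  std::printf("known-zero mask: 0x%08X, leading known zeros: %u\n",
              KnownZero, LeadingZeros);   // 0xFFFFFF00, 24
  return 0;
}]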
+void SelectionDAGISel::ComputeLiveOutVRegInfo() {
+  SmallPtrSet<SDNode*, 128> VisitedNodes;
+  SmallVector<SDNode*, 128> Worklist;
+
+  Worklist.push_back(CurDAG->getRoot().getNode());
+
+  APInt Mask;
+  APInt KnownZero;
+  APInt KnownOne;
+
+  do {
+    SDNode *N = Worklist.pop_back_val();
+
+    // If we've already seen this node, ignore it.
+    if (!VisitedNodes.insert(N))
+      continue;
+
+    // Otherwise, add all chain operands to the worklist.
+    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+      if (N->getOperand(i).getValueType() == MVT::Other)
+        Worklist.push_back(N->getOperand(i).getNode());
+
+    // If this is a CopyToReg with a vreg dest, process it.
+    if (N->getOpcode() != ISD::CopyToReg)
+      continue;
+
+    unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+      continue;
+
+    // Ignore non-scalar or non-integer values.
+    SDValue Src = N->getOperand(2);
+    EVT SrcVT = Src.getValueType();
+    if (!SrcVT.isInteger() || SrcVT.isVector())
+      continue;
+
+    unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src);
+    Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits());
+    CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne);
+    FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne);
+  } while (!Worklist.empty());
+}
+
+void SelectionDAGISel::CodeGenAndEmitDAG() {
+  std::string GroupName;
+  if (TimePassesIsEnabled)
+    GroupName = "Instruction Selection and Scheduling";
+  std::string BlockName;
+  if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs ||
+      ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
+      ViewSUnitDAGs)
+    BlockName = MF->getFunction()->getNameStr() + ":" +
+                FuncInfo->MBB->getBasicBlock()->getNameStr();
+
+  DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
+
+  if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
+
+  // Run the DAG combiner in pre-legalize mode.
+  {
+    NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+    CurDAG->Combine(Unrestricted, *AA, OptLevel);
+  }
+
+  DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
+
+  // Second step, hack on the DAG until it only uses operations and types that
+  // the target supports.
+  if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+                                               BlockName);
+
+  bool Changed;
+  {
+    NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+    Changed = CurDAG->LegalizeTypes();
+  }
+
+  DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
+
+  if (Changed) {
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
+    {
+      NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+                         TimePassesIsEnabled);
+      CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
+    }
+
+    DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
+          CurDAG->dump());
+  }
+
+  {
+    NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+    Changed = CurDAG->LegalizeVectors();
+  }
+
+  if (Changed) {
+    {
+      NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+      CurDAG->LegalizeTypes();
+    }
+
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
+    {
+      NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+                         TimePassesIsEnabled);
+      CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+    }
+
+    DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
+          CurDAG->dump());
+  }
+
+  if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
+
+  {
+    NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+    CurDAG->Legalize(OptLevel);
+  }
+
+  DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
+
+  if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
+
+  // Run the DAG combiner in post-legalize mode.
+  {
+    NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+    CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+  }
+
+  DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
+
+  if (OptLevel != CodeGenOpt::None)
+    ComputeLiveOutVRegInfo();
+
+  if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
+
+  // Third, instruction select all of the operations to machine code, adding
+  // the code to the MachineBasicBlock.
+  {
+    NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+    DoInstructionSelection();
+  }
+
+  DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
+
+  if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
+
+  // Schedule machine code.
+  ScheduleDAGSDNodes *Scheduler = CreateScheduler();
+  {
+    NamedRegionTimer T("Instruction Scheduling", GroupName,
+                       TimePassesIsEnabled);
+    Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
+  }
+
+  if (ViewSUnitDAGs) Scheduler->viewGraph();
+
+  // Emit machine code to BB.  This can change 'BB' to the last block being
+  // inserted into.
+  MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
+  {
+    NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+    LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
+    FuncInfo->InsertPt = Scheduler->InsertPos;
+  }
+
+  // If the block was split, make sure we update any references that are used
+  // to update PHI nodes later on.
+  if (FirstMBB != LastMBB)
+    SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
+  // Free the scheduler state.
+  {
+    NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+                       TimePassesIsEnabled);
+    delete Scheduler;
+  }
+
+  // Free the SelectionDAG state, now that we're finished with it.
+  CurDAG->clear();
+}
+
+void SelectionDAGISel::DoInstructionSelection() {
+  DEBUG(errs() << "===== Instruction selection begins:\n");
+
+  PreprocessISelDAG();
+
+  // Select target instructions for the DAG.
+  {
+    // Number all nodes with a topological order and set DAGSize.
+    DAGSize = CurDAG->AssignTopologicalOrder();
+
+    // Create a dummy node (which is not added to allnodes), that adds
+    // a reference to the root node, preventing it from being deleted,
+    // and tracking any changes of the root.
+    HandleSDNode Dummy(CurDAG->getRoot());
+    ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+    ++ISelPosition;
+
+    // The AllNodes list is now topological-sorted.  Visit the
+    // nodes by starting at the end of the list (the root of the
+    // graph) and proceeding back toward the beginning (the entry
+    // node).
+    while (ISelPosition != CurDAG->allnodes_begin()) {
+      SDNode *Node = --ISelPosition;
+      // Skip dead nodes.  DAGCombiner is expected to eliminate all dead nodes,
+      // but there are currently some corner cases that it misses.  Also, this
+      // makes it theoretically possible to disable the DAGCombiner.
+      if (Node->use_empty())
+        continue;
+
+      SDNode *ResNode = Select(Node);
+
+      // FIXME: This is pretty gross.  'Select' should be changed to not return
+      // anything at all and this code should be nuked with a tactical strike.
+
+      // If node should not be replaced, continue with the next one.
+      if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+        continue;
+      // Replace node.
+      if (ResNode)
+        ReplaceUses(Node, ResNode);
+
+      // If after the replacement this node is not used any more,
+      // remove this dead node.
+      if (Node->use_empty()) { // Don't delete EntryToken, etc.
+        ISelUpdater ISU(ISelPosition);
+        CurDAG->RemoveDeadNode(Node, &ISU);
+      }
+    }
+
+    CurDAG->setRoot(Dummy.getValue());
+  }
+
+  DEBUG(errs() << "===== Instruction selection ends:\n");
+
+  PostprocessISelDAG();
+}
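[Editor's sketch: the loop above visits nodes from the root backwards because the topological numbering guarantees every node precedes its users in the list, so each node is selected before the operands it might fold. A standalone toy of that ordering (invented names, not the SelectionDAG API):

#include <cstdio>
#include <vector>

int main() {
  // Operand edges in an already-topological numbering (entry = 0):
  // 3:store uses 2:add uses 1:load uses 0:entry.
  const char *Name[] = { "entry", "load", "add", "store" };
  std::vector<std::vector<int> > Ops(4);
  Ops[1].push_back(0); Ops[2].push_back(1); Ops[3].push_back(2);

  for (int N = 3; N >= 0; --N) {   // root first, entry token last
    std::printf("select %s (operands still unselected:", Name[N]);
    for (unsigned i = 0; i != Ops[N].size(); ++i)
      std::printf(" %s", Name[Ops[N][i]]);
    std::printf(")\n");
  }
  return 0;
}

Visiting users before operands is what lets a pattern for the store fold the add and load beneath it in one match.]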
+/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
+/// do other setup for EH landing-pad blocks.
+void SelectionDAGISel::PrepareEHLandingPad() {
+  // Add a label to mark the beginning of the landing pad.  Deletion of the
+  // landing pad can thus be detected via the MachineModuleInfo.
+  MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
+
+  const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+  BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+    .addSym(Label);
+
+  // Mark exception register as live in.
+  unsigned Reg = TLI.getExceptionAddressRegister();
+  if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+  // Mark exception selector register as live in.
+  Reg = TLI.getExceptionSelectorRegister();
+  if (Reg) FuncInfo->MBB->addLiveIn(Reg);
+
+  // FIXME: Hack around an exception handling flaw (PR1508): the personality
+  // function and list of typeids logically belong to the invoke (or, if you
+  // like, the basic block containing the invoke), and need to be associated
+  // with it in the dwarf exception handling tables.  Currently however the
+  // information is provided by an intrinsic (eh.selector) that can be moved
+  // to unexpected places by the optimizers: if the unwind edge is critical,
+  // then breaking it can result in the intrinsics being in the successor of
+  // the landing pad, not the landing pad itself.  This results
+  // in exceptions not being caught because no typeids are associated with
+  // the invoke.  This may not be the only way things can go wrong, but it
+  // is the only way we try to work around for the moment.
+  const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
+  const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
+
+  if (Br && Br->isUnconditional()) { // Critical edge?
+    BasicBlock::const_iterator I, E;
+    for (I = LLVMBB->begin(), E = --LLVMBB->end(); I != E; ++I)
+      if (isa<EHSelectorInst>(I))
+        break;
+
+    if (I == E)
+      // No catch info found - try to extract some from the successor.
+      CopyCatchInfo(Br->getSuccessor(0), LLVMBB, &MF->getMMI(), *FuncInfo);
+  }
+}
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+                                             FastISel *FastIS) {
+  // Don't try to fold volatile loads.  Target has to deal with alignment
+  // constraints.
+  if (LI->isVolatile()) return false;
+
+  // Figure out which vreg this is going into.
+  unsigned LoadReg = FastIS->getRegForValue(LI);
+  assert(LoadReg && "Load isn't already assigned a vreg?");
+
+  // Check to see what the uses of this vreg are.  If it has no uses, or more
+  // than one use (at the machine instr level) then we can't fold it.
+  MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+  if (RI == RegInfo->reg_end())
+    return false;
+
+  // See if there is exactly one use of the vreg.  If there are multiple uses,
+  // then the instruction got lowered to multiple machine instructions or the
+  // use of the loaded value ended up being multiple operands of the result, in
+  // either case, we can't fold this.
+  MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+  if (PostRI != RegInfo->reg_end())
+    return false;
+
+  assert(RI.getOperand().isUse() &&
+         "The only use of the vreg must be a use, we haven't emitted the def!");
+
+  MachineInstr *User = &*RI;
+  // Set the insertion point properly.  Folding the load can cause generation
+  // of other random instructions (like sign extends) for addressing modes;
+  // make sure they get inserted in a logical place before the new instruction.
+  FuncInfo->InsertPt = User;
+  FuncInfo->MBB = User->getParent();
+
+  // Ask the target to try folding the load.
+  return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+#ifndef NDEBUG
+/// CheckLineNumbers - Check if basic block instructions follow source order
+/// or not.
+static void CheckLineNumbers(const BasicBlock *BB) {
+  unsigned Line = 0;
+  unsigned Col = 0;
+  for (BasicBlock::const_iterator BI = BB->begin(),
+         BE = BB->end(); BI != BE; ++BI) {
+    const DebugLoc DL = BI->getDebugLoc();
+    if (DL.isUnknown()) continue;
+    unsigned L = DL.getLine();
+    unsigned C = DL.getCol();
+    if (L < Line || (L == Line && C < Col)) {
+      ++NumBBWithOutOfOrderLineInfo;
+      return;
+    }
+    Line = L;
+    Col = C;
+  }
+}
+
+/// CheckLineNumbers - Check if machine basic block instructions follow source
+/// order or not.
+static void CheckLineNumbers(const MachineBasicBlock *MBB) {
+  unsigned Line = 0;
+  unsigned Col = 0;
+  for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
+         MBE = MBB->end(); MBI != MBE; ++MBI) {
+    const DebugLoc DL = MBI->getDebugLoc();
+    if (DL.isUnknown()) continue;
+    unsigned L = DL.getLine();
+    unsigned C = DL.getCol();
+    if (L < Line || (L == Line && C < Col)) {
+      ++NumMBBWithOutOfOrderLineInfo;
+      return;
+    }
+    Line = L;
+    Col = C;
+  }
+}
+#endif
+
+void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+  // Initialize the Fast-ISel state, if needed.
+  FastISel *FastIS = 0;
+  if (EnableFastISel)
+    FastIS = TLI.createFastISel(*FuncInfo);
+
+  // Iterate over all basic blocks in the function.
+  ReversePostOrderTraversal<const Function*> RPOT(&Fn);
+  for (ReversePostOrderTraversal<const Function*>::rpo_iterator
+         I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
+    const BasicBlock *LLVMBB = *I;
+#ifndef NDEBUG
+    CheckLineNumbers(LLVMBB);
+#endif
+
+    if (OptLevel != CodeGenOpt::None) {
+      bool AllPredsVisited = true;
+      for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
+           PI != PE; ++PI) {
+        if (!FuncInfo->VisitedBBs.count(*PI)) {
+          AllPredsVisited = false;
+          break;
+        }
+      }
+
+      if (AllPredsVisited) {
+        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
+             I != E && isa<PHINode>(I); ++I) {
+          FuncInfo->ComputePHILiveOutRegInfo(cast<PHINode>(I));
+        }
+      } else {
+        for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end();
+             I != E && isa<PHINode>(I); ++I) {
+          FuncInfo->InvalidatePHILiveOutRegInfo(cast<PHINode>(I));
+        }
+      }
+
+      FuncInfo->VisitedBBs.insert(LLVMBB);
+    }
+
+    FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+    FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+    BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
+    BasicBlock::const_iterator const End = LLVMBB->end();
+    BasicBlock::const_iterator BI = End;
+
+    // Setup an EH landing-pad block.
+    if (FuncInfo->MBB->isLandingPad())
+      PrepareEHLandingPad();
+
+    // Lower any arguments needed in this block if this is the entry block.
+    if (LLVMBB == &Fn.getEntryBlock())
+      LowerArguments(LLVMBB);
+
+    // Before doing SelectionDAG ISel, see if FastISel has been requested.
+    if (FastIS) {
+      FastIS->startNewBlock();
+
+      // Emit code for any incoming arguments.  This must happen before
+      // beginning FastISel on the entry block.
+      if (LLVMBB == &Fn.getEntryBlock()) {
+        CurDAG->setRoot(SDB->getControlRoot());
+        SDB->clear();
+        CodeGenAndEmitDAG();
+
+        // If we inserted any instructions at the beginning, make a note of
+        // where they are, so we can be sure to emit subsequent instructions
+        // after them.
+        if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+          FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+        else
+          FastIS->setLastLocalValue(0);
+      }
+
+      // Do FastISel on as many instructions as possible.
+      for (; BI != Begin; --BI) {
+        const Instruction *Inst = llvm::prior(BI);
+
+        // If we no longer require this instruction, skip it.
+        if (!Inst->mayWriteToMemory() &&
+            !isa<TerminatorInst>(Inst) &&
+            !isa<DbgInfoIntrinsic>(Inst) &&
+            !FuncInfo->isExportedInst(Inst))
+          continue;
+
+        // Bottom-up: reset the insert pos at the top, after any local-value
+        // instructions.
+        FastIS->recomputeInsertPt();
+
+        // Try to select the instruction with FastISel.
+        if (FastIS->SelectInstruction(Inst)) {
+          // If fast isel succeeded, check to see if there is a single-use
+          // non-volatile load right before the selected instruction, and see
+          // if the load is used by the instruction.  If so, try to fold it.
+          const Instruction *BeforeInst = 0;
+          if (Inst != Begin)
+            BeforeInst = llvm::prior(llvm::prior(BI));
+          if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+              BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+              TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
+            --BI; // If we succeeded, don't re-select the load.
+          continue;
+        }
+
+        // Then handle certain instructions as single-LLVM-Instruction blocks.
+        if (isa<CallInst>(Inst)) {
+          ++NumFastIselFailures;
+          if (EnableFastISelVerbose || EnableFastISelAbort) {
+            dbgs() << "FastISel missed call: ";
+            Inst->dump();
+          }
+
+          if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+            unsigned &R = FuncInfo->ValueMap[Inst];
+            if (!R)
+              R = FuncInfo->CreateRegs(Inst->getType());
+          }
+
+          bool HadTailCall = false;
+          SelectBasicBlock(Inst, BI, HadTailCall);
+
+          // If the call was emitted as a tail call, we're done with the block.
+          if (HadTailCall) {
+            --BI;
+            break;
+          }
+
+          continue;
+        }
+
+        // Otherwise, give up on FastISel for the rest of the block.
+        // For now, be a little lenient about non-branch terminators.
+        if (!isa<TerminatorInst>(Inst) || isa<UnreachableInst>(Inst)) {
+          ++NumFastIselFailures;
+          if (EnableFastISelVerbose || EnableFastISelAbort) {
+            dbgs() << "FastISel miss: ";
+            Inst->dump();
+          }
+          if (EnableFastISelAbort)
+            // The "fast" selector couldn't handle something and bailed.
+            // For the purpose of debugging, just abort.
+            llvm_unreachable("FastISel didn't select the entire block");
+        }
+        break;
+      }
+
+      FastIS->recomputeInsertPt();
+    }
+
+    if (Begin != BI)
+      ++NumDAGBlocks;
+    else
+      ++NumFastIselBlocks;
+
+    // Run SelectionDAG instruction selection on the remainder of the block
+    // not handled by FastISel.  If FastISel is not run, this is the entire
+    // block.
+    bool HadTailCall;
+    SelectBasicBlock(Begin, BI, HadTailCall);
+
+    FinishBasicBlock();
+    FuncInfo->PHINodesToUpdate.clear();
+  }
+
+  delete FastIS;
+#ifndef NDEBUG
+  for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
+       MBI != MBE; ++MBI)
+    CheckLineNumbers(MBI);
+#endif
+}
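[Editor's sketch: the loop above walks the block bottom-up, letting the fast path claim instructions until one fails, then hands the untouched prefix [Begin, BI) to the DAG path in one piece. A standalone toy of that partitioning (invented fastSelect, not the FastISel API):

#include <cstdio>

static bool fastSelect(char Op) { return Op != 'c'; }  // pretend 'c' (a call) fails

int main() {
  const char Insts[] = { 'a', 'b', 'c', 'd', 'e' };    // instructions Begin..End
  int BI = 5, Begin = 0;
  while (BI != Begin) {
    if (!fastSelect(Insts[BI - 1]))
      break;        // fast path gave up; leave BI pointing past the failure
    --BI;           // instruction handled bottom-up
  }
  // Prints: fast isel handled [3, 5), DAG gets [0, 3)
  std::printf("fast isel handled [%d, 5), DAG gets [0, %d)\n", BI, BI);
  return 0;
}]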
+void
+SelectionDAGISel::FinishBasicBlock() {
+
+  DEBUG(dbgs() << "Total amount of phi nodes to update: "
+               << FuncInfo->PHINodesToUpdate.size() << "\n";
+        for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+          dbgs() << "Node " << i << " : ("
+                 << FuncInfo->PHINodesToUpdate[i].first
+                 << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
+
+  // Next, now that we know what the last MBB the LLVM BB expanded is, update
+  // PHI nodes in successors.
+  if (SDB->SwitchCases.empty() &&
+      SDB->JTCases.empty() &&
+      SDB->BitTestCases.empty()) {
+    for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
+        continue;
+      PHI->addOperand(
+        MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+      PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+    }
+    return;
+  }
+
+  for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!SDB->BitTestCases[i].Emitted) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // Emit the code
+      SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+    }
+
+    for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // Emit the code
+      if (j+1 != ej)
+        SDB->visitBitTestCase(SDB->BitTestCases[i],
+                              SDB->BitTestCases[i].Cases[j+1].ThisBB,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j],
+                              FuncInfo->MBB);
+      else
+        SDB->visitBitTestCase(SDB->BitTestCases[i],
+                              SDB->BitTestCases[i].Default,
+                              SDB->BitTestCases[i].Reg,
+                              SDB->BitTestCases[i].Cases[j],
+                              FuncInfo->MBB);
+
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+    }
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+         pi != pe; ++pi) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      // This is "default" BB.  We have two jumps to it.  From "header" BB and
+      // from last "case" BB.
+      if (PHIBB == SDB->BitTestCases[i].Default) {
+        PHI->addOperand(MachineOperand::
+                          CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                    false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent));
+        PHI->addOperand(MachineOperand::
+                          CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                    false));
+        PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases.
+                                                    back().ThisBB));
+      }
+      // One of "cases" BB.
+      for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size();
+           j != ej; ++j) {
+        MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+        if (cBB->isSuccessor(PHIBB)) {
+          PHI->addOperand(MachineOperand::
+                            CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                      false));
+          PHI->addOperand(MachineOperand::CreateMBB(cBB));
+        }
+      }
+    }
+  }
+  SDB->BitTestCases.clear();
+
+  // If the JumpTable record is filled in, then we need to emit a jump table.
+  // Updating the PHI nodes is tricky in this case, since we need to determine
+  // whether the PHI is a successor of the range check MBB or the jump table
+  // MBB.
+  for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+    // Lower header first, if it wasn't already lowered
+    if (!SDB->JTCases[i].first.Emitted) {
+      // Set the current basic block to the mbb we wish to insert the code into
+      FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // Emit the code
+      SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
+                                FuncInfo->MBB);
+      CurDAG->setRoot(SDB->getRoot());
+      SDB->clear();
+      CodeGenAndEmitDAG();
+    }
+
+    // Set the current basic block to the mbb we wish to insert the code into
+    FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+    FuncInfo->InsertPt = FuncInfo->MBB->end();
+    // Emit the code
+    SDB->visitJumpTable(SDB->JTCases[i].second);
+    CurDAG->setRoot(SDB->getRoot());
+    SDB->clear();
+    CodeGenAndEmitDAG();
+
+    // Update PHI Nodes
+    for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
+         pi != pe; ++pi) {
+      MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first;
+      MachineBasicBlock *PHIBB = PHI->getParent();
+      assert(PHI->isPHI() &&
+             "This is not a machine PHI node that we are updating!");
+      // "default" BB.  We can go there only from header BB.
+      if (PHIBB == SDB->JTCases[i].second.Default) {
+        PHI->addOperand
+          (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                     false));
+        PHI->addOperand
+          (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
+      }
+      // JT BB.  Just iterate over successors here
+      if (FuncInfo->MBB->isSuccessor(PHIBB)) {
+        PHI->addOperand
+          (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
+                                     false));
+        PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+      }
+    }
+  }
+  SDB->JTCases.clear();
+
+  // If the switch block involved a branch to one of the actual successors, we
+  // need to update PHI nodes in that block.
+  for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) {
+    MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
+    assert(PHI->isPHI() &&
+           "This is not a machine PHI node that we are updating!");
+    if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
+      PHI->addOperand(
+        MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
+      PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
+    }
+  }
+
+  // If we generated any switch lowering information, build and codegen any
+  // additional DAGs necessary.
+  for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+    // Set the current basic block to the mbb we wish to insert the code into
+    FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+    FuncInfo->InsertPt = FuncInfo->MBB->end();
+
+    // Determine the unique successors.
+    SmallVector<MachineBasicBlock *, 2> Succs;
+    Succs.push_back(SDB->SwitchCases[i].TrueBB);
+    if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
+      Succs.push_back(SDB->SwitchCases[i].FalseBB);
+
+    // Emit the code.  Note that this could result in FuncInfo->MBB being
+    // split.
+    SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+    CurDAG->setRoot(SDB->getRoot());
+    SDB->clear();
+    CodeGenAndEmitDAG();
+
+    // Remember the last block, now that any splitting is done, for use in
+    // populating PHI nodes in successors.
+    MachineBasicBlock *ThisBB = FuncInfo->MBB;
+
+    // Handle any PHI nodes in successors of this chunk, as if we were coming
+    // from the original BB before switch expansion.  Note that PHI nodes can
+    // occur multiple times in PHINodesToUpdate.  We have to be very careful to
+    // handle them the right number of times.
+    for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+      FuncInfo->MBB = Succs[i];
+      FuncInfo->InsertPt = FuncInfo->MBB->end();
+      // FuncInfo->MBB may have been removed from the CFG if a branch was
+      // constant folded.
+      if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+        for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
+             Phi != FuncInfo->MBB->end() && Phi->isPHI();
+             ++Phi) {
+          // This value for this PHI node is recorded in PHINodesToUpdate.
+          for (unsigned pn = 0; ; ++pn) {
+            assert(pn != FuncInfo->PHINodesToUpdate.size() &&
+                   "Didn't find PHI entry!");
+            if (FuncInfo->PHINodesToUpdate[pn].first == Phi) {
+              Phi->addOperand(MachineOperand::
+                                CreateReg(FuncInfo->PHINodesToUpdate[pn].second,
+                                          false));
+              Phi->addOperand(MachineOperand::CreateMBB(ThisBB));
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+  SDB->SwitchCases.clear();
+}
+
+/// Create the scheduler.  If a specific scheduler was specified
+/// via the SchedulerRegistry, use it, otherwise select the
+/// one preferred by the target.
+///
+ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
+  RegisterScheduler::FunctionPassCtor Ctor = RegisterScheduler::getDefault();
+
+  if (!Ctor) {
+    Ctor = ISHeuristic;
+    RegisterScheduler::setDefault(Ctor);
+  }
+
+  return Ctor(this, OptLevel);
+}
+
+//===----------------------------------------------------------------------===//
+// Helper functions used by the generated instruction selector.
+//===----------------------------------------------------------------------===//
+// Calls to these methods are generated by tblgen.
+
+/// CheckAndMask - The isel is trying to match something like (and X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS,
+                                    int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual AND mask is allowing unallowed bits, this doesn't match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already zero or is not demanded.  Check for known zero input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+  if (CurDAG->MaskedValueIsZero(LHS, NeededMask))
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
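[Editor's worked example of the test above, in plain integers (invented values): the pattern asks for (and X, 0xFF), the combiner shrank the mask to 0x0F because it proved the other bits already zero, and the match still succeeds.

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Desired   = 0xFF;                      // mask written in the .td pattern
  uint64_t Actual    = 0x0F;                      // mask left in the DAG after combining
  uint64_t KnownZero = 0xFFFFFFFFFFFFFFF0ULL;     // bits the combiner proved zero

  bool Match;
  if (Actual == Desired)
    Match = true;
  else if (Actual & ~Desired)
    Match = false;   // the AND lets through bits the pattern forbids
  else
    Match = ((Desired & ~Actual) & ~KnownZero) == 0;  // rest already zero

  std::printf("matches: %s\n", Match ? "yes" : "no"); // yes
  return 0;
}

The needed mask is Desired & ~Actual = 0xF0, and every one of those bits is covered by KnownZero, which is exactly what MaskedValueIsZero verifies above.]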
+/// CheckOrMask - The isel is trying to match something like (or X, 255).  If
+/// the dag combiner simplified the 255, we still want to match.  RHS is the
+/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value
+/// specified in the .td file (e.g. 255).
+bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS,
+                                   int64_t DesiredMaskS) const {
+  const APInt &ActualMask = RHS->getAPIntValue();
+  const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS);
+
+  // If the actual mask exactly matches, success!
+  if (ActualMask == DesiredMask)
+    return true;
+
+  // If the actual OR mask is setting unallowed bits, this doesn't match.
+  if (ActualMask.intersects(~DesiredMask))
+    return false;
+
+  // Otherwise, the DAG Combiner may have proven that the value coming in is
+  // either already one or is not demanded.  Check for known one input bits.
+  APInt NeededMask = DesiredMask & ~ActualMask;
+
+  APInt KnownZero, KnownOne;
+  CurDAG->ComputeMaskedBits(LHS, NeededMask, KnownZero, KnownOne);
+
+  // If all the missing bits in the or are already known to be set, match!
+  if ((NeededMask & KnownOne) == NeededMask)
+    return true;
+
+  // TODO: check to see if missing bits are just not demanded.
+
+  // Otherwise, this pattern doesn't match.
+  return false;
+}
+
+/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated
+/// by tblgen.  Others should not call it.
+void SelectionDAGISel::
+SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
+  std::vector<SDValue> InOps;
+  std::swap(InOps, Ops);
+
+  Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
+  Ops.push_back(InOps[InlineAsm::Op_AsmString]);  // 1
+  Ops.push_back(InOps[InlineAsm::Op_MDNode]);     // 2, !srcloc
+  Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]);  // 3 (SideEffect, AlignStack)
+
+  unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
+  if (InOps[e-1].getValueType() == MVT::Glue)
+    --e;  // Don't process a glue operand if it is here.
+
+  while (i != e) {
+    unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
+    if (!InlineAsm::isMemKind(Flags)) {
+      // Just skip over this operand, copying the operands verbatim.
+      Ops.insert(Ops.end(), InOps.begin()+i,
+                 InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1);
+      i += InlineAsm::getNumOperandRegisters(Flags) + 1;
+    } else {
+      assert(InlineAsm::getNumOperandRegisters(Flags) == 1 &&
+             "Memory operand with multiple values?");
+      // Otherwise, this is a memory operand.  Ask the target to select it.
+      std::vector<SDValue> SelOps;
+      if (SelectInlineAsmMemoryOperand(InOps[i+1], 'm', SelOps))
+        report_fatal_error("Could not match memory address.  Inline asm"
+                           " failure!");
+
+      // Add this to the output node.
+      unsigned NewFlags =
+        InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size());
+      Ops.push_back(CurDAG->getTargetConstant(NewFlags, MVT::i32));
+      Ops.insert(Ops.end(), SelOps.begin(), SelOps.end());
+      i += 2;
+    }
+  }
+
+  // Add the glue input back if present.
+  if (e != InOps.size())
+    Ops.push_back(InOps.back());
+}
+
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
+/// SDNode.
+///
+static SDNode *findGlueUse(SDNode *N) {
+  unsigned FlagResNo = N->getNumValues()-1;
+  for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
+    SDUse &Use = I.getUse();
+    if (Use.getResNo() == FlagResNo)
+      return Use.getUser();
+  }
+  return NULL;
+}
+
+/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
+/// This function recursively traverses up the operand chain, ignoring
+/// certain nodes.
+static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
+                          SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+                          bool IgnoreChains) {
+  // The NodeIDs are given unique IDs where a node ID is guaranteed to be
+  // greater than all of its (recursive) operands.  If we scan to a point where
+  // 'use' is smaller than the node we're scanning for, then we know we will
+  // never find it.
+  //
+  // The Use may be -1 (unassigned) if it is a newly allocated node.  This can
+  // happen because we scan down to newly selected nodes in the case of glue
+  // uses.
+  if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
+    return false;
+
+  // Don't revisit nodes if we already scanned it and didn't fail, we know we
+  // won't fail if we scan it again.
+  if (!Visited.insert(Use))
+    return false;
+
+  for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    // Ignore chain uses, they are validated by HandleMergeInputChains.
+    if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+      continue;
+
+    SDNode *N = Use->getOperand(i).getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue;  // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+      return true;
+  }
+  return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+                                          SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+  return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If Root use can somehow reach N through a path that doesn't contain
+  // U then folding N would create a cycle.  e.g. In the following
+  // diagram, Root can reach N through X.  If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces glue, then it gets (even more) interesting.  Since it
+  // will be "glued" together with its glue use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \       |      //
+  //         [Root*]  |      //
+  //          ^       |      //
+  //          f       |      //
+  //          |       /      //
+  //         [Y]     /       //
+  //          ^     /        //
+  //          f    /         //
+  //          |   /          //
+  //         [GU]            //
+  //
+  // If GU (glue use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of GU and a successor of
+  // Fold.  But since Fold and GU are glued together, this will create
+  // a cycle in the scheduling graph.
+
+  // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Glue) {
+    SDNode *GU = findGlueUse(Root);
+    if (GU == NULL)
+      break;
+    Root = GU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+
+    // If our query node has a glue result with a use, we've walked up it.  If
+    // the user (which has already been selected) has a chain or indirectly
+    // uses the chain, our WalkChainUsers predicate will not consider it.
+    // Because of this, we cannot ignore chains in this predicate.
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If the Root use can somehow reach N through a path that doesn't contain
+  // U, then folding N would create a cycle.  e.g. In the following
+  // diagram, Root can reach N through X.  If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces glue, then it gets (even more) interesting.  Since it
+  // will be "glued" together with its glue use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \      |       //
+  //         [Root*] |       //
+  //          ^      |       //
+  //          f      |       //
+  //          |      /       //
+  //         [Y]    /        //
+  //          ^    /         //
+  //          f   /          //
+  //          |  /           //
+  //         [GU]            //
+  //
+  // If GU (glue use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of GU and a successor of
+  // Fold.  But since Fold and GU are glued together, this will create
+  // a cycle in the scheduling graph.
+
+  // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Glue) {
+    SDNode *GU = findGlueUse(Root);
+    if (GU == NULL)
+      break;
+    Root = GU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+
+    // If our query node has a glue result with a use, we've walked up it.  If
+    // the user (which has already been selected) has a chain or indirectly
+    // uses the chain, our WalkChainUsers predicate will not consider it.
+    // Because of this, we cannot ignore chains in this predicate.
+    IgnoreChains = false;
+  }
+
+  SmallPtrSet<SDNode*, 16> Visited;
+  return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
+}
+
+SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+  std::vector<SDValue> Ops(N->op_begin(), N->op_end());
+  SelectInlineAsmMemoryOperands(Ops);
+
+  std::vector<EVT> VTs;
+  VTs.push_back(MVT::Other);
+  VTs.push_back(MVT::Glue);
+  SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
+                                VTs, &Ops[0], Ops.size());
+  New->setNodeId(-1);
+  return New.getNode();
+}
+
+SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
+  return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0));
+}
+
+/// GetVBR - decode a vbr encoding whose top bit is set.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+  assert(Val >= 128 && "Not a VBR");
+  Val &= 127;  // Remove first vbr bit.
+
+  unsigned Shift = 7;
+  uint64_t NextBits;
+  do {
+    NextBits = MatcherTable[Idx++];
+    Val |= (NextBits&127) << Shift;
+    Shift += 7;
+  } while (NextBits & 128);
+
+  return Val;
+}
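+
+// For example, the value 300 is VBR-encoded as the bytes [0xAC, 0x02]: the
+// first byte contributes 0xAC & 127 == 44, the second contributes 2 << 7 ==
+// 256, and 44 + 256 == 300.  The top bit of each byte simply marks that
+// another byte follows.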
+
+
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
+void SelectionDAGISel::
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+                    const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                    SDValue InputGlue,
+                    const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+                    bool isMorphNodeTo) {
+  SmallVector<SDNode*, 4> NowDeadNodes;
+
+  ISelUpdater ISU(ISelPosition);
+
+  // Now that all the normal results are replaced, we replace the chain and
+  // glue results if present.
+  if (!ChainNodesMatched.empty()) {
+    assert(InputChain.getNode() != 0 &&
+           "Matched input chains but didn't produce a chain");
+    // Loop over all of the nodes we matched that produced a chain result.
+    // Replace all the chain results with the final chain we ended up with.
+    for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+      SDNode *ChainNode = ChainNodesMatched[i];
+
+      // If this node was already deleted, don't look at it.
+      if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+        continue;
+
+      // Don't replace the results of the root node if we're doing a
+      // MorphNodeTo.
+      if (ChainNode == NodeToMatch && isMorphNodeTo)
+        continue;
+
+      SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+      if (ChainVal.getValueType() == MVT::Glue)
+        ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+      assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+      CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+
+      // If the node became dead and we haven't already seen it, delete it.
+      if (ChainNode->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
+        NowDeadNodes.push_back(ChainNode);
+    }
+  }
+
+  // If the result produces glue, update any glue results in the matched
+  // pattern with the glue result.
+  if (InputGlue.getNode() != 0) {
+    // Handle any interior nodes explicitly marked.
+    for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+      SDNode *FRN = GlueResultNodesMatched[i];
+
+      // If this node was already deleted, don't look at it.
+      if (FRN->getOpcode() == ISD::DELETED_NODE)
+        continue;
+
+      assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+             "Doesn't have a glue result");
+      CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+                                        InputGlue, &ISU);
+
+      // If the node became dead and we haven't already seen it, delete it.
+      if (FRN->use_empty() &&
+          !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
+        NowDeadNodes.push_back(FRN);
+    }
+  }
+
+  if (!NowDeadNodes.empty())
+    CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+  DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+  CR_Simple,
+  CR_InducesCycle,
+  CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding
+/// because it would induce a cycle) and whether we have a TokenFactor node
+/// sandwiched between pattern nodes (in which case the TF becomes part of the
+/// pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down
+/// to already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+               SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+               SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+  ChainResult Result = CR_Simple;
+
+  for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+         E = ChainedNode->use_end(); UI != E; ++UI) {
+    // Make sure the use is of the chain, not some other value we produce.
+    if (UI.getUse().getValueType() != MVT::Other) continue;
+
+    SDNode *User = *UI;
+
+    // If we see an already-selected machine node, then we've gone beyond the
+    // pattern that we're selecting down into the already selected chunk of the
+    // DAG.
+    if (User->isMachineOpcode() ||
+        User->getOpcode() == ISD::HANDLENODE)  // Root of the graph.
+      continue;
+
+    if (User->getOpcode() == ISD::CopyToReg ||
+        User->getOpcode() == ISD::CopyFromReg ||
+        User->getOpcode() == ISD::INLINEASM ||
+        User->getOpcode() == ISD::EH_LABEL) {
+      // If their node ID got reset to -1 then they've already been selected.
+      // Treat them like a MachineOpcode.
+      if (User->getNodeId() == -1)
+        continue;
+    }
+
+    // If we have a TokenFactor, we handle it specially.
+    if (User->getOpcode() != ISD::TokenFactor) {
+      // If the node isn't a token factor and isn't part of our pattern, then
+      // it must be a random chained node in between two nodes we're selecting.
+      // This happens when we have something like:
+      //   x = load ptr
+      //   call
+      //   y = x+4
+      //   store y -> ptr
+      // Because we structurally match the load/store as a read/modify/write,
+      // but the call is chained between them.  We cannot fold in this case
+      // because it would induce a cycle in the graph.
+      if (!std::count(ChainedNodesInPattern.begin(),
+                      ChainedNodesInPattern.end(), User))
+        return CR_InducesCycle;
+
+      // Otherwise we found a node that is part of our pattern.  For example
+      // in:
+      //   x = load ptr
+      //   y = x+4
+      //   store y -> ptr
+      // This would happen when we're scanning down from the load and see the
+      // store as a user.  Record that there is a use of ChainedNode that is
+      // part of the pattern and keep scanning uses.
+      Result = CR_LeadsToInteriorNode;
+      InteriorChainedNodes.push_back(User);
+      continue;
+    }
+
+    // If we found a TokenFactor, there are two cases to consider: first if the
+    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+    // uses of the TF are in our pattern) we just want to ignore it.  Second,
+    // the TokenFactor can be sandwiched in between two chained nodes, like so:
+    //     [Load chain]
+    //         ^
+    //         |
+    //       [Load]
+    //       ^    ^
+    //       |    \                    DAG's like cheese
+    //      /       \                       do you?
+    //      /          |
+    // [TokenFactor] [Op]
+    //        ^        ^
+    //        |        |
+    //         \      /
+    //          \    /
+    //         [Store]
+    //
+    // In this case, the TokenFactor becomes part of our match and we rewrite
+    // it as a new TokenFactor.
+    //
+    // To distinguish these two cases, do a recursive walk down the uses.
+    switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+    case CR_Simple:
+      // If the uses of the TokenFactor are just already-selected nodes, ignore
+      // it, it is "below" our pattern.
+      continue;
+    case CR_InducesCycle:
+      // If the uses of the TokenFactor lead to nodes that are not part of our
+      // pattern that are not selected, folding would turn this into a cycle,
+      // bail out now.
+      return CR_InducesCycle;
+    case CR_LeadsToInteriorNode:
+      break;  // Otherwise, keep processing.
+    }
+
+    // Okay, we know we're in the interesting interior case.  The TokenFactor
+    // is now going to be considered part of the pattern so that we rewrite its
+    // uses (it may have uses that are not part of the pattern) with the
+    // ultimate chain result of the generated code.  We will also add its chain
+    // inputs as inputs to the ultimate TokenFactor we create.
+    Result = CR_LeadsToInteriorNode;
+    ChainedNodesInPattern.push_back(User);
+    InteriorChainedNodes.push_back(User);
+    continue;
+  }
+
+  return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain.  The
+/// input vector contains a list of all of the chained nodes that we match.  We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                       SelectionDAG *CurDAG) {
+  // Walk all of the chained nodes we've matched, recursively scanning down the
+  // users of the chain result.  This adds any TokenFactor nodes that are
+  // caught in between chained nodes to the chained and interior nodes list.
+  SmallVector<SDNode*, 3> InteriorChainedNodes;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+                       InteriorChainedNodes) == CR_InducesCycle)
+      return SDValue();  // Would induce a cycle.
+  }
+
+  // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+  // that we are interested in.  Form our input TokenFactor node.
+  SmallVector<SDValue, 3> InputChains;
+  for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+    // Add the input chain of this node to the InputChains list (which will be
+    // the operands of the generated TokenFactor) if it's not an interior node.
+    SDNode *N = ChainNodesMatched[i];
+    if (N->getOpcode() != ISD::TokenFactor) {
+      if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+        continue;
+
+      // Otherwise, add the input chain.
+      SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+      assert(InChain.getValueType() == MVT::Other && "Not a chain");
+      InputChains.push_back(InChain);
+      continue;
+    }
+
+    // If we have a token factor, we want to add all inputs of the token factor
+    // that are not part of the pattern we're matching.
+    for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+      if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+                      N->getOperand(op).getNode()))
+        InputChains.push_back(N->getOperand(op));
+    }
+  }
+
+  SDValue Res;
+  if (InputChains.size() == 1)
+    return InputChains[0];
+  return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+                         MVT::Other, &InputChains[0], InputChains.size());
+}
+
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+          const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+  // It is possible we're using MorphNodeTo to replace a node with no
+  // normal results with one that has a normal result (or we could be
+  // adding a chain) and the input could have glue and chains as well.
+  // In this case we need to shift the operands down.
+  // FIXME: This is a horrible hack and broken in obscure cases, no worse
+  // than the old isel though.
+  int OldGlueResultNo = -1, OldChainResultNo = -1;
+
+  unsigned NTMNumResults = Node->getNumValues();
+  if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+    OldGlueResultNo = NTMNumResults-1;
+    if (NTMNumResults != 1 &&
+        Node->getValueType(NTMNumResults-2) == MVT::Other)
+      OldChainResultNo = NTMNumResults-2;
+  } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+    OldChainResultNo = NTMNumResults-1;
+
+  // Call the underlying SelectionDAG routine to do the transmogrification.
+  // Note that this deletes operands of the old node that become dead.
+  SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+  // MorphNodeTo can operate in two ways: if an existing node with the
+  // specified operands exists, it can just return it.  Otherwise, it
+  // updates the node in place to have the requested operands.
+  if (Res == Node) {
+    // If we updated the node in place, reset the node ID.  To the isel,
+    // this should be just like a newly allocated machine node.
+    Res->setNodeId(-1);
+  }
+
+  unsigned ResNumResults = Res->getNumValues();
+  // Move the glue if needed.
+  if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+      (unsigned)OldGlueResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
+    --ResNumResults;
+
+  // Move the chain reference if needed.
+  if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+      (unsigned)OldChainResultNo != ResNumResults-1)
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+                                      SDValue(Res, ResNumResults-1));
+
+  // Otherwise, no replacement happened because the node already exists.
+  // Replace uses of the old node with the new one.
+  if (Res != Node)
+    CurDAG->ReplaceAllUsesWith(Node, Res);
+
+  return Res;
+}
+
+/// CheckSame - Implements OP_CheckSame.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N,
+          const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+  // Accept if it is exactly the same as a previously recorded node.
+  unsigned RecNo = MatcherTable[MatcherIndex++];
+  assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+  return N == RecordedNodes[RecNo].first;
+}
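+
+// CheckSame above and the Check* helpers below each read their own operand
+// bytes out of the matcher table through MatcherIndex, so the interpreter
+// loop can chain them without knowing how many bytes each check consumes.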
+
+/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                      SelectionDAGISel &SDISel) {
+  return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OP_CheckNodePredicate.
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+                   SelectionDAGISel &SDISel, SDNode *N) {
+  return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDNode *N) {
+  uint16_t Opc = MatcherTable[MatcherIndex++];
+  Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+  return N->getOpcode() == Opc;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+          SDValue N, const TargetLowering &TLI) {
+  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+  if (N.getValueType() == VT) return true;
+
+  // Handle the case when VT is iPTR.
+  return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+               SDValue N, const TargetLowering &TLI,
+               unsigned ChildNo) {
+  if (ChildNo >= N.getNumOperands())
+    return false;  // Match fails if out of range child #.
+  return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+              SDValue N) {
+  return cast<CondCodeSDNode>(N)->get() ==
+      (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+               SDValue N, const TargetLowering &TLI) {
+  MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+  if (cast<VTSDNode>(N)->getVT() == VT)
+    return true;
+
+  // Handle the case when VT is iPTR.
+  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+             SDValue N) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+  return C != 0 && C->getSExtValue() == Val;
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+            SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::AND) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+           SDValue N, SelectionDAGISel &SDISel) {
+  int64_t Val = MatcherTable[MatcherIndex++];
+  if (Val & 128)
+    Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+  if (N->getOpcode() != ISD::OR) return false;
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node.  If the current predicate is known to
+/// fail, set Result=true and return anything.  If the current predicate is
+/// known to pass, or cannot be evaluated here, set Result=false and return
+/// the MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+                                       unsigned Index, SDValue N,
+                                       bool &Result, SelectionDAGISel &SDISel,
+                 SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
+  switch (Table[Index++]) {
+  default:
+    Result = false;
+    return Index-1;  // Could not evaluate this predicate.
+  case SelectionDAGISel::OPC_CheckSame:
+    Result = !::CheckSame(Table, Index, N, RecordedNodes);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPatternPredicate:
+    Result = !::CheckPatternPredicate(Table, Index, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPredicate:
+    Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckOpcode:
+    Result = !::CheckOpcode(Table, Index, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckType:
+    Result = !::CheckType(Table, Index, N, SDISel.TLI);
+    return Index;
+  case SelectionDAGISel::OPC_CheckChild0Type:
+  case SelectionDAGISel::OPC_CheckChild1Type:
+  case SelectionDAGISel::OPC_CheckChild2Type:
+  case SelectionDAGISel::OPC_CheckChild3Type:
+  case SelectionDAGISel::OPC_CheckChild4Type:
+  case SelectionDAGISel::OPC_CheckChild5Type:
+  case SelectionDAGISel::OPC_CheckChild6Type:
+  case SelectionDAGISel::OPC_CheckChild7Type:
+    Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+                        Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+    return Index;
+  case SelectionDAGISel::OPC_CheckCondCode:
+    Result = !::CheckCondCode(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckValueType:
+    Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+    return Index;
+  case SelectionDAGISel::OPC_CheckInteger:
+    Result = !::CheckInteger(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckAndImm:
+    Result = !::CheckAndImm(Table, Index, N, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckOrImm:
+    Result = !::CheckOrImm(Table, Index, N, SDISel);
+    return Index;
+  }
+}
+
+namespace {
+
+struct MatchScope {
+  /// FailIndex - If this match fails, this is the index to continue with.
+  unsigned FailIndex;
+
+  /// NodeStack - The node stack when the scope was formed.
+  SmallVector<SDValue, 4> NodeStack;
+
+  /// NumRecordedNodes - The number of recorded nodes when the scope was
+  /// formed.
+  unsigned NumRecordedNodes;
+
+  /// NumMatchedMemRefs - The number of matched memref entries.
+  unsigned NumMatchedMemRefs;
+
+  /// InputChain/InputGlue - The current chain/glue
+  SDValue InputChain, InputGlue;
+
+  /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+  bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+}
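+
+// When a later check fails, the interpreter pops back to the most recent
+// MatchScope, restores all of the state captured above, and resumes at
+// FailIndex -- table-driven backtracking that needs no host-stack recursion.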
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+                 unsigned TableSize) {
+  // FIXME: Should these even be selected?  Handle these cases in the caller?
+  switch (NodeToMatch->getOpcode()) {
+  default:
+    break;
+  case ISD::EntryToken:       // These nodes remain the same.
+  case ISD::BasicBlock:
+  case ISD::Register:
+  //case ISD::VALUETYPE:
+  //case ISD::CONDCODE:
+  case ISD::HANDLENODE:
+  case ISD::MDNODE_SDNODE:
+  case ISD::TargetConstant:
+  case ISD::TargetConstantFP:
+  case ISD::TargetConstantPool:
+  case ISD::TargetFrameIndex:
+  case ISD::TargetExternalSymbol:
+  case ISD::TargetBlockAddress:
+  case ISD::TargetJumpTable:
+  case ISD::TargetGlobalTLSAddress:
+  case ISD::TargetGlobalAddress:
+  case ISD::TokenFactor:
+  case ISD::CopyFromReg:
+  case ISD::CopyToReg:
+  case ISD::EH_LABEL:
+    NodeToMatch->setNodeId(-1);  // Mark selected.
+    return 0;
+  case ISD::AssertSext:
+  case ISD::AssertZext:
+    CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+                                      NodeToMatch->getOperand(0));
+    return 0;
+  case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+  case ISD::UNDEF:     return Select_UNDEF(NodeToMatch);
+  }
+
+  assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+  // Set up the node stack with NodeToMatch as the only node on the stack.
+  SmallVector<SDValue, 8> NodeStack;
+  SDValue N = SDValue(NodeToMatch, 0);
+  NodeStack.push_back(N);
+
+  // MatchScopes - Scopes used when matching, if a match failure happens, this
+  // indicates where to continue checking.
+  SmallVector<MatchScope, 8> MatchScopes;
+
+  // RecordedNodes - This is the set of nodes that have been recorded by the
+  // state machine.  The second value is the parent of the node, or null if the
+  // root is recorded.
+  SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
+  // MatchedMemRefs - This is the set of MemRef's we've seen in the input
+  // pattern.
+  SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+  // These are the current input chain and glue for use when generating nodes.
+  // Various Emit operations change these.  For example, emitting a copytoreg
+  // uses and updates these.
+  SDValue InputChain, InputGlue;
+
+  // ChainNodesMatched - If a pattern matches nodes that have input/output
+  // chains, the OPC_EmitMergeInputChains operation is emitted which indicates
+  // which ones they are.  The result is captured into this list so that we can
+  // update the chain results when the pattern is complete.
+  SmallVector<SDNode*, 3> ChainNodesMatched;
+  SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
+  DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+        NodeToMatch->dump(CurDAG);
+        errs() << '\n');
+
+  // Determine where to start the interpreter.  Normally we start at opcode #0,
+  // but if the state machine starts with an OPC_SwitchOpcode, then we
+  // accelerate the first lookup (which is guaranteed to be hot) with the
+  // OpcodeOffset table.
+  unsigned MatcherIndex = 0;
+
+  if (!OpcodeOffset.empty()) {
+    // Already computed the OpcodeOffset table, just index into it.
+    if (N.getOpcode() < OpcodeOffset.size())
+      MatcherIndex = OpcodeOffset[N.getOpcode()];
+    DEBUG(errs() << "  Initial Opcode index to " << MatcherIndex << "\n");
+
+  } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+    // Otherwise, the table isn't computed, but the state machine does start
+    // with an OPC_SwitchOpcode instruction.  Populate the table now, since
+    // this is the first time we're selecting an instruction.
+    unsigned Idx = 1;
+    while (1) {
+      // Get the size of this case.
+      unsigned CaseSize = MatcherTable[Idx++];
+      if (CaseSize & 128)
+        CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+      if (CaseSize == 0) break;
+
+      // Get the opcode, add the index to the table.
+ uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); +#ifndef NDEBUG + unsigned CurrentOpcodeIndex = MatcherIndex; +#endif + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. + bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(errs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate + << ", continuing at " << FailIndex << "\n"); + ++NumDAGIselRetries; + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputGlue = InputGlue; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: { + // Remember this node, it may end up being an operand in the pattern. + SDNode *Parent = 0; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); + continue; + } + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. 
+
+      RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+                                             N.getNode()));
+      continue;
+    }
+    case OPC_RecordMemRef:
+      MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+      continue;
+
+    case OPC_CaptureGlueInput:
+      // If the current node has an input glue, capture it in InputGlue.
+      if (N->getNumOperands() != 0 &&
+          N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+        InputGlue = N->getOperand(N->getNumOperands()-1);
+      continue;
+
+    case OPC_MoveChild: {
+      unsigned ChildNo = MatcherTable[MatcherIndex++];
+      if (ChildNo >= N.getNumOperands())
+        break;  // Match fails if out of range child #.
+      N = N.getOperand(ChildNo);
+      NodeStack.push_back(N);
+      continue;
+    }
+
+    case OPC_MoveParent:
+      // Pop the current node off the NodeStack.
+      NodeStack.pop_back();
+      assert(!NodeStack.empty() && "Node stack imbalance!");
+      N = NodeStack.back();
+      continue;
+
+    case OPC_CheckSame:
+      if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+      continue;
+    case OPC_CheckPatternPredicate:
+      if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+      continue;
+    case OPC_CheckPredicate:
+      if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+                                N.getNode()))
+        break;
+      continue;
+    case OPC_CheckComplexPat: {
+      unsigned CPNum = MatcherTable[MatcherIndex++];
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
+      if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+                               RecordedNodes[RecNo].first, CPNum,
+                               RecordedNodes))
+        break;
+      continue;
+    }
+    case OPC_CheckOpcode:
+      if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+      continue;
+
+    case OPC_CheckType:
+      if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+      continue;
+
+    case OPC_SwitchOpcode: {
+      unsigned CurNodeOpcode = N.getOpcode();
+      unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+      unsigned CaseSize;
+      while (1) {
+        // Get the size of this case.
+        CaseSize = MatcherTable[MatcherIndex++];
+        if (CaseSize & 128)
+          CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+        if (CaseSize == 0) break;
+
+        uint16_t Opc = MatcherTable[MatcherIndex++];
+        Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
+        // If the opcode matches, then we will execute this case.
+        if (CurNodeOpcode == Opc)
+          break;
+
+        // Otherwise, skip over this case.
+        MatcherIndex += CaseSize;
+      }
+
+      // If no cases matched, bail out.
+      if (CaseSize == 0) break;
+
+      // Otherwise, execute the case we found.
+      DEBUG(errs() << "  OpcodeSwitch from " << SwitchStart
+                   << " to " << MatcherIndex << "\n");
+      continue;
+    }
+
+    case OPC_SwitchType: {
+      MVT CurNodeVT = N.getValueType().getSimpleVT();
+      unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+      unsigned CaseSize;
+      while (1) {
+        // Get the size of this case.
+        CaseSize = MatcherTable[MatcherIndex++];
+        if (CaseSize & 128)
+          CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+        if (CaseSize == 0) break;
+
+        MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+        if (CaseVT == MVT::iPTR)
+          CaseVT = TLI.getPointerTy();
+
+        // If the VT matches, then we will execute this case.
+        if (CurNodeVT == CaseVT)
+          break;
+
+        // Otherwise, skip over this case.
+        MatcherIndex += CaseSize;
+      }
+
+      // If no cases matched, bail out.
+      if (CaseSize == 0) break;
+
+      // Otherwise, execute the case we found.
+      DEBUG(errs() << "  TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+                   << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+      continue;
+    }
+    case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+    case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+    case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+    case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+      if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+                            Opcode-OPC_CheckChild0Type))
+        break;
+      continue;
+    case OPC_CheckCondCode:
+      if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+      continue;
+    case OPC_CheckValueType:
+      if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+      continue;
+    case OPC_CheckInteger:
+      if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+      continue;
+    case OPC_CheckAndImm:
+      if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+      continue;
+    case OPC_CheckOrImm:
+      if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+      continue;
+
+    case OPC_CheckFoldableChainNode: {
+      assert(NodeStack.size() != 1 && "No parent node");
+      // Verify that all intermediate nodes between the root and this one have
+      // a single use.
+      bool HasMultipleUses = false;
+      for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+        if (!NodeStack[i].hasOneUse()) {
+          HasMultipleUses = true;
+          break;
+        }
+      if (HasMultipleUses) break;
+
+      // Check to see that the target thinks this is profitable to fold and
+      // that we can fold it without inducing cycles in the graph.
+      if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+                              NodeToMatch) ||
+          !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+                         NodeToMatch, OptLevel,
+                         true/*We validate our own chains*/))
+        break;
+
+      continue;
+    }
+    case OPC_EmitInteger: {
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      int64_t Val = MatcherTable[MatcherIndex++];
+      if (Val & 128)
+        Val = GetVBR(Val, MatcherTable, MatcherIndex);
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
+      continue;
+    }
+    case OPC_EmitRegister: {
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      unsigned RegNo = MatcherTable[MatcherIndex++];
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+      continue;
+    }
+    case OPC_EmitRegister2: {
+      // For targets w/ more than 256 register names, the register enum
+      // values are stored in two bytes in the matcher table (just like
+      // opcodes).
+      MVT::SimpleValueType VT =
+        (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+      unsigned RegNo = MatcherTable[MatcherIndex++];
+      RegNo |= MatcherTable[MatcherIndex++] << 8;
+      RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+                              CurDAG->getRegister(RegNo, VT), (SDNode*)0));
+      continue;
+    }
+
+    case OPC_EmitConvertToTarget: {
+      // Convert from IMM/FPIMM to target version.
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+      SDValue Imm = RecordedNodes[RecNo].first;
+
+      if (Imm->getOpcode() == ISD::Constant) {
+        int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+        Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+      } else if (Imm->getOpcode() == ISD::ConstantFP) {
+        const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+        Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+      }
+
+      RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
+      continue;
+    }
+
+    case OPC_EmitMergeInputChains1_0:    // OPC_EmitMergeInputChains, 1, 0
+    case OPC_EmitMergeInputChains1_1: {  // OPC_EmitMergeInputChains, 1, 1
+      // These are space-optimized forms of OPC_EmitMergeInputChains.
+      assert(InputChain.getNode() == 0 &&
+             "EmitMergeInputChains should be the first chain producing node");
+      assert(ChainNodesMatched.empty() &&
+             "Should only have one EmitMergeInputChains per match");
+
+      // Read all of the chained nodes.
+      unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+      ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+      // FIXME: What if other value results of the node have uses not matched
+      // by this pattern?
+      if (ChainNodesMatched.back() != NodeToMatch &&
+          !RecordedNodes[RecNo].first.hasOneUse()) {
+        ChainNodesMatched.clear();
+        break;
+      }
+
+      // Merge the input chains if they are not intra-pattern references.
+      InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+      if (InputChain.getNode() == 0)
+        break;  // Failed to merge.
+      continue;
+    }
+
+    case OPC_EmitMergeInputChains: {
+      assert(InputChain.getNode() == 0 &&
+             "EmitMergeInputChains should be the first chain producing node");
+      // This node gets a list of nodes we matched in the input that have
+      // chains.  We want to token factor all of the input chains to these
+      // nodes together.  However, if any of the input chains is actually one
+      // of the nodes matched in this pattern, then we have an intra-match
+      // reference.  Ignore these because the newly token factored chain
+      // should not refer to the old nodes.
+      unsigned NumChains = MatcherTable[MatcherIndex++];
+      assert(NumChains != 0 && "Can't TF zero chains");
+
+      assert(ChainNodesMatched.empty() &&
+             "Should only have one EmitMergeInputChains per match");
+
+      // Read all of the chained nodes.
+      for (unsigned i = 0; i != NumChains; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+        ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
+        // FIXME: What if other value results of the node have uses not matched
+        // by this pattern?
+        if (ChainNodesMatched.back() != NodeToMatch &&
+            !RecordedNodes[RecNo].first.hasOneUse()) {
+          ChainNodesMatched.clear();
+          break;
+        }
+      }
+
+      // If the inner loop broke out, the match fails.
+      if (ChainNodesMatched.empty())
+        break;
+
+      // Merge the input chains if they are not intra-pattern references.
+      InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+      if (InputChain.getNode() == 0)
+        break;  // Failed to merge.
+
+      continue;
+    }
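+
+    // For instance, when a load and a store are matched together as one
+    // read/modify/write pattern, the two input chains collected above are
+    // token-factored into the single chain operand of the instruction that
+    // is about to be emitted.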
+
+    case OPC_EmitCopyToReg: {
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+      unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+      if (InputChain.getNode() == 0)
+        InputChain = CurDAG->getEntryNode();
+
+      InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+                                        DestPhysReg, RecordedNodes[RecNo].first,
+                                        InputGlue);
+
+      InputGlue = InputChain.getValue(1);
+      continue;
+    }
+
+    case OPC_EmitNodeXForm: {
+      unsigned XFormNo = MatcherTable[MatcherIndex++];
+      unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+      SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+      RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
+      continue;
+    }
+
+    case OPC_EmitNode:
+    case OPC_MorphNodeTo: {
+      uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+      TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+      unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+      // Get the result VT list.
+      unsigned NumVTs = MatcherTable[MatcherIndex++];
+      SmallVector<EVT, 4> VTs;
+      for (unsigned i = 0; i != NumVTs; ++i) {
+        MVT::SimpleValueType VT =
+          (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+        if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+        VTs.push_back(VT);
+      }
+
+      if (EmitNodeInfo & OPFL_Chain)
+        VTs.push_back(MVT::Other);
+      if (EmitNodeInfo & OPFL_GlueOutput)
+        VTs.push_back(MVT::Glue);
+
+      // This is hot code, so optimize the two most common cases of 1 and 2
+      // results.
+      SDVTList VTList;
+      if (VTs.size() == 1)
+        VTList = CurDAG->getVTList(VTs[0]);
+      else if (VTs.size() == 2)
+        VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+      else
+        VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+      // Get the operand list.
+      unsigned NumOps = MatcherTable[MatcherIndex++];
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i != NumOps; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+        Ops.push_back(RecordedNodes[RecNo].first);
+      }
+
+      // If there are variadic operands to add, handle them now.
+      if (EmitNodeInfo & OPFL_VariadicInfo) {
+        // Determine the start index to copy from.
+        unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+        FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+        assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+               "Invalid variadic node");
+        // Copy all of the variadic operands, not including a potential glue
+        // input.
+        for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+             i != e; ++i) {
+          SDValue V = NodeToMatch->getOperand(i);
+          if (V.getValueType() == MVT::Glue) break;
+          Ops.push_back(V);
+        }
+      }
+
+      // If this has chain/glue inputs, add them.
+      if (EmitNodeInfo & OPFL_Chain)
+        Ops.push_back(InputChain);
+      if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+        Ops.push_back(InputGlue);
+
+      // Create the node.
+      SDNode *Res = 0;
+      if (Opcode != OPC_MorphNodeTo) {
+        // If this is a normal EmitNode command, just create the new node and
+        // add the results to the RecordedNodes list.
+        Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+                                     VTList, Ops.data(), Ops.size());
+
+        // Add all the non-glue/non-chain results to the RecordedNodes list.
+        for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+          if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+          RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+                                                             (SDNode*) 0));
+        }
+
+      } else {
+        Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+                        EmitNodeInfo);
+      }
+
+      // If the node had chain/glue results, update our notion of the current
+      // chain and glue.
+      if (EmitNodeInfo & OPFL_GlueOutput) {
+        InputGlue = SDValue(Res, VTs.size()-1);
+        if (EmitNodeInfo & OPFL_Chain)
+          InputChain = SDValue(Res, VTs.size()-2);
+      } else if (EmitNodeInfo & OPFL_Chain)
+        InputChain = SDValue(Res, VTs.size()-1);
+
+      // If the OPFL_MemRefs glue is set on this node, slap all of the
+      // accumulated memrefs onto it.
+      //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+      // loads.
+      if (EmitNodeInfo & OPFL_MemRefs) {
+        MachineSDNode::mmo_iterator MemRefs =
+          MF->allocateMemRefsArray(MatchedMemRefs.size());
+        std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+        cast<MachineSDNode>(Res)
+          ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+      }
+
+      DEBUG(errs() << "  "
+                   << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+                   << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+      // If this was a MorphNodeTo then we're completely done!
+      if (Opcode == OPC_MorphNodeTo) {
+        // Update chain and glue uses.
+        UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+                            InputGlue, GlueResultNodesMatched, true);
+        return Res;
+      }
+
+      continue;
+    }
+
+    case OPC_MarkGlueResults: {
+      unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+      // Read and remember all the glue-result nodes.
+      for (unsigned i = 0; i != NumNodes; ++i) {
+        unsigned RecNo = MatcherTable[MatcherIndex++];
+        if (RecNo & 128)
+          RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
+        GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+      }
+      continue;
+    }
+
+    case OPC_CompleteMatch: {
+      // The match has been completed, and any new nodes (if any) have been
+      // created.  Patch up references to the matched dag to use the newly
+      // created nodes.
+      unsigned NumResults = MatcherTable[MatcherIndex++];
+
+      for (unsigned i = 0; i != NumResults; ++i) {
+        unsigned ResSlot = MatcherTable[MatcherIndex++];
+        if (ResSlot & 128)
+          ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+        assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+        SDValue Res = RecordedNodes[ResSlot].first;
+
+        assert(i < NodeToMatch->getNumValues() &&
+               NodeToMatch->getValueType(i) != MVT::Other &&
+               NodeToMatch->getValueType(i) != MVT::Glue &&
+               "Invalid number of results to complete!");
+        assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+                NodeToMatch->getValueType(i) == MVT::iPTR ||
+                Res.getValueType() == MVT::iPTR ||
+                NodeToMatch->getValueType(i).getSizeInBits() ==
+                    Res.getValueType().getSizeInBits()) &&
+               "invalid replacement");
+        CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+      }
+
+      // If the root node defines glue, add it to the glue nodes to update
+      // list.
+      if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+        GlueResultNodesMatched.push_back(NodeToMatch);
+
+      // Update chain and glue uses.
+      UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+                          InputGlue, GlueResultNodesMatched, false);
+
+      assert(NodeToMatch->use_empty() &&
+             "Didn't replace all uses of the node?");
+
+      // FIXME: We just return here, which interacts correctly with SelectRoot
+      // above.  We should fix this to not return an SDNode* anymore.
+      return 0;
+    }
+    }
+
+    // If the code reached this point, then the match failed.  See if there is
+    // another child to try in the current 'Scope', otherwise pop it until we
+    // find a case to check.
+    DEBUG(errs() << "  Match failed at index " << CurrentOpcodeIndex << "\n");
+    ++NumDAGIselRetries;
+    while (1) {
+      if (MatchScopes.empty()) {
+        CannotYetSelect(NodeToMatch);
+        return 0;
+      }
+
+      // Restore the interpreter state back to the point where the scope was
+      // formed.
+      MatchScope &LastScope = MatchScopes.back();
+      RecordedNodes.resize(LastScope.NumRecordedNodes);
+      NodeStack.clear();
+      NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+      N = NodeStack.back();
+
+      if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+        MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+      MatcherIndex = LastScope.FailIndex;
+
+      DEBUG(errs() << "  Continuing at " << MatcherIndex << "\n");
+
+      InputChain = LastScope.InputChain;
+      InputGlue = LastScope.InputGlue;
+      if (!LastScope.HasChainNodesMatched)
+        ChainNodesMatched.clear();
+      if (!LastScope.HasGlueResultNodesMatched)
+        GlueResultNodesMatched.clear();
+
+      // Check to see what the offset is at the new MatcherIndex.  If it is
+      // zero we have reached the end of this scope, otherwise we have another
+      // child in the current scope to try.
+      unsigned NumToSkip = MatcherTable[MatcherIndex++];
+      if (NumToSkip & 128)
+        NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+      // If we have another child in this scope to match, update FailIndex and
+      // try it.
+      if (NumToSkip != 0) {
+        LastScope.FailIndex = MatcherIndex+NumToSkip;
+        break;
+      }
+
+      // End of this scope, pop it and try the next child in the containing
+      // scope.
+      MatchScopes.pop_back();
+    }
+  }
+}
+
+
+
+void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+  std::string msg;
+  raw_string_ostream Msg(msg);
+  Msg << "Cannot select: ";
+
+  if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
+      N->getOpcode() != ISD::INTRINSIC_VOID) {
+    N->printrFull(Msg, CurDAG);
+  } else {
+    bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other;
+    unsigned iid =
+      cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
+    if (iid < Intrinsic::num_intrinsics)
+      Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+    else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
+      Msg << "target intrinsic %" << TII->getName(iid);
+    else
+      Msg << "unknown intrinsic #" << iid;
+  }
+  report_fatal_error(Msg.str());
+}
+
+char SelectionDAGISel::ID = 0;
diff --git a/final/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/final/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
new file mode 100644
index 00000000000..cd1647b17b9
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the SelectionDAG::viewGraph method.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ScheduleDAGSDNodes.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+namespace llvm {
+  template<>
+  struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
+
+    explicit DOTGraphTraits(bool isSimple=false) :
+      DefaultDOTGraphTraits(isSimple) {}
+
+    static bool hasEdgeDestLabels() {
+      return true;
+    }
+
+    static unsigned numEdgeDestLabels(const void *Node) {
+      return ((const SDNode *) Node)->getNumValues();
+    }
+
+    static std::string getEdgeDestLabel(const void *Node, unsigned i) {
+      return ((const SDNode *) Node)->getValueType(i).getEVTString();
+    }
+
+    template<typename EdgeIter>
+    static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+      return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+    }
+
+    /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
+    /// should actually target another edge source, not a node.  If this method
+    /// is implemented, getEdgeTarget should be implemented.
+    template<typename EdgeIter>
+    static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) {
+      return true;
+    }
+
+    /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is
+    /// called to determine which outgoing edge of Node is the target of this
+    /// edge.
+    template<typename EdgeIter>
+    static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) {
+      SDNode *TargetNode = *I;
+      SDNodeIterator NI = SDNodeIterator::begin(TargetNode);
+      std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo());
+      return NI;
+    }
+
+    static std::string getGraphName(const SelectionDAG *G) {
+      return G->getMachineFunction().getFunction()->getName();
+    }
+
+    static bool renderGraphFromBottomUp() {
+      return true;
+    }
+
+    static bool hasNodeAddressLabel(const SDNode *Node,
+                                    const SelectionDAG *Graph) {
+      return true;
+    }
+
+    /// If you want to override the dot attributes printed for a particular
+    /// edge, override this method.
+    template<typename EdgeIter>
+    static std::string getEdgeAttributes(const void *Node, EdgeIter EI,
+                                         const SelectionDAG *Graph) {
+      SDValue Op = EI.getNode()->getOperand(EI.getOperand());
+      EVT VT = Op.getValueType();
+      if (VT == MVT::Glue)
+        return "color=red,style=bold";
+      else if (VT == MVT::Other)
+        return "color=blue,style=dashed";
+      return "";
+    }
+
+
+    static std::string getSimpleNodeLabel(const SDNode *Node,
+                                          const SelectionDAG *G) {
+      std::string Result = Node->getOperationName(G);
+      {
+        raw_string_ostream OS(Result);
+        Node->print_details(OS, G);
+      }
+      return Result;
+    }
+    std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph);
+    static std::string getNodeAttributes(const SDNode *N,
+                                         const SelectionDAG *Graph) {
+#ifndef NDEBUG
+      const std::string &Attrs = Graph->getGraphAttrs(N);
+      if (!Attrs.empty()) {
+        if (Attrs.find("shape=") == std::string::npos)
+          return std::string("shape=Mrecord,") + Attrs;
+        else
+          return Attrs;
+      }
+#endif
+      return "shape=Mrecord";
+    }
+
+    static void addCustomGraphFeatures(SelectionDAG *G,
+                                       GraphWriter<SelectionDAG*> &GW) {
+      GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+      if (G->getRoot().getNode())
+        GW.emitEdge(0, -1, G->getRoot().getNode(), G->getRoot().getResNo(),
+                    "color=blue,style=dashed");
+    }
+  };
+}
+
+std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
+                                                        const SelectionDAG *G) {
+  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
+}
+
+
+/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
+/// rendered using 'dot'.
+///
+void SelectionDAG::viewGraph(const std::string &Title) {
+// This code is only for debugging!
+#ifndef NDEBUG
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+            false, Title);
+#else
+  errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+         << "systems with Graphviz or gv!\n";
+#endif  // NDEBUG
+}
+
+// This overload is defined out-of-line here instead of just using a
+// default parameter because this is easiest for gdb to call.
+void SelectionDAG::viewGraph() {
+  viewGraph("");
+}
+
+/// clearGraphAttrs - Clear all previously defined node graph attributes.
+/// Intended to be used from a debugging tool (eg. gdb).
+void SelectionDAG::clearGraphAttrs() {
+#ifndef NDEBUG
+  NodeGraphAttrs.clear();
+#else
+  errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".)
+///
+void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = Attrs;
+#else
+  errs() << "SelectionDAG::setGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+
+/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".)
+/// Used from getNodeAttributes.
+const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
+#ifndef NDEBUG
+  std::map<const SDNode *, std::string>::const_iterator I =
+    NodeGraphAttrs.find(N);
+
+  if (I != NodeGraphAttrs.end())
+    return I->second;
+  else
+    return "";
+#else
+  errs() << "SelectionDAG::getGraphAttrs is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+  return std::string();
+#endif
+}
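+
+// These attribute hooks are meant to be driven interactively; for example,
+// from a gdb session one might call setGraphColor(N, "red") on a node of
+// interest and then viewGraph() to render the DAG with that node highlighted.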
+
+/// setGraphColor - Convenience for setting node color attribute.
+///
+void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  NodeGraphAttrs[N] = std::string("color=") + Color;
+#else
+  errs() << "SelectionDAG::setGraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+/// setSubgraphColorHelper - Implement setSubgraphColor.  Return
+/// whether we truncated the search.
+///
+bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color,
+                                          DenseSet<SDNode *> &visited,
+                                          int level, bool &printed) {
+  bool hit_limit = false;
+
+#ifndef NDEBUG
+  if (level >= 20) {
+    if (!printed) {
+      printed = true;
+      DEBUG(dbgs() << "setSubgraphColor hit max level\n");
+    }
+    return true;
+  }
+
+  unsigned oldSize = visited.size();
+  visited.insert(N);
+  if (visited.size() != oldSize) {
+    setGraphColor(N, Color);
+    for (SDNodeIterator i = SDNodeIterator::begin(N),
+           iend = SDNodeIterator::end(N);
+         i != iend; ++i) {
+      hit_limit =
+        setSubgraphColorHelper(*i, Color, visited, level+1, printed) ||
+        hit_limit;
+    }
+  }
+#else
+  errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+  return hit_limit;
+}
+
+/// setSubgraphColor - Convenience for setting subgraph color attribute.
+///
+void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
+#ifndef NDEBUG
+  DenseSet<SDNode *> visited;
+  bool printed = false;
+  if (setSubgraphColorHelper(N, Color, visited, 0, printed)) {
+    // Visually mark that we hit the limit.
+    if (strcmp(Color, "red") == 0) {
+      setSubgraphColorHelper(N, "blue", visited, 0, printed);
+    } else if (strcmp(Color, "yellow") == 0) {
+      setSubgraphColorHelper(N, "green", visited, 0, printed);
+    }
+  }
+
+#else
+  errs() << "SelectionDAG::setSubgraphColor is only available in debug builds"
+         << " on systems with Graphviz or gv!\n";
+#endif
+}
+
+std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
+  std::string s;
+  raw_string_ostream O(s);
+  O << "SU(" << SU->NodeNum << "): ";
+  if (SU->getNode()) {
+    SmallVector<SDNode *, 4> GluedNodes;
+    for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+      GluedNodes.push_back(N);
+    while (!GluedNodes.empty()) {
+      O << DOTGraphTraits<SelectionDAG*>
+           ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+      GluedNodes.pop_back();
+      if (!GluedNodes.empty())
+        O << "\n    ";
+    }
+  } else {
+    O << "CROSS RC COPY";
+  }
+  return O.str();
+}
+
+void ScheduleDAGSDNodes::
+getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const {
+  if (DAG) {
+    // Draw a special "GraphRoot" node to indicate the root of the graph.
+    GW.emitSimpleNode(0, "plaintext=circle", "GraphRoot");
+    const SDNode *N = DAG->getRoot().getNode();
+    if (N && N->getNodeId() != -1)
+      GW.emitEdge(0, -1, &SUnits[N->getNodeId()], -1,
+                  "color=blue,style=dashed");
+  }
+}
diff --git a/final/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/final/lib/CodeGen/SelectionDAG/TargetLowering.cpp
new file mode 100644
index 00000000000..35b847ccabf
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -0,0 +1,3214 @@
+//===-- TargetLowering.cpp - Implement the TargetLowering class ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+using namespace llvm;
+
+namespace llvm {
+TLSModel::Model getTLSModel(const GlobalValue *GV, Reloc::Model reloc) {
+  bool isLocal = GV->hasLocalLinkage();
+  bool isDeclaration = GV->isDeclaration();
+  // FIXME: what should we do for protected and internal visibility?
+  // For variables, is internal different from hidden?
+  bool isHidden = GV->hasHiddenVisibility();
+
+  if (reloc == Reloc::PIC_) {
+    if (isLocal || isHidden)
+      return TLSModel::LocalDynamic;
+    else
+      return TLSModel::GeneralDynamic;
+  } else {
+    if (!isDeclaration || isHidden)
+      return TLSModel::LocalExec;
+    else
+      return TLSModel::InitialExec;
+  }
+}
+}
+
+/// InitLibcallNames - Set default libcall names.
+///
+static void InitLibcallNames(const char **Names) {
+  Names[RTLIB::SHL_I16] = "__ashlhi3";
+  Names[RTLIB::SHL_I32] = "__ashlsi3";
+  Names[RTLIB::SHL_I64] = "__ashldi3";
+  Names[RTLIB::SHL_I128] = "__ashlti3";
+  Names[RTLIB::SRL_I16] = "__lshrhi3";
+  Names[RTLIB::SRL_I32] = "__lshrsi3";
+  Names[RTLIB::SRL_I64] = "__lshrdi3";
+  Names[RTLIB::SRL_I128] = "__lshrti3";
+  Names[RTLIB::SRA_I16] = "__ashrhi3";
+  Names[RTLIB::SRA_I32] = "__ashrsi3";
+  Names[RTLIB::SRA_I64] = "__ashrdi3";
+  Names[RTLIB::SRA_I128] = "__ashrti3";
+  Names[RTLIB::MUL_I8] = "__mulqi3";
+  Names[RTLIB::MUL_I16] = "__mulhi3";
+  Names[RTLIB::MUL_I32] = "__mulsi3";
+  Names[RTLIB::MUL_I64] = "__muldi3";
+  Names[RTLIB::MUL_I128] = "__multi3";
+  Names[RTLIB::SDIV_I8] = "__divqi3";
+  Names[RTLIB::SDIV_I16] = "__divhi3";
+  Names[RTLIB::SDIV_I32] = "__divsi3";
+  Names[RTLIB::SDIV_I64] = "__divdi3";
+  Names[RTLIB::SDIV_I128] = "__divti3";
+  Names[RTLIB::UDIV_I8] = "__udivqi3";
+  Names[RTLIB::UDIV_I16] = "__udivhi3";
+  Names[RTLIB::UDIV_I32] = "__udivsi3";
+  Names[RTLIB::UDIV_I64] = "__udivdi3";
+  Names[RTLIB::UDIV_I128] = "__udivti3";
+  Names[RTLIB::SREM_I8] = "__modqi3";
+  Names[RTLIB::SREM_I16] = "__modhi3";
+  Names[RTLIB::SREM_I32] = "__modsi3";
+  Names[RTLIB::SREM_I64] = "__moddi3";
+  Names[RTLIB::SREM_I128] = "__modti3";
+  Names[RTLIB::UREM_I8] = "__umodqi3";
+  Names[RTLIB::UREM_I16] = "__umodhi3";
+  Names[RTLIB::UREM_I32] = "__umodsi3";
+  Names[RTLIB::UREM_I64] = "__umoddi3";
+  Names[RTLIB::UREM_I128] = "__umodti3";
+  Names[RTLIB::NEG_I32] = "__negsi2";
+  Names[RTLIB::NEG_I64] = "__negdi2";
+  Names[RTLIB::ADD_F32] = "__addsf3";
+  Names[RTLIB::ADD_F64] = "__adddf3";
+  Names[RTLIB::ADD_F80] = "__addxf3";
+  Names[RTLIB::ADD_PPCF128] = "__gcc_qadd";
+  Names[RTLIB::SUB_F32] = "__subsf3";
+  Names[RTLIB::SUB_F64] = "__subdf3";
+  Names[RTLIB::SUB_F80] = "__subxf3";
+  Names[RTLIB::SUB_PPCF128] = "__gcc_qsub";
+  Names[RTLIB::MUL_F32] = "__mulsf3";
+  Names[RTLIB::MUL_F64] = "__muldf3";
+  Names[RTLIB::MUL_F80] = "__mulxf3";
+  Names[RTLIB::MUL_PPCF128] = "__gcc_qmul";
+  Names[RTLIB::DIV_F32] = "__divsf3";
+  
Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + 
Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = 
"__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and-xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
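+/// For example, getUINTTOFP(MVT::i64, MVT::f32) returns UINTTOFP_I64_F32, +/// which InitLibcallNames maps to "__floatundisf" by default.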
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + else if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + else if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + else if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getTargetData()), TLOF(*tlof) { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + + // These library functions default to expand. 
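+ // A target that implements any of these natively overrides the action in + // its own TargetLowering subclass constructor; e.g. a hypothetical target + // with a hardware exp2 instruction might call + // setOperationAction(ISD::FEXP2, MVT::f32, Legal) instead.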
+ setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10,MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10,MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; + maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize + = maxStoresPerMemmoveOptSize = 4; + benefitFromCodePlacementOpt = false; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::Latency; + JumpBufSize = 0; + JumpBufAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + + InitLibcallNames(LibcallRoutineNames); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLowering::~TargetLowering() { + delete &TLOF; +} + +MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize()); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
+ return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// hasLegalSuperRegRegClasses - Return true if the specified register class +/// has one or more super-reg register classes that are legal. +bool +TargetLowering::hasLegalSuperRegRegClasses(const TargetRegisterClass *RC) const{ + if (*RC->superregclasses_begin() == 0) + return false; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (isLegalRC(RRC)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". +std::pair<const TargetRegisterClass*, uint8_t> +TargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + const TargetRegisterClass *BestRC = RC; + for (TargetRegisterInfo::regclass_iterator I = RC->superregclasses_begin(), + E = RC->superregclasses_end(); I != E; ++I) { + const TargetRegisterClass *RRC = *I; + if (RRC->isASubClass() || !isLegalRC(RRC)) + continue; + if (!hasLegalSuperRegRegClasses(RRC)) + return std::make_pair(RRC, 1); + BestRC = RRC; + } + return std::make_pair(BestRC, 1); +} + + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLowering::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { + EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; + if (!ExpandedVT.isInteger()) + break; + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction(ExpandedVT, Expand); + } + + // Inspect all of the ValueTypes smaller than the largest integer + // register to see which ones need promotion.
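+ // For example, if i32 is the smallest legal integer type, the loop below + // maps i1, i8 and i16 to i32 in RegisterTypeForVT/TransformToType and marks + // them Promote.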
+ unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + EVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, Promote); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, Expand); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, Promote); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, Expand); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (isTypeLegal(VT)) continue; + + // Determine if there is a legal wider type. If so, we should promote to + // that wider vector type. + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + if (NElts != 1) { + bool IsLegalWiderType = false; + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) continue; + } + + MVT IntermediateVT; + EVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); + } + } + + // Determine the 'representative' register class for each value type. + // A representative register class is the largest (meaning one which is + // not a sub-register class / subreg register class) legal register class for + // a group of value types. For example, on i386 the representative class + // for i8, i16, and i32 would be GR32; on x86_64 it would be GR64.
+ for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + const TargetRegisterClass* RRC; + uint8_t Cost; + tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + RepRegClassForVT[i] = RRC; + RepRegClassCostForVT[i] = Cost; + } +} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; +} + + +MVT::SimpleValueType TargetLowering::getSetCCResultType(EVT VT) const { + return PointerTy.SimpleTy; +} + +MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { + return MVT::i32; // return the default value +} + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + EVT &RegisterVT) const { + unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // we should widen to that legal vector type. This handles things like + // <2 x float> -> <4 x float>. + if (NumElts != 1 && getTypeAction(VT) == Promote) { + RegisterVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterVT)) { + IntermediateVT = RegisterVT; + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. + EVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + EVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. 
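+/// For example, a function returning i64 with no sext/zext attribute, on a +/// target whose widest legal integer register is i32, yields two i32 +/// ISD::OutputArg entries (with offsets 0 and 4 if Offsets is non-null).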
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, + SmallVectorImpl<uint64_t> *Offsets) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +/// getJumpTableEncoding - Return the entry encoding for a jump table in the +/// current function. The returned value is a member of the +/// MachineJumpTableInfo::JTEntryKind enum. +unsigned TargetLowering::getJumpTableEncoding() const { + // In non-pic modes, just use the address of a block. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return MachineJumpTableInfo::EK_BlockAddress; + + // In PIC mode, if the target supports a GPRel32 directive, use it. + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0) + return MachineJumpTableInfo::EK_GPRel32BlockAddress; + + // Otherwise, use a label difference. + return MachineJumpTableInfo::EK_LabelDifference32; +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + // If our PIC model is GP relative, use the global offset table as the base. + if (getJumpTableEncoding() == MachineJumpTableInfo::EK_GPRel32BlockAddress) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy()); + return Table; +} + +/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the +/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an +/// MCExpr. +const MCExpr * +TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI,MCContext &Ctx) const{ + // The normal PIC reloc base is the label at the start of the jump table.
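+ // Each EK_LabelDifference32 entry in the table is then emitted as the + // difference between a basic-block label and this per-function jump-table + // symbol.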
+ return MCSymbolRefExpr::Create(MF->getJTISymbol(JTI, Ctx), Ctx); +} + +bool +TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // Assume that everything is safe in static mode. + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return true; + + // In dynamic-no-pic mode, assume that known defined values are safe. + if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && + GA && + !GA->getGlobal()->isDeclaration() && + !GA->getGlobal()->isWeakForLinker()) + return true; + + // Otherwise assume nothing is safe. + return false; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// ShrinkDemandedConstant - Check to see if the specified operand of the +/// specified instruction is a constant integer. If so, check to see if there +/// are any bits set in the constant that are not demanded. If so, shrink the +/// constant and return true. +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + DebugLoc dl = Op.getDebugLoc(); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // If the constant has bits set that are not demanded, clear them and + // build a new, smaller constant. + if (C->getAPIntValue().intersects(~Demanded)) { + EVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// ShrinkDemandedOp - Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the +/// casts are free. This uses isZExtFree and ZERO_EXTEND for the widening +/// cast, but it could be generalized for targets with other types of +/// implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + DebugLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts.
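+ // For example, with BitWidth == 64 and only the low 8 bits demanded, an + // (i64 add x, y) becomes (zext (add (trunc x), (trunc y))) computed in i8, + // assuming the target reports the truncate and the zext as free.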
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// SimplifyDemandedBits - Look at Op. At this point, we know that only the +/// DemandedMask bits of the result of Op are ever used downstream. If we can +/// use this information to simplify Op, create a new simplified DAG node and +/// return true, returning the original and new nodes in Old and New. Otherwise, +/// analyze the expression and return a mask of KnownOne and KnownZero bits for +/// the expression (used to simplify the caller). The KnownZero/One bits may +/// only be accurate for those bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + DebugLoc dl = Op.getDebugLoc(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. + if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; + KnownZero = ~KnownOne & NewMask; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + // Do not increment Depth here; that can cause an infinite loop. + TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, + LHSZero, LHSOne, Depth); + // If the LHS already has zeros where RHSC does, this 'and' is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant.
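+ // For example, if the top 24 bits of the LHS are known zero and all result + // bits are demanded, an AND with 0xFFFF00FF is shrunk to an AND with 0xFF.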
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. 
+ // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known + if ((KnownOne & KnownOne2) == KnownOne) { + EVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // For XOR, we prefer to force bits to 1 if they will make a -1. + // If we can't force bits, try to shrink the constant. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // If we can expand it to have all bits set, do it. + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + EVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, VT)); + return TLO.CombineTo(Op, New); + } + // If it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them.
+ if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + if ((APInt::getHighBitsSet(BitWidth, + BitWidth - InnerVT.getSizeInBits()) & + DemandedMask) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(InnerVT); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + } + + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // Low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now.
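+ // For example, if only bits 0-7 of (srl x, 8) are demanded, then bits 8-15 + // of x are demanded, which is why the demanded mask is shifted left by + // ShAmt before recursing.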
+ if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (DemandedMask == 1) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), + Op.getOperand(1))); + } else if (KnownOne.intersects(SignBit)) { // New bits are known one. + KnownOne |= HighBits; + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - EVT.getScalarType().getSizeInBits()); + + // If none of the extended bits are demanded, eliminate the sextinreg. + if ((NewBits & NewMask) == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = + APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + EVT.getScalarType().getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. + InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension.
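+ // For example, (sign_extend_inreg x, i8) with bit 7 of x known zero is + // equivalent to the zero-extend-in-reg form built below, i.e. an AND with + // 0xFF.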
+ if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. + if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. + if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Otherwise, top bits aren't known. + KnownOne &= ~NewBits; + KnownZero &= ~NewBits; + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt TruncMask = NewMask.zext(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. 
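+ // For example, (i32 (trunc (i64 (srl x, 16)))) where only the low 16 bits + // of the result are demanded can be rewritten as + // (i32 (srl (i32 (trunc x)), 16)), since none of the shifted-in high bits + // are used.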
+    if (Op.getOperand(0).getNode()->hasOneUse()) {
+      SDValue In = Op.getOperand(0);
+      switch (In.getOpcode()) {
+      default: break;
+      case ISD::SRL:
+        // Shrink SRL by a constant if none of the high bits shifted in are
+        // demanded.
+        if (TLO.LegalTypes() &&
+            !isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
+          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
+          // undesirable.
+          break;
+        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
+        if (!ShAmt)
+          break;
+        APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+                                               OperandBitWidth - BitWidth);
+        HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
+
+        if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
+          // None of the shifted in bits are needed.  Add a truncate of the
+          // shift input, then shift it.
+          SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
+                                             Op.getValueType(),
+                                             In.getOperand(0));
+          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
+                                                   Op.getValueType(),
+                                                   NewTrunc,
+                                                   In.getOperand(1)));
+        }
+        break;
+      }
+    }
+
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+    break;
+  }
+  case ISD::AssertZext: {
+    // Demand all the bits of the input that are demanded in the output.
+    // The low bits are obvious; the high bits are demanded because we're
+    // asserting that they're zero here.
+    if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
+                             KnownZero, KnownOne, TLO, Depth+1))
+      return true;
+    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth,
+                                        VT.getSizeInBits());
+    KnownZero |= ~InMask & NewMask;
+    break;
+  }
+  case ISD::BITCAST:
+#if 0
+    // If this is an FP->Int bitcast and if the sign bit is the only thing that
+    // is demanded, turn this into a FGETSIGN.
+    if (NewMask == EVT::getIntegerVTSignBit(Op.getValueType()) &&
+        MVT::isFloatingPoint(Op.getOperand(0).getValueType()) &&
+        !MVT::isVector(Op.getOperand(0).getValueType())) {
+      // Only do this xform if FGETSIGN is valid or if before legalize.
+      if (!TLO.AfterLegalize ||
+          isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
+        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
+        // place.  We expect the SHL to be eliminated by other optimizations.
+        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+                                       Op.getOperand(0));
+        unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+        SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
+                                                 Sign, ShAmt));
+      }
+    }
+#endif
+    break;
+  case ISD::ADD:
+  case ISD::MUL:
+  case ISD::SUB: {
+    // Add, Sub, and Mul don't demand any bits in positions beyond that
+    // of the highest bit demanded of them.
+    APInt LoMask = APInt::getLowBitsSet(BitWidth,
+                                        BitWidth - NewMask.countLeadingZeros());
+    if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+                             KnownOne2, TLO, Depth+1))
+      return true;
+    // See if the operation should be performed at a smaller bit width.
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+      return true;
+  }
+  // FALL THROUGH
+  default:
+    // Just use ComputeMaskedBits to compute output bits.
+    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
+    break;
+  }
+
+  // If we know the value of all of the demanded bits, return this as a
+  // constant.
+  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
+    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
+
+  return false;
+}
+
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
+/// KnownZero/KnownOne bitsets.
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+                                                    const APInt &Mask,
+                                                    APInt &KnownZero,
+                                                    APInt &KnownOne,
+                                                    const SelectionDAG &DAG,
+                                                    unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use MaskedValueIsZero if you don't know whether Op"
+         " is a target node!");
+  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
+}
+
+/// ComputeNumSignBitsForTargetNode - This method can be implemented by
+/// targets that want to expose additional information about sign bits to the
+/// DAG Combiner.
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+                                                         unsigned Depth) const {
+  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+         "Should use ComputeNumSignBits if you don't know whether Op"
+         " is a target node!");
+  return 1;
+}
+
+/// ValueHasExactlyOneBitSet - Test if the given value is known to have exactly
+/// one bit set. This differs from ComputeMaskedBits in that it doesn't need to
+/// determine which bit is set.
+///
+static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
+  // A left-shift of a constant one will have exactly one bit set, because
+  // shifting the bit off the end is undefined.
+  if (Val.getOpcode() == ISD::SHL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue() == 1)
+        return true;
+
+  // Similarly, a right-shift of a constant sign-bit will have exactly
+  // one bit set.
+  if (Val.getOpcode() == ISD::SRL)
+    if (ConstantSDNode *C =
+         dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0)))
+      if (C->getAPIntValue().isSignBit())
+        return true;
+
+  // More could be done here, though the above checks are enough
+  // to handle some common cases.
+
+  // Fall back to ComputeMaskedBits to catch other known cases.
+  EVT OpVT = Val.getValueType();
+  unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
+  APInt Mask = APInt::getAllOnesValue(BitWidth);
+  APInt KnownZero, KnownOne;
+  DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
+  return (KnownZero.countPopulation() == BitWidth - 1) &&
+         (KnownOne.countPopulation() == 1);
+}
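A standalone sketch of the fallback test at the end of ValueHasExactlyOneBitSet, independent of the LLVM APIs: plain 32-bit masks stand in for the APInt known-bits results, the helper name is invented for illustration, and __builtin_popcount is the GCC/Clang builtin.

#include <cassert>
#include <cstdint>

// A value is provably a power of two when all but one bit are known zero and
// the remaining bit is known one.
static bool hasExactlyOneBitSet(uint32_t KnownZero, uint32_t KnownOne) {
  return __builtin_popcount(KnownZero) == 31 &&
         __builtin_popcount(KnownOne) == 1;
}

int main() {
  assert(hasExactlyOneBitSet(~0x10u, 0x10u));   // value pinned to exactly 0x10
  assert(!hasExactlyOneBitSet(~0x11u, 0x10u));  // bit 0 unknown: not provable
  return 0;
}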
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// and cc. If it is unable to simplify it, return a null SDValue.
+SDValue
+TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
+                              ISD::CondCode Cond, bool foldBooleans,
+                              DAGCombinerInfo &DCI, DebugLoc dl) const {
+  SelectionDAG &DAG = DCI.DAG;
+  LLVMContext &Context = *DAG.getContext();
+
+  // These setcc operations always fold.
+  switch (Cond) {
+  default: break;
+  case ISD::SETFALSE:
+  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
+  case ISD::SETTRUE:
+  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  }
+
+  if (isa<ConstantSDNode>(N0.getNode())) {
+    // Ensure that the constant occurs on the RHS, and fold constant
+    // comparisons.
+    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  }
+
+  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+    const APInt &C1 = N1C->getAPIntValue();
+
+    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
+    // equality comparison, then we're just comparing whether X itself is
+    // zero.
+    if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
+        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
+        N0.getOperand(1).getOpcode() == ISD::Constant) {
+      const APInt &ShAmt
+        = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+          ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+        if ((C1 == 0) == (Cond == ISD::SETEQ)) {
+          // (srl (ctlz x), 5) == 0  -> X != 0
+          // (srl (ctlz x), 5) != 1  -> X != 0
+          Cond = ISD::SETNE;
+        } else {
+          // (srl (ctlz x), 5) != 0  -> X == 0
+          // (srl (ctlz x), 5) == 1  -> X == 0
+          Cond = ISD::SETEQ;
+        }
+        SDValue Zero = DAG.getConstant(0, N0.getValueType());
+        return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
+                            Zero, Cond);
+      }
+    }
+
+    SDValue CTPOP = N0;
+    // Look through truncs that don't change the value of a ctpop.
+    if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+      CTPOP = N0.getOperand(0);
+
+    if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+        (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+                        Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+      EVT CTVT = CTPOP.getValueType();
+      SDValue CTOp = CTPOP.getOperand(0);
+
+      // (ctpop x) u< 2 -> (x & x-1) == 0
+      // (ctpop x) u> 1 -> (x & x-1) != 0
+      if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+        SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+                                  DAG.getConstant(1, CTVT));
+        SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+        ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+        return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+      }
+
+      // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+    }
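The ctpop rewrite above rests on a classic identity: clearing the lowest set bit of x yields zero exactly when x has at most one bit set. A minimal standalone check of that equivalence (plain C++, not LLVM code; __builtin_popcount plays the role of ISD::CTPOP):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 1000000; ++x) {
    bool viaCtpop = __builtin_popcount(x) < 2;  // (ctpop x) u< 2
    bool viaMask  = (x & (x - 1)) == 0;         // (x & x-1) == 0
    assert(viaCtpop == viaMask);                // x == 0 wraps and still works
  }
  return 0;
}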
+    // If the LHS is '(and load, const)', the RHS is 0,
+    // the test is for equality or unsigned, and all 1 bits of the const are
+    // in the same partial word, see if we can shorten the load.
+    if (DCI.isBeforeLegalize() &&
+        N0.getOpcode() == ISD::AND && C1 == 0 &&
+        N0.getNode()->hasOneUse() &&
+        isa<LoadSDNode>(N0.getOperand(0)) &&
+        N0.getOperand(0).getNode()->hasOneUse() &&
+        isa<ConstantSDNode>(N0.getOperand(1))) {
+      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
+      APInt bestMask;
+      unsigned bestWidth = 0, bestOffset = 0;
+      if (!Lod->isVolatile() && Lod->isUnindexed()) {
+        unsigned origWidth = N0.getValueType().getSizeInBits();
+        unsigned maskWidth = origWidth;
+        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+        // 8 bits, but have to be careful...
+        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
+          origWidth = Lod->getMemoryVT().getSizeInBits();
+        const APInt &Mask =
+          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+        for (unsigned width = origWidth / 2; width>=8; width /= 2) {
+          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
+          for (unsigned offset=0; offset<origWidth/width; offset++) {
+            if ((newMask & Mask) == Mask) {
+              if (!TD->isLittleEndian())
+                bestOffset = (origWidth/width - offset - 1) * (width/8);
+              else
+                bestOffset = (uint64_t)offset * (width/8);
+              bestMask = Mask.lshr(offset * (width/8) * 8);
+              bestWidth = width;
+              break;
+            }
+            newMask = newMask << width;
+          }
+        }
+      }
+      if (bestWidth) {
+        EVT newVT = EVT::getIntegerVT(Context, bestWidth);
+        if (newVT.isRound()) {
+          EVT PtrType = Lod->getOperand(1).getValueType();
+          SDValue Ptr = Lod->getBasePtr();
+          if (bestOffset != 0)
+            Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
+                              DAG.getConstant(bestOffset, PtrType));
+          unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
+          SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
+                                Lod->getPointerInfo().getWithOffset(bestOffset),
+                                        false, false, NewAlign);
+          return DAG.getSetCC(dl, VT,
+                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
+                                      DAG.getConstant(bestMask.trunc(bestWidth),
+                                                      newVT)),
+                              DAG.getConstant(0LL, newVT), Cond);
+        }
+      }
+    }
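The narrowing loop above slides a power-of-two-sized window across the word until it covers every set bit of the AND mask. A simplified standalone sketch of the same scan, assuming a 32-bit non-extending little-endian load (the function name and the restriction to widths 16 and 8 are illustrative only):

#include <cassert>
#include <cstdint>

static bool findNarrowWindow(uint32_t Mask, unsigned &Width,
                             unsigned &ByteOffset) {
  bool found = false;
  for (unsigned width = 16; width >= 8; width /= 2) {
    uint32_t window = (1u << width) - 1;     // low 'width' bits
    for (unsigned offset = 0; offset < 32 / width; ++offset) {
      if ((window & Mask) == Mask) {         // all set bits inside the window
        Width = width;                       // narrower widths overwrite wider
        ByteOffset = offset * (width / 8);   // little-endian byte offset
        found = true;
        break;
      }
      window <<= width;                      // slide the window up the word
    }
  }
  return found;
}

int main() {
  unsigned W, Off;
  assert(findNarrowWindow(0x00FF0000u, W, Off) && W == 8 && Off == 2);
  assert(!findNarrowWindow(0x00FF00FFu, W, Off));  // bits span two windows
  return 0;
}

As in the code above, a later (narrower) success overwrites an earlier one, which is what lets a 32-bit load and mask shrink all the way to a single byte.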
+    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
+    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+      unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+
+      // If the comparison constant has bits in the upper part, the
+      // zero-extended value could never match.
+      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
+                                              C1.getBitWidth() - InSize))) {
+        switch (Cond) {
+        case ISD::SETUGT:
+        case ISD::SETUGE:
+        case ISD::SETEQ: return DAG.getConstant(0, VT);
+        case ISD::SETULT:
+        case ISD::SETULE:
+        case ISD::SETNE: return DAG.getConstant(1, VT);
+        case ISD::SETGT:
+        case ISD::SETGE:
+          // True if the sign bit of C1 is set.
+          return DAG.getConstant(C1.isNegative(), VT);
+        case ISD::SETLT:
+        case ISD::SETLE:
+          // True if the sign bit of C1 isn't set.
+          return DAG.getConstant(C1.isNonNegative(), VT);
+        default:
+          break;
+        }
+      }
+
+      // Otherwise, we can perform the comparison with the low bits.
+      switch (Cond) {
+      case ISD::SETEQ:
+      case ISD::SETNE:
+      case ISD::SETUGT:
+      case ISD::SETUGE:
+      case ISD::SETULT:
+      case ISD::SETULE: {
+        EVT newVT = N0.getOperand(0).getValueType();
+        if (DCI.isBeforeLegalizeOps() ||
+            (isOperationLegal(ISD::SETCC, newVT) &&
+             getCondCodeAction(Cond, newVT)==Legal))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(C1.trunc(InSize), newVT),
+                              Cond);
+        break;
+      }
+      default:
+        break;   // todo, be more careful with signed comparisons
+      }
+    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
+      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
+      EVT ExtDstTy = N0.getValueType();
+      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
+
+      // If the constant doesn't fit into the number of bits for the source of
+      // the sign extension, it is impossible for both sides to be equal.
+      if (C1.getMinSignedBits() > ExtSrcTyBits)
+        return DAG.getConstant(Cond == ISD::SETNE, VT);
+
+      SDValue ZextOp;
+      EVT Op0Ty = N0.getOperand(0).getValueType();
+      if (Op0Ty == ExtSrcTy) {
+        ZextOp = N0.getOperand(0);
+      } else {
+        APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
+        ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
+                             DAG.getConstant(Imm, Op0Ty));
+      }
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(ZextOp.getNode());
+      // Otherwise, make this a use of a zext.
+      return DAG.getSetCC(dl, VT, ZextOp,
+                          DAG.getConstant(C1 & APInt::getLowBitsSet(
+                                                              ExtDstTyBits,
+                                                              ExtSrcTyBits),
+                                          ExtDstTy),
+                          Cond);
+    } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
+               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
+      if (N0.getOpcode() == ISD::SETCC &&
+          isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
+        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
+        if (TrueWhenTrue)
+          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+        // Invert the condition.
+        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+        CC = ISD::getSetCCInverse(CC,
+                                  N0.getOperand(0).getValueType().isInteger());
+        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+      }
+
+      if ((N0.getOpcode() == ISD::XOR ||
+           (N0.getOpcode() == ISD::AND &&
+            N0.getOperand(0).getOpcode() == ISD::XOR &&
+            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
+          isa<ConstantSDNode>(N0.getOperand(1)) &&
+          cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
+        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
+        // can only do this if the top bits are known zero.
+        unsigned BitWidth = N0.getValueSizeInBits();
+        if (DAG.MaskedValueIsZero(N0,
+                                  APInt::getHighBitsSet(BitWidth,
+                                                        BitWidth-1))) {
+          // Okay, get the un-inverted input value.
+          SDValue Val;
+          if (N0.getOpcode() == ISD::XOR)
+            Val = N0.getOperand(0);
+          else {
+            assert(N0.getOpcode() == ISD::AND &&
+                   N0.getOperand(0).getOpcode() == ISD::XOR);
+            // ((X^1)&1)^1 -> X & 1
+            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
+                              N0.getOperand(0).getOperand(0),
+                              N0.getOperand(1));
+          }
+
+          return DAG.getSetCC(dl, VT, Val, N1,
+                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+        }
+      } else if (N1C->getAPIntValue() == 1 &&
+                 (VT == MVT::i1 ||
+                  getBooleanContents() == ZeroOrOneBooleanContent)) {
+        SDValue Op0 = N0;
+        if (Op0.getOpcode() == ISD::TRUNCATE)
+          Op0 = Op0.getOperand(0);
+
+        if ((Op0.getOpcode() == ISD::XOR) &&
+            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+          // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+          Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+          return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+                              Cond);
+        } else if (Op0.getOpcode() == ISD::AND &&
+                   isa<ConstantSDNode>(Op0.getOperand(1)) &&
+                   cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+          if (Op0.getValueType().bitsGT(VT))
+            Op0 = DAG.getNode(ISD::AND, dl, VT,
+                          DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+                              DAG.getConstant(1, VT));
+          else if (Op0.getValueType().bitsLT(VT))
+            Op0 = DAG.getNode(ISD::AND, dl, VT,
+                        DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
+                              DAG.getConstant(1, VT));
+
+          return DAG.getSetCC(dl, VT, Op0,
+                              DAG.getConstant(0, Op0.getValueType()),
+                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+        }
+      }
+    }
+
+    APInt MinVal, MaxVal;
+    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+    if (ISD::isSignedIntSetCC(Cond)) {
+      MinVal = APInt::getSignedMinValue(OperandBitSize);
+      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
+    } else {
+      MinVal = APInt::getMinValue(OperandBitSize);
+      MaxVal = APInt::getMaxValue(OperandBitSize);
+    }
+
+    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
+    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
+      if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
+      // X >= C0 --> X > (C0-1)
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(C1-1, N1.getValueType()),
+                          (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
+    }
+
+    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
+      if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
+      // X <= C0 --> X < (C0+1)
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(C1+1, N1.getValueType()),
+                          (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
+    }
+
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
+      return DAG.getConstant(0, VT);      // X < MIN --> false
+    if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
+      return DAG.getConstant(1, VT);      // X >= MIN --> true
+    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
+      return DAG.getConstant(0, VT);      // X > MAX --> false
+    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
+      return DAG.getConstant(1, VT);      // X <= MAX --> true
+
+    // Canonicalize setgt X, Min --> setne X, Min
+    if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
+      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+    // Canonicalize setlt X, Max --> setne X, Max
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
+      return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
+
+    // If we have setult X, 1, turn it into seteq X, 0
+    if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(MinVal, N0.getValueType()),
+                          ISD::SETEQ);
+    // If we have setugt X, Max-1, turn it into seteq X, Max
+    else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(MaxVal, N0.getValueType()),
+                          ISD::SETEQ);
+
+    // If we have "setcc X, C0", check to see if we can shrink the immediate
+    // by changing cc.
+
+    // SETUGT X, SINTMAX  -> SETLT X, 0
+    if (Cond == ISD::SETUGT &&
+        C1 == APInt::getSignedMaxValue(OperandBitSize))
+      return DAG.getSetCC(dl, VT, N0,
+                          DAG.getConstant(0, N1.getValueType()),
+                          ISD::SETLT);
+
+    // SETULT X, SINTMIN  -> SETGT X, -1
+    if (Cond == ISD::SETULT &&
+        C1 == APInt::getSignedMinValue(OperandBitSize)) {
+      SDValue ConstMinusOne =
+        DAG.getConstant(APInt::getAllOnesValue(OperandBitSize),
+                        N1.getValueType());
+      return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
+    }
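A standalone check that the GE/LE-to-GT/LT canonicalizations above are safe once the MIN/MAX endpoints have been folded away, so C1-1 and C1+1 can never wrap (8-bit unsigned domain for brevity; illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 0; x < 256; ++x)
    for (int c = 0; c < 256; ++c) {
      uint8_t X = (uint8_t)x, C = (uint8_t)c;
      if (C != 0)    // C == MIN is already folded to 'true' above
        assert((X >= C) == (X > (uint8_t)(C - 1)));  // setuge -> setugt
      if (C != 255)  // C == MAX is already folded to 'true' above
        assert((X <= C) == (X < (uint8_t)(C + 1)));  // setule -> setult
    }
  return 0;
}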
+    // Fold bit comparisons when we can.
+    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+        (VT == N0.getValueType() ||
+         (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+        N0.getOpcode() == ISD::AND)
+      if (ConstantSDNode *AndRHS =
+                  dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+        EVT ShiftTy = DCI.isBeforeLegalize() ?
+          getPointerTy() : getShiftAmountTy(N0.getValueType());
+        if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
+          // Perform the xform if the AND RHS is a single bit.
+          if (AndRHS->getAPIntValue().isPowerOf2()) {
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                   DAG.getConstant(AndRHS->getAPIntValue().logBase2(), ShiftTy)));
+          }
+        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
+          // (X & 8) == 8  -->  (X & 8) >> 3
+          // Perform the xform if C1 is a single bit.
+          if (C1.isPowerOf2()) {
+            return DAG.getNode(ISD::TRUNCATE, dl, VT,
+                              DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
+                                      DAG.getConstant(C1.logBase2(), ShiftTy)));
+          }
+        }
+      }
+  }
+
+  if (isa<ConstantSDNode>(N0.getNode())) {
+    // Constant fold or commute setcc.
+    SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
+    if (O.getNode()) return O;
+  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
+    // If the RHS of an FP comparison is a constant, simplify it away in
+    // some cases.
+    if (CFP->getValueAPF().isNaN()) {
+      // If an operand is known to be a nan, we can fold it.
+      switch (ISD::getUnorderedFlavor(Cond)) {
+      default: llvm_unreachable("Unknown flavor!");
+      case 0:  // Known false.
+        return DAG.getConstant(0, VT);
+      case 1:  // Known true.
+        return DAG.getConstant(1, VT);
+      case 2:  // Undefined.
+        return DAG.getUNDEF(VT);
+      }
+    }
+
+    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
+    // constant if knowing that the operand is non-nan is enough.  We prefer to
+    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
+    // materialize 0.0.
+    if (Cond == ISD::SETO || Cond == ISD::SETUO)
+      return DAG.getSetCC(dl, VT, N0, N0, Cond);
+
+    // If the condition is not legal, see if we can find an equivalent one
+    // which is legal.
+    if (!isCondCodeLegal(Cond, N0.getValueType())) {
+      // If the comparison was an awkward floating-point == or != and one of
+      // the comparison operands is infinity or negative infinity, convert the
+      // condition to a less-awkward <= or >=.
+      if (CFP->getValueAPF().isInfinity()) {
+        if (CFP->getValueAPF().isNegative()) {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETOLE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETUGT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
+        } else {
+          if (Cond == ISD::SETOEQ &&
+              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
+          if (Cond == ISD::SETUEQ &&
+              isCondCodeLegal(ISD::SETOGE, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
+          if (Cond == ISD::SETUNE &&
+              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
+          if (Cond == ISD::SETONE &&
+              isCondCodeLegal(ISD::SETULT, N0.getValueType()))
+            return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
+        }
+      }
+    }
+  }
+
+  if (N0 == N1) {
+    // We can always fold X == X for integer setcc's.
+    if (N0.getValueType().isInteger())
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    unsigned UOF = ISD::getUnorderedFlavor(Cond);
+    if (UOF == 2)   // FP operators that are undefined on NaNs.
+      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
+    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
+      return DAG.getConstant(UOF, VT);
+    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
+    // if it is not already.
+    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
+    if (NewCond != Cond)
+      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
+  }
+
+  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+      N0.getValueType().isInteger()) {
+    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
+        N0.getOpcode() == ISD::XOR) {
+      // Simplify (X+Y) == (X+Z) -->  Y == Z
+      if (N0.getOpcode() == N1.getOpcode()) {
+        if (N0.getOperand(0) == N1.getOperand(0))
+          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
+        if (N0.getOperand(1) == N1.getOperand(1))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
+        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
+          // If X op Y == Y op X, try other combinations.
+          if (N0.getOperand(0) == N1.getOperand(1))
+            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+                                Cond);
+          if (N0.getOperand(1) == N1.getOperand(0))
+            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+                                Cond);
+        }
+      }
+
+      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
+        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+          // Turn (X+C1) == C2 --> X == C2-C1
+          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
+            return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                                DAG.getConstant(RHSC->getAPIntValue()-
+                                                LHSR->getAPIntValue(),
+                                N0.getValueType()), Cond);
+          }
+
+          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
+          if (N0.getOpcode() == ISD::XOR)
+            // If we know that all of the inverted bits are zero, don't bother
+            // performing the inversion.
+            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
+              return
+                DAG.getSetCC(dl, VT, N0.getOperand(0),
+                             DAG.getConstant(LHSR->getAPIntValue() ^
+                                               RHSC->getAPIntValue(),
+                                             N0.getValueType()),
+                             Cond);
+        }
+
+        // Turn (C1-X) == C2 --> X == C1-C2
+        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
+          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
+            return
+              DAG.getSetCC(dl, VT, N0.getOperand(1),
+                           DAG.getConstant(SUBC->getAPIntValue() -
+                                             RHSC->getAPIntValue(),
+                                           N0.getValueType()),
+                           Cond);
+          }
+        }
+      }
+
+      // Simplify (X+Z) == X -->  Z == 0
+      if (N0.getOperand(0) == N1)
+        return DAG.getSetCC(dl, VT, N0.getOperand(1),
+                            DAG.getConstant(0, N0.getValueType()), Cond);
+      if (N0.getOperand(1) == N1) {
+        if (DAG.isCommutativeBinOp(N0.getOpcode()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0),
+                              DAG.getConstant(0, N0.getValueType()), Cond);
+        else if (N0.getNode()->hasOneUse()) {
+          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // (Z-X) == X  --> Z == X<<1
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
+                                   N1,
+                       DAG.getConstant(1, getShiftAmountTy(N1.getValueType())));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
+        }
+      }
+    }
+
+    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
+        N1.getOpcode() == ISD::XOR) {
+      // Simplify  X == (X+Z) -->  Z == 0
+      if (N1.getOperand(0) == N0) {
+        return DAG.getSetCC(dl, VT, N1.getOperand(1),
+                            DAG.getConstant(0, N1.getValueType()), Cond);
+      } else if (N1.getOperand(1) == N0) {
+        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
+          return DAG.getSetCC(dl, VT, N1.getOperand(0),
+                              DAG.getConstant(0, N1.getValueType()), Cond);
+        } else if (N1.getNode()->hasOneUse()) {
+          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
+          // X == (Z-X)  --> X<<1 == Z
+          SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+                                   DAG.getConstant(1,
+                                     getShiftAmountTy(N0.getValueType())));
+          if (!DCI.isCalledByLegalizer())
+            DCI.AddToWorklist(SH.getNode());
+          return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
+        }
+      }
+    }
+
+    // Simplify x&y == y to x&y != 0 if y has exactly one bit set.
+    // Note that where y is variable and is known to have at most
+    // one bit set (for example, if it is z&1) we cannot do this;
+    // the expressions are not equivalent when y==0.
+    if (N0.getOpcode() == ISD::AND)
+      if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
+        if (ValueHasExactlyOneBitSet(N1, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N1.getValueType());
+          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+        }
+      }
+    if (N1.getOpcode() == ISD::AND)
+      if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
+        if (ValueHasExactlyOneBitSet(N0, DAG)) {
+          Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
+          SDValue Zero = DAG.getConstant(0, N0.getValueType());
+          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+        }
+      }
+  }
+
+  // Fold away ALL boolean setcc's.
+  SDValue Temp;
+  if (N0.getValueType() == MVT::i1 && foldBooleans) {
+    switch (Cond) {
+    default: llvm_unreachable("Unknown integer setcc!");
+    case ISD::SETEQ:  // X == Y  -> ~(X^Y)
+      Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      N0 = DAG.getNOT(dl, Temp, MVT::i1);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETNE:  // X != Y   -->  (X^Y)
+      N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1);
+      break;
+    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  ~X & Y
+    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  ~X & Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  ~Y & X
+    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  ~Y & X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
+    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
+      Temp = DAG.getNOT(dl, N0, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp);
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(Temp.getNode());
+      break;
+    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
+    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
+      Temp = DAG.getNOT(dl, N1, MVT::i1);
+      N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp);
+      break;
+    }
+    if (VT != MVT::i1) {
+      if (!DCI.isCalledByLegalizer())
+        DCI.AddToWorklist(N0.getNode());
+      // FIXME: If running after legalize, we probably can't do this.
+      N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0);
+    }
+    return N0;
+  }
+
+  // Could not fold it.
+  return SDValue();
+}
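The boolean table used above can be checked exhaustively, since i1 has only four operand combinations. A standalone truth-table test of the unsigned rows (plain 0/1 ints model i1; illustrative only):

#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      assert((X == Y) == (bool)(~(X ^ Y) & 1));   // SETEQ:  ~(X^Y)
      assert((X != Y) == (bool)((X ^ Y) & 1));    // SETNE:  X^Y
      assert((X <  Y) == (bool)(~X & Y & 1));     // SETULT: ~X & Y
      assert((X >  Y) == (bool)(X & ~Y & 1));     // SETUGT: ~Y & X
      assert((X <= Y) == (bool)((~X | Y) & 1));   // SETULE: ~X | Y
      assert((X >= Y) == (bool)((X | ~Y) & 1));   // SETUGE: ~Y | X
    }
  return 0;
}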
+/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
+/// node is a GlobalAddress + offset.
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
+                                    int64_t &Offset) const {
+  if (isa<GlobalAddressSDNode>(N)) {
+    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
+    GA = GASD->getGlobal();
+    Offset += GASD->getOffset();
+    return true;
+  }
+
+  if (N->getOpcode() == ISD::ADD) {
+    SDValue N1 = N->getOperand(0);
+    SDValue N2 = N->getOperand(1);
+    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
+      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
+      if (V) {
+        Offset += V->getSExtValue();
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+
+SDValue TargetLowering::
+PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
+  // Default implementation: no optimization.
+  return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+//  Inline Assembler Implementation Methods
+//===----------------------------------------------------------------------===//
+
+
+TargetLowering::ConstraintType
+TargetLowering::getConstraintType(const std::string &Constraint) const {
+  // FIXME: lots more standard ones to handle.
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default: break;
+    case 'r': return C_RegisterClass;
+    case 'm':    // memory
+    case 'o':    // offsetable
+    case 'V':    // not offsetable
+      return C_Memory;
+    case 'i':    // Simple Integer or Relocatable Constant
+    case 'n':    // Simple Integer
+    case 'E':    // Floating Point Constant
+    case 'F':    // Floating Point Constant
+    case 's':    // Relocatable Constant
+    case 'p':    // Address.
+    case 'X':    // Allow ANY value.
+    case 'I':    // Target registers.
+    case 'J':
+    case 'K':
+    case 'L':
+    case 'M':
+    case 'N':
+    case 'O':
+    case 'P':
+    case '<':
+    case '>':
+      return C_Other;
+    }
+  }
+
+  if (Constraint.size() > 1 && Constraint[0] == '{' &&
+      Constraint[Constraint.size()-1] == '}')
+    return C_Register;
+  return C_Unknown;
+}
+
+/// LowerXConstraint - try to replace an X constraint, which matches anything,
+/// with another that has more specific requirements based on the type of the
+/// corresponding operand.
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
+  if (ConstraintVT.isInteger())
+    return "r";
+  if (ConstraintVT.isFloatingPoint())
+    return "f";      // works for many targets
+  return 0;
+}
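A condensed standalone sketch of the classification just implemented (not the TargetLowering API itself, and covering only a few letters) for readers who want to experiment with constraint strings:

#include <cassert>
#include <string>

enum ConstraintKind { C_Register, C_RegisterClass, C_Memory, C_Other, C_Unknown };

static ConstraintKind classify(const std::string &C) {
  if (C.size() == 1) {
    switch (C[0]) {
    case 'r':                       return C_RegisterClass;
    case 'm': case 'o': case 'V':   return C_Memory;
    case 'i': case 'n': case 's':
    case 'I': case 'X':             return C_Other;
    default:                        break;
    }
  }
  if (C.size() > 1 && C[0] == '{' && C[C.size()-1] == '}')
    return C_Register;    // "{regname}" names one specific register
  return C_Unknown;
}

int main() {
  assert(classify("r") == C_RegisterClass);
  assert(classify("m") == C_Memory);
  assert(classify("{eax}") == C_Register);
  assert(classify("??") == C_Unknown);
  return 0;
}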
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                  char ConstraintLetter,
+                                                  std::vector<SDValue> &Ops,
+                                                  SelectionDAG &DAG) const {
+  switch (ConstraintLetter) {
+  default: break;
+  case 'X':    // Allows any operand; labels (basic block) use this.
+    if (Op.getOpcode() == ISD::BasicBlock) {
+      Ops.push_back(Op);
+      return;
+    }
+    // fall through
+  case 'i':    // Simple Integer or Relocatable Constant
+  case 'n':    // Simple Integer
+  case 's': {  // Relocatable Constant
+    // These operands are interested in values of the form (GV+C), where C may
+    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
+    // is possible and fine if either GV or C are missing.
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
+
+    // If we have "(add GV, C)", pull out GV/C
+    if (Op.getOpcode() == ISD::ADD) {
+      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
+      if (C == 0 || GA == 0) {
+        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
+        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
+      }
+      if (C == 0 || GA == 0)
+        C = 0, GA = 0;
+    }
+
+    // If we find a valid operand, map to the TargetXXX version so that the
+    // value itself doesn't get selected.
+    if (GA) {   // Either &GV or &GV+C
+      if (ConstraintLetter != 'n') {
+        int64_t Offs = GA->getOffset();
+        if (C) Offs += C->getZExtValue();
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                                 C ? C->getDebugLoc() : DebugLoc(),
+                                                 Op.getValueType(), Offs));
+        return;
+      }
+    }
+    if (C) {   // just C, no GV.
+      // Simple constants are not allowed for 's'.
+      if (ConstraintLetter != 's') {
+        // gcc prints these as sign extended.  Sign extend value to 64 bits
+        // now; without this it would get ZExt'd later in
+        // ScheduleDAGSDNodes::EmitNode, which is very generic.
+        Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(),
+                                            MVT::i64));
+        return;
+      }
+    }
+    break;
+  }
+  }
+}
+
+std::vector<unsigned> TargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  EVT VT) const {
+  return std::vector<unsigned>();
+}
+
+
+std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+                             EVT VT) const {
+  if (Constraint[0] != '{')
+    return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+
+  // Remove the braces from around the name.
+  StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+
+  // Figure out which register class contains this reg.
+  const TargetRegisterInfo *RI = TM.getRegisterInfo();
+  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
+       E = RI->regclass_end(); RCI != E; ++RCI) {
+    const TargetRegisterClass *RC = *RCI;
+
+    // If none of the value types for this register class are valid, we
+    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
+    bool isLegal = false;
+    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+         I != E; ++I) {
+      if (isTypeLegal(*I)) {
+        isLegal = true;
+        break;
+      }
+    }
+
+    if (!isLegal) continue;
+
+    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+         I != E; ++I) {
+      if (RegName.equals_lower(RI->getName(*I)))
+        return std::make_pair(*I, RC);
+    }
+  }
+
+  return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
+}
+
+//===----------------------------------------------------------------------===//
+// Constraint Selection.
+
+/// isMatchingInputConstraint - Return true if this is an input operand that is
+/// a matching constraint like "4".
+bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return isdigit(ConstraintCode[0]);
+}
+
+/// getMatchedOperand - If this is an input matching constraint, this method
+/// returns the output operand it matches.
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
+  assert(!ConstraintCode.empty() && "No known constraint!");
+  return atoi(ConstraintCode.c_str());
+}
+
+
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+    ImmutableCallSite CS) const {
+  /// ConstraintOperands - Information about all of the constraints.
+  AsmOperandInfoVector ConstraintOperands;
+  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+  unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+  // Do a prepass over the constraints, canonicalizing them, and building up the
+  // ConstraintOperands list.
+  InlineAsm::ConstraintInfoVector
+    ConstraintInfos = IA->ParseConstraints();
+
+  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
+  unsigned ResNo = 0;   // ResNo - The result number of the next output.
+
+  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+    ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+    AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+    // Update multiple alternative constraint count.
+    if (OpInfo.multipleAlternatives.size() > maCount)
+      maCount = OpInfo.multipleAlternatives.size();
+
+    OpInfo.ConstraintVT = MVT::Other;
+
+    // Compute the value type for each operand.
+    switch (OpInfo.Type) {
+    case InlineAsm::isOutput:
+      // Indirect outputs just consume an argument.
+      if (OpInfo.isIndirect) {
+        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+        break;
+      }
+
+      // The return value of the call is this value.  As such, there is no
+      // corresponding argument.
+      assert(!CS.getType()->isVoidTy() &&
+             "Bad inline asm!");
+      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+        OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+      } else {
+        assert(ResNo == 0 && "Asm only has one result!");
+        OpInfo.ConstraintVT = getValueType(CS.getType());
+      }
+      ++ResNo;
+      break;
+    case InlineAsm::isInput:
+      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+      break;
+    case InlineAsm::isClobber:
+      // Nothing to do.
+      break;
+    }
+
+    if (OpInfo.CallOperandVal) {
+      const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+      if (OpInfo.isIndirect) {
+        const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+        if (!PtrTy)
+          report_fatal_error("Indirect operand for inline asm not a pointer!");
+        OpTy = PtrTy->getElementType();
+      }
+      // If OpTy is not a single value, it may be a struct/union that we
+      // can tile with integers.
+      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+        unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+        switch (BitSize) {
+        default: break;
+        case 1:
+        case 8:
+        case 16:
+        case 32:
+        case 64:
+        case 128:
+          OpInfo.ConstraintVT =
+            EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+          break;
+        }
+      } else if (dyn_cast<PointerType>(OpTy)) {
+        OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+      } else {
+        OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+      }
+    }
+  }
+
+  // If we have multiple alternative constraints, select the best alternative.
+  if (ConstraintInfos.size()) {
+    if (maCount) {
+      unsigned bestMAIndex = 0;
+      int bestWeight = -1;
+      // weight:  -1 = invalid match, and 0 = so-so match to 5 = good match.
+      int weight = -1;
+      unsigned maIndex;
+      // Compute the sums of the weights for each alternative, keeping track
+      // of the best (highest weight) one so far.
+      for (maIndex = 0; maIndex < maCount; ++maIndex) {
+        int weightSum = 0;
+        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+             cIndex != eIndex; ++cIndex) {
+          AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+          if (OpInfo.Type == InlineAsm::isClobber)
+            continue;
+
+          // If this is an output operand with a matching input operand,
+          // look up the matching input.  If their types mismatch, e.g. one
+          // is an integer, the other is floating point, or their sizes are
+          // different, flag it as an maCantMatch.
+          if (OpInfo.hasMatchingInput()) {
+            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+              if ((OpInfo.ConstraintVT.isInteger() !=
+                   Input.ConstraintVT.isInteger()) ||
+                  (OpInfo.ConstraintVT.getSizeInBits() !=
+                   Input.ConstraintVT.getSizeInBits())) {
+                weightSum = -1;   // Can't match.
+                break;
+              }
+            }
+          }
+          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+          if (weight == -1) {
+            weightSum = -1;
+            break;
+          }
+          weightSum += weight;
+        }
+        // Update best.
+        if (weightSum > bestWeight) {
+          bestWeight = weightSum;
+          bestMAIndex = maIndex;
+        }
+      }
+
+      // Now select chosen alternative in each constraint.
+      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+           cIndex != eIndex; ++cIndex) {
+        AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+        if (cInfo.Type == InlineAsm::isClobber)
+          continue;
+        cInfo.selectAlternative(bestMAIndex);
+      }
+    }
+  }
+
+  // Check and hook up tied operands, choose constraint code to use.
+  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+       cIndex != eIndex; ++cIndex) {
+    AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+    // If this is an output operand with a matching input operand, look up the
+    // matching input.  If their types mismatch, e.g. one is an integer, the
+    // other is floating point, or their sizes are different, flag it as an
+    // error.
+    if (OpInfo.hasMatchingInput()) {
+      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+        if ((OpInfo.ConstraintVT.isInteger() !=
+             Input.ConstraintVT.isInteger()) ||
+            (OpInfo.ConstraintVT.getSizeInBits() !=
+             Input.ConstraintVT.getSizeInBits())) {
+          report_fatal_error("Unsupported asm: input constraint"
+                             " with a matching output constraint of"
+                             " incompatible type!");
+        }
+      }
+
+    }
+  }
+
+  return ConstraintOperands;
+}
+
+
+/// getConstraintGenerality - Return an integer indicating how general CT
+/// is.
+static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
+  switch (CT) {
+  default: llvm_unreachable("Unknown constraint type!");
+  case TargetLowering::C_Other:
+  case TargetLowering::C_Unknown:
+    return 0;
+  case TargetLowering::C_Register:
+    return 1;
+  case TargetLowering::C_RegisterClass:
+    return 2;
+  case TargetLowering::C_Memory:
+    return 3;
+  }
+}
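ChooseConstraint (below) keys off this ranking: when no immediate fits, the most general constraint letter wins. A standalone sketch of that selection over a multi-letter string such as "imr" (hypothetical helper; generality values mirror the function above):

#include <cassert>
#include <string>

static unsigned generality(char c) {
  switch (c) {
  case 'i': case 'n': case 's': return 0;  // C_Other: immediates
  case 'r':                     return 2;  // C_RegisterClass
  case 'm': case 'o':           return 3;  // C_Memory, most general
  default:                      return 0;
  }
}

static char pickMostGeneral(const std::string &codes) {
  char best = codes[0];
  for (std::string::size_type i = 1; i < codes.size(); ++i)
    if (generality(codes[i]) > generality(best))
      best = codes[i];
  return best;
}

int main() {
  assert(pickMostGeneral("imr") == 'm');  // memory beats register and immediate
  assert(pickMostGeneral("ri") == 'r');
  return 0;
}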
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getMultipleConstraintMatchWeight(
+    AsmOperandInfo &info, int maIndex) const {
+  InlineAsm::ConstraintCodeVector *rCodes;
+  if (maIndex >= (int)info.multipleAlternatives.size())
+    rCodes = &info.Codes;
+  else
+    rCodes = &info.multipleAlternatives[maIndex].Codes;
+  ConstraintWeight BestWeight = CW_Invalid;
+
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+    ConstraintWeight weight =
+      getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+    if (weight > BestWeight)
+      BestWeight = weight;
+  }
+
+  return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+  TargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  // Look at the constraint type.
+  switch (*constraint) {
+  case 'i': // immediate integer.
+  case 'n': // immediate integer with a known value.
+    if (isa<ConstantInt>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case 's': // non-explicit integral immediate.
+    if (isa<GlobalValue>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case 'E': // immediate float if host format.
+  case 'F': // immediate float.
+    if (isa<ConstantFP>(CallOperandVal))
+      weight = CW_Constant;
+    break;
+  case '<': // memory operand with autodecrement.
+  case '>': // memory operand with autoincrement.
+  case 'm': // memory operand.
+  case 'o': // offsettable memory operand
+  case 'V': // non-offsettable memory operand
+    weight = CW_Memory;
+    break;
+  case 'r': // general register.
+  case 'g': // general register, memory operand or immediate integer.
+            // note: Clang converts "g" to "imr".
+    if (CallOperandVal->getType()->isIntegerTy())
+      weight = CW_Register;
+    break;
+  case 'X': // any operand.
+  default:
+    weight = CW_Default;
+    break;
+  }
+  return weight;
+}
+
+/// ChooseConstraint - If there are multiple different constraints that we
+/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
+/// This is somewhat tricky: constraints fall into four classes:
+///    Other         -> immediates and magic values
+///    Register      -> one specific register
+///    RegisterClass -> a group of regs
+///    Memory        -> memory
+/// Ideally, we would pick the most specific constraint possible: if we have
+/// something that fits into a register, we would pick it.  The problem here
+/// is that if we have something that could either be in a register or in
+/// memory, then use of the register could cause selection of *other*
+/// operands to fail: they might only succeed if we pick memory.  Because of
+/// this the heuristic we use is:
+///
+///  1) If there is an 'other' constraint, and if the operand is valid for
+///     that constraint, use it.  This makes us take advantage of 'i'
+///     constraints when available.
+///  2) Otherwise, pick the most general constraint present.  This prefers
+///     'm' over 'r', for example.
+///
+static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
+                             const TargetLowering &TLI,
+                             SDValue Op, SelectionDAG *DAG) {
+  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
+  unsigned BestIdx = 0;
+  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
+  int BestGenerality = -1;
+
+  // Loop over the options, keeping track of the most general one.
+  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
+    TargetLowering::ConstraintType CType =
+      TLI.getConstraintType(OpInfo.Codes[i]);
+
+    // If this is an 'other' constraint, see if the operand is valid for it.
+    // For example, on X86 we might have an 'rI' constraint.  If the operand
+    // is an integer in the range [0..31] we want to use I (saving a load
+    // of a register), otherwise we must use 'r'.
+    if (CType == TargetLowering::C_Other && Op.getNode()) {
+      assert(OpInfo.Codes[i].size() == 1 &&
+             "Unhandled multi-letter 'other' constraint");
+      std::vector<SDValue> ResultOps;
+      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
+                                       ResultOps, *DAG);
+      if (!ResultOps.empty()) {
+        BestType = CType;
+        BestIdx = i;
+        break;
+      }
+    }
+
+    // Things with matching constraints can only be registers, per gcc
+    // documentation.  This mainly affects "g" constraints.
+    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+      continue;
+
+    // This constraint letter is more general than the previous one, use it.
+    int Generality = getConstraintGenerality(CType);
+    if (Generality > BestGenerality) {
+      BestType = CType;
+      BestIdx = i;
+      BestGenerality = Generality;
+    }
+  }
+
+  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
+  OpInfo.ConstraintType = BestType;
+}
+
+/// ComputeConstraintToUse - Determines the constraint code and constraint
+/// type to use for the specific AsmOperandInfo, setting
+/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
+void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
+                                            SDValue Op,
+                                            SelectionDAG *DAG) const {
+  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
+
+  // Single-letter constraints ('r') are very common.
+  if (OpInfo.Codes.size() == 1) {
+    OpInfo.ConstraintCode = OpInfo.Codes[0];
+    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+  } else {
+    ChooseConstraint(OpInfo, *this, Op, DAG);
+  }
+
+  // 'X' matches anything.
+  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
+    // Labels and constants are handled elsewhere ('X' is the only thing
+    // that matches labels).  For Functions, the type here is the type of
+    // the result, which is not what we want to look at; leave them alone.
+    Value *v = OpInfo.CallOperandVal;
+    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
+      OpInfo.CallOperandVal = v;
+      return;
+    }
+
+    // Otherwise, try to resolve it to something we know about by looking at
+    // the actual operand type.
+    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
+      OpInfo.ConstraintCode = Repl;
+      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
+    }
+  }
+}
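A standalone model of the conservative default policy that isLegalAddressingMode (below) implements: base register plus a sign-extended 16-bit offset, an optional index register, and 2*r only when nothing else is present. The plain struct here is a hypothetical stand-in for the LLVM AddrMode type:

#include <cassert>
#include <cstdint>

struct AddrModeSketch {
  int64_t BaseOffs;
  bool    HasBaseReg;
  bool    HasBaseGV;   // stands in for AddrMode::BaseGV != 0
  int     Scale;
};

static bool isLegalDefaultAM(const AddrModeSketch &AM) {
  // Offset must fit a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16) - 1)
    return false;
  if (AM.HasBaseGV)    // no global is ever allowed as a base
    return false;
  switch (AM.Scale) {
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) return false;  // "r+r+i" rejected
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) return false;  // 2*r+r / 2*r+i rejected
    break;
  }
  return true;  // other scales fall through, mirroring the switch below
}

int main() {
  AddrModeSketch RR  = {0, true, false, 1};   // r+r
  AddrModeSketch RRI = {4, true, false, 1};   // r+r+i
  assert(isLegalDefaultAM(RR) && !isLegalDefaultAM(RRI));
  return 0;
}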
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+                                           const Type *Ty) const {
+  // The default implementation of this implements a conservative RISCy, r+r and
+  // r+i addr mode.
+
+  // Allows a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // Only support r+r,
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i  is not allowed.
+      return false;
+    // Allow 2*r as r+r.
+    break;
+  }
+
+  return true;
+}
+
+/// BuildSDIV - Given an ISD::SDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl= N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+  APInt::ms magics = d.magic();
+
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHS, VT))
+    Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                            N->getOperand(0),
+                            DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhs or equivalent
+  // If d > 0 and m < 0, add the numerator
+  if (d.isStrictlyPositive() && magics.m.isNegative()) {
+    Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // If d < 0 and m > 0, subtract the numerator.
+  if (d.isNegative() && magics.m.isStrictlyPositive()) {
+    Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Shift right algebraic if shift value is nonzero
+  if (magics.s > 0) {
+    Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+                 DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+    if (Created)
+      Created->push_back(Q.getNode());
+  }
+  // Extract the sign bit and add it to the quotient
+  SDValue T =
+    DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
+                                       getShiftAmountTy(Q.getValueType())));
+  if (Created)
+    Created->push_back(T.getNode());
+  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
+}
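To see the signed sequence above in action for a concrete divisor: for d = 7 at 32 bits the usual magic-number tables give constant 0x92492493 with shift 2 (quoted from standard tables, not computed here by APInt::magic), and since d > 0 with m < 0 the numerator is added before the arithmetic shift. A standalone check, assuming arithmetic >> on negative ints as on mainstream compilers:

#include <cassert>
#include <cstdint>

static int32_t sdiv7(int32_t n) {
  int32_t M = (int32_t)0x92492493;                // magic for d = 7
  int32_t q = (int32_t)(((int64_t)M * n) >> 32);  // MULHS: high half of product
  q += n;                                         // d > 0, m < 0: add numerator
  q >>= 2;                                        // SRA by s = 2
  q += (uint32_t)q >> 31;                         // add the extracted sign bit
  return q;
}

int main() {
  for (int32_t n = -100000; n <= 100000; ++n)
    assert(sdiv7(n) == n / 7);                    // matches C's truncating sdiv
  assert(sdiv7(INT32_MIN) == INT32_MIN / 7);
  return 0;
}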
+/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant,
+/// return a DAG expression to select that will generate the same value by
+/// multiplying by a magic number.  See:
+/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
+SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
+                                  std::vector<SDNode*>* Created) const {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // Check to see if we can do this.
+  // FIXME: We should be more aggressive here.
+  if (!isTypeLegal(VT))
+    return SDValue();
+
+  // FIXME: We should use a narrower constant when the upper
+  // bits are known to be zero.
+  ConstantSDNode *N1C = cast<ConstantSDNode>(N->getOperand(1));
+  APInt::mu magics = N1C->getAPIntValue().magicu();
+
+  // Multiply the numerator (operand 0) by the magic value
+  // FIXME: We should support doing a MUL in a wider type
+  SDValue Q;
+  if (isOperationLegalOrCustom(ISD::MULHU, VT))
+    Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0),
+                    DAG.getConstant(magics.m, VT));
+  else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT))
+    Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT),
+                            N->getOperand(0),
+                            DAG.getConstant(magics.m, VT)).getNode(), 1);
+  else
+    return SDValue();       // No mulhu or equivalent
+  if (Created)
+    Created->push_back(Q.getNode());
+
+  if (magics.a == 0) {
+    assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
+           "We shouldn't generate an undefined shift!");
+    return DAG.getNode(ISD::SRL, dl, VT, Q,
+                 DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType())));
+  } else {
+    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+                      DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType())));
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
+    if (Created)
+      Created->push_back(NPQ.getNode());
+    return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+             DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType())));
+  }
+}
diff --git a/final/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/final/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 00000000000..a081e3cd493
--- /dev/null
+++ b/final/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,23 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+  : TD(TM.getTargetData()) {
+}
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/final/lib/CodeGen/ShadowStackGC.cpp b/final/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 00000000000..6ab0cb03c06
--- /dev/null
+++ b/final/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,451 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that do
+// not natively support them (which includes the C backend). Note that the code
+// generated is not quite as efficient as algorithms which generate stack maps
+// to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+//   "Accurate Garbage Collection in an Uncooperative Environment"
+//   Fergus Henderson, ISMM, 2002
+//
+// In runtime/GC/SemiSpace.cpp is a prototype runtime which is compatible with
+// ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated in the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shadowstackgc"
+#include "llvm/CodeGen/GCs.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/IRBuilder.h"
+
+using namespace llvm;
+
+namespace {
+
+  class ShadowStackGC : public GCStrategy {
+    /// RootChain - This is the global linked-list that contains the chain of GC
+    /// roots.
+    GlobalVariable *Head;
+
+    /// StackEntryTy - Abstract type of a link in the shadow stack.
+    ///
+    const StructType *StackEntryTy;
+
+    /// Roots - GC roots in the current function. Each is a pair of the
+    /// intrinsic call and its corresponding alloca.
+    std::vector<std::pair<CallInst*,AllocaInst*> > Roots;
+
+  public:
+    ShadowStackGC();
+
+    bool initializeCustomLowering(Module &M);
+    bool performCustomLowering(Function &F);
+
+  private:
+    bool IsNullValue(Value *V);
+    Constant *GetFrameMap(Function &F);
+    const Type* GetConcreteStackEntryType(Function &F);
+    void CollectRoots(Function &F);
+    static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+                                        IRBuilder<> &B, Value *BasePtr,
+                                        int Idx1, const char *Name);
+    static GetElementPtrInst *CreateGEP(LLVMContext &Context,
+                                        IRBuilder<> &B, Value *BasePtr,
+                                        int Idx1, int Idx2, const char *Name);
+  };
+
+}
+
+static GCRegistry::Add<ShadowStackGC>
+X("shadow-stack", "Very portable GC for uncooperative code generators");
+
+namespace {
+  /// EscapeEnumerator - This is a little algorithm to find all escape points
+  /// from a function so that "finally"-style code can be inserted. In addition
+  /// to finding the existing return and unwind instructions, it also (if
+  /// necessary) transforms any call instructions into invokes and sends them to
+  /// a landing pad.
+  ///
+  /// It's wrapped up in a state machine using the same transform C# uses for
+  /// 'yield return' enumerators; this transform allows it to be non-allocating.
+  class EscapeEnumerator {
+    Function &F;
+    const char *CleanupBBName;
+
+    // State.
+    int State;
+    Function::iterator StateBB, StateE;
+    IRBuilder<> Builder;
+
+  public:
+    EscapeEnumerator(Function &F, const char *N = "cleanup")
+      : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
+
+    IRBuilder<> *Next() {
+      switch (State) {
+      default:
+        return 0;
+
+      case 0:
+        StateBB = F.begin();
+        StateE = F.end();
+        State = 1;
+
+      case 1:
+        // Find all 'return' and 'unwind' instructions.
+        while (StateBB != StateE) {
+          BasicBlock *CurBB = StateBB++;
+
+          // Branches and invokes do not escape, only unwind and return do.
+          TerminatorInst *TI = CurBB->getTerminator();
+          if (!isa<ReturnInst>(TI) && !isa<UnwindInst>(TI))
+            continue;
+
+          Builder.SetInsertPoint(TI->getParent(), TI);
+          return &Builder;
+        }
+
+        State = 2;
+
+        // Find all 'call' instructions.
+        SmallVector<Instruction*,16> Calls;
+        for (Function::iterator BB = F.begin(),
+                                E = F.end(); BB != E; ++BB)
+          for (BasicBlock::iterator II = BB->begin(),
+                                    EE = BB->end(); II != EE; ++II)
+            if (CallInst *CI = dyn_cast<CallInst>(II))
+              if (!CI->getCalledFunction() ||
+                  !CI->getCalledFunction()->getIntrinsicID())
+                Calls.push_back(CI);
+
+        if (Calls.empty())
+          return 0;
+
+        // Create a cleanup block.
+ BasicBlock *CleanupBB = BasicBlock::Create(F.getContext(), + CleanupBBName, &F); + UnwindInst *UI = new UnwindInst(F.getContext(), CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector Args; + for (unsigned I = Calls.size(); I != 0; ) { + CallInst *CI = cast(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = + CallBB->splitBasicBlock(CI, CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), + NewBB, CleanupBB, + Args.begin(), Args.end(), + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(UI->getParent(), UI); + return &Builder; + } + } + }; +} + +// ----------------------------------------------------------------------------- + +void llvm::linkShadowStackGC() { } + +ShadowStackGC::ShadowStackGC() : Head(0), StackEntryTy(0) { + InitRoots = true; + CustomRoots = true; +} + +Constant *ShadowStackGC::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + const Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + + Constant *BaseElts[] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false), + ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(F.getContext(), BaseElts, 2, false), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), + Metadata.begin(), NumMeta) + }; + + Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2, + false); + + std::string TypeName("gc_map."); + TypeName += utostr(NumMeta); + F.getParent()->addTypeName(TypeName, FrameMap->getType()); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). 
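The shape of the data GetFrameMap is emitting here is easiest to see from the runtime's point of view. The stand-alone C++ sketch below is modeled on the prototype mentioned in the file header (runtime/GC/SemiSpace.cpp); the struct and function names are illustrative assumptions, not part of this pass.

#include <cstdint>

// Mirrors the constant FrameMap that GetFrameMap() emits per function.
struct FrameMap {
  int32_t NumRoots;    // Number of roots in the stack frame.
  int32_t NumMeta;     // Number of metadata entries; may be < NumRoots.
  const void *Meta[1]; // Metadata for the first NumMeta roots, in order.
};

// Mirrors the gc_stackentry alloca built by performCustomLowering(); the
// root slots are co-allocated directly after the two-pointer header.
struct StackEntry {
  StackEntry *Next;    // Caller's stack entry.
  const FrameMap *Map; // Pointer to this function's constant FrameMap.
  void *Roots[1];      // In-place array of NumRoots root slots.
};

// Head of the chain; the pass creates this as llvm_gc_root_chain.
StackEntry *llvm_gc_root_chain;

// Collector-side traversal: walk the chain of frames that the mutator
// pushed, visiting every root slot. This is all a target-independent
// runtime needs in order to enumerate roots accurately.
void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) {
  for (StackEntry *E = llvm_gc_root_chain; E; E = E->Next) {
    unsigned i = 0;
    // Roots with metadata were numbered first by CollectRoots(), which is
    // what lets NumMeta truncate the Meta array.
    for (unsigned e = E->Map->NumMeta; i != e; ++i)
      Visitor(&E->Roots[i], E->Map->Meta[i]);
    for (unsigned e = E->Map->NumRoots; i != e; ++i)
      Visitor(&E->Roots[i], 0);
  }
}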
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, + FrameMap, "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) + }; + return ConstantExpr::getGetElementPtr(GV, GEPIndices, 2); +} + +const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + Type *Ty = StructType::get(F.getContext(), EltTys); + + std::string TypeName("gc_stackentry."); + TypeName += F.getName(); + F.getParent()->addTypeName(TypeName, Ty); + + return Ty; +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGC::initializeCustomLowering(Module &M) { + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + StructType *FrameMapTy = StructType::get(M.getContext(), EltTys); + M.addTypeName("gc_map", FrameMapTy); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + OpaqueType *RecursiveTy = OpaqueType::get(M.getContext()); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(RecursiveTy)); + EltTys.push_back(FrameMapPtrTy); + PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys); + + RecursiveTy->refineAbstractTypeTo(LinkTyH.get()); + StackEntryTy = cast(LinkTyH.get()); + const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from + // a FunctionPass? + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable(M, StackEntryPtrTy, false, + GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), + "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGC::IsNullValue(Value *V) { + if (Constant *C = dyn_cast(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGC::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair Pair = std::make_pair( + CI, cast(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, int Idx2, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2) }; + Value* Val = B.CreateGEP(BasePtr, Indices, Indices + 3, Name); + + assert(isa(Val) && "Unexpected folded constant"); + + return dyn_cast(Val); +} + +GetElementPtrInst * +ShadowStackGC::CreateGEP(LLVMContext &Context, IRBuilder<> &B, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = { ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx) }; + Value *Val = B.CreateGEP(BasePtr, Indices, Indices + 2, Name); + + assert(isa(Val) && "Unexpected folded constant"); + + return dyn_cast(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGC::performCustomLowering(Function &F) { + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + const Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = AtEntry.CreateAlloca(ConcreteStackEntryTy, 0, + "gc_frame"); + + while (isa(IP)) ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry, + 0,1,"gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. + AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. 
+  while (isa<StoreInst>(IP)) ++IP;
+  AtEntry.SetInsertPoint(IP->getParent(), IP);
+
+  // Push the entry onto the shadow stack.
+  Instruction *EntryNextPtr = CreateGEP(Context, AtEntry,
+                                        StackEntry, 0, 0, "gc_frame.next");
+  Instruction *NewHeadVal   = CreateGEP(Context, AtEntry,
+                                        StackEntry, 0, "gc_newhead");
+  AtEntry.CreateStore(CurrentHead, EntryNextPtr);
+  AtEntry.CreateStore(NewHeadVal, Head);
+
+  // For each instruction that escapes...
+  EscapeEnumerator EE(F, "gc_cleanup");
+  while (IRBuilder<> *AtExit = EE.Next()) {
+    // Pop the entry from the shadow stack. Don't reuse CurrentHead from
+    // AtEntry, since that would make the value live for the entire function.
+    Instruction *EntryNextPtr2 = CreateGEP(Context, *AtExit, StackEntry, 0, 0,
+                                           "gc_frame.next");
+    Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+    AtExit->CreateStore(SavedHead, Head);
+  }
+
+  // Delete the original allocas (which are no longer used) and the intrinsic
+  // calls (which are no longer valid). Doing this last avoids invalidating
+  // iterators.
+  for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
+    Roots[I].first->eraseFromParent();
+    Roots[I].second->eraseFromParent();
+  }
+
+  Roots.clear();
+  return true;
+}
diff --git a/final/lib/CodeGen/ShrinkWrapping.cpp b/final/lib/CodeGen/ShrinkWrapping.cpp
new file mode 100644
index 00000000000..7b5bca49520
--- /dev/null
+++ b/final/lib/CodeGen/ShrinkWrapping.cpp
@@ -0,0 +1,1152 @@
+//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a shrink wrapping variant of prolog/epilog insertion:
+// - Spills and restores of callee-saved registers (CSRs) are placed in the
+//   machine CFG to tightly surround their uses so that execution paths that
+//   do not use CSRs do not pay the spill/restore penalty.
+//
+// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
+//   loop the spills are placed in the loop preheader, and restores are
+//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
+//
+// - Covering paths without CSR uses:
+//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
+//   the use info for the CSRs inside the region is propagated outward in the
+//   CFG to ensure validity of the spill/restore placements. This decreases
+//   the effectiveness of shrink wrapping but does not require edge splitting
+//   in the machine CFG.
+//
+// This shrink wrapping implementation uses an iterative analysis to determine
+// which basic blocks require spills and restores for CSRs.
+//
+// This pass uses MachineDominators and MachineLoopInfo. Loop information
+// is used to prevent placement of callee-saved register spills/restores
+// in the bodies of loops.
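A compact illustration of that iterative analysis may help before the implementation. This self-contained sketch (hypothetical names, bitmask register sets, a hard-coded diamond CFG) runs the same ANTIC/AVAIL equations to a fixed point and then applies the placement conditions used by calcSpillPlacements and calcRestorePlacements below; with one CSR used only on one arm of the diamond, both the save and the restore land on that arm, so the other arm pays nothing.

#include <cstdio>
#include <vector>

int main() {
  // Toy MCFG: 0 = entry -> {1, 2}, 1 -> 3, 2 -> 3, 3 = return.
  const int N = 4;
  std::vector<std::vector<int> > succ(N), pred(N);
  succ[0].push_back(1); succ[0].push_back(2);
  succ[1].push_back(3); succ[2].push_back(3);
  pred[1].push_back(0); pred[2].push_back(0);
  pred[3].push_back(1); pred[3].push_back(2);

  unsigned used[N] = { 0, 1, 0, 0 };  // only block 1 touches CSR bit 0
  unsigned allCSRs = 1;
  unsigned anticIn[N] = {0}, anticOut[N] = {0};
  unsigned availIn[N] = {0}, availOut[N] = {0};

  // The equations from calcAnticInOut/calcAvailInOut, iterated to a fixed
  // point:  AnticOut[B] = INTERSECT(AnticIn[S]),  AnticIn[B] = Used[B] | AnticOut[B]
  //         AvailIn[B]  = INTERSECT(AvailOut[P]), AvailOut[B] = Used[B] | AvailIn[B]
  bool changed = true;
  while (changed) {
    changed = false;
    for (int b = 0; b < N; ++b) {
      unsigned ao = succ[b].empty() ? 0u : ~0u;
      for (size_t i = 0; i < succ[b].size(); ++i) ao &= anticIn[succ[b][i]];
      unsigned ai = used[b] | ao;
      unsigned vin = pred[b].empty() ? 0u : ~0u;
      for (size_t i = 0; i < pred[b].size(); ++i) vin &= availOut[pred[b][i]];
      unsigned vout = used[b] | vin;
      if (ao != anticOut[b] || ai != anticIn[b] ||
          vin != availIn[b] || vout != availOut[b])
        changed = true;
      anticOut[b] = ao; anticIn[b] = ai; availIn[b] = vin; availOut[b] = vout;
    }
  }

  // Placement: spill where a CSR first becomes anticipated, restore where
  // it stops being available (the conditions used in the pass below).
  for (int b = 0; b < N; ++b) {
    unsigned anticInPreds = allCSRs;
    for (size_t i = 0; i < pred[b].size(); ++i)
      anticInPreds &= (allCSRs & ~anticIn[pred[b][i]]);
    unsigned save = (anticIn[b] & ~availIn[b]) & anticInPreds;

    unsigned availOutSucc = allCSRs;
    for (size_t i = 0; i < succ[b].size(); ++i)
      availOutSucc &= (allCSRs & ~availOut[succ[b][i]]);
    unsigned restore = (availOut[b] & ~anticOut[b]) & availOutSucc;

    std::printf("block %d: save=%x restore=%x\n", b, save, restore);
  }
  return 0;  // prints save=1 and restore=1 only for block 1
}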
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "shrink-wrap"
+
+#include "PrologEpilogInserter.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/STLExtras.h"
+#include <sstream>
+
+using namespace llvm;
+
+STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
+
+// Shrink Wrapping:
+static cl::opt<bool>
+ShrinkWrapping("shrink-wrap",
+               cl::desc("Shrink wrap callee-saved register spills/restores"));
+
+// Shrink wrap only the specified function, a debugging aid.
+static cl::opt<std::string>
+ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
+               cl::desc("Shrink wrap the specified function"),
+               cl::value_desc("funcname"),
+               cl::init(""));
+
+// Debugging level for shrink wrapping.
+enum ShrinkWrapDebugLevel {
+  None, BasicInfo, Iterations, Details
+};
+
+static cl::opt<enum ShrinkWrapDebugLevel>
+ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
+  cl::desc("Print shrink wrapping debugging information"),
+  cl::values(
+    clEnumVal(None      , "disable debug output"),
+    clEnumVal(BasicInfo , "print basic DF sets"),
+    clEnumVal(Iterations, "print SR sets for each iteration"),
+    clEnumVal(Details   , "print all DF sets"),
+    clEnumValEnd));
+
+
+void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  if (ShrinkWrapping || ShrinkWrapFunc != "") {
+    AU.addRequired<MachineLoopInfo>();
+    AU.addRequired<MachineDominatorTree>();
+  }
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+//===----------------------------------------------------------------------===//
+//  ShrinkWrapping implementation
+//===----------------------------------------------------------------------===//
+
+// Conveniences for dealing with machine loops.
+MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) {
+  assert(LP && "Machine loop is NULL.");
+  MachineBasicBlock* PHDR = LP->getLoopPreheader();
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    PHDR = PLP->getLoopPreheader();
+    PLP = PLP->getParentLoop();
+  }
+  return PHDR;
+}
+
+MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) {
+  if (LP == 0)
+    return 0;
+  MachineLoop* PLP = LP->getParentLoop();
+  while (PLP) {
+    LP = PLP;
+    PLP = PLP->getParentLoop();
+  }
+  return LP;
+}
+
+bool PEI::isReturnBlock(MachineBasicBlock* MBB) {
+  return (MBB && !MBB->empty() && MBB->back().getDesc().isReturn());
+}
+
+// Initialize shrink wrapping DFA sets, called before iterations.
+void PEI::clearAnticAvailSets() {
+  AnticIn.clear();
+  AnticOut.clear();
+  AvailIn.clear();
+  AvailOut.clear();
+}
+
+// Clear all sets constructed by shrink wrapping.
+void PEI::clearAllSets() {
+  ReturnBlocks.clear();
+  clearAnticAvailSets();
+  UsedCSRegs.clear();
+  CSRUsed.clear();
+  TLLoops.clear();
+  CSRSave.clear();
+  CSRRestore.clear();
+}
+
+// Initialize all shrink wrapping data.
+void PEI::initShrinkWrappingInfo() { + clearAllSets(); + EntryBlock = 0; +#ifndef NDEBUG + HasFastExitPath = false; +#endif + ShrinkWrapThisFunction = ShrinkWrapping; + // DEBUG: enable or disable shrink wrapping for the current function + // via --shrink-wrap-func=. +#ifndef NDEBUG + if (ShrinkWrapFunc != "") { + std::string MFName = MF->getFunction()->getNameStr(); + ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); + } +#endif +} + + +/// placeCSRSpillsAndRestores - determine which MBBs of the function +/// need save, restore code for callee-saved registers by doing a DF analysis +/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs +/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo +/// is used to ensure that CSR save/restore code is not placed inside loops. +/// This function computes the maps of MBBs -> CSRs to spill and restore +/// in CSRSave, CSRRestore. +/// +/// If shrink wrapping is not being performed, place all spills in +/// the entry block, all restores in return blocks. In this case, +/// CSRSave has a single mapping, CSRRestore has mappings for each +/// return block. +/// +void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { + + DEBUG(MF = &Fn); + + initShrinkWrappingInfo(); + + DEBUG(if (ShrinkWrapThisFunction) { + dbgs() << "Place CSR spills/restores for " + << MF->getFunction()->getName() << "\n"; + }); + + if (calculateSets(Fn)) + placeSpillsAndRestores(Fn); +} + +/// calcAnticInOut - calculate the anticipated in/out reg sets +/// for the given MBB by looking forward in the MCFG at MBB's +/// successors. +/// +bool PEI::calcAnticInOut(MachineBasicBlock* MBB) { + bool changed = false; + + // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB)) + SmallVector successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + + unsigned i = 0, e = successors.size(); + if (i != e) { + CSRegSet prevAnticOut = AnticOut[MBB]; + MachineBasicBlock* SUCC = successors[i]; + + AnticOut[MBB] = AnticIn[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + AnticOut[MBB] &= AnticIn[SUCC]; + } + if (prevAnticOut != AnticOut[MBB]) + changed = true; + } + + // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]); + CSRegSet prevAnticIn = AnticIn[MBB]; + AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB]; + if (prevAnticIn != AnticIn[MBB]) + changed = true; + return changed; +} + +/// calcAvailInOut - calculate the available in/out reg sets +/// for the given MBB by looking backward in the MCFG at MBB's +/// predecessors. 
+///
+bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
+  bool changed = false;
+
+  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
+  SmallVector<MachineBasicBlock*, 4> predecessors;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock* PRED = *PI;
+    if (PRED != MBB)
+      predecessors.push_back(PRED);
+  }
+
+  unsigned i = 0, e = predecessors.size();
+  if (i != e) {
+    CSRegSet prevAvailIn = AvailIn[MBB];
+    MachineBasicBlock* PRED = predecessors[i];
+
+    AvailIn[MBB] = AvailOut[PRED];
+    for (++i; i != e; ++i) {
+      PRED = predecessors[i];
+      AvailIn[MBB] &= AvailOut[PRED];
+    }
+    if (prevAvailIn != AvailIn[MBB])
+      changed = true;
+  }
+
+  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
+  CSRegSet prevAvailOut = AvailOut[MBB];
+  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
+  if (prevAvailOut != AvailOut[MBB])
+    changed = true;
+  return changed;
+}
+
+/// calculateAnticAvail - build the sets anticipated and available
+/// registers in the MCFG of the current function iteratively,
+/// doing a combined forward and backward analysis.
+///
+void PEI::calculateAnticAvail(MachineFunction &Fn) {
+  // Initialize data flow sets.
+  clearAnticAvailSets();
+
+  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
+  bool changed = true;
+  unsigned iterations = 0;
+  while (changed) {
+    changed = false;
+    ++iterations;
+    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+         MBBI != MBBE; ++MBBI) {
+      MachineBasicBlock* MBB = MBBI;
+
+      // Calculate anticipated in, out regs at MBB from
+      // anticipated at successors of MBB.
+      changed |= calcAnticInOut(MBB);
+
+      // Calculate available in, out regs at MBB from
+      // available at predecessors of MBB.
+      changed |= calcAvailInOut(MBB);
+    }
+  }
+
+  DEBUG({
+      if (ShrinkWrapDebugging >= Details) {
+        dbgs()
+          << "-----------------------------------------------------------\n"
+          << " Antic/Avail Sets:\n"
+          << "-----------------------------------------------------------\n"
+          << "iterations = " << iterations << "\n"
+          << "-----------------------------------------------------------\n"
+          << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
+          << "-----------------------------------------------------------\n";
+
+        for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
+             MBBI != MBBE; ++MBBI) {
+          MachineBasicBlock* MBB = MBBI;
+          dumpSets(MBB);
+        }
+
+        dbgs()
+          << "-----------------------------------------------------------\n";
+      }
+    });
+}
+
+/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
+/// of the loop given by LP and its parent loops. This prevents spills/restores
+/// from being placed in the bodies of loops.
+///
+void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
+  if (! MBB || !LP)
+    return;
+
+  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
+  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
+    MachineBasicBlock* LBB = loopBlocks[i];
+    if (LBB == MBB)
+      continue;
+    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
+      continue;
+    CSRUsed[LBB] |= CSRUsed[MBB];
+  }
+}
+
+/// calculateSets - collect the CSRs used in this function, compute
+/// the DF sets that describe the initial minimal regions in the
+/// Machine CFG around which CSR spills and restores must be placed.
+///
+/// Additionally, this function decides if shrink wrapping should
+/// be disabled for the current function, checking the following:
+/// 1.
the current function has more than 500 MBBs: heuristic limit +/// on function size to reduce compile time impact of the current +/// iterative algorithm. +/// 2. all CSRs are used in the entry block. +/// 3. all CSRs are used in all immediate successors of the entry block. +/// 4. all CSRs are used in a subset of blocks, each of which dominates +/// all return blocks. These blocks, taken as a subgraph of the MCFG, +/// are equivalent to the entry block since all execution paths pass +/// through them. +/// +bool PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": uses no callee-saved registers\n"); + return false; + } + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + // Determine if this function has fast exit paths. + DEBUG(if (ShrinkWrapThisFunction) + findFastExitPath()); + + // Limit shrink wrapping via the current iterative bit vector + // implementation to functions with <= 500 MBBs. + if (Fn.size() > 500) { + DEBUG(if (ShrinkWrapThisFunction) + dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": too large (" << Fn.size() << " MBBs)\n"); + ShrinkWrapThisFunction = false; + } + + // Return now if not shrink wrapping. + if (! ShrinkWrapThisFunction) + return false; + + // Collect set of used CSRs. + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + UsedCSRegs.set(inx); + } + + // Walk instructions in all MBBs, create CSRUsed[] sets, choose + // whether or not to shrink wrap this function. + MachineLoopInfo &LI = getAnalysis(); + MachineDominatorTree &DT = getAnalysis(); + const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); + + bool allCSRUsesInEntryBlock = true; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { + for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { + unsigned Reg = CSI[inx].getReg(); + // If instruction I reads or modifies Reg, add it to UsedCSRegs, + // CSRUsed map for the current block. + for (unsigned opInx = 0, opEnd = I->getNumOperands(); + opInx != opEnd; ++opInx) { + const MachineOperand &MO = I->getOperand(opInx); + if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(Reg, MOReg))) { + // CSR Reg is defined/used in block MBB. + CSRUsed[MBB].set(inx); + // Check for uses in EntryBlock. + if (MBB != EntryBlock) + allCSRUsesInEntryBlock = false; + } + } + } + } + + if (CSRUsed[MBB].empty()) + continue; + + // Propagate CSRUsed[MBB] in loops + if (MachineLoop* LP = LI.getLoopFor(MBB)) { + // Add top level loop to work list. + MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); + MachineLoop* PLP = getTopLevelLoopParent(LP); + + if (! 
HDR) { + HDR = PLP->getHeader(); + assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); + MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); + HDR = *PI; + } + TLLoops[HDR] = PLP; + + // Push uses from inside loop to its parent loops, + // or to all other MBBs in its loop. + if (LP->getLoopDepth() > 1) { + for (MachineLoop* PLP = LP->getParentLoop(); PLP; + PLP = PLP->getParentLoop()) { + propagateUsesAroundLoop(MBB, PLP); + } + } else { + propagateUsesAroundLoop(MBB, LP); + } + } + } + + if (allCSRUsesInEntryBlock) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in EntryBlock\n"); + ShrinkWrapThisFunction = false; + } else { + bool allCSRsUsedInEntryFanout = true; + for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), + SE = EntryBlock->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (CSRUsed[SUCC] != UsedCSRegs) + allCSRsUsedInEntryFanout = false; + } + if (allCSRsUsedInEntryFanout) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in imm successors of EntryBlock\n"); + ShrinkWrapThisFunction = false; + } + } + + if (ShrinkWrapThisFunction) { + // Check if MBB uses CSRs and dominates all exit nodes. + // Such nodes are equiv. to the entry node w.r.t. + // CSR uses: every path through the function must + // pass through this node. If each CSR is used at least + // once by these nodes, shrink wrapping is disabled. + CSRegSet CSRUsedInChokePoints; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) + continue; + bool dominatesExitNodes = true; + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) + if (! DT.dominates(MBB, ReturnBlocks[ri])) { + dominatesExitNodes = false; + break; + } + if (dominatesExitNodes) { + CSRUsedInChokePoints |= CSRUsed[MBB]; + if (CSRUsedInChokePoints == UsedCSRegs) { + DEBUG(dbgs() << "DISABLED: " << Fn.getFunction()->getName() + << ": all CSRs used in choke point(s) at " + << getBasicBlockName(MBB) << "\n"); + ShrinkWrapThisFunction = false; + break; + } + } + } + } + + // Return now if we have decided not to apply shrink wrapping + // to the current function. + if (! ShrinkWrapThisFunction) + return false; + + DEBUG({ + dbgs() << "ENABLED: " << Fn.getFunction()->getName(); + if (HasFastExitPath) + dbgs() << " (fast exit path)"; + dbgs() << "\n"; + if (ShrinkWrapDebugging >= BasicInfo) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; + if (ShrinkWrapDebugging >= Details) { + dbgs() << "------------------------------" + << "-----------------------------\n"; + dumpAllUsed(); + } + } + }); + + // Build initial DF sets to determine minimal regions in the + // Machine CFG around which CSRs must be spilled and restored. + calculateAnticAvail(Fn); + + return true; +} + +/// addUsesForMEMERegion - add uses of CSRs spilled or restored in +/// multi-entry, multi-exit (MEME) regions so spill and restore +/// placement will not break code that enters or leaves a +/// shrink-wrapped region by inducing spills with no matching +/// restores or restores with no matching spills. A MEME region +/// is a subgraph of the MCFG with multiple entry edges, multiple +/// exit edges, or both. 
This code propagates use information +/// through the MCFG until all paths requiring spills and restores +/// _outside_ the computed minimal placement regions have been covered. +/// +bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, + SmallVector& blks) { + if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { + bool processThisBlock = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC->pred_size() > 1) { + processThisBlock = true; + break; + } + } + if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + if (PRED->succ_size() > 1) { + processThisBlock = true; + break; + } + } + } + if (! processThisBlock) + return false; + } + + CSRegSet prop; + if (!CSRSave[MBB].empty()) + prop = CSRSave[MBB]; + else if (!CSRRestore[MBB].empty()) + prop = CSRRestore[MBB]; + else + prop = CSRUsed[MBB]; + if (prop.empty()) + return false; + + // Propagate selected bits to successors, predecessors of MBB. + bool addedUses = false; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + // Self-loop + if (SUCC == MBB) + continue; + if (! CSRUsed[SUCC].contains(prop)) { + CSRUsed[SUCC] |= prop; + addedUses = true; + blks.push_back(SUCC); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "successor " << getBasicBlockName(SUCC) << "\n"); + } + } + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock* PRED = *PI; + // Self-loop + if (PRED == MBB) + continue; + if (! CSRUsed[PRED].contains(prop)) { + CSRUsed[PRED] |= prop; + addedUses = true; + blks.push_back(PRED); + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << getBasicBlockName(MBB) + << "(" << stringifyCSRegSet(prop) << ")->" + << "predecessor " << getBasicBlockName(PRED) << "\n"); + } + } + return addedUses; +} + +/// addUsesForTopLevelLoops - add uses for CSRs used inside top +/// level loops to the exit blocks of those loops. +/// +bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { + bool addedUses = false; + + // Place restores for top level loops where needed. + for (DenseMap::iterator + I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { + MachineBasicBlock* MBB = I->first; + MachineLoop* LP = I->second; + MachineBasicBlock* HDR = LP->getHeader(); + SmallVector exitBlocks; + CSRegSet loopSpills; + + loopSpills = CSRSave[MBB]; + if (CSRSave[MBB].empty()) { + loopSpills = CSRUsed[HDR]; + assert(!loopSpills.empty() && "No CSRs used in loop?"); + } else if (CSRRestore[MBB].contains(CSRSave[MBB])) + continue; + + LP->getExitBlocks(exitBlocks); + assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); + for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { + MachineBasicBlock* EXB = exitBlocks[i]; + if (! 
CSRUsed[EXB].contains(loopSpills)) {
+        CSRUsed[EXB] |= loopSpills;
+        addedUses = true;
+        DEBUG(if (ShrinkWrapDebugging >= Iterations)
+                dbgs() << "LOOP " << getBasicBlockName(MBB)
+                       << "(" << stringifyCSRegSet(loopSpills) << ")->"
+                       << getBasicBlockName(EXB) << "\n");
+        if (EXB->succ_size() > 1 || EXB->pred_size() > 1)
+          blks.push_back(EXB);
+      }
+    }
+  }
+  return addedUses;
+}
+
+/// calcSpillPlacements - determine which CSRs should be spilled
+/// in MBB using AnticIn sets of MBB's predecessors, keeping track
+/// of changes to spilled reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+///
+bool PEI::calcSpillPlacements(MachineBasicBlock* MBB,
+                              SmallVector<MachineBasicBlock*, 4> &blks,
+                              CSRegBlockMap &prevSpills) {
+  bool placedSpills = false;
+  // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB)
+  CSRegSet anticInPreds;
+  SmallVector<MachineBasicBlock*, 4> predecessors;
+  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+    MachineBasicBlock* PRED = *PI;
+    if (PRED != MBB)
+      predecessors.push_back(PRED);
+  }
+  unsigned i = 0, e = predecessors.size();
+  if (i != e) {
+    MachineBasicBlock* PRED = predecessors[i];
+    anticInPreds = UsedCSRegs - AnticIn[PRED];
+    for (++i; i != e; ++i) {
+      PRED = predecessors[i];
+      anticInPreds &= (UsedCSRegs - AnticIn[PRED]);
+    }
+  } else {
+    // Handle uses in entry blocks (which have no predecessors).
+    // This is necessary because the DFA formulation assumes the
+    // entry and (multiple) exit nodes cannot have CSR uses, which
+    // is not the case in the real world.
+    anticInPreds = UsedCSRegs;
+  }
+  // Compute spills required at MBB:
+  CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds;
+
+  if (! CSRSave[MBB].empty()) {
+    if (MBB == EntryBlock) {
+      for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri)
+        CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB];
+    } else {
+      // Reset all regs spilled in MBB that are also spilled in EntryBlock.
+      if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) {
+        CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock];
+      }
+    }
+  }
+  placedSpills = (CSRSave[MBB] != prevSpills[MBB]);
+  prevSpills[MBB] = CSRSave[MBB];
+  // Remember this block for adding restores to successor
+  // blocks for multi-entry region.
+  if (placedSpills)
+    blks.push_back(MBB);
+
+  DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations)
+          dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRSave[MBB]) << "\n");
+
+  return placedSpills;
+}
+
+/// calcRestorePlacements - determine which CSRs should be restored
+/// in MBB using AvailOut sets of MBB's successors, keeping track
+/// of changes to restored reg sets. Add MBB to the set of blocks
+/// that need to be processed for propagating use info to cover
+/// multi-entry/exit regions.
+/// +bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, + SmallVector &blks, + CSRegBlockMap &prevRestores) { + bool placedRestores = false; + // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) + CSRegSet availOutSucc; + SmallVector successors; + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock* SUCC = *SI; + if (SUCC != MBB) + successors.push_back(SUCC); + } + unsigned i = 0, e = successors.size(); + if (i != e) { + MachineBasicBlock* SUCC = successors[i]; + availOutSucc = UsedCSRegs - AvailOut[SUCC]; + for (++i; i != e; ++i) { + SUCC = successors[i]; + availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); + } + } else { + if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { + // Handle uses in return blocks (which have no successors). + // This is necessary because the DFA formulation assumes the + // entry and (multiple) exit nodes cannot have CSR uses, which + // is not the case in the real world. + availOutSucc = UsedCSRegs; + } + } + // Compute restores required at MBB: + CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; + + // Postprocess restore placements at MBB. + // Remove the CSRs that are restored in the return blocks. + // Lest this be confusing, note that: + // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. + if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { + if (! CSRSave[EntryBlock].empty()) + CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; + } + placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); + prevRestores[MBB] = CSRRestore[MBB]; + // Remember this block for adding saves to predecessor + // blocks for multi-entry region. + if (placedRestores) + blks.push_back(MBB); + + DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) + dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); + + return placedRestores; +} + +/// placeSpillsAndRestores - place spills and restores of CSRs +/// used in MBBs in minimal regions that contain the uses. +/// +void PEI::placeSpillsAndRestores(MachineFunction &Fn) { + CSRegBlockMap prevCSRSave; + CSRegBlockMap prevCSRRestore; + SmallVector cvBlocks, ncvBlocks; + bool changed = true; + unsigned iterations = 0; + + // Iterate computation of spill and restore placements in the MCFG until: + // 1. CSR use info has been fully propagated around the MCFG, and + // 2. computation of CSRSave[], CSRRestore[] reach fixed points. + while (changed) { + changed = false; + ++iterations; + + DEBUG(if (ShrinkWrapDebugging >= Iterations) + dbgs() << "iter " << iterations + << " --------------------------------------------------\n"); + + // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, + // which determines the placements of spills and restores. + // Keep track of changes to spills, restores in each iteration to + // minimize the total iterations. + bool SRChanged = false; + for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + + // Place spills for CSRs in MBB. + SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); + + // Place restores for CSRs in MBB. + SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); + } + + // Add uses of CSRs used inside loops where needed. + changed |= addUsesForTopLevelLoops(cvBlocks); + + // Add uses for CSRs spilled or restored at branch, join points. + if (changed || SRChanged) { + while (! 
cvBlocks.empty()) {
+        MachineBasicBlock* MBB = cvBlocks.pop_back_val();
+        changed |= addUsesForMEMERegion(MBB, ncvBlocks);
+      }
+      if (! ncvBlocks.empty()) {
+        cvBlocks = ncvBlocks;
+        ncvBlocks.clear();
+      }
+    }
+
+    if (changed) {
+      calculateAnticAvail(Fn);
+      CSRSave.clear();
+      CSRRestore.clear();
+    }
+  }
+
+  // Check for effectiveness:
+  // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks}
+  // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock]
+  // Gives a measure of how many CSR spills have been moved from EntryBlock
+  // to minimal regions enclosing their uses.
+  CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]);
+  unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count();
+  numSRReduced += numSRReducedThisFunc;
+  DEBUG(if (ShrinkWrapDebugging >= BasicInfo) {
+      dbgs() << "-----------------------------------------------------------\n";
+      dbgs() << "total iterations = " << iterations << " ( "
+             << Fn.getFunction()->getName()
+             << " " << numSRReducedThisFunc
+             << " " << Fn.size()
+             << " )\n";
+      dbgs() << "-----------------------------------------------------------\n";
+      dumpSRSets();
+      dbgs() << "-----------------------------------------------------------\n";
+      if (numSRReducedThisFunc)
+        verifySpillRestorePlacement();
+    });
+}
+
+// Debugging methods.
+#ifndef NDEBUG
+/// findFastExitPath - debugging method used to detect functions
+/// with at least one path from the entry block to a return block
+/// directly or which has a very small number of edges.
+///
+void PEI::findFastExitPath() {
+  if (! EntryBlock)
+    return;
+  // Find a path from EntryBlock to any return block that does not branch:
+  //        Entry
+  //        |     ...
+  //        v      |
+  //        B1<-----+
+  //        |
+  //        v
+  //     Return
+  for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(),
+         SE = EntryBlock->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock* SUCC = *SI;
+
+    // Assume positive, disprove existence of fast path.
+    HasFastExitPath = true;
+
+    // Check the immediate successors.
+    if (isReturnBlock(SUCC)) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << getBasicBlockName(SUCC) << "\n";
+      break;
+    }
+    // Traverse df from SUCC, look for a branch block.
+    std::string exitPath = getBasicBlockName(SUCC);
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(SUCC),
+           BE = df_end(SUCC); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      // Reject paths with branch nodes.
+      if (SBB->succ_size() > 1) {
+        HasFastExitPath = false;
+        break;
+      }
+      exitPath += "->" + getBasicBlockName(SBB);
+    }
+    if (HasFastExitPath) {
+      if (ShrinkWrapDebugging >= BasicInfo)
+        dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock)
+               << "->" << exitPath << "\n";
+      break;
+    }
+  }
+}
+
+/// verifySpillRestorePlacement - check the current spill/restore
+/// sets for safety. Attempt to find spills without restores or
+/// restores without spills.
+/// Spills: walk df from each MBB in spill set ensuring that
+/// all CSRs spilled at MBB are restored on all paths
+/// from MBB to all exit blocks.
+/// Restores: walk idf from each MBB in restore set ensuring that
+/// all CSRs restored at MBB are spilled on all paths
+/// reaching MBB.
+///
+void PEI::verifySpillRestorePlacement() {
+  unsigned numReturnBlocks = 0;
+  for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+       MBBI != MBBE; ++MBBI) {
+    MachineBasicBlock* MBB = MBBI;
+    if (isReturnBlock(MBB) || MBB->succ_size() == 0)
+      ++numReturnBlocks;
+  }
+  for (CSRegBlockMap::iterator BI = CSRSave.begin(),
+         BE = CSRSave.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet spilled = BI->second;
+    CSRegSet restored;
+
+    if (spilled.empty())
+      continue;
+
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(spilled)
+                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRRestore[MBB]) << "\n");
+
+    if (CSRRestore[MBB].intersects(spilled)) {
+      restored |= (CSRRestore[MBB] & spilled);
+    }
+
+    // Walk depth first from MBB to find restores of all CSRs spilled at MBB:
+    // we must find restores for all spills w/no intervening spills on all
+    // paths from MBB to all return blocks.
+    for (df_iterator<MachineBasicBlock*> BI = df_begin(MBB),
+           BE = df_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* SBB = *BI;
+      if (SBB == MBB)
+        continue;
+      // Stop when we encounter spills of any CSRs spilled at MBB that
+      // have not yet been seen to be restored.
+      if (CSRSave[SBB].intersects(spilled) &&
+          !restored.contains(CSRSave[SBB] & spilled))
+        break;
+      // Collect the CSRs spilled at MBB that are restored
+      // at this DF successor of MBB.
+      if (CSRRestore[SBB].intersects(spilled))
+        restored |= (CSRRestore[SBB] & spilled);
+      // If we are at a return block, check that the restores
+      // we have seen so far exhaust the spills at MBB, then
+      // reset the restores.
+      if (isReturnBlock(SBB) || SBB->succ_size() == 0) {
+        if (restored != spilled) {
+          CSRegSet notRestored = (spilled - restored);
+          DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+                       << stringifyCSRegSet(notRestored)
+                       << " spilled at " << getBasicBlockName(MBB)
+                       << " are never restored on path to return "
+                       << getBasicBlockName(SBB) << "\n");
+        }
+        restored.clear();
+      }
+    }
+  }
+
+  // Check restore placements.
+  for (CSRegBlockMap::iterator BI = CSRRestore.begin(),
+         BE = CSRRestore.end(); BI != BE; ++BI) {
+    MachineBasicBlock* MBB = BI->first;
+    CSRegSet restored = BI->second;
+    CSRegSet spilled;
+
+    if (restored.empty())
+      continue;
+
+    DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(CSRSave[MBB])
+                 << "  RESTORE[" << getBasicBlockName(MBB) << "] = "
+                 << stringifyCSRegSet(restored) << "\n");
+
+    if (CSRSave[MBB].intersects(restored)) {
+      spilled |= (CSRSave[MBB] & restored);
+    }
+    // Walk inverse depth first from MBB to find spills of all
+    // CSRs restored at MBB:
+    for (idf_iterator<MachineBasicBlock*> BI = idf_begin(MBB),
+           BE = idf_end(MBB); BI != BE; ++BI) {
+      MachineBasicBlock* PBB = *BI;
+      if (PBB == MBB)
+        continue;
+      // Stop when we encounter restores of any CSRs restored at MBB that
+      // have not yet been seen to be spilled.
+      if (CSRRestore[PBB].intersects(restored) &&
+          !spilled.contains(CSRRestore[PBB] & restored))
+        break;
+      // Collect the CSRs restored at MBB that are spilled
+      // at this DF predecessor of MBB.
+      if (CSRSave[PBB].intersects(restored))
+        spilled |= (CSRSave[PBB] & restored);
+    }
+    if (spilled != restored) {
+      CSRegSet notSpilled = (restored - spilled);
+      DEBUG(dbgs() << MF->getFunction()->getName() << ": "
+                   << stringifyCSRegSet(notSpilled)
+                   << " restored at " << getBasicBlockName(MBB)
+                   << " are never spilled\n");
+    }
+  }
+}
+
+// Debugging print methods.
+std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { + if (!MBB) + return ""; + + if (MBB->getBasicBlock()) + return MBB->getBasicBlock()->getNameStr(); + + std::ostringstream name; + name << "_MBB_" << MBB->getNumber(); + return name.str(); +} + +std::string PEI::stringifyCSRegSet(const CSRegSet& s) { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const std::vector CSI = + MF->getFrameInfo()->getCalleeSavedInfo(); + + std::ostringstream srep; + if (CSI.size() == 0) { + srep << "[]"; + return srep.str(); + } + srep << "["; + CSRegSet::iterator I = s.begin(), E = s.end(); + if (I != E) { + unsigned reg = CSI[*I].getReg(); + srep << TRI->getName(reg); + for (++I; I != E; ++I) { + reg = CSI[*I].getReg(); + srep << ","; + srep << TRI->getName(reg); + } + } + srep << "]"; + return srep.str(); +} + +void PEI::dumpSet(const CSRegSet& s) { + DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); +} + +void PEI::dumpUsed(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; + }); +} + +void PEI::dumpAllUsed() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpUsed(MBB); + } +} + +void PEI::dumpSets(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << "\n"; + }); +} + +void PEI::dumpSets1(MachineBasicBlock* MBB) { + DEBUG({ + if (MBB) + dbgs() << getBasicBlockName(MBB) << " | " + << stringifyCSRegSet(CSRUsed[MBB]) << " | " + << stringifyCSRegSet(AnticIn[MBB]) << " | " + << stringifyCSRegSet(AnticOut[MBB]) << " | " + << stringifyCSRegSet(AvailIn[MBB]) << " | " + << stringifyCSRegSet(AvailOut[MBB]) << " | " + << stringifyCSRegSet(CSRSave[MBB]) << " | " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + }); +} + +void PEI::dumpAllSets() { + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MachineBasicBlock* MBB = MBBI; + dumpSets1(MBB); + } +} + +void PEI::dumpSRSets() { + DEBUG({ + for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); + MBB != E; ++MBB) { + if (!CSRSave[MBB].empty()) { + dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRSave[MBB]); + if (CSRRestore[MBB].empty()) + dbgs() << '\n'; + } + + if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) + dbgs() << " " + << "RESTORE[" << getBasicBlockName(MBB) << "] = " + << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; + } + }); +} +#endif diff --git a/final/lib/CodeGen/SimpleRegisterCoalescing.cpp b/final/lib/CodeGen/SimpleRegisterCoalescing.cpp new file mode 100644 index 00000000000..8a102702ef3 --- /dev/null +++ b/final/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -0,0 +1,1789 @@ +//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple register coalescing pass that attempts to +// aggressively coalesce every register copy that it can. 
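The core idea of coalescing can be shown in a few lines. In this toy sketch (illustrative data structures, not this pass's LiveInterval), two registers connected by a copy may be joined exactly when their live intervals do not otherwise overlap, and joining turns the copy into an identity move (what the numPeep statistic below counts).

#include <cstdio>
#include <utility>
#include <vector>

// A live interval as a sorted list of half-open [start, end) ranges,
// indexed by instruction number.
typedef std::vector<std::pair<int, int> > Interval;

// True if the two intervals are live at a common point.
static bool overlaps(const Interval &A, const Interval &B) {
  for (size_t i = 0, j = 0; i < A.size() && j < B.size();) {
    if (A[i].first < B[j].second && B[j].first < A[i].second)
      return true;
    (A[i].second < B[j].second) ? ++i : ++j;
  }
  return false;
}

int main() {
  // %a = ...        ; index 0
  // %b = copy %a    ; index 4   <- the copy the coalescer wants to delete
  // ... use %b      ; index 8
  Interval A, B;
  A.push_back(std::make_pair(0, 4));  // %a live from its def to the copy
  B.push_back(std::make_pair(4, 8));  // %b live from the copy to its use
  if (!overlaps(A, B)) {
    // Join: %a and %b become one register, and the copy at index 4 is an
    // identity move that can be erased.
    Interval Joined(A);
    Joined.insert(Joined.end(), B.begin(), B.end());
    std::printf("coalesced: [%d,%d)\n", Joined.front().first,
                Joined.back().second);
  }
  return 0;
}

The hard part of the real pass is everything this sketch omits: value numbers, physical-register aliases, and the cases (handled below) where intervals do overlap but the conflict can be removed by extending a range or commuting a definition.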
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regcoalescing" +#include "SimpleRegisterCoalescing.h" +#include "VirtRegMap.h" +#include "LiveDebugVariables.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterCoalescer.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include +#include +using namespace llvm; + +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); +STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(numDeadValNo, "Number of valno def marked dead"); + +char SimpleRegisterCoalescing::ID = 0; +static cl::opt +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt +DisableCrossClassJoin("disable-cross-class-join", + cl::desc("Avoid coalescing cross register class copies"), + cl::init(false), cl::Hidden); + +static cl::opt +DisablePhysicalJoin("disable-physical-join", + cl::desc("Avoid coalescing physical register copies"), + cl::init(false), cl::Hidden); + +static cl::opt +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) +INITIALIZE_PASS_DEPENDENCY(PHIElimination) +INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer, + "simple-register-coalescing", "Simple Register Coalescing", + false, false, true) + +char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID; + +void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreservedID(MachineDominatorsID); + AU.addPreservedID(StrongPHIEliminationID); + AU.addPreservedID(PHIEliminationID); + AU.addPreservedID(TwoAddressInstructionPassID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA +/// being the source and IntB being the dest, 
thus this defines a value number +/// in IntB. If the source value number (in IntA) is defined by a copy from B, +/// see if we can merge these two pieces of B into a single value number, +/// eliminating a copy. For example: +/// +/// A3 = B0 +/// ... +/// B1 = A3 <- this copy +/// +/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 +/// value number to be replaced with B0 (which simplifies the B liveinterval). +/// +/// This returns true if an interval was modified. +/// +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + + // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // the example above. + LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); + if (BLR == IntB.end()) return false; + VNInfo *BValNo = BLR->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (!BValNo->isDefByCopy()) return false; + assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; + VNInfo *AValNo = ALR->valno; + // If it's re-defined by an early clobber somewhere in the live range, then + // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. + // See PR3149: + // 172 %ECX = MOV32rr %reg1039 + // 180 INLINEASM , 10, %EAX, 14, %ECX, 9, + // %EAX, + // 36, , 1, %reg0, 0, 9, %ECX, 36, , 1, %reg0, 0 + // 188 %EAX = MOV32rr %EAX + // 196 %ECX = MOV32rr %ECX + // 204 %ECX = MOV32rr %ECX + // 212 %EAX = MOV32rr %EAX + // 220 %EAX = MOV32rr %EAX + // 228 %reg1039 = MOV32rr %ECX + // The early clobber operand ties ECX input to the ECX def. + // + // The live interval of ECX is represented as this: + // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47) + // The coalescer has no idea there was a def in the middle of [174,230]. + if (AValNo->hasRedefByEC()) + return false; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + if (!CP.isCoalescable(AValNo->getCopy())) + return false; + + // Get the LiveRange in IntB that this value number starts with. + LiveInterval::iterator ValLR = + IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); + if (ValLR == IntB.end()) + return false; + + // Make sure that the end of the live range is inside the same block as + // CopyMI. + MachineInstr *ValLREndInst = + li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValLR ends in the same block that the CopyMI + // live-range starts. 
If there are no intervening live ranges between them in + // IntB, we can merge them. + if (ValLR+1 != BLR) return false; + + // If a live interval is a physical register, conservatively check if any + // of its sub-registers is overlapping the live interval of the virtual + // register. If so, do not coalesce. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && + *tri_->getSubRegisters(IntB.reg)) { + for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) + if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + DEBUG({ + dbgs() << "\t\tInterfere with sub-register "; + li_->getInterval(*SR).print(dbgs(), tri_); + }); + return false; + } + } + + DEBUG({ + dbgs() << "Extending: "; + IntB.print(dbgs(), tri_); + }); + + SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + BValNo->setCopy(0); + + // Okay, we can merge them. We need to insert a new liverange: + // [ValLR.end, BLR.begin) of either value number, then we merge the + // two value numbers. + IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + + // If the IntB live range is assigned to a physical register, and if that + // physreg has sub-registers, update their live intervals as well. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &SRLI = li_->getInterval(*SR); + SRLI.addRange(LiveRange(FillerStart, FillerEnd, + SRLI.getNextValue(FillerStart, 0, + li_->getVNInfoAllocator()))); + } + } + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValLR->valno) { + IntB.MergeValueNumberInto(BValNo, ValLR->valno); + } + DEBUG({ + dbgs() << " result = "; + IntB.print(dbgs(), tri_); + dbgs() << "\n"; + }); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValLREndInst->getOperand(UIdx).setIsKill(false); + } + + // If the copy instruction was killing the destination register before the + // merge, find the last use and trim the live range. That will also add the + // isKill marker. + if (ALR->end == CopyIdx) + TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); + + ++numExtends; + return true; +} + +/// HasOtherReachingDefs - Return true if there are definitions of IntB +/// other than BValNo val# that can reach uses of AValno val# of IntA. 
+bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
+                                                    LiveInterval &IntB,
+                                                    VNInfo *AValNo,
+                                                    VNInfo *BValNo) {
+  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+       AI != AE; ++AI) {
+    if (AI->valno != AValNo) continue;
+    LiveInterval::Ranges::iterator BI =
+      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+    if (BI != IntB.ranges.begin())
+      --BI;
+    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+      if (BI->valno == BValNo)
+        continue;
+      if (BI->start <= AI->start && BI->end > AI->start)
+        return true;
+      if (BI->start > AI->start && BI->start < AI->end)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB.  If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+///  A3 = op A2 B0
+///    ...
+///  B1 = A3      <- this copy
+///    ...
+///     = op A3   <- more uses
+///
+/// ==>
+///
+///  B2 = op B0 A2
+///    ...
+///  B1 = B2      <- now an identity copy
+///    ...
+///     = op B2   <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+                                                        MachineInstr *CopyMI) {
+  // FIXME: For now, only eliminate the copy by commuting its def when the
+  // source register is a virtual register. We want to guard against cases
+  // where the copy is a back edge copy and commuting the def lengthens the
+  // live interval of the source register to the entire loop.
+  if (CP.isPhys() && CP.isFlipped())
+    return false;
+
+  // Bail if there is no dst interval.
+  if (!li_->hasInterval(CP.getDstReg()))
+    return false;
+
+  SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+  LiveInterval &IntA =
+    li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+  LiveInterval &IntB =
+    li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+  // the example above.
+  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+  if (!BValNo || !BValNo->isDefByCopy())
+    return false;
+
+  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+  // AValNo is the value number in A that defines the copy, A3 in the example.
+  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+  assert(AValNo && "COPY source not live");
+
+  // If other defs can reach uses of this def, then it's not safe to perform
+  // the optimization.
+  if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+    return false;
+  MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+  if (!DefMI)
+    return false;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  if (!TID.isCommutable())
+    return false;
+  // If DefMI is a two-address instruction then commuting it will change the
+  // destination register.
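+  // Illustrative sketch (hypothetical vregs): in a two-address instruction
+  // such as
+  //   %reg1027<def,tied> = ADD32rr %reg1027<use,tied>, %reg1028<use>
+  // the def is tied to the first use operand, so commuting the two uses
+  // effectively renames the def. Below we locate the tied use operand and
+  // the operand it may be swapped with; unless that other operand is
+  // IntB.reg (and kills it), the transformation does not apply.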
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + unsigned Op1, Op2, NewDstIdx; + if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2)) + return false; + if (Op1 == UseOpIdx) + NewDstIdx = Op2; + else if (Op2 == UseOpIdx) + NewDstIdx = Op1; + else + return false; + + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (NewReg != IntB.reg || !NewDstMO.isKill()) + return false; + + // Make sure there are no other definitions of IntB that would reach the + // uses which the new definition can reach. + if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // Abort if the aliases of IntB.reg have values that are not simply the + // clobbers from the superreg. + if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && + HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0)) + return false; + + // If some of the uses of IntA.reg is already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineRegisterInfo::use_nodbg_iterator UI = + mri_->use_nodbg_begin(IntA.reg), + UE = mri_->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + SlotIndex UseIdx = li_->getInstructionIndex(UseMI); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end()) + continue; + if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) + return false; + } + + DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = tii_->commuteInstruction(DefMI); + if (!NewMI) + return false; + if (NewMI != DefMI) { + li_->ReplaceMachineInstrInMaps(DefMI, NewMI); + MBB->insert(DefMI, NewMI); + MBB->erase(DefMI); + } + unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); + NewMI->getOperand(OpIdx).setIsKill(); + + // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A + // ... + // = B + + // Update uses of IntA of the specific Val# with IntB. + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), + UE = mri_->use_end(); UI != UE;) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (JoinedCopies.count(UseMI)) + continue; + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } + SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex(); + LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); + if (ULR == IntA.end() || ULR->valno != AValNo) + continue; + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (!UseMI->isCopy()) + continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. 
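+  // For example (hypothetical vregs): once its source operand has been
+  // rewritten above, a copy such as
+  //   %reg1026 = COPY %reg1025
+  // becomes
+  //   %reg1026 = COPY %reg1026
+  // i.e. an identity copy. The val# it defined in IntB is folded into
+  // BValNo and the instruction is queued in JoinedCopies for deletion.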
+ SlotIndex DefIdx = UseIdx.getDefIndex(); + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) + continue; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); + JoinedCopies.insert(UseMI); + } + + // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // is updated. + VNInfo *ValNo = BValNo; + ValNo->def = AValNo->def; + ValNo->setCopy(0); + for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); + AI != AE; ++AI) { + if (AI->valno != AValNo) continue; + IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + } + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + + IntA.removeValNo(AValNo); + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + ++numCommutes; + return true; +} + +/// isSameOrFallThroughBB - Return true if MBB == SuccMBB or MBB simply +/// fallthoughs to SuccMBB. +static bool isSameOrFallThroughBB(MachineBasicBlock *MBB, + MachineBasicBlock *SuccMBB, + const TargetInstrInfo *tii_) { + if (MBB == SuccMBB) + return true; + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + return !tii_->AnalyzeBranch(*MBB, TBB, FBB, Cond) && !TBB && !FBB && + MBB->isSuccessor(SuccMBB); +} + +/// removeRange - Wrapper for LiveInterval::removeRange. This removes a range +/// from a physical register live interval as well as from the live intervals +/// of its sub-registers. +static void removeRange(LiveInterval &li, + SlotIndex Start, SlotIndex End, + LiveIntervals *li_, const TargetRegisterInfo *tri_) { + li.removeRange(Start, End, true); + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) { + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + SlotIndex RemoveStart = Start; + SlotIndex RemoveEnd = Start; + + while (RemoveEnd != End) { + LiveInterval::iterator LR = sli.FindLiveRangeContaining(RemoveStart); + if (LR == sli.end()) + break; + RemoveEnd = (LR->end < End) ? LR->end : End; + sli.removeRange(RemoveStart, RemoveEnd, true); + RemoveStart = RemoveEnd; + } + } + } +} + +/// TrimLiveIntervalToLastUse - If there is a last use in the same basic block +/// as the copy instruction, trim the live interval to the last use and return +/// true. +bool +SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, + MachineBasicBlock *CopyMBB, + LiveInterval &li, + const LiveRange *LR) { + SlotIndex MBBStart = li_->getMBBStartIdx(CopyMBB); + SlotIndex LastUseIdx; + MachineOperand *LastUse = + lastRegisterUse(LR->start, CopyIdx.getPrevSlot(), li.reg, LastUseIdx); + if (LastUse) { + MachineInstr *LastUseMI = LastUse->getParent(); + if (!isSameOrFallThroughBB(LastUseMI->getParent(), CopyMBB, tii_)) { + // r1024 = op + // ... + // BB1: + // = r1024 + // + // BB2: + // r1025 = r1024 + if (MBBStart < LR->end) + removeRange(li, MBBStart, LR->end, li_, tri_); + return true; + } + + // There are uses before the copy, just shorten the live range to the end + // of last use. + LastUse->setIsKill(); + removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); + if (LastUseMI->isCopy()) { + MachineOperand &DefMO = LastUseMI->getOperand(0); + if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) + DefMO.setIsDead(); + } + return true; + } + + // Is it livein? + if (LR->start <= MBBStart && LR->end > MBBStart) { + if (LR->start == li_->getZeroIndex()) { + assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); + // Live-in to the function but dead. 
Remove it from entry live-in set. + mf_->begin()->removeLiveIn(li.reg); + } + // FIXME: Shorten intervals in BBs that reaches this BB. + } + + return false; +} + +/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial +/// computation, replace the copy by rematerialize the definition. +bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, + bool preserveSrcInt, + unsigned DstReg, + unsigned DstSubIdx, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex(); + LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); + assert(SrcLR != SrcInt.end() && "Live range not found!"); + VNInfo *ValNo = SrcLR->valno; + // If other defs can reach uses of this def, then it's not safe to perform + // the optimization. + if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill()) + return false; + MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; + assert(DefMI && "Defining instruction disappeared"); + const TargetInstrDesc &TID = DefMI->getDesc(); + if (!TID.isAsCheapAsAMove()) + return false; + if (!tii_->isTriviallyReMaterializable(DefMI, AA)) + return false; + bool SawStore = false; + if (!DefMI->isSafeToMove(tii_, AA, SawStore)) + return false; + if (TID.getNumDefs() != 1) + return false; + if (!DefMI->isImplicitDef()) { + // Make sure the copy destination register class fits the instruction + // definition register class. The mismatch can happen as a result of earlier + // extract_subreg, insert_subreg, subreg_to_reg coalescing. + const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + if (mri_->getRegClass(DstReg) != RC) + return false; + } else if (!RC->contains(DstReg)) + return false; + } + + // If destination register has a sub-register index on it, make sure it + // matches the instruction register class. + if (DstSubIdx) { + const TargetInstrDesc &TID = DefMI->getDesc(); + if (TID.getNumDefs() != 1) + return false; + const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); + const TargetRegisterClass *DstSubRC = + DstRC->getSubRegisterRegClass(DstSubIdx); + const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_); + if (DefRC == DstRC) + DstSubIdx = 0; + else if (DefRC != DstSubRC) + return false; + } + + RemoveCopyFlag(DstReg, CopyMI); + + MachineBasicBlock *MBB = CopyMI->getParent(); + MachineBasicBlock::iterator MII = + llvm::next(MachineBasicBlock::iterator(CopyMI)); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); + MachineInstr *NewMI = prior(MII); + + // CopyMI may have implicit operands, transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg() && MO.isImplicit()) + NewMI->addOperand(MO); + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); + } + + NewMI->copyImplicitOps(CopyMI); + li_->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ReMatCopies.insert(CopyMI); + ReMatDefs.insert(DefMI); + DEBUG(dbgs() << "Remat: " << *NewMI); + ++NumReMats; + + // The source interval can become smaller because we removed a use. 
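+  // For illustration (hypothetical indexes): if the erased copy was the last
+  // user of the source interval's final range, e.g.
+  //   %reg1024 = [18,62:0)  0@18     before
+  //   %reg1024 = [18,42:0)  0@18     after
+  // shrinkToUses trims the part of the range past the last remaining use.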
+ if (preserveSrcInt) + li_->shrinkToUses(&SrcInt); + + return true; +} + +/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and +/// update the subregister number if it is not zero. If DstReg is a +/// physical register and the existing subregister number of the def / use +/// being updated is not zero, make sure to set it to the correct physical +/// subregister. +void +SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + // Update LiveDebugVariables. + ldv_->renameRegister(SrcReg, DstReg, SubIdx); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. + if (DstIsPhys) { + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), false, + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } + + SmallVector Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); + } + + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. + if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << li_->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +/// removeIntervalIfEmpty - Check if the live interval of a physical register +/// is empty, if so remove it and also remove the empty intervals of its +/// sub-registers. Return true if live interval is removed. +static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_, + const TargetRegisterInfo *tri_) { + if (li.empty()) { + if (TargetRegisterInfo::isPhysicalRegister(li.reg)) + for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; + LiveInterval &sli = li_->getInterval(*SR); + if (sli.empty()) + li_->removeInterval(*SR); + } + li_->removeInterval(li.reg); + return true; + } + return false; +} + +/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. +/// Return true if live interval is removed. +bool SimpleRegisterCoalescing::ShortenDeadCopyLiveRange(LiveInterval &li, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); + LiveInterval::iterator MLR = + li.FindLiveRangeContaining(CopyIdx.getDefIndex()); + if (MLR == li.end()) + return false; // Already removed by ShortenDeadCopySrcLiveRange. 
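+  // The range is removed only when it covers nothing but the copy's own def
+  // slot, i.e. [DefIdx, DefIdx.getStoreIndex()); a wider range means the
+  // dead copy's value is still live somewhere and must be left alone.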
+ SlotIndex RemoveStart = MLR->start; + SlotIndex RemoveEnd = MLR->end; + SlotIndex DefIdx = CopyIdx.getDefIndex(); + // Remove the liverange that's defined by this. + if (RemoveStart == DefIdx && RemoveEnd == DefIdx.getStoreIndex()) { + removeRange(li, RemoveStart, RemoveEnd, li_, tri_); + return removeIntervalIfEmpty(li, li_, tri_); + } + return false; +} + +/// RemoveDeadDef - If a def of a live interval is now determined dead, remove +/// the val# it defines. If the live interval becomes empty, remove it as well. +bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, + MachineInstr *DefMI) { + SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex(); + LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); + if (DefIdx != MLR->valno->def) + return false; + li.removeValNo(MLR->valno); + return removeIntervalIfEmpty(li, li_, tri_); +} + +void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->def == DefIdx) + LR->valno->setCopy(0); + } +} + +/// PropagateDeadness - Propagate the dead marker to the instruction which +/// defines the val#. +static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, + SlotIndex &LRStart, LiveIntervals *li_, + const TargetRegisterInfo* tri_) { + MachineInstr *DefMI = + li_->getInstructionFromIndex(LRStart.getDefIndex()); + if (DefMI && DefMI != CopyMI) { + int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg); + if (DeadIdx != -1) + DefMI->getOperand(DeadIdx).setIsDead(); + else + DefMI->addOperand(MachineOperand::CreateReg(li.reg, + /*def*/true, /*implicit*/true, /*kill*/false, /*dead*/true)); + LRStart = LRStart.getNextSlot(); + } +} + +/// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially +/// extended by a dead copy. Mark the last use (if any) of the val# as kill as +/// ends the live range there. If there isn't another use, then this live range +/// is dead. Return true if live interval is removed. +bool +SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, + MachineInstr *CopyMI) { + SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI); + if (CopyIdx == SlotIndex()) { + // FIXME: special case: function live in. It can be a general case if the + // first instruction index starts at > 0 value. + assert(TargetRegisterInfo::isPhysicalRegister(li.reg)); + // Live-in to the function but dead. Remove it from entry live-in set. + if (mf_->begin()->isLiveIn(li.reg)) + mf_->begin()->removeLiveIn(li.reg); + if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) + removeRange(li, LR->start, LR->end, li_, tri_); + return removeIntervalIfEmpty(li, li_, tri_); + } + + LiveInterval::iterator LR = + li.FindLiveRangeContaining(CopyIdx.getPrevIndex().getStoreIndex()); + if (LR == li.end()) + // Livein but defined by a phi. + return false; + + SlotIndex RemoveStart = LR->start; + SlotIndex RemoveEnd = CopyIdx.getStoreIndex(); + if (LR->end > RemoveEnd) + // More uses past this copy? 
Nothing to do. + return false; + + // If there is a last use in the same bb, we can't remove the live range. + // Shorten the live interval and return. + MachineBasicBlock *CopyMBB = CopyMI->getParent(); + if (TrimLiveIntervalToLastUse(CopyIdx, CopyMBB, li, LR)) + return false; + + // There are other kills of the val#. Nothing to do. + if (!li.isOnlyLROfValNo(LR)) + return false; + + MachineBasicBlock *StartMBB = li_->getMBBFromIndex(RemoveStart); + if (!isSameOrFallThroughBB(StartMBB, CopyMBB, tii_)) + // If the live range starts in another mbb and the copy mbb is not a fall + // through mbb, then we can only cut the range from the beginning of the + // copy mbb. + RemoveStart = li_->getMBBStartIdx(CopyMBB).getNextIndex().getBaseIndex(); + + if (LR->valno->def == RemoveStart) { + // If the def MI defines the val# and this copy is the only kill of the + // val#, then propagate the dead marker. + PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); + ++numDeadValNo; + } + + removeRange(li, RemoveStart, RemoveEnd, li_, tri_); + return removeIntervalIfEmpty(li, li_, tri_); +} + + +/// isWinToJoinCrossClass - Return true if it's profitable to coalesce +/// two virtual registers from different register classes. +bool +SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, + unsigned DstReg, + const TargetRegisterClass *SrcRC, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *NewRC) { + unsigned NewRCCount = allocatableRCRegs_[NewRC].count(); + // This heuristics is good enough in practice, but it's obviously not *right*. + // 4 is a magic number that works well enough for x86, ARM, etc. It filter + // out all but the most restrictive register classes. + if (NewRCCount > 4 || + // Early exit if the function is fairly small, coalesce aggressively if + // that's the case. For really special register classes with 3 or + // fewer registers, be a bit more careful. + (li_->getFuncInstructionCount() / NewRCCount) < 8) + return true; + LiveInterval &SrcInt = li_->getInterval(SrcReg); + LiveInterval &DstInt = li_->getInterval(DstReg); + unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt); + unsigned DstSize = li_->getApproximateInstructionCount(DstInt); + if (SrcSize <= NewRCCount && DstSize <= NewRCCount) + return true; + // Estimate *register use density*. If it doubles or more, abort. + unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg), + mri_->use_nodbg_end()); + unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg), + mri_->use_nodbg_end()); + unsigned NewUses = SrcUses + DstUses; + unsigned NewSize = SrcSize + DstSize; + if (SrcRC != NewRC && SrcSize > NewRCCount) { + unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count(); + if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) + return false; + } + if (DstRC != NewRC && DstSize > NewRCCount) { + unsigned DstRCCount = allocatableRCRegs_[DstRC].count(); + if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) + return false; + } + return true; +} + + +/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, +/// which are the src/dst of the copy instruction CopyMI. This returns true +/// if the copy was successfully coalesced away. If it is not currently +/// possible to coalesce this interval, but it may be possible if other +/// things get coalesced, then it returns true by reference in 'Again'. 
+bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { + MachineInstr *CopyMI = TheCopy.MI; + + Again = false; + if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) + return false; // Already done. + + DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + // If they are already joined we continue. + if (CP.getSrcReg() == CP.getDstReg()) { + DEBUG(dbgs() << "\tCopy already coalesced.\n"); + return false; // Not coalescable. + } + + if (DisablePhysicalJoin && CP.isPhys()) { + DEBUG(dbgs() << "\tPhysical joins disabled.\n"); + return false; + } + + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); + + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); + // Only coalesce to allocatable physreg. + if (!li_->isAllocatable(CP.getDstReg())) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + } else { + DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << " to " << CP.getNewRC()->getName() << "\n"); + + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); + return false; + } + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " + << CP.getNewRC()->getName() << ".\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial() && CP.isPhys()) { + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) + return true; + + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + } + + // Okay, attempt to join these two intervals. 
On failure, this returns false.
+  // Otherwise, if one of the intervals being joined is a physreg, this method
+  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
+  // been modified, so we can use this information below to update aliases.
+  if (!JoinIntervals(CP)) {
+    // Coalescing failed.
+
+    // If definition of source is defined by trivial computation, try
+    // rematerializing it.
+    if (!CP.isFlipped() &&
+        ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+                                CP.getDstReg(), 0, CopyMI))
+      return true;
+
+    // If we can eliminate the copy without merging the live ranges, do so now.
+    if (!CP.isPartial()) {
+      if (AdjustCopiesBackFrom(CP, CopyMI) ||
+          RemoveCopyByCommutingDef(CP, CopyMI)) {
+        JoinedCopies.insert(CopyMI);
+        DEBUG(dbgs() << "\tTrivial!\n");
+        return true;
+      }
+    }
+
+    // Otherwise, we are unable to join the intervals.
+    DEBUG(dbgs() << "\tInterference!\n");
+    Again = true;  // May be possible to coalesce later.
+    return false;
+  }
+
+  // Coalescing to a virtual register that is of a sub-register class of the
+  // other. Make sure the resulting register is set to the right register class.
+  if (CP.isCrossClass()) {
+    ++numCrossRCs;
+    mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+  }
+
+  // Remember to delete the copy instruction.
+  JoinedCopies.insert(CopyMI);
+
+  UpdateRegDefsUses(CP);
+
+  // If we have extended the live range of a physical register, make sure we
+  // update live-in lists as well.
+  if (CP.isPhys()) {
+    SmallVector<MachineBasicBlock*, 16> BlockSeq;
+    // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+    // ranges for this, and they are preserved.
+    LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+    for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+         I != E; ++I ) {
+      li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+      for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+        MachineBasicBlock &block = *BlockSeq[idx];
+        if (!block.isLiveIn(CP.getDstReg()))
+          block.addLiveIn(CP.getDstReg());
+      }
+      BlockSeq.clear();
+    }
+  }
+
+  // SrcReg is guaranteed to be the register whose live interval is being
+  // merged.
+  li_->removeInterval(CP.getSrcReg());
+
+  // Update regalloc hint.
+  tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+  DEBUG({
+    LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+    dbgs() << "\tJoined. Result = ";
+    DstInt.print(dbgs(), tri_);
+    dbgs() << "\n";
+  });
+
+  ++numJoins;
+  return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be.  This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VN is the value number that we're trying to resolve.  InstDefiningValue
+/// keeps track of the new InstDefiningValue assignment for the result
+/// LiveInterval.  ThisFromOther/OtherFromThis are sets that keep track of
+/// whether a value in this or other is a copy from the opposite set.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that
+/// have already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+                                  SmallVector<VNInfo*, 16> &NewVNInfo,
+                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+                                  SmallVector<int, 16> &ThisValNoAssignments,
+                                  SmallVector<int, 16> &OtherValNoAssignments) {
+  unsigned VN = VNI->id;
+
+  // If the VN has already been computed, just return it.
+  if (ThisValNoAssignments[VN] >= 0)
+    return ThisValNoAssignments[VN];
+  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+  // If this val is not a copy from the other val, then it must be a new value
+  // number in the destination.
+  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+  if (I == ThisFromOther.end()) {
+    NewVNInfo.push_back(VNI);
+    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+  }
+  VNInfo *OtherValNo = I->second;
+
+  // Otherwise, this *is* a copy from the RHS.  If the other side has already
+  // been computed, return it.
+  if (OtherValNoAssignments[OtherValNo->id] >= 0)
+    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+  // Mark this value number as currently being computed, then ask what the
+  // ultimate value # of the other value is.
+  ThisValNoAssignments[VN] = -2;
+  unsigned UltimateVN =
+    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+                      OtherValNoAssignments, ThisValNoAssignments);
+  return ThisValNoAssignments[VN] = UltimateVN;
+}
+
+/// JoinIntervals - Attempt to join these two intervals.  On failure, this
+/// returns false.
+bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
+  LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+  DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // If a live interval is a physical register, check for interference with any
+  // aliases. The interference check implemented here is a bit more conservative
+  // than the full interference check below. We allow overlapping live ranges
+  // only when one is a copy of the other.
+  if (CP.isPhys()) {
+    for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+      if (!li_->hasInterval(*AS))
+        continue;
+      const LiveInterval &LHS = li_->getInterval(*AS);
+      LiveInterval::const_iterator LI = LHS.begin();
+      for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+           RI != RE; ++RI) {
+        LI = std::lower_bound(LI, LHS.end(), RI->start);
+        // Does LHS have an overlapping live range starting before RI?
+        if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+            (RI->start != RI->valno->def ||
+             !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+          DEBUG({
+            dbgs() << "\t\tInterference from alias: ";
+            LHS.print(dbgs(), tri_);
+            dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+          });
+          return false;
+        }
+
+        // Check that LHS ranges beginning in this range are copies.
+        for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+          if (LI->start != LI->valno->def ||
+              !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+            DEBUG({
+              dbgs() << "\t\tInterference from alias: ";
+              LHS.print(dbgs(), tri_);
+              dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+            });
+            return false;
+          }
+        }
+      }
+    }
+  }
+
+  // Compute the final value assignment, assuming that the live ranges can be
+  // coalesced.
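+  // Sketch of the bookkeeping (hypothetical val#s): with LHS val#s {0,1} and
+  // RHS val#s {0}, where LHS:1 is a copy of RHS:0, the maps come out as
+  //   LHSValsDefinedFromRHS = { LHS:1 -> RHS:0 }
+  //   LHSValNoAssignments = [0,1], RHSValNoAssignments = [1]
+  // so LHS:1 and RHS:0 share ultimate value number 1 in NewVNInfo.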
+  SmallVector<int, 16> LHSValNoAssignments;
+  SmallVector<int, 16> RHSValNoAssignments;
+  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+  SmallVector<VNInfo*, 16> NewVNInfo;
+
+  LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+  DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+  // Loop over the value numbers of the LHS, seeing if any are defined from
+  // the RHS.
+  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+      continue;
+
+    // Never join with a register that has EarlyClobber redefs.
+    if (VNI->hasRedefByEC())
+      return false;
+
+    // DstReg is known to be a register in the LHS interval.  If the src is
+    // from the RHS interval, we can use its value #.
+    if (!CP.isCoalescable(VNI->getCopy()))
+      continue;
+
+    // Figure out the value # from the RHS.
+    LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+    // The copy could be to an aliased physreg.
+    if (!lr) continue;
+    LHSValsDefinedFromRHS[VNI] = lr->valno;
+  }
+
+  // Loop over the value numbers of the RHS, seeing if any are defined from
+  // the LHS.
+  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+      continue;
+
+    // Never join with a register that has EarlyClobber redefs.
+    if (VNI->hasRedefByEC())
+      return false;
+
+    // DstReg is known to be a register in the RHS interval.  If the src is
+    // from the LHS interval, we can use its value #.
+    if (!CP.isCoalescable(VNI->getCopy()))
+      continue;
+
+    // Figure out the value # from the LHS.
+    LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+    // The copy could be to an aliased physreg.
+    if (!lr) continue;
+    RHSValsDefinedFromLHS[VNI] = lr->valno;
+  }
+
+  LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+  RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+  NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+      continue;
+    ComputeUltimateVN(VNI, NewVNInfo,
+                      LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+                      LHSValNoAssignments, RHSValNoAssignments);
+  }
+  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+       i != e; ++i) {
+    VNInfo *VNI = *i;
+    unsigned VN = VNI->id;
+    if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+      continue;
+    // If this value number isn't a copy from the LHS, it's a new number.
+    if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+      NewVNInfo.push_back(VNI);
+      RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+      continue;
+    }
+
+    ComputeUltimateVN(VNI, NewVNInfo,
+                      RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+                      RHSValNoAssignments, LHSValNoAssignments);
+  }
+
+  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+  // interval lists to see if these intervals are coalescable.
+  LiveInterval::const_iterator I = LHS.begin();
+  LiveInterval::const_iterator IE = LHS.end();
+  LiveInterval::const_iterator J = RHS.begin();
+  LiveInterval::const_iterator JE = RHS.end();
+
+  // Skip ahead until the first place of potential sharing.
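+  // Both range lists are sorted by start index, so std::upper_bound reaches
+  // the first potentially overlapping range in O(log n) rather than stepping
+  // one range at a time; backing up by one re-checks a range that may
+  // straddle the other interval's first start.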
+  if (I != IE && J != JE) {
+    if (I->start < J->start) {
+      I = std::upper_bound(I, IE, J->start);
+      if (I != LHS.begin()) --I;
+    } else if (J->start < I->start) {
+      J = std::upper_bound(J, JE, I->start);
+      if (J != RHS.begin()) --J;
+    }
+  }
+
+  while (I != IE && J != JE) {
+    // Determine if these two live ranges overlap.
+    bool Overlaps;
+    if (I->start < J->start) {
+      Overlaps = I->end > J->start;
+    } else {
+      Overlaps = J->end > I->start;
+    }
+
+    // If so, check value # info to determine if they are really different.
+    if (Overlaps) {
+      // If the live range overlap will map to the same value number in the
+      // result liverange, we can still coalesce them.  If not, we can't.
+      if (LHSValNoAssignments[I->valno->id] !=
+          RHSValNoAssignments[J->valno->id])
+        return false;
+      // If it's re-defined by an early clobber somewhere in the live range,
+      // then conservatively abort coalescing.
+      if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+        return false;
+    }
+
+    if (I->end < J->end)
+      ++I;
+    else
+      ++J;
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+         E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned LHSValID = LHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[LHSValID]->setHasPHIKill(true);
+  }
+
+  // Update kill info. Some live ranges are extended due to copy coalescing.
+  for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+         E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+    VNInfo *VNI = I->first;
+    unsigned RHSValID = RHSValNoAssignments[VNI->id];
+    if (VNI->hasPHIKill())
+      NewVNInfo[RHSValID]->setHasPHIKill(true);
+  }
+
+  if (LHSValNoAssignments.empty())
+    LHSValNoAssignments.push_back(-1);
+  if (RHSValNoAssignments.empty())
+    RHSValNoAssignments.push_back(-1);
+
+  // If we get here, we know that we can coalesce the live ranges.  Ask the
+  // intervals to coalesce themselves now.
+  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+           mri_);
+  return true;
+}
+
+namespace {
+  // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+  // depth of the basic block (the unsigned), and then on the MBB number.
+  struct DepthMBBCompare {
+    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+      // Deeper loops first
+      if (LHS.first != RHS.first)
+        return LHS.first > RHS.first;
+
+      // Prefer blocks that are more connected in the CFG. This takes care of
+      // the most difficult copies first while intervals are short.
+      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+      if (cl != cr)
+        return cl > cr;
+
+      // As a last resort, sort by block number.
+      return LHS.second->getNumber() < RHS.second->getNumber();
+    }
+  };
+}
+
+void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                                                 std::vector<CopyRec> &TryAgain) {
+  DEBUG(dbgs() << MBB->getName() << ":\n");
+
+  SmallVector<CopyRec, 8> VirtCopies;
+  SmallVector<CopyRec, 8> PhysCopies;
+  SmallVector<CopyRec, 8> ImpDefCopies;
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E;) {
+    MachineInstr *Inst = MII++;
+
+    // If this isn't a copy or a subreg_to_reg, we can't join intervals.
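+    // Operand layout, for reference:
+    //   %dst = COPY %src                       source is operand 1
+    //   %dst = SUBREG_TO_REG 0, %src, subidx   source is operand 2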
+    unsigned SrcReg, DstReg;
+    if (Inst->isCopy()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(1).getReg();
+    } else if (Inst->isSubregToReg()) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(2).getReg();
+    } else
+      continue;
+
+    bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+    bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+      ImpDefCopies.push_back(CopyRec(Inst, 0));
+    else if (SrcIsPhys || DstIsPhys)
+      PhysCopies.push_back(CopyRec(Inst, 0));
+    else
+      VirtCopies.push_back(CopyRec(Inst, 0));
+  }
+
+  // Try coalescing implicit copies and insert_subreg first,
+  // followed by copies to / from physical registers, then finally copies
+  // from virtual registers to virtual registers.
+  for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = ImpDefCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = PhysCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+  for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+    CopyRec &TheCopy = VirtCopies[i];
+    bool Again = false;
+    if (!JoinCopy(TheCopy, Again))
+      if (Again)
+        TryAgain.push_back(TheCopy);
+  }
+}
+
+void SimpleRegisterCoalescing::joinIntervals() {
+  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+  std::vector<CopyRec> TryAgainList;
+  if (loopInfo->empty()) {
+    // If there are no loops in the function, join intervals in function order.
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+         I != E; ++I)
+      CopyCoalesceInMBB(I, TryAgainList);
+  } else {
+    // Otherwise, join intervals in inner loops before other intervals.
+    // Unfortunately we can't just iterate over loop hierarchy here because
+    // there may be more MBB's than BB's.  Collect MBB's for sorting.
+
+    // Join intervals in the function prolog first. We want to join physical
+    // registers with virtual registers before the intervals get too long.
+    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+    for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+      MachineBasicBlock *MBB = I;
+      MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+    }
+
+    // Sort by loop depth.
+    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+    // Finally, join intervals in loop nest order.
+    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+      CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+  }
+
+  // Joining intervals can allow other intervals to be joined.  Iteratively
+  // join until we make no progress.
+  bool ProgressMade = true;
+  while (ProgressMade) {
+    ProgressMade = false;
+
+    for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+      CopyRec &TheCopy = TryAgainList[i];
+      if (!TheCopy.MI)
+        continue;
+
+      bool Again = false;
+      bool Success = JoinCopy(TheCopy, Again);
+      if (Success || !Again) {
+        TheCopy.MI = 0;   // Mark this one as done.
+        ProgressMade = true;
+      }
+    }
+  }
+}
+
+/// Return true if the two specified registers belong to different register
+/// classes.  The registers may be either phys or virt regs.
+bool
+SimpleRegisterCoalescing::differingRegisterClasses(unsigned RegA,
+                                                   unsigned RegB) const {
+  // Get the register classes for the first reg.
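+  // A physical register never has a register class of its own here; only
+  // membership matters. E.g. (x86, illustrative) a virtual register of class
+  // GR32 differs from %ESI only if GR32 does not contain %ESI.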
+  if (TargetRegisterInfo::isPhysicalRegister(RegA)) {
+    assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+           "Shouldn't consider two physregs!");
+    return !mri_->getRegClass(RegB)->contains(RegA);
+  }
+
+  // Compare against the regclass for the second reg.
+  const TargetRegisterClass *RegClassA = mri_->getRegClass(RegA);
+  if (TargetRegisterInfo::isVirtualRegister(RegB)) {
+    const TargetRegisterClass *RegClassB = mri_->getRegClass(RegB);
+    return RegClassA != RegClassB;
+  }
+  return !RegClassA->contains(RegB);
+}
+
+/// lastRegisterUse - Returns the last (non-debug) use of the specific register
+/// between cycles Start and End or NULL if there are no uses.
+MachineOperand *
+SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
+                                          SlotIndex End,
+                                          unsigned Reg,
+                                          SlotIndex &UseIdx) const {
+  UseIdx = SlotIndex();
+  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    MachineOperand *LastUse = NULL;
+    for (MachineRegisterInfo::use_nodbg_iterator I = mri_->use_nodbg_begin(Reg),
+           E = mri_->use_nodbg_end(); I != E; ++I) {
+      MachineOperand &Use = I.getOperand();
+      MachineInstr *UseMI = Use.getParent();
+      if (UseMI->isIdentityCopy())
+        continue;
+      SlotIndex Idx = li_->getInstructionIndex(UseMI);
+      if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) {
+        LastUse = &Use;
+        UseIdx = Idx.getUseIndex();
+      }
+    }
+    return LastUse;
+  }
+
+  SlotIndex s = Start;
+  SlotIndex e = End.getPrevSlot().getBaseIndex();
+  while (e >= s) {
+    // Skip deleted instructions
+    MachineInstr *MI = li_->getInstructionFromIndex(e);
+    while (e != SlotIndex() && e.getPrevIndex() >= s && !MI) {
+      e = e.getPrevIndex();
+      MI = li_->getInstructionFromIndex(e);
+    }
+    if (e < s || MI == NULL)
+      return NULL;
+
+    // Ignore identity copies.
+    if (!MI->isIdentityCopy())
+      for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
+        MachineOperand &Use = MI->getOperand(i);
+        if (Use.isReg() && Use.isUse() && Use.getReg() &&
+            tri_->regsOverlap(Use.getReg(), Reg)) {
+          UseIdx = e.getUseIndex();
+          return &Use;
+        }
+      }
+
+    e = e.getPrevIndex();
+  }
+
+  return NULL;
+}
+
+void SimpleRegisterCoalescing::releaseMemory() {
+  JoinedCopies.clear();
+  ReMatCopies.clear();
+  ReMatDefs.clear();
+}
+
+bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
+  mf_ = &fn;
+  mri_ = &fn.getRegInfo();
+  tm_ = &fn.getTarget();
+  tri_ = tm_->getRegisterInfo();
+  tii_ = tm_->getInstrInfo();
+  li_ = &getAnalysis<LiveIntervals>();
+  ldv_ = &getAnalysis<LiveDebugVariables>();
+  AA = &getAnalysis<AliasAnalysis>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+               << "********** Function: "
+               << ((Value*)mf_->getFunction())->getName() << '\n');
+
+  if (VerifyCoalescing)
+    mf_->verify(this, "Before register coalescing");
+
+  for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
+         E = tri_->regclass_end(); I != E; ++I)
+    allocatableRCRegs_.insert(std::make_pair(*I,
+                                             tri_->getAllocatableSet(fn, *I)));
+
+  // Join (coalesce) intervals if requested.
+  if (EnableJoining) {
+    joinIntervals();
+    DEBUG({
+      dbgs() << "********** INTERVALS POST JOINING **********\n";
+      for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+           I != E; ++I){
+        I->second->print(dbgs(), tri_);
+        dbgs() << "\n";
+      }
+    });
+  }
+
+  // Perform a final pass over the instructions to delete the copies made
+  // redundant by coalescing and to remove identity moves.
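+  // Concretely, the loop below (a) erases or downgrades to KILL the copies
+  // queued in JoinedCopies, (b) deletes rematerialized definitions that have
+  // become dead, (c) erases identity moves, and (d) clears kill flags that
+  // the merged intervals no longer justify.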
+ SmallVector DeadDefs; + for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); + mbbi != mbbe; ++mbbi) { + MachineBasicBlock* mbb = mbbi; + for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); + mii != mie; ) { + MachineInstr *MI = mii; + if (JoinedCopies.count(MI)) { + // Delete all coalesced copies. + bool DoDelete = true; + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && + MI->getNumOperands() > 2) + // Do not delete extract_subreg, insert_subreg of physical + // registers unless the definition is dead. e.g. + // %DO = INSERT_SUBREG %D0, %S0, 1 + // or else the scavenger may complain. LowerSubregs will + // delete them later. + DoDelete = false; + + if (MI->allDefsAreDead()) { + if (li_->hasInterval(SrcReg)) { + LiveInterval &li = li_->getInterval(SrcReg); + if (!ShortenDeadCopySrcLiveRange(li, MI)) + ShortenDeadCopyLiveRange(li, MI); + } + DoDelete = true; + } + if (!DoDelete) { + // We need the instruction to adjust liveness, so make it a KILL. + if (MI->isSubregToReg()) { + MI->RemoveOperand(3); + MI->RemoveOperand(1); + } + MI->setDesc(tii_->get(TargetOpcode::KILL)); + mii = llvm::next(mii); + } else { + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + } + continue; + } + + // Now check if this is a remat'ed def instruction which is now dead. + if (ReMatDefs.count(MI)) { + bool isDead = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + DeadDefs.push_back(Reg); + if (MO.isDead()) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + !mri_->use_nodbg_empty(Reg)) { + isDead = false; + break; + } + } + if (isDead) { + while (!DeadDefs.empty()) { + unsigned DeadDef = DeadDefs.back(); + DeadDefs.pop_back(); + RemoveDeadDef(li_->getInterval(DeadDef), MI); + } + li_->RemoveMachineInstrFromMaps(mii); + mii = mbbi->erase(mii); + continue; + } else + DeadDefs.clear(); + } + + // If the move will be an identity move delete it + if (MI->isIdentityCopy()) { + unsigned SrcReg = MI->getOperand(1).getReg(); + if (li_->hasInterval(SrcReg)) { + LiveInterval &RegInt = li_->getInterval(SrcReg); + // If def of this move instruction is dead, remove its live range + // from the destination register's live interval. + if (MI->allDefsAreDead()) { + if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) + ShortenDeadCopyLiveRange(RegInt, MI); + } + } + li_->RemoveMachineInstrFromMaps(MI); + mii = mbbi->erase(mii); + ++numPeep; + continue; + } + + ++mii; + + // Check for now unnecessary kill flags. + if (li_->isNotInMIMap(MI)) continue; + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isKill()) continue; + unsigned reg = MO.getReg(); + if (!reg || !li_->hasInterval(reg)) continue; + if (!li_->getInterval(reg).killedAt(DefIdx)) { + MO.setIsKill(false); + continue; + } + // When leaving a kill flag on a physreg, check if any subregs should + // remain alive. 
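+      // E.g. (x86, illustrative) a kill flag on %EAX would also end the
+      // live range of %AX; if the subregister is still live here, adding an
+      // implicit def of it below keeps that value alive past the kill.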
+      if (!TargetRegisterInfo::isPhysicalRegister(reg))
+        continue;
+      for (const unsigned *SR = tri_->getSubRegisters(reg);
+           unsigned S = *SR; ++SR)
+        if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+          MI->addRegisterDefined(S, tri_);
+      }
+    }
+  }
+
+  DEBUG(dump());
+  DEBUG(ldv_->dump());
+  if (VerifyCoalescing)
+    mf_->verify(this, "After register coalescing");
+  return true;
+}
+
+/// print - Implement the dump method.
+void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
+  li_->print(O, m);
+}
+
+RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
+  return new SimpleRegisterCoalescing();
+}
+
+// Make sure that anything that uses RegisterCoalescer pulls in this file...
+DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/final/lib/CodeGen/SimpleRegisterCoalescing.h b/final/lib/CodeGen/SimpleRegisterCoalescing.h
new file mode 100644
index 00000000000..56703dfa2dd
--- /dev/null
+++ b/final/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -0,0 +1,193 @@
+//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a simple register copy coalescing phase.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/ADT/BitVector.h"
+
+namespace llvm {
+  class SimpleRegisterCoalescing;
+  class LiveDebugVariables;
+  class TargetRegisterInfo;
+  class TargetInstrInfo;
+  class VirtRegMap;
+  class MachineLoopInfo;
+
+  /// CopyRec - Representation for copy instructions in coalescer queue.
+  ///
+  struct CopyRec {
+    MachineInstr *MI;
+    unsigned LoopDepth;
+    CopyRec(MachineInstr *mi, unsigned depth)
+      : MI(mi), LoopDepth(depth) {}
+  };
+
+  class SimpleRegisterCoalescing : public MachineFunctionPass,
+                                   public RegisterCoalescer {
+    MachineFunction* mf_;
+    MachineRegisterInfo* mri_;
+    const TargetMachine* tm_;
+    const TargetRegisterInfo* tri_;
+    const TargetInstrInfo* tii_;
+    LiveIntervals *li_;
+    LiveDebugVariables *ldv_;
+    const MachineLoopInfo* loopInfo;
+    AliasAnalysis *AA;
+
+    DenseMap<const TargetRegisterClass*, BitVector> allocatableRCRegs_;
+
+    /// JoinedCopies - Keep track of copies eliminated due to coalescing.
+    ///
+    SmallPtrSet<MachineInstr*, 32> JoinedCopies;
+
+    /// ReMatCopies - Keep track of copies eliminated due to remat.
+    ///
+    SmallPtrSet<MachineInstr*, 32> ReMatCopies;
+
+    /// ReMatDefs - Keep track of definition instructions which have
+    /// been remat'ed.
+    SmallPtrSet<MachineInstr*, 8> ReMatDefs;
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
+      initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
+    }
+
+    struct InstrSlots {
+      enum {
+        LOAD  = 0,
+        USE   = 1,
+        DEF   = 2,
+        STORE = 3,
+        NUM   = 4
+      };
+    };
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    virtual void releaseMemory();
+
+    /// runOnMachineFunction - pass entry point
+    virtual bool runOnMachineFunction(MachineFunction&);
+
+    bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
+      // This runs as an independent pass, so don't do anything.
+      return false;
+    }
+
+    /// print - Implement the dump method.
+    virtual void print(raw_ostream &O, const Module* = 0) const;
+
+  private:
+    /// joinIntervals - join compatible live intervals
+    void joinIntervals();
+
+    /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
+    /// copies that cannot yet be coalesced into the "TryAgain" list.
+    void CopyCoalesceInMBB(MachineBasicBlock *MBB,
+                           std::vector<CopyRec> &TryAgain);
+
+    /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+    /// which are the src/dst of the copy instruction CopyMI.  This returns true
+    /// if the copy was successfully coalesced away. If it is not currently
+    /// possible to coalesce this interval, but it may be possible if other
+    /// things get coalesced, then it returns true by reference in 'Again'.
+    bool JoinCopy(CopyRec &TheCopy, bool &Again);
+
+    /// JoinIntervals - Attempt to join these two intervals.  On failure, this
+    /// returns false.  The output "SrcInt" will not have been modified, so we
+    /// can use this information below to update aliases.
+    bool JoinIntervals(CoalescerPair &CP);
+
+    /// Return true if the two specified registers belong to different register
+    /// classes.  The registers may be either phys or virt regs.
+    bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
+
+    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
+    /// the source value number is defined by a copy from the destination reg
+    /// see if we can merge these two destination reg valno# into a single
+    /// value number, eliminating a copy.
+    bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+    /// HasOtherReachingDefs - Return true if there are definitions of IntB
+    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
+    bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
+                              VNInfo *AValNo, VNInfo *BValNo);
+
+    /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy.
+    /// If the source value number is defined by a commutable instruction and
+    /// its other operand is coalesced to the copy dest register, see if we
+    /// can transform the copy into a noop by commuting the definition.
+    bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
+
+    /// TrimLiveIntervalToLastUse - If there is a last use in the same basic
+    /// block as the copy instruction, trim the live interval to the last use
+    /// and return true.
+    bool TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
+                                   MachineBasicBlock *CopyMBB,
+                                   LiveInterval &li, const LiveRange *LR);
+
+    /// ReMaterializeTrivialDef - If the source of a copy is defined by a
+    /// trivial computation, replace the copy by rematerializing the definition.
+    /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+    bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+                                 unsigned DstReg, unsigned DstSubIdx,
+                                 MachineInstr *CopyMI);
+
+    /// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+    /// two virtual registers from different register classes.
+    bool isWinToJoinCrossClass(unsigned SrcReg,
+                               unsigned DstReg,
+                               const TargetRegisterClass *SrcRC,
+                               const TargetRegisterClass *DstRC,
+                               const TargetRegisterClass *NewRC);
+
+    /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+    /// update the subregister number if it is not zero.
+    /// physical register and the existing subregister number of the def / use
+    /// being updated is not zero, make sure to set it to the correct physical
+    /// subregister.
+    void UpdateRegDefsUses(const CoalescerPair &CP);
+
+    /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
+    /// Return true if live interval is removed.
+    bool ShortenDeadCopyLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// ShortenDeadCopySrcLiveRange - Shorten a live range as it's artificially
+    /// extended by a dead copy. Mark the last use (if any) of the val# as kill
+    /// and end the live range there. If there isn't another use, then this
+    /// live range is dead. Return true if live interval is removed.
+    bool ShortenDeadCopySrcLiveRange(LiveInterval &li, MachineInstr *CopyMI);
+
+    /// RemoveDeadDef - If a def of a live interval is now determined dead,
+    /// remove the val# it defines. If the live interval becomes empty, remove
+    /// it as well.
+    bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+
+    /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+    /// VNInfo copy flag for DstReg and all aliases.
+    void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
+    /// lastRegisterUse - Returns the last use of the specific register between
+    /// cycles Start and End or NULL if there are no uses.
+    MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
+                                    unsigned Reg, SlotIndex &LastUseIdx) const;
+  };
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/CodeGen/SjLjEHPrepare.cpp b/final/lib/CodeGen/SjLjEHPrepare.cpp
new file mode 100644
index 00000000000..13e1454fa5f
--- /dev/null
+++ b/final/lib/CodeGen/SjLjEHPrepare.cpp
@@ -0,0 +1,592 @@
+//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which use SjLj
+// based exception handling.
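+//
+// In rough outline (a sketch, not the exact IR this pass emits), the entry
+// block of a function containing invokes becomes:
+//
+//   %fcn_context = alloca ...             ; see FunctionContextTy below
+//   ...store LSDA, personality, FP and SP into the context...
+//   %dispatch = call i32 @llvm.eh.sjlj.setjmp(i8* %jbuf)
+//   %notunwind = icmp eq i32 %dispatch, 0
+//   br i1 %notunwind, label %eh.sjlj.setjmp.cont, label %eh.sjlj.setjmp.catch
+//
+// The normal path registers the context with the runtime; the catch block
+// switches on the call-site number stored by each invoke to reach the right
+// landing pad.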
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sjljehprepare"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
+using namespace llvm;
+
+STATISTIC(NumInvokes, "Number of invokes replaced");
+STATISTIC(NumUnwinds, "Number of unwinds replaced");
+STATISTIC(NumSpilled, "Number of registers live across unwind edges");
+
+namespace {
+  class SjLjEHPass : public FunctionPass {
+
+    const TargetLowering *TLI;
+
+    const Type *FunctionContextTy;
+    Constant *RegisterFn;
+    Constant *UnregisterFn;
+    Constant *BuiltinSetjmpFn;
+    Constant *FrameAddrFn;
+    Constant *StackAddrFn;
+    Constant *StackRestoreFn;
+    Constant *LSDAAddrFn;
+    Value *PersonalityFn;
+    Constant *SelectorFn;
+    Constant *ExceptionFn;
+    Constant *CallSiteFn;
+    Constant *DispatchSetupFn;
+
+    Value *CallSite;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    explicit SjLjEHPass(const TargetLowering *tli = NULL)
+      : FunctionPass(ID), TLI(tli) { }
+    bool doInitialization(Module &M);
+    bool runOnFunction(Function &F);
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
+    const char *getPassName() const {
+      return "SJLJ Exception Handling preparation";
+    }
+
+  private:
+    void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
+    void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
+                            SwitchInst *CatchSwitch);
+    void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+    bool insertSjLjEHSupport(Function &F);
+  };
+} // end anonymous namespace
+
+char SjLjEHPass::ID = 0;
+
+// Public Interface To the SjLjEHPass pass.
+FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
+  return new SjLjEHPass(TLI);
+}
+// doInitialization - Set up declarations and types needed to process
+// exceptions.
+bool SjLjEHPass::doInitialization(Module &M) {
+  // Build the function context structure.
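+  // For reference, the struct built below corresponds to this C-style layout
+  // (field names are illustrative only, taken from the comments that follow):
+  //
+  //   struct SjLjFunctionContext {
+  //     void *prev;          // __prev: link to the enclosing context
+  //     int   call_site;     // call_site: index of the active call site
+  //     int   data[4];       // __data: exception value and selector slots
+  //     void *personality;   // __personality
+  //     void *lsda;          // __lsda
+  //     void *jbuf[5];       // __jbuf: five-word buffer for builtin_setjmp
+  //   };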
+  // builtin_setjmp uses a five word jbuf
+  const Type *VoidPtrTy =
+          Type::getInt8PtrTy(M.getContext());
+  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+  FunctionContextTy =
+    StructType::get(M.getContext(),
+                    VoidPtrTy,                        // __prev
+                    Int32Ty,                          // call_site
+                    ArrayType::get(Int32Ty, 4),       // __data
+                    VoidPtrTy,                        // __personality
+                    VoidPtrTy,                        // __lsda
+                    ArrayType::get(VoidPtrTy, 5),     // __jbuf
+                    NULL);
+  RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register",
+                                     Type::getVoidTy(M.getContext()),
+                                     PointerType::getUnqual(FunctionContextTy),
+                                     (Type *)0);
+  UnregisterFn =
+    M.getOrInsertFunction("_Unwind_SjLj_Unregister",
+                          Type::getVoidTy(M.getContext()),
+                          PointerType::getUnqual(FunctionContextTy),
+                          (Type *)0);
+  FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+  StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+  StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
+  BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
+  LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
+  SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
+  ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
+  CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+  DispatchSetupFn
+    = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
+  PersonalityFn = 0;
+
+  return true;
+}
+
+/// insertCallSiteStore - Insert a store of the call-site value to the
+/// function context
+void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number,
+                                     Value *CallSite) {
+  ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()),
+                                              Number);
+  // Insert a store of the call-site number
+  new StoreInst(CallSiteNoC, CallSite, true, I);  // volatile
+}
+
+/// markInvokeCallSite - Insert code to mark the call_site for this invoke
+void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo,
+                                    Value *CallSite,
+                                    SwitchInst *CatchSwitch) {
+  ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                             InvokeNo);
+  // The runtime comes back to the dispatcher with the call_site - 1 in
+  // the context. Odd, but there it is.
+  ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()),
+                                             InvokeNo - 1);
+
+  // If the unwind edge has phi nodes, split the edge.
+  if (isa<PHINode>(II->getUnwindDest()->begin())) {
+    SplitCriticalEdge(II, 1, this);
+
+    // If there are any phi nodes left, they must have a single predecessor.
+    while (PHINode *PN = dyn_cast<PHINode>(II->getUnwindDest()->begin())) {
+      PN->replaceAllUsesWith(PN->getIncomingValue(0));
+      PN->eraseFromParent();
+    }
+  }
+
+  // Insert the store of the call site value
+  insertCallSiteStore(II, InvokeNo, CallSite);
+
+  // Record the call site value for the back end so it stays associated with
+  // the invoke.
+  CallInst::Create(CallSiteFn, CallSiteNoC, "", II);
+
+  // Add a switch case to our unwind block.
+  CatchSwitch->addCase(SwitchValC, II->getUnwindDest());
+  // We still want this to look like an invoke so we emit the LSDA properly,
+  // so we don't transform the invoke into a call here.
+}
+
+/// MarkBlocksLiveIn - Insert BB and all of its predecessors into LiveBBs until
+/// we reach blocks we've already seen.
+static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
+  if (!LiveBBs.insert(BB).second) return; // already been here.
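+  // Otherwise recurse into all predecessors below; blocks already in the
+  // set stop the walk, so each block is visited at most once.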
+
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+    MarkBlocksLiveIn(*PI, LiveBBs);
+}
+
+/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind
+/// edge we spill into a stack location, guaranteeing that there is nothing
+/// live across the unwind edge. This process also splits all critical edges
+/// coming out of invokes.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
+void SjLjEHPass::
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+  // First step, split all critical edges from invoke instructions.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+    InvokeInst *II = Invokes[i];
+    SplitCriticalEdge(II, 0, this);
+    SplitCriticalEdge(II, 1, this);
+    assert(!isa<PHINode>(II->getNormalDest()) &&
+           !isa<PHINode>(II->getUnwindDest()) &&
+           "critical edge splitting left single entry phi nodes?");
+  }
+
+  Function *F = Invokes.back()->getParent()->getParent();
+
+  // To avoid having to handle incoming arguments specially, we lower each arg
+  // to a copy instruction in the entry block. This ensures that the argument
+  // value itself cannot be live across the entry block.
+  BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin();
+  while (isa<AllocaInst>(AfterAllocaInsertPt) &&
+         isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsertPt)->getArraySize()))
+    ++AfterAllocaInsertPt;
+  for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
+       AI != E; ++AI) {
+    const Type *Ty = AI->getType();
+    // Aggregate types can't be cast, but are legal argument types, so we have
+    // to handle them differently. We use an extract/insert pair as a
+    // lightweight method to achieve the same goal.
+    if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+      Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+      NI->insertAfter(EI);
+      AI->replaceAllUsesWith(NI);
+      // Set the operand of the instructions back to the Argument.
+      EI->setOperand(0, AI);
+      NI->setOperand(0, AI);
+    } else {
+      // This is always a no-op cast because we're casting AI to AI->getType()
+      // so src and destination types are identical. BitCast is the only
+      // possibility.
+      CastInst *NC = new BitCastInst(
+        AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+      AI->replaceAllUsesWith(NC);
+      // Set the operand of the cast instruction back to the Argument.
+      // Normally it's forbidden to replace a CastInst's operand because it
+      // could cause the opcode to reflect an illegal conversion. However,
+      // we're replacing it here with the same value it was constructed with.
+      // We do this because the above replaceAllUsesWith() clobbered the
+      // operand, but we want this one to remain.
+      NC->setOperand(0, AI);
+    }
+  }
+
+  // Finally, scan the code looking for instructions with bad live ranges.
+  for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
+    for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) {
+      // Ignore obvious cases we don't have to handle. In particular, most
+      // instructions either have no uses or only have a single use inside the
+      // current block. Ignore them quickly.
+      Instruction *Inst = II;
+      if (Inst->use_empty()) continue;
+      if (Inst->hasOneUse() &&
+          cast<Instruction>(Inst->use_back())->getParent() == BB &&
+          !isa<PHINode>(Inst->use_back())) continue;
+
+      // If this is an alloca in the entry block, it's not a real register
+      // value.
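+      // Entry-block allocas with a constant size live in stack slots, and
+      // memory survives the longjmp back to the dispatcher, so they never
+      // need a spill.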
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst))
+        if (isa<ConstantInt>(AI->getArraySize()) && BB == F->begin())
+          continue;
+
+      // Avoid iterator invalidation by copying users to a temporary vector.
+      SmallVector<Instruction*,16> Users;
+      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
+           UI != E; ++UI) {
+        Instruction *User = cast<Instruction>(*UI);
+        if (User->getParent() != BB || isa<PHINode>(User))
+          Users.push_back(User);
+      }
+
+      // Find all of the blocks that this value is live in.
+      std::set<BasicBlock*> LiveBBs;
+      LiveBBs.insert(Inst->getParent());
+      while (!Users.empty()) {
+        Instruction *U = Users.back();
+        Users.pop_back();
+
+        if (!isa<PHINode>(U)) {
+          MarkBlocksLiveIn(U->getParent(), LiveBBs);
+        } else {
+          // Uses for a PHI node occur in their predecessor block.
+          PHINode *PN = cast<PHINode>(U);
+          for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+            if (PN->getIncomingValue(i) == Inst)
+              MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs);
+        }
+      }
+
+      // Now that we know all of the blocks that this thing is live in, see if
+      // it includes any of the unwind locations.
+      bool NeedsSpill = false;
+      for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
+        BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest();
+        if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) {
+          NeedsSpill = true;
+        }
+      }
+
+      // If we decided we need a spill, do it.
+      // FIXME: Spilling this way is overkill, as it forces all uses of
+      // the value to be reloaded from the stack slot, even those that aren't
+      // in the unwind blocks. We should be more selective.
+      if (NeedsSpill) {
+        ++NumSpilled;
+        DemoteRegToStack(*Inst, true);
+      }
+    }
+}
+
+bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
+  SmallVector<ReturnInst*,16> Returns;
+  SmallVector<UnwindInst*,16> Unwinds;
+  SmallVector<InvokeInst*,16> Invokes;
+
+  // Look through the terminators of the basic blocks to find invokes, returns
+  // and unwinds.
+  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      // Remember all return instructions in case we insert an invoke into this
+      // function.
+      Returns.push_back(RI);
+    } else if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
+      Invokes.push_back(II);
+    } else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
+      Unwinds.push_back(UI);
+    }
+  }
+
+  NumInvokes += Invokes.size();
+  NumUnwinds += Unwinds.size();
+
+  // If we don't have any invokes, there's nothing to do.
+  if (Invokes.empty()) return false;
+
+  // Find the eh.selector.*, eh.exception and alloca calls.
+  //
+  // Remember any allocas that aren't in the entry block, as the
+  // jmpbuf saved SP will need to be updated for them.
+  //
+  // We'll use the first eh.selector to determine the right personality
+  // function to use. For SJLJ, we always use the same personality for the
+  // whole function, not on a per-selector basis.
+  // FIXME: That's a bit ugly. Better way?
+  SmallVector<CallInst*,16> EH_Selectors;
+  SmallVector<CallInst*,16> EH_Exceptions;
+  SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
+  // Note: Skip the entry block since there's nothing there that interests
+  // us. eh.selector and eh.exception shouldn't ever be there, and we
+  // want to disregard any allocas that are there.
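+  // Note: the pre-increment in the loop condition below ('++BB != E') is what
+  // skips the entry block; scanning starts at the second basic block.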
+  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        if (CI->getCalledFunction() == SelectorFn) {
+          if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
+          EH_Selectors.push_back(CI);
+        } else if (CI->getCalledFunction() == ExceptionFn) {
+          EH_Exceptions.push_back(CI);
+        } else if (CI->getCalledFunction() == StackRestoreFn) {
+          JmpbufUpdatePoints.push_back(CI);
+        }
+      } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+        JmpbufUpdatePoints.push_back(AI);
+      }
+    }
+  }
+
+  // If we don't have any eh.selector calls, we can't determine the personality
+  // function. Without a personality function, we can't process exceptions.
+  if (!PersonalityFn) return false;
+
+  // We have invokes, so we need to add register/unregister calls to get this
+  // function onto the global unwind stack.
+  //
+  // First thing we need to do is scan the whole function for values that are
+  // live across unwind edges. Each value that is live across an unwind edge we
+  // spill into a stack location, guaranteeing that there is nothing live across
+  // the unwind edge. This process also splits all critical edges coming out of
+  // invokes.
+  splitLiveRangesAcrossInvokes(Invokes);
+
+  BasicBlock *EntryBB = F.begin();
+  // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+  // that needs to be restored on all exits from the function. This is an
+  // alloca because the value needs to be added to the global context list.
+  unsigned Align = 4; // FIXME: Should be a TLI check?
+  AllocaInst *FunctionContext =
+    new AllocaInst(FunctionContextTy, 0, Align,
+                   "fcn_context", F.begin()->begin());
+
+  Value *Idxs[2];
+  const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+  Value *Zero = ConstantInt::get(Int32Ty, 0);
+  // We need to also keep around a reference to the call_site field
+  Idxs[0] = Zero;
+  Idxs[1] = ConstantInt::get(Int32Ty, 1);
+  CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                       "call_site",
+                                       EntryBB->getTerminator());
+
+  // The exception selector comes back in context->data[1]
+  Idxs[1] = ConstantInt::get(Int32Ty, 2);
+  Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                            "fc_data",
+                                            EntryBB->getTerminator());
+  Idxs[1] = ConstantInt::get(Int32Ty, 1);
+  Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                  "exc_selector_gep",
+                                                  EntryBB->getTerminator());
+  // The exception value comes back in context->data[0]
+  Idxs[1] = Zero;
+  Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+                                                   "exception_gep",
+                                                   EntryBB->getTerminator());
+
+  // The result of the eh.selector call will be replaced with a reference to
+  // the selector value returned in the function context. We leave the selector
+  // itself so the EH analysis later can use it.
+  for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+    CallInst *I = EH_Selectors[i];
+    Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+    I->replaceAllUsesWith(SelectorVal);
+  }
+
+  // eh.exception calls are replaced with references to the proper location in
+  // the context. Unlike eh.selector, the eh.exception calls are removed
+  // entirely.
+  for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+    CallInst *I = EH_Exceptions[i];
+    // Possible for there to be duplicates, so check to make sure the
+    // instruction hasn't already been removed.
+    if (!I->getParent()) continue;
+    Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+    const Type *Ty = Type::getInt8PtrTy(F.getContext());
+    Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+    I->replaceAllUsesWith(Val);
+    I->eraseFromParent();
+  }
+
+  // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+  // branch to a dispatch block for non-zero returns. If we return normally,
+  // we're not handling an exception and just register the function context and
+  // continue.
+
+  // Create the dispatch block. The dispatch block is basically a big switch
+  // statement that goes to all of the invoke landing pads.
+  BasicBlock *DispatchBlock =
+          BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+  // Add a call to dispatch_setup at the start of the dispatch block. This is
+  // expanded to any target-specific setup that needs to be done.
+  Value *SetupArg =
+    CastInst::Create(Instruction::BitCast, FunctionContext,
+                     Type::getInt8PtrTy(F.getContext()), "",
+                     DispatchBlock);
+  CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock);
+
+  // Insert a load of the callsite in the dispatch block, and a switch on its
+  // value. By default, we go to a block that just does an unwind (which is the
+  // correct action for a standard call).
+  BasicBlock *UnwindBlock =
+    BasicBlock::Create(F.getContext(), "unwindbb", &F);
+  Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+  Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+                                     DispatchBlock);
+  SwitchInst *DispatchSwitch =
+    SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+                       DispatchBlock);
+  // Split the entry block to insert the conditional branch for the setjmp.
+  BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+                                                   "eh.sjlj.setjmp.cont");
+
+  // Populate the Function Context
+  //   1. LSDA address
+  //   2. Personality function address
+  //   3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+  // LSDA address
+  Idxs[0] = Zero;
+  Idxs[1] = ConstantInt::get(Int32Ty, 4);
+  Value *LSDAFieldPtr =
+    GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                              "lsda_gep",
+                              EntryBB->getTerminator());
+  Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+                                 EntryBB->getTerminator());
+  new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+  Idxs[1] = ConstantInt::get(Int32Ty, 3);
+  Value *PersonalityFieldPtr =
+    GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                              "personality_gep",
+                              EntryBB->getTerminator());
+  new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+                EntryBB->getTerminator());
+
+  // Save the frame pointer.
+  Idxs[1] = ConstantInt::get(Int32Ty, 5);
+  Value *JBufPtr
+    = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+                                "jbuf_gep",
+                                EntryBB->getTerminator());
+  Idxs[1] = ConstantInt::get(Int32Ty, 0);
+  Value *FramePtr =
+    GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+                              EntryBB->getTerminator());
+
+  Value *Val = CallInst::Create(FrameAddrFn,
+                                ConstantInt::get(Int32Ty, 0),
+                                "fp",
+                                EntryBB->getTerminator());
+  new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+  // Save the stack pointer.
+  Idxs[1] = ConstantInt::get(Int32Ty, 2);
+  Value *StackPtr =
+    GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+                              EntryBB->getTerminator());
+
+  Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+  new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+  // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+  Value *SetjmpArg =
+    CastInst::Create(Instruction::BitCast, JBufPtr,
+                     Type::getInt8PtrTy(F.getContext()), "",
+                     EntryBB->getTerminator());
+  Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+                                        "dispatch",
+                                        EntryBB->getTerminator());
+  // Check the return value of the setjmp. Non-zero goes to dispatcher.
+  Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+                                 ICmpInst::ICMP_EQ, DispatchVal, Zero,
+                                 "notunwind");
+  // Nuke the uncond branch.
+  EntryBB->getTerminator()->eraseFromParent();
+
+  // Put in a new condbranch in its place.
+  BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+  // Register the function context and make sure it's known to not throw
+  CallInst *Register =
+    CallInst::Create(RegisterFn, FunctionContext, "",
+                     ContBlock->getTerminator());
+  Register->setDoesNotThrow();
+
+  // At this point, we are all set up, update the invoke instructions to mark
+  // their call_site values, and fill in the dispatch switch accordingly.
+  for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+    markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+  // Mark call instructions that aren't nounwind as no-action (call_site ==
+  // -1). Skip the entry block, as prior to then, no function context has been
+  // created for this function and any unexpected exceptions thrown will go
+  // directly to the caller's context, which is what we want anyway, so no need
+  // to do anything here.
+  for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+    for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+        // Ignore calls to the EH builtins (eh.selector, eh.exception)
+        Constant *Callee = CI->getCalledFunction();
+        if (Callee != SelectorFn && Callee != ExceptionFn
+            && !CI->doesNotThrow())
+          insertCallSiteStore(CI, -1, CallSite);
+      }
+  }
+
+  // Replace all unwinds with a branch to the unwind handler.
+  // ??? Should this ever happen with sjlj exceptions?
+  for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+    BranchInst::Create(UnwindBlock, Unwinds[i]);
+    Unwinds[i]->eraseFromParent();
+  }
+
+  // Following any allocas not in the entry block, update the saved SP in the
+  // jmpbuf to the new value.
+  for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+    Instruction *AI = JmpbufUpdatePoints[i];
+    Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+    StackAddr->insertAfter(AI);
+    Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+    StoreStackAddr->insertAfter(StackAddr);
+  }
+
+  // Finally, for any returns from this function, if this function contains an
+  // invoke, add a call to unregister the function context.
+  for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+    CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
+  return true;
+}
+
+bool SjLjEHPass::runOnFunction(Function &F) {
+  bool Res = insertSjLjEHSupport(F);
+  return Res;
+}
diff --git a/final/lib/CodeGen/SlotIndexes.cpp b/final/lib/CodeGen/SlotIndexes.cpp
new file mode 100644
index 00000000000..c0ae34301dc
--- /dev/null
+++ b/final/lib/CodeGen/SlotIndexes.cpp
@@ -0,0 +1,181 @@
+//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "slotindexes"
+
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char SlotIndexes::ID = 0;
+INITIALIZE_PASS(SlotIndexes, "slotindexes",
+                "Slot index numbering", false, false)
+
+STATISTIC(NumLocalRenum,  "Number of local renumberings");
+STATISTIC(NumGlobalRenum, "Number of global renumberings");
+
+void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
+  au.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+void SlotIndexes::releaseMemory() {
+  mi2iMap.clear();
+  mbb2IdxMap.clear();
+  idx2MBBMap.clear();
+  clearList();
+}
+
+bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
+
+  // Compute numbering as follows:
+  // Grab an iterator to the start of the index list.
+  // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
+  // iterator in lock-step (though skipping it over indexes which have
+  // null pointers in the instruction field).
+  // At each iteration assert that the instruction pointed to in the index
+  // is the same one pointed to by the MI iterator.
+
+  // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
+  // only need to be set up once after the first numbering is computed.
+
+  mf = &fn;
+  initList();
+
+  // Check that the list contains only the sentinel.
+  assert(indexListHead->getNext() == 0 &&
+         "Index list non-empty at initial numbering?");
+  assert(idx2MBBMap.empty() &&
+         "Index -> MBB mapping non-empty at initial numbering?");
+  assert(mbb2IdxMap.empty() &&
+         "MBB -> Index mapping non-empty at initial numbering?");
+  assert(mi2iMap.empty() &&
+         "MachineInstr -> Index mapping non-empty at initial numbering?");
+
+  functionSize = 0;
+  unsigned index = 0;
+
+  push_back(createEntry(0, index));
+
+  // Iterate over the function.
+  for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end();
+       mbbItr != mbbEnd; ++mbbItr) {
+    MachineBasicBlock *mbb = &*mbbItr;
+
+    // Insert an index for the MBB start.
+    SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
+
+    for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
+         miItr != miEnd; ++miItr) {
+      MachineInstr *mi = miItr;
+      if (mi->isDebugValue())
+        continue;
+
+      // Insert a store index for the instr.
+      push_back(createEntry(mi, index += SlotIndex::InstrDist));
+
+      // Save this base index in the maps.
+      mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD)));
+
+      ++functionSize;
+    }
+
+    // We insert one blank instruction between basic blocks.
+    push_back(createEntry(0, index += SlotIndex::InstrDist));
+
+    SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
+    mbb2IdxMap.insert(
+      std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex)));
+
+    idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb));
+  }
+
+  // Sort the Idx2MBBMap
+  std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare());
+
+  DEBUG(dump());
+
+  // And we're done!
+  return false;
+}
+
+void SlotIndexes::renumberIndexes() {
+  // Renumber updates the index of every element of the index list.
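+  // Entries are respaced SlotIndex::InstrDist apart, which leaves room for
+  // the per-instruction slots (printed as "LudS" by SlotIndex::print below)
+  // plus headroom for later local insertions.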
+  DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
+  ++NumGlobalRenum;
+
+  unsigned index = 0;
+
+  for (IndexListEntry *curEntry = front(); curEntry != getTail();
+       curEntry = curEntry->getNext()) {
+    curEntry->setIndex(index);
+    index += SlotIndex::InstrDist;
+  }
+}
+
+// Renumber indexes locally after curEntry was inserted, but failed to get a new
+// index.
+void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) {
+  // Number indexes with half the default spacing so we can catch up quickly.
+  const unsigned Space = SlotIndex::InstrDist/2;
+  assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM");
+
+  IndexListEntry *start = curEntry->getPrev();
+  unsigned index = start->getIndex();
+  IndexListEntry *tail = getTail();
+  do {
+    curEntry->setIndex(index += Space);
+    curEntry = curEntry->getNext();
+    // If the next index is bigger, we have caught up.
+  } while (curEntry != tail && curEntry->getIndex() <= index);
+
+  DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-'
+               << index << " ***\n");
+  ++NumLocalRenum;
+}
+
+
+void SlotIndexes::dump() const {
+  for (const IndexListEntry *itr = front(); itr != getTail();
+       itr = itr->getNext()) {
+    dbgs() << itr->getIndex() << " ";
+
+    if (itr->getInstr() != 0) {
+      dbgs() << *itr->getInstr();
+    } else {
+      dbgs() << "\n";
+    }
+  }
+
+  for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
+       itr != mbb2IdxMap.end(); ++itr) {
+    dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
+           << itr->second.first << ", " << itr->second.second << "]\n";
+  }
+}
+
+// Print a SlotIndex to a raw_ostream.
+void SlotIndex::print(raw_ostream &os) const {
+  if (isValid())
+    os << entry().getIndex() << "LudS"[getSlot()];
+  else
+    os << "invalid";
+}
+
+// Dump a SlotIndex to stderr.
+void SlotIndex::dump() const {
+  print(dbgs());
+  dbgs() << "\n";
+}
+
diff --git a/final/lib/CodeGen/SpillPlacement.cpp b/final/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 00000000000..57951ed8068
--- /dev/null
+++ b/final/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,324 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the
+// node bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if
+// it is spilled or in a register. These blocks become connections in the
+// Hopfield network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+//   E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
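+//
+// As a small worked example (illustrative numbers only), take two bundles n
+// and m joined by one transparent block with frequency F_b = 1, and biases
+// B_n = 1 (prefers a register) and B_m = -0.5 (mildly prefers a spill).
+// Choosing V_n = V_m = 1 gives E = -(1*(1 + 1) + 1*(-0.5 + 1)) = -2.5, lower
+// than any other assignment, so the link pulls both bundles into registers.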
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+                      "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+                    "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequiredTransitive<EdgeBundles>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+  /// Frequency - Total block frequency feeding into[0] or out of[1] the
+  /// bundle. Ideally, these two numbers should be identical, but inaccuracies
+  /// in the block frequency estimates mean that we need to normalize ingoing
+  /// and outgoing frequencies separately so they are commensurate.
+  float Frequency[2];
+
+  /// Bias - Normalized contributions from non-transparent blocks.
+  /// A bundle connected to a MustSpill block has a huge negative bias,
+  /// otherwise it is a number in the range [-2;2].
+  float Bias;
+
+  /// Value - Output value of this node computed from the Bias and links.
+  /// This is always in the range [-1;1]. A positive number means the variable
+  /// should go in a register through this bundle.
+  float Value;
+
+  typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+  /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+  /// bundles. The weights are all positive and add up to at most 2, weights
+  /// from ingoing and outgoing nodes separately add up to at most 1. The
+  /// weight sum can be less than 2 when the variable is not live into / out of
+  /// some connected basic blocks.
+  LinkVector Links;
+
+  /// preferReg - Return true when this node prefers to be in a register.
+  bool preferReg() const {
+    // Undecided nodes (Value==0) go on the stack.
+    return Value > 0;
+  }
+
+  /// mustSpill - Return True if this node is so biased that it must spill.
+  bool mustSpill() const {
+    // Actually, we must spill if Bias < sum(weights).
+    // It may be worth it to compute the weight sum here?
+    return Bias < -2.0f;
+  }
+
+  /// Node - Create a blank Node.
+  Node() {
+    Frequency[0] = Frequency[1] = 0;
+  }
+
+  /// clear - Reset per-query data, but preserve frequencies that only depend
+  /// on the CFG.
+  void clear() {
+    Bias = Value = 0;
+    Links.clear();
+  }
+
+  /// addLink - Add a link to bundle b with weight w.
+  /// out=0 for an ingoing link, and 1 for an outgoing link.
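+  /// For example (illustrative numbers), a transparent block with frequency 4
+  /// in a bundle whose total ingoing frequency is 8 contributes a normalized
+  /// link weight of 4/8 = 0.5 in that direction.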
+  void addLink(unsigned b, float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w /= Frequency[out];
+
+    // There can be multiple links to the same bundle, add them up.
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      if (I->second == b) {
+        I->first += w;
+        return;
+      }
+    // This must be the first link to b.
+    Links.push_back(std::make_pair(w, b));
+  }
+
+  /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+  void addBias(float w, bool out) {
+    // Normalize w relative to all connected blocks from that direction.
+    w /= Frequency[out];
+    Bias += w;
+  }
+
+  /// update - Recompute Value from Bias and Links. Return true when node
+  /// preference changes.
+  bool update(const Node nodes[]) {
+    // Compute the weighted sum of inputs.
+    float Sum = Bias;
+    for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+      Sum += I->first * nodes[I->second].Value;
+
+    // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+    // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+    // two reasons:
+    //  1. It avoids arbitrary bias when all links are 0 as is possible during
+    //     initial iterations.
+    //  2. It helps tame rounding errors when the links nominally sum to 0.
+    const float Thres = 1e-4f;
+    bool Before = preferReg();
+    if (Sum < -Thres)
+      Value = -1;
+    else if (Sum > Thres)
+      Value = 1;
+    else
+      Value = 0;
+    return Before != preferReg();
+  }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+  MF = &mf;
+  bundles = &getAnalysis<EdgeBundles>();
+  loops = &getAnalysis<MachineLoopInfo>();
+
+  assert(!nodes && "Leaking node array");
+  nodes = new Node[bundles->getNumBundles()];
+
+  // Compute total ingoing and outgoing block frequencies for all bundles.
+  BlockFrequency.resize(mf.getNumBlockIDs());
+  for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+    float Freq = LiveIntervals::getSpillWeight(true, false,
+                                               loops->getLoopDepth(I));
+    unsigned Num = I->getNumber();
+    BlockFrequency[Num] = Freq;
+    nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq;
+    nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq;
+  }
+
+  // We never change the function.
+  return false;
+}
+
+void SpillPlacement::releaseMemory() {
+  delete[] nodes;
+  nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+  if (ActiveNodes->test(n))
+    return;
+  ActiveNodes->set(n);
+  nodes[n].clear();
+}
+
+
+/// prepareNodes - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::
+prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
+  for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(),
+       E = LiveBlocks.end(); I != E; ++I) {
+    float Freq = getBlockFrequency(I->Number);
+
+    // Is this a transparent block? Link ingoing and outgoing bundles.
+    if (I->Entry == DontCare && I->Exit == DontCare) {
+      unsigned ib = bundles->getBundle(I->Number, 0);
+      unsigned ob = bundles->getBundle(I->Number, 1);
+
+      // Ignore self-loops.
+      if (ib == ob)
+        continue;
+      activate(ib);
+      activate(ob);
+      nodes[ib].addLink(ob, Freq, 1);
+      nodes[ob].addLink(ib, Freq, 0);
+      continue;
+    }
+
+    // This block is not transparent, but it can still add bias.
+    const float Bias[] = {
+      0,           // DontCare,
+      1,           // PrefReg,
+      -1,          // PrefSpill
+      -HUGE_VALF   // MustSpill
+    };
+
+    // Live-in to block?
+    if (I->Entry != DontCare) {
+      unsigned ib = bundles->getBundle(I->Number, 0);
+      activate(ib);
+      nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+    }
+
+    // Live-out from block?
+    if (I->Exit != DontCare) {
+      unsigned ob = bundles->getBundle(I->Number, 1);
+      activate(ob);
+      nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+    }
+  }
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// @param Linked - Numbers of linked nodes that need updating.
+void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
+  if (Linked.empty())
+    return;
+
+  // Run up to 10 iterations. The edge bundle numbering is closely related to
+  // basic block numbering, so there is a strong tendency towards chains of
+  // linked nodes with sequential numbers. By scanning the linked nodes
+  // backwards and forwards, we make it very likely that a single node can
+  // affect the entire network in a single iteration. That means very fast
+  // convergence, usually in a single iteration.
+  for (unsigned iteration = 0; iteration != 10; ++iteration) {
+    // Scan backwards, skipping the last node which was just updated.
+    bool Changed = false;
+    for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+           llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+      unsigned n = *I;
+      bool C = nodes[n].update(nodes);
+      Changed |= C;
+    }
+    if (!Changed)
+      return;
+
+    // Scan forwards, skipping the first node which was just updated.
+    Changed = false;
+    for (SmallVectorImpl<unsigned>::const_iterator I =
+           llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+      unsigned n = *I;
+      bool C = nodes[n].update(nodes);
+      Changed |= C;
+    }
+    if (!Changed)
+      return;
+  }
+}
+
+bool
+SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+                            BitVector &RegBundles) {
+  // Reuse RegBundles as our ActiveNodes vector.
+  ActiveNodes = &RegBundles;
+  ActiveNodes->clear();
+  ActiveNodes->resize(bundles->getNumBundles());
+
+  // Compute active nodes, links and biases.
+  prepareNodes(LiveBlocks);
+
+  // Update all active nodes, and find the ones that are actually linked to
+  // something so their value may change when iterating.
+  SmallVector<unsigned, 8> Linked;
+  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) {
+    nodes[n].update(nodes);
+    // A node that must spill, or a node without any links is not going to
+    // change its value ever again, so exclude it from iterations.
+    if (!nodes[n].Links.empty() && !nodes[n].mustSpill())
+      Linked.push_back(n);
+  }
+
+  // Iterate the network to convergence.
+  iterate(Linked);
+
+  // Write preferences back to RegBundles.
+  bool Perfect = true;
+  for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n))
+    if (!nodes[n].preferReg()) {
+      RegBundles.reset(n);
+      Perfect = false;
+    }
+  return Perfect;
+}
diff --git a/final/lib/CodeGen/SpillPlacement.h b/final/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 00000000000..b0135cbc365
--- /dev/null
+++ b/final/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,113 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic
+// blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling
+// information about the CFG. The real work is done by placeSpills() which is
+// called by the register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+
+class SpillPlacement : public MachineFunctionPass {
+  struct Node;
+  const MachineFunction *MF;
+  const EdgeBundles *bundles;
+  const MachineLoopInfo *loops;
+  Node *nodes;
+
+  // Nodes that are active in the current computation. Owned by the placeSpills
+  // caller.
+  BitVector *ActiveNodes;
+
+  // Block frequencies are computed once. Indexed by block number.
+  SmallVector<float, 4> BlockFrequency;
+
+public:
+  static char ID; // Pass identification, replacement for typeid.
+
+  SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+  ~SpillPlacement() { releaseMemory(); }
+
+  /// BorderConstraint - A basic block has separate constraints for entry and
+  /// exit.
+  enum BorderConstraint {
+    DontCare,  ///< Block doesn't care / variable not live.
+    PrefReg,   ///< Block entry/exit prefers a register.
+    PrefSpill, ///< Block entry/exit prefers a stack slot.
+    MustSpill  ///< A register is impossible, variable must be spilled.
+  };
+
+  /// BlockConstraint - Entry and exit constraints for a basic block.
+  struct BlockConstraint {
+    unsigned Number;            ///< Basic block number (from MBB::getNumber()).
+    BorderConstraint Entry : 8; ///< Constraint on block entry.
+    BorderConstraint Exit : 8;  ///< Constraint on block exit.
+  };
+
+  /// placeSpills - Compute the optimal spill code placement given the
+  /// constraints. No MustSpill constraints will be violated, and the smallest
+  /// possible number of PrefX constraints will be violated, weighted by
+  /// expected execution frequencies.
+  /// @param LiveBlocks Constraints for blocks that have the variable live in
+  ///                   or live out. DontCare/DontCare means the variable is
+  ///                   live through the block. DontCare/X means the variable
+  ///                   is live out, but not live in.
+  /// @param RegBundles Bit vector to receive the edge bundles where the
+  ///                   variable should be kept in a register. Each bit
+  ///                   corresponds to an edge bundle, a set bit means the
+  ///                   variable should be kept in a register through the
+  ///                   bundle. A clear bit means the variable should be
+  ///                   spilled.
+  /// @return True if a perfect solution was found, allowing the variable to be
+  ///         in a register through all relevant bundles.
+  bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+                   BitVector &RegBundles);
+
+  /// getBlockFrequency - Return the estimated block execution frequency per
+  /// function invocation.
+  float getBlockFrequency(unsigned Number) const {
+    return BlockFrequency[Number];
+  }
+
+private:
+  virtual bool runOnMachineFunction(MachineFunction&);
+  virtual void getAnalysisUsage(AnalysisUsage&) const;
+  virtual void releaseMemory();
+
+  void activate(unsigned);
+  void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
+  void iterate(const SmallVectorImpl<unsigned>&);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/CodeGen/Spiller.cpp b/final/lib/CodeGen/Spiller.cpp
new file mode 100644
index 00000000000..fd385824aff
--- /dev/null
+++ b/final/lib/CodeGen/Spiller.cpp
@@ -0,0 +1,243 @@
+//===-- llvm/CodeGen/Spiller.cpp - Spiller -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <set>
+
+using namespace llvm;
+
+namespace {
+  enum SpillerName { trivial, standard, inline_ };
+}
+
+static cl::opt<SpillerName>
+spillerOpt("spiller",
+           cl::desc("Spiller to use: (default: standard)"),
+           cl::Prefix,
+           cl::values(clEnumVal(trivial,   "trivial spiller"),
+                      clEnumVal(standard,  "default spiller"),
+                      clEnumValN(inline_,  "inline", "inline spiller"),
+                      clEnumValEnd),
+           cl::init(standard));
+
+// Spiller virtual destructor implementation.
+Spiller::~Spiller() {}
+
+namespace {
+
+/// Utility class for spillers.
+class SpillerBase : public Spiller {
+protected:
+  MachineFunctionPass *pass;
+  MachineFunction *mf;
+  VirtRegMap *vrm;
+  LiveIntervals *lis;
+  MachineFrameInfo *mfi;
+  MachineRegisterInfo *mri;
+  const TargetInstrInfo *tii;
+  const TargetRegisterInfo *tri;
+
+  /// Construct a spiller base.
+  SpillerBase(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm)
+    : pass(&pass), mf(&mf), vrm(&vrm)
+  {
+    lis = &pass.getAnalysis<LiveIntervals>();
+    mfi = mf.getFrameInfo();
+    mri = &mf.getRegInfo();
+    tii = mf.getTarget().getInstrInfo();
+    tri = mf.getTarget().getRegisterInfo();
+  }
+
+  /// Add spill ranges for every use/def of the live interval, inserting loads
+  /// immediately before each use, and stores after each def. No folding or
+  /// remat is attempted.
+  void trivialSpillEverywhere(LiveInterval *li,
+                              SmallVectorImpl<LiveInterval*> &newIntervals) {
+    DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
+
+    assert(li->weight != HUGE_VALF &&
+           "Attempting to spill already spilled value.");
+
+    assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
+           "Trying to spill a stack slot.");
+
+    DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
+
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned ss = vrm->assignVirt2StackSlot(li->reg);
+
+    // Iterate over reg uses/defs.
+    for (MachineRegisterInfo::reg_iterator
+         regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
+
+      // Grab the use/def instr.
+      MachineInstr *mi = &*regItr;
+
+      DEBUG(dbgs() << "  Processing " << *mi);
+
+      // Step regItr to the next use/def instr.
+      do {
+        ++regItr;
+      } while (regItr != mri->reg_end() && (&*regItr == mi));
+
+      // Collect uses & defs for this instr.
+      SmallVector<unsigned, 2> indices;
+      bool hasUse = false;
+      bool hasDef = false;
+      for (unsigned i = 0; i != mi->getNumOperands(); ++i) {
+        MachineOperand &op = mi->getOperand(i);
+        if (!op.isReg() || op.getReg() != li->reg)
+          continue;
+        hasUse |= mi->getOperand(i).isUse();
+        hasDef |= mi->getOperand(i).isDef();
+        indices.push_back(i);
+      }
+
+      // Create a new vreg & interval for this instr.
+      unsigned newVReg = mri->createVirtualRegister(trc);
+      vrm->grow();
+      vrm->assignVirt2StackSlot(newVReg, ss);
+      LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+      newLI->weight = HUGE_VALF;
+
+      // Update the reg operands & kill flags.
+      for (unsigned i = 0; i < indices.size(); ++i) {
+        unsigned mopIdx = indices[i];
+        MachineOperand &mop = mi->getOperand(mopIdx);
+        mop.setReg(newVReg);
+        if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) {
+          mop.setIsKill(true);
+        }
+      }
+      assert(hasUse || hasDef);
+
+      // Insert reload if necessary.
+      MachineBasicBlock::iterator miItr(mi);
+      if (hasUse) {
+        tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+                                  tri);
+        MachineInstr *loadInstr(prior(miItr));
+        SlotIndex loadIndex =
+          lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+        vrm->addSpillSlotUse(ss, loadInstr);
+        SlotIndex endIndex = loadIndex.getNextIndex();
+        VNInfo *loadVNI =
+          newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
+        newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
+      }
+
+      // Insert store if necessary.
+      if (hasDef) {
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+                                 true, ss, trc, tri);
+        MachineInstr *storeInstr(llvm::next(miItr));
+        SlotIndex storeIndex =
+          lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+        vrm->addSpillSlotUse(ss, storeInstr);
+        SlotIndex beginIndex = storeIndex.getPrevIndex();
+        VNInfo *storeVNI =
+          newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
+        newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
+      }
+
+      newIntervals.push_back(newLI);
+    }
+  }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Spills any live range using the spill-everywhere method with no attempt at
+/// folding.
+class TrivialSpiller : public SpillerBase {
+public:
+
+  TrivialSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+                 VirtRegMap &vrm)
+    : SpillerBase(pass, mf, vrm) {}
+
+  void spill(LiveInterval *li,
+             SmallVectorImpl<LiveInterval*> &newIntervals,
+             const SmallVectorImpl<LiveInterval*> &) {
+    // Ignore spillIs - we don't use it.
+    trivialSpillEverywhere(li, newIntervals);
+  }
+};
+
+} // end anonymous namespace
+
+namespace {
+
+/// Falls back on LiveIntervals::addIntervalsForSpills.
+class StandardSpiller : public Spiller {
+protected:
+  MachineFunction *mf;
+  LiveIntervals *lis;
+  LiveStacks *lss;
+  MachineLoopInfo *loopInfo;
+  VirtRegMap *vrm;
+public:
+  StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
+                  VirtRegMap &vrm)
+    : mf(&mf),
+      lis(&pass.getAnalysis<LiveIntervals>()),
+      lss(&pass.getAnalysis<LiveStacks>()),
+      loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
+      vrm(&vrm) {}
+
+  /// Falls back on LiveIntervals::addIntervalsForSpills.
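+  /// Also merges the spilled range into the stack slot's interval in
+  /// LiveStacks, so the slot's liveness is tracked alongside the new
+  /// virtual-register intervals.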
+  void spill(LiveInterval *li,
+             SmallVectorImpl<LiveInterval*> &newIntervals,
+             const SmallVectorImpl<LiveInterval*> &spillIs) {
+    std::vector<LiveInterval*> added =
+      lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+    newIntervals.insert(newIntervals.end(), added.begin(), added.end());
+
+    // Update LiveStacks.
+    int SS = vrm->getStackSlot(li->reg);
+    if (SS == VirtRegMap::NO_STACK_SLOT)
+      return;
+    const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg);
+    LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+    if (!SI.hasAtLeastOneValue())
+      SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+    SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0));
+  }
+};
+
+} // end anonymous namespace
+
+llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
+                                   MachineFunction &mf,
+                                   VirtRegMap &vrm) {
+  switch (spillerOpt) {
+  default: assert(0 && "unknown spiller");
+  case trivial: return new TrivialSpiller(pass, mf, vrm);
+  case standard: return new StandardSpiller(pass, mf, vrm);
+  case inline_: return createInlineSpiller(pass, mf, vrm);
+  }
+}
diff --git a/final/lib/CodeGen/Spiller.h b/final/lib/CodeGen/Spiller.h
new file mode 100644
index 00000000000..f017583494e
--- /dev/null
+++ b/final/lib/CodeGen/Spiller.h
@@ -0,0 +1,56 @@
+//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLER_H
+#define LLVM_CODEGEN_SPILLER_H
+
+namespace llvm {
+
+  class LiveInterval;
+  class MachineFunction;
+  class MachineFunctionPass;
+  class SlotIndex;
+  template <typename T> class SmallVectorImpl;
+  class VirtRegMap;
+
+  /// Spiller interface.
+  ///
+  /// Implementations are utility classes which insert spill or remat code on
+  /// demand.
+  class Spiller {
+  public:
+    virtual ~Spiller() = 0;
+
+    /// spill - Spill the given live interval. The method used will depend on
+    /// the Spiller implementation selected.
+    ///
+    /// @param li            The live interval to be spilled.
+    /// @param spillIs       A list of intervals that are about to be spilled,
+    ///                      and so cannot be used for remat etc.
+    /// @param newIntervals  The newly created intervals will be appended here.
+    virtual void spill(LiveInterval *li,
+                       SmallVectorImpl<LiveInterval*> &newIntervals,
+                       const SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+
+  };
+
+  /// Create and return a spiller object, as specified on the command line.
+  Spiller* createSpiller(MachineFunctionPass &pass,
+                         MachineFunction &mf,
+                         VirtRegMap &vrm);
+
+  /// Create and return a spiller that will insert spill code directly instead
+  /// of deferring through VirtRegMap.
+  Spiller *createInlineSpiller(MachineFunctionPass &pass,
+                               MachineFunction &mf,
+                               VirtRegMap &vrm);
+
+}
+
+#endif
diff --git a/final/lib/CodeGen/SplitKit.cpp b/final/lib/CodeGen/SplitKit.cpp
new file mode 100644
index 00000000000..c4ae8c474c5
--- /dev/null
+++ b/final/lib/CodeGen/SplitKit.cpp
@@ -0,0 +1,921 @@
+//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
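+//
+// In rough outline (a sketch; the register allocator is the real driver), a
+// split proceeds by running SplitAnalysis::analyze() on an interval to gather
+// per-block use information, then handing the analysis to a SplitEditor,
+// which creates the new intervals through a LiveRangeEdit.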
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "SplitKit.h"
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+AllowSplit("spiller-splits-edges",
+           cl::desc("Allow critical edge splitting during spilling"));
+
+STATISTIC(NumFinished, "Number of splits finished");
+STATISTIC(NumSimple,   "Number of splits that were simple");
+
+//===----------------------------------------------------------------------===//
+//                                 Split Analysis
+//===----------------------------------------------------------------------===//
+
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
+                             const LiveIntervals &lis,
+                             const MachineLoopInfo &mli)
+  : MF(vrm.getMachineFunction()),
+    VRM(vrm),
+    LIS(lis),
+    Loops(mli),
+    TII(*MF.getTarget().getInstrInfo()),
+    CurLI(0) {}
+
+void SplitAnalysis::clear() {
+  UseSlots.clear();
+  UsingInstrs.clear();
+  UsingBlocks.clear();
+  LiveBlocks.clear();
+  CurLI = 0;
+}
+
+bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
+  MachineBasicBlock *T, *F;
+  SmallVector<MachineOperand, 4> Cond;
+  return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+}
+
+/// analyzeUses - Count instructions, basic blocks, and loops using CurLI.
+void SplitAnalysis::analyzeUses() {
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+  for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg),
+       E = MRI.reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    if (MO.isUse() && MO.isUndef())
+      continue;
+    MachineInstr *MI = MO.getParent();
+    if (MI->isDebugValue() || !UsingInstrs.insert(MI))
+      continue;
+    UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex());
+    MachineBasicBlock *MBB = MI->getParent();
+    UsingBlocks[MBB]++;
+  }
+  array_pod_sort(UseSlots.begin(), UseSlots.end());
+
+  // Compute per-live block info.
+  if (!calcLiveBlockInfo()) {
+    // FIXME: calcLiveBlockInfo found inconsistencies in the live range.
+    // I am looking at you, SimpleRegisterCoalescing!
+    DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
+    const_cast<LiveIntervals&>(LIS)
+      .shrinkToUses(const_cast<LiveInterval*>(CurLI));
+    LiveBlocks.clear();
+    bool fixed = calcLiveBlockInfo();
+    (void)fixed;
+    assert(fixed && "Couldn't fix broken live interval");
+  }
+
+  DEBUG(dbgs() << "  counted "
+               << UsingInstrs.size() << " instrs, "
+               << UsingBlocks.size() << " blocks.\n");
+}
+
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+bool SplitAnalysis::calcLiveBlockInfo() {
+  if (CurLI->empty())
+    return true;
+
+  LiveInterval::const_iterator LVI = CurLI->begin();
+  LiveInterval::const_iterator LVE = CurLI->end();
+
+  SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+  UseI = UseSlots.begin();
+  UseE = UseSlots.end();
+
+  // Loop over basic blocks where CurLI is live.
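+  // Each pass through the loop below fills in one BlockInfo record, keeping
+  // the live-segment iterator LVI and the use iterator UseI in lock-step
+  // with the current block.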
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start); + for (;;) { + BlockInfo BI; + BI.MBB = MFI; + tie(BI.Start, BI.Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + // The last split point is the latest possible insertion point that dominates + // all successor blocks. If interference reaches LastSplitPoint, it is not + // possible to insert a split or reload that makes CurLI live in the + // outgoing bundle. + MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); + if (LSP == BI.MBB->end()) + BI.LastSplitPoint = BI.Stop; + else + BI.LastSplitPoint = LIS.getInstructionIndex(LSP); + + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= BI.Start; + if (!BI.LiveIn) + BI.Def = LVI->start; + + // Find the first and last uses in the block. + BI.Uses = hasUses(MFI); + if (BI.Uses && UseI != UseE) { + BI.FirstUse = *UseI; + assert(BI.FirstUse >= BI.Start); + do ++UseI; + while (UseI != UseE && *UseI < BI.Stop); + BI.LastUse = UseI[-1]; + assert(BI.LastUse < BI.Stop); + } + + // Look for gaps in the live range. + bool hasGap = false; + BI.LiveOut = true; + while (LVI->end < BI.Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= BI.Stop) { + BI.Kill = LastStop; + BI.LiveOut = false; + break; + } + if (LastStop < LVI->start) { + hasGap = true; + BI.Kill = LastStop; + BI.Def = LVI->start; + } + } + + // Don't set LiveThrough when the block has a gap. + BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; + LiveBlocks.push_back(BI); + + // FIXME: This should never happen. The live range stops or starts without a + // corresponding use. An earlier pass did something wrong. + if (!BI.LiveThrough && !BI.Uses) + return false; + + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; + + // Live segment ends exactly at Stop. Move to the next segment. + if (LVI->end == BI.Stop && ++LVI == LVE) + break; + + // Pick the next basic block. + if (LVI->start < BI.Stop) + ++MFI; + else + MFI = LIS.getMBBFromIndex(LVI->start); + } + return true; +} + +bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { + unsigned OrigReg = VRM.getOriginal(CurLI->reg); + const LiveInterval &Orig = LIS.getInterval(OrigReg); + assert(!Orig.empty() && "Splitting empty interval?"); + LiveInterval::const_iterator I = Orig.find(Idx); + + // Range containing Idx should begin at Idx. + if (I != Orig.end() && I->start <= Idx) + return I->start == Idx; + + // Range does not contain Idx, previous must end at Idx. + return I != Orig.begin() && (--I)->end == Idx; +} + +void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { + for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { + unsigned count = UsingBlocks.lookup(*I); + OS << " BB#" << (*I)->getNumber(); + if (count) + OS << '(' << count << ')'; + } +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + CurLI = li; + analyzeUses(); +} + + +//===----------------------------------------------------------------------===// +// Split Editor +//===----------------------------------------------------------------------===// + +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
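+/// A typical editing session follows the open/enter/use/leave/close protocol
+/// and ends with a single call to finish(). The per-block pattern used by
+/// splitSingleBlocks() at the end of this file is representative (BI is the
+/// BlockInfo describing the block being split):
+///
+///   openIntv();
+///   SlotIndex Start = enterIntvBefore(BI.FirstUse);
+///   useIntv(Start, leaveIntvAfter(BI.LastUse));
+///   closeIntv();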
+SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(0), + OpenIdx(0), + RegAssign(Allocator) +{} + +void SplitEditor::reset(LiveRangeEdit &lre) { + Edit = &lre; + OpenIdx = 0; + RegAssign.clear(); + Values.clear(); + + // We don't need to clear LiveOutCache, only LiveOutSeen entries are read. + LiveOutSeen.clear(); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(LIS, TII, 0); +} + +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; +} + +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = Edit->get(RegIdx); + + // Create a new value. + VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); + + // Preserve the PHIDef bit. + if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) + VNI->setIsPHIDef(true); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI)); + + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; + + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second) { + SlotIndex Def = OldVNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + // No longer a simple mapping. + InsP.first->second = 0; + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + + return VNI; +} + +void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) { + assert(ParentVNI && "Mapping NULL value"); + VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)]; + + // ParentVNI was either unmapped or already complex mapped. Either way. + if (!VNI) + return; + + // This was previously a single mapping. Make sure the old def is represented + // by a trivial live range. + SlotIndex Def = VNI->def; + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + VNI = 0; +} + +// extendRange - Extend the live range to reach Idx. +// Potentially create phi-def values. +void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { + assert(Idx.isValid() && "Invalid SlotIndex"); + MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); + assert(IdxMBB && "No MBB at Idx"); + LiveInterval *LI = Edit->get(RegIdx); + + // Is there a def in the same MBB we can extend? + if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx)) + return; + + // Now for the fun part. We know that ParentVNI potentially has multiple defs, + // and we may need to create even more phi-defs to preserve VNInfo SSA form. + // Perform a search for all predecessor blocks where we know the dominating + // VNInfo. 
Insert phi-def VNInfos along the path back to IdxMBB. + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // Blocks where LI should be live-in. + SmallVector LiveIn; + LiveIn.push_back(MDT[IdxMBB]); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *IdxVNI = 0; + + // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. + for (unsigned i = 0; i != LiveIn.size(); ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + LiveOutPair &LOP = LiveOutCache[Pred]; + + // Is this a known live-out block? + if (LiveOutSeen.test(Pred->getNumber())) { + if (VNInfo *VNI = LOP.first) { + if (IdxVNI && IdxVNI != VNI) + UniqueVNI = false; + IdxVNI = VNI; + } + continue; + } + + // First time. LOP is garbage and must be cleared below. + LiveOutSeen.set(Pred->getNumber()); + + // Does Pred provide a live-out value? + SlotIndex Start, Last; + tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred); + Last = Last.getPrevSlot(); + VNInfo *VNI = LI->extendInBlock(Start, Last); + LOP.first = VNI; + if (VNI) { + LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; + if (IdxVNI && IdxVNI != VNI) + UniqueVNI = false; + IdxVNI = VNI; + continue; + } + LOP.second = 0; + + // No, we need a live-in value for Pred as well + if (Pred != IdxMBB) + LiveIn.push_back(MDT[Pred]); + else + UniqueVNI = false; // Loopback to IdxMBB, ask updateSSA() for help. + } + } + + // We may need to add phi-def values to preserve the SSA form. + if (UniqueVNI) { + LiveOutPair LOP(IdxVNI, MDT[LIS.getMBBFromIndex(IdxVNI->def)]); + // Update LiveOutCache, but skip IdxMBB at LiveIn[0]. + for (unsigned i = 1, e = LiveIn.size(); i != e; ++i) + LiveOutCache[LiveIn[i]->getBlock()] = LOP; + } else + IdxVNI = updateSSA(RegIdx, LiveIn, Idx, IdxMBB); + + // Since we went through the trouble of a full BFS visiting all reaching defs, + // the values in LiveIn are now accurate. No more phi-defs are needed + // for these blocks, so we can color the live ranges. + for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { + MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LiveOutCache[MBB].first; + + // Anything in LiveIn other than IdxMBB is live-through. + // In IdxMBB, we should stop at Idx unless the same value is live-out. + if (MBB == IdxMBB && IdxVNI != VNI) + LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); + else + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } +} + +VNInfo *SplitEditor::updateSSA(unsigned RegIdx, + SmallVectorImpl &LiveIn, + SlotIndex Idx, + const MachineBasicBlock *IdxMBB) { + // This is essentially the same iterative algorithm that SSAUpdater uses, + // except we already have a dominator tree, so we don't have to recompute it. + LiveInterval *LI = Edit->get(RegIdx); + VNInfo *IdxVNI = 0; + unsigned Changes; + do { + Changes = 0; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. Since LiveIn was created by a BFS, going backwards makes + // it more likely for us to visit immediate dominators before their + // children. 
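+    // (Worked example: suppose B0 branches to B1 and B2, which both jump to
+    //  B3, so IDom(B3) = B0. If B0's live-out value is redefined in B1, then
+    //  predecessor B1 carries a value different from IDomValue whose defining
+    //  block B1 is properly dominated by B0. The dominance check below catches
+    //  exactly this case, and B3 receives a phi-def at its start.)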
+ for (unsigned i = LiveIn.size(); i; --i) { + MachineDomTreeNode *Node = LiveIn[i-1]; + MachineBasicBlock *MBB = Node->getBlock(); + MachineDomTreeNode *IDom = Node->getIDom(); + LiveOutPair IDomValue; + + // We need a live-in value to a block with no immediate dominator? + // This is probably an unreachable block that has survived somehow. + bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); + + // IDom dominates all of our predecessors, but it may not be the immediate + // dominator. Check if any of them have live-out values that are properly + // dominated by IDom. If so, we need a phi-def here. + if (!needPHI) { + IDomValue = LiveOutCache[IDom->getBlock()]; + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + LiveOutPair Value = LiveOutCache[*PI]; + if (!Value.first || Value.first == IDomValue.first) + continue; + // This predecessor is carrying something other than IDomValue. + // It could be because IDomValue hasn't propagated yet, or it could be + // because MBB is in the dominance frontier of that value. + if (MDT.dominates(IDom, Value.second)) { + needPHI = true; + break; + } + } + } + + // Create a phi-def if required. + if (needPHI) { + ++Changes; + SlotIndex Start = LIS.getMBBStartIdx(MBB); + VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); + VNI->setIsPHIDef(true); + // We no longer need LI to be live-in. + LiveIn.erase(LiveIn.begin()+(i-1)); + // Blocks in LiveIn are either IdxMBB, or have a value live-through. + if (MBB == IdxMBB) + IdxVNI = VNI; + // Check if we need to update live-out info. + LiveOutPair &LOP = LiveOutCache[MBB]; + if (LOP.second == Node || !LiveOutSeen.test(MBB->getNumber())) { + // We already have a live-out defined in MBB, so this must be IdxMBB. + assert(MBB == IdxMBB && "Adding phi-def to known live-out"); + LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); + } else { + // This phi-def is also live-out, so color the whole block. + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + LOP = LiveOutPair(VNI, Node); + } + } else if (IDomValue.first) { + // No phi-def here. Remember incoming value for IdxMBB. + if (MBB == IdxMBB) { + IdxVNI = IDomValue.first; + // IdxMBB need not be live-out. + if (!LiveOutSeen.test(MBB->getNumber())) + continue; + } + assert(LiveOutSeen.test(MBB->getNumber()) && "Expected live-out block"); + // Propagate IDomValue if needed: + // MBB is live-out and doesn't define its own value. + LiveOutPair &LOP = LiveOutCache[MBB]; + if (LOP.second != Node && LOP.first != IDomValue.first) { + ++Changes; + LOP = IDomValue; + } + } + } + } while (Changes); + + assert(IdxVNI && "Didn't find value for Idx"); + return IdxVNI; +} + +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *CopyMI = 0; + SlotIndex Def; + LiveInterval *LI = Edit->get(RegIdx); + + // Attempt cheap-as-a-copy rematerialization. + LiveRangeEdit::Remat RM(ParentVNI); + if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + } else { + // Can't remat, just insert a copy from parent. + CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) + .addReg(Edit->getReg()); + Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + } + + // Define the value in Reg. 
+ VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); + VNI->setCopy(CopyMI); + return VNI; +} + +/// Create a new virtual register and live interval. +void SplitEditor::openIntv() { + assert(!OpenIdx && "Previous LI not closed before openIntv"); + + // Create the complement as index 0. + if (Edit->empty()) + Edit->create(MRI, LIS, VRM); + + // Create the open interval. + OpenIdx = Edit->size(); + Edit->create(MRI, LIS, VRM); +} + +SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvBefore"); + DEBUG(dbgs() << " enterIntvBefore " << Idx); + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); + SlotIndex End = LIS.getMBBEndIdx(&MBB); + SlotIndex Last = End.getPrevSlot(); + DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return End; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, + LIS.getLastSplitPoint(Edit->getParent(), &MBB)); + RegAssign.insert(VNI->def, End, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +/// useIntv - indicate that all instructions in MBB should use OpenLI. +void SplitEditor::useIntv(const MachineBasicBlock &MBB) { + useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB)); +} + +void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before useIntv"); + DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvAfter"); + DEBUG(dbgs() << " leaveIntvAfter " << Idx); + + // The interval must be live beyond the instruction at Idx. + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), + llvm::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvBefore"); + DEBUG(dbgs() << " leaveIntvBefore " << Idx); + + // The interval must be live into the instruction at Idx. 
+ Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); + + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Start; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, + MBB.SkipPHIsAndLabels(MBB.begin())); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before overlapIntv"); + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + "Parent changes value in extended range"); + assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && + "Range cannot span basic blocks"); + + // The complement interval will be extended as needed by extendRange(). + markComplexMapped(0, ParentVNI); + DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +/// closeIntv - Indicate that we are done editing the currently open +/// LiveInterval, and ranges can be trimmed. +void SplitEditor::closeIntv() { + assert(OpenIdx && "openIntv not called before closeIntv"); + OpenIdx = 0; +} + +/// transferSimpleValues - Transfer all simply defined values to the new live +/// ranges. +/// Values that were rematerialized or that have multiple defs are left alone. +bool SplitEditor::transferSimpleValues() { + bool Skipped = false; + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), + ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { + DEBUG(dbgs() << " blit " << *ParentI << ':'); + VNInfo *ParentVNI = ParentI->valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = ParentI->start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = ParentI->end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { + DEBUG(dbgs() << ':' << VNI->id); + Edit->get(RegIdx)->addRange(LiveRange(Start, End, VNI)); + } else + Skipped = true; + Start = End; + } while (Start != ParentI->end); + DEBUG(dbgs() << '\n'); + } + return Skipped; +} + +void SplitEditor::extendPHIKillRanges() { + // Extend live ranges to be live-out for successor PHI values. 
+  for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+       E = Edit->getParent().vni_end(); I != E; ++I) {
+    const VNInfo *PHIVNI = *I;
+    if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+      continue;
+    unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+    MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+         PE = MBB->pred_end(); PI != PE; ++PI) {
+      SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+      // The predecessor may not have a live-out value. That is OK, like an
+      // undef PHI operand.
+      if (Edit->getParent().liveAt(End)) {
+        assert(RegAssign.lookup(End) == RegIdx &&
+               "Different register assignment in phi predecessor");
+        extendRange(RegIdx, End);
+      }
+    }
+  }
+}
+
+/// rewriteAssigned - Rewrite all uses of Edit->getReg().
+void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
+       RE = MRI.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    // LiveDebugVariables should have handled all DBG_VALUE instructions.
+    if (MI->isDebugValue()) {
+      DEBUG(dbgs() << "Zapping " << *MI);
+      MO.setReg(0);
+      continue;
+    }
+
+    // <undef> operands don't really read the register, so just assign them to
+    // the complement.
+    if (MO.isUse() && MO.isUndef()) {
+      MO.setReg(Edit->get(0)->reg);
+      continue;
+    }
+
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+    // Rewrite to the mapped register at Idx.
+    unsigned RegIdx = RegAssign.lookup(Idx);
+    MO.setReg(Edit->get(RegIdx)->reg);
+    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+                 << Idx << ':' << RegIdx << '\t' << *MI);
+
+    // Extend liveness to Idx.
+    if (ExtendRanges)
+      extendRange(RegIdx, Idx);
+  }
+}
+
+/// rewriteComponents - Rewrite uses of Intvs[0] according to the ConEQ
+/// mapping.
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+                                    const ConnectedVNInfoEqClasses &ConEq) {
+  for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
+       RE = MRI.reg_end(); RI != RE;) {
+    MachineOperand &MO = RI.getOperand();
+    MachineInstr *MI = MO.getParent();
+    ++RI;
+    if (MO.isUse() && MO.isUndef())
+      continue;
+    // DBG_VALUE instructions should have been eliminated earlier.
+    SlotIndex Idx = LIS.getInstructionIndex(MI);
+    Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+    DEBUG(dbgs() << "  rewr BB#" << MI->getParent()->getNumber() << '\t'
+                 << Idx << ':');
+    const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
+    assert(VNI && "Interval not live at use.");
+    MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
+    DEBUG(dbgs() << VNI->id << '\t' << *MI);
+  }
+}
+
+void SplitEditor::finish() {
+  assert(OpenIdx == 0 && "Previous LI not closed before rewrite");
+  ++NumFinished;
+
+  // At this point, the live intervals in Edit contain VNInfos corresponding to
+  // the inserted copies.
+
+  // Add the original defs from the parent interval.
+  for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(),
+       E = Edit->getParent().vni_end(); I != E; ++I) {
+    const VNInfo *ParentVNI = *I;
+    if (ParentVNI->isUnused())
+      continue;
+    unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
+    defValue(RegIdx, ParentVNI, ParentVNI->def);
+    // Mark rematted values as complex everywhere to force liveness computation.
+    // The new live ranges may be truncated.
+ if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + markComplexMapped(i, ParentVNI); + } + +#ifndef NDEBUG + // Every new interval must have a def by now, otherwise the split is bogus. + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) + assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); +#endif + + // Transfer the simply mapped values, check if any are complex. + bool Complex = transferSimpleValues(); + if (Complex) + extendPHIKillRanges(); + else + ++NumSimple; + + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Complex); + + // FIXME: Delete defs that were rematted everywhere. + + // Get rid of unused values and set phi-kill flags. + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) + (*I)->RenumberValues(LIS); + + // Now check if any registers were separated into multiple components. + ConnectedVNInfoEqClasses ConEQ(LIS); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { + // Don't use iterators, they are invalidated by create() below. + LiveInterval *li = Edit->get(i); + unsigned NumComp = ConEQ.Classify(li); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); + SmallVector dups; + dups.push_back(li); + for (unsigned i = 1; i != NumComp; ++i) + dups.push_back(&Edit->create(MRI, LIS, VRM)); + rewriteComponents(dups, ConEQ); + ConEQ.Distribute(&dups[0]); + } + + // Calculate spill weight and allocation hints for new intervals. + VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops); + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ + LiveInterval &li = **I; + vrai.CalculateRegClass(li.reg); + vrai.CalculateWeightAndHint(li); + DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName() + << ":" << li << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it +/// may be an advantage to split CurLI for the duration of the block. +bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { + // If CurLI is local to one block, there is no point to splitting it. + if (LiveBlocks.size() <= 1) + return false; + // Add blocks with multiple uses. + for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) { + const BlockInfo &BI = LiveBlocks[i]; + if (!BI.Uses) + continue; + unsigned Instrs = UsingBlocks.lookup(BI.MBB); + if (Instrs <= 1) + continue; + if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough) + continue; + Blocks.insert(BI.MBB); + } + return !Blocks.empty(); +} + +/// splitSingleBlocks - Split CurLI into a separate live interval inside each +/// basic block in Blocks. +void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { + DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); + + for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) { + const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i]; + if (!BI.Uses || !Blocks.count(BI.MBB)) + continue; + + openIntv(); + SlotIndex SegStart = enterIntvBefore(BI.FirstUse); + if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + } else { + // The last use is after the last valid split point. 
+      SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+      useIntv(SegStart, SegStop);
+      overlapIntv(SegStop, BI.LastUse);
+    }
+    closeIntv();
+  }
+  finish();
+}
diff --git a/final/lib/CodeGen/SplitKit.h b/final/lib/CodeGen/SplitKit.h
new file mode 100644
index 00000000000..0e35df0ed6c
--- /dev/null
+++ b/final/lib/CodeGen/SplitKit.h
@@ -0,0 +1,346 @@
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SplitAnalysis class as well as mutator functions for
+// live range splitting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+namespace llvm {
+
+class ConnectedVNInfoEqClasses;
+class LiveInterval;
+class LiveIntervals;
+class LiveRangeEdit;
+class MachineInstr;
+class MachineLoopInfo;
+class MachineRegisterInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+class VirtRegMap;
+class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
+
+/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
+/// opportunities.
+class SplitAnalysis {
+public:
+  const MachineFunction &MF;
+  const VirtRegMap &VRM;
+  const LiveIntervals &LIS;
+  const MachineLoopInfo &Loops;
+  const TargetInstrInfo &TII;
+
+  // Instructions using the current register.
+  typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
+  InstrPtrSet UsingInstrs;
+
+  // Sorted slot indexes of using instructions.
+  SmallVector<SlotIndex, 8> UseSlots;
+
+  // The number of instructions using CurLI in each basic block.
+  typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
+  BlockCountMap UsingBlocks;
+
+  /// Additional information about basic blocks where the current variable is
+  /// live. Such a block will look like one of these templates:
+  ///
+  ///  1. |   o---x   | Internal to block. Variable is only live in this block.
+  ///  2. |---x       | Live-in, kill.
+  ///  3. |       o---| Def, live-out.
+  ///  4. |---x   o---| Live-in, kill, def, live-out.
+  ///  5. |---o---o---| Live-through with uses or defs.
+  ///  6. |-----------| Live-through without uses. Transparent.
+  ///
+  struct BlockInfo {
+    MachineBasicBlock *MBB;
+    SlotIndex Start;      ///< Beginning of block.
+    SlotIndex Stop;       ///< End of block.
+    SlotIndex FirstUse;   ///< First instr using current reg.
+    SlotIndex LastUse;    ///< Last instr using current reg.
+    SlotIndex Kill;       ///< Interval end point inside block.
+    SlotIndex Def;        ///< Interval start point inside block.
+    /// Last possible point for splitting live ranges.
+    SlotIndex LastSplitPoint;
+    bool Uses;            ///< Current reg has uses or defs in block.
+    bool LiveThrough;     ///< Live in whole block (Templ 5. or 6. above).
+    bool LiveIn;          ///< Current reg is live in.
+    bool LiveOut;         ///< Current reg is live out.
+
+    // Per-interference pattern scratch data.
+    bool OverlapEntry;    ///< Interference overlaps entering interval.
+    bool OverlapExit;     ///< Interference overlaps exiting interval.
+  };
+
+  /// Basic blocks where var is live. This array is parallel to
+  /// SpillConstraints.
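+  /// Consumers scan this array directly; splitSingleBlocks() in SplitKit.cpp
+  /// is the canonical in-tree example. A sketch:
+  ///
+  ///   for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+  ///     const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+  ///     if (BI.Uses)
+  ///       ; // consider splitting around [BI.FirstUse;BI.LastUse]
+  ///   }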
+  SmallVector<BlockInfo, 8> LiveBlocks;
+
+private:
+  // Current live interval.
+  const LiveInterval *CurLI;
+
+  // Summarize statistics by counting instructions using CurLI.
+  void analyzeUses();
+
+  /// calcLiveBlockInfo - Compute per-block information about CurLI.
+  bool calcLiveBlockInfo();
+
+  /// canAnalyzeBranch - Return true if MBB ends in a branch that can be
+  /// analyzed.
+  bool canAnalyzeBranch(const MachineBasicBlock *MBB);
+
+public:
+  SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
+                const MachineLoopInfo &mli);
+
+  /// analyze - set CurLI to the specified interval, and analyze how it may be
+  /// split.
+  void analyze(const LiveInterval *li);
+
+  /// clear - clear all data structures so SplitAnalysis is ready to analyze a
+  /// new interval.
+  void clear();
+
+  /// getParent - Return the last analyzed interval.
+  const LiveInterval &getParent() const { return *CurLI; }
+
+  /// hasUses - Return true if MBB has any uses of CurLI.
+  bool hasUses(const MachineBasicBlock *MBB) const {
+    return UsingBlocks.lookup(MBB);
+  }
+
+  /// isOriginalEndpoint - Return true if the original live range was killed or
+  /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def,
+  /// and 'use' for an early-clobber def.
+  /// This can be used to recognize code inserted by earlier live range
+  /// splitting.
+  bool isOriginalEndpoint(SlotIndex Idx) const;
+
+  typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
+
+  // Print a set of blocks with use counts.
+  void print(const BlockPtrSet&, raw_ostream&) const;
+
+  /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
+  /// having CurLI split to a new live interval. Return true if Blocks can be
+  /// passed to SplitEditor::splitSingleBlocks.
+  bool getMultiUseBlocks(BlockPtrSet &Blocks);
+};
+
+
+/// SplitEditor - Edit machine code and LiveIntervals for live range
+/// splitting.
+///
+/// - Create a SplitEditor from a SplitAnalysis.
+/// - Start a new live interval with openIntv.
+/// - Mark the places where the new interval is entered using enterIntv*
+/// - Mark the ranges where the new interval is used with useIntv*
+/// - Mark the places where the interval is exited with leaveIntv*.
+/// - Finish the current interval with closeIntv and repeat from openIntv.
+/// - Rewrite instructions with finish().
+///
+class SplitEditor {
+  SplitAnalysis &SA;
+  LiveIntervals &LIS;
+  VirtRegMap &VRM;
+  MachineRegisterInfo &MRI;
+  MachineDominatorTree &MDT;
+  const TargetInstrInfo &TII;
+  const TargetRegisterInfo &TRI;
+
+  /// Edit - The current parent register and new intervals created.
+  LiveRangeEdit *Edit;
+
+  /// Index into Edit of the currently open interval.
+  /// The index 0 is used for the complement, so the first interval started by
+  /// openIntv will be 1.
+  unsigned OpenIdx;
+
+  typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+  /// Allocator for the interval map. This will eventually be shared with
+  /// SlotIndexes and LiveIntervals.
+  RegAssignMap::Allocator Allocator;
+
+  /// RegAssign - Map of the assigned register indexes.
+  /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+  /// Idx.
+  RegAssignMap RegAssign;
+
+  typedef DenseMap<std::pair<unsigned, unsigned>, VNInfo*> ValueMap;
+
+  /// Values - keep track of the mapping from parent values to values in the
+  /// new intervals. Given a pair (RegIdx, ParentVNI->id), Values contains:
+  ///
+  ///  1. No entry - the value is not mapped to Edit.get(RegIdx).
+  ///  2. Null - the value is mapped to multiple values in Edit.get(RegIdx).
+  ///     Each value is represented by a minimal live range at its def.
+  ///  3. A non-null VNInfo - the value is mapped to a single new value.
+  ///     The new value has no live ranges anywhere.
+  ValueMap Values;
+
+  typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+  typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap;
+
+  // LiveOutCache - Map each basic block where a new register is live out to
+  // the live-out value and its defining block.
+  // One of these conditions shall be true:
+  //
+  //  1. !LiveOutCache.count(MBB)
+  //  2. LiveOutCache[MBB].second.getNode() == MBB
+  //  3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+  //
+  // This is only a cache, the values can be computed as:
+  //
+  //  VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+  //  Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
+  //
+  // The cache is also used as a visited set by extendRange(). It can be shared
+  // by all the new registers because at most one is live out of each block.
+  LiveOutMap LiveOutCache;
+
+  // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid
+  // entry in LiveOutCache.
+  BitVector LiveOutSeen;
+
+  /// defValue - define a value in RegIdx from ParentVNI at Idx.
+  /// Idx does not have to be ParentVNI->def, but it must be contained within
+  /// ParentVNI's live range in ParentLI. The new value is added to the value
+  /// map.
+  /// Return the new LI value.
+  VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+
+  /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless
+  /// of the number of defs.
+  void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI);
+
+  /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+  /// rematerialization or a COPY from parent. Return the new value.
+  VNInfo *defFromParent(unsigned RegIdx,
+                        VNInfo *ParentVNI,
+                        SlotIndex UseIdx,
+                        MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator I);
+
+  /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx.
+  /// Insert PHIDefs as needed to preserve SSA form.
+  void extendRange(unsigned RegIdx, SlotIndex Idx);
+
+  /// updateSSA - Insert PHIDefs as necessary and update LiveOutCache such that
+  /// Edit.get(RegIdx) is live-in to all the blocks in LiveIn.
+  /// Return the value that is eventually live-in to IdxMBB.
+  VNInfo *updateSSA(unsigned RegIdx,
+                    SmallVectorImpl<MachineDomTreeNode*> &LiveIn,
+                    SlotIndex Idx,
+                    const MachineBasicBlock *IdxMBB);
+
+  /// transferSimpleValues - Transfer simply defined values to the new ranges.
+  /// Return true if any complex ranges were skipped.
+  bool transferSimpleValues();
+
+  /// extendPHIKillRanges - Extend the ranges of all values killed by original
+  /// parent PHIDefs.
+  void extendPHIKillRanges();
+
+  /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+  void rewriteAssigned(bool ExtendRanges);
+
+  /// rewriteComponents - Rewrite all uses of Intvs[0] according to the eq
+  /// classes in ConEQ.
+  /// This must be done when Intvs[0] is still live at all uses, before calling
+  /// ConEq.Distribute().
+  void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+                         const ConnectedVNInfoEqClasses &ConEq);
+
+public:
+  /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
+  /// Newly created intervals are registered with the LiveRangeEdit passed to
+  /// reset().
+  SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
+              MachineDominatorTree&);
+
+  /// reset - Prepare for a new split.
+  void reset(LiveRangeEdit&);
+
+  /// Create a new virtual register and live interval.
+  void openIntv();
+
+  /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+  /// If the parent interval is not live before Idx, a COPY is not inserted.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvBefore(SlotIndex Idx);
+
+  /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+  /// Use the open interval from the inserted copy to the MBB end.
+  /// Return the beginning of the new live range.
+  SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in MBB should use OpenLI.
+  void useIntv(const MachineBasicBlock &MBB);
+
+  /// useIntv - indicate that all instructions in range should use OpenLI.
+  void useIntv(SlotIndex Start, SlotIndex End);
+
+  /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+  /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvBefore(SlotIndex Idx);
+
+  /// leaveIntvAtTop - Leave the interval at the top of MBB.
+  /// Add liveness from the MBB top to the copy.
+  /// Return the end of the live range.
+  SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+  /// overlapIntv - Indicate that all instructions in range should use the open
+  /// interval, but also let the complement interval be live.
+  ///
+  /// This doubles the register pressure, but is sometimes required to deal with
+  /// register uses after the last valid split point.
+  ///
+  /// The Start index should be a return value from a leaveIntv* call, and End
+  /// should be in the same basic block. The parent interval must have the same
+  /// value across the range.
+  ///
+  void overlapIntv(SlotIndex Start, SlotIndex End);
+
+  /// closeIntv - Indicate that we are done editing the currently open
+  /// LiveInterval, and ranges can be trimmed.
+  void closeIntv();
+
+  /// finish - after all the new live ranges have been created, compute the
+  /// remaining live range, and rewrite instructions to use the new registers.
+  void finish();
+
+  /// dump - print the current interval mapping to dbgs().
+  void dump() const;
+
+  // ===--- High level methods ---===
+
+  /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+  /// basic block in Blocks.
+  void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+};
+
+}
diff --git a/final/lib/CodeGen/Splitter.cpp b/final/lib/CodeGen/Splitter.cpp
new file mode 100644
index 00000000000..08aee82b8c5
--- /dev/null
+++ b/final/lib/CodeGen/Splitter.cpp
@@ -0,0 +1,827 @@
+//===-- llvm/CodeGen/Splitter.cpp - Splitter -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
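+//
+//===----------------------------------------------------------------------===//
+//
+// The LoopSplitter pass splits the live interval of a virtual register that
+// is live across a loop: a copy into a fresh vreg is inserted in the loop
+// preheader, uses inside the loop are renamed, and copies back to the
+// original vreg are inserted on the exit edges (see LoopSplit::apply below).
+// The intended effect, with hypothetical vreg numbers:
+//
+//   preheader:    %reg1100 = COPY %reg1024
+//   loop blocks:  ...uses of %reg1024 renamed to %reg1100...
+//   exit blocks:  %reg1024 = COPY %reg1100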
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "loopsplitter"
+
+#include "Splitter.h"
+
+#include "SimpleRegisterCoalescing.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+char LoopSplitter::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+               "Split virtual registers across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+               "Split virtual registers across loop boundaries.", false, false)
+
+namespace llvm {
+
+  class StartSlotComparator {
+  public:
+    StartSlotComparator(LiveIntervals &lis) : lis(lis) {}
+    bool operator()(const MachineBasicBlock *mbb1,
+                    const MachineBasicBlock *mbb2) const {
+      return lis.getMBBStartIdx(mbb1) < lis.getMBBStartIdx(mbb2);
+    }
+  private:
+    LiveIntervals &lis;
+  };
+
+  class LoopSplit {
+  public:
+    LoopSplit(LoopSplitter &ls, LiveInterval &li, MachineLoop &loop)
+      : ls(ls), li(li), loop(loop), valid(true), inSplit(false), newLI(0) {
+      assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+             "Cannot split physical registers.");
+    }
+
+    LiveInterval& getLI() const { return li; }
+
+    MachineLoop& getLoop() const { return loop; }
+
+    bool isValid() const { return valid; }
+
+    bool isWorthwhile() const { return valid && (inSplit || !outSplits.empty()); }
+
+    void invalidate() { valid = false; }
+
+    void splitIncoming() { inSplit = true; }
+
+    void splitOutgoing(MachineLoop::Edge &edge) { outSplits.insert(edge); }
+
+    void addLoopInstr(MachineInstr *i) { loopInstrs.push_back(i); }
+
+    void apply() {
+      assert(valid && "Attempt to apply invalid split.");
+      applyIncoming();
+      applyOutgoing();
+      copyRanges();
+      renameInside();
+    }
+
+  private:
+    LoopSplitter &ls;
+    LiveInterval &li;
+    MachineLoop &loop;
+    bool valid, inSplit;
+    std::set<MachineLoop::Edge> outSplits;
+    std::vector<MachineInstr*> loopInstrs;
+
+    LiveInterval *newLI;
+    std::map<VNInfo*, VNInfo*> vniMap;
+
+    LiveInterval* getNewLI() {
+      if (newLI == 0) {
+        const TargetRegisterClass *trc = ls.mri->getRegClass(li.reg);
+        unsigned vreg = ls.mri->createVirtualRegister(trc);
+        newLI = &ls.lis->getOrCreateInterval(vreg);
+      }
+      return newLI;
+    }
+
+    VNInfo* getNewVNI(VNInfo *oldVNI) {
+      VNInfo *newVNI = vniMap[oldVNI];
+
+      if (newVNI == 0) {
+        newVNI = getNewLI()->createValueCopy(oldVNI,
+                                             ls.lis->getVNInfoAllocator());
+        vniMap[oldVNI] = newVNI;
+      }
+
+      return newVNI;
+    }
+
+    void applyIncoming() {
+      if (!inSplit) {
+        return;
+      }
+
+      MachineBasicBlock *preHeader = loop.getLoopPreheader();
+      if (preHeader == 0) {
+        assert(ls.canInsertPreHeader(loop) &&
+               "Can't insert required preheader.");
+        preHeader = &ls.insertPreHeader(loop);
+      }
+
+      LiveRange *preHeaderRange =
+        ls.lis->findExitingRange(li, preHeader);
+      assert(preHeaderRange != 0 && "Range not live into preheader.");
+
+      // Insert the new copy.
+ MachineInstr *copy = BuildMI(*preHeader, + preHeader->getFirstTerminator(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(getNewLI()->reg, RegState::Define) + .addReg(li.reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + VNInfo *newVal = getNewVNI(preHeaderRange->valno); + newVal->def = copyDefIdx; + newVal->setCopy(copy); + li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true); + + getNewLI()->addRange(LiveRange(copyDefIdx, + ls.lis->getMBBEndIdx(preHeader), + newVal)); + } + + void applyOutgoing() { + + for (std::set::iterator osItr = outSplits.begin(), + osEnd = outSplits.end(); + osItr != osEnd; ++osItr) { + MachineLoop::Edge edge = *osItr; + MachineBasicBlock *outBlock = edge.second; + if (ls.isCriticalEdge(edge)) { + assert(ls.canSplitEdge(edge) && "Unsplitable critical edge."); + outBlock = &ls.splitEdge(edge, loop); + } + LiveRange *outRange = ls.lis->findEnteringRange(li, outBlock); + assert(outRange != 0 && "No exiting range?"); + + MachineInstr *copy = BuildMI(*outBlock, outBlock->begin(), + DebugLoc(), + ls.tii->get(TargetOpcode::COPY)) + .addReg(li.reg, RegState::Define) + .addReg(getNewLI()->reg, RegState::Kill); + + ls.lis->InsertMachineInstrInMaps(copy); + + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + + // Blow away output range definition. + outRange->valno->def = ls.lis->getInvalidIndex(); + li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx); + + SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock); + assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = + getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator()); + + getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock), + copyDefIdx, newVal)); + + } + } + + void copyRange(LiveRange &lr) { + std::pair lsr = + ls.getLoopSubRange(lr, loop); + + if (!lsr.first) + return; + + LiveRange loopRange(lsr.second.first, lsr.second.second, + getNewVNI(lr.valno)); + + li.removeRange(loopRange.start, loopRange.end, true); + + getNewLI()->addRange(loopRange); + } + + void copyRanges() { + for (std::vector::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); + if (instr.modifiesRegister(li.reg, 0)) { + LiveRange *defRange = + li.getLiveRangeContaining(instrIdx.getDefIndex()); + if (defRange != 0) // May have caught this already. + copyRange(*defRange); + } + if (instr.readsRegister(li.reg, 0)) { + LiveRange *useRange = + li.getLiveRangeContaining(instrIdx.getUseIndex()); + if (useRange != 0) { // May have caught this already. 
+ copyRange(*useRange); + } + } + } + + for (MachineLoop::block_iterator bbItr = loop.block_begin(), + bbEnd = loop.block_end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock &loopBlock = **bbItr; + LiveRange *enteringRange = + ls.lis->findEnteringRange(li, &loopBlock); + if (enteringRange != 0) { + copyRange(*enteringRange); + } + } + } + + void renameInside() { + for (std::vector::iterator iItr = loopInstrs.begin(), + iEnd = loopInstrs.end(); + iItr != iEnd; ++iItr) { + MachineInstr &instr = **iItr; + for (unsigned i = 0; i < instr.getNumOperands(); ++i) { + MachineOperand &mop = instr.getOperand(i); + if (mop.isReg() && mop.getReg() == li.reg) { + mop.setReg(getNewLI()->reg); + } + } + } + } + + }; + + void LoopSplitter::getAnalysisUsage(AnalysisUsage &au) const { + au.addRequired(); + au.addPreserved(); + au.addRequired(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addPreserved(); + au.addRequired(); + au.addPreserved(); + au.addRequired(); + au.addPreserved(); + MachineFunctionPass::getAnalysisUsage(au); + } + + bool LoopSplitter::runOnMachineFunction(MachineFunction &fn) { + + mf = &fn; + mri = &mf->getRegInfo(); + tii = mf->getTarget().getInstrInfo(); + tri = mf->getTarget().getRegisterInfo(); + sis = &getAnalysis(); + lis = &getAnalysis(); + mli = &getAnalysis(); + mdt = &getAnalysis(); + + fqn = mf->getFunction()->getParent()->getModuleIdentifier() + "." + + mf->getFunction()->getName().str(); + + dbgs() << "Splitting " << mf->getFunction()->getName() << "."; + + dumpOddTerminators(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + +// std::deque loops; +// std::copy(mli->begin(), mli->end(), std::back_inserter(loops)); +// dbgs() << "Loops:\n"; +// while (!loops.empty()) { +// MachineLoop &loop = *loops.front(); +// loops.pop_front(); +// std::copy(loop.begin(), loop.end(), std::back_inserter(loops)); + +// dumpLoopInfo(loop); +// } + + //lis->dump(); + //exit(0); + + // Setup initial intervals. 
+ for (LiveIntervals::iterator liItr = lis->begin(), liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval *li = liItr->second; + + if (TargetRegisterInfo::isVirtualRegister(li->reg) && + !lis->intervalIsInOneMBB(*li)) { + intervals.push_back(li); + } + } + + processIntervals(); + + intervals.clear(); + +// dbgs() << "----------------------------------------\n"; +// lis->dump(); +// dbgs() << "----------------------------------------\n"; + + dumpOddTerminators(); + + //exit(1); + + return false; + } + + void LoopSplitter::releaseMemory() { + fqn.clear(); + intervals.clear(); + loopRangeMap.clear(); + } + + void LoopSplitter::dumpOddTerminators() { + for (MachineFunction::iterator bbItr = mf->begin(), bbEnd = mf->end(); + bbItr != bbEnd; ++bbItr) { + MachineBasicBlock *mbb = &*bbItr; + MachineBasicBlock *a = 0, *b = 0; + SmallVector c; + if (tii->AnalyzeBranch(*mbb, a, b, c)) { + dbgs() << "MBB#" << mbb->getNumber() << " has multiway terminator.\n"; + dbgs() << " Terminators:\n"; + for (MachineBasicBlock::iterator iItr = mbb->begin(), iEnd = mbb->end(); + iItr != iEnd; ++iItr) { + MachineInstr *instr= &*iItr; + dbgs() << " " << *instr << ""; + } + dbgs() << "\n Listed successors: [ "; + for (MachineBasicBlock::succ_iterator sItr = mbb->succ_begin(), sEnd = mbb->succ_end(); + sItr != sEnd; ++sItr) { + MachineBasicBlock *succMBB = *sItr; + dbgs() << succMBB->getNumber() << " "; + } + dbgs() << "]\n\n"; + } + } + } + + void LoopSplitter::dumpLoopInfo(MachineLoop &loop) { + MachineBasicBlock &headerBlock = *loop.getHeader(); + typedef SmallVector ExitEdgesList; + ExitEdgesList exitEdges; + loop.getExitEdges(exitEdges); + + dbgs() << " Header: BB#" << headerBlock.getNumber() << ", Contains: [ "; + for (std::vector::const_iterator + subBlockItr = loop.getBlocks().begin(), + subBlockEnd = loop.getBlocks().end(); + subBlockItr != subBlockEnd; ++subBlockItr) { + MachineBasicBlock &subBlock = **subBlockItr; + dbgs() << "BB#" << subBlock.getNumber() << " "; + } + dbgs() << "], Exit edges: [ "; + for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(), + exitEdgeEnd = exitEdges.end(); + exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) { + MachineLoop::Edge &exitEdge = *exitEdgeItr; + dbgs() << "(MBB#" << exitEdge.first->getNumber() + << ", MBB#" << exitEdge.second->getNumber() << ") "; + } + dbgs() << "], Sub-Loop Headers: [ "; + for (MachineLoop::iterator subLoopItr = loop.begin(), + subLoopEnd = loop.end(); + subLoopItr != subLoopEnd; ++subLoopItr) { + MachineLoop &subLoop = **subLoopItr; + MachineBasicBlock &subLoopBlock = *subLoop.getHeader(); + dbgs() << "BB#" << subLoopBlock.getNumber() << " "; + } + dbgs() << "]\n"; + } + + void LoopSplitter::updateTerminators(MachineBasicBlock &mbb) { + mbb.updateTerminator(); + + for (MachineBasicBlock::iterator miItr = mbb.begin(), miEnd = mbb.end(); + miItr != miEnd; ++miItr) { + if (lis->isNotInMIMap(miItr)) { + lis->InsertMachineInstrInMaps(miItr); + } + } + } + + bool LoopSplitter::canInsertPreHeader(MachineLoop &loop) { + MachineBasicBlock *header = loop.getHeader(); + MachineBasicBlock *a = 0, *b = 0; + SmallVector c; + + for (MachineBasicBlock::pred_iterator pbItr = header->pred_begin(), + pbEnd = header->pred_end(); + pbItr != pbEnd; ++pbItr) { + MachineBasicBlock *predBlock = *pbItr; + if (!!tii->AnalyzeBranch(*predBlock, a, b, c)) { + return false; + } + } + + MachineFunction::iterator headerItr(header); + if (headerItr == mf->begin()) + return true; + MachineBasicBlock *headerLayoutPred = llvm::prior(headerItr); + assert(headerLayoutPred 
!= 0 && "Header should have layout pred."); + + return (!tii->AnalyzeBranch(*headerLayoutPred, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::insertPreHeader(MachineLoop &loop) { + assert(loop.getLoopPreheader() == 0 && "Loop already has preheader."); + + MachineBasicBlock &header = *loop.getHeader(); + + // Save the preds - we'll need to update them once we insert the preheader. + typedef std::set HeaderPreds; + HeaderPreds headerPreds; + + for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(), + predEnd = header.pred_end(); + predItr != predEnd; ++predItr) { + if (!loop.contains(*predItr)) + headerPreds.insert(*predItr); + } + + assert(!headerPreds.empty() && "No predecessors for header?"); + + //dbgs() << fqn << " MBB#" << header.getNumber() << " inserting preheader..."; + + MachineBasicBlock *preHeader = + mf->CreateMachineBasicBlock(header.getBasicBlock()); + + assert(preHeader != 0 && "Failed to create pre-header."); + + mf->insert(header, preHeader); + + for (HeaderPreds::iterator hpItr = headerPreds.begin(), + hpEnd = headerPreds.end(); + hpItr != hpEnd; ++hpItr) { + assert(*hpItr != 0 && "How'd a null predecessor get into this set?"); + MachineBasicBlock &hp = **hpItr; + hp.ReplaceUsesOfBlockWith(&header, preHeader); + } + preHeader->addSuccessor(&header); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(preHeader)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(preHeader); + + if (MachineLoop *parentLoop = loop.getParentLoop()) { + assert(parentLoop->getHeader() != loop.getHeader() && + "Parent loop has same header?"); + parentLoop->addBasicBlockToLoop(preHeader, mli->getBase()); + + // Invalidate all parent loop ranges. + while (parentLoop != 0) { + loopRangeMap.erase(parentLoop); + parentLoop = parentLoop->getParentLoop(); + } + } + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + + // Is this safe for physregs? + // TargetRegisterInfo::isPhysicalRegister(li.reg) || + if (!lis->isLiveInToMBB(li, &header)) + continue; + + if (lis->isLiveInToMBB(li, preHeader)) { + assert(lis->isLiveOutOfMBB(li, preHeader) && + "Range terminates in newly added preheader?"); + continue; + } + + bool insertRange = false; + + for (MachineBasicBlock::pred_iterator predItr = preHeader->pred_begin(), + predEnd = preHeader->pred_end(); + predItr != predEnd; ++predItr) { + MachineBasicBlock *predMBB = *predItr; + if (lis->isLiveOutOfMBB(li, predMBB)) { + insertRange = true; + break; + } + } + + if (!insertRange) + continue; + + SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(preHeader), + lis->getMBBEndIdx(preHeader), + newVal)); + } + + + //dbgs() << "Dumping SlotIndexes:\n"; + //sis->dump(); + + //dbgs() << "done. 
(Added MBB#" << preHeader->getNumber() << ")\n"; + + return *preHeader; + } + + bool LoopSplitter::isCriticalEdge(MachineLoop::Edge &edge) { + assert(edge.first->succ_size() > 1 && "Non-sensical edge."); + if (edge.second->pred_size() > 1) + return true; + return false; + } + + bool LoopSplitter::canSplitEdge(MachineLoop::Edge &edge) { + MachineFunction::iterator outBlockItr(edge.second); + if (outBlockItr == mf->begin()) + return true; + MachineBasicBlock *outBlockLayoutPred = llvm::prior(outBlockItr); + assert(outBlockLayoutPred != 0 && "Should have a layout pred if out!=begin."); + MachineBasicBlock *a = 0, *b = 0; + SmallVector c; + return (!tii->AnalyzeBranch(*outBlockLayoutPred, a, b, c) && + !tii->AnalyzeBranch(*edge.first, a, b, c)); + } + + MachineBasicBlock& LoopSplitter::splitEdge(MachineLoop::Edge &edge, + MachineLoop &loop) { + + MachineBasicBlock &inBlock = *edge.first; + MachineBasicBlock &outBlock = *edge.second; + + assert((inBlock.succ_size() > 1) && (outBlock.pred_size() > 1) && + "Splitting non-critical edge?"); + + //dbgs() << fqn << " Splitting edge (MBB#" << inBlock.getNumber() + // << " -> MBB#" << outBlock.getNumber() << ")..."; + + MachineBasicBlock *splitBlock = + mf->CreateMachineBasicBlock(); + + assert(splitBlock != 0 && "Failed to create split block."); + + mf->insert(&outBlock, splitBlock); + + inBlock.ReplaceUsesOfBlockWith(&outBlock, splitBlock); + splitBlock->addSuccessor(&outBlock); + + MachineBasicBlock *oldLayoutPred = + llvm::prior(MachineFunction::iterator(splitBlock)); + if (oldLayoutPred != 0) { + updateTerminators(*oldLayoutPred); + } + + lis->InsertMBBInMaps(splitBlock); + + loopRangeMap.erase(&loop); + + MachineLoop *splitParentLoop = loop.getParentLoop(); + while (splitParentLoop != 0 && + !splitParentLoop->contains(&outBlock)) { + splitParentLoop = splitParentLoop->getParentLoop(); + } + + if (splitParentLoop != 0) { + assert(splitParentLoop->contains(&loop) && + "Split-block parent doesn't contain original loop?"); + splitParentLoop->addBasicBlockToLoop(splitBlock, mli->getBase()); + + // Invalidate all parent loop ranges. + while (splitParentLoop != 0) { + loopRangeMap.erase(splitParentLoop); + splitParentLoop = splitParentLoop->getParentLoop(); + } + } + + + for (LiveIntervals::iterator liItr = lis->begin(), + liEnd = lis->end(); + liItr != liEnd; ++liItr) { + LiveInterval &li = *liItr->second; + bool intersects = lis->isLiveOutOfMBB(li, &inBlock) && + lis->isLiveInToMBB(li, &outBlock); + if (lis->isLiveInToMBB(li, splitBlock)) { + if (!intersects) { + li.removeRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), true); + } + } else if (intersects) { + SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock); + assert(lis->getInstructionFromIndex(newDefIdx) == 0 && + "PHI def index points at actual instruction."); + VNInfo *newVal = li.getNextValue(newDefIdx, 0, + lis->getVNInfoAllocator()); + li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock), + lis->getMBBEndIdx(splitBlock), + newVal)); + } + } + + //dbgs() << "done. 
+
+  LoopSplitter::LoopRanges& LoopSplitter::getLoopRanges(MachineLoop &loop) {
+    typedef std::set<MachineBasicBlock*, StartSlotComparator> LoopMBBSet;
+    LoopRangeMap::iterator lrItr = loopRangeMap.find(&loop);
+    if (lrItr == loopRangeMap.end()) {
+      LoopMBBSet loopMBBs((StartSlotComparator(*lis)));
+      std::copy(loop.block_begin(), loop.block_end(),
+                std::inserter(loopMBBs, loopMBBs.begin()));
+
+      assert(!loopMBBs.empty() && "No blocks in loop?");
+
+      LoopRanges &loopRanges = loopRangeMap[&loop];
+      assert(loopRanges.empty() && "Loop encountered but not processed?");
+      SlotIndex oldEnd = lis->getMBBEndIdx(*loopMBBs.begin());
+      loopRanges.push_back(
+        std::make_pair(lis->getMBBStartIdx(*loopMBBs.begin()),
+                       lis->getInvalidIndex()));
+      for (LoopMBBSet::iterator curBlockItr = llvm::next(loopMBBs.begin()),
+                                curBlockEnd = loopMBBs.end();
+           curBlockItr != curBlockEnd; ++curBlockItr) {
+        SlotIndex newStart = lis->getMBBStartIdx(*curBlockItr);
+        if (newStart != oldEnd) {
+          loopRanges.back().second = oldEnd;
+          loopRanges.push_back(std::make_pair(newStart,
+                                              lis->getInvalidIndex()));
+        }
+        oldEnd = lis->getMBBEndIdx(*curBlockItr);
+      }
+
+      loopRanges.back().second =
+        lis->getMBBEndIdx(*llvm::prior(loopMBBs.end()));
+
+      return loopRanges;
+    }
+    return lrItr->second;
+  }
+
+  std::pair<bool, LoopSplitter::SlotPair> LoopSplitter::getLoopSubRange(
+                                                           const LiveRange &lr,
+                                                           MachineLoop &loop) {
+    LoopRanges &loopRanges = getLoopRanges(loop);
+    LoopRanges::iterator lrItr = loopRanges.begin(),
+                         lrEnd = loopRanges.end();
+    while (lrItr != lrEnd && lr.start >= lrItr->second) {
+      ++lrItr;
+    }
+
+    if (lrItr == lrEnd) {
+      SlotIndex invalid = lis->getInvalidIndex();
+      return std::make_pair(false, SlotPair(invalid, invalid));
+    }
+
+    SlotIndex srStart(lr.start < lrItr->first ? lrItr->first : lr.start);
+    SlotIndex srEnd(lr.end > lrItr->second ? lrItr->second : lr.end);
+
+    return std::make_pair(true, SlotPair(srStart, srEnd));
+  }
+
+  void LoopSplitter::dumpLoopRanges(MachineLoop &loop) {
+    LoopRanges &loopRanges = getLoopRanges(loop);
+    dbgs() << "For loop MBB#" << loop.getHeader()->getNumber()
+           << ", subranges are: [ ";
+    for (LoopRanges::iterator lrItr = loopRanges.begin(),
+                              lrEnd = loopRanges.end();
+         lrItr != lrEnd; ++lrItr) {
+      dbgs() << "[" << lrItr->first << ", " << lrItr->second << ") ";
+    }
+    dbgs() << "]\n";
+  }
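+
+  // A self-contained sketch (illustrative only) of the coalescing scheme
+  // getLoopRanges uses above: given intervals sorted by start index, adjacent
+  // intervals are merged whenever one begins exactly where the previous one
+  // ends, leaving a minimal list of contiguous [start, end) ranges. Plain
+  // unsigneds stand in for SlotIndex here.
+  static void coalesceSortedRangesSketch(
+                      const std::vector<std::pair<unsigned, unsigned> > &in,
+                      std::vector<std::pair<unsigned, unsigned> > &out) {
+    for (unsigned i = 0; i < in.size(); ++i) {
+      if (!out.empty() && in[i].first == out.back().second)
+        out.back().second = in[i].second;   // Extend the current range.
+      else
+        out.push_back(in[i]);               // Start a new range.
+    }
+  }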
+
+  void LoopSplitter::processHeader(LoopSplit &split) {
+    MachineBasicBlock &header = *split.getLoop().getHeader();
+    //dbgs() << "  Processing loop header BB#" << header.getNumber() << "\n";
+
+    if (!lis->isLiveInToMBB(split.getLI(), &header))
+      return; // Not live in, but nothing wrong so far.
+
+    MachineBasicBlock *preHeader = split.getLoop().getLoopPreheader();
+    if (!preHeader) {
+
+      if (!canInsertPreHeader(split.getLoop())) {
+        split.invalidate();
+        return; // Couldn't insert a pre-header. Bail on this interval.
+      }
+
+      for (MachineBasicBlock::pred_iterator predItr = header.pred_begin(),
+                                            predEnd = header.pred_end();
+           predItr != predEnd; ++predItr) {
+        if (lis->isLiveOutOfMBB(split.getLI(), *predItr)) {
+          split.splitIncoming();
+          break;
+        }
+      }
+    } else if (lis->isLiveOutOfMBB(split.getLI(), preHeader)) {
+      split.splitIncoming();
+    }
+  }
+
+  void LoopSplitter::processLoopExits(LoopSplit &split) {
+    typedef SmallVector<MachineLoop::Edge, 8> ExitEdgesList;
+    ExitEdgesList exitEdges;
+    split.getLoop().getExitEdges(exitEdges);
+
+    //dbgs() << "  Processing loop exits:\n";
+
+    for (ExitEdgesList::iterator exitEdgeItr = exitEdges.begin(),
+                                 exitEdgeEnd = exitEdges.end();
+         exitEdgeItr != exitEdgeEnd; ++exitEdgeItr) {
+      MachineLoop::Edge exitEdge = *exitEdgeItr;
+
+      LiveRange *outRange =
+        split.getLI().getLiveRangeContaining(
+                                      lis->getMBBStartIdx(exitEdge.second));
+
+      if (outRange != 0) {
+        if (isCriticalEdge(exitEdge) && !canSplitEdge(exitEdge)) {
+          split.invalidate();
+          return;
+        }
+
+        split.splitOutgoing(exitEdge);
+      }
+    }
+  }
+
+  void LoopSplitter::processLoopUses(LoopSplit &split) {
+    std::set<MachineInstr*> processed;
+
+    for (MachineRegisterInfo::reg_iterator
+           rItr = mri->reg_begin(split.getLI().reg),
+           rEnd = mri->reg_end();
+         rItr != rEnd; ++rItr) {
+      MachineInstr &instr = *rItr;
+      if (split.getLoop().contains(&instr) && processed.count(&instr) == 0) {
+        split.addLoopInstr(&instr);
+        processed.insert(&instr);
+      }
+    }
+
+    //dbgs() << "  Rewriting reg" << li.reg << " to reg" << newLI->reg
+    //       << " in blocks [ ";
+    //dbgs() << "]\n";
+  }
+
+  bool LoopSplitter::splitOverLoop(LiveInterval &li, MachineLoop &loop) {
+    assert(TargetRegisterInfo::isVirtualRegister(li.reg) &&
+           "Attempt to split physical register.");
+
+    LoopSplit split(*this, li, loop);
+    processHeader(split);
+    if (split.isValid())
+      processLoopExits(split);
+    if (split.isValid())
+      processLoopUses(split);
+    if (split.isValid() /* && split.isWorthwhile() */) {
+      split.apply();
+      DEBUG(dbgs() << "Success.\n");
+      return true;
+    }
+    DEBUG(dbgs() << "Failed.\n");
+    return false;
+  }
+
+  void LoopSplitter::processInterval(LiveInterval &li) {
+    std::deque<MachineLoop*> loops;
+    std::copy(mli->begin(), mli->end(), std::back_inserter(loops));
+
+    while (!loops.empty()) {
+      MachineLoop &loop = *loops.front();
+      loops.pop_front();
+      DEBUG(
+        dbgs() << fqn << " reg" << li.reg << " " << li.weight << " BB#"
+               << loop.getHeader()->getNumber() << " ";
+      );
+      if (!splitOverLoop(li, loop)) {
+        // Couldn't split over outer loop, schedule sub-loops to be checked.
+        std::copy(loop.begin(), loop.end(), std::back_inserter(loops));
+      }
+    }
+  }
+
+  void LoopSplitter::processIntervals() {
+    while (!intervals.empty()) {
+      LiveInterval &li = *intervals.front();
+      intervals.pop_front();
+
+      assert(!lis->intervalIsInOneMBB(li) &&
+             "Single interval in process worklist.");
+
+      processInterval(li);
+    }
+  }
+
+}
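+
+// The interval worklist in processInterval above is driven outermost-first:
+// a sub-loop is only ever examined after splitting over its parent loop has
+// failed. A condensed sketch of that traversal (illustrative only):
+//
+//   std::deque<MachineLoop*> work(mli->begin(), mli->end());
+//   while (!work.empty()) {
+//     MachineLoop *l = work.front(); work.pop_front();
+//     if (!splitOverLoop(li, *l))
+//       work.insert(work.end(), l->begin(), l->end());
+//   }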
diff --git a/final/lib/CodeGen/Splitter.h b/final/lib/CodeGen/Splitter.h
new file mode 100644
index 00000000000..9fb1b8b3013
--- /dev/null
+++ b/final/lib/CodeGen/Splitter.h
@@ -0,0 +1,101 @@
+//===-- llvm/CodeGen/Splitter.h - Splitter -*- C++ -*----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPLITTER_H
+#define LLVM_CODEGEN_SPLITTER_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+
+#include <deque>
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+  class LiveInterval;
+  class LiveIntervals;
+  struct LiveRange;
+  class LoopSplit;
+  class MachineDominatorTree;
+  class MachineRegisterInfo;
+  class SlotIndexes;
+  class TargetInstrInfo;
+  class VNInfo;
+
+  class LoopSplitter : public MachineFunctionPass {
+    friend class LoopSplit;
+  public:
+    static char ID;
+
+    LoopSplitter() : MachineFunctionPass(ID) {
+      initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+    virtual bool runOnMachineFunction(MachineFunction &fn);
+
+    virtual void releaseMemory();
+
+  private:
+
+    MachineFunction *mf;
+    LiveIntervals *lis;
+    MachineLoopInfo *mli;
+    MachineRegisterInfo *mri;
+    MachineDominatorTree *mdt;
+    SlotIndexes *sis;
+    const TargetInstrInfo *tii;
+    const TargetRegisterInfo *tri;
+
+    std::string fqn;
+    std::deque<LiveInterval*> intervals;
+
+    typedef std::pair<SlotIndex, SlotIndex> SlotPair;
+    typedef std::vector<SlotPair> LoopRanges;
+    typedef std::map<MachineLoop*, LoopRanges> LoopRangeMap;
+    LoopRangeMap loopRangeMap;
+
+    void dumpLoopInfo(MachineLoop &loop);
+
+    void dumpOddTerminators();
+
+    void updateTerminators(MachineBasicBlock &mbb);
+
+    bool canInsertPreHeader(MachineLoop &loop);
+    MachineBasicBlock& insertPreHeader(MachineLoop &loop);
+
+    bool isCriticalEdge(MachineLoop::Edge &edge);
+    bool canSplitEdge(MachineLoop::Edge &edge);
+    MachineBasicBlock& splitEdge(MachineLoop::Edge &edge, MachineLoop &loop);
+
+    LoopRanges& getLoopRanges(MachineLoop &loop);
+    std::pair<bool, SlotPair> getLoopSubRange(const LiveRange &lr,
+                                              MachineLoop &loop);
+
+    void dumpLoopRanges(MachineLoop &loop);
+
+    void processHeader(LoopSplit &split);
+    void processLoopExits(LoopSplit &split);
+    void processLoopUses(LoopSplit &split);
+
+    bool splitOverLoop(LiveInterval &li, MachineLoop &loop);
+
+    void processInterval(LiveInterval &li);
+
+    void processIntervals();
+  };
+
+}
+
+#endif
diff --git a/final/lib/CodeGen/StackProtector.cpp b/final/lib/CodeGen/StackProtector.cpp
new file mode 100644
index 00000000000..fcaee4208ba
--- /dev/null
+++ b/final/lib/CodeGen/StackProtector.cpp
@@ -0,0 +1,262 @@
+//===-- StackProtector.cpp - Stack Protector Insertion --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass inserts stack protectors into functions which need them. A
+// variable with a random value in it is stored onto the stack before the
+// local variables are allocated. Upon exiting the block, the stored value is
+// checked. If it's changed, then there was some sort of violation and the
+// program aborts.
+//
+//===----------------------------------------------------------------------===//
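+//
+// A rough sketch of the transformation, in C-like pseudo source (illustrative
+// only; the pass below works on IR, not C):
+//
+//   void f() {                      void f() {
+//     char buf[64];        ==>        void *guard = __stack_chk_guard;
+//     ...                             char buf[64];
+//     return;                         ...
+//   }                                 if (guard != __stack_chk_guard)
+//                                       __stack_chk_fail();
+//                                     return;
+//                                   }
+//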
+
+#define DEBUG_TYPE "stack-protector"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+using namespace llvm;
+
+// SSPBufferSize - The lower bound for a buffer to be considered for stack
+// smashing protection.
+static cl::opt<unsigned>
+SSPBufferSize("stack-protector-buffer-size", cl::init(8),
+              cl::desc("Lower bound for a buffer to be considered for "
+                       "stack protection"));
+
+namespace {
+  class StackProtector : public FunctionPass {
+    /// TLI - Keep a pointer of a TargetLowering to consult for determining
+    /// target type sizes.
+    const TargetLowering *TLI;
+
+    Function *F;
+    Module *M;
+
+    DominatorTree* DT;
+
+    /// InsertStackProtectors - Insert code into the prologue and epilogue of
+    /// the function.
+    ///
+    ///  - The prologue code loads and stores the stack guard onto the stack.
+    ///  - The epilogue checks the value stored in the prologue against the
+    ///    original value. It calls __stack_chk_fail if they differ.
+    bool InsertStackProtectors();
+
+    /// CreateFailBB - Create a basic block to jump to when the stack
+    /// protector check fails.
+    BasicBlock *CreateFailBB();
+
+    /// RequiresStackProtector - Check whether or not this function needs a
+    /// stack protector based upon the stack protector level.
+    bool RequiresStackProtector() const;
+  public:
+    static char ID;             // Pass identification, replacement for typeid.
+    StackProtector() : FunctionPass(ID), TLI(0) {
+      initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+    }
+    StackProtector(const TargetLowering *tli)
+      : FunctionPass(ID), TLI(tli) {
+      initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addPreserved<DominatorTree>();
+    }
+
+    virtual bool runOnFunction(Function &Fn);
+  };
+} // end anonymous namespace
+
+char StackProtector::ID = 0;
+INITIALIZE_PASS(StackProtector, "stack-protector",
+                "Insert stack protectors", false, false)
+
+FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
+  return new StackProtector(tli);
+}
+
+bool StackProtector::runOnFunction(Function &Fn) {
+  F = &Fn;
+  M = F->getParent();
+  DT = getAnalysisIfAvailable<DominatorTree>();
+
+  if (!RequiresStackProtector()) return false;
+
+  return InsertStackProtectors();
+}
+
+/// RequiresStackProtector - Check whether or not this function needs a stack
+/// protector based upon the stack protector level. The heuristic we use is to
+/// add a guard variable to functions that call alloca, and functions with
+/// buffers larger than SSPBufferSize bytes.
+bool StackProtector::RequiresStackProtector() const {
+  if (F->hasFnAttr(Attribute::StackProtectReq))
+    return true;
+
+  if (!F->hasFnAttr(Attribute::StackProtect))
+    return false;
+
+  const TargetData *TD = TLI->getTargetData();
+
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) {
+    BasicBlock *BB = I;
+
+    for (BasicBlock::iterator
+           II = BB->begin(), IE = BB->end(); II != IE; ++II)
+      if (AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
+        if (AI->isArrayAllocation())
+          // This is a call to alloca with a variable size. Emit stack
+          // protectors.
+          return true;
+
+        if (const ArrayType *AT = dyn_cast<ArrayType>(AI->getAllocatedType())) {
+          // We apparently only care about character arrays.
+          if (!AT->getElementType()->isIntegerTy(8))
+            continue;
+
+          // If an array has more than SSPBufferSize bytes of allocated space,
+          // then we emit stack protectors.
+          if (SSPBufferSize <= TD->getTypeAllocSize(AT))
+            return true;
+        }
+      }
+  }
+
+  return false;
+}
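+
+// A condensed sketch (illustrative only) of the policy RequiresStackProtector
+// implements above: a "ssp-req" attribute always protects, while plain "ssp"
+// protects only when the function contains a variable-sized alloca or a char
+// array of at least SSPBufferSize bytes:
+//
+//   protect = hasAttr(StackProtectReq)
+//          || (hasAttr(StackProtect)
+//              && (hasVarSizedAlloca || largestCharArray >= SSPBufferSize));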
+
+/// InsertStackProtectors - Insert code into the prologue and epilogue of the
+/// function.
+///
+///  - The prologue code loads and stores the stack guard onto the stack.
+///  - The epilogue checks the value stored in the prologue against the
+///    original value. It calls __stack_chk_fail if they differ.
+bool StackProtector::InsertStackProtectors() {
+  BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
+  BasicBlock *FailBBDom = 0;    // FailBB's dominator.
+  AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
+  Value *StackGuardVar = 0;     // The stack guard variable.
+
+  for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
+    BasicBlock *BB = I++;
+
+    ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
+    if (!RI) continue;
+
+    if (!FailBB) {
+      // Insert code into the entry block that stores the __stack_chk_guard
+      // variable onto the stack:
+      //
+      //   entry:
+      //     StackGuardSlot = alloca i8*
+      //     StackGuard = load __stack_chk_guard
+      //     call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
+      //
+      const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+      unsigned AddressSpace, Offset;
+      if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+        Constant *OffsetVal =
+          ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+        StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+                                      PointerType::get(PtrTy, AddressSpace));
+      } else {
+        StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+      }
+
+      BasicBlock &Entry = F->getEntryBlock();
+      Instruction *InsPt = &Entry.front();
+
+      AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt);
+      LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt);
+
+      Value *Args[] = { LI, AI };
+      CallInst::
+        Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
+               &Args[0], array_endof(Args), "", InsPt);
+
+      // Create the basic block to jump to when the guard check fails.
+      FailBB = CreateFailBB();
+      if (DT)
+        FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
+    }
+
+    // For each block with a return instruction, convert this:
+    //
+    //   return:
+    //     ...
+    //     ret ...
+    //
+    // into this:
+    //
+    //   return:
+    //     ...
+    //     %1 = load __stack_chk_guard
+    //     %2 = load StackGuardSlot
+    //     %3 = cmp i1 %1, %2
+    //     br i1 %3, label %SP_return, label %CallStackCheckFailBlk
+    //
+    //   SP_return:
+    //     ret ...
+    //
+    //   CallStackCheckFailBlk:
+    //     call void @__stack_chk_fail()
+    //     unreachable
+
+    // Split the basic block before the return instruction.
+    BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+    if (DT) {
+      DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
+      FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+    }
+
+    // Remove default branch instruction to the new BB.
+    BB->getTerminator()->eraseFromParent();
+
+    // Move the newly created basic block to the point right after the old
+    // basic block so that it's in the "fall through" position.
+    NewBB->moveAfter(BB);
+
+    // Generate the stack protector instructions in the old basic block.
+    LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB);
+    LoadInst *LI2 = new LoadInst(AI, "", true, BB);
+    ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, "");
+    BranchInst::Create(NewBB, FailBB, Cmp, BB);
+  }
+
+  // Return if we didn't modify any basic blocks. I.e., there are no return
+  // statements in the function.
+  if (!FailBB) return false;
+
+  if (DT)
+    DT->addNewBlock(FailBB, FailBBDom);
+
+  return true;
+}
+
+/// CreateFailBB - Create a basic block to jump to when the stack protector
+/// check fails.
+BasicBlock *StackProtector::CreateFailBB() {
+  BasicBlock *FailBB = BasicBlock::Create(F->getContext(),
+                                          "CallStackCheckFailBlk", F);
+  Constant *StackChkFail =
+    M->getOrInsertFunction("__stack_chk_fail",
+                           Type::getVoidTy(F->getContext()), NULL);
+  CallInst::Create(StackChkFail, "", FailBB);
+  new UnreachableInst(F->getContext(), FailBB);
+  return FailBB;
+}
diff --git a/final/lib/CodeGen/StackSlotColoring.cpp b/final/lib/CodeGen/StackSlotColoring.cpp
new file mode 100644
index 00000000000..01f5b5627f4
--- /dev/null
+++ b/final/lib/CodeGen/StackSlotColoring.cpp
@@ -0,0 +1,768 @@
+//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "stackcoloring"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+using namespace llvm;
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+               cl::init(false), cl::Hidden,
+               cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<bool>
+ColorWithRegsOpt("color-ss-with-regs",
+                 cl::init(false), cl::Hidden,
+                 cl::desc("Color stack slots with free registers"));
+
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumRegRepl,    "Number of stack slot refs replaced with reg refs");
+STATISTIC(NumLoadElim,   "Number of loads eliminated");
+STATISTIC(NumStoreElim,  "Number of stores eliminated");
+STATISTIC(NumDead,       "Number of trivially dead stack accesses eliminated");
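+
+// The pass below is, at its core, a greedy interval-graph coloring: slots are
+// visited in decreasing weight order and each one takes the first already-used
+// color whose assigned intervals it does not overlap, or a fresh color
+// otherwise. A minimal standalone sketch of that scheme (illustrative names,
+// not part of the pass):
+static int pickColorSketch(const std::vector<bool> &overlapsColor,
+                           int numUsedColors) {
+  for (int c = 0; c < numUsedColors; ++c)
+    if (!overlapsColor[c])
+      return c;                // Share an existing slot.
+  return numUsedColors;        // Open a new color (slot).
+}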
+
+namespace {
+  class StackSlotColoring : public MachineFunctionPass {
+    bool ColorWithRegs;
+    LiveStacks* LS;
+    VirtRegMap* VRM;
+    MachineFrameInfo *MFI;
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo  *TII;
+    const TargetRegisterInfo *TRI;
+    const MachineLoopInfo *loopInfo;
+
+    // SSIntervals - Spill slot intervals.
+    std::vector<LiveInterval*> SSIntervals;
+
+    // SSRefs - Keep a list of frame index references for each spill slot.
+    SmallVector<SmallVector<MachineInstr*, 8>, 16> SSRefs;
+
+    // OrigAlignments - Alignments of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closest to the stack pointer / frame pointer. Therefore, a smaller
+    // index == a better color.
+    BitVector AllColors;
+
+    // NextColor - Next "color" that's not yet used.
+    int NextColor;
+
+    // UsedColors - "Colors" that have been assigned.
+    BitVector UsedColors;
+
+    // Assignments - Color to intervals mapping.
+    SmallVector<SmallVector<LiveInterval*,8>, 16> Assignments;
+
+  public:
+    static char ID; // Pass identification
+    StackSlotColoring() :
+      MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+      }
+    StackSlotColoring(bool RegColor) :
+      MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+        initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+      }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<SlotIndexes>();
+      AU.addPreserved<SlotIndexes>();
+      AU.addRequired<LiveStacks>();
+      AU.addRequired<VirtRegMap>();
+      AU.addPreserved<VirtRegMap>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineLoopInfo>();
+      AU.addPreservedID(MachineDominatorsID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    virtual bool runOnMachineFunction(MachineFunction &MF);
+    virtual const char* getPassName() const {
+      return "Stack Slot Coloring";
+    }
+
+  private:
+    void InitializeSlots();
+    void ScanForSpillSlotRefs(MachineFunction &MF);
+    bool OverlapWithAssignments(LiveInterval *li, int Color) const;
+    int ColorSlot(LiveInterval *li);
+    bool ColorSlots(MachineFunction &MF);
+    bool ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+                                SmallVector<SmallVector<int, 4>, 16> &RevMap,
+                                BitVector &SlotIsReg);
+    void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI,
+                            MachineFunction &MF);
+    bool PropagateBackward(MachineBasicBlock::iterator MII,
+                           MachineBasicBlock *MBB,
+                           unsigned OldReg, unsigned NewReg);
+    bool PropagateForward(MachineBasicBlock::iterator MII,
+                          MachineBasicBlock *MBB,
+                          unsigned OldReg, unsigned NewReg);
+    void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+                                     unsigned Reg,
+                                     const TargetRegisterClass *RC,
+                                     SmallSet<unsigned, 4> &Defs,
+                                     MachineFunction &MF);
+    bool AllMemRefsCanBeUnfolded(int SS);
+    bool RemoveDeadStores(MachineBasicBlock* MBB);
+  };
+} // end anonymous namespace
+
+char StackSlotColoring::ID = 0;
+
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+                "Stack Slot Coloring", false, false)
+
+FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
+  return new StackSlotColoring(RegColor);
+}
+
+namespace {
+  // IntervalSorter - Comparison predicate that sorts live intervals by
+  // their weight.
+  struct IntervalSorter {
+    bool operator()(LiveInterval* LHS, LiveInterval* RHS) const {
+      return LHS->weight > RHS->weight;
+    }
+  };
+}
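+
+// Each slot reference found by ScanForSpillSlotRefs (below) bumps the slot
+// interval's weight by a loop-depth-scaled factor from
+// LiveIntervals::getSpillWeight, which behaves roughly like 10^depth for
+// shallow loops - hence the "weight < 20, i.e. two references in a loop with
+// depth 1" rule of thumb used later in ColorSlotsWithFreeRegs. A hedged
+// sketch of that weighting (illustrative only; the real formula tempers
+// growth for deep loops):
+//
+//   float refWeightSketch(unsigned loopDepth) {
+//     return powf(10.0f, (float)loopDepth);
+//   }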
+
+/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot
+/// references and update spill slot weights.
+void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) {
+  SSRefs.resize(MFI->getObjectIndexEnd());
+
+  // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands.
+  for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+       MBBI != E; ++MBBI) {
+    MachineBasicBlock *MBB = &*MBBI;
+    unsigned loopDepth = loopInfo->getLoopDepth(MBB);
+    for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end();
+         MII != EE; ++MII) {
+      MachineInstr *MI = &*MII;
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (!MO.isFI())
+          continue;
+        int FI = MO.getIndex();
+        if (FI < 0)
+          continue;
+        if (!LS->hasInterval(FI))
+          continue;
+        LiveInterval &li = LS->getInterval(FI);
+        if (!MI->isDebugValue())
+          li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth);
+        SSRefs[FI].push_back(MI);
+      }
+    }
+  }
+}
+
+/// InitializeSlots - Process all spill stack slot liveintervals and add them
+/// to a sorted (by weight) list.
+void StackSlotColoring::InitializeSlots() {
+  int LastFI = MFI->getObjectIndexEnd();
+  OrigAlignments.resize(LastFI);
+  OrigSizes.resize(LastFI);
+  AllColors.resize(LastFI);
+  UsedColors.resize(LastFI);
+  Assignments.resize(LastFI);
+
+  // Gather all spill slots into a list.
+  DEBUG(dbgs() << "Spill slot intervals:\n");
+  for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
+    LiveInterval &li = i->second;
+    DEBUG(li.dump());
+    int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+    if (MFI->isDeadObjectIndex(FI))
+      continue;
+    SSIntervals.push_back(&li);
+    OrigAlignments[FI] = MFI->getObjectAlignment(FI);
+    OrigSizes[FI] = MFI->getObjectSize(FI);
+    AllColors.set(FI);
+  }
+  DEBUG(dbgs() << '\n');
+
+  // Sort them by weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+  // Get first "color".
+  NextColor = AllColors.find_first();
+}
+
+/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
+/// LiveIntervals that have already been assigned to the specified color.
+bool
+StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
+  const SmallVector<LiveInterval*,8> &OtherLIs = Assignments[Color];
+  for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
+    LiveInterval *OtherLI = OtherLIs[i];
+    if (OtherLI->overlaps(*li))
+      return true;
+  }
+  return false;
+}
+
+/// ColorSlotsWithFreeRegs - If there are any free registers available, try
+/// replacing spill slot references with registers instead.
+bool
+StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
+                                   SmallVector<SmallVector<int, 4>, 16> &RevMap,
+                                   BitVector &SlotIsReg) {
+  if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters())
+    return false;
+
+  bool Changed = false;
+  DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+    if (!UsedColors[SS] || li->weight < 20)
+      // If the weight is < 20, i.e. two references in a loop with depth 1,
+      // don't bother with it.
+      continue;
+
+    // These slots can share the same registers.
+    bool AllColored = true;
+    SmallVector<unsigned, 4> ColoredRegs;
+    for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) {
+      int RSS = RevMap[SS][j];
+      const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS);
+      // If it's not colored to another stack slot, try coloring it
+      // to a "free" register.
+      if (!RC) {
+        AllColored = false;
+        continue;
+      }
+      unsigned Reg = VRM->getFirstUnusedRegister(RC);
+      if (!Reg) {
+        AllColored = false;
+        continue;
+      }
+      if (!AllMemRefsCanBeUnfolded(RSS)) {
+        AllColored = false;
+        continue;
+      } else {
+        DEBUG(dbgs() << "Assigning fi#" << RSS << " to "
+                     << TRI->getName(Reg) << '\n');
+        ColoredRegs.push_back(Reg);
+        SlotMapping[RSS] = Reg;
+        SlotIsReg.set(RSS);
+        Changed = true;
+      }
+    }
+
+    // Register and its sub-registers are no longer free.
+    while (!ColoredRegs.empty()) {
+      unsigned Reg = ColoredRegs.back();
+      ColoredRegs.pop_back();
+      VRM->setRegisterUsed(Reg);
+      // If reg is a callee-saved register, it will have to be spilled in
+      // the prologue.
+      MRI->setPhysRegUsed(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+        VRM->setRegisterUsed(*AS);
+        MRI->setPhysRegUsed(*AS);
+      }
+    }
+    // This spill slot is dead after the rewrites.
+    if (AllColored) {
+      MFI->RemoveStackObject(SS);
+      ++NumEliminated;
+    }
+  }
+  DEBUG(dbgs() << '\n');
+
+  return Changed;
+}
+
+/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
+///
+int StackSlotColoring::ColorSlot(LiveInterval *li) {
+  int Color = -1;
+  bool Share = false;
+  if (!DisableSharing) {
+    // Check if it's possible to reuse any of the used colors.
+    Color = UsedColors.find_first();
+    while (Color != -1) {
+      if (!OverlapWithAssignments(li, Color)) {
+        Share = true;
+        ++NumEliminated;
+        break;
+      }
+      Color = UsedColors.find_next(Color);
+    }
+  }
+
+  // Assign it to the first available color (assumed to be the best) if it's
+  // not possible to share a used color with other objects.
+  if (!Share) {
+    assert(NextColor != -1 && "No more spill slots?");
+    Color = NextColor;
+    UsedColors.set(Color);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  // Record the assignment.
+  Assignments[Color].push_back(li);
+  int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+  DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
+
+  // Change size and alignment of the allocated slot. If there are multiple
+  // objects sharing the same slot, then make sure the size and alignment
+  // are large enough for all.
+  unsigned Align = OrigAlignments[FI];
+  if (!Share || Align > MFI->getObjectAlignment(Color))
+    MFI->setObjectAlignment(Color, Align);
+  int64_t Size = OrigSizes[FI];
+  if (!Share || Size > MFI->getObjectSize(Color))
+    MFI->setObjectSize(Color, Size);
+  return Color;
+}
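+
+// When several intervals share one color, the backing stack object must be
+// big and aligned enough for all of them; a tiny sketch of the merge rule
+// ColorSlot applies just above (illustrative only):
+//
+//   newSize  = std::max(curSize,  objectSize);
+//   newAlign = std::max(curAlign, objectAlign);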
+
+/// ColorSlots - Color all spill stack slots and rewrite all frameindex machine
+/// operands in the function.
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
+  unsigned NumObjs = MFI->getObjectIndexEnd();
+  SmallVector<int, 16> SlotMapping(NumObjs, -1);
+  SmallVector<float, 16> SlotWeights(NumObjs, 0.0);
+  SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs);
+  BitVector SlotIsReg(NumObjs);
+  BitVector UsedColors(NumObjs);
+
+  DEBUG(dbgs() << "Color spill slot intervals:\n");
+  bool Changed = false;
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+    int NewSS = ColorSlot(li);
+    assert(NewSS >= 0 && "Stack coloring failed?");
+    SlotMapping[SS] = NewSS;
+    RevMap[NewSS].push_back(SS);
+    SlotWeights[NewSS] += li->weight;
+    UsedColors.set(NewSS);
+    Changed |= (SS != NewSS);
+  }
+
+  DEBUG(dbgs() << "\nSpill slots after coloring:\n");
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
+    LiveInterval *li = SSIntervals[i];
+    int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+    li->weight = SlotWeights[SS];
+  }
+  // Sort them by new weight.
+  std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+
+#ifndef NDEBUG
+  for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
+    DEBUG(SSIntervals[i]->dump());
+  DEBUG(dbgs() << '\n');
+#endif
+
+  // Can we "color" a stack slot with an unused register?
+  Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg);
+
+  if (!Changed)
+    return false;
+
+  // Rewrite all MO_FrameIndex operands.
+  SmallVector<SmallSet<unsigned, 4>, 4> NewDefs(MF.getNumBlockIDs());
+  for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) {
+    bool isReg = SlotIsReg[SS];
+    int NewFI = SlotMapping[SS];
+    if (NewFI == -1 || (NewFI == (int)SS && !isReg))
+      continue;
+
+    const TargetRegisterClass *RC = LS->getIntervalRegClass(SS);
+    SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+    for (unsigned i = 0, e = RefMIs.size(); i != e; ++i)
+      if (!isReg)
+        RewriteInstruction(RefMIs[i], SS, NewFI, MF);
+      else {
+        // Rewrite to use a register instead.
+        unsigned MBBId = RefMIs[i]->getParent()->getNumber();
+        SmallSet<unsigned, 4> &Defs = NewDefs[MBBId];
+        UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF);
+      }
+  }
+
+  // Delete unused stack slots.
+  while (NextColor != -1) {
+    DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n");
+    MFI->RemoveStackObject(NextColor);
+    NextColor = AllColors.find_next(NextColor);
+  }
+
+  return true;
+}
+
+/// AllMemRefsCanBeUnfolded - Return true if all references of the specified
+/// spill slot index can be unfolded.
+bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) {
+  SmallVector<MachineInstr*, 8> &RefMIs = SSRefs[SS];
+  for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) {
+    MachineInstr *MI = RefMIs[i];
+    if (TII->isLoadFromStackSlot(MI, SS) ||
+        TII->isStoreToStackSlot(MI, SS))
+      // Restore and spill will become copies.
+      return true;
+    if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false))
+      return false;
+    for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
+      MachineOperand &MO = MI->getOperand(j);
+      if (MO.isFI() && MO.getIndex() != SS)
+        // If it uses another frameindex, we can't, currently, unfold it.
+        return false;
+    }
+  }
+  return true;
+}
+
+/// RewriteInstruction - Rewrite specified instruction by replacing references
+/// to old frame index with new one.
+void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI,
+                                           int NewFI, MachineFunction &MF) {
+  // Update the operands.
+  for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isFI())
+      continue;
+    int FI = MO.getIndex();
+    if (FI != OldFI)
+      continue;
+    MO.setIndex(NewFI);
+  }
+
+  // Update the memory references. This changes the MachineMemOperands
+  // directly. They may be in use by multiple instructions, however all
+  // instructions using OldFI are being rewritten to use NewFI.
+  const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI);
+  const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI);
+  for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+       E = MI->memoperands_end(); I != E; ++I)
+    if ((*I)->getValue() == OldSV)
+      (*I)->setValue(NewSV);
+}
+
+/// PropagateBackward - Traverse backward and look for the definition of
+/// OldReg. If it can successfully update all of the references with NewReg,
+/// do so and return true.
+bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
+                                          MachineBasicBlock *MBB,
+                                          unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->begin())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  SmallVector<MachineOperand*, 4> Refs;
+  while (--MII != MBB->begin()) {
+    bool FoundDef = false;  // Not counting 2address def.
+
+    Uses.clear();
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isImplicit())
+          return false;
+
+        // Abort if the use is actually a sub-register def. We don't have
+        // enough information to figure out if it is really legal.
+        if (MO.getSubReg() || MII->isSubregToReg())
+          return false;
+
+        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        if (RC && !RC->contains(NewReg))
+          return false;
+
+        if (MO.isUse()) {
+          Uses.push_back(&MO);
+        } else {
+          Refs.push_back(&MO);
+          if (!MII->isRegTiedToUseOperand(i))
+            FoundDef = true;
+        }
+      } else if (TRI->regsOverlap(Reg, NewReg)) {
+        return false;
+      } else if (TRI->regsOverlap(Reg, OldReg)) {
+        if (!MO.isUse() || !MO.isKill())
+          return false;
+      }
+    }
+
+    if (FoundDef) {
+      // Found non-two-address def. Stop here.
+      for (unsigned i = 0, e = Refs.size(); i != e; ++i)
+        Refs[i]->setReg(NewReg);
+      return true;
+    }
+
+    // Two-address uses must be updated as well.
+    for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+      Refs.push_back(Uses[i]);
+  }
+  return false;
+}
+
+/// PropagateForward - Traverse forward and look for the kill of OldReg. If
+/// it can successfully update all of the uses with NewReg, do so and
+/// return true.
+bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
+                                         MachineBasicBlock *MBB,
+                                         unsigned OldReg, unsigned NewReg) {
+  if (MII == MBB->end())
+    return false;
+
+  SmallVector<MachineOperand*, 4> Uses;
+  while (++MII != MBB->end()) {
+    bool FoundKill = false;
+    const TargetInstrDesc &TID = MII->getDesc();
+    for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MII->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned Reg = MO.getReg();
+      if (Reg == 0)
+        continue;
+      if (Reg == OldReg) {
+        if (MO.isDef() || MO.isImplicit())
+          return false;
+
+        // Abort if the use is actually a sub-register use. We don't have
+        // enough information to figure out if it is really legal.
+        if (MO.getSubReg())
+          return false;
+
+        const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+        if (RC && !RC->contains(NewReg))
+          return false;
+        if (MO.isKill())
+          FoundKill = true;
+
+        Uses.push_back(&MO);
+      } else if (TRI->regsOverlap(Reg, NewReg) ||
+                 TRI->regsOverlap(Reg, OldReg))
+        return false;
+    }
+    if (FoundKill) {
+      for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+        Uses[i]->setReg(NewReg);
+      return true;
+    }
+  }
+  return false;
+}
+
+/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding
+/// folded memory references and replacing those references with register
+/// references instead.
+void
+StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
+                                               unsigned Reg,
+                                               const TargetRegisterClass *RC,
+                                               SmallSet<unsigned, 4> &Defs,
+                                               MachineFunction &MF) {
+  MachineBasicBlock *MBB = MI->getParent();
+  if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) {
+    if (PropagateForward(MI, MBB, DstReg, Reg)) {
+      DEBUG(dbgs() << "Eliminated load: ");
+      DEBUG(MI->dump());
+      ++NumLoadElim;
+    } else {
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+              DstReg).addReg(Reg);
+      ++NumRegRepl;
+    }
+
+    if (!Defs.count(Reg)) {
+      // If this is the first use of Reg in this MBB and it wasn't previously
+      // defined in MBB, add it to livein.
+      MBB->addLiveIn(Reg);
+      Defs.insert(Reg);
+    }
+  } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) {
+    if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) {
+      DEBUG(dbgs() << "Eliminated store: ");
+      DEBUG(MI->dump());
+      ++NumStoreElim;
+    } else {
+      BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+        .addReg(SrcReg);
+      ++NumRegRepl;
+    }
+
+    // Remember reg has been defined in MBB.
+    Defs.insert(Reg);
+  } else {
+    SmallVector<MachineInstr*, 4> NewMIs;
+    bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
+    (void)Success; // Silence compiler warning.
+    assert(Success && "Failed to unfold!");
+    MachineInstr *NewMI = NewMIs[0];
+    MBB->insert(MI, NewMI);
+    ++NumRegRepl;
+
+    if (NewMI->readsRegister(Reg)) {
+      if (!Defs.count(Reg))
+        // If this is the first use of Reg in this MBB and it wasn't previously
+        // defined in MBB, add it to livein.
+        MBB->addLiveIn(Reg);
+      Defs.insert(Reg);
+    }
+  }
+  MBB->erase(MI);
+}
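+
+// The rewrite strategy above, in brief: a load from a recolored slot becomes
+// either nothing (if its uses can be forwarded to the new register) or a
+// plain COPY; a store becomes either nothing (if the stored value's def can
+// be retargeted) or a COPY; anything else has its folded memory operand
+// unfolded into an explicit register form. A sketch of the dispatch
+// (illustrative only):
+//
+//   if (isLoadFrom(SS))       propagateForwardOrCopy();
+//   else if (isStoreTo(SS))   propagateBackwardOrCopy();
+//   else                      unfoldMemoryOperand();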
+
+/// RemoveDeadStores - Scan through a basic block and look for loads followed
+/// by stores. If they're both using the same stack slot, then the store is
+/// definitely dead. This could obviously be much more aggressive (consider
+/// pairs with instructions between them), but such extensions might have a
+/// considerable compile time impact.
+bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
+  // FIXME: This could be much more aggressive, but we need to investigate
+  // the compile time impact of doing so.
+  bool changed = false;
+
+  SmallVector<MachineInstr*, 4> toErase;
+
+  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+       I != E; ++I) {
+    if (DCELimit != -1 && (int)NumDead >= DCELimit)
+      break;
+
+    MachineBasicBlock::iterator NextMI = llvm::next(I);
+    if (NextMI == MBB->end()) continue;
+
+    int FirstSS, SecondSS;
+    unsigned LoadReg = 0;
+    unsigned StoreReg = 0;
+    if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue;
+    if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue;
+    if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue;
+
+    ++NumDead;
+    changed = true;
+
+    if (NextMI->findRegisterUseOperandIdx(LoadReg, true, 0) != -1) {
+      ++NumDead;
+      toErase.push_back(I);
+    }
+
+    toErase.push_back(NextMI);
+    ++I;
+  }
+
+  for (SmallVector<MachineInstr*, 4>::iterator I = toErase.begin(),
+       E = toErase.end(); I != E; ++I)
+    (*I)->eraseFromParent();
+
+  return changed;
+}
+
+
+bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
+  DEBUG({
+      dbgs() << "********** Stack Slot Coloring **********\n"
+             << "********** Function: "
+             << MF.getFunction()->getName() << '\n';
+    });
+
+  MFI = MF.getFrameInfo();
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  LS = &getAnalysis<LiveStacks>();
+  VRM = &getAnalysis<VirtRegMap>();
+  loopInfo = &getAnalysis<MachineLoopInfo>();
+
+  bool Changed = false;
+
+  unsigned NumSlots = LS->getNumIntervals();
+  if (NumSlots < 2) {
+    if (NumSlots == 0 || !VRM->HasUnusedRegisters())
+      // Nothing to do!
+      return false;
+  }
+
+  // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+  // coloring. The stack could be modified before the longjmp is executed,
+  // resulting in the wrong value being used afterwards. (See
+  // <rdar://problem/8007500>.)
+  if (MF.callsSetJmp())
+    return false;
+
+  // Gather spill slot references.
+  ScanForSpillSlotRefs(MF);
+  InitializeSlots();
+  Changed = ColorSlots(MF);
+
+  NextColor = -1;
+  SSIntervals.clear();
+  for (unsigned i = 0, e = SSRefs.size(); i != e; ++i)
+    SSRefs[i].clear();
+  SSRefs.clear();
+  OrigAlignments.clear();
+  OrigSizes.clear();
+  AllColors.clear();
+  UsedColors.clear();
+  for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
+    Assignments[i].clear();
+  Assignments.clear();
+
+  if (Changed) {
+    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
+      Changed |= RemoveDeadStores(I);
+  }
+
+  return Changed;
+}
diff --git a/final/lib/CodeGen/StrongPHIElimination.cpp b/final/lib/CodeGen/StrongPHIElimination.cpp
new file mode 100644
index 00000000000..ec7829ec39f
--- /dev/null
+++ b/final/lib/CodeGen/StrongPHIElimination.cpp
@@ -0,0 +1,829 @@
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then
+// eliminates proven interferences, using dominators to only perform a linear
+// number of interference tests instead of the quadratic number of
+// interference tests that this would naively require. This is a technique
+// derived from:
+//
+//    Budimlic, et al. Fast copy coalescing and live-range identification.
+//    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
+//    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
+//    PLDI '02. ACM, New York, NY, 25-32.
+//
+// The original implementation constructs a data structure they call a
+// dominance forest for this purpose. The dominance forest was shown to be
+// unnecessary, as it is possible to emulate the creation and traversal of a
+// dominance forest by directly using the dominator tree, rather than actually
+// constructing the dominance forest. This technique is explained in:
+//
+//    Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+//    Quality and Efficiency,
+//    In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+//    Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+//    CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+  class StrongPHIElimination : public MachineFunctionPass {
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    StrongPHIElimination() : MachineFunctionPass(ID) {
+      initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage&) const;
+    bool runOnMachineFunction(MachineFunction&);
+
+  private:
+    /// This struct represents a single node in the union-find data structure
+    /// representing the variable congruence classes. There is one difference
+    /// from a normal union-find data structure. We steal two bits from the
+    /// parent pointer. One of these bits is used to represent whether the
+    /// register itself has been isolated, and the other is used to represent
+    /// whether the PHI with that register as its destination has been
+    /// isolated.
+    ///
+    /// Note that this leads to the strange situation where the leader of a
+    /// congruence class may no longer logically be a member, due to being
+    /// isolated.
+    struct Node {
+      enum Flags {
+        kRegisterIsolatedFlag = 1,
+        kPHIIsolatedFlag = 2
+      };
+      Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+      Node *getLeader();
+
+      PointerIntPair<Node*, 2> parent;
+      unsigned value;
+      unsigned rank;
+    };
+
+    /// Add a register in a new congruence class containing only itself.
+    void addReg(unsigned);
+
+    /// Join the congruence classes of two registers. This function is biased
+    /// towards the left argument, i.e. after
+    ///
+    /// addReg(r2);
+    /// unionRegs(r1, r2);
+    ///
+    /// the leader of the unioned congruence class is the same as the leader of
+    /// r1's congruence class prior to the union. This is actually relied upon
+    /// in the copy insertion code.
+    void unionRegs(unsigned, unsigned);
+
+    /// Get the color of a register. The color is 0 if the register has been
+    /// isolated.
+    unsigned getRegColor(unsigned);
+
+    // Isolate a register.
+    void isolateReg(unsigned);
+
+    /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+    /// isolated. Otherwise, it is the original color of its destination and
+    /// all of its operands (before they were isolated, if they were).
+    unsigned getPHIColor(MachineInstr*);
+
+    /// Isolate a PHI.
+    void isolatePHI(MachineInstr*);
+
+    /// Traverses a basic block, splitting any interferences found between
+    /// registers in the same congruence class. It takes two DenseMaps as
+    /// arguments that it also updates: CurrentDominatingParent, which maps
+    /// a color to the register in that congruence class whose definition was
+    /// most recently seen, and ImmediateDominatingParent, which maps a
+    /// register to the register in the same congruence class that most
+    /// immediately dominates it.
+    ///
+    /// This function assumes that it is being called in a depth-first
+    /// traversal of the dominator tree.
+    void SplitInterferencesForBasicBlock(
+      MachineBasicBlock&,
+      DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+      DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+    // Lowers a PHI instruction, inserting copies of the source and destination
+    // registers as necessary.
+    void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+    // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+    // everywhere that Reg appears. Requires Reg and NewReg to have
+    // non-overlapping lifetimes.
+    void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+    MachineRegisterInfo *MRI;
+    const TargetInstrInfo *TII;
+    MachineDominatorTree *DT;
+    LiveIntervals *LI;
+
+    BumpPtrAllocator Allocator;
+
+    DenseMap<unsigned, Node*> RegNodeMap;
+
+    // Maps a basic block to a list of its defs of registers that appear as PHI
+    // sources.
+    DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+    // Maps a color to a pair of a MachineInstr* and a virtual register, which
+    // is the operand of that PHI corresponding to the current basic block.
+    DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+    // FIXME: Can these two data structures be combined? Would a std::multimap
+    // be any better?
+
+    // Stores pairs of predecessor basic blocks and the source registers of
+    // inserted copy instructions.
+    typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+    SrcCopySet InsertedSrcCopySet;
+
+    // Maps pairs of predecessor basic blocks and colors to their defining copy
+    // instructions.
+    typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+      SrcCopyMap;
+    SrcCopyMap InsertedSrcCopyMap;
+
+    // Maps inserted destination copy registers to their defining copy
+    // instructions.
+    typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+    DestCopyMap InsertedDestCopies;
+  };
+
+  struct MIIndexCompare {
+    MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+    bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+      return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
+    }
+
+    LiveIntervals *LI;
+  };
+} // namespace
+
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+  "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesCFG();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<SlotIndexes>();
+  AU.addPreserved<SlotIndexes>();
+  AU.addRequired<LiveIntervals>();
+  AU.addPreserved<LiveIntervals>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+  // FIXME: This only needs to check from the first terminator, as only the
+  // first terminator can use a virtual register.
+  for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+    assert (RI != MBB->rend());
+    MachineInstr *MI = &*RI;
+
+    for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+         OE = MI->operands_end(); OI != OE; ++OI) {
+      MachineOperand &MO = *OI;
+      if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+        return &MO;
+    }
+  }
+  return NULL;
+}
+
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+  MRI = &MF.getRegInfo();
+  TII = MF.getTarget().getInstrInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
+  LI = &getAnalysis<LiveIntervals>();
+
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      unsigned DestReg = BBI->getOperand(0).getReg();
+      addReg(DestReg);
+      PHISrcDefs[I].push_back(BBI);
+
+      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+        MachineOperand &SrcMO = BBI->getOperand(i);
+        unsigned SrcReg = SrcMO.getReg();
+        addReg(SrcReg);
+        unionRegs(DestReg, SrcReg);
+
+        MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+        if (DefMI)
+          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
+      }
+    }
+  }
+
+  // Perform a depth-first traversal of the dominator tree, splitting
+  // interferences amongst PHI-congruence classes.
+  DenseMap<unsigned, unsigned> CurrentDominatingParent;
+  DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+  for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+       DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+    SplitInterferencesForBasicBlock(*DI->getBlock(),
+                                    CurrentDominatingParent,
+                                    ImmediateDominatingParent);
+  }
+
+  // Insert copies for all PHI source and destination registers.
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      InsertCopiesForPHI(BBI, I);
+    }
+  }
+
+  // FIXME: Preserve the equivalence classes during copy insertion and use
+  // the preserved equivalence classes instead of recomputing them.
+  RegNodeMap.clear();
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      unsigned DestReg = BBI->getOperand(0).getReg();
+      addReg(DestReg);
+
+      for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+        unsigned SrcReg = BBI->getOperand(i).getReg();
+        addReg(SrcReg);
+        unionRegs(DestReg, SrcReg);
+      }
+    }
+  }
+
+  DenseMap<unsigned, unsigned> RegRenamingMap;
+  bool Changed = false;
+  for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+       I != E; ++I) {
+    MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+    while (BBI != BBE && BBI->isPHI()) {
+      MachineInstr *PHI = BBI;
+
+      assert(PHI->getNumOperands() > 0);
+
+      unsigned SrcReg = PHI->getOperand(1).getReg();
+      unsigned SrcColor = getRegColor(SrcReg);
+      unsigned NewReg = RegRenamingMap[SrcColor];
+      if (!NewReg) {
+        NewReg = SrcReg;
+        RegRenamingMap[SrcColor] = SrcReg;
+      }
+      MergeLIsAndRename(SrcReg, NewReg);
+
+      unsigned DestReg = PHI->getOperand(0).getReg();
+      if (!InsertedDestCopies.count(DestReg))
+        MergeLIsAndRename(DestReg, NewReg);
+
+      for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+        unsigned SrcReg = PHI->getOperand(i).getReg();
+        MergeLIsAndRename(SrcReg, NewReg);
+      }
+
+      ++BBI;
+      LI->RemoveMachineInstrFromMaps(PHI);
+      PHI->eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  // Due to the insertion of copies to split live ranges, the live intervals
+  // are guaranteed to not overlap, except in one case: an original PHI source
+  // and a PHI destination copy. In this case, they have the same value and
+  // thus don't truly intersect, so we merge them into the value live at that
+  // point.
+  // FIXME: Is there some better way we can handle this?
+  for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+       E = InsertedDestCopies.end(); I != E; ++I) {
+    unsigned DestReg = I->first;
+    unsigned DestColor = getRegColor(DestReg);
+    unsigned NewReg = RegRenamingMap[DestColor];
+
+    LiveInterval &DestLI = LI->getInterval(DestReg);
+    LiveInterval &NewLI = LI->getInterval(NewReg);
+
+    assert(DestLI.ranges.size() == 1
+           && "PHI destination copy's live interval should be a single live "
+              "range from the beginning of the BB to the copy instruction.");
+    LiveRange *DestLR = DestLI.begin();
+    VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+    if (!NewVNI) {
+      NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+      MachineInstr *CopyInstr = I->second;
+      CopyInstr->getOperand(1).setIsKill(true);
+    }
+
+    LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+    NewLI.addRange(NewLR);
+
+    LI->removeInterval(DestReg);
+    MRI->replaceRegWith(DestReg, NewReg);
+  }
+
+  // Adjust the live intervals of all PHI source registers to handle the case
+  // where the PHIs in successor blocks were the only later uses of the source
+  // register.
+  for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+       E = InsertedSrcCopySet.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I->first;
+    unsigned SrcReg = I->second;
+    if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+      SrcReg = RenamedRegister;
+
+    LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+    bool isLiveOut = false;
+    for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+      if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+        isLiveOut = true;
+        break;
+      }
+    }
+
+    if (isLiveOut)
+      continue;
+
+    MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+    assert(LastUse);
+    SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+    SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+    LastUse->setIsKill(true);
+  }
+
+  LI->renumber();
+
+  Allocator.Reset();
+  RegNodeMap.clear();
+  PHISrcDefs.clear();
+  InsertedSrcCopySet.clear();
+  InsertedSrcCopyMap.clear();
+  InsertedDestCopies.clear();
+
+  return Changed;
+}
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+  if (RegNodeMap.count(Reg))
+    return;
+  RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+  Node *N = this;
+  Node *Parent = parent.getPointer();
+  Node *Grandparent = Parent->parent.getPointer();
+
+  while (Parent != Grandparent) {
+    N->parent.setPointer(Grandparent);
+    N = Grandparent;
+    Parent = Parent->parent.getPointer();
+    Grandparent = Parent->parent.getPointer();
+  }
+
+  return Parent;
+}
+
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+  DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+  if (RI == RegNodeMap.end())
+    return 0;
+  Node *Node = RI->second;
+  if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+    return 0;
+  return Node->getLeader()->value;
+}
+
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+  Node *Node1 = RegNodeMap[Reg1]->getLeader();
+  Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+  if (Node1->rank > Node2->rank) {
+    Node2->parent.setPointer(Node1->getLeader());
+  } else if (Node1->rank < Node2->rank) {
+    Node1->parent.setPointer(Node2->getLeader());
+  } else if (Node1 != Node2) {
+    Node2->parent.setPointer(Node1->getLeader());
+    Node1->rank++;
+  }
+}
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+  Node *Node = RegNodeMap[Reg];
+  Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+  assert(PHI->isPHI());
+
+  unsigned DestReg = PHI->getOperand(0).getReg();
+  Node *DestNode = RegNodeMap[DestReg];
+  if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+    return 0;
+
+  for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+    unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+    if (SrcColor)
+      return SrcColor;
+  }
+  return 0;
+}
+
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+  assert(PHI->isPHI());
+  Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+  Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
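+
+// A minimal self-contained sketch (illustrative only) of the union-find
+// scheme implemented by Node::getLeader and unionRegs above: a
+// path-compressing find (the pass uses a grandparent-chasing variant) plus
+// union by rank keeps congruence-class queries near O(1) amortized. The real
+// Node also steals two pointer bits for the isolation flags; that part is
+// omitted here.
+namespace {
+  struct UFNodeSketch {
+    UFNodeSketch *parent;
+    unsigned rank;
+  };
+
+  static UFNodeSketch *ufFind(UFNodeSketch *n) {
+    if (n->parent != n)
+      n->parent = ufFind(n->parent);  // Path compression.
+    return n->parent;
+  }
+
+  static void ufUnion(UFNodeSketch *a, UFNodeSketch *b) {
+    UFNodeSketch *ra = ufFind(a), *rb = ufFind(b);
+    if (ra == rb) return;
+    if (ra->rank < rb->rank) { UFNodeSketch *t = ra; ra = rb; rb = t; }
+    rb->parent = ra;                  // Attach the shallower tree.
+    if (ra->rank == rb->rank) ++ra->rank;
+  }
+}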
+/// +/// 2) ImmediateDominatingParent, which maps a register to the register in the +/// same congruence class that most immediately dominates it. +/// +/// This function assumes that it is being called in a depth-first traversal +/// of the dominator tree. +/// +/// The algorithm used here is a generalization of the dominance-based SSA test +/// for two variables. If there are variables a_1, ..., a_n such that +/// +/// def(a_1) dom ... dom def(a_n), +/// +/// then we can test for an interference between any two a_i by only using O(n) +/// interference tests between pairs of variables. If i < j and a_i and a_j +/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). +/// Thus, in order to test for an interference involving a_i, we need only check +/// for a potential interference with a_i+1. +/// +/// This method can be generalized to arbitrary sets of variables by performing +/// a depth-first traversal of the dominator tree. As we traverse down a branch +/// of the dominator tree, we keep track of the current dominating variable and +/// only perform an interference test with that variable. However, when we go to +/// another branch of the dominator tree, the definition of the current dominating +/// variable may no longer dominate the current block. In order to correct this, +/// we need to use a stack of past choices of the current dominating variable +/// and pop from this stack until we find a variable whose definition actually +/// dominates the current block. +/// +/// There will be one push on this stack for each variable that has become the +/// current dominating variable, so instead of using an explicit stack we can +/// simply associate the previous choice for a current dominating variable with +/// the new choice. This works better in our implementation, where we test for +/// interference in multiple distinct sets at once. +void +StrongPHIElimination::SplitInterferencesForBasicBlock( + MachineBasicBlock &MBB, + DenseMap &CurrentDominatingParent, + DenseMap &ImmediateDominatingParent) { + // Sort defs by their order in the original basic block, as the code below + // assumes that it is processing definitions in dominance order. + std::vector &DefInstrs = PHISrcDefs[&MBB]; + std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); + + for (std::vector::const_iterator BBI = DefInstrs.begin(), + BBE = DefInstrs.end(); BBI != BBE; ++BBI) { + for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), + E = (*BBI)->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + + // FIXME: This would be faster if it were possible to bail out of checking + // an instruction's operands after the explicit defs, but this is incorrect + // for variadic instructions, which may appear before register allocation + // in the future. + if (!MO.isReg() || !MO.isDef()) + continue; + + unsigned DestReg = MO.getReg(); + if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // If the virtual register being defined is not used in any PHI or has + // already been isolated, then there are no more interferences to check. + unsigned DestColor = getRegColor(DestReg); + if (!DestColor) + continue; + + // The input to this pass sometimes is not in SSA form in every basic + // block, as some virtual registers have redefinitions. We could eliminate + // this by fixing the passes that generate the non-SSA code, or we could + // handle it here by tracking defining machine instructions rather than + // virtual registers. 
For now, we just handle the situation conservatively
+      // in a way that will possibly lead to false interferences.
+      unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+      unsigned NewParent = CurrentParent;
+      if (NewParent == DestReg)
+        continue;
+
+      // Pop registers from the stack represented by ImmediateDominatingParent
+      // until we find a parent that dominates the current instruction.
+      while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+                           || !getRegColor(NewParent)))
+        NewParent = ImmediateDominatingParent[NewParent];
+
+      // If NewParent is nonzero, then its definition dominates the current
+      // instruction, so it is only necessary to check for the liveness of
+      // NewParent in order to check for an interference.
+      if (NewParent
+          && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+        // If there is an interference, always isolate the new register. This
+        // could be improved by using a heuristic that decides which of the
+        // two registers to isolate.
+        isolateReg(DestReg);
+        CurrentParent = NewParent;
+      } else {
+        // If there is no interference, update ImmediateDominatingParent and
+        // set the CurrentDominatingParent for this color to the current
+        // register.
+        ImmediateDominatingParent[DestReg] = NewParent;
+        CurrentParent = DestReg;
+      }
+    }
+  }
+
+  // We now walk the PHIs in successor blocks and check for interferences.
+  // This is necessary because the uses of a PHI's operands are logically
+  // contained in the predecessor block. The def of a PHI's destination
+  // register is processed along with the other defs in a basic block.
+
+  CurrentPHIForColor.clear();
+
+  for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+       SE = MBB.succ_end(); SI != SE; ++SI) {
+    for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+         BBI != BBE && BBI->isPHI(); ++BBI) {
+      MachineInstr *PHI = BBI;
+
+      // If a PHI is already isolated, either by being isolated directly or
+      // having all of its operands isolated, ignore it.
+      unsigned Color = getPHIColor(PHI);
+      if (!Color)
+        continue;
+
+      // Find the index of the PHI operand that corresponds to this basic
+      // block.
+      unsigned PredIndex;
+      for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+        if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+          break;
+      }
+      assert(PredIndex < PHI->getNumOperands());
+      unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+      // Pop registers from the stack represented by ImmediateDominatingParent
+      // until we find a parent whose definition dominates this block.
+      unsigned &CurrentParent = CurrentDominatingParent[Color];
+      unsigned NewParent = CurrentParent;
+      while (NewParent
+             && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+                 || !getRegColor(NewParent)))
+        NewParent = ImmediateDominatingParent[NewParent];
+      CurrentParent = NewParent;
+
+      // If there is an interference with a register, always isolate the
+      // register rather than the PHI. It is also possible to isolate the
+      // PHI, but that introduces copies for all of the registers involved
+      // in that PHI.
+      if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+          && NewParent != PredOperandReg)
+        isolateReg(NewParent);
+
+      std::pair<MachineInstr*, unsigned>
+        &CurrentPHI = CurrentPHIForColor[Color];
+
+      // If two PHIs have the same operand from every shared predecessor, then
+      // they don't actually interfere. Otherwise, isolate the current PHI.
+      // This could possibly be improved, e.g. we could isolate the PHI with
+      // the fewest operands.
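+      // [Editor's aside] Before the final PHI-vs-PHI check below, the
+      // interference machinery used throughout this function deserves a
+      // standalone sketch: each color keeps one "current dominating parent",
+      // and the ImmediateDominatingParent links form an implicit stack that
+      // is popped lazily when a definition no longer dominates the point
+      // being examined. The pattern in miniature (editor's illustration;
+      // Cur, Prev, and the oracle functions are hypothetical):
+      //
+      //   #include <unordered_map>
+      //
+      //   std::unordered_map<unsigned, unsigned> Cur;  // color -> cur parent
+      //   std::unordered_map<unsigned, unsigned> Prev; // reg -> prior parent
+      //   extern bool defDominatesHere(unsigned Reg);  // assumed oracle
+      //   extern bool liveHere(unsigned Reg);          // assumed oracle
+      //   extern void isolate(unsigned Reg);
+      //
+      //   void visitDef(unsigned Color, unsigned Reg) {
+      //     unsigned P = Cur[Color];
+      //     while (P && !defDominatesHere(P))          // lazy "pop"
+      //       P = Prev[P];
+      //     if (P && liveHere(P)) {                    // interference found
+      //       isolate(Reg);
+      //       Cur[Color] = P;
+      //     } else {
+      //       Prev[Reg] = P;                           // implicit "push"
+      //       Cur[Color] = Reg;
+      //     }
+      //   }
+      //
+      // Each definition therefore performs only a bounded, amortized number
+      // of interference tests, matching the O(n) argument in the comment
+      // above SplitInterferencesForBasicBlock.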
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) + isolatePHI(PHI); + else + CurrentPHI = std::make_pair(PHI, PredOperandReg); + } + } +} + +void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, + MachineBasicBlock *MBB) { + assert(PHI->isPHI()); + ++NumPHIsLowered; + unsigned PHIColor = getPHIColor(PHI); + + for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { + MachineOperand &SrcMO = PHI->getOperand(i); + + // If a source is defined by an implicit def, there is no need to insert a + // copy in the predecessor. + if (SrcMO.isUndef()) + continue; + + unsigned SrcReg = SrcMO.getReg(); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); + unsigned SrcColor = getRegColor(SrcReg); + + // If neither the PHI nor the operand were isolated, then we only need to + // set the phi-kill flag on the VNInfo at this PHI. + if (PHIColor && SrcColor == PHIColor) { + LiveInterval &SrcInterval = LI->getInterval(SrcReg); + SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); + VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex()); + assert(SrcVNI); + SrcVNI->setHasPHIKill(true); + continue; + } + + unsigned CopyReg = 0; + if (PHIColor) { + SrcCopyMap::const_iterator I + = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); + CopyReg + = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; + } + + if (!CopyReg) { + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + CopyReg = MRI->createVirtualRegister(RC); + + MachineBasicBlock::iterator + CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); + unsigned SrcSubReg = SrcMO.getSubReg(); + MachineInstr *CopyInstr = BuildMI(*PredBB, + CopyInsertPoint, + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + CopyReg).addReg(SrcReg, 0, SrcSubReg); + LI->InsertMachineInstrInMaps(CopyInstr); + ++NumSrcCopiesInserted; + + // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for + // the newly added range. + LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); + InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); + + addReg(CopyReg); + if (PHIColor) { + unionRegs(PHIColor, CopyReg); + assert(getRegColor(CopyReg) != CopyReg); + } else { + PHIColor = CopyReg; + assert(getRegColor(CopyReg) == CopyReg); + } + + if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor))) + InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr; + } + + SrcMO.setReg(CopyReg); + + // If SrcReg is not live beyond the PHI, trim its interval so that it is no + // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are + // processed later, but this is still correct to do at this point because we + // never rely on LiveIntervals being correct while inserting copies. + // FIXME: Should this just count uses at PHIs like the normal PHIElimination + // pass does? + LiveInterval &SrcLI = LI->getInterval(SrcReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); + if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) + SrcLI.removeRange(MBBStartIndex, PHIIndex, true); + } + + unsigned DestReg = PHI->getOperand(0).getReg(); + unsigned DestColor = getRegColor(DestReg); + + if (PHIColor && DestColor == PHIColor) { + LiveInterval &DestLI = LI->getInterval(DestReg); + + // Set the phi-def flag for the VN at this PHI. 
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + assert(DestVNI); + DestVNI->setIsPHIDef(true); + + // Prior to PHI elimination, the live ranges of PHIs begin at their defining + // instruction. After PHI elimination, PHI instructions are replaced by VNs + // with the phi-def flag set, and the live ranges of these VNs start at the + // beginning of the basic block. + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + DestVNI->def = MBBStartIndex; + DestLI.addRange(LiveRange(MBBStartIndex, + PHIIndex.getDefIndex(), + DestVNI)); + return; + } + + const TargetRegisterClass *RC = MRI->getRegClass(DestReg); + unsigned CopyReg = MRI->createVirtualRegister(RC); + + MachineInstr *CopyInstr = BuildMI(*MBB, + MBB->SkipPHIsAndLabels(MBB->begin()), + PHI->getDebugLoc(), + TII->get(TargetOpcode::COPY), + DestReg).addReg(CopyReg); + LI->InsertMachineInstrInMaps(CopyInstr); + PHI->getOperand(0).setReg(CopyReg); + ++NumDestCopiesInserted; + + // Add the region from the beginning of MBB to the copy instruction to + // CopyReg's live interval, and give the VNInfo the phidef flag. + LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); + SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); + SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); + VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, + CopyInstr, + LI->getVNInfoAllocator()); + CopyVNI->setIsPHIDef(true); + CopyLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getDefIndex(), + CopyVNI)); + + // Adjust DestReg's live interval to adjust for its new definition at + // CopyInstr. + LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); + SlotIndex PHIIndex = LI->getInstructionIndex(PHI); + DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + assert(DestVNI); + DestVNI->def = DestCopyIndex.getDefIndex(); + + InsertedDestCopies[CopyReg] = CopyInstr; +} + +void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { + if (Reg == NewReg) + return; + + LiveInterval &OldLI = LI->getInterval(Reg); + LiveInterval &NewLI = LI->getInterval(NewReg); + + // Merge the live ranges of the two registers. + DenseMap VNMap; + for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); + LRI != LRE; ++LRI) { + LiveRange OldLR = *LRI; + VNInfo *OldVN = OldLR.valno; + + VNInfo *&NewVN = VNMap[OldVN]; + if (!NewVN) { + NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); + VNMap[OldVN] = NewVN; + } + + LiveRange LR(OldLR.start, OldLR.end, NewVN); + NewLI.addRange(LR); + } + + // Remove the LiveInterval for the register being renamed and replace all + // of its defs and uses with the new register. + LI->removeInterval(Reg); + MRI->replaceRegWith(Reg, NewReg); +} diff --git a/final/lib/CodeGen/TailDuplication.cpp b/final/lib/CodeGen/TailDuplication.cpp new file mode 100644 index 00000000000..04d3d311b41 --- /dev/null +++ b/final/lib/CodeGen/TailDuplication.cpp @@ -0,0 +1,658 @@ +//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass duplicates basic blocks ending in unconditional branches into +// the tails of their predecessors. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "tailduplication"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumTails     , "Number of tails duplicated");
+STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
+STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
+STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
+
+// Heuristic for tail duplication.
+static cl::opt<unsigned>
+TailDuplicateSize("tail-dup-size",
+                  cl::desc("Maximum instructions to consider tail duplicating"),
+                  cl::init(2), cl::Hidden);
+
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+              cl::desc("Verify sanity of PHI instructions during taildup"),
+              cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
+typedef std::vector<std::pair<MachineBasicBlock*, unsigned> > AvailableValsTy;
+
+namespace {
+  /// TailDuplicatePass - Perform tail duplication.
+  class TailDuplicatePass : public MachineFunctionPass {
+    bool PreRegAlloc;
+    const TargetInstrInfo *TII;
+    MachineModuleInfo *MMI;
+    MachineRegisterInfo *MRI;
+
+    // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+    SmallVector<unsigned, 16> SSAUpdateVRs;
+
+    // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list
+    // of source virtual registers.
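+    // [Editor's aside] The map declared just below (conceptually a
+    // DenseMap<unsigned, AvailableValsTy>) records, for each vreg defined in
+    // the tail block, the clone made in each duplicating predecessor.
+    // Duplicating a def necessarily breaks SSA (two defs of one name), so
+    // every clone gets a fresh vreg and MachineSSAUpdater later rewrites
+    // remote uses, inserting PHIs where paths merge. The bookkeeping in
+    // miniature (editor's illustration, hypothetical types):
+    //
+    //   #include <map>
+    //   #include <utility>
+    //   #include <vector>
+    //
+    //   typedef int Block;
+    //   typedef unsigned VReg;
+    //   std::map<VReg, std::vector<std::pair<Block, VReg> > > NewDefs;
+    //
+    //   VReg cloneDef(VReg Orig, Block Pred, VReg Fresh) {
+    //     NewDefs[Orig].push_back(std::make_pair(Pred, Fresh));
+    //     return Fresh;              // the duplicate in Pred defines Fresh
+    //   }
+    //   // Later: seed an SSA updater with every (block, reg) pair recorded
+    //   // for Orig and rewrite each use of Orig outside its defining block.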
+ DenseMap SSAUpdateVals; + + public: + static char ID; + explicit TailDuplicatePass(bool PreRA) : + MachineFunctionPass(ID), PreRegAlloc(PreRA) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Tail Duplication"; } + + private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap &LocalVRMap, + SmallVector, 4> &Copies); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap &LocalVRMap); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector &TDBBs, + SmallSetVector &Succs); + bool TailDuplicateBlocks(MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, + SmallVector &TDBBs, + SmallVector &Copies); + void RemoveDeadBlock(MachineBasicBlock *MBB); + }; + + char TailDuplicatePass::ID = 0; +} + +FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { + return new TailDuplicatePass(PreRegAlloc); +} + +bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + MMI = getAnalysisIfAvailable(); + + bool MadeChange = false; + while (TailDuplicateBlocks(MF)) + MadeChange = true; + + return MadeChange; +} + +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = I; + SmallSetVector Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (!MI->isPHI()) + break; + for (SmallSetVector::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + // This is not a hard error. + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; + dbgs() << " extra input from predecessor BB#" + << PHIBB->getNumber() << '\n'; + } + if (PHIBB->getNumber() < 0) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(0); + } + } + ++MI; + } + } +} + +/// TailDuplicateBlocks - Look for small blocks that are unconditionally +/// branched to and do not fall through. Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. 
+bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + if (PreRegAlloc && TailDupVerify) { + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + SmallVector NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + if (NumTails == TailDupLimit) + break; + + // Only duplicate blocks that end with unconditional branches. + if (MBB->canFallThrough()) + continue; + + // Save the successors list. + SmallSetVector Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector TDBBs; + SmallVector Copies; + if (TailDuplicate(MBB, MF, TDBBs, Copies)) { + ++NumTails; + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = 0; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. + MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = UI.getOperand(); + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI->getParent() == DefBB) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted by tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); + if (++UI == MRI->use_end()) { + // Copy is the only use. Do trivial copy propagation here. 
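+        // [Editor's aside] replaceRegWith() on the next line rewrites every
+        // occurrence of Dst to Src, after which the COPY is erased; this is
+        // safe precisely because the copy was just verified to be the only
+        // use of Src. The same one-use copy-forwarding rule in miniature
+        // (editor's illustration, hypothetical types):
+        //
+        //   //   v1 = ...                 v1 = ...
+        //   //   v2 = COPY v1     =>      (copy erased)
+        //   //   ... use v2 ...           ... use v1 ...
+        //
+        //   bool forwardCopy(CopyInst &C, UseGraph &G) {
+        //     if (G.numUses(C.Src) != 1)  // only the copy itself reads Src
+        //       return false;
+        //     G.replaceAllUses(C.Dst, C.Src);
+        //     G.erase(C);
+        //     return true;
+        //   }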
+ MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); + MadeChange = true; + } + } + + return MadeChange; +} + +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI->getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + +/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for +/// SSA update. +void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. +/// Remember the source register that's contributed by PredBB and update SSA +/// update map. +void TailDuplicatePass::ProcessPHI(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap &LocalVRMap, + SmallVector, 4> &Copies) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, SrcReg)); + if (isDefLiveOut(DefReg, TailBB, MRI)) + AddSSAUpdateEntry(DefReg, NewDef, PredBB); + + // Remove PredBB from the PHI node. + MI->RemoveOperand(SrcOpIdx+1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. 
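+/// [Editor's aside] The function defined next applies one rule per operand of
+/// the cloned instruction: a def of a virtual register gets a fresh vreg
+/// (recorded in LocalVRMap, and in the SSA-update tables when the original
+/// def is live out of the tail block), while a use is redirected through
+/// LocalVRMap if an earlier clone in the same predecessor already renamed it.
+/// The remap rule in miniature (editor's illustration; std::map stands in for
+/// DenseMap):
+///
+///   #include <map>
+///
+///   unsigned remapUse(const std::map<unsigned, unsigned> &VRMap, unsigned R) {
+///     std::map<unsigned, unsigned>::const_iterator I = VRMap.find(R);
+///     return I == VRMap.end() ? R : I->second; // renamed earlier in block?
+///   }
+///   unsigned remapDef(std::map<unsigned, unsigned> &VRMap, unsigned R,
+///                     unsigned Fresh) {
+///     VRMap[R] = Fresh;                        // later uses see the clone
+///     return Fresh;
+///   }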
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap &LocalVRMap) { + MachineInstr *NewMI = TII->duplicate(MI, MF); + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef()) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MO.setReg(NewReg); + LocalVRMap.insert(std::make_pair(Reg, NewReg)); + if (isDefLiveOut(Reg, TailBB, MRI)) + AddSSAUpdateEntry(Reg, NewReg, PredBB); + } else { + DenseMap::iterator VI = LocalVRMap.find(Reg); + if (VI != LocalVRMap.end()) + MO.setReg(VI->second); + } + } + PredBB->insert(PredBB->end(), NewMI); +} + +/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor +/// blocks, the successors have gained new predecessors. Update the PHI +/// instructions in them accordingly. +void +TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVector &TDBBs, + SmallSetVector &Succs) { + for (SmallSetVector::iterator SI = Succs.begin(), + SE = Succs.end(); SI != SE; ++SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); + II != EE; ++II) { + if (!II->isPHI()) + break; + unsigned Idx = 0; + for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + Idx = i; + break; + } + } + + assert(Idx != 0); + MachineOperand &MO0 = II->getOperand(Idx); + unsigned Reg = MO0.getReg(); + if (isDead) { + // Folded into the previous BB. + // There could be duplicate phi source entries. FIXME: Should sdisel + // or earlier pass fixed this? + for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + II->RemoveOperand(i+1); + II->RemoveOperand(i); + } + } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + + DenseMap::iterator LI=SSAUpdateVals.find(Reg); + if (LI != SSAUpdateVals.end()) { + // This register is defined in the tail block. + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(SrcReg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } + } else { + // Live in tail block, must also be live in predecessors. + for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = TDBBs[j]; + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + II->addOperand(MachineOperand::CreateReg(Reg, false)); + II->addOperand(MachineOperand::CreateMBB(SrcBB)); + } + } + } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } + } + } +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. 
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                 SmallVector<MachineInstr*, 16> &Copies) {
+  // Set the limit on the number of instructions to duplicate, with a default
+  // of one less than the tail-merge threshold. When optimizing for size,
+  // duplicate only one, because one branch instruction can be eliminated to
+  // compensate for the duplication.
+  unsigned MaxDuplicateCount;
+  if (TailDuplicateSize.getNumOccurrences() == 0 &&
+      MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+    MaxDuplicateCount = 1;
+  else
+    MaxDuplicateCount = TailDuplicateSize;
+
+  if (PreRegAlloc) {
+    if (TailBB->empty())
+      return false;
+    const TargetInstrDesc &TID = TailBB->back().getDesc();
+    // Pre-regalloc tail duplication hurts compile time and doesn't help
+    // much except for indirect branches and returns.
+    if (!TID.isIndirectBranch() && !TID.isReturn())
+      return false;
+    // If the target has hardware branch prediction that can handle indirect
+    // branches, duplicating them can often make them predictable when there
+    // are common paths through the code. The limit needs to be high enough
+    // to allow undoing the effects of tail merging and other optimizations
+    // that rearrange the predecessors of the indirect branch.
+    MaxDuplicateCount = 20;
+  }
+
+  // Don't try to tail-duplicate single-block loops.
+  if (TailBB->isSuccessor(TailBB))
+    return false;
+
+  // Check the instructions in the block to determine whether tail-duplication
+  // is invalid or unlikely to be profitable.
+  unsigned InstrCount = 0;
+  bool HasCall = false;
+  for (MachineBasicBlock::iterator I = TailBB->begin();
+       I != TailBB->end(); ++I) {
+    // Non-duplicable things shouldn't be tail-duplicated.
+    if (I->getDesc().isNotDuplicable()) return false;
+    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+    // A return may expand into a lot more instructions (e.g. reload of callee
+    // saved registers) after PEI.
+    if (PreRegAlloc && I->getDesc().isReturn()) return false;
+    // Don't duplicate more than the threshold.
+    if (InstrCount == MaxDuplicateCount) return false;
+    // Remember if we saw a call.
+    if (I->getDesc().isCall()) HasCall = true;
+    if (!I->isPHI() && !I->isDebugValue())
+      InstrCount += 1;
+  }
+  // Don't tail-duplicate calls before register allocation. Calls present a
+  // barrier to register allocation, so duplicating them may end up increasing
+  // spills.
+  if (InstrCount > 1 && (PreRegAlloc && HasCall))
+    return false;
+
+  DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
+  // Iterate through all the unique predecessors and tail-duplicate this
+  // block into them, if possible. Copying the list ahead of time also
+  // avoids trouble with the predecessor list reallocating.
+  bool Changed = false;
+  SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                              TailBB->pred_end());
+  for (SmallSetVector<MachineBasicBlock*, 8>::iterator PI = Preds.begin(),
+       PE = Preds.end(); PI != PE; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+
+    assert(TailBB != PredBB &&
+           "Single-block loop should have been rejected earlier!");
+    if (PredBB->succ_size() > 1) continue;
+
+    MachineBasicBlock *PredTBB, *PredFBB;
+    SmallVector<MachineOperand, 4> PredCond;
+    if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+      continue;
+    if (!PredCond.empty())
+      continue;
+    // EH edges are ignored by AnalyzeBranch.
+    if (PredBB->succ_size() != 1)
+      continue;
+    // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + TDBBs.push_back(PredBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + + // Clone the contents of TailBB into PredBB. + DenseMap LocalVRMap; + SmallVector, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + while (I != TailBB->end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true); + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && + TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); + if (PreRegAlloc) { + DenseMap LocalVRMap; + SmallVector, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); + } + } else { + // No PHIs to worry about, just splice the instructions over. 
+      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+    }
+    PrevBB->removeSuccessor(PrevBB->succ_begin());
+    assert(PrevBB->succ_empty());
+    PrevBB->transferSuccessors(TailBB);
+    TDBBs.push_back(PrevBB);
+    Changed = true;
+  }
+
+  return Changed;
+}
+
+/// RemoveDeadBlock - Remove the specified dead machine basic block from the
+/// function, updating the CFG.
+void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) {
+  assert(MBB->pred_empty() && "MBB must be dead!");
+  DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+
+  // Remove all successors.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_end()-1);
+
+  // Remove the block.
+  MBB->eraseFromParent();
+}
+
diff --git a/final/lib/CodeGen/TargetInstrInfoImpl.cpp b/final/lib/CodeGen/TargetInstrInfoImpl.cpp
new file mode 100644
index 00000000000..15340a3f108
--- /dev/null
+++ b/final/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -0,0 +1,449 @@
+//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetInstrInfoImpl class; it provides default
+// implementations of various methods.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static cl::opt<bool> DisableHazardRecognizer(
+  "disable-sched-hazard", cl::Hidden, cl::init(false),
+  cl::desc("Disable hazard detection during preRA scheduling"));
+
+/// ReplaceTailWithBranchTo - Delete the instruction Tail and everything after
+/// it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                             MachineBasicBlock *NewDest) const {
+  MachineBasicBlock *MBB = Tail->getParent();
+
+  // Remove all the old successors of MBB from the CFG.
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_begin());
+
+  // Remove all the dead instructions from the end of MBB.
+  MBB->erase(Tail, MBB->end());
+
+  // If MBB isn't immediately before NewDest, insert a branch to it.
+  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+    InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+                 Tail->getDebugLoc());
+  MBB->addSuccessor(NewDest);
+}
+
+// commuteInstruction - The default implementation of this method just
+// exchanges the two operands returned by findCommutedOpIndices.
+MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
+                                                      bool NewMI) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  bool HasDef = TID.getNumDefs();
+  if (HasDef && !MI->getOperand(0).isReg())
+    // No idea how to commute this instruction. Target should implement its
+    // own.
+    return 0;
+  unsigned Idx1, Idx2;
+  if (!findCommutedOpIndices(MI, Idx1, Idx2)) {
+    std::string msg;
+    raw_string_ostream Msg(msg);
+    Msg << "Don't know how to commute: " << *MI;
+    report_fatal_error(Msg.str());
+  }
+
+  assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() &&
+         "This only knows how to commute register operands so far");
+  unsigned Reg1 = MI->getOperand(Idx1).getReg();
+  unsigned Reg2 = MI->getOperand(Idx2).getReg();
+  bool Reg1IsKill = MI->getOperand(Idx1).isKill();
+  bool Reg2IsKill = MI->getOperand(Idx2).isKill();
+  bool ChangeReg0 = false;
+  if (HasDef && MI->getOperand(0).getReg() == Reg1) {
+    // Must be two address instruction!
+    assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+           "Expecting a two-address instruction!");
+    Reg2IsKill = false;
+    ChangeReg0 = true;
+  }
+
+  if (NewMI) {
+    // Create a new instruction.
+    unsigned Reg0 = HasDef
+      ? (ChangeReg0 ? Reg2 : MI->getOperand(0).getReg()) : 0;
+    bool Reg0IsDead = HasDef ? MI->getOperand(0).isDead() : false;
+    MachineFunction &MF = *MI->getParent()->getParent();
+    if (HasDef)
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+    else
+      return BuildMI(MF, MI->getDebugLoc(), MI->getDesc())
+        .addReg(Reg2, getKillRegState(Reg2IsKill))
+        .addReg(Reg1, getKillRegState(Reg1IsKill));
+  }
+
+  if (ChangeReg0)
+    MI->getOperand(0).setReg(Reg2);
+  MI->getOperand(Idx2).setReg(Reg1);
+  MI->getOperand(Idx1).setReg(Reg2);
+  MI->getOperand(Idx2).setIsKill(Reg1IsKill);
+  MI->getOperand(Idx1).setIsKill(Reg2IsKill);
+  return MI;
+}
+
+/// findCommutedOpIndices - If specified MI is commutable, return the two
+/// operand indices that would swap value. Return false if the instruction
+/// is not in a form which this routine understands.
+bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
+                                                unsigned &SrcOpIdx1,
+                                                unsigned &SrcOpIdx2) const {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.isCommutable())
+    return false;
+  // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
+  // is not true, then the target must implement this.
+  SrcOpIdx1 = TID.getNumDefs();
+  SrcOpIdx2 = SrcOpIdx1 + 1;
+  if (!MI->getOperand(SrcOpIdx1).isReg() ||
+      !MI->getOperand(SrcOpIdx2).isReg())
+    // No idea.
+ return false; + return true; +} + + +bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const { + bool MadeChange = false; + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return false; + + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (TID.OpInfo[i].isPredicate()) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + MO.setReg(Pred[j].getReg()); + MadeChange = true; + } else if (MO.isImm()) { + MO.setImm(Pred[j].getImm()); + MadeChange = true; + } else if (MO.isMBB()) { + MO.setMBB(Pred[j].getMBB()); + MadeChange = true; + } + ++j; + } + } + return MadeChange; +} + +void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); + MBB.insert(I, MI); +} + +bool +TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + +MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { + assert(!Orig->getDesc().isNotDuplicable() && + "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(Orig); +} + +// If the COPY instruction in MI can be folded to a stack operation, return +// the register class to use. +static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, + unsigned FoldIdx) { + assert(MI->isCopy() && "MI must be a COPY instruction"); + if (MI->getNumOperands() != 2) + return 0; + assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); + + const MachineOperand &FoldOp = MI->getOperand(FoldIdx); + const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); + + if (FoldOp.getSubReg() || LiveOp.getSubReg()) + return 0; + + unsigned FoldReg = FoldOp.getReg(); + unsigned LiveReg = LiveOp.getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && + "Cannot fold physregs"); + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); + + if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + return RC->contains(LiveOp.getReg()) ? RC : 0; + + const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg); + if (RC == LiveRC || RC->hasSubClass(LiveRC)) + return RC; + + // FIXME: Allow folding when register classes are memory compatible. + return 0; +} + +bool TargetInstrInfoImpl:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); +} + +/// foldMemoryOperand - Attempt to fold a load or store of the specified stack +/// slot into the specified machine instruction for the specified operand(s). +/// If this is possible, a new instruction is returned with the specified +/// operand folded, otherwise NULL is returned. The client is responsible for +/// removing the old instruction and adding the new one in the instruction +/// stream. 
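+/// [Editor's aside] Note the division of labor in the implementation that
+/// follows: the target's foldMemoryOperandImpl() performs real folds, and a
+/// generic fallback turns a plain COPY into storeRegToStackSlot() or
+/// loadRegFromStackSlot() when canFoldCopy() approves. The direction comes
+/// from the operand being folded: folding a def means the value must be
+/// written to the slot (a store); folding a use means it must be read (a
+/// load). In miniature (editor's illustration only):
+///
+///   //   %v = COPY %w   fold operand 0 (%v, def) -> store %w to the slot
+///   //   %v = COPY %w   fold operand 1 (%w, use) -> load %v from the slot
+///
+///   enum FoldDirection { FD_Store, FD_Load };
+///   FoldDirection directionFor(bool OperandIsDef) {
+///     return OperandIsDef ? FD_Store : FD_Load;
+///   }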
+MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops, + int FI) const { + unsigned Flags = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (MI->getOperand(Ops[i]).isDef()) + Flags |= MachineMemOperand::MOStore; + else + Flags |= MachineMemOperand::MOLoad; + + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + + // Ask the target to do the actual folding. + if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); + + // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. + return MBB->insert(MI, NewMI); + } + + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + if (!NewMI) return 0; + + NewMI = MBB.insert(MI, NewMI); + + // Copy the memoperands from the load to the folded instruction. + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + + return NewMI; +} + +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. 
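+  // [Editor's aside] The tests below form a conservative checklist: an
+  // instruction is "trivially" rematerializable only if re-executing it at a
+  // different point can never observe or produce a different value. Constant
+  // materializations pass; stores, loads from mutable memory, and anything
+  // reading a live register fail. A toy version of the predicate (editor's
+  // illustration, hypothetical instruction summary):
+  //
+  //   struct InstSummary {
+  //     bool HasSideEffects, LoadsMutableMemory, ReadsLiveRegister;
+  //   };
+  //   bool triviallyRemattable(const InstSummary &I) {
+  //     if (I.HasSideEffects)      return false; // must be repeatable
+  //     if (I.LoadsMutableMemory)  return false; // value may have changed
+  //     if (I.ReadsLiveRegister)   return false; // would stretch live ranges
+  //     return true;               // e.g. materializing an immediate
+  //   }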
+  int FrameIdx = 0;
+  if (TII.isLoadFromStackSlot(MI, FrameIdx) &&
+      MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+    return true;
+
+  const TargetInstrDesc &TID = MI->getDesc();
+
+  // Avoid instructions obviously unsafe for remat.
+  if (TID.isNotDuplicable() || TID.mayStore() ||
+      MI->hasUnmodeledSideEffects())
+    return false;
+
+  // Don't remat inline asm. We have no idea how expensive it is
+  // even if it's side effect free.
+  if (MI->isInlineAsm())
+    return false;
+
+  // Avoid instructions which load from potentially varying memory.
+  if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+    return false;
+
+  // If any of the registers accessed are non-constant, conservatively assume
+  // the instruction is not rematerializable.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // Check for a well-behaved physical register.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      if (MO.isUse()) {
+        // If the physreg has no defs anywhere, it's just an ambient register
+        // and we can freely move its uses. Alternatively, if it's allocatable,
+        // it could get allocated to something with a def during allocation.
+        if (!MRI.def_empty(Reg))
+          return false;
+        BitVector AllocatableRegs = TRI.getAllocatableSet(MF, 0);
+        if (AllocatableRegs.test(Reg))
+          return false;
+        // Check for a def among the register's aliases too.
+        for (const unsigned *Alias = TRI.getAliasSet(Reg); *Alias; ++Alias) {
+          unsigned AliasReg = *Alias;
+          if (!MRI.def_empty(AliasReg))
+            return false;
+          if (AllocatableRegs.test(AliasReg))
+            return false;
+        }
+      } else {
+        // A physreg def. We can't remat it.
+        return false;
+      }
+      continue;
+    }
+
+    // Only allow one virtual-register def, and that in the first operand.
+    if (MO.isDef() != (i == 0))
+      return false;
+
+    // For the def, it should be the only def of that register.
+    if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
+                       MRI.isLiveIn(Reg)))
+      return false;
+
+    // Don't allow any virtual-register uses. Rematting an instruction with
+    // virtual register uses would lengthen the live ranges of the uses, which
+    // is not necessarily a good idea, certainly not "trivial".
+    if (MO.isUse())
+      return false;
+  }
+
+  // Everything checked out.
+  return true;
+}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+                                               const MachineBasicBlock *MBB,
+                                               const MachineFunction &MF) const {
+  // Terminators and labels can't be scheduled around.
+  if (MI->getDesc().isTerminator() || MI->isLabel())
+    return true;
+
+  // Don't attempt to schedule around any instruction that defines
+  // a stack-oriented pointer, as it's unlikely to be profitable. This
+  // saves compile time, because it doesn't require every single
+  // stack slot reference to depend on the instruction that does the
+  // modification.
+  const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+  if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+    return true;
+
+  return false;
+}
+
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+  return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetHazardRecognizer.
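+// [Editor's aside] The two factories below differ only in ambition: the
+// pre-RA default returns a no-op recognizer that lets everything issue, while
+// the post-RA default builds a ScoreboardHazardRecognizer from the target's
+// InstrItineraryData, modeling functional-unit reservations per cycle. The
+// scoreboard idea in miniature (editor's illustration, one bit per unit):
+//
+//   #include <vector>
+//
+//   struct Scoreboard {
+//     std::vector<unsigned> Busy;          // Busy[i]: units busy i cycles out
+//     bool hazard(unsigned Unit, unsigned Cycle) const {
+//       return Cycle < Busy.size() && (Busy[Cycle] & (1u << Unit)) != 0;
+//     }
+//     void reserve(unsigned Unit, unsigned Cycle) {
+//       if (Cycle >= Busy.size()) Busy.resize(Cycle + 1, 0);
+//       Busy[Cycle] |= 1u << Unit;
+//     }
+//     void advanceCycle() { if (!Busy.empty()) Busy.erase(Busy.begin()); }
+//   };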
+ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); +} diff --git a/final/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/final/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 00000000000..fa311dc5d66 --- /dev/null +++ b/final/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,1026 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// + +void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, + const TargetMachine &TM) { + TargetLoweringObjectFile::Initialize(Ctx, TM); + + BSSSection = + getContext().getELFSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getBSS()); + + TextSection = + getContext().getELFSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, + SectionKind::getText()); + + DataSection = + getContext().getELFSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + + ReadOnlySection = + getContext().getELFSection(".rodata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC, + SectionKind::getReadOnly()); + + TLSDataSection = + getContext().getELFSection(".tdata", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, + SectionKind::getThreadData()); + + TLSBSSSection = + getContext().getELFSection(".tbss", ELF::SHT_NOBITS, + ELF::SHF_ALLOC | ELF::SHF_TLS | + ELF::SHF_WRITE, + SectionKind::getThreadBSS()); + + DataRelSection = + getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS, + ELF::SHF_ALLOC |ELF::SHF_WRITE, + SectionKind::getDataRel()); + + 
DataRelLocalSection =
+    getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE,
+                               SectionKind::getDataRelLocal());
+
+  DataRelROSection =
+    getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE,
+                               SectionKind::getReadOnlyWithRel());
+
+  DataRelROLocalSection =
+    getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE,
+                               SectionKind::getReadOnlyWithRelLocal());
+
+  MergeableConst4Section =
+    getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_MERGE,
+                               SectionKind::getMergeableConst4());
+
+  MergeableConst8Section =
+    getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_MERGE,
+                               SectionKind::getMergeableConst8());
+
+  MergeableConst16Section =
+    getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_MERGE,
+                               SectionKind::getMergeableConst16());
+
+  StaticCtorSection =
+    getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE,
+                               SectionKind::getDataRel());
+
+  StaticDtorSection =
+    getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC | ELF::SHF_WRITE,
+                               SectionKind::getDataRel());
+
+  // Exception Handling Sections.
+
+  // FIXME: We're emitting LSDA info into a readonly section on ELF, even
+  // though it contains relocatable pointers. In PIC mode, this is probably a
+  // big runtime hit for C++ apps. Either the contents of the LSDA need to be
+  // adjusted or this should be a data section.
+  LSDASection =
+    getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+                               ELF::SHF_ALLOC,
+                               SectionKind::getReadOnly());
+  // Debug Info Sections.
+  DwarfAbbrevSection =
+    getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfInfoSection =
+    getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfLineSection =
+    getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfFrameSection =
+    getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfPubNamesSection =
+    getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfPubTypesSection =
+    getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfStrSection =
+    getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfLocSection =
+    getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfARangesSection =
+    getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfRangesSection =
+    getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+  DwarfMacroInfoSection =
+    getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
+                               SectionKind::getMetadata());
+}
+
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+  return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+                                    ELF::SHF_ALLOC,
+                                    SectionKind::getDataRel());
+}
+
+static SectionKind
+getELFKindForNamedSection(StringRef Name, SectionKind K) {
+  // FIXME: Why is this here? Codegen should not be in the business
+  // of figuring section flags.
If the user wrote section(".eh_frame"), + // we should just pass that to MC which will defer to the assembly + // or use its default if producing an object file. + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. + if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + if (Name == ".eh_frame") + return SectionKind::getDataRel(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return ELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return ELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return ELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return ELF::SHT_NOBITS; + + return ELF::SHT_PROGBITS; +} + + +static unsigned +getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= ELF::SHF_ALLOC; + + if (K.isText()) + Flags |= ELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= ELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= ELF::SHF_TLS; + + // K.isMergeableConst() is left out to honour PR4650 + if (K.isMergeableCString() || K.isMergeableConst4() || + K.isMergeableConst8() || K.isMergeableConst16()) + Flags |= ELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= ELF::SHF_STRINGS; + + return Flags; +} + + +const MCSection *TargetLoweringObjectFileELF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + return getContext().getELFSection(SectionName, + getELFSectionType(SectionName, Kind), + getELFSectionFlags(Kind), Kind); +} + +/// getSectionPrefixForGlobal - Return the section prefix name used by options +/// FunctionsSections and DataSections. +static const char *getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) return ".text."; + if (Kind.isReadOnly()) return ".rodata."; + + if (Kind.isThreadData()) return ".tdata."; + if (Kind.isThreadBSS()) return ".tbss."; + + if (Kind.isDataNoRel()) return ".data."; + if (Kind.isDataRelLocal()) return ".data.rel.local."; + if (Kind.isDataRel()) return ".data.rel."; + if (Kind.isReadOnlyWithRelLocal()) return ".data.rel.ro.local."; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro."; +} + + +const MCSection *TargetLoweringObjectFileELF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. 
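+  // Illustrative sketch (not part of the original comment): with
+  // -ffunction-sections, a function "foo" typically lands in a uniqued
+  // section named ".text.foo", formed from the prefix returned by
+  // getSectionPrefixForGlobal() plus the mangled symbol name.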
+  bool EmitUniquedSection;
+  if (Kind.isText())
+    EmitUniquedSection = TM.getFunctionSections();
+  else
+    EmitUniquedSection = TM.getDataSections();
+
+  // If this global is linkonce/weak and the target handles this by emitting it
+  // into a 'uniqued' section name, create and return the section now.
+  if ((GV->isWeakForLinker() || EmitUniquedSection) &&
+      !Kind.isCommon() && !Kind.isBSS()) {
+    const char *Prefix;
+    Prefix = getSectionPrefixForGlobal(Kind);
+
+    SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    Name.append(Sym->getName().begin(), Sym->getName().end());
+    StringRef Group = "";
+    unsigned Flags = getELFSectionFlags(Kind);
+    if (GV->isWeakForLinker()) {
+      Group = Sym->getName();
+      Flags |= ELF::SHF_GROUP;
+    }
+
+    return getContext().getELFSection(Name.str(),
+                                      getELFSectionType(Name.str(), Kind),
+                                      Flags, Kind, 0, Group);
+  }
+
+  if (Kind.isText()) return TextSection;
+
+  if (Kind.isMergeable1ByteCString() ||
+      Kind.isMergeable2ByteCString() ||
+      Kind.isMergeable4ByteCString()) {
+
+    // We also need alignment here.
+    // FIXME: this is getting the alignment of the character, not the
+    // alignment of the global!
+    unsigned Align =
+      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV));
+
+    const char *SizeSpec = ".rodata.str1.";
+    if (Kind.isMergeable2ByteCString())
+      SizeSpec = ".rodata.str2.";
+    else if (Kind.isMergeable4ByteCString())
+      SizeSpec = ".rodata.str4.";
+    else
+      assert(Kind.isMergeable1ByteCString() && "unknown string width");
+
+    std::string Name = SizeSpec + utostr(Align);
+    return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+                                      ELF::SHF_ALLOC |
+                                      ELF::SHF_MERGE |
+                                      ELF::SHF_STRINGS,
+                                      Kind);
+  }
+
+  if (Kind.isMergeableConst()) {
+    if (Kind.isMergeableConst4() && MergeableConst4Section)
+      return MergeableConst4Section;
+    if (Kind.isMergeableConst8() && MergeableConst8Section)
+      return MergeableConst8Section;
+    if (Kind.isMergeableConst16() && MergeableConst16Section)
+      return MergeableConst16Section;
+    return ReadOnlySection;  // .const
+  }
+
+  if (Kind.isReadOnly()) return ReadOnlySection;
+
+  if (Kind.isThreadData()) return TLSDataSection;
+  if (Kind.isThreadBSS()) return TLSBSSSection;
+
+  // Note: we claim that common symbols are put in BSSSection, but they are
+  // really emitted with the magic .comm directive, which creates a symbol table
+  // entry but not a section.
+  if (Kind.isBSS() || Kind.isCommon()) return BSSSection;
+
+  if (Kind.isDataNoRel()) return DataSection;
+  if (Kind.isDataRelLocal()) return DataRelLocalSection;
+  if (Kind.isDataRel()) return DataRelSection;
+  if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return DataRelROSection;
+}
+
+/// getSectionForConstant - Given a mergeable constant with the
+/// specified size and relocation information, return a section that it
+/// should be placed in.
+const MCSection *TargetLoweringObjectFileELF::
+getSectionForConstant(SectionKind Kind) const {
+  if (Kind.isMergeableConst4() && MergeableConst4Section)
+    return MergeableConst4Section;
+  if (Kind.isMergeableConst8() && MergeableConst8Section)
+    return MergeableConst8Section;
+  if (Kind.isMergeableConst16() && MergeableConst16Section)
+    return MergeableConst16Section;
+  if (Kind.isReadOnly())
+    return ReadOnlySection;
+
+  if (Kind.isReadOnlyWithRelLocal()) return DataRelROLocalSection;
+  assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+  return DataRelROSection;
+}
+
+const MCExpr *TargetLoweringObjectFileELF::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI,
+                               unsigned Encoding, MCStreamer &Streamer) const {
+
+  if (Encoding & dwarf::DW_EH_PE_indirect) {
+    MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
+
+    SmallString<128> Name;
+    Mang->getNameWithPrefix(Name, GV, true);
+    Name += ".DW.stub";
+
+    // Add information about the stub reference to ELFMMI so that the stub
+    // gets emitted by the asmprinter.
+    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+    }
+
+    return TargetLoweringObjectFile::
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+  }
+
+  return TargetLoweringObjectFile::
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+//===----------------------------------------------------------------------===//
+// MachO
+//===----------------------------------------------------------------------===//
+
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+                                               const TargetMachine &TM) {
+  // _foo.eh symbols are currently always exported so that the linker knows
+  // about them. This is not necessary on 10.6 and later, but it
+  // doesn't hurt anything.
+  // FIXME: I need to get this from Triple.
+  IsFunctionEHSymbolGlobal = true;
+  IsFunctionEHFrameSymbolPrivate = false;
+  SupportsWeakOmittedEHFrame = false;
+
+  Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+  if (T.getOS() == Triple::Darwin) {
+    switch (T.getDarwinMajorNumber()) {
+    case 7:  // 10.3 Panther.
+    case 8:  // 10.4 Tiger.
+      CommDirectiveSupportsAlignment = false;
+      break;
+    case 9:  // 10.5 Leopard.
+    case 10: // 10.6 SnowLeopard.
+      break;
+    }
+  }
+
+  TargetLoweringObjectFile::Initialize(Ctx, TM);
+
+  TextSection // .text
+    = getContext().getMachOSection("__TEXT", "__text",
+                                   MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                                   SectionKind::getText());
+  DataSection // .data
+    = getContext().getMachOSection("__DATA", "__data", 0,
+                                   SectionKind::getDataRel());
+
+  TLSDataSection // .tdata
+    = getContext().getMachOSection("__DATA", "__thread_data",
+                                   MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+                                   SectionKind::getDataRel());
+  TLSBSSSection // .tbss
+    = getContext().getMachOSection("__DATA", "__thread_bss",
+                                   MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+                                   SectionKind::getThreadBSS());
+
+  // TODO: Verify datarel below.
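+  // Background note (assumption, for illustration): each __thread_vars entry
+  // on Darwin is a TLV descriptor (thunk pointer, key, offset) that dyld's
+  // thread-local machinery initializes at image load time.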
+ TLSTLVSection // .tlv + = getContext().getMachOSection("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES, + SectionKind::getDataRel()); + + TLSThreadInitSection + = getContext().getMachOSection("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS, + SectionKind::getDataRel()); + + CStringSection // .cstring + = getContext().getMachOSection("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS, + SectionKind::getMergeable1ByteCString()); + UStringSection + = getContext().getMachOSection("__TEXT","__ustring", 0, + SectionKind::getMergeable2ByteCString()); + FourByteConstantSection // .literal4 + = getContext().getMachOSection("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, + SectionKind::getMergeableConst4()); + EightByteConstantSection // .literal8 + = getContext().getMachOSection("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, + SectionKind::getMergeableConst8()); + + // ld_classic doesn't support .literal16 in 32-bit mode, and ld64 falls back + // to using it in -static mode. + SixteenByteConstantSection = 0; + if (TM.getRelocationModel() != Reloc::Static && + TM.getTargetData()->getPointerSize() == 32) + SixteenByteConstantSection = // .literal16 + getContext().getMachOSection("__TEXT", "__literal16", + MCSectionMachO::S_16BYTE_LITERALS, + SectionKind::getMergeableConst16()); + + ReadOnlySection // .const + = getContext().getMachOSection("__TEXT", "__const", 0, + SectionKind::getReadOnly()); + + TextCoalSection + = getContext().getMachOSection("__TEXT", "__textcoal_nt", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + ConstTextCoalSection + = getContext().getMachOSection("__TEXT", "__const_coal", + MCSectionMachO::S_COALESCED, + SectionKind::getReadOnly()); + ConstDataSection // .const_data + = getContext().getMachOSection("__DATA", "__const", 0, + SectionKind::getReadOnlyWithRel()); + DataCoalSection + = getContext().getMachOSection("__DATA","__datacoal_nt", + MCSectionMachO::S_COALESCED, + SectionKind::getDataRel()); + DataCommonSection + = getContext().getMachOSection("__DATA","__common", + MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + DataBSSSection + = getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL, + SectionKind::getBSS()); + + + LazySymbolPointerSection + = getContext().getMachOSection("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + NonLazySymbolPointerSection + = getContext().getMachOSection("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, + SectionKind::getMetadata()); + + if (TM.getRelocationModel() == Reloc::Static) { + StaticCtorSection + = getContext().getMachOSection("__TEXT", "__constructor", 0, + SectionKind::getDataRel()); + StaticDtorSection + = getContext().getMachOSection("__TEXT", "__destructor", 0, + SectionKind::getDataRel()); + } else { + StaticCtorSection + = getContext().getMachOSection("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, + SectionKind::getDataRel()); + StaticDtorSection + = getContext().getMachOSection("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, + SectionKind::getDataRel()); + } + + // Exception Handling. + LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); + // Debug Information. 
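+  // All of the DWARF sections that follow are placed in the __DWARF segment
+  // and marked S_ATTR_DEBUG so the linker recognizes them as debug info.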
+ DwarfAbbrevSection = + getContext().getMachOSection("__DWARF", "__debug_abbrev", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfInfoSection = + getContext().getMachOSection("__DWARF", "__debug_info", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLineSection = + getContext().getMachOSection("__DWARF", "__debug_line", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfFrameSection = + getContext().getMachOSection("__DWARF", "__debug_frame", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getContext().getMachOSection("__DWARF", "__debug_pubnames", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getContext().getMachOSection("__DWARF", "__debug_pubtypes", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfStrSection = + getContext().getMachOSection("__DWARF", "__debug_str", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfLocSection = + getContext().getMachOSection("__DWARF", "__debug_loc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfARangesSection = + getContext().getMachOSection("__DWARF", "__debug_aranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfRangesSection = + getContext().getMachOSection("__DWARF", "__debug_ranges", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getContext().getMachOSection("__DWARF", "__debug_macinfo", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfDebugInlineSection = + getContext().getMachOSection("__DWARF", "__debug_inlined", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + + TLSExtraDataSection = TLSTLVSection; +} + +const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const { + return getContext().getMachOSection("__TEXT", "__eh_frame", + MCSectionMachO::S_COALESCED | + MCSectionMachO::S_ATTR_NO_TOC | + MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS | + MCSectionMachO::S_ATTR_LIVE_SUPPORT, + SectionKind::getReadOnly()); +} + +const MCSection *TargetLoweringObjectFileMachO:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getNameStr() + + "' has an invalid section specifier '" + GV->getSection()+ + "': " + ErrorCode + "."); + // Fall back to dropping it into the data section. + return DataSection; + } + + bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES); + if (!TAAWasSet) + TAA = 0; // Sensible default if this is a new section. + + // Get the section. + const MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // If TAA wasn't set by ParseSectionSpecifier() above, + // use the value returned by getMachOSection() as a default. + if (!TAAWasSet) + TAA = S->getTypeAndAttributes(); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. 
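+  // Hypothetical example: one global declared with
+  // section("__DATA,__mysect,cstring_literals") and another with plain
+  // section("__DATA,__mysect") disagree on the section type, so the second
+  // one trips the check below.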
+  if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
+    // If invalid, report the error with report_fatal_error.
+    report_fatal_error("Global variable '" + GV->getNameStr() +
+                       "' section type or attributes does not match previous"
+                       " section specifier");
+  }
+
+  return S;
+}
+
+const MCSection *TargetLoweringObjectFileMachO::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+                       Mangler *Mang, const TargetMachine &TM) const {
+
+  // Handle thread local data.
+  if (Kind.isThreadBSS()) return TLSBSSSection;
+  if (Kind.isThreadData()) return TLSDataSection;
+
+  if (Kind.isText())
+    return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+
+  // If this is weak/linkonce, put this in a coalescable section, either in text
+  // or data depending on if it is writable.
+  if (GV->isWeakForLinker()) {
+    if (Kind.isReadOnly())
+      return ConstTextCoalSection;
+    return DataCoalSection;
+  }
+
+  // FIXME: Alignment check should be handled by section classifier.
+  if (Kind.isMergeable1ByteCString() &&
+      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+    return CStringSection;
+
+  // Do not put 16-bit arrays in the UString section if they have an
+  // externally visible label, this runs into issues with certain linker
+  // versions.
+  if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
+      TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
+    return UStringSection;
+
+  if (Kind.isMergeableConst()) {
+    if (Kind.isMergeableConst4())
+      return FourByteConstantSection;
+    if (Kind.isMergeableConst8())
+      return EightByteConstantSection;
+    if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+      return SixteenByteConstantSection;
+  }
+
+  // Otherwise, if it is readonly, but not something we can specially optimize,
+  // just drop it in .const.
+  if (Kind.isReadOnly())
+    return ReadOnlySection;
+
+  // If this is marked const, put it into a const section. But if the dynamic
+  // linker needs to write to it, put it in the data segment.
+  if (Kind.isReadOnlyWithRel())
+    return ConstDataSection;
+
+  // Put zero initialized globals with strong external linkage in the
+  // DATA, __common section with the .zerofill directive.
+  if (Kind.isBSSExtern())
+    return DataCommonSection;
+
+  // Put zero initialized globals with local linkage in __DATA,__bss directive
+  // with the .zerofill directive (aka .lcomm).
+  if (Kind.isBSSLocal())
+    return DataBSSSection;
+
+  // Otherwise, just drop the variable in the normal data section.
+  return DataSection;
+}
+
+const MCSection *
+TargetLoweringObjectFileMachO::getSectionForConstant(SectionKind Kind) const {
+  // If this constant requires a relocation, we have to put it in the data
+  // segment, not in the text segment.
+  if (Kind.isDataRel() || Kind.isReadOnlyWithRel())
+    return ConstDataSection;
+
+  if (Kind.isMergeableConst4())
+    return FourByteConstantSection;
+  if (Kind.isMergeableConst8())
+    return EightByteConstantSection;
+  if (Kind.isMergeableConst16() && SixteenByteConstantSection)
+    return SixteenByteConstantSection;
+  return ReadOnlySection;  // .const
+}
+
+/// shouldEmitUsedDirectiveFor - This hook allows targets to selectively decide
+/// not to emit the UsedDirective for some symbols in llvm.used.
+// FIXME: REMOVE this (rdar://7071300)
+bool TargetLoweringObjectFileMachO::
+shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
+  /// On Darwin, internally linked data beginning with "L" or "l" does not have
+  /// the directive emitted (this occurs in ObjC metadata).
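+  /// For example (illustrative), an ObjC metadata symbol such as
+  /// "L_OBJC_METH_VAR_NAME_42" is skipped here, while an ordinary internal
+  /// global named via llvm.used still gets the used directive.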
+  if (!GV) return false;
+
+  // Check whether the mangled name has the "Private" or "LinkerPrivate" prefix.
+  if (GV->hasLocalLinkage() && !isa<Function>(GV)) {
+    // FIXME: ObjC metadata is currently emitted as internal symbols that have
+    // \1L and \0l prefixes on them. Fix them to be Private/LinkerPrivate and
+    // this horrible hack can go away.
+    MCSymbol *Sym = Mang->getSymbol(GV);
+    if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
+      return false;
+  }
+
+  return true;
+}
+
+const MCExpr *TargetLoweringObjectFileMachO::
+getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
+                               MachineModuleInfo *MMI, unsigned Encoding,
+                               MCStreamer &Streamer) const {
+  // The mach-o version of this method defaults to returning a stub reference.
+
+  if (Encoding & DW_EH_PE_indirect) {
+    MachineModuleInfoMachO &MachOMMI =
+      MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+    SmallString<128> Name;
+    Mang->getNameWithPrefix(Name, GV, true);
+    Name += "$non_lazy_ptr";
+
+    // Add information about the stub reference to MachOMMI so that the stub
+    // gets emitted by the asmprinter.
+    MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
+    MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
+    if (StubSym.getPointer() == 0) {
+      MCSymbol *Sym = Mang->getSymbol(GV);
+      StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
+    }
+
+    return TargetLoweringObjectFile::
+      getExprForDwarfReference(SSym, Mang, MMI,
+                               Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
+  }
+
+  return TargetLoweringObjectFile::
+    getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
+}
+
+unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
+  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
+  return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
+  return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
+  return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+//===----------------------------------------------------------------------===//
+// COFF
+//===----------------------------------------------------------------------===//
+
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+                                              const TargetMachine &TM) {
+  TargetLoweringObjectFile::Initialize(Ctx, TM);
+  TextSection =
+    getContext().getCOFFSection(".text",
+                                COFF::IMAGE_SCN_CNT_CODE |
+                                COFF::IMAGE_SCN_MEM_EXECUTE |
+                                COFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getText());
+  DataSection =
+    getContext().getCOFFSection(".data",
+                                COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                COFF::IMAGE_SCN_MEM_READ |
+                                COFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
+  ReadOnlySection =
+    getContext().getCOFFSection(".rdata",
+                                COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                COFF::IMAGE_SCN_MEM_READ,
+                                SectionKind::getReadOnly());
+  StaticCtorSection =
+    getContext().getCOFFSection(".ctors",
+                                COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                COFF::IMAGE_SCN_MEM_READ |
+                                COFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
+  StaticDtorSection =
+    getContext().getCOFFSection(".dtors",
+                                COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                                COFF::IMAGE_SCN_MEM_READ |
+                                COFF::IMAGE_SCN_MEM_WRITE,
+                                SectionKind::getDataRel());
+
+  // FIXME: We're emitting LSDA info into a readonly section on COFF, even
+  // though it contains relocatable pointers. In PIC mode, this is probably a
+  // big runtime hit for C++ apps.
Either the contents of the LSDA need to be + // adjusted or this should be a data section. + LSDASection = + getContext().getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + // Debug info. + DwarfAbbrevSection = + getContext().getCOFFSection(".debug_abbrev", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfInfoSection = + getContext().getCOFFSection(".debug_info", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLineSection = + getContext().getCOFFSection(".debug_line", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfFrameSection = + getContext().getCOFFSection(".debug_frame", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubNamesSection = + getContext().getCOFFSection(".debug_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfPubTypesSection = + getContext().getCOFFSection(".debug_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfStrSection = + getContext().getCOFFSection(".debug_str", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfLocSection = + getContext().getCOFFSection(".debug_loc", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfARangesSection = + getContext().getCOFFSection(".debug_aranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfRangesSection = + getContext().getCOFFSection(".debug_ranges", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfMacroInfoSection = + getContext().getCOFFSection(".debug_macinfo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + + DrectveSection = + getContext().getCOFFSection(".drectve", + COFF::IMAGE_SCN_LNK_INFO, + SectionKind::getMetadata()); +} + +const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const { + return getContext().getCOFFSection(".eh_frame", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getDataRel()); +} + + +static unsigned +getCOFFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (K.isMetadata()) + Flags |= + COFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isText()) + Flags |= + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_CNT_CODE; + else if (K.isBSS ()) + Flags |= + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isReadOnly()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; + else if (K.isWriteable()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + return Flags; +} + +const MCSection *TargetLoweringObjectFileCOFF:: +getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + return getContext().getCOFFSection(GV->getSection(), + getCOFFSectionFlags(Kind), + Kind); +} + +static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text$"; + if (Kind.isBSS ()) + return 
".bss$"; + if (Kind.isWriteable()) + return ".data$"; + return ".rdata$"; +} + + +const MCSection *TargetLoweringObjectFileCOFF:: +SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler *Mang, const TargetMachine &TM) const { + assert(!Kind.isThreadLocal() && "Doesn't support TLS"); + + // If this global is linkonce/weak and the target handles this by emitting it + // into a 'uniqued' section name, create and return the section now. + if (GV->isWeakForLinker()) { + const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind); + SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + MCSymbol *Sym = Mang->getSymbol(GV); + Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + + unsigned Characteristics = getCOFFSectionFlags(Kind); + + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name.str(), Characteristics, + COFF::IMAGE_COMDAT_SELECT_ANY, Kind); + } + + if (Kind.isText()) + return getTextSection(); + + return getDataSection(); +} + diff --git a/final/lib/CodeGen/TwoAddressInstructionPass.cpp b/final/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 00000000000..52ea87231cc --- /dev/null +++ b/final/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,1527 @@ +//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TwoAddress instruction pass which is used +// by most register allocators. Two-Address instructions are rewritten +// from: +// +// A = B op C +// +// to: +// +// A = B +// A op= C +// +// Note that if a register allocator chooses to use this pass, that it +// has to be capable of handling the non-SSA nature of these rewritten +// virtual registers. +// +// It is also worth noting that the duplicate operand of the two +// address instruction is removed. 
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "twoaddrinstr"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions");
+STATISTIC(NumCommuted, "Number of instructions commuted to coalesce");
+STATISTIC(NumAggrCommuted, "Number of instructions aggressively commuted");
+STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address");
+STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk");
+STATISTIC(NumReMats, "Number of instructions re-materialized");
+STATISTIC(NumDeletes, "Number of dead instructions deleted");
+
+namespace {
+  class TwoAddressInstructionPass : public MachineFunctionPass {
+    const TargetInstrInfo *TII;
+    const TargetRegisterInfo *TRI;
+    MachineRegisterInfo *MRI;
+    LiveVariables *LV;
+    AliasAnalysis *AA;
+
+    // DistanceMap - Keep track of the distance of an MI from the start of the
+    // current basic block.
+    DenseMap<MachineInstr*, unsigned> DistanceMap;
+
+    // SrcRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies from physical
+    // registers to virtual registers. e.g. v1024 = move r0.
+    DenseMap<unsigned, unsigned> SrcRegMap;
+
+    // DstRegMap - A map from virtual registers to physical registers which
+    // are likely targets to be coalesced to due to copies to physical
+    // registers from virtual registers. e.g. r1 = move v1024.
+    DenseMap<unsigned, unsigned> DstRegMap;
+
+    /// RegSequences - Keep track of the list of REG_SEQUENCE instructions seen
+    /// during the initial walk of the machine function.
+    SmallVector<MachineInstr*, 16> RegSequences;
+
+    bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
+                              unsigned Reg,
+                              MachineBasicBlock::iterator OldPos);
+
+    bool isProfitableToReMat(unsigned Reg, const TargetRegisterClass *RC,
+                             MachineInstr *MI, MachineInstr *DefMI,
+                             MachineBasicBlock *MBB, unsigned Loc);
+
+    bool NoUseAfterLastDef(unsigned Reg, MachineBasicBlock *MBB, unsigned Dist,
+                           unsigned &LastDef);
+
+    MachineInstr *FindLastUseInMBB(unsigned Reg, MachineBasicBlock *MBB,
+                                   unsigned Dist);
+
+    bool isProfitableToCommute(unsigned regB, unsigned regC,
+                               MachineInstr *MI, MachineBasicBlock *MBB,
+                               unsigned Dist);
+
+    bool CommuteInstruction(MachineBasicBlock::iterator &mi,
+                            MachineFunction::iterator &mbbi,
+                            unsigned RegB, unsigned RegC, unsigned Dist);
+
+    bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
+
+    bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                            MachineBasicBlock::iterator &nmi,
+                            MachineFunction::iterator &mbbi,
+                            unsigned RegA, unsigned RegB, unsigned Dist);
+
+    typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
+    bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+                               SmallVector<NewKill, 4> &NewKills,
+                               MachineBasicBlock *MBB, unsigned Dist);
+    bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+                           MachineBasicBlock::iterator &nmi,
+                           MachineFunction::iterator &mbbi, unsigned Dist);
+
+    bool TryInstructionTransform(MachineBasicBlock::iterator &mi,
+                                 MachineBasicBlock::iterator &nmi,
+                                 MachineFunction::iterator &mbbi,
+                                 unsigned SrcIdx, unsigned DstIdx,
+                                 unsigned Dist,
+                                 SmallPtrSet<MachineInstr*, 8> &Processed);
+
+    void ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+                  SmallPtrSet<MachineInstr*, 8> &Processed);
+
+    void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
+                     SmallPtrSet<MachineInstr*, 8> &Processed);
+
+    void CoalesceExtSubRegs(SmallVector<unsigned, 4> &Srcs, unsigned DstReg);
+
+    /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+    /// of the de-ssa process. This replaces sources of REG_SEQUENCE as
+    /// sub-register references of the register defined by REG_SEQUENCE.
+    bool EliminateRegSequences();
+
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+      initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      AU.addRequired<AliasAnalysis>();
+      AU.addPreserved<LiveVariables>();
+      AU.addPreservedID(MachineLoopInfoID);
+      AU.addPreservedID(MachineDominatorsID);
+      AU.addPreservedID(PHIEliminationID);
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    /// runOnMachineFunction - Pass entry point.
+    bool runOnMachineFunction(MachineFunction&);
+  };
+}
+
+char TwoAddressInstructionPass::ID = 0;
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+                "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+                "Two-Address instruction pass", false, false)
+
+char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
+
+/// Sink3AddrInstruction - A two-address instruction has been converted to a
+/// three-address instruction to avoid clobbering a register. Try to sink it
+/// past the instruction that would kill the above-mentioned register to reduce
+/// register pressure.
+bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
+                                           MachineInstr *MI, unsigned SavedReg,
+                                           MachineBasicBlock::iterator OldPos) {
+  // Check if it's safe to move this instruction.
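+  // Overall shape of the transformation (illustrative):
+  //   MI:     %def = op %savedreg, ...   ; the converted three-address instr
+  //   ...
+  //   KillMI: ...  = last use of %savedreg
+  // If all of the checks below pass, MI is moved to just after KillMI,
+  // shortening the live range of the value MI defines.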
+  bool SeenStore = true; // Be conservative.
+  if (!MI->isSafeToMove(TII, AA, SeenStore))
+    return false;
+
+  unsigned DefReg = 0;
+  SmallSet<unsigned, 4> UseRegs;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned MOReg = MO.getReg();
+    if (!MOReg)
+      continue;
+    if (MO.isUse() && MOReg != SavedReg)
+      UseRegs.insert(MO.getReg());
+    if (!MO.isDef())
+      continue;
+    if (MO.isImplicit())
+      // Don't try to move it if it implicitly defines a register.
+      return false;
+    if (DefReg)
+      // For now, don't move any instructions that define multiple registers.
+      return false;
+    DefReg = MO.getReg();
+  }
+
+  // Find the instruction that kills SavedReg.
+  MachineInstr *KillMI = NULL;
+  for (MachineRegisterInfo::use_nodbg_iterator
+         UI = MRI->use_nodbg_begin(SavedReg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    if (!UseMO.isKill())
+      continue;
+    KillMI = UseMO.getParent();
+    break;
+  }
+
+  if (!KillMI || KillMI->getParent() != MBB || KillMI == MI)
+    return false;
+
+  // If any of the definitions are used by another instruction between the
+  // position and the kill use, then it's not safe to sink it.
+  //
+  // FIXME: This can be sped up if there is an easy way to query whether an
+  // instruction is before or after another instruction. Then we can use
+  // MachineRegisterInfo def / use instead.
+  MachineOperand *KillMO = NULL;
+  MachineBasicBlock::iterator KillPos = KillMI;
+  ++KillPos;
+
+  unsigned NumVisited = 0;
+  for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
+    MachineInstr *OtherMI = I;
+    // DBG_VALUE cannot be counted against the limit.
+    if (OtherMI->isDebugValue())
+      continue;
+    if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
+      return false;
+    ++NumVisited;
+    for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = OtherMI->getOperand(i);
+      if (!MO.isReg())
+        continue;
+      unsigned MOReg = MO.getReg();
+      if (!MOReg)
+        continue;
+      if (DefReg == MOReg)
+        return false;
+
+      if (MO.isKill()) {
+        if (OtherMI == KillMI && MOReg == SavedReg)
+          // Save the operand that kills the register. We want to unset the kill
+          // marker if we can sink MI past it.
+          KillMO = &MO;
+        else if (UseRegs.count(MOReg))
+          // One of the uses is killed before the destination.
+          return false;
+      }
+    }
+  }
+
+  // Update kill and LV information.
+  KillMO->setIsKill(false);
+  KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI);
+  KillMO->setIsKill(true);
+
+  if (LV)
+    LV->replaceKillInstruction(SavedReg, KillMI, MI);
+
+  // Move instruction to its destination.
+  MBB->remove(MI);
+  MBB->insert(KillPos, MI);
+
+  ++Num3AddrSunk;
+  return true;
+}
+
+/// isTwoAddrUse - Return true if the specified MI is using the specified
+/// register as a two-address operand.
+static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
+  const TargetInstrDesc &TID = UseMI->getDesc();
+  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = UseMI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == Reg &&
+        (MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
+      // Earlier use is a two-address one.
+      return true;
+  }
+  return false;
+}
+
+/// isProfitableToReMat - Return true if the heuristics determine it is likely
+/// to be profitable to re-materialize the definition of Reg rather than copy
+/// the register.
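+/// Sketch of the profitable case (illustrative opcodes):
+///   %reg = MOVri 5          ; DefMI, cheap to re-materialize
+///   %reg = ADDrr %reg, %x   ; another two-address use will clobber %reg
+///   ...   use of %reg       ; the instruction at Loc
+/// Re-materializing the def avoids copying %reg just to preserve its value.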
+bool
+TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
+                                         const TargetRegisterClass *RC,
+                                         MachineInstr *MI, MachineInstr *DefMI,
+                                         MachineBasicBlock *MBB, unsigned Loc) {
+  bool OtherUse = false;
+  for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+         UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+    MachineOperand &UseMO = UI.getOperand();
+    MachineInstr *UseMI = UseMO.getParent();
+    MachineBasicBlock *UseMBB = UseMI->getParent();
+    if (UseMBB == MBB) {
+      DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+      if (DI != DistanceMap.end() && DI->second == Loc)
+        continue; // Current use.
+      OtherUse = true;
+      // There is at least one other use in the MBB that will clobber the
+      // register.
+      if (isTwoAddrUse(UseMI, Reg))
+        return true;
+    }
+  }
+
+  // If other uses in MBB are not two-address uses, then don't remat.
+  if (OtherUse)
+    return false;
+
+  // No other uses in the same block, remat if it's defined in the same
+  // block so it does not unnecessarily extend the live range.
+  return MBB == DefMI->getParent();
+}
+
+/// NoUseAfterLastDef - Return true if there are no intervening uses between the
+/// last instruction in the MBB that defines the specified register and the
+/// two-address instruction which is being processed. It also returns the last
+/// def location by reference.
+bool TwoAddressInstructionPass::NoUseAfterLastDef(unsigned Reg,
+                                          MachineBasicBlock *MBB, unsigned Dist,
+                                          unsigned &LastDef) {
+  LastDef = 0;
+  unsigned LastUse = Dist;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB || MI->isDebugValue())
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (MO.isUse() && DI->second < LastUse)
+      LastUse = DI->second;
+    if (MO.isDef() && DI->second > LastDef)
+      LastDef = DI->second;
+  }
+
+  return !(LastUse > LastDef && LastUse < Dist);
+}
+
+MachineInstr *TwoAddressInstructionPass::FindLastUseInMBB(unsigned Reg,
+                                                        MachineBasicBlock *MBB,
+                                                        unsigned Dist) {
+  unsigned LastUseDist = 0;
+  MachineInstr *LastUse = 0;
+  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Reg),
+         E = MRI->reg_end(); I != E; ++I) {
+    MachineOperand &MO = I.getOperand();
+    MachineInstr *MI = MO.getParent();
+    if (MI->getParent() != MBB || MI->isDebugValue())
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (DI->second >= Dist)
+      continue;
+
+    if (MO.isUse() && DI->second > LastUseDist) {
+      LastUse = DI->first;
+      LastUseDist = DI->second;
+    }
+  }
+  return LastUse;
+}
+
+/// isCopyToReg - Return true if the specified MI is a copy instruction or
+/// an extract_subreg instruction. It also returns the source and destination
+/// registers and whether they are physical registers by reference.
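+/// e.g. for "%v1 = COPY %r0" this returns SrcReg = r0 (IsSrcPhys = true)
+/// and DstReg = v1 (IsDstPhys = false).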
+static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, + unsigned &SrcReg, unsigned &DstReg, + bool &IsSrcPhys, bool &IsDstPhys) { + SrcReg = 0; + DstReg = 0; + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; + + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; +} + +/// isKilled - Test if the given register value, which is used by the given +/// instruction, is killed by the given instruction. This looks through +/// coalescable copies to see if the original value is potentially not killed. +/// +/// For example, in this code: +/// +/// %reg1034 = copy %reg1024 +/// %reg1035 = copy %reg1025 +/// %reg1036 = add %reg1034, %reg1035 +/// +/// %reg1034 is not considered to be killed, since it is copied from a +/// register which is not killed. Treating it as not killed lets the +/// normal heuristics commute the (two-address) add, which lets +/// coalescing eliminate the extra copy. +/// +static bool isKilled(MachineInstr &MI, unsigned Reg, + const MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + MachineInstr *DefMI = &MI; + for (;;) { + if (!DefMI->killsRegister(Reg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return true; + MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); + // If there are multiple defs, we can't do a simple analysis, so just + // go with what the kill flag says. + if (llvm::next(Begin) != MRI->def_end()) + return true; + DefMI = &*Begin; + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + // If the def is something other than a copy, then it isn't going to + // be coalesced, so follow the kill flag. + if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return true; + Reg = SrcReg; + } +} + +/// isTwoAddrUse - Return true if the specified MI uses the specified register +/// as a two-address use. If so, return the destination register by reference. +static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands(); + for (unsigned i = 0; i != NumOps; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + unsigned ti; + if (MI.isRegTiedToDefOperand(i, &ti)) { + DstReg = MI.getOperand(ti).getReg(); + return true; + } + } + return false; +} + +/// findOnlyInterestingUse - Given a register, if has a single in-basic block +/// use, return the use instruction if it's a copy or a two-address use. +static +MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + bool &IsCopy, + unsigned &DstReg, bool &IsDstPhys) { + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. 
+    return 0;
+  MachineInstr &UseMI = *MRI->use_nodbg_begin(Reg);
+  if (UseMI.getParent() != MBB)
+    return 0;
+  unsigned SrcReg;
+  bool IsSrcPhys;
+  if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) {
+    IsCopy = true;
+    return &UseMI;
+  }
+  IsDstPhys = false;
+  if (isTwoAddrUse(UseMI, Reg, DstReg)) {
+    IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+    return &UseMI;
+  }
+  return 0;
+}
+
+/// getMappedReg - Return the physical register the specified virtual register
+/// might be mapped to.
+static unsigned
+getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
+  while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+    DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
+    if (SI == RegMap.end())
+      return 0;
+    Reg = SI->second;
+  }
+  if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    return Reg;
+  return 0;
+}
+
+/// regsAreCompatible - Return true if the two registers are equal or aliased.
+///
+static bool
+regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) {
+  if (RegA == RegB)
+    return true;
+  if (!RegA || !RegB)
+    return false;
+  return TRI->regsOverlap(RegA, RegB);
+}
+
+
+/// isProfitableToCommute - Return true if it's potentially profitable to
+/// commute the two-address instruction that's being processed.
+bool
+TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
+                                      MachineInstr *MI, MachineBasicBlock *MBB,
+                                      unsigned Dist) {
+  // Determine if it's profitable to commute this two address instruction. In
+  // general, we want no uses between this instruction and the definition of
+  // the two-address register.
+  // e.g.
+  // %reg1028 = EXTRACT_SUBREG %reg1027, 1
+  // %reg1029 = MOV8rr %reg1028
+  // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS
+  // insert => %reg1030 = MOV8rr %reg1028
+  // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS
+  // In this case, it might not be possible to coalesce the second MOV8rr
+  // instruction if the first one is coalesced. So it would be profitable to
+  // commute it:
+  // %reg1028 = EXTRACT_SUBREG %reg1027, 1
+  // %reg1029 = MOV8rr %reg1028
+  // %reg1029 = SHR8ri %reg1029, 7, %EFLAGS
+  // insert => %reg1030 = MOV8rr %reg1029
+  // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS
+
+  if (!MI->killsRegister(regC))
+    return false;
+
+  // Ok, we have something like:
+  // %reg1030 = ADD8rr %reg1028, %reg1029, %EFLAGS
+  // let's see if it's worth commuting it.
+
+  // Look for situations like this:
+  // %reg1024 = MOV r1
+  // %reg1025 = MOV r0
+  // %reg1026 = ADD %reg1024, %reg1025
+  // r0       = MOV %reg1026
+  // Commute the ADD to hopefully eliminate an otherwise unavoidable copy.
+  unsigned FromRegB = getMappedReg(regB, SrcRegMap);
+  unsigned FromRegC = getMappedReg(regC, SrcRegMap);
+  unsigned ToRegB = getMappedReg(regB, DstRegMap);
+  unsigned ToRegC = getMappedReg(regC, DstRegMap);
+  if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+      ((!FromRegC && !ToRegC) ||
+       regsAreCompatible(FromRegB, ToRegC, TRI) ||
+       regsAreCompatible(FromRegC, ToRegB, TRI)))
+    return true;
+
+  // If there is a use of regC between its last def (could be livein) and this
+  // instruction, then bail.
+  unsigned LastDefC = 0;
+  if (!NoUseAfterLastDef(regC, MBB, Dist, LastDefC))
+    return false;
+
+  // If there is a use of regB between its last def (could be livein) and this
+  // instruction, then go ahead and make this transformation.
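+  // (Rationale sketch: commuting makes regC the tied operand; if regB is
+  // used again after its last def anyway, its live range already extends
+  // through this instruction, so tying regC instead tends to keep the
+  // shorter live interval on the clobbered slot.)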
+  unsigned LastDefB = 0;
+  if (!NoUseAfterLastDef(regB, MBB, Dist, LastDefB))
+    return true;
+
+  // Since there are no intervening uses for both registers, then commute
+  // if the def of regC is closer. Its live interval is shorter.
+  return LastDefB && LastDefC && LastDefC > LastDefB;
+}
+
+/// CommuteInstruction - Commute a two-address instruction and update the basic
+/// block, distance map, and live variables if needed. Return true if it is
+/// successful.
+bool
+TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi,
+                              MachineFunction::iterator &mbbi,
+                              unsigned RegB, unsigned RegC, unsigned Dist) {
+  MachineInstr *MI = mi;
+  DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
+  MachineInstr *NewMI = TII->commuteInstruction(MI);
+
+  if (NewMI == 0) {
+    DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI);
+  // If the instruction changed to commute it, update livevar.
+  if (NewMI != MI) {
+    if (LV)
+      // Update live variables.
+      LV->replaceKillInstruction(RegC, MI, NewMI);
+
+    mbbi->insert(mi, NewMI); // Insert the new inst
+    mbbi->erase(mi);         // Nuke the old inst.
+    mi = NewMI;
+    DistanceMap.insert(std::make_pair(NewMI, Dist));
+  }
+
+  // Update source register map.
+  unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
+  if (FromRegC) {
+    unsigned RegA = MI->getOperand(0).getReg();
+    SrcRegMap[RegA] = FromRegC;
+  }
+
+  return true;
+}
+
+/// isProfitableToConv3Addr - Return true if it is profitable to convert the
+/// given 2-address instruction to a 3-address one.
+bool
+TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){
+  // Look for situations like this:
+  // %reg1024 = MOV r1
+  // %reg1025 = MOV r0
+  // %reg1026 = ADD %reg1024, %reg1025
+  // r2       = MOV %reg1026
+  // Turn ADD into a 3-address instruction to avoid a copy.
+  unsigned FromRegB = getMappedReg(RegB, SrcRegMap);
+  if (!FromRegB)
+    return false;
+  unsigned ToRegA = getMappedReg(RegA, DstRegMap);
+  return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI));
+}
+
+/// ConvertInstTo3Addr - Convert the specified two-address instruction into a
+/// three address one. Return true if this transformation was successful.
+bool
+TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
+                                              MachineBasicBlock::iterator &nmi,
+                                              MachineFunction::iterator &mbbi,
+                                              unsigned RegA, unsigned RegB,
+                                              unsigned Dist) {
+  MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
+  if (NewMI) {
+    DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
+    DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI);
+    bool Sunk = false;
+
+    if (NewMI->findRegisterUseOperand(RegB, false, TRI))
+      // FIXME: Temporary workaround. If the new instruction doesn't
+      // use RegB, convertToThreeAddress must have created more
+      // than one instruction.
+      Sunk = Sink3AddrInstruction(mbbi, NewMI, RegB, mi);
+
+    mbbi->erase(mi); // Nuke the old inst.
+
+    if (!Sunk) {
+      DistanceMap.insert(std::make_pair(NewMI, Dist));
+      mi = NewMI;
+      nmi = llvm::next(mi);
+    }
+
+    // Update source and destination register maps.
+    SrcRegMap.erase(RegA);
+    DstRegMap.erase(RegB);
+    return true;
+  }
+
+  return false;
+}
+
+/// ScanUses - Scan forward recursively for only uses, update maps if the use
+/// is a copy or a two-address instruction.
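+/// Illustrative chain (v* virtual, r* physical):
+///   v1 = op ...     ; DstReg
+///   v2 = COPY v1
+///   r0 = COPY v2
+/// This records v1 -> v2 and v2 -> r0 in DstRegMap; getMappedReg() later
+/// walks the chain, so both v1 and v2 are treated as likely headed for r0.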
+void
+TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB,
+                                    SmallPtrSet<MachineInstr*, 8> &Processed) {
+  SmallVector<unsigned, 4> VirtRegPairs;
+  bool IsDstPhys;
+  bool IsCopy = false;
+  unsigned NewReg = 0;
+  unsigned Reg = DstReg;
+  while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,
+                                                      IsCopy, NewReg,
+                                                      IsDstPhys)) {
+    if (IsCopy && !Processed.insert(UseMI))
+      break;
+
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI);
+    if (DI != DistanceMap.end())
+      // Earlier in the same MBB. Reached via a back edge.
+      break;
+
+    if (IsDstPhys) {
+      VirtRegPairs.push_back(NewReg);
+      break;
+    }
+    bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second;
+    if (!isNew)
+      assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!");
+    VirtRegPairs.push_back(NewReg);
+    Reg = NewReg;
+  }
+
+  if (!VirtRegPairs.empty()) {
+    unsigned ToReg = VirtRegPairs.back();
+    VirtRegPairs.pop_back();
+    while (!VirtRegPairs.empty()) {
+      unsigned FromReg = VirtRegPairs.back();
+      VirtRegPairs.pop_back();
+      bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second;
+      if (!isNew)
+        assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!");
+      ToReg = FromReg;
+    }
+    bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second;
+    if (!isNew)
+      assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!");
+  }
+}
+
+/// ProcessCopy - If the specified instruction is not yet processed, process it
+/// if it's a copy. For a copy instruction, we find the physical registers the
+/// source and destination registers might be mapped to. These are kept in
+/// point-to maps used to determine future optimizations. e.g.
+/// v1024 = mov r0
+/// v1025 = mov r1
+/// v1026 = add v1024, v1025
+/// r1    = mov v1026
+/// If 'add' is a two-address instruction, v1024, v1026 are both potentially
+/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is
+/// potentially joined with r1 on the output side. It's worthwhile to commute
+/// 'add' to eliminate a copy.
+void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
+                                     MachineBasicBlock *MBB,
+                                     SmallPtrSet<MachineInstr*, 8> &Processed) {
+  if (Processed.count(MI))
+    return;
+
+  bool IsSrcPhys, IsDstPhys;
+  unsigned SrcReg, DstReg;
+  if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys))
+    return;
+
+  if (IsDstPhys && !IsSrcPhys)
+    DstRegMap.insert(std::make_pair(SrcReg, DstReg));
+  else if (!IsDstPhys && IsSrcPhys) {
+    bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second;
+    if (!isNew)
+      assert(SrcRegMap[DstReg] == SrcReg &&
+             "Can't map to two src physical registers!");
+
+    ScanUses(DstReg, MBB, Processed);
+  }
+
+  Processed.insert(MI);
+  return;
+}
+
+/// isSafeToDelete - If the specified instruction does not produce any side
+/// effects and all of its defs are dead, then it's safe to delete.
+static bool isSafeToDelete(MachineInstr *MI,
+                           const TargetInstrInfo *TII,
+                           SmallVector<unsigned, 4> &Kills) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (TID.mayStore() || TID.isCall())
+    return false;
+  if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
+    return false;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      Kills.push_back(MO.getReg());
+  }
+  return true;
+}
+
+/// canUpdateDeletedKills - Check if all the registers listed in Kills are
+/// killed by instructions in MBB preceding the current instruction at
+/// position Dist. If so, return true and record information about the
+/// preceding kills in NewKills.
+bool TwoAddressInstructionPass::
+canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
+                      SmallVector<NewKill, 4> &NewKills,
+                      MachineBasicBlock *MBB, unsigned Dist) {
+  while (!Kills.empty()) {
+    unsigned Kill = Kills.back();
+    Kills.pop_back();
+    if (TargetRegisterInfo::isPhysicalRegister(Kill))
+      return false;
+
+    MachineInstr *LastKill = FindLastUseInMBB(Kill, MBB, Dist);
+    if (!LastKill)
+      return false;
+
+    bool isModRef = LastKill->definesRegister(Kill);
+    NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
+                                      LastKill));
+  }
+  return true;
+}
+
+/// DeleteUnusedInstr - If an instruction with a tied register operand can
+/// be safely deleted, just delete it.
+bool
+TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi,
+                                             MachineBasicBlock::iterator &nmi,
+                                             MachineFunction::iterator &mbbi,
+                                             unsigned Dist) {
+  // Check if the instruction has no side effects and if all its defs are dead.
+  SmallVector<unsigned, 4> Kills;
+  if (!isSafeToDelete(mi, TII, Kills))
+    return false;
+
+  // If this instruction kills some virtual registers, we need to
+  // update the kill information. If it's not possible to do so,
+  // then bail out.
+  SmallVector<NewKill, 4> NewKills;
+  if (!canUpdateDeletedKills(Kills, NewKills, &*mbbi, Dist))
+    return false;
+
+  if (LV) {
+    while (!NewKills.empty()) {
+      MachineInstr *NewKill = NewKills.back().second;
+      unsigned Kill = NewKills.back().first.first;
+      bool isDead = NewKills.back().first.second;
+      NewKills.pop_back();
+      if (LV->removeVirtualRegisterKilled(Kill, mi)) {
+        if (isDead)
+          LV->addVirtualRegisterDead(Kill, NewKill);
+        else
+          LV->addVirtualRegisterKilled(Kill, NewKill);
+      }
+    }
+  }
+
+  mbbi->erase(mi); // Nuke the old inst.
+  mi = nmi;
+  return true;
+}
+
+/// TryInstructionTransform - For the case where an instruction has a single
+/// pair of tied register operands, attempt some transformations that may
+/// either eliminate the tied operands or improve the opportunities for
+/// coalescing away the register copy. Returns true if the tied operands
+/// are eliminated altogether.
+bool TwoAddressInstructionPass::
+TryInstructionTransform(MachineBasicBlock::iterator &mi,
+                        MachineBasicBlock::iterator &nmi,
+                        MachineFunction::iterator &mbbi,
+                        unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
+                        SmallPtrSet<MachineInstr*, 8> &Processed) {
+  const TargetInstrDesc &TID = mi->getDesc();
+  unsigned regA = mi->getOperand(DstIdx).getReg();
+  unsigned regB = mi->getOperand(SrcIdx).getReg();
+
+  assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+         "cannot make instruction into two-address form");
+
+  // If regA is dead and the instruction can be deleted, just delete
+  // it so it doesn't clobber regB.
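+  // e.g. (sketch) "%dead = ADD %b, %x" where %dead has no readers and %b is
+  // not killed here can simply be erased instead of being rewritten.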
+ bool regBKilled = isKilled(*mi, regB, MRI, TII); + if (!regBKilled && mi->getOperand(DstIdx).isDead() && + DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { + ++NumDeletes; + return true; // Done with this instruction. + } + + // Check if it is profitable to commute the operands. + unsigned SrcOp1, SrcOp2; + unsigned regC = 0; + unsigned regCIdx = ~0U; + bool TryCommute = false; + bool AggressiveCommute = false; + if (TID.isCommutable() && mi->getNumOperands() >= 3 && + TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (SrcIdx == SrcOp1) + regCIdx = SrcOp2; + else if (SrcIdx == SrcOp2) + regCIdx = SrcOp1; + + if (regCIdx != ~0U) { + regC = mi->getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + // If C dies but B does not, swap the B and C operands. + // This makes the live ranges of A and C joinable. + TryCommute = true; + else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + TryCommute = true; + AggressiveCommute = true; + } + } + } + + // If it's profitable to commute, try to do so. + if (TryCommute && CommuteInstruction(mi, mbbi, regB, regC, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return false; + } + + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); + + if (TID.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { + // Try to convert it. + if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (TID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); + if (UnfoldTID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. 
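As an aside before the unfolded instruction is transformed below, the commute decision earlier in TryInstructionTransform reduces to two cases, sketched here with plain booleans (a minimal model of the logic, not the pass itself):

    #include <cassert>

    // Plain commute when regC dies and regB does not (A and C become
    // joinable); "aggressive" commute when a separate profitability check
    // (isProfitableToCommute in the pass) still favors the swap.
    struct CommuteDecision { bool Try, Aggressive; };

    static CommuteDecision decide(bool BKilled, bool CKilled, bool Profitable) {
      if (!BKilled && CKilled)
        return CommuteDecision{true, false};
      if (Profitable)
        return CommuteDecision{true, true};
      return CommuteDecision{false, false};
    }

    int main() {
      assert(decide(false, true, false).Try);        // C dies, B does not
      assert(decide(true, false, true).Aggressive);  // profitability override
      assert(!decide(true, false, false).Try);       // nothing to gain
      return 0;
    }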
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist, Processed); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + + return false; +} + +/// runOnMachineFunction - Reduce two-address instructions to two operands. +/// +bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n"); + const TargetMachine &TM = MF.getTarget(); + MRI = &MF.getRegInfo(); + TII = TM.getInstrInfo(); + TRI = TM.getRegisterInfo(); + LV = getAnalysisIfAvailable(); + AA = &getAnalysis(); + + bool MadeChange = false; + + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " + << MF.getFunction()->getName() << '\n'); + + // ReMatRegs - Keep track of the registers whose def's are remat'ed. + BitVector ReMatRegs(MRI->getNumVirtRegs()); + + typedef DenseMap, 4> > + TiedOperandMap; + TiedOperandMap TiedOperands(4); + + SmallPtrSet Processed; + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + unsigned Dist = 0; + DistanceMap.clear(); + SrcRegMap.clear(); + DstRegMap.clear(); + Processed.clear(); + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me; ) { + MachineBasicBlock::iterator nmi = llvm::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } + + // Remember REG_SEQUENCE instructions, we'll deal with them later. + if (mi->isRegSequence()) + RegSequences.push_back(&*mi); + + const TargetInstrDesc &TID = mi->getDesc(); + bool FirstTied = true; + + DistanceMap.insert(std::make_pair(mi, ++Dist)); + + ProcessCopy(&*mi, &*mbbi, Processed); + + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. + unsigned NumOps = mi->isInlineAsm() + ? 
mi->getNumOperands() : TID.getNumOperands(); + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + + if (FirstTied) { + FirstTied = false; + ++NumTwoAddressInstrs; + DEBUG(dbgs() << '\t' << *mi); + } + + assert(mi->getOperand(SrcIdx).isReg() && + mi->getOperand(SrcIdx).getReg() && + mi->getOperand(SrcIdx).isUse() && + "two address instruction invalid"); + + unsigned regB = mi->getOperand(SrcIdx).getReg(); + TiedOperandMap::iterator OI = TiedOperands.find(regB); + if (OI == TiedOperands.end()) { + SmallVector, 4> TiedPair; + OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first; + } + OI->second.push_back(std::make_pair(SrcIdx, DstIdx)); + } + + // Now iterate over the information collected above. + for (TiedOperandMap::iterator OI = TiedOperands.begin(), + OE = TiedOperands.end(); OI != OE; ++OI) { + SmallVector, 4> &TiedPairs = OI->second; + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1 && TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + + // If the registers are already equal, nothing needs to be done. + if (mi->getOperand(SrcIdx).getReg() == + mi->getOperand(DstIdx).getReg()) + break; // Done with this instruction. + + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) + break; // The tied operands have been eliminated. + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + unsigned regB = OI->first; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + unsigned regA = mi->getOperand(DstIdx).getReg(); + // Grab regB from the instruction because it may have changed if the + // instruction was commuted. + regB = mi->getOperand(SrcIdx).getReg(); + + if (regA == regB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = regA; + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != mi->getNumOperands(); ++i) + assert(i == DstIdx || + !mi->getOperand(i).isReg() || + mi->getOperand(i).getReg() != regA); +#endif + + // Emit a copy or rematerialize the definition. + const TargetRegisterClass *rc = MRI->getRegClass(regB); + MachineInstr *DefMI = MRI->getVRegDef(regB); + // If it's safe and profitable, remat the definition instead of + // copying it. 
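Before the remat-or-copy choice below, the net effect of this rewrite loop can be shown on a toy three-address program. The In struct and string register names are assumptions of this sketch; the pass operates on MachineInstrs:

    #include <cassert>
    #include <string>
    #include <vector>

    // A two-address instruction requires dst == src0, so "a = op b, c"
    // becomes "a = COPY b; a = op a, c".
    struct In { std::string Dst, Src0, Src1, Op; };

    static std::vector<In> makeTwoAddress(const In &I) {
      std::vector<In> Out;
      if (I.Dst != I.Src0) {
        Out.push_back(In{I.Dst, I.Src0, "", "COPY"});   // prepend the copy
        Out.push_back(In{I.Dst, I.Dst, I.Src1, I.Op});  // tie dst to src0
      } else {
        Out.push_back(I);  // already in two-address form
      }
      return Out;
    }

    int main() {
      std::vector<In> R = makeTwoAddress(In{"a", "b", "c", "add"});
      assert(R.size() == 2 && R[0].Op == "COPY" && R[1].Src0 == "a");
      return 0;
    }

The interesting work in the pass is everything that avoids this copy (deletion, commuting, 3-addr conversion, remat); the copy is only the fallback.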
+ if (DefMI && + DefMI->getDesc().isAsCheapAsAMove() && + DefMI->isSafeToReMat(TII, AA, regB) && + isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ + DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); + unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); + ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB)); + ++NumReMats; + } else { + BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), + regA).addReg(regB); + } + + MachineBasicBlock::iterator prevMI = prior(mi); + // Update DistanceMap. + DistanceMap.insert(std::make_pair(prevMI, Dist)); + DistanceMap[mi] = ++Dist; + + DEBUG(dbgs() << "\t\tprepend:\t" << *prevMI); + + MachineOperand &MO = mi->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == regB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(regA); + } + + if (AllUsesCopied) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(regB).removeKill(mi)) + LV->addVirtualRegisterKilled(regB, prior(mi)); + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() == regB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } + + // Schedule the source copy / remat inserted to form two-address + // instruction. FIXME: Does it matter the distance map may not be + // accurate after it's scheduled? + TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); + + MadeChange = true; + + DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } + + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. + TiedOperands.clear(); + mi = nmi; + } + } + + // Some remat'ed instructions are dead. + for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) { + unsigned VReg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->use_nodbg_empty(VReg)) { + MachineInstr *DefMI = MRI->getVRegDef(VReg); + DefMI->eraseFromParent(); + } + } + + // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve + // SSA form. It's now safe to de-SSA. 
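The INSERT_SUBREG-to-COPY rewrite above shuffles operands in place; a sketch of the same operand surgery on a hypothetical operand type (the real code also drops the subreg-index immediate, which this toy folds into the SubIdx argument):

    #include <cassert>
    #include <vector>

    // Toy machine operand: a register plus a sub-register index.
    struct MOp { unsigned Reg; unsigned SubIdx; };

    // From  %reg = INSERT_SUBREG %reg, %subreg, subidx
    // To    %reg:subidx = COPY %subreg
    static std::vector<MOp> insertSubregToCopy(std::vector<MOp> Ops,
                                               unsigned SubIdx) {
      Ops[0].SubIdx = SubIdx;       // %reg:subidx = ...
      Ops.erase(Ops.begin() + 1);   // drop the tied %reg input
      return Ops;                   // remaining operand is the COPY source
    }

    int main() {
      // %reg1031 = INSERT_SUBREG %reg1031, %reg1029, 5
      std::vector<MOp> Ops = {{1031, 0}, {1031, 0}, {1029, 0}};
      std::vector<MOp> Copy = insertSubregToCopy(Ops, 5);
      assert(Copy.size() == 2 && Copy[0].SubIdx == 5 && Copy[1].Reg == 1029);
      return 0;
    }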
+ MadeChange |= EliminateRegSequences(); + + return MadeChange; +} + +static void UpdateRegSequenceSrcs(unsigned SrcReg, + unsigned DstReg, unsigned SubIdx, + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { + for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), + RE = MRI->reg_end(); RI != RE; ) { + MachineOperand &MO = RI.getOperand(); + ++RI; + MO.substVirtReg(DstReg, SubIdx, TRI); + } +} + +/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are +/// EXTRACT_SUBREG from the same register and to the same virtual register +/// with different sub-register indices, attempt to combine the +/// EXTRACT_SUBREGs and pre-coalesce them. e.g. +/// %reg1026 = VLDMQ %reg1025, 260, pred:14, pred:%reg0 +/// %reg1029:6 = EXTRACT_SUBREG %reg1026, 6 +/// %reg1029:5 = EXTRACT_SUBREG %reg1026, 5 +/// Since D subregs 5, 6 can combine to a Q register, we can coalesce +/// reg1026 to reg1029. +void +TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, + unsigned DstReg) { + SmallSet Seen; + for (unsigned i = 0, e = Srcs.size(); i != e; ++i) { + unsigned SrcReg = Srcs[i]; + if (!Seen.insert(SrcReg)) + continue; + + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into + // the reg_sequence, then we might be able to coalesce them. + bool CanCoalesce = true; + SmallVector SrcSubIndices, DstSubIndices; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { + CanCoalesce = false; + break; + } + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); + } + + if (!CanCoalesce || SrcSubIndices.size() < 2) + continue; + + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. + if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are extract_subregs and that those + // subregs can somehow be combined, scan all the extract_subregs again to + // make sure the subregs are in the right order and can be composed. 
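The canCombineSubRegIndices checks above are target-specific; a deliberately simplified model is that a run of consecutive sub-register indices can be merged into one wider register, as in the D5/D6-to-Q example in the comment. The contiguity rule below is an assumption of this toy only; real targets encode combinability in their register class descriptions:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    // Toy stand-in for canCombineSubRegIndices(): consecutive indices
    // combine; index 0 denotes the whole register.
    static bool canCombine(std::vector<unsigned> Idx, unsigned &NewIdx) {
      std::sort(Idx.begin(), Idx.end());
      for (size_t i = 1; i < Idx.size(); ++i)
        if (Idx[i] != Idx[i - 1] + 1)
          return false;
      NewIdx = 0;  // combined to the full register in this toy model
      return true;
    }

    int main() {
      unsigned NewIdx = 99;
      assert(canCombine({5, 6}, NewIdx) && NewIdx == 0);  // D5,D6 -> one Q
      assert(!canCombine({5, 7}, NewIdx));                // gap: no combine
      return 0;
    }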
+ MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old extract instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy or extract instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); + } + UseMI->eraseFromParent(); + } + } +} + +static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, + MachineRegisterInfo *MRI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI != RegSeq && UseMI->isRegSequence()) + return true; + } + return false; +} + +/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part +/// of the de-ssa process. This replaces sources of REG_SEQUENCE as +/// sub-register references of the register defined by REG_SEQUENCE. e.g. +/// +/// %reg1029, %reg1030 = VLD1q16 %reg1024, ... +/// %reg1031 = REG_SEQUENCE %reg1029, 5, %reg1030, 6 +/// => +/// %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... +bool TwoAddressInstructionPass::EliminateRegSequences() { + if (RegSequences.empty()) + return false; + + for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) { + MachineInstr *MI = RegSequences[i]; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + bool IsImpDef = true; + SmallVector RealSrcs; + SmallSet Seen; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); + if (MI->getOperand(i).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); + } + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isImplicitDef()) { + DefMI->eraseFromParent(); + continue; + } + IsImpDef = false; + + // Remember COPY sources. These might be candidate for coalescing. 
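The REG_SEQUENCE elimination documented above amounts to renaming each source's definition to a sub-register def of the sequence's destination. A sketch with string register names (illustrative only; the pass rewrites MachineOperands via substVirtReg):

    #include <cassert>
    #include <string>
    #include <utility>
    #include <vector>

    struct Def { std::string Reg; unsigned SubIdx; };

    // Each REG_SEQUENCE source (SrcReg, SubIdx) is eliminated by renaming
    // SrcReg's definition to DstReg:SubIdx.
    static void eliminateRegSequence(
        std::vector<Def> &Defs, const std::string &DstReg,
        const std::vector<std::pair<std::string, unsigned> > &Srcs) {
      for (size_t i = 0; i < Srcs.size(); ++i)
        for (size_t j = 0; j < Defs.size(); ++j)
          if (Defs[j].Reg == Srcs[i].first) {
            Defs[j].Reg = DstReg;             // def now writes the big reg...
            Defs[j].SubIdx = Srcs[i].second;  // ...through the src's subindex
          }
    }

    int main() {
      // %reg1029, %reg1030 = VLD1q16 ...  feeding
      // %reg1031 = REG_SEQUENCE %reg1029, 5, %reg1030, 6
      std::vector<Def> Defs = {{"reg1029", 0}, {"reg1030", 0}};
      eliminateRegSequence(Defs, "reg1031", {{"reg1029", 5}, {"reg1030", 6}});
      // => %reg1031:5, %reg1031:6 = VLD1q16 ...
      assert(Defs[0].Reg == "reg1031" && Defs[0].SubIdx == 5);
      assert(Defs[1].SubIdx == 6);
      return 0;
    }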
+ if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) + RealSrcs.push_back(DefMI->getOperand(1).getReg()); + + bool isKill = MI->getOperand(i).isKill(); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || + !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), + MRI->getRegClass(SrcReg), SubIdx)) { + // REG_SEQUENCE cannot have duplicated operands, add a copy. + // Also add an copy if the source is live-in the block. We don't want + // to end up with a partial-redef of a livein, e.g. + // BB0: + // reg1051:10 = + // ... + // BB1: + // ... = reg1051:10 + // BB2: + // reg1051:9 = + // LiveIntervalAnalysis won't like it. + // + // If the REG_SEQUENCE doesn't kill its source, keeping live variables + // correctly up to date becomes very difficult. Insert a copy. + + // Defer any kill flag to the last operand using SrcReg. Otherwise, we + // might insert a COPY that uses SrcReg after is was killed. + if (isKill) + for (unsigned j = i + 2; j < e; j += 2) + if (MI->getOperand(j).getReg() == SrcReg) { + MI->getOperand(j).setIsKill(); + isKill = false; + break; + } + + MachineBasicBlock::iterator InsertLoc = MI; + MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, + MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, SubIdx) + .addReg(SrcReg, getKillRegState(isKill)); + MI->getOperand(i).setReg(0); + if (LV && isKill) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + DEBUG(dbgs() << "Inserted: " << *CopyMI); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (!SrcReg) continue; + unsigned SubIdx = MI->getOperand(i+1).getImm(); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); + } + + if (IsImpDef) { + DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + } else { + DEBUG(dbgs() << "Eliminated: " << *MI); + MI->eraseFromParent(); + } + + // Try coalescing some EXTRACT_SUBREG instructions. This can create + // INSERT_SUBREG instructions that must have flags added by + // LiveIntervalAnalysis, so only run it when LiveVariables is available. + if (LV) + CoalesceExtSubRegs(RealSrcs, DstReg); + } + + RegSequences.clear(); + return true; +} diff --git a/final/lib/CodeGen/UnreachableBlockElim.cpp b/final/lib/CodeGen/UnreachableBlockElim.cpp new file mode 100644 index 00000000000..48d8ab1658d --- /dev/null +++ b/final/lib/CodeGen/UnreachableBlockElim.cpp @@ -0,0 +1,212 @@ +//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is an extremely simple version of the SimplifyCFG pass. Its sole +// job is to delete LLVM basic blocks that are not reachable from the entry +// node. To do this, it performs a simple depth first traversal of the CFG, +// then deletes any unvisited nodes. +// +// Note that this pass is really a hack. In particular, the instruction +// selectors for various targets should just not generate code for unreachable +// blocks. 
Until LLVM has a more systematic way of defining instruction +// selectors, however, we cannot really expect them to handle additional +// complexity. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Constant.h" +#include "llvm/Instructions.h" +#include "llvm/Function.h" +#include "llvm/Pass.h" +#include "llvm/Type.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CFG.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +using namespace llvm; + +namespace { + class UnreachableBlockElim : public FunctionPass { + virtual bool runOnFunction(Function &F); + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass(ID) { + initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + AU.addPreserved(); + } + }; +} +char UnreachableBlockElim::ID = 0; +INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false) + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + SmallPtrSet Reachable; + + // Mark all reachable blocks. + for (df_ext_iterator > I = + df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable); I != E; ++I) + /* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(I)) { + BasicBlock *BB = I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + // Actually remove the blocks now. 
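The reachability computation that runOnFunction relies on is an ordinary depth-first search from the entry block; everything the DFS never visits is dead. A self-contained sketch over an adjacency list (block ids instead of BasicBlocks):

    #include <cassert>
    #include <set>
    #include <vector>

    // Every block not visited from the entry is unreachable and deletable.
    static void dfs(unsigned BB, const std::vector<std::vector<unsigned> > &Succ,
                    std::set<unsigned> &Reachable) {
      if (!Reachable.insert(BB).second)
        return;  // already visited
      for (size_t i = 0; i < Succ[BB].size(); ++i)
        dfs(Succ[BB][i], Succ, Reachable);
    }

    int main() {
      // Block 0 is the entry; block 3 has no path from it.
      std::vector<std::vector<unsigned> > Succ = {{1, 2}, {2}, {}, {2}};
      std::set<unsigned> Reachable;
      dfs(0, Succ, Reachable);
      assert(Reachable.count(2) && !Reachable.count(3));
      return 0;
    }

In the pass, df_ext_iterator performs this walk and records visited blocks directly into the Reachable set, which is why the loop body is empty.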
+  ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+    if (PI) PI->removeBlock(DeadBlocks[i]);
+    DeadBlocks[i]->eraseFromParent();
+  }
+
+  return DeadBlocks.size();
+}
+
+
+namespace {
+  class UnreachableMachineBlockElim : public MachineFunctionPass {
+    virtual bool runOnMachineFunction(MachineFunction &F);
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+    MachineModuleInfo *MMI;
+  public:
+    static char ID; // Pass identification, replacement for typeid
+    UnreachableMachineBlockElim() : MachineFunctionPass(ID) {}
+  };
+}
+char UnreachableMachineBlockElim::ID = 0;
+
+INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
+                "Remove unreachable machine basic blocks", false, false)
+
+char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
+
+void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.addPreserved<MachineLoopInfo>();
+  AU.addPreserved<MachineDominatorTree>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
+  SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+  bool ModifiedPHI = false;
+
+  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
+  MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
+
+  // Mark all reachable blocks.
+  for (df_ext_iterator<MachineFunction*, SmallPtrSet<MachineBasicBlock*, 8> >
+       I = df_ext_begin(&F, Reachable), E = df_ext_end(&F, Reachable);
+       I != E; ++I)
+    /* Mark all reachable blocks */;
+
+  // Loop over all dead blocks, remembering them and deleting all instructions
+  // in them.
+  std::vector<MachineBasicBlock*> DeadBlocks;
+  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+
+    // Test for deadness.
+    if (!Reachable.count(BB)) {
+      DeadBlocks.push_back(BB);
+
+      // Update dominator and loop info.
+      if (MLI) MLI->removeBlock(BB);
+      if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB);
+
+      while (BB->succ_begin() != BB->succ_end()) {
+        MachineBasicBlock* succ = *BB->succ_begin();
+
+        MachineBasicBlock::iterator start = succ->begin();
+        while (start != succ->end() && start->isPHI()) {
+          for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
+            if (start->getOperand(i).isMBB() &&
+                start->getOperand(i).getMBB() == BB) {
+              start->RemoveOperand(i);
+              start->RemoveOperand(i-1);
+            }
+
+          start++;
+        }
+
+        BB->removeSuccessor(BB->succ_begin());
+      }
+    }
+  }
+
+  // Actually remove the blocks now.
+  for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+    DeadBlocks[i]->eraseFromParent();
+
+  // Cleanup PHI nodes.
+  for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
+    MachineBasicBlock *BB = I;
+    // Prune unneeded PHI entries.
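The PHI cleanup that follows can be reduced to list surgery: drop (value, predecessor) entries whose predecessor died, then collapse any PHI left with one entry into a plain copy. A sketch with integer ids standing in for values and blocks:

    #include <cassert>
    #include <utility>
    #include <vector>

    // A PHI as a list of (value, predecessor) pairs.
    typedef std::vector<std::pair<unsigned, unsigned> > Phi;

    static bool pruneAndCheckTrivial(Phi &P, const std::vector<bool> &PredAlive) {
      for (size_t i = P.size(); i-- > 0; )
        if (!PredAlive[P[i].second])
          P.erase(P.begin() + i);
      return P.size() == 1;  // single entry: replace the PHI with its input
    }

    int main() {
      Phi P = {{10, 0}, {11, 1}};               // values 10,11 from preds 0,1
      std::vector<bool> Alive = {true, false};  // pred 1 was removed
      assert(pruneAndCheckTrivial(P, Alive));   // now trivial: use value 10
      assert(P[0].first == 10);
      return 0;
    }

The machine-level code below does the same with operand pairs (a PHI with 3 operands is def + one value/block pair) and uses replaceRegWith for the collapse.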
+ SmallPtrSet preds(BB->pred_begin(), + BB->pred_end()); + MachineBasicBlock::iterator phi = BB->begin(); + while (phi != BB->end() && phi->isPHI()) { + for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) + if (!preds.count(phi->getOperand(i).getMBB())) { + phi->RemoveOperand(i); + phi->RemoveOperand(i-1); + ModifiedPHI = true; + } + + if (phi->getNumOperands() == 3) { + unsigned Input = phi->getOperand(1).getReg(); + unsigned Output = phi->getOperand(0).getReg(); + + MachineInstr* temp = phi; + ++phi; + temp->eraseFromParent(); + ModifiedPHI = true; + + if (Input != Output) + F.getRegInfo().replaceRegWith(Output, Input); + + continue; + } + + ++phi; + } + } + + F.RenumberBlocks(); + + return (DeadBlocks.size() || ModifiedPHI); +} diff --git a/final/lib/CodeGen/VirtRegMap.cpp b/final/lib/CodeGen/VirtRegMap.cpp new file mode 100644 index 00000000000..734b87e62f6 --- /dev/null +++ b/final/lib/CodeGen/VirtRegMap.cpp @@ -0,0 +1,354 @@ +//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the VirtRegMap class. +// +// It also contains implementations of the Spiller interface, which, given a +// virtual register map and a machine function, eliminates all virtual +// references by replacing them with physical register references - adding spill +// code as necessary. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "virtregmap" +#include "VirtRegMap.h" +#include "llvm/Function.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include +using namespace llvm; + +STATISTIC(NumSpills , "Number of register spills"); + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +char VirtRegMap::ID = 0; + +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) + +bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); + TII = mf.getTarget().getInstrInfo(); + TRI = mf.getTarget().getRegisterInfo(); + MF = &mf; + + ReMatId = MAX_STACK_SLOT+1; + LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; + + Virt2PhysMap.clear(); + Virt2StackSlotMap.clear(); + Virt2ReMatIdMap.clear(); + Virt2SplitMap.clear(); + Virt2SplitKillMap.clear(); + ReMatMap.clear(); + ImplicitDefed.clear(); + SpillSlotToUsesMap.clear(); + MI2VirtMap.clear(); + SpillPt2VirtMap.clear(); + RestorePt2VirtMap.clear(); + EmergencySpillMap.clear(); + EmergencySpillSlots.clear(); + + SpillSlotToUsesMap.resize(8); + 
ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); + + allocatableRCRegs.clear(); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + allocatableRCRegs.insert(std::make_pair(*I, + TRI->getAllocatableSet(mf, *I))); + + grow(); + + return false; +} + +void VirtRegMap::grow() { + unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); + Virt2PhysMap.resize(NumRegs); + Virt2StackSlotMap.resize(NumRegs); + Virt2ReMatIdMap.resize(NumRegs); + Virt2SplitMap.resize(NumRegs); + Virt2SplitKillMap.resize(NumRegs); + ReMatMap.resize(NumRegs); + ImplicitDefed.resize(NumRegs); +} + +unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + if (LowSpillSlot == NO_STACK_SLOT) + LowSpillSlot = SS; + if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) + HighSpillSlot = SS; + assert(SS >= LowSpillSlot && "Unexpected low spill slot"); + unsigned Idx = SS-LowSpillSlot; + while (Idx >= SpillSlotToUsesMap.size()) + SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); + return SS; +} + +unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { + std::pair Hint = MRI->getRegAllocationHint(virtReg); + unsigned physReg = Hint.second; + if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + physReg = getPhys(physReg); + if (Hint.first == 0) + return (TargetRegisterInfo::isPhysicalRegister(physReg)) + ? physReg : 0; + return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); +} + +int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); + ++NumSpills; + return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); +} + +void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + assert((SS >= 0 || + (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + "illegal fixed frame index"); + Virt2StackSlotMap[virtReg] = SS; +} + +int VirtRegMap::assignVirtReMatId(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = ReMatId; + return ReMatId++; +} + +void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && + "attempt to assign re-mat id to already spilled register"); + Virt2ReMatIdMap[virtReg] = id; +} + +int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { + std::map::iterator I = + EmergencySpillSlots.find(RC); + if (I != EmergencySpillSlots.end()) + return I->second; + return EmergencySpillSlots[RC] = createSpillSlot(RC); +} + +void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { + if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { + // If FI < LowSpillSlot, this stack reference was produced by + // instruction selection and is not a spill + if (FI >= LowSpillSlot) { + assert(FI >= 0 && "Spill slot index should not be negative!"); + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && 
"Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); + } + } +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, + MachineInstr *NewMI, ModRef MRInfo) { + // Move previous memory references folded to new instruction. + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); + for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), + E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { + MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); + MI2VirtMap.erase(I++); + } + + // add new memory reference + MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { + MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); + MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); +} + +void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (MF->getFrameInfo()->isFixedObjectIndex(FI)) + continue; + // This stack reference was produced by instruction selection and + // is not a spill + if (FI < LowSpillSlot) + continue; + assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() + && "Invalid spill slot"); + SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); + } + MI2VirtMap.erase(MI); + SpillPt2VirtMap.erase(MI); + RestorePt2VirtMap.erase(MI); + EmergencySpillMap.erase(MI); +} + +/// FindUnusedRegisters - Gather a list of allocatable registers that +/// have not been allocated to any virtual register. +bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { + unsigned NumRegs = TRI->getNumRegs(); + UnusedRegs.reset(); + UnusedRegs.resize(NumRegs); + + BitVector Used(NumRegs); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) + Used.set(Virt2PhysMap[Reg]); + } + + BitVector Allocatable = TRI->getAllocatableSet(*MF); + bool AnyUnused = false; + for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { + if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { + bool ReallyUnused = true; + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + if (Used[*AS] || LIs->hasInterval(*AS)) { + ReallyUnused = false; + break; + } + } + if (ReallyUnused) { + AnyUnused = true; + UnusedRegs.set(Reg); + } + } + } + + return AnyUnused; +} + +void VirtRegMap::rewrite(SlotIndexes *Indexes) { + DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " + << MF->getFunction()->getName() << '\n'); + + SmallVector SuperKills; + + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + DEBUG(MBBI->print(dbgs(), Indexes)); + for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end(); + MII != MIE;) { + MachineInstr *MI = MII; + ++MII; + + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned VirtReg = MO.getReg(); + unsigned PhysReg = getPhys(VirtReg); + assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg"); + + // Preserve semantics of sub-register operands. 
+ if (MO.getSubReg()) { + // A virtual register kill refers to the whole register, so we may + // have to add operands for the super-register. + if (MO.isUse() && MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + + // We don't have to deal with sub-register defs because + // LiveIntervalAnalysis already added the necessary + // operands. + + // PhysReg operands cannot have subregister indexes. + PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); + assert(PhysReg && "Invalid SubReg for physical register"); + MO.setSubReg(0); + } + // Rewrite. Note we could have used MachineOperand::substPhysReg(), but + // we need the inlining here. + MO.setReg(PhysReg); + } + + // Add any missing super-register kills after rewriting the whole + // instruction. + while (!SuperKills.empty()) + MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + + DEBUG(dbgs() << "> " << *MI); + + // Finally, remove any identity copies. + if (MI->isIdentityCopy()) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } + } + } + + // Tell MRI about physical registers in use. + for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) + if (!MRI->reg_nodbg_empty(Reg)) + MRI->setPhysRegUsed(Reg); +} + +void VirtRegMap::print(raw_ostream &OS, const Module* M) const { + const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); + const MachineRegisterInfo &MRI = MF->getRegInfo(); + + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << MRI.getRegClass(Reg)->getName() << "\n"; + } + } + + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << MRI.getRegClass(Reg)->getName() << "\n"; + } + } + OS << '\n'; +} + +void VirtRegMap::dump() const { + print(dbgs()); +} diff --git a/final/lib/CodeGen/VirtRegMap.h b/final/lib/CodeGen/VirtRegMap.h new file mode 100644 index 00000000000..ba50f4e4230 --- /dev/null +++ b/final/lib/CodeGen/VirtRegMap.h @@ -0,0 +1,523 @@ +//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a virtual register map. This maps virtual registers to +// physical registers and virtual registers to stack slots. It is created and +// updated by a register allocator and then used by a machine code rewriter that +// adds spill code and rewrites virtual into physical register references. 
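The header that follows declares the two central tables the rewriter just consumed. In miniature, with plain vectors standing in for IndexedMap and using the same "unassigned" sentinels the class defines:

    #include <cassert>
    #include <vector>

    enum { NO_PHYS_REG = 0, NO_STACK_SLOT = (1 << 30) - 1 };

    int main() {
      // Virtual register index -> physical register / stack slot.
      std::vector<unsigned> Virt2Phys(4, NO_PHYS_REG);
      std::vector<int> Virt2Slot(4, NO_STACK_SLOT);

      Virt2Phys[0] = 17;  // assignVirt2Phys: v0 lives in preg 17
      assert(Virt2Phys[0] != (unsigned)NO_PHYS_REG);  // hasPhys(v0)

      assert(Virt2Slot[1] == NO_STACK_SLOT);  // v1 not yet spilled
      Virt2Slot[1] = 3;                       // assignVirt2StackSlot: v1 -> fi#3
      assert(Virt2Slot[1] == 3);              // getStackSlot(v1)
      return 0;
    }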
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_VIRTREGMAP_H +#define LLVM_CODEGEN_VIRTREGMAP_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include + +namespace llvm { + class LiveIntervals; + class MachineInstr; + class MachineFunction; + class MachineRegisterInfo; + class TargetInstrInfo; + class TargetRegisterInfo; + class raw_ostream; + class SlotIndexes; + + class VirtRegMap : public MachineFunctionPass { + public: + enum { + NO_PHYS_REG = 0, + NO_STACK_SLOT = (1L << 30)-1, + MAX_STACK_SLOT = (1L << 18)-1 + }; + + enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; + typedef std::multimap > MI2VirtMapTy; + + private: + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineFunction *MF; + + DenseMap allocatableRCRegs; + + /// Virt2PhysMap - This is a virtual to physical register + /// mapping. Each virtual register is required to have an entry in + /// it; even spilled virtual registers (the register mapped to a + /// spilled register is the temporary used to load it from the + /// stack). + IndexedMap Virt2PhysMap; + + /// Virt2StackSlotMap - This is virtual register to stack slot + /// mapping. Each spilled virtual register has an entry in it + /// which corresponds to the stack slot this register is spilled + /// at. + IndexedMap Virt2StackSlotMap; + + /// Virt2ReMatIdMap - This is virtual register to rematerialization id + /// mapping. Each spilled virtual register that should be remat'd has an + /// entry in it which corresponds to the remat id. + IndexedMap Virt2ReMatIdMap; + + /// Virt2SplitMap - This is virtual register to splitted virtual register + /// mapping. + IndexedMap Virt2SplitMap; + + /// Virt2SplitKillMap - This is splitted virtual register to its last use + /// (kill) index mapping. + IndexedMap Virt2SplitKillMap; + + /// ReMatMap - This is virtual register to re-materialized instruction + /// mapping. Each virtual register whose definition is going to be + /// re-materialized has an entry in it. + IndexedMap ReMatMap; + + /// MI2VirtMap - This is MachineInstr to virtual register + /// mapping. In the case of memory spill code being folded into + /// instructions, we need to know which virtual register was + /// read/written by this instruction. + MI2VirtMapTy MI2VirtMap; + + /// SpillPt2VirtMap - This records the virtual registers which should + /// be spilled right after the MachineInstr due to live interval + /// splitting. + std::map > > + SpillPt2VirtMap; + + /// RestorePt2VirtMap - This records the virtual registers which should + /// be restored right before the MachineInstr due to live interval + /// splitting. + std::map > RestorePt2VirtMap; + + /// EmergencySpillMap - This records the physical registers that should + /// be spilled / restored around the MachineInstr since the register + /// allocator has run out of registers. + std::map > EmergencySpillMap; + + /// EmergencySpillSlots - This records emergency spill slots used to + /// spill physical registers when the register allocator runs out of + /// registers. Ideally only one stack slot is used per function per + /// register class. 
+ std::map EmergencySpillSlots; + + /// ReMatId - Instead of assigning a stack slot to a to be rematerialized + /// virtual register, an unique id is being assigned. This keeps track of + /// the highest id used so far. Note, this starts at (1<<18) to avoid + /// conflicts with stack slot numbers. + int ReMatId; + + /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. + int LowSpillSlot, HighSpillSlot; + + /// SpillSlotToUsesMap - Records uses for each register spill slot. + SmallVector, 8> SpillSlotToUsesMap; + + /// ImplicitDefed - One bit for each virtual register. If set it indicates + /// the register is implicitly defined. + BitVector ImplicitDefed; + + /// UnusedRegs - A list of physical registers that have not been used. + BitVector UnusedRegs; + + /// createSpillSlot - Allocate a spill slot for RC from MFI. + unsigned createSpillSlot(const TargetRegisterClass *RC); + + VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT + void operator=(const VirtRegMap&); // DO NOT IMPLEMENT + + public: + static char ID; + VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), + Virt2StackSlotMap(NO_STACK_SLOT), + Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), + Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), + ReMatId(MAX_STACK_SLOT+1), + LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + virtual bool runOnMachineFunction(MachineFunction &MF); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + MachineFunction &getMachineFunction() const { + assert(MF && "getMachineFunction called before runOnMachineFunction"); + return *MF; + } + + MachineRegisterInfo &getRegInfo() const { return *MRI; } + const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; } + + void grow(); + + /// @brief returns true if the specified virtual register is + /// mapped to a physical register + bool hasPhys(unsigned virtReg) const { + return getPhys(virtReg) != NO_PHYS_REG; + } + + /// @brief returns the physical register mapped to the specified + /// virtual register + unsigned getPhys(unsigned virtReg) const { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + return Virt2PhysMap[virtReg]; + } + + /// @brief creates a mapping for the specified virtual register to + /// the specified physical register + void assignVirt2Phys(unsigned virtReg, unsigned physReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg) && + TargetRegisterInfo::isPhysicalRegister(physReg)); + assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && + "attempt to assign physical register to already mapped " + "virtual register"); + Virt2PhysMap[virtReg] = physReg; + } + + /// @brief clears the specified virtual register's, physical + /// register mapping + void clearVirt(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && + "attempt to clear a not assigned virtual register"); + Virt2PhysMap[virtReg] = NO_PHYS_REG; + } + + /// @brief clears all virtual to physical register mappings + void clearAllVirt() { + Virt2PhysMap.clear(); + grow(); + } + + /// @brief returns the register allocation preference. + unsigned getRegAllocPref(unsigned virtReg); + + /// @brief records virtReg is a split live interval from SReg. + void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { + Virt2SplitMap[virtReg] = SReg; + } + + /// @brief returns the live interval virtReg is split from. 
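getPreSplitReg and getOriginal, declared next, implement a one-step parent lookup; a sketch of why a single lookup with a fallback is idempotent (indices stand in for virtual registers):

    #include <cassert>
    #include <vector>

    int main() {
      // Virt2SplitMap in miniature: register -> register it was split from,
      // 0 meaning "not a product of splitting". getOriginal() is idempotent
      // because a split register records its original, not its parent split.
      std::vector<unsigned> Virt2SplitMap(8, 0);
      Virt2SplitMap[5] = 3;  // v5 was split from v3

      // getOriginal(v): follow the map once, falling back to v itself.
      unsigned Orig5 = Virt2SplitMap[5] ? Virt2SplitMap[5] : 5;
      unsigned Orig3 = Virt2SplitMap[3] ? Virt2SplitMap[3] : 3;
      assert(Orig5 == 3 && Orig3 == 3);  // applying it again changes nothing
      return 0;
    }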
+ unsigned getPreSplitReg(unsigned virtReg) const { + return Virt2SplitMap[virtReg]; + } + + /// getOriginal - Return the original virtual register that VirtReg descends + /// from through splitting. + /// A register that was not created by splitting is its own original. + /// This operation is idempotent. + unsigned getOriginal(unsigned VirtReg) const { + unsigned Orig = getPreSplitReg(VirtReg); + return Orig ? Orig : VirtReg; + } + + /// @brief returns true if the specified virtual register is not + /// mapped to a stack slot or rematerialized. + bool isAssignedReg(unsigned virtReg) const { + if (getStackSlot(virtReg) == NO_STACK_SLOT && + getReMatId(virtReg) == NO_STACK_SLOT) + return true; + // Split register can be assigned a physical register as well as a + // stack slot or remat id. + return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG); + } + + /// @brief returns the stack slot mapped to the specified virtual + /// register + int getStackSlot(unsigned virtReg) const { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + return Virt2StackSlotMap[virtReg]; + } + + /// @brief returns the rematerialization id mapped to the specified virtual + /// register + int getReMatId(unsigned virtReg) const { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + return Virt2ReMatIdMap[virtReg]; + } + + /// @brief create a mapping for the specifed virtual register to + /// the next available stack slot + int assignVirt2StackSlot(unsigned virtReg); + /// @brief create a mapping for the specified virtual register to + /// the specified stack slot + void assignVirt2StackSlot(unsigned virtReg, int frameIndex); + + /// @brief assign an unique re-materialization id to the specified + /// virtual register. + int assignVirtReMatId(unsigned virtReg); + /// @brief assign an unique re-materialization id to the specified + /// virtual register. + void assignVirtReMatId(unsigned virtReg, int id); + + /// @brief returns true if the specified virtual register is being + /// re-materialized. + bool isReMaterialized(unsigned virtReg) const { + return ReMatMap[virtReg] != NULL; + } + + /// @brief returns the original machine instruction being re-issued + /// to re-materialize the specified virtual register. + MachineInstr *getReMaterializedMI(unsigned virtReg) const { + return ReMatMap[virtReg]; + } + + /// @brief records the specified virtual register will be + /// re-materialized and the original instruction which will be re-issed + /// for this purpose. If parameter all is true, then all uses of the + /// registers are rematerialized and it's safe to delete the definition. + void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { + ReMatMap[virtReg] = def; + } + + /// @brief record the last use (kill) of a split virtual register. + void addKillPoint(unsigned virtReg, SlotIndex index) { + Virt2SplitKillMap[virtReg] = index; + } + + SlotIndex getKillPoint(unsigned virtReg) const { + return Virt2SplitKillMap[virtReg]; + } + + /// @brief remove the last use (kill) of a split virtual register. + void removeKillPoint(unsigned virtReg) { + Virt2SplitKillMap[virtReg] = SlotIndex(); + } + + /// @brief returns true if the specified MachineInstr is a spill point. + bool isSpillPt(MachineInstr *Pt) const { + return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); + } + + /// @brief returns the virtual registers that should be spilled due to + /// splitting right after the specified MachineInstr. 
+ std::vector > &getSpillPtSpills(MachineInstr *Pt) { + return SpillPt2VirtMap[Pt]; + } + + /// @brief records the specified MachineInstr as a spill point for virtReg. + void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { + std::map > >::iterator + I = SpillPt2VirtMap.find(Pt); + if (I != SpillPt2VirtMap.end()) + I->second.push_back(std::make_pair(virtReg, isKill)); + else { + std::vector > Virts; + Virts.push_back(std::make_pair(virtReg, isKill)); + SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); + } + } + + /// @brief - transfer spill point information from one instruction to + /// another. + void transferSpillPts(MachineInstr *Old, MachineInstr *New) { + std::map > >::iterator + I = SpillPt2VirtMap.find(Old); + if (I == SpillPt2VirtMap.end()) + return; + while (!I->second.empty()) { + unsigned virtReg = I->second.back().first; + bool isKill = I->second.back().second; + I->second.pop_back(); + addSpillPoint(virtReg, isKill, New); + } + SpillPt2VirtMap.erase(I); + } + + /// @brief returns true if the specified MachineInstr is a restore point. + bool isRestorePt(MachineInstr *Pt) const { + return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); + } + + /// @brief returns the virtual registers that should be restoreed due to + /// splitting right after the specified MachineInstr. + std::vector &getRestorePtRestores(MachineInstr *Pt) { + return RestorePt2VirtMap[Pt]; + } + + /// @brief records the specified MachineInstr as a restore point for virtReg. + void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { + std::map >::iterator I = + RestorePt2VirtMap.find(Pt); + if (I != RestorePt2VirtMap.end()) + I->second.push_back(virtReg); + else { + std::vector Virts; + Virts.push_back(virtReg); + RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); + } + } + + /// @brief - transfer restore point information from one instruction to + /// another. + void transferRestorePts(MachineInstr *Old, MachineInstr *New) { + std::map >::iterator I = + RestorePt2VirtMap.find(Old); + if (I == RestorePt2VirtMap.end()) + return; + while (!I->second.empty()) { + unsigned virtReg = I->second.back(); + I->second.pop_back(); + addRestorePoint(virtReg, New); + } + RestorePt2VirtMap.erase(I); + } + + /// @brief records that the specified physical register must be spilled + /// around the specified machine instr. + void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { + if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) + EmergencySpillMap[MI].push_back(PhysReg); + else { + std::vector PhysRegs; + PhysRegs.push_back(PhysReg); + EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); + } + } + + /// @brief returns true if one or more physical registers must be spilled + /// around the specified instruction. + bool hasEmergencySpills(MachineInstr *MI) const { + return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); + } + + /// @brief returns the physical registers to be spilled and restored around + /// the instruction. + std::vector &getEmergencySpills(MachineInstr *MI) { + return EmergencySpillMap[MI]; + } + + /// @brief - transfer emergency spill information from one instruction to + /// another. 
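The transfer methods in this header all follow one pattern: drain the old instruction's record list into the new instruction, then erase the old key. A sketch of transferSpillPts with integer ids standing in for MachineInstr pointers:

    #include <cassert>
    #include <map>
    #include <utility>
    #include <vector>

    int main() {
      // Instruction -> list of (vreg, isKill) pairs to spill after it.
      typedef std::vector<std::pair<unsigned, bool> > SpillList;
      std::map<unsigned, SpillList> SpillPt2VirtMap;

      SpillPt2VirtMap[100].push_back(std::make_pair(2048, true));
      SpillPt2VirtMap[100].push_back(std::make_pair(2049, false));

      // transferSpillPts(Old=100, New=200): move every record, drop the old.
      SpillList &Old = SpillPt2VirtMap[100];
      while (!Old.empty()) {
        SpillPt2VirtMap[200].push_back(Old.back());
        Old.pop_back();
      }
      SpillPt2VirtMap.erase(100);

      assert(SpillPt2VirtMap.count(200) == 1 &&
             SpillPt2VirtMap[200].size() == 2 && !SpillPt2VirtMap.count(100));
      return 0;
    }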
+ void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { + std::map >::iterator I = + EmergencySpillMap.find(Old); + if (I == EmergencySpillMap.end()) + return; + while (!I->second.empty()) { + unsigned virtReg = I->second.back(); + I->second.pop_back(); + addEmergencySpill(virtReg, New); + } + EmergencySpillMap.erase(I); + } + + /// @brief return or get a emergency spill slot for the register class. + int getEmergencySpillSlot(const TargetRegisterClass *RC); + + /// @brief Return lowest spill slot index. + int getLowSpillSlot() const { + return LowSpillSlot; + } + + /// @brief Return highest spill slot index. + int getHighSpillSlot() const { + return HighSpillSlot; + } + + /// @brief Records a spill slot use. + void addSpillSlotUse(int FrameIndex, MachineInstr *MI); + + /// @brief Returns true if spill slot has been used. + bool isSpillSlotUsed(int FrameIndex) const { + assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); + return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); + } + + /// @brief Mark the specified register as being implicitly defined. + void setIsImplicitlyDefined(unsigned VirtReg) { + ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + + /// @brief Returns true if the virtual register is implicitly defined. + bool isImplicitlyDefined(unsigned VirtReg) const { + return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; + } + + /// @brief Updates information about the specified virtual register's value + /// folded into newMI machine instruction. + void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, + ModRef MRInfo); + + /// @brief Updates information about the specified virtual register's value + /// folded into the specified machine instruction. + void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); + + /// @brief returns the virtual registers' values folded in memory + /// operands of this instruction + std::pair + getFoldedVirts(MachineInstr* MI) const { + return MI2VirtMap.equal_range(MI); + } + + /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the + /// the folded instruction map and spill point map. + void RemoveMachineInstrFromMaps(MachineInstr *MI); + + /// FindUnusedRegisters - Gather a list of allocatable registers that + /// have not been allocated to any virtual register. + bool FindUnusedRegisters(LiveIntervals* LIs); + + /// HasUnusedRegisters - Return true if there are any allocatable registers + /// that have not been allocated to any virtual register. + bool HasUnusedRegisters() const { + return !UnusedRegs.none(); + } + + /// setRegisterUsed - Remember the physical register is now used. + void setRegisterUsed(unsigned Reg) { + UnusedRegs.reset(Reg); + } + + /// isRegisterUnused - Return true if the physical register has not been + /// used. + bool isRegisterUnused(unsigned Reg) const { + return UnusedRegs[Reg]; + } + + /// getFirstUnusedRegister - Return the first physical register that has not + /// been used. + unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { + int Reg = UnusedRegs.find_first(); + while (Reg != -1) { + if (allocatableRCRegs[RC][Reg]) + return (unsigned)Reg; + Reg = UnusedRegs.find_next(Reg); + } + return 0; + } + + /// rewrite - Rewrite all instructions in MF to use only physical registers + /// by mapping all virtual register operands to their assigned physical + /// registers. + /// + /// @param Indexes Optionally remove deleted instructions from indexes. 
+    void rewrite(SlotIndexes *Indexes);
+
+    void print(raw_ostream &OS, const Module* M = 0) const;
+    void dump() const;
+  };
+
+  inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) {
+    VRM.print(OS);
+    return OS;
+  }
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/CodeGen/VirtRegRewriter.cpp b/final/lib/CodeGen/VirtRegRewriter.cpp
new file mode 100644
index 00000000000..ec149dddc1d
--- /dev/null
+++ b/final/lib/CodeGen/VirtRegRewriter.cpp
@@ -0,0 +1,2604 @@
+//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "virtregrewriter"
+#include "VirtRegRewriter.h"
+#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+STATISTIC(NumDSE     , "Number of dead stores elided");
+STATISTIC(NumDSS     , "Number of dead spill slots removed");
+STATISTIC(NumCommutes, "Number of instructions commuted");
+STATISTIC(NumDRM     , "Number of re-materializable defs elided");
+STATISTIC(NumStores  , "Number of stores added");
+STATISTIC(NumPSpills , "Number of physical register spills");
+STATISTIC(NumOmitted , "Number of reloads omitted");
+STATISTIC(NumAvoided , "Number of reloads deemed unnecessary");
+STATISTIC(NumCopified, "Number of available reloads turned into copies");
+STATISTIC(NumReMats  , "Number of re-materializations");
+STATISTIC(NumLoads   , "Number of loads added");
+STATISTIC(NumReused  , "Number of values reused");
+STATISTIC(NumDCE     , "Number of copies elided");
+STATISTIC(NumSUnfold , "Number of stores unfolded");
+STATISTIC(NumModRefUnfold, "Number of modref unfolded");
+
+namespace {
+  enum RewriterName { local, trivial };
+}
+
+static cl::opt<RewriterName>
+RewriterOpt("rewriter",
+            cl::desc("Rewriter to use (default=local)"),
+            cl::Prefix,
+            cl::values(clEnumVal(local,   "local rewriter"),
+                       clEnumVal(trivial, "trivial rewriter"),
+                       clEnumValEnd),
+            cl::init(local));
+
+static cl::opt<bool>
+ScheduleSpills("schedule-spills",
+               cl::desc("Schedule spill code"),
+               cl::init(false));
+
+VirtRegRewriter::~VirtRegRewriter() {}
+
+/// substitutePhysReg - Replace virtual register in MachineOperand with a
+/// physical register. Do the right thing with the sub-register index.
+/// Note that operands may be added, so the MO reference is no longer valid.
+static void substitutePhysReg(MachineOperand &MO, unsigned Reg,
+                              const TargetRegisterInfo &TRI) {
+  if (MO.getSubReg()) {
+    MO.substPhysReg(Reg, TRI);
+
+    // Any kill flags apply to the full virtual register, so they also apply to
+    // the full physical register.
+    // We assume that partial defs have already been decorated with a super-reg
+    // operand by LiveIntervals.
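+    //
+    // Hypothetical illustration (register names are examples, not from the
+    // original source): if a killed use of a vreg with a sub-register index
+    // was rewritten to, say, EAX, the sub-register of its assigned physreg
+    // RAX, then the kill applies to all of RAX; addRegisterKilled() below is
+    // what records that flag on the full register.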
+    MachineInstr &MI = *MO.getParent();
+    if (MO.isUse() && !MO.isUndef() &&
+        (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0))))
+      MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true);
+  } else {
+    MO.setReg(Reg);
+  }
+}
+
+namespace {
+
+/// This class is intended for use with the new spilling framework only. It
+/// rewrites vreg def/uses to use the assigned preg, but does not insert any
+/// spill code.
+struct TrivialRewriter : public VirtRegRewriter {
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs) {
+    DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n");
+    DEBUG(dbgs() << "********** Function: "
+          << MF.getFunction()->getName() << '\n');
+    DEBUG(dbgs() << "**** Machine Instrs"
+          << "(NOTE! Does not include spills and reloads!) ****\n");
+    DEBUG(MF.dump());
+
+    MachineRegisterInfo *mri = &MF.getRegInfo();
+    const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo();
+
+    bool changed = false;
+
+    for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end();
+         liItr != liEnd; ++liItr) {
+
+      const LiveInterval *li = liItr->second;
+      unsigned reg = li->reg;
+
+      if (TargetRegisterInfo::isPhysicalRegister(reg)) {
+        if (!li->empty())
+          mri->setPhysRegUsed(reg);
+      }
+      else {
+        if (!VRM.hasPhys(reg))
+          continue;
+        unsigned pReg = VRM.getPhys(reg);
+        mri->setPhysRegUsed(pReg);
+        // Copy the register use-list before traversing it.
+        SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
+        for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg),
+             E = mri->reg_end(); I != E; ++I)
+          reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
+        for (unsigned N=0; N != reglist.size(); ++N)
+          substitutePhysReg(reglist[N].first->getOperand(reglist[N].second),
+                            pReg, *tri);
+        changed |= !reglist.empty();
+      }
+    }
+
+    DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+    DEBUG(MF.dump());
+
+    return changed;
+  }
+
+};
+
+}
+
+// ************************************************************************ //
+
+namespace {
+
+/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB
+/// from top down, keep track of which spill slots or remats are available in
+/// each register.
+///
+/// Note that not all physregs are created equal here. In particular, some
+/// physregs are reloads that we are allowed to clobber or ignore at any time.
+/// Other physregs are values that the register allocated program is using
+/// that we cannot CHANGE, but we can read if we like. We keep track of this
+/// on a per-stack-slot / remat id basis as the low bit in the value of the
+/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks
+/// this bit and addAvailable sets it.
+class AvailableSpills {
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+
+  // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled
+  // or remat'ed virtual register values that are still available, due to
+  // being loaded or stored to, but not invalidated yet.
+  std::map<int, unsigned> SpillSlotsOrReMatsAvailable;
+
+  // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable,
+  // indicating which stack slot values are currently held by a physreg. This
+  // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a
+  // physreg is modified.
+  std::multimap<unsigned, int> PhysRegsAvailable;
+
+  void disallowClobberPhysRegOnly(unsigned PhysReg);
+
+  void ClobberPhysRegOnly(unsigned PhysReg);
+public:
+  AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii)
+    : TRI(tri), TII(tii) {
+  }
+
+  /// clear - Reset the state.
+  void clear() {
+    SpillSlotsOrReMatsAvailable.clear();
+    PhysRegsAvailable.clear();
+  }
+
+  const TargetRegisterInfo *getRegInfo() const { return TRI; }
+
+  /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is
+  /// available in a physical register, return that PhysReg, otherwise
+  /// return 0.
+  unsigned getSpillSlotOrReMatPhysReg(int Slot) const {
+    std::map<int, unsigned>::const_iterator I =
+      SpillSlotsOrReMatsAvailable.find(Slot);
+    if (I != SpillSlotsOrReMatsAvailable.end()) {
+      return I->second >> 1;  // Remove the CanClobber bit.
+    }
+    return 0;
+  }
+
+  /// addAvailable - Mark that the specified stack slot / remat is available
+  /// in the specified physreg. If CanClobber is true, the physreg can be
+  /// modified at any time without changing the semantics of the program.
+  void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) {
+    // If this stack slot is thought to be available in some other physreg,
+    // remove its record.
+    ModifyStackSlotOrReMat(SlotOrReMat);
+
+    PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat));
+    SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) |
+                                              (unsigned)CanClobber;
+
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DEBUG(dbgs() << "Remembering RM#"
+                   << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
+    else
+      DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
+    DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+                 << (CanClobber ? " canclobber" : "") << "\n");
+  }
+
+  /// canClobberPhysRegForSS - Return true if the spiller is allowed to change
+  /// the value of the specified stackslot register if it desires. The
+  /// specified stack slot must be available in a physreg for this query to
+  /// make sense.
+  bool canClobberPhysRegForSS(int SlotOrReMat) const {
+    assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) &&
+           "Value not available!");
+    return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1;
+  }
+
+  /// canClobberPhysReg - Return true if the spiller is allowed to clobber the
+  /// physical register where values for some stack slot(s) might be
+  /// available.
+  bool canClobberPhysReg(unsigned PhysReg) const {
+    std::multimap<unsigned, int>::const_iterator I =
+      PhysRegsAvailable.lower_bound(PhysReg);
+    while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+      int SlotOrReMat = I->second;
+      I++;
+      if (!canClobberPhysRegForSS(SlotOrReMat))
+        return false;
+    }
+    return true;
+  }
+
+  /// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+  /// stackslot register. The register is still available but is no longer
+  /// allowed to be modified.
+  void disallowClobberPhysReg(unsigned PhysReg);
+
+  /// ClobberPhysReg - This is called when the specified physreg changes
+  /// value. We use this to invalidate any info about stuff that lives in
+  /// it and any of its aliases.
+  void ClobberPhysReg(unsigned PhysReg);
+
+  /// ModifyStackSlotOrReMat - This method is called when the value in a stack
+  /// slot changes. This removes information about which register the
+  /// previous value for this slot lives in (as the previous value is dead
+  /// now).
+  void ModifyStackSlotOrReMat(int SlotOrReMat);
+
+  /// AddAvailableRegsToLiveIn - Availability information is carried forward
+  /// into the specified MBB.
+  /// Add available physical registers as potential live-in's. If they are
+  /// reused in the MBB, they will be added to the live-in set so that the
+  /// register scavenger and post-allocation scheduler model them correctly.
+  void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills,
+                                std::vector<MachineOperand*> &KillOps);
+};
+
+}
+
+// ************************************************************************ //
+
+// Given a location where a reload of a spilled register or a remat of
+// a constant is to be inserted, attempt to find a safe location to
+// insert the load at an earlier point in the basic-block, to hide
+// latency of the load and to avoid address-generation interlock
+// issues.
+static MachineBasicBlock::iterator
+ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
+                 MachineBasicBlock::iterator const Begin,
+                 unsigned PhysReg,
+                 const TargetRegisterInfo *TRI,
+                 bool DoReMat,
+                 int SSorRMId,
+                 const TargetInstrInfo *TII,
+                 const MachineFunction &MF)
+{
+  if (!ScheduleSpills)
+    return InsertLoc;
+
+  // Spill backscheduling is of primary interest to addresses, so
+  // don't do anything if the register isn't in the register class
+  // used for pointers.
+
+  const TargetLowering *TL = MF.getTarget().getTargetLowering();
+
+  if (!TL->isTypeLegal(TL->getPointerTy()))
+    // Believe it or not, this is true on 16-bit targets like PIC16.
+    return InsertLoc;
+
+  const TargetRegisterClass *ptrRegClass =
+    TL->getRegClassFor(TL->getPointerTy());
+  if (!ptrRegClass->contains(PhysReg))
+    return InsertLoc;
+
+  // Scan upwards through the preceding instructions. If an instruction doesn't
+  // reference the stack slot or the register we're loading, we can
+  // backschedule the reload up past it.
+  MachineBasicBlock::iterator NewInsertLoc = InsertLoc;
+  while (NewInsertLoc != Begin) {
+    MachineBasicBlock::iterator Prev = prior(NewInsertLoc);
+    for (unsigned i = 0; i < Prev->getNumOperands(); ++i) {
+      MachineOperand &Op = Prev->getOperand(i);
+      if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId)
+        goto stop;
+    }
+    if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 ||
+        Prev->findRegisterDefOperand(PhysReg))
+      goto stop;
+    for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias)
+      if (Prev->findRegisterUseOperandIdx(*Alias) != -1 ||
+          Prev->findRegisterDefOperand(*Alias))
+        goto stop;
+    NewInsertLoc = Prev;
+  }
+stop:;
+
+  // If we made it to the beginning of the block, turn around and move back
+  // down just past any existing reloads. They're likely to be reloads/remats
+  // for instructions earlier than what our current reload/remat is for, so
+  // they should be scheduled earlier.
+  if (NewInsertLoc == Begin) {
+    int FrameIdx;
+    while (InsertLoc != NewInsertLoc &&
+           (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) ||
+            TII->isTriviallyReMaterializable(NewInsertLoc)))
+      ++NewInsertLoc;
+  }
+
+  return NewInsertLoc;
+}
+
+namespace {
+
+// ReusedOp - For each reused operand, we keep track of a bit of information,
+// in case we need to rollback upon processing a new operand. See comments
+// below.
+struct ReusedOp {
+  // The MachineInstr operand that reused an available value.
+  unsigned Operand;
+
+  // StackSlotOrReMat - The spill slot or remat id of the value being reused.
+  unsigned StackSlotOrReMat;
+
+  // PhysRegReused - The physical register the value was available in.
+  unsigned PhysRegReused;
+
+  // AssignedPhysReg - The physreg that was assigned for use by the reload.
+  unsigned AssignedPhysReg;
+
+  // VirtReg - The virtual register itself.
+  unsigned VirtReg;
+
+  ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr,
+           unsigned vreg)
+    : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr),
+      AssignedPhysReg(apr), VirtReg(vreg) {}
+};
+
+/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that
+/// is reused instead of reloaded.
+class ReuseInfo {
+  MachineInstr &MI;
+  std::vector<ReusedOp> Reuses;
+  BitVector PhysRegsClobbered;
+public:
+  ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) {
+    PhysRegsClobbered.resize(tri->getNumRegs());
+  }
+
+  bool hasReuses() const {
+    return !Reuses.empty();
+  }
+
+  /// addReuse - If we choose to reuse a virtual register that is already
+  /// available instead of reloading it, remember that we did so.
+  void addReuse(unsigned OpNo, unsigned StackSlotOrReMat,
+                unsigned PhysRegReused, unsigned AssignedPhysReg,
+                unsigned VirtReg) {
+    // If the reload is to the assigned register anyway, no undo will be
+    // required.
+    if (PhysRegReused == AssignedPhysReg) return;
+
+    // Otherwise, remember this.
+    Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused,
+                              AssignedPhysReg, VirtReg));
+  }
+
+  void markClobbered(unsigned PhysReg) {
+    PhysRegsClobbered.set(PhysReg);
+  }
+
+  bool isClobbered(unsigned PhysReg) const {
+    return PhysRegsClobbered.test(PhysReg);
+  }
+
+  /// GetRegForReload - We are about to emit a reload into PhysReg. If there
+  /// is some other operand that is using the specified register, either pick
+  /// a new register to use, or evict the previous reload and use this reg.
+  unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg,
+                           MachineFunction &MF, MachineInstr *MI,
+                           AvailableSpills &Spills,
+                           std::vector<MachineInstr*> &MaybeDeadStores,
+                           SmallSet<unsigned, 8> &Rejected,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           VirtRegMap &VRM);
+
+  /// GetRegForReload - Helper for the above GetRegForReload(). Add a
+  /// 'Rejected' set to remember which registers have been considered and
+  /// rejected for the reload. This avoids infinite looping in cases like
+  /// this:
+  /// t1 := op t2, t3
+  /// t2 <- assigned r0 for use by the reload but ended up reuse r1
+  /// t3 <- assigned r1 for use by the reload but ended up reuse r0
+  /// t1 <- desires r1
+  ///       sees r1 is taken by t2, tries t2's reload register r0
+  ///       sees r0 is taken by t3, tries t3's reload register r1
+  ///       sees r1 is taken by t2, tries t2's reload register r0 ...
+  unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI,
+                           AvailableSpills &Spills,
+                           std::vector<MachineInstr*> &MaybeDeadStores,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           VirtRegMap &VRM) {
+    SmallSet<unsigned, 8> Rejected;
+    MachineFunction &MF = *MI->getParent()->getParent();
+    const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg);
+    return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores,
+                           Rejected, RegKills, KillOps, VRM);
+  }
+};
+
+}
+
+// ****************** //
+// Utility Functions  //
+// ****************** //
+
+/// findSinglePredSuccessor - Return via reference a vector of machine basic
+/// blocks each of which is a successor of the specified BB and has no other
+/// predecessor.
+static void findSinglePredSuccessor(MachineBasicBlock *MBB,
+                                    SmallVectorImpl<MachineBasicBlock *> &Succs){
+  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+         SE = MBB->succ_end(); SI != SE; ++SI) {
+    MachineBasicBlock *SuccMBB = *SI;
+    if (SuccMBB->pred_size() == 1)
+      Succs.push_back(SuccMBB);
+  }
+}
+
+/// ResurrectConfirmedKill - Helper for ResurrectKill.
+/// This register is killed but not re-defined and it's being reused. Remove
+/// the kill flag for the register and unset the kill's marker and last kill
+/// operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+  MachineOperand *KillOp = KillOps[Reg];
+  KillOp->setIsKill(false);
+  // KillOps[Reg] might be a def of a super-register.
+  unsigned KReg = KillOp->getReg();
+  if (!RegKills[KReg])
+    return;
+
+  assert(KillOps[KReg]->getParent() == KillOp->getParent() &&
+         "invalid superreg kill flags");
+  KillOps[KReg] = NULL;
+  RegKills.reset(KReg);
+
+  // If it's a def of a super-register, its other sub-registers are no
+  // longer killed as well.
+  for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+    DEBUG(dbgs() << "  Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+    assert(KillOps[*SR]->getParent() == KillOp->getParent() &&
+           "invalid subreg kill flags");
+    KillOps[*SR] = NULL;
+    RegKills.reset(*SR);
+  }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1)
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+/// until it rewrites vreg2. At that point it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+                          const TargetRegisterInfo* TRI, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
+  if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+    ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+    return;
+  }
+  // No previous kill for this reg. Check for subreg kills as well.
+  // d4 =
+  // store d4, fi#0
+  // ...
+  //    = s8
+  // ...
+  //    = d4
+  for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+    unsigned SReg = *SR;
+    if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+      ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
+  }
+}
+
+/// InvalidateKills - MI is going to be deleted. If any of its operands are
+/// marked kill, then invalidate the information.
+static void InvalidateKills(MachineInstr &MI,
+                            const TargetRegisterInfo* TRI,
+                            BitVector &RegKills,
+                            std::vector<MachineOperand*> &KillOps,
+                            SmallVector<unsigned, 2> *KillRegs = NULL) {
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg))
+      continue;
+    if (KillRegs)
+      KillRegs->push_back(Reg);
+    assert(Reg < KillOps.size());
+    if (KillOps[Reg] == &MO) {
+      // This operand was the kill, now no longer.
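+      // Clear the bookkeeping for this register and, just below, for any of
+      // its sub-registers, so that later instructions don't trust a stale
+      // kill flag.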
+      KillOps[Reg] = NULL;
+      RegKills.reset(Reg);
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+        if (RegKills[*SR]) {
+          assert(KillOps[*SR] == &MO && "bad subreg kill flags");
+          KillOps[*SR] = NULL;
+          RegKills.reset(*SR);
+        }
+      }
+    }
+    else {
+      // This operand may have reused a previously killed reg. Keep it live in
+      // case it continues to be used after erasing this instruction.
+      ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+    }
+  }
+}
+
+/// InvalidateRegDef - If the def operand of the specified def MI is now dead
+/// (since its spill instruction is removed), mark it isDead. Also checks if
+/// the def MI has other definition operands that are not dead. Returns it by
+/// reference.
+static bool InvalidateRegDef(MachineBasicBlock::iterator I,
+                             MachineInstr &NewDef, unsigned Reg,
+                             bool &HasLiveDef,
+                             const TargetRegisterInfo *TRI) {
+  // Due to remat, it's possible this reg isn't being reused. That is,
+  // the def of this reg (by prev MI) is now dead.
+  MachineInstr *DefMI = I;
+  MachineOperand *DefOp = NULL;
+  for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = DefMI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
+      continue;
+    if (MO.getReg() == Reg)
+      DefOp = &MO;
+    else if (!MO.isDead())
+      HasLiveDef = true;
+  }
+  if (!DefOp)
+    return false;
+
+  bool FoundUse = false, Done = false;
+  MachineBasicBlock::iterator E = &NewDef;
+  ++I; ++E;
+  for (; !Done && I != E; ++I) {
+    MachineInstr *NMI = I;
+    for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
+      MachineOperand &MO = NMI->getOperand(j);
+      if (!MO.isReg() || MO.getReg() == 0 ||
+          (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
+        continue;
+      if (MO.isUse())
+        FoundUse = true;
+      Done = true; // Stop after scanning all the operands of this MI.
+    }
+  }
+  if (!FoundUse) {
+    // Def is dead!
+    DefOp->setIsDead();
+    return true;
+  }
+  return false;
+}
+
+/// UpdateKills - Track and update kill info. If a MI reads a register that is
+/// marked kill, then it must be due to register reuse. Transfer the kill info
+/// over.
+static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
+                        BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps) {
+  // These do not affect kill info at all.
+  if (MI.isDebugValue())
+    return;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0)
+      continue;
+
+    // This operand may have reused a previously killed reg. Keep it live.
+    ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+
+    if (MO.isKill()) {
+      RegKills.set(Reg);
+      KillOps[Reg] = &MO;
+      for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+        RegKills.set(*SR);
+        KillOps[*SR] = &MO;
+      }
+    }
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg() || !MO.isDef())
+      continue;
+    unsigned Reg = MO.getReg();
+    RegKills.reset(Reg);
+    KillOps[Reg] = NULL;
+    // It also defines (or partially defines) aliases.
+    for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+      RegKills.reset(*SR);
+      KillOps[*SR] = NULL;
+    }
+    for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+      RegKills.reset(*SR);
+      KillOps[*SR] = NULL;
+    }
+  }
+}
+
+/// ReMaterialize - Re-materialize definition for Reg targeting DestReg.
+///
+static void ReMaterialize(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator &MII,
+                          unsigned DestReg, unsigned Reg,
+                          const TargetInstrInfo *TII,
+                          const TargetRegisterInfo *TRI,
+                          VirtRegMap &VRM) {
+  MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
+#ifndef NDEBUG
+  const TargetInstrDesc &TID = ReMatDefMI->getDesc();
+  assert(TID.getNumDefs() == 1 &&
+         "Don't know how to remat instructions that define > 1 values!");
+#endif
+  TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
+  MachineInstr *NewMI = prior(MII);
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    unsigned VirtReg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg))
+      continue;
+    assert(MO.isUse());
+    unsigned Phys = VRM.getPhys(VirtReg);
+    assert(Phys && "Virtual register is not assigned a register?");
+    substitutePhysReg(MO, Phys, *TRI);
+  }
+  ++NumReMats;
+}
+
+/// findSuperReg - Find the SubReg's super-register of given register class
+/// where its SubIdx sub-register is SubReg.
+static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg,
+                             unsigned SubIdx, const TargetRegisterInfo *TRI) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I) {
+    unsigned Reg = *I;
+    if (TRI->getSubReg(Reg, SubIdx) == SubReg)
+      return Reg;
+  }
+  return 0;
+}
+
+// ******************************** //
+// Available Spills Implementation  //
+// ******************************** //
+
+/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified
+/// stackslot register. The register is still available but is no longer
+/// allowed to be modified.
+void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    I++;
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1;
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+          << " copied, it is available for use but can no longer be modified\n");
+  }
+}
+
+/// disallowClobberPhysReg - Unset the CanClobber bit of the specified
+/// stackslot register and its aliases. The register and its aliases may
+/// still be available but are no longer allowed to be modified.
+void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    disallowClobberPhysRegOnly(*AS);
+  disallowClobberPhysRegOnly(PhysReg);
+}
+
+/// ClobberPhysRegOnly - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in it.
+void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) {
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(PhysReg);
+  while (I != PhysRegsAvailable.end() && I->first == PhysReg) {
+    int SlotOrReMat = I->second;
+    PhysRegsAvailable.erase(I++);
+    assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg &&
+           "Bidirectional map mismatch!");
+    SpillSlotsOrReMatsAvailable.erase(SlotOrReMat);
+    DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg)
+          << " clobbered, invalidating ");
+    if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT)
+      DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n");
+    else
+      DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n");
+  }
+}
+
+/// ClobberPhysReg - This is called when the specified physreg changes
+/// value. We use this to invalidate any info about stuff we think lives in
+/// it and any of its aliases.
+void AvailableSpills::ClobberPhysReg(unsigned PhysReg) {
+  for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS)
+    ClobberPhysRegOnly(*AS);
+  ClobberPhysRegOnly(PhysReg);
+}
+
+/// AddAvailableRegsToLiveIn - Availability information is carried forward
+/// into the specified MBB. Add available physical registers as potential
+/// live-in's. If they are reused in the MBB, they will be added to the
+/// live-in set so that the register scavenger and post-allocation scheduler
+/// model them correctly.
+void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
+                                        BitVector &RegKills,
+                                        std::vector<MachineOperand*> &KillOps) {
+  std::set<unsigned> NotAvailable;
+  for (std::multimap<unsigned, int>::iterator
+         I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
+       I != E; ++I) {
+    unsigned Reg = I->first;
+    const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
+    // FIXME: A temporary workaround. We can't reuse available value if it's
+    // not safe to move the def of the virtual register's class. e.g.
+    // X86::RFP* register classes. Do not add it as a live-in.
+    if (!TII->isSafeToMoveRegClassDefs(RC))
+      // This is no longer available.
+      NotAvailable.insert(Reg);
+    else {
+      MBB.addLiveIn(Reg);
+      if (RegKills[Reg])
+        ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+    }
+
+    // Skip over the same register.
+    std::multimap<unsigned, int>::iterator NI = llvm::next(I);
+    while (NI != E && NI->first == Reg) {
+      ++I;
+      ++NI;
+    }
+  }
+
+  for (std::set<unsigned>::iterator I = NotAvailable.begin(),
+         E = NotAvailable.end(); I != E; ++I) {
+    ClobberPhysReg(*I);
+    for (const unsigned *SubRegs = TRI->getSubRegisters(*I);
+         *SubRegs; ++SubRegs)
+      ClobberPhysReg(*SubRegs);
+  }
+}
+
+/// ModifyStackSlotOrReMat - This method is called when the value in a stack
+/// slot changes. This removes information about which register the previous
+/// value for this slot lives in (as the previous value is dead now).
+void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
+  std::map<int, unsigned>::iterator It =
+    SpillSlotsOrReMatsAvailable.find(SlotOrReMat);
+  if (It == SpillSlotsOrReMatsAvailable.end()) return;
+  unsigned Reg = It->second >> 1;
+  SpillSlotsOrReMatsAvailable.erase(It);
+
+  // This register may hold the value of multiple stack slots, only remove this
+  // stack slot from the set of values the register contains.
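+  // For example (hypothetical values): if a register currently provides both
+  // SS#1 and SS#4 and SS#1 is modified, only the (Reg, SS#1) entry is erased
+  // below; SS#4 remains available in that register.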
+  std::multimap<unsigned, int>::iterator I =
+    PhysRegsAvailable.lower_bound(Reg);
+  for (; ; ++I) {
+    assert(I != PhysRegsAvailable.end() && I->first == Reg &&
+           "Map inverse broken!");
+    if (I->second == SlotOrReMat) break;
+  }
+  PhysRegsAvailable.erase(I);
+}
+
+// ************************** //
+// Reuse Info Implementation  //
+// ************************** //
+
+/// GetRegForReload - We are about to emit a reload into PhysReg. If there
+/// is some other operand that is using the specified register, either pick
+/// a new register to use, or evict the previous reload and use this reg.
+unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
+                                    unsigned PhysReg,
+                                    MachineFunction &MF,
+                                    MachineInstr *MI, AvailableSpills &Spills,
+                                    std::vector<MachineInstr*> &MaybeDeadStores,
+                                    SmallSet<unsigned, 8> &Rejected,
+                                    BitVector &RegKills,
+                                    std::vector<MachineOperand*> &KillOps,
+                                    VirtRegMap &VRM) {
+  const TargetInstrInfo* TII = MF.getTarget().getInstrInfo();
+  const TargetRegisterInfo *TRI = Spills.getRegInfo();
+
+  if (Reuses.empty()) return PhysReg;  // This is most often empty.
+
+  for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) {
+    ReusedOp &Op = Reuses[ro];
+    // If we find some other reuse that was supposed to use this register
+    // exactly for its reload, we can change this reload to use ITS reload
+    // register. That is, unless its reload register has already been
+    // considered and subsequently rejected because it has also been reused
+    // by another operand.
+    if (Op.PhysRegReused == PhysReg &&
+        Rejected.count(Op.AssignedPhysReg) == 0 &&
+        RC->contains(Op.AssignedPhysReg)) {
+      // Yup, use the reload register that we didn't use before.
+      unsigned NewReg = Op.AssignedPhysReg;
+      Rejected.insert(PhysReg);
+      return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores,
+                             Rejected, RegKills, KillOps, VRM);
+    } else {
+      // Otherwise, we might also have a problem if a previously reused
+      // value aliases the new register. If so, codegen the previous reload
+      // and use this one.
+      unsigned PRRU = Op.PhysRegReused;
+      if (TRI->regsOverlap(PRRU, PhysReg)) {
+        // Okay, we found out that an alias of a reused register
+        // was used. This isn't good because it means we have
+        // to undo a previous reuse.
+        MachineBasicBlock *MBB = MI->getParent();
+        const TargetRegisterClass *AliasRC =
+          MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg);
+
+        // Copy Op out of the vector and remove it, we're going to insert an
+        // explicit load for it.
+        ReusedOp NewOp = Op;
+        Reuses.erase(Reuses.begin()+ro);
+
+        // MI may be using only a sub-register of PhysRegUsed.
+        unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg();
+        unsigned SubIdx = 0;
+        assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) &&
+               "A reuse cannot be a virtual register");
+        if (PRRU != RealPhysRegUsed) {
+          // What was the sub-register index?
+          SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+          assert(SubIdx &&
+                 "Operand physreg is not a sub-register of PhysRegUsed");
+        }
+
+        // Ok, we're going to try to reload the assigned physreg into the
+        // slot that we were supposed to in the first place. However, that
+        // register could hold a reuse. Check to see if it conflicts or
+        // would prefer us to use a different register.
+        unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg,
+                                              MF, MI, Spills, MaybeDeadStores,
+                                              Rejected, RegKills, KillOps, VRM);
+
+        bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT;
+        int SSorRMId = DoReMat
+          ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat;
+
+        // Back-schedule reloads and remats.
+        MachineBasicBlock::iterator InsertLoc =
+          ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI,
+                           DoReMat, SSorRMId, TII, MF);
+
+        if (DoReMat) {
+          ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII,
+                        TRI, VRM);
+        } else {
+          TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
+                                    NewOp.StackSlotOrReMat, AliasRC, TRI);
+          MachineInstr *LoadMI = prior(InsertLoc);
+          VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
+          // Any stores to this stack slot are not dead anymore.
+          MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL;
+          ++NumLoads;
+        }
+        Spills.ClobberPhysReg(NewPhysReg);
+        Spills.ClobberPhysReg(NewOp.PhysRegReused);
+
+        unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg;
+        MI->getOperand(NewOp.Operand).setReg(RReg);
+        MI->getOperand(NewOp.Operand).setSubReg(0);
+
+        Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg);
+        UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+        DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+
+        DEBUG(dbgs() << "Reuse undone!\n");
+        --NumReused;
+
+        // Finally, PhysReg is now available, go ahead and use it.
+        return PhysReg;
+      }
+    }
+  }
+  return PhysReg;
+}
+
+// ************************************************************************ //
+
+/// FoldsStackSlotModRef - Return true if the specified MI folds the specified
+/// stack slot mod/ref. It also checks if it's possible to unfold the
+/// instruction by having it define a specified physical register instead.
+static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg,
+                                 const TargetInstrInfo *TII,
+                                 const TargetRegisterInfo *TRI,
+                                 VirtRegMap &VRM) {
+  if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI))
+    return false;
+
+  bool Found = false;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
+    unsigned VirtReg = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    if (MR & VirtRegMap::isModRef)
+      if (VRM.getStackSlot(VirtReg) == SS) {
+        Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0;
+        break;
+      }
+  }
+  if (!Found)
+    return false;
+
+  // Does the instruction use a register that overlaps the scratch register?
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;
+    unsigned Reg = MO.getReg();
+    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+      if (!VRM.hasPhys(Reg))
+        continue;
+      Reg = VRM.getPhys(Reg);
+    }
+    if (TRI->regsOverlap(PhysReg, Reg))
+      return false;
+  }
+  return true;
+}
+
+/// FindFreeRegister - Find a free register of a given register class by looking
+/// at (at most) the last two machine instructions.
+static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
+                                 MachineBasicBlock &MBB,
+                                 const TargetRegisterClass *RC,
+                                 const TargetRegisterInfo *TRI,
+                                 BitVector &AllocatableRegs) {
+  BitVector Defs(TRI->getNumRegs());
+  BitVector Uses(TRI->getNumRegs());
+  SmallVector<unsigned, 4> LocalUses;
+  SmallVector<unsigned, 4> Kills;
+
+  // Take a look at 2 instructions at most.
+  unsigned Count = 0;
+  while (Count < 2) {
+    if (MII == MBB.begin())
+      break;
+    MachineInstr *PrevMI = prior(MII);
+    MII = PrevMI;
+
+    if (PrevMI->isDebugValue())
+      continue; // Skip over dbg_value instructions.
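+    // Only real (non-debug) instructions count toward the two-instruction
+    // window; dbg_value instructions were skipped above.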
+    ++Count;
+
+    for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = PrevMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() == 0)
+        continue;
+      unsigned Reg = MO.getReg();
+      if (MO.isDef()) {
+        Defs.set(Reg);
+        for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+          Defs.set(*AS);
+      } else {
+        LocalUses.push_back(Reg);
+        if (MO.isKill() && AllocatableRegs[Reg])
+          Kills.push_back(Reg);
+      }
+    }
+
+    for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
+      unsigned Kill = Kills[i];
+      if (!Defs[Kill] && !Uses[Kill] &&
+          RC->contains(Kill))
+        return Kill;
+    }
+    for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+      unsigned Reg = LocalUses[i];
+      Uses.set(Reg);
+      for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+        Uses.set(*AS);
+    }
+  }
+
+  return 0;
+}
+
+static
+void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg,
+                         const TargetRegisterInfo &TRI) {
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (MO.isReg() && MO.getReg() == VirtReg)
+      substitutePhysReg(MO, PhysReg, TRI);
+  }
+}
+
+namespace {
+
+struct RefSorter {
+  bool operator()(const std::pair<MachineInstr*, int> &A,
+                  const std::pair<MachineInstr*, int> &B) {
+    return A.second < B.second;
+  }
+};
+
+// ***************************** //
+// Local Spiller Implementation  //
+// ***************************** //
+
+class LocalRewriter : public VirtRegRewriter {
+  MachineRegisterInfo *MRI;
+  const TargetRegisterInfo *TRI;
+  const TargetInstrInfo *TII;
+  VirtRegMap *VRM;
+  LiveIntervals *LIs;
+  BitVector AllocatableRegs;
+  DenseMap<MachineInstr*, unsigned> DistanceMap;
+  DenseMap<int, SmallVector<MachineInstr*, 4> > Slot2DbgValues;
+
+  MachineBasicBlock *MBB;       // Basic block currently being processed.
+
+public:
+
+  bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                            LiveIntervals* LIs);
+
+private:
+  void EraseInstr(MachineInstr *MI) {
+    VRM->RemoveMachineInstrFromMaps(MI);
+    LIs->RemoveMachineInstrFromMaps(MI);
+    MI->eraseFromParent();
+  }
+
+  bool OptimizeByUnfold2(unsigned VirtReg, int SS,
+                         MachineBasicBlock::iterator &MII,
+                         std::vector<MachineInstr*> &MaybeDeadStores,
+                         AvailableSpills &Spills,
+                         BitVector &RegKills,
+                         std::vector<MachineOperand*> &KillOps);
+
+  bool OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                        std::vector<MachineInstr*> &MaybeDeadStores,
+                        AvailableSpills &Spills,
+                        BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
+
+  bool CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                           unsigned VirtReg, unsigned SrcReg, int SS,
+                           AvailableSpills &Spills,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps,
+                           const TargetRegisterInfo *TRI);
+
+  void SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                           int Idx, unsigned PhysReg, int StackSlot,
+                           const TargetRegisterClass *RC,
+                           bool isAvailable, MachineInstr *&LastStore,
+                           AvailableSpills &Spills,
+                           SmallSet<MachineInstr*, 4> &ReMatDefs,
+                           BitVector &RegKills,
+                           std::vector<MachineOperand*> &KillOps);
+
+  void TransferDeadness(unsigned Reg, BitVector &RegKills,
+                        std::vector<MachineOperand*> &KillOps);
+
+  bool InsertEmergencySpills(MachineInstr *MI);
+
+  bool InsertRestores(MachineInstr *MI,
+                      AvailableSpills &Spills,
+                      BitVector &RegKills,
+                      std::vector<MachineOperand*> &KillOps);
+
+  bool InsertSpills(MachineInstr *MI);
+
+  void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+                   std::vector<MachineInstr*> &MaybeDeadStores,
+                   BitVector &RegKills,
+                   ReuseInfo &ReusedOperands,
+                   std::vector<MachineOperand*> &KillOps);
+
+  void RewriteMBB(LiveIntervals *LIs,
+                  AvailableSpills &Spills, BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps);
+};
+}
+
+bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
+                                         LiveIntervals* lis) {
+  MRI = &MF.getRegInfo();
+  TRI = MF.getTarget().getRegisterInfo();
+  TII = MF.getTarget().getInstrInfo();
+  VRM = &vrm;
+  LIs = lis;
+  AllocatableRegs = TRI->getAllocatableSet(MF);
+  DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
+        << MF.getFunction()->getName() << "':\n");
+  DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
+        " reloads!) ****\n");
+  DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+  // Spills - Keep track of which spilled values are available in physregs
+  // so that we can choose to reuse the physregs instead of emitting
+  // reloads. This is usually refreshed per basic block.
+  AvailableSpills Spills(TRI, TII);
+
+  // Keep track of kill information.
+  BitVector RegKills(TRI->getNumRegs());
+  std::vector<MachineOperand*> KillOps;
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  // SingleEntrySuccs - Successor blocks which have a single predecessor.
+  SmallVector<MachineBasicBlock*, 4> SinglePredSuccs;
+  SmallPtrSet<MachineBasicBlock*, 16> EarlyVisited;
+
+  // Traverse the basic blocks depth first.
+  MachineBasicBlock *Entry = MF.begin();
+  SmallPtrSet<MachineBasicBlock*, 16> Visited;
+  for (df_ext_iterator<MachineBasicBlock*,
+                       SmallPtrSet<MachineBasicBlock*, 16> >
+         DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
+       DFI != E; ++DFI) {
+    MBB = *DFI;
+    if (!EarlyVisited.count(MBB))
+      RewriteMBB(LIs, Spills, RegKills, KillOps);
+
+    // If this MBB is the only predecessor of a successor, keep the
+    // availability information and visit it next.
+    do {
+      // Keep visiting single-predecessor successors as long as possible.
+      SinglePredSuccs.clear();
+      findSinglePredSuccessor(MBB, SinglePredSuccs);
+      if (SinglePredSuccs.empty())
+        MBB = 0;
+      else {
+        // FIXME: More than one successor, each of which has MBB as its
+        // only predecessor.
+        MBB = SinglePredSuccs[0];
+        if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) {
+          Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps);
+          RewriteMBB(LIs, Spills, RegKills, KillOps);
+        }
+      }
+    } while (MBB);
+
+    // Clear the availability info.
+    Spills.clear();
+  }
+
+  DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
+  DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
+
+  // Mark unused spill slots.
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  int SS = VRM->getLowSpillSlot();
+  if (SS != VirtRegMap::NO_STACK_SLOT) {
+    for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) {
+      SmallVector<MachineInstr*, 4> &DbgValues = Slot2DbgValues[SS];
+      if (!VRM->isSpillSlotUsed(SS)) {
+        MFI->RemoveStackObject(SS);
+        for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
+          MachineInstr *DVMI = DbgValues[j];
+          DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
+          EraseInstr(DVMI);
+        }
+        ++NumDSS;
+      }
+      DbgValues.clear();
+    }
+  }
+  Slot2DbgValues.clear();
+
+  return true;
+}
+
+/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if
+/// a scratch register is available.
+///     xorq  %r12, %r13
+///     addq  %rax, -184(%rbp)
+///     addq  %r13, -184(%rbp)
+/// ==>
+///     xorq  %r12, %r13
+///     movq  -184(%rbp), %r12
+///     addq  %rax, %r12
+///     addq  %r13, %r12
+///     movq  %r12, -184(%rbp)
+bool LocalRewriter::
+OptimizeByUnfold2(unsigned VirtReg, int SS,
+                  MachineBasicBlock::iterator &MII,
+                  std::vector<MachineInstr*> &MaybeDeadStores,
+                  AvailableSpills &Spills,
+                  BitVector &RegKills,
+                  std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator NextMII = llvm::next(MII);
+  // Skip over dbg_value instructions.
+  while (NextMII != MBB->end() && NextMII->isDebugValue())
+    NextMII = llvm::next(NextMII);
+  if (NextMII == MBB->end())
+    return false;
+
+  if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0)
+    return false;
+
+  // Now let's see if the last couple of instructions happen to have freed up
+  // a register.
+  const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+  unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs);
+  if (!PhysReg)
+    return false;
+
+  MachineFunction &MF = *MBB->getParent();
+  TRI = MF.getTarget().getRegisterInfo();
+  MachineInstr &MI = *MII;
+  if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // If the next instruction also folds the same SS modref and can be unfolded,
+  // then it's worthwhile to issue a load from SS into the free register and
+  // then unfold these instructions.
+  if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM))
+    return false;
+
+  // Back-schedule reloads and remats.
+  ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
+
+  // Load from SS to the spare physical register.
+  TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
+  // This invalidates Phys.
+  Spills.ClobberPhysReg(PhysReg);
+  // Remember it's available.
+  Spills.addAvailable(SS, PhysReg);
+  MaybeDeadStores[SS] = NULL;
+
+  // Unfold current MI.
+  SmallVector<MachineInstr*, 4> NewMIs;
+  if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs))
+    llvm_unreachable("Unable to unfold the load / store folding instruction!");
+  assert(NewMIs.size() == 1);
+  AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+  VRM->transferRestorePts(&MI, NewMIs[0]);
+  MII = MBB->insert(MII, NewMIs[0]);
+  InvalidateKills(MI, TRI, RegKills, KillOps);
+  EraseInstr(&MI);
+  ++NumModRefUnfold;
+
+  // Unfold next instructions that fold the same SS.
+  do {
+    MachineInstr &NextMI = *NextMII;
+    NextMII = llvm::next(NextMII);
+    NewMIs.clear();
+    if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
+      llvm_unreachable("Unable to unfold the load / store folding instruction!");
+    assert(NewMIs.size() == 1);
+    AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI);
+    VRM->transferRestorePts(&NextMI, NewMIs[0]);
+    MBB->insert(NextMII, NewMIs[0]);
+    InvalidateKills(NextMI, TRI, RegKills, KillOps);
+    EraseInstr(&NextMI);
+    ++NumModRefUnfold;
+    // Skip over dbg_value instructions.
+    while (NextMII != MBB->end() && NextMII->isDebugValue())
+      NextMII = llvm::next(NextMII);
+    if (NextMII == MBB->end())
+      break;
+  } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
+
+  // Store the value back into SS.
+  TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
+  MachineInstr *StoreMI = prior(NextMII);
+  VRM->addSpillSlotUse(SS, StoreMI);
+  VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+
+  return true;
+}
+
+/// OptimizeByUnfold - Turn a store folding instruction into a load folding
+/// instruction. e.g.
+///     xorl  %edi, %eax
+///     movl  %eax, -32(%ebp)
+///     movl  -36(%ebp), %eax
+///     orl   %eax, -32(%ebp)
+/// ==>
+///     xorl  %edi, %eax
+///     orl   -36(%ebp), %eax
+///     mov   %eax, -32(%ebp)
+/// This enables unfolding optimization for a subsequent instruction which will
+/// also eliminate the newly introduced store instruction.
+bool LocalRewriter::
+OptimizeByUnfold(MachineBasicBlock::iterator &MII,
+                 std::vector<MachineInstr*> &MaybeDeadStores,
+                 AvailableSpills &Spills,
+                 BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  MachineFunction &MF = *MBB->getParent();
+  MachineInstr &MI = *MII;
+  unsigned UnfoldedOpc = 0;
+  unsigned UnfoldPR = 0;
+  unsigned UnfoldVR = 0;
+  int FoldedSS = VirtRegMap::NO_STACK_SLOT;
+  VirtRegMap::MI2VirtMapTy::const_iterator I, End;
+  for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) {
+    // Only transform a MI that folds a single register.
+    if (UnfoldedOpc)
+      return false;
+    UnfoldVR = I->second.first;
+    VirtRegMap::ModRef MR = I->second.second;
+    // MI2VirtMap can be updated, which would invalidate the iterator.
+    // Increment the iterator first.
+    ++I;
+    if (VRM->isAssignedReg(UnfoldVR))
+      continue;
+    // If this reference is not a use, any previous store is now dead.
+    // Otherwise, the store to this stack slot is not dead anymore.
+    FoldedSS = VRM->getStackSlot(UnfoldVR);
+    MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
+    if (DeadStore && (MR & VirtRegMap::isModRef)) {
+      unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
+      if (!PhysReg || !DeadStore->readsRegister(PhysReg))
+        continue;
+      UnfoldPR = PhysReg;
+      UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
+                                                    false, true);
+    }
+  }
+
+  if (!UnfoldedOpc) {
+    if (!UnfoldVR)
+      return false;
+
+    // Look for other unfolding opportunities.
+    return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills,
+                             RegKills, KillOps);
+  }
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse())
+      continue;
+    unsigned VirtReg = MO.getReg();
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg())
+      continue;
+    if (VRM->isAssignedReg(VirtReg)) {
+      unsigned PhysReg = VRM->getPhys(VirtReg);
+      if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+    } else if (VRM->isReMaterialized(VirtReg))
+      continue;
+    int SS = VRM->getStackSlot(VirtReg);
+    unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+    if (PhysReg) {
+      if (TRI->regsOverlap(PhysReg, UnfoldPR))
+        return false;
+      continue;
+    }
+    if (VRM->hasPhys(VirtReg)) {
+      PhysReg = VRM->getPhys(VirtReg);
+      if (!TRI->regsOverlap(PhysReg, UnfoldPR))
+        continue;
+    }
+
+    // Ok, we'll need to reload the value into a register which makes
+    // it impossible to perform the store unfolding optimization later.
+    // Let's see if it is possible to fold the load if the store is
+    // unfolded. This allows us to perform the store unfolding
+    // optimization.
+    SmallVector<MachineInstr*, 4> NewMIs;
+    if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
+      assert(NewMIs.size() == 1);
+      MachineInstr *NewMI = NewMIs.back();
+      MBB->insert(MII, NewMI);
+      NewMIs.clear();
+      int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
+      assert(Idx != -1);
+      SmallVector<unsigned, 2> Ops;
+      Ops.push_back(Idx);
+      MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
+      NewMI->eraseFromParent();
+      if (FoldedMI) {
+        VRM->addSpillSlotUse(SS, FoldedMI);
+        if (!VRM->hasPhys(UnfoldVR))
+          VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
+        VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+        MII = FoldedMI;
+        InvalidateKills(MI, TRI, RegKills, KillOps);
+        EraseInstr(&MI);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+/// CommuteChangesDestination - We are looking for r0 = op r1, r2, where
+/// SrcReg is r1 and it is tied to r0.
+/// Return true if after commuting this instruction it will be r0 = op r2, r1.
+static bool CommuteChangesDestination(MachineInstr *DefMI,
+                                      const TargetInstrDesc &TID,
+                                      unsigned SrcReg,
+                                      const TargetInstrInfo *TII,
+                                      unsigned &DstIdx) {
+  if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+    return false;
+  if (!DefMI->getOperand(1).isReg() ||
+      DefMI->getOperand(1).getReg() != SrcReg)
+    return false;
+  unsigned DefIdx;
+  if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0)
+    return false;
+  unsigned SrcIdx1, SrcIdx2;
+  if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2))
+    return false;
+  if (SrcIdx1 == 1 && SrcIdx2 == 2) {
+    DstIdx = 2;
+    return true;
+  }
+  return false;
+}
+
+/// CommuteToFoldReload -
+/// Look for
+/// r1 = load fi#1
+/// r1 = op r1, r2
+/// store r1, fi#1
+///
+/// If op is commutable and r2 is killed, then we can xform these to
+/// r2 = op r2, fi#1
+/// store r2, fi#1
+bool LocalRewriter::
+CommuteToFoldReload(MachineBasicBlock::iterator &MII,
+                    unsigned VirtReg, unsigned SrcReg, int SS,
+                    AvailableSpills &Spills,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps,
+                    const TargetRegisterInfo *TRI) {
+  if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
+    return false;
+
+  MachineInstr &MI = *MII;
+  MachineBasicBlock::iterator DefMII = prior(MII);
+  MachineInstr *DefMI = DefMII;
+  const TargetInstrDesc &TID = DefMI->getDesc();
+  unsigned NewDstIdx;
+  if (DefMII != MBB->begin() &&
+      TID.isCommutable() &&
+      CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+    MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+    unsigned NewReg = NewDstMO.getReg();
+    if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
+      return false;
+    MachineInstr *ReloadMI = prior(DefMII);
+    int FrameIdx;
+    unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx);
+    if (DestReg != SrcReg || FrameIdx != SS)
+      return false;
+    int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false);
+    if (UseIdx == -1)
+      return false;
+    unsigned DefIdx;
+    if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx))
+      return false;
+    assert(DefMI->getOperand(DefIdx).isReg() &&
+           DefMI->getOperand(DefIdx).getReg() == SrcReg);
+
+    // Now commute def instruction.
+    MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
+    if (!CommutedMI)
+      return false;
+    MBB->insert(MII, CommutedMI);
+    SmallVector<unsigned, 2> Ops;
+    Ops.push_back(NewDstIdx);
+    MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
+    // Not needed since foldMemoryOperand returns new MI.
+    CommutedMI->eraseFromParent();
+    if (!FoldedMI)
+      return false;
+
+    VRM->addSpillSlotUse(SS, FoldedMI);
+    VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
+    // Insert new def MI and spill MI.
+    const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+    TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
+    MII = prior(MII);
+    MachineInstr *StoreMI = MII;
+    VRM->addSpillSlotUse(SS, StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+    MII = FoldedMI;  // Update MII to backtrack.
+
+    // Delete all 3 old instructions.
+    InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
+    EraseInstr(ReloadMI);
+    InvalidateKills(*DefMI, TRI, RegKills, KillOps);
+    EraseInstr(DefMI);
+    InvalidateKills(MI, TRI, RegKills, KillOps);
+    EraseInstr(&MI);
+
+    // If NewReg was previously holding value of some SS, it's now clobbered.
+    // This has to be done now because it's a physical register. When this
+    // instruction is re-visited, it's ignored.
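+    // Sketch of the hazard (an editorial illustration): if NewReg still
+    // advertised a stale stack-slot value, a later reload could wrongly
+    // reuse it, so its availability entries are dropped here.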
+    Spills.ClobberPhysReg(NewReg);
+
+    ++NumCommutes;
+    return true;
+  }
+
+  return false;
+}
+
+/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if
+/// the last store to the same slot is now dead. If so, remove the last store.
+void LocalRewriter::
+SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
+                    int Idx, unsigned PhysReg, int StackSlot,
+                    const TargetRegisterClass *RC,
+                    bool isAvailable, MachineInstr *&LastStore,
+                    AvailableSpills &Spills,
+                    SmallSet<MachineInstr*, 4> &ReMatDefs,
+                    BitVector &RegKills,
+                    std::vector<MachineOperand*> &KillOps) {
+
+  MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+  TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+                           TRI);
+  MachineInstr *StoreMI = prior(oldNextMII);
+  VRM->addSpillSlotUse(StackSlot, StoreMI);
+  DEBUG(dbgs() << "Store:\t" << *StoreMI);
+
+  // If there is a dead store to this stack slot, nuke it now.
+  if (LastStore) {
+    DEBUG(dbgs() << "Removed dead store:\t" << *LastStore);
+    ++NumDSE;
+    SmallVector<unsigned, 2> KillRegs;
+    InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs);
+    MachineBasicBlock::iterator PrevMII = LastStore;
+    bool CheckDef = PrevMII != MBB->begin();
+    if (CheckDef)
+      --PrevMII;
+    EraseInstr(LastStore);
+    if (CheckDef) {
+      // Look at defs of killed registers on the store. Mark the defs
+      // as dead since the store has been deleted and they aren't
+      // being reused.
+      for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
+        bool HasOtherDef = false;
+        if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
+          MachineInstr *DeadDef = PrevMII;
+          if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
+            // FIXME: This assumes a remat def does not have side effects.
+            EraseInstr(DeadDef);
+            ++NumDRM;
+          }
+        }
+      }
+    }
+  }
+
+  // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume
+  // the last of multiple instructions is the actual store.
+  LastStore = prior(oldNextMII);
+
+  // If the stack slot value was previously available in some other
+  // register, change it now. Otherwise, make the value available
+  // in PhysReg.
+  Spills.ModifyStackSlotOrReMat(StackSlot);
+  Spills.ClobberPhysReg(PhysReg);
+  Spills.addAvailable(StackSlot, PhysReg, isAvailable);
+  ++NumStores;
+}
+
+/// isSafeToDelete - Return true if this instruction doesn't produce any side
+/// effects and all of its defs are dead.
+static bool isSafeToDelete(MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
+      TID.isCall() || TID.isBarrier() || TID.isReturn() ||
+      MI.isLabel() || MI.isDebugValue() ||
+      MI.hasUnmodeledSideEffects())
+    return false;
+
+  // Technically speaking, inline asm without side effects and no defs can
+  // still be deleted. But there is so much bad inline asm code out there,
+  // we should let it be.
+  if (MI.isInlineAsm())
+    return false;
+
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.getReg())
+      continue;
+    if (MO.isDef() && !MO.isDead())
+      return false;
+    if (MO.isUse() && MO.isKill())
+      // FIXME: We can't remove kill markers or else the scavenger will assert.
+      // An alternative is to add a ADD pseudo instruction to replace kill
+      // markers.
+      return false;
+  }
+  return true;
+}
+
+/// TransferDeadness - An identity copy definition is dead and it's being
+/// removed. Find the last def or use and mark it as dead / kill.
+void LocalRewriter::
+TransferDeadness(unsigned Reg, BitVector &RegKills,
+                 std::vector<MachineOperand*> &KillOps) {
+  SmallPtrSet<MachineInstr*, 4> Seens;
+  SmallVector<std::pair<MachineInstr*, int>, 8> Refs;
+  for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
+         RE = MRI->reg_end(); RI != RE; ++RI) {
+    MachineInstr *UDMI = &*RI;
+    if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
+      continue;
+    DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
+    if (DI == DistanceMap.end())
+      continue;
+    if (Seens.insert(UDMI))
+      Refs.push_back(std::make_pair(UDMI, DI->second));
+  }
+
+  if (Refs.empty())
+    return;
+  std::sort(Refs.begin(), Refs.end(), RefSorter());
+
+  while (!Refs.empty()) {
+    MachineInstr *LastUDMI = Refs.back().first;
+    Refs.pop_back();
+
+    MachineOperand *LastUD = NULL;
+    for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = LastUDMI->getOperand(i);
+      if (!MO.isReg() || MO.getReg() != Reg)
+        continue;
+      if (!LastUD || (LastUD->isUse() && MO.isDef()))
+        LastUD = &MO;
+      if (LastUDMI->isRegTiedToDefOperand(i))
+        break;
+    }
+    if (LastUD->isDef()) {
+      // If the instruction has no side effect, delete it and propagate
+      // backward further. Otherwise, mark it dead and we are done.
+      if (!isSafeToDelete(*LastUDMI)) {
+        LastUD->setIsDead();
+        break;
+      }
+      EraseInstr(LastUDMI);
+    } else {
+      LastUD->setIsKill();
+      RegKills.set(Reg);
+      KillOps[Reg] = LastUD;
+      break;
+    }
+  }
+}
+
+/// InsertEmergencySpills - Insert emergency spills before MI if requested by
+/// VRM. Return true if spills were inserted.
+bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
+  if (!VRM->hasEmergencySpills(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  SmallSet<int, 4> UsedSS;
+  std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
+  for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
+    unsigned PhysReg = EmSpills[i];
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
+    assert(RC && "Unable to determine register class!");
+    int SS = VRM->getEmergencySpillSlot(RC);
+    if (UsedSS.count(SS))
+      llvm_unreachable("Need to spill more than one physical register!");
+    UsedSS.insert(SS);
+    TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
+    MachineInstr *StoreMI = prior(MII);
+    VRM->addSpillSlotUse(SS, StoreMI);
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
+                       TII, *MBB->getParent());
+
+    TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
+
+    MachineInstr *LoadMI = prior(InsertLoc);
+    VRM->addSpillSlotUse(SS, LoadMI);
+    ++NumPSpills;
+    DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+  }
+  return true;
+}
+
+/// InsertRestores - Restore registers before MI if requested by VRM. Return
+/// true if any instructions were inserted.
+bool LocalRewriter::InsertRestores(MachineInstr *MI,
+                                   AvailableSpills &Spills,
+                                   BitVector &RegKills,
+                                   std::vector<MachineOperand*> &KillOps) {
+  if (!VRM->isRestorePt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<unsigned> &RestoreRegs = VRM->getRestorePtRestores(MI);
+  for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) {
+    unsigned VirtReg = RestoreRegs[e-i-1];  // Reverse order.
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
+    unsigned Phys = VRM->getPhys(VirtReg);
+    MRI->setPhysRegUsed(Phys);
+
+    // Check if the value being restored is available. If so, it must be
+    // from a predecessor BB that falls through into this BB.
We do not
+    // expect:
+    // BB1:
+    // r1 = load fi#1
+    // ...
+    //    = r1
+    // ... # r1 not clobbered
+    // ...
+    //    = load fi#1
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+    if (InReg == Phys) {
+      // If the value is already available in the expected register, save
+      // a reload / remat.
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg << " instead of reloading into physreg "
+                   << TRI->getName(Phys) << '\n');
+
+      // Reusing a physreg may resurrect it. But we expect ProcessUses to
+      // update the kill flags for the current instruction after processing
+      // it.
+
+      ++NumOmitted;
+      continue;
+    } else if (InReg && InReg != Phys) {
+      if (SSorRMId)
+        DEBUG(dbgs() << "Reusing RM#"
+                     << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1);
+      else
+        DEBUG(dbgs() << "Reusing SS#" << SSorRMId);
+      DEBUG(dbgs() << " from physreg "
+                   << TRI->getName(InReg) << " for vreg"
+                   << VirtReg << " by copying it into physreg "
+                   << TRI->getName(Phys) << '\n');
+
+      // If the reloaded / remat value is available in another register,
+      // copy it to the desired register.
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                         *MBB->getParent());
+      MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+                                     TII->get(TargetOpcode::COPY), Phys)
+                               .addReg(InReg, RegState::Kill);
+
+      // This invalidates Phys.
+      Spills.ClobberPhysReg(Phys);
+      // Remember it's available.
+      Spills.addAvailable(SSorRMId, Phys);
+
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      DEBUG(dbgs() << '\t' << *CopyMI);
+      ++NumCopified;
+      continue;
+    }
+
+    // Back-schedule reloads and remats.
+    MachineBasicBlock::iterator InsertLoc =
+      ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
+                       *MBB->getParent());
+
+    if (VRM->isReMaterialized(VirtReg)) {
+      ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
+    } else {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
+      MachineInstr *LoadMI = prior(InsertLoc);
+      VRM->addSpillSlotUse(SSorRMId, LoadMI);
+      ++NumLoads;
+      DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+    }
+
+    // This invalidates Phys.
+    Spills.ClobberPhysReg(Phys);
+    // Remember it's available.
+    Spills.addAvailable(SSorRMId, Phys);
+
+    UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+    DEBUG(dbgs() << '\t' << *prior(MII));
+  }
+  return true;
+}
+
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
+/// true if spills were inserted.
+bool LocalRewriter::InsertSpills(MachineInstr *MI) {
+  if (!VRM->isSpillPt(MI))
+    return false;
+  MachineBasicBlock::iterator MII = MI;
+  std::vector<std::pair<unsigned, bool> > &SpillRegs =
+    VRM->getSpillPtSpills(MI);
+  for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) {
+    unsigned VirtReg = SpillRegs[i].first;
+    bool isKill = SpillRegs[i].second;
+    if (!VRM->getPreSplitReg(VirtReg))
+      continue; // Split interval spilled again.
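+    // The spill store is inserted *after* MI (at llvm::next(MII)); taking
+    // prior() of the remembered oldNextMII below then recovers the store even
+    // if the target expands the spill into more than one instruction.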
+    const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+    unsigned Phys = VRM->getPhys(VirtReg);
+    int StackSlot = VRM->getStackSlot(VirtReg);
+    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+    TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
+                             RC, TRI);
+    MachineInstr *StoreMI = prior(oldNextMII);
+    VRM->addSpillSlotUse(StackSlot, StoreMI);
+    DEBUG(dbgs() << "Store:\t" << *StoreMI);
+    VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
+  }
+  return true;
+}
+
+
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+                                std::vector<MachineInstr*> &MaybeDeadStores,
+                                BitVector &RegKills,
+                                ReuseInfo &ReusedOperands,
+                                std::vector<MachineOperand*> &KillOps) {
+  // Clear kill info.
+  SmallSet<unsigned, 4> KilledMIRegs;
+  SmallVector<unsigned, 4> VirtUseOps;
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.getReg() == 0)
+      continue;   // Ignore non-register operands.
+
+    unsigned VirtReg = MO.getReg();
+
+    if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+      // Ignore physregs for spilling, but remember that it is used by this
+      // function.
+      MRI->setPhysRegUsed(VirtReg);
+      continue;
+    }
+
+    // We want to process implicit virtual register uses first.
+    if (MO.isImplicit())
+      // If the virtual register is implicitly defined, emit an implicit_def
+      // before so the scavenger knows it's "defined".
+      // FIXME: This is a horrible hack done by the register allocator to
+      // remat a definition with virtual register operand.
+      VirtUseOps.insert(VirtUseOps.begin(), i);
+    else
+      VirtUseOps.push_back(i);
+
+    // A partial def causes problems because the same operand both reads and
+    // writes the register. This rewriter is designed to rewrite uses and defs
+    // separately, so a partial def would already have been rewritten to a
+    // physreg by the time we get to processing defs.
+    // Add an implicit use operand to model the partial def.
+    if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+        MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+      VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+      MI.addOperand(MachineOperand::CreateReg(VirtReg,
+                                              false,  // isDef
+                                              true)); // isImplicit
+      DEBUG(dbgs() << "Partial redef: " << MI);
+    }
+  }
+
+  // Process all of the spilled uses and all non spilled reg references.
+  SmallVector<int, 2> PotentialDeadStoreSlots;
+  KilledMIRegs.clear();
+  for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+    unsigned i = VirtUseOps[j];
+    unsigned VirtReg = MI.getOperand(i).getReg();
+    assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+           "Not a virtual register?");
+
+    unsigned SubIdx = MI.getOperand(i).getSubReg();
+    if (VRM->isAssignedReg(VirtReg)) {
+      // This virtual register was assigned a physreg!
+      unsigned Phys = VRM->getPhys(VirtReg);
+      MRI->setPhysRegUsed(Phys);
+      if (MI.getOperand(i).isDef())
+        ReusedOperands.markClobbered(Phys);
+      substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+      if (VRM->isImplicitlyDefined(VirtReg))
+        // FIXME: Is this needed?
+        BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+      continue;
+    }
+
+    // This virtual register is now known to be a spilled value.
+    if (!MI.getOperand(i).isUse())
+      continue;  // Handle defs in the loop below (handle use&def here though)
+
+    bool AvoidReload = MI.getOperand(i).isUndef();
+    // Check if it is defined by an implicit def. It should not be spilled.
+    // Note, this is for correctness reasons, e.g.
+    // 8   %reg1024 = IMPLICIT_DEF
+    // 12  %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2
+    // The live range [12, 14) is not part of the r1024 live interval since
+    // it's defined by an implicit def. It will not conflict with the live
+    // interval of r1025. Now suppose both registers are spilled; you can
+    // easily see a situation where both registers are reloaded before
+    // the INSERT_SUBREG and both target registers that would overlap.
+    bool DoReMat = VRM->isReMaterialized(VirtReg);
+    int SSorRMId = DoReMat
+      ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+    int ReuseSlot = SSorRMId;
+
+    // Check to see if this stack slot is available.
+    unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+    // If this is a sub-register use, make sure the reuse register is in the
+    // right register class. For example, for x86 not all of the 32-bit
+    // registers have accessible sub-registers.
+    // Similarly so for EXTRACT_SUBREG. Consider this:
+    // EDI = op
+    // MOV32_mr fi#1, EDI
+    // ...
+    //       = EXTRACT_SUBREG fi#1
+    // fi#1 is available in EDI, but it cannot be reused because it's not in
+    // the right register file.
+    if (PhysReg && !AvoidReload && SubIdx) {
+      const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+      if (!RC->contains(PhysReg))
+        PhysReg = 0;
+    }
+
+    if (PhysReg && !AvoidReload) {
+      // This spilled operand might be part of a two-address operand. If this
+      // is the case, then changing it will necessarily require changing the
+      // def part of the instruction as well. However, in some cases, we
+      // aren't allowed to modify the reused register. If none of these cases
+      // apply, reuse it.
+      bool CanReuse = true;
+      bool isTied = MI.isRegTiedToDefOperand(i);
+      if (isTied) {
+        // Okay, we have a two address operand. We can reuse this physreg as
+        // long as we are allowed to clobber the value and there isn't an
+        // earlier def that has already clobbered the physreg.
+        CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+          Spills.canClobberPhysReg(PhysReg);
+      }
+      // If this is an asm, and a PhysReg alias is used elsewhere as an
+      // earlyclobber operand, we can't also use it as an input.
+      if (MI.isInlineAsm()) {
+        for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+          MachineOperand &MOk = MI.getOperand(k);
+          if (MOk.isReg() && MOk.isEarlyClobber() &&
+              TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+            CanReuse = false;
+            DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+                         << " for vreg" << VirtReg << ": " << MOk << '\n');
+            break;
+          }
+        }
+      }
+
+      if (CanReuse) {
+        // If this stack slot value is already available, reuse it!
+        if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+          DEBUG(dbgs() << "Reusing RM#"
+                       << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+        else
+          DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+        DEBUG(dbgs() << " from physreg "
+                     << TRI->getName(PhysReg) << " for vreg"
+                     << VirtReg << " instead of reloading into physreg "
+                     << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+        unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+        MI.getOperand(i).setReg(RReg);
+        MI.getOperand(i).setSubReg(0);
+
+        // Reusing a physreg may resurrect it. But we expect ProcessUses to
+        // update the kill flags for the current instr after processing it.
+
+        // The only technical detail we have is that we don't know that
+        // PhysReg won't be clobbered by a reloaded stack slot that occurs
+        // later in the instruction. In particular, consider 'op V1, V2'.
+        // If V1 is available in physreg R0, we would choose to reuse it
+        // here, instead of reloading it into the register the allocator
+        // indicated (say R1). However, V2 might have to be reloaded
+        // later, and it might indicate that it needs to live in R0. When
+        // this occurs, we need to have information available that
+        // indicates it is safe to use R1 for the reload instead of R0.
+        //
+        // To further complicate matters, we might conflict with an alias,
+        // or R0 and R1 might not be compatible with each other. In this
+        // case, we actually insert a reload for V1 in R1, ensuring that
+        // we can get at R0 or its alias.
+        ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+                                VRM->getPhys(VirtReg), VirtReg);
+        if (isTied)
+          // Only mark it clobbered if this is a use&def operand.
+          ReusedOperands.markClobbered(PhysReg);
+        ++NumReused;
+
+        if (MI.getOperand(i).isKill() &&
+            ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+          // The store of this spilled value is potentially dead, but we
+          // won't know for certain until we've confirmed that the re-use
+          // above is valid, which means waiting until the other operands
+          // are processed. For now we just track the spill slot, we'll
+          // remove it after the other operands are processed if valid.
+
+          PotentialDeadStoreSlots.push_back(ReuseSlot);
+        }
+
+        // Mark it isKill if there are no other uses of the same virtual
+        // register and it's not a two-address operand. IsKill will be
+        // unset if reg is reused.
+        if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+          MI.getOperand(i).setIsKill();
+          KilledMIRegs.insert(VirtReg);
+        }
+        continue;
+      }  // CanReuse
+
+      // Otherwise we have a situation where we have a two-address instruction
+      // whose mod/ref operand needs to be reloaded. This reload is already
+      // available in some register "PhysReg", but if we used PhysReg as the
+      // operand to our 2-addr instruction, the instruction would modify
+      // PhysReg. This isn't cool if something later uses PhysReg and expects
+      // to get its initial value.
+      //
+      // To avoid this problem, and to avoid doing a load right after a store,
+      // we emit a copy from PhysReg into the designated register for this
+      // operand.
+      //
+      // This case also applies to an earlyclobber'd PhysReg.
+      unsigned DesignatedReg = VRM->getPhys(VirtReg);
+      assert(DesignatedReg && "Must map virtreg to physreg!");
+
+      // Note that, if we reused a register for a previous operand, the
+      // register we want to reload into might not actually be
+      // available. If this occurs, use the register indicated by the
+      // reuser.
+      if (ReusedOperands.hasReuses())
+        DesignatedReg = ReusedOperands.
+          GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+                          MaybeDeadStores, RegKills, KillOps, *VRM);
+
+      // If the mapped designated register is actually the physreg we have
+      // incoming, we don't need to insert a dead copy.
+      if (DesignatedReg == PhysReg) {
+        // If this stack slot value is already available, reuse it!
+        if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+          DEBUG(dbgs() << "Reusing RM#"
+                       << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+        else
+          DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+        DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+                     << " for vreg" << VirtReg
+                     << " instead of reloading into same physreg.\n");
+        unsigned RReg = SubIdx ?
TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+        MI.getOperand(i).setReg(RReg);
+        MI.getOperand(i).setSubReg(0);
+        ReusedOperands.markClobbered(RReg);
+        ++NumReused;
+        continue;
+      }
+
+      MRI->setPhysRegUsed(DesignatedReg);
+      ReusedOperands.markClobbered(DesignatedReg);
+
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                         SSorRMId, TII, *MBB->getParent());
+      MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+                                     TII->get(TargetOpcode::COPY),
+                                     DesignatedReg).addReg(PhysReg);
+      CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+      UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+      // This invalidates DesignatedReg.
+      Spills.ClobberPhysReg(DesignatedReg);
+
+      Spills.addAvailable(ReuseSlot, DesignatedReg);
+      unsigned RReg =
+        SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+      DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+      ++NumReused;
+      continue;
+    }  // if (PhysReg)
+
+    // Otherwise, reload it and remember that we have it.
+    PhysReg = VRM->getPhys(VirtReg);
+    assert(PhysReg && "Must map virtreg to physreg!");
+
+    // Note that, if we reused a register for a previous operand, the
+    // register we want to reload into might not actually be
+    // available. If this occurs, use the register indicated by the
+    // reuser.
+    if (ReusedOperands.hasReuses())
+      PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                  Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+    MRI->setPhysRegUsed(PhysReg);
+    ReusedOperands.markClobbered(PhysReg);
+    if (AvoidReload)
+      ++NumAvoided;
+    else {
+      // Back-schedule reloads and remats.
+      MachineBasicBlock::iterator InsertLoc =
+        ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+                         SSorRMId, TII, *MBB->getParent());
+
+      if (DoReMat) {
+        ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+      } else {
+        const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+        TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC, TRI);
+        MachineInstr *LoadMI = prior(InsertLoc);
+        VRM->addSpillSlotUse(SSorRMId, LoadMI);
+        ++NumLoads;
+        DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+      }
+      // This invalidates PhysReg.
+      Spills.ClobberPhysReg(PhysReg);
+
+      // Any stores to this stack slot are not dead anymore.
+      if (!DoReMat)
+        MaybeDeadStores[SSorRMId] = NULL;
+      Spills.addAvailable(SSorRMId, PhysReg);
+      // Assumes this is the last use. IsKill will be unset if reg is reused
+      // unless it's a two-address operand.
+      if (!MI.isRegTiedToDefOperand(i) &&
+          KilledMIRegs.count(VirtReg) == 0) {
+        MI.getOperand(i).setIsKill();
+        KilledMIRegs.insert(VirtReg);
+      }
+
+      UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+      DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+    }
+    unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+    MI.getOperand(i).setReg(RReg);
+    MI.getOperand(i).setSubReg(0);
+  }
+
+  // Ok - now we can remove stores that have been confirmed dead.
+  for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+    // This was the last use and the spilled value is still available
+    // for reuse. That means the spill was unnecessary!
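+    // E.g. (illustrative): "store r0, fi#3" followed only by reloads of fi#3
+    // that were all satisfied by reusing r0 leaves the store unread; it is
+    // deleted below and the slot's MaybeDeadStores entry is cleared.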
+    int PDSSlot = PotentialDeadStoreSlots[j];
+    MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+    if (DeadStore) {
+      DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+      InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+      EraseInstr(DeadStore);
+      MaybeDeadStores[PDSSlot] = NULL;
+      ++NumDSE;
+    }
+  }
+}
+
+/// RewriteMBB - Keep track of which spills are available even after the
+/// register allocator is done with them. If possible, avoid reloading vregs.
+void
+LocalRewriter::RewriteMBB(LiveIntervals *LIs,
+                          AvailableSpills &Spills, BitVector &RegKills,
+                          std::vector<MachineOperand*> &KillOps) {
+
+  DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '"
+               << MBB->getName() << "':\n");
+
+  MachineFunction &MF = *MBB->getParent();
+
+  // MaybeDeadStores - When we need to write a value back into a stack slot,
+  // keep track of the inserted store. If the stack slot value is never read
+  // (because the value was used from some available register, for example),
+  // and subsequently stored to, the original store is dead. This map keeps
+  // track of inserted stores that are not used. If we see a subsequent store
+  // to the same stack slot, the original store is deleted.
+  std::vector<MachineInstr*> MaybeDeadStores;
+  MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL);
+
+  // ReMatDefs - These are rematerializable def MIs which are not deleted.
+  SmallSet<MachineInstr*, 4> ReMatDefs;
+
+  // Keep track of the registers we have already spilled in case there are
+  // multiple defs of the same register in MI.
+  SmallSet<unsigned, 4> SpilledMIRegs;
+
+  RegKills.reset();
+  KillOps.clear();
+  KillOps.resize(TRI->getNumRegs(), NULL);
+
+  DistanceMap.clear();
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E; ) {
+    MachineBasicBlock::iterator NextMII = llvm::next(MII);
+
+    if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps))
+      NextMII = llvm::next(MII);
+
+    if (InsertEmergencySpills(MII))
+      NextMII = llvm::next(MII);
+
+    InsertRestores(MII, Spills, RegKills, KillOps);
+
+    if (InsertSpills(MII))
+      NextMII = llvm::next(MII);
+
+    bool Erased = false;
+    bool BackTracked = false;
+    MachineInstr &MI = *MII;
+
+    // Remember DbgValue's which reference stack slots.
+    if (MI.isDebugValue() && MI.getOperand(0).isFI())
+      Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI);
+
+    /// ReusedOperands - Keep track of operand reuse in case we need to undo
+    /// reuse.
+    ReuseInfo ReusedOperands(MI, TRI);
+
+    ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands,
+                KillOps);
+
+    DEBUG(dbgs() << '\t' << MI);
+
+    // If we have folded references to memory operands, make sure we clear all
+    // physical registers that may contain the value of the spilled virtual
+    // register.
+
+    // Copy the folded virts to a small vector, we may change MI2VirtMap.
+    SmallVector<std::pair<unsigned, VirtRegMap::ModRef>, 4> FoldedVirts;
+    // C++0x FTW!
+    for (std::pair<VirtRegMap::MI2VirtMapTy::const_iterator,
+                   VirtRegMap::MI2VirtMapTy::const_iterator> FVRange =
+           VRM->getFoldedVirts(&MI);
+         FVRange.first != FVRange.second; ++FVRange.first)
+      FoldedVirts.push_back(FVRange.first->second);
+
+    SmallSet<int, 2> FoldedSS;
+    for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) {
+      unsigned VirtReg = FoldedVirts[FVI].first;
+      VirtRegMap::ModRef MR = FoldedVirts[FVI].second;
+      DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR);
+
+      int SS = VRM->getStackSlot(VirtReg);
+      if (SS == VirtRegMap::NO_STACK_SLOT)
+        continue;
+      FoldedSS.insert(SS);
+      DEBUG(dbgs() << " - StackSlot: " << SS << "\n");
+
+      // If this folded instruction is just a use, check to see if it's a
+      // straight load from the virt reg slot.
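+      // E.g. (illustrative): a folded "r1 = load fi#1" where fi#1 is already
+      // available in r2 becomes "r1 = COPY r2" below, or disappears entirely
+      // when r1 == r2.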
+      if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) {
+        int FrameIdx;
+        unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx);
+        if (DestReg && FrameIdx == SS) {
+          // If this spill slot is available, turn it into a copy (or nothing)
+          // instead of leaving it as a load!
+          if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
+            DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
+            if (DestReg != InReg) {
+              MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
+              MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
+                                             TII->get(TargetOpcode::COPY))
+                .addReg(DestReg, RegState::Define, DefMO->getSubReg())
+                .addReg(InReg, RegState::Kill);
+              // Revisit the copy so we make sure to notice the effects of the
+              // operation on the destreg (either needing to RA it if it's
+              // virtual or needing to clobber any values if it's physical).
+              NextMII = CopyMI;
+              NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+              BackTracked = true;
+            } else {
+              DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+              // InvalidateKills resurrects any prior kill of the copy's source
+              // allowing the source reg to be reused in place of the copy.
+              Spills.disallowClobberPhysReg(InReg);
+            }
+
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            EraseInstr(&MI);
+            Erased = true;
+            goto ProcessNextInst;
+          }
+        } else {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          if (PhysReg &&
+              TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false,
+                                       NewMIs)) {
+            MBB->insert(MII, NewMIs[0]);
+            InvalidateKills(MI, TRI, RegKills, KillOps);
+            EraseInstr(&MI);
+            Erased = true;
+            --NextMII;  // backtrack to the unfolded instruction.
+            BackTracked = true;
+            goto ProcessNextInst;
+          }
+        }
+      }
+
+      // If this reference is not a use, any previous store is now dead.
+      // Otherwise, the store to this stack slot is not dead anymore.
+      MachineInstr* DeadStore = MaybeDeadStores[SS];
+      if (DeadStore) {
+        bool isDead = !(MR & VirtRegMap::isRef);
+        MachineInstr *NewStore = NULL;
+        if (MR & VirtRegMap::isModRef) {
+          unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
+          SmallVector<MachineInstr*, 4> NewMIs;
+          // We can reuse this physreg as long as we are allowed to clobber
+          // the value and there isn't an earlier def that has already
+          // clobbered the physreg.
+          if (PhysReg &&
+              !ReusedOperands.isClobbered(PhysReg) &&
+              Spills.canClobberPhysReg(PhysReg) &&
+              !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable!
+            MachineOperand *KillOpnd =
+              DeadStore->findRegisterUseOperand(PhysReg, true);
+            // Note, if the store is storing a sub-register, it's possible the
+            // super-register is needed below.
+            if (KillOpnd && !KillOpnd->getSubReg() &&
+                TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,
+                                         NewMIs)) {
+              MBB->insert(MII, NewMIs[0]);
+              NewStore = NewMIs[1];
+              MBB->insert(MII, NewStore);
+              VRM->addSpillSlotUse(SS, NewStore);
+              InvalidateKills(MI, TRI, RegKills, KillOps);
+              EraseInstr(&MI);
+              Erased = true;
+              --NextMII;
+              --NextMII;  // backtrack to the unfolded instruction.
+              BackTracked = true;
+              isDead = true;
+              ++NumSUnfold;
+            }
+          }
+        }
+
+        if (isDead) { // Previous store is dead.
+          // If we get here, the store is dead, nuke it now.
+          DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+          InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+          EraseInstr(DeadStore);
+          if (!NewStore)
+            ++NumDSE;
+        }
+
+        MaybeDeadStores[SS] = NULL;
+        if (NewStore) {
+          // Treat this store as a spill merged into a copy. That makes the
+          // stack slot value available.
+          VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod);
+          goto ProcessNextInst;
+        }
+      }
+
+      // If the spill slot value is available, and this is a new definition of
+      // the value, the value is not available anymore.
+      if (MR & VirtRegMap::isMod) {
+        // Notice that the value in this stack slot has been modified.
+        Spills.ModifyStackSlotOrReMat(SS);
+
+        // If this is *just* a mod of the value, check to see if this is just a
+        // store to the spill slot (i.e. the spill got merged into the copy).
+        // If so, realize that the vreg is available now, and add the store to
+        // the MaybeDeadStore info.
+        int StackSlot;
+        if (!(MR & VirtRegMap::isRef)) {
+          if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) {
+            assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+                   "Src hasn't been allocated yet?");
+
+            if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot,
+                                    Spills, RegKills, KillOps, TRI)) {
+              NextMII = llvm::next(MII);
+              BackTracked = true;
+              goto ProcessNextInst;
+            }
+
+            // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark
+            // this as a potentially dead store in case there is a subsequent
+            // store into the stack slot without a read from it.
+            MaybeDeadStores[StackSlot] = &MI;
+
+            // If the stack slot value was previously available in some other
+            // register, change it now. Otherwise, make the register
+            // available in PhysReg.
+            Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg));
+          }
+        }
+      }
+    }
+
+    // Process all of the spilled defs.
+    SpilledMIRegs.clear();
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = MI.getOperand(i);
+      if (!(MO.isReg() && MO.getReg() && MO.isDef()))
+        continue;
+
+      unsigned VirtReg = MO.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) {
+        // Check to see if this is a noop copy. If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        // Also check if it's copying from an "undef"; if so, we can't
+        // eliminate this or else the undef marker is lost and it will
+        // confuse the scavenger. This is extremely rare.
+        if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
+            MI.getNumOperands() == 2) {
+          ++NumDCE;
+          DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+          SmallVector<unsigned, 2> KillRegs;
+          InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+          if (MO.isDead() && !KillRegs.empty()) {
+            // Source register or an implicit super/sub-register use is killed.
+            assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
+            // Last def is now dead.
+            TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
+          }
+          EraseInstr(&MI);
+          Erased = true;
+          Spills.disallowClobberPhysReg(VirtReg);
+          goto ProcessNextInst;
+        }
+
+        // If it's not a no-op copy, it clobbers the value in the destreg.
+        Spills.ClobberPhysReg(VirtReg);
+        ReusedOperands.markClobbered(VirtReg);
+
+        // Check to see if this instruction is a load from a stack slot into
+        // a register. If so, this provides the stack slot value in the reg.
+        int FrameIdx;
+        if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
+          assert(DestReg == VirtReg && "Unknown load situation!");
+
+          // If it is a folded reference, then it's not safe to clobber.
+          bool Folded = FoldedSS.count(FrameIdx);
+          // Otherwise, if it wasn't available, remember that it is now!
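+          // E.g. (illustrative): after "r3 = load fi#5", fi#5 is available in
+          // r3, so later reloads of fi#5 in this block can reuse r3 instead
+          // of hitting memory again.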
+          Spills.addAvailable(FrameIdx, DestReg, !Folded);
+          goto ProcessNextInst;
+        }
+
+        continue;
+      }
+
+      unsigned SubIdx = MO.getSubReg();
+      bool DoReMat = VRM->isReMaterialized(VirtReg);
+      if (DoReMat)
+        ReMatDefs.insert(&MI);
+
+      // The only vregs left are stack slot definitions.
+      int StackSlot = VRM->getStackSlot(VirtReg);
+      const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+      // If this def is part of a two-address operand, make sure to execute
+      // the store from the correct physical register.
+      unsigned PhysReg;
+      unsigned TiedOp;
+      if (MI.isRegTiedToUseOperand(i, &TiedOp)) {
+        PhysReg = MI.getOperand(TiedOp).getReg();
+        if (SubIdx) {
+          unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI);
+          assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg &&
+                 "Can't find corresponding super-register!");
+          PhysReg = SuperReg;
+        }
+      } else {
+        PhysReg = VRM->getPhys(VirtReg);
+        if (ReusedOperands.isClobbered(PhysReg)) {
+          // Another def has taken the assigned physreg. It must have been a
+          // use&def which got it due to reuse. Undo the reuse!
+          PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+                      Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+        }
+      }
+
+      assert(PhysReg && "VR not assigned a physical register?");
+      MRI->setPhysRegUsed(PhysReg);
+      unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+      ReusedOperands.markClobbered(RReg);
+      MI.getOperand(i).setReg(RReg);
+      MI.getOperand(i).setSubReg(0);
+
+      if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
+        MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
+        SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
+                            LastStore, Spills, ReMatDefs, RegKills, KillOps);
+        NextMII = llvm::next(MII);
+
+        // Check to see if this is a noop copy. If so, eliminate the
+        // instruction before considering the dest reg to be changed.
+        if (MI.isIdentityCopy()) {
+          ++NumDCE;
+          DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+          InvalidateKills(MI, TRI, RegKills, KillOps);
+          EraseInstr(&MI);
+          Erased = true;
+          UpdateKills(*LastStore, TRI, RegKills, KillOps);
+          goto ProcessNextInst;
+        }
+      }
+    }
+  ProcessNextInst:
+    // Delete dead instructions without side effects.
+    if (!Erased && !BackTracked && isSafeToDelete(MI)) {
+      InvalidateKills(MI, TRI, RegKills, KillOps);
+      EraseInstr(&MI);
+      Erased = true;
+    }
+    if (!Erased)
+      DistanceMap.insert(std::make_pair(&MI, DistanceMap.size()));
+    if (!Erased && !BackTracked) {
+      for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II)
+        UpdateKills(*II, TRI, RegKills, KillOps);
+    }
+    MII = NextMII;
+  }
+
+}
+
+llvm::VirtRegRewriter* llvm::createVirtRegRewriter() {
+  switch (RewriterOpt) {
+  default: llvm_unreachable("Unreachable!");
+  case local:
+    return new LocalRewriter();
+  case trivial:
+    return new TrivialRewriter();
+  }
+}
diff --git a/final/lib/CodeGen/VirtRegRewriter.h b/final/lib/CodeGen/VirtRegRewriter.h
new file mode 100644
index 00000000000..93474e0d7ff
--- /dev/null
+++ b/final/lib/CodeGen/VirtRegRewriter.h
@@ -0,0 +1,32 @@
+//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H
+#define LLVM_CODEGEN_VIRTREGREWRITER_H
+
+namespace llvm {
+  class LiveIntervals;
+  class MachineFunction;
+  class VirtRegMap;
+
+  /// VirtRegRewriter interface: Implementations of this interface assign
+  /// spilled virtual registers to stack slots, rewriting the code.
+  struct VirtRegRewriter {
+    virtual ~VirtRegRewriter();
+    virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM,
+                                      LiveIntervals* LIs) = 0;
+  };
+
+  /// createVirtRegRewriter - Create and return a rewriter object, as
+  /// specified on the command line.
+  VirtRegRewriter* createVirtRegRewriter();
+
+}
+
+#endif
diff --git a/final/lib/CompilerDriver/Action.cpp b/final/lib/CompilerDriver/Action.cpp
new file mode 100644
index 00000000000..a8d625c7ac0
--- /dev/null
+++ b/final/lib/CompilerDriver/Action.cpp
@@ -0,0 +1,134 @@
+//===--- Action.cpp - The LLVM Compiler Driver ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Action class - implementation and auxiliary functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/Action.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/Main.h"
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SystemUtils.h"
+#include "llvm/Support/Program.h"
+#include "llvm/Support/TimeValue.h"
+
+#include <algorithm>
+#include <string>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+extern const char* ProgramName;
+
+}
+
+namespace {
+
+  void PrintString (const std::string& str) {
+    errs() << str << ' ';
+  }
+
+  void PrintCommand (const std::string& Cmd, const StrVector& Args) {
+    errs() << Cmd << ' ';
+    std::for_each(Args.begin(), Args.end(), &PrintString);
+    errs() << '\n';
+  }
+
+  bool IsSegmentationFault (int returnCode) {
+#ifdef LLVM_ON_WIN32
+    return (returnCode >= 0xc0000000UL);
+#else
+    return (returnCode < 0);
+#endif
+  }
+
+  int ExecuteProgram (const std::string& name, const StrVector& args) {
+    sys::Path prog(name);
+
+    if (sys::path::is_relative(prog.str())) {
+      prog = PrependMainExecutablePath(name, ProgramName,
+                                       (void *)(intptr_t)&Main);
+
+      if (!prog.canExecute()) {
+        prog = sys::Program::FindProgramByName(name);
+        if (prog.isEmpty()) {
+          PrintError("Can't find program '" + name + "'");
+          return -1;
+        }
+      }
+    }
+    if (!prog.canExecute()) {
+      PrintError("Program '" + name + "' is not executable.");
+      return -1;
+    }
+
+    // Build the command line vector and the redirects array.
+    const sys::Path* redirects[3] = {0,0,0};
+    sys::Path stdout_redirect;
+
+    std::vector<const char*> argv;
+    argv.reserve((args.size()+2));
+    argv.push_back(name.c_str());
+
+    for (StrVector::const_iterator B = args.begin(), E = args.end();
+         B != E; ++B) {
+      if (*B == ">") {
+        ++B;
+        stdout_redirect.set(*B);
+        redirects[1] = &stdout_redirect;
+      }
+      else {
+        argv.push_back((*B).c_str());
+      }
+    }
+    argv.push_back(0);  // null terminate list.
+
+    // Invoke the program.
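+    // The three redirects entries correspond to stdin/stdout/stderr; null
+    // entries mean "inherit from the parent", so only stdout is redirected
+    // when '>' was seen above.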
+    int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
+
+    if (IsSegmentationFault(ret)) {
+      errs() << "Segmentation fault: ";
+      PrintCommand(name, args);
+    }
+
+    return ret;
+  }
+}
+
+namespace llvmc {
+  void AppendToGlobalTimeLog (const std::string& cmd, double time);
+}
+
+int llvmc::Action::Execute () const {
+  if (DryRun || VerboseMode)
+    PrintCommand(Command_, Args_);
+
+  if (!DryRun) {
+    if (Time) {
+      sys::TimeValue now = sys::TimeValue::now();
+      int ret = ExecuteProgram(Command_, Args_);
+      sys::TimeValue now2 = sys::TimeValue::now();
+      now2 -= now;
+      double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
+      AppendToGlobalTimeLog(Command_, elapsed);
+
+      return ret;
+    }
+    else {
+      return ExecuteProgram(Command_, Args_);
+    }
+  }
+
+  return 0;
+}
diff --git a/final/lib/CompilerDriver/BuiltinOptions.cpp b/final/lib/CompilerDriver/BuiltinOptions.cpp
new file mode 100644
index 00000000000..38442038d73
--- /dev/null
+++ b/final/lib/CompilerDriver/BuiltinOptions.cpp
@@ -0,0 +1,61 @@
+//===--- BuiltinOptions.cpp - The LLVM Compiler Driver ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Definitions of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+
+#ifdef ENABLE_LLVMC_DYNAMIC_PLUGINS
+#include "llvm/Support/PluginLoader.h"
+#endif
+
+namespace cl = llvm::cl;
+
+namespace llvmc {
+
+cl::list<std::string> InputFilenames(cl::Positional, cl::desc("<input file>"),
+                                     cl::ZeroOrMore);
+cl::opt<std::string> OutputFilename("o", cl::desc("Output file name"),
+                                    cl::value_desc("file"), cl::Prefix);
+cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
+                                 cl::value_desc("<directory>"), cl::Prefix);
+cl::list<std::string> Languages("x",
+          cl::desc("Specify the language of the following input files"),
+          cl::ZeroOrMore);
+
+cl::opt<bool> DryRun("dry-run",
+                     cl::desc("Only pretend to run commands"));
+cl::opt<bool> Time("time", cl::desc("Time individual commands"));
+cl::opt<bool> VerboseMode("v",
+                          cl::desc("Enable verbose mode"));
+
+cl::opt<bool> CheckGraph("check-graph",
+                         cl::desc("Check the compilation graph for errors"),
+                         cl::Hidden);
+cl::opt<bool> WriteGraph("write-graph",
+                         cl::desc("Write compilation-graph.dot file"),
+                         cl::Hidden);
+cl::opt<bool> ViewGraph("view-graph",
+                        cl::desc("Show compilation graph in GhostView"),
+                        cl::Hidden);
+
+cl::opt<SaveTempsEnum::Values> SaveTemps
+("save-temps", cl::desc("Keep temporary files"),
+ cl::init(SaveTempsEnum::Unset),
+ cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
+                       "Save files in the directory specified with -o"),
+            clEnumValN(SaveTempsEnum::Cwd, "cwd",
+                       "Use current working directory"),
+            clEnumValN(SaveTempsEnum::Obj, "", "Same as 'obj'"),
+            clEnumValEnd),
+ cl::ValueOptional);
+
+} // End namespace llvmc.
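The file above is nothing but file-scope cl::opt / cl::list globals; the pattern is easy to try in isolation. Below is a minimal, self-contained sketch of the same idiom, assuming the LLVM 2.9-era CommandLine API used throughout this patch; the program and its option names are illustrative only and are not part of llvmc.

// cl_demo.cpp - illustrative sketch of the cl::opt / cl::list idiom
// (hypothetical example, not part of this patch).
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <string>

namespace cl = llvm::cl;

// File-scope globals, like the llvmc builtins above: each constructor
// registers its option with the global CommandLine parser.
static cl::opt<bool> Demo("demo", cl::desc("Enable demo mode"));
static cl::list<std::string> Inputs(cl::Positional, cl::desc("<input file>"),
                                    cl::ZeroOrMore);

int main(int argc, char **argv) {
  // A single call parses argv for every registered option; the values are
  // then read directly through the globals.
  cl::ParseCommandLineOptions(argc, argv, "CommandLine demo\n");

  llvm::errs() << "demo = " << (Demo ? "on" : "off")
               << ", inputs = " << Inputs.size() << '\n';
  return 0;
}

Run as "./cl_demo -demo a.c b.c", this would print "demo = on, inputs = 2"; this registration-at-static-initialization design is why BuiltinOptions.cpp contains no executable code at all.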
diff --git a/final/lib/CompilerDriver/CMakeLists.txt b/final/lib/CompilerDriver/CMakeLists.txt
new file mode 100644
index 00000000000..2248de01b95
--- /dev/null
+++ b/final/lib/CompilerDriver/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(LLVM_LINK_COMPONENTS support)
+set(LLVM_REQUIRES_EH 1)
+
+add_llvm_tool(llvmc
+  Action.cpp
+  CompilationGraph.cpp
+  llvmc.cpp
+  Plugin.cpp
+  Tool.cpp
+  )
diff --git a/final/lib/CompilerDriver/CompilationGraph.cpp b/final/lib/CompilerDriver/CompilationGraph.cpp
new file mode 100644
index 00000000000..33c6566499b
--- /dev/null
+++ b/final/lib/CompilerDriver/CompilationGraph.cpp
@@ -0,0 +1,655 @@
+//===--- CompilationGraph.cpp - The LLVM Compiler Driver --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Compilation graph - implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <queue>
+
+using namespace llvm;
+using namespace llvmc;
+
+namespace llvmc {
+
+  const std::string* LanguageMap::GetLanguage(const sys::Path& File) const {
+    // Remove the '.'.
+    StringRef suf = sys::path::extension(File.str()).substr(1);
+    LanguageMap::const_iterator Lang =
+      this->find(suf.empty() ? "*empty*" : suf);
+    if (Lang == this->end()) {
+      PrintError("File '" + File.str() + "' has unknown suffix '"
+                 + suf.str() + '\'');
+      return 0;
+    }
+    return &Lang->second;
+  }
+}
+
+namespace {
+
+  /// ChooseEdge - Return the edge with the maximum weight. Returns 0 on error.
+  template <class C>
+  const Edge* ChooseEdge(const C& EdgesContainer,
+                         const InputLanguagesSet& InLangs,
+                         const std::string& NodeName = "root") {
+    const Edge* MaxEdge = 0;
+    int MaxWeight = 0;
+    bool SingleMax = true;
+
+    // TODO: fix calculation of SingleMax.
+    for (typename C::const_iterator B = EdgesContainer.begin(),
+           E = EdgesContainer.end(); B != E; ++B) {
+      const Edge* e = B->getPtr();
+      int EW = e->Weight(InLangs);
+      if (EW < 0) {
+        // (error) invocation in TableGen -> we don't need to print an error
+        // message.
+        return 0;
+      }
+      if (EW > MaxWeight) {
+        MaxEdge = e;
+        MaxWeight = EW;
+        SingleMax = true;
+      } else if (EW == MaxWeight) {
+        SingleMax = false;
+      }
+    }
+
+    if (!SingleMax) {
+      PrintError("Node " + NodeName + ": multiple maximal outward edges found!"
+                 " Most probably a specification error.");
+      return 0;
+    }
+    if (!MaxEdge) {
+      PrintError("Node " + NodeName + ": no maximal outward edge found!"
+                 " Most probably a specification error.");
+      return 0;
+    }
+    return MaxEdge;
+  }
+
+}
+
+void Node::AddEdge(Edge* Edg) {
+  // If there already was an edge between two nodes, modify it instead
+  // of adding a new edge.
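+  // This keeps at most one outgoing edge per destination tool: re-registering
+  // an A->B edge swaps the new Edge into the existing slot (releasing the old
+  // one via the IntrusiveRefCntPtr) instead of creating a parallel edge.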
+  const std::string& ToolName = Edg->ToolName();
+  for (container_type::iterator B = OutEdges.begin(), E = OutEdges.end();
+       B != E; ++B) {
+    if ((*B)->ToolName() == ToolName) {
+      llvm::IntrusiveRefCntPtr<Edge>(Edg).swap(*B);
+      return;
+    }
+  }
+  OutEdges.push_back(llvm::IntrusiveRefCntPtr<Edge>(Edg));
+}
+
+CompilationGraph::CompilationGraph() {
+  NodesMap["root"] = Node(this);
+}
+
+Node* CompilationGraph::getNode(const std::string& ToolName) {
+  nodes_map_type::iterator I = NodesMap.find(ToolName);
+  if (I == NodesMap.end()) {
+    PrintError("Node " + ToolName + " is not in the graph");
+    return 0;
+  }
+  return &I->second;
+}
+
+const Node* CompilationGraph::getNode(const std::string& ToolName) const {
+  nodes_map_type::const_iterator I = NodesMap.find(ToolName);
+  if (I == NodesMap.end()) {
+    PrintError("Node " + ToolName + " is not in the graph!");
+    return 0;
+  }
+  return &I->second;
+}
+
+// Find the tools list corresponding to the given language name.
+const CompilationGraph::tools_vector_type*
+CompilationGraph::getToolsVector(const std::string& LangName) const
+{
+  tools_map_type::const_iterator I = ToolsMap.find(LangName);
+  if (I == ToolsMap.end()) {
+    PrintError("No tool corresponding to the language " + LangName + " found");
+    return 0;
+  }
+  return &I->second;
+}
+
+void CompilationGraph::insertNode(Tool* V) {
+  if (NodesMap.count(V->Name()) == 0)
+    NodesMap[V->Name()] = Node(this, V);
+}
+
+int CompilationGraph::insertEdge(const std::string& A, Edge* Edg) {
+  Node* B = getNode(Edg->ToolName());
+  if (B == 0)
+    return 1;
+
+  if (A == "root") {
+    const char** InLangs = B->ToolPtr->InputLanguages();
+    for (;*InLangs; ++InLangs)
+      ToolsMap[*InLangs].push_back(IntrusiveRefCntPtr<Edge>(Edg));
+    NodesMap["root"].AddEdge(Edg);
+  }
+  else {
+    Node* N = getNode(A);
+    if (N == 0)
+      return 1;
+
+    N->AddEdge(Edg);
+  }
+  // Increase the inward edge counter.
+  B->IncrInEdges();
+
+  return 0;
+}
+
+// Pass input file through the chain until we bump into a Join node or
+// a node that says that it is the last.
+int CompilationGraph::PassThroughGraph (const sys::Path& InFile,
+                                        const Node* StartNode,
+                                        const InputLanguagesSet& InLangs,
+                                        const sys::Path& TempDir,
+                                        const LanguageMap& LangMap) const {
+  sys::Path In = InFile;
+  const Node* CurNode = StartNode;
+
+  while(true) {
+    Tool* CurTool = CurNode->ToolPtr.getPtr();
+
+    if (CurTool->IsJoin()) {
+      JoinTool& JT = static_cast<JoinTool&>(*CurTool);
+      JT.AddToJoinList(In);
+      break;
+    }
+
+    Action CurAction;
+    if (int ret = CurTool->GenerateAction(CurAction, In, CurNode->HasChildren(),
+                                          TempDir, InLangs, LangMap)) {
+      return ret;
+    }
+
+    if (int ret = CurAction.Execute())
+      return ret;
+
+    if (CurAction.StopCompilation())
+      return 0;
+
+    const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+    if (Edg == 0)
+      return 1;
+
+    CurNode = getNode(Edg->ToolName());
+    if (CurNode == 0)
+      return 1;
+
+    In = CurAction.OutFile();
+  }
+
+  return 0;
+}
+
+// Find the head of the toolchain corresponding to the given file.
+// Also, insert an input language into InLangs.
+const Node* CompilationGraph::
+FindToolChain(const sys::Path& In, const std::string* ForceLanguage,
+              InputLanguagesSet& InLangs, const LanguageMap& LangMap) const {
+
+  // Determine the input language.
+  const std::string* InLang = (ForceLanguage ? ForceLanguage
+                               : LangMap.GetLanguage(In));
+  if (InLang == 0)
+    return 0;
+  const std::string& InLanguage = *InLang;
+
+  // Add the current input language to the input language set.
+  InLangs.insert(InLanguage);
+
+  // Find the toolchain for the input language.
+  const tools_vector_type* pTV = getToolsVector(InLanguage);
+  if (pTV == 0)
+    return 0;
+
+  const tools_vector_type& TV = *pTV;
+  if (TV.empty()) {
+    PrintError("No toolchain corresponding to language "
+               + InLanguage + " found");
+    return 0;
+  }
+
+  const Edge* Edg = ChooseEdge(TV, InLangs);
+  if (Edg == 0)
+    return 0;
+
+  return getNode(Edg->ToolName());
+}
+
+// Helper function used by Build().
+// Traverses initial portions of the toolchains (up to the first Join node).
+// This function is also responsible for handling the -x option.
+int CompilationGraph::BuildInitial (InputLanguagesSet& InLangs,
+                                    const sys::Path& TempDir,
+                                    const LanguageMap& LangMap) {
+  // This is related to -x option handling.
+  cl::list<std::string>::const_iterator xIter = Languages.begin(),
+    xBegin = xIter, xEnd = Languages.end();
+  bool xEmpty = true;
+  const std::string* xLanguage = 0;
+  unsigned xPos = 0, xPosNext = 0, filePos = 0;
+
+  if (xIter != xEnd) {
+    xEmpty = false;
+    xPos = Languages.getPosition(xIter - xBegin);
+    cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+    xPosNext = (xNext == xEnd) ? std::numeric_limits<unsigned>::max()
+               : Languages.getPosition(xNext - xBegin);
+    xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+  }
+
+  // For each input file:
+  for (cl::list<std::string>::const_iterator B = InputFilenames.begin(),
+         CB = B, E = InputFilenames.end(); B != E; ++B) {
+    sys::Path In = sys::Path(*B);
+
+    // Code for handling the -x option.
+    // Output: std::string* xLanguage (can be NULL).
+    if (!xEmpty) {
+      filePos = InputFilenames.getPosition(B - CB);
+
+      if (xPos < filePos) {
+        if (filePos < xPosNext) {
+          xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+        }
+        else { // filePos >= xPosNext
+          // Skip xIters while filePos > xPosNext
+          while (filePos > xPosNext) {
+            ++xIter;
+            xPos = xPosNext;
+
+            cl::list<std::string>::const_iterator xNext = llvm::next(xIter);
+            if (xNext == xEnd)
+              xPosNext = std::numeric_limits<unsigned>::max();
+            else
+              xPosNext = Languages.getPosition(xNext - xBegin);
+            xLanguage = (*xIter == "none") ? 0 : &(*xIter);
+          }
+        }
+      }
+    }
+
+    // Find the toolchain corresponding to this file.
+    const Node* N = FindToolChain(In, xLanguage, InLangs, LangMap);
+    if (N == 0)
+      return 1;
+    // Pass file through the chain starting at head.
+    if (int ret = PassThroughGraph(In, N, InLangs, TempDir, LangMap))
+      return ret;
+  }
+
+  return 0;
+}
+
+// Sort the nodes in topological order.
+int CompilationGraph::TopologicalSort(std::vector<const Node*>& Out) {
+  std::queue<Node*> Q;
+
+  Node* Root = getNode("root");
+  if (Root == 0)
+    return 1;
+
+  Q.push(Root);
+
+  while (!Q.empty()) {
+    const Node* A = Q.front();
+    Q.pop();
+    Out.push_back(A);
+    for (Node::const_iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+         EB != EE; ++EB) {
+      Node* B = getNode((*EB)->ToolName());
+      if (B == 0)
+        return 1;
+
+      B->DecrInEdges();
+      if (B->HasNoInEdges())
+        Q.push(B);
+    }
+  }
+
+  return 0;
+}
+
+namespace {
+  bool NotJoinNode(const Node* N) {
+    return N->ToolPtr ? !N->ToolPtr->IsJoin() : true;
+  }
+}
+
+// Call TopologicalSort and filter the resulting list to include
+// only Join nodes.
+int CompilationGraph::
+TopologicalSortFilterJoinNodes(std::vector<const Node*>& Out) {
+  std::vector<const Node*> TopSorted;
+  if (int ret = TopologicalSort(TopSorted))
+    return ret;
+  std::remove_copy_if(TopSorted.begin(), TopSorted.end(),
+                      std::back_inserter(Out), NotJoinNode);
+
+  return 0;
+}
+
+int CompilationGraph::Build (const sys::Path& TempDir,
+                             const LanguageMap& LangMap) {
+  InputLanguagesSet InLangs;
+  bool WasSomeActionGenerated = !InputFilenames.empty();
+
+  // Traverse initial parts of the toolchains and fill in InLangs.
+  if (int ret = BuildInitial(InLangs, TempDir, LangMap))
+    return ret;
+
+  std::vector<const Node*> JTV;
+  if (int ret = TopologicalSortFilterJoinNodes(JTV))
+    return ret;
+
+  // For all join nodes in topological order:
+  for (std::vector<const Node*>::iterator B = JTV.begin(), E = JTV.end();
+       B != E; ++B) {
+
+    const Node* CurNode = *B;
+    JoinTool* JT = &static_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
+
+    // Are there any files in the join list?
+    if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
+      continue;
+
+    WasSomeActionGenerated = true;
+    Action CurAction;
+    if (int ret = JT->GenerateAction(CurAction, CurNode->HasChildren(),
+                                     TempDir, InLangs, LangMap)) {
+      return ret;
+    }
+
+    if (int ret = CurAction.Execute())
+      return ret;
+
+    if (CurAction.StopCompilation())
+      return 0;
+
+    const Edge* Edg = ChooseEdge(CurNode->OutEdges, InLangs, CurNode->Name());
+    if (Edg == 0)
+      return 1;
+
+    const Node* NextNode = getNode(Edg->ToolName());
+    if (NextNode == 0)
+      return 1;
+
+    if (int ret = PassThroughGraph(sys::Path(CurAction.OutFile()), NextNode,
+                                   InLangs, TempDir, LangMap)) {
+      return ret;
+    }
+  }
+
+  if (!WasSomeActionGenerated) {
+    PrintError("no input files");
+    return 1;
+  }
+
+  return 0;
+}
+
+int CompilationGraph::CheckLanguageNames() const {
+  int ret = 0;
+
+  // Check that names for output and input languages on all edges do match.
+  for (const_nodes_iterator B = this->NodesMap.begin(),
+         E = this->NodesMap.end(); B != E; ++B) {
+
+    const Node & N1 = B->second;
+    if (N1.ToolPtr) {
+      for (Node::const_iterator EB = N1.EdgesBegin(), EE = N1.EdgesEnd();
+           EB != EE; ++EB) {
+        const Node* N2 = this->getNode((*EB)->ToolName());
+        if (N2 == 0)
+          return 1;
+
+        if (!N2->ToolPtr) {
+          ++ret;
+          errs() << "Error: there is an edge from '" << N1.ToolPtr->Name()
+                 << "' back to the root!\n\n";
+          continue;
+        }
+
+        const char** OutLangs = N1.ToolPtr->OutputLanguages();
+        const char** InLangs = N2->ToolPtr->InputLanguages();
+        bool eq = false;
+        const char* OutLang = 0;
+        for (;*OutLangs; ++OutLangs) {
+          OutLang = *OutLangs;
+          for (;*InLangs; ++InLangs) {
+            if (std::strcmp(OutLang, *InLangs) == 0) {
+              eq = true;
+              break;
+            }
+          }
+        }
+
+        if (!eq) {
+          ++ret;
+          errs() << "Error: Output->input language mismatch in the edge '"
+                 << N1.ToolPtr->Name() << "' -> '" << N2->ToolPtr->Name()
+                 << "'!\n"
+                 << "Expected one of { ";
+
+          InLangs = N2->ToolPtr->InputLanguages();
+          for (;*InLangs; ++InLangs) {
+            errs() << '\'' << *InLangs << (*(InLangs+1) ? "', " : "'");
+          }
+
+          errs() << " }, but got '" << OutLang << "'!\n\n";
+        }
+
+      }
+    }
+  }
+
+  return ret;
+}
+
+int CompilationGraph::CheckMultipleDefaultEdges() const {
+  int ret = 0;
+  InputLanguagesSet Dummy;
+
+  // For all nodes, just iterate over the outgoing edges and check if there is
+  // more than one edge with maximum weight.
+  for (const_nodes_iterator B = this->NodesMap.begin(),
+         E = this->NodesMap.end(); B != E; ++B) {
+    const Node& N = B->second;
+    int MaxWeight = -1024;
+
+    // Ignore the root node.
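+    // The root is the only node created without an associated Tool (see the
+    // CompilationGraph constructor), so a null ToolPtr identifies it.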
+    if (!N.ToolPtr)
+      continue;
+
+    for (Node::const_iterator EB = N.EdgesBegin(), EE = N.EdgesEnd();
+         EB != EE; ++EB) {
+      int EdgeWeight = (*EB)->Weight(Dummy);
+      if (EdgeWeight > MaxWeight) {
+        MaxWeight = EdgeWeight;
+      }
+      else if (EdgeWeight == MaxWeight) {
+        ++ret;
+        errs() << "Error: there are multiple maximal edges stemming from the '"
+               << N.ToolPtr->Name() << "' node!\n\n";
+        break;
+      }
+    }
+  }
+
+  return ret;
+}
+
+int CompilationGraph::CheckCycles() {
+  unsigned deleted = 0;
+  std::queue<Node*> Q;
+
+  Node* Root = getNode("root");
+  if (Root == 0)
+    return 1;
+
+  Q.push(Root);
+
+  // Try to delete all nodes that have no ingoing edges, starting from the
+  // root. If there are any nodes left after this operation, then we have a
+  // cycle. This relies on '--check-graph' not performing the topological sort.
+  while (!Q.empty()) {
+    Node* A = Q.front();
+    Q.pop();
+    ++deleted;
+
+    for (Node::iterator EB = A->EdgesBegin(), EE = A->EdgesEnd();
+         EB != EE; ++EB) {
+      Node* B = getNode((*EB)->ToolName());
+      if (B == 0)
+        return 1;
+
+      B->DecrInEdges();
+      if (B->HasNoInEdges())
+        Q.push(B);
+    }
+  }
+
+  if (deleted != NodesMap.size()) {
+    errs() << "Error: there are cycles in the compilation graph!\n"
+           << "Try inspecting the diagram produced by "
+           << "'llvmc --view-graph'.\n\n";
+    return 1;
+  }
+
+  return 0;
+}
+
+int CompilationGraph::Check () {
+  // We try to catch as many errors as we can in one go.
+  int errs = 0;
+  int ret = 0;
+
+  // Check that output/input language names match.
+  ret = this->CheckLanguageNames();
+  if (ret < 0)
+    return 1;
+  errs += ret;
+
+  // Check for multiple default edges.
+  ret = this->CheckMultipleDefaultEdges();
+  if (ret < 0)
+    return 1;
+  errs += ret;
+
+  // Check for cycles.
+  ret = this->CheckCycles();
+  if (ret < 0)
+    return 1;
+  errs += ret;
+
+  return errs;
+}
+
+// Code related to graph visualization.
+
+namespace {
+
+std::string SquashStrArray (const char** StrArr) {
+  std::string ret;
+
+  for (; *StrArr; ++StrArr) {
+    if (*(StrArr + 1)) {
+      ret += *StrArr;
+      ret += ", ";
+    }
+    else {
+      ret += *StrArr;
+    }
+  }
+
+  return ret;
+}
+
+} // End anonymous namespace.
+
+namespace llvm {
+  template <>
+  struct DOTGraphTraits<llvmc::CompilationGraph*>
+    : public DefaultDOTGraphTraits
+  {
+    DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+
+    template <typename GraphType>
+    static std::string getNodeLabel(const Node* N, const GraphType&)
+    {
+      if (N->ToolPtr)
+        if (N->ToolPtr->IsJoin())
+          return N->Name() + "\n (join"
+            + (N->HasChildren() ? ")"
+               : std::string(": ")
+                 + SquashStrArray(N->ToolPtr->OutputLanguages()) + ')');
+        else
+          return N->Name();
+      else
+        return "root";
+    }
+
+    template <typename EdgeIter>
+    static std::string getEdgeSourceLabel(const Node* N, EdgeIter I) {
+      if (N->ToolPtr) {
+        return SquashStrArray(N->ToolPtr->OutputLanguages());
+      }
+      else {
+        return SquashStrArray(I->ToolPtr->InputLanguages());
+      }
+    }
+  };
+
+} // End namespace llvm
+
+int CompilationGraph::writeGraph(const std::string& OutputFilename) {
+  std::string ErrorInfo;
+  raw_fd_ostream O(OutputFilename.c_str(), ErrorInfo);
+
+  if (ErrorInfo.empty()) {
+    errs() << "Writing '"<< OutputFilename << "' file...";
+    llvm::WriteGraph(O, this);
+    errs() << "done.\n";
+  }
+  else {
+    PrintError("Error opening file '" + OutputFilename + "' for writing!");
+    return 1;
+  }
+
+  return 0;
+}
+
+void CompilationGraph::viewGraph() {
+  llvm::ViewGraph(this, "compilation-graph");
+}
diff --git a/final/lib/CompilerDriver/Main.cpp b/final/lib/CompilerDriver/Main.cpp
new file mode 100644
index 00000000000..7120027f7ce
--- /dev/null
+++ b/final/lib/CompilerDriver/Main.cpp
@@ -0,0 +1,146 @@
+//===--- Main.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// llvmc::Main function - driver entry point.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CompilerDriver/AutoGenerated.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
+#include "llvm/CompilerDriver/CompilationGraph.h"
+#include "llvm/CompilerDriver/Error.h"
+
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+
+#include <sstream>
+#include <string>
+
+namespace cl = llvm::cl;
+namespace sys = llvm::sys;
+using namespace llvmc;
+
+namespace {
+
+  std::stringstream* GlobalTimeLog;
+
+  /// GetTempDir - Get the temporary directory location. Returns non-zero value
+  /// on error.
+  int GetTempDir(sys::Path& tempDir) {
+    // The --temp-dir option.
+    if (!TempDirname.empty()) {
+      tempDir = TempDirname;
+    }
+    // GCC 4.5-style -save-temps handling.
+    else if (SaveTemps == SaveTempsEnum::Unset) {
+      tempDir = sys::Path::GetTemporaryDirectory();
+      return 0;
+    }
+    else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
+      tempDir = sys::path::parent_path(OutputFilename);
+    }
+    else {
+      // SaveTemps == Cwd --> use current dir (leave tempDir empty).
+      return 0;
+    }
+
+    bool Exists;
+    if (llvm::sys::fs::exists(tempDir.str(), Exists) || !Exists) {
+      std::string ErrMsg;
+      if (tempDir.createDirectoryOnDisk(true, &ErrMsg)) {
+        PrintError(ErrMsg);
+        return 1;
+      }
+    }
+
+    return 0;
+  }
+
+  /// BuildTargets - A small wrapper for CompilationGraph::Build. Returns
+  /// non-zero value in case of error.
+  int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
+    int ret;
+    sys::Path tempDir;
+    bool toDelete = (SaveTemps == SaveTempsEnum::Unset);
+
+    if (int ret = GetTempDir(tempDir))
+      return ret;
+
+    ret = graph.Build(tempDir, langMap);
+
+    if (toDelete)
+      tempDir.eraseFromDisk(true);
+
+    return ret;
+  }
+}
+
+namespace llvmc {
+
+// Used to implement -time option. External linkage is intentional.
+void AppendToGlobalTimeLog(const std::string& cmd, double time) { + *GlobalTimeLog << "# " << cmd << ' ' << time << '\n'; +} + +// Sometimes user code wants to access the argv[0] value. +const char* ProgramName; + +int Main(int argc, char** argv) { + int ret = 0; + LanguageMap langMap; + CompilationGraph graph; + + ProgramName = argv[0]; + + cl::ParseCommandLineOptions + (argc, argv, + /* Overview = */ "LLVM Compiler Driver (Work In Progress)", + /* ReadResponseFiles = */ false); + + if (int ret = autogenerated::RunInitialization(langMap, graph)) + return ret; + + if (CheckGraph) { + ret = graph.Check(); + if (!ret) + llvm::errs() << "check-graph: no errors found.\n"; + + return ret; + } + + if (ViewGraph) { + graph.viewGraph(); + if (!WriteGraph) + return 0; + } + + if (WriteGraph) { + const std::string& Out = (OutputFilename.empty() + ? std::string("compilation-graph.dot") + : OutputFilename); + return graph.writeGraph(Out); + } + + if (Time) { + GlobalTimeLog = new std::stringstream; + GlobalTimeLog->precision(2); + } + + ret = BuildTargets(graph, langMap); + + if (Time) { + llvm::errs() << GlobalTimeLog->str(); + delete GlobalTimeLog; + } + + return ret; +} + +} // end namespace llvmc diff --git a/final/lib/CompilerDriver/Makefile b/final/lib/CompilerDriver/Makefile new file mode 100644 index 00000000000..10cfa4f0292 --- /dev/null +++ b/final/lib/CompilerDriver/Makefile @@ -0,0 +1,20 @@ +##===- lib/CompilerDriver/Makefile -------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open +# Source License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. + +# We don't want this library to appear in `llvm-config --libs` output, so its +# name doesn't start with "LLVM" and NO_LLVM_CONFIG is set. + +LIBRARYNAME = CompilerDriver +LINK_COMPONENTS = support +NO_LLVM_CONFIG = 1 + + +include $(LEVEL)/Makefile.common diff --git a/final/lib/CompilerDriver/Tool.cpp b/final/lib/CompilerDriver/Tool.cpp new file mode 100644 index 00000000000..876759aa72b --- /dev/null +++ b/final/lib/CompilerDriver/Tool.cpp @@ -0,0 +1,95 @@ +//===--- Tool.cpp - The LLVM Compiler Driver --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open +// Source License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Tool base class - implementation details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CompilerDriver/BuiltinOptions.h" +#include "llvm/CompilerDriver/Tool.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Path.h" + +#include + +using namespace llvm; +using namespace llvmc; + +namespace { + sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName, + const std::string& Suffix) { + sys::Path Out; + + // Make sure we don't end up with path names like '/file.o' if the + // TempDir is empty. + if (TempDir.empty()) { + Out.set(BaseName); + } + else { + Out = TempDir; + Out.appendComponent(BaseName); + } + Out.appendSuffix(Suffix); + // NOTE: makeUnique always *creates* a unique temporary file, + // which is good, since there will be no races. However, some + // tools do not like it when the output file already exists, so + // they need to be placated with -f or something like that. 
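+    // For illustration: with TempDir = "/tmp/llvmc_XXXX", BaseName = "foo"
+    // and Suffix = "o" (all hypothetical), Out is "/tmp/llvmc_XXXX/foo.o"
+    // before makeUnique() disambiguates it against existing files.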
+ Out.makeUnique(true, NULL); + return Out; + } +} + +sys::Path Tool::OutFilename(const sys::Path& In, + const sys::Path& TempDir, + bool StopCompilation, + const char* OutputSuffix) const { + sys::Path Out; + + if (StopCompilation) { + if (!OutputFilename.empty()) { + Out.set(OutputFilename); + } + else if (IsJoin()) { + Out.set("a"); + Out.appendSuffix(OutputSuffix); + } + else { + Out.set(sys::path::stem(In.str())); + Out.appendSuffix(OutputSuffix); + } + } + else { + if (IsJoin()) + Out = MakeTempFile(TempDir, "tmp", OutputSuffix); + else + Out = MakeTempFile(TempDir, sys::path::stem(In.str()), OutputSuffix); + } + return Out; +} + +namespace { + template + bool CompareFirst (std::pair p1, std::pair p2) { + return std::less
()(p1.first, p2.first); + } +} + +StrVector Tool::SortArgs(ArgsVector& Args) const { + StrVector Out; + + // HACK: this won't be needed when we'll migrate away from CommandLine. + std::stable_sort(Args.begin(), Args.end(), + &CompareFirst); + for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { + Out.push_back(B->second); + } + + return Out; +} diff --git a/final/lib/ExecutionEngine/CMakeLists.txt b/final/lib/ExecutionEngine/CMakeLists.txt new file mode 100644 index 00000000000..b5632d2bc5c --- /dev/null +++ b/final/lib/ExecutionEngine/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMExecutionEngine + ExecutionEngine.cpp + ExecutionEngineBindings.cpp + ) + +add_subdirectory(Interpreter) +add_subdirectory(JIT) +add_subdirectory(MCJIT) diff --git a/final/lib/ExecutionEngine/ExecutionEngine.cpp b/final/lib/ExecutionEngine/ExecutionEngine.cpp new file mode 100644 index 00000000000..25a61c02890 --- /dev/null +++ b/final/lib/ExecutionEngine/ExecutionEngine.cpp @@ -0,0 +1,1103 @@ +//===-- ExecutionEngine.cpp - Common Implementation shared by EEs ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the common interface used by the various execution engine +// subclasses. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "llvm/ExecutionEngine/ExecutionEngine.h" + +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Host.h" +#include "llvm/Target/TargetData.h" +#include +#include +using namespace llvm; + +STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); +STATISTIC(NumGlobals , "Number of global vars initialized"); + +ExecutionEngine *(*ExecutionEngine::JITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel, + bool GVsWithCode, + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl& MAttrs) = 0; +ExecutionEngine *(*ExecutionEngine::MCJITCtor)( + Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel, + bool GVsWithCode, + CodeModel::Model CMM, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl& MAttrs) = 0; +ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, + std::string *ErrorStr) = 0; + +ExecutionEngine::ExecutionEngine(Module *M) + : EEState(*this), + LazyFunctionCreator(0), + ExceptionTableRegister(0), + ExceptionTableDeregister(0) { + CompilingLazily = false; + GVCompilationDisabled = false; + SymbolSearchingDisabled = false; + Modules.push_back(M); + assert(M && "Module is null?"); +} + +ExecutionEngine::~ExecutionEngine() { + clearAllGlobalMappings(); + for (unsigned i = 0, e = Modules.size(); i != e; ++i) + delete Modules[i]; +} + +void ExecutionEngine::DeregisterAllTables() { + if (ExceptionTableDeregister) { + DenseMap::iterator it = AllExceptionTables.begin(); + DenseMap::iterator ite = AllExceptionTables.end(); 
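+    // Hand every registered exception table back to the client-supplied
+    // deregister callback before dropping our bookkeeping.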
+ for (; it != ite; ++it) + ExceptionTableDeregister(it->second); + AllExceptionTables.clear(); + } +} + +namespace { +/// \brief Helper class which uses a value handler to automatically deletes the +/// memory block when the GlobalVariable is destroyed. +class GVMemoryBlock : public CallbackVH { + GVMemoryBlock(const GlobalVariable *GV) + : CallbackVH(const_cast(GV)) {} + +public: + /// \brief Returns the address the GlobalVariable should be written into. The + /// GVMemoryBlock object prefixes that. + static char *Create(const GlobalVariable *GV, const TargetData& TD) { + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy); + void *RawMemory = ::operator new( + TargetData::RoundUpAlignment(sizeof(GVMemoryBlock), + TD.getPreferredAlignment(GV)) + + GVSize); + new(RawMemory) GVMemoryBlock(GV); + return static_cast(RawMemory) + sizeof(GVMemoryBlock); + } + + virtual void deleted() { + // We allocated with operator new and with some extra memory hanging off the + // end, so don't just delete this. I'm not sure if this is actually + // required. + this->~GVMemoryBlock(); + ::operator delete(this); + } +}; +} // anonymous namespace + +char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { + return GVMemoryBlock::Create(GV, *getTargetData()); +} + +bool ExecutionEngine::removeModule(Module *M) { + for(SmallVector::iterator I = Modules.begin(), + E = Modules.end(); I != E; ++I) { + Module *Found = *I; + if (Found == M) { + Modules.erase(I); + clearGlobalMappingsFromModule(M); + return true; + } + } + return false; +} + +Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { + for (unsigned i = 0, e = Modules.size(); i != e; ++i) { + if (Function *F = Modules[i]->getFunction(FnName)) + return F; + } + return 0; +} + + +void *ExecutionEngineState::RemoveMapping(const MutexGuard &, + const GlobalValue *ToUnmap) { + GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap); + void *OldVal; + + // FIXME: This is silly, we shouldn't end up with a mapping -> 0 in the + // GlobalAddressMap. + if (I == GlobalAddressMap.end()) + OldVal = 0; + else { + OldVal = I->second; + GlobalAddressMap.erase(I); + } + + GlobalAddressReverseMap.erase(OldVal); + return OldVal; +} + +void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { + MutexGuard locked(lock); + + DEBUG(dbgs() << "JIT: Map \'" << GV->getName() + << "\' to [" << Addr << "]\n";); + void *&CurVal = EEState.getGlobalAddressMap(locked)[GV]; + assert((CurVal == 0 || Addr == 0) && "GlobalMapping already established!"); + CurVal = Addr; + + // If we are using the reverse mapping, add it too. 
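+  // (The reverse map is built lazily by getGlobalValueAtAddress, so until a
+  // client has asked for a reverse lookup there is nothing to keep in sync.)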
+ if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + AssertingVH &V = + EEState.getGlobalAddressReverseMap(locked)[Addr]; + assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + V = GV; + } +} + +void ExecutionEngine::clearAllGlobalMappings() { + MutexGuard locked(lock); + + EEState.getGlobalAddressMap(locked).clear(); + EEState.getGlobalAddressReverseMap(locked).clear(); +} + +void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { + MutexGuard locked(lock); + + for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) + EEState.RemoveMapping(locked, FI); + for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); + GI != GE; ++GI) + EEState.RemoveMapping(locked, GI); +} + +void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { + MutexGuard locked(lock); + + ExecutionEngineState::GlobalAddressMapTy &Map = + EEState.getGlobalAddressMap(locked); + + // Deleting from the mapping? + if (Addr == 0) + return EEState.RemoveMapping(locked, GV); + + void *&CurVal = Map[GV]; + void *OldVal = CurVal; + + if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty()) + EEState.getGlobalAddressReverseMap(locked).erase(CurVal); + CurVal = Addr; + + // If we are using the reverse mapping, add it too. + if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + AssertingVH &V = + EEState.getGlobalAddressReverseMap(locked)[Addr]; + assert((V == 0 || GV == 0) && "GlobalMapping already established!"); + V = GV; + } + return OldVal; +} + +void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { + MutexGuard locked(lock); + + ExecutionEngineState::GlobalAddressMapTy::iterator I = + EEState.getGlobalAddressMap(locked).find(GV); + return I != EEState.getGlobalAddressMap(locked).end() ? I->second : 0; +} + +const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { + MutexGuard locked(lock); + + // If we haven't computed the reverse mapping yet, do so first. + if (EEState.getGlobalAddressReverseMap(locked).empty()) { + for (ExecutionEngineState::GlobalAddressMapTy::iterator + I = EEState.getGlobalAddressMap(locked).begin(), + E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I) + EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair( + I->second, I->first)); + } + + std::map >::iterator I = + EEState.getGlobalAddressReverseMap(locked).find(Addr); + return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0; +} + +namespace { +class ArgvArray { + char *Array; + std::vector Values; +public: + ArgvArray() : Array(NULL) {} + ~ArgvArray() { clear(); } + void clear() { + delete[] Array; + Array = NULL; + for (size_t I = 0, E = Values.size(); I != E; ++I) { + delete[] Values[I]; + } + Values.clear(); + } + /// Turn a vector of strings into a nice argv style array of pointers to null + /// terminated strings. + void *reset(LLVMContext &C, ExecutionEngine *EE, + const std::vector &InputArgv); +}; +} // anonymous namespace +void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE, + const std::vector &InputArgv) { + clear(); // Free the old contents. 
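+  // The block built below looks like (PtrSize = target pointer size):
+  //   Array: [argv[0]][argv[1]]...[argv[N-1]][NULL], one PtrSize slot each,
+  // where every slot holds a target pointer (stored endian-safely) to a
+  // freshly allocated, NUL-terminated copy of the corresponding string.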
+ unsigned PtrSize = EE->getTargetData()->getPointerSize(); + Array = new char[(InputArgv.size()+1)*PtrSize]; + + DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n"); + const Type *SBytePtr = Type::getInt8PtrTy(C); + + for (unsigned i = 0; i != InputArgv.size(); ++i) { + unsigned Size = InputArgv[i].size()+1; + char *Dest = new char[Size]; + Values.push_back(Dest); + DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n"); + + std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest); + Dest[Size-1] = 0; + + // Endian safe: Array[i] = (PointerTy)Dest; + EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize), + SBytePtr); + } + + // Null terminate it + EE->StoreValueToMemory(PTOGV(0), + (GenericValue*)(Array+InputArgv.size()*PtrSize), + SBytePtr); + return Array; +} + +void ExecutionEngine::runStaticConstructorsDestructors(Module *module, + bool isDtors) { + const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors"; + GlobalVariable *GV = module->getNamedGlobal(Name); + + // If this global has internal linkage, or if it has a use, then it must be + // an old-style (llvmgcc3) static ctor with __main linked in and in use. If + // this is the case, don't execute any of the global ctors, __main will do + // it. + if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return; + + // Should be an array of '{ int, void ()* }' structs. The first value is + // the init priority, which we ignore. + ConstantArray *InitList = dyn_cast(GV->getInitializer()); + if (!InitList) return; + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + ConstantStruct *CS = + dyn_cast(InitList->getOperand(i)); + if (!CS) continue; + if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + + Constant *FP = CS->getOperand(1); + if (FP->isNullValue()) + break; // Found a null terminator, exit. + + // Strip off constant expression casts. + if (ConstantExpr *CE = dyn_cast(FP)) + if (CE->isCast()) + FP = CE->getOperand(0); + + // Execute the ctor/dtor function! + if (Function *F = dyn_cast(FP)) + runFunction(F, std::vector()); + + // FIXME: It is marginally lame that we just do nothing here if we see an + // entry we don't recognize. It might not be unreasonable for the verifier + // to not even allow this and just assert here. + } +} + +void ExecutionEngine::runStaticConstructorsDestructors(bool isDtors) { + // Execute global ctors/dtors for each module in the program. + for (unsigned i = 0, e = Modules.size(); i != e; ++i) + runStaticConstructorsDestructors(Modules[i], isDtors); +} + +#ifndef NDEBUG +/// isTargetNullPtr - Return whether the target pointer stored at Loc is null. +static bool isTargetNullPtr(ExecutionEngine *EE, void *Loc) { + unsigned PtrSize = EE->getTargetData()->getPointerSize(); + for (unsigned i = 0; i < PtrSize; ++i) + if (*(i + (uint8_t*)Loc)) + return false; + return true; +} +#endif + +int ExecutionEngine::runFunctionAsMain(Function *Fn, + const std::vector &argv, + const char * const * envp) { + std::vector GVArgs; + GenericValue GVArgc; + GVArgc.IntVal = APInt(32, argv.size()); + + // Check main() type + unsigned NumArgs = Fn->getFunctionType()->getNumParams(); + const FunctionType *FTy = Fn->getFunctionType(); + const Type* PPInt8Ty = Type::getInt8PtrTy(Fn->getContext())->getPointerTo(); + + // Check the argument types. 
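+  // The accepted prototypes are, in order of increasing arity (sketch):
+  //   int main();
+  //   int main(int argc);
+  //   int main(int argc, char **argv);
+  //   int main(int argc, char **argv, char **envp);
+  // A void return type is also tolerated.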
+ if (NumArgs > 3) + report_fatal_error("Invalid number of arguments of main() supplied"); + if (NumArgs >= 3 && FTy->getParamType(2) != PPInt8Ty) + report_fatal_error("Invalid type for third argument of main() supplied"); + if (NumArgs >= 2 && FTy->getParamType(1) != PPInt8Ty) + report_fatal_error("Invalid type for second argument of main() supplied"); + if (NumArgs >= 1 && !FTy->getParamType(0)->isIntegerTy(32)) + report_fatal_error("Invalid type for first argument of main() supplied"); + if (!FTy->getReturnType()->isIntegerTy() && + !FTy->getReturnType()->isVoidTy()) + report_fatal_error("Invalid return type of main() supplied"); + + ArgvArray CArgv; + ArgvArray CEnv; + if (NumArgs) { + GVArgs.push_back(GVArgc); // Arg #0 = argc. + if (NumArgs > 1) { + // Arg #1 = argv. + GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv))); + assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) && + "argv[0] was null after CreateArgv"); + if (NumArgs > 2) { + std::vector EnvVars; + for (unsigned i = 0; envp[i]; ++i) + EnvVars.push_back(envp[i]); + // Arg #2 = envp. + GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars))); + } + } + } + + return runFunction(Fn, GVArgs).IntVal.getZExtValue(); +} + +ExecutionEngine *ExecutionEngine::create(Module *M, + bool ForceInterpreter, + std::string *ErrorStr, + CodeGenOpt::Level OptLevel, + bool GVsWithCode) { + return EngineBuilder(M) + .setEngineKind(ForceInterpreter + ? EngineKind::Interpreter + : EngineKind::JIT) + .setErrorStr(ErrorStr) + .setOptLevel(OptLevel) + .setAllocateGVsWithCode(GVsWithCode) + .create(); +} + +ExecutionEngine *EngineBuilder::create() { + // Make sure we can resolve symbols in the program as well. The zero arg + // to the function tells DynamicLibrary to load the program, not a library. + if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) + return 0; + + // If the user specified a memory manager but didn't specify which engine to + // create, we assume they only want the JIT, and we fail if they only want + // the interpreter. + if (JMM) { + if (WhichEngine & EngineKind::JIT) + WhichEngine = EngineKind::JIT; + else { + if (ErrorStr) + *ErrorStr = "Cannot create an interpreter with a memory manager."; + return 0; + } + } + + // Unless the interpreter was explicitly selected or the JIT is not linked, + // try making a JIT. + if (WhichEngine & EngineKind::JIT) { + if (UseMCJIT && ExecutionEngine::MCJITCtor) { + ExecutionEngine *EE = + ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, OptLevel, + AllocateGVsWithCode, CMModel, + MArch, MCPU, MAttrs); + if (EE) return EE; + } else if (ExecutionEngine::JITCtor) { + ExecutionEngine *EE = + ExecutionEngine::JITCtor(M, ErrorStr, JMM, OptLevel, + AllocateGVsWithCode, CMModel, + MArch, MCPU, MAttrs); + if (EE) return EE; + } + } + + // If we can't make a JIT and we didn't request one specifically, try making + // an interpreter instead. 
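+  // Taken together, the selection order is: MCJIT (when UseMCJIT is set),
+  // the classic JIT, then the interpreter. A client that accepts either
+  // kind can write, for instance (sketch):
+  //   std::string Err;
+  //   ExecutionEngine *EE = EngineBuilder(M).setEngineKind(EngineKind::Either)
+  //                                         .setErrorStr(&Err).create();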
+ if (WhichEngine & EngineKind::Interpreter) { + if (ExecutionEngine::InterpCtor) + return ExecutionEngine::InterpCtor(M, ErrorStr); + if (ErrorStr) + *ErrorStr = "Interpreter has not been linked in."; + return 0; + } + + if ((WhichEngine & EngineKind::JIT) && ExecutionEngine::JITCtor == 0) { + if (ErrorStr) + *ErrorStr = "JIT has not been linked in."; + } + + return 0; +} + +void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { + if (Function *F = const_cast(dyn_cast(GV))) + return getPointerToFunction(F); + + MutexGuard locked(lock); + if (void *P = EEState.getGlobalAddressMap(locked)[GV]) + return P; + + // Global variable might have been added since interpreter started. + if (GlobalVariable *GVar = + const_cast(dyn_cast(GV))) + EmitGlobalVariable(GVar); + else + llvm_unreachable("Global hasn't had an address allocated yet!"); + + return EEState.getGlobalAddressMap(locked)[GV]; +} + +/// \brief Converts a Constant* into a GenericValue, including handling of +/// ConstantExpr values. +GenericValue ExecutionEngine::getConstantValue(const Constant *C) { + // If its undefined, return the garbage. + if (isa(C)) { + GenericValue Result; + switch (C->getType()->getTypeID()) { + case Type::IntegerTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + // Although the value is undefined, we still have to construct an APInt + // with the correct bit width. + Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); + break; + default: + break; + } + return Result; + } + + // Otherwise, if the value is a ConstantExpr... + if (const ConstantExpr *CE = dyn_cast(C)) { + Constant *Op0 = CE->getOperand(0); + switch (CE->getOpcode()) { + case Instruction::GetElementPtr: { + // Compute the index + GenericValue Result = getConstantValue(Op0); + SmallVector Indices(CE->op_begin()+1, CE->op_end()); + uint64_t Offset = + TD->getIndexedOffset(Op0->getType(), &Indices[0], Indices.size()); + + char* tmp = (char*) Result.PointerVal; + Result = PTOGV(tmp + Offset); + return Result; + } + case Instruction::Trunc: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.trunc(BitWidth); + return GV; + } + case Instruction::ZExt: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.zext(BitWidth); + return GV; + } + case Instruction::SExt: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast(CE->getType())->getBitWidth(); + GV.IntVal = GV.IntVal.sext(BitWidth); + return GV; + } + case Instruction::FPTrunc: { + // FIXME long double + GenericValue GV = getConstantValue(Op0); + GV.FloatVal = float(GV.DoubleVal); + return GV; + } + case Instruction::FPExt:{ + // FIXME long double + GenericValue GV = getConstantValue(Op0); + GV.DoubleVal = double(GV.FloatVal); + return GV; + } + case Instruction::UIToFP: { + GenericValue GV = getConstantValue(Op0); + if (CE->getType()->isFloatTy()) + GV.FloatVal = float(GV.IntVal.roundToDouble()); + else if (CE->getType()->isDoubleTy()) + GV.DoubleVal = GV.IntVal.roundToDouble(); + else if (CE->getType()->isX86_FP80Ty()) { + APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended); + (void)apf.convertFromAPInt(GV.IntVal, + false, + APFloat::rmNearestTiesToEven); + GV.IntVal = apf.bitcastToAPInt(); + } + return GV; + } + case Instruction::SIToFP: { + GenericValue GV = getConstantValue(Op0); + if (CE->getType()->isFloatTy()) + GV.FloatVal = 
float(GV.IntVal.signedRoundToDouble()); + else if (CE->getType()->isDoubleTy()) + GV.DoubleVal = GV.IntVal.signedRoundToDouble(); + else if (CE->getType()->isX86_FP80Ty()) { + APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended); + (void)apf.convertFromAPInt(GV.IntVal, + true, + APFloat::rmNearestTiesToEven); + GV.IntVal = apf.bitcastToAPInt(); + } + return GV; + } + case Instruction::FPToUI: // double->APInt conversion handles sign + case Instruction::FPToSI: { + GenericValue GV = getConstantValue(Op0); + uint32_t BitWidth = cast(CE->getType())->getBitWidth(); + if (Op0->getType()->isFloatTy()) + GV.IntVal = APIntOps::RoundFloatToAPInt(GV.FloatVal, BitWidth); + else if (Op0->getType()->isDoubleTy()) + GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); + else if (Op0->getType()->isX86_FP80Ty()) { + APFloat apf = APFloat(GV.IntVal); + uint64_t v; + bool ignored; + (void)apf.convertToInteger(&v, BitWidth, + CE->getOpcode()==Instruction::FPToSI, + APFloat::rmTowardZero, &ignored); + GV.IntVal = v; // endian? + } + return GV; + } + case Instruction::PtrToInt: { + GenericValue GV = getConstantValue(Op0); + uint32_t PtrWidth = TD->getPointerSizeInBits(); + GV.IntVal = APInt(PtrWidth, uintptr_t(GV.PointerVal)); + return GV; + } + case Instruction::IntToPtr: { + GenericValue GV = getConstantValue(Op0); + uint32_t PtrWidth = TD->getPointerSizeInBits(); + if (PtrWidth != GV.IntVal.getBitWidth()) + GV.IntVal = GV.IntVal.zextOrTrunc(PtrWidth); + assert(GV.IntVal.getBitWidth() <= 64 && "Bad pointer width"); + GV.PointerVal = PointerTy(uintptr_t(GV.IntVal.getZExtValue())); + return GV; + } + case Instruction::BitCast: { + GenericValue GV = getConstantValue(Op0); + const Type* DestTy = CE->getType(); + switch (Op0->getType()->getTypeID()) { + default: llvm_unreachable("Invalid bitcast operand"); + case Type::IntegerTyID: + assert(DestTy->isFloatingPointTy() && "invalid bitcast"); + if (DestTy->isFloatTy()) + GV.FloatVal = GV.IntVal.bitsToFloat(); + else if (DestTy->isDoubleTy()) + GV.DoubleVal = GV.IntVal.bitsToDouble(); + break; + case Type::FloatTyID: + assert(DestTy->isIntegerTy(32) && "Invalid bitcast"); + GV.IntVal = APInt::floatToBits(GV.FloatVal); + break; + case Type::DoubleTyID: + assert(DestTy->isIntegerTy(64) && "Invalid bitcast"); + GV.IntVal = APInt::doubleToBits(GV.DoubleVal); + break; + case Type::PointerTyID: + assert(DestTy->isPointerTy() && "Invalid bitcast"); + break; // getConstantValue(Op0) above already converted it + } + return GV; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + GenericValue LHS = getConstantValue(Op0); + GenericValue RHS = getConstantValue(CE->getOperand(1)); + GenericValue GV; + switch (CE->getOperand(0)->getType()->getTypeID()) { + default: llvm_unreachable("Bad add type!"); + case Type::IntegerTyID: + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid integer opcode"); + case Instruction::Add: GV.IntVal = LHS.IntVal + RHS.IntVal; break; + case Instruction::Sub: GV.IntVal = LHS.IntVal - RHS.IntVal; break; + case Instruction::Mul: GV.IntVal = LHS.IntVal * RHS.IntVal; break; + case Instruction::UDiv:GV.IntVal = LHS.IntVal.udiv(RHS.IntVal); break; + case Instruction::SDiv:GV.IntVal = LHS.IntVal.sdiv(RHS.IntVal); break; + case 
Instruction::URem:GV.IntVal = LHS.IntVal.urem(RHS.IntVal); break; + case Instruction::SRem:GV.IntVal = LHS.IntVal.srem(RHS.IntVal); break; + case Instruction::And: GV.IntVal = LHS.IntVal & RHS.IntVal; break; + case Instruction::Or: GV.IntVal = LHS.IntVal | RHS.IntVal; break; + case Instruction::Xor: GV.IntVal = LHS.IntVal ^ RHS.IntVal; break; + } + break; + case Type::FloatTyID: + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid float opcode"); + case Instruction::FAdd: + GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break; + case Instruction::FSub: + GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break; + case Instruction::FMul: + GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break; + case Instruction::FDiv: + GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break; + case Instruction::FRem: + GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break; + } + break; + case Type::DoubleTyID: + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid double opcode"); + case Instruction::FAdd: + GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break; + case Instruction::FSub: + GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break; + case Instruction::FMul: + GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break; + case Instruction::FDiv: + GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break; + case Instruction::FRem: + GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break; + } + break; + case Type::X86_FP80TyID: + case Type::PPC_FP128TyID: + case Type::FP128TyID: { + APFloat apfLHS = APFloat(LHS.IntVal); + switch (CE->getOpcode()) { + default: llvm_unreachable("Invalid long double opcode"); + case Instruction::FAdd: + apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FSub: + apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FMul: + apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FDiv: + apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + case Instruction::FRem: + apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + GV.IntVal = apfLHS.bitcastToAPInt(); + break; + } + } + break; + } + return GV; + } + default: + break; + } + + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + OS << "ConstantExpr not handled: " << *CE; + report_fatal_error(OS.str()); + } + + // Otherwise, we have a simple constant. 
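+  // "Simple" here means one of: ConstantFP (float, double, or a long-double
+  // type kept as an APInt), ConstantInt, or a pointer constant (null,
+  // Function, GlobalVariable or BlockAddress), as the switch below enumerates.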
+ GenericValue Result; + switch (C->getType()->getTypeID()) { + case Type::FloatTyID: + Result.FloatVal = cast(C)->getValueAPF().convertToFloat(); + break; + case Type::DoubleTyID: + Result.DoubleVal = cast(C)->getValueAPF().convertToDouble(); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + Result.IntVal = cast (C)->getValueAPF().bitcastToAPInt(); + break; + case Type::IntegerTyID: + Result.IntVal = cast(C)->getValue(); + break; + case Type::PointerTyID: + if (isa(C)) + Result.PointerVal = 0; + else if (const Function *F = dyn_cast(C)) + Result = PTOGV(getPointerToFunctionOrStub(const_cast(F))); + else if (const GlobalVariable *GV = dyn_cast(C)) + Result = PTOGV(getOrEmitGlobalVariable(const_cast(GV))); + else if (const BlockAddress *BA = dyn_cast(C)) + Result = PTOGV(getPointerToBasicBlock(const_cast( + BA->getBasicBlock()))); + else + llvm_unreachable("Unknown constant pointer type!"); + break; + default: + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + OS << "ERROR: Constant unimplemented for type: " << *C->getType(); + report_fatal_error(OS.str()); + } + + return Result; +} + +/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst +/// with the integer held in IntVal. +static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, + unsigned StoreBytes) { + assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); + uint8_t *Src = (uint8_t *)IntVal.getRawData(); + + if (sys::isLittleEndianHost()) { + // Little-endian host - the source is ordered from LSB to MSB. Order the + // destination from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, StoreBytes); + } else { + // Big-endian host - the source is an array of 64 bit words ordered from + // LSW to MSW. Each word is ordered from MSB to LSB. Order the destination + // from MSB to LSB: Reverse the word order, but not the bytes in a word. + while (StoreBytes > sizeof(uint64_t)) { + StoreBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst + StoreBytes, Src, sizeof(uint64_t)); + Src += sizeof(uint64_t); + } + + memcpy(Dst, Src + sizeof(uint64_t) - StoreBytes, StoreBytes); + } +} + +void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, + GenericValue *Ptr, const Type *Ty) { + const unsigned StoreBytes = getTargetData()->getTypeStoreSize(Ty); + + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes); + break; + case Type::FloatTyID: + *((float*)Ptr) = Val.FloatVal; + break; + case Type::DoubleTyID: + *((double*)Ptr) = Val.DoubleVal; + break; + case Type::X86_FP80TyID: + memcpy(Ptr, Val.IntVal.getRawData(), 10); + break; + case Type::PointerTyID: + // Ensure 64 bit target pointers are fully initialized on 32 bit hosts. + if (StoreBytes != sizeof(PointerTy)) + memset(Ptr, 0, StoreBytes); + + *((PointerTy*)Ptr) = Val.PointerVal; + break; + default: + dbgs() << "Cannot store value of type " << *Ty << "!\n"; + } + + if (sys::isLittleEndianHost() != getTargetData()->isLittleEndian()) + // Host and target are different endian - reverse the stored bytes. + std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); +} + +/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting +/// from Src into IntVal, which is assumed to be wide enough and to hold zero. 
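+/// For example, a 10-byte load into an 80-bit APInt on a big-endian host
+/// copies source bytes 2..9 into the low word and source bytes 0..1 into the
+/// low-order end of the high word, mirroring StoreIntToMemory above.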
+static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { + assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); + uint8_t *Dst = (uint8_t *)IntVal.getRawData(); + + if (sys::isLittleEndianHost()) + // Little-endian host - the destination must be ordered from LSB to MSB. + // The source is ordered from LSB to MSB: Do a straight copy. + memcpy(Dst, Src, LoadBytes); + else { + // Big-endian - the destination is an array of 64 bit words ordered from + // LSW to MSW. Each word must be ordered from MSB to LSB. The source is + // ordered from MSB to LSB: Reverse the word order, but not the bytes in + // a word. + while (LoadBytes > sizeof(uint64_t)) { + LoadBytes -= sizeof(uint64_t); + // May not be aligned so use memcpy. + memcpy(Dst, Src + LoadBytes, sizeof(uint64_t)); + Dst += sizeof(uint64_t); + } + + memcpy(Dst + sizeof(uint64_t) - LoadBytes, Src, LoadBytes); + } +} + +/// FIXME: document +/// +void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, + GenericValue *Ptr, + const Type *Ty) { + const unsigned LoadBytes = getTargetData()->getTypeStoreSize(Ty); + + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + // An APInt with all words initially zero. + Result.IntVal = APInt(cast(Ty)->getBitWidth(), 0); + LoadIntFromMemory(Result.IntVal, (uint8_t*)Ptr, LoadBytes); + break; + case Type::FloatTyID: + Result.FloatVal = *((float*)Ptr); + break; + case Type::DoubleTyID: + Result.DoubleVal = *((double*)Ptr); + break; + case Type::PointerTyID: + Result.PointerVal = *((PointerTy*)Ptr); + break; + case Type::X86_FP80TyID: { + // This is endian dependent, but it will only work on x86 anyway. + // FIXME: Will not trap if loading a signaling NaN. + uint64_t y[2]; + memcpy(y, Ptr, 10); + Result.IntVal = APInt(80, 2, y); + break; + } + default: + SmallString<256> Msg; + raw_svector_ostream OS(Msg); + OS << "Cannot load value of type " << *Ty << "!"; + report_fatal_error(OS.str()); + } +} + +void ExecutionEngine::InitializeMemory(const Constant *Init, void *Addr) { + DEBUG(dbgs() << "JIT: Initializing " << Addr << " "); + DEBUG(Init->dump()); + if (isa(Init)) { + return; + } else if (const ConstantVector *CP = dyn_cast(Init)) { + unsigned ElementSize = + getTargetData()->getTypeAllocSize(CP->getType()->getElementType()); + for (unsigned i = 0, e = CP->getNumOperands(); i != e; ++i) + InitializeMemory(CP->getOperand(i), (char*)Addr+i*ElementSize); + return; + } else if (isa(Init)) { + memset(Addr, 0, (size_t)getTargetData()->getTypeAllocSize(Init->getType())); + return; + } else if (const ConstantArray *CPA = dyn_cast(Init)) { + unsigned ElementSize = + getTargetData()->getTypeAllocSize(CPA->getType()->getElementType()); + for (unsigned i = 0, e = CPA->getNumOperands(); i != e; ++i) + InitializeMemory(CPA->getOperand(i), (char*)Addr+i*ElementSize); + return; + } else if (const ConstantStruct *CPS = dyn_cast(Init)) { + const StructLayout *SL = + getTargetData()->getStructLayout(cast(CPS->getType())); + for (unsigned i = 0, e = CPS->getNumOperands(); i != e; ++i) + InitializeMemory(CPS->getOperand(i), (char*)Addr+SL->getElementOffset(i)); + return; + } else if (Init->getType()->isFirstClassType()) { + GenericValue Val = getConstantValue(Init); + StoreValueToMemory(Val, (GenericValue*)Addr, Init->getType()); + return; + } + + DEBUG(dbgs() << "Bad Type: " << *Init->getType() << "\n"); + llvm_unreachable("Unknown constant type to initialize memory with!"); +} + +/// EmitGlobals - Emit all of the global variables to memory, storing their +/// addresses 
into GlobalAddress. This must make sure to copy the contents of +/// their initializers into the memory. +void ExecutionEngine::emitGlobals() { + // Loop over all of the global variables in the program, allocating the memory + // to hold them. If there is more than one module, do a prepass over globals + // to figure out how the different modules should link together. + std::map, + const GlobalValue*> LinkedGlobalsMap; + + if (Modules.size() != 1) { + for (unsigned m = 0, e = Modules.size(); m != e; ++m) { + Module &M = *Modules[m]; + for (Module::const_global_iterator I = M.global_begin(), + E = M.global_end(); I != E; ++I) { + const GlobalValue *GV = I; + if (GV->hasLocalLinkage() || GV->isDeclaration() || + GV->hasAppendingLinkage() || !GV->hasName()) + continue;// Ignore external globals and globals with internal linkage. + + const GlobalValue *&GVEntry = + LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())]; + + // If this is the first time we've seen this global, it is the canonical + // version. + if (!GVEntry) { + GVEntry = GV; + continue; + } + + // If the existing global is strong, never replace it. + if (GVEntry->hasExternalLinkage() || + GVEntry->hasDLLImportLinkage() || + GVEntry->hasDLLExportLinkage()) + continue; + + // Otherwise, we know it's linkonce/weak, replace it if this is a strong + // symbol. FIXME is this right for common? + if (GV->hasExternalLinkage() || GVEntry->hasExternalWeakLinkage()) + GVEntry = GV; + } + } + } + + std::vector NonCanonicalGlobals; + for (unsigned m = 0, e = Modules.size(); m != e; ++m) { + Module &M = *Modules[m]; + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + // In the multi-module case, see what this global maps to. + if (!LinkedGlobalsMap.empty()) { + if (const GlobalValue *GVEntry = + LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) { + // If something else is the canonical global, ignore this one. + if (GVEntry != &*I) { + NonCanonicalGlobals.push_back(I); + continue; + } + } + } + + if (!I->isDeclaration()) { + addGlobalMapping(I, getMemoryForGV(I)); + } else { + // External variable reference. Try to use the dynamic loader to + // get a pointer to it. + if (void *SymAddr = + sys::DynamicLibrary::SearchForAddressOfSymbol(I->getName())) + addGlobalMapping(I, SymAddr); + else { + report_fatal_error("Could not resolve external global address: " + +I->getName()); + } + } + } + + // If there are multiple modules, map the non-canonical globals to their + // canonical location. + if (!NonCanonicalGlobals.empty()) { + for (unsigned i = 0, e = NonCanonicalGlobals.size(); i != e; ++i) { + const GlobalValue *GV = NonCanonicalGlobals[i]; + const GlobalValue *CGV = + LinkedGlobalsMap[std::make_pair(GV->getName(), GV->getType())]; + void *Ptr = getPointerToGlobalIfAvailable(CGV); + assert(Ptr && "Canonical global wasn't codegen'd!"); + addGlobalMapping(GV, Ptr); + } + } + + // Now that all of the globals are set up in memory, loop through them all + // and initialize their contents. + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) { + if (!I->isDeclaration()) { + if (!LinkedGlobalsMap.empty()) { + if (const GlobalValue *GVEntry = + LinkedGlobalsMap[std::make_pair(I->getName(), I->getType())]) + if (GVEntry != &*I) // Not the canonical variable. 
+ continue; + } + EmitGlobalVariable(I); + } + } + } +} + +// EmitGlobalVariable - This method emits the specified global variable to the +// address specified in GlobalAddresses, or allocates new memory if it's not +// already in the map. +void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { + void *GA = getPointerToGlobalIfAvailable(GV); + + if (GA == 0) { + // If it's not already specified, allocate memory for the global. + GA = getMemoryForGV(GV); + addGlobalMapping(GV, GA); + } + + // Don't initialize if it's thread local, let the client do it. + if (!GV->isThreadLocal()) + InitializeMemory(GV->getInitializer(), GA); + + const Type *ElTy = GV->getType()->getElementType(); + size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy); + NumInitBytes += (unsigned)GVSize; + ++NumGlobals; +} + +ExecutionEngineState::ExecutionEngineState(ExecutionEngine &EE) + : EE(EE), GlobalAddressMap(this) { +} + +sys::Mutex * +ExecutionEngineState::AddressMapConfig::getMutex(ExecutionEngineState *EES) { + return &EES->EE.lock; +} + +void ExecutionEngineState::AddressMapConfig::onDelete(ExecutionEngineState *EES, + const GlobalValue *Old) { + void *OldVal = EES->GlobalAddressMap.lookup(Old); + EES->GlobalAddressReverseMap.erase(OldVal); +} + +void ExecutionEngineState::AddressMapConfig::onRAUW(ExecutionEngineState *, + const GlobalValue *, + const GlobalValue *) { + assert(false && "The ExecutionEngine doesn't know how to handle a" + " RAUW on a value it has a global mapping for."); +} diff --git a/final/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/final/lib/ExecutionEngine/ExecutionEngineBindings.cpp new file mode 100644 index 00000000000..f8f1f4a78ee --- /dev/null +++ b/final/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -0,0 +1,254 @@ +//===-- ExecutionEngineBindings.cpp - C bindings for EEs ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings for the ExecutionEngine library. 
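+// A minimal client might look like this (sketch; Mod and Fn are assumed to
+// come from the caller, and error handling is elided):
+//   LLVMExecutionEngineRef EE;
+//   char *Err = 0;
+//   if (LLVMCreateExecutionEngineForModule(&EE, Mod, &Err)) { /* use Err */ }
+//   LLVMGenericValueRef Arg = LLVMCreateGenericValueOfInt(LLVMInt32Type(), 6, 0);
+//   LLVMGenericValueRef Res = LLVMRunFunction(EE, Fn, 1, &Arg);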
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jit" +#include "llvm-c/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Support/ErrorHandling.h" +#include + +using namespace llvm; + +/*===-- Operations on generic values --------------------------------------===*/ + +LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, + unsigned long long N, + LLVMBool IsSigned) { + GenericValue *GenVal = new GenericValue(); + GenVal->IntVal = APInt(unwrap(Ty)->getBitWidth(), N, IsSigned); + return wrap(GenVal); +} + +LLVMGenericValueRef LLVMCreateGenericValueOfPointer(void *P) { + GenericValue *GenVal = new GenericValue(); + GenVal->PointerVal = P; + return wrap(GenVal); +} + +LLVMGenericValueRef LLVMCreateGenericValueOfFloat(LLVMTypeRef TyRef, double N) { + GenericValue *GenVal = new GenericValue(); + switch (unwrap(TyRef)->getTypeID()) { + case Type::FloatTyID: + GenVal->FloatVal = N; + break; + case Type::DoubleTyID: + GenVal->DoubleVal = N; + break; + default: + llvm_unreachable("LLVMGenericValueToFloat supports only float and double."); + } + return wrap(GenVal); +} + +unsigned LLVMGenericValueIntWidth(LLVMGenericValueRef GenValRef) { + return unwrap(GenValRef)->IntVal.getBitWidth(); +} + +unsigned long long LLVMGenericValueToInt(LLVMGenericValueRef GenValRef, + LLVMBool IsSigned) { + GenericValue *GenVal = unwrap(GenValRef); + if (IsSigned) + return GenVal->IntVal.getSExtValue(); + else + return GenVal->IntVal.getZExtValue(); +} + +void *LLVMGenericValueToPointer(LLVMGenericValueRef GenVal) { + return unwrap(GenVal)->PointerVal; +} + +double LLVMGenericValueToFloat(LLVMTypeRef TyRef, LLVMGenericValueRef GenVal) { + switch (unwrap(TyRef)->getTypeID()) { + case Type::FloatTyID: + return unwrap(GenVal)->FloatVal; + case Type::DoubleTyID: + return unwrap(GenVal)->DoubleVal; + default: + llvm_unreachable("LLVMGenericValueToFloat supports only float and double."); + break; + } + return 0; // Not reached +} + +void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) { + delete unwrap(GenVal); +} + +/*===-- Operations on execution engines -----------------------------------===*/ + +LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE, + LLVMModuleRef M, + char **OutError) { + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::Either) + .setErrorStr(&Error); + if (ExecutionEngine *EE = builder.create()){ + *OutEE = wrap(EE); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp, + LLVMModuleRef M, + char **OutError) { + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::Interpreter) + .setErrorStr(&Error); + if (ExecutionEngine *Interp = builder.create()) { + *OutInterp = wrap(Interp); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, + LLVMModuleRef M, + unsigned OptLevel, + char **OutError) { + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setOptLevel((CodeGenOpt::Level)OptLevel); + if (ExecutionEngine *JIT = builder.create()) { + *OutJIT = wrap(JIT); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + +LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, + 
LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateExecutionEngineForModule(OutEE, + reinterpret_cast(MP), + OutError); +} + +LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp, + LLVMModuleProviderRef MP, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateInterpreterForModule(OutInterp, + reinterpret_cast(MP), + OutError); +} + +LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT, + LLVMModuleProviderRef MP, + unsigned OptLevel, + char **OutError) { + /* The module provider is now actually a module. */ + return LLVMCreateJITCompilerForModule(OutJIT, + reinterpret_cast(MP), + OptLevel, OutError); +} + + +void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) { + delete unwrap(EE); +} + +void LLVMRunStaticConstructors(LLVMExecutionEngineRef EE) { + unwrap(EE)->runStaticConstructorsDestructors(false); +} + +void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) { + unwrap(EE)->runStaticConstructorsDestructors(true); +} + +int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned ArgC, const char * const *ArgV, + const char * const *EnvP) { + std::vector ArgVec; + for (unsigned I = 0; I != ArgC; ++I) + ArgVec.push_back(ArgV[I]); + + return unwrap(EE)->runFunctionAsMain(unwrap(F), ArgVec, EnvP); +} + +LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, + unsigned NumArgs, + LLVMGenericValueRef *Args) { + std::vector ArgVec; + ArgVec.reserve(NumArgs); + for (unsigned I = 0; I != NumArgs; ++I) + ArgVec.push_back(*unwrap(Args[I])); + + GenericValue *Result = new GenericValue(); + *Result = unwrap(EE)->runFunction(unwrap(F), ArgVec); + return wrap(Result); +} + +void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) { + unwrap(EE)->freeMachineCodeForFunction(unwrap(F)); +} + +void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){ + unwrap(EE)->addModule(unwrap(M)); +} + +void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){ + /* The module provider is now actually a module. */ + LLVMAddModule(EE, reinterpret_cast(MP)); +} + +LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M, + LLVMModuleRef *OutMod, char **OutError) { + Module *Mod = unwrap(M); + unwrap(EE)->removeModule(Mod); + *OutMod = wrap(Mod); + return 0; +} + +LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE, + LLVMModuleProviderRef MP, + LLVMModuleRef *OutMod, char **OutError) { + /* The module provider is now actually a module. 
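+     LLVMModuleProviderRef is retained purely for source compatibility,
+     which is why a reinterpret_cast to LLVMModuleRef suffices.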
*/ + return LLVMRemoveModule(EE, reinterpret_cast(MP), OutMod, + OutError); +} + +LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, + LLVMValueRef *OutFn) { + if (Function *F = unwrap(EE)->FindFunctionNamed(Name)) { + *OutFn = wrap(F); + return 0; + } + return 1; +} + +void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { + return unwrap(EE)->recompileAndRelinkFunction(unwrap(Fn)); +} + +LLVMTargetDataRef LLVMGetExecutionEngineTargetData(LLVMExecutionEngineRef EE) { + return wrap(unwrap(EE)->getTargetData()); +} + +void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, + void* Addr) { + unwrap(EE)->addGlobalMapping(unwrap(Global), Addr); +} + +void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { + return unwrap(EE)->getPointerToGlobal(unwrap(Global)); +} diff --git a/final/lib/ExecutionEngine/Interpreter/CMakeLists.txt b/final/lib/ExecutionEngine/Interpreter/CMakeLists.txt new file mode 100644 index 00000000000..d331f830b62 --- /dev/null +++ b/final/lib/ExecutionEngine/Interpreter/CMakeLists.txt @@ -0,0 +1,17 @@ +# Make sure that the path to libffi headers is on the command +# line. That path can be a compiler's non-default path even when +# FFI_INCLUDE_DIR was not used, because cmake has its own paths for +# searching for headers (CMAKE_SYSTEM_INCLUDE_PATH, for instance): +if( FFI_INCLUDE_PATH ) + include_directories( ${FFI_INCLUDE_PATH} ) +endif() + +add_llvm_library(LLVMInterpreter + Execution.cpp + ExternalFunctions.cpp + Interpreter.cpp + ) + +if( LLVM_ENABLE_FFI ) + target_link_libraries( LLVMInterpreter ${FFI_LIBRARY_PATH} ) +endif() diff --git a/final/lib/ExecutionEngine/Interpreter/Execution.cpp b/final/lib/ExecutionEngine/Interpreter/Execution.cpp new file mode 100644 index 00000000000..498063bf655 --- /dev/null +++ b/final/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -0,0 +1,1350 @@ +//===-- Execution.cpp - Implement code to simulate the program ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the actual instruction interpreter. 
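+// Instructions are executed by visit methods on the Interpreter (e.g.
+// visitICmpInst below), reading and writing GenericValues in the current
+// ExecutionContext stack frame.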
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "interpreter" +#include "Interpreter.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Instructions.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include +#include +using namespace llvm; + +STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed"); + +static cl::opt PrintVolatile("interpreter-print-volatile", cl::Hidden, + cl::desc("make the interpreter print every volatile load and store")); + +//===----------------------------------------------------------------------===// +// Various Helper Functions +//===----------------------------------------------------------------------===// + +static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) { + SF.Values[V] = Val; +} + +//===----------------------------------------------------------------------===// +// Binary Instruction Implementations +//===----------------------------------------------------------------------===// + +#define IMPLEMENT_BINARY_OPERATOR(OP, TY) \ + case Type::TY##TyID: \ + Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \ + break + +static void executeFAddInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_BINARY_OPERATOR(+, Float); + IMPLEMENT_BINARY_OPERATOR(+, Double); + default: + dbgs() << "Unhandled type for FAdd instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } +} + +static void executeFSubInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_BINARY_OPERATOR(-, Float); + IMPLEMENT_BINARY_OPERATOR(-, Double); + default: + dbgs() << "Unhandled type for FSub instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } +} + +static void executeFMulInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_BINARY_OPERATOR(*, Float); + IMPLEMENT_BINARY_OPERATOR(*, Double); + default: + dbgs() << "Unhandled type for FMul instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } +} + +static void executeFDivInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + IMPLEMENT_BINARY_OPERATOR(/, Float); + IMPLEMENT_BINARY_OPERATOR(/, Double); + default: + dbgs() << "Unhandled type for FDiv instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } +} + +static void executeFRemInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::FloatTyID: + Dest.FloatVal = fmod(Src1.FloatVal, Src2.FloatVal); + break; + case Type::DoubleTyID: + Dest.DoubleVal = fmod(Src1.DoubleVal, Src2.DoubleVal); + break; + default: + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } +} + +#define IMPLEMENT_INTEGER_ICMP(OP, TY) \ + case Type::IntegerTyID: \ + Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ + break; + +// Handle pointers specially because they must be compared with only as much +// width as the host has. 
We _do not_ want to be comparing 64 bit values when +// running on a 32-bit target, otherwise the upper 32 bits might mess up +// comparisons if they contain garbage. +#define IMPLEMENT_POINTER_ICMP(OP) \ + case Type::PointerTyID: \ + Dest.IntVal = APInt(1,(void*)(intptr_t)Src1.PointerVal OP \ + (void*)(intptr_t)Src2.PointerVal); \ + break; + +static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_POINTER_ICMP(==); + default: + dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_POINTER_ICMP(!=); + default: + dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_POINTER_ICMP(<); + default: + dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_POINTER_ICMP(<); + default: + dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_POINTER_ICMP(>); + default: + dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_POINTER_ICMP(>); + default: + dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_POINTER_ICMP(<=); + default: + dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_POINTER_ICMP(<=); + default: + dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_POINTER_ICMP(>=); + default: + dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_INTEGER_ICMP(sge,Ty); + 
IMPLEMENT_POINTER_ICMP(>=); + default: + dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +void Interpreter::visitICmpInst(ICmpInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getPredicate()) { + case ICmpInst::ICMP_EQ: R = executeICMP_EQ(Src1, Src2, Ty); break; + case ICmpInst::ICMP_NE: R = executeICMP_NE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_ULT: R = executeICMP_ULT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SLT: R = executeICMP_SLT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_UGT: R = executeICMP_UGT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SGT: R = executeICMP_SGT(Src1, Src2, Ty); break; + case ICmpInst::ICMP_ULE: R = executeICMP_ULE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SLE: R = executeICMP_SLE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_UGE: R = executeICMP_UGE(Src1, Src2, Ty); break; + case ICmpInst::ICMP_SGE: R = executeICMP_SGE(Src1, Src2, Ty); break; + default: + dbgs() << "Don't know how to handle this ICmp predicate!\n-->" << I; + llvm_unreachable(0); + } + + SetValue(&I, R, SF); +} + +#define IMPLEMENT_FCMP(OP, TY) \ + case Type::TY##TyID: \ + Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ + break + +static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(==, Float); + IMPLEMENT_FCMP(==, Double); + default: + dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(!=, Float); + IMPLEMENT_FCMP(!=, Double); + + default: + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(<=, Float); + IMPLEMENT_FCMP(<=, Double); + default: + dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(>=, Float); + IMPLEMENT_FCMP(>=, Double); + default: + dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeFCMP_OLT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(<, Float); + IMPLEMENT_FCMP(<, Double); + default: + dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + switch (Ty->getTypeID()) { + IMPLEMENT_FCMP(>, Float); + IMPLEMENT_FCMP(>, Double); + default: + dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + return Dest; +} + +#define IMPLEMENT_UNORDERED(TY, X,Y) \ + if (TY->isFloatTy()) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != 
Y.FloatVal) { \ + Dest.IntVal = APInt(1,true); \ + return Dest; \ + } \ + } else if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,true); \ + return Dest; \ + } + + +static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OEQ(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_ONE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OLE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OGE(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OLT(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + IMPLEMENT_UNORDERED(Ty, Src1, Src2) + return executeFCMP_OGT(Src1, Src2, Ty); +} + +static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + if (Ty->isFloatTy()) + Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && + Src2.FloatVal == Src2.FloatVal)); + else + Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && + Src2.DoubleVal == Src2.DoubleVal)); + return Dest; +} + +static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, + const Type *Ty) { + GenericValue Dest; + if (Ty->isFloatTy()) + Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || + Src2.FloatVal != Src2.FloatVal)); + else + Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || + Src2.DoubleVal != Src2.DoubleVal)); + return Dest; +} + +void Interpreter::visitFCmpInst(FCmpInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getPredicate()) { + case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; + case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OEQ: R = executeFCMP_OEQ(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UNE: R = executeFCMP_UNE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ONE: R = executeFCMP_ONE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ULT: R = executeFCMP_ULT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OLT: R = executeFCMP_OLT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UGT: R = executeFCMP_UGT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OGT: R = executeFCMP_OGT(Src1, Src2, Ty); break; + case FCmpInst::FCMP_ULE: R = executeFCMP_ULE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; + case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, 
Ty); break; + default: + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); + } + + SetValue(&I, R, SF); +} + +static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, + GenericValue Src2, const Type *Ty) { + GenericValue Result; + switch (predicate) { + case ICmpInst::ICMP_EQ: return executeICMP_EQ(Src1, Src2, Ty); + case ICmpInst::ICMP_NE: return executeICMP_NE(Src1, Src2, Ty); + case ICmpInst::ICMP_UGT: return executeICMP_UGT(Src1, Src2, Ty); + case ICmpInst::ICMP_SGT: return executeICMP_SGT(Src1, Src2, Ty); + case ICmpInst::ICMP_ULT: return executeICMP_ULT(Src1, Src2, Ty); + case ICmpInst::ICMP_SLT: return executeICMP_SLT(Src1, Src2, Ty); + case ICmpInst::ICMP_UGE: return executeICMP_UGE(Src1, Src2, Ty); + case ICmpInst::ICMP_SGE: return executeICMP_SGE(Src1, Src2, Ty); + case ICmpInst::ICMP_ULE: return executeICMP_ULE(Src1, Src2, Ty); + case ICmpInst::ICMP_SLE: return executeICMP_SLE(Src1, Src2, Ty); + case FCmpInst::FCMP_ORD: return executeFCMP_ORD(Src1, Src2, Ty); + case FCmpInst::FCMP_UNO: return executeFCMP_UNO(Src1, Src2, Ty); + case FCmpInst::FCMP_OEQ: return executeFCMP_OEQ(Src1, Src2, Ty); + case FCmpInst::FCMP_UEQ: return executeFCMP_UEQ(Src1, Src2, Ty); + case FCmpInst::FCMP_ONE: return executeFCMP_ONE(Src1, Src2, Ty); + case FCmpInst::FCMP_UNE: return executeFCMP_UNE(Src1, Src2, Ty); + case FCmpInst::FCMP_OLT: return executeFCMP_OLT(Src1, Src2, Ty); + case FCmpInst::FCMP_ULT: return executeFCMP_ULT(Src1, Src2, Ty); + case FCmpInst::FCMP_OGT: return executeFCMP_OGT(Src1, Src2, Ty); + case FCmpInst::FCMP_UGT: return executeFCMP_UGT(Src1, Src2, Ty); + case FCmpInst::FCMP_OLE: return executeFCMP_OLE(Src1, Src2, Ty); + case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); + case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); + case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); + case FCmpInst::FCMP_FALSE: { + GenericValue Result; + Result.IntVal = APInt(1, false); + return Result; + } + case FCmpInst::FCMP_TRUE: { + GenericValue Result; + Result.IntVal = APInt(1, true); + return Result; + } + default: + dbgs() << "Unhandled Cmp predicate\n"; + llvm_unreachable(0); + } +} + +void Interpreter::visitBinaryOperator(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + const Type *Ty = I.getOperand(0)->getType(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue R; // Result + + switch (I.getOpcode()) { + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case 
Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + } + + SetValue(&I, R, SF); +} + +static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, + GenericValue Src3) { + return Src1.IntVal == 0 ? Src3 : Src2; +} + +void Interpreter::visitSelectInst(SelectInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue R = executeSelectInst(Src1, Src2, Src3); + SetValue(&I, R, SF); +} + + +//===----------------------------------------------------------------------===// +// Terminator Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::exitCalled(GenericValue GV) { + // runAtExitHandlers() assumes there are no stack frames, but + // if exit() was called, then it had a stack frame. Blow away + // the stack before interpreting atexit handlers. + ECStack.clear(); + runAtExitHandlers(); + exit(GV.IntVal.zextOrTrunc(32).getZExtValue()); +} + +/// Pop the last stack frame off of ECStack and then copy the result +/// back into the result variable if we are not returning void. The +/// result variable may be the ExitValue, or the Value of the calling +/// CallInst if there was a previous stack frame. This method may +/// invalidate any ECStack iterators you have. This method also takes +/// care of switching to the normal destination BB, if we are returning +/// from an invoke. +/// +void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, + GenericValue Result) { + // Pop the current stack frame. + ECStack.pop_back(); + + if (ECStack.empty()) { // Finished main. Put result into exit code... + if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type? + ExitValue = Result; // Capture the exit value of the program + } else { + memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); + } + } else { + // If we have a previous stack frame, and we have a previous call, + // fill in the return value... + ExecutionContext &CallingSF = ECStack.back(); + if (Instruction *I = CallingSF.Caller.getInstruction()) { + // Save result... + if (!CallingSF.Caller.getType()->isVoidTy()) + SetValue(I, Result, CallingSF); + if (InvokeInst *II = dyn_cast<InvokeInst>(I)) + SwitchToNewBasicBlock (II->getNormalDest (), CallingSF); + CallingSF.Caller = CallSite(); // We returned from the call... + } + } +} + +void Interpreter::visitReturnInst(ReturnInst &I) { + ExecutionContext &SF = ECStack.back(); + const Type *RetTy = Type::getVoidTy(I.getContext()); + GenericValue Result; + + // Save away the return value...
(if we are not 'ret void') + if (I.getNumOperands()) { + RetTy = I.getReturnValue()->getType(); + Result = getOperandValue(I.getReturnValue(), SF); + } + + popStackAndReturnValueToCaller(RetTy, Result); +} + +void Interpreter::visitUnwindInst(UnwindInst &I) { + // Unwind stack + Instruction *Inst; + do { + ECStack.pop_back(); + if (ECStack.empty()) + report_fatal_error("Empty stack during unwind!"); + Inst = ECStack.back().Caller.getInstruction(); + } while (!(Inst && isa<InvokeInst>(Inst))); + + // Return from invoke + ExecutionContext &InvokingSF = ECStack.back(); + InvokingSF.Caller = CallSite(); + + // Go to exceptional destination BB of invoke instruction + SwitchToNewBasicBlock(cast<InvokeInst>(Inst)->getUnwindDest(), InvokingSF); +} + +void Interpreter::visitUnreachableInst(UnreachableInst &I) { + report_fatal_error("Program executed an 'unreachable' instruction!"); +} + +void Interpreter::visitBranchInst(BranchInst &I) { + ExecutionContext &SF = ECStack.back(); + BasicBlock *Dest; + + Dest = I.getSuccessor(0); // Uncond branches have a fixed dest... + if (!I.isUnconditional()) { + Value *Cond = I.getCondition(); + if (getOperandValue(Cond, SF).IntVal == 0) // If false cond... + Dest = I.getSuccessor(1); + } + SwitchToNewBasicBlock(Dest, SF); +} + +void Interpreter::visitSwitchInst(SwitchInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue CondVal = getOperandValue(I.getOperand(0), SF); + const Type *ElTy = I.getOperand(0)->getType(); + + // Check to see if any of the cases match... + BasicBlock *Dest = 0; + for (unsigned i = 2, e = I.getNumOperands(); i != e; i += 2) + if (executeICMP_EQ(CondVal, getOperandValue(I.getOperand(i), SF), ElTy) + .IntVal != 0) { + Dest = cast<BasicBlock>(I.getOperand(i+1)); + break; + } + + if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default + SwitchToNewBasicBlock(Dest, SF); +} + +void Interpreter::visitIndirectBrInst(IndirectBrInst &I) { + ExecutionContext &SF = ECStack.back(); + void *Dest = GVTOP(getOperandValue(I.getAddress(), SF)); + SwitchToNewBasicBlock((BasicBlock*)Dest, SF); +} + + +// SwitchToNewBasicBlock - This method is used to jump to a new basic block. +// This function handles the actual updating of block and instruction iterators +// as well as execution of all of the PHI nodes in the destination block. +// +// This method does this because all of the PHI nodes must be executed +// atomically, reading their inputs before any of the results are updated. Not +// doing this can cause problems if the PHI nodes depend on other PHI nodes for +// their inputs. If the input PHI node is updated before it is read, incorrect +// results can happen. Thus we use a two phase approach. +// +void Interpreter::SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF){ + BasicBlock *PrevBB = SF.CurBB; // Remember where we came from... + SF.CurBB = Dest; // Update CurBB to branch destination + SF.CurInst = SF.CurBB->begin(); // Update new instruction ptr... + + if (!isa<PHINode>(SF.CurInst)) return; // Nothing fancy to do + + // Loop over all of the PHI nodes in the current block, reading their inputs. + std::vector<GenericValue> ResultValues; + + for (; PHINode *PN = dyn_cast<PHINode>(SF.CurInst); ++SF.CurInst) { + // Search for the value corresponding to this previous bb... + int i = PN->getBasicBlockIndex(PrevBB); + assert(i != -1 && "PHINode doesn't contain entry for predecessor??"); + Value *IncomingValue = PN->getIncomingValue(i); + + // Save the incoming value for this PHI node...
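+ // (Phase one of the two-phase approach described above: inputs are only read here; no PHI result is written until every input has been read.)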
+ ResultValues.push_back(getOperandValue(IncomingValue, SF)); + } + + // Now loop over all of the PHI nodes setting their values... + SF.CurInst = SF.CurBB->begin(); + for (unsigned i = 0; isa<PHINode>(SF.CurInst); ++SF.CurInst, ++i) { + PHINode *PN = cast<PHINode>(SF.CurInst); + SetValue(PN, ResultValues[i], SF); + } +} + +//===----------------------------------------------------------------------===// +// Memory Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::visitAllocaInst(AllocaInst &I) { + ExecutionContext &SF = ECStack.back(); + + const Type *Ty = I.getType()->getElementType(); // Type to be allocated + + // Get the number of elements being allocated by the array... + unsigned NumElements = + getOperandValue(I.getOperand(0), SF).IntVal.getZExtValue(); + + unsigned TypeSize = (size_t)TD.getTypeAllocSize(Ty); + + // Avoid malloc-ing zero bytes, use max()... + unsigned MemToAlloc = std::max(1U, NumElements * TypeSize); + + // Allocate enough memory to hold the type... + void *Memory = malloc(MemToAlloc); + + DEBUG(dbgs() << "Allocated Type: " << *Ty << " (" << TypeSize << " bytes) x " + << NumElements << " (Total: " << MemToAlloc << ") at " + << uintptr_t(Memory) << '\n'); + + GenericValue Result = PTOGV(Memory); + assert(Result.PointerVal != 0 && "Null pointer returned by malloc!"); + SetValue(&I, Result, SF); + + if (I.getOpcode() == Instruction::Alloca) + ECStack.back().Allocas.add(Memory); +} + +// getElementOffset - The workhorse for getelementptr. +// +GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I, + gep_type_iterator E, + ExecutionContext &SF) { + assert(Ptr->getType()->isPointerTy() && + "Cannot getElementOffset of a nonpointer type!"); + + uint64_t Total = 0; + + for (; I != E; ++I) { + if (const StructType *STy = dyn_cast<StructType>(*I)) { + const StructLayout *SLO = TD.getStructLayout(STy); + + const ConstantInt *CPU = cast<ConstantInt>(I.getOperand()); + unsigned Index = unsigned(CPU->getZExtValue()); + + Total += SLO->getElementOffset(Index); + } else { + const SequentialType *ST = cast<SequentialType>(*I); + // Get the index number for the array... which must be long type...
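+ // (In practice the index operand is i32 or i64; both are sign-extended into a 64-bit element offset below.)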
+ GenericValue IdxGV = getOperandValue(I.getOperand(), SF); + + int64_t Idx; + unsigned BitWidth = + cast<IntegerType>(I.getOperand()->getType())->getBitWidth(); + if (BitWidth == 32) + Idx = (int64_t)(int32_t)IdxGV.IntVal.getZExtValue(); + else { + assert(BitWidth == 64 && "Invalid index type for getelementptr"); + Idx = (int64_t)IdxGV.IntVal.getZExtValue(); + } + Total += TD.getTypeAllocSize(ST->getElementType())*Idx; + } + } + + GenericValue Result; + Result.PointerVal = ((char*)getOperandValue(Ptr, SF).PointerVal) + Total; + DEBUG(dbgs() << "GEP Index " << Total << " bytes.\n"); + return Result; +} + +void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeGEPOperation(I.getPointerOperand(), + gep_type_begin(I), gep_type_end(I), SF), SF); +} + +void Interpreter::visitLoadInst(LoadInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue SRC = getOperandValue(I.getPointerOperand(), SF); + GenericValue *Ptr = (GenericValue*)GVTOP(SRC); + GenericValue Result; + LoadValueFromMemory(Result, Ptr, I.getType()); + SetValue(&I, Result, SF); + if (I.isVolatile() && PrintVolatile) + dbgs() << "Volatile load " << I; +} + +void Interpreter::visitStoreInst(StoreInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Val = getOperandValue(I.getOperand(0), SF); + GenericValue SRC = getOperandValue(I.getPointerOperand(), SF); + StoreValueToMemory(Val, (GenericValue *)GVTOP(SRC), + I.getOperand(0)->getType()); + if (I.isVolatile() && PrintVolatile) + dbgs() << "Volatile store: " << I; +} + +//===----------------------------------------------------------------------===// +// Miscellaneous Instruction Implementations +//===----------------------------------------------------------------------===// + +void Interpreter::visitCallSite(CallSite CS) { + ExecutionContext &SF = ECStack.back(); + + // Check to see if this is an intrinsic function call... + Function *F = CS.getCalledFunction(); + if (F && F->isDeclaration()) + switch (F->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + break; + case Intrinsic::vastart: { // va_start + GenericValue ArgIndex; + ArgIndex.UIntPairVal.first = ECStack.size() - 1; + ArgIndex.UIntPairVal.second = 0; + SetValue(CS.getInstruction(), ArgIndex, SF); + return; + } + case Intrinsic::vaend: // va_end is a noop for the interpreter + return; + case Intrinsic::vacopy: // va_copy: dest = src + SetValue(CS.getInstruction(), getOperandValue(*CS.arg_begin(), SF), SF); + return; + default: + // If it is an unknown intrinsic function, use the intrinsic lowering + // class to transform it into hopefully tasty LLVM code. + // + BasicBlock::iterator me(CS.getInstruction()); + BasicBlock *Parent = CS.getInstruction()->getParent(); + bool atBegin(Parent->begin() == me); + if (!atBegin) + --me; + IL->LowerIntrinsicCall(cast<CallInst>(CS.getInstruction())); + + // Restore the CurInst pointer to the first instruction newly inserted, if + // any. + if (atBegin) { + SF.CurInst = Parent->begin(); + } else { + SF.CurInst = me; + ++SF.CurInst; + } + return; + } + + + SF.Caller = CS; + std::vector<GenericValue> ArgVals; + const unsigned NumArgs = SF.Caller.arg_size(); + ArgVals.reserve(NumArgs); + uint16_t pNum = 1; + for (CallSite::arg_iterator i = SF.Caller.arg_begin(), + e = SF.Caller.arg_end(); i != e; ++i, ++pNum) { + Value *V = *i; + ArgVals.push_back(getOperandValue(V, SF)); + } + + // To handle indirect calls, we must get the pointer value from the argument + // and treat it as a function pointer.
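+ // GVTOP converts the GenericValue back into a raw pointer; for a direct call this is simply the callee Function itself.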
+ GenericValue SRC = getOperandValue(SF.Caller.getCalledValue(), SF); + callFunction((Function*)GVTOP(SRC), ArgVals); +} + +void Interpreter::visitShl(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +void Interpreter::visitLShr(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +void Interpreter::visitAShr(BinaryOperator &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) + Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue()); + else + Dest.IntVal = Src1.IntVal; + + SetValue(&I, Dest, SF); +} + +GenericValue Interpreter::executeTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.trunc(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeSExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.sext(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeZExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.zext(DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && + "Invalid FPTrunc instruction"); + Dest.FloatVal = (float) Src.DoubleVal; + return Dest; +} + +GenericValue Interpreter::executeFPExtInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && + "Invalid FPExt instruction"); + Dest.DoubleVal = (double) Src.FloatVal; + return Dest; +} + +GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); + uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + return Dest; +} + +GenericValue
Interpreter::executeFPToSIInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); + uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + return Dest; +} + +GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); + + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); + else + Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + return Dest; +} + +GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); + + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); + else + Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); + return Dest; + +} + +GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction"); + + Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal); + return Dest; +} + +GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + assert(DstTy->isPointerTy() && "Invalid IntToPtr instruction"); + + uint32_t PtrSize = TD.getPointerSizeInBits(); + if (PtrSize != Src.IntVal.getBitWidth()) + Src.IntVal = Src.IntVal.zextOrTrunc(PtrSize); + + Dest.PointerVal = PointerTy(intptr_t(Src.IntVal.getZExtValue())); + return Dest; +} + +GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy, + ExecutionContext &SF) { + + const Type *SrcTy = SrcVal->getType(); + GenericValue Dest, Src = getOperandValue(SrcVal, SF); + if (DstTy->isPointerTy()) { + assert(SrcTy->isPointerTy() && "Invalid BitCast"); + Dest.PointerVal = Src.PointerVal; + } else if (DstTy->isIntegerTy()) { + if (SrcTy->isFloatTy()) { + Dest.IntVal = APInt::floatToBits(Src.FloatVal); + } else if (SrcTy->isDoubleTy()) { + Dest.IntVal = APInt::doubleToBits(Src.DoubleVal); + } else if (SrcTy->isIntegerTy()) { + Dest.IntVal = Src.IntVal; + } else + llvm_unreachable("Invalid BitCast"); + } else if (DstTy->isFloatTy()) { + if (SrcTy->isIntegerTy()) + Dest.FloatVal = Src.IntVal.bitsToFloat(); + else + Dest.FloatVal = Src.FloatVal; + } else if (DstTy->isDoubleTy()) { + if (SrcTy->isIntegerTy()) + Dest.DoubleVal = Src.IntVal.bitsToDouble(); + else + Dest.DoubleVal = Src.DoubleVal; + } else + llvm_unreachable("Invalid Bitcast"); + + return Dest; +} + +void Interpreter::visitTruncInst(TruncInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeTruncInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitSExtInst(SExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeSExtInst(I.getOperand(0), I.getType(), SF),
SF); +} + +void Interpreter::visitZExtInst(ZExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeZExtInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPTruncInst(FPTruncInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPTruncInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPExtInst(FPExtInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPExtInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitUIToFPInst(UIToFPInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeUIToFPInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitSIToFPInst(SIToFPInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeSIToFPInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPToUIInst(FPToUIInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPToUIInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitFPToSIInst(FPToSIInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeFPToSIInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitPtrToIntInst(PtrToIntInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executePtrToIntInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitIntToPtrInst(IntToPtrInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeIntToPtrInst(I.getOperand(0), I.getType(), SF), SF); +} + +void Interpreter::visitBitCastInst(BitCastInst &I) { + ExecutionContext &SF = ECStack.back(); + SetValue(&I, executeBitCastInst(I.getOperand(0), I.getType(), SF), SF); +} + +#define IMPLEMENT_VAARG(TY) \ + case Type::TY##TyID: Dest.TY##Val = Src.TY##Val; break + +void Interpreter::visitVAArgInst(VAArgInst &I) { + ExecutionContext &SF = ECStack.back(); + + // Get the incoming valist parameter. LLI treats the valist as a + // (ec-stack-depth var-arg-index) pair. + GenericValue VAList = getOperandValue(I.getOperand(0), SF); + GenericValue Dest; + GenericValue Src = ECStack[VAList.UIntPairVal.first] + .VarArgs[VAList.UIntPairVal.second]; + const Type *Ty = I.getType(); + switch (Ty->getTypeID()) { + case Type::IntegerTyID: Dest.IntVal = Src.IntVal; + IMPLEMENT_VAARG(Pointer); + IMPLEMENT_VAARG(Float); + IMPLEMENT_VAARG(Double); + default: + dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + + // Set the Value of this Instruction. + SetValue(&I, Dest, SF); + + // Move the pointer to the next vararg. 
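+ // (VAList is the (stack-depth, index) pair produced by va_start; the index picks a slot in that frame's VarArgs vector.)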
+ ++VAList.UIntPairVal.second; +} + +GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, + ExecutionContext &SF) { + switch (CE->getOpcode()) { + case Instruction::Trunc: + return executeTruncInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::ZExt: + return executeZExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::SExt: + return executeSExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPTrunc: + return executeFPTruncInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPExt: + return executeFPExtInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::UIToFP: + return executeUIToFPInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::SIToFP: + return executeSIToFPInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPToUI: + return executeFPToUIInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::FPToSI: + return executeFPToSIInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::PtrToInt: + return executePtrToIntInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::IntToPtr: + return executeIntToPtrInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::BitCast: + return executeBitCastInst(CE->getOperand(0), CE->getType(), SF); + case Instruction::GetElementPtr: + return executeGEPOperation(CE->getOperand(0), gep_type_begin(CE), + gep_type_end(CE), SF); + case Instruction::FCmp: + case Instruction::ICmp: + return executeCmpInst(CE->getPredicate(), + getOperandValue(CE->getOperand(0), SF), + getOperandValue(CE->getOperand(1), SF), + CE->getOperand(0)->getType()); + case Instruction::Select: + return executeSelectInst(getOperandValue(CE->getOperand(0), SF), + getOperandValue(CE->getOperand(1), SF), + getOperandValue(CE->getOperand(2), SF)); + default : + break; + } + + // The cases below here require a GenericValue parameter for the result + // so we initialize one, compute it and then return it. 
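+ // (All remaining opcodes are binary operators, so both constant operands are evaluated up front.)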
+ GenericValue Op0 = getOperandValue(CE->getOperand(0), SF); + GenericValue Op1 = getOperandValue(CE->getOperand(1), SF); + GenericValue Dest; + const Type *Ty = CE->getOperand(0)->getType(); + switch (CE->getOpcode()) { + case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break; + case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break; + case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break; + case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break; + case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break; + case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break; + case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break; + case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break; + case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break; + case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break; + case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break; + case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break; + case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break; + case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break; + case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break; + case Instruction::Shl: + Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue()); + break; + case Instruction::LShr: + Dest.IntVal = Op0.IntVal.lshr(Op1.IntVal.getZExtValue()); + break; + case Instruction::AShr: + Dest.IntVal = Op0.IntVal.ashr(Op1.IntVal.getZExtValue()); + break; + default: + dbgs() << "Unhandled ConstantExpr: " << *CE << "\n"; + llvm_unreachable(0); + return GenericValue(); + } + return Dest; +} + +GenericValue Interpreter::getOperandValue(Value *V, ExecutionContext &SF) { + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + return getConstantExprValue(CE, SF); + } else if (Constant *CPV = dyn_cast<Constant>(V)) { + return getConstantValue(CPV); + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { + return PTOGV(getPointerToGlobal(GV)); + } else { + return SF.Values[V]; + } +} + +//===----------------------------------------------------------------------===// +// Dispatch and Execution Code +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// callFunction - Execute the specified function... +// +void Interpreter::callFunction(Function *F, + const std::vector<GenericValue> &ArgVals) { + assert((ECStack.empty() || ECStack.back().Caller.getInstruction() == 0 || + ECStack.back().Caller.arg_size() == ArgVals.size()) && + "Incorrect number of arguments passed into function call!"); + // Make a new stack frame... and fill it in. + ECStack.push_back(ExecutionContext()); + ExecutionContext &StackFrame = ECStack.back(); + StackFrame.CurFunction = F; + + // Special handling for external functions. + if (F->isDeclaration()) { + GenericValue Result = callExternalFunction (F, ArgVals); + // Simulate a 'ret' instruction of the appropriate type. + popStackAndReturnValueToCaller (F->getReturnType (), Result); + return; + } + + // Get pointers to first LLVM BB & Instruction in function. + StackFrame.CurBB = F->begin(); + StackFrame.CurInst = StackFrame.CurBB->begin(); + + // Run through the function arguments and initialize their values...
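+ // Formal parameters consume the leading ArgVals; any surplus values become the frame's VarArgs, assigned below.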
+ assert((ArgVals.size() == F->arg_size() || + (ArgVals.size() > F->arg_size() && F->getFunctionType()->isVarArg()))&& + "Invalid number of values passed to function invocation!"); + + // Handle non-varargs arguments... + unsigned i = 0; + for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); + AI != E; ++AI, ++i) + SetValue(AI, ArgVals[i], StackFrame); + + // Handle varargs arguments... + StackFrame.VarArgs.assign(ArgVals.begin()+i, ArgVals.end()); +} + + +void Interpreter::run() { + while (!ECStack.empty()) { + // Interpret a single instruction & increment the "PC". + ExecutionContext &SF = ECStack.back(); // Current stack frame + Instruction &I = *SF.CurInst++; // Increment before execute + + // Track the number of dynamic instructions executed. + ++NumDynamicInsts; + + DEBUG(dbgs() << "About to interpret: " << I); + visit(I); // Dispatch to one of the visit* methods... +#if 0 + // This is not safe, as visiting the instruction could lower it and free I. +DEBUG( + if (!isa<CallInst>(I) && !isa<InvokeInst>(I) && + I.getType() != Type::VoidTy) { + dbgs() << " --> "; + const GenericValue &Val = SF.Values[&I]; + switch (I.getType()->getTypeID()) { + default: llvm_unreachable("Invalid GenericValue Type"); + case Type::VoidTyID: dbgs() << "void"; break; + case Type::FloatTyID: dbgs() << "float " << Val.FloatVal; break; + case Type::DoubleTyID: dbgs() << "double " << Val.DoubleVal; break; + case Type::PointerTyID: dbgs() << "void* " << intptr_t(Val.PointerVal); + break; + case Type::IntegerTyID: + dbgs() << "i" << Val.IntVal.getBitWidth() << " " + << Val.IntVal.toStringUnsigned(10) + << " (0x" << Val.IntVal.toStringUnsigned(16) << ")\n"; + break; + } + }); +#endif + } +} diff --git a/final/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/final/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp new file mode 100644 index 00000000000..062256a2ac7 --- /dev/null +++ b/final/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -0,0 +1,492 @@ +//===-- ExternalFunctions.cpp - Implement External Functions --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains code to deal with invoking "external" functions, as well +// as code that implements "exported" external functions. +// +// There are currently two mechanisms for handling external functions in the +// Interpreter. The first is to implement lle_* wrapper functions that are +// specific to well-known library functions which manually translate the +// arguments from GenericValues and make the call. If such a wrapper does +// not exist, and libffi is available, then the Interpreter will attempt to +// invoke the function using libffi, after finding its address.
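+// +// For example, a wrapper serving a hypothetical external function declared as +// "double my_hypot(double, double)" could look like the following sketch +// (my_hypot and lle_X_my_hypot are illustrative names, not functions this +// file actually provides): +// +// GenericValue lle_X_my_hypot(const FunctionType *FT, +// const std::vector<GenericValue> &Args) { +// GenericValue GV; // Translate from/to GenericValues by hand. +// GV.DoubleVal = sqrt(Args[0].DoubleVal * Args[0].DoubleVal + +// Args[1].DoubleVal * Args[1].DoubleVal); +// return GV; +// }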
+// +//===----------------------------------------------------------------------===// + +#include "Interpreter.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include "llvm/Config/config.h" // Detect libffi +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include <csignal> +#include <cstdio> +#include <map> +#include <cmath> +#include <cstring> + +#ifdef HAVE_FFI_CALL +#ifdef HAVE_FFI_H +#include <ffi.h> +#define USE_LIBFFI +#elif HAVE_FFI_FFI_H +#include <ffi/ffi.h> +#define USE_LIBFFI +#endif +#endif + +using namespace llvm; + +static ManagedStatic<sys::Mutex> FunctionsLock; + +typedef GenericValue (*ExFunc)(const FunctionType *, + const std::vector<GenericValue> &); +static ManagedStatic<std::map<const Function *, ExFunc> > ExportedFunctions; +static std::map<std::string, ExFunc> FuncNames; + +#ifdef USE_LIBFFI +typedef void (*RawFunc)(); +static ManagedStatic<std::map<const Function *, RawFunc> > RawFunctions; +#endif + +static Interpreter *TheInterpreter; + +static char getTypeID(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return 'V'; + case Type::IntegerTyID: + switch (cast<IntegerType>(Ty)->getBitWidth()) { + case 1: return 'o'; + case 8: return 'B'; + case 16: return 'S'; + case 32: return 'I'; + case 64: return 'L'; + default: return 'N'; + } + case Type::FloatTyID: return 'F'; + case Type::DoubleTyID: return 'D'; + case Type::PointerTyID: return 'P'; + case Type::FunctionTyID:return 'M'; + case Type::StructTyID: return 'T'; + case Type::ArrayTyID: return 'A'; + case Type::OpaqueTyID: return 'O'; + default: return 'U'; + } +} + +// Try to find the address of an external function given a Function object. +// Note that the interpreter does not know how to assemble a real call in the +// general case (that is the JIT's job); it therefore assumes that all external +// functions have the same (and fairly "general") signature. +// Typical examples of such functions are the "lle_X_" ones. +static ExFunc lookupFunction(const Function *F) { + // Function not found, look it up... start by figuring out what the + // composite function name should be. + std::string ExtName = "lle_"; + const FunctionType *FT = F->getFunctionType(); + for (unsigned i = 0, e = FT->getNumContainedTypes(); i != e; ++i) + ExtName += getTypeID(FT->getContainedType(i)); + ExtName += "_" + F->getNameStr(); + + sys::ScopedLock Writer(*FunctionsLock); + ExFunc FnPtr = FuncNames[ExtName]; + if (FnPtr == 0) + FnPtr = FuncNames["lle_X_" + F->getNameStr()]; + if (FnPtr == 0) // Try calling a generic function... if it exists... + FnPtr = (ExFunc)(intptr_t) + sys::DynamicLibrary::SearchForAddressOfSymbol("lle_X_"+F->getNameStr()); + if (FnPtr != 0) + ExportedFunctions->insert(std::make_pair(F, FnPtr)); // Cache for later + return FnPtr; +} + +#ifdef USE_LIBFFI +static ffi_type *ffiTypeFor(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return &ffi_type_void; + case Type::IntegerTyID: + switch (cast<IntegerType>(Ty)->getBitWidth()) { + case 8: return &ffi_type_sint8; + case 16: return &ffi_type_sint16; + case 32: return &ffi_type_sint32; + case 64: return &ffi_type_sint64; + } + case Type::FloatTyID: return &ffi_type_float; + case Type::DoubleTyID: return &ffi_type_double; + case Type::PointerTyID: return &ffi_type_pointer; + default: break; + } + // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc.
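+ // Anything not matched above is a hard error: guessing an FFI type here would corrupt the native call frame.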
+ report_fatal_error("Type could not be mapped for use with libffi."); + return NULL; +} + +static void *ffiValueFor(const Type *Ty, const GenericValue &AV, + void *ArgDataPtr) { + switch (Ty->getTypeID()) { + case Type::IntegerTyID: + switch (cast<IntegerType>(Ty)->getBitWidth()) { + case 8: { + int8_t *I8Ptr = (int8_t *) ArgDataPtr; + *I8Ptr = (int8_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 16: { + int16_t *I16Ptr = (int16_t *) ArgDataPtr; + *I16Ptr = (int16_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 32: { + int32_t *I32Ptr = (int32_t *) ArgDataPtr; + *I32Ptr = (int32_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + case 64: { + int64_t *I64Ptr = (int64_t *) ArgDataPtr; + *I64Ptr = (int64_t) AV.IntVal.getZExtValue(); + return ArgDataPtr; + } + } + case Type::FloatTyID: { + float *FloatPtr = (float *) ArgDataPtr; + *FloatPtr = AV.FloatVal; + return ArgDataPtr; + } + case Type::DoubleTyID: { + double *DoublePtr = (double *) ArgDataPtr; + *DoublePtr = AV.DoubleVal; + return ArgDataPtr; + } + case Type::PointerTyID: { + void **PtrPtr = (void **) ArgDataPtr; + *PtrPtr = GVTOP(AV); + return ArgDataPtr; + } + default: break; + } + // TODO: Support other types such as StructTyID, ArrayTyID, OpaqueTyID, etc. + report_fatal_error("Type value could not be mapped for use with libffi."); + return NULL; +} + +static bool ffiInvoke(RawFunc Fn, Function *F, + const std::vector<GenericValue> &ArgVals, + const TargetData *TD, GenericValue &Result) { + ffi_cif cif; + const FunctionType *FTy = F->getFunctionType(); + const unsigned NumArgs = F->arg_size(); + + // TODO: We don't have type information about the remaining arguments, because + // this information is never passed into ExecutionEngine::runFunction(). + if (ArgVals.size() > NumArgs && F->isVarArg()) { + report_fatal_error("Calling external var arg function '" + F->getName() + + "' is not supported by the Interpreter."); + } + + unsigned ArgBytes = 0; + + std::vector<ffi_type*> args(NumArgs); + for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); + A != E; ++A) { + const unsigned ArgNo = A->getArgNo(); + const Type *ArgTy = FTy->getParamType(ArgNo); + args[ArgNo] = ffiTypeFor(ArgTy); + ArgBytes += TD->getTypeStoreSize(ArgTy); + } + + SmallVector<uint8_t, 128> ArgData; + ArgData.resize(ArgBytes); + uint8_t *ArgDataPtr = ArgData.data(); + SmallVector<void*, 16> values(NumArgs); + for (Function::const_arg_iterator A = F->arg_begin(), E = F->arg_end(); + A != E; ++A) { + const unsigned ArgNo = A->getArgNo(); + const Type *ArgTy = FTy->getParamType(ArgNo); + values[ArgNo] = ffiValueFor(ArgTy, ArgVals[ArgNo], ArgDataPtr); + ArgDataPtr += TD->getTypeStoreSize(ArgTy); + } + + const Type *RetTy = FTy->getReturnType(); + ffi_type *rtype = ffiTypeFor(RetTy); + + if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, NumArgs, rtype, &args[0]) == FFI_OK) { + SmallVector<uint8_t, 128> ret; + if (RetTy->getTypeID() != Type::VoidTyID) + ret.resize(TD->getTypeStoreSize(RetTy)); + ffi_call(&cif, Fn, ret.data(), values.data()); + switch (RetTy->getTypeID()) { + case Type::IntegerTyID: + switch (cast<IntegerType>(RetTy)->getBitWidth()) { + case 8: Result.IntVal = APInt(8 , *(int8_t *) ret.data()); break; + case 16: Result.IntVal = APInt(16, *(int16_t*) ret.data()); break; + case 32: Result.IntVal = APInt(32, *(int32_t*) ret.data()); break; + case 64: Result.IntVal = APInt(64, *(int64_t*) ret.data()); break; + } + break; + case Type::FloatTyID: Result.FloatVal = *(float *) ret.data(); break; + case Type::DoubleTyID: Result.DoubleVal = *(double*) ret.data(); break; + case Type::PointerTyID: Result.PointerVal =
*(void **) ret.data(); break; + default: break; + } + return true; + } + + return false; +} +#endif // USE_LIBFFI + +GenericValue Interpreter::callExternalFunction(Function *F, + const std::vector<GenericValue> &ArgVals) { + TheInterpreter = this; + + FunctionsLock->acquire(); + + // Do a lookup to see if the function is in our cache... this should just be a + // deferred annotation! + std::map<const Function *, ExFunc>::iterator FI = ExportedFunctions->find(F); + if (ExFunc Fn = (FI == ExportedFunctions->end()) ? lookupFunction(F) + : FI->second) { + FunctionsLock->release(); + return Fn(F->getFunctionType(), ArgVals); + } + +#ifdef USE_LIBFFI + std::map<const Function *, RawFunc>::iterator RF = RawFunctions->find(F); + RawFunc RawFn; + if (RF == RawFunctions->end()) { + RawFn = (RawFunc)(intptr_t) + sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName()); + if (!RawFn) + RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); + if (RawFn != 0) + RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later + } else { + RawFn = RF->second; + } + + FunctionsLock->release(); + + GenericValue Result; + if (RawFn != 0 && ffiInvoke(RawFn, F, ArgVals, getTargetData(), Result)) + return Result; +#endif // USE_LIBFFI + + if (F->getName() == "__main") + errs() << "Tried to execute an unknown external function: " + << F->getType()->getDescription() << " __main\n"; + else + report_fatal_error("Tried to execute an unknown external function: " + + F->getType()->getDescription() + " " +F->getName()); +#ifndef USE_LIBFFI + errs() << "Recompiling LLVM with --enable-libffi might help.\n"; +#endif + return GenericValue(); +} + + +//===----------------------------------------------------------------------===// +// Functions "exported" to the running application... +// + +// Visual Studio warns about returning GenericValue in extern "C" linkage +#ifdef _MSC_VER + #pragma warning(disable : 4190) +#endif + +extern "C" { // Don't add C++ manglings to llvm mangling :) + +// void atexit(Function*) +GenericValue lle_X_atexit(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + assert(Args.size() == 1); + TheInterpreter->addAtExitHandler((Function*)GVTOP(Args[0])); + GenericValue GV; + GV.IntVal = 0; + return GV; +} + +// void exit(int) +GenericValue lle_X_exit(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + TheInterpreter->exitCalled(Args[0]); + return GenericValue(); +} + +// void abort(void) +GenericValue lle_X_abort(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + //FIXME: should we report or raise here? + //report_fatal_error("Interpreted program raised SIGABRT"); + raise (SIGABRT); + return GenericValue(); +} + +// int sprintf(char *, const char *, ...) - a very rough implementation to make +// output useful. +GenericValue lle_X_sprintf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + char *OutputBuffer = (char *)GVTOP(Args[0]); + const char *FmtStr = (const char *)GVTOP(Args[1]); + unsigned ArgNo = 2; + + // printf should return # chars printed. This is completely incorrect, but + // close enough for now. + GenericValue GV; + GV.IntVal = APInt(32, strlen(FmtStr)); + while (1) { + switch (*FmtStr) { + case 0: return GV; // Null terminator...
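+ // (Each loop iteration below copies one literal character or expands one '%' specifier into OutputBuffer.)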
+ default: // Normal nonspecial character + sprintf(OutputBuffer++, "%c", *FmtStr++); + break; + case '\\': { // Handle escape codes + sprintf(OutputBuffer, "%c%c", *FmtStr, *(FmtStr+1)); + FmtStr += 2; OutputBuffer += 2; + break; + } + case '%': { // Handle format specifiers + char FmtBuf[100] = "", Buffer[1000] = ""; + char *FB = FmtBuf; + *FB++ = *FmtStr++; + char Last = *FB++ = *FmtStr++; + unsigned HowLong = 0; + while (Last != 'c' && Last != 'd' && Last != 'i' && Last != 'u' && + Last != 'o' && Last != 'x' && Last != 'X' && Last != 'e' && + Last != 'E' && Last != 'g' && Last != 'G' && Last != 'f' && + Last != 'p' && Last != 's' && Last != '%') { + if (Last == 'l' || Last == 'L') HowLong++; // Keep track of l's + Last = *FB++ = *FmtStr++; + } + *FB = 0; + + switch (Last) { + case '%': + memcpy(Buffer, "%", 2); break; + case 'c': + sprintf(Buffer, FmtBuf, uint32_t(Args[ArgNo++].IntVal.getZExtValue())); + break; + case 'd': case 'i': + case 'u': case 'o': + case 'x': case 'X': + if (HowLong >= 1) { + if (HowLong == 1 && + TheInterpreter->getTargetData()->getPointerSizeInBits() == 64 && + sizeof(long) < sizeof(int64_t)) { + // Make sure we use %lld with a 64 bit argument because we might be + // compiling LLI on a 32 bit compiler. + unsigned Size = strlen(FmtBuf); + FmtBuf[Size] = FmtBuf[Size-1]; + FmtBuf[Size+1] = 0; + FmtBuf[Size-1] = 'l'; + } + sprintf(Buffer, FmtBuf, Args[ArgNo++].IntVal.getZExtValue()); + } else + sprintf(Buffer, FmtBuf,uint32_t(Args[ArgNo++].IntVal.getZExtValue())); + break; + case 'e': case 'E': case 'g': case 'G': case 'f': + sprintf(Buffer, FmtBuf, Args[ArgNo++].DoubleVal); break; + case 'p': + sprintf(Buffer, FmtBuf, (void*)GVTOP(Args[ArgNo++])); break; + case 's': + sprintf(Buffer, FmtBuf, (char*)GVTOP(Args[ArgNo++])); break; + default: + errs() << "<unknown printf code '" << *FmtStr << "'!>"; + ArgNo++; break; + } + size_t Len = strlen(Buffer); + memcpy(OutputBuffer, Buffer, Len + 1); + OutputBuffer += Len; + } + break; + } + } + return GV; +} + +// int printf(const char *, ...) - a very rough implementation to make output +// useful. +GenericValue lle_X_printf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + char Buffer[10000]; + std::vector<GenericValue> NewArgs; + NewArgs.push_back(PTOGV((void*)&Buffer[0])); + NewArgs.insert(NewArgs.end(), Args.begin(), Args.end()); + GenericValue GV = lle_X_sprintf(FT, NewArgs); + outs() << Buffer; + return GV; +} + +// int sscanf(const char *format, ...); +GenericValue lle_X_sscanf(const FunctionType *FT, + const std::vector<GenericValue> &args) { + assert(args.size() < 10 && "Only handle up to 10 args to sscanf right now!"); + + char *Args[10]; + for (unsigned i = 0; i < args.size(); ++i) + Args[i] = (char*)GVTOP(args[i]); + + GenericValue GV; + GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9])); + return GV; +} + +// int scanf(const char *format, ...); +GenericValue lle_X_scanf(const FunctionType *FT, + const std::vector<GenericValue> &args) { + assert(args.size() < 10 && "Only handle up to 10 args to scanf right now!"); + + char *Args[10]; + for (unsigned i = 0; i < args.size(); ++i) + Args[i] = (char*)GVTOP(args[i]); + + GenericValue GV; + GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], + Args[5], Args[6], Args[7], Args[8], Args[9])); + return GV; +} + +// int fprintf(FILE *, const char *, ...) - a very rough implementation to make +// output useful.
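+// Like lle_X_printf, it delegates format expansion to lle_X_sprintf and then writes the buffer, here with fputs to the FILE* passed as the first argument.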
+GenericValue lle_X_fprintf(const FunctionType *FT, + const std::vector<GenericValue> &Args) { + assert(Args.size() >= 2); + char Buffer[10000]; + std::vector<GenericValue> NewArgs; + NewArgs.push_back(PTOGV(Buffer)); + NewArgs.insert(NewArgs.end(), Args.begin()+1, Args.end()); + GenericValue GV = lle_X_sprintf(FT, NewArgs); + + fputs(Buffer, (FILE *) GVTOP(Args[0])); + return GV; +} + +} // End extern "C" + +// Done with externals; turn the warning back on +#ifdef _MSC_VER + #pragma warning(default: 4190) +#endif + + +void Interpreter::initializeExternalFunctions() { + sys::ScopedLock Writer(*FunctionsLock); + FuncNames["lle_X_atexit"] = lle_X_atexit; + FuncNames["lle_X_exit"] = lle_X_exit; + FuncNames["lle_X_abort"] = lle_X_abort; + + FuncNames["lle_X_printf"] = lle_X_printf; + FuncNames["lle_X_sprintf"] = lle_X_sprintf; + FuncNames["lle_X_sscanf"] = lle_X_sscanf; + FuncNames["lle_X_scanf"] = lle_X_scanf; + FuncNames["lle_X_fprintf"] = lle_X_fprintf; +} diff --git a/final/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/final/lib/ExecutionEngine/Interpreter/Interpreter.cpp new file mode 100644 index 00000000000..43e34533c7b --- /dev/null +++ b/final/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -0,0 +1,98 @@ +//===- Interpreter.cpp - Top-Level LLVM Interpreter Implementation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the top-level functionality for the LLVM interpreter. +// This interpreter is designed to be a very simple, portable, inefficient +// interpreter. +// +//===----------------------------------------------------------------------===// + +#include "Interpreter.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" +#include <cstring> +using namespace llvm; + +namespace { + +static struct RegisterInterp { + RegisterInterp() { Interpreter::Register(); } +} InterpRegistrator; + +} + +extern "C" void LLVMLinkInInterpreter() { } + +/// create - Create a new interpreter object. This can never fail. +/// +ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { + // Tell this Module to materialize everything and release the GVMaterializer. + if (M->MaterializeAllPermanently(ErrStr)) + // We got an error, just return 0 + return 0; + + return new Interpreter(M); +} + +//===----------------------------------------------------------------------===// +// Interpreter ctor - Initialize stuff +// +Interpreter::Interpreter(Module *M) + : ExecutionEngine(M), TD(M) { + + memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); + setTargetData(&TD); + // Initialize the "backend" + initializeExecutionEngine(); + initializeExternalFunctions(); + emitGlobals(); + + IL = new IntrinsicLowering(TD); +} + +Interpreter::~Interpreter() { + delete IL; +} + +void Interpreter::runAtExitHandlers () { + while (!AtExitHandlers.empty()) { + callFunction(AtExitHandlers.back(), std::vector<GenericValue>()); + AtExitHandlers.pop_back(); + run(); + } +} + +/// run - Start execution with the specified function and arguments. +/// +GenericValue +Interpreter::runFunction(Function *F, + const std::vector<GenericValue> &ArgValues) { + assert (F && "Function *F was null at entry to run()"); + + // Try extra hard not to pass extra args to a function that isn't + // expecting them.
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue
+Interpreter::runFunction(Function *F,
+                         const std::vector<GenericValue> &ArgValues) {
+  assert (F && "Function *F was null at entry to run()");
+
+  // Try extra hard not to pass extra args to a function that isn't
+  // expecting them.  C programmers frequently bend the rules and
+  // declare main() with fewer parameters than it actually gets
+  // passed, and the interpreter barfs if you pass a function more
+  // parameters than it is declared to take. This does not attempt to
+  // take into account gratuitous differences in declared types,
+  // though.
+  std::vector<GenericValue> ActualArgs;
+  const unsigned ArgCount = F->getFunctionType()->getNumParams();
+  for (unsigned i = 0; i < ArgCount; ++i)
+    ActualArgs.push_back(ArgValues[i]);
+
+  // Set up the function call.
+  callFunction(F, ActualArgs);
+
+  // Start executing the function.
+  run();
+
+  return ExitValue;
+}
diff --git a/final/lib/ExecutionEngine/Interpreter/Interpreter.h b/final/lib/ExecutionEngine/Interpreter/Interpreter.h
new file mode 100644
index 00000000000..bfebe3debfc
--- /dev/null
+++ b/final/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -0,0 +1,242 @@
+//===-- Interpreter.h ------------------------------------------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header file defines the interpreter structure
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLI_INTERPRETER_H
+#define LLI_INTERPRETER_H
+
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Support/raw_ostream.h"
+namespace llvm {
+
+class IntrinsicLowering;
+struct FunctionInfo;
+template<typename T> class generic_gep_type_iterator;
+class ConstantExpr;
+typedef generic_gep_type_iterator<User::const_op_iterator> gep_type_iterator;
+
+
+// AllocaHolder - Object to track all of the blocks of memory allocated by
+// alloca.  When the function returns, this object is popped off the execution
+// stack, which causes the dtor to be run, which frees all the alloca'd memory.
+//
+class AllocaHolder {
+  friend class AllocaHolderHandle;
+  std::vector<void*> Allocations;
+  unsigned RefCnt;
+public:
+  AllocaHolder() : RefCnt(0) {}
+  void add(void *mem) { Allocations.push_back(mem); }
+  ~AllocaHolder() {
+    for (unsigned i = 0; i < Allocations.size(); ++i)
+      free(Allocations[i]);
+  }
+};
+
+// AllocaHolderHandle gives AllocaHolder value semantics so we can stick it into
+// a vector...
+//
+class AllocaHolderHandle {
+  AllocaHolder *H;
+public:
+  AllocaHolderHandle() : H(new AllocaHolder()) { H->RefCnt++; }
+  AllocaHolderHandle(const AllocaHolderHandle &AH) : H(AH.H) { H->RefCnt++; }
+  ~AllocaHolderHandle() { if (--H->RefCnt == 0) delete H; }
+
+  void add(void *mem) { H->add(mem); }
+};
+
+typedef std::vector<GenericValue> ValuePlaneTy;
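
One subtlety worth spelling out: stack frames live in a std::vector (ECStack, below), so an ExecutionContext and the handle inside it get copied whenever that vector reallocates. The shared reference count is what keeps a frame's allocas alive across those copies and frees them exactly once. A sketch of the intended semantics (hypothetical driver):

    AllocaHolderHandle A;        // fresh holder, RefCnt == 1
    A.add(malloc(16));           // block now owned by the holder
    {
      AllocaHolderHandle B(A);   // copy shares the holder, RefCnt == 2
    }                            // B dies: RefCnt back to 1, nothing freed
    // when A dies, RefCnt hits 0 and the dtor free()s the block once
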
+
+// ExecutionContext struct - This struct represents one stack frame currently
+// executing.
+//
+struct ExecutionContext {
+  Function             *CurFunction;// The currently executing function
+  BasicBlock           *CurBB;      // The currently executing BB
+  BasicBlock::iterator  CurInst;    // The next instruction to execute
+  std::map<Value *, GenericValue> Values; // LLVM values used in this invocation
+  std::vector<GenericValue>  VarArgs; // Values passed through an ellipsis
+  CallSite             Caller;      // Holds the call that called subframes.
+                                    // NULL if main func or debugger invoked fn
+  AllocaHolderHandle    Allocas;    // Track memory allocated by alloca
+};
+
+// Interpreter - This class represents the entirety of the interpreter.
+//
+class Interpreter : public ExecutionEngine, public InstVisitor<Interpreter> {
+  GenericValue ExitValue;          // The return value of the called function
+  TargetData TD;
+  IntrinsicLowering *IL;
+
+  // The runtime stack of executing code.  The top of the stack is the current
+  // function record.
+  std::vector<ExecutionContext> ECStack;
+
+  // AtExitHandlers - List of functions to call when the program exits,
+  // registered with the atexit() library function.
+  std::vector<Function*> AtExitHandlers;
+
+public:
+  explicit Interpreter(Module *M);
+  ~Interpreter();
+
+  /// runAtExitHandlers - Run any functions registered by the program's calls to
+  /// atexit(3), which we intercept and store in AtExitHandlers.
+  ///
+  void runAtExitHandlers();
+
+  static void Register() {
+    InterpCtor = create;
+  }
+
+  /// create - Create an interpreter ExecutionEngine.  This can never fail.
+  ///
+  static ExecutionEngine *create(Module *M, std::string *ErrorStr = 0);
+
+  /// run - Start execution with the specified function and arguments.
+  ///
+  virtual GenericValue runFunction(Function *F,
+                                   const std::vector<GenericValue> &ArgValues);
+
+  /// recompileAndRelinkFunction - For the interpreter, functions are always
+  /// up-to-date.
+  ///
+  virtual void *recompileAndRelinkFunction(Function *F) {
+    return getPointerToFunction(F);
+  }
+
+  /// freeMachineCodeForFunction - The interpreter does not generate any code.
+  ///
+  void freeMachineCodeForFunction(Function *F) { }
+
+  // Methods used to execute code:
+  // Place a call on the stack
+  void callFunction(Function *F, const std::vector<GenericValue> &ArgVals);
+  void run();                // Execute instructions until nothing left to do
+
+  // Opcode Implementations
+  void visitReturnInst(ReturnInst &I);
+  void visitBranchInst(BranchInst &I);
+  void visitSwitchInst(SwitchInst &I);
+  void visitIndirectBrInst(IndirectBrInst &I);
+
+  void visitBinaryOperator(BinaryOperator &I);
+  void visitICmpInst(ICmpInst &I);
+  void visitFCmpInst(FCmpInst &I);
+  void visitAllocaInst(AllocaInst &I);
+  void visitLoadInst(LoadInst &I);
+  void visitStoreInst(StoreInst &I);
+  void visitGetElementPtrInst(GetElementPtrInst &I);
+  void visitPHINode(PHINode &PN) {
+    llvm_unreachable("PHI nodes already handled!");
+  }
+  void visitTruncInst(TruncInst &I);
+  void visitZExtInst(ZExtInst &I);
+  void visitSExtInst(SExtInst &I);
+  void visitFPTruncInst(FPTruncInst &I);
+  void visitFPExtInst(FPExtInst &I);
+  void visitUIToFPInst(UIToFPInst &I);
+  void visitSIToFPInst(SIToFPInst &I);
+  void visitFPToUIInst(FPToUIInst &I);
+  void visitFPToSIInst(FPToSIInst &I);
+  void visitPtrToIntInst(PtrToIntInst &I);
+  void visitIntToPtrInst(IntToPtrInst &I);
+  void visitBitCastInst(BitCastInst &I);
+  void visitSelectInst(SelectInst &I);
+
+
+  void visitCallSite(CallSite CS);
+  void visitCallInst(CallInst &I) { visitCallSite (CallSite (&I)); }
+  void visitInvokeInst(InvokeInst &I) { visitCallSite (CallSite (&I)); }
+  void visitUnwindInst(UnwindInst &I);
+  void visitUnreachableInst(UnreachableInst &I);
+
+  void visitShl(BinaryOperator &I);
+  void visitLShr(BinaryOperator &I);
+  void visitAShr(BinaryOperator &I);
+
+  void visitVAArgInst(VAArgInst &I);
+  void visitInstruction(Instruction &I) {
+    errs() << I;
+    llvm_unreachable("Instruction not interpretable yet!");
+  }
+
+  GenericValue callExternalFunction(Function *F,
+                                    const std::vector<GenericValue> &ArgVals);
+  void exitCalled(GenericValue GV);
+
+  void addAtExitHandler(Function *F) {
+    AtExitHandlers.push_back(F);
+  }
+
+  GenericValue *getFirstVarArg () {
+    return &(ECStack.back ().VarArgs[0]);
+  }
+
+private:  // Helper functions
+  GenericValue executeGEPOperation(Value *Ptr, gep_type_iterator I,
+                                   gep_type_iterator E, ExecutionContext &SF);
+
+  // SwitchToNewBasicBlock - Start execution in a new basic block and run any
+  // PHI nodes in the top of the block.  This is used for intraprocedural
+  // control flow.
+  //
+  void SwitchToNewBasicBlock(BasicBlock *Dest, ExecutionContext &SF);
+
+  void *getPointerToFunction(Function *F) { return (void*)F; }
+  void *getPointerToBasicBlock(BasicBlock *BB) { return (void*)BB; }
+
+  void initializeExecutionEngine() { }
+  void initializeExternalFunctions();
+  GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
+  GenericValue getOperandValue(Value *V, ExecutionContext &SF);
+  GenericValue executeTruncInst(Value *SrcVal, const Type *DstTy,
+                                ExecutionContext &SF);
+  GenericValue executeSExtInst(Value *SrcVal, const Type *DstTy,
+                               ExecutionContext &SF);
+  GenericValue executeZExtInst(Value *SrcVal, const Type *DstTy,
+                               ExecutionContext &SF);
+  GenericValue executeFPTruncInst(Value *SrcVal, const Type *DstTy,
+                                  ExecutionContext &SF);
+  GenericValue executeFPExtInst(Value *SrcVal, const Type *DstTy,
+                                ExecutionContext &SF);
+  GenericValue executeFPToUIInst(Value *SrcVal, const Type *DstTy,
+                                 ExecutionContext &SF);
+  GenericValue executeFPToSIInst(Value *SrcVal, const Type *DstTy,
+                                 ExecutionContext &SF);
+  GenericValue executeUIToFPInst(Value *SrcVal, const Type *DstTy,
+                                 ExecutionContext &SF);
+  GenericValue executeSIToFPInst(Value *SrcVal, const Type *DstTy,
+                                 ExecutionContext &SF);
+  GenericValue executePtrToIntInst(Value *SrcVal, const Type *DstTy,
+                                   ExecutionContext &SF);
+  GenericValue executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
+                                   ExecutionContext &SF);
+  GenericValue executeBitCastInst(Value *SrcVal, const Type *DstTy,
+                                  ExecutionContext &SF);
+  GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
+                                    const Type *Ty, ExecutionContext &SF);
+  void popStackAndReturnValueToCaller(const Type *RetTy, GenericValue Result);
+
+};
+
+} // End llvm namespace
+
+#endif
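
The visit* methods above rely on the CRTP InstVisitor pattern from llvm/Support/InstVisitor.h: visit(I) dispatches on the opcode and calls the most specific visitXxx the subclass defines, falling back to visitInstruction. A minimal sketch of the same pattern outside the interpreter (hypothetical helper, same header):

    #include "llvm/Function.h"
    #include "llvm/Instructions.h"
    #include "llvm/Support/InstVisitor.h"

    namespace {
    struct LoadCounter : public llvm::InstVisitor<LoadCounter> {
      unsigned Loads;
      LoadCounter() : Loads(0) {}
      void visitLoadInst(llvm::LoadInst &I) { ++Loads; }   // specific hook
      void visitInstruction(llvm::Instruction &I) {}       // default: ignore
    };
    }

    unsigned countLoads(llvm::Function &F) {
      LoadCounter LC;
      LC.visit(F);    // walks every instruction, dispatching by opcode
      return LC.Loads;
    }
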
diff --git a/final/lib/ExecutionEngine/Interpreter/Makefile b/final/lib/ExecutionEngine/Interpreter/Makefile
new file mode 100644
index 00000000000..5def1365c61
--- /dev/null
+++ b/final/lib/ExecutionEngine/Interpreter/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/Interpreter/Makefile ------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMInterpreter
+
+include $(LEVEL)/Makefile.common
diff --git a/final/lib/ExecutionEngine/JIT/CMakeLists.txt b/final/lib/ExecutionEngine/JIT/CMakeLists.txt
new file mode 100644
index 00000000000..42020d69af0
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -0,0 +1,13 @@
+# TODO: Support other architectures. See Makefile.
+add_definitions(-DENABLE_X86_JIT)
+
+add_llvm_library(LLVMJIT
+  Intercept.cpp
+  JIT.cpp
+  JITDebugRegisterer.cpp
+  JITDwarfEmitter.cpp
+  JITEmitter.cpp
+  JITMemoryManager.cpp
+  OProfileJITEventListener.cpp
+  TargetSelect.cpp
+  )
diff --git a/final/lib/ExecutionEngine/JIT/Intercept.cpp b/final/lib/ExecutionEngine/JIT/Intercept.cpp
new file mode 100644
index 00000000000..169e1bae547
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/Intercept.cpp
@@ -0,0 +1,161 @@
+//===-- Intercept.cpp - System function interception routines -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// When a call to an external function occurs, the JIT uses the dynamic loader
+// interface to find a function to call.  This is useful for calling system
+// calls and library functions that are not available in LLVM.  Some system
+// calls, however, need to be handled specially.  For this reason, we intercept
+// some of them here and use our own stubs to handle them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+using namespace llvm;
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+  while (!AtExitHandlers.empty()) {
+    void (*Fn)() = AtExitHandlers.back();
+    AtExitHandlers.pop_back();
+    Fn();
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//===----------------------------------------------------------------------===//
+
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__)
+#if defined(HAVE_SYS_STAT_H)
+#include <sys/stat.h>
+#endif
+#include <fcntl.h>
+/* stat functions are redirecting to __xstat with a version number.  On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+class StatSymbols {
+public:
+  StatSymbols() {
+    sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat);
+    sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat);
+    sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat);
+    sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64);
+    sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64);
+    sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64);
+    sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64);
+    sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64);
+    sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit);
+    sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod);
+  }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+  runAtExitHandlers();   // Run atexit handlers...
+  exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+  AtExitHandlers.push_back(Fn);    // Take note of atexit handler...
+  return 0;  // Always successful
+}
+
+static int jit_noop() {
+  return 0;
+}
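
Taken together, the two stubs implement the whole interception contract: JITed code that calls atexit gets jit_atexit, and a later exit drains the recorded handlers before the real exit runs. A hypothetical driver, assuming it lived in this translation unit so the static stubs are visible, would behave like this:

    #include <cstdio>

    static void sayBye() { std::puts("bye"); }

    static void demoInterception() {
      jit_atexit(sayBye);   // what a JITed call to atexit(sayBye) resolves to
      jit_exit(0);          // prints "bye", then calls the real exit(0)
    }
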
+
+//===----------------------------------------------------------------------===//
+//
+/// getPointerToNamedFunction - This method returns the address of the specified
+/// function by using the dynamic loader interface.  As such it is only useful
+/// for resolving library symbols, not code generated symbols.
+///
+void *JIT::getPointerToNamedFunction(const std::string &Name,
+                                     bool AbortOnFailure) {
+  if (!isSymbolSearchingDisabled()) {
+    // Check to see if this is one of the functions we want to intercept.  Note,
+    // we cast to intptr_t here to silence a -pedantic warning that complains
+    // about casting a function pointer to a normal pointer.
+    if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+    if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+    // We should not invoke parent's ctors/dtors from generated main()!
+    // On Mingw and Cygwin, the symbol __main is resolved to
+    // the callee's (e.g. tools/lli) one, to invoke wrong duplicated ctors
+    // (and register wrong callee's dtors with atexit(3)).
+    // We expect ExecutionEngine::runStaticConstructorsDestructors()
+    // is called before ExecutionEngine::runFunctionAsMain() is called.
+    if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+    const char *NameStr = Name.c_str();
+    // If this is an asm specifier, skip the sentinel.
+    if (NameStr[0] == 1) ++NameStr;
+
+    // If it's an external function, look it up in the process image...
+    void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+    if (Ptr) return Ptr;
+
+    // If it wasn't found and if it starts with an underscore ('_') character,
+    // and has an asm specifier, try again without the underscore.
+    if (Name[0] == 1 && NameStr[0] == '_') {
+      Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+      if (Ptr) return Ptr;
+    }
+
+    // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf.  These
+    // are references to hidden visibility symbols that dlsym cannot resolve.
+    // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+    if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+        memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+      // First try turning $LDBLStub into $LDBL128.  If that fails, strip it
+      // off.  This mirrors logic in libSystemStubs.a.
+      std::string Prefix = std::string(Name.begin(), Name.end()-9);
+      if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+        return Ptr;
+      if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+        return Ptr;
+    }
+#endif
+  }
+
+  /// If a LazyFunctionCreator is installed, use it to get/create the function.
+  if (LazyFunctionCreator)
+    if (void *RP = LazyFunctionCreator(Name))
+      return RP;
+
+  if (AbortOnFailure) {
+    report_fatal_error("Program used external function '"+Name+
+                       "' which could not be resolved!");
+  }
+  return 0;
+}
diff --git a/final/lib/ExecutionEngine/JIT/JIT.cpp b/final/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 00000000000..cc76b138a8a
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,843 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bitcode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Config/config.h"
+
+using namespace llvm;
+
+#ifdef __APPLE__
+// Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead
+// of atexit). It passes the address of linker generated symbol __dso_handle
+// to the function.
+// This configuration change happened at version 5330.
+# include <AvailabilityMacros.h>
+# if defined(MAC_OS_X_VERSION_10_4) && \
+     ((MAC_OS_X_VERSION_MIN_REQUIRED > MAC_OS_X_VERSION_10_4) || \
+      (MAC_OS_X_VERSION_MIN_REQUIRED == MAC_OS_X_VERSION_10_4 && \
+       __APPLE_CC__ >= 5330))
+#  ifndef HAVE___DSO_HANDLE
+#   define HAVE___DSO_HANDLE 1
+#  endif
+# endif
+#endif
+
+#if HAVE___DSO_HANDLE
+extern void *__dso_handle __attribute__ ((__visibility__ ("hidden")));
+#endif
+
+namespace {
+
+static struct RegisterJIT {
+  RegisterJIT() { JIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInJIT() {
+}
+
+// Determine whether we can register EH tables.
+#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \
+     !defined(__USING_SJLJ_EXCEPTIONS__))
+#define HAVE_EHTABLE_SUPPORT 1
+#else
+#define HAVE_EHTABLE_SUPPORT 0
+#endif
+
+#if HAVE_EHTABLE_SUPPORT
+
+// libgcc defines the __register_frame function to dynamically register new
+// dwarf frames for exception handling.  This functionality is not portable
+// across compilers and is only provided by GCC.  We use the __register_frame
+// function here so that code generated by the JIT cooperates with the
+// unwinding runtime of libgcc.
+// When JITting with exception handling enabled, LLVM generates dwarf frames
+// and registers them with libgcc via __register_frame.
+//
+// The __register_frame function works with Linux.
+//
+// Unfortunately, this functionality seems to be in libgcc after the unwinding
+// library of libgcc for darwin was written.  The code for darwin overwrites the
+// value updated by __register_frame with a value fetched with "keymgr".
+// "keymgr" is an obsolete functionality, which should be rewritten some day.
+// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we
+// need a workaround in LLVM which uses the "keymgr" to dynamically modify the
+// values of an opaque key, used by libgcc to find dwarf tables.
+
+extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
+
+#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050
+# define USE_KEYMGR 1
+#else
+# define USE_KEYMGR 0
+#endif
+
+#if USE_KEYMGR
+
+namespace {
+
+// LibgccObject - This is the structure defined in libgcc.  There is no #include
+// provided for this structure, so we also define it here.  libgcc calls it
+// "struct object".  The structure is undocumented in libgcc.
+struct LibgccObject {
+  void *unused1;
+  void *unused2;
+  void *unused3;
+
+  /// frame - Pointer to the exception table.
+  void *frame;
+
+  /// encoding - The encoding of the object?
+  union {
+    struct {
+      unsigned long sorted : 1;
+      unsigned long from_array : 1;
+      unsigned long mixed_encoding : 1;
+      unsigned long encoding : 8;
+      unsigned long count : 21;
+    } b;
+    size_t i;
+  } encoding;
+
+  /// fde_end - libgcc defines this field only if some macro is defined.  We
+  /// include this field even if it may not be there, to make libgcc happy.
+  char *fde_end;
+
+  /// next - At least we know it's a chained list!
+  struct LibgccObject *next;
+};
+
+// "keymgr" stuff.  Apparently, all frame tables are stored there.
+extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *);
+extern "C" void *_keymgr_get_and_lock_processwide_ptr(int);
+#define KEYMGR_GCC3_DW2_OBJ_LIST        302     /* Dwarf2 object list  */
+
+/// LibgccObjectInfo - libgcc defines this struct as km_object_info.  It
+/// probably contains all dwarf tables that are loaded.
+struct LibgccObjectInfo {
+
+  /// seenObjects - LibgccObjects already parsed by the unwinding runtime.
+  ///
+  struct LibgccObject* seenObjects;
+
+  /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime.
+  ///
+  struct LibgccObject* unseenObjects;
+
+  unsigned unused[2];
+};
+
+/// darwin_register_frame - Since __register_frame does not work with darwin's
+/// libgcc, we provide our own function, which "tricks" libgcc by modifying the
+/// "Dwarf2 object list" key.
+void DarwinRegisterFrame(void* FrameBegin) {
+  // Get the key.
+  LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+    _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+  assert(LOI && "This should be preallocated by the runtime");
+
+  // Allocate a new LibgccObject to represent this frame.  Deallocation of this
+  // object may be impossible: since darwin code in libgcc was written after
+  // the ability to dynamically register frames, things may crash if we
+  // deallocate it.
+  struct LibgccObject* ob = (struct LibgccObject*)
+    malloc(sizeof(struct LibgccObject));
+
+  // Fill in the fields the way libgcc does.
+  ob->unused1 = (void *)-1;
+  ob->unused2 = 0;
+  ob->unused3 = 0;
+  ob->frame = FrameBegin;
+  ob->encoding.i = 0;
+  ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit;
+
+  // Put the info in both places, as libgcc uses the first or the second
+  // field.  Note that we rely on having two pointers here.  If fde_end was a
+  // char, things would get complicated.
+  ob->fde_end = (char*)LOI->unseenObjects;
+  ob->next = LOI->unseenObjects;
+
+  // Update the key's unseenObjects list.
+  LOI->unseenObjects = ob;
+
+  // Finally update the "key".  Apparently, libgcc requires it.
+  _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST,
+                                         LOI);
+
+}
+
+}
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+/// createJIT - This is the factory method for creating a JIT for the current
+/// machine, it does not fall back to the interpreter.  This takes ownership
+/// of the module.
+ExecutionEngine *ExecutionEngine::createJIT(Module *M,
+                                            std::string *ErrorStr,
+                                            JITMemoryManager *JMM,
+                                            CodeGenOpt::Level OptLevel,
+                                            bool GVsWithCode,
+                                            CodeModel::Model CMM) {
+  // Use the defaults for extra parameters.  Users can use EngineBuilder to
+  // set them.
+  StringRef MArch = "";
+  StringRef MCPU = "";
+  SmallVector<std::string, 4> MAttrs;
+  return JIT::createJIT(M, ErrorStr, JMM, OptLevel, GVsWithCode, CMM,
+                        MArch, MCPU, MAttrs);
+}
+
+ExecutionEngine *JIT::createJIT(Module *M,
+                                std::string *ErrorStr,
+                                JITMemoryManager *JMM,
+                                CodeGenOpt::Level OptLevel,
+                                bool GVsWithCode,
+                                CodeModel::Model CMM,
+                                StringRef MArch,
+                                StringRef MCPU,
+                                const SmallVectorImpl<std::string>& MAttrs) {
+  // Try to register the program as a source of symbols to resolve against.
+  sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+  // Pick a target either via -march or by guessing the native arch.
+  TargetMachine *TM = JIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+  if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+  TM->setCodeModel(CMM);
+
+  // If the target supports JIT code generation, create the JIT.
+  if (TargetJITInfo *TJ = TM->getJITInfo()) {
+    return new JIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+  } else {
+    if (ErrorStr)
+      *ErrorStr = "target does not support JIT code generation";
+    return 0;
+  }
+}
+
+namespace {
+/// This class supports the global getPointerToNamedFunction(), which allows
+/// bugpoint or gdb users to search for a function by name without any context.
+class JitPool {
+  SmallPtrSet<JIT*, 1> JITs;  // Optimize for process containing just 1 JIT.
+  mutable sys::Mutex Lock;
+public:
+  void Add(JIT *jit) {
+    MutexGuard guard(Lock);
+    JITs.insert(jit);
+  }
+  void Remove(JIT *jit) {
+    MutexGuard guard(Lock);
+    JITs.erase(jit);
+  }
+  void *getPointerToNamedFunction(const char *Name) const {
+    MutexGuard guard(Lock);
+    assert(JITs.size() != 0 && "No Jit registered");
+    // Search for the function in every instance of the JIT.
+    for (SmallPtrSet<JIT*, 1>::const_iterator Jit = JITs.begin(),
+           end = JITs.end();
+         Jit != end; ++Jit) {
+      if (Function *F = (*Jit)->FindFunctionNamed(Name))
+        return (*Jit)->getPointerToFunction(F);
+    }
+    // The function is not available: fall back on the first created JIT,
+    // which will search the symbols of the current program/library.
+    return (*JITs.begin())->getPointerToNamedFunction(Name);
+  }
+};
+ManagedStatic<JitPool> AllJits;
+}
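
In practice clients do not call this factory directly; EngineBuilder fills in the extra parameters and lands in JIT::createJIT above. A sketch of that client-side path (assumes the native target was initialized first, e.g. via llvm::InitializeNativeTarget()):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JIT.h"   // forces LLVMJIT to link in
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    llvm::ExecutionEngine *makeJIT(llvm::Module *Mod) {
      std::string Err;
      llvm::ExecutionEngine *EE = llvm::EngineBuilder(Mod)
          .setEngineKind(llvm::EngineKind::JIT)
          .setOptLevel(llvm::CodeGenOpt::Default)
          .setErrorStr(&Err)
          .create();
      if (!EE)
        llvm::errs() << "could not create JIT: " << Err << "\n";
      return EE;
    }
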
+extern "C" {
+  // getPointerToNamedFunction - This function is used as a global wrapper to
+  // JIT::getPointerToNamedFunction for the purpose of resolving symbols when
+  // bugpoint is debugging the JIT.  In that scenario, we are loading an .so and
+  // need to resolve function(s) that are being mis-codegenerated, so we need to
+  // resolve their addresses at runtime, and this is the way to do it.
+  void *getPointerToNamedFunction(const char *Name) {
+    return AllJits->getPointerToNamedFunction(Name);
+  }
+}
+
+JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+         JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool GVsWithCode)
+  : ExecutionEngine(M), TM(tm), TJI(tji), AllocateGVsWithCode(GVsWithCode),
+    isAlreadyCodeGenerating(false) {
+  setTargetData(TM.getTargetData());
+
+  jitstate = new JITState(M);
+
+  // Initialize JCE
+  JCE = createEmitter(*this, JMM, TM);
+
+  // Register in global list of all JITs.
+  AllJits->Add(this);
+
+  // Add target data
+  MutexGuard locked(lock);
+  FunctionPassManager &PM = jitstate->getPM(locked);
+  PM.add(new TargetData(*TM.getTargetData()));
+
+  // Turn the machine code intermediate representation into bytes in memory
+  // that may be executed.
+  if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) {
+    report_fatal_error("Target does not support machine code emission!");
+  }
+
+  // Register routine for informing unwinding runtime about new EH frames
+#if HAVE_EHTABLE_SUPPORT
+#if USE_KEYMGR
+  struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*)
+    _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+
+  // The key is created on demand, and libgcc creates it the first time an
+  // exception occurs.  Since we need the key to register frames, we create
+  // it now.
+  if (!LOI)
+    LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1);
+  _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI);
+  InstallExceptionTableRegister(DarwinRegisterFrame);
+  // Not sure about how to deregister on Darwin.
+#else
+  InstallExceptionTableRegister(__register_frame);
+  InstallExceptionTableDeregister(__deregister_frame);
+#endif // __APPLE__
+#endif // HAVE_EHTABLE_SUPPORT
+
+  // Initialize passes.
+  PM.doInitialization();
+}
+
+JIT::~JIT() {
+  // Unregister all exception tables registered by this JIT.
+  DeregisterAllTables();
+  // Cleanup.
+  AllJits->Remove(this);
+  delete jitstate;
+  delete JCE;
+  delete &TM;
+}
+
+/// addModule - Add a new Module to the JIT.  If we previously removed the last
+/// Module, we need to re-initialize jitstate with a valid Module.
+void JIT::addModule(Module *M) {
+  MutexGuard locked(lock);
+
+  if (Modules.empty()) {
+    assert(!jitstate && "jitstate should be NULL if Modules vector is empty!");
+
+    jitstate = new JITState(M);
+
+    FunctionPassManager &PM = jitstate->getPM(locked);
+    PM.add(new TargetData(*TM.getTargetData()));
+
+    // Turn the machine code intermediate representation into bytes in memory
+    // that may be executed.
+    if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+      report_fatal_error("Target does not support machine code emission!");
+    }
+
+    // Initialize passes.
+    PM.doInitialization();
+  }
+
+  ExecutionEngine::addModule(M);
+}
+
+/// removeModule - If we are removing the last Module, invalidate the jitstate
+/// since the PassManager it contains references a released Module.
+bool JIT::removeModule(Module *M) {
+  bool result = ExecutionEngine::removeModule(M);
+
+  MutexGuard locked(lock);
+
+  if (jitstate->getModule() == M) {
+    delete jitstate;
+    jitstate = 0;
+  }
+
+  if (!jitstate && !Modules.empty()) {
+    jitstate = new JITState(Modules[0]);
+
+    FunctionPassManager &PM = jitstate->getPM(locked);
+    PM.add(new TargetData(*TM.getTargetData()));
+
+    // Turn the machine code intermediate representation into bytes in memory
+    // that may be executed.
+    if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) {
+      report_fatal_error("Target does not support machine code emission!");
+    }
+
+    // Initialize passes.
+    PM.doInitialization();
+  }
+  return result;
+}
+
+/// run - Start execution with the specified function and arguments.
+///
+GenericValue JIT::runFunction(Function *F,
+                              const std::vector<GenericValue> &ArgValues) {
+  assert(F && "Function *F was null at entry to run()");
+
+  void *FPtr = getPointerToFunction(F);
+  assert(FPtr && "Pointer to fn's code was null after getPointerToFunction");
+  const FunctionType *FTy = F->getFunctionType();
+  const Type *RetTy = FTy->getReturnType();
+
+  assert((FTy->getNumParams() == ArgValues.size() ||
+          (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) &&
+         "Wrong number of arguments passed into function!");
+  assert(FTy->getNumParams() == ArgValues.size() &&
+         "This doesn't support passing arguments through varargs (yet)!");
+
+  // Handle some common cases first.  These cases correspond to common `main'
+  // prototypes.
+  if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) {
+    switch (ArgValues.size()) {
+    case 3:
+      if (FTy->getParamType(0)->isIntegerTy(32) &&
+          FTy->getParamType(1)->isPointerTy() &&
+          FTy->getParamType(2)->isPointerTy()) {
+        int (*PF)(int, char **, const char **) =
+          (int(*)(int, char **, const char **))(intptr_t)FPtr;
+
+        // Call the function.
+        GenericValue rv;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+                                 (char **)GVTOP(ArgValues[1]),
+                                 (const char **)GVTOP(ArgValues[2])));
+        return rv;
+      }
+      break;
+    case 2:
+      if (FTy->getParamType(0)->isIntegerTy(32) &&
+          FTy->getParamType(1)->isPointerTy()) {
+        int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
+
+        // Call the function.
+        GenericValue rv;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(),
+                                 (char **)GVTOP(ArgValues[1])));
+        return rv;
+      }
+      break;
+    case 1:
+      if (FTy->getNumParams() == 1 &&
+          FTy->getParamType(0)->isIntegerTy(32)) {
+        GenericValue rv;
+        int (*PF)(int) = (int(*)(int))(intptr_t)FPtr;
+        rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue()));
+        return rv;
+      }
+      break;
+    }
+  }
+
+  // Handle cases where no arguments are passed first.
+  if (ArgValues.empty()) {
+    GenericValue rv;
+    switch (RetTy->getTypeID()) {
+    default: llvm_unreachable("Unknown return type for function call!");
+    case Type::IntegerTyID: {
+      unsigned BitWidth = cast<IntegerType>(RetTy)->getBitWidth();
+      if (BitWidth == 1)
+        rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 8)
+        rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 16)
+        rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 32)
+        rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)());
+      else if (BitWidth <= 64)
+        rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)());
+      else
+        llvm_unreachable("Integer types > 64 bits not supported");
+      return rv;
+    }
+    case Type::VoidTyID:
+      rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)());
+      return rv;
+    case Type::FloatTyID:
+      rv.FloatVal = ((float(*)())(intptr_t)FPtr)();
+      return rv;
+    case Type::DoubleTyID:
+      rv.DoubleVal = ((double(*)())(intptr_t)FPtr)();
+      return rv;
+    case Type::X86_FP80TyID:
+    case Type::FP128TyID:
+    case Type::PPC_FP128TyID:
+      llvm_unreachable("long double not supported yet");
+      return rv;
+    case Type::PointerTyID:
+      return PTOGV(((void*(*)())(intptr_t)FPtr)());
+    }
+  }
+
+  // Okay, this is not one of our quick and easy cases.  Because we don't have a
+  // full FFI, we have to codegen a nullary stub function that just calls the
+  // function we are interested in, passing in constants for all of the
+  // arguments.  Make this function and return.
+
+  // First, create the function.
+  FunctionType *STy = FunctionType::get(RetTy, false);
+  Function *Stub = Function::Create(STy, Function::InternalLinkage, "",
+                                    F->getParent());
+
+  // Insert a basic block.
+  BasicBlock *StubBB = BasicBlock::Create(F->getContext(), "", Stub);
+
+  // Convert all of the GenericValue arguments over to constants.  Note that we
+  // currently don't support varargs.
+  SmallVector<Constant*, 8> Args;
+  for (unsigned i = 0, e = ArgValues.size(); i != e; ++i) {
+    Constant *C = 0;
+    const Type *ArgTy = FTy->getParamType(i);
+    const GenericValue &AV = ArgValues[i];
+    switch (ArgTy->getTypeID()) {
+    default: llvm_unreachable("Unknown argument type for function call!");
+    case Type::IntegerTyID:
+      C = ConstantInt::get(F->getContext(), AV.IntVal);
+      break;
+    case Type::FloatTyID:
+      C = ConstantFP::get(F->getContext(), APFloat(AV.FloatVal));
+      break;
+    case Type::DoubleTyID:
+      C = ConstantFP::get(F->getContext(), APFloat(AV.DoubleVal));
+      break;
+    case Type::PPC_FP128TyID:
+    case Type::X86_FP80TyID:
+    case Type::FP128TyID:
+      C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal));
+      break;
+    case Type::PointerTyID:
+      void *ArgPtr = GVTOP(AV);
+      if (sizeof(void*) == 4)
+        C = ConstantInt::get(Type::getInt32Ty(F->getContext()),
+                             (int)(intptr_t)ArgPtr);
+      else
+        C = ConstantInt::get(Type::getInt64Ty(F->getContext()),
+                             (intptr_t)ArgPtr);
+      // Cast the integer to pointer
+      C = ConstantExpr::getIntToPtr(C, ArgTy);
+      break;
+    }
+    Args.push_back(C);
+  }
+
+  CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(),
+                                       "", StubBB);
+  TheCall->setCallingConv(F->getCallingConv());
+  TheCall->setTailCall();
+  if (!TheCall->getType()->isVoidTy())
+    // Return result of the call.
+    ReturnInst::Create(F->getContext(), TheCall, StubBB);
+  else
+    ReturnInst::Create(F->getContext(), StubBB);  // Just return void.
+
+  // Finally, call our nullary stub function.
+  GenericValue Result = runFunction(Stub, std::vector<GenericValue>());
+  // Erase it, since no other function can have a reference to it.
+  Stub->eraseFromParent();
+  // And return the result.
+  return Result;
+}
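
For reference, the three-argument fast path above is what a typical main-style invocation hits; a sketch of the caller's side (EE and MainFn hypothetical, coming from elsewhere):

    // argc/argv/envp packed as GenericValues; PTOGV/GVTOP do the conversions.
    static const char *Argv[] = { "prog", 0 };
    static const char *Envp[] = { 0 };
    std::vector<llvm::GenericValue> Args(3);
    Args[0].IntVal = llvm::APInt(32, 1);      // argc
    Args[1] = llvm::PTOGV((void*)Argv);       // argv
    Args[2] = llvm::PTOGV((void*)Envp);       // envp
    llvm::GenericValue Ret = EE->runFunction(MainFn, Args);
    // Ret.IntVal holds main's 32-bit exit status.
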
+
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+  if (L == NULL)
+    return;
+  MutexGuard locked(lock);
+  EventListeners.push_back(L);
+}
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+  if (L == NULL)
+    return;
+  MutexGuard locked(lock);
+  std::vector<JITEventListener*>::reverse_iterator I =
+      std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+  if (I != EventListeners.rend()) {
+    std::swap(*I, EventListeners.back());
+    EventListeners.pop_back();
+  }
+}
+void JIT::NotifyFunctionEmitted(
+    const Function &F,
+    void *Code, size_t Size,
+    const JITEvent_EmittedFunctionDetails &Details) {
+  MutexGuard locked(lock);
+  for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+    EventListeners[I]->NotifyFunctionEmitted(F, Code, Size, Details);
+  }
+}
+
+void JIT::NotifyFreeingMachineCode(void *OldPtr) {
+  MutexGuard locked(lock);
+  for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
+    EventListeners[I]->NotifyFreeingMachineCode(OldPtr);
+  }
+}
+
+/// runJITOnFunction - Run the FunctionPassManager full of
+/// just-in-time compilation passes on F, hopefully filling in
+/// GlobalAddress[F] with the address of F's machine code.
+///
+void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
+  MutexGuard locked(lock);
+
+  class MCIListener : public JITEventListener {
+    MachineCodeInfo *const MCI;
+  public:
+    MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+    virtual void NotifyFunctionEmitted(const Function &,
+                                       void *Code, size_t Size,
+                                       const EmittedFunctionDetails &) {
+      MCI->setAddress(Code);
+      MCI->setSize(Size);
+    }
+  };
+  MCIListener MCIL(MCI);
+  if (MCI)
+    RegisterJITEventListener(&MCIL);
+
+  runJITOnFunctionUnlocked(F, locked);
+
+  if (MCI)
+    UnregisterJITEventListener(&MCIL);
+}
+
+void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
+  assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
+
+  jitTheFunction(F, locked);
+
+  // If the function referred to another function that had not yet been
+  // read from bitcode, and we are jitting non-lazily, emit it now.
+  while (!jitstate->getPendingFunctions(locked).empty()) {
+    Function *PF = jitstate->getPendingFunctions(locked).back();
+    jitstate->getPendingFunctions(locked).pop_back();
+
+    assert(!PF->hasAvailableExternallyLinkage() &&
+           "Externally-defined function should not be in pending list.");
+
+    jitTheFunction(PF, locked);
+
+    // Now that the function has been jitted, ask the JITEmitter to rewrite
+    // the stub with real address of the function.
+    updateFunctionStub(PF);
+  }
+}
+
+void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+  isAlreadyCodeGenerating = true;
+  jitstate->getPM(locked).run(*F);
+  isAlreadyCodeGenerating = false;
+
+  // clear basic block addresses after this function is done
+  getBasicBlockAddressMap(locked).clear();
+}
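
The MCIListener above is the smallest possible consumer of the notification hooks; an external listener follows the same shape. A sketch (hypothetical logger, same virtual signatures as declared in JITEventListener.h):

    #include "llvm/ExecutionEngine/JITEventListener.h"
    #include "llvm/Function.h"
    #include "llvm/Support/raw_ostream.h"

    class SizeLogger : public llvm::JITEventListener {
    public:
      virtual void NotifyFunctionEmitted(const llvm::Function &F, void *Code,
                                         size_t Size,
                                         const EmittedFunctionDetails &) {
        llvm::errs() << F.getName() << ": " << Size << " bytes at "
                     << Code << "\n";
      }
      virtual void NotifyFreeingMachineCode(void *OldPtr) {
        llvm::errs() << "freeing code at " << OldPtr << "\n";
      }
    };
    // Usage: SizeLogger L; EE->RegisterJITEventListener(&L);
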
+
+/// getPointerToFunction - This method is used to get the address of the
+/// specified function, compiling it if necessary.
+///
+void *JIT::getPointerToFunction(Function *F) {
+
+  if (void *Addr = getPointerToGlobalIfAvailable(F))
+    return Addr;   // Check if function already code gen'd
+
+  MutexGuard locked(lock);
+
+  // Now that this thread owns the lock, make sure we read in the function if
+  // it exists in this Module.
+  std::string ErrorMsg;
+  if (F->Materialize(&ErrorMsg)) {
+    report_fatal_error("Error reading function '" + F->getName() +
+                       "' from bitcode file: " + ErrorMsg);
+  }
+
+  // ... and check if another thread has already code gen'd the function.
+  if (void *Addr = getPointerToGlobalIfAvailable(F))
+    return Addr;
+
+  if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
+    bool AbortOnFailure = !F->hasExternalWeakLinkage();
+    void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
+    addGlobalMapping(F, Addr);
+    return Addr;
+  }
+
+  runJITOnFunctionUnlocked(F, locked);
+
+  void *Addr = getPointerToGlobalIfAvailable(F);
+  assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+  return Addr;
+}
+
+void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
+  MutexGuard locked(lock);
+
+  BasicBlockAddressMapTy::iterator I =
+    getBasicBlockAddressMap(locked).find(BB);
+  if (I == getBasicBlockAddressMap(locked).end()) {
+    getBasicBlockAddressMap(locked)[BB] = Addr;
+  } else {
+    // ignore repeats: some BBs can be split into few MBBs?
+  }
+}
+
+void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
+  MutexGuard locked(lock);
+  getBasicBlockAddressMap(locked).erase(BB);
+}
+
+void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
+  // Make sure its function is compiled by the JIT.
+  (void)getPointerToFunction(BB->getParent());
+
+  // resolve basic block address
+  MutexGuard locked(lock);
+
+  BasicBlockAddressMapTy::iterator I =
+    getBasicBlockAddressMap(locked).find(BB);
+  if (I != getBasicBlockAddressMap(locked).end()) {
+    return I->second;
+  } else {
+    assert(0 && "JIT does not have BB address for address-of-label, was"
+           " it eliminated by optimizer?");
+    return 0;
+  }
+}
+
+/// getOrEmitGlobalVariable - Return the address of the specified global
+/// variable, possibly emitting it to memory if needed.  This is used by the
+/// Emitter.
+void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
+  MutexGuard locked(lock);
+
+  void *Ptr = getPointerToGlobalIfAvailable(GV);
+  if (Ptr) return Ptr;
+
+  // If the global is external, just remember the address.
+  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
+#if HAVE___DSO_HANDLE
+    if (GV->getName() == "__dso_handle")
+      return (void*)&__dso_handle;
+#endif
+    Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
+    if (Ptr == 0) {
+      report_fatal_error("Could not resolve external global address: "
+                         +GV->getName());
+    }
+    addGlobalMapping(GV, Ptr);
+  } else {
+    // If the global hasn't been emitted to memory yet, allocate space and
+    // emit it into memory.
+    Ptr = getMemoryForGV(GV);
+    addGlobalMapping(GV, Ptr);
+    EmitGlobalVariable(GV);  // Initialize the variable.
+  }
+  return Ptr;
+}
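
getPointerToFunction above is a classic check/lock/re-check sequence. Compressed to its skeleton (all names hypothetical), the idiom it implements is:

    #include "llvm/Support/Mutex.h"
    #include "llvm/Support/MutexGuard.h"

    static llvm::sys::Mutex CompileLock;
    void *lookup(llvm::Function *F);          // hypothetical stand-in
    void *compileLocked(llvm::Function *F);   // hypothetical stand-in

    void *lookupOrCompile(llvm::Function *F) {
      if (void *Addr = lookup(F)) return Addr;   // fast path, no lock taken
      llvm::MutexGuard G(CompileLock);           // serialize compilations
      if (void *Addr = lookup(F)) return Addr;   // re-check: another thread won
      return compileLocked(F);                   // safe: we hold the lock
    }
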
+
+/// recompileAndRelinkFunction - This method is used to force a function
+/// which has already been compiled, to be compiled again, possibly
+/// after it has been modified.  Then the entry to the old copy is overwritten
+/// with a branch to the new copy.  If there was no old copy, this acts
+/// just like JIT::getPointerToFunction().
+///
+void *JIT::recompileAndRelinkFunction(Function *F) {
+  void *OldAddr = getPointerToGlobalIfAvailable(F);
+
+  // If it's not already compiled there is no reason to patch it up.
+  if (OldAddr == 0) { return getPointerToFunction(F); }
+
+  // Delete the old function mapping.
+  addGlobalMapping(F, 0);
+
+  // Recodegen the function
+  runJITOnFunction(F);
+
+  // Update state, forward the old function to the new function.
+  void *Addr = getPointerToGlobalIfAvailable(F);
+  assert(Addr && "Code generation didn't add function to GlobalAddress table!");
+  TJI.replaceMachineCodeForFunction(OldAddr, Addr);
+  return Addr;
+}
+
+/// getMemoryForGV - This method abstracts memory allocation of global
+/// variable so that the JIT can allocate thread local variables depending
+/// on the target.
+///
+char* JIT::getMemoryForGV(const GlobalVariable* GV) {
+  char *Ptr;
+
+  // GlobalVariables which are not "constant" will cause trouble in a server
+  // situation: their memory is returned in the same block as the code, which
+  // may not be writable.
+  if (isGVCompilationDisabled() && !GV->isConstant()) {
+    report_fatal_error("Compilation of non-internal GlobalValue is disabled!");
+  }
+
+  // Some applications require globals and code to live together, so they may
+  // be allocated into the same buffer, but in general globals are allocated
+  // through the memory manager which puts them near the code but not in the
+  // same buffer.
+  const Type *GlobalType = GV->getType()->getElementType();
+  size_t S = getTargetData()->getTypeAllocSize(GlobalType);
+  size_t A = getTargetData()->getPreferredAlignment(GV);
+  if (GV->isThreadLocal()) {
+    MutexGuard locked(lock);
+    Ptr = TJI.allocateThreadLocalMemory(S);
+  } else if (TJI.allocateSeparateGVMemory()) {
+    if (A <= 8) {
+      Ptr = (char*)malloc(S);
+    } else {
+      // Allocate S+A bytes of memory, then use an aligned pointer within that
+      // space.
+      Ptr = (char*)malloc(S+A);
+      unsigned MisAligned = ((intptr_t)Ptr & (A-1));
+      Ptr = Ptr + (MisAligned ? (A-MisAligned) : 0);
+    }
+  } else if (AllocateGVsWithCode) {
+    Ptr = (char*)JCE->allocateSpace(S, A);
+  } else {
+    Ptr = (char*)JCE->allocateGlobal(S, A);
+  }
+  return Ptr;
+}
+
+void JIT::addPendingFunction(Function *F) {
+  MutexGuard locked(lock);
+  jitstate->getPendingFunctions(locked).push_back(F);
+}
+
+
+JITEventListener::~JITEventListener() {}
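
The malloc(S+A) branch in getMemoryForGV is worth a worked example, since the over-allocation guarantees an aligned address somewhere in the block (note the base pointer is not kept, so that slack cannot easily be freed later). A standalone sketch of the same arithmetic (hypothetical helper; A must be a power of two):

    #include <cstdlib>
    #include <stdint.h>

    char *allocAligned(size_t S, size_t A) {
      char *Ptr = (char*)malloc(S + A);           // worst-case slack of A bytes
      uintptr_t Mis = (uintptr_t)Ptr & (A - 1);   // bytes past the last boundary
      return Ptr + (Mis ? (A - Mis) : 0);         // bump up to the next boundary
    }
    // e.g. Ptr == 0x1003 with A == 8: Mis == 3, result == 0x1003 + 5 == 0x1008.
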
diff --git a/final/lib/ExecutionEngine/JIT/JIT.h b/final/lib/ExecutionEngine/JIT/JIT.h
new file mode 100644
index 00000000000..1d1763edd4d
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JIT.h
@@ -0,0 +1,237 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef JIT_H
+#define JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/ValueHandle.h"
+
+namespace llvm {
+
+class Function;
+struct JITEvent_EmittedFunctionDetails;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
+
+class JITState {
+private:
+  FunctionPassManager PM;  // Passes to compile a function
+  Module *M;               // Module used to create the PM
+
+  /// PendingFunctions - Functions which have not been code generated yet, but
+  /// were called from a function being code generated.
+  std::vector<AssertingVH<Function> > PendingFunctions;
+
+public:
+  explicit JITState(Module *M) : PM(M), M(M) {}
+
+  FunctionPassManager &getPM(const MutexGuard &L) {
+    return PM;
+  }
+
+  Module *getModule() const { return M; }
+  std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){
+    return PendingFunctions;
+  }
+};
+
+
+class JIT : public ExecutionEngine {
+  /// types
+  typedef ValueMap<const BasicBlock *, void *>
+      BasicBlockAddressMapTy;
+  /// data
+  TargetMachine &TM;       // The current target we are compiling to
+  TargetJITInfo &TJI;      // The JITInfo for the target we are compiling to
+  JITCodeEmitter *JCE;     // JCE object
+  std::vector<JITEventListener*> EventListeners;
+
+  /// AllocateGVsWithCode - Some applications require that global variables and
+  /// code be allocated into the same region of memory, in which case this flag
+  /// should be set to true.  Doing so breaks freeMachineCodeForFunction.
+  bool AllocateGVsWithCode;
+
+  /// True while the JIT is generating code.  Used to assert against recursive
+  /// entry.
+  bool isAlreadyCodeGenerating;
+
+  JITState *jitstate;
+
+  /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+  /// actualized version, only filled for basic blocks that have their address
+  /// taken.
+  BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
+  JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+      JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+      bool AllocateGVsWithCode);
+public:
+  ~JIT();
+
+  static void Register() {
+    JITCtor = createJIT;
+  }
+
+  /// getJITInfo - Return the target JIT information structure.
+  ///
+  TargetJITInfo &getJITInfo() const { return TJI; }
+
+  /// create - Create and return a new JIT compiler if there is one available
+  /// for the current target.  Otherwise, return null.
+  ///
+  static ExecutionEngine *create(Module *M,
+                                 std::string *Err,
+                                 JITMemoryManager *JMM,
+                                 CodeGenOpt::Level OptLevel =
+                                   CodeGenOpt::Default,
+                                 bool GVsWithCode = true,
+                                 CodeModel::Model CMM = CodeModel::Default) {
+    return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
+                                      CMM);
+  }
+
+  virtual void addModule(Module *M);
+
+  /// removeModule - Remove a Module from the list of modules.  Returns true if
+  /// M is found.
+  virtual bool removeModule(Module *M);
+
+  /// runFunction - Start execution with the specified function and arguments.
+  ///
+  virtual GenericValue runFunction(Function *F,
+                                   const std::vector<GenericValue> &ArgValues);
+
+  /// getPointerToNamedFunction - This method returns the address of the
+  /// specified function by using the dlsym function call.  As such it is only
+  /// useful for resolving library symbols, not code generated symbols.
+  ///
+  /// If AbortOnFailure is false and no function with the given name is
+  /// found, this function silently returns a null pointer.  Otherwise,
+  /// it prints a message to stderr and aborts.
+  ///
+  void *getPointerToNamedFunction(const std::string &Name,
+                                  bool AbortOnFailure = true);
+
+  // CompilationCallback - Invoked the first time that a call site is found,
+  // which causes lazy compilation of the target function.
+  //
+  static void CompilationCallback();
+
+  /// getPointerToFunction - This returns the address of the specified function,
+  /// compiling it if necessary.
+  ///
+  void *getPointerToFunction(Function *F);
+
+  /// addPointerToBasicBlock - Adds address of the specific basic block.
+  void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+  /// clearPointerToBasicBlock - Removes address of specific basic block.
+  void clearPointerToBasicBlock(const BasicBlock *BB);
+
+  /// getPointerToBasicBlock - This returns the address of the specified basic
+  /// block, assuming function is compiled.
+  void *getPointerToBasicBlock(BasicBlock *BB);
+
+  /// getOrEmitGlobalVariable - Return the address of the specified global
+  /// variable, possibly emitting it to memory if needed.  This is used by the
+  /// Emitter.
+  void *getOrEmitGlobalVariable(const GlobalVariable *GV);
+
+  /// getPointerToFunctionOrStub - If the specified function has been
+  /// code-gen'd, return a pointer to the function.  If not, compile it, or use
+  /// a stub to implement lazy compilation if available.
+  ///
+  void *getPointerToFunctionOrStub(Function *F);
+
+  /// recompileAndRelinkFunction - This method is used to force a function
+  /// which has already been compiled, to be compiled again, possibly
+  /// after it has been modified.  Then the entry to the old copy is overwritten
+  /// with a branch to the new copy.  If there was no old copy, this acts
+  /// just like JIT::getPointerToFunction().
+  ///
+  void *recompileAndRelinkFunction(Function *F);
+
+  /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+  /// Function.
+  ///
+  void freeMachineCodeForFunction(Function *F);
+
+  /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
+  /// function was encountered.  Add it to a pending list to be processed after
+  /// the current function.
+  ///
+  void addPendingFunction(Function *F);
+
+  /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+  ///
+  JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+  /// selectTarget - Pick a target either via -march or by guessing the native
+  /// arch.  Add any CPU features specified via -mcpu or -mattr.
+  static TargetMachine *selectTarget(Module *M,
+                                     StringRef MArch,
+                                     StringRef MCPU,
+                                     const SmallVectorImpl<std::string>& MAttrs,
+                                     std::string *Err);
+
+  static ExecutionEngine *createJIT(Module *M,
+                                    std::string *ErrorStr,
+                                    JITMemoryManager *JMM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool GVsWithCode,
+                                    CodeModel::Model CMM,
+                                    StringRef MArch,
+                                    StringRef MCPU,
+                                    const SmallVectorImpl<std::string>& MAttrs);
+
+  // Run the JIT on F and return information about the generated code
+  void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
+
+  virtual void RegisterJITEventListener(JITEventListener *L);
+  virtual void UnregisterJITEventListener(JITEventListener *L);
+  /// These functions correspond to the methods on JITEventListener.  They
+  /// iterate over the registered listeners and call the corresponding method on
+  /// each.
+  void NotifyFunctionEmitted(
+      const Function &F, void *Code, size_t Size,
+      const JITEvent_EmittedFunctionDetails &Details);
+  void NotifyFreeingMachineCode(void *OldPtr);
+
+  BasicBlockAddressMapTy &
+  getBasicBlockAddressMap(const MutexGuard &) {
+    return BasicBlockAddressMap;
+  }
+
+
+private:
+  static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+                                       TargetMachine &tm);
+  void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
+  void updateFunctionStub(Function *F);
+  void jitTheFunction(Function *F, const MutexGuard &locked);
+
+protected:
+
+  /// getMemoryForGV - Allocate memory for a global variable.
+  virtual char* getMemoryForGV(const GlobalVariable* GV);
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
new file mode 100644
index 00000000000..3b5acb7ecc4
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -0,0 +1,212 @@
+//===-- JITDebugRegisterer.cpp - Register debug symbols for JIT -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JITDebugRegisterer.h"
+#include "../../CodeGen/ELF.h"
+#include "../../CodeGen/ELFWriter.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+  // Debuggers put a breakpoint in this function.
+  LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { }
+
+  // We put information about the JITed function in this global, which the
+  // debugger reads.  Make sure to specify the version statically, because the
+  // debugger checks the version before we can set it during runtime.
+  struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+
+}
+
+namespace {
+
+  /// JITDebugLock - Used to serialize all code registration events, since they
+  /// modify global variables.
+  sys::Mutex JITDebugLock;
+
+}
+
+JITDebugRegisterer::JITDebugRegisterer(TargetMachine &tm) : TM(tm), FnMap() { }
+
+JITDebugRegisterer::~JITDebugRegisterer() {
+  // Free all ELF memory.
+  for (RegisteredFunctionsMap::iterator I = FnMap.begin(), E = FnMap.end();
+       I != E; ++I) {
+    // Call the private method that doesn't update the map so our iterator
+    // doesn't break.
+    UnregisterFunctionInternal(I);
+  }
+  FnMap.clear();
+}
+
+std::string JITDebugRegisterer::MakeELF(const Function *F, DebugInfo &I) {
+  // Stack allocate an empty module with an empty LLVMContext for the ELFWriter
+  // API.  We don't use the real module because then the ELFWriter would write
+  // out unnecessary GlobalValues during finalization.
+  LLVMContext Context;
+  Module M("", Context);
+
+  // Make a buffer for the ELF in memory.
+  std::string Buffer;
+  raw_string_ostream O(Buffer);
+  ELFWriter EW(O, TM);
+  EW.doInitialization(M);
+
+  // Copy the binary into the .text section.  This isn't necessary, but it's
+  // useful to be able to disassemble the ELF by hand.
+  ELFSection &Text = EW.getTextSection(const_cast<Function *>(F));
+  Text.Addr = (uint64_t)I.FnStart;
+  // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+  // instead of a vector.
+  Text.getData().assign(I.FnStart, I.FnEnd);
+
+  // Copy the exception handling call frame information into the .eh_frame
+  // section.  This allows GDB to get a good stack trace, particularly on
+  // linux x86_64.
+  // Mark this as a PROGBITS section that needs to be loaded into memory at
+  // runtime.
+  ELFSection &EH = EW.getSection(".eh_frame", ELF::SHT_PROGBITS,
+                                 ELF::SHF_ALLOC);
+  // Pointers in the DWARF EH info are all relative to the EH frame start,
+  // which is stored here.
+  EH.Addr = (uint64_t)I.EhStart;
+  // TODO: We could eliminate this copy if we somehow used a pointer/size pair
+  // instead of a vector.
+  EH.getData().assign(I.EhStart, I.EhEnd);
+
+  // Add this single function to the symbol table, so the debugger prints the
+  // name instead of '???'.  We give the symbol default global visibility.
+  ELFSym *FnSym = ELFSym::getGV(F,
+                                ELF::STB_GLOBAL,
+                                ELF::STT_FUNC,
+                                ELF::STV_DEFAULT);
+  FnSym->SectionIdx = Text.SectionIdx;
+  FnSym->Size = I.FnEnd - I.FnStart;
+  FnSym->Value = 0;  // Offset from start of section.
+  EW.SymbolList.push_back(FnSym);
+
+  EW.doFinalization(M);
+  O.flush();
+
+  // When trying to debug why GDB isn't getting the debug info right, it's
+  // awfully helpful to write the object file to disk so that it can be
+  // inspected with readelf and objdump.
+  if (JITEmitDebugInfoToDisk) {
+    std::string Filename;
+    raw_string_ostream O2(Filename);
+    O2 << "/tmp/llvm_function_" << I.FnStart << "_" << F->getNameStr() << ".o";
+    O2.flush();
+    std::string Errors;
+    raw_fd_ostream O3(Filename.c_str(), Errors);
+    O3 << Buffer;
+    O3.close();
+  }
+
+  return Buffer;
+}
+
+void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) {
+  // TODO: Support non-ELF platforms.
+  if (!TM.getELFWriterInfo())
+    return;
+
+  std::string Buffer = MakeELF(F, I);
+
+  jit_code_entry *JITCodeEntry = new jit_code_entry();
+  JITCodeEntry->symfile_addr = Buffer.c_str();
+  JITCodeEntry->symfile_size = Buffer.size();
+
+  // Add a mapping from F to the entry and buffer, so we can delete this
+  // info later.
+  FnMap[F] = std::make_pair(Buffer, JITCodeEntry);
+
+  // Acquire the lock and do the registration.
+  {
+    MutexGuard locked(JITDebugLock);
+    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+
+    // Insert this entry at the head of the list.
+    JITCodeEntry->prev_entry = NULL;
+    jit_code_entry *NextEntry = __jit_debug_descriptor.first_entry;
+    JITCodeEntry->next_entry = NextEntry;
+    if (NextEntry != NULL) {
+      NextEntry->prev_entry = JITCodeEntry;
+    }
+    __jit_debug_descriptor.first_entry = JITCodeEntry;
+    __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+    __jit_debug_register_code();
+  }
+}
+
+void JITDebugRegisterer::UnregisterFunctionInternal(
+    RegisteredFunctionsMap::iterator I) {
+  jit_code_entry *&JITCodeEntry = I->second.second;
+
+  // Acquire the lock and do the unregistration.
+  {
+    MutexGuard locked(JITDebugLock);
+    __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
+
+    // Remove the jit_code_entry from the linked list.
+    jit_code_entry *PrevEntry = JITCodeEntry->prev_entry;
+    jit_code_entry *NextEntry = JITCodeEntry->next_entry;
+    if (NextEntry) {
+      NextEntry->prev_entry = PrevEntry;
+    }
+    if (PrevEntry) {
+      PrevEntry->next_entry = NextEntry;
+    } else {
+      assert(__jit_debug_descriptor.first_entry == JITCodeEntry);
+      __jit_debug_descriptor.first_entry = NextEntry;
+    }
+
+    // Tell GDB which entry we removed, and unregister the code.
+    __jit_debug_descriptor.relevant_entry = JITCodeEntry;
+    __jit_debug_register_code();
+  }
+
+  delete JITCodeEntry;
+  JITCodeEntry = NULL;
+
+  // Free the ELF file in memory.
+  std::string &Buffer = I->second.first;
+  Buffer.clear();
+}
+
+void JITDebugRegisterer::UnregisterFunction(const Function *F) {
+  // TODO: Support non-ELF platforms.
+  if (!TM.getELFWriterInfo())
+    return;
+
+  RegisteredFunctionsMap::iterator I = FnMap.find(F);
+  if (I == FnMap.end()) return;
+  UnregisterFunctionInternal(I);
+  FnMap.erase(I);
+}
+
+} // end namespace llvm
diff --git a/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.h b/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
new file mode 100644
index 00000000000..dce506bbfef
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITDebugRegisterer.h
@@ -0,0 +1,116 @@
+//===-- JITDebugRegisterer.h - Register debug symbols for JIT -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDebugRegisterer object that is used by the JIT to
+// register debug info with debuggers like GDB.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/DataTypes.h"
+#include <string>
+
+// This must be kept in sync with gdb/gdb/jit.h .
+extern "C" {
+
+  typedef enum {
+    JIT_NOACTION = 0,
+    JIT_REGISTER_FN,
+    JIT_UNREGISTER_FN
+  } jit_actions_t;
+
+  struct jit_code_entry {
+    struct jit_code_entry *next_entry;
+    struct jit_code_entry *prev_entry;
+    const char *symfile_addr;
+    uint64_t symfile_size;
+  };
+
+  struct jit_descriptor {
+    uint32_t version;
+    // This should be jit_actions_t, but we want to be specific about the
+    // bit-width.
+    uint32_t action_flag;
+    struct jit_code_entry *relevant_entry;
+    struct jit_code_entry *first_entry;
+  };
+
+}
+
+namespace llvm {
+
+class ELFSection;
+class Function;
+class TargetMachine;
+
+
+/// This class encapsulates information we want to send to the debugger.
+///
+struct DebugInfo {
+  uint8_t *FnStart;
+  uint8_t *FnEnd;
+  uint8_t *EhStart;
+  uint8_t *EhEnd;
+
+  DebugInfo() : FnStart(0), FnEnd(0), EhStart(0), EhEnd(0) {}
+};
+
+typedef DenseMap< const Function*, std::pair<std::string, jit_code_entry*> >
+  RegisteredFunctionsMap;
+
+/// This class registers debug info for JITed code with an attached debugger.
+/// Without proper debug info, GDB can't do things like source level debugging
+/// or even produce a proper stack trace on linux-x86_64.  To use this class,
+/// whenever a function is JITed, create a DebugInfo struct and pass it to the
+/// RegisterFunction method.  The method will then do whatever is necessary to
+/// inform the debugger about the JITed function.
+class JITDebugRegisterer {
+
+  TargetMachine &TM;
+
+  /// FnMap - A map of functions that have been registered to the associated
+  /// temporary files.  Used for cleanup.
+  RegisteredFunctionsMap FnMap;
+
+  /// MakeELF - Builds the ELF file in memory and returns a std::string that
+  /// contains the ELF.
+  std::string MakeELF(const Function *F, DebugInfo &I);
+
+public:
+  JITDebugRegisterer(TargetMachine &tm);
+
+  /// ~JITDebugRegisterer - Unregisters all code and frees symbol files.
+  ///
+  ~JITDebugRegisterer();
+
+  /// RegisterFunction - Register debug info for the given function with an
+  /// attached debugger.  Clients must call UnregisterFunction on all
+  /// registered functions before deleting them to free the associated symbol
+  /// file and unregister it from the debugger.
+  void RegisterFunction(const Function *F, DebugInfo &I);
+
+  /// UnregisterFunction - Unregister the debug info for the given function
+  /// from the debugger and free associated memory.
+  void UnregisterFunction(const Function *F);
+
+private:
+  /// UnregisterFunctionInternal - Unregister the debug info for the given
+  /// function from the debugger and delete any temporary files.  The private
+  /// version of this method does not remove the function from FnMap so that it
+  /// can be called while iterating over FnMap.
+  void UnregisterFunctionInternal(RegisteredFunctionsMap::iterator I);
+
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DEBUGREGISTERER_H
diff --git a/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
new file mode 100644
index 00000000000..f54cccadea6
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -0,0 +1,598 @@
+//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/Function.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+using namespace llvm;
+
+JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {}
+
+
+unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F,
+                                               JITCodeEmitter& jce,
+                                               unsigned char* StartFunction,
+                                               unsigned char* EndFunction,
+                                               unsigned char* &EHFramePtr) {
+  assert(MMI && "MachineModuleInfo not registered!");
+
+  const TargetMachine& TM = F.getTarget();
+  TD = TM.getTargetData();
+  stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection();
+  RI = TM.getRegisterInfo();
+  TFI = TM.getFrameLowering();
+  JCE = &jce;
+
+  unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction,
+                                                     EndFunction);
+
+  unsigned char* Result = 0;
+
+  const std::vector<const Function *> Personalities = MMI->getPersonalities();
+  EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]);
+
+  Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr,
+                       StartFunction, EndFunction, ExceptionTable);
+
+  return Result;
+}
+
+
+void
+JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr,
+                                const std::vector<MachineMove> &Moves) const {
+  unsigned PointerSize = TD->getPointerSize();
+  int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+    PointerSize : -PointerSize;
+  MCSymbol *BaseLabel = 0;
+
+  for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+    const MachineMove &Move = Moves[i];
+    MCSymbol *Label = Move.getLabel();
+
+    // Throw out move if the label is invalid.
+    if (Label && (*JCE->getLabelLocations())[Label] == 0)
+      continue;
+
+    intptr_t LabelPtr = 0;
+    if (Label) LabelPtr = JCE->getLabelAddress(Label);
+
+    const MachineLocation &Dst = Move.getDestination();
+    const MachineLocation &Src = Move.getSource();
+
+    // Advance row if new location.
+    if (BaseLabelPtr && Label && BaseLabel != Label) {
+      JCE->emitByte(dwarf::DW_CFA_advance_loc4);
+      JCE->emitInt32(LabelPtr - BaseLabelPtr);
+
+      BaseLabel = Label;
+      BaseLabelPtr = LabelPtr;
+    }
+
+    // If advancing cfa.
+    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+      if (!Src.isReg()) {
+        if (Src.getReg() == MachineLocation::VirtualFP) {
+          JCE->emitByte(dwarf::DW_CFA_def_cfa_offset);
+        } else {
+          JCE->emitByte(dwarf::DW_CFA_def_cfa);
+          JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true));
+        }
+
+        JCE->emitULEB128Bytes(-Src.getOffset());
+      } else {
+        llvm_unreachable("Machine move not supported yet.");
+      }
+    } else if (Src.isReg() &&
+               Src.getReg() == MachineLocation::VirtualFP) {
+      if (Dst.isReg()) {
+        JCE->emitByte(dwarf::DW_CFA_def_cfa_register);
+        JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true));
+      } else {
+        llvm_unreachable("Machine move not supported yet.");
+      }
+    } else {
+      unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true);
+      int Offset = Dst.getOffset() / stackGrowth;
+
+      if (Offset < 0) {
+        JCE->emitByte(dwarf::DW_CFA_offset_extended_sf);
+        JCE->emitULEB128Bytes(Reg);
+        JCE->emitSLEB128Bytes(Offset);
+      } else if (Reg < 64) {
+        JCE->emitByte(dwarf::DW_CFA_offset + Reg);
+        JCE->emitULEB128Bytes(Offset);
+      } else {
+        JCE->emitByte(dwarf::DW_CFA_offset_extended);
+        JCE->emitULEB128Bytes(Reg);
+        JCE->emitULEB128Bytes(Offset);
+      }
+    }
+  }
+}
+
+/// SharedTypeIds - How many leading type ids two landing pads have in common.
+static unsigned SharedTypeIds(const LandingPadInfo *L,
+                              const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+  unsigned Count = 0;
+
+  for (; Count != MinSize; ++Count)
+    if (LIds[Count] != RIds[Count])
+      return Count;
+
+  return Count;
+}
+
+
+/// PadLT - Order landing pads lexicographically by type id.
+static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) {
+  const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds;
+  unsigned LSize = LIds.size(), RSize = RIds.size();
+  unsigned MinSize = LSize < RSize ? LSize : RSize;
+
+  for (unsigned i = 0; i != MinSize; ++i)
+    if (LIds[i] != RIds[i])
+      return LIds[i] < RIds[i];
+
+  return LSize < RSize;
+}
+
+namespace {
+
+/// ActionEntry - Structure describing an entry in the actions table.
+struct ActionEntry {
+  int ValueForTypeID; // The value to write - may not be equal to the type id.
+  int NextAction;
+  struct ActionEntry *Previous;
+};
+
+/// PadRange - Structure holding a try-range and the associated landing pad.
+struct PadRange {
+  // The index of the landing pad.
+  unsigned PadIndex;
+  // The index of the begin and end labels in the landing pad's label lists.
+  unsigned RangeIndex;
+};
+
+typedef DenseMap<MCSymbol*, PadRange> RangeMapType;
+
+/// CallSiteEntry - Structure describing an entry in the call-site table.
+struct CallSiteEntry {
+  MCSymbol *BeginLabel; // zero indicates the start of the function.
+  MCSymbol *EndLabel;   // zero indicates the end of the function.
+  MCSymbol *PadLabel;   // zero indicates that there is no landing pad.
+  unsigned Action;
+};
+
+}
+
+unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF,
+                                         unsigned char* StartFunction,
+                                         unsigned char* EndFunction) const {
+  assert(MMI && "MachineModuleInfo not registered!");
+
+  // Map all labels and get rid of any dead landing pads.
+  MMI->TidyLandingPads(JCE->getLabelLocations());
+
+  const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos();
+  const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+  const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+  if (PadInfos.empty()) return 0;
+
+  // Sort the landing pads in order of their type ids.  This is used to fold
+  // duplicate actions.
+  SmallVector<const LandingPadInfo *, 64> LandingPads;
+  LandingPads.reserve(PadInfos.size());
+  for (unsigned i = 0, N = PadInfos.size(); i != N; ++i)
+    LandingPads.push_back(&PadInfos[i]);
+  std::sort(LandingPads.begin(), LandingPads.end(), PadLT);
+
+  // Negative type ids index into FilterIds, positive type ids index into
+  // TypeInfos.  The value written for a positive type id is just the type
+  // id itself.  For a negative type id, however, the value written is the
+  // (negative) byte offset of the corresponding FilterIds entry.  The byte
+  // offset is usually equal to the type id, because the FilterIds entries
+  // are written using a variable width encoding which outputs one byte per
+  // entry as long as the value written is not too large, but can differ.
+  // This kind of complication does not occur for positive type ids because
+  // type infos are output using a fixed width encoding.
+  // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i].
+  SmallVector<int, 16> FilterOffsets;
+  FilterOffsets.reserve(FilterIds.size());
+  int Offset = -1;
+  for(std::vector<unsigned>::const_iterator I = FilterIds.begin(),
+      E = FilterIds.end(); I != E; ++I) {
+    FilterOffsets.push_back(Offset);
+    Offset -= MCAsmInfo::getULEB128Size(*I);
+  }
+
+  // Compute the actions table and gather the first action index for each
+  // landing pad site.
+  SmallVector<ActionEntry, 32> Actions;
+  SmallVector<unsigned, 64> FirstActions;
+  FirstActions.reserve(LandingPads.size());
+
+  int FirstAction = 0;
+  unsigned SizeActions = 0;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LP = LandingPads[i];
+    const std::vector<int> &TypeIds = LP->TypeIds;
+    const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0;
+    unsigned SizeSiteActions = 0;
+
+    if (NumShared < TypeIds.size()) {
+      unsigned SizeAction = 0;
+      ActionEntry *PrevAction = 0;
+
+      if (NumShared) {
+        const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size();
+        assert(Actions.size());
+        PrevAction = &Actions.back();
+        SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) +
+          MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+        for (unsigned j = NumShared; j != SizePrevIds; ++j) {
+          SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID);
+          SizeAction += -PrevAction->NextAction;
+          PrevAction = PrevAction->Previous;
+        }
+      }
+
+      // Compute the actions.
+      for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) {
+        int TypeID = TypeIds[I];
+        assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!");
+        int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID;
+        unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID);
+
+        int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0;
+        SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction);
+        SizeSiteActions += SizeAction;
+
+        ActionEntry Action = {ValueForTypeID, NextAction, PrevAction};
+        Actions.push_back(Action);
+
+        PrevAction = &Actions.back();
+      }
+
+      // Record the first action of the landing pad site.
+      FirstAction = SizeActions + SizeSiteActions - SizeAction + 1;
+    } // else identical - re-use previous FirstAction
+
+    FirstActions.push_back(FirstAction);
+
+    // Compute this site's contribution to size.
+    SizeActions += SizeSiteActions;
+  }
+
+  // Compute the call-site table.  Entries must be ordered by address.
+  SmallVector<CallSiteEntry, 64> CallSites;
+
+  RangeMapType PadMap;
+  for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) {
+    const LandingPadInfo *LandingPad = LandingPads[i];
+    for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) {
+      MCSymbol *BeginLabel = LandingPad->BeginLabels[j];
+      assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!");
+      PadRange P = { i, j };
+      PadMap[BeginLabel] = P;
+    }
+  }
+
+  bool MayThrow = false;
+  MCSymbol *LastLabel = 0;
+  for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+       I != E; ++I) {
+    for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
+         MI != E; ++MI) {
+      if (!MI->isLabel()) {
+        MayThrow |= MI->getDesc().isCall();
+        continue;
+      }
+
+      MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol();
+      assert(BeginLabel && "Invalid label!");
+
+      if (BeginLabel == LastLabel)
+        MayThrow = false;
+
+      RangeMapType::iterator L = PadMap.find(BeginLabel);
+
+      if (L == PadMap.end())
+        continue;
+
+      PadRange P = L->second;
+      const LandingPadInfo *LandingPad = LandingPads[P.PadIndex];
+
+      assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] &&
+             "Inconsistent landing pad map!");
+
+      // If some instruction between the previous try-range and this one may
+      // throw, create a call-site entry with no landing pad for the region
+      // between the try-ranges.
+      if (MayThrow) {
+        CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0};
+        CallSites.push_back(Site);
+      }
+
+      LastLabel = LandingPad->EndLabels[P.RangeIndex];
+      CallSiteEntry Site = {BeginLabel, LastLabel,
+                            LandingPad->LandingPadLabel,
+                            FirstActions[P.PadIndex]};
+
+      assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel &&
+             "Invalid landing pad!");
+
+      // Try to merge with the previous call-site.
+      if (CallSites.size()) {
+        CallSiteEntry &Prev = CallSites.back();
+        if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
+          // Extend the range of the previous entry.
+          Prev.EndLabel = Site.EndLabel;
+          continue;
+        }
+      }
+
+      // Otherwise, create a new call-site.
+      CallSites.push_back(Site);
+    }
+  }
+  // If some instruction between the previous try-range and the end of the
+  // function may throw, create a call-site entry with no landing pad for the
+  // region following the try-range.
+  if (MayThrow) {
+    CallSiteEntry Site = {LastLabel, 0, 0, 0};
+    CallSites.push_back(Site);
+  }
+
+  // Final tallies.
+  unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start.
+                                           sizeof(int32_t) + // Site length.
+                                           sizeof(int32_t)); // Landing pad.
+  for (unsigned i = 0, e = CallSites.size(); i < e; ++i)
+    SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+
+  unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize();
+
+  unsigned TypeOffset = sizeof(int8_t) + // Call site format
+    // Call-site table length
+    MCAsmInfo::getULEB128Size(SizeSites) +
+    SizeSites + SizeActions + SizeTypes;
+
+  // Begin the exception table.
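+  //
+  // The bytes emitted below form a standard DWARF LSDA; roughly:
+  //
+  //   header:      LPStart format   (DW_EH_PE_omit: landing pads are given
+  //                                  relative to the function start)
+  //                TType format     (DW_EH_PE_absptr: absolute type-info
+  //                                  pointers)
+  //                TType base off.  (ULEB128: the TypeOffset computed above)
+  //                call-site format (DW_EH_PE_udata4)
+  //                call-site length (ULEB128: the SizeSites computed above)
+  //   call sites:  {start, length, landing pad, action} records
+  //   actions:     {type id value, next action} SLEB128 pairs
+  //   type table:  TypeInfos as pointers, then FilterIds as ULEB128s
+  //
+  // This sketch just mirrors the emission code that follows; see the DWARF
+  // EH documentation for the authoritative layout.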
+  JCE->emitAlignmentWithFill(4, 0);
+  // Asm->EOL("Padding");
+
+  unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue();
+
+  // Emit the header.
+  JCE->emitByte(dwarf::DW_EH_PE_omit);
+  // Asm->EOL("LPStart format (DW_EH_PE_omit)");
+  JCE->emitByte(dwarf::DW_EH_PE_absptr);
+  // Asm->EOL("TType format (DW_EH_PE_absptr)");
+  JCE->emitULEB128Bytes(TypeOffset);
+  // Asm->EOL("TType base offset");
+  JCE->emitByte(dwarf::DW_EH_PE_udata4);
+  // Asm->EOL("Call site format (DW_EH_PE_udata4)");
+  JCE->emitULEB128Bytes(SizeSites);
+  // Asm->EOL("Call-site table length");
+
+  // Emit the landing pad site information.
+  for (unsigned i = 0; i < CallSites.size(); ++i) {
+    CallSiteEntry &S = CallSites[i];
+    intptr_t BeginLabelPtr = 0;
+    intptr_t EndLabelPtr = 0;
+
+    if (!S.BeginLabel) {
+      BeginLabelPtr = (intptr_t)StartFunction;
+      JCE->emitInt32(0);
+    } else {
+      BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel);
+      JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction);
+    }
+
+    // Asm->EOL("Region start");
+
+    if (!S.EndLabel)
+      EndLabelPtr = (intptr_t)EndFunction;
+    else
+      EndLabelPtr = JCE->getLabelAddress(S.EndLabel);
+
+    JCE->emitInt32(EndLabelPtr - BeginLabelPtr);
+    //Asm->EOL("Region length");
+
+    if (!S.PadLabel) {
+      JCE->emitInt32(0);
+    } else {
+      unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel);
+      JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction);
+    }
+    // Asm->EOL("Landing pad");
+
+    JCE->emitULEB128Bytes(S.Action);
+    // Asm->EOL("Action");
+  }
+
+  // Emit the actions.
+  for (unsigned I = 0, N = Actions.size(); I != N; ++I) {
+    ActionEntry &Action = Actions[I];
+
+    JCE->emitSLEB128Bytes(Action.ValueForTypeID);
+    //Asm->EOL("TypeInfo index");
+    JCE->emitSLEB128Bytes(Action.NextAction);
+    //Asm->EOL("Next action");
+  }
+
+  // Emit the type ids.
+  for (unsigned M = TypeInfos.size(); M; --M) {
+    const GlobalVariable *GV = TypeInfos[M - 1];
+
+    if (GV) {
+      if (TD->getPointerSize() == sizeof(int32_t))
+        JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+      else
+        JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV));
+    } else {
+      if (TD->getPointerSize() == sizeof(int32_t))
+        JCE->emitInt32(0);
+      else
+        JCE->emitInt64(0);
+    }
+    // Asm->EOL("TypeInfo");
+  }
+
+  // Emit the filter typeids.
+  for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) {
+    unsigned TypeID = FilterIds[j];
+    JCE->emitULEB128Bytes(TypeID);
+    //Asm->EOL("Filter TypeInfo index");
+  }
+
+  JCE->emitAlignmentWithFill(4, 0);
+
+  return DwarfExceptionTable;
+}
+
+unsigned char*
+JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const {
+  unsigned PointerSize = TD->getPointerSize();
+  int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ?
+                    PointerSize : -PointerSize;
+
+  unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue();
+  // EH Common Frame header
+  JCE->allocateSpace(4, 0);
+  unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+  JCE->emitInt32((int)0);
+  JCE->emitByte(dwarf::DW_CIE_VERSION);
+  JCE->emitString(Personality ? "zPLR" : "zR");
+  JCE->emitULEB128Bytes(1);
+  JCE->emitSLEB128Bytes(stackGrowth);
+  JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+  if (Personality) {
+    // Augmentation Size: 3 small ULEBs of one byte each, and the personality
+    // function whose size is PointerSize.
+    JCE->emitULEB128Bytes(3 + PointerSize);
+
+    // We set the encoding of the personality as direct encoding because we use
+    // the function pointer.  The encoding is not relative because the current
+    // PC value may be bigger than the personality function pointer.
+    if (PointerSize == 4) {
+      JCE->emitByte(dwarf::DW_EH_PE_sdata4);
+      JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality)));
+    } else {
+      JCE->emitByte(dwarf::DW_EH_PE_sdata8);
+      JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality)));
+    }
+
+    // LSDA encoding: This must match the encoding used in EmitEHFrame()
+    if (PointerSize == 4)
+      JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+    else
+      JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8);
+    JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+  } else {
+    JCE->emitULEB128Bytes(1);
+    JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4);
+  }
+
+  std::vector<MachineMove> Moves;
+  TFI->getInitialFrameState(Moves);
+  EmitFrameMoves(0, Moves);
+
+  JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+  JCE->emitInt32At((uintptr_t*)StartCommonPtr,
+                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+                               FrameCommonBeginPtr));
+
+  return StartCommonPtr;
+}
+
+
+unsigned char*
+JITDwarfEmitter::EmitEHFrame(const Function* Personality,
+                             unsigned char* StartCommonPtr,
+                             unsigned char* StartFunction,
+                             unsigned char* EndFunction,
+                             unsigned char* ExceptionTable) const {
+  unsigned PointerSize = TD->getPointerSize();
+
+  // EH frame header.
+  unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue();
+  JCE->allocateSpace(4, 0);
+  unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue();
+  // FDE CIE Offset
+  JCE->emitInt32(FrameBeginPtr - StartCommonPtr);
+  JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue());
+  JCE->emitInt32(EndFunction - StartFunction);
+
+  // If there is a personality and landing pads then point to the language
+  // specific data area in the exception table.
+  if (Personality) {
+    JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8);
+
+    if (PointerSize == 4) {
+      if (!MMI->getLandingPads().empty())
+        JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+      else
+        JCE->emitInt32((int)0);
+    } else {
+      if (!MMI->getLandingPads().empty())
+        JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue());
+      else
+        JCE->emitInt64((int)0);
+    }
+  } else {
+    JCE->emitULEB128Bytes(0);
+  }
+
+  // Indicate locations of function specific callee saved registers in
+  // frame.
+  EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves());
+
+  JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop);
+
+  // Indicate the size of the table
+  JCE->emitInt32At((uintptr_t*)StartEHPtr,
+                   (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() -
+                               StartEHPtr));
+
+  // Double zeroes for the unwind runtime
+  if (PointerSize == 8) {
+    JCE->emitInt64(0);
+    JCE->emitInt64(0);
+  } else {
+    JCE->emitInt32(0);
+    JCE->emitInt32(0);
+  }
+
+  return StartEHPtr;
+}
diff --git a/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
new file mode 100644
index 00000000000..9495697a1aa
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
@@ -0,0 +1,73 @@
+//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITDwarfEmitter object that is used by the JIT to
+// write dwarf tables to memory.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
+
+namespace llvm {
+
+class Function;
+class JITCodeEmitter;
+class MachineFunction;
+class MachineModuleInfo;
+class MachineMove;
+class TargetData;
+class TargetFrameLowering;
+class TargetMachine;
+class TargetRegisterInfo;
+
+class JITDwarfEmitter {
+  const TargetData* TD;
+  JITCodeEmitter* JCE;
+  const TargetRegisterInfo* RI;
+  const TargetFrameLowering *TFI;
+  MachineModuleInfo* MMI;
+  JIT& Jit;
+  bool stackGrowthDirection;
+
+  unsigned char* EmitExceptionTable(MachineFunction* MF,
+                                    unsigned char* StartFunction,
+                                    unsigned char* EndFunction) const;
+
+  void EmitFrameMoves(intptr_t BaseLabelPtr,
+                      const std::vector<MachineMove> &Moves) const;
+
+  unsigned char* EmitCommonEHFrame(const Function* Personality) const;
+
+  unsigned char* EmitEHFrame(const Function* Personality,
+                             unsigned char* StartBufferPtr,
+                             unsigned char* StartFunction,
+                             unsigned char* EndFunction,
+                             unsigned char* ExceptionTable) const;
+
+public:
+
+  JITDwarfEmitter(JIT& jit);
+
+  unsigned char* EmitDwarfTable(MachineFunction& F,
+                                JITCodeEmitter& JCE,
+                                unsigned char* StartFunction,
+                                unsigned char* EndFunction,
+                                unsigned char* &EHFramePtr);
+
+
+  void setModuleInfo(MachineModuleInfo* Info) {
+    MMI = Info;
+  }
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H
diff --git a/final/lib/ExecutionEngine/JIT/JITEmitter.cpp b/final/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 00000000000..fa3c5a01c1e
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,1305 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a MachineCodeEmitter object that is used by the JIT to
+// write machine code to memory and remember where relocatable values are.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "JIT.h"
+#include "JITDebugRegisterer.h"
+#include "JITDwarfEmitter.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Constants.h"
+#include "llvm/Module.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRelocation.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetJITInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MutexGuard.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Disassembler.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/ValueMap.h"
+#include <algorithm>
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumBytes, "Number of bytes of machine code compiled");
+STATISTIC(NumRelos, "Number of relocations applied");
+STATISTIC(NumRetries, "Number of retries with more memory");
+
+
+// A declaration may stop being a declaration once it's fully read from bitcode.
+// This function returns true if F is fully read and is still a declaration.
+static bool isNonGhostDeclaration(const Function *F) {
+  return F->isDeclaration() && !F->isMaterializable();
+}
+
+//===----------------------------------------------------------------------===//
+// JIT lazy compilation code.
+//
+namespace {
+  class JITEmitter;
+  class JITResolverState;
+
+  template<typename ValueTy>
+  struct NoRAUWValueMapConfig : public ValueMapConfig<ValueTy> {
+    typedef JITResolverState *ExtraData;
+    static void onRAUW(JITResolverState *, Value *Old, Value *New) {
+      assert(false && "The JIT doesn't know how to handle a"
+             " RAUW on a value it has emitted.");
+    }
+  };
+
+  struct CallSiteValueMapConfig : public NoRAUWValueMapConfig<Function*> {
+    typedef JITResolverState *ExtraData;
+    static void onDelete(JITResolverState *JRS, Function *F);
+  };
+
+  class JITResolverState {
+  public:
+    typedef ValueMap<Function*, void*, NoRAUWValueMapConfig<Function*> >
+      FunctionToLazyStubMapTy;
+    typedef std::map<void*, AssertingVH<Function> > CallSiteToFunctionMapTy;
+    typedef ValueMap<Function*, SmallPtrSet<void*, 1>,
+                     CallSiteValueMapConfig> FunctionToCallSitesMapTy;
+    typedef std::map<AssertingVH<GlobalValue>, void*> GlobalToIndirectSymMapTy;
+  private:
+    /// FunctionToLazyStubMap - Keep track of the lazy stub created for a
+    /// particular function so that we can reuse them if necessary.
+    FunctionToLazyStubMapTy FunctionToLazyStubMap;
+
+    /// CallSiteToFunctionMap - Keep track of the function that each lazy call
+    /// site corresponds to, and vice versa.
+    CallSiteToFunctionMapTy CallSiteToFunctionMap;
+    FunctionToCallSitesMapTy FunctionToCallSitesMap;
+
+    /// GlobalToIndirectSymMap - Keep track of the indirect symbol created for a
+    /// particular GlobalVariable so that we can reuse them if necessary.
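+    /// (Each "indirect symbol" is a pointer-sized data slot holding the
+    /// global's resolved address, so code can load the address from the slot
+    /// instead of encoding it inline; see getGlobalValueIndirectSym.)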
+    GlobalToIndirectSymMapTy GlobalToIndirectSymMap;
+
+    /// Instance of the JIT this ResolverState serves.
+    JIT *TheJIT;
+
+  public:
+    JITResolverState(JIT *jit) : FunctionToLazyStubMap(this),
+                                 FunctionToCallSitesMap(this),
+                                 TheJIT(jit) {}
+
+    FunctionToLazyStubMapTy& getFunctionToLazyStubMap(
+      const MutexGuard& locked) {
+      assert(locked.holds(TheJIT->lock));
+      return FunctionToLazyStubMap;
+    }
+
+    GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) {
+      assert(locked.holds(TheJIT->lock));
+      return GlobalToIndirectSymMap;
+    }
+
+    std::pair<void*, Function*> LookupFunctionFromCallSite(
+      const MutexGuard &locked, void *CallSite) const {
+      assert(locked.holds(TheJIT->lock));
+
+      // The address given to us for the stub may not be exactly right, it might be
+      // a little bit after the stub. As such, use upper_bound to find it.
+      CallSiteToFunctionMapTy::const_iterator I =
+        CallSiteToFunctionMap.upper_bound(CallSite);
+      assert(I != CallSiteToFunctionMap.begin() &&
+             "This is not a known call site!");
+      --I;
+      return *I;
+    }
+
+    void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) {
+      assert(locked.holds(TheJIT->lock));
+
+      bool Inserted = CallSiteToFunctionMap.insert(
+        std::make_pair(CallSite, F)).second;
+      (void)Inserted;
+      assert(Inserted && "Pair was already in CallSiteToFunctionMap");
+      FunctionToCallSitesMap[F].insert(CallSite);
+    }
+
+    void EraseAllCallSitesForPrelocked(Function *F);
+
+    // Erases _all_ call sites regardless of their function.  This is used to
+    // unregister the stub addresses from the StubToResolverMap in
+    // ~JITResolver().
+    void EraseAllCallSitesPrelocked();
+  };
+
+  /// JITResolver - Keep track of, and resolve, call sites for functions that
+  /// have not yet been compiled.
+  class JITResolver {
+    typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy;
+    typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy;
+    typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy;
+
+    /// LazyResolverFn - The target lazy resolver function that we actually
+    /// rewrite instructions to use.
+    TargetJITInfo::LazyResolverFn LazyResolverFn;
+
+    JITResolverState state;
+
+    /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap
+    /// for external functions.  TODO: Of course, external functions don't need
+    /// a lazy stub.  It's actually here to make it more likely that far calls
+    /// succeed, but no single stub can guarantee that.  I'll remove this in a
+    /// subsequent checkin when I actually fix far calls.
+    std::map<void*, void*> ExternalFnToStubMap;
+
+    /// revGOTMap - map addresses to indexes in the GOT
+    std::map<void*, unsigned> revGOTMap;
+    unsigned nextGOTIndex;
+
+    JITEmitter &JE;
+
+    /// Instance of JIT corresponding to this Resolver.
+    JIT *TheJIT;
+
+  public:
+    explicit JITResolver(JIT &jit, JITEmitter &je)
+      : state(&jit), nextGOTIndex(0), JE(je), TheJIT(&jit) {
+      LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
+    }
+
+    ~JITResolver();
+
+    /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's
+    /// lazy-compilation stub if it has already been created.
+    void *getLazyFunctionStubIfAvailable(Function *F);
+
+    /// getLazyFunctionStub - This returns a pointer to a function's
+    /// lazy-compilation stub, creating one on demand as needed.
+    void *getLazyFunctionStub(Function *F);
+
+    /// getExternalFunctionStub - Return a stub for the function at the
+    /// specified address, created lazily on demand.
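+    /// (Such stubs act as a far-call safety net: on targets where a direct
+    /// call has limited displacement, e.g. +/-2GB for x86-64 near calls,
+    /// calling through a stub emitted near the code presumably keeps the
+    /// branch in range even when the external callee itself is far away.)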
+    void *getExternalFunctionStub(void *FnAddr);
+
+    /// getGlobalValueIndirectSym - Return an indirect symbol containing the
+    /// specified GV address.
+    void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
+
+    /// getGOTIndexForAddress - Return a new or existing index in the GOT for
+    /// an address.  This function only manages slots, it does not manage the
+    /// contents of the slots or the memory associated with the GOT.
+    unsigned getGOTIndexForAddr(void *addr);
+
+    /// JITCompilerFn - This function is called to resolve a stub to a compiled
+    /// address.  If the LLVM Function corresponding to the stub has not yet
+    /// been compiled, this function compiles it first.
+    static void *JITCompilerFn(void *Stub);
+  };
+
+  class StubToResolverMapTy {
+    /// Map a stub address to a specific instance of a JITResolver so that
+    /// lazily-compiled functions can find the right resolver to use.
+    ///
+    /// Guarded by Lock.
+    std::map<void*, JITResolver*> Map;
+
+    /// Guards Map from concurrent accesses.
+    mutable sys::Mutex Lock;
+
+  public:
+    /// Registers a Stub to be resolved by Resolver.
+    void RegisterStubResolver(void *Stub, JITResolver *Resolver) {
+      MutexGuard guard(Lock);
+      Map.insert(std::make_pair(Stub, Resolver));
+    }
+    /// Unregisters the Stub when it's invalidated.
+    void UnregisterStubResolver(void *Stub) {
+      MutexGuard guard(Lock);
+      Map.erase(Stub);
+    }
+    /// Returns the JITResolver instance that owns the Stub.
+    JITResolver *getResolverFromStub(void *Stub) const {
+      MutexGuard guard(Lock);
+      // The address given to us for the stub may not be exactly right, it might
+      // be a little bit after the stub. As such, use upper_bound to find it.
+      // This is the same trick as in LookupFunctionFromCallSite from
+      // JITResolverState.
+      std::map<void*, JITResolver*>::const_iterator I = Map.upper_bound(Stub);
+      assert(I != Map.begin() && "This is not a known stub!");
+      --I;
+      return I->second;
+    }
+    /// True if any stubs refer to the given resolver. Only used in an assert().
+    /// O(N)
+    bool ResolverHasStubs(JITResolver* Resolver) const {
+      MutexGuard guard(Lock);
+      for (std::map<void*, JITResolver*>::const_iterator I = Map.begin(),
+           E = Map.end(); I != E; ++I) {
+        if (I->second == Resolver)
+          return true;
+      }
+      return false;
+    }
+  };
+  /// This needs to be static so that a lazy call stub can access it with no
+  /// context except the address of the stub.
+  ManagedStatic<StubToResolverMapTy> StubToResolverMap;
+
+  /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+  /// used to output functions to memory for execution.
+  class JITEmitter : public JITCodeEmitter {
+    JITMemoryManager *MemMgr;
+
+    // When outputting a function stub in the context of some other function, we
+    // save BufferBegin/BufferEnd/CurBufferPtr here.
+    uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+    // When reattempting to JIT a function after running out of space, we store
+    // the estimated size of the function we're trying to JIT here, so we can
+    // ask the memory manager for at least this much space.  When we
+    // successfully emit the function, we reset this back to zero.
+    uintptr_t SizeEstimate;
+
+    /// Relocations - These are the relocations that the function needs, as
+    /// emitted.
+    std::vector<MachineRelocation> Relocations;
+
+    /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+    /// It is filled in by the StartMachineBasicBlock callback and queried by
+    /// the getMachineBasicBlockAddress callback.
+    std::vector<uintptr_t> MBBLocations;
+
+    /// ConstantPool - The constant pool for the current function.
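+    /// Targets use the pool for constants they cannot fold into immediates
+    /// (e.g. most FP values and vectors); emitConstantPool below copies the
+    /// entries into the output buffer and records each entry's address.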
+    ///
+    MachineConstantPool *ConstantPool;
+
+    /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+    ///
+    void *ConstantPoolBase;
+
+    /// ConstPoolAddresses - Addresses of individual constant pool entries.
+    ///
+    SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+    /// JumpTable - The jump tables for the current function.
+    ///
+    MachineJumpTableInfo *JumpTable;
+
+    /// JumpTableBase - A pointer to the first entry in the jump table.
+    ///
+    void *JumpTableBase;
+
+    /// Resolver - This contains info about the currently resolved functions.
+    JITResolver Resolver;
+
+    /// DE - The dwarf emitter for the jit.
+    OwningPtr<JITDwarfEmitter> DE;
+
+    /// DR - The debug registerer for the jit.
+    OwningPtr<JITDebugRegisterer> DR;
+
+    /// LabelLocations - This vector is a mapping from Label ID's to their
+    /// address.
+    DenseMap<MCSymbol*, uintptr_t> LabelLocations;
+
+    /// MMI - Machine module info for exception information.
+    MachineModuleInfo* MMI;
+
+    // CurFn - The llvm function being emitted.  Only valid during
+    // finishFunction().
+    const Function *CurFn;
+
+    /// Information about emitted code, which is passed to the
+    /// JITEventListeners.  This is reset in startFunction and used in
+    /// finishFunction.
+    JITEvent_EmittedFunctionDetails EmissionDetails;
+
+    struct EmittedCode {
+      void *FunctionBody;  // Beginning of the function's allocation.
+      void *Code;  // The address the function's code actually starts at.
+      void *ExceptionTable;
+      EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+    };
+    struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+      typedef JITEmitter *ExtraData;
+      static void onDelete(JITEmitter *, const Function*);
+      static void onRAUW(JITEmitter *, const Function*, const Function*);
+    };
+    ValueMap<const Function *, EmittedCode,
+             EmittedFunctionConfig> EmittedFunctions;
+
+    DebugLoc PrevDL;
+
+    /// Instance of the JIT
+    JIT *TheJIT;
+
+  public:
+    JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+      : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+        EmittedFunctions(this), TheJIT(&jit) {
+      MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+      if (jit.getJITInfo().needsGOT()) {
+        MemMgr->AllocateGOT();
+        DEBUG(dbgs() << "JIT is managing a GOT\n");
+      }
+
+      if (JITExceptionHandling || JITEmitDebugInfo) {
+        DE.reset(new JITDwarfEmitter(jit));
+      }
+      if (JITEmitDebugInfo) {
+        DR.reset(new JITDebugRegisterer(TM));
+      }
+    }
+    ~JITEmitter() {
+      delete MemMgr;
+    }
+
+    /// classof - Methods for support type inquiry through isa, cast, and
+    /// dyn_cast:
+    ///
+    static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+    JITResolver &getJITResolver() { return Resolver; }
+
+    virtual void startFunction(MachineFunction &F);
+    virtual bool finishFunction(MachineFunction &F);
+
+    void emitConstantPool(MachineConstantPool *MCP);
+    void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+    void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+    void startGVStub(const GlobalValue* GV,
+                     unsigned StubSize, unsigned Alignment = 1);
+    void startGVStub(void *Buffer, unsigned StubSize);
+    void finishGVStub();
+    virtual void *allocIndirectGV(const GlobalValue *GV,
+                                  const uint8_t *Buffer, size_t Size,
+                                  unsigned Alignment);
+
+    /// allocateSpace - Reserves space in the current block if any, or
+    /// allocate a new one of the given size.
+    virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+    /// allocateGlobal - Allocate memory for a global.  Unlike allocateSpace,
+    /// this method does not allocate memory in the current output buffer,
+    /// because a global may live longer than the current function.
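+    /// (The request is forwarded to the JITMemoryManager, which owns the
+    /// allocation policy for such long-lived data.)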
+    virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+    virtual void addRelocation(const MachineRelocation &MR) {
+      Relocations.push_back(MR);
+    }
+
+    virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+      if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+        MBBLocations.resize((MBB->getNumber()+1)*2);
+      MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+      if (MBB->hasAddressTaken())
+        TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
+                                       (void*)getCurrentPCValue());
+      DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+                   << (void*) getCurrentPCValue() << "]\n");
+    }
+
+    virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+    virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+    virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{
+      assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+             MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+      return MBBLocations[MBB->getNumber()];
+    }
+
+    /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+    /// given function.  Increase the minimum allocation size so that we get
+    /// more memory next time.
+    void retryWithMoreMemory(MachineFunction &F);
+
+    /// deallocateMemForFunction - Deallocate all memory for the specified
+    /// function body.
+    void deallocateMemForFunction(const Function *F);
+
+    virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+    virtual void emitLabel(MCSymbol *Label) {
+      LabelLocations[Label] = getCurrentPCValue();
+    }
+
+    virtual DenseMap<MCSymbol*, uintptr_t> *getLabelLocations() {
+      return &LabelLocations;
+    }
+
+    virtual uintptr_t getLabelAddress(MCSymbol *Label) const {
+      assert(LabelLocations.count(Label) && "Label not emitted!");
+      return LabelLocations.find(Label)->second;
+    }
+
+    virtual void setModuleInfo(MachineModuleInfo* Info) {
+      MMI = Info;
+      if (DE.get()) DE->setModuleInfo(Info);
+    }
+
+  private:
+    void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+                             bool MayNeedFarStub);
+    void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+  };
+}
+
+void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) {
+  JRS->EraseAllCallSitesForPrelocked(F);
+}
+
+void JITResolverState::EraseAllCallSitesForPrelocked(Function *F) {
+  FunctionToCallSitesMapTy::iterator F2C = FunctionToCallSitesMap.find(F);
+  if (F2C == FunctionToCallSitesMap.end())
+    return;
+  StubToResolverMapTy &S2RMap = *StubToResolverMap;
+  for (SmallPtrSet<void*, 1>::const_iterator I = F2C->second.begin(),
+       E = F2C->second.end(); I != E; ++I) {
+    S2RMap.UnregisterStubResolver(*I);
+    bool Erased = CallSiteToFunctionMap.erase(*I);
+    (void)Erased;
+    assert(Erased && "Missing call site->function mapping");
+  }
+  FunctionToCallSitesMap.erase(F2C);
+}
+
+void JITResolverState::EraseAllCallSitesPrelocked() {
+  StubToResolverMapTy &S2RMap = *StubToResolverMap;
+  for (CallSiteToFunctionMapTy::const_iterator
+       I = CallSiteToFunctionMap.begin(),
+       E = CallSiteToFunctionMap.end(); I != E; ++I) {
+    S2RMap.UnregisterStubResolver(I->first);
+  }
+  CallSiteToFunctionMap.clear();
+  FunctionToCallSitesMap.clear();
+}
+
+JITResolver::~JITResolver() {
+  // No need to lock because we're in the destructor, and state isn't shared.
+  state.EraseAllCallSitesPrelocked();
+  assert(!StubToResolverMap->ResolverHasStubs(this) &&
+         "Resolver destroyed with stubs still alive.");
+}
+
+/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub
+/// if it has already been created.
+void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) {
+  MutexGuard locked(TheJIT->lock);
+
+  // If we already have a stub for this function, recycle it.
+  return state.getFunctionToLazyStubMap(locked).lookup(F);
+}
+
+/// getLazyFunctionStub - This returns a pointer to a function stub, creating
+/// one on demand as needed.
+void *JITResolver::getLazyFunctionStub(Function *F) {
+  MutexGuard locked(TheJIT->lock);
+
+  // If we already have a lazy stub for this function, recycle it.
+  void *&Stub = state.getFunctionToLazyStubMap(locked)[F];
+  if (Stub) return Stub;
+
+  // Call the lazy resolver function if we are JIT'ing lazily.  Otherwise we
+  // must resolve the symbol now.
+  void *Actual = TheJIT->isCompilingLazily()
+    ? (void *)(intptr_t)LazyResolverFn : (void *)0;
+
+  // If this is an external declaration, attempt to resolve the address now
+  // to place in the stub.
+  if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage()) {
+    Actual = TheJIT->getPointerToFunction(F);
+
+    // If we resolved the symbol to a null address (e.g. a weak external)
+    // don't emit a stub.  Return a null pointer to the application.
+    if (!Actual) return 0;
+  }
+
+  TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+  JE.startGVStub(F, SL.Size, SL.Alignment);
+  // Codegen a new stub, calling the lazy resolver or the actual address of the
+  // external function, if it was resolved.
+  Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
+  JE.finishGVStub();
+
+  if (Actual != (void*)(intptr_t)LazyResolverFn) {
+    // If we are getting the stub for an external function, we really want the
+    // address of the stub in the GlobalAddressMap for the JIT, not the address
+    // of the external function.
+    TheJIT->updateGlobalMapping(F, Stub);
+  }
+
+  DEBUG(dbgs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '"
+               << F->getName() << "'\n");
+
+  if (TheJIT->isCompilingLazily()) {
+    // Register this JITResolver as the one corresponding to this call site so
+    // JITCompilerFn will be able to find it.
+    StubToResolverMap->RegisterStubResolver(Stub, this);
+
+    // Finally, keep track of the stub-to-Function mapping so that the
+    // JITCompilerFn knows which function to compile!
+    state.AddCallSite(locked, Stub, F);
+  } else if (!Actual) {
+    // If we are JIT'ing non-lazily but need to call a function that does not
+    // exist yet, add it to the JIT's work list so that we can fill in the
+    // stub address later.
+    assert(!isNonGhostDeclaration(F) && !F->hasAvailableExternallyLinkage() &&
+           "'Actual' should have been set above.");
+    TheJIT->addPendingFunction(F);
+  }
+
+  return Stub;
+}
+
+/// getGlobalValueIndirectSym - Return a lazy pointer containing the specified
+/// GV address.
+void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
+  MutexGuard locked(TheJIT->lock);
+
+  // If we already have a stub for this global variable, recycle it.
+  void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV];
+  if (IndirectSym) return IndirectSym;
+
+  // Otherwise, codegen a new indirect symbol.
+  IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
+                                                                JE);
+
+  DEBUG(dbgs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+               << "] for GV '" << GV->getName() << "'\n");
+
+  return IndirectSym;
+}
+
+/// getExternalFunctionStub - Return a stub for the function at the
+/// specified address, created lazily on demand.
+void *JITResolver::getExternalFunctionStub(void *FnAddr) {
+  // If we already have a stub for this function, recycle it.
+  void *&Stub = ExternalFnToStubMap[FnAddr];
+  if (Stub) return Stub;
+
+  TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout();
+  JE.startGVStub(0, SL.Size, SL.Alignment);
+  Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
+  JE.finishGVStub();
+
+  DEBUG(dbgs() << "JIT: Stub emitted at [" << Stub
+               << "] for external function at '" << FnAddr << "'\n");
+  return Stub;
+}
+
+unsigned JITResolver::getGOTIndexForAddr(void* addr) {
+  unsigned idx = revGOTMap[addr];
+  if (!idx) {
+    idx = ++nextGOTIndex;
+    revGOTMap[addr] = idx;
+    DEBUG(dbgs() << "JIT: Adding GOT entry " << idx << " for addr ["
+                 << addr << "]\n");
+  }
+  return idx;
+}
+
+/// JITCompilerFn - This function is called when a lazy compilation stub has
+/// been entered.  It looks up which function this stub corresponds to, compiles
+/// it if necessary, then returns the resultant function pointer.
+void *JITResolver::JITCompilerFn(void *Stub) {
+  JITResolver *JR = StubToResolverMap->getResolverFromStub(Stub);
+  assert(JR && "Unable to find the corresponding JITResolver to the call site");
+
+  Function* F = 0;
+  void* ActualPtr = 0;
+
+  {
+    // Only lock for getting the Function.  The call getPointerToFunction made
+    // in this function might trigger function materializing, which requires
+    // JIT lock to be unlocked.
+    MutexGuard locked(JR->TheJIT->lock);
+
+    // The address given to us for the stub may not be exactly right, it might
+    // be a little bit after the stub.  As such, use upper_bound to find it.
+    std::pair<void*, Function*> I =
+      JR->state.LookupFunctionFromCallSite(locked, Stub);
+    F = I.second;
+    ActualPtr = I.first;
+  }
+
+  // If we have already code generated the function, just return the address.
+  void *Result = JR->TheJIT->getPointerToGlobalIfAvailable(F);
+
+  if (!Result) {
+    // Otherwise we don't have it, do lazy compilation now.
+
+    // If lazy compilation is disabled, emit a useful error message and abort.
+    if (!JR->TheJIT->isCompilingLazily()) {
+      report_fatal_error("LLVM JIT requested to do lazy compilation of function '"
+                         + F->getName() + "' when lazy compiles are disabled!");
+    }
+
+    DEBUG(dbgs() << "JIT: Lazily resolving function '" << F->getName()
+                 << "' In stub ptr = " << Stub << " actual ptr = "
+                 << ActualPtr << "\n");
+
+    Result = JR->TheJIT->getPointerToFunction(F);
+  }
+
+  // Reacquire the lock to update the GOT map.
+  MutexGuard locked(JR->TheJIT->lock);
+
+  // We might like to remove the call site from the CallSiteToFunction map, but
+  // we can't do that!  Multiple threads could be stuck, waiting to acquire the
+  // lock above.  As soon as the 1st function finishes compiling the function,
+  // the next one will be released, and needs to be able to find the function it
+  // needs to call.
+
+  // FIXME: We could rewrite all references to this stub if we knew them.
+
+  // What we will do is set the compiled function address to map to the
+  // same GOT entry as the stub so that later clients may update the GOT
+  // if they see it still using the stub address.
+  // Note: this is done so the Resolver doesn't have to manage GOT memory
+  // Do this without allocating map space if the target isn't using a GOT
+  if(JR->revGOTMap.find(Stub) != JR->revGOTMap.end())
+    JR->revGOTMap[Result] = JR->revGOTMap[Stub];
+
+  return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// JITEmitter code.
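+//
+// A rough sketch of how getPointerToGlobal (below) chooses an address for a
+// referenced global; this just restates the control flow of the code that
+// follows:
+//
+//   GlobalVariable -> getOrEmitGlobalVariable(GV)
+//   GlobalAlias    -> the aliased global's address
+//   Function       -> an existing lazy stub if one was already emitted;
+//                     otherwise, if !MayNeedFarStub, the compiled body or a
+//                     resolved external address; otherwise a lazy stub.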
+//
+void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
+                                     bool MayNeedFarStub) {
+  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
+    return TheJIT->getOrEmitGlobalVariable(GV);
+
+  if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
+    return TheJIT->getPointerToGlobal(GA->resolveAliasedGlobal(false));
+
+  // If we have already compiled the function, return a pointer to its body.
+  Function *F = cast<Function>(V);
+
+  void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F);
+  if (FnStub) {
+    // Return the function stub if it's already created.  We do this first so
+    // that we're returning the same address for the function as any previous
+    // call.  TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be
+    // close enough to call.
+    return FnStub;
+  }
+
+  // If we know the target can handle arbitrary-distance calls, try to
+  // return a direct pointer.
+  if (!MayNeedFarStub) {
+    // If we have code, go ahead and return that.
+    void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+    if (ResultPtr) return ResultPtr;
+
+    // If this is an external function pointer, we can force the JIT to
+    // 'compile' it, which really just adds it to the map.
+    if (isNonGhostDeclaration(F) || F->hasAvailableExternallyLinkage())
+      return TheJIT->getPointerToFunction(F);
+  }
+
+  // Otherwise, we may need to emit a stub, and, conservatively, we always do
+  // so.  Note that it's possible to return null from getLazyFunctionStub in
+  // the case of a weak extern that fails to resolve.
+  return Resolver.getLazyFunctionStub(F);
+}
+
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
+  // Make sure GV is emitted first, and create a stub containing the fully
+  // resolved address.
+  void *GVAddress = getPointerToGlobal(V, Reference, false);
+  void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
+  return StubAddr;
+}
+
+void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
+  if (DL.isUnknown()) return;
+  if (!BeforePrintingInsn) return;
+
+  const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext();
+
+  if (DL.getScope(Context) != 0 && PrevDL != DL) {
+    JITEvent_EmittedFunctionDetails::LineStart NextLine;
+    NextLine.Address = getCurrentPCValue();
+    NextLine.Loc = DL;
+    EmissionDetails.LineStarts.push_back(NextLine);
+  }
+
+  PrevDL = DL;
+}
+
+static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
+                                           const TargetData *TD) {
+  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+  if (Constants.empty()) return 0;
+
+  unsigned Size = 0;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Size = (Size + AlignMask) & ~AlignMask;
+    const Type *Ty = CPE.getType();
+    Size += TD->getTypeAllocSize(Ty);
+  }
+  return Size;
+}
+
+void JITEmitter::startFunction(MachineFunction &F) {
+  DEBUG(dbgs() << "JIT: Starting CodeGen of Function "
+               << F.getFunction()->getName() << "\n");
+
+  uintptr_t ActualSize = 0;
+  // Set the memory writable, if it's not already
+  MemMgr->setMemoryWritable();
+
+  if (SizeEstimate > 0) {
+    // SizeEstimate will be non-zero on reallocation attempts.
+    ActualSize = SizeEstimate;
+  }
+
+  BufferBegin = CurBufferPtr = MemMgr->startFunctionBody(F.getFunction(),
+                                                         ActualSize);
+  BufferEnd = BufferBegin+ActualSize;
+  EmittedFunctions[F.getFunction()].FunctionBody = BufferBegin;
+
+  // Ensure the constant pool/jump table info is at least 4-byte aligned.
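+  // (The call below actually rounds CurBufferPtr up to a 16-byte boundary,
+  // in effect Ptr = (Ptr + 15) & ~(uintptr_t)15, which is stricter than the
+  // 4 bytes promised above; presumably this also satisfies 16-byte-aligned
+  // constant pool entries such as SSE vectors.)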
+ emitAlignment(16); + + emitConstantPool(F.getConstantPool()); + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + initJumpTableInfo(MJTI); + + // About to start emitting the machine code for the function. + emitAlignment(std::max(F.getFunction()->getAlignment(), 8U)); + TheJIT->updateGlobalMapping(F.getFunction(), CurBufferPtr); + EmittedFunctions[F.getFunction()].Code = CurBufferPtr; + + MBBLocations.clear(); + + EmissionDetails.MF = &F; + EmissionDetails.LineStarts.clear(); +} + +bool JITEmitter::finishFunction(MachineFunction &F) { + if (CurBufferPtr == BufferEnd) { + // We must call endFunctionBody before retrying, because + // deallocateMemForFunction requires it. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + retryWithMoreMemory(F); + return true; + } + + if (MachineJumpTableInfo *MJTI = F.getJumpTableInfo()) + emitJumpTableInfo(MJTI); + + // FnStart is the start of the text, not the start of the constant pool and + // other per-function data. + uint8_t *FnStart = + (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); + + // FnEnd is the end of the function's machine code. + uint8_t *FnEnd = CurBufferPtr; + + if (!Relocations.empty()) { + CurFn = F.getFunction(); + NumRelos += Relocations.size(); + + // Resolve the relocations to concrete pointers. + for (unsigned i = 0, e = Relocations.size(); i != e; ++i) { + MachineRelocation &MR = Relocations[i]; + void *ResultPtr = 0; + if (!MR.letTargetResolve()) { + if (MR.isExternalSymbol()) { + ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(), + false); + DEBUG(dbgs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to [" + << ResultPtr << "]\n"); + + // If the target REALLY wants a stub for this function, emit it now. + if (MR.mayNeedFarStub()) { + ResultPtr = Resolver.getExternalFunctionStub(ResultPtr); + } + } else if (MR.isGlobalValue()) { + ResultPtr = getPointerToGlobal(MR.getGlobalValue(), + BufferBegin+MR.getMachineCodeOffset(), + MR.mayNeedFarStub()); + } else if (MR.isIndirectSymbol()) { + ResultPtr = getPointerToGVIndirectSym( + MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset()); + } else if (MR.isBasicBlock()) { + ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); + } else if (MR.isConstantPoolIndex()) { + ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + } else { + assert(MR.isJumpTableIndex()); + ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); + } + + MR.setResultPointer(ResultPtr); + } + + // if we are managing the GOT and the relocation wants an index, + // give it one + if (MR.isGOTRelative() && MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr(ResultPtr); + MR.setGOTIndex(idx); + if (((void**)MemMgr->getGOTBase())[idx] != ResultPtr) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << ResultPtr + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = ResultPtr; + } + } + } + + CurFn = 0; + TheJIT->getJITInfo().relocate(BufferBegin, &Relocations[0], + Relocations.size(), MemMgr->getGOTBase()); + } + + // Update the GOT entry for F to point to the new code. 
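+  // (JITCompilerFn maps a compiled function's address to the same GOT index
+  // as its stub, so refreshing the slot here lets callers that still hold
+  // the stub address pick up the real body on their next load through the
+  // GOT.)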
+ if (MemMgr->isManagingGOT()) { + unsigned idx = Resolver.getGOTIndexForAddr((void*)BufferBegin); + if (((void**)MemMgr->getGOTBase())[idx] != (void*)BufferBegin) { + DEBUG(dbgs() << "JIT: GOT was out of date for " << (void*)BufferBegin + << " pointing at " << ((void**)MemMgr->getGOTBase())[idx] + << "\n"); + ((void**)MemMgr->getGOTBase())[idx] = (void*)BufferBegin; + } + } + + // CurBufferPtr may have moved beyond FnEnd, due to memory allocation for + // global variables that were referenced in the relocations. + MemMgr->endFunctionBody(F.getFunction(), BufferBegin, CurBufferPtr); + + if (CurBufferPtr == BufferEnd) { + retryWithMoreMemory(F); + return true; + } else { + // Now that we've succeeded in emitting the function, reset the + // SizeEstimate back down to zero. + SizeEstimate = 0; + } + + BufferBegin = CurBufferPtr = 0; + NumBytes += FnEnd-FnStart; + + // Invalidate the icache if necessary. + sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart); + + TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart, + EmissionDetails); + + // Reset the previous debug location. + PrevDL = DebugLoc(); + + DEBUG(dbgs() << "JIT: Finished CodeGen of [" << (void*)FnStart + << "] Function: " << F.getFunction()->getName() + << ": " << (FnEnd-FnStart) << " bytes of text, " + << Relocations.size() << " relocations\n"); + + Relocations.clear(); + ConstPoolAddresses.clear(); + + // Mark code region readable and executable if it's not so already. + MemMgr->setMemoryExecutable(); + + DEBUG({ + if (sys::hasDisassembler()) { + dbgs() << "JIT: Disassembled code:\n"; + dbgs() << sys::disassembleBuffer(FnStart, FnEnd-FnStart, + (uintptr_t)FnStart); + } else { + dbgs() << "JIT: Binary code:\n"; + uint8_t* q = FnStart; + for (int i = 0; q < FnEnd; q += 4, ++i) { + if (i == 4) + i = 0; + if (i == 0) + dbgs() << "JIT: " << (long)(q - FnStart) << ": "; + bool Done = false; + for (int j = 3; j >= 0; --j) { + if (q + j >= FnEnd) + Done = true; + else + dbgs() << (unsigned short)q[j]; + } + if (Done) + break; + dbgs() << ' '; + if (i == 3) + dbgs() << '\n'; + } + dbgs()<< '\n'; + } + }); + + if (JITExceptionHandling || JITEmitDebugInfo) { + uintptr_t ActualSize = 0; + SavedBufferBegin = BufferBegin; + SavedBufferEnd = BufferEnd; + SavedCurBufferPtr = CurBufferPtr; + + BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; + uint8_t *EhStart; + uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, + EhStart); + MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, + FrameRegister); + uint8_t *EhEnd = CurBufferPtr; + BufferBegin = SavedBufferBegin; + BufferEnd = SavedBufferEnd; + CurBufferPtr = SavedCurBufferPtr; + + if (JITExceptionHandling) { + TheJIT->RegisterTable(F.getFunction(), FrameRegister); + } + + if (JITEmitDebugInfo) { + DebugInfo I; + I.FnStart = FnStart; + I.FnEnd = FnEnd; + I.EhStart = EhStart; + I.EhEnd = EhEnd; + DR->RegisterFunction(F.getFunction(), I); + } + } + + if (MMI) + MMI->EndFunction(); + + return false; +} + +void JITEmitter::retryWithMoreMemory(MachineFunction &F) { + DEBUG(dbgs() << "JIT: Ran out of space for native code. Reattempting.\n"); + Relocations.clear(); // Clear the old relocations or we'll reapply them. + ConstPoolAddresses.clear(); + ++NumRetries; + deallocateMemForFunction(F.getFunction()); + // Try again with at least twice as much free space. 
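+  // Doubling keeps the number of retries logarithmic in the final code size;
+  // for example (sizes illustrative), a request that first overflows a 64K
+  // buffer retries with at least 128K, then 256K.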
+  SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+
+  for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
+    if (MBB->hasAddressTaken())
+      TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
+  }
+}
+
+/// deallocateMemForFunction - Deallocate all memory for the specified
+/// function body.  Also drop any references the function has to stubs.
+/// May be called while the Function is being destroyed inside ~Value().
+void JITEmitter::deallocateMemForFunction(const Function *F) {
+  ValueMap<const Function *, EmittedCode, EmittedFunctionConfig>::iterator
+    Emitted = EmittedFunctions.find(F);
+  if (Emitted != EmittedFunctions.end()) {
+    MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody);
+    MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable);
+    TheJIT->NotifyFreeingMachineCode(Emitted->second.Code);
+
+    EmittedFunctions.erase(Emitted);
+  }
+
+  if (JITExceptionHandling) {
+    TheJIT->DeregisterTable(F);
+  }
+
+  if (JITEmitDebugInfo) {
+    DR->UnregisterFunction(F);
+  }
+}
+
+
+void* JITEmitter::allocateSpace(uintptr_t Size, unsigned Alignment) {
+  if (BufferBegin)
+    return JITCodeEmitter::allocateSpace(Size, Alignment);
+
+  // Create a new memory block if there is no active one.
+  // Care must be taken so that BufferBegin is invalidated when a
+  // block is trimmed.
+  BufferBegin = CurBufferPtr = MemMgr->allocateSpace(Size, Alignment);
+  BufferEnd = BufferBegin+Size;
+  return CurBufferPtr;
+}
+
+void* JITEmitter::allocateGlobal(uintptr_t Size, unsigned Alignment) {
+  // Delegate this call through the memory manager.
+  return MemMgr->allocateGlobal(Size, Alignment);
+}
+
+void JITEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  if (TheJIT->getJITInfo().hasCustomConstantPool())
+    return;
+
+  const std::vector<MachineConstantPoolEntry> &Constants = MCP->getConstants();
+  if (Constants.empty()) return;
+
+  unsigned Size = GetConstantPoolSizeInBytes(MCP, TheJIT->getTargetData());
+  unsigned Align = MCP->getConstantPoolAlignment();
+  ConstantPoolBase = allocateSpace(Size, Align);
+  ConstantPool = MCP;
+
+  if (ConstantPoolBase == 0) return;  // Buffer overflow.
+
+  DEBUG(dbgs() << "JIT: Emitted constant pool at [" << ConstantPoolBase
+               << "] (size: " << Size << ", alignment: " << Align << ")\n");
+
+  // Initialize the memory for all of the constant pool entries.
+  unsigned Offset = 0;
+  for (unsigned i = 0, e = Constants.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = Constants[i];
+    unsigned AlignMask = CPE.getAlignment() - 1;
+    Offset = (Offset + AlignMask) & ~AlignMask;
+
+    uintptr_t CAddr = (uintptr_t)ConstantPoolBase + Offset;
+    ConstPoolAddresses.push_back(CAddr);
+    if (CPE.isMachineConstantPoolEntry()) {
+      // FIXME: add support to lower machine constant pool values into bytes!
+      report_fatal_error("Initializing memory with a machine-specific "
+                         "constant pool entry has not been implemented!");
+    }
+    TheJIT->InitializeMemory(CPE.Val.ConstVal, (void*)CAddr);
+    DEBUG(dbgs() << "JIT: CP" << i << " at [0x";
+          dbgs().write_hex(CAddr) << "]\n");
+
+    const Type *Ty = CPE.Val.ConstVal->getType();
+    Offset += TheJIT->getTargetData()->getTypeAllocSize(Ty);
+  }
+}
+
+void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
+  if (TheJIT->getJITInfo().hasCustomJumpTables())
+    return;
+  if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline)
+    return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  unsigned NumEntries = 0;
+  for (unsigned i = 0, e = JT.size(); i != e; ++i)
+    NumEntries += JT[i].MBBs.size();
+
+  unsigned EntrySize = MJTI->getEntrySize(*TheJIT->getTargetData());
+
+  // Just allocate space for all the jump tables now.  We will fix up the
+  // actual MBB entries in the tables after we emit the code for each block,
+  // since then we will know the final locations of the MBBs in memory.
+  JumpTable = MJTI;
+  JumpTableBase = allocateSpace(NumEntries * EntrySize,
+                                MJTI->getEntryAlignment(*TheJIT->getTargetData()));
+}
+
+void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
+  if (TheJIT->getJITInfo().hasCustomJumpTables())
+    return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty() || JumpTableBase == 0) return;
+
+
+  switch (MJTI->getEntryKind()) {
+  case MachineJumpTableInfo::EK_Inline:
+    return;
+  case MachineJumpTableInfo::EK_BlockAddress: {
+    // EK_BlockAddress - Each entry is a plain address of a block, e.g.:
+    //     .word LBB123
+    assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) &&
+           "Cross JIT'ing?");
+
+    // For each jump table, map each target in the jump table to the address of
+    // an emitted MachineBasicBlock.
+    intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
+
+    for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+      const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+      // Store the address of the basic block for this jump table slot in the
+      // memory we allocated for the jump table in 'initJumpTableInfo'.
+      for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi)
+        *SlotPtr++ = getMachineBasicBlockAddress(MBBs[mi]);
+    }
+    break;
+  }
+
+  case MachineJumpTableInfo::EK_Custom32:
+  case MachineJumpTableInfo::EK_GPRel32BlockAddress:
+  case MachineJumpTableInfo::EK_LabelDifference32: {
+    assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == 4 &&
+           "Cross JIT'ing?");
+    // For each jump table, place the offset from the beginning of the table
+    // to the target address.
+    int *SlotPtr = (int*)JumpTableBase;
+
+    for (unsigned i = 0, e = JT.size(); i != e; ++i) {
+      const std::vector<MachineBasicBlock*> &MBBs = JT[i].MBBs;
+      // Store the offset of the basic block for this jump table slot in the
+      // memory we allocated for the jump table in 'initJumpTableInfo'.
+      uintptr_t Base = (uintptr_t)SlotPtr;
+      for (unsigned mi = 0, me = MBBs.size(); mi != me; ++mi) {
+        uintptr_t MBBAddr = getMachineBasicBlockAddress(MBBs[mi]);
+        /// FIXME: Use EntryKind instead of magic "getPICJumpTableEntry" hook.
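+        // For a typical PIC jump table the target hook computes a 32-bit
+        // table-relative offset; a minimal sketch of what such a hook might
+        // return (the exact encoding is target-defined):
+        //   return (int)(MBBAddr - Base);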
+        *SlotPtr++ = TheJIT->getJITInfo().getPICJumpTableEntry(MBBAddr, Base);
+      }
+    }
+    break;
+  }
+  }
+}
+
+void JITEmitter::startGVStub(const GlobalValue* GV,
+                             unsigned StubSize, unsigned Alignment) {
+  SavedBufferBegin = BufferBegin;
+  SavedBufferEnd = BufferEnd;
+  SavedCurBufferPtr = CurBufferPtr;
+
+  BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
+  BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::startGVStub(void *Buffer, unsigned StubSize) {
+  SavedBufferBegin = BufferBegin;
+  SavedBufferEnd = BufferEnd;
+  SavedCurBufferPtr = CurBufferPtr;
+
+  BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
+  BufferEnd = BufferBegin+StubSize+1;
+}
+
+void JITEmitter::finishGVStub() {
+  assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space.");
+  NumBytes += getCurrentPCOffset();
+  BufferBegin = SavedBufferBegin;
+  BufferEnd = SavedBufferEnd;
+  CurBufferPtr = SavedCurBufferPtr;
+}
+
+void *JITEmitter::allocIndirectGV(const GlobalValue *GV,
+                                  const uint8_t *Buffer, size_t Size,
+                                  unsigned Alignment) {
+  uint8_t *IndGV = MemMgr->allocateStub(GV, Size, Alignment);
+  memcpy(IndGV, Buffer, Size);
+  return IndGV;
+}
+
+// getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry
+// in the constant pool that was last emitted with the 'emitConstantPool'
+// method.
+//
+uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
+  assert(ConstantNum < ConstantPool->getConstants().size() &&
+         "Invalid ConstantPoolIndex!");
+  return ConstPoolAddresses[ConstantNum];
+}
+
+// getJumpTableEntryAddress - Return the address of the JumpTable with index
+// 'Index' in the jump table that was last initialized with 'initJumpTableInfo'
+//
+uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
+  const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
+  assert(Index < JT.size() && "Invalid jump table index!");
+
+  unsigned EntrySize = JumpTable->getEntrySize(*TheJIT->getTargetData());
+
+  unsigned Offset = 0;
+  for (unsigned i = 0; i < Index; ++i)
+    Offset += JT[i].MBBs.size();
+
+  Offset *= EntrySize;
+
+  return (uintptr_t)((char *)JumpTableBase + Offset);
+}
+
+void JITEmitter::EmittedFunctionConfig::onDelete(
+  JITEmitter *Emitter, const Function *F) {
+  Emitter->deallocateMemForFunction(F);
+}
+void JITEmitter::EmittedFunctionConfig::onRAUW(
+  JITEmitter *, const Function*, const Function*) {
+  llvm_unreachable("The JIT doesn't know how to handle a"
+                   " RAUW on a value it has emitted.");
+}
+
+
+//===----------------------------------------------------------------------===//
+//  Public interface to this file
+//===----------------------------------------------------------------------===//
+
+JITCodeEmitter *JIT::createEmitter(JIT &jit, JITMemoryManager *JMM,
+                                   TargetMachine &tm) {
+  return new JITEmitter(jit, JMM, tm);
+}
+
+// getPointerToFunctionOrStub - If the specified function has been
+// code-gen'd, return a pointer to the function.  If not, compile it, or use
+// a stub to implement lazy compilation if available.
+//
+void *JIT::getPointerToFunctionOrStub(Function *F) {
+  // If we have already code generated the function, just return the address.
+  if (void *Addr = getPointerToGlobalIfAvailable(F))
+    return Addr;
+
+  // Get a stub if the target supports it.
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+  return JE->getJITResolver().getLazyFunctionStub(F);
+}
+
+void JIT::updateFunctionStub(Function *F) {
+  // Get the empty stub we generated earlier.
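+  // Lazy compilation in outline: the stub initially jumps into the JIT's
+  // compilation callback; once the body exists, this routine rewrites the
+  // stub in place so it jumps straight to the compiled code.  A minimal
+  // sketch of the caller's view (Stub and Addr as used below):
+  //   before:  call Stub -> compilation callback -> codegen F
+  //   after:   call Stub -> Addr (the emitted body of F)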
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
+  void *Stub = JE->getJITResolver().getLazyFunctionStub(F);
+  void *Addr = getPointerToGlobalIfAvailable(F);
+  assert(Addr != Stub && "Function must have non-stub address to be updated.");
+
+  // Tell the target jit info to rewrite the stub at the specified address,
+  // rather than creating a new one.
+  TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout();
+  JE->startGVStub(Stub, layout.Size);
+  getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter());
+  JE->finishGVStub();
+}
+
+/// freeMachineCodeForFunction - release machine code memory for the given
+/// Function.
+///
+void JIT::freeMachineCodeForFunction(Function *F) {
+  // Delete translation for this from the ExecutionEngine, so it will get
+  // retranslated next time it is used.
+  updateGlobalMapping(F, 0);
+
+  // Free the actual memory for the function body and related stuff.
+  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
+  cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
+}
diff --git a/final/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/final/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
new file mode 100644
index 00000000000..eec23cec0af
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -0,0 +1,727 @@
+//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Memory.h"
+#include <vector>
+#include <cassert>
+#include <climits>
+#include <cstring>
+using namespace llvm;
+
+STATISTIC(NumSlabs, "Number of slabs of memory allocated by the JIT");
+
+JITMemoryManager::~JITMemoryManager() {}
+
+//===----------------------------------------------------------------------===//
+// Memory Block Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+  /// MemoryRangeHeader - For a range of memory, this is the header that we put
+  /// on the block of memory.  It is carefully crafted to be one word of
+  /// memory.  Allocated blocks have just this header, free'd blocks have
+  /// FreeRangeHeader which starts with this.
+  struct FreeRangeHeader;
+  struct MemoryRangeHeader {
+    /// ThisAllocated - This is true if this block is currently allocated.  If
+    /// not, this can be converted to a FreeRangeHeader.
+    unsigned ThisAllocated : 1;
+
+    /// PrevAllocated - Keep track of whether the block immediately before us
+    /// is allocated.  If not, the word immediately before this header is the
+    /// size of the previous block.
+    unsigned PrevAllocated : 1;
+
+    /// BlockSize - This is the size in bytes of this memory block,
+    /// including this header.
+    uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
+
+
+    /// getBlockAfter - Return the memory block immediately after this one.
+ /// + MemoryRangeHeader &getBlockAfter() const { + return *(MemoryRangeHeader*)((char*)this+BlockSize); + } + + /// getFreeBlockBefore - If the block before this one is free, return it, + /// otherwise return null. + FreeRangeHeader *getFreeBlockBefore() const { + if (PrevAllocated) return 0; + intptr_t PrevSize = ((intptr_t *)this)[-1]; + return (FreeRangeHeader*)((char*)this-PrevSize); + } + + /// FreeBlock - Turn an allocated block into a free block, adjusting + /// bits in the object headers, and adding an end of region memory block. + FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList); + + /// TrimAllocationToSize - If this allocated block is significantly larger + /// than NewSize, split it into two pieces (where the former is NewSize + /// bytes, including the header), and add the new block to the free list. + FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList, + uint64_t NewSize); + }; + + /// FreeRangeHeader - For a memory block that isn't already allocated, this + /// keeps track of the current block and has a pointer to the next free block. + /// Free blocks are kept on a circularly linked list. + struct FreeRangeHeader : public MemoryRangeHeader { + FreeRangeHeader *Prev; + FreeRangeHeader *Next; + + /// getMinBlockSize - Get the minimum size for a memory block. Blocks + /// smaller than this size cannot be created. + static unsigned getMinBlockSize() { + return sizeof(FreeRangeHeader)+sizeof(intptr_t); + } + + /// SetEndOfBlockSizeMarker - The word at the end of every free block is + /// known to be the size of the free block. Set it for this block. + void SetEndOfBlockSizeMarker() { + void *EndOfBlock = (char*)this + BlockSize; + ((intptr_t *)EndOfBlock)[-1] = BlockSize; + } + + FreeRangeHeader *RemoveFromFreeList() { + assert(Next->Prev == this && Prev->Next == this && "Freelist broken!"); + Next->Prev = Prev; + return Prev->Next = Next; + } + + void AddToFreeList(FreeRangeHeader *FreeList) { + Next = FreeList; + Prev = FreeList->Prev; + Prev->Next = this; + Next->Prev = this; + } + + /// GrowBlock - The block after this block just got deallocated. Merge it + /// into the current block. + void GrowBlock(uintptr_t NewSize); + + /// AllocateBlock - Mark this entire block allocated, updating freelists + /// etc. This returns a pointer to the circular free-list. + FreeRangeHeader *AllocateBlock(); + }; +} + + +/// AllocateBlock - Mark this entire block allocated, updating freelists +/// etc. This returns a pointer to the circular free-list. +FreeRangeHeader *FreeRangeHeader::AllocateBlock() { + assert(!ThisAllocated && !getBlockAfter().PrevAllocated && + "Cannot allocate an allocated block!"); + // Mark this block allocated. + ThisAllocated = 1; + getBlockAfter().PrevAllocated = 1; + + // Remove it from the free list. + return RemoveFromFreeList(); +} + +/// FreeBlock - Turn an allocated block into a free block, adjusting +/// bits in the object headers, and adding an end of region memory block. +/// If possible, coalesce this block with neighboring blocks. Return the +/// FreeRangeHeader to allocate from. +FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) { + MemoryRangeHeader *FollowingBlock = &getBlockAfter(); + assert(ThisAllocated && "This block is already free!"); + assert(FollowingBlock->PrevAllocated && "Flags out of sync!"); + + FreeRangeHeader *FreeListToReturn = FreeList; + + // If the block after this one is free, merge it into this block. 
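+  // Coalescing in pictures (sizes illustrative): freeing B when C is already
+  // free merges the neighbors into one larger range:
+  //   before:  [A alloc][B alloc][C free ][D alloc]
+  //   after:   [A alloc][B+C free        ][D alloc]
+  // and if A is free as well, the GrowBlock path below folds B+C into A.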
+ if (!FollowingBlock->ThisAllocated) { + FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock; + // "FreeList" always needs to be a valid free block. If we're about to + // coalesce with it, update our notion of what the free list is. + if (&FollowingFreeBlock == FreeList) { + FreeList = FollowingFreeBlock.Next; + FreeListToReturn = 0; + assert(&FollowingFreeBlock != FreeList && "No tombstone block?"); + } + FollowingFreeBlock.RemoveFromFreeList(); + + // Include the following block into this one. + BlockSize += FollowingFreeBlock.BlockSize; + FollowingBlock = &FollowingFreeBlock.getBlockAfter(); + + // Tell the block after the block we are coalescing that this block is + // allocated. + FollowingBlock->PrevAllocated = 1; + } + + assert(FollowingBlock->ThisAllocated && "Missed coalescing?"); + + if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) { + PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize); + return FreeListToReturn ? FreeListToReturn : PrevFreeBlock; + } + + // Otherwise, mark this block free. + FreeRangeHeader &FreeBlock = *(FreeRangeHeader*)this; + FollowingBlock->PrevAllocated = 0; + FreeBlock.ThisAllocated = 0; + + // Link this into the linked list of free blocks. + FreeBlock.AddToFreeList(FreeList); + + // Add a marker at the end of the block, indicating the size of this free + // block. + FreeBlock.SetEndOfBlockSizeMarker(); + return FreeListToReturn ? FreeListToReturn : &FreeBlock; +} + +/// GrowBlock - The block after this block just got deallocated. Merge it +/// into the current block. +void FreeRangeHeader::GrowBlock(uintptr_t NewSize) { + assert(NewSize > BlockSize && "Not growing block?"); + BlockSize = NewSize; + SetEndOfBlockSizeMarker(); + getBlockAfter().PrevAllocated = 0; +} + +/// TrimAllocationToSize - If this allocated block is significantly larger +/// than NewSize, split it into two pieces (where the former is NewSize +/// bytes, including the header), and add the new block to the free list. +FreeRangeHeader *MemoryRangeHeader:: +TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) { + assert(ThisAllocated && getBlockAfter().PrevAllocated && + "Cannot deallocate part of an allocated block!"); + + // Don't allow blocks to be trimmed below minimum required size + NewSize = std::max(FreeRangeHeader::getMinBlockSize(), NewSize); + + // Round up size for alignment of header. + unsigned HeaderAlign = __alignof(FreeRangeHeader); + NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1); + + // Size is now the size of the block we will remove from the start of the + // current block. + assert(NewSize <= BlockSize && + "Allocating more space from this block than exists!"); + + // If splitting this block will cause the remainder to be too small, do not + // split the block. + if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize()) + return FreeList; + + // Otherwise, we splice the required number of bytes out of this block, form + // a new block immediately after it, then mark this block allocated. + MemoryRangeHeader &FormerNextBlock = getBlockAfter(); + + // Change the size of this block. + BlockSize = NewSize; + + // Get the new block we just sliced out and turn it into a free block. 
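+  // Splitting in pictures (sizes illustrative): trimming a 100-byte block to
+  // NewSize = 64 carves the tail into a new free block:
+  //   before:  [hdr | 100 bytes allocated            ][next]
+  //   after:   [hdr | 64 bytes allocated][hdr | free ][next]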
+  FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
+  NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
+  NewNextBlock.ThisAllocated = 0;
+  NewNextBlock.PrevAllocated = 1;
+  NewNextBlock.SetEndOfBlockSizeMarker();
+  FormerNextBlock.PrevAllocated = 0;
+  NewNextBlock.AddToFreeList(FreeList);
+  return &NewNextBlock;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Manager Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+  class DefaultJITMemoryManager;
+
+  class JITSlabAllocator : public SlabAllocator {
+    DefaultJITMemoryManager &JMM;
+  public:
+    JITSlabAllocator(DefaultJITMemoryManager &jmm) : JMM(jmm) { }
+    virtual ~JITSlabAllocator() { }
+    virtual MemSlab *Allocate(size_t Size);
+    virtual void Deallocate(MemSlab *Slab);
+  };
+
+  /// DefaultJITMemoryManager - Manage memory for the JIT code generation.
+  /// This splits a large block of MAP_NORESERVE'd memory into two
+  /// sections, one for function stubs, one for the functions themselves.  We
+  /// have to do this because we may need to emit a function stub while in the
+  /// middle of emitting a function, and we don't know how large the function
+  /// we are emitting is.
+  class DefaultJITMemoryManager : public JITMemoryManager {
+
+    // Whether to poison freed memory.
+    bool PoisonMemory;
+
+    /// LastSlab - This points to the last slab allocated and is used as the
+    /// NearBlock parameter to AllocateRWX so that we can attempt to lay out
+    /// all stubs, data, and code contiguously in memory.  In general, however,
+    /// this is not possible because the NearBlock parameter is ignored on
+    /// Windows platforms and even on Unix it works on a best-effort basis.
+    sys::MemoryBlock LastSlab;
+
+    // Memory slabs allocated by the JIT.  We refer to them as slabs so we
+    // don't confuse them with the blocks of memory described above.
+    std::vector<sys::MemoryBlock> CodeSlabs;
+    JITSlabAllocator BumpSlabAllocator;
+    BumpPtrAllocator StubAllocator;
+    BumpPtrAllocator DataAllocator;
+
+    // Circular list of free blocks.
+    FreeRangeHeader *FreeMemoryList;
+
+    // When emitting code into a memory block, this is the block.
+    MemoryRangeHeader *CurBlock;
+
+    uint8_t *GOTBase;  // Target-specific reserved memory.
+  public:
+    DefaultJITMemoryManager();
+    ~DefaultJITMemoryManager();
+
+    /// allocateNewSlab - Allocates a new MemoryBlock and remembers it as the
+    /// last slab it allocated, so that subsequent allocations follow it.
+    sys::MemoryBlock allocateNewSlab(size_t size);
+
+    /// DefaultCodeSlabSize - When we have to go map more memory, we allocate
+    /// at least this much unless more is requested.
+    static const size_t DefaultCodeSlabSize;
+
+    /// DefaultSlabSize - Allocate data into slabs of this size unless we get
+    /// an allocation above SizeThreshold.
+    static const size_t DefaultSlabSize;
+
+    /// DefaultSizeThreshold - For any allocation larger than this threshold,
+    /// we should allocate a separate slab.
+    static const size_t DefaultSizeThreshold;
+
+    void AllocateGOT();
+
+    // Testing methods.
+ virtual bool CheckInvariants(std::string &ErrorStr); + size_t GetDefaultCodeSlabSize() { return DefaultCodeSlabSize; } + size_t GetDefaultDataSlabSize() { return DefaultSlabSize; } + size_t GetDefaultStubSlabSize() { return DefaultSlabSize; } + unsigned GetNumCodeSlabs() { return CodeSlabs.size(); } + unsigned GetNumDataSlabs() { return DataAllocator.GetNumSlabs(); } + unsigned GetNumStubSlabs() { return StubAllocator.GetNumSlabs(); } + + /// startFunctionBody - When a function starts, allocate a block of free + /// executable memory, returning a pointer to it and its actual size. + uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) { + + FreeRangeHeader* candidateBlock = FreeMemoryList; + FreeRangeHeader* head = FreeMemoryList; + FreeRangeHeader* iter = head->Next; + + uintptr_t largest = candidateBlock->BlockSize; + + // Search for the largest free block + while (iter != head) { + if (iter->BlockSize > largest) { + largest = iter->BlockSize; + candidateBlock = iter; + } + iter = iter->Next; + } + + largest = largest - sizeof(MemoryRangeHeader); + + // If this block isn't big enough for the allocation desired, allocate + // another block of memory and add it to the free list. + if (largest < ActualSize || + largest <= FreeRangeHeader::getMinBlockSize()) { + DEBUG(dbgs() << "JIT: Allocating another slab of memory for function."); + candidateBlock = allocateNewCodeSlab((size_t)ActualSize); + } + + // Select this candidate block for allocation + CurBlock = candidateBlock; + + // Allocate the entire memory block. + FreeMemoryList = candidateBlock->AllocateBlock(); + ActualSize = CurBlock->BlockSize - sizeof(MemoryRangeHeader); + return (uint8_t *)(CurBlock + 1); + } + + /// allocateNewCodeSlab - Helper method to allocate a new slab of code + /// memory from the OS and add it to the free list. Returns the new + /// FreeRangeHeader at the base of the slab. + FreeRangeHeader *allocateNewCodeSlab(size_t MinSize) { + // If the user needs at least MinSize free memory, then we account for + // two MemoryRangeHeaders: the one in the user's block, and the one at the + // end of the slab. + size_t PaddedMin = MinSize + 2 * sizeof(MemoryRangeHeader); + size_t SlabSize = std::max(DefaultCodeSlabSize, PaddedMin); + sys::MemoryBlock B = allocateNewSlab(SlabSize); + CodeSlabs.push_back(B); + char *MemBase = (char*)(B.base()); + + // Put a tiny allocated block at the end of the memory chunk, so when + // FreeBlock calls getBlockAfter it doesn't fall off the end. + MemoryRangeHeader *EndBlock = + (MemoryRangeHeader*)(MemBase + B.size()) - 1; + EndBlock->ThisAllocated = 1; + EndBlock->PrevAllocated = 0; + EndBlock->BlockSize = sizeof(MemoryRangeHeader); + + // Start out with a vast new block of free memory. + FreeRangeHeader *NewBlock = (FreeRangeHeader*)MemBase; + NewBlock->ThisAllocated = 0; + // Make sure getFreeBlockBefore doesn't look into unmapped memory. + NewBlock->PrevAllocated = 1; + NewBlock->BlockSize = (uintptr_t)EndBlock - (uintptr_t)NewBlock; + NewBlock->SetEndOfBlockSizeMarker(); + NewBlock->AddToFreeList(FreeMemoryList); + + assert(NewBlock->BlockSize - sizeof(MemoryRangeHeader) >= MinSize && + "The block was too small!"); + return NewBlock; + } + + /// endFunctionBody - The function F is now allocated, and takes the memory + /// in the range [FunctionStart,FunctionEnd). 
+    void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                         uint8_t *FunctionEnd) {
+      assert(FunctionEnd > FunctionStart);
+      assert(FunctionStart == (uint8_t *)(CurBlock+1) &&
+             "Mismatched function start/end!");
+
+      uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock;
+
+      // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+    }
+
+    /// allocateSpace - Allocate a memory block of the given size.  This
+    /// method cannot be called between calls to startFunctionBody and
+    /// endFunctionBody.
+    uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
+      CurBlock = FreeMemoryList;
+      FreeMemoryList = FreeMemoryList->AllocateBlock();
+
+      uint8_t *result = (uint8_t *)(CurBlock + 1);
+
+      if (Alignment == 0) Alignment = 1;
+      result = (uint8_t*)(((intptr_t)result+Alignment-1) &
+                          ~(intptr_t)(Alignment-1));
+
+      uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock;
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+
+      return result;
+    }
+
+    /// allocateStub - Allocate memory for a function stub.
+    uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+                          unsigned Alignment) {
+      return (uint8_t*)StubAllocator.Allocate(StubSize, Alignment);
+    }
+
+    /// allocateGlobal - Allocate memory for a global.
+    uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
+      return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
+    }
+
+    /// startExceptionTable - Use startFunctionBody to allocate memory for the
+    /// function's exception table.
+    uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
+      return startFunctionBody(F, ActualSize);
+    }
+
+    /// endExceptionTable - The exception table of F is now allocated,
+    /// and takes the memory in the range [TableStart,TableEnd).
+    void endExceptionTable(const Function *F, uint8_t *TableStart,
+                           uint8_t *TableEnd, uint8_t* FrameRegister) {
+      assert(TableEnd > TableStart);
+      assert(TableStart == (uint8_t *)(CurBlock+1) &&
+             "Mismatched table start/end!");
+
+      uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
+
+      // Release the memory at the end of this block that isn't needed.
+      FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
+    }
+
+    uint8_t *getGOTBase() const {
+      return GOTBase;
+    }
+
+    void deallocateBlock(void *Block) {
+      // Find the block that is allocated for this function.
+      MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
+      assert(MemRange->ThisAllocated && "Block isn't allocated!");
+
+      // Fill the buffer with garbage!
+      if (PoisonMemory) {
+        memset(MemRange+1, 0xCD, MemRange->BlockSize-sizeof(*MemRange));
+      }
+
+      // Free the memory.
+      FreeMemoryList = MemRange->FreeBlock(FreeMemoryList);
+    }
+
+    /// deallocateFunctionBody - Deallocate all memory for the specified
+    /// function body.
+    void deallocateFunctionBody(void *Body) {
+      if (Body) deallocateBlock(Body);
+    }
+
+    /// deallocateExceptionTable - Deallocate memory for the specified
+    /// exception table.
+    void deallocateExceptionTable(void *ET) {
+      if (ET) deallocateBlock(ET);
+    }
+
+    /// setMemoryWritable - When code generation is in progress,
+    /// the code pages may need permissions changed.
+    void setMemoryWritable()
+    {
+      for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
+        sys::Memory::setWritable(CodeSlabs[i]);
+    }
+    /// setMemoryExecutable - When code generation is done and we're ready to
+    /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable() + { + for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) + sys::Memory::setExecutable(CodeSlabs[i]); + } + + /// setPoisonMemory - Controls whether we write garbage over freed memory. + /// + void setPoisonMemory(bool poison) { + PoisonMemory = poison; + } + }; +} + +MemSlab *JITSlabAllocator::Allocate(size_t Size) { + sys::MemoryBlock B = JMM.allocateNewSlab(Size); + MemSlab *Slab = (MemSlab*)B.base(); + Slab->Size = B.size(); + Slab->NextPtr = 0; + return Slab; +} + +void JITSlabAllocator::Deallocate(MemSlab *Slab) { + sys::MemoryBlock B(Slab, Slab->Size); + sys::Memory::ReleaseRWX(B); +} + +DefaultJITMemoryManager::DefaultJITMemoryManager() + : +#ifdef NDEBUG + PoisonMemory(false), +#else + PoisonMemory(true), +#endif + LastSlab(0, 0), + BumpSlabAllocator(*this), + StubAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator), + DataAllocator(DefaultSlabSize, DefaultSizeThreshold, BumpSlabAllocator) { + + // Allocate space for code. + sys::MemoryBlock MemBlock = allocateNewSlab(DefaultCodeSlabSize); + CodeSlabs.push_back(MemBlock); + uint8_t *MemBase = (uint8_t*)MemBlock.base(); + + // We set up the memory chunk with 4 mem regions, like this: + // [ START + // [ Free #0 ] -> Large space to allocate functions from. + // [ Allocated #1 ] -> Tiny space to separate regions. + // [ Free #2 ] -> Tiny space so there is always at least 1 free block. + // [ Allocated #3 ] -> Tiny space to prevent looking past end of block. + // END ] + // + // The last three blocks are never deallocated or touched. + + // Add MemoryRangeHeader to the end of the memory region, indicating that + // the space after the block of memory is allocated. This is block #3. + MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1; + Mem3->ThisAllocated = 1; + Mem3->PrevAllocated = 0; + Mem3->BlockSize = sizeof(MemoryRangeHeader); + + /// Add a tiny free region so that the free list always has one entry. + FreeRangeHeader *Mem2 = + (FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize()); + Mem2->ThisAllocated = 0; + Mem2->PrevAllocated = 1; + Mem2->BlockSize = FreeRangeHeader::getMinBlockSize(); + Mem2->SetEndOfBlockSizeMarker(); + Mem2->Prev = Mem2; // Mem2 *is* the free list for now. + Mem2->Next = Mem2; + + /// Add a tiny allocated region so that Mem2 is never coalesced away. + MemoryRangeHeader *Mem1 = (MemoryRangeHeader*)Mem2-1; + Mem1->ThisAllocated = 1; + Mem1->PrevAllocated = 0; + Mem1->BlockSize = sizeof(MemoryRangeHeader); + + // Add a FreeRangeHeader to the start of the function body region, indicating + // that the space is free. Mark the previous block allocated so we never look + // at it. + FreeRangeHeader *Mem0 = (FreeRangeHeader*)MemBase; + Mem0->ThisAllocated = 0; + Mem0->PrevAllocated = 1; + Mem0->BlockSize = (char*)Mem1-(char*)Mem0; + Mem0->SetEndOfBlockSizeMarker(); + Mem0->AddToFreeList(Mem2); + + // Start out with the freelist pointing to Mem0. + FreeMemoryList = Mem0; + + GOTBase = NULL; +} + +void DefaultJITMemoryManager::AllocateGOT() { + assert(GOTBase == 0 && "Cannot allocate the got multiple times"); + GOTBase = new uint8_t[sizeof(void*) * 8192]; + HasGOT = true; +} + +DefaultJITMemoryManager::~DefaultJITMemoryManager() { + for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i) + sys::Memory::ReleaseRWX(CodeSlabs[i]); + + delete[] GOTBase; +} + +sys::MemoryBlock DefaultJITMemoryManager::allocateNewSlab(size_t size) { + // Allocate a new block close to the last one. 
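+  // Allocating near LastSlab is only a hint: sys::Memory::AllocateRWX may
+  // place the new slab anywhere, which is one reason the emitter above never
+  // assumes two slabs are within branch range of each other and falls back to
+  // far stubs when in doubt.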
+  std::string ErrMsg;
+  sys::MemoryBlock *LastSlabPtr = LastSlab.base() ? &LastSlab : 0;
+  sys::MemoryBlock B = sys::Memory::AllocateRWX(size, LastSlabPtr, &ErrMsg);
+  if (B.base() == 0) {
+    report_fatal_error("Allocation failed when allocating new memory in the"
+                       " JIT\n" + Twine(ErrMsg));
+  }
+  LastSlab = B;
+  ++NumSlabs;
+  // Initialize the slab to garbage when debugging.
+  if (PoisonMemory) {
+    memset(B.base(), 0xCD, B.size());
+  }
+  return B;
+}
+
+/// CheckInvariants - For testing only.  Return "" if all internal invariants
+/// are preserved, and a helpful error message otherwise.  For free and
+/// allocated blocks, make sure that adding BlockSize gives a valid block.
+/// For free blocks, make sure they're in the free list and that their end of
+/// block size marker is correct.  This function should return an error before
+/// accessing bad memory.  This function is defined here instead of in
+/// JITMemoryManagerTest.cpp so that we don't have to expose all of the
+/// implementation details of DefaultJITMemoryManager.
+bool DefaultJITMemoryManager::CheckInvariants(std::string &ErrorStr) {
+  raw_string_ostream Err(ErrorStr);
+
+  // Construct the set of FreeRangeHeader pointers so we can query it
+  // efficiently.
+  llvm::SmallPtrSet<MemoryRangeHeader*, 16> FreeHdrSet;
+  FreeRangeHeader* FreeHead = FreeMemoryList;
+  FreeRangeHeader* FreeRange = FreeHead;
+
+  do {
+    // Check that the free range pointer is in the blocks we've allocated.
+    bool Found = false;
+    for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+         E = CodeSlabs.end(); I != E && !Found; ++I) {
+      char *Start = (char*)I->base();
+      char *End = Start + I->size();
+      Found = (Start <= (char*)FreeRange && (char*)FreeRange < End);
+    }
+    if (!Found) {
+      Err << "Corrupt free list; points to " << FreeRange;
+      return false;
+    }
+
+    if (FreeRange->Next->Prev != FreeRange) {
+      Err << "Next and Prev pointers do not match.";
+      return false;
+    }
+
+    // Otherwise, add it to the set.
+    FreeHdrSet.insert(FreeRange);
+    FreeRange = FreeRange->Next;
+  } while (FreeRange != FreeHead);
+
+  // Go over each block, and look at each MemoryRangeHeader.
+  for (std::vector<sys::MemoryBlock>::iterator I = CodeSlabs.begin(),
+       E = CodeSlabs.end(); I != E; ++I) {
+    char *Start = (char*)I->base();
+    char *End = Start + I->size();
+
+    // Check each memory range.
+    for (MemoryRangeHeader *Hdr = (MemoryRangeHeader*)Start, *LastHdr = NULL;
+         Start <= (char*)Hdr && (char*)Hdr < End;
+         Hdr = &Hdr->getBlockAfter()) {
+      if (Hdr->ThisAllocated == 0) {
+        // Check that this range is in the free list.
+        if (!FreeHdrSet.count(Hdr)) {
+          Err << "Found free header at " << Hdr << " that is not in free list.";
+          return false;
+        }
+
+        // Now make sure the size marker at the end of the block is correct.
+        uintptr_t *Marker = ((uintptr_t*)&Hdr->getBlockAfter()) - 1;
+        if (!(Start <= (char*)Marker && (char*)Marker < End)) {
+          Err << "Block size in header points out of current MemoryBlock.";
+          return false;
+        }
+        if (Hdr->BlockSize != *Marker) {
+          Err << "End of block size marker (" << *Marker << ") "
+              << "and BlockSize (" << Hdr->BlockSize << ") don't match.";
+          return false;
+        }
+      }
+
+      if (LastHdr && LastHdr->ThisAllocated != Hdr->PrevAllocated) {
+        Err << "Hdr->PrevAllocated (" << Hdr->PrevAllocated << ") != "
+            << "LastHdr->ThisAllocated (" << LastHdr->ThisAllocated << ")";
+        return false;
+      } else if (!LastHdr && !Hdr->PrevAllocated) {
+        Err << "The first header should have PrevAllocated true.";
+        return false;
+      }
+
+      // Remember the last header.
+      LastHdr = Hdr;
+    }
+  }
+
+  // All invariants are preserved.
+ return true; +} + +JITMemoryManager *JITMemoryManager::CreateDefaultMemManager() { + return new DefaultJITMemoryManager(); +} + +// Allocate memory for code in 512K slabs. +const size_t DefaultJITMemoryManager::DefaultCodeSlabSize = 512 * 1024; + +// Allocate globals and stubs in slabs of 64K. (probably 16 pages) +const size_t DefaultJITMemoryManager::DefaultSlabSize = 64 * 1024; + +// Waste at most 16K at the end of each bump slab. (probably 4 pages) +const size_t DefaultJITMemoryManager::DefaultSizeThreshold = 16 * 1024; diff --git a/final/lib/ExecutionEngine/JIT/Makefile b/final/lib/ExecutionEngine/JIT/Makefile new file mode 100644 index 00000000000..aafa3d9d420 --- /dev/null +++ b/final/lib/ExecutionEngine/JIT/Makefile @@ -0,0 +1,38 @@ +##===- lib/ExecutionEngine/JIT/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMJIT + +# Get the $(ARCH) setting +include $(LEVEL)/Makefile.config + +# Enable the X86 JIT if compiling on X86 +ifeq ($(ARCH), x86) + ENABLE_X86_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the X86 JIT on non-X86 hosts +ifdef ENABLE_X86_JIT + CPPFLAGS += -DENABLE_X86_JIT +endif + +# Enable the Sparc JIT if compiling on Sparc +ifeq ($(ARCH), Sparc) + ENABLE_SPARC_JIT = 1 +endif + +# This flag can also be used on the command line to force inclusion +# of the Sparc JIT on non-Sparc hosts +ifdef ENABLE_SPARC_JIT + CPPFLAGS += -DENABLE_SPARC_JIT +endif + +include $(LEVEL)/Makefile.common diff --git a/final/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/final/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp new file mode 100644 index 00000000000..670fa7da1fe --- /dev/null +++ b/final/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -0,0 +1,192 @@ +//===-- OProfileJITEventListener.cpp - Tell OProfile about JITted code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a JITEventListener object that calls into OProfile to tell +// it about JITted functions. For now, we only record function names and sizes, +// but eventually we'll also record line number information. +// +// See http://oprofile.sourceforge.net/doc/devel/jit-interface.html for the +// definition of the interface we're using. 
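+//
+// In outline, the opagent API used below is: op_open_agent() to connect,
+// op_write_native_code() and op_write_debug_line_info() to describe each
+// JITted function, op_unload_native_code() when code is freed, and
+// op_close_agent() on shutdown.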
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "oprofile-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/Metadata.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h"
+#include <stddef.h>
+using namespace llvm;
+
+#if USE_OPROFILE
+
+#include <opagent.h>
+
+namespace {
+
+class OProfileJITEventListener : public JITEventListener {
+  op_agent_t Agent;
+public:
+  OProfileJITEventListener();
+  ~OProfileJITEventListener();
+
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *FnStart, size_t FnSize,
+                                     const EmittedFunctionDetails &Details);
+  virtual void NotifyFreeingMachineCode(void *OldPtr);
+};
+
+OProfileJITEventListener::OProfileJITEventListener()
+    : Agent(op_open_agent()) {
+  if (Agent == NULL) {
+    const std::string err_str = sys::StrError();
+    DEBUG(dbgs() << "Failed to connect to OProfile agent: " << err_str
+                 << "\n");
+  } else {
+    DEBUG(dbgs() << "Connected to OProfile agent.\n");
+  }
+}
+
+OProfileJITEventListener::~OProfileJITEventListener() {
+  if (Agent != NULL) {
+    if (op_close_agent(Agent) == -1) {
+      const std::string err_str = sys::StrError();
+      DEBUG(dbgs() << "Failed to disconnect from OProfile agent: "
+                   << err_str << "\n");
+    } else {
+      DEBUG(dbgs() << "Disconnected from OProfile agent.\n");
+    }
+  }
+}
+
+class FilenameCache {
+  // Holds the filename of each Scope, so that we can pass a null-terminated
+  // string into oprofile.  Use an AssertingVH rather than a ValueMap because
+  // we shouldn't be modifying any MDNodes while this map is alive.
+  DenseMap<AssertingVH<MDNode>, std::string> Filenames;
+
+ public:
+  const char *getFilename(MDNode *Scope) {
+    std::string &Filename = Filenames[Scope];
+    if (Filename.empty()) {
+      Filename = DIScope(Scope).getFilename();
+    }
+    return Filename.c_str();
+  }
+};
+
+static debug_line_info LineStartToOProfileFormat(
+    const MachineFunction &MF, FilenameCache &Filenames,
+    uintptr_t Address, DebugLoc Loc) {
+  debug_line_info Result;
+  Result.vma = Address;
+  Result.lineno = Loc.getLine();
+  Result.filename = Filenames.getFilename(
+      Loc.getScope(MF.getFunction()->getContext()));
+  DEBUG(dbgs() << "Mapping " << reinterpret_cast<void*>(Result.vma) << " to "
+               << Result.filename << ":" << Result.lineno << "\n");
+  return Result;
+}
+
+// Adds the just-emitted function to the symbol table.
+void OProfileJITEventListener::NotifyFunctionEmitted(
+    const Function &F, void *FnStart, size_t FnSize,
+    const EmittedFunctionDetails &Details) {
+  assert(F.hasName() && FnStart != 0 && "Bad symbol to add");
+  if (op_write_native_code(Agent, F.getName().data(),
+                           reinterpret_cast<uint64_t>(FnStart),
+                           FnStart, FnSize) == -1) {
+    DEBUG(dbgs() << "Failed to tell OProfile about native function "
+                 << F.getName() << " at ["
+                 << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+    return;
+  }
+
+  if (!Details.LineStarts.empty()) {
+    // Now we convert the line number information from the address/DebugLoc
+    // format in Details to the address/filename/lineno format that OProfile
+    // expects.  Note that OProfile 0.9.4 has a bug that causes it to ignore
+    // line numbers for addresses above 4G.
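+    // Each debug_line_info record pairs a code address with a source
+    // position; for example (values illustrative):
+    //   { vma = 0x7f30d4c0, lineno = 42, filename = "foo.c" }
+    // tells OProfile that the instructions starting at that address came
+    // from foo.c:42.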
+    FilenameCache Filenames;
+    std::vector<debug_line_info> LineInfo;
+    LineInfo.reserve(1 + Details.LineStarts.size());
+
+    DebugLoc FirstLoc = Details.LineStarts[0].Loc;
+    assert(!FirstLoc.isUnknown()
+           && "LineStarts should not contain unknown DebugLocs");
+    MDNode *FirstLocScope = FirstLoc.getScope(F.getContext());
+    DISubprogram FunctionDI = getDISubprogram(FirstLocScope);
+    if (FunctionDI.Verify()) {
+      // If we have debug info for the function itself, use that as the line
+      // number of the first several instructions.  Otherwise, after filling
+      // LineInfo, we'll adjust the address of the first line number to point
+      // at the start of the function.
+      debug_line_info line_info;
+      line_info.vma = reinterpret_cast<uintptr_t>(FnStart);
+      line_info.lineno = FunctionDI.getLineNumber();
+      line_info.filename = Filenames.getFilename(FirstLocScope);
+      LineInfo.push_back(line_info);
+    }
+
+    for (std::vector<EmittedFunctionDetails::LineStart>::const_iterator
+           I = Details.LineStarts.begin(), E = Details.LineStarts.end();
+         I != E; ++I) {
+      LineInfo.push_back(LineStartToOProfileFormat(
+          *Details.MF, Filenames, I->Address, I->Loc));
+    }
+
+    // In case the function didn't have line info of its own, adjust the first
+    // line info's address to point at the start of the function.
+    LineInfo[0].vma = reinterpret_cast<uintptr_t>(FnStart);
+
+    if (op_write_debug_line_info(Agent, FnStart,
+                                 LineInfo.size(), &*LineInfo.begin()) == -1) {
+      DEBUG(dbgs()
+            << "Failed to tell OProfile about line numbers for native function "
+            << F.getName() << " at ["
+            << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n");
+    }
+  }
+}
+
+// Removes the being-deleted function from the symbol table.
+void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) {
+  assert(FnStart && "Invalid function pointer");
+  if (op_unload_native_code(Agent, reinterpret_cast<uint64_t>(FnStart)) == -1) {
+    DEBUG(dbgs()
+          << "Failed to tell OProfile about unload of native function at "
+          << FnStart << "\n");
+  }
+}
+
+}  // anonymous namespace.
+
+namespace llvm {
+JITEventListener *createOProfileJITEventListener() {
+  return new OProfileJITEventListener;
+}
+}
+
+#else  // USE_OPROFILE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they haven't configured with the OProfile libraries.
+JITEventListener *createOProfileJITEventListener() {
+  return NULL;
+}
+}  // namespace llvm
+
+#endif  // USE_OPROFILE
diff --git a/final/lib/ExecutionEngine/JIT/TargetSelect.cpp b/final/lib/ExecutionEngine/JIT/TargetSelect.cpp
new file mode 100644
index 00000000000..6b7173cece1
--- /dev/null
+++ b/final/lib/ExecutionEngine/JIT/TargetSelect.cpp
@@ -0,0 +1,91 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x.  Clients
+// should initialize targets prior to calling createJIT.
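+//
+// For example (illustrative; EngineBuilder is the usual entry point in this
+// release), a client might write:
+//   InitializeNativeTarget();
+//   std::string Err;
+//   ExecutionEngine *EE = EngineBuilder(M).setErrorStr(&Err).create();
+// after which selectTarget below runs as part of createJIT.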
+//
+//===----------------------------------------------------------------------===//
+
+#include "JIT.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch.  Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *JIT::selectTarget(Module *Mod,
+                                 StringRef MArch,
+                                 StringRef MCPU,
+                                 const SmallVectorImpl<std::string>& MAttrs,
+                                 std::string *ErrorStr) {
+  Triple TheTriple(Mod->getTargetTriple());
+  if (TheTriple.getTriple().empty())
+    TheTriple.setTriple(sys::getHostTriple());
+
+  // Adjust the triple to match what the user requested.
+  const Target *TheTarget = 0;
+  if (!MArch.empty()) {
+    for (TargetRegistry::iterator it = TargetRegistry::begin(),
+           ie = TargetRegistry::end(); it != ie; ++it) {
+      if (MArch == it->getName()) {
+        TheTarget = &*it;
+        break;
+      }
+    }
+
+    if (!TheTarget) {
+      *ErrorStr = "No available targets are compatible with this -march, "
+                  "see -version for the available targets.\n";
+      return 0;
+    }
+
+    // Adjust the triple to match (if known), otherwise stick with the
+    // module/host triple.
+    Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch);
+    if (Type != Triple::UnknownArch)
+      TheTriple.setArch(Type);
+  } else {
+    std::string Error;
+    TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error);
+    if (TheTarget == 0) {
+      if (ErrorStr)
+        *ErrorStr = Error;
+      return 0;
+    }
+  }
+
+  if (!TheTarget->hasJIT()) {
+    errs() << "WARNING: This target JIT is not designed for the host you are"
+           << " running on.  If bad things happen, please choose a different "
+           << "-march switch.\n";
+  }
+
+  // Package up features to be passed to target/subtarget.
+  std::string FeaturesStr;
+  if (!MCPU.empty() || !MAttrs.empty()) {
+    SubtargetFeatures Features;
+    Features.setCPU(MCPU);
+    for (unsigned i = 0; i != MAttrs.size(); ++i)
+      Features.AddFeature(MAttrs[i]);
+    FeaturesStr = Features.getString();
+  }
+
+  // Allocate a target...
+  TargetMachine *Target =
+    TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
+  assert(Target && "Could not allocate target machine!");
+  return Target;
+}
diff --git a/final/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/final/lib/ExecutionEngine/MCJIT/CMakeLists.txt
new file mode 100644
index 00000000000..f7ed176fef7
--- /dev/null
+++ b/final/lib/ExecutionEngine/MCJIT/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMCJIT
+  MCJIT.cpp
+  TargetSelect.cpp
+  )
diff --git a/final/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/final/lib/ExecutionEngine/MCJIT/MCJIT.cpp
new file mode 100644
index 00000000000..f1e9dab250b
--- /dev/null
+++ b/final/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -0,0 +1,92 @@
+//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/DynamicLibrary.h"
+
+using namespace llvm;
+
+namespace {
+
+static struct RegisterJIT {
+  RegisterJIT() { MCJIT::Register(); }
+} JITRegistrator;
+
+}
+
+extern "C" void LLVMLinkInMCJIT() {
+}
+
+ExecutionEngine *MCJIT::createJIT(Module *M,
+                                  std::string *ErrorStr,
+                                  JITMemoryManager *JMM,
+                                  CodeGenOpt::Level OptLevel,
+                                  bool GVsWithCode,
+                                  CodeModel::Model CMM,
+                                  StringRef MArch,
+                                  StringRef MCPU,
+                                  const SmallVectorImpl<std::string>& MAttrs) {
+  // Try to register the program as a source of symbols to resolve against.
+  //
+  // FIXME: Don't do this here.
+  sys::DynamicLibrary::LoadLibraryPermanently(0, NULL);
+
+  // Pick a target either via -march or by guessing the native arch.
+  //
+  // FIXME: This should be lifted out of here, it isn't something which should
+  // be part of the JIT policy, rather the burden for this selection should be
+  // pushed to clients.
+  TargetMachine *TM = MCJIT::selectTarget(M, MArch, MCPU, MAttrs, ErrorStr);
+  if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+  TM->setCodeModel(CMM);
+
+  // If the target supports JIT code generation, create the JIT.
+  if (TargetJITInfo *TJ = TM->getJITInfo())
+    return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode);
+
+  if (ErrorStr)
+    *ErrorStr = "target does not support JIT code generation";
+  return 0;
+}
+
+MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+             JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+             bool AllocateGVsWithCode)
+  : ExecutionEngine(M) {
+}
+
+MCJIT::~MCJIT() {
+}
+
+void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) {
+  report_fatal_error("not yet implemented");
+  return 0;
+}
+
+void *MCJIT::getPointerToFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+  return 0;
+}
+
+void *MCJIT::recompileAndRelinkFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+}
+
+void MCJIT::freeMachineCodeForFunction(Function *F) {
+  report_fatal_error("not yet implemented");
+}
+
+GenericValue MCJIT::runFunction(Function *F,
+                                const std::vector<GenericValue> &ArgValues) {
+  report_fatal_error("not yet implemented");
+  return GenericValue();
+}
diff --git a/final/lib/ExecutionEngine/MCJIT/MCJIT.h b/final/lib/ExecutionEngine/MCJIT/MCJIT.h
new file mode 100644
index 00000000000..cd1f989b10c
--- /dev/null
+++ b/final/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -0,0 +1,68 @@
+//===-- MCJIT.h - Class definition for the MCJIT ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+
+namespace llvm {
+
+class MCJIT : public ExecutionEngine {
+  MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+        JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
+        bool AllocateGVsWithCode);
+public:
+  ~MCJIT();
+
+  /// @name ExecutionEngine interface implementation
+  /// @{
+
+  virtual void *getPointerToBasicBlock(BasicBlock *BB);
+
+  virtual void *getPointerToFunction(Function *F);
+
+  virtual void *recompileAndRelinkFunction(Function *F);
+
+  virtual void freeMachineCodeForFunction(Function *F);
+
+  virtual GenericValue runFunction(Function *F,
+                                   const std::vector<GenericValue> &ArgValues);
+
+  /// @}
+  /// @name (Private) Registration Interfaces
+  /// @{
+
+  static void Register() {
+    MCJITCtor = createJIT;
+  }
+
+  // FIXME: This routine is scheduled for termination. Do not use it.
+  static TargetMachine *selectTarget(Module *M,
+                                     StringRef MArch,
+                                     StringRef MCPU,
+                                     const SmallVectorImpl<std::string>& MAttrs,
+                                     std::string *Err);
+
+  static ExecutionEngine *createJIT(Module *M,
+                                    std::string *ErrorStr,
+                                    JITMemoryManager *JMM,
+                                    CodeGenOpt::Level OptLevel,
+                                    bool GVsWithCode,
+                                    CodeModel::Model CMM,
+                                    StringRef MArch,
+                                    StringRef MCPU,
+                                    const SmallVectorImpl<std::string>& MAttrs);
+
+  // @}
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/ExecutionEngine/MCJIT/Makefile b/final/lib/ExecutionEngine/MCJIT/Makefile
new file mode 100644
index 00000000000..967efbc0efa
--- /dev/null
+++ b/final/lib/ExecutionEngine/MCJIT/Makefile
@@ -0,0 +1,13 @@
+##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMMCJIT
+
+include $(LEVEL)/Makefile.common
diff --git a/final/lib/ExecutionEngine/MCJIT/TargetSelect.cpp b/final/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
new file mode 100644
index 00000000000..50f65938bb0
--- /dev/null
+++ b/final/lib/ExecutionEngine/MCJIT/TargetSelect.cpp
@@ -0,0 +1,91 @@
+//===-- TargetSelect.cpp - Target Chooser Code ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This just asks the TargetRegistry for the appropriate JIT to use, and allows
+// the user to specify a specific one on the commandline with -march=x.  Clients
+// should initialize targets prior to calling createJIT.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCJIT.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+/// selectTarget - Pick a target either via -march or by guessing the native
+/// arch.  Add any CPU features specified via -mcpu or -mattr.
+TargetMachine *MCJIT::selectTarget(Module *Mod, + StringRef MArch, + StringRef MCPU, + const SmallVectorImpl& MAttrs, + std::string *ErrorStr) { + Triple TheTriple(Mod->getTargetTriple()); + if (TheTriple.getTriple().empty()) + TheTriple.setTriple(sys::getHostTriple()); + + // Adjust the triple to match what the user requested. + const Target *TheTarget = 0; + if (!MArch.empty()) { + for (TargetRegistry::iterator it = TargetRegistry::begin(), + ie = TargetRegistry::end(); it != ie; ++it) { + if (MArch == it->getName()) { + TheTarget = &*it; + break; + } + } + + if (!TheTarget) { + *ErrorStr = "No available targets are compatible with this -march, " + "see -version for the available targets.\n"; + return 0; + } + + // Adjust the triple to match (if known), otherwise stick with the + // module/host triple. + Triple::ArchType Type = Triple::getArchTypeForLLVMName(MArch); + if (Type != Triple::UnknownArch) + TheTriple.setArch(Type); + } else { + std::string Error; + TheTarget = TargetRegistry::lookupTarget(TheTriple.getTriple(), Error); + if (TheTarget == 0) { + if (ErrorStr) + *ErrorStr = Error; + return 0; + } + } + + if (!TheTarget->hasJIT()) { + errs() << "WARNING: This target JIT is not designed for the host you are" + << " running. If bad things happen, please choose a different " + << "-march switch.\n"; + } + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + if (!MCPU.empty() || !MAttrs.empty()) { + SubtargetFeatures Features; + Features.setCPU(MCPU); + for (unsigned i = 0; i != MAttrs.size(); ++i) + Features.AddFeature(MAttrs[i]); + FeaturesStr = Features.getString(); + } + + // Allocate a target... + TargetMachine *Target = + TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); + assert(Target && "Could not allocate target machine!"); + return Target; +} diff --git a/final/lib/ExecutionEngine/Makefile b/final/lib/ExecutionEngine/Makefile new file mode 100644 index 00000000000..1858d776616 --- /dev/null +++ b/final/lib/ExecutionEngine/Makefile @@ -0,0 +1,13 @@ +##===- lib/ExecutionEngine/Makefile ------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## +LEVEL = ../.. +LIBRARYNAME = LLVMExecutionEngine +PARALLEL_DIRS = Interpreter JIT MCJIT + +include $(LEVEL)/Makefile.common diff --git a/final/lib/Linker/CMakeLists.txt b/final/lib/Linker/CMakeLists.txt new file mode 100644 index 00000000000..0b6d2f4218e --- /dev/null +++ b/final/lib/Linker/CMakeLists.txt @@ -0,0 +1,6 @@ +add_llvm_library(LLVMLinker + LinkArchives.cpp + LinkItems.cpp + LinkModules.cpp + Linker.cpp + ) diff --git a/final/lib/Linker/LinkArchives.cpp b/final/lib/Linker/LinkArchives.cpp new file mode 100644 index 00000000000..2c4ed7fdc17 --- /dev/null +++ b/final/lib/Linker/LinkArchives.cpp @@ -0,0 +1,198 @@ +//===- lib/Linker/LinkArchives.cpp - Link LLVM objects and libraries ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains routines to handle linking together LLVM bitcode files, +// and to handle annoying things like static libraries. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Bitcode/Archive.h"
+#include "llvm/Config/config.h"
+#include <memory>
+#include <set>
+using namespace llvm;
+
+/// GetAllUndefinedSymbols - calculates the set of undefined symbols that still
+/// exist in an LLVM module. This is a bit tricky because there may be two
+/// symbols with the same name but different LLVM types that will be resolved
+/// to each other but aren't currently (thus we need to treat them as
+/// resolved).
+///
+/// Inputs:
+///  M - The module in which to find undefined symbols.
+///
+/// Outputs:
+///  UndefinedSymbols - A set of C++ strings containing the names of all
+///                     undefined symbols.
+///
+static void
+GetAllUndefinedSymbols(Module *M, std::set<std::string> &UndefinedSymbols) {
+  std::set<std::string> DefinedSymbols;
+  UndefinedSymbols.clear();
+
+  // If the program doesn't define a main, try pulling one in from a .a file.
+  // This is needed for programs where the main function is defined in an
+  // archive, such as f2c'd programs.
+  Function *Main = M->getFunction("main");
+  if (Main == 0 || Main->isDeclaration())
+    UndefinedSymbols.insert("main");
+
+  for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+       I != E; ++I)
+    if (I->hasName()) {
+      if (I->isDeclaration())
+        UndefinedSymbols.insert(I->getName());
+      else if (!I->hasLocalLinkage()) {
+        assert(!I->hasDLLImportLinkage()
+               && "Found dllimported non-external symbol!");
+        DefinedSymbols.insert(I->getName());
+      }
+    }
+
+  for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+       I != E; ++I)
+    if (I->hasName())
+      DefinedSymbols.insert(I->getName());
+
+  // Prune out any defined symbols from the undefined symbols set...
+  for (std::set<std::string>::iterator I = UndefinedSymbols.begin();
+       I != UndefinedSymbols.end(); )
+    if (DefinedSymbols.count(*I))
+      UndefinedSymbols.erase(I++);  // This symbol really is defined!
+    else
+      ++I; // Keep this symbol in the undefined symbols list
+}
+
+/// LinkInArchive - opens an archive library and links in all objects that
+/// provide symbols that are currently undefined.
+///
+/// Inputs:
+///  Filename - The pathname of the archive.
+///
+/// Return Value:
+///  TRUE  - An error occurred.
+///  FALSE - No errors.
+bool
+Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
+  // Make sure this is an archive file we're dealing with
+  if (!Filename.isArchive())
+    return error("File '" + Filename.str() + "' is not an archive.");
+
+  // Open the archive file
+  verbose("Linking archive file '" + Filename.str() + "'");
+
+  // Find all of the symbols currently undefined in the bitcode program.
+  // If all the symbols are defined, the program is complete, and there is
+  // no reason to link in any archive files.
+  std::set<std::string> UndefinedSymbols;
+  GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+  if (UndefinedSymbols.empty()) {
+    verbose("No symbols undefined, skipping library '" + Filename.str() + "'");
+    return false;  // No need to link anything in!
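+    // (For illustration: for a module that defines foo() but only declares
+    // bar(), GetAllUndefinedSymbols would have produced { "bar" }, plus
+    // "main" when no main is defined; that set is what drives the
+    // member-selection loop below. The function names are hypothetical.)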
+  }
+
+  std::string ErrMsg;
+  std::auto_ptr<Archive> AutoArch (
+    Archive::OpenAndLoadSymbols(Filename, Context, &ErrMsg));
+
+  Archive* arch = AutoArch.get();
+
+  if (!arch)
+    return error("Cannot read archive '" + Filename.str() +
+                 "': " + ErrMsg);
+  if (!arch->isBitcodeArchive()) {
+    is_native = true;
+    return false;
+  }
+  is_native = false;
+
+  // Save a set of symbols that are not defined by the archive. Since we're
+  // entering a loop, there's no point searching for these multiple times. This
+  // variable is used to "set_subtract" from the set of undefined symbols.
+  std::set<std::string> NotDefinedByArchive;
+
+  // Save the current set of undefined symbols, because we may have to make
+  // multiple passes over the archive:
+  std::set<std::string> CurrentlyUndefinedSymbols;
+
+  do {
+    CurrentlyUndefinedSymbols = UndefinedSymbols;
+
+    // Find the modules we need to link into the target module. Note that arch
+    // keeps ownership of these modules and may return the same Module* from a
+    // subsequent call.
+    std::set<Module*> Modules;
+    if (!arch->findModulesDefiningSymbols(UndefinedSymbols, Modules, &ErrMsg))
+      return error("Cannot find symbols in '" + Filename.str() +
+                   "': " + ErrMsg);
+
+    // If we didn't find any more modules to link this time, we are done
+    // searching this archive.
+    if (Modules.empty())
+      break;
+
+    // Any symbols remaining in UndefinedSymbols after
+    // findModulesDefiningSymbols are ones that the archive does not define. So
+    // we add them to the NotDefinedByArchive variable now.
+    NotDefinedByArchive.insert(UndefinedSymbols.begin(),
+                               UndefinedSymbols.end());
+
+    // Loop over all the Modules that we got back from the archive
+    for (std::set<Module*>::iterator I=Modules.begin(), E=Modules.end();
+         I != E; ++I) {
+
+      // Get the module we must link in.
+      std::string moduleErrorMsg;
+      Module* aModule = *I;
+      if (aModule != NULL) {
+        if (aModule->MaterializeAll(&moduleErrorMsg))
+          return error("Could not load a module: " + moduleErrorMsg);
+
+        verbose("  Linking in module: " + aModule->getModuleIdentifier());
+
+        // Link it in
+        if (LinkInModule(aModule, &moduleErrorMsg))
+          return error("Cannot link in module '" +
+                       aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
+      }
+    }
+
+    // Get the undefined symbols from the aggregate module. This recomputes the
+    // symbols we still need after the new modules have been linked in.
+    GetAllUndefinedSymbols(Composite, UndefinedSymbols);
+
+    // At this point we have two sets of undefined symbols: UndefinedSymbols
+    // which holds the undefined symbols from all the modules, and
+    // NotDefinedByArchive which holds symbols we know the archive doesn't
+    // define. There's no point searching for symbols that we won't find in the
+    // archive, so we subtract these sets.
+    set_subtract(UndefinedSymbols, NotDefinedByArchive);
+
+    // If there are no symbols left, there is no point in continuing to search
+    // the archive.
+    if (UndefinedSymbols.empty())
+      break;
+  } while (CurrentlyUndefinedSymbols != UndefinedSymbols);
+
+  return false;
+}
diff --git a/final/lib/Linker/LinkItems.cpp b/final/lib/Linker/LinkItems.cpp
new file mode 100644
index 00000000000..52a0d175a5c
--- /dev/null
+++ b/final/lib/Linker/LinkItems.cpp
@@ -0,0 +1,241 @@
+//===- lib/Linker/LinkItems.cpp - Link LLVM objects and libraries ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains routines to handle linking together LLVM bitcode files, +// and to handle annoying things like static libraries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker.h" +#include "llvm/Module.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/system_error.h" +using namespace llvm; + +// LinkItems - This function is the main entry point into linking. It takes a +// list of LinkItem which indicates the order the files should be linked and +// how each file should be treated (plain file or with library search). The +// function only links bitcode and produces a result list of items that are +// native objects. +bool +Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) { + // Clear the NativeItems just in case + NativeItems.clear(); + + // For each linkage item ... + for (ItemList::const_iterator I = Items.begin(), E = Items.end(); + I != E; ++I) { + if (I->second) { + // Link in the library suggested. + bool is_native = false; + if (LinkInLibrary(I->first, is_native)) + return true; + if (is_native) + NativeItems.push_back(*I); + } else { + // Link in the file suggested + bool is_native = false; + if (LinkInFile(sys::Path(I->first), is_native)) + return true; + if (is_native) + NativeItems.push_back(*I); + } + } + + // At this point we have processed all the link items provided to us. Since + // we have an aggregated module at this point, the dependent libraries in + // that module should also be aggregated with duplicates eliminated. This is + // now the time to process the dependent libraries to resolve any remaining + // symbols. + bool is_native; + for (Module::lib_iterator I = Composite->lib_begin(), + E = Composite->lib_end(); I != E; ++I) { + if(LinkInLibrary(*I, is_native)) + return true; + if (is_native) + NativeItems.push_back(std::make_pair(*I, true)); + } + + return false; +} + + +/// LinkInLibrary - links one library into the HeadModule. +/// +bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) { + is_native = false; + // Determine where this library lives. + sys::Path Pathname = FindLib(Lib); + if (Pathname.isEmpty()) + return error("Cannot find library '" + Lib.str() + "'"); + + // If its an archive, try to link it in + std::string Magic; + Pathname.getMagicNumber(Magic, 64); + switch (sys::IdentifyFileType(Magic.c_str(), 64)) { + default: llvm_unreachable("Bad file type identification"); + case sys::Unknown_FileType: + return warning("Supposed library '" + Lib.str() + "' isn't a library."); + + case sys::Bitcode_FileType: + // LLVM ".so" file. + if (LinkInFile(Pathname, is_native)) + return true; + break; + + case sys::Archive_FileType: + if (LinkInArchive(Pathname, is_native)) + return error("Cannot link archive '" + Pathname.str() + "'"); + break; + + case sys::ELF_Relocatable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::Mach_O_Object_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case sys::COFF_FileType: + is_native = true; + break; + } + return false; +} + +/// LinkLibraries - takes the specified library files and links them into the +/// main bitcode object file. 
+/// +/// Inputs: +/// Libraries - The list of libraries to link into the module. +/// +/// Return value: +/// FALSE - No error. +/// TRUE - Error. +/// +bool Linker::LinkInLibraries(const std::vector &Libraries) { + + // Process the set of libraries we've been provided. + bool is_native = false; + for (unsigned i = 0; i < Libraries.size(); ++i) + if (LinkInLibrary(Libraries[i], is_native)) + return true; + + // At this point we have processed all the libraries provided to us. Since + // we have an aggregated module at this point, the dependent libraries in + // that module should also be aggregated with duplicates eliminated. This is + // now the time to process the dependent libraries to resolve any remaining + // symbols. + const Module::LibraryListType& DepLibs = Composite->getLibraries(); + for (Module::LibraryListType::const_iterator I = DepLibs.begin(), + E = DepLibs.end(); I != E; ++I) + if (LinkInLibrary(*I, is_native)) + return true; + + return false; +} + +/// LinkInFile - opens a bitcode file and links in all objects which +/// provide symbols that are currently undefined. +/// +/// Inputs: +/// File - The pathname of the bitcode file. +/// +/// Outputs: +/// ErrorMessage - A C++ string detailing what error occurred, if any. +/// +/// Return Value: +/// TRUE - An error occurred. +/// FALSE - No errors. +/// +bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { + is_native = false; + + // Check for a file of name "-", which means "read standard input" + if (File.str() == "-") { + std::auto_ptr M; + OwningPtr Buffer; + error_code ec; + if (!(ec = MemoryBuffer::getSTDIN(Buffer))) { + if (!Buffer->getBufferSize()) { + Error = "standard input is empty"; + } else { + M.reset(ParseBitcodeFile(Buffer.get(), Context, &Error)); + if (M.get()) + if (!LinkInModule(M.get(), &Error)) + return false; + } + } + return error("Cannot link stdin: " + ec.message()); + } + + // Determine what variety of file it is. + std::string Magic; + if (!File.getMagicNumber(Magic, 64)) + return error("Cannot find linker input '" + File.str() + "'"); + + switch (sys::IdentifyFileType(Magic.c_str(), 64)) { + default: llvm_unreachable("Bad file type identification"); + case sys::Unknown_FileType: + return warning("Ignoring file '" + File.str() + + "' because does not contain bitcode."); + + case sys::Archive_FileType: + // A user may specify an ar archive without -l, perhaps because it + // is not installed as a library. Detect that and link the archive. + if (LinkInArchive(File, is_native)) + return true; + break; + + case sys::Bitcode_FileType: { + verbose("Linking bitcode file '" + File.str() + "'"); + std::auto_ptr M(LoadObject(File)); + if (M.get() == 0) + return error("Cannot load file '" + File.str() + "': " + Error); + if (LinkInModule(M.get(), &Error)) + return error("Cannot link file '" + File.str() + "': " + Error); + + verbose("Linked in file '" + File.str() + "'"); + break; + } + + case sys::ELF_Relocatable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::Mach_O_Object_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case sys::COFF_FileType: + is_native = true; + break; + } + return false; +} + +/// LinkFiles - takes a module and a list of files and links them all together. +/// It locates the file either in the current directory, as its absolute +/// or relative pathname, or as a file somewhere in LLVM_LIB_SEARCH_PATH. 
+/// +/// Inputs: +/// Files - A vector of sys::Path indicating the LLVM bitcode filenames +/// to be linked. The names can refer to a mixture of pure LLVM +/// bitcode files and archive (ar) formatted files. +/// +/// Return value: +/// FALSE - No errors. +/// TRUE - Some error occurred. +/// +bool Linker::LinkInFiles(const std::vector &Files) { + bool is_native; + for (unsigned i = 0; i < Files.size(); ++i) + if (LinkInFile(Files[i], is_native)) + return true; + return false; +} diff --git a/final/lib/Linker/LinkModules.cpp b/final/lib/Linker/LinkModules.cpp new file mode 100644 index 00000000000..5aa06abdd98 --- /dev/null +++ b/final/lib/Linker/LinkModules.cpp @@ -0,0 +1,1301 @@ +//===- lib/Linker/LinkModules.cpp - Module Linker Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVM module linker. +// +// Specifically, this: +// * Merges global variables between the two modules +// * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if != +// * Merges functions between two modules +// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/TypeSymbolTable.h" +#include "llvm/ValueSymbolTable.h" +#include "llvm/Instructions.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Path.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/ADT/DenseMap.h" +using namespace llvm; + +// Error - Simple wrapper function to conditionally assign to E and return true. +// This just makes error return conditions a little bit simpler... +static inline bool Error(std::string *E, const Twine &Message) { + if (E) *E = Message.str(); + return true; +} + +// Function: ResolveTypes() +// +// Description: +// Attempt to link the two specified types together. +// +// Inputs: +// DestTy - The type to which we wish to resolve. +// SrcTy - The original type which we want to resolve. +// +// Outputs: +// DestST - The symbol table in which the new type should be placed. +// +// Return value: +// true - There is an error and the types cannot yet be linked. +// false - No errors. +// +static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) { + if (DestTy == SrcTy) return false; // If already equal, noop + assert(DestTy && SrcTy && "Can't handle null types"); + + if (const OpaqueType *OT = dyn_cast(DestTy)) { + // Type _is_ in module, just opaque... + const_cast(OT)->refineAbstractTypeTo(SrcTy); + } else if (const OpaqueType *OT = dyn_cast(SrcTy)) { + const_cast(OT)->refineAbstractTypeTo(DestTy); + } else { + return true; // Cannot link types... not-equal and neither is opaque. + } + return false; +} + +/// LinkerTypeMap - This implements a map of types that is stable +/// even if types are resolved/refined to other types. This is not a general +/// purpose map, it is specific to the linker's use. 
+namespace {
+class LinkerTypeMap : public AbstractTypeUser {
+  typedef DenseMap<const Type*, PATypeHolder> TheMapTy;
+  TheMapTy TheMap;
+
+  LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT
+  void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT
+public:
+  LinkerTypeMap() {}
+  ~LinkerTypeMap() {
+    for (DenseMap<const Type*, PATypeHolder>::iterator I = TheMap.begin(),
+         E = TheMap.end(); I != E; ++I)
+      I->first->removeAbstractTypeUser(this);
+  }
+
+  /// lookup - Return the value for the specified type or null if it doesn't
+  /// exist.
+  const Type *lookup(const Type *Ty) const {
+    TheMapTy::const_iterator I = TheMap.find(Ty);
+    if (I != TheMap.end()) return I->second;
+    return 0;
+  }
+
+  /// insert - This returns true if the pointer was new to the set, false if it
+  /// was already in the set.
+  bool insert(const Type *Src, const Type *Dst) {
+    if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second)
+      return false;  // Already in map.
+    if (Src->isAbstract())
+      Src->addAbstractTypeUser(this);
+    return true;
+  }
+
+protected:
+  /// refineAbstractType - The callback method invoked when an abstract type is
+  /// resolved to another type. An object must override this method to update
+  /// its internal state to reference NewType instead of OldType.
+  ///
+  virtual void refineAbstractType(const DerivedType *OldTy,
+                                  const Type *NewTy) {
+    TheMapTy::iterator I = TheMap.find(OldTy);
+    const Type *DstTy = I->second;
+
+    TheMap.erase(I);
+    if (OldTy->isAbstract())
+      OldTy->removeAbstractTypeUser(this);
+
+    // Don't reinsert into the map if the key is concrete now.
+    if (NewTy->isAbstract())
+      insert(NewTy, DstTy);
+  }
+
+  /// The other case which AbstractTypeUsers must be aware of is when a type
+  /// makes the transition from being abstract (where it has clients on its
+  /// AbstractTypeUsers list) to concrete (where it does not). This method
+  /// notifies ATUs when this occurs for a type.
+  virtual void typeBecameConcrete(const DerivedType *AbsTy) {
+    TheMap.erase(AbsTy);
+    AbsTy->removeAbstractTypeUser(this);
+  }
+
+  // for debugging...
+  virtual void dump() const {
+    dbgs() << "AbstractTypeSet!\n";
+  }
+};
+}
+
+
+// RecursiveResolveTypes - This is just like ResolveTypes, except that it
+// recurses down into derived types, merging the used types if the parent types
+// are compatible.
+static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
+                                   LinkerTypeMap &Pointers) {
+  if (DstTy == SrcTy) return false;       // If already equal, noop
+
+  // If we found our opaque type, resolve it now!
+  if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy())
+    return ResolveTypes(DstTy, SrcTy);
+
+  // Two types cannot be resolved together if they are of different primitive
+  // type. For example, we cannot resolve an int to a float.
+  if (DstTy->getTypeID() != SrcTy->getTypeID()) return true;
+
+  // If neither type is abstract, then they really are just different types.
+  if (!DstTy->isAbstract() && !SrcTy->isAbstract())
+    return true;
+
+  // Otherwise, resolve the types used by this derived type...
+  switch (DstTy->getTypeID()) {
+  default:
+    return true;
+  case Type::FunctionTyID: {
+    const FunctionType *DstFT = cast<FunctionType>(DstTy);
+    const FunctionType *SrcFT = cast<FunctionType>(SrcTy);
+    if (DstFT->isVarArg() != SrcFT->isVarArg() ||
+        DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes())
+      return true;
+
+    // Use TypeHolders so recursive resolution won't break us.
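+    // (A sketch of the hazard the holders guard against, with an invented
+    // local: a raw `const Type *Elem = SrcFT->getContainedType(0);` could be
+    // left dangling if the recursive call below triggers
+    // refineAbstractTypeTo; a PATypeHolder tracks the refinement instead.)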
+ PATypeHolder ST(SrcFT), DT(DstFT); + for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) { + const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); + if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) + return true; + } + return false; + } + case Type::StructTyID: { + const StructType *DstST = cast(DstTy); + const StructType *SrcST = cast(SrcTy); + if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes()) + return true; + + PATypeHolder ST(SrcST), DT(DstST); + for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) { + const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i); + if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers)) + return true; + } + return false; + } + case Type::ArrayTyID: { + const ArrayType *DAT = cast(DstTy); + const ArrayType *SAT = cast(SrcTy); + if (DAT->getNumElements() != SAT->getNumElements()) return true; + return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(), + Pointers); + } + case Type::VectorTyID: { + const VectorType *DVT = cast(DstTy); + const VectorType *SVT = cast(SrcTy); + if (DVT->getNumElements() != SVT->getNumElements()) return true; + return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(), + Pointers); + } + case Type::PointerTyID: { + const PointerType *DstPT = cast(DstTy); + const PointerType *SrcPT = cast(SrcTy); + + if (DstPT->getAddressSpace() != SrcPT->getAddressSpace()) + return true; + + // If this is a pointer type, check to see if we have already seen it. If + // so, we are in a recursive branch. Cut off the search now. We cannot use + // an associative container for this search, because the type pointers (keys + // in the container) change whenever types get resolved. + if (SrcPT->isAbstract()) + if (const Type *ExistingDestTy = Pointers.lookup(SrcPT)) + return ExistingDestTy != DstPT; + + if (DstPT->isAbstract()) + if (const Type *ExistingSrcTy = Pointers.lookup(DstPT)) + return ExistingSrcTy != SrcPT; + // Otherwise, add the current pointers to the vector to stop recursion on + // this pair. + if (DstPT->isAbstract()) + Pointers.insert(DstPT, SrcPT); + if (SrcPT->isAbstract()) + Pointers.insert(SrcPT, DstPT); + + return RecursiveResolveTypesI(DstPT->getElementType(), + SrcPT->getElementType(), Pointers); + } + } +} + +static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) { + LinkerTypeMap PointerTypes; + return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes); +} + + +// LinkTypes - Go through the symbol table of the Src module and see if any +// types are named in the src module that are not named in the Dst module. +// Make sure there are no type name conflicts. +static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) { + TypeSymbolTable *DestST = &Dest->getTypeSymbolTable(); + const TypeSymbolTable *SrcST = &Src->getTypeSymbolTable(); + + // Look for a type plane for Type's... + TypeSymbolTable::const_iterator TI = SrcST->begin(); + TypeSymbolTable::const_iterator TE = SrcST->end(); + if (TI == TE) return false; // No named types, do nothing. + + // Some types cannot be resolved immediately because they depend on other + // types being resolved to each other first. This contains a list of types we + // are waiting to recheck. + std::vector DelayedTypesToResolve; + + for ( ; TI != TE; ++TI ) { + const std::string &Name = TI->first; + const Type *RHS = TI->second; + + // Check to see if this type name is already in the dest module. 
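+    // (For illustration: if Src names %T = type opaque where Dest names
+    // %T = type { i32 }, ResolveTypes below refines the opaque side in
+    // place; only pairs that are concrete but different get queued on
+    // DelayedTypesToResolve for the iterative passes that follow.)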
+ Type *Entry = DestST->lookup(Name); + + // If the name is just in the source module, bring it over to the dest. + if (Entry == 0) { + if (!Name.empty()) + DestST->insert(Name, const_cast(RHS)); + } else if (ResolveTypes(Entry, RHS)) { + // They look different, save the types 'till later to resolve. + DelayedTypesToResolve.push_back(Name); + } + } + + // Iteratively resolve types while we can... + while (!DelayedTypesToResolve.empty()) { + // Loop over all of the types, attempting to resolve them if possible... + unsigned OldSize = DelayedTypesToResolve.size(); + + // Try direct resolution by name... + for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) { + const std::string &Name = DelayedTypesToResolve[i]; + Type *T1 = SrcST->lookup(Name); + Type *T2 = DestST->lookup(Name); + if (!ResolveTypes(T2, T1)) { + // We are making progress! + DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); + --i; + } + } + + // Did we not eliminate any types? + if (DelayedTypesToResolve.size() == OldSize) { + // Attempt to resolve subelements of types. This allows us to merge these + // two types: { int* } and { opaque* } + for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) { + const std::string &Name = DelayedTypesToResolve[i]; + if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) { + // We are making progress! + DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i); + + // Go back to the main loop, perhaps we can resolve directly by name + // now... + break; + } + } + + // If we STILL cannot resolve the types, then there is something wrong. + if (DelayedTypesToResolve.size() == OldSize) { + // Remove the symbol name from the destination. + DelayedTypesToResolve.pop_back(); + } + } + } + + + return false; +} + +/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict +/// in the symbol table. This is good for all clients except for us. Go +/// through the trouble to force this back. +static void ForceRenaming(GlobalValue *GV, const std::string &Name) { + assert(GV->getName() != Name && "Can't force rename to self"); + ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable(); + + // If there is a conflict, rename the conflict. + if (GlobalValue *ConflictGV = cast_or_null(ST.lookup(Name))) { + assert(ConflictGV->hasLocalLinkage() && + "Not conflicting with a static global, should link instead!"); + GV->takeName(ConflictGV); + ConflictGV->setName(Name); // This will cause ConflictGV to get renamed + assert(ConflictGV->getName() != Name && "ForceRenaming didn't work"); + } else { + GV->setName(Name); // Force the name back + } +} + +/// CopyGVAttributes - copy additional attributes (those not needed to construct +/// a GlobalValue) from the SrcGV to the DestGV. +static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) { + // Use the maximum alignment, rather than just copying the alignment of SrcGV. + unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment()); + DestGV->copyAttributesFrom(SrcGV); + DestGV->setAlignment(Alignment); +} + +/// GetLinkageResult - This analyzes the two global values and determines what +/// the result will look like in the destination module. In particular, it +/// computes the resultant linkage type, computes whether the global in the +/// source should be copied over to the destination (replacing the existing +/// one), and computes whether this linkage is an error or not. 
It also performs
+/// visibility checks: we cannot link together two symbols with different
+/// visibilities.
+static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+                             GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
+                             std::string *Err) {
+  assert((!Dest || !Src->hasLocalLinkage()) &&
+         "If Src has internal linkage, Dest shouldn't be set!");
+  if (!Dest) {
+    // Linking something to nothing.
+    LinkFromSrc = true;
+    LT = Src->getLinkage();
+  } else if (Src->isDeclaration()) {
+    // If Src is external or if both Src & Dest are external... Just link the
+    // external globals, we aren't adding anything.
+    if (Src->hasDLLImportLinkage()) {
+      // If one of the GVs has DLLImport linkage, the result should be
+      // dllimport'ed.
+      if (Dest->isDeclaration()) {
+        LinkFromSrc = true;
+        LT = Src->getLinkage();
+      }
+    } else if (Dest->hasExternalWeakLinkage()) {
+      // If the Dest is weak, use the source linkage.
+      LinkFromSrc = true;
+      LT = Src->getLinkage();
+    } else {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    }
+  } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) {
+    // If Dest is external but Src is not:
+    LinkFromSrc = true;
+    LT = Src->getLinkage();
+  } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) {
+    if (Src->getLinkage() != Dest->getLinkage())
+      return Error(Err, "Linking globals named '" + Src->getName() +
+            "': can only link appending global with another appending global!");
+    LinkFromSrc = true; // Special cased.
+    LT = Src->getLinkage();
+  } else if (Src->isWeakForLinker()) {
+    // At this point we know that Dest has LinkOnce, External*, Weak, Common,
+    // or DLL* linkage.
+    if (Dest->hasExternalWeakLinkage() ||
+        Dest->hasAvailableExternallyLinkage() ||
+        (Dest->hasLinkOnceLinkage() &&
+         (Src->hasWeakLinkage() || Src->hasCommonLinkage()))) {
+      LinkFromSrc = true;
+      LT = Src->getLinkage();
+    } else {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    }
+  } else if (Dest->isWeakForLinker()) {
+    // At this point we know that Src has External* or DLL* linkage.
+    if (Src->hasExternalWeakLinkage()) {
+      LinkFromSrc = false;
+      LT = Dest->getLinkage();
+    } else {
+      LinkFromSrc = true;
+      LT = GlobalValue::ExternalLinkage;
+    }
+  } else {
+    assert((Dest->hasExternalLinkage() ||
+            Dest->hasDLLImportLinkage() ||
+            Dest->hasDLLExportLinkage() ||
+            Dest->hasExternalWeakLinkage()) &&
+           (Src->hasExternalLinkage() ||
+            Src->hasDLLImportLinkage() ||
+            Src->hasDLLExportLinkage() ||
+            Src->hasExternalWeakLinkage()) &&
+           "Unexpected linkage type!");
+    return Error(Err, "Linking globals named '" + Src->getName() +
+                 "': symbol multiply defined!");
+  }
+
+  // Check visibility
+  if (Dest && Src->getVisibility() != Dest->getVisibility() &&
+      !Src->isDeclaration() && !Dest->isDeclaration() &&
+      !Src->hasAvailableExternallyLinkage() &&
+      !Dest->hasAvailableExternallyLinkage())
+    return Error(Err, "Linking globals named '" + Src->getName() +
+                 "': symbols have different visibilities!");
+  return false;
+}
+
+// Insert all of the named mdnodes in Src into the Dest module.
+static void LinkNamedMDNodes(Module *Dest, Module *Src,
+                             ValueToValueMapTy &ValueMap) {
+  for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
+         E = Src->named_metadata_end(); I != E; ++I) {
+    const NamedMDNode *SrcNMD = I;
+    NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
+    // Add Src elements into Dest node.
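+    // (For illustration: if both modules carry a named node such as
+    // !llvm.dbg.cu, the operands of Src's node are appended to Dest's node
+    // of the same name, with MapValue rewriting operands that refer to
+    // values remapped earlier in the link.)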
+ for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i) + DestNMD->addOperand(cast(MapValue(SrcNMD->getOperand(i), + ValueMap))); + } +} + +// LinkGlobals - Loop through the global variables in the src module and merge +// them into the dest module. +static bool LinkGlobals(Module *Dest, const Module *Src, + ValueToValueMapTy &ValueMap, + std::multimap &AppendingVars, + std::string *Err) { + ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable(); + + // Loop over all of the globals in the src module, mapping them over as we go + for (Module::const_global_iterator I = Src->global_begin(), + E = Src->global_end(); I != E; ++I) { + const GlobalVariable *SGV = I; + GlobalValue *DGV = 0; + + // Check to see if may have to link the global with the global, alias or + // function. + if (SGV->hasName() && !SGV->hasLocalLinkage()) + DGV = cast_or_null(DestSymTab.lookup(SGV->getName())); + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV && DGV->hasLocalLinkage()) + DGV = 0; + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGV->getType()) + RecursiveResolveTypes(SGV->getType(), DGV->getType()); + + assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() || + SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) && + "Global must either be external or have an initializer!"); + + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + bool LinkFromSrc = false; + if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err)) + return true; + + if (DGV == 0) { + // No linking to be performed, simply create an identical version of the + // symbol over in the dest module... the initializer will be filled in + // later by LinkGlobalInits. + GlobalVariable *NewDGV = + new GlobalVariable(*Dest, SGV->getType()->getElementType(), + SGV->isConstant(), SGV->getLinkage(), /*init*/0, + SGV->getName(), 0, false, + SGV->getType()->getAddressSpace()); + // Propagate alignment, visibility and section info. + CopyGVAttributes(NewDGV, SGV); + + // If the LLVM runtime renamed the global, but it is an externally visible + // symbol, DGV must be an existing global with internal linkage. Rename + // it. + if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName()) + ForceRenaming(NewDGV, SGV->getName()); + + // Make sure to remember this mapping. + ValueMap[SGV] = NewDGV; + + // Keep track that this is an appending variable. + if (SGV->hasAppendingLinkage()) + AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); + continue; + } + + bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr(); + + // If the visibilities of the symbols disagree and the destination is a + // prototype, take the visibility of its input. + if (DGV->isDeclaration()) + DGV->setVisibility(SGV->getVisibility()); + + if (DGV->hasAppendingLinkage()) { + // No linking is performed yet. Just insert a new copy of the global, and + // keep track of the fact that it is an appending variable in the + // AppendingVars map. The name is cleared out so that no linkage is + // performed. + GlobalVariable *NewDGV = + new GlobalVariable(*Dest, SGV->getType()->getElementType(), + SGV->isConstant(), SGV->getLinkage(), /*init*/0, + "", 0, false, + SGV->getType()->getAddressSpace()); + + // Set alignment allowing CopyGVAttributes merge it with alignment of SGV. 
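+      // (For illustration: linking @xs = appending global [1 x i32] [i32 1]
+      // against @xs = appending global [2 x i32] [i32 2, i32 3] leaves two
+      // anonymous copies here; LinkAppendingVars later merges them into one
+      // [3 x i32] under the name "xs". The variable @xs is hypothetical.)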
+ NewDGV->setAlignment(DGV->getAlignment()); + // Propagate alignment, section and visibility info. + CopyGVAttributes(NewDGV, SGV); + + // Make sure to remember this mapping... + ValueMap[SGV] = NewDGV; + + // Keep track that this is an appending variable... + AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV)); + continue; + } + + if (LinkFromSrc) { + if (isa(DGV)) + return Error(Err, "Global-Alias Collision on '" + SGV->getName() + + "': symbol multiple defined"); + + // If the types don't match, and if we are to link from the source, nuke + // DGV and create a new one of the appropriate type. Note that the thing + // we are replacing may be a function (if a prototype, weak, etc) or a + // global variable. + GlobalVariable *NewDGV = + new GlobalVariable(*Dest, SGV->getType()->getElementType(), + SGV->isConstant(), NewLinkage, /*init*/0, + DGV->getName(), 0, false, + SGV->getType()->getAddressSpace()); + + // Set the unnamed_addr. + NewDGV->setUnnamedAddr(HasUnnamedAddr); + + // Propagate alignment, section, and visibility info. + CopyGVAttributes(NewDGV, SGV); + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, + DGV->getType())); + + // DGV will conflict with NewDGV because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DGV might not have internal linkage. + if (GlobalVariable *Var = dyn_cast(DGV)) + Var->eraseFromParent(); + else + cast(DGV)->eraseFromParent(); + + // If the symbol table renamed the global, but it is an externally visible + // symbol, DGV must be an existing global with internal linkage. Rename. + if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage()) + ForceRenaming(NewDGV, SGV->getName()); + + // Inherit const as appropriate. + NewDGV->setConstant(SGV->isConstant()); + + // Make sure to remember this mapping. + ValueMap[SGV] = NewDGV; + continue; + } + + // Not "link from source", keep the one in the DestModule and remap the + // input onto it. + + // Special case for const propagation. + if (GlobalVariable *DGVar = dyn_cast(DGV)) + if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant()) + DGVar->setConstant(true); + + // SGV is global, but DGV is alias. + if (isa(DGV)) { + // The only valid mappings are: + // - SGV is external declaration, which is effectively a no-op. + // - SGV is weak, when we just need to throw SGV out. + if (!SGV->isDeclaration() && !SGV->isWeakForLinker()) + return Error(Err, "Global-Alias Collision on '" + SGV->getName() + + "': symbol multiple defined"); + } + + // Set calculated linkage and unnamed_addr + DGV->setLinkage(NewLinkage); + DGV->setUnnamedAddr(HasUnnamedAddr); + + // Make sure to remember this mapping... 
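+    // (For illustration: when opaque types left SGV and DGV with different
+    // pointer types, the stored value is a bitcast of DGV to SGV's type, so
+    // a Src use such as store i32 0, i32* @g still remaps cleanly. @g is a
+    // hypothetical global.)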
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType()); + } + return false; +} + +static GlobalValue::LinkageTypes +CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) { + GlobalValue::LinkageTypes SL = SGV->getLinkage(); + GlobalValue::LinkageTypes DL = DGV->getLinkage(); + if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage) + return GlobalValue::ExternalLinkage; + else if (SL == GlobalValue::WeakAnyLinkage || + DL == GlobalValue::WeakAnyLinkage) + return GlobalValue::WeakAnyLinkage; + else if (SL == GlobalValue::WeakODRLinkage || + DL == GlobalValue::WeakODRLinkage) + return GlobalValue::WeakODRLinkage; + else if (SL == GlobalValue::InternalLinkage && + DL == GlobalValue::InternalLinkage) + return GlobalValue::InternalLinkage; + else if (SL == GlobalValue::LinkerPrivateLinkage && + DL == GlobalValue::LinkerPrivateLinkage) + return GlobalValue::LinkerPrivateLinkage; + else if (SL == GlobalValue::LinkerPrivateWeakLinkage && + DL == GlobalValue::LinkerPrivateWeakLinkage) + return GlobalValue::LinkerPrivateWeakLinkage; + else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage && + DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage) + return GlobalValue::LinkerPrivateWeakDefAutoLinkage; + else { + assert (SL == GlobalValue::PrivateLinkage && + DL == GlobalValue::PrivateLinkage && "Unexpected linkage type"); + return GlobalValue::PrivateLinkage; + } +} + +// LinkAlias - Loop through the alias in the src module and link them into the +// dest module. We're assuming, that all functions/global variables were already +// linked in. +static bool LinkAlias(Module *Dest, const Module *Src, + ValueToValueMapTy &ValueMap, + std::string *Err) { + // Loop over all alias in the src module + for (Module::const_alias_iterator I = Src->alias_begin(), + E = Src->alias_end(); I != E; ++I) { + const GlobalAlias *SGA = I; + const GlobalValue *SAliasee = SGA->getAliasedGlobal(); + GlobalAlias *NewGA = NULL; + + // Globals were already linked, thus we can just query ValueMap for variant + // of SAliasee in Dest. + ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee); + assert(VMI != ValueMap.end() && "Aliasee not linked"); + GlobalValue* DAliasee = cast(VMI->second); + GlobalValue* DGV = NULL; + + // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken + // by this, but aliases to GEPs are broken to a lot of other things, so + // it's less important. + Constant *DAliaseeConst = DAliasee; + if (SGA->getType() != DAliasee->getType()) + DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType()); + + // Try to find something 'similar' to SGA in destination module. + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getNamedAlias(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getGlobalVariable(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + if (!DGV && !SGA->hasLocalLinkage()) { + DGV = Dest->getFunction(SGA->getName()); + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SGA->getType()) + RecursiveResolveTypes(SGA->getType(), DGV->getType()); + } + + // No linking to be performed on internal stuff. 
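+    // (For illustration, the case analysis below: alias vs. alias with the
+    // same aliasee is merged; an alias may replace a global variable or
+    // function only when the latter is a declaration or weak; everything
+    // else, e.g. @a = alias i32* @x against a defined @a = global i32 1,
+    // is reported as a collision. @a and @x are hypothetical.)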
+ if (DGV && DGV->hasLocalLinkage()) + DGV = NULL; + + if (GlobalAlias *DGA = dyn_cast_or_null(DGV)) { + // Types are known to be the same, check whether aliasees equal. As + // globals are already linked we just need query ValueMap to find the + // mapping. + if (DAliasee == DGA->getAliasedGlobal()) { + // This is just two copies of the same alias. Propagate linkage, if + // necessary. + DGA->setLinkage(CalculateAliasLinkage(SGA, DGA)); + + NewGA = DGA; + // Proceed to 'common' steps + } else + return Error(Err, "Alias Collision on '" + SGA->getName()+ + "': aliases have different aliasees"); + } else if (GlobalVariable *DGVar = dyn_cast_or_null(DGV)) { + // The only allowed way is to link alias with external declaration or weak + // symbol.. + if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) { + // But only if aliasee is global too... + if (!isa(DAliasee)) + return Error(Err, "Global-Alias Collision on '" + SGA->getName() + + "': aliasee is not global variable"); + + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliaseeConst, Dest); + CopyGVAttributes(NewGA, SGA); + + // Any uses of DGV need to change to NewGA, with cast, if needed. + if (SGA->getType() != DGVar->getType()) + DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, + DGVar->getType())); + else + DGVar->replaceAllUsesWith(NewGA); + + // DGVar will conflict with NewGA because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DGV might not have internal linkage. + DGVar->eraseFromParent(); + + // Proceed to 'common' steps + } else + return Error(Err, "Global-Alias Collision on '" + SGA->getName() + + "': symbol multiple defined"); + } else if (Function *DF = dyn_cast_or_null(DGV)) { + // The only allowed way is to link alias with external declaration or weak + // symbol... + if (DF->isDeclaration() || DF->isWeakForLinker()) { + // But only if aliasee is function too... + if (!isa(DAliasee)) + return Error(Err, "Function-Alias Collision on '" + SGA->getName() + + "': aliasee is not function"); + + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliaseeConst, Dest); + CopyGVAttributes(NewGA, SGA); + + // Any uses of DF need to change to NewGA, with cast, if needed. + if (SGA->getType() != DF->getType()) + DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA, + DF->getType())); + else + DF->replaceAllUsesWith(NewGA); + + // DF will conflict with NewGA because they both had the same + // name. We must erase this now so ForceRenaming doesn't assert + // because DF might not have internal linkage. + DF->eraseFromParent(); + + // Proceed to 'common' steps + } else + return Error(Err, "Function-Alias Collision on '" + SGA->getName() + + "': symbol multiple defined"); + } else { + // No linking to be performed, simply create an identical version of the + // alias over in the dest module... + NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(), + SGA->getName(), DAliaseeConst, Dest); + CopyGVAttributes(NewGA, SGA); + + // Proceed to 'common' steps + } + + assert(NewGA && "No alias was created in destination module!"); + + // If the symbol table renamed the alias, but it is an externally visible + // symbol, DGA must be an global value with internal linkage. Rename it. + if (NewGA->getName() != SGA->getName() && + !NewGA->hasLocalLinkage()) + ForceRenaming(NewGA, SGA->getName()); + + // Remember this mapping so uses in the source module get remapped + // later by MapValue. 
+ ValueMap[SGA] = NewGA; + } + + return false; +} + + +// LinkGlobalInits - Update the initializers in the Dest module now that all +// globals that may be referenced are in Dest. +static bool LinkGlobalInits(Module *Dest, const Module *Src, + ValueToValueMapTy &ValueMap, + std::string *Err) { + // Loop over all of the globals in the src module, mapping them over as we go + for (Module::const_global_iterator I = Src->global_begin(), + E = Src->global_end(); I != E; ++I) { + const GlobalVariable *SGV = I; + + if (SGV->hasInitializer()) { // Only process initialized GV's + // Figure out what the initializer looks like in the dest module. + Constant *SInit = + cast(MapValue(SGV->getInitializer(), ValueMap)); + // Grab destination global variable or alias. + GlobalValue *DGV = cast(ValueMap[SGV]->stripPointerCasts()); + + // If dest if global variable, check that initializers match. + if (GlobalVariable *DGVar = dyn_cast(DGV)) { + if (DGVar->hasInitializer()) { + if (SGV->hasExternalLinkage()) { + if (DGVar->getInitializer() != SInit) + return Error(Err, "Global Variable Collision on '" + + SGV->getName() + + "': global variables have different initializers"); + } else if (DGVar->isWeakForLinker()) { + // Nothing is required, mapped values will take the new global + // automatically. + } else if (SGV->isWeakForLinker()) { + // Nothing is required, mapped values will take the new global + // automatically. + } else if (DGVar->hasAppendingLinkage()) { + llvm_unreachable("Appending linkage unimplemented!"); + } else { + llvm_unreachable("Unknown linkage!"); + } + } else { + // Copy the initializer over now... + DGVar->setInitializer(SInit); + } + } else { + // Destination is alias, the only valid situation is when source is + // weak. Also, note, that we already checked linkage in LinkGlobals(), + // thus we assert here. + // FIXME: Should we weaken this assumption, 'dereference' alias and + // check for initializer of aliasee? + assert(SGV->isWeakForLinker()); + } + } + } + return false; +} + +// LinkFunctionProtos - Link the functions together between the two modules, +// without doing function bodies... this just adds external function prototypes +// to the Dest function... +// +static bool LinkFunctionProtos(Module *Dest, const Module *Src, + ValueToValueMapTy &ValueMap, + std::string *Err) { + ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable(); + + // Loop over all of the functions in the src module, mapping them over + for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) { + const Function *SF = I; // SrcFunction + GlobalValue *DGV = 0; + + // Check to see if may have to link the function with the global, alias or + // function. + if (SF->hasName() && !SF->hasLocalLinkage()) + DGV = cast_or_null(DestSymTab.lookup(SF->getName())); + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV && DGV->hasLocalLinkage()) + DGV = 0; + + // If types don't agree due to opaque types, try to resolve them. + if (DGV && DGV->getType() != SF->getType()) + RecursiveResolveTypes(SF->getType(), DGV->getType()); + + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + bool LinkFromSrc = false; + if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err)) + return true; + + // If there is no linkage to be performed, just bring over SF without + // modifying it. 
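+    // (For illustration: a Src declaration such as declare i32 @puts(i8*)
+    // with no counterpart in Dest is cloned below as an identical prototype;
+    // any body is copied later by LinkFunctionBodies.)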
+ if (DGV == 0) { + // Function does not already exist, simply insert an function signature + // identical to SF into the dest module. + Function *NewDF = Function::Create(SF->getFunctionType(), + SF->getLinkage(), + SF->getName(), Dest); + CopyGVAttributes(NewDF, SF); + + // If the LLVM runtime renamed the function, but it is an externally + // visible symbol, DF must be an existing function with internal linkage. + // Rename it. + if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName()) + ForceRenaming(NewDF, SF->getName()); + + // ... and remember this mapping... + ValueMap[SF] = NewDF; + continue; + } + + // If the visibilities of the symbols disagree and the destination is a + // prototype, take the visibility of its input. + if (DGV->isDeclaration()) + DGV->setVisibility(SF->getVisibility()); + + if (LinkFromSrc) { + if (isa(DGV)) + return Error(Err, "Function-Alias Collision on '" + SF->getName() + + "': symbol multiple defined"); + + // We have a definition of the same name but different type in the + // source module. Copy the prototype to the destination and replace + // uses of the destination's prototype with the new prototype. + Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage, + SF->getName(), Dest); + CopyGVAttributes(NewDF, SF); + + // Any uses of DF need to change to NewDF, with cast + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, + DGV->getType())); + + // DF will conflict with NewDF because they both had the same. We must + // erase this now so ForceRenaming doesn't assert because DF might + // not have internal linkage. + if (GlobalVariable *Var = dyn_cast(DGV)) + Var->eraseFromParent(); + else + cast(DGV)->eraseFromParent(); + + // If the symbol table renamed the function, but it is an externally + // visible symbol, DF must be an existing function with internal + // linkage. Rename it. + if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage()) + ForceRenaming(NewDF, SF->getName()); + + // Remember this mapping so uses in the source module get remapped + // later by MapValue. + ValueMap[SF] = NewDF; + continue; + } + + // Not "link from source", keep the one in the DestModule and remap the + // input onto it. + + if (isa(DGV)) { + // The only valid mappings are: + // - SF is external declaration, which is effectively a no-op. + // - SF is weak, when we just need to throw SF out. + if (!SF->isDeclaration() && !SF->isWeakForLinker()) + return Error(Err, "Function-Alias Collision on '" + SF->getName() + + "': symbol multiple defined"); + } + + // Set calculated linkage + DGV->setLinkage(NewLinkage); + + // Make sure to remember this mapping. + ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType()); + } + return false; +} + +// LinkFunctionBody - Copy the source function over into the dest function and +// fix up references to values. At this point we know that Dest is an external +// function, and that Src is not. +static bool LinkFunctionBody(Function *Dest, Function *Src, + ValueToValueMapTy &ValueMap, + std::string *Err) { + assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration()); + + // Go through and convert function arguments over, remembering the mapping. + Function::arg_iterator DI = Dest->arg_begin(); + for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); + I != E; ++I, ++DI) { + DI->setName(I->getName()); // Copy the name information over... + + // Add a mapping to our local map + ValueMap[I] = DI; + } + + // Splice the body of the source function into the dest function. 
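+  // (splice() moves the basic blocks rather than copying them: Src is left
+  // empty and Dest owns the blocks, which is why the loop below only needs
+  // RemapInstruction with RF_IgnoreMissingEntries to patch operands that
+  // still point at source-module values.)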
+ Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList()); + + // At this point, all of the instructions and values of the function are now + // copied over. The only problem is that they are still referencing values in + // the Source function as operands. Loop through all of the operands of the + // functions and patch them up to point to the local versions. + for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries); + + // There is no need to map the arguments anymore. + for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end(); + I != E; ++I) + ValueMap.erase(I); + + return false; +} + + +// LinkFunctionBodies - Link in the function bodies that are defined in the +// source module into the DestModule. This consists basically of copying the +// function over and fixing up references to values. +static bool LinkFunctionBodies(Module *Dest, Module *Src, + ValueToValueMapTy &ValueMap, + std::string *Err) { + + // Loop over all of the functions in the src module, mapping them over as we + // go + for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) { + if (!SF->isDeclaration()) { // No body if function is external + Function *DF = dyn_cast(ValueMap[SF]); // Destination function + + // DF not external SF external? + if (DF && DF->isDeclaration()) + // Only provide the function body if there isn't one already. + if (LinkFunctionBody(DF, SF, ValueMap, Err)) + return true; + } + } + return false; +} + +// LinkAppendingVars - If there were any appending global variables, link them +// together now. Return true on error. +static bool LinkAppendingVars(Module *M, + std::multimap &AppendingVars, + std::string *ErrorMsg) { + if (AppendingVars.empty()) return false; // Nothing to do. + + // Loop over the multimap of appending vars, processing any variables with the + // same name, forming a new appending global variable with both of the + // initializers merged together, then rewrite references to the old variables + // and delete them. + std::vector Inits; + while (AppendingVars.size() > 1) { + // Get the first two elements in the map... + std::multimap::iterator Second = AppendingVars.begin(), First=Second++; + + // If the first two elements are for different names, there is no pair... + // Otherwise there is a pair, so link them together... + if (First->first == Second->first) { + GlobalVariable *G1 = First->second, *G2 = Second->second; + const ArrayType *T1 = cast(G1->getType()->getElementType()); + const ArrayType *T2 = cast(G2->getType()->getElementType()); + + // Check to see that they two arrays agree on type... 
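+      // (For illustration: [2 x i8*] and [3 x i8*] agree on element type and
+      // merge below with NewSize = 5, i.e. [5 x i8*]; [2 x i8*] against
+      // [3 x i32] is rejected here.)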
+      if (T1->getElementType() != T2->getElementType())
+        return Error(ErrorMsg,
+              "Cannot link appending variables with different element types!");
+      if (G1->isConstant() != G2->isConstant())
+        return Error(ErrorMsg,
+                 "Cannot link appending variables with different const'ness!");
+
+      if (G1->getAlignment() != G2->getAlignment())
+        return Error(ErrorMsg,
+                  "Cannot link appending variables with different alignment!");
+
+      if (G1->getVisibility() != G2->getVisibility())
+        return Error(ErrorMsg,
+                 "Cannot link appending variables with different visibility!");
+
+      if (G1->getSection() != G2->getSection())
+        return Error(ErrorMsg,
+              "Cannot link appending variables with different section names!");
+
+      unsigned NewSize = T1->getNumElements() + T2->getNumElements();
+      ArrayType *NewType = ArrayType::get(T1->getElementType(),
+                                          NewSize);
+
+      G1->setName("");   // Clear G1's name in case of a conflict!
+
+      // Create the new global variable...
+      GlobalVariable *NG =
+        new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(),
+                           /*init*/0, First->first, 0, G1->isThreadLocal(),
+                           G1->getType()->getAddressSpace());
+
+      // Propagate alignment, visibility and section info.
+      CopyGVAttributes(NG, G1);
+
+      // Merge the initializer...
+      Inits.reserve(NewSize);
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(I->getOperand(i));
+      } else {
+        assert(isa<ConstantAggregateZero>(G1->getInitializer()));
+        Constant *CV = Constant::getNullValue(T1->getElementType());
+        for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
+      if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(I->getOperand(i));
+      } else {
+        assert(isa<ConstantAggregateZero>(G2->getInitializer()));
+        Constant *CV = Constant::getNullValue(T2->getElementType());
+        for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
+          Inits.push_back(CV);
+      }
+      NG->setInitializer(ConstantArray::get(NewType, Inits));
+      Inits.clear();
+
+      // Replace any uses of the two global variables with uses of the new
+      // global...
+
+      // FIXME: This should rewrite simple/straight-forward uses such as
+      // getelementptr instructions to not use the Cast!
+      G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
+                                                      G1->getType()));
+      G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
+                                                      G2->getType()));
+
+      // Remove the two globals from the module now...
+      M->getGlobalList().erase(G1);
+      M->getGlobalList().erase(G2);
+
+      // Put the new global into the AppendingVars map so that we can handle
+      // linking of more than two vars...
+      Second->second = NG;
+    }
+    AppendingVars.erase(First);
+  }
+
+  return false;
+}
+
+static bool ResolveAliases(Module *Dest) {
+  for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
+       I != E; ++I)
+    // We can't use resolveGlobalAlias here because we need to preserve
+    // bitcasts and GEPs.
+    if (const Constant *C = I->getAliasee()) {
+      while (dyn_cast<GlobalAlias>(C))
+        C = cast<GlobalAlias>(C)->getAliasee();
+      const GlobalValue *GV = dyn_cast<GlobalValue>(C);
+      if (C != I && !(GV && GV->isDeclaration()))
+        I->replaceAllUsesWith(const_cast<Constant*>(C));
+    }
+
+  return false;
+}
+
+// LinkModules - This function links two modules together, with the resulting
+// left module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem.
+// Upon failure, the Dest module could be in a modified state, and shouldn't
+// be relied on to be consistent.
+bool
+Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+  assert(Dest != 0 && "Invalid Destination module");
+  assert(Src  != 0 && "Invalid Source Module");
+
+  if (Dest->getDataLayout().empty()) {
+    if (!Src->getDataLayout().empty()) {
+      Dest->setDataLayout(Src->getDataLayout());
+    } else {
+      std::string DataLayout;
+
+      if (Dest->getEndianness() == Module::AnyEndianness) {
+        if (Src->getEndianness() == Module::BigEndian)
+          DataLayout.append("E");
+        else if (Src->getEndianness() == Module::LittleEndian)
+          DataLayout.append("e");
+      }
+
+      if (Dest->getPointerSize() == Module::AnyPointerSize) {
+        if (Src->getPointerSize() == Module::Pointer64)
+          DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64");
+        else if (Src->getPointerSize() == Module::Pointer32)
+          DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32");
+      }
+      Dest->setDataLayout(DataLayout);
+    }
+  }
+
+  // Copy the target triple from the source to dest if the dest's is empty.
+  if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty())
+    Dest->setTargetTriple(Src->getTargetTriple());
+
+  if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
+      Src->getDataLayout() != Dest->getDataLayout())
+    errs() << "WARNING: Linking two modules of different data layouts!\n";
+  if (!Src->getTargetTriple().empty() &&
+      Dest->getTargetTriple() != Src->getTargetTriple()) {
+    errs() << "WARNING: Linking two modules of different target triples: ";
+    if (!Src->getModuleIdentifier().empty())
+      errs() << Src->getModuleIdentifier() << ": ";
+    errs() << "'" << Src->getTargetTriple() << "' and '"
+           << Dest->getTargetTriple() << "'\n";
+  }
+
+  // Append the module inline asm string.
+  if (!Src->getModuleInlineAsm().empty()) {
+    if (Dest->getModuleInlineAsm().empty())
+      Dest->setModuleInlineAsm(Src->getModuleInlineAsm());
+    else
+      Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+
+                               Src->getModuleInlineAsm());
+  }
+
+  // Update the destination module's dependent libraries list with the
+  // libraries from the source module. There's no opportunity for duplicates
+  // here as the Module ensures that duplicate insertions are discarded.
+  for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end();
+       SI != SE; ++SI)
+    Dest->addLibrary(*SI);
+
+  // LinkTypes - Go through the symbol table of the Src module and see if any
+  // types are named in the src module that are not named in the Dst module.
+  // Make sure there are no type name conflicts.
+  if (LinkTypes(Dest, Src, ErrorMsg))
+    return true;
+
+  // ValueMap - Mapping of values from what they used to be in Src, to what
+  // they are now in Dest. ValueToValueMapTy is a ValueMap, which involves
+  // some overhead due to the use of Value handles which the Linker doesn't
+  // actually need, but this allows us to reuse the ValueMapper code.
+  ValueToValueMapTy ValueMap;
+
+  // AppendingVars - Keep track of global variables in the destination module
+  // with appending linkage. After the module is linked together, they are
+  // appended and the module is rewritten.
+  std::multimap<std::string, GlobalVariable *> AppendingVars;
+  for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end();
+       I != E; ++I) {
+    // Add all of the appending globals already in the Dest module to
+    // AppendingVars.
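+    // (The multimap is keyed on the variable name, so a same-named appending
+    // global coming from Src will land next to its Dest counterpart and get
+    // paired up by LinkAppendingVars.)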
+    if (I->hasAppendingLinkage())
+      AppendingVars.insert(std::make_pair(I->getName(), I));
+  }
+
+  // Insert all of the globals in src into the Dest module... without linking
+  // initializers (which could refer to functions not yet mapped over).
+  if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
+    return true;
+
+  // Link the functions together between the two modules, without doing
+  // function bodies... this just adds external function prototypes to the
+  // Dest module... We do this so that when we begin processing function
+  // bodies, all of the global values that may be referenced are available in
+  // our ValueMap.
+  if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
+    return true;
+
+  // If there were any aliases, link them now. We really need to do this now,
+  // because all of the aliases that may be referenced need to be available in
+  // ValueMap.
+  if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // Update the initializers in the Dest module now that all globals that may
+  // be referenced are in Dest.
+  if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // Link in the function bodies that are defined in the source module into
+  // the DestModule. This consists basically of copying the function over and
+  // fixing up references to values.
+  if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+
+  // If there were any appending global variables, link them together now.
+  if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+
+  // Resolve all uses of aliases with their aliasees.
+  if (ResolveAliases(Dest)) return true;
+
+  // Remap all of the named MDNodes in Src into the Dest module. We do this
+  // after linking GlobalValues so that MDNodes that reference GlobalValues
+  // are properly remapped.
+  LinkNamedMDNodes(Dest, Src, ValueMap);
+
+  // If the source library's module id is in the dependent library list of the
+  // destination library, remove it since that module is now linked in.
+  const std::string &modId = Src->getModuleIdentifier();
+  if (!modId.empty())
+    Dest->removeLibrary(sys::path::stem(modId));
+
+  return false;
+}
+
+// vim: sw=2
diff --git a/final/lib/Linker/Linker.cpp b/final/lib/Linker/Linker.cpp
new file mode 100644
index 00000000000..fba91da5ddd
--- /dev/null
+++ b/final/lib/Linker/Linker.cpp
@@ -0,0 +1,174 @@
+//===- lib/Linker/Linker.cpp - Basic Linker functionality ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains basic Linker functionality that all usages will need.
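+// (The module-to-module linking logic itself lives in LinkModules.cpp; this
+// file provides the Linker object, its error/warning reporting, and the
+// bitcode library search helpers.)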
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Linker.h"
+#include "llvm/Module.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/system_error.h"
+using namespace llvm;
+
+Linker::Linker(StringRef progname, StringRef modname,
+               LLVMContext& C, unsigned flags):
+  Context(C),
+  Composite(new Module(modname, C)),
+  LibPaths(),
+  Flags(flags),
+  Error(),
+  ProgramName(progname) { }
+
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
+  Context(aModule->getContext()),
+  Composite(aModule),
+  LibPaths(),
+  Flags(flags),
+  Error(),
+  ProgramName(progname) { }
+
+Linker::~Linker() {
+  delete Composite;
+}
+
+bool
+Linker::error(StringRef message) {
+  Error = message;
+  if (!(Flags&QuietErrors))
+    errs() << ProgramName << ": error: " << message << "\n";
+  return true;
+}
+
+bool
+Linker::warning(StringRef message) {
+  Error = message;
+  if (!(Flags&QuietWarnings))
+    errs() << ProgramName << ": warning: " << message << "\n";
+  return false;
+}
+
+void
+Linker::verbose(StringRef message) {
+  if (Flags&Verbose)
+    errs() << "  " << message << "\n";
+}
+
+void
+Linker::addPath(const sys::Path& path) {
+  LibPaths.push_back(path);
+}
+
+void
+Linker::addPaths(const std::vector<std::string>& paths) {
+  for (unsigned i = 0, e = paths.size(); i != e; ++i)
+    LibPaths.push_back(sys::Path(paths[i]));
+}
+
+void
+Linker::addSystemPaths() {
+  sys::Path::GetBitcodeLibraryPaths(LibPaths);
+  LibPaths.insert(LibPaths.begin(), sys::Path("./"));
+}
+
+Module*
+Linker::releaseModule() {
+  Module* result = Composite;
+  LibPaths.clear();
+  Error.clear();
+  Composite = 0;
+  Flags = 0;
+  return result;
+}
+
+// LoadObject - Read in and parse the bitcode file named by FN and return the
+// module it contains (wrapped in an auto_ptr), or auto_ptr<Module>() and set
+// Error if an error occurs.
+std::auto_ptr<Module>
+Linker::LoadObject(const sys::Path &FN) {
+  std::string ParseErrorMessage;
+  Module *Result = 0;
+
+  OwningPtr<MemoryBuffer> Buffer;
+  if (error_code ec = MemoryBuffer::getFileOrSTDIN(FN.c_str(), Buffer))
+    ParseErrorMessage = "Error reading file '" + FN.str() + "'" + ": "
+                      + ec.message();
+  else
+    Result = ParseBitcodeFile(Buffer.get(), Context, &ParseErrorMessage);
+
+  if (Result)
+    return std::auto_ptr<Module>(Result);
+  Error = "Bitcode file '" + FN.str() + "' could not be loaded";
+  if (ParseErrorMessage.size())
+    Error += ": " + ParseErrorMessage;
+  return std::auto_ptr<Module>();
+}
+
+// IsLibrary - Determine if "Name" is a library in "Directory". Return
+// a non-empty sys::Path if it's found, an empty one otherwise.
+static inline sys::Path IsLibrary(StringRef Name,
+                                  const sys::Path &Directory) {
+
+  sys::Path FullPath(Directory);
+
+  // Try the libX.a form
+  FullPath.appendComponent(("lib" + Name).str());
+  FullPath.appendSuffix("a");
+  if (FullPath.isArchive())
+    return FullPath;
+
+  // Try the libX.bca form
+  FullPath.eraseSuffix();
+  FullPath.appendSuffix("bca");
+  if (FullPath.isArchive())
+    return FullPath;
+
+  // Try the libX.so (or .dylib) form
+  FullPath.eraseSuffix();
+  FullPath.appendSuffix(sys::Path::GetDLLSuffix());
+  if (FullPath.isDynamicLibrary())  // Native shared library?
+    return FullPath;
+  if (FullPath.isBitcodeFile())     // .so file containing bitcode?
+    return FullPath;
+
+  // Not found... fall through.
+
+  // Indicate that the library was not found in the directory.
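+  // (An empty Path serves as the "not found" sentinel; FindLib below checks
+  // for it with isEmpty().)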
+ FullPath.clear(); + return FullPath; +} + +/// FindLib - Try to convert Filename into the name of a file that we can open, +/// if it does not already name a file we can open, by first trying to open +/// Filename, then libFilename.[suffix] for each of a set of several common +/// library suffixes, in each of the directories in LibPaths. Returns an empty +/// Path if no matching file can be found. +/// +sys::Path +Linker::FindLib(StringRef Filename) { + // Determine if the pathname can be found as it stands. + sys::Path FilePath(Filename); + if (FilePath.canRead() && + (FilePath.isArchive() || FilePath.isDynamicLibrary())) + return FilePath; + + // Iterate over the directories in Paths to see if we can find the library + // there. + for (unsigned Index = 0; Index != LibPaths.size(); ++Index) { + sys::Path Directory(LibPaths[Index]); + sys::Path FullPath = IsLibrary(Filename, Directory); + if (!FullPath.isEmpty()) + return FullPath; + } + return sys::Path(); +} diff --git a/final/lib/Linker/Makefile b/final/lib/Linker/Makefile new file mode 100644 index 00000000000..19e646b7483 --- /dev/null +++ b/final/lib/Linker/Makefile @@ -0,0 +1,15 @@ +##===- lib/Linker/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMLinker +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common + diff --git a/final/lib/MC/CMakeLists.txt b/final/lib/MC/CMakeLists.txt new file mode 100644 index 00000000000..6aed059f0f4 --- /dev/null +++ b/final/lib/MC/CMakeLists.txt @@ -0,0 +1,40 @@ +add_llvm_library(LLVMMC + ELFObjectWriter.cpp + MCAsmInfo.cpp + MCAsmInfoCOFF.cpp + MCAsmInfoDarwin.cpp + MCAsmStreamer.cpp + MCAssembler.cpp + MCCodeEmitter.cpp + MCContext.cpp + MCDisassembler.cpp + MCELF.cpp + MCELFObjectTargetWriter.cpp + MCELFStreamer.cpp + MCExpr.cpp + MCInst.cpp + MCInstPrinter.cpp + MCLabel.cpp + MCDwarf.cpp + MCLoggingStreamer.cpp + MCMachOStreamer.cpp + MCMachObjectTargetWriter.cpp + MCNullStreamer.cpp + MCObjectStreamer.cpp + MCObjectWriter.cpp + MCPureStreamer.cpp + MCSection.cpp + MCSectionCOFF.cpp + MCSectionELF.cpp + MCSectionMachO.cpp + MCStreamer.cpp + MCSymbol.cpp + MCValue.cpp + MachObjectWriter.cpp + WinCOFFStreamer.cpp + WinCOFFObjectWriter.cpp + TargetAsmBackend.cpp + ) + +add_subdirectory(MCParser) +add_subdirectory(MCDisassembler) diff --git a/final/lib/MC/ELFObjectWriter.cpp b/final/lib/MC/ELFObjectWriter.cpp new file mode 100644 index 00000000000..3492cbc55d6 --- /dev/null +++ b/final/lib/MC/ELFObjectWriter.cpp @@ -0,0 +1,1494 @@ +//===- lib/MC/ELFObjectWriter.cpp - ELF File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. 
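+// It lays out and emits the ELF header, the section contents, the symbol,
+// string and relocation tables, and the section header table.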
+// +//===----------------------------------------------------------------------===// + +#include "ELFObjectWriter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/ADT/StringSwitch.h" + +#include "../Target/X86/X86FixupKinds.h" +#include "../Target/ARM/ARMFixupKinds.h" + +#include +using namespace llvm; + +bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind); + + return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; +} + +bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { + switch (Variant) { + default: + return false; + case MCSymbolRefExpr::VK_GOT: + case MCSymbolRefExpr::VK_PLT: + case MCSymbolRefExpr::VK_GOTPCREL: + case MCSymbolRefExpr::VK_TPOFF: + case MCSymbolRefExpr::VK_TLSGD: + case MCSymbolRefExpr::VK_GOTTPOFF: + case MCSymbolRefExpr::VK_INDNTPOFF: + case MCSymbolRefExpr::VK_NTPOFF: + case MCSymbolRefExpr::VK_GOTNTPOFF: + case MCSymbolRefExpr::VK_TLSLDM: + case MCSymbolRefExpr::VK_DTPOFF: + case MCSymbolRefExpr::VK_TLSLD: + return true; + } +} + +ELFObjectWriter::~ELFObjectWriter() +{} + +// Emit the ELF header. +void ELFObjectWriter::WriteHeader(uint64_t SectionDataSize, + unsigned NumberOfSections) { + // ELF Header + // ---------- + // + // Note + // ---- + // emitWord method behaves differently for ELF32 and ELF64, writing + // 4 bytes in the former and 8 in the latter. + + Write8(0x7f); // e_ident[EI_MAG0] + Write8('E'); // e_ident[EI_MAG1] + Write8('L'); // e_ident[EI_MAG2] + Write8('F'); // e_ident[EI_MAG3] + + Write8(is64Bit() ? ELF::ELFCLASS64 : ELF::ELFCLASS32); // e_ident[EI_CLASS] + + // e_ident[EI_DATA] + Write8(isLittleEndian() ? ELF::ELFDATA2LSB : ELF::ELFDATA2MSB); + + Write8(ELF::EV_CURRENT); // e_ident[EI_VERSION] + // e_ident[EI_OSABI] + switch (TargetObjectWriter->getOSType()) { + case Triple::FreeBSD: Write8(ELF::ELFOSABI_FREEBSD); break; + case Triple::Linux: Write8(ELF::ELFOSABI_LINUX); break; + default: Write8(ELF::ELFOSABI_NONE); break; + } + Write8(0); // e_ident[EI_ABIVERSION] + + WriteZeros(ELF::EI_NIDENT - ELF::EI_PAD); + + Write16(ELF::ET_REL); // e_type + + Write16(TargetObjectWriter->getEMachine()); // e_machine = target + + Write32(ELF::EV_CURRENT); // e_version + WriteWord(0); // e_entry, no entry point in .o file + WriteWord(0); // e_phoff, no program header for .o + WriteWord(SectionDataSize + (is64Bit() ? sizeof(ELF::Elf64_Ehdr) : + sizeof(ELF::Elf32_Ehdr))); // e_shoff = sec hdr table off in bytes + + // e_flags = whatever the target wants + WriteEFlags(); + + // e_ehsize = ELF header size + Write16(is64Bit() ? sizeof(ELF::Elf64_Ehdr) : sizeof(ELF::Elf32_Ehdr)); + + Write16(0); // e_phentsize = prog header entry size + Write16(0); // e_phnum = # prog header entries = 0 + + // e_shentsize = Section header entry size + Write16(is64Bit() ? 
sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr)); + + // e_shnum = # of section header ents + if (NumberOfSections >= ELF::SHN_LORESERVE) + Write16(0); + else + Write16(NumberOfSections); + + // e_shstrndx = Section # of '.shstrtab' + if (NumberOfSections >= ELF::SHN_LORESERVE) + Write16(ELF::SHN_XINDEX); + else + Write16(ShstrtabIndex); +} + +void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF, + MCDataFragment *ShndxF, + uint64_t name, + uint8_t info, uint64_t value, + uint64_t size, uint8_t other, + uint32_t shndx, + bool Reserved) { + if (ShndxF) { + if (shndx >= ELF::SHN_LORESERVE && !Reserved) + String32(*ShndxF, shndx); + else + String32(*ShndxF, 0); + } + + uint16_t Index = (shndx >= ELF::SHN_LORESERVE && !Reserved) ? + uint16_t(ELF::SHN_XINDEX) : shndx; + + if (is64Bit()) { + String32(*SymtabF, name); // st_name + String8(*SymtabF, info); // st_info + String8(*SymtabF, other); // st_other + String16(*SymtabF, Index); // st_shndx + String64(*SymtabF, value); // st_value + String64(*SymtabF, size); // st_size + } else { + String32(*SymtabF, name); // st_name + String32(*SymtabF, value); // st_value + String32(*SymtabF, size); // st_size + String8(*SymtabF, info); // st_info + String8(*SymtabF, other); // st_other + String16(*SymtabF, Index); // st_shndx + } +} + +uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data, + const MCAsmLayout &Layout) { + if (Data.isCommon() && Data.isExternal()) + return Data.getCommonAlignment(); + + const MCSymbol &Symbol = Data.getSymbol(); + + if (Symbol.isAbsolute() && Symbol.isVariable()) { + if (const MCExpr *Value = Symbol.getVariableValue()) { + int64_t IntValue; + if (Value->EvaluateAsAbsolute(IntValue, Layout)) + return (uint64_t)IntValue; + } + } + + if (!Symbol.isInSection()) + return 0; + + if (Data.getFragment()) + return Layout.getSymbolOffset(&Data); + + return 0; +} + +void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout) { + // The presence of symbol versions causes undefined symbols and + // versions declared with @@@ to be renamed. + + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Alias = it->getSymbol(); + const MCSymbol &Symbol = Alias.AliasedSymbol(); + MCSymbolData &SD = Asm.getSymbolData(Symbol); + + // Not an alias. + if (&Symbol == &Alias) + continue; + + StringRef AliasName = Alias.getName(); + size_t Pos = AliasName.find('@'); + if (Pos == StringRef::npos) + continue; + + // Aliases defined with .symvar copy the binding from the symbol they alias. + // This is the first place we are able to copy this information. + it->setExternal(SD.isExternal()); + MCELF::SetBinding(*it, MCELF::GetBinding(SD)); + + StringRef Rest = AliasName.substr(Pos); + if (!Symbol.isUndefined() && !Rest.startswith("@@@")) + continue; + + // FIXME: produce a better error message. 
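+    // (In gas-style symbol versioning, "sym@@VER" marks the default version
+    // of a defined symbol, hence the check below; "sym@@@VER" is rewritten
+    // later to "@@" or "@" depending on whether the symbol is defined.)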
+ if (Symbol.isUndefined() && Rest.startswith("@@") && + !Rest.startswith("@@@")) + report_fatal_error("A @@ version cannot be undefined"); + + Renames.insert(std::make_pair(&Symbol, &Alias)); + } +} + +void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF, + MCDataFragment *ShndxF, + ELFSymbolData &MSD, + const MCAsmLayout &Layout) { + MCSymbolData &OrigData = *MSD.SymbolData; + MCSymbolData &Data = + Layout.getAssembler().getSymbolData(OrigData.getSymbol().AliasedSymbol()); + + bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() || + Data.getSymbol().isVariable(); + + uint8_t Binding = MCELF::GetBinding(OrigData); + uint8_t Visibility = MCELF::GetVisibility(OrigData); + uint8_t Type = MCELF::GetType(Data); + + uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); + uint8_t Other = Visibility; + + uint64_t Value = SymbolValue(Data, Layout); + uint64_t Size = 0; + + assert(!(Data.isCommon() && !Data.isExternal())); + + const MCExpr *ESize = Data.getSize(); + if (ESize) { + int64_t Res; + if (!ESize->EvaluateAsAbsolute(Res, Layout)) + report_fatal_error("Size expression must be absolute."); + Size = Res; + } + + // Write out the symbol table entry + WriteSymbolEntry(SymtabF, ShndxF, MSD.StringIndex, Info, Value, + Size, Other, MSD.SectionIndex, IsReserved); +} + +void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, + MCDataFragment *ShndxF, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap) { + // The string table must be emitted first because we need the index + // into the string table for all the symbol names. + assert(StringTable.size() && "Missing string table"); + + // FIXME: Make sure the start of the symbol table is aligned. + + // The first entry is the undefined symbol entry. + WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false); + + // Write the symbol table entries. + LastLocalSymbolIndex = LocalSymbolData.size() + 1; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) { + ELFSymbolData &MSD = LocalSymbolData[i]; + WriteSymbol(SymtabF, ShndxF, MSD, Layout); + } + + // Write out a symbol table entry for each regular section. 
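+  // (These STT_SECTION entries allow relocations to refer to a section by
+  // symbol index; the rel/rela, strtab and symtab sections are skipped since
+  // nothing relocates against them.)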
+  for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e;
+       ++i) {
+    const MCSectionELF &Section =
+      static_cast<const MCSectionELF&>(i->getSection());
+    if (Section.getType() == ELF::SHT_RELA ||
+        Section.getType() == ELF::SHT_REL ||
+        Section.getType() == ELF::SHT_STRTAB ||
+        Section.getType() == ELF::SHT_SYMTAB)
+      continue;
+    WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0,
+                     ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false);
+    LastLocalSymbolIndex++;
+  }
+
+  for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) {
+    ELFSymbolData &MSD = ExternalSymbolData[i];
+    MCSymbolData &Data = *MSD.SymbolData;
+    assert(((Data.getFlags() & ELF_STB_Global) ||
+            (Data.getFlags() & ELF_STB_Weak)) &&
+           "External symbol requires STB_GLOBAL or STB_WEAK flag");
+    WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+    if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+      LastLocalSymbolIndex++;
+  }
+
+  for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) {
+    ELFSymbolData &MSD = UndefinedSymbolData[i];
+    MCSymbolData &Data = *MSD.SymbolData;
+    WriteSymbol(SymtabF, ShndxF, MSD, Layout);
+    if (MCELF::GetBinding(Data) == ELF::STB_LOCAL)
+      LastLocalSymbolIndex++;
+  }
+}
+
+const MCSymbol *ELFObjectWriter::SymbolToReloc(const MCAssembler &Asm,
+                                               const MCValue &Target,
+                                               const MCFragment &F) const {
+  const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+  const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+  const MCSymbol *Renamed = Renames.lookup(&Symbol);
+  const MCSymbolData &SD = Asm.getSymbolData(Symbol);
+
+  if (ASymbol.isUndefined()) {
+    if (Renamed)
+      return Renamed;
+    return &ASymbol;
+  }
+
+  if (SD.isExternal()) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  const MCSectionELF &Section =
+    static_cast<const MCSectionELF&>(ASymbol.getSection());
+  const SectionKind secKind = Section.getKind();
+
+  if (secKind.isBSS())
+    return ExplicitRelSym(Asm, Target, F, true);
+
+  if (secKind.isThreadLocal()) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  MCSymbolRefExpr::VariantKind Kind = Target.getSymA()->getKind();
+  const MCSectionELF &Sec2 =
+    static_cast<const MCSectionELF&>(F.getParent()->getSection());
+
+  if (&Sec2 != &Section &&
+      (Kind == MCSymbolRefExpr::VK_PLT ||
+       Kind == MCSymbolRefExpr::VK_GOTPCREL ||
+       Kind == MCSymbolRefExpr::VK_GOTOFF)) {
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  if (Section.getFlags() & ELF::SHF_MERGE) {
+    if (Target.getConstant() == 0)
+      return NULL;
+    if (Renamed)
+      return Renamed;
+    return &Symbol;
+  }
+
+  return ExplicitRelSym(Asm, Target, F, false);
+}
+
+
+void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                       const MCAsmLayout &Layout,
+                                       const MCFragment *Fragment,
+                                       const MCFixup &Fixup,
+                                       MCValue Target,
+                                       uint64_t &FixedValue) {
+  int64_t Addend = 0;
+  int Index = 0;
+  int64_t Value = Target.getConstant();
+  const MCSymbol *RelocSymbol = NULL;
+
+  bool IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
+  if (!Target.isAbsolute()) {
+    const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+    const MCSymbol &ASymbol = Symbol.AliasedSymbol();
+    RelocSymbol = SymbolToReloc(Asm, Target, *Fragment);
+
+    if (const MCSymbolRefExpr *RefB = Target.getSymB()) {
+      const MCSymbol &SymbolB = RefB->getSymbol();
+      MCSymbolData &SDB = Asm.getSymbolData(SymbolB);
+      IsPCRel = true;
+
+      // Offset of the symbol in the section
+      int64_t a = Layout.getSymbolOffset(&SDB);
+
+      // Offset of the relocation in the section
+      int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+      Value += b - a;
+    }
+
+    if (!RelocSymbol) {
+      MCSymbolData &SD =
Asm.getSymbolData(ASymbol); + MCFragment *F = SD.getFragment(); + + Index = F->getParent()->getOrdinal() + 1; + + // Offset of the symbol in the section + Value += Layout.getSymbolOffset(&SD); + } else { + if (Asm.getSymbolData(Symbol).getFlags() & ELF_Other_Weakref) + WeakrefUsedInReloc.insert(RelocSymbol); + else + UsedInReloc.insert(RelocSymbol); + Index = -1; + } + Addend = Value; + // Compensate for the addend on i386. + if (is64Bit()) + Value = 0; + } + + FixedValue = Value; + unsigned Type = GetRelocType(Target, Fixup, IsPCRel, + (RelocSymbol != 0), Addend); + + uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + + Fixup.getOffset(); + + if (!hasRelocationAddend()) + Addend = 0; + ELFRelocationEntry ERE(RelocOffset, Index, Type, RelocSymbol, Addend); + Relocations[Fragment->getParent()].push_back(ERE); +} + + +uint64_t +ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm, + const MCSymbol *S) { + MCSymbolData &SD = Asm.getSymbolData(*S); + return SD.getIndex(); +} + +bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, + const MCSymbolData &Data, + bool Used, bool Renamed) { + if (Data.getFlags() & ELF_Other_Weakref) + return false; + + if (Used) + return true; + + if (Renamed) + return false; + + const MCSymbol &Symbol = Data.getSymbol(); + + if (Symbol.getName() == "_GLOBAL_OFFSET_TABLE_") + return true; + + const MCSymbol &A = Symbol.AliasedSymbol(); + if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) + return false; + + bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL; + if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) + return false; + + if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined()) + return false; + + if (Symbol.isTemporary()) + return false; + + return true; +} + +bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc) { + if (Data.isExternal()) + return false; + + const MCSymbol &Symbol = Data.getSymbol(); + const MCSymbol &RefSymbol = Symbol.AliasedSymbol(); + + if (RefSymbol.isUndefined() && !RefSymbol.isVariable()) { + if (isSignature && !isUsedInReloc) + return true; + + return false; + } + + return true; +} + +void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm, + SectionIndexMapTy &SectionIndexMap) { + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (Section.getType() != ELF::SHT_GROUP) + continue; + SectionIndexMap[&Section] = Index++; + } + + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (Section.getType() == ELF::SHT_GROUP) + continue; + SectionIndexMap[&Section] = Index++; + } +} + +void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + RevGroupMapTy RevGroupMap) { + // FIXME: Is this the correct place to do this? + if (NeedsGOT) { + llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_"; + MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name); + MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym); + Data.setExternal(true); + MCELF::SetBinding(Data, ELF::STB_GLOBAL); + } + + // Build section lookup table. + int NumRegularSections = Asm.size(); + + // Index 0 is always the empty string. + StringMap StringIndexMap; + StringTable += '\x00'; + + // Add the data for the symbols. 
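+  // (Each symbol is sorted below into LocalSymbolData, ExternalSymbolData or
+  // UndefinedSymbolData; the split matters because ELF requires all STB_LOCAL
+  // symtab entries to precede the non-local ones.)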
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + bool Used = UsedInReloc.count(&Symbol); + bool WeakrefUsed = WeakrefUsedInReloc.count(&Symbol); + bool isSignature = RevGroupMap.count(&Symbol); + + if (!isInSymtab(Asm, *it, + Used || WeakrefUsed || isSignature, + Renames.count(&Symbol))) + continue; + + ELFSymbolData MSD; + MSD.SymbolData = it; + const MCSymbol &RefSymbol = Symbol.AliasedSymbol(); + + // Undefined symbols are global, but this is the first place we + // are able to set it. + bool Local = isLocal(*it, isSignature, Used); + if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) { + MCSymbolData &SD = Asm.getSymbolData(RefSymbol); + MCELF::SetBinding(*it, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); + } + + if (RefSymbol.isUndefined() && !Used && WeakrefUsed) + MCELF::SetBinding(*it, ELF::STB_WEAK); + + if (it->isCommon()) { + assert(!Local); + MSD.SectionIndex = ELF::SHN_COMMON; + } else if (Symbol.isAbsolute() || RefSymbol.isVariable()) { + MSD.SectionIndex = ELF::SHN_ABS; + } else if (RefSymbol.isUndefined()) { + if (isSignature && !Used) + MSD.SectionIndex = SectionIndexMap.lookup(RevGroupMap[&Symbol]); + else + MSD.SectionIndex = ELF::SHN_UNDEF; + } else { + const MCSectionELF &Section = + static_cast(RefSymbol.getSection()); + MSD.SectionIndex = SectionIndexMap.lookup(&Section); + if (MSD.SectionIndex >= ELF::SHN_LORESERVE) + NeedsSymtabShndx = true; + assert(MSD.SectionIndex && "Invalid section index!"); + } + + // The @@@ in symbol version is replaced with @ in undefined symbols and + // @@ in defined ones. + StringRef Name = Symbol.getName(); + SmallString<32> Buf; + + size_t Pos = Name.find("@@@"); + if (Pos != StringRef::npos) { + Buf += Name.substr(0, Pos); + unsigned Skip = MSD.SectionIndex == ELF::SHN_UNDEF ? 2 : 1; + Buf += Name.substr(Pos + Skip); + Name = Buf; + } + + uint64_t &Entry = StringIndexMap[Name]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Name; + StringTable += '\x00'; + } + MSD.StringIndex = Entry; + if (MSD.SectionIndex == ELF::SHN_UNDEF) + UndefinedSymbolData.push_back(MSD); + else if (Local) + LocalSymbolData.push_back(MSD); + else + ExternalSymbolData.push_back(MSD); + } + + // Symbols are required to be in lexicographic order. + array_pod_sort(LocalSymbolData.begin(), LocalSymbolData.end()); + array_pod_sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + array_pod_sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. Local symbols must come before all other + // symbols with non-local bindings. + unsigned Index = 1; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + + Index += NumRegularSections; + + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); +} + +void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, + const MCSectionData &SD) { + if (!Relocations[&SD].empty()) { + MCContext &Ctx = Asm.getContext(); + const MCSectionELF *RelaSection; + const MCSectionELF &Section = + static_cast(SD.getSection()); + + const StringRef SectionName = Section.getSectionName(); + std::string RelaSectionName = hasRelocationAddend() ? 
".rela" : ".rel"; + RelaSectionName += SectionName; + + unsigned EntrySize; + if (hasRelocationAddend()) + EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rela) : sizeof(ELF::Elf32_Rela); + else + EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); + + RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ? + ELF::SHT_RELA : ELF::SHT_REL, 0, + SectionKind::getReadOnly(), + EntrySize, ""); + + MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection); + RelaSD.setAlignment(is64Bit() ? 8 : 4); + + MCDataFragment *F = new MCDataFragment(&RelaSD); + + WriteRelocationsFragment(Asm, F, &SD); + } +} + +void ELFObjectWriter::WriteSecHdrEntry(uint32_t Name, uint32_t Type, + uint64_t Flags, uint64_t Address, + uint64_t Offset, uint64_t Size, + uint32_t Link, uint32_t Info, + uint64_t Alignment, + uint64_t EntrySize) { + Write32(Name); // sh_name: index into string table + Write32(Type); // sh_type + WriteWord(Flags); // sh_flags + WriteWord(Address); // sh_addr + WriteWord(Offset); // sh_offset + WriteWord(Size); // sh_size + Write32(Link); // sh_link + Write32(Info); // sh_info + WriteWord(Alignment); // sh_addralign + WriteWord(EntrySize); // sh_entsize +} + +void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, + MCDataFragment *F, + const MCSectionData *SD) { + std::vector &Relocs = Relocations[SD]; + // sort by the r_offset just like gnu as does + array_pod_sort(Relocs.begin(), Relocs.end()); + + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + ELFRelocationEntry entry = Relocs[e - i - 1]; + + if (!entry.Index) + ; + else if (entry.Index < 0) + entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol); + else + entry.Index += LocalSymbolData.size(); + if (is64Bit()) { + String64(*F, entry.r_offset); + + struct ELF::Elf64_Rela ERE64; + ERE64.setSymbolAndType(entry.Index, entry.Type); + String64(*F, ERE64.r_info); + + if (hasRelocationAddend()) + String64(*F, entry.r_addend); + } else { + String32(*F, entry.r_offset); + + struct ELF::Elf32_Rela ERE32; + ERE32.setSymbolAndType(entry.Index, entry.Type); + String32(*F, ERE32.r_info); + + if (hasRelocationAddend()) + String32(*F, entry.r_addend); + } + } +} + +void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, + MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap) { + MCContext &Ctx = Asm.getContext(); + MCDataFragment *F; + + unsigned EntrySize = is64Bit() ? ELF::SYMENTRY_SIZE64 : ELF::SYMENTRY_SIZE32; + + // We construct .shstrtab, .symtab and .strtab in this order to match gnu as. + const MCSectionELF *ShstrtabSection = + Ctx.getELFSection(".shstrtab", ELF::SHT_STRTAB, 0, + SectionKind::getReadOnly()); + MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection); + ShstrtabSD.setAlignment(1); + ShstrtabIndex = Asm.size(); + + const MCSectionELF *SymtabSection = + Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, + SectionKind::getReadOnly(), + EntrySize, ""); + MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection); + SymtabSD.setAlignment(is64Bit() ? 
8 : 4); + SymbolTableIndex = Asm.size(); + + MCSectionData *SymtabShndxSD = NULL; + + if (NeedsSymtabShndx) { + const MCSectionELF *SymtabShndxSection = + Ctx.getELFSection(".symtab_shndx", ELF::SHT_SYMTAB_SHNDX, 0, + SectionKind::getReadOnly(), 4, ""); + SymtabShndxSD = &Asm.getOrCreateSectionData(*SymtabShndxSection); + SymtabShndxSD->setAlignment(4); + } + + const MCSection *StrtabSection; + StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0, + SectionKind::getReadOnly()); + MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection); + StrtabSD.setAlignment(1); + StringTableIndex = Asm.size(); + + WriteRelocations(Asm, Layout); + + // Symbol table + F = new MCDataFragment(&SymtabSD); + MCDataFragment *ShndxF = NULL; + if (NeedsSymtabShndx) { + ShndxF = new MCDataFragment(SymtabShndxSD); + } + WriteSymbolTable(F, ShndxF, Asm, Layout, SectionIndexMap); + + F = new MCDataFragment(&StrtabSD); + F->getContents().append(StringTable.begin(), StringTable.end()); + + F = new MCDataFragment(&ShstrtabSD); + + // Section header string table. + // + // The first entry of a string table holds a null character so skip + // section 0. + uint64_t Index = 1; + F->getContents() += '\x00'; + + StringMap SecStringMap; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + // FIXME: We could merge suffixes like in .text and .rela.text. + + StringRef Name = Section.getSectionName(); + if (SecStringMap.count(Name)) { + SectionStringTableIndex[&Section] = SecStringMap[Name]; + continue; + } + // Remember the index into the string table so we can write it + // into the sh_name field of the section header table. + SectionStringTableIndex[&Section] = Index; + SecStringMap[Name] = Index; + + Index += Name.size() + 1; + F->getContents() += Name; + F->getContents() += '\x00'; + } +} + +void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, + MCAsmLayout &Layout, + GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap) { + // Create the .note.GNU-stack section if needed. 
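+  // (An empty .note.GNU-stack section is the conventional marker telling GNU
+  // ld that the object does not require an executable stack.)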
+ MCContext &Ctx = Asm.getContext(); + if (Asm.getNoExecStack()) { + const MCSectionELF *GnuStackSection = + Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + Asm.getOrCreateSectionData(*GnuStackSection); + } + + // Build the groups + for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (!(Section.getFlags() & ELF::SHF_GROUP)) + continue; + + const MCSymbol *SignatureSymbol = Section.getGroup(); + Asm.getOrCreateSymbolData(*SignatureSymbol); + const MCSectionELF *&Group = RevGroupMap[SignatureSymbol]; + if (!Group) { + Group = Ctx.CreateELFGroupSection(); + MCSectionData &Data = Asm.getOrCreateSectionData(*Group); + Data.setAlignment(4); + MCDataFragment *F = new MCDataFragment(&Data); + String32(*F, ELF::GRP_COMDAT); + } + GroupMap[Group] = SignatureSymbol; + } + + // Add sections to the groups + unsigned Index = 1; + unsigned NumGroups = RevGroupMap.size(); + for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + it != ie; ++it, ++Index) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (!(Section.getFlags() & ELF::SHF_GROUP)) + continue; + const MCSectionELF *Group = RevGroupMap[Section.getGroup()]; + MCSectionData &Data = Asm.getOrCreateSectionData(*Group); + // FIXME: we could use the previous fragment + MCDataFragment *F = new MCDataFragment(&Data); + String32(*F, NumGroups + Index); + } +} + +void ELFObjectWriter::WriteSection(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + uint32_t GroupSymbolIndex, + uint64_t Offset, uint64_t Size, + uint64_t Alignment, + const MCSectionELF &Section) { + uint64_t sh_link = 0; + uint64_t sh_info = 0; + + switch(Section.getType()) { + case ELF::SHT_DYNAMIC: + sh_link = SectionStringTableIndex[&Section]; + sh_info = 0; + break; + + case ELF::SHT_REL: + case ELF::SHT_RELA: { + const MCSectionELF *SymtabSection; + const MCSectionELF *InfoSection; + SymtabSection = Asm.getContext().getELFSection(".symtab", ELF::SHT_SYMTAB, + 0, + SectionKind::getReadOnly()); + sh_link = SectionIndexMap.lookup(SymtabSection); + assert(sh_link && ".symtab not found"); + + // Remove ".rel" and ".rela" prefixes. + unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5; + StringRef SectionName = Section.getSectionName().substr(SecNameLen); + + InfoSection = Asm.getContext().getELFSection(SectionName, + ELF::SHT_PROGBITS, 0, + SectionKind::getReadOnly()); + sh_info = SectionIndexMap.lookup(InfoSection); + break; + } + + case ELF::SHT_SYMTAB: + case ELF::SHT_DYNSYM: + sh_link = StringTableIndex; + sh_info = LastLocalSymbolIndex; + break; + + case ELF::SHT_SYMTAB_SHNDX: + sh_link = SymbolTableIndex; + break; + + case ELF::SHT_PROGBITS: + case ELF::SHT_STRTAB: + case ELF::SHT_NOBITS: + case ELF::SHT_NOTE: + case ELF::SHT_NULL: + case ELF::SHT_ARM_ATTRIBUTES: + case ELF::SHT_INIT_ARRAY: + case ELF::SHT_FINI_ARRAY: + case ELF::SHT_PREINIT_ARRAY: + case ELF::SHT_X86_64_UNWIND: + // Nothing to do. 
+ break; + + case ELF::SHT_GROUP: { + sh_link = SymbolTableIndex; + sh_info = GroupSymbolIndex; + break; + } + + default: + assert(0 && "FIXME: sh_type value not supported!"); + break; + } + + WriteSecHdrEntry(SectionStringTableIndex[&Section], Section.getType(), + Section.getFlags(), 0, Offset, Size, sh_link, sh_info, + Alignment, Section.getEntrySize()); +} + +bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) { + return SD.getOrdinal() == ~UINT32_C(0) && + !SD.getSection().isVirtualSection(); +} + +uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) { + uint64_t Ret = 0; + for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; + ++i) { + const MCFragment &F = *i; + assert(F.getKind() == MCFragment::FT_Data); + Ret += cast(F).getContents().size(); + } + return Ret; +} + +uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD) { + if (IsELFMetaDataSection(SD)) + return DataSectionSize(SD); + return Layout.getSectionFileSize(&SD); +} + +uint64_t ELFObjectWriter::GetSectionAddressSize(const MCAsmLayout &Layout, + const MCSectionData &SD) { + if (IsELFMetaDataSection(SD)) + return DataSectionSize(SD); + return Layout.getSectionAddressSize(&SD); +} + +void ELFObjectWriter::WriteDataSectionData(ELFObjectWriter *W, + const MCSectionData &SD) { + for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; + ++i) { + const MCFragment &F = *i; + assert(F.getKind() == MCFragment::FT_Data); + W->WriteBytes(cast(F).getContents().str()); + } +} + +void ELFObjectWriter::WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + GroupMapTy GroupMap; + RevGroupMapTy RevGroupMap; + CreateIndexedSections(Asm, const_cast(Layout), GroupMap, + RevGroupMap); + + SectionIndexMapTy SectionIndexMap; + + ComputeIndexMap(Asm, SectionIndexMap); + + // Compute symbol table information. + ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap); + + CreateMetadataSections(const_cast(Asm), + const_cast(Layout), + SectionIndexMap); + + // Update to include the metadata sections. + ComputeIndexMap(Asm, SectionIndexMap); + + // Add 1 for the null section. + unsigned NumSections = Asm.size() + 1; + uint64_t NaturalAlignment = is64Bit() ? 8 : 4; + uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) : + sizeof(ELF::Elf32_Ehdr); + uint64_t FileOff = HeaderSize; + + std::vector Sections; + Sections.resize(NumSections); + + for (SectionIndexMapTy::const_iterator i= + SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { + const std::pair &p = *i; + Sections[p.second] = p.first; + } + + for (unsigned i = 1; i < NumSections; ++i) { + const MCSectionELF &Section = *Sections[i]; + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + + // Get the size of the section in the output file (including padding). + FileOff += GetSectionFileSize(Layout, SD); + } + + FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); + + // Write out the ELF header ... + WriteHeader(FileOff - HeaderSize, NumSections); + + FileOff = HeaderSize; + + // ... then all of the sections ... + DenseMap SectionOffsetMap; + + for (unsigned i = 1; i < NumSections; ++i) { + const MCSectionELF &Section = *Sections[i]; + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment()); + WriteZeros(Padding); + FileOff += Padding; + + // Remember the offset into the file for this section. 
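+    // (The offsets recorded here become the sh_offset fields when the section
+    // header table is written out below.)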
+    SectionOffsetMap[&Section] = FileOff;
+
+    FileOff += GetSectionFileSize(Layout, SD);
+
+    if (IsELFMetaDataSection(SD))
+      WriteDataSectionData(this, SD);
+    else
+      Asm.WriteSectionData(&SD, Layout);
+  }
+
+  uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment);
+  WriteZeros(Padding);
+  FileOff += Padding;
+
+  // ... and then the section header table.
+  // Should we align the section header table?
+  //
+  // Null section first.
+  uint64_t FirstSectionSize =
+    NumSections >= ELF::SHN_LORESERVE ? NumSections : 0;
+  uint32_t FirstSectionLink =
+    ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0;
+  WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0);
+
+  for (unsigned i = 1; i < NumSections; ++i) {
+    const MCSectionELF &Section = *Sections[i];
+    const MCSectionData &SD = Asm.getOrCreateSectionData(Section);
+    uint32_t GroupSymbolIndex;
+    if (Section.getType() != ELF::SHT_GROUP)
+      GroupSymbolIndex = 0;
+    else
+      GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]);
+
+    uint64_t Size = GetSectionAddressSize(Layout, SD);
+
+    WriteSection(Asm, SectionIndexMap, GroupSymbolIndex,
+                 SectionOffsetMap[&Section], Size,
+                 SD.getAlignment(), Section);
+  }
+}
+
+bool
+ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                        const MCFragment &FB,
+                                                        bool InSet,
+                                                        bool IsPCRel) const {
+  if (DataA.getFlags() & ELF_STB_Weak)
+    return false;
+  return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(
+                                                Asm, DataA, FB, InSet, IsPCRel);
+}
+
+MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                            raw_ostream &OS,
+                                            bool IsLittleEndian) {
+  switch (MOTW->getEMachine()) {
+    case ELF::EM_386:
+    case ELF::EM_X86_64:
+      return new X86ELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    case ELF::EM_ARM:
+      return new ARMELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    case ELF::EM_MBLAZE:
+      return new MBlazeELFObjectWriter(MOTW, OS, IsLittleEndian); break;
+    default: llvm_unreachable("Unsupported architecture"); break;
+  }
+}
+
+
+/// START OF SUBCLASSES for ELFObjectWriter
+//===- ARMELFObjectWriter -------------------------------------------===//
+
+ARMELFObjectWriter::ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                                       raw_ostream &_OS,
+                                       bool IsLittleEndian)
+  : ELFObjectWriter(MOTW, _OS, IsLittleEndian)
+{}
+
+ARMELFObjectWriter::~ARMELFObjectWriter()
+{}
+
+// FIXME: get the real EABI Version from the Triple.
+void ARMELFObjectWriter::WriteEFlags() {
+  Write32(ELF::EF_ARM_EABIMASK & DefaultEABIVersion);
+}
+
+// In ARM, _MergedGlobals and most other symbols get emitted directly.
+// I.e. not as an offset to a section symbol.
+// This code is a first-cut approximation of what ARM/gcc does.
+
+const MCSymbol *ARMELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+                                                   const MCValue &Target,
+                                                   const MCFragment &F,
+                                                   bool IsBSS) const {
+  const MCSymbol &Symbol = Target.getSymA()->getSymbol();
+  bool EmitThisSym = false;
+
+  if (IsBSS) {
+    EmitThisSym = StringSwitch<bool>(Symbol.getName())
+      .Case("_MergedGlobals", true)
+      .Default(false);
+  } else {
+    EmitThisSym = StringSwitch<bool>(Symbol.getName())
+      .Case("_MergedGlobals", true)
+      .StartsWith(".L.str", true)
+      .Default(false);
+  }
+  if (EmitThisSym)
+    return &Symbol;
+  if (!
Symbol.isTemporary()) + return &Symbol; + return NULL; +} + +unsigned ARMELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) { + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + + unsigned Type = 0; + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: assert(0 && "Unimplemented"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_BASE_PREL; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + assert(0 && "unimplemented"); + break; + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + } + break; + case ARM::fixup_arm_uncondbranch: + switch (Modifier) { + case MCSymbolRefExpr::VK_ARM_PLT: + Type = ELF::R_ARM_PLT32; + break; + default: + Type = ELF::R_ARM_CALL; + break; + } + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + Type = ELF::R_ARM_MOVT_PREL; + break; + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + Type = ELF::R_ARM_MOVW_PREL_NC; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Type = ELF::R_ARM_THM_MOVT_PREL; + break; + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + Type = ELF::R_ARM_THM_MOVW_PREL_NC; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + switch (Modifier) { + default: llvm_unreachable("Unsupported Modifier"); break; + case MCSymbolRefExpr::VK_ARM_GOT: + Type = ELF::R_ARM_GOT_BREL; + break; + case MCSymbolRefExpr::VK_ARM_TLSGD: + Type = ELF::R_ARM_TLS_GD32; + break; + case MCSymbolRefExpr::VK_ARM_TPOFF: + Type = ELF::R_ARM_TLS_LE32; + break; + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + Type = ELF::R_ARM_TLS_IE32; + break; + case MCSymbolRefExpr::VK_None: + Type = ELF::R_ARM_ABS32; + break; + case MCSymbolRefExpr::VK_ARM_GOTOFF: + Type = ELF::R_ARM_GOTOFF32; + break; + } + break; + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_cb: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_arm_thumb_br: + assert(0 && "Unimplemented"); + break; + case ARM::fixup_arm_uncondbranch: + Type = ELF::R_ARM_CALL; + break; + case ARM::fixup_arm_condbranch: + Type = ELF::R_ARM_JUMP24; + break; + case ARM::fixup_arm_movt_hi16: + Type = ELF::R_ARM_MOVT_ABS; + break; + case ARM::fixup_arm_movw_lo16: + Type = ELF::R_ARM_MOVW_ABS_NC; + break; + case ARM::fixup_t2_movt_hi16: + Type = ELF::R_ARM_THM_MOVT_ABS; + break; + case ARM::fixup_t2_movw_lo16: + Type = ELF::R_ARM_THM_MOVW_ABS_NC; + break; + } + } + + if (RelocNeedsGOT(Modifier)) + NeedsGOT = true; + + return Type; +} + +//===- MBlazeELFObjectWriter -------------------------------------------===// + +MBlazeELFObjectWriter::MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian) + : ELFObjectWriter(MOTW, _OS, IsLittleEndian) { +} + +MBlazeELFObjectWriter::~MBlazeELFObjectWriter() { +} + +unsigned MBlazeELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) { + // determine the type of the relocation + unsigned Type; + if (IsPCRel) { + switch 
((unsigned)Fixup.getKind()) { + default: + llvm_unreachable("Unimplemented"); + case FK_PCRel_4: + Type = ELF::R_MICROBLAZE_64_PCREL; + break; + case FK_PCRel_2: + Type = ELF::R_MICROBLAZE_32_PCREL; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + Type = ((IsRelocWithSymbol || Addend !=0) + ? ELF::R_MICROBLAZE_32 + : ELF::R_MICROBLAZE_64); + break; + case FK_Data_2: + Type = ELF::R_MICROBLAZE_32; + break; + } + } + return Type; +} + +//===- X86ELFObjectWriter -------------------------------------------===// + + +X86ELFObjectWriter::X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian) + : ELFObjectWriter(MOTW, _OS, IsLittleEndian) +{} + +X86ELFObjectWriter::~X86ELFObjectWriter() +{} + +unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, + const MCFixup &Fixup, + bool IsPCRel, + bool IsRelocWithSymbol, + int64_t Addend) { + // determine the type of the relocation + + MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ? + MCSymbolRefExpr::VK_None : Target.getSymA()->getKind(); + unsigned Type; + if (is64Bit()) { + if (IsPCRel) { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_PCRel_8: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC64; + break; + case X86::reloc_signed_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: // FIXME? + case X86::reloc_riprel_4byte: + case FK_PCRel_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_X86_64_PC32; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_X86_64_PLT32; + break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_X86_64_GOTPCREL; + break; + case MCSymbolRefExpr::VK_GOTTPOFF: + Type = ELF::R_X86_64_GOTTPOFF; + break; + case MCSymbolRefExpr::VK_TLSGD: + Type = ELF::R_X86_64_TLSGD; + break; + case MCSymbolRefExpr::VK_TLSLD: + Type = ELF::R_X86_64_TLSLD; + break; + } + break; + case FK_PCRel_2: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC16; + break; + case FK_PCRel_1: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC8; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + case FK_Data_8: Type = ELF::R_X86_64_64; break; + case X86::reloc_signed_4byte: + assert(isInt<32>(Target.getConstant())); + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_X86_64_32S; + break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_X86_64_GOT32; + break; + case MCSymbolRefExpr::VK_GOTPCREL: + Type = ELF::R_X86_64_GOTPCREL; + break; + case MCSymbolRefExpr::VK_TPOFF: + Type = ELF::R_X86_64_TPOFF32; + break; + case MCSymbolRefExpr::VK_DTPOFF: + Type = ELF::R_X86_64_DTPOFF32; + break; + } + break; + case FK_Data_4: + Type = ELF::R_X86_64_32; + break; + case FK_Data_2: Type = ELF::R_X86_64_16; break; + case FK_PCRel_1: + case FK_Data_1: Type = ELF::R_X86_64_8; break; + } + } + } else { + if (IsPCRel) { + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_386_PC32; + break; + case MCSymbolRefExpr::VK_PLT: + Type = ELF::R_386_PLT32; + break; + } + } else { + switch ((unsigned)Fixup.getKind()) { + default: llvm_unreachable("invalid fixup kind!"); + + case X86::reloc_global_offset_table: + Type = ELF::R_386_GOTPC; + 
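+        // (reloc_global_offset_table marks references to
+        // _GLOBAL_OFFSET_TABLE_, which ELF/i386 expresses as R_386_GOTPC.)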
break; + + // FIXME: Should we avoid selecting reloc_signed_4byte in 32 bit mode + // instead? + case X86::reloc_signed_4byte: + case FK_PCRel_4: + case FK_Data_4: + switch (Modifier) { + default: + llvm_unreachable("Unimplemented"); + case MCSymbolRefExpr::VK_None: + Type = ELF::R_386_32; + break; + case MCSymbolRefExpr::VK_GOT: + Type = ELF::R_386_GOT32; + break; + case MCSymbolRefExpr::VK_GOTOFF: + Type = ELF::R_386_GOTOFF; + break; + case MCSymbolRefExpr::VK_TLSGD: + Type = ELF::R_386_TLS_GD; + break; + case MCSymbolRefExpr::VK_TPOFF: + Type = ELF::R_386_TLS_LE_32; + break; + case MCSymbolRefExpr::VK_INDNTPOFF: + Type = ELF::R_386_TLS_IE; + break; + case MCSymbolRefExpr::VK_NTPOFF: + Type = ELF::R_386_TLS_LE; + break; + case MCSymbolRefExpr::VK_GOTNTPOFF: + Type = ELF::R_386_TLS_GOTIE; + break; + case MCSymbolRefExpr::VK_TLSLDM: + Type = ELF::R_386_TLS_LDM; + break; + case MCSymbolRefExpr::VK_DTPOFF: + Type = ELF::R_386_TLS_LDO_32; + break; + } + break; + case FK_Data_2: Type = ELF::R_386_16; break; + case FK_PCRel_1: + case FK_Data_1: Type = ELF::R_386_8; break; + } + } + } + + if (RelocNeedsGOT(Modifier)) + NeedsGOT = true; + + return Type; +} diff --git a/final/lib/MC/ELFObjectWriter.h b/final/lib/MC/ELFObjectWriter.h new file mode 100644 index 00000000000..9457623556d --- /dev/null +++ b/final/lib/MC/ELFObjectWriter.h @@ -0,0 +1,391 @@ +//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_ELFOBJECTWRITER_H +#define LLVM_MC_ELFOBJECTWRITER_H + +#include "MCELF.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" + +#include + +namespace llvm { + +class MCSection; +class MCDataFragment; +class MCSectionELF; + +class ELFObjectWriter : public MCObjectWriter { + protected: + + static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind); + static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant); + static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout); + static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, + bool Used, bool Renamed); + static bool isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc); + static bool IsELFMetaDataSection(const MCSectionData &SD); + static uint64_t DataSectionSize(const MCSectionData &SD); + static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + static void WriteDataSectionData(ELFObjectWriter *W, + const MCSectionData &SD); + + /*static bool isFixupKindX86RIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; + }*/ + + /// ELFSymbolData - Helper struct for containing some precomputed + /// information on symbols. 
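Every GetRelocType above reduces a triple (fixup kind, PC-relative or not, symbol modifier) to one ELF relocation enum; the nested switches are that table written out by hand. Below is a minimal standalone sketch of the width/PC-relativity axis for x86-64. The packaging is illustrative only (pickReloc and the local enum are not LLVM API; only the relocation names mirror ELF.h):

    #include <cassert>

    enum RelocType {
      R_X86_64_8,   R_X86_64_16,   R_X86_64_32,   R_X86_64_64,
      R_X86_64_PC8, R_X86_64_PC16, R_X86_64_PC32, R_X86_64_PC64
    };

    // Width in bytes plus PC-relativity selects the base relocation; the
    // symbol modifier (@PLT, @GOTPCREL, TLS kinds) then overrides it,
    // exactly as the nested switches above do.
    static RelocType pickReloc(unsigned Width, bool IsPCRel) {
      switch (Width) {
      case 1: return IsPCRel ? R_X86_64_PC8  : R_X86_64_8;
      case 2: return IsPCRel ? R_X86_64_PC16 : R_X86_64_16;
      case 4: return IsPCRel ? R_X86_64_PC32 : R_X86_64_32;
      case 8: return IsPCRel ? R_X86_64_PC64 : R_X86_64_64;
      }
      assert(0 && "unsupported fixup width");
      return R_X86_64_32; // not reached
    }

    int main() {
      assert(pickReloc(4, true)  == R_X86_64_PC32); // e.g. FK_PCRel_4
      assert(pickReloc(8, false) == R_X86_64_64);   // e.g. FK_Data_8
    }

The modifier axis then refines the base choice, which is why FK_Data_4 with @GOTPCREL lands on R_X86_64_GOTPCREL rather than R_X86_64_PC32 in the code above.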
+    struct ELFSymbolData {
+      MCSymbolData *SymbolData;
+      uint64_t StringIndex;
+      uint32_t SectionIndex;
+
+      // Support lexicographic sorting.
+      bool operator<(const ELFSymbolData &RHS) const {
+        if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
+          return true;
+        if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
+          return false;
+        return SymbolData->getSymbol().getName() <
+               RHS.SymbolData->getSymbol().getName();
+      }
+    };
+
+    /// @name Relocation Data
+    /// @{
+
+    struct ELFRelocationEntry {
+      // Make these big enough for both 32-bit and 64-bit
+      uint64_t r_offset;
+      int Index;
+      unsigned Type;
+      const MCSymbol *Symbol;
+      uint64_t r_addend;
+
+      ELFRelocationEntry()
+        : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {}
+
+      ELFRelocationEntry(uint64_t RelocOffset, int Idx,
+                         unsigned RelType, const MCSymbol *Sym,
+                         uint64_t Addend)
+        : r_offset(RelocOffset), Index(Idx), Type(RelType),
+          Symbol(Sym), r_addend(Addend) {}
+
+      // Support lexicographic sorting.
+      bool operator<(const ELFRelocationEntry &RE) const {
+        return RE.r_offset < r_offset;
+      }
+    };
+
+    /// The target specific ELF writer instance.
+    llvm::OwningPtr<MCELFObjectTargetWriter> TargetObjectWriter;
+
+    SmallPtrSet<const MCSymbol *, 16> UsedInReloc;
+    SmallPtrSet<const MCSymbol *, 16> WeakrefUsedInReloc;
+    DenseMap<const MCSymbol *, const MCSymbol *> Renames;
+
+    llvm::DenseMap<const MCSectionData *,
+                   std::vector<ELFRelocationEntry> > Relocations;
+    DenseMap<const MCSection *, uint64_t> SectionStringTableIndex;
+
+    /// @}
+    /// @name Symbol Table Data
+    /// @{
+
+    SmallString<256> StringTable;
+    std::vector<ELFSymbolData> LocalSymbolData;
+    std::vector<ELFSymbolData> ExternalSymbolData;
+    std::vector<ELFSymbolData> UndefinedSymbolData;
+
+    /// @}
+
+    bool NeedsGOT;
+
+    bool NeedsSymtabShndx;
+
+    // This holds the symbol table index of the last local symbol.
+    unsigned LastLocalSymbolIndex;
+    // This holds the .strtab section index.
+    unsigned StringTableIndex;
+    // This holds the .symtab section index.
+    unsigned SymbolTableIndex;
+
+    unsigned ShstrtabIndex;
+
+    const MCSymbol *SymbolToReloc(const MCAssembler &Asm,
+                                  const MCValue &Target,
+                                  const MCFragment &F) const;
+
+    // For arch-specific emission of explicit reloc symbol
+    virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+                                           const MCValue &Target,
+                                           const MCFragment &F,
+                                           bool IsBSS) const {
+      return NULL;
+    }
+
+    bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+    bool hasRelocationAddend() const {
+      return TargetObjectWriter->hasRelocationAddend();
+    }
+
+  public:
+    ELFObjectWriter(MCELFObjectTargetWriter *MOTW,
+                    raw_ostream &_OS, bool IsLittleEndian)
+      : MCObjectWriter(_OS, IsLittleEndian),
+        TargetObjectWriter(MOTW),
+        NeedsGOT(false), NeedsSymtabShndx(false) {
+    }
+
+    virtual ~ELFObjectWriter();
+
+    void WriteWord(uint64_t W) {
+      if (is64Bit())
+        Write64(W);
+      else
+        Write32(W);
+    }
+
+    void StringLE16(char *buf, uint16_t Value) {
+      buf[0] = char(Value >> 0);
+      buf[1] = char(Value >> 8);
+    }
+
+    void StringLE32(char *buf, uint32_t Value) {
+      StringLE16(buf, uint16_t(Value >> 0));
+      StringLE16(buf + 2, uint16_t(Value >> 16));
+    }
+
+    void StringLE64(char *buf, uint64_t Value) {
+      StringLE32(buf, uint32_t(Value >> 0));
+      StringLE32(buf + 4, uint32_t(Value >> 32));
+    }
+
+    void StringBE16(char *buf, uint16_t Value) {
+      buf[0] = char(Value >> 8);
+      buf[1] = char(Value >> 0);
+    }
+
+    void StringBE32(char *buf, uint32_t Value) {
+      StringBE16(buf, uint16_t(Value >> 16));
+      StringBE16(buf + 2, uint16_t(Value >> 0));
+    }
+
+    void StringBE64(char *buf, uint64_t Value) {
+      StringBE32(buf, uint32_t(Value >> 32));
+      StringBE32(buf + 4, uint32_t(Value >> 0));
+    }
+
+    void String8(MCDataFragment &F, uint8_t Value) {
+      char buf[1];
+      buf[0] = Value;
+      F.getContents() += StringRef(buf, 1);
+    }
+
+    void String16(MCDataFragment &F, uint16_t Value) {
+      char buf[2];
+      if (isLittleEndian())
+        StringLE16(buf, Value);
+      else
+        StringBE16(buf, Value);
+      F.getContents() += StringRef(buf, 2);
+    }
+
+    void String32(MCDataFragment &F, uint32_t Value) {
+      char buf[4];
+      if (isLittleEndian())
+        StringLE32(buf, Value);
+      else
+        StringBE32(buf, Value);
+      F.getContents() += StringRef(buf, 4);
+    }
+
+    void String64(MCDataFragment &F, uint64_t Value) {
+      char buf[8];
+      if (isLittleEndian())
+        StringLE64(buf, Value);
+      else
+        StringBE64(buf, Value);
+      F.getContents() += StringRef(buf, 8);
+    }
+
+    virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections);
+
+    /// Default e_flags = 0
+    virtual void WriteEFlags() { Write32(0); }
+
+    virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                                  uint64_t name, uint8_t info,
+                                  uint64_t value, uint64_t size,
+                                  uint8_t other, uint32_t shndx,
+                                  bool Reserved);
+
+    virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                             ELFSymbolData &MSD,
+                             const MCAsmLayout &Layout);
+
+    typedef DenseMap<const MCSection*, uint32_t> SectionIndexMapTy;
+    virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF,
+                                  const MCAssembler &Asm,
+                                  const MCAsmLayout &Layout,
+                                  const SectionIndexMapTy &SectionIndexMap);
+
+    virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                                  const MCFragment *Fragment, const MCFixup &Fixup,
+                                  MCValue Target, uint64_t &FixedValue);
+
+    virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm,
+                                                 const MCSymbol *S);
+
+    // Map from a group section to the signature symbol
+    typedef DenseMap<const MCSectionELF*, const MCSymbol*> GroupMapTy;
+    // Map from a signature symbol to the group section
+    typedef DenseMap<const MCSymbol*, const MCSectionELF*> RevGroupMapTy;
+
+    /// ComputeSymbolTable - Compute the symbol table data
+    ///
+    /// \param StringTable [out] - The string table data.
+    /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+    /// string table.
+    virtual void ComputeSymbolTable(MCAssembler &Asm,
+                                    const SectionIndexMapTy &SectionIndexMap,
+                                    RevGroupMapTy RevGroupMap);
+
+    virtual void ComputeIndexMap(MCAssembler &Asm,
+                                 SectionIndexMapTy &SectionIndexMap);
+
+    virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout,
+                                 const MCSectionData &SD);
+
+    virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) {
+      for (MCAssembler::const_iterator it = Asm.begin(),
+             ie = Asm.end(); it != ie; ++it) {
+        WriteRelocation(Asm, Layout, *it);
+      }
+    }
+
+    virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout,
+                                        const SectionIndexMapTy &SectionIndexMap);
+
+    // Create the sections that show up in the symbol table. Currently
+    // those are the .note.GNU-stack section and the group sections.
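The String16 through String64 helpers above dispatch on isLittleEndian() at run time and append raw bytes to a fragment. A self-contained sketch of the two byte orders (free functions standing in for the StringLE*/StringBE* members; not LLVM API):

    #include <cassert>
    #include <cstdint>

    // Least-significant byte first.
    static void writeLE32(char *buf, uint32_t v) {
      buf[0] = char(v >> 0);  buf[1] = char(v >> 8);
      buf[2] = char(v >> 16); buf[3] = char(v >> 24);
    }
    // Most-significant byte first.
    static void writeBE32(char *buf, uint32_t v) {
      buf[0] = char(v >> 24); buf[1] = char(v >> 16);
      buf[2] = char(v >> 8);  buf[3] = char(v >> 0);
    }

    int main() {
      char le[4], be[4];
      writeLE32(le, 0x11223344u);
      writeBE32(be, 0x11223344u);
      assert((unsigned char)le[0] == 0x44 && (unsigned char)be[0] == 0x11);
    }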
+ virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, + GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap); + + virtual void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout); + + virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, + uint64_t Address, uint64_t Offset, + uint64_t Size, uint32_t Link, uint32_t Info, + uint64_t Alignment, uint64_t EntrySize); + + virtual void WriteRelocationsFragment(const MCAssembler &Asm, + MCDataFragment *F, + const MCSectionData *SD); + + virtual bool + IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const; + + virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); + virtual void WriteSection(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + uint32_t GroupSymbolIndex, + uint64_t Offset, uint64_t Size, uint64_t Alignment, + const MCSectionELF &Section); + + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) = 0; + }; + + //===- X86ELFObjectWriter -------------------------------------------===// + + class X86ELFObjectWriter : public ELFObjectWriter { + public: + X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~X86ELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + + //===- ARMELFObjectWriter -------------------------------------------===// + + class ARMELFObjectWriter : public ELFObjectWriter { + public: + // FIXME: MCAssembler can't yet return the Subtarget, + enum { DefaultEABIVersion = 0x05000000U }; + + ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~ARMELFObjectWriter(); + + virtual void WriteEFlags(); + protected: + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + bool IsBSS) const; + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + //===- MBlazeELFObjectWriter -------------------------------------------===// + + class MBlazeELFObjectWriter : public ELFObjectWriter { + public: + MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~MBlazeELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; +} + +#endif diff --git a/final/lib/MC/MCAsmInfo.cpp b/final/lib/MC/MCAsmInfo.cpp new file mode 100644 index 00000000000..8199fb2e158 --- /dev/null +++ b/final/lib/MC/MCAsmInfo.cpp @@ -0,0 +1,108 @@ +//===-- MCAsmInfo.cpp - Asm Info -------------------------------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/DataTypes.h" +#include +#include +using namespace llvm; + +MCAsmInfo::MCAsmInfo() { + HasSubsectionsViaSymbols = false; + HasMachoZeroFillDirective = false; + HasMachoTBSSDirective = false; + HasStaticCtorDtorReferenceInStaticMode = false; + LinkerRequiresNonEmptyDwarfLines = false; + MaxInstLength = 4; + PCSymbol = "$"; + SeparatorChar = ';'; + CommentColumn = 40; + CommentString = "#"; + LabelSuffix = ":"; + GlobalPrefix = ""; + PrivateGlobalPrefix = "."; + LinkerPrivateGlobalPrefix = ""; + InlineAsmStart = "APP"; + InlineAsmEnd = "NO_APP"; + AssemblerDialect = 0; + AllowQuotesInName = false; + AllowNameToStartWithDigit = false; + AllowPeriodsInName = true; + ZeroDirective = "\t.zero\t"; + AsciiDirective = "\t.ascii\t"; + AscizDirective = "\t.asciz\t"; + Data8bitsDirective = "\t.byte\t"; + Data16bitsDirective = "\t.short\t"; + Data32bitsDirective = "\t.long\t"; + Data64bitsDirective = "\t.quad\t"; + SunStyleELFSectionSwitchSyntax = false; + UsesELFSectionDirectiveForBSS = false; + AlignDirective = "\t.align\t"; + AlignmentIsInBytes = true; + TextAlignFillValue = 0; + GPRel32Directive = 0; + GlobalDirective = "\t.globl\t"; + HasSetDirective = true; + HasAggressiveSymbolFolding = true; + HasLCOMMDirective = false; + COMMDirectiveAlignmentIsInBytes = true; + HasDotTypeDotSizeDirective = true; + HasSingleParameterDotFile = true; + HasNoDeadStrip = false; + HasSymbolResolver = false; + WeakRefDirective = 0; + WeakDefDirective = 0; + LinkOnceDirective = 0; + HiddenVisibilityAttr = MCSA_Hidden; + HiddenDeclarationVisibilityAttr = MCSA_Hidden; + ProtectedVisibilityAttr = MCSA_Protected; + HasLEB128 = false; + SupportsDebugInformation = false; + ExceptionsType = ExceptionHandling::None; + DwarfRequiresFrameSection = true; + DwarfUsesInlineInfoSection = false; + DwarfUsesAbsoluteLabelForStmtList = true; + DwarfSectionOffsetDirective = 0; + DwarfUsesLabelOffsetForRanges = true; + HasMicrosoftFastStdCallMangling = false; + + AsmTransCBE = 0; +} + +MCAsmInfo::~MCAsmInfo() { +} + + +unsigned MCAsmInfo::getULEB128Size(unsigned Value) { + unsigned Size = 0; + do { + Value >>= 7; + Size += sizeof(int8_t); + } while (Value); + return Size; +} + +unsigned MCAsmInfo::getSLEB128Size(int Value) { + unsigned Size = 0; + int Sign = Value >> (8 * sizeof(Value) - 1); + bool IsMore; + + do { + unsigned Byte = Value & 0x7f; + Value >>= 7; + IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; + Size += sizeof(int8_t); + } while (IsMore); + return Size; +} diff --git a/final/lib/MC/MCAsmInfoCOFF.cpp b/final/lib/MC/MCAsmInfoCOFF.cpp new file mode 100644 index 00000000000..7fc7d7abb23 --- /dev/null +++ b/final/lib/MC/MCAsmInfoCOFF.cpp @@ -0,0 +1,37 @@ +//===-- MCAsmInfoCOFF.cpp - COFF asm properties -----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
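getULEB128Size above counts one byte per 7-bit group without materializing the encoding. A standalone encoder (a sketch, not the LLVM helper) makes the correspondence visible: it emits exactly one byte per iteration of the same loop shape, so the sizes agree:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    static std::vector<uint8_t> encodeULEB128(uint64_t v) {
      std::vector<uint8_t> out;
      do {
        uint8_t byte = v & 0x7f;  // low 7 bits
        v >>= 7;
        if (v)
          byte |= 0x80;           // more-bytes flag
        out.push_back(byte);
      } while (v);
      return out;
    }

    int main() {
      assert(encodeULEB128(0).size() == 1);
      assert(encodeULEB128(127).size() == 1);
      assert(encodeULEB128(128).size() == 2); // 0x80 0x01
    }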
+// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on COFF-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoCOFF.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +MCAsmInfoCOFF::MCAsmInfoCOFF() { + GlobalPrefix = "_"; + COMMDirectiveAlignmentIsInBytes = false; + HasLCOMMDirective = true; + HasDotTypeDotSizeDirective = false; + HasSingleParameterDotFile = false; + PrivateGlobalPrefix = "L"; // Prefix for private global symbols + WeakRefDirective = "\t.weak\t"; + LinkOnceDirective = "\t.linkonce discard\n"; + + // Doesn't support visibility: + HiddenVisibilityAttr = ProtectedVisibilityAttr = MCSA_Invalid; + + // Set up DWARF directives + HasLEB128 = true; // Target asm supports leb128 directives (little-endian) + SupportsDebugInformation = true; + DwarfSectionOffsetDirective = "\t.secrel32\t"; + HasMicrosoftFastStdCallMangling = true; +} diff --git a/final/lib/MC/MCAsmInfoDarwin.cpp b/final/lib/MC/MCAsmInfoDarwin.cpp new file mode 100644 index 00000000000..526ad0da42a --- /dev/null +++ b/final/lib/MC/MCAsmInfoDarwin.cpp @@ -0,0 +1,59 @@ +//===-- MCAsmInfoDarwin.cpp - Darwin asm properties -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines target asm properties related what form asm statements +// should take in general on Darwin-based targets +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfoDarwin.h" +using namespace llvm; + +MCAsmInfoDarwin::MCAsmInfoDarwin() { + // Common settings for all Darwin targets. + // Syntax: + GlobalPrefix = "_"; + PrivateGlobalPrefix = "L"; + LinkerPrivateGlobalPrefix = "l"; + AllowQuotesInName = true; + HasSingleParameterDotFile = false; + HasSubsectionsViaSymbols = true; + + AlignmentIsInBytes = false; + COMMDirectiveAlignmentIsInBytes = false; + InlineAsmStart = " InlineAsm Start"; + InlineAsmEnd = " InlineAsm End"; + + // Directives: + WeakDefDirective = "\t.weak_definition "; + WeakRefDirective = "\t.weak_reference "; + ZeroDirective = "\t.space\t"; // ".space N" emits N zeros. + HasMachoZeroFillDirective = true; // Uses .zerofill + HasMachoTBSSDirective = true; // Uses .tbss + HasStaticCtorDtorReferenceInStaticMode = true; + + // FIXME: Darwin 10 and newer don't need this. + LinkerRequiresNonEmptyDwarfLines = true; + + // FIXME: Change this once MC is the system assembler. + HasAggressiveSymbolFolding = false; + + HiddenVisibilityAttr = MCSA_PrivateExtern; + HiddenDeclarationVisibilityAttr = MCSA_Invalid; + // Doesn't support protected visibility. 
+ ProtectedVisibilityAttr = MCSA_Global; + + HasDotTypeDotSizeDirective = false; + HasNoDeadStrip = true; + HasSymbolResolver = true; + + DwarfUsesAbsoluteLabelForStmtList = false; + DwarfUsesLabelOffsetForRanges = false; +} + diff --git a/final/lib/MC/MCAsmStreamer.cpp b/final/lib/MC/MCAsmStreamer.cpp new file mode 100644 index 00000000000..c7ecf53c7f3 --- /dev/null +++ b/final/lib/MC/MCAsmStreamer.cpp @@ -0,0 +1,974 @@ +//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include +using namespace llvm; + +namespace { + +class MCAsmStreamer : public MCStreamer { + formatted_raw_ostream &OS; + const MCAsmInfo &MAI; + OwningPtr InstPrinter; + OwningPtr Emitter; + OwningPtr AsmBackend; + + SmallString<128> CommentToEmit; + raw_svector_ostream CommentStream; + + unsigned IsVerboseAsm : 1; + unsigned ShowInst : 1; + unsigned UseLoc : 1; + + bool needsSet(const MCExpr *Value); + +public: + MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, + bool isVerboseAsm, + bool useLoc, + MCInstPrinter *printer, MCCodeEmitter *emitter, + TargetAsmBackend *asmbackend, + bool showInst) + : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), + InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), + CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), + ShowInst(showInst), UseLoc(useLoc) { + if (InstPrinter && IsVerboseAsm) + InstPrinter->setCommentStream(CommentStream); + } + ~MCAsmStreamer() {} + + inline void EmitEOL() { + // If we don't have any comments, just emit a \n. + if (!IsVerboseAsm) { + OS << '\n'; + return; + } + EmitCommentsAndEOL(); + } + void EmitCommentsAndEOL(); + + /// isVerboseAsm - Return true if this streamer supports verbose assembly at + /// all. + virtual bool isVerboseAsm() const { return IsVerboseAsm; } + + /// hasRawTextSupport - We support EmitRawText. + virtual bool hasRawTextSupport() const { return true; } + + /// AddComment - Add a comment that can be emitted to the generated .s + /// file if applicable as a QoI issue to make the output of the compiler + /// more readable. This only affects the MCAsmStreamer, and only when + /// verbose assembly output is enabled. + virtual void AddComment(const Twine &T); + + /// AddEncodingComment - Add a comment showing the encoding of an instruction. + virtual void AddEncodingComment(const MCInst &Inst); + + /// GetCommentOS - Return a raw_ostream that comments can be written to. + /// Unlike AddComment, you are required to terminate comments with \n if you + /// use this method. 
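The AddComment/EmitEOL protocol declared above buffers comments while the statement text is built, then flushes both together so comments land in a fixed column. A toy model of that pairing (ToyStreamer is illustrative; column 40 and '#' mirror the MCAsmInfo defaults set earlier in this patch):

    #include <cstdio>
    #include <string>
    #include <vector>

    struct ToyStreamer {
      std::string Line;                   // statement being assembled
      std::vector<std::string> Comments;  // buffered until end of line
      void addComment(const std::string &C) { Comments.push_back(C); }
      void emitEOL() {
        if (Comments.empty()) {
          std::printf("%s\n", Line.c_str());
        } else {
          std::printf("%-40s# %s\n", Line.c_str(), Comments[0].c_str());
          for (size_t i = 1; i < Comments.size(); ++i)
            std::printf("%40s# %s\n", "", Comments[i].c_str());
        }
        Line.clear();
        Comments.clear();
      }
    };

    int main() {
      ToyStreamer S;
      S.Line = "\tmovl\t%eax, %ebx";
      S.addComment("copy eax");
      S.emitEOL(); // statement, padded, then "# copy eax"
    }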
+ virtual raw_ostream &GetCommentOS() { + if (!IsVerboseAsm) + return nulls(); // Discard comments unless in verbose asm mode. + return CommentStream; + } + + /// AddBlankLine - Emit a blank line to a .s file to pretty it up. + virtual void AddBlankLine() { + EmitEOL(); + } + + /// @name MCStreamer Interface + /// @{ + + virtual void ChangeSection(const MCSection *Section); + + virtual void InitSections() { + // FIXME, this is MachO specific, but the testsuite + // expects this. + SwitchSection(getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); + } + + virtual void EmitLabel(MCSymbol *Symbol); + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label); + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + + /// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. + /// + /// @param Symbol - The common symbol to emit. + /// @param Size - The size of the common symbol. + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace); + virtual void EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace = 0); + + virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + + virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + + virtual void EmitGPRel32Value(const MCExpr *Value); + + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace); + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + + virtual void EmitFileDirective(StringRef Filename); + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); + virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, unsigned Discriminator); + + virtual bool EmitCFIStartProc(); + virtual bool EmitCFIEndProc(); + virtual bool EmitCFIDefCfaOffset(int64_t Offset); + virtual bool EmitCFIDefCfaRegister(int64_t Register); + virtual bool EmitCFIOffset(int64_t Register, int64_t Offset); + virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); + virtual bool EmitCFILsda(const 
MCSymbol *Sym, unsigned Encoding); + + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Personality); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl &RegList, bool); + + + virtual void EmitInstruction(const MCInst &Inst); + + /// EmitRawText - If this file is backed by an assembly streamer, this dumps + /// the specified string in the output .s file. This capability is + /// indicated by the hasRawTextSupport() predicate. + virtual void EmitRawText(StringRef String); + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +/// AddComment - Add a comment that can be emitted to the generated .s +/// file if applicable as a QoI issue to make the output of the compiler +/// more readable. This only affects the MCAsmStreamer, and only when +/// verbose assembly output is enabled. +void MCAsmStreamer::AddComment(const Twine &T) { + if (!IsVerboseAsm) return; + + // Make sure that CommentStream is flushed. + CommentStream.flush(); + + T.toVector(CommentToEmit); + // Each comment goes on its own line. + CommentToEmit.push_back('\n'); + + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +void MCAsmStreamer::EmitCommentsAndEOL() { + if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) { + OS << '\n'; + return; + } + + CommentStream.flush(); + StringRef Comments = CommentToEmit.str(); + + assert(Comments.back() == '\n' && + "Comment array not newline terminated"); + do { + // Emit a line of comments. + OS.PadToColumn(MAI.getCommentColumn()); + size_t Position = Comments.find('\n'); + OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n'; + + Comments = Comments.substr(Position+1); + } while (!Comments.empty()); + + CommentToEmit.clear(); + // Tell the comment stream that the vector changed underneath it. + CommentStream.resync(); +} + +static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) { + assert(Bytes && "Invalid size!"); + return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8)); +} + +void MCAsmStreamer::ChangeSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + Section->PrintSwitchToSection(MAI, OS); +} + +void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + + OS << *Symbol << MAI.getLabelSuffix(); + EmitEOL(); + Symbol->setSection(*getCurrentSection()); +} + +void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: assert(0 && "Invalid flag!"); + case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break; + case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break; + case MCAF_Code16: OS << "\t.code\t16"; break; + case MCAF_Code32: OS << "\t.code\t32"; break; + } + EmitEOL(); +} + +void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) { + // This needs to emit to a temporary string to get properly quoted + // MCSymbols when they have spaces in them. 
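truncateToSize above masks a 64-bit value down to its low Bytes*8 bits so that, say, a one-byte data directive never prints more than one byte of a wider constant. The same expression, lifted into a testable unit (valid for Bytes in 1..8, as the original's assert requires):

    #include <cassert>
    #include <cstdint>

    static int64_t truncateToSize(int64_t Value, unsigned Bytes) {
      // All-ones mask shifted right leaves exactly Bytes*8 low bits set.
      return Value & ((uint64_t)(int64_t)-1 >> (64 - Bytes * 8));
    }

    int main() {
      assert(truncateToSize(0x1234, 1) == 0x34);
      assert(truncateToSize(-1, 2) == 0xffff);
    }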
+ OS << "\t.thumb_func"; + if (Func) + OS << '\t' << *Func; + EmitEOL(); +} + +void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + OS << *Symbol << " = " << *Value; + EmitEOL(); + + // FIXME: Lift context changes into super class. + Symbol->setVariableValue(Value); +} + +void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { + OS << ".weakref " << *Alias << ", " << *Symbol; + EmitEOL(); +} + +void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) { + EmitDwarfSetLineAddr(LineDelta, Label, + getContext().getTargetAsmInfo().getPointerSize()); +} + +void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + switch (Attribute) { + case MCSA_Invalid: assert(0 && "Invalid symbol attribute"); + case MCSA_ELF_TypeFunction: /// .type _foo, STT_FUNC # aka @function + case MCSA_ELF_TypeIndFunction: /// .type _foo, STT_GNU_IFUNC + case MCSA_ELF_TypeObject: /// .type _foo, STT_OBJECT # aka @object + case MCSA_ELF_TypeTLS: /// .type _foo, STT_TLS # aka @tls_object + case MCSA_ELF_TypeCommon: /// .type _foo, STT_COMMON # aka @common + case MCSA_ELF_TypeNoType: /// .type _foo, STT_NOTYPE # aka @notype + case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object + assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported"); + OS << "\t.type\t" << *Symbol << ',' + << ((MAI.getCommentString()[0] != '@') ? '@' : '%'); + switch (Attribute) { + default: assert(0 && "Unknown ELF .type"); + case MCSA_ELF_TypeFunction: OS << "function"; break; + case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break; + case MCSA_ELF_TypeObject: OS << "object"; break; + case MCSA_ELF_TypeTLS: OS << "tls_object"; break; + case MCSA_ELF_TypeCommon: OS << "common"; break; + case MCSA_ELF_TypeNoType: OS << "no_type"; break; + case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break; + } + EmitEOL(); + return; + case MCSA_Global: // .globl/.global + OS << MAI.getGlobalDirective(); + break; + case MCSA_Hidden: OS << "\t.hidden\t"; break; + case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; + case MCSA_Internal: OS << "\t.internal\t"; break; + case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break; + case MCSA_Local: OS << "\t.local\t"; break; + case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; + case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break; + case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case MCSA_Protected: OS << "\t.protected\t"; break; + case MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Weak: OS << "\t.weak\t"; break; + case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; + // .weak_reference + case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; + case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; + } + + OS << *Symbol; + EmitEOL(); +} + +void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + OS << ".desc" << ' ' << *Symbol << ',' << DescValue; + EmitEOL(); +} + +void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) { + OS << "\t.def\t " << *Symbol << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) { + OS << "\t.scl\t" << StorageClass << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EmitCOFFSymbolType (int Type) { + OS << "\t.type\t" << Type << ';'; + EmitEOL(); +} + +void MCAsmStreamer::EndCOFFSymbolDef() { + OS << "\t.endef"; + EmitEOL(); +} + +void 
MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(MAI.hasDotTypeDotSizeDirective()); + OS << "\t.size\t" << *Symbol << ", " << *Value << '\n'; +} + +void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + OS << "\t.comm\t" << *Symbol << ',' << Size; + if (ByteAlignment != 0) { + if (MAI.getCOMMDirectiveAlignmentIsInBytes()) + OS << ',' << ByteAlignment; + else + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol. +/// +/// @param Symbol - The common symbol to emit. +/// @param Size - The size of the common symbol. +void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(MAI.hasLCOMMDirective() && "Doesn't have .lcomm, can't emit it!"); + OS << "\t.lcomm\t" << *Symbol << ',' << Size; + EmitEOL(); +} + +void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + // Note: a .zerofill directive does not switch sections. + OS << ".zerofill "; + + // This is a mach-o specific directive. + const MCSectionMachO *MOSection = ((const MCSectionMachO*)Section); + OS << MOSection->getSegmentName() << "," << MOSection->getSectionName(); + + if (Symbol != NULL) { + OS << ',' << *Symbol << ',' << Size; + if (ByteAlignment != 0) + OS << ',' << Log2_32(ByteAlignment); + } + EmitEOL(); +} + +// .tbss sym, size, align +// This depends that the symbol has already been mangled from the original, +// e.g. _a. +void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + assert(Symbol != NULL && "Symbol shouldn't be NULL!"); + // Instead of using the Section we'll just use the shortcut. + // This is a mach-o specific directive and section. + OS << ".tbss " << *Symbol << ", " << Size; + + // Output align if we have it. We default to 1 so don't bother printing + // that. 
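EmitCommonSymbol above prints the .comm alignment either in bytes or as a power-of-two exponent, depending on COMMDirectiveAlignmentIsInBytes. A small sketch of the two renderings (log2u is a local stand-in for LLVM's Log2_32; the directive text is illustrative):

    #include <cstdio>

    static unsigned log2u(unsigned v) { // v must be a power of two
      unsigned r = 0;
      while (v >>= 1)
        ++r;
      return r;
    }

    int main() {
      unsigned ByteAlignment = 16;
      std::printf("\t.comm\tsym,8,%u\n", ByteAlignment);        // bytes form: 16
      std::printf("\t.comm\tsym,8,%u\n", log2u(ByteAlignment)); // log2 form: 4
    }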
+ if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment); + + EmitEOL(); +} + +static inline char toOctal(int X) { return (X&7)+'0'; } + +static void PrintQuotedString(StringRef Data, raw_ostream &OS) { + OS << '"'; + + for (unsigned i = 0, e = Data.size(); i != e; ++i) { + unsigned char C = Data[i]; + if (C == '"' || C == '\\') { + OS << '\\' << (char)C; + continue; + } + + if (isprint((unsigned char)C)) { + OS << (char)C; + continue; + } + + switch (C) { + case '\b': OS << "\\b"; break; + case '\f': OS << "\\f"; break; + case '\n': OS << "\\n"; break; + case '\r': OS << "\\r"; break; + case '\t': OS << "\\t"; break; + default: + OS << '\\'; + OS << toOctal(C >> 6); + OS << toOctal(C >> 3); + OS << toOctal(C >> 0); + break; + } + } + + OS << '"'; +} + + +void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + if (Data.empty()) return; + + if (Data.size() == 1) { + OS << MAI.getData8bitsDirective(AddrSpace); + OS << (unsigned)(unsigned char)Data[0]; + EmitEOL(); + return; + } + + // If the data ends with 0 and the target supports .asciz, use it, otherwise + // use .ascii + if (MAI.getAscizDirective() && Data.back() == 0) { + OS << MAI.getAscizDirective(); + Data = Data.substr(0, Data.size()-1); + } else { + OS << MAI.getAsciiDirective(); + } + + OS << ' '; + PrintQuotedString(Data, OS); + EmitEOL(); +} + +void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, + unsigned AddrSpace) { + EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace); +} + +void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + assert(!isPCRel && "Cannot emit pc relative relocations!"); + const char *Directive = 0; + switch (Size) { + default: break; + case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break; + case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break; + case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break; + case 8: + Directive = MAI.getData64bitsDirective(AddrSpace); + // If the target doesn't support 64-bit data, emit as two 32-bit halves. 
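PrintQuotedString above is what keeps .ascii/.asciz output assembler-safe: quote and backslash are escaped, common control characters get C escapes, and every other non-printable byte becomes three octal digits. A standalone equivalent (quoteForAsm is an illustrative name, with a reduced escape set):

    #include <cstdio>
    #include <string>

    static std::string quoteForAsm(const std::string &Data) {
      std::string Out = "\"";
      for (size_t i = 0; i != Data.size(); ++i) {
        unsigned char C = Data[i];
        if (C == '"' || C == '\\') { Out += '\\'; Out += char(C); }
        else if (C == '\n') Out += "\\n";
        else if (C == '\t') Out += "\\t";
        else if (C >= 0x20 && C < 0x7f) Out += char(C);   // printable ASCII
        else {
          Out += '\\';                                    // \NNN octal escape
          Out += char('0' + ((C >> 6) & 7));
          Out += char('0' + ((C >> 3) & 7));
          Out += char('0' + (C & 7));
        }
      }
      Out += '"';
      return Out;
    }

    int main() { std::printf("%s\n", quoteForAsm("hi\tthere\x01").c_str()); }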
+ if (Directive) break; + int64_t IntValue; + if (!Value->EvaluateAsAbsolute(IntValue)) + report_fatal_error("Don't know how to emit this value."); + if (getContext().getTargetAsmInfo().isLittleEndian()) { + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + } else { + EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace); + EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace); + } + return; + } + + assert(Directive && "Invalid size for machine code value!"); + OS << Directive << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) { + int64_t IntValue; + if (Value->EvaluateAsAbsolute(IntValue)) { + EmitULEB128IntValue(IntValue, AddrSpace); + return; + } + assert(MAI.hasLEB128() && "Cannot print a .uleb"); + OS << ".uleb128 " << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) { + int64_t IntValue; + if (Value->EvaluateAsAbsolute(IntValue)) { + EmitSLEB128IntValue(IntValue, AddrSpace); + return; + } + assert(MAI.hasLEB128() && "Cannot print a .sleb"); + OS << ".sleb128 " << *Value; + EmitEOL(); +} + +void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) { + assert(MAI.getGPRel32Directive() != 0); + OS << MAI.getGPRel32Directive() << *Value; + EmitEOL(); +} + + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + if (NumBytes == 0) return; + + if (AddrSpace == 0) + if (const char *ZeroDirective = MAI.getZeroDirective()) { + OS << ZeroDirective << NumBytes; + if (FillValue != 0) + OS << ',' << (int)FillValue; + EmitEOL(); + return; + } + + // Emit a byte at a time. + MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); +} + +void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { + // Some assemblers don't support non-power of two alignments, so we always + // emit alignments as a power of two if possible. + if (isPowerOf2_32(ByteAlignment)) { + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << MAI.getAlignDirective(); break; + // FIXME: use MAI for this! + case 2: OS << ".p2alignw "; break; + case 4: OS << ".p2alignl "; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + if (MAI.getAlignmentIsInBytes()) + OS << ByteAlignment; + else + OS << Log2_32(ByteAlignment); + + if (Value || MaxBytesToEmit) { + OS << ", 0x"; + OS.write_hex(truncateToSize(Value, ValueSize)); + + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + } + EmitEOL(); + return; + } + + // Non-power of two alignment. This is not widely supported by assemblers. + // FIXME: Parameterize this based on MAI. + switch (ValueSize) { + default: llvm_unreachable("Invalid size for machine code value!"); + case 1: OS << ".balign"; break; + case 2: OS << ".balignw"; break; + case 4: OS << ".balignl"; break; + case 8: llvm_unreachable("Unsupported alignment size!"); + } + + OS << ' ' << ByteAlignment; + OS << ", " << truncateToSize(Value, ValueSize); + if (MaxBytesToEmit) + OS << ", " << MaxBytesToEmit; + EmitEOL(); +} + +void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // Emit with a text fill value. 
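When a target has no 64-bit data directive, EmitValueImpl above splits the constant into two 32-bit emissions whose order follows the target's endianness. The arithmetic in isolation (all values hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t V = 0x1122334455667788ULL;
      uint32_t Lo = uint32_t(V >> 0), Hi = uint32_t(V >> 32);
      bool LittleEndian = true; // a target property in the real code
      uint32_t First  = LittleEndian ? Lo : Hi;
      uint32_t Second = LittleEndian ? Hi : Lo;
      assert(First == 0x55667788u && Second == 0x11223344u);
    }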
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(), + 1, MaxBytesToEmit); +} + +void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + // FIXME: Verify that Offset is associated with the current section. + OS << ".org " << *Offset << ", " << (unsigned) Value; + EmitEOL(); +} + + +void MCAsmStreamer::EmitFileDirective(StringRef Filename) { + assert(MAI.hasSingleParameterDotFile()); + OS << "\t.file\t"; + PrintQuotedString(Filename, OS); + EmitEOL(); +} + +bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ + if (UseLoc) { + OS << "\t.file\t" << FileNo << ' '; + PrintQuotedString(Filename, OS); + EmitEOL(); + } + return this->MCStreamer::EmitDwarfFileDirective(FileNo, Filename); +} + +void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, + unsigned Discriminator) { + this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags, + Isa, Discriminator); + if (!UseLoc) + return; + + OS << "\t.loc\t" << FileNo << " " << Line << " " << Column; + if (Flags & DWARF2_FLAG_BASIC_BLOCK) + OS << " basic_block"; + if (Flags & DWARF2_FLAG_PROLOGUE_END) + OS << " prologue_end"; + if (Flags & DWARF2_FLAG_EPILOGUE_BEGIN) + OS << " epilogue_begin"; + + unsigned OldFlags = getContext().getCurrentDwarfLoc().getFlags(); + if ((Flags & DWARF2_FLAG_IS_STMT) != (OldFlags & DWARF2_FLAG_IS_STMT)) { + OS << " is_stmt "; + + if (Flags & DWARF2_FLAG_IS_STMT) + OS << "1"; + else + OS << "0"; + } + + if (Isa) + OS << "isa " << Isa; + if (Discriminator) + OS << "discriminator " << Discriminator; + EmitEOL(); +} + +bool MCAsmStreamer::EmitCFIStartProc() { + if (this->MCStreamer::EmitCFIStartProc()) + return true; + + OS << "\t.cfi_startproc"; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFIEndProc() { + if (this->MCStreamer::EmitCFIEndProc()) + return true; + + OS << "\t.cfi_endproc"; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { + if (this->MCStreamer::EmitCFIDefCfaOffset(Offset)) + return true; + + OS << "\t.cfi_def_cfa_offset " << Offset; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { + if (this->MCStreamer::EmitCFIDefCfaRegister(Register)) + return true; + + OS << "\t.cfi_def_cfa_register " << Register; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { + if (this->MCStreamer::EmitCFIOffset(Register, Offset)) + return true; + + OS << "\t.cfi_offset " << Register << ", " << Offset; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, + unsigned Encoding) { + if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding)) + return true; + + OS << "\t.cfi_personality " << Encoding << ", " << *Sym; + EmitEOL(); + + return false; +} + +bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { + if (this->MCStreamer::EmitCFILsda(Sym, Encoding)) + return true; + + OS << "\t.cfi_lsda " << Encoding << ", " << *Sym; + EmitEOL(); + + return false; +} + +void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { + raw_ostream &OS = GetCommentOS(); + SmallString<256> Code; + SmallVector Fixups; + raw_svector_ostream VecOS(Code); + Emitter->EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // If we are showing fixups, create symbolic markers in the encoded + // representation. 
We do this by making a per-bit map to the fixup item index, + // then trying to display it as nicely as possible. + SmallVector FixupMap; + FixupMap.resize(Code.size() * 8); + for (unsigned i = 0, e = Code.size() * 8; i != e; ++i) + FixupMap[i] = 0; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind()); + for (unsigned j = 0; j != Info.TargetSize; ++j) { + unsigned Index = F.getOffset() * 8 + Info.TargetOffset + j; + assert(Index < Code.size() * 8 && "Invalid offset in fixup!"); + FixupMap[Index] = 1 + i; + } + } + + // FIXME: Node the fixup comments for Thumb2 are completely bogus since the + // high order halfword of a 32-bit Thumb2 instruction is emitted first. + OS << "encoding: ["; + for (unsigned i = 0, e = Code.size(); i != e; ++i) { + if (i) + OS << ','; + + // See if all bits are the same map entry. + uint8_t MapEntry = FixupMap[i * 8 + 0]; + for (unsigned j = 1; j != 8; ++j) { + if (FixupMap[i * 8 + j] == MapEntry) + continue; + + MapEntry = uint8_t(~0U); + break; + } + + if (MapEntry != uint8_t(~0U)) { + if (MapEntry == 0) { + OS << format("0x%02x", uint8_t(Code[i])); + } else { + if (Code[i]) { + // FIXME: Some of the 8 bits require fix up. + OS << format("0x%02x", uint8_t(Code[i])) << '\'' + << char('A' + MapEntry - 1) << '\''; + } else + OS << char('A' + MapEntry - 1); + } + } else { + // Otherwise, write out in binary. + OS << "0b"; + for (unsigned j = 8; j--;) { + unsigned Bit = (Code[i] >> j) & 1; + + unsigned FixupBit; + if (getContext().getTargetAsmInfo().isLittleEndian()) + FixupBit = i * 8 + j; + else + FixupBit = i * 8 + (7-j); + + if (uint8_t MapEntry = FixupMap[FixupBit]) { + assert(Bit == 0 && "Encoder wrote into fixed up bit!"); + OS << char('A' + MapEntry - 1); + } else + OS << Bit; + } + } + } + OS << "]\n"; + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + MCFixup &F = Fixups[i]; + const MCFixupKindInfo &Info = AsmBackend->getFixupKindInfo(F.getKind()); + OS << " fixup " << char('A' + i) << " - " << "offset: " << F.getOffset() + << ", value: " << *F.getValue() << ", kind: " << Info.Name << "\n"; + } +} + +void MCAsmStreamer::EmitFnStart() { + OS << "\t.fnstart"; + EmitEOL(); +} + +void MCAsmStreamer::EmitFnEnd() { + OS << "\t.fnend"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCantUnwind() { + OS << "\t.cantunwind"; + EmitEOL(); +} + +void MCAsmStreamer::EmitHandlerData() { + OS << "\t.handlerdata"; + EmitEOL(); +} + +void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) { + OS << "\t.personality " << Personality->getName(); + EmitEOL(); +} + +void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + OS << "\t.setfp\t" << InstPrinter->getRegName(FpReg) + << ", " << InstPrinter->getRegName(SpReg); + if (Offset) + OS << ", #" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitPad(int64_t Offset) { + OS << "\t.pad\t#" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitRegSave(const SmallVectorImpl &RegList, + bool isVector) { + assert(RegList.size() && "RegList should not be empty"); + if (isVector) + OS << "\t.vsave\t{"; + else + OS << "\t.save\t{"; + + OS << InstPrinter->getRegName(RegList[0]); + + for (unsigned i = 1, e = RegList.size(); i != e; ++i) + OS << ", " << InstPrinter->getRegName(RegList[i]); + + OS << "}"; + EmitEOL(); +} + +void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { + assert(getCurrentSection() && "Cannot emit contents before setting section!"); + + if (!UseLoc) + 
MCLineEntry::Make(this, getCurrentSection()); + + // Show the encoding in a comment if we have a code emitter. + if (Emitter) + AddEncodingComment(Inst); + + // Show the MCInst if enabled. + if (ShowInst) { + Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n "); + GetCommentOS() << "\n"; + } + + // If we have an AsmPrinter, use that to print, otherwise print the MCInst. + if (InstPrinter) + InstPrinter->printInst(&Inst, OS); + else + Inst.print(OS, &MAI); + EmitEOL(); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. +void MCAsmStreamer::EmitRawText(StringRef String) { + if (!String.empty() && String.back() == '\n') + String = String.substr(0, String.size()-1); + OS << String; + EmitEOL(); +} + +void MCAsmStreamer::Finish() { + // Dump out the dwarf file & directory tables and line tables. + if (getContext().hasDwarfFiles() && !UseLoc) + MCDwarfFileTable::Emit(this); +} + +MCStreamer *llvm::createAsmStreamer(MCContext &Context, + formatted_raw_ostream &OS, + bool isVerboseAsm, bool useLoc, + MCInstPrinter *IP, MCCodeEmitter *CE, + TargetAsmBackend *TAB, bool ShowInst) { + return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, + IP, CE, TAB, ShowInst); +} diff --git a/final/lib/MC/MCAssembler.cpp b/final/lib/MC/MCAssembler.cpp new file mode 100644 index 00000000000..99926460422 --- /dev/null +++ b/final/lib/MC/MCAssembler.cpp @@ -0,0 +1,949 @@ +//===- lib/MC/MCAssembler.cpp - Assembler Backend Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "assembler" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmBackend.h" + +#include +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); +STATISTIC(EvaluateFixup, "Number of evaluated fixups"); +STATISTIC(FragmentLayouts, "Number of fragment layouts"); +STATISTIC(ObjectBytes, "Number of emitted object file bytes"); +STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps"); +STATISTIC(RelaxedInstructions, "Number of relaxed instructions"); +} +} + +// FIXME FIXME FIXME: There are number of places in this file where we convert +// what is a 64-bit assembler value used for computation into a value in the +// object file, which may truncate it. We should detect that truncation where +// invalid and report errors back. + +/* *** */ + +MCAsmLayout::MCAsmLayout(MCAssembler &Asm) + : Assembler(Asm), LastValidFragment() + { + // Compute the section layout order. Virtual sections must go last. 
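The MCAsmLayout constructor below orders sections with two passes so that virtual (contents-free) sections sort after all concrete ones while each group keeps its relative order. The same idea over a toy section record (Sec and layoutOrder are illustrative, not LLVM types):

    #include <cassert>
    #include <string>
    #include <vector>

    struct Sec { std::string Name; bool Virtual; };

    static std::vector<Sec> layoutOrder(const std::vector<Sec> &In) {
      std::vector<Sec> Out;
      for (size_t i = 0; i < In.size(); ++i)  // pass 1: concrete sections
        if (!In[i].Virtual) Out.push_back(In[i]);
      for (size_t i = 0; i < In.size(); ++i)  // pass 2: virtual sections
        if (In[i].Virtual) Out.push_back(In[i]);
      return Out;
    }

    int main() {
      Sec A = {".bss", true}, B = {".text", false}, C = {".data", false};
      std::vector<Sec> In;
      In.push_back(A); In.push_back(B); In.push_back(C);
      std::vector<Sec> Out = layoutOrder(In);
      assert(Out.back().Name == ".bss"); // virtual section moved last
    }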
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (!it->getSection().isVirtualSection()) + SectionOrder.push_back(&*it); + for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) + if (it->getSection().isVirtualSection()) + SectionOrder.push_back(&*it); +} + +bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const { + const MCSectionData &SD = *F->getParent(); + const MCFragment *LastValid = LastValidFragment.lookup(&SD); + if (!LastValid) + return false; + assert(LastValid->getParent() == F->getParent()); + return F->getLayoutOrder() <= LastValid->getLayoutOrder(); +} + +void MCAsmLayout::Invalidate(MCFragment *F) { + // If this fragment wasn't already up-to-date, we don't need to do anything. + if (!isFragmentUpToDate(F)) + return; + + // Otherwise, reset the last valid fragment to this fragment. + const MCSectionData &SD = *F->getParent(); + LastValidFragment[&SD] = F; +} + +void MCAsmLayout::EnsureValid(const MCFragment *F) const { + MCSectionData &SD = *F->getParent(); + + MCFragment *Cur = LastValidFragment[&SD]; + if (!Cur) + Cur = &*SD.begin(); + else + Cur = Cur->getNextNode(); + + // Advance the layout position until the fragment is up-to-date. + while (!isFragmentUpToDate(F)) { + const_cast(this)->LayoutFragment(Cur); + Cur = Cur->getNextNode(); + } +} + +uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { + EnsureValid(F); + assert(F->Offset != ~UINT64_C(0) && "Address not set!"); + return F->Offset; +} + +uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { + assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!"); + return getFragmentOffset(SD->getFragment()) + SD->getOffset(); +} + +uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const { + // The size is the last fragment's end offset. + const MCFragment &F = SD->getFragmentList().back(); + return getFragmentOffset(&F) + getAssembler().ComputeFragmentSize(*this, F); +} + +uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const { + // Virtual sections have no file size. + if (SD->getSection().isVirtualSection()) + return 0; + + // Otherwise, the file size is the same as the address space size. 
+ return getSectionAddressSize(SD); +} + +/* *** */ + +MCFragment::MCFragment() : Kind(FragmentType(~0)) { +} + +MCFragment::~MCFragment() { +} + +MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent) + : Kind(_Kind), Parent(_Parent), Atom(0), Offset(~UINT64_C(0)) +{ + if (Parent) + Parent->getFragmentList().push_back(this); +} + +/* *** */ + +MCSectionData::MCSectionData() : Section(0) {} + +MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A) + : Section(&_Section), + Ordinal(~UINT32_C(0)), + Alignment(1), + HasInstructions(false) +{ + if (A) + A->getSectionList().push_back(this); +} + +/* *** */ + +MCSymbolData::MCSymbolData() : Symbol(0) {} + +MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, + uint64_t _Offset, MCAssembler *A) + : Symbol(&_Symbol), Fragment(_Fragment), Offset(_Offset), + IsExternal(false), IsPrivateExtern(false), + CommonSize(0), SymbolSize(0), CommonAlign(0), + Flags(0), Index(0) +{ + if (A) + A->getSymbolList().push_back(this); +} + +/* *** */ + +MCAssembler::MCAssembler(MCContext &Context_, TargetAsmBackend &Backend_, + MCCodeEmitter &Emitter_, MCObjectWriter &Writer_, + raw_ostream &OS_) + : Context(Context_), Backend(Backend_), Emitter(Emitter_), Writer(Writer_), + OS(OS_), RelaxAll(false), NoExecStack(false), SubsectionsViaSymbols(false) +{ +} + +MCAssembler::~MCAssembler() { +} + +bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { + // Non-temporary labels should always be visible to the linker. + if (!Symbol.isTemporary()) + return true; + + // Absolute temporary labels are never visible. + if (!Symbol.isInSection()) + return false; + + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); +} + +const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { + // Linker visible symbols define atoms. + if (isSymbolLinkerVisible(SD->getSymbol())) + return SD; + + // Absolute and undefined symbols have no defining atom. + if (!SD->getFragment()) + return 0; + + // Non-linker visible symbols in sections which can't be atomized have no + // defining atom. + if (!getBackend().isSectionAtomizable( + SD->getFragment()->getParent()->getSection())) + return 0; + + // Otherwise, return the atom for the containing fragment. 
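isSymbolLinkerVisible above encodes three rules: non-temporary symbols are always visible, absolute temporaries never are, and in-section temporaries defer to the section. Condensed into a checkable predicate (ToySym is illustrative):

    #include <cassert>
    #include <string>

    struct ToySym {
      std::string Name;
      bool Temporary, InSection, SectionNeedsSyms;
    };

    static bool linkerVisible(const ToySym &S) {
      if (!S.Temporary) return true;   // real labels are always visible
      if (!S.InSection) return false;  // absolute temporaries never are
      return S.SectionNeedsSyms;       // otherwise the section decides
    }

    int main() {
      ToySym L = {"Ltmp0", true, true, false};
      ToySym G = {"main", false, true, false};
      assert(!linkerVisible(L) && linkerVisible(G));
    }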
+ return SD->getFragment()->getAtom(); +} + +bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, + const MCFixup &Fixup, const MCFragment *DF, + MCValue &Target, uint64_t &Value) const { + ++stats::EvaluateFixup; + + if (!Fixup.getValue()->EvaluateAsRelocatable(Target, Layout)) + report_fatal_error("expected relocatable expression"); + + bool IsPCRel = Backend.getFixupKindInfo( + Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel; + + bool IsResolved; + if (IsPCRel) { + if (Target.getSymB()) { + IsResolved = false; + } else if (!Target.getSymA()) { + IsResolved = false; + } else { + const MCSymbolRefExpr *A = Target.getSymA(); + const MCSymbol &SA = A->getSymbol(); + if (A->getKind() != MCSymbolRefExpr::VK_None || + SA.AliasedSymbol().isUndefined()) { + IsResolved = false; + } else { + const MCSymbolData &DataA = getSymbolData(SA); + IsResolved = + getWriter().IsSymbolRefDifferenceFullyResolvedImpl(*this, DataA, + *DF, false, true); + } + } + } else { + IsResolved = Target.isAbsolute(); + } + + Value = Target.getConstant(); + + bool IsThumb = false; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + const MCSymbol &Sym = A->getSymbol().AliasedSymbol(); + if (Sym.isDefined()) + Value += Layout.getSymbolOffset(&getSymbolData(Sym)); + if (isThumbFunc(&Sym)) + IsThumb = true; + } + if (const MCSymbolRefExpr *B = Target.getSymB()) { + const MCSymbol &Sym = B->getSymbol().AliasedSymbol(); + if (Sym.isDefined()) + Value -= Layout.getSymbolOffset(&getSymbolData(Sym)); + } + + + bool ShouldAlignPC = Backend.getFixupKindInfo(Fixup.getKind()).Flags & + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits; + assert((ShouldAlignPC ? IsPCRel : true) && + "FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!"); + + if (IsPCRel) { + uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset(); + + // A number of ARM fixups in Thumb mode require that the effective PC + // address be determined as the 32-bit aligned version of the actual offset. + if (ShouldAlignPC) Offset &= ~0x3; + Value -= Offset; + } + + // ARM fixups based from a thumb function address need to have the low + // bit set. The actual value is always at least 16-bit aligned, so the + // low bit is normally clear and available for use as an ISA flag for + // interworking. + if (IsThumb) + Value |= 1; + + return IsResolved; +} + +uint64_t MCAssembler::ComputeFragmentSize(const MCAsmLayout &Layout, + const MCFragment &F) const { + switch (F.getKind()) { + case MCFragment::FT_Data: + return cast(F).getContents().size(); + case MCFragment::FT_Fill: + return cast(F).getSize(); + case MCFragment::FT_Inst: + return cast(F).getInstSize(); + + case MCFragment::FT_LEB: + return cast(F).getContents().size(); + + case MCFragment::FT_Align: { + const MCAlignFragment &AF = cast(F); + unsigned Offset = Layout.getFragmentOffset(&AF); + unsigned Size = OffsetToAlignment(Offset, AF.getAlignment()); + if (Size > AF.getMaxBytesToEmit()) + return 0; + return Size; + } + + case MCFragment::FT_Org: { + MCOrgFragment &OF = cast(F); + int64_t TargetLocation; + if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, Layout)) + report_fatal_error("expected assembly-time absolute expression"); + + // FIXME: We need a way to communicate this error. 
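Stripped of the symbol plumbing, EvaluateFixup above computes value = constant + offset(A) - offset(B), and for PC-relative fixups also subtracts the fixup's own location, optionally rounded down to 4 bytes for Thumb (FKF_IsAlignedDownTo32Bits). Worked through with concrete numbers (all offsets hypothetical):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t SymOffset = 0x100; // layout offset of the referenced symbol
      int64_t Constant = 4;       // addend from the expression
      uint64_t FragOffset = 0x40, FixupOffset = 8;

      int64_t Value = Constant + (int64_t)SymOffset; // S + C
      bool IsPCRel = true;
      bool AlignPC = false; // models FKF_IsAlignedDownTo32Bits
      if (IsPCRel) {
        uint64_t PC = FragOffset + FixupOffset;
        if (AlignPC)
          PC &= ~UINT64_C(3); // Thumb: effective PC is 32-bit aligned
        Value -= (int64_t)PC;
      }
      assert(Value == 0xBC); // 0x104 - 0x48
    }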
+    uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
+    int64_t Size = TargetLocation - FragmentOffset;
+    if (Size < 0 || Size >= 0x40000000)
+      report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+                         "' (at offset '" + Twine(FragmentOffset) + "')");
+    return Size;
+  }
+
+  case MCFragment::FT_Dwarf:
+    return cast<MCDwarfLineAddrFragment>(F).getContents().size();
+  case MCFragment::FT_DwarfFrame:
+    return cast<MCDwarfCallFrameFragment>(F).getContents().size();
+  }
+
+  assert(0 && "invalid fragment kind");
+  return 0;
+}
+
+void MCAsmLayout::LayoutFragment(MCFragment *F) {
+  MCFragment *Prev = F->getPrevNode();
+
+  // We should never try to recompute something which is up-to-date.
+  assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
+  // We should never try to compute the fragment layout if its predecessor
+  // isn't up-to-date.
+  assert((!Prev || isFragmentUpToDate(Prev)) &&
+         "Attempt to compute fragment before its predecessor!");
+
+  ++stats::FragmentLayouts;
+
+  // Compute fragment offset and size.
+  uint64_t Offset = 0;
+  if (Prev)
+    Offset += Prev->Offset + getAssembler().ComputeFragmentSize(*this, *Prev);
+
+  F->Offset = Offset;
+  LastValidFragment[F->getParent()] = F;
+}
+
+/// WriteFragmentData - Write the \arg F data to the output file.
+static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                              const MCFragment &F) {
+  MCObjectWriter *OW = &Asm.getWriter();
+  uint64_t Start = OW->getStream().tell();
+  (void) Start;
+
+  ++stats::EmittedFragments;
+
+  // FIXME: Embed in fragments instead?
+  uint64_t FragmentSize = Asm.ComputeFragmentSize(Layout, F);
+  switch (F.getKind()) {
+  case MCFragment::FT_Align: {
+    const MCAlignFragment &AF = cast<MCAlignFragment>(F);
+    uint64_t Count = FragmentSize / AF.getValueSize();
+
+    assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+    // FIXME: This error shouldn't actually occur (the front end should emit
+    // multiple .align directives to enforce the semantics it wants), but is
+    // severe enough that we want to report it. How to handle this?
+    if (Count * AF.getValueSize() != FragmentSize)
+      report_fatal_error("undefined .align directive, value size '" +
+                         Twine(AF.getValueSize()) +
+                         "' is not a divisor of padding size '" +
+                         Twine(FragmentSize) + "'");
+
+    // See if we are aligning with nops, and if so ask the target to emit the
+    // right data for the Count bytes. Otherwise, fall through and use the
+    // Value and ValueSize to fill the rest.
+    if (AF.hasEmitNops()) {
+      if (!Asm.getBackend().WriteNopData(Count, OW))
+        report_fatal_error("unable to write nop sequence of " +
+                           Twine(Count) + " bytes");
+      break;
+    }
+
+    // Otherwise, write out in multiples of the value size.
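+    // (For reference: FragmentSize here came from OffsetToAlignment in
+    // ComputeFragmentSize. A standalone equivalent for power-of-two
+    // alignments, shown only as a sketch:
+    //
+    //   uint64_t padToAlignment(uint64_t Offset, uint64_t Align) {
+    //     return (Align - (Offset & (Align - 1))) & (Align - 1);
+    //   }
+    //
+    // e.g. Offset 0x3 with Align 4 needs 1 byte; Offset 0x4 needs 0.)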
+    for (uint64_t i = 0; i != Count; ++i) {
+      switch (AF.getValueSize()) {
+      default:
+        assert(0 && "Invalid size!");
+      case 1: OW->Write8 (uint8_t (AF.getValue())); break;
+      case 2: OW->Write16(uint16_t(AF.getValue())); break;
+      case 4: OW->Write32(uint32_t(AF.getValue())); break;
+      case 8: OW->Write64(uint64_t(AF.getValue())); break;
+      }
+    }
+    break;
+  }
+
+  case MCFragment::FT_Data: {
+    const MCDataFragment &DF = cast<MCDataFragment>(F);
+    assert(FragmentSize == DF.getContents().size() && "Invalid size!");
+    OW->WriteBytes(DF.getContents().str());
+    break;
+  }
+
+  case MCFragment::FT_Fill: {
+    const MCFillFragment &FF = cast<MCFillFragment>(F);
+
+    assert(FF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
+    for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
+      switch (FF.getValueSize()) {
+      default:
+        assert(0 && "Invalid size!");
+      case 1: OW->Write8 (uint8_t (FF.getValue())); break;
+      case 2: OW->Write16(uint16_t(FF.getValue())); break;
+      case 4: OW->Write32(uint32_t(FF.getValue())); break;
+      case 8: OW->Write64(uint64_t(FF.getValue())); break;
+      }
+    }
+    break;
+  }
+
+  case MCFragment::FT_Inst: {
+    const MCInstFragment &IF = cast<MCInstFragment>(F);
+    OW->WriteBytes(StringRef(IF.getCode().begin(), IF.getCode().size()));
+    break;
+  }
+
+  case MCFragment::FT_LEB: {
+    const MCLEBFragment &LF = cast<MCLEBFragment>(F);
+    OW->WriteBytes(LF.getContents().str());
+    break;
+  }
+
+  case MCFragment::FT_Org: {
+    const MCOrgFragment &OF = cast<MCOrgFragment>(F);
+
+    for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
+      OW->Write8(uint8_t(OF.getValue()));
+
+    break;
+  }
+
+  case MCFragment::FT_Dwarf: {
+    const MCDwarfLineAddrFragment &OF = cast<MCDwarfLineAddrFragment>(F);
+    OW->WriteBytes(OF.getContents().str());
+    break;
+  }
+  case MCFragment::FT_DwarfFrame: {
+    const MCDwarfCallFrameFragment &CF = cast<MCDwarfCallFrameFragment>(F);
+    OW->WriteBytes(CF.getContents().str());
+    break;
+  }
+  }
+
+  assert(OW->getStream().tell() - Start == FragmentSize);
+}
+
+void MCAssembler::WriteSectionData(const MCSectionData *SD,
+                                   const MCAsmLayout &Layout) const {
+  // Ignore virtual sections.
+  if (SD->getSection().isVirtualSection()) {
+    assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+    // Check that contents are only things legal inside a virtual section.
+    for (MCSectionData::const_iterator it = SD->begin(),
+           ie = SD->end(); it != ie; ++it) {
+      switch (it->getKind()) {
+      default:
+        assert(0 && "Invalid fragment in virtual section!");
+      case MCFragment::FT_Data: {
+        // Check that we aren't trying to write a non-zero contents (or fixups)
+        // into a virtual section. This is to support clients which use
+        // standard directives to fill the contents of virtual sections.
+        const MCDataFragment &DF = cast<MCDataFragment>(*it);
+        assert(DF.fixup_begin() == DF.fixup_end() &&
+               "Cannot have fixups in virtual section!");
+        for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
+          assert(DF.getContents()[i] == 0 &&
+                 "Invalid data value for virtual section!");
+        break;
+      }
+      case MCFragment::FT_Align:
+        // Check that we aren't trying to write a non-zero value into a
+        // virtual section.
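+        // (Context: virtual sections such as .bss occupy no bytes in the
+        // object file, so only zero-valued padding is representable there;
+        // e.g. the standard idiom
+        //
+        //     .bss
+        //     .align 4
+        //   buf:
+        //     .skip 1024          # zero fill, never written to the file
+        //
+        // is what these checks permit, while any non-zero value is caught by
+        // the assertions below.)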
+        assert((!cast<MCAlignFragment>(it)->getValueSize() ||
+                !cast<MCAlignFragment>(it)->getValue()) &&
+               "Invalid align in virtual section!");
+        break;
+      case MCFragment::FT_Fill:
+        assert(!cast<MCFillFragment>(it)->getValueSize() &&
+               "Invalid fill in virtual section!");
+        break;
+      }
+    }
+
+    return;
+  }
+
+  uint64_t Start = getWriter().getStream().tell();
+  (void) Start;
+
+  for (MCSectionData::const_iterator it = SD->begin(),
+         ie = SD->end(); it != ie; ++it)
+    WriteFragmentData(*this, Layout, *it);
+
+  assert(getWriter().getStream().tell() - Start ==
+         Layout.getSectionAddressSize(SD));
+}
+
+uint64_t MCAssembler::HandleFixup(const MCAsmLayout &Layout,
+                                  MCFragment &F,
+                                  const MCFixup &Fixup) {
+  // Evaluate the fixup.
+  MCValue Target;
+  uint64_t FixedValue;
+  if (!EvaluateFixup(Layout, Fixup, &F, Target, FixedValue)) {
+    // The fixup was unresolved, we need a relocation. Inform the object
+    // writer of the relocation, and give it an opportunity to adjust the
+    // fixup value if need be.
+    getWriter().RecordRelocation(*this, Layout, &F, Fixup, Target, FixedValue);
+  }
+  return FixedValue;
+}
+
+void MCAssembler::Finish() {
+  DEBUG_WITH_TYPE("mc-dump", {
+      llvm::errs() << "assembler backend - pre-layout\n--\n";
+      dump(); });
+
+  // Create the layout object.
+  MCAsmLayout Layout(*this);
+
+  // Create dummy fragments and assign section ordinals.
+  unsigned SectionIndex = 0;
+  for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+    // Create dummy fragments to eliminate any empty sections, this simplifies
+    // layout.
+    if (it->getFragmentList().empty())
+      new MCDataFragment(it);
+
+    it->setOrdinal(SectionIndex++);
+  }
+
+  // Assign layout order indices to sections and fragments.
+  for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+    MCSectionData *SD = Layout.getSectionOrder()[i];
+    SD->setLayoutOrder(i);
+
+    unsigned FragmentIndex = 0;
+    for (MCSectionData::iterator it2 = SD->begin(),
+           ie2 = SD->end(); it2 != ie2; ++it2)
+      it2->setLayoutOrder(FragmentIndex++);
+  }
+
+  // Layout until everything fits.
+  while (LayoutOnce(Layout))
+    continue;
+
+  DEBUG_WITH_TYPE("mc-dump", {
+      llvm::errs() << "assembler backend - post-relaxation\n--\n";
+      dump(); });
+
+  // Finalize the layout, including fragment lowering.
+  FinishLayout(Layout);
+
+  DEBUG_WITH_TYPE("mc-dump", {
+      llvm::errs() << "assembler backend - final-layout\n--\n";
+      dump(); });
+
+  uint64_t StartOffset = OS.tell();
+
+  // Allow the object writer a chance to perform post-layout binding (for
+  // example, to set the index fields in the symbol data).
+  getWriter().ExecutePostLayoutBinding(*this, Layout);
+
+  // Evaluate and apply the fixups, generating relocation entries as necessary.
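+  // (A sketch of what ApplyFixup amounts to for a hypothetical little-endian,
+  // 4-byte-wide fixup kind; the real backends also honor the kind's
+  // TargetOffset/TargetSize from MCFixupKindInfo:
+  //
+  //   void patch32le(char *Data, unsigned Off, uint64_t FixedValue) {
+  //     for (unsigned i = 0; i != 4; ++i)
+  //       Data[Off + i] = char((FixedValue >> (8 * i)) & 0xff);
+  //   }
+  // )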
+  for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+    for (MCSectionData::iterator it2 = it->begin(),
+           ie2 = it->end(); it2 != ie2; ++it2) {
+      MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
+      if (DF) {
+        for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
+               ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
+          MCFixup &Fixup = *it3;
+          uint64_t FixedValue = HandleFixup(Layout, *DF, Fixup);
+          getBackend().ApplyFixup(Fixup, DF->getContents().data(),
+                                  DF->getContents().size(), FixedValue);
+        }
+      }
+      MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+      if (IF) {
+        for (MCInstFragment::fixup_iterator it3 = IF->fixup_begin(),
+               ie3 = IF->fixup_end(); it3 != ie3; ++it3) {
+          MCFixup &Fixup = *it3;
+          uint64_t FixedValue = HandleFixup(Layout, *IF, Fixup);
+          getBackend().ApplyFixup(Fixup, IF->getCode().data(),
+                                  IF->getCode().size(), FixedValue);
+        }
+      }
+    }
+  }
+
+  // Write the object file.
+  getWriter().WriteObject(*this, Layout);
+
+  stats::ObjectBytes += OS.tell() - StartOffset;
+}
+
+bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
+                                       const MCFragment *DF,
+                                       const MCAsmLayout &Layout) const {
+  if (getRelaxAll())
+    return true;
+
+  // If we cannot resolve the fixup value, it requires relaxation.
+  MCValue Target;
+  uint64_t Value;
+  if (!EvaluateFixup(Layout, Fixup, DF, Target, Value))
+    return true;
+
+  // Otherwise, relax if the value is too big for a (signed) i8.
+  //
+  // FIXME: This is target dependent!
+  return int64_t(Value) != int64_t(int8_t(Value));
+}
+
+bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+                                          const MCAsmLayout &Layout) const {
+  // If this inst doesn't ever need relaxation, ignore it. This occurs when we
+  // are intentionally pushing out inst fragments, or because we relaxed a
+  // previous instruction to one that doesn't need relaxation.
+  if (!getBackend().MayNeedRelaxation(IF->getInst()))
+    return false;
+
+  for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
+         ie = IF->fixup_end(); it != ie; ++it)
+    if (FixupNeedsRelaxation(*it, IF, Layout))
+      return true;
+
+  return false;
+}
+
+bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout,
+                                   MCInstFragment &IF) {
+  if (!FragmentNeedsRelaxation(&IF, Layout))
+    return false;
+
+  ++stats::RelaxedInstructions;
+
+  // FIXME-PERF: We could immediately lower out instructions if we can tell
+  // they are fully resolved, to avoid retesting on later passes.
+
+  // Relax the fragment.
+  MCInst Relaxed;
+  getBackend().RelaxInstruction(IF.getInst(), Relaxed);
+
+  // Encode the new instruction.
+  //
+  // FIXME-PERF: If it matters, we could let the target do this. It can
+  // probably do so more efficiently in many cases.
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+  VecOS.flush();
+
+  // Update the instruction fragment.
+  IF.setInst(Relaxed);
+  IF.getCode() = Code;
+  IF.getFixups().clear();
+  // FIXME: Eliminate copy.
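+  // (One hedged option: SmallVector::append would collapse the loop below to
+  //    IF.getFixups().append(Fixups.begin(), Fixups.end());
+  //  though the per-element copies themselves remain.)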
+  for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+    IF.getFixups().push_back(Fixups[i]);
+
+  return true;
+}
+
+bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
+  int64_t Value = 0;
+  uint64_t OldSize = LF.getContents().size();
+  LF.getValue().EvaluateAsAbsolute(Value, Layout);
+  SmallString<8> &Data = LF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  if (LF.isSigned())
+    MCObjectWriter::EncodeSLEB128(Value, OSE);
+  else
+    MCObjectWriter::EncodeULEB128(Value, OSE);
+  OSE.flush();
+  return OldSize != LF.getContents().size();
+}
+
+bool MCAssembler::RelaxDwarfLineAddr(MCAsmLayout &Layout,
+                                     MCDwarfLineAddrFragment &DF) {
+  int64_t AddrDelta = 0;
+  uint64_t OldSize = DF.getContents().size();
+  bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+  (void)IsAbs;
+  assert(IsAbs);
+  int64_t LineDelta = DF.getLineDelta();
+  SmallString<8> &Data = DF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+  OSE.flush();
+  return OldSize != Data.size();
+}
+
+bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout,
+                                              MCDwarfCallFrameFragment &DF) {
+  int64_t AddrDelta = 0;
+  uint64_t OldSize = DF.getContents().size();
+  bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
+  (void)IsAbs;
+  assert(IsAbs);
+  SmallString<8> &Data = DF.getContents();
+  Data.clear();
+  raw_svector_ostream OSE(Data);
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+  OSE.flush();
+  return OldSize != Data.size();
+}
+
+bool MCAssembler::LayoutSectionOnce(MCAsmLayout &Layout,
+                                    MCSectionData &SD) {
+  MCFragment *FirstInvalidFragment = NULL;
+  // Scan for fragments that need relaxation.
+  for (MCSectionData::iterator it2 = SD.begin(),
+         ie2 = SD.end(); it2 != ie2; ++it2) {
+    // Check if this is a fragment that needs relaxation.
+    bool relaxedFrag = false;
+    switch (it2->getKind()) {
+    default:
+      break;
+    case MCFragment::FT_Inst:
+      relaxedFrag = RelaxInstruction(Layout, *cast<MCInstFragment>(it2));
+      break;
+    case MCFragment::FT_Dwarf:
+      relaxedFrag = RelaxDwarfLineAddr(Layout,
+                                       *cast<MCDwarfLineAddrFragment>(it2));
+      break;
+    case MCFragment::FT_DwarfFrame:
+      relaxedFrag =
+        RelaxDwarfCallFrameFragment(Layout,
+                                    *cast<MCDwarfCallFrameFragment>(it2));
+      break;
+    case MCFragment::FT_LEB:
+      relaxedFrag = RelaxLEB(Layout, *cast<MCLEBFragment>(it2));
+      break;
+    }
+    // Update the layout, and remember that we relaxed.
+    if (relaxedFrag && !FirstInvalidFragment)
+      FirstInvalidFragment = it2;
+  }
+  if (FirstInvalidFragment) {
+    Layout.Invalidate(FirstInvalidFragment);
+    return true;
+  }
+  return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+  ++stats::RelaxationSteps;
+
+  bool WasRelaxed = false;
+  for (iterator it = begin(), ie = end(); it != ie; ++it) {
+    MCSectionData &SD = *it;
+    while (LayoutSectionOnce(Layout, SD))
+      WasRelaxed = true;
+  }
+
+  return WasRelaxed;
+}
+
+void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+  // The layout is done. Mark every fragment as valid.
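+  // (How this works, as a sketch: fragment offsets are computed lazily, and
+  // getFragmentOffset walks layout over every not-yet-valid predecessor.
+  // Requesting the offset of each section's last fragment therefore forces a
+  // full, valid layout, roughly:
+  //
+  //   getFragmentOffset(last) -> ensure valid -> LayoutFragment(F0 .. last)
+  //
+  // after which LastValidFragment points at each section's tail.)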
+  for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
+    Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
+  }
+}
+
+// Debugging methods
+
+namespace llvm {
+
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+  OS << "<MCFixup" << " Offset:" << AF.getOffset()
+     << " Value:" << *AF.getValue()
+     << " Kind:" << AF.getKind() << ">";
+  return OS;
+}
+
+}
+
+void MCFragment::dump() {
+  raw_ostream &OS = llvm::errs();
+
+  OS << "<";
+  switch (getKind()) {
+  case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+  case MCFragment::FT_Data:  OS << "MCDataFragment"; break;
+  case MCFragment::FT_Fill:  OS << "MCFillFragment"; break;
+  case MCFragment::FT_Inst:  OS << "MCInstFragment"; break;
+  case MCFragment::FT_Org:   OS << "MCOrgFragment"; break;
+  case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
+  case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
+  case MCFragment::FT_LEB:   OS << "MCLEBFragment"; break;
+  }
+
+  OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+     << " Offset:" << Offset << ">";
+
+  switch (getKind()) {
+  case MCFragment::FT_Align: {
+    const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+    if (AF->hasEmitNops())
+      OS << " (emit nops)";
+    OS << "\n       ";
+    OS << " Alignment:" << AF->getAlignment()
+       << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+       << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+    break;
+  }
+  case MCFragment::FT_Data: {
+    const MCDataFragment *DF = cast<MCDataFragment>(this);
+    OS << "\n       ";
+    OS << " Contents:[";
+    const SmallVectorImpl<char> &Contents = DF->getContents();
+    for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+      if (i) OS << ",";
+      OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
+    }
+    OS << "] (" << Contents.size() << " bytes)";
+
+    if (!DF->getFixups().empty()) {
+      OS << ",\n       ";
+      OS << " Fixups:[";
+      for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+             ie = DF->fixup_end(); it != ie; ++it) {
+        if (it != DF->fixup_begin()) OS << ",\n                ";
+        OS << *it;
+      }
+      OS << "]";
+    }
+    break;
+  }
+  case MCFragment::FT_Fill: {
+    const MCFillFragment *FF = cast<MCFillFragment>(this);
+    OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+       << " Size:" << FF->getSize();
+    break;
+  }
+  case MCFragment::FT_Inst: {
+    const MCInstFragment *IF = cast<MCInstFragment>(this);
+    OS << "\n       ";
+    OS << " Inst:";
+    IF->getInst().dump_pretty(OS);
+    break;
+  }
+  case MCFragment::FT_Org: {
+    const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+    OS << "\n       ";
+    OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+    break;
+  }
+  case MCFragment::FT_Dwarf: {
+    const MCDwarfLineAddrFragment *OF = cast<MCDwarfLineAddrFragment>(this);
+    OS << "\n       ";
+    OS << " AddrDelta:" << OF->getAddrDelta()
+       << " LineDelta:" << OF->getLineDelta();
+    break;
+  }
+  case MCFragment::FT_DwarfFrame: {
+    const MCDwarfCallFrameFragment *CF = cast<MCDwarfCallFrameFragment>(this);
+    OS << "\n       ";
+    OS << " AddrDelta:" << CF->getAddrDelta();
+    break;
+  }
+  case MCFragment::FT_LEB: {
+    const MCLEBFragment *LF = cast<MCLEBFragment>(this);
+    OS << "\n       ";
+    OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
+    break;
+  }
+  }
+  OS << ">";
+}
+
+void MCSectionData::dump() {
+  raw_ostream &OS = llvm::errs();
+
+  OS << "<MCSectionData";
+  OS << " Alignment:" << getAlignment() << " Fragments:[\n      ";
+  for (iterator it = begin(), ie = end(); it != ie; ++it) {
+    if (it != begin()) OS << ",\n      ";
+    it->dump();
+  }
+  OS << "]>";
+}
+
+void MCSymbolData::dump() {
+  raw_ostream &OS = llvm::errs();
+
+  OS << "<MCSymbolData Symbol:" << getSymbol()
+     << " Fragment:" << getFragment() << " Offset:" << getOffset()
+     << " Flags:" << getFlags() << " Index:" << getIndex();
+  if (isCommon())
+    OS << " (common, size:" << getCommonSize()
+       << " align: " << getCommonAlignment() << ")";
+  if (isExternal())
+    OS << " (external)";
+  if (isPrivateExtern())
+    OS << " (private extern)";
+  OS << ">";
+}
+
+void MCAssembler::dump() {
+  raw_ostream &OS = llvm::errs();
+
+  OS << "<MCAssembler\n";
+  OS << "  Sections:[\n    ";
+  for (iterator it = begin(), ie = end(); it != ie; ++it) {
+    if (it != begin()) OS << ",\n    ";
+    it->dump();
+  }
+  OS << "],\n";
+  OS << "  Symbols:[";
+
+  for (symbol_iterator it = symbol_begin(), ie = symbol_end(); it != ie; ++it) {
+    if (it != symbol_begin()) OS << ",\n           ";
+    it->dump();
+  }
+  OS << "]>\n";
+}
diff --git a/final/lib/MC/MCCodeEmitter.cpp b/final/lib/MC/MCCodeEmitter.cpp
new file mode 100644
index 00000000000..c122763b2fe
--- 
/dev/null +++ b/final/lib/MC/MCCodeEmitter.cpp @@ -0,0 +1,18 @@ +//===-- MCCodeEmitter.cpp - Instruction Encoding --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCCodeEmitter.h" + +using namespace llvm; + +MCCodeEmitter::MCCodeEmitter() { +} + +MCCodeEmitter::~MCCodeEmitter() { +} diff --git a/final/lib/MC/MCContext.cpp b/final/lib/MC/MCContext.cpp new file mode 100644 index 00000000000..018f00c08f6 --- /dev/null +++ b/final/lib/MC/MCContext.cpp @@ -0,0 +1,312 @@ +//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCLabel.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ELF.h" +using namespace llvm; + +typedef StringMap MachOUniqueMapTy; +typedef StringMap ELFUniqueMapTy; +typedef StringMap COFFUniqueMapTy; + + +MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) : + MAI(mai), TAI(tai), NextUniqueID(0), + CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) { + MachOUniquingMap = 0; + ELFUniquingMap = 0; + COFFUniquingMap = 0; + + SecureLogFile = getenv("AS_SECURE_LOG_FILE"); + SecureLog = 0; + SecureLogUsed = false; + + DwarfLocSeen = false; +} + +MCContext::~MCContext() { + // NOTE: The symbols are all allocated out of a bump pointer allocator, + // we don't need to free them here. + + // If we have the MachO uniquing map, free it. + delete (MachOUniqueMapTy*)MachOUniquingMap; + delete (ELFUniqueMapTy*)ELFUniquingMap; + delete (COFFUniqueMapTy*)COFFUniquingMap; + + // If the stream for the .secure_log_unique directive was created free it. + delete (raw_ostream*)SecureLog; + + delete TAI; +} + +//===----------------------------------------------------------------------===// +// Symbol Manipulation +//===----------------------------------------------------------------------===// + +MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { + assert(!Name.empty() && "Normal symbols cannot be unnamed!"); + + // Do the lookup and get the entire StringMapEntry. We want access to the + // key if we are creating the entry. + StringMapEntry &Entry = Symbols.GetOrCreateValue(Name); + MCSymbol *Sym = Entry.getValue(); + + if (Sym) + return Sym; + + Sym = CreateSymbol(Name); + Entry.setValue(Sym); + return Sym; +} + +MCSymbol *MCContext::CreateSymbol(StringRef Name) { + // Determine whether this is an assembler temporary or normal label. 
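+  // (Example: with the Darwin private prefix "L", "Ltmp3" is an assembler
+  // temporary and is normally omitted from the symbol table, while "_main"
+  // is a normal label; on ELF targets the prefix is ".L" instead.)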
+ bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + + StringMapEntry *NameEntry = &UsedNames.GetOrCreateValue(Name); + if (NameEntry->getValue()) { + assert(isTemporary && "Cannot rename non temporary symbols"); + SmallString<128> NewName; + do { + Twine T = Name + Twine(NextUniqueID++); + T.toVector(NewName); + StringRef foo = NewName; + NameEntry = &UsedNames.GetOrCreateValue(foo); + } while (NameEntry->getValue()); + } + NameEntry->setValue(true); + + // Ok, the entry doesn't already exist. Have the MCSymbol object itself refer + // to the copy of the string that is embedded in the UsedNames entry. + MCSymbol *Result = new (*this) MCSymbol(NameEntry->getKey(), isTemporary); + + return Result; +} + +MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { + SmallString<128> NameSV; + Name.toVector(NameSV); + return GetOrCreateSymbol(NameSV.str()); +} + +MCSymbol *MCContext::CreateTempSymbol() { + SmallString<128> NameSV; + Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" + + Twine(NextUniqueID++); + Name.toVector(NameSV); + return CreateSymbol(NameSV); +} + +unsigned MCContext::NextInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->incInstance(); +} + +unsigned MCContext::GetInstance(int64_t LocalLabelVal) { + MCLabel *&Label = Instances[LocalLabelVal]; + if (!Label) + Label = new (*this) MCLabel(0); + return Label->getInstance(); +} + +MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(NextInstance(LocalLabelVal))); +} +MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, + int bORf) { + return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + + Twine(LocalLabelVal) + + "\2" + + Twine(GetInstance(LocalLabelVal) + bORf)); +} + +MCSymbol *MCContext::LookupSymbol(StringRef Name) const { + return Symbols.lookup(Name); +} + +//===----------------------------------------------------------------------===// +// Section Management +//===----------------------------------------------------------------------===// + +const MCSectionMachO *MCContext:: +getMachOSection(StringRef Segment, StringRef Section, + unsigned TypeAndAttributes, + unsigned Reserved2, SectionKind Kind) { + + // We unique sections by their segment/section pair. The returned section + // may not have the same flags as the requested section, if so this should be + // diagnosed by the client as an error. + + // Create the map if it doesn't already exist. + if (MachOUniquingMap == 0) + MachOUniquingMap = new MachOUniqueMapTy(); + MachOUniqueMapTy &Map = *(MachOUniqueMapTy*)MachOUniquingMap; + + // Form the name to look up. + SmallString<64> Name; + Name += Segment; + Name.push_back(','); + Name += Section; + + // Do the lookup, if we have a hit, return it. + const MCSectionMachO *&Entry = Map[Name.str()]; + if (Entry) return Entry; + + // Otherwise, return a new section. 
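+  // (Usage sketch: repeated lookups with the same segment/section pair return
+  // the identical object, e.g.
+  //
+  //   const MCSectionMachO *A =
+  //     Ctx.getMachOSection("__TEXT", "__text",
+  //                         MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+  //                         0, SectionKind::getText());
+  //
+  // and a second call with "__TEXT,__text" yields the same pointer A.)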
+ return Entry = new (*this) MCSectionMachO(Segment, Section, TypeAndAttributes, + Reserved2, Kind); +} + +const MCSectionELF *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + return getELFSection(Section, Type, Flags, Kind, 0, ""); +} + +const MCSectionELF *MCContext:: +getELFSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind, unsigned EntrySize, StringRef Group) { + if (ELFUniquingMap == 0) + ELFUniquingMap = new ELFUniqueMapTy(); + ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + // Possibly refine the entry size first. + if (!EntrySize) { + EntrySize = MCSectionELF::DetermineEntrySize(Kind); + } + + MCSymbol *GroupSym = NULL; + if (!Group.empty()) + GroupSym = GetOrCreateSymbol(Group); + + MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, + Kind, EntrySize, GroupSym); + Entry.setValue(Result); + return Result; +} + +const MCSectionELF *MCContext::CreateELFGroupSection() { + MCSectionELF *Result = + new (*this) MCSectionELF(".group", ELF::SHT_GROUP, 0, + SectionKind::getReadOnly(), 4, NULL); + return Result; +} + +const MCSection *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + int Selection, + SectionKind Kind) { + if (COFFUniquingMap == 0) + COFFUniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; + + // Do the lookup, if we have a hit, return it. + StringMapEntry &Entry = Map.GetOrCreateValue(Section); + if (Entry.getValue()) return Entry.getValue(); + + MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(), + Characteristics, + Selection, Kind); + + Entry.setValue(Result); + return Result; +} + +//===----------------------------------------------------------------------===// +// Dwarf Management +//===----------------------------------------------------------------------===// + +/// GetDwarfFile - takes a file name an number to place in the dwarf file and +/// directory tables. If the file number has already been allocated it is an +/// error and zero is returned and the client reports the error, else the +/// allocated file number is returned. The file numbers may be in any order. +unsigned MCContext::GetDwarfFile(StringRef FileName, unsigned FileNumber) { + // TODO: a FileNumber of zero says to use the next available file number. + // Note: in GenericAsmParser::ParseDirectiveFile() FileNumber was checked + // to not be less than one. This needs to be change to be not less than zero. + + // Make space for this FileNumber in the MCDwarfFiles vector if needed. + if (FileNumber >= MCDwarfFiles.size()) { + MCDwarfFiles.resize(FileNumber + 1); + } else { + MCDwarfFile *&ExistingFile = MCDwarfFiles[FileNumber]; + if (ExistingFile) + // It is an error to use see the same number more than once. + return 0; + } + + // Get the new MCDwarfFile slot for this FileNumber. + MCDwarfFile *&File = MCDwarfFiles[FileNumber]; + + // Separate the directory part from the basename of the FileName. + std::pair Slash = FileName.rsplit('/'); + + // Find or make a entry in the MCDwarfDirs vector for this Directory. + StringRef Name; + unsigned DirIndex; + // Capture directory name. + if (Slash.second.empty()) { + Name = Slash.first; + DirIndex = 0; // For FileNames with no directories a DirIndex of 0 is used. 
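+    // (Example: .file 1 "foo.c" lands here with Name "foo.c" and DirIndex 0,
+    // while .file 2 "/tmp/bar.c" takes the else branch below and is split
+    // into Directory "/tmp", stored once with DirIndex 1, and Name "bar.c".)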
+  } else {
+    StringRef Directory = Slash.first;
+    Name = Slash.second;
+    for (DirIndex = 0; DirIndex < MCDwarfDirs.size(); DirIndex++) {
+      if (Directory == MCDwarfDirs[DirIndex])
+        break;
+    }
+    if (DirIndex >= MCDwarfDirs.size()) {
+      char *Buf = static_cast<char *>(Allocate(Directory.size()));
+      memcpy(Buf, Directory.data(), Directory.size());
+      MCDwarfDirs.push_back(StringRef(Buf, Directory.size()));
+    }
+    // The DirIndex is one based, as a DirIndex of 0 is used for FileNames
+    // with no directories. MCDwarfDirs[] is unlike MCDwarfFiles[] in that the
+    // directory names are stored at MCDwarfDirs[DirIndex-1], whereas
+    // FileNames are stored at MCDwarfFiles[FileNumber].Name.
+    DirIndex++;
+  }
+
+  // Now make the MCDwarfFile entry and place it in the slot in the
+  // MCDwarfFiles vector.
+  char *Buf = static_cast<char *>(Allocate(Name.size()));
+  memcpy(Buf, Name.data(), Name.size());
+  File = new (*this) MCDwarfFile(StringRef(Buf, Name.size()), DirIndex);
+
+  // Return the allocated FileNumber.
+  return FileNumber;
+}
+
+/// isValidDwarfFileNumber - takes a dwarf file number and returns true if it
+/// currently is assigned and false otherwise.
+bool MCContext::isValidDwarfFileNumber(unsigned FileNumber) {
+  if (FileNumber == 0 || FileNumber >= MCDwarfFiles.size())
+    return false;
+
+  return MCDwarfFiles[FileNumber] != 0;
+}
diff --git a/final/lib/MC/MCDisassembler.cpp b/final/lib/MC/MCDisassembler.cpp
new file mode 100644
index 00000000000..08096906462
--- /dev/null
+++ b/final/lib/MC/MCDisassembler.cpp
@@ -0,0 +1,14 @@
+//===-- lib/MC/MCDisassembler.cpp - Disassembler interface ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCDisassembler.h"
+using namespace llvm;
+
+MCDisassembler::~MCDisassembler() {
+}
diff --git a/final/lib/MC/MCDisassembler/CMakeLists.txt b/final/lib/MC/MCDisassembler/CMakeLists.txt
new file mode 100644
index 00000000000..5fa7b70194b
--- /dev/null
+++ b/final/lib/MC/MCDisassembler/CMakeLists.txt
@@ -0,0 +1,7 @@
+
+add_llvm_library(LLVMMCDisassembler
+  EDDisassembler.cpp
+  EDOperand.cpp
+  EDInst.cpp
+  EDToken.cpp
+  )
diff --git a/final/lib/MC/MCDisassembler/EDDisassembler.cpp b/final/lib/MC/MCDisassembler/EDDisassembler.cpp
new file mode 100644
index 00000000000..2fd14db2a45
--- /dev/null
+++ b/final/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -0,0 +1,402 @@
+//===-- EDDisassembler.cpp - LLVM Enhanced Disassembler -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Enhanced Disassembly library's disassembler class.
+// The disassembler is responsible for vending individual instructions
+// according to a given architecture and disassembly syntax.
+// +//===----------------------------------------------------------------------===// + +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/AsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmLexer.h" +#include "llvm/Target/TargetAsmParser.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelect.h" +using namespace llvm; + +bool EDDisassembler::sInitialized = false; +EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; + +struct TripleMap { + Triple::ArchType Arch; + const char *String; +}; + +static struct TripleMap triplemap[] = { + { Triple::x86, "i386-unknown-unknown" }, + { Triple::x86_64, "x86_64-unknown-unknown" }, + { Triple::arm, "arm-unknown-unknown" }, + { Triple::thumb, "thumb-unknown-unknown" }, + { Triple::InvalidArch, NULL, } +}; + +/// infoFromArch - Returns the TripleMap corresponding to a given architecture, +/// or NULL if there is an error +/// +/// @arg arch - The Triple::ArchType for the desired architecture +static const char *tripleFromArch(Triple::ArchType arch) { + unsigned int infoIndex; + + for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) { + if (arch == triplemap[infoIndex].Arch) + return triplemap[infoIndex].String; + } + + return NULL; +} + +/// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer +/// for the desired assembly syntax, suitable for passing to +/// Target::createMCInstPrinter() +/// +/// @arg arch - The target architecture +/// @arg syntax - The assembly syntax in sd form +static int getLLVMSyntaxVariant(Triple::ArchType arch, + EDDisassembler::AssemblySyntax syntax) { + switch (syntax) { + default: + return -1; + // Mappings below from X86AsmPrinter.cpp + case EDDisassembler::kEDAssemblySyntaxX86ATT: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 0; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxX86Intel: + if (arch == Triple::x86 || arch == Triple::x86_64) + return 1; + else + return -1; + case EDDisassembler::kEDAssemblySyntaxARMUAL: + if (arch == Triple::arm || arch == Triple::thumb) + return 0; + else + return -1; + } +} + +void EDDisassembler::initialize() { + if (sInitialized) + return; + + sInitialized = true; + + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + InitializeAllDisassemblers(); +} + +#undef BRINGUP_TARGET + +EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, + AssemblySyntax syntax) { + CPUKey key; + key.Arch = arch; + key.Syntax = syntax; + + EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key); + + if (i != sDisassemblers.end()) { + return i->second; + } else { + EDDisassembler* sdd = new EDDisassembler(key); + if (!sdd->valid()) { + delete sdd; + return NULL; + } + + sDisassemblers[key] = sdd; + + return sdd; + } + + return NULL; +} + +EDDisassembler *EDDisassembler::getDisassembler(StringRef str, + AssemblySyntax syntax) { + return 
getDisassembler(Triple(str).getArch(), syntax); +} + +EDDisassembler::EDDisassembler(CPUKey &key) : + Valid(false), + HasSemantics(false), + ErrorStream(nulls()), + Key(key) { + const char *triple = tripleFromArch(key.Arch); + + if (!triple) + return; + + LLVMSyntaxVariant = getLLVMSyntaxVariant(key.Arch, key.Syntax); + + if (LLVMSyntaxVariant < 0) + return; + + std::string tripleString(triple); + std::string errorString; + + Tgt = TargetRegistry::lookupTarget(tripleString, + errorString); + + if (!Tgt) + return; + + std::string featureString; + + TargetMachine.reset(Tgt->createTargetMachine(tripleString, + featureString)); + + const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo(); + + if (!registerInfo) + return; + + initMaps(*registerInfo); + + AsmInfo.reset(Tgt->createAsmInfo(tripleString)); + + if (!AsmInfo) + return; + + Disassembler.reset(Tgt->createMCDisassembler()); + + if (!Disassembler) + return; + + InstInfos = Disassembler->getEDInfo(); + + InstString.reset(new std::string); + InstStream.reset(new raw_string_ostream(*InstString)); + InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + + if (!InstPrinter) + return; + + GenericAsmLexer.reset(new AsmLexer(*AsmInfo)); + SpecificAsmLexer.reset(Tgt->createAsmLexer(*AsmInfo)); + SpecificAsmLexer->InstallLexer(*GenericAsmLexer); + + initMaps(*TargetMachine->getRegisterInfo()); + + Valid = true; +} + +EDDisassembler::~EDDisassembler() { + if (!valid()) + return; +} + +namespace { + /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback + /// as provided by the sd interface. See MemoryObject. + class EDMemoryObject : public llvm::MemoryObject { + private: + EDByteReaderCallback Callback; + void *Arg; + public: + EDMemoryObject(EDByteReaderCallback callback, + void *arg) : Callback(callback), Arg(arg) { } + ~EDMemoryObject() { } + uint64_t getBase() const { return 0x0; } + uint64_t getExtent() const { return (uint64_t)-1; } + int readByte(uint64_t address, uint8_t *ptr) const { + if (!Callback) + return -1; + + if (Callback(ptr, address, Arg)) + return -1; + + return 0; + } + }; +} + +EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg) { + EDMemoryObject memoryObject(byteReader, arg); + + MCInst* inst = new MCInst; + uint64_t byteSize; + + if (!Disassembler->getInstruction(*inst, + byteSize, + memoryObject, + address, + ErrorStream)) { + delete inst; + return NULL; + } else { + const llvm::EDInstInfo *thisInstInfo; + + thisInstInfo = &InstInfos[inst->getOpcode()]; + + EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); + return sdInst; + } +} + +void EDDisassembler::initMaps(const TargetRegisterInfo ®isterInfo) { + unsigned numRegisters = registerInfo.getNumRegs(); + unsigned registerIndex; + + for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) { + const char* registerName = registerInfo.get(registerIndex).Name; + + RegVec.push_back(registerName); + RegRMap[registerName] = registerIndex; + } + + switch (Key.Arch) { + default: + break; + case Triple::x86: + case Triple::x86_64: + stackPointers.insert(registerIDWithName("SP")); + stackPointers.insert(registerIDWithName("ESP")); + stackPointers.insert(registerIDWithName("RSP")); + + programCounters.insert(registerIDWithName("IP")); + programCounters.insert(registerIDWithName("EIP")); + programCounters.insert(registerIDWithName("RIP")); + break; + case Triple::arm: + case Triple::thumb: + stackPointers.insert(registerIDWithName("SP")); + + 
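+    // (Caveat worth noting: registerIDWithName returns 0 for unknown names,
+    // which is indistinguishable from a genuine register ID of 0. A sketch of
+    // an unambiguous variant would use a bool out-parameter instead, e.g.
+    // bool lookupReg(const char *Name, unsigned &ID).)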
programCounters.insert(registerIDWithName("PC")); + break; + } +} + +const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const { + if (registerID >= RegVec.size()) + return NULL; + else + return RegVec[registerID].c_str(); +} + +unsigned EDDisassembler::registerIDWithName(const char *name) const { + regrmap_t::const_iterator iter = RegRMap.find(std::string(name)); + if (iter == RegRMap.end()) + return 0; + else + return (*iter).second; +} + +bool EDDisassembler::registerIsStackPointer(unsigned registerID) { + return (stackPointers.find(registerID) != stackPointers.end()); +} + +bool EDDisassembler::registerIsProgramCounter(unsigned registerID) { + return (programCounters.find(registerID) != programCounters.end()); +} + +int EDDisassembler::printInst(std::string &str, MCInst &inst) { + PrinterMutex.acquire(); + + InstPrinter->printInst(&inst, *InstStream); + InstStream->flush(); + str = *InstString; + InstString->clear(); + + PrinterMutex.release(); + + return 0; +} + +int EDDisassembler::parseInst(SmallVectorImpl &operands, + SmallVectorImpl &tokens, + const std::string &str) { + int ret = 0; + + switch (Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + case Triple::arm: + case Triple::thumb: + break; + } + + const char *cStr = str.c_str(); + MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr)); + + StringRef instName; + SMLoc instLoc; + + SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over + MCContext context(*AsmInfo, NULL); + OwningPtr streamer(createNullStreamer(context)); + OwningPtr genericParser(createMCAsmParser(*Tgt, sourceMgr, + context, *streamer, + *AsmInfo)); + OwningPtr TargetParser(Tgt->createAsmParser(*genericParser, + *TargetMachine)); + + AsmToken OpcodeToken = genericParser->Lex(); + AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to + + if (OpcodeToken.is(AsmToken::Identifier)) { + instName = OpcodeToken.getString(); + instLoc = OpcodeToken.getLoc(); + + if (NextToken.isNot(AsmToken::Eof) && + TargetParser->ParseInstruction(instName, instLoc, operands)) + ret = -1; + } else { + ret = -1; + } + + ParserMutex.acquire(); + + if (!ret) { + GenericAsmLexer->setBuffer(buf); + + while (SpecificAsmLexer->Lex(), + SpecificAsmLexer->isNot(AsmToken::Eof) && + SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) { + if (SpecificAsmLexer->is(AsmToken::Error)) { + ret = -1; + break; + } + tokens.push_back(SpecificAsmLexer->getTok()); + } + } + + ParserMutex.release(); + + return ret; +} + +int EDDisassembler::llvmSyntaxVariant() const { + return LLVMSyntaxVariant; +} diff --git a/final/lib/MC/MCDisassembler/EDDisassembler.h b/final/lib/MC/MCDisassembler/EDDisassembler.h new file mode 100644 index 00000000000..71e45f0b042 --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDDisassembler.h @@ -0,0 +1,273 @@ +//===-- EDDisassembler.h - LLVM Enhanced Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// disassembler class. The disassembler is responsible for vending individual +// instructions according to a given architecture and disassembly syntax. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EDDISASSEMBLER_H
+#define LLVM_EDDISASSEMBLER_H
+
+#include "EDInfo.h"
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+
+#include <map>
+#include <set>
+#include <vector>
+
+namespace llvm {
+class AsmLexer;
+class AsmToken;
+class MCContext;
+class MCAsmInfo;
+class MCAsmLexer;
+class AsmParser;
+class TargetAsmLexer;
+class TargetAsmParser;
+class MCDisassembler;
+class MCInstPrinter;
+class MCInst;
+class MCParsedAsmOperand;
+class MCStreamer;
+template <typename T> class SmallVectorImpl;
+class SourceMgr;
+class Target;
+class TargetMachine;
+class TargetRegisterInfo;
+
+struct EDInstInfo;
+struct EDInst;
+struct EDOperand;
+struct EDToken;
+
+typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);
+
+/// EDDisassembler - Encapsulates a disassembler for a single architecture and
+/// disassembly syntax. Also manages the static disassembler registry.
+struct EDDisassembler {
+  typedef enum {
+    /*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
+    kEDAssemblySyntaxX86Intel = 0,
+    /*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
+    kEDAssemblySyntaxX86ATT = 1,
+    kEDAssemblySyntaxARMUAL = 2
+  } AssemblySyntax;
+
+  ////////////////////
+  // Static members //
+  ////////////////////
+
+  /// CPUKey - Encapsulates the descriptor of an
+  /// architecture/disassembly-syntax pair
+  struct CPUKey {
+    /// The architecture type
+    llvm::Triple::ArchType Arch;
+
+    /// The assembly syntax
+    AssemblySyntax Syntax;
+
+    /// operator== - Equality operator
+    bool operator==(const CPUKey &key) const {
+      return (Arch == key.Arch &&
+              Syntax == key.Syntax);
+    }
+
+    /// operator< - Less-than operator; a strict weak ordering so that CPUKey
+    /// can serve as a std::map key
+    bool operator<(const CPUKey &key) const {
+      if (Arch != key.Arch)
+        return Arch < key.Arch;
+      return Syntax < key.Syntax;
+    }
+  };
+
+  typedef std::map<CPUKey, EDDisassembler*> DisassemblerMap_t;
+
+  /// True if the disassembler registry has been initialized; false if not
+  static bool sInitialized;
+  /// A map from disassembler specifications to disassemblers. Populated
+  /// lazily.
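+  /// (For example, the first getDisassembler(Triple::x86_64,
+  /// kEDAssemblySyntaxX86ATT) call constructs an EDDisassembler, caches it
+  /// under that CPUKey, and later calls return the cached instance.)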
+ static DisassemblerMap_t sDisassemblers; + + /// getDisassembler - Returns the specified disassemble, or NULL on failure + /// + /// @arg arch - The desired architecture + /// @arg syntax - The desired disassembly syntax + static EDDisassembler *getDisassembler(llvm::Triple::ArchType arch, + AssemblySyntax syntax); + + /// getDisassembler - Returns the disassembler for a given combination of + /// CPU type, CPU subtype, and assembly syntax, or NULL on failure + /// + /// @arg str - The string representation of the architecture triple, e.g., + /// "x86_64-apple-darwin" + /// @arg syntax - The disassembly syntax for the required disassembler + static EDDisassembler *getDisassembler(llvm::StringRef str, + AssemblySyntax syntax); + + /// initialize - Initializes the disassembler registry and the LLVM backend + static void initialize(); + + //////////////////////// + // Per-object members // + //////////////////////// + + /// True only if the object has been successfully initialized + bool Valid; + /// True if the disassembler can provide semantic information + bool HasSemantics; + + /// The stream to write errors to + llvm::raw_ostream &ErrorStream; + + /// The architecture/syntax pair for the current architecture + CPUKey Key; + /// The LLVM target corresponding to the disassembler + const llvm::Target *Tgt; + /// The target machine instance. + llvm::OwningPtr TargetMachine; + /// The assembly information for the target architecture + llvm::OwningPtr AsmInfo; + /// The disassembler for the target architecture + llvm::OwningPtr Disassembler; + /// The output string for the instruction printer; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstString; + /// The output stream for the disassembler; must be guarded with + /// PrinterMutex + llvm::OwningPtr InstStream; + /// The instruction printer for the target architecture; must be guarded with + /// PrinterMutex when printing + llvm::OwningPtr InstPrinter; + /// The mutex that guards the instruction printer's printing functions, which + /// use a shared stream + llvm::sys::Mutex PrinterMutex; + /// The array of instruction information provided by the TableGen backend for + /// the target architecture + const llvm::EDInstInfo *InstInfos; + /// The target-specific lexer for use in tokenizing strings, in + /// target-independent and target-specific portions + llvm::OwningPtr GenericAsmLexer; + llvm::OwningPtr SpecificAsmLexer; + /// The guard for the above + llvm::sys::Mutex ParserMutex; + /// The LLVM number used for the target disassembly syntax variant + int LLVMSyntaxVariant; + + typedef std::vector regvec_t; + typedef std::map regrmap_t; + + /// A vector of registers for quick mapping from LLVM register IDs to names + regvec_t RegVec; + /// A map of registers for quick mapping from register names to LLVM IDs + regrmap_t RegRMap; + + /// A set of register IDs for aliases of the stack pointer for the current + /// architecture + std::set stackPointers; + /// A set of register IDs for aliases of the program counter for the current + /// architecture + std::set programCounters; + + /// Constructor - initializes a disassembler with all the necessary objects, + /// which come pre-allocated from the registry accessor function + /// + /// @arg key - the architecture and disassembly syntax for the + /// disassembler + EDDisassembler(CPUKey& key); + + /// valid - reports whether there was a failure in the constructor. 
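+  /// Typical client flow, as a sketch:
+  ///
+  ///   EDDisassembler::initialize();
+  ///   EDDisassembler *D =
+  ///     EDDisassembler::getDisassembler(llvm::Triple::x86_64,
+  ///                       EDDisassembler::kEDAssemblySyntaxX86ATT);
+  ///   if (D && D->valid()) {
+  ///     // D->createInst(byteReader, address, arg) vends one instruction.
+  ///   }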
+ bool valid() { + return Valid; + } + + /// hasSemantics - reports whether the disassembler can provide operands and + /// tokens. + bool hasSemantics() { + return HasSemantics; + } + + ~EDDisassembler(); + + /// createInst - creates and returns an instruction given a callback and + /// memory address, or NULL on failure + /// + /// @arg byteReader - A callback function that provides machine code bytes + /// @arg address - The address of the first byte of the instruction, + /// suitable for passing to byteReader + /// @arg arg - An opaque argument for byteReader + EDInst *createInst(EDByteReaderCallback byteReader, + uint64_t address, + void *arg); + + /// initMaps - initializes regVec and regRMap using the provided register + /// info + /// + /// @arg registerInfo - the register information to use as a source + void initMaps(const llvm::TargetRegisterInfo ®isterInfo); + /// nameWithRegisterID - Returns the name (owned by the EDDisassembler) of a + /// register for a given register ID, or NULL on failure + /// + /// @arg registerID - the ID of the register to be queried + const char *nameWithRegisterID(unsigned registerID) const; + /// registerIDWithName - Returns the ID of a register for a given register + /// name, or (unsigned)-1 on failure + /// + /// @arg name - The name of the register + unsigned registerIDWithName(const char *name) const; + + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsStackPointer(unsigned registerID); + /// registerIsStackPointer - reports whether a register ID is an alias for the + /// stack pointer register + /// + /// @arg registerID - The LLVM register ID + bool registerIsProgramCounter(unsigned registerID); + + /// printInst - prints an MCInst to a string, returning 0 on success, or -1 + /// otherwise + /// + /// @arg str - A reference to a string which is filled in with the string + /// representation of the instruction + /// @arg inst - A reference to the MCInst to be printed + int printInst(std::string& str, + llvm::MCInst& inst); + + /// parseInst - extracts operands and tokens from a string for use in + /// tokenizing the string. Returns 0 on success, or -1 otherwise. + /// + /// @arg operands - A reference to a vector that will be filled in with the + /// parsed operands + /// @arg tokens - A reference to a vector that will be filled in with the + /// tokens + /// @arg str - The string representation of the instruction + int parseInst(llvm::SmallVectorImpl &operands, + llvm::SmallVectorImpl &tokens, + const std::string &str); + + /// llvmSyntaxVariant - returns the LLVM syntax variant for this disassembler + int llvmSyntaxVariant() const; +}; + +} // end namespace llvm + +#endif diff --git a/final/lib/MC/MCDisassembler/EDInfo.h b/final/lib/MC/MCDisassembler/EDInfo.h new file mode 100644 index 00000000000..627c06641db --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDInfo.h @@ -0,0 +1,73 @@ +//===-- EDInfo.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINFO_H +#define LLVM_EDINFO_H + +enum { + EDIS_MAX_OPERANDS = 13, + EDIS_MAX_SYNTAXES = 2 +}; + +enum OperandTypes { + kOperandTypeNone, + kOperandTypeImmediate, + kOperandTypeRegister, + kOperandTypeX86Memory, + kOperandTypeX86EffectiveAddress, + kOperandTypeX86PCRelative, + kOperandTypeARMBranchTarget, + kOperandTypeARMSoReg, + kOperandTypeARMSoImm, + kOperandTypeARMSoImm2Part, + kOperandTypeARMPredicate, + kOperandTypeARMAddrMode2, + kOperandTypeARMAddrMode2Offset, + kOperandTypeARMAddrMode3, + kOperandTypeARMAddrMode3Offset, + kOperandTypeARMAddrMode4, + kOperandTypeARMAddrMode5, + kOperandTypeARMAddrMode6, + kOperandTypeARMAddrMode6Offset, + kOperandTypeARMAddrModePC, + kOperandTypeARMRegisterList, + kOperandTypeARMTBAddrMode, + kOperandTypeThumbITMask, + kOperandTypeThumbAddrModeS1, + kOperandTypeThumbAddrModeS2, + kOperandTypeThumbAddrModeS4, + kOperandTypeThumbAddrModeRR, + kOperandTypeThumbAddrModeSP, + kOperandTypeThumb2SoReg, + kOperandTypeThumb2SoImm, + kOperandTypeThumb2AddrModeImm8, + kOperandTypeThumb2AddrModeImm8Offset, + kOperandTypeThumb2AddrModeImm12, + kOperandTypeThumb2AddrModeSoReg, + kOperandTypeThumb2AddrModeImm8s4, + kOperandTypeThumb2AddrModeImm8s4Offset +}; + +enum OperandFlags { + kOperandFlagSource = 0x1, + kOperandFlagTarget = 0x2 +}; + +enum InstructionTypes { + kInstructionTypeNone, + kInstructionTypeMove, + kInstructionTypeBranch, + kInstructionTypePush, + kInstructionTypePop, + kInstructionTypeCall, + kInstructionTypeReturn +}; + + +#endif diff --git a/final/lib/MC/MCDisassembler/EDInst.cpp b/final/lib/MC/MCDisassembler/EDInst.cpp new file mode 100644 index 00000000000..63b049fe40f --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDInst.cpp @@ -0,0 +1,209 @@ +//===-EDInst.cpp - LLVM Enhanced Disassembler -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's instruction class. +// The instruction is responsible for vending the string representation, +// individual tokens, and operands for a single instruction. 
+// +//===----------------------------------------------------------------------===// + +#include "EDInst.h" +#include "EDDisassembler.h" +#include "EDOperand.h" +#include "EDToken.h" + +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" + +using namespace llvm; + +EDInst::EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *info) : + Disassembler(disassembler), + Inst(inst), + ThisInstInfo(info), + ByteSize(byteSize), + BranchTarget(-1), + MoveSource(-1), + MoveTarget(-1) { + OperandOrder = ThisInstInfo->operandOrders[Disassembler.llvmSyntaxVariant()]; +} + +EDInst::~EDInst() { + unsigned int index; + unsigned int numOperands = Operands.size(); + + for (index = 0; index < numOperands; ++index) + delete Operands[index]; + + unsigned int numTokens = Tokens.size(); + + for (index = 0; index < numTokens; ++index) + delete Tokens[index]; + + delete Inst; +} + +uint64_t EDInst::byteSize() { + return ByteSize; +} + +int EDInst::stringify() { + if (StringifyResult.valid()) + return StringifyResult.result(); + + if (Disassembler.printInst(String, *Inst)) + return StringifyResult.setResult(-1); + + String.push_back('\n'); + + return StringifyResult.setResult(0); +} + +int EDInst::getString(const char*& str) { + if (stringify()) + return -1; + + str = String.c_str(); + + return 0; +} + +unsigned EDInst::instID() { + return Inst->getOpcode(); +} + +bool EDInst::isBranch() { + if (ThisInstInfo) + return + ThisInstInfo->instructionType == kInstructionTypeBranch || + ThisInstInfo->instructionType == kInstructionTypeCall; + else + return false; +} + +bool EDInst::isMove() { + if (ThisInstInfo) + return ThisInstInfo->instructionType == kInstructionTypeMove; + else + return false; +} + +int EDInst::parseOperands() { + if (ParseResult.valid()) + return ParseResult.result(); + + if (!ThisInstInfo) + return ParseResult.setResult(-1); + + unsigned int opIndex; + unsigned int mcOpIndex = 0; + + for (opIndex = 0; opIndex < ThisInstInfo->numOperands; ++opIndex) { + if (isBranch() && + (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget)) { + BranchTarget = opIndex; + } + else if (isMove()) { + if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagSource) + MoveSource = opIndex; + else if (ThisInstInfo->operandFlags[opIndex] & kOperandFlagTarget) + MoveTarget = opIndex; + } + + EDOperand *operand = new EDOperand(Disassembler, *this, opIndex, mcOpIndex); + + Operands.push_back(operand); + } + + return ParseResult.setResult(0); +} + +int EDInst::branchTargetID() { + if (parseOperands()) + return -1; + return BranchTarget; +} + +int EDInst::moveSourceID() { + if (parseOperands()) + return -1; + return MoveSource; +} + +int EDInst::moveTargetID() { + if (parseOperands()) + return -1; + return MoveTarget; +} + +int EDInst::numOperands() { + if (parseOperands()) + return -1; + return Operands.size(); +} + +int EDInst::getOperand(EDOperand *&operand, unsigned int index) { + if (parseOperands()) + return -1; + + if (index >= Operands.size()) + return -1; + + operand = Operands[index]; + return 0; +} + +int EDInst::tokenize() { + if (TokenizeResult.valid()) + return TokenizeResult.result(); + + if (stringify()) + return TokenizeResult.setResult(-1); + + return TokenizeResult.setResult(EDToken::tokenize(Tokens, + String, + OperandOrder, + Disassembler)); + +} + +int EDInst::numTokens() { + if (tokenize()) + return -1; + return Tokens.size(); +} + +int EDInst::getToken(EDToken *&token, unsigned int index) { + if (tokenize()) + return -1; + token = 
Tokens[index]; + return 0; +} + +#ifdef __BLOCKS__ +int EDInst::visitTokens(EDTokenVisitor_t visitor) { + if (tokenize()) + return -1; + + tokvec_t::iterator iter; + + for (iter = Tokens.begin(); iter != Tokens.end(); ++iter) { + int ret = visitor(*iter); + if (ret == 1) + return 0; + if (ret != 0) + return -1; + } + + return 0; +} +#endif diff --git a/final/lib/MC/MCDisassembler/EDInst.h b/final/lib/MC/MCDisassembler/EDInst.h new file mode 100644 index 00000000000..ceb9505028d --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDInst.h @@ -0,0 +1,182 @@ +//===-- EDInst.h - LLVM Enhanced Disassembler -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// instruction class. The instruction is responsible for vending the string +// representation, individual tokens and operands for a single instruction. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDINST_H +#define LLVM_EDINST_H + +#include "llvm/Support/DataTypes.h" +#include "llvm/ADT/SmallVector.h" +#include <string> +#include <vector> + +namespace llvm { + class MCInst; + struct EDInstInfo; + struct EDToken; + struct EDDisassembler; + struct EDOperand; + +#ifdef __BLOCKS__ + typedef int (^EDTokenVisitor_t)(EDToken *token); +#endif + +/// CachedResult - Encapsulates the result of a function along with the validity +/// of that result, so that slow functions don't need to run twice +struct CachedResult { + /// True if the result has been obtained by executing the function + bool Valid; + /// The result last obtained from the function + int Result; + + /// Constructor - Initializes an invalid result + CachedResult() : Valid(false) { } + /// valid - Returns true if the result has been obtained by executing the + /// function and false otherwise + bool valid() { return Valid; } + /// result - Returns the result of the function or an undefined value if + /// valid() is false + int result() { return Result; } + /// setResult - Sets the result of the function and declares it valid + /// returning the result (so that setResult() can be called from inside a + /// return statement) + /// @arg result - The result of the function + int setResult(int result) { Result = result; Valid = true; return result; } +}; + +/// EDInst - Encapsulates a single instruction, which can be queried for its +/// string representation, as well as its operands and tokens +struct EDInst { + /// The parent disassembler + EDDisassembler &Disassembler; + /// The containing MCInst + llvm::MCInst *Inst; + /// The instruction information provided by TableGen for this instruction + const llvm::EDInstInfo *ThisInstInfo; + /// The number of bytes for the machine code representation of the instruction + uint64_t ByteSize; + + /// The result of the stringify() function + CachedResult StringifyResult; + /// The string representation of the instruction + std::string String; + /// The order in which operands from the InstInfo's operand information appear + /// in String + const char* OperandOrder; + + /// The result of the parseOperands() function + CachedResult ParseResult; + typedef llvm::SmallVector<EDOperand*, 5> opvec_t; + /// The instruction's operands + opvec_t Operands; + /// The operand corresponding to the target, if the instruction is a branch + int BranchTarget; + /// The operand corresponding to the source, if the instruction is a move + int MoveSource; + /// The operand corresponding to the target, if the instruction is a move + int MoveTarget; + + /// The result of the tokenize() function + CachedResult TokenizeResult; + typedef std::vector<EDToken*> tokvec_t; + /// The instruction's tokens + tokvec_t Tokens; + + /// Constructor - initializes an instruction given the output of the LLVM + /// C++ disassembler + /// + /// @arg inst - The MCInst, which will now be owned by this object + /// @arg byteSize - The size of the consumed instruction, in bytes + /// @arg disassembler - The parent disassembler + /// @arg instInfo - The instruction information produced by the table + /// generator for this instruction + EDInst(llvm::MCInst *inst, + uint64_t byteSize, + EDDisassembler &disassembler, + const llvm::EDInstInfo *instInfo); + ~EDInst(); + + /// byteSize - returns the number of bytes consumed by the machine code + /// representation of the instruction + uint64_t byteSize(); + /// instID - returns the LLVM instruction ID of the instruction + unsigned instID(); + + /// stringify - populates the String and AsmString members of the instruction, + /// returning 0 on success or -1 otherwise + int stringify(); + /// getString - retrieves a pointer to the string representation of the + /// instruction, returning 0 on success or -1 otherwise + /// + /// @arg str - A reference to a pointer that, on success, is set to point to + /// the string representation of the instruction; this string is still owned + /// by the instruction and will be deleted when it is destroyed + int getString(const char *&str); + + /// isBranch - Returns true if the instruction is a branch + bool isBranch(); + /// isMove - Returns true if the instruction is a move + bool isMove(); + + /// parseOperands - populates the Operands member of the instruction, + /// returning 0 on success or -1 otherwise + int parseOperands(); + /// branchTargetID - returns the ID (suitable for use with getOperand()) of + /// the target operand if the instruction is a branch, or -1 otherwise + int branchTargetID(); + /// moveSourceID - returns the ID of the source operand if the instruction + /// is a move, or -1 otherwise + int moveSourceID(); + /// moveTargetID - returns the ID of the target operand if the instruction + /// is a move, or -1 otherwise + int moveTargetID(); + + /// numOperands - returns the number of operands available to retrieve, or -1 + /// on error + int numOperands(); + /// getOperand - retrieves an operand from the instruction's operand list by + /// index, returning 0 on success or -1 on error + /// + /// @arg operand - A reference whose target is pointed at the operand on + /// success, although the operand is still owned by the EDInst + /// @arg index - The index of the operand in the instruction + int getOperand(EDOperand *&operand, unsigned int index); + + /// tokenize - populates the Tokens member of the instruction, returning 0 on + /// success or -1 otherwise + int tokenize(); + /// numTokens - returns the number of tokens in the instruction, or -1 on + /// error + int numTokens(); + /// getToken - retrieves a token from the instruction's token list by index, + /// returning 0 on success or -1 on error + /// + /// @arg token - A reference whose target is pointed at the token on success, + /// although the token is still owned by the EDInst + /// @arg index - The index of the token in the instruction + int getToken(EDToken *&token, unsigned int index); + +#ifdef __BLOCKS__ + /// 
visitTokens - Visits each token in turn and applies a block to it, + /// returning 0 if all blocks are visited and/or the block signals + /// termination by returning 1; returns -1 on error + /// + /// @arg visitor - The visitor block to apply to all tokens. + int visitTokens(EDTokenVisitor_t visitor); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/final/lib/MC/MCDisassembler/EDOperand.cpp b/final/lib/MC/MCDisassembler/EDOperand.cpp new file mode 100644 index 00000000000..2b0c73e8059 --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDOperand.cpp @@ -0,0 +1,293 @@ +//===-- EDOperand.cpp - LLVM Enhanced Disassembler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembly library's operand class. The +// operand is responsible for allowing evaluation given a particular register +// context. +// +//===----------------------------------------------------------------------===// + +#include "EDOperand.h" +#include "EDDisassembler.h" +#include "EDInst.h" +#include "llvm/MC/EDInstInfo.h" +#include "llvm/MC/MCInst.h" +using namespace llvm; + +EDOperand::EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex) : + Disassembler(disassembler), + Inst(inst), + OpIndex(opIndex), + MCOpIndex(mcOpIndex) { + unsigned int numMCOperands = 0; + + if (Disassembler.Key.Arch == Triple::x86 || + Disassembler.Key.Arch == Triple::x86_64) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + break; + case kOperandTypeImmediate: + numMCOperands = 1; + break; + case kOperandTypeRegister: + numMCOperands = 1; + break; + case kOperandTypeX86Memory: + numMCOperands = 5; + break; + case kOperandTypeX86EffectiveAddress: + numMCOperands = 4; + break; + case kOperandTypeX86PCRelative: + numMCOperands = 1; + break; + } + } + else if (Disassembler.Key.Arch == Triple::arm || + Disassembler.Key.Arch == Triple::thumb) { + uint8_t operandType = inst.ThisInstInfo->operandTypes[opIndex]; + + switch (operandType) { + default: + case kOperandTypeARMRegisterList: + break; + case kOperandTypeImmediate: + case kOperandTypeRegister: + case kOperandTypeARMBranchTarget: + case kOperandTypeARMSoImm: + case kOperandTypeThumb2SoImm: + case kOperandTypeARMSoImm2Part: + case kOperandTypeARMPredicate: + case kOperandTypeThumbITMask: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeARMTBAddrMode: + case kOperandTypeThumb2AddrModeImm8s4Offset: + numMCOperands = 1; + break; + case kOperandTypeThumb2SoReg: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrModePC: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + numMCOperands = 2; + break; + case kOperandTypeARMSoReg: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode3: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeARMAddrMode6Offset: + numMCOperands = 3; + break; + case kOperandTypeARMAddrMode6: + 
numMCOperands = 4; + break; + } + } + + mcOpIndex += numMCOperands; +} + +EDOperand::~EDOperand() { +} + +int EDOperand::evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg) { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (Disassembler.Key.Arch) { + default: + return -1; + case Triple::x86: + case Triple::x86_64: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeX86PCRelative: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t ripVal; + + // TODO fix how we do this + + if (callback(&ripVal, Disassembler.registerIDWithName("RIP"), arg)) + return -1; + + result = ripVal + displacement; + return 0; + } + case kOperandTypeX86Memory: + case kOperandTypeX86EffectiveAddress: + { + unsigned baseReg = Inst.Inst->getOperand(MCOpIndex).getReg(); + uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); + unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); + int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); + + uint64_t addr = 0; + + unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) { + unsigned fsID = Disassembler.registerIDWithName("FS"); + unsigned gsID = Disassembler.registerIDWithName("GS"); + + if (segmentReg == fsID || + segmentReg == gsID) { + uint64_t segmentBase; + if (!callback(&segmentBase, segmentReg, arg)) + addr += segmentBase; + } + } + + if (baseReg) { + uint64_t baseVal; + if (callback(&baseVal, baseReg, arg)) + return -1; + addr += baseVal; + } + + if (indexReg) { + uint64_t indexVal; + if (callback(&indexVal, indexReg, arg)) + return -1; + addr += (scaleAmount * indexVal); + } + + addr += displacement; + + result = addr; + return 0; + } + } // switch (operandType) + break; + case Triple::arm: + case Triple::thumb: + switch (operandType) { + default: + return -1; + case kOperandTypeImmediate: + result = Inst.Inst->getOperand(MCOpIndex).getImm(); + return 0; + case kOperandTypeRegister: + { + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); + return callback(&result, reg, arg); + } + case kOperandTypeARMBranchTarget: + { + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); + + uint64_t pcVal; + + if (callback(&pcVal, Disassembler.registerIDWithName("PC"), arg)) + return -1; + + result = pcVal + displacement; + return 0; + } + } + break; + } + + return -1; +} + +int EDOperand::isRegister() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeRegister); +} + +unsigned EDOperand::regVal() { + return Inst.Inst->getOperand(MCOpIndex).getReg(); +} + +int EDOperand::isImmediate() { + return(Inst.ThisInstInfo->operandFlags[OpIndex] == kOperandTypeImmediate); +} + +uint64_t EDOperand::immediateVal() { + return Inst.Inst->getOperand(MCOpIndex).getImm(); +} + +int EDOperand::isMemory() { + uint8_t operandType = Inst.ThisInstInfo->operandTypes[OpIndex]; + + switch (operandType) { + default: + return 0; + case kOperandTypeX86Memory: + case kOperandTypeX86PCRelative: + case kOperandTypeX86EffectiveAddress: + case kOperandTypeARMSoReg: + case kOperandTypeARMSoImm: + case kOperandTypeARMAddrMode2: + case kOperandTypeARMAddrMode2Offset: + case kOperandTypeARMAddrMode3: + case 
kOperandTypeARMAddrMode3Offset: + case kOperandTypeARMAddrMode4: + case kOperandTypeARMAddrMode5: + case kOperandTypeARMAddrMode6: + case kOperandTypeARMAddrModePC: + case kOperandTypeARMBranchTarget: + case kOperandTypeThumbAddrModeS1: + case kOperandTypeThumbAddrModeS2: + case kOperandTypeThumbAddrModeS4: + case kOperandTypeThumbAddrModeRR: + case kOperandTypeThumbAddrModeSP: + case kOperandTypeThumb2SoImm: + case kOperandTypeThumb2AddrModeImm8: + case kOperandTypeThumb2AddrModeImm8Offset: + case kOperandTypeThumb2AddrModeImm12: + case kOperandTypeThumb2AddrModeSoReg: + case kOperandTypeThumb2AddrModeImm8s4: + return 1; + } +} + +#ifdef __BLOCKS__ +namespace { + struct RegisterReaderWrapper { + EDOperand::EDRegisterBlock_t regBlock; + }; +} + +static int readerWrapperCallback(uint64_t *value, unsigned regID, void *arg) { + RegisterReaderWrapper *wrapper = (RegisterReaderWrapper *)arg; + return wrapper->regBlock(value, regID); +} + +int EDOperand::evaluate(uint64_t &result, EDRegisterBlock_t regBlock) { + RegisterReaderWrapper wrapper; + wrapper.regBlock = regBlock; + return evaluate(result, readerWrapperCallback, (void*)&wrapper); +} +#endif diff --git a/final/lib/MC/MCDisassembler/EDOperand.h b/final/lib/MC/MCDisassembler/EDOperand.h new file mode 100644 index 00000000000..50260ec965a --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDOperand.h @@ -0,0 +1,91 @@ +//===-EDOperand.h - LLVM Enhanced Disassembler ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's +// operand class. The operand is responsible for allowing evaluation given a +// particular register context. 
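// [Editor's illustration; not part of the patch.] The EDRegisterReaderCallback
// contract declared below lets a client supply register values on demand while
// EDOperand::evaluate() folds an operand to a number; a nonzero return marks
// the register unreadable. A minimal sketch, assuming a hypothetical
// client-side register file held in a std::map:

#include <cstdint>
#include <map>

static int readRegSketch(uint64_t *value, unsigned regID, void *arg) {
  typedef std::map<unsigned, uint64_t> RegFile;
  RegFile &regs = *static_cast<RegFile *>(arg);
  RegFile::const_iterator it = regs.find(regID);
  if (it == regs.end())
    return -1;          // unreadable register; evaluate() then fails with -1
  *value = it->second;
  return 0;             // success
}

// Typical use: operand->evaluate(result, readRegSketch, &regs) fills `result`
// with the immediate, register value, or computed effective address.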
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDOPERAND_H +#define LLVM_EDOPERAND_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +struct EDDisassembler; +struct EDInst; + +typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, + void* arg); + + +/// EDOperand - Encapsulates a single operand, which can be evaluated by the +/// client +struct EDOperand { + /// The parent disassembler + const EDDisassembler &Disassembler; + /// The parent instruction + const EDInst &Inst; + + /// The index of the operand in the EDInst + unsigned int OpIndex; + /// The index of the first component of the operand in the MCInst + unsigned int MCOpIndex; + + /// Constructor - Initializes an EDOperand + /// + /// @arg disassembler - The disassembler responsible for the operand + /// @arg inst - The instruction containing this operand + /// @arg opIndex - The index of the operand in inst + /// @arg mcOpIndex - The index of the operand in the original MCInst + EDOperand(const EDDisassembler &disassembler, + const EDInst &inst, + unsigned int opIndex, + unsigned int &mcOpIndex); + ~EDOperand(); + + /// evaluate - Returns the numeric value of an operand to the extent possible, + /// returning 0 on success or -1 if there was some problem (such as a + /// register not being readable) + /// + /// @arg result - A reference whose target is filled in with the value of + /// the operand (the address if it is a memory operand) + /// @arg callback - A function to call to obtain register values + /// @arg arg - An opaque argument to pass to callback + int evaluate(uint64_t &result, + EDRegisterReaderCallback callback, + void *arg); + + /// isRegister - Returns 1 if the operand is a register or 0 otherwise + int isRegister(); + /// regVal - Returns the register value. + unsigned regVal(); + + /// isImmediate - Returns 1 if the operand is an immediate or 0 otherwise + int isImmediate(); + /// immediateVal - Returns the immediate value. + uint64_t immediateVal(); + + /// isMemory - Returns 1 if the operand is a memory location or 0 otherwise + int isMemory(); + +#ifdef __BLOCKS__ + typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); + + /// evaluate - Like evaluate for a callback, but uses a block instead + int evaluate(uint64_t &result, + EDRegisterBlock_t regBlock); +#endif +}; + +} // end namespace llvm + +#endif diff --git a/final/lib/MC/MCDisassembler/EDToken.cpp b/final/lib/MC/MCDisassembler/EDToken.cpp new file mode 100644 index 00000000000..de770b41ef3 --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDToken.cpp @@ -0,0 +1,210 @@ +//===-- EDToken.cpp - LLVM Enhanced Disassembler --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Enhanced Disassembler library's token class. The +// token is responsible for vending information about the token, such as its +// type and logical value. 
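// [Editor's illustration; not part of the patch.] EDToken::tokenize() below
// recovers inter-token whitespace by comparing raw source pointers: any gap
// between the end of one lexed token and the start of the next becomes an
// explicit kTokenWhitespace token. A standalone sketch of that pointer
// arithmetic, with hypothetical (start, length) pairs standing in for
// AsmToken locations:

#include <cstdio>

int main() {
  const char *str = "addl  $1, %eax";
  // Hypothetical lexer output: token start offsets and lengths within str.
  unsigned starts[] = {0, 6, 8, 10};
  unsigned lens[]   = {4, 2, 1, 4};
  const char *ws = str;                     // next unconsumed position
  for (unsigned i = 0; i != 4; ++i) {
    const char *tok = str + starts[i];
    if (tok > ws)                           // gap => whitespace token
      printf("whitespace: %u chars\n", (unsigned)(tok - ws));
    printf("token: %.*s\n", (int)lens[i], tok);
    ws = tok + lens[i];                     // advance past this token
  }
  return 0;
}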
+// +//===----------------------------------------------------------------------===// + +#include "EDToken.h" +#include "EDDisassembler.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +EDToken::EDToken(StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler) : + Disassembler(disassembler), + Str(str), + Type(type), + LocalType(localType), + OperandID(-1) { +} + +EDToken::~EDToken() { +} + +void EDToken::makeLiteral(bool sign, uint64_t absoluteValue) { + Type = kTokenLiteral; + LiteralSign = sign; + LiteralAbsoluteValue = absoluteValue; +} + +void EDToken::makeRegister(unsigned registerID) { + Type = kTokenRegister; + RegisterID = registerID; +} + +void EDToken::setOperandID(int operandID) { + OperandID = operandID; +} + +enum EDToken::tokenType EDToken::type() const { + return Type; +} + +uint64_t EDToken::localType() const { + return LocalType; +} + +StringRef EDToken::string() const { + return Str; +} + +int EDToken::operandID() const { + return OperandID; +} + +int EDToken::literalSign() const { + if (Type != kTokenLiteral) + return -1; + return (LiteralSign ? 1 : 0); +} + +int EDToken::literalAbsoluteValue(uint64_t &value) const { + if (Type != kTokenLiteral) + return -1; + value = LiteralAbsoluteValue; + return 0; +} + +int EDToken::registerID(unsigned &registerID) const { + if (Type != kTokenRegister) + return -1; + registerID = RegisterID; + return 0; +} + +int EDToken::tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler) { + SmallVector<MCParsedAsmOperand*, 5> parsedOperands; + SmallVector<AsmToken, 10> asmTokens; + + if (disassembler.parseInst(parsedOperands, asmTokens, str)) + return -1; + + SmallVectorImpl<MCParsedAsmOperand*>::iterator operandIterator; + unsigned int operandIndex; + SmallVectorImpl<AsmToken>::iterator tokenIterator; + + operandIterator = parsedOperands.begin(); + operandIndex = 0; + + bool readOpcode = false; + + const char *wsPointer = asmTokens.begin()->getLoc().getPointer(); + + for (tokenIterator = asmTokens.begin(); + tokenIterator != asmTokens.end(); + ++tokenIterator) { + SMLoc tokenLoc = tokenIterator->getLoc(); + + const char *tokenPointer = tokenLoc.getPointer(); + + if (tokenPointer > wsPointer) { + unsigned long wsLength = tokenPointer - wsPointer; + + EDToken *whitespaceToken = new EDToken(StringRef(wsPointer, wsLength), + EDToken::kTokenWhitespace, + 0, + disassembler); + + tokens.push_back(whitespaceToken); + } + + wsPointer = tokenPointer + tokenIterator->getString().size(); + + while (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() > + (*operandIterator)->getEndLoc().getPointer()) { + ++operandIterator; + ++operandIndex; + } + + EDToken *token; + + switch (tokenIterator->getKind()) { + case AsmToken::Identifier: + if (!readOpcode) { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenOpcode, + (uint64_t)tokenIterator->getKind(), + disassembler); + readOpcode = true; + break; + } + // any identifier that isn't an opcode is mere punctuation; so we fall + // through + default: + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenPunctuation, + (uint64_t)tokenIterator->getKind(), + disassembler); + break; + case AsmToken::Integer: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + int64_t intVal = tokenIterator->getIntVal(); + + if (intVal < 0) + token->makeLiteral(true, -intVal); + else + 
token->makeLiteral(false, intVal); + break; + } + case AsmToken::Register: + { + token = new EDToken(tokenIterator->getString(), + EDToken::kTokenLiteral, + (uint64_t)tokenIterator->getKind(), + disassembler); + + token->makeRegister((unsigned)tokenIterator->getRegVal()); + break; + } + } + + if (operandIterator != parsedOperands.end() && + tokenLoc.getPointer() >= + (*operandIterator)->getStartLoc().getPointer()) { + /// operandIndex == 0 means the operand is the instruction (which the + /// AsmParser treats as an operand but edis does not). We therefore skip + /// operandIndex == 0 and subtract 1 from all other operand indices. + + if (operandIndex > 0) + token->setOperandID(operandOrder[operandIndex - 1]); + } + + tokens.push_back(token); + } + + // Free any parsed operands. + for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i) + delete parsedOperands[i]; + + return 0; +} + +int EDToken::getString(const char*& buf) { + if (PermStr.length() == 0) { + PermStr = Str.str(); + } + buf = PermStr.c_str(); + return 0; +} diff --git a/final/lib/MC/MCDisassembler/EDToken.h b/final/lib/MC/MCDisassembler/EDToken.h new file mode 100644 index 00000000000..ba467078686 --- /dev/null +++ b/final/lib/MC/MCDisassembler/EDToken.h @@ -0,0 +1,139 @@ +//===-EDToken.h - LLVM Enhanced Disassembler --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Enhanced Disassembly library's token +// class. The token is responsible for vending information about the token, +// such as its type and logical value. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EDTOKEN_H +#define LLVM_EDTOKEN_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { + +struct EDDisassembler; + +/// EDToken - Encapsulates a single token, which can provide a string +/// representation of itself or interpret itself in various ways, depending +/// on the token type. +struct EDToken { + enum tokenType { + kTokenWhitespace, + kTokenOpcode, + kTokenLiteral, + kTokenRegister, + kTokenPunctuation + }; + + /// The parent disassembler + EDDisassembler &Disassembler; + + /// The token's string representation + llvm::StringRef Str; + /// The token's string representation, but in a form suitable for export + std::string PermStr; + /// The type of the token, as exposed through the external API + enum tokenType Type; + /// The type of the token, as recorded by the syntax-specific tokenizer + uint64_t LocalType; + /// The operand corresponding to the token, or (unsigned int)-1 if not + /// part of an operand. 
+ int OperandID; + + /// The sign if the token is a literal (1 if negative, 0 otherwise) + bool LiteralSign; + /// The absolute value if the token is a literal + uint64_t LiteralAbsoluteValue; + /// The LLVM register ID if the token is a register name + unsigned RegisterID; + + /// Constructor - Initializes an EDToken with the information common to all + /// tokens + /// + /// @arg str - The string corresponding to the token + /// @arg type - The token's type as exposed through the public API + /// @arg localType - The token's type as recorded by the tokenizer + /// @arg disassembler - The disassembler responsible for the token + EDToken(llvm::StringRef str, + enum tokenType type, + uint64_t localType, + EDDisassembler &disassembler); + + /// makeLiteral - Adds the information specific to a literal + /// @arg sign - The sign of the literal (1 if negative, 0 + /// otherwise) + /// + /// @arg absoluteValue - The absolute value of the literal + void makeLiteral(bool sign, uint64_t absoluteValue); + /// makeRegister - Adds the information specific to a register + /// + /// @arg registerID - The LLVM register ID + void makeRegister(unsigned registerID); + + /// setOperandID - Links the token to a numbered operand + /// + /// @arg operandID - The operand ID to link to + void setOperandID(int operandID); + + ~EDToken(); + + /// type - Returns the public type of the token + enum tokenType type() const; + /// localType - Returns the tokenizer-specific type of the token + uint64_t localType() const; + /// string - Returns the string representation of the token + llvm::StringRef string() const; + /// operandID - Returns the operand ID of the token + int operandID() const; + + /// literalSign - Returns the sign of the token + /// (1 if negative, 0 if positive or unsigned, -1 if it is not a literal) + int literalSign() const; + /// literalAbsoluteValue - Retrieves the absolute value of the token, and + /// returns -1 if the token is not a literal + /// @arg value - A reference to a value that is filled in with the absolute + /// value, if it is valid + int literalAbsoluteValue(uint64_t &value) const; + /// registerID - Retrieves the register ID of the token, and returns -1 if the + /// token is not a register + /// + /// @arg registerID - A reference to a value that is filled in with the + /// register ID, if it is valid + int registerID(unsigned &registerID) const; + + /// tokenize - Tokenizes a string using the platform- and syntax-specific + /// tokenizer, and returns 0 on success (-1 on failure) + /// + /// @arg tokens - A vector that will be filled in with pointers to + /// allocated tokens + /// @arg str - The string, as outputted by the AsmPrinter + /// @arg operandOrder - The order of the operands from the operandFlags array + /// as they appear in str + /// @arg disassembler - The disassembler for the desired target and + /// assembly syntax + static int tokenize(std::vector<EDToken*> &tokens, + std::string &str, + const char *operandOrder, + EDDisassembler &disassembler); + + /// getString - Directs a character pointer to the string, returning 0 on + /// success (-1 on failure) + /// @arg buf - A reference to a pointer that is set to point to the string. + /// The string is still owned by the token. 
+ int getString(const char*& buf); +}; + +} // end namespace llvm +#endif diff --git a/final/lib/MC/MCDisassembler/Makefile b/final/lib/MC/MCDisassembler/Makefile new file mode 100644 index 00000000000..7d71cd381a7 --- /dev/null +++ b/final/lib/MC/MCDisassembler/Makefile @@ -0,0 +1,14 @@ +##===- lib/MC/MCDisassembler/Makefile ----------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCDisassembler + +include $(LEVEL)/Makefile.common + diff --git a/final/lib/MC/MCDwarf.cpp b/final/lib/MC/MCDwarf.cpp new file mode 100644 index 00000000000..112d7d887a2 --- /dev/null +++ b/final/lib/MC/MCDwarf.cpp @@ -0,0 +1,814 @@ +//===- lib/MC/MCDwarf.cpp - MCDwarf implementation ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/FoldingSet.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetAsmInfo.h" +using namespace llvm; + +// Given a special op, return the address skip amount (in units of +// DWARF2_LINE_MIN_INSN_LENGTH). +#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE) + +// The maximum address skip amount that can be encoded with a special op. +#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255) + +// First special line opcode - leave room for the standard opcodes. +// Note: If you want to change this, you'll have to update the +// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit(). +#define DWARF2_LINE_OPCODE_BASE 13 + +// Minimum line offset in a special line info. opcode. This value +// was chosen to give a reasonable range of values. +#define DWARF2_LINE_BASE -5 + +// Range of line offsets in a special line info. opcode. +# define DWARF2_LINE_RANGE 14 + +// Define the architecture-dependent minimum instruction length (in bytes). +// This value should be rather too small than too big. +# define DWARF2_LINE_MIN_INSN_LENGTH 1 + +// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting, +// this routine is a nop and will be optimized away. +static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) +{ + if (DWARF2_LINE_MIN_INSN_LENGTH == 1) + return AddrDelta; + if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) { + // TODO: report this error, but really only once. + ; + } + return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH; +} + +// +// This is called when an instruction is assembled into the specified section +// and, if there is information from the last .loc directive that has yet to +// have a line entry made for it, one is made. 
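// [Editor's illustration; not part of the patch.] With the parameters above
// (opcode_base 13, line_base -5, line_range 14), a special opcode packs a
// line delta and an address delta into a single byte:
//   opcode = (LineDelta - line_base) + (AddrDelta * line_range) + opcode_base
// e.g. LineDelta = +2 and AddrDelta = 3 give (2 + 5) + 3*14 + 13 = 62, one
// byte instead of separate advance_line/advance_pc ops. A checkable sketch:

#include <cassert>

int main() {
  const int OpcodeBase = 13, LineBase = -5, LineRange = 14;
  int LineDelta = 2, AddrDelta = 3;
  int Opcode = (LineDelta - LineBase) + AddrDelta * LineRange + OpcodeBase;
  assert(Opcode == 62 && Opcode <= 255);    // fits a single ubyte
  // Decoding inverts the packing, as consumers of the line program do:
  int Adjusted = Opcode - OpcodeBase;       // 49
  assert(Adjusted / LineRange == AddrDelta);
  assert(Adjusted % LineRange + LineBase == LineDelta);
  return 0;
}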
+// +void MCLineEntry::Make(MCStreamer *MCOS, const MCSection *Section) { + if (!MCOS->getContext().getDwarfLocSeen()) + return; + + // Create a symbol in the current section for use in the line entry. + MCSymbol *LineSym = MCOS->getContext().CreateTempSymbol(); + // Set the value of the symbol to use for the MCLineEntry. + MCOS->EmitLabel(LineSym); + + // Get the current .loc info saved in the context. + const MCDwarfLoc &DwarfLoc = MCOS->getContext().getCurrentDwarfLoc(); + + // Create a (local) line entry with the symbol and the current .loc info. + MCLineEntry LineEntry(LineSym, DwarfLoc); + + // Clear DwarfLocSeen saying the current .loc info is now used. + MCOS->getContext().ClearDwarfLocSeen(); + + // Get the MCLineSection for this section, if one does not exist for this + // section create it. + const DenseMap<const MCSection *, MCLineSection *> &MCLineSections = + MCOS->getContext().getMCLineSections(); + MCLineSection *LineSection = MCLineSections.lookup(Section); + if (!LineSection) { + // Create a new MCLineSection. This will be deleted after the dwarf line + // table is created using it by iterating through the MCLineSections + // DenseMap. + LineSection = new MCLineSection; + // Save a pointer to the new LineSection into the MCLineSections DenseMap. + MCOS->getContext().addMCLineSection(Section, LineSection); + } + + // Add the line entry to this section's entries. + LineSection->addLineEntry(LineEntry); +} + +// +// This helper routine returns an expression of End - Start - IntVal. +// +static inline const MCExpr *MakeStartMinusEndExpr(const MCStreamer &MCOS, + const MCSymbol &Start, + const MCSymbol &End, + int IntVal) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Res = + MCSymbolRefExpr::Create(&End, Variant, MCOS.getContext()); + const MCExpr *RHS = + MCSymbolRefExpr::Create(&Start, Variant, MCOS.getContext()); + const MCExpr *Res1 = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Res, RHS, MCOS.getContext()); + const MCExpr *Res2 = + MCConstantExpr::Create(IntVal, MCOS.getContext()); + const MCExpr *Res3 = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Res1, Res2, MCOS.getContext()); + return Res3; +} + +// +// This emits the Dwarf line table for the specified section from the entries +// in the LineSection. +// +static inline void EmitDwarfLineTable(MCStreamer *MCOS, + const MCSection *Section, + const MCLineSection *LineSection) { + unsigned FileNum = 1; + unsigned LastLine = 1; + unsigned Column = 0; + unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0; + unsigned Isa = 0; + MCSymbol *LastLabel = NULL; + + // Loop through each MCLineEntry and encode the dwarf line number table. 
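// [Editor's illustration; not part of the patch.] The loop that follows emits
// the line table as a delta stream: each register of the DWARF line-number
// state machine (file, column, isa, flags, line, address) is re-emitted only
// when it changes between consecutive MCLineEntries. A stripped-down model of
// that "emit only the diffs" pattern over hypothetical (file, line) rows:

#include <cstdio>

int main() {
  struct Row { unsigned File, Line; } rows[] = {{1, 10}, {1, 11}, {2, 11}};
  unsigned FileNum = 1, LastLine = 1;
  for (unsigned i = 0; i != 3; ++i) {
    if (FileNum != rows[i].File) {          // DW_LNS_set_file only on change
      FileNum = rows[i].File;
      printf("set_file %u\n", FileNum);
    }
    printf("advance_line %+d\n", (int)rows[i].Line - (int)LastLine);
    LastLine = rows[i].Line;
  }
  return 0;
}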
+ for (MCLineSection::const_iterator + it = LineSection->getMCLineEntries()->begin(), + ie = LineSection->getMCLineEntries()->end(); it != ie; ++it) { + + if (FileNum != it->getFileNum()) { + FileNum = it->getFileNum(); + MCOS->EmitIntValue(dwarf::DW_LNS_set_file, 1); + MCOS->EmitULEB128IntValue(FileNum); + } + if (Column != it->getColumn()) { + Column = it->getColumn(); + MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1); + MCOS->EmitULEB128IntValue(Column); + } + if (Isa != it->getIsa()) { + Isa = it->getIsa(); + MCOS->EmitIntValue(dwarf::DW_LNS_set_isa, 1); + MCOS->EmitULEB128IntValue(Isa); + } + if ((it->getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) { + Flags = it->getFlags(); + MCOS->EmitIntValue(dwarf::DW_LNS_negate_stmt, 1); + } + if (it->getFlags() & DWARF2_FLAG_BASIC_BLOCK) + MCOS->EmitIntValue(dwarf::DW_LNS_set_basic_block, 1); + if (it->getFlags() & DWARF2_FLAG_PROLOGUE_END) + MCOS->EmitIntValue(dwarf::DW_LNS_set_prologue_end, 1); + if (it->getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN) + MCOS->EmitIntValue(dwarf::DW_LNS_set_epilogue_begin, 1); + + int64_t LineDelta = static_cast<int64_t>(it->getLine()) - LastLine; + MCSymbol *Label = it->getLabel(); + + // At this point we want to emit/create the sequence to encode the delta in + // line numbers and the increment of the address from the previous Label + // and the current Label. + MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label); + + LastLine = it->getLine(); + LastLabel = Label; + } + + // Emit a DW_LNE_end_sequence for the end of the section. + // Using the pointer Section create a temporary label at the end of the + // section and use that and the LastLabel to compute the address delta + // and use INT64_MAX as the line delta which is the signal that this is + // actually a DW_LNE_end_sequence. + + // Switch to the section to be able to create a symbol at its end. + MCOS->SwitchSection(Section); + + MCContext &context = MCOS->getContext(); + // Create a symbol at the end of the section. + MCSymbol *SectionEnd = context.CreateTempSymbol(); + // Set the value of the symbol, as we are at the end of the section. + MCOS->EmitLabel(SectionEnd); + + // Switch back to the dwarf line section. + MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection()); + + MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd); +} + +// +// This emits the Dwarf file and the line tables. +// +void MCDwarfFileTable::Emit(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + // Switch to the section where the table will be emitted into. + MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection()); + + // Create a symbol at the beginning of this section. + MCSymbol *LineStartSym = context.CreateTempSymbol(); + // Set the value of the symbol, as we are at the start of the section. + MCOS->EmitLabel(LineStartSym); + + // Create a symbol for the end of the section (to be set when we get there). + MCSymbol *LineEndSym = context.CreateTempSymbol(); + + // The first 4 bytes is the total length of the information for this + // compilation unit (not including these 4 bytes for the length). + MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *LineEndSym,4), + 4); + + // Next 2 bytes is the Version, which is Dwarf 2. + MCOS->EmitIntValue(2, 2); + + // Create a symbol for the end of the prologue (to be set when we get there). + MCSymbol *ProEndSym = context.CreateTempSymbol(); // Lprologue_end + + // The length of the prologue is the next 4 bytes: from the start of the + // section to the end of the prologue, not including the 4 bytes for the + // total length, the 2 bytes for the version, and these 4 bytes for the + // length of the prologue. + MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, + (4 + 2 + 4)), + 4, 0); + + // Parameters of the state machine, are next. + MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1); + MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1); + MCOS->EmitIntValue(DWARF2_LINE_BASE, 1); + MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1); + MCOS->EmitIntValue(DWARF2_LINE_OPCODE_BASE, 1); + + // Standard opcode lengths + MCOS->EmitIntValue(0, 1); // length of DW_LNS_copy + MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_pc + MCOS->EmitIntValue(1, 1); // length of DW_LNS_advance_line + MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_file + MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_column + MCOS->EmitIntValue(0, 1); // length of DW_LNS_negate_stmt + MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_basic_block + MCOS->EmitIntValue(0, 1); // length of DW_LNS_const_add_pc + MCOS->EmitIntValue(1, 1); // length of DW_LNS_fixed_advance_pc + MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_prologue_end + MCOS->EmitIntValue(0, 1); // length of DW_LNS_set_epilogue_begin + MCOS->EmitIntValue(1, 1); // length of DW_LNS_set_isa + + // Put out the directory and file tables. + + // First the directory table. + const std::vector<StringRef> &MCDwarfDirs = + context.getMCDwarfDirs(); + for (unsigned i = 0; i < MCDwarfDirs.size(); i++) { + MCOS->EmitBytes(MCDwarfDirs[i], 0); // the DirectoryName + MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string + } + MCOS->EmitIntValue(0, 1); // Terminate the directory list + + // Second the file table. + const std::vector<MCDwarfFile *> &MCDwarfFiles = + MCOS->getContext().getMCDwarfFiles(); + for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { + MCOS->EmitBytes(MCDwarfFiles[i]->getName(), 0); // FileName + MCOS->EmitBytes(StringRef("\0", 1), 0); // the null term. of the string + // the Directory num + MCOS->EmitULEB128IntValue(MCDwarfFiles[i]->getDirIndex()); + MCOS->EmitIntValue(0, 1); // last modification timestamp (always 0) + MCOS->EmitIntValue(0, 1); // filesize (always 0) + } + MCOS->EmitIntValue(0, 1); // Terminate the file list + + // This is the end of the prologue, so set the value of the symbol at the + // end of the prologue (that was used in a previous expression). + MCOS->EmitLabel(ProEndSym); + + // Put out the line tables. + const DenseMap<const MCSection *, MCLineSection *> &MCLineSections = + MCOS->getContext().getMCLineSections(); + const std::vector<const MCSection *> &MCLineSectionOrder = + MCOS->getContext().getMCLineSectionOrder(); + for (std::vector<const MCSection *>::const_iterator it = + MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie; + ++it) { + const MCSection *Sec = *it; + const MCLineSection *Line = MCLineSections.lookup(Sec); + EmitDwarfLineTable(MCOS, Sec, Line); + + // Now delete the MCLineSections that were created in MCLineEntry::Make() + // and used to emit the line table. + delete Line; + } + + if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines() + && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) { + // The darwin9 linker has a bug (see PR8715). For 32-bit architectures + // it requires: + // total_length >= prologue_length + 10 + // We are 4 bytes short, since we have total_length = 51 and + // prologue_length = 45 + + // The regular end_sequence should be sufficient. 
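// [Editor's note; not part of the patch.] The INT64_MAX passed on the next
// line is the in-band sentinel that MCDwarfLineAddr::Encode() (further below)
// turns into DW_LNE_end_sequence rather than a line advance, so the call
// emits a complete, if empty, sequence. A minimal sketch of that dispatch,
// using LLONG_MAX as a stand-in for the sentinel:

#include <climits>
#include <cstdio>

static void encodeSketch(long long LineDelta) {
  if (LineDelta == LLONG_MAX)                  // sentinel, never a real delta
    printf("DW_LNS_extended_op 1 DW_LNE_end_sequence\n");
  else
    printf("line delta %+lld via special/standard opcodes\n", LineDelta);
}

int main() { encodeSketch(LLONG_MAX); encodeSketch(2); return 0; }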
+ MCDwarfLineAddr::Emit(MCOS, INT64_MAX, 0); + } + + // This is the end of the section, so set the value of the symbol at the end + // of this section (that was used in a previous expression). + MCOS->EmitLabel(LineEndSym); +} + +/// Utility function to write the encoding to an object writer. +void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta, + uint64_t AddrDelta) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS); + OW->WriteBytes(OS.str()); +} + +/// Utility function to emit the encoding to a streamer. +void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta, + uint64_t AddrDelta) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS); + MCOS->EmitBytes(OS.str(), /*AddrSpace=*/0); +} + +/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. +void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, + raw_ostream &OS) { + uint64_t Temp, Opcode; + bool NeedCopy = false; + + // Scale the address delta by the minimum instruction length. + AddrDelta = ScaleAddrDelta(AddrDelta); + + // A LineDelta of INT64_MAX is a signal that this is actually a + // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the + // end_sequence to emit the matrix entry. + if (LineDelta == INT64_MAX) { + if (AddrDelta == MAX_SPECIAL_ADDR_DELTA) + OS << char(dwarf::DW_LNS_const_add_pc); + else { + OS << char(dwarf::DW_LNS_advance_pc); + SmallString<32> Tmp; + raw_svector_ostream OSE(Tmp); + MCObjectWriter::EncodeULEB128(AddrDelta, OSE); + OS << OSE.str(); + } + OS << char(dwarf::DW_LNS_extended_op); + OS << char(1); + OS << char(dwarf::DW_LNE_end_sequence); + return; + } + + // Bias the line delta by the base. + Temp = LineDelta - DWARF2_LINE_BASE; + + // If the line increment is out of range of a special opcode, we must encode + // it with DW_LNS_advance_line. + if (Temp >= DWARF2_LINE_RANGE) { + OS << char(dwarf::DW_LNS_advance_line); + SmallString<32> Tmp; + raw_svector_ostream OSE(Tmp); + MCObjectWriter::EncodeSLEB128(LineDelta, OSE); + OS << OSE.str(); + + LineDelta = 0; + Temp = 0 - DWARF2_LINE_BASE; + NeedCopy = true; + } + + // Use DW_LNS_copy instead of a "line +0, addr +0" special opcode. + if (LineDelta == 0 && AddrDelta == 0) { + OS << char(dwarf::DW_LNS_copy); + return; + } + + // Bias the opcode by the special opcode base. + Temp += DWARF2_LINE_OPCODE_BASE; + + // Avoid overflow when addr_delta is large. + if (AddrDelta < 256 + MAX_SPECIAL_ADDR_DELTA) { + // Try using a special opcode. + Opcode = Temp + AddrDelta * DWARF2_LINE_RANGE; + if (Opcode <= 255) { + OS << char(Opcode); + return; + } + + // Try using DW_LNS_const_add_pc followed by special op. + Opcode = Temp + (AddrDelta - MAX_SPECIAL_ADDR_DELTA) * DWARF2_LINE_RANGE; + if (Opcode <= 255) { + OS << char(dwarf::DW_LNS_const_add_pc); + OS << char(Opcode); + return; + } + } + + // Otherwise use DW_LNS_advance_pc. 
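// [Editor's illustration; not part of the patch.] The fallback path that
// follows writes the address delta as the ULEB128 payload of
// DW_LNS_advance_pc. A minimal standalone encoder equivalent in output to
// what MCObjectWriter::EncodeULEB128 produces, checked against the classic
// DWARF example value 624485 -> e5 8e 26:

#include <cassert>
#include <cstdint>
#include <vector>

static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;   // low 7 bits of the remaining value
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                // set continuation bit: more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
}

int main() {
  std::vector<uint8_t> Out;
  encodeULEB128(624485, Out);
  assert(Out.size() == 3 && Out[0] == 0xe5 && Out[1] == 0x8e && Out[2] == 0x26);
  return 0;
}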
+ OS << char(dwarf::DW_LNS_advance_pc); + SmallString<32> Tmp; + raw_svector_ostream OSE(Tmp); + MCObjectWriter::EncodeULEB128(AddrDelta, OSE); + OS << OSE.str(); + + if (NeedCopy) + OS << char(dwarf::DW_LNS_copy); + else + OS << char(Temp); +} + +void MCDwarfFile::print(raw_ostream &OS) const { + OS << '"' << getName() << '"'; +} + +void MCDwarfFile::dump() const { + print(dbgs()); +} + +static int getDataAlignmentFactor(MCStreamer &streamer) { + MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + int size = asmInfo.getPointerSize(); + if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp) + return size; + else + return -size; +} + +static void EmitCFIInstruction(MCStreamer &Streamer, + const MCCFIInstruction &Instr) { + int dataAlignmentFactor = getDataAlignmentFactor(Streamer); + + switch (Instr.getOperation()) { + case MCCFIInstruction::Move: { + const MachineLocation &Dst = Instr.getDestination(); + const MachineLocation &Src = Instr.getSource(); + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + + if (Src.getReg() == MachineLocation::VirtualFP) { + Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1); + } else { + Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1); + Streamer.EmitULEB128IntValue(Src.getReg()); + } + + Streamer.EmitULEB128IntValue(-Src.getOffset(), 1); + return; + } + + if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1); + Streamer.EmitULEB128IntValue(Dst.getReg()); + return; + } + + unsigned Reg = Src.getReg(); + int Offset = Dst.getOffset() / dataAlignmentFactor; + + if (Offset < 0) { + Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1); + Streamer.EmitULEB128IntValue(Reg); + Streamer.EmitSLEB128IntValue(Offset); + } else if (Reg < 64) { + Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1); + Streamer.EmitULEB128IntValue(Offset, 1); + } else { + Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1); + Streamer.EmitULEB128IntValue(Reg, 1); + Streamer.EmitULEB128IntValue(Offset, 1); + } + return; + } + case MCCFIInstruction::Remember: + Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1); + return; + case MCCFIInstruction::Restore: + Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1); + return; + } + llvm_unreachable("Unhandled case in switch"); +} + +/// EmitCFIInstructions - Emit frame instructions to describe the layout of the +/// frame. +static void EmitCFIInstructions(MCStreamer &streamer, + const std::vector<MCCFIInstruction> &Instrs, + MCSymbol *BaseLabel) { + for (unsigned i = 0, N = Instrs.size(); i < N; ++i) { + const MCCFIInstruction &Instr = Instrs[i]; + MCSymbol *Label = Instr.getLabel(); + // Throw out move if the label is invalid. + if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. + + // Advance row if new location. 
+ if (BaseLabel && Label) { + MCSymbol *ThisSym = Label; + if (ThisSym != BaseLabel) { + streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym); + BaseLabel = ThisSym; + } + } + + EmitCFIInstruction(streamer, Instr); + } +} + +static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + unsigned format = symbolEncoding & 0x0f; + unsigned application = symbolEncoding & 0x70; + unsigned size; + switch (format) { + default: + assert(0 && "Unknown Encoding"); + case dwarf::DW_EH_PE_absptr: + case dwarf::DW_EH_PE_signed: + size = asmInfo.getPointerSize(); + break; + case dwarf::DW_EH_PE_udata2: + case dwarf::DW_EH_PE_sdata2: + size = 2; + break; + case dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_sdata4: + size = 4; + break; + case dwarf::DW_EH_PE_udata8: + case dwarf::DW_EH_PE_sdata8: + size = 8; + break; + } + switch (application) { + default: + assert(0 && "Unknown Encoding"); + break; + case 0: + streamer.EmitSymbolValue(&symbol, size); + break; + case dwarf::DW_EH_PE_pcrel: + streamer.EmitPCRelSymbolValue(&symbol, size); + break; + } +} + +static const MachineLocation TranslateMachineLocation( + const TargetAsmInfo &AsmInfo, + const MachineLocation &Loc) { + unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? + MachineLocation::VirtualFP : + unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); + const MachineLocation &NewLoc = Loc.isReg() ? + MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); + return NewLoc; +} + +static const MCSymbol &EmitCIE(MCStreamer &streamer, + const MCSymbol *personality, + unsigned personalityEncoding, + const MCSymbol *lsda, + unsigned lsdaEncoding) { + MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + const MCSection &section = *asmInfo.getEHFrameSection(); + streamer.SwitchSection(&section); + MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol(); + MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol(); + + // Length + const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart, + *sectionEnd, 4); + streamer.EmitLabel(sectionStart); + streamer.EmitValue(Length, 4); + + // CIE ID + streamer.EmitIntValue(0, 4); + + // Version + streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); + + // Augmentation String + SmallString<8> Augmentation; + Augmentation += "z"; + if (personality) + Augmentation += "P"; + if (lsda) + Augmentation += "L"; + Augmentation += "R"; + streamer.EmitBytes(Augmentation.str(), 0); + streamer.EmitIntValue(0, 1); + + // Code Alignment Factor + streamer.EmitULEB128IntValue(1); + + // Data Alignment Factor + streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer)); + + // Return Address Register + streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true)); + + // Augmentation Data Length (optional) + MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); + MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); + const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, + *augmentationStart, + *augmentationEnd, 0); + streamer.EmitULEB128Value(augmentationLength); + + // Augmentation Data (optional) + streamer.EmitLabel(augmentationStart); + if (personality) { + // Personality Encoding + streamer.EmitIntValue(personalityEncoding, 1); + // Personality + EmitSymbol(streamer, *personality, personalityEncoding); + } + if (lsda) { + // LSDA Encoding + 
streamer.EmitIntValue(lsdaEncoding, 1); + } + // Encoding of the FDE pointers + streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1); + streamer.EmitLabel(augmentationEnd); + + // Initial Instructions + + const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState(); + std::vector<MCCFIInstruction> Instructions; + + for (int i = 0, n = Moves.size(); i != n; ++i) { + MCSymbol *Label = Moves[i].getLabel(); + const MachineLocation &Dst = + TranslateMachineLocation(asmInfo, Moves[i].getDestination()); + const MachineLocation &Src = + TranslateMachineLocation(asmInfo, Moves[i].getSource()); + MCCFIInstruction Inst(Label, Dst, Src); + Instructions.push_back(Inst); + } + + EmitCFIInstructions(streamer, Instructions, NULL); + + // Padding + streamer.EmitValueToAlignment(4); + + streamer.EmitLabel(sectionEnd); + return *sectionStart; +} + +static MCSymbol *EmitFDE(MCStreamer &streamer, + const MCSymbol &cieStart, + const MCDwarfFrameInfo &frame) { + MCContext &context = streamer.getContext(); + MCSymbol *fdeStart = context.CreateTempSymbol(); + MCSymbol *fdeEnd = context.CreateTempSymbol(); + + // Length + const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); + streamer.EmitValue(Length, 4); + + streamer.EmitLabel(fdeStart); + // CIE Pointer + const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, + 0); + streamer.EmitValue(offset, 4); + + // PC Begin + streamer.EmitPCRelSymbolValue(frame.Begin, 4); + + // PC Range + const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, + *frame.End, 0); + streamer.EmitValue(Range, 4); + + // Augmentation Data Length + MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); + MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); + const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, + *augmentationStart, + *augmentationEnd, 0); + streamer.EmitULEB128Value(augmentationLength); + + // Augmentation Data + streamer.EmitLabel(augmentationStart); + if (frame.Lsda) + EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding); + streamer.EmitLabel(augmentationEnd); + // Call Frame Instructions + + EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); + + // Padding + streamer.EmitValueToAlignment(4); + + return fdeEnd; +} + +namespace { + struct CIEKey { + static const CIEKey getEmptyKey() { return CIEKey(0, 0, -1); } + static const CIEKey getTombstoneKey() { return CIEKey(0, -1, 0); } + + CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, + unsigned LsdaEncoding_) : Personality(Personality_), + PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_) { + } + const MCSymbol* Personality; + unsigned PersonalityEncoding; + unsigned LsdaEncoding; + }; +} + +namespace llvm { + template <> + struct DenseMapInfo<CIEKey> { + static CIEKey getEmptyKey() { + return CIEKey::getEmptyKey(); + } + static CIEKey getTombstoneKey() { + return CIEKey::getTombstoneKey(); + } + static unsigned getHashValue(const CIEKey &Key) { + FoldingSetNodeID ID; + ID.AddPointer(Key.Personality); + ID.AddInteger(Key.PersonalityEncoding); + ID.AddInteger(Key.LsdaEncoding); + return ID.ComputeHash(); + } + static bool isEqual(const CIEKey &LHS, + const CIEKey &RHS) { + return LHS.Personality == RHS.Personality && + LHS.PersonalityEncoding == RHS.PersonalityEncoding && + LHS.LsdaEncoding == RHS.LsdaEncoding; + } + }; +} + +void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) { + const MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = 
context.getTargetAsmInfo(); + MCSymbol *fdeEnd = NULL; + DenseMap<CIEKey, const MCSymbol*> CIEStarts; + + for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); + CIEKey key(frame.Personality, frame.PersonalityEncoding, + frame.LsdaEncoding); + const MCSymbol *&cieStart = CIEStarts[key]; + if (!cieStart) + cieStart = &EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, frame.Lsda, + frame.LsdaEncoding); + fdeEnd = EmitFDE(streamer, *cieStart, frame); + if (i != n - 1) + streamer.EmitLabel(fdeEnd); + } + + streamer.EmitValueToAlignment(asmInfo.getPointerSize()); + if (fdeEnd) + streamer.EmitLabel(fdeEnd); +} + +void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, + uint64_t AddrDelta) { + SmallString<256> Tmp; + raw_svector_ostream OS(Tmp); + MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS); + Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0); +} + +void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta, + raw_ostream &OS) { + // FIXME: Assumes the code alignment factor is 1. + if (AddrDelta == 0) { + } else if (isUIntN(6, AddrDelta)) { + uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; + OS << Opcode; + } else if (isUInt<8>(AddrDelta)) { + OS << uint8_t(dwarf::DW_CFA_advance_loc1); + OS << uint8_t(AddrDelta); + } else if (isUInt<16>(AddrDelta)) { + // FIXME: check what is the correct behavior on a big endian machine. + OS << uint8_t(dwarf::DW_CFA_advance_loc2); + OS << uint8_t( AddrDelta & 0xff); + OS << uint8_t((AddrDelta >> 8) & 0xff); + } else { + // FIXME: check what is the correct behavior on a big endian machine. + assert(isUInt<32>(AddrDelta)); + OS << uint8_t(dwarf::DW_CFA_advance_loc4); + OS << uint8_t( AddrDelta & 0xff); + OS << uint8_t((AddrDelta >> 8) & 0xff); + OS << uint8_t((AddrDelta >> 16) & 0xff); + OS << uint8_t((AddrDelta >> 24) & 0xff); + + } +} diff --git a/final/lib/MC/MCELF.cpp b/final/lib/MC/MCELF.cpp new file mode 100644 index 00000000000..ce7783e2862 --- /dev/null +++ b/final/lib/MC/MCELF.cpp @@ -0,0 +1,72 @@ +//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. 
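// [Editor's illustration; not part of the patch.] The helpers in this file
// stash the ELF binding, type, and visibility in bit-fields of MCSymbolData's
// flags word; at write time they are repacked into the symbol table's
// st_info/st_other bytes. The st_info packing itself is fixed by the ELF
// specification:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned STB_GLOBAL = 1, STT_FUNC = 2;            // ELF constants
  uint8_t st_info = (STB_GLOBAL << 4) | (STT_FUNC & 0xf); // ELF32_ST_INFO
  assert(st_info == 0x12);
  assert((st_info >> 4) == STB_GLOBAL);                   // ELF32_ST_BIND
  assert((st_info & 0xf) == STT_FUNC);                    // ELF32_ST_TYPE
  return 0;
}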
+// +//===----------------------------------------------------------------------===// + +#include "MCELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetAsmBackend.h" + +namespace llvm { + +void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) { + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); + SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); +} + +unsigned MCELF::GetBinding(const MCSymbolData &SD) { + uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + return Binding; +} + +void MCELF::SetType(MCSymbolData &SD, unsigned Type) { + assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); + SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); +} + +unsigned MCELF::GetType(const MCSymbolData &SD) { + uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; + assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + return Type; +} + +void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) { + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift); + SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); +} + +unsigned MCELF::GetVisibility(MCSymbolData &SD) { + unsigned Visibility = + (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift; + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + return Visibility; +} + +} diff --git a/final/lib/MC/MCELF.h b/final/lib/MC/MCELF.h new file mode 100644 index 00000000000..e08f1e65429 --- /dev/null +++ b/final/lib/MC/MCELF.h @@ -0,0 +1,35 @@ +//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains some support functions used by the ELF Streamer and +// ObjectWriter. 
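// [Editor's illustration; not part of the patch.] All six accessors declared
// below follow one read-modify-write pattern over a single 32-bit flags word:
// clear the 4-bit field at its shift, then OR in the new value. The same
// pattern in isolation, with hypothetical shift positions:

#include <cassert>
#include <cstdint>

static uint32_t setField(uint32_t Flags, unsigned Shift, uint32_t Value) {
  // Clear the old 4-bit field, then install the new value.
  return (Flags & ~(uint32_t(0xf) << Shift)) | (Value << Shift);
}
static uint32_t getField(uint32_t Flags, unsigned Shift) {
  return (Flags >> Shift) & 0xf;
}

int main() {
  uint32_t Flags = 0;
  Flags = setField(Flags, 8, 2);    // e.g. a binding field at bit 8
  Flags = setField(Flags, 12, 1);   // e.g. a type field at bit 12
  assert(getField(Flags, 8) == 2 && getField(Flags, 12) == 1);
  return 0;
}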
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELF_H +#define LLVM_MC_MCELF_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCSymbolData; + +class MCELF { + public: + static void SetBinding(MCSymbolData &SD, unsigned Binding); + static unsigned GetBinding(const MCSymbolData &SD); + static void SetType(MCSymbolData &SD, unsigned Type); + static unsigned GetType(const MCSymbolData &SD); + static void SetVisibility(MCSymbolData &SD, unsigned Visibility); + static unsigned GetVisibility(MCSymbolData &SD); +}; + +} + +#endif diff --git a/final/lib/MC/MCELFObjectTargetWriter.cpp b/final/lib/MC/MCELFObjectTargetWriter.cpp new file mode 100644 index 00000000000..12a02a9e974 --- /dev/null +++ b/final/lib/MC/MCELFObjectTargetWriter.cpp @@ -0,0 +1,23 @@ +//===-- MCELFObjectTargetWriter.cpp - ELF Target Writer Subclass ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCELFObjectWriter.h" + +using namespace llvm; + +MCELFObjectTargetWriter::MCELFObjectTargetWriter(bool Is64Bit_, + Triple::OSType OSType_, + uint16_t EMachine_, + bool HasRelocationAddend_) + : OSType(OSType_), EMachine(EMachine_), + HasRelocationAddend(HasRelocationAddend_), Is64Bit(Is64Bit_) { +} + +MCELFObjectTargetWriter::~MCELFObjectTargetWriter() { +} diff --git a/final/lib/MC/MCELFStreamer.cpp b/final/lib/MC/MCELFStreamer.cpp new file mode 100644 index 00000000000..9fc9173914d --- /dev/null +++ b/final/lib/MC/MCELFStreamer.cpp @@ -0,0 +1,383 @@ +//===- lib/MC/MCELFStreamer.cpp - ELF Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#include "MCELFStreamer.h" +#include "MCELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetAsmInfo.h" + +using namespace llvm; + +void MCELFStreamer::InitSections() { + // This emulates the same behavior of GNU as. This makes it easier + // to compare the output as the major sections are in the same order. + SetSectionText(); + SetSectionData(); + SetSectionBss(); + SetSectionText(); +} + +void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCObjectStreamer::EmitLabel(Symbol); + + const MCSectionELF &Section = + static_cast<const MCSectionELF &>(Symbol->getSection()); + MCSymbolData &SD = getAssembler().getSymbolData(*Symbol); + if (Section.getFlags() & ELF::SHF_TLS) + MCELF::SetType(SD, ELF::STT_TLS); +} + +void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + case MCAF_SyntaxUnified: return; // no-op here. 
+ case MCAF_Code16: return; // no-op here. + case MCAF_Code32: return; // no-op here. + case MCAF_SubsectionsViaSymbols: + getAssembler().setSubsectionsViaSymbols(true); + return; + } + + assert(0 && "invalid assembler flag!"); +} + +void MCELFStreamer::EmitThumbFunc(MCSymbol *Func) { + // FIXME: Anything needed here to flag the function as thumb? +} + +void MCELFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCELFStreamer::ChangeSection(const MCSection *Section) { + const MCSymbol *Grp = static_cast(Section)->getGroup(); + if (Grp) + getAssembler().getOrCreateSymbolData(*Grp); + this->MCObjectStreamer::ChangeSection(Section); +} + +void MCELFStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { + getAssembler().getOrCreateSymbolData(*Symbol); + MCSymbolData &AliasSD = getAssembler().getOrCreateSymbolData(*Alias); + AliasSD.setFlags(AliasSD.getFlags() | ELF_Other_Weakref); + const MCExpr *Value = MCSymbolRefExpr::Create(Symbol, getContext()); + Alias->setVariableValue(Value); +} + +void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + // Indirect symbols are handled differently, to match how 'as' handles + // them. This makes writing matching .o files easier. + if (Attribute == MCSA_IndirectSymbol) { + // Note that we intentionally cannot use the symbol data here; this is + // important for matching the string table that 'as' generates. + IndirectSymbolData ISD; + ISD.Symbol = Symbol; + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); + return; + } + + // Adding a symbol attribute always introduces the symbol, note that an + // important side effect of calling getOrCreateSymbolData here is to register + // the symbol with the assembler. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // The implementation of symbol attributes is designed to match 'as', but it + // leaves much to desired. It doesn't really make sense to arbitrarily add and + // remove flags, but 'as' allows this (in particular, see .desc). + // + // In the future it might be worth trying to make these operations more well + // defined. + switch (Attribute) { + case MCSA_LazyReference: + case MCSA_Reference: + case MCSA_NoDeadStrip: + case MCSA_SymbolResolver: + case MCSA_PrivateExtern: + case MCSA_WeakDefinition: + case MCSA_WeakDefAutoPrivate: + case MCSA_Invalid: + case MCSA_ELF_TypeIndFunction: + case MCSA_IndirectSymbol: + assert(0 && "Invalid symbol attribute for ELF!"); + break; + + case MCSA_ELF_TypeGnuUniqueObject: + // Ignore for now. 
+ break; + + case MCSA_Global: + MCELF::SetBinding(SD, ELF::STB_GLOBAL); + SD.setExternal(true); + BindingExplicitlySet.insert(Symbol); + break; + + case MCSA_WeakReference: + case MCSA_Weak: + MCELF::SetBinding(SD, ELF::STB_WEAK); + SD.setExternal(true); + BindingExplicitlySet.insert(Symbol); + break; + + case MCSA_Local: + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + BindingExplicitlySet.insert(Symbol); + break; + + case MCSA_ELF_TypeFunction: + MCELF::SetType(SD, ELF::STT_FUNC); + break; + + case MCSA_ELF_TypeObject: + MCELF::SetType(SD, ELF::STT_OBJECT); + break; + + case MCSA_ELF_TypeTLS: + MCELF::SetType(SD, ELF::STT_TLS); + break; + + case MCSA_ELF_TypeCommon: + MCELF::SetType(SD, ELF::STT_COMMON); + break; + + case MCSA_ELF_TypeNoType: + MCELF::SetType(SD, ELF::STT_NOTYPE); + break; + + case MCSA_Protected: + MCELF::SetVisibility(SD, ELF::STV_PROTECTED); + break; + + case MCSA_Hidden: + MCELF::SetVisibility(SD, ELF::STV_HIDDEN); + break; + + case MCSA_Internal: + MCELF::SetVisibility(SD, ELF::STV_INTERNAL); + break; + } +} + +void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + if (!BindingExplicitlySet.count(Symbol)) { + MCELF::SetBinding(SD, ELF::STB_GLOBAL); + SD.setExternal(true); + } + + MCELF::SetType(SD, ELF::STT_OBJECT); + + if (MCELF::GetBinding(SD) == ELF_STB_Local) { + const MCSection *Section = getAssembler().getContext().getELFSection(".bss", + ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, + SectionKind::getBSS()); + Symbol->setSection(*Section); + + struct LocalCommon L = {&SD, Size, ByteAlignment}; + LocalCommons.push_back(L); + } else { + SD.setCommon(Size, ByteAlignment); + } + + SD.setSize(MCConstantExpr::Create(Size, getContext())); +} + +void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + // FIXME: Should this be caught and done earlier? + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + MCELF::SetBinding(SD, ELF::STB_LOCAL); + SD.setExternal(false); + BindingExplicitlySet.insert(Symbol); + // FIXME: ByteAlignment is not needed here, but is required. + EmitCommonSymbol(Symbol, Size, 1); +} + +void MCELFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCELFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. 
+ if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +// Add a symbol for the file name of this module. This is the second +// entry in the module's symbol table (the first being the null symbol). +void MCELFStreamer::EmitFileDirective(StringRef Filename) { + MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); + Symbol->setSection(*getCurrentSection()); + Symbol->setAbsolute(); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default); +} + +void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { + switch (expr->getKind()) { + case MCExpr::Target: llvm_unreachable("Can't handle target exprs yet!"); + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *be = cast(expr); + fixSymbolsInTLSFixups(be->getLHS()); + fixSymbolsInTLSFixups(be->getRHS()); + break; + } + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &symRef = *cast(expr); + switch (symRef.getKind()) { + default: + return; + case MCSymbolRefExpr::VK_GOTTPOFF: + case MCSymbolRefExpr::VK_INDNTPOFF: + case MCSymbolRefExpr::VK_NTPOFF: + case MCSymbolRefExpr::VK_GOTNTPOFF: + case MCSymbolRefExpr::VK_TLSGD: + case MCSymbolRefExpr::VK_TLSLD: + case MCSymbolRefExpr::VK_TLSLDM: + case MCSymbolRefExpr::VK_TPOFF: + case MCSymbolRefExpr::VK_DTPOFF: + case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_ARM_TPOFF: + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + break; + } + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); + MCELF::SetType(SD, ELF::STT_TLS); + break; + } + + case MCExpr::Unary: + fixSymbolsInTLSFixups(cast(expr)->getSubExpr()); + break; + } +} + +void MCELFStreamer::EmitInstToFragment(const MCInst &Inst) { + this->MCObjectStreamer::EmitInstToFragment(Inst); + MCInstFragment &F = *cast(getCurrentFragment()); + + for (unsigned i = 0, e = F.getFixups().size(); i != e; ++i) + fixSymbolsInTLSFixups(F.getFixups()[i].getValue()); +} + +void MCELFStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) + fixSymbolsInTLSFixups(Fixups[i].getValue()); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCELFStreamer::Finish() { + if (getNumFrameInfos()) + MCDwarfFrameEmitter::Emit(*this); + + for (std::vector::const_iterator i = LocalCommons.begin(), + e = LocalCommons.end(); + i != e; ++i) { + MCSymbolData *SD = i->SD; + uint64_t Size = i->Size; + unsigned ByteAlignment = i->ByteAlignment; + const MCSymbol &Symbol = SD->getSymbol(); + const MCSection &Section = Symbol.getSection(); + + MCSectionData &SectData = getAssembler().getOrCreateSectionData(Section); + new MCAlignFragment(ByteAlignment, 0, 1, ByteAlignment, &SectData); + + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD->setFragment(F); + + // Update the maximum alignment of the section if necessary. 
+ if (ByteAlignment > SectData.getAlignment()) + SectData.setAlignment(ByteAlignment); + } + + this->MCObjectStreamer::Finish(); +} + +MCStreamer *llvm::createELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll, bool NoExecStack) { + MCELFStreamer *S = new MCELFStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + if (NoExecStack) + S->getAssembler().setNoExecStack(true); + return S; +} diff --git a/final/lib/MC/MCELFStreamer.h b/final/lib/MC/MCELFStreamer.h new file mode 100644 index 00000000000..091101dceba --- /dev/null +++ b/final/lib/MC/MCELFStreamer.h @@ -0,0 +1,268 @@ +//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCELFStreamer : public MCObjectStreamer { +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void 
EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void Finish(); + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + std::vector LocalCommons; + + SmallPtrSet BindingExplicitlySet; + /// @} + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); + } + + void SetSectionData() { + SetSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + EmitCodeAlignment(4, 0); + } + void SetSectionText() { + SetSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); + EmitCodeAlignment(4, 0); + } + void SetSectionBss() { + SetSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } +}; + +} // end llvm namespace + +#endif +//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCELFStreamer : public MCObjectStreamer { +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const 
MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void Finish(); + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + std::vector LocalCommons; + + SmallPtrSet BindingExplicitlySet; + /// @} + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); + } + + void SetSectionData() { + SetSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + EmitCodeAlignment(4, 0); + } + void SetSectionText() { + SetSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); + EmitCodeAlignment(4, 0); + } + void SetSectionBss() { + SetSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } +}; + +} // end llvm namespace + +#endif diff --git a/final/lib/MC/MCExpr.cpp b/final/lib/MC/MCExpr.cpp new file mode 100644 index 00000000000..54d3743e68e --- /dev/null +++ b/final/lib/MC/MCExpr.cpp @@ -0,0 +1,556 @@ +//===- MCExpr.cpp - Assembly Level Expression Implementation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mcexpr" +#include "llvm/MC/MCExpr.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +using namespace llvm; + +namespace { +namespace stats { +STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); +} +} + +void MCExpr::print(raw_ostream &OS) const { + switch (getKind()) { + case MCExpr::Target: + return cast(this)->PrintImpl(OS); + case MCExpr::Constant: + OS << cast(*this).getValue(); + return; + + case MCExpr::SymbolRef: { + const MCSymbolRefExpr &SRE = cast(*this); + const MCSymbol &Sym = SRE.getSymbol(); + // Parenthesize names that start with $ so that they don't look like + // absolute names. 
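  // e.g. a symbol named "$1" prints as "($1)"; without the parentheses an
  // assembler reading the output back could lex "$1" as an absolute value.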
+ bool UseParens = Sym.getName()[0] == '$'; + + if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_HA16 || + SRE.getKind() == MCSymbolRefExpr::VK_PPC_LO16) { + OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + UseParens = true; + } + + if (UseParens) + OS << '(' << Sym << ')'; + else + OS << Sym; + + if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_PLT || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TLSGD || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOT || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTOFF || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_TPOFF || + SRE.getKind() == MCSymbolRefExpr::VK_ARM_GOTTPOFF) + OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + else if (SRE.getKind() != MCSymbolRefExpr::VK_None && + SRE.getKind() != MCSymbolRefExpr::VK_PPC_HA16 && + SRE.getKind() != MCSymbolRefExpr::VK_PPC_LO16) + OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); + + return; + } + + case MCExpr::Unary: { + const MCUnaryExpr &UE = cast(*this); + switch (UE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCUnaryExpr::LNot: OS << '!'; break; + case MCUnaryExpr::Minus: OS << '-'; break; + case MCUnaryExpr::Not: OS << '~'; break; + case MCUnaryExpr::Plus: OS << '+'; break; + } + OS << *UE.getSubExpr(); + return; + } + + case MCExpr::Binary: { + const MCBinaryExpr &BE = cast(*this); + + // Only print parens around the LHS if it is non-trivial. + if (isa(BE.getLHS()) || isa(BE.getLHS())) { + OS << *BE.getLHS(); + } else { + OS << '(' << *BE.getLHS() << ')'; + } + + switch (BE.getOpcode()) { + default: assert(0 && "Invalid opcode!"); + case MCBinaryExpr::Add: + // Print "X-42" instead of "X+-42". + if (const MCConstantExpr *RHSC = dyn_cast(BE.getRHS())) { + if (RHSC->getValue() < 0) { + OS << RHSC->getValue(); + return; + } + } + + OS << '+'; + break; + case MCBinaryExpr::And: OS << '&'; break; + case MCBinaryExpr::Div: OS << '/'; break; + case MCBinaryExpr::EQ: OS << "=="; break; + case MCBinaryExpr::GT: OS << '>'; break; + case MCBinaryExpr::GTE: OS << ">="; break; + case MCBinaryExpr::LAnd: OS << "&&"; break; + case MCBinaryExpr::LOr: OS << "||"; break; + case MCBinaryExpr::LT: OS << '<'; break; + case MCBinaryExpr::LTE: OS << "<="; break; + case MCBinaryExpr::Mod: OS << '%'; break; + case MCBinaryExpr::Mul: OS << '*'; break; + case MCBinaryExpr::NE: OS << "!="; break; + case MCBinaryExpr::Or: OS << '|'; break; + case MCBinaryExpr::Shl: OS << "<<"; break; + case MCBinaryExpr::Shr: OS << ">>"; break; + case MCBinaryExpr::Sub: OS << '-'; break; + case MCBinaryExpr::Xor: OS << '^'; break; + } + + // Only print parens around the LHS if it is non-trivial. 
+  if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) {
+    OS << *BE.getRHS();
+  } else {
+    OS << '(' << *BE.getRHS() << ')';
+  }
+  return;
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+}
+
+void MCExpr::dump() const {
+  print(dbgs());
+  dbgs() << '\n';
+}
+
+/* *** */
+
+const MCBinaryExpr *MCBinaryExpr::Create(Opcode Opc, const MCExpr *LHS,
+                                         const MCExpr *RHS, MCContext &Ctx) {
+  return new (Ctx) MCBinaryExpr(Opc, LHS, RHS);
+}
+
+const MCUnaryExpr *MCUnaryExpr::Create(Opcode Opc, const MCExpr *Expr,
+                                       MCContext &Ctx) {
+  return new (Ctx) MCUnaryExpr(Opc, Expr);
+}
+
+const MCConstantExpr *MCConstantExpr::Create(int64_t Value, MCContext &Ctx) {
+  return new (Ctx) MCConstantExpr(Value);
+}
+
+/* *** */
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
+                                               VariantKind Kind,
+                                               MCContext &Ctx) {
+  return new (Ctx) MCSymbolRefExpr(Sym, Kind);
+}
+
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
+                                               MCContext &Ctx) {
+  return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
+}
+
+StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
+  switch (Kind) {
+  default:
+  case VK_Invalid: return "<<invalid>>";
+  case VK_None: return "<<none>>";
+
+  case VK_GOT: return "GOT";
+  case VK_GOTOFF: return "GOTOFF";
+  case VK_GOTPCREL: return "GOTPCREL";
+  case VK_GOTTPOFF: return "GOTTPOFF";
+  case VK_INDNTPOFF: return "INDNTPOFF";
+  case VK_NTPOFF: return "NTPOFF";
+  case VK_GOTNTPOFF: return "GOTNTPOFF";
+  case VK_PLT: return "PLT";
+  case VK_TLSGD: return "TLSGD";
+  case VK_TLSLD: return "TLSLD";
+  case VK_TLSLDM: return "TLSLDM";
+  case VK_TPOFF: return "TPOFF";
+  case VK_DTPOFF: return "DTPOFF";
+  case VK_TLVP: return "TLVP";
+  case VK_ARM_PLT: return "(PLT)";
+  case VK_ARM_GOT: return "(GOT)";
+  case VK_ARM_GOTOFF: return "(GOTOFF)";
+  case VK_ARM_TPOFF: return "(tpoff)";
+  case VK_ARM_GOTTPOFF: return "(gottpoff)";
+  case VK_ARM_TLSGD: return "(tlsgd)";
+  case VK_PPC_TOC: return "toc";
+  case VK_PPC_HA16: return "ha16";
+  case VK_PPC_LO16: return "lo16";
+  }
+}
+
+MCSymbolRefExpr::VariantKind
+MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
+  return StringSwitch<VariantKind>(Name)
+    .Case("GOT", VK_GOT)
+    .Case("got", VK_GOT)
+    .Case("GOTOFF", VK_GOTOFF)
+    .Case("gotoff", VK_GOTOFF)
+    .Case("GOTPCREL", VK_GOTPCREL)
+    .Case("gotpcrel", VK_GOTPCREL)
+    .Case("GOTTPOFF", VK_GOTTPOFF)
+    .Case("gottpoff", VK_GOTTPOFF)
+    .Case("INDNTPOFF", VK_INDNTPOFF)
+    .Case("indntpoff", VK_INDNTPOFF)
+    .Case("NTPOFF", VK_NTPOFF)
+    .Case("ntpoff", VK_NTPOFF)
+    .Case("GOTNTPOFF", VK_GOTNTPOFF)
+    .Case("gotntpoff", VK_GOTNTPOFF)
+    .Case("PLT", VK_PLT)
+    .Case("plt", VK_PLT)
+    .Case("TLSGD", VK_TLSGD)
+    .Case("tlsgd", VK_TLSGD)
+    .Case("TLSLD", VK_TLSLD)
+    .Case("tlsld", VK_TLSLD)
+    .Case("TLSLDM", VK_TLSLDM)
+    .Case("tlsldm", VK_TLSLDM)
+    .Case("TPOFF", VK_TPOFF)
+    .Case("tpoff", VK_TPOFF)
+    .Case("DTPOFF", VK_DTPOFF)
+    .Case("dtpoff", VK_DTPOFF)
+    .Case("TLVP", VK_TLVP)
+    .Case("tlvp", VK_TLVP)
+    .Default(VK_Invalid);
+}
+
+/* *** */
+
+void MCTargetExpr::Anchor() {}
+
+/* *** */
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res) const {
+  return EvaluateAsAbsolute(Res, 0, 0, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+                                const MCAsmLayout &Layout) const {
+  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, 0);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res,
+                                const MCAsmLayout &Layout,
+                                const SectionAddrMap &Addrs) const {
+  return EvaluateAsAbsolute(Res, &Layout.getAssembler(), &Layout, &Addrs);
+}
+
+bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const
MCAssembler &Asm) const { + return EvaluateAsAbsolute(Res, &Asm, 0, 0); +} + +bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAssembler *Asm, + const MCAsmLayout *Layout, + const SectionAddrMap *Addrs) const { + MCValue Value; + + // Fast path constants. + if (const MCConstantExpr *CE = dyn_cast(this)) { + Res = CE->getValue(); + return true; + } + + // FIXME: The use if InSet = Addrs is a hack. Setting InSet causes us + // absolutize differences across sections and that is what the MachO writer + // uses Addrs for. + bool IsRelocatable = + EvaluateAsRelocatableImpl(Value, Asm, Layout, Addrs, /*InSet*/ Addrs); + + // Record the current value. + Res = Value.getConstant(); + + return IsRelocatable && Value.isAbsolute(); +} + +/// \brief Helper method for \see EvaluateSymbolAdd(). +static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, + const MCAsmLayout *Layout, + const SectionAddrMap *Addrs, + bool InSet, + const MCSymbolRefExpr *&A, + const MCSymbolRefExpr *&B, + int64_t &Addend) { + if (!A || !B) + return; + + const MCSymbol &SA = A->getSymbol(); + const MCSymbol &SB = B->getSymbol(); + + if (SA.isUndefined() || SB.isUndefined()) + return; + + if (!Asm->getWriter().IsSymbolRefDifferenceFullyResolved(*Asm, A, B, InSet)) + return; + + MCSymbolData &AD = Asm->getSymbolData(SA); + MCSymbolData &BD = Asm->getSymbolData(SB); + + if (AD.getFragment() == BD.getFragment()) { + Addend += (AD.getOffset() - BD.getOffset()); + + // Clear the symbol expr pointers to indicate we have folded these + // operands. + A = B = 0; + return; + } + + if (!Layout) + return; + + const MCSectionData &SecA = *AD.getFragment()->getParent(); + const MCSectionData &SecB = *BD.getFragment()->getParent(); + + if ((&SecA != &SecB) && !Addrs) + return; + + // Eagerly evaluate. + Addend += (Layout->getSymbolOffset(&Asm->getSymbolData(A->getSymbol())) - + Layout->getSymbolOffset(&Asm->getSymbolData(B->getSymbol()))); + if (Addrs && (&SecA != &SecB)) + Addend += (Addrs->lookup(&SecA) - Addrs->lookup(&SecB)); + + // Clear the symbol expr pointers to indicate we have folded these + // operands. + A = B = 0; +} + +/// \brief Evaluate the result of an add between (conceptually) two MCValues. +/// +/// This routine conceptually attempts to construct an MCValue: +/// Result = (Result_A - Result_B + Result_Cst) +/// from two MCValue's LHS and RHS where +/// Result = LHS + RHS +/// and +/// Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). +/// +/// This routine attempts to aggresively fold the operands such that the result +/// is representable in an MCValue, but may not always succeed. +/// +/// \returns True on success, false if the result is not representable in an +/// MCValue. + +/// NOTE: It is really important to have both the Asm and Layout arguments. +/// They might look redundant, but this function can be used before layout +/// is done (see the object streamer for example) and having the Asm argument +/// lets us avoid relaxations early. +static bool EvaluateSymbolicAdd(const MCAssembler *Asm, + const MCAsmLayout *Layout, + const SectionAddrMap *Addrs, + bool InSet, + const MCValue &LHS,const MCSymbolRefExpr *RHS_A, + const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, + MCValue &Res) { + // FIXME: This routine (and other evaluation parts) are *incredibly* sloppy + // about dealing with modifiers. This will ultimately bite us, one day. 
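  // A worked instance of the folding below, assuming labels L1 and L2 sit in
  // the same fragment at offsets 8 and 20: evaluating (L2 - L1) + 4 gives
  //   LHS = (SymA: L2, SymB: -L1, Cst: 4), RHS_A = RHS_B = 0, RHS_Cst = 0.
  // AttemptToFoldSymbolOffsetDifference sees both symbols in one fragment,
  // folds L2 - L1 into 20 - 8 = 12, clears both symbol operands, and the
  // whole expression collapses to the absolute MCValue 16.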
+ const MCSymbolRefExpr *LHS_A = LHS.getSymA(); + const MCSymbolRefExpr *LHS_B = LHS.getSymB(); + int64_t LHS_Cst = LHS.getConstant(); + + // Fold the result constant immediately. + int64_t Result_Cst = LHS_Cst + RHS_Cst; + + assert((!Layout || Asm) && + "Must have an assembler object if layout is given!"); + + // If we have a layout, we can fold resolved differences. + if (Asm) { + // First, fold out any differences which are fully resolved. By + // reassociating terms in + // Result = (LHS_A - LHS_B + LHS_Cst) + (RHS_A - RHS_B + RHS_Cst). + // we have the four possible differences: + // (LHS_A - LHS_B), + // (LHS_A - RHS_B), + // (RHS_A - LHS_B), + // (RHS_A - RHS_B). + // Since we are attempting to be as aggresive as possible about folding, we + // attempt to evaluate each possible alternative. + AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B, + Result_Cst); + AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, RHS_B, + Result_Cst); + AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, LHS_B, + Result_Cst); + AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, RHS_A, RHS_B, + Result_Cst); + } + + // We can't represent the addition or subtraction of two symbols. + if ((LHS_A && RHS_A) || (LHS_B && RHS_B)) + return false; + + // At this point, we have at most one additive symbol and one subtractive + // symbol -- find them. + const MCSymbolRefExpr *A = LHS_A ? LHS_A : RHS_A; + const MCSymbolRefExpr *B = LHS_B ? LHS_B : RHS_B; + + // If we have a negated symbol, then we must have also have a non-negated + // symbol in order to encode the expression. + if (B && !A) + return false; + + Res = MCValue::get(A, B, Result_Cst); + return true; +} + +bool MCExpr::EvaluateAsRelocatable(MCValue &Res, + const MCAsmLayout &Layout) const { + return EvaluateAsRelocatableImpl(Res, &Layout.getAssembler(), &Layout, + 0, false); +} + +bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, + const MCAssembler *Asm, + const MCAsmLayout *Layout, + const SectionAddrMap *Addrs, + bool InSet) const { + ++stats::MCExprEvaluate; + + switch (getKind()) { + case Target: + return cast(this)->EvaluateAsRelocatableImpl(Res, Layout); + + case Constant: + Res = MCValue::get(cast(this)->getValue()); + return true; + + case SymbolRef: { + const MCSymbolRefExpr *SRE = cast(this); + const MCSymbol &Sym = SRE->getSymbol(); + + // Evaluate recursively if this is a variable. + if (Sym.isVariable() && SRE->getKind() == MCSymbolRefExpr::VK_None) { + bool Ret = Sym.getVariableValue()->EvaluateAsRelocatableImpl(Res, Asm, + Layout, + Addrs, + true); + // If we failed to simplify this to a constant, let the target + // handle it. 
+ if (Ret && !Res.getSymA() && !Res.getSymB()) + return true; + } + + Res = MCValue::get(SRE, 0, 0); + return true; + } + + case Unary: { + const MCUnaryExpr *AUE = cast(this); + MCValue Value; + + if (!AUE->getSubExpr()->EvaluateAsRelocatableImpl(Value, Asm, Layout, + Addrs, InSet)) + return false; + + switch (AUE->getOpcode()) { + case MCUnaryExpr::LNot: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(!Value.getConstant()); + break; + case MCUnaryExpr::Minus: + /// -(a - b + const) ==> (b - a - const) + if (Value.getSymA() && !Value.getSymB()) + return false; + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); + break; + case MCUnaryExpr::Not: + if (!Value.isAbsolute()) + return false; + Res = MCValue::get(~Value.getConstant()); + break; + case MCUnaryExpr::Plus: + Res = Value; + break; + } + + return true; + } + + case Binary: { + const MCBinaryExpr *ABE = cast(this); + MCValue LHSValue, RHSValue; + + if (!ABE->getLHS()->EvaluateAsRelocatableImpl(LHSValue, Asm, Layout, + Addrs, InSet) || + !ABE->getRHS()->EvaluateAsRelocatableImpl(RHSValue, Asm, Layout, + Addrs, InSet)) + return false; + + // We only support a few operations on non-constant expressions, handle + // those first. + if (!LHSValue.isAbsolute() || !RHSValue.isAbsolute()) { + switch (ABE->getOpcode()) { + default: + return false; + case MCBinaryExpr::Sub: + // Negate RHS and add. + return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, + RHSValue.getSymB(), RHSValue.getSymA(), + -RHSValue.getConstant(), + Res); + + case MCBinaryExpr::Add: + return EvaluateSymbolicAdd(Asm, Layout, Addrs, InSet, LHSValue, + RHSValue.getSymA(), RHSValue.getSymB(), + RHSValue.getConstant(), + Res); + } + } + + // FIXME: We need target hooks for the evaluation. It may be limited in + // width, and gas defines the result of comparisons and right shifts + // differently from Apple as. + int64_t LHS = LHSValue.getConstant(), RHS = RHSValue.getConstant(); + int64_t Result = 0; + switch (ABE->getOpcode()) { + case MCBinaryExpr::Add: Result = LHS + RHS; break; + case MCBinaryExpr::And: Result = LHS & RHS; break; + case MCBinaryExpr::Div: Result = LHS / RHS; break; + case MCBinaryExpr::EQ: Result = LHS == RHS; break; + case MCBinaryExpr::GT: Result = LHS > RHS; break; + case MCBinaryExpr::GTE: Result = LHS >= RHS; break; + case MCBinaryExpr::LAnd: Result = LHS && RHS; break; + case MCBinaryExpr::LOr: Result = LHS || RHS; break; + case MCBinaryExpr::LT: Result = LHS < RHS; break; + case MCBinaryExpr::LTE: Result = LHS <= RHS; break; + case MCBinaryExpr::Mod: Result = LHS % RHS; break; + case MCBinaryExpr::Mul: Result = LHS * RHS; break; + case MCBinaryExpr::NE: Result = LHS != RHS; break; + case MCBinaryExpr::Or: Result = LHS | RHS; break; + case MCBinaryExpr::Shl: Result = LHS << RHS; break; + case MCBinaryExpr::Shr: Result = LHS >> RHS; break; + case MCBinaryExpr::Sub: Result = LHS - RHS; break; + case MCBinaryExpr::Xor: Result = LHS ^ RHS; break; + } + + Res = MCValue::get(Result); + return true; + } + } + + assert(0 && "Invalid assembly expression kind!"); + return false; +} diff --git a/final/lib/MC/MCInst.cpp b/final/lib/MC/MCInst.cpp new file mode 100644 index 00000000000..4cb628b395c --- /dev/null +++ b/final/lib/MC/MCInst.cpp @@ -0,0 +1,66 @@ +//===- lib/MC/MCInst.cpp - MCInst implementation --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
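// The Binary case above folds a tree of constants bottom-up. A minimal
// sketch of that path using the Create functions from this file, assuming
// an MCContext Ctx has already been constructed elsewhere:
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"

static bool foldExample(llvm::MCContext &Ctx, int64_t &Res) {
  using namespace llvm;
  // Build (10 + 2) * 3 as an expression tree allocated in the context.
  const MCExpr *Sum =
      MCBinaryExpr::Create(MCBinaryExpr::Add, MCConstantExpr::Create(10, Ctx),
                           MCConstantExpr::Create(2, Ctx), Ctx);
  const MCExpr *Prod = MCBinaryExpr::Create(
      MCBinaryExpr::Mul, Sum, MCConstantExpr::Create(3, Ctx), Ctx);
  // Hits the fast path for constants: returns true and sets Res to 36,
  // with no symbolic terms left over.
  return Prod->EvaluateAsAbsolute(Res);
}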
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+void MCOperand::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  OS << "<MCOperand ";
+  if (!isValid())
+    OS << "INVALID";
+  else if (isReg())
+    OS << "Reg:" << getReg();
+  else if (isImm())
+    OS << "Imm:" << getImm();
+  else if (isFPImm())
+    OS << "FPImm:" << getFPImm();
+  else if (isExpr()) {
+    OS << "Expr:(" << *getExpr() << ")";
+  } else
+    OS << "UNDEFINED";
+  OS << ">";
+}
+
+void MCOperand::dump() const {
+  print(dbgs(), 0);
+  dbgs() << "\n";
+}
+
+void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
+  OS << "<MCInst " << getOpcode();
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    OS << " ";
+    getOperand(i).print(OS, MAI);
+  }
+  OS << ">";
+}
+
+void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
+                         const MCInstPrinter *Printer,
+                         StringRef Separator) const {
+  OS << "<MCInst #" << getOpcode();
+
+  // Show the instruction opcode name if we have access to a printer.
+  if (Printer)
+    OS << ' ' << Printer->getOpcodeName(getOpcode());
+
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    OS << Separator;
+    getOperand(i).print(OS, MAI);
+  }
+  OS << ">";
+}
+
+void MCInst::dump() const {
+  print(dbgs(), 0);
+  dbgs() << "\n";
+}
diff --git a/final/lib/MC/MCInstPrinter.cpp b/final/lib/MC/MCInstPrinter.cpp
new file mode 100644
index 00000000000..212b85eb1fe
--- /dev/null
+++ b/final/lib/MC/MCInstPrinter.cpp
@@ -0,0 +1,26 @@
+//===-- MCInstPrinter.cpp - Convert an MCInst to target assembly syntax ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/ADT/StringRef.h"
+using namespace llvm;
+
+MCInstPrinter::~MCInstPrinter() {
+}
+
+/// getOpcodeName - Return the name of the specified opcode enum (e.g.
+/// "MOV32ri") or empty if we can't resolve it.
+StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const {
+  return "";
+}
+
+StringRef MCInstPrinter::getRegName(unsigned RegNo) const {
+  assert(0 && "Target should implement this");
+  return "";
+}
diff --git a/final/lib/MC/MCLabel.cpp b/final/lib/MC/MCLabel.cpp
new file mode 100644
index 00000000000..9c0fc92e6c0
--- /dev/null
+++ b/final/lib/MC/MCLabel.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+  OS << '"' << getInstance() << '"';
+}
+
+void MCLabel::dump() const {
+  print(dbgs());
+}
diff --git a/final/lib/MC/MCLoggingStreamer.cpp b/final/lib/MC/MCLoggingStreamer.cpp
new file mode 100644
index 00000000000..012c7f62f8a
--- /dev/null
+++ b/final/lib/MC/MCLoggingStreamer.cpp
@@ -0,0 +1,248 @@
+//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
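// The streamer defined in this file is a plain decorator: it owns a child
// MCStreamer, prints the name of each API entry point, and then forwards
// the call unchanged. A minimal sketch of the same shape, with a toy
// interface standing in for MCStreamer:
#include <iostream>

struct Sink {                              // stands in for MCStreamer
  virtual ~Sink() {}
  virtual void emitByte(int Byte) = 0;
};

struct NullSink : public Sink {            // discards everything
  virtual void emitByte(int) {}
};

struct LoggingSink : public Sink {         // stands in for MCLoggingStreamer
  LoggingSink(Sink *C, std::ostream &O) : Child(C), OS(O) {}
  ~LoggingSink() { delete Child; }         // owns the child, like OwningPtr
  virtual void emitByte(int Byte) {
    OS << "emitByte\n";                    // log the call...
    Child->emitByte(Byte);                 // ...then forward it verbatim
  }
  Sink *Child;
  std::ostream &OS;
};

int main() {
  LoggingSink S(new NullSink(), std::cout);
  S.emitByte(42);                          // prints "emitByte", then no-op
  return 0;
}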
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +class MCLoggingStreamer : public MCStreamer { + llvm::OwningPtr Child; + + raw_ostream &OS; + +public: + MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS) + : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {} + + void LogCall(const char *Function) { + OS << Function << "\n"; + } + + void LogCall(const char *Function, const Twine &Message) { + OS << Function << ": " << Message << "\n"; + } + + virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); } + + virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); } + + virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); } + + virtual void AddComment(const Twine &T) { + LogCall("AddComment", T); + return Child->AddComment(T); + } + + virtual void AddBlankLine() { + LogCall("AddBlankLine"); + return Child->AddBlankLine(); + } + + virtual void ChangeSection(const MCSection *Section) { + LogCall("ChangeSection"); + return Child->ChangeSection(Section); + } + + virtual void InitSections() { + LogCall("InitSections"); + return Child->InitSections(); + } + + virtual void EmitLabel(MCSymbol *Symbol) { + LogCall("EmitLabel"); + return Child->EmitLabel(Symbol); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + LogCall("EmitAssemblerFlag"); + return Child->EmitAssemblerFlag(Flag); + } + + virtual void EmitThumbFunc(MCSymbol *Func) { + LogCall("EmitThumbFunc"); + return Child->EmitThumbFunc(Func); + } + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitAssignment"); + return Child->EmitAssignment(Symbol, Value); + } + + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) { + LogCall("EmitWeakReference"); + return Child->EmitWeakReference(Alias, Symbol); + } + + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) { + LogCall("EmitDwarfAdvanceLineAddr"); + return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label); + } + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + LogCall("EmitSymbolAttribute"); + return Child->EmitSymbolAttribute(Symbol, Attribute); + } + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + LogCall("EmitSymbolDesc"); + return Child->EmitSymbolDesc(Symbol, DescValue); + } + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + LogCall("BeginCOFFSymbolDef"); + return Child->BeginCOFFSymbolDef(Symbol); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + LogCall("EmitCOFFSymbolStorageClass"); + return Child->EmitCOFFSymbolStorageClass(StorageClass); + } + + virtual void EmitCOFFSymbolType(int Type) { + LogCall("EmitCOFFSymbolType"); + return Child->EmitCOFFSymbolType(Type); + } + + virtual void EndCOFFSymbolDef() { + LogCall("EndCOFFSymbolDef"); + return Child->EndCOFFSymbolDef(); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + LogCall("EmitELFSize"); + return Child->EmitELFSize(Symbol, Value); + } + + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + LogCall("EmitCommonSymbol"); + return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + 
LogCall("EmitLocalCommonSymbol"); + return Child->EmitLocalCommonSymbol(Symbol, Size); + } + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + LogCall("EmitZerofill"); + return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + LogCall("EmitTBSSSymbol"); + return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment); + } + + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + LogCall("EmitBytes"); + return Child->EmitBytes(Data, AddrSpace); + } + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace){ + LogCall("EmitValue"); + return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace); + } + + virtual void EmitULEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) { + LogCall("EmitULEB128Value"); + return Child->EmitULEB128Value(Value, AddrSpace); + } + + virtual void EmitSLEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) { + LogCall("EmitSLEB128Value"); + return Child->EmitSLEB128Value(Value, AddrSpace); + } + + virtual void EmitGPRel32Value(const MCExpr *Value) { + LogCall("EmitGPRel32Value"); + return Child->EmitGPRel32Value(Value); + } + + virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + LogCall("EmitFill"); + return Child->EmitFill(NumBytes, FillValue, AddrSpace); + } + + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitValueToAlignment"); + return Child->EmitValueToAlignment(ByteAlignment, Value, + ValueSize, MaxBytesToEmit); + } + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { + LogCall("EmitCodeAlignment"); + return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit); + } + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + LogCall("EmitValueToOffset"); + return Child->EmitValueToOffset(Offset, Value); + } + + virtual void EmitFileDirective(StringRef Filename) { + LogCall("EmitFileDirective", "FileName:" + Filename); + return Child->EmitFileDirective(Filename); + } + + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + LogCall("EmitDwarfFileDirective", + "FileNo:" + Twine(FileNo) + " Filename:" + Filename); + return Child->EmitDwarfFileDirective(FileNo, Filename); + } + + virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, unsigned Discriminator) { + LogCall("EmitDwarfLocDirective", + "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) + + " Column:" + Twine(Column) + " Flags:" + Twine(Flags) + + " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator)); + return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags, + Isa, Discriminator); + } + + virtual void EmitInstruction(const MCInst &Inst) { + LogCall("EmitInstruction"); + return Child->EmitInstruction(Inst); + } + + virtual void EmitRawText(StringRef String) { + LogCall("EmitRawText", "\"" + String + "\""); + return Child->EmitRawText(String); + } + + virtual void Finish() { + LogCall("Finish"); + return Child->Finish(); + } + +}; + +} // end anonymous namespace. 
+ +MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) { + return new MCLoggingStreamer(Child, OS); +} diff --git a/final/lib/MC/MCMachOStreamer.cpp b/final/lib/MC/MCMachOStreamer.cpp new file mode 100644 index 00000000000..d1f9f5cd568 --- /dev/null +++ b/final/lib/MC/MCMachOStreamer.cpp @@ -0,0 +1,405 @@ +//===- lib/MC/MCMachOStreamer.cpp - Mach-O Object Output ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetAsmInfo.h" + +using namespace llvm; + +namespace { + +class MCMachOStreamer : public MCObjectStreamer { +private: + virtual void EmitInstToData(const MCInst &Inst); + +public: + MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EndCOFFSymbolDef() { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + assert(0 && "macho doesn't support this directive"); + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename) { + // FIXME: Just ignore the .file; it isn't important enough to fail the + // entire assembly. 
+ + //report_fatal_error("unsupported directive: '.file'"); + } + + virtual void Finish(); + + /// @} +}; + +} // end anonymous namespace. + +void MCMachOStreamer::InitSections() { + SwitchSection(getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); + +} + +void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + // isSymbolLinkerVisible uses the section. + Symbol->setSection(*getCurrentSection()); + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(*Symbol)) + new MCDataFragment(getCurrentSectionData()); + + MCObjectStreamer::EmitLabel(Symbol); + + MCSymbolData &SD = getAssembler().getSymbolData(*Symbol); + // This causes the reference type flag to be cleared. Darwin 'as' was "trying" + // to clear the weak reference and weak definition bits too, but the + // implementation was buggy. For now we just try to match 'as', for + // diffability. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); +} + +void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + // Let the target do whatever target specific stuff it needs to do. + getAssembler().getBackend().HandleAssemblerFlag(Flag); + // Do any generic stuff we need to do. + switch (Flag) { + case MCAF_SyntaxUnified: return; // no-op here. + case MCAF_Code16: return; // no-op here. + case MCAF_Code32: return; // no-op here. + case MCAF_SubsectionsViaSymbols: + getAssembler().setSubsectionsViaSymbols(true); + return; + default: + llvm_unreachable("invalid assembler flag!"); + } +} + +void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { + // FIXME: Flag the function ISA as thumb with DW_AT_APPLE_isa. + + // Remember that the function is a thumb function. Fixup and relocation + // values will need adjusted. + getAssembler().setIsThumbFunc(Symbol); + + // Mark the thumb bit on the symbol. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setFlags(SD.getFlags() | SF_ThumbFunc); +} + +void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + // Indirect symbols are handled differently, to match how 'as' handles + // them. This makes writing matching .o files easier. + if (Attribute == MCSA_IndirectSymbol) { + // Note that we intentionally cannot use the symbol data here; this is + // important for matching the string table that 'as' generates. + IndirectSymbolData ISD; + ISD.Symbol = Symbol; + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); + return; + } + + // Adding a symbol attribute always introduces the symbol, note that an + // important side effect of calling getOrCreateSymbolData here is to register + // the symbol with the assembler. + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // The implementation of symbol attributes is designed to match 'as', but it + // leaves much to desired. 
It doesn't really make sense to arbitrarily add and + // remove flags, but 'as' allows this (in particular, see .desc). + // + // In the future it might be worth trying to make these operations more well + // defined. + switch (Attribute) { + case MCSA_Invalid: + case MCSA_ELF_TypeFunction: + case MCSA_ELF_TypeIndFunction: + case MCSA_ELF_TypeObject: + case MCSA_ELF_TypeTLS: + case MCSA_ELF_TypeCommon: + case MCSA_ELF_TypeNoType: + case MCSA_ELF_TypeGnuUniqueObject: + case MCSA_IndirectSymbol: + case MCSA_Hidden: + case MCSA_Internal: + case MCSA_Protected: + case MCSA_Weak: + case MCSA_Local: + assert(0 && "Invalid symbol attribute for Mach-O!"); + break; + + case MCSA_Global: + SD.setExternal(true); + // This effectively clears the undefined lazy bit, in Darwin 'as', although + // it isn't very consistent because it implements this as part of symbol + // lookup. + // + // FIXME: Cleanup this code, these bits should be emitted based on semantic + // properties, not on the order of definition, etc. + SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy); + break; + + case MCSA_LazyReference: + // FIXME: This requires -dynamic. + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_ReferenceTypeUndefinedLazy); + break; + + // Since .reference sets the no dead strip bit, it is equivalent to + // .no_dead_strip in practice. + case MCSA_Reference: + case MCSA_NoDeadStrip: + SD.setFlags(SD.getFlags() | SF_NoDeadStrip); + break; + + case MCSA_SymbolResolver: + SD.setFlags(SD.getFlags() | SF_SymbolResolver); + break; + + case MCSA_PrivateExtern: + SD.setExternal(true); + SD.setPrivateExtern(true); + break; + + case MCSA_WeakReference: + // FIXME: This requires -dynamic. + if (Symbol->isUndefined()) + SD.setFlags(SD.getFlags() | SF_WeakReference); + break; + + case MCSA_WeakDefinition: + // FIXME: 'as' enforces that this is defined and global. The manual claims + // it has to be in a coalesced section, but this isn't enforced. + SD.setFlags(SD.getFlags() | SF_WeakDefinition); + break; + + case MCSA_WeakDefAutoPrivate: + SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); + break; + } +} + +void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + // Encode the 'desc' value into the lowest implementation defined bits. + assert(DescValue == (DescValue & SF_DescFlagsMask) && + "Invalid .desc value!"); + getAssembler().getOrCreateSymbolData(*Symbol).setFlags( + DescValue & SF_DescFlagsMask); +} + +void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setExternal(true); + SD.setCommon(Size, ByteAlignment); +} + +void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); + + // The symbol may not be present, which only creates the section. + if (!Symbol) + return; + + // FIXME: Assert that this section has the zerofill type. + + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // Emit an align fragment if necessary. 
+ if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData); + + MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); + SD.setFragment(F); + + Symbol->setSection(*Section); + + // Update the maximum alignment on the zero fill section if necessary. + if (ByteAlignment > SectData.getAlignment()) + SectData.setAlignment(ByteAlignment); +} + +// This should always be called with the thread local bss section. Like the +// .zerofill directive this doesn't actually switch sections on us. +void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + EmitZerofill(Section, Symbol, Size, ByteAlignment); + return; +} + +void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { + MCDataFragment *DF = getOrCreateDataFragment(); + + SmallVector Fixups; + SmallString<256> Code; + raw_svector_ostream VecOS(Code); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + VecOS.flush(); + + // Add the fixups and data. + for (unsigned i = 0, e = Fixups.size(); i != e; ++i) { + Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size()); + DF->addFixup(Fixups[i]); + } + DF->getContents().append(Code.begin(), Code.end()); +} + +void MCMachOStreamer::Finish() { + // We have to set the fragment atom associations so we can relax properly for + // Mach-O. + + // First, scan the symbol table to build a lookup table from fragments to + // defining symbols. + DenseMap DefiningSymbolMap; + for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), + ie = getAssembler().symbol_end(); it != ie; ++it) { + if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && + it->getFragment()) { + // An atom defining symbol should never be internal to a fragment. + assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[it->getFragment()] = it; + } + } + + // Set the fragment atom associations by tracking the last seen atom defining + // symbol. 
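  // Fragments are visited in section order, so each fragment inherits the
  // nearest atom-defining symbol that precedes it; fragments ahead of the
  // first atom in a section are left with a null atom.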
+ for (MCAssembler::iterator it = getAssembler().begin(), + ie = getAssembler().end(); it != ie; ++it) { + MCSymbolData *CurrentAtom = 0; + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) + CurrentAtom = SD; + it2->setAtom(CurrentAtom); + } + } + + this->MCObjectStreamer::Finish(); +} + +MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *CE, + bool RelaxAll) { + MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE); + if (RelaxAll) + S->getAssembler().setRelaxAll(true); + return S; +} diff --git a/final/lib/MC/MCMachObjectTargetWriter.cpp b/final/lib/MC/MCMachObjectTargetWriter.cpp new file mode 100644 index 00000000000..146cebf01a3 --- /dev/null +++ b/final/lib/MC/MCMachObjectTargetWriter.cpp @@ -0,0 +1,22 @@ +//===-- MCMachObjectTargetWriter.cpp - Mach-O Target Writer Subclass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCMachObjectWriter.h" + +using namespace llvm; + +MCMachObjectTargetWriter::MCMachObjectTargetWriter( + bool Is64Bit_, uint32_t CPUType_, uint32_t CPUSubtype_, + bool UseAggressiveSymbolFolding_) + : Is64Bit(Is64Bit_), CPUType(CPUType_), CPUSubtype(CPUSubtype_), + UseAggressiveSymbolFolding(UseAggressiveSymbolFolding_) { +} + +MCMachObjectTargetWriter::~MCMachObjectTargetWriter() { +} diff --git a/final/lib/MC/MCNullStreamer.cpp b/final/lib/MC/MCNullStreamer.cpp new file mode 100644 index 00000000000..08ddf01d1a3 --- /dev/null +++ b/final/lib/MC/MCNullStreamer.cpp @@ -0,0 +1,104 @@ +//===- lib/MC/MCNullStreamer.cpp - Dummy Streamer Implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" + +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" + +using namespace llvm; + +namespace { + + class MCNullStreamer : public MCStreamer { + public: + MCNullStreamer(MCContext &Context) : MCStreamer(Context) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections() { + } + + virtual void ChangeSection(const MCSection *Section) { + } + + virtual void EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection()); + } + + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {} + virtual void EmitThumbFunc(MCSymbol *Func) {} + + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){} + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) {} + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} + + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {} + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) {} + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} + + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, + bool isPCRel, unsigned AddrSpace) {} + virtual void EmitULEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) {} + virtual void EmitSLEB128Value(const MCExpr *Value, + unsigned AddrSpace = 0) {} + virtual void EmitGPRel32Value(const MCExpr *Value) {} + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) {} + + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) {} + + virtual void EmitFileDirective(StringRef Filename) {} + virtual bool EmitDwarfFileDirective(unsigned FileNo,StringRef Filename) { + return false; + } + virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, unsigned Discriminator) {} + virtual void EmitInstruction(const MCInst &Inst) {} + + virtual void Finish() {} + + /// @} + }; + +} + +MCStreamer *llvm::createNullStreamer(MCContext &Context) { + return new MCNullStreamer(Context); +} diff --git a/final/lib/MC/MCObjectStreamer.cpp b/final/lib/MC/MCObjectStreamer.cpp new file mode 100644 index 00000000000..e67d9b03a95 --- /dev/null +++ b/final/lib/MC/MCObjectStreamer.cpp @@ -0,0 +1,270 @@ +//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===// +// +// The LLVM Compiler 
Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCObjectStreamer.h"
+
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Target/TargetAsmInfo.h"
+using namespace llvm;
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                                   raw_ostream &OS, MCCodeEmitter *Emitter_)
+  : MCStreamer(Context),
+    Assembler(new MCAssembler(Context, TAB,
+                              *Emitter_, *TAB.createObjectWriter(OS),
+                              OS)),
+    CurSectionData(0)
+{
+}
+
+MCObjectStreamer::~MCObjectStreamer() {
+  delete &Assembler->getBackend();
+  delete &Assembler->getEmitter();
+  delete &Assembler->getWriter();
+  delete Assembler;
+}
+
+MCFragment *MCObjectStreamer::getCurrentFragment() const {
+  assert(getCurrentSectionData() && "No current section!");
+
+  if (!getCurrentSectionData()->empty())
+    return &getCurrentSectionData()->getFragmentList().back();
+
+  return 0;
+}
+
+MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const {
+  MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+  if (!F)
+    F = new MCDataFragment(getCurrentSectionData());
+  return F;
+}
+
+const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) {
+  switch (Value->getKind()) {
+  case MCExpr::Target:
+    cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler);
+    break;
+
+  case MCExpr::Constant:
+    break;
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+    AddValueSymbols(BE->getLHS());
+    AddValueSymbols(BE->getRHS());
+    break;
+  }
+
+  case MCExpr::SymbolRef:
+    Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+    break;
+
+  case MCExpr::Unary:
+    AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr());
+    break;
+  }
+
+  return Value;
+}
+
+void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
+                                     bool isPCRel, unsigned AddrSpace) {
+  assert(AddrSpace == 0 && "Address space must be 0!");
+  MCDataFragment *DF = getOrCreateDataFragment();
+
+  // Avoid fixups when possible.
+  int64_t AbsValue;
+  if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) {
+    EmitIntValue(AbsValue, Size, AddrSpace);
+    return;
+  }
+  DF->addFixup(MCFixup::Create(DF->getContents().size(),
+                               Value,
+                               MCFixup::getKindForSize(Size, isPCRel)));
+  DF->getContents().resize(DF->getContents().size() + Size, 0);
+}
+
+void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+  assert(getCurrentSection() && "Cannot emit before setting section!");
+
+  Symbol->setSection(*getCurrentSection());
+
+  MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+
+  // FIXME: This is wasteful, we don't necessarily need to create a data
+  // fragment. Instead, we should mark the symbol as pointing into the data
+  // fragment if it exists, otherwise we should just queue the label and set its
+  // fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment(); + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(F); + SD.setOffset(F->getContents().size()); +} + +void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value, + unsigned AddrSpace) { + int64_t IntValue; + if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { + EmitULEB128IntValue(IntValue, AddrSpace); + return; + } + new MCLEBFragment(*Value, false, getCurrentSectionData()); +} + +void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value, + unsigned AddrSpace) { + int64_t IntValue; + if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { + EmitSLEB128IntValue(IntValue, AddrSpace); + return; + } + new MCLEBFragment(*Value, true, getCurrentSectionData()); +} + +void MCObjectStreamer::EmitWeakReference(MCSymbol *Alias, + const MCSymbol *Symbol) { + report_fatal_error("This file format doesn't support weak aliases."); +} + +void MCObjectStreamer::ChangeSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + + CurSectionData = &getAssembler().getOrCreateSectionData(*Section); +} + +void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { + // Scan for values. + for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + + getCurrentSectionData()->setHasInstructions(true); + + // Now that a machine instruction has been assembled into this section, make + // a line entry for any .loc directive that has been seen. + MCLineEntry::Make(this, getCurrentSection()); + + // If this instruction doesn't need relaxation, just emit it as data. + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { + EmitInstToData(Inst); + return; + } + + // Otherwise, if we are relaxing everything, relax the instruction as much as + // possible and emit it as data. + if (getAssembler().getRelaxAll()) { + MCInst Relaxed; + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); + EmitInstToData(Relaxed); + return; + } + + // Otherwise emit to a separate fragment. 
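+  // (For example, an x86 conditional branch whose displacement width is not
+  // yet known is relaxable; giving it its own fragment lets the layout loop
+  // grow it later, whereas fixed-size instructions were appended to a shared
+  // data fragment above. The instruction choice here is illustrative.)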
+ EmitInstToFragment(Inst); +} + +void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + raw_svector_ostream VecOS(IF->getCode()); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups()); +} + +static const MCExpr *BuildSymbolDiff(MCContext &Context, + const MCSymbol *A, const MCSymbol *B) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *ARef = + MCSymbolRefExpr::Create(A, Variant, Context); + const MCExpr *BRef = + MCSymbolRefExpr::Create(B, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context); + return AddrDelta; +} + +static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer, + MCContext &Context, const MCExpr* Expr) { + if (Context.getAsmInfo().hasAggressiveSymbolFolding()) + return Expr; + + MCSymbol *ABS = Context.CreateTempSymbol(); + Streamer->EmitAssignment(ABS, Expr); + return MCSymbolRefExpr::Create(ABS, Context); +} + +void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label) { + if (!LastLabel) { + int PointerSize = getContext().getTargetAsmInfo().getPointerSize(); + EmitDwarfSetLineAddr(LineDelta, Label, PointerSize); + return; + } + const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel); + int64_t Res; + if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) { + MCDwarfLineAddr::Emit(this, LineDelta, Res); + return; + } + AddrDelta = ForceExpAbs(this, getContext(), AddrDelta); + new MCDwarfLineAddrFragment(LineDelta, *AddrDelta, getCurrentSectionData()); +} + +void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, + const MCSymbol *Label) { + const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel); + int64_t Res; + if (AddrDelta->EvaluateAsAbsolute(Res, getAssembler())) { + MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res); + return; + } + AddrDelta = ForceExpAbs(this, getContext(), AddrDelta); + new MCDwarfCallFrameFragment(*AddrDelta, getCurrentSectionData()); +} + +void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + int64_t Res; + if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + return; + } + + MCSymbol *CurrentPos = getContext().CreateTempSymbol(); + EmitLabel(CurrentPos); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Ref = + MCSymbolRefExpr::Create(CurrentPos, Variant, getContext()); + const MCExpr *Delta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext()); + + if (!Delta->EvaluateAsAbsolute(Res, getAssembler())) + report_fatal_error("expected assembly-time absolute expression"); + EmitFill(Res, Value, 0); +} + +void MCObjectStreamer::Finish() { + // Dump out the dwarf file & directory tables and line tables. + if (getContext().hasDwarfFiles()) + MCDwarfFileTable::Emit(this); + + getAssembler().Finish(); +} diff --git a/final/lib/MC/MCObjectWriter.cpp b/final/lib/MC/MCObjectWriter.cpp new file mode 100644 index 00000000000..efe9f68ee22 --- /dev/null +++ b/final/lib/MC/MCObjectWriter.cpp @@ -0,0 +1,80 @@ +//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSymbol.h"
+
+using namespace llvm;
+
+MCObjectWriter::~MCObjectWriter() {
+}
+
+/// Utility function to encode a SLEB128 value. For example, -2 encodes to the
+/// single byte 0x7E, while 64 needs two bytes, 0xC0 0x00, because the sign bit
+/// of a one-byte encoding would otherwise be ambiguous.
+void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) {
+  bool More;
+  do {
+    uint8_t Byte = Value & 0x7f;
+    // NOTE: this assumes that this signed shift is an arithmetic right shift.
+    Value >>= 7;
+    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+              ((Value == -1) && ((Byte & 0x40) != 0))));
+    if (More)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    OS << char(Byte);
+  } while (More);
+}
+
+/// Utility function to encode a ULEB128 value. For example, 624485 encodes to
+/// the byte sequence 0xE5 0x8E 0x26: seven value bits per byte, low bits
+/// first, with the high bit set on all but the last byte.
+void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) {
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value != 0)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    OS << char(Byte);
+  } while (Value != 0);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolved(const MCAssembler &Asm,
+                                                   const MCSymbolRefExpr *A,
+                                                   const MCSymbolRefExpr *B,
+                                                   bool InSet) const {
+  // Modified symbol references cannot be resolved.
+  if (A->getKind() != MCSymbolRefExpr::VK_None ||
+      B->getKind() != MCSymbolRefExpr::VK_None)
+    return false;
+
+  const MCSymbol &SA = A->getSymbol();
+  const MCSymbol &SB = B->getSymbol();
+  if (SA.AliasedSymbol().isUndefined() || SB.AliasedSymbol().isUndefined())
+    return false;
+
+  const MCSymbolData &DataA = Asm.getSymbolData(SA);
+  const MCSymbolData &DataB = Asm.getSymbolData(SB);
+
+  return IsSymbolRefDifferenceFullyResolvedImpl(Asm, DataA,
+                                                *DataB.getFragment(),
+                                                InSet,
+                                                false);
+}
+
+bool
+MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                       const MCSymbolData &DataA,
+                                                       const MCFragment &FB,
+                                                       bool InSet,
+                                                       bool IsPCRel) const {
+  const MCSection &SecA = DataA.getSymbol().AliasedSymbol().getSection();
+  const MCSection &SecB = FB.getParent()->getSection();
+  // On ELF and COFF A - B is absolute if A and B are in the same section.
+  return &SecA == &SecB;
+}
diff --git a/final/lib/MC/MCParser/AsmLexer.cpp b/final/lib/MC/MCParser/AsmLexer.cpp
new file mode 100644
index 00000000000..89374d0c3fb
--- /dev/null
+++ b/final/lib/MC/MCParser/AsmLexer.cpp
@@ -0,0 +1,430 @@
+//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the lexer for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include <cctype>
+#include <cerrno>
+#include <cstdio>
+#include <cstdlib>
+using namespace llvm;
+
+AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
+  CurBuf = NULL;
+  CurPtr = NULL;
+}
+
+AsmLexer::~AsmLexer() {
+}
+
+void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
+  CurBuf = buf;
+
+  if (ptr)
+    CurPtr = ptr;
+  else
+    CurPtr = CurBuf->getBufferStart();
+
+  TokStart = 0;
+}
+
+/// ReturnError - Set the error to the specified string at the specified
+/// location. This is defined to always return AsmToken::Error.
+AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
+  SetError(SMLoc::getFromPointer(Loc), Msg);
+
+  return AsmToken(AsmToken::Error, StringRef(Loc, 0));
+}
+
+int AsmLexer::getNextChar() {
+  char CurChar = *CurPtr++;
+  switch (CurChar) {
+  default:
+    return (unsigned char)CurChar;
+  case 0:
+    // A nul character in the stream is either the end of the current buffer or
+    // a random nul in the file. Disambiguate that here.
+    if (CurPtr-1 != CurBuf->getBufferEnd())
+      return 0;  // Just whitespace.
+
+    // Otherwise, return end of file.
+    --CurPtr;  // Another call to lex will return EOF again.
+    return EOF;
+  }
+}
+
+/// LexFloatLiteral: [0-9]*[.][0-9]*([eE][+-]?[0-9]*)?
+///
+/// The leading integral digit sequence and dot should have already been
+/// consumed, some or all of the fractional digit sequence *can* have been
+/// consumed.
+AsmToken AsmLexer::LexFloatLiteral() {
+  // Skip the fractional digit sequence.
+  while (isdigit(*CurPtr))
+    ++CurPtr;
+
+  // Check for exponent; we intentionally accept a slightly wider set of
+  // literals here and rely on the upstream client to reject invalid ones (e.g.,
+  // "1e+").
+  if (*CurPtr == 'e' || *CurPtr == 'E') {
+    ++CurPtr;
+    if (*CurPtr == '-' || *CurPtr == '+')
+      ++CurPtr;
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+  }
+
+  return AsmToken(AsmToken::Real,
+                  StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]*
+static bool IsIdentifierChar(char c) {
+  return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@';
+}
+AsmToken AsmLexer::LexIdentifier() {
+  // Check for floating point literals.
+  if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
+    // Disambiguate a .1243foo identifier from a floating literal.
+    while (isdigit(*CurPtr))
+      ++CurPtr;
+    if (*CurPtr == 'e' || *CurPtr == 'E' || !IsIdentifierChar(*CurPtr))
+      return LexFloatLiteral();
+  }
+
+  while (IsIdentifierChar(*CurPtr))
+    ++CurPtr;
+
+  // Handle . as a special case.
+  if (CurPtr == TokStart+1 && TokStart[0] == '.')
+    return AsmToken(AsmToken::Dot, StringRef(TokStart, 1));
+
+  return AsmToken(AsmToken::Identifier, StringRef(TokStart, CurPtr - TokStart));
+}
+
+/// LexSlash: Slash: /
+///           C-Style Comment: /* ... */
+AsmToken AsmLexer::LexSlash() {
+  switch (*CurPtr) {
+  case '*': break; // C style comment.
+  case '/': return ++CurPtr, LexLineComment();
+  default:  return AsmToken(AsmToken::Slash, StringRef(CurPtr-1, 1));
+  }
+
+  // C Style comment.
+  ++CurPtr;  // skip the star.
+  while (1) {
+    int CurChar = getNextChar();
+    switch (CurChar) {
+    case EOF:
+      return ReturnError(TokStart, "unterminated comment");
+    case '*':
+      // End of the comment?
+      if (CurPtr[0] != '/') break;
+
+      ++CurPtr;   // End the */.
+      return LexToken();
+    }
+  }
+}
+
+/// LexLineComment: Comment: #[^\n]*
+///                        : //[^\n]*
AsmToken AsmLexer::LexLineComment() {
+  // FIXME: This is broken if we happen upon a comment at the end of a file,
+  // which was .included, and which doesn't end with a newline.
+  int CurChar = getNextChar();
+  while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
+    CurChar = getNextChar();
+
+  if (CurChar == EOF)
+    return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0));
+  return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0));
+}
+
+static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
+  if (CurPtr[0] == 'L' && CurPtr[1] == 'L')
+    CurPtr += 2;
+  if (CurPtr[0] == 'U' && CurPtr[1] == 'L' && CurPtr[2] == 'L')
+    CurPtr += 3;
+}
+
+/// LexDigit: First character is [0-9].
+/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +AsmToken AsmLexer::LexDigit() { + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0' || CurPtr[0] == '.') { + while (isdigit(*CurPtr)) + ++CurPtr; + + // Check for floating point literals. + if (*CurPtr == '.' || *CurPtr == 'e') { + ++CurPtr; + return LexFloatLiteral(); + } + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.getAsInteger(10, Value)) { + // Allow positive values that are too large to fit into a signed 64-bit + // integer, but that do fit in an unsigned one, we just convert them over. + unsigned long long UValue; + if (Result.getAsInteger(10, UValue)) + return ReturnError(TokStart, "invalid decimal number"); + Value = (long long)UValue; + } + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'b') { + ++CurPtr; + // See if we actually have "0b" as part of something like "jmp 0b\n" + if (!isdigit(CurPtr[0])) { + --CurPtr; + StringRef Result(TokStart, CurPtr - TokStart); + return AsmToken(AsmToken::Integer, Result, 0); + } + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid binary number"); + + StringRef Result(TokStart, CurPtr - TokStart); + + long long Value; + if (Result.substr(2).getAsInteger(2, Value)) + return ReturnError(TokStart, "Invalid binary number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + unsigned long long Result; + if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, StringRef(TokStart, CurPtr - TokStart), + (int64_t)Result); + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + + StringRef Result(TokStart, CurPtr - TokStart); + long long Value; + if (Result.getAsInteger(8, Value)) + return ReturnError(TokStart, "Invalid octal number"); + + // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL + // suffixes on integer literals. + SkipIgnoredIntegerSuffix(CurPtr); + + return AsmToken(AsmToken::Integer, Result, Value); +} + +/// LexSingleQuote: Integer: 'b' +AsmToken AsmLexer::LexSingleQuote() { + int CurChar = getNextChar(); + + if (CurChar == '\\') + CurChar = getNextChar(); + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated single quote"); + + CurChar = getNextChar(); + + if (CurChar != '\'') + return ReturnError(TokStart, "single quote way too long"); + + // The idea here being that 'c' is basically just an integral + // constant. 
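+  // For example, 'A' lexes to the integer 65 and '\n' to 10; only the short
+  // list of escapes handled below is recognized.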
+ StringRef Res = StringRef(TokStart,CurPtr - TokStart); + long long Value; + + if (Res.startswith("\'\\")) { + char theChar = Res[2]; + switch (theChar) { + default: Value = theChar; break; + case '\'': Value = '\''; break; + case 't': Value = '\t'; break; + case 'n': Value = '\n'; break; + case 'b': Value = '\b'; break; + } + } else + Value = TokStart[1]; + + return AsmToken(AsmToken::Integer, Res, Value); +} + + +/// LexQuote: String: "..." +AsmToken AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + return AsmToken(AsmToken::String, StringRef(TokStart, CurPtr - TokStart)); +} + +StringRef AsmLexer::LexUntilEndOfStatement() { + TokStart = CurPtr; + + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + *CurPtr != ';' && // End of statement marker. + *CurPtr != '\n' && + *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + ++CurPtr; + } + return StringRef(TokStart, CurPtr-TokStart); +} + +bool AsmLexer::isAtStartOfComment(char Char) { + // FIXME: This won't work for multi-character comment indicators like "//". + return Char == *MAI.getCommentString(); +} + +AsmToken AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + if (isAtStartOfComment(CurChar)) + return LexLineComment(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + case 0: + case ' ': + case '\t': + // Ignore whitespace. + return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. 
+ case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); + case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); + case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); + case '~': return AsmToken(AsmToken::Tilde, StringRef(TokStart, 1)); + case '(': return AsmToken(AsmToken::LParen, StringRef(TokStart, 1)); + case ')': return AsmToken(AsmToken::RParen, StringRef(TokStart, 1)); + case '[': return AsmToken(AsmToken::LBrac, StringRef(TokStart, 1)); + case ']': return AsmToken(AsmToken::RBrac, StringRef(TokStart, 1)); + case '{': return AsmToken(AsmToken::LCurly, StringRef(TokStart, 1)); + case '}': return AsmToken(AsmToken::RCurly, StringRef(TokStart, 1)); + case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1)); + case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1)); + case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1)); + case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1)); + case '=': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Equal, StringRef(TokStart, 1)); + case '|': + if (*CurPtr == '|') + return ++CurPtr, AsmToken(AsmToken::PipePipe, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Pipe, StringRef(TokStart, 1)); + case '^': return AsmToken(AsmToken::Caret, StringRef(TokStart, 1)); + case '&': + if (*CurPtr == '&') + return ++CurPtr, AsmToken(AsmToken::AmpAmp, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Amp, StringRef(TokStart, 1)); + case '!': + if (*CurPtr == '=') + return ++CurPtr, AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2)); + return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1)); + case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1)); + case '/': return LexSlash(); + case '#': return AsmToken(AsmToken::Hash, StringRef(TokStart, 1)); + case '\'': return LexSingleQuote(); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + case '<': + switch (*CurPtr) { + case '<': return ++CurPtr, AsmToken(AsmToken::LessLess, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::LessEqual, + StringRef(TokStart, 2)); + case '>': return ++CurPtr, AsmToken(AsmToken::LessGreater, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Less, StringRef(TokStart, 1)); + } + case '>': + switch (*CurPtr) { + case '>': return ++CurPtr, AsmToken(AsmToken::GreaterGreater, + StringRef(TokStart, 2)); + case '=': return ++CurPtr, AsmToken(AsmToken::GreaterEqual, + StringRef(TokStart, 2)); + default: return AsmToken(AsmToken::Greater, StringRef(TokStart, 1)); + } + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/final/lib/MC/MCParser/AsmParser.cpp b/final/lib/MC/MCParser/AsmParser.cpp new file mode 100644 index 00000000000..a84917ffb86 --- /dev/null +++ b/final/lib/MC/MCParser/AsmParser.cpp @@ -0,0 +1,2497 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This class implements the parser for assembly files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/AsmCond.h"
+#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetAsmParser.h"
+#include <cctype>
+#include <vector>
+using namespace llvm;
+
+namespace {
+
+/// \brief Helper class for tracking macro definitions.
+struct Macro {
+  StringRef Name;
+  StringRef Body;
+
+public:
+  Macro(StringRef N, StringRef B) : Name(N), Body(B) {}
+};
+
+/// \brief Helper class for storing information about an active macro
+/// instantiation.
+struct MacroInstantiation {
+  /// The macro being instantiated.
+  const Macro *TheMacro;
+
+  /// The macro instantiation with substitutions.
+  MemoryBuffer *Instantiation;
+
+  /// The location of the instantiation.
+  SMLoc InstantiationLoc;
+
+  /// The location where parsing should resume upon instantiation completion.
+  SMLoc ExitLoc;
+
+public:
+  MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL,
+                     const std::vector<std::vector<AsmToken> > &A);
+};
+
+/// \brief The concrete assembly parser instance.
+class AsmParser : public MCAsmParser {
+  friend class GenericAsmParser;
+
+  AsmParser(const AsmParser &);   // DO NOT IMPLEMENT
+  void operator=(const AsmParser &);  // DO NOT IMPLEMENT
+private:
+  AsmLexer Lexer;
+  MCContext &Ctx;
+  MCStreamer &Out;
+  SourceMgr &SrcMgr;
+  MCAsmParserExtension *GenericParser;
+  MCAsmParserExtension *PlatformParser;
+
+  /// This is the current buffer index we're lexing from as managed by the
+  /// SourceMgr object.
+  int CurBuffer;
+
+  AsmCond TheCondState;
+  std::vector<AsmCond> TheCondStack;
+
+  /// DirectiveMap - This is a table of handlers for directives. Each handler
+  /// is invoked after the directive identifier is read and is responsible for
+  /// parsing and validating the rest of the directive. The handler is passed
+  /// in the directive name and the location of the directive keyword.
+  StringMap<std::pair<MCAsmParserExtension*, DirectiveHandler> > DirectiveMap;
+
+  /// MacroMap - Map of currently defined macros.
+  StringMap<Macro*> MacroMap;
+
+  /// ActiveMacros - Stack of active macro instantiations.
+  std::vector<MacroInstantiation*> ActiveMacros;
+
+  /// Boolean tracking whether macro substitution is enabled.
+  unsigned MacrosEnabled : 1;
+
+  /// Flag tracking whether any errors have been encountered.
+  unsigned HadError : 1;
+
+public:
+  AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
+            const MCAsmInfo &MAI);
+  ~AsmParser();
+
+  virtual bool Run(bool NoInitialTextSection, bool NoFinalize = false);
+
+  void AddDirectiveHandler(MCAsmParserExtension *Object,
+                           StringRef Directive,
+                           DirectiveHandler Handler) {
+    DirectiveMap[Directive] = std::make_pair(Object, Handler);
+  }
+
+public:
+  /// @name MCAsmParser Interface
+  /// {
+
+  virtual SourceMgr &getSourceManager() { return SrcMgr; }
+  virtual MCAsmLexer &getLexer() { return Lexer; }
+  virtual MCContext &getContext() { return Ctx; }
+  virtual MCStreamer &getStreamer() { return Out; }
+
+  virtual void Warning(SMLoc L, const Twine &Msg);
+  virtual bool Error(SMLoc L, const Twine &Msg);
+
+  const AsmToken &Lex();
+
+  bool ParseExpression(const MCExpr *&Res);
+  virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc);
+  virtual bool ParseAbsoluteExpression(int64_t &Res);
+
+  /// }
+
+private:
+  void CheckForValidSection();
+
+  bool ParseStatement();
+
+  bool HandleMacroEntry(StringRef Name, SMLoc NameLoc, const Macro *M);
+  void HandleMacroExit();
+
+  void PrintMacroInstantiations();
+  void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
+    SrcMgr.PrintMessage(Loc, Msg, Type);
+  }
+
+  /// EnterIncludeFile - Enter the specified file. This returns true on failure.
+  bool EnterIncludeFile(const std::string &Filename);
+
+  /// \brief Reset the current lexer position to that given by \arg Loc. The
+  /// current token is not set; clients should ensure Lex() is called
+  /// subsequently.
+  void JumpToLoc(SMLoc Loc);
+
+  void EatToEndOfStatement();
+
+  /// \brief Parse up to the end of statement and return the contents from the
+  /// current token until the end of the statement; the current token on exit
+  /// will be either the EndOfStatement or EOF.
+  StringRef ParseStringToEndOfStatement();
+
+  bool ParseAssignment(StringRef Name, bool allow_redef);
+
+  bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc);
+  bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc);
+
+  /// ParseIdentifier - Parse an identifier or string (as a quoted identifier)
+  /// and set \arg Res to the identifier contents.
+  bool ParseIdentifier(StringRef &Res);
+
+  // Directive Parsing.
+
+  // ".ascii", ".asciiz", ".string"
+  bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
+  bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+  bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ...
+  bool ParseDirectiveFill(); // ".fill"
+  bool ParseDirectiveSpace(); // ".space"
+  bool ParseDirectiveZero(); // ".zero"
+  bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); // ".set", ".equ", ".equiv"
+  bool ParseDirectiveOrg(); // ".org"
+  // ".align{,32}", ".p2align{,w,l}"
+  bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize);
+
+  /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" which
+  /// accepts a single symbol (which should be a label or an external).
+  bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr);
+
+  bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm"
+
+  bool ParseDirectiveAbort(); // ".abort"
+  bool ParseDirectiveInclude(); // ".include"
+
+  bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if"
+  // ".ifdef" or ".ifndef", depending on expect_defined
+  bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined);
+  bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif"
+  bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else"
+  bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif
+
+  /// ParseEscapedString - Parse the current token as a string which may include
+  /// escaped characters and return the string contents.
+  bool ParseEscapedString(std::string &Data);
+
+  const MCExpr *ApplyModifierToExpr(const MCExpr *E,
+                                    MCSymbolRefExpr::VariantKind Variant);
+};
+
+/// \brief Generic implementations of directive handling, etc. which is shared
+/// (or the default, at least) for all assembler parsers.
+class GenericAsmParser : public MCAsmParserExtension {
+  template<bool (GenericAsmParser::*Handler)(StringRef, SMLoc)>
+  void AddDirectiveHandler(StringRef Directive) {
+    getParser().AddDirectiveHandler(this, Directive,
+                                    HandleDirective<GenericAsmParser, Handler>);
+  }
+public:
+  GenericAsmParser() {}
+
+  AsmParser &getParser() {
+    return (AsmParser&) this->MCAsmParserExtension::getParser();
+  }
+
+  virtual void Initialize(MCAsmParser &Parser) {
+    // Call the base implementation.
+    this->MCAsmParserExtension::Initialize(Parser);
+
+    // Debugging directives.
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveFile>(".file");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLine>(".line");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLoc>(".loc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveStabs>(".stabs");
+
+    // CFI directives.
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIStartProc>(
+      ".cfi_startproc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIEndProc>(
+      ".cfi_endproc");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfa>(
+      ".cfi_def_cfa");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>(
+      ".cfi_def_cfa_offset");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>(
+      ".cfi_def_cfa_register");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>(
+      ".cfi_offset");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_lsda");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state");
+    AddDirectiveHandler<
+      &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state");
+
+    // Macro directives.
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+      ".macros_on");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>(
+      ".macros_off");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacro>(".macro");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endm");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveEndMacro>(".endmacro");
+
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".sleb128");
+    AddDirectiveHandler<&GenericAsmParser::ParseDirectiveLEB128>(".uleb128");
+  }
+
+  bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+
+  bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveStabs(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc);
+
+  bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc);
+  bool ParseDirectiveEndMacro(StringRef, SMLoc DirectiveLoc);
+
+  bool ParseDirectiveLEB128(StringRef, SMLoc);
+};
+
+}
+
+namespace llvm {
+
+extern MCAsmParserExtension *createDarwinAsmParser();
+extern MCAsmParserExtension *createELFAsmParser();
+extern MCAsmParserExtension *createCOFFAsmParser();
+
+}
+
+enum { DEFAULT_ADDRSPACE = 0 };
+
+AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
+                     MCStreamer &_Out, const MCAsmInfo &_MAI)
+  : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+    GenericParser(new GenericAsmParser), PlatformParser(0),
+    CurBuffer(0), MacrosEnabled(true) {
+  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+  // Initialize the generic parser.
+  GenericParser->Initialize(*this);
+
+  // Initialize the platform / file format parser.
+  //
+  // FIXME: This is a hack, we need to (majorly) clean up how these objects are
+  // created.
+  if (_MAI.hasMicrosoftFastStdCallMangling()) {
+    PlatformParser = createCOFFAsmParser();
+    PlatformParser->Initialize(*this);
+  } else if (_MAI.hasSubsectionsViaSymbols()) {
+    PlatformParser = createDarwinAsmParser();
+    PlatformParser->Initialize(*this);
+  } else {
+    PlatformParser = createELFAsmParser();
+    PlatformParser->Initialize(*this);
+  }
+}
+
+AsmParser::~AsmParser() {
+  assert(ActiveMacros.empty() && "Unexpected active macro instantiation!");
+
+  // Destroy any macros.
+  for (StringMap<Macro*>::iterator it = MacroMap.begin(),
+         ie = MacroMap.end(); it != ie; ++it)
+    delete it->getValue();
+
+  delete PlatformParser;
+  delete GenericParser;
+}
+
+void AsmParser::PrintMacroInstantiations() {
+  // Print the active macro instantiation stack.
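+  // Each active instantiation contributes one "while in macro instantiation"
+  // note, walking from the innermost instantiation outward, so errors inside
+  // nested macros show the full chain of expansion points.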
+  for (std::vector<MacroInstantiation*>::const_reverse_iterator
+         it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it)
+    PrintMessage((*it)->InstantiationLoc, "while in macro instantiation",
+                 "note");
+}
+
+void AsmParser::Warning(SMLoc L, const Twine &Msg) {
+  PrintMessage(L, Msg, "warning");
+  PrintMacroInstantiations();
+}
+
+bool AsmParser::Error(SMLoc L, const Twine &Msg) {
+  HadError = true;
+  PrintMessage(L, Msg, "error");
+  PrintMacroInstantiations();
+  return true;
+}
+
+bool AsmParser::EnterIncludeFile(const std::string &Filename) {
+  int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc());
+  if (NewBuf == -1)
+    return true;
+
+  CurBuffer = NewBuf;
+
+  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+
+  return false;
+}
+
+void AsmParser::JumpToLoc(SMLoc Loc) {
+  CurBuffer = SrcMgr.FindBufferContainingLoc(Loc);
+  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer());
+}
+
+const AsmToken &AsmParser::Lex() {
+  const AsmToken *tok = &Lexer.Lex();
+
+  if (tok->is(AsmToken::Eof)) {
+    // If this is the end of an included file, pop the parent file off the
+    // include stack.
+    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+    if (ParentIncludeLoc != SMLoc()) {
+      JumpToLoc(ParentIncludeLoc);
+      tok = &Lexer.Lex();
+    }
+  }
+
+  if (tok->is(AsmToken::Error))
+    Error(Lexer.getErrLoc(), Lexer.getErr());
+
+  return *tok;
+}
+
+bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+  // Create the initial section, if requested.
+  if (!NoInitialTextSection)
+    Out.InitSections();
+
+  // Prime the lexer.
+  Lex();
+
+  HadError = false;
+  AsmCond StartingCondState = TheCondState;
+
+  // While we have input, parse each statement.
+  while (Lexer.isNot(AsmToken::Eof)) {
+    if (!ParseStatement()) continue;
+
+    // We had an error, validate that one was emitted and recover by skipping to
+    // the next line.
+    assert(HadError && "Parse statement returned an error, but none emitted!");
+    EatToEndOfStatement();
+  }
+
+  if (TheCondState.TheCond != StartingCondState.TheCond ||
+      TheCondState.Ignore != StartingCondState.Ignore)
+    return TokError("unmatched .ifs or .elses");
+
+  // Check to see that there are no empty DwarfFile slots.
+  const std::vector<MCDwarfFile *> &MCDwarfFiles =
+    getContext().getMCDwarfFiles();
+  for (unsigned i = 1; i < MCDwarfFiles.size(); i++) {
+    if (!MCDwarfFiles[i])
+      TokError("unassigned file number: " + Twine(i) + " for .file directives");
+  }
+
+  // Finalize the output stream if there are no errors and if the client wants
+  // us to.
+  if (!HadError && !NoFinalize)
+    Out.Finish();
+
+  return HadError;
+}
+
+void AsmParser::CheckForValidSection() {
+  if (!getStreamer().getCurrentSection()) {
+    TokError("expected section directive before assembly directive");
+    Out.SwitchSection(Ctx.getMachOSection(
+                        "__TEXT", "__text",
+                        MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+                        0, SectionKind::getText()));
+  }
+}
+
+/// EatToEndOfStatement - Throw away the rest of the line for testing purposes.
+void AsmParser::EatToEndOfStatement() {
+  while (Lexer.isNot(AsmToken::EndOfStatement) &&
+         Lexer.isNot(AsmToken::Eof))
+    Lex();
+
+  // Eat EOL.
+  if (Lexer.is(AsmToken::EndOfStatement))
+    Lex();
+}
+
+StringRef AsmParser::ParseStringToEndOfStatement() {
+  const char *Start = getTok().getLoc().getPointer();
+
+  while (Lexer.isNot(AsmToken::EndOfStatement) &&
+         Lexer.isNot(AsmToken::Eof))
+    Lex();
+
+  const char *End = getTok().getLoc().getPointer();
+  return StringRef(Start, End - Start);
+}
+
+/// ParseParenExpr - Parse a paren expression and return it.
+/// NOTE: This assumes the leading '(' has already been consumed.
+///
+/// parenexpr ::= expr)
+///
+bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  if (ParseExpression(Res)) return true;
+  if (Lexer.isNot(AsmToken::RParen))
+    return TokError("expected ')' in parentheses expression");
+  EndLoc = Lexer.getLoc();
+  Lex();
+  return false;
+}
+
+/// ParseBracketExpr - Parse a bracket expression and return it.
+/// NOTE: This assumes the leading '[' has already been consumed.
+///
+/// bracketexpr ::= expr]
+///
+bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  if (ParseExpression(Res)) return true;
+  if (Lexer.isNot(AsmToken::RBrac))
+    return TokError("expected ']' in brackets expression");
+  EndLoc = Lexer.getLoc();
+  Lex();
+  return false;
+}
+
+/// ParsePrimaryExpr - Parse a primary expression and return it.
+///  primaryexpr ::= (parenexpr
+///  primaryexpr ::= symbol
+///  primaryexpr ::= number
+///  primaryexpr ::= '.'
+///  primaryexpr ::= ~,+,- primaryexpr
+bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
+  switch (Lexer.getKind()) {
+  default:
+    return TokError("unknown token in expression");
+  case AsmToken::Exclaim:
+    Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res, EndLoc))
+      return true;
+    Res = MCUnaryExpr::CreateLNot(Res, getContext());
+    return false;
+  case AsmToken::Dollar:
+  case AsmToken::String:
+  case AsmToken::Identifier: {
+    EndLoc = Lexer.getLoc();
+
+    StringRef Identifier;
+    if (ParseIdentifier(Identifier))
+      return true;
+
+    // This is a symbol reference.
+    std::pair<StringRef, StringRef> Split = Identifier.split('@');
+    MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
+
+    // Lookup the symbol variant if used.
+    MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+    if (Split.first.size() != Identifier.size()) {
+      Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
+      if (Variant == MCSymbolRefExpr::VK_Invalid) {
+        Variant = MCSymbolRefExpr::VK_None;
+        return TokError("invalid variant '" + Split.second + "'");
+      }
+    }
+
+    // If this is an absolute variable reference, substitute it now to preserve
+    // semantics in the face of reassignment.
+    if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
+      if (Variant)
+        return Error(EndLoc, "unexpected modifier on variable reference");
+
+      Res = Sym->getVariableValue();
+      return false;
+    }
+
+    // Otherwise create a symbol ref.
+    Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
+    return false;
+  }
+  case AsmToken::Integer: {
+    SMLoc Loc = getTok().getLoc();
+    int64_t IntVal = getTok().getIntVal();
+    Res = MCConstantExpr::Create(IntVal, getContext());
+    EndLoc = Lexer.getLoc();
+    Lex(); // Eat token.
+    // Look for 'b' or 'f' following an Integer as a directional label.
+    if (Lexer.getKind() == AsmToken::Identifier) {
+      StringRef IDVal = getTok().getString();
+      if (IDVal == "f" || IDVal == "b") {
+        MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+                                                      IDVal == "f" ? 1 : 0);
+        Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+                                      getContext());
+        if (IDVal == "b" && Sym->isUndefined())
+          return Error(Loc, "invalid reference to undefined symbol");
+        EndLoc = Lexer.getLoc();
+        Lex(); // Eat identifier.
+      }
+    }
+    return false;
+  }
+  case AsmToken::Real: {
+    APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+    uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
+    Res = MCConstantExpr::Create(IntVal, getContext());
+    Lex(); // Eat token.
+    return false;
+  }
+  case AsmToken::Dot: {
+    // This is a '.' reference, which references the current PC. Emit a
+    // temporary label to the streamer and refer to it.
+    MCSymbol *Sym = Ctx.CreateTempSymbol();
+    Out.EmitLabel(Sym);
+    Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, getContext());
+    EndLoc = Lexer.getLoc();
+    Lex(); // Eat identifier.
+    return false;
+  }
+  case AsmToken::LParen:
+    Lex(); // Eat the '('.
+    return ParseParenExpr(Res, EndLoc);
+  case AsmToken::LBrac:
+    if (!PlatformParser->HasBracketExpressions())
+      return TokError("brackets expression not supported on this target");
+    Lex(); // Eat the '['.
+    return ParseBracketExpr(Res, EndLoc);
+  case AsmToken::Minus:
+    Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res, EndLoc))
+      return true;
+    Res = MCUnaryExpr::CreateMinus(Res, getContext());
+    return false;
+  case AsmToken::Plus:
+    Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res, EndLoc))
+      return true;
+    Res = MCUnaryExpr::CreatePlus(Res, getContext());
+    return false;
+  case AsmToken::Tilde:
+    Lex(); // Eat the operator.
+    if (ParsePrimaryExpr(Res, EndLoc))
+      return true;
+    Res = MCUnaryExpr::CreateNot(Res, getContext());
+    return false;
+  }
+}
+
+bool AsmParser::ParseExpression(const MCExpr *&Res) {
+  SMLoc EndLoc;
+  return ParseExpression(Res, EndLoc);
+}
+
+const MCExpr *
+AsmParser::ApplyModifierToExpr(const MCExpr *E,
+                               MCSymbolRefExpr::VariantKind Variant) {
+  // Recurse over the given expression, rebuilding it to apply the given variant
+  // if there is exactly one symbol.
+  switch (E->getKind()) {
+  case MCExpr::Target:
+  case MCExpr::Constant:
+    return 0;
+
+  case MCExpr::SymbolRef: {
+    const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+    if (SRE->getKind() != MCSymbolRefExpr::VK_None) {
+      TokError("invalid variant on expression '" +
+               getTok().getIdentifier() + "' (already modified)");
+      return E;
+    }
+
+    return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, getContext());
+  }
+
+  case MCExpr::Unary: {
+    const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+    const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant);
+    if (!Sub)
+      return 0;
+    return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext());
+  }
+
+  case MCExpr::Binary: {
+    const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+    const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant);
+    const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant);
+
+    if (!LHS && !RHS)
+      return 0;
+
+    if (!LHS) LHS = BE->getLHS();
+    if (!RHS) RHS = BE->getRHS();
+
+    return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext());
+  }
+  }
+
+  assert(0 && "Invalid expression kind!");
+  return 0;
+}
+
+/// ParseExpression - Parse an expression and return it.
+///
+///  expr ::= expr +,- expr          -> lowest.
+///  expr ::= expr |,^,&,! expr      -> middle.
+///  expr ::= expr *,/,%,<<,>> expr  -> highest.
+///  expr ::= primaryexpr
+///
+bool AsmParser::ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+  // Parse the expression.
+  Res = 0;
+  if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc))
+    return true;
+
+  // As a special case, we support 'a op b @ modifier' by rewriting the
+  // expression to include the modifier. This is inefficient, but in general we
+  // expect users to use 'a@modifier op b'.
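+  // For example (illustrative input), "_a + 4 @ GOTPCREL" is rewritten as if
+  // it had been written "_a@GOTPCREL + 4": the variant is applied to each
+  // unmodified symbol reference in the tree, while constants are left alone.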
+  if (Lexer.getKind() == AsmToken::At) {
+    Lex();
+
+    if (Lexer.isNot(AsmToken::Identifier))
+      return TokError("unexpected symbol modifier following '@'");
+
+    MCSymbolRefExpr::VariantKind Variant =
+      MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier());
+    if (Variant == MCSymbolRefExpr::VK_Invalid)
+      return TokError("invalid variant '" + getTok().getIdentifier() + "'");
+
+    const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant);
+    if (!ModifiedRes)
+      return TokError("invalid modifier '" + getTok().getIdentifier() +
+                      "' (no symbols present)");
+
+    Res = ModifiedRes;
+    Lex();
+  }
+
+  // Try to constant fold it up front, if possible.
+  int64_t Value;
+  if (Res->EvaluateAsAbsolute(Value))
+    Res = MCConstantExpr::Create(Value, getContext());
+
+  return false;
+}
+
+bool AsmParser::ParseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) {
+  Res = 0;
+  return ParseParenExpr(Res, EndLoc) ||
+         ParseBinOpRHS(1, Res, EndLoc);
+}
+
+bool AsmParser::ParseAbsoluteExpression(int64_t &Res) {
+  const MCExpr *Expr;
+
+  SMLoc StartLoc = Lexer.getLoc();
+  if (ParseExpression(Expr))
+    return true;
+
+  if (!Expr->EvaluateAsAbsolute(Res))
+    return Error(StartLoc, "expected absolute expression");
+
+  return false;
+}
+
+static unsigned getBinOpPrecedence(AsmToken::TokenKind K,
+                                   MCBinaryExpr::Opcode &Kind) {
+  switch (K) {
+  default:
+    return 0;    // not a binop.
+
+    // Lowest Precedence: &&, ||, @
+  case AsmToken::AmpAmp:
+    Kind = MCBinaryExpr::LAnd;
+    return 1;
+  case AsmToken::PipePipe:
+    Kind = MCBinaryExpr::LOr;
+    return 1;
+
+
+    // Low Precedence: |, &, ^
+    //
+    // FIXME: gas seems to support '!' as an infix operator?
+  case AsmToken::Pipe:
+    Kind = MCBinaryExpr::Or;
+    return 2;
+  case AsmToken::Caret:
+    Kind = MCBinaryExpr::Xor;
+    return 2;
+  case AsmToken::Amp:
+    Kind = MCBinaryExpr::And;
+    return 2;
+
+    // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >=
+  case AsmToken::EqualEqual:
+    Kind = MCBinaryExpr::EQ;
+    return 3;
+  case AsmToken::ExclaimEqual:
+  case AsmToken::LessGreater:
+    Kind = MCBinaryExpr::NE;
+    return 3;
+  case AsmToken::Less:
+    Kind = MCBinaryExpr::LT;
+    return 3;
+  case AsmToken::LessEqual:
+    Kind = MCBinaryExpr::LTE;
+    return 3;
+  case AsmToken::Greater:
+    Kind = MCBinaryExpr::GT;
+    return 3;
+  case AsmToken::GreaterEqual:
+    Kind = MCBinaryExpr::GTE;
+    return 3;
+
+    // High Intermediate Precedence: +, -
+  case AsmToken::Plus:
+    Kind = MCBinaryExpr::Add;
+    return 4;
+  case AsmToken::Minus:
+    Kind = MCBinaryExpr::Sub;
+    return 4;
+
+    // Highest Precedence: *, /, %, <<, >>
+  case AsmToken::Star:
+    Kind = MCBinaryExpr::Mul;
+    return 5;
+  case AsmToken::Slash:
+    Kind = MCBinaryExpr::Div;
+    return 5;
+  case AsmToken::Percent:
+    Kind = MCBinaryExpr::Mod;
+    return 5;
+  case AsmToken::LessLess:
+    Kind = MCBinaryExpr::Shl;
+    return 5;
+  case AsmToken::GreaterGreater:
+    Kind = MCBinaryExpr::Shr;
+    return 5;
+  }
+}
+
+
+/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'.
+/// Res contains the LHS of the expression on input.
+bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
+                              SMLoc &EndLoc) {
+  while (1) {
+    MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
+    unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
+
+    // If the next token is lower precedence than we are allowed to eat, return
+    // successfully with what we ate already.
+    if (TokPrec < Precedence)
+      return false;
+
+    Lex();
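+    // Worked example of this precedence-climbing loop on "1 + 2 * 3": after
+    // the caller parses 1 and sees '+' (precedence 4), this loop parses 2,
+    // then sees '*' (precedence 5 > 4) and recurses so that 2*3 becomes a
+    // subtree before the Add node is built, yielding 1 + (2 * 3).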
+    // Eat the next primary expression.
+    const MCExpr *RHS;
+    if (ParsePrimaryExpr(RHS, EndLoc)) return true;
+
+    // If BinOp binds less tightly with RHS than the operator after RHS, let
+    // the pending operator take RHS as its LHS.
+    MCBinaryExpr::Opcode Dummy;
+    unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy);
+    if (TokPrec < NextTokPrec) {
+      if (ParseBinOpRHS(Precedence+1, RHS, EndLoc)) return true;
+    }
+
+    // Merge LHS and RHS according to operator.
+    Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext());
+  }
+}
+
+
+
+
+/// ParseStatement:
+///   ::= EndOfStatement
+///   ::= Label* Directive ...Operands... EndOfStatement
+///   ::= Label* Identifier OperandList* EndOfStatement
+bool AsmParser::ParseStatement() {
+  if (Lexer.is(AsmToken::EndOfStatement)) {
+    Out.AddBlankLine();
+    Lex();
+    return false;
+  }
+
+  // Statements always start with an identifier or are a full line comment.
+  AsmToken ID = getTok();
+  SMLoc IDLoc = ID.getLoc();
+  StringRef IDVal;
+  int64_t LocalLabelVal = -1;
+  // A full line comment is a '#' as the first token.
+  if (Lexer.is(AsmToken::Hash)) {
+    EatToEndOfStatement();
+    return false;
+  }
+  // Allow an integer followed by a ':' as a directional local label.
+  if (Lexer.is(AsmToken::Integer)) {
+    LocalLabelVal = getTok().getIntVal();
+    if (LocalLabelVal < 0) {
+      if (!TheCondState.Ignore)
+        return TokError("unexpected token at start of statement");
+      IDVal = "";
+    }
+    else {
+      IDVal = getTok().getString();
+      Lex(); // Consume the integer token to be used as an identifier token.
+      if (Lexer.getKind() != AsmToken::Colon) {
+        if (!TheCondState.Ignore)
+          return TokError("unexpected token at start of statement");
+      }
+    }
+  }
+  else if (ParseIdentifier(IDVal)) {
+    if (!TheCondState.Ignore)
+      return TokError("unexpected token at start of statement");
+    IDVal = "";
+  }
+
+  // Handle conditional assembly here before checking for skipping. We
+  // have to do this so that .endif isn't skipped in a ".if 0" block for
+  // example.
+  if (IDVal == ".if")
+    return ParseDirectiveIf(IDLoc);
+  if (IDVal == ".ifdef")
+    return ParseDirectiveIfdef(IDLoc, true);
+  if (IDVal == ".ifndef" || IDVal == ".ifnotdef")
+    return ParseDirectiveIfdef(IDLoc, false);
+  if (IDVal == ".elseif")
+    return ParseDirectiveElseIf(IDLoc);
+  if (IDVal == ".else")
+    return ParseDirectiveElse(IDLoc);
+  if (IDVal == ".endif")
+    return ParseDirectiveEndIf(IDLoc);
+
+  // If we are in a ".if 0" block, ignore this statement.
+  if (TheCondState.Ignore) {
+    EatToEndOfStatement();
+    return false;
+  }
+
+  // FIXME: Recurse on local labels?
+
+  // See what kind of statement we have.
+  switch (Lexer.getKind()) {
+  case AsmToken::Colon: {
+    CheckForValidSection();
+
+    // identifier ':'   -> Label.
+    Lex();
+
+    // Diagnose attempt to use a variable as a label.
+    //
+    // FIXME: Diagnostics. Note the location of the definition as a label.
+    // FIXME: This doesn't diagnose assignment to a symbol which has been
+    // implicitly marked as external.
+    MCSymbol *Sym;
+    if (LocalLabelVal == -1)
+      Sym = getContext().GetOrCreateSymbol(IDVal);
+    else
+      Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+    if (!Sym->isUndefined() || Sym->isVariable())
+      return Error(IDLoc, "invalid symbol redefinition");
+
+    // Emit the label.
+    Out.EmitLabel(Sym);
+
+    // Consume any end of statement token, if present, to avoid spurious
+    // AddBlankLine() calls.
+    if (Lexer.is(AsmToken::EndOfStatement)) {
+      Lex();
+      if (Lexer.is(AsmToken::Eof))
+        return false;
+    }
+
+    return ParseStatement();
+  }
+
+  case AsmToken::Equal:
+    // identifier '=' ...
-> assignment statement + Lex(); + + return ParseAssignment(IDVal, true); + + default: // Normal instruction or directive. + break; + } + + // If macros are enabled, check to see if this is a macro instantiation. + if (MacrosEnabled) + if (const Macro *M = MacroMap.lookup(IDVal)) + return HandleMacroEntry(IDVal, IDLoc, M); + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + // Assembler features + if (IDVal == ".set" || IDVal == ".equ") + return ParseDirectiveSet(IDVal, true); + if (IDVal == ".equiv") + return ParseDirectiveSet(IDVal, false); + + // Data directives + + if (IDVal == ".ascii") + return ParseDirectiveAscii(IDVal, false); + if (IDVal == ".asciz" || IDVal == ".string") + return ParseDirectiveAscii(IDVal, true); + + if (IDVal == ".byte") + return ParseDirectiveValue(1); + if (IDVal == ".short") + return ParseDirectiveValue(2); + if (IDVal == ".value") + return ParseDirectiveValue(2); + if (IDVal == ".2byte") + return ParseDirectiveValue(2); + if (IDVal == ".long") + return ParseDirectiveValue(4); + if (IDVal == ".int") + return ParseDirectiveValue(4); + if (IDVal == ".4byte") + return ParseDirectiveValue(4); + if (IDVal == ".quad") + return ParseDirectiveValue(8); + if (IDVal == ".8byte") + return ParseDirectiveValue(8); + if (IDVal == ".single" || IDVal == ".float") + return ParseDirectiveRealValue(APFloat::IEEEsingle); + if (IDVal == ".double") + return ParseDirectiveRealValue(APFloat::IEEEdouble); + + if (IDVal == ".align") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); + } + if (IDVal == ".align32") { + bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); + return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); + } + if (IDVal == ".balign") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + if (IDVal == ".balignw") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + if (IDVal == ".balignl") + return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + if (IDVal == ".p2align") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + if (IDVal == ".p2alignw") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + if (IDVal == ".p2alignl") + return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + + if (IDVal == ".org") + return ParseDirectiveOrg(); + + if (IDVal == ".fill") + return ParseDirectiveFill(); + if (IDVal == ".space") + return ParseDirectiveSpace(); + if (IDVal == ".zero") + return ParseDirectiveZero(); + + // Symbol attribute directives + + if (IDVal == ".globl" || IDVal == ".global") + return ParseDirectiveSymbolAttribute(MCSA_Global); + // ELF only? Should it be here? 
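+    // Note: the attribute directives below mix ELF notions (.local, .hidden,
+    // .protected) with Mach-O ones (.lazy_reference, .private_extern); this
+    // generic parser accepts them all and simply forwards the attribute to
+    // the streamer.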
+ if (IDVal == ".local") + return ParseDirectiveSymbolAttribute(MCSA_Local); + if (IDVal == ".hidden") + return ParseDirectiveSymbolAttribute(MCSA_Hidden); + if (IDVal == ".indirect_symbol") + return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); + if (IDVal == ".internal") + return ParseDirectiveSymbolAttribute(MCSA_Internal); + if (IDVal == ".lazy_reference") + return ParseDirectiveSymbolAttribute(MCSA_LazyReference); + if (IDVal == ".no_dead_strip") + return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); + if (IDVal == ".symbol_resolver") + return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); + if (IDVal == ".private_extern") + return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); + if (IDVal == ".protected") + return ParseDirectiveSymbolAttribute(MCSA_Protected); + if (IDVal == ".reference") + return ParseDirectiveSymbolAttribute(MCSA_Reference); + if (IDVal == ".weak") + return ParseDirectiveSymbolAttribute(MCSA_Weak); + if (IDVal == ".weak_definition") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); + if (IDVal == ".weak_reference") + return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + if (IDVal == ".weak_def_can_be_hidden") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); + + if (IDVal == ".comm") + return ParseDirectiveComm(/*IsLocal=*/false); + if (IDVal == ".lcomm") + return ParseDirectiveComm(/*IsLocal=*/true); + + if (IDVal == ".abort") + return ParseDirectiveAbort(); + if (IDVal == ".include") + return ParseDirectiveInclude(); + + if (IDVal == ".code16" || IDVal == ".code32" || IDVal == ".code64") + return TokError(Twine(IDVal) + " not supported yet"); + + // Look up the handler in the handler table. + std::pair Handler = + DirectiveMap.lookup(IDVal); + if (Handler.first) + return (*Handler.second)(Handler.first, IDVal, IDLoc); + + // Target hook for parsing target specific directives. + if (!getTargetParser().ParseDirective(ID)) + return false; + + Warning(IDLoc, "ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + CheckForValidSection(); + + // Canonicalize the opcode to lower case. + SmallString<128> Opcode; + for (unsigned i = 0, e = IDVal.size(); i != e; ++i) + Opcode.push_back(tolower(IDVal[i])); + + SmallVector ParsedOperands; + bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc, + ParsedOperands); + + // Dump the parsed representation, if requested. + if (getShowParsedOperands()) { + SmallString<256> Str; + raw_svector_ostream OS(Str); + OS << "parsed instruction: ["; + for (unsigned i = 0; i != ParsedOperands.size(); ++i) { + if (i != 0) + OS << ", "; + ParsedOperands[i]->dump(OS); + } + OS << "]"; + + PrintMessage(IDLoc, OS.str(), "note"); + } + + // If parsing succeeded, match the instruction. + if (!HadError) + HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands, + Out); + + // Free any parsed operands. + for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) + delete ParsedOperands[i]; + + // Don't skip the rest of the line, the instruction parser is responsible for + // that. + return false; +} + +MacroInstantiation::MacroInstantiation(const Macro *M, SMLoc IL, SMLoc EL, + const std::vector > &A) + : TheMacro(M), InstantiationLoc(IL), ExitLoc(EL) +{ + // Macro instantiation is lexical, unfortunately. We construct a new buffer + // to hold the macro body with substitutions. + SmallString<256> Buf; + raw_svector_ostream OS(Buf); + + StringRef Body = M->Body; + while (!Body.empty()) { + // Scan for the next substitution. 
+    std::size_t End = Body.size(), Pos = 0;
+    for (; Pos != End; ++Pos) {
+      // Check for a substitution or escape.
+      if (Body[Pos] != '$' || Pos + 1 == End)
+        continue;
+
+      char Next = Body[Pos + 1];
+      if (Next == '$' || Next == 'n' || isdigit(Next))
+        break;
+    }
+
+    // Add the prefix.
+    OS << Body.slice(0, Pos);
+
+    // Check if we reached the end.
+    if (Pos == End)
+      break;
+
+    switch (Body[Pos+1]) {
+    // $$ => $
+    case '$':
+      OS << '$';
+      break;
+
+    // $n => number of arguments
+    case 'n':
+      OS << A.size();
+      break;
+
+    // $[0-9] => argument
+    default: {
+      // Missing arguments are ignored.
+      unsigned Index = Body[Pos+1] - '0';
+      if (Index >= A.size())
+        break;
+
+      // Otherwise substitute with the token values, with spaces eliminated.
+      for (std::vector<AsmToken>::const_iterator it = A[Index].begin(),
+             ie = A[Index].end(); it != ie; ++it)
+        OS << it->getString();
+      break;
+    }
+    }
+
+    // Update the scan point.
+    Body = Body.substr(Pos + 2);
+  }
+
+  // We include the .endmacro in the buffer as our cue to exit the macro
+  // instantiation.
+  OS << ".endmacro\n";
+
+  Instantiation = MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>");
+}
+
+bool AsmParser::HandleMacroEntry(StringRef Name, SMLoc NameLoc,
+                                 const Macro *M) {
+  // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
+  // this, although we should protect against infinite loops.
+  if (ActiveMacros.size() == 20)
+    return TokError("macros cannot be nested more than 20 levels deep");
+
+  // Parse the macro instantiation arguments.
+  std::vector<std::vector<AsmToken> > MacroArguments;
+  MacroArguments.push_back(std::vector<AsmToken>());
+  unsigned ParenLevel = 0;
+  for (;;) {
+    if (Lexer.is(AsmToken::Eof))
+      return TokError("unexpected token in macro instantiation");
+    if (Lexer.is(AsmToken::EndOfStatement))
+      break;
+
+    // If we aren't inside parentheses and this is a comma, start a new token
+    // list.
+    if (ParenLevel == 0 && Lexer.is(AsmToken::Comma)) {
+      MacroArguments.push_back(std::vector<AsmToken>());
+    } else {
+      // Adjust the current parentheses level.
+      if (Lexer.is(AsmToken::LParen))
+        ++ParenLevel;
+      else if (Lexer.is(AsmToken::RParen) && ParenLevel)
+        --ParenLevel;
+
+      // Append the token to the current argument list.
+      MacroArguments.back().push_back(getTok());
+    }
+    Lex();
+  }
+
+  // Create the macro instantiation object and add to the current macro
+  // instantiation stack.
+  MacroInstantiation *MI = new MacroInstantiation(M, NameLoc,
+                                                  getTok().getLoc(),
+                                                  MacroArguments);
+  ActiveMacros.push_back(MI);
+
+  // Jump to the macro instantiation and prime the lexer.
+  CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc());
+  Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
+  Lex();
+
+  return false;
+}
+
+void AsmParser::HandleMacroExit() {
+  // Jump to the EndOfStatement we should return to, and consume it.
+  JumpToLoc(ActiveMacros.back()->ExitLoc);
+  Lex();
+
+  // Pop the instantiation entry.
+  delete ActiveMacros.back();
+  ActiveMacros.pop_back();
+}
+
+static void MarkUsed(const MCExpr *Value) {
+  switch (Value->getKind()) {
+  case MCExpr::Binary:
+    MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getLHS());
+    MarkUsed(static_cast<const MCBinaryExpr*>(Value)->getRHS());
+    break;
+  case MCExpr::Target:
+  case MCExpr::Constant:
+    break;
+  case MCExpr::SymbolRef: {
+    static_cast<const MCSymbolRefExpr*>(Value)->getSymbol().setUsed(true);
+    break;
+  }
+  case MCExpr::Unary:
+    MarkUsed(static_cast<const MCUnaryExpr*>(Value)->getSubExpr());
+    break;
+  }
+}
+
+bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) {
+  // FIXME: Use better location, we should use proper tokens.
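+  // This handles both 'symbol = expression' statements and the '.set',
+  // '.equ' and '.equiv' directives; allow_redef is false only for '.equiv',
+  // which forbids redefinition.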
+  SMLoc EqualLoc = Lexer.getLoc();
+
+  const MCExpr *Value;
+  if (ParseExpression(Value))
+    return true;
+
+  MarkUsed(Value);
+
+  if (Lexer.isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in assignment");
+
+  // Eat the end of statement marker.
+  Lex();
+
+  // Validate that the LHS is allowed to be a variable (either it has not been
+  // used as a symbol, or it is an absolute symbol).
+  MCSymbol *Sym = getContext().LookupSymbol(Name);
+  if (Sym) {
+    // Diagnose assignment to a label.
+    //
+    // FIXME: Diagnostics. Note the location of the definition as a label.
+    // FIXME: Diagnose assignment to protected identifier (e.g., register name).
+    if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable())
+      ; // Allow redefinitions of undefined symbols only used in directives.
+    else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef))
+      return Error(EqualLoc, "redefinition of '" + Name + "'");
+    else if (!Sym->isVariable())
+      return Error(EqualLoc, "invalid assignment to '" + Name + "'");
+    else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
+      return Error(EqualLoc, "invalid reassignment of non-absolute variable '"
+                   + Name + "'");
+
+    // Don't count these checks as uses.
+    Sym->setUsed(false);
+  } else
+    Sym = getContext().GetOrCreateSymbol(Name);
+
+  // FIXME: Handle '.'.
+
+  // Do the assignment.
+  Out.EmitAssignment(Sym, Value);
+
+  return false;
+}
+
+/// ParseIdentifier:
+/// ::= identifier
+/// ::= string
+bool AsmParser::ParseIdentifier(StringRef &Res) {
+  // The assembler has relaxed rules for accepting identifiers, in particular
+  // we allow things like '.globl $foo', which would normally be separate
+  // tokens. At this level, we have already lexed so we cannot (currently)
+  // handle this as a context dependent token, instead we detect adjacent
+  // tokens and return the combined identifier.
+  if (Lexer.is(AsmToken::Dollar)) {
+    SMLoc DollarLoc = getLexer().getLoc();
+
+    // Consume the dollar sign, and check for a following identifier.
+    Lex();
+    if (Lexer.isNot(AsmToken::Identifier))
+      return true;
+
+    // We have a '$' followed by an identifier, make sure they are adjacent.
+    if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+      return true;
+
+    // Construct the joined identifier and consume the token.
+    Res = StringRef(DollarLoc.getPointer(),
+                    getTok().getIdentifier().size() + 1);
+    Lex();
+    return false;
+  }
+
+  if (Lexer.isNot(AsmToken::Identifier) &&
+      Lexer.isNot(AsmToken::String))
+    return true;
+
+  Res = getTok().getIdentifier();
+
+  Lex(); // Consume the identifier token.
+
+  return false;
+}
+
+/// ParseDirectiveSet:
+/// ::= .equ identifier ',' expression
+/// ::= .equiv identifier ',' expression
+/// ::= .set identifier ',' expression
+bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) {
+  StringRef Name;
+
+  if (ParseIdentifier(Name))
+    return TokError("expected identifier after '" + Twine(IDVal) + "'");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in '" + Twine(IDVal) + "'");
+  Lex();
+
+  return ParseAssignment(Name, allow_redef);
+}
+
+bool AsmParser::ParseEscapedString(std::string &Data) {
+  assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
+
+  Data = "";
+  StringRef Str = getTok().getStringContents();
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    if (Str[i] != '\\') {
+      Data += Str[i];
+      continue;
+    }
+
+    // Recognize escaped characters. Note that this escape semantics currently
+    // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
+    ++i;
+    if (i == e)
+      return TokError("unexpected backslash at end of string");
+
+    // Recognize octal sequences.
+    if ((unsigned) (Str[i] - '0') <= 7) {
+      // Consume up to three octal characters.
+      unsigned Value = Str[i] - '0';
+
+      if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+        ++i;
+        Value = Value * 8 + (Str[i] - '0');
+
+        if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
+          ++i;
+          Value = Value * 8 + (Str[i] - '0');
+        }
+      }
+
+      if (Value > 255)
+        return TokError("invalid octal escape sequence (out of range)");
+
+      Data += (unsigned char) Value;
+      continue;
+    }
+
+    // Otherwise recognize individual escapes.
+    switch (Str[i]) {
+    default:
+      // Just reject invalid escape sequences for now.
+      return TokError("invalid escape sequence (unrecognized character)");
+
+    case 'b': Data += '\b'; break;
+    case 'f': Data += '\f'; break;
+    case 'n': Data += '\n'; break;
+    case 'r': Data += '\r'; break;
+    case 't': Data += '\t'; break;
+    case '"': Data += '"'; break;
+    case '\\': Data += '\\'; break;
+    }
+  }
+
+  return false;
+}
+
+/// ParseDirectiveAscii:
+/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
+bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
+    for (;;) {
+      if (getLexer().isNot(AsmToken::String))
+        return TokError("expected string in '" + Twine(IDVal) + "' directive");
+
+      std::string Data;
+      if (ParseEscapedString(Data))
+        return true;
+
+      getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
+      if (ZeroTerminated)
+        getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
+
+      Lex();
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      if (getLexer().isNot(AsmToken::Comma))
+        return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+      Lex();
+    }
+  }
+
+  Lex();
+  return false;
+}
+
+/// ParseDirectiveValue
+/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveValue(unsigned Size) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
+    for (;;) {
+      const MCExpr *Value;
+      if (ParseExpression(Value))
+        return true;
+
+      // Special case constant expressions to match code generator.
+      if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
+        getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
+      else
+        getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      // FIXME: Improve diagnostic.
+      if (getLexer().isNot(AsmToken::Comma))
+        return TokError("unexpected token in directive");
+      Lex();
+    }
+  }
+
+  Lex();
+  return false;
+}
+
+/// ParseDirectiveRealValue
+/// ::= (.single | .double) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) {
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    CheckForValidSection();
+
+    for (;;) {
+      // We don't truly support arithmetic on floating point expressions, so
+      // we have to manually parse unary prefixes.
+      bool IsNeg = false;
+      if (getLexer().is(AsmToken::Minus)) {
+        Lex();
+        IsNeg = true;
+      } else if (getLexer().is(AsmToken::Plus))
+        Lex();
+
+      if (getLexer().isNot(AsmToken::Integer) &&
+          getLexer().isNot(AsmToken::Real))
+        return TokError("unexpected token in directive");
+
+      // Convert to an APFloat.
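+      // e.g. '.single 2.5' or '.double -4.0e2'; the raw token text is handed
+      // to APFloat::convertFromString, so ordinary decimal literals work.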
+ APFloat Value(Semantics); + if (Value.convertFromString(getTok().getString(), + APFloat::rmNearestTiesToEven) == + APFloat::opInvalidOp) + return TokError("invalid floating point literal"); + if (IsNeg) + Value.changeSign(); + + // Consume the numeric token. + Lex(); + + // Emit the value as an integer. + APInt AsInt = Value.bitcastToAPInt(); + getStreamer().EmitIntValue(AsInt.getLimitedValue(), + AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveSpace +/// ::= .space expression [ , expression ] +bool AsmParser::ParseDirectiveSpace() { + CheckForValidSection(); + + int64_t NumBytes; + if (ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.space' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.space' directive"); + } + + Lex(); + + if (NumBytes <= 0) + return TokError("invalid number of bytes in '.space' directive"); + + // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveZero +/// ::= .zero expression +bool AsmParser::ParseDirectiveZero() { + CheckForValidSection(); + + int64_t NumBytes; + if (ParseAbsoluteExpression(NumBytes)) + return true; + + int64_t Val = 0; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + if (ParseAbsoluteExpression(Val)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zero' directive"); + + Lex(); + + getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveFill +/// ::= .fill expression , expression , expression +bool AsmParser::ParseDirectiveFill() { + CheckForValidSection(); + + int64_t NumValues; + if (ParseAbsoluteExpression(NumValues)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillSize; + if (ParseAbsoluteExpression(FillSize)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); + + int64_t FillExpr; + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.fill' directive"); + + Lex(); + + if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) + return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); + + for (uint64_t i = 0, e = NumValues; i != e; ++i) + getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + + return false; +} + +/// ParseDirectiveOrg +/// ::= .org expression [ , expression ] +bool AsmParser::ParseDirectiveOrg() { + CheckForValidSection(); + + const MCExpr *Offset; + if (ParseExpression(Offset)) + return true; + + // Parse optional fill expression. 
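+  // e.g. '.org 256, 0x90' advances the location to offset 256 within the
+  // current section, filling the gap with 0x90 bytes.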
+ int64_t FillExpr = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.org' directive"); + Lex(); + + if (ParseAbsoluteExpression(FillExpr)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.org' directive"); + } + + Lex(); + + // FIXME: Only limited forms of relocatable expressions are accepted here, it + // has to be relative to the current section. + getStreamer().EmitValueToOffset(Offset, FillExpr); + + return false; +} + +/// ParseDirectiveAlign +/// ::= {.align, ...} expression [ , expression [ , expression ]] +bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { + CheckForValidSection(); + + SMLoc AlignmentLoc = getLexer().getLoc(); + int64_t Alignment; + if (ParseAbsoluteExpression(Alignment)) + return true; + + SMLoc MaxBytesLoc; + bool HasFillExpr = false; + int64_t FillExpr = 0; + int64_t MaxBytesToFill = 0; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + // The fill expression can be omitted while specifying a maximum number of + // alignment bytes, e.g: + // .align 3,,4 + if (getLexer().isNot(AsmToken::Comma)) { + HasFillExpr = true; + if (ParseAbsoluteExpression(FillExpr)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + MaxBytesLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(MaxBytesToFill)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + } + } + + Lex(); + + if (!HasFillExpr) + FillExpr = 0; + + // Compute alignment in bytes. + if (IsPow2) { + // FIXME: Diagnose overflow. + if (Alignment >= 32) { + Error(AlignmentLoc, "invalid alignment value"); + Alignment = 31; + } + + Alignment = 1ULL << Alignment; + } + + // Diagnose non-sensical max bytes to align. + if (MaxBytesLoc.isValid()) { + if (MaxBytesToFill < 1) { + Error(MaxBytesLoc, "alignment directive can never be satisfied in this " + "many bytes, ignoring maximum bytes expression"); + MaxBytesToFill = 0; + } + + if (MaxBytesToFill >= Alignment) { + Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " + "has no effect"); + MaxBytesToFill = 0; + } + } + + // Check whether we should use optimal code alignment for this .align + // directive. + bool UseCodeAlign = getStreamer().getCurrentSection()->UseCodeAlign(); + if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && + ValueSize == 1 && UseCodeAlign) { + getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); + } else { + // FIXME: Target specific behavior about how the "extra" bytes are filled. + getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, + MaxBytesToFill); + } + + return false; +} + +/// ParseDirectiveSymbolAttribute +/// ::= { ".globl", ".weak", ... 
} [ identifier ( , identifier )* ] +bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + for (;;) { + StringRef Name; + + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitSymbolAttribute(Sym, Attr); + + if (getLexer().is(AsmToken::EndOfStatement)) + break; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + } + } + + Lex(); + return false; +} + +/// ParseDirectiveComm +/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] +bool AsmParser::ParseDirectiveComm(bool IsLocal) { + CheckForValidSection(); + + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (ParseAbsoluteExpression(Pow2Alignment)) + return true; + + // If this target takes alignments in bytes (not log) validate and convert. + if (Lexer.getMAI().getAlignmentIsInBytes()) { + if (!isPowerOf2_64(Pow2Alignment)) + return Error(Pow2AlignmentLoc, "alignment must be a power of 2"); + Pow2Alignment = Log2_64(Pow2Alignment); + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.comm' or '.lcomm' directive"); + + Lex(); + + // NOTE: a size of zero for a .comm should create a undefined symbol + // but a size of .lcomm creates a bss symbol of size zero. + if (Size < 0) + return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " + "be less than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " + "alignment, can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // '.lcomm' is equivalent to '.zerofill'. + // Create the Symbol as a common or local common with Size and Pow2Alignment + if (IsLocal) { + getStreamer().EmitZerofill(Ctx.getMachOSection( + "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + return false; + } + + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); + return false; +} + +/// ParseDirectiveAbort +/// ::= .abort [... message ...] +bool AsmParser::ParseDirectiveAbort() { + // FIXME: Use loc from directive. + SMLoc Loc = getLexer().getLoc(); + + StringRef Str = ParseStringToEndOfStatement(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.abort' directive"); + + Lex(); + + if (Str.empty()) + Error(Loc, ".abort detected. Assembly stopping."); + else + Error(Loc, ".abort '" + Str + "' detected. Assembly stopping."); + // FIXME: Actually abort assembly here. 
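+  // For now the error is only reported through the normal diagnostic path
+  // and parsing continues; returning false keeps the statement loop going.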
+ + return false; +} + +/// ParseDirectiveInclude +/// ::= .include "filename" +bool AsmParser::ParseDirectiveInclude() { + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.include' directive"); + + std::string Filename = getTok().getString(); + SMLoc IncludeLoc = getLexer().getLoc(); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.include' directive"); + + // Strip the quotes. + Filename = Filename.substr(1, Filename.size()-2); + + // Attempt to switch the lexer to the included file before consuming the end + // of statement to avoid losing it when we switch. + if (EnterIncludeFile(Filename)) { + Error(IncludeLoc, "Could not find include file '" + Filename + "'"); + return true; + } + + return false; +} + +/// ParseDirectiveIf +/// ::= .if expression +bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + if(TheCondState.Ignore) { + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.if' directive"); + + Lex(); + + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { + StringRef Name; + TheCondStack.push_back(TheCondState); + TheCondState.TheCond = AsmCond::IfCond; + + if (TheCondState.Ignore) { + EatToEndOfStatement(); + } else { + if (ParseIdentifier(Name)) + return TokError("expected identifier after '.ifdef'"); + + Lex(); + + MCSymbol *Sym = getContext().LookupSymbol(Name); + + if (expect_defined) + TheCondState.CondMet = (Sym != NULL && !Sym->isUndefined()); + else + TheCondState.CondMet = (Sym == NULL || Sym->isUndefined()); + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElseIf +/// ::= .elseif expression +bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " + " an .elseif"); + TheCondState.TheCond = AsmCond::ElseIfCond; + + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) { + TheCondState.Ignore = true; + EatToEndOfStatement(); + } + else { + int64_t ExprValue; + if (ParseAbsoluteExpression(ExprValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.elseif' directive"); + + Lex(); + TheCondState.CondMet = ExprValue; + TheCondState.Ignore = !TheCondState.CondMet; + } + + return false; +} + +/// ParseDirectiveElse +/// ::= .else +bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.else' directive"); + + Lex(); + + if (TheCondState.TheCond != AsmCond::IfCond && + TheCondState.TheCond != AsmCond::ElseIfCond) + Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an " + ".elseif"); + TheCondState.TheCond = AsmCond::ElseCond; + bool LastIgnoreState = false; + if (!TheCondStack.empty()) + LastIgnoreState = TheCondStack.back().Ignore; + if (LastIgnoreState || TheCondState.CondMet) + TheCondState.Ignore = true; + else + TheCondState.Ignore = false; 
+ + return false; +} + +/// ParseDirectiveEndIf +/// ::= .endif +bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.endif' directive"); + + Lex(); + + if ((TheCondState.TheCond == AsmCond::NoCond) || + TheCondStack.empty()) + Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " + ".else"); + if (!TheCondStack.empty()) { + TheCondState = TheCondStack.back(); + TheCondStack.pop_back(); + } + + return false; +} + +/// ParseDirectiveFile +/// ::= .file [number] string +bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { + // FIXME: I'm not sure what this is. + int64_t FileNumber = -1; + SMLoc FileNumberLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::Integer)) { + FileNumber = getTok().getIntVal(); + Lex(); + + if (FileNumber < 1) + return TokError("file number less than one"); + } + + if (getLexer().isNot(AsmToken::String)) + return TokError("unexpected token in '.file' directive"); + + StringRef Filename = getTok().getString(); + Filename = Filename.substr(1, Filename.size()-2); + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.file' directive"); + + if (FileNumber == -1) + getStreamer().EmitFileDirective(Filename); + else { + if (getStreamer().EmitDwarfFileDirective(FileNumber, Filename)) + Error(FileNumberLoc, "file number already allocated"); + } + + return false; +} + +/// ParseDirectiveLine +/// ::= .line [number] +bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.line' directive"); + + int64_t LineNumber = getTok().getIntVal(); + (void) LineNumber; + Lex(); + + // FIXME: Do something with the .line. + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); + + return false; +} + + +/// ParseDirectiveLoc +/// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] +/// [epilogue_begin] [is_stmt VALUE] [isa VALUE] +/// The first number is a file number, must have been previously assigned with +/// a .file directive, the second number is the line number and optionally the +/// third number is a column position (zero if not specified). The remaining +/// optional items are .loc sub-directives. +bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + + if (getLexer().isNot(AsmToken::Integer)) + return TokError("unexpected token in '.loc' directive"); + int64_t FileNumber = getTok().getIntVal(); + if (FileNumber < 1) + return TokError("file number less than one in '.loc' directive"); + if (!getContext().isValidDwarfFileNumber(FileNumber)) + return TokError("unassigned file number in '.loc' directive"); + Lex(); + + int64_t LineNumber = 0; + if (getLexer().is(AsmToken::Integer)) { + LineNumber = getTok().getIntVal(); + if (LineNumber < 1) + return TokError("line number less than one in '.loc' directive"); + Lex(); + } + + int64_t ColumnPos = 0; + if (getLexer().is(AsmToken::Integer)) { + ColumnPos = getTok().getIntVal(); + if (ColumnPos < 0) + return TokError("column position less than zero in '.loc' directive"); + Lex(); + } + + unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? 
DWARF2_FLAG_IS_STMT : 0;
+  unsigned Isa = 0;
+  int64_t Discriminator = 0;
+  if (getLexer().isNot(AsmToken::EndOfStatement)) {
+    for (;;) {
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+
+      StringRef Name;
+      SMLoc Loc = getTok().getLoc();
+      if (getParser().ParseIdentifier(Name))
+        return TokError("unexpected token in '.loc' directive");
+
+      if (Name == "basic_block")
+        Flags |= DWARF2_FLAG_BASIC_BLOCK;
+      else if (Name == "prologue_end")
+        Flags |= DWARF2_FLAG_PROLOGUE_END;
+      else if (Name == "epilogue_begin")
+        Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+      else if (Name == "is_stmt") {
+        SMLoc Loc = getTok().getLoc();
+        const MCExpr *Value;
+        if (getParser().ParseExpression(Value))
+          return true;
+        // The expression must be the constant 0 or 1.
+        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+          int Value = MCE->getValue();
+          if (Value == 0)
+            Flags &= ~DWARF2_FLAG_IS_STMT;
+          else if (Value == 1)
+            Flags |= DWARF2_FLAG_IS_STMT;
+          else
+            return Error(Loc, "is_stmt value not 0 or 1");
+        }
+        else {
+          return Error(Loc, "is_stmt value not the constant value of 0 or 1");
+        }
+      }
+      else if (Name == "isa") {
+        SMLoc Loc = getTok().getLoc();
+        const MCExpr *Value;
+        if (getParser().ParseExpression(Value))
+          return true;
+        // The expression must be a constant greater than or equal to 0.
+        if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+          int Value = MCE->getValue();
+          if (Value < 0)
+            return Error(Loc, "isa number less than zero");
+          Isa = Value;
+        }
+        else {
+          return Error(Loc, "isa number not a constant value");
+        }
+      }
+      else if (Name == "discriminator") {
+        if (getParser().ParseAbsoluteExpression(Discriminator))
+          return true;
+      }
+      else {
+        return Error(Loc, "unknown sub-directive in '.loc' directive");
+      }
+
+      if (getLexer().is(AsmToken::EndOfStatement))
+        break;
+    }
+  }
+
+  getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
+                                      Isa, Discriminator);
+
+  return false;
+}
+
+/// ParseDirectiveStabs
+/// ::= .stabs string, number, number, number
+bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive,
+                                           SMLoc DirectiveLoc) {
+  return TokError("unsupported directive '" + Directive + "'");
+}
+
+/// ParseDirectiveCFIStartProc
+/// ::= .cfi_startproc
+bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef,
+                                                  SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIStartProc();
+}
+
+/// ParseDirectiveCFIEndProc
+/// ::= .cfi_endproc
+bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIEndProc();
+}
+
+/// ParseRegisterOrRegisterNumber - parse register name or number.
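+/// Accepts either a '%'-prefixed target register name (mapped to its DWARF
+/// register number) or a bare absolute expression used as the number itself.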
+bool GenericAsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, + SMLoc DirectiveLoc) { + unsigned RegNo; + + if (getLexer().is(AsmToken::Percent)) { + if (getParser().getTargetParser().ParseRegister(RegNo, DirectiveLoc, + DirectiveLoc)) + return true; + Register = getContext().getTargetAsmInfo().getDwarfRegNum(RegNo, true); + } else + return getParser().ParseAbsoluteExpression(Register); + + return false; +} + +/// ParseDirectiveCFIDefCfa +/// ::= .cfi_def_cfa register, offset +bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef, + SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Offset = 0; + if (getParser().ParseAbsoluteExpression(Offset)) + return true; + + return getStreamer().EmitCFIDefCfa(Register, Offset); +} + +/// ParseDirectiveCFIDefCfaOffset +/// ::= .cfi_def_cfa_offset offset +bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef, + SMLoc DirectiveLoc) { + int64_t Offset = 0; + if (getParser().ParseAbsoluteExpression(Offset)) + return true; + + return getStreamer().EmitCFIDefCfaOffset(Offset); +} + +/// ParseDirectiveCFIDefCfaRegister +/// ::= .cfi_def_cfa_register register +bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef, + SMLoc DirectiveLoc) { + int64_t Register = 0; + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + return getStreamer().EmitCFIDefCfaRegister(Register); +} + +/// ParseDirectiveCFIOffset +/// ::= .cfi_off register, offset +bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { + int64_t Register = 0; + int64_t Offset = 0; + + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + if (getParser().ParseAbsoluteExpression(Offset)) + return true; + + return getStreamer().EmitCFIOffset(Register, Offset); +} + +static bool isValidEncoding(int64_t Encoding) { + if (Encoding & ~0xff) + return false; + + if (Encoding == dwarf::DW_EH_PE_omit) + return true; + + const unsigned Format = Encoding & 0xf; + if (Format != dwarf::DW_EH_PE_absptr && Format != dwarf::DW_EH_PE_udata2 && + Format != dwarf::DW_EH_PE_udata4 && Format != dwarf::DW_EH_PE_udata8 && + Format != dwarf::DW_EH_PE_sdata2 && Format != dwarf::DW_EH_PE_sdata4 && + Format != dwarf::DW_EH_PE_sdata8 && Format != dwarf::DW_EH_PE_signed) + return false; + + const unsigned Application = Encoding & 0x70; + if (Application != dwarf::DW_EH_PE_absptr && + Application != dwarf::DW_EH_PE_pcrel) + return false; + + return true; +} + +/// ParseDirectiveCFIPersonalityOrLsda +/// ::= .cfi_personality encoding, [symbol_name] +/// ::= .cfi_lsda encoding, [symbol_name] +bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal, + SMLoc DirectiveLoc) { + int64_t Encoding = 0; + if (getParser().ParseAbsoluteExpression(Encoding)) + return true; + if (Encoding == dwarf::DW_EH_PE_omit) + return false; + + if (!isValidEncoding(Encoding)) + return TokError("unsupported encoding."); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (IDVal == ".cfi_personality") + return 
getStreamer().EmitCFIPersonality(Sym, Encoding);
+  else {
+    assert(IDVal == ".cfi_lsda");
+    return getStreamer().EmitCFILsda(Sym, Encoding);
+  }
+}
+
+/// ParseDirectiveCFIRememberState
+/// ::= .cfi_remember_state
+bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal,
+                                                      SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIRememberState();
+}
+
+/// ParseDirectiveCFIRestoreState
+/// ::= .cfi_restore_state
+bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal,
+                                                     SMLoc DirectiveLoc) {
+  return getStreamer().EmitCFIRestoreState();
+}
+
+/// ParseDirectiveMacrosOnOff
+/// ::= .macros_on
+/// ::= .macros_off
+bool GenericAsmParser::ParseDirectiveMacrosOnOff(StringRef Directive,
+                                                 SMLoc DirectiveLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return Error(getLexer().getLoc(),
+                 "unexpected token in '" + Directive + "' directive");
+
+  getParser().MacrosEnabled = Directive == ".macros_on";
+
+  return false;
+}
+
+/// ParseDirectiveMacro
+/// ::= .macro name
+bool GenericAsmParser::ParseDirectiveMacro(StringRef Directive,
+                                           SMLoc DirectiveLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.macro' directive");
+
+  // Eat the end of statement.
+  Lex();
+
+  AsmToken EndToken, StartToken = getTok();
+
+  // Lex the macro definition.
+  for (;;) {
+    // Check whether we have reached the end of the file.
+    if (getLexer().is(AsmToken::Eof))
+      return Error(DirectiveLoc, "no matching '.endmacro' in definition");
+
+    // Otherwise, check whether we have reached the .endmacro.
+    if (getLexer().is(AsmToken::Identifier) &&
+        (getTok().getIdentifier() == ".endm" ||
+         getTok().getIdentifier() == ".endmacro")) {
+      EndToken = getTok();
+      Lex();
+      if (getLexer().isNot(AsmToken::EndOfStatement))
+        return TokError("unexpected token in '" + EndToken.getIdentifier() +
+                        "' directive");
+      break;
+    }
+
+    // Otherwise, scan to the end of the statement.
+    getParser().EatToEndOfStatement();
+  }
+
+  if (getParser().MacroMap.lookup(Name)) {
+    return Error(DirectiveLoc, "macro '" + Name + "' is already defined");
+  }
+
+  const char *BodyStart = StartToken.getLoc().getPointer();
+  const char *BodyEnd = EndToken.getLoc().getPointer();
+  StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart);
+  getParser().MacroMap[Name] = new Macro(Name, Body);
+  return false;
+}
+
+/// ParseDirectiveEndMacro
+/// ::= .endm
+/// ::= .endmacro
+bool GenericAsmParser::ParseDirectiveEndMacro(StringRef Directive,
+                                              SMLoc DirectiveLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '" + Directive + "' directive");
+
+  // If we are inside a macro instantiation, terminate the current
+  // instantiation.
+  if (!getParser().ActiveMacros.empty()) {
+    getParser().HandleMacroExit();
+    return false;
+  }
+
+  // Otherwise, this .endmacro is a stray entry in the file; well formed
+  // .endmacro directives are handled during the macro definition parsing.
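+  // (For example, a bare '.endm' with no preceding '.macro' definition and
+  // no active instantiation lands here.)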
+ return TokError("unexpected '" + Directive + "' in file, " + "no current macro definition"); +} + +bool GenericAsmParser::ParseDirectiveLEB128(StringRef DirName, SMLoc) { + getParser().CheckForValidSection(); + + const MCExpr *Value; + + if (getParser().ParseExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + if (DirName[1] == 's') + getStreamer().EmitSLEB128Value(Value); + else + getStreamer().EmitULEB128Value(Value); + + return false; +} + + +/// \brief Create an MCAsmParser instance. +MCAsmParser *llvm::createMCAsmParser(const Target &T, SourceMgr &SM, + MCContext &C, MCStreamer &Out, + const MCAsmInfo &MAI) { + return new AsmParser(T, SM, C, Out, MAI); +} diff --git a/final/lib/MC/MCParser/CMakeLists.txt b/final/lib/MC/MCParser/CMakeLists.txt new file mode 100644 index 00000000000..eaea9f6cd3c --- /dev/null +++ b/final/lib/MC/MCParser/CMakeLists.txt @@ -0,0 +1,11 @@ +add_llvm_library(LLVMMCParser + AsmLexer.cpp + AsmParser.cpp + COFFAsmParser.cpp + DarwinAsmParser.cpp + ELFAsmParser.cpp + MCAsmLexer.cpp + MCAsmParser.cpp + MCAsmParserExtension.cpp + TargetAsmParser.cpp + ) diff --git a/final/lib/MC/MCParser/COFFAsmParser.cpp b/final/lib/MC/MCParser/COFFAsmParser.cpp new file mode 100644 index 00000000000..5ecab03b00f --- /dev/null +++ b/final/lib/MC/MCParser/COFFAsmParser.cpp @@ -0,0 +1,144 @@ +//===- COFFAsmParser.cpp - COFF Assembly Parser ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/COFF.h" +using namespace llvm; + +namespace { + +class COFFAsmParser : public MCAsmParserExtension { + template + void AddDirectiveHandler(StringRef Directive) { + getParser().AddDirectiveHandler(this, Directive, + HandleDirective); + } + + bool ParseSectionSwitch(StringRef Section, + unsigned Characteristics, + SectionKind Kind); + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. 
MCAsmParserExtension::Initialize(Parser);
+
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text");
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
+    AddDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
+    AddDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef");
+  }
+
+  bool ParseSectionDirectiveText(StringRef, SMLoc) {
+    return ParseSectionSwitch(".text",
+                              COFF::IMAGE_SCN_CNT_CODE
+                            | COFF::IMAGE_SCN_MEM_EXECUTE
+                            | COFF::IMAGE_SCN_MEM_READ,
+                              SectionKind::getText());
+  }
+  bool ParseSectionDirectiveData(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data",
+                              COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+                            | COFF::IMAGE_SCN_MEM_READ
+                            | COFF::IMAGE_SCN_MEM_WRITE,
+                              SectionKind::getDataRel());
+  }
+  bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+    return ParseSectionSwitch(".bss",
+                              COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA
+                            | COFF::IMAGE_SCN_MEM_READ
+                            | COFF::IMAGE_SCN_MEM_WRITE,
+                              SectionKind::getBSS());
+  }
+
+  bool ParseDirectiveDef(StringRef, SMLoc);
+  bool ParseDirectiveScl(StringRef, SMLoc);
+  bool ParseDirectiveType(StringRef, SMLoc);
+  bool ParseDirectiveEndef(StringRef, SMLoc);
+
+public:
+  COFFAsmParser() {}
+};
+
+} // end anonymous namespace.
+
+bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
+                                       unsigned Characteristics,
+                                       SectionKind Kind) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in section switching directive");
+  Lex();
+
+  getStreamer().SwitchSection(getContext().getCOFFSection(
+                                Section, Characteristics, Kind));
+
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
+  StringRef SymbolName;
+
+  if (getParser().ParseIdentifier(SymbolName))
+    return TokError("expected identifier in directive");
+
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
+
+  getStreamer().BeginCOFFSymbolDef(Sym);
+
+  Lex();
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveScl(StringRef, SMLoc) {
+  int64_t SymbolStorageClass;
+  if (getParser().ParseAbsoluteExpression(SymbolStorageClass))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  Lex();
+  getStreamer().EmitCOFFSymbolStorageClass(SymbolStorageClass);
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveType(StringRef, SMLoc) {
+  int64_t Type;
+  if (getParser().ParseAbsoluteExpression(Type))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  Lex();
+  getStreamer().EmitCOFFSymbolType(Type);
+  return false;
+}
+
+bool COFFAsmParser::ParseDirectiveEndef(StringRef, SMLoc) {
+  Lex();
+  getStreamer().EndCOFFSymbolDef();
+  return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createCOFFAsmParser() {
+  return new COFFAsmParser;
+}
+
+}
diff --git a/final/lib/MC/MCParser/DarwinAsmParser.cpp b/final/lib/MC/MCParser/DarwinAsmParser.cpp
new file mode 100644
index 00000000000..44f234566af
--- /dev/null
+++ b/final/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -0,0 +1,661 @@
+//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+namespace {
+
+/// \brief Implementation of directive handling which is shared across all
+/// Darwin targets.
+class DarwinAsmParser : public MCAsmParserExtension {
+  template<bool (DarwinAsmParser::*Handler)(StringRef, SMLoc)>
+  void AddDirectiveHandler(StringRef Directive) {
+    getParser().AddDirectiveHandler(this, Directive,
+                                    HandleDirective<DarwinAsmParser, Handler>);
+  }
+
+  bool ParseSectionSwitch(const char *Segment, const char *Section,
+                          unsigned TAA = 0, unsigned ImplicitAlign = 0,
+                          unsigned StubSize = 0);
+
+public:
+  DarwinAsmParser() {}
+
+  virtual void Initialize(MCAsmParser &Parser) {
+    // Call the base implementation.
+    this->MCAsmParserExtension::Initialize(Parser);
+
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>(
+      ".subsections_via_symbols");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".dump");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveDumpOrLoad>(".load");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSection>(".section");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogUnique>(
+      ".secure_log_unique");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveSecureLogReset>(
+      ".secure_log_reset");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveTBSS>(".tbss");
+    AddDirectiveHandler<&DarwinAsmParser::ParseDirectiveZerofill>(".zerofill");
+
+    // Special section directives.
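+    // Each of these switches to a fixed Mach-O section; e.g. '.const'
+    // selects __TEXT,__const.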
+ AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>(".const_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstructor>(".constructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveCString>(".cstring"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveData>(".data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDestructor>(".destructor"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveDyld>(".dyld"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit0>(".fvmlib_init0"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveFVMLibInit1>(".fvmlib_init1"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers>(".lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral16>(".literal16"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral4>(".literal4"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveLiteral8>(".literal8"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModInitFunc>(".mod_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveModTermFunc>(".mod_term_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers>(".non_lazy_symbol_pointer"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth>(".objc_cat_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth>(".objc_cat_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCCategory>(".objc_category"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClass>(".objc_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassNames>(".objc_class_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClassVars>(".objc_class_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsMeth>(".objc_cls_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCClsRefs>(".objc_cls_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstMeth>(".objc_inst_meth"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars>(".objc_instance_vars"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs>(".objc_message_refs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMetaClass>(".objc_meta_class"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames>(".objc_meth_var_names"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes>(".objc_meth_var_types"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo>(".objc_module_info"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCProtocol>(".objc_protocol"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs>(".objc_selector_strs"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCStringObject>(".objc_string_object"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveObjCSymbols>(".objc_symbols"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectivePICSymbolStub>(".picsymbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticConst>(".static_const"); + 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveStaticData>(".static_data"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveSymbolStub>(".symbol_stub"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTData>(".tdata"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); + } + + bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); + bool ParseDirectiveLsym(StringRef, SMLoc); + bool ParseDirectiveSection(StringRef, SMLoc); + bool ParseDirectiveSecureLogReset(StringRef, SMLoc); + bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); + bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); + bool ParseDirectiveTBSS(StringRef, SMLoc); + bool ParseDirectiveZerofill(StringRef, SMLoc); + + // Named Section Directive + bool ParseSectionDirectiveConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__const"); + } + bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__static_const"); + } + bool ParseSectionDirectiveCString(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, 4); + } + bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, 8); + } + bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, 16); + } + bool ParseSectionDirectiveConstructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__constructor"); + } + bool ParseSectionDirectiveDestructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__destructor"); + } + bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init0"); + } + bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init1"); + } + bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. 
+ 0, 16); + } + bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26); + } + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__data"); + } + bool ParseSectionDirectiveStaticData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__static_data"); + } + bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveDyld(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__dyld"); + } + bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveConstData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__const"); + } + bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool 
ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + } + bool ParseSectionDirectiveTLV(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + } + bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } + +}; + +} + +bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issuing the section switch directive will not realign + // the section. However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + getStreamer().EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +/// ParseDirectiveDesc +/// ::= .desc identifier , expression +bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in '.desc' directive");
+  Lex();
+
+  int64_t DescValue;
+  if (getParser().ParseAbsoluteExpression(DescValue))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.desc' directive");
+
+  Lex();
+
+  // Set the n_desc field of this Symbol to this DescValue
+  getStreamer().EmitSymbolDesc(Sym, DescValue);
+
+  return false;
+}
+
+/// ParseDirectiveDumpOrLoad
+///  ::= ( .dump | .load ) "filename"
+bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
+                                               SMLoc IDLoc) {
+  bool IsDump = Directive == ".dump";
+  if (getLexer().isNot(AsmToken::String))
+    return TokError("expected string in '.dump' or '.load' directive");
+
+  Lex();
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.dump' or '.load' directive");
+
+  Lex();
+
+  // FIXME: If/when .dump and .load are implemented they will be done in the
+  // assembly parser and not have any need for an MCStreamer API.
+  if (IsDump)
+    Warning(IDLoc, "ignoring directive .dump for now");
+  else
+    Warning(IDLoc, "ignoring directive .load for now");
+
+  return false;
+}
+
+/// ParseDirectiveLsym
+///  ::= .lsym identifier , expression
+bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in '.lsym' directive");
+  Lex();
+
+  const MCExpr *Value;
+  if (getParser().ParseExpression(Value))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.lsym' directive");
+
+  Lex();
+
+  // We don't currently support this directive.
+  //
+  // FIXME: Diagnostic location!
+  (void) Sym;
+  return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveSection:
+///   ::= .section identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+  SMLoc Loc = getLexer().getLoc();
+
+  StringRef SectionName;
+  if (getParser().ParseIdentifier(SectionName))
+    return Error(Loc, "expected identifier after '.section' directive");
+
+  // Verify there is a following comma.
+  if (!getLexer().is(AsmToken::Comma))
+    return TokError("unexpected token in '.section' directive");
+
+  std::string SectionSpec = SectionName;
+  SectionSpec += ",";
+
+  // Add all the tokens until the end of the line, ParseSectionSpecifier will
+  // handle this.
+  StringRef EOL = getLexer().LexUntilEndOfStatement();
+  SectionSpec.append(EOL.begin(), EOL.end());
+
+  Lex();
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.section' directive");
+  Lex();
+
+  StringRef Segment, Section;
+  unsigned TAA, StubSize;
+  std::string ErrorStr =
+    MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+                                          TAA, StubSize);
+
+  if (!ErrorStr.empty())
+    return Error(Loc, ErrorStr.c_str());
+
+  // FIXME: Arch specific.
+  bool isText = Segment == "__TEXT";  // FIXME: Hack.
+  getStreamer().SwitchSection(getContext().getMachOSection(
+                                Segment, Section, TAA, StubSize,
+                                isText ? SectionKind::getText()
+                                       : SectionKind::getDataRel()));
+  return false;
+}
+
+/// ParseDirectiveSecureLogUnique
+///  ::= .secure_log_unique ... message ...
+bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
+  StringRef LogMessage = getParser().ParseStringToEndOfStatement();
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.secure_log_unique' directive");
+
+  if (getContext().getSecureLogUsed())
+    return Error(IDLoc, ".secure_log_unique specified multiple times");
+
+  // Get the secure log path.
+  const char *SecureLogFile = getContext().getSecureLogFile();
+  if (SecureLogFile == NULL)
+    return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
+                 "environment variable unset.");
+
+  // Open the secure log file if we haven't already.
+  raw_ostream *OS = getContext().getSecureLog();
+  if (OS == NULL) {
+    std::string Err;
+    OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append);
+    if (!Err.empty()) {
+       delete OS;
+       return Error(IDLoc, Twine("can't open secure log file: ") +
+                    SecureLogFile + " (" + Err + ")");
+    }
+    getContext().setSecureLog(OS);
+  }
+
+  // Write the message.
+  int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+  *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+      << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
+      << LogMessage + "\n";
+
+  getContext().setSecureLogUsed(true);
+
+  return false;
+}
+
+/// ParseDirectiveSecureLogReset
+///  ::= .secure_log_reset
+bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.secure_log_reset' directive");
+
+  Lex();
+
+  getContext().setSecureLogUsed(false);
+
+  return false;
+}
+
+/// ParseDirectiveSubsectionsViaSymbols
+///  ::= .subsections_via_symbols
+bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+  Lex();
+
+  getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+  return false;
+}
+
+/// ParseDirectiveTBSS
+///  ::= .tbss identifier, size, align
+bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
+  SMLoc IDLoc = getLexer().getLoc();
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Size;
+  SMLoc SizeLoc = getLexer().getLoc();
+  if (getParser().ParseAbsoluteExpression(Size))
+    return true;
+
+  int64_t Pow2Alignment = 0;
+  SMLoc Pow2AlignmentLoc;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    Pow2AlignmentLoc = getLexer().getLoc();
+    if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+      return true;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.tbss' directive");
+
+  Lex();
+
+  if (Size < 0)
+    return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than "
+                 "zero");
+
+  // FIXME: Diagnose overflow.
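// Illustrative sketch (editorial, not part of the patch): the form accepted
// by ParseDirectiveTBSS above is
//   .tbss _buffer, 16, 3
// i.e. a 16-byte thread-local zerofill symbol aligned to 1 << 3 = 8 bytes,
// emitted into __DATA,__thread_bss below.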
+  if (Pow2Alignment < 0)
+    return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less "
+                 "than zero");
+
+  if (!Sym->isUndefined())
+    return Error(IDLoc, "invalid symbol redefinition");
+
+  getStreamer().EmitTBSSSymbol(getContext().getMachOSection(
+                                 "__DATA", "__thread_bss",
+                                 MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+                                 0, SectionKind::getThreadBSS()),
+                               Sym, Size, 1 << Pow2Alignment);
+
+  return false;
+}
+
+/// ParseDirectiveZerofill
+///  ::= .zerofill segname , sectname [, identifier , size_expression [
+///      , align_expression ]]
+bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
+  StringRef Segment;
+  if (getParser().ParseIdentifier(Segment))
+    return TokError("expected segment name after '.zerofill' directive");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  StringRef Section;
+  if (getParser().ParseIdentifier(Section))
+    return TokError("expected section name after comma in '.zerofill' "
+                    "directive");
+
+  // If this is the end of the line all that was wanted was to create the
+  // section but with no symbol.
+  if (getLexer().is(AsmToken::EndOfStatement)) {
+    // Create the zerofill section but no symbol
+    getStreamer().EmitZerofill(getContext().getMachOSection(
+                                 Segment, Section, MCSectionMachO::S_ZEROFILL,
+                                 0, SectionKind::getBSS()));
+    return false;
+  }
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  SMLoc IDLoc = getLexer().getLoc();
+  StringRef IDStr;
+  if (getParser().ParseIdentifier(IDStr))
+    return TokError("expected identifier in directive");
+
+  // Handle the identifier as the key symbol.
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  int64_t Size;
+  SMLoc SizeLoc = getLexer().getLoc();
+  if (getParser().ParseAbsoluteExpression(Size))
+    return true;
+
+  int64_t Pow2Alignment = 0;
+  SMLoc Pow2AlignmentLoc;
+  if (getLexer().is(AsmToken::Comma)) {
+    Lex();
+    Pow2AlignmentLoc = getLexer().getLoc();
+    if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+      return true;
+  }
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.zerofill' directive");
+
+  Lex();
+
+  if (Size < 0)
+    return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+                 "than zero");
+
+  // NOTE: The alignment in the directive is a power of 2 value, the assembler
+  // may internally end up wanting an alignment in bytes.
+  // FIXME: Diagnose overflow.
+  if (Pow2Alignment < 0)
+    return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+                 "can't be less than zero");
+
+  if (!Sym->isUndefined())
+    return Error(IDLoc, "invalid symbol redefinition");
+
+  // Create the zerofill Symbol with Size and Pow2Alignment
+  //
+  // FIXME: Arch specific.
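// Illustrative sketch (editorial, not part of the patch): both accepted
// forms of the directive handled above:
//   .zerofill __DATA, __bss                  // create the section only
//   .zerofill __DATA, __bss, _sym, 64, 4     // symbol, 64 bytes, 1 << 4 align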
+  getStreamer().EmitZerofill(getContext().getMachOSection(
+                               Segment, Section, MCSectionMachO::S_ZEROFILL,
+                               0, SectionKind::getBSS()),
+                             Sym, Size, 1 << Pow2Alignment);
+
+  return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createDarwinAsmParser() {
+  return new DarwinAsmParser;
+}
+
+}
diff --git a/final/lib/MC/MCParser/ELFAsmParser.cpp b/final/lib/MC/MCParser/ELFAsmParser.cpp
new file mode 100644
index 00000000000..dcf689a6f0e
--- /dev/null
+++ b/final/lib/MC/MCParser/ELFAsmParser.cpp
@@ -0,0 +1,533 @@
+//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ELF.h"
+using namespace llvm;
+
+namespace {
+
+class ELFAsmParser : public MCAsmParserExtension {
+  template<bool (ELFAsmParser::*Handler)(StringRef, SMLoc)>
+  void AddDirectiveHandler(StringRef Directive) {
+    getParser().AddDirectiveHandler(this, Directive,
+                                    HandleDirective<ELFAsmParser, Handler>);
+  }
+
+  bool ParseSectionSwitch(StringRef Section, unsigned Type,
+                          unsigned Flags, SectionKind Kind);
+  bool SeenIdent;
+
+public:
+  ELFAsmParser() : SeenIdent(false) {
+    BracketExpressionsSupported = true;
+  }
+
+  virtual void Initialize(MCAsmParser &Parser) {
+    // Call the base implementation.
+    this->MCAsmParserExtension::Initialize(Parser);
+
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveData>(".data");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveText>(".text");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveBSS>(".bss");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveRoData>(".rodata");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTData>(".tdata");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveTBSS>(".tbss");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRel>(".data.rel");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRo>(".data.rel.ro");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveDataRelRoLocal>(".data.rel.ro.local");
+    AddDirectiveHandler<&ELFAsmParser::ParseSectionDirectiveEhFrame>(".eh_frame");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSection>(".section");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectivePushSection>(".pushsection");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectivePopSection>(".popsection");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSize>(".size");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectivePrevious>(".previous");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveType>(".type");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveIdent>(".ident");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveSymver>(".symver");
+    AddDirectiveHandler<&ELFAsmParser::ParseDirectiveWeakref>(".weakref");
+  }
+
+  // FIXME: Part of this logic is duplicated in the MCELFStreamer. What is
+  // the best way for us to get access to it?
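// Illustrative sketch (editorial, not part of the patch): the
// AddDirectiveHandler template above relies on a static trampoline in
// MCAsmParserExtension roughly of the form
//   template<typename T, bool (T::*Handler)(StringRef, SMLoc)>
//   static bool HandleDirective(MCAsmParserExtension *Target,
//                               StringRef Directive, SMLoc DirectiveLoc) {
//     T *Obj = static_cast<T*>(Target);
//     return (Obj->*Handler)(Directive, DirectiveLoc);
//   }
// so each registered directive string dispatches straight to the member
// function named at registration time.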
+  bool ParseSectionDirectiveData(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data", ELF::SHT_PROGBITS,
+                              ELF::SHF_WRITE | ELF::SHF_ALLOC,
+                              SectionKind::getDataRel());
+  }
+  bool ParseSectionDirectiveText(StringRef, SMLoc) {
+    return ParseSectionSwitch(".text", ELF::SHT_PROGBITS,
+                              ELF::SHF_EXECINSTR |
+                              ELF::SHF_ALLOC, SectionKind::getText());
+  }
+  bool ParseSectionDirectiveBSS(StringRef, SMLoc) {
+    return ParseSectionSwitch(".bss", ELF::SHT_NOBITS,
+                              ELF::SHF_WRITE |
+                              ELF::SHF_ALLOC, SectionKind::getBSS());
+  }
+  bool ParseSectionDirectiveRoData(StringRef, SMLoc) {
+    return ParseSectionSwitch(".rodata", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC,
+                              SectionKind::getReadOnly());
+  }
+  bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+    return ParseSectionSwitch(".tdata", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_TLS | ELF::SHF_WRITE,
+                              SectionKind::getThreadData());
+  }
+  bool ParseSectionDirectiveTBSS(StringRef, SMLoc) {
+    return ParseSectionSwitch(".tbss", ELF::SHT_NOBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_TLS | ELF::SHF_WRITE,
+                              SectionKind::getThreadBSS());
+  }
+  bool ParseSectionDirectiveDataRel(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data.rel", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
+                              SectionKind::getDataRel());
+  }
+  bool ParseSectionDirectiveDataRelRo(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data.rel.ro", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
+                              SectionKind::getReadOnlyWithRel());
+  }
+  bool ParseSectionDirectiveDataRelRoLocal(StringRef, SMLoc) {
+    return ParseSectionSwitch(".data.rel.ro.local", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
+                              SectionKind::getReadOnlyWithRelLocal());
+  }
+  bool ParseSectionDirectiveEhFrame(StringRef, SMLoc) {
+    return ParseSectionSwitch(".eh_frame", ELF::SHT_PROGBITS,
+                              ELF::SHF_ALLOC |
+                              ELF::SHF_WRITE,
+                              SectionKind::getDataRel());
+  }
+  bool ParseDirectivePushSection(StringRef, SMLoc);
+  bool ParseDirectivePopSection(StringRef, SMLoc);
+  bool ParseDirectiveSection(StringRef, SMLoc);
+  bool ParseDirectiveSize(StringRef, SMLoc);
+  bool ParseDirectivePrevious(StringRef, SMLoc);
+  bool ParseDirectiveType(StringRef, SMLoc);
+  bool ParseDirectiveIdent(StringRef, SMLoc);
+  bool ParseDirectiveSymver(StringRef, SMLoc);
+  bool ParseDirectiveWeakref(StringRef, SMLoc);
+
+private:
+  bool ParseSectionName(StringRef &SectionName);
+};
+
+}
+
+bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
+                                      unsigned Flags, SectionKind Kind) {
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in section switching directive");
+  Lex();
+
+  getStreamer().SwitchSection(getContext().getELFSection(
+                                Section, Type, Flags, Kind));
+
+  return false;
+}
+
+bool ELFAsmParser::ParseDirectiveSize(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in directive");
+  Lex();
+
+  const MCExpr *Expr;
+  if (getParser().ParseExpression(Expr))
+    return true;
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in directive");
+
+  getStreamer().EmitELFSize(Sym, Expr);
+  return false;
+}
+
+bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
+  // A section name can contain -, so we cannot just use
+  // ParseIdentifier.
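// Illustrative sketch (editorial, not part of the patch): a name such as
// ".note.GNU-stack" lexes as the adjacent tokens ".note.GNU", "-", "stack";
// the loop below glues such runs back together into one section name.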
+ SMLoc FirstLoc = getLexer().getLoc(); + unsigned Size = 0; + + if (getLexer().is(AsmToken::String)) { + SectionName = getTok().getIdentifier(); + Lex(); + return false; + } + + for (;;) { + StringRef Tmp; + unsigned CurSize; + + SMLoc PrevLoc = getLexer().getLoc(); + if (getLexer().is(AsmToken::Minus)) { + CurSize = 1; + Lex(); // Consume the "-". + } else if (getLexer().is(AsmToken::String)) { + CurSize = getTok().getIdentifier().size() + 2; + Lex(); + } else if (getLexer().is(AsmToken::Identifier)) { + CurSize = getTok().getIdentifier().size(); + Lex(); + } else { + break; + } + + Size += CurSize; + SectionName = StringRef(FirstLoc.getPointer(), Size); + + // Make sure the following token is adjacent. + if (PrevLoc.getPointer() + CurSize != getTok().getLoc().getPointer()) + break; + } + if (Size == 0) + return true; + + return false; +} + +static SectionKind computeSectionKind(unsigned Flags) { + if (Flags & ELF::SHF_EXECINSTR) + return SectionKind::getText(); + if (Flags & ELF::SHF_TLS) + return SectionKind::getThreadData(); + return SectionKind::getDataRel(); +} + +static int parseSectionFlags(StringRef flagsStr) { + int flags = 0; + + for (unsigned i = 0; i < flagsStr.size(); i++) { + switch (flagsStr[i]) { + case 'a': + flags |= ELF::SHF_ALLOC; + break; + case 'x': + flags |= ELF::SHF_EXECINSTR; + break; + case 'w': + flags |= ELF::SHF_WRITE; + break; + case 'M': + flags |= ELF::SHF_MERGE; + break; + case 'S': + flags |= ELF::SHF_STRINGS; + break; + case 'T': + flags |= ELF::SHF_TLS; + break; + case 'c': + flags |= ELF::XCORE_SHF_CP_SECTION; + break; + case 'd': + flags |= ELF::XCORE_SHF_DP_SECTION; + break; + case 'G': + flags |= ELF::SHF_GROUP; + break; + default: + return -1; + } + } + + return flags; +} + +bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { + getStreamer().PushSection(); + + if (ParseDirectiveSection(s, loc)) { + getStreamer().PopSection(); + return true; + } + + return false; +} + +bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { + if (!getStreamer().PopSection()) + return TokError(".popsection without corresponding .pushsection"); + return false; +} + +// FIXME: This is a work in progress. +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + StringRef SectionName; + + if (ParseSectionName(SectionName)) + return TokError("expected identifier in directive"); + + StringRef TypeName; + int64_t Size = 0; + StringRef GroupName; + unsigned Flags = 0; + + // Set the defaults first. 
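// Illustrative sketch (editorial, not part of the patch): a fully decorated
// directive accepted by this parser is
//   .section .rodata.str1.1,"aMS",@progbits,1
// where "aMS" maps to SHF_ALLOC|SHF_MERGE|SHF_STRINGS via parseSectionFlags
// and the trailing 1 is the entry size that mergeable sections must specify.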
+ if (SectionName == ".fini" || SectionName == ".init" || + SectionName == ".rodata") + Flags |= ELF::SHF_ALLOC; + if (SectionName == ".fini" || SectionName == ".init") + Flags |= ELF::SHF_EXECINSTR; + + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in directive"); + + StringRef FlagsStr = getTok().getStringContents(); + Lex(); + + int extraFlags = parseSectionFlags(FlagsStr); + if (extraFlags < 0) + return TokError("unknown flag"); + Flags |= extraFlags; + + bool Mergeable = Flags & ELF::SHF_MERGE; + bool Group = Flags & ELF::SHF_GROUP; + + if (getLexer().isNot(AsmToken::Comma)) { + if (Mergeable) + return TokError("Mergeable section must specify the type"); + if (Group) + return TokError("Group section must specify the type"); + } else { + Lex(); + if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At)) + return TokError("expected '@' or '%' before type"); + + Lex(); + if (getParser().ParseIdentifier(TypeName)) + return TokError("expected identifier in directive"); + + if (Mergeable) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected the entry size"); + Lex(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + if (Size <= 0) + return TokError("entry size must be positive"); + } + + if (Group) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected group name"); + Lex(); + if (getParser().ParseIdentifier(GroupName)) + return true; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + StringRef Linkage; + if (getParser().ParseIdentifier(Linkage)) + return true; + if (Linkage != "comdat") + return TokError("Linkage must be 'comdat'"); + } + } + } + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + unsigned Type = ELF::SHT_PROGBITS; + + if (!TypeName.empty()) { + if (TypeName == "init_array") + Type = ELF::SHT_INIT_ARRAY; + else if (TypeName == "fini_array") + Type = ELF::SHT_FINI_ARRAY; + else if (TypeName == "preinit_array") + Type = ELF::SHT_PREINIT_ARRAY; + else if (TypeName == "nobits") + Type = ELF::SHT_NOBITS; + else if (TypeName == "progbits") + Type = ELF::SHT_PROGBITS; + else if (TypeName == "note") + Type = ELF::SHT_NOTE; + else if (TypeName == "unwind") + Type = ELF::SHT_X86_64_UNWIND; + else + return TokError("unknown section type"); + } + + SectionKind Kind = computeSectionKind(Flags); + getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, + Flags, Kind, Size, + GroupName)); + return false; +} + +bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { + const MCSection *PreviousSection = getStreamer().getPreviousSection(); + if (PreviousSection == NULL) + return TokError(".previous without corresponding .section"); + getStreamer().SwitchSection(PreviousSection); + + return false; +} + +/// ParseDirectiveELFType +/// ::= .type identifier , @attribute +bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("unexpected token in '.type' directive");
+  Lex();
+
+  if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At))
+    return TokError("expected '@' or '%' before type");
+
+  Lex();
+  StringRef Type;
+  SMLoc TypeLoc;
+
+  TypeLoc = getLexer().getLoc();
+  if (getParser().ParseIdentifier(Type))
+    return TokError("expected symbol type in directive");
+
+  MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+    .Case("function", MCSA_ELF_TypeFunction)
+    .Case("object", MCSA_ELF_TypeObject)
+    .Case("tls_object", MCSA_ELF_TypeTLS)
+    .Case("common", MCSA_ELF_TypeCommon)
+    .Case("notype", MCSA_ELF_TypeNoType)
+    .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject)
+    .Default(MCSA_Invalid);
+
+  if (Attr == MCSA_Invalid)
+    return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+  if (getLexer().isNot(AsmToken::EndOfStatement))
+    return TokError("unexpected token in '.type' directive");
+
+  Lex();
+
+  getStreamer().EmitSymbolAttribute(Sym, Attr);
+
+  return false;
+}
+
+/// ParseDirectiveIdent
+///  ::= .ident string
+bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) {
+  if (getLexer().isNot(AsmToken::String))
+    return TokError("unexpected token in '.ident' directive");
+
+  StringRef Data = getTok().getIdentifier();
+
+  Lex();
+
+  const MCSection *Comment =
+    getContext().getELFSection(".comment", ELF::SHT_PROGBITS,
+                               ELF::SHF_MERGE |
+                               ELF::SHF_STRINGS,
+                               SectionKind::getReadOnly(),
+                               1, "");
+
+  getStreamer().PushSection();
+  getStreamer().SwitchSection(Comment);
+  if (!SeenIdent) {
+    getStreamer().EmitIntValue(0, 1);
+    SeenIdent = true;
+  }
+  getStreamer().EmitBytes(Data, 0);
+  getStreamer().EmitIntValue(0, 1);
+  getStreamer().PopSection();
+  return false;
+}
+
+/// ParseDirectiveSymver
+///  ::= .symver foo, bar2@zed
+bool ELFAsmParser::ParseDirectiveSymver(StringRef, SMLoc) {
+  StringRef Name;
+  if (getParser().ParseIdentifier(Name))
+    return TokError("expected identifier in directive");
+
+  if (getLexer().isNot(AsmToken::Comma))
+    return TokError("expected a comma");
+
+  Lex();
+
+  StringRef AliasName;
+  if (getParser().ParseIdentifier(AliasName))
+    return TokError("expected identifier in directive");
+
+  if (AliasName.find('@') == StringRef::npos)
+    return TokError("expected a '@' in the name");
+
+  MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName);
+  MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+  const MCExpr *Value = MCSymbolRefExpr::Create(Sym, getContext());
+
+  getStreamer().EmitAssignment(Alias, Value);
+  return false;
+}
+
+/// ParseDirectiveWeakref
+///  ::= .weakref foo, bar
+bool ELFAsmParser::ParseDirectiveWeakref(StringRef, SMLoc) {
+  // FIXME: Share code with the other alias building directives.
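// Illustrative sketch (editorial, not part of the patch):
//   .weakref _alias, _target
// makes '_alias' act as a weak reference to '_target', recorded through
// EmitWeakReference below.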
+ + StringRef AliasName; + if (getParser().ParseIdentifier(AliasName)) + return TokError("expected identifier in directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected a comma"); + + Lex(); + + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + MCSymbol *Alias = getContext().GetOrCreateSymbol(AliasName); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + getStreamer().EmitWeakReference(Alias, Sym); + return false; +} + +namespace llvm { + +MCAsmParserExtension *createELFAsmParser() { + return new ELFAsmParser; +} + +} diff --git a/final/lib/MC/MCParser/MCAsmLexer.cpp b/final/lib/MC/MCParser/MCAsmLexer.cpp new file mode 100644 index 00000000000..dceece78ba1 --- /dev/null +++ b/final/lib/MC/MCParser/MCAsmLexer.cpp @@ -0,0 +1,27 @@ +//===-- MCAsmLexer.cpp - Abstract Asm Lexer Interface ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/Support/SourceMgr.h" + +using namespace llvm; + +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) { +} + +MCAsmLexer::~MCAsmLexer() { +} + +SMLoc MCAsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +SMLoc AsmToken::getLoc() const { + return SMLoc::getFromPointer(Str.data()); +} diff --git a/final/lib/MC/MCParser/MCAsmParser.cpp b/final/lib/MC/MCParser/MCAsmParser.cpp new file mode 100644 index 00000000000..70295efc613 --- /dev/null +++ b/final/lib/MC/MCParser/MCAsmParser.cpp @@ -0,0 +1,44 @@ +//===-- MCAsmParser.cpp - Abstract Asm Parser Interface -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +MCAsmParser::MCAsmParser() : TargetParser(0), ShowParsedOperands(0) { +} + +MCAsmParser::~MCAsmParser() { +} + +void MCAsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); +} + +const AsmToken &MCAsmParser::getTok() { + return getLexer().getTok(); +} + +bool MCAsmParser::TokError(const Twine &Msg) { + Error(getLexer().getLoc(), Msg); + return true; +} + +bool MCAsmParser::ParseExpression(const MCExpr *&Res) { + SMLoc L; + return ParseExpression(Res, L); +} + + diff --git a/final/lib/MC/MCParser/MCAsmParserExtension.cpp b/final/lib/MC/MCParser/MCAsmParserExtension.cpp new file mode 100644 index 00000000000..3f25a14926b --- /dev/null +++ b/final/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -0,0 +1,22 @@ +//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +using namespace llvm; + +MCAsmParserExtension::MCAsmParserExtension() : + BracketExpressionsSupported(false) { +} + +MCAsmParserExtension::~MCAsmParserExtension() { +} + +void MCAsmParserExtension::Initialize(MCAsmParser &Parser) { + this->Parser = &Parser; +} diff --git a/final/lib/MC/MCParser/Makefile b/final/lib/MC/MCParser/Makefile new file mode 100644 index 00000000000..4477757657c --- /dev/null +++ b/final/lib/MC/MCParser/Makefile @@ -0,0 +1,15 @@ +##===- lib/MC/MCParser/Makefile ----------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMMCParser +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common + diff --git a/final/lib/MC/MCParser/TargetAsmParser.cpp b/final/lib/MC/MCParser/TargetAsmParser.cpp new file mode 100644 index 00000000000..8d43c21f4bc --- /dev/null +++ b/final/lib/MC/MCParser/TargetAsmParser.cpp @@ -0,0 +1,19 @@ +//===-- TargetAsmParser.cpp - Target Assembly Parser -----------------------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetAsmParser.h" +using namespace llvm; + +TargetAsmParser::TargetAsmParser(const Target &T) + : TheTarget(T), AvailableFeatures(0) +{ +} + +TargetAsmParser::~TargetAsmParser() { +} diff --git a/final/lib/MC/MCPureStreamer.cpp b/final/lib/MC/MCPureStreamer.cpp new file mode 100644 index 00000000000..6098e6b8f38 --- /dev/null +++ b/final/lib/MC/MCPureStreamer.cpp @@ -0,0 +1,234 @@ +//===- lib/MC/MCPureStreamer.cpp - MC "Pure" Object Output ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectStreamer.h" +// FIXME: Remove this. 
+#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +namespace { + +class MCPureStreamer : public MCObjectStreamer { +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + +public: + MCPureStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + virtual void EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0); + virtual void Finish(); + + + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitThumbFunc(MCSymbol *Func) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitCOFFSymbolType(int Type) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EndCOFFSymbolDef() { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual void EmitFileDirective(StringRef Filename) { + report_fatal_error("unsupported directive in pure streamer"); + } + virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) { + report_fatal_error("unsupported directive in pure streamer"); + return false; + } + + /// @} +}; + +} // end anonymous namespace. + +void MCPureStreamer::InitSections() { + // FIMXE: To what!? 
+ SwitchSection(getContext().getMachOSection("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + 0, SectionKind::getText())); + +} + +void MCPureStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + + Symbol->setSection(*getCurrentSection()); + + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) + new MCDataFragment(getCurrentSectionData()); + + // FIXME: This is wasteful, we don't necessarily need to create a data + // fragment. Instead, we should mark the symbol as pointing into the data + // fragment if it exists, otherwise we should just queue the label and set its + // fragment pointer when we emit the next fragment. + MCDataFragment *F = getOrCreateDataFragment(); + assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); + SD.setFragment(F); + SD.setOffset(F->getContents().size()); +} + +void MCPureStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + // FIXME: Lift context changes into super class. + getAssembler().getOrCreateSymbolData(*Symbol); + Symbol->setVariableValue(AddValueSymbols(Value)); +} + +void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size, unsigned ByteAlignment) { + report_fatal_error("not yet implemented in pure streamer"); +} + +void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); +} + +void MCPureStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCPureStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) { + // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into + // MCObjectStreamer. + if (MaxBytesToEmit == 0) + MaxBytesToEmit = ByteAlignment; + MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, + getCurrentSectionData()); + F->setEmitNops(true); + + // Update the maximum alignment on the current section if necessary. + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); +} + +void MCPureStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); +} + +void MCPureStreamer::EmitInstToFragment(const MCInst &Inst) { + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); + + // Add the fixups and data. 
+  //
+  // FIXME: Revisit this design decision when relaxation is done, we may be
+  // able to get away with not storing any extra data in the MCInst.
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+  VecOS.flush();
+
+  IF->getCode() = Code;
+  IF->getFixups() = Fixups;
+}
+
+void MCPureStreamer::EmitInstToData(const MCInst &Inst) {
+  MCDataFragment *DF = getOrCreateDataFragment();
+
+  SmallVector<MCFixup, 4> Fixups;
+  SmallString<256> Code;
+  raw_svector_ostream VecOS(Code);
+  getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+  VecOS.flush();
+
+  // Add the fixups and data.
+  for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+    Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+    DF->addFixup(Fixups[i]);
+  }
+  DF->getContents().append(Code.begin(), Code.end());
+}
+
+void MCPureStreamer::Finish() {
+  // FIXME: Handle DWARF tables?
+
+  this->MCObjectStreamer::Finish();
+}
+
+MCStreamer *llvm::createPureStreamer(MCContext &Context, TargetAsmBackend &TAB,
+                                     raw_ostream &OS, MCCodeEmitter *CE) {
+  return new MCPureStreamer(Context, TAB, OS, CE);
+}
diff --git a/final/lib/MC/MCSection.cpp b/final/lib/MC/MCSection.cpp
new file mode 100644
index 00000000000..a792d563179
--- /dev/null
+++ b/final/lib/MC/MCSection.cpp
@@ -0,0 +1,22 @@
+//===- lib/MC/MCSection.cpp - Machine Code Section Representation ---------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// MCSection
+//===----------------------------------------------------------------------===//
+
+MCSection::~MCSection() {
+}
+
diff --git a/final/lib/MC/MCSectionCOFF.cpp b/final/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 00000000000..90091f06e9a
--- /dev/null
+++ b/final/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,84 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+                                               const MCAsmInfo &MAI) const {
+
+  // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss") + return true; + + return false; +} + +void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + // standard sections don't require the '.section' + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + OS << "\t.section\t" << getSectionName() << ",\""; + if (getKind().isText()) + OS << 'x'; + if (getKind().isWriteable()) + OS << 'w'; + else + OS << 'r'; + if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) + OS << 'n'; + OS << "\"\n"; + + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { + switch (Selection) { + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: + OS << "\t.linkonce one_only\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_ANY: + OS << "\t.linkonce discard\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: + OS << "\t.linkonce same_size\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: + OS << "\t.linkonce same_contents\n"; + break; + //NOTE: as of binutils 2.20, there is no way to specifiy select largest + // with the .linkonce directive. For now, we treat it as an invalid + // comdat selection value. + case COFF::IMAGE_COMDAT_SELECT_LARGEST: + // OS << "\t.linkonce largest\n"; + // break; + default: + assert (0 && "unsupported COFF selection type"); + break; + } + } +} + +bool MCSectionCOFF::UseCodeAlign() const { + return getKind().isText(); +} + +bool MCSectionCOFF::isVirtualSection() const { + return getCharacteristics() & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; +} diff --git a/final/lib/MC/MCSectionELF.cpp b/final/lib/MC/MCSectionELF.cpp new file mode 100644 index 00000000000..dfd77c3fe81 --- /dev/null +++ b/final/lib/MC/MCSectionELF.cpp @@ -0,0 +1,150 @@ +//===- lib/MC/MCSectionELF.cpp - ELF Code Section Representation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +MCSectionELF::~MCSectionELF() {} // anchor. + +// ShouldOmitSectionDirective - Decides whether a '.section' directive +// should be printed before the section name +bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, + const MCAsmInfo &MAI) const { + + // FIXME: Does .section .bss/.data/.text work everywhere?? + if (Name == ".text" || Name == ".data" || + (Name == ".bss" && !MAI.usesELFSectionDirectiveForBSS())) + return true; + + return false; +} + +void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + + if (ShouldOmitSectionDirective(SectionName, MAI)) { + OS << '\t' << getSectionName() << '\n'; + return; + } + + StringRef name = getSectionName(); + if (name.find_first_not_of("0123456789_." 
+ "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) { + OS << "\t.section\t" << name; + } else { + OS << "\t.section\t\""; + for (const char *b = name.begin(), *e = name.end(); b < e; ++b) { + if (*b == '"') // Unquoted " + OS << "\\\""; + else if (*b != '\\') // Neither " or backslash + OS << *b; + else if (b + 1 == e) // Trailing backslash + OS << "\\\\"; + else { + OS << b[0] << b[1]; // Quoted character + ++b; + } + } + OS << '"'; + } + + // Handle the weird solaris syntax if desired. + if (MAI.usesSunStyleELFSectionSwitchSyntax() && + !(Flags & ELF::SHF_MERGE)) { + if (Flags & ELF::SHF_ALLOC) + OS << ",#alloc"; + if (Flags & ELF::SHF_EXECINSTR) + OS << ",#execinstr"; + if (Flags & ELF::SHF_WRITE) + OS << ",#write"; + if (Flags & ELF::SHF_TLS) + OS << ",#tls"; + OS << '\n'; + return; + } + + OS << ",\""; + if (Flags & ELF::SHF_ALLOC) + OS << 'a'; + if (Flags & ELF::SHF_EXECINSTR) + OS << 'x'; + if (Flags & ELF::SHF_GROUP) + OS << 'G'; + if (Flags & ELF::SHF_WRITE) + OS << 'w'; + if (Flags & ELF::SHF_MERGE) + OS << 'M'; + if (Flags & ELF::SHF_STRINGS) + OS << 'S'; + if (Flags & ELF::SHF_TLS) + OS << 'T'; + + // If there are target-specific flags, print them. + if (Flags & ELF::XCORE_SHF_CP_SECTION) + OS << 'c'; + if (Flags & ELF::XCORE_SHF_DP_SECTION) + OS << 'd'; + + OS << '"'; + + OS << ','; + + // If comment string is '@', e.g. as on ARM - use '%' instead + if (MAI.getCommentString()[0] == '@') + OS << '%'; + else + OS << '@'; + + if (Type == ELF::SHT_INIT_ARRAY) + OS << "init_array"; + else if (Type == ELF::SHT_FINI_ARRAY) + OS << "fini_array"; + else if (Type == ELF::SHT_PREINIT_ARRAY) + OS << "preinit_array"; + else if (Type == ELF::SHT_NOBITS) + OS << "nobits"; + else if (Type == ELF::SHT_NOTE) + OS << "note"; + else if (Type == ELF::SHT_PROGBITS) + OS << "progbits"; + + if (EntrySize) { + assert(Flags & ELF::SHF_MERGE); + OS << "," << EntrySize; + } + + if (Flags & ELF::SHF_GROUP) + OS << "," << Group->getName() << ",comdat"; + OS << '\n'; +} + +bool MCSectionELF::UseCodeAlign() const { + return getFlags() & ELF::SHF_EXECINSTR; +} + +bool MCSectionELF::isVirtualSection() const { + return getType() == ELF::SHT_NOBITS; +} + +unsigned MCSectionELF::DetermineEntrySize(SectionKind Kind) { + if (Kind.isMergeable1ByteCString()) return 1; + if (Kind.isMergeable2ByteCString()) return 2; + if (Kind.isMergeable4ByteCString()) return 4; + if (Kind.isMergeableConst4()) return 4; + if (Kind.isMergeableConst8()) return 8; + if (Kind.isMergeableConst16()) return 16; + return 0; +} diff --git a/final/lib/MC/MCSectionMachO.cpp b/final/lib/MC/MCSectionMachO.cpp new file mode 100644 index 00000000000..577e93aed6b --- /dev/null +++ b/final/lib/MC/MCSectionMachO.cpp @@ -0,0 +1,299 @@ +//===- lib/MC/MCSectionMachO.cpp - MachO Code Section Representation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +/// SectionTypeDescriptors - These are strings that describe the various section +/// types. This *must* be kept in order with and stay synchronized with the +/// section type list. 
+static const struct { + const char *AssemblerName, *EnumName; +} SectionTypeDescriptors[MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1] = { + { "regular", "S_REGULAR" }, // 0x00 + { 0, "S_ZEROFILL" }, // 0x01 + { "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02 + { "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03 + { "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04 + { "literal_pointers", "S_LITERAL_POINTERS" }, // 0x05 + { "non_lazy_symbol_pointers", "S_NON_LAZY_SYMBOL_POINTERS" }, // 0x06 + { "lazy_symbol_pointers", "S_LAZY_SYMBOL_POINTERS" }, // 0x07 + { "symbol_stubs", "S_SYMBOL_STUBS" }, // 0x08 + { "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09 + { "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A + { "coalesced", "S_COALESCED" }, // 0x0B + { 0, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C + { "interposing", "S_INTERPOSING" }, // 0x0D + { "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E + { 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F + { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10 + { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11 + { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12 + { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13 + { "thread_local_variable_pointers", + "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14 + { "thread_local_init_function_pointers", + "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15 +}; + + +/// SectionAttrDescriptors - This is an array of descriptors for section +/// attributes. Unlike the SectionTypeDescriptors, this is not directly indexed +/// by attribute, instead it is searched. The last entry has an AttrFlagEnd +/// AttrFlag value. +static const struct { + unsigned AttrFlag; + const char *AssemblerName, *EnumName; +} SectionAttrDescriptors[] = { +#define ENTRY(ASMNAME, ENUM) \ + { MCSectionMachO::ENUM, ASMNAME, #ENUM }, +ENTRY("pure_instructions", S_ATTR_PURE_INSTRUCTIONS) +ENTRY("no_toc", S_ATTR_NO_TOC) +ENTRY("strip_static_syms", S_ATTR_STRIP_STATIC_SYMS) +ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP) +ENTRY("live_support", S_ATTR_LIVE_SUPPORT) +ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE) +ENTRY("debug", S_ATTR_DEBUG) +ENTRY(0 /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS) +ENTRY(0 /*FIXME*/, S_ATTR_EXT_RELOC) +ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC) +#undef ENTRY + { 0, "none", 0 }, // used if section has no attributes but has a stub size +#define AttrFlagEnd 0xffffffff // non legal value, multiple attribute bits set + { AttrFlagEnd, 0, 0 } +}; + +MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section, + unsigned TAA, unsigned reserved2, SectionKind K) + : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) { + assert(Segment.size() <= 16 && Section.size() <= 16 && + "Segment or section string too long"); + for (unsigned i = 0; i != 16; ++i) { + if (i < Segment.size()) + SegmentName[i] = Segment[i]; + else + SegmentName[i] = 0; + + if (i < Section.size()) + SectionName[i] = Section[i]; + else + SectionName[i] = 0; + } +} + +void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, + raw_ostream &OS) const { + OS << "\t.section\t" << getSegmentName() << ',' << getSectionName(); + + // Get the section type and attributes. 
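// Illustrative sketch (editorial, not part of the patch): for a section
// created as __DATA,__la_symbol_ptr with type S_LAZY_SYMBOL_POINTERS and no
// attributes, the code below prints
//   .section __DATA,__la_symbol_ptr,lazy_symbol_pointers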
+  unsigned TAA = getTypeAndAttributes();
+  if (TAA == 0) {
+    OS << '\n';
+    return;
+  }
+
+  unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE;
+  assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE &&
+         "Invalid SectionType specified!");
+
+  if (SectionTypeDescriptors[SectionType].AssemblerName) {
+    OS << ',';
+    OS << SectionTypeDescriptors[SectionType].AssemblerName;
+  } else {
+    // If we have no name for the attribute, stop here.
+    OS << '\n';
+    return;
+  }
+
+  // If we don't have any attributes, we're done.
+  unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES;
+  if (SectionAttrs == 0) {
+    // If we have a S_SYMBOL_STUBS size specified, print it along with 'none' as
+    // the attribute specifier.
+    if (Reserved2 != 0)
+      OS << ",none," << Reserved2;
+    OS << '\n';
+    return;
+  }
+
+  // Check each attribute to see if we have it.
+  char Separator = ',';
+  for (unsigned i = 0;
+       SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag;
+       ++i) {
+    // Check to see if we have this attribute.
+    if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0)
+      continue;
+
+    // Yep, clear it and print it.
+    SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
+
+    OS << Separator;
+    if (SectionAttrDescriptors[i].AssemblerName)
+      OS << SectionAttrDescriptors[i].AssemblerName;
+    else
+      OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
+    Separator = '+';
+  }
+
+  assert(SectionAttrs == 0 && "Unknown section attributes!");
+
+  // If we have a S_SYMBOL_STUBS size specified, print it.
+  if (Reserved2 != 0)
+    OS << ',' << Reserved2;
+  OS << '\n';
+}
+
+bool MCSectionMachO::UseCodeAlign() const {
+  return hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+}
+
+bool MCSectionMachO::isVirtualSection() const {
+  return (getType() == MCSectionMachO::S_ZEROFILL ||
+          getType() == MCSectionMachO::S_GB_ZEROFILL ||
+          getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
+}
+
+/// StripSpaces - This removes leading and trailing spaces from the StringRef.
+static void StripSpaces(StringRef &Str) {
+  while (!Str.empty() && isspace(Str[0]))
+    Str = Str.substr(1);
+  while (!Str.empty() && isspace(Str.back()))
+    Str = Str.substr(0, Str.size()-1);
+}
+
+/// ParseSectionSpecifier - Parse the section specifier indicated by "Spec".
+/// This is a string that can appear after a .section directive in a mach-o
+/// flavored .s file.  If successful, this fills in the specified Out
+/// parameters and returns an empty string.  When an invalid section
+/// specifier is present, this returns a string indicating the problem.
+std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec,        // In.
+                                                  StringRef &Segment,    // Out.
+                                                  StringRef &Section,    // Out.
+                                                  unsigned  &TAA,        // Out.
+                                                  unsigned  &StubSize) { // Out.
+  // Find the first comma.
+  std::pair<StringRef, StringRef> Comma = Spec.split(',');
+
+  // If there is no comma, we fail.
+  if (Comma.second.empty())
+    return "mach-o section specifier requires a segment and section "
+           "separated by a comma";
+
+  // Capture segment, remove leading and trailing whitespace.
+  Segment = Comma.first;
+  StripSpaces(Segment);
+
+  // Verify that the segment is present and not too long.
+  if (Segment.empty() || Segment.size() > 16)
+    return "mach-o section specifier requires a segment whose length is "
+           "between 1 and 16 characters";
+
+  // Split the section name off from any attributes if present.
+  Comma = Comma.second.split(',');
+
+  // Capture section, remove leading and trailing whitespace.
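// Illustrative sketch (editorial, not part of the patch): a maximal
// specifier exercising every Out parameter is
//   __TEXT,__symbol_stub,symbol_stubs,pure_instructions,16
// which yields TAA == S_SYMBOL_STUBS | S_ATTR_PURE_INSTRUCTIONS and
// StubSize == 16.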
+  Section = Comma.first;
+  StripSpaces(Section);
+
+  // Verify that the section is present and not too long.
+  if (Section.empty() || Section.size() > 16)
+    return "mach-o section specifier requires a section whose length is "
+           "between 1 and 16 characters";
+
+  // If there is no comma after the section, we're done.
+  StubSize = 0;
+  if (Comma.second.empty())
+    return "";
+
+  // Otherwise, we need to parse the section type and attributes.
+  Comma = Comma.second.split(',');
+
+  // Get the section type.
+  StringRef SectionType = Comma.first;
+  StripSpaces(SectionType);
+
+  // Figure out which section type it is.
+  unsigned TypeID;
+  for (TypeID = 0; TypeID != MCSectionMachO::LAST_KNOWN_SECTION_TYPE+1; ++TypeID)
+    if (SectionTypeDescriptors[TypeID].AssemblerName &&
+        SectionType == SectionTypeDescriptors[TypeID].AssemblerName)
+      break;
+
+  // If we didn't find the section type, reject it.
+  if (TypeID > MCSectionMachO::LAST_KNOWN_SECTION_TYPE)
+    return "mach-o section specifier uses an unknown section type";
+
+  // Remember the TypeID.
+  TAA = TypeID;
+
+  // If we have no comma after the section type, there are no attributes.
+  if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+    if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+      return "mach-o section specifier of type 'symbol_stubs' requires a size "
+             "specifier";
+    return "";
+  }
+
+  // Otherwise, we do have some attributes.  Split off the size specifier if
+  // present.
+  Comma = Comma.second.split(',');
+  StringRef Attrs = Comma.first;
+
+  // The attribute list is a '+' separated list of attributes.
+  std::pair<StringRef, StringRef> Plus = Attrs.split('+');
+
+  while (1) {
+    StringRef Attr = Plus.first;
+    StripSpaces(Attr);
+
+    // Look up the attribute.
+    for (unsigned i = 0; ; ++i) {
+      if (SectionAttrDescriptors[i].AttrFlag == AttrFlagEnd)
+        return "mach-o section specifier has invalid attribute";
+
+      if (SectionAttrDescriptors[i].AssemblerName &&
+          Attr == SectionAttrDescriptors[i].AssemblerName) {
+        TAA |= SectionAttrDescriptors[i].AttrFlag;
+        break;
+      }
+    }
+
+    if (Plus.second.empty()) break;
+    Plus = Plus.second.split('+');
+  }
+
+  // Okay, we've parsed the section attributes; see if we have a stub size spec.
+  if (Comma.second.empty()) {
+    // S_SYMBOL_STUBS always requires a symbol stub size specifier.
+    if (TAA == MCSectionMachO::S_SYMBOL_STUBS)
+      return "mach-o section specifier of type 'symbol_stubs' requires a size "
+             "specifier";
+    return "";
+  }
+
+  // If we have a stub size spec, we must have a section type of S_SYMBOL_STUBS.
+  if ((TAA & MCSectionMachO::SECTION_TYPE) != MCSectionMachO::S_SYMBOL_STUBS)
+    return "mach-o section specifier cannot have a stub size specified because "
+           "it does not have type 'symbol_stubs'";
+
+  // Okay, if we do, it must be a number.
+  StringRef StubSizeStr = Comma.second;
+  StripSpaces(StubSizeStr);
+
+  // Convert the stub size from a string to an integer.
+  if (StubSizeStr.getAsInteger(0, StubSize))
+    return "mach-o section specifier has a malformed stub size";
+
+  return "";
+}
diff --git a/final/lib/MC/MCStreamer.cpp b/final/lib/MC/MCStreamer.cpp
new file mode 100644
index 00000000000..1bd287b2b5b
--- /dev/null
+++ b/final/lib/MC/MCStreamer.cpp
@@ -0,0 +1,315 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
+#include <cstdlib>
+using namespace llvm;
+
+MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) {
+  const MCSection *section = NULL;
+  SectionStack.push_back(std::make_pair(section, section));
+}
+
+MCStreamer::~MCStreamer() {
+}
+
+raw_ostream &MCStreamer::GetCommentOS() {
+  // By default, discard comments.
+  return nulls();
+}
+
+void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta,
+                                      const MCSymbol *Label, int PointerSize) {
+  // Emit the sequence to set the address.
+  EmitIntValue(dwarf::DW_LNS_extended_op, 1);
+  EmitULEB128IntValue(PointerSize + 1);
+  EmitIntValue(dwarf::DW_LNE_set_address, 1);
+  EmitSymbolValue(Label, PointerSize);
+
+  // Emit the sequence for the LineDelta (from 1) and a zero address delta.
+  MCDwarfLineAddr::Emit(this, LineDelta, 0);
+}
+
+/// EmitIntValue - Special case of EmitValue that avoids the client having to
+/// pass in an MCExpr for constant integers.
+void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
+                              unsigned AddrSpace) {
+  assert(Size <= 8 && "Invalid size");
+  assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
+         "Invalid size");
+  char buf[8];
+  // FIXME: Endianness assumption.
+  for (unsigned i = 0; i != Size; ++i)
+    buf[i] = uint8_t(Value >> (i * 8));
+  EmitBytes(StringRef(buf, Size), AddrSpace);
+}
+
+/// EmitULEB128IntValue - Special case of EmitULEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
+void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) {
+  SmallString<32> Tmp;
+  raw_svector_ostream OSE(Tmp);
+  MCObjectWriter::EncodeULEB128(Value, OSE);
+  EmitBytes(OSE.str(), AddrSpace);
+}
+
+/// EmitSLEB128IntValue - Special case of EmitSLEB128Value that avoids the
+/// client having to pass in an MCExpr for constant integers.
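+/// (For example, Value = -2 encodes as the single SLEB128 byte 0x7e.)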
+void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) { + SmallString<32> Tmp; + raw_svector_ostream OSE(Tmp); + MCObjectWriter::EncodeSLEB128(Value, OSE); + EmitBytes(OSE.str(), AddrSpace); +} + +void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + if (getContext().getAsmInfo().hasAggressiveSymbolFolding()) { + EmitValue(Value, Size, AddrSpace); + return; + } + MCSymbol *ABS = getContext().CreateTempSymbol(); + EmitAssignment(ABS, Value); + EmitSymbolValue(ABS, Size, AddrSpace); +} + + +void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitValueImpl(Value, Size, false, AddrSpace); +} + +void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { + EmitValueImpl(Value, Size, true, AddrSpace); +} + +void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, + bool isPCRel, unsigned AddrSpace) { + EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel, + AddrSpace); +} + +void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, + unsigned AddrSpace) { + EmitSymbolValue(Sym, Size, false, AddrSpace); +} + +void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size, + unsigned AddrSpace) { + EmitSymbolValue(Sym, Size, true, AddrSpace); +} + +void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { + report_fatal_error("unsupported directive in streamer"); +} + +/// EmitFill - Emit NumBytes bytes worth of the value specified by +/// FillValue. This implements directives such as '.space'. +void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, + unsigned AddrSpace) { + const MCExpr *E = MCConstantExpr::Create(FillValue, getContext()); + for (uint64_t i = 0, e = NumBytes; i != e; ++i) + EmitValue(E, 1, AddrSpace); +} + +bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename) { + return getContext().GetDwarfFile(Filename, FileNo) == 0; +} + +void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, + unsigned Discriminator) { + getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa, + Discriminator); +} + +MCDwarfFrameInfo *MCStreamer::getCurrentFrameInfo() { + if (FrameInfos.empty()) + return NULL; + return &FrameInfos.back(); +} + +void MCStreamer::EnsureValidFrame() { + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + if (!CurFrame || CurFrame->End) + report_fatal_error("No open frame"); +} + +bool MCStreamer::EmitCFIStartProc() { + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + if (CurFrame && !CurFrame->End) { + report_fatal_error("Starting a frame before finishing the previous one!"); + return true; + } + MCDwarfFrameInfo Frame; + Frame.Begin = getContext().CreateTempSymbol(); + EmitLabel(Frame.Begin); + FrameInfos.push_back(Frame); + return false; +} + +bool MCStreamer::EmitCFIEndProc() { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->End = getContext().CreateTempSymbol(); + EmitLabel(CurFrame->End); + return false; +} + +bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(MachineLocation::VirtualFP); + MachineLocation Source(Register, -Offset); + MCCFIInstruction Instruction(Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +bool 
MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(MachineLocation::VirtualFP); + MachineLocation Source(MachineLocation::VirtualFP, -Offset); + MCCFIInstruction Instruction(Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(Register); + MachineLocation Source(MachineLocation::VirtualFP); + MCCFIInstruction Instruction(Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(Register, Offset); + MachineLocation Source(Register, Offset); + MCCFIInstruction Instruction(Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, + unsigned Encoding) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->Personality = Sym; + CurFrame->PersonalityEncoding = Encoding; + return false; +} + +bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->Lsda = Sym; + CurFrame->LsdaEncoding = Encoding; + return false; +} + +bool MCStreamer::EmitCFIRememberState() { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +bool MCStreamer::EmitCFIRestoreState() { + // FIXME: Error if there is no matching cfi_remember_state. + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label); + CurFrame->Instructions.push_back(Instruction); + return false; +} + +void MCStreamer::EmitFnStart() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitFnEnd() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitCantUnwind() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitHandlerData() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPersonality(const MCSymbol *Personality) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPad(int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitRegSave(const SmallVectorImpl &RegList, bool) { + errs() << "Not implemented yet\n"; + abort(); +} + +/// EmitRawText - If this file is backed by an assembly streamer, this dumps +/// the specified string in the output .s file. This capability is +/// indicated by the hasRawTextSupport() predicate. 
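+/// (For example, a caller might forward a literal line such as
+/// ".no_dead_strip foo" unchanged when hasRawTextSupport() returns true.)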
+void MCStreamer::EmitRawText(StringRef String) {
+  errs() << "EmitRawText called on an MCStreamer that doesn't support it, "
+            "something must not be fully mc'ized\n";
+  abort();
+}
+
+void MCStreamer::EmitRawText(const Twine &T) {
+  SmallString<128> Str;
+  T.toVector(Str);
+  EmitRawText(Str.str());
+}
diff --git a/final/lib/MC/MCSymbol.cpp b/final/lib/MC/MCSymbol.cpp
new file mode 100644
index 00000000000..1c71f267a4b
--- /dev/null
+++ b/final/lib/MC/MCSymbol.cpp
@@ -0,0 +1,80 @@
+//===- lib/MC/MCSymbol.cpp - MCSymbol implementation ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Sentinel value for the absolute pseudo section.
+const MCSection *MCSymbol::AbsolutePseudoSection =
+    reinterpret_cast<const MCSection *>(1);
+
+static bool isAcceptableChar(char C) {
+  if ((C < 'a' || C > 'z') &&
+      (C < 'A' || C > 'Z') &&
+      (C < '0' || C > '9') &&
+      C != '_' && C != '$' && C != '.' && C != '@')
+    return false;
+  return true;
+}
+
+/// NameNeedsQuoting - Return true if the identifier \arg Str needs quotes to
+/// be syntactically correct.
+static bool NameNeedsQuoting(StringRef Str) {
+  assert(!Str.empty() && "Cannot create an empty MCSymbol");
+
+  // If any of the characters in the string is an unacceptable character, force
+  // quotes.
+  for (unsigned i = 0, e = Str.size(); i != e; ++i)
+    if (!isAcceptableChar(Str[i]))
+      return true;
+  return false;
+}
+
+const MCSymbol &MCSymbol::AliasedSymbol() const {
+  const MCSymbol *S = this;
+  while (S->isVariable()) {
+    const MCExpr *Value = S->getVariableValue();
+    if (Value->getKind() != MCExpr::SymbolRef)
+      return *S;
+    const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Value);
+    S = &Ref->getSymbol();
+  }
+  return *S;
+}
+
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+  assert(!IsUsed && "Cannot set a variable that has already been used.");
+  assert(Value && "Invalid variable value!");
+  assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
+         "Invalid redefinition!");
+  this->Value = Value;
+
+  // Mark the variable as absolute as appropriate.
+  if (isa<MCConstantExpr>(Value))
+    setAbsolute();
+}
+
+void MCSymbol::print(raw_ostream &OS) const {
+  // The name for this MCSymbol is required to be a valid target name. However,
+  // some targets support quoting names with funny characters. If the name
+  // contains a funny character, then print it quoted.
+  if (!NameNeedsQuoting(getName())) {
+    OS << getName();
+    return;
+  }
+
+  OS << '"' << getName() << '"';
+}
+
+void MCSymbol::dump() const {
+  print(dbgs());
+}
diff --git a/final/lib/MC/MCValue.cpp b/final/lib/MC/MCValue.cpp
new file mode 100644
index 00000000000..c6ea16ce7b4
--- /dev/null
+++ b/final/lib/MC/MCValue.cpp
@@ -0,0 +1,36 @@
+//===- lib/MC/MCValue.cpp - MCValue implementation ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { + if (isAbsolute()) { + OS << getConstant(); + return; + } + + getSymA()->print(OS); + + if (getSymB()) { + OS << " - "; + getSymB()->print(OS); + } + + if (getConstant()) + OS << " + " << getConstant(); +} + +void MCValue::dump() const { + print(dbgs(), 0); +} diff --git a/final/lib/MC/MachObjectWriter.cpp b/final/lib/MC/MachObjectWriter.cpp new file mode 100644 index 00000000000..de5349459d1 --- /dev/null +++ b/final/lib/MC/MachObjectWriter.cpp @@ -0,0 +1,1598 @@ +//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCMachOSymbolFlags.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetAsmBackend.h" + +// FIXME: Gross. +#include "../Target/ARM/ARMFixupKinds.h" +#include "../Target/X86/X86FixupKinds.h" + +#include +using namespace llvm; +using namespace llvm::object; + +// FIXME: this has been copied from (or to) X86AsmBackend.cpp +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_PCRel_1: + case FK_Data_1: return 0; + case FK_PCRel_2: + case FK_Data_2: return 1; + case FK_PCRel_4: + // FIXME: Remove these!!! + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case X86::reloc_signed_4byte: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) { + // Undefined symbols are always extern. + if (SD->Symbol->isUndefined()) + return true; + + // References to weak definitions require external relocation entries; the + // definition may not always be the one in the same object file. + if (SD->getFlags() & SF_WeakDefinition) + return true; + + // Otherwise, we can use an internal relocation. + return false; +} + +namespace { + +class MachObjectWriter : public MCObjectWriter { + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// The target specific Mach-O writer instance. 
+  llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+
+  /// @name Relocation Data
+  /// @{
+
+  llvm::DenseMap<const MCSectionData*,
+                 std::vector<macho::RelocationEntry> > Relocations;
+  llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
+
+  /// @}
+  /// @name Symbol Table Data
+  /// @{
+
+  SmallString<256> StringTable;
+  std::vector<MachSymbolData> LocalSymbolData;
+  std::vector<MachSymbolData> ExternalSymbolData;
+  std::vector<MachSymbolData> UndefinedSymbolData;
+
+  /// @}
+
+private:
+  /// @name Utility Methods
+  /// @{
+
+  bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+    const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+      (MCFixupKind) Kind);
+
+    return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+  }
+
+  /// @}
+
+  SectionAddrMap SectionAddress;
+  uint64_t getSectionAddress(const MCSectionData* SD) const {
+    return SectionAddress.lookup(SD);
+  }
+  uint64_t getSymbolAddress(const MCSymbolData* SD,
+                            const MCAsmLayout &Layout) const {
+    return getSectionAddress(SD->getFragment()->getParent()) +
+      Layout.getSymbolOffset(SD);
+  }
+  uint64_t getFragmentAddress(const MCFragment *Fragment,
+                              const MCAsmLayout &Layout) const {
+    return getSectionAddress(Fragment->getParent()) +
+      Layout.getFragmentOffset(Fragment);
+  }
+
+  uint64_t getPaddingSize(const MCSectionData *SD,
+                          const MCAsmLayout &Layout) const {
+    uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+    unsigned Next = SD->getLayoutOrder() + 1;
+    if (Next >= Layout.getSectionOrder().size())
+      return 0;
+
+    const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+    if (NextSD.getSection().isVirtualSection())
+      return 0;
+    return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+  }
+
+public:
+  MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
+                   bool _IsLittleEndian)
+    : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
+  }
+
+  /// @name Target Writer Proxy Accessors
+  /// @{
+
+  bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+  bool isARM() const {
+    uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
+    return CPUType == mach::CTM_ARM;
+  }
+
+  /// @}
+
+  void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+                   bool SubsectionsViaSymbols) {
+    uint32_t Flags = 0;
+
+    if (SubsectionsViaSymbols)
+      Flags |= macho::HF_SubsectionsViaSymbols;
+
+    // struct mach_header (28 bytes) or
+    // struct mach_header_64 (32 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
+
+    Write32(TargetObjectWriter->getCPUType());
+    Write32(TargetObjectWriter->getCPUSubtype());
+
+    Write32(macho::HFT_Object);
+    Write32(NumLoadCommands);
+    Write32(LoadCommandsSize);
+    Write32(Flags);
+    if (is64Bit())
+      Write32(0); // reserved
+
+    assert(OS.tell() - Start ==
+           (is64Bit() ? macho::Header64Size : macho::Header32Size));
+  }
+
+  /// WriteSegmentLoadCommand - Write a segment load command.
+  ///
+  /// \arg NumSections - The number of sections in this segment.
+  /// \arg SectionDataSize - The total size of the sections.
+  void WriteSegmentLoadCommand(unsigned NumSections,
+                               uint64_t VMSize,
+                               uint64_t SectionDataStartOffset,
+                               uint64_t SectionDataSize) {
+    // struct segment_command (56 bytes) or
+    // struct segment_command_64 (72 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    unsigned SegmentLoadCommandSize =
+      is64Bit() ? macho::SegmentLoadCommand64Size:
+      macho::SegmentLoadCommand32Size;
+    Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+    Write32(SegmentLoadCommandSize +
+            NumSections * (is64Bit() ?
+                           macho::Section64Size :
+                           macho::Section32Size));
+
+    WriteBytes("", 16);
+    if (is64Bit()) {
+      Write64(0); // vmaddr
+      Write64(VMSize); // vmsize
+      Write64(SectionDataStartOffset); // file offset
+      Write64(SectionDataSize); // file size
+    } else {
+      Write32(0); // vmaddr
+      Write32(VMSize); // vmsize
+      Write32(SectionDataStartOffset); // file offset
+      Write32(SectionDataSize); // file size
+    }
+    Write32(0x7); // maxprot
+    Write32(0x7); // initprot
+    Write32(NumSections);
+    Write32(0); // flags
+
+    assert(OS.tell() - Start == SegmentLoadCommandSize);
+  }
+
+  void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+                    const MCSectionData &SD, uint64_t FileOffset,
+                    uint64_t RelocationsStart, unsigned NumRelocations) {
+    uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+    // The offset is unused for virtual sections.
+    if (SD.getSection().isVirtualSection()) {
+      assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+      FileOffset = 0;
+    }
+
+    // struct section (68 bytes) or
+    // struct section_64 (80 bytes)
+
+    uint64_t Start = OS.tell();
+    (void) Start;
+
+    const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+    WriteBytes(Section.getSectionName(), 16);
+    WriteBytes(Section.getSegmentName(), 16);
+    if (is64Bit()) {
+      Write64(getSectionAddress(&SD)); // address
+      Write64(SectionSize); // size
+    } else {
+      Write32(getSectionAddress(&SD)); // address
+      Write32(SectionSize); // size
+    }
+    Write32(FileOffset);
+
+    unsigned Flags = Section.getTypeAndAttributes();
+    if (SD.hasInstructions())
+      Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+    assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+    Write32(Log2_32(SD.getAlignment()));
+    Write32(NumRelocations ? RelocationsStart : 0);
+    Write32(NumRelocations);
+    Write32(Flags);
+    Write32(IndirectSymBase.lookup(&SD)); // reserved1
+    Write32(Section.getStubSize()); // reserved2
+    if (is64Bit())
+      Write32(0); // reserved3
+
+    assert(OS.tell() - Start == (is64Bit() ?
macho::Section64Size : + macho::Section32Size)); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(macho::LCT_Symtab); + Write32(macho::SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == macho::SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(macho::LCT_Dysymtab); + Write32(macho::DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); + } + + void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See . + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = macho::STT_Undefined; + else if (Symbol.isAbsolute()) + Type = macho::STT_Absolute; + else + Type = macho::STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= macho::STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= macho::STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + Address = cast(Symbol.getVariableValue())->getValue(); + } else { + Address = getSymbolAddress(&Data, Layout); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + report_fatal_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (is64Bit()) + Write64(Address); + else + Write32(Address); + } + + // FIXME: We really need to improve the relocation validation. 
Basically, we + // want to implement a separate computation which evaluates the relocation + // entry as the linker would, and verifies that the resultant fixup value is + // exactly what the encoder wanted. This will catch several classes of + // problems: + // + // - Relocation entry bugs, the two algorithms are unlikely to have the same + // exact bug. + // + // - Relaxation issues, where we forget to relax something. + // + // - Input errors, where something cannot be correctly encoded. 'as' allows + // these through in many cases. + + static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; + } + void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // See . + uint32_t FixupOffset = + Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + uint32_t FixupAddress = + getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only + // have the addend and appear to have attempted to define it to be the + // actual expression addend without the PCrel bias. However, instructions + // with data following the relocation are not accomodated for (see comment + // below regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1LL << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = macho::RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can + // understand it. I think it would require a local relocation, but I'm not + // sure if that would work either. The official way to get an absolute + // PCrel relocation is to use an absolute symbol (which we don't support + // yet). + if (IsPCRel) { + IsExtern = 1; + Type = macho::RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + report_fatal_error("unsupported pc-relative relocation of difference"); + + // The support for the situation where one or both of the symbols would + // require a local relocation is handled just like if the symbols were + // external. This is certainly used in the case of debug sections where + // the section has only temporary symbols and thus the symbols don't have + // base symbols. This is encoded using the section ordinal and + // non-extern relocation entries. 
+ + // Darwin 'as' doesn't emit correct relocations for this (it ends up with + // a single SIGNED relocation); reject it for now. Except the case where + // both symbols don't have a base, equal but both NULL. + if (A_Base == B_Base && A_Base) + report_fatal_error("unsupported relocation with identical base"); + + Value += getSymbolAddress(&A_SD, Layout) - + (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout)); + Value -= getSymbolAddress(&B_SD, Layout) - + (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout)); + + if (A_Base) { + Index = A_Base->getIndex(); + IsExtern = 1; + } + else { + Index = A_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Unsigned; + + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + + if (B_Base) { + Index = B_Base->getIndex(); + IsExtern = 1; + } + else { + Index = B_SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + } + Type = macho::RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // Relocations inside debug sections always use local relocations when + // possible. This seems to be done because the debugger doesn't fully + // understand x86_64 relocation entries, and expects to find values that + // have already been fixed up. + if (Symbol->isInSection()) { + const MCSectionMachO &Section = static_cast( + Fragment->getParent()->getSection()); + if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG)) + Base = 0; + } + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceeding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); + } else if (Symbol->isInSection()) { + // The index is the section ordinal (1-based). + Index = SD.getFragment()->getParent()->getOrdinal() + 1; + IsExtern = 0; + Value += getSymbolAddress(&SD, Layout); + + if (IsPCRel) + Value -= FixupAddress + (1 << Log2Size); + } else if (Symbol->isVariable()) { + const MCExpr *Value = Symbol->getVariableValue(); + int64_t Res; + bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress); + if (isAbs) { + FixedValue = Res; + return; + } else { + report_fatal_error("unsupported relocation of variable '" + + Symbol->getName() + "'"); + } + } else { + report_fatal_error("unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. 
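+          // (Illustrative: "movq _foo@GOTPCREL(%rip), %rax" may become
+          // "leaq _foo(%rip), %rax"; the symbol _foo is hypothetical.)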
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load) + Type = macho::RIT_X86_64_GOTLoad; + else + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + Type = macho::RIT_X86_64_TLV; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + report_fatal_error("unsupported symbol modifier in relocation"); + } else { + Type = macho::RIT_X86_64_Signed; + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L + ) which is outside the atom + // containing L. Generally, this shouldn't occur but it does + // happen when we have a RIPrel instruction with data following the + // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel + // adjustment Darwin x86_64 uses, the offset is still negative and + // the linker has no way to recognize this. + // + // To work around this, Darwin uses several special relocation types + // to indicate the offsets. However, the specification or + // implementation of these seems to also be incomplete; they should + // adjust the addend as well based on the actual encoded instruction + // (the additional bias), but instead appear to just look at the + // final offset. + switch (-(Target.getConstant() + (1LL << Log2Size))) { + case 1: Type = macho::RIT_X86_64_Signed1; break; + case 2: Type = macho::RIT_X86_64_Signed2; break; + case 4: Type = macho::RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in branch " + "relocation"); + + Type = macho::RIT_X86_64_Branch; + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) { + Type = macho::RIT_X86_64_GOT; + } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // GOTPCREL is allowed as a modifier on non-PCrel instructions, in + // which case all we do is set the PCrel bit in the relocation entry; + // this is used with exception handling, for example. The source is + // required to include any necessary offset directly. + Type = macho::RIT_X86_64_GOT; + IsPCRel = 1; + } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { + report_fatal_error("TLVP symbol modifier should have been rip-rel"); + } else if (Modifier != MCSymbolRefExpr::VK_None) + report_fatal_error("unsupported symbol modifier in relocation"); + else + Type = macho::RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordScatteredRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See . 
+ const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely + // for pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference : + (unsigned)macho::RIT_Generic_LocalDifference; + Value2 = getSymbolAddress(B_SD, Layout); + FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment->getParent()].push_back(MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordARMScatteredRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + unsigned Log2Size, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_Vanilla; + + // See . + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = getSymbolAddress(A_SD, Layout); + uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_Difference; + Value2 = getSymbolAddress(B_SD, Layout); + FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. 
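+    // (The PAIR entry's Word1 carries the second symbol's address, Value2.)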
+ if (Type == macho::RIT_Difference || + Type == macho::RIT_Generic_LocalDifference) { + macho::RelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (macho::RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment->getParent()].push_back(MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordARMMovwMovtRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Type = macho::RIT_ARM_Half; + + // See . + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + report_fatal_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = getSymbolAddress(A_SD, Layout); + uint32_t Value2 = 0; + uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent()); + FixedValue += SecAddr; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + report_fatal_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + Type = macho::RIT_ARM_HalfDifference; + Value2 = getSymbolAddress(B_SD, Layout); + FixedValue -= getSectionAddress(B_SD->getFragment()->getParent()); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field: + // + // For these two r_type relocations they always have a pair following them + // and the r_length bits are used differently. The encoding of the + // r_length is as follows: + // low bit of r_length: + // 0 - :lower16: for movw instructions + // 1 - :upper16: for movt instructions + // high bit of r_length: + // 0 - arm instructions + // 1 - thumb instructions + // the other half of the relocated expression is in the following pair + // relocation entry in the the low 16 bits of r_address field. + unsigned ThumbBit = 0; + unsigned MovtBit = 0; + switch (Fixup.getKind()) { + default: break; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + MovtBit = 1; + break; + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + MovtBit = 1; + // Fallthrough + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + ThumbBit = 1; + break; + } + + + if (Type == macho::RIT_ARM_HalfDifference) { + uint32_t OtherHalf = MovtBit + ? 
(FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16); + + macho::RelocationEntry MRE; + MRE.Word0 = ((OtherHalf << 0) | + (macho::RIT_Pair << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment->getParent()].push_back(MRE); + } + + macho::RelocationEntry MRE; + MRE.Word0 = ((FixupOffset << 0) | + (Type << 24) | + (MovtBit << 28) | + (ThumbBit << 29) | + (IsPCRel << 30) | + macho::RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordTLVPRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !is64Bit() && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend + // is zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + getFragmentAddress(Fragment, Layout) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) + + Target.getConstant()); + FixedValue += 1ULL << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (macho::RIT_Generic_TLV << 28)); // Type + Relocations[Fragment->getParent()].push_back(MRE); + } + + static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType, + unsigned &Log2Size) { + RelocType = unsigned(macho::RIT_Vanilla); + Log2Size = ~0U; + + switch (Kind) { + default: + return false; + + case FK_Data_1: + Log2Size = llvm::Log2_32(1); + return true; + case FK_Data_2: + Log2Size = llvm::Log2_32(2); + return true; + case FK_Data_4: + Log2Size = llvm::Log2_32(4); + return true; + case FK_Data_8: + Log2Size = llvm::Log2_32(8); + return true; + + // Handle 24-bit branch kinds. + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + RelocType = unsigned(macho::RIT_ARM_Branch24Bit); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + + // Handle Thumb branches. + case ARM::fixup_arm_thumb_br: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + Log2Size = llvm::Log2_32(2); + return true; + + case ARM::fixup_arm_thumb_bl: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit); + Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_thumb_blx: + RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); + // Report as 'long', even though that is not quite accurate. 
+ Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + RelocType = unsigned(macho::RIT_ARM_HalfDifference); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: + RelocType = unsigned(macho::RIT_ARM_Half); + // Report as 'long', even though that is not quite accurate. + Log2Size = llvm::Log2_32(4); + return true; + } + } + void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size; + unsigned RelocType = macho::RIT_Vanilla; + if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) { + report_fatal_error("unknown ARM fixup kind!"); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. Differences always require scattered + // relocations. + if (Target.getSymB()) { + if (RelocType == macho::RIT_ARM_Half || + RelocType == macho::RIT_ARM_HalfDifference) + return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup, + Target, FixedValue); + return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + } + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // FIXME: For other platforms, we need to use scattered relocations for + // internal relocations with offsets. If this is an internal relocation + // with an offset, it also needs a scattered relocation entry. + // + // Is this right for ARM? + uint32_t Offset = Target.getConstant(); + if (IsPCRel && RelocType == macho::RIT_Vanilla) + Offset += 1 << Log2Size; + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target, + Log2Size, FixedValue); + + // See . + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // FIXME! + report_fatal_error("FIXME: relocations to absolute targets " + "not yet implemented"); + } else if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } + + report_fatal_error("unsupported relocation of variable '" + + SD->getSymbol().getName() + "'"); + } else { + // Check whether we need an external or internal relocation. + if (doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). 
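+        // (Ordinal 0 would denote the absolute section, hence the 1-based
+        // numbering here.)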
+ Index = SD->getFragment()->getParent()->getOrdinal() + 1; + FixedValue += getSectionAddress(SD->getFragment()->getParent()); + } + if (IsPCRel) + FixedValue -= getSectionAddress(Fragment->getParent()); + + // The type is determined by the fixup kind. + Type = RelocType; + } + + // struct relocation_info (8 bytes) + macho::RelocationEntry MRE; + MRE.Word0 = FixupOffset; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment->getParent()].push_back(MRE); + } + + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue) { + // FIXME: These needs to be factored into the target Mach-O writer. + if (isARM()) { + RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + if (is64Bit()) { + RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind()); + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA() && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. + // Differences always require scattered relocations. + if (Target.getSymB()) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // Get the symbol data, if any. + MCSymbolData *SD = 0; + if (Target.getSymA()) + SD = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + + // If this is an internal relocation with an offset, it also needs a + // scattered relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Offset && SD && !doesSymbolRequireExternRelocation(SD)) + return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup, + Target, Log2Size, FixedValue); + + // See . + uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = macho::RIT_Vanilla; + } else if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } + + report_fatal_error("unsupported relocation of variable '" + + SD->getSymbol().getName() + "'"); + } else { + // Check whether we need an external or internal relocation. + if (doesSymbolRequireExternRelocation(SD)) { + IsExtern = 1; + Index = SD->getIndex(); + // For external relocations, make sure to offset the fixup value to + // compensate for the addend of the symbol address, if it was + // undefined. This occurs with weak definitions, for example. + if (!SD->Symbol->isUndefined()) + FixedValue -= Layout.getSymbolOffset(SD); + } else { + // The index is the section ordinal (1-based). 
+        Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+        FixedValue += getSectionAddress(SD->getFragment()->getParent());
+      }
+      if (IsPCRel)
+        FixedValue -= getSectionAddress(Fragment->getParent());
+
+      Type = macho::RIT_Vanilla;
+    }
+
+    // struct relocation_info (8 bytes)
+    macho::RelocationEntry MRE;
+    MRE.Word0 = FixupOffset;
+    MRE.Word1 = ((Index << 0) |
+                 (IsPCRel << 24) |
+                 (Log2Size << 25) |
+                 (IsExtern << 27) |
+                 (Type << 28));
+    Relocations[Fragment->getParent()].push_back(MRE);
+  }
+
+  void BindIndirectSymbols(MCAssembler &Asm) {
+    // This is the point where 'as' creates actual symbols for indirect symbols
+    // (in the following two passes). It would be easier for us to do this
+    // sooner when we see the attribute, but that makes getting the order in
+    // the symbol table much more complicated than it is worth.
+    //
+    // FIXME: Revisit this when the dust settles.
+
+    // Bind non-lazy symbol pointers first.
+    unsigned IndirectIndex = 0;
+    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+      const MCSectionMachO &Section =
+        cast<MCSectionMachO>(it->SectionData->getSection());
+
+      if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+        continue;
+
+      // Initialize the section indirect symbol base, if necessary.
+      if (!IndirectSymBase.count(it->SectionData))
+        IndirectSymBase[it->SectionData] = IndirectIndex;
+
+      Asm.getOrCreateSymbolData(*it->Symbol);
+    }
+
+    // Then lazy symbol pointers and symbol stubs.
+    IndirectIndex = 0;
+    for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+           ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+      const MCSectionMachO &Section =
+        cast<MCSectionMachO>(it->SectionData->getSection());
+
+      if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+          Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+        continue;
+
+      // Initialize the section indirect symbol base, if necessary.
+      if (!IndirectSymBase.count(it->SectionData))
+        IndirectSymBase[it->SectionData] = IndirectIndex;
+
+      // Set the symbol type to undefined lazy, but only on construction.
+      //
+      // FIXME: Do not hardcode.
+      bool Created;
+      MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+      if (Created)
+        Entry.setFlags(Entry.getFlags() | 0x0001);
+    }
+  }
+
+  /// ComputeSymbolTable - Compute the symbol table data.
+  ///
+  /// \param StringTable [out] - The string table data.
+  /// \param StringIndexMap [out] - Map from symbol names to offsets in the
+  /// string table.
+  void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+                          std::vector<MachSymbolData> &LocalSymbolData,
+                          std::vector<MachSymbolData> &ExternalSymbolData,
+                          std::vector<MachSymbolData> &UndefinedSymbolData) {
+    // Build section lookup table.
+    DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+    unsigned Index = 1;
+    for (MCAssembler::iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it, ++Index)
+      SectionIndexMap[&it->getSection()] = Index;
+    assert(Index <= 256 && "Too many sections!");
+
+    // Index 0 is always the empty string.
+    StringMap<uint64_t> StringIndexMap;
+    StringTable += '\x00';
+
+    // Build the symbol arrays and the string table, but only for non-local
+    // symbols.
+    //
+    // The particular order that we collect the symbols and create the string
+    // table, then sort the symbols is chosen to match 'as'. Even though it
+    // doesn't matter for correctness, this is important for letting us diff .o
+    // files.
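+    // (First pass below gathers external and undefined symbols; local symbols
+    // are gathered in a second pass further down.)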
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      const MCSymbol &Symbol = it->getSymbol();
+
+      // Ignore non-linker visible symbols.
+      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+        continue;
+
+      if (!it->isExternal() && !Symbol.isUndefined())
+        continue;
+
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Symbol.getName();
+        StringTable += '\x00';
+      }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      if (Symbol.isUndefined()) {
+        MSD.SectionIndex = 0;
+        UndefinedSymbolData.push_back(MSD);
+      } else if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        ExternalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        ExternalSymbolData.push_back(MSD);
+      }
+    }
+
+    // Now add the data for local symbols.
+    for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+           ie = Asm.symbol_end(); it != ie; ++it) {
+      const MCSymbol &Symbol = it->getSymbol();
+
+      // Ignore non-linker visible symbols.
+      if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+        continue;
+
+      if (it->isExternal() || Symbol.isUndefined())
+        continue;
+
+      uint64_t &Entry = StringIndexMap[Symbol.getName()];
+      if (!Entry) {
+        Entry = StringTable.size();
+        StringTable += Symbol.getName();
+        StringTable += '\x00';
+      }
+
+      MachSymbolData MSD;
+      MSD.SymbolData = it;
+      MSD.StringIndex = Entry;
+
+      if (Symbol.isAbsolute()) {
+        MSD.SectionIndex = 0;
+        LocalSymbolData.push_back(MSD);
+      } else {
+        MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+        assert(MSD.SectionIndex && "Invalid section index!");
+        LocalSymbolData.push_back(MSD);
+      }
+    }
+
+    // External and undefined symbols are required to be in lexicographic
+    // order.
+    std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+    std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+    // Set the symbol indices.
+    Index = 0;
+    for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+      LocalSymbolData[i].SymbolData->setIndex(Index++);
+    for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+      ExternalSymbolData[i].SymbolData->setIndex(Index++);
+    for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+      UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+    // The string table is padded to a multiple of 4.
+    while (StringTable.size() % 4)
+      StringTable += '\x00';
+  }
+
+  void computeSectionAddresses(const MCAssembler &Asm,
+                               const MCAsmLayout &Layout) {
+    uint64_t StartAddress = 0;
+    const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+    for (int i = 0, n = Order.size(); i != n; ++i) {
+      const MCSectionData *SD = Order[i];
+      StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+      SectionAddress[SD] = StartAddress;
+      StartAddress += Layout.getSectionAddressSize(SD);
+      // Explicitly pad the section to match the alignment requirements of the
+      // following one. This is for 'gas' compatibility, it shouldn't
+      // strictly be necessary.
+      StartAddress += getPaddingSize(SD, Layout);
+    }
+  }
+
+  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
+    computeSectionAddresses(Asm, Layout);
+
+    // Create symbol data for any indirect symbols.
+    BindIndirectSymbols(Asm);
+
+    // Compute symbol table information and bind symbol indices.
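+    // The index binding below yields the standard Mach-O nlist ordering:
+    //   [0, L)        local symbols, in visitation order
+    //   [L, L+E)      defined external symbols, sorted lexicographically
+    //   [L+E, L+E+U)  undefined symbols, sorted lexicographically
+    // where L, E and U are the sizes of the three symbol arrays.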
+    ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+                       UndefinedSymbolData);
+  }
+
+  virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+                                                      const MCSymbolData &DataA,
+                                                      const MCFragment &FB,
+                                                      bool InSet,
+                                                      bool IsPCRel) const {
+    if (InSet)
+      return true;
+
+    // The effective address is
+    //     addr(atom(A)) + offset(A)
+    //   - addr(atom(B)) - offset(B)
+    // and the offsets are not relocatable, so the fixup is fully resolved when
+    //  addr(atom(A)) - addr(atom(B)) == 0.
+    const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+    const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+    const MCSection &SecA = SA.getSection();
+    const MCSection &SecB = FB.getParent()->getSection();
+
+    if (IsPCRel) {
+      // The simple (Darwin, except on x86_64) way of dealing with this was to
+      // assume that any reference to a temporary symbol *must* be a temporary
+      // symbol in the same atom, unless the sections differ. Therefore, any
+      // PCrel relocation to a temporary symbol (in the same section) is fully
+      // resolved. This also works in conjunction with absolutized .set, which
+      // requires the compiler to use .set to absolutize the differences
+      // between symbols which the compiler knows to be assembly time
+      // constants, so we don't need to worry about considering symbol
+      // differences fully resolved.
+
+      if (!Asm.getBackend().hasReliableSymbolDifference()) {
+        if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+          return false;
+        return true;
+      }
+    } else {
+      if (!TargetObjectWriter->useAggressiveSymbolFolding())
+        return false;
+    }
+
+    const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
+
+    A_Base = FA.getAtom();
+    if (!A_Base)
+      return false;
+
+    B_Base = FB.getAtom();
+    if (!B_Base)
+      return false;
+
+    // If the atoms are the same, they are guaranteed to have the same address.
+    if (A_Base == B_Base)
+      return true;
+
+    // Otherwise, we can't prove this is fully resolved.
+    return false;
+  }
+
+  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
+    unsigned NumSections = Asm.size();
+
+    // The section data starts after the header, the segment load command (and
+    // section headers) and the symbol table.
+    unsigned NumLoadCommands = 1;
+    uint64_t LoadCommandsSize = is64Bit() ?
+      macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+      macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
+
+    // Add the symbol table load command sizes, if used.
+    unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+      UndefinedSymbolData.size();
+    if (NumSymbols) {
+      NumLoadCommands += 2;
+      LoadCommandsSize += (macho::SymtabLoadCommandSize +
+                           macho::DysymtabLoadCommandSize);
+    }
+
+    // Compute the total size of the section data, as well as its file size and
+    // vm size.
+    uint64_t SectionDataStart = (is64Bit() ?
+                                 macho::Header64Size :
+                                 macho::Header32Size) + LoadCommandsSize;
+    uint64_t SectionDataSize = 0;
+    uint64_t SectionDataFileSize = 0;
+    uint64_t VMSize = 0;
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      const MCSectionData &SD = *it;
+      uint64_t Address = getSectionAddress(&SD);
+      uint64_t Size = Layout.getSectionAddressSize(&SD);
+      uint64_t FileSize = Layout.getSectionFileSize(&SD);
+      FileSize += getPaddingSize(&SD, Layout);
+
+      VMSize = std::max(VMSize, Address + Size);
+
+      if (SD.getSection().isVirtualSection())
+        continue;
+
+      SectionDataSize = std::max(SectionDataSize, Address + Size);
+      SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
+    }
+
+    // The section data is padded to 4 bytes.
+    //
+    // FIXME: Is this machine dependent?
+    unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+    SectionDataFileSize += SectionDataPadding;
+
+    // Write the prolog, starting with the header and load command...
+    WriteHeader(NumLoadCommands, LoadCommandsSize,
+                Asm.getSubsectionsViaSymbols());
+    WriteSegmentLoadCommand(NumSections, VMSize,
+                            SectionDataStart, SectionDataSize);
+
+    // ... and then the section headers.
+    uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+      unsigned NumRelocs = Relocs.size();
+      uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+      WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+      RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+    }
+
+    // Write the symbol table load command, if used.
+    if (NumSymbols) {
+      unsigned FirstLocalSymbol = 0;
+      unsigned NumLocalSymbols = LocalSymbolData.size();
+      unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+      unsigned NumExternalSymbols = ExternalSymbolData.size();
+      unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+      unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+      unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+      unsigned NumSymTabSymbols =
+        NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+      uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+      uint64_t IndirectSymbolOffset = 0;
+
+      // If used, the indirect symbols are written after the section data.
+      if (NumIndirectSymbols)
+        IndirectSymbolOffset = RelocTableEnd;
+
+      // The symbol table is written after the indirect symbol data.
+      uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+      // The string table is written after the symbol table.
+      uint64_t StringTableOffset =
+        SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+                                                macho::Nlist32Size);
+      WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+                             StringTableOffset, StringTable.size());
+
+      WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+                               FirstExternalSymbol, NumExternalSymbols,
+                               FirstUndefinedSymbol, NumUndefinedSymbols,
+                               IndirectSymbolOffset, NumIndirectSymbols);
+    }
+
+    // Write the actual section data.
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      Asm.WriteSectionData(it, Layout);
+
+      uint64_t Pad = getPaddingSize(it, Layout);
+      for (unsigned int i = 0; i < Pad; ++i)
+        Write8(0);
+    }
+
+    // Write the extra padding.
+    WriteZeros(SectionDataPadding);
+
+    // Write the relocation entries.
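+    // Each entry goes out as the two raw 32-bit words packed in
+    // RecordRelocation above: Word0 holds the fixup address, Word1 the
+    // symbolnum/pcrel/length/extern/type bitfields (see <reloc.h>).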
+    for (MCAssembler::const_iterator it = Asm.begin(),
+           ie = Asm.end(); it != ie; ++it) {
+      // Write the section relocation entries, in reverse order to match 'as'
+      // (approximately, the exact algorithm is more complicated than this).
+      std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+      for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+        Write32(Relocs[e - i - 1].Word0);
+        Write32(Relocs[e - i - 1].Word1);
+      }
+    }
+
+    // Write the symbol table data, if used.
+    if (NumSymbols) {
+      // Write the indirect symbol entries.
+      for (MCAssembler::const_indirect_symbol_iterator
+             it = Asm.indirect_symbol_begin(),
+             ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+        // Indirect symbols in the non-lazy symbol pointer section have some
+        // special handling.
+        const MCSectionMachO &Section =
+          static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+        if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+          // If this symbol is defined and internal, mark it as such.
+          if (it->Symbol->isDefined() &&
+              !Asm.getSymbolData(*it->Symbol).isExternal()) {
+            uint32_t Flags = macho::ISF_Local;
+            if (it->Symbol->isAbsolute())
+              Flags |= macho::ISF_Absolute;
+            Write32(Flags);
+            continue;
+          }
+        }
+
+        Write32(Asm.getSymbolData(*it->Symbol).getIndex());
+      }
+
+      // FIXME: Check that offsets match computed ones.
+
+      // Write the symbol table entries.
+      for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+        WriteNlist(LocalSymbolData[i], Layout);
+      for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+        WriteNlist(ExternalSymbolData[i], Layout);
+      for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+        WriteNlist(UndefinedSymbolData[i], Layout);
+
+      // Write the string table.
+      OS << StringTable.str();
+    }
+  }
+};
+
+}
+
+MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
+                                             raw_ostream &OS,
+                                             bool IsLittleEndian) {
+  return new MachObjectWriter(MOTW, OS, IsLittleEndian);
+}
diff --git a/final/lib/MC/Makefile b/final/lib/MC/Makefile
new file mode 100644
index 00000000000..bf8b7c0e783
--- /dev/null
+++ b/final/lib/MC/Makefile
@@ -0,0 +1,16 @@
+##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMMC
+BUILD_ARCHIVE := 1
+PARALLEL_DIRS := MCParser MCDisassembler
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/MC/TargetAsmBackend.cpp b/final/lib/MC/TargetAsmBackend.cpp
new file mode 100644
index 00000000000..19275574253
--- /dev/null
+++ b/final/lib/MC/TargetAsmBackend.cpp
@@ -0,0 +1,37 @@
+//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+TargetAsmBackend::TargetAsmBackend()
+  : HasReliableSymbolDifference(false)
+{
+}
+
+TargetAsmBackend::~TargetAsmBackend() {
+}
+
+const MCFixupKindInfo &
+TargetAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+  static const MCFixupKindInfo Builtins[] = {
+    { "FK_Data_1", 0, 8, 0 },
+    { "FK_Data_2", 0, 16, 0 },
+    { "FK_Data_4", 0, 32, 0 },
+    { "FK_Data_8", 0, 64, 0 },
+    { "FK_PCRel_1", 0, 8, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_2", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_4", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
+    { "FK_PCRel_8", 0, 64, MCFixupKindInfo::FKF_IsPCRel }
+  };
+
+  assert((size_t)Kind < sizeof(Builtins) / sizeof(Builtins[0]) &&
+         "Unknown fixup kind");
+  return Builtins[Kind];
+}
diff --git a/final/lib/MC/WinCOFFObjectWriter.cpp b/final/lib/MC/WinCOFFObjectWriter.cpp
new file mode 100644
index 00000000000..6ca5d37fc32
--- /dev/null
+++ b/final/lib/MC/WinCOFFObjectWriter.cpp
@@ -0,0 +1,877 @@
+//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCSectionCOFF.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "llvm/Support/TimeValue.h"
+
+#include "../Target/X86/X86FixupKinds.h"
+
+#include <cstdio>
+
+using namespace llvm;
+
+namespace {
+typedef llvm::SmallString<COFF::NameSize> name;
+
+enum AuxiliaryType {
+  ATFunctionDefinition,
+  ATbfAndefSymbol,
+  ATWeakExternal,
+  ATFile,
+  ATSectionDefinition
+};
+
+struct AuxSymbol {
+  AuxiliaryType AuxType;
+  COFF::Auxiliary Aux;
+};
+
+class COFFSymbol;
+class COFFSection;
+
+class COFFSymbol {
+public:
+  COFF::symbol Data;
+
+  typedef llvm::SmallVector<AuxSymbol, 1> AuxiliarySymbols;
+
+  name Name;
+  int Index;
+  AuxiliarySymbols Aux;
+  COFFSymbol *Other;
+  COFFSection *Section;
+  int Relocations;
+
+  MCSymbolData const *MCData;
+
+  COFFSymbol(llvm::StringRef name);
+  size_t size() const;
+  void set_name_offset(uint32_t Offset);
+
+  bool should_keep() const;
+};
+
+// This class contains staging data for a COFF relocation entry.
+struct COFFRelocation {
+  COFF::relocation Data;
+  COFFSymbol *Symb;
+
+  COFFRelocation() : Symb(NULL) {}
+  static size_t size() { return COFF::RelocationSize; }
+};
+
+typedef std::vector<COFFRelocation> relocations;
+
+class COFFSection {
+public:
+  COFF::section Header;
+
+  std::string Name;
+  int Number;
+  MCSectionData const *MCData;
+  COFFSymbol *Symbol;
+  relocations Relocations;
+
+  COFFSection(llvm::StringRef name);
+  static size_t size();
+};
+
+// This class holds the COFF string table.
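+// On disk this is a 4-byte little-endian length (which counts the length
+// field itself, see update_length() below) followed by NUL-terminated
+// strings. For example, a table holding only ".debug_info" would be:
+//
+//   10 00 00 00  2e 64 65 62 75 67 5f 69 6e 66 6f 00
+//   (length 16)  (".debug_info\0" at offset 4)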
+class StringTable {
+  typedef llvm::StringMap<size_t> map;
+  map Map;
+
+  void update_length();
+public:
+  std::vector<char> Data;
+
+  StringTable();
+  size_t size() const;
+  size_t insert(llvm::StringRef String);
+};
+
+class WinCOFFObjectWriter : public MCObjectWriter {
+public:
+
+  typedef std::vector<COFFSymbol*> symbols;
+  typedef std::vector<COFFSection*> sections;
+
+  typedef DenseMap<MCSymbol const *, COFFSymbol *> symbol_map;
+  typedef DenseMap<MCSection const *, COFFSection *> section_map;
+
+  // Root level file contents.
+  bool Is64Bit;
+  COFF::header Header;
+  sections Sections;
+  symbols Symbols;
+  StringTable Strings;
+
+  // Maps used during object file creation.
+  section_map SectionMap;
+  symbol_map SymbolMap;
+
+  WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit);
+  ~WinCOFFObjectWriter();
+
+  COFFSymbol *createSymbol(StringRef Name);
+  COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
+  COFFSection *createSection(StringRef Name);
+
+  template <typename object_t, typename list_t>
+  object_t *createCOFFEntity(llvm::StringRef Name, list_t &List);
+
+  void DefineSection(MCSectionData const &SectionData);
+  void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler);
+
+  void MakeSymbolReal(COFFSymbol &S, size_t Index);
+  void MakeSectionReal(COFFSection &S, size_t Number);
+
+  bool ExportSection(COFFSection const *S);
+  bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm);
+
+  bool IsPhysicalSection(COFFSection *S);
+
+  // Entity writing methods.
+
+  void WriteFileHeader(const COFF::header &Header);
+  void WriteSymbol(const COFFSymbol *S);
+  void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
+  void WriteSectionHeader(const COFF::section &S);
+  void WriteRelocation(const COFF::relocation &R);
+
+  // MCObjectWriter interface implementation.
+
+  void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+  void RecordRelocation(const MCAssembler &Asm,
+                        const MCAsmLayout &Layout,
+                        const MCFragment *Fragment,
+                        const MCFixup &Fixup,
+                        MCValue Target,
+                        uint64_t &FixedValue);
+
+  void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+}
+
+static inline void write_uint32_le(void *Data, uint32_t const &Value) {
+  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+  Ptr[0] = (Value & 0x000000FF) >> 0;
+  Ptr[1] = (Value & 0x0000FF00) >> 8;
+  Ptr[2] = (Value & 0x00FF0000) >> 16;
+  Ptr[3] = (Value & 0xFF000000) >> 24;
+}
+
+static inline void write_uint16_le(void *Data, uint16_t const &Value) {
+  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+  Ptr[0] = (Value & 0x00FF) >> 0;
+  Ptr[1] = (Value & 0xFF00) >> 8;
+}
+
+static inline void write_uint8_le(void *Data, uint8_t const &Value) {
+  uint8_t *Ptr = reinterpret_cast<uint8_t *>(Data);
+  Ptr[0] = (Value & 0xFF) >> 0;
+}
+
+//------------------------------------------------------------------------------
+// Symbol class implementation
+
+COFFSymbol::COFFSymbol(llvm::StringRef name)
+  : Name(name.begin(), name.end())
+  , Other(NULL)
+  , Section(NULL)
+  , Relocations(0)
+  , MCData(NULL) {
+  memset(&Data, 0, sizeof(Data));
+}
+
+size_t COFFSymbol::size() const {
+  return COFF::SymbolSize + (Data.NumberOfAuxSymbols * COFF::SymbolSize);
+}
+
+// In the case that the name does not fit within 8 bytes, the offset
+// into the string table is stored in the last 4 bytes instead, leaving
+// the first 4 bytes as 0.
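+// For example, a string table offset of 0x24 yields the 8-byte Name field
+// 00 00 00 00 24 00 00 00 (both halves little endian).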
+void COFFSymbol::set_name_offset(uint32_t Offset) {
+  write_uint32_le(Data.Name + 0, 0);
+  write_uint32_le(Data.Name + 4, Offset);
+}
+
+/// Logic to decide if the symbol should be reported in the symbol table.
+bool COFFSymbol::should_keep() const {
+  // No section means it's external; keep it.
+  if (Section == NULL)
+    return true;
+
+  // If it has relocations pointing at it, keep it.
+  if (Relocations > 0) {
+    assert(Section->Number != -1 && "Sections with relocations must be real!");
+    return true;
+  }
+
+  // If the section it's in is being dropped, drop it.
+  if (Section->Number == -1)
+    return false;
+
+  // If it is the section symbol, keep it.
+  if (Section->Symbol == this)
+    return true;
+
+  // If it's temporary, drop it.
+  if (MCData && MCData->getSymbol().isTemporary())
+    return false;
+
+  // Otherwise, keep it.
+  return true;
+}
+
+//------------------------------------------------------------------------------
+// Section class implementation
+
+COFFSection::COFFSection(llvm::StringRef name)
+  : Name(name)
+  , MCData(NULL)
+  , Symbol(NULL) {
+  memset(&Header, 0, sizeof(Header));
+}
+
+size_t COFFSection::size() {
+  return COFF::SectionSize;
+}
+
+//------------------------------------------------------------------------------
+// StringTable class implementation
+
+/// Write the length of the string table into Data.
+/// The length of the string table includes the uint32 length header.
+void StringTable::update_length() {
+  write_uint32_le(&Data.front(), Data.size());
+}
+
+StringTable::StringTable() {
+  // The string table data begins with the length of the entire string table
+  // including the length header. Allocate space for this header.
+  Data.resize(4);
+}
+
+size_t StringTable::size() const {
+  return Data.size();
+}
+
+/// Add String to the table iff it is not already there.
+/// @returns the index into the string table where the string is now located.
+size_t StringTable::insert(llvm::StringRef String) {
+  map::iterator i = Map.find(String);
+
+  if (i != Map.end())
+    return i->second;
+
+  size_t Offset = Data.size();
+
+  // Insert string data into string table.
+  Data.insert(Data.end(), String.begin(), String.end());
+  Data.push_back('\0');
+
+  // Put a reference to it in the map.
+  Map[String] = Offset;
+
+  // Update the internal length field.
+  update_length();
+
+  return Offset;
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS, bool is64Bit)
+  : MCObjectWriter(OS, true)
+  , Is64Bit(is64Bit) {
+  memset(&Header, 0, sizeof(Header));
+
+  Header.Machine = Is64Bit ? COFF::IMAGE_FILE_MACHINE_AMD64
+                           : COFF::IMAGE_FILE_MACHINE_I386;
+}
+
+WinCOFFObjectWriter::~WinCOFFObjectWriter() {
+  for (symbols::iterator I = Symbols.begin(), E = Symbols.end(); I != E; ++I)
+    delete *I;
+  for (sections::iterator I = Sections.begin(), E = Sections.end(); I != E; ++I)
+    delete *I;
+}
+
+COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
+  return createCOFFEntity<COFFSymbol, symbols>(Name, Symbols);
+}
+
+COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol){
+  symbol_map::iterator i = SymbolMap.find(Symbol);
+  if (i != SymbolMap.end())
+    return i->second;
+  COFFSymbol *RetSymbol
+    = createCOFFEntity<COFFSymbol, symbols>(Symbol->getName(), Symbols);
+  SymbolMap[Symbol] = RetSymbol;
+  return RetSymbol;
+}
+
+COFFSection *WinCOFFObjectWriter::createSection(llvm::StringRef Name) {
+  return createCOFFEntity<COFFSection, sections>(Name, Sections);
+}
+
+/// A template used to lookup or create a symbol/section, and initialize it if
+/// needed.
+template <typename object_t, typename list_t>
+object_t *WinCOFFObjectWriter::createCOFFEntity(llvm::StringRef Name,
+                                                list_t &List) {
+  object_t *Object = new object_t(Name);
+
+  List.push_back(Object);
+
+  return Object;
+}
+
+/// This function takes a section data object from the assembler
+/// and creates the associated COFF section staging object.
+void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) {
+  assert(SectionData.getSection().getVariant() == MCSection::SV_COFF
+         && "Got non-COFF section in the COFF backend!");
+  // FIXME: Not sure how to verify this (at least in a debug build).
+  MCSectionCOFF const &Sec =
+    static_cast<MCSectionCOFF const &>(SectionData.getSection());
+
+  COFFSection *coff_section = createSection(Sec.getSectionName());
+  COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName());
+
+  coff_section->Symbol = coff_symbol;
+  coff_symbol->Section = coff_section;
+  coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_STATIC;
+
+  // In this case the auxiliary symbol is a Section Definition.
+  coff_symbol->Aux.resize(1);
+  memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+  coff_symbol->Aux[0].AuxType = ATSectionDefinition;
+  coff_symbol->Aux[0].Aux.SectionDefinition.Selection = Sec.getSelection();
+
+  coff_section->Header.Characteristics = Sec.getCharacteristics();
+
+  uint32_t &Characteristics = coff_section->Header.Characteristics;
+  switch (SectionData.getAlignment()) {
+  case 1:    Characteristics |= COFF::IMAGE_SCN_ALIGN_1BYTES;    break;
+  case 2:    Characteristics |= COFF::IMAGE_SCN_ALIGN_2BYTES;    break;
+  case 4:    Characteristics |= COFF::IMAGE_SCN_ALIGN_4BYTES;    break;
+  case 8:    Characteristics |= COFF::IMAGE_SCN_ALIGN_8BYTES;    break;
+  case 16:   Characteristics |= COFF::IMAGE_SCN_ALIGN_16BYTES;   break;
+  case 32:   Characteristics |= COFF::IMAGE_SCN_ALIGN_32BYTES;   break;
+  case 64:   Characteristics |= COFF::IMAGE_SCN_ALIGN_64BYTES;   break;
+  case 128:  Characteristics |= COFF::IMAGE_SCN_ALIGN_128BYTES;  break;
+  case 256:  Characteristics |= COFF::IMAGE_SCN_ALIGN_256BYTES;  break;
+  case 512:  Characteristics |= COFF::IMAGE_SCN_ALIGN_512BYTES;  break;
+  case 1024: Characteristics |= COFF::IMAGE_SCN_ALIGN_1024BYTES; break;
+  case 2048: Characteristics |= COFF::IMAGE_SCN_ALIGN_2048BYTES; break;
+  case 4096: Characteristics |= COFF::IMAGE_SCN_ALIGN_4096BYTES; break;
+  case 8192: Characteristics |= COFF::IMAGE_SCN_ALIGN_8192BYTES; break;
+  default:
+    llvm_unreachable("unsupported section alignment");
+  }
+
+  // Bind internal COFF section to MC section.
+  coff_section->MCData = &SectionData;
+  SectionMap[&SectionData.getSection()] = coff_section;
+}
+
+/// This function takes a symbol data object from the assembler
+/// and creates the associated COFF symbol staging object.
+void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData,
+                                       MCAssembler &Assembler) {
+  COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&SymbolData.getSymbol());
+
+  coff_symbol->Data.Type = (SymbolData.getFlags() & 0x0000FFFF) >> 0;
+  coff_symbol->Data.StorageClass = (SymbolData.getFlags() & 0x00FF0000) >> 16;
+
+  if (SymbolData.getFlags() & COFF::SF_WeakExternal) {
+    coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+
+    if (SymbolData.getSymbol().isVariable()) {
+      coff_symbol->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
+      const MCExpr *Value = SymbolData.getSymbol().getVariableValue();
+
+      // FIXME: This assert message isn't very good.
+      assert(Value->getKind() == MCExpr::SymbolRef &&
+             "Value must be a SymbolRef!");
+
+      const MCSymbolRefExpr *SymbolRef =
+        static_cast<const MCSymbolRefExpr *>(Value);
+      coff_symbol->Other = GetOrCreateCOFFSymbol(&SymbolRef->getSymbol());
+    } else {
+      std::string WeakName = std::string(".weak.")
+                           + SymbolData.getSymbol().getName().str()
+                           + ".default";
+      COFFSymbol *WeakDefault = createSymbol(WeakName);
+      WeakDefault->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
+      WeakDefault->Data.StorageClass = COFF::IMAGE_SYM_CLASS_EXTERNAL;
+      WeakDefault->Data.Type = 0;
+      WeakDefault->Data.Value = 0;
+      coff_symbol->Other = WeakDefault;
+    }
+
+    // Setup the Weak External auxiliary symbol.
+    coff_symbol->Aux.resize(1);
+    memset(&coff_symbol->Aux[0], 0, sizeof(coff_symbol->Aux[0]));
+    coff_symbol->Aux[0].AuxType = ATWeakExternal;
+    coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = 0;
+    coff_symbol->Aux[0].Aux.WeakExternal.Characteristics =
+      COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY;
+  }
+
+  // If no storage class was specified in the streamer, define it here.
+  if (coff_symbol->Data.StorageClass == 0) {
+    bool external = SymbolData.isExternal() || (SymbolData.Fragment == NULL);
+
+    coff_symbol->Data.StorageClass =
+      external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC;
+  }
+
+  if (SymbolData.Fragment != NULL)
+    coff_symbol->Section =
+      SectionMap[&SymbolData.Fragment->getParent()->getSection()];
+
+  // Bind internal COFF symbol to MC symbol.
+  coff_symbol->MCData = &SymbolData;
+  SymbolMap[&SymbolData.getSymbol()] = coff_symbol;
+}
+
+/// Making a section real involves assigning it a number and putting its
+/// name into the string table if needed.
+void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) {
+  if (S.Name.size() > COFF::NameSize) {
+    size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+    // FIXME: Why is this number 999999? This number is never mentioned in the
+    // spec. I'm assuming this is due to the printed value needing to fit into
+    // the S.Header.Name field. In which case why not 9999999 (7 9's instead of
+    // 6)? The spec does not state if this entry should be null terminated in
+    // this case, and thus this seems to be the best way to do it. I think I
+    // just solved my own FIXME...
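+    // For example, a hypothetical section named ".debug_aranges" (14 bytes)
+    // whose name lands at string table offset 52 gets "/52" as its header
+    // name. The 8-byte field fits '/' plus six digits and a trailing null,
+    // hence the 999999 cap below.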
+    if (StringTableEntry > 999999)
+      report_fatal_error("COFF string table is greater than 999999 bytes.");
+
+    std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry));
+  } else
+    std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
+
+  S.Number = Number;
+  S.Symbol->Data.SectionNumber = S.Number;
+  S.Symbol->Aux[0].Aux.SectionDefinition.Number = S.Number;
+}
+
+void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) {
+  if (S.Name.size() > COFF::NameSize) {
+    size_t StringTableEntry = Strings.insert(S.Name.c_str());
+
+    S.set_name_offset(StringTableEntry);
+  } else
+    std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
+  S.Index = Index;
+}
+
+bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) {
+  return !S->MCData->getFragmentList().empty();
+}
+
+bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData,
+                                       MCAssembler &Asm) {
+  // This doesn't seem to be right. Strings referred to from the .data section
+  // need symbols so they can be linked to code in the .text section, right?
+
+  // return Asm.isSymbolLinkerVisible(&SymbolData);
+
+  // For now, all non-variable symbols are exported;
+  // the linker will sort the rest out for us.
+  return SymbolData.isExternal() || !SymbolData.getSymbol().isVariable();
+}
+
+bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+  return (S->Header.Characteristics
+          & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) == 0;
+}
+
+//------------------------------------------------------------------------------
+// entity writing methods
+
+void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+  WriteLE16(Header.Machine);
+  WriteLE16(Header.NumberOfSections);
+  WriteLE32(Header.TimeDateStamp);
+  WriteLE32(Header.PointerToSymbolTable);
+  WriteLE32(Header.NumberOfSymbols);
+  WriteLE16(Header.SizeOfOptionalHeader);
+  WriteLE16(Header.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol *S) {
+  WriteBytes(StringRef(S->Data.Name, COFF::NameSize));
+  WriteLE32(S->Data.Value);
+  WriteLE16(S->Data.SectionNumber);
+  WriteLE16(S->Data.Type);
+  Write8(S->Data.StorageClass);
+  Write8(S->Data.NumberOfAuxSymbols);
+  WriteAuxiliarySymbols(S->Aux);
+}
+
+void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+                                        const COFFSymbol::AuxiliarySymbols &S) {
+  for (COFFSymbol::AuxiliarySymbols::const_iterator i = S.begin(), e = S.end();
+       i != e; ++i) {
+    switch (i->AuxType) {
+    case ATFunctionDefinition:
+      WriteLE32(i->Aux.FunctionDefinition.TagIndex);
+      WriteLE32(i->Aux.FunctionDefinition.TotalSize);
+      WriteLE32(i->Aux.FunctionDefinition.PointerToLinenumber);
+      WriteLE32(i->Aux.FunctionDefinition.PointerToNextFunction);
+      WriteZeros(sizeof(i->Aux.FunctionDefinition.unused));
+      break;
+    case ATbfAndefSymbol:
+      WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused1));
+      WriteLE16(i->Aux.bfAndefSymbol.Linenumber);
+      WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused2));
+      WriteLE32(i->Aux.bfAndefSymbol.PointerToNextFunction);
+      WriteZeros(sizeof(i->Aux.bfAndefSymbol.unused3));
+      break;
+    case ATWeakExternal:
+      WriteLE32(i->Aux.WeakExternal.TagIndex);
+      WriteLE32(i->Aux.WeakExternal.Characteristics);
+      WriteZeros(sizeof(i->Aux.WeakExternal.unused));
+      break;
+    case ATFile:
+      WriteBytes(
+        StringRef(reinterpret_cast<const char *>(i->Aux.File.FileName),
+                  sizeof(i->Aux.File.FileName)));
+      break;
+    case ATSectionDefinition:
+      WriteLE32(i->Aux.SectionDefinition.Length);
+      WriteLE16(i->Aux.SectionDefinition.NumberOfRelocations);
+      WriteLE16(i->Aux.SectionDefinition.NumberOfLinenumbers);
+      WriteLE32(i->Aux.SectionDefinition.CheckSum);
+      WriteLE16(i->Aux.SectionDefinition.Number);
+      Write8(i->Aux.SectionDefinition.Selection);
+      WriteZeros(sizeof(i->Aux.SectionDefinition.unused));
+      break;
+    }
+  }
+}
+
+void WinCOFFObjectWriter::WriteSectionHeader(const COFF::section &S) {
+  WriteBytes(StringRef(S.Name, COFF::NameSize));
+
+  WriteLE32(S.VirtualSize);
+  WriteLE32(S.VirtualAddress);
+  WriteLE32(S.SizeOfRawData);
+  WriteLE32(S.PointerToRawData);
+  WriteLE32(S.PointerToRelocations);
+  WriteLE32(S.PointerToLineNumbers);
+  WriteLE16(S.NumberOfRelocations);
+  WriteLE16(S.NumberOfLineNumbers);
+  WriteLE32(S.Characteristics);
+}
+
+void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+  WriteLE32(R.VirtualAddress);
+  WriteLE32(R.SymbolTableIndex);
+  WriteLE16(R.Type);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+                                                   const MCAsmLayout &Layout) {
+  // "Define" each section & symbol. This creates section & symbol
+  // entries in the staging area.
+
+  for (MCAssembler::const_iterator i = Asm.begin(), e = Asm.end(); i != e; i++)
+    DefineSection(*i);
+
+  for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(),
+                                          e = Asm.symbol_end(); i != e; i++) {
+    if (ExportSymbol(*i, Asm))
+      DefineSymbol(*i, Asm);
+  }
+}
+
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+                                           const MCAsmLayout &Layout,
+                                           const MCFragment *Fragment,
+                                           const MCFixup &Fixup,
+                                           MCValue Target,
+                                           uint64_t &FixedValue) {
+  assert(Target.getSymA() != NULL && "Relocation must reference a symbol!");
+
+  const MCSymbol *A = &Target.getSymA()->getSymbol();
+  MCSymbolData &A_SD = Asm.getSymbolData(*A);
+
+  MCSectionData const *SectionData = Fragment->getParent();
+
+  // Mark this symbol as requiring an entry in the symbol table.
+  assert(SectionMap.find(&SectionData->getSection()) != SectionMap.end() &&
+         "Section must already have been defined in ExecutePostLayoutBinding!");
+  assert(SymbolMap.find(&A_SD.getSymbol()) != SymbolMap.end() &&
+         "Symbol must already have been defined in ExecutePostLayoutBinding!");
+
+  COFFSection *coff_section = SectionMap[&SectionData->getSection()];
+  COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()];
+
+  if (Target.getSymB()) {
+    if (&Target.getSymA()->getSymbol().getSection()
+        != &Target.getSymB()->getSymbol().getSection()) {
+      llvm_unreachable("Symbol relative relocations are only allowed between "
+                       "symbols in the same section");
+    }
+    const MCSymbol *B = &Target.getSymB()->getSymbol();
+    MCSymbolData &B_SD = Asm.getSymbolData(*B);
+
+    FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD);
+
+    // In the case where we have SymbA and SymB, we just need to store the
+    // delta between the two symbols. Update FixedValue to account for the
+    // delta, and skip recording the relocation.
+    return;
+  } else {
+    FixedValue = Target.getConstant();
+  }
+
+  COFFRelocation Reloc;
+
+  Reloc.Data.SymbolTableIndex = 0;
+  Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+
+  // Turn relocations for temporary symbols into section relocations.
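+  // That is, a fixup against an assembler temporary is redirected to the
+  // enclosing section's symbol, with the temporary's offset in the section
+  // folded into FixedValue:
+  //
+  //   reloc(.Ltmp3)  ==>  reloc(.text) + offset-of(.Ltmp3 within .text)
+  //
+  // (.Ltmp3 is a made-up name; any temporary symbol behaves this way.)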
+  if (coff_symbol->MCData->getSymbol().isTemporary()) {
+    Reloc.Symb = coff_symbol->Section->Symbol;
+    FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment)
+                + coff_symbol->MCData->getOffset();
+  } else
+    Reloc.Symb = coff_symbol;
+
+  ++Reloc.Symb->Relocations;
+
+  Reloc.Data.VirtualAddress += Fixup.getOffset();
+
+  switch ((unsigned)Fixup.getKind()) {
+  case FK_PCRel_4:
+  case X86::reloc_riprel_4byte:
+  case X86::reloc_riprel_4byte_movq_load:
+    Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_REL32
+                              : COFF::IMAGE_REL_I386_REL32;
+    // FIXME: Can anyone explain what this does other than adjust for the size
+    // of the offset?
+    FixedValue += 4;
+    break;
+  case FK_Data_4:
+  case X86::reloc_signed_4byte:
+    Reloc.Data.Type = Is64Bit ? COFF::IMAGE_REL_AMD64_ADDR32
+                              : COFF::IMAGE_REL_I386_DIR32;
+    break;
+  case FK_Data_8:
+    if (Is64Bit)
+      Reloc.Data.Type = COFF::IMAGE_REL_AMD64_ADDR64;
+    else
+      llvm_unreachable("unsupported relocation type");
+    break;
+  default:
+    llvm_unreachable("unsupported relocation type");
+  }
+
+  coff_section->Relocations.push_back(Reloc);
+}
+
+void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm,
+                                      const MCAsmLayout &Layout) {
+  // Assign symbol and section indexes and offsets.
+  Header.NumberOfSections = 0;
+
+  for (sections::iterator i = Sections.begin(),
+                          e = Sections.end(); i != e; i++) {
+    if (Layout.getSectionAddressSize((*i)->MCData) > 0) {
+      MakeSectionReal(**i, ++Header.NumberOfSections);
+    } else {
+      (*i)->Number = -1;
+    }
+  }
+
+  Header.NumberOfSymbols = 0;
+
+  for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+    COFFSymbol *coff_symbol = *i;
+    MCSymbolData const *SymbolData = coff_symbol->MCData;
+
+    // Update section number & offset for symbols that have them.
+    if ((SymbolData != NULL) && (SymbolData->Fragment != NULL)) {
+      assert(coff_symbol->Section != NULL);
+
+      coff_symbol->Data.SectionNumber = coff_symbol->Section->Number;
+      coff_symbol->Data.Value = Layout.getFragmentOffset(SymbolData->Fragment)
+                              + SymbolData->Offset;
+    }
+
+    if (coff_symbol->should_keep()) {
+      MakeSymbolReal(*coff_symbol, Header.NumberOfSymbols++);
+
+      // Update auxiliary symbol info.
+      coff_symbol->Data.NumberOfAuxSymbols = coff_symbol->Aux.size();
+      Header.NumberOfSymbols += coff_symbol->Data.NumberOfAuxSymbols;
+    } else
+      coff_symbol->Index = -1;
+  }
+
+  // Fixup weak external references.
+  for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++) {
+    COFFSymbol *coff_symbol = *i;
+    if (coff_symbol->Other != NULL) {
+      assert(coff_symbol->Index != -1);
+      assert(coff_symbol->Aux.size() == 1 &&
+             "Symbol must contain one aux symbol!");
+      assert(coff_symbol->Aux[0].AuxType == ATWeakExternal &&
+             "Symbol's aux symbol must be a Weak External!");
+      coff_symbol->Aux[0].Aux.WeakExternal.TagIndex = coff_symbol->Other->Index;
+    }
+  }
+
+  // Assign file offsets to COFF object file structures.
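+  // The offsets assigned below produce this overall file layout:
+  //
+  //   COFF file header
+  //   section headers (Header.NumberOfSections entries)
+  //   per section: raw data, then its relocation entries
+  //   symbol table (at Header.PointerToSymbolTable)
+  //   string table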
+
+  unsigned offset = 0;
+
+  offset += COFF::HeaderSize;
+  offset += COFF::SectionSize * Header.NumberOfSections;
+
+  for (MCAssembler::const_iterator i = Asm.begin(),
+                                   e = Asm.end();
+                                   i != e; i++) {
+    COFFSection *Sec = SectionMap[&i->getSection()];
+
+    if (Sec->Number == -1)
+      continue;
+
+    Sec->Header.SizeOfRawData = Layout.getSectionAddressSize(i);
+
+    if (IsPhysicalSection(Sec)) {
+      Sec->Header.PointerToRawData = offset;
+
+      offset += Sec->Header.SizeOfRawData;
+    }
+
+    if (Sec->Relocations.size() > 0) {
+      Sec->Header.NumberOfRelocations = Sec->Relocations.size();
+      Sec->Header.PointerToRelocations = offset;
+
+      offset += COFF::RelocationSize * Sec->Relocations.size();
+
+      for (relocations::iterator cr = Sec->Relocations.begin(),
+                                 er = Sec->Relocations.end();
+                                 cr != er; ++cr) {
+        assert((*cr).Symb->Index != -1);
+        (*cr).Data.SymbolTableIndex = (*cr).Symb->Index;
+      }
+    }
+
+    assert(Sec->Symbol->Aux.size() == 1
+           && "Section's symbol must have one aux!");
+    AuxSymbol &Aux = Sec->Symbol->Aux[0];
+    assert(Aux.AuxType == ATSectionDefinition &&
+           "Section's symbol's aux symbol must be a Section Definition!");
+    Aux.Aux.SectionDefinition.Length = Sec->Header.SizeOfRawData;
+    Aux.Aux.SectionDefinition.NumberOfRelocations =
+      Sec->Header.NumberOfRelocations;
+    Aux.Aux.SectionDefinition.NumberOfLinenumbers =
+      Sec->Header.NumberOfLineNumbers;
+  }
+
+  Header.PointerToSymbolTable = offset;
+
+  Header.TimeDateStamp = sys::TimeValue::now().toEpochTime();
+
+  // Write it all to disk...
+  WriteFileHeader(Header);
+
+  {
+    sections::iterator i, ie;
+    MCAssembler::const_iterator j, je;
+
+    for (i = Sections.begin(), ie = Sections.end(); i != ie; i++)
+      if ((*i)->Number != -1)
+        WriteSectionHeader((*i)->Header);
+
+    for (i = Sections.begin(), ie = Sections.end(),
+         j = Asm.begin(), je = Asm.end();
+         (i != ie) && (j != je); ++i, ++j) {
+
+      if ((*i)->Number == -1)
+        continue;
+
+      if ((*i)->Header.PointerToRawData != 0) {
+        assert(OS.tell() == (*i)->Header.PointerToRawData &&
+               "Section::PointerToRawData is insane!");
+
+        Asm.WriteSectionData(j, Layout);
+      }
+
+      if ((*i)->Relocations.size() > 0) {
+        assert(OS.tell() == (*i)->Header.PointerToRelocations &&
+               "Section::PointerToRelocations is insane!");
+
+        for (relocations::const_iterator k = (*i)->Relocations.begin(),
+                                         ke = (*i)->Relocations.end();
+                                         k != ke; k++) {
+          WriteRelocation(k->Data);
+        }
+      } else
+        assert((*i)->Header.PointerToRelocations == 0 &&
+               "Section::PointerToRelocations is insane!");
+    }
+  }
+
+  assert(OS.tell() == Header.PointerToSymbolTable &&
+         "Header::PointerToSymbolTable is insane!");
+
+  for (symbols::iterator i = Symbols.begin(), e = Symbols.end(); i != e; i++)
+    if ((*i)->Index != -1)
+      WriteSymbol(*i);
+
+  OS.write((char const *)&Strings.Data.front(), Strings.Data.size());
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter factory function
+
+namespace llvm {
+  MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS, bool is64Bit) {
+    return new WinCOFFObjectWriter(OS, is64Bit);
+  }
+}
diff --git a/final/lib/MC/WinCOFFStreamer.cpp b/final/lib/MC/WinCOFFStreamer.cpp
new file mode 100644
index 00000000000..46968e601be
--- /dev/null
+++ b/final/lib/MC/WinCOFFStreamer.cpp
@@ -0,0 +1,395 @@
+//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file streamer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFStreamer"
+
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/ADT/StringMap.h"
+
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+class WinCOFFStreamer : public MCObjectStreamer {
+public:
+  MCSymbol const *CurSymbol;
+
+  WinCOFFStreamer(MCContext &Context,
+                  TargetAsmBackend &TAB,
+                  MCCodeEmitter &CE,
+                  raw_ostream &OS);
+
+  void AddCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                       unsigned ByteAlignment, bool External);
+
+  // MCStreamer interface
+
+  virtual void InitSections();
+  virtual void EmitLabel(MCSymbol *Symbol);
+  virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+  virtual void EmitThumbFunc(MCSymbol *Func);
+  virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+  virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+  virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
+  virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+  virtual void EmitCOFFSymbolType(int Type);
+  virtual void EndCOFFSymbolDef();
+  virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+  virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                unsigned ByteAlignment);
+  virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+  virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                            unsigned Size, unsigned ByteAlignment);
+  virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+                              uint64_t Size, unsigned ByteAlignment);
+  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+  virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+                                    unsigned ValueSize,
+                                    unsigned MaxBytesToEmit);
+  virtual void EmitCodeAlignment(unsigned ByteAlignment,
+                                 unsigned MaxBytesToEmit);
+  virtual void EmitFileDirective(StringRef Filename);
+  virtual void EmitInstruction(const MCInst &Instruction);
+  virtual void Finish();
+
+private:
+  virtual void EmitInstToFragment(const MCInst &Inst) {
+    llvm_unreachable("Not used by WinCOFF.");
+  }
+  virtual void EmitInstToData(const MCInst &Inst) {
+    llvm_unreachable("Not used by WinCOFF.");
+  }
+
+  void SetSection(StringRef Section,
+                  unsigned Characteristics,
+                  SectionKind Kind) {
+    SwitchSection(getContext().getCOFFSection(Section, Characteristics, Kind));
+  }
+
+  void SetSectionText() {
+    SetSection(".text",
+               COFF::IMAGE_SCN_CNT_CODE
+             | COFF::IMAGE_SCN_MEM_EXECUTE
+             | COFF::IMAGE_SCN_MEM_READ,
+               SectionKind::getText());
+    EmitCodeAlignment(4, 0);
+  }
+
+  void SetSectionData() {
+    SetSection(".data",
+               COFF::IMAGE_SCN_CNT_INITIALIZED_DATA
+             | COFF::IMAGE_SCN_MEM_READ
+             | COFF::IMAGE_SCN_MEM_WRITE,
+               SectionKind::getDataRel());
+    EmitCodeAlignment(4, 0);
+  }
+
+  void SetSectionBSS() {
SetSection(".bss", + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA + | COFF::IMAGE_SCN_MEM_READ + | COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } + +}; +} // end anonymous namespace. + +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) + : MCObjectStreamer(Context, TAB, OS, &CE) + , CurSymbol(NULL) { +} + +void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment, bool External) { + assert(!Symbol->isInSection() && "Symbol must not already have a section!"); + + std::string SectionName(".bss$linkonce"); + SectionName.append(Symbol->getName().begin(), Symbol->getName().end()); + + MCSymbolData &SymbolData = getAssembler().getOrCreateSymbolData(*Symbol); + + unsigned Characteristics = + COFF::IMAGE_SCN_LNK_COMDAT | + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST; + + const MCSection *Section = MCStreamer::getContext().getCOFFSection( + SectionName, Characteristics, Selection, SectionKind::getBSS()); + + MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section); + + if (SectionData.getAlignment() < ByteAlignment) + SectionData.setAlignment(ByteAlignment); + + SymbolData.setExternal(External); + + Symbol->setSection(*Section); + + if (ByteAlignment != 1) + new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData); + + SymbolData.setFragment(new MCFillFragment(0, 0, Size, &SectionData)); +} + +// MCStreamer interface + +void WinCOFFStreamer::InitSections() { + SetSectionText(); + SetSectionData(); + SetSectionBSS(); + SetSectionText(); +} + +void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { + assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + MCObjectStreamer::EmitLabel(Symbol); +} + +void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { + llvm_unreachable("not implemented"); +} + +void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + assert((Symbol->isInSection() + ? Symbol->getSection().getVariant() == MCSection::SV_COFF + : true) && "Got non COFF section in the COFF backend!"); + // FIXME: This is all very ugly and depressing. What needs to happen here + // depends on quite a few things that are all part of relaxation, which we + // don't really even do. + + if (Value->getKind() != MCExpr::SymbolRef) { + // TODO: This is exactly the same as MachOStreamer. Consider merging into + // MCObjectStreamer. + getAssembler().getOrCreateSymbolData(*Symbol); + AddValueSymbols(Value); + Symbol->setVariableValue(Value); + } else { + // FIXME: This is a horrible way to do this :(. This should really be + // handled after we are done with the MC* objects and immediately before + // writing out the object file when we know exactly what the symbol should + // look like in the coff symbol table. I'm not doing that now because the + // COFF object writer doesn't have a clearly defined separation between MC + // data structures, the object writers data structures, and the raw, POD, + // data structures that get written to disk. + + // Copy over the aliased data. 
+    MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+    const MCSymbolData &RealSD = getAssembler().getOrCreateSymbolData(
+      dyn_cast<const MCSymbolRefExpr>(Value)->getSymbol());
+
+    // FIXME: This is particularly nasty because it breaks as soon as any data
+    // members of MCSymbolData change.
+    SD.CommonAlign = RealSD.CommonAlign;
+    SD.CommonSize = RealSD.CommonSize;
+    SD.Flags = RealSD.Flags;
+    SD.Fragment = RealSD.Fragment;
+    SD.Index = RealSD.Index;
+    SD.IsExternal = RealSD.IsExternal;
+    SD.IsPrivateExtern = RealSD.IsPrivateExtern;
+    SD.Offset = RealSD.Offset;
+    SD.SymbolSize = RealSD.SymbolSize;
+  }
+}
+
+void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+                                          MCSymbolAttr Attribute) {
+  assert(Symbol && "Symbol must be non-null!");
+  assert((Symbol->isInSection()
+          ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+          : true) && "Got non-COFF section in the COFF backend!");
+  switch (Attribute) {
+  case MCSA_WeakReference:
+  case MCSA_Weak: {
+      MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
+      SD.modifyFlags(COFF::SF_WeakExternal, COFF::SF_WeakExternal);
+      SD.setExternal(true);
+    }
+    break;
+
+  case MCSA_Global:
+    getAssembler().getOrCreateSymbolData(*Symbol).setExternal(true);
+    break;
+
+  default:
+    llvm_unreachable("unsupported attribute");
+    break;
+  }
+}
+
+void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+  llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+  assert((Symbol->isInSection()
+          ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+          : true) && "Got non-COFF section in the COFF backend!");
+  assert(CurSymbol == NULL && "EndCOFFSymbolDef must be called between calls "
+                              "to BeginCOFFSymbolDef!");
+  CurSymbol = Symbol;
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+  assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+  assert((StorageClass & ~0xFF) == 0 && "StorageClass must only have data in "
+                                        "the first byte!");
+
+  getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+    StorageClass << COFF::SF_ClassShift,
+    COFF::SF_ClassMask);
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+  assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+  assert((Type & ~0xFFFF) == 0 && "Type must only have data in the first 2 "
+                                  "bytes");
+
+  getAssembler().getOrCreateSymbolData(*CurSymbol).modifyFlags(
+    Type << COFF::SF_TypeShift,
+    COFF::SF_TypeMask);
+}
+
+void WinCOFFStreamer::EndCOFFSymbolDef() {
+  assert(CurSymbol != NULL && "BeginCOFFSymbolDef must be called first!");
+  CurSymbol = NULL;
+}
+
+void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+  llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+                                       unsigned ByteAlignment) {
+  assert((Symbol->isInSection()
+          ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+          : true) && "Got non-COFF section in the COFF backend!");
+  AddCommonSymbol(Symbol, Size, ByteAlignment, true);
+}
+
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+  assert((Symbol->isInSection()
+          ? Symbol->getSection().getVariant() == MCSection::SV_COFF
+          : true) && "Got non-COFF section in the COFF backend!");
+  AddCommonSymbol(Symbol, Size, 1, false);
+}
+
+void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+                                   unsigned Size, unsigned ByteAlignment) {
+  llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section,
+                                     MCSymbol *Symbol, uint64_t Size,
+                                     unsigned ByteAlignment) {
+  llvm_unreachable("not implemented");
+}
+
+void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+  // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+  // MCObjectStreamer?
+  getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
+}
+
+void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+                                           int64_t Value,
+                                           unsigned ValueSize,
+                                           unsigned MaxBytesToEmit) {
+  // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+  // MCObjectStreamer?
+  if (MaxBytesToEmit == 0)
+    MaxBytesToEmit = ByteAlignment;
+  new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+                      getCurrentSectionData());
+
+  // Update the maximum alignment on the current section if necessary.
+  if (ByteAlignment > getCurrentSectionData()->getAlignment())
+    getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+                                        unsigned MaxBytesToEmit) {
+  // TODO: This is copied exactly from the MachOStreamer. Consider merging into
+  // MCObjectStreamer?
+  if (MaxBytesToEmit == 0)
+    MaxBytesToEmit = ByteAlignment;
+  MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+                                           getCurrentSectionData());
+  F->setEmitNops(true);
+
+  // Update the maximum alignment on the current section if necessary.
+  if (ByteAlignment > getCurrentSectionData()->getAlignment())
+    getCurrentSectionData()->setAlignment(ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+  // Ignore for now, linkers don't care, and proper debug
+  // info will be a much larger effort.
+}
+
+void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
+  for (unsigned i = 0, e = Instruction.getNumOperands(); i != e; ++i)
+    if (Instruction.getOperand(i).isExpr())
+      AddValueSymbols(Instruction.getOperand(i).getExpr());
+
+  getCurrentSectionData()->setHasInstructions(true);
+
+  MCInstFragment *Fragment =
+    new MCInstFragment(Instruction, getCurrentSectionData());
+
+  raw_svector_ostream VecOS(Fragment->getCode());
+
+  getAssembler().getEmitter().EncodeInstruction(Instruction, VecOS,
+                                                Fragment->getFixups());
+}
+
+void WinCOFFStreamer::Finish() {
+  MCObjectStreamer::Finish();
+}
+
+namespace llvm {
+  MCStreamer *createWinCOFFStreamer(MCContext &Context,
+                                    TargetAsmBackend &TAB,
+                                    MCCodeEmitter &CE,
+                                    raw_ostream &OS,
+                                    bool RelaxAll) {
+    WinCOFFStreamer *S = new WinCOFFStreamer(Context, TAB, CE, OS);
+    S->getAssembler().setRelaxAll(RelaxAll);
+    return S;
+  }
+}
diff --git a/final/lib/Makefile b/final/lib/Makefile
new file mode 100644
index 00000000000..ed27854f22c
--- /dev/null
+++ b/final/lib/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Makefile ----------------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ..
+
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS := VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
+                 Target ExecutionEngine Linker MC CompilerDriver Object
+
+include $(LEVEL)/Makefile.common
+
diff --git a/final/lib/Object/CMakeLists.txt b/final/lib/Object/CMakeLists.txt
new file mode 100644
index 00000000000..6a6814fd37d
--- /dev/null
+++ b/final/lib/Object/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_llvm_library(LLVMObject
+  MachOObject.cpp
+  ObjectFile.cpp
+  COFFObjectFile.cpp
+  ELFObjectFile.cpp
+  )
diff --git a/final/lib/Object/COFFObjectFile.cpp b/final/lib/Object/COFFObjectFile.cpp
new file mode 100644
index 00000000000..cfee82a0b21
--- /dev/null
+++ b/final/lib/Object/COFFObjectFile.cpp
@@ -0,0 +1,375 @@
+//===- COFFObjectFile.cpp - COFF object file implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+using support::ulittle8_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+using support::little16_t;
+}
+
+namespace {
+struct coff_file_header {
+  ulittle16_t Machine;
+  ulittle16_t NumberOfSections;
+  ulittle32_t TimeDateStamp;
+  ulittle32_t PointerToSymbolTable;
+  ulittle32_t NumberOfSymbols;
+  ulittle16_t SizeOfOptionalHeader;
+  ulittle16_t Characteristics;
+};
+}
+
+extern char coff_file_header_layout_static_assert
+  [sizeof(coff_file_header) == 20 ? 1 : -1];
+
+namespace {
+struct coff_symbol {
+  struct StringTableOffset {
+    ulittle32_t Zeroes;
+    ulittle32_t Offset;
+  };
+
+  union {
+    char ShortName[8];
+    StringTableOffset Offset;
+  } Name;
+
+  ulittle32_t Value;
+  little16_t SectionNumber;
+
+  struct {
+    ulittle8_t BaseType;
+    ulittle8_t ComplexType;
+  } Type;
+
+  ulittle8_t StorageClass;
+  ulittle8_t NumberOfAuxSymbols;
+};
+}
+
+extern char coff_coff_symbol_layout_static_assert
+  [sizeof(coff_symbol) == 18 ? 1 : -1];
+
+namespace {
+struct coff_section {
+  char Name[8];
+  ulittle32_t VirtualSize;
+  ulittle32_t VirtualAddress;
+  ulittle32_t SizeOfRawData;
+  ulittle32_t PointerToRawData;
+  ulittle32_t PointerToRelocations;
+  ulittle32_t PointerToLinenumbers;
+  ulittle16_t NumberOfRelocations;
+  ulittle16_t NumberOfLinenumbers;
+  ulittle32_t Characteristics;
+};
+}
+
+extern char coff_coff_section_layout_static_assert
+  [sizeof(coff_section) == 40 ? 1 : -1];
1 : -1]; + +namespace { +class COFFObjectFile : public ObjectFile { +private: + const coff_file_header *Header; + const coff_section *SectionTable; + const coff_symbol *SymbolTable; + const char *StringTable; + + const coff_section *getSection(std::size_t index) const; + const char *getString(std::size_t offset) const; + +protected: + virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; + virtual StringRef getSymbolName(DataRefImpl Symb) const; + virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; + virtual uint64_t getSymbolSize(DataRefImpl Symb) const; + virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; + virtual bool isSymbolInternal(DataRefImpl Symb) const; + + virtual SectionRef getSectionNext(DataRefImpl Sec) const; + virtual StringRef getSectionName(DataRefImpl Sec) const; + virtual uint64_t getSectionAddress(DataRefImpl Sec) const; + virtual uint64_t getSectionSize(DataRefImpl Sec) const; + virtual StringRef getSectionContents(DataRefImpl Sec) const; + virtual bool isSectionText(DataRefImpl Sec) const; + +public: + COFFObjectFile(MemoryBuffer *Object); + virtual symbol_iterator begin_symbols() const; + virtual symbol_iterator end_symbols() const; + virtual section_iterator begin_sections() const; + virtual section_iterator end_sections() const; + + virtual uint8_t getBytesInAddress() const; + virtual StringRef getFileFormatName() const; + virtual unsigned getArch() const; +}; +} // end namespace + +SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const { + const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + symb += 1 + symb->NumberOfAuxSymbols; + Symb.p = reinterpret_cast<intptr_t>(symb); + return SymbolRef(Symb, this); +} + +StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const { + const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + // Check for string table entry. First 4 bytes are 0. + if (symb->Name.Offset.Zeroes == 0) { + uint32_t Offset = symb->Name.Offset.Offset; + return StringRef(getString(Offset)); + } + + if (symb->Name.ShortName[7] == 0) + // Null terminated, let ::strlen figure out the length. + return StringRef(symb->Name.ShortName); + // Not null terminated, use all 8 bytes. + return StringRef(symb->Name.ShortName, 8); +} + +uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const { + const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + const coff_section *Section = getSection(symb->SectionNumber); + char Type = getSymbolNMTypeChar(Symb); + if (Type == 'U' || Type == 'w') + return UnknownAddressOrSize; + if (Section) + return Section->VirtualAddress + symb->Value; + return symb->Value; +} + +uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const { + // FIXME: Return the correct size. This requires looking at all the symbols + // in the same section as this symbol, and looking for either the next + // symbol, or the end of the section.
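+ // One way to implement the FIXME above (an illustrative sketch only, using
+ // the tables declared earlier in this file): scan the symbol table for the
+ // closest following symbol in the same section, falling back to the end of
+ // the section's raw data:
+ //   uint64_t NextValue = Section ? Section->SizeOfRawData : 0;
+ //   for (const coff_symbol *i = SymbolTable,
+ //                          *e = SymbolTable + Header->NumberOfSymbols;
+ //        i < e; i += 1 + i->NumberOfAuxSymbols)
+ //     if (i->SectionNumber == symb->SectionNumber && i->Value > symb->Value)
+ //       NextValue = std::min(NextValue, uint64_t(i->Value));
+ //   return NextValue - symb->Value;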
+ const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + const coff_section *Section = getSection(symb->SectionNumber); + char Type = getSymbolNMTypeChar(Symb); + if (Type == 'U' || Type == 'w') + return UnknownAddressOrSize; + if (Section) + return Section->SizeOfRawData - symb->Value; + return 0; +} + +char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { + const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p); + char ret = StringSwitch<char>(getSymbolName(Symb)) + .StartsWith(".debug", 'N') + .StartsWith(".sxdata", 'N') + .Default('?'); + + if (ret != '?') + return ret; + + uint32_t Characteristics = 0; + uint32_t PointerToRawData = 0; + const coff_section *Section = getSection(symb->SectionNumber); + if (Section) { + Characteristics = Section->Characteristics; + PointerToRawData = Section->PointerToRawData; + } + + switch (symb->SectionNumber) { + case COFF::IMAGE_SYM_UNDEFINED: + // Check storage classes. + if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) + return 'w'; // Don't do ::toupper. + else + ret = 'u'; + break; + case COFF::IMAGE_SYM_ABSOLUTE: + ret = 'a'; + break; + case COFF::IMAGE_SYM_DEBUG: + ret = 'n'; + break; + default: + // Check section type. + if (Characteristics & COFF::IMAGE_SCN_CNT_CODE) + ret = 't'; + else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ + && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only. + ret = 'r'; + else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) + ret = 'd'; + else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) + ret = 'b'; + else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO) + ret = 'i'; + + // Check for section symbol. + else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC + && symb->Value == 0) + ret = 's'; + } + + if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL) + ret = ::toupper(ret); + + return ret; +} + +bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const { + return false; +} + +SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + sec += 1; + Sec.p = reinterpret_cast<intptr_t>(sec); + return SectionRef(Sec, this); +} + +StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + StringRef name; + if (sec->Name[7] == 0) + // Null terminated, let ::strlen figure out the length. + name = sec->Name; + else + // Not null terminated, use all 8 bytes. + name = StringRef(sec->Name, 8); + + // Check for string table entry. First byte is '/'. + if (name[0] == '/') { + uint32_t Offset; + name.substr(1).getAsInteger(10, Offset); + return StringRef(getString(Offset)); + } + + // It's just a normal name.
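+ // For example, a header whose name field holds "/4" refers to the string at
+ // offset 4 in the COFF string table (long names such as ".debug_frame" do
+ // not fit in the 8-byte field), while short names like ".text" are stored
+ // inline and handled above.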
+ return name; +} + +uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + return sec->VirtualAddress; +} + +uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + return sec->SizeOfRawData; +} + +StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + return StringRef(reinterpret_cast<const char*>(base + sec->PointerToRawData), + sec->SizeOfRawData); +} + +bool COFFObjectFile::isSectionText(DataRefImpl Sec) const { + const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p); + return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; +} + +COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) + : ObjectFile(Object) { + Header = reinterpret_cast<const coff_file_header *>(base); + SectionTable = + reinterpret_cast<const coff_section *>( base + + sizeof(coff_file_header) + + Header->SizeOfOptionalHeader); + SymbolTable = + reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable); + + // Find string table. + StringTable = reinterpret_cast<const char *>(base) + + Header->PointerToSymbolTable + + Header->NumberOfSymbols * 18; +} + +ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(SymbolTable); + return symbol_iterator(SymbolRef(ret, this)); +} + +ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { + // The symbol table ends where the string table begins. + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(StringTable); + return symbol_iterator(SymbolRef(ret, this)); +} + +ObjectFile::section_iterator COFFObjectFile::begin_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(SectionTable); + return section_iterator(SectionRef(ret, this)); +} + +ObjectFile::section_iterator COFFObjectFile::end_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections); + return section_iterator(SectionRef(ret, this)); +} + +uint8_t COFFObjectFile::getBytesInAddress() const { + return getArch() == Triple::x86_64 ? 8 : 4; +} + +StringRef COFFObjectFile::getFileFormatName() const { + switch(Header->Machine) { + case COFF::IMAGE_FILE_MACHINE_I386: + return "COFF-i386"; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return "COFF-x86-64"; + default: + return "COFF-<unknown arch>"; + } +} + +unsigned COFFObjectFile::getArch() const { + switch(Header->Machine) { + case COFF::IMAGE_FILE_MACHINE_I386: + return Triple::x86; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return Triple::x86_64; + default: + return Triple::UnknownArch; + } +} + +const coff_section *COFFObjectFile::getSection(std::size_t index) const { + if (index > 0 && index <= Header->NumberOfSections) + return SectionTable + (index - 1); + return 0; +} + +const char *COFFObjectFile::getString(std::size_t offset) const { + const ulittle32_t *StringTableSize = + reinterpret_cast<const ulittle32_t *>(StringTable); + if (offset < *StringTableSize) + return StringTable + offset; + return 0; +} + +namespace llvm { + + ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) { + return new COFFObjectFile(Object); + } + +} // end namespace llvm diff --git a/final/lib/Object/ELFObjectFile.cpp b/final/lib/Object/ELFObjectFile.cpp new file mode 100644 index 00000000000..682be770f48 --- /dev/null +++ b/final/lib/Object/ELFObjectFile.cpp @@ -0,0 +1,686 @@ +//===- ELFObjectFile.cpp - ELF object file implementation -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License.
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ELFObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include <limits> +#include <utility> + +using namespace llvm; +using namespace object; + +// Templates to choose Elf_Addr and Elf_Off depending on is64Bits. +namespace { +template<support::endianness target_endianness> +struct ELFDataTypeTypedefHelperCommon { + typedef support::detail::packed_endian_specific_integral + <uint16_t, target_endianness, support::aligned> Elf_Half; + typedef support::detail::packed_endian_specific_integral + <uint32_t, target_endianness, support::aligned> Elf_Word; + typedef support::detail::packed_endian_specific_integral + <int32_t, target_endianness, support::aligned> Elf_Sword; + typedef support::detail::packed_endian_specific_integral + <uint64_t, target_endianness, support::aligned> Elf_Xword; + typedef support::detail::packed_endian_specific_integral + <int64_t, target_endianness, support::aligned> Elf_Sxword; +}; +} + +namespace { +template<support::endianness target_endianness, bool is64Bits> +struct ELFDataTypeTypedefHelper; + +/// ELF 32bit types. +template<support::endianness target_endianness> +struct ELFDataTypeTypedefHelper<target_endianness, false> + : ELFDataTypeTypedefHelperCommon<target_endianness> { + typedef support::detail::packed_endian_specific_integral + <uint32_t, target_endianness, support::aligned> Elf_Addr; + typedef support::detail::packed_endian_specific_integral + <uint32_t, target_endianness, support::aligned> Elf_Off; +}; + +/// ELF 64bit types. +template<support::endianness target_endianness> +struct ELFDataTypeTypedefHelper<target_endianness, true> + : ELFDataTypeTypedefHelperCommon<target_endianness>{ + typedef support::detail::packed_endian_specific_integral + <uint64_t, target_endianness, support::aligned> Elf_Addr; + typedef support::detail::packed_endian_specific_integral + <uint64_t, target_endianness, support::aligned> Elf_Off; +}; +} + +// I really don't like doing this, but the alternative is copypasta. +#define LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Addr Elf_Addr; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Off Elf_Off; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Half Elf_Half; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Word Elf_Word; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sword Elf_Sword; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Xword Elf_Xword; \ +typedef typename \ + ELFDataTypeTypedefHelper<target_endianness, is64Bits>::Elf_Sxword Elf_Sxword; + + // Section header.
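+// To illustrate the machinery above (a sketch, not part of the interface): in
+// a template declared with <support::endianness target_endianness, bool
+// is64Bits>, the macro pulls in typedefs such that
+//   Elf_Word w = ...;  // 4 on-disk bytes in the file's byte order
+//   uint32_t v = w;    // the conversion byte-swaps on access if needed
+// so the headers below can be overlaid directly on the mapped buffer.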
+namespace { +template<support::endianness target_endianness, bool is64Bits> +struct Elf_Shdr_Base; + +template<support::endianness target_endianness> +struct Elf_Shdr_Base<target_endianness, false> { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Word sh_name; // Section name (index into string table) + Elf_Word sh_type; // Section type (SHT_*) + Elf_Word sh_flags; // Section flags (SHF_*) + Elf_Addr sh_addr; // Address where section is to be loaded + Elf_Off sh_offset; // File offset of section data, in bytes + Elf_Word sh_size; // Size of section, in bytes + Elf_Word sh_link; // Section type-specific header table index link + Elf_Word sh_info; // Section type-specific extra information + Elf_Word sh_addralign;// Section address alignment + Elf_Word sh_entsize; // Size of records contained within the section +}; + +template<support::endianness target_endianness> +struct Elf_Shdr_Base<target_endianness, true> { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Word sh_name; // Section name (index into string table) + Elf_Word sh_type; // Section type (SHT_*) + Elf_Xword sh_flags; // Section flags (SHF_*) + Elf_Addr sh_addr; // Address where section is to be loaded + Elf_Off sh_offset; // File offset of section data, in bytes + Elf_Xword sh_size; // Size of section, in bytes + Elf_Word sh_link; // Section type-specific header table index link + Elf_Word sh_info; // Section type-specific extra information + Elf_Xword sh_addralign;// Section address alignment + Elf_Xword sh_entsize; // Size of records contained within the section +}; + +template<support::endianness target_endianness, bool is64Bits> +struct Elf_Shdr_Impl : Elf_Shdr_Base<target_endianness, is64Bits> { + using Elf_Shdr_Base<target_endianness, is64Bits>::sh_entsize; + using Elf_Shdr_Base<target_endianness, is64Bits>::sh_size; + + /// @brief Get the number of entities this section contains if it has any. + unsigned getEntityCount() const { + if (sh_entsize == 0) + return 0; + return sh_size / sh_entsize; + } +}; +} + +namespace { +template<support::endianness target_endianness, bool is64Bits> +struct Elf_Sym_Base; + +template<support::endianness target_endianness> +struct Elf_Sym_Base<target_endianness, false> { + LLVM_ELF_IMPORT_TYPES(target_endianness, false) + Elf_Word st_name; // Symbol name (index into string table) + Elf_Addr st_value; // Value or address associated with the symbol + Elf_Word st_size; // Size of the symbol + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf_Half st_shndx; // Which section (header table index) it's defined in +}; + +template<support::endianness target_endianness> +struct Elf_Sym_Base<target_endianness, true> { + LLVM_ELF_IMPORT_TYPES(target_endianness, true) + Elf_Word st_name; // Symbol name (index into string table) + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf_Half st_shndx; // Which section (header table index) it's defined in + Elf_Addr st_value; // Value or address associated with the symbol + Elf_Xword st_size; // Size of the symbol +}; + +template<support::endianness target_endianness, bool is64Bits> +struct Elf_Sym_Impl : Elf_Sym_Base<target_endianness, is64Bits> { + using Elf_Sym_Base<target_endianness, is64Bits>::st_info; + + // These accessors and mutators correspond to the ELF32_ST_BIND, + // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; +} + +namespace { +template<support::endianness target_endianness, bool is64Bits> +class ELFObjectFile : public ObjectFile { + LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) + + typedef Elf_Shdr_Impl<target_endianness, is64Bits> Elf_Shdr; + typedef Elf_Sym_Impl<target_endianness, is64Bits> Elf_Sym; + + struct Elf_Ehdr { + unsigned char e_ident[ELF::EI_NIDENT]; // ELF Identification bytes + Elf_Half e_type; //
Type of file (see ET_*) + Elf_Half e_machine; // Required architecture for this file (see EM_*) + Elf_Word e_version; // Must be equal to 1 + Elf_Addr e_entry; // Address to jump to in order to start program + Elf_Off e_phoff; // Program header table's file offset, in bytes + Elf_Off e_shoff; // Section header table's file offset, in bytes + Elf_Word e_flags; // Processor-specific flags + Elf_Half e_ehsize; // Size of ELF header, in bytes + Elf_Half e_phentsize;// Size of an entry in the program header table + Elf_Half e_phnum; // Number of entries in the program header table + Elf_Half e_shentsize;// Size of an entry in the section header table + Elf_Half e_shnum; // Number of entries in the section header table + Elf_Half e_shstrndx; // Section header table index of section name + // string table + bool checkMagic() const { + return (memcmp(e_ident, ELF::ElfMagic, strlen(ELF::ElfMagic))) == 0; + } + unsigned char getFileClass() const { return e_ident[ELF::EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[ELF::EI_DATA]; } + }; + + typedef SmallVector<const Elf_Shdr*, 1> SymbolTableSections_t; + + const Elf_Ehdr *Header; + const Elf_Shdr *SectionHeaderTable; + const Elf_Shdr *dot_shstrtab_sec; // Section header string table. + const Elf_Shdr *dot_strtab_sec; // Symbol header string table. + SymbolTableSections_t SymbolTableSections; + + void validateSymbol(DataRefImpl Symb) const; + const Elf_Sym *getSymbol(DataRefImpl Symb) const; + const Elf_Shdr *getSection(DataRefImpl index) const; + const Elf_Shdr *getSection(uint16_t index) const; + const char *getString(uint16_t section, uint32_t offset) const; + const char *getString(const Elf_Shdr *section, uint32_t offset) const; + +protected: + virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; + virtual StringRef getSymbolName(DataRefImpl Symb) const; + virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; + virtual uint64_t getSymbolSize(DataRefImpl Symb) const; + virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; + virtual bool isSymbolInternal(DataRefImpl Symb) const; + + virtual SectionRef getSectionNext(DataRefImpl Sec) const; + virtual StringRef getSectionName(DataRefImpl Sec) const; + virtual uint64_t getSectionAddress(DataRefImpl Sec) const; + virtual uint64_t getSectionSize(DataRefImpl Sec) const; + virtual StringRef getSectionContents(DataRefImpl Sec) const; + virtual bool isSectionText(DataRefImpl Sec) const; + +public: + ELFObjectFile(MemoryBuffer *Object); + virtual symbol_iterator begin_symbols() const; + virtual symbol_iterator end_symbols() const; + virtual section_iterator begin_sections() const; + virtual section_iterator end_sections() const; + + virtual uint8_t getBytesInAddress() const; + virtual StringRef getFileFormatName() const; + virtual unsigned getArch() const; +}; +} // end namespace + +template<support::endianness target_endianness, bool is64Bits> +void ELFObjectFile<target_endianness, is64Bits> + ::validateSymbol(DataRefImpl Symb) const { + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; + // FIXME: We really need to do proper error handling in the case of an invalid + // input file. Because we don't use exceptions, I think we'll just pass + // an error object around. + if (!( symb + && SymbolTableSection + && symb >= (const Elf_Sym*)(base + + SymbolTableSection->sh_offset) + && symb < (const Elf_Sym*)(base + + SymbolTableSection->sh_offset + + SymbolTableSection->sh_size))) + // FIXME: Proper error handling.
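+ // (Symb.d.a is the symbol's index within its table; Symb.d.b selects the
+ // table in SymbolTableSections. A valid reference must therefore fall
+ // inside the [sh_offset, sh_offset + sh_size) byte range tested above.)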
+ report_fatal_error("Symb must point to a valid symbol!"); +} + +template<support::endianness target_endianness, bool is64Bits> +SymbolRef ELFObjectFile<target_endianness, is64Bits> + ::getSymbolNext(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b]; + + ++Symb.d.a; + // Check to see if we are at the end of this symbol table. + if (Symb.d.a >= SymbolTableSection->getEntityCount()) { + // We are at the end. If there are other symbol tables, jump to them. + ++Symb.d.b; + Symb.d.a = 1; // The 0th symbol in ELF is fake. + // Otherwise return the terminator. + if (Symb.d.b >= SymbolTableSections.size()) { + Symb.d.a = std::numeric_limits<uint32_t>::max(); + Symb.d.b = std::numeric_limits<uint32_t>::max(); + } + } + + return SymbolRef(Symb, this); +} + +template<support::endianness target_endianness, bool is64Bits> +StringRef ELFObjectFile<target_endianness, is64Bits> + ::getSymbolName(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + if (symb->st_name == 0) { + const Elf_Shdr *section = getSection(symb->st_shndx); + if (!section) + return ""; + return getString(dot_shstrtab_sec, section->sh_name); + } + + // Use the default symbol table name section. + return getString(dot_strtab_sec, symb->st_name); +} + +template<support::endianness target_endianness, bool is64Bits> +uint64_t ELFObjectFile<target_endianness, is64Bits> + ::getSymbolAddress(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section; + switch (symb->st_shndx) { + case ELF::SHN_COMMON: + // Undefined symbols have no address yet. + case ELF::SHN_UNDEF: return UnknownAddressOrSize; + case ELF::SHN_ABS: return symb->st_value; + default: Section = getSection(symb->st_shndx); + } + + switch (symb->getType()) { + case ELF::STT_SECTION: return Section ? Section->sh_addr + : UnknownAddressOrSize; + case ELF::STT_FUNC: + case ELF::STT_OBJECT: + case ELF::STT_NOTYPE: + return symb->st_value; + default: return UnknownAddressOrSize; + } +} + +template<support::endianness target_endianness, bool is64Bits> +uint64_t ELFObjectFile<target_endianness, is64Bits> + ::getSymbolSize(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + if (symb->st_size == 0) + return UnknownAddressOrSize; + return symb->st_size; +} + +template<support::endianness target_endianness, bool is64Bits> +char ELFObjectFile<target_endianness, is64Bits> + ::getSymbolNMTypeChar(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + const Elf_Shdr *Section = getSection(symb->st_shndx); + + char ret = '?'; + + if (Section) { + switch (Section->sh_type) { + case ELF::SHT_PROGBITS: + case ELF::SHT_DYNAMIC: + switch (Section->sh_flags) { + case (ELF::SHF_ALLOC | ELF::SHF_EXECINSTR): + ret = 't'; break; + case (ELF::SHF_ALLOC | ELF::SHF_WRITE): + ret = 'd'; break; + case ELF::SHF_ALLOC: + case (ELF::SHF_ALLOC | ELF::SHF_MERGE): + case (ELF::SHF_ALLOC | ELF::SHF_MERGE | ELF::SHF_STRINGS): + ret = 'r'; break; + } + break; + case ELF::SHT_NOBITS: ret = 'b'; + } + } + + switch (symb->st_shndx) { + case ELF::SHN_UNDEF: + if (ret == '?') + ret = 'U'; + break; + case ELF::SHN_ABS: ret = 'a'; break; + case ELF::SHN_COMMON: ret = 'c'; break; + } + + switch (symb->getBinding()) { + case ELF::STB_GLOBAL: ret = ::toupper(ret); break; + case ELF::STB_WEAK: + if (symb->st_shndx == ELF::SHN_UNDEF) + ret = 'w'; + else + if (symb->getType() == ELF::STT_OBJECT) + ret = 'V'; + else + ret = 'W'; + } + + if (ret == '?'
&& symb->getType() == ELF::STT_SECTION) + return StringSwitch<char>(getSymbolName(Symb)) + .StartsWith(".debug", 'N') + .StartsWith(".note", 'n'); + + return ret; +} + +template<support::endianness target_endianness, bool is64Bits> +bool ELFObjectFile<target_endianness, is64Bits> + ::isSymbolInternal(DataRefImpl Symb) const { + validateSymbol(Symb); + const Elf_Sym *symb = getSymbol(Symb); + + if ( symb->getType() == ELF::STT_FILE + || symb->getType() == ELF::STT_SECTION) + return true; + return false; +} + +template<support::endianness target_endianness, bool is64Bits> +SectionRef ELFObjectFile<target_endianness, is64Bits> + ::getSectionNext(DataRefImpl Sec) const { + const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p); + sec += Header->e_shentsize; + Sec.p = reinterpret_cast<intptr_t>(sec); + return SectionRef(Sec, this); +} + +template<support::endianness target_endianness, bool is64Bits> +StringRef ELFObjectFile<target_endianness, is64Bits> + ::getSectionName(DataRefImpl Sec) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr*>(Sec.p); + return StringRef(getString(dot_shstrtab_sec, sec->sh_name)); +} + +template<support::endianness target_endianness, bool is64Bits> +uint64_t ELFObjectFile<target_endianness, is64Bits> + ::getSectionAddress(DataRefImpl Sec) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr*>(Sec.p); + return sec->sh_addr; +} + +template<support::endianness target_endianness, bool is64Bits> +uint64_t ELFObjectFile<target_endianness, is64Bits> + ::getSectionSize(DataRefImpl Sec) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr*>(Sec.p); + return sec->sh_size; +} + +template<support::endianness target_endianness, bool is64Bits> +StringRef ELFObjectFile<target_endianness, is64Bits> + ::getSectionContents(DataRefImpl Sec) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr*>(Sec.p); + const char *start = (char*)base + sec->sh_offset; + return StringRef(start, sec->sh_size); +} + +template<support::endianness target_endianness, bool is64Bits> +bool ELFObjectFile<target_endianness, is64Bits> + ::isSectionText(DataRefImpl Sec) const { + const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr*>(Sec.p); + if (sec->sh_flags & ELF::SHF_EXECINSTR) + return true; + return false; +} + +template<support::endianness target_endianness, bool is64Bits> +ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object) + : ObjectFile(Object) + , SectionHeaderTable(0) + , dot_shstrtab_sec(0) + , dot_strtab_sec(0) { + Header = reinterpret_cast<const Elf_Ehdr *>(base); + + if (Header->e_shoff == 0) + return; + + SectionHeaderTable = + reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff); + uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize; + if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize + <= base + MapFile->getBufferSize())) + // FIXME: Proper error handling. + report_fatal_error("Section table goes past end of file!"); + + + // To find the symbol tables we walk the section table to find SHT_SYMTAB. + for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable), + *e = i + Header->e_shnum * Header->e_shentsize; + i != e; i += Header->e_shentsize) { + const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i); + if (sh->sh_type == ELF::SHT_SYMTAB) { + SymbolTableSections.push_back(sh); + } + } + + // Get string table sections. + dot_shstrtab_sec = getSection(Header->e_shstrndx); + if (dot_shstrtab_sec) { + // Verify that the last byte in the string table is a null. + if (((const char*)base + dot_shstrtab_sec->sh_offset) + [dot_shstrtab_sec->sh_size - 1] != 0) + // FIXME: Proper error handling. + report_fatal_error("String table must end with a null terminator!"); + } + + // Merge this into the above loop. + for (const char *i = reinterpret_cast<const char *>(SectionHeaderTable), + *e = i + Header->e_shnum * Header->e_shentsize; + i != e; i += Header->e_shentsize) { + const Elf_Shdr *sh = reinterpret_cast<const Elf_Shdr*>(i); + if (sh->sh_type == ELF::SHT_STRTAB) { + StringRef SectionName(getString(dot_shstrtab_sec, sh->sh_name)); + if (SectionName == ".strtab") { + if (dot_strtab_sec != 0) + // FIXME: Proper error handling.
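+ // (.shstrtab, located via e_shstrndx above, names the sections themselves;
+ // the .strtab found here holds symbol names. Both are SHT_STRTAB sections:
+ // plain pools of NUL-terminated strings.)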
+ report_fatal_error("Already found section named .strtab!"); + dot_strtab_sec = sh; + const char *dot_strtab = (const char*)base + sh->sh_offset; + if (dot_strtab[sh->sh_size - 1] != 0) + // FIXME: Proper error handling. + report_fatal_error("String table must end with a null terminator!"); + } + } + } +} + +template<support::endianness target_endianness, bool is64Bits> +ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> + ::begin_symbols() const { + DataRefImpl SymbolData; + memset(&SymbolData, 0, sizeof(SymbolData)); + if (SymbolTableSections.size() == 0) { + SymbolData.d.a = std::numeric_limits<uint32_t>::max(); + SymbolData.d.b = std::numeric_limits<uint32_t>::max(); + } else { + SymbolData.d.a = 1; // The 0th symbol in ELF is fake. + SymbolData.d.b = 0; + } + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template<support::endianness target_endianness, bool is64Bits> +ObjectFile::symbol_iterator ELFObjectFile<target_endianness, is64Bits> + ::end_symbols() const { + DataRefImpl SymbolData; + memset(&SymbolData, 0, sizeof(SymbolData)); + SymbolData.d.a = std::numeric_limits<uint32_t>::max(); + SymbolData.d.b = std::numeric_limits<uint32_t>::max(); + return symbol_iterator(SymbolRef(SymbolData, this)); +} + +template<support::endianness target_endianness, bool is64Bits> +ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> + ::begin_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff); + return section_iterator(SectionRef(ret, this)); +} + +template<support::endianness target_endianness, bool is64Bits> +ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits> + ::end_sections() const { + DataRefImpl ret; + ret.p = reinterpret_cast<intptr_t>(base + + Header->e_shoff + + (Header->e_shentsize * Header->e_shnum)); + return section_iterator(SectionRef(ret, this)); +} + +template<support::endianness target_endianness, bool is64Bits> +uint8_t ELFObjectFile<target_endianness, is64Bits>::getBytesInAddress() const { + return is64Bits ? 8 : 4; +} + +template<support::endianness target_endianness, bool is64Bits> +StringRef ELFObjectFile<target_endianness, is64Bits> + ::getFileFormatName() const { + switch(Header->e_ident[ELF::EI_CLASS]) { + case ELF::ELFCLASS32: + switch(Header->e_machine) { + case ELF::EM_386: + return "ELF32-i386"; + case ELF::EM_X86_64: + return "ELF32-x86-64"; + default: + return "ELF32-unknown"; + } + case ELF::ELFCLASS64: + switch(Header->e_machine) { + case ELF::EM_386: + return "ELF64-i386"; + case ELF::EM_X86_64: + return "ELF64-x86-64"; + default: + return "ELF64-unknown"; + } + default: + // FIXME: Proper error handling. + report_fatal_error("Invalid ELFCLASS!"); + } +} + +template<support::endianness target_endianness, bool is64Bits> +unsigned ELFObjectFile<target_endianness, is64Bits>::getArch() const { + switch(Header->e_machine) { + case ELF::EM_386: + return Triple::x86; + case ELF::EM_X86_64: + return Triple::x86_64; + default: + return Triple::UnknownArch; + } +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym * +ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const { + const Elf_Shdr *sec = SymbolTableSections[Symb.d.b]; + return reinterpret_cast<const Elf_Sym *>( + base + + sec->sh_offset + + (Symb.d.a * sec->sh_entsize)); +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * +ELFObjectFile<target_endianness, is64Bits>::getSection(DataRefImpl Symb) const { + const Elf_Shdr *sec = getSection(Symb.d.b); + if (sec->sh_type != ELF::SHT_SYMTAB) + // FIXME: Proper error handling. + report_fatal_error("Invalid symbol table section!"); + return sec; +} + +template<support::endianness target_endianness, bool is64Bits> +const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Shdr * +ELFObjectFile<target_endianness, is64Bits>::getSection(uint16_t index) const { + if (index == 0 || index >= ELF::SHN_LORESERVE) + return 0; + if (!SectionHeaderTable || index >= Header->e_shnum) + // FIXME: Proper error handling.
+ report_fatal_error("Invalid section index!"); + + return reinterpret_cast<const Elf_Shdr *>( + reinterpret_cast<const char *>(SectionHeaderTable) + + (index * Header->e_shentsize)); +} + +template<support::endianness target_endianness, bool is64Bits> +const char *ELFObjectFile<target_endianness, is64Bits> + ::getString(uint16_t section, + ELF::Elf32_Word offset) const { + return getString(getSection(section), offset); +} + +template<support::endianness target_endianness, bool is64Bits> +const char *ELFObjectFile<target_endianness, is64Bits> + ::getString(const Elf_Shdr *section, + ELF::Elf32_Word offset) const { + assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!"); + if (offset >= section->sh_size) + // FIXME: Proper error handling. + report_fatal_error("Symbol name offset outside of string table!"); + return (const char *)base + section->sh_offset + offset; +} + +// EI_CLASS, EI_DATA. +static std::pair<unsigned char, unsigned char> +getElfArchType(MemoryBuffer *Object) { + if (Object->getBufferSize() < ELF::EI_NIDENT) + return std::make_pair((uint8_t)ELF::ELFCLASSNONE,(uint8_t)ELF::ELFDATANONE); + return std::make_pair( (uint8_t)Object->getBufferStart()[ELF::EI_CLASS] + , (uint8_t)Object->getBufferStart()[ELF::EI_DATA]); +} + +namespace llvm { + + ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) { + std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object); + if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) + return new ELFObjectFile<support::little, false>(Object); + else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) + return new ELFObjectFile<support::big, false>(Object); + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) + return new ELFObjectFile<support::little, true>(Object); + else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) + return new ELFObjectFile<support::big, true>(Object); + // FIXME: Proper error handling. + report_fatal_error("Not an ELF object file!"); + } + +} // end namespace llvm diff --git a/final/lib/Object/MachOObject.cpp b/final/lib/Object/MachOObject.cpp new file mode 100644 index 00000000000..5e64d632328 --- /dev/null +++ b/final/lib/Object/MachOObject.cpp @@ -0,0 +1,342 @@ +//===- MachOObject.cpp - Mach-O Object File Wrapper -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOObject.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/SwapByteOrder.h" + +using namespace llvm; +using namespace llvm::object; + +/* Translation Utilities */ + +template<typename T> +static void SwapValue(T &Value) { + Value = sys::SwapByteOrder(Value); +} + +template<typename T> +static void SwapStruct(T &Value); + +template<typename T> +static void ReadInMemoryStruct(const MachOObject &MOO, + StringRef Buffer, uint64_t Base, + InMemoryStruct<T> &Res) { + typedef T struct_type; + uint64_t Size = sizeof(struct_type); + + // Check that the buffer contains the expected data. + if (Base + Size > Buffer.size()) { + Res = 0; + return; + } + + // Check whether we can return a direct pointer. + struct_type *Ptr = (struct_type *) (Buffer.data() + Base); + if (!MOO.isSwappedEndian()) { + Res = Ptr; + return; + } + + // Otherwise, copy the struct and translate the values.
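+ // (When the byte orders match, Res aliases the mapped buffer directly with
+ // no copy; on a mismatch the struct is copied and each field byte-swapped
+ // below, so callers see host-endian values either way.)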
+ Res = *Ptr; + SwapStruct(*Res); +} + +/* *** */ + +MachOObject::MachOObject(MemoryBuffer *Buffer_, bool IsLittleEndian_, + bool Is64Bit_) + : Buffer(Buffer_), IsLittleEndian(IsLittleEndian_), Is64Bit(Is64Bit_), + IsSwappedEndian(IsLittleEndian != sys::isLittleEndianHost()), + HasStringTable(false), LoadCommands(0), NumLoadedCommands(0) { + // Load the common header. + memcpy(&Header, Buffer->getBuffer().data(), sizeof(Header)); + if (IsSwappedEndian) { + SwapValue(Header.Magic); + SwapValue(Header.CPUType); + SwapValue(Header.CPUSubtype); + SwapValue(Header.FileType); + SwapValue(Header.NumLoadCommands); + SwapValue(Header.SizeOfLoadCommands); + SwapValue(Header.Flags); + } + + if (is64Bit()) { + memcpy(&Header64Ext, Buffer->getBuffer().data() + sizeof(Header), + sizeof(Header64Ext)); + if (IsSwappedEndian) { + SwapValue(Header64Ext.Reserved); + } + } + + // Create the load command array if sane. + if (getHeader().NumLoadCommands < (1 << 20)) + LoadCommands = new LoadCommandInfo[getHeader().NumLoadCommands]; +} + +MachOObject::~MachOObject() { + delete [] LoadCommands; +} + +MachOObject *MachOObject::LoadFromBuffer(MemoryBuffer *Buffer, + std::string *ErrorStr) { + // First, check the magic value and initialize the basic object info. + bool IsLittleEndian = false, Is64Bit = false; + StringRef Magic = Buffer->getBuffer().slice(0, 4); + if (Magic == "\xFE\xED\xFA\xCE") { + } else if (Magic == "\xCE\xFA\xED\xFE") { + IsLittleEndian = true; + } else if (Magic == "\xFE\xED\xFA\xCF") { + Is64Bit = true; + } else if (Magic == "\xCF\xFA\xED\xFE") { + IsLittleEndian = true; + Is64Bit = true; + } else { + if (ErrorStr) *ErrorStr = "not a Mach object file (invalid magic)"; + return 0; + } + + // Ensure that at least the full header is present. + unsigned HeaderSize = Is64Bit ? macho::Header64Size : macho::Header32Size; + if (Buffer->getBufferSize() < HeaderSize) { + if (ErrorStr) *ErrorStr = "not a Mach object file (invalid header)"; + return 0; + } + + OwningPtr<MachOObject> Object(new MachOObject(Buffer, IsLittleEndian, + Is64Bit)); + + // Check for bogus number of load commands. + if (Object->getHeader().NumLoadCommands >= (1 << 20)) { + if (ErrorStr) *ErrorStr = "not a Mach object file (unreasonable header)"; + return 0; + } + + if (ErrorStr) *ErrorStr = ""; + return Object.take(); +} + +StringRef MachOObject::getData(size_t Offset, size_t Size) const { + return Buffer->getBuffer().substr(Offset,Size); +} + +void MachOObject::RegisterStringTable(macho::SymtabLoadCommand &SLC) { + HasStringTable = true; + StringTable = Buffer->getBuffer().substr(SLC.StringTableOffset, + SLC.StringTableSize); +} + +const MachOObject::LoadCommandInfo & +MachOObject::getLoadCommandInfo(unsigned Index) const { + assert(Index < getHeader().NumLoadCommands && "Invalid index!"); + + // Load the command, if necessary.
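+ // (Load commands are variable-length and packed back to back, so command N
+ // can only be located once the size of command N-1 is known; the code below
+ // therefore materializes the cache in order, recursing for the previous
+ // command's offset.)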
+ if (Index >= NumLoadedCommands) { + uint64_t Offset; + if (Index == 0) { + Offset = getHeaderSize(); + } else { + const LoadCommandInfo &Prev = getLoadCommandInfo(Index - 1); + Offset = Prev.Offset + Prev.Command.Size; + } + + LoadCommandInfo &Info = LoadCommands[Index]; + memcpy(&Info.Command, Buffer->getBuffer().data() + Offset, + sizeof(macho::LoadCommand)); + if (IsSwappedEndian) { + SwapValue(Info.Command.Type); + SwapValue(Info.Command.Size); + } + Info.Offset = Offset; + NumLoadedCommands = Index + 1; + } + + return LoadCommands[Index]; +} + +template<> +void SwapStruct(macho::SegmentLoadCommand &Value) { + SwapValue(Value.Type); + SwapValue(Value.Size); + SwapValue(Value.VMAddress); + SwapValue(Value.VMSize); + SwapValue(Value.FileOffset); + SwapValue(Value.FileSize); + SwapValue(Value.MaxVMProtection); + SwapValue(Value.InitialVMProtection); + SwapValue(Value.NumSections); + SwapValue(Value.Flags); +} +void MachOObject::ReadSegmentLoadCommand(const LoadCommandInfo &LCI, + InMemoryStruct<macho::SegmentLoadCommand> &Res) const { + ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); +} + +template<> +void SwapStruct(macho::Segment64LoadCommand &Value) { + SwapValue(Value.Type); + SwapValue(Value.Size); + SwapValue(Value.VMAddress); + SwapValue(Value.VMSize); + SwapValue(Value.FileOffset); + SwapValue(Value.FileSize); + SwapValue(Value.MaxVMProtection); + SwapValue(Value.InitialVMProtection); + SwapValue(Value.NumSections); + SwapValue(Value.Flags); +} +void MachOObject::ReadSegment64LoadCommand(const LoadCommandInfo &LCI, + InMemoryStruct<macho::Segment64LoadCommand> &Res) const { + ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); +} + +template<> +void SwapStruct(macho::SymtabLoadCommand &Value) { + SwapValue(Value.Type); + SwapValue(Value.Size); + SwapValue(Value.SymbolTableOffset); + SwapValue(Value.NumSymbolTableEntries); + SwapValue(Value.StringTableOffset); + SwapValue(Value.StringTableSize); +} +void MachOObject::ReadSymtabLoadCommand(const LoadCommandInfo &LCI, + InMemoryStruct<macho::SymtabLoadCommand> &Res) const { + ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); +} + +template<> +void SwapStruct(macho::DysymtabLoadCommand &Value) { + SwapValue(Value.Type); + SwapValue(Value.Size); + SwapValue(Value.LocalSymbolsIndex); + SwapValue(Value.NumLocalSymbols); + SwapValue(Value.ExternalSymbolsIndex); + SwapValue(Value.NumExternalSymbols); + SwapValue(Value.UndefinedSymbolsIndex); + SwapValue(Value.NumUndefinedSymbols); + SwapValue(Value.TOCOffset); + SwapValue(Value.NumTOCEntries); + SwapValue(Value.ModuleTableOffset); + SwapValue(Value.NumModuleTableEntries); + SwapValue(Value.ReferenceSymbolTableOffset); + SwapValue(Value.NumReferencedSymbolTableEntries); + SwapValue(Value.IndirectSymbolTableOffset); + SwapValue(Value.NumIndirectSymbolTableEntries); + SwapValue(Value.ExternalRelocationTableOffset); + SwapValue(Value.NumExternalRelocationTableEntries); + SwapValue(Value.LocalRelocationTableOffset); + SwapValue(Value.NumLocalRelocationTableEntries); +} +void MachOObject::ReadDysymtabLoadCommand(const LoadCommandInfo &LCI, + InMemoryStruct<macho::DysymtabLoadCommand> &Res) const { + ReadInMemoryStruct(*this, Buffer->getBuffer(), LCI.Offset, Res); +} + +template<> +void SwapStruct(macho::IndirectSymbolTableEntry &Value) { + SwapValue(Value.Index); +} +void +MachOObject::ReadIndirectSymbolTableEntry(const macho::DysymtabLoadCommand &DLC, + unsigned Index, + InMemoryStruct<macho::IndirectSymbolTableEntry> &Res) const { + uint64_t Offset = (DLC.IndirectSymbolTableOffset + + Index * sizeof(macho::IndirectSymbolTableEntry)); + ReadInMemoryStruct(*this, Buffer->getBuffer(),
Offset, Res); +} + + +template<> +void SwapStruct(macho::Section &Value) { + SwapValue(Value.Address); + SwapValue(Value.Size); + SwapValue(Value.Offset); + SwapValue(Value.Align); + SwapValue(Value.RelocationTableOffset); + SwapValue(Value.NumRelocationTableEntries); + SwapValue(Value.Flags); + SwapValue(Value.Reserved1); + SwapValue(Value.Reserved2); +} +void MachOObject::ReadSection(const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section> &Res) const { + assert(LCI.Command.Type == macho::LCT_Segment && + "Unexpected load command info!"); + uint64_t Offset = (LCI.Offset + sizeof(macho::SegmentLoadCommand) + + Index * sizeof(macho::Section)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} + +template<> +void SwapStruct(macho::Section64 &Value) { + SwapValue(Value.Address); + SwapValue(Value.Size); + SwapValue(Value.Offset); + SwapValue(Value.Align); + SwapValue(Value.RelocationTableOffset); + SwapValue(Value.NumRelocationTableEntries); + SwapValue(Value.Flags); + SwapValue(Value.Reserved1); + SwapValue(Value.Reserved2); + SwapValue(Value.Reserved3); +} +void MachOObject::ReadSection64(const LoadCommandInfo &LCI, + unsigned Index, + InMemoryStruct<macho::Section64> &Res) const { + assert(LCI.Command.Type == macho::LCT_Segment64 && + "Unexpected load command info!"); + uint64_t Offset = (LCI.Offset + sizeof(macho::Segment64LoadCommand) + + Index * sizeof(macho::Section64)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} + +template<> +void SwapStruct(macho::RelocationEntry &Value) { + SwapValue(Value.Word0); + SwapValue(Value.Word1); +} +void MachOObject::ReadRelocationEntry(uint64_t RelocationTableOffset, + unsigned Index, + InMemoryStruct<macho::RelocationEntry> &Res) const { + uint64_t Offset = (RelocationTableOffset + + Index * sizeof(macho::RelocationEntry)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} + +template<> +void SwapStruct(macho::SymbolTableEntry &Value) { + SwapValue(Value.StringIndex); + SwapValue(Value.Flags); + SwapValue(Value.Value); +} +void MachOObject::ReadSymbolTableEntry(uint64_t SymbolTableOffset, + unsigned Index, + InMemoryStruct<macho::SymbolTableEntry> &Res) const { + uint64_t Offset = (SymbolTableOffset + + Index * sizeof(macho::SymbolTableEntry)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} + +template<> +void SwapStruct(macho::Symbol64TableEntry &Value) { + SwapValue(Value.StringIndex); + SwapValue(Value.Flags); + SwapValue(Value.Value); +} +void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, + unsigned Index, + InMemoryStruct<macho::Symbol64TableEntry> &Res) const { + uint64_t Offset = (SymbolTableOffset + + Index * sizeof(macho::Symbol64TableEntry)); + ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); +} diff --git a/final/lib/Object/Makefile b/final/lib/Object/Makefile new file mode 100644 index 00000000000..79388dc97f1 --- /dev/null +++ b/final/lib/Object/Makefile @@ -0,0 +1,14 @@ +##===- lib/Object/Makefile ---------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../..
+LIBRARYNAME = LLVMObject +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/final/lib/Object/ObjectFile.cpp b/final/lib/Object/ObjectFile.cpp new file mode 100644 index 00000000000..161ae3a083f --- /dev/null +++ b/final/lib/Object/ObjectFile.cpp @@ -0,0 +1,71 @@ +//===- ObjectFile.cpp - File format independent object file -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a file format independent ObjectFile class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/system_error.h" + +using namespace llvm; +using namespace object; + +ObjectFile::ObjectFile(MemoryBuffer *Object) + : MapFile(Object) { + assert(MapFile && "Must be a valid MemoryBuffer!"); + base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart()); +} + +ObjectFile::~ObjectFile() { + delete MapFile; +} + +StringRef ObjectFile::getFilename() const { + return MapFile->getBufferIdentifier(); +} + +ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { + if (!Object || Object->getBufferSize() < 64) + return 0; + sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(), + static_cast<unsigned>(Object->getBufferSize())); + switch (type) { + case sys::ELF_Relocatable_FileType: + case sys::ELF_Executable_FileType: + case sys::ELF_SharedObject_FileType: + case sys::ELF_Core_FileType: + return createELFObjectFile(Object); + case sys::Mach_O_Object_FileType: + case sys::Mach_O_Executable_FileType: + case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: + case sys::Mach_O_Core_FileType: + case sys::Mach_O_PreloadExecutable_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: + case sys::Mach_O_DynamicLinker_FileType: + case sys::Mach_O_Bundle_FileType: + case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: + return 0; + case sys::COFF_FileType: + return createCOFFObjectFile(Object); + default: + llvm_unreachable("Unknown Object File Type"); + } +} + +ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) { + OwningPtr<MemoryBuffer> File; + if (error_code ec = MemoryBuffer::getFile(ObjectPath, File)) + return NULL; + return createObjectFile(File.take()); +} diff --git a/final/lib/Support/APFloat.cpp b/final/lib/Support/APFloat.cpp new file mode 100644 index 00000000000..93806facff0 --- /dev/null +++ b/final/lib/Support/APFloat.cpp @@ -0,0 +1,3564 @@ +//===-- APFloat.cpp - Implement APFloat class -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a class to represent arbitrary precision floating +// point values and provide a variety of arithmetic operations on them.
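+// A typical use of the class, as a sketch against the public APFloat API
+// declared in llvm/ADT/APFloat.h:
+//   APFloat a(APFloat::IEEEdouble, "2.5");  // parse a decimal string
+//   APFloat b(APFloat::IEEEdouble, "0.5");
+//   a.add(b, APFloat::rmNearestTiesToEven); // a is now 3.0; status returned
+//   double d = a.convertToDouble();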
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <limits.h> +#include <cstring> + +using namespace llvm; + +#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) + +/* Assumed in hexadecimal significand parsing, and conversion to + hexadecimal strings. */ +#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] +COMPILE_TIME_ASSERT(integerPartWidth % 4 == 0); + +namespace llvm { + + /* Represents floating point arithmetic semantics. */ + struct fltSemantics { + /* The largest E such that 2^E is representable; this matches the + definition of IEEE 754. */ + exponent_t maxExponent; + + /* The smallest E such that 2^E is a normalized number; this + matches the definition of IEEE 754. */ + exponent_t minExponent; + + /* Number of bits in the significand. This includes the integer + bit. */ + unsigned int precision; + + /* True if arithmetic is supported. */ + unsigned int arithmeticOK; + }; + + const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, true }; + const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, true }; + const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, true }; + const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, true }; + const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, true }; + const fltSemantics APFloat::Bogus = { 0, 0, 0, true }; + + // The PowerPC format consists of two doubles. It does not map cleanly + // onto the usual format above. For now only storage of constants of + // this type is supported, no arithmetic. + const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022, 106, false }; + + /* A tight upper bound on number of parts required to hold the value + pow(5, power) is + + power * 815 / (351 * integerPartWidth) + 1 + + However, whilst the result may require only this many parts, + because we are multiplying two values to get it, the + multiplication may require an extra part with the excess part + being zero (consider the trivial case of 1 * 1, tcFullMultiply + requires two parts to hold the single-part result). So we add an + extra one to guarantee enough space whilst multiplying. */ + const unsigned int maxExponent = 16383; + const unsigned int maxPrecision = 113; + const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1; + const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) + / (351 * integerPartWidth)); +} + +/* A bunch of private, handy routines. */ + +static inline unsigned int +partCountForBits(unsigned int bits) +{ + return ((bits) + integerPartWidth - 1) / integerPartWidth; +} + +/* Returns 0U-9U. Return values >= 10U are not digits. */ +static inline unsigned int +decDigitValue(unsigned int c) +{ + return c - '0'; +} + +static unsigned int +hexDigitValue(unsigned int c) +{ + unsigned int r; + + r = c - '0'; + if (r <= 9) + return r; + + r = c - 'A'; + if (r <= 5) + return r + 10; + + r = c - 'a'; + if (r <= 5) + return r + 10; + + return -1U; +} + +static inline void +assertArithmeticOK(const llvm::fltSemantics &semantics) { + assert(semantics.arithmeticOK && + "Compile-time arithmetic does not support these semantics"); +} + +/* Return the value of a decimal exponent of the form + [+-]ddddddd. + + If the exponent overflows, returns a large exponent with the + appropriate sign.
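+   For example, "+12" yields 12 and "-7" yields -7, while an input such as
+   "99999999" is clamped to the overlarge value 24000 (negated if the sign
+   was '-').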
*/ +static int +readExponent(StringRef::iterator begin, StringRef::iterator end) +{ + bool isNegative; + unsigned int absExponent; + const unsigned int overlargeExponent = 24000; /* FIXME. */ + StringRef::iterator p = begin; + + assert(p != end && "Exponent has no digits"); + + isNegative = (*p == '-'); + if (*p == '-' || *p == '+') { + p++; + assert(p != end && "Exponent has no digits"); + } + + absExponent = decDigitValue(*p++); + assert(absExponent < 10U && "Invalid character in exponent"); + + for (; p != end; ++p) { + unsigned int value; + + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); + + value += absExponent * 10; + if (absExponent >= overlargeExponent) { + absExponent = overlargeExponent; + p = end; /* outwit assert below */ + break; + } + absExponent = value; + } + + assert(p == end && "Invalid exponent in exponent"); + + if (isNegative) + return -(int) absExponent; + else + return (int) absExponent; +} + +/* This is ugly and needs cleaning up, but I don't immediately see + how whilst remaining safe. */ +static int +totalExponent(StringRef::iterator p, StringRef::iterator end, + int exponentAdjustment) +{ + int unsignedExponent; + bool negative, overflow; + int exponent = 0; + + assert(p != end && "Exponent has no digits"); + + negative = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + assert(p != end && "Exponent has no digits"); + } + + unsignedExponent = 0; + overflow = false; + for (; p != end; ++p) { + unsigned int value; + + value = decDigitValue(*p); + assert(value < 10U && "Invalid character in exponent"); + + unsignedExponent = unsignedExponent * 10 + value; + if (unsignedExponent > 32767) + overflow = true; + } + + if (exponentAdjustment > 32767 || exponentAdjustment < -32768) + overflow = true; + + if (!overflow) { + exponent = unsignedExponent; + if (negative) + exponent = -exponent; + exponent += exponentAdjustment; + if (exponent > 32767 || exponent < -32768) + overflow = true; + } + + if (overflow) + exponent = negative ? -32768: 32767; + + return exponent; +} + +static StringRef::iterator +skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end, + StringRef::iterator *dot) +{ + StringRef::iterator p = begin; + *dot = end; + while (p != end && *p == '0') + p++; + + if (p != end && *p == '.') { + *dot = p++; + + assert(end - begin != 1 && "Significand has no digits"); + + while (p != end && *p == '0') + p++; + } + + return p; +} + +/* Given a normal decimal floating point number of the form + + dddd.dddd[eE][+-]ddd + + where the decimal point and exponent are optional, fill out the + structure D. Exponent is appropriate if the significand is + treated as an integer, and normalizedExponent if the significand + is taken to have the decimal point after a single leading + non-zero digit. + + If the value is zero, V->firstSigDigit points to a non-digit, and + the return exponent is zero.
+*/ +struct decimalInfo { + const char *firstSigDigit; + const char *lastSigDigit; + int exponent; + int normalizedExponent; +}; + +static void +interpretDecimal(StringRef::iterator begin, StringRef::iterator end, + decimalInfo *D) +{ + StringRef::iterator dot = end; + StringRef::iterator p = skipLeadingZeroesAndAnyDot (begin, end, &dot); + + D->firstSigDigit = p; + D->exponent = 0; + D->normalizedExponent = 0; + + for (; p != end; ++p) { + if (*p == '.') { + assert(dot == end && "String contains multiple dots"); + dot = p++; + if (p == end) + break; + } + if (decDigitValue(*p) >= 10U) + break; + } + + if (p != end) { + assert((*p == 'e' || *p == 'E') && "Invalid character in significand"); + assert(p != begin && "Significand has no digits"); + assert((dot == end || p - begin != 1) && "Significand has no digits"); + + /* p points to the first non-digit in the string */ + D->exponent = readExponent(p + 1, end); + + /* Implied decimal point? */ + if (dot == end) + dot = p; + } + + /* If number is all zeroes accept any exponent. */ + if (p != D->firstSigDigit) { + /* Drop insignificant trailing zeroes. */ + if (p != begin) { + do + do + p--; + while (p != begin && *p == '0'); + while (p != begin && *p == '.'); + } + + /* Adjust the exponents for any decimal point. */ + D->exponent += static_cast<exponent_t>((dot - p) - (dot > p)); + D->normalizedExponent = (D->exponent + + static_cast<exponent_t>((p - D->firstSigDigit) + - (dot > D->firstSigDigit && dot < p))); + } + + D->lastSigDigit = p; +} + +/* Return the trailing fraction of a hexadecimal number. + DIGITVALUE is the first hex digit of the fraction, P points to + the next digit. */ +static lostFraction +trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, + unsigned int digitValue) +{ + unsigned int hexDigit; + + /* If the first trailing digit isn't 0 or 8 we can work out the + fraction immediately. */ + if (digitValue > 8) + return lfMoreThanHalf; + else if (digitValue < 8 && digitValue > 0) + return lfLessThanHalf; + + /* Otherwise we need to find the first non-zero digit. */ + while (*p == '0') + p++; + + assert(p != end && "Invalid trailing hexadecimal fraction!"); + + hexDigit = hexDigitValue(*p); + + /* If we ran off the end it is exactly zero or one-half, otherwise + a little more. */ + if (hexDigit == -1U) + return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; + else + return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; +} + +/* Return the fraction lost were a bignum truncated losing the least + significant BITS bits. */ +static lostFraction +lostFractionThroughTruncation(const integerPart *parts, + unsigned int partCount, + unsigned int bits) +{ + unsigned int lsb; + + lsb = APInt::tcLSB(parts, partCount); + + /* Note this is guaranteed true if bits == 0, or LSB == -1U. */ + if (bits <= lsb) + return lfExactlyZero; + if (bits == lsb + 1) + return lfExactlyHalf; + if (bits <= partCount * integerPartWidth && + APInt::tcExtractBit(parts, bits - 1)) + return lfMoreThanHalf; + + return lfLessThanHalf; +} + +/* Shift DST right BITS bits noting lost fraction. */ +static lostFraction +shiftRight(integerPart *dst, unsigned int parts, unsigned int bits) +{ + lostFraction lost_fraction; + + lost_fraction = lostFractionThroughTruncation(dst, parts, bits); + + APInt::tcShiftRight(dst, parts, bits); + + return lost_fraction; +} + +/* Combine the effect of two lost fractions.
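+   For instance, an exactly-zero more-significant loss combined with any
+   less-significant loss becomes lfLessThanHalf, and an exact half combined
+   with further loss becomes lfMoreThanHalf; otherwise the more significant
+   classification stands.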
*/ +static lostFraction +combineLostFractions(lostFraction moreSignificant, + lostFraction lessSignificant) +{ + if (lessSignificant != lfExactlyZero) { + if (moreSignificant == lfExactlyZero) + moreSignificant = lfLessThanHalf; + else if (moreSignificant == lfExactlyHalf) + moreSignificant = lfMoreThanHalf; + } + + return moreSignificant; +} + +/* The error from the true value, in half-ulps, on multiplying two + floating point numbers, which differ from the value they + approximate by at most HUE1 and HUE2 half-ulps, is strictly less + than the returned value. + + See "How to Read Floating Point Numbers Accurately" by William D + Clinger. */ +static unsigned int +HUerrBound(bool inexactMultiply, unsigned int HUerr1, unsigned int HUerr2) +{ + assert(HUerr1 < 2 || HUerr2 < 2 || (HUerr1 + HUerr2 < 8)); + + if (HUerr1 + HUerr2 == 0) + return inexactMultiply * 2; /* <= inexactMultiply half-ulps. */ + else + return inexactMultiply + 2 * (HUerr1 + HUerr2); +} + +/* The number of ulps from the boundary (zero, or half if ISNEAREST) + when the least significant BITS are truncated. BITS cannot be + zero. */ +static integerPart +ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) +{ + unsigned int count, partBits; + integerPart part, boundary; + + assert(bits != 0); + + bits--; + count = bits / integerPartWidth; + partBits = bits % integerPartWidth + 1; + + part = parts[count] & (~(integerPart) 0 >> (integerPartWidth - partBits)); + + if (isNearest) + boundary = (integerPart) 1 << (partBits - 1); + else + boundary = 0; + + if (count == 0) { + if (part - boundary <= boundary - part) + return part - boundary; + else + return boundary - part; + } + + if (part == boundary) { + while (--count) + if (parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return parts[0]; + } else if (part == boundary - 1) { + while (--count) + if (~parts[count]) + return ~(integerPart) 0; /* A lot. */ + + return -parts[0]; + } + + return ~(integerPart) 0; /* A lot. */ +} + +/* Place pow(5, power) in DST, and return the number of parts used. + DST must be at least one part larger than size of the answer. */ +static unsigned int +powerOf5(integerPart *dst, unsigned int power) +{ + static const integerPart firstEightPowers[] = { 1, 5, 25, 125, 625, 3125, + 15625, 78125 }; + integerPart pow5s[maxPowerOfFiveParts * 2 + 5]; + pow5s[0] = 78125 * 5; + + unsigned int partsCount[16] = { 1 }; + integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5; + unsigned int result; + assert(power <= maxExponent); + + p1 = dst; + p2 = scratch; + + *p1 = firstEightPowers[power & 7]; + power >>= 3; + + result = 1; + pow5 = pow5s; + + for (unsigned int n = 0; power; power >>= 1, n++) { + unsigned int pc; + + pc = partsCount[n]; + + /* Calculate pow(5,pow(2,n+3)) if we haven't yet. */ + if (pc == 0) { + pc = partsCount[n - 1]; + APInt::tcFullMultiply(pow5, pow5 - pc, pow5 - pc, pc, pc); + pc *= 2; + if (pow5[pc - 1] == 0) + pc--; + partsCount[n] = pc; + } + + if (power & 1) { + integerPart *tmp; + + APInt::tcFullMultiply(p2, p1, pow5, result, pc); + result += pc; + if (p2[result - 1] == 0) + result--; + + /* Now result is in p1 with partsCount parts and p2 is scratch + space. */ + tmp = p1, p1 = p2, p2 = tmp; + } + + pow5 += pc; + } + + if (p1 != dst) + APInt::tcAssign(dst, p1, result); + + return result; +} + +/* Zero at the end to avoid modular arithmetic when adding one; used + when rounding up during hexadecimal output. 
*/ +static const char hexDigitsLower[] = "0123456789abcdef0"; +static const char hexDigitsUpper[] = "0123456789ABCDEF0"; +static const char infinityL[] = "infinity"; +static const char infinityU[] = "INFINITY"; +static const char NaNL[] = "nan"; +static const char NaNU[] = "NAN"; + +/* Write out an integerPart in hexadecimal, starting with the most + significant nibble. Write out exactly COUNT hexdigits, return + COUNT. */ +static unsigned int +partAsHex (char *dst, integerPart part, unsigned int count, + const char *hexDigitChars) +{ + unsigned int result = count; + + assert(count != 0 && count <= integerPartWidth / 4); + + part >>= (integerPartWidth - 4 * count); + while (count--) { + dst[count] = hexDigitChars[part & 0xf]; + part >>= 4; + } + + return result; +} + +/* Write out an unsigned decimal integer. */ +static char * +writeUnsignedDecimal (char *dst, unsigned int n) +{ + char buff[40], *p; + + p = buff; + do + *p++ = '0' + n % 10; + while (n /= 10); + + do + *dst++ = *--p; + while (p != buff); + + return dst; +} + +/* Write out a signed decimal integer. */ +static char * +writeSignedDecimal (char *dst, int value) +{ + if (value < 0) { + *dst++ = '-'; + dst = writeUnsignedDecimal(dst, -(unsigned) value); + } else + dst = writeUnsignedDecimal(dst, value); + + return dst; +} + +/* Constructors. */ +void +APFloat::initialize(const fltSemantics *ourSemantics) +{ + unsigned int count; + + semantics = ourSemantics; + count = partCount(); + if (count > 1) + significand.parts = new integerPart[count]; +} + +void +APFloat::freeSignificand() +{ + if (partCount() > 1) + delete [] significand.parts; +} + +void +APFloat::assign(const APFloat &rhs) +{ + assert(semantics == rhs.semantics); + + sign = rhs.sign; + category = rhs.category; + exponent = rhs.exponent; + sign2 = rhs.sign2; + exponent2 = rhs.exponent2; + if (category == fcNormal || category == fcNaN) + copySignificand(rhs); +} + +void +APFloat::copySignificand(const APFloat &rhs) +{ + assert(category == fcNormal || category == fcNaN); + assert(rhs.partCount() >= partCount()); + + APInt::tcAssign(significandParts(), rhs.significandParts(), + partCount()); +} + +/* Make this number a NaN, with an arbitrary but deterministic value + for the significand. If double or longer, this is a signalling NaN, + which may not be ideal. If float, this is QNaN(0). */ +void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) +{ + category = fcNaN; + sign = Negative; + + integerPart *significand = significandParts(); + unsigned numParts = partCount(); + + // Set the significand bits to the fill. + if (!fill || fill->getNumWords() < numParts) + APInt::tcSet(significand, 0, numParts); + if (fill) { + APInt::tcAssign(significand, fill->getRawData(), + std::min(fill->getNumWords(), numParts)); + + // Zero out the excess bits of the significand. + unsigned bitsToPreserve = semantics->precision - 1; + unsigned part = bitsToPreserve / 64; + bitsToPreserve %= 64; + significand[part] &= ((1ULL << bitsToPreserve) - 1); + for (part++; part != numParts; ++part) + significand[part] = 0; + } + + unsigned QNaNBit = semantics->precision - 2; + + if (SNaN) { + // We always have to clear the QNaN bit to make it an SNaN. + APInt::tcClearBit(significand, QNaNBit); + + // If there are no bits set in the payload, we have to set + // *something* to make it a NaN instead of an infinity; + // conventionally, this is the next bit down from the QNaN bit. 
+ if (APInt::tcIsZero(significand, numParts)) + APInt::tcSetBit(significand, QNaNBit - 1); + } else { + // We always have to set the QNaN bit to make it a QNaN. + APInt::tcSetBit(significand, QNaNBit); + } + + // For x87 extended precision, we want to make a NaN, not a + // pseudo-NaN. Maybe we should expose the ability to make + // pseudo-NaNs? + if (semantics == &APFloat::x87DoubleExtended) + APInt::tcSetBit(significand, QNaNBit + 1); +} + +APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative, + const APInt *fill) { + APFloat value(Sem, uninitialized); + value.makeNaN(SNaN, Negative, fill); + return value; +} + +APFloat & +APFloat::operator=(const APFloat &rhs) +{ + if (this != &rhs) { + if (semantics != rhs.semantics) { + freeSignificand(); + initialize(rhs.semantics); + } + assign(rhs); + } + + return *this; +} + +bool +APFloat::bitwiseIsEqual(const APFloat &rhs) const { + if (this == &rhs) + return true; + if (semantics != rhs.semantics || + category != rhs.category || + sign != rhs.sign) + return false; + if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + sign2 != rhs.sign2) + return false; + if (category==fcZero || category==fcInfinity) + return true; + else if (category==fcNormal && exponent!=rhs.exponent) + return false; + else if (semantics==(const llvm::fltSemantics*)&PPCDoubleDouble && + exponent2!=rhs.exponent2) + return false; + else { + int i= partCount(); + const integerPart* p=significandParts(); + const integerPart* q=rhs.significandParts(); + for (; i>0; i--, p++, q++) { + if (*p != *q) + return false; + } + return true; + } +} + +APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) + : exponent2(0), sign2(0) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + sign = 0; + zeroSignificand(); + exponent = ourSemantics.precision - 1; + significandParts()[0] = value; + normalize(rmNearestTiesToEven, lfExactlyZero); +} + +APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + category = fcZero; + sign = false; +} + +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) + : exponent2(0), sign2(0) { + assertArithmeticOK(ourSemantics); + // Allocates storage if necessary but does not initialize it. + initialize(&ourSemantics); +} + +APFloat::APFloat(const fltSemantics &ourSemantics, + fltCategory ourCategory, bool negative) + : exponent2(0), sign2(0) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + category = ourCategory; + sign = negative; + if (category == fcNormal) + category = fcZero; + else if (ourCategory == fcNaN) + makeNaN(); +} + +APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) + : exponent2(0), sign2(0) { + assertArithmeticOK(ourSemantics); + initialize(&ourSemantics); + convertFromString(text, rmNearestTiesToEven); +} + +APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) { + initialize(rhs.semantics); + assign(rhs); +} + +APFloat::~APFloat() +{ + freeSignificand(); +} + +// Profile - This method 'profiles' an APFloat for use with FoldingSet. 
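+// Two APFloats with identical bit patterns get identical IDs, so a
+// FoldingSet can unique them via the APInt from bitcastToAPInt().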
+void APFloat::Profile(FoldingSetNodeID& ID) const {
+  ID.Add(bitcastToAPInt());
+}
+
+unsigned int
+APFloat::partCount() const
+{
+  return partCountForBits(semantics->precision + 1);
+}
+
+unsigned int
+APFloat::semanticsPrecision(const fltSemantics &semantics)
+{
+  return semantics.precision;
+}
+
+const integerPart *
+APFloat::significandParts() const
+{
+  return const_cast<APFloat *>(this)->significandParts();
+}
+
+integerPart *
+APFloat::significandParts()
+{
+  assert(category == fcNormal || category == fcNaN);
+
+  if (partCount() > 1)
+    return significand.parts;
+  else
+    return &significand.part;
+}
+
+void
+APFloat::zeroSignificand()
+{
+  category = fcNormal;
+  APInt::tcSet(significandParts(), 0, partCount());
+}
+
+/* Increment an fcNormal floating point number's significand.  */
+void
+APFloat::incrementSignificand()
+{
+  integerPart carry;
+
+  carry = APInt::tcIncrement(significandParts(), partCount());
+
+  /* Our callers should never cause us to overflow.  */
+  assert(carry == 0);
+}
+
+/* Add the significand of the RHS.  Returns the carry flag.  */
+integerPart
+APFloat::addSignificand(const APFloat &rhs)
+{
+  integerPart *parts;
+
+  parts = significandParts();
+
+  assert(semantics == rhs.semantics);
+  assert(exponent == rhs.exponent);
+
+  return APInt::tcAdd(parts, rhs.significandParts(), 0, partCount());
+}
+
+/* Subtract the significand of the RHS with a borrow flag.  Returns
+   the borrow flag.  */
+integerPart
+APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
+{
+  integerPart *parts;
+
+  parts = significandParts();
+
+  assert(semantics == rhs.semantics);
+  assert(exponent == rhs.exponent);
+
+  return APInt::tcSubtract(parts, rhs.significandParts(), borrow,
+                           partCount());
+}
+
+/* Multiply the significand of the RHS.  If ADDEND is non-NULL, add it
+   on to the full-precision result of the multiplication.  Returns the
+   lost fraction.  */
+lostFraction
+APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
+{
+  unsigned int omsb;        // One, not zero, based MSB.
+  unsigned int partsCount, newPartsCount, precision;
+  integerPart *lhsSignificand;
+  integerPart scratch[4];
+  integerPart *fullSignificand;
+  lostFraction lost_fraction;
+  bool ignored;
+
+  assert(semantics == rhs.semantics);
+
+  precision = semantics->precision;
+  newPartsCount = partCountForBits(precision * 2);
+
+  if (newPartsCount > 4)
+    fullSignificand = new integerPart[newPartsCount];
+  else
+    fullSignificand = scratch;
+
+  lhsSignificand = significandParts();
+  partsCount = partCount();
+
+  APInt::tcFullMultiply(fullSignificand, lhsSignificand,
+                        rhs.significandParts(), partsCount, partsCount);
+
+  lost_fraction = lfExactlyZero;
+  omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+  exponent += rhs.exponent;
+
+  if (addend) {
+    Significand savedSignificand = significand;
+    const fltSemantics *savedSemantics = semantics;
+    fltSemantics extendedSemantics;
+    opStatus status;
+    unsigned int extendedPrecision;
+
+    /* Normalize our MSB.  */
+    extendedPrecision = precision + precision - 1;
+    if (omsb != extendedPrecision) {
+      APInt::tcShiftLeft(fullSignificand, newPartsCount,
+                         extendedPrecision - omsb);
+      exponent -= extendedPrecision - omsb;
+    }
+
+    /* Create new semantics.
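+       The extended format carries precision + precision - 1 significand
+       bits, enough for the exact double-width product, so the addend is
+       folded in before any rounding can occur.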
+       */
+    extendedSemantics = *semantics;
+    extendedSemantics.precision = extendedPrecision;
+
+    if (newPartsCount == 1)
+      significand.part = fullSignificand[0];
+    else
+      significand.parts = fullSignificand;
+    semantics = &extendedSemantics;
+
+    APFloat extendedAddend(*addend);
+    status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
+    assert(status == opOK);
+    lost_fraction = addOrSubtractSignificand(extendedAddend, false);
+
+    /* Restore our state.  */
+    if (newPartsCount == 1)
+      fullSignificand[0] = significand.part;
+    significand = savedSignificand;
+    semantics = savedSemantics;
+
+    omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
+  }
+
+  exponent -= (precision - 1);
+
+  if (omsb > precision) {
+    unsigned int bits, significantParts;
+    lostFraction lf;
+
+    bits = omsb - precision;
+    significantParts = partCountForBits(omsb);
+    lf = shiftRight(fullSignificand, significantParts, bits);
+    lost_fraction = combineLostFractions(lf, lost_fraction);
+    exponent += bits;
+  }
+
+  APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
+
+  if (newPartsCount > 4)
+    delete [] fullSignificand;
+
+  return lost_fraction;
+}
+
+/* Divide the significand of the LHS by that of the RHS; the quotient
+   replaces the LHS significand.  Returns the lost fraction.  */
+lostFraction
+APFloat::divideSignificand(const APFloat &rhs)
+{
+  unsigned int bit, i, partsCount;
+  const integerPart *rhsSignificand;
+  integerPart *lhsSignificand, *dividend, *divisor;
+  integerPart scratch[4];
+  lostFraction lost_fraction;
+
+  assert(semantics == rhs.semantics);
+
+  lhsSignificand = significandParts();
+  rhsSignificand = rhs.significandParts();
+  partsCount = partCount();
+
+  if (partsCount > 2)
+    dividend = new integerPart[partsCount * 2];
+  else
+    dividend = scratch;
+
+  divisor = dividend + partsCount;
+
+  /* Copy the dividend and divisor as they will be modified in-place.  */
+  for (i = 0; i < partsCount; i++) {
+    dividend[i] = lhsSignificand[i];
+    divisor[i] = rhsSignificand[i];
+    lhsSignificand[i] = 0;
+  }
+
+  exponent -= rhs.exponent;
+
+  unsigned int precision = semantics->precision;
+
+  /* Normalize the divisor.  */
+  bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
+  if (bit) {
+    exponent += bit;
+    APInt::tcShiftLeft(divisor, partsCount, bit);
+  }
+
+  /* Normalize the dividend.  */
+  bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
+  if (bit) {
+    exponent -= bit;
+    APInt::tcShiftLeft(dividend, partsCount, bit);
+  }
+
+  /* Ensure the dividend >= divisor initially for the loop below.
+     Incidentally, this means that the division loop below is
+     guaranteed to set the integer bit to one.  */
+  if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+    exponent--;
+    APInt::tcShiftLeft(dividend, partsCount, 1);
+    assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
+  }
+
+  /* Long division.  */
+  for (bit = precision; bit; bit -= 1) {
+    if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+      APInt::tcSubtract(dividend, divisor, 0, partsCount);
+      APInt::tcSetBit(lhsSignificand, bit - 1);
+    }
+
+    APInt::tcShiftLeft(dividend, partsCount, 1);
+  }
+
+  /* Figure out the lost fraction.
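+     After the loop the dividend holds twice the remainder, so comparing
+     it against the divisor tells whether the discarded tail is below,
+     exactly at, or above one half ULP.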
+     */
+  int cmp = APInt::tcCompare(dividend, divisor, partsCount);
+
+  if (cmp > 0)
+    lost_fraction = lfMoreThanHalf;
+  else if (cmp == 0)
+    lost_fraction = lfExactlyHalf;
+  else if (APInt::tcIsZero(dividend, partsCount))
+    lost_fraction = lfExactlyZero;
+  else
+    lost_fraction = lfLessThanHalf;
+
+  if (partsCount > 2)
+    delete [] dividend;
+
+  return lost_fraction;
+}
+
+unsigned int
+APFloat::significandMSB() const
+{
+  return APInt::tcMSB(significandParts(), partCount());
+}
+
+unsigned int
+APFloat::significandLSB() const
+{
+  return APInt::tcLSB(significandParts(), partCount());
+}
+
+/* Note that a zero result is NOT normalized to fcZero.  */
+lostFraction
+APFloat::shiftSignificandRight(unsigned int bits)
+{
+  /* Our exponent should not overflow.  */
+  assert((exponent_t) (exponent + bits) >= exponent);
+
+  exponent += bits;
+
+  return shiftRight(significandParts(), partCount(), bits);
+}
+
+/* Shift the significand left BITS bits, subtract BITS from its exponent.  */
+void
+APFloat::shiftSignificandLeft(unsigned int bits)
+{
+  assert(bits < semantics->precision);
+
+  if (bits) {
+    unsigned int partsCount = partCount();
+
+    APInt::tcShiftLeft(significandParts(), partsCount, bits);
+    exponent -= bits;
+
+    assert(!APInt::tcIsZero(significandParts(), partsCount));
+  }
+}
+
+APFloat::cmpResult
+APFloat::compareAbsoluteValue(const APFloat &rhs) const
+{
+  int compare;
+
+  assert(semantics == rhs.semantics);
+  assert(category == fcNormal);
+  assert(rhs.category == fcNormal);
+
+  compare = exponent - rhs.exponent;
+
+  /* If exponents are equal, do an unsigned bignum comparison of the
+     significands.  */
+  if (compare == 0)
+    compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
+                               partCount());
+
+  if (compare > 0)
+    return cmpGreaterThan;
+  else if (compare < 0)
+    return cmpLessThan;
+  else
+    return cmpEqual;
+}
+
+/* Handle overflow.  Sign is preserved.  We either become infinity or
+   the largest finite number.  */
+APFloat::opStatus
+APFloat::handleOverflow(roundingMode rounding_mode)
+{
+  /* Infinity?  */
+  if (rounding_mode == rmNearestTiesToEven ||
+      rounding_mode == rmNearestTiesToAway ||
+      (rounding_mode == rmTowardPositive && !sign) ||
+      (rounding_mode == rmTowardNegative && sign)) {
+    category = fcInfinity;
+    return (opStatus) (opOverflow | opInexact);
+  }
+
+  /* Otherwise we become the largest finite number.  */
+  category = fcNormal;
+  exponent = semantics->maxExponent;
+  APInt::tcSetLeastSignificantBits(significandParts(), partCount(),
+                                   semantics->precision);
+
+  return opInexact;
+}
+
+/* Returns TRUE if, when truncating the current number, with BIT the
+   new LSB, with the given lost fraction and rounding mode, the result
+   would need to be rounded away from zero (i.e., by increasing the
+   significand).  This routine must work for fcZero of both signs, and
+   fcNormal numbers.  */
+bool
+APFloat::roundAwayFromZero(roundingMode rounding_mode,
+                           lostFraction lost_fraction,
+                           unsigned int bit) const
+{
+  /* NaNs and infinities should not have lost fractions.  */
+  assert(category == fcNormal || category == fcZero);
+
+  /* Current callers never pass this so we don't handle it.  */
+  assert(lost_fraction != lfExactlyZero);
+
+  switch (rounding_mode) {
+  default:
+    llvm_unreachable(0);
+
+  case rmNearestTiesToAway:
+    return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
+
+  case rmNearestTiesToEven:
+    if (lost_fraction == lfMoreThanHalf)
+      return true;
+
+    /* Our zeroes don't have a significand to test.
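+       A tie rounds away exactly when the bit that would become the LSB
+       is odd, so halfway cases always land on an even significand.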
+       */
+    if (lost_fraction == lfExactlyHalf && category != fcZero)
+      return APInt::tcExtractBit(significandParts(), bit);
+
+    return false;
+
+  case rmTowardZero:
+    return false;
+
+  case rmTowardPositive:
+    return sign == false;
+
+  case rmTowardNegative:
+    return sign == true;
+  }
+}
+
+APFloat::opStatus
+APFloat::normalize(roundingMode rounding_mode,
+                   lostFraction lost_fraction)
+{
+  unsigned int omsb;                /* One, not zero, based MSB.  */
+  int exponentChange;
+
+  if (category != fcNormal)
+    return opOK;
+
+  /* Before rounding normalize the exponent of fcNormal numbers.  */
+  omsb = significandMSB() + 1;
+
+  if (omsb) {
+    /* OMSB is numbered from 1.  We want to place it in the integer
+       bit numbered PRECISION if possible, with a compensating change in
+       the exponent.  */
+    exponentChange = omsb - semantics->precision;
+
+    /* If the resulting exponent is too high, overflow according to
+       the rounding mode.  */
+    if (exponent + exponentChange > semantics->maxExponent)
+      return handleOverflow(rounding_mode);
+
+    /* Subnormal numbers have exponent minExponent, and their MSB
+       is forced based on that.  */
+    if (exponent + exponentChange < semantics->minExponent)
+      exponentChange = semantics->minExponent - exponent;
+
+    /* Shifting left is easy as we don't lose precision.  */
+    if (exponentChange < 0) {
+      assert(lost_fraction == lfExactlyZero);
+
+      shiftSignificandLeft(-exponentChange);
+
+      return opOK;
+    }
+
+    if (exponentChange > 0) {
+      lostFraction lf;
+
+      /* Shift right and capture any new lost fraction.  */
+      lf = shiftSignificandRight(exponentChange);
+
+      lost_fraction = combineLostFractions(lf, lost_fraction);
+
+      /* Keep OMSB up-to-date.  */
+      if (omsb > (unsigned) exponentChange)
+        omsb -= exponentChange;
+      else
+        omsb = 0;
+    }
+  }
+
+  /* Now round the number according to rounding_mode given the lost
+     fraction.  */
+
+  /* As specified in IEEE 754, since we do not trap we do not report
+     underflow for exact results.  */
+  if (lost_fraction == lfExactlyZero) {
+    /* Canonicalize zeroes.  */
+    if (omsb == 0)
+      category = fcZero;
+
+    return opOK;
+  }
+
+  /* Increment the significand if we're rounding away from zero.  */
+  if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+    if (omsb == 0)
+      exponent = semantics->minExponent;
+
+    incrementSignificand();
+    omsb = significandMSB() + 1;
+
+    /* Did the significand increment overflow?  */
+    if (omsb == (unsigned) semantics->precision + 1) {
+      /* Renormalize by incrementing the exponent and shifting our
+         significand right one.  However if we already have the
+         maximum exponent we overflow to infinity.  */
+      if (exponent == semantics->maxExponent) {
+        category = fcInfinity;
+
+        return (opStatus) (opOverflow | opInexact);
+      }
+
+      shiftSignificandRight(1);
+
+      return opInexact;
+    }
+  }
+
+  /* The normal case - we were and are not denormal, and any
+     significand increment above didn't overflow.  */
+  if (omsb == semantics->precision)
+    return opInexact;
+
+  /* We have a non-zero denormal.  */
+  assert(omsb < semantics->precision);
+
+  /* Canonicalize zeroes.  */
+  if (omsb == 0)
+    category = fcZero;
+
+  /* The fcZero case is a denormal that underflowed to zero.
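+     Either way the result is inexact and has underflowed; for example,
+     under round-to-nearest a value below half the smallest denormal
+     comes out here as a signed zero.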
*/ + return (opStatus) (opUnderflow | opInexact); +} + +APFloat::opStatus +APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) +{ + switch (convolve(category, rhs.category)) { + default: + llvm_unreachable(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcNormal, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcZero): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + case convolve(fcZero, fcInfinity): + category = fcInfinity; + sign = rhs.sign ^ subtract; + return opOK; + + case convolve(fcZero, fcNormal): + assign(rhs); + sign = rhs.sign ^ subtract; + return opOK; + + case convolve(fcZero, fcZero): + /* Sign depends on rounding mode; handled by caller. */ + return opOK; + + case convolve(fcInfinity, fcInfinity): + /* Differently signed infinities can only be validly + subtracted. */ + if (((sign ^ rhs.sign)!=0) != subtract) { + makeNaN(); + return opInvalidOp; + } + + return opOK; + + case convolve(fcNormal, fcNormal): + return opDivByZero; + } +} + +/* Add or subtract two normal numbers. */ +lostFraction +APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) +{ + integerPart carry; + lostFraction lost_fraction; + int bits; + + /* Determine if the operation on the absolute values is effectively + an addition or subtraction. */ + subtract ^= (sign ^ rhs.sign) ? true : false; + + /* Are we bigger exponent-wise than the RHS? */ + bits = exponent - rhs.exponent; + + /* Subtraction is more subtle than one might naively expect. */ + if (subtract) { + APFloat temp_rhs(rhs); + bool reverse; + + if (bits == 0) { + reverse = compareAbsoluteValue(temp_rhs) == cmpLessThan; + lost_fraction = lfExactlyZero; + } else if (bits > 0) { + lost_fraction = temp_rhs.shiftSignificandRight(bits - 1); + shiftSignificandLeft(1); + reverse = false; + } else { + lost_fraction = shiftSignificandRight(-bits - 1); + temp_rhs.shiftSignificandLeft(1); + reverse = true; + } + + if (reverse) { + carry = temp_rhs.subtractSignificand + (*this, lost_fraction != lfExactlyZero); + copySignificand(temp_rhs); + sign = !sign; + } else { + carry = subtractSignificand + (temp_rhs, lost_fraction != lfExactlyZero); + } + + /* Invert the lost fraction - it was on the RHS and + subtracted. */ + if (lost_fraction == lfLessThanHalf) + lost_fraction = lfMoreThanHalf; + else if (lost_fraction == lfMoreThanHalf) + lost_fraction = lfLessThanHalf; + + /* The code above is intended to ensure that no borrow is + necessary. */ + assert(!carry); + } else { + if (bits > 0) { + APFloat temp_rhs(rhs); + + lost_fraction = temp_rhs.shiftSignificandRight(bits); + carry = addSignificand(temp_rhs); + } else { + lost_fraction = shiftSignificandRight(-bits); + carry = addSignificand(rhs); + } + + /* We have a guard bit; generating a carry cannot happen. 
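+       The parts are sized for precision + 1 bits, so the sum of two
+       precision-bit significands at most sets the spare top bit and
+       cannot carry out of the buffer.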
*/ + assert(!carry); + } + + return lost_fraction; +} + +APFloat::opStatus +APFloat::multiplySpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + llvm_unreachable(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcInfinity): + category = fcInfinity; + return opOK; + + case convolve(fcZero, fcNormal): + case convolve(fcNormal, fcZero): + case convolve(fcZero, fcZero): + category = fcZero; + return opOK; + + case convolve(fcZero, fcInfinity): + case convolve(fcInfinity, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +APFloat::opStatus +APFloat::divideSpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + llvm_unreachable(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcInfinity, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcZero, fcInfinity): + case convolve(fcZero, fcNormal): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcInfinity): + category = fcZero; + return opOK; + + case convolve(fcNormal, fcZero): + category = fcInfinity; + return opDivByZero; + + case convolve(fcInfinity, fcInfinity): + case convolve(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +APFloat::opStatus +APFloat::modSpecials(const APFloat &rhs) +{ + switch (convolve(category, rhs.category)) { + default: + llvm_unreachable(0); + + case convolve(fcNaN, fcZero): + case convolve(fcNaN, fcNormal): + case convolve(fcNaN, fcInfinity): + case convolve(fcNaN, fcNaN): + case convolve(fcZero, fcInfinity): + case convolve(fcZero, fcNormal): + case convolve(fcNormal, fcInfinity): + return opOK; + + case convolve(fcZero, fcNaN): + case convolve(fcNormal, fcNaN): + case convolve(fcInfinity, fcNaN): + category = fcNaN; + copySignificand(rhs); + return opOK; + + case convolve(fcNormal, fcZero): + case convolve(fcInfinity, fcZero): + case convolve(fcInfinity, fcNormal): + case convolve(fcInfinity, fcInfinity): + case convolve(fcZero, fcZero): + makeNaN(); + return opInvalidOp; + + case convolve(fcNormal, fcNormal): + return opOK; + } +} + +/* Change sign. */ +void +APFloat::changeSign() +{ + /* Look mummy, this one's easy. */ + sign = !sign; +} + +void +APFloat::clearSign() +{ + /* So is this one. */ + sign = 0; +} + +void +APFloat::copySign(const APFloat &rhs) +{ + /* And this one. */ + sign = rhs.sign; +} + +/* Normalized addition or subtraction. */ +APFloat::opStatus +APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode, + bool subtract) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + + fs = addOrSubtractSpecials(rhs, subtract); + + /* This return code means it was not a simple case. 
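+     addOrSubtractSpecials reports opDivByZero only for the
+     normal-normal case, which requires the full significand
+     arithmetic below.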
*/ + if (fs == opDivByZero) { + lostFraction lost_fraction; + + lost_fraction = addOrSubtractSignificand(rhs, subtract); + fs = normalize(rounding_mode, lost_fraction); + + /* Can only be zero if we lost no fraction. */ + assert(category != fcZero || lost_fraction == lfExactlyZero); + } + + /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a + positive zero unless rounding to minus infinity, except that + adding two like-signed zeroes gives that zero. */ + if (category == fcZero) { + if (rhs.category != fcZero || (sign == rhs.sign) == subtract) + sign = (rounding_mode == rmTowardNegative); + } + + return fs; +} + +/* Normalized addition. */ +APFloat::opStatus +APFloat::add(const APFloat &rhs, roundingMode rounding_mode) +{ + return addOrSubtract(rhs, rounding_mode, false); +} + +/* Normalized subtraction. */ +APFloat::opStatus +APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode) +{ + return addOrSubtract(rhs, rounding_mode, true); +} + +/* Normalized multiply. */ +APFloat::opStatus +APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + sign ^= rhs.sign; + fs = multiplySpecials(rhs); + + if (category == fcNormal) { + lostFraction lost_fraction = multiplySignificand(rhs, 0); + fs = normalize(rounding_mode, lost_fraction); + if (lost_fraction != lfExactlyZero) + fs = (opStatus) (fs | opInexact); + } + + return fs; +} + +/* Normalized divide. */ +APFloat::opStatus +APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + sign ^= rhs.sign; + fs = divideSpecials(rhs); + + if (category == fcNormal) { + lostFraction lost_fraction = divideSignificand(rhs); + fs = normalize(rounding_mode, lost_fraction); + if (lost_fraction != lfExactlyZero) + fs = (opStatus) (fs | opInexact); + } + + return fs; +} + +/* Normalized remainder. This is not currently correct in all cases. */ +APFloat::opStatus +APFloat::remainder(const APFloat &rhs) +{ + opStatus fs; + APFloat V = *this; + unsigned int origSign = sign; + + assertArithmeticOK(*semantics); + fs = V.divide(rhs, rmNearestTiesToEven); + if (fs == opDivByZero) + return fs; + + int parts = partCount(); + integerPart *x = new integerPart[parts]; + bool ignored; + fs = V.convertToInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven, &ignored); + if (fs==opInvalidOp) + return fs; + + fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + assert(fs==opOK); // should always work + + fs = V.multiply(rhs, rmNearestTiesToEven); + assert(fs==opOK || fs==opInexact); // should not overflow or underflow + + fs = subtract(V, rmNearestTiesToEven); + assert(fs==opOK || fs==opInexact); // likewise + + if (isZero()) + sign = origSign; // IEEE754 requires this + delete[] x; + return fs; +} + +/* Normalized llvm frem (C fmod). + This is not currently correct in all cases. 
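+   It forms x - trunc(x/y)*y via an integer-valued quotient, so a
+   quotient too wide for the intermediate integer is one case it does
+   not handle.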
*/ +APFloat::opStatus +APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) +{ + opStatus fs; + assertArithmeticOK(*semantics); + fs = modSpecials(rhs); + + if (category == fcNormal && rhs.category == fcNormal) { + APFloat V = *this; + unsigned int origSign = sign; + + fs = V.divide(rhs, rmNearestTiesToEven); + if (fs == opDivByZero) + return fs; + + int parts = partCount(); + integerPart *x = new integerPart[parts]; + bool ignored; + fs = V.convertToInteger(x, parts * integerPartWidth, true, + rmTowardZero, &ignored); + if (fs==opInvalidOp) + return fs; + + fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true, + rmNearestTiesToEven); + assert(fs==opOK); // should always work + + fs = V.multiply(rhs, rounding_mode); + assert(fs==opOK || fs==opInexact); // should not overflow or underflow + + fs = subtract(V, rounding_mode); + assert(fs==opOK || fs==opInexact); // likewise + + if (isZero()) + sign = origSign; // IEEE754 requires this + delete[] x; + } + return fs; +} + +/* Normalized fused-multiply-add. */ +APFloat::opStatus +APFloat::fusedMultiplyAdd(const APFloat &multiplicand, + const APFloat &addend, + roundingMode rounding_mode) +{ + opStatus fs; + + assertArithmeticOK(*semantics); + + /* Post-multiplication sign, before addition. */ + sign ^= multiplicand.sign; + + /* If and only if all arguments are normal do we need to do an + extended-precision calculation. */ + if (category == fcNormal && + multiplicand.category == fcNormal && + addend.category == fcNormal) { + lostFraction lost_fraction; + + lost_fraction = multiplySignificand(multiplicand, &addend); + fs = normalize(rounding_mode, lost_fraction); + if (lost_fraction != lfExactlyZero) + fs = (opStatus) (fs | opInexact); + + /* If two numbers add (exactly) to zero, IEEE 754 decrees it is a + positive zero unless rounding to minus infinity, except that + adding two like-signed zeroes gives that zero. */ + if (category == fcZero && sign != addend.sign) + sign = (rounding_mode == rmTowardNegative); + } else { + fs = multiplySpecials(multiplicand); + + /* FS can only be opOK or opInvalidOp. There is no more work + to do in the latter case. The IEEE-754R standard says it is + implementation-defined in this case whether, if ADDEND is a + quiet NaN, we raise invalid op; this implementation does so. + + If we need to do the addition we can do so with normal + precision. */ + if (fs == opOK) + fs = addOrSubtract(addend, rounding_mode, false); + } + + return fs; +} + +/* Comparison requires normalized numbers. 
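+   Normalization guarantees that fcNormal values have their integer bit
+   set, so compareAbsoluteValue can order magnitudes by exponent first
+   and consult the significands only on a tie.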
+   */
+APFloat::cmpResult
+APFloat::compare(const APFloat &rhs) const
+{
+  cmpResult result;
+
+  assertArithmeticOK(*semantics);
+  assert(semantics == rhs.semantics);
+
+  switch (convolve(category, rhs.category)) {
+  default:
+    llvm_unreachable(0);
+
+  case convolve(fcNaN, fcZero):
+  case convolve(fcNaN, fcNormal):
+  case convolve(fcNaN, fcInfinity):
+  case convolve(fcNaN, fcNaN):
+  case convolve(fcZero, fcNaN):
+  case convolve(fcNormal, fcNaN):
+  case convolve(fcInfinity, fcNaN):
+    return cmpUnordered;
+
+  case convolve(fcInfinity, fcNormal):
+  case convolve(fcInfinity, fcZero):
+  case convolve(fcNormal, fcZero):
+    if (sign)
+      return cmpLessThan;
+    else
+      return cmpGreaterThan;
+
+  case convolve(fcNormal, fcInfinity):
+  case convolve(fcZero, fcInfinity):
+  case convolve(fcZero, fcNormal):
+    if (rhs.sign)
+      return cmpGreaterThan;
+    else
+      return cmpLessThan;
+
+  case convolve(fcInfinity, fcInfinity):
+    if (sign == rhs.sign)
+      return cmpEqual;
+    else if (sign)
+      return cmpLessThan;
+    else
+      return cmpGreaterThan;
+
+  case convolve(fcZero, fcZero):
+    return cmpEqual;
+
+  case convolve(fcNormal, fcNormal):
+    break;
+  }
+
+  /* Two normal numbers.  Do they have the same sign?  */
+  if (sign != rhs.sign) {
+    if (sign)
+      result = cmpLessThan;
+    else
+      result = cmpGreaterThan;
+  } else {
+    /* Compare absolute values; invert result if negative.  */
+    result = compareAbsoluteValue(rhs);
+
+    if (sign) {
+      if (result == cmpLessThan)
+        result = cmpGreaterThan;
+      else if (result == cmpGreaterThan)
+        result = cmpLessThan;
+    }
+  }
+
+  return result;
+}
+
+/// APFloat::convert - convert a value of one floating point type to another.
+/// The return value corresponds to the IEEE754 exceptions.  *losesInfo
+/// records whether the transformation lost information, i.e. whether
+/// converting the result back to the original type will produce the
+/// original value (this is almost the same as return value==opOK, but there
+/// are edge cases where this is not so).
+
+APFloat::opStatus
+APFloat::convert(const fltSemantics &toSemantics,
+                 roundingMode rounding_mode, bool *losesInfo)
+{
+  lostFraction lostFraction;
+  unsigned int newPartCount, oldPartCount;
+  opStatus fs;
+
+  assertArithmeticOK(*semantics);
+  assertArithmeticOK(toSemantics);
+  lostFraction = lfExactlyZero;
+  newPartCount = partCountForBits(toSemantics.precision + 1);
+  oldPartCount = partCount();
+
+  /* Handle storage complications.  If our new form is wider,
+     re-allocate our bit pattern into wider storage.  If it is
+     narrower, we ignore the excess parts, but if narrowing to a
+     single part we need to free the old storage.
+     Be careful not to reference significandParts for zeroes
+     and infinities, since it aborts.  */
+  if (newPartCount > oldPartCount) {
+    integerPart *newParts;
+    newParts = new integerPart[newPartCount];
+    APInt::tcSet(newParts, 0, newPartCount);
+    if (category==fcNormal || category==fcNaN)
+      APInt::tcAssign(newParts, significandParts(), oldPartCount);
+    freeSignificand();
+    significand.parts = newParts;
+  } else if (newPartCount < oldPartCount) {
+    /* Capture any lost fraction through truncation of parts so we get
+       correct rounding whilst normalizing.
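+       Narrowing IEEEquad to IEEEdouble, for instance, drops a whole
+       64-bit part whose bits must still influence the rounding.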
+       */
+    if (category==fcNormal)
+      lostFraction = lostFractionThroughTruncation
+        (significandParts(), oldPartCount, toSemantics.precision);
+    if (newPartCount == 1) {
+      integerPart newPart = 0;
+      if (category==fcNormal || category==fcNaN)
+        newPart = significandParts()[0];
+      freeSignificand();
+      significand.part = newPart;
+    }
+  }
+
+  if (category == fcNormal) {
+    /* Re-interpret our bit-pattern.  */
+    exponent += toSemantics.precision - semantics->precision;
+    semantics = &toSemantics;
+    fs = normalize(rounding_mode, lostFraction);
+    *losesInfo = (fs != opOK);
+  } else if (category == fcNaN) {
+    int shift = toSemantics.precision - semantics->precision;
+    // Do this now so significandParts gets the right answer
+    const fltSemantics *oldSemantics = semantics;
+    semantics = &toSemantics;
+    *losesInfo = false;
+    // No normalization here, just truncate
+    if (shift>0)
+      APInt::tcShiftLeft(significandParts(), newPartCount, shift);
+    else if (shift < 0) {
+      unsigned ushift = -shift;
+      // Figure out if we are losing information.  This happens
+      // if we are shifting out something other than 0s, or if the x87 long
+      // double input did not have its integer bit set (pseudo-NaN), or if the
+      // x87 long double input did not have its QNan bit set (because the x87
+      // hardware sets this bit when converting a lower-precision NaN to
+      // x87 long double).
+      if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
+        *losesInfo = true;
+      if (oldSemantics == &APFloat::x87DoubleExtended &&
+          (!(*significandParts() & 0x8000000000000000ULL) ||
+           !(*significandParts() & 0x4000000000000000ULL)))
+        *losesInfo = true;
+      APInt::tcShiftRight(significandParts(), newPartCount, ushift);
+    }
+    // gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
+    // does not give you back the same bits.  This is dubious, and we
+    // don't currently do it.  You're really supposed to get
+    // an invalid operation signal at runtime, but nobody does that.
+    fs = opOK;
+  } else {
+    semantics = &toSemantics;
+    fs = opOK;
+    *losesInfo = false;
+  }
+
+  return fs;
+}
+
+/* Convert a floating point number to an integer according to the
+   rounding mode.  If the rounded integer value is out of range this
+   returns an invalid operation exception and the contents of the
+   destination parts are unspecified.  If the rounded value is in
+   range but the floating point number is not the exact integer, the C
+   standard doesn't require an inexact exception to be raised.  IEEE
+   854 does require it so we do that.
+
+   Note that for conversions to integer type the C standard requires
+   round-to-zero to always be used.  */
+APFloat::opStatus
+APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
+                                      bool isSigned,
+                                      roundingMode rounding_mode,
+                                      bool *isExact) const
+{
+  lostFraction lost_fraction;
+  const integerPart *src;
+  unsigned int dstPartsCount, truncatedBits;
+
+  assertArithmeticOK(*semantics);
+
+  *isExact = false;
+
+  /* Handle the three special cases first.  */
+  if (category == fcInfinity || category == fcNaN)
+    return opInvalidOp;
+
+  dstPartsCount = partCountForBits(width);
+
+  if (category == fcZero) {
+    APInt::tcSet(parts, 0, dstPartsCount);
+    // Negative zero can't be represented as an int.
+    *isExact = !sign;
+    return opOK;
+  }
+
+  src = significandParts();
+
+  /* Step 1: place our absolute value, with any fraction truncated, in
+     the destination.  */
+  if (exponent < 0) {
+    /* Our absolute value is less than one; truncate everything.
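+       Every significand bit is discarded; only its effect on the
+       rounding, worked out below, survives.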
*/ + APInt::tcSet(parts, 0, dstPartsCount); + /* For exponent -1 the integer bit represents .5, look at that. + For smaller exponents leftmost truncated bit is 0. */ + truncatedBits = semantics->precision -1U - exponent; + } else { + /* We want the most significant (exponent + 1) bits; the rest are + truncated. */ + unsigned int bits = exponent + 1U; + + /* Hopelessly large in magnitude? */ + if (bits > width) + return opInvalidOp; + + if (bits < semantics->precision) { + /* We truncate (semantics->precision - bits) bits. */ + truncatedBits = semantics->precision - bits; + APInt::tcExtract(parts, dstPartsCount, src, bits, truncatedBits); + } else { + /* We want at least as many bits as are available. */ + APInt::tcExtract(parts, dstPartsCount, src, semantics->precision, 0); + APInt::tcShiftLeft(parts, dstPartsCount, bits - semantics->precision); + truncatedBits = 0; + } + } + + /* Step 2: work out any lost fraction, and increment the absolute + value if we would round away from zero. */ + if (truncatedBits) { + lost_fraction = lostFractionThroughTruncation(src, partCount(), + truncatedBits); + if (lost_fraction != lfExactlyZero && + roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) { + if (APInt::tcIncrement(parts, dstPartsCount)) + return opInvalidOp; /* Overflow. */ + } + } else { + lost_fraction = lfExactlyZero; + } + + /* Step 3: check if we fit in the destination. */ + unsigned int omsb = APInt::tcMSB(parts, dstPartsCount) + 1; + + if (sign) { + if (!isSigned) { + /* Negative numbers cannot be represented as unsigned. */ + if (omsb != 0) + return opInvalidOp; + } else { + /* It takes omsb bits to represent the unsigned integer value. + We lose a bit for the sign, but care is needed as the + maximally negative integer is a special case. */ + if (omsb == width && APInt::tcLSB(parts, dstPartsCount) + 1 != omsb) + return opInvalidOp; + + /* This case can happen because of rounding. */ + if (omsb > width) + return opInvalidOp; + } + + APInt::tcNegate (parts, dstPartsCount); + } else { + if (omsb >= width + !isSigned) + return opInvalidOp; + } + + if (lost_fraction == lfExactlyZero) { + *isExact = true; + return opOK; + } else + return opInexact; +} + +/* Same as convertToSignExtendedInteger, except we provide + deterministic values in case of an invalid operation exception, + namely zero for NaNs and the minimal or maximal value respectively + for underflow or overflow. + The *isExact output tells whether the result is exact, in the sense + that converting it back to the original floating point type produces + the original value. This is almost equivalent to result==opOK, + except for negative zeroes. +*/ +APFloat::opStatus +APFloat::convertToInteger(integerPart *parts, unsigned int width, + bool isSigned, + roundingMode rounding_mode, bool *isExact) const +{ + opStatus fs; + + fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode, + isExact); + + if (fs == opInvalidOp) { + unsigned int bits, dstPartsCount; + + dstPartsCount = partCountForBits(width); + + if (category == fcNaN) + bits = 0; + else if (sign) + bits = isSigned; + else + bits = width - isSigned; + + APInt::tcSetLeastSignificantBits(parts, dstPartsCount, bits); + if (sign && isSigned) + APInt::tcShiftLeft(parts, dstPartsCount, width - 1); + } + + return fs; +} + +/* Convert an unsigned integer SRC to a floating point number, + rounding according to ROUNDING_MODE. The sign of the floating + point number is not modified. 
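+   For example, converting the 64-bit value 2^64 - 1 to IEEEsingle
+   keeps the 24 most significant bits and rounds on the remaining 40.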
+   */
+APFloat::opStatus
+APFloat::convertFromUnsignedParts(const integerPart *src,
+                                  unsigned int srcCount,
+                                  roundingMode rounding_mode)
+{
+  unsigned int omsb, precision, dstCount;
+  integerPart *dst;
+  lostFraction lost_fraction;
+
+  assertArithmeticOK(*semantics);
+  category = fcNormal;
+  omsb = APInt::tcMSB(src, srcCount) + 1;
+  dst = significandParts();
+  dstCount = partCount();
+  precision = semantics->precision;
+
+  /* We want the most significant PRECISION bits of SRC.  There may not
+     be that many; extract what we can.  */
+  if (precision <= omsb) {
+    exponent = omsb - 1;
+    lost_fraction = lostFractionThroughTruncation(src, srcCount,
+                                                  omsb - precision);
+    APInt::tcExtract(dst, dstCount, src, precision, omsb - precision);
+  } else {
+    exponent = precision - 1;
+    lost_fraction = lfExactlyZero;
+    APInt::tcExtract(dst, dstCount, src, omsb, 0);
+  }
+
+  return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::convertFromAPInt(const APInt &Val,
+                          bool isSigned,
+                          roundingMode rounding_mode)
+{
+  unsigned int partCount = Val.getNumWords();
+  APInt api = Val;
+
+  sign = false;
+  if (isSigned && api.isNegative()) {
+    sign = true;
+    api = -api;
+  }
+
+  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+/* Convert a two's complement integer SRC to a floating point number,
+   rounding according to ROUNDING_MODE.  ISSIGNED is true if the
+   integer is signed, in which case it must be sign-extended.  */
+APFloat::opStatus
+APFloat::convertFromSignExtendedInteger(const integerPart *src,
+                                        unsigned int srcCount,
+                                        bool isSigned,
+                                        roundingMode rounding_mode)
+{
+  opStatus status;
+
+  assertArithmeticOK(*semantics);
+  if (isSigned &&
+      APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+    integerPart *copy;
+
+    /* If we're signed and negative negate a copy.  */
+    sign = true;
+    copy = new integerPart[srcCount];
+    APInt::tcAssign(copy, src, srcCount);
+    APInt::tcNegate(copy, srcCount);
+    status = convertFromUnsignedParts(copy, srcCount, rounding_mode);
+    delete [] copy;
+  } else {
+    sign = false;
+    status = convertFromUnsignedParts(src, srcCount, rounding_mode);
+  }
+
+  return status;
+}
+
+/* FIXME: should this just take a const APInt reference?  */
+APFloat::opStatus
+APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+                                        unsigned int width, bool isSigned,
+                                        roundingMode rounding_mode)
+{
+  unsigned int partCount = partCountForBits(width);
+  APInt api = APInt(width, partCount, parts);
+
+  sign = false;
+  if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
+    sign = true;
+    api = -api;
+  }
+
+  return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
+}
+
+APFloat::opStatus
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
+{
+  lostFraction lost_fraction = lfExactlyZero;
+  integerPart *significand;
+  unsigned int bitPos, partsCount;
+  StringRef::iterator dot, firstSignificantDigit;
+
+  zeroSignificand();
+  exponent = 0;
+  category = fcNormal;
+
+  significand = significandParts();
+  partsCount = partCount();
+  bitPos = partsCount * integerPartWidth;
+
+  /* Skip leading zeroes and any (hexa)decimal point.
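+     For an input such as "0.0001p4" (the 0x prefix has already been
+     stripped off), this leaves P at the final 1 and DOT at the point.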
+     */
+  StringRef::iterator begin = s.begin();
+  StringRef::iterator end = s.end();
+  StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
+  firstSignificantDigit = p;
+
+  for (; p != end;) {
+    integerPart hex_value;
+
+    if (*p == '.') {
+      assert(dot == end && "String contains multiple dots");
+      dot = p++;
+      if (p == end) {
+        break;
+      }
+    }
+
+    hex_value = hexDigitValue(*p);
+    if (hex_value == -1U) {
+      break;
+    }
+
+    p++;
+
+    if (p == end) {
+      break;
+    } else {
+      /* Store the number whilst 4-bit nibbles remain.  */
+      if (bitPos) {
+        bitPos -= 4;
+        hex_value <<= bitPos % integerPartWidth;
+        significand[bitPos / integerPartWidth] |= hex_value;
+      } else {
+        lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
+        while (p != end && hexDigitValue(*p) != -1U)
+          p++;
+        break;
+      }
+    }
+  }
+
+  /* Hex floats require an exponent but not a hexadecimal point.  */
+  assert(p != end && "Hex strings require an exponent");
+  assert((*p == 'p' || *p == 'P') && "Invalid character in significand");
+  assert(p != begin && "Significand has no digits");
+  assert((dot == end || p - begin != 1) && "Significand has no digits");
+
+  /* Ignore the exponent if we are zero.  */
+  if (p != firstSignificantDigit) {
+    int expAdjustment;
+
+    /* Implicit hexadecimal point?  */
+    if (dot == end)
+      dot = p;
+
+    /* Calculate the exponent adjustment implicit in the number of
+       significant digits.  */
+    expAdjustment = static_cast<int>(dot - firstSignificantDigit);
+    if (expAdjustment < 0)
+      expAdjustment++;
+    expAdjustment = expAdjustment * 4 - 1;
+
+    /* Adjust for writing the significand starting at the most
+       significant nibble.  */
+    expAdjustment += semantics->precision;
+    expAdjustment -= partsCount * integerPartWidth;
+
+    /* Adjust for the given exponent.  */
+    exponent = totalExponent(p + 1, end, expAdjustment);
+  }
+
+  return normalize(rounding_mode, lost_fraction);
+}
+
+APFloat::opStatus
+APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+                                      unsigned sigPartCount, int exp,
+                                      roundingMode rounding_mode)
+{
+  unsigned int parts, pow5PartCount;
+  fltSemantics calcSemantics = { 32767, -32767, 0, true };
+  integerPart pow5Parts[maxPowerOfFiveParts];
+  bool isNearest;
+
+  isNearest = (rounding_mode == rmNearestTiesToEven ||
+               rounding_mode == rmNearestTiesToAway);
+
+  parts = partCountForBits(semantics->precision + 11);
+
+  /* Calculate pow(5, abs(exp)).  */
+  pow5PartCount = powerOf5(pow5Parts, exp >= 0 ? exp: -exp);
+
+  for (;; parts *= 2) {
+    opStatus sigStatus, powStatus;
+    unsigned int excessPrecision, truncatedBits;
+
+    calcSemantics.precision = parts * integerPartWidth - 1;
+    excessPrecision = calcSemantics.precision - semantics->precision;
+    truncatedBits = excessPrecision;
+
+    APFloat decSig(calcSemantics, fcZero, sign);
+    APFloat pow5(calcSemantics, fcZero, false);
+
+    sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
+                                                rmNearestTiesToEven);
+    powStatus = pow5.convertFromUnsignedParts(pow5Parts, pow5PartCount,
+                                              rmNearestTiesToEven);
+    /* Add exp, as 10^n = 5^n * 2^n.  */
+    decSig.exponent += exp;
+
+    lostFraction calcLostFraction;
+    integerPart HUerr, HUdistance;
+    unsigned int powHUerr;
+
+    if (exp >= 0) {
+      /* multiplySignificand leaves the precision-th bit set to 1.  */
+      calcLostFraction = decSig.multiplySignificand(pow5, NULL);
+      powHUerr = powStatus != opOK;
+    } else {
+      calcLostFraction = decSig.divideSignificand(pow5);
+      /* Denormal numbers have less precision.
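+         Bits below the denormal's true LSB are counted as additional
+         excess precision, so the half-ulp distance test below is made
+         at the right position.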
*/ + if (decSig.exponent < semantics->minExponent) { + excessPrecision += (semantics->minExponent - decSig.exponent); + truncatedBits = excessPrecision; + if (excessPrecision > calcSemantics.precision) + excessPrecision = calcSemantics.precision; + } + /* Extra half-ulp lost in reciprocal of exponent. */ + powHUerr = (powStatus == opOK && calcLostFraction == lfExactlyZero) ? 0:2; + } + + /* Both multiplySignificand and divideSignificand return the + result with the integer bit set. */ + assert(APInt::tcExtractBit + (decSig.significandParts(), calcSemantics.precision - 1) == 1); + + HUerr = HUerrBound(calcLostFraction != lfExactlyZero, sigStatus != opOK, + powHUerr); + HUdistance = 2 * ulpsFromBoundary(decSig.significandParts(), + excessPrecision, isNearest); + + /* Are we guaranteed to round correctly if we truncate? */ + if (HUdistance >= HUerr) { + APInt::tcExtract(significandParts(), partCount(), decSig.significandParts(), + calcSemantics.precision - excessPrecision, + excessPrecision); + /* Take the exponent of decSig. If we tcExtract-ed less bits + above we must adjust our exponent to compensate for the + implicit right shift. */ + exponent = (decSig.exponent + semantics->precision + - (calcSemantics.precision - excessPrecision)); + calcLostFraction = lostFractionThroughTruncation(decSig.significandParts(), + decSig.partCount(), + truncatedBits); + return normalize(rounding_mode, calcLostFraction); + } + } +} + +APFloat::opStatus +APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) +{ + decimalInfo D; + opStatus fs; + + /* Scan the text. */ + StringRef::iterator p = str.begin(); + interpretDecimal(p, str.end(), &D); + + /* Handle the quick cases. First the case of no significant digits, + i.e. zero, and then exponents that are obviously too large or too + small. Writing L for log 10 / log 2, a number d.ddddd*10^exp + definitely overflows if + + (exp - 1) * L >= maxExponent + + and definitely underflows to zero where + + (exp + 1) * L <= minExponent - precision + + With integer arithmetic the tightest bounds for L are + + 93/28 < L < 196/59 [ numerator <= 256 ] + 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] + */ + + if (decDigitValue(*D.firstSigDigit) >= 10U) { + category = fcZero; + fs = opOK; + + /* Check whether the normalized exponent is high enough to overflow + max during the log-rebasing in the max-exponent check below. */ + } else if (D.normalizedExponent - 1 > INT_MAX / 42039) { + fs = handleOverflow(rounding_mode); + + /* If it wasn't, then it also wasn't high enough to overflow max + during the log-rebasing in the min-exponent check. Check that it + won't overflow min in either check, then perform the min-exponent + check. */ + } else if (D.normalizedExponent - 1 < INT_MIN / 42039 || + (D.normalizedExponent + 1) * 28738 <= + 8651 * (semantics->minExponent - (int) semantics->precision)) { + /* Underflow to zero and round. */ + zeroSignificand(); + fs = normalize(rounding_mode, lfLessThanHalf); + + /* We can finally safely perform the max-exponent check. */ + } else if ((D.normalizedExponent - 1) * 42039 + >= 12655 * semantics->maxExponent) { + /* Overflow and round. */ + fs = handleOverflow(rounding_mode); + } else { + integerPart *decSignificand; + unsigned int partCount; + + /* A tight upper bound on number of bits required to hold an + N-digit decimal integer is N * 196 / 59. Allocate enough space + to hold the full significand, and an extra part required by + tcMultiplyPart. 
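+       (196/59 = 3.3220... just exceeds log2(10) = 3.3219..., so
+       N * 196 / 59 bits always suffice for N decimal digits.)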
+       */
+    partCount = static_cast<unsigned int>(D.lastSigDigit - D.firstSigDigit) + 1;
+    partCount = partCountForBits(1 + 196 * partCount / 59);
+    decSignificand = new integerPart[partCount + 1];
+    partCount = 0;
+
+    /* Convert to binary efficiently - we do almost all multiplication
+       in an integerPart.  When this would overflow do we do a single
+       bignum multiplication, and then revert again to multiplication
+       in an integerPart.  */
+    do {
+      integerPart decValue, val, multiplier;
+
+      val = 0;
+      multiplier = 1;
+
+      do {
+        if (*p == '.') {
+          p++;
+          if (p == str.end()) {
+            break;
+          }
+        }
+        decValue = decDigitValue(*p++);
+        assert(decValue < 10U && "Invalid character in significand");
+        multiplier *= 10;
+        val = val * 10 + decValue;
+        /* The maximum number that can be multiplied by ten with any
+           digit added without overflowing an integerPart.  */
+      } while (p <= D.lastSigDigit && multiplier <= (~ (integerPart) 0 - 9) / 10);
+
+      /* Multiply out the current part.  */
+      APInt::tcMultiplyPart(decSignificand, decSignificand, multiplier, val,
+                            partCount, partCount + 1, false);
+
+      /* If we used another part (likely but not guaranteed), increase
+         the count.  */
+      if (decSignificand[partCount])
+        partCount++;
+    } while (p <= D.lastSigDigit);
+
+    category = fcNormal;
+    fs = roundSignificandWithExponent(decSignificand, partCount,
+                                      D.exponent, rounding_mode);
+
+    delete [] decSignificand;
+  }
+
+  return fs;
+}
+
+APFloat::opStatus
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
+{
+  assertArithmeticOK(*semantics);
+  assert(!str.empty() && "Invalid string length");
+
+  /* Handle a leading minus sign.  */
+  StringRef::iterator p = str.begin();
+  size_t slen = str.size();
+  sign = *p == '-' ? 1 : 0;
+  if (*p == '-' || *p == '+') {
+    p++;
+    slen--;
+    assert(slen && "String has no digits");
+  }
+
+  if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+    assert(slen - 2 && "Invalid string");
+    return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
+                                        rounding_mode);
+  }
+
+  return convertFromDecimalString(StringRef(p, slen), rounding_mode);
+}
+
+/* Write out a hexadecimal representation of the floating point value
+   to DST, which must be of sufficient size, in the C99 form
+   [-]0xh.hhhhp[+-]d.  Return the number of characters written,
+   excluding the terminating NUL.
+
+   If UPPERCASE, the output is in upper case, otherwise in lower case.
+
+   HEXDIGITS digits appear altogether, rounding the value if
+   necessary.  If HEXDIGITS is 0, the minimal precision to display the
+   number precisely is used instead.  If nothing would appear after
+   the decimal point it is suppressed.
+
+   The decimal exponent is always printed and has at least one digit.
+   Zero values display an exponent of zero.  Infinities and NaNs
+   appear as "infinity" or "nan" respectively.
+
+   The above rules are as specified by C99.  There is ambiguity about
+   what the leading hexadecimal digit should be.  This implementation
+   uses whatever is necessary so that the exponent is displayed as
+   stored.  This implies the exponent will fall within the IEEE format
+   range, and the leading hexadecimal digit will be 0 (for denormals),
+   1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
+   any other digits zero).
+*/
+unsigned int
+APFloat::convertToHexString(char *dst, unsigned int hexDigits,
+                            bool upperCase, roundingMode rounding_mode) const
+{
+  char *p;
+
+  assertArithmeticOK(*semantics);
+
+  p = dst;
+  if (sign)
+    *dst++ = '-';
+
+  switch (category) {
+  case fcInfinity:
+    memcpy (dst, upperCase ?
+            infinityU: infinityL, sizeof infinityU - 1);
+    dst += sizeof infinityL - 1;
+    break;
+
+  case fcNaN:
+    memcpy (dst, upperCase ? NaNU: NaNL, sizeof NaNU - 1);
+    dst += sizeof NaNU - 1;
+    break;
+
+  case fcZero:
+    *dst++ = '0';
+    *dst++ = upperCase ? 'X': 'x';
+    *dst++ = '0';
+    if (hexDigits > 1) {
+      *dst++ = '.';
+      memset (dst, '0', hexDigits - 1);
+      dst += hexDigits - 1;
+    }
+    *dst++ = upperCase ? 'P': 'p';
+    *dst++ = '0';
+    break;
+
+  case fcNormal:
+    dst = convertNormalToHexString (dst, hexDigits, upperCase, rounding_mode);
+    break;
+  }
+
+  *dst = 0;
+
+  return static_cast<unsigned int>(dst - p);
+}
+
+/* Does the hard work of outputting the correctly rounded hexadecimal
+   form of a normal floating point number with the specified number of
+   hexadecimal digits.  If HEXDIGITS is zero the minimum number of
+   digits necessary to print the value precisely is output.  */
+char *
+APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+                                  bool upperCase,
+                                  roundingMode rounding_mode) const
+{
+  unsigned int count, valueBits, shift, partsCount, outputDigits;
+  const char *hexDigitChars;
+  const integerPart *significand;
+  char *p;
+  bool roundUp;
+
+  *dst++ = '0';
+  *dst++ = upperCase ? 'X': 'x';
+
+  roundUp = false;
+  hexDigitChars = upperCase ? hexDigitsUpper: hexDigitsLower;
+
+  significand = significandParts();
+  partsCount = partCount();
+
+  /* +3 because the first digit only uses the single integer bit, so
+     we have 3 virtual zero most-significant-bits.  */
+  valueBits = semantics->precision + 3;
+  shift = integerPartWidth - valueBits % integerPartWidth;
+
+  /* The natural number of digits required ignoring trailing
+     insignificant zeroes.  */
+  outputDigits = (valueBits - significandLSB () + 3) / 4;
+
+  /* hexDigits of zero means use the required number for the
+     precision.  Otherwise, see if we are truncating.  If we are,
+     find out if we need to round away from zero.  */
+  if (hexDigits) {
+    if (hexDigits < outputDigits) {
+      /* We are dropping non-zero bits, so need to check how to round.
+         "bits" is the number of dropped bits.  */
+      unsigned int bits;
+      lostFraction fraction;
+
+      bits = valueBits - hexDigits * 4;
+      fraction = lostFractionThroughTruncation (significand, partsCount, bits);
+      roundUp = roundAwayFromZero(rounding_mode, fraction, bits);
+    }
+    outputDigits = hexDigits;
+  }
+
+  /* Write the digits consecutively, and start writing in the location
+     of the hexadecimal point.  We move the most significant digit
+     left and add the hexadecimal point later.  */
+  p = ++dst;
+
+  count = (valueBits + integerPartWidth - 1) / integerPartWidth;
+
+  while (outputDigits && count) {
+    integerPart part;
+
+    /* Put the most significant integerPartWidth bits in "part".  */
+    if (--count == partsCount)
+      part = 0;  /* An imaginary higher zero part.  */
+    else
+      part = significand[count] << shift;
+
+    if (count && shift)
+      part |= significand[count - 1] >> (integerPartWidth - shift);
+
+    /* Convert as much of "part" to hexdigits as we can.  */
+    unsigned int curDigits = integerPartWidth / 4;
+
+    if (curDigits > outputDigits)
+      curDigits = outputDigits;
+    dst += partAsHex (dst, part, curDigits, hexDigitChars);
+    outputDigits -= curDigits;
+  }
+
+  if (roundUp) {
+    char *q = dst;
+
+    /* Note that hexDigitChars has a trailing '0'.  */
+    do {
+      q--;
+      *q = hexDigitChars[hexDigitValue (*q) + 1];
+    } while (*q == '0');
+    assert(q >= p);
+  } else {
+    /* Add trailing zeroes.
+ memset (dst, '0', outputDigits);
+ dst += outputDigits;
+ }
+
+ /* Move the most significant digit to before the point, and if there
+ is something after the decimal point add it. This must come
+ after rounding above. */
+ p[-1] = p[0];
+ if (dst - 1 == p)
+ dst--;
+ else
+ p[0] = '.';
+
+ /* Finally output the exponent. */
+ *dst++ = upperCase ? 'P': 'p';
+
+ return writeSignedDecimal (dst, exponent);
+}
+
+// For good performance it is desirable for different APFloats
+// to produce different integers.
+uint32_t
+APFloat::getHashValue() const
+{
+ if (category==fcZero) return sign<<8 | semantics->precision;
+ else if (category==fcInfinity) return sign<<9 | semantics->precision;
+ else if (category==fcNaN) return 1<<10 | semantics->precision;
+ else {
+ uint32_t hash = sign<<11 | semantics->precision | exponent<<12;
+ const integerPart* p = significandParts();
+ for (int i=partCount(); i>0; i--, p++)
+ hash ^= ((uint32_t)*p) ^ (uint32_t)((*p)>>32);
+ return hash;
+ }
+}
+
+// Conversion from APFloat to/from host float/double. It may eventually be
+// possible to eliminate these and have everybody deal with APFloats, but that
+// will take a while. This approach will not easily extend to long double.
+// Current implementation requires integerPartWidth==64, which is correct at
+// the moment but could be made more general.
+
+// Denormals have exponent minExponent in APFloat, but minExponent-1 in
+// the actual IEEE representations. We compensate for that here.
+
+APInt
+APFloat::convertF80LongDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand;
+
+ if (category==fcNormal) {
+ myexponent = exponent+16383; //bias
+ mysignificand = significandParts()[0];
+ if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL))
+ myexponent = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7fff;
+ mysignificand = 0x8000000000000000ULL;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7fff;
+ mysignificand = significandParts()[0];
+ }
+
+ uint64_t words[2];
+ words[0] = mysignificand;
+ words[1] = ((uint64_t)(sign & 1) << 15) |
+ (myexponent & 0x7fffLL);
+ return APInt(80, 2, words);
+}
+
+APInt
+APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+ assert(partCount()==2);
+
+ uint64_t myexponent, mysignificand, myexponent2, mysignificand2;
+
+ if (category==fcNormal) {
+ myexponent = exponent + 1023; //bias
+ myexponent2 = exponent2 + 1023;
+ mysignificand = significandParts()[0];
+ mysignificand2 = significandParts()[1];
+ if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
+ myexponent = 0; // denormal
+ if (myexponent2==1 && !(mysignificand2 & 0x10000000000000LL))
+ myexponent2 = 0; // denormal
+ } else if (category==fcZero) {
+ myexponent = 0;
+ mysignificand = 0;
+ myexponent2 = 0;
+ mysignificand2 = 0;
+ } else if (category==fcInfinity) {
+ myexponent = 0x7ff;
+ myexponent2 = 0;
+ mysignificand = 0;
+ mysignificand2 = 0;
+ } else {
+ assert(category == fcNaN && "Unknown category");
+ myexponent = 0x7ff;
+ mysignificand = significandParts()[0];
+ myexponent2 = exponent2;
+ mysignificand2 = significandParts()[1];
+ }
+
+ uint64_t words[2];
+ words[0] = ((uint64_t)(sign & 1) << 63) |
+ ((myexponent & 0x7ff) << 52) |
+ (mysignificand & 0xfffffffffffffLL);
+ words[1] =
((uint64_t)(sign2 & 1) << 63) | + ((myexponent2 & 0x7ff) << 52) | + (mysignificand2 & 0xfffffffffffffLL); + return APInt(128, 2, words); +} + +APInt +APFloat::convertQuadrupleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEquad); + assert(partCount()==2); + + uint64_t myexponent, mysignificand, mysignificand2; + + if (category==fcNormal) { + myexponent = exponent+16383; //bias + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = mysignificand2 = 0; + } else if (category==fcInfinity) { + myexponent = 0x7fff; + mysignificand = mysignificand2 = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7fff; + mysignificand = significandParts()[0]; + mysignificand2 = significandParts()[1]; + } + + uint64_t words[2]; + words[0] = mysignificand; + words[1] = ((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7fff) << 48) | + (mysignificand2 & 0xffffffffffffLL); + + return APInt(128, 2, words); +} + +APInt +APFloat::convertDoubleAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEdouble); + assert(partCount()==1); + + uint64_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+1023; //bias + mysignificand = *significandParts(); + if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x7ff; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x7ff; + mysignificand = *significandParts(); + } + + return APInt(64, ((((uint64_t)(sign & 1) << 63) | + ((myexponent & 0x7ff) << 52) | + (mysignificand & 0xfffffffffffffLL)))); +} + +APInt +APFloat::convertFloatAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEsingle); + assert(partCount()==1); + + uint32_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+127; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x800000)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0xff; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0xff; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) | + (mysignificand & 0x7fffff))); +} + +APInt +APFloat::convertHalfAPFloatToAPInt() const +{ + assert(semantics == (const llvm::fltSemantics*)&IEEEhalf); + assert(partCount()==1); + + uint32_t myexponent, mysignificand; + + if (category==fcNormal) { + myexponent = exponent+15; //bias + mysignificand = (uint32_t)*significandParts(); + if (myexponent == 1 && !(mysignificand & 0x400)) + myexponent = 0; // denormal + } else if (category==fcZero) { + myexponent = 0; + mysignificand = 0; + } else if (category==fcInfinity) { + myexponent = 0x1f; + mysignificand = 0; + } else { + assert(category == fcNaN && "Unknown category!"); + myexponent = 0x1f; + mysignificand = (uint32_t)*significandParts(); + } + + return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) | + (mysignificand & 0x3ff))); +} + +// This function creates an APInt that is just a bit map of 
the floating
+// point constant as it would appear in memory. It is not a conversion,
+// and treating the result as a normal integer is unlikely to be useful.
+
+APInt
+APFloat::bitcastToAPInt() const
+{
+ if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
+ return convertHalfAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ return convertFloatAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ return convertDoubleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ return convertQuadrupleAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ return convertPPCDoubleDoubleAPFloatToAPInt();
+
+ assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ "unknown format!");
+ return convertF80LongDoubleAPFloatToAPInt();
+}
+
+float
+APFloat::convertToFloat() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+ "Float semantics are not IEEEsingle");
+ APInt api = bitcastToAPInt();
+ return api.bitsToFloat();
+}
+
+double
+APFloat::convertToDouble() const
+{
+ assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+ "Float semantics are not IEEEdouble");
+ APInt api = bitcastToAPInt();
+ return api.bitsToDouble();
+}
+
+/// Integer bit is explicit in this format. Intel hardware (387 and later)
+/// does not support these bit patterns:
+/// exponent = all 1's, integer bit 0, significand 0 ("pseudoinfinity")
+/// exponent = all 1's, integer bit 0, significand nonzero ("pseudoNaN")
+/// exponent = 0, integer bit 1 ("pseudodenormal")
+/// exponent neither 0 nor all 1's, integer bit 0 ("unnormal")
+/// At the moment, the first two are treated as NaNs, the second two as Normal.
+void
+APFloat::initFromF80LongDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==80);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 & 0x7fff);
+ uint64_t mysignificand = i1;
+
+ initialize(&APFloat::x87DoubleExtended);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>15);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff && mysignificand==0x8000000000000000ULL) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff && mysignificand!=0x8000000000000000ULL) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = 0;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ }
+}
+
+void
+APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i1 >> 52) & 0x7ff;
+ uint64_t mysignificand = i1 & 0xfffffffffffffLL;
+ uint64_t myexponent2 = (i2 >> 52) & 0x7ff;
+ uint64_t mysignificand2 = i2 & 0xfffffffffffffLL;
+
+ initialize(&APFloat::PPCDoubleDouble);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i1>>63);
+ sign2 = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ // exponent2 and significand2 are required to be 0; we don't check
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless. So is the whole second word, but keep it
+ // for determinism.
+ category = fcNaN;
+ exponent2 = myexponent2;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ // Note there is no category2; the second word is treated as if it is
+ // fcNormal, although it might be something else considered by itself.
+ exponent = myexponent - 1023;
+ exponent2 = myexponent2 - 1023;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ significandParts()[0] |= 0x10000000000000LL; // integer bit
+ if (myexponent2==0)
+ exponent2 = -1022;
+ else
+ significandParts()[1] |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromQuadrupleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==128);
+ uint64_t i1 = api.getRawData()[0];
+ uint64_t i2 = api.getRawData()[1];
+ uint64_t myexponent = (i2 >> 48) & 0x7fff;
+ uint64_t mysignificand = i1;
+ uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+
+ initialize(&APFloat::IEEEquad);
+ assert(partCount()==2);
+
+ sign = static_cast<unsigned int>(i2>>63);
+ if (myexponent==0 &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7fff &&
+ (mysignificand==0 && mysignificand2==0)) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7fff &&
+ (mysignificand!=0 || mysignificand2 !=0)) {
+ // exponent meaningless
+ category = fcNaN;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 16383;
+ significandParts()[0] = mysignificand;
+ significandParts()[1] = mysignificand2;
+ if (myexponent==0) // denormal
+ exponent = -16382;
+ else
+ significandParts()[1] |= 0x1000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromDoubleAPInt(const APInt &api)
+{
+ assert(api.getBitWidth()==64);
+ uint64_t i = *api.getRawData();
+ uint64_t myexponent = (i >> 52) & 0x7ff;
+ uint64_t mysignificand = i & 0xfffffffffffffLL;
+
+ initialize(&APFloat::IEEEdouble);
+ assert(partCount()==1);
+
+ sign = static_cast<unsigned int>(i>>63);
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0x7ff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0x7ff && mysignificand!=0) {
+ // exponent meaningless
+ category = fcNaN;
+ *significandParts() = mysignificand;
+ } else {
+ category = fcNormal;
+ exponent = myexponent - 1023;
+ *significandParts() = mysignificand;
+ if (myexponent==0) // denormal
+ exponent = -1022;
+ else
+ *significandParts() |= 0x10000000000000LL; // integer bit
+ }
+}
+
+void
+APFloat::initFromFloatAPInt(const APInt & api)
+{
+ assert(api.getBitWidth()==32);
+ uint32_t i = (uint32_t)*api.getRawData();
+ uint32_t myexponent = (i >> 23) & 0xff;
+ uint32_t mysignificand = i & 0x7fffff;
+
+ initialize(&APFloat::IEEEsingle);
+ assert(partCount()==1);
+
+ sign = i >> 31;
+ if (myexponent==0 && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcZero;
+ } else if (myexponent==0xff && mysignificand==0) {
+ // exponent, significand meaningless
+ category = fcInfinity;
+ } else if (myexponent==0xff && mysignificand!=0) {
+ // sign, exponent, significand meaningless
+
category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 127; //bias + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -126; + else + *significandParts() |= 0x800000; // integer bit + } +} + +void +APFloat::initFromHalfAPInt(const APInt & api) +{ + assert(api.getBitWidth()==16); + uint32_t i = (uint32_t)*api.getRawData(); + uint32_t myexponent = (i >> 10) & 0x1f; + uint32_t mysignificand = i & 0x3ff; + + initialize(&APFloat::IEEEhalf); + assert(partCount()==1); + + sign = i >> 15; + if (myexponent==0 && mysignificand==0) { + // exponent, significand meaningless + category = fcZero; + } else if (myexponent==0x1f && mysignificand==0) { + // exponent, significand meaningless + category = fcInfinity; + } else if (myexponent==0x1f && mysignificand!=0) { + // sign, exponent, significand meaningless + category = fcNaN; + *significandParts() = mysignificand; + } else { + category = fcNormal; + exponent = myexponent - 15; //bias + *significandParts() = mysignificand; + if (myexponent==0) // denormal + exponent = -14; + else + *significandParts() |= 0x400; // integer bit + } +} + +/// Treat api as containing the bits of a floating point number. Currently +/// we infer the floating point type from the size of the APInt. The +/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful +/// when the size is anything else). +void +APFloat::initFromAPInt(const APInt& api, bool isIEEE) +{ + if (api.getBitWidth() == 16) + return initFromHalfAPInt(api); + else if (api.getBitWidth() == 32) + return initFromFloatAPInt(api); + else if (api.getBitWidth()==64) + return initFromDoubleAPInt(api); + else if (api.getBitWidth()==80) + return initFromF80LongDoubleAPInt(api); + else if (api.getBitWidth()==128) + return (isIEEE ? + initFromQuadrupleAPInt(api) : initFromPPCDoubleDoubleAPInt(api)); + else + llvm_unreachable(0); +} + +APFloat +APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) +{ + return APFloat(APInt::getAllOnesValue(BitWidth), isIEEE); +} + +APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { + APFloat Val(Sem, fcNormal, Negative); + + // We want (in interchange format): + // sign = {Negative} + // exponent = 1..10 + // significand = 1..1 + + Val.exponent = Sem.maxExponent; // unbiased + + // 1-initialize all bits.... + Val.zeroSignificand(); + integerPart *significand = Val.significandParts(); + unsigned N = partCountForBits(Sem.precision); + for (unsigned i = 0; i != N; ++i) + significand[i] = ~((integerPart) 0); + + // ...and then clear the top bits for internal consistency. 
+ significand[N-1] &=
+ (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
+
+ return Val;
+}
+
+APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 0..01
+
+ Val.exponent = Sem.minExponent; // unbiased
+ Val.zeroSignificand();
+ Val.significandParts()[0] = 1;
+ return Val;
+}
+
+APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
+ APFloat Val(Sem, fcNormal, Negative);
+
+ // We want (in interchange format):
+ // sign = {Negative}
+ // exponent = 0..0
+ // significand = 10..0
+
+ Val.exponent = Sem.minExponent;
+ Val.zeroSignificand();
+ Val.significandParts()[partCountForBits(Sem.precision)-1] |=
+ (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
+
+ return Val;
+}
+
+APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) {
+ initFromAPInt(api, isIEEE);
+}
+
+APFloat::APFloat(float f) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::floatToBits(f));
+}
+
+APFloat::APFloat(double d) : exponent2(0), sign2(0) {
+ initFromAPInt(APInt::doubleToBits(d));
+}
+
+namespace {
+ static void append(SmallVectorImpl<char> &Buffer,
+ unsigned N, const char *Str) {
+ unsigned Start = Buffer.size();
+ Buffer.set_size(Start + N);
+ memcpy(&Buffer[Start], Str, N);
+ }
+
+ template <unsigned N>
+ void append(SmallVectorImpl<char> &Buffer, const char (&Str)[N]) {
+ append(Buffer, N, Str);
+ }
+
+ /// Removes data from the given significand until it is no more
+ /// precise than is required for the desired precision.
+ void AdjustToPrecision(APInt &significand,
+ int &exp, unsigned FormatPrecision) {
+ unsigned bits = significand.getActiveBits();
+
+ // 196/59 is a very slight overestimate of lg_2(10).
+ unsigned bitsRequired = (FormatPrecision * 196 + 58) / 59;
+
+ if (bits <= bitsRequired) return;
+
+ unsigned tensRemovable = (bits - bitsRequired) * 59 / 196;
+ if (!tensRemovable) return;
+
+ exp += tensRemovable;
+
+ APInt divisor(significand.getBitWidth(), 1);
+ APInt powten(significand.getBitWidth(), 10);
+ while (true) {
+ if (tensRemovable & 1)
+ divisor *= powten;
+ tensRemovable >>= 1;
+ if (!tensRemovable) break;
+ powten *= powten;
+ }
+
+ significand = significand.udiv(divisor);
+
+ // Truncate the significand down to its active bit count, but
+ // don't try to drop below 32.
+ unsigned newPrecision = std::max(32U, significand.getActiveBits());
+ significand = significand.trunc(newPrecision);
+ }
+
+
+ void AdjustToPrecision(SmallVectorImpl<char> &buffer,
+ int &exp, unsigned FormatPrecision) {
+ unsigned N = buffer.size();
+ if (N <= FormatPrecision) return;
+
+ // The most significant figures are the last ones in the buffer.
+ unsigned FirstSignificant = N - FormatPrecision;
+
+ // Round.
+ // FIXME: this probably shouldn't use 'round half up'.
+
+ // Rounding down is just a truncation, except we also want to drop
+ // trailing zeros from the new result.
+ if (buffer[FirstSignificant - 1] < '5') {
+ while (buffer[FirstSignificant] == '0')
+ FirstSignificant++;
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ return;
+ }
+
+ // Rounding up requires a decimal add-with-carry. If we continue
+ // the carry, the newly-introduced zeros will just be truncated.
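+ //
+ // A worked trace (illustrative values, not from the original comments):
+ // the buffer stores digits least-significant first, so 995 is
+ // {'5','9','9'}. With FormatPrecision == 2, FirstSignificant == 1 and the
+ // dropped digit '5' forces rounding up; both remaining '9's carry,
+ // FirstSignificant reaches N, and the loop below leaves the buffer as
+ // {'1'} with exp increased by 3 -- i.e. 995 rounds to 1.0e3.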
+ for (unsigned I = FirstSignificant; I != N; ++I) {
+ if (buffer[I] == '9') {
+ FirstSignificant++;
+ } else {
+ buffer[I]++;
+ break;
+ }
+ }
+
+ // If we carried through, we have exactly one digit of precision.
+ if (FirstSignificant == N) {
+ exp += FirstSignificant;
+ buffer.clear();
+ buffer.push_back('1');
+ return;
+ }
+
+ exp += FirstSignificant;
+ buffer.erase(&buffer[0], &buffer[FirstSignificant]);
+ }
+}
+
+void APFloat::toString(SmallVectorImpl<char> &Str,
+ unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
+ switch (category) {
+ case fcInfinity:
+ if (isNegative())
+ return append(Str, "-Inf");
+ else
+ return append(Str, "+Inf");
+
+ case fcNaN: return append(Str, "NaN");
+
+ case fcZero:
+ if (isNegative())
+ Str.push_back('-');
+
+ if (!FormatMaxPadding)
+ append(Str, "0.0E+0");
+ else
+ Str.push_back('0');
+ return;
+
+ case fcNormal:
+ break;
+ }
+
+ if (isNegative())
+ Str.push_back('-');
+
+ // Decompose the number into an APInt and an exponent.
+ int exp = exponent - ((int) semantics->precision - 1);
+ APInt significand(semantics->precision,
+ partCountForBits(semantics->precision),
+ significandParts());
+
+ // Set FormatPrecision if zero. We want to do this before we
+ // truncate trailing zeros, as those are part of the precision.
+ if (!FormatPrecision) {
+ // It's an interesting question whether to use the nominal
+ // precision or the active precision here for denormals.
+
+ // FormatPrecision = ceil(significandBits / lg_2(10))
+ FormatPrecision = (semantics->precision * 59 + 195) / 196;
+ }
+
+ // Ignore trailing binary zeros.
+ int trailingZeros = significand.countTrailingZeros();
+ exp += trailingZeros;
+ significand = significand.lshr(trailingZeros);
+
+ // Change the exponent from 2^e to 10^e.
+ if (exp == 0) {
+ // Nothing to do.
+ } else if (exp > 0) {
+ // Just shift left.
+ significand = significand.zext(semantics->precision + exp);
+ significand <<= exp;
+ exp = 0;
+ } else { /* exp < 0 */
+ int texp = -exp;
+
+ // We transform this using the identity:
+ // (N)(2^-e) == (N)(5^e)(10^-e)
+ // This means we have to multiply N (the significand) by 5^e.
+ // To avoid overflow, we have to operate on numbers large
+ // enough to store N * 5^e:
+ // log2(N * 5^e) == log2(N) + e * log2(5)
+ // <= semantics->precision + e * 137 / 59
+ // (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
+
+ unsigned precision = semantics->precision + 137 * texp / 59;
+
+ // Multiply significand by 5^e.
+ // N * 5^0101 == N * 5^(1*1) * 5^(0*2) * 5^(1*4) * 5^(0*8)
+ significand = significand.zext(precision);
+ APInt five_to_the_i(precision, 5);
+ while (true) {
+ if (texp & 1) significand *= five_to_the_i;
+
+ texp >>= 1;
+ if (!texp) break;
+ five_to_the_i *= five_to_the_i;
+ }
+ }
+
+ AdjustToPrecision(significand, exp, FormatPrecision);
+
+ llvm::SmallVector<char, 256> buffer;
+
+ // Fill the buffer.
+ unsigned precision = significand.getBitWidth();
+ APInt ten(precision, 10);
+ APInt digit(precision, 0);
+
+ bool inTrail = true;
+ while (significand != 0) {
+ // digit <- significand % 10
+ // significand <- significand / 10
+ APInt::udivrem(significand, ten, significand, digit);
+
+ unsigned d = digit.getZExtValue();
+
+ // Drop trailing zeros.
+ if (inTrail && !d) exp++;
+ else {
+ buffer.push_back((char) ('0' + d));
+ inTrail = false;
+ }
+ }
+
+ assert(!buffer.empty() && "no characters in buffer!");
+
+ // Drop down to FormatPrecision.
+ // TODO: don't do more precise calculations above than are required.
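+ //
+ // An end-to-end trace of the pipeline above (illustrative values, not
+ // from the original comments): for 0.75, dropping trailing binary zeros
+ // leaves significand == 3 with exp == -2; the 5^e multiplication turns
+ // that into significand == 75, still with exp == -2 (i.e. 75 * 10^-2);
+ // the division loop then emits the digits '5','7' least-significant
+ // first, and the formatting code below prints "0.75".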
+ AdjustToPrecision(buffer, exp, FormatPrecision);
+
+ unsigned NDigits = buffer.size();
+
+ // Check whether we should use scientific notation.
+ bool FormatScientific;
+ if (!FormatMaxPadding)
+ FormatScientific = true;
+ else {
+ if (exp >= 0) {
+ // 765e3 --> 765000
+ // ^^^
+ // But we shouldn't make the number look more precise than it is.
+ FormatScientific = ((unsigned) exp > FormatMaxPadding ||
+ NDigits + (unsigned) exp > FormatPrecision);
+ } else {
+ // Power of the most significant digit.
+ int MSD = exp + (int) (NDigits - 1);
+ if (MSD >= 0) {
+ // 765e-2 == 7.65
+ FormatScientific = false;
+ } else {
+ // 765e-5 == 0.00765
+ // ^ ^^
+ FormatScientific = ((unsigned) -MSD) > FormatMaxPadding;
+ }
+ }
+ }
+
+ // Scientific formatting is pretty straightforward.
+ if (FormatScientific) {
+ exp += (NDigits - 1);
+
+ Str.push_back(buffer[NDigits-1]);
+ Str.push_back('.');
+ if (NDigits == 1)
+ Str.push_back('0');
+ else
+ for (unsigned I = 1; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ Str.push_back('E');
+
+ Str.push_back(exp >= 0 ? '+' : '-');
+ if (exp < 0) exp = -exp;
+ SmallVector<char, 6> expbuf;
+ do {
+ expbuf.push_back((char) ('0' + (exp % 10)));
+ exp /= 10;
+ } while (exp);
+ for (unsigned I = 0, E = expbuf.size(); I != E; ++I)
+ Str.push_back(expbuf[E-1-I]);
+ return;
+ }
+
+ // Non-scientific, positive exponents.
+ if (exp >= 0) {
+ for (unsigned I = 0; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-1-I]);
+ for (unsigned I = 0; I != (unsigned) exp; ++I)
+ Str.push_back('0');
+ return;
+ }
+
+ // Non-scientific, negative exponents.
+
+ // The number of digits to the left of the decimal point.
+ int NWholeDigits = exp + (int) NDigits;
+
+ unsigned I = 0;
+ if (NWholeDigits > 0) {
+ for (; I != (unsigned) NWholeDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+ Str.push_back('.');
+ } else {
+ unsigned NZeros = 1 + (unsigned) -NWholeDigits;
+
+ Str.push_back('0');
+ Str.push_back('.');
+ for (unsigned Z = 1; Z != NZeros; ++Z)
+ Str.push_back('0');
+ }
+
+ for (; I != NDigits; ++I)
+ Str.push_back(buffer[NDigits-I-1]);
+}
diff --git a/final/lib/Support/APInt.cpp b/final/lib/Support/APInt.cpp
new file mode 100644
index 00000000000..08f36d2af3a
--- /dev/null
+++ b/final/lib/Support/APInt.cpp
@@ -0,0 +1,2904 @@
+//===-- APInt.cpp - Implement APInt class ---------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a class to represent arbitrary precision integer
+// constant values and provide a variety of arithmetic operations on them.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "apint"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cmath>
+#include <limits>
+#include <cstring>
+#include <cstdlib>
+using namespace llvm;
+
+/// A utility function for allocating memory, checking for allocation failures,
+/// and ensuring the contents are zeroed.
+inline static uint64_t* getClearedMemory(unsigned numWords) { + uint64_t * result = new uint64_t[numWords]; + assert(result && "APInt memory allocation fails!"); + memset(result, 0, numWords * sizeof(uint64_t)); + return result; +} + +/// A utility function for allocating memory and checking for allocation +/// failure. The content is not zeroed. +inline static uint64_t* getMemory(unsigned numWords) { + uint64_t * result = new uint64_t[numWords]; + assert(result && "APInt memory allocation fails!"); + return result; +} + +/// A utility function that converts a character to a digit. +inline static unsigned getDigit(char cdigit, uint8_t radix) { + unsigned r; + + if (radix == 16) { + r = cdigit - '0'; + if (r <= 9) + return r; + + r = cdigit - 'A'; + if (r <= 5) + return r + 10; + + r = cdigit - 'a'; + if (r <= 5) + return r + 10; + } + + r = cdigit - '0'; + if (r < radix) + return r; + + return -1U; +} + + +void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { + pVal = getClearedMemory(getNumWords()); + pVal[0] = val; + if (isSigned && int64_t(val) < 0) + for (unsigned i = 1; i < getNumWords(); ++i) + pVal[i] = -1ULL; +} + +void APInt::initSlowCase(const APInt& that) { + pVal = getMemory(getNumWords()); + memcpy(pVal, that.pVal, getNumWords() * APINT_WORD_SIZE); +} + + +APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]) + : BitWidth(numBits), VAL(0) { + assert(BitWidth && "Bitwidth too small"); + assert(bigVal && "Null pointer detected!"); + if (isSingleWord()) + VAL = bigVal[0]; + else { + // Get memory, cleared to 0 + pVal = getClearedMemory(getNumWords()); + // Calculate the number of words to copy + unsigned words = std::min(numWords, getNumWords()); + // Copy the words from bigVal to pVal + memcpy(pVal, bigVal, words * APINT_WORD_SIZE); + } + // Make sure unused high bits are cleared + clearUnusedBits(); +} + +APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix) + : BitWidth(numbits), VAL(0) { + assert(BitWidth && "Bitwidth too small"); + fromString(numbits, Str, radix); +} + +APInt& APInt::AssignSlowCase(const APInt& RHS) { + // Don't do anything for X = X + if (this == &RHS) + return *this; + + if (BitWidth == RHS.getBitWidth()) { + // assume same bit-width single-word case is already handled + assert(!isSingleWord()); + memcpy(pVal, RHS.pVal, getNumWords() * APINT_WORD_SIZE); + return *this; + } + + if (isSingleWord()) { + // assume case where both are single words is already handled + assert(!RHS.isSingleWord()); + VAL = 0; + pVal = getMemory(RHS.getNumWords()); + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + } else if (getNumWords() == RHS.getNumWords()) + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + else if (RHS.isSingleWord()) { + delete [] pVal; + VAL = RHS.VAL; + } else { + delete [] pVal; + pVal = getMemory(RHS.getNumWords()); + memcpy(pVal, RHS.pVal, RHS.getNumWords() * APINT_WORD_SIZE); + } + BitWidth = RHS.BitWidth; + return clearUnusedBits(); +} + +APInt& APInt::operator=(uint64_t RHS) { + if (isSingleWord()) + VAL = RHS; + else { + pVal[0] = RHS; + memset(pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); + } + return clearUnusedBits(); +} + +/// Profile - This method 'profiles' an APInt for use with FoldingSet. 
+void APInt::Profile(FoldingSetNodeID& ID) const {
+ ID.AddInteger(BitWidth);
+
+ if (isSingleWord()) {
+ ID.AddInteger(VAL);
+ return;
+ }
+
+ unsigned NumWords = getNumWords();
+ for (unsigned i = 0; i < NumWords; ++i)
+ ID.AddInteger(pVal[i]);
+}
+
+/// add_1 - This function adds a single "digit" integer, y, to the multiple
+/// "digit" integer array, x[]. x[] is modified to reflect the addition and
+/// 1 is returned if there is a carry out, otherwise 0 is returned.
+/// @returns the carry of the addition.
+static bool add_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ dest[i] = y + x[i];
+ if (dest[i] < y)
+ y = 1; // Carry one to next digit.
+ else {
+ y = 0; // No need to carry so exit early
+ break;
+ }
+ }
+ return y;
+}
+
+/// @brief Prefix increment operator. Increments the APInt by one.
+APInt& APInt::operator++() {
+ if (isSingleWord())
+ ++VAL;
+ else
+ add_1(pVal, pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// sub_1 - This function subtracts a single "digit" (64-bit word), y, from
+/// the multi-digit integer array, x[], propagating the borrowed 1 value until
+/// no further borrowing is needed or it runs out of "digits" in x. The result
+/// is 1 if "borrowing" exhausted the digits in x, or 0 if x was not exhausted.
+/// In other words, if y > x then this function returns 1, otherwise 0.
+/// @returns the borrow out of the subtraction
+static bool sub_1(uint64_t x[], unsigned len, uint64_t y) {
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t X = x[i];
+ x[i] -= y;
+ if (y > X)
+ y = 1; // We have to "borrow 1" from next "digit"
+ else {
+ y = 0; // No need to borrow
+ break; // Remaining digits are unchanged so exit early
+ }
+ }
+ return bool(y);
+}
+
+/// @brief Prefix decrement operator. Decrements the APInt by one.
+APInt& APInt::operator--() {
+ if (isSingleWord())
+ --VAL;
+ else
+ sub_1(pVal, getNumWords(), 1);
+ return clearUnusedBits();
+}
+
+/// add - This function adds the integer array x to the integer array Y and
+/// places the result in dest.
+/// @returns the carry out from the addition
+/// @brief General addition of 64-bit integer arrays
+static bool add(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool carry = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t limit = std::min(x[i],y[i]); // must come first in case dest == x
+ dest[i] = x[i] + y[i] + carry;
+ carry = dest[i] < limit || (carry && dest[i] == limit);
+ }
+ return carry;
+}
+
+/// Adds the RHS APInt to this APInt.
+/// @returns this, after addition of RHS.
+/// @brief Addition assignment operator.
+APInt& APInt::operator+=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL += RHS.VAL;
+ else {
+ add(pVal, pVal, RHS.pVal, getNumWords());
+ }
+ return clearUnusedBits();
+}
+
+/// Subtracts the integer array y from the integer array x
+/// @returns the borrow out.
+/// @brief Generalized subtraction of 64-bit integer arrays.
+static bool sub(uint64_t *dest, const uint64_t *x, const uint64_t *y,
+ unsigned len) {
+ bool borrow = false;
+ for (unsigned i = 0; i < len; ++i) {
+ uint64_t x_tmp = borrow ? x[i] - 1 : x[i];
+ borrow = y[i] > x_tmp || (borrow && x[i] == 0);
+ dest[i] = x_tmp - y[i];
+ }
+ return borrow;
+}
+
+/// Subtracts the RHS APInt from this APInt
+/// @returns this, after subtraction
+/// @brief Subtraction assignment operator.
+APInt& APInt::operator-=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ VAL -= RHS.VAL;
+ else
+ sub(pVal, pVal, RHS.pVal, getNumWords());
+ return clearUnusedBits();
+}
+
+/// Multiplies an integer array, x, by a uint64_t integer and places the result
+/// into dest.
+/// @returns the carry out of the multiplication.
+/// @brief Multiply a multi-digit APInt by a single digit (64-bit) integer.
+static uint64_t mul_1(uint64_t dest[], uint64_t x[], unsigned len, uint64_t y) {
+ // Split y into high 32-bit part (hy) and low 32-bit part (ly)
+ uint64_t ly = y & 0xffffffffULL, hy = y >> 32;
+ uint64_t carry = 0;
+
+ // For each digit of x.
+ for (unsigned i = 0; i < len; ++i) {
+ // Split x into high and low words
+ uint64_t lx = x[i] & 0xffffffffULL;
+ uint64_t hx = x[i] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ dest[i] = carry + lx * ly;
+ // Determine if the add above introduces carry.
+ hasCarry = (dest[i] < carry) ? 1 : 0;
+ carry = hx * ly + (dest[i] >> 32) + (hasCarry ? (1ULL << 32) : 0);
+ // The upper limit of carry can be (2^32 - 1)(2^32 - 1) +
+ // (2^32 - 1) + 2^32 = 2^64.
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ dest[i] = (carry << 32) | (dest[i] & 0xffffffffULL);
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0) +
+ (carry >> 32) + ((lx * hy) >> 32) + hx * hy;
+ }
+ return carry;
+}
+
+/// Multiplies integer array x by integer array y and stores the result into
+/// the integer array dest. Note that dest's size must be >= xlen + ylen.
+/// @brief Generalized multiplication of integer arrays.
+static void mul(uint64_t dest[], uint64_t x[], unsigned xlen, uint64_t y[],
+ unsigned ylen) {
+ dest[xlen] = mul_1(dest, x, xlen, y[0]);
+ for (unsigned i = 1; i < ylen; ++i) {
+ uint64_t ly = y[i] & 0xffffffffULL, hy = y[i] >> 32;
+ uint64_t carry = 0, lx = 0, hx = 0;
+ for (unsigned j = 0; j < xlen; ++j) {
+ lx = x[j] & 0xffffffffULL;
+ hx = x[j] >> 32;
+ // hasCarry - A flag to indicate if there is a carry to the next digit.
+ // hasCarry == 0, no carry
+ // hasCarry == 1, has carry
+ // hasCarry == 2, no carry and the calculation result == 0.
+ uint8_t hasCarry = 0;
+ uint64_t resul = carry + lx * ly;
+ hasCarry = (resul < carry) ? 1 : 0;
+ carry = (hasCarry ? (1ULL << 32) : 0) + hx * ly + (resul >> 32);
+ hasCarry = (!carry && hasCarry) ? 1 : (!carry ? 2 : 0);
+
+ carry += (lx * hy) & 0xffffffffULL;
+ resul = (carry << 32) | (resul & 0xffffffffULL);
+ dest[i+j] += resul;
+ carry = (((!carry && hasCarry != 2) || hasCarry == 1) ? (1ULL << 32) : 0)+
+ (carry >> 32) + (dest[i+j] < resul ? 1 : 0) +
+ ((lx * hy) >> 32) + hx * hy;
+ }
+ dest[i+xlen] = carry;
+ }
+}
+
+APInt& APInt::operator*=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL *= RHS.VAL;
+ clearUnusedBits();
+ return *this;
+ }
+
+ // Get some bit facts about LHS and check for zero
+ unsigned lhsBits = getActiveBits();
+ unsigned lhsWords = !lhsBits ? 0 : whichWord(lhsBits - 1) + 1;
+ if (!lhsWords)
+ // 0 * X ===> 0
+ return *this;
+
+ // Get some bit facts about RHS and check for zero
+ unsigned rhsBits = RHS.getActiveBits();
+ unsigned rhsWords = !rhsBits ? 0 : whichWord(rhsBits - 1) + 1;
+ if (!rhsWords) {
+ // X * 0 ===> 0
+ clearAllBits();
+ return *this;
+ }
+
+ // Allocate space for the result
+ unsigned destWords = rhsWords + lhsWords;
+ uint64_t *dest = getMemory(destWords);
+
+ // Perform the long multiply
+ mul(dest, pVal, lhsWords, RHS.pVal, rhsWords);
+
+ // Copy result back into *this
+ clearAllBits();
+ unsigned wordsToCopy = destWords >= getNumWords() ? getNumWords() : destWords;
+ memcpy(pVal, dest, wordsToCopy * APINT_WORD_SIZE);
+
+ // delete dest array and return
+ delete[] dest;
+ return *this;
+}
+
+APInt& APInt::operator&=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL &= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] &= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator|=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL |= RHS.VAL;
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] |= RHS.pVal[i];
+ return *this;
+}
+
+APInt& APInt::operator^=(const APInt& RHS) {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord()) {
+ VAL ^= RHS.VAL;
+ this->clearUnusedBits();
+ return *this;
+ }
+ unsigned numWords = getNumWords();
+ for (unsigned i = 0; i < numWords; ++i)
+ pVal[i] ^= RHS.pVal[i];
+ return clearUnusedBits();
+}
+
+APInt APInt::AndSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t* val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] & RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::OrSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] | RHS.pVal[i];
+ return APInt(val, getBitWidth());
+}
+
+APInt APInt::XorSlowCase(const APInt& RHS) const {
+ unsigned numWords = getNumWords();
+ uint64_t *val = getMemory(numWords);
+ for (unsigned i = 0; i < numWords; ++i)
+ val[i] = pVal[i] ^ RHS.pVal[i];
+
+ // The unused high bits of both operands are kept clear, so their XOR is
+ // already clear; clear them again anyway to preserve the canonical form.
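+ // (Illustrative example: for a 7-bit APInt stored in one 64-bit word,
+ // 0b1010111 ^ 0b0000001 == 0b1010110, and bits 7..63 of the word remain
+ // zero, which is exactly the invariant re-established below.)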
+ return APInt(val, getBitWidth()).clearUnusedBits();
+}
+
+bool APInt::operator !() const {
+ if (isSingleWord())
+ return !VAL;
+
+ for (unsigned i = 0; i < getNumWords(); ++i)
+ if (pVal[i])
+ return false;
+ return true;
+}
+
+APInt APInt::operator*(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL * RHS.VAL);
+ APInt Result(*this);
+ Result *= RHS;
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator+(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL + RHS.VAL);
+ APInt Result(BitWidth, 0);
+ add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+APInt APInt::operator-(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
+ if (isSingleWord())
+ return APInt(BitWidth, VAL - RHS.VAL);
+ APInt Result(BitWidth, 0);
+ sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
+ return Result.clearUnusedBits();
+}
+
+bool APInt::operator[](unsigned bitPosition) const {
+ assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
+ return (maskBit(bitPosition) &
+ (isSingleWord() ? VAL : pVal[whichWord(bitPosition)])) != 0;
+}
+
+bool APInt::EqualSlowCase(const APInt& RHS) const {
+ // Get some facts about the number of bits used in the two operands.
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If the number of bits isn't the same, they aren't equal
+ if (n1 != n2)
+ return false;
+
+ // If the number of bits fits in a word, we only need to compare the low word.
+ if (n1 <= APINT_BITS_PER_WORD)
+ return pVal[0] == RHS.pVal[0];
+
+ // Otherwise, compare everything
+ for (int i = whichWord(n1 - 1); i >= 0; --i)
+ if (pVal[i] != RHS.pVal[i])
+ return false;
+ return true;
+}
+
+bool APInt::EqualSlowCase(uint64_t Val) const {
+ unsigned n = getActiveBits();
+ if (n <= APINT_BITS_PER_WORD)
+ return pVal[0] == Val;
+ else
+ return false;
+}
+
+bool APInt::ult(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord())
+ return VAL < RHS.VAL;
+
+ // Get active bit length of both operands
+ unsigned n1 = getActiveBits();
+ unsigned n2 = RHS.getActiveBits();
+
+ // If magnitude of LHS is less than RHS, return true.
+ if (n1 < n2)
+ return true;
+
+ // If magnitude of RHS is greater than LHS, return false.
+ if (n2 < n1)
+ return false;
+
+ // If they both fit in a word, just compare the low order word
+ if (n1 <= APINT_BITS_PER_WORD && n2 <= APINT_BITS_PER_WORD)
+ return pVal[0] < RHS.pVal[0];
+
+ // Otherwise, compare all words
+ unsigned topWord = whichWord(std::max(n1,n2)-1);
+ for (int i = topWord; i >= 0; --i) {
+ if (pVal[i] > RHS.pVal[i])
+ return false;
+ if (pVal[i] < RHS.pVal[i])
+ return true;
+ }
+ return false;
+}
+
+bool APInt::slt(const APInt& RHS) const {
+ assert(BitWidth == RHS.BitWidth && "Bit widths must be same for comparison");
+ if (isSingleWord()) {
+ int64_t lhsSext = (int64_t(VAL) << (64-BitWidth)) >> (64-BitWidth);
+ int64_t rhsSext = (int64_t(RHS.VAL) << (64-BitWidth)) >> (64-BitWidth);
+ return lhsSext < rhsSext;
+ }
+
+ APInt lhs(*this);
+ APInt rhs(RHS);
+ bool lhsNeg = isNegative();
+ bool rhsNeg = rhs.isNegative();
+ if (lhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ lhs.flipAllBits();
+ lhs++;
+ }
+ if (rhsNeg) {
+ // Sign bit is set so perform two's complement to make it positive
+ rhs.flipAllBits();
+ rhs++;
+ }
+
+ // Now we have unsigned values to compare so do the comparison if necessary
+ // based on the negativeness of the values.
+ if (lhsNeg)
+ if (rhsNeg)
+ return lhs.ugt(rhs);
+ else
+ return true;
+ else if (rhsNeg)
+ return false;
+ else
+ return lhs.ult(rhs);
+}
+
+void APInt::setBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL |= maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] |= maskBit(bitPosition);
+}
+
+/// Set the given bit to 0 whose position is given as "bitPosition".
+/// @brief Set a given bit to 0.
+void APInt::clearBit(unsigned bitPosition) {
+ if (isSingleWord())
+ VAL &= ~maskBit(bitPosition);
+ else
+ pVal[whichWord(bitPosition)] &= ~maskBit(bitPosition);
+}
+
+/// @brief Toggle every bit to its opposite value.
+
+/// Toggle a given bit to its opposite value whose position is given
+/// as "bitPosition".
+/// @brief Toggles a given bit to its opposite value.
+void APInt::flipBit(unsigned bitPosition) {
+ assert(bitPosition < BitWidth && "Out of the bit-width range!");
+ if ((*this)[bitPosition]) clearBit(bitPosition);
+ else setBit(bitPosition);
+}
+
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
+ assert(!str.empty() && "Invalid string length");
+ assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
+ "Radix should be 2, 8, 10, or 16!");
+
+ size_t slen = str.size();
+
+ // Each computation below needs to know if it's negative.
+ StringRef::iterator p = str.begin();
+ unsigned isNegative = *p == '-';
+ if (*p == '-' || *p == '+') {
+ p++;
+ slen--;
+ assert(slen && "String is only a sign, needs a value.");
+ }
+
+ // For radixes of power-of-two values, the bits required is accurately and
+ // easily computed
+ if (radix == 2)
+ return slen + isNegative;
+ if (radix == 8)
+ return slen * 3 + isNegative;
+ if (radix == 16)
+ return slen * 4 + isNegative;
+
+ // This is grossly inefficient but accurate. We could probably do something
+ // with a computation of roughly slen*64/20 and then adjust by the value of
+ // the first few digits. But, I'm not sure how accurate that could be.
+
+ // Compute a sufficient number of bits that is always large enough but might
+ // be too large. This avoids the assertion in the constructor. This
+ // calculation doesn't work appropriately for the numbers 0-9, so just use 4
+ // bits in that case.
+ unsigned sufficient = slen == 1 ? 4 : slen * 64/18;
+
+ // Convert to the actual binary value.
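+ // (Illustrative check of the estimate above, using hypothetical input:
+ // for the decimal string "999", slen == 3 gives sufficient == 3*64/18 ==
+ // 10 bits, and 999 indeed fits in 10 bits; logBase2() below returns 9,
+ // so 10 bits are reported as needed.)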
+ APInt tmp(sufficient, StringRef(p, slen), radix);
+
+ // Compute how many bits are required. If the log is infinite, assume we need
+ // just one bit.
+ unsigned log = tmp.logBase2();
+ if (log == (unsigned)-1) {
+ return isNegative + 1;
+ } else {
+ return isNegative + log + 1;
+ }
+}
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+// When targeting x86, both GCC and LLVM seem to recognize this as a
+// rotate instruction.
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define mix(a,b,c) \
+ { \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+ }
+
+// From http://www.burtleburtle.net, by Bob Jenkins.
+#define final(a,b,c) \
+ { \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c,4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+ }
+
+// hashword() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. k is a pointer to an array of uint32_t values; length is
+// the length of the key, in 32-bit chunks. This version only handles
+// keys that are a multiple of 32 bits in size.
+static inline uint32_t hashword(const uint64_t *k64, size_t length)
+{
+ const uint32_t *k = reinterpret_cast<const uint32_t *>(k64);
+ uint32_t a,b,c;
+
+ /* Set up the internal state */
+ a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
+
+ /*------------------------------------------------- handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
+
+ /*------------------------------------------- handle the last 3 uint32_t's */
+ switch (length) { /* all the case statements fall through */
+ case 3 : c+=k[2];
+ case 2 : b+=k[1];
+ case 1 : a+=k[0];
+ final(a,b,c);
+ case 0: /* case 0: nothing left to add */
+ break;
+ }
+ /*------------------------------------------------------ report the result */
+ return c;
+}
+
+// hashword8() was adapted from http://www.burtleburtle.net, by Bob
+// Jenkins. This computes a 32-bit hash from one 64-bit word. When
+// targeting x86 (32 or 64 bit), both LLVM and GCC compile this
+// function into about 35 instructions when inlined.
+static inline uint32_t hashword8(const uint64_t k64)
+{
+ uint32_t a,b,c;
+ a = b = c = 0xdeadbeef + 4;
+ b += k64 >> 32;
+ a += k64 & 0xffffffff;
+ final(a,b,c);
+ return c;
+}
+#undef final
+#undef mix
+#undef rot
+
+uint64_t APInt::getHashValue() const {
+ uint64_t hash;
+ if (isSingleWord())
+ hash = hashword8(VAL);
+ else
+ hash = hashword(pVal, getNumWords()*2);
+ return hash;
+}
+
+/// HiBits - This function returns the high "numBits" bits of this APInt.
+APInt APInt::getHiBits(unsigned numBits) const {
+ return APIntOps::lshr(*this, BitWidth - numBits);
+}
+
+/// LoBits - This function returns the low "numBits" bits of this APInt.
+APInt APInt::getLoBits(unsigned numBits) const {
+ return APIntOps::lshr(APIntOps::shl(*this, BitWidth - numBits),
+ BitWidth - numBits);
+}
+
+unsigned APInt::countLeadingZerosSlowCase() const {
+ // Treat the most significant word differently because it might have
+ // meaningless bits set beyond the precision.
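+ // (Illustrative example with a hypothetical width: for a 100-bit APInt,
+ // BitsInMSW below is 100 % 64 == 36, and MSWMask keeps only the low 36
+ // bits of the top word, so any stray bits 36..63 cannot distort the
+ // leading-zero count.)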
+ unsigned BitsInMSW = BitWidth % APINT_BITS_PER_WORD; + integerPart MSWMask; + if (BitsInMSW) MSWMask = (integerPart(1) << BitsInMSW) - 1; + else { + MSWMask = ~integerPart(0); + BitsInMSW = APINT_BITS_PER_WORD; + } + + unsigned i = getNumWords(); + integerPart MSW = pVal[i-1] & MSWMask; + if (MSW) + return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW); + + unsigned Count = BitsInMSW; + for (--i; i > 0u; --i) { + if (pVal[i-1] == 0) + Count += APINT_BITS_PER_WORD; + else { + Count += CountLeadingZeros_64(pVal[i-1]); + break; + } + } + return Count; +} + +static unsigned countLeadingOnes_64(uint64_t V, unsigned skip) { + unsigned Count = 0; + if (skip) + V <<= skip; + while (V && (V & (1ULL << 63))) { + Count++; + V <<= 1; + } + return Count; +} + +unsigned APInt::countLeadingOnes() const { + if (isSingleWord()) + return countLeadingOnes_64(VAL, APINT_BITS_PER_WORD - BitWidth); + + unsigned highWordBits = BitWidth % APINT_BITS_PER_WORD; + unsigned shift; + if (!highWordBits) { + highWordBits = APINT_BITS_PER_WORD; + shift = 0; + } else { + shift = APINT_BITS_PER_WORD - highWordBits; + } + int i = getNumWords() - 1; + unsigned Count = countLeadingOnes_64(pVal[i], shift); + if (Count == highWordBits) { + for (i--; i >= 0; --i) { + if (pVal[i] == -1ULL) + Count += APINT_BITS_PER_WORD; + else { + Count += countLeadingOnes_64(pVal[i], 0); + break; + } + } + } + return Count; +} + +unsigned APInt::countTrailingZeros() const { + if (isSingleWord()) + return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth); + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && pVal[i] == 0; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += CountTrailingZeros_64(pVal[i]); + return std::min(Count, BitWidth); +} + +unsigned APInt::countTrailingOnesSlowCase() const { + unsigned Count = 0; + unsigned i = 0; + for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + Count += APINT_BITS_PER_WORD; + if (i < getNumWords()) + Count += CountTrailingOnes_64(pVal[i]); + return std::min(Count, BitWidth); +} + +unsigned APInt::countPopulationSlowCase() const { + unsigned Count = 0; + for (unsigned i = 0; i < getNumWords(); ++i) + Count += CountPopulation_64(pVal[i]); + return Count; +} + +APInt APInt::byteSwap() const { + assert(BitWidth >= 16 && BitWidth % 16 == 0 && "Cannot byteswap!"); + if (BitWidth == 16) + return APInt(BitWidth, ByteSwap_16(uint16_t(VAL))); + else if (BitWidth == 32) + return APInt(BitWidth, ByteSwap_32(unsigned(VAL))); + else if (BitWidth == 48) { + unsigned Tmp1 = unsigned(VAL >> 16); + Tmp1 = ByteSwap_32(Tmp1); + uint16_t Tmp2 = uint16_t(VAL); + Tmp2 = ByteSwap_16(Tmp2); + return APInt(BitWidth, (uint64_t(Tmp2) << 32) | Tmp1); + } else if (BitWidth == 64) + return APInt(BitWidth, ByteSwap_64(VAL)); + else { + APInt Result(BitWidth, 0); + char *pByte = (char*)Result.pVal; + for (unsigned i = 0; i < BitWidth / APINT_WORD_SIZE / 2; ++i) { + char Tmp = pByte[i]; + pByte[i] = pByte[BitWidth / APINT_WORD_SIZE - 1 - i]; + pByte[BitWidth / APINT_WORD_SIZE - i - 1] = Tmp; + } + return Result; + } +} + +APInt llvm::APIntOps::GreatestCommonDivisor(const APInt& API1, + const APInt& API2) { + APInt A = API1, B = API2; + while (!!B) { + APInt T = B; + B = APIntOps::urem(A, B); + A = T; + } + return A; +} + +APInt llvm::APIntOps::RoundDoubleToAPInt(double Double, unsigned width) { + union { + double D; + uint64_t I; + } T; + T.D = Double; + + // Get the sign bit from the highest order bit + bool isNeg = T.I >> 63; + + // Get the 11-bit exponent and adjust for 
the bias of 1023
+ int64_t exp = ((T.I >> 52) & 0x7ff) - 1023;
+
+ // If the exponent is negative, the value is < 1 so just return 0.
+ if (exp < 0)
+ return APInt(width, 0u);
+
+ // Extract the mantissa by clearing the top 12 bits (sign + exponent).
+ uint64_t mantissa = (T.I & (~0ULL >> 12)) | 1ULL << 52;
+
+ // If the exponent doesn't shift all bits out of the mantissa
+ if (exp < 52)
+ return isNeg ? -APInt(width, mantissa >> (52 - exp)) :
+ APInt(width, mantissa >> (52 - exp));
+
+ // If the client didn't provide enough bits for us to shift the mantissa into
+ // then the result is undefined, just return 0
+ if (width <= exp - 52)
+ return APInt(width, 0);
+
+ // Otherwise, we have to shift the mantissa bits up to the right location
+ APInt Tmp(width, mantissa);
+ Tmp = Tmp.shl((unsigned)exp - 52);
+ return isNeg ? -Tmp : Tmp;
+}
+
+/// RoundToDouble - This function converts this APInt to a double.
+/// The layout for double is as following (IEEE Standard 754):
+/// --------------------------------------
+/// | Sign Exponent Fraction Bias |
+/// |--------------------------------------|
+/// | 1[63] 11[62-52] 52[51-00] 1023 |
+/// --------------------------------------
+double APInt::roundToDouble(bool isSigned) const {
+
+ // Handle the simple case where the value is contained in one uint64_t.
+ // It is wrong to optimize getWord(0) to VAL; there might be more than one word.
+ if (isSingleWord() || getActiveBits() <= APINT_BITS_PER_WORD) {
+ if (isSigned) {
+ int64_t sext = (int64_t(getWord(0)) << (64-BitWidth)) >> (64-BitWidth);
+ return double(sext);
+ } else
+ return double(getWord(0));
+ }
+
+ // Determine if the value is negative.
+ bool isNeg = isSigned ? (*this)[BitWidth-1] : false;
+
+ // Construct the absolute value if we're negative.
+ APInt Tmp(isNeg ? -(*this) : (*this));
+
+ // Figure out how many bits we're using.
+ unsigned n = Tmp.getActiveBits();
+
+ // The exponent (without bias normalization) is just the number of bits
+ // we are using. Note that the sign bit is gone since we constructed the
+ // absolute value.
+ uint64_t exp = n;
+
+ // Return infinity for exponent overflow
+ if (exp > 1023) {
+ if (!isSigned || !isNeg)
+ return std::numeric_limits<double>::infinity();
+ else
+ return -std::numeric_limits<double>::infinity();
+ }
+ exp += 1023; // Increment for 1023 bias
+
+ // Number of bits in mantissa is 52. To obtain the mantissa value, we must
+ // extract the high 52 bits from the correct words in pVal.
+ uint64_t mantissa;
+ unsigned hiWord = whichWord(n-1);
+ if (hiWord == 0) {
+ mantissa = Tmp.pVal[0];
+ if (n > 52)
+ mantissa >>= n - 52; // shift down, we want the top 52 bits.
+ } else {
+ assert(hiWord > 0 && "huh?");
+ uint64_t hibits = Tmp.pVal[hiWord] << (52 - n % APINT_BITS_PER_WORD);
+ uint64_t lobits = Tmp.pVal[hiWord-1] >> (11 + n % APINT_BITS_PER_WORD);
+ mantissa = hibits | lobits;
+ }
+
+ // The leading bit of mantissa is implicit, so get rid of it.
+ uint64_t sign = isNeg ? (1ULL << (APINT_BITS_PER_WORD - 1)) : 0;
+ union {
+ double D;
+ uint64_t I;
+ } T;
+ T.I = sign | (exp << 52) | mantissa;
+ return T.D;
+}
+
+// Truncate to new width.
+APInt APInt::trunc(unsigned width) const {
+ assert(width < BitWidth && "Invalid APInt Truncate request");
+ assert(width && "Can't truncate to 0 bits");
+
+ if (width <= APINT_BITS_PER_WORD)
+ return APInt(width, getRawData()[0]);
+
+ APInt Result(getMemory(getNumWords(width)), width);
+
+ // Copy full words.
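+ // (Illustrative example: truncating a 128-bit value to 80 bits copies
+ // 80/64 == 1 full word in the loop below, then the partial-word step
+ // keeps only the low 80 % 64 == 16 bits of the second word via the
+ // shift-up/shift-down masking.)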
+ unsigned i; + for (i = 0; i != width / APINT_BITS_PER_WORD; i++) + Result.pVal[i] = pVal[i]; + + // Truncate and copy any partial word. + unsigned bits = (0 - width) % APINT_BITS_PER_WORD; + if (bits != 0) + Result.pVal[i] = pVal[i] << bits >> bits; + + return Result; +} + +// Sign extend to a new width. +APInt APInt::sext(unsigned width) const { + assert(width > BitWidth && "Invalid APInt SignExtend request"); + + if (width <= APINT_BITS_PER_WORD) { + uint64_t val = VAL << (APINT_BITS_PER_WORD - BitWidth); + val = (int64_t)val >> (width - BitWidth); + return APInt(width, val >> (APINT_BITS_PER_WORD - width)); + } + + APInt Result(getMemory(getNumWords(width)), width); + + // Copy full words. + unsigned i; + uint64_t word = 0; + for (i = 0; i != BitWidth / APINT_BITS_PER_WORD; i++) { + word = getRawData()[i]; + Result.pVal[i] = word; + } + + // Read and sign-extend any partial word. + unsigned bits = (0 - BitWidth) % APINT_BITS_PER_WORD; + if (bits != 0) + word = (int64_t)getRawData()[i] << bits >> bits; + else + word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); + + // Write remaining full words. + for (; i != width / APINT_BITS_PER_WORD; i++) { + Result.pVal[i] = word; + word = (int64_t)word >> (APINT_BITS_PER_WORD - 1); + } + + // Write any partial word. + bits = (0 - width) % APINT_BITS_PER_WORD; + if (bits != 0) + Result.pVal[i] = word << bits >> bits; + + return Result; +} + +// Zero extend to a new width. +APInt APInt::zext(unsigned width) const { + assert(width > BitWidth && "Invalid APInt ZeroExtend request"); + + if (width <= APINT_BITS_PER_WORD) + return APInt(width, VAL); + + APInt Result(getMemory(getNumWords(width)), width); + + // Copy words. + unsigned i; + for (i = 0; i != getNumWords(); i++) + Result.pVal[i] = getRawData()[i]; + + // Zero remaining words. + memset(&Result.pVal[i], 0, (Result.getNumWords() - i) * APINT_WORD_SIZE); + + return Result; +} + +APInt APInt::zextOrTrunc(unsigned width) const { + if (BitWidth < width) + return zext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +APInt APInt::sextOrTrunc(unsigned width) const { + if (BitWidth < width) + return sext(width); + if (BitWidth > width) + return trunc(width); + return *this; +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// @brief Arithmetic right-shift function. +APInt APInt::ashr(const APInt &shiftAmt) const { + return ashr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Arithmetic right-shift this APInt by shiftAmt. +/// @brief Arithmetic right-shift function. +APInt APInt::ashr(unsigned shiftAmt) const { + assert(shiftAmt <= BitWidth && "Invalid shift amount"); + // Handle a degenerate case + if (shiftAmt == 0) + return *this; + + // Handle single word shifts with built-in ashr + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); // undefined + else { + unsigned SignBit = APINT_BITS_PER_WORD - BitWidth; + return APInt(BitWidth, + (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt)); + } + } + + // If all the bits were shifted out, the result is, technically, undefined. + // We return -1 if it was negative, 0 otherwise. We check this early to avoid + // issues in the algorithm below. + if (shiftAmt == BitWidth) { + if (isNegative()) + return APInt(BitWidth, -1ULL, true); + else + return APInt(BitWidth, 0); + } + + // Create some space for the result. 
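+ // (A sketch of the word-level algorithm that follows, using hypothetical
+ // numbers: for a 128-bit value arithmetically shifted right by 68,
+ // offset == 1 and wordShift == 4, so word 1 is shifted down 4 bits into
+ // word 0 and the vacated high bits are filled from the sign below.)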
+ uint64_t * val = new uint64_t[getNumWords()]; + + // Compute some values needed by the following shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; // bits to shift per word + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; // word offset for shift + unsigned breakWord = getNumWords() - 1 - offset; // last word affected + unsigned bitsInWord = whichBit(BitWidth); // how many bits in last word? + if (bitsInWord == 0) + bitsInWord = APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + // Move the words containing significant bits + for (unsigned i = 0; i <= breakWord; ++i) + val[i] = pVal[i+offset]; // move whole word + + // Adjust the top significant word for sign bit fill, if negative + if (isNegative()) + if (bitsInWord < APINT_BITS_PER_WORD) + val[breakWord] |= ~0ULL << bitsInWord; // set high bits + } else { + // Shift the low order words + for (unsigned i = 0; i < breakWord; ++i) { + // This combines the shifted corresponding word with the low bits from + // the next word (shifted into this word's high bits). + val[i] = (pVal[i+offset] >> wordShift) | + (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); + } + + // Shift the break word. In this case there are no bits from the next word + // to include in this word. + val[breakWord] = pVal[breakWord+offset] >> wordShift; + + // Deal with sign extension in the break word, and possibly the word before + // it. + if (isNegative()) { + if (wordShift > bitsInWord) { + if (breakWord > 0) + val[breakWord-1] |= + ~0ULL << (APINT_BITS_PER_WORD - (wordShift - bitsInWord)); + val[breakWord] |= ~0ULL; + } else + val[breakWord] |= (~0ULL << (bitsInWord - wordShift)); + } + } + + // Remaining words are 0 or -1, just assign them. + uint64_t fillValue = (isNegative() ? -1ULL : 0); + for (unsigned i = breakWord+1; i < getNumWords(); ++i) + val[i] = fillValue; + return APInt(val, BitWidth).clearUnusedBits(); +} + +/// Logical right-shift this APInt by shiftAmt. +/// @brief Logical right-shift function. +APInt APInt::lshr(const APInt &shiftAmt) const { + return lshr((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +/// Logical right-shift this APInt by shiftAmt. +/// @brief Logical right-shift function. +APInt APInt::lshr(unsigned shiftAmt) const { + if (isSingleWord()) { + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + else + return APInt(BitWidth, this->VAL >> shiftAmt); + } + + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0. + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + + // If none of the bits are shifted out, the result is *this. This avoids + // issues with shifting by the size of the integer type, which produces + // undefined results in the code below. This is also an optimization. + if (shiftAmt == 0) + return *this; + + // Create some space for the result.
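The word-combining step that ashr and lshr share is easier to see on a fixed two-word value. A self-contained sketch of a logical right shift by less than one word (the word values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t v[2] = { 0x0123456789ABCDEFULL, 0xFEDCBA9876543210ULL };
      unsigned shift = 8;                                 // 0 < shift < 64
      uint64_t r[2];
      r[0] = (v[0] >> shift) | (v[1] << (64 - shift));    // low bits of the next word slide in on top
      r[1] = v[1] >> shift;                               // top word just shifts
      assert(r[0] == 0x100123456789ABCDULL);
      assert(r[1] == 0x00FEDCBA98765432ULL);
      return 0;
    }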
+ uint64_t * val = new uint64_t[getNumWords()]; + + // If we are shifting less than a word, compute the shift with a simple carry + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (int i = getNumWords()-1; i >= 0; --i) { + val[i] = (pVal[i] >> shiftAmt) | carry; + carry = pVal[i] << (APINT_BITS_PER_WORD - shiftAmt); + } + return APInt(val, BitWidth).clearUnusedBits(); + } + + // Compute some values needed by the remaining shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (unsigned i = 0; i < getNumWords() - offset; ++i) + val[i] = pVal[i+offset]; + for (unsigned i = getNumWords()-offset; i < getNumWords(); i++) + val[i] = 0; + return APInt(val,BitWidth).clearUnusedBits(); + } + + // Shift the low order words + unsigned breakWord = getNumWords() - offset -1; + for (unsigned i = 0; i < breakWord; ++i) + val[i] = (pVal[i+offset] >> wordShift) | + (pVal[i+offset+1] << (APINT_BITS_PER_WORD - wordShift)); + // Shift the break word. + val[breakWord] = pVal[breakWord+offset] >> wordShift; + + // Remaining words are 0 + for (unsigned i = breakWord+1; i < getNumWords(); ++i) + val[i] = 0; + return APInt(val, BitWidth).clearUnusedBits(); +} + +/// Left-shift this APInt by shiftAmt. +/// @brief Left-shift function. +APInt APInt::shl(const APInt &shiftAmt) const { + // It's undefined behavior in C to shift by BitWidth or greater. + return shl((unsigned)shiftAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::shlSlowCase(unsigned shiftAmt) const { + // If all the bits were shifted out, the result is 0. This avoids issues + // with shifting by the size of the integer type, which produces undefined + // results. We define these "undefined results" to always be 0. + if (shiftAmt == BitWidth) + return APInt(BitWidth, 0); + + // If none of the bits are shifted out, the result is *this. This avoids a + // lshr by the words size in the loop below which can produce incorrect + // results. It also avoids the expensive computation below for a common case. + if (shiftAmt == 0) + return *this; + + // Create some space for the result. + uint64_t * val = new uint64_t[getNumWords()]; + + // If we are shifting less than a word, do it the easy way + if (shiftAmt < APINT_BITS_PER_WORD) { + uint64_t carry = 0; + for (unsigned i = 0; i < getNumWords(); i++) { + val[i] = pVal[i] << shiftAmt | carry; + carry = pVal[i] >> (APINT_BITS_PER_WORD - shiftAmt); + } + return APInt(val, BitWidth).clearUnusedBits(); + } + + // Compute some values needed by the remaining shift algorithms + unsigned wordShift = shiftAmt % APINT_BITS_PER_WORD; + unsigned offset = shiftAmt / APINT_BITS_PER_WORD; + + // If we are shifting whole words, just move whole words + if (wordShift == 0) { + for (unsigned i = 0; i < offset; i++) + val[i] = 0; + for (unsigned i = offset; i < getNumWords(); i++) + val[i] = pVal[i-offset]; + return APInt(val,BitWidth).clearUnusedBits(); + } + + // Copy whole words from this to Result. 
+ unsigned i = getNumWords() - 1; + for (; i > offset; --i) + val[i] = pVal[i-offset] << wordShift | + pVal[i-offset-1] >> (APINT_BITS_PER_WORD - wordShift); + val[offset] = pVal[0] << wordShift; + for (i = 0; i < offset; ++i) + val[i] = 0; + return APInt(val, BitWidth).clearUnusedBits(); +} + +APInt APInt::rotl(const APInt &rotateAmt) const { + return rotl((unsigned)rotateAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::rotl(unsigned rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities. Note that + // shl and lshr are const and return their result. + APInt hi(*this); + APInt lo(*this); + hi = hi.shl(rotateAmt); + lo = lo.lshr(BitWidth - rotateAmt); + return hi | lo; +} + +APInt APInt::rotr(const APInt &rotateAmt) const { + return rotr((unsigned)rotateAmt.getLimitedValue(BitWidth)); +} + +APInt APInt::rotr(unsigned rotateAmt) const { + if (rotateAmt == 0) + return *this; + // Don't get too fancy, just use existing shift/or facilities. Note that + // shl and lshr are const and return their result. + APInt hi(*this); + APInt lo(*this); + lo = lo.lshr(rotateAmt); + hi = hi.shl(BitWidth - rotateAmt); + return hi | lo; +} + +// Square Root - this method computes and returns the square root of "this". +// Three mechanisms are used for computation. For small values (<= 5 bits), +// a table lookup is done. This gets some performance for common cases. For +// values using less than 52 bits, the value is converted to double and then +// the libc sqrt function is called. The result is rounded and then converted +// back to a uint64_t which is then used to construct the result. Finally, +// the Babylonian method for computing square roots is used. +APInt APInt::sqrt() const { + + // Determine the magnitude of the value. + unsigned magnitude = getActiveBits(); + + // Use a fast table for some small values. This also gets rid of some + // rounding errors in libc sqrt for small values. + if (magnitude <= 5) { + static const uint8_t results[32] = { + /* 0 */ 0, + /* 1- 2 */ 1, 1, + /* 3- 6 */ 2, 2, 2, 2, + /* 7-12 */ 3, 3, 3, 3, 3, 3, + /* 13-20 */ 4, 4, 4, 4, 4, 4, 4, 4, + /* 21-30 */ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + /* 31 */ 6 + }; + return APInt(BitWidth, results[ (isSingleWord() ? VAL : pVal[0]) ]); + } + + // If the magnitude of the value fits in less than 52 bits (the precision of + // an IEEE double precision floating point value), then we can use the + // libc sqrt function which will probably use a hardware sqrt computation. + // This should be faster than the algorithm below. + if (magnitude < 52) { +#if HAVE_ROUND + return APInt(BitWidth, + uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0]))))); +#else + return APInt(BitWidth, + uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0])) + 0.5)); +#endif + } + + // Okay, all the shortcuts are exhausted. We must compute it. The following + // is a classical Babylonian method for computing the square root. This code + // was adapted to APInt from a Wikipedia article on such computations. + // See http://www.wikipedia.org/ and go to the page named + // Calculate_an_integer_square_root. + unsigned nbits = BitWidth, i = 4; + APInt testy(BitWidth, 16); + APInt x_old(BitWidth, 1); + APInt x_new(BitWidth, 0); + APInt two(BitWidth, 2); + + // Select a good starting value using binary logarithms.
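An aside before the starting-value loop: the Babylonian iteration that sqrt's comment describes can be tried out on a plain uint64_t. This sketch mirrors the "average x with n/x until it stops decreasing" loop; the helper name isqrt is mine, not part of the patch:

    #include <cassert>
    #include <cstdint>

    static uint64_t isqrt(uint64_t n) {
      if (n < 2)
        return n;
      uint64_t x = n / 2;                 // any starting guess >= sqrt(n) works
      uint64_t next = (x + n / x) / 2;
      while (next < x) {                  // strictly decreasing until the floor is reached
        x = next;
        next = (x + n / x) / 2;
      }
      return x;
    }

    int main() {
      assert(isqrt(152399025) == 12345);  // 12345^2, a perfect square
      assert(isqrt(152399026) == 12345);  // floor behavior just past it
      return 0;
    }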
+ for (;; i += 2, testy = testy.shl(2)) + if (i >= nbits || this->ule(testy)) { + x_old = x_old.shl(i / 2); + break; + } + + // Use the Babylonian method to arrive at the integer square root: + for (;;) { + x_new = (this->udiv(x_old) + x_old).udiv(two); + if (x_old.ule(x_new)) + break; + x_old = x_new; + } + + // Make sure we return the closest approximation + // NOTE: The rounding calculation below is correct. It can produce an + // off-by-one discrepancy with results from pari/gp. That discrepancy has been + // determined to be a rounding issue with pari/gp as it begins to use a + // floating point representation after 192 bits. There are no discrepancies + // between this algorithm and pari/gp for bit widths < 192 bits. + APInt square(x_old * x_old); + APInt nextSquare((x_old + 1) * (x_old + 1)); + if (this->ult(square)) + return x_old; + else if (this->ule(nextSquare)) { + APInt midpoint((nextSquare - square).udiv(two)); + APInt offset(*this - square); + if (offset.ult(midpoint)) + return x_old; + else + return x_old + 1; + } else + llvm_unreachable("Error in APInt::sqrt computation"); + return x_old + 1; +} + +/// Computes the multiplicative inverse of this APInt for a given modulo. The +/// iterative extended Euclidean algorithm is used to solve for this value, +/// however we simplify it to speed up calculating only the inverse, and take +/// advantage of div+rem calculations. We also use some tricks to avoid copying +/// (potentially large) APInts around. +APInt APInt::multiplicativeInverse(const APInt& modulo) const { + assert(ult(modulo) && "This APInt must be smaller than the modulo"); + + // Using the properties listed at the following web page (accessed 06/21/08): + // http://www.numbertheory.org/php/euclid.html + // (especially the properties numbered 3, 4 and 9) it can be proved that + // BitWidth bits suffice for all the computations in the algorithm implemented + // below. More precisely, this number of bits suffices if the multiplicative + // inverse exists, but may not suffice for the general extended Euclidean + // algorithm. + + APInt r[2] = { modulo, *this }; + APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; + APInt q(BitWidth, 0); + + unsigned i; + for (i = 0; r[i^1] != 0; i ^= 1) { + // An overview of the math without the confusing bit-flipping: + // q = r[i-2] / r[i-1] + // r[i] = r[i-2] % r[i-1] + // t[i] = t[i-2] - t[i-1] * q + udivrem(r[i], r[i^1], q, r[i]); + t[i] -= t[i^1] * q; + } + + // If this APInt and the modulo are not coprime, there is no multiplicative + // inverse, so return 0. We check this by looking at the next-to-last + // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean + // algorithm. + if (r[i] != 1) + return APInt(BitWidth, 0); + + // The next-to-last t is the multiplicative inverse. However, we are + // interested in a positive inverse. Calculate a positive one from a negative + // one if necessary. A simple addition of the modulo suffices because + // abs(t[i]) is known to be less than *this/2 (see the link above). + return t[i].isNegative() ? t[i] + modulo : t[i]; +} + +/// Calculate the magic numbers required to implement a signed integer division +/// by a constant as a sequence of multiplies, adds and shifts. Requires that +/// the divisor not be 0, 1, or -1. Taken from "Hacker's Delight", Henry S. +/// Warren, Jr., chapter 10.
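The index ping-pong in multiplicativeInverse above is compact but easy to misread. Here is the same loop on small signed integers, before the magic() implementation picks up below (the function name modInverse is mine):

    #include <cassert>
    #include <cstdint>

    static int64_t modInverse(int64_t a, int64_t m) {
      int64_t r[2] = { m, a }, t[2] = { 0, 1 };
      unsigned i;
      for (i = 0; r[i ^ 1] != 0; i ^= 1) {   // r[i^1] plays the role of the newer remainder
        int64_t q = r[i] / r[i ^ 1];
        r[i] = r[i] % r[i ^ 1];
        t[i] -= t[i ^ 1] * q;
      }
      if (r[i] != 1)
        return 0;                            // gcd != 1: no inverse exists
      return t[i] < 0 ? t[i] + m : t[i];     // one addition suffices to make it positive
    }

    int main() {
      assert(modInverse(3, 7) == 5);         // 3 * 5 == 15 == 1 (mod 7)
      assert(modInverse(4, 8) == 0);         // gcd(4, 8) == 4, so no inverse
      return 0;
    }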
+APInt::ms APInt::magic() const { + const APInt& d = *this; + unsigned p; + APInt ad, anc, delta, q1, r1, q2, r2, t; + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + struct ms mag; + + ad = d.abs(); + t = signedMin + (d.lshr(d.getBitWidth() - 1)); + anc = t - 1 - t.urem(ad); // absolute value of nc + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(anc); // initialize q1 = 2^p/abs(nc) + r1 = signedMin - q1*anc; // initialize r1 = rem(2^p, abs(nc)) + q2 = signedMin.udiv(ad); // initialize q2 = 2^p/abs(d) + r2 = signedMin - q2*ad; // initialize r2 = rem(2^p, abs(d)) + do { + p = p + 1; + q1 = q1<<1; // update q1 = 2^p/abs(nc) + r1 = r1<<1; // update r1 = rem(2^p, abs(nc)) + if (r1.uge(anc)) { // must be unsigned comparison + q1 = q1 + 1; + r1 = r1 - anc; + } + q2 = q2<<1; // update q2 = 2^p/abs(d) + r2 = r2<<1; // update r2 = rem(2^p, abs(d)) + if (r2.uge(ad)) { // must be unsigned comparison + q2 = q2 + 1; + r2 = r2 - ad; + } + delta = ad - r2; + } while (q1.ult(delta) || (q1 == delta && r1 == 0)); + + mag.m = q2 + 1; + if (d.isNegative()) mag.m = -mag.m; // resulting magic number + mag.s = p - d.getBitWidth(); // resulting shift + return mag; +} + +/// Calculate the magic numbers required to implement an unsigned integer +/// division by a constant as a sequence of multiplies, adds and shifts. +/// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry +/// S. Warren, Jr., chapter 10. +APInt::mu APInt::magicu() const { + const APInt& d = *this; + unsigned p; + APInt nc, delta, q1, r1, q2, r2; + struct mu magu; + magu.a = 0; // initialize "add" indicator + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); + APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); + + nc = allOnes - (-d).urem(d); + p = d.getBitWidth() - 1; // initialize p + q1 = signedMin.udiv(nc); // initialize q1 = 2^p/nc + r1 = signedMin - q1*nc; // initialize r1 = rem(2^p, nc) + q2 = signedMax.udiv(d); // initialize q2 = (2^p - 1)/d + r2 = signedMax - q2*d; // initialize r2 = rem(2^p - 1, d) + do { + p = p + 1; + if (r1.uge(nc - r1)) { + q1 = q1 + q1 + 1; // update q1 + r1 = r1 + r1 - nc; // update r1 + } + else { + q1 = q1+q1; // update q1 + r1 = r1+r1; // update r1 + } + if ((r2 + 1).uge(d - r2)) { + if (q2.uge(signedMax)) magu.a = 1; + q2 = q2+q2 + 1; // update q2 + r2 = r2+r2 + 1 - d; // update r2 + } + else { + if (q2.uge(signedMin)) magu.a = 1; + q2 = q2+q2; // update q2 + r2 = r2+r2 + 1; // update r2 + } + delta = d - 1 - r2; + } while (p < d.getBitWidth()*2 && + (q1.ult(delta) || (q1 == delta && r1 == 0))); + magu.m = q2 + 1; // resulting magic number + magu.s = p - d.getBitWidth(); // resulting shift + return magu; +} + +/// Implementation of Knuth's Algorithm D (Division of nonnegative integers) +/// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The +/// variables here have the same names as in the algorithm. Comments explain +/// the algorithm and any deviation from it. +static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, +                     unsigned m, unsigned n) { + assert(u && "Must provide dividend"); + assert(v && "Must provide divisor"); + assert(q && "Must provide quotient"); + assert(u != v && u != q && v != q && "Must use different memory"); + assert(n>1 && "n must be > 1"); + + // Knuth uses the value b as the base of the number system. In our case b + // is 2^32, i.e. uint64_t(1) << 32.
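A brief aside before the KnuthDiv body continues: for a sense of what magic() above produces, a compiler rewrites a signed division by a constant into a wide multiply, shifts, and a sign fix-up. A sketch using the published Hacker's Delight constants for 32-bit division by 7 (M = 0x92492493, s = 2); these constants are quoted from the book rather than computed by the routine above:

    #include <cassert>
    #include <cstdint>

    static int32_t divBy7(int32_t n) {
      int64_t M = (int32_t)0x92492493;       // magic multiplier, negative as an int32_t
      int32_t q = (int32_t)((M * n) >> 32);  // high 32 bits of the 64-bit product
      q += n;                                // correction needed because M < 0
      q >>= 2;                               // the shift s == 2
      q += (uint32_t)q >> 31;                // add one when the quotient is negative
      return q;
    }

    int main() {
      for (int32_t n = -100; n <= 100; ++n)
        assert(divBy7(n) == n / 7);
      return 0;
    }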
+ uint64_t b = uint64_t(1) << 32; + +#if 0 + DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n'); + DEBUG(dbgs() << "KnuthDiv: original:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); +#endif + // D1. [Normalize.] Set d = b / (v[n-1] + 1) and multiply all the digits of + // u and v by d. Note that we have taken Knuth's advice here to use a power + // of 2 value for d such that d * v[n-1] >= b/2 (b is the base). A power of + // 2 allows us to shift instead of multiply and it is easy to determine the + // shift amount from the leading zeros. We are basically normalizing u and + // v so that v's high bits are shifted to the top of its range without + // overflow. Note that this can require an extra word in u so that u must + // be of length m+n+1. + unsigned shift = CountLeadingZeros_32(v[n-1]); + unsigned v_carry = 0; + unsigned u_carry = 0; + if (shift) { + for (unsigned i = 0; i < m+n; ++i) { + unsigned u_tmp = u[i] >> (32 - shift); + u[i] = (u[i] << shift) | u_carry; + u_carry = u_tmp; + } + for (unsigned i = 0; i < n; ++i) { + unsigned v_tmp = v[i] >> (32 - shift); + v[i] = (v[i] << shift) | v_carry; + v_carry = v_tmp; + } + } + u[m+n] = u_carry; +#if 0 + DEBUG(dbgs() << "KnuthDiv: normal:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << " by"); + DEBUG(for (int i = n; i >0; i--) dbgs() << " " << v[i-1]); + DEBUG(dbgs() << '\n'); +#endif + + // D2. [Initialize j.] Set j to m. This is the loop counter over the places. + int j = m; + do { + DEBUG(dbgs() << "KnuthDiv: quotient digit #" << j << '\n'); + // D3. [Calculate q'.]. + // Set qp = (u[j+n]*b + u[j+n-1]) / v[n-1]. (qp=qprime=q') + // Set rp = (u[j+n]*b + u[j+n-1]) % v[n-1]. (rp=rprime=r') + // Now test if qp == b or qp*v[n-2] > b*rp + u[j+n-2]; if so, decrease + // qp by 1, increase rp by v[n-1], and repeat this test if rp < b. The test + // on v[n-2] determines at high speed most of the cases in which the trial + // value qp is one too large, and it eliminates all cases where qp is two + // too large. + uint64_t dividend = ((uint64_t(u[j+n]) << 32) + u[j+n-1]); + DEBUG(dbgs() << "KnuthDiv: dividend == " << dividend << '\n'); + uint64_t qp = dividend / v[n-1]; + uint64_t rp = dividend % v[n-1]; + if (qp == b || qp*v[n-2] > b*rp + u[j+n-2]) { + qp--; + rp += v[n-1]; + if (rp < b && (qp == b || qp*v[n-2] > b*rp + u[j+n-2])) + qp--; + } + DEBUG(dbgs() << "KnuthDiv: qp == " << qp << ", rp == " << rp << '\n'); + + // D4. [Multiply and subtract.] Replace (u[j+n]u[j+n-1]...u[j]) with + // (u[j+n]u[j+n-1]..u[j]) - qp * (v[n-1]...v[1]v[0]). This computation + // consists of a simple multiplication by a one-place number, combined with + // a subtraction.
+ bool isNeg = false; + for (unsigned i = 0; i < n; ++i) { + uint64_t u_tmp = uint64_t(u[j+i]) | (uint64_t(u[j+i+1]) << 32); + uint64_t subtrahend = uint64_t(qp) * uint64_t(v[i]); + bool borrow = subtrahend > u_tmp; + DEBUG(dbgs() << "KnuthDiv: u_tmp == " << u_tmp + << ", subtrahend == " << subtrahend + << ", borrow = " << borrow << '\n'); + + uint64_t result = u_tmp - subtrahend; + unsigned k = j + i; + u[k++] = (unsigned)(result & (b-1)); // subtract low word + u[k++] = (unsigned)(result >> 32); // subtract high word + while (borrow && k <= m+n) { // deal with borrow to the left + borrow = u[k] == 0; + u[k]--; + k++; + } + isNeg |= borrow; + DEBUG(dbgs() << "KnuthDiv: u[j+i] == " << u[j+i] << ", u[j+i+1] == " << + u[j+i+1] << '\n'); + } + DEBUG(dbgs() << "KnuthDiv: after subtraction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); + // The digits (u[j+n]...u[j]) should be kept positive; if the result of + // this step is actually negative, (u[j+n]...u[j]) should be left as the + // true value plus b**(n+1), namely as the b's complement of + // the true value, and a "borrow" to the left should be remembered. + // + if (isNeg) { + bool carry = true; // true because b's complement is "complement + 1" + for (unsigned i = 0; i <= m+n; ++i) { + u[i] = ~u[i] + carry; // b's complement + carry = carry && u[i] == 0; + } + } + DEBUG(dbgs() << "KnuthDiv: after complement:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() << " " << u[i]); + DEBUG(dbgs() << '\n'); + + // D5. [Test remainder.] Set q[j] = qp. If the result of step D4 was + // negative, go to step D6; otherwise go on to step D7. + q[j] = (unsigned)qp; + if (isNeg) { + // D6. [Add back]. The probability that this step is necessary is very + // small, on the order of only 2/b. Make sure that test data accounts for + // this possibility. Decrease q[j] by 1 + q[j]--; + // and add (0v[n-1]...v[1]v[0]) to (u[j+n]u[j+n-1]...u[j+1]u[j]). + // A carry will occur to the left of u[j+n], and it should be ignored + // since it cancels with the borrow that occurred in D4. + bool carry = false; + for (unsigned i = 0; i < n; i++) { + unsigned limit = std::min(u[j+i],v[i]); + u[j+i] += v[i] + carry; + carry = u[j+i] < limit || (carry && u[j+i] == limit); + } + u[j+n] += carry; + } + DEBUG(dbgs() << "KnuthDiv: after correction:"); + DEBUG(for (int i = m+n; i >=0; i--) dbgs() <<" " << u[i]); + DEBUG(dbgs() << "\nKnuthDiv: digit result = " << q[j] << '\n'); + + // D7. [Loop on j.] Decrease j by one. Now if j >= 0, go back to D3. + } while (--j >= 0); + + DEBUG(dbgs() << "KnuthDiv: quotient:"); + DEBUG(for (int i = m; i >=0; i--) dbgs() <<" " << q[i]); + DEBUG(dbgs() << '\n'); + + // D8. [Unnormalize]. Now q[...] is the desired quotient, and the desired + // remainder may be obtained by dividing u[...] by d. If r is non-null we + // compute the remainder (urem uses this). + if (r) { + // The value d is expressed by the "shift" value above since we avoided + // multiplication by d by using a shift left. So, all we have to do is + // shift right here. 
+ if (shift) { + unsigned carry = 0; + DEBUG(dbgs() << "KnuthDiv: remainder:"); + for (int i = n-1; i >= 0; i--) { + r[i] = (u[i] >> shift) | carry; + carry = u[i] << (32 - shift); + DEBUG(dbgs() << " " << r[i]); + } + } else { + for (int i = n-1; i >= 0; i--) { + r[i] = u[i]; + DEBUG(dbgs() << " " << r[i]); + } + } + DEBUG(dbgs() << '\n'); + } +#if 0 + DEBUG(dbgs() << '\n'); +#endif +} + +void APInt::divide(const APInt LHS, unsigned lhsWords, + const APInt &RHS, unsigned rhsWords, + APInt *Quotient, APInt *Remainder) +{ + assert(lhsWords >= rhsWords && "Fractional result"); + + // First, compose the values into an array of 32-bit words instead of + // 64-bit words. This is a necessity of both the "short division" algorithm + // and the Knuth "classical algorithm" which requires there to be native + // operations for +, -, and * on an m bit value with an m*2 bit result. We + // can't use 64-bit operands here because we don't have native results of + // 128 bits. Furthermore, casting the 64-bit values to 32-bit values won't + // work on big-endian machines. + uint64_t mask = ~0ull >> (sizeof(unsigned)*CHAR_BIT); + unsigned n = rhsWords * 2; + unsigned m = (lhsWords * 2) - n; + + // Allocate space for the temporary values we need either on the stack, if + // it will fit, or on the heap if it won't. + unsigned SPACE[128]; + unsigned *U = 0; + unsigned *V = 0; + unsigned *Q = 0; + unsigned *R = 0; + if ((Remainder?4:3)*n+2*m+1 <= 128) { + U = &SPACE[0]; + V = &SPACE[m+n+1]; + Q = &SPACE[(m+n+1) + n]; + if (Remainder) + R = &SPACE[(m+n+1) + n + (m+n)]; + } else { + U = new unsigned[m + n + 1]; + V = new unsigned[n]; + Q = new unsigned[m+n]; + if (Remainder) + R = new unsigned[n]; + } + + // Initialize the dividend + memset(U, 0, (m+n+1)*sizeof(unsigned)); + for (unsigned i = 0; i < lhsWords; ++i) { + uint64_t tmp = (LHS.getNumWords() == 1 ? LHS.VAL : LHS.pVal[i]); + U[i * 2] = (unsigned)(tmp & mask); + U[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + } + U[m+n] = 0; // this extra word is for "spill" in the Knuth algorithm. + + // Initialize the divisor + memset(V, 0, (n)*sizeof(unsigned)); + for (unsigned i = 0; i < rhsWords; ++i) { + uint64_t tmp = (RHS.getNumWords() == 1 ? RHS.VAL : RHS.pVal[i]); + V[i * 2] = (unsigned)(tmp & mask); + V[i * 2 + 1] = (unsigned)(tmp >> (sizeof(unsigned)*CHAR_BIT)); + } + + // initialize the quotient and remainder + memset(Q, 0, (m+n) * sizeof(unsigned)); + if (Remainder) + memset(R, 0, n * sizeof(unsigned)); + + // Now, adjust m and n for the Knuth division. n is the number of words in + // the divisor. m is the number of words by which the dividend exceeds the + // divisor (i.e. m+n is the length of the dividend). These sizes must not + // contain any zero words or the Knuth algorithm fails. + for (unsigned i = n; i > 0 && V[i-1] == 0; i--) { + n--; + m++; + } + for (unsigned i = m+n; i > 0 && U[i-1] == 0; i--) + m--; + + // If we're left with only a single word for the divisor, Knuth doesn't work + // so we implement the short division algorithm here. This is much simpler + // and faster because we are certain that we can divide a 64-bit quantity + // by a 32-bit quantity at hardware speed and short division is simply a + // series of such operations. This is just like doing short division but we + // are using base 2^32 instead of base 10.
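The n == 1 "short division" path described in the comment above and implemented just below, extracted onto a fixed two-word dividend (the values are arbitrary):

    #include <cassert>
    #include <cstdint>

    int main() {
      unsigned U[2] = { 5u, 3u };                // dividend 3 * 2^32 + 5, little-endian words
      unsigned divisor = 7, Q[2], remainder = 0;
      for (int i = 1; i >= 0; --i) {             // high word first, as in long division
        uint64_t partial = (uint64_t(remainder) << 32) | U[i];
        Q[i] = unsigned(partial / divisor);      // one hardware 64-by-32 divide per word
        remainder = unsigned(partial % divisor);
      }
      assert(Q[1] == 0 && Q[0] == 1840700270u && remainder == 3);
      return 0;
    }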
+ assert(n != 0 && "Divide by zero?"); + if (n == 1) { + unsigned divisor = V[0]; + unsigned remainder = 0; + for (int i = m+n-1; i >= 0; i--) { + uint64_t partial_dividend = uint64_t(remainder) << 32 | U[i]; + if (partial_dividend == 0) { + Q[i] = 0; + remainder = 0; + } else if (partial_dividend < divisor) { + Q[i] = 0; + remainder = (unsigned)partial_dividend; + } else if (partial_dividend == divisor) { + Q[i] = 1; + remainder = 0; + } else { + Q[i] = (unsigned)(partial_dividend / divisor); + remainder = (unsigned)(partial_dividend - (Q[i] * divisor)); + } + } + if (R) + R[0] = remainder; + } else { + // Now we're ready to invoke the Knuth classical divide algorithm. In this + // case n > 1. + KnuthDiv(U, V, Q, R, m, n); + } + + // If the caller wants the quotient + if (Quotient) { + // Set up the Quotient value's memory. + if (Quotient->BitWidth != LHS.BitWidth) { + if (Quotient->isSingleWord()) + Quotient->VAL = 0; + else + delete [] Quotient->pVal; + Quotient->BitWidth = LHS.BitWidth; + if (!Quotient->isSingleWord()) + Quotient->pVal = getClearedMemory(Quotient->getNumWords()); + } else + Quotient->clearAllBits(); + + // The quotient is in Q. Reconstitute the quotient into Quotient's low + // order words. + if (lhsWords == 1) { + uint64_t tmp = + uint64_t(Q[0]) | (uint64_t(Q[1]) << (APINT_BITS_PER_WORD / 2)); + if (Quotient->isSingleWord()) + Quotient->VAL = tmp; + else + Quotient->pVal[0] = tmp; + } else { + assert(!Quotient->isSingleWord() && "Quotient APInt not large enough"); + for (unsigned i = 0; i < lhsWords; ++i) + Quotient->pVal[i] = + uint64_t(Q[i*2]) | (uint64_t(Q[i*2+1]) << (APINT_BITS_PER_WORD / 2)); + } + } + + // If the caller wants the remainder + if (Remainder) { + // Set up the Remainder value's memory. + if (Remainder->BitWidth != RHS.BitWidth) { + if (Remainder->isSingleWord()) + Remainder->VAL = 0; + else + delete [] Remainder->pVal; + Remainder->BitWidth = RHS.BitWidth; + if (!Remainder->isSingleWord()) + Remainder->pVal = getClearedMemory(Remainder->getNumWords()); + } else + Remainder->clearAllBits(); + + // The remainder is in R. Reconstitute the remainder into Remainder's low + // order words. + if (rhsWords == 1) { + uint64_t tmp = + uint64_t(R[0]) | (uint64_t(R[1]) << (APINT_BITS_PER_WORD / 2)); + if (Remainder->isSingleWord()) + Remainder->VAL = tmp; + else + Remainder->pVal[0] = tmp; + } else { + assert(!Remainder->isSingleWord() && "Remainder APInt not large enough"); + for (unsigned i = 0; i < rhsWords; ++i) + Remainder->pVal[i] = + uint64_t(R[i*2]) | (uint64_t(R[i*2+1]) << (APINT_BITS_PER_WORD / 2)); + } + } + + // Clean up the memory we allocated. + if (U != &SPACE[0]) { + delete [] U; + delete [] V; + delete [] Q; + delete [] R; + } +} + +APInt APInt::udiv(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + + // First, deal with the easy case + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Divide by zero?"); + return APInt(BitWidth, VAL / RHS.VAL); + } + + // Get some facts about the LHS and RHS number of bits and words + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Divided by zero???"); + unsigned lhsBits = this->getActiveBits(); + unsigned lhsWords = !lhsBits ? 
0 : (APInt::whichWord(lhsBits - 1) + 1); + + // Deal with some degenerate cases + if (!lhsWords) + // 0 / X ===> 0 + return APInt(BitWidth, 0); + else if (lhsWords < rhsWords || this->ult(RHS)) { + // X / Y ===> 0, iff X < Y + return APInt(BitWidth, 0); + } else if (*this == RHS) { + // X / X ===> 1 + return APInt(BitWidth, 1); + } else if (lhsWords == 1 && rhsWords == 1) { + // All high words are zero, just use native divide + return APInt(BitWidth, this->pVal[0] / RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Quotient(1,0); // to hold result. + divide(*this, lhsWords, RHS, rhsWords, &Quotient, 0); + return Quotient; +} + +APInt APInt::urem(const APInt& RHS) const { + assert(BitWidth == RHS.BitWidth && "Bit widths must be the same"); + if (isSingleWord()) { + assert(RHS.VAL != 0 && "Remainder by zero?"); + return APInt(BitWidth, VAL % RHS.VAL); + } + + // Get some facts about the LHS + unsigned lhsBits = getActiveBits(); + unsigned lhsWords = !lhsBits ? 0 : (whichWord(lhsBits - 1) + 1); + + // Get some facts about the RHS + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + assert(rhsWords && "Performing remainder operation by zero ???"); + + // Check the degenerate cases + if (lhsWords == 0) { + // 0 % Y ===> 0 + return APInt(BitWidth, 0); + } else if (lhsWords < rhsWords || this->ult(RHS)) { + // X % Y ===> X, iff X < Y + return *this; + } else if (*this == RHS) { + // X % X == 0; + return APInt(BitWidth, 0); + } else if (lhsWords == 1) { + // All high words are zero, just use native remainder + return APInt(BitWidth, pVal[0] % RHS.pVal[0]); + } + + // We have to compute it the hard way. Invoke the Knuth divide algorithm. + APInt Remainder(1,0); + divide(*this, lhsWords, RHS, rhsWords, 0, &Remainder); + return Remainder; +} + +void APInt::udivrem(const APInt &LHS, const APInt &RHS, + APInt &Quotient, APInt &Remainder) { + // Get some size facts about the dividend and divisor + unsigned lhsBits = LHS.getActiveBits(); + unsigned lhsWords = !lhsBits ? 0 : (APInt::whichWord(lhsBits - 1) + 1); + unsigned rhsBits = RHS.getActiveBits(); + unsigned rhsWords = !rhsBits ? 0 : (APInt::whichWord(rhsBits - 1) + 1); + + // Check the degenerate cases + if (lhsWords == 0) { + Quotient = 0; // 0 / Y ===> 0 + Remainder = 0; // 0 % Y ===> 0 + return; + } + + if (lhsWords < rhsWords || LHS.ult(RHS)) { + Remainder = LHS; // X % Y ===> X, iff X < Y + Quotient = 0; // X / Y ===> 0, iff X < Y + return; + } + + if (LHS == RHS) { + Quotient = 1; // X / X ===> 1 + Remainder = 0; // X % X ===> 0; + return; + } + + if (lhsWords == 1 && rhsWords == 1) { + // There is only one word to consider so use the native versions. + uint64_t lhsValue = LHS.isSingleWord() ? LHS.VAL : LHS.pVal[0]; + uint64_t rhsValue = RHS.isSingleWord() ? 
RHS.VAL : RHS.pVal[0]; + Quotient = APInt(LHS.getBitWidth(), lhsValue / rhsValue); + Remainder = APInt(LHS.getBitWidth(), lhsValue % rhsValue); + return; + } + + // Okay, let's do it the long way + divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder); +} + +APInt APInt::sadd_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this+RHS; + Overflow = isNonNegative() == RHS.isNonNegative() && + Res.isNonNegative() != isNonNegative(); + return Res; +} + +APInt APInt::uadd_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this+RHS; + Overflow = Res.ult(RHS); + return Res; +} + +APInt APInt::ssub_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this - RHS; + Overflow = isNonNegative() != RHS.isNonNegative() && + Res.isNonNegative() != isNonNegative(); + return Res; +} + +APInt APInt::usub_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this-RHS; + Overflow = Res.ugt(*this); + return Res; +} + +APInt APInt::sdiv_ov(const APInt &RHS, bool &Overflow) const { + // MININT/-1 --> overflow. + Overflow = isMinSignedValue() && RHS.isAllOnesValue(); + return sdiv(RHS); +} + +APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this * RHS; + + if (*this != 0 && RHS != 0) + Overflow = Res.sdiv(RHS) != *this || Res.sdiv(*this) != RHS; + else + Overflow = false; + return Res; +} + +APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { + Overflow = ShAmt >= getBitWidth(); + if (Overflow) + ShAmt = getBitWidth()-1; + + if (isNonNegative()) // Don't allow sign change. + Overflow = ShAmt >= countLeadingZeros(); + else + Overflow = ShAmt >= countLeadingOnes(); + + return *this << ShAmt; +} + + + + +void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) { + // Check our assumptions here + assert(!str.empty() && "Invalid string length"); + assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + + StringRef::iterator p = str.begin(); + size_t slen = str.size(); + bool isNeg = *p == '-'; + if (*p == '-' || *p == '+') { + p++; + slen--; + assert(slen && "String is only a sign, needs a value."); + } + assert((slen <= numbits || radix != 2) && "Insufficient bit width"); + assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width"); + assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width"); + assert((((slen-1)*64)/22 <= numbits || radix != 10) && + "Insufficient bit width"); + + // Allocate memory + if (!isSingleWord()) + pVal = getClearedMemory(getNumWords()); + + // Figure out if we can shift instead of multiply + unsigned shift = (radix == 16 ? 4 : radix == 8 ? 3 : radix == 2 ? 1 : 0); + + // Set up an APInt for the digit to add outside the loop so we don't + // constantly construct/destruct it.
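The digit loop in fromString reduces to "scale the running value by the radix, then add the digit", where the scale becomes a shift when the radix is a power of two. A single-word sketch:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      const char *str = "1A3";
      unsigned radix = 16;
      uint64_t value = 0;
      for (size_t i = 0; i < std::strlen(str); ++i) {
        unsigned digit = str[i] <= '9' ? str[i] - '0' : str[i] - 'A' + 10;
        value = value * radix + digit;   // fromString shifts by 4 here, since 16 == 1 << 4
      }
      assert(value == 0x1A3);
      return 0;
    }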
+ APInt apdigit(getBitWidth(), 0); + APInt apradix(getBitWidth(), radix); + + // Enter digit traversal loop + for (StringRef::iterator e = str.end(); p != e; ++p) { + unsigned digit = getDigit(*p, radix); + assert(digit < radix && "Invalid character in digit string"); + + // Shift or multiply the value by the radix + if (slen > 1) { + if (shift) + *this <<= shift; + else + *this *= apradix; + } + + // Add in the digit we just interpreted + if (apdigit.isSingleWord()) + apdigit.VAL = digit; + else + apdigit.pVal[0] = digit; + *this += apdigit; + } + // If it's negative, put it in two's complement form + if (isNeg) { + (*this)--; + this->flipAllBits(); + } +} + +void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, + bool Signed) const { + assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) && + "Radix should be 2, 8, 10, or 16!"); + + // First, check for a zero value and just short circuit the logic below. + if (*this == 0) { + Str.push_back('0'); + return; + } + + static const char Digits[] = "0123456789ABCDEF"; + + if (isSingleWord()) { + char Buffer[65]; + char *BufPtr = Buffer+65; + + uint64_t N; + if (!Signed) { + N = getZExtValue(); + } else { + int64_t I = getSExtValue(); + if (I >= 0) { + N = I; + } else { + Str.push_back('-'); + N = -(uint64_t)I; + } + } + + while (N) { + *--BufPtr = Digits[N % Radix]; + N /= Radix; + } + Str.append(BufPtr, Buffer+65); + return; + } + + APInt Tmp(*this); + + if (Signed && isNegative()) { + // They want to print the signed version and it is a negative value + // Flip the bits and add one to turn it into the equivalent positive + // value and put a '-' in the result. + Tmp.flipAllBits(); + Tmp++; + Str.push_back('-'); + } + + // We insert the digits backward, then reverse them to get the right order. + unsigned StartDig = Str.size(); + + // For the radix 2, 8 and 16 cases, we can just shift instead of divide + // because the number of bits per digit (1, 3 and 4 respectively) divides + // evenly. We just shift until the value is zero. + if (Radix != 10) { + // Just shift Tmp right for each digit width until it becomes zero + unsigned ShiftAmt = (Radix == 16 ? 4 : (Radix == 8 ? 3 : 1)); + unsigned MaskAmt = Radix - 1; + + while (Tmp != 0) { + unsigned Digit = unsigned(Tmp.getRawData()[0]) & MaskAmt; + Str.push_back(Digits[Digit]); + Tmp = Tmp.lshr(ShiftAmt); + } + } else { + APInt divisor(4, 10); + while (Tmp != 0) { + APInt APdigit(1, 0); + APInt tmp2(Tmp.getBitWidth(), 0); + divide(Tmp, Tmp.getNumWords(), divisor, divisor.getNumWords(), &tmp2, + &APdigit); + unsigned Digit = (unsigned)APdigit.getZExtValue(); + assert(Digit < Radix && "divide failed"); + Str.push_back(Digits[Digit]); + Tmp = tmp2; + } + } + + // Reverse the digits before returning. + std::reverse(Str.begin()+StartDig, Str.end()); +} + +/// toString - This returns the APInt as a std::string. Note that this is an +/// inefficient method. It is better to pass in a SmallVector/SmallString +/// to the methods above.
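toString's radix-10 path above is the mirror image of fromString: peel off the low digit with a divide/remainder, then reverse the accumulated digits. On a plain uint64_t:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <string>

    int main() {
      uint64_t value = 12345;
      std::string s;
      while (value != 0) {
        s.push_back("0123456789ABCDEF"[value % 10]);  // low digit first
        value /= 10;
      }
      std::reverse(s.begin(), s.end());               // digits came out backward
      assert(s == "12345");
      return 0;
    }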
+std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const { + SmallString<40> S; + toString(S, Radix, Signed); + return S.str(); +} + + +void APInt::dump() const { + SmallString<40> S, U; + this->toStringUnsigned(U); + this->toStringSigned(S); + dbgs() << "APInt(" << BitWidth << "b, " + << U.str() << "u " << S.str() << "s)"; +} + +void APInt::print(raw_ostream &OS, bool isSigned) const { + SmallString<40> S; + this->toString(S, 10, isSigned); + OS << S.str(); +} + +// This implements a variety of operations on a representation of +// arbitrary precision, two's-complement, bignum integer values. + +// Assumed by lowHalf, highHalf, partMSB and partLSB. A fairly safe +// and unrestricting assumption. +#define COMPILE_TIME_ASSERT(cond) extern int CTAssert[(cond) ? 1 : -1] +COMPILE_TIME_ASSERT(integerPartWidth % 2 == 0); + +/* Some handy functions local to this file. */ +namespace { + + /* Returns the integer part with the least significant BITS set. + BITS cannot be zero. */ + static inline integerPart + lowBitMask(unsigned int bits) + { + assert(bits != 0 && bits <= integerPartWidth); + + return ~(integerPart) 0 >> (integerPartWidth - bits); + } + + /* Returns the value of the lower half of PART. */ + static inline integerPart + lowHalf(integerPart part) + { + return part & lowBitMask(integerPartWidth / 2); + } + + /* Returns the value of the upper half of PART. */ + static inline integerPart + highHalf(integerPart part) + { + return part >> (integerPartWidth / 2); + } + + /* Returns the bit number of the most significant set bit of a part. + If the input number has no bits set -1U is returned. */ + static unsigned int + partMSB(integerPart value) + { + unsigned int n, msb; + + if (value == 0) + return -1U; + + n = integerPartWidth / 2; + + msb = 0; + do { + if (value >> n) { + value >>= n; + msb += n; + } + + n >>= 1; + } while (n); + + return msb; + } + + /* Returns the bit number of the least significant set bit of a + part. If the input number has no bits set -1U is returned. */ + static unsigned int + partLSB(integerPart value) + { + unsigned int n, lsb; + + if (value == 0) + return -1U; + + lsb = integerPartWidth - 1; + n = integerPartWidth / 2; + + do { + if (value << n) { + value <<= n; + lsb -= n; + } + + n >>= 1; + } while (n); + + return lsb; + } +} + +/* Sets the least significant part of a bignum to the input value, and + zeroes out higher parts. */ +void +APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts) +{ + unsigned int i; + + assert(parts > 0); + + dst[0] = part; + for (i = 1; i < parts; i++) + dst[i] = 0; +} + +/* Assign one bignum to another. */ +void +APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + dst[i] = src[i]; +} + +/* Returns true if a bignum is zero, false otherwise. */ +bool +APInt::tcIsZero(const integerPart *src, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + if (src[i]) + return false; + + return true; +} + +/* Extract the given bit of a bignum; returns 0 or 1. */ +int +APInt::tcExtractBit(const integerPart *parts, unsigned int bit) +{ + return (parts[bit / integerPartWidth] & + ((integerPart) 1 << bit % integerPartWidth)) != 0; +} + +/* Set the given bit of a bignum. */ +void +APInt::tcSetBit(integerPart *parts, unsigned int bit) +{ + parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth); +} + +/* Clears the given bit of a bignum. 
*/ +void +APInt::tcClearBit(integerPart *parts, unsigned int bit) +{ + parts[bit / integerPartWidth] &= + ~((integerPart) 1 << (bit % integerPartWidth)); +} + +/* Returns the bit number of the least significant set bit of a + number. If the input number has no bits set -1U is returned. */ +unsigned int +APInt::tcLSB(const integerPart *parts, unsigned int n) +{ + unsigned int i, lsb; + + for (i = 0; i < n; i++) { + if (parts[i] != 0) { + lsb = partLSB(parts[i]); + + return lsb + i * integerPartWidth; + } + } + + return -1U; +} + +/* Returns the bit number of the most significant set bit of a number. + If the input number has no bits set -1U is returned. */ +unsigned int +APInt::tcMSB(const integerPart *parts, unsigned int n) +{ + unsigned int msb; + + do { + --n; + + if (parts[n] != 0) { + msb = partMSB(parts[n]); + + return msb + n * integerPartWidth; + } + } while (n); + + return -1U; +} + +/* Copy the bit vector of width srcBITS from SRC, starting at bit + srcLSB, to DST, of dstCOUNT parts, such that the bit srcLSB becomes + the least significant bit of DST. All high bits above srcBITS in + DST are zero-filled. */ +void +APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src, + unsigned int srcBits, unsigned int srcLSB) +{ + unsigned int firstSrcPart, dstParts, shift, n; + + dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth; + assert(dstParts <= dstCount); + + firstSrcPart = srcLSB / integerPartWidth; + tcAssign (dst, src + firstSrcPart, dstParts); + + shift = srcLSB % integerPartWidth; + tcShiftRight (dst, dstParts, shift); + + /* We now have (dstParts * integerPartWidth - shift) bits from SRC + in DST. If this is less than srcBits, append the rest, else + clear the high bits. */ + n = dstParts * integerPartWidth - shift; + if (n < srcBits) { + integerPart mask = lowBitMask (srcBits - n); + dst[dstParts - 1] |= ((src[firstSrcPart + dstParts] & mask) + << n % integerPartWidth); + } else if (n > srcBits) { + if (srcBits % integerPartWidth) + dst[dstParts - 1] &= lowBitMask (srcBits % integerPartWidth); + } + + /* Clear high parts. */ + while (dstParts < dstCount) + dst[dstParts++] = 0; +} + +/* DST += RHS + C where C is zero or one. Returns the carry flag. */ +integerPart +APInt::tcAdd(integerPart *dst, const integerPart *rhs, + integerPart c, unsigned int parts) +{ + unsigned int i; + + assert(c <= 1); + + for (i = 0; i < parts; i++) { + integerPart l; + + l = dst[i]; + if (c) { + dst[i] += rhs[i] + 1; + c = (dst[i] <= l); + } else { + dst[i] += rhs[i]; + c = (dst[i] < l); + } + } + + return c; +} + +/* DST -= RHS + C where C is zero or one. Returns the carry flag. */ +integerPart +APInt::tcSubtract(integerPart *dst, const integerPart *rhs, + integerPart c, unsigned int parts) +{ + unsigned int i; + + assert(c <= 1); + + for (i = 0; i < parts; i++) { + integerPart l; + + l = dst[i]; + if (c) { + dst[i] -= rhs[i] + 1; + c = (dst[i] >= l); + } else { + dst[i] -= rhs[i]; + c = (dst[i] > l); + } + } + + return c; +} + +/* Negate a bignum in-place. */ +void +APInt::tcNegate(integerPart *dst, unsigned int parts) +{ + tcComplement(dst, parts); + tcIncrement(dst, parts); +} + +/* DST += SRC * MULTIPLIER + CARRY if add is true + DST = SRC * MULTIPLIER + CARRY if add is false + + Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC + they must start at the same point, i.e. DST == SRC. + + If DSTPARTS == SRCPARTS + 1 no overflow occurs and zero is + returned.
Otherwise DST is filled with the least significant + DSTPARTS parts of the result, and if all of the omitted higher + parts were zero return zero, otherwise overflow occurred and + return one. */ +int +APInt::tcMultiplyPart(integerPart *dst, const integerPart *src, + integerPart multiplier, integerPart carry, + unsigned int srcParts, unsigned int dstParts, + bool add) +{ + unsigned int i, n; + + /* Otherwise our writes of DST kill our later reads of SRC. */ + assert(dst <= src || dst >= src + srcParts); + assert(dstParts <= srcParts + 1); + + /* N loops; minimum of dstParts and srcParts. */ + n = dstParts < srcParts ? dstParts: srcParts; + + for (i = 0; i < n; i++) { + integerPart low, mid, high, srcPart; + + /* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY. + + This cannot overflow, because + + (n - 1) * (n - 1) + 2 (n - 1) = (n - 1) * (n + 1) + + which is less than n^2. */ + + srcPart = src[i]; + + if (multiplier == 0 || srcPart == 0) { + low = carry; + high = 0; + } else { + low = lowHalf(srcPart) * lowHalf(multiplier); + high = highHalf(srcPart) * highHalf(multiplier); + + mid = lowHalf(srcPart) * highHalf(multiplier); + high += highHalf(mid); + mid <<= integerPartWidth / 2; + if (low + mid < low) + high++; + low += mid; + + mid = highHalf(srcPart) * lowHalf(multiplier); + high += highHalf(mid); + mid <<= integerPartWidth / 2; + if (low + mid < low) + high++; + low += mid; + + /* Now add carry. */ + if (low + carry < low) + high++; + low += carry; + } + + if (add) { + /* And now DST[i], and store the new low part there. */ + if (low + dst[i] < low) + high++; + dst[i] += low; + } else + dst[i] = low; + + carry = high; + } + + if (i < dstParts) { + /* Full multiplication, there is no overflow. */ + assert(i + 1 == dstParts); + dst[i] = carry; + return 0; + } else { + /* We overflowed if there is carry. */ + if (carry) + return 1; + + /* We would overflow if any significant unwritten parts would be + non-zero. This is true if any remaining src parts are non-zero + and the multiplier is non-zero. */ + if (multiplier) + for (; i < srcParts; i++) + if (src[i]) + return 1; + + /* We fitted in the narrow destination. */ + return 0; + } +} + +/* DST = LHS * RHS, where DST has the same width as the operands and + is filled with the least significant parts of the result. Returns + one if overflow occurred, otherwise zero. DST must be disjoint + from both operands. */ +int +APInt::tcMultiply(integerPart *dst, const integerPart *lhs, + const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + int overflow; + + assert(dst != lhs && dst != rhs); + + overflow = 0; + tcSet(dst, 0, parts); + + for (i = 0; i < parts; i++) + overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts, + parts - i, true); + + return overflow; +} + +/* DST = LHS * RHS, where DST has width the sum of the widths of the + operands. No overflow occurs. DST must be disjoint from both + operands. Returns the number of parts required to hold the + result. */ +unsigned int +APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs, + const integerPart *rhs, unsigned int lhsParts, + unsigned int rhsParts) +{ + /* Put the narrower number on the LHS for fewer loops below.
*/ + if (lhsParts > rhsParts) { + return tcFullMultiply (dst, rhs, lhs, rhsParts, lhsParts); + } else { + unsigned int n; + + assert(dst != lhs && dst != rhs); + + tcSet(dst, 0, rhsParts); + + for (n = 0; n < lhsParts; n++) + tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true); + + n = lhsParts + rhsParts; + + return n - (dst[n - 1] == 0); + } +} + +/* If RHS is zero LHS and REMAINDER are left unchanged, return one. + Otherwise set LHS to LHS / RHS with the fractional part discarded, + set REMAINDER to the remainder, return zero. i.e. + + OLD_LHS = RHS * LHS + REMAINDER + + SCRATCH is a bignum of the same size as the operands and result for + use by the routine; its contents need not be initialized and are + destroyed. LHS, REMAINDER and SCRATCH must be distinct. +*/ +int +APInt::tcDivide(integerPart *lhs, const integerPart *rhs, + integerPart *remainder, integerPart *srhs, + unsigned int parts) +{ + unsigned int n, shiftCount; + integerPart mask; + + assert(lhs != remainder && lhs != srhs && remainder != srhs); + + shiftCount = tcMSB(rhs, parts) + 1; + if (shiftCount == 0) + return true; + + shiftCount = parts * integerPartWidth - shiftCount; + n = shiftCount / integerPartWidth; + mask = (integerPart) 1 << (shiftCount % integerPartWidth); + + tcAssign(srhs, rhs, parts); + tcShiftLeft(srhs, parts, shiftCount); + tcAssign(remainder, lhs, parts); + tcSet(lhs, 0, parts); + + /* Loop, subtracting SRHS if REMAINDER is greater and adding that to + the total. */ + for (;;) { + int compare; + + compare = tcCompare(remainder, srhs, parts); + if (compare >= 0) { + tcSubtract(remainder, srhs, 0, parts); + lhs[n] |= mask; + } + + if (shiftCount == 0) + break; + shiftCount--; + tcShiftRight(srhs, parts, 1); + if ((mask >>= 1) == 0) + mask = (integerPart) 1 << (integerPartWidth - 1), n--; + } + + return false; +} + +/* Shift a bignum left COUNT bits in-place. Shifted in bits are zero. + There are no restrictions on COUNT. */ +void +APInt::tcShiftLeft(integerPart *dst, unsigned int parts, unsigned int count) +{ + if (count) { + unsigned int jump, shift; + + /* Jump is the inter-part jump; shift is the intra-part shift. */ + jump = count / integerPartWidth; + shift = count % integerPartWidth; + + while (parts > jump) { + integerPart part; + + parts--; + + /* dst[i] comes from the two parts src[i - jump] and, if we have + an intra-part shift, src[i - jump - 1]. */ + part = dst[parts - jump]; + if (shift) { + part <<= shift; + if (parts >= jump + 1) + part |= dst[parts - jump - 1] >> (integerPartWidth - shift); + } + + dst[parts] = part; + } + + while (parts > 0) + dst[--parts] = 0; + } +} + +/* Shift a bignum right COUNT bits in-place. Shifted in bits are + zero. There are no restrictions on COUNT. */ +void +APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count) +{ + if (count) { + unsigned int i, jump, shift; + + /* Jump is the inter-part jump; shift is the intra-part shift. */ + jump = count / integerPartWidth; + shift = count % integerPartWidth; + + /* Perform the shift. This leaves the most significant COUNT bits + of the result at zero. */ + for (i = 0; i < parts; i++) { + integerPart part; + + if (i + jump >= parts) { + part = 0; + } else { + part = dst[i + jump]; + if (shift) { + part >>= shift; + if (i + jump + 1 < parts) + part |= dst[i + jump + 1] << (integerPartWidth - shift); + } + } + + dst[i] = part; + } + } +} + +/* Bitwise and of two bignums.
*/ +void +APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + dst[i] &= rhs[i]; +} + +/* Bitwise inclusive or of two bignums. */ +void +APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + dst[i] |= rhs[i]; +} + +/* Bitwise exclusive or of two bignums. */ +void +APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + dst[i] ^= rhs[i]; +} + +/* Complement a bignum in-place. */ +void +APInt::tcComplement(integerPart *dst, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + dst[i] = ~dst[i]; +} + +/* Comparison (unsigned) of two bignums. */ +int +APInt::tcCompare(const integerPart *lhs, const integerPart *rhs, + unsigned int parts) +{ + while (parts) { + parts--; + if (lhs[parts] == rhs[parts]) + continue; + + if (lhs[parts] > rhs[parts]) + return 1; + else + return -1; + } + + return 0; +} + +/* Increment a bignum in-place, return the carry flag. */ +integerPart +APInt::tcIncrement(integerPart *dst, unsigned int parts) +{ + unsigned int i; + + for (i = 0; i < parts; i++) + if (++dst[i] != 0) + break; + + return i == parts; +} + +/* Set the least significant BITS bits of a bignum, clear the + rest. */ +void +APInt::tcSetLeastSignificantBits(integerPart *dst, unsigned int parts, + unsigned int bits) +{ + unsigned int i; + + i = 0; + while (bits > integerPartWidth) { + dst[i++] = ~(integerPart) 0; + bits -= integerPartWidth; + } + + if (bits) + dst[i++] = ~(integerPart) 0 >> (integerPartWidth - bits); + + while (i < parts) + dst[i++] = 0; +} diff --git a/final/lib/Support/APSInt.cpp b/final/lib/Support/APSInt.cpp new file mode 100644 index 00000000000..73acafa690c --- /dev/null +++ b/final/lib/Support/APSInt.cpp @@ -0,0 +1,23 @@ +//===-- llvm/ADT/APSInt.cpp - Arbitrary Precision Signed Int ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the APSInt class, which is a simple class that +// represents an arbitrary sized integer that knows its signedness. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/FoldingSet.h" + +using namespace llvm; + +void APSInt::Profile(FoldingSetNodeID& ID) const { + ID.AddInteger((unsigned) (IsUnsigned ? 1 : 0)); + APInt::Profile(ID); +} diff --git a/final/lib/Support/Allocator.cpp b/final/lib/Support/Allocator.cpp new file mode 100644 index 00000000000..5e27df6628e --- /dev/null +++ b/final/lib/Support/Allocator.cpp @@ -0,0 +1,180 @@ +//===--- Allocator.cpp - Simple memory allocation abstraction -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the BumpPtrAllocator interface. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Allocator.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Recycler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Memory.h" +#include <cstring> + +namespace llvm { + +BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, + SlabAllocator &allocator) + : SlabSize(size), SizeThreshold(threshold), Allocator(allocator), + CurSlab(0), BytesAllocated(0) { } + +BumpPtrAllocator::~BumpPtrAllocator() { + DeallocateSlabs(CurSlab); +} + +/// AlignPtr - Align Ptr to Alignment bytes, rounding up. Alignment should +/// be a power of two. This method rounds up, so AlignPtr(7, 4) == 8 and +/// AlignPtr(8, 4) == 8. +char *BumpPtrAllocator::AlignPtr(char *Ptr, size_t Alignment) { + assert(Alignment && (Alignment & (Alignment - 1)) == 0 && + "Alignment is not a power of two!"); + + // Do the alignment. + return (char*)(((uintptr_t)Ptr + Alignment - 1) & + ~(uintptr_t)(Alignment - 1)); +} + +/// StartNewSlab - Allocate a new slab and move the bump pointers over into +/// the new slab. Modifies CurPtr and End. +void BumpPtrAllocator::StartNewSlab() { + // If we allocated a large number of slabs already it's likely that we're going + // to allocate more. Increase slab size to reduce mallocs and possibly memory + // overhead. The factors are chosen conservatively to avoid overallocation. + if (BytesAllocated >= SlabSize * 128) + SlabSize *= 2; + + MemSlab *NewSlab = Allocator.Allocate(SlabSize); + NewSlab->NextPtr = CurSlab; + CurSlab = NewSlab; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; +} + +/// DeallocateSlabs - Deallocate all memory slabs after and including this +/// one. +void BumpPtrAllocator::DeallocateSlabs(MemSlab *Slab) { + while (Slab) { + MemSlab *NextSlab = Slab->NextPtr; +#ifndef NDEBUG + // Poison the memory so stale pointers crash sooner. Note we must + // preserve the Size and NextPtr fields at the beginning. + sys::Memory::setRangeWritable(Slab + 1, Slab->Size - sizeof(MemSlab)); + memset(Slab + 1, 0xCD, Slab->Size - sizeof(MemSlab)); +#endif + Allocator.Deallocate(Slab); + Slab = NextSlab; + } +} + +/// Reset - Deallocate all but the current slab and reset the current pointer +/// to the beginning of it, freeing all memory allocated so far. +void BumpPtrAllocator::Reset() { + if (!CurSlab) + return; + DeallocateSlabs(CurSlab->NextPtr); + CurSlab->NextPtr = 0; + CurPtr = (char*)(CurSlab + 1); + End = ((char*)CurSlab) + CurSlab->Size; +} + +/// Allocate - Allocate space at the specified alignment. +/// +void *BumpPtrAllocator::Allocate(size_t Size, size_t Alignment) { + if (!CurSlab) // Start a new slab if we haven't allocated one already. + StartNewSlab(); + + // Keep track of how many bytes we've allocated. + BytesAllocated += Size; + + // 0-byte alignment means 1-byte alignment. + if (Alignment == 0) Alignment = 1; + + // Allocate the aligned space, going forwards from CurPtr. + char *Ptr = AlignPtr(CurPtr, Alignment); + + // Check if we can hold it. + if (Ptr + Size <= End) { + CurPtr = Ptr + Size; + return Ptr; + } + + // If Size is really big, allocate a separate slab for it. + size_t PaddedSize = Size + sizeof(MemSlab) + Alignment - 1; + if (PaddedSize > SizeThreshold) { + MemSlab *NewSlab = Allocator.Allocate(PaddedSize); + + // Put the new slab after the current slab, since we are not allocating + // into it.
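The rounding identity inside AlignPtr above (add alignment-1, then mask the low bits away) is worth checking with concrete numbers; the mask is only contiguous when the alignment is a power of two, which the assert enforces:

    #include <cassert>
    #include <cstdint>

    int main() {
      uintptr_t align = 8;                                  // power of two
      uintptr_t mask  = ~(align - 1);                       // ...11111000
      assert((((uintptr_t)7  + align - 1) & mask) == 8);    // rounds up
      assert((((uintptr_t)8  + align - 1) & mask) == 8);    // already aligned: unchanged
      assert((((uintptr_t)13 + align - 1) & mask) == 16);
      return 0;
    }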
+ NewSlab->NextPtr = CurSlab->NextPtr; + CurSlab->NextPtr = NewSlab; + + Ptr = AlignPtr((char*)(NewSlab + 1), Alignment); + assert((uintptr_t)Ptr + Size <= (uintptr_t)NewSlab + NewSlab->Size); + return Ptr; + } + + // Otherwise, start a new slab and try again. + StartNewSlab(); + Ptr = AlignPtr(CurPtr, Alignment); + CurPtr = Ptr + Size; + assert(CurPtr <= End && "Unable to allocate memory!"); + return Ptr; +} + +unsigned BumpPtrAllocator::GetNumSlabs() const { + unsigned NumSlabs = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + ++NumSlabs; + } + return NumSlabs; +} + +void BumpPtrAllocator::PrintStats() const { + unsigned NumSlabs = 0; + size_t TotalMemory = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + TotalMemory += Slab->Size; + ++NumSlabs; + } + + errs() << "\nNumber of memory regions: " << NumSlabs << '\n' + << "Bytes used: " << BytesAllocated << '\n' + << "Bytes allocated: " << TotalMemory << '\n' + << "Bytes wasted: " << (TotalMemory - BytesAllocated) + << " (includes alignment, etc)\n"; +} + +MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator = + MallocSlabAllocator(); + +SlabAllocator::~SlabAllocator() { } + +MallocSlabAllocator::~MallocSlabAllocator() { } + +MemSlab *MallocSlabAllocator::Allocate(size_t Size) { + MemSlab *Slab = (MemSlab*)Allocator.Allocate(Size, 0); + Slab->Size = Size; + Slab->NextPtr = 0; + return Slab; +} + +void MallocSlabAllocator::Deallocate(MemSlab *Slab) { + Allocator.Deallocate(Slab); +} + +void PrintRecyclerStats(size_t Size, + size_t Align, + size_t FreeListSize) { + errs() << "Recycler element size: " << Size << '\n' + << "Recycler element alignment: " << Align << '\n' + << "Number of elements free for recycling: " << FreeListSize << '\n'; +} + +} diff --git a/final/lib/Support/Atomic.cpp b/final/lib/Support/Atomic.cpp new file mode 100644 index 00000000000..c7b4bff2794 --- /dev/null +++ b/final/lib/Support/Atomic.cpp @@ -0,0 +1,112 @@ +//===-- Atomic.cpp - Atomic Operations --------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements atomic operations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Atomic.h" +#include "llvm/Config/config.h" + +using namespace llvm; + +#if defined(_MSC_VER) +#include +#undef MemoryFence +#endif + +void sys::MemoryFence() { +#if LLVM_MULTITHREADED==0 + return; +#else +# if defined(__GNUC__) + __sync_synchronize(); +# elif defined(_MSC_VER) + MemoryBarrier(); +# else +# error No memory fence implementation for your platform! +# endif +#endif +} + +sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr, + sys::cas_flag new_value, + sys::cas_flag old_value) { +#if LLVM_MULTITHREADED==0 + sys::cas_flag result = *ptr; + if (result == old_value) + *ptr = new_value; + return result; +#elif defined(__GNUC__) + return __sync_val_compare_and_swap(ptr, old_value, new_value); +#elif defined(_MSC_VER) + return InterlockedCompareExchange(ptr, new_value, old_value); +#else +# error No compare-and-swap implementation for your platform! 
+#endif +} + +sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + ++(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_add_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedIncrement(ptr); +#else +# error No atomic increment implementation for your platform! +#endif +} + +sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) { +#if LLVM_MULTITHREADED==0 + --(*ptr); + return *ptr; +#elif defined(__GNUC__) + return __sync_sub_and_fetch(ptr, 1); +#elif defined(_MSC_VER) + return InterlockedDecrement(ptr); +#else +# error No atomic decrement implementation for your platform! +#endif +} + +sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) { +#if LLVM_MULTITHREADED==0 + *ptr += val; + return *ptr; +#elif defined(__GNUC__) + return __sync_add_and_fetch(ptr, val); +#elif defined(_MSC_VER) + return InterlockedExchangeAdd(ptr, val) + val; +#else +# error No atomic add implementation for your platform! +#endif +} + +sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) { + sys::cas_flag original, result; + do { + original = *ptr; + result = original * val; + } while (sys::CompareAndSwap(ptr, result, original) != original); + + return result; +} + +sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) { + sys::cas_flag original, result; + do { + original = *ptr; + result = original / val; + } while (sys::CompareAndSwap(ptr, result, original) != original); + + return result; +} diff --git a/final/lib/Support/CMakeLists.txt b/final/lib/Support/CMakeLists.txt new file mode 100644 index 00000000000..a0e997d349f --- /dev/null +++ b/final/lib/Support/CMakeLists.txt @@ -0,0 +1,103 @@ +## FIXME: This only requires RTTI because tblgen uses it. Fix that. 
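[Editor's note: AtomicMul and AtomicDiv above build read-modify-write operations out of CompareAndSwap by retrying until no other thread intervened. The sketch below re-expresses that retry pattern against C++11 std::atomic, an assumption made purely for illustration; the code above predates C++11 and uses its own primitives.]

#include <atomic>
#include <cstdint>
#include <cstdio>

// Multiply atomically by retrying until the stored value is still the one we
// computed from, mirroring the read/compute/CompareAndSwap loop above.
static uint32_t atomicMul(std::atomic<uint32_t> &v, uint32_t factor) {
  uint32_t original = v.load();
  uint32_t result;
  do {
    result = original * factor;
    // On failure, compare_exchange_weak reloads 'original' with the current
    // value, so the next iteration recomputes against fresh data.
  } while (!v.compare_exchange_weak(original, result));
  return result;
}

int main() {
  std::atomic<uint32_t> counter(6);
  std::printf("%u\n", atomicMul(counter, 7));   // prints 42
  return 0;
}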
+set(LLVM_REQUIRES_RTTI 1) +if( MINGW ) + set(LLVM_REQUIRES_EH 1) +endif() + +add_llvm_library(LLVMSupport + APFloat.cpp + APInt.cpp + APSInt.cpp + Allocator.cpp + circular_raw_ostream.cpp + CommandLine.cpp + ConstantRange.cpp + CrashRecoveryContext.cpp + Debug.cpp + DeltaAlgorithm.cpp + DAGDeltaAlgorithm.cpp + Dwarf.cpp + ErrorHandling.cpp + FileUtilities.cpp + FoldingSet.cpp + FormattedStream.cpp + GraphWriter.cpp + IntEqClasses.cpp + IntervalMap.cpp + IsInf.cpp + IsNAN.cpp + ManagedStatic.cpp + MemoryBuffer.cpp + MemoryObject.cpp + PluginLoader.cpp + PrettyStackTrace.cpp + Regex.cpp + SmallPtrSet.cpp + SmallVector.cpp + SourceMgr.cpp + Statistic.cpp + StringExtras.cpp + StringMap.cpp + StringPool.cpp + StringRef.cpp + SystemUtils.cpp + TargetRegistry.cpp + Timer.cpp + ToolOutputFile.cpp + Triple.cpp + Twine.cpp + raw_os_ostream.cpp + raw_ostream.cpp + regcomp.c + regerror.c + regexec.c + regfree.c + regstrlcpy.c + +# System + Atomic.cpp + Disassembler.cpp + DynamicLibrary.cpp + Errno.cpp + Host.cpp + IncludeFile.cpp + Memory.cpp + Mutex.cpp + Path.cpp + PathV2.cpp + Process.cpp + Program.cpp + RWMutex.cpp + SearchForAddressOfSpecialSymbol.cpp + Signals.cpp + system_error.cpp + ThreadLocal.cpp + Threading.cpp + TimeValue.cpp + Valgrind.cpp + Unix/Host.inc + Unix/Memory.inc + Unix/Mutex.inc + Unix/Path.inc + Unix/PathV2.inc + Unix/Process.inc + Unix/Program.inc + Unix/RWMutex.inc + Unix/Signals.inc + Unix/system_error.inc + Unix/ThreadLocal.inc + Unix/TimeValue.inc + Windows/DynamicLibrary.inc + Windows/Host.inc + Windows/Memory.inc + Windows/Mutex.inc + Windows/Path.inc + Windows/PathV2.inc + Windows/Process.inc + Windows/Program.inc + Windows/RWMutex.inc + Windows/Signals.inc + Windows/system_error.inc + Windows/ThreadLocal.inc + Windows/TimeValue.inc + ) diff --git a/final/lib/Support/COPYRIGHT.regex b/final/lib/Support/COPYRIGHT.regex new file mode 100644 index 00000000000..a6392fd37c3 --- /dev/null +++ b/final/lib/Support/COPYRIGHT.regex @@ -0,0 +1,54 @@ +$OpenBSD: COPYRIGHT,v 1.3 2003/06/02 20:18:36 millert Exp $ + +Copyright 1992, 1993, 1994 Henry Spencer. All rights reserved. +This software is not subject to any license of the American Telephone +and Telegraph Company or of the Regents of the University of California. + +Permission is granted to anyone to use this software for any purpose on +any computer system, and to alter it and redistribute it, subject +to the following restrictions: + +1. The author is not responsible for the consequences of use of this + software, no matter how awful, even if they arise from flaws in it. + +2. The origin of this software must not be misrepresented, either by + explicit claim or by omission. Since few users ever read sources, + credits must appear in the documentation. + +3. Altered versions must be plainly marked as such, and must not be + misrepresented as being the original software. Since few users + ever read sources, credits must appear in the documentation. + +4. This notice may not be removed or altered. + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)COPYRIGHT 8.1 (Berkeley) 3/16/94 + */ diff --git a/final/lib/Support/CommandLine.cpp b/final/lib/Support/CommandLine.cpp new file mode 100644 index 00000000000..7e744993a7c --- /dev/null +++ b/final/lib/Support/CommandLine.cpp @@ -0,0 +1,1295 @@ +//===-- CommandLine.cpp - Command line parser implementation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements a command line argument processor that is useful when +// creating a tool. It provides a simple, minimalistic interface that is easily +// extensible and supports nonlocal (library) command line options. +// +// Note that rather than trying to figure out what this code does, you could try +// reading the library documentation located in docs/CommandLine.html +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/Path.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Config/config.h" +#include +#include +using namespace llvm; +using namespace cl; + +//===----------------------------------------------------------------------===// +// Template instantiations and anchors. 
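[Editor's note: CommandLine.cpp is the implementation side of the library. For context, a typical client looks roughly like the sketch below; this is hypothetical tool code, not part of the patch. Options are global objects whose constructors register them with the library, which is why the implementation keeps a RegisteredOptionList behind the scenes.]

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Each global option registers itself with the library at static-init time.
static cl::opt<std::string> InputFilename(cl::Positional,
                                          cl::desc("<input file>"),
                                          cl::init("-"));
static cl::opt<bool> Verbose("verbose", cl::desc("Enable verbose output"));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv, "example tool\n");
  if (Verbose)   // options read like plain values after parsing
    outs() << "input: " << InputFilename << "\n";
  return 0;
}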
+//
+namespace llvm { namespace cl {
+TEMPLATE_INSTANTIATION(class basic_parser<bool>);
+TEMPLATE_INSTANTIATION(class basic_parser<boolOrDefault>);
+TEMPLATE_INSTANTIATION(class basic_parser<int>);
+TEMPLATE_INSTANTIATION(class basic_parser<unsigned>);
+TEMPLATE_INSTANTIATION(class basic_parser<double>);
+TEMPLATE_INSTANTIATION(class basic_parser<float>);
+TEMPLATE_INSTANTIATION(class basic_parser<std::string>);
+TEMPLATE_INSTANTIATION(class basic_parser<char>);
+
+TEMPLATE_INSTANTIATION(class opt<unsigned>);
+TEMPLATE_INSTANTIATION(class opt<int>);
+TEMPLATE_INSTANTIATION(class opt<std::string>);
+TEMPLATE_INSTANTIATION(class opt<char>);
+TEMPLATE_INSTANTIATION(class opt<bool>);
+} } // end namespace llvm::cl
+
+void Option::anchor() {}
+void basic_parser_impl::anchor() {}
+void parser<bool>::anchor() {}
+void parser<boolOrDefault>::anchor() {}
+void parser<int>::anchor() {}
+void parser<unsigned>::anchor() {}
+void parser<double>::anchor() {}
+void parser<float>::anchor() {}
+void parser<std::string>::anchor() {}
+void parser<char>::anchor() {}
+
+//===----------------------------------------------------------------------===//
+
+// Globals for name and overview of program.  Program name is not a string to
+// avoid static ctor/dtor issues.
+static char ProgramName[80] = "<premain>";
+static const char *ProgramOverview = 0;
+
+// This collects additional help to be printed.
+static ManagedStatic<std::vector<const char*> > MoreHelp;
+
+extrahelp::extrahelp(const char *Help)
+  : morehelp(Help) {
+  MoreHelp->push_back(Help);
+}
+
+static bool OptionListChanged = false;
+
+// MarkOptionsChanged - Internal helper function.
+void cl::MarkOptionsChanged() {
+  OptionListChanged = true;
+}
+
+/// RegisteredOptionList - This is the list of the command line options that
+/// have statically constructed themselves.
+static Option *RegisteredOptionList = 0;
+
+void Option::addArgument() {
+  assert(NextRegistered == 0 && "argument multiply registered!");
+
+  NextRegistered = RegisteredOptionList;
+  RegisteredOptionList = this;
+  MarkOptionsChanged();
+}
+
+
+//===----------------------------------------------------------------------===//
+// Basic, shared command line option processing machinery.
+//
+
+/// GetOptionInfo - Scan the list of registered options, turning them into data
+/// structures that are easier to handle.
+static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts,
+                          SmallVectorImpl<Option*> &SinkOpts,
+                          StringMap<Option*> &OptionsMap) {
+  SmallVector<const char*, 16> OptionNames;
+  Option *CAOpt = 0;  // The ConsumeAfter option if it exists.
+  for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) {
+    // If this option wants to handle multiple option names, get the full set.
+    // This handles enum options like "-O1 -O2" etc.
+    O->getExtraOptionNames(OptionNames);
+    if (O->ArgStr[0])
+      OptionNames.push_back(O->ArgStr);
+
+    // Handle named options.
+    for (size_t i = 0, e = OptionNames.size(); i != e; ++i) {
+      // Add argument to the argument map!
+      if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) {
+        errs() << ProgramName << ": CommandLine Error: Argument '"
+               << OptionNames[i] << "' defined more than once!\n";
+      }
+    }
+
+    OptionNames.clear();
+
+    // Remember information about positional options.
+    if (O->getFormattingFlag() == cl::Positional)
+      PositionalOpts.push_back(O);
+    else if (O->getMiscFlags() & cl::Sink) // Remember sink options
+      SinkOpts.push_back(O);
+    else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) {
+      if (CAOpt)
+        O->error("Cannot specify more than one option with cl::ConsumeAfter!");
+      CAOpt = O;
+    }
+  }
+
+  if (CAOpt)
+    PositionalOpts.push_back(CAOpt);
+
+  // Make sure that they are in order of registration not backwards.
+ std::reverse(PositionalOpts.begin(), PositionalOpts.end()); +} + + +/// LookupOption - Lookup the option specified by the specified option on the +/// command line. If there is a value specified (after an equal sign) return +/// that as well. This assumes that leading dashes have already been stripped. +static Option *LookupOption(StringRef &Arg, StringRef &Value, + const StringMap &OptionsMap) { + // Reject all dashes. + if (Arg.empty()) return 0; + + size_t EqualPos = Arg.find('='); + + // If we have an equals sign, remember the value. + if (EqualPos == StringRef::npos) { + // Look up the option. + StringMap::const_iterator I = OptionsMap.find(Arg); + return I != OptionsMap.end() ? I->second : 0; + } + + // If the argument before the = is a valid option name, we match. If not, + // return Arg unmolested. + StringMap::const_iterator I = + OptionsMap.find(Arg.substr(0, EqualPos)); + if (I == OptionsMap.end()) return 0; + + Value = Arg.substr(EqualPos+1); + Arg = Arg.substr(0, EqualPos); + return I->second; +} + +/// LookupNearestOption - Lookup the closest match to the option specified by +/// the specified option on the command line. If there is a value specified +/// (after an equal sign) return that as well. This assumes that leading dashes +/// have already been stripped. +static Option *LookupNearestOption(StringRef Arg, + const StringMap &OptionsMap, + const char *&NearestString) { + // Reject all dashes. + if (Arg.empty()) return 0; + + // Split on any equal sign. + StringRef LHS = Arg.split('=').first; + + // Find the closest match. + Option *Best = 0; + unsigned BestDistance = 0; + for (StringMap::const_iterator it = OptionsMap.begin(), + ie = OptionsMap.end(); it != ie; ++it) { + Option *O = it->second; + SmallVector OptionNames; + O->getExtraOptionNames(OptionNames); + if (O->ArgStr[0]) + OptionNames.push_back(O->ArgStr); + + for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { + StringRef Name = OptionNames[i]; + unsigned Distance = StringRef(Name).edit_distance( + Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); + if (!Best || Distance < BestDistance) { + Best = O; + NearestString = OptionNames[i]; + BestDistance = Distance; + } + } + } + + return Best; +} + +/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurence() that +/// does special handling of cl::CommaSeparated options. +static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos, + StringRef ArgName, + StringRef Value, bool MultiArg = false) +{ + // Check to see if this option accepts a comma separated list of values. If + // it does, we have to split up the value into multiple values. + if (Handler->getMiscFlags() & CommaSeparated) { + StringRef Val(Value); + StringRef::size_type Pos = Val.find(','); + + while (Pos != StringRef::npos) { + // Process the portion before the comma. + if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg)) + return true; + // Erase the portion before the comma, AND the comma. + Val = Val.substr(Pos+1); + Value.substr(Pos+1); // Increment the original value pointer as well. + // Check for another comma. + Pos = Val.find(','); + } + + Value = Val; + } + + if (Handler->addOccurrence(pos, ArgName, Value, MultiArg)) + return true; + + return false; +} + +/// ProvideOption - For Value, this differentiates between an empty value ("") +/// and a null value (StringRef()). The later is accepted for arguments that +/// don't allow a value (-foo) the former is rejected (-foo=). 
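[Editor's note: LookupOption's central move is splitting an argument at the first '=' so that "-foo=bar" resolves to option "foo" with value "bar". A minimal sketch of that split, with std::string standing in for StringRef; splitArg is an illustrative helper, not the LLVM API.]

#include <cstdio>
#include <string>

// Split "opt=value" at the first '='; when no '=' is present the whole
// argument is the name and the value stays empty, as in LookupOption.
static void splitArg(const std::string &arg, std::string &name,
                     std::string &value) {
  std::string::size_type eq = arg.find('=');
  if (eq == std::string::npos) {
    name = arg;
    value.clear();
  } else {
    name = arg.substr(0, eq);
    value = arg.substr(eq + 1);
  }
}

int main() {
  std::string name, value;
  splitArg("mcpu=cortex-a8", name, value);
  std::printf("%s / %s\n", name.c_str(), value.c_str()); // mcpu / cortex-a8
  return 0;
}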
+static inline bool ProvideOption(Option *Handler, StringRef ArgName, + StringRef Value, int argc, char **argv, + int &i) { + // Is this a multi-argument option? + unsigned NumAdditionalVals = Handler->getNumAdditionalVals(); + + // Enforce value requirements + switch (Handler->getValueExpectedFlag()) { + case ValueRequired: + if (Value.data() == 0) { // No value specified? + if (i+1 >= argc) + return Handler->error("requires a value!"); + // Steal the next argument, like for '-o filename' + Value = argv[++i]; + } + break; + case ValueDisallowed: + if (NumAdditionalVals > 0) + return Handler->error("multi-valued option specified" + " with ValueDisallowed modifier!"); + + if (Value.data()) + return Handler->error("does not allow a value! '" + + Twine(Value) + "' specified."); + break; + case ValueOptional: + break; + + default: + errs() << ProgramName + << ": Bad ValueMask flag! CommandLine usage error:" + << Handler->getValueExpectedFlag() << "\n"; + llvm_unreachable(0); + } + + // If this isn't a multi-arg option, just run the handler. + if (NumAdditionalVals == 0) + return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value); + + // If it is, run the handle several times. + bool MultiArg = false; + + if (Value.data()) { + if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg)) + return true; + --NumAdditionalVals; + MultiArg = true; + } + + while (NumAdditionalVals > 0) { + if (i+1 >= argc) + return Handler->error("not enough values!"); + Value = argv[++i]; + + if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg)) + return true; + MultiArg = true; + --NumAdditionalVals; + } + return false; +} + +static bool ProvidePositionalOption(Option *Handler, StringRef Arg, int i) { + int Dummy = i; + return ProvideOption(Handler, Handler->ArgStr, Arg, 0, 0, Dummy); +} + + +// Option predicates... +static inline bool isGrouping(const Option *O) { + return O->getFormattingFlag() == cl::Grouping; +} +static inline bool isPrefixedOrGrouping(const Option *O) { + return isGrouping(O) || O->getFormattingFlag() == cl::Prefix; +} + +// getOptionPred - Check to see if there are any options that satisfy the +// specified predicate with names that are the prefixes in Name. This is +// checked by progressively stripping characters off of the name, checking to +// see if there options that satisfy the predicate. If we find one, return it, +// otherwise return null. +// +static Option *getOptionPred(StringRef Name, size_t &Length, + bool (*Pred)(const Option*), + const StringMap &OptionsMap) { + + StringMap::const_iterator OMI = OptionsMap.find(Name); + + // Loop while we haven't found an option and Name still has at least two + // characters in it (so that the next iteration will not be the empty + // string. + while (OMI == OptionsMap.end() && Name.size() > 1) { + Name = Name.substr(0, Name.size()-1); // Chop off the last character. + OMI = OptionsMap.find(Name); + } + + if (OMI != OptionsMap.end() && Pred(OMI->second)) { + Length = Name.size(); + return OMI->second; // Found one! + } + return 0; // No option found! +} + +/// HandlePrefixedOrGroupedOption - The specified argument string (which started +/// with at least one '-') does not fully match an available option. Check to +/// see if this is a prefix or grouped option. If so, split arg into output an +/// Arg/Value pair and return the Option to parse it with. 
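[Editor's note: getOptionPred resolves prefixed spellings like "-ofile" by repeatedly chopping the last character off the candidate name until a registered option matches. A sketch of that search with std::map standing in for StringMap; all names here are hypothetical.]

#include <cstdio>
#include <map>
#include <string>

// Chop characters off the end of 'name' until a registered option matches,
// keeping at least one character, just like the loop in getOptionPred.
static std::string findPrefixOption(std::string name,
                                    const std::map<std::string, int> &opts) {
  while (opts.find(name) == opts.end() && name.size() > 1)
    name.erase(name.size() - 1);   // drop the last character
  return opts.count(name) ? name : std::string();
}

int main() {
  std::map<std::string, int> opts;
  opts["o"] = 0;        // a cl::Prefix-style option, e.g. -o<file>
  opts["debug"] = 1;
  // "-ofile" resolves to option "o"; the remainder ("file") would be its value.
  std::printf("%s\n", findPrefixOption("ofile", opts).c_str());  // prints o
  return 0;
}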
+static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, + bool &ErrorParsing, + const StringMap &OptionsMap) { + if (Arg.size() == 1) return 0; + + // Do the lookup! + size_t Length = 0; + Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); + if (PGOpt == 0) return 0; + + // If the option is a prefixed option, then the value is simply the + // rest of the name... so fall through to later processing, by + // setting up the argument name flags and value fields. + if (PGOpt->getFormattingFlag() == cl::Prefix) { + Value = Arg.substr(Length); + Arg = Arg.substr(0, Length); + assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt); + return PGOpt; + } + + // This must be a grouped option... handle them now. Grouping options can't + // have values. + assert(isGrouping(PGOpt) && "Broken getOptionPred!"); + + do { + // Move current arg name out of Arg into OneArgName. + StringRef OneArgName = Arg.substr(0, Length); + Arg = Arg.substr(Length); + + // Because ValueRequired is an invalid flag for grouped arguments, + // we don't need to pass argc/argv in. + assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && + "Option can not be cl::Grouping AND cl::ValueRequired!"); + int Dummy = 0; + ErrorParsing |= ProvideOption(PGOpt, OneArgName, + StringRef(), 0, 0, Dummy); + + // Get the next grouping option. + PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); + } while (PGOpt && Length != Arg.size()); + + // Return the last option with Arg cut down to just the last one. + return PGOpt; +} + + + +static bool RequiresValue(const Option *O) { + return O->getNumOccurrencesFlag() == cl::Required || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +static bool EatsUnboundedNumberOfValues(const Option *O) { + return O->getNumOccurrencesFlag() == cl::ZeroOrMore || + O->getNumOccurrencesFlag() == cl::OneOrMore; +} + +/// ParseCStringVector - Break INPUT up wherever one or more +/// whitespace characters are found, and store the resulting tokens in +/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated +/// using strdup(), so it is the caller's responsibility to free() +/// them later. +/// +static void ParseCStringVector(std::vector &OutputVector, + const char *Input) { + // Characters which will be treated as token separators: + StringRef Delims = " \v\f\t\r\n"; + + StringRef WorkStr(Input); + while (!WorkStr.empty()) { + // If the first character is a delimiter, strip them off. + if (Delims.find(WorkStr[0]) != StringRef::npos) { + size_t Pos = WorkStr.find_first_not_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + WorkStr = WorkStr.substr(Pos); + continue; + } + + // Find position of first delimiter. + size_t Pos = WorkStr.find_first_of(Delims); + if (Pos == StringRef::npos) Pos = WorkStr.size(); + + // Everything from 0 to Pos is the next word to copy. + char *NewStr = (char*)malloc(Pos+1); + memcpy(NewStr, WorkStr.data(), Pos); + NewStr[Pos] = 0; + OutputVector.push_back(NewStr); + + WorkStr = WorkStr.substr(Pos); + } +} + +/// ParseEnvironmentOptions - An alternative entry point to the +/// CommandLine library, which allows you to read the program's name +/// from the caller (as PROGNAME) and its command-line arguments from +/// an environment variable (whose name is given in ENVVAR). +/// +void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, + const char *Overview, bool ReadResponseFiles) { + // Check args. 
+ assert(progName && "Program name not specified"); + assert(envVar && "Environment variable name missing"); + + // Get the environment variable they want us to parse options out of. + const char *envValue = getenv(envVar); + if (!envValue) + return; + + // Get program's "name", which we wouldn't know without the caller + // telling us. + std::vector newArgv; + newArgv.push_back(strdup(progName)); + + // Parse the value of the environment variable into a "command line" + // and hand it off to ParseCommandLineOptions(). + ParseCStringVector(newArgv, envValue); + int newArgc = static_cast(newArgv.size()); + ParseCommandLineOptions(newArgc, &newArgv[0], Overview, ReadResponseFiles); + + // Free all the strdup()ed strings. + for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); + i != e; ++i) + free(*i); +} + + +/// ExpandResponseFiles - Copy the contents of argv into newArgv, +/// substituting the contents of the response files for the arguments +/// of type @file. +static void ExpandResponseFiles(unsigned argc, char** argv, + std::vector& newArgv) { + for (unsigned i = 1; i != argc; ++i) { + char *arg = argv[i]; + + if (arg[0] == '@') { + sys::PathWithStatus respFile(++arg); + + // Check that the response file is not empty (mmap'ing empty + // files can be problematic). + const sys::FileStatus *FileStat = respFile.getFileStatus(); + if (FileStat && FileStat->getSize() != 0) { + + // If we could open the file, parse its contents, otherwise + // pass the @file option verbatim. + + // TODO: we should also support recursive loading of response files, + // since this is how gcc behaves. (From their man page: "The file may + // itself contain additional @file options; any such options will be + // processed recursively.") + + // Mmap the response file into memory. + OwningPtr respFilePtr; + if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) { + ParseCStringVector(newArgv, respFilePtr->getBufferStart()); + continue; + } + } + } + newArgv.push_back(strdup(arg)); + } +} + +void cl::ParseCommandLineOptions(int argc, char **argv, + const char *Overview, bool ReadResponseFiles) { + // Process all registered options. + SmallVector PositionalOpts; + SmallVector SinkOpts; + StringMap Opts; + GetOptionInfo(PositionalOpts, SinkOpts, Opts); + + assert((!Opts.empty() || !PositionalOpts.empty()) && + "No options specified!"); + + // Expand response files. + std::vector newArgv; + if (ReadResponseFiles) { + newArgv.push_back(strdup(argv[0])); + ExpandResponseFiles(argc, argv, newArgv); + argv = &newArgv[0]; + argc = static_cast(newArgv.size()); + } + + // Copy the program name into ProgName, making sure not to overflow it. + std::string ProgName = sys::path::filename(argv[0]); + size_t Len = std::min(ProgName.size(), size_t(79)); + memcpy(ProgramName, ProgName.data(), Len); + ProgramName[Len] = '\0'; + + ProgramOverview = Overview; + bool ErrorParsing = false; + + // Check out the positional arguments to collect information about them. + unsigned NumPositionalRequired = 0; + + // Determine whether or not there are an unlimited number of positionals + bool HasUnlimitedPositionals = false; + + Option *ConsumeAfterOpt = 0; + if (!PositionalOpts.empty()) { + if (PositionalOpts[0]->getNumOccurrencesFlag() == cl::ConsumeAfter) { + assert(PositionalOpts.size() > 1 && + "Cannot specify cl::ConsumeAfter without a positional argument!"); + ConsumeAfterOpt = PositionalOpts[0]; + } + + // Calculate how many positional values are _required_. 
+ bool UnboundedFound = false; + for (size_t i = ConsumeAfterOpt != 0, e = PositionalOpts.size(); + i != e; ++i) { + Option *Opt = PositionalOpts[i]; + if (RequiresValue(Opt)) + ++NumPositionalRequired; + else if (ConsumeAfterOpt) { + // ConsumeAfter cannot be combined with "optional" positional options + // unless there is only one positional argument... + if (PositionalOpts.size() > 2) + ErrorParsing |= + Opt->error("error - this positional option will never be matched, " + "because it does not Require a value, and a " + "cl::ConsumeAfter option is active!"); + } else if (UnboundedFound && !Opt->ArgStr[0]) { + // This option does not "require" a value... Make sure this option is + // not specified after an option that eats all extra arguments, or this + // one will never get any! + // + ErrorParsing |= Opt->error("error - option can never match, because " + "another positional argument will match an " + "unbounded number of values, and this option" + " does not require a value!"); + } + UnboundedFound |= EatsUnboundedNumberOfValues(Opt); + } + HasUnlimitedPositionals = UnboundedFound || ConsumeAfterOpt; + } + + // PositionalVals - A vector of "positional" arguments we accumulate into + // the process at the end. + // + SmallVector, 4> PositionalVals; + + // If the program has named positional arguments, and the name has been run + // across, keep track of which positional argument was named. Otherwise put + // the positional args into the PositionalVals list... + Option *ActivePositionalArg = 0; + + // Loop over all of the arguments... processing them. + bool DashDashFound = false; // Have we read '--'? + for (int i = 1; i < argc; ++i) { + Option *Handler = 0; + Option *NearestHandler = 0; + const char *NearestHandlerString = 0; + StringRef Value; + StringRef ArgName = ""; + + // If the option list changed, this means that some command line + // option has just been registered or deregistered. This can occur in + // response to things like -load, etc. If this happens, rescan the options. + if (OptionListChanged) { + PositionalOpts.clear(); + SinkOpts.clear(); + Opts.clear(); + GetOptionInfo(PositionalOpts, SinkOpts, Opts); + OptionListChanged = false; + } + + // Check to see if this is a positional argument. This argument is + // considered to be positional if it doesn't start with '-', if it is "-" + // itself, or if we have seen "--" already. + // + if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) { + // Positional argument! + if (ActivePositionalArg) { + ProvidePositionalOption(ActivePositionalArg, argv[i], i); + continue; // We are done! + } + + if (!PositionalOpts.empty()) { + PositionalVals.push_back(std::make_pair(argv[i],i)); + + // All of the positional arguments have been fulfulled, give the rest to + // the consume after option... if it's specified... + // + if (PositionalVals.size() >= NumPositionalRequired && + ConsumeAfterOpt != 0) { + for (++i; i < argc; ++i) + PositionalVals.push_back(std::make_pair(argv[i],i)); + break; // Handle outside of the argument processing loop... + } + + // Delay processing positional arguments until the end... + continue; + } + } else if (argv[i][0] == '-' && argv[i][1] == '-' && argv[i][2] == 0 && + !DashDashFound) { + DashDashFound = true; // This is the mythical "--"? + continue; // Don't try to process it as an argument itself. 
+ } else if (ActivePositionalArg && + (ActivePositionalArg->getMiscFlags() & PositionalEatsArgs)) { + // If there is a positional argument eating options, check to see if this + // option is another positional argument. If so, treat it as an argument, + // otherwise feed it to the eating positional. + ArgName = argv[i]+1; + // Eat leading dashes. + while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + + Handler = LookupOption(ArgName, Value, Opts); + if (!Handler || Handler->getFormattingFlag() != cl::Positional) { + ProvidePositionalOption(ActivePositionalArg, argv[i], i); + continue; // We are done! + } + + } else { // We start with a '-', must be an argument. + ArgName = argv[i]+1; + // Eat leading dashes. + while (!ArgName.empty() && ArgName[0] == '-') + ArgName = ArgName.substr(1); + + Handler = LookupOption(ArgName, Value, Opts); + + // Check to see if this "option" is really a prefixed or grouped argument. + if (Handler == 0) + Handler = HandlePrefixedOrGroupedOption(ArgName, Value, + ErrorParsing, Opts); + + // Otherwise, look for the closest available option to report to the user + // in the upcoming error. + if (Handler == 0 && SinkOpts.empty()) + NearestHandler = LookupNearestOption(ArgName, Opts, + NearestHandlerString); + } + + if (Handler == 0) { + if (SinkOpts.empty()) { + errs() << ProgramName << ": Unknown command line argument '" + << argv[i] << "'. Try: '" << argv[0] << " -help'\n"; + + if (NearestHandler) { + // If we know a near match, report it as well. + errs() << ProgramName << ": Did you mean '-" + << NearestHandlerString << "'?\n"; + } + + ErrorParsing = true; + } else { + for (SmallVectorImpl::iterator I = SinkOpts.begin(), + E = SinkOpts.end(); I != E ; ++I) + (*I)->addOccurrence(i, "", argv[i]); + } + continue; + } + + // If this is a named positional argument, just remember that it is the + // active one... + if (Handler->getFormattingFlag() == cl::Positional) + ActivePositionalArg = Handler; + else + ErrorParsing |= ProvideOption(Handler, ArgName, Value, argc, argv, i); + } + + // Check and handle positional arguments now... + if (NumPositionalRequired > PositionalVals.size()) { + errs() << ProgramName + << ": Not enough positional command line arguments specified!\n" + << "Must specify at least " << NumPositionalRequired + << " positional arguments: See: " << argv[0] << " -help\n"; + + ErrorParsing = true; + } else if (!HasUnlimitedPositionals && + PositionalVals.size() > PositionalOpts.size()) { + errs() << ProgramName + << ": Too many positional arguments specified!\n" + << "Can specify at most " << PositionalOpts.size() + << " positional arguments: See: " << argv[0] << " -help\n"; + ErrorParsing = true; + + } else if (ConsumeAfterOpt == 0) { + // Positional args have already been handled if ConsumeAfter is specified. + unsigned ValNo = 0, NumVals = static_cast(PositionalVals.size()); + for (size_t i = 0, e = PositionalOpts.size(); i != e; ++i) { + if (RequiresValue(PositionalOpts[i])) { + ProvidePositionalOption(PositionalOpts[i], PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + --NumPositionalRequired; // We fulfilled our duty... + } + + // If we _can_ give this option more arguments, do so now, as long as we + // do not give it values that others need. 'Done' controls whether the + // option even _WANTS_ any more. 
+ // + bool Done = PositionalOpts[i]->getNumOccurrencesFlag() == cl::Required; + while (NumVals-ValNo > NumPositionalRequired && !Done) { + switch (PositionalOpts[i]->getNumOccurrencesFlag()) { + case cl::Optional: + Done = true; // Optional arguments want _at most_ one value + // FALL THROUGH + case cl::ZeroOrMore: // Zero or more will take all they can get... + case cl::OneOrMore: // One or more will take all they can get... + ProvidePositionalOption(PositionalOpts[i], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + break; + default: + llvm_unreachable("Internal error, unexpected NumOccurrences flag in " + "positional argument processing!"); + } + } + } + } else { + assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size()); + unsigned ValNo = 0; + for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j) + if (RequiresValue(PositionalOpts[j])) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[j], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle the case where there is just one positional option, and it's + // optional. In this case, we want to give JUST THE FIRST option to the + // positional option and keep the rest for the consume after. The above + // loop would have assigned no values to positional options in this case. + // + if (PositionalOpts.size() == 2 && ValNo == 0 && !PositionalVals.empty()) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[1], + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + ValNo++; + } + + // Handle over all of the rest of the arguments to the + // cl::ConsumeAfter command line option... + for (; ValNo != PositionalVals.size(); ++ValNo) + ErrorParsing |= ProvidePositionalOption(ConsumeAfterOpt, + PositionalVals[ValNo].first, + PositionalVals[ValNo].second); + } + + // Loop over args and make sure all required args are specified! + for (StringMap::iterator I = Opts.begin(), + E = Opts.end(); I != E; ++I) { + switch (I->second->getNumOccurrencesFlag()) { + case Required: + case OneOrMore: + if (I->second->getNumOccurrences() == 0) { + I->second->error("must be specified at least once!"); + ErrorParsing = true; + } + // Fall through + default: + break; + } + } + + // Now that we know if -debug is specified, we can use it. + // Note that if ReadResponseFiles == true, this must be done before the + // memory allocated for the expanded command line is free()d below. + DEBUG(dbgs() << "Args: "; + for (int i = 0; i < argc; ++i) + dbgs() << argv[i] << ' '; + dbgs() << '\n'; + ); + + // Free all of the memory allocated to the map. Command line options may only + // be processed once! + Opts.clear(); + PositionalOpts.clear(); + MoreHelp->clear(); + + // Free the memory allocated by ExpandResponseFiles. + if (ReadResponseFiles) { + // Free all the strdup()ed strings. 
+ for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); + i != e; ++i) + free(*i); + } + + // If we had an error processing our arguments, don't let the program execute + if (ErrorParsing) exit(1); +} + +//===----------------------------------------------------------------------===// +// Option Base class implementation +// + +bool Option::error(const Twine &Message, StringRef ArgName) { + if (ArgName.data() == 0) ArgName = ArgStr; + if (ArgName.empty()) + errs() << HelpStr; // Be nice for positional arguments + else + errs() << ProgramName << ": for the -" << ArgName; + + errs() << " option: " << Message << "\n"; + return true; +} + +bool Option::addOccurrence(unsigned pos, StringRef ArgName, + StringRef Value, bool MultiArg) { + if (!MultiArg) + NumOccurrences++; // Increment the number of times we have been seen + + switch (getNumOccurrencesFlag()) { + case Optional: + if (NumOccurrences > 1) + return error("may only occur zero or one times!", ArgName); + break; + case Required: + if (NumOccurrences > 1) + return error("must occur exactly one time!", ArgName); + // Fall through + case OneOrMore: + case ZeroOrMore: + case ConsumeAfter: break; + default: return error("bad num occurrences flag value!"); + } + + return handleOccurrence(pos, ArgName, Value); +} + + +// getValueStr - Get the value description string, using "DefaultMsg" if nothing +// has been specified yet. +// +static const char *getValueStr(const Option &O, const char *DefaultMsg) { + if (O.ValueStr[0] == 0) return DefaultMsg; + return O.ValueStr; +} + +//===----------------------------------------------------------------------===// +// cl::alias class implementation +// + +// Return the width of the option tag for printing... +size_t alias::getOptionWidth() const { + return std::strlen(ArgStr)+6; +} + +// Print out the option for the alias. +void alias::printOptionInfo(size_t GlobalWidth) const { + size_t L = std::strlen(ArgStr); + errs() << " -" << ArgStr; + errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; +} + + + +//===----------------------------------------------------------------------===// +// Parser Implementation code... +// + +// basic_parser implementation +// + +// Return the width of the option tag for printing... +size_t basic_parser_impl::getOptionWidth(const Option &O) const { + size_t Len = std::strlen(O.ArgStr); + if (const char *ValName = getValueName()) + Len += std::strlen(getValueStr(O, ValName))+3; + + return Len + 6; +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. +// +void basic_parser_impl::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + outs() << " -" << O.ArgStr; + + if (const char *ValName = getValueName()) + outs() << "=<" << getValueStr(O, ValName) << '>'; + + outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; +} + + + + +// parser implementation +// +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, bool &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = true; + return false; + } + + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + Value = false; + return false; + } + return O.error("'" + Arg + + "' is invalid value for boolean argument! 
Try 0 or 1"); +} + +// parser implementation +// +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, boolOrDefault &Value) { + if (Arg == "" || Arg == "true" || Arg == "TRUE" || Arg == "True" || + Arg == "1") { + Value = BOU_TRUE; + return false; + } + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { + Value = BOU_FALSE; + return false; + } + + return O.error("'" + Arg + + "' is invalid value for boolean argument! Try 0 or 1"); +} + +// parser implementation +// +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, int &Value) { + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for integer argument!"); + return false; +} + +// parser implementation +// +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, unsigned &Value) { + + if (Arg.getAsInteger(0, Value)) + return O.error("'" + Arg + "' value invalid for uint argument!"); + return false; +} + +// parser/parser implementation +// +static bool parseDouble(Option &O, StringRef Arg, double &Value) { + SmallString<32> TmpStr(Arg.begin(), Arg.end()); + const char *ArgStart = TmpStr.c_str(); + char *End; + Value = strtod(ArgStart, &End); + if (*End != 0) + return O.error("'" + Arg + "' value invalid for floating point argument!"); + return false; +} + +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, double &Val) { + return parseDouble(O, Arg, Val); +} + +bool parser::parse(Option &O, StringRef ArgName, + StringRef Arg, float &Val) { + double dVal; + if (parseDouble(O, Arg, dVal)) + return true; + Val = (float)dVal; + return false; +} + + + +// generic_parser_base implementation +// + +// findOption - Return the option number corresponding to the specified +// argument string. If the option is not found, getNumOptions() is returned. +// +unsigned generic_parser_base::findOption(const char *Name) { + unsigned e = getNumOptions(); + + for (unsigned i = 0; i != e; ++i) { + if (strcmp(getOption(i), Name) == 0) + return i; + } + return e; +} + + +// Return the width of the option tag for printing... +size_t generic_parser_base::getOptionWidth(const Option &O) const { + if (O.hasArgStr()) { + size_t Size = std::strlen(O.ArgStr)+6; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + Size = std::max(Size, std::strlen(getOption(i))+8); + return Size; + } else { + size_t BaseSize = 0; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) + BaseSize = std::max(BaseSize, std::strlen(getOption(i))+8); + return BaseSize; + } +} + +// printOptionInfo - Print out information about this option. The +// to-be-maintained width is specified. 
+// +void generic_parser_base::printOptionInfo(const Option &O, + size_t GlobalWidth) const { + if (O.hasArgStr()) { + size_t L = std::strlen(O.ArgStr); + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n'; + + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; + outs() << " =" << getOption(i); + outs().indent(NumSpaces) << " - " << getDescription(i) << '\n'; + } + } else { + if (O.HelpStr[0]) + outs() << " " << O.HelpStr << '\n'; + for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { + size_t L = std::strlen(getOption(i)); + outs() << " -" << getOption(i); + outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n'; + } + } +} + + +//===----------------------------------------------------------------------===// +// -help and -help-hidden option implementation +// + +static int OptNameCompare(const void *LHS, const void *RHS) { + typedef std::pair pair_ty; + + return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); +} + +namespace { + +class HelpPrinter { + size_t MaxArgLen; + const Option *EmptyArg; + const bool ShowHidden; + +public: + explicit HelpPrinter(bool showHidden) : ShowHidden(showHidden) { + EmptyArg = 0; + } + + void operator=(bool Value) { + if (Value == false) return; + + // Get all the options. + SmallVector PositionalOpts; + SmallVector SinkOpts; + StringMap OptMap; + GetOptionInfo(PositionalOpts, SinkOpts, OptMap); + + // Copy Options into a vector so we can sort them as we like. + SmallVector, 128> Opts; + SmallPtrSet OptionSet; // Duplicate option detection. + + for (StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options. + if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. + if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. + if (!OptionSet.insert(I->second)) + continue; + + Opts.push_back(std::pair(I->getKey().data(), + I->second)); + } + + // Sort the options list alphabetically. + qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); + + if (ProgramOverview) + outs() << "OVERVIEW: " << ProgramOverview << "\n"; + + outs() << "USAGE: " << ProgramName << " [options]"; + + // Print out the positional options. + Option *CAOpt = 0; // The cl::ConsumeAfter option, if it exists... + if (!PositionalOpts.empty() && + PositionalOpts[0]->getNumOccurrencesFlag() == ConsumeAfter) + CAOpt = PositionalOpts[0]; + + for (size_t i = CAOpt != 0, e = PositionalOpts.size(); i != e; ++i) { + if (PositionalOpts[i]->ArgStr[0]) + outs() << " --" << PositionalOpts[i]->ArgStr; + outs() << " " << PositionalOpts[i]->HelpStr; + } + + // Print the consume after option info if it exists... + if (CAOpt) outs() << " " << CAOpt->HelpStr; + + outs() << "\n\n"; + + // Compute the maximum argument length... + MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + outs() << "OPTIONS:\n"; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionInfo(MaxArgLen); + + // Print any extra help the user has declared. 
+    for (std::vector<const char *>::iterator I = MoreHelp->begin(),
+           E = MoreHelp->end(); I != E; ++I)
+      outs() << *I;
+    MoreHelp->clear();
+
+    // Halt the program since help information was printed
+    exit(1);
+  }
+};
+} // End anonymous namespace
+
+// Define the two HelpPrinter instances that are used to print out help, or
+// help-hidden...
+//
+static HelpPrinter NormalPrinter(false);
+static HelpPrinter HiddenPrinter(true);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
+    cl::location(NormalPrinter), cl::ValueDisallowed);
+
+static cl::opt<HelpPrinter, true, parser<bool> >
+HHOp("help-hidden", cl::desc("Display all available options"),
+     cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed);
+
+static void (*OverrideVersionPrinter)() = 0;
+
+static int TargetArraySortFn(const void *LHS, const void *RHS) {
+  typedef std::pair<const char *, const Target*> pair_ty;
+  return strcmp(((const pair_ty*)LHS)->first, ((const pair_ty*)RHS)->first);
+}
+
+namespace {
+class VersionPrinter {
+public:
+  void print() {
+    raw_ostream &OS = outs();
+    OS << "Low Level Virtual Machine (http://llvm.org/):\n"
+       << "  " << PACKAGE_NAME << " version " << PACKAGE_VERSION;
+#ifdef LLVM_VERSION_INFO
+    OS << LLVM_VERSION_INFO;
+#endif
+    OS << "\n  ";
+#ifndef __OPTIMIZE__
+    OS << "DEBUG build";
+#else
+    OS << "Optimized build";
+#endif
+#ifndef NDEBUG
+    OS << " with assertions";
+#endif
+    std::string CPU = sys::getHostCPUName();
+    if (CPU == "generic") CPU = "(unknown)";
+    OS << ".\n"
+#if (ENABLE_TIMESTAMPS == 1)
+       << "  Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
+       << "  Host: " << sys::getHostTriple() << '\n'
+       << "  Host CPU: " << CPU << '\n'
+       << '\n'
+       << "  Registered Targets:\n";
+
+    std::vector<std::pair<const char *, const Target*> > Targets;
+    size_t Width = 0;
+    for (TargetRegistry::iterator it = TargetRegistry::begin(),
+           ie = TargetRegistry::end(); it != ie; ++it) {
+      Targets.push_back(std::make_pair(it->getName(), &*it));
+      Width = std::max(Width, strlen(Targets.back().first));
+    }
+    if (!Targets.empty())
+      qsort(&Targets[0], Targets.size(), sizeof(Targets[0]),
+            TargetArraySortFn);
+
+    for (unsigned i = 0, e = Targets.size(); i != e; ++i) {
+      OS << "    " << Targets[i].first;
+      OS.indent(Width - strlen(Targets[i].first)) << " - "
+         << Targets[i].second->getShortDescription() << '\n';
+    }
+    if (Targets.empty())
+      OS << "    (none)\n";
+  }
+  void operator=(bool OptionWasSpecified) {
+    if (!OptionWasSpecified) return;
+
+    if (OverrideVersionPrinter == 0) {
+      print();
+      exit(1);
+    }
+    (*OverrideVersionPrinter)();
+    exit(1);
+  }
+};
+} // End anonymous namespace
+
+
+// Define the --version option that prints out the LLVM version for the tool
+static VersionPrinter VersionPrinterInstance;
+
+static cl::opt<VersionPrinter, true, parser<bool> >
+VersOp("version", cl::desc("Display the version of this program"),
+       cl::location(VersionPrinterInstance), cl::ValueDisallowed);
+
+// Utility function for printing the help message.
+void cl::PrintHelpMessage() {
+  // This looks weird, but it actually prints the help message. The
+  // NormalPrinter variable is a HelpPrinter and the help gets printed when
+  // its operator= is invoked. That's because the "normal" usages of the
+  // help printer is to be assigned true/false depending on whether the
+  // -help option was given or not. Since we're circumventing that we have
+  // to make it look like -help was given, so we assign true.
+  NormalPrinter = true;
+}
+
+/// Utility function for printing version number.
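[Editor's note: a tool that wants its own --version output installs a callback before parsing, which is what OverrideVersionPrinter above exists for. A minimal sketch of that usage; the tool code is hypothetical, the cl::SetVersionPrinter entry point is the one defined just below.]

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Replacement version printer, run instead of the default when --version is
// given on the command line.
static void printMyVersion() {
  outs() << "mytool 1.0 (built on LLVM)\n";
}

int main(int argc, char **argv) {
  cl::SetVersionPrinter(printMyVersion);   // must precede parsing
  cl::ParseCommandLineOptions(argc, argv, "mytool\n");
  return 0;
}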
+void cl::PrintVersionMessage() { + VersionPrinterInstance.print(); +} + +void cl::SetVersionPrinter(void (*func)()) { + OverrideVersionPrinter = func; +} diff --git a/final/lib/Support/ConstantRange.cpp b/final/lib/Support/ConstantRange.cpp new file mode 100644 index 00000000000..493f7083dbb --- /dev/null +++ b/final/lib/Support/ConstantRange.cpp @@ -0,0 +1,702 @@ +//===-- ConstantRange.cpp - ConstantRange implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Represent a range of possible values that may occur when the program is run +// for an integral value. This keeps track of a lower and upper bound for the +// constant, which MAY wrap around the end of the numeric range. To do this, it +// keeps track of a [lower, upper) bound, which specifies an interval just like +// STL iterators. When used with boolean values, the following are important +// ranges (other integral ranges use min/max values for special range values): +// +// [F, F) = {} = Empty set +// [T, F) = {T} +// [F, T) = {F} +// [T, T) = {F, T} = Full set +// +//===----------------------------------------------------------------------===// + +#include "llvm/Constants.h" +#include "llvm/Support/ConstantRange.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Instructions.h" +using namespace llvm; + +/// Initialize a full (the default) or empty set for the specified type. +/// +ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { + if (Full) + Lower = Upper = APInt::getMaxValue(BitWidth); + else + Lower = Upper = APInt::getMinValue(BitWidth); +} + +/// Initialize a range to hold the single specified value. 
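[Editor's note: ConstantRange's [Lower, Upper) representation lets a range wrap past the top of the unsigned space, and contains() below has to branch on that. A sketch of the membership rule on plain uint8_t; note that in the real class Lower == Upper encodes the full or empty set depending on the endpoint value, while this sketch simply treats it as empty.]

#include <cstdint>
#include <cstdio>

// Membership test for a half-open byte range [lower, upper) that may wrap:
// [250, 5) contains 250..255 and 0..4.
static bool rangeContains(uint8_t lower, uint8_t upper, uint8_t v) {
  if (lower == upper)
    return false;                          // treated as empty in this sketch
  if (lower < upper)
    return lower <= v && v < upper;        // ordinary, unwrapped range
  return v >= lower || v < upper;          // wrapped past the top
}

int main() {
  std::printf("%d %d %d\n",
              rangeContains(250, 5, 252),   // 1: in the high piece
              rangeContains(250, 5, 3),     // 1: in the wrapped low piece
              rangeContains(250, 5, 100));  // 0: in the gap
  return 0;
}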
+/// +ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {} + +ConstantRange::ConstantRange(const APInt &L, const APInt &U) : + Lower(L), Upper(U) { + assert(L.getBitWidth() == U.getBitWidth() && + "ConstantRange with unequal bit widths"); + assert((L != U || (L.isMaxValue() || L.isMinValue())) && + "Lower == Upper, but they aren't min or max value!"); +} + +ConstantRange ConstantRange::makeICmpRegion(unsigned Pred, + const ConstantRange &CR) { + if (CR.isEmptySet()) + return CR; + + uint32_t W = CR.getBitWidth(); + switch (Pred) { + default: assert(!"Invalid ICmp predicate to makeICmpRegion()"); + case ICmpInst::ICMP_EQ: + return CR; + case ICmpInst::ICMP_NE: + if (CR.isSingleElement()) + return ConstantRange(CR.getUpper(), CR.getLower()); + return ConstantRange(W); + case ICmpInst::ICMP_ULT: { + APInt UMax(CR.getUnsignedMax()); + if (UMax.isMinValue()) + return ConstantRange(W, /* empty */ false); + return ConstantRange(APInt::getMinValue(W), UMax); + } + case ICmpInst::ICMP_SLT: { + APInt SMax(CR.getSignedMax()); + if (SMax.isMinSignedValue()) + return ConstantRange(W, /* empty */ false); + return ConstantRange(APInt::getSignedMinValue(W), SMax); + } + case ICmpInst::ICMP_ULE: { + APInt UMax(CR.getUnsignedMax()); + if (UMax.isMaxValue()) + return ConstantRange(W); + return ConstantRange(APInt::getMinValue(W), UMax + 1); + } + case ICmpInst::ICMP_SLE: { + APInt SMax(CR.getSignedMax()); + if (SMax.isMaxSignedValue()) + return ConstantRange(W); + return ConstantRange(APInt::getSignedMinValue(W), SMax + 1); + } + case ICmpInst::ICMP_UGT: { + APInt UMin(CR.getUnsignedMin()); + if (UMin.isMaxValue()) + return ConstantRange(W, /* empty */ false); + return ConstantRange(UMin + 1, APInt::getNullValue(W)); + } + case ICmpInst::ICMP_SGT: { + APInt SMin(CR.getSignedMin()); + if (SMin.isMaxSignedValue()) + return ConstantRange(W, /* empty */ false); + return ConstantRange(SMin + 1, APInt::getSignedMinValue(W)); + } + case ICmpInst::ICMP_UGE: { + APInt UMin(CR.getUnsignedMin()); + if (UMin.isMinValue()) + return ConstantRange(W); + return ConstantRange(UMin, APInt::getNullValue(W)); + } + case ICmpInst::ICMP_SGE: { + APInt SMin(CR.getSignedMin()); + if (SMin.isMinSignedValue()) + return ConstantRange(W); + return ConstantRange(SMin, APInt::getSignedMinValue(W)); + } + } +} + +/// isFullSet - Return true if this set contains all of the elements possible +/// for this data-type +bool ConstantRange::isFullSet() const { + return Lower == Upper && Lower.isMaxValue(); +} + +/// isEmptySet - Return true if this set contains no members. +/// +bool ConstantRange::isEmptySet() const { + return Lower == Upper && Lower.isMinValue(); +} + +/// isWrappedSet - Return true if this set wraps around the top of the range, +/// for example: [100, 8) +/// +bool ConstantRange::isWrappedSet() const { + return Lower.ugt(Upper); +} + +/// isSignWrappedSet - Return true if this set wraps around the INT_MIN of +/// its bitwidth, for example: i8 [120, 140). +/// +bool ConstantRange::isSignWrappedSet() const { + return contains(APInt::getSignedMaxValue(getBitWidth())) && + contains(APInt::getSignedMinValue(getBitWidth())); +} + +/// getSetSize - Return the number of elements in this set. +/// +APInt ConstantRange::getSetSize() const { + if (isEmptySet()) + return APInt(getBitWidth(), 0); + if (getBitWidth() == 1) { + if (Lower != Upper) // One of T or F in the set... + return APInt(2, 1); + return APInt(2, 2); // Must be full set... + } + + // Simply subtract the bounds... 
+ return Upper - Lower; +} + +/// getUnsignedMax - Return the largest unsigned value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getUnsignedMax() const { + if (isFullSet() || isWrappedSet()) + return APInt::getMaxValue(getBitWidth()); + else + return getUpper() - 1; +} + +/// getUnsignedMin - Return the smallest unsigned value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getUnsignedMin() const { + if (isFullSet() || (isWrappedSet() && getUpper() != 0)) + return APInt::getMinValue(getBitWidth()); + else + return getLower(); +} + +/// getSignedMax - Return the largest signed value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getSignedMax() const { + APInt SignedMax(APInt::getSignedMaxValue(getBitWidth())); + if (!isWrappedSet()) { + if (getLower().sle(getUpper() - 1)) + return getUpper() - 1; + else + return SignedMax; + } else { + if (getLower().isNegative() == getUpper().isNegative()) + return SignedMax; + else + return getUpper() - 1; + } +} + +/// getSignedMin - Return the smallest signed value contained in the +/// ConstantRange. +/// +APInt ConstantRange::getSignedMin() const { + APInt SignedMin(APInt::getSignedMinValue(getBitWidth())); + if (!isWrappedSet()) { + if (getLower().sle(getUpper() - 1)) + return getLower(); + else + return SignedMin; + } else { + if ((getUpper() - 1).slt(getLower())) { + if (getUpper() != SignedMin) + return SignedMin; + else + return getLower(); + } else { + return getLower(); + } + } +} + +/// contains - Return true if the specified value is in the set. +/// +bool ConstantRange::contains(const APInt &V) const { + if (Lower == Upper) + return isFullSet(); + + if (!isWrappedSet()) + return Lower.ule(V) && V.ult(Upper); + else + return Lower.ule(V) || V.ult(Upper); +} + +/// contains - Return true if the argument is a subset of this range. +/// Two equal sets contain each other. The empty set contained by all other +/// sets. +/// +bool ConstantRange::contains(const ConstantRange &Other) const { + if (isFullSet() || Other.isEmptySet()) return true; + if (isEmptySet() || Other.isFullSet()) return false; + + if (!isWrappedSet()) { + if (Other.isWrappedSet()) + return false; + + return Lower.ule(Other.getLower()) && Other.getUpper().ule(Upper); + } + + if (!Other.isWrappedSet()) + return Other.getUpper().ule(Upper) || + Lower.ule(Other.getLower()); + + return Other.getUpper().ule(Upper) && Lower.ule(Other.getLower()); +} + +/// subtract - Subtract the specified constant from the endpoints of this +/// constant range. +ConstantRange ConstantRange::subtract(const APInt &Val) const { + assert(Val.getBitWidth() == getBitWidth() && "Wrong bit width"); + // If the set is empty or full, don't modify the endpoints. + if (Lower == Upper) + return *this; + return ConstantRange(Lower - Val, Upper - Val); +} + +/// intersectWith - Return the range that results from the intersection of this +/// range with another range. The resultant range is guaranteed to include all +/// elements contained in both input ranges, and to have the smallest possible +/// set size that does so. Because there may be two intersections with the +/// same set size, A.intersectWith(B) might not be equal to B.intersectWith(A). +ConstantRange ConstantRange::intersectWith(const ConstantRange &CR) const { + assert(getBitWidth() == CR.getBitWidth() && + "ConstantRange types don't agree!"); + + // Handle common cases. 
+ if ( isEmptySet() || CR.isFullSet()) return *this; + if (CR.isEmptySet() || isFullSet()) return CR; + + if (!isWrappedSet() && CR.isWrappedSet()) + return CR.intersectWith(*this); + + if (!isWrappedSet() && !CR.isWrappedSet()) { + if (Lower.ult(CR.Lower)) { + if (Upper.ule(CR.Lower)) + return ConstantRange(getBitWidth(), false); + + if (Upper.ult(CR.Upper)) + return ConstantRange(CR.Lower, Upper); + + return CR; + } else { + if (Upper.ult(CR.Upper)) + return *this; + + if (Lower.ult(CR.Upper)) + return ConstantRange(Lower, CR.Upper); + + return ConstantRange(getBitWidth(), false); + } + } + + if (isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Lower.ult(Upper)) { + if (CR.Upper.ult(Upper)) + return CR; + + if (CR.Upper.ult(Lower)) + return ConstantRange(CR.Lower, Upper); + + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; + } else if (CR.Lower.ult(Lower)) { + if (CR.Upper.ule(Lower)) + return ConstantRange(getBitWidth(), false); + + return ConstantRange(Lower, CR.Upper); + } + return CR; + } + + if (CR.Upper.ult(Upper)) { + if (CR.Lower.ult(Upper)) { + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; + } + + if (CR.Lower.ult(Lower)) + return ConstantRange(Lower, CR.Upper); + + return CR; + } else if (CR.Upper.ult(Lower)) { + if (CR.Lower.ult(Lower)) + return *this; + + return ConstantRange(CR.Lower, Upper); + } + if (getSetSize().ult(CR.getSetSize())) + return *this; + else + return CR; +} + + +/// unionWith - Return the range that results from the union of this range with +/// another range. The resultant range is guaranteed to include the elements of +/// both sets, but may contain more. For example, [3, 9) union [12,15) is +/// [3, 15), which includes 9, 10, and 11, which were not included in either +/// set before. +/// +ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const { + assert(getBitWidth() == CR.getBitWidth() && + "ConstantRange types don't agree!"); + + if ( isFullSet() || CR.isEmptySet()) return *this; + if (CR.isFullSet() || isEmptySet()) return CR; + + if (!isWrappedSet() && CR.isWrappedSet()) return CR.unionWith(*this); + + if (!isWrappedSet() && !CR.isWrappedSet()) { + if (CR.Upper.ult(Lower) || Upper.ult(CR.Lower)) { + // If the two ranges are disjoint, find the smaller gap and bridge it. 
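+      // E.g., in i8, [1, 3) unioned with [250, 252) bridges the smaller,
+      // 5-element wrap-around gap {252, ..., 0} and yields [250, 3); the
+      // 247-element gap [3, 250) stays outside the result.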
+ APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); + } + + APInt L = Lower, U = Upper; + if (CR.Lower.ult(L)) + L = CR.Lower; + if ((CR.Upper - 1).ugt(U - 1)) + U = CR.Upper; + + if (L == 0 && U == 0) + return ConstantRange(getBitWidth()); + + return ConstantRange(L, U); + } + + if (!CR.isWrappedSet()) { + // ------U L----- and ------U L----- : this + // L--U L--U : CR + if (CR.Upper.ule(Upper) || CR.Lower.uge(Lower)) + return *this; + + // ------U L----- : this + // L---------U : CR + if (CR.Lower.ule(Upper) && Lower.ule(CR.Upper)) + return ConstantRange(getBitWidth()); + + // ----U L---- : this + // L---U : CR + // + if (Upper.ule(CR.Lower) && CR.Upper.ule(Lower)) { + APInt d1 = CR.Lower - Upper, d2 = Lower - CR.Upper; + if (d1.ult(d2)) + return ConstantRange(Lower, CR.Upper); + else + return ConstantRange(CR.Lower, Upper); + } + + // ----U L----- : this + // L----U : CR + if (Upper.ult(CR.Lower) && Lower.ult(CR.Upper)) + return ConstantRange(CR.Lower, Upper); + + // ------U L---- : this + // L-----U : CR + if (CR.Lower.ult(Upper) && CR.Upper.ult(Lower)) + return ConstantRange(Lower, CR.Upper); + } + + assert(isWrappedSet() && CR.isWrappedSet() && + "ConstantRange::unionWith missed wrapped union unwrapped case"); + + // ------U L---- and ------U L---- : this + // -U L----------- and ------------U L : CR + if (CR.Lower.ule(Upper) || Lower.ule(CR.Upper)) + return ConstantRange(getBitWidth()); + + APInt L = Lower, U = Upper; + if (CR.Upper.ugt(U)) + U = CR.Upper; + if (CR.Lower.ult(L)) + L = CR.Lower; + + return ConstantRange(L, U); +} + +/// zeroExtend - Return a new range in the specified integer type, which must +/// be strictly larger than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// zero extended. +ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { + if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); + + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize < DstTySize && "Not a value extension"); + if (isFullSet() || isWrappedSet()) + // Change into [0, 1 << src bit width) + return ConstantRange(APInt(DstTySize,0), APInt(DstTySize,1).shl(SrcTySize)); + + return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); +} + +/// signExtend - Return a new range in the specified integer type, which must +/// be strictly larger than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// sign extended. +ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { + if (isEmptySet()) return ConstantRange(DstTySize, /*isFullSet=*/false); + + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize < DstTySize && "Not a value extension"); + if (isFullSet() || isSignWrappedSet()) { + return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), + APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1); + } + + return ConstantRange(Lower.sext(DstTySize), Upper.sext(DstTySize)); +} + +/// truncate - Return a new range in the specified integer type, which must be +/// strictly smaller than the current type. The returned range will +/// correspond to the possible range of values as if the source range had been +/// truncated to the specified type. 
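+/// For example, truncating the i16 range [10, 20) to i8 yields [10, 20),
+/// while truncating i16 [0, 300) to i8 yields the full set, since more
+/// distinct values survive than an i8 can represent.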
+ConstantRange ConstantRange::truncate(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + assert(SrcTySize > DstTySize && "Not a value truncation"); + APInt Size(APInt::getLowBitsSet(SrcTySize, DstTySize)); + if (isFullSet() || getSetSize().ugt(Size)) + return ConstantRange(DstTySize, /*isFullSet=*/true); + + return ConstantRange(Lower.trunc(DstTySize), Upper.trunc(DstTySize)); +} + +/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The +/// value is zero extended, truncated, or left alone to make it that width. +ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + if (SrcTySize > DstTySize) + return truncate(DstTySize); + else if (SrcTySize < DstTySize) + return zeroExtend(DstTySize); + else + return *this; +} + +/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The +/// value is sign extended, truncated, or left alone to make it that width. +ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const { + unsigned SrcTySize = getBitWidth(); + if (SrcTySize > DstTySize) + return truncate(DstTySize); + else if (SrcTySize < DstTySize) + return signExtend(DstTySize); + else + return *this; +} + +ConstantRange +ConstantRange::add(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); + APInt NewLower = getLower() + Other.getLower(); + APInt NewUpper = getUpper() + Other.getUpper() - 1; + if (NewLower == NewUpper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + ConstantRange X = ConstantRange(NewLower, NewUpper); + if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) + // We've wrapped, therefore, full set. + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return X; +} + +ConstantRange +ConstantRange::sub(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize(); + APInt NewLower = getLower() - Other.getLower(); + APInt NewUpper = getUpper() - Other.getUpper() + 1; + if (NewLower == NewUpper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + ConstantRange X = ConstantRange(NewLower, NewUpper); + if (X.getSetSize().ult(Spread_X) || X.getSetSize().ult(Spread_Y)) + // We've wrapped, therefore, full set. + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return X; +} + +ConstantRange +ConstantRange::multiply(const ConstantRange &Other) const { + // TODO: If either operand is a single element and the multiply is known to + // be non-wrapping, round the result min and max value to the appropriate + // multiple of that element. If wrapping is possible, at least adjust the + // range according to the greatest power-of-two factor of the single element. 
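+  //
+  // The zero-extension approach below is conservative but simple: e.g., for
+  // i8, [3, 5) * [2, 3) is computed in i16 as [3*2, 4*2 + 1) = [6, 9), which
+  // truncates back to the i8 range [6, 9).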
+ + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (isFullSet() || Other.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt this_min = getUnsignedMin().zext(getBitWidth() * 2); + APInt this_max = getUnsignedMax().zext(getBitWidth() * 2); + APInt Other_min = Other.getUnsignedMin().zext(getBitWidth() * 2); + APInt Other_max = Other.getUnsignedMax().zext(getBitWidth() * 2); + + ConstantRange Result_zext = ConstantRange(this_min * Other_min, + this_max * Other_max + 1); + return Result_zext.truncate(getBitWidth()); +} + +ConstantRange +ConstantRange::smax(const ConstantRange &Other) const { + // X smax Y is: range(smax(X_smin, Y_smin), + // smax(X_smax, Y_smax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + APInt NewL = APIntOps::smax(getSignedMin(), Other.getSignedMin()); + APInt NewU = APIntOps::smax(getSignedMax(), Other.getSignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::umax(const ConstantRange &Other) const { + // X umax Y is: range(umax(X_umin, Y_umin), + // umax(X_umax, Y_umax)) + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + APInt NewL = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); + APInt NewU = APIntOps::umax(getUnsignedMax(), Other.getUnsignedMax()) + 1; + if (NewU == NewL) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(NewL, NewU); +} + +ConstantRange +ConstantRange::udiv(const ConstantRange &RHS) const { + if (isEmptySet() || RHS.isEmptySet() || RHS.getUnsignedMax() == 0) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + if (RHS.isFullSet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + APInt Lower = getUnsignedMin().udiv(RHS.getUnsignedMax()); + + APInt RHS_umin = RHS.getUnsignedMin(); + if (RHS_umin == 0) { + // We want the lowest value in RHS excluding zero. Usually that would be 1 + // except for a range in the form of [X, 1) in which case it would be X. + if (RHS.getUpper() == 1) + RHS_umin = RHS.getLower(); + else + RHS_umin = APInt(getBitWidth(), 1); + } + + APInt Upper = getUnsignedMax().udiv(RHS_umin) + 1; + + // If the LHS is Full and the RHS is a wrapped interval containing 1 then + // this could occur. 
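+  // E.g., for a full i8 LHS and RHS == [255, 2), Lower is 0 udiv 255 == 0 and
+  // Upper is 255 udiv 1 + 1, which wraps to 0, so the result is the full set.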
+ if (Lower == Upper) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return ConstantRange(Lower, Upper); +} + +ConstantRange +ConstantRange::binaryAnd(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + // TODO: replace this with something less conservative + + APInt umin = APIntOps::umin(Other.getUnsignedMax(), getUnsignedMax()); + if (umin.isAllOnesValue()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(APInt::getNullValue(getBitWidth()), umin + 1); +} + +ConstantRange +ConstantRange::binaryOr(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + // TODO: replace this with something less conservative + + APInt umax = APIntOps::umax(getUnsignedMin(), Other.getUnsignedMin()); + if (umax.isMinValue()) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + return ConstantRange(umax, APInt::getNullValue(getBitWidth())); +} + +ConstantRange +ConstantRange::shl(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + APInt min = getUnsignedMin().shl(Other.getUnsignedMin()); + APInt max = getUnsignedMax().shl(Other.getUnsignedMax()); + + // there's no overflow! + APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros()); + if (Zeros.ugt(Other.getUnsignedMax())) + return ConstantRange(min, max + 1); + + // FIXME: implement the other tricky cases + return ConstantRange(getBitWidth(), /*isFullSet=*/true); +} + +ConstantRange +ConstantRange::lshr(const ConstantRange &Other) const { + if (isEmptySet() || Other.isEmptySet()) + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + + APInt max = getUnsignedMax().lshr(Other.getUnsignedMin()); + APInt min = getUnsignedMin().lshr(Other.getUnsignedMax()); + if (min == max + 1) + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + + return ConstantRange(min, max + 1); +} + +ConstantRange ConstantRange::inverse() const { + if (isFullSet()) { + return ConstantRange(getBitWidth(), /*isFullSet=*/false); + } else if (isEmptySet()) { + return ConstantRange(getBitWidth(), /*isFullSet=*/true); + } + return ConstantRange(Upper, Lower); +} + +/// print - Print out the bounds to a stream... +/// +void ConstantRange::print(raw_ostream &OS) const { + if (isFullSet()) + OS << "full-set"; + else if (isEmptySet()) + OS << "empty-set"; + else + OS << "[" << Lower << "," << Upper << ")"; +} + +/// dump - Allow printing from a debugger easily... +/// +void ConstantRange::dump() const { + print(dbgs()); +} diff --git a/final/lib/Support/CrashRecoveryContext.cpp b/final/lib/Support/CrashRecoveryContext.cpp new file mode 100644 index 00000000000..bf8ca3f844b --- /dev/null +++ b/final/lib/Support/CrashRecoveryContext.cpp @@ -0,0 +1,230 @@ +//===--- CrashRecoveryContext.cpp - Crash Recovery ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CrashRecoveryContext.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/ThreadLocal.h"
+#include <setjmp.h>
+#include <cstdio>
+using namespace llvm;
+
+namespace {
+
+struct CrashRecoveryContextImpl;
+
+static sys::ThreadLocal<const CrashRecoveryContextImpl> CurrentContext;
+
+struct CrashRecoveryContextImpl {
+  CrashRecoveryContext *CRC;
+  std::string Backtrace;
+  ::jmp_buf JumpBuffer;
+  volatile unsigned Failed : 1;
+
+public:
+  CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC),
+                                                        Failed(false) {
+    CurrentContext.set(this);
+  }
+  ~CrashRecoveryContextImpl() {
+    CurrentContext.erase();
+  }
+
+  void HandleCrash() {
+    // Eliminate the current context entry, to avoid re-entering in case the
+    // cleanup code crashes.
+    CurrentContext.erase();
+
+    assert(!Failed && "Crash recovery context already failed!");
+    Failed = true;
+
+    // FIXME: Stash the backtrace.
+
+    // Jump back to the RunSafely we were called under.
+    longjmp(JumpBuffer, 1);
+  }
+};
+
+}
+
+static sys::Mutex gCrashRecoveryContexMutex;
+static bool gCrashRecoveryEnabled = false;
+
+CrashRecoveryContext::~CrashRecoveryContext() {
+  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+  delete CRCI;
+}
+
+CrashRecoveryContext *CrashRecoveryContext::GetCurrent() {
+  const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+  if (!CRCI)
+    return 0;
+
+  return CRCI->CRC;
+}
+
+#ifdef LLVM_ON_WIN32
+
+// FIXME: No real Win32 implementation currently.
+
+void CrashRecoveryContext::Enable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = true;
+}
+
+void CrashRecoveryContext::Disable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (!gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = false;
+}
+
+#else
+
+// Generic POSIX implementation.
+//
+// This implementation relies on synchronous signals being delivered to the
+// current thread. We use a thread local object to keep track of the active
+// crash recovery context, and install signal handlers to invoke HandleCrash on
+// the active object.
+//
+// This implementation does not attempt to chain signal handlers in any
+// reliable fashion -- if we get a signal outside of a crash recovery context we
+// simply disable crash recovery and raise the signal again.
+
+#include <signal.h>
+
+static int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
+static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]);
+static struct sigaction PrevActions[NumSignals];
+
+static void CrashRecoverySignalHandler(int Signal) {
+  // Lookup the current thread local recovery object.
+  const CrashRecoveryContextImpl *CRCI = CurrentContext.get();
+
+  if (!CRCI) {
+    // We didn't find a crash recovery context -- this means either we got a
+    // signal on a thread we didn't expect it on, the application got a signal
+    // outside of a crash recovery context, or something else went horribly
+    // wrong.
+    //
+    // Disable crash recovery and raise the signal again. The assumption here is
+    // that the enclosing application will terminate soon, and we won't want to
+    // attempt crash recovery again.
+    //
+    // This call of Disable isn't thread safe, but it doesn't actually matter.
+    CrashRecoveryContext::Disable();
+    raise(Signal);
+
+    // The signal will be thrown once the signal mask is restored.
+    return;
+  }
+
+  // Unblock the signal we received.
+  sigset_t SigMask;
+  sigemptyset(&SigMask);
+  sigaddset(&SigMask, Signal);
+  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+  if (CRCI)
+    const_cast<CrashRecoveryContextImpl*>(CRCI)->HandleCrash();
+}
+
+void CrashRecoveryContext::Enable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = true;
+
+  // Setup the signal handler.
+  struct sigaction Handler;
+  Handler.sa_handler = CrashRecoverySignalHandler;
+  Handler.sa_flags = 0;
+  sigemptyset(&Handler.sa_mask);
+
+  for (unsigned i = 0; i != NumSignals; ++i) {
+    sigaction(Signals[i], &Handler, &PrevActions[i]);
+  }
+}
+
+void CrashRecoveryContext::Disable() {
+  sys::ScopedLock L(gCrashRecoveryContexMutex);
+
+  if (!gCrashRecoveryEnabled)
+    return;
+
+  gCrashRecoveryEnabled = false;
+
+  // Restore the previous signal handlers.
+  for (unsigned i = 0; i != NumSignals; ++i)
+    sigaction(Signals[i], &PrevActions[i], 0);
+}
+
+#endif
+
+bool CrashRecoveryContext::RunSafely(void (*Fn)(void*), void *UserData) {
+  // If crash recovery is disabled, do nothing.
+  if (gCrashRecoveryEnabled) {
+    assert(!Impl && "Crash recovery context already initialized!");
+    CrashRecoveryContextImpl *CRCI = new CrashRecoveryContextImpl(this);
+    Impl = CRCI;
+
+    if (setjmp(CRCI->JumpBuffer) != 0) {
+      return false;
+    }
+  }
+
+  Fn(UserData);
+  return true;
+}
+
+void CrashRecoveryContext::HandleCrash() {
+  CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl;
+  assert(CRCI && "Crash recovery context never initialized!");
+  CRCI->HandleCrash();
+}
+
+const std::string &CrashRecoveryContext::getBacktrace() const {
+  CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *) Impl;
+  assert(CRC && "Crash recovery context never initialized!");
+  assert(CRC->Failed && "No crash was detected!");
+  return CRC->Backtrace;
+}
+
+//
+
+namespace {
+struct RunSafelyOnThreadInfo {
+  void (*UserFn)(void*);
+  void *UserData;
+  CrashRecoveryContext *CRC;
+  bool Result;
+};
+}
+
+static void RunSafelyOnThread_Dispatch(void *UserData) {
+  RunSafelyOnThreadInfo *Info =
+    reinterpret_cast<RunSafelyOnThreadInfo*>(UserData);
+  Info->Result = Info->CRC->RunSafely(Info->UserFn, Info->UserData);
+}
+bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData,
+                                             unsigned RequestedStackSize) {
+  RunSafelyOnThreadInfo Info = { Fn, UserData, this, false };
+  llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize);
+  return Info.Result;
+}
diff --git a/final/lib/Support/DAGDeltaAlgorithm.cpp b/final/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 00000000000..814566494d3
--- /dev/null
+++ b/final/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,357 @@
+//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// The algorithm we use attempts to exploit the dependency information by
+// minimizing top-down. We start by constructing an initial root set R, and
+// then iteratively:
+//
+//   1. Minimize the set R using the test predicate:
+//       P'(S) = P(S union pred*(S))
+//
+//   2. Extend R to R' = R union pred(R).
+//
+// until a fixed point is reached.
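+//
+// For example, with changes {1, 2, 3} and dependency edges (1, 2) and (2, 3),
+// the initial root set is R = {3}: testing {3} under P' really tests
+// {1, 2, 3}, and each extension step then pulls in the predecessors of
+// whatever survived minimization.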
+//
+// The idea is that we want to quickly prune entire portions of the graph, so we
+// try to find high-level nodes that can be eliminated with all of their
+// dependents.
+//
+// FIXME: The current algorithm doesn't actually provide a strong guarantee
+// about the minimality of the result. The problem is that after adding nodes to
+// the required set, we no longer consider them for elimination. For strictly
+// well formed predicates, this doesn't happen, but it commonly occurs in
+// practice when there are unmodelled dependencies. I believe we can resolve
+// this by allowing the required set to be minimized as well, but need more test
+// cases first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class DAGDeltaAlgorithmImpl {
+  friend class DeltaActiveSetHelper;
+
+public:
+  typedef DAGDeltaAlgorithm::change_ty change_ty;
+  typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
+  typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
+  typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+
+private:
+  typedef std::vector<change_ty>::iterator pred_iterator_ty;
+  typedef std::vector<change_ty>::iterator succ_iterator_ty;
+  typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
+  typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+
+  DAGDeltaAlgorithm &DDA;
+
+  const changeset_ty &Changes;
+  const std::vector<edge_ty> &Dependencies;
+
+  std::vector<change_ty> Roots;
+
+  /// Cache of failed test results. Successful test results are never cached
+  /// since we always reduce following a success. We maintain an independent
+  /// cache from that used by the individual delta passes because we may get
+  /// hits across multiple individual delta invocations.
+  mutable std::set<changeset_ty> FailedTestsCache;
+
+  // FIXME: Gross.
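+  // For the example edges (1, 2) and (2, 3) above, these maps would hold
+  // Successors[1] = {2}, Successors[2] = {3}, SuccClosure[1] = {2, 3}, and
+  // PredClosure[3] = {1, 2}; Roots collects the changes with no successors.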
+  std::map<change_ty, std::vector<change_ty> > Predecessors;
+  std::map<change_ty, std::vector<change_ty> > Successors;
+
+  std::map<change_ty, std::set<change_ty> > PredClosure;
+  std::map<change_ty, std::set<change_ty> > SuccClosure;
+
+private:
+  pred_iterator_ty pred_begin(change_ty Node) {
+    assert(Predecessors.count(Node) && "Invalid node!");
+    return Predecessors[Node].begin();
+  }
+  pred_iterator_ty pred_end(change_ty Node) {
+    assert(Predecessors.count(Node) && "Invalid node!");
+    return Predecessors[Node].end();
+  }
+
+  pred_closure_iterator_ty pred_closure_begin(change_ty Node) {
+    assert(PredClosure.count(Node) && "Invalid node!");
+    return PredClosure[Node].begin();
+  }
+  pred_closure_iterator_ty pred_closure_end(change_ty Node) {
+    assert(PredClosure.count(Node) && "Invalid node!");
+    return PredClosure[Node].end();
+  }
+
+  succ_iterator_ty succ_begin(change_ty Node) {
+    assert(Successors.count(Node) && "Invalid node!");
+    return Successors[Node].begin();
+  }
+  succ_iterator_ty succ_end(change_ty Node) {
+    assert(Successors.count(Node) && "Invalid node!");
+    return Successors[Node].end();
+  }
+
+  succ_closure_iterator_ty succ_closure_begin(change_ty Node) {
+    assert(SuccClosure.count(Node) && "Invalid node!");
+    return SuccClosure[Node].begin();
+  }
+  succ_closure_iterator_ty succ_closure_end(change_ty Node) {
+    assert(SuccClosure.count(Node) && "Invalid node!");
+    return SuccClosure[Node].end();
+  }
+
+  void UpdatedSearchState(const changeset_ty &Changes,
+                          const changesetlist_ty &Sets,
+                          const changeset_ty &Required) {
+    DDA.UpdatedSearchState(Changes, Sets, Required);
+  }
+
+  /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+  bool ExecuteOneTest(const changeset_ty &S) {
+    // Check dependencies invariant.
+    DEBUG({
+        for (changeset_ty::const_iterator it = S.begin(),
+               ie = S.end(); it != ie; ++it)
+          for (succ_iterator_ty it2 = succ_begin(*it),
+                 ie2 = succ_end(*it); it2 != ie2; ++it2)
+            assert(S.count(*it2) && "Attempt to run invalid changeset!");
+      });
+
+    return DDA.ExecuteOneTest(S);
+  }
+
+public:
+  DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+                        const changeset_ty &_Changes,
+                        const std::vector<edge_ty> &_Dependencies);
+
+  changeset_ty Run();
+
+  /// GetTestResult - Get the test result for the active set \arg Changes with
+  /// \arg Required changes from the cache, executing the test if necessary.
+  ///
+  /// \param Changes - The set of active changes being minimized, which should
+  /// have their pred closure included in the test.
+  /// \param Required - The set of changes which have previously been
+  /// established to be required.
+  /// \return - The test result.
+  bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required);
+};
+
+/// Helper object for minimizing an active set of changes.
+class DeltaActiveSetHelper : public DeltaAlgorithm {
+  DAGDeltaAlgorithmImpl &DDAI;
+
+  const changeset_ty &Required;
+
+protected:
+  /// UpdatedSearchState - Callback used when the search state changes.
+  virtual void UpdatedSearchState(const changeset_ty &Changes,
+                                  const changesetlist_ty &Sets) {
+    DDAI.UpdatedSearchState(Changes, Sets, Required);
+  }
+
+  virtual bool ExecuteOneTest(const changeset_ty &S) {
+    return DDAI.GetTestResult(S, Required);
+  }
+
+public:
+  DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
+                       const changeset_ty &_Required)
+    : DDAI(_DDAI), Required(_Required) {}
+};
+
+}
+
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+                                             const changeset_ty &_Changes,
+                                             const std::vector<edge_ty>
+                                               &_Dependencies)
+  : DDA(_DDA),
+    Changes(_Changes),
+    Dependencies(_Dependencies)
+{
+  for (changeset_ty::const_iterator it = Changes.begin(),
+         ie = Changes.end(); it != ie; ++it) {
+    Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
+    Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+  }
+  for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
+         ie = Dependencies.end(); it != ie; ++it) {
+    Predecessors[it->second].push_back(it->first);
+    Successors[it->first].push_back(it->second);
+  }
+
+  // Compute the roots.
+  for (changeset_ty::const_iterator it = Changes.begin(),
+         ie = Changes.end(); it != ie; ++it)
+    if (succ_begin(*it) == succ_end(*it))
+      Roots.push_back(*it);
+
+  // Pre-compute the closure of the successor relation.
+  std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
+  while (!Worklist.empty()) {
+    change_ty Change = Worklist.back();
+    Worklist.pop_back();
+
+    std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
+    for (pred_iterator_ty it = pred_begin(Change),
+           ie = pred_end(Change); it != ie; ++it) {
+      SuccClosure[*it].insert(Change);
+      SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
+      Worklist.push_back(*it);
+    }
+  }
+
+  // Invert to form the predecessor closure map.
+  for (changeset_ty::const_iterator it = Changes.begin(),
+         ie = Changes.end(); it != ie; ++it)
+    PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
+  for (changeset_ty::const_iterator it = Changes.begin(),
+         ie = Changes.end(); it != ie; ++it)
+    for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+           ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
+      PredClosure[*it2].insert(*it);
+
+  // Dump useful debug info.
+  DEBUG({
+      llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
+      llvm::errs() << "Changes: [";
+      for (changeset_ty::const_iterator it = Changes.begin(),
+             ie = Changes.end(); it != ie; ++it) {
+        if (it != Changes.begin()) llvm::errs() << ", ";
+        llvm::errs() << *it;
+
+        if (succ_begin(*it) != succ_end(*it)) {
+          llvm::errs() << "(";
+          for (succ_iterator_ty it2 = succ_begin(*it),
+                 ie2 = succ_end(*it); it2 != ie2; ++it2) {
+            if (it2 != succ_begin(*it)) llvm::errs() << ", ";
+            llvm::errs() << "->" << *it2;
+          }
+          llvm::errs() << ")";
+        }
+      }
+      llvm::errs() << "]\n";
+
+      llvm::errs() << "Roots: [";
+      for (std::vector<change_ty>::const_iterator it = Roots.begin(),
+             ie = Roots.end(); it != ie; ++it) {
+        if (it != Roots.begin()) llvm::errs() << ", ";
+        llvm::errs() << *it;
+      }
+      llvm::errs() << "]\n";
+
+      llvm::errs() << "Predecessor Closure:\n";
+      for (changeset_ty::const_iterator it = Changes.begin(),
+             ie = Changes.end(); it != ie; ++it) {
+        llvm::errs() << format("  %-4d: [", *it);
+        for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
+               ie2 = pred_closure_end(*it); it2 != ie2; ++it2) {
+          if (it2 != pred_closure_begin(*it)) llvm::errs() << ", ";
+          llvm::errs() << *it2;
+        }
+        llvm::errs() << "]\n";
+      }
+
+      llvm::errs() << "Successor Closure:\n";
+      for (changeset_ty::const_iterator it = Changes.begin(),
+             ie = Changes.end(); it != ie; ++it) {
+        llvm::errs() << format("  %-4d: [", *it);
+        for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+               ie2 = succ_closure_end(*it); it2 != ie2; ++it2) {
+          if (it2 != succ_closure_begin(*it)) llvm::errs() << ", ";
+          llvm::errs() << *it2;
+        }
+        llvm::errs() << "]\n";
+      }
+
+      llvm::errs() << "\n\n";
+    });
+}
+
+bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
+                                          const changeset_ty &Required) {
+  changeset_ty Extended(Required);
+  Extended.insert(Changes.begin(), Changes.end());
+  for (changeset_ty::const_iterator it = Changes.begin(),
+         ie = Changes.end(); it != ie; ++it)
+    Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+
+  if (FailedTestsCache.count(Extended))
+    return false;
+
+  bool Result = ExecuteOneTest(Extended);
+  if (!Result)
+    FailedTestsCache.insert(Extended);
+
+  return Result;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithmImpl::Run() {
+  // The current set of changes we are minimizing, starting at the roots.
+  changeset_ty CurrentSet(Roots.begin(), Roots.end());
+
+  // The set of required changes.
+  changeset_ty Required;
+
+  // Iterate until the active set of changes is empty. Convergence is guaranteed
+  // assuming input was a DAG.
+  //
+  // Invariant:  CurrentSet intersect Required == {}
+  // Invariant:  Required == (Required union succ*(Required))
+  while (!CurrentSet.empty()) {
+    DEBUG({
+        llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, "
+                     << Required.size() << " required changes\n";
+      });
+
+    // Minimize the current set of changes.
+    DeltaActiveSetHelper Helper(*this, Required);
+    changeset_ty CurrentMinSet = Helper.Run(CurrentSet);
+
+    // Update the set of required changes. Since
+    //   CurrentMinSet subset CurrentSet
+    // and after the last iteration,
+    //   succ(CurrentSet) subset Required
+    // then
+    //   succ(CurrentMinSet) subset Required
+    // and our invariant on Required is maintained.
+    Required.insert(CurrentMinSet.begin(), CurrentMinSet.end());
+
+    // Replace the current set with the predecessors of the minimized set of
+    // active changes.
+    CurrentSet.clear();
+    for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
+           ie = CurrentMinSet.end(); it != ie; ++it)
+      CurrentSet.insert(pred_begin(*it), pred_end(*it));
+
+    // FIXME: We could enforce CurrentSet intersect Required == {} here if we
+    // wanted to protect against cyclic graphs.
+  }
+
+  return Required;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
+                       const std::vector<edge_ty> &Dependencies) {
+  return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run();
+}
diff --git a/final/lib/Support/Debug.cpp b/final/lib/Support/Debug.cpp
new file mode 100644
index 00000000000..9fdb12ecfdc
--- /dev/null
+++ b/final/lib/Support/Debug.cpp
@@ -0,0 +1,134 @@
+//===-- Debug.cpp - An easy way to add debug output to your code ----------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a handy way of adding debugging information to your
+// code, without it being enabled all of the time, and without having to add
+// command line options to enable it.
+//
+// In particular, just wrap your code with the DEBUG() macro, and it will be
+// enabled automatically if you specify '-debug' on the command-line.
+// Alternatively, you can also use the SET_DEBUG_TYPE("foo") macro to specify
+// that your debug code belongs to class "foo".  Then, on the command line, you
+// can specify '-debug-only=foo' to enable JUST the debug information for the
+// foo class.
+//
+// When compiling in release mode, the -debug-* options and all code in DEBUG()
+// statements disappears, so it does not affect the runtime of the code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Support/Signals.h"
+
+using namespace llvm;
+
+// All Debug.h functionality is a no-op in NDEBUG mode.
+#ifndef NDEBUG
+bool llvm::DebugFlag;  // DebugFlag - Exported boolean set by the -debug option
+
+// -debug - Command line option to enable the DEBUG statements in the passes.
+// This flag may only be enabled in debug builds.
+static cl::opt<bool, true>
+Debug("debug", cl::desc("Enable debug output"), cl::Hidden,
+      cl::location(DebugFlag));
+
+// -debug-buffer-size - Buffer the last N characters of debug output
+// until program termination.
+static cl::opt<unsigned>
+DebugBufferSize("debug-buffer-size",
+                cl::desc("Buffer the last N characters of debug output "
+                         "until program termination. "
+                         "[default 0 -- immediate print-out]"),
+                cl::Hidden,
+                cl::init(0));
+
+static std::string CurrentDebugType;
+
+namespace {
+
+struct DebugOnlyOpt {
+  void operator=(const std::string &Val) const {
+    DebugFlag |= !Val.empty();
+    CurrentDebugType = Val;
+  }
+};
+
+}
+
+static DebugOnlyOpt DebugOnlyOptLoc;
+
+static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+          cl::Hidden, cl::value_desc("debug string"),
+          cl::location(DebugOnlyOptLoc), cl::ValueRequired);
+
+// Signal handlers - dump debug output on termination.
+static void debug_user_sig_handler(void *Cookie) {
+  // This is a bit sneaky.  Since this is under #ifndef NDEBUG, we
+  // know that debug mode is enabled and dbgs() really is a
+  // circular_raw_ostream.
+  // If NDEBUG is defined, then dbgs() == errs() but this will never be
+  // invoked.
+  llvm::circular_raw_ostream *dbgout =
+    static_cast<llvm::circular_raw_ostream*>(&llvm::dbgs());
+  dbgout->flushBufferWithBanner();
+}
+
+// isCurrentDebugType - Return true if the specified string is the debug type
+// specified on the command line, or if none was specified on the command line
+// with the -debug-only=X option.
+//
+bool llvm::isCurrentDebugType(const char *DebugType) {
+  return CurrentDebugType.empty() || DebugType == CurrentDebugType;
+}
+
+/// SetCurrentDebugType - Set the current debug type, as if the -debug-only=X
+/// option were specified.  Note that DebugFlag also needs to be set to true for
+/// debug output to be produced.
+///
+void llvm::SetCurrentDebugType(const char *Type) {
+  CurrentDebugType = Type;
+}
+
+/// dbgs - Return a circular-buffered debug stream.
+raw_ostream &llvm::dbgs() {
+  // Do one-time initialization in a thread-safe way.
+  static struct dbgstream {
+    circular_raw_ostream strm;
+
+    dbgstream() :
+        strm(errs(), "*** Debug Log Output ***\n",
+             (!EnableDebugBuffering || !DebugFlag) ? 0 : DebugBufferSize) {
+      if (EnableDebugBuffering && DebugFlag && DebugBufferSize != 0)
+        // TODO: Add a handler for SIGUSER1-type signals so the user can
+        // force a debug dump.
+        sys::AddSignalHandler(&debug_user_sig_handler, 0);
+      // Otherwise we've already set the debug stream buffer size to
+      // zero, disabling buffering so it will output directly to errs().
+    }
+  } thestrm;
+
+  return thestrm.strm;
+}
+
+#else
+// Avoid "has no symbols" warning.
+namespace llvm {
+  /// dbgs - Return errs().
+  raw_ostream &dbgs() {
+    return errs();
+  }
+}
+
+#endif
+
+/// EnableDebugBuffering - Turn on signal handler installation.
+///
+bool llvm::EnableDebugBuffering = false;
diff --git a/final/lib/Support/DeltaAlgorithm.cpp b/final/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 00000000000..9e52874de83
--- /dev/null
+++ b/final/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+  if (FailedTestsCache.count(Changes))
+    return false;
+
+  bool Result = ExecuteOneTest(Changes);
+  if (!Result)
+    FailedTestsCache.insert(Changes);
+
+  return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+  // FIXME: Allow clients to provide heuristics for improved splitting.
+
+  // FIXME: This is really slow.
+  changeset_ty LHS, RHS;
+  unsigned idx = 0, N = S.size() / 2;
+  for (changeset_ty::const_iterator it = S.begin(),
+         ie = S.end(); it != ie; ++it, ++idx)
+    ((idx < N) ? LHS : RHS).insert(*it);
+  if (!LHS.empty())
+    Res.push_back(LHS);
+  if (!RHS.empty())
+    Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+                      const changesetlist_ty &Sets) {
+  // Invariant: union(Res) == Changes
+  UpdatedSearchState(Changes, Sets);
+
+  // If there is nothing left we can remove, we are done.
+  if (Sets.size() <= 1)
+    return Changes;
+
+  // Look for a passing subset.
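+  // E.g., with Changes == {1, 2, 3, 4} and Sets == {{1, 2}, {3, 4}}, Search
+  // first tests {1, 2} and {3, 4} on their own; if neither passes, the code
+  // below splits each set in half and recurses on the singleton partition.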
+  changeset_ty Res;
+  if (Search(Changes, Sets, Res))
+    return Res;
+
+  // Otherwise, partition the sets if possible; if not we are done.
+  changesetlist_ty SplitSets;
+  for (changesetlist_ty::const_iterator it = Sets.begin(),
+         ie = Sets.end(); it != ie; ++it)
+    Split(*it, SplitSets);
+  if (SplitSets.size() == Sets.size())
+    return Changes;
+
+  return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+                            const changesetlist_ty &Sets,
+                            changeset_ty &Res) {
+  // FIXME: Parallelize.
+  for (changesetlist_ty::const_iterator it = Sets.begin(),
+         ie = Sets.end(); it != ie; ++it) {
+    // If the test passes on this subset alone, recurse.
+    if (GetTestResult(*it)) {
+      changesetlist_ty Sets;
+      Split(*it, Sets);
+      Res = Delta(*it, Sets);
+      return true;
+    }
+
+    // Otherwise, if we have more than two sets, see if test passes on the
+    // complement.
+    if (Sets.size() > 2) {
+      // FIXME: This is really slow.
+      changeset_ty Complement;
+      std::set_difference(
+        Changes.begin(), Changes.end(), it->begin(), it->end(),
+        std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+      if (GetTestResult(Complement)) {
+        changesetlist_ty ComplementSets;
+        ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+        ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+        Res = Delta(Complement, ComplementSets);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+  // Check empty set first to quickly find poor test functions.
+  if (GetTestResult(changeset_ty()))
+    return changeset_ty();
+
+  // Otherwise run the real delta algorithm.
+  changesetlist_ty Sets;
+  Split(Changes, Sets);
+
+  return Delta(Changes, Sets);
+}
diff --git a/final/lib/Support/Disassembler.cpp b/final/lib/Support/Disassembler.cpp
new file mode 100644
index 00000000000..6362aff43a9
--- /dev/null
+++ b/final/lib/Support/Disassembler.cpp
@@ -0,0 +1,75 @@
+//===- lib/System/Disassembler.cpp ------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the necessary glue to call external disassembler
+// libraries.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/Disassembler.h"
+
+#include <cassert>
+#include <iomanip>
+#include <string>
+#include <sstream>
+
+#if USE_UDIS86
+#include <udis86.h>
+#endif
+
+using namespace llvm;
+
+bool llvm::sys::hasDisassembler()
+{
+#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+  // We have the option to enable the udis86 library.
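+  // Usage sketch (illustrative, assuming a raw_ostream OS and hypothetical
+  // Start/Length/PC values): callers should test this predicate before
+  // requesting any text, e.g.
+  //   if (sys::hasDisassembler())
+  //     OS << sys::disassembleBuffer(Start, Length, PC);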
+# if USE_UDIS86 + return true; +#else + return false; +#endif +#else + return false; +#endif +} + +std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length, + uint64_t pc) { + std::stringstream res; + +#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \ + && USE_UDIS86 + unsigned bits; +# if defined(__i386__) + bits = 32; +# else + bits = 64; +# endif + + ud_t ud_obj; + + ud_init(&ud_obj); + ud_set_input_buffer(&ud_obj, start, length); + ud_set_mode(&ud_obj, bits); + ud_set_pc(&ud_obj, pc); + ud_set_syntax(&ud_obj, UD_SYN_ATT); + + res << std::setbase(16) + << std::setw(bits/4); + + while (ud_disassemble(&ud_obj)) { + res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n"; + } +#else + res << "No disassembler available. See configure help for options.\n"; +#endif + + return res.str(); +} diff --git a/final/lib/Support/Dwarf.cpp b/final/lib/Support/Dwarf.cpp new file mode 100644 index 00000000000..9799ef54792 --- /dev/null +++ b/final/lib/Support/Dwarf.cpp @@ -0,0 +1,652 @@ +//===-- llvm/Support/Dwarf.cpp - Dwarf Framework ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for generic dwarf information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Dwarf.h" +using namespace llvm; +using namespace dwarf; + +/// TagString - Return the string for the specified tag. +/// +const char *llvm::dwarf::TagString(unsigned Tag) { + switch (Tag) { + case DW_TAG_array_type: return "DW_TAG_array_type"; + case DW_TAG_class_type: return "DW_TAG_class_type"; + case DW_TAG_entry_point: return "DW_TAG_entry_point"; + case DW_TAG_enumeration_type: return "DW_TAG_enumeration_type"; + case DW_TAG_formal_parameter: return "DW_TAG_formal_parameter"; + case DW_TAG_imported_declaration: return "DW_TAG_imported_declaration"; + case DW_TAG_label: return "DW_TAG_label"; + case DW_TAG_lexical_block: return "DW_TAG_lexical_block"; + case DW_TAG_member: return "DW_TAG_member"; + case DW_TAG_pointer_type: return "DW_TAG_pointer_type"; + case DW_TAG_reference_type: return "DW_TAG_reference_type"; + case DW_TAG_compile_unit: return "DW_TAG_compile_unit"; + case DW_TAG_string_type: return "DW_TAG_string_type"; + case DW_TAG_structure_type: return "DW_TAG_structure_type"; + case DW_TAG_subroutine_type: return "DW_TAG_subroutine_type"; + case DW_TAG_typedef: return "DW_TAG_typedef"; + case DW_TAG_union_type: return "DW_TAG_union_type"; + case DW_TAG_unspecified_parameters: return "DW_TAG_unspecified_parameters"; + case DW_TAG_variant: return "DW_TAG_variant"; + case DW_TAG_common_block: return "DW_TAG_common_block"; + case DW_TAG_common_inclusion: return "DW_TAG_common_inclusion"; + case DW_TAG_inheritance: return "DW_TAG_inheritance"; + case DW_TAG_inlined_subroutine: return "DW_TAG_inlined_subroutine"; + case DW_TAG_module: return "DW_TAG_module"; + case DW_TAG_ptr_to_member_type: return "DW_TAG_ptr_to_member_type"; + case DW_TAG_set_type: return "DW_TAG_set_type"; + case DW_TAG_subrange_type: return "DW_TAG_subrange_type"; + case DW_TAG_with_stmt: return "DW_TAG_with_stmt"; + case DW_TAG_access_declaration: return "DW_TAG_access_declaration"; + case DW_TAG_base_type: return "DW_TAG_base_type"; + case DW_TAG_catch_block: return "DW_TAG_catch_block"; + 
case DW_TAG_const_type: return "DW_TAG_const_type"; + case DW_TAG_constant: return "DW_TAG_constant"; + case DW_TAG_enumerator: return "DW_TAG_enumerator"; + case DW_TAG_file_type: return "DW_TAG_file_type"; + case DW_TAG_friend: return "DW_TAG_friend"; + case DW_TAG_namelist: return "DW_TAG_namelist"; + case DW_TAG_namelist_item: return "DW_TAG_namelist_item"; + case DW_TAG_packed_type: return "DW_TAG_packed_type"; + case DW_TAG_subprogram: return "DW_TAG_subprogram"; + case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter"; + case DW_TAG_thrown_type: return "DW_TAG_thrown_type"; + case DW_TAG_try_block: return "DW_TAG_try_block"; + case DW_TAG_variant_part: return "DW_TAG_variant_part"; + case DW_TAG_variable: return "DW_TAG_variable"; + case DW_TAG_volatile_type: return "DW_TAG_volatile_type"; + case DW_TAG_dwarf_procedure: return "DW_TAG_dwarf_procedure"; + case DW_TAG_restrict_type: return "DW_TAG_restrict_type"; + case DW_TAG_interface_type: return "DW_TAG_interface_type"; + case DW_TAG_namespace: return "DW_TAG_namespace"; + case DW_TAG_imported_module: return "DW_TAG_imported_module"; + case DW_TAG_unspecified_type: return "DW_TAG_unspecified_type"; + case DW_TAG_partial_unit: return "DW_TAG_partial_unit"; + case DW_TAG_imported_unit: return "DW_TAG_imported_unit"; + case DW_TAG_condition: return "DW_TAG_condition"; + case DW_TAG_shared_type: return "DW_TAG_shared_type"; + case DW_TAG_lo_user: return "DW_TAG_lo_user"; + case DW_TAG_hi_user: return "DW_TAG_hi_user"; + case DW_TAG_auto_variable: return "DW_TAG_auto_variable"; + case DW_TAG_arg_variable: return "DW_TAG_arg_variable"; + case DW_TAG_return_variable: return "DW_TAG_return_variable"; + case DW_TAG_vector_type: return "DW_TAG_vector_type"; + } + return 0; +} + +/// ChildrenString - Return the string for the specified children flag. +/// +const char *llvm::dwarf::ChildrenString(unsigned Children) { + switch (Children) { + case DW_CHILDREN_no: return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; + } + return 0; +} + +/// AttributeString - Return the string for the specified attribute. 
+/// +const char *llvm::dwarf::AttributeString(unsigned Attribute) { + switch (Attribute) { + case DW_AT_sibling: return "DW_AT_sibling"; + case DW_AT_location: return "DW_AT_location"; + case DW_AT_name: return "DW_AT_name"; + case DW_AT_ordering: return "DW_AT_ordering"; + case DW_AT_byte_size: return "DW_AT_byte_size"; + case DW_AT_bit_offset: return "DW_AT_bit_offset"; + case DW_AT_bit_size: return "DW_AT_bit_size"; + case DW_AT_stmt_list: return "DW_AT_stmt_list"; + case DW_AT_low_pc: return "DW_AT_low_pc"; + case DW_AT_high_pc: return "DW_AT_high_pc"; + case DW_AT_language: return "DW_AT_language"; + case DW_AT_discr: return "DW_AT_discr"; + case DW_AT_discr_value: return "DW_AT_discr_value"; + case DW_AT_visibility: return "DW_AT_visibility"; + case DW_AT_import: return "DW_AT_import"; + case DW_AT_string_length: return "DW_AT_string_length"; + case DW_AT_common_reference: return "DW_AT_common_reference"; + case DW_AT_comp_dir: return "DW_AT_comp_dir"; + case DW_AT_const_value: return "DW_AT_const_value"; + case DW_AT_containing_type: return "DW_AT_containing_type"; + case DW_AT_default_value: return "DW_AT_default_value"; + case DW_AT_inline: return "DW_AT_inline"; + case DW_AT_is_optional: return "DW_AT_is_optional"; + case DW_AT_lower_bound: return "DW_AT_lower_bound"; + case DW_AT_producer: return "DW_AT_producer"; + case DW_AT_prototyped: return "DW_AT_prototyped"; + case DW_AT_return_addr: return "DW_AT_return_addr"; + case DW_AT_start_scope: return "DW_AT_start_scope"; + case DW_AT_bit_stride: return "DW_AT_bit_stride"; + case DW_AT_upper_bound: return "DW_AT_upper_bound"; + case DW_AT_abstract_origin: return "DW_AT_abstract_origin"; + case DW_AT_accessibility: return "DW_AT_accessibility"; + case DW_AT_address_class: return "DW_AT_address_class"; + case DW_AT_artificial: return "DW_AT_artificial"; + case DW_AT_base_types: return "DW_AT_base_types"; + case DW_AT_calling_convention: return "DW_AT_calling_convention"; + case DW_AT_count: return "DW_AT_count"; + case DW_AT_data_member_location: return "DW_AT_data_member_location"; + case DW_AT_decl_column: return "DW_AT_decl_column"; + case DW_AT_decl_file: return "DW_AT_decl_file"; + case DW_AT_decl_line: return "DW_AT_decl_line"; + case DW_AT_declaration: return "DW_AT_declaration"; + case DW_AT_discr_list: return "DW_AT_discr_list"; + case DW_AT_encoding: return "DW_AT_encoding"; + case DW_AT_external: return "DW_AT_external"; + case DW_AT_frame_base: return "DW_AT_frame_base"; + case DW_AT_friend: return "DW_AT_friend"; + case DW_AT_identifier_case: return "DW_AT_identifier_case"; + case DW_AT_macro_info: return "DW_AT_macro_info"; + case DW_AT_namelist_item: return "DW_AT_namelist_item"; + case DW_AT_priority: return "DW_AT_priority"; + case DW_AT_segment: return "DW_AT_segment"; + case DW_AT_specification: return "DW_AT_specification"; + case DW_AT_static_link: return "DW_AT_static_link"; + case DW_AT_type: return "DW_AT_type"; + case DW_AT_use_location: return "DW_AT_use_location"; + case DW_AT_variable_parameter: return "DW_AT_variable_parameter"; + case DW_AT_virtuality: return "DW_AT_virtuality"; + case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location"; + case DW_AT_allocated: return "DW_AT_allocated"; + case DW_AT_associated: return "DW_AT_associated"; + case DW_AT_data_location: return "DW_AT_data_location"; + case DW_AT_byte_stride: return "DW_AT_byte_stride"; + case DW_AT_entry_pc: return "DW_AT_entry_pc"; + case DW_AT_use_UTF8: return "DW_AT_use_UTF8"; + case DW_AT_extension: return 
"DW_AT_extension"; + case DW_AT_ranges: return "DW_AT_ranges"; + case DW_AT_trampoline: return "DW_AT_trampoline"; + case DW_AT_call_column: return "DW_AT_call_column"; + case DW_AT_call_file: return "DW_AT_call_file"; + case DW_AT_call_line: return "DW_AT_call_line"; + case DW_AT_description: return "DW_AT_description"; + case DW_AT_binary_scale: return "DW_AT_binary_scale"; + case DW_AT_decimal_scale: return "DW_AT_decimal_scale"; + case DW_AT_small: return "DW_AT_small"; + case DW_AT_decimal_sign: return "DW_AT_decimal_sign"; + case DW_AT_digit_count: return "DW_AT_digit_count"; + case DW_AT_picture_string: return "DW_AT_picture_string"; + case DW_AT_mutable: return "DW_AT_mutable"; + case DW_AT_threads_scaled: return "DW_AT_threads_scaled"; + case DW_AT_explicit: return "DW_AT_explicit"; + case DW_AT_object_pointer: return "DW_AT_object_pointer"; + case DW_AT_endianity: return "DW_AT_endianity"; + case DW_AT_elemental: return "DW_AT_elemental"; + case DW_AT_pure: return "DW_AT_pure"; + case DW_AT_recursive: return "DW_AT_recursive"; + case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name"; + case DW_AT_sf_names: return "DW_AT_sf_names"; + case DW_AT_src_info: return "DW_AT_src_info"; + case DW_AT_mac_info: return "DW_AT_mac_info"; + case DW_AT_src_coords: return "DW_AT_src_coords"; + case DW_AT_body_begin: return "DW_AT_body_begin"; + case DW_AT_body_end: return "DW_AT_body_end"; + case DW_AT_GNU_vector: return "DW_AT_GNU_vector"; + case DW_AT_lo_user: return "DW_AT_lo_user"; + case DW_AT_hi_user: return "DW_AT_hi_user"; + case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized"; + case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags"; + case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa"; + case DW_AT_APPLE_block: return "DW_AT_APPLE_block"; + case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; + case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; + case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr"; + } + return 0; +} + +/// FormEncodingString - Return the string for the specified form encoding. +/// +const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_FORM_addr: return "DW_FORM_addr"; + case DW_FORM_block2: return "DW_FORM_block2"; + case DW_FORM_block4: return "DW_FORM_block4"; + case DW_FORM_data2: return "DW_FORM_data2"; + case DW_FORM_data4: return "DW_FORM_data4"; + case DW_FORM_data8: return "DW_FORM_data8"; + case DW_FORM_string: return "DW_FORM_string"; + case DW_FORM_block: return "DW_FORM_block"; + case DW_FORM_block1: return "DW_FORM_block1"; + case DW_FORM_data1: return "DW_FORM_data1"; + case DW_FORM_flag: return "DW_FORM_flag"; + case DW_FORM_sdata: return "DW_FORM_sdata"; + case DW_FORM_strp: return "DW_FORM_strp"; + case DW_FORM_udata: return "DW_FORM_udata"; + case DW_FORM_ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM_ref1: return "DW_FORM_ref1"; + case DW_FORM_ref2: return "DW_FORM_ref2"; + case DW_FORM_ref4: return "DW_FORM_ref4"; + case DW_FORM_ref8: return "DW_FORM_ref8"; + case DW_FORM_ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM_indirect: return "DW_FORM_indirect"; + } + return 0; +} + +/// OperationEncodingString - Return the string for the specified operation +/// encoding. 
+const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_OP_addr: return "DW_OP_addr"; + case DW_OP_deref: return "DW_OP_deref"; + case DW_OP_const1u: return "DW_OP_const1u"; + case DW_OP_const1s: return "DW_OP_const1s"; + case DW_OP_const2u: return "DW_OP_const2u"; + case DW_OP_const2s: return "DW_OP_const2s"; + case DW_OP_const4u: return "DW_OP_const4u"; + case DW_OP_const4s: return "DW_OP_const4s"; + case DW_OP_const8u: return "DW_OP_const8u"; + case DW_OP_const8s: return "DW_OP_const8s"; + case DW_OP_constu: return "DW_OP_constu"; + case DW_OP_consts: return "DW_OP_consts"; + case DW_OP_dup: return "DW_OP_dup"; + case DW_OP_drop: return "DW_OP_drop"; + case DW_OP_over: return "DW_OP_over"; + case DW_OP_pick: return "DW_OP_pick"; + case DW_OP_swap: return "DW_OP_swap"; + case DW_OP_rot: return "DW_OP_rot"; + case DW_OP_xderef: return "DW_OP_xderef"; + case DW_OP_abs: return "DW_OP_abs"; + case DW_OP_and: return "DW_OP_and"; + case DW_OP_div: return "DW_OP_div"; + case DW_OP_minus: return "DW_OP_minus"; + case DW_OP_mod: return "DW_OP_mod"; + case DW_OP_mul: return "DW_OP_mul"; + case DW_OP_neg: return "DW_OP_neg"; + case DW_OP_not: return "DW_OP_not"; + case DW_OP_or: return "DW_OP_or"; + case DW_OP_plus: return "DW_OP_plus"; + case DW_OP_plus_uconst: return "DW_OP_plus_uconst"; + case DW_OP_shl: return "DW_OP_shl"; + case DW_OP_shr: return "DW_OP_shr"; + case DW_OP_shra: return "DW_OP_shra"; + case DW_OP_xor: return "DW_OP_xor"; + case DW_OP_skip: return "DW_OP_skip"; + case DW_OP_bra: return "DW_OP_bra"; + case DW_OP_eq: return "DW_OP_eq"; + case DW_OP_ge: return "DW_OP_ge"; + case DW_OP_gt: return "DW_OP_gt"; + case DW_OP_le: return "DW_OP_le"; + case DW_OP_lt: return "DW_OP_lt"; + case DW_OP_ne: return "DW_OP_ne"; + case DW_OP_lit0: return "DW_OP_lit0"; + case DW_OP_lit1: return "DW_OP_lit1"; + case DW_OP_lit2: return "DW_OP_lit2"; + case DW_OP_lit3: return "DW_OP_lit3"; + case DW_OP_lit4: return "DW_OP_lit4"; + case DW_OP_lit5: return "DW_OP_lit5"; + case DW_OP_lit6: return "DW_OP_lit6"; + case DW_OP_lit7: return "DW_OP_lit7"; + case DW_OP_lit8: return "DW_OP_lit8"; + case DW_OP_lit9: return "DW_OP_lit9"; + case DW_OP_lit10: return "DW_OP_lit10"; + case DW_OP_lit11: return "DW_OP_lit11"; + case DW_OP_lit12: return "DW_OP_lit12"; + case DW_OP_lit13: return "DW_OP_lit13"; + case DW_OP_lit14: return "DW_OP_lit14"; + case DW_OP_lit15: return "DW_OP_lit15"; + case DW_OP_lit16: return "DW_OP_lit16"; + case DW_OP_lit17: return "DW_OP_lit17"; + case DW_OP_lit18: return "DW_OP_lit18"; + case DW_OP_lit19: return "DW_OP_lit19"; + case DW_OP_lit20: return "DW_OP_lit20"; + case DW_OP_lit21: return "DW_OP_lit21"; + case DW_OP_lit22: return "DW_OP_lit22"; + case DW_OP_lit23: return "DW_OP_lit23"; + case DW_OP_lit24: return "DW_OP_lit24"; + case DW_OP_lit25: return "DW_OP_lit25"; + case DW_OP_lit26: return "DW_OP_lit26"; + case DW_OP_lit27: return "DW_OP_lit27"; + case DW_OP_lit28: return "DW_OP_lit28"; + case DW_OP_lit29: return "DW_OP_lit29"; + case DW_OP_lit30: return "DW_OP_lit30"; + case DW_OP_lit31: return "DW_OP_lit31"; + case DW_OP_reg0: return "DW_OP_reg0"; + case DW_OP_reg1: return "DW_OP_reg1"; + case DW_OP_reg2: return "DW_OP_reg2"; + case DW_OP_reg3: return "DW_OP_reg3"; + case DW_OP_reg4: return "DW_OP_reg4"; + case DW_OP_reg5: return "DW_OP_reg5"; + case DW_OP_reg6: return "DW_OP_reg6"; + case DW_OP_reg7: return "DW_OP_reg7"; + case DW_OP_reg8: return "DW_OP_reg8"; + case DW_OP_reg9: return "DW_OP_reg9"; + case DW_OP_reg10: return 
"DW_OP_reg10"; + case DW_OP_reg11: return "DW_OP_reg11"; + case DW_OP_reg12: return "DW_OP_reg12"; + case DW_OP_reg13: return "DW_OP_reg13"; + case DW_OP_reg14: return "DW_OP_reg14"; + case DW_OP_reg15: return "DW_OP_reg15"; + case DW_OP_reg16: return "DW_OP_reg16"; + case DW_OP_reg17: return "DW_OP_reg17"; + case DW_OP_reg18: return "DW_OP_reg18"; + case DW_OP_reg19: return "DW_OP_reg19"; + case DW_OP_reg20: return "DW_OP_reg20"; + case DW_OP_reg21: return "DW_OP_reg21"; + case DW_OP_reg22: return "DW_OP_reg22"; + case DW_OP_reg23: return "DW_OP_reg23"; + case DW_OP_reg24: return "DW_OP_reg24"; + case DW_OP_reg25: return "DW_OP_reg25"; + case DW_OP_reg26: return "DW_OP_reg26"; + case DW_OP_reg27: return "DW_OP_reg27"; + case DW_OP_reg28: return "DW_OP_reg28"; + case DW_OP_reg29: return "DW_OP_reg29"; + case DW_OP_reg30: return "DW_OP_reg30"; + case DW_OP_reg31: return "DW_OP_reg31"; + case DW_OP_breg0: return "DW_OP_breg0"; + case DW_OP_breg1: return "DW_OP_breg1"; + case DW_OP_breg2: return "DW_OP_breg2"; + case DW_OP_breg3: return "DW_OP_breg3"; + case DW_OP_breg4: return "DW_OP_breg4"; + case DW_OP_breg5: return "DW_OP_breg5"; + case DW_OP_breg6: return "DW_OP_breg6"; + case DW_OP_breg7: return "DW_OP_breg7"; + case DW_OP_breg8: return "DW_OP_breg8"; + case DW_OP_breg9: return "DW_OP_breg9"; + case DW_OP_breg10: return "DW_OP_breg10"; + case DW_OP_breg11: return "DW_OP_breg11"; + case DW_OP_breg12: return "DW_OP_breg12"; + case DW_OP_breg13: return "DW_OP_breg13"; + case DW_OP_breg14: return "DW_OP_breg14"; + case DW_OP_breg15: return "DW_OP_breg15"; + case DW_OP_breg16: return "DW_OP_breg16"; + case DW_OP_breg17: return "DW_OP_breg17"; + case DW_OP_breg18: return "DW_OP_breg18"; + case DW_OP_breg19: return "DW_OP_breg19"; + case DW_OP_breg20: return "DW_OP_breg20"; + case DW_OP_breg21: return "DW_OP_breg21"; + case DW_OP_breg22: return "DW_OP_breg22"; + case DW_OP_breg23: return "DW_OP_breg23"; + case DW_OP_breg24: return "DW_OP_breg24"; + case DW_OP_breg25: return "DW_OP_breg25"; + case DW_OP_breg26: return "DW_OP_breg26"; + case DW_OP_breg27: return "DW_OP_breg27"; + case DW_OP_breg28: return "DW_OP_breg28"; + case DW_OP_breg29: return "DW_OP_breg29"; + case DW_OP_breg30: return "DW_OP_breg30"; + case DW_OP_breg31: return "DW_OP_breg31"; + case DW_OP_regx: return "DW_OP_regx"; + case DW_OP_fbreg: return "DW_OP_fbreg"; + case DW_OP_bregx: return "DW_OP_bregx"; + case DW_OP_piece: return "DW_OP_piece"; + case DW_OP_deref_size: return "DW_OP_deref_size"; + case DW_OP_xderef_size: return "DW_OP_xderef_size"; + case DW_OP_nop: return "DW_OP_nop"; + case DW_OP_push_object_address: return "DW_OP_push_object_address"; + case DW_OP_call2: return "DW_OP_call2"; + case DW_OP_call4: return "DW_OP_call4"; + case DW_OP_call_ref: return "DW_OP_call_ref"; + case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; + case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP_lo_user: return "DW_OP_lo_user"; + case DW_OP_hi_user: return "DW_OP_hi_user"; + } + return 0; +} + +/// AttributeEncodingString - Return the string for the specified attribute +/// encoding. 
+const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { + switch (Encoding) { + case DW_ATE_address: return "DW_ATE_address"; + case DW_ATE_boolean: return "DW_ATE_boolean"; + case DW_ATE_complex_float: return "DW_ATE_complex_float"; + case DW_ATE_float: return "DW_ATE_float"; + case DW_ATE_signed: return "DW_ATE_signed"; + case DW_ATE_signed_char: return "DW_ATE_signed_char"; + case DW_ATE_unsigned: return "DW_ATE_unsigned"; + case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char"; + case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal"; + case DW_ATE_numeric_string: return "DW_ATE_numeric_string"; + case DW_ATE_edited: return "DW_ATE_edited"; + case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed"; + case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed"; + case DW_ATE_decimal_float: return "DW_ATE_decimal_float"; + case DW_ATE_lo_user: return "DW_ATE_lo_user"; + case DW_ATE_hi_user: return "DW_ATE_hi_user"; + } + return 0; +} + +/// DecimalSignString - Return the string for the specified decimal sign +/// attribute. +const char *llvm::dwarf::DecimalSignString(unsigned Sign) { + switch (Sign) { + case DW_DS_unsigned: return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; + } + return 0; +} + +/// EndianityString - Return the string for the specified endianity. +/// +const char *llvm::dwarf::EndianityString(unsigned Endian) { + switch (Endian) { + case DW_END_default: return "DW_END_default"; + case DW_END_big: return "DW_END_big"; + case DW_END_little: return "DW_END_little"; + case DW_END_lo_user: return "DW_END_lo_user"; + case DW_END_hi_user: return "DW_END_hi_user"; + } + return 0; +} + +/// AccessibilityString - Return the string for the specified accessibility. +/// +const char *llvm::dwarf::AccessibilityString(unsigned Access) { + switch (Access) { + // Accessibility codes + case DW_ACCESS_public: return "DW_ACCESS_public"; + case DW_ACCESS_protected: return "DW_ACCESS_protected"; + case DW_ACCESS_private: return "DW_ACCESS_private"; + } + return 0; +} + +/// VisibilityString - Return the string for the specified visibility. +/// +const char *llvm::dwarf::VisibilityString(unsigned Visibility) { + switch (Visibility) { + case DW_VIS_local: return "DW_VIS_local"; + case DW_VIS_exported: return "DW_VIS_exported"; + case DW_VIS_qualified: return "DW_VIS_qualified"; + } + return 0; +} + +/// VirtualityString - Return the string for the specified virtuality. +/// +const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { + switch (Virtuality) { + case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none"; + case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual"; + case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual"; + } + return 0; +} + +/// LanguageString - Return the string for the specified language. 
+/// +const char *llvm::dwarf::LanguageString(unsigned Language) { + switch (Language) { + case DW_LANG_C89: return "DW_LANG_C89"; + case DW_LANG_C: return "DW_LANG_C"; + case DW_LANG_Ada83: return "DW_LANG_Ada83"; + case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus"; + case DW_LANG_Cobol74: return "DW_LANG_Cobol74"; + case DW_LANG_Cobol85: return "DW_LANG_Cobol85"; + case DW_LANG_Fortran77: return "DW_LANG_Fortran77"; + case DW_LANG_Fortran90: return "DW_LANG_Fortran90"; + case DW_LANG_Pascal83: return "DW_LANG_Pascal83"; + case DW_LANG_Modula2: return "DW_LANG_Modula2"; + case DW_LANG_Java: return "DW_LANG_Java"; + case DW_LANG_C99: return "DW_LANG_C99"; + case DW_LANG_Ada95: return "DW_LANG_Ada95"; + case DW_LANG_Fortran95: return "DW_LANG_Fortran95"; + case DW_LANG_PLI: return "DW_LANG_PLI"; + case DW_LANG_ObjC: return "DW_LANG_ObjC"; + case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; + case DW_LANG_UPC: return "DW_LANG_UPC"; + case DW_LANG_D: return "DW_LANG_D"; + case DW_LANG_lo_user: return "DW_LANG_lo_user"; + case DW_LANG_hi_user: return "DW_LANG_hi_user"; + } + return 0; +} + +/// CaseString - Return the string for the specified identifier case. +/// +const char *llvm::dwarf::CaseString(unsigned Case) { + switch (Case) { + case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; + case DW_ID_up_case: return "DW_ID_up_case"; + case DW_ID_down_case: return "DW_ID_down_case"; + case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; + } + return 0; +} + +/// ConventionString - Return the string for the specified calling convention. +/// +const char *llvm::dwarf::ConventionString(unsigned Convention) { + switch (Convention) { + case DW_CC_normal: return "DW_CC_normal"; + case DW_CC_program: return "DW_CC_program"; + case DW_CC_nocall: return "DW_CC_nocall"; + case DW_CC_lo_user: return "DW_CC_lo_user"; + case DW_CC_hi_user: return "DW_CC_hi_user"; + } + return 0; +} + +/// InlineCodeString - Return the string for the specified inline code. +/// +const char *llvm::dwarf::InlineCodeString(unsigned Code) { + switch (Code) { + case DW_INL_not_inlined: return "DW_INL_not_inlined"; + case DW_INL_inlined: return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; + } + return 0; +} + +/// ArrayOrderString - Return the string for the specified array order. +/// +const char *llvm::dwarf::ArrayOrderString(unsigned Order) { + switch (Order) { + case DW_ORD_row_major: return "DW_ORD_row_major"; + case DW_ORD_col_major: return "DW_ORD_col_major"; + } + return 0; +} + +/// DiscriminantString - Return the string for the specified discriminant +/// descriptor. +const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { + switch (Discriminant) { + case DW_DSC_label: return "DW_DSC_label"; + case DW_DSC_range: return "DW_DSC_range"; + } + return 0; +} + +/// LNStandardString - Return the string for the specified line number standard. 
+/// +const char *llvm::dwarf::LNStandardString(unsigned Standard) { + switch (Standard) { + case DW_LNS_copy: return "DW_LNS_copy"; + case DW_LNS_advance_pc: return "DW_LNS_advance_pc"; + case DW_LNS_advance_line: return "DW_LNS_advance_line"; + case DW_LNS_set_file: return "DW_LNS_set_file"; + case DW_LNS_set_column: return "DW_LNS_set_column"; + case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt"; + case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block"; + case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc"; + case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc"; + case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end"; + case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; + case DW_LNS_set_isa: return "DW_LNS_set_isa"; + } + return 0; +} + +/// LNExtendedString - Return the string for the specified line number extended +/// opcode encodings. +const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { + switch (Encoding) { + // Line Number Extended Opcode Encodings + case DW_LNE_end_sequence: return "DW_LNE_end_sequence"; + case DW_LNE_set_address: return "DW_LNE_set_address"; + case DW_LNE_define_file: return "DW_LNE_define_file"; + case DW_LNE_lo_user: return "DW_LNE_lo_user"; + case DW_LNE_hi_user: return "DW_LNE_hi_user"; + } + return 0; +} + +/// MacinfoString - Return the string for the specified macinfo type encodings. +/// +const char *llvm::dwarf::MacinfoString(unsigned Encoding) { + switch (Encoding) { + // Macinfo Type Encodings + case DW_MACINFO_define: return "DW_MACINFO_define"; + case DW_MACINFO_undef: return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; + } + return 0; +} + +/// CallFrameString - Return the string for the specified call frame instruction +/// encodings. 
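Hand-maintained switches like these must be kept in sync with the corresponding enum by hand. This patch does not attempt anything else, but a common alternative worth noting is an X-macro list that expands one master definition into both the enum and the string table, so the two can never disagree. The sketch below is self-contained and every name in it is invented:

    #include <cstdio>

    // One master list; each entry expands differently depending on how
    // MY_DW_CFA is defined at the point of expansion.
    #define MY_CFA_LIST \
      MY_DW_CFA(0x02, advance_loc1) \
      MY_DW_CFA(0x03, advance_loc2) \
      MY_DW_CFA(0x04, advance_loc4)

    // Expand once into enum constants...
    enum MyCallFrameOp {
    #define MY_DW_CFA(VALUE, NAME) MY_DW_CFA_##NAME = VALUE,
      MY_CFA_LIST
    #undef MY_DW_CFA
    };

    // ...and once into the switch.
    static const char *MyCallFrameString(unsigned Encoding) {
      switch (Encoding) {
    #define MY_DW_CFA(VALUE, NAME) \
      case MY_DW_CFA_##NAME: return "DW_CFA_" #NAME;
      MY_CFA_LIST
    #undef MY_DW_CFA
      }
      return 0;
    }

    int main() {
      std::printf("%s\n", MyCallFrameString(0x03)); // DW_CFA_advance_loc2
    }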
+const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
+  switch (Encoding) {
+  case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
+  case DW_CFA_offset: return "DW_CFA_offset";
+  case DW_CFA_restore: return "DW_CFA_restore";
+  case DW_CFA_set_loc: return "DW_CFA_set_loc";
+  case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
+  case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
+  case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
+  case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
+  case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
+  case DW_CFA_undefined: return "DW_CFA_undefined";
+  case DW_CFA_same_value: return "DW_CFA_same_value";
+  case DW_CFA_register: return "DW_CFA_register";
+  case DW_CFA_remember_state: return "DW_CFA_remember_state";
+  case DW_CFA_restore_state: return "DW_CFA_restore_state";
+  case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
+  case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
+  case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
+  case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
+  case DW_CFA_expression: return "DW_CFA_expression";
+  case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
+  case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
+  case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
+  case DW_CFA_val_offset: return "DW_CFA_val_offset";
+  case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
+  case DW_CFA_val_expression: return "DW_CFA_val_expression";
+  case DW_CFA_lo_user: return "DW_CFA_lo_user";
+  case DW_CFA_hi_user: return "DW_CFA_hi_user";
+  }
+  return 0;
+}
diff --git a/final/lib/Support/DynamicLibrary.cpp b/final/lib/Support/DynamicLibrary.cpp
new file mode 100644
index 00000000000..455c3801cc6
--- /dev/null
+++ b/final/lib/Support/DynamicLibrary.cpp
@@ -0,0 +1,170 @@
+//===-- DynamicLibrary.cpp - Runtime link/load libraries --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the operating system DynamicLibrary concept.
+//
+// FIXME: This file leaks the ExplicitSymbols and OpenedHandles vector, and is
+// not thread safe!
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Config/config.h"
+#include <cstdio>
+#include <cstring>
+#include <map>
+#include <vector>
+
+// Collection of symbol name/value pairs to be searched prior to any libraries.
+static std::map<std::string, void*> *ExplicitSymbols = 0;
+
+namespace {
+
+struct ExplicitSymbolsDeleter {
+  ~ExplicitSymbolsDeleter() {
+    if (ExplicitSymbols)
+      delete ExplicitSymbols;
+  }
+};
+
+}
+
+static ExplicitSymbolsDeleter Dummy;
+
+void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
+                                          void *symbolValue) {
+  if (ExplicitSymbols == 0)
+    ExplicitSymbols = new std::map<std::string, void*>();
+  (*ExplicitSymbols)[symbolName] = symbolValue;
+}
+
+#ifdef LLVM_ON_WIN32
+
+#include "Windows/DynamicLibrary.inc"
+
+#else
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+using namespace llvm;
+using namespace llvm::sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<void*> *OpenedHandles = 0;
+
+
+static SmartMutex<true>& getMutex() {
+  static SmartMutex<true> HandlesMutex;
+  return HandlesMutex;
+}
+
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+                                            std::string *ErrMsg) {
+  void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
+  if (H == 0) {
+    if (ErrMsg) *ErrMsg = dlerror();
+    return true;
+  }
+#ifdef __CYGWIN__
+  // Cygwin searches symbols only in the main program when given the handle
+  // of dlopen(NULL, RTLD_GLOBAL).
+  if (Filename == NULL)
+    H = RTLD_DEFAULT;
+#endif
+  SmartScopedLock<true> Lock(getMutex());
+  if (OpenedHandles == 0)
+    OpenedHandles = new std::vector<void*>();
+  OpenedHandles->push_back(H);
+  return false;
+}
+#else
+
+using namespace llvm;
+using namespace llvm::sys;
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
+                                            std::string *ErrMsg) {
+  if (ErrMsg) *ErrMsg = "dlopen() not supported on this platform";
+  return true;
+}
+#endif
+
+namespace llvm {
+void *SearchForAddressOfSpecialSymbol(const char* symbolName);
+}
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+  // First check symbols added via AddSymbol().
+  if (ExplicitSymbols) {
+    std::map<std::string, void*>::iterator I =
+      ExplicitSymbols->find(symbolName);
+    std::map<std::string, void*>::iterator E = ExplicitSymbols->end();
+
+    if (I != E)
+      return I->second;
+  }
+
+#if HAVE_DLFCN_H
+  // Now search the libraries.
+  SmartScopedLock<true> Lock(getMutex());
+  if (OpenedHandles) {
+    for (std::vector<void*>::iterator I = OpenedHandles->begin(),
+         E = OpenedHandles->end(); I != E; ++I) {
+      //lt_ptr ptr = lt_dlsym(*I, symbolName);
+      void *ptr = dlsym(*I, symbolName);
+      if (ptr) {
+        return ptr;
+      }
+    }
+  }
+#endif
+
+  if (void *Result = llvm::SearchForAddressOfSpecialSymbol(symbolName))
+    return Result;
+
+// This macro returns the address of a well-known, explicit symbol
+#define EXPLICIT_SYMBOL(SYM) \
+  if (!strcmp(symbolName, #SYM)) return &SYM
+
+// On linux we have a weird situation. The stderr/out/in symbols are both
+// macros and global variables because of standards requirements. So, we
+// boldly use the EXPLICIT_SYMBOL macro without checking for a #define first.
+#if defined(__linux__)
+  {
+    EXPLICIT_SYMBOL(stderr);
+    EXPLICIT_SYMBOL(stdout);
+    EXPLICIT_SYMBOL(stdin);
+  }
+#else
+  // For everything else, we want to check to make sure the symbol isn't defined
+  // as a macro before using EXPLICIT_SYMBOL.
+  {
+#ifndef stdin
+    EXPLICIT_SYMBOL(stdin);
+#endif
+#ifndef stdout
+    EXPLICIT_SYMBOL(stdout);
+#endif
+#ifndef stderr
+    EXPLICIT_SYMBOL(stderr);
+#endif
+  }
+#endif
+#undef EXPLICIT_SYMBOL
+
+  return 0;
+}
+
+#endif // LLVM_ON_WIN32
diff --git a/final/lib/Support/Errno.cpp b/final/lib/Support/Errno.cpp
new file mode 100644
index 00000000000..18c658173a7
--- /dev/null
+++ b/final/lib/Support/Errno.cpp
@@ -0,0 +1,74 @@
+//===- Errno.cpp - errno support --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the errno wrappers.
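A minimal sketch of driving the DynamicLibrary interface implemented above: AddSymbol() seeds the explicit table that SearchForAddressOfSymbol() consults before any opened library, and a null filename loads the running program itself, as with dlopen(NULL). The symbol name below is hypothetical:

    #include "llvm/Support/DynamicLibrary.h"
    #include <cstdio>

    int main() {
      std::string Err;
      // Passing 0 opens the program itself, mirroring dlopen(NULL, ...).
      if (llvm::sys::DynamicLibrary::LoadLibraryPermanently(0, &Err)) {
        std::fprintf(stderr, "load failed: %s\n", Err.c_str());
        return 1;
      }

      // Register an explicit symbol; this table is searched before any
      // opened library. "my_hook" is an invented name.
      static int MyHookData = 42;
      llvm::sys::DynamicLibrary::AddSymbol("my_hook", &MyHookData);

      void *P = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("my_hook");
      std::printf("my_hook -> %p\n", P); // null would mean "not found"
      return 0;
    }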
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Errno.h"
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+
+#if HAVE_STRING_H
+#include <string.h>
+
+#if HAVE_ERRNO_H
+#include <errno.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+namespace sys {
+
+#if HAVE_ERRNO_H
+std::string StrError() {
+  return StrError(errno);
+}
+#endif // HAVE_ERRNO_H
+
+std::string StrError(int errnum) {
+  const int MaxErrStrLen = 2000;
+  char buffer[MaxErrStrLen];
+  buffer[0] = '\0';
+  char* str = buffer;
+#ifdef HAVE_STRERROR_R
+  // strerror_r is thread-safe.
+  if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+    // glibc defines its own incompatible version of strerror_r
+    // which may not use the buffer supplied.
+    str = strerror_r(errnum, buffer, MaxErrStrLen - 1);
+# else
+    strerror_r(errnum, buffer, MaxErrStrLen - 1);
+# endif
+#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
+  if (errnum)
+    strerror_s(buffer, MaxErrStrLen - 1, errnum);
+#elif defined(HAVE_STRERROR)
+  // Copy the thread un-safe result of strerror into
+  // the buffer as fast as possible to minimize impact
+  // of collision of strerror in multiple threads.
+  if (errnum)
+    strncpy(buffer, strerror(errnum), MaxErrStrLen - 1);
+  buffer[MaxErrStrLen - 1] = '\0';
+#else
+  // Strange that this system doesn't even have strerror
+  // but, oh well, just use a generic message
+  sprintf(buffer, "Error #%d", errnum);
+#endif
+  return str;
+}
+
+} // namespace sys
+} // namespace llvm
+
+#endif // HAVE_STRING_H
diff --git a/final/lib/Support/ErrorHandling.cpp b/final/lib/Support/ErrorHandling.cpp
new file mode 100644
index 00000000000..3579546d757
--- /dev/null
+++ b/final/lib/Support/ErrorHandling.cpp
@@ -0,0 +1,100 @@
+//===- lib/Support/ErrorHandling.cpp - Callbacks for errors ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an API used to indicate fatal error conditions. Non-fatal
+// errors (most of them) should be handled through LLVMContext.
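Usage of the StrError() wrappers above is straightforward; note that the zero-argument overload only exists when errno.h was detected at configure time. A sketch, with an arbitrary failing path:

    #include "llvm/Support/Errno.h"
    #include <cerrno>
    #include <cstdio>

    int main() {
      // Translate a specific error number...
      std::string Msg = llvm::sys::StrError(ENOENT);
      std::printf("ENOENT: %s\n", Msg.c_str());

      // ...or whatever is currently in errno.
      if (std::fopen("/nonexistent/path", "r") == 0)
        std::printf("fopen failed: %s\n", llvm::sys::StrError().c_str());
      return 0;
    }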
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Config/config.h"
+#include <cassert>
+#include <cstdlib>
+
+#if defined(HAVE_UNISTD_H)
+# include <unistd.h>
+#endif
+#if defined(_MSC_VER)
+# include <io.h>
+# include <fcntl.h>
+#endif
+
+using namespace llvm;
+using namespace std;
+
+static fatal_error_handler_t ErrorHandler = 0;
+static void *ErrorHandlerUserData = 0;
+
+void llvm::install_fatal_error_handler(fatal_error_handler_t handler,
+                                       void *user_data) {
+  assert(!llvm_is_multithreaded() &&
+         "Cannot register error handlers after starting multithreaded mode!\n");
+  assert(!ErrorHandler && "Error handler already registered!\n");
+  ErrorHandler = handler;
+  ErrorHandlerUserData = user_data;
+}
+
+void llvm::remove_fatal_error_handler() {
+  ErrorHandler = 0;
+}
+
+void llvm::report_fatal_error(const char *Reason) {
+  report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const std::string &Reason) {
+  report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(StringRef Reason) {
+  report_fatal_error(Twine(Reason));
+}
+
+void llvm::report_fatal_error(const Twine &Reason) {
+  if (ErrorHandler) {
+    ErrorHandler(ErrorHandlerUserData, Reason.str());
+  } else {
+    // Blast the result out to stderr. We don't try hard to make sure this
+    // succeeds (e.g. handling EINTR) and we can't use errs() here because
+    // raw ostreams can call report_fatal_error.
+    SmallVector<char, 64> Buffer;
+    raw_svector_ostream OS(Buffer);
+    OS << "LLVM ERROR: " << Reason << "\n";
+    StringRef MessageStr = OS.str();
+    ssize_t written = ::write(2, MessageStr.data(), MessageStr.size());
+    (void)written; // If something went wrong, we deliberately just give up.
+  }
+
+  // If we reached here, we are failing ungracefully. Run the interrupt handlers
+  // to make sure any special cleanups get done, in particular that we remove
+  // files registered with RemoveFileOnSignal.
+  sys::RunInterruptHandlers();
+
+  exit(1);
+}
+
+void llvm::llvm_unreachable_internal(const char *msg, const char *file,
+                                     unsigned line) {
+  // This code intentionally doesn't call the ErrorHandler callback, because
+  // llvm_unreachable is intended to be used to indicate "impossible"
+  // situations, and not legitimate runtime errors.
+  if (msg)
+    dbgs() << msg << "\n";
+  dbgs() << "UNREACHABLE executed";
+  if (file)
+    dbgs() << " at " << file << ":" << line;
+  dbgs() << "!\n";
+  abort();
+}
diff --git a/final/lib/Support/FileUtilities.cpp b/final/lib/Support/FileUtilities.cpp
new file mode 100644
index 00000000000..5dbabee7a7e
--- /dev/null
+++ b/final/lib/Support/FileUtilities.cpp
@@ -0,0 +1,281 @@
+//===- Support/FileUtilities.cpp - File System Utilities ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a family of utility functions which are useful for doing
+// various things with files.
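A sketch of the handler registration implemented above. The handler signature matches the ErrorHandler(ErrorHandlerUserData, Reason.str()) call site; the handler body and exit code are invented for illustration, and per the assertions above registration must happen once, before going multithreaded:

    #include "llvm/Support/ErrorHandling.h"
    #include <cstdio>
    #include <cstdlib>

    // Matches fatal_error_handler_t: receives the opaque user_data pointer
    // given at registration time plus the formatted reason string.
    static void MyFatalHandler(void *UserData, const std::string &Reason) {
      std::fprintf(stderr, "[%s] fatal: %s\n",
                   static_cast<const char *>(UserData), Reason.c_str());
      std::exit(70); // arbitrary code; report_fatal_error() never returns
    }

    int main() {
      static const char Tag[] = "mytool";
      llvm::install_fatal_error_handler(MyFatalHandler, (void *)Tag);
      llvm::report_fatal_error("demo failure"); // never returns
    }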
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include <cstdlib>
+#include <cstring>
+#include <cctype>
+using namespace llvm;
+
+static bool isSignedChar(char C) {
+  return (C == '+' || C == '-');
+}
+
+static bool isExponentChar(char C) {
+  switch (C) {
+  case 'D': // Strange exponential notation.
+  case 'd': // Strange exponential notation.
+  case 'e':
+  case 'E': return true;
+  default: return false;
+  }
+}
+
+static bool isNumberChar(char C) {
+  switch (C) {
+  case '0': case '1': case '2': case '3': case '4':
+  case '5': case '6': case '7': case '8': case '9':
+  case '.': return true;
+  default: return isSignedChar(C) || isExponentChar(C);
+  }
+}
+
+static const char *BackupNumber(const char *Pos, const char *FirstChar) {
+  // If we didn't stop in the middle of a number, don't backup.
+  if (!isNumberChar(*Pos)) return Pos;
+
+  // Otherwise, return to the start of the number.
+  bool HasPeriod = false;
+  while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+    // Backup over at most one period.
+    if (Pos[-1] == '.') {
+      if (HasPeriod)
+        break;
+      HasPeriod = true;
+    }
+
+    --Pos;
+    if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
+      break;
+  }
+  return Pos;
+}
+
+/// EndOfNumber - Return the first character that is not part of the specified
+/// number. This assumes that the buffer is null terminated, so it won't fall
+/// off the end.
+static const char *EndOfNumber(const char *Pos) {
+  while (isNumberChar(*Pos))
+    ++Pos;
+  return Pos;
+}
+
+/// CompareNumbers - compare two numbers, returning true if they are different.
+static bool CompareNumbers(const char *&F1P, const char *&F2P,
+                           const char *F1End, const char *F2End,
+                           double AbsTolerance, double RelTolerance,
+                           std::string *ErrorMsg) {
+  const char *F1NumEnd, *F2NumEnd;
+  double V1 = 0.0, V2 = 0.0;
+
+  // If one of the positions is at a space and the other isn't, chomp up 'til
+  // the end of the space.
+  while (isspace(*F1P) && F1P != F1End)
+    ++F1P;
+  while (isspace(*F2P) && F2P != F2End)
+    ++F2P;
+
+  // If we stop on numbers, compare their difference.
+  if (!isNumberChar(*F1P) || !isNumberChar(*F2P)) {
+    // The diff failed.
+    F1NumEnd = F1P;
+    F2NumEnd = F2P;
+  } else {
+    // Note that some ugliness is built into this to permit support for numbers
+    // that use "D" or "d" as their exponential marker, e.g. "1.234D45". This
+    // occurs in 200.sixtrack in spec2k.
+    V1 = strtod(F1P, const_cast<char**>(&F1NumEnd));
+    V2 = strtod(F2P, const_cast<char**>(&F2NumEnd));
+
+    if (*F1NumEnd == 'D' || *F1NumEnd == 'd') {
+      // Copy string into tmp buffer to replace the 'D' with an 'e'.
+      SmallString<200> StrTmp(F1P, EndOfNumber(F1NumEnd)+1);
+      // Strange exponential notation!
+      StrTmp[static_cast<unsigned>(F1NumEnd-F1P)] = 'e';
+
+      V1 = strtod(&StrTmp[0], const_cast<char**>(&F1NumEnd));
+      F1NumEnd = F1P + (F1NumEnd-&StrTmp[0]);
+    }
+
+    if (*F2NumEnd == 'D' || *F2NumEnd == 'd') {
+      // Copy string into tmp buffer to replace the 'D' with an 'e'.
+      SmallString<200> StrTmp(F2P, EndOfNumber(F2NumEnd)+1);
+      // Strange exponential notation!
+      StrTmp[static_cast<unsigned>(F2NumEnd-F2P)] = 'e';
+
+      V2 = strtod(&StrTmp[0], const_cast<char**>(&F2NumEnd));
+      F2NumEnd = F2P + (F2NumEnd-&StrTmp[0]);
+    }
+  }
+
+  if (F1NumEnd == F1P || F2NumEnd == F2P) {
+    if (ErrorMsg) {
+      *ErrorMsg = "FP Comparison failed, not a numeric difference between '";
+      *ErrorMsg += F1P[0];
+      *ErrorMsg += "' and '";
+      *ErrorMsg += F2P[0];
+      *ErrorMsg += "'";
+    }
+    return true;
+  }
+
+  // Check to see if these are inside the absolute tolerance
+  if (AbsTolerance < std::abs(V1-V2)) {
+    // Nope, check the relative tolerance...
+    double Diff;
+    if (V2)
+      Diff = std::abs(V1/V2 - 1.0);
+    else if (V1)
+      Diff = std::abs(V2/V1 - 1.0);
+    else
+      Diff = 0; // Both zero.
+    if (Diff > RelTolerance) {
+      if (ErrorMsg) {
+        raw_string_ostream(*ErrorMsg)
+          << "Compared: " << V1 << " and " << V2 << '\n'
+          << "abs. diff = " << std::abs(V1-V2) << " rel.diff = " << Diff << '\n'
+          << "Out of tolerance: rel/abs: " << RelTolerance << '/'
+          << AbsTolerance;
+      }
+      return true;
+    }
+  }
+
+  // Otherwise, advance our read pointers to the end of the numbers.
+  F1P = F1NumEnd;  F2P = F2NumEnd;
+  return false;
+}
+
+/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
+/// files match, 1 if they are different, and 2 if there is a file error. This
+/// function differs from DiffFiles in that you can specify an absolute and
+/// relative FP error that is allowed to exist. If you specify a string to fill
+/// in for the error option, it will set the string to an error message if an
+/// error occurs, allowing the caller to distinguish between a failed diff and a
+/// file system error.
+///
+int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
+                                 const sys::PathWithStatus &FileB,
+                                 double AbsTol, double RelTol,
+                                 std::string *Error) {
+  const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error);
+  if (!FileAStat)
+    return 2;
+  const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error);
+  if (!FileBStat)
+    return 2;
+
+  // Check for zero length files because some systems croak when you try to
+  // mmap an empty file.
+  size_t A_size = FileAStat->getSize();
+  size_t B_size = FileBStat->getSize();
+
+  // If they are both zero sized then they're the same
+  if (A_size == 0 && B_size == 0)
+    return 0;
+
+  // If only one of them is zero sized then they can't be the same
+  if ((A_size == 0 || B_size == 0)) {
+    if (Error)
+      *Error = "Files differ: one is zero-sized, the other isn't";
+    return 1;
+  }
+
+  // Now it's safe to mmap the files into memory because both files
+  // have a non-zero size.
+  error_code ec;
+  OwningPtr<MemoryBuffer> F1;
+  if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) {
+    if (Error)
+      *Error = ec.message();
+    return 2;
+  }
+  OwningPtr<MemoryBuffer> F2;
+  if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) {
+    if (Error)
+      *Error = ec.message();
+    return 2;
+  }
+
+  // Okay, now that we opened the files, scan them for the first difference.
+  const char *File1Start = F1->getBufferStart();
+  const char *File2Start = F2->getBufferStart();
+  const char *File1End = F1->getBufferEnd();
+  const char *File2End = F2->getBufferEnd();
+  const char *F1P = File1Start;
+  const char *F2P = File2Start;
+
+  // Are the buffers identical? Common case: Handle this efficiently.
+  if (A_size == B_size &&
+      std::memcmp(File1Start, File2Start, A_size) == 0)
+    return 0;
+
+  // Otherwise, if no tolerances are set, we are done: the files differ.
+  if (AbsTol == 0 && RelTol == 0) {
+    if (Error)
+      *Error = "Files differ without tolerance allowance";
+    return 1; // Files different!
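To make the CompareNumbers() tolerance logic above concrete: a pair of values only counts as different when it fails the absolute check and then the relative check |V1/V2 - 1.0| > RelTolerance. A standalone sketch of the same decision, with values chosen purely for illustration:

    #include <cmath>
    #include <cstdio>

    // Mirrors the tolerance test in CompareNumbers() above: "different"
    // only if both the absolute and the relative check fail.
    static bool Differ(double V1, double V2, double AbsTol, double RelTol) {
      if (std::abs(V1 - V2) <= AbsTol)
        return false;                       // inside absolute tolerance
      double Diff;
      if (V2)      Diff = std::abs(V1 / V2 - 1.0);
      else if (V1) Diff = std::abs(V2 / V1 - 1.0);
      else         Diff = 0;                // both zero
      return Diff > RelTol;
    }

    int main() {
      // 1.0000 vs 1.0001: abs diff 1e-4 exceeds AbsTol=0, but the relative
      // diff of about 1e-4 passes RelTol=1e-3, so the values match.
      std::printf("%d\n", Differ(1.0000, 1.0001, 0.0, 1e-3)); // prints 0
      std::printf("%d\n", Differ(1.0, 2.0, 0.0, 1e-3));       // prints 1
    }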
+  }
+
+  bool CompareFailed = false;
+  while (1) {
+    // Scan for the end of file or next difference.
+    while (F1P < File1End && F2P < File2End && *F1P == *F2P)
+      ++F1P, ++F2P;
+
+    if (F1P >= File1End || F2P >= File2End) break;
+
+    // Okay, we must have found a difference. Backup to the start of the
+    // current number each stream is at so that we can compare from the
+    // beginning.
+    F1P = BackupNumber(F1P, File1Start);
+    F2P = BackupNumber(F2P, File2Start);
+
+    // Now that we are at the start of the numbers, compare them, exiting if
+    // they don't match.
+    if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error)) {
+      CompareFailed = true;
+      break;
+    }
+  }
+
+  // Okay, we reached the end of file. If both files are at the end, we
+  // succeeded.
+  bool F1AtEnd = F1P >= File1End;
+  bool F2AtEnd = F2P >= File2End;
+  if (!CompareFailed && (!F1AtEnd || !F2AtEnd)) {
+    // Else, we might have run off the end due to a number: backup and retry.
+    if (F1AtEnd && isNumberChar(F1P[-1])) --F1P;
+    if (F2AtEnd && isNumberChar(F2P[-1])) --F2P;
+    F1P = BackupNumber(F1P, File1Start);
+    F2P = BackupNumber(F2P, File2Start);
+
+    // Now that we are at the start of the numbers, compare them, exiting if
+    // they don't match.
+    if (CompareNumbers(F1P, F2P, File1End, File2End, AbsTol, RelTol, Error))
+      CompareFailed = true;
+
+    // If we found the end, we succeeded.
+    if (F1P < File1End || F2P < File2End)
+      CompareFailed = true;
+  }
+
+  return CompareFailed;
+}
diff --git a/final/lib/Support/FoldingSet.cpp b/final/lib/Support/FoldingSet.cpp
new file mode 100644
index 00000000000..a4f80a90d6d
--- /dev/null
+++ b/final/lib/Support/FoldingSet.cpp
@@ -0,0 +1,421 @@
+//===-- Support/FoldingSet.cpp - Uniquing Hash Set --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a hash set that can be used to remove duplication of
+// nodes in a graph. This code was originally created by Chris Lattner for use
+// with SelectionDAGCSEMap, but was isolated to provide use across the llvm code
+// set.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Host.h"
+#include <cassert>
+#include <cstring>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeIDRef Implementation
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
+/// used to lookup the node in the FoldingSetImpl.
+unsigned FoldingSetNodeIDRef::ComputeHash() const {
+  // This is adapted from SuperFastHash by Paul Hsieh.
+  unsigned Hash = static_cast<unsigned>(Size);
+  for (const unsigned *BP = Data, *E = BP+Size; BP != E; ++BP) {
+    unsigned Data = *BP;
+    Hash += Data & 0xFFFF;
+    unsigned Tmp = ((Data >> 16) << 11) ^ Hash;
+    Hash = (Hash << 16) ^ Tmp;
+    Hash += Hash >> 11;
+  }
+
+  // Force "avalanching" of final 127 bits.
+  Hash ^= Hash << 3;
+  Hash += Hash >> 5;
+  Hash ^= Hash << 4;
+  Hash += Hash >> 17;
+  Hash ^= Hash << 25;
+  Hash += Hash >> 6;
+  return Hash;
+}
+
+bool FoldingSetNodeIDRef::operator==(FoldingSetNodeIDRef RHS) const {
+  if (Size != RHS.Size) return false;
+  return memcmp(Data, RHS.Data, Size*sizeof(*Data)) == 0;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetNodeID Implementation
+
+/// Add* - Add various data types to Bit data.
+///
+void FoldingSetNodeID::AddPointer(const void *Ptr) {
+  // Note: this adds pointers to the hash using sizes and endianness that
+  // depend on the host. It doesn't matter however, because hashing on
+  // pointer values is inherently unstable. Nothing should depend on the
+  // ordering of nodes in the folding set.
+  intptr_t PtrI = (intptr_t)Ptr;
+  Bits.push_back(unsigned(PtrI));
+  if (sizeof(intptr_t) > sizeof(unsigned))
+    Bits.push_back(unsigned(uint64_t(PtrI) >> 32));
+}
+void FoldingSetNodeID::AddInteger(signed I) {
+  Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(unsigned I) {
+  Bits.push_back(I);
+}
+void FoldingSetNodeID::AddInteger(long I) {
+  AddInteger((unsigned long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long I) {
+  if (sizeof(long) == sizeof(int))
+    AddInteger(unsigned(I));
+  else if (sizeof(long) == sizeof(long long)) {
+    AddInteger((unsigned long long)I);
+  } else {
+    llvm_unreachable("unexpected sizeof(long)");
+  }
+}
+void FoldingSetNodeID::AddInteger(long long I) {
+  AddInteger((unsigned long long)I);
+}
+void FoldingSetNodeID::AddInteger(unsigned long long I) {
+  AddInteger(unsigned(I));
+  if ((uint64_t)(int)I != I)
+    Bits.push_back(unsigned(I >> 32));
+}
+
+void FoldingSetNodeID::AddString(StringRef String) {
+  unsigned Size = String.size();
+  Bits.push_back(Size);
+  if (!Size) return;
+
+  unsigned Units = Size / 4;
+  unsigned Pos = 0;
+  const unsigned *Base = (const unsigned*) String.data();
+
+  // If the string is aligned do a bulk transfer.
+  if (!((intptr_t)Base & 3)) {
+    Bits.append(Base, Base + Units);
+    Pos = (Units + 1) * 4;
+  } else {
+    // Otherwise do it the hard way.
+    // To be compatible with above bulk transfer, we need to take endianness
+    // into account.
+    if (sys::isBigEndianHost()) {
+      for (Pos += 4; Pos <= Size; Pos += 4) {
+        unsigned V = ((unsigned char)String[Pos - 4] << 24) |
+                     ((unsigned char)String[Pos - 3] << 16) |
+                     ((unsigned char)String[Pos - 2] << 8) |
+                      (unsigned char)String[Pos - 1];
+        Bits.push_back(V);
+      }
+    } else {
+      assert(sys::isLittleEndianHost() && "Unexpected host endianness");
+      for (Pos += 4; Pos <= Size; Pos += 4) {
+        unsigned V = ((unsigned char)String[Pos - 1] << 24) |
+                     ((unsigned char)String[Pos - 2] << 16) |
+                     ((unsigned char)String[Pos - 3] << 8) |
+                      (unsigned char)String[Pos - 4];
+        Bits.push_back(V);
+      }
+    }
+  }
+
+  // With the leftover bits.
+  unsigned V = 0;
+  // Pos will have overshot size by 4 - #bytes left over.
+  // No need to take endianness into account here - this is always executed.
+  switch (Pos - Size) {
+  case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
+  case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+  case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
+  default: return; // Nothing left.
+  }
+
+  Bits.push_back(V);
+}
+
+/// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to
+/// lookup the node in the FoldingSetImpl.
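The Add* methods above flatten a node's fields into a vector of 32-bit words, which ComputeHash() then digests. A sketch of profiling a small record; the struct and its fields are hypothetical:

    #include "llvm/ADT/FoldingSet.h"
    #include <cstdio>

    // Hypothetical two-field key being uniqued.
    struct MyKey { unsigned Kind; const char *Name; };

    static unsigned HashKey(const MyKey &K) {
      llvm::FoldingSetNodeID ID;
      ID.AddInteger(K.Kind);   // appended as one 32-bit word
      ID.AddString(K.Name);    // length word plus packed characters
      return ID.ComputeHash(); // SuperFastHash over the accumulated words
    }

    int main() {
      MyKey K = { 7, "example" };
      std::printf("hash = %u\n", HashKey(K));
      return 0;
    }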
+unsigned FoldingSetNodeID::ComputeHash() const {
+  return FoldingSetNodeIDRef(Bits.data(), Bits.size()).ComputeHash();
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS) const {
+  return *this == FoldingSetNodeIDRef(RHS.Bits.data(), RHS.Bits.size());
+}
+
+/// operator== - Used to compare two nodes to each other.
+///
+bool FoldingSetNodeID::operator==(FoldingSetNodeIDRef RHS) const {
+  return FoldingSetNodeIDRef(Bits.data(), Bits.size()) == RHS;
+}
+
+/// Intern - Copy this node's data to a memory region allocated from the
+/// given allocator and return a FoldingSetNodeIDRef describing the
+/// interned data.
+FoldingSetNodeIDRef
+FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const {
+  unsigned *New = Allocator.Allocate<unsigned>(Bits.size());
+  std::uninitialized_copy(Bits.begin(), Bits.end(), New);
+  return FoldingSetNodeIDRef(New, Bits.size());
+}
+
+//===----------------------------------------------------------------------===//
+/// Helper functions for FoldingSetImpl.
+
+/// GetNextPtr - In order to save space, each bucket is a
+/// singly-linked-list. In order to make deletion more efficient, we make
+/// the list circular, so we can delete a node without computing its hash.
+/// The problem with this is that the start of the hash buckets are not
+/// Nodes. If NextInBucketPtr is a bucket pointer, this method returns null:
+/// use GetBucketPtr when this happens.
+static FoldingSetImpl::Node *GetNextPtr(void *NextInBucketPtr) {
+  // The low bit is set if this is the pointer back to the bucket.
+  if (reinterpret_cast<intptr_t>(NextInBucketPtr) & 1)
+    return 0;
+
+  return static_cast<FoldingSetImpl::Node*>(NextInBucketPtr);
+}
+
+
+/// GetBucketPtr - Provides a casting of a bucket pointer for isNode
+/// testing.
+static void **GetBucketPtr(void *NextInBucketPtr) {
+  intptr_t Ptr = reinterpret_cast<intptr_t>(NextInBucketPtr);
+  assert((Ptr & 1) && "Not a bucket pointer");
+  return reinterpret_cast<void**>(Ptr & ~intptr_t(1));
+}
+
+/// GetBucketFor - Hash the specified node ID and return the hash bucket for
+/// the specified ID.
+static void **GetBucketFor(unsigned Hash, void **Buckets, unsigned NumBuckets) {
+  // NumBuckets is always a power of 2.
+  unsigned BucketNum = Hash & (NumBuckets-1);
+  return Buckets + BucketNum;
+}
+
+/// AllocateBuckets - Allocate initialized bucket memory.
+static void **AllocateBuckets(unsigned NumBuckets) {
+  void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
+  // Set the very last bucket to be a non-null "pointer".
+  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+  return Buckets;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetImpl Implementation
+
+FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
+  assert(5 < Log2InitSize && Log2InitSize < 32 &&
+         "Initial hash table size out of range");
+  NumBuckets = 1 << Log2InitSize;
+  Buckets = AllocateBuckets(NumBuckets);
+  NumNodes = 0;
+}
+FoldingSetImpl::~FoldingSetImpl() {
+  free(Buckets);
+}
+void FoldingSetImpl::clear() {
+  // Set all but the last bucket to null pointers.
+  memset(Buckets, 0, NumBuckets*sizeof(void*));
+
+  // Set the very last bucket to be a non-null "pointer".
+  Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+
+  // Reset the node count to zero.
+  NumNodes = 0;
+}
+
+/// GrowHashTable - Double the size of the hash table and rehash everything.
+///
+void FoldingSetImpl::GrowHashTable() {
+  void **OldBuckets = Buckets;
+  unsigned OldNumBuckets = NumBuckets;
+  NumBuckets <<= 1;
+
+  // Clear out new buckets.
+  Buckets = AllocateBuckets(NumBuckets);
+  NumNodes = 0;
+
+  // Walk the old buckets, rehashing nodes into their new place.
+  FoldingSetNodeID TempID;
+  for (unsigned i = 0; i != OldNumBuckets; ++i) {
+    void *Probe = OldBuckets[i];
+    if (!Probe) continue;
+    while (Node *NodeInBucket = GetNextPtr(Probe)) {
+      // Figure out the next link, remove NodeInBucket from the old link.
+      Probe = NodeInBucket->getNextInBucket();
+      NodeInBucket->SetNextInBucket(0);
+
+      // Insert the node into the new bucket, after recomputing the hash.
+      InsertNode(NodeInBucket,
+                 GetBucketFor(ComputeNodeHash(NodeInBucket, TempID),
+                              Buckets, NumBuckets));
+      TempID.clear();
+    }
+  }
+
+  free(OldBuckets);
+}
+
+/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
+/// return it. If not, return the insertion token that will make insertion
+/// faster.
+FoldingSetImpl::Node
+*FoldingSetImpl::FindNodeOrInsertPos(const FoldingSetNodeID &ID,
+                                     void *&InsertPos) {
+
+  void **Bucket = GetBucketFor(ID.ComputeHash(), Buckets, NumBuckets);
+  void *Probe = *Bucket;
+
+  InsertPos = 0;
+
+  FoldingSetNodeID TempID;
+  while (Node *NodeInBucket = GetNextPtr(Probe)) {
+    if (NodeEquals(NodeInBucket, ID, TempID))
+      return NodeInBucket;
+    TempID.clear();
+
+    Probe = NodeInBucket->getNextInBucket();
+  }
+
+  // Didn't find the node, return null with the bucket as the InsertPos.
+  InsertPos = Bucket;
+  return 0;
+}
+
+/// InsertNode - Insert the specified node into the folding set, knowing that it
+/// is not already in the map. InsertPos must be obtained from
+/// FindNodeOrInsertPos.
+void FoldingSetImpl::InsertNode(Node *N, void *InsertPos) {
+  assert(N->getNextInBucket() == 0);
+  // Do we need to grow the hashtable?
+  if (NumNodes+1 > NumBuckets*2) {
+    GrowHashTable();
+    FoldingSetNodeID TempID;
+    InsertPos = GetBucketFor(ComputeNodeHash(N, TempID), Buckets, NumBuckets);
+  }
+
+  ++NumNodes;
+
+  /// The insert position is actually a bucket pointer.
+  void **Bucket = static_cast<void**>(InsertPos);
+
+  void *Next = *Bucket;
+
+  // If this is the first insertion into this bucket, its next pointer will be
+  // null. Pretend as if it pointed to itself, setting the low bit to indicate
+  // that it is a pointer to the bucket.
+  if (Next == 0)
+    Next = reinterpret_cast<void*>(reinterpret_cast<intptr_t>(Bucket)|1);
+
+  // Set the node's next pointer, and make the bucket point to the node.
+  N->SetNextInBucket(Next);
+  *Bucket = N;
+}
+
+/// RemoveNode - Remove a node from the folding set, returning true if one was
+/// removed or false if the node was not in the folding set.
+bool FoldingSetImpl::RemoveNode(Node *N) {
+  // Because each bucket is a circular list, we don't need to compute N's hash
+  // to remove it.
+  void *Ptr = N->getNextInBucket();
+  if (Ptr == 0) return false;  // Not in folding set.
+
+  --NumNodes;
+  N->SetNextInBucket(0);
+
+  // Remember what N originally pointed to, either a bucket or another node.
+  void *NodeNextPtr = Ptr;
+
+  // Chase around the list until we find the node (or bucket) which points to N.
+  while (true) {
+    if (Node *NodeInBucket = GetNextPtr(Ptr)) {
+      // Advance pointer.
+      Ptr = NodeInBucket->getNextInBucket();
+
+      // We found a node that points to N, change it to point to N's next node,
+      // removing N from the list.
+      if (Ptr == N) {
+        NodeInBucket->SetNextInBucket(NodeNextPtr);
+        return true;
+      }
+    } else {
+      void **Bucket = GetBucketPtr(Ptr);
+      Ptr = *Bucket;
+
+      // If we found that the bucket points to N, update the bucket to point to
+      // whatever is next.
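InsertNode() and RemoveNode() above both rely on the same low-bit trick: a pointer back to the bucket is distinguished from a pointer to a node by setting bit 0, which is always clear in a sufficiently aligned object address. A standalone sketch of that tagging scheme, with all names invented:

    #include <cassert>
    #include <cstdio>
    #include <stdint.h>

    // Tag a pointer by setting its low bit; object pointers here are at
    // least 2-byte aligned, so the bit is free to carry one flag.
    static void *Tag(void *P)      { return (void*)((intptr_t)P | 1); }
    static bool  IsTagged(void *P) { return ((intptr_t)P & 1) != 0; }
    static void *Untag(void *P)    { return (void*)((intptr_t)P & ~intptr_t(1)); }

    int main() {
      int Slot = 0;
      void *T = Tag(&Slot);
      assert(IsTagged(T) && !IsTagged((void*)&Slot));
      std::printf("round trip ok: %d\n", Untag(T) == (void*)&Slot);
      return 0;
    }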
+      if (Ptr == N) {
+        *Bucket = NodeNextPtr;
+        return true;
+      }
+    }
+  }
+}
+
+/// GetOrInsertNode - If there is an existing simple Node exactly
+/// equal to the specified node, return it. Otherwise, insert 'N' and return
+/// it instead.
+FoldingSetImpl::Node *FoldingSetImpl::GetOrInsertNode(FoldingSetImpl::Node *N) {
+  FoldingSetNodeID ID;
+  GetNodeProfile(N, ID);
+  void *IP;
+  if (Node *E = FindNodeOrInsertPos(ID, IP))
+    return E;
+  InsertNode(N, IP);
+  return N;
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetIteratorImpl Implementation
+
+FoldingSetIteratorImpl::FoldingSetIteratorImpl(void **Bucket) {
+  // Skip to the first non-null non-self-cycle bucket.
+  while (*Bucket != reinterpret_cast<void*>(-1) &&
+         (*Bucket == 0 || GetNextPtr(*Bucket) == 0))
+    ++Bucket;
+
+  NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+}
+
+void FoldingSetIteratorImpl::advance() {
+  // If there is another link within this bucket, go to it.
+  void *Probe = NodePtr->getNextInBucket();
+
+  if (FoldingSetNode *NextNodeInBucket = GetNextPtr(Probe))
+    NodePtr = NextNodeInBucket;
+  else {
+    // Otherwise, this is the last link in this bucket.
+    void **Bucket = GetBucketPtr(Probe);
+
+    // Skip to the next non-null non-self-cycle bucket.
+    do {
+      ++Bucket;
+    } while (*Bucket != reinterpret_cast<void*>(-1) &&
+             (*Bucket == 0 || GetNextPtr(*Bucket) == 0));
+
+    NodePtr = static_cast<FoldingSetNode*>(*Bucket);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// FoldingSetBucketIteratorImpl Implementation
+
+FoldingSetBucketIteratorImpl::FoldingSetBucketIteratorImpl(void **Bucket) {
+  Ptr = (*Bucket == 0 || GetNextPtr(*Bucket) == 0) ? (void*) Bucket : *Bucket;
+}
diff --git a/final/lib/Support/FormattedStream.cpp b/final/lib/Support/FormattedStream.cpp
new file mode 100644
index 00000000000..231ae48759e
--- /dev/null
+++ b/final/lib/Support/FormattedStream.cpp
@@ -0,0 +1,101 @@
+//===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of formatted_raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+/// CountColumns - Examine the given char sequence and figure out which
+/// column we end up in after output.
+///
+static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) {
+  // Keep track of the current column by scanning the string for
+  // special characters
+
+  for (const char *End = Ptr + Size; Ptr != End; ++Ptr) {
+    ++Column;
+    if (*Ptr == '\n' || *Ptr == '\r')
+      Column = 0;
+    else if (*Ptr == '\t')
+      // Assumes tab stop = 8 characters.
+      Column += (8 - (Column & 0x7)) & 0x7;
+  }
+
+  return Column;
+}
+
+/// ComputeColumn - Examine the current output and figure out which
+/// column we end up in after output.
+void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
+  // If our previous scan pointer is inside the buffer, assume we already
+  // scanned those bytes. This depends on raw_ostream to not change our buffer
+  // in unexpected ways.
+  if (Ptr <= Scanned && Scanned <= Ptr + Size) {
+    // Scan all characters added since our last scan to determine the new
+    // column.
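The column bookkeeping that starts here feeds PadToColumn() further down in this file, letting printers align output without counting characters themselves. A usage sketch via the fouts() accessor defined at the bottom of the file; the two-column listing is invented:

    #include "llvm/Support/FormattedStream.h"

    int main() {
      llvm::formatted_raw_ostream &OS = llvm::fouts();
      // Two-column listing: mnemonic, then a comment column at 20.
      OS << "add";
      OS.PadToColumn(20) << "; integer add\n";
      OS << "call";
      OS.PadToColumn(20) << "; direct call\n";
      OS.flush();
      return 0;
    }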
+ ColumnScanned = CountColumns(ColumnScanned, Scanned, + Size - (Scanned - Ptr)); + } else + ColumnScanned = CountColumns(ColumnScanned, Ptr, Size); + + // Update the scanning pointer. + Scanned = Ptr + Size; +} + +/// PadToColumn - Align the output to some column number. +/// +/// \param NewCol - The column to move to. +/// +formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); + + // Output spaces until we reach the desired column. + indent(std::max(int(NewCol - ColumnScanned), 1)); + return *this; +} + +void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { + // Figure out what's in the buffer and add it to the column count. + ComputeColumn(Ptr, Size); + + // Write the data to the underlying stream (which is unbuffered, so + // the data will be immediately written out). + TheStream->write(Ptr, Size); + + // Reset the scanning pointer. + Scanned = 0; +} + +/// fouts() - This returns a reference to a formatted_raw_ostream for +/// standard output. Use it like: fouts() << "foo" << "bar"; +formatted_raw_ostream &llvm::fouts() { + static formatted_raw_ostream S(outs()); + return S; +} + +/// ferrs() - This returns a reference to a formatted_raw_ostream for +/// standard error. Use it like: ferrs() << "foo" << "bar"; +formatted_raw_ostream &llvm::ferrs() { + static formatted_raw_ostream S(errs()); + return S; +} + +/// fdbgs() - This returns a reference to a formatted_raw_ostream for +/// the debug stream. Use it like: fdbgs() << "foo" << "bar"; +formatted_raw_ostream &llvm::fdbgs() { + static formatted_raw_ostream S(dbgs()); + return S; +} diff --git a/final/lib/Support/GraphWriter.cpp b/final/lib/Support/GraphWriter.cpp new file mode 100644 index 00000000000..0dba28a2530 --- /dev/null +++ b/final/lib/Support/GraphWriter.cpp @@ -0,0 +1,200 @@ +//===-- GraphWriter.cpp - Implements GraphWriter support routines ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements misc. GraphWriter support routines. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Program.h" +#include "llvm/Config/config.h" +using namespace llvm; + +std::string llvm::DOT::EscapeString(const std::string &Label) { + std::string Str(Label); + for (unsigned i = 0; i != Str.length(); ++i) + switch (Str[i]) { + case '\n': + Str.insert(Str.begin()+i, '\\'); // Escape character... + ++i; + Str[i] = 'n'; + break; + case '\t': + Str.insert(Str.begin()+i, ' '); // Convert to two spaces + ++i; + Str[i] = ' '; + break; + case '\\': + if (i+1 != Str.length()) + switch (Str[i+1]) { + case 'l': continue; // don't disturb \l + case '|': case '{': case '}': + Str.erase(Str.begin()+i); continue; + default: break; + } + case '{': case '}': + case '<': case '>': + case '|': case '"': + Str.insert(Str.begin()+i, '\\'); // Escape character... 
+        ++i; // don't infinite loop
+        break;
+    }
+  return Str;
+}
+
+
+
+void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
+                        GraphProgram::Name program) {
+  std::string ErrMsg;
+#if HAVE_GRAPHVIZ
+  sys::Path Graphviz(LLVM_PATH_GRAPHVIZ);
+
+  std::vector<const char*> args;
+  args.push_back(Graphviz.c_str());
+  args.push_back(Filename.c_str());
+  args.push_back(0);
+
+  errs() << "Running 'Graphviz' program... ";
+  if (sys::Program::ExecuteAndWait(Graphviz, &args[0],0,0,0,0,&ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return;
+  }
+  Filename.eraseFromDisk();
+  errs() << " done. \n";
+
+#elif HAVE_XDOT_PY
+  std::vector<const char*> args;
+  args.push_back(LLVM_PATH_XDOT_PY);
+  args.push_back(Filename.c_str());
+
+  switch (program) {
+  case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break;
+  case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break;
+  case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break;
+  case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break;
+  case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break;
+  default: errs() << "Unknown graph layout name; using default.\n";
+  }
+
+  args.push_back(0);
+
+  errs() << "Running 'xdot.py' program... ";
+  if (sys::Program::ExecuteAndWait(sys::Path(LLVM_PATH_XDOT_PY),
+                                   &args[0],0,0,0,0,&ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return;
+  }
+  Filename.eraseFromDisk();
+  errs() << " done. \n";
+
+#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \
+                   HAVE_TWOPI || HAVE_CIRCO))
+  sys::Path PSFilename = Filename;
+  PSFilename.appendSuffix("ps");
+
+  sys::Path prog;
+
+  // Set default grapher
+#if HAVE_CIRCO
+  prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+#if HAVE_TWOPI
+  prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if HAVE_NEATO
+  prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if HAVE_FDP
+  prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if HAVE_DOT
+  prog = sys::Path(LLVM_PATH_DOT);
+#endif
+
+  // Find which program the user wants
+#if HAVE_DOT
+  if (program == GraphProgram::DOT)
+    prog = sys::Path(LLVM_PATH_DOT);
+#endif
+#if (HAVE_FDP)
+  if (program == GraphProgram::FDP)
+    prog = sys::Path(LLVM_PATH_FDP);
+#endif
+#if (HAVE_NEATO)
+  if (program == GraphProgram::NEATO)
+    prog = sys::Path(LLVM_PATH_NEATO);
+#endif
+#if (HAVE_TWOPI)
+  if (program == GraphProgram::TWOPI)
+    prog = sys::Path(LLVM_PATH_TWOPI);
+#endif
+#if (HAVE_CIRCO)
+  if (program == GraphProgram::CIRCO)
+    prog = sys::Path(LLVM_PATH_CIRCO);
+#endif
+
+  std::vector<const char*> args;
+  args.push_back(prog.c_str());
+  args.push_back("-Tps");
+  args.push_back("-Nfontname=Courier");
+  args.push_back("-Gsize=7.5,10");
+  args.push_back(Filename.c_str());
+  args.push_back("-o");
+  args.push_back(PSFilename.c_str());
+  args.push_back(0);
+
+  errs() << "Running '" << prog.str() << "' program... ";
+
+  if (sys::Program::ExecuteAndWait(prog, &args[0], 0, 0, 0, 0, &ErrMsg)) {
+    errs() << "Error: " << ErrMsg << "\n";
+    return;
+  }
+  errs() << " done. \n";
\n"; + + sys::Path gv(LLVM_PATH_GV); + args.clear(); + args.push_back(gv.c_str()); + args.push_back(PSFilename.c_str()); + args.push_back("--spartan"); + args.push_back(0); + + ErrMsg.clear(); + if (wait) { + if (sys::Program::ExecuteAndWait(gv, &args[0],0,0,0,0,&ErrMsg)) + errs() << "Error: " << ErrMsg << "\n"; + Filename.eraseFromDisk(); + PSFilename.eraseFromDisk(); + } + else { + sys::Program::ExecuteNoWait(gv, &args[0],0,0,0,&ErrMsg); + errs() << "Remember to erase graph files: " << Filename.str() << " " + << PSFilename.str() << "\n"; + } +#elif HAVE_DOTTY + sys::Path dotty(LLVM_PATH_DOTTY); + + std::vector args; + args.push_back(dotty.c_str()); + args.push_back(Filename.c_str()); + args.push_back(0); + + errs() << "Running 'dotty' program... "; + if (sys::Program::ExecuteAndWait(dotty, &args[0],0,0,0,0,&ErrMsg)) { + errs() << "Error: " << ErrMsg << "\n"; + } else { +// Dotty spawns another app and doesn't wait until it returns +#if defined (__MINGW32__) || defined (_WINDOWS) + return; +#endif + Filename.eraseFromDisk(); + } +#endif +} diff --git a/final/lib/Support/Host.cpp b/final/lib/Support/Host.cpp new file mode 100644 index 00000000000..4dacf9691d6 --- /dev/null +++ b/final/lib/Support/Host.cpp @@ -0,0 +1,307 @@ +//===-- Host.cpp - Implement OS Host Concept --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Host concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Host.h" +#include "llvm/Config/config.h" +#include + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Host.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Host.inc" +#endif +#ifdef _MSC_VER +#include +#endif + +//===----------------------------------------------------------------------===// +// +// Implementations of the CPU detection routines +// +//===----------------------------------------------------------------------===// + +using namespace llvm; + +#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\ + || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + +/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the +/// specified arguments. If we can't run cpuid on the host, return true. +static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + #if defined(__GNUC__) + // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. 
+ asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #endif +#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + #if defined(__GNUC__) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(_MSC_VER) + __asm { + mov eax,value + cpuid + mov esi,rEAX + mov dword ptr [esi],eax + mov esi,rEBX + mov dword ptr [esi],ebx + mov esi,rECX + mov dword ptr [esi],ecx + mov esi,rEDX + mov dword ptr [esi],edx + } + return false; + #endif +#endif + return true; +} + +static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, + unsigned &Model) { + Family = (EAX >> 8) & 0xf; // Bits 8 - 11 + Model = (EAX >> 4) & 0xf; // Bits 4 - 7 + if (Family == 6 || Family == 0xf) { + if (Family == 0xf) + // Examine extended family ID if family ID is F. + Family += (EAX >> 20) & 0xff; // Bits 20 - 27 + // Examine extended model ID if family ID is 6 or F. + Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19 + } +} + +std::string sys::getHostCPUName() { + unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; + if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) + return "generic"; + unsigned Family = 0; + unsigned Model = 0; + DetectX86FamilyModel(EAX, Family, Model); + + bool HasSSE3 = (ECX & 0x1); + GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); + bool Em64T = (EDX >> 29) & 0x1; + + union { + unsigned u[3]; + char c[12]; + } text; + + GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + if (memcmp(text.c, "GenuineIntel", 12) == 0) { + switch (Family) { + case 3: + return "i386"; + case 4: + switch (Model) { + case 0: // Intel486 DX processors + case 1: // Intel486 DX processors + case 2: // Intel486 SX processors + case 3: // Intel487 processors, IntelDX2 OverDrive processors, + // IntelDX2 processors + case 4: // Intel486 SL processor + case 5: // IntelSX2 processors + case 7: // Write-Back Enhanced IntelDX2 processors + case 8: // IntelDX4 OverDrive processors, IntelDX4 processors + default: return "i486"; + } + case 5: + switch (Model) { + case 1: // Pentium OverDrive processor for Pentium processor (60, 66), + // Pentium processors (60, 66) + case 2: // Pentium OverDrive processor for Pentium processor (75, 90, + // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133, + // 150, 166, 200) + case 3: // Pentium OverDrive processors for Intel486 processor-based + // systems + return "pentium"; + + case 4: // Pentium OverDrive processor with MMX technology for Pentium + // processor (75, 90, 100, 120, 133), Pentium processor with + // MMX technology (166, 200) + return "pentium-mmx"; + + default: return "pentium"; + } + case 6: + switch (Model) { + case 1: // Pentium Pro processor + return "pentiumpro"; + + case 3: // Intel Pentium II OverDrive processor, Pentium II processor, + // model 03 + case 5: // Pentium II processor, model 05, Pentium II Xeon processor, + // model 05, and Intel Celeron processor, model 05 + case 6: // Celeron processor, model 06 + return "pentium2"; + + case 7: // Pentium III processor, model 07, and Pentium III Xeon + // processor, model 07 + case 8: // Pentium III processor, model 08, Pentium III Xeon 
processor, + // model 08, and Celeron processor, model 08 + case 10: // Pentium III Xeon processor, model 0Ah + case 11: // Pentium III processor, model 0Bh + return "pentium3"; + + case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. + case 13: // Intel Pentium M processor, Intel Celeron M processor, model + // 0Dh. All processors are manufactured using the 90 nm process. + return "pentium-m"; + + case 14: // Intel Core Duo processor, Intel Core Solo processor, model + // 0Eh. All processors are manufactured using the 65 nm process. + return "yonah"; + + case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile + // processor, Intel Core 2 Quad processor, Intel Core 2 Quad + // mobile processor, Intel Core 2 Extreme processor, Intel + // Pentium Dual-Core processor, Intel Xeon processor, model + // 0Fh. All processors are manufactured using the 65 nm process. + case 22: // Intel Celeron processor model 16h. All processors are + // manufactured using the 65 nm process + return "core2"; + + case 21: // Intel EP80579 Integrated Processor and Intel EP80579 + // Integrated Processor with Intel QuickAssist Technology + return "i686"; // FIXME: ??? + + case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model + // 17h. All processors are manufactured using the 45 nm process. + // + // 45nm: Penryn , Wolfdale, Yorkfield (XE) + return "penryn"; + + case 26: // Intel Core i7 processor and Intel Xeon processor. All + // processors are manufactured using the 45 nm process. + case 29: // Intel Xeon processor MP. All processors are manufactured using + // the 45 nm process. + case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. + // As found in a Summer 2010 model iMac. + case 37: // Intel Core i7, laptop version. + return "corei7"; + + case 28: // Intel Atom processor. All processors are manufactured using + // the 45 nm process + return "atom"; + + default: return "i686"; + } + case 15: { + switch (Model) { + case 0: // Pentium 4 processor, Intel Xeon processor. All processors are + // model 00h and manufactured using the 0.18 micron process. + case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon + // processor MP, and Intel Celeron processor. All processors are + // model 01h and manufactured using the 0.18 micron process. + case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor - M, + // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron + // processor, and Mobile Intel Celeron processor. All processors + // are model 02h and manufactured using the 0.13 micron process. + return (Em64T) ? "x86-64" : "pentium4"; + + case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D + // processor. All processors are model 03h and manufactured using + // the 90 nm process. + case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition, + // Pentium D processor, Intel Xeon processor, Intel Xeon + // processor MP, Intel Celeron D processor. All processors are + // model 04h and manufactured using the 90 nm process. + case 6: // Pentium 4 processor, Pentium D processor, Pentium processor + // Extreme Edition, Intel Xeon processor, Intel Xeon processor + // MP, Intel Celeron D processor. All processors are model 06h + // and manufactured using the 65 nm process. + return (Em64T) ? "nocona" : "prescott"; + + default: + return (Em64T) ? 
"x86-64" : "pentium4"; + } + } + + default: + return "generic"; + } + } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { + // FIXME: this poorly matches the generated SubtargetFeatureKV table. There + // appears to be no way to generate the wide variety of AMD-specific targets + // from the information returned from CPUID. + switch (Family) { + case 4: + return "i486"; + case 5: + switch (Model) { + case 6: + case 7: return "k6"; + case 8: return "k6-2"; + case 9: + case 13: return "k6-3"; + default: return "pentium"; + } + case 6: + switch (Model) { + case 4: return "athlon-tbird"; + case 6: + case 7: + case 8: return "athlon-mp"; + case 10: return "athlon-xp"; + default: return "athlon"; + } + case 15: + if (HasSSE3) + return "k8-sse3"; + switch (Model) { + case 1: return "opteron"; + case 5: return "athlon-fx"; // also opteron + default: return "athlon64"; + } + case 16: + return "amdfam10"; + default: + return "generic"; + } + } + return "generic"; +} +#else +std::string sys::getHostCPUName() { + return "generic"; +} +#endif + +bool sys::getHostCPUFeatures(StringMap &Features){ + return false; +} diff --git a/final/lib/Support/IncludeFile.cpp b/final/lib/Support/IncludeFile.cpp new file mode 100644 index 00000000000..5da88261ce5 --- /dev/null +++ b/final/lib/Support/IncludeFile.cpp @@ -0,0 +1,20 @@ +//===- lib/System/IncludeFile.cpp - Ensure Linking Of Implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IncludeFile constructor. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/IncludeFile.h" + +using namespace llvm; + +// This constructor is used to ensure linking of other modules. See the +// llvm/Support/IncludeFile.h header for details. +IncludeFile::IncludeFile(const void*) {} diff --git a/final/lib/Support/IntEqClasses.cpp b/final/lib/Support/IntEqClasses.cpp new file mode 100644 index 00000000000..11344956e4c --- /dev/null +++ b/final/lib/Support/IntEqClasses.cpp @@ -0,0 +1,70 @@ +//===-- llvm/ADT/IntEqClasses.cpp - Equivalence Classes of Integers -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Equivalence classes for small integers. This is a mapping of the integers +// 0 .. N-1 into M equivalence classes numbered 0 .. M-1. +// +// Initially each integer has its own equivalence class. Classes are joined by +// passing a representative member of each class to join(). +// +// Once the classes are built, compress() will number them 0 .. M-1 and prevent +// further changes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IntEqClasses.h" + +using namespace llvm; + +void IntEqClasses::grow(unsigned N) { + assert(NumClasses == 0 && "grow() called after compress()."); + EC.reserve(N); + while (EC.size() < N) + EC.push_back(EC.size()); +} + +void IntEqClasses::join(unsigned a, unsigned b) { + assert(NumClasses == 0 && "join() called after compress()."); + unsigned eca = EC[a]; + unsigned ecb = EC[b]; + // Update pointers while searching for the leaders, compressing the paths + // incrementally. 
The larger leader will eventually be updated, joining the + // classes. + while (eca != ecb) + if (eca < ecb) + EC[b] = eca, b = ecb, ecb = EC[b]; + else + EC[a] = ecb, a = eca, eca = EC[a]; +} + +unsigned IntEqClasses::findLeader(unsigned a) const { + assert(NumClasses == 0 && "findLeader() called after compress()."); + while (a != EC[a]) + a = EC[a]; + return a; +} + +void IntEqClasses::compress() { + if (NumClasses) + return; + for (unsigned i = 0, e = EC.size(); i != e; ++i) + EC[i] = (EC[i] == i) ? NumClasses++ : EC[EC[i]]; +} + +void IntEqClasses::uncompress() { + if (!NumClasses) + return; + SmallVector<unsigned, 8> Leader; + for (unsigned i = 0, e = EC.size(); i != e; ++i) + if (EC[i] < Leader.size()) + EC[i] = Leader[EC[i]]; + else + Leader.push_back(EC[i] = i); + NumClasses = 0; +} diff --git a/final/lib/Support/IntervalMap.cpp b/final/lib/Support/IntervalMap.cpp new file mode 100644 index 00000000000..4dfcc404ca4 --- /dev/null +++ b/final/lib/Support/IntervalMap.cpp @@ -0,0 +1,161 @@ +//===- lib/Support/IntervalMap.cpp - A sorted interval map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the few non-templated functions in IntervalMap. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IntervalMap.h" + +namespace llvm { +namespace IntervalMapImpl { + +void Path::replaceRoot(void *Root, unsigned Size, IdxPair Offsets) { + assert(!path.empty() && "Can't replace missing root"); + path.front() = Entry(Root, Size, Offsets.first); + path.insert(path.begin() + 1, Entry(subtree(0), Offsets.second)); +} + +NodeRef Path::getLeftSibling(unsigned Level) const { + // The root has no siblings. + if (Level == 0) + return NodeRef(); + + // Go up the tree until we can go left. + unsigned l = Level - 1; + while (l && path[l].offset == 0) + --l; + + // We can't go left. + if (path[l].offset == 0) + return NodeRef(); + + // NR is the subtree containing our left sibling. + NodeRef NR = path[l].subtree(path[l].offset - 1); + + // Keep right all the way down. + for (++l; l != Level; ++l) + NR = NR.subtree(NR.size() - 1); + return NR; +} + +void Path::moveLeft(unsigned Level) { + assert(Level != 0 && "Cannot move the root node"); + + // Go up the tree until we can go left. + unsigned l = 0; + if (valid()) { + l = Level - 1; + while (path[l].offset == 0) { + assert(l != 0 && "Cannot move beyond begin()"); + --l; + } + } else if (height() < Level) + // end() may have created a height=0 path. + path.resize(Level + 1, Entry(0, 0, 0)); + + // NR is the subtree containing our left sibling. + --path[l].offset; + NodeRef NR = subtree(l); + + // Get the rightmost node in the subtree. + for (++l; l != Level; ++l) { + path[l] = Entry(NR, NR.size() - 1); + NR = NR.subtree(NR.size() - 1); + } + path[l] = Entry(NR, NR.size() - 1); +} + +NodeRef Path::getRightSibling(unsigned Level) const { + // The root has no siblings. + if (Level == 0) + return NodeRef(); + + // Go up the tree until we can go right. + unsigned l = Level - 1; + while (l && atLastEntry(l)) + --l; + + // We can't go right. + if (atLastEntry(l)) + return NodeRef(); + + // NR is the subtree containing our right sibling. + NodeRef NR = path[l].subtree(path[l].offset + 1); + + // Keep left all the way down.
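
As a quick illustration of the IntEqClasses API implemented earlier in this hunk, here is a hypothetical caller (a sketch, assuming the 2.9-era llvm/ADT/IntEqClasses.h is on the include path). join() unions two classes through their leaders, and compress() renumbers the surviving classes densely:

#include "llvm/ADT/IntEqClasses.h"
#include <cstdio>

int main() {
  llvm::IntEqClasses EC;
  EC.grow(5);     // classes: {0} {1} {2} {3} {4}
  EC.join(0, 3);  // {0,3} {1} {2} {4}
  EC.join(3, 4);  // {0,3,4} {1} {2}
  EC.compress();  // renumber to 0 .. M-1; here M == 3
  for (unsigned i = 0; i != 5; ++i)
    printf("%u -> %u\n", i, EC[i]); // 0, 3 and 4 share one class id
  return 0;
}
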
+ for (++l; l != Level; ++l) + NR = NR.subtree(0); + return NR; +} + +void Path::moveRight(unsigned Level) { + assert(Level != 0 && "Cannot move the root node"); + + // Go up the tree until we can go right. + unsigned l = Level - 1; + while (l && atLastEntry(l)) + --l; + + // NR is the subtree containing our right sibling. If we hit end(), we have + // offset(0) == node(0).size(). + if (++path[l].offset == path[l].size) + return; + NodeRef NR = subtree(l); + + for (++l; l != Level; ++l) { + path[l] = Entry(NR, 0); + NR = NR.subtree(0); + } + path[l] = Entry(NR, 0); +} + + +IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity, + const unsigned *CurSize, unsigned NewSize[], + unsigned Position, bool Grow) { + assert(Elements + Grow <= Nodes * Capacity && "Not enough room for elements"); + assert(Position <= Elements && "Invalid position"); + if (!Nodes) + return IdxPair(); + + // Trivial algorithm: left-leaning even distribution. + const unsigned PerNode = (Elements + Grow) / Nodes; + const unsigned Extra = (Elements + Grow) % Nodes; + IdxPair PosPair = IdxPair(Nodes, 0); + unsigned Sum = 0; + for (unsigned n = 0; n != Nodes; ++n) { + Sum += NewSize[n] = PerNode + (n < Extra); + if (PosPair.first == Nodes && Sum > Position) + PosPair = IdxPair(n, Position - (Sum - NewSize[n])); + } + assert(Sum == Elements + Grow && "Bad distribution sum"); + + // Subtract the Grow element that was added. + if (Grow) { + assert(PosPair.first < Nodes && "Bad algebra"); + assert(NewSize[PosPair.first] && "Too few elements to need Grow"); + --NewSize[PosPair.first]; + } + +#ifndef NDEBUG + Sum = 0; + for (unsigned n = 0; n != Nodes; ++n) { + assert(NewSize[n] <= Capacity && "Overallocated node"); + Sum += NewSize[n]; + } + assert(Sum == Elements && "Bad distribution sum"); +#endif + + return PosPair; +} + +} // namespace IntervalMapImpl +} // namespace llvm + diff --git a/final/lib/Support/IsInf.cpp b/final/lib/Support/IsInf.cpp new file mode 100644 index 00000000000..d6da0c99e8d --- /dev/null +++ b/final/lib/Support/IsInf.cpp @@ -0,0 +1,49 @@ +//===-- IsInf.cpp - Platform-independent wrapper around C99 isinf() -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Platform-independent wrapper around C99 isinf() +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#if HAVE_ISINF_IN_MATH_H +# include <math.h> +#elif HAVE_ISINF_IN_CMATH +# include <cmath> +#elif HAVE_STD_ISINF_IN_CMATH +# include <cmath> +using std::isinf; +#elif HAVE_FINITE_IN_IEEEFP_H +// A handy workaround I found at http://www.unixguide.net/sun/faq ... +// apparently this has been a problem with Solaris for years.
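
The "left-leaning even distribution" in distribute() above reduces to simple integer arithmetic: each node gets floor(Elements/Nodes) elements, and the leftmost Elements mod Nodes nodes each get one more. A minimal standalone sketch of just that core (ignoring the Position/Grow bookkeeping):

#include <cstdio>

int main() {
  const unsigned Nodes = 3, Elements = 8;
  unsigned NewSize[Nodes];
  const unsigned PerNode = Elements / Nodes; // 2
  const unsigned Extra = Elements % Nodes;   // the 2 leftmost nodes get one more
  for (unsigned n = 0; n != Nodes; ++n)
    NewSize[n] = PerNode + (n < Extra);
  for (unsigned n = 0; n != Nodes; ++n)
    printf("%u ", NewSize[n]); // prints: 3 3 2
  printf("\n");
  return 0;
}
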
+# include <ieeefp.h> +static int isinf(double x) { return !finite(x) && x==x; } +#elif defined(_MSC_VER) +#include <float.h> +#define isinf(X) (!_finite(X)) +#elif defined(_AIX) && defined(__GNUC__) +// GCC's fixincludes seems to be removing the isinf() declaration from the +// system header /usr/include/math.h +# include <ieeefp.h> +static int isinf(double x) { return !finite(x) && x==x; } +#elif defined(__hpux) +// HP-UX is "special" +#include <math.h> +static int isinf(double x) { return ((x) == INFINITY) || ((x) == -INFINITY); } +#else +# error "Don't know how to get isinf()" +#endif + +namespace llvm { + +int IsInf(float f) { return isinf(f); } +int IsInf(double d) { return isinf(d); } + +} // end namespace llvm; diff --git a/final/lib/Support/IsNAN.cpp b/final/lib/Support/IsNAN.cpp new file mode 100644 index 00000000000..bdfdfbf3155 --- /dev/null +++ b/final/lib/Support/IsNAN.cpp @@ -0,0 +1,33 @@ +//===-- IsNAN.cpp ---------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Platform-independent wrapper around C99 isnan(). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" + +#if HAVE_ISNAN_IN_MATH_H +# include <math.h> +#elif HAVE_ISNAN_IN_CMATH +# include <cmath> +#elif HAVE_STD_ISNAN_IN_CMATH +# include <cmath> +using std::isnan; +#elif defined(_MSC_VER) +#include <float.h> +#define isnan _isnan +#else +# error "Don't know how to get isnan()" +#endif + +namespace llvm { + int IsNAN(float f) { return isnan(f); } + int IsNAN(double d) { return isnan(d); } +} // end namespace llvm; diff --git a/final/lib/Support/Makefile b/final/lib/Support/Makefile new file mode 100644 index 00000000000..d68e500ca5f --- /dev/null +++ b/final/lib/Support/Makefile @@ -0,0 +1,22 @@ +##===- lib/Support/Makefile --------------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +LIBRARYNAME = LLVMSupport +BUILD_ARCHIVE = 1 + +## FIXME: This only requires RTTI because tblgen uses it. Fix that. +REQUIRES_RTTI = 1 + +EXTRA_DIST = Unix Win32 README.txt + +include $(LEVEL)/Makefile.common + +CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) +CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) diff --git a/final/lib/Support/ManagedStatic.cpp b/final/lib/Support/ManagedStatic.cpp new file mode 100644 index 00000000000..c767c15e71c --- /dev/null +++ b/final/lib/Support/ManagedStatic.cpp @@ -0,0 +1,75 @@ +//===-- ManagedStatic.cpp - Static Global wrapper -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ManagedStatic class and llvm_shutdown().
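
The configure-time maze in IsInf.cpp and IsNAN.cpp above exists because pre-C++11 toolchains disagreed on where (or whether) isinf()/isnan() were declared. A minimal modern equivalent, assuming a C++11 <cmath> is available, collapses to the standard functions:

#include <cmath>
#include <cstdio>

int main() {
  double inf = HUGE_VAL;          // positive infinity
  double qnan = std::nan("");     // quiet NaN
  printf("isinf: %d, isnan: %d\n",
         (int)std::isinf(inf), (int)std::isnan(qnan));
  return 0;
}
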
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Atomic.h" +#include <cassert> +using namespace llvm; + +static const ManagedStaticBase *StaticList = 0; + +void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), + void (*Deleter)(void*)) const { + if (llvm_is_multithreaded()) { + llvm_acquire_global_lock(); + + if (Ptr == 0) { + void* tmp = Creator ? Creator() : 0; + + sys::MemoryFence(); + Ptr = tmp; + DeleterFn = Deleter; + + // Add to list of managed statics. + Next = StaticList; + StaticList = this; + } + + llvm_release_global_lock(); + } else { + assert(Ptr == 0 && DeleterFn == 0 && Next == 0 && + "Partially initialized ManagedStatic!?"); + Ptr = Creator ? Creator() : 0; + DeleterFn = Deleter; + + // Add to list of managed statics. + Next = StaticList; + StaticList = this; + } +} + +void ManagedStaticBase::destroy() const { + assert(DeleterFn && "ManagedStatic not initialized correctly!"); + assert(StaticList == this && + "Not destroyed in reverse order of construction?"); + // Unlink from list. + StaticList = Next; + Next = 0; + + // Destroy memory. + DeleterFn(Ptr); + + // Cleanup. + Ptr = 0; + DeleterFn = 0; +} + +/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. +void llvm::llvm_shutdown() { + while (StaticList) + StaticList->destroy(); + + if (llvm_is_multithreaded()) llvm_stop_multithreaded(); +} + diff --git a/final/lib/Support/Memory.cpp b/final/lib/Support/Memory.cpp new file mode 100644 index 00000000000..a9689b2c39f --- /dev/null +++ b/final/lib/Support/Memory.cpp @@ -0,0 +1,74 @@ +//===- Memory.cpp - Memory Handling Support ---------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for allocating memory and dealing +// with memory mapped files +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Memory.h" +#include "llvm/Support/Valgrind.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Memory.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Memory.inc" +#endif + +extern "C" void sys_icache_invalidate(const void *Addr, size_t len); + +/// InvalidateInstructionCache - Before the JIT can run a block of code +/// that has been emitted it must invalidate the instruction cache on some +/// platforms. +void llvm::sys::Memory::InvalidateInstructionCache(const void *Addr, + size_t Len) { + +// icache invalidation for PPC and ARM.
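
A hypothetical caller of the ManagedStatic machinery above (a sketch, assuming the 2.9-era header is on the include path): the wrapped object is created lazily on first dereference, registered on StaticList, and destroyed by llvm_shutdown() in reverse order of construction.

#include "llvm/Support/ManagedStatic.h"
#include <vector>

static llvm::ManagedStatic<std::vector<int> > Counters;

void note(int v) {
  Counters->push_back(v);  // first use triggers RegisterManagedStatic()
}

int main() {
  note(42);
  llvm::llvm_shutdown();   // walks StaticList, calling destroy() on each
  return 0;
}
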
+#if defined(__APPLE__) + +# if (defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) || defined(__arm__) + sys_icache_invalidate(Addr, Len); +# endif + +#else + +# if (defined(__POWERPC__) || defined (__ppc__) || \ + defined(_POWER) || defined(_ARCH_PPC)) && defined(__GNUC__) + const size_t LineSize = 32; + + const intptr_t Mask = ~(LineSize - 1); + const intptr_t StartLine = ((intptr_t) Addr) & Mask; + const intptr_t EndLine = ((intptr_t) Addr + Len + LineSize - 1) & Mask; + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("dcbf 0, %0" : : "r"(Line)); + asm volatile("sync"); + + for (intptr_t Line = StartLine; Line < EndLine; Line += LineSize) + asm volatile("icbi 0, %0" : : "r"(Line)); + asm volatile("isync"); +# elif defined(__arm__) && defined(__GNUC__) + // FIXME: Can we safely always call this for __GNUC__ everywhere? + char *Start = (char*) Addr; + char *End = Start + Len; + __clear_cache(Start, End); +# endif + +#endif // end apple + + ValgrindDiscardTranslations(Addr, Len); +} diff --git a/final/lib/Support/MemoryBuffer.cpp b/final/lib/Support/MemoryBuffer.cpp new file mode 100644 index 00000000000..a0c650d6820 --- /dev/null +++ b/final/lib/Support/MemoryBuffer.cpp @@ -0,0 +1,298 @@ +//===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MemoryBuffer interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Errno.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/system_error.h" +#include <cassert> +#include <cstdio> +#include <cstring> +#include <cerrno> +#include <new> +#include <sys/types.h> +#include <sys/stat.h> +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include <unistd.h> +#include <sys/uio.h> +#else +#include <io.h> +#endif +#include <fcntl.h> +using namespace llvm; + +namespace { const llvm::error_code success; } + +//===----------------------------------------------------------------------===// +// MemoryBuffer implementation itself. +//===----------------------------------------------------------------------===// + +MemoryBuffer::~MemoryBuffer() { } + +/// init - Initialize this MemoryBuffer as a reference to externally allocated +/// memory, memory that we know is already null terminated. +void MemoryBuffer::init(const char *BufStart, const char *BufEnd) { + assert(BufEnd[0] == 0 && "Buffer is not null terminated!"); + BufferStart = BufStart; + BufferEnd = BufEnd; +} + +//===----------------------------------------------------------------------===// +// MemoryBufferMem implementation. +//===----------------------------------------------------------------------===// + +/// CopyStringRef - Copies contents of a StringRef into a block of memory and +/// null-terminates it. +static void CopyStringRef(char *Memory, StringRef Data) { + memcpy(Memory, Data.data(), Data.size()); + Memory[Data.size()] = 0; // Null terminate string. +} + +/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
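
On non-Apple ARM the flush above is delegated to __clear_cache, a libgcc/compiler-rt routine. A minimal sketch of the same pattern for a hypothetical JIT buffer (the function name flush_jit_buffer is illustrative, not from the patch):

// Provided by libgcc / compiler-rt when building with GCC or Clang.
extern "C" void __clear_cache(char *begin, char *end);

void flush_jit_buffer(char *Buf, unsigned Len) {
  // ... emit machine code into Buf ...
  // A no-op on x86 (coherent icache), required on ARM and PowerPC.
  __clear_cache(Buf, Buf + Len);
}
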
+template <typename T> +static T* GetNamedBuffer(StringRef Buffer, StringRef Name) { + char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); + CopyStringRef(Mem + sizeof(T), Name); + return new (Mem) T(Buffer); +} + +namespace { +/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. +class MemoryBufferMem : public MemoryBuffer { +public: + MemoryBufferMem(StringRef InputData) { + init(InputData.begin(), InputData.end()); + } + + virtual const char *getBufferIdentifier() const { + // The name is stored after the class itself. + return reinterpret_cast<const char*>(this + 1); + } +}; +} + +/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note +/// that EndPtr[0] must be a null byte and be accessible! +MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, + StringRef BufferName) { + return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName); +} + +/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, +/// copying the contents and taking ownership of it. This has no requirements +/// on EndPtr[0]. +MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData, + StringRef BufferName) { + MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName); + if (!Buf) return 0; + memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(), + InputData.size()); + return Buf; +} + +/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size +/// that is not initialized. Note that the caller should initialize the +/// memory allocated by this method. The memory is owned by the MemoryBuffer +/// object. +MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, + StringRef BufferName) { + // Allocate space for the MemoryBuffer, the data and the name. It is important + // that MemoryBuffer and data are aligned so PointerIntPair works with them. + size_t AlignedStringLen = + RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, + sizeof(void*)); // TODO: Is sizeof(void*) enough? + size_t RealLen = AlignedStringLen + Size + 1; + char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); + if (!Mem) return 0; + + // The name is stored after the class itself. + CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName); + + // The buffer begins after the name and must be aligned. + char *Buf = Mem + AlignedStringLen; + Buf[Size] = 0; // Null terminate buffer. + + return new (Mem) MemoryBufferMem(StringRef(Buf, Size)); +} + +/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that +/// is completely initialized to zeros. Note that the caller should +/// initialize the memory allocated by this method. The memory is owned by +/// the MemoryBuffer object. +MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { + MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName); + if (!SB) return 0; + memset(const_cast<char*>(SB->getBufferStart()), 0, Size); + return SB; +} + + +/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin +/// if the Filename is "-". If an error occurs, this returns null and fills +/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN) +/// returns an empty buffer.
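
A hypothetical caller of the factory methods above (a sketch against this API revision, in which the caller owns and deletes the returned buffer): getMemBufferCopy copies and null-terminates the data, so the source string need not outlive the buffer.

#include "llvm/Support/MemoryBuffer.h"
#include <cstdio>

int main() {
  llvm::MemoryBuffer *MB =
      llvm::MemoryBuffer::getMemBufferCopy("hello world", "demo buffer");
  if (!MB) return 1;
  printf("%s: %u bytes\n", MB->getBufferIdentifier(),
         (unsigned)MB->getBufferSize());
  delete MB; // caller-owned in this API revision
  return 0;
}
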
+error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, + OwningPtr<MemoryBuffer> &result, + int64_t FileSize) { + if (Filename == "-") + return getSTDIN(result); + return getFile(Filename, result, FileSize); +} + +error_code MemoryBuffer::getFileOrSTDIN(const char *Filename, + OwningPtr<MemoryBuffer> &result, + int64_t FileSize) { + if (strcmp(Filename, "-") == 0) + return getSTDIN(result); + return getFile(Filename, result, FileSize); +} + +//===----------------------------------------------------------------------===// +// MemoryBuffer::getFile implementation. +//===----------------------------------------------------------------------===// + +namespace { +/// MemoryBufferMMapFile - This represents a file that was mapped in with the +/// sys::Path::MapInFilePages method. When destroyed, it calls the +/// sys::Path::UnMapFilePages method. +class MemoryBufferMMapFile : public MemoryBufferMem { +public: + MemoryBufferMMapFile(StringRef Buffer) + : MemoryBufferMem(Buffer) { } + + ~MemoryBufferMMapFile() { + sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); + } +}; +} + +error_code MemoryBuffer::getFile(StringRef Filename, + OwningPtr<MemoryBuffer> &result, + int64_t FileSize) { + // Ensure the path is null terminated. + SmallString<256> PathBuf(Filename.begin(), Filename.end()); + return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize); +} + +error_code MemoryBuffer::getFile(const char *Filename, + OwningPtr<MemoryBuffer> &result, + int64_t FileSize) { + int OpenFlags = O_RDONLY; +#ifdef O_BINARY + OpenFlags |= O_BINARY; // Open input file in binary mode on win32. +#endif + int FD = ::open(Filename, OpenFlags); + if (FD == -1) { + return error_code(errno, posix_category()); + } + error_code ret = getOpenFile(FD, Filename, result, FileSize); + close(FD); + return ret; +} + +error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, + OwningPtr<MemoryBuffer> &result, + int64_t FileSize) { + // If we don't know the file size, use fstat to find out. fstat on an open + // file descriptor is cheaper than stat on a random path. + if (FileSize == -1) { + struct stat FileInfo; + // TODO: This should use fstat64 when available. + if (fstat(FD, &FileInfo) == -1) { + return error_code(errno, posix_category()); + } + FileSize = FileInfo.st_size; + } + + + // If the file is large, try to use mmap to read it in. We don't use mmap + // for small files, because this can severely fragment our address space. Also + // don't try to map files that are exactly a multiple of the system page size, + // as the file would not have the required null terminator. + // + // FIXME: Can we just mmap an extra page in the latter case? + if (FileSize >= 4096*4 && + (FileSize & (sys::Process::GetPageSize()-1)) != 0) { + if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { + result.reset(GetNamedBuffer<MemoryBufferMMapFile>( + StringRef(Pages, FileSize), Filename)); + return success; + } + } + + MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename); + if (!Buf) { + // Failed to create a buffer. The only way it can fail is if + // new(std::nothrow) returns 0. + return make_error_code(errc::not_enough_memory); + } + + OwningPtr<MemoryBuffer> SB(Buf); + char *BufPtr = const_cast<char*>(SB->getBufferStart()); + + size_t BytesLeft = FileSize; + while (BytesLeft) { + ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); + if (NumRead == -1) { + if (errno == EINTR) + continue; + // Error while reading. + return error_code(errno, posix_category()); + } else if (NumRead == 0) { + // We hit EOF early, truncate and terminate buffer.
+ Buf->BufferEnd = BufPtr; + *BufPtr = 0; + result.swap(SB); + return success; + } + BytesLeft -= NumRead; + BufPtr += NumRead; + } + + result.swap(SB); + return success; +} + +//===----------------------------------------------------------------------===// +// MemoryBuffer::getSTDIN implementation. +//===----------------------------------------------------------------------===// + +error_code MemoryBuffer::getSTDIN(OwningPtr<MemoryBuffer> &result) { + // Read in all of the data from stdin, we cannot mmap stdin. + // + // FIXME: That isn't necessarily true, we should try to mmap stdin and + // fallback if it fails. + sys::Program::ChangeStdinToBinary(); + + const ssize_t ChunkSize = 4096*4; + SmallString<ChunkSize> Buffer; + ssize_t ReadBytes; + // Read into Buffer until we hit EOF. + do { + Buffer.reserve(Buffer.size() + ChunkSize); + ReadBytes = read(0, Buffer.end(), ChunkSize); + if (ReadBytes == -1) { + if (errno == EINTR) continue; + return error_code(errno, posix_category()); + } + Buffer.set_size(Buffer.size() + ReadBytes); + } while (ReadBytes != 0); + + result.reset(getMemBufferCopy(Buffer, "<stdin>")); + return success; +} diff --git a/final/lib/Support/MemoryObject.cpp b/final/lib/Support/MemoryObject.cpp new file mode 100644 index 00000000000..91e3ecd23a2 --- /dev/null +++ b/final/lib/Support/MemoryObject.cpp @@ -0,0 +1,34 @@ +//===- MemoryObject.cpp - Abstract memory interface -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/MemoryObject.h" +using namespace llvm; + +MemoryObject::~MemoryObject() { +} + +int MemoryObject::readBytes(uint64_t address, + uint64_t size, + uint8_t* buf, + uint64_t* copied) const { + uint64_t current = address; + uint64_t limit = getBase() + getExtent(); + + while (current - address < size && current < limit) { + if (readByte(current, &buf[(current - address)])) + return -1; + + current++; + } + + if (copied) + *copied = current - address; + + return 0; +} diff --git a/final/lib/Support/Mutex.cpp b/final/lib/Support/Mutex.cpp new file mode 100644 index 00000000000..b408973bbad --- /dev/null +++ b/final/lib/Support/Mutex.cpp @@ -0,0 +1,157 @@ +//===- Mutex.cpp - Mutual Exclusion Lock ------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the llvm::sys::Mutex class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/Support/Mutex.h" + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code.
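
The chunked stdin loop in getSTDIN above relies on the classic POSIX retry pattern: read() may return short counts or fail with EINTR without that being a real error. A standalone sketch of the same pattern (readAll is an illustrative name, not from the patch):

#include <cerrno>
#include <string>
#include <unistd.h>

bool readAll(int FD, std::string &Out) {
  char Chunk[4096 * 4];
  for (;;) {
    ssize_t N = read(FD, Chunk, sizeof(Chunk));
    if (N == 0) return true;         // EOF: done
    if (N == -1) {
      if (errno == EINTR) continue;  // interrupted by a signal: retry
      return false;                  // real I/O error
    }
    Out.append(Chunk, (size_t)N);    // short reads are normal; keep going
  }
}
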
+//===----------------------------------------------------------------------===// + +#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0 +// Define all methods as no-ops if threading is explicitly disabled +namespace llvm { +using namespace sys; +MutexImpl::MutexImpl( bool recursive) { } +MutexImpl::~MutexImpl() { } +bool MutexImpl::acquire() { return true; } +bool MutexImpl::release() { return true; } +bool MutexImpl::tryacquire() { return true; } +} +#else + +#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_MUTEX_LOCK) + +#include <cassert> +#include <pthread.h> +#include <stdlib.h> + +namespace llvm { +using namespace sys; + + +// This variable is useful for situations where the pthread library has been +// compiled with weak linkage for its interface symbols. This allows the +// threading support to be turned off by simply not linking against -lpthread. +// In that situation, the value of pthread_mutex_init will be 0 and +// consequently pthread_enabled will be false. In such situations, all the +// pthread operations become no-ops and the functions all return false. If +// pthread_mutex_init does have an address, then mutex support is enabled. +// Note: all LLVM tools will link against -lpthread if its available since it +// is configured into the LIBS variable. +// Note: this line of code generates a warning if pthread_mutex_init is not +// declared with weak linkage. It's safe to ignore the warning. +static const bool pthread_enabled = true; + +// Construct a Mutex using pthread calls +MutexImpl::MutexImpl( bool recursive) + : data_(0) +{ + if (pthread_enabled) + { + // Declare the pthread_mutex data structures + pthread_mutex_t* mutex = + static_cast<pthread_mutex_t*>(malloc(sizeof(pthread_mutex_t))); + pthread_mutexattr_t attr; + + // Initialize the mutex attributes + int errorcode = pthread_mutexattr_init(&attr); + assert(errorcode == 0); + + // Initialize the mutex as a recursive mutex, if requested, or normal + // otherwise. + int kind = ( recursive ?
PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL ); + errorcode = pthread_mutexattr_settype(&attr, kind); + assert(errorcode == 0); + +#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__) + // Make it a process local mutex + errorcode = pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE); + assert(errorcode == 0); +#endif + + // Initialize the mutex + errorcode = pthread_mutex_init(mutex, &attr); + assert(errorcode == 0); + + // Destroy the attributes + errorcode = pthread_mutexattr_destroy(&attr); + assert(errorcode == 0); + + // Assign the data member + data_ = mutex; + } +} + +// Destruct a Mutex +MutexImpl::~MutexImpl() +{ + if (pthread_enabled) + { + pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_); + assert(mutex != 0); + pthread_mutex_destroy(mutex); + free(mutex); + } +} + +bool +MutexImpl::acquire() +{ + if (pthread_enabled) + { + pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_); + assert(mutex != 0); + + int errorcode = pthread_mutex_lock(mutex); + return errorcode == 0; + } else return false; +} + +bool +MutexImpl::release() +{ + if (pthread_enabled) + { + pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_); + assert(mutex != 0); + + int errorcode = pthread_mutex_unlock(mutex); + return errorcode == 0; + } else return false; +} + +bool +MutexImpl::tryacquire() +{ + if (pthread_enabled) + { + pthread_mutex_t* mutex = static_cast<pthread_mutex_t*>(data_); + assert(mutex != 0); + + int errorcode = pthread_mutex_trylock(mutex); + return errorcode == 0; + } else return false; +} + +} + +#elif defined(LLVM_ON_UNIX) +#include "Unix/Mutex.inc" +#elif defined( LLVM_ON_WIN32) +#include "Windows/Mutex.inc" +#else +#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/Mutex.cpp +#endif +#endif diff --git a/final/lib/Support/Path.cpp b/final/lib/Support/Path.cpp new file mode 100644 index 00000000000..e5e875bc54d --- /dev/null +++ b/final/lib/Support/Path.cpp @@ -0,0 +1,283 @@ +//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system Path concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Path.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Config/config.h" +#include "llvm/Support/FileSystem.h" +#include <cassert> +#include <cstring> +#include <ostream> +using namespace llvm; +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code.
+//===----------------------------------------------------------------------===// + +bool Path::operator==(const Path &that) const { + return path == that.path; +} + +bool Path::operator<(const Path& that) const { + return path < that.path; +} + +Path +Path::GetLLVMConfigDir() { + Path result; +#ifdef LLVM_ETCDIR + if (result.set(LLVM_ETCDIR)) + return result; +#endif + return GetLLVMDefaultConfigDir(); +} + +LLVMFileType +sys::IdentifyFileType(const char *magic, unsigned length) { + assert(magic && "Invalid magic number string"); + assert(length >=4 && "Invalid magic number length"); + switch ((unsigned char)magic[0]) { + case 0xDE: // 0x0B17C0DE = BC wrapper + if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 && + magic[3] == (char)0x0B) + return Bitcode_FileType; + break; + case 'B': + if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE) + return Bitcode_FileType; + break; + case '!': + if (length >= 8) + if (memcmp(magic,"!<arch>\n",8) == 0) + return Archive_FileType; + break; + + case '\177': + if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') { + if (length >= 18 && magic[17] == 0) + switch (magic[16]) { + default: break; + case 1: return ELF_Relocatable_FileType; + case 2: return ELF_Executable_FileType; + case 3: return ELF_SharedObject_FileType; + case 4: return ELF_Core_FileType; + } + } + break; + + case 0xCA: + if (magic[1] == char(0xFE) && magic[2] == char(0xBA) && + magic[3] == char(0xBE)) { + // This is complicated by an overlap with Java class files. + // See the Mach-O section in /usr/share/file/magic for details. + if (length >= 8 && magic[7] < 43) + // FIXME: Universal Binary of any type. + return Mach_O_DynamicallyLinkedSharedLib_FileType; + } + break; + + case 0xFE: + case 0xCE: { + uint16_t type = 0; + if (magic[0] == char(0xFE) && magic[1] == char(0xED) && + magic[2] == char(0xFA) && magic[3] == char(0xCE)) { + /* Native endian */ + if (length >= 16) type = magic[14] << 8 | magic[15]; + } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) && + magic[2] == char(0xED) && magic[3] == char(0xFE)) { + /* Reverse endian */ + if (length >= 14) type = magic[13] << 8 | magic[12]; + } + switch (type) { + default: break; + case 1: return Mach_O_Object_FileType; + case 2: return Mach_O_Executable_FileType; + case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType; + case 4: return Mach_O_Core_FileType; + case 5: return Mach_O_PreloadExecutable_FileType; + case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType; + case 7: return Mach_O_DynamicLinker_FileType; + case 8: return Mach_O_Bundle_FileType; + case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType; + case 10: break; // FIXME: MH_DSYM companion file with only debug. + } + break; + } + case 0xF0: // PowerPC Windows + case 0x83: // Alpha 32-bit + case 0x84: // Alpha 64-bit + case 0x66: // MIPS R4000 Windows + case 0x50: // mc68K + case 0x4c: // 80386 Windows + if (magic[1] == 0x01) + return COFF_FileType; + + case 0x90: // PA-RISC Windows + case 0x68: // mc68K Windows + if (magic[1] == 0x02) + return COFF_FileType; + break; + case 0x64: // x86-64 Windows.
+ if (magic[1] == char(0x86)) + return COFF_FileType; + break; + + default: + break; + } + return Unknown_FileType; +} + +bool +Path::isArchive() const { + LLVMFileType type; + if (fs::identify_magic(str(), type)) + return false; + return type == Archive_FileType; +} + +bool +Path::isDynamicLibrary() const { + LLVMFileType type; + if (fs::identify_magic(str(), type)) + return false; + switch (type) { + default: return false; + case Mach_O_FixedVirtualMemorySharedLib_FileType: + case Mach_O_DynamicallyLinkedSharedLib_FileType: + case Mach_O_DynamicallyLinkedSharedLibStub_FileType: + case ELF_SharedObject_FileType: + case COFF_FileType: return true; + } +} + +bool +Path::isObjectFile() const { + LLVMFileType type; + if (fs::identify_magic(str(), type) || type == Unknown_FileType) + return false; + return true; +} + +Path +Path::FindLibrary(std::string& name) { + std::vector<sys::Path> LibPaths; + GetSystemLibraryPaths(LibPaths); + for (unsigned i = 0; i < LibPaths.size(); ++i) { + sys::Path FullPath(LibPaths[i]); + FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT); + if (FullPath.isDynamicLibrary()) + return FullPath; + FullPath.eraseSuffix(); + FullPath.appendSuffix("a"); + if (FullPath.isArchive()) + return FullPath; + } + return sys::Path(); +} + +StringRef Path::GetDLLSuffix() { + return &(LTDL_SHLIB_EXT[1]); +} + +void +Path::appendSuffix(StringRef suffix) { + if (!suffix.empty()) { + path.append("."); + path.append(suffix); + } +} + +bool +Path::isBitcodeFile() const { + LLVMFileType type; + if (fs::identify_magic(str(), type)) + return false; + return type == Bitcode_FileType; +} + +bool Path::hasMagicNumber(StringRef Magic) const { + std::string actualMagic; + if (getMagicNumber(actualMagic, static_cast<unsigned>(Magic.size()))) + return Magic == actualMagic; + return false; +} + +static void getPathList(const char*path, std::vector<Path>& Paths) { + const char* at = path; + const char* delim = strchr(at, PathSeparator); + Path tmpPath; + while (delim != 0) { + std::string tmp(at, size_t(delim-at)); + if (tmpPath.set(tmp)) + if (tmpPath.canRead()) + Paths.push_back(tmpPath); + at = delim + 1; + delim = strchr(at, PathSeparator); + } + + if (*at != 0) + if (tmpPath.set(std::string(at))) + if (tmpPath.canRead()) + Paths.push_back(tmpPath); +} + +static StringRef getDirnameCharSep(StringRef path, const char *Sep) { + assert(Sep[0] != '\0' && Sep[1] == '\0' && + "Sep must be a 1-character string literal."); + if (path.empty()) + return "."; + + // If the path is all slashes, return a single slash. + // Otherwise, remove all trailing slashes. + + signed pos = static_cast<signed>(path.size()) - 1; + + while (pos >= 0 && path[pos] == Sep[0]) + --pos; + + if (pos < 0) + return path[0] == Sep[0] ? Sep : "."; + + // Any slashes left? + signed i = 0; + + while (i < pos && path[i] != Sep[0]) + ++i; + + if (i == pos) // No slashes? Return "." + return "."; + + // There is at least one slash left. Remove all trailing non-slashes. + while (pos >= 0 && path[pos] != Sep[0]) + --pos; + + // Remove any trailing slashes. + while (pos >= 0 && path[pos] == Sep[0]) + --pos; + + if (pos < 0) + return path[0] == Sep[0] ? Sep : "."; + + return path.substr(0, pos+1); +} + +// Include the truly platform-specific parts of this class.
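
The magic-number sniffing in IdentifyFileType above boils down to reading the first few bytes of a file and comparing them against known signatures. A minimal standalone sketch of the same idea, using the ELF, ar-archive, and raw bitcode signatures that appear in the switch above (identify is an illustrative name):

#include <cstdio>
#include <cstring>

const char *identify(const char *magic, unsigned len) {
  if (len >= 4 && memcmp(magic, "\177ELF", 4) == 0)
    return "elf";
  if (len >= 8 && memcmp(magic, "!<arch>\n", 8) == 0)
    return "archive";
  if (len >= 4 && memcmp(magic, "BC\xc0\xde", 4) == 0)
    return "llvm-bitcode";
  return "unknown";
}

int main(int argc, char **argv) {
  if (argc < 2) return 1;
  FILE *F = fopen(argv[1], "rb");
  if (!F) return 1;
  char Buf[8];
  size_t N = fread(Buf, 1, sizeof(Buf), F);
  fclose(F);
  printf("%s\n", identify(Buf, (unsigned)N));
  return 0;
}
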
+#if defined(LLVM_ON_UNIX) +#include "Unix/Path.inc" +#endif +#if defined(LLVM_ON_WIN32) +#include "Windows/Path.inc" +#endif diff --git a/final/lib/Support/PathV2.cpp b/final/lib/Support/PathV2.cpp new file mode 100644 index 00000000000..896c94c071b --- /dev/null +++ b/final/lib/Support/PathV2.cpp @@ -0,0 +1,774 @@ +//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system PathV2 API. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/PathV2.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/ErrorHandling.h" +#include <cctype> +#include <cstdio> +#include <cstring> + +namespace { + using llvm::StringRef; + using llvm::sys::path::is_separator; + +#ifdef LLVM_ON_WIN32 + const StringRef separators = "\\/"; + const char prefered_separator = '\\'; +#else + const StringRef separators = "/"; + const char prefered_separator = '/'; +#endif + + const llvm::error_code success; + + StringRef find_first_component(StringRef path) { + // Look for this first component in the following order. + // * empty (in this case we return an empty string) + // * either C: or {//,\\}net. + // * {/,\} + // * {.,..} + // * {file,directory}name + + if (path.empty()) + return path; + +#ifdef LLVM_ON_WIN32 + // C: + if (path.size() >= 2 && std::isalpha(path[0]) && path[1] == ':') + return path.substr(0, 2); +#endif + + // //net + if ((path.size() > 2) && + is_separator(path[0]) && + path[0] == path[1] && + !is_separator(path[2])) { + // Find the next directory separator. + size_t end = path.find_first_of(separators, 2); + return path.substr(0, end); + } + + // {/,\} + if (is_separator(path[0])) + return path.substr(0, 1); + + if (path.startswith("..")) + return path.substr(0, 2); + + if (path[0] == '.') + return path.substr(0, 1); + + // * {file,directory}name + size_t end = path.find_first_of(separators, 2); + return path.substr(0, end); + } + + size_t filename_pos(StringRef str) { + if (str.size() == 2 && + is_separator(str[0]) && + str[0] == str[1]) + return 0; + + if (str.size() > 0 && is_separator(str[str.size() - 1])) + return str.size() - 1; + + size_t pos = str.find_last_of(separators, str.size() - 1); + +#ifdef LLVM_ON_WIN32 + if (pos == StringRef::npos) + pos = str.find_last_of(':', str.size() - 2); +#endif + + if (pos == StringRef::npos || + (pos == 1 && is_separator(str[0]))) + return 0; + + return pos + 1; + } + + size_t root_dir_start(StringRef str) { + // case "c:/" +#ifdef LLVM_ON_WIN32 + if (str.size() > 2 && + str[1] == ':' && + is_separator(str[2])) + return 2; +#endif + + // case "//" + if (str.size() == 2 && + is_separator(str[0]) && + str[0] == str[1]) + return StringRef::npos; + + // case "//net" + if (str.size() > 3 && + is_separator(str[0]) && + str[0] == str[1] && + !is_separator(str[2])) { + return str.find_first_of(separators, 2); + } + + // case "/" + if (str.size() > 0 && is_separator(str[0])) + return 0; + + return StringRef::npos; + } + + size_t parent_path_end(StringRef path) { + size_t end_pos = filename_pos(path); + + bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]); + + // Skip separators except for root dir.
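
Under the component-splitting rules above, a POSIX path like "/a/b/../c" decomposes into "/", "a", "b", "..", "c". A hypothetical walk over the const_iterator interface implemented just below (a sketch, assuming the 2.9-era PathV2.h and raw_ostream.h are on the include path):

#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::StringRef P = "/a/b/../c";
  for (llvm::sys::path::const_iterator I = llvm::sys::path::begin(P),
                                       E = llvm::sys::path::end(P);
       I != E; ++I)
    llvm::outs() << *I << "\n"; // prints: /  a  b  ..  c (one per line)
  return 0;
}
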
+ size_t root_dir_pos = root_dir_start(path.substr(0, end_pos)); + + while(end_pos > 0 && + (end_pos - 1) != root_dir_pos && + is_separator(path[end_pos - 1])) + --end_pos; + + if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep) + return StringRef::npos; + + return end_pos; + } +} // end unnamed namespace + +namespace llvm { +namespace sys { +namespace path { + +const_iterator begin(StringRef path) { + const_iterator i; + i.Path = path; + i.Component = find_first_component(path); + i.Position = 0; + return i; +} + +const_iterator end(StringRef path) { + const_iterator i; + i.Path = path; + i.Position = path.size(); + return i; +} + +const_iterator &const_iterator::operator++() { + assert(Position < Path.size() && "Tried to increment past end!"); + + // Increment Position to past the current component + Position += Component.size(); + + // Check for end. + if (Position == Path.size()) { + Component = StringRef(); + return *this; + } + + // Both POSIX and Windows treat paths that begin with exactly two separators + // specially. + bool was_net = Component.size() > 2 && + is_separator(Component[0]) && + Component[1] == Component[0] && + !is_separator(Component[2]); + + // Handle separators. + if (is_separator(Path[Position])) { + // Root dir. + if (was_net +#ifdef LLVM_ON_WIN32 + // c:/ + || Component.endswith(":") +#endif + ) { + Component = Path.substr(Position, 1); + return *this; + } + + // Skip extra separators. + while (Position != Path.size() && + is_separator(Path[Position])) { + ++Position; + } + + // Treat trailing '/' as a '.'. + if (Position == Path.size()) { + --Position; + Component = "."; + return *this; + } + } + + // Find next component. + size_t end_pos = Path.find_first_of(separators, Position); + Component = Path.slice(Position, end_pos); + + return *this; +} + +const_iterator &const_iterator::operator--() { + // If we're at the end and the previous char was a '/', return '.'. + if (Position == Path.size() && + Path.size() > 1 && + is_separator(Path[Position - 1]) +#ifdef LLVM_ON_WIN32 + && Path[Position - 2] != ':' +#endif + ) { + --Position; + Component = "."; + return *this; + } + + // Skip separators unless it's the root directory. + size_t root_dir_pos = root_dir_start(Path); + size_t end_pos = Position; + + while(end_pos > 0 && + (end_pos - 1) != root_dir_pos && + is_separator(Path[end_pos - 1])) + --end_pos; + + // Find next separator. + size_t start_pos = filename_pos(Path.substr(0, end_pos)); + Component = Path.slice(start_pos, end_pos); + Position = start_pos; + return *this; +} + +bool const_iterator::operator==(const const_iterator &RHS) const { + return Path.begin() == RHS.Path.begin() && + Position == RHS.Position; +} + +bool const_iterator::operator!=(const const_iterator &RHS) const { + return !(*this == RHS); +} + +ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const { + return Position - RHS.Position; +} + +const StringRef root_path(StringRef path) { + const_iterator b = begin(path), + pos = b, + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if (has_net || has_drive) { + if ((++pos != e) && is_separator((*pos)[0])) { + // {C:/,//net/}, so get the first two components. + return path.substr(0, b->size() + pos->size()); + } else { + // just {C:,//net}, return the first component. + return *b; + } + } + + // POSIX style root directory. 
+ if (is_separator((*b)[0])) { + return *b; + } + } + + return StringRef(); +} + +const StringRef root_name(StringRef path) { + const_iterator b = begin(path), + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if (has_net || has_drive) { + // just {C:,//net}, return the first component. + return *b; + } + } + + // No path or no name. + return StringRef(); +} + +const StringRef root_directory(StringRef path) { + const_iterator b = begin(path), + pos = b, + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if ((has_net || has_drive) && + // {C:,//net}, skip to the next component. + (++pos != e) && is_separator((*pos)[0])) { + return *pos; + } + + // POSIX style root directory. + if (!has_net && is_separator((*b)[0])) { + return *b; + } + } + + // No path or no root. + return StringRef(); +} + +const StringRef relative_path(StringRef path) { + StringRef root = root_path(path); + return root.substr(root.size()); +} + +void append(SmallVectorImpl<char> &path, const Twine &a, + const Twine &b, + const Twine &c, + const Twine &d) { + SmallString<32> a_storage; + SmallString<32> b_storage; + SmallString<32> c_storage; + SmallString<32> d_storage; + + SmallVector<StringRef, 4> components; + if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage)); + if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage)); + if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); + if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); + + for (SmallVectorImpl<StringRef>::const_iterator i = components.begin(), + e = components.end(); + i != e; ++i) { + bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]); + bool component_has_sep = !i->empty() && is_separator((*i)[0]); + bool is_root_name = has_root_name(*i); + + if (path_has_sep) { + // Strip separators from beginning of component. + size_t loc = i->find_first_not_of(separators); + StringRef c = i->substr(loc); + + // Append it. + path.append(c.begin(), c.end()); + continue; + } + + if (!component_has_sep && !(path.empty() || is_root_name)) { + // Add a separator. + path.push_back(prefered_separator); + } + + path.append(i->begin(), i->end()); + } +} + +void append(SmallVectorImpl<char> &path, + const_iterator begin, const_iterator end) { + for (; begin != end; ++begin) + path::append(path, *begin); +} + +const StringRef parent_path(StringRef path) { + size_t end_pos = parent_path_end(path); + if (end_pos == StringRef::npos) + return StringRef(); + else + return path.substr(0, end_pos); +} + +void remove_filename(SmallVectorImpl<char> &path) { + size_t end_pos = parent_path_end(StringRef(path.begin(), path.size())); + if (end_pos != StringRef::npos) + path.set_size(end_pos); +} + +void replace_extension(SmallVectorImpl<char> &path, const Twine &extension) { + StringRef p(path.begin(), path.size()); + SmallString<32> ext_storage; + StringRef ext = extension.toStringRef(ext_storage); + + // Erase existing extension. + size_t pos = p.find_last_of('.'); + if (pos != StringRef::npos && pos >= filename_pos(p)) + path.set_size(pos); + + // Append '.' if needed. + if (ext.size() > 0 && ext[0] != '.') + path.push_back('.'); + + // Append extension.
+ path.append(ext.begin(), ext.end()); +} + +void native(const Twine &path, SmallVectorImpl<char> &result) { + // Clear result. + result.clear(); +#ifdef LLVM_ON_WIN32 + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + result.reserve(p.size()); + for (StringRef::const_iterator i = p.begin(), + e = p.end(); + i != e; + ++i) { + if (*i == '/') + result.push_back('\\'); + else + result.push_back(*i); + } +#else + path.toVector(result); +#endif +} + +const StringRef filename(StringRef path) { + return *(--end(path)); +} + +const StringRef stem(StringRef path) { + StringRef fname = filename(path); + size_t pos = fname.find_last_of('.'); + if (pos == StringRef::npos) + return fname; + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return fname; + else + return fname.substr(0, pos); +} + +const StringRef extension(StringRef path) { + StringRef fname = filename(path); + size_t pos = fname.find_last_of('.'); + if (pos == StringRef::npos) + return StringRef(); + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return StringRef(); + else + return fname.substr(pos); +} + +bool is_separator(char value) { + switch(value) { +#ifdef LLVM_ON_WIN32 + case '\\': // fall through +#endif + case '/': return true; + default: return false; + } +} + +bool has_root_name(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_name(p).empty(); +} + +bool has_root_directory(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_directory(p).empty(); +} + +bool has_root_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_path(p).empty(); +} + +bool has_relative_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !relative_path(p).empty(); +} + +bool has_filename(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !filename(p).empty(); +} + +bool has_parent_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !parent_path(p).empty(); +} + +bool has_stem(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !stem(p).empty(); +} + +bool has_extension(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !extension(p).empty(); +} + +bool is_absolute(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + bool rootDir = has_root_directory(p), +#ifdef LLVM_ON_WIN32 + rootName = has_root_name(p); +#else + rootName = true; +#endif + + return rootDir && rootName; +} + +bool is_relative(const Twine &path) { + return !is_absolute(path); +} + +} // end namespace path + +namespace fs { + +error_code make_absolute(SmallVectorImpl<char> &path) { + StringRef p(path.data(), path.size()); + + bool rootName = path::has_root_name(p), + rootDirectory = path::has_root_directory(p); + + // Already absolute. + if (rootName && rootDirectory) + return success; + + // All of the following conditions will need the current directory. + SmallString<128> current_dir; + if (error_code ec = current_path(current_dir)) return ec; + + // Relative path. Prepend the current directory.
+ if (!rootName && !rootDirectory) { + // Append path to the current directory. + path::append(current_dir, p); + // Set path to the result. + path.swap(current_dir); + return success; + } + + if (!rootName && rootDirectory) { + StringRef cdrn = path::root_name(current_dir); + SmallString<128> curDirRootName(cdrn.begin(), cdrn.end()); + path::append(curDirRootName, p); + // Set path to the result. + path.swap(curDirRootName); + return success; + } + + if (rootName && !rootDirectory) { + StringRef pRootName = path::root_name(p); + StringRef bRootDirectory = path::root_directory(current_dir); + StringRef bRelativePath = path::relative_path(current_dir); + StringRef pRelativePath = path::relative_path(p); + + SmallString<128> res; + path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath); + path.swap(res); + return success; + } + + llvm_unreachable("All rootName and rootDirectory combinations should have " + "occurred above!"); +} + +error_code create_directories(const Twine &path, bool &existed) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + StringRef parent = path::parent_path(p); + bool parent_exists; + + if (error_code ec = fs::exists(parent, parent_exists)) return ec; + + if (!parent_exists) + return create_directories(parent, existed); + + return create_directory(p, existed); +} + +bool exists(file_status status) { + return status_known(status) && status.type() != file_type::file_not_found; +} + +bool status_known(file_status s) { + return s.type() != file_type::status_error; +} + +bool is_directory(file_status status) { + return status.type() == file_type::directory_file; +} + +error_code is_directory(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_directory(st); + return success; +} + +bool is_regular_file(file_status status) { + return status.type() == file_type::regular_file; +} + +error_code is_regular_file(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_regular_file(st); + return success; +} + +bool is_symlink(file_status status) { + return status.type() == file_type::symlink_file; +} + +error_code is_symlink(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_symlink(st); + return success; +} + +bool is_other(file_status status) { + return exists(status) && + !is_regular_file(status) && + !is_directory(status) && + !is_symlink(status); +} + +void directory_entry::replace_filename(const Twine &filename, file_status st, + file_status symlink_st) { + SmallString<128> path(Path.begin(), Path.end()); + path::remove_filename(path); + path::append(path, filename); + Path = path.str(); + Status = st; + SymlinkStatus = symlink_st; +} + +error_code has_magic(const Twine &path, const Twine &magic, bool &result) { + SmallString<32> MagicStorage; + StringRef Magic = magic.toStringRef(MagicStorage); + SmallString<32> Buffer; + + if (error_code ec = get_magic(path, Magic.size(), Buffer)) { + if (ec == errc::value_too_large) { + // Magic.size() > file_size(Path). 
+      result = false;
+      return success;
+    }
+    return ec;
+  }
+
+  result = Magic == Buffer;
+  return success;
+}
+
+error_code identify_magic(const Twine &path, LLVMFileType &result) {
+  SmallString<32> Magic;
+  error_code ec = get_magic(path, Magic.capacity(), Magic);
+  if (ec && ec != errc::value_too_large)
+    return ec;
+
+  result = IdentifyFileType(Magic.data(), Magic.size());
+  return success;
+}
+
+namespace {
+error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) {
+  if (ft == file_type::directory_file) {
+    // This code would be a lot better with exceptions ;/.
+    error_code ec;
+    for (directory_iterator i(path, ec), e; i != e; i.increment(ec)) {
+      if (ec) return ec;
+      file_status st;
+      if (error_code ec = i->status(st)) return ec;
+      if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec;
+    }
+    bool obviously_this_exists;
+    if (error_code ec = remove(path, obviously_this_exists)) return ec;
+    assert(obviously_this_exists);
+    ++count; // Include the directory itself in the items removed.
+  } else {
+    bool obviously_this_exists;
+    if (error_code ec = remove(path, obviously_this_exists)) return ec;
+    assert(obviously_this_exists);
+    ++count;
+  }
+
+  return success;
+}
+} // end unnamed namespace
+
+error_code remove_all(const Twine &path, uint32_t &num_removed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toStringRef(path_storage);
+
+  file_status fs;
+  if (error_code ec = status(path, fs))
+    return ec;
+  num_removed = 0;
+  return remove_all_r(p, fs.type(), num_removed);
+}
+
+error_code directory_entry::status(file_status &result) const {
+  return fs::status(Path, result);
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
+
+// Include the truly platform-specific parts.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/PathV2.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/PathV2.inc"
+#endif
diff --git a/final/lib/Support/PluginLoader.cpp b/final/lib/Support/PluginLoader.cpp
new file mode 100644
index 00000000000..2924cfa3889
--- /dev/null
+++ b/final/lib/Support/PluginLoader.cpp
@@ -0,0 +1,47 @@
+//===-- PluginLoader.cpp - Implement -load command line option ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the -load <plugin> command line option handler.
+//
+//===----------------------------------------------------------------------===//
+
+#define DONT_GET_PLUGIN_LOADER_OPTION
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+using namespace llvm;
+
+static ManagedStatic<std::vector<std::string> > Plugins;
+static ManagedStatic<sys::SmartMutex<true> > PluginsLock;
+
+void PluginLoader::operator=(const std::string &Filename) {
+  sys::SmartScopedLock<true> Lock(*PluginsLock);
+  std::string Error;
+  if (sys::DynamicLibrary::LoadLibraryPermanently(Filename.c_str(), &Error)) {
+    errs() << "Error opening '" << Filename << "': " << Error
+           << "\n  -load request ignored.\n";
+  } else {
+    Plugins->push_back(Filename);
+  }
+}
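+// Illustrative usage (the plugin name here is hypothetical):
+//   opt -load ./libMyPass.so -mypass input.bc
+// Each -load occurrence invokes operator= above; the library stays mapped
+// for the lifetime of the process (LoadLibraryPermanently never unloads).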
+unsigned PluginLoader::getNumPlugins() {
+  sys::SmartScopedLock<true> Lock(*PluginsLock);
+  return Plugins.isConstructed() ? Plugins->size() : 0;
+}
+
+std::string &PluginLoader::getPlugin(unsigned num) {
+  sys::SmartScopedLock<true> Lock(*PluginsLock);
+  assert(Plugins.isConstructed() && num < Plugins->size() &&
+         "Asking for an out of bounds plugin");
+  return (*Plugins)[num];
+}
diff --git a/final/lib/Support/PrettyStackTrace.cpp b/final/lib/Support/PrettyStackTrace.cpp
new file mode 100644
index 00000000000..a9f4709e4b9
--- /dev/null
+++ b/final/lib/Support/PrettyStackTrace.cpp
@@ -0,0 +1,133 @@
+//===- PrettyStackTrace.cpp - Pretty Crash Handling -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "llvm/Support/PrettyStackTrace.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadLocal.h"
+#include "llvm/ADT/SmallString.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
+using namespace llvm;
+
+namespace llvm {
+  bool DisablePrettyStackTrace = false;
+}
+
+// FIXME: This should be thread local when llvm supports threads.
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
+
+static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
+  unsigned NextID = 0;
+  if (Entry->getNextEntry())
+    NextID = PrintStack(Entry->getNextEntry(), OS);
+  OS << NextID << ".\t";
+  Entry->print(OS);
+
+  return NextID+1;
+}
+
+/// PrintCurStackTrace - Print the current stack trace to the specified stream.
+static void PrintCurStackTrace(raw_ostream &OS) {
+  // Don't print an empty trace.
+  if (PrettyStackTraceHead.get() == 0) return;
+
+  // If there are pretty stack frames registered, walk and emit them.
+  OS << "Stack dump:\n";
+
+  PrintStack(PrettyStackTraceHead.get(), OS);
+  OS.flush();
+}
+
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
+//  If any clients of llvm try to link to libCrashReporterClient.a themselves,
+//  only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+        __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+        = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
+#endif
+
+
+/// CrashHandler - This callback is run if a fatal signal is delivered to the
+/// process, it prints the pretty stack trace.
+static void CrashHandler(void *) {
+#ifndef __APPLE__
+  // On non-apple systems, just emit the crash stack trace to stderr.
+  PrintCurStackTrace(errs());
+#else
+  // Otherwise, emit to a smallvector of chars, send *that* to stderr, but also
+  // put it into __crashreporter_info__.
+  SmallString<2048> TmpStr;
+  {
+    raw_svector_ostream Stream(TmpStr);
+    PrintCurStackTrace(Stream);
+  }
+
+  if (!TmpStr.empty()) {
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+    // Cast to void to avoid warning.
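+    // TmpStr.str() returns a StringRef that is not guaranteed to be
+    // null-terminated; the std::string temporaries below provide a
+    // NUL-terminated buffer for the C APIs.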
+ (void)CRSetCrashLogMessage(std::string(TmpStr.str()).c_str()); +#elif HAVE_CRASHREPORTER_INFO + __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str()); +#endif + errs() << TmpStr.str(); + } + +#endif +} + +static bool RegisterCrashPrinter() { + if (!DisablePrettyStackTrace) + sys::AddSignalHandler(CrashHandler, 0); + return false; +} + +PrettyStackTraceEntry::PrettyStackTraceEntry() { + // The first time this is called, we register the crash printer. + static bool HandlerRegistered = RegisterCrashPrinter(); + (void)HandlerRegistered; + + // Link ourselves. + NextEntry = PrettyStackTraceHead.get(); + PrettyStackTraceHead.set(this); +} + +PrettyStackTraceEntry::~PrettyStackTraceEntry() { + assert(PrettyStackTraceHead.get() == this && + "Pretty stack trace entry destruction is out of order"); + PrettyStackTraceHead.set(getNextEntry()); +} + +void PrettyStackTraceString::print(raw_ostream &OS) const { + OS << Str << "\n"; +} + +void PrettyStackTraceProgram::print(raw_ostream &OS) const { + OS << "Program arguments: "; + // Print the argument list. + for (unsigned i = 0, e = ArgC; i != e; ++i) + OS << ArgV[i] << ' '; + OS << '\n'; +} diff --git a/final/lib/Support/Process.cpp b/final/lib/Support/Process.cpp new file mode 100644 index 00000000000..88ca7c3f220 --- /dev/null +++ b/final/lib/Support/Process.cpp @@ -0,0 +1,33 @@ +//===-- Process.cpp - Implement OS Process Concept --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Process concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Process.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Process.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Process.inc" +#endif diff --git a/final/lib/Support/Program.cpp b/final/lib/Support/Program.cpp new file mode 100644 index 00000000000..01860b082d6 --- /dev/null +++ b/final/lib/Support/Program.cpp @@ -0,0 +1,56 @@ +//===-- Program.cpp - Implement OS Program Concept --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header file implements the operating system Program concept. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Program.h" +#include "llvm/Config/config.h" +using namespace llvm; +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. 
+//===----------------------------------------------------------------------===// + +int +Program::ExecuteAndWait(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned secondsToWait, + unsigned memoryLimit, + std::string* ErrMsg) { + Program prg; + if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) + return prg.Wait(path, secondsToWait, ErrMsg); + else + return -1; +} + +void +Program::ExecuteNoWait(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned memoryLimit, + std::string* ErrMsg) { + Program prg; + prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg); +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Program.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Program.inc" +#endif diff --git a/final/lib/Support/README.txt.system b/final/lib/Support/README.txt.system new file mode 100644 index 00000000000..7a906b8dba4 --- /dev/null +++ b/final/lib/Support/README.txt.system @@ -0,0 +1,43 @@ +Design Of lib/System +==================== + +The software in this directory is designed to completely shield LLVM from any +and all operating system specific functionality. It is not intended to be a +complete operating system wrapper (such as ACE), but only to provide the +functionality necessary to support LLVM. + +The software located here, of necessity, has very specific and stringent design +rules. Violation of these rules means that cracks in the shield could form and +the primary goal of the library is defeated. By consistently using this library, +LLVM becomes more easily ported to new platforms since the only thing requiring +porting is this library. + +Complete documentation for the library can be found in the file: + llvm/docs/SystemLibrary.html +or at this URL: + http://llvm.org/docs/SystemLibrary.html + +While we recommend that you read the more detailed documentation, for the +impatient, here's a high level summary of the library's requirements. + + 1. No system header files are to be exposed through the interface. + 2. Std C++ and Std C header files are okay to be exposed through the interface. + 3. No exposed system-specific functions. + 4. No exposed system-specific data. + 5. Data in lib/System classes must use only simple C++ intrinsic types. + 6. Errors are handled by returning "true" and setting an optional std::string + 7. Library must not throw any exceptions, period. + 8. Interface functions must not have throw() specifications. + 9. No duplicate function impementations are permitted within an operating + system class. + +To accomplish these requirements, the library has numerous design criteria that +must be satisfied. Here's a high level summary of the library's design criteria: + + 1. No unused functionality (only what LLVM needs) + 2. High-Level Interfaces + 3. Use Opaque Classes + 4. Common Implementations + 5. Multiple Implementations + 6. Minimize Memory Allocation + 7. No Virtual Methods diff --git a/final/lib/Support/RWMutex.cpp b/final/lib/Support/RWMutex.cpp new file mode 100644 index 00000000000..fc02f9cf7c1 --- /dev/null +++ b/final/lib/Support/RWMutex.cpp @@ -0,0 +1,157 @@ +//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_mutex_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if its available since it
+//       is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+//       declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+  : data_(0)
+{
+  if (pthread_enabled)
+  {
+    // Declare the pthread_rwlock data structures
+    pthread_rwlock_t* rwlock =
+      static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+    // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+    bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+    // Initialize the rwlock
+    int errorcode = pthread_rwlock_init(rwlock, NULL);
+    (void)errorcode;
+    assert(errorcode == 0);
+
+    // Assign the data member
+    data_ = rwlock;
+  }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+    pthread_rwlock_destroy(rwlock);
+    free(rwlock);
+  }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_rdlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_unlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_wrlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+  if (pthread_enabled)
+  {
+    pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+    assert(rwlock != 0);
+
+    int errorcode = pthread_rwlock_unlock(rwlock);
+    return errorcode == 0;
+  } else return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Windows/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/RWMutex.cpp
+#endif
+#endif
diff --git a/final/lib/Support/Regex.cpp b/final/lib/Support/Regex.cpp
new file mode 100644
index 00000000000..309ffb02dec
--- /dev/null
+++ b/final/lib/Support/Regex.cpp
@@ -0,0 +1,168 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallVector.h"
+#include "regex_impl.h"
+#include <string>
+using namespace llvm;
+
+Regex::Regex(StringRef regex, unsigned Flags) {
+  unsigned flags = 0;
+  preg = new llvm_regex();
+  preg->re_endp = regex.end();
+  if (Flags & IgnoreCase)
+    flags |= REG_ICASE;
+  if (Flags & Newline)
+    flags |= REG_NEWLINE;
+  error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+}
+
+Regex::~Regex() {
+  llvm_regfree(preg);
+  delete preg;
+}
+
+bool Regex::isValid(std::string &Error) {
+  if (!error)
+    return true;
+
+  size_t len = llvm_regerror(error, preg, NULL, 0);
+
+  Error.resize(len);
+  llvm_regerror(error, preg, &Error[0], len);
+  return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized
+/// matches it contains.
+unsigned Regex::getNumMatches() const {
+  return preg->re_nsub;
+}
+
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
+  unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
+
+  // pmatch needs to have at least one element.
+  SmallVector<llvm_regmatch_t, 8> pm;
+  pm.resize(nmatch > 0 ? nmatch : 1);
+  pm[0].rm_so = 0;
+  pm[0].rm_eo = String.size();
+
+  int rc = llvm_regexec(preg, String.data(), nmatch, pm.data(), REG_STARTEND);
+
+  if (rc == REG_NOMATCH)
+    return false;
+  if (rc != 0) {
+    // regexec can fail due to invalid pattern or running out of memory.
+    error = rc;
+    return false;
+  }
+
+  // There was a match.
+
+  if (Matches) { // match position requested
+    Matches->clear();
+
+    for (unsigned i = 0; i != nmatch; ++i) {
+      if (pm[i].rm_so == -1) {
+        // this group didn't match
+        Matches->push_back(StringRef());
+        continue;
+      }
+      assert(pm[i].rm_eo > pm[i].rm_so);
+      Matches->push_back(StringRef(String.data()+pm[i].rm_so,
+                                   pm[i].rm_eo-pm[i].rm_so));
+    }
+  }
+
+  return true;
+}
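+// Replacement-string escape handling implemented below: "\t" and "\n" emit a
+// tab and a newline, "\0".."\9" insert the text of the corresponding match
+// group, and any other escaped character is self-quoting. Illustrative
+// example: Regex("(b+)").sub("[\\1]", "abbbc") yields "a[bbb]c".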
+std::string Regex::sub(StringRef Repl, StringRef String,
+                       std::string *Error) {
+  SmallVector<StringRef, 8> Matches;
+
+  // Reset error, if given.
+  if (Error && !Error->empty()) *Error = "";
+
+  // Return the input if there was no match.
+  if (!match(String, &Matches))
+    return String;
+
+  // Otherwise splice in the replacement string, starting with the prefix before
+  // the match.
+  std::string Res(String.begin(), Matches[0].begin());
+
+  // Then the replacement string, honoring possible substitutions.
+  while (!Repl.empty()) {
+    // Skip to the next escape.
+    std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+    // Add the skipped substring.
+    Res += Split.first;
+
+    // Check for termination and trailing backslash.
+    if (Split.second.empty()) {
+      if (Repl.size() != Split.first.size() &&
+          Error && Error->empty())
+        *Error = "replacement string contained trailing backslash";
+      break;
+    }
+
+    // Otherwise update the replacement string and interpret escapes.
+    Repl = Split.second;
+
+    // FIXME: We should have a StringExtras function for mapping C99 escapes.
+    switch (Repl[0]) {
+      // Treat all unrecognized characters as self-quoting.
+    default:
+      Res += Repl[0];
+      Repl = Repl.substr(1);
+      break;
+
+      // Single character escapes.
+    case 't':
+      Res += '\t';
+      Repl = Repl.substr(1);
+      break;
+    case 'n':
+      Res += '\n';
+      Repl = Repl.substr(1);
+      break;
+
+      // Decimal escapes are backreferences.
+    case '0': case '1': case '2': case '3': case '4':
+    case '5': case '6': case '7': case '8': case '9': {
+      // Extract the backreference number.
+      StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+      Repl = Repl.substr(Ref.size());
+
+      unsigned RefValue;
+      if (!Ref.getAsInteger(10, RefValue) &&
+          RefValue < Matches.size())
+        Res += Matches[RefValue];
+      else if (Error && Error->empty())
+        *Error = "invalid backreference string '" + Ref.str() + "'";
+      break;
+    }
+    }
+  }
+
+  // And finally the suffix.
+  Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+  return Res;
+}
diff --git a/final/lib/Support/SearchForAddressOfSpecialSymbol.cpp b/final/lib/Support/SearchForAddressOfSpecialSymbol.cpp
new file mode 100644
index 00000000000..d63830185c3
--- /dev/null
+++ b/final/lib/Support/SearchForAddressOfSpecialSymbol.cpp
@@ -0,0 +1,73 @@
+//===- SearchForAddressOfSpecialSymbol.cpp - Function addresses -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file pulls the addresses of certain symbols out of the linker.
It must +// include as few header files as possible because it declares the symbols as +// void*, which would conflict with the actual symbol type if any header +// declared it. +// +//===----------------------------------------------------------------------===// + +#include + +// Must declare the symbols in the global namespace. +static void *DoSearch(const char* symbolName) { +#define EXPLICIT_SYMBOL(SYM) \ + extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM + + // If this is darwin, it has some funky issues, try to solve them here. Some + // important symbols are marked 'private external' which doesn't allow + // SearchForAddressOfSymbol to find them. As such, we special case them here, + // there is only a small handful of them. + +#ifdef __APPLE__ + { + EXPLICIT_SYMBOL(__ashldi3); + EXPLICIT_SYMBOL(__ashrdi3); + EXPLICIT_SYMBOL(__cmpdi2); + EXPLICIT_SYMBOL(__divdi3); + EXPLICIT_SYMBOL(__fixdfdi); + EXPLICIT_SYMBOL(__fixsfdi); + EXPLICIT_SYMBOL(__fixunsdfdi); + EXPLICIT_SYMBOL(__fixunssfdi); + EXPLICIT_SYMBOL(__floatdidf); + EXPLICIT_SYMBOL(__floatdisf); + EXPLICIT_SYMBOL(__lshrdi3); + EXPLICIT_SYMBOL(__moddi3); + EXPLICIT_SYMBOL(__udivdi3); + EXPLICIT_SYMBOL(__umoddi3); + + // __eprintf is sometimes used for assert() handling on x86. + // + // FIXME: Currently disabled when using Clang, as we don't always have our + // runtime support libraries available. +#ifndef __clang__ +#ifdef __i386__ + EXPLICIT_SYMBOL(__eprintf); +#endif +#endif + } +#endif + +#ifdef __CYGWIN__ + { + EXPLICIT_SYMBOL(_alloca); + EXPLICIT_SYMBOL(__main); + } +#endif + +#undef EXPLICIT_SYMBOL + return 0; +} + +namespace llvm { +void *SearchForAddressOfSpecialSymbol(const char* symbolName) { + return DoSearch(symbolName); +} +} // namespace llvm diff --git a/final/lib/Support/Signals.cpp b/final/lib/Support/Signals.cpp new file mode 100644 index 00000000000..a3af37d5fe6 --- /dev/null +++ b/final/lib/Support/Signals.cpp @@ -0,0 +1,34 @@ +//===- Signals.cpp - Signal Handling support --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines some helpful functions for dealing with the possibility of +// Unix signals occuring while your program is running. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Signals.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only TRULY operating system +//=== independent code. +//===----------------------------------------------------------------------===// + +} + +// Include the platform-specific parts of this class. +#ifdef LLVM_ON_UNIX +#include "Unix/Signals.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/Signals.inc" +#endif diff --git a/final/lib/Support/SmallPtrSet.cpp b/final/lib/Support/SmallPtrSet.cpp new file mode 100644 index 00000000000..504e6497a3c --- /dev/null +++ b/final/lib/Support/SmallPtrSet.cpp @@ -0,0 +1,226 @@ +//===- llvm/ADT/SmallPtrSet.cpp - 'Normally small' pointer set ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SmallPtrSet class. See SmallPtrSet.h for an
+// overview of the algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+void SmallPtrSetImpl::shrink_and_clear() {
+  assert(!isSmall() && "Can't shrink a small set!");
+  free(CurArray);
+
+  // Reduce the number of buckets.
+  CurArraySize = NumElements > 16 ? 1 << (Log2_32_Ceil(NumElements) + 1) : 32;
+  NumElements = NumTombstones = 0;
+
+  // Install the new array. Clear all the buckets to empty.
+  CurArray = (const void**)malloc(sizeof(void*) * (CurArraySize+1));
+  assert(CurArray && "Failed to allocate memory?");
+  memset(CurArray, -1, CurArraySize*sizeof(void*));
+
+  // The end pointer, always valid, is set to a valid element to help the
+  // iterator.
+  CurArray[CurArraySize] = 0;
+}
+
+bool SmallPtrSetImpl::insert_imp(const void * Ptr) {
+  if (isSmall()) {
+    // Check to see if it is already in the set.
+    for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+         APtr != E; ++APtr)
+      if (*APtr == Ptr)
+        return false;
+
+    // Nope, there isn't. If we stay small, just 'pushback' now.
+    if (NumElements < CurArraySize-1) {
+      SmallArray[NumElements++] = Ptr;
+      return true;
+    }
+    // Otherwise, hit the big set case, which will call grow.
+  }
+
+  // If more than 3/4 of the array is full, grow.
+  if (NumElements*4 >= CurArraySize*3 ||
+      CurArraySize-(NumElements+NumTombstones) < CurArraySize/8)
+    Grow();
+
+  // Okay, we know we have space. Find a hash bucket.
+  const void **Bucket = const_cast<const void**>(FindBucketFor(Ptr));
+  if (*Bucket == Ptr) return false; // Already inserted, good.
+
+  // Otherwise, insert it!
+  if (*Bucket == getTombstoneMarker())
+    --NumTombstones;
+  *Bucket = Ptr;
+  ++NumElements;  // Track density.
+  return true;
+}
+
+bool SmallPtrSetImpl::erase_imp(const void * Ptr) {
+  if (isSmall()) {
+    // Check to see if it is in the set.
+    for (const void **APtr = SmallArray, **E = SmallArray+NumElements;
+         APtr != E; ++APtr)
+      if (*APtr == Ptr) {
+        // If it is in the set, replace this element.
+        *APtr = E[-1];
+        E[-1] = getEmptyMarker();
+        --NumElements;
+        return true;
+      }
+
+    return false;
+  }
+
+  // Okay, we know we have space. Find a hash bucket.
+  void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
+  if (*Bucket != Ptr) return false;  // Not in the set?
+
+  // Set this as a tombstone.
+  *Bucket = getTombstoneMarker();
+  --NumElements;
+  ++NumTombstones;
+  return true;
+}
+
+const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const {
+  unsigned Bucket = Hash(Ptr);
+  unsigned ArraySize = CurArraySize;
+  unsigned ProbeAmt = 1;
+  const void *const *Array = CurArray;
+  const void *const *Tombstone = 0;
+  while (1) {
+    // Found Ptr's bucket?
+    if (Array[Bucket] == Ptr)
+      return Array+Bucket;
+
+    // If we found an empty bucket, the pointer doesn't exist in the set.
+    // Return a tombstone if we've seen one so far, or the empty bucket if
+    // not.
+    if (Array[Bucket] == getEmptyMarker())
+      return Tombstone ? Tombstone : Array+Bucket;
+
+    // If this is a tombstone, remember it. If Ptr ends up not in the set, we
+    // prefer to return it than something that would require more probing.
+    if (Array[Bucket] == getTombstoneMarker() && !Tombstone)
+      Tombstone = Array+Bucket;  // Remember the first tombstone found.
+
+    // It's a hash collision or a tombstone. Reprobe.
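+    // Since CurArraySize is a power of two and ProbeAmt grows by one each
+    // probe, the offsets follow the triangular numbers, which visit every
+    // bucket of a power-of-two table; insert_imp's grow policy keeps at
+    // least one bucket empty, so this loop terminates.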
+    Bucket = (Bucket + ProbeAmt++) & (ArraySize-1);
+  }
+}
+
+/// Grow - Allocate a larger backing store for the buckets and move it over.
+///
+void SmallPtrSetImpl::Grow() {
+  // Allocate at twice as many buckets, but at least 128.
+  unsigned OldSize = CurArraySize;
+  unsigned NewSize = OldSize < 64 ? 128 : OldSize*2;
+
+  const void **OldBuckets = CurArray;
+  bool WasSmall = isSmall();
+
+  // Install the new array. Clear all the buckets to empty.
+  CurArray = (const void**)malloc(sizeof(void*) * (NewSize+1));
+  assert(CurArray && "Failed to allocate memory?");
+  CurArraySize = NewSize;
+  memset(CurArray, -1, NewSize*sizeof(void*));
+
+  // The end pointer, always valid, is set to a valid element to help the
+  // iterator.
+  CurArray[NewSize] = 0;
+
+  // Copy over all the elements.
+  if (WasSmall) {
+    // Small sets store their elements in order.
+    for (const void **BucketPtr = OldBuckets, **E = OldBuckets+NumElements;
+         BucketPtr != E; ++BucketPtr) {
+      const void *Elt = *BucketPtr;
+      *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+    }
+  } else {
+    // Copy over all valid entries.
+    for (const void **BucketPtr = OldBuckets, **E = OldBuckets+OldSize;
+         BucketPtr != E; ++BucketPtr) {
+      // Copy over the element if it is valid.
+      const void *Elt = *BucketPtr;
+      if (Elt != getTombstoneMarker() && Elt != getEmptyMarker())
+        *const_cast<void**>(FindBucketFor(Elt)) = const_cast<void*>(Elt);
+    }
+
+    free(OldBuckets);
+    NumTombstones = 0;
+  }
+}
+
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+                                 const SmallPtrSetImpl& that) {
+  SmallArray = SmallStorage;
+
+  // If we're becoming small, prepare to insert into our stack space
+  if (that.isSmall()) {
+    CurArray = SmallArray;
+  // Otherwise, allocate new heap space (unless we were the same size)
+  } else {
+    CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
+    assert(CurArray && "Failed to allocate memory?");
+  }
+
+  // Copy over the new array size
+  CurArraySize = that.CurArraySize;
+
+  // Copy over the contents from the other set
+  memcpy(CurArray, that.CurArray, sizeof(void*)*(CurArraySize+1));
+
+  NumElements = that.NumElements;
+  NumTombstones = that.NumTombstones;
+}
+
+/// CopyFrom - implement operator= from a smallptrset that has the same pointer
+/// type, but may have a different small size.
+void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { + if (isSmall() && RHS.isSmall()) + assert(CurArraySize == RHS.CurArraySize && + "Cannot assign sets with different small sizes"); + + // If we're becoming small, prepare to insert into our stack space + if (RHS.isSmall()) { + if (!isSmall()) + free(CurArray); + CurArray = SmallArray; + // Otherwise, allocate new heap space (unless we were the same size) + } else if (CurArraySize != RHS.CurArraySize) { + if (isSmall()) + CurArray = (const void**)malloc(sizeof(void*) * (RHS.CurArraySize+1)); + else + CurArray = (const void**)realloc(CurArray, sizeof(void*)*(RHS.CurArraySize+1)); + assert(CurArray && "Failed to allocate memory?"); + } + + // Copy over the new array size + CurArraySize = RHS.CurArraySize; + + // Copy over the contents from the other set + memcpy(CurArray, RHS.CurArray, sizeof(void*)*(CurArraySize+1)); + + NumElements = RHS.NumElements; + NumTombstones = RHS.NumTombstones; +} + +SmallPtrSetImpl::~SmallPtrSetImpl() { + if (!isSmall()) + free(CurArray); +} diff --git a/final/lib/Support/SmallVector.cpp b/final/lib/Support/SmallVector.cpp new file mode 100644 index 00000000000..a89f1495763 --- /dev/null +++ b/final/lib/Support/SmallVector.cpp @@ -0,0 +1,40 @@ +//===- llvm/ADT/SmallVector.cpp - 'Normally small' vectors ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SmallVector class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +using namespace llvm; + +/// grow_pod - This is an implementation of the grow() method which only works +/// on POD-like datatypes and is out of line to reduce code duplication. +void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) { + size_t CurSizeBytes = size_in_bytes(); + size_t NewCapacityInBytes = 2 * capacity_in_bytes() + TSize; // Always grow. + if (NewCapacityInBytes < MinSizeInBytes) + NewCapacityInBytes = MinSizeInBytes; + + void *NewElts; + if (this->isSmall()) { + NewElts = malloc(NewCapacityInBytes); + + // Copy the elements over. No need to run dtors on PODs. + memcpy(NewElts, this->BeginX, CurSizeBytes); + } else { + // If this wasn't grown from the inline copy, grow the allocated space. + NewElts = realloc(this->BeginX, NewCapacityInBytes); + } + + this->EndX = (char*)NewElts+CurSizeBytes; + this->BeginX = NewElts; + this->CapacityX = (char*)this->BeginX + NewCapacityInBytes; +} + diff --git a/final/lib/Support/SourceMgr.cpp b/final/lib/Support/SourceMgr.cpp new file mode 100644 index 00000000000..ef099163c22 --- /dev/null +++ b/final/lib/Support/SourceMgr.cpp @@ -0,0 +1,230 @@ +//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SourceMgr class. This class is used as a simple +// substrate for diagnostics, #include handling, and other low level things for +// simple parsers. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +using namespace llvm; + +namespace { + struct LineNoCacheTy { + int LastQueryBufferID; + const char *LastQuery; + unsigned LineNoOfQuery; + }; +} + +static LineNoCacheTy *getCache(void *Ptr) { + return (LineNoCacheTy*)Ptr; +} + + +SourceMgr::~SourceMgr() { + // Delete the line # cache if allocated. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + delete Cache; + + while (!Buffers.empty()) { + delete Buffers.back().Buffer; + Buffers.pop_back(); + } +} + +/// AddIncludeFile - Search for a file with the specified name in the current +/// directory or in one of the IncludeDirs. If no file is found, this returns +/// ~0, otherwise it returns the buffer ID of the stacked file. +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc) { + OwningPtr NewBuf; + MemoryBuffer::getFile(Filename.c_str(), NewBuf); + + // If the file didn't exist directly, see if it's in an include path. + for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { + std::string IncFile = IncludeDirectories[i] + "/" + Filename; + MemoryBuffer::getFile(IncFile.c_str(), NewBuf); + } + + if (NewBuf == 0) return ~0U; + + return AddNewSourceBuffer(NewBuf.take(), IncludeLoc); +} + + +/// FindBufferContainingLoc - Return the ID of the buffer containing the +/// specified location, returning -1 if not found. +int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { + for (unsigned i = 0, e = Buffers.size(); i != e; ++i) + if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && + // Use <= here so that a pointer to the null at the end of the buffer + // is included as part of the buffer. + Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) + return i; + return -1; +} + +/// FindLineNumber - Find the line number for the specified location in the +/// specified file. This is not a fast method. +unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const { + if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); + assert(BufferID != -1 && "Invalid Location!"); + + MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; + + // Count the number of \n's between the start of the file and the specified + // location. + unsigned LineNo = 1; + + const char *Ptr = Buff->getBufferStart(); + + // If we have a line number cache, and if the query is to a later point in the + // same file, start searching from the last query location. This optimizes + // for the case when multiple diagnostics come out of one file in order. + if (LineNoCacheTy *Cache = getCache(LineNoCache)) + if (Cache->LastQueryBufferID == BufferID && + Cache->LastQuery <= Loc.getPointer()) { + Ptr = Cache->LastQuery; + LineNo = Cache->LineNoOfQuery; + } + + // Scan for the location being queried, keeping track of the number of lines + // we see. + for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr) + if (*Ptr == '\n') ++LineNo; + + + // Allocate the line number cache if it doesn't exist. + if (LineNoCache == 0) + LineNoCache = new LineNoCacheTy(); + + // Update the line # cache. 
+  LineNoCacheTy &Cache = *getCache(LineNoCache);
+  Cache.LastQueryBufferID = BufferID;
+  Cache.LastQuery = Ptr;
+  Cache.LineNoOfQuery = LineNo;
+  return LineNo;
+}
+
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
+  if (IncludeLoc == SMLoc()) return;  // Top of stack.
+
+  int CurBuf = FindBufferContainingLoc(IncludeLoc);
+  assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+  OS << "Included from "
+     << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+     << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+}
+
+
+/// GetMessage - Return an SMDiagnostic at the specified location with the
+/// specified string.
+///
+/// @param Type - If non-null, the kind of message (e.g., "error") which is
+/// prefixed to the message.
+SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const Twine &Msg,
+                                   const char *Type, bool ShowLine) const {
+
+  // First thing to do: find the current buffer containing the specified
+  // location.
+  int CurBuf = FindBufferContainingLoc(Loc);
+  assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+  MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+
+  // Scan backward to find the start of the line.
+  const char *LineStart = Loc.getPointer();
+  while (LineStart != CurMB->getBufferStart() &&
+         LineStart[-1] != '\n' && LineStart[-1] != '\r')
+    --LineStart;
+
+  std::string LineStr;
+  if (ShowLine) {
+    // Get the end of the line.
+    const char *LineEnd = Loc.getPointer();
+    while (LineEnd != CurMB->getBufferEnd() &&
+           LineEnd[0] != '\n' && LineEnd[0] != '\r')
+      ++LineEnd;
+    LineStr = std::string(LineStart, LineEnd);
+  }
+
+  std::string PrintedMsg;
+  raw_string_ostream OS(PrintedMsg);
+  if (Type)
+    OS << Type << ": ";
+  OS << Msg;
+
+  return SMDiagnostic(*this, Loc,
+                      CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf),
+                      Loc.getPointer()-LineStart, OS.str(),
+                      LineStr, ShowLine);
+}
+
+void SourceMgr::PrintMessage(SMLoc Loc, const Twine &Msg,
+                             const char *Type, bool ShowLine) const {
+  // Report the message with the diagnostic handler if present.
+  if (DiagHandler) {
+    DiagHandler(GetMessage(Loc, Msg, Type, ShowLine), DiagContext);
+    return;
+  }
+
+  raw_ostream &OS = errs();
+
+  int CurBuf = FindBufferContainingLoc(Loc);
+  assert(CurBuf != -1 && "Invalid or unspecified location!");
+  PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
+
+  GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS);
+}
+
+//===----------------------------------------------------------------------===//
+// SMDiagnostic Implementation
+//===----------------------------------------------------------------------===//
+
+void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) const {
+  if (ProgName && ProgName[0])
+    S << ProgName << ": ";
+
+  if (!Filename.empty()) {
+    if (Filename == "-")
+      S << "<stdin>";
+    else
+      S << Filename;
+
+    if (LineNo != -1) {
+      S << ':' << LineNo;
+      if (ColumnNo != -1)
+        S << ':' << (ColumnNo+1);
+    }
+    S << ": ";
+  }
+
+  S << Message << '\n';
+
+  if (LineNo != -1 && ColumnNo != -1 && ShowLine) {
+    S << LineContents << '\n';
+
+    // Print out spaces/tabs before the caret.
+    for (unsigned i = 0; i != unsigned(ColumnNo); ++i)
+      S << (LineContents[i] == '\t' ? '\t' : ' ');
+    S << "^\n";
+  }
+}
+
+
diff --git a/final/lib/Support/Statistic.cpp b/final/lib/Support/Statistic.cpp
new file mode 100644
index 00000000000..1e733d92e61
--- /dev/null
+++ b/final/lib/Support/Statistic.cpp
@@ -0,0 +1,152 @@
+//===-- Statistic.cpp - Easy way to expose stats information --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the 'Statistic' class, which is designed to be an easy
+// way to expose various success metrics from passes. These statistics are
+// printed at the end of a run, when the -stats command line option is enabled
+// on the command line.
+//
+// This is useful for reporting information like the number of instructions
+// simplified, optimized or removed by various transformations, like this:
+//
+// static Statistic NumInstEliminated("GCSE", "Number of instructions killed");
+//
+// Later, in the code: ++NumInstEliminated;
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/ADT/StringExtras.h"
+#include <algorithm>
+#include <cstring>
+using namespace llvm;
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
+
+/// -stats - Command line option to cause transformations to emit stats about
+/// what they did.
+///
+static cl::opt<bool>
+Enabled("stats", cl::desc("Enable statistics output from program"));
+
+
+namespace {
+/// StatisticInfo - This class is used in a ManagedStatic so that it is created
+/// on demand (when the first statistic is bumped) and destroyed only when
+/// llvm_shutdown is called. We print statistics from the destructor.
+class StatisticInfo {
+  std::vector<const Statistic*> Stats;
+  friend void llvm::PrintStatistics();
+  friend void llvm::PrintStatistics(raw_ostream &OS);
+public:
+  ~StatisticInfo();
+
+  void addStatistic(const Statistic *S) {
+    Stats.push_back(S);
+  }
+};
+}
+
+static ManagedStatic<StatisticInfo> StatInfo;
+static ManagedStatic<sys::SmartMutex<true> > StatLock;
+
+/// RegisterStatistic - The first time a statistic is bumped, this method is
+/// called.
+void Statistic::RegisterStatistic() {
+  // If stats are enabled, inform StatInfo that this statistic should be
+  // printed.
+  sys::SmartScopedLock<true> Writer(*StatLock);
+  if (!Initialized) {
+    if (Enabled)
+      StatInfo->addStatistic(this);
+
+    sys::MemoryFence();
+    // Remember we have been registered.
+    Initialized = true;
+  }
+}
+
+namespace {
+
+struct NameCompare {
+  bool operator()(const Statistic *LHS, const Statistic *RHS) const {
+    int Cmp = std::strcmp(LHS->getName(), RHS->getName());
+    if (Cmp != 0) return Cmp < 0;
+
+    // Secondary key is the description.
+    return std::strcmp(LHS->getDesc(), RHS->getDesc()) < 0;
+  }
+};
+
+}
+
+// Print information when destroyed, iff command line option is specified.
+StatisticInfo::~StatisticInfo() {
+  llvm::PrintStatistics();
+}
+
+void llvm::EnableStatistics() {
+  Enabled.setValue(true);
+}
+
+bool llvm::AreStatisticsEnabled() {
+  return Enabled;
+}
+
+void llvm::PrintStatistics(raw_ostream &OS) {
+  StatisticInfo &Stats = *StatInfo;
+
+  // Figure out how long the biggest Value and Name fields are.
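+  // First pass: measure the widest value and name so the loop below can
+  // right-align the counts and left-align the names into even columns.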
+  unsigned MaxNameLen = 0, MaxValLen = 0;
+  for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+    MaxValLen = std::max(MaxValLen,
+                         (unsigned)utostr(Stats.Stats[i]->getValue()).size());
+    MaxNameLen = std::max(MaxNameLen,
+                          (unsigned)std::strlen(Stats.Stats[i]->getName()));
+  }
+
+  // Sort the fields by name.
+  std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
+
+  // Print out the statistics header...
+  OS << "===" << std::string(73, '-') << "===\n"
+     << "                          ... Statistics Collected ...\n"
+     << "===" << std::string(73, '-') << "===\n\n";
+
+  // Print all of the statistics.
+  for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+    std::string CountStr = utostr(Stats.Stats[i]->getValue());
+    OS << std::string(MaxValLen-CountStr.size(), ' ')
+       << CountStr << " " << Stats.Stats[i]->getName()
+       << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
+       << " - " << Stats.Stats[i]->getDesc() << "\n";
+  }
+
+  OS << '\n';  // Flush the output stream.
+  OS.flush();
+
+}
+
+void llvm::PrintStatistics() {
+  StatisticInfo &Stats = *StatInfo;
+
+  // Statistics not enabled?
+  if (Stats.Stats.empty()) return;
+
+  // Get the stream to write to.
+  raw_ostream &OutStream = *CreateInfoOutputFile();
+  PrintStatistics(OutStream);
+  delete &OutStream;   // Close the file.
+}
diff --git a/final/lib/Support/StringExtras.cpp b/final/lib/Support/StringExtras.cpp
new file mode 100644
index 00000000000..eb2fa084218
--- /dev/null
+++ b/final/lib/Support/StringExtras.cpp
@@ -0,0 +1,81 @@
+//===-- StringExtras.cpp - Implement the StringExtras header --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringExtras.h header
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+/// StrInStrNoCase - Portable version of strcasestr. Locates the first
+/// occurrence of string 's2' in string 's1', ignoring case. Returns
+/// the offset of s2 in s1 or npos if s2 cannot be found.
+StringRef::size_type llvm::StrInStrNoCase(StringRef s1, StringRef s2) {
+  size_t N = s2.size(), M = s1.size();
+  if (N > M)
+    return StringRef::npos;
+  for (size_t i = 0, e = M - N + 1; i != e; ++i)
+    if (s1.substr(i, N).equals_lower(s2))
+      return i;
+  return StringRef::npos;
+}
+
+/// getToken - This function extracts one token from source, ignoring any
+/// leading characters that appear in the Delimiters string, and ending the
+/// token at any of the characters that appear in the Delimiters string. If
+/// there are no tokens in the source string, an empty string is returned.
+/// The function returns a pair containing the extracted token and the
+/// remaining tail string.
+std::pair<StringRef, StringRef> llvm::getToken(StringRef Source,
+                                               StringRef Delimiters) {
+  // Figure out where the token starts.
+  StringRef::size_type Start = Source.find_first_not_of(Delimiters);
+
+  // Find the next occurrence of the delimiter.
+  StringRef::size_type End = Source.find_first_of(Delimiters, Start);
+
+  return std::make_pair(Source.slice(Start, End), Source.substr(End));
+}
+
+/// SplitString - Split up the specified string according to the specified
+/// delimiters, appending the result fragments to the output list.
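+/// For example, SplitString("a,,b", Out, ",") appends "a" and "b": empty
+/// fragments between consecutive delimiters are dropped because getToken
+/// skips leading delimiter characters.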
+void llvm::SplitString(StringRef Source,
+                       SmallVectorImpl<StringRef> &OutFragments,
+                       StringRef Delimiters) {
+  StringRef S2, S;
+  tie(S2, S) = getToken(Source, Delimiters);
+  while (!S2.empty()) {
+    OutFragments.push_back(S2);
+    tie(S2, S) = getToken(S, Delimiters);
+  }
+}
+
+void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
+                            StringRef Separators, int MaxSplit,
+                            bool KeepEmpty) const {
+  StringRef rest = *this;
+
+  // rest.data() is used to distinguish cases like "a," that splits into
+  // "a" + "" and "a" that splits into "a" + 0.
+  for (int splits = 0;
+       rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+       ++splits) {
+    std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
+
+    if (p.first.size() != 0 || KeepEmpty)
+      A.push_back(p.first);
+    rest = p.second;
+  }
+  // If we have a tail left, add it.
+  if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+    A.push_back(rest);
+}
diff --git a/final/lib/Support/StringMap.cpp b/final/lib/Support/StringMap.cpp
new file mode 100644
index 00000000000..90ec2995026
--- /dev/null
+++ b/final/lib/Support/StringMap.cpp
@@ -0,0 +1,215 @@
+//===--- StringMap.cpp - String Hash table map implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the StringMap class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringExtras.h"
+#include <cassert>
+using namespace llvm;
+
+StringMapImpl::StringMapImpl(unsigned InitSize, unsigned itemSize) {
+  ItemSize = itemSize;
+
+  // If a size is specified, initialize the table with that many buckets.
+  if (InitSize) {
+    init(InitSize);
+    return;
+  }
+
+  // Otherwise, initialize it with zero buckets to avoid the allocation.
+  TheTable = 0;
+  NumBuckets = 0;
+  NumItems = 0;
+  NumTombstones = 0;
+}
+
+void StringMapImpl::init(unsigned InitSize) {
+  assert((InitSize & (InitSize-1)) == 0 &&
+         "Init Size must be a power of 2 or zero!");
+  NumBuckets = InitSize ? InitSize : 16;
+  NumItems = 0;
+  NumTombstones = 0;
+
+  TheTable = (ItemBucket*)calloc(NumBuckets+1, sizeof(ItemBucket));
+
+  // Allocate one extra bucket, set it to look filled so the iterators stop at
+  // end.
+  TheTable[NumBuckets].Item = (StringMapEntryBase*)2;
+}
+
+
+/// LookupBucketFor - Look up the bucket that the specified string should end
+/// up in. If it already exists as a key in the map, the Item pointer for the
+/// specified bucket will be non-null. Otherwise, it will be null. In either
+/// case, the FullHashValue field of the bucket will be set to the hash value
+/// of the string.
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
+  unsigned HTSize = NumBuckets;
+  if (HTSize == 0) {  // Hash table unallocated so far?
+    init(16);
+    HTSize = NumBuckets;
+  }
+  unsigned FullHashValue = HashString(Name);
+  unsigned BucketNo = FullHashValue & (HTSize-1);
+
+  unsigned ProbeAmt = 1;
+  int FirstTombstone = -1;
+  while (1) {
+    ItemBucket &Bucket = TheTable[BucketNo];
+    StringMapEntryBase *BucketItem = Bucket.Item;
+    // If we found an empty bucket, this key isn't in the table yet, return it.
+    if (BucketItem == 0) {
+      // If we found a tombstone, we want to reuse the tombstone instead of an
+      // empty bucket. This reduces probing.
+ if (FirstTombstone != -1) { + TheTable[FirstTombstone].FullHashValue = FullHashValue; + return FirstTombstone; + } + + Bucket.FullHashValue = FullHashValue; + return BucketNo; + } + + if (BucketItem == getTombstoneVal()) { + // Skip over tombstones. However, remember the first one we see. + if (FirstTombstone == -1) FirstTombstone = BucketNo; + } else if (Bucket.FullHashValue == FullHashValue) { + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + + // Do the comparison like this because Name isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (Name == StringRef(ItemStr, BucketItem->getKeyLength())) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + + +/// FindKey - Look up the bucket that contains the specified key. If it exists +/// in the map, return the bucket number of the key. Otherwise return -1. +/// This does not modify the map. +int StringMapImpl::FindKey(StringRef Key) const { + unsigned HTSize = NumBuckets; + if (HTSize == 0) return -1; // Really empty table? + unsigned FullHashValue = HashString(Key); + unsigned BucketNo = FullHashValue & (HTSize-1); + + unsigned ProbeAmt = 1; + while (1) { + ItemBucket &Bucket = TheTable[BucketNo]; + StringMapEntryBase *BucketItem = Bucket.Item; + // If we found an empty bucket, this key isn't in the table yet, return. + if (BucketItem == 0) + return -1; + + if (BucketItem == getTombstoneVal()) { + // Ignore tombstones. + } else if (Bucket.FullHashValue == FullHashValue) { + // If the full hash value matches, check deeply for a match. The common + // case here is that we are only looking at the buckets (for item info + // being non-null and for the full hash value) not at the items. This + // is important for cache locality. + + // Do the comparison like this because NameStart isn't necessarily + // null-terminated! + char *ItemStr = (char*)BucketItem+ItemSize; + if (Key == StringRef(ItemStr, BucketItem->getKeyLength())) { + // We found a match! + return BucketNo; + } + } + + // Okay, we didn't find the item. Probe to the next bucket. + BucketNo = (BucketNo+ProbeAmt) & (HTSize-1); + + // Use quadratic probing, it has fewer clumping artifacts than linear + // probing and has good cache behavior in the common case. + ++ProbeAmt; + } +} + +/// RemoveKey - Remove the specified StringMapEntry from the table, but do not +/// delete it. This aborts if the value isn't in the table. +void StringMapImpl::RemoveKey(StringMapEntryBase *V) { + const char *VStr = (char*)V + ItemSize; + StringMapEntryBase *V2 = RemoveKey(StringRef(VStr, V->getKeyLength())); + (void)V2; + assert(V == V2 && "Didn't find key?"); +} + +/// RemoveKey - Remove the StringMapEntry for the specified key from the +/// table, returning it. If the key is not in the table, this returns null. 
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { + int Bucket = FindKey(Key); + if (Bucket == -1) return 0; + + StringMapEntryBase *Result = TheTable[Bucket].Item; + TheTable[Bucket].Item = getTombstoneVal(); + --NumItems; + ++NumTombstones; + return Result; +} + + + +/// RehashTable - Grow the table, redistributing values into the buckets with +/// the appropriate mod-of-hashtable-size. +void StringMapImpl::RehashTable() { + unsigned NewSize = NumBuckets*2; + // Allocate one extra bucket which will always be non-empty. This allows the + // iterators to stop at end. + ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket)); + NewTableArray[NewSize].Item = (StringMapEntryBase*)2; + + // Rehash all the items into their new buckets. Luckily :) we already have + // the hash values available, so we don't have to rehash any strings. + for (ItemBucket *IB = TheTable, *E = TheTable+NumBuckets; IB != E; ++IB) { + if (IB->Item && IB->Item != getTombstoneVal()) { + // Fast case, bucket available. + unsigned FullHash = IB->FullHashValue; + unsigned NewBucket = FullHash & (NewSize-1); + if (NewTableArray[NewBucket].Item == 0) { + NewTableArray[FullHash & (NewSize-1)].Item = IB->Item; + NewTableArray[FullHash & (NewSize-1)].FullHashValue = FullHash; + continue; + } + + // Otherwise probe for a spot. + unsigned ProbeSize = 1; + do { + NewBucket = (NewBucket + ProbeSize++) & (NewSize-1); + } while (NewTableArray[NewBucket].Item); + + // Finally found a slot. Fill it in. + NewTableArray[NewBucket].Item = IB->Item; + NewTableArray[NewBucket].FullHashValue = FullHash; + } + } + + free(TheTable); + + TheTable = NewTableArray; + NumBuckets = NewSize; +} diff --git a/final/lib/Support/StringPool.cpp b/final/lib/Support/StringPool.cpp new file mode 100644 index 00000000000..ff607cf8c4a --- /dev/null +++ b/final/lib/Support/StringPool.cpp @@ -0,0 +1,35 @@ +//===-- StringPool.cpp - Interned string pool -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StringPool class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringPool.h" +#include "llvm/ADT/StringRef.h" + +using namespace llvm; + +StringPool::StringPool() {} + +StringPool::~StringPool() { + assert(InternTable.empty() && "PooledStringPtr leaked!"); +} + +PooledStringPtr StringPool::intern(StringRef Key) { + table_t::iterator I = InternTable.find(Key); + if (I != InternTable.end()) + return PooledStringPtr(&*I); + + entry_t *S = entry_t::Create(Key.begin(), Key.end()); + S->getValue().Pool = this; + InternTable.insert(S); + + return PooledStringPtr(S); +} diff --git a/final/lib/Support/StringRef.cpp b/final/lib/Support/StringRef.cpp new file mode 100644 index 00000000000..53980519645 --- /dev/null +++ b/final/lib/Support/StringRef.cpp @@ -0,0 +1,415 @@ +//===-- StringRef.cpp - Lightweight String References ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/OwningPtr.h"
+#include <bitset>
+
+using namespace llvm;
+
+// MSVC emits references to this into the translation units which reference it.
+#ifndef _MSC_VER
+const size_t StringRef::npos;
+#endif
+
+static char ascii_tolower(char x) {
+  if (x >= 'A' && x <= 'Z')
+    return x - 'A' + 'a';
+  return x;
+}
+
+static bool ascii_isdigit(char x) {
+  return x >= '0' && x <= '9';
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+  for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+    unsigned char LHC = ascii_tolower(Data[I]);
+    unsigned char RHC = ascii_tolower(RHS.Data[I]);
+    if (LHC != RHC)
+      return LHC < RHC ? -1 : 1;
+  }
+
+  if (Length == RHS.Length)
+    return 0;
+  return Length < RHS.Length ? -1 : 1;
+}
+
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+  for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+    if (Data[I] == RHS.Data[I])
+      continue;
+    if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+      // The longer sequence of numbers is larger. This doesn't really handle
+      // prefixed zeros well.
+      for (size_t J = I+1; J != E+1; ++J) {
+        bool ld = J < Length && ascii_isdigit(Data[J]);
+        bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+        if (ld != rd)
+          return rd ? -1 : 1;
+        if (!rd)
+          break;
+      }
+    }
+    return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
+  }
+  if (Length == RHS.Length)
+    return 0;
+  return Length < RHS.Length ? -1 : 1;
+}
+
+// Compute the edit distance between the two given strings.
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+                                  bool AllowReplacements,
+                                  unsigned MaxEditDistance) {
+  // The algorithm implemented below is the "classic"
+  // dynamic-programming algorithm for computing the Levenshtein
+  // distance, which is described here:
+  //
+  //   http://en.wikipedia.org/wiki/Levenshtein_distance
+  //
+  // Although the algorithm is typically described using an m x n
+  // array, only two rows are used at a time, so this implementation
+  // just keeps two separate vectors for those two rows.
+  size_type m = size();
+  size_type n = Other.size();
+
+  const unsigned SmallBufferSize = 64;
+  unsigned SmallBuffer[SmallBufferSize];
+  llvm::OwningArrayPtr<unsigned> Allocated;
+  unsigned *previous = SmallBuffer;
+  if (2*(n + 1) > SmallBufferSize) {
+    previous = new unsigned [2*(n+1)];
+    Allocated.reset(previous);
+  }
+  unsigned *current = previous + (n + 1);
+
+  for (unsigned i = 0; i <= n; ++i)
+    previous[i] = i;
+
+  for (size_type y = 1; y <= m; ++y) {
+    current[0] = y;
+    unsigned BestThisRow = current[0];
+
+    for (size_type x = 1; x <= n; ++x) {
+      if (AllowReplacements) {
+        current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]?
0u:1u), + min(current[x-1], previous[x])+1); + } + else { + if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; + else current[x] = min(current[x-1], previous[x]) + 1; + } + BestThisRow = min(BestThisRow, current[x]); + } + + if (MaxEditDistance && BestThisRow > MaxEditDistance) + return MaxEditDistance + 1; + + unsigned *tmp = current; + current = previous; + previous = tmp; + } + + unsigned Result = previous[n]; + return Result; +} + +//===----------------------------------------------------------------------===// +// String Searching +//===----------------------------------------------------------------------===// + + +/// find - Search for the first string \arg Str in the string. +/// +/// \return - The index of the first occurence of \arg Str, or npos if not +/// found. +size_t StringRef::find(StringRef Str, size_t From) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i) + if (substr(i, N).equals(Str)) + return i; + return npos; +} + +/// rfind - Search for the last string \arg Str in the string. +/// +/// \return - The index of the last occurence of \arg Str, or npos if not +/// found. +size_t StringRef::rfind(StringRef Str) const { + size_t N = Str.size(); + if (N > Length) + return npos; + for (size_t i = Length - N + 1, e = 0; i != e;) { + --i; + if (substr(i, N).equals(Str)) + return i; + } + return npos; +} + +/// find_first_of - Find the first character in the string that is in \arg +/// Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_first_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = min(From, Length), e = Length; i != e; ++i) + if (CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// \arg C or npos if not found. +StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { + for (size_type i = min(From, Length), e = Length; i != e; ++i) + if (Data[i] != C) + return i; + return npos; +} + +/// find_first_not_of - Find the first character in the string that is not +/// in the string \arg Chars, or npos if not found. +/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_first_not_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = min(From, Length), e = Length; i != e; ++i) + if (!CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +/// find_last_of - Find the last character in the string that is in \arg C, +/// or npos if not found. 
+/// +/// Note: O(size() + Chars.size()) +StringRef::size_type StringRef::find_last_of(StringRef Chars, + size_t From) const { + std::bitset<1 << CHAR_BIT> CharBits; + for (size_type i = 0; i != Chars.size(); ++i) + CharBits.set((unsigned char)Chars[i]); + + for (size_type i = min(From, Length) - 1, e = -1; i != e; --i) + if (CharBits.test((unsigned char)Data[i])) + return i; + return npos; +} + +//===----------------------------------------------------------------------===// +// Helpful Algorithms +//===----------------------------------------------------------------------===// + +/// count - Return the number of non-overlapped occurrences of \arg Str in +/// the string. +size_t StringRef::count(StringRef Str) const { + size_t Count = 0; + size_t N = Str.size(); + if (N > Length) + return 0; + for (size_t i = 0, e = Length - N + 1; i != e; ++i) + if (substr(i, N).equals(Str)) + ++Count; + return Count; +} + +static unsigned GetAutoSenseRadix(StringRef &Str) { + if (Str.startswith("0x")) { + Str = Str.substr(2); + return 16; + } else if (Str.startswith("0b")) { + Str = Str.substr(2); + return 2; + } else if (Str.startswith("0")) { + return 8; + } else { + return 10; + } +} + + +/// GetAsUnsignedInteger - Workhorse method that converts a integer character +/// sequence of radix up to 36 to an unsigned long long value. +static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix, + unsigned long long &Result) { + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Parse all the bytes of the string given this radix. Watch for overflow. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. + unsigned long long PrevResult = Result; + Result = Result*Radix+CharVal; + + // Check for overflow. + if (Result < PrevResult) + return true; + + Str = Str.substr(1); + } + + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned long long &Result) const { + return GetAsUnsignedInteger(*this, Radix, Result); +} + + +bool StringRef::getAsInteger(unsigned Radix, long long &Result) const { + unsigned long long ULLVal; + + // Handle positive strings first. + if (empty() || front() != '-') { + if (GetAsUnsignedInteger(*this, Radix, ULLVal) || + // Check for value so large it overflows a signed value. + (long long)ULLVal < 0) + return true; + Result = ULLVal; + return false; + } + + // Get the positive part of the value. + if (GetAsUnsignedInteger(substr(1), Radix, ULLVal) || + // Reject values so large they'd overflow as negative signed, but allow + // "-0". This negates the unsigned so that the negative isn't undefined + // on signed overflow. 
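+      // Worked example (assuming 64-bit, two's-complement long long):
+      // "-9223372036854775808" parses to ULLVal = 2^63; -ULLVal is still
+      // 2^63, which casts to LLONG_MIN, not > 0, so it is accepted. The
+      // next value down, "-9223372036854775809", negates to 2^63 - 1 > 0
+      // and is rejected.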
+ (long long)-ULLVal > 0) + return true; + + Result = -ULLVal; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, int &Result) const { + long long Val; + if (getAsInteger(Radix, Val) || + (int)Val != Val) + return true; + Result = Val; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const { + unsigned long long Val; + if (getAsInteger(Radix, Val) || + (unsigned)Val != Val) + return true; + Result = Val; + return false; +} + +bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const { + StringRef Str = *this; + + // Autosense radix if not specified. + if (Radix == 0) + Radix = GetAutoSenseRadix(Str); + + assert(Radix > 1 && Radix <= 36); + + // Empty strings (after the radix autosense) are invalid. + if (Str.empty()) return true; + + // Skip leading zeroes. This can be a significant improvement if + // it means we don't need > 64 bits. + while (!Str.empty() && Str.front() == '0') + Str = Str.substr(1); + + // If it was nothing but zeroes.... + if (Str.empty()) { + Result = APInt(64, 0); + return false; + } + + // (Over-)estimate the required number of bits. + unsigned Log2Radix = 0; + while ((1U << Log2Radix) < Radix) Log2Radix++; + bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix); + + unsigned BitWidth = Log2Radix * Str.size(); + if (BitWidth < Result.getBitWidth()) + BitWidth = Result.getBitWidth(); // don't shrink the result + else + Result = Result.zext(BitWidth); + + APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix + if (!IsPowerOf2Radix) { + // These must have the same bit-width as Result. + RadixAP = APInt(BitWidth, Radix); + CharAP = APInt(BitWidth, 0); + } + + // Parse all the bytes of the string given this radix. + Result = 0; + while (!Str.empty()) { + unsigned CharVal; + if (Str[0] >= '0' && Str[0] <= '9') + CharVal = Str[0]-'0'; + else if (Str[0] >= 'a' && Str[0] <= 'z') + CharVal = Str[0]-'a'+10; + else if (Str[0] >= 'A' && Str[0] <= 'Z') + CharVal = Str[0]-'A'+10; + else + return true; + + // If the parsed value is larger than the integer radix, the string is + // invalid. + if (CharVal >= Radix) + return true; + + // Add in this character. + if (IsPowerOf2Radix) { + Result <<= Log2Radix; + Result |= CharVal; + } else { + Result *= RadixAP; + CharAP = CharVal; + Result += CharAP; + } + + Str = Str.substr(1); + } + + return false; +} diff --git a/final/lib/Support/SystemUtils.cpp b/final/lib/Support/SystemUtils.cpp new file mode 100644 index 00000000000..54b5e97bfe1 --- /dev/null +++ b/final/lib/Support/SystemUtils.cpp @@ -0,0 +1,55 @@ +//===- SystemUtils.cpp - Utilities for low-level system tasks -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains functions used to do a variety of low-level, often +// system-specific, tasks. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/SystemUtils.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check, + bool print_warning) { + if (stream_to_check.is_displayed()) { + if (print_warning) { + errs() << "WARNING: You're attempting to print out a bitcode file.\n" + "This is inadvisable as it may cause display problems. 
If\n" + "you REALLY want to taste LLVM bitcode first-hand, you\n" + "can force output with the `-f' option.\n\n"; + } + return true; + } + return false; +} + +/// PrependMainExecutablePath - Prepend the path to the program being executed +/// to \p ExeName, given the value of argv[0] and the address of main() +/// itself. This allows us to find another LLVM tool if it is built in the same +/// directory. An empty string is returned on error; note that this function +/// just mainpulates the path and doesn't check for executability. +/// @brief Find a named executable. +sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName, + const char *Argv0, void *MainAddr) { + // Check the directory that the calling program is in. We can do + // this if ProgramPath contains at least one / character, indicating that it + // is a relative path to the executable itself. + sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr); + Result.eraseComponent(); + + if (!Result.isEmpty()) { + Result.appendComponent(ExeName); + Result.appendSuffix(sys::Path::GetEXESuffix()); + } + + return Result; +} diff --git a/final/lib/Support/TargetRegistry.cpp b/final/lib/Support/TargetRegistry.cpp new file mode 100644 index 00000000000..293a5d7a016 --- /dev/null +++ b/final/lib/Support/TargetRegistry.cpp @@ -0,0 +1,92 @@ +//===--- TargetRegistry.cpp - Target registration -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/Host.h" +#include +using namespace llvm; + +// Clients are responsible for avoid race conditions in registration. +static Target *FirstTarget = 0; + +TargetRegistry::iterator TargetRegistry::begin() { + return iterator(FirstTarget); +} + +const Target *TargetRegistry::lookupTarget(const std::string &TT, + std::string &Error) { + // Provide special warning when no targets are initialized. + if (begin() == end()) { + Error = "Unable to find target for this triple (no targets are registered)"; + return 0; + } + const Target *Best = 0, *EquallyBest = 0; + unsigned BestQuality = 0; + for (iterator it = begin(), ie = end(); it != ie; ++it) { + if (unsigned Qual = it->TripleMatchQualityFn(TT)) { + if (!Best || Qual > BestQuality) { + Best = &*it; + EquallyBest = 0; + BestQuality = Qual; + } else if (Qual == BestQuality) + EquallyBest = &*it; + } + } + + if (!Best) { + Error = "No available targets are compatible with this triple, " + "see -version for the available targets."; + return 0; + } + + // Otherwise, take the best target, but make sure we don't have two equally + // good best targets. + if (EquallyBest) { + Error = std::string("Cannot choose between targets \"") + + Best->Name + "\" and \"" + EquallyBest->Name + "\""; + return 0; + } + + return Best; +} + +void TargetRegistry::RegisterTarget(Target &T, + const char *Name, + const char *ShortDesc, + Target::TripleMatchQualityFnTy TQualityFn, + bool HasJIT) { + assert(Name && ShortDesc && TQualityFn && + "Missing required target information!"); + + // Check if this target has already been initialized, we allow this as a + // convenience to some clients. + if (T.Name) + return; + + // Add to the list of targets. 
+  T.Next = FirstTarget;
+  FirstTarget = &T;
+
+  T.Name = Name;
+  T.ShortDesc = ShortDesc;
+  T.TripleMatchQualityFn = TQualityFn;
+  T.HasJIT = HasJIT;
+}
+
+const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) {
+  const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error);
+
+  if (TheTarget && !TheTarget->hasJIT()) {
+    Error = "No JIT compatible target available for this host";
+    return 0;
+  }
+
+  return TheTarget;
+}
+
diff --git a/final/lib/Support/ThreadLocal.cpp b/final/lib/Support/ThreadLocal.cpp
new file mode 100644
index 00000000000..6b43048da15
--- /dev/null
+++ b/final/lib/Support/ThreadLocal.cpp
@@ -0,0 +1,84 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/Support/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { data = 0; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+  pthread_key_t* key = new pthread_key_t;
+  int errorcode = pthread_key_create(key, NULL);
+  assert(errorcode == 0);
+  (void) errorcode;
+  data = (void*)key;
+}
+
+ThreadLocalImpl::~ThreadLocalImpl() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_key_delete(*key);
+  assert(errorcode == 0);
+  (void) errorcode;
+  delete key;
+}
+
+void ThreadLocalImpl::setInstance(const void* d) {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_setspecific(*key, d);
+  assert(errorcode == 0);
+  (void) errorcode;
+}
+
+const void* ThreadLocalImpl::getInstance() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  return pthread_getspecific(*key);
+}
+
+void ThreadLocalImpl::removeInstance() {
+  setInstance(0);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined(LLVM_ON_WIN32)
+#include "Windows/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
+#endif
+#endif
diff --git a/final/lib/Support/Threading.cpp b/final/lib/Support/Threading.cpp
new file mode 100644
index 00000000000..29579567ac6
--- /dev/null
+++ b/final/lib/Support/Threading.cpp
@@ -0,0 +1,116 @@
+//===-- llvm/Support/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file implements llvm_start_multithreaded() and friends. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Threading.h" +#include "llvm/Support/Atomic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Config/config.h" +#include + +using namespace llvm; + +static bool multithreaded_mode = false; + +static sys::Mutex* global_lock = 0; + +bool llvm::llvm_start_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(!multithreaded_mode && "Already multithreaded!"); + multithreaded_mode = true; + global_lock = new sys::Mutex(true); + + // We fence here to ensure that all initialization is complete BEFORE we + // return from llvm_start_multithreaded(). + sys::MemoryFence(); + return true; +#else + return false; +#endif +} + +void llvm::llvm_stop_multithreaded() { +#ifdef LLVM_MULTITHREADED + assert(multithreaded_mode && "Not currently multithreaded!"); + + // We fence here to insure that all threaded operations are complete BEFORE we + // return from llvm_stop_multithreaded(). + sys::MemoryFence(); + + multithreaded_mode = false; + delete global_lock; +#endif +} + +bool llvm::llvm_is_multithreaded() { + return multithreaded_mode; +} + +void llvm::llvm_acquire_global_lock() { + if (multithreaded_mode) global_lock->acquire(); +} + +void llvm::llvm_release_global_lock() { + if (multithreaded_mode) global_lock->release(); +} + +#if defined(LLVM_MULTITHREADED) && defined(HAVE_PTHREAD_H) +#include + +struct ThreadInfo { + void (*UserFn)(void *); + void *UserData; +}; +static void *ExecuteOnThread_Dispatch(void *Arg) { + ThreadInfo *TI = reinterpret_cast(Arg); + TI->UserFn(TI->UserData); + return 0; +} + +void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, + unsigned RequestedStackSize) { + ThreadInfo Info = { Fn, UserData }; + pthread_attr_t Attr; + pthread_t Thread; + + // Construct the attributes object. + if (::pthread_attr_init(&Attr) != 0) + return; + + // Set the requested stack size, if given. + if (RequestedStackSize != 0) { + if (::pthread_attr_setstacksize(&Attr, RequestedStackSize) != 0) + goto error; + } + + // Construct and execute the thread. + if (::pthread_create(&Thread, &Attr, ExecuteOnThread_Dispatch, &Info) != 0) + goto error; + + // Wait for the thread and clean up. + ::pthread_join(Thread, 0); + + error: + ::pthread_attr_destroy(&Attr); +} + +#else + +// No non-pthread implementation, currently. + +void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData, + unsigned RequestedStackSize) { + (void) RequestedStackSize; + Fn(UserData); +} + +#endif diff --git a/final/lib/Support/TimeValue.cpp b/final/lib/Support/TimeValue.cpp new file mode 100644 index 00000000000..1a0f7bc3639 --- /dev/null +++ b/final/lib/Support/TimeValue.cpp @@ -0,0 +1,57 @@ +//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the operating system TimeValue concept. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/TimeValue.h" +#include "llvm/Config/config.h" + +namespace llvm { +using namespace sys; + +const TimeValue TimeValue::MinTime = TimeValue ( INT64_MIN,0 ); +const TimeValue TimeValue::MaxTime = TimeValue ( INT64_MAX,0 ); +const TimeValue TimeValue::ZeroTime = TimeValue ( 0,0 ); +const TimeValue TimeValue::PosixZeroTime = TimeValue ( -946684800,0 ); +const TimeValue TimeValue::Win32ZeroTime = TimeValue ( -12591158400ULL,0 ); + +void +TimeValue::normalize( void ) { + if ( nanos_ >= NANOSECONDS_PER_SECOND ) { + do { + seconds_++; + nanos_ -= NANOSECONDS_PER_SECOND; + } while ( nanos_ >= NANOSECONDS_PER_SECOND ); + } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) { + do { + seconds_--; + nanos_ += NANOSECONDS_PER_SECOND; + } while (nanos_ <= -NANOSECONDS_PER_SECOND); + } + + if (seconds_ >= 1 && nanos_ < 0) { + seconds_--; + nanos_ += NANOSECONDS_PER_SECOND; + } else if (seconds_ < 0 && nanos_ > 0) { + seconds_++; + nanos_ -= NANOSECONDS_PER_SECOND; + } +} + +} + +/// Include the platform specific portion of TimeValue class +#ifdef LLVM_ON_UNIX +#include "Unix/TimeValue.inc" +#endif +#ifdef LLVM_ON_WIN32 +#include "Windows/TimeValue.inc" +#endif diff --git a/final/lib/Support/Timer.cpp b/final/lib/Support/Timer.cpp new file mode 100644 index 00000000000..a9ed5eecfa7 --- /dev/null +++ b/final/lib/Support/Timer.cpp @@ -0,0 +1,393 @@ +//===-- Timer.cpp - Interval Timing Support -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interval Timing implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Timer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/Process.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" +using namespace llvm; + +// CreateInfoOutputFile - Return a file stream to print our output on. +namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } + +// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy +// of constructor/destructor ordering being unspecified by C++. Basically the +// problem is that a Statistic object gets destroyed, which ends up calling +// 'GetLibSupportInfoOutputFile()' (below), which calls this function. +// LibSupportInfoOutputFilename used to be a global variable, but sometimes it +// would get destroyed before the Statistic, causing havoc to ensue. We "fix" +// this by creating the string the first time it is needed and never destroying +// it. 
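+// ManagedStatic provides exactly that behavior: the string is constructed on
+// first dereference and is only torn down by an explicit llvm_shutdown()
+// call, never by static destruction order.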
+static ManagedStatic<std::string> LibSupportInfoOutputFilename;
+static std::string &getLibSupportInfoOutputFilename() {
+  return *LibSupportInfoOutputFilename;
+}
+
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
+namespace {
+  static cl::opt<bool>
+  TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
+                                      "tracking (this may be slow)"),
+             cl::Hidden);
+
+  static cl::opt<std::string, true>
+  InfoOutputFilename("info-output-file", cl::value_desc("filename"),
+                     cl::desc("File to append -stats and -timer output to"),
+                     cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
+}
+
+// CreateInfoOutputFile - Return a file stream to print our output on.
+raw_ostream *llvm::CreateInfoOutputFile() {
+  const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+  if (OutputFilename.empty())
+    return new raw_fd_ostream(2, false); // stderr.
+  if (OutputFilename == "-")
+    return new raw_fd_ostream(1, false); // stdout.
+
+  // Append mode is used because the info output file is opened and closed
+  // each time -stats or -time-passes wants to print output to it. To
+  // compensate for this, the test-suite Makefiles have code to delete the
+  // info output file before running commands which write to it.
+  std::string Error;
+  raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
+                                           Error, raw_fd_ostream::F_Append);
+  if (Error.empty())
+    return Result;
+
+  errs() << "Error opening info-output-file '"
+         << OutputFilename << "' for appending!\n";
+  delete Result;
+  return new raw_fd_ostream(2, false); // stderr.
+}
+
+
+static TimerGroup *DefaultTimerGroup = 0;
+static TimerGroup *getDefaultTimerGroup() {
+  TimerGroup *tmp = DefaultTimerGroup;
+  sys::MemoryFence();
+  if (tmp) return tmp;
+
+  llvm_acquire_global_lock();
+  tmp = DefaultTimerGroup;
+  if (!tmp) {
+    tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+    sys::MemoryFence();
+    DefaultTimerGroup = tmp;
+  }
+  llvm_release_global_lock();
+
+  return tmp;
+}
+
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
+
+void Timer::init(StringRef N) {
+  assert(TG == 0 && "Timer already initialized");
+  Name.assign(N.begin(), N.end());
+  Started = false;
+  TG = getDefaultTimerGroup();
+  TG->addTimer(*this);
+}
+
+void Timer::init(StringRef N, TimerGroup &tg) {
+  assert(TG == 0 && "Timer already initialized");
+  Name.assign(N.begin(), N.end());
+  Started = false;
+  TG = &tg;
+  TG->addTimer(*this);
+}
+
+Timer::~Timer() {
+  if (!TG) return;  // Never initialized, or already cleared.
+  TG->removeTimer(*this);
+}
+
+static inline size_t getMemUsage() {
+  if (!TrackSpace) return 0;
+  return sys::Process::GetMallocUsage();
+}
+
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
+  TimeRecord Result;
+  sys::TimeValue now(0,0), user(0,0), sys(0,0);
+
+  if (Start) {
+    Result.MemUsed = getMemUsage();
+    sys::Process::GetTimeUsage(now, user, sys);
+  } else {
+    sys::Process::GetTimeUsage(now, user, sys);
+    Result.MemUsed = getMemUsage();
+  }
+
+  Result.WallTime   =  now.seconds() +  now.microseconds() / 1000000.0;
+  Result.UserTime   = user.seconds() + user.microseconds() / 1000000.0;
+  Result.SystemTime =  sys.seconds() +  sys.microseconds() / 1000000.0;
+  return Result;
+}
+
+static ManagedStatic<std::vector<Timer*> > ActiveTimers;
+
+void Timer::startTimer() {
+  Started = true;
+  ActiveTimers->push_back(this);
+  Time -= TimeRecord::getCurrentTime(true);
+}
+
+void Timer::stopTimer() {
+  Time += TimeRecord::getCurrentTime(false);
+
+  if (ActiveTimers->back() == this) {
+    ActiveTimers->pop_back();
+  } else {
+    std::vector<Timer*>::iterator I =
+      std::find(ActiveTimers->begin(), ActiveTimers->end(), this);
+    assert(I != ActiveTimers->end() && "stop but no startTimer?");
+    ActiveTimers->erase(I);
+  }
+}
+
+static void printVal(double Val, double Total, raw_ostream &OS) {
+  if (Total < 1e-7)   // Avoid dividing by zero.
+    OS << "        -----     ";
+  else {
+    OS << "  " << format("%7.4f", Val) << " (";
+    OS << format("%5.1f", Val*100/Total) << "%)";
+  }
+}
+
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+  if (Total.getUserTime())
+    printVal(getUserTime(), Total.getUserTime(), OS);
+  if (Total.getSystemTime())
+    printVal(getSystemTime(), Total.getSystemTime(), OS);
+  if (Total.getProcessTime())
+    printVal(getProcessTime(), Total.getProcessTime(), OS);
+  printVal(getWallTime(), Total.getWallTime(), OS);
+
+  OS << "  ";
+
+  if (Total.getMemUsed())
+    OS << format("%9lld", (long long)getMemUsed()) << "  ";
+}
+
+
+//===----------------------------------------------------------------------===//
+//   NamedRegionTimer Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+typedef StringMap<Timer> Name2TimerMap;
+
+class Name2PairMap {
+  StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+  ~Name2PairMap() {
+    for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator
+         I = Map.begin(), E = Map.end(); I != E; ++I)
+      delete I->second.first;
+  }
+
+  Timer &get(StringRef Name, StringRef GroupName) {
+    sys::SmartScopedLock<true> L(*TimerLock);
+
+    std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+    if (!GroupEntry.first)
+      GroupEntry.first = new TimerGroup(GroupName);
+
+    Timer &T = GroupEntry.second[Name];
+    if (!T.isInitialized())
+      T.init(Name, *GroupEntry.first);
+    return T;
+  }
+};
+
+}
+
+static ManagedStatic<Name2TimerMap> NamedTimers;
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
+
+static Timer &getNamedRegionTimer(StringRef Name) {
+  sys::SmartScopedLock<true> L(*TimerLock);
+
+  Timer &T = (*NamedTimers)[Name];
+  if (!T.isInitialized())
+    T.init(Name);
+  return T;
+}
+
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+                                   bool Enabled)
+  : TimeRegion(!Enabled ?
0 : &NamedGroupedTimers->get(Name, GroupName)) {} + +//===----------------------------------------------------------------------===// +// TimerGroup Implementation +//===----------------------------------------------------------------------===// + +/// TimerGroupList - This is the global list of TimerGroups, maintained by the +/// TimerGroup ctor/dtor and is protected by the TimerLock lock. +static TimerGroup *TimerGroupList = 0; + +TimerGroup::TimerGroup(StringRef name) + : Name(name.begin(), name.end()), FirstTimer(0) { + + // Add the group to TimerGroupList. + sys::SmartScopedLock L(*TimerLock); + if (TimerGroupList) + TimerGroupList->Prev = &Next; + Next = TimerGroupList; + Prev = &TimerGroupList; + TimerGroupList = this; +} + +TimerGroup::~TimerGroup() { + // If the timer group is destroyed before the timers it owns, accumulate and + // print the timing data. + while (FirstTimer != 0) + removeTimer(*FirstTimer); + + // Remove the group from the TimerGroupList. + sys::SmartScopedLock L(*TimerLock); + *Prev = Next; + if (Next) + Next->Prev = Prev; +} + + +void TimerGroup::removeTimer(Timer &T) { + sys::SmartScopedLock L(*TimerLock); + + // If the timer was started, move its data to TimersToPrint. + if (T.Started) + TimersToPrint.push_back(std::make_pair(T.Time, T.Name)); + + T.TG = 0; + + // Unlink the timer from our list. + *T.Prev = T.Next; + if (T.Next) + T.Next->Prev = T.Prev; + + // Print the report when all timers in this group are destroyed if some of + // them were started. + if (FirstTimer != 0 || TimersToPrint.empty()) + return; + + raw_ostream *OutStream = CreateInfoOutputFile(); + PrintQueuedTimers(*OutStream); + delete OutStream; // Close the file. +} + +void TimerGroup::addTimer(Timer &T) { + sys::SmartScopedLock L(*TimerLock); + + // Add the timer to our list. + if (FirstTimer) + FirstTimer->Prev = &T.Next; + T.Next = FirstTimer; + T.Prev = &FirstTimer; + FirstTimer = &T; +} + +void TimerGroup::PrintQueuedTimers(raw_ostream &OS) { + // Sort the timers in descending order by amount of time taken. + std::sort(TimersToPrint.begin(), TimersToPrint.end()); + + TimeRecord Total; + for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) + Total += TimersToPrint[i].first; + + // Print out timing header. + OS << "===" << std::string(73, '-') << "===\n"; + // Figure out how many spaces to indent TimerGroup name. + unsigned Padding = (80-Name.length())/2; + if (Padding > 80) Padding = 0; // Don't allow "negative" numbers + OS.indent(Padding) << Name << '\n'; + OS << "===" << std::string(73, '-') << "===\n"; + + // If this is not an collection of ungrouped times, print the total time. + // Ungrouped timers don't really make sense to add up. We still print the + // TOTAL line to make the percentages make sense. + if (this != DefaultTimerGroup) { + OS << " Total Execution Time: "; + OS << format("%5.4f", Total.getProcessTime()) << " seconds ("; + OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n"; + } + OS << '\n'; + + if (Total.getUserTime()) + OS << " ---User Time---"; + if (Total.getSystemTime()) + OS << " --System Time--"; + if (Total.getProcessTime()) + OS << " --User+System--"; + OS << " ---Wall Time---"; + if (Total.getMemUsed()) + OS << " ---Mem---"; + OS << " --- Name ---\n"; + + // Loop through all of the timing data, printing it out. 
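+  // std::sort above leaves TimersToPrint in ascending order; the e-i-1
+  // indexing below walks it backwards so the most expensive timer prints
+  // first.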
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) { + const std::pair &Entry = TimersToPrint[e-i-1]; + Entry.first.print(Total, OS); + OS << Entry.second << '\n'; + } + + Total.print(Total, OS); + OS << "Total\n\n"; + OS.flush(); + + TimersToPrint.clear(); +} + +/// print - Print any started timers in this group and zero them. +void TimerGroup::print(raw_ostream &OS) { + sys::SmartScopedLock L(*TimerLock); + + // See if any of our timers were started, if so add them to TimersToPrint and + // reset them. + for (Timer *T = FirstTimer; T; T = T->Next) { + if (!T->Started) continue; + TimersToPrint.push_back(std::make_pair(T->Time, T->Name)); + + // Clear out the time. + T->Started = 0; + T->Time = TimeRecord(); + } + + // If any timers were started, print the group. + if (!TimersToPrint.empty()) + PrintQueuedTimers(OS); +} + +/// printAll - This static method prints all timers and clears them all out. +void TimerGroup::printAll(raw_ostream &OS) { + sys::SmartScopedLock L(*TimerLock); + + for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next) + TG->print(OS); +} diff --git a/final/lib/Support/ToolOutputFile.cpp b/final/lib/Support/ToolOutputFile.cpp new file mode 100644 index 00000000000..e7ca927ea53 --- /dev/null +++ b/final/lib/Support/ToolOutputFile.cpp @@ -0,0 +1,43 @@ +//===--- ToolOutputFile.cpp - Implement the tool_output_file class --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the tool_output_file class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/Signals.h" +using namespace llvm; + +tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename) + : Filename(filename), Keep(false) { + // Arrange for the file to be deleted if the process is killed. + if (Filename != "-") + sys::RemoveFileOnSignal(sys::Path(Filename)); +} + +tool_output_file::CleanupInstaller::~CleanupInstaller() { + // Delete the file if the client hasn't told us not to. + if (!Keep && Filename != "-") + sys::Path(Filename).eraseFromDisk(); + + // Ok, the file is successfully written and closed, or deleted. There's no + // further need to clean it up on signals. + if (Filename != "-") + sys::DontRemoveFileOnSignal(sys::Path(Filename)); +} + +tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo, + unsigned Flags) + : Installer(filename), + OS(filename, ErrorInfo, Flags) { + // If open fails, no cleanup is needed. + if (!ErrorInfo.empty()) + Installer.Keep = true; +} diff --git a/final/lib/Support/Triple.cpp b/final/lib/Support/Triple.cpp new file mode 100644 index 00000000000..36edf6eefa7 --- /dev/null +++ b/final/lib/Support/Triple.cpp @@ -0,0 +1,640 @@ +//===--- Triple.cpp - Target triple helper class --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include +#include +using namespace llvm; + +// + +const char *Triple::getArchTypeName(ArchType Kind) { + switch (Kind) { + case InvalidArch: return ""; + case UnknownArch: return "unknown"; + + case alpha: return "alpha"; + case arm: return "arm"; + case bfin: return "bfin"; + case cellspu: return "cellspu"; + case mips: return "mips"; + case mipsel: return "mipsel"; + case msp430: return "msp430"; + case ppc64: return "powerpc64"; + case ppc: return "powerpc"; + case sparc: return "sparc"; + case sparcv9: return "sparcv9"; + case systemz: return "s390x"; + case tce: return "tce"; + case thumb: return "thumb"; + case x86: return "i386"; + case x86_64: return "x86_64"; + case xcore: return "xcore"; + case mblaze: return "mblaze"; + case ptx: return "ptx"; + } + + return ""; +} + +const char *Triple::getArchTypePrefix(ArchType Kind) { + switch (Kind) { + default: + return 0; + + case alpha: return "alpha"; + + case arm: + case thumb: return "arm"; + + case bfin: return "bfin"; + + case cellspu: return "spu"; + + case ppc64: + case ppc: return "ppc"; + + case mblaze: return "mblaze"; + + case sparcv9: + case sparc: return "sparc"; + + case x86: + case x86_64: return "x86"; + + case xcore: return "xcore"; + + case ptx: return "ptx"; + } +} + +const char *Triple::getVendorTypeName(VendorType Kind) { + switch (Kind) { + case UnknownVendor: return "unknown"; + + case Apple: return "apple"; + case PC: return "pc"; + } + + return ""; +} + +const char *Triple::getOSTypeName(OSType Kind) { + switch (Kind) { + case UnknownOS: return "unknown"; + + case AuroraUX: return "auroraux"; + case Cygwin: return "cygwin"; + case Darwin: return "darwin"; + case DragonFly: return "dragonfly"; + case FreeBSD: return "freebsd"; + case Linux: return "linux"; + case Lv2: return "lv2"; + case MinGW32: return "mingw32"; + case NetBSD: return "netbsd"; + case OpenBSD: return "openbsd"; + case Psp: return "psp"; + case Solaris: return "solaris"; + case Win32: return "win32"; + case Haiku: return "haiku"; + case Minix: return "minix"; + } + + return ""; +} + +const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { + switch (Kind) { + case UnknownEnvironment: return "unknown"; + case GNU: return "gnu"; + case GNUEABI: return "gnueabi"; + case EABI: return "eabi"; + case MachO: return "macho"; + } + + return ""; +} + +Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { + if (Name == "alpha") + return alpha; + if (Name == "arm") + return arm; + if (Name == "bfin") + return bfin; + if (Name == "cellspu") + return cellspu; + if (Name == "mips") + return mips; + if (Name == "mipsel") + return mipsel; + if (Name == "msp430") + return msp430; + if (Name == "ppc64") + return ppc64; + if (Name == "ppc") + return ppc; + if (Name == "mblaze") + return mblaze; + if (Name == "sparc") + return sparc; + if (Name == "sparcv9") + return sparcv9; + if (Name == "systemz") + return systemz; + if (Name == "tce") + return tce; + if (Name == "thumb") + return thumb; + if (Name == "x86") + return x86; + if (Name == "x86-64") + return x86_64; + if (Name == "xcore") + return xcore; + if (Name == "ptx") + return ptx; + + return UnknownArch; +} + +Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { + // See arch(3) and llvm-gcc's driver-driver.c. 
We don't implement support for + // archs which Darwin doesn't use. + + // The matching this routine does is fairly pointless, since it is neither the + // complete architecture list, nor a reasonable subset. The problem is that + // historically the driver driver accepts this and also ties its -march= + // handling to the architecture name, so we need to be careful before removing + // support for it. + + // This code must be kept in sync with Clang's Darwin specific argument + // translation. + + if (Str == "ppc" || Str == "ppc601" || Str == "ppc603" || Str == "ppc604" || + Str == "ppc604e" || Str == "ppc750" || Str == "ppc7400" || + Str == "ppc7450" || Str == "ppc970") + return Triple::ppc; + + if (Str == "ppc64") + return Triple::ppc64; + + if (Str == "i386" || Str == "i486" || Str == "i486SX" || Str == "pentium" || + Str == "i586" || Str == "pentpro" || Str == "i686" || Str == "pentIIm3" || + Str == "pentIIm5" || Str == "pentium4") + return Triple::x86; + + if (Str == "x86_64") + return Triple::x86_64; + + // This is derived from the driver driver. + if (Str == "arm" || Str == "armv4t" || Str == "armv5" || Str == "xscale" || + Str == "armv6" || Str == "armv7") + return Triple::arm; + + if (Str == "ptx") + return Triple::ptx; + + return Triple::UnknownArch; +} + +// Returns architecture name that is understood by the target assembler. +const char *Triple::getArchNameForAssembler() { + if (getOS() != Triple::Darwin && getVendor() != Triple::Apple) + return NULL; + + StringRef Str = getArchName(); + if (Str == "i386") + return "i386"; + if (Str == "x86_64") + return "x86_64"; + if (Str == "powerpc") + return "ppc"; + if (Str == "powerpc64") + return "ppc64"; + if (Str == "mblaze" || Str == "microblaze") + return "mblaze"; + if (Str == "arm") + return "arm"; + if (Str == "armv4t" || Str == "thumbv4t") + return "armv4t"; + if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" + || Str == "thumbv5e") + return "armv5"; + if (Str == "armv6" || Str == "thumbv6") + return "armv6"; + if (Str == "armv7" || Str == "thumbv7") + return "armv7"; + if (Str == "ptx") + return "ptx"; + return NULL; +} + +// + +Triple::ArchType Triple::ParseArch(StringRef ArchName) { + if (ArchName.size() == 4 && ArchName[0] == 'i' && + ArchName[2] == '8' && ArchName[3] == '6' && + ArchName[1] - '3' < 6) // i[3-9]86 + return x86; + else if (ArchName == "amd64" || ArchName == "x86_64") + return x86_64; + else if (ArchName == "bfin") + return bfin; + else if (ArchName == "powerpc") + return ppc; + else if ((ArchName == "powerpc64") || (ArchName == "ppu")) + return ppc64; + else if (ArchName == "mblaze") + return mblaze; + else if (ArchName == "arm" || + ArchName.startswith("armv") || + ArchName == "xscale") + return arm; + else if (ArchName == "thumb" || + ArchName.startswith("thumbv")) + return thumb; + else if (ArchName.startswith("alpha")) + return alpha; + else if (ArchName == "spu" || ArchName == "cellspu") + return cellspu; + else if (ArchName == "msp430") + return msp430; + else if (ArchName == "mips" || ArchName == "mipsallegrex") + return mips; + else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" || + ArchName == "psp") + return mipsel; + else if (ArchName == "sparc") + return sparc; + else if (ArchName == "sparcv9") + return sparcv9; + else if (ArchName == "s390x") + return systemz; + else if (ArchName == "tce") + return tce; + else if (ArchName == "xcore") + return xcore; + else if (ArchName == "ptx") + return ptx; + else + return UnknownArch; +} + +Triple::VendorType Triple::ParseVendor(StringRef 
VendorName) { + if (VendorName == "apple") + return Apple; + else if (VendorName == "pc") + return PC; + else + return UnknownVendor; +} + +Triple::OSType Triple::ParseOS(StringRef OSName) { + if (OSName.startswith("auroraux")) + return AuroraUX; + else if (OSName.startswith("cygwin")) + return Cygwin; + else if (OSName.startswith("darwin")) + return Darwin; + else if (OSName.startswith("dragonfly")) + return DragonFly; + else if (OSName.startswith("freebsd")) + return FreeBSD; + else if (OSName.startswith("linux")) + return Linux; + else if (OSName.startswith("lv2")) + return Lv2; + else if (OSName.startswith("mingw32")) + return MinGW32; + else if (OSName.startswith("netbsd")) + return NetBSD; + else if (OSName.startswith("openbsd")) + return OpenBSD; + else if (OSName.startswith("psp")) + return Psp; + else if (OSName.startswith("solaris")) + return Solaris; + else if (OSName.startswith("win32")) + return Win32; + else if (OSName.startswith("haiku")) + return Haiku; + else if (OSName.startswith("minix")) + return Minix; + else + return UnknownOS; +} + +Triple::EnvironmentType Triple::ParseEnvironment(StringRef EnvironmentName) { + if (EnvironmentName.startswith("eabi")) + return EABI; + else if (EnvironmentName.startswith("gnueabi")) + return GNUEABI; + else if (EnvironmentName.startswith("gnu")) + return GNU; + else if (EnvironmentName.startswith("macho")) + return MachO; + else + return UnknownEnvironment; +} + +void Triple::Parse() const { + assert(!isInitialized() && "Invalid parse call."); + + Arch = ParseArch(getArchName()); + Vendor = ParseVendor(getVendorName()); + OS = ParseOS(getOSName()); + Environment = ParseEnvironment(getEnvironmentName()); + + assert(isInitialized() && "Failed to initialize!"); +} + +std::string Triple::normalize(StringRef Str) { + // Parse into components. + SmallVector Components; + for (size_t First = 0, Last = 0; Last != StringRef::npos; First = Last + 1) { + Last = Str.find('-', First); + Components.push_back(Str.slice(First, Last)); + } + + // If the first component corresponds to a known architecture, preferentially + // use it for the architecture. If the second component corresponds to a + // known vendor, preferentially use it for the vendor, etc. This avoids silly + // component movement when a component parses as (eg) both a valid arch and a + // valid os. + ArchType Arch = UnknownArch; + if (Components.size() > 0) + Arch = ParseArch(Components[0]); + VendorType Vendor = UnknownVendor; + if (Components.size() > 1) + Vendor = ParseVendor(Components[1]); + OSType OS = UnknownOS; + if (Components.size() > 2) + OS = ParseOS(Components[2]); + EnvironmentType Environment = UnknownEnvironment; + if (Components.size() > 3) + Environment = ParseEnvironment(Components[3]); + + // Note which components are already in their final position. These will not + // be moved. + bool Found[4]; + Found[0] = Arch != UnknownArch; + Found[1] = Vendor != UnknownVendor; + Found[2] = OS != UnknownOS; + Found[3] = Environment != UnknownEnvironment; + + // If they are not there already, permute the components into their canonical + // positions by seeing if they parse as a valid architecture, and if so moving + // the component to the architecture position etc. + for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) { + if (Found[Pos]) + continue; // Already in the canonical position. + + for (unsigned Idx = 0; Idx != Components.size(); ++Idx) { + // Do not reparse any components that already matched. 
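+      // E.g. for "i386-pc-linux-gnu" every component already parsed in its
+      // final position, Found[] is all true, and this scan has nothing to
+      // move.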
+ if (Idx < array_lengthof(Found) && Found[Idx]) + continue; + + // Does this component parse as valid for the target position? + bool Valid = false; + StringRef Comp = Components[Idx]; + switch (Pos) { + default: + assert(false && "unexpected component type!"); + case 0: + Arch = ParseArch(Comp); + Valid = Arch != UnknownArch; + break; + case 1: + Vendor = ParseVendor(Comp); + Valid = Vendor != UnknownVendor; + break; + case 2: + OS = ParseOS(Comp); + Valid = OS != UnknownOS; + break; + case 3: + Environment = ParseEnvironment(Comp); + Valid = Environment != UnknownEnvironment; + break; + } + if (!Valid) + continue; // Nope, try the next component. + + // Move the component to the target position, pushing any non-fixed + // components that are in the way to the right. This tends to give + // good results in the common cases of a forgotten vendor component + // or a wrongly positioned environment. + if (Pos < Idx) { + // Insert left, pushing the existing components to the right. For + // example, a-b-i386 -> i386-a-b when moving i386 to the front. + StringRef CurrentComponent(""); // The empty component. + // Replace the component we are moving with an empty component. + std::swap(CurrentComponent, Components[Idx]); + // Insert the component being moved at Pos, displacing any existing + // components to the right. + for (unsigned i = Pos; !CurrentComponent.empty(); ++i) { + // Skip over any fixed components. + while (i < array_lengthof(Found) && Found[i]) ++i; + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + } + } else if (Pos > Idx) { + // Push right by inserting empty components until the component at Idx + // reaches the target position Pos. For example, pc-a -> -pc-a when + // moving pc to the second position. + do { + // Insert one empty component at Idx. + StringRef CurrentComponent(""); // The empty component. + for (unsigned i = Idx; i < Components.size();) { + // Place the component at the new position, getting the component + // that was at this position - it will be moved right. + std::swap(CurrentComponent, Components[i]); + // If it was placed on top of an empty component then we are done. + if (CurrentComponent.empty()) + break; + // Advance to the next component, skipping any fixed components. + while (++i < array_lengthof(Found) && Found[i]) + ; + } + // The last component was pushed off the end - append it. + if (!CurrentComponent.empty()) + Components.push_back(CurrentComponent); + + // Advance Idx to the component's new position. + while (++Idx < array_lengthof(Found) && Found[Idx]) {} + } while (Idx < Pos); // Add more until the final position is reached. + } + assert(Pos < Components.size() && Components[Pos] == Comp && + "Component moved wrong!"); + Found[Pos] = true; + break; + } + } + + // Special case logic goes here. At this point Arch, Vendor and OS have the + // correct values for the computed components. + + // Stick the corrected components back together to form the normalized string. 
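+  // Per the examples above, "a-b-i386" is now {"i386", "a", "b"} and "pc-a"
+  // is now {"", "pc", "a"}; joining the components with '-' yields the final
+  // normalized triple.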
+ std::string Normalized; + for (unsigned i = 0, e = Components.size(); i != e; ++i) { + if (i) Normalized += '-'; + Normalized += Components[i]; + } + return Normalized; +} + +StringRef Triple::getArchName() const { + return StringRef(Data).split('-').first; // Isolate first component +} + +StringRef Triple::getVendorName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').first; // Isolate second component +} + +StringRef Triple::getOSName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').first; // Isolate third component +} + +StringRef Triple::getEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + Tmp = Tmp.split('-').second; // Strip second component + return Tmp.split('-').second; // Strip third component +} + +StringRef Triple::getOSAndEnvironmentName() const { + StringRef Tmp = StringRef(Data).split('-').second; // Strip first component + return Tmp.split('-').second; // Strip second component +} + +static unsigned EatNumber(StringRef &Str) { + assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number"); + unsigned Result = Str[0]-'0'; + + // Eat the digit. + Str = Str.substr(1); + + // Handle "darwin11". + if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') { + Result = Result*10 + (Str[0] - '0'); + // Eat the digit. + Str = Str.substr(1); + } + + return Result; +} + +/// getDarwinNumber - Parse the 'darwin number' out of the specific target +/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is +/// not defined, return 0's. This requires that the triple have an OSType of +/// darwin before it is called. +void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min, + unsigned &Revision) const { + assert(getOS() == Darwin && "Not a darwin target triple!"); + StringRef OSName = getOSName(); + assert(OSName.startswith("darwin") && "Unknown darwin target triple!"); + + // Strip off "darwin". + OSName = OSName.substr(6); + + Maj = Min = Revision = 0; + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + // The major version is the first digit. + Maj = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle minor version: 10.4.9 -> darwin8.9. + if (OSName[0] != '.') + return; + + // Eat the '.'. + OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Min = EatNumber(OSName); + if (OSName.empty()) return; + + // Handle revision darwin8.9.1 + if (OSName[0] != '.') + return; + + // Eat the '.'. + OSName = OSName.substr(1); + + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + return; + + Revision = EatNumber(OSName); +} + +void Triple::setTriple(const Twine &Str) { + Data = Str.str(); + Arch = InvalidArch; +} + +void Triple::setArch(ArchType Kind) { + setArchName(getArchTypeName(Kind)); +} + +void Triple::setVendor(VendorType Kind) { + setVendorName(getVendorTypeName(Kind)); +} + +void Triple::setOS(OSType Kind) { + setOSName(getOSTypeName(Kind)); +} + +void Triple::setEnvironment(EnvironmentType Kind) { + setEnvironmentName(getEnvironmentTypeName(Kind)); +} + +void Triple::setArchName(StringRef Str) { + // Work around a miscompilation bug for Twines in gcc 4.0.3. 
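+  // Rather than the Twine concatenation used by the other setters below, the
+  // new triple is built eagerly in a stack buffer.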
+  SmallString<64> Triple;
+  Triple += Str;
+  Triple += "-";
+  Triple += getVendorName();
+  Triple += "-";
+  Triple += getOSAndEnvironmentName();
+  setTriple(Triple.str());
+}
+
+void Triple::setVendorName(StringRef Str) {
+  setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
+}
+
+void Triple::setOSName(StringRef Str) {
+  if (hasEnvironment())
+    setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
+              "-" + getEnvironmentName());
+  else
+    setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
+
+void Triple::setEnvironmentName(StringRef Str) {
+  setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+            "-" + Str);
+}
+
+void Triple::setOSAndEnvironmentName(StringRef Str) {
+  setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
+}
diff --git a/final/lib/Support/Twine.cpp b/final/lib/Support/Twine.cpp
new file mode 100644
index 00000000000..75cea2961a9
--- /dev/null
+++ b/final/lib/Support/Twine.cpp
@@ -0,0 +1,160 @@
+//===-- Twine.cpp - Fast Temporary String Concatenation -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+std::string Twine::str() const {
+  SmallString<256> Vec;
+  return toStringRef(Vec).str();
+}
+
+void Twine::toVector(SmallVectorImpl<char> &Out) const {
+  raw_svector_ostream OS(Out);
+  print(OS);
+}
+
+StringRef Twine::toStringRef(SmallVectorImpl<char> &Out) const {
+  if (isSingleStringRef())
+    return getSingleStringRef();
+  toVector(Out);
+  return StringRef(Out.data(), Out.size());
+}
+
+StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
+  if (isUnary()) {
+    switch (getLHSKind()) {
+    case CStringKind:
+      // Already null terminated, yay!
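+      // Fast-path sketch: Twine("foo").toNullTerminatedStringRef(Scratch)
+      // can return the literal directly, leaving Scratch untouched ("foo"
+      // here is an assumed example value).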
+      return StringRef(static_cast<const char*>(LHS));
+    case StdStringKind: {
+      const std::string *str = static_cast<const std::string*>(LHS);
+      return StringRef(str->c_str(), str->size());
+    }
+    default:
+      break;
+    }
+  }
+  toVector(Out);
+  Out.push_back(0);
+  Out.pop_back();
+  return StringRef(Out.data(), Out.size());
+}
+
+void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
+                          NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind: break;
+  case Twine::EmptyKind: break;
+  case Twine::TwineKind:
+    static_cast<const Twine*>(Ptr)->print(OS);
+    break;
+  case Twine::CStringKind:
+    OS << static_cast<const char*>(Ptr);
+    break;
+  case Twine::StdStringKind:
+    OS << *static_cast<const std::string*>(Ptr);
+    break;
+  case Twine::StringRefKind:
+    OS << *static_cast<const StringRef*>(Ptr);
+    break;
+  case Twine::DecUIKind:
+    OS << (unsigned)(uintptr_t)Ptr;
+    break;
+  case Twine::DecIKind:
+    OS << (int)(intptr_t)Ptr;
+    break;
+  case Twine::DecULKind:
+    OS << *static_cast<const unsigned long*>(Ptr);
+    break;
+  case Twine::DecLKind:
+    OS << *static_cast<const long*>(Ptr);
+    break;
+  case Twine::DecULLKind:
+    OS << *static_cast<const unsigned long long*>(Ptr);
+    break;
+  case Twine::DecLLKind:
+    OS << *static_cast<const long long*>(Ptr);
+    break;
+  case Twine::UHexKind:
+    OS.write_hex(*static_cast<const uint64_t*>(Ptr));
+    break;
+  }
+}
+
+void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
+                              NodeKind Kind) const {
+  switch (Kind) {
+  case Twine::NullKind:
+    OS << "null"; break;
+  case Twine::EmptyKind:
+    OS << "empty"; break;
+  case Twine::TwineKind:
+    OS << "rope:";
+    static_cast<const Twine*>(Ptr)->printRepr(OS);
+    break;
+  case Twine::CStringKind:
+    OS << "cstring:\""
+       << static_cast<const char*>(Ptr) << "\"";
+    break;
+  case Twine::StdStringKind:
+    OS << "std::string:\""
+       << static_cast<const std::string*>(Ptr) << "\"";
+    break;
+  case Twine::StringRefKind:
+    OS << "stringref:\""
+       << static_cast<const StringRef*>(Ptr) << "\"";
+    break;
+  case Twine::DecUIKind:
+    OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
+    break;
+  case Twine::DecIKind:
+    OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
+    break;
+  case Twine::DecULKind:
+    OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLKind:
+    OS << "decL:\"" << *static_cast<const long*>(Ptr) << "\"";
+    break;
+  case Twine::DecULLKind:
+    OS << "decULL:\"" << *static_cast<const unsigned long long*>(Ptr) << "\"";
+    break;
+  case Twine::DecLLKind:
+    OS << "decLL:\"" << *static_cast<const long long*>(Ptr) << "\"";
+    break;
+  case Twine::UHexKind:
+    OS << "uhex:\"" << static_cast<const uint64_t*>(Ptr) << "\"";
+    break;
+  }
+}
+
+void Twine::print(raw_ostream &OS) const {
+  printOneChild(OS, LHS, getLHSKind());
+  printOneChild(OS, RHS, getRHSKind());
+}
+
+void Twine::printRepr(raw_ostream &OS) const {
+  OS << "(Twine ";
+  printOneChildRepr(OS, LHS, getLHSKind());
+  OS << " ";
+  printOneChildRepr(OS, RHS, getRHSKind());
+  OS << ")";
+}
+
+void Twine::dump() const {
+  print(llvm::dbgs());
+}
+
+void Twine::dumpRepr() const {
+  printRepr(llvm::dbgs());
+}
diff --git a/final/lib/Support/Unix/Host.inc b/final/lib/Support/Unix/Host.inc
new file mode 100644
index 00000000000..ed74b675990
--- /dev/null
+++ b/final/lib/Support/Unix/Host.inc
@@ -0,0 +1,97 @@
+ //===- llvm/Support/Unix/Host.inc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the UNIX Host support.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringRef.h"
+#include "Unix.h"
+#include <sys/utsname.h>
+#include <cctype>
+#include <string>
+
+using namespace llvm;
+
+static std::string getOSVersion() {
+  struct utsname info;
+
+  if (uname(&info))
+    return "";
+
+  return info.release;
+}
+
+std::string sys::getHostTriple() {
+  // FIXME: Derive directly instead of relying on the autoconf generated
+  // variable.
+
+  StringRef HostTripleString(LLVM_HOSTTRIPLE);
+  std::pair<StringRef, StringRef> ArchSplit = HostTripleString.split('-');
+
+  // Normalize the arch, since the host triple may not actually match the host.
+  std::string Arch = ArchSplit.first;
+
+  // It would be nice to do this in terms of llvm::Triple, but that is in
+  // Support which is layered above us.
+#if defined(__x86_64__)
+  Arch = "x86_64";
+#elif defined(__i386__)
+  Arch = "i386";
+#elif defined(__ppc64__)
+  Arch = "powerpc64";
+#elif defined(__ppc__)
+  Arch = "powerpc";
+#elif defined(__arm__)
+
+  // FIXME: We need to pick the right ARM triple (which involves querying the
+  // chip). However, for now this is most important for LLVM arch selection, so
+  // we only need to make sure to distinguish ARM and Thumb.
+# if defined(__thumb__)
+  Arch = "thumb";
+# else
+  Arch = "arm";
+# endif
+
+#else
+
+  // FIXME: When enough auto-detection is in place, this should just
+  // #error. Then at least the arch selection is done, and we only need the OS
+  // etc selection to kill off the use of LLVM_HOSTTRIPLE.
+
+#endif
+
+  std::string Triple(Arch);
+  Triple += '-';
+  Triple += ArchSplit.second;
+
+  // Force i<N>86 to i386.
+  if (Triple[0] == 'i' && isdigit(Triple[1]) &&
+      Triple[2] == '8' && Triple[3] == '6')
+    Triple[1] = '3';
+
+  // On darwin, we want to update the version to match that of the
+  // host.
+  std::string::size_type DarwinDashIdx = Triple.find("-darwin");
+  if (DarwinDashIdx != std::string::npos) {
+    Triple.resize(DarwinDashIdx + strlen("-darwin"));
+
+    // Only add the major part of the os version.
+    std::string Version = getOSVersion();
+    Triple += Version.substr(0, Version.find('.'));
+  }
+
+  return Triple;
+}
diff --git a/final/lib/Support/Unix/Memory.inc b/final/lib/Support/Unix/Memory.inc
new file mode 100644
index 00000000000..4312d67183c
--- /dev/null
+++ b/final/lib/Support/Unix/Memory.inc
@@ -0,0 +1,151 @@
+//===- Unix/Memory.cpp - Generic UNIX System Configuration ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some functions for various memory management utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach/mach.h>
+#endif
+
+/// AllocateRWX - Allocate a slab of memory with read/write/execute
+/// permissions. This is typically used for JIT applications where we want
+/// to emit code to the memory then jump to it.
Getting this type of memory +/// is very OS specific. +/// +llvm::sys::MemoryBlock +llvm::sys::Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock, + std::string *ErrMsg) { + if (NumBytes == 0) return MemoryBlock(); + + size_t pageSize = Process::GetPageSize(); + size_t NumPages = (NumBytes+pageSize-1)/pageSize; + + int fd = -1; +#ifdef NEED_DEV_ZERO_FOR_MMAP + static int zero_fd = open("/dev/zero", O_RDWR); + if (zero_fd == -1) { + MakeErrMsg(ErrMsg, "Can't open /dev/zero device"); + return MemoryBlock(); + } + fd = zero_fd; +#endif + + int flags = MAP_PRIVATE | +#ifdef HAVE_MMAP_ANONYMOUS + MAP_ANONYMOUS +#else + MAP_ANON +#endif + ; + + void* start = NearBlock ? (unsigned char*)NearBlock->base() + + NearBlock->size() : 0; + +#if defined(__APPLE__) && defined(__arm__) + void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_EXEC, + flags, fd, 0); +#else + void *pa = ::mmap(start, pageSize*NumPages, PROT_READ|PROT_WRITE|PROT_EXEC, + flags, fd, 0); +#endif + if (pa == MAP_FAILED) { + if (NearBlock) //Try again without a near hint + return AllocateRWX(NumBytes, 0); + + MakeErrMsg(ErrMsg, "Can't allocate RWX Memory"); + return MemoryBlock(); + } + +#if defined(__APPLE__) && defined(__arm__) + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)pa, + (vm_size_t)(pageSize*NumPages), 0, + VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); + if (KERN_SUCCESS != kr) { + MakeErrMsg(ErrMsg, "vm_protect max RX failed"); + return sys::MemoryBlock(); + } + + kr = vm_protect(mach_task_self(), (vm_address_t)pa, + (vm_size_t)(pageSize*NumPages), 0, + VM_PROT_READ | VM_PROT_WRITE); + if (KERN_SUCCESS != kr) { + MakeErrMsg(ErrMsg, "vm_protect RW failed"); + return sys::MemoryBlock(); + } +#endif + + MemoryBlock result; + result.Address = pa; + result.Size = NumPages*pageSize; + + return result; +} + +bool llvm::sys::Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { + if (M.Address == 0 || M.Size == 0) return false; + if (0 != ::munmap(M.Address, M.Size)) + return MakeErrMsg(ErrMsg, "Can't release RWX Memory"); + return false; +} + +bool llvm::sys::Memory::setWritable (MemoryBlock &M, std::string *ErrMsg) { +#if defined(__APPLE__) && defined(__arm__) + if (M.Address == 0 || M.Size == 0) return false; + sys::Memory::InvalidateInstructionCache(M.Address, M.Size); + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address, + (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_WRITE); + return KERN_SUCCESS == kr; +#else + return true; +#endif +} + +bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) { +#if defined(__APPLE__) && defined(__arm__) + if (M.Address == 0 || M.Size == 0) return false; + sys::Memory::InvalidateInstructionCache(M.Address, M.Size); + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address, + (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); + return KERN_SUCCESS == kr; +#else + return false; +#endif +} + +bool llvm::sys::Memory::setRangeWritable(const void *Addr, size_t Size) { +#if defined(__APPLE__) && defined(__arm__) + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr, + (vm_size_t)Size, 0, + VM_PROT_READ | VM_PROT_WRITE); + return KERN_SUCCESS == kr; +#else + return true; +#endif +} + +bool llvm::sys::Memory::setRangeExecutable(const void *Addr, size_t Size) { +#if defined(__APPLE__) && defined(__arm__) + kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)Addr, + (vm_size_t)Size, 0, + VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); + return KERN_SUCCESS == 
kr;
+#else
+  return true;
+#endif
+}
diff --git a/final/lib/Support/Unix/Mutex.inc b/final/lib/Support/Unix/Mutex.inc
new file mode 100644
index 00000000000..fe6b1704145
--- /dev/null
+++ b/final/lib/Support/Unix/Mutex.inc
@@ -0,0 +1,43 @@
+//===- llvm/Support/Unix/Mutex.inc - Unix Mutex Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm
+{
+using namespace sys;
+
+MutexImpl::MutexImpl( bool recursive)
+{
+}
+
+MutexImpl::~MutexImpl()
+{
+}
+
+bool
+MutexImpl::release()
+{
+  return true;
+}
+
+bool
+MutexImpl::tryacquire( void )
+{
+  return true;
+}
+
+}
diff --git a/final/lib/Support/Unix/Path.inc b/final/lib/Support/Unix/Path.inc
new file mode 100644
index 00000000000..0f6e800505e
--- /dev/null
+++ b/final/lib/Support/Unix/Path.inc
@@ -0,0 +1,887 @@
+//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_UTIME_H
+#include <utime.h>
+#endif
+#if HAVE_TIME_H
+#include <time.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
+
+// Put in a hack for Cygwin which falsely reports that the mkdtemp function
+// is available when it is not.
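+// (The undef below forces GetTemporaryDirectory() to take its mkstemp-based
+// branch on Cygwin.)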
+#ifdef __CYGWIN__
+# undef HAVE_MKDTEMP
+#endif
+
+namespace {
+inline bool lastIsSlash(const std::string& path) {
+  return !path.empty() && path[path.length() - 1] == '/';
+}
+
+}
+
+namespace llvm {
+using namespace sys;
+
+const char sys::PathSeparator = ':';
+
+StringRef Path::GetEXESuffix() {
+  return StringRef();
+}
+
+Path::Path(StringRef p)
+  : path(p) {}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+  : path(StrStart, StrLen) {}
+
+Path&
+Path::operator=(StringRef that) {
+  path.assign(that.data(), that.size());
+  return *this;
+}
+
+bool
+Path::isValid() const {
+  // Empty paths are considered invalid here.
+  // This code doesn't check MAXPATHLEN because there's no need. Nothing in
+  // LLVM manipulates Paths with fixed-size arrays, and if the OS can't
+  // handle names longer than some limit, it'll report this on demand using
+  // ENAMETOOLONG.
+  return !path.empty();
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+  assert(NameStart);
+  if (NameLen == 0)
+    return false;
+  return NameStart[0] == '/';
+}
+
+bool
+Path::isAbsolute() const {
+  if (path.empty())
+    return false;
+  return path[0] == '/';
+}
+
+Path
+Path::GetRootDirectory() {
+  Path result;
+  result.set("/");
+  return result;
+}
+
+Path
+Path::GetTemporaryDirectory(std::string *ErrMsg) {
+#if defined(HAVE_MKDTEMP)
+  // The best way is with mkdtemp but that's not available on many systems;
+  // Linux and FreeBSD have it. Others probably won't.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  if (0 == mkdtemp(pathname)) {
+    MakeErrMsg(ErrMsg,
+               std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#elif defined(HAVE_MKSTEMP)
+  // If no mkdtemp is available, mkstemp can be used to create a temporary file
+  // which is then removed and created as a directory. We prefer this over
+  // mktemp because of mktemp's inherent security and threading risks. We still
+  // have a slight race condition from the time the temporary file is created to
+  // the time it is re-created as a directory.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  int fd = 0;
+  if (-1 == (fd = mkstemp(pathname))) {
+    MakeErrMsg(ErrMsg,
+               std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  ::close(fd);
+  ::unlink(pathname); // start race condition, ignore errors
+  if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition
+    MakeErrMsg(ErrMsg,
+               std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#elif defined(HAVE_MKTEMP)
+  // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have
+  // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable
+  // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing
+  // the XXXXXX with the pid of the process and a letter. That leads to only
+  // twenty six temporary files that can be generated.
+  char pathname[] = "/tmp/llvm_XXXXXX";
+  char *TmpName = ::mktemp(pathname);
+  if (TmpName == 0) {
+    MakeErrMsg(ErrMsg,
+               std::string(TmpName) + ": can't create unique directory name");
+    return Path();
+  }
+  if (-1 == ::mkdir(TmpName, S_IRWXU)) {
+    MakeErrMsg(ErrMsg,
+               std::string(TmpName) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(TmpName);
+#else
+  // This is the worst case implementation. tempnam(3) leaks memory unless it's
+  // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread
+  // issues. The mktemp(3) function doesn't have enough variability in the
+  // temporary name generated. So, we provide our own implementation that
+  // increments an integer from a random number seeded by the current time. This
+  // should be sufficiently unique that we don't have many collisions between
+  // processes. Generally LLVM processes don't run very long and don't use very
+  // many temporary files so this shouldn't be a big issue for LLVM.
+  static time_t num = ::time(0);
+  char pathname[MAXPATHLEN];
+  do {
+    num++;
+    sprintf(pathname, "/tmp/llvm_%010u", unsigned(num));
+  } while ( 0 == access(pathname, F_OK ) );
+  if (-1 == ::mkdir(pathname, S_IRWXU)) {
+    MakeErrMsg(ErrMsg,
+               std::string(pathname) + ": can't create temporary directory");
+    return Path();
+  }
+  return Path(pathname);
+#endif
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+#ifdef LTDL_SHLIBPATH_VAR
+  char* env_var = getenv(LTDL_SHLIBPATH_VAR);
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#endif
+  // FIXME: Should this look at LD_LIBRARY_PATH too?
+  Paths.push_back(sys::Path("/usr/local/lib/"));
+  Paths.push_back(sys::Path("/usr/X11R6/lib/"));
+  Paths.push_back(sys::Path("/usr/lib/"));
+  Paths.push_back(sys::Path("/lib/"));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  return Path("/etc/llvm/");
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  const char* home = getenv("HOME");
+  Path result;
+  if (home && result.set(home))
+    return result;
+  result.set("/");
+  return result;
+}
+
+Path
+Path::GetCurrentDirectory() {
+  char pathname[MAXPATHLEN];
+  if (!getcwd(pathname,MAXPATHLEN)) {
+    assert (false && "Could not query current working directory.");
+    return Path();
+  }
+
+  return Path(pathname);
+}
+
+#if defined(__FreeBSD__) || defined (__NetBSD__) || \
+    defined(__OpenBSD__) || defined(__minix)
+static int
+test_dir(char buf[PATH_MAX], char ret[PATH_MAX],
+    const char *dir, const char *bin)
+{
+  struct stat sb;
+
+  snprintf(buf, PATH_MAX, "%s/%s", dir, bin);
+  if (realpath(buf, ret) == NULL)
+    return (1);
+  if (stat(buf, &sb) != 0)
+    return (1);
+
+  return (0);
+}
+
+static char *
+getprogpath(char ret[PATH_MAX], const char *bin)
+{
+  char *pv, *s, *t, buf[PATH_MAX];
+
+  /* First approach: absolute path. */
+  if (bin[0] == '/') {
+    if (test_dir(buf, ret, "/", bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Second approach: relative path. */
+  if (strchr(bin, '/') != NULL) {
+    if (getcwd(buf, PATH_MAX) == NULL)
+      return (NULL);
+    if (test_dir(buf, ret, buf, bin) == 0)
+      return (ret);
+    return (NULL);
+  }
+
+  /* Third approach: $PATH */
+  if ((pv = getenv("PATH")) == NULL)
+    return (NULL);
+  s = pv = strdup(pv);
+  if (pv == NULL)
+    return (NULL);
+  while ((t = strsep(&s, ":")) != NULL) {
+    if (test_dir(buf, ret, t, bin) == 0) {
+      free(pv);
+      return (ret);
+    }
+  }
+  free(pv);
+  return (NULL);
+}
+#endif // __FreeBSD__ || __NetBSD__
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+#if defined(__APPLE__)
+  // On OS X the executable path is saved to the stack by dyld.
Reading it + // from there is much faster than calling dladdr, especially for large + // binaries with symbols. + char exe_path[MAXPATHLEN]; + uint32_t size = sizeof(exe_path); + if (_NSGetExecutablePath(exe_path, &size) == 0) { + char link_path[MAXPATHLEN]; + if (realpath(exe_path, link_path)) + return Path(link_path); + } +#elif defined(__FreeBSD__) || defined (__NetBSD__) || \ + defined(__OpenBSD__) || defined(__minix) + char exe_path[PATH_MAX]; + + if (getprogpath(exe_path, argv0) != NULL) + return Path(exe_path); +#elif defined(__linux__) || defined(__CYGWIN__) + char exe_path[MAXPATHLEN]; + ssize_t len = readlink("/proc/self/exe", exe_path, sizeof(exe_path)); + if (len >= 0) + return Path(StringRef(exe_path, len)); +#elif defined(HAVE_DLFCN_H) + // Use dladdr to get executable path if available. + Dl_info DLInfo; + int err = dladdr(MainAddr, &DLInfo); + if (err == 0) + return Path(); + + // If the filename is a symlink, we need to resolve and return the location of + // the actual executable. + char link_path[MAXPATHLEN]; + if (realpath(DLInfo.dli_fname, link_path)) + return Path(link_path); +#else +#error GetMainExecutable is not implemented on this host yet. +#endif + return Path(); +} + + +StringRef Path::getDirname() const { + return getDirnameCharSep(path, "/"); +} + +StringRef +Path::getBasename() const { + // Find the last slash + std::string::size_type slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + std::string::size_type dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return StringRef(path).substr(slash); + else + return StringRef(path).substr(slash, dot - slash); +} + +StringRef +Path::getSuffix() const { + // Find the last slash + std::string::size_type slash = path.rfind('/'); + if (slash == std::string::npos) + slash = 0; + else + slash++; + + std::string::size_type dot = path.rfind('.'); + if (dot == std::string::npos || dot < slash) + return StringRef(); + else + return StringRef(path).substr(dot + 1); +} + +bool Path::getMagicNumber(std::string &Magic, unsigned len) const { + assert(len < 1024 && "Request for magic string too long"); + char Buf[1025]; + int fd = ::open(path.c_str(), O_RDONLY); + if (fd < 0) + return false; + ssize_t bytes_read = ::read(fd, Buf, len); + ::close(fd); + if (ssize_t(len) != bytes_read) + return false; + Magic.assign(Buf, len); + return true; +} + +bool +Path::exists() const { + return 0 == access(path.c_str(), F_OK ); +} + +bool +Path::isDirectory() const { + struct stat buf; + if (0 != stat(path.c_str(), &buf)) + return false; + return ((buf.st_mode & S_IFMT) == S_IFDIR) ? 
true : false; +} + +bool +Path::isSymLink() const { + struct stat buf; + if (0 != lstat(path.c_str(), &buf)) + return false; + return S_ISLNK(buf.st_mode); +} + + +bool +Path::canRead() const { + return 0 == access(path.c_str(), R_OK); +} + +bool +Path::canWrite() const { + return 0 == access(path.c_str(), W_OK); +} + +bool +Path::isRegularFile() const { + // Get the status so we can determine if it's a file or directory + struct stat buf; + + if (0 != stat(path.c_str(), &buf)) + return false; + + if (S_ISREG(buf.st_mode)) + return true; + + return false; +} + +bool +Path::canExecute() const { + if (0 != access(path.c_str(), R_OK | X_OK )) + return false; + struct stat buf; + if (0 != stat(path.c_str(), &buf)) + return false; + if (!S_ISREG(buf.st_mode)) + return false; + return true; +} + +StringRef +Path::getLast() const { + // Find the last slash + size_t pos = path.rfind('/'); + + // Handle the corner cases + if (pos == std::string::npos) + return path; + + // If the last character is a slash + if (pos == path.length()-1) { + // Find the second to last slash + size_t pos2 = path.rfind('/', pos-1); + if (pos2 == std::string::npos) + return StringRef(path).substr(0,pos); + else + return StringRef(path).substr(pos2+1,pos-pos2-1); + } + // Return everything after the last slash + return StringRef(path).substr(pos+1); +} + +const FileStatus * +PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { + if (!fsIsValid || update) { + struct stat buf; + if (0 != stat(path.c_str(), &buf)) { + MakeErrMsg(ErrStr, path + ": can't get status of file"); + return 0; + } + status.fileSize = buf.st_size; + status.modTime.fromEpochTime(buf.st_mtime); + status.mode = buf.st_mode; + status.user = buf.st_uid; + status.group = buf.st_gid; + status.uniqueID = uint64_t(buf.st_ino); + status.isDir = S_ISDIR(buf.st_mode); + status.isFile = S_ISREG(buf.st_mode); + fsIsValid = true; + } + return &status; +} + +static bool AddPermissionBits(const Path &File, int bits) { + // Get the umask value from the operating system. We want to use it + // when changing the file's permissions. Since calling umask() sets + // the umask and returns its old value, we must call it a second + // time to reset it to the user's preference. + int mask = umask(0777); // The arg. to umask is arbitrary. + umask(mask); // Restore the umask. + + // Get the file's current mode. + struct stat buf; + if (0 != stat(File.c_str(), &buf)) + return false; + // Change the file to have whichever permissions bits from 'bits' + // that the umask would not disable. 
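+  // Arithmetic sketch (umask value assumed): with a umask of 022, a request
+  // of 0222 from makeWriteableOnDisk() reduces to 0222 & ~022 == 0200, so
+  // only the owner write bit is added.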
+  if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1)
+    return false;
+  return true;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0444))
+    return MakeErrMsg(ErrMsg, path + ": can't make file readable");
+  return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0222))
+    return MakeErrMsg(ErrMsg, path + ": can't make file writable");
+  return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+  if (!AddPermissionBits(*this, 0111))
+    return MakeErrMsg(ErrMsg, path + ": can't make file executable");
+  return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+  DIR* direntries = ::opendir(path.c_str());
+  if (direntries == 0)
+    return MakeErrMsg(ErrMsg, path + ": can't open directory");
+
+  std::string dirPath = path;
+  if (!lastIsSlash(dirPath))
+    dirPath += '/';
+
+  result.clear();
+  struct dirent* de = ::readdir(direntries);
+  for ( ; de != 0; de = ::readdir(direntries)) {
+    if (de->d_name[0] != '.') {
+      Path aPath(dirPath + (const char*)de->d_name);
+      struct stat st;
+      if (0 != lstat(aPath.path.c_str(), &st)) {
+        if (S_ISLNK(st.st_mode))
+          continue; // dangling symlink -- ignore
+        return MakeErrMsg(ErrMsg,
+                          aPath.path + ": can't determine file object type");
+      }
+      result.insert(aPath);
+    }
+  }
+
+  closedir(direntries);
+  return false;
+}
+
+bool
+Path::set(StringRef a_path) {
+  if (a_path.empty())
+    return false;
+  path = a_path;
+  return true;
+}
+
+bool
+Path::appendComponent(StringRef name) {
+  if (name.empty())
+    return false;
+  if (!lastIsSlash(path))
+    path += '/';
+  path += name;
+  return true;
+}
+
+bool
+Path::eraseComponent() {
+  size_t slashpos = path.rfind('/',path.size());
+  if (slashpos == 0 || slashpos == std::string::npos) {
+    path.erase();
+    return true;
+  }
+  if (slashpos == path.size() - 1)
+    slashpos = path.rfind('/',slashpos-1);
+  if (slashpos == std::string::npos) {
+    path.erase();
+    return true;
+  }
+  path.erase(slashpos);
+  return true;
+}
+
+bool
+Path::eraseSuffix() {
+  size_t dotpos = path.rfind('.',path.size());
+  size_t slashpos = path.rfind('/',path.size());
+  if (dotpos != std::string::npos) {
+    if (slashpos == std::string::npos || dotpos > slashpos+1) {
+      path.erase(dotpos, path.size()-dotpos);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool createDirectoryHelper(char* beg, char* end, bool create_parents) {
+
+  if (access(beg, R_OK | W_OK) == 0)
+    return false;
+
+  if (create_parents) {
+
+    char* c = end;
+
+    for (; c != beg; --c)
+      if (*c == '/') {
+
+        // Recurse to handling the parent directory.
+        *c = '\0';
+        bool x = createDirectoryHelper(beg, c, create_parents);
+        *c = '/';
+
+        // Return if we encountered an error.
+ if (x) + return true; + + break; + } + } + + return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +} + +bool +Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { + // Get a writeable copy of the path name + std::string pathname(path); + + // Null-terminate the last component + size_t lastchar = path.length() - 1 ; + + if (pathname[lastchar] != '/') + ++lastchar; + + pathname[lastchar] = '\0'; + + if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents)) + return MakeErrMsg(ErrMsg, pathname + ": can't create directory"); + + return false; +} + +bool +Path::createFileOnDisk(std::string* ErrMsg) { + // Create the file + int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); + if (fd < 0) + return MakeErrMsg(ErrMsg, path + ": can't create file"); + ::close(fd); + return false; +} + +bool +Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { + // Make this into a unique file name + if (makeUnique( reuse_current, ErrMsg )) + return true; + + // create the file + int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + if (fd < 0) + return MakeErrMsg(ErrMsg, path + ": can't create temporary file"); + ::close(fd); + return false; +} + +bool +Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { + // Get the status so we can determine if it's a file or directory. + struct stat buf; + if (0 != stat(path.c_str(), &buf)) { + MakeErrMsg(ErrStr, path + ": can't get status of file"); + return true; + } + + // Note: this check catches strange situations. In all cases, LLVM should + // only be involved in the creation and deletion of regular files. This + // check ensures that what we're trying to erase is a regular file. It + // effectively prevents LLVM from erasing things like /dev/null, any block + // special file, or other things that aren't "regular" files. + if (S_ISREG(buf.st_mode)) { + if (unlink(path.c_str()) != 0) + return MakeErrMsg(ErrStr, path + ": can't destroy file"); + return false; + } + + if (!S_ISDIR(buf.st_mode)) { + if (ErrStr) *ErrStr = "not a file or directory"; + return true; + } + + if (remove_contents) { + // Recursively descend the directory to remove its contents. + std::string cmd = "/bin/rm -rf " + path; + if (system(cmd.c_str()) != 0) { + MakeErrMsg(ErrStr, path + ": failed to recursively remove directory."); + return true; + } + return false; + } + + // Otherwise, try to just remove the one directory. 
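+  // e.g. a stored path of "/tmp/foo/" (value assumed) is trimmed to
+  // "/tmp/foo" below before rmdir(2) is called on it.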
+  std::string pathname(path);
+  size_t lastchar = path.length() - 1;
+  if (pathname[lastchar] == '/')
+    pathname[lastchar] = '\0';
+  else
+    pathname[lastchar+1] = '\0';
+
+  if (rmdir(pathname.c_str()) != 0)
+    return MakeErrMsg(ErrStr, pathname + ": can't erase directory");
+  return false;
+}
+
+bool
+Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) {
+  if (0 != ::rename(path.c_str(), newName.c_str()))
+    return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" +
+                      newName.str() + "'");
+  return false;
+}
+
+bool
+Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const {
+  struct utimbuf utb;
+  utb.actime = si.modTime.toPosixTime();
+  utb.modtime = utb.actime;
+  if (0 != ::utime(path.c_str(),&utb))
+    return MakeErrMsg(ErrStr, path + ": can't set file modification time");
+  if (0 != ::chmod(path.c_str(),si.mode))
+    return MakeErrMsg(ErrStr, path + ": can't set mode");
+  return false;
+}
+
+bool
+sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){
+  int inFile = -1;
+  int outFile = -1;
+  inFile = ::open(Src.c_str(), O_RDONLY);
+  if (inFile == -1)
+    return MakeErrMsg(ErrMsg, Src.str() +
+                      ": can't open source file to copy");
+
+  outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666);
+  if (outFile == -1) {
+    ::close(inFile);
+    return MakeErrMsg(ErrMsg, Dest.str() +
+                      ": can't create destination file for copy");
+  }
+
+  char Buffer[16*1024];
+  while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) {
+    if (Amt == -1) {
+      if (errno != EINTR && errno != EAGAIN) {
+        ::close(inFile);
+        ::close(outFile);
+        return MakeErrMsg(ErrMsg, Src.str()+": can't read source file");
+      }
+    } else {
+      char *BufPtr = Buffer;
+      while (Amt) {
+        ssize_t AmtWritten = ::write(outFile, BufPtr, Amt);
+        if (AmtWritten == -1) {
+          if (errno != EINTR && errno != EAGAIN) {
+            ::close(inFile);
+            ::close(outFile);
+            return MakeErrMsg(ErrMsg, Dest.str() +
+                              ": can't write destination file");
+          }
+        } else {
+          Amt -= AmtWritten;
+          BufPtr += AmtWritten;
+        }
+      }
+    }
+  }
+  ::close(inFile);
+  ::close(outFile);
+  return false;
+}
+
+bool
+Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
+  bool Exists;
+  if (reuse_current && (fs::exists(path, Exists) || !Exists))
+    return false; // File doesn't exist already, just use it!
+
+  // Append an XXXXXX pattern to the end of the file for use with mkstemp,
+  // mktemp or our own implementation.
+  // This uses std::vector instead of SmallVector to avoid a dependence on
+  // libSupport. And performance isn't critical here.
+  std::vector<char> Buf;
+  Buf.resize(path.size()+8);
+  char *FNBuffer = &Buf[0];
+  path.copy(FNBuffer,path.size());
+  bool isdir;
+  if (!fs::is_directory(path, isdir) && isdir)
+    strcpy(FNBuffer+path.size(), "/XXXXXX");
+  else
+    strcpy(FNBuffer+path.size(), "-XXXXXX");
+
+#if defined(HAVE_MKSTEMP)
+  int TempFD;
+  if ((TempFD = mkstemp(FNBuffer)) == -1)
+    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+  // We don't need to hold the temp file descriptor... we will trust that no one
+  // will overwrite/delete the file before we can open it again.
+  close(TempFD);
+
+  // Save the name
+  path = FNBuffer;
+#elif defined(HAVE_MKTEMP)
+  // If we don't have mkstemp, use the old and obsolete mktemp function.
+  if (mktemp(FNBuffer) == 0)
+    return MakeErrMsg(ErrMsg, path + ": can't make unique filename");
+
+  // Save the name
+  path = FNBuffer;
+#else
+  // Okay, looks like we have to do it all by our lonesome.
+  static unsigned FCounter = 0;
+  // Try to initialize with unique value.
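+  // Seeding sketch (pid assumed): a pid of 0x1234 yields an initial FCounter
+  // of 0x123400, so concurrent processes start probing at different names.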
+  if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+  char* pos = strstr(FNBuffer, "XXXXXX");
+  do {
+    if (++FCounter > 0xFFFFFF) {
+      return MakeErrMsg(ErrMsg,
+                        path + ": can't make unique filename: too many files");
+    }
+    sprintf(pos, "%06X", FCounter);
+    path = FNBuffer;
+  } while (exists());
+  // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+  // LLVM.
+#endif
+  return false;
+}
+
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  int Flags = MAP_PRIVATE;
+#ifdef MAP_FILE
+  Flags |= MAP_FILE;
+#endif
+  void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0);
+  if (BasePtr == MAP_FAILED)
+    return 0;
+  return (const char*)BasePtr;
+}
+
+void Path::UnMapFilePages(const char *BasePtr, uint64_t FileSize) {
+  ::munmap((void*)BasePtr, FileSize);
+}
+
+} // end llvm namespace
diff --git a/final/lib/Support/Unix/PathV2.inc b/final/lib/Support/Unix/PathV2.inc
new file mode 100644
index 00000000000..03ff28367e4
--- /dev/null
+++ b/final/lib/Support/Unix/PathV2.inc
@@ -0,0 +1,507 @@
+//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#if HAVE_DIRENT_H
+# include <dirent.h>
+# define NAMLEN(dirent) strlen((dirent)->d_name)
+#else
+# define dirent direct
+# define NAMLEN(dirent) (dirent)->d_namlen
+# if HAVE_SYS_NDIR_H
+#  include <sys/ndir.h>
+# endif
+# if HAVE_SYS_DIR_H
+#  include <sys/dir.h>
+# endif
+# if HAVE_NDIR_H
+#  include <ndir.h>
+# endif
+#endif
+#if HAVE_STDIO_H
+#include <stdio.h>
+#endif
+
+using namespace llvm;
+
+namespace {
+  /// This class automatically closes the given file descriptor when it goes out
+  /// of scope. You can take back explicit ownership of the file descriptor by
+  /// calling take(). The destructor does not verify that close was successful.
+  /// Therefore, never allow this class to call close on a file descriptor that
+  /// has been read from or written to.
+  struct AutoFD {
+    int FileDescriptor;
+
+    AutoFD(int fd) : FileDescriptor(fd) {}
+    ~AutoFD() {
+      if (FileDescriptor >= 0)
+        ::close(FileDescriptor);
+    }
+
+    int take() {
+      int ret = FileDescriptor;
+      FileDescriptor = -1;
+      return ret;
+    }
+
+    operator int() const {return FileDescriptor;}
+  };
+
+  error_code TempDir(SmallVectorImpl<char> &result) {
+    // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
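+    // The chain below takes the first variable that is set, e.g. (value
+    // assumed) TMPDIR=/var/tmp yields "/var/tmp"; otherwise P_tmpdir (when
+    // the platform defines it), and finally "/tmp".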
+    const char *dir = 0;
+    (dir = std::getenv("TMPDIR" )) ||
+    (dir = std::getenv("TMP"    )) ||
+    (dir = std::getenv("TEMP"   )) ||
+    (dir = std::getenv("TEMPDIR")) ||
+#ifdef P_tmpdir
+    (dir = P_tmpdir) ||
+#endif
+    (dir = "/tmp");
+
+    result.clear();
+    StringRef d(dir);
+    result.append(d.begin(), d.end());
+    return success;
+  }
+}
+
+namespace llvm {
+namespace sys {
+namespace fs {
+
+error_code current_path(SmallVectorImpl<char> &result) {
+  result.reserve(MAXPATHLEN);
+
+  while (true) {
+    if (::getcwd(result.data(), result.capacity()) == 0) {
+      // See if there was a real error.
+      if (errno != errc::not_enough_memory)
+        return error_code(errno, system_category());
+      // Otherwise there just wasn't enough space.
+      result.reserve(result.capacity() * 2);
+    } else
+      break;
+  }
+
+  result.set_size(strlen(result.data()));
+  return success;
+}
+
+error_code copy_file(const Twine &from, const Twine &to, copy_option copt) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  const size_t buf_sz = 32768;
+  char buffer[buf_sz];
+  int from_file = -1, to_file = -1;
+
+  // Open from.
+  if ((from_file = ::open(f.begin(), O_RDONLY)) < 0)
+    return error_code(errno, system_category());
+  AutoFD from_fd(from_file);
+
+  // Stat from.
+  struct stat from_stat;
+  if (::stat(f.begin(), &from_stat) != 0)
+    return error_code(errno, system_category());
+
+  // Setup to flags.
+  int to_flags = O_CREAT | O_WRONLY;
+  if (copt == copy_option::fail_if_exists)
+    to_flags |= O_EXCL;
+
+  // Open to.
+  if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0)
+    return error_code(errno, system_category());
+  AutoFD to_fd(to_file);
+
+  // Copy!
+  ssize_t sz, sz_read = 1, sz_write;
+  while (sz_read > 0 &&
+         (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) {
+    // Allow for partial writes - see Advanced Unix Programming (2nd Ed.),
+    // Marc Rochkind, Addison-Wesley, 2004, page 94
+    sz_write = 0;
+    do {
+      if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) {
+        sz_read = sz; // cause read loop termination.
+        break;        // error.
+      }
+      sz_write += sz;
+    } while (sz_write < sz_read);
+  }
+
+  // After all the file operations above the return value of close actually
+  // matters.
+  if (::close(from_fd.take()) < 0) sz_read = -1;
+  if (::close(to_fd.take()) < 0) sz_read = -1;
+
+  // Check for errors.
+  if (sz_read < 0)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code create_directory(const Twine &path, bool &existed) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) {
+    if (errno != errc::file_exists)
+      return error_code(errno, system_category());
+    existed = true;
+  } else
+    existed = false;
+
+  return success;
+}
+
+error_code create_hard_link(const Twine &to, const Twine &from) {
+  // Get arguments.
+  SmallString<128> from_storage;
+  SmallString<128> to_storage;
+  StringRef f = from.toNullTerminatedStringRef(from_storage);
+  StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+  if (::link(t.begin(), f.begin()) == -1)
+    return error_code(errno, system_category());
+
+  return success;
+}
+
+error_code create_symlink(const Twine &to, const Twine &from) {
+  // Get arguments.
+ SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::symlink(t.begin(), f.begin()) == -1) + return error_code(errno, system_category()); + + return success; +} + +error_code remove(const Twine &path, bool &existed) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + if (::remove(p.begin()) == -1) { + if (errno != errc::no_such_file_or_directory) + return error_code(errno, system_category()); + existed = false; + } else + existed = true; + + return success; +} + +error_code rename(const Twine &from, const Twine &to) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); + + if (::rename(f.begin(), t.begin()) == -1) { + // If it's a cross device link, copy then delete, otherwise return the error + if (errno == EXDEV) { + if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists)) + return ec; + bool Existed; + if (error_code ec = remove(from, Existed)) + return ec; + } else + return error_code(errno, system_category()); + } + + return success; +} + +error_code resize_file(const Twine &path, uint64_t size) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + if (::truncate(p.begin(), size) == -1) + return error_code(errno, system_category()); + + return success; +} + +error_code exists(const Twine &path, bool &result) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + + struct stat status; + if (::stat(p.begin(), &status) == -1) { + if (errno != errc::no_such_file_or_directory) + return error_code(errno, system_category()); + result = false; + } else + result = true; + + return success; +} + +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + // Get arguments. + SmallString<128> a_storage; + SmallString<128> b_storage; + StringRef a = A.toNullTerminatedStringRef(a_storage); + StringRef b = B.toNullTerminatedStringRef(b_storage); + + struct stat stat_a, stat_b; + int error_b = ::stat(b.begin(), &stat_b); + int error_a = ::stat(a.begin(), &stat_a); + + // If both are invalid, it's an error. If only one is, the result is false. 
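+  // e.g. (paths assumed) equivalent("/etc", "/etc/../etc", result) should
+  // set result to true, since both names stat to the same (st_dev, st_ino)
+  // pair.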
+  if (error_a != 0 || error_b != 0) {
+    if (error_a == error_b)
+      return error_code(errno, system_category());
+    result = false;
+  } else {
+    result =
+      stat_a.st_dev == stat_b.st_dev &&
+      stat_a.st_ino == stat_b.st_ino;
+  }
+
+  return success;
+}
+
+error_code file_size(const Twine &path, uint64_t &result) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  struct stat status;
+  if (::stat(p.begin(), &status) == -1)
+    return error_code(errno, system_category());
+  if (!S_ISREG(status.st_mode))
+    return make_error_code(errc::operation_not_permitted);
+
+  result = status.st_size;
+  return success;
+}
+
+error_code status(const Twine &path, file_status &result) {
+  SmallString<128> path_storage;
+  StringRef p = path.toNullTerminatedStringRef(path_storage);
+
+  struct stat status;
+  if (::stat(p.begin(), &status) != 0) {
+    error_code ec(errno, system_category());
+    if (ec == errc::no_such_file_or_directory)
+      result = file_status(file_type::file_not_found);
+    else
+      result = file_status(file_type::status_error);
+    return ec;
+  }
+
+  if (S_ISDIR(status.st_mode))
+    result = file_status(file_type::directory_file);
+  else if (S_ISREG(status.st_mode))
+    result = file_status(file_type::regular_file);
+  else if (S_ISBLK(status.st_mode))
+    result = file_status(file_type::block_file);
+  else if (S_ISCHR(status.st_mode))
+    result = file_status(file_type::character_file);
+  else if (S_ISFIFO(status.st_mode))
+    result = file_status(file_type::fifo_file);
+  else if (S_ISSOCK(status.st_mode))
+    result = file_status(file_type::socket_file);
+  else
+    result = file_status(file_type::type_unknown);
+
+  return success;
+}
+
+error_code unique_file(const Twine &model, int &result_fd,
+                       SmallVectorImpl<char> &result_path) {
+  SmallString<128> Model;
+  model.toVector(Model);
+  // Null terminate.
+  Model.c_str();
+
+  // Make model absolute by prepending a temp directory if it's not already.
+  bool absolute = path::is_absolute(Twine(Model));
+  if (!absolute) {
+    SmallString<128> TDir;
+    if (error_code ec = TempDir(TDir)) return ec;
+    path::append(TDir, Twine(Model));
+    Model.swap(TDir);
+  }
+
+  // Replace '%' with random chars. From here on, DO NOT modify model. It may be
+  // needed if the randomly chosen path already exists.
+  SmallString<128> RandomPath;
+  RandomPath.reserve(Model.size() + 1);
+  ::srand(::time(NULL));
+
+retry_random_path:
+  // This is opened here instead of above to make it easier to track when to
+  // close it. Collisions should be rare enough for the possible extra syscalls
+  // not to matter.
+  FILE *RandomSource = ::fopen("/dev/urandom", "r");
+  RandomPath.set_size(0);
+  for (SmallVectorImpl<char>::const_iterator i = Model.begin(),
+                                             e = Model.end(); i != e; ++i) {
+    if (*i == '%') {
+      char val = 0;
+      if (RandomSource)
+        val = fgetc(RandomSource);
+      else
+        val = ::rand();
+      RandomPath.push_back("0123456789abcdef"[val & 15]);
+    } else
+      RandomPath.push_back(*i);
+  }
+
+  if (RandomSource)
+    ::fclose(RandomSource);
+
+  // Try to open + create the file.
+retry_open_create:
+  int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
+  if (RandomFD == -1) {
+    // If the file existed, try again, otherwise, error.
+    if (errno == errc::file_exists)
+      goto retry_random_path;
+    // The path prefix doesn't exist.
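+    // e.g. (model assumed) "/a/b/file-%%%%" with "/a/b" missing lands here;
+    // the loop below creates each parent component, then retries the open.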
+    if (errno == errc::no_such_file_or_directory) {
+      StringRef p(RandomPath.begin(), RandomPath.size());
+      SmallString<64> dir_to_create;
+      for (path::const_iterator i = path::begin(p),
+                                e = --path::end(p); i != e; ++i) {
+        path::append(dir_to_create, *i);
+        bool Exists;
+        if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec;
+        if (!Exists) {
+          // Don't try to create network paths.
+          if (i->size() > 2 && (*i)[0] == '/' &&
+                               (*i)[1] == '/' &&
+                               (*i)[2] != '/')
+            return make_error_code(errc::no_such_file_or_directory);
+          if (::mkdir(dir_to_create.c_str(), 0700) == -1)
+            return error_code(errno, system_category());
+        }
+      }
+      goto retry_open_create;
+    }
+    return error_code(errno, system_category());
+  }
+
+  // Make the path absolute.
+  char real_path_buff[PATH_MAX + 1];
+  if (realpath(RandomPath.c_str(), real_path_buff) == NULL) {
+    int error = errno;
+    ::close(RandomFD);
+    ::unlink(RandomPath.c_str());
+    return error_code(error, system_category());
+  }
+
+  result_path.clear();
+  StringRef d(real_path_buff);
+  result_path.append(d.begin(), d.end());
+
+  result_fd = RandomFD;
+  return success;
+}
+
+error_code directory_iterator_construct(directory_iterator &it, StringRef path){
+  SmallString<128> path_null(path);
+  DIR *directory = ::opendir(path_null.c_str());
+  if (directory == 0)
+    return error_code(errno, system_category());
+
+  it.IterationHandle = reinterpret_cast<intptr_t>(directory);
+  // Add something for replace_filename to replace.
+  path::append(path_null, ".");
+  it.CurrentEntry = directory_entry(path_null.str());
+  return directory_iterator_increment(it);
+}
+
+error_code directory_iterator_destruct(directory_iterator& it) {
+  if (it.IterationHandle)
+    ::closedir(reinterpret_cast<DIR *>(it.IterationHandle));
+  it.IterationHandle = 0;
+  it.CurrentEntry = directory_entry();
+  return success;
+}
+
+error_code directory_iterator_increment(directory_iterator& it) {
+  errno = 0;
+  dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle));
+  if (cur_dir == 0 && errno != 0) {
+    return error_code(errno, system_category());
+  } else if (cur_dir != 0) {
+    StringRef name(cur_dir->d_name, NAMLEN(cur_dir));
+    if ((name.size() == 1 && name[0] == '.') ||
+        (name.size() == 2 && name[0] == '.' && name[1] == '.'))
+      return directory_iterator_increment(it);
+    it.CurrentEntry.replace_filename(name);
+  } else
+    return directory_iterator_destruct(it);
+
+  return success;
+}
+
+error_code get_magic(const Twine &path, uint32_t len,
+                     SmallVectorImpl<char> &result) {
+  SmallString<128> PathStorage;
+  StringRef Path = path.toNullTerminatedStringRef(PathStorage);
+  result.set_size(0);
+
+  // Open path.
+  std::FILE *file = std::fopen(Path.data(), "rb");
+  if (file == 0)
+    return error_code(errno, system_category());
+
+  // Reserve storage.
+  result.reserve(len);
+
+  // Read magic!
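+  // Short-read sketch (sizes assumed): asking for 4 magic bytes from a
+  // 2-byte file returns the 2 bytes read plus value_too_large, per the EOF
+  // branch below.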
+  size_t size = std::fread(result.data(), 1, len, file);
+  if (std::ferror(file) != 0) {
+    std::fclose(file);
+    return error_code(errno, system_category());
+  } else if (size != result.size()) {
+    if (std::feof(file) != 0) {
+      std::fclose(file);
+      result.set_size(size);
+      return make_error_code(errc::value_too_large);
+    }
+  }
+  std::fclose(file);
+  result.set_size(len);
+  return success;
+}
+
+} // end namespace fs
+} // end namespace sys
+} // end namespace llvm
diff --git a/final/lib/Support/Unix/Process.inc b/final/lib/Support/Unix/Process.inc
new file mode 100644
index 00000000000..5cdb11ccebc
--- /dev/null
+++ b/final/lib/Support/Unix/Process.inc
@@ -0,0 +1,295 @@
+//===- Unix/Process.cpp - Unix Process Implementation --------- -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the generic Unix implementation of the Process class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#endif
+#ifdef HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+// DragonFly BSD has deprecated <malloc.h> for <stdlib.h> instead,
+//  Unix.h includes this for us already.
+#if defined(HAVE_MALLOC_H) && !defined(__DragonFly__)
+#include <malloc.h>
+#endif
+#ifdef HAVE_MALLOC_MALLOC_H
+#include <malloc/malloc.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#  include <sys/ioctl.h>
+#endif
+#ifdef HAVE_TERMIOS_H
+#  include <termios.h>
+#endif
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+using namespace sys;
+
+unsigned
+Process::GetPageSize()
+{
+#if defined(__CYGWIN__)
+  // On Cygwin, getpagesize() returns 64k but the page size for the purposes of
+  // memory protection and mmap() is 4k.
+  // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492
+  const int page_size = 0x1000;
+#elif defined(HAVE_GETPAGESIZE)
+  const int page_size = ::getpagesize();
+#elif defined(HAVE_SYSCONF)
+  long page_size = ::sysconf(_SC_PAGE_SIZE);
+#else
+#warning Cannot get the page size on this machine
+#endif
+  return static_cast<unsigned>(page_size);
+}
+
+size_t Process::GetMallocUsage() {
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi;
+  mi = ::mallinfo();
+  return mi.uordblks;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_in_use;   // darwin
+#elif defined(HAVE_SBRK)
+  // Note this is only an approximation and more closely resembles
+  // the value returned by mallinfo in the arena field.
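+  // i.e. usage is approximated as sbrk(0) minus sbrk(0) at first call;
+  // memory obtained via mmap(2) is not counted, so this can under-report.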
+  static char *StartOfMemory = reinterpret_cast<char*>(::sbrk(0));
+  char *EndOfMemory = (char*)sbrk(0);
+  if (EndOfMemory != ((char*)-1) && StartOfMemory != ((char*)-1))
+    return EndOfMemory - StartOfMemory;
+  else
+    return 0;
+#else
+#warning Cannot get malloc info on this platform
+  return 0;
+#endif
+}
+
+size_t
+Process::GetTotalMemoryUsage()
+{
+#if defined(HAVE_MALLINFO)
+  struct mallinfo mi = ::mallinfo();
+  return mi.uordblks + mi.hblkhd;
+#elif defined(HAVE_MALLOC_ZONE_STATISTICS) && defined(HAVE_MALLOC_MALLOC_H)
+  malloc_statistics_t Stats;
+  malloc_zone_statistics(malloc_default_zone(), &Stats);
+  return Stats.size_allocated;   // darwin
+#elif defined(HAVE_GETRUSAGE) && !defined(__HAIKU__)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  return usage.ru_maxrss;
+#else
+#warning Cannot get total memory size on this platform
+  return 0;
+#endif
+}
+
+void
+Process::GetTimeUsage(TimeValue& elapsed, TimeValue& user_time,
+                      TimeValue& sys_time)
+{
+  elapsed = TimeValue::now();
+#if defined(HAVE_GETRUSAGE)
+  struct rusage usage;
+  ::getrusage(RUSAGE_SELF, &usage);
+  user_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_utime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_utime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+  sys_time = TimeValue(
+    static_cast<TimeValue::SecondsType>( usage.ru_stime.tv_sec ),
+    static_cast<TimeValue::NanoSecondsType>( usage.ru_stime.tv_usec *
+      TimeValue::NANOSECONDS_PER_MICROSECOND ) );
+#else
+#warning Cannot get usage times on this platform
+  user_time.seconds(0);
+  user_time.microseconds(0);
+  sys_time.seconds(0);
+  sys_time.microseconds(0);
+#endif
+}
+
+int Process::GetCurrentUserId() {
+  return getuid();
+}
+
+int Process::GetCurrentGroupId() {
+  return getgid();
+}
+
+#ifdef HAVE_MACH_MACH_H
+#include <mach/mach.h>
+#endif
+
+// Some LLVM programs such as bugpoint produce core files as a normal part of
+// their operation. To prevent the disk from filling up, this function
+// does what's necessary to prevent their generation.
+void Process::PreventCoreFiles() {
+#if HAVE_SETRLIMIT
+  struct rlimit rlim;
+  rlim.rlim_cur = rlim.rlim_max = 0;
+  setrlimit(RLIMIT_CORE, &rlim);
+#endif
+
+#ifdef HAVE_MACH_MACH_H
+  // Disable crash reporting on Mac OS X 10.0-10.4
+
+  // get information about the original set of exception ports for the task
+  mach_msg_type_number_t Count = 0;
+  exception_mask_t OriginalMasks[EXC_TYPES_COUNT];
+  exception_port_t OriginalPorts[EXC_TYPES_COUNT];
+  exception_behavior_t OriginalBehaviors[EXC_TYPES_COUNT];
+  thread_state_flavor_t OriginalFlavors[EXC_TYPES_COUNT];
+  kern_return_t err =
+    task_get_exception_ports(mach_task_self(), EXC_MASK_ALL, OriginalMasks,
+                             &Count, OriginalPorts, OriginalBehaviors,
+                             OriginalFlavors);
+  if (err == KERN_SUCCESS) {
+    // replace each with MACH_PORT_NULL.
+    for (unsigned i = 0; i != Count; ++i)
+      task_set_exception_ports(mach_task_self(), OriginalMasks[i],
+                               MACH_PORT_NULL, OriginalBehaviors[i],
+                               OriginalFlavors[i]);
+  }
+
+  // Disable crash reporting on Mac OS X 10.5
+  signal(SIGABRT, _exit);
+  signal(SIGILL,  _exit);
+  signal(SIGFPE,  _exit);
+  signal(SIGSEGV, _exit);
+  signal(SIGBUS,  _exit);
+#endif
+}
+
+bool Process::StandardInIsUserInput() {
+  return FileDescriptorIsDisplayed(STDIN_FILENO);
+}
+
+bool Process::StandardOutIsDisplayed() {
+  return FileDescriptorIsDisplayed(STDOUT_FILENO);
+}
+
+bool Process::StandardErrIsDisplayed() {
+  return FileDescriptorIsDisplayed(STDERR_FILENO);
+}
+
+bool Process::FileDescriptorIsDisplayed(int fd) {
+#if HAVE_ISATTY
+  return isatty(fd);
+#else
+  // If we don't have isatty, just return false.
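+  // (Callers such as StandardOutColumns() and StandardOutHasColors() then
+  // treat the stream as a non-terminal, disabling column detection and
+  // color output.)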
+  return false;
+#endif
+}
+
+static unsigned getColumns(int FileID) {
+  // If COLUMNS is defined in the environment, wrap to that many columns.
+  if (const char *ColumnsStr = std::getenv("COLUMNS")) {
+    int Columns = std::atoi(ColumnsStr);
+    if (Columns > 0)
+      return Columns;
+  }
+
+  unsigned Columns = 0;
+
+#if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H)
+  // Try to determine the width of the terminal.
+  struct winsize ws;
+  if (ioctl(FileID, TIOCGWINSZ, &ws) == 0)
+    Columns = ws.ws_col;
+#endif
+
+  return Columns;
+}
+
+unsigned Process::StandardOutColumns() {
+  if (!StandardOutIsDisplayed())
+    return 0;
+
+  return getColumns(1);
+}
+
+unsigned Process::StandardErrColumns() {
+  if (!StandardErrIsDisplayed())
+    return 0;
+
+  return getColumns(2);
+}
+
+static bool terminalHasColors() {
+  if (const char *term = std::getenv("TERM")) {
+    // Most modern terminals support ANSI escape sequences for colors.
+    // We could check terminfo, or have a list of known terms that support
+    // colors, but that would be overkill.
+    // The user can always ask for no colors by setting TERM to dumb, or
+    // using a commandline flag.
+    return strcmp(term, "dumb") != 0;
+  }
+  return false;
+}
+
+bool Process::StandardOutHasColors() {
+  if (!StandardOutIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+  if (!StandardErrIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+  // No, we use ANSI escape sequences.
+  return false;
+}
+
+#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m"
+
+#define ALLCOLORS(FGBG,BOLD) {\
+    COLOR(FGBG, "0", BOLD),\
+    COLOR(FGBG, "1", BOLD),\
+    COLOR(FGBG, "2", BOLD),\
+    COLOR(FGBG, "3", BOLD),\
+    COLOR(FGBG, "4", BOLD),\
+    COLOR(FGBG, "5", BOLD),\
+    COLOR(FGBG, "6", BOLD),\
+    COLOR(FGBG, "7", BOLD)\
+  }
+
+static const char colorcodes[2][2][8][10] = {
+ { ALLCOLORS("3",""), ALLCOLORS("3","1;") },
+ { ALLCOLORS("4",""), ALLCOLORS("4","1;") }
+};
+
+const char *Process::OutputColor(char code, bool bold, bool bg) {
+  return colorcodes[bg?1:0][bold?1:0][code&7];
+}
+
+const char *Process::OutputBold(bool bg) {
+  return "\033[1m";
+}
+
+const char *Process::ResetColor() {
+  return "\033[0m";
+}
diff --git a/final/lib/Support/Unix/Program.inc b/final/lib/Support/Unix/Program.inc
new file mode 100644
index 00000000000..1104bc7503e
--- /dev/null
+++ b/final/lib/Support/Unix/Program.inc
@@ -0,0 +1,424 @@
+//===- llvm/Support/Unix/Program.cpp -----------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the Program class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include <llvm/Config/config.h>
+#include "llvm/Support/FileSystem.h"
+#include "Unix.h"
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_SYS_RESOURCE_H
+#include <sys/resource.h>
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_POSIX_SPAWN
+#include <spawn.h>
+#if !defined(__APPLE__)
+  extern char **environ;
+#else
+#include <crt_externs.h> // _NSGetEnviron
+#endif
+#endif
+
+namespace llvm {
+using namespace sys;
+
+Program::Program() : Data_(0) {}
+
+Program::~Program() {}
+
+unsigned Program::GetPid() const {
+  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+  return static_cast<unsigned>(pid);
+}
+
+// This function just uses the PATH environment variable to find the program.
+Path
+Program::FindProgramByName(const std::string& progName) {
+
+  // Check some degenerate cases
+  if (progName.length() == 0) // no program
+    return Path();
+  Path temp;
+  if (!temp.set(progName)) // invalid name
+    return Path();
+  // Use the given path verbatim if it contains any slashes; this matches
+  // the behavior of sh(1) and friends.
+  if (progName.find('/') != std::string::npos)
+    return temp;
+
+  // At this point, the file name is valid and does not contain slashes. Search
+  // for it through the directories specified in the PATH environment variable.
+
+  // Get the path. If it's empty, we can't do anything to find it.
+  const char *PathStr = getenv("PATH");
+  if (PathStr == 0)
+    return Path();
+
+  // Now we have a colon separated list of directories to search; try them.
+  size_t PathLen = strlen(PathStr);
+  while (PathLen) {
+    // Find the first colon...
+    const char *Colon = std::find(PathStr, PathStr+PathLen, ':');
+
+    // Check to see if this first directory contains the executable...
+    Path FilePath;
+    if (FilePath.set(std::string(PathStr,Colon))) {
+      FilePath.appendComponent(progName);
+      if (FilePath.canExecute())
+        return FilePath;                    // Found the executable!
+    }
+
+    // Nope it wasn't in this directory, check the next path in the list!
+    PathLen -= Colon-PathStr;
+    PathStr = Colon;
+
+    // Advance past duplicate colons
+    while (*PathStr == ':') {
+      PathStr++;
+      PathLen--;
+    }
+  }
+  return Path();
+}
+
+static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
+  if (Path == 0) // Noop
+    return false;
+  const char *File;
+  if (Path->isEmpty())
+    // Redirect empty paths to /dev/null
+    File = "/dev/null";
+  else
+    File = Path->c_str();
+
+  // Open the file
+  int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666);
+  if (InFD == -1) {
+    MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for "
+               + (FD == 0 ? "input" : "output"));
+    return true;
+  }
+
+  // Install it as the requested FD
+  if (dup2(InFD, FD) == -1) {
+    MakeErrMsg(ErrMsg, "Cannot dup2");
+    close(InFD);
+    return true;
+  }
+  close(InFD);      // Close the original FD
+  return false;
+}
+
+#ifdef HAVE_POSIX_SPAWN
+static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg,
+                          posix_spawn_file_actions_t &FileActions) {
+  if (Path == 0) // Noop
+    return false;
+  const char *File;
+  if (Path->isEmpty())
+    // Redirect empty paths to /dev/null
+    File = "/dev/null";
+  else
+    File = Path->c_str();
+
+  if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD, File,
+                                                 FD == 0 ? O_RDONLY
+                                                         : O_WRONLY|O_CREAT,
+                                                 0666))
+    return MakeErrMsg(ErrMsg, "Cannot dup2", Err);
+  return false;
+}
+#endif
+
+static void TimeOutHandler(int Sig) {
+}
+
+static void SetMemoryLimits (unsigned size)
+{
+#if HAVE_SYS_RESOURCE_H && HAVE_GETRLIMIT && HAVE_SETRLIMIT
+  struct rlimit r;
+  __typeof__ (r.rlim_cur) limit = (__typeof__ (r.rlim_cur)) (size) * 1048576;
+
+  // Heap size
+  getrlimit (RLIMIT_DATA, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_DATA, &r);
+#ifdef RLIMIT_RSS
+  // Resident set size.
+  getrlimit (RLIMIT_RSS, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_RSS, &r);
+#endif
+#ifdef RLIMIT_AS  // e.g. NetBSD doesn't have it.
+  // Virtual memory.
+  getrlimit (RLIMIT_AS, &r);
+  r.rlim_cur = limit;
+  setrlimit (RLIMIT_AS, &r);
+#endif
+#endif
+}
+
+bool
+Program::Execute(const Path &path, const char **args, const char **envp,
+                 const Path **redirects, unsigned memoryLimit,
+                 std::string *ErrMsg) {
+  // If this OS has posix_spawn and there is no memory limit being implied, use
+  // posix_spawn.  It is more efficient than fork/exec.
+#ifdef HAVE_POSIX_SPAWN
+  if (memoryLimit == 0) {
+    posix_spawn_file_actions_t FileActions;
+    posix_spawn_file_actions_init(&FileActions);
+
+    if (redirects) {
+      // Redirect stdin/stdout.
+      if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) ||
+          RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions))
+        return false;
+      if (redirects[1] == 0 || redirects[2] == 0 ||
+          *redirects[1] != *redirects[2]) {
+        // Just redirect stderr
+        if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false;
+      } else {
+        // If stdout and stderr should go to the same place, redirect stderr
+        // to the FD already open for stdout.
+        if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2))
+          return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err);
+      }
+    }
+
+    if (!envp)
+#if !defined(__APPLE__)
+      envp = const_cast<const char **>(environ);
+#else
+      // environ is missing in dylibs.
+      envp = const_cast<const char **>(*_NSGetEnviron());
+#endif
+
+    // Explicitly initialized to prevent what appears to be a valgrind false
+    // positive.
+    pid_t PID = 0;
+    int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0,
+                          const_cast<char **>(args),
+                          const_cast<char **>(envp));
+
+    posix_spawn_file_actions_destroy(&FileActions);
+
+    if (Err)
+      return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err);
+
+    Data_ = reinterpret_cast<void*>(PID);
+    return true;
+  }
+#endif
+
+  // Create a child process.
+  int child = fork();
+  switch (child) {
+    // An error occurred:  Return to the caller.
+    case -1:
+      MakeErrMsg(ErrMsg, "Couldn't fork");
+      return false;
+
+    // Child process: Execute the program.
+    case 0: {
+      // Redirect file descriptors...
+      if (redirects) {
+        // Redirect stdin
+        if (RedirectIO(redirects[0], 0, ErrMsg)) { return false; }
+        // Redirect stdout
+        if (RedirectIO(redirects[1], 1, ErrMsg)) { return false; }
+        if (redirects[1] && redirects[2] &&
+            *(redirects[1]) == *(redirects[2])) {
+          // If stdout and stderr should go to the same place, redirect stderr
+          // to the FD already open for stdout.
+          if (-1 == dup2(1,2)) {
+            MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout");
+            return false;
+          }
+        } else {
+          // Just redirect stderr
+          if (RedirectIO(redirects[2], 2, ErrMsg)) { return false; }
+        }
+      }
+
+      // Set memory limits
+      if (memoryLimit!=0) {
+        SetMemoryLimits(memoryLimit);
+      }
+
+      // Execute!
+      if (envp != 0)
+        execve(path.c_str(),
+               const_cast<char **>(args),
+               const_cast<char **>(envp));
+      else
+        execv(path.c_str(),
+              const_cast<char **>(args));
+      // If the execve() failed, we should exit. Follow Unix protocol and
+      // return 127 if the executable was not found, and 126 otherwise.
+      // Use _exit rather than exit so that atexit functions and static
+      // object destructors cloned from the parent process aren't
+      // redundantly run, and so that any data buffered in stdio buffers
+      // cloned from the parent aren't redundantly written out.
+      _exit(errno == ENOENT ? 127 : 126);
+    }
+
+    // Parent process: Break out of the switch to do our processing.
+    default:
+      break;
+  }
+
+  Data_ = reinterpret_cast<void*>(child);
+
+  return true;
+}
+
+int
+Program::Wait(const sys::Path &path,
+              unsigned secondsToWait,
+              std::string* ErrMsg)
+{
+#ifdef HAVE_SYS_WAIT_H
+  struct sigaction Act, Old;
+
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return -1;
+  }
+
+  // Install a timeout handler.  The handler itself does nothing, but the
+  // simple fact of having a handler at all causes the wait below to return
+  // with EINTR, unlike if we used SIG_IGN.
+  if (secondsToWait) {
+    memset(&Act, 0, sizeof(Act));
+    Act.sa_handler = TimeOutHandler;
+    sigemptyset(&Act.sa_mask);
+    sigaction(SIGALRM, &Act, &Old);
+    alarm(secondsToWait);
+  }
+
+  // Parent process: Wait for the child process to terminate.
+  int status;
+  uint64_t pid = reinterpret_cast<uint64_t>(Data_);
+  pid_t child = static_cast<pid_t>(pid);
+  while (waitpid(pid, &status, 0) != child)
+    if (secondsToWait && errno == EINTR) {
+      // Kill the child.
+      kill(child, SIGKILL);
+
+      // Turn off the alarm and restore the signal handler
+      alarm(0);
+      sigaction(SIGALRM, &Old, 0);
+
+      // Wait for child to die
+      if (wait(&status) != child)
+        MakeErrMsg(ErrMsg, "Child timed out but wouldn't die");
+      else
+        MakeErrMsg(ErrMsg, "Child timed out", 0);
+
+      return -1;   // Timeout detected
+    } else if (errno != EINTR) {
+      MakeErrMsg(ErrMsg, "Error waiting for child process");
+      return -1;
+    }
+
+  // We exited normally without timeout, so turn off the timer.
+  if (secondsToWait) {
+    alarm(0);
+    sigaction(SIGALRM, &Old, 0);
+  }
+
+  // Return the proper exit status. Detect error conditions
+  // so we can return -1 for them and set ErrMsg informatively.
+  int result = 0;
+  if (WIFEXITED(status)) {
+    result = WEXITSTATUS(status);
+#ifdef HAVE_POSIX_SPAWN
+    // The posix_spawn child process returns 127 on any kind of error.
+    // Following the POSIX convention for command-line tools (which posix_spawn
+    // itself apparently does not), check to see if the failure was due to some
+    // reason other than the file not existing, and return 126 in this case.
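+    // The same decoding in isolation (WIFEXITED and WEXITSTATUS come from
+    // <sys/wait.h>; by shell convention 127 means "not found" and 126
+    // "found but not executable"):
+    //
+    //   int Status;
+    //   if (waitpid(PID, &Status, 0) == PID &&
+    //       WIFEXITED(Status) && WEXITSTATUS(Status) == 127)
+    //     ;  // treat as ENOENT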
+    bool Exists;
+    if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists)
+      result = 126;
+#endif
+    if (result == 127) {
+      if (ErrMsg)
+        *ErrMsg = llvm::sys::StrError(ENOENT);
+      return -1;
+    }
+    if (result == 126) {
+      if (ErrMsg)
+        *ErrMsg = "Program could not be executed";
+      return -1;
+    }
+  } else if (WIFSIGNALED(status)) {
+    if (ErrMsg) {
+      *ErrMsg = strsignal(WTERMSIG(status));
+#ifdef WCOREDUMP
+      if (WCOREDUMP(status))
+        *ErrMsg += " (core dumped)";
+#endif
+    }
+    return -1;
+  }
+  return result;
+#else
+  if (ErrMsg)
+    *ErrMsg = "Program::Wait is not implemented on this platform yet!";
+  return -1;
+#endif
+}
+
+bool
+Program::Kill(std::string* ErrMsg) {
+  if (Data_ == 0) {
+    MakeErrMsg(ErrMsg, "Process not started!");
+    return true;
+  }
+
+  uint64_t pid64 = reinterpret_cast<uint64_t>(Data_);
+  pid_t pid = static_cast<pid_t>(pid64);
+
+  if (kill(pid, SIGKILL) != 0) {
+    MakeErrMsg(ErrMsg, "The process couldn't be killed!");
+    return true;
+  }
+
+  return false;
+}
+
+bool Program::ChangeStdinToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+bool Program::ChangeStdoutToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+bool Program::ChangeStderrToBinary(){
+  // Do nothing, as Unix doesn't differentiate between text and binary.
+  return false;
+}
+
+}
diff --git a/final/lib/Support/Unix/README.txt b/final/lib/Support/Unix/README.txt
new file mode 100644
index 00000000000..3d547c2990d
--- /dev/null
+++ b/final/lib/Support/Unix/README.txt
@@ -0,0 +1,16 @@
+llvm/lib/Support/Unix README
+============================
+
+This directory provides implementations of the lib/System classes that
+are common to two or more variants of UNIX. For example, the directory
+structure underneath this directory could look like this:
+
+Unix           - only code that is truly generic to all UNIX platforms
+  Posix        - code that is specific to Posix variants of UNIX
+    SUS        - code that is specific to the Single Unix Specification
+      SysV     - code that is specific to System V variants of UNIX
+
+As a rule, only those directories actually needing to be created should be
+created. Also, further subdirectories could be created to reflect versions of
+the various standards. For example, under SUS there could be v1, v2, and v3
+subdirectories to reflect the three major versions of SUS.
diff --git a/final/lib/Support/Unix/RWMutex.inc b/final/lib/Support/Unix/RWMutex.inc
new file mode 100644
index 00000000000..40e87ff1311
--- /dev/null
+++ b/final/lib/Support/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/Support/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+  return true;
+}
+
+bool RWMutexImpl::reader_release() {
+  return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+  return true;
+}
+
+bool RWMutexImpl::writer_release() {
+  return true;
+}
+
+}
diff --git a/final/lib/Support/Unix/Signals.inc b/final/lib/Support/Unix/Signals.inc
new file mode 100644
index 00000000000..0a617591551
--- /dev/null
+++ b/final/lib/Support/Unix/Signals.inc
@@ -0,0 +1,303 @@
+//===- Signals.cpp - Generic Unix Signals Implementation -----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines some helpful functions for dealing with the possibility of
+// Unix signals occurring while your program is running.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Mutex.h"
+#include <vector>
+#include <algorithm>
+#if HAVE_EXECINFO_H
+# include <execinfo.h>         // For backtrace().
+#endif
+#if HAVE_SIGNAL_H
+#include <signal.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#if HAVE_DLFCN_H && __GNUG__
+#include <dlfcn.h>
+#include <cxxabi.h>
+#endif
+using namespace llvm;
+
+static RETSIGTYPE SignalHandler(int Sig);  // defined below.
+
+static SmartMutex<true> SignalsMutex;
+
+/// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> FilesToRemove;
+static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun;
+
+// IntSigs - Signals that may interrupt the program at any time.
+static const int IntSigs[] = {
+  SIGHUP, SIGINT, SIGQUIT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2
+};
+static const int *const IntSigsEnd =
+  IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]);
+
+// KillSigs - Signals that are synchronous with the program that will cause it
+// to die.
+static const int KillSigs[] = {
+  SIGILL, SIGTRAP, SIGABRT, SIGFPE, SIGBUS, SIGSEGV
+#ifdef SIGSYS
+  , SIGSYS
+#endif
+#ifdef SIGXCPU
+  , SIGXCPU
+#endif
+#ifdef SIGXFSZ
+  , SIGXFSZ
+#endif
+#ifdef SIGEMT
+  , SIGEMT
+#endif
+};
+static const int *const KillSigsEnd =
+  KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]);
+
+static unsigned NumRegisteredSignals = 0;
+static struct {
+  struct sigaction SA;
+  int SigNo;
+} RegisteredSignalInfo[(sizeof(IntSigs)+sizeof(KillSigs))/sizeof(KillSigs[0])];
+
+
+static void RegisterHandler(int Signal) {
+  assert(NumRegisteredSignals <
+         sizeof(RegisteredSignalInfo)/sizeof(RegisteredSignalInfo[0]) &&
+         "Out of space for signal handlers!");
+
+  struct sigaction NewHandler;
+
+  NewHandler.sa_handler = SignalHandler;
+  NewHandler.sa_flags = SA_NODEFER|SA_RESETHAND;
+  sigemptyset(&NewHandler.sa_mask);
+
+  // Install the new handler, save the old one in RegisteredSignalInfo.
+  sigaction(Signal, &NewHandler,
+            &RegisteredSignalInfo[NumRegisteredSignals].SA);
+  RegisteredSignalInfo[NumRegisteredSignals].SigNo = Signal;
+  ++NumRegisteredSignals;
+}
+
+static void RegisterHandlers() {
+  // If the handlers are already registered, we're done.
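+  // (The calls below just apply RegisterHandler, i.e. sigaction, over both
+  // signal tables; a minimal single-signal equivalent, assuming <signal.h>,
+  // looks like:
+  //
+  //    struct sigaction SA;
+  //    SA.sa_handler = SignalHandler;
+  //    SA.sa_flags = SA_NODEFER|SA_RESETHAND;  // one-shot delivery
+  //    sigemptyset(&SA.sa_mask);
+  //    sigaction(SIGINT, &SA, 0);
+  // )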
+  if (NumRegisteredSignals != 0) return;
+
+  std::for_each(IntSigs, IntSigsEnd, RegisterHandler);
+  std::for_each(KillSigs, KillSigsEnd, RegisterHandler);
+}
+
+static void UnregisterHandlers() {
+  // Restore all of the signal handlers to how they were before we showed up.
+  for (unsigned i = 0, e = NumRegisteredSignals; i != e; ++i)
+    sigaction(RegisteredSignalInfo[i].SigNo,
+              &RegisteredSignalInfo[i].SA, 0);
+  NumRegisteredSignals = 0;
+}
+
+
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+static void RemoveFilesToRemove() {
+  while (!FilesToRemove.empty()) {
+    FilesToRemove.back().eraseFromDisk(true);
+    FilesToRemove.pop_back();
+  }
+}
+
+// SignalHandler - The signal handler that runs.
+static RETSIGTYPE SignalHandler(int Sig) {
+  // Restore the signal behavior to default, so that the program actually
+  // crashes when we return and the signal reissues.  This also ensures that if
+  // we crash in our signal handler that the program will terminate immediately
+  // instead of recursing in the signal handler.
+  UnregisterHandlers();
+
+  // Unmask all potentially blocked kill signals.
+  sigset_t SigMask;
+  sigfillset(&SigMask);
+  sigprocmask(SIG_UNBLOCK, &SigMask, 0);
+
+  SignalsMutex.acquire();
+  RemoveFilesToRemove();
+
+  if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
+    if (InterruptFunction) {
+      void (*IF)() = InterruptFunction;
+      SignalsMutex.release();
+      InterruptFunction = 0;
+      IF();        // run the interrupt function.
+      return;
+    }
+
+    SignalsMutex.release();
+    raise(Sig);   // Execute the default handler.
+    return;
+  }
+
+  SignalsMutex.release();
+
+  // Otherwise if it is a fault (like SEGV) run any handler.
+  for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i)
+    CallBacksToRun[i].first(CallBacksToRun[i].second);
+}
+
+void llvm::sys::RunInterruptHandlers() {
+  SignalsMutex.acquire();
+  RemoveFilesToRemove();
+  SignalsMutex.release();
+}
+
+void llvm::sys::SetInterruptFunction(void (*IF)()) {
+  SignalsMutex.acquire();
+  InterruptFunction = IF;
+  SignalsMutex.release();
+  RegisterHandlers();
+}
+
+// RemoveFileOnSignal - The public API
+bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename,
+                                   std::string* ErrMsg) {
+  SignalsMutex.acquire();
+  FilesToRemove.push_back(Filename);
+
+  SignalsMutex.release();
+
+  RegisterHandlers();
+  return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+  SignalsMutex.acquire();
+  std::vector<sys::Path>::reverse_iterator I =
+    std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename);
+  if (I != FilesToRemove.rend())
+    FilesToRemove.erase(I.base()-1);
+  SignalsMutex.release();
+}
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process.  The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandlers();
+}
+
+
+// PrintStackTrace - In the case of a program crash or fault, print out a stack
+// trace so that the user has an indication of why and where we died.
+//
+// On glibc systems we have the 'backtrace' function, which works nicely, but
+// doesn't demangle symbols.
+static void PrintStackTrace(void *) {
+#ifdef HAVE_BACKTRACE
+  static void* StackTrace[256];
+  // Use backtrace() to output a backtrace on Linux systems with glibc.
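+  // In its smallest form the glibc API is two calls (assuming
+  // <execinfo.h>): backtrace() fills an array with return addresses, and
+  // backtrace_symbols_fd() writes one line per frame to a file descriptor:
+  //
+  //   void *Frames[64];
+  //   int Depth = backtrace(Frames, 64);
+  //   backtrace_symbols_fd(Frames, Depth, STDERR_FILENO);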
+  int depth = backtrace(StackTrace,
+                        static_cast<int>(array_lengthof(StackTrace)));
+#if HAVE_DLFCN_H && __GNUG__
+  int width = 0;
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+
+    int nwidth;
+    if (name == NULL) nwidth = strlen(dlinfo.dli_fname);
+    else              nwidth = strlen(name) - 1;
+
+    if (nwidth > width) width = nwidth;
+  }
+
+  for (int i = 0; i < depth; ++i) {
+    Dl_info dlinfo;
+    dladdr(StackTrace[i], &dlinfo);
+
+    fprintf(stderr, "%-2d", i);
+
+    const char* name = strrchr(dlinfo.dli_fname, '/');
+    if (name == NULL) fprintf(stderr, " %-*s", width, dlinfo.dli_fname);
+    else              fprintf(stderr, " %-*s", width, name+1);
+
+    fprintf(stderr, " %#0*lx",
+            (int)(sizeof(void*) * 2) + 2, (unsigned long)StackTrace[i]);
+
+    if (dlinfo.dli_sname != NULL) {
+      int res;
+      fputc(' ', stderr);
+      char* d = abi::__cxa_demangle(dlinfo.dli_sname, NULL, NULL, &res);
+      if (d == NULL) fputs(dlinfo.dli_sname, stderr);
+      else           fputs(d, stderr);
+      free(d);
+
+      fprintf(stderr, " + %tu",(char*)StackTrace[i]-(char*)dlinfo.dli_saddr);
+    }
+    fputc('\n', stderr);
+  }
+#else
+  backtrace_symbols_fd(StackTrace, depth, STDERR_FILENO);
+#endif
+#endif
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void llvm::sys::PrintStackTraceOnErrorSignal() {
+  AddSignalHandler(PrintStackTrace, 0);
+}
+
+
+/***/
+
+// On Darwin, raise sends a signal to the main thread instead of the current
+// thread. This has the unfortunate effect that assert() and abort() will end
+// up bypassing our crash recovery attempts. We work around this for anything
+// in the same linkage unit by just defining our own versions of the assert
+// handler and abort.
+
+#ifdef __APPLE__
+
+int raise(int sig) {
+  return pthread_kill(pthread_self(), sig);
+}
+
+void __assert_rtn(const char *func,
+                  const char *file,
+                  int line,
+                  const char *expr) {
+  if (func)
+    fprintf(stderr, "Assertion failed: (%s), function %s, file %s, line %d.\n",
+            expr, func, file, line);
+  else
+    fprintf(stderr, "Assertion failed: (%s), file %s, line %d.\n",
+            expr, file, line);
+  abort();
+}
+
+#include <signal.h>
+#include <pthread.h>
+
+void abort() {
+  raise(SIGABRT);
+  usleep(1000);
+  __builtin_trap();
+}
+
+#endif
diff --git a/final/lib/Support/Unix/ThreadLocal.inc b/final/lib/Support/Unix/ThreadLocal.inc
new file mode 100644
index 00000000000..2b4c9017cd9
--- /dev/null
+++ b/final/lib/Support/Unix/ThreadLocal.inc
@@ -0,0 +1,26 @@
+//=== llvm/Support/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+void ThreadLocalImpl::removeInstance() { setInstance(0); }
+}
diff --git a/final/lib/Support/Unix/TimeValue.inc b/final/lib/Support/Unix/TimeValue.inc
new file mode 100644
index 00000000000..5cf5a9d44ed
--- /dev/null
+++ b/final/lib/Support/Unix/TimeValue.inc
@@ -0,0 +1,56 @@
+//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific portion of the TimeValue class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "Unix.h"
+
+namespace llvm {
+  using namespace sys;
+
+std::string TimeValue::str() const {
+  char buffer[32];
+
+  time_t ourTime = time_t(this->toEpochTime());
+#ifdef __hpux
+// note that the following line needs -D_REENTRANT on HP-UX to be picked up
+  asctime_r(localtime(&ourTime), buffer);
+#else
+  ::asctime_r(::localtime(&ourTime), buffer);
+#endif
+
+  std::string result(buffer);
+  return result.substr(0,24);
+}
+
+TimeValue TimeValue::now() {
+  struct timeval the_time;
+  timerclear(&the_time);
+  if (0 != ::gettimeofday(&the_time,0)) {
+    // This is *really* unlikely to occur because the only gettimeofday
+    // errors concern the timezone parameter which we're passing in as 0.
+    // In the unlikely case it does happen, just return MinTime, no error
+    // message needed.
+    return MinTime;
+  }
+
+  return TimeValue(
+    static_cast<TimeValue::SecondsType>( the_time.tv_sec +
+                                         PosixZeroTime.seconds_ ),
+    static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
+      NANOSECONDS_PER_MICROSECOND ) );
+}
+
+}
diff --git a/final/lib/Support/Unix/Unix.h b/final/lib/Support/Unix/Unix.h
new file mode 100644
index 00000000000..b7be3111d43
--- /dev/null
+++ b/final/lib/Support/Unix/Unix.h
@@ -0,0 +1,87 @@
+//===- llvm/Support/Unix/Unix.h - Common Unix Include File -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines things specific to Unix implementations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_UNIX_UNIX_H
+#define LLVM_SYSTEM_UNIX_UNIX_H
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on all UNIX variants.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
+#include "llvm/Support/Errno.h"
+#include <cstdlib>
+#include <cstdio>
+#include <cstring>
+#include <cerrno>
+#include <string>
+#include <algorithm>
+
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#ifdef HAVE_SYS_TYPES_H
+#include <sys/types.h>
+#endif
+
+#ifdef HAVE_SYS_PARAM_H
+#include <sys/param.h>
+#endif
+
+#ifdef HAVE_ASSERT_H
+#include <assert.h>
+#endif
+
+#ifdef TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# ifdef HAVE_SYS_TIME_H
+#  include <sys/time.h>
+# else
+#  include <time.h>
+# endif
+#endif
+
+#ifdef HAVE_SYS_WAIT_H
+# include <sys/wait.h>
+#endif
+
+#ifndef WEXITSTATUS
+# define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8)
+#endif
+
+#ifndef WIFEXITED
+# define WIFEXITED(stat_val) (((stat_val) & 255) == 0)
+#endif
+
+/// This function builds an error message into \p ErrMsg using the \p prefix
+/// string and the Unix error number given by \p errnum. If errnum is -1, the
+/// default, then the value of errno is used.
+/// @brief Make an error message
+///
+/// If the error number can be converted to a string, it will be
+/// separated from prefix by ": ".
+static inline bool MakeErrMsg(
+  std::string* ErrMsg, const std::string& prefix, int errnum = -1) {
+  if (!ErrMsg)
+    return true;
+  if (errnum == -1)
+    errnum = errno;
+  *ErrMsg = prefix + ": " + llvm::sys::StrError(errnum);
+  return true;
+}
+
+#endif
diff --git a/final/lib/Support/Unix/system_error.inc b/final/lib/Support/Unix/system_error.inc
new file mode 100644
index 00000000000..681e919edb4
--- /dev/null
+++ b/final/lib/Support/Unix/system_error.inc
@@ -0,0 +1,34 @@
+//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Unix specific implementation of the error_code
+// and error_condition classes.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+std::string
+_system_error_category::message(int ev) const {
+  return _do_message::message(ev);
+}
+
+error_condition
+_system_error_category::default_error_condition(int ev) const {
+#ifdef ELAST
+  if (ev > ELAST)
+    return error_condition(ev, system_category());
+#endif  // ELAST
+  return error_condition(ev, generic_category());
+}
diff --git a/final/lib/Support/Valgrind.cpp b/final/lib/Support/Valgrind.cpp
new file mode 100644
index 00000000000..703448524ed
--- /dev/null
+++ b/final/lib/Support/Valgrind.cpp
@@ -0,0 +1,54 @@
+//===-- Valgrind.cpp - Implement Valgrind communication ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines Valgrind communication methods, if HAVE_VALGRIND_VALGRIND_H is
+// defined.  If we have valgrind.h but valgrind isn't running, its macros are
+// no-ops.
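+//
+// For reference, a guarded use of the client-request macros looks like this
+// (both names are provided by <valgrind/valgrind.h>):
+//
+//   if (RUNNING_ON_VALGRIND)
+//     VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);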
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Valgrind.h"
+#include "llvm/Config/config.h"
+
+#if HAVE_VALGRIND_VALGRIND_H
+#include <valgrind/valgrind.h>
+
+static bool InitNotUnderValgrind() {
+  return !RUNNING_ON_VALGRIND;
+}
+
+// This bool is negated from what we'd expect because code may run before it
+// gets initialized.  If that happens, it will appear to be 0 (false), and we
+// want that to cause the rest of the code in this file to run the
+// Valgrind-provided macros.
+static const bool NotUnderValgrind = InitNotUnderValgrind();
+
+bool llvm::sys::RunningOnValgrind() {
+  if (NotUnderValgrind)
+    return false;
+  return RUNNING_ON_VALGRIND;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+  if (NotUnderValgrind)
+    return;
+
+  VALGRIND_DISCARD_TRANSLATIONS(Addr, Len);
+}
+
+#else  // !HAVE_VALGRIND_VALGRIND_H
+
+bool llvm::sys::RunningOnValgrind() {
+  return false;
+}
+
+void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) {
+}
+
+#endif  // !HAVE_VALGRIND_VALGRIND_H
diff --git a/final/lib/Support/Windows/DynamicLibrary.inc b/final/lib/Support/Windows/DynamicLibrary.inc
new file mode 100644
index 00000000000..2c14366c076
--- /dev/null
+++ b/final/lib/Support/Windows/DynamicLibrary.inc
@@ -0,0 +1,166 @@
+//===- Win32/DynamicLibrary.cpp - Win32 DL Implementation -------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of DynamicLibrary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+
+#ifdef _MSC_VER
+ #include <ntverp.h>
+#endif
+
+#ifdef __MINGW32__
+ #if (HAVE_LIBIMAGEHLP != 1)
+  #error "libimagehlp.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code.
+//===----------------------------------------------------------------------===//
+
+static std::vector<HMODULE> OpenedHandles;
+
+#ifdef _WIN64
+  typedef DWORD64 ModuleBaseType;
+#else
+  typedef ULONG ModuleBaseType;
+#endif
+
+extern "C" {
+// Use old callback if:
+//  - Not using Visual Studio
+//  - Visual Studio 2005 or earlier but only if we are not using the Windows
+//    SDK, or the Windows SDK version is older than 6.0
+// Use new callback if:
+//  - Newer Visual Studio (comes with newer SDK).
+//  - Visual Studio 2005 with Windows SDK 6.0+
+#if defined(_MSC_VER)
+  #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000)
+    #define OLD_ELM_CALLBACK_DECL 1
+  #endif
+#elif defined(__MINGW64__)
+  // Use new callback.
+#elif defined(__MINGW32__)
+  #define OLD_ELM_CALLBACK_DECL 1
+#endif
+
+#ifdef OLD_ELM_CALLBACK_DECL
+static BOOL CALLBACK ELM_Callback(PSTR ModuleName,
+                                  ModuleBaseType ModuleBase,
+                                  ULONG ModuleSize,
+                                  PVOID UserContext)
+#else
+static BOOL CALLBACK ELM_Callback(PCSTR ModuleName,
+                                  ModuleBaseType ModuleBase,
+                                  ULONG ModuleSize,
+                                  PVOID UserContext)
+#endif
+{
+  // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
+  // into the process.
+  if (stricmp(ModuleName, "msvci70") != 0 &&
+      stricmp(ModuleName, "msvcirt") != 0 &&
+      stricmp(ModuleName, "msvcp50") != 0 &&
+      stricmp(ModuleName, "msvcp60") != 0 &&
+      stricmp(ModuleName, "msvcp70") != 0 &&
+      stricmp(ModuleName, "msvcr70") != 0 &&
+#ifndef __MINGW32__
+      // Mingw32 uses msvcrt.dll by default. Don't ignore it.
+      // Otherwise, the user should be aware of what they're doing :)
+      stricmp(ModuleName, "msvcrt") != 0 &&
+#endif
+      stricmp(ModuleName, "msvcrt20") != 0 &&
+      stricmp(ModuleName, "msvcrt40") != 0) {
+    OpenedHandles.push_back((HMODULE)ModuleBase);
+  }
+  return TRUE;
+}
+}
+
+bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
+                                            std::string *ErrMsg) {
+  if (filename) {
+    HMODULE a_handle = LoadLibrary(filename);
+
+    if (a_handle == 0)
+      return MakeErrMsg(ErrMsg, std::string(filename) + ": Can't open : ");
+
+    OpenedHandles.push_back(a_handle);
+  } else {
+    // When no file is specified, enumerate all DLLs and EXEs in the
+    // process.
+    EnumerateLoadedModules(GetCurrentProcess(), ELM_Callback, 0);
+  }
+
+  // Because we don't remember the handle, we will never free it; hence,
+  // it is loaded permanently.
+  return false;
+}
+
+// Stack probing routines are in the support library (e.g. libgcc), but we
+// don't have dynamic linking on windows. Provide a hook.
+#define EXPLICIT_SYMBOL(SYM)                    \
+  extern "C" { extern void *SYM; }
+#define EXPLICIT_SYMBOL2(SYMFROM, SYMTO) EXPLICIT_SYMBOL(SYMTO)
+
+#include "explicit_symbols.inc"
+
+#undef EXPLICIT_SYMBOL
+#undef EXPLICIT_SYMBOL2
+
+void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
+  // First check symbols added via AddSymbol().
+  if (ExplicitSymbols) {
+    std::map<std::string, void *>::iterator I =
+      ExplicitSymbols->find(symbolName);
+    std::map<std::string, void *>::iterator E = ExplicitSymbols->end();
+    if (I != E)
+      return I->second;
+  }
+
+  // Now search the libraries.
+  for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
+       E = OpenedHandles.end(); I != E; ++I) {
+    FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
+    if (ptr) {
+      return (void *) ptr;
+    }
+  }
+
+  #define EXPLICIT_SYMBOL(SYM)                          \
+    if (!strcmp(symbolName, #SYM)) return (void*)&SYM;
+  #define EXPLICIT_SYMBOL2(SYMFROM, SYMTO)              \
+    if (!strcmp(symbolName, #SYMFROM)) return (void*)&SYMTO;
+
+  {
+    #include "explicit_symbols.inc"
+  }
+
+  #undef EXPLICIT_SYMBOL
+  #undef EXPLICIT_SYMBOL2
+
+  return 0;
+}
+
+}
diff --git a/final/lib/Support/Windows/Host.inc b/final/lib/Support/Windows/Host.inc
new file mode 100644
index 00000000000..733830e82f0
--- /dev/null
+++ b/final/lib/Support/Windows/Host.inc
@@ -0,0 +1,23 @@
+//===- llvm/Support/Win32/Host.inc -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 Host support.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <cstdio>
+#include <string>
+
+using namespace llvm;
+
+std::string sys::getHostTriple() {
+  // FIXME: Adapt to running version.
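+  // A sketch of what adapting might look like, via the documented Win32
+  // GetVersionExA API (illustrative only; the returned triple is left
+  // untouched here):
+  //
+  //   OSVERSIONINFOA VI;
+  //   VI.dwOSVersionInfoSize = sizeof(VI);
+  //   if (GetVersionExA(&VI))
+  //     ;  // e.g. fold VI.dwMajorVersion/VI.dwMinorVersion into the triple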
+  return LLVM_HOSTTRIPLE;
+}
diff --git a/final/lib/Support/Windows/Memory.inc b/final/lib/Support/Windows/Memory.inc
new file mode 100644
index 00000000000..9f69e7367e6
--- /dev/null
+++ b/final/lib/Support/Windows/Memory.inc
@@ -0,0 +1,73 @@
+//===- Win32/Memory.cpp - Win32 Memory Implementation -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of various Memory
+// management utilities
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Process.h"
+
+namespace llvm {
+using namespace sys;
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//===          and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+MemoryBlock Memory::AllocateRWX(size_t NumBytes,
+                                const MemoryBlock *NearBlock,
+                                std::string *ErrMsg) {
+  if (NumBytes == 0) return MemoryBlock();
+
+  static const size_t pageSize = Process::GetPageSize();
+  size_t NumPages = (NumBytes+pageSize-1)/pageSize;
+
+  //FIXME: support NearBlock if ever needed on Win64.
+
+  void *pa = VirtualAlloc(NULL, NumPages*pageSize, MEM_COMMIT,
+                          PAGE_EXECUTE_READWRITE);
+  if (pa == NULL) {
+    MakeErrMsg(ErrMsg, "Can't allocate RWX Memory: ");
+    return MemoryBlock();
+  }
+
+  MemoryBlock result;
+  result.Address = pa;
+  result.Size = NumPages*pageSize;
+  return result;
+}
+
+bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) {
+  if (M.Address == 0 || M.Size == 0) return false;
+  if (!VirtualFree(M.Address, 0, MEM_RELEASE))
+    return MakeErrMsg(ErrMsg, "Can't release RWX Memory: ");
+  return false;
+}
+
+bool Memory::setWritable(MemoryBlock &M, std::string *ErrMsg) {
+  return true;
+}
+
+bool Memory::setExecutable(MemoryBlock &M, std::string *ErrMsg) {
+  return false;
+}
+
+bool Memory::setRangeWritable(const void *Addr, size_t Size) {
+  return true;
+}
+
+bool Memory::setRangeExecutable(const void *Addr, size_t Size) {
+  return false;
+}
+
+}
diff --git a/final/lib/Support/Windows/Mutex.inc b/final/lib/Support/Windows/Mutex.inc
new file mode 100644
index 00000000000..583dc6359a1
--- /dev/null
+++ b/final/lib/Support/Windows/Mutex.inc
@@ -0,0 +1,58 @@
+//===- llvm/Support/Win32/Mutex.inc - Win32 Mutex Implementation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) Mutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include "llvm/Support/Mutex.h"
+
+namespace llvm {
+using namespace sys;
+
+MutexImpl::MutexImpl(bool /*recursive*/)
+{
+  data_ = new CRITICAL_SECTION;
+  InitializeCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+MutexImpl::~MutexImpl()
+{
+  DeleteCriticalSection((LPCRITICAL_SECTION)data_);
+  delete (LPCRITICAL_SECTION)data_;
+  data_ = 0;
+}
+
+bool
+MutexImpl::acquire()
+{
+  EnterCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+MutexImpl::release()
+{
+  LeaveCriticalSection((LPCRITICAL_SECTION)data_);
+  return true;
+}
+
+bool
+MutexImpl::tryacquire()
+{
+  return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
+}
+
+}
diff --git a/final/lib/Support/Windows/Path.inc b/final/lib/Support/Windows/Path.inc
new file mode 100644
index 00000000000..d6b582318d7
--- /dev/null
+++ b/final/lib/Support/Windows/Path.inc
@@ -0,0 +1,931 @@
+//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Path class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <malloc.h>
+#include <cstdio>
+
+// We need to undo a macro defined in Windows.h, otherwise we won't compile:
+#undef CopyFile
+#undef GetCurrentDirectory
+
+// Windows happily accepts either forward or backward slashes, though any path
+// returned by a Win32 API will have backward slashes.  As LLVM code basically
+// assumes forward slashes are used, backward slashes are converted where they
+// can be introduced into a path.
+//
+// Another invariant is that a path ends with a slash if and only if the path
+// is a root directory.  Any other use of a trailing slash is stripped.  Unlike
+// in Unix, Windows has a rather complicated notion of a root path and this
+// invariant helps simplify the code.
+
+static void FlipBackSlashes(std::string& s) {
+  for (size_t i = 0; i < s.size(); i++)
+    if (s[i] == '\\')
+      s[i] = '/';
+}
+
+namespace llvm {
+namespace sys {
+
+const char PathSeparator = ';';
+
+StringRef Path::GetEXESuffix() {
+  return "exe";
+}
+
+Path::Path(llvm::StringRef p)
+  : path(p) {
+  FlipBackSlashes(path);
+}
+
+Path::Path(const char *StrStart, unsigned StrLen)
+  : path(StrStart, StrLen) {
+  FlipBackSlashes(path);
+}
+
+Path&
+Path::operator=(StringRef that) {
+  path.assign(that.data(), that.size());
+  FlipBackSlashes(path);
+  return *this;
+}
+
+// push_back 0 on create, and pop_back on delete.
+struct ScopedNullTerminator {
+  std::string &str;
+  ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); }
+  ~ScopedNullTerminator {
+    // str.pop_back(); But wait, C++03 doesn't have this...
+    assert(!str.empty() && str[str.size() - 1] == 0
+           && "Null char not present!");
+    str.resize(str.size() - 1);
+  }
+};
+
+bool
+Path::isValid() const {
+  if (path.empty())
+    return false;
+
+  // If there is a colon, it must be the second character, preceded by a letter
+  // and followed by something.
+  size_t len = path.size();
+  // This code assumes that path is null terminated, so make sure it is.
+  ScopedNullTerminator snt(path);
+  size_t pos = path.rfind(':',len);
+  size_t rootslash = 0;
+  if (pos != std::string::npos) {
+    if (pos != 1 || !isalpha(path[0]) || len < 3)
+      return false;
+    rootslash = 2;
+  }
+
+  // Look for a UNC path, and if found adjust our notion of the root slash.
+  if (len > 3 && path[0] == '/' && path[1] == '/') {
+    rootslash = path.find('/', 2);
+    if (rootslash == std::string::npos)
+      rootslash = 0;
+  }
+
+  // Check for illegal characters.
+  if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012"
+                         "\013\014\015\016\017\020\021\022\023\024\025\026"
+                         "\027\030\031\032\033\034\035\036\037")
+      != std::string::npos)
+    return false;
+
+  // Remove trailing slash, unless it's a root slash.
+  if (len > rootslash+1 && path[len-1] == '/')
+    path.erase(--len);
+
+  // Check each component for legality.
+  for (pos = 0; pos < len; ++pos) {
+    // A component may not end in a space.
+    if (path[pos] == ' ') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0')
+        return false;
+    }
+
+    // A component may not end in a period.
+    if (path[pos] == '.') {
+      if (path[pos+1] == '/' || path[pos+1] == '\0') {
+        // Unless it is the pseudo-directory "."...
+        if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':')
+          return true;
+        // or "..".
+        if (pos > 0 && path[pos-1] == '.') {
+          if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':')
+            return true;
+        }
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void Path::makeAbsolute() {
+  TCHAR  FullPath[MAX_PATH + 1] = {0};
+  LPTSTR FilePart = NULL;
+
+  DWORD RetLength = ::GetFullPathNameA(path.c_str(),
+                        sizeof(FullPath)/sizeof(FullPath[0]),
+                        FullPath, &FilePart);
+
+  if (0 == RetLength) {
+    // FIXME: Report the error GetLastError()
+    assert(0 && "Unable to make absolute path!");
+  } else if (RetLength > MAX_PATH) {
+    // FIXME: Report too small buffer (needed RetLength bytes).
+    assert(0 && "Unable to make absolute path!");
+  } else {
+    path = FullPath;
+  }
+}
+
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+  assert(NameStart);
+  // FIXME: This does not handle correctly an absolute path starting from
+  // a drive letter or in UNC format.
+  switch (NameLen) {
+  case 0:
+    return false;
+  case 1:
+  case 2:
+    return NameStart[0] == '/';
+  default:
+    return
+      (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) ||
+      (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\'));
+  }
+}
+
+bool
+Path::isAbsolute() const {
+  // FIXME: This does not handle correctly an absolute path starting from
+  // a drive letter or in UNC format.
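+  // What the check below accepts as absolute (illustrative inputs):
+  //   "/foo"         - leading forward slash
+  //   "C:/foo"       - drive letter, colon, slash
+  //   "//srv/share"  - UNC; the leading slash matches the first test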
+  switch (path.length()) {
+  case 0:
+    return false;
+  case 1:
+  case 2:
+    return path[0] == '/';
+  default:
+    return path[0] == '/' || (path[1] == ':' && path[2] == '/');
+  }
+}
+
+static Path *TempDirectory;
+
+Path
+Path::GetTemporaryDirectory(std::string* ErrMsg) {
+  if (TempDirectory)
+    return *TempDirectory;
+
+  char pathname[MAX_PATH];
+  if (!GetTempPath(MAX_PATH, pathname)) {
+    if (ErrMsg)
+      *ErrMsg = "Can't determine temporary directory";
+    return Path();
+  }
+
+  Path result;
+  result.set(pathname);
+
+  // Append a subdirectory based on our process id so multiple LLVMs don't
+  // step on each other's toes.
+#ifdef __MINGW32__
+  // Mingw's Win32 header files are broken.
+  sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId()));
+#else
+  sprintf(pathname, "LLVM_%u", GetCurrentProcessId());
+#endif
+  result.appendComponent(pathname);
+
+  // If there's a directory left over from a previous LLVM execution that
+  // happened to have the same process id, get rid of it.
+  result.eraseFromDisk(true);
+
+  // And finally (re-)create the empty directory.
+  result.createDirectoryOnDisk(false);
+  TempDirectory = new Path(result);
+  return *TempDirectory;
+}
+
+// FIXME: the following set of functions don't map to Windows very well.
+Path
+Path::GetRootDirectory() {
+  // This is the only notion that Windows has of a root directory. Nothing
+  // is here except for drives.
+  return Path("file:///");
+}
+
+void
+Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) {
+  char buff[MAX_PATH];
+  // Generic form of C:\Windows\System32
+  HRESULT res =  SHGetFolderPathA(NULL,
+                                  CSIDL_FLAG_CREATE | CSIDL_SYSTEM,
+                                  NULL,
+                                  SHGFP_TYPE_CURRENT,
+                                  buff);
+  if (res != S_OK) {
+    assert(0 && "Failed to get system directory");
+    return;
+  }
+  Paths.push_back(sys::Path(buff));
+
+  // Reset buff.
+  buff[0] = 0;
+  // Generic form of C:\Windows
+  res =  SHGetFolderPathA(NULL,
+                          CSIDL_FLAG_CREATE | CSIDL_WINDOWS,
+                          NULL,
+                          SHGFP_TYPE_CURRENT,
+                          buff);
+  if (res != S_OK) {
+    assert(0 && "Failed to get windows directory");
+    return;
+  }
+  Paths.push_back(sys::Path(buff));
+}
+
+void
+Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) {
+  char * env_var = getenv("LLVM_LIB_SEARCH_PATH");
+  if (env_var != 0) {
+    getPathList(env_var,Paths);
+  }
+#ifdef LLVM_LIBDIR
+  {
+    Path tmpPath;
+    if (tmpPath.set(LLVM_LIBDIR))
+      if (tmpPath.canRead())
+        Paths.push_back(tmpPath);
+  }
+#endif
+  GetSystemLibraryPaths(Paths);
+}
+
+Path
+Path::GetLLVMDefaultConfigDir() {
+  Path ret = GetUserHomeDirectory();
+  if (!ret.appendComponent(".llvm"))
+    assert(0 && "Failed to append .llvm");
+  return ret;
+}
+
+Path
+Path::GetUserHomeDirectory() {
+  char buff[MAX_PATH];
+  HRESULT res = SHGetFolderPathA(NULL,
+                                 CSIDL_FLAG_CREATE | CSIDL_APPDATA,
+                                 NULL,
+                                 SHGFP_TYPE_CURRENT,
+                                 buff);
+  if (res != S_OK)
+    assert(0 && "Failed to get user home directory");
+  return Path(buff);
+}
+
+Path
+Path::GetCurrentDirectory() {
+  char pathname[MAX_PATH];
+  ::GetCurrentDirectoryA(MAX_PATH,pathname);
+  return Path(pathname);
+}
+
+/// GetMainExecutable - Return the path to the main executable, given the
+/// value of argv[0] from program startup.
+Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
+  char pathname[MAX_PATH];
+  DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+  return ret != MAX_PATH ? Path(pathname) : Path();
+}
+
+
+// FIXME: the above set of functions don't map to Windows very well.
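+
+// For illustration, the accessors below split a path as follows (values
+// assume the forward-slash form this file normalizes to):
+//
+//   Path("C:/tmp/archive.tar.gz").getBasename() == "archive.tar"
+//   Path("C:/tmp/archive.tar.gz").getSuffix()   == "gz"
+//   Path("C:/tmp/archive.tar.gz").getLast()     == "archive.tar.gz"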
+
+
+StringRef Path::getDirname() const {
+  return getDirnameCharSep(path, "/");
+}
+
+StringRef
+Path::getBasename() const {
+  // Find the last slash
+  size_t slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  size_t dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef(path).substr(slash);
+  else
+    return StringRef(path).substr(slash, dot - slash);
+}
+
+StringRef
+Path::getSuffix() const {
+  // Find the last slash
+  size_t slash = path.rfind('/');
+  if (slash == std::string::npos)
+    slash = 0;
+  else
+    slash++;
+
+  size_t dot = path.rfind('.');
+  if (dot == std::string::npos || dot < slash)
+    return StringRef("");
+  else
+    return StringRef(path).substr(dot + 1);
+}
+
+bool
+Path::exists() const {
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isDirectory() const {
+  DWORD attr = GetFileAttributes(path.c_str());
+  return (attr != INVALID_FILE_ATTRIBUTES) &&
+         (attr & FILE_ATTRIBUTE_DIRECTORY);
+}
+
+bool
+Path::isSymLink() const {
+  DWORD attributes = GetFileAttributes(path.c_str());
+
+  if (attributes == INVALID_FILE_ATTRIBUTES)
+    // There's no sane way to report this :(.
+    assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES");
+
+  // This isn't exactly what defines a NTFS symlink, but it is only true for
+  // paths that act like a symlink.
+  return attributes & FILE_ATTRIBUTE_REPARSE_POINT;
+}
+
+bool
+Path::canRead() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::canWrite() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY);
+}
+
+bool
+Path::canExecute() const {
+  // FIXME: take security attributes into account.
+  DWORD attr = GetFileAttributes(path.c_str());
+  return attr != INVALID_FILE_ATTRIBUTES;
+}
+
+bool
+Path::isRegularFile() const {
+  bool res;
+  if (fs::is_regular_file(path, res))
+    return false;
+  return res;
+}
+
+StringRef
+Path::getLast() const {
+  // Find the last slash
+  size_t pos = path.rfind('/');
+
+  // Handle the corner cases
+  if (pos == std::string::npos)
+    return path;
+
+  // If the last character is a slash, we have a root directory
+  if (pos == path.length()-1)
+    return path;
+
+  // Return everything after the last slash
+  return StringRef(path).substr(pos+1);
+}
+
+const FileStatus *
+PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const {
+  if (!fsIsValid || update) {
+    WIN32_FILE_ATTRIBUTE_DATA fi;
+    if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+      MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) +
+                 ": Can't get status: ");
+      return 0;
+    }
+
+    status.fileSize = fi.nFileSizeHigh;
+    status.fileSize <<= sizeof(fi.nFileSizeHigh)*8;
+    status.fileSize += fi.nFileSizeLow;
+
+    status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777;
+    status.user = 9999;    // Not applicable to Windows, so...
+    status.group = 9999;   // Not applicable to Windows, so...
+
+    // FIXME: this is only unique if the file is accessed by the same file path.
+    // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode
+    // numbers, but the concept doesn't exist in Windows.
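+    // A sketch of a stronger unique ID via the documented Win32
+    // GetFileInformationByHandle API (it needs an open handle, so it is
+    // illustrative only here):
+    //
+    //   BY_HANDLE_FILE_INFORMATION Info;
+    //   if (GetFileInformationByHandle(h, &Info))
+    //     id = ((uint64_t)Info.nFileIndexHigh << 32) | Info.nFileIndexLow;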
+    status.uniqueID = 0;
+    for (unsigned i = 0; i < path.length(); ++i)
+      status.uniqueID += path[i];
+
+    ULARGE_INTEGER ui;
+    ui.LowPart = fi.ftLastWriteTime.dwLowDateTime;
+    ui.HighPart = fi.ftLastWriteTime.dwHighDateTime;
+    status.modTime.fromWin32Time(ui.QuadPart);
+
+    status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY;
+    fsIsValid = true;
+  }
+  return &status;
+}
+
+bool Path::makeReadableOnDisk(std::string* ErrMsg) {
+  // All files are readable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool Path::makeWriteableOnDisk(std::string* ErrMsg) {
+  DWORD attr = GetFileAttributes(path.c_str());
+
+  // If it doesn't exist, we're done.
+  if (attr == INVALID_FILE_ATTRIBUTES)
+    return false;
+
+  if (attr & FILE_ATTRIBUTE_READONLY) {
+    if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) {
+      MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: ");
+      return true;
+    }
+  }
+  return false;
+}
+
+bool Path::makeExecutableOnDisk(std::string* ErrMsg) {
+  // All files are executable on Windows (ignoring security attributes).
+  return false;
+}
+
+bool
+Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) {
+    MakeErrMsg(ErrMsg, path + ": can't get status of file");
+    return true;
+  }
+
+  if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+    if (ErrMsg)
+      *ErrMsg = path + ": not a directory";
+    return true;
+  }
+
+  result.clear();
+  WIN32_FIND_DATA fd;
+  std::string searchpath = path;
+  if (path.size() == 0 || searchpath[path.size()-1] == '/')
+    searchpath += "*";
+  else
+    searchpath += "/*";
+
+  HANDLE h = FindFirstFile(searchpath.c_str(), &fd);
+  if (h == INVALID_HANDLE_VALUE) {
+    if (GetLastError() == ERROR_FILE_NOT_FOUND)
+      return true; // not really an error, now is it?
+ MakeErrMsg(ErrMsg, path + ": Can't read directory: "); + return true; + } + + do { + if (fd.cFileName[0] == '.') + continue; + Path aPath(path); + aPath.appendComponent(&fd.cFileName[0]); + result.insert(aPath); + } while (FindNextFile(h, &fd)); + + DWORD err = GetLastError(); + FindClose(h); + if (err != ERROR_NO_MORE_FILES) { + SetLastError(err); + MakeErrMsg(ErrMsg, path + ": Can't read directory: "); + return true; + } + return false; +} + +bool +Path::set(StringRef a_path) { + if (a_path.empty()) + return false; + std::string save(path); + path = a_path; + FlipBackSlashes(path); + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::appendComponent(StringRef name) { + if (name.empty()) + return false; + std::string save(path); + if (!path.empty()) { + size_t last = path.size() - 1; + if (path[last] != '/') + path += '/'; + } + path += name; + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::eraseComponent() { + size_t slashpos = path.rfind('/',path.size()); + if (slashpos == path.size() - 1 || slashpos == std::string::npos) + return false; + std::string save(path); + path.erase(slashpos); + if (!isValid()) { + path = save; + return false; + } + return true; +} + +bool +Path::eraseSuffix() { + size_t dotpos = path.rfind('.',path.size()); + size_t slashpos = path.rfind('/',path.size()); + if (dotpos != std::string::npos) { + if (slashpos == std::string::npos || dotpos > slashpos+1) { + std::string save(path); + path.erase(dotpos, path.size()-dotpos); + if (!isValid()) { + path = save; + return false; + } + return true; + } + } + return false; +} + +inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char*msg) { + if (ErrMsg) + *ErrMsg = std::string(pathname) + ": " + std::string(msg); + return true; +} + +bool +Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) { + // Get a writeable copy of the path name + size_t len = path.length(); + char *pathname = reinterpret_cast(_alloca(len+2)); + path.copy(pathname, len); + pathname[len] = 0; + + // Make sure it ends with a slash. + if (len == 0 || pathname[len - 1] != '/') { + pathname[len] = '/'; + pathname[++len] = 0; + } + + // Determine starting point for initial / search. + char *next = pathname; + if (pathname[0] == '/' && pathname[1] == '/') { + // Skip host name. + next = strchr(pathname+2, '/'); + if (next == NULL) + return PathMsg(ErrMsg, pathname, "badly formed remote directory"); + + // Skip share name. + next = strchr(next+1, '/'); + if (next == NULL) + return PathMsg(ErrMsg, pathname,"badly formed remote directory"); + + next++; + if (*next == 0) + return PathMsg(ErrMsg, pathname, "badly formed remote directory"); + + } else { + if (pathname[1] == ':') + next += 2; // skip drive letter + if (*next == '/') + next++; // skip root directory + } + + // If we're supposed to create intermediate directories + if (create_parents) { + // Loop through the directory components until we're done + while (*next) { + next = strchr(next, '/'); + *next = 0; + if (!CreateDirectory(pathname, NULL) && + GetLastError() != ERROR_ALREADY_EXISTS) + return MakeErrMsg(ErrMsg, + std::string(pathname) + ": Can't create directory: "); + *next++ = '/'; + } + } else { + // Drop trailing slash. 
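+    // (A single CreateDirectory call follows; a missing intermediate
+    // directory then surfaces as ERROR_PATH_NOT_FOUND.)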
+    pathname[len-1] = 0;
+    if (!CreateDirectory(pathname, NULL) &&
+        GetLastError() != ERROR_ALREADY_EXISTS) {
+      return MakeErrMsg(ErrMsg, std::string(pathname) +
+                        ": Can't create directory: ");
+    }
+  }
+  return false;
+}
+
+bool
+Path::createFileOnDisk(std::string* ErrMsg) {
+  // Create the file
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": Can't create file: ");
+
+  CloseHandle(h);
+  return false;
+}
+
+bool
+Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
+  WIN32_FILE_ATTRIBUTE_DATA fi;
+  if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi))
+    return true;
+
+  if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
+    // If it doesn't exist, we're done.
+    bool Exists;
+    if (fs::exists(path, Exists) || !Exists)
+      return false;
+
+    char *pathname = reinterpret_cast<char*>(_alloca(path.length()+3));
+    int lastchar = path.length() - 1;
+    path.copy(pathname, lastchar+1);
+
+    // Make path end with '/*'.
+    if (pathname[lastchar] != '/')
+      pathname[++lastchar] = '/';
+    pathname[lastchar+1] = '*';
+    pathname[lastchar+2] = 0;
+
+    if (remove_contents) {
+      WIN32_FIND_DATA fd;
+      HANDLE h = FindFirstFile(pathname, &fd);
+
+      // It's a bad idea to alter the contents of a directory while enumerating
+      // its contents. So build a list of its contents first, then destroy them.
+
+      if (h != INVALID_HANDLE_VALUE) {
+        std::vector<Path> list;
+
+        do {
+          if (strcmp(fd.cFileName, ".") == 0)
+            continue;
+          if (strcmp(fd.cFileName, "..") == 0)
+            continue;
+
+          Path aPath(path);
+          aPath.appendComponent(&fd.cFileName[0]);
+          list.push_back(aPath);
+        } while (FindNextFile(h, &fd));
+
+        DWORD err = GetLastError();
+        FindClose(h);
+        if (err != ERROR_NO_MORE_FILES) {
+          SetLastError(err);
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+        }
+
+        for (std::vector<Path>::iterator I = list.begin(); I != list.end();
+             ++I) {
+          Path &aPath = *I;
+          aPath.eraseFromDisk(true);
+        }
+      } else {
+        if (GetLastError() != ERROR_FILE_NOT_FOUND)
+          return MakeErrMsg(ErrStr, path + ": Can't read directory: ");
+      }
+    }
+
+    pathname[lastchar] = 0;
+    if (!RemoveDirectory(pathname))
+      return MakeErrMsg(ErrStr,
+        std::string(pathname) + ": Can't destroy directory: ");
+    return false;
+  } else {
+    // Read-only files cannot be deleted on Windows. Must remove the read-only
+    // attribute first.
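+    // (FILE_ATTRIBUTE_READONLY blocks DeleteFile on Windows, unlike POSIX,
+    // where only the containing directory's permissions matter.)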
+ if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { + if (!SetFileAttributes(path.c_str(), + fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); + } + + if (!DeleteFile(path.c_str())) + return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); + return false; + } +} + +bool Path::getMagicNumber(std::string& Magic, unsigned len) const { + assert(len < 1024 && "Request for magic string too long"); + char* buf = reinterpret_cast(alloca(len)); + + HANDLE h = CreateFile(path.c_str(), + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (h == INVALID_HANDLE_VALUE) + return false; + + DWORD nRead = 0; + BOOL ret = ReadFile(h, buf, len, &nRead, NULL); + CloseHandle(h); + + if (!ret || nRead != len) + return false; + + Magic = std::string(buf, len); + return true; +} + +bool +Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { + if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING)) + return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path + + "': "); + return false; +} + +bool +Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const { + // FIXME: should work on directories also. + if (!si.isFile) { + return true; + } + + HANDLE h = CreateFile(path.c_str(), + FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL); + if (h == INVALID_HANDLE_VALUE) + return true; + + BY_HANDLE_FILE_INFORMATION bhfi; + if (!GetFileInformationByHandle(h, &bhfi)) { + DWORD err = GetLastError(); + CloseHandle(h); + SetLastError(err); + return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: "); + } + + ULARGE_INTEGER ui; + ui.QuadPart = si.modTime.toWin32Time(); + FILETIME ft; + ft.dwLowDateTime = ui.LowPart; + ft.dwHighDateTime = ui.HighPart; + BOOL ret = SetFileTime(h, NULL, &ft, &ft); + DWORD err = GetLastError(); + CloseHandle(h); + if (!ret) { + SetLastError(err); + return MakeErrMsg(ErrMsg, path + ": SetFileTime: "); + } + + // Best we can do with Unix permission bits is to interpret the owner + // writable bit. + if (si.mode & 0200) { + if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { + if (!SetFileAttributes(path.c_str(), + bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); + } + } else { + if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) { + if (!SetFileAttributes(path.c_str(), + bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY)) + return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); + } + } + + return false; +} + +bool +CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) { + // Can't use CopyFile macro defined in Windows.h because it would mess up the + // above line. We use the expansion it would have in a non-UNICODE build. + if (!::CopyFileA(Src.c_str(), Dest.c_str(), false)) + return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() + + "' to '" + Dest.str() + "': "); + return false; +} + +bool +Path::makeUnique(bool reuse_current, std::string* ErrMsg) { + bool Exists; + if (reuse_current && (fs::exists(path, Exists) || !Exists)) + return false; // File doesn't exist already, just use it! + + // Reserve space for -XXXXXX at the end. + char *FNBuffer = (char*) alloca(path.size()+8); + unsigned offset = path.size(); + path.copy(FNBuffer, offset); + + // Find a numeric suffix that isn't used by an existing file. 
Assume there
+  // won't be more than 1 million files with the same prefix. Probably a safe
+  // bet.
+  static int FCounter = -1;
+  if (FCounter < 0) {
+    // Give arbitrary initial seed.
+    // FIXME: We should use sys::fs::unique_file() in future.
+    LARGE_INTEGER cnt64;
+    DWORD x = GetCurrentProcessId();
+    x = (x << 16) | (x >> 16);
+    if (QueryPerformanceCounter(&cnt64))   // RDTSC
+      x ^= cnt64.HighPart ^ cnt64.LowPart;
+    FCounter = x % 1000000;
+  }
+  do {
+    sprintf(FNBuffer+offset, "-%06u", FCounter);
+    if (++FCounter > 999999)
+      FCounter = 0;
+    path = FNBuffer;
+  } while (!fs::exists(path, Exists) && Exists);
+  return false;
+}
+
+bool
+Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
+  // Make this into a unique file name
+  makeUnique(reuse_current, ErrMsg);
+
+  // Now go and create it
+  HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW,
+                        FILE_ATTRIBUTE_NORMAL, NULL);
+  if (h == INVALID_HANDLE_VALUE)
+    return MakeErrMsg(ErrMsg, path + ": can't create file");
+
+  CloseHandle(h);
+  return false;
+}
+
+/// MapInFilePages - Not yet implemented on win32.
+const char *Path::MapInFilePages(int FD, uint64_t FileSize) {
+  return 0;
+}
+
+/// UnMapFilePages - Not yet implemented on win32.
+void Path::UnMapFilePages(const char *Base, uint64_t FileSize) {
+  assert(0 && "NOT IMPLEMENTED");
+}
+
+}
+}
diff --git a/final/lib/Support/Windows/PathV2.inc b/final/lib/Support/Windows/PathV2.inc
new file mode 100644
index 00000000000..8f306d19fe1
--- /dev/null
+++ b/final/lib/Support/Windows/PathV2.inc
@@ -0,0 +1,750 @@
+//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Windows specific implementation of the PathV2 API.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Windows code that
+//=== is guaranteed to work on *all* Windows variants.
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <wincrypt.h>
+#include <fcntl.h>
+#include <io.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+// MinGW doesn't define this.
+#ifndef _ERRNO_T_DEFINED
+#define _ERRNO_T_DEFINED
+typedef int errno_t;
+#endif
+
+using namespace llvm;
+
+namespace {
+  typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)(
+    /*__in*/ LPCWSTR lpSymlinkFileName,
+    /*__in*/ LPCWSTR lpTargetFileName,
+    /*__in*/ DWORD dwFlags);
+
+  PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW(
+    ::GetProcAddress(::GetModuleHandleA("kernel32.dll"),
+                     "CreateSymbolicLinkW"));
+
+  error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16) {
+    int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                    utf8.begin(), utf8.size(),
+                                    utf16.begin(), 0);
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    utf16.reserve(len + 1);
+    utf16.set_size(len);
+
+    len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
+                                utf8.begin(), utf8.size(),
+                                utf16.begin(), utf16.size());
+
+    if (len == 0)
+      return windows_error(::GetLastError());
+
+    // Make utf16 null terminated.
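+    // (push_back then pop_back stores a trailing 0 past size() without
+    // counting it, so utf16.begin() is usable as a C string.)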
+ utf16.push_back(0); + utf16.pop_back(); + + return success; + } + + error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + SmallVectorImpl &utf8) { + // Get length. + int len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.begin(), 0, + NULL, NULL); + + if (len == 0) + return windows_error(::GetLastError()); + + utf8.reserve(len); + utf8.set_size(len); + + // Now do the actual conversion. + len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.data(), utf8.size(), + NULL, NULL); + + if (len == 0) + return windows_error(::GetLastError()); + + // Make utf8 null terminated. + utf8.push_back(0); + utf8.pop_back(); + + return success; + } + + error_code TempDir(SmallVectorImpl &result) { + retry_temp_dir: + DWORD len = ::GetTempPathW(result.capacity(), result.begin()); + + if (len == 0) + return windows_error(::GetLastError()); + + if (len > result.capacity()) { + result.reserve(len); + goto retry_temp_dir; + } + + result.set_size(len); + return success; + } + + // Forwarder for ScopedHandle. + BOOL WINAPI CryptReleaseContext(HCRYPTPROV Provider) { + return ::CryptReleaseContext(Provider, 0); + } + + typedef ScopedHandle + ScopedCryptContext; + bool is_separator(const wchar_t value) { + switch (value) { + case L'\\': + case L'/': + return true; + default: + return false; + } + } +} + +namespace llvm { +namespace sys { +namespace fs { + +error_code current_path(SmallVectorImpl &result) { + SmallVector cur_path; + cur_path.reserve(128); +retry_cur_dir: + DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); + + // A zero return value indicates a failure other than insufficient space. + if (len == 0) + return windows_error(::GetLastError()); + + // If there's insufficient space, the len returned is larger than the len + // given. + if (len > cur_path.capacity()) { + cur_path.reserve(len); + goto retry_cur_dir; + } + + cur_path.set_size(len); + // cur_path now holds the current directory in utf-16. Convert to utf-8. + + // Find out how much space we need. Sadly, this function doesn't return the + // size needed unless you tell it the result size is 0, which means you + // _always_ have to call it twice. + len = ::WideCharToMultiByte(CP_UTF8, 0, + cur_path.data(), cur_path.size(), + result.data(), 0, + NULL, NULL); + + if (len == 0) + return make_error_code(windows_error(::GetLastError())); + + result.reserve(len); + result.set_size(len); + // Now do the actual conversion. + len = ::WideCharToMultiByte(CP_UTF8, 0, + cur_path.data(), cur_path.size(), + result.data(), result.size(), + NULL, NULL); + if (len == 0) + return windows_error(::GetLastError()); + + return success; +} + +error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. + SmallVector wide_from; + SmallVector wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + + // Copy the file. 
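+  // (CopyFileW's last argument is bFailIfExists, so request failure unless
+  // overwrite_if_exists was passed.)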
+ BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(), + copt != copy_option::overwrite_if_exists); + + if (res == 0) + return windows_error(::GetLastError()); + + return success; +} + +error_code create_directory(const Twine &path, bool &existed) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + if (!::CreateDirectoryW(path_utf16.begin(), NULL)) { + error_code ec = windows_error(::GetLastError()); + if (ec == windows_error::already_exists) + existed = true; + else + return ec; + } else + existed = false; + + return success; +} + +error_code create_hard_link(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. + SmallVector wide_from; + SmallVector wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + + if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL)) + return windows_error(::GetLastError()); + + return success; +} + +error_code create_symlink(const Twine &to, const Twine &from) { + // Only do it if the function is available at runtime. + if (!create_symbolic_link_api) + return make_error_code(errc::function_not_supported); + + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. + SmallVector wide_from; + SmallVector wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + + if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0)) + return windows_error(::GetLastError()); + + return success; +} + +error_code remove(const Twine &path, bool &existed) { + SmallString<128> path_storage; + SmallVector path_utf16; + + file_status st; + if (error_code ec = status(path, st)) + return ec; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + if (st.type() == file_type::directory_file) { + if (!::RemoveDirectoryW(c_str(path_utf16))) { + error_code ec = windows_error(::GetLastError()); + if (ec != windows_error::file_not_found) + return ec; + existed = false; + } else + existed = true; + } else { + if (!::DeleteFileW(c_str(path_utf16))) { + error_code ec = windows_error(::GetLastError()); + if (ec != windows_error::file_not_found) + return ec; + existed = false; + } else + existed = true; + } + + return success; +} + +error_code rename(const Twine &from, const Twine &to) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. 
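+  // (All PathV2 entry points convert to UTF-16 and call the wide Win32 APIs
+  // so non-ASCII paths round-trip correctly.)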
+ SmallVector wide_from; + SmallVector wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + + if (!::MoveFileExW(wide_from.begin(), wide_to.begin(), + MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) + return windows_error(::GetLastError()); + + return success; +} + +error_code resize_file(const Twine &path, uint64_t size) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + int fd = ::_wopen(path_utf16.begin(), O_BINARY, S_IREAD | S_IWRITE); + if (fd == -1) + return error_code(errno, generic_category()); +#ifdef HAVE__CHSIZE_S + errno_t error = ::_chsize_s(fd, size); +#else + errno_t error = ::_chsize(fd, size); +#endif + ::close(fd); + return error_code(error, generic_category()); +} + +error_code exists(const Twine &path, bool &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); + + if (attributes == INVALID_FILE_ATTRIBUTES) { + // See if the file didn't actually exist. + error_code ec = make_error_code(windows_error(::GetLastError())); + if (ec != windows_error::file_not_found && + ec != windows_error::path_not_found) + return ec; + result = false; + } else + result = true; + return success; +} + +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + // Get arguments. + SmallString<128> a_storage; + SmallString<128> b_storage; + StringRef a = A.toStringRef(a_storage); + StringRef b = B.toStringRef(b_storage); + + // Convert to utf-16. + SmallVector wide_a; + SmallVector wide_b; + if (error_code ec = UTF8ToUTF16(a, wide_a)) return ec; + if (error_code ec = UTF8ToUTF16(b, wide_b)) return ec; + + AutoHandle HandleB( + ::CreateFileW(wide_b.begin(), + 0, + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + 0, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + 0)); + + AutoHandle HandleA( + ::CreateFileW(wide_a.begin(), + 0, + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + 0, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + 0)); + + // If both handles are invalid, it's an error. + if (HandleA == INVALID_HANDLE_VALUE && + HandleB == INVALID_HANDLE_VALUE) + return windows_error(::GetLastError()); + + // If only one is invalid, it's false. + if (HandleA == INVALID_HANDLE_VALUE && + HandleB == INVALID_HANDLE_VALUE) { + result = false; + return success; + } + + // Get file information. + BY_HANDLE_FILE_INFORMATION InfoA, InfoB; + if (!::GetFileInformationByHandle(HandleA, &InfoA)) + return windows_error(::GetLastError()); + if (!::GetFileInformationByHandle(HandleB, &InfoB)) + return windows_error(::GetLastError()); + + // See if it's all the same. 
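+  // (The volume serial number plus the 64-bit file index is the stable
+  // identity Windows gives a file; size and mtime are compared as an extra
+  // sanity check.)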
+ result = + InfoA.dwVolumeSerialNumber == InfoB.dwVolumeSerialNumber && + InfoA.nFileIndexHigh == InfoB.nFileIndexHigh && + InfoA.nFileIndexLow == InfoB.nFileIndexLow && + InfoA.nFileSizeHigh == InfoB.nFileSizeHigh && + InfoA.nFileSizeLow == InfoB.nFileSizeLow && + InfoA.ftLastWriteTime.dwLowDateTime == + InfoB.ftLastWriteTime.dwLowDateTime && + InfoA.ftLastWriteTime.dwHighDateTime == + InfoB.ftLastWriteTime.dwHighDateTime; + + return success; +} + +error_code file_size(const Twine &path, uint64_t &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + WIN32_FILE_ATTRIBUTE_DATA FileData; + if (!::GetFileAttributesExW(path_utf16.begin(), + ::GetFileExInfoStandard, + &FileData)) + return windows_error(::GetLastError()); + + result = + (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8)) + + FileData.nFileSizeLow; + + return success; +} + +error_code status(const Twine &path, file_status &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + DWORD attr = ::GetFileAttributesW(path_utf16.begin()); + if (attr == INVALID_FILE_ATTRIBUTES) + goto handle_status_error; + + // Handle reparse points. + if (attr & FILE_ATTRIBUTE_REPARSE_POINT) { + AutoHandle h( + ::CreateFileW(path_utf16.begin(), + 0, // Attributes only. + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + 0)); + if (h == INVALID_HANDLE_VALUE) + goto handle_status_error; + } + + if (attr & FILE_ATTRIBUTE_DIRECTORY) + result = file_status(file_type::directory_file); + else + result = file_status(file_type::regular_file); + + return success; + +handle_status_error: + error_code ec = windows_error(::GetLastError()); + if (ec == windows_error::file_not_found || + ec == windows_error::path_not_found) + result = file_status(file_type::file_not_found); + else if (ec == windows_error::sharing_violation) + result = file_status(file_type::type_unknown); + else { + result = file_status(file_type::status_error); + return ec; + } + + return success; +} + +error_code unique_file(const Twine &model, int &result_fd, + SmallVectorImpl &result_path) { + // Use result_path as temp storage. + result_path.set_size(0); + StringRef m = model.toStringRef(result_path); + + SmallVector model_utf16; + if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec; + + // Make model absolute by prepending a temp directory if it's not already. + bool absolute = path::is_absolute(m); + + if (!absolute) { + SmallVector temp_dir; + if (error_code ec = TempDir(temp_dir)) return ec; + // Handle c: by removing it. + if (model_utf16.size() > 2 && model_utf16[1] == L':') { + model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2); + } + model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end()); + } + + // Replace '%' with random chars. From here on, DO NOT modify model. It may be + // needed if the randomly chosen path already exists. + SmallVector random_path_utf16; + + // Get a Crypto Provider for CryptGenRandom. 
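+  // (CRYPT_VERIFYCONTEXT asks for an ephemeral context with no named key
+  // container, which is all CryptGenRandom needs.)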
+ HCRYPTPROV HCPC; + if (!::CryptAcquireContextW(&HCPC, + NULL, + NULL, + PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT)) + return windows_error(::GetLastError()); + ScopedCryptContext CryptoProvider(HCPC); + +retry_random_path: + random_path_utf16.set_size(0); + for (SmallVectorImpl::const_iterator i = model_utf16.begin(), + e = model_utf16.end(); + i != e; ++i) { + if (*i == L'%') { + BYTE val = 0; + if (!::CryptGenRandom(CryptoProvider, 1, &val)) + return windows_error(::GetLastError()); + random_path_utf16.push_back("0123456789abcdef"[val & 15]); + } + else + random_path_utf16.push_back(*i); + } + // Make random_path_utf16 null terminated. + random_path_utf16.push_back(0); + random_path_utf16.pop_back(); + + // Try to create + open the path. +retry_create_file: + HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(), + GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ, + NULL, + // Return ERROR_FILE_EXISTS if the file + // already exists. + CREATE_NEW, + FILE_ATTRIBUTE_TEMPORARY, + NULL); + if (TempFileHandle == INVALID_HANDLE_VALUE) { + // If the file existed, try again, otherwise, error. + error_code ec = windows_error(::GetLastError()); + if (ec == windows_error::file_exists) + goto retry_random_path; + // Check for non-existing parent directories. + if (ec == windows_error::path_not_found) { + // Create the directories using result_path as temp storage. + if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), + random_path_utf16.size(), result_path)) + return ec; + StringRef p(result_path.begin(), result_path.size()); + SmallString<64> dir_to_create; + for (path::const_iterator i = path::begin(p), + e = --path::end(p); i != e; ++i) { + path::append(dir_to_create, *i); + bool Exists; + if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec; + if (!Exists) { + // If c: doesn't exist, bail. + if (i->endswith(":")) + return ec; + + SmallVector dir_to_create_utf16; + if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16)) + return ec; + + // Create the directory. + if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL)) + return windows_error(::GetLastError()); + } + } + goto retry_create_file; + } + return ec; + } + + // Set result_path to the utf-8 representation of the path. + if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), + random_path_utf16.size(), result_path)) { + ::CloseHandle(TempFileHandle); + ::DeleteFileW(random_path_utf16.begin()); + return ec; + } + + // Convert the Windows API file handle into a C-runtime handle. + int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0); + if (fd == -1) { + ::CloseHandle(TempFileHandle); + ::DeleteFileW(random_path_utf16.begin()); + // MSDN doesn't say anything about _open_osfhandle setting errno or + // GetLastError(), so just return invalid_handle. + return windows_error::invalid_handle; + } + + result_fd = fd; + return success; +} + +error_code get_magic(const Twine &path, uint32_t len, + SmallVectorImpl &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + result.set_size(0); + + // Convert path to UTF-16. + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + // Open file. + HANDLE file = ::CreateFileW(c_str(path_utf16), + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_READONLY, + NULL); + if (file == INVALID_HANDLE_VALUE) + return windows_error(::GetLastError()); + + // Allocate buffer. + result.reserve(len); + + // Get magic! 
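+  // (ReadFile can succeed with a short read, so check the byte count as well
+  // as the return value.)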
+ DWORD bytes_read = 0; + BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL); + error_code ec = windows_error(::GetLastError()); + ::CloseHandle(file); + if (!read_success || (bytes_read != len)) { + // Set result size to the number of bytes read if it's valid. + if (bytes_read <= len) + result.set_size(bytes_read); + // ERROR_HANDLE_EOF is mapped to errc::value_too_large. + return ec; + } + + result.set_size(len); + return success; +} + +error_code directory_iterator_construct(directory_iterator &it, StringRef path){ + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path, + path_utf16)) + return ec; + + // Convert path to the format that Windows is happy with. + if (path_utf16.size() > 0 && + !is_separator(path_utf16[path.size() - 1]) && + path_utf16[path.size() - 1] != L':') { + path_utf16.push_back(L'\\'); + path_utf16.push_back(L'*'); + } else { + path_utf16.push_back(L'*'); + } + + // Get the first directory entry. + WIN32_FIND_DATAW FirstFind; + ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind)); + if (!FindHandle) + return windows_error(::GetLastError()); + + size_t FilenameLen = ::wcslen(FirstFind.cFileName); + while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') || + (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' && + FirstFind.cFileName[1] == L'.')) + if (!::FindNextFileW(FindHandle, &FirstFind)) { + error_code ec = windows_error(::GetLastError()); + // Check for end. + if (ec == windows_error::no_more_files) + return directory_iterator_destruct(it); + return ec; + } else + FilenameLen = ::wcslen(FirstFind.cFileName); + + // Construct the current directory entry. + SmallString<128> directory_entry_name_utf8; + if (error_code ec = UTF16ToUTF8(FirstFind.cFileName, + ::wcslen(FirstFind.cFileName), + directory_entry_name_utf8)) + return ec; + + it.IterationHandle = intptr_t(FindHandle.take()); + SmallString<128> directory_entry_path(path); + path::append(directory_entry_path, directory_entry_name_utf8.str()); + it.CurrentEntry = directory_entry(directory_entry_path.str()); + + return success; +} + +error_code directory_iterator_destruct(directory_iterator& it) { + if (it.IterationHandle != 0) + // Closes the handle if it's valid. + ScopedFindHandle close(HANDLE(it.IterationHandle)); + it.IterationHandle = 0; + it.CurrentEntry = directory_entry(); + return success; +} + +error_code directory_iterator_increment(directory_iterator& it) { + WIN32_FIND_DATAW FindData; + if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) { + error_code ec = windows_error(::GetLastError()); + // Check for end. + if (ec == windows_error::no_more_files) + return directory_iterator_destruct(it); + return ec; + } + + size_t FilenameLen = ::wcslen(FindData.cFileName); + if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') || + (FilenameLen == 2 && FindData.cFileName[0] == L'.' 
&& + FindData.cFileName[1] == L'.')) + return directory_iterator_increment(it); + + SmallString<128> directory_entry_path_utf8; + if (error_code ec = UTF16ToUTF8(FindData.cFileName, + ::wcslen(FindData.cFileName), + directory_entry_path_utf8)) + return ec; + + it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); + return success; +} + +} // end namespace fs +} // end namespace sys +} // end namespace llvm diff --git a/final/lib/Support/Windows/Process.inc b/final/lib/Support/Windows/Process.inc new file mode 100644 index 00000000000..06a7f0054d5 --- /dev/null +++ b/final/lib/Support/Windows/Process.inc @@ -0,0 +1,222 @@ +//===- Win32/Process.cpp - Win32 Process Implementation ------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Process class. +// +//===----------------------------------------------------------------------===// + +#include "Windows.h" +#include +#include +#include + +#ifdef __MINGW32__ + #if (HAVE_LIBPSAPI != 1) + #error "libpsapi.a should be present" + #endif +#else + #pragma comment(lib, "psapi.lib") +#endif + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +#ifdef __MINGW32__ +// This ban should be lifted when MinGW 1.0+ has defined this value. +# define _HEAPOK (-2) +#endif + +namespace llvm { +using namespace sys; + +// This function retrieves the page size using GetSystemInfo and is present +// solely so it can be called once in Process::GetPageSize to initialize the +// static variable PageSize. +inline unsigned GetPageSizeOnce() { + // NOTE: A 32-bit application running under WOW64 is supposed to use + // GetNativeSystemInfo. However, this interface is not present prior + // to Windows XP so to use it requires dynamic linking. It is not clear + // how this affects the reported page size, if at all. One could argue + // that LLVM ought to run as 64-bits on a 64-bit system, anyway. 
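+  // (dwPageSize is what callers want here; the related, usually larger,
+  // dwAllocationGranularity only matters when reserving address space.)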
+ SYSTEM_INFO info; + GetSystemInfo(&info); + return static_cast(info.dwPageSize); +} + +unsigned +Process::GetPageSize() { + static const unsigned PageSize = GetPageSizeOnce(); + return PageSize; +} + +size_t +Process::GetMallocUsage() +{ + _HEAPINFO hinfo; + hinfo._pentry = NULL; + + size_t size = 0; + + while (_heapwalk(&hinfo) == _HEAPOK) + size += hinfo._size; + + return size; +} + +size_t +Process::GetTotalMemoryUsage() +{ + PROCESS_MEMORY_COUNTERS pmc; + GetProcessMemoryInfo(GetCurrentProcess(), &pmc, sizeof(pmc)); + return pmc.PagefileUsage; +} + +void +Process::GetTimeUsage( + TimeValue& elapsed, TimeValue& user_time, TimeValue& sys_time) +{ + elapsed = TimeValue::now(); + + uint64_t ProcCreate, ProcExit, KernelTime, UserTime; + GetProcessTimes(GetCurrentProcess(), (FILETIME*)&ProcCreate, + (FILETIME*)&ProcExit, (FILETIME*)&KernelTime, + (FILETIME*)&UserTime); + + // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond) + user_time.seconds( UserTime / 10000000 ); + user_time.nanoseconds( unsigned(UserTime % 10000000) * 100 ); + sys_time.seconds( KernelTime / 10000000 ); + sys_time.nanoseconds( unsigned(KernelTime % 10000000) * 100 ); +} + +int Process::GetCurrentUserId() +{ + return 65536; +} + +int Process::GetCurrentGroupId() +{ + return 65536; +} + +// Some LLVM programs such as bugpoint produce core files as a normal part of +// their operation. To prevent the disk from filling up, this configuration item +// does what's necessary to prevent their generation. +void Process::PreventCoreFiles() { + // Windows doesn't do core files, but it does do modal pop-up message + // boxes. As this method is used by bugpoint, preventing these pop-ups + // is the moral equivalent of suppressing core files. + SetErrorMode(SEM_FAILCRITICALERRORS | + SEM_NOGPFAULTERRORBOX | + SEM_NOOPENFILEERRORBOX); +} + +bool Process::StandardInIsUserInput() { + return FileDescriptorIsDisplayed(0); +} + +bool Process::StandardOutIsDisplayed() { + return FileDescriptorIsDisplayed(1); +} + +bool Process::StandardErrIsDisplayed() { + return FileDescriptorIsDisplayed(2); +} + +bool Process::FileDescriptorIsDisplayed(int fd) { + DWORD Mode; // Unused + return (GetConsoleMode((HANDLE)_get_osfhandle(fd), &Mode) != 0); +} + +unsigned Process::StandardOutColumns() { + unsigned Columns = 0; + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +unsigned Process::StandardErrColumns() { + unsigned Columns = 0; + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_ERROR_HANDLE), &csbi)) + Columns = csbi.dwSize.X; + return Columns; +} + +// It always has colors. 
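+// (A real console always supports the 16 text attributes used below, so
+// "has colors" reduces to "is displayed".)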
+bool Process::StandardErrHasColors() { + return StandardErrIsDisplayed(); +} + +bool Process::StandardOutHasColors() { + return StandardOutIsDisplayed(); +} + +namespace { +class DefaultColors +{ + private: + WORD defaultColor; + public: + DefaultColors() + :defaultColor(GetCurrentColor()) {} + static unsigned GetCurrentColor() { + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + return csbi.wAttributes; + return 0; + } + WORD operator()() const { return defaultColor; } +}; + +DefaultColors defaultColors; +} + +bool Process::ColorNeedsFlush() { + return true; +} + +const char *Process::OutputBold(bool bg) { + WORD colors = DefaultColors::GetCurrentColor(); + if (bg) + colors |= BACKGROUND_INTENSITY; + else + colors |= FOREGROUND_INTENSITY; + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +const char *Process::OutputColor(char code, bool bold, bool bg) { + WORD colors; + if (bg) { + colors = ((code&1) ? BACKGROUND_RED : 0) | + ((code&2) ? BACKGROUND_GREEN : 0 ) | + ((code&4) ? BACKGROUND_BLUE : 0); + if (bold) + colors |= BACKGROUND_INTENSITY; + } else { + colors = ((code&1) ? FOREGROUND_RED : 0) | + ((code&2) ? FOREGROUND_GREEN : 0 ) | + ((code&4) ? FOREGROUND_BLUE : 0); + if (bold) + colors |= FOREGROUND_INTENSITY; + } + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +const char *Process::ResetColor() { + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); + return 0; +} + +} diff --git a/final/lib/Support/Windows/Program.inc b/final/lib/Support/Windows/Program.inc new file mode 100644 index 00000000000..350363cf710 --- /dev/null +++ b/final/lib/Support/Windows/Program.inc @@ -0,0 +1,403 @@ +//===- Win32/Program.cpp - Win32 Program Implementation ------- -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 specific implementation of the Program class. +// +//===----------------------------------------------------------------------===// + +#include "Windows.h" +#include +#include +#include +#include + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code +//=== and must not be UNIX code +//===----------------------------------------------------------------------===// + +namespace { + struct Win32ProcessInfo { + HANDLE hProcess; + DWORD dwProcessId; + }; +} + +namespace llvm { +using namespace sys; + +Program::Program() : Data_(0) {} + +Program::~Program() { + if (Data_) { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + CloseHandle(wpi->hProcess); + delete wpi; + Data_ = 0; + } +} + +unsigned Program::GetPid() const { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + return wpi->dwProcessId; +} + +// This function just uses the PATH environment variable to find the program. +Path +Program::FindProgramByName(const std::string& progName) { + + // Check some degenerate cases + if (progName.length() == 0) // no program + return Path(); + Path temp; + if (!temp.set(progName)) // invalid name + return Path(); + // Return paths with slashes verbatim. 
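+  // (Anything containing a separator is treated as already resolved; only
+  // bare names are handed to SearchPath below.)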
+ if (progName.find('\\') != std::string::npos || + progName.find('/') != std::string::npos) + return temp; + + // At this point, the file name is valid and does not contain slashes. + // Let Windows search for it. + char buffer[MAX_PATH]; + char *dummy = NULL; + DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH, + buffer, &dummy); + + // See if it wasn't found. + if (len == 0) + return Path(); + + // See if we got the entire path. + if (len < MAX_PATH) + return Path(buffer); + + // Buffer was too small; grow and retry. + while (true) { + char *b = reinterpret_cast(_alloca(len+1)); + DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy); + + // It is unlikely the search failed, but it's always possible some file + // was added or removed since the last search, so be paranoid... + if (len2 == 0) + return Path(); + else if (len2 <= len) + return Path(b); + + len = len2; + } +} + +static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) { + HANDLE h; + if (path == 0) { + DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), + GetCurrentProcess(), &h, + 0, TRUE, DUPLICATE_SAME_ACCESS); + return h; + } + + const char *fname; + if (path->isEmpty()) + fname = "NUL"; + else + fname = path->c_str(); + + SECURITY_ATTRIBUTES sa; + sa.nLength = sizeof(sa); + sa.lpSecurityDescriptor = 0; + sa.bInheritHandle = TRUE; + + h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ, + &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); + if (h == INVALID_HANDLE_VALUE) { + MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " + + (fd ? "input: " : "output: ")); + } + + return h; +} + +/// ArgNeedsQuotes - Check whether argument needs to be quoted when calling +/// CreateProcess. +static bool ArgNeedsQuotes(const char *Str) { + return Str[0] == '\0' || strpbrk(Str, "\t \"&\'()*<>\\`^|") != 0; +} + + +/// ArgLenWithQuotes - Check whether argument needs to be quoted when calling +/// CreateProcess and returns length of quoted arg with escaped quotes +static unsigned int ArgLenWithQuotes(const char *Str) { + unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0; + + while (*Str != '\0') { + if (*Str == '\"') + ++len; + + ++len; + ++Str; + } + + return len; +} + + +bool +Program::Execute(const Path& path, + const char** args, + const char** envp, + const Path** redirects, + unsigned memoryLimit, + std::string* ErrMsg) { + if (Data_) { + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + CloseHandle(wpi->hProcess); + delete wpi; + Data_ = 0; + } + + if (!path.canExecute()) { + if (ErrMsg) + *ErrMsg = "program not executable"; + return false; + } + + // Windows wants a command line, not an array of args, to pass to the new + // process. We have to concatenate them all, while quoting the args that + // have embedded spaces (or are empty). + + // First, determine the length of the command line. + unsigned len = 0; + for (unsigned i = 0; args[i]; i++) { + len += ArgLenWithQuotes(args[i]) + 1; + } + + // Now build the command line. + char *command = reinterpret_cast(_alloca(len+1)); + char *p = command; + + for (unsigned i = 0; args[i]; i++) { + const char *arg = args[i]; + + bool needsQuoting = ArgNeedsQuotes(arg); + if (needsQuoting) + *p++ = '"'; + + while (*arg != '\0') { + if (*arg == '\"') + *p++ = '\\'; + + *p++ = *arg++; + } + + if (needsQuoting) + *p++ = '"'; + *p++ = ' '; + } + + *p = 0; + + // The pointer to the environment block for the new process. 
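+  // CreateProcess expects, e.g. for {"A=1", "B=2"}:
+  //   A=1\0B=2\0\0  (each string NUL-terminated, plus a final NUL).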
+ char *envblock = 0; + + if (envp) { + // An environment block consists of a null-terminated block of + // null-terminated strings. Convert the array of environment variables to + // an environment block by concatenating them. + + // First, determine the length of the environment block. + len = 0; + for (unsigned i = 0; envp[i]; i++) + len += strlen(envp[i]) + 1; + + // Now build the environment block. + envblock = reinterpret_cast(_alloca(len+1)); + p = envblock; + + for (unsigned i = 0; envp[i]; i++) { + const char *ev = envp[i]; + size_t len = strlen(ev) + 1; + memcpy(p, ev, len); + p += len; + } + + *p = 0; + } + + // Create a child process. + STARTUPINFO si; + memset(&si, 0, sizeof(si)); + si.cb = sizeof(si); + si.hStdInput = INVALID_HANDLE_VALUE; + si.hStdOutput = INVALID_HANDLE_VALUE; + si.hStdError = INVALID_HANDLE_VALUE; + + if (redirects) { + si.dwFlags = STARTF_USESTDHANDLES; + + si.hStdInput = RedirectIO(redirects[0], 0, ErrMsg); + if (si.hStdInput == INVALID_HANDLE_VALUE) { + MakeErrMsg(ErrMsg, "can't redirect stdin"); + return false; + } + si.hStdOutput = RedirectIO(redirects[1], 1, ErrMsg); + if (si.hStdOutput == INVALID_HANDLE_VALUE) { + CloseHandle(si.hStdInput); + MakeErrMsg(ErrMsg, "can't redirect stdout"); + return false; + } + if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { + // If stdout and stderr should go to the same place, redirect stderr + // to the handle already open for stdout. + DuplicateHandle(GetCurrentProcess(), si.hStdOutput, + GetCurrentProcess(), &si.hStdError, + 0, TRUE, DUPLICATE_SAME_ACCESS); + } else { + // Just redirect stderr + si.hStdError = RedirectIO(redirects[2], 2, ErrMsg); + if (si.hStdError == INVALID_HANDLE_VALUE) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't redirect stderr"); + return false; + } + } + } + + PROCESS_INFORMATION pi; + memset(&pi, 0, sizeof(pi)); + + fflush(stdout); + fflush(stderr); + BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0, + envblock, NULL, &si, &pi); + DWORD err = GetLastError(); + + // Regardless of whether the process got created or not, we are done with + // the handles we created for it to inherit. + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + CloseHandle(si.hStdError); + + // Now return an error if the process didn't get created. + if (!rc) { + SetLastError(err); + MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + + path.str() + "'"); + return false; + } + Win32ProcessInfo* wpi = new Win32ProcessInfo; + wpi->hProcess = pi.hProcess; + wpi->dwProcessId = pi.dwProcessId; + Data_ = wpi; + + // Make sure these get closed no matter what. + AutoHandle hThread(pi.hThread); + + // Assign the process to a job if a memory limit is defined. 
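+  // (memoryLimit is in MiB; JOB_OBJECT_LIMIT_PROCESS_MEMORY enforces the
+  // per-process commit cap configured below.)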
+ AutoHandle hJob(0); + if (memoryLimit != 0) { + hJob = CreateJobObject(0, 0); + bool success = false; + if (hJob != 0) { + JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli; + memset(&jeli, 0, sizeof(jeli)); + jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY; + jeli.ProcessMemoryLimit = uintptr_t(memoryLimit) * 1048576; + if (SetInformationJobObject(hJob, JobObjectExtendedLimitInformation, + &jeli, sizeof(jeli))) { + if (AssignProcessToJobObject(hJob, pi.hProcess)) + success = true; + } + } + if (!success) { + SetLastError(GetLastError()); + MakeErrMsg(ErrMsg, std::string("Unable to set memory limit")); + TerminateProcess(pi.hProcess, 1); + WaitForSingleObject(pi.hProcess, INFINITE); + return false; + } + } + + return true; +} + +int +Program::Wait(const Path &path, + unsigned secondsToWait, + std::string* ErrMsg) { + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return -1; + } + + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + HANDLE hProcess = wpi->hProcess; + + // Wait for the process to terminate. + DWORD millisecondsToWait = INFINITE; + if (secondsToWait > 0) + millisecondsToWait = secondsToWait * 1000; + + if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) { + if (!TerminateProcess(hProcess, 1)) { + MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); + return -1; + } + WaitForSingleObject(hProcess, INFINITE); + } + + // Get its exit status. + DWORD status; + BOOL rc = GetExitCodeProcess(hProcess, &status); + DWORD err = GetLastError(); + + if (!rc) { + SetLastError(err); + MakeErrMsg(ErrMsg, "Failed getting status for program."); + return -1; + } + + return status; +} + +bool +Program::Kill(std::string* ErrMsg) { + if (Data_ == 0) { + MakeErrMsg(ErrMsg, "Process not started!"); + return true; + } + + Win32ProcessInfo* wpi = reinterpret_cast(Data_); + HANDLE hProcess = wpi->hProcess; + if (TerminateProcess(hProcess, 1) == 0) { + MakeErrMsg(ErrMsg, "The process couldn't be killed!"); + return true; + } + + return false; +} + +bool Program::ChangeStdinToBinary(){ + int result = _setmode( _fileno(stdin), _O_BINARY ); + return result == -1; +} + +bool Program::ChangeStdoutToBinary(){ + int result = _setmode( _fileno(stdout), _O_BINARY ); + return result == -1; +} + +bool Program::ChangeStderrToBinary(){ + int result = _setmode( _fileno(stderr), _O_BINARY ); + return result == -1; +} + +} diff --git a/final/lib/Support/Windows/RWMutex.inc b/final/lib/Support/Windows/RWMutex.inc new file mode 100644 index 00000000000..471f8fa294b --- /dev/null +++ b/final/lib/Support/Windows/RWMutex.inc @@ -0,0 +1,58 @@ +//= llvm/Support/Win32/Mutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) RWMutex class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. +//===----------------------------------------------------------------------===// + +#include "Windows.h" + +// FIXME: Windows does not have reader-writer locks pre-Vista. 
If you want
+// real reader-writer locks, you need a threads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+  data_ = calloc(1, sizeof(CRITICAL_SECTION));
+  InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+  DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::reader_release() {
+  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+  EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+bool RWMutexImpl::writer_release() {
+  LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+  return true;
+}
+
+
+}
diff --git a/final/lib/Support/Windows/Signals.inc b/final/lib/Support/Windows/Signals.inc
new file mode 100644
index 00000000000..14f3f21f02a
--- /dev/null
+++ b/final/lib/Support/Windows/Signals.inc
@@ -0,0 +1,328 @@
+//===- Win32/Signals.cpp - Win32 Signals Implementation ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the Win32 specific implementation of the Signals class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Windows.h"
+#include <stdio.h>
+#include <vector>
+#include <algorithm>
+
+#ifdef __MINGW32__
+ #include <imagehlp.h>
+#else
+ #include <dbghelp.h>
+#endif
+#include <psapi.h>
+
+#ifdef __MINGW32__
+ #if ((HAVE_LIBIMAGEHLP != 1) || (HAVE_LIBPSAPI != 1))
+  #error "libimagehlp.a & libpsapi.a should be present"
+ #endif
+#else
+ #pragma comment(lib, "psapi.lib")
+ #pragma comment(lib, "dbghelp.lib")
+#endif
+
+// Forward declare.
+static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep);
+static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType);
+
+// InterruptFunction - The function to call if ctrl-c is pressed.
+static void (*InterruptFunction)() = 0;
+
+static std::vector<sys::Path> *FilesToRemove = NULL;
+static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0;
+static bool RegisteredUnhandledExceptionFilter = false;
+static bool CleanupExecuted = false;
+static bool ExitOnUnhandledExceptions = false;
+static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+
+// Windows creates a new thread to execute the console handler when an event
+// (such as CTRL/C) occurs. This causes concurrency issues with the above
+// globals which this critical section addresses.
+static CRITICAL_SECTION CriticalSection;
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only Win32 specific code
+//=== and must not be UNIX code
+//===----------------------------------------------------------------------===//
+
+#ifdef _MSC_VER
+/// CRTReportHook - Function called on a CRT debugging event.
+static int CRTReportHook(int ReportType, char *Message, int *Return) {
+  // Don't cause a DebugBreak() on return.
+  if (Return)
+    *Return = 0;
+
+  switch (ReportType) {
+  default:
+  case _CRT_ASSERT:
+    fprintf(stderr, "CRT assert: %s\n", Message);
+    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+    // exception code? Perhaps SetErrorMode() handles this.
+    _exit(3);
+    break;
+  case _CRT_ERROR:
+    fprintf(stderr, "CRT error: %s\n", Message);
+    // FIXME: Is there a way to just crash? Perhaps throw to the unhandled
+    // exception code? Perhaps SetErrorMode() handles this.
+    _exit(3);
+    break;
+  case _CRT_WARN:
+    fprintf(stderr, "CRT warn: %s\n", Message);
+    break;
+  }
+
+  // Don't call _CrtDbgReport.
+  return TRUE;
+}
+#endif
+
+static void RegisterHandler() {
+  if (RegisteredUnhandledExceptionFilter) {
+    EnterCriticalSection(&CriticalSection);
+    return;
+  }
+
+  // Now's the time to create the critical section. This is the first time
+  // through here, and there's only one thread.
+  InitializeCriticalSection(&CriticalSection);
+
+  // Enter it immediately. Now if someone hits CTRL/C, the console handler
+  // can't proceed until the globals are updated.
+  EnterCriticalSection(&CriticalSection);
+
+  RegisteredUnhandledExceptionFilter = true;
+  OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter);
+  SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE);
+
+  // Environment variable to disable any kind of crash dialog.
+  if (getenv("LLVM_DISABLE_CRT_DEBUG")) {
+#ifdef _MSC_VER
+    _CrtSetReportHook(CRTReportHook);
+#endif
+    SetErrorMode(SEM_FAILCRITICALERRORS |
+                 SEM_NOGPFAULTERRORBOX |
+                 SEM_NOOPENFILEERRORBOX);
+    ExitOnUnhandledExceptions = true;
+  }
+
+  // IMPORTANT NOTE: Caller must call LeaveCriticalSection(&CriticalSection) or
+  // else multi-threading problems will ensue.
+}
+
+// RemoveFileOnSignal - The public API
+bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) {
+  RegisterHandler();
+
+  if (CleanupExecuted) {
+    if (ErrMsg)
+      *ErrMsg = "Process terminating -- cannot register for removal";
+    return true;
+  }
+
+  if (FilesToRemove == NULL)
+    FilesToRemove = new std::vector<sys::Path>;
+
+  FilesToRemove->push_back(Filename);
+
+  LeaveCriticalSection(&CriticalSection);
+  return false;
+}
+
+// DontRemoveFileOnSignal - The public API
+void sys::DontRemoveFileOnSignal(const sys::Path &Filename) {
+  if (FilesToRemove == NULL)
+    return;
+
+  RegisterHandler();
+
+  FilesToRemove->push_back(Filename);
+  std::vector<sys::Path>::reverse_iterator I =
+    std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+  if (I != FilesToRemove->rend())
+    FilesToRemove->erase(I.base()-1);
+
+  LeaveCriticalSection(&CriticalSection);
+}
+
+/// PrintStackTraceOnErrorSignal - When an error signal (such as SIGABRT or
+/// SIGSEGV) is delivered to the process, print a stack trace and then exit.
+void sys::PrintStackTraceOnErrorSignal() {
+  RegisterHandler();
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+void sys::SetInterruptFunction(void (*IF)()) {
+  RegisterHandler();
+  InterruptFunction = IF;
+  LeaveCriticalSection(&CriticalSection);
+}
+
+
+/// AddSignalHandler - Add a function to be called when a signal is delivered
+/// to the process. The handler can have a cookie passed to it to identify
+/// what instance of the handler it is.
+void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
+  if (CallBacksToRun == 0)
+    CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
+  CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
+  RegisterHandler();
+  LeaveCriticalSection(&CriticalSection);
+}
+}
+
+static void Cleanup() {
+  EnterCriticalSection(&CriticalSection);
+
+  // Prevent other thread from registering new files and directories for
+  // removal, should we be executing because of the console handler callback.
+  CleanupExecuted = true;
+
+  // FIXME: open files cannot be deleted.
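+  // (Windows typically holds a delete lock on any file opened without
+  // FILE_SHARE_DELETE, so eraseFromDisk below can fail for open files.)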
+ + if (FilesToRemove != NULL) + while (!FilesToRemove->empty()) { + FilesToRemove->back().eraseFromDisk(); + FilesToRemove->pop_back(); + } + + if (CallBacksToRun) + for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i) + (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second); + + LeaveCriticalSection(&CriticalSection); +} + +void llvm::sys::RunInterruptHandlers() { + Cleanup(); +} + +static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { + Cleanup(); + +#ifdef _WIN64 + // TODO: provide a x64 friendly version of the following +#else + + // Initialize the STACKFRAME structure. + STACKFRAME StackFrame; + memset(&StackFrame, 0, sizeof(StackFrame)); + + StackFrame.AddrPC.Offset = ep->ContextRecord->Eip; + StackFrame.AddrPC.Mode = AddrModeFlat; + StackFrame.AddrStack.Offset = ep->ContextRecord->Esp; + StackFrame.AddrStack.Mode = AddrModeFlat; + StackFrame.AddrFrame.Offset = ep->ContextRecord->Ebp; + StackFrame.AddrFrame.Mode = AddrModeFlat; + + HANDLE hProcess = GetCurrentProcess(); + HANDLE hThread = GetCurrentThread(); + + // Initialize the symbol handler. + SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_LOAD_LINES); + SymInitialize(hProcess, NULL, TRUE); + + while (true) { + if (!StackWalk(IMAGE_FILE_MACHINE_I386, hProcess, hThread, &StackFrame, + ep->ContextRecord, NULL, SymFunctionTableAccess, + SymGetModuleBase, NULL)) { + break; + } + + if (StackFrame.AddrFrame.Offset == 0) + break; + + // Print the PC in hexadecimal. + DWORD PC = StackFrame.AddrPC.Offset; + fprintf(stderr, "%08lX", PC); + + // Print the parameters. Assume there are four. + fprintf(stderr, " (0x%08lX 0x%08lX 0x%08lX 0x%08lX)", + StackFrame.Params[0], + StackFrame.Params[1], StackFrame.Params[2], StackFrame.Params[3]); + + // Verify the PC belongs to a module in this process. + if (!SymGetModuleBase(hProcess, PC)) { + fputs(" \n", stderr); + continue; + } + + // Print the symbol name. + char buffer[512]; + IMAGEHLP_SYMBOL *symbol = reinterpret_cast(buffer); + memset(symbol, 0, sizeof(IMAGEHLP_SYMBOL)); + symbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL); + symbol->MaxNameLength = 512 - sizeof(IMAGEHLP_SYMBOL); + + DWORD dwDisp; + if (!SymGetSymFromAddr(hProcess, PC, &dwDisp, symbol)) { + fputc('\n', stderr); + continue; + } + + buffer[511] = 0; + if (dwDisp > 0) + fprintf(stderr, ", %s()+%04lu bytes(s)", symbol->Name, dwDisp); + else + fprintf(stderr, ", %s", symbol->Name); + + // Print the source file and line number information. + IMAGEHLP_LINE line; + memset(&line, 0, sizeof(line)); + line.SizeOfStruct = sizeof(line); + if (SymGetLineFromAddr(hProcess, PC, &dwDisp, &line)) { + fprintf(stderr, ", %s, line %lu", line.FileName, line.LineNumber); + if (dwDisp > 0) + fprintf(stderr, "+%04lu byte(s)", dwDisp); + } + + fputc('\n', stderr); + } + +#endif + + if (ExitOnUnhandledExceptions) + _exit(-3); + + // Allow dialog box to pop up allowing choice to start debugger. + if (OldFilter) + return (*OldFilter)(ep); + else + return EXCEPTION_CONTINUE_SEARCH; +} + +static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType) { + // We are running in our very own thread, courtesy of Windows. + EnterCriticalSection(&CriticalSection); + Cleanup(); + + // If an interrupt function has been set, go and run one it; otherwise, + // the process dies. + void (*IF)() = InterruptFunction; + InterruptFunction = 0; // Don't run it on another CTRL-C. + + if (IF) { + // Note: if the interrupt function throws an exception, there is nothing + // to catch it in this thread so it will kill the process. + IF(); // Run it now. 
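+    // Release the lock taken at the top of this handler before returning.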
+ LeaveCriticalSection(&CriticalSection); + return TRUE; // Don't kill the process. + } + + // Allow normal processing to take place; i.e., the process dies. + LeaveCriticalSection(&CriticalSection); + return FALSE; +} diff --git a/final/lib/Support/Windows/ThreadLocal.inc b/final/lib/Support/Windows/ThreadLocal.inc new file mode 100644 index 00000000000..512462d8900 --- /dev/null +++ b/final/lib/Support/Windows/ThreadLocal.inc @@ -0,0 +1,54 @@ +//= llvm/Support/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Win32 specific (non-pthread) ThreadLocal class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. +//===----------------------------------------------------------------------===// + +#include "Windows.h" +#include "llvm/Support/ThreadLocal.h" + +namespace llvm { +using namespace sys; + +ThreadLocalImpl::ThreadLocalImpl() { + DWORD* tls = new DWORD; + *tls = TlsAlloc(); + assert(*tls != TLS_OUT_OF_INDEXES); + data = tls; +} + +ThreadLocalImpl::~ThreadLocalImpl() { + DWORD* tls = static_cast<DWORD*>(data); + TlsFree(*tls); + delete tls; +} + +const void* ThreadLocalImpl::getInstance() { + DWORD* tls = static_cast<DWORD*>(data); + return TlsGetValue(*tls); +} + +void ThreadLocalImpl::setInstance(const void* d){ + DWORD* tls = static_cast<DWORD*>(data); + int errorcode = TlsSetValue(*tls, const_cast<void*>(d)); + assert(errorcode != 0); + (void)errorcode; +} + +void ThreadLocalImpl::removeInstance() { + setInstance(0); +} + +} diff --git a/final/lib/Support/Windows/TimeValue.inc b/final/lib/Support/Windows/TimeValue.inc new file mode 100644 index 00000000000..12275526f1c --- /dev/null +++ b/final/lib/Support/Windows/TimeValue.inc @@ -0,0 +1,51 @@ +//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Win32 implementation of the TimeValue class. +// +//===----------------------------------------------------------------------===// + +#include "Windows.h" +#include <time.h> + +namespace llvm { +using namespace sys; + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only Win32 specific code. +//===----------------------------------------------------------------------===// + +TimeValue TimeValue::now() { + uint64_t ft; + GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft)); + + TimeValue t(0, 0); + t.fromWin32Time(ft); + return t; +} + +std::string TimeValue::str() const { +#ifdef __MINGW32__ + // This ban may be lifted by either: + // (i) a future MinGW version other than 1.0 inherits the __time64_t type, or + // (ii) configure tests for either the time_t or __time64_t type.
+ time_t ourTime = time_t(this->toEpochTime()); + struct tm *lt = ::localtime(&ourTime); +#else + __time64_t ourTime = this->toEpochTime(); + struct tm *lt = ::_localtime64(&ourTime); +#endif + + char buffer[25]; + strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt); + return std::string(buffer); +} + + +} diff --git a/final/lib/Support/Windows/Windows.h b/final/lib/Support/Windows/Windows.h new file mode 100644 index 00000000000..4a1553b599d --- /dev/null +++ b/final/lib/Support/Windows/Windows.h @@ -0,0 +1,120 @@ +//===- Win32/Win32.h - Common Win32 Include File ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines things specific to Win32 implementations. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Win32 code that +//=== is guaranteed to work on *all* Win32 variants. +//===----------------------------------------------------------------------===// + +// mingw-w64 tends to define it as 0x0502 in its headers. +#undef _WIN32_WINNT + +// Require at least Windows 2000 API. +#define _WIN32_WINNT 0x0500 +#define _WIN32_IE 0x0500 // MinGW at it again. +#define WIN32_LEAN_AND_MEAN + +#include "llvm/Config/config.h" // Get build system configuration settings +#include <windows.h> +#include <shlobj.h> +#include <cassert> +#include <string> + +inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { + if (!ErrMsg) + return true; + char *buffer = NULL; + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, + NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); + *ErrMsg = prefix + buffer; + LocalFree(buffer); + return true; +} + +class AutoHandle { + HANDLE handle; + +public: + AutoHandle(HANDLE h) : handle(h) {} + + ~AutoHandle() { + if (handle) + CloseHandle(handle); + } + + operator HANDLE() { + return handle; + } + + AutoHandle &operator=(HANDLE h) { + handle = h; + return *this; + } +}; + +template <class HandleType, uintptr_t InvalidHandle, + BOOL (WINAPI *D)(HandleType)> +class ScopedHandle { + HandleType Handle; + +public: + ScopedHandle() : Handle(InvalidHandle) {} + ScopedHandle(HandleType handle) : Handle(handle) {} + + ~ScopedHandle() { + if (Handle != HandleType(InvalidHandle)) + D(Handle); + } + + HandleType take() { + HandleType temp = Handle; + Handle = HandleType(InvalidHandle); + return temp; + } + + operator HandleType() const { return Handle; } + + ScopedHandle &operator=(HandleType handle) { + Handle = handle; + return *this; + } + + typedef void (*unspecified_bool_type)(); + static void unspecified_bool_true() {} + + // True if Handle is valid. + operator unspecified_bool_type() const { + return Handle == HandleType(InvalidHandle) ? 0 : unspecified_bool_true; + } + + bool operator!() const { + return Handle == HandleType(InvalidHandle); + } +}; + +typedef ScopedHandle<HANDLE, uintptr_t(-1), ::FindClose> + ScopedFindHandle; + +namespace llvm { +template <class T> +class SmallVectorImpl; + +template <class T> +typename SmallVectorImpl<T>::const_pointer +c_str(SmallVectorImpl<T> &str) { + str.push_back(0); + str.pop_back(); + return str.data(); +} +} // end namespace llvm.
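The c_str() helper above works because push_back(0) appends a NUL terminator (growing the buffer if needed) and the following pop_back() shrinks size() without touching the stored byte, so data() stays NUL-terminated. A minimal usage sketch, assuming a caller that has already built a path in a SmallVector<char> (the variable names and the CreateFileA call are illustrative only):

    llvm::SmallVector<char, 64> PathChars;      // filled elsewhere, no terminator
    const char *Path = llvm::c_str(PathChars);  // NUL-terminated view of the data
    HANDLE File = ::CreateFileA(Path, GENERIC_READ, FILE_SHARE_READ,
                                NULL, OPEN_EXISTING, 0, NULL);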
diff --git a/final/lib/Support/Windows/explicit_symbols.inc b/final/lib/Support/Windows/explicit_symbols.inc new file mode 100644 index 00000000000..84862d69e2b --- /dev/null +++ b/final/lib/Support/Windows/explicit_symbols.inc @@ -0,0 +1,66 @@ +/* in libgcc.a */ + +#ifdef HAVE__ALLOCA + EXPLICIT_SYMBOL(_alloca) + EXPLICIT_SYMBOL2(alloca, _alloca); +#endif +#ifdef HAVE___ALLOCA + EXPLICIT_SYMBOL(__alloca) +#endif +#ifdef HAVE___CHKSTK + EXPLICIT_SYMBOL(__chkstk) +#endif +#ifdef HAVE____CHKSTK + EXPLICIT_SYMBOL(___chkstk) +#endif +#ifdef HAVE___MAIN + EXPLICIT_SYMBOL(__main) // FIXME: Don't call it. +#endif + +#ifdef HAVE___ASHLDI3 + EXPLICIT_SYMBOL(__ashldi3) +#endif +#ifdef HAVE___ASHRDI3 + EXPLICIT_SYMBOL(__ashrdi3) +#endif +#ifdef HAVE___CMPDI2 // FIXME: unused + EXPLICIT_SYMBOL(__cmpdi2) +#endif +#ifdef HAVE___DIVDI3 + EXPLICIT_SYMBOL(__divdi3) +#endif +#ifdef HAVE___FIXDFDI + EXPLICIT_SYMBOL(__fixdfdi) +#endif +#ifdef HAVE___FIXSFDI + EXPLICIT_SYMBOL(__fixsfdi) +#endif +#ifdef HAVE___FIXUNSDFDI + EXPLICIT_SYMBOL(__fixunsdfdi) +#endif +#ifdef HAVE___FIXUNSSFDI + EXPLICIT_SYMBOL(__fixunssfdi) +#endif +#ifdef HAVE___FLOATDIDF + EXPLICIT_SYMBOL(__floatdidf) +#endif +#ifdef HAVE___FLOATDISF + EXPLICIT_SYMBOL(__floatdisf) +#endif +#ifdef HAVE___LSHRDI3 + EXPLICIT_SYMBOL(__lshrdi3) +#endif +#ifdef HAVE___MODDI3 + EXPLICIT_SYMBOL(__moddi3) +#endif +#ifdef HAVE___UDIVDI3 + EXPLICIT_SYMBOL(__udivdi3) +#endif +#ifdef HAVE___UMODDI3 + EXPLICIT_SYMBOL(__umoddi3) +#endif + +/* msvcrt */ +#if defined(_MSC_VER) + EXPLICIT_SYMBOL2(alloca, _alloca_probe); +#endif diff --git a/final/lib/Support/Windows/system_error.inc b/final/lib/Support/Windows/system_error.inc new file mode 100644 index 00000000000..37ec81dd363 --- /dev/null +++ b/final/lib/Support/Windows/system_error.inc @@ -0,0 +1,142 @@ +//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the Windows specific implementation of the error_code +// and error_condition classes. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +//=== WARNING: Implementation here must contain only generic Windows code that +//=== is guaranteed to work on *all* Windows variants. +//===----------------------------------------------------------------------===// + +#include <windows.h> +#include <winsock2.h> + +using namespace llvm; + +std::string +_system_error_category::message(int ev) const { + LPVOID lpMsgBuf = 0; + DWORD retval = ::FormatMessageA( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + ev, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language + (LPSTR) &lpMsgBuf, + 0, + NULL); + if (retval == 0) { + ::LocalFree(lpMsgBuf); + return std::string("Unknown error"); + } + + std::string str( static_cast<const char *>(lpMsgBuf) ); + ::LocalFree(lpMsgBuf); + + while (str.size() + && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r')) + str.erase( str.size()-1 ); + if (str.size() && str[str.size()-1] == '.') + str.erase( str.size()-1 ); + return str; +} + +// I'd rather not double the line count of the following.
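+// For instance (expansion sketch), MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied) +// expands to: +// case ERROR_ACCESS_DENIED: return make_error_condition(errc::permission_denied);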
+#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y) + +error_condition +_system_error_category::default_error_condition(int ev) const { + switch (ev) { + MAP_ERR_TO_COND(0, success); + // Windows system -> posix_errno decode table ---------------------------// + // see WinError.h comments for descriptions of errors + MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); + MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); + MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); + MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); + MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); + MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); + MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); + MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); + MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); + MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); + MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); + MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large); + MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); + MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); + MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); + MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); + MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); + MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); + MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); + MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link); + MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); + MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled); + MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_SEEK, io_error); + MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); + MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); + MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); + MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out); + MAP_ERR_TO_COND(WSAEACCES, permission_denied); + MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use); + MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available); + MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported); + MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress); + MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); + MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted); + MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused); + MAP_ERR_TO_COND(WSAECONNRESET, connection_reset); + MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required); 
+ MAP_ERR_TO_COND(WSAEFAULT, bad_address); + MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable); + MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress); + MAP_ERR_TO_COND(WSAEINTR, interrupted); + MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); + MAP_ERR_TO_COND(WSAEISCONN, already_connected); + MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); + MAP_ERR_TO_COND(WSAEMSGSIZE, message_size); + MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); + MAP_ERR_TO_COND(WSAENETDOWN, network_down); + MAP_ERR_TO_COND(WSAENETRESET, network_reset); + MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable); + MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space); + MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option); + MAP_ERR_TO_COND(WSAENOTCONN, not_connected); + MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket); + MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported); + MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported); + MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type); + MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out); + MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block); + default: return error_condition(ev, system_category()); + } +} diff --git a/final/lib/Support/circular_raw_ostream.cpp b/final/lib/Support/circular_raw_ostream.cpp new file mode 100644 index 00000000000..ca0d30db388 --- /dev/null +++ b/final/lib/Support/circular_raw_ostream.cpp @@ -0,0 +1,45 @@ +//===- circular_raw_ostream.cpp - Implement circular_raw_ostream ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements support for circular buffered streams. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/circular_raw_ostream.h" +#include <algorithm> +using namespace llvm; + +void circular_raw_ostream::write_impl(const char *Ptr, size_t Size) { + if (BufferSize == 0) { + TheStream->write(Ptr, Size); + return; + } + + // Write into the buffer, wrapping if necessary. + while (Size != 0) { + unsigned Bytes = + std::min(unsigned(Size), unsigned(BufferSize - (Cur - BufferArray))); + memcpy(Cur, Ptr, Bytes); + Size -= Bytes; + Cur += Bytes; + if (Cur == BufferArray + BufferSize) { + // Reset the output pointer to the start of the buffer. + Cur = BufferArray; + Filled = true; + } + } +} + +void circular_raw_ostream::flushBufferWithBanner() { + if (BufferSize != 0) { + // Write out the buffer + TheStream->write(Banner, std::strlen(Banner)); + flushBuffer(); + } +} diff --git a/final/lib/Support/raw_os_ostream.cpp b/final/lib/Support/raw_os_ostream.cpp new file mode 100644 index 00000000000..44f2325d7f8 --- /dev/null +++ b/final/lib/Support/raw_os_ostream.cpp @@ -0,0 +1,30 @@ +//===--- raw_os_ostream.cpp - Implement the raw_os_ostream class ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements support adapting raw_ostream to std::ostream.
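+// +// A minimal usage sketch (the names here are illustrative, not from this +// file): wrap an existing std::ostream and write through the raw_ostream +// interface: +// +// std::ostringstream Backing; +// llvm::raw_os_ostream OS(Backing); +// OS << "answer = " << 42; +// OS.flush(); // Backing.str() == "answer = 42" +//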
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_os_ostream.h" +#include <ostream> +using namespace llvm; + +//===----------------------------------------------------------------------===// +// raw_os_ostream +//===----------------------------------------------------------------------===// + +raw_os_ostream::~raw_os_ostream() { + flush(); +} + +void raw_os_ostream::write_impl(const char *Ptr, size_t Size) { + OS.write(Ptr, Size); +} + +uint64_t raw_os_ostream::current_pos() const { return OS.tellp(); } diff --git a/final/lib/Support/raw_ostream.cpp b/final/lib/Support/raw_ostream.cpp new file mode 100644 index 00000000000..5a71fa3d8ce --- /dev/null +++ b/final/lib/Support/raw_ostream.cpp @@ -0,0 +1,763 @@ +//===--- raw_ostream.cpp - Implement the raw_ostream classes --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements support for bulk buffered stream output. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/Process.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/STLExtras.h" +#include <cctype> +#include <cerrno> +#include <sys/stat.h> +#include <sys/types.h> + +#if defined(HAVE_UNISTD_H) +# include <unistd.h> +#endif +#if defined(HAVE_FCNTL_H) +# include <fcntl.h> +#endif +#if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV) +# include <sys/uio.h> +#endif + +#if defined(__CYGWIN__) +#include <io.h> +#endif + +#if defined(_MSC_VER) +#include <io.h> +#include <fcntl.h> +#ifndef STDIN_FILENO +# define STDIN_FILENO 0 +#endif +#ifndef STDOUT_FILENO +# define STDOUT_FILENO 1 +#endif +#ifndef STDERR_FILENO +# define STDERR_FILENO 2 +#endif +#endif + +using namespace llvm; + +raw_ostream::~raw_ostream() { + // raw_ostream's subclasses should take care to flush the buffer + // in their destructors. + assert(OutBufCur == OutBufStart && + "raw_ostream destructor called with non-empty buffer!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; +} + +// An out of line virtual method to provide a home for the class vtable. +void raw_ostream::handle() {} + +size_t raw_ostream::preferred_buffer_size() const { + // BUFSIZ is intended to be a reasonable default. + return BUFSIZ; +} + +void raw_ostream::SetBuffered() { + // Ask the subclass to determine an appropriate buffer size. + if (size_t Size = preferred_buffer_size()) + SetBufferSize(Size); + else + // It may return 0, meaning this stream should be unbuffered. + SetUnbuffered(); +} + +void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size, + BufferKind Mode) { + assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) || + (Mode != Unbuffered && BufferStart && Size)) && + "stream must be unbuffered or have at least one byte"); + // Make sure the current buffer is free of content (we can't flush here; the + // child buffer management logic will be in write_impl).
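+ // For reference, the public entry points that route into this method + // (a sketch; the buffer size is arbitrary): + // OS.SetBuffered(); // internal buffer, preferred_buffer_size() + // OS.SetBufferSize(4096); // internal buffer, explicit size + // OS.SetBuffer(Buf, Len); // caller-owned (external) buffer + // OS.SetUnbuffered(); // no buffer; writes go straight to write_impl()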
+ assert(GetNumBytesInBuffer() == 0 && "Current buffer is non-empty!"); + + if (BufferMode == InternalBuffer) + delete [] OutBufStart; + OutBufStart = BufferStart; + OutBufEnd = OutBufStart+Size; + OutBufCur = OutBufStart; + BufferMode = Mode; + + assert(OutBufStart <= OutBufEnd && "Invalid size!"); +} + +raw_ostream &raw_ostream::operator<<(unsigned long N) { + // Zero is a special case. + if (N == 0) + return *this << '0'; + + char NumberBuffer[20]; + char *EndPtr = NumberBuffer+sizeof(NumberBuffer); + char *CurPtr = EndPtr; + + while (N) { + *--CurPtr = '0' + char(N % 10); + N /= 10; + } + return write(CurPtr, EndPtr-CurPtr); +} + +raw_ostream &raw_ostream::operator<<(long N) { + if (N < 0) { + *this << '-'; + N = -N; + } + + return this->operator<<(static_cast<unsigned long>(N)); +} + +raw_ostream &raw_ostream::operator<<(unsigned long long N) { + // Output using 32-bit div/mod when possible. + if (N == static_cast<unsigned long>(N)) + return this->operator<<(static_cast<unsigned long>(N)); + + char NumberBuffer[20]; + char *EndPtr = NumberBuffer+sizeof(NumberBuffer); + char *CurPtr = EndPtr; + + while (N) { + *--CurPtr = '0' + char(N % 10); + N /= 10; + } + return write(CurPtr, EndPtr-CurPtr); +} + +raw_ostream &raw_ostream::operator<<(long long N) { + if (N < 0) { + *this << '-'; + // Avoid undefined behavior on INT64_MIN with a cast. + N = -(unsigned long long)N; + } + + return this->operator<<(static_cast<unsigned long long>(N)); +} + +raw_ostream &raw_ostream::write_hex(unsigned long long N) { + // Zero is a special case. + if (N == 0) + return *this << '0'; + + char NumberBuffer[20]; + char *EndPtr = NumberBuffer+sizeof(NumberBuffer); + char *CurPtr = EndPtr; + + while (N) { + uintptr_t x = N % 16; + *--CurPtr = (x < 10 ? '0' + x : 'a' + x - 10); + N /= 16; + } + + return write(CurPtr, EndPtr-CurPtr); +} + +raw_ostream &raw_ostream::write_escaped(StringRef Str, + bool UseHexEscapes) { + for (unsigned i = 0, e = Str.size(); i != e; ++i) { + unsigned char c = Str[i]; + + switch (c) { + case '\\': + *this << '\\' << '\\'; + break; + case '\t': + *this << '\\' << 't'; + break; + case '\n': + *this << '\\' << 'n'; + break; + case '"': + *this << '\\' << '"'; + break; + default: + if (std::isprint(c)) { + *this << c; + break; + } + + // Write out the escaped representation. + if (UseHexEscapes) { + *this << '\\' << 'x'; + *this << hexdigit((c >> 4) & 0xF); + *this << hexdigit((c >> 0) & 0xF); + } else { + // Always use a full 3-character octal escape. + *this << '\\'; + *this << char('0' + ((c >> 6) & 7)); + *this << char('0' + ((c >> 3) & 7)); + *this << char('0' + ((c >> 0) & 7)); + } + } + } + + return *this; +} + +raw_ostream &raw_ostream::operator<<(const void *P) { + *this << '0' << 'x'; + + return write_hex((uintptr_t) P); +} + +raw_ostream &raw_ostream::operator<<(double N) { +#ifdef _WIN32 + // On MSVCRT and compatible, output of %e is incompatible with POSIX + // by default. Number of exponent digits should be at least 2. "%+03d" + // FIXME: Implement our formatter to here or Support/Format.h!
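+ // Example of the mismatch (a sketch): printf("%e", 1e12) yields + // "1.000000e+012" under MSVCRT but "1.000000e+12" under POSIX, so the + // code below strips the leading zero from a three-digit exponent.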
+ int fpcl = _fpclass(N); + + // negative zero + if (fpcl == _FPCLASS_NZ) + return *this << "-0.000000e+00"; + + char buf[16]; + unsigned len; + len = snprintf(buf, sizeof(buf), "%e", N); + if (len <= sizeof(buf) - 2) { + if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') { + int cs = buf[len - 4]; + if (cs == '+' || cs == '-') { + int c1 = buf[len - 2]; + int c0 = buf[len - 1]; + if (isdigit(c1) && isdigit(c0)) { + // Trim leading '0': "...e+012" -> "...e+12\0" + buf[len - 3] = c1; + buf[len - 2] = c0; + buf[--len] = 0; + } + } + } + return this->operator<<(buf); + } +#endif + return this->operator<<(format("%e", N)); +} + + + +void raw_ostream::flush_nonempty() { + assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty."); + size_t Length = OutBufCur - OutBufStart; + OutBufCur = OutBufStart; + write_impl(OutBufStart, Length); +} + +raw_ostream &raw_ostream::write(unsigned char C) { + // Group exceptional cases into a single branch. + if (BUILTIN_EXPECT(OutBufCur >= OutBufEnd, false)) { + if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (BufferMode == Unbuffered) { + write_impl(reinterpret_cast<char*>(&C), 1); + return *this; + } + // Set up a buffer and start over. + SetBuffered(); + return write(C); + } + + flush_nonempty(); + } + + *OutBufCur++ = C; + return *this; +} + +raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { + // Group exceptional cases into a single branch. + if (BUILTIN_EXPECT(OutBufCur+Size > OutBufEnd, false)) { + if (BUILTIN_EXPECT(!OutBufStart, false)) { + if (BufferMode == Unbuffered) { + write_impl(Ptr, Size); + return *this; + } + // Set up a buffer and start over. + SetBuffered(); + return write(Ptr, Size); + } + + size_t NumBytes = OutBufEnd - OutBufCur; + + // If the buffer is empty at this point we have a string that is larger + // than the buffer. Directly write the chunk that is a multiple of the + // preferred buffer size and put the remainder in the buffer. + if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) { + size_t BytesToWrite = Size - (Size % NumBytes); + write_impl(Ptr, BytesToWrite); + copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + return *this; + } + + // We don't have enough space in the buffer to fit the string in. Insert as + // much as possible, flush and start over with the remainder. + copy_to_buffer(Ptr, NumBytes); + flush_nonempty(); + return write(Ptr + NumBytes, Size - NumBytes); + } + + copy_to_buffer(Ptr, Size); + + return *this; +} + +void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) { + assert(Size <= size_t(OutBufEnd - OutBufCur) && "Buffer overrun!"); + + // Handle short strings specially; memcpy isn't very good at very short + // strings. + switch (Size) { + case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH + case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH + case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH + case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH + case 0: break; + default: + memcpy(OutBufCur, Ptr, Size); + break; + } + + OutBufCur += Size; +} + +// Formatted output. +raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) { + // If we have more than a few bytes left in our output buffer, try + // formatting directly onto its end. + size_t NextBufferSize = 127; + size_t BufferBytesLeft = OutBufEnd - OutBufCur; + if (BufferBytesLeft > 3) { + size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft); + + // Common case is that we have plenty of space.
+ if (BytesUsed <= BufferBytesLeft) { + OutBufCur += BytesUsed; + return *this; + } + + // Otherwise, we overflowed and the return value tells us the size to try + // again with. + NextBufferSize = BytesUsed; + } + + // If we got here, we didn't have enough space in the output buffer for the + // string. Try printing into a SmallVector that is resized to have enough + // space. Iterate until we win. + SmallVector<char, 128> V; + + while (1) { + V.resize(NextBufferSize); + + // Try formatting into the SmallVector. + size_t BytesUsed = Fmt.print(V.data(), NextBufferSize); + + // If BytesUsed fit into the vector, we win. + if (BytesUsed <= NextBufferSize) + return write(V.data(), BytesUsed); + + // Otherwise, try again with a new size. + assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?"); + NextBufferSize = BytesUsed; + } +} + +/// indent - Insert 'NumSpaces' spaces. +raw_ostream &raw_ostream::indent(unsigned NumSpaces) { + static const char Spaces[] = "                                " + "                                " + "                "; + + // Usually the indentation is small, handle it with a fastpath. + if (NumSpaces < array_lengthof(Spaces)) + return write(Spaces, NumSpaces); + + while (NumSpaces) { + unsigned NumToWrite = std::min(NumSpaces, + (unsigned)array_lengthof(Spaces)-1); + write(Spaces, NumToWrite); + NumSpaces -= NumToWrite; + } + return *this; +} + + +//===----------------------------------------------------------------------===// +// Formatted Output +//===----------------------------------------------------------------------===// + +// Out of line virtual method. +void format_object_base::home() { +} + +//===----------------------------------------------------------------------===// +// raw_fd_ostream +//===----------------------------------------------------------------------===// + +/// raw_fd_ostream - Open the specified file for writing. If an error +/// occurs, information about the error is put into ErrorInfo, and the +/// stream should be immediately destroyed; the string will be empty +/// if no error occurred. +raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, + unsigned Flags) + : Error(false), UseAtomicWrites(false), pos(0) +{ + assert(Filename != 0 && "Filename is null"); + // Verify that we don't have both "append" and "excl". + assert((!(Flags & F_Excl) || !(Flags & F_Append)) && + "Cannot specify both 'excl' and 'append' file creation flags!"); + + ErrorInfo.clear(); + + // Handle "-" as stdout. Note that when we do this, we consider ourself + // the owner of stdout. This means that we can do things like close the + // file descriptor when we're done and set the "binary" flag globally. + if (Filename[0] == '-' && Filename[1] == 0) { + FD = STDOUT_FILENO; + // If user requested binary then put stdout into binary mode if + // possible. + if (Flags & F_Binary) + sys::Program::ChangeStdoutToBinary(); + // Close stdout when we're done, to detect any output errors. + ShouldClose = true; + return; + } + + int OpenFlags = O_WRONLY|O_CREAT; +#ifdef O_BINARY + if (Flags & F_Binary) + OpenFlags |= O_BINARY; +#endif + + if (Flags & F_Append) + OpenFlags |= O_APPEND; + else + OpenFlags |= O_TRUNC; + if (Flags & F_Excl) + OpenFlags |= O_EXCL; + + while ((FD = open(Filename, OpenFlags, 0664)) < 0) { + if (errno != EINTR) { + ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; + ShouldClose = false; + return; + } + } + + // Ok, we successfully opened the file, so it'll need to be closed. + ShouldClose = true; +} + +/// raw_fd_ostream ctor - FD is the file descriptor that this writes to.
If +/// ShouldClose is true, this closes the file when the stream is destroyed. +raw_fd_ostream::raw_fd_ostream(int fd, bool shouldClose, bool unbuffered) + : raw_ostream(unbuffered), FD(fd), + ShouldClose(shouldClose), Error(false), UseAtomicWrites(false) { +#ifdef O_BINARY + // Setting STDOUT and STDERR to binary mode is necessary in Win32 + // to avoid undesirable linefeed conversion. + if (fd == STDOUT_FILENO || fd == STDERR_FILENO) + setmode(fd, O_BINARY); +#endif + + // Get the starting position. + off_t loc = ::lseek(FD, 0, SEEK_CUR); + if (loc == (off_t)-1) + pos = 0; + else + pos = static_cast<uint64_t>(loc); +} + +raw_fd_ostream::~raw_fd_ostream() { + if (FD >= 0) { + flush(); + if (ShouldClose) + while (::close(FD) != 0) + if (errno != EINTR) { + error_detected(); + break; + } + } + +#ifdef __MINGW32__ + // On mingw, global dtors should not call exit(): report_fatal_error() + // invokes exit(), and it may be unable to write its message to stderr + // when errors were already detected on FD == 2. + if (FD == 2) return; +#endif + + // If there are any pending errors, report them now. Clients wishing + // to avoid report_fatal_error calls should check for errors with + // has_error() and clear the error flag with clear_error() before + // destructing raw_ostream objects which may have errors. + if (has_error()) + report_fatal_error("IO failure on output stream."); +} + + +void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) { + assert(FD >= 0 && "File already closed."); + pos += Size; + + do { + ssize_t ret; + + // Check whether we should attempt to use atomic writes. + if (BUILTIN_EXPECT(!UseAtomicWrites, true)) { + ret = ::write(FD, Ptr, Size); + } else { + // Use ::writev() where available. +#if defined(HAVE_WRITEV) + struct iovec IOV = { (void*) Ptr, Size }; + ret = ::writev(FD, &IOV, 1); +#else + ret = ::write(FD, Ptr, Size); +#endif + } + + if (ret < 0) { + // If it's a recoverable error, swallow it and retry the write. + // + // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since + // raw_ostream isn't designed to do non-blocking I/O. However, some + // programs, such as old versions of bjam, have mistakenly used + // O_NONBLOCK. For compatibility, emulate blocking semantics by + // spinning until the write succeeds. If you don't want spinning, + // don't use O_NONBLOCK file descriptors with raw_ostream. + if (errno == EINTR || errno == EAGAIN +#ifdef EWOULDBLOCK + || errno == EWOULDBLOCK +#endif + ) + continue; + + // Otherwise it's a non-recoverable error. Note it and quit. + error_detected(); + break; + } + + // The write may have written some or all of the data. Update the + // size and buffer pointer to reflect the remainder that needs + // to be written. If there are no bytes left, we're done. + Ptr += ret; + Size -= ret; + } while (Size > 0); +} + +void raw_fd_ostream::close() { + assert(ShouldClose); + ShouldClose = false; + flush(); + while (::close(FD) != 0) + if (errno != EINTR) { + error_detected(); + break; + } + FD = -1; +} + +uint64_t raw_fd_ostream::seek(uint64_t off) { + flush(); + pos = ::lseek(FD, off, SEEK_SET); + if (pos != off) + error_detected(); + return pos; +} + +size_t raw_fd_ostream::preferred_buffer_size() const { +#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix) + // Windows and Minix have no st_blksize. + assert(FD >= 0 && "File not yet open!"); + struct stat statbuf; + if (fstat(FD, &statbuf) != 0) + return 0; + + // If this is a terminal, don't use buffering.
Line buffering + // would be a more traditional thing to do, but it's not worth + // the complexity. + if (S_ISCHR(statbuf.st_mode) && isatty(FD)) + return 0; + // Return the preferred block size. + return statbuf.st_blksize; +#else + return raw_ostream::preferred_buffer_size(); +#endif +} + +raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold, + bool bg) { + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = + (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg) + : sys::Process::OutputColor(colors, bold, bg); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // Don't count color codes toward output characters. + pos -= len; + } + return *this; +} + +raw_ostream &raw_fd_ostream::resetColor() { + if (sys::Process::ColorNeedsFlush()) + flush(); + const char *colorcode = sys::Process::ResetColor(); + if (colorcode) { + size_t len = strlen(colorcode); + write(colorcode, len); + // Don't count color codes toward output characters. + pos -= len; + } + return *this; +} + +bool raw_fd_ostream::is_displayed() const { + return sys::Process::FileDescriptorIsDisplayed(FD); +} + +//===----------------------------------------------------------------------===// +// outs(), errs(), nulls() +//===----------------------------------------------------------------------===// + +/// outs() - This returns a reference to a raw_ostream for standard output. +/// Use it like: outs() << "foo" << "bar"; +raw_ostream &llvm::outs() { + // Set buffer settings to model stdout behavior. + // Delete the file descriptor when the program exits, forcing error + // detection. If you don't want this behavior, don't use outs(). + static raw_fd_ostream S(STDOUT_FILENO, true); + return S; +} + +/// errs() - This returns a reference to a raw_ostream for standard error. +/// Use it like: errs() << "foo" << "bar"; +raw_ostream &llvm::errs() { + // Set standard error to be unbuffered by default. + static raw_fd_ostream S(STDERR_FILENO, false, true); + return S; +} + +/// nulls() - This returns a reference to a raw_ostream which discards output. +raw_ostream &llvm::nulls() { + static raw_null_ostream S; + return S; +} + + +//===----------------------------------------------------------------------===// +// raw_string_ostream +//===----------------------------------------------------------------------===// + +raw_string_ostream::~raw_string_ostream() { + flush(); +} + +void raw_string_ostream::write_impl(const char *Ptr, size_t Size) { + OS.append(Ptr, Size); +} + +//===----------------------------------------------------------------------===// +// raw_svector_ostream +//===----------------------------------------------------------------------===// + +// The raw_svector_ostream implementation uses the SmallVector itself as the +// buffer for the raw_ostream. We guarantee that the raw_ostream buffer is +// always pointing past the end of the vector, but within the vector +// capacity. This allows raw_ostream to write directly into the correct place, +// and we only need to set the vector size when the data is flushed. + +raw_svector_ostream::raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) { + // Set up the initial external buffer. We make sure that the buffer has at + // least 128 bytes free; raw_ostream itself only requires 64, but we want to + // make sure that we don't grow the buffer unnecessarily on destruction (when + // the data is flushed). See the FIXME below.
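+ // Typical use (a sketch; the names are illustrative): bytes stream + // directly into the vector's spare capacity, so str() needs no extra copy: + // SmallString<128> Buf; + // raw_svector_ostream OS(Buf); + // OS << "count = " << 42; + // StringRef S = OS.str(); // flushes; S points into Buf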
+ OS.reserve(OS.size() + 128); + SetBuffer(OS.end(), OS.capacity() - OS.size()); +} + +raw_svector_ostream::~raw_svector_ostream() { + // FIXME: Prevent resizing during this flush(). + flush(); +} + +/// resync - This is called when the SmallVector we're appending to is changed +/// outside of the raw_svector_ostream's control. It is only safe to do this +/// if the raw_svector_ostream has previously been flushed. +void raw_svector_ostream::resync() { + assert(GetNumBytesInBuffer() == 0 && "Didn't flush before mutating vector"); + + if (OS.capacity() - OS.size() < 64) + OS.reserve(OS.capacity() * 2); + SetBuffer(OS.end(), OS.capacity() - OS.size()); +} + +void raw_svector_ostream::write_impl(const char *Ptr, size_t Size) { + // If we're writing bytes from the end of the buffer into the smallvector, we + // don't need to copy the bytes, just commit the bytes because they are + // already in the right place. + if (Ptr == OS.end()) { + assert(OS.size() + Size <= OS.capacity() && "Invalid write_impl() call!"); + OS.set_size(OS.size() + Size); + } else { + assert(GetNumBytesInBuffer() == 0 && + "Should be writing from buffer if some bytes in it"); + // Otherwise, do copy the bytes. + OS.append(Ptr, Ptr+Size); + } + + // Grow the vector if necessary. + if (OS.capacity() - OS.size() < 64) + OS.reserve(OS.capacity() * 2); + + // Update the buffer position. + SetBuffer(OS.end(), OS.capacity() - OS.size()); +} + +uint64_t raw_svector_ostream::current_pos() const { + return OS.size(); +} + +StringRef raw_svector_ostream::str() { + flush(); + return StringRef(OS.begin(), OS.size()); +} + +//===----------------------------------------------------------------------===// +// raw_null_ostream +//===----------------------------------------------------------------------===// + +raw_null_ostream::~raw_null_ostream() { +#ifndef NDEBUG + // ~raw_ostream asserts that the buffer is empty. This isn't necessary + // with raw_null_ostream, but it's better to have raw_null_ostream follow + // the rules than to change the rules just for raw_null_ostream. + flush(); +#endif +} + +void raw_null_ostream::write_impl(const char *Ptr, size_t Size) { +} + +uint64_t raw_null_ostream::current_pos() const { + return 0; +} diff --git a/final/lib/Support/regcclass.h b/final/lib/Support/regcclass.h new file mode 100644 index 00000000000..2cea3e4e540 --- /dev/null +++ b/final/lib/Support/regcclass.h @@ -0,0 +1,70 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cclass.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-class table */ +static struct cclass { + const char *name; + const char *chars; + const char *multis; +} cclasses[] = { + { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", ""} , + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + ""} , + { "blank", " \t", ""} , + { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", ""} , + { "digit", "0123456789", ""} , + { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "lower", "abcdefghijklmnopqrstuvwxyz", + ""} , + { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + ""} , + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ""} , + { "space", "\t\n\v\f\r ", ""} , + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + ""} , + { "xdigit", "0123456789ABCDEFabcdef", + ""} , + { NULL, 0, "" } +}; diff --git a/final/lib/Support/regcname.h b/final/lib/Support/regcname.h new file mode 100644 index 00000000000..3c0bb248ffa --- /dev/null +++ b/final/lib/Support/regcname.h @@ -0,0 +1,139 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cname.h 8.3 (Berkeley) 3/20/94 + */ + +/* character-name table */ +static struct cname { + const char *name; + char code; +} cnames[] = { + { "NUL", '\0' }, + { "SOH", '\001' }, + { "STX", '\002' }, + { "ETX", '\003' }, + { "EOT", '\004' }, + { "ENQ", '\005' }, + { "ACK", '\006' }, + { "BEL", '\007' }, + { "alert", '\007' }, + { "BS", '\010' }, + { "backspace", '\b' }, + { "HT", '\011' }, + { "tab", '\t' }, + { "LF", '\012' }, + { "newline", '\n' }, + { "VT", '\013' }, + { "vertical-tab", '\v' }, + { "FF", '\014' }, + { "form-feed", '\f' }, + { "CR", '\015' }, + { "carriage-return", '\r' }, + { "SO", '\016' }, + { "SI", '\017' }, + { "DLE", '\020' }, + { "DC1", '\021' }, + { "DC2", '\022' }, + { "DC3", '\023' }, + { "DC4", '\024' }, + { "NAK", '\025' }, + { "SYN", '\026' }, + { "ETB", '\027' }, + { "CAN", '\030' }, + { "EM", '\031' }, + { "SUB", '\032' }, + { "ESC", '\033' }, + { "IS4", '\034' }, + { "FS", '\034' }, + { "IS3", '\035' }, + { "GS", '\035' }, + { "IS2", '\036' }, + { "RS", '\036' }, + { "IS1", '\037' }, + { "US", '\037' }, + { "space", ' ' }, + { "exclamation-mark", '!' }, + { "quotation-mark", '"' }, + { "number-sign", '#' }, + { "dollar-sign", '$' }, + { "percent-sign", '%' }, + { "ampersand", '&' }, + { "apostrophe", '\'' }, + { "left-parenthesis", '(' }, + { "right-parenthesis", ')' }, + { "asterisk", '*' }, + { "plus-sign", '+' }, + { "comma", ',' }, + { "hyphen", '-' }, + { "hyphen-minus", '-' }, + { "period", '.' }, + { "full-stop", '.' }, + { "slash", '/' }, + { "solidus", '/' }, + { "zero", '0' }, + { "one", '1' }, + { "two", '2' }, + { "three", '3' }, + { "four", '4' }, + { "five", '5' }, + { "six", '6' }, + { "seven", '7' }, + { "eight", '8' }, + { "nine", '9' }, + { "colon", ':' }, + { "semicolon", ';' }, + { "less-than-sign", '<' }, + { "equals-sign", '=' }, + { "greater-than-sign", '>' }, + { "question-mark", '?' }, + { "commercial-at", '@' }, + { "left-square-bracket", '[' }, + { "backslash", '\\' }, + { "reverse-solidus", '\\' }, + { "right-square-bracket", ']' }, + { "circumflex", '^' }, + { "circumflex-accent", '^' }, + { "underscore", '_' }, + { "low-line", '_' }, + { "grave-accent", '`' }, + { "left-brace", '{' }, + { "left-curly-bracket", '{' }, + { "vertical-line", '|' }, + { "right-brace", '}' }, + { "right-curly-bracket", '}' }, + { "tilde", '~' }, + { "DEL", '\177' }, + { NULL, 0 } +}; diff --git a/final/lib/Support/regcomp.c b/final/lib/Support/regcomp.c new file mode 100644 index 00000000000..46c91a9c497 --- /dev/null +++ b/final/lib/Support/regcomp.c @@ -0,0 +1,1525 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regcomp.c 8.5 (Berkeley) 3/20/94 + */ + +#include <sys/types.h> +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include "regex_impl.h" + +#include "regutils.h" +#include "regex2.h" + +#include "regcclass.h" +#include "regcname.h" + +/* + * parse structure, passed up and down to avoid global variables and + * other clumsinesses + */ +struct parse { + char *next; /* next character in RE */ + char *end; /* end of string (-> NUL normally) */ + int error; /* has an error been seen?
*/ + sop *strip; /* malloced strip */ + sopno ssize; /* malloced strip size (allocated) */ + sopno slen; /* malloced strip length (used) */ + int ncsalloc; /* number of csets allocated */ + struct re_guts *g; +# define NPAREN 10 /* we need to remember () 1-9 for back refs */ + sopno pbegin[NPAREN]; /* -> ( ([0] unused) */ + sopno pend[NPAREN]; /* -> ) ([0] unused) */ +}; + +static void p_ere(struct parse *, int); +static void p_ere_exp(struct parse *); +static void p_str(struct parse *); +static void p_bre(struct parse *, int, int); +static int p_simp_re(struct parse *, int); +static int p_count(struct parse *); +static void p_bracket(struct parse *); +static void p_b_term(struct parse *, cset *); +static void p_b_cclass(struct parse *, cset *); +static void p_b_eclass(struct parse *, cset *); +static char p_b_symbol(struct parse *); +static char p_b_coll_elem(struct parse *, int); +static char othercase(int); +static void bothcases(struct parse *, int); +static void ordinary(struct parse *, int); +static void nonnewline(struct parse *); +static void repeat(struct parse *, sopno, int, int); +static int seterr(struct parse *, int); +static cset *allocset(struct parse *); +static void freeset(struct parse *, cset *); +static int freezeset(struct parse *, cset *); +static int firstch(struct parse *, cset *); +static int nch(struct parse *, cset *); +static void mcadd(struct parse *, cset *, const char *); +static void mcinvert(struct parse *, cset *); +static void mccase(struct parse *, cset *); +static int isinsets(struct re_guts *, int); +static int samesets(struct re_guts *, int, int); +static void categorize(struct parse *, struct re_guts *); +static sopno dupl(struct parse *, sopno, sopno); +static void doemit(struct parse *, sop, size_t); +static void doinsert(struct parse *, sop, size_t, sopno); +static void dofwd(struct parse *, sopno, sop); +static void enlarge(struct parse *, sopno); +static void stripsnug(struct parse *, struct re_guts *); +static void findmust(struct parse *, struct re_guts *); +static sopno pluscount(struct parse *, struct re_guts *); + +static char nuls[10]; /* place to point scanner in event of error */ + +/* + * macros for use with parse structure + * BEWARE: these know that the parse structure is named `p' !!! + */ +#define PEEK() (*p->next) +#define PEEK2() (*(p->next+1)) +#define MORE() (p->next < p->end) +#define MORE2() (p->next+1 < p->end) +#define SEE(c) (MORE() && PEEK() == (c)) +#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) +#define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) +#define NEXT() (p->next++) +#define NEXT2() (p->next += 2) +#define NEXTn(n) (p->next += (n)) +#define GETNEXT() (*p->next++) +#define SETERROR(e) seterr(p, (e)) +#define REQUIRE(co, e) (void)((co) || SETERROR(e)) +#define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e)) +#define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e)) +#define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e)) +#define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) +#define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) +#define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) +#define ASTERN(sop, pos) EMIT(sop, HERE()-pos) +#define HERE() (p->slen) +#define THERE() (p->slen - 1) +#define THERETHERE() (p->slen - 2) +#define DROP(n) (p->slen -= (n)) + +#ifdef _POSIX2_RE_DUP_MAX +#define DUPMAX _POSIX2_RE_DUP_MAX +#else +#define DUPMAX 255 +#endif +#define INFINITY (DUPMAX + 1) + +#ifndef NDEBUG +static int never = 0; /* for use in asserts; shuts lint up */ +#else +#define never 0 /* some <assert.h>s have bugs too */ +#endif + +/* + - llvm_regcomp - interface for parser and compilation + */ +int /* 0 success, otherwise REG_something */ +llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags) +{ + struct parse pa; + struct re_guts *g; + struct parse *p = &pa; + int i; + size_t len; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&~REG_DUMP) +#endif + + cflags = GOODFLAGS(cflags); + if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC)) + return(REG_INVARG); + + if (cflags&REG_PEND) { + if (preg->re_endp < pattern) + return(REG_INVARG); + len = preg->re_endp - pattern; + } else + len = strlen((const char *)pattern); + + /* do the mallocs early so failure handling is easy */ + g = (struct re_guts *)malloc(sizeof(struct re_guts) + + (NC-1)*sizeof(cat_t)); + if (g == NULL) + return(REG_ESPACE); + p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */ + p->strip = (sop *)calloc(p->ssize, sizeof(sop)); + p->slen = 0; + if (p->strip == NULL) { + free((char *)g); + return(REG_ESPACE); + } + + /* set things up */ + p->g = g; + p->next = (char *)pattern; /* convenience; we do not modify it */ + p->end = p->next + len; + p->error = 0; + p->ncsalloc = 0; + for (i = 0; i < NPAREN; i++) { + p->pbegin[i] = 0; + p->pend[i] = 0; + } + g->csetsize = NC; + g->sets = NULL; + g->setbits = NULL; + g->ncsets = 0; + g->cflags = cflags; + g->iflags = 0; + g->nbol = 0; + g->neol = 0; + g->must = NULL; + g->mlen = 0; + g->nsub = 0; + g->ncategories = 1; /* category 0 is "everything else" */ + g->categories = &g->catspace[-(CHAR_MIN)]; + (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t)); + g->backrefs = 0; + + /* do it */ + EMIT(OEND, 0); + g->firststate = THERE(); + if (cflags&REG_EXTENDED) + p_ere(p, OUT); + else if (cflags&REG_NOSPEC) + p_str(p); + else + p_bre(p, OUT, OUT); + EMIT(OEND, 0); + g->laststate = THERE(); + + /* tidy up loose ends and fill things in */ + categorize(p, g); + stripsnug(p, g); + findmust(p, g); + g->nplus = pluscount(p, g); + g->magic = MAGIC2; + preg->re_nsub = g->nsub; + preg->re_g = g; + preg->re_magic = MAGIC1; +#ifndef REDEBUG + /* not debugging, so can't rely on the assert() in llvm_regexec() */ + if (g->iflags&REGEX_BAD) + SETERROR(REG_ASSERT); +#endif + + /* win or lose, we're done */ + if (p->error != 0) /* lose */ + llvm_regfree(preg); + return(p->error); +} + +/* + - p_ere - ERE parser top level, concatenation and alternation + */ +static void +p_ere(struct parse *p, int stop) /* character this ERE should end at */ +{ + char c; + sopno prevback =
0; + sopno prevfwd = 0; + sopno conc; + int first = 1; /* is this the first alternative? */ + + for (;;) { + /* do a bunch of concatenated expressions */ + conc = HERE(); + while (MORE() && (c = PEEK()) != '|' && c != stop) + p_ere_exp(p); + REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ + + if (!EAT('|')) + break; /* NOTE BREAK OUT */ + + if (first) { + INSERT(OCH_, conc); /* offset is wrong */ + prevfwd = conc; + prevback = conc; + first = 0; + } + ASTERN(OOR1, prevback); + prevback = THERE(); + AHEAD(prevfwd); /* fix previous offset */ + prevfwd = HERE(); + EMIT(OOR2, 0); /* offset is very wrong */ + } + + if (!first) { /* tail-end fixups */ + AHEAD(prevfwd); + ASTERN(O_CH, prevback); + } + + assert(!MORE() || SEE(stop)); +} + +/* + - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op + */ +static void +p_ere_exp(struct parse *p) +{ + char c; + sopno pos; + int count; + int count2; + sopno subno; + int wascaret = 0; + + assert(MORE()); /* caller should have ensured this */ + c = GETNEXT(); + + pos = HERE(); + switch (c) { + case '(': + REQUIRE(MORE(), REG_EPAREN); + p->g->nsub++; + subno = p->g->nsub; + if (subno < NPAREN) + p->pbegin[subno] = HERE(); + EMIT(OLPAREN, subno); + if (!SEE(')')) + p_ere(p, ')'); + if (subno < NPAREN) { + p->pend[subno] = HERE(); + assert(p->pend[subno] != 0); + } + EMIT(ORPAREN, subno); + MUSTEAT(')', REG_EPAREN); + break; +#ifndef POSIX_MISTAKE + case ')': /* happens only if no current unmatched ( */ + /* + * You may ask, why the ifndef? Because I didn't notice + * this until slightly too late for 1003.2, and none of the + * other 1003.2 regular-expression reviewers noticed it at + * all. So an unmatched ) is legal POSIX, at least until + * we can get it fixed. + */ + SETERROR(REG_EPAREN); + break; +#endif + case '^': + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + wascaret = 1; + break; + case '$': + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + break; + case '|': + SETERROR(REG_EMPTY); + break; + case '*': + case '+': + case '?': + SETERROR(REG_BADRPT); + break; + case '.': + if (p->g->cflags&REG_NEWLINE) + nonnewline(p); + else + EMIT(OANY, 0); + break; + case '[': + p_bracket(p); + break; + case '\\': + REQUIRE(MORE(), REG_EESCAPE); + c = GETNEXT(); + ordinary(p, c); + break; + case '{': /* okay as ordinary except if digit follows */ + REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT); + /* FALLTHROUGH */ + default: + ordinary(p, c); + break; + } + + if (!MORE()) + return; + c = PEEK(); + /* we call { a repetition if followed by a digit */ + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) )) + return; /* no repetition, we're done */ + NEXT(); + + REQUIRE(!wascaret, REG_BADRPT); + switch (c) { + case '*': /* implemented as +? */ + /* this case does not require the (y|) trick, noKLUDGE */ + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + INSERT(OQUEST_, pos); + ASTERN(O_QUEST, pos); + break; + case '+': + INSERT(OPLUS_, pos); + ASTERN(O_PLUS, pos); + break; + case '?': + /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */ + INSERT(OCH_, pos); /* offset slightly wrong */ + ASTERN(OOR1, pos); /* this one's right */ + AHEAD(pos); /* fix the OCH_ */ + EMIT(OOR2, 0); /* offset very wrong...
*/ + AHEAD(THERE()); /* ...so fix it */ + ASTERN(O_CH, THERETHERE()); + break; + case '{': + count = p_count(p); + if (EAT(',')) { + if (isdigit((uch)PEEK())) { + count2 = p_count(p); + REQUIRE(count <= count2, REG_BADBR); + } else /* single number with comma */ + count2 = INFINITY; + } else /* just a single number */ + count2 = count; + repeat(p, pos, count, count2); + if (!EAT('}')) { /* error heuristics */ + while (MORE() && PEEK() != '}') + NEXT(); + REQUIRE(MORE(), REG_EBRACE); + SETERROR(REG_BADBR); + } + break; + } + + if (!MORE()) + return; + c = PEEK(); + if (!( c == '*' || c == '+' || c == '?' || + (c == '{' && MORE2() && isdigit((uch)PEEK2())) ) ) + return; + SETERROR(REG_BADRPT); +} + +/* + - p_str - string (no metacharacters) "parser" + */ +static void +p_str(struct parse *p) +{ + REQUIRE(MORE(), REG_EMPTY); + while (MORE()) + ordinary(p, GETNEXT()); +} + +/* + - p_bre - BRE parser top level, anchoring and concatenation + * Giving end1 as OUT essentially eliminates the end1/end2 check. + * + * This implementation is a bit of a kludge, in that a trailing $ is first + * taken as an ordinary character and then revised to be an anchor. The + * only undesirable side effect is that '$' gets included as a character + * category in such cases. This is fairly harmless; not worth fixing. + * The amount of lookahead needed to avoid this kludge is excessive. + */ +static void +p_bre(struct parse *p, + int end1, /* first terminating character */ + int end2) /* second terminating character */ +{ + sopno start = HERE(); + int first = 1; /* first subexpression? */ + int wasdollar = 0; + + if (EAT('^')) { + EMIT(OBOL, 0); + p->g->iflags |= USEBOL; + p->g->nbol++; + } + while (MORE() && !SEETWO(end1, end2)) { + wasdollar = p_simp_re(p, first); + first = 0; + } + if (wasdollar) { /* oops, that was a trailing anchor */ + DROP(1); + EMIT(OEOL, 0); + p->g->iflags |= USEEOL; + p->g->neol++; + } + + REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ +} + +/* + - p_simp_re - parse a simple RE, an atom possibly followed by a repetition + */ +static int /* was the simple RE an unbackslashed $? */ +p_simp_re(struct parse *p, + int starordinary) /* is a leading * an ordinary character? 
+
+/*
+ - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
+ */
+static int /* was the simple RE an unbackslashed $? */
+p_simp_re(struct parse *p,
+    int starordinary) /* is a leading * an ordinary character? */
+{
+  int c;
+  int count;
+  int count2;
+  sopno pos;
+  int i;
+  sopno subno;
+# define BACKSL (1<<CHAR_BIT)
+
+  pos = HERE(); /* repetition op, if any, covers from here */
+
+  assert(MORE()); /* caller should have ensured this */
+  c = GETNEXT();
+  if (c == '\\') {
+    REQUIRE(MORE(), REG_EESCAPE);
+    c = BACKSL | GETNEXT();
+  }
+  switch (c) {
+  case '.':
+    if (p->g->cflags&REG_NEWLINE)
+      nonnewline(p);
+    else
+      EMIT(OANY, 0);
+    break;
+  case '[':
+    p_bracket(p);
+    break;
+  case BACKSL|'{':
+    SETERROR(REG_BADRPT);
+    break;
+  case BACKSL|'(':
+    p->g->nsub++;
+    subno = p->g->nsub;
+    if (subno < NPAREN)
+      p->pbegin[subno] = HERE();
+    EMIT(OLPAREN, subno);
+    /* the MORE here is an error heuristic */
+    if (MORE() && !SEETWO('\\', ')'))
+      p_bre(p, '\\', ')');
+    if (subno < NPAREN) {
+      p->pend[subno] = HERE();
+      assert(p->pend[subno] != 0);
+    }
+    EMIT(ORPAREN, subno);
+    REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
+    break;
+  case BACKSL|')': /* should not get here -- must be user */
+  case BACKSL|'}':
+    SETERROR(REG_EPAREN);
+    break;
+  case BACKSL|'1':
+  case BACKSL|'2':
+  case BACKSL|'3':
+  case BACKSL|'4':
+  case BACKSL|'5':
+  case BACKSL|'6':
+  case BACKSL|'7':
+  case BACKSL|'8':
+  case BACKSL|'9':
+    i = (c&~BACKSL) - '0';
+    assert(i < NPAREN);
+    if (p->pend[i] != 0) {
+      assert(i <= p->g->nsub);
+      EMIT(OBACK_, i);
+      assert(p->pbegin[i] != 0);
+      assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
+      assert(OP(p->strip[p->pend[i]]) == ORPAREN);
+      (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
+      EMIT(O_BACK, i);
+    } else
+      SETERROR(REG_ESUBREG);
+    p->g->backrefs = 1;
+    break;
+  case '*':
+    REQUIRE(starordinary, REG_BADRPT);
+    /* FALLTHROUGH */
+  default:
+    ordinary(p, (char)c);
+    break;
+  }
+
+  if (EAT('*')) { /* implemented as +? */
+    /* this case does not require the (y|) trick, noKLUDGE */
+    INSERT(OPLUS_, pos);
+    ASTERN(O_PLUS, pos);
+    INSERT(OQUEST_, pos);
+    ASTERN(O_QUEST, pos);
+  } else if (EATTWO('\\', '{')) {
+    count = p_count(p);
+    if (EAT(',')) {
+      if (MORE() && isdigit((uch)PEEK())) {
+        count2 = p_count(p);
+        REQUIRE(count <= count2, REG_BADBR);
+      } else /* single number with comma */
+        count2 = INFINITY;
+    } else /* just a single number */
+      count2 = count;
+    repeat(p, pos, count, count2);
+    if (!EATTWO('\\', '}')) { /* error heuristics */
+      while (MORE() && !SEETWO('\\', '}'))
+        NEXT();
+      REQUIRE(MORE(), REG_EBRACE);
+      SETERROR(REG_BADBR);
+    }
+  } else if (c == '$') /* $ (but not \$) ends it */
+    return(1);
+
+  return(0);
+}
+
+/*
+ - p_count - parse a repetition count
+ */
+static int /* the value */
+p_count(struct parse *p)
+{
+  int count = 0;
+  int ndigits = 0;
+
+  while (MORE() && isdigit((uch)PEEK()) && count <= DUPMAX) {
+    count = count*10 + (GETNEXT() - '0');
+    ndigits++;
+  }
+
+  REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
+  return(count);
+}
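p_count() feeds repeat(), which normalizes each bound through MAP() into {0, 1, N, INF} and dispatches on REP(f, t) = f*8 + t; the macros appear verbatim in repeat() further down. A standalone check of that encoding (the harness is hypothetical, the constants match the source):

#include <stdio.h>

#define DUPMAX 255
#define INFINITY (DUPMAX + 1)
#define N 2
#define INF 3
#define REP(f, t) ((f)*8 + (t))
#define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)

int main(void) {
  /* x{2,5} -> REP(N, N): compiled as x followed by x{1,4} */
  printf("x{2,5}: %d (REP(N,N) = %d)\n", REP(MAP(2), MAP(5)), REP(N, N));
  /* x{1,}  -> REP(1, INF): compiled as x+ */
  printf("x{1,}:  %d (REP(1,INF) = %d)\n", REP(MAP(1), MAP(INFINITY)), REP(1, INF));
  /* x{0,3} -> REP(0, N): compiled via the (y|) kludge as x{1,3}? */
  printf("x{0,3}: %d (REP(0,N) = %d)\n", REP(MAP(0), MAP(3)), REP(0, N));
  return 0;
}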
+
+/*
+ - p_bracket - parse a bracketed character list
+ *
+ * Note a significant property of this code:  if the allocset() did SETERROR,
+ * no set operations are done.
+ */
+static void
+p_bracket(struct parse *p)
+{
+  cset *cs;
+  int invert = 0;
+
+  /* Dept of Truly Sickening Special-Case Kludges */
+  if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
+    EMIT(OBOW, 0);
+    NEXTn(6);
+    return;
+  }
+  if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
+    EMIT(OEOW, 0);
+    NEXTn(6);
+    return;
+  }
+
+  if ((cs = allocset(p)) == NULL) {
+    /* allocset did set error status in p */
+    return;
+  }
+
+  if (EAT('^'))
+    invert++; /* make note to invert set at end */
+  if (EAT(']'))
+    CHadd(cs, ']');
+  else if (EAT('-'))
+    CHadd(cs, '-');
+  while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
+    p_b_term(p, cs);
+  if (EAT('-'))
+    CHadd(cs, '-');
+  MUSTEAT(']', REG_EBRACK);
+
+  if (p->error != 0) { /* don't mess things up further */
+    freeset(p, cs);
+    return;
+  }
+
+  if (p->g->cflags&REG_ICASE) {
+    int i;
+    int ci;
+
+    for (i = p->g->csetsize - 1; i >= 0; i--)
+      if (CHIN(cs, i) && isalpha(i)) {
+        ci = othercase(i);
+        if (ci != i)
+          CHadd(cs, ci);
+      }
+    if (cs->multis != NULL)
+      mccase(p, cs);
+  }
+  if (invert) {
+    int i;
+
+    for (i = p->g->csetsize - 1; i >= 0; i--)
+      if (CHIN(cs, i))
+        CHsub(cs, i);
+      else
+        CHadd(cs, i);
+    if (p->g->cflags&REG_NEWLINE)
+      CHsub(cs, '\n');
+    if (cs->multis != NULL)
+      mcinvert(p, cs);
+  }
+
+  assert(cs->multis == NULL); /* xxx */
+
+  if (nch(p, cs) == 1) { /* optimize singleton sets */
+    ordinary(p, firstch(p, cs));
+    freeset(p, cs);
+  } else
+    EMIT(OANYOF, freezeset(p, cs));
+}
+
+/*
+ - p_b_term - parse one term of a bracketed character list
+ */
+static void
+p_b_term(struct parse *p, cset *cs)
+{
+  char c;
+  char start, finish;
+  int i;
+
+  /* classify what we've got */
+  switch ((MORE()) ? PEEK() : '\0') {
+  case '[':
+    c = (MORE2()) ? PEEK2() : '\0';
+    break;
+  case '-':
+    SETERROR(REG_ERANGE);
+    return; /* NOTE RETURN */
+    break;
+  default:
+    c = '\0';
+    break;
+  }
+
+  switch (c) {
+  case ':': /* character class */
+    NEXT2();
+    REQUIRE(MORE(), REG_EBRACK);
+    c = PEEK();
+    REQUIRE(c != '-' && c != ']', REG_ECTYPE);
+    p_b_cclass(p, cs);
+    REQUIRE(MORE(), REG_EBRACK);
+    REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
+    break;
+  case '=': /* equivalence class */
+    NEXT2();
+    REQUIRE(MORE(), REG_EBRACK);
+    c = PEEK();
+    REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
+    p_b_eclass(p, cs);
+    REQUIRE(MORE(), REG_EBRACK);
+    REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
+    break;
+  default: /* symbol, ordinary character, or range */
+/* xxx revision needed for multichar stuff */
+    start = p_b_symbol(p);
+    if (SEE('-') && MORE2() && PEEK2() != ']') {
+      /* range */
+      NEXT();
+      if (EAT('-'))
+        finish = '-';
+      else
+        finish = p_b_symbol(p);
+    } else
+      finish = start;
+/* xxx what about signed chars here... */
+    REQUIRE(start <= finish, REG_ERANGE);
+    for (i = start; i <= finish; i++)
+      CHadd(cs, i);
+    break;
+  }
+}
+
+/*
+ - p_b_cclass - parse a character-class name and deal with it
+ */
+static void
+p_b_cclass(struct parse *p, cset *cs)
+{
+  char *sp = p->next;
+  struct cclass *cp;
+  size_t len;
+  const char *u;
+  char c;
+
+  while (MORE() && isalpha((uch)PEEK()))
+    NEXT();
+  len = p->next - sp;
+  for (cp = cclasses; cp->name != NULL; cp++)
+    if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+      break;
+  if (cp->name == NULL) {
+    /* oops, didn't find it */
+    SETERROR(REG_ECTYPE);
+    return;
+  }
+
+  u = cp->chars;
+  while ((c = *u++) != '\0')
+    CHadd(cs, c);
+  for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
+    MCadd(p, cs, u);
+}
+
+/*
+ - p_b_eclass - parse an equivalence-class name and deal with it
+ *
+ * This implementation is incomplete. xxx
+ */
+static void
+p_b_eclass(struct parse *p, cset *cs)
+{
+  char c;
+
+  c = p_b_coll_elem(p, '=');
+  CHadd(cs, c);
+}
+
+/*
+ - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
+ */
+static char /* value of symbol */
+p_b_symbol(struct parse *p)
+{
+  char value;
+
+  REQUIRE(MORE(), REG_EBRACK);
+  if (!EATTWO('[', '.'))
+    return(GETNEXT());
+
+  /* collating symbol */
+  value = p_b_coll_elem(p, '.');
+  REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
+  return(value);
+}
+
+/*
+ - p_b_coll_elem - parse a collating-element name and look it up
+ */
+static char /* value of collating element */
+p_b_coll_elem(struct parse *p,
+    int endc) /* name ended by endc,']' */
+{
+  char *sp = p->next;
+  struct cname *cp;
+  int len;
+
+  while (MORE() && !SEETWO(endc, ']'))
+    NEXT();
+  if (!MORE()) {
+    SETERROR(REG_EBRACK);
+    return(0);
+  }
+  len = p->next - sp;
+  for (cp = cnames; cp->name != NULL; cp++)
+    if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
+      return(cp->code); /* known name */
+  if (len == 1)
+    return(*sp); /* single character */
+  SETERROR(REG_ECOLLATE); /* neither */
+  return(0);
+}
+
+/*
+ - othercase - return the case counterpart of an alphabetic
+ */
+static char /* if no counterpart, return ch */
+othercase(int ch)
+{
+  ch = (uch)ch;
+  assert(isalpha(ch));
+  if (isupper(ch))
+    return ((uch)tolower(ch));
+  else if (islower(ch))
+    return ((uch)toupper(ch));
+  else /* peculiar, but could happen */
+    return(ch);
+}
+
+/*
+ - bothcases - emit a dualcase version of a two-case character
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+bothcases(struct parse *p, int ch)
+{
+  char *oldnext = p->next;
+  char *oldend = p->end;
+  char bracket[3];
+
+  ch = (uch)ch;
+  assert(othercase(ch) != ch); /* p_bracket() would recurse */
+  p->next = bracket;
+  p->end = bracket+2;
+  bracket[0] = ch;
+  bracket[1] = ']';
+  bracket[2] = '\0';
+  p_bracket(p);
+  assert(p->next == bracket+2);
+  p->next = oldnext;
+  p->end = oldend;
+}
+
+/*
+ - ordinary - emit an ordinary character
+ */
+static void
+ordinary(struct parse *p, int ch)
+{
+  cat_t *cap = p->g->categories;
+
+  if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
+    bothcases(p, ch);
+  else {
+    EMIT(OCHAR, (uch)ch);
+    if (cap[ch] == 0)
+      cap[ch] = p->g->ncategories++;
+  }
+}
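bothcases() above (and nonnewline() just below) reuse p_bracket() instead of duplicating its logic: they point p->next/p->end at a tiny synthetic bracket body, parse it, then restore the real cursor. The same trick in isolation; the names here are hypothetical, only the pointer-swap pattern is taken from the source:

#include <stdio.h>

struct cursor { const char *next, *end; };

/* stands in for p_bracket(): consumes members up to ']' */
static void parse_body(struct cursor *c) {
  while (c->next < c->end && *c->next != ']')
    printf("member '%c'\n", *c->next++);
}

/* feed the parser a fake two-byte body, the way bothcases() does */
static void parse_synthetic(struct cursor *c, int ch) {
  const char *oldnext = c->next; /* save the real input */
  const char *oldend = c->end;
  char fake[3];
  fake[0] = (char)ch;
  fake[1] = ']';
  fake[2] = '\0';
  c->next = fake;
  c->end = fake + 2;
  parse_body(c); /* sees "x]" as if it were real input */
  c->next = oldnext; /* restore */
  c->end = oldend;
}

int main(void) {
  struct cursor c = { "", "" };
  parse_synthetic(&c, 'x');
  return 0;
}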
+
+/*
+ - nonnewline - emit REG_NEWLINE version of OANY
+ *
+ * Boy, is this implementation ever a kludge...
+ */
+static void
+nonnewline(struct parse *p)
+{
+  char *oldnext = p->next;
+  char *oldend = p->end;
+  char bracket[4];
+
+  p->next = bracket;
+  p->end = bracket+3;
+  bracket[0] = '^';
+  bracket[1] = '\n';
+  bracket[2] = ']';
+  bracket[3] = '\0';
+  p_bracket(p);
+  assert(p->next == bracket+3);
+  p->next = oldnext;
+  p->end = oldend;
+}
+
+/*
+ - repeat - generate code for a bounded repetition, recursively if needed
+ */
+static void
+repeat(struct parse *p,
+    sopno start, /* operand from here to end of strip */
+    int from, /* repeated from this number */
+    int to) /* to this number of times (maybe INFINITY) */
+{
+  sopno finish = HERE();
+# define N 2
+# define INF 3
+# define REP(f, t) ((f)*8 + (t))
+# define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
+  sopno copy;
+
+  if (p->error != 0) /* head off possible runaway recursion */
+    return;
+
+  assert(from <= to);
+
+  switch (REP(MAP(from), MAP(to))) {
+  case REP(0, 0): /* must be user doing this */
+    DROP(finish-start); /* drop the operand */
+    break;
+  case REP(0, 1): /* as x{1,1}? */
+  case REP(0, N): /* as x{1,n}? */
+  case REP(0, INF): /* as x{1,}? */
+    /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+    INSERT(OCH_, start); /* offset is wrong... */
+    repeat(p, start+1, 1, to);
+    ASTERN(OOR1, start);
+    AHEAD(start); /* ... fix it */
+    EMIT(OOR2, 0);
+    AHEAD(THERE());
+    ASTERN(O_CH, THERETHERE());
+    break;
+  case REP(1, 1): /* trivial case */
+    /* done */
+    break;
+  case REP(1, N): /* as x?x{1,n-1} */
+    /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
+    INSERT(OCH_, start);
+    ASTERN(OOR1, start);
+    AHEAD(start);
+    EMIT(OOR2, 0); /* offset very wrong... */
+    AHEAD(THERE()); /* ...so fix it */
+    ASTERN(O_CH, THERETHERE());
+    copy = dupl(p, start+1, finish+1);
+    assert(copy == finish+4);
+    repeat(p, copy, 1, to-1);
+    break;
+  case REP(1, INF): /* as x+ */
+    INSERT(OPLUS_, start);
+    ASTERN(O_PLUS, start);
+    break;
+  case REP(N, N): /* as xx{m-1,n-1} */
+    copy = dupl(p, start, finish);
+    repeat(p, copy, from-1, to-1);
+    break;
+  case REP(N, INF): /* as xx{n-1,INF} */
+    copy = dupl(p, start, finish);
+    repeat(p, copy, from-1, to);
+    break;
+  default: /* "can't happen" */
+    SETERROR(REG_ASSERT); /* just in case */
+    break;
+  }
+}
+
+/*
+ - seterr - set an error condition
+ */
+static int /* useless but makes type checking happy */
+seterr(struct parse *p, int e)
+{
+  if (p->error == 0) /* keep earliest error condition */
+    p->error = e;
+  p->next = nuls; /* try to bring things to a halt */
+  p->end = nuls;
+  return(0); /* make the return value well-defined */
+}
+
+/*
+ - allocset - allocate a set of characters for []
+ */
+static cset *
+allocset(struct parse *p)
+{
+  int no = p->g->ncsets++;
+  size_t nc;
+  size_t nbytes;
+  cset *cs;
+  size_t css = (size_t)p->g->csetsize;
+  int i;
+
+  if (no >= p->ncsalloc) { /* need another column of space */
+    void *ptr;
+
+    p->ncsalloc += CHAR_BIT;
+    nc = p->ncsalloc;
+    assert(nc % CHAR_BIT == 0);
+    nbytes = nc / CHAR_BIT * css;
+
+    ptr = (cset *)realloc((char *)p->g->sets, nc * sizeof(cset));
+    if (ptr == NULL)
+      goto nomem;
+    p->g->sets = ptr;
+
+    ptr = (uch *)realloc((char *)p->g->setbits, nbytes);
+    if (ptr == NULL)
+      goto nomem;
+    p->g->setbits = ptr;
+
+    for (i = 0; i < no; i++)
+      p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
+
+    (void) memset((char *)p->g->setbits + (nbytes - css), 0, css);
+  }
+  /* XXX should not happen */
+  if (p->g->sets == NULL || p->g->setbits == NULL)
+    goto nomem;
+
+  cs = &p->g->sets[no];
+  cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
+ cs->mask = 1 << ((no) % CHAR_BIT); + cs->hash = 0; + cs->smultis = 0; + cs->multis = NULL; + + return(cs); +nomem: + free(p->g->sets); + p->g->sets = NULL; + free(p->g->setbits); + p->g->setbits = NULL; + + SETERROR(REG_ESPACE); + /* caller's responsibility not to do set ops */ + return(NULL); +} + +/* + - freeset - free a now-unused set + */ +static void +freeset(struct parse *p, cset *cs) +{ + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + CHsub(cs, i); + if (cs == top-1) /* recover only the easy case */ + p->g->ncsets--; +} + +/* + - freezeset - final processing on a set of characters + * + * The main task here is merging identical sets. This is usually a waste + * of time (although the hash code minimizes the overhead), but can win + * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash + * is done using addition rather than xor -- all ASCII [aA] sets xor to + * the same value! + */ +static int /* set number */ +freezeset(struct parse *p, cset *cs) +{ + uch h = cs->hash; + size_t i; + cset *top = &p->g->sets[p->g->ncsets]; + cset *cs2; + size_t css = (size_t)p->g->csetsize; + + /* look for an earlier one which is the same */ + for (cs2 = &p->g->sets[0]; cs2 < top; cs2++) + if (cs2->hash == h && cs2 != cs) { + /* maybe */ + for (i = 0; i < css; i++) + if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + break; /* no */ + if (i == css) + break; /* yes */ + } + + if (cs2 < top) { /* found one */ + freeset(p, cs); + cs = cs2; + } + + return((int)(cs - p->g->sets)); +} + +/* + - firstch - return first character in a set (which must have at least one) + */ +static int /* character; there is no "none" value */ +firstch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + return((char)i); + assert(never); + return(0); /* arbitrary */ +} + +/* + - nch - number of characters in a set + */ +static int +nch(struct parse *p, cset *cs) +{ + size_t i; + size_t css = (size_t)p->g->csetsize; + int n = 0; + + for (i = 0; i < css; i++) + if (CHIN(cs, i)) + n++; + return(n); +} + +/* + - mcadd - add a collating element to a cset + */ +static void +mcadd( struct parse *p, cset *cs, const char *cp) +{ + size_t oldend = cs->smultis; + void *np; + + cs->smultis += strlen(cp) + 1; + np = realloc(cs->multis, cs->smultis); + if (np == NULL) { + if (cs->multis) + free(cs->multis); + cs->multis = NULL; + SETERROR(REG_ESPACE); + return; + } + cs->multis = np; + + llvm_strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); +} + +/* + - mcinvert - invert the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mcinvert(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - mccase - add case counterparts of the list of collating elements in a cset + * + * This would have to know the set of possibilities. Implementation + * is deferred. + */ +/* ARGSUSED */ +static void +mccase(struct parse *p, cset *cs) +{ + assert(cs->multis == NULL); /* xxx */ +} + +/* + - isinsets - is this character in any sets? 
+ */ +static int /* predicate */ +isinsets(struct re_guts *g, int c) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc = (uch)c; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc] != 0) + return(1); + return(0); +} + +/* + - samesets - are these two characters in exactly the same sets? + */ +static int /* predicate */ +samesets(struct re_guts *g, int c1, int c2) +{ + uch *col; + int i; + int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; + unsigned uc1 = (uch)c1; + unsigned uc2 = (uch)c2; + + for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) + if (col[uc1] != col[uc2]) + return(0); + return(1); +} + +/* + - categorize - sort out character categories + */ +static void +categorize(struct parse *p, struct re_guts *g) +{ + cat_t *cats = g->categories; + int c; + int c2; + cat_t cat; + + /* avoid making error situations worse */ + if (p->error != 0) + return; + + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (cats[c] == 0 && isinsets(g, c)) { + cat = g->ncategories++; + cats[c] = cat; + for (c2 = c+1; c2 <= CHAR_MAX; c2++) + if (cats[c2] == 0 && samesets(g, c, c2)) + cats[c2] = cat; + } +} + +/* + - dupl - emit a duplicate of a bunch of sops + */ +static sopno /* start of duplicate */ +dupl(struct parse *p, + sopno start, /* from here */ + sopno finish) /* to this less one */ +{ + sopno ret = HERE(); + sopno len = finish - start; + + assert(finish >= start); + if (len == 0) + return(ret); + enlarge(p, p->ssize + len); /* this many unexpected additions */ + assert(p->ssize >= p->slen + len); + (void) memmove((char *)(p->strip + p->slen), + (char *)(p->strip + start), (size_t)len*sizeof(sop)); + p->slen += len; + return(ret); +} + +/* + - doemit - emit a strip operator + * + * It might seem better to implement this as a macro with a function as + * hard-case backup, but it's just too big and messy unless there are + * some changes to the data structures. Maybe later. 
+ */
+static void
+doemit(struct parse *p, sop op, size_t opnd)
+{
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  /* deal with oversize operands ("can't happen", more or less) */
+  assert(opnd < 1<<OPSHIFT);
+
+  /* deal with undersized strip */
+  if (p->slen >= p->ssize)
+    enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
+  assert(p->slen < p->ssize);
+
+  /* finally, it's all reduced to the easy case */
+  p->strip[p->slen++] = SOP(op, opnd);
+}
+
+/*
+ - doinsert - insert a sop into the strip
+ */
+static void
+doinsert(struct parse *p, sop op, size_t opnd, sopno pos)
+{
+  sopno sn;
+  sop s;
+  int i;
+
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  sn = HERE();
+  EMIT(op, opnd); /* do checks, ensure space */
+  assert(HERE() == sn+1);
+  s = p->strip[sn];
+
+  /* adjust paren pointers */
+  assert(pos > 0);
+  for (i = 1; i < NPAREN; i++) {
+    if (p->pbegin[i] >= pos) {
+      p->pbegin[i]++;
+    }
+    if (p->pend[i] >= pos) {
+      p->pend[i]++;
+    }
+  }
+
+  memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
+      (HERE()-pos-1)*sizeof(sop));
+  p->strip[pos] = s;
+}
+
+/*
+ - dofwd - complete a forward reference
+ */
+static void
+dofwd(struct parse *p, sopno pos, sop value)
+{
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  assert(value < 1<<OPSHIFT);
+  p->strip[pos] = OP(p->strip[pos]) | value;
+}
+
+/*
+ - enlarge - enlarge the strip
+ */
+static void
+enlarge(struct parse *p, sopno size)
+{
+  sop *sp;
+
+  if (p->ssize >= size)
+    return;
+
+  sp = (sop *)realloc(p->strip, size*sizeof(sop));
+  if (sp == NULL) {
+    SETERROR(REG_ESPACE);
+    return;
+  }
+  p->strip = sp;
+  p->ssize = size;
+}
+
+/*
+ - stripsnug - compact the strip
+ */
+static void
+stripsnug(struct parse *p, struct re_guts *g)
+{
+  g->nstates = p->slen;
+  g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
+  if (g->strip == NULL) {
+    SETERROR(REG_ESPACE);
+    g->strip = p->strip;
+  }
+}
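findmust() below records the longest literal that every match must contain; matcher() in regengine.inc then uses it as a cheap memcmp() prescreen before running the NFA at all. The prescreen loop, lifted into a standalone form (the wrapper function is hypothetical; the scan is the same one matcher() performs):

#include <stddef.h>
#include <string.h>

/* nonzero if text[0..len) contains must[0..mlen); mlen == 0 means
   "no mandatory literal", in which case nothing can be ruled out */
static int prescreen(const char *text, size_t len,
                     const char *must, size_t mlen) {
  const char *dp;
  const char *stop = text + len;
  if (must == NULL || mlen == 0)
    return 1;
  for (dp = text; dp < stop; dp++)
    if (*dp == must[0] && (size_t)(stop - dp) >= mlen &&
        memcmp(dp, must, mlen) == 0)
      return 1; /* literal present; worth running the engine */
  return 0; /* REG_NOMATCH without touching the NFA */
}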
+
+/*
+ - findmust - fill in must and mlen with longest mandatory literal string
+ *
+ * This algorithm could do fancy things like analyzing the operands of |
+ * for common subsequences.  Someday.  This code is simple and finds most
+ * of the interesting cases.
+ *
+ * Note that must and mlen got initialized during setup.
+ */
+static void
+findmust(struct parse *p, struct re_guts *g)
+{
+  sop *scan;
+  sop *start = 0; /* start initialized in the default case, after that */
+  sop *newstart = 0; /* newstart was initialized in the OCHAR case */
+  sopno newlen;
+  sop s;
+  char *cp;
+  sopno i;
+
+  /* avoid making error situations worse */
+  if (p->error != 0)
+    return;
+
+  /* find the longest OCHAR sequence in strip */
+  newlen = 0;
+  scan = g->strip + 1;
+  do {
+    s = *scan++;
+    switch (OP(s)) {
+    case OCHAR: /* sequence member */
+      if (newlen == 0) /* new sequence */
+        newstart = scan - 1;
+      newlen++;
+      break;
+    case OPLUS_: /* things that don't break one */
+    case OLPAREN:
+    case ORPAREN:
+      break;
+    case OQUEST_: /* things that must be skipped */
+    case OCH_:
+      scan--;
+      do {
+        scan += OPND(s);
+        s = *scan;
+        /* assert() interferes w debug printouts */
+        if (OP(s) != O_QUEST && OP(s) != O_CH &&
+            OP(s) != OOR2) {
+          g->iflags |= REGEX_BAD;
+          return;
+        }
+      } while (OP(s) != O_QUEST && OP(s) != O_CH);
+      /* fallthrough */
+    default: /* things that break a sequence */
+      if (newlen > g->mlen) { /* ends one */
+        start = newstart;
+        g->mlen = newlen;
+      }
+      newlen = 0;
+      break;
+    }
+  } while (OP(s) != OEND);
+
+  if (g->mlen == 0) /* there isn't one */
+    return;
+
+  /* turn it into a character string */
+  g->must = malloc((size_t)g->mlen + 1);
+  if (g->must == NULL) { /* argh; just forget it */
+    g->mlen = 0;
+    return;
+  }
+  cp = g->must;
+  scan = start;
+  for (i = g->mlen; i > 0; i--) {
+    while (OP(s = *scan++) != OCHAR)
+      continue;
+    assert(cp < g->must + g->mlen);
+    *cp++ = (char)OPND(s);
+  }
+  assert(cp == g->must + g->mlen);
+  *cp++ = '\0'; /* just on general principles */
+}
+
+/*
+ - pluscount - count + nesting
+ */
+static sopno /* nesting depth */
+pluscount(struct parse *p, struct re_guts *g)
+{
+  sop *scan;
+  sop s;
+  sopno plusnest = 0;
+  sopno maxnest = 0;
+
+  if (p->error != 0)
+    return(0); /* there may not be an OEND */
+
+  scan = g->strip + 1;
+  do {
+    s = *scan++;
+    switch (OP(s)) {
+    case OPLUS_:
+      plusnest++;
+      break;
+    case O_PLUS:
+      if (plusnest > maxnest)
+        maxnest = plusnest;
+      plusnest--;
+      break;
+    }
+  } while (OP(s) != OEND);
+  if (plusnest != 0)
+    g->iflags |= REGEX_BAD;
+  return(maxnest);
+}
diff --git a/final/lib/Support/regengine.inc b/final/lib/Support/regengine.inc
new file mode 100644
index 00000000000..7e41f96f359
--- /dev/null
+++ b/final/lib/Support/regengine.inc
@@ -0,0 +1,1034 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+/*
+ * The matching engine and friends.  This file is #included by regexec.c
+ * after suitable #defines of a variety of macros used herein, so that
+ * different state representations can be used without duplicating masses
+ * of code.
+ */
+
+#ifdef SNAMES
+#define matcher smatcher
+#define fast sfast
+#define slow sslow
+#define dissect sdissect
+#define backref sbackref
+#define step sstep
+#define print sprint
+#define at sat
+#define match smat
+#define nope snope
+#endif
+#ifdef LNAMES
+#define matcher lmatcher
+#define fast lfast
+#define slow lslow
+#define dissect ldissect
+#define backref lbackref
+#define step lstep
+#define print lprint
+#define at lat
+#define match lmat
+#define nope lnope
+#endif
+
+/* another structure passed up and down to avoid zillions of parameters */
+struct match {
+  struct re_guts *g;
+  int eflags;
+  llvm_regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
+  const char *offp; /* offsets work from here */
+  const char *beginp; /* start of string -- virtual NUL precedes */
+  const char *endp; /* end of string -- virtual NUL here */
+  const char *coldp; /* can be no match starting before here */
+  const char **lastpos; /* [nplus+1] */
+  STATEVARS;
+  states st; /* current states */
+  states fresh; /* states for a fresh start */
+  states tmp; /* temporary */
+  states empty; /* empty set of states */
+};
+
+static int matcher(struct re_guts *, const char *, size_t,
+                   llvm_regmatch_t[], int);
+static const char *dissect(struct match *, const char *, const char *, sopno,
+                           sopno);
+static const char *backref(struct match *, const char *, const char *, sopno,
+                           sopno, sopno, int);
+static const char *fast(struct match *, const char *, const char *, sopno, sopno);
+static const char *slow(struct match *, const char *, const char *, sopno, sopno);
+static states step(struct re_guts *, sopno, sopno, states, int, states);
+#define MAX_RECURSION 100
+#define BOL (OUT+1)
+#define EOL (BOL+1)
+#define BOLEOL (BOL+2)
+#define NOTHING (BOL+3)
+#define BOW (BOL+4)
+#define EOW (BOL+5)
+#define CODEMAX (BOL+5) /* highest code used */
+#define NONCHAR(c) ((c) > CHAR_MAX)
+#define NNONCHAR (CODEMAX-CHAR_MAX)
+#ifdef REDEBUG
+static void print(struct match *, char *, states, int, FILE *);
+#endif
+#ifdef REDEBUG
+static void at(struct match *, char *, char *, char *, sopno, sopno);
+#endif
+#ifdef REDEBUG
+static char *pchar(int);
+#endif
+
+#ifdef REDEBUG
+#define SP(t, s, c) print(m, t, s, c, stdout)
+#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2)
+#define NOTE(str) { if (m->eflags&REG_TRACE) (void)printf("=%s\n", (str)); }
+static int nope = 0;
+#else
+#define SP(t, s, c) /* nothing */
+#define AT(t, p1, p2, s1, s2) /* nothing */
+#define NOTE(s) /* nothing */
+#endif
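These rename blocks are why regengine.inc can be compiled twice from regexec.c: once under SNAMES with a single-word state set (smatcher and friends) and once under LNAMES with a multi-word one (lmatcher) -- C's closest approximation of a template. The same pattern in miniature; the demo names and types below are hypothetical:

#include <stdio.h>

/* one body, two instantiations, analogous to regexec.c #including
   regengine.inc under SNAMES and then again under LNAMES */
#define ENGINE_BODY(name, state_t) \
  static int name(state_t st) { return (int)sizeof(st); }

typedef unsigned int small_states; /* states fit in one word */
typedef struct { unsigned int w[4]; } large_states; /* wider vector */

ENGINE_BODY(smatcher_demo, small_states)
ENGINE_BODY(lmatcher_demo, large_states)

int main(void) {
  small_states s = 0;
  large_states l = {{0}};
  printf("small state set: %d bytes\n", smatcher_demo(s));
  printf("large state set: %d bytes\n", lmatcher_demo(l));
  return 0;
}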
+
+/*
+ - matcher - the actual matching engine
+ */
+static int /* 0 success, REG_NOMATCH failure */
+matcher(struct re_guts *g, const char *string, size_t nmatch,
+    llvm_regmatch_t pmatch[],
+    int eflags)
+{
+  const char *endp;
+  size_t i;
+  struct match mv;
+  struct match *m = &mv;
+  const char *dp;
+  const sopno gf = g->firststate+1; /* +1 for OEND */
+  const sopno gl = g->laststate;
+  const char *start;
+  const char *stop;
+
+  /* simplify the situation where possible */
+  if (g->cflags&REG_NOSUB)
+    nmatch = 0;
+  if (eflags&REG_STARTEND) {
+    start = string + pmatch[0].rm_so;
+    stop = string + pmatch[0].rm_eo;
+  } else {
+    start = string;
+    stop = start + strlen(start);
+  }
+  if (stop < start)
+    return(REG_INVARG);
+
+  /* prescreening; this does wonders for this rather slow code */
+  if (g->must != NULL) {
+    for (dp = start; dp < stop; dp++)
+      if (*dp == g->must[0] && stop - dp >= g->mlen &&
+          memcmp(dp, g->must, (size_t)g->mlen) == 0)
+        break;
+    if (dp == stop) /* we didn't find g->must */
+      return(REG_NOMATCH);
+  }
+
+  /* match struct setup */
+  m->g = g;
+  m->eflags = eflags;
+  m->pmatch = NULL;
+  m->lastpos = NULL;
+  m->offp = string;
+  m->beginp = start;
+  m->endp = stop;
+  STATESETUP(m, 4);
+  SETUP(m->st);
+  SETUP(m->fresh);
+  SETUP(m->tmp);
+  SETUP(m->empty);
+  CLEAR(m->empty);
+
+  /* this loop does only one repetition except for backrefs */
+  for (;;) {
+    endp = fast(m, start, stop, gf, gl);
+    if (endp == NULL) { /* a miss */
+      free(m->pmatch);
+      free((void*)m->lastpos);
+      STATETEARDOWN(m);
+      return(REG_NOMATCH);
+    }
+    if (nmatch == 0 && !g->backrefs)
+      break; /* no further info needed */
+
+    /* where? */
+    assert(m->coldp != NULL);
+    for (;;) {
+      NOTE("finding start");
+      endp = slow(m, m->coldp, stop, gf, gl);
+      if (endp != NULL)
+        break;
+      assert(m->coldp < m->endp);
+      m->coldp++;
+    }
+    if (nmatch == 1 && !g->backrefs)
+      break; /* no further info needed */
+
+    /* oh my, he wants the subexpressions... */
+    if (m->pmatch == NULL)
+      m->pmatch = (llvm_regmatch_t *)malloc((m->g->nsub + 1) *
+          sizeof(llvm_regmatch_t));
+    if (m->pmatch == NULL) {
+      STATETEARDOWN(m);
+      return(REG_ESPACE);
+    }
+    for (i = 1; i <= m->g->nsub; i++)
+      m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
+    if (!g->backrefs && !(m->eflags&REG_BACKR)) {
+      NOTE("dissecting");
+      dp = dissect(m, m->coldp, endp, gf, gl);
+    } else {
+      if (g->nplus > 0 && m->lastpos == NULL)
+        m->lastpos = (const char **)malloc((g->nplus+1) *
+            sizeof(char *));
+      if (g->nplus > 0 && m->lastpos == NULL) {
+        free(m->pmatch);
+        STATETEARDOWN(m);
+        return(REG_ESPACE);
+      }
+      NOTE("backref dissect");
+      dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
+    }
+    if (dp != NULL)
+      break;
+
+    /* uh-oh...
we couldn't find a subexpression-level match */ + assert(g->backrefs); /* must be back references doing it */ + assert(g->nplus == 0 || m->lastpos != NULL); + for (;;) { + if (dp != NULL || endp <= m->coldp) + break; /* defeat */ + NOTE("backoff"); + endp = slow(m, m->coldp, endp-1, gf, gl); + if (endp == NULL) + break; /* defeat */ + /* try it on a shorter possibility */ +#ifndef NDEBUG + for (i = 1; i <= m->g->nsub; i++) { + assert(m->pmatch[i].rm_so == -1); + assert(m->pmatch[i].rm_eo == -1); + } +#endif + NOTE("backoff dissect"); + dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0); + } + assert(dp == NULL || dp == endp); + if (dp != NULL) /* found a shorter one */ + break; + + /* despite initial appearances, there is no match here */ + NOTE("false alarm"); + if (m->coldp == stop) + break; + start = m->coldp + 1; /* recycle starting later */ + } + + /* fill in the details if requested */ + if (nmatch > 0) { + pmatch[0].rm_so = m->coldp - m->offp; + pmatch[0].rm_eo = endp - m->offp; + } + if (nmatch > 1) { + assert(m->pmatch != NULL); + for (i = 1; i < nmatch; i++) + if (i <= m->g->nsub) + pmatch[i] = m->pmatch[i]; + else { + pmatch[i].rm_so = -1; + pmatch[i].rm_eo = -1; + } + } + + if (m->pmatch != NULL) + free((char *)m->pmatch); + if (m->lastpos != NULL) + free((char *)m->lastpos); + STATETEARDOWN(m); + return(0); +} + +/* + - dissect - figure out what matched what, no back references + */ +static const char * /* == stop (success) always */ +dissect(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst) +{ + int i; + sopno ss; /* start sop of current subRE */ + sopno es; /* end sop of current subRE */ + const char *sp; /* start of string matched by it */ + const char *stp; /* string matched by it cannot pass here */ + const char *rest; /* start of rest of string */ + const char *tail; /* string unmatched by rest of RE */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *sep; /* end of string matched by subsubRE */ + const char *oldssp; /* previous ssp */ + + AT("diss", start, stop, startst, stopst); + sp = start; + for (ss = startst; ss < stopst; ss = es) { + /* identify end of subRE */ + es = ss; + switch (OP(m->g->strip[es])) { + case OPLUS_: + case OQUEST_: + es += OPND(m->g->strip[es]); + break; + case OCH_: + while (OP(m->g->strip[es]) != O_CH) + es += OPND(m->g->strip[es]); + break; + } + es++; + + /* figure out what it matched */ + switch (OP(m->g->strip[ss])) { + case OEND: + assert(nope); + break; + case OCHAR: + sp++; + break; + case OBOL: + case OEOL: + case OBOW: + case OEOW: + break; + case OANY: + case OANYOF: + sp++; + break; + case OBACK_: + case O_BACK: + assert(nope); + break; + /* cases where length of match is hard to find */ + case OQUEST_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + /* did innards match? 
*/ + if (slow(m, sp, rest, ssub, esub) != NULL) { + const char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } else /* no */ + assert(sp == rest); + sp = rest; + break; + case OPLUS_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = es - 1; + ssp = sp; + oldssp = ssp; + for (;;) { /* find last match of innards */ + sep = slow(m, ssp, rest, ssub, esub); + if (sep == NULL || sep == ssp) + break; /* failed or matched null */ + oldssp = ssp; /* on to next try */ + ssp = sep; + } + if (sep == NULL) { + /* last successful match */ + sep = ssp; + ssp = oldssp; + } + assert(sep == rest); /* must exhaust substring */ + assert(slow(m, ssp, sep, ssub, esub) == rest); + { + const char *dp = dissect(m, ssp, sep, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == sep); + } + sp = rest; + break; + case OCH_: + stp = stop; + for (;;) { + /* how long could this one be? */ + rest = slow(m, sp, stp, ss, es); + assert(rest != NULL); /* it did match */ + /* could the rest match the rest? */ + tail = slow(m, rest, stop, es, stopst); + if (tail == stop) + break; /* yes! */ + /* no -- try a shorter match for this one */ + stp = rest - 1; + assert(stp >= sp); /* it did work */ + } + ssub = ss + 1; + esub = ss + OPND(m->g->strip[ss]) - 1; + assert(OP(m->g->strip[esub]) == OOR1); + for (;;) { /* find first matching branch */ + if (slow(m, sp, rest, ssub, esub) == rest) + break; /* it matched all of it */ + /* that one missed, try next one */ + assert(OP(m->g->strip[esub]) == OOR1); + esub++; + assert(OP(m->g->strip[esub]) == OOR2); + ssub = esub + 1; + esub += OPND(m->g->strip[esub]); + if (OP(m->g->strip[esub]) == OOR2) + esub--; + else + assert(OP(m->g->strip[esub]) == O_CH); + } + { + const char *dp = dissect(m, sp, rest, ssub, esub); + (void)dp; /* avoid warning if assertions off */ + assert(dp == rest); + } + sp = rest; + break; + case O_PLUS: + case O_QUEST: + case OOR1: + case OOR2: + case O_CH: + assert(nope); + break; + case OLPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_so = sp - m->offp; + break; + case ORPAREN: + i = OPND(m->g->strip[ss]); + assert(0 < i && i <= m->g->nsub); + m->pmatch[i].rm_eo = sp - m->offp; + break; + default: /* uh oh */ + assert(nope); + break; + } + } + + assert(sp == stop); + return(sp); +} + +/* + - backref - figure out what matched what, figuring in back references + */ +static const char * /* == stop (success) or NULL (failure) */ +backref(struct match *m, const char *start, const char *stop, sopno startst, + sopno stopst, sopno lev, int rec) /* PLUS nesting level */ +{ + int i; + sopno ss; /* start sop of current subRE */ + const char *sp; /* start of string matched by it */ + sopno ssub; /* start sop of subsubRE */ + sopno esub; /* end sop of subsubRE */ + const char *ssp; /* start of string matched by subsubRE */ + const char *dp; + size_t len; + int hard; + sop s; + llvm_regoff_t offsave; + cset *cs; + + AT("back", start, stop, startst, stopst); + sp = start; + + /* get as far as we can with easy stuff */ + hard = 0; + for (ss = startst; !hard && ss < stopst; ss++) + switch (OP(s = 
m->g->strip[ss])) {
+    case OCHAR:
+      if (sp == stop || *sp++ != (char)OPND(s))
+        return(NULL);
+      break;
+    case OANY:
+      if (sp == stop)
+        return(NULL);
+      sp++;
+      break;
+    case OANYOF:
+      cs = &m->g->sets[OPND(s)];
+      if (sp == stop || !CHIN(cs, *sp++))
+        return(NULL);
+      break;
+    case OBOL:
+      if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+           (sp < m->endp && *(sp-1) == '\n' &&
+            (m->g->cflags&REG_NEWLINE)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OEOL:
+      if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+           (sp < m->endp && *sp == '\n' &&
+            (m->g->cflags&REG_NEWLINE)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OBOW:
+      if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
+            (sp < m->endp && *(sp-1) == '\n' &&
+             (m->g->cflags&REG_NEWLINE)) ||
+            (sp > m->beginp &&
+             !ISWORD(*(sp-1))) ) &&
+          (sp < m->endp && ISWORD(*sp)) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case OEOW:
+      if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
+            (sp < m->endp && *sp == '\n' &&
+             (m->g->cflags&REG_NEWLINE)) ||
+            (sp < m->endp && !ISWORD(*sp)) ) &&
+          (sp > m->beginp && ISWORD(*(sp-1))) )
+        { /* yes */ }
+      else
+        return(NULL);
+      break;
+    case O_QUEST:
+      break;
+    case OOR1: /* matches null but needs to skip */
+      ss++;
+      s = m->g->strip[ss];
+      do {
+        assert(OP(s) == OOR2);
+        ss += OPND(s);
+      } while (OP(s = m->g->strip[ss]) != O_CH);
+      /* note that the ss++ gets us past the O_CH */
+      break;
+    default: /* have to make a choice */
+      hard = 1;
+      break;
+    }
+  if (!hard) { /* that was it! */
+    if (sp != stop)
+      return(NULL);
+    return(sp);
+  }
+  ss--; /* adjust for the for's final increment */
+
+  /* the hard stuff */
+  AT("hard", sp, stop, ss, stopst);
+  s = m->g->strip[ss];
+  switch (OP(s)) {
+  case OBACK_: /* the vilest depths */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    if (m->pmatch[i].rm_eo == -1)
+      return(NULL);
+    assert(m->pmatch[i].rm_so != -1);
+    len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+    if (len == 0 && rec++ > MAX_RECURSION)
+      return(NULL);
+    assert(stop - m->beginp >= len);
+    if (sp > stop - len)
+      return(NULL); /* not enough left to match */
+    ssp = m->offp + m->pmatch[i].rm_so;
+    if (memcmp(sp, ssp, len) != 0)
+      return(NULL);
+    while (m->g->strip[ss] != SOP(O_BACK, i))
+      ss++;
+    return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
+    break;
+  case OQUEST_: /* to null or not */
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp); /* not */
+    return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
+    break;
+  case OPLUS_:
+    assert(m->lastpos != NULL);
+    assert(lev+1 <= m->g->nplus);
+    m->lastpos[lev+1] = sp;
+    return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
+    break;
+  case O_PLUS:
+    if (sp == m->lastpos[lev]) /* last pass matched null */
+      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+    /* try another pass */
+    m->lastpos[lev] = sp;
+    dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
+    if (dp == NULL)
+      return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
+    else
+      return(dp);
+    break;
+  case OCH_: /* find the right one, if any */
+    ssub = ss + 1;
+    esub = ss + OPND(s) - 1;
+    assert(OP(m->g->strip[esub]) == OOR1);
+    for (;;) { /* find first matching branch */
+      dp = backref(m, sp, stop, ssub, esub, lev, rec);
+      if (dp != NULL)
+        return(dp);
+      /* that one missed, try next one */
+      if (OP(m->g->strip[esub]) == O_CH)
+        return(NULL); /* there is none */
+      esub++;
+      assert(OP(m->g->strip[esub]) == OOR2);
+      ssub = esub + 1;
+      esub += OPND(m->g->strip[esub]);
+      if (OP(m->g->strip[esub]) == OOR2)
+        esub--;
+      else
+        assert(OP(m->g->strip[esub]) == O_CH);
+    }
+    break;
+  case OLPAREN: /* must undo assignment if rest fails */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    offsave = m->pmatch[i].rm_so;
+    m->pmatch[i].rm_so = sp - m->offp;
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp);
+    m->pmatch[i].rm_so = offsave;
+    return(NULL);
+    break;
+  case ORPAREN: /* must undo assignment if rest fails */
+    i = OPND(s);
+    assert(0 < i && i <= m->g->nsub);
+    offsave = m->pmatch[i].rm_eo;
+    m->pmatch[i].rm_eo = sp - m->offp;
+    dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
+    if (dp != NULL)
+      return(dp);
+    m->pmatch[i].rm_eo = offsave;
+    return(NULL);
+    break;
+  default: /* uh oh */
+    assert(nope);
+    break;
+  }
+
+  /* "can't happen" */
+  assert(nope);
+  /* NOTREACHED */
+  return NULL;
+}
+
+/*
+ - fast - step through the string at top speed
+ */
+static const char * /* where tentative match ended, or NULL */
+fast(struct match *m, const char *start, const char *stop, sopno startst,
+    sopno stopst)
+{
+  states st = m->st;
+  states fresh = m->fresh;
+  states tmp = m->tmp;
+  const char *p = start;
+  int c = (start == m->beginp) ? OUT : *(start-1);
+  int lastc; /* previous c */
+  int flagch;
+  int i;
+  const char *coldp; /* last p after which no match was underway */
+
+  CLEAR(st);
+  SET1(st, startst);
+  st = step(m->g, startst, stopst, st, NOTHING, st);
+  ASSIGN(fresh, st);
+  SP("start", st, *p);
+  coldp = NULL;
+  for (;;) {
+    /* next character */
+    lastc = c;
+    c = (p == m->endp) ? OUT : *p;
+    if (EQ(st, fresh))
+      coldp = p;
+
+    /* is there an EOL and/or BOL between lastc and c? */
+    flagch = '\0';
+    i = 0;
+    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+         (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+      flagch = BOL;
+      i = m->g->nbol;
+    }
+    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+         (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+      flagch = (flagch == BOL) ? BOLEOL : EOL;
+      i += m->g->neol;
+    }
+    if (i != 0) {
+      for (; i > 0; i--)
+        st = step(m->g, startst, stopst, st, flagch, st);
+      SP("boleol", st, c);
+    }
+
+    /* how about a word boundary? */
+    if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+         (c != OUT && ISWORD(c)) ) {
+      flagch = BOW;
+    }
+    if ( (lastc != OUT && ISWORD(lastc)) &&
+         (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+      flagch = EOW;
+    }
+    if (flagch == BOW || flagch == EOW) {
+      st = step(m->g, startst, stopst, st, flagch, st);
+      SP("boweow", st, c);
+    }
+
+    /* are we done? */
+    if (ISSET(st, stopst) || p == stop)
+      break; /* NOTE BREAK OUT */
+
+    /* no, we must deal with this character */
+    ASSIGN(tmp, st);
+    ASSIGN(st, fresh);
+    assert(c != OUT);
+    st = step(m->g, startst, stopst, tmp, c, st);
+    SP("aft", st, c);
+    assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+    p++;
+  }
+
+  assert(coldp != NULL);
+  m->coldp = coldp;
+  if (ISSET(st, stopst))
+    return(p+1);
+  else
+    return(NULL);
+}
+
+/*
+ - slow - step through the string more deliberately
+ */
+static const char * /* where it ended */
+slow(struct match *m, const char *start, const char *stop, sopno startst,
+    sopno stopst)
+{
+  states st = m->st;
+  states empty = m->empty;
+  states tmp = m->tmp;
+  const char *p = start;
+  int c = (start == m->beginp) ? OUT : *(start-1);
+  int lastc; /* previous c */
+  int flagch;
+  int i;
+  const char *matchp; /* last p at which a match ended */
+
+  AT("slow", start, stop, startst, stopst);
+  CLEAR(st);
+  SET1(st, startst);
+  SP("sstart", st, *p);
+  st = step(m->g, startst, stopst, st, NOTHING, st);
+  matchp = NULL;
+  for (;;) {
+    /* next character */
+    lastc = c;
+    c = (p == m->endp) ? OUT : *p;
+
+    /* is there an EOL and/or BOL between lastc and c? */
+    flagch = '\0';
+    i = 0;
+    if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
+         (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
+      flagch = BOL;
+      i = m->g->nbol;
+    }
+    if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
+         (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
+      flagch = (flagch == BOL) ? BOLEOL : EOL;
+      i += m->g->neol;
+    }
+    if (i != 0) {
+      for (; i > 0; i--)
+        st = step(m->g, startst, stopst, st, flagch, st);
+      SP("sboleol", st, c);
+    }
+
+    /* how about a word boundary? */
+    if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
+         (c != OUT && ISWORD(c)) ) {
+      flagch = BOW;
+    }
+    if ( (lastc != OUT && ISWORD(lastc)) &&
+         (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
+      flagch = EOW;
+    }
+    if (flagch == BOW || flagch == EOW) {
+      st = step(m->g, startst, stopst, st, flagch, st);
+      SP("sboweow", st, c);
+    }
+
+    /* are we done? */
+    if (ISSET(st, stopst))
+      matchp = p;
+    if (EQ(st, empty) || p == stop)
+      break; /* NOTE BREAK OUT */
+
+    /* no, we must deal with this character */
+    ASSIGN(tmp, st);
+    ASSIGN(st, empty);
+    assert(c != OUT);
+    st = step(m->g, startst, stopst, tmp, c, st);
+    SP("saft", st, c);
+    assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
+    p++;
+  }
+
+  return(matchp);
+}
+
+
+/*
+ - step - map set of states reachable before char to set reachable after
+ */
+static states
+step(struct re_guts *g,
+    sopno start, /* start state within strip */
+    sopno stop, /* state after stop state within strip */
+    states bef, /* states reachable before */
+    int ch, /* character or NONCHAR code */
+    states aft) /* states already known reachable after */
+{
+  cset *cs;
+  sop s;
+  sopno pc;
+  onestate here; /* note, macros know this name */
+  sopno look;
+  int i;
+
+  for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
+    s = g->strip[pc];
+    switch (OP(s)) {
+    case OEND:
+      assert(pc == stop-1);
+      break;
+    case OCHAR:
+      /* only characters can match */
+      assert(!NONCHAR(ch) || ch != (char)OPND(s));
+      if (ch == (char)OPND(s))
+        FWD(aft, bef, 1);
+      break;
+    case OBOL:
+      if (ch == BOL || ch == BOLEOL)
+        FWD(aft, bef, 1);
+      break;
+    case OEOL:
+      if (ch == EOL || ch == BOLEOL)
+        FWD(aft, bef, 1);
+      break;
+    case OBOW:
+      if (ch == BOW)
+        FWD(aft, bef, 1);
+      break;
+    case OEOW:
+      if (ch == EOW)
+        FWD(aft, bef, 1);
+      break;
+    case OANY:
+      if (!NONCHAR(ch))
+        FWD(aft, bef, 1);
+      break;
+    case OANYOF:
+      cs = &g->sets[OPND(s)];
+      if (!NONCHAR(ch) && CHIN(cs, ch))
+        FWD(aft, bef, 1);
+      break;
+    case OBACK_: /* ignored here */
+    case O_BACK:
+      FWD(aft, aft, 1);
+      break;
+    case OPLUS_: /* forward, this is just an empty */
+      FWD(aft, aft, 1);
+      break;
+    case O_PLUS: /* both forward and back */
+      FWD(aft, aft, 1);
+      i = ISSETBACK(aft, OPND(s));
+      BACK(aft, aft, OPND(s));
+      if (!i && ISSETBACK(aft, OPND(s))) {
+        /* oho, must reconsider loop body */
+        pc -= OPND(s) + 1;
+        INIT(here, pc);
+      }
+      break;
+    case OQUEST_: /* two branches, both forward */
+      FWD(aft, aft, 1);
+      FWD(aft, aft, OPND(s));
+      break;
+    case O_QUEST: /* just an empty */
+      FWD(aft, aft, 1);
+      break;
+    case OLPAREN: /* not significant here */
+    case ORPAREN:
+      FWD(aft, aft, 1);
+      break;
+    case OCH_: /* mark the first two branches */
+      FWD(aft, aft, 1);
+      assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+      FWD(aft, aft, OPND(s));
+      break;
+    case OOR1: /* done a branch, find the O_CH */
+      if (ISSTATEIN(aft, here)) {
+        for (look = 1;
+            OP(s = g->strip[pc+look]) != O_CH;
+            look += OPND(s))
+          assert(OP(s) == OOR2);
+        FWD(aft, aft, look);
+      }
+      break;
+    case OOR2: /* propagate OCH_'s marking */
+      FWD(aft, aft, 1);
+      if (OP(g->strip[pc+OPND(s)]) != O_CH) {
+        assert(OP(g->strip[pc+OPND(s)]) == OOR2);
+        FWD(aft, aft, OPND(s));
+      }
+      break;
+    case O_CH: /* just empty */
+      FWD(aft, aft, 1);
+      break;
+    default: /* ooooops... */
+      assert(nope);
+      break;
+    }
+  }
+
+  return(aft);
+}
+
+#ifdef REDEBUG
+/*
+ - print - print a set of states
+ */
+static void
+print(struct match *m, char *caption, states st, int ch, FILE *d)
+{
+  struct re_guts *g = m->g;
+  int i;
+  int first = 1;
+
+  if (!(m->eflags&REG_TRACE))
+    return;
+
+  (void)fprintf(d, "%s", caption);
+  if (ch != '\0')
+    (void)fprintf(d, " %s", pchar(ch));
+  for (i = 0; i < g->nstates; i++)
+    if (ISSET(st, i)) {
+      (void)fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
+      first = 0;
+    }
+  (void)fprintf(d, "\n");
+}
+
+/*
+ - at - print current situation
+ */
+static void
+at(struct match *m, char *title, char *start, char *stop, sopno startst,
+    sopno stopst)
+{
+  if (!(m->eflags&REG_TRACE))
+    return;
+
+  (void)printf("%s %s-", title, pchar(*start));
+  (void)printf("%s ", pchar(*stop));
+  (void)printf("%ld-%ld\n", (long)startst, (long)stopst);
+}
+
+#ifndef PCHARDONE
+#define PCHARDONE /* never again */
+/*
+ - pchar - make a character printable
+ *
+ * Is this identical to regchar() over in debug.c?  Well, yes.  But a
+ * duplicate here avoids having a debugging-capable regexec.o tied to
+ * a matching debug.o, and this is convenient.  It all disappears in
+ * the non-debug compilation anyway, so it doesn't matter much.
+ */
+static char * /* -> representation */
+pchar(int ch)
+{
+  static char pbuf[10];
+
+  if (isprint(ch) || ch == ' ')
+    (void)snprintf(pbuf, sizeof pbuf, "%c", ch);
+  else
+    (void)snprintf(pbuf, sizeof pbuf, "\\%o", ch);
+  return(pbuf);
+}
+#endif
+#endif
+
+#undef matcher
+#undef fast
+#undef slow
+#undef dissect
+#undef backref
+#undef step
+#undef print
+#undef at
+#undef match
+#undef nope
diff --git a/final/lib/Support/regerror.c b/final/lib/Support/regerror.c
new file mode 100644
index 00000000000..1d67c9a2b03
--- /dev/null
+++ b/final/lib/Support/regerror.c
@@ -0,0 +1,135 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regerror.c 8.4 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif
+
+static const char *regatoi(const llvm_regex_t *, char *, int);
+
+static struct rerr {
+  int code;
+  const char *name;
+  const char *explain;
+} rerrs[] = {
+  { REG_NOMATCH, "REG_NOMATCH", "llvm_regexec() failed to match" },
+  { REG_BADPAT, "REG_BADPAT", "invalid regular expression" },
+  { REG_ECOLLATE, "REG_ECOLLATE", "invalid collating element" },
+  { REG_ECTYPE, "REG_ECTYPE", "invalid character class" },
+  { REG_EESCAPE, "REG_EESCAPE", "trailing backslash (\\)" },
+  { REG_ESUBREG, "REG_ESUBREG", "invalid backreference number" },
+  { REG_EBRACK, "REG_EBRACK", "brackets ([ ]) not balanced" },
+  { REG_EPAREN, "REG_EPAREN", "parentheses not balanced" },
+  { REG_EBRACE, "REG_EBRACE", "braces not balanced" },
+  { REG_BADBR, "REG_BADBR", "invalid repetition count(s)" },
+  { REG_ERANGE, "REG_ERANGE", "invalid character range" },
+  { REG_ESPACE, "REG_ESPACE", "out of memory" },
+  { REG_BADRPT, "REG_BADRPT", "repetition-operator operand invalid" },
+  { REG_EMPTY, "REG_EMPTY", "empty (sub)expression" },
+  { REG_ASSERT, "REG_ASSERT", "\"can't happen\" -- you found a bug" },
+  { REG_INVARG, "REG_INVARG", "invalid argument to regex routine" },
+  { 0, "", "*** unknown regexp error code ***" }
+};
+
+/*
+ - llvm_regerror - the interface to error numbers
+ = extern size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+ */
+/* ARGSUSED */
+size_t
+llvm_regerror(int errcode, const llvm_regex_t *preg, char *errbuf, size_t errbuf_size)
+{
+  struct rerr *r;
+  size_t len;
+  int target = errcode &~ REG_ITOA;
+  const char *s;
+  char convbuf[50];
+
+  if (errcode == REG_ATOI)
+    s = regatoi(preg, convbuf, sizeof convbuf);
+  else {
+    for (r = rerrs; r->code != 0; r++)
+      if (r->code == target)
+        break;
+
+    if (errcode&REG_ITOA) {
+      if (r->code != 0) {
+        assert(strlen(r->name) < sizeof(convbuf));
+        (void) llvm_strlcpy(convbuf, r->name, sizeof convbuf);
+      } else
+        (void)snprintf(convbuf, sizeof convbuf,
+            "REG_0x%x", target);
+      s = convbuf;
+    } else
+      s = r->explain;
+  }
+
+  len = strlen(s) + 1;
+  if (errbuf_size > 0) {
+    llvm_strlcpy(errbuf, s, errbuf_size);
+  }
+
+  return(len);
+}
+
+/*
+ - regatoi - internal routine to implement REG_ATOI
+ */
+static const char *
+regatoi(const llvm_regex_t *preg, char *localbuf, int localbufsize)
+{
+  struct rerr *r;
+
+  for (r = rerrs; r->code != 0; r++)
+    if (strcmp(r->name, preg->re_endp) == 0)
+      break;
+  if (r->code == 0)
+    return("0");
+
+  (void)snprintf(localbuf, localbufsize, "%d", r->code);
+  return(localbuf);
+}
diff --git a/final/lib/Support/regex2.h
b/final/lib/Support/regex2.h new file mode 100644 index 00000000000..21659c34449 --- /dev/null +++ b/final/lib/Support/regex2.h @@ -0,0 +1,157 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)regex2.h 8.4 (Berkeley) 3/20/94 + */ + +/* + * internals of regex_t + */ +#define MAGIC1 ((('r'^0200)<<8) | 'e') + +/* + * The internal representation is a *strip*, a sequence of + * operators ending with an endmarker. (Some terminology etc. is a + * historical relic of earlier versions which used multiple strips.) + * Certain oddities in the representation are there to permit running + * the machinery backwards; in particular, any deviation from sequential + * flow must be marked at both its source and its destination. Some + * fine points: + * + * - OPLUS_ and O_PLUS are *inside* the loop they create. + * - OQUEST_ and O_QUEST are *outside* the bypass they create. + * - OCH_ and O_CH are *outside* the multi-way branch they create, while + * OOR1 and OOR2 are respectively the end and the beginning of one of + * the branches. Note that there is an implicit OOR2 following OCH_ + * and an implicit OOR1 preceding O_CH. + * + * In state representations, an operator's bit is on to signify a state + * immediately *preceding* "execution" of that operator. 
+ */
+typedef unsigned long sop; /* strip operator */
+typedef long sopno;
+#define OPRMASK 0xf8000000LU
+#define OPDMASK 0x07ffffffLU
+#define OPSHIFT ((unsigned)27)
+#define OP(n) ((n)&OPRMASK)
+#define OPND(n) ((n)&OPDMASK)
+#define SOP(op, opnd) ((op)|(opnd))
+/* operators meaning operand */
+/* (back, fwd are offsets) */
+#define OEND (1LU<<OPSHIFT) /* endmarker - */
+#define OCHAR (2LU<<OPSHIFT) /* character unsigned char */
+#define OBOL (3LU<<OPSHIFT) /* left anchor - */
+#define OEOL (4LU<<OPSHIFT) /* right anchor - */
+#define OANY (5LU<<OPSHIFT) /* . - */
+#define OANYOF (6LU<<OPSHIFT) /* [...] set number */
+#define OBACK_ (7LU<<OPSHIFT) /* begin \d paren number */
+#define O_BACK (8LU<<OPSHIFT) /* end \d paren number */
+#define OPLUS_ (9LU<<OPSHIFT) /* + prefix fwd to suffix */
+#define O_PLUS (10LU<<OPSHIFT) /* + suffix back to prefix */
+#define OQUEST_ (11LU<<OPSHIFT) /* ? prefix fwd to suffix */
+#define O_QUEST (12LU<<OPSHIFT) /* ? suffix back to prefix */
+#define OLPAREN (13LU<<OPSHIFT) /* ( fwd to ) */
+#define ORPAREN (14LU<<OPSHIFT) /* ) back to ( */
+#define OCH_ (15LU<<OPSHIFT) /* begin choice fwd to OOR2 */
+#define OOR1 (16LU<<OPSHIFT) /* | pt. 1 back to OOR1 or OCH_ */
+#define OOR2 (17LU<<OPSHIFT) /* | pt. 2 fwd to OOR2 or O_CH */
+#define O_CH (18LU<<OPSHIFT) /* end choice back to OOR1 */
+#define OBOW (19LU<<OPSHIFT) /* begin word - */
+#define OEOW (20LU<<OPSHIFT) /* end word - */
+
+/*
+ * Structure for [] character-set representation. Character sets are
+ * done as bit vectors, grouped 8 to a byte vector for compactness.
+ * The individual set therefore has both a pointer to the byte vector
+ * and a mask to pick out the relevant bit of each byte. A hash code
+ * simplifies the search for an exact match.
+ */
+typedef struct {
+ uch *ptr; /* -> uch [csetsize] */
+ uch mask; /* bit within array */
+ uch hash; /* hash code */
+ size_t smultis;
+ char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
+} cset;
+/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
+#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c))
+#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c))
+#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
+#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* llvm_regcomp() internal fns */
+#define MCsub(p, cs, cp) mcsub(p, cs, cp)
+#define MCin(p, cs, cp) mcin(p, cs, cp)
+
+/* stuff for character categories */
+typedef unsigned char cat_t;
+
+/*
+ * main compiled-expression structure
+ */
+struct re_guts {
+ int magic;
+# define MAGIC2 ((('R'^0200)<<8)|'E')
+ sop *strip; /* malloced area for strip */
+ int csetsize; /* number of bits in a cset vector */
+ int ncsets; /* number of csets in use */
+ cset *sets; /* -> cset [ncsets] */
+ uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */
+ int cflags; /* copy of llvm_regcomp() cflags argument */
+ sopno nstates; /* = number of sops */
+ sopno firststate; /* the initial OEND (normally 0) */
+ sopno laststate; /* the final OEND */
+ int iflags; /* internal flags */
+# define USEBOL 01 /* used ^ */
+# define USEEOL 02 /* used $ */
+# define REGEX_BAD 04 /* something wrong */
+ int nbol; /* number of ^ used */
+ int neol; /* number of $ used */
+ int ncategories; /* how many character categories */
+ cat_t *categories; /* -> catspace[-CHAR_MIN] */
+ char *must; /* match must contain this string */
+ int mlen; /* length of must */
+ size_t nsub; /* copy of re_nsub */
+ int backrefs; /* does it use back references? */
+ sopno nplus; /* how deep does it nest +s? */
+ /* catspace must be last */
+ cat_t catspace[1]; /* actually [NC] */
+};
+
+/* misc utilities */
+#define OUT (CHAR_MAX+1) /* a non-character value */
+#define ISWORD(c) (isalnum(c&0xff) || (c) == '_')
diff --git a/final/lib/Support/regex_impl.h b/final/lib/Support/regex_impl.h
new file mode 100644
index 00000000000..f8296c9ff75
--- /dev/null
+++ b/final/lib/Support/regex_impl.h
@@ -0,0 +1,108 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992 Henry Spencer.
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer of the University of Toronto.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regex.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _REGEX_H_
+#define _REGEX_H_
+
+#include <sys/types.h>
+typedef off_t llvm_regoff_t;
+typedef struct {
+ llvm_regoff_t rm_so; /* start of match */
+ llvm_regoff_t rm_eo; /* end of match */
+} llvm_regmatch_t;
+
+typedef struct llvm_regex {
+ int re_magic;
+ size_t re_nsub; /* number of parenthesized subexpressions */
+ const char *re_endp; /* end pointer for REG_PEND */
+ struct re_guts *re_g; /* none of your business :-) */
+} llvm_regex_t;
+
+/* llvm_regcomp() flags */
+#define REG_BASIC 0000
+#define REG_EXTENDED 0001
+#define REG_ICASE 0002
+#define REG_NOSUB 0004
+#define REG_NEWLINE 0010
+#define REG_NOSPEC 0020
+#define REG_PEND 0040
+#define REG_DUMP 0200
+
+/* llvm_regerror() flags */
+#define REG_NOMATCH 1
+#define REG_BADPAT 2
+#define REG_ECOLLATE 3
+#define REG_ECTYPE 4
+#define REG_EESCAPE 5
+#define REG_ESUBREG 6
+#define REG_EBRACK 7
+#define REG_EPAREN 8
+#define REG_EBRACE 9
+#define REG_BADBR 10
+#define REG_ERANGE 11
+#define REG_ESPACE 12
+#define REG_BADRPT 13
+#define REG_EMPTY 14
+#define REG_ASSERT 15
+#define REG_INVARG 16
+#define REG_ATOI 255 /* convert name to number (!) */
+#define REG_ITOA 0400 /* convert number to name (!) */
+
+/* llvm_regexec() flags */
+#define REG_NOTBOL 00001
+#define REG_NOTEOL 00002
+#define REG_STARTEND 00004
+#define REG_TRACE 00400 /* tracing of execution */
+#define REG_LARGE 01000 /* force large representation */
+#define REG_BACKR 02000 /* force use of backref code */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int llvm_regcomp(llvm_regex_t *, const char *, int);
+size_t llvm_regerror(int, const llvm_regex_t *, char *, size_t);
+int llvm_regexec(const llvm_regex_t *, const char *, size_t,
+ llvm_regmatch_t [], int);
+void llvm_regfree(llvm_regex_t *);
+size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !_REGEX_H_ */
diff --git a/final/lib/Support/regexec.c b/final/lib/Support/regexec.c
new file mode 100644
index 00000000000..007861675ba
--- /dev/null
+++ b/final/lib/Support/regexec.c
@@ -0,0 +1,162 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regexec.c 8.3 (Berkeley) 3/20/94
+ */
+
+/*
+ * the outer shell of llvm_regexec()
+ *
+ * This file includes engine.inc *twice*, after muchos fiddling with the
+ * macros that code uses. This lets the same code operate on two different
+ * representations for state sets.
+ */
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/* macros for manipulating states, small version */
+/* FIXME: 'states' is assumed as 'long' on small version. */
+#define states1 long /* for later use in llvm_regexec() decision */
+#define states states1
+#define CLEAR(v) ((v) = 0)
+#define SET0(v, n) ((v) &= ~((unsigned long)1 << (n)))
+#define SET1(v, n) ((v) |= (unsigned long)1 << (n))
+#define ISSET(v, n) (((v) & ((unsigned long)1 << (n))) != 0)
+#define ASSIGN(d, s) ((d) = (s))
+#define EQ(a, b) ((a) == (b))
+#define STATEVARS long dummy /* dummy version */
+#define STATESETUP(m, n) /* nothing */
+#define STATETEARDOWN(m) /* nothing */
+#define SETUP(v) ((v) = 0)
+#define onestate long
+#define INIT(o, n) ((o) = (unsigned long)1 << (n))
+#define INC(o) ((o) <<= 1)
+#define ISSTATEIN(v, o) (((v) & (o)) != 0)
+/* some abbreviations; note that some of these know variable names! */
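+/* (as a worked reading of the two macros just below, with illustrative
+ operand names: FWD(aft, bef, 1) copies bef's bit for the current state
+ "here" into aft's bit for the state one position later -- a single
+ branch-free NFA transition step) */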
*/ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) +#define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) +#define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* function names */ +#define SNAMES /* engine.inc looks after details */ + +#include "regengine.inc" + +/* now undo things */ +#undef states +#undef CLEAR +#undef SET0 +#undef SET1 +#undef ISSET +#undef ASSIGN +#undef EQ +#undef STATEVARS +#undef STATESETUP +#undef STATETEARDOWN +#undef SETUP +#undef onestate +#undef INIT +#undef INC +#undef ISSTATEIN +#undef FWD +#undef BACK +#undef ISSETBACK +#undef SNAMES + +/* macros for manipulating states, large version */ +#define states char * +#define CLEAR(v) memset(v, 0, m->g->nstates) +#define SET0(v, n) ((v)[n] = 0) +#define SET1(v, n) ((v)[n] = 1) +#define ISSET(v, n) ((v)[n]) +#define ASSIGN(d, s) memmove(d, s, m->g->nstates) +#define EQ(a, b) (memcmp(a, b, m->g->nstates) == 0) +#define STATEVARS long vn; char *space +#define STATESETUP(m, nv) { (m)->space = malloc((nv)*(m)->g->nstates); \ + if ((m)->space == NULL) return(REG_ESPACE); \ + (m)->vn = 0; } +#define STATETEARDOWN(m) { free((m)->space); } +#define SETUP(v) ((v) = &m->space[m->vn++ * m->g->nstates]) +#define onestate long +#define INIT(o, n) ((o) = (n)) +#define INC(o) ((o)++) +#define ISSTATEIN(v, o) ((v)[o]) +/* some abbreviations; note that some of these know variable names! */ +/* do "if I'm here, I can also be there" etc without branches */ +#define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) +#define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) +#define ISSETBACK(v, n) ((v)[here - (n)]) +/* function names */ +#define LNAMES /* flag */ + +#include "regengine.inc" + +/* + - llvm_regexec - interface for matching + * + * We put this here so we can exploit knowledge of the state representation + * when choosing which matcher to call. Also, by this point the matchers + * have been prototyped. + */ +int /* 0 success, REG_NOMATCH failure */ +llvm_regexec(const llvm_regex_t *preg, const char *string, size_t nmatch, + llvm_regmatch_t pmatch[], int eflags) +{ + struct re_guts *g = preg->re_g; +#ifdef REDEBUG +# define GOODFLAGS(f) (f) +#else +# define GOODFLAGS(f) ((f)&(REG_NOTBOL|REG_NOTEOL|REG_STARTEND)) +#endif + + if (preg->re_magic != MAGIC1 || g->magic != MAGIC2) + return(REG_BADPAT); + assert(!(g->iflags®EX_BAD)); + if (g->iflags®EX_BAD) /* backstop for no-debug case */ + return(REG_BADPAT); + eflags = GOODFLAGS(eflags); + + if (g->nstates <= (long)(CHAR_BIT*sizeof(states1)) && !(eflags®_LARGE)) + return(smatcher(g, string, nmatch, pmatch, eflags)); + else + return(lmatcher(g, string, nmatch, pmatch, eflags)); +} diff --git a/final/lib/Support/regfree.c b/final/lib/Support/regfree.c new file mode 100644 index 00000000000..dc2b4af90fa --- /dev/null +++ b/final/lib/Support/regfree.c @@ -0,0 +1,72 @@ +/*- + * This code is derived from OpenBSD's libc/regex, original license follows: + * + * Copyright (c) 1992, 1993, 1994 Henry Spencer. + * Copyright (c) 1992, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Henry Spencer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)regfree.c 8.3 (Berkeley) 3/20/94
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "regex_impl.h"
+
+#include "regutils.h"
+#include "regex2.h"
+
+/*
+ - llvm_regfree - free everything
+ */
+void
+llvm_regfree(llvm_regex_t *preg)
+{
+ struct re_guts *g;
+
+ if (preg->re_magic != MAGIC1) /* oops */
+ return; /* nice to complain, but hard */
+
+ g = preg->re_g;
+ if (g == NULL || g->magic != MAGIC2) /* oops again */
+ return;
+ preg->re_magic = 0; /* mark it invalid */
+ g->magic = 0; /* mark it invalid */
+
+ if (g->strip != NULL)
+ free((char *)g->strip);
+ if (g->sets != NULL)
+ free((char *)g->sets);
+ if (g->setbits != NULL)
+ free((char *)g->setbits);
+ if (g->must != NULL)
+ free(g->must);
+ free((char *)g);
+}
diff --git a/final/lib/Support/regstrlcpy.c b/final/lib/Support/regstrlcpy.c
new file mode 100644
index 00000000000..8b68afdf75f
--- /dev/null
+++ b/final/lib/Support/regstrlcpy.c
@@ -0,0 +1,52 @@
+/*
+ * This code is derived from OpenBSD's libc, original license follows:
+ *
+ * Copyright (c) 1998 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <string.h>
+
+#include "regex_impl.h"
+/*
+ * Copy src to string dst of size siz. At most siz-1 characters
+ * will be copied. Always NUL terminates (unless siz == 0).
+ * Returns strlen(src); if retval >= siz, truncation occurred.
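+ *
+ * Usage sketch (illustrative; buf and name are placeholder identifiers):
+ *
+ *   char buf[16];
+ *   if (llvm_strlcpy(buf, name, sizeof buf) >= sizeof buf)
+ *     ...name was truncated to 15 bytes plus the terminating NUL...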
+ */
+size_t
+llvm_strlcpy(char *dst, const char *src, size_t siz)
+{
+ char *d = dst;
+ const char *s = src;
+ size_t n = siz;
+
+ /* Copy as many bytes as will fit */
+ if (n != 0) {
+ while (--n != 0) {
+ if ((*d++ = *s++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src */
+ if (n == 0) {
+ if (siz != 0)
+ *d = '\0'; /* NUL-terminate dst */
+ while (*s++)
+ ;
+ }
+
+ return(s - src - 1); /* count does not include NUL */
+}
diff --git a/final/lib/Support/regutils.h b/final/lib/Support/regutils.h
new file mode 100644
index 00000000000..d0ee100a382
--- /dev/null
+++ b/final/lib/Support/regutils.h
@@ -0,0 +1,53 @@
+/*-
+ * This code is derived from OpenBSD's libc/regex, original license follows:
+ *
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)utils.h 8.3 (Berkeley) 3/20/94
+ */
+
+/* utility definitions */
+#define NC (CHAR_MAX - CHAR_MIN + 1)
+typedef unsigned char uch;
+
+/* switch off assertions (if not already off) if no REDEBUG */
+#ifndef REDEBUG
+#ifndef NDEBUG
+#define NDEBUG /* no assertions please */
+#endif
+#endif
+#include <assert.h>
+
+/* for old systems with bcopy() but no memmove() */
+#ifdef USEBCOPY
+#define memmove(d, s, c) bcopy(s, d, c)
+#endif
diff --git a/final/lib/Support/system_error.cpp b/final/lib/Support/system_error.cpp
new file mode 100644
index 00000000000..56898de3152
--- /dev/null
+++ b/final/lib/Support/system_error.cpp
@@ -0,0 +1,130 @@
+//===---------------------- system_error.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This was lifted from libc++ and modified for C++03.
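+//
+// Usage sketch (illustrative only; assumes the errc enumeration that
+// llvm/Support/system_error.h declares alongside these classes):
+//
+//   error_code ec(ENOENT, posix_category());
+//   if (ec == errc::no_such_file_or_directory)
+//     ...matched through the category equivalence logic defined below...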
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/Errno.h"
+#include <string>
+#include <cstring>
+
+namespace llvm {
+
+// class error_category
+
+error_category::error_category() {
+}
+
+error_category::~error_category() {
+}
+
+error_condition
+error_category::default_error_condition(int ev) const {
+ return error_condition(ev, *this);
+}
+
+bool
+error_category::equivalent(int code, const error_condition& condition) const {
+ return default_error_condition(code) == condition;
+}
+
+bool
+error_category::equivalent(const error_code& code, int condition) const {
+ return *this == code.category() && code.value() == condition;
+}
+
+std::string
+_do_message::message(int ev) const {
+ return std::string(sys::StrError(ev));
+}
+
+class _generic_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+};
+
+const char*
+_generic_error_category::name() const {
+ return "generic";
+}
+
+std::string
+_generic_error_category::message(int ev) const {
+#ifdef ELAST
+ if (ev > ELAST)
+ return std::string("unspecified generic_category error");
+#endif // ELAST
+ return _do_message::message(ev);
+}
+
+const error_category&
+generic_category() {
+ static _generic_error_category s;
+ return s;
+}
+
+class _system_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+
+const char*
+_system_error_category::name() const {
+ return "system";
+}
+
+// std::string _system_error_category::message(int ev) const {
+// Is in Platform/system_error.inc
+
+// error_condition _system_error_category::default_error_condition(int ev) const
+// Is in Platform/system_error.inc
+
+const error_category&
+system_category() {
+ static _system_error_category s;
+ return s;
+}
+
+const error_category&
+posix_category() {
+#ifdef LLVM_ON_WIN32
+ return generic_category();
+#else
+ return system_category();
+#endif
+}
+
+// error_condition
+
+std::string
+error_condition::message() const {
+ return _cat_->message(_val_);
+}
+
+// error_code
+
+std::string
+error_code::message() const {
+ return _cat_->message(_val_);
+}
+
+} // end namespace llvm
+
+// Include the truly platform-specific parts of this class.
+#if defined(LLVM_ON_UNIX)
+#include "Unix/system_error.inc"
+#endif
+#if defined(LLVM_ON_WIN32)
+#include "Windows/system_error.inc"
+#endif
diff --git a/final/lib/Target/ARM/ARM.h b/final/lib/Target/ARM/ARM.h
new file mode 100644
index 00000000000..4679f7443bf
--- /dev/null
+++ b/final/lib/Target/ARM/ARM.h
@@ -0,0 +1,63 @@
+//===-- ARM.h - Top-level interface for ARM representation---- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// ARM back-end.
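+//
+// As a hypothetical usage sketch (the pass manager PM and its setup are
+// illustrative; the factory functions are the ones declared below):
+//
+//   PM.add(createARMISelDag(TM, OptLevel));
+//   PM.add(createARMLoadStoreOptimizationPass());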
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_ARM_H
+#define TARGET_ARM_H
+
+#include "ARMBaseInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+
+namespace llvm {
+
+class ARMBaseTargetMachine;
+class FunctionPass;
+class JITCodeEmitter;
+class formatted_raw_ostream;
+class MCCodeEmitter;
+class TargetAsmBackend;
+class MachineInstr;
+class ARMAsmPrinter;
+class MCInst;
+
+MCCodeEmitter *createARMMCCodeEmitter(const Target &,
+ TargetMachine &TM,
+ MCContext &Ctx);
+
+TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
+
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+ JITCodeEmitter &JCE);
+
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
+FunctionPass *createARMGlobalMergePass(const TargetLowering* tli);
+FunctionPass *createARMConstantIslandPass();
+FunctionPass *createNEONMoveFixPass();
+FunctionPass *createMLxExpansionPass();
+FunctionPass *createThumb2ITBlockPass();
+FunctionPass *createThumb2SizeReductionPass();
+
+extern Target TheARMTarget, TheThumbTarget;
+
+void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
+ ARMAsmPrinter &AP);
+
+} // end namespace llvm;
+
+#endif
diff --git a/final/lib/Target/ARM/ARM.td b/final/lib/Target/ARM/ARM.td
new file mode 100644
index 00000000000..bf4315fc6c3
--- /dev/null
+++ b/final/lib/Target/ARM/ARM.td
@@ -0,0 +1,215 @@
+//===- ARM.td - Describe the ARM Target Machine ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces which we are implementing
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+
+//===----------------------------------------------------------------------===//
+// ARM Subtarget features.
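+//
+// (Reading the records below: the first string is the -mattr feature name,
+// the next two are the subtarget attribute and value it sets -- e.g.
+// -mattr=+vfp2 sets ARMFPUType to VFPv2; an illustrative gloss.)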
+// + +def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", + "Enable VFP2 instructions">; +def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3", + "Enable VFP3 instructions">; +def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON", + "Enable NEON instructions">; +def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2", + "Enable Thumb2 instructions">; +def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", + "Does not support ARM mode execution">; +def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", + "Enable half-precision floating point">; +def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", + "Restrict VFP3 to 16 double registers">; +def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", + "Enable divide instructions">; +def FeatureT2XtPk : SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true", + "Enable Thumb2 extract and pack instructions">; +def FeatureDB : SubtargetFeature<"db", "HasDataBarrier", "true", + "Has data barrier (dmb / dsb) instructions">; +def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", + "FP compare + branch is slow">; +def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", + "Floating point unit supports single precision only">; + +// Some processors have FP multiply-accumulate instructions that don't +// play nicely with other VFP / NEON instructions, and it's generally better +// to just not use them. +def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", + "Disable VFP / NEON MAC instructions">; +// Some processors benefit from using NEON instructions for scalar +// single-precision FP operations. +def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", + "true", + "Use NEON for single precision FP">; + +// Disable 32-bit to 16-bit narrowing for experimentation. +def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", + "Prefer 32-bit Thumb instrs">; + +// Multiprocessing extension. +def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", + "Supports Multiprocessing extension">; + +// ARM architectures. +def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T", + "ARM v4T">; +def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T", + "ARM v5T">; +def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", + "ARM v5TE, v5TEj, v5TExp">; +def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", + "ARM v6">; +def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M", + "ARM v6m", + [FeatureNoARM, FeatureDB]>; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2", + [FeatureThumb2]>; +def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", + "ARM v7A", + [FeatureThumb2, FeatureNEON, FeatureDB]>; +def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", + "ARM v7M", + [FeatureThumb2, FeatureNoARM, FeatureDB, + FeatureHWDiv]>; + +//===----------------------------------------------------------------------===// +// ARM Processors supported. +// + +include "ARMSchedule.td" + +// ARM processor families. 
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
+ "One of the other ARM processor families">;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+ "Cortex-A8 ARM processors",
+ [FeatureSlowFPBrcc, FeatureNEONForFP,
+ FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+ "Cortex-A9 ARM processors",
+ [FeatureHasSlowFPVMLx, FeatureT2XtPk,
+ FeatureFP16]>;
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
+
+// V4 Processors.
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
+
+// V4T Processors.
+def : ProcNoItin<"arm7tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>;
+def : ProcNoItin<"arm710t", [ArchV4T]>;
+def : ProcNoItin<"arm720t", [ArchV4T]>;
+def : ProcNoItin<"arm9", [ArchV4T]>;
+def : ProcNoItin<"arm9tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm920", [ArchV4T]>;
+def : ProcNoItin<"arm920t", [ArchV4T]>;
+def : ProcNoItin<"arm922t", [ArchV4T]>;
+def : ProcNoItin<"arm940t", [ArchV4T]>;
+def : ProcNoItin<"ep9312", [ArchV4T]>;
+
+// V5T Processors.
+def : ProcNoItin<"arm10tdmi", [ArchV5T]>;
+def : ProcNoItin<"arm1020t", [ArchV5T]>;
+
+// V5TE Processors.
+def : ProcNoItin<"arm9e", [ArchV5TE]>;
+def : ProcNoItin<"arm926ej-s", [ArchV5TE]>;
+def : ProcNoItin<"arm946e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm966e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm968e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm10e", [ArchV5TE]>;
+def : ProcNoItin<"arm1020e", [ArchV5TE]>;
+def : ProcNoItin<"arm1022e", [ArchV5TE]>;
+def : ProcNoItin<"xscale", [ArchV5TE]>;
+def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
+
+// V6 Processors.
+def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
+def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V6M Processors.
+def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+
+// V6T2 Processors.
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+ FeatureHasSlowFPVMLx]>;
+
+// V7 Processors.
+def : Processor<"cortex-a8", CortexA8Itineraries,
+ [ArchV7A, ProcA8]>;
+def : Processor<"cortex-a9", CortexA9Itineraries,
+ [ArchV7A, ProcA9]>;
+
+// V7M Processors.
+def : ProcNoItin<"cortex-m3", [ArchV7M]>; +def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>; + +//===----------------------------------------------------------------------===// +// Register File Description +//===----------------------------------------------------------------------===// + +include "ARMRegisterInfo.td" + +include "ARMCallingConv.td" + +//===----------------------------------------------------------------------===// +// Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "ARMInstrInfo.td" + +def ARMInstrInfo : InstrInfo; + + +//===----------------------------------------------------------------------===// +// Assembly printer +//===----------------------------------------------------------------------===// +// ARM Uses the MC printer for asm output, so make sure the TableGen +// AsmWriter bits get associated with the correct class. +def ARMAsmWriter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; + bit isMCAsmWriter = 1; +} + +//===----------------------------------------------------------------------===// +// Declare the target which we are implementing +//===----------------------------------------------------------------------===// + +def ARM : Target { + // Pull in Instruction Info: + let InstructionSet = ARMInstrInfo; + + let AssemblyWriters = [ARMAsmWriter]; +} diff --git a/final/lib/Target/ARM/ARMAddressingModes.h b/final/lib/Target/ARM/ARMAddressingModes.h new file mode 100644 index 00000000000..19fbf0548b0 --- /dev/null +++ b/final/lib/Target/ARM/ARMAddressingModes.h @@ -0,0 +1,585 @@ +//===- ARMAddressingModes.h - ARM Addressing Modes --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM addressing mode implementation stuff. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_ARMADDRESSINGMODES_H +#define LLVM_TARGET_ARM_ARMADDRESSINGMODES_H + +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace llvm { + +/// ARM_AM - ARM Addressing Mode Stuff +namespace ARM_AM { + enum ShiftOpc { + no_shift = 0, + asr, + lsl, + lsr, + ror, + rrx + }; + + enum AddrOpc { + add = '+', sub = '-' + }; + + static inline const char *getAddrOpcStr(AddrOpc Op) { + return Op == sub ? "-" : ""; + } + + static inline const char *getShiftOpcStr(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return "asr"; + case ARM_AM::lsl: return "lsl"; + case ARM_AM::lsr: return "lsr"; + case ARM_AM::ror: return "ror"; + case ARM_AM::rrx: return "rrx"; + } + } + + static inline unsigned getShiftOpcEncoding(ShiftOpc Op) { + switch (Op) { + default: assert(0 && "Unknown shift opc!"); + case ARM_AM::asr: return 2; + case ARM_AM::lsl: return 0; + case ARM_AM::lsr: return 1; + case ARM_AM::ror: return 3; + } + } + + static inline ShiftOpc getShiftOpcForNode(SDValue N) { + switch (N.getOpcode()) { + default: return ARM_AM::no_shift; + case ISD::SHL: return ARM_AM::lsl; + case ISD::SRL: return ARM_AM::lsr; + case ISD::SRA: return ARM_AM::asr; + case ISD::ROTR: return ARM_AM::ror; + //case ISD::ROTL: // Only if imm -> turn into ROTR. 
+ // Can't handle RRX here, because it would require folding a flag into
+ // the addressing mode. :( This causes us to miss certain things.
+ //case ARMISD::RRX: return ARM_AM::rrx;
+ }
+ }
+
+ enum AMSubMode {
+ bad_am_submode = 0,
+ ia,
+ ib,
+ da,
+ db
+ };
+
+ static inline const char *getAMSubModeStr(AMSubMode Mode) {
+ switch (Mode) {
+ default: assert(0 && "Unknown addressing sub-mode!");
+ case ARM_AM::ia: return "ia";
+ case ARM_AM::ib: return "ib";
+ case ARM_AM::da: return "da";
+ case ARM_AM::db: return "db";
+ }
+ }
+
+ /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
+ ///
+ static inline unsigned rotr32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val >> Amt) | (Val << ((32-Amt)&31));
+ }
+
+ /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
+ ///
+ static inline unsigned rotl32(unsigned Val, unsigned Amt) {
+ assert(Amt < 32 && "Invalid rotate amount");
+ return (Val << Amt) | (Val >> ((32-Amt)&31));
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #1: shift_operand with registers
+ //===--------------------------------------------------------------------===//
+ //
+ // This 'addressing mode' is used for arithmetic instructions. It can
+ // represent things like:
+ // reg
+ // reg [asr|lsl|lsr|ror|rrx] reg
+ // reg [asr|lsl|lsr|ror|rrx] imm
+ //
+ // This is stored as three operands [rega, regb, opc]. The first is the base
+ // reg, the second is the shift amount (or reg0 if not present or imm). The
+ // third operand encodes the shift opcode and the imm if a reg isn't present.
+ //
+ static inline unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) {
+ return ShOp | (Imm << 3);
+ }
+ static inline unsigned getSORegOffset(unsigned Op) {
+ return Op >> 3;
+ }
+ static inline ShiftOpc getSORegShOp(unsigned Op) {
+ return (ShiftOpc)(Op & 7);
+ }
+
+ /// getSOImmValImm - Given an encoded imm field for the reg/imm form, return
+ /// the 8-bit imm value.
+ static inline unsigned getSOImmValImm(unsigned Imm) {
+ return Imm & 0xFF;
+ }
+ /// getSOImmValRot - Given an encoded imm field for the reg/imm form, return
+ /// the rotate amount.
+ static inline unsigned getSOImmValRot(unsigned Imm) {
+ return (Imm >> 8) * 2;
+ }
+
+ /// getSOImmValRotate - Try to handle Imm with an immediate shifter operand,
+ /// computing the rotate amount to use. If this immediate value cannot be
+ /// handled with a single shifter-op, determine a good rotate amount that will
+ /// take a maximal chunk of bits out of the immediate.
+ static inline unsigned getSOImmValRotate(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the rotate amount.
+ unsigned TZ = CountTrailingZeros_32(Imm);
+
+ // Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
+ // not 9.
+ unsigned RotAmt = TZ & ~1;
+
+ // If we can handle this spread, return it.
+ if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ return (32-RotAmt)&31; // HW rotates right, not left.
+
+ // For values like 0xF000000F, we should ignore the low 6 bits, then
+ // retry the hunt.
+ if (Imm & 63U) {
+ unsigned TZ2 = CountTrailingZeros_32(Imm & ~63U);
+ unsigned RotAmt2 = TZ2 & ~1;
+ if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ return (32-RotAmt2)&31; // HW rotates right, not left.
+ }
+
+ // Otherwise, we have no way to cover this span of bits with a single
+ // shifter_op immediate. Return a chunk of bits that will be useful to
+ // handle.
+ return (32-RotAmt)&31; // HW rotates right, not left.
+ }
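+
+ // (worked example: 0x200 has nine trailing zeros, so the even rotate
+ // amount is 8; rotr32(0x200, 8) == 0x2 fits in 8 bits, and the function
+ // returns (32-8)&31 == 24, i.e. rotating the 8-bit value 0x2 right by 24
+ // rebuilds 0x200)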
+
+ /// getSOImmVal - Given a 32-bit immediate, if it is something that can fit
+ /// into a shifter_operand immediate operand, return the 12-bit encoding for
+ /// it. If not, return -1.
+ static inline int getSOImmVal(unsigned Arg) {
+ // 8-bit (or less) immediates are trivially shifter_operands with a rotate
+ // of zero.
+ if ((Arg & ~255U) == 0) return Arg;
+
+ unsigned RotAmt = getSOImmValRotate(Arg);
+
+ // If this cannot be handled with a single shifter_op, bail out.
+ if (rotr32(~255U, RotAmt) & Arg)
+ return -1;
+
+ // Encode this correctly.
+ return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ }
+
+ /// isSOImmTwoPartVal - Return true if the specified value can be obtained by
+ /// or'ing together two SOImmVal's.
+ static inline bool isSOImmTwoPartVal(unsigned V) {
+ // If this can be handled with a single shifter_op, bail out.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ if (V == 0)
+ return false;
+
+ // If this can be handled with two shifter_op's, accept.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ return V == 0;
+ }
+
+ /// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the first chunk of it.
+ static inline unsigned getSOImmTwoPartFirst(unsigned V) {
+ return rotr32(255U, getSOImmValRotate(V)) & V;
+ }
+
+ /// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
+ /// return the second chunk of it.
+ static inline unsigned getSOImmTwoPartSecond(unsigned V) {
+ // Mask out the first hunk.
+ V = rotr32(~255U, getSOImmValRotate(V)) & V;
+
+ // Take what's left.
+ assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ return V;
+ }
+
+ /// getThumbImmValShift - Try to handle Imm with an 8-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImmValShift(unsigned Imm) {
+ // 8-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~255U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImmShiftedVal - Return true if the specified value can be obtained
+ /// by left shifting an 8-bit immediate.
+ static inline bool isThumbImmShiftedVal(unsigned V) {
+ // If this can be handled with a left-shifted 8-bit immediate, masking it
+ // off leaves zero.
+ V = (~255U << getThumbImmValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImm16ValShift - Try to handle Imm with a 16-bit immediate followed
+ /// by a left shift. Returns the shift amount to use.
+ static inline unsigned getThumbImm16ValShift(unsigned Imm) {
+ // 16-bit (or less) immediates are trivially immediate operands with a shift
+ // of zero.
+ if ((Imm & ~65535U) == 0) return 0;
+
+ // Use CTZ to compute the shift amount.
+ return CountTrailingZeros_32(Imm);
+ }
+
+ /// isThumbImm16ShiftedVal - Return true if the specified value can be
+ /// obtained by left shifting a 16-bit immediate.
+ static inline bool isThumbImm16ShiftedVal(unsigned V) {
+ // If this can be handled with a left-shifted 16-bit immediate, masking it
+ // off leaves zero.
+ V = (~65535U << getThumbImm16ValShift(V)) & V;
+ return V == 0;
+ }
+
+ /// getThumbImmNonShiftedVal - If V is a value that satisfies
+ /// isThumbImmShiftedVal, return the non-shifted value.
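+ /// (e.g. V == 0xa500 has shift amount 8 and non-shifted value 0xa5)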
+ static inline unsigned getThumbImmNonShiftedVal(unsigned V) { + return V >> getThumbImmValShift(V); + } + + + /// getT2SOImmValSplat - Return the 12-bit encoded representation + /// if the specified value can be obtained by splatting the low 8 bits + /// into every other byte or every byte of a 32-bit value. i.e., + /// 00000000 00000000 00000000 abcdefgh control = 0 + /// 00000000 abcdefgh 00000000 abcdefgh control = 1 + /// abcdefgh 00000000 abcdefgh 00000000 control = 2 + /// abcdefgh abcdefgh abcdefgh abcdefgh control = 3 + /// Return -1 if none of the above apply. + /// See ARM Reference Manual A6.3.2. + static inline int getT2SOImmValSplatVal(unsigned V) { + unsigned u, Vs, Imm; + // control = 0 + if ((V & 0xffffff00) == 0) + return V; + + // If the value is zeroes in the first byte, just shift those off + Vs = ((V & 0xff) == 0) ? V >> 8 : V; + // Any passing value only has 8 bits of payload, splatted across the word + Imm = Vs & 0xff; + // Likewise, any passing values have the payload splatted into the 3rd byte + u = Imm | (Imm << 16); + + // control = 1 or 2 + if (Vs == u) + return (((Vs == V) ? 1 : 2) << 8) | Imm; + + // control = 3 + if (Vs == (u | (u << 8))) + return (3 << 8) | Imm; + + return -1; + } + + /// getT2SOImmValRotateVal - Return the 12-bit encoded representation if the + /// specified value is a rotated 8-bit value. Return -1 if no rotation + /// encoding is possible. + /// See ARM Reference Manual A6.3.2. + static inline int getT2SOImmValRotateVal(unsigned V) { + unsigned RotAmt = CountLeadingZeros_32(V); + if (RotAmt >= 24) + return -1; + + // If 'Arg' can be handled with a single shifter_op return the value. + if ((rotr32(0xff000000U, RotAmt) & V) == V) + return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7); + + return -1; + } + + /// getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit + /// into a Thumb-2 shifter_operand immediate operand, return the 12-bit + /// encoding for it. If not, return -1. + /// See ARM Reference Manual A6.3.2. + static inline int getT2SOImmVal(unsigned Arg) { + // If 'Arg' is an 8-bit splat, then get the encoded value. + int Splat = getT2SOImmValSplatVal(Arg); + if (Splat != -1) + return Splat; + + // If 'Arg' can be handled with a single shifter_op return the value. + int Rot = getT2SOImmValRotateVal(Arg); + if (Rot != -1) + return Rot; + + return -1; + } + + static inline unsigned getT2SOImmValRotate(unsigned V) { + if ((V & ~255U) == 0) return 0; + // Use CTZ to compute the rotate amount. + unsigned RotAmt = CountTrailingZeros_32(V); + return (32 - RotAmt) & 31; + } + + static inline bool isT2SOImmTwoPartVal (unsigned Imm) { + unsigned V = Imm; + // Passing values can be any combination of splat values and shifter + // values. If this can be handled with a single shifter or splat, bail + // out. Those should be handled directly, not with a two-part val. + if (getT2SOImmValSplatVal(V) != -1) + return false; + V = rotr32 (~255U, getT2SOImmValRotate(V)) & V; + if (V == 0) + return false; + + // If this can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Likewise, try masking out a splat value first. + V = Imm; + if (getT2SOImmValSplatVal(V & 0xff00ff00U) != -1) + V &= ~0xff00ff00U; + else if (getT2SOImmValSplatVal(V & 0x00ff00ffU) != -1) + V &= ~0x00ff00ffU; + // If what's left can be handled as an immediate, accept. + if (getT2SOImmVal(V) != -1) return true; + + // Otherwise, do not accept. 
+ return false;
+ }
+
+ static inline unsigned getT2SOImmTwoPartFirst(unsigned Imm) {
+ assert (isT2SOImmTwoPartVal(Imm) &&
+ "Immediate cannot be encoded as two part immediate!");
+ // Try a shifter operand as one part
+ unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ // If the rest is encodable as an immediate, then return it.
+ if (getT2SOImmVal(V) != -1) return V;
+
+ // Try masking out a splat value first.
+ if (getT2SOImmValSplatVal(Imm & 0xff00ff00U) != -1)
+ return Imm & 0xff00ff00U;
+
+ // The other splat is all that's left as an option.
+ assert (getT2SOImmValSplatVal(Imm & 0x00ff00ffU) != -1);
+ return Imm & 0x00ff00ffU;
+ }
+
+ static inline unsigned getT2SOImmTwoPartSecond(unsigned Imm) {
+ // Mask out the first hunk
+ Imm ^= getT2SOImmTwoPartFirst(Imm);
+ // Return what's left
+ assert (getT2SOImmVal(Imm) != -1 &&
+ "Unable to encode second part of T2 two part SO immediate");
+ return Imm;
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #2
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for most simple load/store instructions.
+ //
+ // addrmode2 := reg +/- reg shop imm
+ // addrmode2 := reg +/- imm12
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 12, the immediate in bits 0-11, and the shift op in 13-15.
+ //
+ // If this addressing mode is a frame index (before prolog/epilog insertion
+ // and code rewriting), this operand will have the form: FI#, reg0, <offs>
+ // with no shift amount for the frame offset.
+ //
+ static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) {
+ assert(Imm12 < (1 << 12) && "Imm too large!");
+ bool isSub = Opc == sub;
+ return Imm12 | ((int)isSub << 12) | (SO << 13);
+ }
+ static inline unsigned getAM2Offset(unsigned AM2Opc) {
+ return AM2Opc & ((1 << 12)-1);
+ }
+ static inline AddrOpc getAM2Op(unsigned AM2Opc) {
+ return ((AM2Opc >> 12) & 1) ? sub : add;
+ }
+ static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) {
+ return (ShiftOpc)(AM2Opc >> 13);
+ }
+
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #3
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for sign-extending loads, and load/store-pair instructions.
+ //
+ // addrmode3 := reg +/- reg
+ // addrmode3 := reg +/- imm8
+ //
+ // The first operand is always a Reg. The second operand is a reg if in
+ // reg/reg form, otherwise it's reg#0. The third field encodes the operation
+ // in bit 8, the immediate in bits 0-7.
+
+ /// getAM3Opc - This function encodes the addrmode3 opc field.
+ static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM3Offset(unsigned AM3Opc) {
+ return AM3Opc & 0xFF;
+ }
+ static inline AddrOpc getAM3Op(unsigned AM3Opc) {
+ return ((AM3Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #4
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for load / store multiple instructions.
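+ // (e.g. LDMIA sp!, {r4-r7} walks upward from sp, while STMDB decrements
+ // before each store -- illustrative uses of the IA and DB submodes below)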
+ //
+ // addrmode4 := reg, <mode>
+ //
+ // The four modes are:
+ // IA - Increment after
+ // IB - Increment before
+ // DA - Decrement after
+ // DB - Decrement before
+ // For VFP instructions, only the IA and DB modes are valid.
+
+ static inline AMSubMode getAM4SubMode(unsigned Mode) {
+ return (AMSubMode)(Mode & 0x7);
+ }
+
+ static inline unsigned getAM4ModeImm(AMSubMode SubMode) {
+ return (int)SubMode;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #5
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for coprocessor instructions, such as FP load/stores.
+ //
+ // addrmode5 := reg +/- imm8*4
+ //
+ // The first operand is always a Reg. The second operand encodes the
+ // operation in bit 8 and the immediate in bits 0-7.
+
+ /// getAM5Opc - This function encodes the addrmode5 opc field.
+ static inline unsigned getAM5Opc(AddrOpc Opc, unsigned char Offset) {
+ bool isSub = Opc == sub;
+ return ((int)isSub << 8) | Offset;
+ }
+ static inline unsigned char getAM5Offset(unsigned AM5Opc) {
+ return AM5Opc & 0xFF;
+ }
+ static inline AddrOpc getAM5Op(unsigned AM5Opc) {
+ return ((AM5Opc >> 8) & 1) ? sub : add;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // Addressing Mode #6
+ //===--------------------------------------------------------------------===//
+ //
+ // This is used for NEON load / store instructions.
+ //
+ // addrmode6 := reg with optional alignment
+ //
+ // This is stored in two operands [regaddr, align]. The first is the
+ // address register. The second operand is the value of the alignment
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
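+ /// For example (worked from the cases below): OpCmode 0xe with Imm8 0xa5
+ /// yields Val 0xa5 with EltBits 8, while OpCmode 0x1e expands each set bit
+ /// i of Imm8 into byte i of the 64-bit Val being 0xff.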
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) { + unsigned OpCmode = getNEONModImmOpCmode(ModImm); + unsigned Imm8 = getNEONModImmVal(ModImm); + uint64_t Val = 0; + + if (OpCmode == 0xe) { + // 8-bit vector elements + Val = Imm8; + EltBits = 8; + } else if ((OpCmode & 0xc) == 0x8) { + // 16-bit vector elements + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 16; + } else if ((OpCmode & 0x8) == 0) { + // 32-bit vector elements, zero with one byte set + unsigned ByteNum = (OpCmode & 0x6) >> 1; + Val = Imm8 << (8 * ByteNum); + EltBits = 32; + } else if ((OpCmode & 0xe) == 0xc) { + // 32-bit vector elements, one byte with low bits set + unsigned ByteNum = 1 + (OpCmode & 0x1); + Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum))); + EltBits = 32; + } else if (OpCmode == 0x1e) { + // 64-bit vector elements + for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { + if ((ModImm >> ByteNum) & 1) + Val |= (uint64_t)0xff << (8 * ByteNum); + } + EltBits = 64; + } else { + assert(false && "Unsupported NEON immediate"); + } + return Val; + } + + AMSubMode getLoadStoreMultipleSubMode(int Opcode); + +} // end namespace ARM_AM +} // end namespace llvm + +#endif + diff --git a/final/lib/Target/ARM/ARMAsmBackend.cpp b/final/lib/Target/ARM/ARMAsmBackend.cpp new file mode 100644 index 00000000000..ec23449d7d4 --- /dev/null +++ b/final/lib/Target/ARM/ARMAsmBackend.cpp @@ -0,0 +1,512 @@ +//===-- ARMAsmBackend.cpp - ARM Assembler Backend -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMFixupKinds.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCDirectives.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCMachObjectWriter.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Target/TargetRegistry.h" +using namespace llvm; + +namespace { +class ARMMachObjectWriter : public MCMachObjectTargetWriter { +public: + ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType, + uint32_t CPUSubtype) + : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype, + /*UseAggressiveSymbolFolding=*/true) {} +}; + +class ARMELFObjectWriter : public MCELFObjectTargetWriter { +public: + ARMELFObjectWriter(Triple::OSType OSType) + : MCELFObjectTargetWriter(/*Is64Bit*/ false, OSType, ELF::EM_ARM, + /*HasRelocationAddend*/ false) {} +}; + +class ARMAsmBackend : public TargetAsmBackend { + bool isThumbMode; // Currently emitting Thumb code. +public: + ARMAsmBackend(const Target &T) : TargetAsmBackend(), isThumbMode(false) {} + + unsigned getNumFixupKinds() const { return ARM::NumTargetFixupKinds; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[ARM::NumTargetFixupKinds] = { +// This table *must* be in the order that the fixup_* kinds are defined in +// ARMFixupKinds.h. 
+// +// Name Offset (bits) Size (bits) Flags +{ "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_adr_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_adr_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | + MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, +{ "fixup_arm_condbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_uncondbranch", 0, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_condbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_uncondbranch", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_bl", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_blx", 7, 21, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cb", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_cp", 1, 8, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_thumb_bcc", 1, 8, MCFixupKindInfo::FKF_IsPCRel }, +// movw / movt: 16-bits immediate but scattered into two chunks 0 - 12, 16 - 19. +{ "fixup_arm_movt_hi16", 0, 20, 0 }, +{ "fixup_arm_movw_lo16", 0, 20, 0 }, +{ "fixup_t2_movt_hi16", 0, 20, 0 }, +{ "fixup_t2_movw_lo16", 0, 20, 0 }, +{ "fixup_arm_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_movt_hi16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_t2_movw_lo16_pcrel", 0, 20, MCFixupKindInfo::FKF_IsPCRel }, + }; + + if (Kind < FirstTargetFixupKind) + return TargetAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; + } + + bool MayNeedRelaxation(const MCInst &Inst) const; + + void RelaxInstruction(const MCInst &Inst, MCInst &Res) const; + + bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; + + void HandleAssemblerFlag(MCAssemblerFlag Flag) { + switch (Flag) { + default: break; + case MCAF_Code16: + setIsThumb(true); + break; + case MCAF_Code32: + setIsThumb(false); + break; + } + } + + unsigned getPointerSize() const { return 4; } + bool isThumb() const { return isThumbMode; } + void setIsThumb(bool it) { isThumbMode = it; } +}; +} // end anonymous namespace + +bool ARMAsmBackend::MayNeedRelaxation(const MCInst &Inst) const { + // FIXME: Thumb targets, different move constant targets.. + return false; +} + +void ARMAsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const { + assert(0 && "ARMAsmBackend::RelaxInstruction() unimplemented"); + return; +} + +bool ARMAsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { + if (isThumb()) { + // FIXME: 0xbf00 is the ARMv7 value. For v6 and before, we'll need to + // use 0x46c0 (which is a 'mov r8, r8' insn). 
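+ // (0xbf00 is the Thumb-2 NOP hint encoding; padding is emitted in
+ // half-word units, with one trailing zero byte when Count is odd)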
+ uint64_t NumNops = Count / 2; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write16(0xbf00); + if (Count & 1) + OW->Write8(0); + return true; + } + // ARM mode + uint64_t NumNops = Count / 4; + for (uint64_t i = 0; i != NumNops; ++i) + OW->Write32(0xe1a00000); + switch (Count % 4) { + default: break; // No leftover bytes to write + case 1: OW->Write8(0); break; + case 2: OW->Write16(0); break; + case 3: OW->Write16(0); OW->Write8(0xa0); break; + } + + return true; +} + +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case FK_Data_1: + case FK_Data_2: + case FK_Data_4: + return Value; + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movt_hi16_pcrel: + Value >>= 16; + // Fallthrough + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movw_lo16_pcrel: { + unsigned Hi4 = (Value & 0xF000) >> 12; + unsigned Lo12 = Value & 0x0FFF; + // inst{19-16} = Hi4; + // inst{11-0} = Lo12; + Value = (Hi4 << 16) | (Lo12); + return Value; + } + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movt_hi16_pcrel: + Value >>= 16; + // Fallthrough + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movw_lo16_pcrel: { + unsigned Hi4 = (Value & 0xF000) >> 12; + unsigned i = (Value & 0x800) >> 11; + unsigned Mid3 = (Value & 0x700) >> 8; + unsigned Lo8 = Value & 0x0FF; + // inst{19-16} = Hi4; + // inst{26} = i; + // inst{14-12} = Mid3; + // inst{7-0} = Lo8; + Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8); + + uint64_t swapped = (Value & 0xFFFF0000) >> 16; + swapped |= (Value & 0x0000FFFF) << 16; + return swapped; + } + case ARM::fixup_arm_ldst_pcrel_12: + // ARM PC-relative values are offset by 8. + Value -= 4; + // FALLTHROUGH + case ARM::fixup_t2_ldst_pcrel_12: { + // Offset by 4, adjusted by two due to the half-word ordering of thumb. + Value -= 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + assert ((Value < 4096) && "Out of range pc-relative fixup value!"); + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_10, + // but with 16-bit halfwords swapped. + if (Kind == ARM::fixup_t2_ldst_pcrel_12) { + uint64_t swapped = (Value & 0xFFFF0000) >> 16; + swapped |= (Value & 0x0000FFFF) << 16; + return swapped; + } + + return Value; + } + case ARM::fixup_thumb_adr_pcrel_10: + return ((Value - 4) >> 2) & 0xff; + case ARM::fixup_arm_adr_pcrel_12: { + // ARM PC-relative values are offset by 8. + Value -= 8; + unsigned opc = 4; // bits {24-21}. Default to add: 0b0100 + if ((int64_t)Value < 0) { + Value = -Value; + opc = 2; // 0b0010 + } + assert(ARM_AM::getSOImmVal(Value) != -1 && + "Out of range pc-relative fixup value!"); + // Encode the immediate and shift the opcode into place. + return ARM_AM::getSOImmVal(Value) | (opc << 21); + } + + case ARM::fixup_t2_adr_pcrel_12: { + Value -= 4; + unsigned opc = 0; + if ((int64_t)Value < 0) { + Value = -Value; + opc = 5; + } + + uint32_t out = (opc << 21); + out |= (Value & 0x800) << 14; + out |= (Value & 0x700) << 4; + out |= (Value & 0x0FF); + + uint64_t swapped = (out & 0xFFFF0000) >> 16; + swapped |= (out & 0x0000FFFF) << 16; + return swapped; + } + + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + // These values don't encode the low two bits since they're always zero. + // Offset by 8 just as above. + return 0xffffff & ((Value - 8) >> 2); + case ARM::fixup_t2_uncondbranch: { + Value = Value - 4; + Value >>= 1; // Low bit is not encoded. 
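+    // The Thumb2 B.W (encoding T4) scatters the offset across two
+    // halfwords as S:imm10 and J1:J2:imm11, where the stored bits are
+    // J1 = NOT(I1 EOR S) and J2 = NOT(I2 EOR S). E.g. for a small forward
+    // branch S is 0 and I1 = I2 = 0, so both J bits come out as 1 below.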
+ + uint32_t out = 0; + bool I = Value & 0x800000; + bool J1 = Value & 0x400000; + bool J2 = Value & 0x200000; + J1 ^= I; + J2 ^= I; + + out |= I << 26; // S bit + out |= !J1 << 13; // J1 bit + out |= !J2 << 11; // J2 bit + out |= (Value & 0x1FF800) << 5; // imm6 field + out |= (Value & 0x0007FF); // imm11 field + + uint64_t swapped = (out & 0xFFFF0000) >> 16; + swapped |= (out & 0x0000FFFF) << 16; + return swapped; + } + case ARM::fixup_t2_condbranch: { + Value = Value - 4; + Value >>= 1; // Low bit is not encoded. + + uint64_t out = 0; + out |= (Value & 0x80000) << 7; // S bit + out |= (Value & 0x40000) >> 7; // J2 bit + out |= (Value & 0x20000) >> 4; // J1 bit + out |= (Value & 0x1F800) << 5; // imm6 field + out |= (Value & 0x007FF); // imm11 field + + uint32_t swapped = (out & 0xFFFF0000) >> 16; + swapped |= (out & 0x0000FFFF) << 16; + return swapped; + } + case ARM::fixup_arm_thumb_bl: { + // The value doesn't encode the low bit (always zero) and is offset by + // four. The value is encoded into disjoint bit positions in the destination + // opcode. x = unchanged, I = immediate value bit, S = sign extension bit + // + // BL: xxxxxSIIIIIIIIII xxxxxIIIIIIIIIII + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0; + uint32_t Binary = 0; + Value = 0x3fffff & ((Value - 4) >> 1); + Binary = (Value & 0x7ff) << 16; // Low imm11 value. + Binary |= (Value & 0x1ffc00) >> 11; // High imm10 value. + Binary |= isNeg << 10; // Sign bit. + return Binary; + } + case ARM::fixup_arm_thumb_blx: { + // The value doesn't encode the low two bits (always zero) and is offset by + // four (see fixup_arm_thumb_cp). The value is encoded into disjoint bit + // positions in the destination opcode. x = unchanged, I = immediate value + // bit, S = sign extension bit, 0 = zero. + // + // BLX: xxxxxSIIIIIIIIII xxxxxIIIIIIIIII0 + // + // Note that the halfwords are stored high first, low second; so we need + // to transpose the fixup value here to map properly. + unsigned isNeg = (int64_t(Value) < 0) ? 1 : 0; + uint32_t Binary = 0; + Value = 0xfffff & ((Value - 2) >> 2); + Binary = (Value & 0x3ff) << 17; // Low imm10L value. + Binary |= (Value & 0xffc00) >> 10; // High imm10H value. + Binary |= isNeg << 10; // Sign bit. + return Binary; + } + case ARM::fixup_arm_thumb_cp: + // Offset by 4, and don't encode the low two bits. Two bytes of that + // 'off by 4' is implicitly handled by the half-word ordering of the + // Thumb encoding, so we only need to adjust by 2 here. + return ((Value - 2) >> 2) & 0xff; + case ARM::fixup_arm_thumb_cb: { + // Offset by 4 and don't encode the lower bit, which is always 0. + uint32_t Binary = (Value - 4) >> 1; + return ((Binary & 0x20) << 4) | ((Binary & 0x1f) << 3); + } + case ARM::fixup_arm_thumb_br: + // Offset by 4 and don't encode the lower bit, which is always 0. + return ((Value - 4) >> 1) & 0x7ff; + case ARM::fixup_arm_thumb_bcc: + // Offset by 4 and don't encode the lower bit, which is always 0. + return ((Value - 4) >> 1) & 0xff; + case ARM::fixup_arm_pcrel_10: + Value = Value - 4; // ARM fixups offset by an additional word and don't + // need to adjust for the half-word ordering. + // Fall through. + case ARM::fixup_t2_pcrel_10: { + // Offset by 4, adjusted by two due to the half-word ordering of thumb. 
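+    // E.g. a VFP literal load whose target is 44 bytes past the fixup:
+    // 44 - 4 = 40, >> 2 gives imm8 = 10, and the U (add) bit lands at
+    // bit 23 before the Thumb2 halfword swap below.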
+ Value = Value - 4; + bool isAdd = true; + if ((int64_t)Value < 0) { + Value = -Value; + isAdd = false; + } + // These values don't encode the low two bits since they're always zero. + Value >>= 2; + assert ((Value < 256) && "Out of range pc-relative fixup value!"); + Value |= isAdd << 23; + + // Same addressing mode as fixup_arm_pcrel_10, + // but with 16-bit halfwords swapped. + if (Kind == ARM::fixup_t2_pcrel_10) { + uint32_t swapped = (Value & 0xFFFF0000) >> 16; + swapped |= (Value & 0x0000FFFF) << 16; + return swapped; + } + + return Value; + } + } +} + +namespace { + +// FIXME: This should be in a separate file. +// ELF is an ELF of course... +class ELFARMAsmBackend : public ARMAsmBackend { +public: + Triple::OSType OSType; + ELFARMAsmBackend(const Target &T, Triple::OSType _OSType) + : ARMAsmBackend(T), OSType(_OSType) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return createELFObjectWriter(new ARMELFObjectWriter(OSType), OS, + /*IsLittleEndian*/ true); + } +}; + +// FIXME: Raise this to share code between Darwin and ELF. +void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + unsigned NumBytes = 4; // FIXME: 2 for Thumb + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset % NumBytes == 0 && "Offset mod NumBytes is nonzero!"); + + // For each byte of the fragment that the fixup touches, mask in the bits from + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +// FIXME: This should be in a separate file. +class DarwinARMAsmBackend : public ARMAsmBackend { +public: + DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { } + + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + // FIXME: Subtarget info should be derived. Force v7 for now. + return createMachObjectWriter(new ARMMachObjectWriter( + /*Is64Bit=*/false, + object::mach::CTM_ARM, + object::mach::CSARM_V7), + OS, + /*IsLittleEndian=*/true); + } + + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { + return false; + } +}; + +/// getFixupKindNumBytes - The number of bytes the fixup may change. 
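+/// For example, the 24-bit ARM branch and load/store fixups only ever touch
+/// the low three bytes of the instruction word, while the 32-bit Thumb2
+/// fixups span a full instruction and therefore report four bytes.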
+static unsigned getFixupKindNumBytes(unsigned Kind) { + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + + case FK_Data_1: + case ARM::fixup_arm_thumb_bcc: + case ARM::fixup_arm_thumb_cp: + case ARM::fixup_thumb_adr_pcrel_10: + return 1; + + case FK_Data_2: + case ARM::fixup_arm_thumb_br: + case ARM::fixup_arm_thumb_cb: + return 2; + + case ARM::fixup_arm_ldst_pcrel_12: + case ARM::fixup_arm_pcrel_10: + case ARM::fixup_arm_adr_pcrel_12: + case ARM::fixup_arm_condbranch: + case ARM::fixup_arm_uncondbranch: + return 3; + + case FK_Data_4: + case ARM::fixup_t2_ldst_pcrel_12: + case ARM::fixup_t2_condbranch: + case ARM::fixup_t2_uncondbranch: + case ARM::fixup_t2_pcrel_10: + case ARM::fixup_t2_adr_pcrel_12: + case ARM::fixup_arm_thumb_bl: + case ARM::fixup_arm_thumb_blx: + case ARM::fixup_arm_movt_hi16: + case ARM::fixup_arm_movw_lo16: + case ARM::fixup_arm_movt_hi16_pcrel: + case ARM::fixup_arm_movw_lo16_pcrel: + case ARM::fixup_t2_movt_hi16: + case ARM::fixup_t2_movw_lo16: + case ARM::fixup_t2_movt_hi16_pcrel: + case ARM::fixup_t2_movw_lo16_pcrel: + return 4; + } +} + +void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, + unsigned DataSize, uint64_t Value) const { + unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind()); + Value = adjustFixupValue(Fixup.getKind(), Value); + if (!Value) return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!"); + + // For each byte of the fragment that the fixup touches, mask in the + // bits from the fixup value. + for (unsigned i = 0; i != NumBytes; ++i) + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); +} + +} // end anonymous namespace + +TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, + const std::string &TT) { + switch (Triple(TT).getOS()) { + case Triple::Darwin: + return new DarwinARMAsmBackend(T); + case Triple::MinGW32: + case Triple::Cygwin: + case Triple::Win32: + assert(0 && "Windows not supported on ARM"); + default: + return new ELFARMAsmBackend(T, Triple(TT).getOS()); + } +} diff --git a/final/lib/Target/ARM/ARMAsmPrinter.cpp b/final/lib/Target/ARM/ARMAsmPrinter.cpp new file mode 100644 index 00000000000..e1a2cc57abf --- /dev/null +++ b/final/lib/Target/ARM/ARMAsmPrinter.cpp @@ -0,0 +1,1725 @@ +//===-- ARMAsmPrinter.cpp - Print machine code to an ARM .s file ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a printer that converts from our internal representation +// of machine-dependent LLVM code to GAS-format ARM assembly language. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "asm-printer" +#include "ARM.h" +#include "ARMAsmPrinter.h" +#include "ARMAddressingModes.h" +#include "ARMBuildAttrs.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMMCExpr.h" +#include "ARMTargetMachine.h" +#include "ARMTargetObjectFile.h" +#include "InstPrinter/ARMInstPrinter.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Constants.h" +#include "llvm/Module.h" +#include "llvm/Type.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +namespace { + + // Per section and per symbol attributes are not supported. + // To implement them we would need the ability to delay this emission + // until the assembly file is fully parsed/generated as only then do we + // know the symbol and section numbers. 
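+  // The AttributeEmitter interface below abstracts over the two cases that
+  // are handled: textual output (e.g. ".eabi_attribute 6, 10" for
+  // Tag_CPU_arch = v7) when the streamer supports raw text, and the packed
+  // binary .ARM.attributes encoding otherwise; emitAttributes() below picks
+  // the implementation at run time.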
+ class AttributeEmitter { + public: + virtual void MaybeSwitchVendor(StringRef Vendor) = 0; + virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0; + virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0; + virtual void Finish() = 0; + virtual ~AttributeEmitter() {} + }; + + class AsmAttributeEmitter : public AttributeEmitter { + MCStreamer &Streamer; + + public: + AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {} + void MaybeSwitchVendor(StringRef Vendor) { } + + void EmitAttribute(unsigned Attribute, unsigned Value) { + Streamer.EmitRawText("\t.eabi_attribute " + + Twine(Attribute) + ", " + Twine(Value)); + } + + void EmitTextAttribute(unsigned Attribute, StringRef String) { + switch (Attribute) { + case ARMBuildAttrs::CPU_name: + Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); + break; + /* GAS requires .fpu to be emitted regardless of EABI attribute */ + case ARMBuildAttrs::Advanced_SIMD_arch: + case ARMBuildAttrs::VFP_arch: + Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); + break; + default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; + } + } + void Finish() { } + }; + + class ObjectAttributeEmitter : public AttributeEmitter { + MCObjectStreamer &Streamer; + StringRef CurrentVendor; + SmallString<64> Contents; + + public: + ObjectAttributeEmitter(MCObjectStreamer &Streamer_) : + Streamer(Streamer_), CurrentVendor("") { } + + void MaybeSwitchVendor(StringRef Vendor) { + assert(!Vendor.empty() && "Vendor cannot be empty."); + + if (CurrentVendor.empty()) + CurrentVendor = Vendor; + else if (CurrentVendor == Vendor) + return; + else + Finish(); + + CurrentVendor = Vendor; + + assert(Contents.size() == 0); + } + + void EmitAttribute(unsigned Attribute, unsigned Value) { + // FIXME: should be ULEB + Contents += Attribute; + Contents += Value; + } + + void EmitTextAttribute(unsigned Attribute, StringRef String) { + Contents += Attribute; + Contents += UppercaseString(String); + Contents += 0; + } + + void Finish() { + const size_t ContentsSize = Contents.size(); + + // Vendor size + Vendor name + '\0' + const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; + + // Tag + Tag Size + const size_t TagHeaderSize = 1 + 4; + + Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); + Streamer.EmitBytes(CurrentVendor, 0); + Streamer.EmitIntValue(0, 1); // '\0' + + Streamer.EmitIntValue(ARMBuildAttrs::File, 1); + Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); + + Streamer.EmitBytes(Contents, 0); + + Contents.clear(); + } + }; + +} // end of anonymous namespace + +MachineLocation ARMAsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { + MachineLocation Location; + assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!"); + // Frame address. Currently handles register +- offset only. + if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); + else { + DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); + } + return Location; +} + +void ARMAsmPrinter::EmitFunctionEntryLabel() { + if (AFI->isThumbFunction()) { + OutStreamer.EmitAssemblerFlag(MCAF_Code16); + OutStreamer.EmitThumbFunc(Subtarget->isTargetDarwin()? CurrentFnSym : 0); + } + + OutStreamer.EmitLabel(CurrentFnSym); +} + +/// runOnMachineFunction - This uses the EmitInstruction() +/// method to print assembly for each instruction. 
+/// +bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + AFI = MF.getInfo(); + MCP = MF.getConstantPool(); + + return AsmPrinter::runOnMachineFunction(MF); +} + +void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O, const char *Modifier) { + const MachineOperand &MO = MI->getOperand(OpNum); + unsigned TF = MO.getTargetFlags(); + + switch (MO.getType()) { + default: + assert(0 && ""); + case MachineOperand::MO_Register: { + unsigned Reg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + assert(!MO.getSubReg() && "Subregs should be eliminated!"); + O << ARMInstPrinter::getRegisterName(Reg); + break; + } + case MachineOperand::MO_Immediate: { + int64_t Imm = MO.getImm(); + O << '#'; + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF == ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF == ARMII::MO_HI16)) + O << ":upper16:"; + O << Imm; + break; + } + case MachineOperand::MO_MachineBasicBlock: + O << *MO.getMBB()->getSymbol(); + return; + case MachineOperand::MO_GlobalAddress: { + const GlobalValue *GV = MO.getGlobal(); + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF & ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF & ARMII::MO_HI16)) + O << ":upper16:"; + O << *Mang->getSymbol(GV); + + printOffset(MO.getOffset(), O); + if (TF == ARMII::MO_PLT) + O << "(PLT)"; + break; + } + case MachineOperand::MO_ExternalSymbol: { + O << *GetExternalSymbolSymbol(MO.getSymbolName()); + if (TF == ARMII::MO_PLT) + O << "(PLT)"; + break; + } + case MachineOperand::MO_ConstantPoolIndex: + O << *GetCPISymbol(MO.getIndex()); + break; + case MachineOperand::MO_JumpTableIndex: + O << *GetJTISymbol(MO.getIndex()); + break; + } +} + +//===--------------------------------------------------------------------===// + +MCSymbol *ARMAsmPrinter:: +GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2, + const MachineBasicBlock *MBB) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() + << getFunctionNumber() << '_' << uid << '_' << uid2 + << "_set_" << MBB->getNumber(); + return OutContext.GetOrCreateSymbol(Name.str()); +} + +MCSymbol *ARMAsmPrinter:: +GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI" + << getFunctionNumber() << '_' << uid << '_' << uid2; + return OutContext.GetOrCreateSymbol(Name.str()); +} + + +MCSymbol *ARMAsmPrinter::GetARMSJLJEHLabel(void) const { + SmallString<60> Name; + raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "SJLJEH" + << getFunctionNumber(); + return OutContext.GetOrCreateSymbol(Name.str()); +} + +bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + switch (ExtraCode[0]) { + default: return true; // Unknown modifier. + case 'a': // Print as a memory address. + if (MI->getOperand(OpNum).isReg()) { + O << "[" + << ARMInstPrinter::getRegisterName(MI->getOperand(OpNum).getReg()) + << "]"; + return false; + } + // Fallthrough + case 'c': // Don't print "#" before an immediate operand. 
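+      // E.g. with inline asm such as  asm("svc %c0" : : "i"(42));  the 'c'
+      // modifier prints the bare constant "42" rather than "#42".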
+ if (!MI->getOperand(OpNum).isImm()) + return true; + O << MI->getOperand(OpNum).getImm(); + return false; + case 'P': // Print a VFP double precision register. + case 'q': // Print a NEON quad precision register. + printOperand(MI, OpNum, O); + return false; + case 'Q': + case 'R': + case 'H': + // These modifiers are not yet supported. + return true; + } + } + + printOperand(MI, OpNum, O); + return false; +} + +bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, + unsigned OpNum, unsigned AsmVariant, + const char *ExtraCode, + raw_ostream &O) { + if (ExtraCode && ExtraCode[0]) + return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNum); + assert(MO.isReg() && "unexpected inline asm memory operand"); + O << "[" << ARMInstPrinter::getRegisterName(MO.getReg()) << "]"; + return false; +} + +void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { + if (Subtarget->isTargetDarwin()) { + Reloc::Model RelocM = TM.getRelocationModel(); + if (RelocM == Reloc::PIC_ || RelocM == Reloc::DynamicNoPIC) { + // Declare all the text sections up front (before the DWARF sections + // emitted by AsmPrinter::doInitialization) so the assembler will keep + // them together at the beginning of the object file. This helps + // avoid out-of-range branches that are due a fundamental limitation of + // the way symbol offsets are encoded with the current Darwin ARM + // relocations. + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast( + getObjFileLowering()); + OutStreamer.SwitchSection(TLOFMacho.getTextSection()); + OutStreamer.SwitchSection(TLOFMacho.getTextCoalSection()); + OutStreamer.SwitchSection(TLOFMacho.getConstTextCoalSection()); + if (RelocM == Reloc::DynamicNoPIC) { + const MCSection *sect = + OutContext.getMachOSection("__TEXT", "__symbol_stub4", + MCSectionMachO::S_SYMBOL_STUBS, + 12, SectionKind::getText()); + OutStreamer.SwitchSection(sect); + } else { + const MCSection *sect = + OutContext.getMachOSection("__TEXT", "__picsymbolstub4", + MCSectionMachO::S_SYMBOL_STUBS, + 16, SectionKind::getText()); + OutStreamer.SwitchSection(sect); + } + const MCSection *StaticInitSect = + OutContext.getMachOSection("__TEXT", "__StaticInit", + MCSectionMachO::S_REGULAR | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + SectionKind::getText()); + OutStreamer.SwitchSection(StaticInitSect); + } + } + + // Use unified assembler syntax. + OutStreamer.EmitAssemblerFlag(MCAF_SyntaxUnified); + + // Emit ARM Build Attributes + if (Subtarget->isTargetELF()) { + + emitAttributes(); + } +} + + +void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { + if (Subtarget->isTargetDarwin()) { + // All darwin targets use mach-o. + const TargetLoweringObjectFileMachO &TLOFMacho = + static_cast(getObjFileLowering()); + MachineModuleInfoMachO &MMIMacho = + MMI->getObjFileInfo(); + + // Output non-lazy-pointers for external and common global variables. + MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList(); + + if (!Stubs.empty()) { + // Switch with ".non_lazy_symbol_pointer" directive. + OutStreamer.SwitchSection(TLOFMacho.getNonLazySymbolPointerSection()); + EmitAlignment(2); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .indirect_symbol _foo + MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second; + OutStreamer.EmitSymbolAttribute(MCSym.getPointer(),MCSA_IndirectSymbol); + + if (MCSym.getInt()) + // External to current translation unit. 
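+          // The stub for the external case comes out as, e.g.:
+          //   L_foo$non_lazy_ptr:
+          //     .indirect_symbol _foo
+          //     .long 0
+          // leaving the pointer itself for dyld to fill in at load time.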
+ OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + else + // Internal to current translation unit. + // + // When we place the LSDA into the TEXT section, the type info + // pointers need to be indirect and pc-rel. We accomplish this by + // using NLPs; however, sometimes the types are local to the file. + // We need to fill in the value for the NLP in those cases. + OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(), + OutContext), + 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + Stubs = MMIMacho.GetHiddenGVStubList(); + if (!Stubs.empty()) { + OutStreamer.SwitchSection(getObjFileLowering().getDataSection()); + EmitAlignment(2); + for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { + // L_foo$stub: + OutStreamer.EmitLabel(Stubs[i].first); + // .long _foo + OutStreamer.EmitValue(MCSymbolRefExpr:: + Create(Stubs[i].second.getPointer(), + OutContext), + 4/*size*/, 0/*addrspace*/); + } + + Stubs.clear(); + OutStreamer.AddBlankLine(); + } + + // Funny Darwin hack: This flag tells the linker that no global symbols + // contain code that falls through to other global symbols (e.g. the obvious + // implementation of multiple entry points). If this doesn't occur, the + // linker can safely perform dead code stripping. Since LLVM never + // generates code that does this, it is always safe to set. + OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + } +} + +//===----------------------------------------------------------------------===// +// Helper routines for EmitStartOfAsmFile() and EmitEndOfAsmFile() +// FIXME: +// The following seem like one-off assembler flags, but they actually need +// to appear in the .ARM.attributes section in ELF. +// Instead of subclassing the MCELFStreamer, we do the work here. + +void ARMAsmPrinter::emitAttributes() { + + emitARMAttributeSection(); + + /* GAS expect .fpu to be emitted, regardless of VFP build attribute */ + bool emitFPU = false; + AttributeEmitter *AttrEmitter; + if (OutStreamer.hasRawTextSupport()) { + AttrEmitter = new AsmAttributeEmitter(OutStreamer); + emitFPU = true; + } else { + MCObjectStreamer &O = static_cast(OutStreamer); + AttrEmitter = new ObjectAttributeEmitter(O); + } + + AttrEmitter->MaybeSwitchVendor("aeabi"); + + std::string CPUString = Subtarget->getCPUString(); + + if (CPUString == "cortex-a8" || + Subtarget->isCortexA8()) { + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8"); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); + AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); + // Fixme: figure out when this is emitted. + //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch, + // ARMBuildAttrs::AllowWMMXv1); + // + + /// ADD additional Else-cases here! + } else if (CPUString == "generic") { + // FIXME: Why these defaults? 
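+    // In textual mode these defaults come out roughly as:
+    //   .eabi_attribute 6, 2   @ Tag_CPU_arch = v4T
+    //   .eabi_attribute 8, 1   @ Tag_ARM_ISA_use = Allowed
+    //   .eabi_attribute 9, 1   @ Tag_THUMB_ISA_use = Allowed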
+ AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::Allowed); + } + + if (Subtarget->hasNEON() && emitFPU) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/vfpv3/vfpv2 for .fpu parameters */ + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + /* If emitted for NEON, omit from VFP below, since you can have both + * NEON and VFP in build attributes but only one .fpu */ + emitFPU = false; + } + + /* VFPv3 + .fpu */ + if (Subtarget->hasVFP3()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3"); + + /* VFPv2 + .fpu */ + } else if (Subtarget->hasVFP2()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv2); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2"); + } + + /* TODO: ARMBuildAttrs::Allowed is not completely accurate, + * since NEON can have 1 (allowed) or 2 (fused MAC operations) */ + if (Subtarget->hasNEON()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); + } + + // Signal various FP modes. + if (!UnsafeFPMath) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal, + ARMBuildAttrs::Allowed); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions, + ARMBuildAttrs::Allowed); + } + + if (NoInfsFPMath && NoNaNsFPMath) + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::Allowed); + else + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::AllowIEE754); + + // FIXME: add more flags to ARMBuildAttrs.h + // 8-bytes alignment stuff. + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); + + // Hard float. Use both S and D registers and conform to AAPCS-VFP. + if (Subtarget->isAAPCS_ABI() && FloatABIType == FloatABI::Hard) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3); + AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1); + } + // FIXME: Should we signal R9 usage? 
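+  // When writing an object file rather than assembly, everything this
+  // function emits is packed into the .ARM.attributes section instead. For
+  // the "aeabi" vendor the subsection header is 4 (length) + 5 ("aeabi") +
+  // 1 (NUL) = 10 bytes, followed by a File tag (0x01), a 4-byte size, and
+  // the raw tag/value byte pairs.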
+
+  if (Subtarget->hasDivide())
+    AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1);
+
+  AttrEmitter->Finish();
+  delete AttrEmitter;
+}
+
+void ARMAsmPrinter::emitARMAttributeSection() {
+  // <format-version>
+  // [ <section-length> "vendor-name"
+  //   [ <file-tag> <size> <attribute>*
+  //     | <section-tag> <size> <section-number>* 0 <attribute>*
+  //     | <symbol-tag> <size> <symbol-number>* 0 <attribute>*
+  //   ]+
+  // ]*
+
+  if (OutStreamer.hasRawTextSupport())
+    return;
+
+  const ARMElfTargetObjectFile &TLOFELF =
+    static_cast<const ARMElfTargetObjectFile &>
+      (getObjFileLowering());
+
+  OutStreamer.SwitchSection(TLOFELF.getAttributesSection());
+
+  // Format version
+  OutStreamer.EmitIntValue(0x41, 1);
+}
+
+//===----------------------------------------------------------------------===//
+
+static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+                             unsigned LabelId, MCContext &Ctx) {
+
+  MCSymbol *Label = Ctx.GetOrCreateSymbol(Twine(Prefix)
+                       + "PC" + Twine(FunctionNumber) + "_" + Twine(LabelId));
+  return Label;
+}
+
+static MCSymbolRefExpr::VariantKind
+getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
+  switch (Modifier) {
+  default: llvm_unreachable("Unknown modifier!");
+  case ARMCP::no_modifier: return MCSymbolRefExpr::VK_None;
+  case ARMCP::TLSGD:       return MCSymbolRefExpr::VK_ARM_TLSGD;
+  case ARMCP::TPOFF:       return MCSymbolRefExpr::VK_ARM_TPOFF;
+  case ARMCP::GOTTPOFF:    return MCSymbolRefExpr::VK_ARM_GOTTPOFF;
+  case ARMCP::GOT:         return MCSymbolRefExpr::VK_ARM_GOT;
+  case ARMCP::GOTOFF:      return MCSymbolRefExpr::VK_ARM_GOTOFF;
+  }
+  return MCSymbolRefExpr::VK_None;
+}
+
+MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) {
+  bool isIndirect = Subtarget->isTargetDarwin() &&
+    Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel());
+  if (!isIndirect)
+    return Mang->getSymbol(GV);
+
+  // FIXME: Remove this when Darwin transitions to @GOT-like syntax.
+  MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
+  MachineModuleInfoMachO &MMIMachO =
+    MMI->getObjFileInfo<MachineModuleInfoMachO>();
+  MachineModuleInfoImpl::StubValueTy &StubSym =
+    GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) :
+                                MMIMachO.getGVStubEntry(MCSym);
+  if (StubSym.getPointer() == 0)
+    StubSym = MachineModuleInfoImpl::
+      StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+  return MCSym;
+}
+
+void ARMAsmPrinter::
+EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
+  int Size = TM.getTargetData()->getTypeAllocSize(MCPV->getType());
+
+  ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+
+  MCSymbol *MCSym;
+  if (ACPV->isLSDA()) {
+    SmallString<128> Str;
+    raw_svector_ostream OS(Str);
+    OS << MAI->getPrivateGlobalPrefix() << "_LSDA_" << getFunctionNumber();
+    MCSym = OutContext.GetOrCreateSymbol(OS.str());
+  } else if (ACPV->isBlockAddress()) {
+    MCSym = GetBlockAddressSymbol(ACPV->getBlockAddress());
+  } else if (ACPV->isGlobalValue()) {
+    const GlobalValue *GV = ACPV->getGV();
+    MCSym = GetARMGVSymbol(GV);
+  } else {
+    assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
+    MCSym = GetExternalSymbolSymbol(ACPV->getSymbol());
+  }
+
+  // Create an MCSymbol for the reference.
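+  // For a PIC reference the expression built below has the shape
+  //   _foo(GOT) - (LPC0_1 + 8)
+  // (symbol plus modifier, minus the pic base label adjusted for the PC
+  // fetch offset), optionally re-anchored at '.' when the entry must
+  // account for its own address.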
+ const MCExpr *Expr = + MCSymbolRefExpr::Create(MCSym, getModifierVariantKind(ACPV->getModifier()), + OutContext); + + if (ACPV->getPCAdjustment()) { + MCSymbol *PCLabel = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + ACPV->getLabelId(), + OutContext); + const MCExpr *PCRelExpr = MCSymbolRefExpr::Create(PCLabel, OutContext); + PCRelExpr = + MCBinaryExpr::CreateAdd(PCRelExpr, + MCConstantExpr::Create(ACPV->getPCAdjustment(), + OutContext), + OutContext); + if (ACPV->mustAddCurrentAddress()) { + // We want "( - .)", but MC doesn't have a concept of the '.' + // label, so just emit a local label end reference that instead. + MCSymbol *DotSym = OutContext.CreateTempSymbol(); + OutStreamer.EmitLabel(DotSym); + const MCExpr *DotExpr = MCSymbolRefExpr::Create(DotSym, OutContext); + PCRelExpr = MCBinaryExpr::CreateSub(PCRelExpr, DotExpr, OutContext); + } + Expr = MCBinaryExpr::CreateSub(Expr, PCRelExpr, OutContext); + } + OutStreamer.EmitValue(Expr, Size); +} + +void ARMAsmPrinter::EmitJumpTable(const MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + int OpNum = 1; + if (Opcode == ARM::BR_JTadd) + OpNum = 2; + else if (Opcode == ARM::BR_JTm) + OpNum = 3; + + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id + unsigned JTI = MO1.getIndex(); + + // Emit a label for the jump table. + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); + + // Emit each entry of the table. + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + const std::vector &JTBBs = JT[JTI].MBBs; + + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + // Construct an MCExpr for the entry. We want a value of the form: + // (BasicBlockAddr - TableBeginAddr) + // + // For example, a table with entries jumping to basic blocks BB0 and BB1 + // would look like: + // LJTI_0_0: + // .word (LBB0 - LJTI_0_0) + // .word (LBB1 - LJTI_0_0) + const MCExpr *Expr = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); + + if (TM.getRelocationModel() == Reloc::PIC_) + Expr = MCBinaryExpr::CreateSub(Expr, MCSymbolRefExpr::Create(JTISymbol, + OutContext), + OutContext); + OutStreamer.EmitValue(Expr, 4); + } +} + +void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + int OpNum = (Opcode == ARM::t2BR_JT) ? 2 : 1; + const MachineOperand &MO1 = MI->getOperand(OpNum); + const MachineOperand &MO2 = MI->getOperand(OpNum+1); // Unique Id + unsigned JTI = MO1.getIndex(); + + // Emit a label for the jump table. + MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel2(JTI, MO2.getImm()); + OutStreamer.EmitLabel(JTISymbol); + + // Emit each entry of the table. + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + const std::vector &JT = MJTI->getJumpTables(); + const std::vector &JTBBs = JT[JTI].MBBs; + unsigned OffsetWidth = 4; + if (MI->getOpcode() == ARM::t2TBB_JT) + OffsetWidth = 1; + else if (MI->getOpcode() == ARM::t2TBH_JT) + OffsetWidth = 2; + + for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { + MachineBasicBlock *MBB = JTBBs[i]; + const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(), + OutContext); + // If this isn't a TBB or TBH, the entries are direct branch instructions. 
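+    // In that case the "table" is just a sequence of unconditional Thumb2
+    // branches, e.g.:
+    //   LJTI_0_0:
+    //     b.w LBB0
+    //     b.w LBB1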
+ if (OffsetWidth == 4) { + MCInst BrInst; + BrInst.setOpcode(ARM::t2B); + BrInst.addOperand(MCOperand::CreateExpr(MBBSymbolExpr)); + OutStreamer.EmitInstruction(BrInst); + continue; + } + // Otherwise it's an offset from the dispatch instruction. Construct an + // MCExpr for the entry. We want a value of the form: + // (BasicBlockAddr - TableBeginAddr) / 2 + // + // For example, a TBB table with entries jumping to basic blocks BB0 and BB1 + // would look like: + // LJTI_0_0: + // .byte (LBB0 - LJTI_0_0) / 2 + // .byte (LBB1 - LJTI_0_0) / 2 + const MCExpr *Expr = + MCBinaryExpr::CreateSub(MBBSymbolExpr, + MCSymbolRefExpr::Create(JTISymbol, OutContext), + OutContext); + Expr = MCBinaryExpr::CreateDiv(Expr, MCConstantExpr::Create(2, OutContext), + OutContext); + OutStreamer.EmitValue(Expr, OffsetWidth); + } +} + +void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, + raw_ostream &OS) { + unsigned NOps = MI->getNumOperands(); + assert(NOps==4); + OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; + // cast away const; DIetc do not take const operands for some reason. + DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); + OS << V.getName(); + OS << " <- "; + // Frame address. Currently handles register +- offset only. + assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); + OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); + OS << ']'; + OS << "+"; + printOperand(MI, NOps-2, OS); +} + +static void populateADROperands(MCInst &Inst, unsigned Dest, + const MCSymbol *Label, + unsigned pred, unsigned ccreg, + MCContext &Ctx) { + const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, Ctx); + Inst.addOperand(MCOperand::CreateReg(Dest)); + Inst.addOperand(MCOperand::CreateExpr(SymbolExpr)); + // Add predicate operands. + Inst.addOperand(MCOperand::CreateImm(pred)); + Inst.addOperand(MCOperand::CreateReg(ccreg)); +} + +void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, + unsigned Opcode) { + MCInst TmpInst; + + // Emit the instruction as usual, just patch the opcode. + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(Opcode); + OutStreamer.EmitInstruction(TmpInst); +} + +void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { + assert(MI->getFlag(MachineInstr::FrameSetup) && + "Only instruction which are involved into frame setup code are allowed"); + + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMFunctionInfo &AFI = *MF.getInfo(); + + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned Opc = MI->getOpcode(); + unsigned SrcReg, DstReg; + + if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) { + // Two special cases: + // 1) tPUSH does not have src/dst regs. + // 2) for Thumb1 code we sometimes materialize the constant via constpool + // load. Yes, this is pretty fragile, but for now I don't see better + // way... :( + SrcReg = DstReg = ARM::SP; + } else { + SrcReg = MI->getOperand(1).getReg(); + DstReg = MI->getOperand(0).getReg(); + } + + // Try to figure out the unwinding opcode out of src / dst regs. + if (MI->getDesc().mayStore()) { + // Register saves. + assert(DstReg == ARM::SP && + "Only stack pointer as a destination reg is supported"); + + SmallVector RegList; + // Skip src & dst reg, and pred ops. + unsigned StartOp = 2 + 2; + // Use all the operands. 
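+    // E.g. a Thumb1 prologue "push {r4, r7, lr}" (tPUSH) carries no explicit
+    // src/dst operands, so StartOp drops to 2 below and the collected list
+    // is emitted as the EHABI directive ".save {r4, r7, lr}".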
+ unsigned NumOffset = 0; + + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::tPUSH: + // Special case here: no src & dst reg, but two extra imp ops. + StartOp = 2; NumOffset = 2; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + case ARM::VSTMDDB_UPD: + assert(SrcReg == ARM::SP && + "Only stack pointer as a source reg is supported"); + for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; + i != NumOps; ++i) + RegList.push_back(MI->getOperand(i).getReg()); + break; + case ARM::STR_PRE: + assert(MI->getOperand(2).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + RegList.push_back(SrcReg); + break; + } + OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + } else { + // Changes of stack / frame pointer. + if (SrcReg == ARM::SP) { + int64_t Offset = 0; + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::MOVr: + case ARM::tMOVgpr2gpr: + case ARM::tMOVgpr2tgpr: + Offset = 0; + break; + case ARM::ADDri: + Offset = -MI->getOperand(2).getImm(); + break; + case ARM::SUBri: + case ARM::t2SUBrSPi: + Offset = MI->getOperand(2).getImm(); + break; + case ARM::tSUBspi: + Offset = MI->getOperand(2).getImm()*4; + break; + case ARM::tADDspi: + case ARM::tADDrSPi: + Offset = -MI->getOperand(2).getImm()*4; + break; + case ARM::tLDRpci: { + // Grab the constpool index and check, whether it corresponds to + // original or cloned constpool entry. + unsigned CPI = MI->getOperand(1).getIndex(); + const MachineConstantPool *MCP = MF.getConstantPool(); + if (CPI >= MCP->getConstants().size()) + CPI = AFI.getOriginalCPIdx(CPI); + assert(CPI != -1U && "Invalid constpool index"); + + // Derive the actual offset. + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); + // FIXME: Check for user, it should be "add" instruction! + Offset = -cast(CPE.Val.ConstVal)->getSExtValue(); + break; + } + } + + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. 
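+        // E.g. "sub sp, sp, #16" in the prologue reaches here with
+        // Offset == 16 and is recorded as ".pad #16".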
+ OutStreamer.EmitPad(Offset); + } else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } else if (DstReg == ARM::SP) { + // FIXME: .movsp goes here + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } +} + +extern cl::opt EnableARMEHABI; + +void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: break; + case ARM::t2ADDrSPi: + case ARM::t2ADDrSPi12: + case ARM::t2SUBrSPi: + case ARM::t2SUBrSPi12: + assert ((MI->getOperand(1).getReg() == ARM::SP) && + "Unexpected source register!"); + break; + + case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass"); + case ARM::DBG_VALUE: { + if (isVerbose() && OutStreamer.hasRawTextSupport()) { + SmallString<128> TmpStr; + raw_svector_ostream OS(TmpStr); + PrintDebugValueComment(MI, OS); + OutStreamer.EmitRawText(StringRef(OS.str())); + } + return; + } + case ARM::tBfar: { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tBL); + TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create( + MI->getOperand(0).getMBB()->getSymbol(), OutContext))); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LEApcrel: + case ARM::tLEApcrel: + case ARM::t2LEApcrel: { + // FIXME: Need to also handle globals and externals + MCInst TmpInst; + TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrel ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrel ? ARM::tADR + : ARM::ADR)); + populateADROperands(TmpInst, MI->getOperand(0).getReg(), + GetCPISymbol(MI->getOperand(1).getIndex()), + MI->getOperand(2).getImm(), MI->getOperand(3).getReg(), + OutContext); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LEApcrelJT: + case ARM::tLEApcrelJT: + case ARM::t2LEApcrelJT: { + MCInst TmpInst; + TmpInst.setOpcode(MI->getOpcode() == ARM::t2LEApcrelJT ? ARM::t2ADR + : (MI->getOpcode() == ARM::tLEApcrelJT ? ARM::tADR + : ARM::ADR)); + populateADROperands(TmpInst, MI->getOperand(0).getReg(), + GetARMJTIPICJumpTableLabel2(MI->getOperand(1).getIndex(), + MI->getOperand(2).getImm()), + MI->getOperand(3).getImm(), MI->getOperand(4).getReg(), + OutContext); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVPCRX: { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::BXr9_CALL: + case ARM::BX_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::BX); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::BMOVPCRXr9_CALL: + case ARM::BMOVPCRX_CALL: { + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::LR)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::MOVi16_ga_pcrel: + case ARM::t2MOVi16_ga_pcrel: { + MCInst TmpInst; + TmpInst.setOpcode(Opc == ARM::MOVi16_ga_pcrel? ARM::MOVi16 : ARM::t2MOVi16); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + + unsigned TF = MI->getOperand(1).getTargetFlags(); + bool isPIC = TF == ARMII::MO_LO16_NONLAZY_PIC; + const GlobalValue *GV = MI->getOperand(1).getGlobal(); + MCSymbol *GVSym = GetARMGVSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(2).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateLower16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), + OutContext), OutContext), OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateLower16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } + + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::MOVTi16_ga_pcrel: + case ARM::t2MOVTi16_ga_pcrel: { + MCInst TmpInst; + TmpInst.setOpcode(Opc == ARM::MOVTi16_ga_pcrel + ? 
ARM::MOVTi16 : ARM::t2MOVTi16); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + + unsigned TF = MI->getOperand(2).getTargetFlags(); + bool isPIC = TF == ARMII::MO_HI16_NONLAZY_PIC; + const GlobalValue *GV = MI->getOperand(2).getGlobal(); + MCSymbol *GVSym = GetARMGVSymbol(GV); + const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); + if (isPIC) { + MCSymbol *LabelSym = getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), + MI->getOperand(3).getImm(), OutContext); + const MCExpr *LabelSymExpr= MCSymbolRefExpr::Create(LabelSym, OutContext); + unsigned PCAdj = (Opc == ARM::MOVTi16_ga_pcrel) ? 8 : 4; + const MCExpr *PCRelExpr = + ARMMCExpr::CreateUpper16(MCBinaryExpr::CreateSub(GVSymExpr, + MCBinaryExpr::CreateAdd(LabelSymExpr, + MCConstantExpr::Create(PCAdj, OutContext), + OutContext), OutContext), OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(PCRelExpr)); + } else { + const MCExpr *RefExpr= ARMMCExpr::CreateUpper16(GVSymExpr, OutContext); + TmpInst.addOperand(MCOperand::CreateExpr(RefExpr)); + } + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tPICADD: { + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // add r0, pc + // This adds the address of LPC0 to r0. + + // Emit the label. + OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + + // Form and emit the add. + MCInst AddInst; + AddInst.setOpcode(ARM::tADDhirr); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // Add predicate operands. + AddInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + AddInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(AddInst); + return; + } + case ARM::PICADD: { + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // add r0, pc, r0 + // This adds the address of LPC0 to r0. + + // Emit the label. + OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + + // Form and emit the add. + MCInst AddInst; + AddInst.setOpcode(ARM::ADDrr); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + AddInst.addOperand(MCOperand::CreateReg(ARM::PC)); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + // Add predicate operands. 
+ AddInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); + AddInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); + // Add 's' bit operand (always reg0 for this) + AddInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(AddInst); + return; + } + case ARM::PICSTR: + case ARM::PICSTRB: + case ARM::PICSTRH: + case ARM::PICLDR: + case ARM::PICLDRB: + case ARM::PICLDRH: + case ARM::PICLDRSB: + case ARM::PICLDRSH: { + // This is a pseudo op for a label + instruction sequence, which looks like: + // LPC0: + // OP r0, [pc, r0] + // The LCP0 label is referenced by a constant pool entry in order to get + // a PC-relative address at the ldr instruction. + + // Emit the label. + OutStreamer.EmitLabel(getPICLabel(MAI->getPrivateGlobalPrefix(), + getFunctionNumber(), MI->getOperand(2).getImm(), + OutContext)); + + // Form and emit the load + unsigned Opcode; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unexpected opcode!"); + case ARM::PICSTR: Opcode = ARM::STRrs; break; + case ARM::PICSTRB: Opcode = ARM::STRBrs; break; + case ARM::PICSTRH: Opcode = ARM::STRH; break; + case ARM::PICLDR: Opcode = ARM::LDRrs; break; + case ARM::PICLDRB: Opcode = ARM::LDRBrs; break; + case ARM::PICLDRH: Opcode = ARM::LDRH; break; + case ARM::PICLDRSB: Opcode = ARM::LDRSB; break; + case ARM::PICLDRSH: Opcode = ARM::LDRSH; break; + } + MCInst LdStInst; + LdStInst.setOpcode(Opcode); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + LdStInst.addOperand(MCOperand::CreateReg(ARM::PC)); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + LdStInst.addOperand(MCOperand::CreateImm(0)); + // Add predicate operands. + LdStInst.addOperand(MCOperand::CreateImm(MI->getOperand(3).getImm())); + LdStInst.addOperand(MCOperand::CreateReg(MI->getOperand(4).getReg())); + OutStreamer.EmitInstruction(LdStInst); + + return; + } + case ARM::CONSTPOOL_ENTRY: { + /// CONSTPOOL_ENTRY - This instruction represents a floating constant pool + /// in the function. The first operand is the ID# for this instruction, the + /// second is the index into the MachineConstantPool that this is, the third + /// is the size in bytes of this constant pool entry. + unsigned LabelId = (unsigned)MI->getOperand(0).getImm(); + unsigned CPIdx = (unsigned)MI->getOperand(1).getIndex(); + + EmitAlignment(2); + OutStreamer.EmitLabel(GetCPISymbol(LabelId)); + + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPIdx]; + if (MCPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal); + else + EmitGlobalConstant(MCPE.Val.ConstVal); + + return; + } + case ARM::t2BR_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVgpr2gpr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + return; + } + case ARM::t2TBB_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + + TmpInst.setOpcode(ARM::t2TBB); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + // Make sure the next instruction is 2-byte aligned. + EmitAlignment(1); + return; + } + case ARM::t2TBH_JT: { + // Lower and emit the instruction itself, then the jump table following it. + MCInst TmpInst; + + TmpInst.setOpcode(ARM::t2TBH); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + // Output the data for the jump table itself + EmitJump2Table(MI); + return; + } + case ARM::tBR_JTr: + case ARM::BR_JTr: { + // Lower and emit the instruction itself, then the jump table following it. + // mov pc, target + MCInst TmpInst; + unsigned Opc = MI->getOpcode() == ARM::BR_JTr ? + ARM::MOVr : ARM::tMOVgpr2gpr; + TmpInst.setOpcode(Opc); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + if (Opc == ARM::MOVr) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + + // Make sure the Thumb jump table is 4-byte aligned. + if (Opc == ARM::tMOVgpr2gpr) + EmitAlignment(2); + + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::BR_JTm: { + // Lower and emit the instruction itself, then the jump table following it. + // ldr pc, target + MCInst TmpInst; + if (MI->getOperand(1).getReg() == 0) { + // literal offset + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm())); + } else { + TmpInst.setOpcode(ARM::LDRrs); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + TmpInst.addOperand(MCOperand::CreateImm(0)); + } + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::BR_JTadd: { + // Lower and emit the instruction itself, then the jump table following it. + // add pc, target, idx + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDrr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(1).getReg())); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Add 's' bit operand (always reg0 for this) + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + + // Output the data for the jump table itself + EmitJumpTable(MI); + return; + } + case ARM::TRAP: { + // Non-Darwin binutils don't yet support the "trap" mnemonic. + // FIXME: Remove this special case when they do. 
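+    // 0xe7ffdefe lives in the architecturally undefined instruction space
+    // (UDF #0xfdee in later assembler syntax), presumably the same encoding
+    // the "trap" mnemonic produces, so both paths fault identically.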
+ if (!Subtarget->isTargetDarwin()) { + //.long 0xe7ffdefe @ trap + uint32_t Val = 0xe7ffdefeUL; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 4); + return; + } + break; + } + case ARM::tTRAP: { + // Non-Darwin binutils don't yet support the "trap" mnemonic. + // FIXME: Remove this special case when they do. + if (!Subtarget->isTargetDarwin()) { + //.short 57086 @ trap + uint16_t Val = 0xdefe; + OutStreamer.AddComment("trap"); + OutStreamer.EmitIntValue(Val, 2); + return; + } + break; + } + case ARM::t2Int_eh_sjlj_setjmp: + case ARM::t2Int_eh_sjlj_setjmp_nofp: + case ARM::tInt_eh_sjlj_setjmp: { + // Two incoming args: GPR:$src, GPR:$val + // mov $val, pc + // adds $val, #7 + // str $val, [$src, #4] + // movs r0, #0 + // b 1f + // movs r0, #1 + // 1: + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ValReg = MI->getOperand(1).getReg(); + MCSymbol *Label = GetARMSJLJEHLabel(); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVgpr2tgpr); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + // 's' bit operand + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tADDi3); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + // 's' bit operand + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateImm(7)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tSTRi); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + // The offset immediate is #4. The operand value is scaled by 4 for the + // tSTR instruction. + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + const MCExpr *SymbolExpr = MCSymbolRefExpr::Create(Label, OutContext); + MCInst TmpInst; + TmpInst.setOpcode(ARM::tB); + TmpInst.addOperand(MCOperand::CreateExpr(SymbolExpr)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVi8); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(TmpInst); + } + OutStreamer.EmitLabel(Label); + return; + } + + case ARM::Int_eh_sjlj_setjmp_nofp: + case ARM::Int_eh_sjlj_setjmp: { + // Two incoming args: GPR:$src, GPR:$val + // add $val, pc, #8 + // str $val, [$src, #+4] + // mov r0, #0 + // add pc, pc, #0 + // mov r0, #1 + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ValReg = MI->getOperand(1).getReg(); + + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDri); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateImm(8)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp begin"); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::STRi12); + TmpInst.addOperand(MCOperand::CreateReg(ValReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(4)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::ADDri); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateReg(ARM::PC)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVi); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R0)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // 's' bit operand (always reg0 for this). + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("eh_setjmp end"); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::Int_eh_sjlj_longjmp: { + // ldr sp, [$src, #8] + // ldr $scratch, [$src, #4] + // ldr r7, [$src] + // bx $scratch + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ScratchReg = MI->getOperand(1).getReg(); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(8)); + // Predicate. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(4)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::LDRi12); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::BX); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + case ARM::tInt_eh_sjlj_longjmp: { + // ldr $scratch, [$src, #8] + // mov sp, $scratch + // ldr $scratch, [$src, #4] + // ldr r7, [$src] + // bx $scratch + unsigned SrcReg = MI->getOperand(0).getReg(); + unsigned ScratchReg = MI->getOperand(1).getReg(); + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRi); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + // The offset immediate is #8. The operand value is scaled by 4 for the + // tLDR instruction. + TmpInst.addOperand(MCOperand::CreateImm(2)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tMOVtgpr2gpr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::SP)); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRi); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateImm(1)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tLDRr); + TmpInst.addOperand(MCOperand::CreateReg(ARM::R7)); + TmpInst.addOperand(MCOperand::CreateReg(SrcReg)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + { + MCInst TmpInst; + TmpInst.setOpcode(ARM::tBX_RET_vararg); + TmpInst.addOperand(MCOperand::CreateReg(ScratchReg)); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + } + return; + } + // These are the pseudos created to comply with stricter operand restrictions + // on ARMv5. Lower them now to "normal" instructions, since all the + // restrictions are already satisfied. 
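+ // A minimal sketch of what EmitPatchedInstruction (declared in
+ // ARMAsmPrinter.h) is assumed to do for these cases: lower the pseudo
+ // normally, then swap in the real opcode before emission, e.g.
+ //   MCInst Inst;
+ //   LowerARMMachineInstrToMCInst(MI, Inst, *this);
+ //   Inst.setOpcode(ARM::MUL);   // the TargetOpc passed below
+ //   OutStreamer.EmitInstruction(Inst);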
+ case ARM::MULv5:
+ EmitPatchedInstruction(MI, ARM::MUL);
+ return;
+ case ARM::MLAv5:
+ EmitPatchedInstruction(MI, ARM::MLA);
+ return;
+ case ARM::SMULLv5:
+ EmitPatchedInstruction(MI, ARM::SMULL);
+ return;
+ case ARM::UMULLv5:
+ EmitPatchedInstruction(MI, ARM::UMULL);
+ return;
+ case ARM::SMLALv5:
+ EmitPatchedInstruction(MI, ARM::SMLAL);
+ return;
+ case ARM::UMLALv5:
+ EmitPatchedInstruction(MI, ARM::UMLAL);
+ return;
+ case ARM::UMAALv5:
+ EmitPatchedInstruction(MI, ARM::UMAAL);
+ return;
+ }
+
+ MCInst TmpInst;
+ LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
+
+ // Emit unwinding stuff for frame-related instructions
+ if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup))
+ EmitUnwindingInstruction(MI);
+
+ OutStreamer.EmitInstruction(TmpInst);
+}
+
+//===----------------------------------------------------------------------===//
+// Target Registry Stuff
+//===----------------------------------------------------------------------===//
+
+static MCInstPrinter *createARMMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ if (SyntaxVariant == 0)
+ return new ARMInstPrinter(MAI);
+ return 0;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() {
+ RegisterAsmPrinter<ARMAsmPrinter> X(TheARMTarget);
+ RegisterAsmPrinter<ARMAsmPrinter> Y(TheThumbTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheARMTarget, createARMMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheThumbTarget, createARMMCInstPrinter);
+}
+
diff --git a/final/lib/Target/ARM/ARMAsmPrinter.h b/final/lib/Target/ARM/ARMAsmPrinter.h
new file mode 100644
index 00000000000..9db139b672c
--- /dev/null
+++ b/final/lib/Target/ARM/ARMAsmPrinter.h
@@ -0,0 +1,114 @@
+//===-- ARMAsmPrinter.h - Print machine code to an ARM .s file ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// ARM Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMASMPRINTER_H
+#define ARMASMPRINTER_H
+
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+namespace ARM {
+ enum DW_ISA {
+ DW_ISA_ARM_thumb = 1,
+ DW_ISA_ARM_arm = 2
+ };
+}
+
+class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
+
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when printing asm code for different targets.
+ const ARMSubtarget *Subtarget;
+
+ /// AFI - Keep a pointer to ARMFunctionInfo for the current
+ /// MachineFunction.
+ ARMFunctionInfo *AFI;
+
+ /// MCP - Keep a pointer to constantpool entries of the current
+ /// MachineFunction.
+ const MachineConstantPool *MCP;
+
+public:
+ explicit ARMAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer), AFI(NULL), MCP(NULL) {
+ Subtarget = &TM.getSubtarget<ARMSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Assembly Printer";
+ }
+
+ void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O,
+ const char *Modifier = 0);
+
+ virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant,
+ const char *ExtraCode, raw_ostream &O);
+
+ void EmitJumpTable(const MachineInstr *MI);
+ void EmitJump2Table(const MachineInstr *MI);
+ virtual void EmitInstruction(const MachineInstr *MI);
+ bool runOnMachineFunction(MachineFunction &F);
+
+ virtual void EmitConstantPool() {} // we emit constant pools ourselves!
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ void EmitEndOfAsmFile(Module &M);
+
+private:
+ // Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
+ void emitAttributes();
+
+ // Helper for ELF .o only
+ void emitARMAttributeSection();
+
+ // Generic helper used to emit e.g. ARMv5 mul pseudos
+ void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
+
+ void EmitUnwindingInstruction(const MachineInstr *MI);
+
+public:
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+
+ MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+
+ virtual unsigned getISAEncoding() {
+ // ARM/Darwin adds ISA to the DWARF info for each function.
+ if (!Subtarget->isTargetDarwin())
+ return 0;
+ return Subtarget->isThumb() ?
+ llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
+ }
+
+ MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
+ const MachineBasicBlock *MBB) const;
+ MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
+
+ MCSymbol *GetARMSJLJEHLabel(void) const;
+
+ MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
+
+ /// EmitMachineConstantPoolValue - Print a machine constantpool value to
+ /// the .s file.
+ virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
+};
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/Target/ARM/ARMBaseInfo.h b/final/lib/Target/ARM/ARMBaseInfo.h
new file mode 100644
index 00000000000..a56cc1a9f24
--- /dev/null
+++ b/final/lib/Target/ARM/ARMBaseInfo.h
@@ -0,0 +1,249 @@
+//===-- ARMBaseInfo.h - Top level definitions for ARM -------- --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains small standalone helper functions and enum definitions for
+// the ARM target useful for the compiler back-end and the MC libraries.
+// As such, it deliberately does not include references to LLVM core
+// code gen types, passes, etc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMBASEINFO_H
+#define ARMBASEINFO_H
+
+#include "llvm/Support/ErrorHandling.h"
+
+// Note that the following auto-generated files only define enum types, and
+// so are safe to include here.
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+// +#include "ARMGenRegisterNames.inc" + +// Defines symbolic names for the ARM instructions. +// +#include "ARMGenInstrNames.inc" + +namespace llvm { + +// Enums corresponding to ARM condition codes +namespace ARMCC { + // The CondCodes constants map directly to the 4-bit encoding of the + // condition field for predicated instructions. + enum CondCodes { // Meaning (integer) Meaning (floating-point) + EQ, // Equal Equal + NE, // Not equal Not equal, or unordered + HS, // Carry set >, ==, or unordered + LO, // Carry clear Less than + MI, // Minus, negative Less than + PL, // Plus, positive or zero >, ==, or unordered + VS, // Overflow Unordered + VC, // No overflow Not unordered + HI, // Unsigned higher Greater than, or unordered + LS, // Unsigned lower or same Less than or equal + GE, // Greater than or equal Greater than or equal + LT, // Less than Less than, or unordered + GT, // Greater than Greater than + LE, // Less than or equal <, ==, or unordered + AL // Always (unconditional) Always (unconditional) + }; + + inline static CondCodes getOppositeCondition(CondCodes CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case EQ: return NE; + case NE: return EQ; + case HS: return LO; + case LO: return HS; + case MI: return PL; + case PL: return MI; + case VS: return VC; + case VC: return VS; + case HI: return LS; + case LS: return HI; + case GE: return LT; + case LT: return GE; + case GT: return LE; + case LE: return GT; + } + } +} // namespace ARMCC + +inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code"); + case ARMCC::EQ: return "eq"; + case ARMCC::NE: return "ne"; + case ARMCC::HS: return "hs"; + case ARMCC::LO: return "lo"; + case ARMCC::MI: return "mi"; + case ARMCC::PL: return "pl"; + case ARMCC::VS: return "vs"; + case ARMCC::VC: return "vc"; + case ARMCC::HI: return "hi"; + case ARMCC::LS: return "ls"; + case ARMCC::GE: return "ge"; + case ARMCC::LT: return "lt"; + case ARMCC::GT: return "gt"; + case ARMCC::LE: return "le"; + case ARMCC::AL: return "al"; + } +} + +namespace ARM_PROC { + enum IMod { + IE = 2, + ID = 3 + }; + + enum IFlags { + F = 1, + I = 2, + A = 4 + }; + + inline static const char *IFlagsToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown iflags operand"); + case F: return "f"; + case I: return "i"; + case A: return "a"; + } + } + + inline static const char *IModToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown imod operand"); + case IE: return "ie"; + case ID: return "id"; + } + } +} + +namespace ARM_MB { + // The Memory Barrier Option constants map directly to the 4-bit encoding of + // the option field for memory barrier operations. + enum MemBOpt { + SY = 15, + ST = 14, + ISH = 11, + ISHST = 10, + NSH = 7, + NSHST = 6, + OSH = 3, + OSHST = 2 + }; + + inline static const char *MemBOptToString(unsigned val) { + switch (val) { + default: llvm_unreachable("Unknown memory operation"); + case SY: return "sy"; + case ST: return "st"; + case ISH: return "ish"; + case ISHST: return "ishst"; + case NSH: return "nsh"; + case NSHST: return "nshst"; + case OSH: return "osh"; + case OSHST: return "oshst"; + } + } +} // namespace ARM_MB + +/// getARMRegisterNumbering - Given the enum value for some register, e.g. +/// ARM::LR, return the number that it corresponds to (e.g. 14). 
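+/// For example, getARMRegisterNumbering(ARM::SP) returns 13; as the switch
+/// below shows, SP shares its encoding with S13, D13 and Q13.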
+inline static unsigned getARMRegisterNumbering(unsigned Reg) {
+ using namespace ARM;
+ switch (Reg) {
+ default:
+ llvm_unreachable("Unknown ARM register!");
+ case R0: case S0: case D0: case Q0: return 0;
+ case R1: case S1: case D1: case Q1: return 1;
+ case R2: case S2: case D2: case Q2: return 2;
+ case R3: case S3: case D3: case Q3: return 3;
+ case R4: case S4: case D4: case Q4: return 4;
+ case R5: case S5: case D5: case Q5: return 5;
+ case R6: case S6: case D6: case Q6: return 6;
+ case R7: case S7: case D7: case Q7: return 7;
+ case R8: case S8: case D8: case Q8: return 8;
+ case R9: case S9: case D9: case Q9: return 9;
+ case R10: case S10: case D10: case Q10: return 10;
+ case R11: case S11: case D11: case Q11: return 11;
+ case R12: case S12: case D12: case Q12: return 12;
+ case SP: case S13: case D13: case Q13: return 13;
+ case LR: case S14: case D14: case Q14: return 14;
+ case PC: case S15: case D15: case Q15: return 15;
+
+ case S16: case D16: return 16;
+ case S17: case D17: return 17;
+ case S18: case D18: return 18;
+ case S19: case D19: return 19;
+ case S20: case D20: return 20;
+ case S21: case D21: return 21;
+ case S22: case D22: return 22;
+ case S23: case D23: return 23;
+ case S24: case D24: return 24;
+ case S25: case D25: return 25;
+ case S26: case D26: return 26;
+ case S27: case D27: return 27;
+ case S28: case D28: return 28;
+ case S29: case D29: return 29;
+ case S30: case D30: return 30;
+ case S31: case D31: return 31;
+ }
+}
+
+namespace ARMII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // ARM Specific MachineOperand flags.
+
+ MO_NO_FLAG,
+
+ /// MO_LO16 - On a symbol operand, this represents a relocation containing
+ /// lower 16 bit of the address. Used only via movw instruction.
+ MO_LO16,
+
+ /// MO_HI16 - On a symbol operand, this represents a relocation containing
+ /// higher 16 bit of the address. Used only via movt instruction.
+ MO_HI16,
+
+ /// MO_LO16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY,
+
+ /// MO_HI16_NONLAZY - On a symbol operand "FOO", this represents a
+ /// relocation containing higher 16 bit of the non-lazy-ptr indirect symbol,
+ /// i.e. "FOO$non_lazy_ptr". Used only via movt instruction.
+ MO_HI16_NONLAZY,
+
+ /// MO_LO16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing lower 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movw instruction.
+ MO_LO16_NONLAZY_PIC,
+
+ /// MO_HI16_NONLAZY_PIC - On a symbol operand "FOO", this represents a
+ /// relocation containing higher 16 bit of the PC relative address of the
+ /// non-lazy-ptr indirect symbol, i.e. "FOO$non_lazy_ptr - LABEL".
+ /// Used only via movt instruction.
+ MO_HI16_NONLAZY_PIC,
+
+ /// MO_PLT - On a symbol operand, this represents an ELF PLT reference on a
+ /// call operand.
+ MO_PLT
+ };
+} // end namespace ARMII
+
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/Target/ARM/ARMBaseInstrInfo.cpp b/final/lib/Target/ARM/ARMBaseInstrInfo.cpp
new file mode 100644
index 00000000000..012eb280b06
--- /dev/null
+++ b/final/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -0,0 +1,2331 @@
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Base ARM implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMBaseInstrInfo.h"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMHazardRecognizer.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMGenInstrInfo.inc"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+static cl::opt<bool>
+EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
+ cl::desc("Enable ARM 2-addr to 3-addr conv"));
+
+/// ARM_MLxEntry - Record information about MLA / MLS instructions.
+struct ARM_MLxEntry {
+ unsigned MLxOpc; // MLA / MLS opcode
+ unsigned MulOpc; // Expanded multiplication opcode
+ unsigned AddSubOpc; // Expanded add / sub opcode
+ bool NegAcc; // True if the acc is negated before the add / sub.
+ bool HasLane; // True if instruction has an extra "lane" operand.
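+ // Example row from ARM_MLxTable below: { ARM::VMLAS, ARM::VMULS,
+ // ARM::VADDS, false, false }, i.e. VMLAS expands to VMULS followed by
+ // VADDS, with no accumulator negation and no lane operand.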
+};
+
+static const ARM_MLxEntry ARM_MLxTable[] = {
+ // MLxOpc, MulOpc, AddSubOpc, NegAcc, HasLane
+ // fp scalar ops
+ { ARM::VMLAS, ARM::VMULS, ARM::VADDS, false, false },
+ { ARM::VMLSS, ARM::VMULS, ARM::VSUBS, false, false },
+ { ARM::VMLAD, ARM::VMULD, ARM::VADDD, false, false },
+ { ARM::VMLSD, ARM::VMULD, ARM::VSUBD, false, false },
+ { ARM::VNMLAS, ARM::VNMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLSS, ARM::VMULS, ARM::VSUBS, true, false },
+ { ARM::VNMLAD, ARM::VNMULD, ARM::VSUBD, true, false },
+ { ARM::VNMLSD, ARM::VMULD, ARM::VSUBD, true, false },
+
+ // fp SIMD ops
+ { ARM::VMLAfd, ARM::VMULfd, ARM::VADDfd, false, false },
+ { ARM::VMLSfd, ARM::VMULfd, ARM::VSUBfd, false, false },
+ { ARM::VMLAfq, ARM::VMULfq, ARM::VADDfq, false, false },
+ { ARM::VMLSfq, ARM::VMULfq, ARM::VSUBfq, false, false },
+ { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false, true },
+ { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false, true },
+ { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false, true },
+ { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false, true },
+};
+
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
+ : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ Subtarget(STI) {
+ for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+ if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
+ assert(false && "Duplicated entries?");
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
+ MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
+ }
+}
+
+// Use a ScoreboardHazardRecognizer for prepass ARM scheduling.
+// TargetInstrInfoImpl currently defaults to no prepass hazard recognizer.
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ if (usePreRAHazardRecognizer()) {
+ const InstrItineraryData *II = TM->getInstrItineraryData();
+ return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
+ }
+ return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
+}
+
+ScheduleHazardRecognizer *ARMBaseInstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ if (Subtarget.isThumb2() || Subtarget.hasVFP2())
+ return (ScheduleHazardRecognizer *)
+ new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG);
+ return TargetInstrInfoImpl::CreateTargetPostRAHazardRecognizer(II, DAG);
+}
+
+MachineInstr *
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ // FIXME: Thumb2 support.
+
+ if (!EnableARM3Addr)
+ return NULL;
+
+ MachineInstr *MI = MBBI;
+ MachineFunction &MF = *MI->getParent()->getParent();
+ uint64_t TSFlags = MI->getDesc().TSFlags;
+ bool isPre = false;
+ switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
+ default: return NULL;
+ case ARMII::IndexModePre:
+ isPre = true;
+ break;
+ case ARMII::IndexModePost:
+ break;
+ }
+
+ // Try splitting an indexed load/store to an un-indexed one plus an add/sub
+ // operation.
+ unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
+ if (MemOpc == 0)
+ return NULL;
+
+ MachineInstr *UpdateMI = NULL;
+ MachineInstr *MemMI = NULL;
+ unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ bool isLoad = !TID.mayStore();
+ const MachineOperand &WB = isLoad ?
MI->getOperand(1) : MI->getOperand(0);
+ const MachineOperand &Base = MI->getOperand(2);
+ const MachineOperand &Offset = MI->getOperand(NumOps-3);
+ unsigned WBReg = WB.getReg();
+ unsigned BaseReg = Base.getReg();
+ unsigned OffReg = Offset.getReg();
+ unsigned OffImm = MI->getOperand(NumOps-2).getImm();
+ ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
+ switch (AddrMode) {
+ default:
+ assert(false && "Unknown indexed op!");
+ return NULL;
+ case ARMII::AddrMode2: {
+ bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ if (OffReg == 0) {
+ if (ARM_AM::getSOImmVal(Amt) == -1)
+ // Can't encode it in a so_imm operand. This transformation will
+ // add more than 1 instruction. Abandon!
+ return NULL;
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else if (Amt != 0) {
+ ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
+ .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
+ .addImm(Pred).addReg(0).addReg(0);
+ } else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ case ARMII::AddrMode3: {
+ bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
+ unsigned Amt = ARM_AM::getAM3Offset(OffImm);
+ if (OffReg == 0)
+ // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
+ .addReg(BaseReg).addImm(Amt)
+ .addImm(Pred).addReg(0).addReg(0);
+ else
+ UpdateMI = BuildMI(MF, MI->getDebugLoc(),
+ get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
+ .addReg(BaseReg).addReg(OffReg)
+ .addImm(Pred).addReg(0).addReg(0);
+ break;
+ }
+ }
+
+ std::vector<MachineInstr*> NewMIs;
+ if (isPre) {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(WBReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
+ NewMIs.push_back(MemMI);
+ NewMIs.push_back(UpdateMI);
+ } else {
+ if (isLoad)
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc), MI->getOperand(0).getReg())
+ .addReg(BaseReg).addImm(0).addImm(Pred);
+ else
+ MemMI = BuildMI(MF, MI->getDebugLoc(),
+ get(MemOpc)).addReg(MI->getOperand(1).getReg())
+ .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
+ if (WB.isDead())
+ UpdateMI->getOperand(0).setIsDead();
+ NewMIs.push_back(UpdateMI);
+ NewMIs.push_back(MemMI);
+ }
+
+ // Transfer LiveVariables states, kill / dead info.
+ if (LV) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned Reg = MO.getReg();
+
+ LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+ if (MO.isDef()) {
+ MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
+ if (MO.isDead())
+ LV->addVirtualRegisterDead(Reg, NewMI);
+ }
+ if (MO.isUse() && MO.isKill()) {
+ for (unsigned j = 0; j < 2; ++j) {
+ // Look at the two new MI's in reverse order.
+ MachineInstr *NewMI = NewMIs[j]; + if (!NewMI->readsRegister(Reg)) + continue; + LV->addVirtualRegisterKilled(Reg, NewMI); + if (VI.removeKill(MI)) + VI.Kills.push_back(NewMI); + break; + } + } + } + } + } + + MFI->insert(MBBI, NewMIs[1]); + MFI->insert(MBBI, NewMIs[0]); + return NewMIs[0]; +} + +// Branch analysis. +bool +ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // If the block has no terminators, it just falls into the block after it. + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin()) + return false; + --I; + while (I->isDebugValue()) { + if (I == MBB.begin()) + return false; + --I; + } + if (!isUnpredicatedTerminator(I)) + return false; + + // Get the last instruction in the block. + MachineInstr *LastInst = I; + + // If there is only one terminator instruction, process it. + unsigned LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + if (isUncondBranchOpcode(LastOpc)) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } + if (isCondBranchOpcode(LastOpc)) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } + return true; // Can't handle indirect branch. + } + + // Get the instruction before it if it is a terminator. + MachineInstr *SecondLastInst = I; + unsigned SecondLastOpc = SecondLastInst->getOpcode(); + + // If AllowModify is true and the block ends with two or more unconditional + // branches, delete all but the first unconditional branch. + if (AllowModify && isUncondBranchOpcode(LastOpc)) { + while (isUncondBranchOpcode(SecondLastOpc)) { + LastInst->eraseFromParent(); + LastInst = SecondLastInst; + LastOpc = LastInst->getOpcode(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { + // Return now the only terminator is an unconditional branch. + TBB = LastInst->getOperand(0).getMBB(); + return false; + } else { + SecondLastInst = I; + SecondLastOpc = SecondLastInst->getOpcode(); + } + } + } + + // If there are three terminators, we don't know what sort of block this is. + if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + return true; + + // If the block ends with a B and a Bcc, handle it. + if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } + + // If the block ends with two unconditional branches, handle it. The second + // one is not executed, so remove it. + if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return false; + } + + // ...likewise if it ends with a branch table followed by an unconditional + // branch. The branch folder can create these, and we must get rid of them for + // correctness of Thumb constant islands. + if ((isJumpTableBranchOpcode(SecondLastOpc) || + isIndirectBranchOpcode(SecondLastOpc)) && + isUncondBranchOpcode(LastOpc)) { + I = LastInst; + if (AllowModify) + I->eraseFromParent(); + return true; + } + + // Otherwise, can't handle this. 
+ return true;
+}
+
+
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator I = MBB.end();
+ if (I == MBB.begin()) return 0;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
+ if (!isUncondBranchOpcode(I->getOpcode()) &&
+ !isCondBranchOpcode(I->getOpcode()))
+ return 0;
+
+ // Remove the branch.
+ I->eraseFromParent();
+
+ I = MBB.end();
+
+ if (I == MBB.begin()) return 1;
+ --I;
+ if (!isCondBranchOpcode(I->getOpcode()))
+ return 1;
+
+ // Remove the branch.
+ I->eraseFromParent();
+ return 2;
+}
+
+unsigned
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
+ ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
+ int BOpc = !AFI->isThumbFunction()
+ ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
+ int BccOpc = !AFI->isThumbFunction()
+ ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
+
+ // Shouldn't be a fall through.
+ assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert((Cond.size() == 2 || Cond.size() == 0) &&
+ "ARM branch conditions have two components!");
+
+ if (FBB == 0) {
+ if (Cond.empty()) // Unconditional branch?
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
+ else
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ return 1;
+ }
+
+ // Two-way conditional branch.
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
+ .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
+ return 2;
+}
+
+bool ARMBaseInstrInfo::
+ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
+ Cond[0].setImm(ARMCC::getOppositeCondition(CC));
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+PredicateInstruction(MachineInstr *MI,
+ const SmallVectorImpl<MachineOperand> &Pred) const {
+ unsigned Opc = MI->getOpcode();
+ if (isUncondBranchOpcode(Opc)) {
+ MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
+ MI->addOperand(MachineOperand::CreateImm(Pred[0].getImm()));
+ MI->addOperand(MachineOperand::CreateReg(Pred[1].getReg(), false));
+ return true;
+ }
+
+ int PIdx = MI->findFirstPredOperandIdx();
+ if (PIdx != -1) {
+ MachineOperand &PMO = MI->getOperand(PIdx);
+ PMO.setImm(Pred[0].getImm());
+ MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
+ return true;
+ }
+ return false;
+}
+
+bool ARMBaseInstrInfo::
+SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+ const SmallVectorImpl<MachineOperand> &Pred2) const {
+ if (Pred1.size() > 2 || Pred2.size() > 2)
+ return false;
+
+ ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
+ ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
+ if (CC1 == CC2)
+ return true;
+
+ switch (CC1) {
+ default:
+ return false;
+ case ARMCC::AL:
+ return true;
+ case ARMCC::HS:
+ return CC2 == ARMCC::HI;
+ case ARMCC::LS:
+ return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
+ case ARMCC::GE:
+ return CC2 == ARMCC::GT;
+ case ARMCC::LE:
+ return CC2 == ARMCC::LT;
+ }
+}
+
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
+ std::vector<MachineOperand> &Pred) const {
+ // FIXME: This confuses implicit_def with optional CPSR def.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ return false;
+
+ bool Found = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.getReg() == ARM::CPSR) {
+ Pred.push_back(MO);
+ Found = true;
+ }
+ }
+
+ return Found;
+}
+
+/// isPredicable - Return true if the specified instruction can be predicated.
+/// By default, this returns true for every instruction with a
+/// PredicateOperand.
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isPredicable())
+ return false;
+
+ if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ ARMFunctionInfo *AFI =
+ MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
+ return AFI->isThumb2Function();
+ }
+ return true;
+}
+
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+LLVM_ATTRIBUTE_NOINLINE
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI);
+static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
+ unsigned JTI) {
+ assert(JTI < JT.size());
+ return JT[JTI].MBBs.size();
+}
+
+/// GetInstSize - Return the size of the specified MachineInstr.
+///
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+ const MachineBasicBlock &MBB = *MI->getParent();
+ const MachineFunction *MF = MBB.getParent();
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+
+ // Basic size info comes from the TSFlags field.
+ const TargetInstrDesc &TID = MI->getDesc();
+ uint64_t TSFlags = TID.TSFlags;
+
+ unsigned Opc = MI->getOpcode();
+ switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
+ default: {
+ // If this machine instr is an inline asm, measure it.
+ if (MI->getOpcode() == ARM::INLINEASM)
+ return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
+ if (MI->isLabel())
+ return 0;
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown or unset size field for instr!");
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::PROLOG_LABEL:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return 0;
+ }
+ break;
+ }
+ case ARMII::Size8Bytes: return 8; // ARM instruction x 2.
+ case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction.
+ case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
+ case ARMII::SizeSpecial: {
+ switch (Opc) {
+ case ARM::MOVi16_ga_pcrel:
+ case ARM::MOVTi16_ga_pcrel:
+ case ARM::t2MOVi16_ga_pcrel:
+ case ARM::t2MOVTi16_ga_pcrel:
+ return 4;
+ case ARM::MOVi32imm:
+ case ARM::t2MOVi32imm:
+ return 8;
+ case ARM::CONSTPOOL_ENTRY:
+ // If this machine instr is a constant pool entry, its size is recorded as
+ // operand #2.
+ return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
+ case ARM::Int_eh_sjlj_setjmp:
+ case ARM::Int_eh_sjlj_setjmp_nofp:
+ return 20;
+ case ARM::tInt_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp:
+ case ARM::t2Int_eh_sjlj_setjmp_nofp:
+ return 12;
+ case ARM::BR_JTr:
+ case ARM::BR_JTm:
+ case ARM::BR_JTadd:
+ case ARM::tBR_JTr:
+ case ARM::t2BR_JT:
+ case ARM::t2TBB_JT:
+ case ARM::t2TBH_JT: {
+ // These are jumptable branches, i.e. a branch followed by an inlined
+ // jumptable. The size is 4 + 4 * number of entries. For TBB, each
+ // entry is one byte; for TBH, two bytes each.
+ unsigned EntrySize = (Opc == ARM::t2TBB_JT)
+ ? 1 : ((Opc == ARM::t2TBH_JT) ?
2 : 4);
+ unsigned NumOps = TID.getNumOperands();
+ MachineOperand JTOP =
+ MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ unsigned JTI = JTOP.getIndex();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ assert(MJTI != 0);
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ assert(JTI < JT.size());
+ // Thumb instructions are 2 byte aligned, but JT entries are 4 byte
+ // aligned. The assembler / linker may add 2 byte padding just before
+ // the JT entries. The size does not include this padding; the
+ // constant islands pass does separate bookkeeping for it.
+ // FIXME: If we know the size of the function is less than (1 << 16) * 2
+ // bytes, we can use 16-bit entries instead. Then there won't be an
+ // alignment issue.
+ unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
+ unsigned NumEntries = getNumJTEntries(JT, JTI);
+ if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
+ // Make sure the instruction that follows TBB is 2-byte aligned.
+ // FIXME: Constant island pass should insert an "ALIGN" instruction
+ // instead.
+ ++NumEntries;
+ return NumEntries * EntrySize + InstSize;
+ }
+ default:
+ // Otherwise, pseudo-instruction sizes are zero.
+ return 0;
+ }
+ }
+ }
+ return 0; // Not reached
+}
+
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
+ }
+
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
+}
+
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
+void ARMBaseInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(
+ PseudoSourceValue::getFixedStack(FI)),
+ MachineMemOperand::MOStore,
+ MFI.getObjectSize(FI),
+ Align);
+
+ // tGPR is used sometimes in ARM instructions that need to avoid
using + // certain registers. Just treat it as GPR here. Likewise, rGPR. + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) + RC = ARM::GPRRegisterClass; + + switch (RC->getID()) { + case ARM::GPRRegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::SPRRegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64Pseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI) + .addMemOperand(MMO)); + } + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + // FIXME: It's possible to only store part of the QQ register if the + // spilled def has a sub-register index. + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo)) + .addFrameIndex(FI).addImm(16) + .addReg(SrcReg, getKillRegState(isKill)) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + } + break; + case ARM::QQQQPRRegClassID: { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); + MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); + AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); + } +} + +unsigned +ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + switch (MI->getOpcode()) { + default: break; + case ARM::STRrs: + case ARM::t2STRs: // FIXME: don't use t2STRs to access frame. 
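+ // A register-offset store only refers to a stack slot in the degenerate
+ // form <reg>, <frame-index>, reg0, #0; the checks below test exactly that.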
+ if (MI->getOperand(1).isFI() && + MI->getOperand(2).isReg() && + MI->getOperand(3).isImm() && + MI->getOperand(2).getReg() == 0 && + MI->getOperand(3).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::STRi12: + case ARM::t2STRi12: + case ARM::tSpill: + case ARM::VSTRD: + case ARM::VSTRS: + if (MI->getOperand(1).isFI() && + MI->getOperand(2).isImm() && + MI->getOperand(2).getImm() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + case ARM::VST1q64Pseudo: + if (MI->getOperand(0).isFI() && + MI->getOperand(2).getSubReg() == 0) { + FrameIndex = MI->getOperand(0).getIndex(); + return MI->getOperand(2).getReg(); + } + break; + case ARM::VSTMQIA: + if (MI->getOperand(1).isFI() && + MI->getOperand(0).getSubReg() == 0) { + FrameIndex = MI->getOperand(1).getIndex(); + return MI->getOperand(0).getReg(); + } + break; + } + + return 0; +} + +void ARMBaseInstrInfo:: +loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, int FI, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const { + DebugLoc DL; + if (I != MBB.end()) DL = I->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); + MachineMemOperand *MMO = + MF.getMachineMemOperand( + MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)), + MachineMemOperand::MOLoad, + MFI.getObjectSize(FI), + Align); + + // tGPR is used sometimes in ARM instructions that need to avoid using + // certain registers. Just treat it as GPR here. + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass + || RC == ARM::rGPRRegisterClass) + RC = ARM::GPRRegisterClass; + + switch (RC->getID()) { + case ARM::GPRRegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::SPRRegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: + if (Align >= 16 && getRegisterInfo().needsStackRealignment(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64Pseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg) + .addFrameIndex(FI) + .addMemOperand(MMO)); + } + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: + if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg) + .addFrameIndex(FI).addImm(16) + .addMemOperand(MMO)); + } else { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + .addFrameIndex(FI)) + .addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + } + break; + case ARM::QQQQPRRegClassID: { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA)) + 
.addFrameIndex(FI))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
+ }
+}
+
+unsigned
+ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ switch (MI->getOpcode()) {
+ default: break;
+ case ARM::LDRrs:
+ case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isReg() &&
+ MI->getOperand(3).isImm() &&
+ MI->getOperand(2).getReg() == 0 &&
+ MI->getOperand(3).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::LDRi12:
+ case ARM::t2LDRi12:
+ case ARM::tRestore:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(2).isImm() &&
+ MI->getOperand(2).getImm() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLD1q64Pseudo:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ case ARM::VLDMQIA:
+ if (MI->getOperand(1).isFI() &&
+ MI->getOperand(0).getSubReg() == 0) {
+ FrameIndex = MI->getOperand(1).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ break;
+ }
+
+ return 0;
+}
+
+MachineInstr*
+ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
+/// Create a copy of a const pool value. Update CPI to the new index and return
+/// the label UID.
+static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
+ MachineConstantPool *MCP = MF.getConstantPool();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+ unsigned PCLabelId = AFI->createPICLabelUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ // FIXME: The below assumes PIC relocation model and that the function
+ // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
+ // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
+ // instructions, so that's probably OK, but is PIC always correct when
+ // we get here?
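+ // The PCAdjustment of 4 used below is the Thumb PC read-ahead (PC reads
+ // as the instruction address plus 4); ARM mode would need 8, per the
+ // FIXME above.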
+ if (ACPV->isGlobalValue()) + NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, + ARMCP::CPValue, 4); + else if (ACPV->isExtSymbol()) + NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), + ACPV->getSymbol(), PCLabelId, 4); + else if (ACPV->isBlockAddress()) + NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); + else if (ACPV->isLSDA()) + NewCPV = new ARMConstantPoolValue(MF.getFunction(), PCLabelId, + ARMCP::CPLSDA, 4); + else + llvm_unreachable("Unexpected ARM constantpool value type!!"); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + return PCLabelId; +} + +void ARMBaseInstrInfo:: +reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const { + unsigned Opcode = Orig->getOpcode(); + switch (Opcode) { + default: { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); + MBB.insert(I, MI); + break; + } + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + MachineFunction &MF = *MBB.getParent(); + unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned PCLabelId = duplicateCPV(MF, CPI); + MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), + DestReg) + .addConstantPoolIndex(CPI).addImm(PCLabelId); + (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + break; + } + } +} + +MachineInstr * +ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const { + MachineInstr *MI = TargetInstrInfoImpl::duplicate(Orig, MF); + switch(Orig->getOpcode()) { + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + unsigned CPI = Orig->getOperand(1).getIndex(); + unsigned PCLabelId = duplicateCPV(MF, CPI); + Orig->getOperand(1).setIndex(CPI); + Orig->getOperand(2).setImm(PCLabelId); + break; + } + } + return MI; +} + +bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + int Opcode = MI0->getOpcode(); + if (Opcode == ARM::t2LDRpci || + Opcode == ARM::t2LDRpci_pic || + Opcode == ARM::tLDRpci || + Opcode == ARM::tLDRpci_pic || + Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + const MachineOperand &MO0 = MI0->getOperand(1); + const MachineOperand &MO1 = MI1->getOperand(1); + if (MO0.getOffset() != MO1.getOffset()) + return false; + + if (Opcode == ARM::MOV_ga_dyn || + Opcode == ARM::MOV_ga_pcrel || + Opcode == ARM::MOV_ga_pcrel_ldr || + Opcode == ARM::t2MOV_ga_dyn || + Opcode == ARM::t2MOV_ga_pcrel) + // Ignore the PC labels. 
+ return MO0.getGlobal() == MO1.getGlobal(); + + const MachineFunction *MF = MI0->getParent()->getParent(); + const MachineConstantPool *MCP = MF->getConstantPool(); + int CPI0 = MO0.getIndex(); + int CPI1 = MO1.getIndex(); + const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; + const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; + bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); + bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); + if (isARMCP0 && isARMCP1) { + ARMConstantPoolValue *ACPV0 = + static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } else if (!isARMCP0 && !isARMCP1) { + return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; + } + return false; + } else if (Opcode == ARM::PICLDR) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + unsigned Addr0 = MI0->getOperand(1).getReg(); + unsigned Addr1 = MI1->getOperand(1).getReg(); + if (Addr0 != Addr1) { + if (!MRI || + !TargetRegisterInfo::isVirtualRegister(Addr0) || + !TargetRegisterInfo::isVirtualRegister(Addr1)) + return false; + + // This assumes SSA form. + MachineInstr *Def0 = MRI->getVRegDef(Addr0); + MachineInstr *Def1 = MRI->getVRegDef(Addr1); + // Check if the loaded value, e.g. a constantpool of a global address, is + // the same. + if (!produceSameValue(Def0, Def1, MRI)) + return false; + } + + for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) { + // %vreg12 = PICLDR %vreg11, 0, pred:14, pred:%noreg + const MachineOperand &MO0 = MI0->getOperand(i); + const MachineOperand &MO1 = MI1->getOperand(i); + if (!MO0.isIdenticalTo(MO1)) + return false; + } + return true; + } + + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + +/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to +/// determine if two loads are loading from the same base address. It should +/// only return true if the base pointers are the same and the only difference +/// between the two addresses is the offset. It also returns the offsets by +/// reference. +bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + // Don't worry about Thumb: just ARM and Thumb2. + if (Subtarget.isThumb1Only()) return false; + + if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode()) + return false; + + switch (Load1->getMachineOpcode()) { + default: + return false; + case ARM::LDRi12: + case ARM::LDRBi12: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + switch (Load2->getMachineOpcode()) { + default: + return false; + case ARM::LDRi12: + case ARM::LDRBi12: + case ARM::LDRD: + case ARM::LDRH: + case ARM::LDRSB: + case ARM::LDRSH: + case ARM::VLDRD: + case ARM::VLDRS: + case ARM::t2LDRi8: + case ARM::t2LDRDi8: + case ARM::t2LDRSHi8: + case ARM::t2LDRi12: + case ARM::t2LDRSHi12: + break; + } + + // Check if base addresses and chain operands match. + if (Load1->getOperand(0) != Load2->getOperand(0) || + Load1->getOperand(4) != Load2->getOperand(4)) + return false; + + // Index should be Reg0. + if (Load1->getOperand(3) != Load2->getOperand(3)) + return false; + + // Determine the offsets.
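+ // Illustrative case: for "ldr r0, [r1, #4]" and "ldr r2, [r1, #8]" the
+ // offset operands below are both ConstantSDNodes and this returns true
+ // with Offset1 = 4 and Offset2 = 8.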
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) && + isa<ConstantSDNode>(Load2->getOperand(1))) { + Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue(); + Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue(); + return true; + } + + return false; +} + +/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should +/// be scheduled together. On some targets if two loads are loading from +/// addresses in the same cache line, it's better if they are scheduled +/// together. This function takes two integers that represent the load offsets +/// from the common base address. It returns true if it decides it's desirable +/// to schedule the two loads together. "NumLoads" is the number of loads that +/// have already been scheduled after Load1. +bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const { + // Don't worry about Thumb: just ARM and Thumb2. + if (Subtarget.isThumb1Only()) return false; + + assert(Offset2 > Offset1); + + if ((Offset2 - Offset1) / 8 > 64) + return false; + + if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + return false; // FIXME: overly conservative? + + // Four loads in a row should be sufficient. + if (NumLoads >= 3) + return false; + + return true; +} + +bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Debug info is never a scheduling boundary. It's necessary to be explicit + // due to the special treatment of IT instructions below, otherwise a + // dbg_value followed by an IT will result in the IT instruction being + // considered a scheduling hazard, which is wrong. It should be the actual + // instruction preceding the dbg_value instruction(s), just like it is + // when debug info is not present. + if (MI->isDebugValue()) + return false; + + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Treat the start of the IT block as a scheduling boundary, but schedule + // t2IT along with all instructions following it. + // FIXME: This is a big hammer. But the alternative is to add all potential + // true and anti dependencies to IT block instructions as implicit operands + // to the t2IT instruction. The added compile time and complexity does not + // seem worth it. + MachineBasicBlock::const_iterator I = MI; + // Make sure to skip any dbg_value instructions + while (++I != MBB->end() && I->isDebugValue()) + ; + if (I != MBB->end() && I->getOpcode() == ARM::t2IT) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + if (MI->definesRegister(ARM::SP)) + return true; + + return false; +} + +bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + unsigned ExtraPredCycles, + float Probability, + float Confidence) const { + if (!NumCycles) + return false; + + // Attempt to estimate the relative costs of predication versus branching.
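+ // Worked example with illustrative numbers: NumCycles = 2,
+ // ExtraPredCycles = 1, Probability = 0.5, Confidence = 0.9 and a
+ // misprediction penalty of 8 give UnpredCost = 0.5*2 + 1.0 + 0.1*8 = 2.8,
+ // so the predicated cost of 3 cycles would not be considered profitable.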
+ float UnpredCost = Probability * NumCycles; + UnpredCost += 1.0; // The branch itself + UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); + + return (float)(NumCycles + ExtraPredCycles) < UnpredCost; +} + +bool ARMBaseInstrInfo:: +isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned TCycles, unsigned TExtra, + MachineBasicBlock &FMBB, + unsigned FCycles, unsigned FExtra, + float Probability, float Confidence) const { + if (!TCycles || !FCycles) + return false; + + // Attempt to estimate the relative costs of predication versus branching. + float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles; + UnpredCost += 1.0; // The branch itself + UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); + + return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. +ARMCC::CondCodes +llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) { + int PIdx = MI->findFirstPredOperandIdx(); + if (PIdx == -1) { + PredReg = 0; + return ARMCC::AL; + } + + PredReg = MI->getOperand(PIdx+1).getReg(); + return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm(); +} + + +int llvm::getMatchingCondBranchOpcode(int Opc) { + if (Opc == ARM::B) + return ARM::Bcc; + else if (Opc == ARM::tB) + return ARM::tBcc; + else if (Opc == ARM::t2B) + return ARM::t2Bcc; + + llvm_unreachable("Unknown unconditional branch opcode!"); + return 0; +} + + +void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII, unsigned MIFlags) { + bool isSub = NumBytes < 0; + if (isSub) NumBytes = -NumBytes; + + while (NumBytes) { + unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes); + unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt); + assert(ThisVal && "Didn't extract field correctly"); + + // We will handle these bits from offset, clear them. + NumBytes &= ~ThisVal; + + assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?"); + + // Build the new ADD / SUB. + unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill).addImm(ThisVal) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); + BaseReg = DestReg; + } +} + +bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII) { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = MI.getDesc(); + unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); + bool isSub = false; + + // Memory operands in inline assembly always use AddrMode2. + if (Opcode == ARM::INLINEASM) + AddrMode = ARMII::AddrMode2; + + if (Opcode == ARM::ADDri) { + Offset += MI.getOperand(FrameRegIdx+1).getImm(); + if (Offset == 0) { + // Turn it into a move. + MI.setDesc(TII.get(ARM::MOVr)); + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.RemoveOperand(FrameRegIdx+1); + Offset = 0; + return true; + } else if (Offset < 0) { + Offset = -Offset; + isSub = true; + MI.setDesc(TII.get(ARM::SUBri)); + } + + // Common case: small offset, fits into instruction.
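+ // Reminder (illustrative): an ARM so_imm is an 8-bit value rotated right
+ // by an even amount, so e.g. #0x3FC (0xFF ror 30) folds directly below,
+ // while #0x101 cannot be encoded and falls through to the split-and-peel
+ // path.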
+ if (ARM_AM::getSOImmVal(Offset) != -1) { + // Replace the FrameIndex with sp / fp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); + Offset = 0; + return true; + } + + // Otherwise, pull as much of the immediate into this ADDri/SUBri + // as possible. + unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset); + unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt); + + // We will handle these bits from offset, clear them. + Offset &= ~ThisImmVal; + + // Get the properly encoded SOImmVal field. + assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 && + "Bit extraction didn't work?"); + MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal); + } else { + unsigned ImmIdx = 0; + int InstrOffs = 0; + unsigned NumBits = 0; + unsigned Scale = 1; + switch (AddrMode) { + case ARMII::AddrMode_i12: { + ImmIdx = FrameRegIdx + 1; + InstrOffs = MI.getOperand(ImmIdx).getImm(); + NumBits = 12; + break; + } + case ARMII::AddrMode2: { + ImmIdx = FrameRegIdx+2; + InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 12; + break; + } + case ARMII::AddrMode3: { + ImmIdx = FrameRegIdx+2; + InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + break; + } + case ARMII::AddrMode4: + case ARMII::AddrMode6: + // Can't fold any offset even if it's zero. + return false; + case ARMII::AddrMode5: { + ImmIdx = FrameRegIdx+1; + InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm()); + if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub) + InstrOffs *= -1; + NumBits = 8; + Scale = 4; + break; + } + default: + llvm_unreachable("Unsupported addressing mode!"); + break; + } + + Offset += InstrOffs * Scale; + assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!"); + if (Offset < 0) { + Offset = -Offset; + isSub = true; + } + + // Attempt to fold the address computation if the opcode has offset bits + if (NumBits > 0) { + // Common case: small offset, fits into instruction. + MachineOperand &ImmOp = MI.getOperand(ImmIdx); + int ImmedOffset = Offset / Scale; + unsigned Mask = (1 << NumBits) - 1; + if ((unsigned)Offset <= Mask * Scale) { + // Replace the FrameIndex with sp + MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); + // FIXME: When addrmode2 goes away, this will simplify (like the + // T2 version), as the LDR.i12 versions don't need the encoding + // tricks for the offset value. + if (isSub) { + if (AddrMode == ARMII::AddrMode_i12) + ImmedOffset = -ImmedOffset; + else + ImmedOffset |= 1 << NumBits; + } + ImmOp.ChangeToImmediate(ImmedOffset); + Offset = 0; + return true; + } + + // Otherwise, it didn't fit. Pull in what we can to simplify the immed. + ImmedOffset = ImmedOffset & Mask; + if (isSub) { + if (AddrMode == ARMII::AddrMode_i12) + ImmedOffset = -ImmedOffset; + else + ImmedOffset |= 1 << NumBits; + } + ImmOp.ChangeToImmediate(ImmedOffset); + Offset &= ~(Mask*Scale); + } + } + + Offset = (isSub) ?
-Offset : Offset; + return Offset == 0; +} + +bool ARMBaseInstrInfo:: +AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, int &CmpMask, + int &CmpValue) const { + switch (MI->getOpcode()) { + default: break; + case ARM::CMPri: + case ARM::t2CMPri: + SrcReg = MI->getOperand(0).getReg(); + CmpMask = ~0; + CmpValue = MI->getOperand(1).getImm(); + return true; + case ARM::TSTri: + case ARM::t2TSTri: + SrcReg = MI->getOperand(0).getReg(); + CmpMask = MI->getOperand(1).getImm(); + CmpValue = 0; + return true; + } + + return false; +} + +/// isSuitableForMask - Identify a suitable 'and' instruction that +/// operates on the given source register and applies the same mask +/// as a 'tst' instruction. Provide a limited look-through for copies. +/// When successful, MI will hold the found instruction. +static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg, + int CmpMask, bool CommonUse) { + switch (MI->getOpcode()) { + case ARM::ANDri: + case ARM::t2ANDri: + if (CmpMask != MI->getOperand(2).getImm()) + return false; + if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg()) + return true; + break; + case ARM::COPY: { + // Walk down one instruction which is potentially an 'and'. + const MachineInstr &Copy = *MI; + MachineBasicBlock::iterator AND( + llvm::next(MachineBasicBlock::iterator(MI))); + if (AND == MI->getParent()->end()) return false; + MI = AND; + return isSuitableForMask(MI, Copy.getOperand(0).getReg(), + CmpMask, true); + } + } + + return false; +} + +/// OptimizeCompareInstr - Convert the instruction supplying the argument to the +/// comparison into one that sets the zero bit in the flags register. +bool ARMBaseInstrInfo:: +OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, + int CmpValue, const MachineRegisterInfo *MRI) const { + if (CmpValue != 0) + return false; + + MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg); + if (llvm::next(DI) != MRI->def_end()) + // Only support one definition. + return false; + + MachineInstr *MI = &*DI; + + // Masked compares sometimes use the same register as the corresponding 'and'. + if (CmpMask != ~0) { + if (!isSuitableForMask(MI, SrcReg, CmpMask, false)) { + MI = 0; + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg), + UE = MRI->use_end(); UI != UE; ++UI) { + if (UI->getParent() != CmpInstr->getParent()) continue; + MachineInstr *PotentialAND = &*UI; + if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true)) + continue; + MI = PotentialAND; + break; + } + if (!MI) return false; + } + } + + // Conservatively refuse to convert an instruction which isn't in the same BB + // as the comparison. + if (MI->getParent() != CmpInstr->getParent()) + return false; + + // Check that CPSR isn't set between the comparison instruction and the one we + // want to change. + MachineBasicBlock::const_iterator I = CmpInstr, E = MI, + B = MI->getParent()->begin(); + + // Early exit if CmpInstr is at the beginning of the BB. + if (I == B) return false; + + --I; + for (; I != E; --I) { + const MachineInstr &Instr = *I; + + for (unsigned IO = 0, EO = Instr.getNumOperands(); IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg()) continue; + + // This instruction modifies or uses CPSR after the one we want to + // change. We can't do this transformation. + if (MO.getReg() == ARM::CPSR) + return false; + } + + if (I == B) + // The 'and' is below the comparison instruction. + return false; + } + + // Set the "zero" bit in CPSR. 
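+ // That is, rewrite the defining instruction to its flag-setting form, e.g.
+ // "sub r0, r0, #1; cmp r0, #0; beq" becomes "subs r0, r0, #1; beq".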
+ switch (MI->getOpcode()) { + default: break; + case ARM::ADDri: + case ARM::ANDri: + case ARM::t2ANDri: + case ARM::SUBri: + case ARM::t2ADDri: + case ARM::t2SUBri: + // Toggle the optional operand to CPSR. + MI->getOperand(5).setReg(ARM::CPSR); + MI->getOperand(5).setIsDef(true); + CmpInstr->eraseFromParent(); + return true; + } + + return false; +} + +bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI, + MachineInstr *DefMI, unsigned Reg, + MachineRegisterInfo *MRI) const { + // Fold large immediates into add, sub, or, xor. + unsigned DefOpc = DefMI->getOpcode(); + if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm) + return false; + if (!DefMI->getOperand(1).isImm()) + // Could be t2MOVi32imm <ga:xx> + return false; + + if (!MRI->hasOneNonDBGUse(Reg)) + return false; + + unsigned UseOpc = UseMI->getOpcode(); + unsigned NewUseOpc = 0; + uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm(); + uint32_t SOImmValV1 = 0, SOImmValV2 = 0; + bool Commute = false; + switch (UseOpc) { + default: return false; + case ARM::SUBrr: + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: + case ARM::t2SUBrr: + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + Commute = UseMI->getOperand(2).getReg() != Reg; + switch (UseOpc) { + default: break; + case ARM::SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::SUBri; + // Fallthrough + } + case ARM::ADDrr: + case ARM::ORRrr: + case ARM::EORrr: { + if (!ARM_AM::isSOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::ADDrr: NewUseOpc = ARM::ADDri; break; + case ARM::ORRrr: NewUseOpc = ARM::ORRri; break; + case ARM::EORrr: NewUseOpc = ARM::EORri; break; + } + break; + } + case ARM::t2SUBrr: { + if (Commute) + return false; + ImmVal = -ImmVal; + NewUseOpc = ARM::t2SUBri; + // Fallthrough + } + case ARM::t2ADDrr: + case ARM::t2ORRrr: + case ARM::t2EORrr: { + if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal)) + return false; + SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal); + SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal); + switch (UseOpc) { + default: break; + case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break; + case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break; + case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break; + } + break; + } + } + } + } + + unsigned OpIdx = Commute ?
2 : 1; + unsigned Reg1 = UseMI->getOperand(OpIdx).getReg(); + bool isKill = UseMI->getOperand(OpIdx).isKill(); + unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg)); + AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(), + *UseMI, UseMI->getDebugLoc(), + get(NewUseOpc), NewReg) + .addReg(Reg1, getKillRegState(isKill)) + .addImm(SOImmValV1))); + UseMI->setDesc(get(NewUseOpc)); + UseMI->getOperand(1).setReg(NewReg); + UseMI->getOperand(1).setIsKill(); + UseMI->getOperand(2).ChangeToImmediate(SOImmValV2); + DefMI->eraseFromParent(); + return true; +} + +unsigned +ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + const TargetInstrDesc &Desc = MI->getDesc(); + unsigned Class = Desc.getSchedClass(); + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps) + return UOps; + + unsigned Opc = MI->getOpcode(); + switch (Opc) { + default: + llvm_unreachable("Unexpected multi-uops instruction!"); + break; + case ARM::VLDMQIA: + case ARM::VLDMQDB: + case ARM::VSTMQIA: + case ARM::VSTMQDB: + return 2; + + // The number of uOps for load / store multiple is determined by the number + // of registers. + // + // On Cortex-A8, each pair of register loads / stores can be scheduled on the + // same cycle. The scheduling for the first load / store must be done + // separately by assuming the address is not 64-bit aligned. + // + // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address + // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON + // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. + case ARM::VLDMDIA: + case ARM::VLDMDDB: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + case ARM::VSTMDIA: + case ARM::VSTMDDB: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA: + case ARM::VSTMSDB: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: { + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); + return (NumRegs / 2) + (NumRegs % 2) + 1; + } + + case ARM::LDMIA_RET: + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tSTMIA: + case ARM::tSTMIA_UPD: + case ARM::tPOP_RET: + case ARM::tPOP: + case ARM::tPUSH: + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: { + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; + if (Subtarget.isCortexA8()) { + if (NumRegs < 4) + return 2; + // 4 registers would be issued: 2, 2. + // 5 registers would be issued: 2, 2, 1. + UOps = (NumRegs / 2); + if (NumRegs % 2) + ++UOps; + return UOps; + } else if (Subtarget.isCortexA9()) { + UOps = (NumRegs / 2); + // If there is an odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((NumRegs % 2) || + !MI->hasOneMemOperand() || + (*MI->memoperands_begin())->getAlignment() < 8) + ++UOps; + return UOps; + } else { + // Assume the worst.
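+ // (one micro-op per register). For illustration, a 5-register LDMIA costs
+ // 3 uops on A8 (issued 2, 2, 1) and, assuming a 64-bit aligned address,
+ // 3 uops on A9 as well (5/2 rounded up).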
+ return NumRegs; + } + } +} + +int +ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const { + int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1; + if (RegNo <= 0) + // Def is the address writeback. + return ItinData->getOperandCycle(DefClass, DefIdx); + + int DefCycle; + if (Subtarget.isCortexA8()) { + // (regno / 2) + (regno % 2) + 1 + DefCycle = RegNo / 2 + 1; + if (RegNo % 2) + ++DefCycle; + } else if (Subtarget.isCortexA9()) { + DefCycle = RegNo; + bool isSLoad = false; + + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + isSLoad = true; + break; + } + + // If there is an odd number of 'S' registers or if it's not 64-bit aligned, + // then it takes an extra cycle. + if ((isSLoad && (RegNo % 2)) || DefAlign < 8) + ++DefCycle; + } else { + // Assume the worst. + DefCycle = RegNo + 2; + } + + return DefCycle; +} + +int +ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const { + int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1; + if (RegNo <= 0) + // Def is the address writeback. + return ItinData->getOperandCycle(DefClass, DefIdx); + + int DefCycle; + if (Subtarget.isCortexA8()) { + // 4 registers would be issued: 1, 2, 1. + // 5 registers would be issued: 1, 2, 2. + DefCycle = RegNo / 2; + if (DefCycle < 1) + DefCycle = 1; + // Result latency is issue cycle + 2: E2. + DefCycle += 2; + } else if (Subtarget.isCortexA9()) { + DefCycle = (RegNo / 2); + // If there is an odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((RegNo % 2) || DefAlign < 8) + ++DefCycle; + // Result latency is AGU cycles + 2. + DefCycle += 2; + } else { + // Assume the worst. + DefCycle = RegNo + 2; + } + + return DefCycle; +} + +int +ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const { + int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + if (RegNo <= 0) + return ItinData->getOperandCycle(UseClass, UseIdx); + + int UseCycle; + if (Subtarget.isCortexA8()) { + // (regno / 2) + (regno % 2) + 1 + UseCycle = RegNo / 2 + 1; + if (RegNo % 2) + ++UseCycle; + } else if (Subtarget.isCortexA9()) { + UseCycle = RegNo; + bool isSStore = false; + + switch (UseTID.getOpcode()) { + default: break; + case ARM::VSTMSIA: + case ARM::VSTMSDB: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + isSStore = true; + break; + } + + // If there is an odd number of 'S' registers or if it's not 64-bit aligned, + // then it takes an extra cycle. + if ((isSStore && (RegNo % 2)) || UseAlign < 8) + ++UseCycle; + } else { + // Assume the worst. + UseCycle = RegNo + 2; + } + + return UseCycle; +} + +int +ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const { + int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1; + if (RegNo <= 0) + return ItinData->getOperandCycle(UseClass, UseIdx); + + int UseCycle; + if (Subtarget.isCortexA8()) { + UseCycle = RegNo / 2; + if (UseCycle < 2) + UseCycle = 2; + // Read in E3.
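+ // e.g. on A8 the 6th register of an stm would be read at cycle
+ // max(6/2, 2) + 2 = 5; these numbers are a scheduling model, so treat
+ // them as illustrative rather than exact hardware behavior.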
+ UseCycle += 2; + } else if (Subtarget.isCortexA9()) { + UseCycle = (RegNo / 2); + // If there is an odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((RegNo % 2) || UseAlign < 8) + ++UseCycle; + } else { + // Assume the worst. + UseCycle = 1; + } + return UseCycle; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefIdx, unsigned DefAlign, + const TargetInstrDesc &UseTID, + unsigned UseIdx, unsigned UseAlign) const { + unsigned DefClass = DefTID.getSchedClass(); + unsigned UseClass = UseTID.getSchedClass(); + + if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands()) + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); + + // This may be a def / use of a variable_ops instruction; the operand + // latency might be determinable dynamically. Let the target try to + // figure it out. + int DefCycle = -1; + bool LdmBypass = false; + switch (DefTID.getOpcode()) { + default: + DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + break; + + case ARM::VLDMDIA: + case ARM::VLDMDDB: + case ARM::VLDMDIA_UPD: + case ARM::VLDMDDB_UPD: + case ARM::VLDMSIA: + case ARM::VLDMSDB: + case ARM::VLDMSIA_UPD: + case ARM::VLDMSDB_UPD: + DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + break; + + case ARM::LDMIA_RET: + case ARM::LDMIA: + case ARM::LDMDA: + case ARM::LDMDB: + case ARM::LDMIB: + case ARM::LDMIA_UPD: + case ARM::LDMDA_UPD: + case ARM::LDMDB_UPD: + case ARM::LDMIB_UPD: + case ARM::tLDMIA: + case ARM::tLDMIA_UPD: + case ARM::tPUSH: + case ARM::t2LDMIA_RET: + case ARM::t2LDMIA: + case ARM::t2LDMDB: + case ARM::t2LDMIA_UPD: + case ARM::t2LDMDB_UPD: + LdmBypass = true; + DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); + break; + } + + if (DefCycle == -1) + // We can't seem to determine the result latency of the def; assume it's 2. + DefCycle = 2; + + int UseCycle = -1; + switch (UseTID.getOpcode()) { + default: + UseCycle = ItinData->getOperandCycle(UseClass, UseIdx); + break; + + case ARM::VSTMDIA: + case ARM::VSTMDDB: + case ARM::VSTMDIA_UPD: + case ARM::VSTMDDB_UPD: + case ARM::VSTMSIA: + case ARM::VSTMSDB: + case ARM::VSTMSIA_UPD: + case ARM::VSTMSDB_UPD: + UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + break; + + case ARM::STMIA: + case ARM::STMDA: + case ARM::STMDB: + case ARM::STMIB: + case ARM::STMIA_UPD: + case ARM::STMDA_UPD: + case ARM::STMDB_UPD: + case ARM::STMIB_UPD: + case ARM::tSTMIA: + case ARM::tSTMIA_UPD: + case ARM::tPOP_RET: + case ARM::tPOP: + case ARM::t2STMIA: + case ARM::t2STMDB: + case ARM::t2STMIA_UPD: + case ARM::t2STMDB_UPD: + UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); + break; + } + + if (UseCycle == -1) + // Assume it's read in the first stage. + UseCycle = 1; + + UseCycle = DefCycle - UseCycle + 1; + if (UseCycle > 0) { + if (LdmBypass) { + // It's a variable_ops instruction so we can't use DefIdx here. Just use + // first def operand.
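+ // (With the formula above, a def completing in E3 (DefCycle 3) and a use
+ // read in E1 (UseCycle 1) give a latency of 3 - 1 + 1 = 3; a forwarding
+ // path found below shaves one cycle off.)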
+ if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1, + UseClass, UseIdx)) + --UseCycle; + } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx, + UseClass, UseIdx)) { + --UseCycle; + } + } + + return UseCycle; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + if (DefMI->isCopyLike() || DefMI->isInsertSubreg() || + DefMI->isRegSequence() || DefMI->isImplicitDef()) + return 1; + + const TargetInstrDesc &DefTID = DefMI->getDesc(); + if (!ItinData || ItinData->isEmpty()) + return DefTID.mayLoad() ? 3 : 1; + + const TargetInstrDesc &UseTID = UseMI->getDesc(); + const MachineOperand &DefMO = DefMI->getOperand(DefIdx); + if (DefMO.getReg() == ARM::CPSR) { + if (DefMI->getOpcode() == ARM::FMSTAT) { + // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?) + return Subtarget.isCortexA9() ? 1 : 20; + } + + // CPSR set and branch can be paired in the same cycle. + if (UseTID.isBranch()) + return 0; + } + + unsigned DefAlign = DefMI->hasOneMemOperand() + ? (*DefMI->memoperands_begin())->getAlignment() : 0; + unsigned UseAlign = UseMI->hasOneMemOperand() + ? (*UseMI->memoperands_begin())->getAlignment() : 0; + int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); + + if (Latency > 1 && + (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] + // variants are one cycle cheaper. + switch (DefTID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = DefMI->getOperand(3).getImm(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = DefMI->getOperand(3).getImm(); + if (ShAmt == 0 || ShAmt == 2) + --Latency; + break; + } + } + } + + return Latency; +} + +int +ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!DefNode->isMachineOpcode()) + return 1; + + const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode()); + + if (isZeroCost(DefTID.Opcode)) + return 0; + + if (!ItinData || ItinData->isEmpty()) + return DefTID.mayLoad() ? 3 : 1; + + if (!UseNode->isMachineOpcode()) { + int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); + if (Subtarget.isCortexA9()) + return Latency <= 2 ? 1 : Latency - 1; + else + return Latency <= 3 ? 1 : Latency - 2; + } + + const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode()); + const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); + unsigned DefAlign = !DefMN->memoperands_empty() + ? (*DefMN->memoperands_begin())->getAlignment() : 0; + const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode); + unsigned UseAlign = !UseMN->memoperands_empty() + ? (*UseMN->memoperands_begin())->getAlignment() : 0; + int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign, + UseTID, UseIdx, UseAlign); + + if (Latency > 1 && + (Subtarget.isCortexA8() || Subtarget.isCortexA9())) { + // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2] + // variants are one cycle cheaper.
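+ // e.g. "ldr r0, [r1, r2]" and "ldr r0, [r1, r2, lsl #2]" take the cheaper
+ // latency below, while "ldr r0, [r1, r2, lsl #1]" keeps the full latency.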
+ switch (DefTID.getOpcode()) { + default: break; + case ARM::LDRrs: + case ARM::LDRBrs: { + unsigned ShOpVal = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal); + if (ShImm == 0 || + (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)) + --Latency; + break; + } + case ARM::t2LDRs: + case ARM::t2LDRBs: + case ARM::t2LDRHs: + case ARM::t2LDRSHs: { + // Thumb2 mode: lsl only. + unsigned ShAmt = + cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue(); + if (ShAmt == 0 || ShAmt == 2) + --Latency; + break; + } + } + } + + return Latency; +} + +int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 1; + + if (!ItinData || ItinData->isEmpty()) + return 1; + + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Class = TID.getSchedClass(); + unsigned UOps = ItinData->Itineraries[Class].NumMicroOps; + if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR)) + // When predicated, CPSR is an additional source operand for CPSR updating + // instructions; this apparently increases their latencies. + *PredCost = 1; + if (UOps) + return ItinData->getStageLatency(Class); + return getNumMicroOps(ItinData, MI); +} + +int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const { + if (!Node->isMachineOpcode()) + return 1; + + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Opcode = Node->getMachineOpcode(); + switch (Opcode) { + default: + return ItinData->getStageLatency(get(Opcode).getSchedClass()); + case ARM::VLDMQIA: + case ARM::VLDMQDB: + case ARM::VSTMQIA: + case ARM::VSTMQDB: + return 2; + } +} + +bool ARMBaseInstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask; + if (Subtarget.isCortexA8() && + (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP)) + // CortexA8 VFP instructions are not pipelined. + return true; + + // Hoist VFP / NEON instructions with 4 or higher latency.
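+ // (This hook is consulted by the machine LICM pass; answering true here
+ // makes hoisting the defining instruction out of a loop more attractive.)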
+ int Latency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + if (Latency <= 3) + return false; + return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON || + UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON; +} + +bool ARMBaseInstrInfo:: +hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx) const { + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask; + if (DDomain == ARMII::DomainGeneral) { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 2); + } + return false; +} + +bool +ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, + unsigned &AddSubOpc, + bool &NegAcc, bool &HasLane) const { + DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode); + if (I == MLxEntryMap.end()) + return false; + + const ARM_MLxEntry &Entry = ARM_MLxTable[I->second]; + MulOpc = Entry.MulOpc; + AddSubOpc = Entry.AddSubOpc; + NegAcc = Entry.NegAcc; + HasLane = Entry.HasLane; + return true; +} diff --git a/final/lib/Target/ARM/ARMBaseInstrInfo.h b/final/lib/Target/ARM/ARMBaseInstrInfo.h new file mode 100644 index 00000000000..228b6cd9141 --- /dev/null +++ b/final/lib/Target/ARM/ARMBaseInstrInfo.h @@ -0,0 +1,528 @@ +//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Base ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMBASEINSTRUCTIONINFO_H +#define ARMBASEINSTRUCTIONINFO_H + +#include "ARM.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" + +namespace llvm { + class ARMSubtarget; + class ARMBaseRegisterInfo; + +/// ARMII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace ARMII { + enum { + //===------------------------------------------------------------------===// + // Instruction Flags. + + //===------------------------------------------------------------------===// + // This five-bit field describes the addressing mode used. + + AddrModeMask = 0x1f, + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 + AddrMode_i12 = 16, + + // Size* - Flags to keep track of the size of an instruction. + SizeShift = 5, + SizeMask = 7 << SizeShift, + SizeSpecial = 1, // 0 byte pseudo or special case. + Size8Bytes = 2, + Size4Bytes = 3, + Size2Bytes = 4, + + // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load + // and store ops only. Generic "updating" flag is used for ld/st multiple.
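+ // For example, "ldr r0, [r1, #4]!" is pre-indexed, "ldr r0, [r1], #4" is
+ // post-indexed, and "ldmia r1!, {r2, r3}" carries the generic updating flag.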
IndexModeShift = 8, + IndexModeMask = 3 << IndexModeShift, + IndexModePre = 1, + IndexModePost = 2, + IndexModeUpd = 3, + + //===------------------------------------------------------------------===// + // Instruction encoding formats. + // + FormShift = 10, + FormMask = 0x3f << FormShift, + + // Pseudo instructions + Pseudo = 0 << FormShift, + + // Multiply instructions + MulFrm = 1 << FormShift, + + // Branch instructions + BrFrm = 2 << FormShift, + BrMiscFrm = 3 << FormShift, + + // Data Processing instructions + DPFrm = 4 << FormShift, + DPSoRegFrm = 5 << FormShift, + + // Load and Store + LdFrm = 6 << FormShift, + StFrm = 7 << FormShift, + LdMiscFrm = 8 << FormShift, + StMiscFrm = 9 << FormShift, + LdStMulFrm = 10 << FormShift, + + LdStExFrm = 11 << FormShift, + + // Miscellaneous arithmetic instructions + ArithMiscFrm = 12 << FormShift, + SatFrm = 13 << FormShift, + + // Extend instructions + ExtFrm = 14 << FormShift, + + // VFP formats + VFPUnaryFrm = 15 << FormShift, + VFPBinaryFrm = 16 << FormShift, + VFPConv1Frm = 17 << FormShift, + VFPConv2Frm = 18 << FormShift, + VFPConv3Frm = 19 << FormShift, + VFPConv4Frm = 20 << FormShift, + VFPConv5Frm = 21 << FormShift, + VFPLdStFrm = 22 << FormShift, + VFPLdStMulFrm = 23 << FormShift, + VFPMiscFrm = 24 << FormShift, + + // Thumb format + ThumbFrm = 25 << FormShift, + + // Miscellaneous format + MiscFrm = 26 << FormShift, + + // NEON formats + NGetLnFrm = 27 << FormShift, + NSetLnFrm = 28 << FormShift, + NDupFrm = 29 << FormShift, + NLdStFrm = 30 << FormShift, + N1RegModImmFrm= 31 << FormShift, + N2RegFrm = 32 << FormShift, + NVCVTFrm = 33 << FormShift, + NVDupLnFrm = 34 << FormShift, + N2RegVShLFrm = 35 << FormShift, + N2RegVShRFrm = 36 << FormShift, + N3RegFrm = 37 << FormShift, + N3RegVShFrm = 38 << FormShift, + NVExtFrm = 39 << FormShift, + NVMulSLFrm = 40 << FormShift, + NVTBLFrm = 41 << FormShift, + + //===------------------------------------------------------------------===// + // Misc flags. + + // UnaryDP - Indicates this is a unary data processing instruction, i.e. + // it doesn't have a Rn operand. + UnaryDP = 1 << 16, + + // Xform16Bit - Indicates this Thumb2 instruction may be transformed into + // a 16-bit Thumb instruction if certain conditions are met. + Xform16Bit = 1 << 17, + + //===------------------------------------------------------------------===// + // Code domain. + DomainShift = 18, + DomainMask = 7 << DomainShift, + DomainGeneral = 0 << DomainShift, + DomainVFP = 1 << DomainShift, + DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, + + //===------------------------------------------------------------------===// + // Field shifts - such shifts are used to set fields while generating + // machine instructions. + // + // FIXME: This list will need adjusting/fixing as the MC code emitter + // takes shape and the ARMCodeEmitter.cpp bits go away. + ShiftTypeShift = 4, + + M_BitShift = 5, + ShiftImmShift = 5, + ShiftShift = 7, + N_BitShift = 7, + ImmHiShift = 8, + SoRotImmShift = 8, + RegRsShift = 8, + ExtRotImmShift = 10, + RegRdLoShift = 12, + RegRdShift = 12, + RegRdHiShift = 16, + RegRnShift = 16, + S_BitShift = 20, + W_BitShift = 21, + AM3_I_BitShift = 22, + D_BitShift = 22, + U_BitShift = 23, + P_BitShift = 24, + I_BitShift = 25, + CondShift = 28 + }; +} + +class ARMBaseInstrInfo : public TargetInstrInfoImpl { + const ARMSubtarget &Subtarget; + +protected: + // Can only be subclassed.
+ explicit ARMBaseInstrInfo(const ARMSubtarget &STI); + +public: + // Return the non-pre/post incrementing version of 'Opc'. Return 0 + // if there is no such opcode. + virtual unsigned getUnindexedOpcode(unsigned Opc) const = 0; + + virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI, + LiveVariables *LV) const; + + virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; + const ARMSubtarget &getSubtarget() const { return Subtarget; } + + ScheduleHazardRecognizer * + CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const; + + ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const; + + // Branch analysis. + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl<MachineOperand> &Cond, + bool AllowModify = false) const; + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const; + + virtual + bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const; + + // Predication support. + bool isPredicated(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL; + } + + ARMCC::CondCodes getPredicate(const MachineInstr *MI) const { + int PIdx = MI->findFirstPredOperandIdx(); + return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() + : ARMCC::AL; + } + + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl<MachineOperand> &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1, + const SmallVectorImpl<MachineOperand> &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector<MachineOperand> &Pred) const; + + virtual bool isPredicable(MachineInstr *MI) const; + + /// GetInstSize - Returns the size of the specified MachineInstr.
+ /// + virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; + + virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + virtual unsigned isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; + + virtual void storeRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned SrcReg, bool isKill, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + unsigned DestReg, int FrameIndex, + const TargetRegisterClass *RC, + const TargetRegisterInfo *TRI) const; + + virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, + int FrameIx, + uint64_t Offset, + const MDNode *MDPtr, + DebugLoc DL) const; + + virtual void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const; + + MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; + + virtual bool produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const; + + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to + /// determine if two loads are loading from the same base address. It should + /// only return true if the base pointers are the same and the only + /// difference between the two addresses is the offset. It also returns the + /// offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, int64_t &Offset2) const; + + /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should + /// be scheduled together. On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1. + virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, unsigned ExtraPredCycles, + float Prob, float Confidence) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, + unsigned NumT, unsigned ExtraT, + MachineBasicBlock &FMBB, + unsigned NumF, unsigned ExtraF, + float Probability, float Confidence) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumCycles, + float Probability, + float Confidence) const { + return NumCycles == 1; + } + + /// AnalyzeCompare - For a comparison instruction, return the source register + /// in SrcReg and the value it compares against in CmpValue. Return true if + /// the comparison instruction can be analyzed.
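+ /// For example, "cmp r0, #42" yields SrcReg = r0, CmpMask = ~0 and
+ /// CmpValue = 42, while "tst r0, #1" yields CmpMask = 1 and CmpValue = 0.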
+ virtual bool AnalyzeCompare(const MachineInstr *MI, unsigned &SrcReg, + int &CmpMask, int &CmpValue) const; + + /// OptimizeCompareInstr - Convert the instruction to set the zero flag so + /// that we can remove a "comparison with zero". + virtual bool OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, + int CmpMask, int CmpValue, + const MachineRegisterInfo *MRI) const; + + /// FoldImmediate - 'Reg' is known to be defined by a move immediate + /// instruction, try to fold the immediate into the use instruction. + virtual bool FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, + unsigned Reg, MachineRegisterInfo *MRI) const; + + virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const; + + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + virtual + int getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const; +private: + int getVLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const; + int getLDMDefCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefClass, + unsigned DefIdx, unsigned DefAlign) const; + int getVSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const; + int getSTMUseCycle(const InstrItineraryData *ItinData, + const TargetInstrDesc &UseTID, + unsigned UseClass, + unsigned UseIdx, unsigned UseAlign) const; + int getOperandLatency(const InstrItineraryData *ItinData, + const TargetInstrDesc &DefTID, + unsigned DefIdx, unsigned DefAlign, + const TargetInstrDesc &UseTID, + unsigned UseIdx, unsigned UseAlign) const; + + int getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, unsigned *PredCost = 0) const; + + int getInstrLatency(const InstrItineraryData *ItinData, + SDNode *Node) const; + + bool hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const; + bool hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx) const; + +private: + /// Modeling special VFP / NEON fp MLA / MLS hazards. + + /// MLxEntryMap - Map fp MLA / MLS to the corresponding entry in the internal + /// MLx table. + DenseMap<unsigned, unsigned> MLxEntryMap; + + /// MLxHazardOpcodes - Set of add / sub and multiply opcodes that would cause + /// stalls when scheduled together with fp MLA / MLS opcodes. + SmallSet<unsigned, 16> MLxHazardOpcodes; + +public: + /// isFpMLxInstruction - Return true if the specified opcode is a fp MLA / MLS + /// instruction. + bool isFpMLxInstruction(unsigned Opcode) const { + return MLxEntryMap.count(Opcode); + } + + /// isFpMLxInstruction - This version also returns the multiply opcode and the + /// addition / subtraction opcode to expand to. Sets 'HasLane' true for + /// the MLx instructions with an extra lane operand. + bool isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc, + unsigned &AddSubOpc, bool &NegAcc, + bool &HasLane) const; + + /// canCauseFpMLxStall - Return true if an instruction of the specified opcode + /// will cause stalls when scheduled after (within a 4-cycle window) a fp + /// MLA / MLS instruction.
+ bool canCauseFpMLxStall(unsigned Opcode) const { + return MLxHazardOpcodes.count(Opcode); + } +}; + +static inline +const MachineInstrBuilder &AddDefaultPred(const MachineInstrBuilder &MIB) { + return MIB.addImm((int64_t)ARMCC::AL).addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +static inline +const MachineInstrBuilder &AddDefaultT1CC(const MachineInstrBuilder &MIB, + bool isDead = false) { + return MIB.addReg(ARM::CPSR, getDefRegState(true) | getDeadRegState(isDead)); +} + +static inline +const MachineInstrBuilder &AddNoT1CC(const MachineInstrBuilder &MIB) { + return MIB.addReg(0); +} + +static inline +bool isUncondBranchOpcode(int Opc) { + return Opc == ARM::B || Opc == ARM::tB || Opc == ARM::t2B; +} + +static inline +bool isCondBranchOpcode(int Opc) { + return Opc == ARM::Bcc || Opc == ARM::tBcc || Opc == ARM::t2Bcc; +} + +static inline +bool isJumpTableBranchOpcode(int Opc) { + return Opc == ARM::BR_JTr || Opc == ARM::BR_JTm || Opc == ARM::BR_JTadd || + Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT; +} + +static inline +bool isIndirectBranchOpcode(int Opc) { + return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; +} + +/// getInstrPredicate - If instruction is predicated, returns its predicate +/// condition, otherwise returns AL. It also returns the condition code +/// register by reference. +ARMCC::CondCodes getInstrPredicate(const MachineInstr *MI, unsigned &PredReg); + +int getMatchingCondBranchOpcode(int Opc); + +/// emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of +/// instructions to materialize a destreg = basereg + immediate in ARM / Thumb2 +/// code. +void emitARMRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); + +void emitT2RegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, int NumBytes, + ARMCC::CondCodes Pred, unsigned PredReg, + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); +void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + unsigned DestReg, unsigned BaseReg, + int NumBytes, const TargetInstrInfo &TII, + const ARMBaseRegisterInfo& MRI, + unsigned MIFlags = 0); + + +/// rewriteARMFrameIndex / rewriteT2FrameIndex - +/// Rewrite MI to access 'Offset' bytes from the FP. Return false if the +/// offset could not be handled directly in MI, and return the left-over +/// portion by reference. +bool rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII); + +bool rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, + unsigned FrameReg, int &Offset, + const ARMBaseInstrInfo &TII); + +} // End llvm namespace + +#endif diff --git a/final/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/final/lib/Target/ARM/ARMBaseRegisterInfo.cpp new file mode 100644 index 00000000000..9d7be660109 --- /dev/null +++ b/final/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -0,0 +1,1248 @@ +//===- ARMBaseRegisterInfo.cpp - ARM Register Information -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the base ARM implementation of the TargetRegisterInfo +// class. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMFrameLowering.h" +#include "ARMInstrInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMSubtarget.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/LLVMContext.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; + +static cl::opt<bool> +ForceAllBaseRegAlloc("arm-force-base-reg-alloc", cl::Hidden, cl::init(false), + cl::desc("Force use of virtual base registers for stack load/store")); +static cl::opt<bool> +EnableLocalStackAlloc("enable-local-stack-alloc", cl::init(true), cl::Hidden, + cl::desc("Enable pre-regalloc stack frame index allocation")); +static cl::opt<bool> +EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true), + cl::desc("Enable use of a base pointer for complex stack frames")); + +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, + const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP), + TII(tii), STI(sti), + FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), + BasePtr(ARM::R6) { +} + +const unsigned* +ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { + static const unsigned CalleeSavedRegs[] = { + ARM::LR, ARM::R11, ARM::R10, ARM::R9, ARM::R8, + ARM::R7, ARM::R6, ARM::R5, ARM::R4, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + + static const unsigned DarwinCalleeSavedRegs[] = { + // Darwin ABI deviates from ARM standard ABI. R9 is not a callee-saved + // register. + ARM::LR, ARM::R7, ARM::R6, ARM::R5, ARM::R4, + ARM::R11, ARM::R10, ARM::R8, + + ARM::D15, ARM::D14, ARM::D13, ARM::D12, + ARM::D11, ARM::D10, ARM::D9, ARM::D8, + 0 + }; + return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; +} + +BitVector ARMBaseRegisterInfo:: +getReservedRegs(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + // FIXME: avoid re-calculating this every time. + BitVector Reserved(getNumRegs()); + Reserved.set(ARM::SP); + Reserved.set(ARM::PC); + Reserved.set(ARM::FPSCR); + if (TFI->hasFP(MF)) + Reserved.set(FramePtr); + if (hasBasePointer(MF)) + Reserved.set(BasePtr); + // Some targets reserve R9.
+  if (STI.isR9Reserved())
+    Reserved.set(ARM::R9);
+  return Reserved;
+}
+
+bool ARMBaseRegisterInfo::isReservedReg(const MachineFunction &MF,
+                                        unsigned Reg) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  switch (Reg) {
+  default: break;
+  case ARM::SP:
+  case ARM::PC:
+    return true;
+  case ARM::R6:
+    if (hasBasePointer(MF))
+      return true;
+    break;
+  case ARM::R7:
+  case ARM::R11:
+    if (FramePtr == Reg && TFI->hasFP(MF))
+      return true;
+    break;
+  case ARM::R9:
+    return STI.isR9Reserved();
+  }
+
+  return false;
+}
+
+const TargetRegisterClass *
+ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
+                                              const TargetRegisterClass *B,
+                                              unsigned SubIdx) const {
+  switch (SubIdx) {
+  default: return 0;
+  case ARM::ssub_0:
+  case ARM::ssub_1:
+  case ARM::ssub_2:
+  case ARM::ssub_3: {
+    // S sub-registers.
+    if (A->getSize() == 8) {
+      if (B == &ARM::SPR_8RegClass)
+        return &ARM::DPR_8RegClass;
+      assert(B == &ARM::SPRRegClass && "Expecting SPR register class!");
+      if (A == &ARM::DPR_8RegClass)
+        return A;
+      return &ARM::DPR_VFP2RegClass;
+    }
+
+    if (A->getSize() == 16) {
+      if (B == &ARM::SPR_8RegClass)
+        return &ARM::QPR_8RegClass;
+      return &ARM::QPR_VFP2RegClass;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::SPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return &ARM::QQPR_VFP2RegClass;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    return 0; // Do not allow coalescing!
+  }
+  case ARM::dsub_0:
+  case ARM::dsub_1:
+  case ARM::dsub_2:
+  case ARM::dsub_3: {
+    // D sub-registers.
+    if (A->getSize() == 16) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    if (A->getSize() == 32) {
+      if (B == &ARM::DPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::DPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B != &ARM::DPRRegClass)
+      return 0; // Do not allow coalescing!
+    return A;
+  }
+  case ARM::dsub_4:
+  case ARM::dsub_5:
+  case ARM::dsub_6:
+  case ARM::dsub_7: {
+    // D sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+
+  case ARM::qsub_0:
+  case ARM::qsub_1: {
+    // Q sub-registers.
+    if (A->getSize() == 32) {
+      if (B == &ARM::QPR_VFP2RegClass)
+        return &ARM::QQPR_VFP2RegClass;
+      if (B == &ARM::QPR_8RegClass)
+        return 0; // Do not allow coalescing!
+      return A;
+    }
+
+    assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+    if (B == &ARM::QPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+  case ARM::qsub_2:
+  case ARM::qsub_3: {
+    // Q sub-registers of QQQQ registers.
+    if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+      return A;
+    return 0; // Do not allow coalescing!
+  }
+  }
+  return 0;
+}
+
+bool
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
+                                          SmallVectorImpl<unsigned> &SubIndices,
+                                          unsigned &NewSubIdx) const {
+
+  unsigned Size = RC->getSize() * 8;
+  if (Size < 6)
+    return 0;
+
+  NewSubIdx = 0;  // Whole register.
+  unsigned NumRegs = SubIndices.size();
+  if (NumRegs == 8) {
+    // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 && + SubIndices[0] == ARM::dsub_0 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3 && + SubIndices[4] == ARM::dsub_4 && + SubIndices[5] == ARM::dsub_5 && + SubIndices[6] == ARM::dsub_6 && + SubIndices[7] == ARM::dsub_7); + } else if (NumRegs == 4) { + if (SubIndices[0] == ARM::qsub_0) { + // 4 Q registers -> 1 QQQQ register. + return (Size == 512 && + SubIndices[1] == ARM::qsub_1 && + SubIndices[2] == ARM::qsub_2 && + SubIndices[3] == ARM::qsub_3); + } else if (SubIndices[0] == ARM::dsub_0) { + // 4 D registers -> 1 QQ register. + if (Size >= 256 && + SubIndices[1] == ARM::dsub_1 && + SubIndices[2] == ARM::dsub_2 && + SubIndices[3] == ARM::dsub_3) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 4 D registers -> 1 QQ register (2nd). + if (Size == 512 && + SubIndices[1] == ARM::dsub_5 && + SubIndices[2] == ARM::dsub_6 && + SubIndices[3] == ARM::dsub_7) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 4 S registers -> 1 Q register. + if (Size >= 128 && + SubIndices[1] == ARM::ssub_1 && + SubIndices[2] == ARM::ssub_2 && + SubIndices[3] == ARM::ssub_3) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } + } else if (NumRegs == 2) { + if (SubIndices[0] == ARM::qsub_0) { + // 2 Q registers -> 1 QQ register. + if (Size >= 256 && SubIndices[1] == ARM::qsub_1) { + if (Size == 512) + NewSubIdx = ARM::qqsub_0; + return true; + } + } else if (SubIndices[0] == ARM::qsub_2) { + // 2 Q registers -> 1 QQ register (2nd). + if (Size == 512 && SubIndices[1] == ARM::qsub_3) { + NewSubIdx = ARM::qqsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_0) { + // 2 D registers -> 1 Q register. + if (Size >= 128 && SubIndices[1] == ARM::dsub_1) { + if (Size >= 256) + NewSubIdx = ARM::qsub_0; + return true; + } + } else if (SubIndices[0] == ARM::dsub_2) { + // 2 D registers -> 1 Q register (2nd). + if (Size >= 256 && SubIndices[1] == ARM::dsub_3) { + NewSubIdx = ARM::qsub_1; + return true; + } + } else if (SubIndices[0] == ARM::dsub_4) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::dsub_5) { + NewSubIdx = ARM::qsub_2; + return true; + } + } else if (SubIndices[0] == ARM::dsub_6) { + // 2 D registers -> 1 Q register (3rd). + if (Size == 512 && SubIndices[1] == ARM::dsub_7) { + NewSubIdx = ARM::qsub_3; + return true; + } + } else if (SubIndices[0] == ARM::ssub_0) { + // 2 S registers -> 1 D register. + if (SubIndices[1] == ARM::ssub_1) { + if (Size >= 128) + NewSubIdx = ARM::dsub_0; + return true; + } + } else if (SubIndices[0] == ARM::ssub_2) { + // 2 S registers -> 1 D register (2nd). + if (Size >= 128 && SubIndices[1] == ARM::ssub_3) { + NewSubIdx = ARM::dsub_1; + return true; + } + } + } + return false; +} + + +const TargetRegisterClass * +ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { + return ARM::GPRRegisterClass; +} + +unsigned +ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return TFI->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 10 - FP - (STI.isR9Reserved() ? 1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
+  case ARM::DPRRegClassID:
+    return 32 - 10;
+  }
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+                                        unsigned HintType, unsigned HintReg,
+                                        const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  // Alternative register allocation orders when favoring even / odd registers
+  // of register pairs.
+
+  // No FP, R9 is available.
+  static const unsigned GPREven1[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd1[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R7, R9 is available.
+  static const unsigned GPREven2[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+    ARM::R9, ARM::R11
+  };
+  static const unsigned GPROdd2[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+    ARM::R8, ARM::R10
+  };
+
+  // FP is R11, R9 is available.
+  static const unsigned GPREven3[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+    ARM::R9
+  };
+  static const unsigned GPROdd3[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+    ARM::R8
+  };
+
+  // No FP, R9 is not available.
+  static const unsigned GPREven4[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd4[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R7, R9 is not available.
+  static const unsigned GPREven5[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+    ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+    ARM::R11
+  };
+  static const unsigned GPROdd5[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+    ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+    ARM::R10
+  };
+
+  // FP is R11, R9 is not available.
+  static const unsigned GPREven6[] = {
+    ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+    ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+  };
+  static const unsigned GPROdd6[] = {
+    ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+    ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+  };
+
+
+  if (HintType == ARMRI::RegPairEven) {
+    if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+      // It's no longer possible to fulfill this hint. Return the default
+      // allocation order.
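+      // (A vreg hinted RegPairEven is otherwise steered toward R0, R2, R4, ...
+      // by the tables above so that its partner can take the matching odd
+      // register and the pair stays usable by paired loads/stores; a sketch
+      // of the tables' intent, not a statement from this patch.)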
+      return std::make_pair(RC->allocation_order_begin(MF),
+                            RC->allocation_order_end(MF));
+
+    if (!TFI->hasFP(MF)) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven1,
+                              GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven4,
+                              GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+    } else if (FramePtr == ARM::R7) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven2,
+                              GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven5,
+                              GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+    } else { // FramePtr == ARM::R11
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPREven3,
+                              GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPREven6,
+                              GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+    }
+  } else if (HintType == ARMRI::RegPairOdd) {
+    if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+      // It's no longer possible to fulfill this hint. Return the default
+      // allocation order.
+      return std::make_pair(RC->allocation_order_begin(MF),
+                            RC->allocation_order_end(MF));
+
+    if (!TFI->hasFP(MF)) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd1,
+                              GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd4,
+                              GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+    } else if (FramePtr == ARM::R7) {
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd2,
+                              GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd5,
+                              GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+    } else { // FramePtr == ARM::R11
+      if (!STI.isR9Reserved())
+        return std::make_pair(GPROdd3,
+                              GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+      else
+        return std::make_pair(GPROdd6,
+                              GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+    }
+  }
+  return std::make_pair(RC->allocation_order_begin(MF),
+                        RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMBaseRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                                         const MachineFunction &MF) const {
+  if (Reg == 0 || !isPhysicalRegister(Reg))
+    return 0;
+  if (Type == 0)
+    return Reg;
+  else if (Type == (unsigned)ARMRI::RegPairOdd)
+    // Odd register.
+    return getRegisterPairOdd(Reg, MF);
+  else if (Type == (unsigned)ARMRI::RegPairEven)
+    // Even register.
+    return getRegisterPairEven(Reg, MF);
+  return 0;
+}
+
+void
+ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                                        MachineFunction &MF) const {
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+  if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+       Hint.first == (unsigned)ARMRI::RegPairEven) &&
+      TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+    // If 'Reg' is one of the even / odd register pair and it's now changed
+    // (e.g. coalesced) into a different register. The other register of the
+    // pair allocation hint must be updated to reflect the relationship
+    // change.
+    unsigned OtherReg = Hint.second;
+    Hint = MRI->getRegAllocationHint(OtherReg);
+    if (Hint.second == Reg)
+      // Make sure the pair has not already divorced.
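+      // E.g. vregs %a and %b were hinted at each other as an even/odd pair
+      // and %a has just been replaced by NewReg; since %b's hint still names
+      // %a (the pair has not "divorced"), retarget it at NewReg below.
+      // (Illustrative register names, not from this patch.)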
+      MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+  }
+}
+
+bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  if (!EnableBasePointer)
+    return false;
+
+  if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+    return true;
+
+  // Thumb has trouble with negative offsets from the FP. Thumb2 has a limited
+  // negative range for ldr/str (255), and Thumb1 is positive offsets only.
+  // It's going to be better to use the SP or Base Pointer instead. When there
+  // are variable sized objects, we can't reference off of the SP, so we
+  // reserve a Base Pointer.
+  if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+    // Conservatively estimate whether the negative offset from the frame
+    // pointer will be sufficient to reach. If a function has a smallish
+    // frame, it's less likely to have lots of spills and callee saved
+    // space, so it's all more likely to be within range of the frame pointer.
+    // If the estimate is wrong, the scavenger will still make the access
+    // work; it just won't be optimal.
+    if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+      return false;
+    return true;
+  }
+
+  return false;
+}
+
+bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  // We can't realign the stack if:
+  // 1. Dynamic stack realignment is explicitly disabled,
+  // 2. This is a Thumb1 function (it's not useful, so we don't bother), or
+  // 3. There are VLAs in the function and the base pointer is disabled.
+  return (RealignStack && !AFI->isThumb1OnlyFunction() &&
+          (!MFI->hasVarSizedObjects() || EnableBasePointer));
+}
+
+bool ARMBaseRegisterInfo::
+needsStackRealignment(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  const Function *F = MF.getFunction();
+  unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+  bool requiresRealignment = ((MFI->getLocalFrameMaxAlign() > StackAlign) ||
+                               F->hasFnAttr(Attribute::StackAlignment));
+
+  return requiresRealignment && canRealignStack(MF);
+}
+
+bool ARMBaseRegisterInfo::
+cannotEliminateFrame(const MachineFunction &MF) const {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  if (DisableFramePointerElim(MF) && MFI->adjustsStack())
+    return true;
+  return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+    || needsStackRealignment(MF);
+}
+
+unsigned ARMBaseRegisterInfo::getRARegister() const {
+  return ARM::LR;
+}
+
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+  if (TFI->hasFP(MF))
+    return FramePtr;
+  return ARM::SP;
+}
+
+unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const {
+  llvm_unreachable("What is the exception register");
+  return 0;
+}
+
+unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const {
+  llvm_unreachable("What is the exception handler register");
+  return 0;
+}
+
+int ARMBaseRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+  return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
+}
+
+unsigned ARMBaseRegisterInfo::getRegisterPairEven(unsigned Reg,
+                                               const MachineFunction &MF) const {
+  switch (Reg) {
+  default: break;
+  // Return 0 if either register of the pair is a special register.
+  // So no R12, etc.
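+  // E.g. getRegisterPairEven(ARM::R1) yields ARM::R0 below, while the
+  // (R12, LR) and (SP, PC) pairs are deliberately absent from the cases.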
+ case ARM::R1: + return ARM::R0; + case ARM::R3: + return ARM::R2; + case ARM::R5: + return ARM::R4; + case ARM::R7: + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 0 : ARM::R6; + case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S1: + return ARM::S0; + case ARM::S3: + return ARM::S2; + case ARM::S5: + return ARM::S4; + case ARM::S7: + return ARM::S6; + case ARM::S9: + return ARM::S8; + case ARM::S11: + return ARM::S10; + case ARM::S13: + return ARM::S12; + case ARM::S15: + return ARM::S14; + case ARM::S17: + return ARM::S16; + case ARM::S19: + return ARM::S18; + case ARM::S21: + return ARM::S20; + case ARM::S23: + return ARM::S22; + case ARM::S25: + return ARM::S24; + case ARM::S27: + return ARM::S26; + case ARM::S29: + return ARM::S28; + case ARM::S31: + return ARM::S30; + + case ARM::D1: + return ARM::D0; + case ARM::D3: + return ARM::D2; + case ARM::D5: + return ARM::D4; + case ARM::D7: + return ARM::D6; + case ARM::D9: + return ARM::D8; + case ARM::D11: + return ARM::D10; + case ARM::D13: + return ARM::D12; + case ARM::D15: + return ARM::D14; + case ARM::D17: + return ARM::D16; + case ARM::D19: + return ARM::D18; + case ARM::D21: + return ARM::D20; + case ARM::D23: + return ARM::D22; + case ARM::D25: + return ARM::D24; + case ARM::D27: + return ARM::D26; + case ARM::D29: + return ARM::D28; + case ARM::D31: + return ARM::D30; + } + + return 0; +} + +unsigned ARMBaseRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: + return ARM::R1; + case ARM::R2: + return ARM::R3; + case ARM::R4: + return ARM::R5; + case ARM::R6: + return (isReservedReg(MF, ARM::R7) || isReservedReg(MF, ARM::R6)) + ? 0 : ARM::R7; + case ARM::R8: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + + case ARM::S0: + return ARM::S1; + case ARM::S2: + return ARM::S3; + case ARM::S4: + return ARM::S5; + case ARM::S6: + return ARM::S7; + case ARM::S8: + return ARM::S9; + case ARM::S10: + return ARM::S11; + case ARM::S12: + return ARM::S13; + case ARM::S14: + return ARM::S15; + case ARM::S16: + return ARM::S17; + case ARM::S18: + return ARM::S19; + case ARM::S20: + return ARM::S21; + case ARM::S22: + return ARM::S23; + case ARM::S24: + return ARM::S25; + case ARM::S26: + return ARM::S27; + case ARM::S28: + return ARM::S29; + case ARM::S30: + return ARM::S31; + + case ARM::D0: + return ARM::D1; + case ARM::D2: + return ARM::D3; + case ARM::D4: + return ARM::D5; + case ARM::D6: + return ARM::D7; + case ARM::D8: + return ARM::D9; + case ARM::D10: + return ARM::D11; + case ARM::D12: + return ARM::D13; + case ARM::D14: + return ARM::D15; + case ARM::D16: + return ARM::D17; + case ARM::D18: + return ARM::D19; + case ARM::D20: + return ARM::D21; + case ARM::D22: + return ARM::D23; + case ARM::D24: + return ARM::D25; + case ARM::D26: + return ARM::D27; + case ARM::D28: + return ARM::D29; + case ARM::D30: + return ARM::D31; + } + + return 0; +} + +/// emitLoadConstPool - Emits a load from constpool to materialize the +/// specified immediate. 
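+///
+/// A minimal usage sketch (destination register and value are illustrative,
+/// not taken from this patch):
+///   emitLoadConstPool(MBB, MBBI, dl, ARM::R4, 0, 0x12345678, ARMCC::AL, 0);
+/// allocates a 4-byte constant-pool entry for 0x12345678 and emits a
+/// pc-relative LDRcp that loads it into r4.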
+void ARMBaseRegisterInfo::
+emitLoadConstPool(MachineBasicBlock &MBB,
+                  MachineBasicBlock::iterator &MBBI,
+                  DebugLoc dl,
+                  unsigned DestReg, unsigned SubIdx, int Val,
+                  ARMCC::CondCodes Pred,
+                  unsigned PredReg, unsigned MIFlags) const {
+  MachineFunction &MF = *MBB.getParent();
+  MachineConstantPool *ConstantPool = MF.getConstantPool();
+  const Constant *C =
+        ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val);
+  unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4);
+
+  BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp))
+    .addReg(DestReg, getDefRegState(true), SubIdx)
+    .addConstantPoolIndex(Idx)
+    .addImm(0).addImm(Pred).addReg(PredReg)
+    .setMIFlags(MIFlags);
+}
+
+bool ARMBaseRegisterInfo::
+requiresRegisterScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresFrameIndexScavenging(const MachineFunction &MF) const {
+  return true;
+}
+
+bool ARMBaseRegisterInfo::
+requiresVirtualBaseRegisters(const MachineFunction &MF) const {
+  return EnableLocalStackAlloc;
+}
+
+static void
+emitSPUpdate(bool isARM,
+             MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
+             DebugLoc dl, const ARMBaseInstrInfo &TII,
+             int NumBytes,
+             ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
+  if (isARM)
+    emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                            Pred, PredReg, TII);
+  else
+    emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes,
+                           Pred, PredReg, TII);
+}
+
+
+void ARMBaseRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+                              MachineBasicBlock::iterator I) const {
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  if (!TFI->hasReservedCallFrame(MF)) {
+    // If we have alloca, convert as follows:
+    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
+    // ADJCALLSTACKUP   -> add, sp, sp, amount
+    MachineInstr *Old = I;
+    DebugLoc dl = Old->getDebugLoc();
+    unsigned Amount = Old->getOperand(0).getImm();
+    if (Amount != 0) {
+      // We need to keep the stack aligned properly. To do this, we round the
+      // amount of space needed for the outgoing arguments up to the next
+      // alignment boundary.
+      unsigned Align = TFI->getStackAlignment();
+      Amount = (Amount+Align-1)/Align*Align;
+
+      ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+      assert(!AFI->isThumb1OnlyFunction() &&
+             "This eliminateCallFramePseudoInstr does not support Thumb1!");
+      bool isARM = !AFI->isThumbFunction();
+
+      // Replace the pseudo instruction with a new instruction...
+      unsigned Opc = Old->getOpcode();
+      int PIdx = Old->findFirstPredOperandIdx();
+      ARMCC::CondCodes Pred = (PIdx == -1)
+        ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
+      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
+        // Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
+        unsigned PredReg = Old->getOperand(2).getReg();
+        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, Pred, PredReg);
+      } else {
+        // Note: PredReg is operand 3 for ADJCALLSTACKUP.
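+        // E.g. with Amount = 12 and 8-byte stack alignment, Amount was
+        // rounded up to 16 above, so "ADJCALLSTACKUP 12, ..." becomes
+        // "add sp, sp, #16" (and the DOWN form becomes a sub).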
+        unsigned PredReg = Old->getOperand(3).getReg();
+        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
+        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, Pred, PredReg);
+      }
+    }
+  }
+  MBB.erase(I);
+}
+
+int64_t ARMBaseRegisterInfo::
+getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
+  const TargetInstrDesc &Desc = MI->getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  int64_t InstrOffs = 0;
+  int Scale = 1;
+  unsigned ImmIdx = 0;
+  switch (AddrMode) {
+  case ARMII::AddrModeT2_i8:
+  case ARMII::AddrModeT2_i12:
+  case ARMII::AddrMode_i12:
+    InstrOffs = MI->getOperand(Idx+1).getImm();
+    Scale = 1;
+    break;
+  case ARMII::AddrMode5: {
+    // VFP address mode.
+    const MachineOperand &OffOp = MI->getOperand(Idx+1);
+    InstrOffs = ARM_AM::getAM5Offset(OffOp.getImm());
+    if (ARM_AM::getAM5Op(OffOp.getImm()) == ARM_AM::sub)
+      InstrOffs = -InstrOffs;
+    Scale = 4;
+    break;
+  }
+  case ARMII::AddrMode2: {
+    ImmIdx = Idx+2;
+    InstrOffs = ARM_AM::getAM2Offset(MI->getOperand(ImmIdx).getImm());
+    if (ARM_AM::getAM2Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+      InstrOffs = -InstrOffs;
+    break;
+  }
+  case ARMII::AddrMode3: {
+    ImmIdx = Idx+2;
+    InstrOffs = ARM_AM::getAM3Offset(MI->getOperand(ImmIdx).getImm());
+    if (ARM_AM::getAM3Op(MI->getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+      InstrOffs = -InstrOffs;
+    break;
+  }
+  case ARMII::AddrModeT1_s: {
+    ImmIdx = Idx+1;
+    InstrOffs = MI->getOperand(ImmIdx).getImm();
+    Scale = 4;
+    break;
+  }
+  default:
+    llvm_unreachable("Unsupported addressing mode!");
+    break;
+  }
+
+  return InstrOffs * Scale;
+}
+
+/// needsFrameBaseReg - Returns true if the instruction's frame index
+/// reference would be better served by a base register other than FP
+/// or SP. Used by LocalStackFrameAllocation to determine which frame index
+/// references it should create new base registers for.
+bool ARMBaseRegisterInfo::
+needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+  for (unsigned i = 0; !MI->getOperand(i).isFI(); ++i) {
+    assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  // It's the load/store FI references that cause issues, as it can be
+  // difficult to materialize the offset if it won't fit in the literal field.
+  // Estimate based on the size of the local frame and some conservative
+  // assumptions about the rest of the stack frame (note, this is pre-regalloc,
+  // so we don't know everything for certain yet) whether this offset is likely
+  // to be out of range of the immediate. Return true if so.
+
+  // We only generate virtual base registers for loads and stores, so
+  // return false for everything else.
+  unsigned Opc = MI->getOpcode();
+  switch (Opc) {
+  case ARM::LDRi12: case ARM::LDRH: case ARM::LDRBi12:
+  case ARM::STRi12: case ARM::STRH: case ARM::STRBi12:
+  case ARM::t2LDRi12: case ARM::t2LDRi8:
+  case ARM::t2STRi12: case ARM::t2STRi8:
+  case ARM::VLDRS: case ARM::VLDRD:
+  case ARM::VSTRS: case ARM::VSTRD:
+  case ARM::tSTRspi: case ARM::tLDRspi:
+    if (ForceAllBaseRegAlloc)
+      return true;
+    break;
+  default:
+    return false;
+  }
+
+  // Without a virtual base register, if the function has variable sized
+  // objects, all fixed-size local references will be via the frame pointer.
+  // Approximate the offset and see if it's legal for the instruction.
+  // Note that the incoming offset is based on the SP value at function entry,
+  // so it'll be negative.
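+  // Worked example with illustrative numbers: an incoming Offset of -40
+  // gives an FP-relative estimate below of -40 - 8 - 80 = -128 for an ARM
+  // function, and an SP-relative estimate of 40 + LocalFrameSize + 128.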
+  MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  // Estimate an offset from the frame pointer.
+  // Conservatively assume all callee-saved registers get pushed. R4-R6
+  // will be earlier than the FP, so we ignore those.
+  // R7, LR
+  int64_t FPOffset = Offset - 8;
+  // ARM and Thumb2 functions also need to consider R8-R11 and D8-D15
+  if (!AFI->isThumbFunction() || !AFI->isThumb1OnlyFunction())
+    FPOffset -= 80;
+  // Estimate an offset from the stack pointer.
+  // The incoming offset is relative to the SP at the start of the function,
+  // but when we access the local it'll be relative to the SP after local
+  // allocation, so adjust our SP-relative offset by that allocation size.
+  Offset = -Offset;
+  Offset += MFI->getLocalFrameSize();
+  // Assume that we'll have at least some spill slots allocated.
+  // FIXME: This is a total SWAG number. We should run some statistics
+  //        and pick a real one.
+  Offset += 128; // 128 bytes of spill slots
+
+  // If there is a frame pointer, try using it.
+  // The FP is only available if there is no dynamic realignment. We
+  // don't know for sure yet whether we'll need that, so we guess based
+  // on whether there are any local variables that would trigger it.
+  unsigned StackAlign = TFI->getStackAlignment();
+  if (TFI->hasFP(MF) &&
+      !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+    if (isFrameOffsetLegal(MI, FPOffset))
+      return false;
+  }
+  // If we can reference via the stack pointer, try that.
+  // FIXME: This (and the code that resolves the references) can be improved
+  //        to only disallow SP relative references in the live range of
+  //        the VLA(s). In practice, it's unclear how much difference that
+  //        would make, but it may be worth doing.
+  if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, Offset))
+    return false;
+
+  // The offset likely isn't legal; we want to allocate a virtual base register.
+  return true;
+}
+
+/// materializeFrameBaseRegister - Insert defining instruction(s) for BaseReg to
+/// be a pointer to FrameIdx at the beginning of the basic block.
+void ARMBaseRegisterInfo::
+materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                             unsigned BaseReg, int FrameIdx,
+                             int64_t Offset) const {
+  ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+  unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri :
+    (AFI->isThumb1OnlyFunction() ?
ARM::tADDrSPi : ARM::t2ADDri);
+
+  MachineBasicBlock::iterator Ins = MBB->begin();
+  DebugLoc DL;  // Defaults to "unknown"
+  if (Ins != MBB->end())
+    DL = Ins->getDebugLoc();
+
+  MachineInstrBuilder MIB =
+    BuildMI(*MBB, Ins, DL, TII.get(ADDriOpc), BaseReg)
+    .addFrameIndex(FrameIdx).addImm(Offset);
+
+  if (!AFI->isThumb1OnlyFunction())
+    AddDefaultCC(AddDefaultPred(MIB));
+}
+
+void
+ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
+                                       unsigned BaseReg, int64_t Offset) const {
+  MachineInstr &MI = *I;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  int Off = Offset; // ARM doesn't need the general 64-bit offsets
+  unsigned i = 0;
+
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This resolveFrameIndex does not support Thumb1!");
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+  bool Done = false;
+  if (!AFI->isThumbFunction())
+    Done = rewriteARMFrameIndex(MI, i, BaseReg, Off, TII);
+  else {
+    assert(AFI->isThumb2Function());
+    Done = rewriteT2FrameIndex(MI, i, BaseReg, Off, TII);
+  }
+  assert(Done && "Unable to resolve frame index!");
+}
+
+bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
+                                             int64_t Offset) const {
+  const TargetInstrDesc &Desc = MI->getDesc();
+  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
+  unsigned i = 0;
+
+  while (!MI->getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI->getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  // AddrMode4 and AddrMode6 cannot handle any offset.
+  if (AddrMode == ARMII::AddrMode4 || AddrMode == ARMII::AddrMode6)
+    return Offset == 0;
+
+  unsigned NumBits = 0;
+  unsigned Scale = 1;
+  bool isSigned = true;
+  switch (AddrMode) {
+  case ARMII::AddrModeT2_i8:
+  case ARMII::AddrModeT2_i12:
+    // i8 supports only negative, and i12 supports only positive, so
+    // based on Offset sign, consider the appropriate instruction
+    Scale = 1;
+    if (Offset < 0) {
+      NumBits = 8;
+      Offset = -Offset;
+    } else {
+      NumBits = 12;
+    }
+    break;
+  case ARMII::AddrMode5:
+    // VFP address mode.
+    NumBits = 8;
+    Scale = 4;
+    break;
+  case ARMII::AddrMode_i12:
+  case ARMII::AddrMode2:
+    NumBits = 12;
+    break;
+  case ARMII::AddrMode3:
+    NumBits = 8;
+    break;
+  case ARMII::AddrModeT1_s:
+    NumBits = 5;
+    Scale = 4;
+    isSigned = false;
+    break;
+  default:
+    llvm_unreachable("Unsupported addressing mode!");
+    break;
+  }
+
+  Offset += getFrameIndexInstrOffset(MI, i);
+  // Make sure the offset is encodable for instructions that scale the
+  // immediate.
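+  // E.g. for a VFP load (AddrMode5: NumBits = 8, Scale = 4) the legal byte
+  // offsets are 0, 4, ..., 1020; Offset = 2 fails the scale check and
+  // Offset = 1024 fails the range check below.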
+  if ((Offset & (Scale-1)) != 0)
+    return false;
+
+  if (isSigned && Offset < 0)
+    Offset = -Offset;
+
+  unsigned Mask = (1 << NumBits) - 1;
+  if ((unsigned)Offset <= Mask * Scale)
+    return true;
+
+  return false;
+}
+
+void
+ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                         int SPAdj, RegScavenger *RS) const {
+  unsigned i = 0;
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMFrameLowering *TFI =
+    static_cast<const ARMFrameLowering*>(MF.getTarget().getFrameLowering());
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  assert(!AFI->isThumb1OnlyFunction() &&
+         "This eliminateFrameIndex does not support Thumb1!");
+
+  while (!MI.getOperand(i).isFI()) {
+    ++i;
+    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
+  }
+
+  int FrameIndex = MI.getOperand(i).getIndex();
+  unsigned FrameReg;
+
+  int Offset = TFI->ResolveFrameIndexReference(MF, FrameIndex, FrameReg, SPAdj);
+
+  // Special handling of dbg_value instructions.
+  if (MI.isDebugValue()) {
+    MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+    MI.getOperand(i+1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible
+  bool Done = false;
+  if (!AFI->isThumbFunction())
+    Done = rewriteARMFrameIndex(MI, i, FrameReg, Offset, TII);
+  else {
+    assert(AFI->isThumb2Function());
+    Done = rewriteT2FrameIndex(MI, i, FrameReg, Offset, TII);
+  }
+  if (Done)
+    return;
+
+  // If we get here, the immediate doesn't fit into the instruction. We folded
+  // as much as possible above, handle the rest, providing a register that is
+  // SP+LargeImm.
+  assert((Offset ||
+          (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+          (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
+         "This code isn't needed if offset already handled!");
+
+  unsigned ScratchReg = 0;
+  int PIdx = MI.findFirstPredOperandIdx();
+  ARMCC::CondCodes Pred = (PIdx == -1)
+    ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
+  unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
+  if (Offset == 0)
+    // Must be addrmode4/6.
+    MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
+  else {
+    ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
+    if (!AFI->isThumbFunction())
+      emitARMRegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+                              Offset, Pred, PredReg, TII);
+    else {
+      assert(AFI->isThumb2Function());
+      emitT2RegPlusImmediate(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg,
+                             Offset, Pred, PredReg, TII);
+    }
+    // Update the original instruction to use the scratch register.
+    MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
+    if (MI.getOpcode() == ARM::t2ADDrSPi)
+      MI.setDesc(TII.get(ARM::t2ADDri));
+    else if (MI.getOpcode() == ARM::t2SUBrSPi)
+      MI.setDesc(TII.get(ARM::t2SUBri));
+  }
+}
+
+#include "ARMGenRegisterInfo.inc"
diff --git a/final/lib/Target/ARM/ARMBaseRegisterInfo.h b/final/lib/Target/ARM/ARMBaseRegisterInfo.h
new file mode 100644
index 00000000000..0507396f2c8
--- /dev/null
+++ b/final/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -0,0 +1,209 @@
+//===- ARMBaseRegisterInfo.h - ARM Register Information Impl ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains the base ARM implementation of TargetRegisterInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMBASEREGISTERINFO_H +#define ARMBASEREGISTERINFO_H + +#include "ARM.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "ARMGenRegisterInfo.h.inc" + +namespace llvm { + class ARMSubtarget; + class ARMBaseInstrInfo; + class Type; + +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + +/// isARMLowRegister - Returns true if the register is low register r0-r7. +/// +static inline bool isARMLowRegister(unsigned Reg) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + return true; + default: + return false; + } +} + +/// isARMArea1Register - Returns true if the register is a low register (r0-r7) +/// or a stack/pc register that we should push/pop. +static inline bool isARMArea1Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + case LR: case SP: case PC: + return true; + case R8: case R9: case R10: case R11: + // For darwin we want r7 and lr to be next to each other. + return !isDarwin; + default: + return false; + } +} + +static inline bool isARMArea2Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case R8: case R9: case R10: case R11: + // Darwin has this second area. + return isDarwin; + default: + return false; + } +} + +static inline bool isARMArea3Register(unsigned Reg, bool isDarwin) { + using namespace ARM; + switch (Reg) { + case D15: case D14: case D13: case D12: + case D11: case D10: case D9: case D8: + return true; + default: + return false; + } +} + +class ARMBaseRegisterInfo : public ARMGenRegisterInfo { +protected: + const ARMBaseInstrInfo &TII; + const ARMSubtarget &STI; + + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; + + /// BasePtr - ARM physical register used as a base ptr in complex stack + /// frames. I.e., when we need a 3rd base, not just SP and FP, due to + /// variable size stack objects. + unsigned BasePtr; + + // Can be only subclassed. + explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, + const ARMSubtarget &STI); + + // Return the opcode that implements 'Op', or 0 if no opcode + unsigned getOpcode(int Op) const; + +public: + /// Code Generation virtual methods... + const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; + + BitVector getReservedRegs(const MachineFunction &MF) const; + + /// getMatchingSuperRegClass - Return a subclass of the specified register + /// class A so that each register in it has a sub-register of the + /// specified sub-register index which is in the specified register class B. + virtual const TargetRegisterClass * + getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, unsigned Idx) const; + + /// canCombineSubRegIndices - Given a register class and a list of + /// subregister indices, return true if it's possible to combine the + /// subregister indices into one that corresponds to a larger + /// subregister. Return the new subregister index by reference. Note the + /// new index may be zero if the given subregisters can be combined to + /// form the whole register. 
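+  ///
+  /// For example (a sketch of the contract, derived from the implementation):
+  /// combining {dsub_0, dsub_1} over a 128-bit Q class covers the whole
+  /// register (NewSubIdx == 0), while the same pair over a 256-bit QQ class
+  /// yields NewSubIdx == qsub_0.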
+  virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+                                       SmallVectorImpl<unsigned> &SubIndices,
+                                       unsigned &NewSubIdx) const;
+
+  const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
+
+  unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+                               MachineFunction &MF) const;
+
+  std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+  getAllocationOrder(const TargetRegisterClass *RC,
+                     unsigned HintType, unsigned HintReg,
+                     const MachineFunction &MF) const;
+
+  unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+                               const MachineFunction &MF) const;
+
+  void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+                          MachineFunction &MF) const;
+
+  bool hasBasePointer(const MachineFunction &MF) const;
+
+  bool canRealignStack(const MachineFunction &MF) const;
+  bool needsStackRealignment(const MachineFunction &MF) const;
+  int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const;
+  bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const;
+  void materializeFrameBaseRegister(MachineBasicBlock *MBB,
+                                    unsigned BaseReg, int FrameIdx,
+                                    int64_t Offset) const;
+  void resolveFrameIndex(MachineBasicBlock::iterator I,
+                         unsigned BaseReg, int64_t Offset) const;
+  bool isFrameOffsetLegal(const MachineInstr *MI, int64_t Offset) const;
+
+  bool cannotEliminateFrame(const MachineFunction &MF) const;
+
+  // Debug information queries.
+  unsigned getRARegister() const;
+  unsigned getFrameRegister(const MachineFunction &MF) const;
+  unsigned getBaseRegister() const { return BasePtr; }
+
+  // Exception handling queries.
+  unsigned getEHExceptionRegister() const;
+  unsigned getEHHandlerRegister() const;
+
+  int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+  bool isLowRegister(unsigned Reg) const;
+
+
+  /// emitLoadConstPool - Emits a load from constpool to materialize the
+  /// specified immediate.
+  virtual void emitLoadConstPool(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator &MBBI,
+                                 DebugLoc dl,
+                                 unsigned DestReg, unsigned SubIdx,
+                                 int Val,
+                                 ARMCC::CondCodes Pred = ARMCC::AL,
+                                 unsigned PredReg = 0,
+                                 unsigned MIFlags = MachineInstr::NoFlags) const;
+
+  /// Code Generation virtual methods...
+  virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+
+  virtual bool requiresRegisterScavenging(const MachineFunction &MF) const;
+
+  virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
+
+  virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const;
+
+  virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
+                                             MachineBasicBlock &MBB,
+                                             MachineBasicBlock::iterator I) const;
+
+  virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                   int SPAdj, RegScavenger *RS = NULL) const;
+
+private:
+  unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+  unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/final/lib/Target/ARM/ARMBuildAttrs.h b/final/lib/Target/ARM/ARMBuildAttrs.h
new file mode 100644
index 00000000000..69eddf03ec9
--- /dev/null
+++ b/final/lib/Target/ARM/ARMBuildAttrs.h
@@ -0,0 +1,131 @@
+//===-------- ARMBuildAttrs.h - ARM Build Attributes ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// +// +// This file contains enumerations and support routines for ARM build attributes +// as defined in ARM ABI addenda document (ABI release 2.08). +// +//===----------------------------------------------------------------------===// + +#ifndef __TARGET_ARMBUILDATTRS_H__ +#define __TARGET_ARMBUILDATTRS_H__ + +namespace ARMBuildAttrs { + enum SpecialAttr { + // This is for the .cpu asm attr. It translates into one or more + // AttrType (below) entries in the .ARM.attributes section in the ELF. + SEL_CPU + }; + + enum AttrType { + // Rest correspond to ELF/.ARM.attributes + File = 1, + Section = 2, + Symbol = 3, + CPU_raw_name = 4, + CPU_name = 5, + CPU_arch = 6, + CPU_arch_profile = 7, + ARM_ISA_use = 8, + THUMB_ISA_use = 9, + VFP_arch = 10, + WMMX_arch = 11, + Advanced_SIMD_arch = 12, + PCS_config = 13, + ABI_PCS_R9_use = 14, + ABI_PCS_RW_data = 15, + ABI_PCS_RO_data = 16, + ABI_PCS_GOT_use = 17, + ABI_PCS_wchar_t = 18, + ABI_FP_rounding = 19, + ABI_FP_denormal = 20, + ABI_FP_exceptions = 21, + ABI_FP_user_exceptions = 22, + ABI_FP_number_model = 23, + ABI_align8_needed = 24, + ABI_align8_preserved = 25, + ABI_enum_size = 26, + ABI_HardFP_use = 27, + ABI_VFP_args = 28, + ABI_WMMX_args = 29, + ABI_optimization_goals = 30, + ABI_FP_optimization_goals = 31, + compatibility = 32, + CPU_unaligned_access = 34, + VFP_HP_extension = 36, + ABI_FP_16bit_format = 38, + MPextension_use = 42, // was 70, 2.08 ABI + DIV_use = 44, + nodefaults = 64, + also_compatible_with = 65, + T2EE_use = 66, + conformance = 67, + Virtualization_use = 68, + MPextension_use_old = 70 + }; + + // Magic numbers for .ARM.attributes + enum AttrMagic { + Format_Version = 0x41 + }; + + // Legal Values for CPU_arch, (=6), uleb128 + enum CPUArch { + Pre_v4 = 0, + v4 = 1, // e.g. SA110 + v4T = 2, // e.g. ARM7TDMI + v5T = 3, // e.g. ARM9TDMI + v5TE = 4, // e.g. ARM946E_S + v5TEJ = 5, // e.g. ARM926EJ_S + v6 = 6, // e.g. ARM1136J_S + v6KZ = 7, // e.g. ARM1176JZ_S + v6T2 = 8, // e.g. ARM1156T2F_S + v6K = 9, // e.g. ARM1136J_S + v7 = 10, // e.g. Cortex A8, Cortex M3 + v6_M = 11, // e.g. Cortex M1 + v6S_M = 12, // v6_M with the System extensions + v7E_M = 13 // v7_M with DSP extensions + }; + + enum CPUArchProfile { // (=7), uleb128 + Not_Applicable = 0, // pre v7, or cross-profile code + ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8) + RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4) + MicroControllerProfile = (0x4D), // 'M' (e.g. 
for Cortex M3)
+    SystemProfile = (0x53)          // 'S' Application or real-time profile
+  };
+
+  // The following have a lot of common use cases
+  enum {
+    //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128
+    Not_Allowed = 0,
+    Allowed = 1,
+
+    // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10)
+    AllowFPv2 = 2,      // v2 FP ISA permitted (implies use of the v1 FP ISA)
+    AllowFPv3A = 3,     // v3 FP ISA permitted (implies use of the v2 FP ISA)
+    AllowFPv3B = 4,     // v3 FP ISA permitted, but only D0-D15, S0-S31
+    AllowFPv4A = 5,     // v4 FP ISA permitted (implies use of v3 FP ISA)
+    AllowFPv4B = 6,     // v4 FP ISA was permitted, but only D0-D15, S0-S31
+
+    // Tag_THUMB_ISA_use, (=9), uleb128
+    AllowThumb32 = 2,   // 32-bit Thumb (implies 16-bit instructions)
+
+    // Tag_WMMX_arch, (=11), uleb128
+    AllowWMMXv1 = 2,    // The user permitted this entity to use WMMX v1
+
+    // Tag_ABI_FP_denormal, (=20), uleb128
+    PreserveFPSign = 2, // sign when flushed-to-zero is preserved
+
+    // Tag_ABI_FP_number_model, (=23), uleb128
+    AllowRTABI = 2,     // numbers, infinities, and one quiet NaN (see [RTABI])
+    AllowIEE754 = 3     // this code to use all the IEEE 754-defined FP encodings
+  };
+}
+
+#endif // __TARGET_ARMBUILDATTRS_H__
diff --git a/final/lib/Target/ARM/ARMCallingConv.h b/final/lib/Target/ARM/ARMCallingConv.h
new file mode 100644
index 00000000000..ff7db1ff62e
--- /dev/null
+++ b/final/lib/Target/ARM/ARMCallingConv.h
@@ -0,0 +1,160 @@
+//===-- ARMCallingConv.h - ARM Custom Calling Convention Routines ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the custom routines for the ARM Calling Convention that
+// aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMCALLINGCONV_H
+#define ARMCALLINGCONV_H
+
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARM.h"
+
+namespace llvm {
+
+// APCS f64 is in register pairs, possibly split to stack
+static bool f64AssignAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+                          CCValAssign::LocInfo &LocInfo,
+                          CCState &State, bool CanFail) {
+  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+  // Try to get the first register.
+  if (unsigned Reg = State.AllocateReg(RegList, 4))
+    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+  else {
+    // For the 2nd half of a v2f64, do not fail.
+    if (CanFail)
+      return false;
+
+    // Put the whole thing on the stack.
+    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
+                                           State.AllocateStack(8, 4),
+                                           LocVT, LocInfo));
+    return true;
+  }
+
+  // Try to get the second register.
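+  // E.g. if only r3 was free for the first half, this second allocation
+  // fails and the low half falls through to a 4-byte stack slot: the
+  // "possibly split to stack" case named above.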
+ if (unsigned Reg = State.AllocateReg(RegList, 4)) + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(4, 4), + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +// AAPCS f64 is in aligned register pairs +static bool f64AssignAAPCS(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + CCState &State, bool CanFail) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 }; + + unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2); + if (Reg == 0) { + // For the 2nd half of a v2f64, do not just fail. + if (CanFail) + return false; + + // Put the whole thing on the stack. + State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, + State.AllocateStack(8, 8), + LocVT, LocInfo)); + return true; + } + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + unsigned T = State.AllocateReg(LoRegList[i]); + (void)T; + assert(T == LoRegList[i] && "Could not allocate register"); + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true)) + return false; + if (LocVT == MVT::v2f64 && + !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false)) + return false; + return true; // we handled it +} + +static bool f64RetAssign(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, CCState &State) { + static const unsigned HiRegList[] = { ARM::R0, ARM::R2 }; + static const unsigned LoRegList[] = { ARM::R1, ARM::R3 }; + + unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2); + if (Reg == 0) + return false; // we didn't handle it + + unsigned i; + for (i = 0; i < 2; ++i) + if (HiRegList[i] == Reg) + break; + + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo)); + State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i], + LocVT, LocInfo)); + return true; +} + +static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State)) + return false; + return true; // we handled it +} + +static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, + CCState &State) { + return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, + State); +} + +} // End llvm namespace + +#endif diff --git a/final/lib/Target/ARM/ARMCallingConv.td b/final/lib/Target/ARM/ARMCallingConv.td new file mode 100644 index 00000000000..1e6b95e875f --- 
/dev/null
+++ b/final/lib/Target/ARM/ARMCallingConv.td
@@ -0,0 +1,164 @@
+//===- ARMCallingConv.td - Calling Conventions for ARM -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for ARM architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+  CCIf<!strconcat("State.getTarget().getSubtarget<ARMSubtarget>().", F), A>;
+
+/// CCIfAlign - Match of the original alignment of the arg
+class CCIfAlign<string Align, CCAction A>:
+  CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention
+//===----------------------------------------------------------------------===//
+def CC_ARM_APCS : CallingConv<[
+
+  // Handles byval parameters.
+  CCIfByVal<CCPassByVal<4, 4>>,
+
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  // f64 and v2f64 are passed in adjacent GPRs, possibly split onto the stack
+  CCIfType<[f64, v2f64], CCCustom<"CC_ARM_APCS_Custom_f64">>,
+
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+
+  CCIfType<[i32], CCAssignToStack<4, 4>>,
+  CCIfType<[f64], CCAssignToStack<8, 4>>,
+  CCIfType<[v2f64], CCAssignToStack<16, 4>>
+]>;
+
+def RetCC_ARM_APCS : CallingConv<[
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_APCS_Custom_f64">>,
+
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM APCS Calling Convention for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+def FastCC_ARM_APCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<CC_ARM_APCS>
+]>;
+
+def RetFastCC_ARM_APCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
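+  // E.g. a v4f32 return value is bitconverted to v2f64 by the rule below and
+  // then lands in Q0; a sketch of how the rules compose top-down.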
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<RetCC_ARM_APCS>
+]>;
+
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention, common parts
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_Common : CallingConv<[
+
+  CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+  // i64/f64 is passed in even pairs of GPRs
+  // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
+  // (and the same is true for f64 if VFP is not enabled)
+  CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
+  CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
+                       "ArgFlags.getOrigAlign() != 8",
+                       CCAssignToReg<[R0, R1, R2, R3]>>>,
+
+  CCIfType<[i32], CCIfAlign<"8", CCAssignToStackWithShadow<4, 8, R3>>>,
+  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+  CCIfType<[f64], CCAssignToStack<8, 8>>,
+  CCIfType<[v2f64], CCAssignToStack<16, 8>>
]>;
+
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+  CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+  CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[f64, v2f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+// Also used for FastCC (when VFP2 or later is available)
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+  // Handle all vector types as either f64 or v2f64.
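+  // E.g. under AAPCS-VFP an f32 return is assigned S0 and an f64 return D0
+  // by the rules below; further values spill into S1.../D1... (a sketch).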
+  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
+  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
+
+  CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
+  CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
diff --git a/final/lib/Target/ARM/ARMCodeEmitter.cpp b/final/lib/Target/ARM/ARMCodeEmitter.cpp
new file mode 100644
index 00000000000..fa7371626f2
--- /dev/null
+++ b/final/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -0,0 +1,1887 @@
+//===-- ARM/ARMCodeEmitter.cpp - Convert ARM code to machine code ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the pass that transforms the ARM machine instructions
+// into relocatable machine code.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jit"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMInstrInfo.h"
+#include "ARMRelocations.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/PassManager.h"
+#include "llvm/CodeGen/JITCodeEmitter.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#ifndef NDEBUG
+#include <iomanip>
+#endif
+using namespace llvm;
+
+STATISTIC(NumEmitted, "Number of machine instructions emitted");
+
+namespace {
+
+  class ARMCodeEmitter : public MachineFunctionPass {
+    ARMJITInfo *JTI;
+    const ARMInstrInfo *II;
+    const TargetData *TD;
+    const ARMSubtarget *Subtarget;
+    TargetMachine &TM;
+    JITCodeEmitter &MCE;
+    MachineModuleInfo *MMI;
+    const std::vector<MachineConstantPoolEntry> *MCPEs;
+    const std::vector<MachineJumpTableEntry> *MJTEs;
+    bool IsPIC;
+    bool IsThumb;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.addRequired<MachineModuleInfo>();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+
+    static char ID;
+  public:
+    ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce)
+      : MachineFunctionPass(ID), JTI(0),
+        II((const ARMInstrInfo *)tm.getInstrInfo()),
+        TD(tm.getTargetData()), TM(tm),
+        MCE(mce), MCPEs(0), MJTEs(0),
+        IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
+
+    /// getBinaryCodeForInstr - This function, generated by the
+    /// CodeEmitterGenerator using TableGen, produces the binary encoding for
+    /// machine instructions.
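+    /// (Illustrative note, not in the original source: the generated
+    /// implementation lives in ARMGenCodeEmitter.inc, which is #include'd
+    /// at the end of this file.)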
+    unsigned getBinaryCodeForInstr(const MachineInstr &MI) const;
+
+    bool runOnMachineFunction(MachineFunction &MF);
+
+    virtual const char *getPassName() const {
+      return "ARM Machine Code Emitter";
+    }
+
+    void emitInstruction(const MachineInstr &MI);
+
+  private:
+
+    void emitWordLE(unsigned Binary);
+    void emitDWordLE(uint64_t Binary);
+    void emitConstPoolInstruction(const MachineInstr &MI);
+    void emitMOVi32immInstruction(const MachineInstr &MI);
+    void emitMOVi2piecesInstruction(const MachineInstr &MI);
+    void emitLEApcrelJTInstruction(const MachineInstr &MI);
+    void emitPseudoMoveInstruction(const MachineInstr &MI);
+    void addPCLabel(unsigned LabelID);
+    void emitPseudoInstruction(const MachineInstr &MI);
+    unsigned getMachineSoRegOpValue(const MachineInstr &MI,
+                                    const TargetInstrDesc &TID,
+                                    const MachineOperand &MO,
+                                    unsigned OpIdx);
+
+    unsigned getMachineSoImmOpValue(unsigned SoImm);
+    unsigned getAddrModeSBit(const MachineInstr &MI,
+                             const TargetInstrDesc &TID) const;
+
+    void emitDataProcessingInstruction(const MachineInstr &MI,
+                                       unsigned ImplicitRd = 0,
+                                       unsigned ImplicitRn = 0);
+
+    void emitLoadStoreInstruction(const MachineInstr &MI,
+                                  unsigned ImplicitRd = 0,
+                                  unsigned ImplicitRn = 0);
+
+    void emitMiscLoadStoreInstruction(const MachineInstr &MI,
+                                      unsigned ImplicitRn = 0);
+
+    void emitLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+    void emitMulFrmInstruction(const MachineInstr &MI);
+
+    void emitExtendInstruction(const MachineInstr &MI);
+
+    void emitMiscArithInstruction(const MachineInstr &MI);
+
+    void emitSaturateInstruction(const MachineInstr &MI);
+
+    void emitBranchInstruction(const MachineInstr &MI);
+
+    void emitInlineJumpTable(unsigned JTIndex);
+
+    void emitMiscBranchInstruction(const MachineInstr &MI);
+
+    void emitVFPArithInstruction(const MachineInstr &MI);
+
+    void emitVFPConversionInstruction(const MachineInstr &MI);
+
+    void emitVFPLoadStoreInstruction(const MachineInstr &MI);
+
+    void emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI);
+
+    void emitNEONLaneInstruction(const MachineInstr &MI);
+    void emitNEONDupInstruction(const MachineInstr &MI);
+    void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+    void emitNEON2RegInstruction(const MachineInstr &MI);
+    void emitNEON3RegInstruction(const MachineInstr &MI);
+
+    /// getMachineOpValue - Return binary encoding of operand. If the machine
+    /// operand requires relocation, record the relocation and return zero.
+    unsigned getMachineOpValue(const MachineInstr &MI,
+                               const MachineOperand &MO) const;
+    unsigned getMachineOpValue(const MachineInstr &MI, unsigned OpIdx) const {
+      return getMachineOpValue(MI, MI.getOperand(OpIdx));
+    }
+
+    // FIXME: The legacy JIT ARMCodeEmitter doesn't rely on the
+    // TableGen'erated getBinaryCodeForInstr() function to encode any
+    // operand values, instead querying getMachineOpValue() directly for
+    // each operand it needs to encode. Thus, any of the new encoder
+    // helper functions can simply return 0 as the values they return
+    // are already handled elsewhere. They are placeholders to allow this
+    // encoder to continue to function until the MC encoder is sufficiently
+    // far along that this one can be eliminated entirely.
+    unsigned NEONThumb2DataIPostEncoder(const MachineInstr &MI, unsigned Val)
+      const { return 0; }
+    unsigned NEONThumb2LoadStorePostEncoder(const MachineInstr &MI,
+                                            unsigned Val)
+      const { return 0; }
+    unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI, unsigned Val)
+      const { return 0; }
+    unsigned VFPThumb2PostEncoder(const MachineInstr &MI, unsigned Val)
+      const { return 0; }
+    unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBLXTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBRTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbBCCTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbCBTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getUnconditionalBranchTargetOpValue(const MachineInstr &MI,
+                                                 unsigned Op) const { return 0; }
+    unsigned getARMBranchTargetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getCCOutOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SOImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getSORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getThumbAddrModeRegRegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8s4OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm8OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeImm12OffsetOpValue(const MachineInstr &MI,
+                                             unsigned Op)
+      const { return 0; }
+    unsigned getT2AddrModeSORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2SORegOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getRotImmOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getImmMinusOneOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getT2AdrLabelOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6AddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6DupAddressOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getAddrMode6OffsetOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getBitfieldInvertedMaskOpValue(const MachineInstr &MI,
+                                            unsigned Op) const { return 0; }
+    unsigned getMsbOpValue(const MachineInstr &MI,
+                           unsigned Op) const { return 0; }
+    uint32_t getLdStmModeOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+    uint32_t getLdStSORegOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+
+    unsigned getAddrModeImm12OpValue(const MachineInstr &MI, unsigned Op)
+      const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+      uint32_t Binary;
+      Binary = Imm12 & 0xfff;
+      if (Imm12 >= 0)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+
+    unsigned getHiLo16ImmOpValue(const MachineInstr &MI, unsigned Op) const {
+      return 0;
+    }
+
+    uint32_t getAddrMode2OpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+    uint32_t getAddrMode2OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+    uint32_t getAddrMode3OffsetOpValue(const MachineInstr &MI, unsigned OpIdx)
+      const { return 0; }
+    uint32_t getAddrMode3OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeThumbSPOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeSOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModeISOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrModePCOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    uint32_t getAddrMode5OpValue(const MachineInstr &MI, unsigned Op) const {
+      // {17-13} = reg
+      // {12}    = (U)nsigned (add == '1', sub == '0')
+      // {11-0}  = imm12
+      const MachineOperand &MO  = MI.getOperand(Op);
+      const MachineOperand &MO1 = MI.getOperand(Op + 1);
+      if (!MO.isReg()) {
+        emitConstPoolAddress(MO.getIndex(), ARM::reloc_arm_cp_entry);
+        return 0;
+      }
+      unsigned Reg = getARMRegisterNumbering(MO.getReg());
+      int32_t Imm12 = MO1.getImm();
+
+      // Special value for #-0
+      if (Imm12 == INT32_MIN)
+        Imm12 = 0;
+
+      // Immediate is always encoded as positive. The 'U' bit controls add vs
+      // sub.
+      bool isAdd = true;
+      if (Imm12 < 0) {
+        Imm12 = -Imm12;
+        isAdd = false;
+      }
+
+      uint32_t Binary = Imm12 & 0xfff;
+      if (isAdd)
+        Binary |= (1 << 12);
+      Binary |= (Reg << 13);
+      return Binary;
+    }
+    unsigned getNEONVcvtImm32OpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+    unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op)
+      const { return 0; }
+
+    /// getMovi32Value - Return binary encoding of operand for movw/movt. If
+    /// the machine operand requires relocation, record the relocation and
+    /// return zero.
+    unsigned getMovi32Value(const MachineInstr &MI, const MachineOperand &MO,
+                            unsigned Reloc);
+
+    /// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+    ///
+    unsigned getShiftOp(unsigned Imm) const;
+
+    /// Routines that handle operands which add machine relocations which are
+    /// fixed up by the relocation stage.
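+    /// (Illustrative note, not in the original source: these emit no bytes
+    /// themselves; each records a MachineRelocation at the current PC offset
+    /// and the JIT patches the final address in once it is known.)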
+    void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+                           bool MayNeedFarStub, bool Indirect,
+                           intptr_t ACPV = 0) const;
+    void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const;
+    void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const;
+    void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const;
+    void emitMachineBasicBlock(MachineBasicBlock *BB, unsigned Reloc,
+                               intptr_t JTBase = 0) const;
+  };
+}
+
+char ARMCodeEmitter::ID = 0;
+
+/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM
+/// code to the specified MCE object.
+FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
+                                                JITCodeEmitter &JCE) {
+  return new ARMCodeEmitter(TM, JCE);
+}
+
+bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
+  assert((MF.getTarget().getRelocationModel() == Reloc::Default ||
+          MF.getTarget().getRelocationModel() == Reloc::Static) &&
+         "JIT relocation model must be set to static or default!");
+  JTI = ((ARMTargetMachine &)MF.getTarget()).getJITInfo();
+  II = ((const ARMTargetMachine &)MF.getTarget()).getInstrInfo();
+  TD = ((const ARMTargetMachine &)MF.getTarget()).getTargetData();
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  MCPEs = &MF.getConstantPool()->getConstants();
+  MJTEs = 0;
+  if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
+  IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+  IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
+  JTI->Initialize(MF, IsPIC);
+  MMI = &getAnalysis<MachineModuleInfo>();
+  MCE.setModuleInfo(MMI);
+
+  do {
+    DEBUG(errs() << "JITTing function '"
+          << MF.getFunction()->getName() << "'\n");
+    MCE.startFunction(MF);
+    for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+         MBB != E; ++MBB) {
+      MCE.StartMachineBasicBlock(MBB);
+      for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+           I != E; ++I)
+        emitInstruction(*I);
+    }
+  } while (MCE.finishFunction(MF));
+
+  return false;
+}
+
+/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
+///
+unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
+  switch (ARM_AM::getAM2ShiftOpc(Imm)) {
+  default: llvm_unreachable("Unknown shift opc!");
+  case ARM_AM::asr: return 2;
+  case ARM_AM::lsl: return 0;
+  case ARM_AM::lsr: return 1;
+  case ARM_AM::ror:
+  case ARM_AM::rrx: return 3;
+  }
+  return 0;
+}
+
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+                                        const MachineOperand &MO,
+                                        unsigned Reloc) {
+  assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+         && "Relocation to this function should be for movt or movw");
+
+  if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), Reloc);
+  else {
+#ifndef NDEBUG
+    errs() << MO;
+#endif
+    llvm_unreachable("Unsupported operand type for movw/movt");
+  }
+  return 0;
+}
+
+/// getMachineOpValue - Return binary encoding of operand. If the machine
+/// operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
+                                           const MachineOperand &MO) const {
+  if (MO.isReg())
+    return getARMRegisterNumbering(MO.getReg());
+  else if (MO.isImm())
+    return static_cast<unsigned>(MO.getImm());
+  else if (MO.isGlobal())
+    emitGlobalAddress(MO.getGlobal(), ARM::reloc_arm_branch, true, false);
+  else if (MO.isSymbol())
+    emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
+  else if (MO.isCPI()) {
+    const TargetInstrDesc &TID = MI.getDesc();
+    // For VFP load, the immediate offset is multiplied by 4.
+    unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+      ? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
+    emitConstPoolAddress(MO.getIndex(), Reloc);
+  } else if (MO.isJTI())
+    emitJumpTableAddress(MO.getIndex(), ARM::reloc_arm_relative);
+  else if (MO.isMBB())
+    emitMachineBasicBlock(MO.getMBB(), ARM::reloc_arm_branch);
+  else
+    llvm_unreachable("Unable to encode MachineOperand!");
+  return 0;
+}
+
+/// emitGlobalAddress - Emit the specified address to the code stream.
+///
+void ARMCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc,
+                                       bool MayNeedFarStub, bool Indirect,
+                                       intptr_t ACPV) const {
+  MachineRelocation MR = Indirect
+    ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
+                                           const_cast<GlobalValue *>(GV),
+                                           ACPV, MayNeedFarStub)
+    : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
+                               const_cast<GlobalValue *>(GV), ACPV,
+                               MayNeedFarStub);
+  MCE.addRelocation(MR);
+}
+
+/// emitExternalSymbolAddress - Arrange for the address of an external symbol
+/// to be emitted to the current location in the function, and allow it to be
+/// PC relative.
+void ARMCodeEmitter::
+emitExternalSymbolAddress(const char *ES, unsigned Reloc) const {
+  MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(),
+                                                 Reloc, ES));
+}
+
+/// emitConstPoolAddress - Arrange for the address of a constant pool entry
+/// to be emitted to the current location in the function, and allow it to be
+/// PC relative.
+void ARMCodeEmitter::emitConstPoolAddress(unsigned CPI, unsigned Reloc) const {
+  // Tell JIT emitter we'll resolve the address.
+  MCE.addRelocation(MachineRelocation::getConstPool(MCE.getCurrentPCOffset(),
+                                                    Reloc, CPI, 0, true));
+}
+
+/// emitJumpTableAddress - Arrange for the address of a jump table to
+/// be emitted to the current location in the function, and allow it to be PC
+/// relative.
+void ARMCodeEmitter::
+emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const {
+  MCE.addRelocation(MachineRelocation::getJumpTable(MCE.getCurrentPCOffset(),
+                                                    Reloc, JTIndex, 0, true));
+}
+
+/// emitMachineBasicBlock - Emit the specified address basic block.
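+/// (Illustrative note, not in the original source: the target block's address
+/// is unknown until layout, so a relocation is recorded; JTBase is non-zero
+/// only for inline jump-table entries, where the stored value is the block
+/// address minus the table base.)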
+void ARMCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB,
+                                           unsigned Reloc,
+                                           intptr_t JTBase) const {
+  MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(),
+                                             Reloc, BB, JTBase));
+}
+
+void ARMCodeEmitter::emitWordLE(unsigned Binary) {
+  DEBUG(errs() << "  0x";
+        errs().write_hex(Binary) << "\n");
+  MCE.emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitDWordLE(uint64_t Binary) {
+  DEBUG(errs() << "  0x";
+        errs().write_hex(Binary) << "\n");
+  MCE.emitDWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
+  DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI);
+
+  MCE.processDebugLoc(MI.getDebugLoc(), true);
+
+  ++NumEmitted;  // Keep track of the # of mi's emitted
+  switch (MI.getDesc().TSFlags & ARMII::FormMask) {
+  default: {
+    llvm_unreachable("Unhandled instruction encoding format!");
+    break;
+  }
+  case ARMII::MiscFrm:
+    if (MI.getOpcode() == ARM::LEApcrelJT) {
+      // Materialize jumptable address.
+      emitLEApcrelJTInstruction(MI);
+      break;
+    }
+    llvm_unreachable("Unhandled instruction encoding!");
+    break;
+  case ARMII::Pseudo:
+    emitPseudoInstruction(MI);
+    break;
+  case ARMII::DPFrm:
+  case ARMII::DPSoRegFrm:
+    emitDataProcessingInstruction(MI);
+    break;
+  case ARMII::LdFrm:
+  case ARMII::StFrm:
+    emitLoadStoreInstruction(MI);
+    break;
+  case ARMII::LdMiscFrm:
+  case ARMII::StMiscFrm:
+    emitMiscLoadStoreInstruction(MI);
+    break;
+  case ARMII::LdStMulFrm:
+    emitLoadStoreMultipleInstruction(MI);
+    break;
+  case ARMII::MulFrm:
+    emitMulFrmInstruction(MI);
+    break;
+  case ARMII::ExtFrm:
+    emitExtendInstruction(MI);
+    break;
+  case ARMII::ArithMiscFrm:
+    emitMiscArithInstruction(MI);
+    break;
+  case ARMII::SatFrm:
+    emitSaturateInstruction(MI);
+    break;
+  case ARMII::BrFrm:
+    emitBranchInstruction(MI);
+    break;
+  case ARMII::BrMiscFrm:
+    emitMiscBranchInstruction(MI);
+    break;
+  // VFP instructions.
+  case ARMII::VFPUnaryFrm:
+  case ARMII::VFPBinaryFrm:
+    emitVFPArithInstruction(MI);
+    break;
+  case ARMII::VFPConv1Frm:
+  case ARMII::VFPConv2Frm:
+  case ARMII::VFPConv3Frm:
+  case ARMII::VFPConv4Frm:
+  case ARMII::VFPConv5Frm:
+    emitVFPConversionInstruction(MI);
+    break;
+  case ARMII::VFPLdStFrm:
+    emitVFPLoadStoreInstruction(MI);
+    break;
+  case ARMII::VFPLdStMulFrm:
+    emitVFPLoadStoreMultipleInstruction(MI);
+    break;
+
+  // NEON instructions.
+  case ARMII::NGetLnFrm:
+  case ARMII::NSetLnFrm:
+    emitNEONLaneInstruction(MI);
+    break;
+  case ARMII::NDupFrm:
+    emitNEONDupInstruction(MI);
+    break;
+  case ARMII::N1RegModImmFrm:
+    emitNEON1RegModImmInstruction(MI);
+    break;
+  case ARMII::N2RegFrm:
+    emitNEON2RegInstruction(MI);
+    break;
+  case ARMII::N3RegFrm:
+    emitNEON3RegInstruction(MI);
+    break;
+  }
+  MCE.processDebugLoc(MI.getDebugLoc(), false);
+}
+
+void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
+  unsigned CPI = MI.getOperand(0).getImm();       // CP instruction index.
+  unsigned CPIndex = MI.getOperand(1).getIndex(); // Actual cp entry index.
+  const MachineConstantPoolEntry &MCPE = (*MCPEs)[CPIndex];
+
+  // Remember the CONSTPOOL_ENTRY address for later relocation.
+  JTI->addConstantPoolEntryAddr(CPI, MCE.getCurrentPCValue());
+
+  // Emit constpool island entry. In most cases, the actual values will be
+  // resolved and relocated after code emission.
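+  // (Illustrative note, not in the original source: the two indices differ --
+  // CPI names the island entry that ARMJITInfo uses to resolve references to
+  // it, while CPIndex selects the value from the function's constant pool.)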
+  if (MCPE.isMachineConstantPoolEntry()) {
+    ARMConstantPoolValue *ACPV =
+      static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+
+    DEBUG(errs() << "  ** ARM constant pool #" << CPI << " @ "
+          << (void*)MCE.getCurrentPCValue() << " " << *ACPV << '\n');
+
+    assert(ACPV->isGlobalValue() && "unsupported constant pool value");
+    const GlobalValue *GV = ACPV->getGV();
+    if (GV) {
+      Reloc::Model RelocM = TM.getRelocationModel();
+      emitGlobalAddress(GV, ARM::reloc_arm_machine_cp_entry,
+                        isa<Function>(GV),
+                        Subtarget->GVIsIndirectSymbol(GV, RelocM),
+                        (intptr_t)ACPV);
+    } else {
+      emitExternalSymbolAddress(ACPV->getSymbol(), ARM::reloc_arm_absolute);
+    }
+    emitWordLE(0);
+  } else {
+    const Constant *CV = MCPE.Val.ConstVal;
+
+    DEBUG({
+        errs() << "  ** Constant pool #" << CPI << " @ "
+               << (void*)MCE.getCurrentPCValue() << " ";
+        if (const Function *F = dyn_cast<Function>(CV))
+          errs() << F->getName();
+        else
+          errs() << *CV;
+        errs() << '\n';
+      });
+
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
+      emitGlobalAddress(GV, ARM::reloc_arm_absolute, isa<Function>(GV), false);
+      emitWordLE(0);
+    } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+      uint32_t Val = uint32_t(*CI->getValue().getRawData());
+      emitWordLE(Val);
+    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+      if (CFP->getType()->isFloatTy())
+        emitWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+      else if (CFP->getType()->isDoubleTy())
+        emitDWordLE(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
+      else {
+        llvm_unreachable("Unable to handle this constantpool entry!");
+      }
+    } else {
+      llvm_unreachable("Unable to handle this constantpool entry!");
+    }
+  }
+}
+
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+  const MachineOperand &MO0 = MI.getOperand(0);
+  const MachineOperand &MO1 = MI.getOperand(1);
+
+  // Emit the 'movw' instruction.
+  unsigned Binary = 0x30 << 20;  // mov: Insts{27-20} = 0b00110000
+
+  unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12
+  Binary |= Lo16 & 0xFFF;                // Insts{11-0} = imm12
+  Binary |= ((Lo16 >> 12) & 0xF) << 16;  // Insts{19-16} = imm4
+  emitWordLE(Binary);
+
+  unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+  // Emit the 'movt' instruction.
+  Binary = 0x34 << 20;  // movt: Insts{27-20} = 0b00110100
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode imm16 as imm4:imm12, same as movw above.
+  Binary |= Hi16 & 0xFFF;
+  Binary |= ((Hi16 >> 12) & 0xF) << 16;
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
+  const MachineOperand &MO0 = MI.getOperand(0);
+  const MachineOperand &MO1 = MI.getOperand(1);
+  assert(MO1.isImm() && ARM_AM::isSOImmTwoPartVal(MO1.getImm()) &&
+         "Not a valid so_imm value!");
+  unsigned V1 = ARM_AM::getSOImmTwoPartFirst(MO1.getImm());
+  unsigned V2 = ARM_AM::getSOImmTwoPartSecond(MO1.getImm());
+
+  // Emit the 'mov' instruction.
+  unsigned Binary = 0xd << 21;  // mov: Insts{24-21} = 0b1101
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode so_imm.
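+  // (Worked example, illustrative and not in the original source: so_imm
+  // 0x1F0 is encoded as immed_8 = 0x1F with a rotate of 28 bits, i.e.
+  // rotate_imm field = 14, since 0x1F rotated right by 28 yields 0x1F0.)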
+  // Set bit I(25) to identify this is the immediate form of <shift>.
+  Binary |= 1 << ARMII::I_BitShift;
+  Binary |= getMachineSoImmOpValue(V1);
+  emitWordLE(Binary);
+
+  // Now the 'orr' instruction.
+  Binary = 0xc << 21;  // orr: Insts{24-21} = 0b1100
+
+  // Set the conditional execution predicate.
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+  // Encode Rn.
+  Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRnShift;
+
+  // Encode so_imm.
+  // Set bit I(25) to identify this is the immediate form of <shift>.
+  Binary |= 1 << ARMII::I_BitShift;
+  Binary |= getMachineSoImmOpValue(V2);
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
+  // It's basically add r, pc, (LJTI - $+8)
+
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Emit the 'add' instruction.
+  unsigned Binary = 0x4 << 21;  // add: Insts{24-21} = 0b0100
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode S bit if MI modifies CPSR.
+  Binary |= getAddrModeSBit(MI, TID);
+
+  // Encode Rd.
+  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+  // Encode Rn which is PC.
+  Binary |= getARMRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
+
+  // Encode the displacement.
+  Binary |= 1 << ARMII::I_BitShift;
+  emitJumpTableAddress(MI.getOperand(1).getIndex(), ARM::reloc_arm_jt_base);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitPseudoMoveInstruction(const MachineInstr &MI) {
+  unsigned Opcode = MI.getDesc().Opcode;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode S bit if MI modifies CPSR.
+  if (Opcode == ARM::MOVsrl_flag || Opcode == ARM::MOVsra_flag)
+    Binary |= 1 << ARMII::S_BitShift;
+
+  // Encode register def if there is one.
+  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+  // Encode the shift operation.
+  switch (Opcode) {
+  default: break;
+  case ARM::RRX:
+    // rrx
+    Binary |= 0x6 << 4;
+    break;
+  case ARM::MOVsrl_flag:
+    // lsr #1
+    Binary |= (0x2 << 4) | (1 << 7);
+    break;
+  case ARM::MOVsra_flag:
+    // asr #1
+    Binary |= (0x4 << 4) | (1 << 7);
+    break;
+  }
+
+  // Encode register Rm.
+  Binary |= getMachineOpValue(MI, 1);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::addPCLabel(unsigned LabelID) {
+  DEBUG(errs() << "  ** LPC" << LabelID << " @ "
+        << (void*)MCE.getCurrentPCValue() << '\n');
+  JTI->addPCLabelAddr(LabelID, MCE.getCurrentPCValue());
+}
+
+void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
+  unsigned Opcode = MI.getDesc().Opcode;
+  switch (Opcode) {
+  default:
+    llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
+  case ARM::BX_CALL:
+  case ARM::BMOVPCRX_CALL:
+  case ARM::BXr9_CALL:
+  case ARM::BMOVPCRXr9_CALL: {
+    // First emit mov lr, pc
+    unsigned Binary = 0x01a0e00f;
+    Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+    emitWordLE(Binary);
+
+    // and then emit the branch.
+    emitMiscBranchInstruction(MI);
+    break;
+  }
+  case TargetOpcode::INLINEASM: {
+    // We allow inline assembler nodes with empty bodies - they can
+    // implicitly define registers, which is ok for JIT.
+    if (MI.getOperand(0).getSymbolName()[0]) {
+      report_fatal_error("JIT does not support inline asm!");
+    }
+    break;
+  }
+  case TargetOpcode::PROLOG_LABEL:
+  case TargetOpcode::EH_LABEL:
+    MCE.emitLabel(MI.getOperand(0).getMCSymbol());
+    break;
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+    // Do nothing.
+    break;
+  case ARM::CONSTPOOL_ENTRY:
+    emitConstPoolInstruction(MI);
+    break;
+  case ARM::PICADD: {
+    // Remember the address of the PC label for relocation later.
+    addPCLabel(MI.getOperand(2).getImm());
+    // PICADD is just an add instruction that implicitly reads pc.
+    emitDataProcessingInstruction(MI, 0, ARM::PC);
+    break;
+  }
+  case ARM::PICLDR:
+  case ARM::PICLDRB:
+  case ARM::PICSTR:
+  case ARM::PICSTRB: {
+    // Remember the address of the PC label for relocation later.
+    addPCLabel(MI.getOperand(2).getImm());
+    // These are just load / store instructions that implicitly read pc.
+    emitLoadStoreInstruction(MI, 0, ARM::PC);
+    break;
+  }
+  case ARM::PICLDRH:
+  case ARM::PICLDRSH:
+  case ARM::PICLDRSB:
+  case ARM::PICSTRH: {
+    // Remember the address of the PC label for relocation later.
+    addPCLabel(MI.getOperand(2).getImm());
+    // These are just load / store instructions that implicitly read pc.
+    emitMiscLoadStoreInstruction(MI, ARM::PC);
+    break;
+  }
+
+  case ARM::MOVi32imm:
+    // Two instructions to materialize a constant.
+    if (Subtarget->hasV6T2Ops())
+      emitMOVi32immInstruction(MI);
+    else
+      emitMOVi2piecesInstruction(MI);
+    break;
+
+  case ARM::LEApcrelJT:
+    // Materialize jumptable address.
+    emitLEApcrelJTInstruction(MI);
+    break;
+  case ARM::RRX:
+  case ARM::MOVsrl_flag:
+  case ARM::MOVsra_flag:
+    emitPseudoMoveInstruction(MI);
+    break;
+  }
+}
+
+unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
+                                                const TargetInstrDesc &TID,
+                                                const MachineOperand &MO,
+                                                unsigned OpIdx) {
+  unsigned Binary = getMachineOpValue(MI, MO);
+
+  const MachineOperand &MO1 = MI.getOperand(OpIdx + 1);
+  const MachineOperand &MO2 = MI.getOperand(OpIdx + 2);
+  ARM_AM::ShiftOpc SOpc = ARM_AM::getSORegShOp(MO2.getImm());
+
+  // Encode the shift opcode.
+  unsigned SBits = 0;
+  unsigned Rs = MO1.getReg();
+  if (Rs) {
+    // Set shift operand (bit[7:4]).
+    // LSL - 0001
+    // LSR - 0011
+    // ASR - 0101
+    // ROR - 0111
+    // RRX - 0110 and bit[11:8] clear.
+    switch (SOpc) {
+    default: llvm_unreachable("Unknown shift opc!");
+    case ARM_AM::lsl: SBits = 0x1; break;
+    case ARM_AM::lsr: SBits = 0x3; break;
+    case ARM_AM::asr: SBits = 0x5; break;
+    case ARM_AM::ror: SBits = 0x7; break;
+    case ARM_AM::rrx: SBits = 0x6; break;
+    }
+  } else {
+    // Set shift operand (bit[6:4]).
+    // LSL - 000
+    // LSR - 010
+    // ASR - 100
+    // ROR - 110
+    switch (SOpc) {
+    default: llvm_unreachable("Unknown shift opc!");
+    case ARM_AM::lsl: SBits = 0x0; break;
+    case ARM_AM::lsr: SBits = 0x2; break;
+    case ARM_AM::asr: SBits = 0x4; break;
+    case ARM_AM::ror: SBits = 0x6; break;
+    }
+  }
+  Binary |= SBits << 4;
+  if (SOpc == ARM_AM::rrx)
+    return Binary;
+
+  // Encode the shift operation Rs or shift_imm (except rrx).
+  if (Rs) {
+    // Encode Rs bit[11:8].
+    assert(ARM_AM::getSORegOffset(MO2.getImm()) == 0);
+    return Binary | (getARMRegisterNumbering(Rs) << ARMII::RegRsShift);
+  }
+
+  // Encode shift_imm bit[11:7].
+  return Binary | ARM_AM::getSORegOffset(MO2.getImm()) << 7;
+}
+
+unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
+  int SoImmVal = ARM_AM::getSOImmVal(SoImm);
+  assert(SoImmVal != -1 && "Not a valid so_imm value!");
+
+  // Encode rotate_imm.
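+  // (Illustrative note, not in the original source: getSOImmValRot() returns
+  // the rotate amount in bits, which is always even, so the right-shift by
+  // one below yields the 4-bit rotate_imm field the hardware expects.)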
+  unsigned Binary = (ARM_AM::getSOImmValRot((unsigned)SoImmVal) >> 1)
+    << ARMII::SoRotImmShift;
+
+  // Encode immed_8.
+  Binary |= ARM_AM::getSOImmValImm((unsigned)SoImmVal);
+  return Binary;
+}
+
+unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
+                                         const TargetInstrDesc &TID) const {
+  for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i) {
+    const MachineOperand &MO = MI.getOperand(i-1);
+    if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
+      return 1 << ARMII::S_BitShift;
+  }
+  return 0;
+}
+
+void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
+                                                   unsigned ImplicitRd,
+                                                   unsigned ImplicitRn) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode S bit if MI modifies CPSR.
+  Binary |= getAddrModeSBit(MI, TID);
+
+  // Encode register def if there is one.
+  unsigned NumDefs = TID.getNumDefs();
+  unsigned OpIdx = 0;
+  if (NumDefs)
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+  else if (ImplicitRd)
+    // Special handling for implicit use (e.g. PC).
+    Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+
+  if (TID.Opcode == ARM::MOVi16) {
+    // Get immediate from MI.
+    unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+                                   ARM::reloc_arm_movw);
+    // Encode imm which is the same as in emitMOVi32immInstruction().
+    Binary |= Lo16 & 0xFFF;
+    Binary |= ((Lo16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if (TID.Opcode == ARM::MOVTi16) {
+    unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+                                    ARM::reloc_arm_movt) >> 16);
+    Binary |= Hi16 & 0xFFF;
+    Binary |= ((Hi16 >> 12) & 0xF) << 16;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+    uint32_t v = ~MI.getOperand(2).getImm();
+    int32_t lsb = CountTrailingZeros_32(v);
+    int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+    // Instr{20-16} = msb, Instr{11-7} = lsb
+    Binary |= (msb & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+    // Encode Rn in Instr{0-3}
+    Binary |= getMachineOpValue(MI, OpIdx++);
+
+    uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+    uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+    // Instr{20-16} = widthm1, Instr{11-7} = lsb
+    Binary |= (widthm1 & 0x1F) << 16;
+    Binary |= (lsb & 0x1F) << 7;
+    emitWordLE(Binary);
+    return;
+  }
+
+  // If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
+  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  // Encode first non-shifter register operand if there is one.
+  bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+  if (!isUnary) {
+    if (ImplicitRn)
+      // Special handling for implicit use (e.g. PC).
+      Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+    else {
+      Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRnShift;
+      ++OpIdx;
+    }
+  }
+
+  // Encode shifter operand.
+  const MachineOperand &MO = MI.getOperand(OpIdx);
+  if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+    // Encode SoReg.
+    emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+    return;
+  }
+
+  if (MO.isReg()) {
+    // Encode register Rm.
+    emitWordLE(Binary | getARMRegisterNumbering(MO.getReg()));
+    return;
+  }
+
+  // Encode so_imm.
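+  // (Illustrative note, not in the original source: by this point Binary
+  // already holds the cond, opcode, S, Rd and, for non-unary ops, Rn fields;
+  // only the 12-bit shifter-operand field remains to be filled below.)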
+  Binary |= getMachineSoImmOpValue((unsigned)MO.getImm());
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
+                                              unsigned ImplicitRd,
+                                              unsigned ImplicitRn) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned Form = TID.TSFlags & ARMII::FormMask;
+  bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // If this is an LDRi12, STRi12 or LDRcp, nothing more needs to be done.
+  if (MI.getOpcode() == ARM::LDRi12 || MI.getOpcode() == ARM::LDRcp ||
+      MI.getOpcode() == ARM::STRi12) {
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Operand 0 of a pre- and post-indexed store is the address base
+  // writeback. Skip it.
+  bool Skipped = false;
+  if (IsPrePost && Form == ARMII::StFrm) {
+    ++OpIdx;
+    Skipped = true;
+  }
+
+  // Set first operand
+  if (ImplicitRd)
+    // Special handling for implicit use (e.g. PC).
+    Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
+  else
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  // Set second operand
+  if (ImplicitRn)
+    // Special handling for implicit use (e.g. PC).
+    Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+  else
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // If this is a two-address operand, skip it. e.g. LDR_PRE.
+  if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  const MachineOperand &MO2 = MI.getOperand(OpIdx);
+  unsigned AM2Opc = (ImplicitRn == ARM::PC)
+    ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+  // Set bit U(23) according to sign of immed value (positive or negative).
+  Binary |= ((ARM_AM::getAM2Op(AM2Opc) == ARM_AM::add ? 1 : 0) <<
+             ARMII::U_BitShift);
+  if (!MO2.getReg()) { // is immediate
+    if (ARM_AM::getAM2Offset(AM2Opc))
+      // Set the value of offset_12 field
+      Binary |= ARM_AM::getAM2Offset(AM2Opc);
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Set bit I(25), because this is not in immediate encoding.
+  Binary |= 1 << ARMII::I_BitShift;
+  assert(TargetRegisterInfo::isPhysicalRegister(MO2.getReg()));
+  // Set bit[3:0] to the corresponding Rm register
+  Binary |= getARMRegisterNumbering(MO2.getReg());
+
+  // If this instr uses scaled register offset/index encoding, set
+  // shift_immed(bit[11:7]) and shift(bit[6:5]) fields.
+  if (unsigned ShImm = ARM_AM::getAM2Offset(AM2Opc)) {
+    Binary |= getShiftOp(AM2Opc) << ARMII::ShiftImmShift;  // shift
+    Binary |= ShImm << ARMII::ShiftShift;  // shift_immed
+  }
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
+                                                  unsigned ImplicitRn) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned Form = TID.TSFlags & ARMII::FormMask;
+  bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Operand 0 of a pre- and post-indexed store is the address base
+  // writeback. Skip it.
+  bool Skipped = false;
+  if (IsPrePost && Form == ARMII::StMiscFrm) {
+    ++OpIdx;
+    Skipped = true;
+  }
+
+  // Set first operand
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  // Skip LDRD and STRD's second operand.
+  if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+    ++OpIdx;
+
+  // Set second operand
+  if (ImplicitRn)
+    // Special handling for implicit use (e.g. PC).
+    Binary |= (getARMRegisterNumbering(ImplicitRn) << ARMII::RegRnShift);
+  else
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // If this is a two-address operand, skip it. e.g. LDRH_POST.
+  if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  const MachineOperand &MO2 = MI.getOperand(OpIdx);
+  unsigned AM3Opc = (ImplicitRn == ARM::PC)
+    ? 0 : MI.getOperand(OpIdx+1).getImm();
+
+  // Set bit U(23) according to sign of immed value (positive or negative)
+  Binary |= ((ARM_AM::getAM3Op(AM3Opc) == ARM_AM::add ? 1 : 0) <<
+             ARMII::U_BitShift);
+
+  // If this instr is in register offset/index encoding, set bit[3:0]
+  // to the corresponding Rm register.
+  if (MO2.getReg()) {
+    Binary |= getARMRegisterNumbering(MO2.getReg());
+    emitWordLE(Binary);
+    return;
+  }
+
+  // This instr is in immediate offset/index encoding, set bit 22 to 1.
+  Binary |= 1 << ARMII::AM3_I_BitShift;
+  if (unsigned ImmOffs = ARM_AM::getAM3Offset(AM3Opc)) {
+    // Set operands
+    Binary |= (ImmOffs >> 4) << ARMII::ImmHiShift;  // immedH
+    Binary |= (ImmOffs & 0xF);                      // immedL
+  }
+
+  emitWordLE(Binary);
+}
+
+static unsigned getAddrModeUPBits(unsigned Mode) {
+  unsigned Binary = 0;
+
+  // Set addressing mode by modifying bits U(23) and P(24)
+  // IA - Increment after  - bit U = 1 and bit P = 0
+  // IB - Increment before - bit U = 1 and bit P = 1
+  // DA - Decrement after  - bit U = 0 and bit P = 0
+  // DB - Decrement before - bit U = 0 and bit P = 1
+  switch (Mode) {
+  default: llvm_unreachable("Unknown addressing sub-mode!");
+  case ARM_AM::da:                                      break;
+  case ARM_AM::db: Binary |= 0x1 << ARMII::P_BitShift;  break;
+  case ARM_AM::ia: Binary |= 0x1 << ARMII::U_BitShift;  break;
+  case ARM_AM::ib: Binary |= 0x3 << ARMII::U_BitShift;  break;
+  }
+
+  return Binary;
+}
+
+void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Skip operand 0 of an instruction with base register update.
+  unsigned OpIdx = 0;
+  if (IsUpdating)
+    ++OpIdx;
+
+  // Set base address operand
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // Set addressing mode by modifying bits U(23) and P(24)
+  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+  // Set bit W(21)
+  if (IsUpdating)
+    Binary |= 0x1 << ARMII::W_BitShift;
+
+  // Set registers
+  for (unsigned i = OpIdx+2, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      break;
+    unsigned RegNum = getARMRegisterNumbering(MO.getReg());
+    assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
+           RegNum < 16);
+    Binary |= 0x1 << RegNum;
+  }
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
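+  // (Illustrative note, not in the original source: for a 32x32->64 multiply
+  // such as SMULL the two results make TID.getNumDefs() == 2, so RdLo is
+  // encoded first below, followed by RdHi.)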
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode S bit if MI modifies CPSR.
+  Binary |= getAddrModeSBit(MI, TID);
+
+  // 32x32->64bit operations have two destination registers. The number
+  // of register definitions will tell us if that's what we're dealing with.
+  unsigned OpIdx = 0;
+  if (TID.getNumDefs() == 2)
+    Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdLoShift;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdHiShift;
+
+  // Encode Rm
+  Binary |= getMachineOpValue(MI, OpIdx++);
+
+  // Encode Rs
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRsShift;
+
+  // Many multiply instructions (e.g. MLA) have three src operands. Encode
+  // it as Rn (for multiply, that's in the same offset as RdLo).
+  if (TID.getNumOperands() > OpIdx &&
+      !TID.OpInfo[OpIdx].isPredicate() &&
+      !TID.OpInfo[OpIdx].isOptionalDef())
+    Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  const MachineOperand &MO1 = MI.getOperand(OpIdx++);
+  const MachineOperand &MO2 = MI.getOperand(OpIdx);
+  if (MO2.isReg()) {
+    // Two register operand form.
+    // Encode Rn.
+    Binary |= getMachineOpValue(MI, MO1) << ARMII::RegRnShift;
+
+    // Encode Rm.
+    Binary |= getMachineOpValue(MI, MO2);
+    ++OpIdx;
+  } else {
+    Binary |= getMachineOpValue(MI, MO1);
+  }
+
+  // Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
+  if (MI.getOperand(OpIdx).isImm() &&
+      !TID.OpInfo[OpIdx].isPredicate() &&
+      !TID.OpInfo[OpIdx].isOptionalDef())
+    Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+
+  const MachineOperand &MO = MI.getOperand(OpIdx++);
+  if (OpIdx == TID.getNumOperands() ||
+      TID.OpInfo[OpIdx].isPredicate() ||
+      TID.OpInfo[OpIdx].isOptionalDef()) {
+    // Encode Rm and it's done.
+    Binary |= getMachineOpValue(MI, MO);
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Rn.
+  Binary |= getMachineOpValue(MI, MO) << ARMII::RegRnShift;
+
+  // Encode Rm.
+  Binary |= getMachineOpValue(MI, OpIdx++);
+
+  // Encode shift_imm.
+  unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
+  if (TID.Opcode == ARM::PKHTB) {
+    assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
+    if (ShiftAmt == 32)
+      ShiftAmt = 0;
+  }
+  assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+  Binary |= ShiftAmt << ARMII::ShiftShift;
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
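+  // (Illustrative note, not in the original source: SSAT encodes its
+  // saturate position as sat_imm = position - 1 while USAT encodes the
+  // position directly, hence the SSAT/SSAT16-only decrement below.)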
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Encode Rd
+  Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
+
+  // Encode saturate bit position.
+  unsigned Pos = MI.getOperand(1).getImm();
+  if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+    Pos -= 1;
+  assert((Pos < 16 || (Pos < 32 &&
+                       TID.Opcode != ARM::SSAT16 &&
+                       TID.Opcode != ARM::USAT16)) &&
+         "saturate bit position out of range");
+  Binary |= Pos << 16;
+
+  // Encode Rm
+  Binary |= getMachineOpValue(MI, 2);
+
+  // Encode shift_imm.
+  if (TID.getNumOperands() == 4) {
+    unsigned ShiftOp = MI.getOperand(3).getImm();
+    ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
+    if (Opc == ARM_AM::asr)
+      Binary |= (1 << 6);
+    unsigned ShiftAmt = MI.getOperand(3).getImm();
+    if (ShiftAmt == 32 && Opc == ARM_AM::asr)
+      ShiftAmt = 0;
+    assert(ShiftAmt < 32 && "shift_imm range is 0 to 31!");
+    Binary |= ShiftAmt << ARMII::ShiftShift;
+  }
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  if (TID.Opcode == ARM::TPsoft) {
+    llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
+  }
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Set signed_immed_24 field
+  Binary |= getMachineOpValue(MI, 0);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
+  // Remember the base address of the inline jump table.
+  uintptr_t JTBase = MCE.getCurrentPCValue();
+  JTI->addJumpTableBaseAddr(JTIndex, JTBase);
+  DEBUG(errs() << "  ** Jump Table #" << JTIndex << " @ " << (void*)JTBase
+        << '\n');
+
+  // Now emit the jump table entries.
+  const std::vector<MachineBasicBlock*> &MBBs = (*MJTEs)[JTIndex].MBBs;
+  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
+    if (IsPIC)
+      // DestBB address - JT base.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_pic_jt, JTBase);
+    else
+      // Absolute DestBB address.
+      emitMachineBasicBlock(MBBs[i], ARM::reloc_arm_absolute);
+    emitWordLE(0);
+  }
+}
+
+void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Handle jump tables.
+  if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+    // First emit a ldr pc, [] instruction.
+    emitDataProcessingInstruction(MI, ARM::PC);
+
+    // Then emit the inline jump table.
+    unsigned JTIndex =
+      (TID.Opcode == ARM::BR_JTr)
+      ? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
+    emitInlineJumpTable(JTIndex);
+    return;
+  } else if (TID.Opcode == ARM::BR_JTm) {
+    // First emit a ldr pc, [] instruction.
+    emitLoadStoreInstruction(MI, ARM::PC);
+
+    // Then emit the inline jump table.
+    emitInlineJumpTable(MI.getOperand(3).getIndex());
+    return;
+  }
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
+    // The return register is LR.
+    Binary |= getARMRegisterNumbering(ARM::LR);
+  else
+    // otherwise, set the return register
+    Binary |= getMachineOpValue(MI, 0);
+
+  emitWordLE(Binary);
+}
+
+static unsigned encodeVFPRd(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegD = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegD);
+  RegD = getARMRegisterNumbering(RegD);
+  if (!isSPVFP)
+    Binary |= RegD << ARMII::RegRdShift;
+  else {
+    Binary |= ((RegD & 0x1E) >> 1) << ARMII::RegRdShift;
+    Binary |= (RegD & 0x01) << ARMII::D_BitShift;
+  }
+  return Binary;
+}
+
+static unsigned encodeVFPRn(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegN = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegN);
+  RegN = getARMRegisterNumbering(RegN);
+  if (!isSPVFP)
+    Binary |= RegN << ARMII::RegRnShift;
+  else {
+    Binary |= ((RegN & 0x1E) >> 1) << ARMII::RegRnShift;
+    Binary |= (RegN & 0x01) << ARMII::N_BitShift;
+  }
+  return Binary;
+}
+
+static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegM = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  bool isSPVFP = ARM::SPRRegisterClass->contains(RegM);
+  RegM = getARMRegisterNumbering(RegM);
+  if (!isSPVFP)
+    Binary |= RegM;
+  else {
+    Binary |= ((RegM & 0x1E) >> 1);
+    Binary |= (RegM & 0x01) << ARMII::M_BitShift;
+  }
+  return Binary;
+}
+
+void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+  assert((Binary & ARMII::D_BitShift) == 0 &&
+         (Binary & ARMII::N_BitShift) == 0 &&
+         (Binary & ARMII::M_BitShift) == 0 && "VFP encoding bug!");
+
+  // Encode Dd / Sd.
+  Binary |= encodeVFPRd(MI, OpIdx++);
+
+  // If this is a two-address operand, skip it, e.g. FMACD.
+  if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+    ++OpIdx;
+
+  // Encode Dn / Sn.
+  if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+    Binary |= encodeVFPRn(MI, OpIdx++);
+
+  if (OpIdx == TID.getNumOperands() ||
+      TID.OpInfo[OpIdx].isPredicate() ||
+      TID.OpInfo[OpIdx].isOptionalDef()) {
+    // FCMPEZD etc. has only one operand.
+    emitWordLE(Binary);
+    return;
+  }
+
+  // Encode Dm / Sm.
+  Binary |= encodeVFPRm(MI, OpIdx);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  unsigned Form = TID.TSFlags & ARMII::FormMask;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  switch (Form) {
+  default: break;
+  case ARMII::VFPConv1Frm:
+  case ARMII::VFPConv2Frm:
+  case ARMII::VFPConv3Frm:
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 0);
+    break;
+  case ARMII::VFPConv4Frm:
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 0);
+    break;
+  case ARMII::VFPConv5Frm:
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 0);
+    break;
+  }
+
+  switch (Form) {
+  default: break;
+  case ARMII::VFPConv1Frm:
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 1);
+    break;
+  case ARMII::VFPConv2Frm:
+  case ARMII::VFPConv3Frm:
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 1);
+    break;
+  case ARMII::VFPConv4Frm:
+  case ARMII::VFPConv5Frm:
+    // Encode Dd / Sd.
+    Binary |= encodeVFPRd(MI, 1);
+    break;
+  }
+
+  if (Form == ARMII::VFPConv5Frm)
+    // Encode Dn / Sn.
+    Binary |= encodeVFPRn(MI, 2);
+  else if (Form == ARMII::VFPConv3Frm)
+    // Encode Dm / Sm.
+    Binary |= encodeVFPRm(MI, 2);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  unsigned OpIdx = 0;
+
+  // Encode Dd / Sd.
+  Binary |= encodeVFPRd(MI, OpIdx++);
+
+  // Encode address base.
+  const MachineOperand &Base = MI.getOperand(OpIdx++);
+  Binary |= getMachineOpValue(MI, Base) << ARMII::RegRnShift;
+
+  // If there is a non-zero immediate offset, encode it.
+  if (Base.isReg()) {
+    const MachineOperand &Offset = MI.getOperand(OpIdx);
+    if (unsigned ImmOffs = ARM_AM::getAM5Offset(Offset.getImm())) {
+      if (ARM_AM::getAM5Op(Offset.getImm()) == ARM_AM::add)
+        Binary |= 1 << ARMII::U_BitShift;
+      Binary |= ImmOffs;
+      emitWordLE(Binary);
+      return;
+    }
+  }
+
+  // If immediate offset is omitted, default to +0.
+  Binary |= 1 << ARMII::U_BitShift;
+
+  emitWordLE(Binary);
+}
+
+void
+ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
+  const TargetInstrDesc &TID = MI.getDesc();
+  bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+
+  // Part of binary is determined by TableGen.
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+  // Skip operand 0 of an instruction with base register update.
+  unsigned OpIdx = 0;
+  if (IsUpdating)
+    ++OpIdx;
+
+  // Set base address operand
+  Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
+
+  // Set addressing mode by modifying bits U(23) and P(24)
+  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(MI.getOpcode());
+  Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(Mode));
+
+  // Set bit W(21)
+  if (IsUpdating)
+    Binary |= 0x1 << ARMII::W_BitShift;
+
+  // First register is encoded in Dd.
+  Binary |= encodeVFPRd(MI, OpIdx+2);
+
+  // Count the number of registers.
+  unsigned NumRegs = 1;
+  for (unsigned i = OpIdx+3, e = MI.getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || MO.isImplicit())
+      break;
+    ++NumRegs;
+  }
+  // Bit 8 will be set if <list> is consecutive 64-bit registers (e.g., D0)
+  // Otherwise, it will be 0, in the case of 32-bit registers.
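+  // (Illustrative note, not in the original source: the count field holds
+  // the number of 32-bit words transferred, which is why NumRegs is doubled
+  // for 64-bit D registers below.)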
+  if (Binary & 0x100)
+    Binary |= NumRegs * 2;
+  else
+    Binary |= NumRegs;
+
+  emitWordLE(Binary);
+}
+
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegD = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  RegD = getARMRegisterNumbering(RegD);
+  Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+  Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+  return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegN = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  RegN = getARMRegisterNumbering(RegN);
+  Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+  Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+  return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+  unsigned RegM = MI.getOperand(OpIdx).getReg();
+  unsigned Binary = 0;
+  RegM = getARMRegisterNumbering(RegM);
+  Binary |= (RegM & 0xf);
+  Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+  return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+  assert((Binary & 0xfe000000) == 0xf2000000 &&
+         "not an ARM NEON data-processing instruction");
+  unsigned UBit = (Binary >> 24) & 1;
+  return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+  const TargetInstrDesc &TID = MI.getDesc();
+  if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+    RegTOpIdx = 0;
+    RegNOpIdx = 1;
+    LnOpIdx = 2;
+  } else { // ARMII::NSetLnFrm
+    RegTOpIdx = 2;
+    RegNOpIdx = 0;
+    LnOpIdx = 3;
+  }
+
+  // Set the conditional execution predicate
+  Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+  unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+  RegT = getARMRegisterNumbering(RegT);
+  Binary |= (RegT << ARMII::RegRdShift);
+  Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+  unsigned LaneShift;
+  if ((Binary & (1 << 22)) != 0)
+    LaneShift = 0; // 8-bit elements
+  else if ((Binary & (1 << 5)) != 0)
+    LaneShift = 1; // 16-bit elements
+  else
+    LaneShift = 2; // 32-bit elements
+
+  unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+  unsigned Opc1 = Lane >> 2;
+  unsigned Opc2 = Lane & 3;
+  assert((Opc1 & 3) == 0 && "out-of-range lane number operand");
+  Binary |= (Opc1 << 21);
+  Binary |= (Opc2 << 5);
+
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+  unsigned Binary = getBinaryCodeForInstr(MI);
+
+  // Set the conditional execution predicate
+  Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+  unsigned RegT = MI.getOperand(1).getReg();
+  RegT = getARMRegisterNumbering(RegT);
+  Binary |= (RegT << ARMII::RegRdShift);
+  Binary |= encodeNEONRn(MI, 0);
+  emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+  unsigned Binary = getBinaryCodeForInstr(MI);
+  // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
+#include "ARMGenCodeEmitter.inc"
diff --git a/final/lib/Target/ARM/ARMConstantIslandPass.cpp b/final/lib/Target/ARM/ARMConstantIslandPass.cpp
new file mode 100644
index 00000000000..13d1b33d116
--- /dev/null
+++ b/final/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -0,0 +1,1900 @@
+//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that splits the constant pool up into 'islands'
+// which are scattered throughout the function. This is required due to the
+// limited pc-relative displacements that ARM has.
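+// Rather than moving the constants themselves, the pass clones constant pool
+// entries into "islands" placed in the water between basic blocks and
+// iterates until every user is within range of some copy of its entry.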
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-cp-islands"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include <algorithm>
+using namespace llvm;
+
+STATISTIC(NumCPEs, "Number of constpool entries");
+STATISTIC(NumSplit, "Number of uncond branches inserted");
+STATISTIC(NumCBrFixed, "Number of cond branches fixed");
+STATISTIC(NumUBrFixed, "Number of uncond branches fixed");
+STATISTIC(NumTBs, "Number of table branches generated");
+STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
+STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
+STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
+
+namespace {
+ /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
+ /// requires constant pool entries to be scattered among the instructions
+ /// inside a function. To do this, it completely ignores the normal LLVM
+ /// constant pool; instead, it places constants wherever it feels like with
+ /// special instructions.
+ ///
+ /// The terminology used in this pass includes:
+ /// Islands - Clumps of constants placed in the function.
+ /// Water - Potential places where an island could be formed.
+ /// CPE - A constant pool entry that has been placed somewhere, which
+ /// tracks a list of users.
+ class ARMConstantIslands : public MachineFunctionPass {
+ /// BBSizes - The size of each MachineBasicBlock in bytes of code, indexed
+ /// by MBB Number. The two-byte pads required for Thumb alignment are
+ /// counted as part of the following block (i.e., the offset and size for
+ /// a padded block will both be ==2 mod 4).
+ std::vector<unsigned> BBSizes;
+
+ /// BBOffsets - the offset of each MBB in bytes, starting from 0.
+ /// The two-byte pads required for Thumb alignment are counted as part of
+ /// the following block.
+ std::vector<unsigned> BBOffsets;
+
+ /// WaterList - A sorted list of basic blocks where islands could be placed
+ /// (i.e. blocks that don't fall through to the following block, due
+ /// to a return, unreachable, or unconditional branch).
+ std::vector<MachineBasicBlock*> WaterList;
+
+ /// NewWaterList - The subset of WaterList that was created since the
+ /// previous iteration by inserting unconditional branches.
+ SmallSet<MachineBasicBlock*, 4> NewWaterList;
+
+ typedef std::vector<MachineBasicBlock*>::iterator water_iterator;
+
+ /// CPUser - One user of a constant pool, keeping the machine instruction
+ /// pointer, the constant pool being referenced, and the max displacement
+ /// allowed from the instruction to the CP. The HighWaterMark records the
+ /// highest basic block where a new CPEntry can be placed. To ensure this
+ /// pass terminates, the CP entries are initially placed at the end of the
+ /// function and then move monotonically to lower addresses. The
+ /// exception to this rule is when the current CP entry for a particular
+ /// CPUser is out of range, but there is another CP entry for the same
+ /// constant value in range. We want to use the existing in-range CP
+ /// entry, but if it later moves out of range, the search for new water
+ /// should resume where it left off. The HighWaterMark is used to record
+ /// that point.
+ struct CPUser {
+ MachineInstr *MI;
+ MachineInstr *CPEMI;
+ MachineBasicBlock *HighWaterMark;
+ unsigned MaxDisp;
+ bool NegOk;
+ bool IsSoImm;
+ CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
+ bool neg, bool soimm)
+ : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {
+ HighWaterMark = CPEMI->getParent();
+ }
+ };
+
+ /// CPUsers - Keep track of all of the machine instructions that use various
+ /// constant pools and their max displacement.
+ std::vector<CPUser> CPUsers;
+
+ /// CPEntry - One per constant pool entry, keeping the machine instruction
+ /// pointer, the constpool index, and the number of CPUser's which
+ /// reference this entry.
+ struct CPEntry {
+ MachineInstr *CPEMI;
+ unsigned CPI;
+ unsigned RefCount;
+ CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
+ : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
+ };
+
+ /// CPEntries - Keep track of all of the constant pool entry machine
+ /// instructions. For each original constpool index (i.e. those that
+ /// existed upon entry to this pass), it keeps a vector of entries.
+ /// Original elements are cloned as we go along; the clones are
+ /// put in the vector of the original element, but have distinct CPIs.
+ std::vector<std::vector<CPEntry> > CPEntries;
+
+ /// ImmBranch - One per immediate branch, keeping the machine instruction
+ /// pointer, conditional or unconditional, the max displacement,
+ /// and (if isCond is true) the corresponding unconditional branch
+ /// opcode.
+ struct ImmBranch {
+ MachineInstr *MI;
+ unsigned MaxDisp : 31;
+ bool isCond : 1;
+ int UncondBr;
+ ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, int ubr)
+ : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
+ };
+
+ /// ImmBranches - Keep track of all the immediate branch instructions.
+ ///
+ std::vector<ImmBranch> ImmBranches;
+
+ /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
+ ///
+ SmallVector<MachineInstr*, 4> PushPopMIs;
+
+ /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
+ SmallVector<MachineInstr*, 4> T2JumpTables;
+
+ /// HasFarJump - True if any far jump instruction has been emitted during
+ /// the branch fix up pass.
+ bool HasFarJump;
+
+ /// HasInlineAsm - True if the function contains inline assembly.
+ bool HasInlineAsm;
+
+ const ARMInstrInfo *TII;
+ const ARMSubtarget *STI;
+ ARMFunctionInfo *AFI;
+ bool isThumb;
+ bool isThumb1;
+ bool isThumb2;
+ public:
+ static char ID;
+ ARMConstantIslands() : MachineFunctionPass(ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "ARM constant island placement and branch shortening pass";
+ }
+
+ private:
+ void DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs);
+ CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
+ void InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs);
+ MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
+ void UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB);
+ void AdjustBBOffsetsAfter(MachineBasicBlock *BB, int delta);
+ bool DecrementOldEntry(unsigned CPI, MachineInstr* CPEMI);
+ int LookForExistingCPEntry(CPUser &U, unsigned UserOffset);
+ bool LookForWater(CPUser &U, unsigned UserOffset, water_iterator &WaterIter);
+ void CreateNewWater(unsigned CPUserIndex, unsigned UserOffset,
+ MachineBasicBlock *&NewMBB);
+ bool HandleConstantPoolUser(MachineFunction &MF, unsigned CPUserIndex);
+ void RemoveDeadCPEMI(MachineInstr *CPEMI);
+ bool RemoveUnusedCPEntries();
+ bool CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned Disp, bool NegOk,
+ bool DoDump = false);
+ bool WaterIsInRange(unsigned UserOffset, MachineBasicBlock *Water,
+ CPUser &U);
+ bool OffsetIsInRange(unsigned UserOffset, unsigned TrialOffset,
+ unsigned Disp, bool NegativeOK, bool IsSoImm = false);
+ bool BBIsInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
+ bool FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br);
+ bool UndoLRSpillRestore();
+ bool OptimizeThumb2Instructions(MachineFunction &MF);
+ bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
+ bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
+
+ unsigned GetOffsetOf(MachineInstr *MI) const;
+ void dumpBBs();
+ void verify(MachineFunction &MF);
+ };
+ char ARMConstantIslands::ID = 0;
+}
+
+/// verify - check BBOffsets, BBSizes, alignment of islands
+void ARMConstantIslands::verify(MachineFunction &MF) {
+ assert(BBOffsets.size() == BBSizes.size());
+ for (unsigned i = 1, e = BBOffsets.size(); i != e; ++i)
+ assert(BBOffsets[i-1]+BBSizes[i-1] == BBOffsets[i]);
+ if (!isThumb)
+ return;
+#ifndef NDEBUG
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() &&
+ MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned MBBId = MBB->getNumber();
+ assert(HasInlineAsm ||
+ (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) ||
+ (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0));
+ }
+ }
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
+ CPUser &U = CPUsers[i];
+ unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8);
+ unsigned CPEOffset = GetOffsetOf(U.CPEMI);
+ unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset :
+ UserOffset - CPEOffset;
+ assert(Disp <= U.MaxDisp && "Constant pool entry out of range!");
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void ARMConstantIslands::dumpBBs() {
+ for (unsigned J = 0, E = BBOffsets.size(); J != E; ++J) {
+ DEBUG(errs() << "block " << J << " offset " << BBOffsets[J]
+ << " size " << BBSizes[J] << "\n");
+ }
+}
+
+/// createARMConstantIslandPass - returns an instance of the constpool
+/// island pass.
+FunctionPass *llvm::createARMConstantIslandPass() {
+ return new ARMConstantIslands();
+}
+
+bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
+ MachineConstantPool &MCP = *MF.getConstantPool();
+
+ TII = (const ARMInstrInfo*)MF.getTarget().getInstrInfo();
+ AFI = MF.getInfo<ARMFunctionInfo>();
+ STI = &MF.getTarget().getSubtarget<ARMSubtarget>();
+
+ isThumb = AFI->isThumbFunction();
+ isThumb1 = AFI->isThumb1OnlyFunction();
+ isThumb2 = AFI->isThumb2Function();
+
+ HasFarJump = false;
+ HasInlineAsm = false;
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing
+ // that the numbers agree with the position of the block in the function.
+ MF.RenumberBlocks();
+
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
+ // Thumb1 functions containing constant pools get 4-byte alignment.
+ // This is so we can keep exact track of where the alignment padding goes.
+
+ // ARM and Thumb2 functions need to be 4-byte aligned.
+ if (!isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+
+ // Perform the initial placement of the constant pool entries. To start with,
+ // we put them all at the end of the function.
+ std::vector<MachineInstr*> CPEMIs;
+ if (!MCP.isEmpty()) {
+ DoInitialPlacement(MF, CPEMIs);
+ if (isThumb1)
+ MF.EnsureAlignment(2); // 2 = log2(4)
+ }
+
+ /// The next UID to take is the first unused one.
+ AFI->initPICLabelUId(CPEMIs.size());
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block, the location of all the water, and finding all of the
+ // constant pool users.
+ InitialFunctionScan(MF, CPEMIs);
+ CPEMIs.clear();
+ DEBUG(dumpBBs());
+
+
+ /// Remove dead constant pool entries.
+ MadeChange |= RemoveUnusedCPEntries();
+
+ // Iteratively place constant pool entries and fix up branches until there
+ // is no change.
+ unsigned NoCPIters = 0, NoBRIters = 0;
+ while (true) {
+ bool CPChange = false;
+ for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
+ CPChange |= HandleConstantPoolUser(MF, i);
+ if (CPChange && ++NoCPIters > 30)
+ llvm_unreachable("Constant Island pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ // Clear NewWaterList now. If we split a block for branches, it should
+ // appear as "new water" for the next iteration of constant pool placement.
+ NewWaterList.clear();
+
+ bool BRChange = false;
+ for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
+ BRChange |= FixUpImmediateBr(MF, ImmBranches[i]);
+ if (BRChange && ++NoBRIters > 30)
+ llvm_unreachable("Branch Fix Up pass failed to converge!");
+ DEBUG(dumpBBs());
+
+ if (!CPChange && !BRChange)
+ break;
+ MadeChange = true;
+ }
+
+ // Shrink 32-bit Thumb2 branch, load, and store instructions.
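+ // This is done only after placement and branch fixup have converged,
+ // since shrinking an instruction shifts the offsets of everything that
+ // follows it.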
+ if (isThumb2 && !STI->prefers32BitThumb())
+ MadeChange |= OptimizeThumb2Instructions(MF);
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify(MF);
+
+ // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
+ if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
+ MadeChange |= UndoLRSpillRestore();
+
+ // Save the mapping between original and cloned constpool entries.
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
+ const CPEntry &CPE = CPEntries[i][j];
+ AFI->recordCPEClone(i, CPE.CPI);
+ }
+ }
+
+ DEBUG(errs() << '\n'; dumpBBs());
+
+ BBSizes.clear();
+ BBOffsets.clear();
+ WaterList.clear();
+ CPUsers.clear();
+ CPEntries.clear();
+ ImmBranches.clear();
+ PushPopMIs.clear();
+ T2JumpTables.clear();
+
+ return MadeChange;
+}
+
+/// DoInitialPlacement - Perform the initial placement of the constant pool
+/// entries. To start with, we put them all at the end of the function.
+void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
+ std::vector<MachineInstr*> &CPEMIs) {
+ // Create the basic block to hold the CPE's.
+ MachineBasicBlock *BB = MF.CreateMachineBasicBlock();
+ MF.push_back(BB);
+
+ // Add all of the constants from the constant pool to the end block, use an
+ // identity mapping of CPI's to CPE's.
+ const std::vector<MachineConstantPoolEntry> &CPs =
+ MF.getConstantPool()->getConstants();
+
+ const TargetData &TD = *MF.getTarget().getTargetData();
+ for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
+ unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
+ // Verify that all constant pool entries are a multiple of 4 bytes. If not,
+ // we would have to pad them out or something so that instructions stay
+ // aligned.
+ assert((Size & 3) == 0 && "CP Entry not multiple of 4 bytes!");
+ MachineInstr *CPEMI =
+ BuildMI(BB, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(i).addConstantPoolIndex(i).addImm(Size);
+ CPEMIs.push_back(CPEMI);
+
+ // Add a new CPEntry, but no corresponding CPUser yet.
+ std::vector<CPEntry> CPEs;
+ CPEs.push_back(CPEntry(CPEMI, i));
+ CPEntries.push_back(CPEs);
+ ++NumCPEs;
+ DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
+ << "\n");
+ }
+}
+
+/// BBHasFallthrough - Return true if the specified basic block can fall
+/// through into the block immediately after it.
+static bool BBHasFallthrough(MachineBasicBlock *MBB) {
+ // Get the next machine basic block in the function.
+ MachineFunction::iterator MBBI = MBB;
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
+ return false;
+
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I == NextBB)
+ return true;
+
+ return false;
+}
+
+/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
+/// look up the corresponding CPEntry.
+ARMConstantIslands::CPEntry
+*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
+ const MachineInstr *CPEMI) {
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ // Number of entries per constpool index should be small, just do a
+ // linear search.
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ if (CPEs[i].CPEMI == CPEMI)
+ return &CPEs[i];
+ }
+ return NULL;
+}
+
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the sizes of each block and the locations of all
+/// the jump tables.
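+/// Only Thumb2 table branches (t2BR_JT) are recorded; this runs before the
+/// main scan so ReorderThumb2JumpTables can adjust the block layout first.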
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
+/// InitialFunctionScan - Do the initial scan of the function, building up
+/// information about the sizes of each block, the location of all the water,
+/// and finding all of the constant pool users.
+void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
+ const std::vector<MachineInstr*> &CPEMIs) {
+ // First thing, see if the function has any inline assembly in it. If so,
+ // we have to be conservative about alignment assumptions, as we don't
+ // know for sure the size of any instructions in the inline assembly.
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getOpcode() == ARM::INLINEASM)
+ HasInlineAsm = true;
+ }
+
+ // Now go back through the instructions and build up our data structures.
+ unsigned Offset = 0;
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ // If this block doesn't fall through into the next MBB, then this is
+ // 'water' where a constant pool island could be placed.
+ if (!BBHasFallthrough(&MBB))
+ WaterList.push_back(&MBB);
+
+ unsigned MBBSize = 0;
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+ // Add instruction size to MBBSize.
+ MBBSize += TII->GetInstSizeInBytes(I);
+
+ int Opc = I->getOpcode();
+ if (I->getDesc().isBranch()) {
+ bool isCond = false;
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ int UOpc = Opc;
+ switch (Opc) {
+ default:
+ continue; // Ignore other branches
+ case ARM::tBR_JTr:
+ // A Thumb1 table jump may involve padding; for the offsets to
+ // be right, functions containing these must be 4-byte aligned.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether offset of tBR_JTr + 2
+ // is aligned. That is held in Offset+MBBSize, which already has
+ // 2 added in for the size of the mov pc instruction.
+ MF.EnsureAlignment(2U);
+ if ((Offset+MBBSize)%4 != 0 || HasInlineAsm)
+ // FIXME: Add a pseudo ALIGN instruction instead.
+ MBBSize += 2; // padding
+ continue; // Does not get an entry in ImmBranches
+ case ARM::t2BR_JT:
+ T2JumpTables.push_back(I);
+ continue; // Does not get an entry in ImmBranches
+ case ARM::Bcc:
+ isCond = true;
+ UOpc = ARM::B;
+ // Fallthrough
+ case ARM::B:
+ Bits = 24;
+ Scale = 4;
+ break;
+ case ARM::tBcc:
+ isCond = true;
+ UOpc = ARM::tB;
+ Bits = 8;
+ Scale = 2;
+ break;
+ case ARM::tB:
+ Bits = 11;
+ Scale = 2;
+ break;
+ case ARM::t2Bcc:
+ isCond = true;
+ UOpc = ARM::t2B;
+ Bits = 20;
+ Scale = 2;
+ break;
+ case ARM::t2B:
+ Bits = 24;
+ Scale = 2;
+ break;
+ }
+
+ // Record this immediate branch.
+ unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
+ ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
+ }
+
+ if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
+ PushPopMIs.push_back(I);
+
+ if (Opc == ARM::CONSTPOOL_ENTRY)
+ continue;
+
+ // Scan the instructions for constant pool operands.
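+ // A user's reach depends on its addressing mode; the switch below
+ // decodes that into a bit width, scale, and sign (Bits / Scale / NegOk).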
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
+ if (I->getOperand(op).isCPI()) {
+ // We found one. The addressing mode tells us the max displacement
+ // from the PC that this instruction permits.
+
+ // Basic size info comes from the TSFlags field.
+ unsigned Bits = 0;
+ unsigned Scale = 1;
+ bool NegOk = false;
+ bool IsSoImm = false;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unknown addressing mode for CP reference!");
+ break;
+
+ // Taking the address of a CP entry.
+ case ARM::LEApcrel:
+ // This takes a SoImm, which is an 8-bit immediate rotated. We'll
+ // pretend the maximum offset is 255 * 4. Since each instruction
+ // is 4 bytes wide, this is always correct. We'll check for other
+ // displacements that fit in a SoImm as well.
+ Bits = 8;
+ Scale = 4;
+ NegOk = true;
+ IsSoImm = true;
+ break;
+ case ARM::t2LEApcrel:
+ Bits = 12;
+ NegOk = true;
+ break;
+ case ARM::tLEApcrel:
+ Bits = 8;
+ Scale = 4;
+ break;
+
+ case ARM::LDRi12:
+ case ARM::LDRcp:
+ case ARM::t2LDRpci:
+ Bits = 12; // +-offset_12
+ NegOk = true;
+ break;
+
+ case ARM::tLDRpci:
+ Bits = 8;
+ Scale = 4; // +(offset_8*4)
+ break;
+
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ Bits = 8;
+ Scale = 4; // +-(offset_8*4)
+ NegOk = true;
+ break;
+ }
+
+ // Remember that this is a user of a CP entry.
+ unsigned CPI = I->getOperand(op).getIndex();
+ MachineInstr *CPEMI = CPEMIs[CPI];
+ unsigned MaxOffs = ((1 << Bits)-1) * Scale;
+ CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));
+
+ // Increment corresponding CPEntry reference count.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Cannot find a corresponding CPEntry!");
+ CPE->RefCount++;
+
+ // Instructions can only use one CP entry, don't bother scanning the
+ // rest of the operands.
+ break;
+ }
+ }
+
+ // In thumb mode, if this block is a constpool island, we may need padding
+ // so it's aligned on a 4-byte boundary.
+ if (isThumb &&
+ !MBB.empty() &&
+ MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ ((Offset%4) != 0 || HasInlineAsm))
+ MBBSize += 2;
+
+ BBSizes.push_back(MBBSize);
+ BBOffsets.push_back(Offset);
+ Offset += MBBSize;
+ }
+}
+
+/// GetOffsetOf - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff
+/// is moved around inside the function.
+unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const {
+ MachineBasicBlock *MBB = MI->getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the
+ // block it is in.
+ unsigned Offset = BBOffsets[MBB->getNumber()];
+
+ // If we're looking for a CONSTPOOL_ENTRY in Thumb, see if this block has
+ // alignment padding, and compensate if so.
+ if (isThumb &&
+ MI->getOpcode() == ARM::CONSTPOOL_ENTRY &&
+ (Offset%4 != 0 || HasInlineAsm))
+ Offset += 2;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::iterator I = MBB->begin(); ; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ if (&*I == MI) return Offset;
+ Offset += TII->GetInstSizeInBytes(I);
+ }
+}
+
+/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
+/// ID.
+static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
+ const MachineBasicBlock *RHS) {
+ return LHS->getNumber() < RHS->getNumber();
+}
+
+/// UpdateForInsertedWaterBlock - When a block is newly inserted into the
+/// machine function, it upsets all of the block numbers. Renumber the blocks
+/// and update the arrays that parallel this numbering.
+void ARMConstantIslands::UpdateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
+ // Renumber the MBB's to keep them consecutive.
+ NewBB->getParent()->RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add NewBB as having
+ // available water after it.
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
+ CompareMBBNumbers);
+ WaterList.insert(IP, NewBB);
+}
+
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and return the newly created block.
+MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
+ MachineBasicBlock *OrigBB = MI->getParent();
+ MachineFunction &MF = *OrigBB->getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MachineFunction::iterator MBBI = OrigBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
+ ++NumSplit;
+
+ // Update the CFG. All succs of OrigBB are now succs of NewBB.
+ while (!OrigBB->succ_empty()) {
+ MachineBasicBlock *Succ = *OrigBB->succ_begin();
+ OrigBB->removeSuccessor(Succ);
+ NewBB->addSuccessor(Succ);
+
+ // This pass should be run after register allocation, so there should be no
+ // PHI nodes to update.
+ assert((Succ->empty() || !Succ->begin()->isPHI())
+ && "PHI nodes should be eliminated by now!");
+ }
+
+ // OrigBB branches to NewBB.
+ OrigBB->addSuccessor(NewBB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ // This is almost the same as UpdateForInsertedWaterBlock, except that
+ // the Water goes after OrigBB, not NewBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Insert a size into BBSizes to align it properly with the (newly
+ // renumbered) block numbers.
+ BBSizes.insert(BBSizes.begin()+NewBB->getNumber(), 0);
+
+ // Likewise for BBOffsets.
+ BBOffsets.insert(BBOffsets.begin()+NewBB->getNumber(), 0);
+
+ // Next, update WaterList. Specifically, we need to add OrigBB as having
+ // available water after it (but not if it's already there, which happens
+ // when splitting before a conditional branch that is followed by an
+ // unconditional branch - in that case we want to insert NewBB).
+ water_iterator IP =
+ std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
+ CompareMBBNumbers);
+ MachineBasicBlock* WaterBB = *IP;
+ if (WaterBB == OrigBB)
+ WaterList.insert(llvm::next(IP), NewBB);
+ else
+ WaterList.insert(IP, OrigBB);
+ NewWaterList.insert(OrigBB);
+
+ unsigned OrigBBI = OrigBB->getNumber();
+ unsigned NewBBI = NewBB->getNumber();
+
+ int delta = isThumb1 ? 2 : 4;
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ unsigned OrigBBSize = 0;
+ for (MachineBasicBlock::iterator I = OrigBB->begin(), E = OrigBB->end();
+ I != E; ++I)
+ OrigBBSize += TII->GetInstSizeInBytes(I);
+ BBSizes[OrigBBI] = OrigBBSize;
+
+ // ...and adjust BBOffsets for NewBB accordingly.
+ BBOffsets[NewBBI] = BBOffsets[OrigBBI] + BBSizes[OrigBBI];
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ unsigned NewBBSize = 0;
+ for (MachineBasicBlock::iterator I = NewBB->begin(), E = NewBB->end();
+ I != E; ++I)
+ NewBBSize += TII->GetInstSizeInBytes(I);
+ // Set the size of NewBB in BBSizes. It does not include any padding now.
+ BBSizes[NewBBI] = NewBBSize;
+
+ MachineInstr* ThumbJTMI = prior(NewBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ // We've added another 2-byte instruction before this tablejump, which
+ // means we will always need padding if we didn't before, and vice versa.
+
+ // The original offset of the jump instruction was:
+ unsigned OrigOffset = BBOffsets[OrigBBI] + BBSizes[OrigBBI] - delta;
+ if (OrigOffset%4 == 0) {
+ // We had padding before and now we don't. No net change in code size.
+ delta = 0;
+ } else {
+ // We didn't have padding before and now we do.
+ BBSizes[NewBBI] += 2;
+ delta = 4;
+ }
+ }
+
+ // All BBOffsets following these blocks must be modified.
+ if (delta)
+ AdjustBBOffsetsAfter(NewBB, delta);
+
+ return NewBB;
+}
+
+/// OffsetIsInRange - Checks whether UserOffset (the location of a constant
+/// pool reference) is within MaxDisp of TrialOffset (a proposed location of a
+/// constant pool entry).
+bool ARMConstantIslands::OffsetIsInRange(unsigned UserOffset,
+ unsigned TrialOffset, unsigned MaxDisp,
+ bool NegativeOK, bool IsSoImm) {
+ // On Thumb offsets==2 mod 4 are rounded down by the hardware for
+ // purposes of the displacement computation; compensate for that here.
+ // Effectively, the valid range of displacements is 2 bytes smaller for such
+ // references.
+ unsigned TotalAdj = 0;
+ if (isThumb && UserOffset%4 != 0) {
+ UserOffset -= 2;
+ TotalAdj = 2;
+ }
+ // CPEs will be rounded up to a multiple of 4.
+ if (isThumb && TrialOffset%4 != 0) {
+ TrialOffset += 2;
+ TotalAdj += 2;
+ }
+
+ // In Thumb2 mode, later branch adjustments can shift instructions up and
+ // cause alignment change. In the worst case scenario this can cause the
+ // user's effective address to drop by 2 and the CPE's address to rise by
+ // 2, so shrink the allowed displacement accordingly.
+ if (isThumb2 && TotalAdj != 4)
+ MaxDisp -= (4 - TotalAdj);
+
+ if (UserOffset <= TrialOffset) {
+ // User before the Trial.
+ if (TrialOffset - UserOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ } else if (NegativeOK) {
+ if (UserOffset - TrialOffset <= MaxDisp)
+ return true;
+ // FIXME: Make use of the full range of soimm values.
+ }
+ return false;
+}
+
+/// WaterIsInRange - Returns true if a CPE placed after the specified
+/// Water (a basic block) will be in range for the specified MI.
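+/// The CPE would sit at the end of Water, so that block's end is the trial
+/// offset; if the island would land before the user, the user's own offset
+/// is bumped by the size of the entry before the range check.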
+
+bool ARMConstantIslands::WaterIsInRange(unsigned UserOffset,
+ MachineBasicBlock* Water, CPUser &U) {
+ unsigned MaxDisp = U.MaxDisp;
+ unsigned CPEOffset = BBOffsets[Water->getNumber()] +
+ BBSizes[Water->getNumber()];
+
+ // If the CPE is to be inserted before the instruction, that will raise
+ // the offset of the instruction.
+ if (CPEOffset < UserOffset)
+ UserOffset += U.CPEMI->getOperand(2).getImm();
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, U.NegOk, U.IsSoImm);
+}
+
+/// CPEIsInRange - Returns true if the distance between the specified MI and
+/// the specified constant pool entry instruction can fit in MI's
+/// displacement field.
+bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset,
+ MachineInstr *CPEMI, unsigned MaxDisp,
+ bool NegOk, bool DoDump) {
+ unsigned CPEOffset = GetOffsetOf(CPEMI);
+ assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE");
+
+ if (DoDump) {
+ DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
+ << " max delta=" << MaxDisp
+ << " insn address=" << UserOffset
+ << " CPE address=" << CPEOffset
+ << " offset=" << int(CPEOffset-UserOffset) << "\t" << *MI);
+ }
+
+ return OffsetIsInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
+}
+
+#ifndef NDEBUG
+/// BBIsJumpedOver - Return true if the specified basic block's only
+/// predecessor unconditionally branches to its only successor.
+static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
+ if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
+ return false;
+
+ MachineBasicBlock *Succ = *MBB->succ_begin();
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineInstr *PredMI = &Pred->back();
+ if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
+ || PredMI->getOpcode() == ARM::t2B)
+ return PredMI->getOperand(0).getMBB() == Succ;
+ return false;
+}
+#endif // NDEBUG
+
+void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
+ int delta) {
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
+ for (unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
+ i < e; ++i) {
+ BBOffsets[i] += delta;
+ // If some existing blocks have padding, adjust the padding as needed;
+ // this is a bit tricky, since delta can be negative, so don't use % on it.
+ if (!isThumb)
+ continue;
+ MachineBasicBlock *MBB = MBBI;
+ if (!MBB->empty() && !HasInlineAsm) {
+ // Constant pool entries require padding.
+ if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) {
+ unsigned OldOffset = BBOffsets[i] - delta;
+ if ((OldOffset%4) == 0 && (BBOffsets[i]%4) != 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ } else if ((OldOffset%4) != 0 && (BBOffsets[i]%4) == 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ }
+ }
+ // Thumb1 jump tables require padding. They should be at the end;
+ // following unconditional branches are removed by AnalyzeBranch.
+ // tBR_JTr expands to a mov pc followed by .align 2 and then the jump
+ // table entries. So this code checks whether the offset of the tBR_JTr
+ // is aligned; if it is, the offset of the jump table following the
+ // instruction will not be aligned, and we need padding.
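+ // As with the constant pool case above, compare the alignment before
+ // and after applying delta and add or remove the 2-byte pad to match.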
+ MachineInstr *ThumbJTMI = prior(MBB->end());
+ if (ThumbJTMI->getOpcode() == ARM::tBR_JTr) {
+ unsigned NewMIOffset = GetOffsetOf(ThumbJTMI);
+ unsigned OldMIOffset = NewMIOffset - delta;
+ if ((OldMIOffset%4) == 0 && (NewMIOffset%4) != 0) {
+ // remove existing padding
+ BBSizes[i] -= 2;
+ delta -= 2;
+ } else if ((OldMIOffset%4) != 0 && (NewMIOffset%4) == 0) {
+ // add new padding
+ BBSizes[i] += 2;
+ delta += 2;
+ }
+ }
+ if (delta == 0)
+ return;
+ }
+ MBBI = llvm::next(MBBI);
+ }
+}
+
+/// DecrementOldEntry - Find the constant pool entry with index CPI
+/// and instruction CPEMI, and decrement its refcount. If the refcount
+/// becomes 0 remove the entry and instruction. Returns true if we removed
+/// the entry, false if we didn't.
+
+bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
+ // Find the old entry. Eliminate it if it is no longer used.
+ CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
+ assert(CPE && "Unexpected!");
+ if (--CPE->RefCount == 0) {
+ RemoveDeadCPEMI(CPEMI);
+ CPE->CPEMI = NULL;
+ --NumCPEs;
+ return true;
+ }
+ return false;
+}
+
+/// LookForExistingCPEntry - See if the currently referenced CPE is in range;
+/// if not, see if an existing clone of the CPE is in range, and if so,
+/// change the data structures so the user references the clone. Returns:
+/// 0 = no existing entry found
+/// 1 = entry found, and there were no code insertions or deletions
+/// 2 = entry found, and there were code insertions or deletions
+int ARMConstantIslands::LookForExistingCPEntry(CPUser &U, unsigned UserOffset)
+{
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+
+ // Check to see if the CPE is already in-range.
+ if (CPEIsInRange(UserMI, UserOffset, CPEMI, U.MaxDisp, U.NegOk, true)) {
+ DEBUG(errs() << "In range\n");
+ return 1;
+ }
+
+ // No. Look for previously created clones of the CPE that are in range.
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ std::vector<CPEntry> &CPEs = CPEntries[CPI];
+ for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
+ // We already tried this one.
+ if (CPEs[i].CPEMI == CPEMI)
+ continue;
+ // Removing CPEs can leave empty entries, skip those.
+ if (CPEs[i].CPEMI == NULL)
+ continue;
+ if (CPEIsInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.MaxDisp, U.NegOk)) {
+ DEBUG(errs() << "Replacing CPE#" << CPI << " with CPE#"
+ << CPEs[i].CPI << "\n");
+ // Point the CPUser node to the replacement
+ U.CPEMI = CPEs[i].CPEMI;
+ // Change the CPI in the instruction operand to refer to the clone.
+ for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
+ if (UserMI->getOperand(j).isCPI()) {
+ UserMI->getOperand(j).setIndex(CPEs[i].CPI);
+ break;
+ }
+ // Adjust the refcount of the clone...
+ CPEs[i].RefCount++;
+ // ...and the original. If we didn't remove the old entry, none of the
+ // addresses changed, so we don't need another pass.
+ return DecrementOldEntry(CPI, CPEMI) ? 2 : 1;
+ }
+ }
+ return 0;
+}
+
+/// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
+/// the specified unconditional branch instruction.
+static inline unsigned getUnconditionalBrDisp(int Opc) {
+ switch (Opc) {
+ case ARM::tB:
+ return ((1<<10)-1)*2;
+ case ARM::t2B:
+ return ((1<<23)-1)*2;
+ default:
+ break;
+ }
+
+ return ((1<<23)-1)*4;
+}
+
+/// LookForWater - Look for an existing entry in the WaterList in which
+/// we can place the CPE referenced from U so it's within range of U's MI.
+/// Returns true if found, false if not. If it returns true, WaterIter
+/// is set to the WaterList entry. For Thumb, prefer water that will not
+/// introduce padding to water that will. To ensure that this pass
+/// terminates, the CPE location for a particular CPUser is only allowed to
+/// move to a lower address, so search backward from the end of the list and
+/// prefer the first water that is in range.
+bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset,
+ water_iterator &WaterIter) {
+ if (WaterList.empty())
+ return false;
+
+ bool FoundWaterThatWouldPad = false;
+ water_iterator IPThatWouldPad;
+ for (water_iterator IP = prior(WaterList.end()),
+ B = WaterList.begin();; --IP) {
+ MachineBasicBlock* WaterBB = *IP;
+ // Check if water is in range and is either at a lower address than the
+ // current "high water mark" or a new water block that was created since
+ // the previous iteration by inserting an unconditional branch. In the
+ // latter case, we want to allow resetting the high water mark back to
+ // this new water since we haven't seen it before. Inserting branches
+ // should be relatively uncommon and when it does happen, we want to be
+ // sure to take advantage of it for all the CPEs near that block, so that
+ // we don't insert more branches than necessary.
+ if (WaterIsInRange(UserOffset, WaterBB, U) &&
+ (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
+ NewWaterList.count(WaterBB))) {
+ unsigned WBBId = WaterBB->getNumber();
+ if (isThumb &&
+ (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) {
+ // This is valid Water, but would introduce padding. Remember
+ // it in case we don't find any Water that doesn't do this.
+ if (!FoundWaterThatWouldPad) {
+ FoundWaterThatWouldPad = true;
+ IPThatWouldPad = IP;
+ }
+ } else {
+ WaterIter = IP;
+ return true;
+ }
+ }
+ if (IP == B)
+ break;
+ }
+ if (FoundWaterThatWouldPad) {
+ WaterIter = IPThatWouldPad;
+ return true;
+ }
+ return false;
+}
+
+/// CreateNewWater - No existing WaterList entry will work for
+/// CPUsers[CPUserIndex], so create a place to put the CPE. The end of the
+/// block is used if in range, and the conditional branch munged so control
+/// flow is correct. Otherwise the block is split to create a hole with an
+/// unconditional branch around it. In either case NewMBB is set to a
+/// block following which the new island can be inserted (the WaterList
+/// is not adjusted).
+void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
+ unsigned UserOffset,
+ MachineBasicBlock *&NewMBB) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ MachineBasicBlock *UserMBB = UserMI->getParent();
+ unsigned OffsetOfNextBlock = BBOffsets[UserMBB->getNumber()] +
+ BBSizes[UserMBB->getNumber()];
+ assert(OffsetOfNextBlock == BBOffsets[UserMBB->getNumber()+1]);
+
+ // If the block does not end in an unconditional branch already, and if the
+ // end of the block is within range, make new water there. (The addition
+ // below is for the unconditional branch we will be adding: 4 bytes on ARM
+ // and Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is accounted
+ // for inside OffsetIsInRange.)
+ if (BBHasFallthrough(UserMBB) &&
+ OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2 : 4),
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ DEBUG(errs() << "Split at end of block\n");
+ if (&UserMBB->back() == UserMI)
+ assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
+ // Add an unconditional branch from UserMBB to fallthrough block.
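+ // (The new island will be placed in the gap this opens up, between the
+ // branch and the fallthrough block.)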
+ // Record it for branch lengthening; this new branch will not get out of
+ // range, but if the preceding conditional branch is out of range, the
+ // targets will be exchanged, and the altered branch may be out of
+ // range, so the machinery has to know about it.
+ int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
+ BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
+ unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
+ ImmBranches.push_back(ImmBranch(&UserMBB->back(),
+ MaxDisp, false, UncondBr));
+ int delta = isThumb1 ? 2 : 4;
+ BBSizes[UserMBB->getNumber()] += delta;
+ AdjustBBOffsetsAfter(UserMBB, delta);
+ } else {
+ // What a big block. Find a place within the block to split it.
+ // This is a little tricky on Thumb1 since instructions are 2 bytes
+ // and constant pool entries are 4 bytes: if instruction I references
+ // island CPE, and instruction I+1 references CPE', it will
+ // not work well to put the CPE as far forward as possible, since then
+ // CPE' cannot immediately follow it (that location is 2 bytes
+ // farther away from I+1 than CPE was from I) and we'd need to create
+ // a new island. So, we make a first guess, then walk through the
+ // instructions between the one currently being looked at and the
+ // possible insertion point, and make sure any other instructions
+ // that reference CPEs will be able to use the same island area;
+ // if not, we back up the insertion point.
+
+ // The 4 in the following is for the unconditional branch we'll be
+ // inserting (allows for long branch on Thumb1). Alignment of the
+ // island is handled inside OffsetIsInRange.
+ unsigned BaseInsertOffset = UserOffset + U.MaxDisp - 4;
+ // This could point off the end of the block if we've already got
+ // constant pool entries following this block; only the last one is
+ // in the water list. Back past any possible branches (allow for a
+ // conditional and a maximally long unconditional).
+ if (BaseInsertOffset >= BBOffsets[UserMBB->getNumber()+1])
+ BaseInsertOffset = BBOffsets[UserMBB->getNumber()+1] -
+ (isThumb1 ? 6 : 8);
+ unsigned EndInsertOffset = BaseInsertOffset +
+ CPEMI->getOperand(2).getImm();
+ MachineBasicBlock::iterator MI = UserMI;
+ ++MI;
+ unsigned CPUIndex = CPUserIndex+1;
+ unsigned NumCPUsers = CPUsers.size();
+ MachineInstr *LastIT = 0;
+ for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
+ Offset < BaseInsertOffset;
+ Offset += TII->GetInstSizeInBytes(MI),
+ MI = llvm::next(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
+ CPUser &U = CPUsers[CPUIndex];
+ if (!OffsetIsInRange(Offset, EndInsertOffset,
+ U.MaxDisp, U.NegOk, U.IsSoImm)) {
+ BaseInsertOffset -= (isThumb1 ? 2 : 4);
+ EndInsertOffset -= (isThumb1 ? 2 : 4);
+ }
+ // This is overly conservative, as we don't account for CPEMIs
+ // being reused within the block, but it doesn't matter much.
+ EndInsertOffset += CPUsers[CPUIndex].CPEMI->getOperand(2).getImm();
+ CPUIndex++;
+ }
+
+ // Remember the last IT instruction.
+ if (MI->getOpcode() == ARM::t2IT)
+ LastIT = MI;
+ }
+
+ DEBUG(errs() << "Split in middle of big block\n");
+ --MI;
+
+ // Avoid splitting an IT block.
+ if (LastIT) {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
+ if (CC != ARMCC::AL)
+ MI = LastIT;
+ }
+ NewMBB = SplitBlockBeforeInstr(MI);
+ }
+}
+
+/// HandleConstantPoolUser - Analyze the specified user, checking to see if it
+/// is out-of-range. If so, pick up the constant pool value and move it some
+/// place in-range. Return true if we changed any addresses (thus must run
+/// another pass of branch lengthening), false otherwise.
+bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
+ unsigned CPUserIndex) {
+ CPUser &U = CPUsers[CPUserIndex];
+ MachineInstr *UserMI = U.MI;
+ MachineInstr *CPEMI = U.CPEMI;
+ unsigned CPI = CPEMI->getOperand(1).getIndex();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ // Compute this only once, it's expensive. The 4 or 8 is the value the
+ // hardware keeps in the PC.
+ unsigned UserOffset = GetOffsetOf(UserMI) + (isThumb ? 4 : 8);
+
+ // See if the current entry is within range, or there is a clone of it
+ // in range.
+ int result = LookForExistingCPEntry(U, UserOffset);
+ if (result == 1) return false;
+ else if (result == 2) return true;
+
+ // No existing clone of this CPE is within range.
+ // We will be generating a new clone. Get a UID for it.
+ unsigned ID = AFI->createPICLabelUId();
+
+ // Look for water where we can place this CPE.
+ MachineBasicBlock *NewIsland = MF.CreateMachineBasicBlock();
+ MachineBasicBlock *NewMBB;
+ water_iterator IP;
+ if (LookForWater(U, UserOffset, IP)) {
+ DEBUG(errs() << "found water in range\n");
+ MachineBasicBlock *WaterBB = *IP;
+
+ // If the original WaterList entry was "new water" on this iteration,
+ // propagate that to the new island. This is just keeping NewWaterList
+ // updated to match the WaterList, which will be updated below.
+ if (NewWaterList.count(WaterBB)) {
+ NewWaterList.erase(WaterBB);
+ NewWaterList.insert(NewIsland);
+ }
+ // The new CPE goes before the following block (NewMBB).
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
+
+ } else {
+ // No water found.
+ DEBUG(errs() << "No water found\n");
+ CreateNewWater(CPUserIndex, UserOffset, NewMBB);
+
+ // SplitBlockBeforeInstr adds to WaterList, which is important when it is
+ // called while handling branches so that the water will be seen on the
+ // next iteration for constant pools, but in this context, we don't want
+ // it. Check for this so it will be removed from the WaterList.
+ // Also remove any entry from NewWaterList.
+ MachineBasicBlock *WaterBB = prior(MachineFunction::iterator(NewMBB));
+ IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ if (IP != WaterList.end())
+ NewWaterList.erase(WaterBB);
+
+ // We are adding new water. Update NewWaterList.
+ NewWaterList.insert(NewIsland);
+ }
+
+ // Remove the original WaterList entry; we want subsequent insertions in
+ // this vicinity to go after the one we're about to insert. This
+ // considerably reduces the number of times we have to move the same CPE
+ // more than once and is also important to ensure the algorithm terminates.
+ if (IP != WaterList.end())
+ WaterList.erase(IP);
+
+ // Okay, we know we can put an island before NewMBB now, do it!
+ MF.insert(NewMBB, NewIsland);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ UpdateForInsertedWaterBlock(NewIsland);
+
+ // Decrement the old entry, and remove it if refcount becomes 0.
+ DecrementOldEntry(CPI, CPEMI);
+
+ // Now that we have an island to add the CPE to, clone the original CPE and
+ // add it to the island.
+ U.HighWaterMark = NewIsland;
+ U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
+ .addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
+ CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
+ ++NumCPEs;
+
+ BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
+ // Compensate for .align 2 in thumb mode.
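+ // If the island lands at an unaligned offset (or inline asm makes offsets
+ // untrustworthy), budget 2 extra bytes of padding into its size.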
+ if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm))
+ Size += 2;
+ // Increase the size of the island block to account for the new entry.
+ BBSizes[NewIsland->getNumber()] += Size;
+ AdjustBBOffsetsAfter(NewIsland, Size);
+
+ // Finally, change the CPI in the instruction operand to be ID.
+ for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
+ if (UserMI->getOperand(i).isCPI()) {
+ UserMI->getOperand(i).setIndex(ID);
+ break;
+ }
+
+ DEBUG(errs() << " Moved CPE to #" << ID << " CPI=" << CPI
+ << '\t' << *UserMI);
+
+ return true;
+}
+
+/// RemoveDeadCPEMI - Remove a dead constant pool entry instruction. Update
+/// sizes and offsets of impacted basic blocks.
+void ARMConstantIslands::RemoveDeadCPEMI(MachineInstr *CPEMI) {
+ MachineBasicBlock *CPEBB = CPEMI->getParent();
+ unsigned Size = CPEMI->getOperand(2).getImm();
+ CPEMI->eraseFromParent();
+ BBSizes[CPEBB->getNumber()] -= Size;
+ // All succeeding offsets have the current size value added in, fix this.
+ if (CPEBB->empty()) {
+ // In Thumb1 mode, the size of the island may have been padded by two to
+ // satisfy the alignment requirement; it will then be 2 when the block is
+ // empty, so fix this.
+ if (BBSizes[CPEBB->getNumber()] != 0) {
+ Size += BBSizes[CPEBB->getNumber()];
+ BBSizes[CPEBB->getNumber()] = 0;
+ }
+ }
+ AdjustBBOffsetsAfter(CPEBB, -Size);
+ // An island has only one predecessor BB and one successor BB. Check if
+ // this BB's predecessor jumps directly to this BB's successor. This
+ // shouldn't happen currently.
+ assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
+ // FIXME: remove the empty blocks after all the work is done?
+}
+
+/// RemoveUnusedCPEntries - Remove constant pool entries whose refcounts
+/// are zero.
+bool ARMConstantIslands::RemoveUnusedCPEntries() {
+ bool MadeChange = false;
+ for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
+ std::vector<CPEntry> &CPEs = CPEntries[i];
+ for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
+ if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
+ RemoveDeadCPEMI(CPEs[j].CPEMI);
+ CPEs[j].CPEMI = NULL;
+ MadeChange = true;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+/// BBIsInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool ARMConstantIslands::BBIsInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
+ unsigned MaxDisp) {
+ unsigned PCAdj = isThumb ? 4 : 8;
+ unsigned BrOffset = GetOffsetOf(MI) + PCAdj;
+ unsigned DestOffset = BBOffsets[DestBB->getNumber()];
+
+ DEBUG(errs() << "Branch of destination BB#" << DestBB->getNumber()
+ << " from BB#" << MI->getParent()->getNumber()
+ << " max delta=" << MaxDisp
+ << " from " << GetOffsetOf(MI) << " to " << DestOffset
+ << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
+
+ if (BrOffset <= DestOffset) {
+ // Branch before the Dest.
+ if (DestOffset-BrOffset <= MaxDisp)
+ return true;
+ } else {
+ if (BrOffset-DestOffset <= MaxDisp)
+ return true;
+ }
+ return false;
+}
+
+/// FixUpImmediateBr - Fix up an immediate branch whose destination is too far
+/// away to fit in its displacement field.
+bool ARMConstantIslands::FixUpImmediateBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Check to see if the DestBB is already in-range.
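+ // If it is, there is nothing to do: this fixup loop only ever lengthens
+ // branches; shrinking is handled separately in OptimizeThumb2Branches.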
+ if (BBIsInRange(MI, DestBB, Br.MaxDisp))
+ return false;
+
+ if (!Br.isCond)
+ return FixUpUnconditionalBr(MF, Br);
+ return FixUpConditionalBr(MF, Br);
+}
+
+/// FixUpUnconditionalBr - Fix up an unconditional branch whose destination is
+/// too far away to fit in its displacement field. If the LR register has been
+/// spilled in the epilogue, then we can use BL to implement a far jump.
+/// Otherwise, an intermediate branch instruction would be needed.
+bool
+ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *MBB = MI->getParent();
+ if (!isThumb1)
+ llvm_unreachable("FixUpUnconditionalBr is Thumb1 only!");
+
+ // Use BL to implement far jump.
+ Br.MaxDisp = (1 << 21) * 2;
+ MI->setDesc(TII->get(ARM::tBfar));
+ BBSizes[MBB->getNumber()] += 2;
+ AdjustBBOffsetsAfter(MBB, 2);
+ HasFarJump = true;
+ ++NumUBrFixed;
+
+ DEBUG(errs() << " Changed B to long jump " << *MI);
+
+ return true;
+}
+
+/// FixUpConditionalBr - Fix up a conditional branch whose destination is too
+/// far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool
+ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
+ MachineInstr *MI = Br.MI;
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // blt L1
+ // =>
+ // bge L2
+ // b L1
+ // L2:
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
+ CC = ARMCC::getOppositeCondition(CC);
+ unsigned CCReg = MI->getOperand(2).getReg();
+
+ // If the branch is at the end of its MBB and that has a fall-through block,
+ // direct the updated conditional branch to the fall-through block.
+ // Otherwise, split the MBB before the next instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineInstr *BMI = &MBB->back();
+ bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
+
+ ++NumCBrFixed;
+ if (BMI != MI) {
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ BMI->getOpcode() == Br.UncondBr) {
+ // Last MI in the BB is an unconditional branch. Can we simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
+ if (BBIsInRange(MI, NewDest, Br.MaxDisp)) {
+ DEBUG(errs() << " Invert Bcc condition and swap its destination with "
+ << *BMI);
+ BMI->getOperand(0).setMBB(DestBB);
+ MI->getOperand(0).setMBB(NewDest);
+ MI->getOperand(1).setImm(CC);
+ return true;
+ }
+ }
+ }
+
+ if (NeedSplit) {
+ SplitBlockBeforeInstr(MI);
+ // No need for the branch to the next block. We're adding an unconditional
+ // branch to the destination.
+ int delta = TII->GetInstSizeInBytes(&MBB->back());
+ BBSizes[MBB->getNumber()] -= delta;
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
+ AdjustBBOffsetsAfter(SplitBB, -delta);
+ MBB->back().eraseFromParent();
+ // BBOffsets[SplitBB] is wrong temporarily, fixed below
+ }
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
+
+ DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
+ << " also invert condition and change dest. to BB#"
+ << NextBB->getNumber() << "\n");
+
+ // Insert a new conditional branch and a new unconditional branch.
+ // Also update the ImmBranch as well as adding a new entry for the new
+ // branch.
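+ // The existing ImmBranch entry (Br) is reused for the rewritten
+ // conditional branch; only the new unconditional branch gets a fresh
+ // entry in ImmBranches.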
+ BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode())) + .addMBB(NextBB).addImm(CC).addReg(CCReg); + Br.MI = &MBB->back(); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB); + BBSizes[MBB->getNumber()] += TII->GetInstSizeInBytes(&MBB->back()); + unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr); + ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr)); + + // Remove the old conditional branch. It may or may not still be in MBB. + BBSizes[MI->getParent()->getNumber()] -= TII->GetInstSizeInBytes(MI); + MI->eraseFromParent(); + + // The net size change is an addition of one unconditional branch. + int delta = TII->GetInstSizeInBytes(&MBB->back()); + AdjustBBOffsetsAfter(MBB, delta); + return true; +} + +/// UndoLRSpillRestore - Remove Thumb push / pop instructions that only spill +/// LR / restore LR to pc. FIXME: This is done here because it's only possible +/// to do this if tBfar is not used. +bool ARMConstantIslands::UndoLRSpillRestore() { + bool MadeChange = false; + for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) { + MachineInstr *MI = PushPopMIs[i]; + // First two operands are predicates. + if (MI->getOpcode() == ARM::tPOP_RET && + MI->getOperand(2).getReg() == ARM::PC && + MI->getNumExplicitOperands() == 3) { + BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET)); + MI->eraseFromParent(); + MadeChange = true; + } + } + return MadeChange; +} + +bool ARMExpandPseudo::OptimizeThumb2Instructions(MachineFunction &MF) { + bool MadeChange = false; + + // Shrink ADR and LDR from constantpool. + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned Opcode = U.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2LEApcrel: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLEApcrel; + Bits = 8; + Scale = 4; + } + break; + case ARM::t2LDRpci: + if (isARMLowRegister(U.MI->getOperand(0).getReg())) { + NewOpc = ARM::tLDRpci; + Bits = 8; + Scale = 4; + } + break; + } + + if (!NewOpc) + continue; + + unsigned UserOffset = GetOffsetOf(U.MI) + 4; + unsigned MaxOffs = ((1 << Bits) - 1) * Scale; + // FIXME: Check if offset is a multiple of scale if scale is not 4.
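+ // Only shrink to the 16-bit encoding when the CPE remains reachable under + // the tighter displacement limit computed from Bits and Scale above.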
+ if (CPEIsInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) { + U.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = U.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2CPShrunk; + MadeChange = true; + } + } + + MadeChange |= OptimizeThumb2Branches(MF); + MadeChange |= OptimizeThumb2JumpTables(MF); + return MadeChange; +} + +bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { + bool MadeChange = false; + + for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i) { + ImmBranch &Br = ImmBranches[i]; + unsigned Opcode = Br.MI->getOpcode(); + unsigned NewOpc = 0; + unsigned Scale = 1; + unsigned Bits = 0; + switch (Opcode) { + default: break; + case ARM::t2B: + NewOpc = ARM::tB; + Bits = 11; + Scale = 2; + break; + case ARM::t2Bcc: { + NewOpc = ARM::tBcc; + Bits = 8; + Scale = 2; + break; + } + } + if (NewOpc) { + unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale; + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + if (BBIsInRange(Br.MI, DestBB, MaxOffs)) { + Br.MI->setDesc(TII->get(NewOpc)); + MachineBasicBlock *MBB = Br.MI->getParent(); + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumT2BrShrunk; + MadeChange = true; + } + } + + Opcode = Br.MI->getOpcode(); + if (Opcode != ARM::tBcc) + continue; + + NewOpc = 0; + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(Br.MI, PredReg); + if (Pred == ARMCC::EQ) + NewOpc = ARM::tCBZ; + else if (Pred == ARMCC::NE) + NewOpc = ARM::tCBNZ; + if (!NewOpc) + continue; + MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB(); + // Check if the distance is within 126. Subtract 2 from the starting offset + // because the cmp will be eliminated. + unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; + unsigned DestOffset = BBOffsets[DestBB->getNumber()]; + if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { + MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPi8) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + Pred = llvm::getInstrPredicate(CmpMI, PredReg); + if (Pred == ARMCC::AL && + CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) { + MachineBasicBlock *MBB = Br.MI->getParent(); + MachineInstr *NewBR = + BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); + CmpMI->eraseFromParent(); + Br.MI->eraseFromParent(); + Br.MI = NewBR; + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumCBZ; + MadeChange = true; + } + } + } + } + + return MadeChange; +} + +/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller +/// jumptables where possible. +bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + // FIXME: After the tables are shrunk, can we get rid of some of the + // constantpool tables? + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ?
3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + bool ByteOk = true; + bool HalfWordOk = true; + unsigned JTOffset = GetOffsetOf(MI) + 4; + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + unsigned DstOffset = BBOffsets[MBB->getNumber()]; + // Negative offset is not ok. FIXME: We should change BB layout to make + // sure all the branches are forward. + if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2) + ByteOk = false; + unsigned TBHLimit = ((1<<16)-1)*2; + if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit) + HalfWordOk = false; + if (!ByteOk && !HalfWordOk) + break; + } + + if (ByteOk || HalfWordOk) { + MachineBasicBlock *MBB = MI->getParent(); + unsigned BaseReg = MI->getOperand(0).getReg(); + bool BaseRegKill = MI->getOperand(0).isKill(); + if (!BaseRegKill) + continue; + unsigned IdxReg = MI->getOperand(1).getReg(); + bool IdxRegKill = MI->getOperand(1).isKill(); + + // Scan backwards to find the instruction that defines the base + // register. Due to post-RA scheduling, we can't count on it + // immediately preceding the branch instruction. + MachineBasicBlock::iterator PrevI = MI; + MachineBasicBlock::iterator B = MBB->begin(); + while (PrevI != B && !PrevI->definesRegister(BaseReg)) + --PrevI; + + // If for some reason we didn't find it, we can't do anything, so + // just skip this one. + if (!PrevI->definesRegister(BaseReg)) + continue; + + MachineInstr *AddrMI = PrevI; + bool OptOk = true; + // Examine the instruction that calculates the jumptable entry address. + // Make sure it only defines the base register and kills any uses + // other than the index register. + for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { + const MachineOperand &MO = AddrMI->getOperand(k); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef() && MO.getReg() != BaseReg) { + OptOk = false; + break; + } + if (MO.isUse() && !MO.isKill() && MO.getReg() != IdxReg) { + OptOk = false; + break; + } + } + if (!OptOk) + continue; + + // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction + // that gave us the initial base register definition. + for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) + ; + + // The instruction should be a tLEApcrel or t2LEApcrelJT; we want + // to delete it as well. + MachineInstr *LeaMI = PrevI; + if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && + LeaMI->getOpcode() != ARM::t2LEApcrelJT) || + LeaMI->getOperand(0).getReg() != BaseReg) + OptOk = false; + + if (!OptOk) + continue; + + unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT; + MachineInstr *NewJTMI = BuildMI(MBB, MI->getDebugLoc(), TII->get(Opc)) + .addReg(IdxReg, getKillRegState(IdxRegKill)) + .addJumpTableIndex(JTI, JTOP.getTargetFlags()) + .addImm(MI->getOperand(JTOpIdx+1).getImm()); + // FIXME: Insert an "ALIGN" instruction to ensure the next instruction + // is 2-byte aligned. For now, asm printer will fix it up.
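+ // Credit the bytes saved below: the single TB[BH] sequence replaces the + // address computation, the LEA, and the original jump table branch.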
+ unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI); + unsigned OrigSize = TII->GetInstSizeInBytes(AddrMI); + OrigSize += TII->GetInstSizeInBytes(LeaMI); + OrigSize += TII->GetInstSizeInBytes(MI); + + AddrMI->eraseFromParent(); + LeaMI->eraseFromParent(); + MI->eraseFromParent(); + + int delta = OrigSize - NewSize; + BBSizes[MBB->getNumber()] -= delta; + AdjustBBOffsetsAfter(MBB, -delta); + + ++NumTBs; + MadeChange = true; + } + } + + return MadeChange; +} + +/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// jump tables always branch forwards, since that's what tbb and tbh need. +bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + if (MJTI == 0) return false; + + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + // We prefer if target blocks for the jump table come after the jump + // instruction so we can use TB[BH]. Loop through the target blocks + // and try to adjust them so that this is true. + int JTNumber = MI->getParent()->getNumber(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + int DTNumber = MBB->getNumber(); + + if (DTNumber < JTNumber) { + // The destination precedes the switch. Try to move the block forward + // so we have a positive offset. + MachineBasicBlock *NewBB = + AdjustJTTargetBlockForward(MBB, MI->getParent()); + if (NewBB) + MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); + MadeChange = true; + } + } + } + + return MadeChange; +} + +MachineBasicBlock *ARMConstantIslands:: +AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) +{ + MachineFunction &MF = *BB->getParent(); + + // If the destination block is terminated by an unconditional branch, + // try to move it; otherwise, create a new block following the jump + // table that branches back to the actual target. This is a very simple + // heuristic. FIXME: We can definitely improve it. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + SmallVector<MachineOperand, 4> CondPrior; + MachineFunction::iterator BBi = BB; + MachineFunction::iterator OldPrior = prior(BBi); + + // If the block terminator isn't analyzable, don't try to move the block. + bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); + + // If the block ends in an unconditional branch, move it. The prior block + // has to have an analyzable terminator for us to move this one. Be paranoid + // and make sure we're not trying to move the entry block of the function. + if (!B && Cond.empty() && BB != MF.begin() && + !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { + BB->moveAfter(JTBB); + OldPrior->updateTerminator(); + BB->updateTerminator(); + // Update numbering to account for the block being moved. + MF.RenumberBlocks(); + ++NumJTMoved; + return NULL; + } + + // Create a new MBB for the code after the jump BB. + MachineBasicBlock *NewBB = + MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MachineFunction::iterator MBBI = JTBB; ++MBBI; + MF.insert(MBBI, NewBB); + + // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond directly to anything in the source. + assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); + BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB); + + // Update internal data structures to account for the newly inserted MBB. + MF.RenumberBlocks(NewBB); + + // Update the CFG. + NewBB->addSuccessor(BB); + JTBB->removeSuccessor(BB); + JTBB->addSuccessor(NewBB); + + ++NumJTInserted; + return NewBB; +} diff --git a/final/lib/Target/ARM/ARMConstantPoolValue.cpp b/final/lib/Target/ARM/ARMConstantPoolValue.cpp new file mode 100644 index 00000000000..165a1d849ad --- /dev/null +++ b/final/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -0,0 +1,130 @@ +//===- ARMConstantPoolValue.cpp - ARM constantpool value --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ARM specific constantpool value class. +// +//===----------------------------------------------------------------------===// + +#include "ARMConstantPoolValue.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/Constant.h" +#include "llvm/Constants.h" +#include "llvm/GlobalValue.h" +#include "llvm/Type.h" +#include "llvm/Support/raw_ostream.h" +#include <cstdlib> +using namespace llvm; + +ARMConstantPoolValue::ARMConstantPoolValue(const Constant *cval, unsigned id, + ARMCP::ARMCPKind K, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)cval->getType()), + CVal(cval), S(NULL), LabelId(id), Kind(K), PCAdjust(PCAdj), + Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, + const char *s, unsigned id, + unsigned char PCAdj, + ARMCP::ARMCPModifier Modif, + bool AddCA) + : MachineConstantPoolValue((const Type*)Type::getInt32Ty(C)), + CVal(NULL), S(strdup(s)), LabelId(id), Kind(ARMCP::CPExtSymbol), + PCAdjust(PCAdj), Modifier(Modif), AddCurrentAddress(AddCA) {} + +ARMConstantPoolValue::ARMConstantPoolValue(const GlobalValue *gv, + ARMCP::ARMCPModifier Modif) + : MachineConstantPoolValue((const Type*)Type::getInt32Ty(gv->getContext())), + CVal(gv), S(NULL), LabelId(0), Kind(ARMCP::CPValue), PCAdjust(0), + Modifier(Modif), AddCurrentAddress(false) {} + +const GlobalValue *ARMConstantPoolValue::getGV() const { + return dyn_cast_or_null<GlobalValue>(CVal); +} + +const BlockAddress *ARMConstantPoolValue::getBlockAddress() const { + return dyn_cast_or_null<BlockAddress>(CVal); +} + +static bool CPV_streq(const char *S1, const char *S2) { + if (S1 == S2) + return true; + if (S1 && S2 && strcmp(S1, S2) == 0) + return true; + return false; +} + +int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (CPV->CVal == CVal && + CPV->LabelId == LabelId && + CPV->PCAdjust == PCAdjust && + CPV_streq(CPV->S, S) && + CPV->Modifier == Modifier) + return i; + } + } + + return -1; +} + +ARMConstantPoolValue::~ARMConstantPoolValue() { + free((void*)S); +} + +void
+ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { + ID.AddPointer(CVal); + ID.AddPointer(S); + ID.AddInteger(LabelId); + ID.AddInteger(PCAdjust); +} + +bool +ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { + if (ACPV->Kind == Kind && + ACPV->CVal == CVal && + ACPV->PCAdjust == PCAdjust && + CPV_streq(ACPV->S, S) && + ACPV->Modifier == Modifier) { + if (ACPV->LabelId == LabelId) + return true; + // Two PC relative constpool entries containing the same GV address or + // external symbols. FIXME: What about blockaddress? + if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol) + return true; + } + return false; +} + +void ARMConstantPoolValue::dump() const { + errs() << "  " << *this; +} + + +void ARMConstantPoolValue::print(raw_ostream &O) const { + if (CVal) + O << CVal->getName(); + else + O << S; + if (Modifier) O << "(" << getModifierText() << ")"; + if (PCAdjust != 0) { + O << "-(LPC" << LabelId << "+" << (unsigned)PCAdjust; + if (AddCurrentAddress) O << "-."; + O << ")"; + } +} diff --git a/final/lib/Target/ARM/ARMConstantPoolValue.h b/final/lib/Target/ARM/ARMConstantPoolValue.h new file mode 100644 index 00000000000..d008811c40e --- /dev/null +++ b/final/lib/Target/ARM/ARMConstantPoolValue.h @@ -0,0 +1,122 @@ +//===- ARMConstantPoolValue.h - ARM constantpool value ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the ARM specific constantpool value class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H +#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H + +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/ErrorHandling.h" +#include <cstddef> + +namespace llvm { + +class Constant; +class BlockAddress; +class GlobalValue; +class LLVMContext; + +namespace ARMCP { + enum ARMCPKind { + CPValue, + CPExtSymbol, + CPBlockAddress, + CPLSDA + }; + + enum ARMCPModifier { + no_modifier, + TLSGD, + GOT, + GOTOFF, + GOTTPOFF, + TPOFF + }; +} + +/// ARMConstantPoolValue - ARM specific constantpool value. This is used to +/// represent PC-relative displacement between the address of the load +/// instruction and the constant being loaded, i.e. (&GV-(LPIC+8)). +class ARMConstantPoolValue : public MachineConstantPoolValue { + const Constant *CVal; // Constant being loaded. + const char *S; // ExtSymbol being loaded. + unsigned LabelId; // Label id of the load. + ARMCP::ARMCPKind Kind; // Kind of constant. + unsigned char PCAdjust; // Extra adjustment if constantpool is pc-relative. + // 8 for ARM, 4 for Thumb. + ARMCP::ARMCPModifier Modifier; // GV modifier i.e.
(&GV(modifier)-(LPIC+8)) + bool AddCurrentAddress; + +public: + ARMConstantPoolValue(const Constant *cval, unsigned id, + ARMCP::ARMCPKind Kind = ARMCP::CPValue, + unsigned char PCAdj = 0, + ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, + bool AddCurrentAddress = false); + ARMConstantPoolValue(LLVMContext &C, const char *s, unsigned id, + unsigned char PCAdj = 0, + ARMCP::ARMCPModifier Modifier = ARMCP::no_modifier, + bool AddCurrentAddress = false); + ARMConstantPoolValue(const GlobalValue *GV, ARMCP::ARMCPModifier Modifier); + ARMConstantPoolValue(); + ~ARMConstantPoolValue(); + + const GlobalValue *getGV() const; + const char *getSymbol() const { return S; } + const BlockAddress *getBlockAddress() const; + ARMCP::ARMCPModifier getModifier() const { return Modifier; } + const char *getModifierText() const { + switch (Modifier) { + default: llvm_unreachable("Unknown modifier!"); + // FIXME: Are these case sensitive? It'd be nice to lower-case all the + // strings if that's legal. + case ARMCP::no_modifier: return "none"; + case ARMCP::TLSGD: return "tlsgd"; + case ARMCP::GOT: return "GOT"; + case ARMCP::GOTOFF: return "GOTOFF"; + case ARMCP::GOTTPOFF: return "gottpoff"; + case ARMCP::TPOFF: return "tpoff"; + } + } + bool hasModifier() const { return Modifier != ARMCP::no_modifier; } + bool mustAddCurrentAddress() const { return AddCurrentAddress; } + unsigned getLabelId() const { return LabelId; } + unsigned char getPCAdjustment() const { return PCAdjust; } + bool isGlobalValue() const { return Kind == ARMCP::CPValue; } + bool isExtSymbol() const { return Kind == ARMCP::CPExtSymbol; } + bool isBlockAddress() { return Kind == ARMCP::CPBlockAddress; } + bool isLSDA() { return Kind == ARMCP::CPLSDA; } + + virtual unsigned getRelocationInfo() const { return 2; } + + virtual int getExistingMachineCPValue(MachineConstantPool *CP, + unsigned Alignment); + + virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + + /// hasSameValue - Return true if this ARM constpool value + /// can share the same constantpool entry as another ARM constpool value. + bool hasSameValue(ARMConstantPoolValue *ACPV); + + void print(raw_ostream *O) const { if (O) print(*O); } + void print(raw_ostream &O) const; + void dump() const; +}; + +inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) { + V.print(O); + return O; +} + +} // End llvm namespace + +#endif diff --git a/final/lib/Target/ARM/ARMELFWriterInfo.cpp b/final/lib/Target/ARM/ARMELFWriterInfo.cpp new file mode 100644 index 00000000000..51e68b4553f --- /dev/null +++ b/final/lib/Target/ARM/ARMELFWriterInfo.cpp @@ -0,0 +1,83 @@ +//===-- ARMELFWriterInfo.cpp - ELF Writer Info for the ARM backend --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the ARM backend. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMELFWriterInfo.h" +#include "ARMRelocations.h" +#include "llvm/Function.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Support/ELF.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// Implementation of the ARMELFWriterInfo class +//===----------------------------------------------------------------------===// + +ARMELFWriterInfo::ARMELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM.getTargetData()->getPointerSizeInBits() == 64, + TM.getTargetData()->isLittleEndian()) { +} + +ARMELFWriterInfo::~ARMELFWriterInfo() {} + +unsigned ARMELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { + switch (MachineRelTy) { + case ARM::reloc_arm_absolute: + case ARM::reloc_arm_relative: + case ARM::reloc_arm_cp_entry: + case ARM::reloc_arm_vfp_cp_entry: + case ARM::reloc_arm_machine_cp_entry: + case ARM::reloc_arm_jt_base: + case ARM::reloc_arm_pic_jt: + assert(0 && "unsupported ARM relocation type"); break; + + case ARM::reloc_arm_branch: return ELF::R_ARM_CALL; break; + case ARM::reloc_arm_movt: return ELF::R_ARM_MOVT_ABS; break; + case ARM::reloc_arm_movw: return ELF::R_ARM_MOVW_ABS_NC; break; + default: + llvm_unreachable("unknown ARM relocation type"); break; + } + return 0; +} + +long int ARMELFWriterInfo::getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier) const { + assert(0 && "ARMELFWriterInfo::getDefaultAddendForRelTy() not implemented"); + return 0; +} + +unsigned ARMELFWriterInfo::getRelocationTySize(unsigned RelTy) const { + assert(0 && "ARMELFWriterInfo::getRelocationTySize() not implemented"); + return 0; +} + +bool ARMELFWriterInfo::isPCRelativeRel(unsigned RelTy) const { + assert(0 && "ARMELFWriterInfo::isPCRelativeRel() not implemented"); + return true; +} + +unsigned ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() const { + assert(0 && + "ARMELFWriterInfo::getAbsoluteLabelMachineRelTy() not implemented"); + return 0; +} + +long int ARMELFWriterInfo::computeRelocation(unsigned SymOffset, + unsigned RelOffset, + unsigned RelTy) const { + assert(0 && + "ARMELFWriterInfo::computeRelocation() not implemented"); + return 0; +} diff --git a/final/lib/Target/ARM/ARMELFWriterInfo.h b/final/lib/Target/ARM/ARMELFWriterInfo.h new file mode 100644 index 00000000000..1c4e5329ac6 --- /dev/null +++ b/final/lib/Target/ARM/ARMELFWriterInfo.h @@ -0,0 +1,58 @@ +//===-- ARMELFWriterInfo.h - ELF Writer Info for ARM ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF writer information for the ARM backend. +// +//===----------------------------------------------------------------------===// + +#ifndef ARM_ELF_WRITER_INFO_H +#define ARM_ELF_WRITER_INFO_H + +#include "llvm/Target/TargetELFWriterInfo.h" + +namespace llvm { + + class ARMELFWriterInfo : public TargetELFWriterInfo { + public: + ARMELFWriterInfo(TargetMachine &TM); + virtual ~ARMELFWriterInfo(); + + /// getRelocationType - Returns the target specific ELF Relocation type.
+ /// 'MachineRelTy' contains the object code independent relocation type. + virtual unsigned getRelocationType(unsigned MachineRelTy) const; + + /// hasRelocationAddend - True if the target uses an addend in the + /// ELF relocation entry. + virtual bool hasRelocationAddend() const { return false; } + + /// getDefaultAddendForRelTy - Gets the default addend value for a + /// relocation entry based on the target ELF relocation type. + virtual long int getDefaultAddendForRelTy(unsigned RelTy, + long int Modifier = 0) const; + + /// getRelocationTySize - Returns the size of the relocatable field in bits. + virtual unsigned getRelocationTySize(unsigned RelTy) const; + + /// isPCRelativeRel - True if the relocation type is pc relative. + virtual bool isPCRelativeRel(unsigned RelTy) const; + + /// getAbsoluteLabelMachineRelTy - Returns the machine relocation type used + /// to reference an absolute label. + virtual unsigned getAbsoluteLabelMachineRelTy() const; + + /// computeRelocation - Some relocatable fields can be relocated + /// directly, avoiding relocation symbol emission; compute the + /// final relocation value for such a symbol. + virtual long int computeRelocation(unsigned SymOffset, unsigned RelOffset, + unsigned RelTy) const; + }; + +} // end llvm namespace + +#endif // ARM_ELF_WRITER_INFO_H diff --git a/final/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/final/lib/Target/ARM/ARMExpandPseudoInsts.cpp new file mode 100644 index 00000000000..de87ec359d9 --- /dev/null +++ b/final/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -0,0 +1,1241 @@ +//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expands pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// the post-regalloc scheduling pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-pseudo" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "ARMRegisterInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/raw_ostream.h" // FIXME: for debug only. remove!
+using namespace llvm; + +namespace { + class ARMExpandPseudo : public MachineFunctionPass { + public: + static char ID; + ARMExpandPseudo() : MachineFunctionPass(ID) {} + + const ARMBaseInstrInfo *TII; + const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; + ARMFunctionInfo *AFI; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM pseudo instruction expansion pass"; + } + + private: + void TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); + bool ExpandMI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI); + bool ExpandMBB(MachineBasicBlock &MBB); + void ExpandVLD(MachineBasicBlock::iterator &MBBI); + void ExpandVST(MachineBasicBlock::iterator &MBBI); + void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); + void ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs); + void ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); + }; + char ARMExpandPseudo::ID = 0; +} + +/// TransferImpOps - Transfer implicit operands on the pseudo instruction to +/// the instructions created from the expansion. +void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, + MachineInstrBuilder &UseMI, + MachineInstrBuilder &DefMI) { + const TargetInstrDesc &Desc = OldMI.getDesc(); + for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = OldMI.getOperand(i); + assert(MO.isReg() && MO.getReg()); + if (MO.isUse()) + UseMI.addOperand(MO); + else + DefMI.addOperand(MO); + } +} + +namespace { + // Constants for register spacing in NEON load/store instructions. + // For quad-register load-lane and store-lane pseudo instructions, the + // spacing is initially assumed to be EvenDblSpc, and that is changed to + // OddDblSpc depending on the lane number operand. + enum NEONRegSpacing { + SingleSpc, + EvenDblSpc, + OddDblSpc + }; + + // Entries for NEON load/store information table. The table is sorted by + // PseudoOpc for fast binary-search lookups. + struct NEONLdStTableEntry { + unsigned PseudoOpc; + unsigned RealOpc; + bool IsLoad; + bool HasWriteBack; + NEONRegSpacing RegSpacing; + unsigned char NumRegs; // D registers loaded or stored + unsigned char RegElts; // elements per D register; used for lane ops + + // Comparison methods for binary search of the table.
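+ // The mixed entry/key overloads exist so std::lower_bound can compare + // table entries directly against a bare PseudoOpc value.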
+ bool operator<(const NEONLdStTableEntry &TE) const { + return PseudoOpc < TE.PseudoOpc; + } + friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { + return TE.PseudoOpc < PseudoOpc; + } + friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, + const NEONLdStTableEntry &TE) { + return PseudoOpc < TE.PseudoOpc; + } + }; +} + +static const NEONLdStTableEntry NEONLdStTable[] = { +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4}, +{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2}, +{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8}, +{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8}, + +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 }, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 }, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 }, + +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, +{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, +{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 }, + +{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 }, +{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 }, +{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4}, +{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2}, +{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8}, +{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8}, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 }, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 }, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 }, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 }, + +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 }, +{ ARM::VLD2d16Pseudo_UPD, 
ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 }, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 }, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 }, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 }, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 }, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4}, +{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4}, +{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2}, +{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2}, +{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8}, +{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8}, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 }, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 }, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 }, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 }, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 }, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 }, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 }, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 }, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 }, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 }, +{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 }, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 }, +{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 }, +{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 }, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 }, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 }, +{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 }, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 }, + +{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4}, +{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4}, +{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2}, +{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2}, +{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8}, +{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, 
true, SingleSpc, 4, 8}, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 }, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 }, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 }, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 }, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 }, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 }, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 }, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 }, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 }, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 }, +{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 }, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 }, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 }, +{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 }, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 }, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, +{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 }, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, + +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 }, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 }, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 }, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 }, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, +{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, +{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 }, + +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 }, +{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 }, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, 
SingleSpc, 2, 2 }, +{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 }, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2}, + +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 }, +{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 }, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 }, +{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 }, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 }, +{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 }, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 }, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4}, +{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 }, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 }, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 }, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 }, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 }, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 }, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 }, +{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 }, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 }, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 }, +{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 }, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 }, +{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 }, +{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 }, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 }, + +{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, 
SingleSpc, 4, 2 }, +{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 }, +{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4}, +{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2}, +{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2}, + +{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 }, +{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 }, +{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 }, +{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 }, +{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 }, +{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 }, + +{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 }, +{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 }, +{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 }, +{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 }, +{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 }, +{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 }, +{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 }, +{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 }, +{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 } +}; + +/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON +/// load or store pseudo instruction. +static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { + unsigned NumEntries = array_lengthof(NEONLdStTable); + +#ifndef NDEBUG + // Make sure the table is sorted. + static bool TableChecked = false; + if (!TableChecked) { + for (unsigned i = 0; i != NumEntries-1; ++i) + assert(NEONLdStTable[i] < NEONLdStTable[i+1] && + "NEONLdStTable is not sorted!"); + TableChecked = true; + } +#endif + + const NEONLdStTableEntry *I = + std::lower_bound(NEONLdStTable, NEONLdStTable + NumEntries, Opcode); + if (I != NEONLdStTable + NumEntries && I->PseudoOpc == Opcode) + return I; + return NULL; +} + +/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, +/// corresponding to the specified register spacing. Not all of the results +/// are necessarily valid, e.g., a Q register only has 2 D subregisters. +static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, + const TargetRegisterInfo *TRI, unsigned &D0, + unsigned &D1, unsigned &D2, unsigned &D3) { + if (RegSpc == SingleSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_1); + D2 = TRI->getSubReg(Reg, ARM::dsub_2); + D3 = TRI->getSubReg(Reg, ARM::dsub_3); + } else if (RegSpc == EvenDblSpc) { + D0 = TRI->getSubReg(Reg, ARM::dsub_0); + D1 = TRI->getSubReg(Reg, ARM::dsub_2); + D2 = TRI->getSubReg(Reg, ARM::dsub_4); + D3 = TRI->getSubReg(Reg, ARM::dsub_6); + } else { + assert(RegSpc == OddDblSpc && "unknown register spacing"); + D0 = TRI->getSubReg(Reg, ARM::dsub_1); + D1 = TRI->getSubReg(Reg, ARM::dsub_3); + D2 = TRI->getSubReg(Reg, ARM::dsub_5); + D3 = TRI->getSubReg(Reg, ARM::dsub_7); + } +} + +/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register +/// operands to real VLD instructions with D register operands. 
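+/// The single Q/QQ/QQQQ destination is rewritten as its D subregisters, and +/// an implicit def of the original super-register is added to keep liveness +/// correct.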
+void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); + unsigned OpIdx = 0; + + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the addrmode6 operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // For an instruction writing double-spaced subregs, the pseudo instruction + // has an extra operand that is a use of the super-register. Record the + // operand index and skip over it. + unsigned SrcOpIdx = 0; + if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc) + SrcOpIdx = OpIdx++; + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the super-register source operand used for double-spaced subregs over + // to the new instruction as an implicit operand. + if (SrcOpIdx != 0) { + MachineOperand MO = MI.getOperand(SrcOpIdx); + MO.setImplicit(true); + MIB.addOperand(MO); + } + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register +/// operands to real VST instructions with D register operands. +void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); + unsigned OpIdx = 0; + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the addrmode6 operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + + // Copy the predicate operands. 
+ MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + if (SrcIsKill) + // Add an implicit kill for the super-reg. + (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ +/// register operands to real instructions with D register operands. +void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); + assert(TableEntry && "NEONLdStTable lookup failed"); + NEONRegSpacing RegSpc = TableEntry->RegSpacing; + unsigned NumRegs = TableEntry->NumRegs; + unsigned RegElts = TableEntry->RegElts; + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(TableEntry->RealOpc)); + unsigned OpIdx = 0; + // The lane operand is always the 3rd from last operand, before the 2 + // predicate operands. + unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); + + // Adjust the lane and spacing as needed for Q registers. + assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); + if (RegSpc == EvenDblSpc && Lane >= RegElts) { + RegSpc = OddDblSpc; + Lane -= RegElts; + } + assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); + + unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0; + unsigned DstReg = 0; + bool DstIsDead = false; + if (TableEntry->IsLoad) { + DstIsDead = MI.getOperand(OpIdx).isDead(); + DstReg = MI.getOperand(OpIdx++).getReg(); + GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 1) + MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2) + MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 3) + MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); + } + + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the addrmode6 operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + // Copy the am6offset operand. + if (TableEntry->HasWriteBack) + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Grab the super-register source. + MachineOperand MO = MI.getOperand(OpIdx++); + if (!TableEntry->IsLoad) + GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); + + // Add the subregs as sources of the new instruction. + unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | + getKillRegState(MO.isKill())); + MIB.addReg(D0, SrcFlags); + if (NumRegs > 1) + MIB.addReg(D1, SrcFlags); + if (NumRegs > 2) + MIB.addReg(D2, SrcFlags); + if (NumRegs > 3) + MIB.addReg(D3, SrcFlags); + + // Add the lane number operand. + MIB.addImm(Lane); + OpIdx += 1; + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the super-register source to be an implicit source. + MO.setImplicit(true); + MIB.addOperand(MO); + if (TableEntry->IsLoad) + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ +/// register operands to real instructions with D register operands. 
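+/// The table source is split into D subregisters; the VTBX forms (IsExt) also +/// copy over the extra operand holding the previous destination value.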
+void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, + unsigned Opc, bool IsExt, unsigned NumRegs) { + MachineInstr &MI = *MBBI; + MachineBasicBlock &MBB = *MI.getParent(); + + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); + unsigned OpIdx = 0; + + // Transfer the destination register operand. + MIB.addOperand(MI.getOperand(OpIdx++)); + if (IsExt) + MIB.addOperand(MI.getOperand(OpIdx++)); + + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + unsigned D0, D1, D2, D3; + GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); + MIB.addReg(D0).addReg(D1); + if (NumRegs > 2) + MIB.addReg(D2); + if (NumRegs > 3) + MIB.addReg(D3); + + // Copy the other source register operand. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + if (SrcIsKill) + // Add an implicit kill for the super-reg. + (*MIB).addRegisterKilled(SrcReg, TRI, true); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); +} + +void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + unsigned Opcode = MI.getOpcode(); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg); + unsigned DstReg = MI.getOperand(0).getReg(); + bool DstIsDead = MI.getOperand(0).isDead(); + bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; + const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1); + MachineInstrBuilder LO16, HI16; + + if (!STI->hasV6T2Ops() && + (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { + // Expand into a movi + orr. + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); + unsigned ImmVal = (unsigned)MO.getImm(); + unsigned SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); + unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); + LO16 = LO16.addImm(SOImmValV1); + HI16 = HI16.addImm(SOImmValV2); + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16.addImm(Pred).addReg(PredReg).addReg(0); + HI16.addImm(Pred).addReg(PredReg).addReg(0); + TransferImpOps(MI, LO16, HI16); + MI.eraseFromParent(); + return; + } + + unsigned LO16Opc = 0; + unsigned HI16Opc = 0; + if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { + LO16Opc = ARM::t2MOVi16; + HI16Opc = ARM::t2MOVTi16; + } else { + LO16Opc = ARM::MOVi16; + HI16Opc = ARM::MOVTi16; + } + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) + .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + const GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + } + + (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + 
(*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+  LO16.addImm(Pred).addReg(PredReg);
+  HI16.addImm(Pred).addReg(PredReg);
+
+  TransferImpOps(MI, LO16, HI16);
+  MI.eraseFromParent();
+}
+
+bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MBBI) {
+  MachineInstr &MI = *MBBI;
+  unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+    default:
+      return false;
+    case ARM::Int_eh_sjlj_dispatchsetup: {
+      MachineFunction &MF = *MI.getParent()->getParent();
+      const ARMBaseInstrInfo *AII =
+        static_cast<const ARMBaseInstrInfo*>(TII);
+      const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
+      // For functions using a base pointer, we rematerialize it (via the frame
+      // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
+      // for us. Otherwise, expand to nothing.
+      if (RI.hasBasePointer(MF)) {
+        int32_t NumBytes = AFI->getFramePtrSpillOffset();
+        unsigned FramePtr = RI.getFrameRegister(MF);
+        assert(MF.getTarget().getFrameLowering()->hasFP(MF) &&
+               "base pointer without frame pointer?");
+
+        if (AFI->isThumb2Function()) {
+          llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                       FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
+        } else if (AFI->isThumbFunction()) {
+          llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                          FramePtr, -NumBytes, *TII, RI);
+        } else {
+          llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
+                                        FramePtr, -NumBytes, ARMCC::AL, 0,
+                                        *TII);
+        }
+        // If there's dynamic realignment, adjust for it.
+        if (RI.needsStackRealignment(MF)) {
+          MachineFrameInfo *MFI = MF.getFrameInfo();
+          unsigned MaxAlign = MFI->getMaxAlignment();
+          assert(!AFI->isThumb1OnlyFunction());
+          // Emit bic r6, r6, MaxAlign
+          unsigned bicOpc = AFI->isThumbFunction() ?
+            ARM::t2BICri : ARM::BICri;
+          AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                              TII->get(bicOpc), ARM::R6)
+                                      .addReg(ARM::R6, RegState::Kill)
+                                      .addImm(MaxAlign-1)));
+        }
+
+      }
+      MI.eraseFromParent();
+      return true;
+    }
+
+    case ARM::MOVsrl_flag:
+    case ARM::MOVsra_flag: {
+      // These are just fancy MOV instructions.
+      AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+                             MI.getOperand(0).getReg())
+                     .addOperand(MI.getOperand(1))
+                     .addReg(0)
+                     .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+                                                  ARM_AM::lsr : ARM_AM::asr),
+                                                 1)))
+        .addReg(ARM::CPSR, RegState::Define);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::RRX: {
+      // This encodes as "MOVs Rd, Rm, rrx".
+      MachineInstrBuilder MIB =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs),
+                               MI.getOperand(0).getReg())
+                       .addOperand(MI.getOperand(1))
+                       .addOperand(MI.getOperand(1))
+                       .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)))
+        .addReg(0);
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::TPsoft: {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                TII->get(ARM::BL))
+        .addExternalSymbol("__aeabi_read_tp", 0);
+
+      (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      TransferImpOps(MI, MIB, MIB);
+      MI.eraseFromParent();
+      return true;
+    }
+    case ARM::tLDRpci_pic:
+    case ARM::t2LDRpci_pic: {
+      unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+        ? ARM::tLDRpci : ARM::t2LDRpci;
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      MachineInstrBuilder MIB1 =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(NewLdOpc), DstReg)
+                       .addOperand(MI.getOperand(1)));
+      (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(ARM::tPICADD))
+        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+        .addReg(DstReg)
+        .addOperand(MI.getOperand(2));
+      TransferImpOps(MI, MIB1, MIB2);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    case ARM::MOV_ga_dyn:
+    case ARM::MOV_ga_pcrel:
+    case ARM::MOV_ga_pcrel_ldr:
+    case ARM::t2MOV_ga_dyn:
+    case ARM::t2MOV_ga_pcrel: {
+      // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
+      unsigned LabelId = AFI->createPICLabelUId();
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      const MachineOperand &MO1 = MI.getOperand(1);
+      const GlobalValue *GV = MO1.getGlobal();
+      unsigned TF = MO1.getTargetFlags();
+      bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
+      bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
+      unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
+      unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+      unsigned LO16TF = isPIC
+        ? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
+      unsigned HI16TF = isPIC
+        ? ARMII::MO_HI16_NONLAZY_PIC : ARMII::MO_HI16_NONLAZY;
+      unsigned PICAddOpc = isARM
+        ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
+        : ARM::tPICADD;
+      MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(LO16Opc), DstReg)
+        .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+        .addImm(LabelId);
+      MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(HI16Opc), DstReg)
+        .addReg(DstReg)
+        .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+        .addImm(LabelId);
+      if (!isPIC) {
+        TransferImpOps(MI, MIB1, MIB2);
+        MI.eraseFromParent();
+        return true;
+      }
+
+      MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                                         TII->get(PICAddOpc))
+        .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+        .addReg(DstReg).addImm(LabelId);
+      if (isARM) {
+        AddDefaultPred(MIB3);
+        if (Opcode == ARM::MOV_ga_pcrel_ldr)
+          (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+      }
+      TransferImpOps(MI, MIB1, MIB3);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    case ARM::MOVi32imm:
+    case ARM::MOVCCi32imm:
+    case ARM::t2MOVi32imm:
+    case ARM::t2MOVCCi32imm:
+      ExpandMOV32BitImm(MBB, MBBI);
+      return true;
+
+    case ARM::VMOVQQ: {
+      unsigned DstReg = MI.getOperand(0).getReg();
+      bool DstIsDead = MI.getOperand(0).isDead();
+      unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+      unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+      unsigned SrcReg = MI.getOperand(1).getReg();
+      bool SrcIsKill = MI.getOperand(1).isKill();
+      unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+      unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
+      MachineInstrBuilder Even =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(EvenDst,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+      MachineInstrBuilder Odd =
+        AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+                               TII->get(ARM::VMOVQ))
+                       .addReg(OddDst,
+                               RegState::Define | getDeadRegState(DstIsDead))
+                       .addReg(OddSrc, getKillRegState(SrcIsKill)));
+
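// (TransferImpOps copies the pseudo's implicit uses onto the first VMOVQ
+      // and its implicit defs onto the second.)
+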
TransferImpOps(MI, Even, Odd); + MI.eraseFromParent(); + return true; + } + + case ARM::VLDMQIA: + case ARM::VLDMQDB: { + unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + + // Grab the Q register destination. + bool DstIsDead = MI.getOperand(OpIdx).isDead(); + unsigned DstReg = MI.getOperand(OpIdx++).getReg(); + + // Copy the source register. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Add the destination operands (D subregs). + unsigned D0 = TRI->getSubReg(DstReg, ARM::dsub_0); + unsigned D1 = TRI->getSubReg(DstReg, ARM::dsub_1); + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) + .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + + // Add an implicit def for the super-register. + MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + + case ARM::VSTMQIA: + case ARM::VSTMQDB: { + unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + + // Grab the Q register source. + bool SrcIsKill = MI.getOperand(OpIdx).isKill(); + unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); + + // Copy the destination register. + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Copy the predicate operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + // Add the source operands (D subregs). + unsigned D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); + unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); + MIB.addReg(D0).addReg(D1); + + if (SrcIsKill) + // Add an implicit kill for the Q register. + (*MIB).addRegisterKilled(SrcReg, TRI, true); + + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + case ARM::VDUPfqf: + case ARM::VDUPfdf:{ + unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); + unsigned OpIdx = 0; + unsigned SrcReg = MI.getOperand(1).getReg(); + unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; + unsigned DReg = TRI->getMatchingSuperReg(SrcReg, + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + // The lane is [0,1] for the containing DReg superregister. + // Copy the dst/src register operands. + MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addReg(DReg); + ++OpIdx; + // Add the lane select operand. + MIB.addImm(Lane); + // Add the predicate operands. 
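+  // (The condition-code immediate and the predicate register, as on every
+  // predicable ARM instruction.)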
+ MIB.addOperand(MI.getOperand(OpIdx++)); + MIB.addOperand(MI.getOperand(OpIdx++)); + + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } + + case ARM::VLD1q8Pseudo: + case ARM::VLD1q16Pseudo: + case ARM::VLD1q32Pseudo: + case ARM::VLD1q64Pseudo: + case ARM::VLD1q8Pseudo_UPD: + case ARM::VLD1q16Pseudo_UPD: + case ARM::VLD1q32Pseudo_UPD: + case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD2d8Pseudo: + case ARM::VLD2d16Pseudo: + case ARM::VLD2d32Pseudo: + case ARM::VLD2q8Pseudo: + case ARM::VLD2q16Pseudo: + case ARM::VLD2q32Pseudo: + case ARM::VLD2d8Pseudo_UPD: + case ARM::VLD2d16Pseudo_UPD: + case ARM::VLD2d32Pseudo_UPD: + case ARM::VLD2q8Pseudo_UPD: + case ARM::VLD2q16Pseudo_UPD: + case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD3d8Pseudo: + case ARM::VLD3d16Pseudo: + case ARM::VLD3d32Pseudo: + case ARM::VLD1d64TPseudo: + case ARM::VLD3d8Pseudo_UPD: + case ARM::VLD3d16Pseudo_UPD: + case ARM::VLD3d32Pseudo_UPD: + case ARM::VLD1d64TPseudo_UPD: + case ARM::VLD3q8Pseudo_UPD: + case ARM::VLD3q16Pseudo_UPD: + case ARM::VLD3q32Pseudo_UPD: + case ARM::VLD3q8oddPseudo: + case ARM::VLD3q16oddPseudo: + case ARM::VLD3q32oddPseudo: + case ARM::VLD3q8oddPseudo_UPD: + case ARM::VLD3q16oddPseudo_UPD: + case ARM::VLD3q32oddPseudo_UPD: + case ARM::VLD4d8Pseudo: + case ARM::VLD4d16Pseudo: + case ARM::VLD4d32Pseudo: + case ARM::VLD1d64QPseudo: + case ARM::VLD4d8Pseudo_UPD: + case ARM::VLD4d16Pseudo_UPD: + case ARM::VLD4d32Pseudo_UPD: + case ARM::VLD1d64QPseudo_UPD: + case ARM::VLD4q8Pseudo_UPD: + case ARM::VLD4q16Pseudo_UPD: + case ARM::VLD4q32Pseudo_UPD: + case ARM::VLD4q8oddPseudo: + case ARM::VLD4q16oddPseudo: + case ARM::VLD4q32oddPseudo: + case ARM::VLD4q8oddPseudo_UPD: + case ARM::VLD4q16oddPseudo_UPD: + case ARM::VLD4q32oddPseudo_UPD: + case ARM::VLD1DUPq8Pseudo: + case ARM::VLD1DUPq16Pseudo: + case ARM::VLD1DUPq32Pseudo: + case ARM::VLD1DUPq8Pseudo_UPD: + case ARM::VLD1DUPq16Pseudo_UPD: + case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD2DUPd8Pseudo: + case ARM::VLD2DUPd16Pseudo: + case ARM::VLD2DUPd32Pseudo: + case ARM::VLD2DUPd8Pseudo_UPD: + case ARM::VLD2DUPd16Pseudo_UPD: + case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD3DUPd8Pseudo: + case ARM::VLD3DUPd16Pseudo: + case ARM::VLD3DUPd32Pseudo: + case ARM::VLD3DUPd8Pseudo_UPD: + case ARM::VLD3DUPd16Pseudo_UPD: + case ARM::VLD3DUPd32Pseudo_UPD: + case ARM::VLD4DUPd8Pseudo: + case ARM::VLD4DUPd16Pseudo: + case ARM::VLD4DUPd32Pseudo: + case ARM::VLD4DUPd8Pseudo_UPD: + case ARM::VLD4DUPd16Pseudo_UPD: + case ARM::VLD4DUPd32Pseudo_UPD: + ExpandVLD(MBBI); + return true; + + case ARM::VST1q8Pseudo: + case ARM::VST1q16Pseudo: + case ARM::VST1q32Pseudo: + case ARM::VST1q64Pseudo: + case ARM::VST1q8Pseudo_UPD: + case ARM::VST1q16Pseudo_UPD: + case ARM::VST1q32Pseudo_UPD: + case ARM::VST1q64Pseudo_UPD: + case ARM::VST2d8Pseudo: + case ARM::VST2d16Pseudo: + case ARM::VST2d32Pseudo: + case ARM::VST2q8Pseudo: + case ARM::VST2q16Pseudo: + case ARM::VST2q32Pseudo: + case ARM::VST2d8Pseudo_UPD: + case ARM::VST2d16Pseudo_UPD: + case ARM::VST2d32Pseudo_UPD: + case ARM::VST2q8Pseudo_UPD: + case ARM::VST2q16Pseudo_UPD: + case ARM::VST2q32Pseudo_UPD: + case ARM::VST3d8Pseudo: + case ARM::VST3d16Pseudo: + case ARM::VST3d32Pseudo: + case ARM::VST1d64TPseudo: + case ARM::VST3d8Pseudo_UPD: + case ARM::VST3d16Pseudo_UPD: + case ARM::VST3d32Pseudo_UPD: + case ARM::VST1d64TPseudo_UPD: + case ARM::VST3q8Pseudo_UPD: + case ARM::VST3q16Pseudo_UPD: + case ARM::VST3q32Pseudo_UPD: + case ARM::VST3q8oddPseudo: + case ARM::VST3q16oddPseudo: + case ARM::VST3q32oddPseudo: + case 
ARM::VST3q8oddPseudo_UPD: + case ARM::VST3q16oddPseudo_UPD: + case ARM::VST3q32oddPseudo_UPD: + case ARM::VST4d8Pseudo: + case ARM::VST4d16Pseudo: + case ARM::VST4d32Pseudo: + case ARM::VST1d64QPseudo: + case ARM::VST4d8Pseudo_UPD: + case ARM::VST4d16Pseudo_UPD: + case ARM::VST4d32Pseudo_UPD: + case ARM::VST1d64QPseudo_UPD: + case ARM::VST4q8Pseudo_UPD: + case ARM::VST4q16Pseudo_UPD: + case ARM::VST4q32Pseudo_UPD: + case ARM::VST4q8oddPseudo: + case ARM::VST4q16oddPseudo: + case ARM::VST4q32oddPseudo: + case ARM::VST4q8oddPseudo_UPD: + case ARM::VST4q16oddPseudo_UPD: + case ARM::VST4q32oddPseudo_UPD: + ExpandVST(MBBI); + return true; + + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD3LNd8Pseudo: + case ARM::VLD3LNd16Pseudo: + case ARM::VLD3LNd32Pseudo: + case ARM::VLD3LNq16Pseudo: + case ARM::VLD3LNq32Pseudo: + case ARM::VLD3LNd8Pseudo_UPD: + case ARM::VLD3LNd16Pseudo_UPD: + case ARM::VLD3LNd32Pseudo_UPD: + case ARM::VLD3LNq16Pseudo_UPD: + case ARM::VLD3LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + case ARM::VST1LNq8Pseudo: + case ARM::VST1LNq16Pseudo: + case ARM::VST1LNq32Pseudo: + case ARM::VST1LNq8Pseudo_UPD: + case ARM::VST1LNq16Pseudo_UPD: + case ARM::VST1LNq32Pseudo_UPD: + case ARM::VST2LNd8Pseudo: + case ARM::VST2LNd16Pseudo: + case ARM::VST2LNd32Pseudo: + case ARM::VST2LNq16Pseudo: + case ARM::VST2LNq32Pseudo: + case ARM::VST2LNd8Pseudo_UPD: + case ARM::VST2LNd16Pseudo_UPD: + case ARM::VST2LNd32Pseudo_UPD: + case ARM::VST2LNq16Pseudo_UPD: + case ARM::VST2LNq32Pseudo_UPD: + case ARM::VST3LNd8Pseudo: + case ARM::VST3LNd16Pseudo: + case ARM::VST3LNd32Pseudo: + case ARM::VST3LNq16Pseudo: + case ARM::VST3LNq32Pseudo: + case ARM::VST3LNd8Pseudo_UPD: + case ARM::VST3LNd16Pseudo_UPD: + case ARM::VST3LNd32Pseudo_UPD: + case ARM::VST3LNq16Pseudo_UPD: + case ARM::VST3LNq32Pseudo_UPD: + case ARM::VST4LNd8Pseudo: + case ARM::VST4LNd16Pseudo: + case ARM::VST4LNd32Pseudo: + case ARM::VST4LNq16Pseudo: + case ARM::VST4LNq32Pseudo: + case ARM::VST4LNd8Pseudo_UPD: + case ARM::VST4LNd16Pseudo_UPD: + case ARM::VST4LNd32Pseudo_UPD: + case ARM::VST4LNq16Pseudo_UPD: + case ARM::VST4LNq32Pseudo_UPD: + ExpandLaneOp(MBBI); + return true; + + case ARM::VTBL2Pseudo: ExpandVTBL(MBBI, ARM::VTBL2, false, 2); return true; + case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false, 3); return true; + case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false, 4); return true; + case ARM::VTBX2Pseudo: ExpandVTBL(MBBI, ARM::VTBX2, true, 2); return true; + case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true, 3); return true; + case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true, 4); return true; + } + + return false; +} + +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while 
(MBBI != E) {
+    MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
+    Modified |= ExpandMI(MBB, MBBI);
+    MBBI = NMBBI;
+  }
+
+  return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+  const TargetMachine &TM = MF.getTarget();
+  TII = static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo());
+  TRI = TM.getRegisterInfo();
+  STI = &TM.getSubtarget<ARMSubtarget>();
+  AFI = MF.getInfo<ARMFunctionInfo>();
+
+  bool Modified = false;
+  for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+       ++MFI)
+    Modified |= ExpandMBB(*MFI);
+  return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+  return new ARMExpandPseudo();
+}
diff --git a/final/lib/Target/ARM/ARMFastISel.cpp b/final/lib/Target/ARM/ARMFastISel.cpp
new file mode 100644
index 00000000000..c7385b7a78d
--- /dev/null
+++ b/final/lib/Target/ARM/ARMFastISel.cpp
@@ -0,0 +1,1920 @@
+//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the ARM-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// ARMGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "ARMCallingConv.h"
+#include "ARMRegisterInfo.h"
+#include "ARMTargetMachine.h"
+#include "ARMSubtarget.h"
+#include "ARMConstantPoolValue.h"
+#include "llvm/CallingConv.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+static cl::opt<bool>
+DisableARMFastISel("disable-arm-fast-isel",
+                   cl::desc("Turn off experimental ARM fast-isel support"),
+                   cl::init(false), cl::Hidden);
+
+extern cl::opt<bool> EnableARMLongCalls;
+
+namespace {
+
+  // All possible address modes, plus some.
+  typedef struct Address {
+    enum {
+      RegBase,
+      FrameIndexBase
+    } BaseType;
+
+    union {
+      unsigned Reg;
+      int FI;
+    } Base;
+
+    int Offset;
+    unsigned Scale;
+    unsigned PlusReg;
+
+    // Innocuous defaults for our address.
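+    // (RegBase with a null register and zero offset; Scale and PlusReg
+    // default to 0.)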
+    Address()
+      : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
+      Base.Reg = 0;
+    }
+  } Address;
+
+class ARMFastISel : public FastISel {
+
+  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+  /// make the right decision when generating code for different targets.
+  const ARMSubtarget *Subtarget;
+  const TargetMachine &TM;
+  const TargetInstrInfo &TII;
+  const TargetLowering &TLI;
+  ARMFunctionInfo *AFI;
+
+  // Convenience variables to avoid some queries.
+  bool isThumb;
+  LLVMContext *Context;
+
+  public:
+    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
+    : FastISel(funcInfo),
+      TM(funcInfo.MF->getTarget()),
+      TII(*TM.getInstrInfo()),
+      TLI(*TM.getTargetLowering()) {
+      Subtarget = &TM.getSubtarget<ARMSubtarget>();
+      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
+      isThumb = AFI->isThumbFunction();
+      Context = &funcInfo.Fn->getContext();
+    }
+
+    // Code from FastISel.cpp.
+    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
+                                   const TargetRegisterClass *RC);
+    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    unsigned Op0, bool Op0IsKill);
+    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     unsigned Op1, bool Op1IsKill);
+    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     uint64_t Imm);
+    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
+                                     const TargetRegisterClass *RC,
+                                     unsigned Op0, bool Op0IsKill,
+                                     const ConstantFP *FPImm);
+    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
+                                    const TargetRegisterClass *RC,
+                                    uint64_t Imm);
+    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
+                                      const TargetRegisterClass *RC,
+                                      unsigned Op0, bool Op0IsKill,
+                                      unsigned Op1, bool Op1IsKill,
+                                      uint64_t Imm);
+    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
+                                                unsigned Op0, bool Op0IsKill,
+                                                uint32_t Idx);
+
+    // Backend specific FastISel code.
+    virtual bool TargetSelectInstruction(const Instruction *I);
+    virtual unsigned TargetMaterializeConstant(const Constant *C);
+    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+
+  #include "ARMGenFastISel.inc"
+
+    // Instruction selection routines.
+  private:
+    bool SelectLoad(const Instruction *I);
+    bool SelectStore(const Instruction *I);
+    bool SelectBranch(const Instruction *I);
+    bool SelectCmp(const Instruction *I);
+    bool SelectFPExt(const Instruction *I);
+    bool SelectFPTrunc(const Instruction *I);
+    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
+    bool SelectSIToFP(const Instruction *I);
+    bool SelectFPToSI(const Instruction *I);
+    bool SelectSDiv(const Instruction *I);
+    bool SelectSRem(const Instruction *I);
+    bool SelectCall(const Instruction *I);
+    bool SelectSelect(const Instruction *I);
+    bool SelectRet(const Instruction *I);
+
+    // Utility routines.
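+    // (Address computation, constant materialization, and the type-legality
+    // checks shared by the Select* routines above.)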
+  private:
+    bool isTypeLegal(const Type *Ty, MVT &VT);
+    bool isLoadTypeLegal(const Type *Ty, MVT &VT);
+    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
+    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
+    bool ARMComputeAddress(const Value *Obj, Address &Addr);
+    void ARMSimplifyAddress(Address &Addr, EVT VT);
+    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
+    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
+    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
+    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
+    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
+    unsigned ARMSelectCallOp(const GlobalValue *GV);
+
+    // Call handling routines.
+  private:
+    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
+                        unsigned &ResultReg);
+    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
+    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
+                         SmallVectorImpl<unsigned> &ArgRegs,
+                         SmallVectorImpl<MVT> &ArgVTs,
+                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+                         SmallVectorImpl<unsigned> &RegArgs,
+                         CallingConv::ID CC,
+                         unsigned &NumBytes);
+    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+                    const Instruction *I, CallingConv::ID CC,
+                    unsigned &NumBytes);
+    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
+
+    // OptionalDef handling routines.
+  private:
+    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
+    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
+    void AddLoadStoreOperands(EVT VT, Address &Addr,
+                              const MachineInstrBuilder &MIB);
+};
+
+} // end anonymous namespace
+
+#include "ARMGenCallingConv.inc"
+
+// DefinesOptionalPredicate - This is different from DefinesPredicate in that
+// we don't care about implicit defs here, just places we'll need to add a
+// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
+bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
+  const TargetInstrDesc &TID = MI->getDesc();
+  if (!TID.hasOptionalDef())
+    return false;
+
+  // Look to see if our OptionalDef is defining CPSR or CCR.
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    if (MO.getReg() == ARM::CPSR)
+      *CPSR = true;
+  }
+  return true;
+}
+
+// If the machine is predicable go ahead and add the predicate operands, if
+// it needs default CC operands add those.
+// TODO: If we want to support thumb1 then we'll need to deal with optional
+// CPSR defs that need to be added before the remaining operands. See s_cc_out
+// for descriptions why.
+const MachineInstrBuilder &
+ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
+  MachineInstr *MI = &*MIB;
+
+  // Do we use a predicate?
+  if (TII.isPredicable(MI))
+    AddDefaultPred(MIB);
+
+  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
+  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
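+  // (AddDefaultT1CC adds an explicit CPSR def; AddDefaultCC adds the default
+  // cc_out operand.)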
+ bool CPSR = false; + if (DefinesOptionalPredicate(MI, &CPSR)) { + if (CPSR) + AddDefaultT1CC(MIB); + else + AddDefaultCC(MIB); + } + return MIB; +} + +unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass* RC) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)); + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + const ConstantFP *FPImm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addFPImm(FPImm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, 
bool Op1IsKill, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + +unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, + unsigned Op0, bool Op0IsKill, + uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx)); + return ResultReg; +} + +// TODO: Don't worry about 64-bit now, but when this is fixed remove the +// checks from the various callers. +unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { + if (VT == MVT::f64) return 0; + + unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRS), MoveReg) + .addReg(SrcReg)); + return MoveReg; +} + +unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { + if (VT == MVT::i64) return 0; + + unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVSR), MoveReg) + .addReg(SrcReg)); + return MoveReg; +} + +// For double width floating point we need to materialize two constants +// (the high and the low) into integer registers then use a move to get +// the combined constant into an FP reg. +unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { + const APFloat Val = CFP->getValueAPF(); + bool is64bit = VT == MVT::f64; + + // This checks to see if we can use VFP3 instructions to materialize + // a constant, otherwise we have to go through the constant pool. + if (TLI.isFPImmLegal(Val, VT)) { + unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + DestReg) + .addFPImm(CFP)); + return DestReg; + } + + // Require VFP2 for loading fp constants. + if (!Subtarget->hasVFP2()) return false; + + // MachineConstantPool wants an explicit alignment. + unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); + if (Align == 0) { + // TODO: Figure out if this is correct. 
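+    // (Falling back to the type's allocation size is a conservative guess
+    // when no preferred alignment is recorded for the type.)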
+    Align = TD.getTypeAllocSize(CFP->getType());
+  }
+  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
+
+  // The extra reg is for addrmode5.
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
+                          DestReg)
+                  .addConstantPoolIndex(Idx)
+                  .addReg(0));
+  return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {
+
+  // For now 32-bit only.
+  if (VT != MVT::i32) return false;
+
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+
+  // If we can do this in a single instruction without a constant pool entry
+  // do so now.
+  const ConstantInt *CI = cast<ConstantInt>(C);
+  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
+    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(Opc), DestReg)
+                    .addImm(CI->getSExtValue()));
+    return DestReg;
+  }
+
+  // MachineConstantPool wants an explicit alignment.
+  unsigned Align = TD.getPrefTypeAlignment(C->getType());
+  if (Align == 0) {
+    // TODO: Figure out if this is correct.
+    Align = TD.getTypeAllocSize(C->getType());
+  }
+  unsigned Idx = MCP.getConstantPoolIndex(C, Align);
+
+  if (isThumb)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::t2LDRpci), DestReg)
+                    .addConstantPoolIndex(Idx));
+  else
+    // The extra immediate is for addrmode2.
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::LDRcp), DestReg)
+                    .addConstantPoolIndex(Idx)
+                    .addImm(0));
+
+  return DestReg;
+}
+
+unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
+  // For now 32-bit only.
+  if (VT != MVT::i32) return 0;
+
+  Reloc::Model RelocM = TM.getRelocationModel();
+
+  // TODO: No external globals for now.
+  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;
+
+  // TODO: Need more magic for ARM PIC.
+  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;
+
+  // MachineConstantPool wants an explicit alignment.
+  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
+  if (Align == 0) {
+    // TODO: Figure out if this is correct.
+    Align = TD.getTypeAllocSize(GV->getType());
+  }
+
+  // Grab index.
+  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
+  unsigned Id = AFI->createPICLabelUId();
+  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
+                                                       ARMCP::CPValue, PCAdj);
+  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
+
+  // Load value.
+  MachineInstrBuilder MIB;
+  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+  if (isThumb) {
+    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+          .addConstantPoolIndex(Idx);
+    if (RelocM == Reloc::PIC_)
+      MIB.addImm(Id);
+  } else {
+    // The extra immediate is for addrmode2.
+    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
+                  DestReg)
+          .addConstantPoolIndex(Idx)
+          .addImm(0);
+  }
+  AddOptionalDefs(MIB);
+  return DestReg;
+}
+
+unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
+  EVT VT = TLI.getValueType(C->getType(), true);
+
+  // Only handle simple types.
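+  // (Returning 0 makes FastISel fall back to the SelectionDAG path for this
+  // constant.)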
+  if (!VT.isSimple()) return 0;
+
+  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+    return ARMMaterializeFP(CFP, VT);
+  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+    return ARMMaterializeGV(GV, VT);
+  else if (isa<ConstantInt>(C))
+    return ARMMaterializeInt(C, VT);
+
+  return 0;
+}
+
+unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+  // Don't handle dynamic allocas.
+  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+  MVT VT;
+  if (!isLoadTypeLegal(AI->getType(), VT)) return false;
+
+  DenseMap<const AllocaInst*, int>::iterator SI =
+    FuncInfo.StaticAllocaMap.find(AI);
+
+  // This will get lowered later into the correct offsets and registers
+  // via rewriteXFrameIndex.
+  if (SI != FuncInfo.StaticAllocaMap.end()) {
+    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
+    unsigned ResultReg = createResultReg(RC);
+    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                    .addFrameIndex(SI->second)
+                    .addImm(0));
+    return ResultReg;
+  }
+
+  return 0;
+}
+
+bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
+  EVT evt = TLI.getValueType(Ty, true);
+
+  // Only handle simple types.
+  if (evt == MVT::Other || !evt.isSimple()) return false;
+  VT = evt.getSimpleVT();
+
+  // Handle all legal types, i.e. a register that will directly hold this
+  // value.
+  return TLI.isTypeLegal(VT);
+}
+
+bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
+  if (isTypeLegal(Ty, VT)) return true;
+
+  // If this is a type that can be sign or zero-extended to a basic operation
+  // go ahead and accept it now.
+  if (VT == MVT::i8 || VT == MVT::i16)
+    return true;
+
+  return false;
+}
+
+// Computes the address to get to an object.
+bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
+  // Some boilerplate from the X86 FastISel.
+  const User *U = NULL;
+  unsigned Opcode = Instruction::UserOp1;
+  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+    // Don't walk into other basic blocks unless the object is an alloca from
+    // another block, otherwise it may not have a virtual register assigned.
+    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+      Opcode = I->getOpcode();
+      U = I;
+    }
+  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+    Opcode = C->getOpcode();
+    U = C;
+  }
+
+  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
+    if (Ty->getAddressSpace() > 255)
+      // Fast instruction selection doesn't support the special
+      // address spaces.
+      return false;
+
+  switch (Opcode) {
+    default:
+      break;
+    case Instruction::BitCast: {
+      // Look through bitcasts.
+      return ARMComputeAddress(U->getOperand(0), Addr);
+    }
+    case Instruction::IntToPtr: {
+      // Look past no-op inttoptrs.
+      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+        return ARMComputeAddress(U->getOperand(0), Addr);
+      break;
+    }
+    case Instruction::PtrToInt: {
+      // Look past no-op ptrtoints.
+      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+        return ARMComputeAddress(U->getOperand(0), Addr);
+      break;
+    }
+    case Instruction::GetElementPtr: {
+      Address SavedAddr = Addr;
+      int TmpOffset = Addr.Offset;
+
+      // Iterate through the GEP folding the constants into offsets where
+      // we can.
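+      // (e.g. a constant struct index adds its StructLayout element offset;
+      // a constant array index adds index * element size.)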
+      gep_type_iterator GTI = gep_type_begin(U);
+      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
+           i != e; ++i, ++GTI) {
+        const Value *Op = *i;
+        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+          const StructLayout *SL = TD.getStructLayout(STy);
+          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+          TmpOffset += SL->getElementOffset(Idx);
+        } else {
+          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+          for (;;) {
+            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+              // Constant-offset addressing.
+              TmpOffset += CI->getSExtValue() * S;
+              break;
+            }
+            if (isa<AddOperator>(Op) &&
+                (!isa<Instruction>(Op) ||
+                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+                 == FuncInfo.MBB) &&
+                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+              // An add (in the same block) with a constant operand. Fold the
+              // constant.
+              ConstantInt *CI =
+                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+              TmpOffset += CI->getSExtValue() * S;
+              // Iterate on the other operand.
+              Op = cast<AddOperator>(Op)->getOperand(0);
+              continue;
+            }
+            // Unsupported
+            goto unsupported_gep;
+          }
+        }
+      }
+
+      // Try to grab the base operand now.
+      Addr.Offset = TmpOffset;
+      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
+
+      // We failed, restore everything and try the other options.
+      Addr = SavedAddr;
+
+      unsupported_gep:
+      break;
+    }
+    case Instruction::Alloca: {
+      const AllocaInst *AI = cast<AllocaInst>(Obj);
+      DenseMap<const AllocaInst*, int>::iterator SI =
+        FuncInfo.StaticAllocaMap.find(AI);
+      if (SI != FuncInfo.StaticAllocaMap.end()) {
+        Addr.BaseType = Address::FrameIndexBase;
+        Addr.Base.FI = SI->second;
+        return true;
+      }
+      break;
+    }
+  }
+
+  // Materialize the global variable's address into a reg which can
+  // then be used later to load the variable.
+  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
+    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
+    if (Tmp == 0) return false;
+
+    Addr.Base.Reg = Tmp;
+    return true;
+  }
+
+  // Try to get this in a register if nothing else has worked.
+  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
+  return Addr.Base.Reg != 0;
+}
+
+void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {
+
+  assert(VT.isSimple() && "Non-simple types are invalid here!");
+
+  bool needsLowering = false;
+  switch (VT.getSimpleVT().SimpleTy) {
+    default:
+      assert(false && "Unhandled load/store type!");
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i16:
+    case MVT::i32:
+      // Integer loads/stores handle 12-bit offsets.
+      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
+      break;
+    case MVT::f32:
+    case MVT::f64:
+      // Floating point operands handle 8-bit offsets.
+      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
+      break;
+  }
+
+  // If this is a stack pointer and the offset needs to be simplified then
+  // put the alloca address into a register, set the base type back to
+  // register and continue. This should almost never happen.
+  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
+    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+                                        ARM::GPRRegisterClass;
+    unsigned ResultReg = createResultReg(RC);
+    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
+                            TII.get(Opc), ResultReg)
+                    .addFrameIndex(Addr.Base.FI)
+                    .addImm(0));
+    Addr.Base.Reg = ResultReg;
+    Addr.BaseType = Address::RegBase;
+  }
+
+  // Since the offset is too large for the load/store instruction
+  // get the reg+offset into a register.
+  if (needsLowering) {
+    ARMCC::CondCodes Pred = ARMCC::AL;
+    unsigned PredReg = 0;
+
+    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
+                                        ARM::GPRRegisterClass;
+    unsigned BaseReg = createResultReg(RC);
+
+    if (!isThumb)
+      emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              BaseReg, Addr.Base.Reg, Addr.Offset,
+                              Pred, PredReg,
+                              static_cast<const ARMBaseInstrInfo&>(TII));
+    else {
+      assert(AFI->isThumb2Function());
+      emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                             BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
+                             static_cast<const ARMBaseInstrInfo&>(TII));
+    }
+    Addr.Offset = 0;
+    Addr.Base.Reg = BaseReg;
+  }
+}
+
+void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
+                                       const MachineInstrBuilder &MIB) {
+  // addrmode5 output depends on the selection dag addressing dividing the
+  // offset by 4 that it then later multiplies. Do this here as well.
+  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+      VT.getSimpleVT().SimpleTy == MVT::f64)
+    Addr.Offset /= 4;
+
+  // Frame base works a bit differently. Handle it separately.
+  if (Addr.BaseType == Address::FrameIndexBase) {
+    int FI = Addr.Base.FI;
+    int Offset = Addr.Offset;
+    MachineMemOperand *MMO =
+      FuncInfo.MF->getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(FI, Offset),
+        MachineMemOperand::MOLoad,
+        MFI.getObjectSize(FI),
+        MFI.getObjectAlignment(FI));
+    // Now add the rest of the operands.
+    MIB.addFrameIndex(FI);
+
+    // ARM halfword load/stores need an additional operand.
+    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+    MIB.addImm(Addr.Offset);
+    MIB.addMemOperand(MMO);
+  } else {
+    // Now add the rest of the operands.
+    MIB.addReg(Addr.Base.Reg);
+
+    // ARM halfword load/stores need an additional operand.
+    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);
+
+    MIB.addImm(Addr.Offset);
+  }
+  AddOptionalDefs(MIB);
+}
+
+bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {
+
+  assert(VT.isSimple() && "Non-simple types are invalid here!");
+  unsigned Opc;
+  TargetRegisterClass *RC;
+  switch (VT.getSimpleVT().SimpleTy) {
+    // This is mostly going to be Neon/vector support.
+    default: return false;
+    case MVT::i16:
+      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
+      RC = ARM::GPRRegisterClass;
+      break;
+    case MVT::i8:
+      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
+      RC = ARM::GPRRegisterClass;
+      break;
+    case MVT::i32:
+      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
+      RC = ARM::GPRRegisterClass;
+      break;
+    case MVT::f32:
+      Opc = ARM::VLDRS;
+      RC = TLI.getRegClassFor(VT);
+      break;
+    case MVT::f64:
+      Opc = ARM::VLDRD;
+      RC = TLI.getRegClassFor(VT);
+      break;
+  }
+  // Simplify this down to something we can handle.
+  ARMSimplifyAddress(Addr, VT);
+
+  // Create the base instruction, then add the operands.
+  ResultReg = createResultReg(RC);
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(Opc), ResultReg);
+  AddLoadStoreOperands(VT, Addr, MIB);
+  return true;
+}
+
+bool ARMFastISel::SelectLoad(const Instruction *I) {
+  // Verify we have a legal type before going any further.
+  MVT VT;
+  if (!isLoadTypeLegal(I->getType(), VT))
+    return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
+
+  unsigned ResultReg;
+  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
+  UpdateValueMap(I, ResultReg);
+  return true;
+}
+
+bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
+  unsigned StrOpc;
+  switch (VT.getSimpleVT().SimpleTy) {
+    // This is mostly going to be Neon/vector support.
+    default: return false;
+    case MVT::i1: {
+      unsigned Res = createResultReg(isThumb ?
ARM::tGPRRegisterClass :
+                                     ARM::GPRRegisterClass);
+      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(Opc), Res)
+                      .addReg(SrcReg).addImm(1));
+      SrcReg = Res;
+    } // Fallthrough here.
+    case MVT::i8:
+      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
+      break;
+    case MVT::i16:
+      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
+      break;
+    case MVT::i32:
+      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
+      break;
+    case MVT::f32:
+      if (!Subtarget->hasVFP2()) return false;
+      StrOpc = ARM::VSTRS;
+      break;
+    case MVT::f64:
+      if (!Subtarget->hasVFP2()) return false;
+      StrOpc = ARM::VSTRD;
+      break;
+  }
+  // Simplify this down to something we can handle.
+  ARMSimplifyAddress(Addr, VT);
+
+  // Create the base instruction, then add the operands.
+  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                    TII.get(StrOpc))
+                            .addReg(SrcReg, getKillRegState(true));
+  AddLoadStoreOperands(VT, Addr, MIB);
+  return true;
+}
+
+bool ARMFastISel::SelectStore(const Instruction *I) {
+  Value *Op0 = I->getOperand(0);
+  unsigned SrcReg = 0;
+
+  // Verify we have a legal type before going any further.
+  MVT VT;
+  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
+    return false;
+
+  // Get the value to be stored into a register.
+  SrcReg = getRegForValue(Op0);
+  if (SrcReg == 0) return false;
+
+  // See if we can handle this address.
+  Address Addr;
+  if (!ARMComputeAddress(I->getOperand(1), Addr))
+    return false;
+
+  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
+  return true;
+}
+
+static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
+  switch (Pred) {
+    // Needs two compares...
+    case CmpInst::FCMP_ONE:
+    case CmpInst::FCMP_UEQ:
+    default:
+      // AL is our "false" for now. The other two need more compares.
+      return ARMCC::AL;
+    case CmpInst::ICMP_EQ:
+    case CmpInst::FCMP_OEQ:
+      return ARMCC::EQ;
+    case CmpInst::ICMP_SGT:
+    case CmpInst::FCMP_OGT:
+      return ARMCC::GT;
+    case CmpInst::ICMP_SGE:
+    case CmpInst::FCMP_OGE:
+      return ARMCC::GE;
+    case CmpInst::ICMP_UGT:
+    case CmpInst::FCMP_UGT:
+      return ARMCC::HI;
+    case CmpInst::FCMP_OLT:
+      return ARMCC::MI;
+    case CmpInst::ICMP_ULE:
+    case CmpInst::FCMP_OLE:
+      return ARMCC::LS;
+    case CmpInst::FCMP_ORD:
+      return ARMCC::VC;
+    case CmpInst::FCMP_UNO:
+      return ARMCC::VS;
+    case CmpInst::FCMP_UGE:
+      return ARMCC::PL;
+    case CmpInst::ICMP_SLT:
+    case CmpInst::FCMP_ULT:
+      return ARMCC::LT;
+    case CmpInst::ICMP_SLE:
+    case CmpInst::FCMP_ULE:
+      return ARMCC::LE;
+    case CmpInst::FCMP_UNE:
+    case CmpInst::ICMP_NE:
+      return ARMCC::NE;
+    case CmpInst::ICMP_UGE:
+      return ARMCC::HS;
+    case CmpInst::ICMP_ULT:
+      return ARMCC::LO;
+  }
+}
+
+bool ARMFastISel::SelectBranch(const Instruction *I) {
+  const BranchInst *BI = cast<BranchInst>(I);
+  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+  // Simple branch support.
+
+  // If we can, avoid recomputing the compare - redoing it could lead to wonky
+  // behavior.
+  // TODO: Factor this out.
+  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
+      MVT VT;
+      const Type *Ty = CI->getOperand(0)->getType();
+      if (!isTypeLegal(Ty, VT))
+        return false;
+
+      bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+      if (isFloat && !Subtarget->hasVFP2())
+        return false;
+
+      unsigned CmpOpc;
+      switch (VT.SimpleTy) {
+        default: return false;
+        // TODO: Verify compares.
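+        // (VCMPES/VCMPED are the "E" variants that raise an invalid-operation
+        // exception on any NaN operand, unlike plain VCMP; that is presumably
+        // what needs verifying.)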
+        case MVT::f32:
+          CmpOpc = ARM::VCMPES;
+          break;
+        case MVT::f64:
+          CmpOpc = ARM::VCMPED;
+          break;
+        case MVT::i32:
+          CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+          break;
+      }
+
+      // Get the compare predicate.
+      ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+      // We may not handle every CC for now.
+      if (ARMPred == ARMCC::AL) return false;
+
+      unsigned Arg1 = getRegForValue(CI->getOperand(0));
+      if (Arg1 == 0) return false;
+
+      unsigned Arg2 = getRegForValue(CI->getOperand(1));
+      if (Arg2 == 0) return false;
+
+      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                              TII.get(CmpOpc))
+                      .addReg(Arg1).addReg(Arg2));
+
+      // For floating point we need to move the result to a comparison register
+      // that we can then use for branches.
+      if (isFloat)
+        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                                TII.get(ARM::FMSTAT)));
+
+      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
+      FastEmitBranch(FBB, DL);
+      FuncInfo.MBB->addSuccessor(TBB);
+      return true;
+    }
+  }
+
+  unsigned CmpReg = getRegForValue(BI->getCondition());
+  if (CmpReg == 0) return false;
+
+  // Re-set the flags just in case.
+  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(CmpReg).addImm(0));
+
+  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
+  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
+    .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  FastEmitBranch(FBB, DL);
+  FuncInfo.MBB->addSuccessor(TBB);
+  return true;
+}
+
+bool ARMFastISel::SelectCmp(const Instruction *I) {
+  const CmpInst *CI = cast<CmpInst>(I);
+
+  MVT VT;
+  const Type *Ty = CI->getOperand(0)->getType();
+  if (!isTypeLegal(Ty, VT))
+    return false;
+
+  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
+  if (isFloat && !Subtarget->hasVFP2())
+    return false;
+
+  unsigned CmpOpc;
+  unsigned CondReg;
+  switch (VT.SimpleTy) {
+    default: return false;
+    // TODO: Verify compares.
+    case MVT::f32:
+      CmpOpc = ARM::VCMPES;
+      CondReg = ARM::FPSCR;
+      break;
+    case MVT::f64:
+      CmpOpc = ARM::VCMPED;
+      CondReg = ARM::FPSCR;
+      break;
+    case MVT::i32:
+      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
+      CondReg = ARM::CPSR;
+      break;
+  }
+
+  // Get the compare predicate.
+  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
+
+  // We may not handle every CC for now.
+  if (ARMPred == ARMCC::AL) return false;
+
+  unsigned Arg1 = getRegForValue(CI->getOperand(0));
+  if (Arg1 == 0) return false;
+
+  unsigned Arg2 = getRegForValue(CI->getOperand(1));
+  if (Arg2 == 0) return false;
+
+  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
+                  .addReg(Arg1).addReg(Arg2));
+
+  // For floating point we need to move the result to a comparison register
+  // that we can then use for branches.
+  if (isFloat)
+    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                            TII.get(ARM::FMSTAT)));
+
+  // Now set a register based on the comparison. Explicitly set the predicates
+  // here.
+  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
+  TargetRegisterClass *RC = isThumb ?
ARM::rGPRRegisterClass + : ARM::GPRRegisterClass; + unsigned DestReg = createResultReg(RC); + Constant *Zero + = ConstantInt::get(Type::getInt32Ty(*Context), 0); + unsigned ZeroReg = TargetMaterializeConstant(Zero); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) + .addReg(ZeroReg).addImm(1) + .addImm(ARMPred).addReg(CondReg); + + UpdateValueMap(I, DestReg); + return true; +} + +bool ARMFastISel::SelectFPExt(const Instruction *I) { + // Make sure we have VFP and that we're extending float to double. + if (!Subtarget->hasVFP2()) return false; + + Value *V = I->getOperand(0); + if (!I->getType()->isDoubleTy() || + !V->getType()->isFloatTy()) return false; + + unsigned Op = getRegForValue(V); + if (Op == 0) return false; + + unsigned Result = createResultReg(ARM::DPRRegisterClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VCVTDS), Result) + .addReg(Op)); + UpdateValueMap(I, Result); + return true; +} + +bool ARMFastISel::SelectFPTrunc(const Instruction *I) { + // Make sure we have VFP and that we're truncating double to float. + if (!Subtarget->hasVFP2()) return false; + + Value *V = I->getOperand(0); + if (!(I->getType()->isFloatTy() && + V->getType()->isDoubleTy())) return false; + + unsigned Op = getRegForValue(V); + if (Op == 0) return false; + + unsigned Result = createResultReg(ARM::SPRRegisterClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VCVTSD), Result) + .addReg(Op)); + UpdateValueMap(I, Result); + return true; +} + +bool ARMFastISel::SelectSIToFP(const Instruction *I) { + // Make sure we have VFP. + if (!Subtarget->hasVFP2()) return false; + + MVT DstVT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, DstVT)) + return false; + + unsigned Op = getRegForValue(I->getOperand(0)); + if (Op == 0) return false; + + // The conversion routine works on fp-reg to fp-reg and the operand above + // was an integer, move it to the fp registers if possible. + unsigned FP = ARMMoveToFPReg(MVT::f32, Op); + if (FP == 0) return false; + + unsigned Opc; + if (Ty->isFloatTy()) Opc = ARM::VSITOS; + else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; + else return 0; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + ResultReg) + .addReg(FP)); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectFPToSI(const Instruction *I) { + // Make sure we have VFP. + if (!Subtarget->hasVFP2()) return false; + + MVT DstVT; + const Type *RetTy = I->getType(); + if (!isTypeLegal(RetTy, DstVT)) + return false; + + unsigned Op = getRegForValue(I->getOperand(0)); + if (Op == 0) return false; + + unsigned Opc; + const Type *OpTy = I->getOperand(0)->getType(); + if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; + else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; + else return 0; + + // f64->s32 or f32->s32 both need an intermediate f32 reg. + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), + ResultReg) + .addReg(Op)); + + // This result needs to be in an integer register, but the conversion only + // takes place in fp-regs. 
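+  // (ARMMoveToIntReg below performs that final move.)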
+ unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); + if (IntReg == 0) return false; + + UpdateValueMap(I, IntReg); + return true; +} + +bool ARMFastISel::SelectSelect(const Instruction *I) { + MVT VT; + if (!isTypeLegal(I->getType(), VT)) + return false; + + // Things need to be register sized for register moves. + if (VT != MVT::i32) return false; + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + + unsigned CondReg = getRegForValue(I->getOperand(0)); + if (CondReg == 0) return false; + unsigned Op1Reg = getRegForValue(I->getOperand(1)); + if (Op1Reg == 0) return false; + unsigned Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + + unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(CondReg).addImm(1)); + unsigned ResultReg = createResultReg(RC); + unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op1Reg).addReg(Op2Reg) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + UpdateValueMap(I, ResultReg); + return true; +} + +bool ARMFastISel::SelectSDiv(const Instruction *I) { + MVT VT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + // If we have integer div support we should have selected this automagically. + // In case we have a real miss go ahead and return false and we'll pick + // it up later. + if (Subtarget->hasDivide()) return false; + + // Otherwise emit a libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i8) + LC = RTLIB::SDIV_I8; + else if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + return ARMEmitLibcall(I, LC); +} + +bool ARMFastISel::SelectSRem(const Instruction *I) { + MVT VT; + const Type *Ty = I->getType(); + if (!isTypeLegal(Ty, VT)) + return false; + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i8) + LC = RTLIB::SREM_I8; + else if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + return ARMEmitLibcall(I, LC); +} + +bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { + EVT VT = TLI.getValueType(I->getType(), true); + + // We can get here in the case when we want to use NEON for our fp + // operations, but can't figure out how to. Just use the vfp instructions + // if we have them. + // FIXME: It'd be nice to use NEON instructions. + const Type *Ty = I->getType(); + bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); + if (isFloat && !Subtarget->hasVFP2()) + return false; + + unsigned Op1 = getRegForValue(I->getOperand(0)); + if (Op1 == 0) return false; + + unsigned Op2 = getRegForValue(I->getOperand(1)); + if (Op2 == 0) return false; + + unsigned Opc; + bool is64bit = VT == MVT::f64 || VT == MVT::i64; + switch (ISDOpcode) { + default: return false; + case ISD::FADD: + Opc = is64bit ? ARM::VADDD : ARM::VADDS; + break; + case ISD::FSUB: + Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; + break; + case ISD::FMUL: + Opc = is64bit ? 
ARM::VMULD : ARM::VMULS; + break; + } + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg) + .addReg(Op1).addReg(Op2)); + UpdateValueMap(I, ResultReg); + return true; +} + +// Call Handling Code + +bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, + EVT SrcVT, unsigned &ResultReg) { + unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, + Src, /*TODO: Kill=*/false); + + if (RR != 0) { + ResultReg = RR; + return true; + } else + return false; +} + +// This is largely taken directly from CCAssignFnForNode - we don't support +// varargs in FastISel so that part has been removed. +// TODO: We may not support all of this. +CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { + switch (CC) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + // Ignore fastcc. Silence compiler warnings. + (void)RetFastCC_ARM_APCS; + (void)FastCC_ARM_APCS; + // Fallthrough + case CallingConv::C: + // Use target triple & subtarget features to do actual dispatch. + if (Subtarget->isAAPCS_ABI()) { + if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard) + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + else + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + } else + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); + } +} + +bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, + SmallVectorImpl &ArgRegs, + SmallVectorImpl &ArgVTs, + SmallVectorImpl &ArgFlags, + SmallVectorImpl &RegArgs, + CallingConv::ID CC, + unsigned &NumBytes) { + SmallVector ArgLocs; + CCState CCInfo(CC, false, TM, ArgLocs, *Context); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); + + // Get a count of how many bytes are to be pushed on the stack. + NumBytes = CCInfo.getNextStackOffset(); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(AdjStackDown)) + .addImm(NumBytes)); + + // Process the args. + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + unsigned Arg = ArgRegs[VA.getValNo()]; + MVT ArgVT = ArgVTs[VA.getValNo()]; + + // We don't handle NEON/vector parameters yet. + if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64) + return false; + + // Handle arg promotion, etc. 
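Before the argument-promotion switch that appears shortly below, a quick illustration of what SExt/ZExt promotion of a small argument actually computes. This is a hedged, standalone sketch in plain C++ (no LLVM API), showing an i8 widened to an i32 location type:

#include <cassert>
#include <cstdint>

int main() {
  int8_t arg = -1;                           // 0xFF as a signed byte
  int32_t sext = static_cast<int32_t>(arg);  // SIGN_EXTEND: 0xFFFFFFFF
  uint32_t zext = static_cast<uint8_t>(arg); // ZERO_EXTEND: 0x000000FF
  assert(sext == -1);
  assert(zext == 0xFF);
  return 0;
}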
+ switch (VA.getLocInfo()) { + case CCValAssign::Full: break; + case CCValAssign::SExt: { + bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a sext!"); (void)Emitted; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::ZExt: { + bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + assert(Emitted && "Failed to emit a zext!"); (void)Emitted; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::AExt: { + bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + if (!Emitted) + Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), + Arg, ArgVT, Arg); + + assert(Emitted && "Failed to emit an aext!"); (void)Emitted; + ArgVT = VA.getLocVT(); + break; + } + case CCValAssign::BCvt: { + unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg, + /*TODO: Kill=*/false); + assert(BC != 0 && "Failed to emit a bitcast!"); + Arg = BC; + ArgVT = VA.getLocVT(); + break; + } + default: llvm_unreachable("Unknown arg promotion!"); + } + + // Now copy/store arg to correct locations. + if (VA.isRegLoc() && !VA.needsCustom()) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + VA.getLocReg()) + .addReg(Arg); + RegArgs.push_back(VA.getLocReg()); + } else if (VA.needsCustom()) { + // TODO: We need custom lowering for vector (v2f64) args. + if (VA.getLocVT() != MVT::f64) return false; + + CCValAssign &NextVA = ArgLocs[++i]; + + // TODO: Only handle register args for now. + if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false; + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVRRD), VA.getLocReg()) + .addReg(NextVA.getLocReg(), RegState::Define) + .addReg(Arg)); + RegArgs.push_back(VA.getLocReg()); + RegArgs.push_back(NextVA.getLocReg()); + } else { + assert(VA.isMemLoc()); + // Need to store on the stack. + Address Addr; + Addr.BaseType = Address::RegBase; + Addr.Base.Reg = ARM::SP; + Addr.Offset = VA.getLocMemOffset(); + + if (!ARMEmitStore(ArgVT, Arg, Addr)) return false; + } + } + return true; +} + +bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, + const Instruction *I, CallingConv::ID CC, + unsigned &NumBytes) { + // Issue CALLSEQ_END + unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(AdjStackUp)) + .addImm(NumBytes).addImm(0)); + + // Now the return value. + if (RetVT != MVT::isVoid) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CC, false, TM, RVLocs, *Context); + CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); + + // Copy all of the result registers out of their specified physreg. + if (RVLocs.size() == 2 && RetVT == MVT::f64) { + // For this move we copy into two registers and then move into the + // double fp reg we want. + EVT DestVT = RVLocs[0].getValVT(); + TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); + unsigned ResultReg = createResultReg(DstRC); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::VMOVDRR), ResultReg) + .addReg(RVLocs[0].getLocReg()) + .addReg(RVLocs[1].getLocReg())); + + UsedRegs.push_back(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[1].getLocReg()); + + // Finally update the result.
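An aside on the f64 return path just shown: with a soft-float calling convention an f64 comes back in two core registers, and VMOVDRR reassembles it. The same reassembly in portable C++, as a hedged sketch (assumes a little-endian target, as on typical ARM configurations):

#include <cassert>
#include <cstdint>
#include <cstring>

double fromHalves(uint32_t lo, uint32_t hi) {
  uint64_t bits = (static_cast<uint64_t>(hi) << 32) | lo;
  double d;
  std::memcpy(&d, &bits, sizeof d);  // bit-for-bit move, like VMOVDRR
  return d;
}

int main() {
  // 1.0 is 0x3FF0000000000000 in IEEE-754 binary64.
  assert(fromHalves(0x00000000u, 0x3FF00000u) == 1.0);
  return 0;
}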
+ UpdateValueMap(I, ResultReg); + } else { + assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); + EVT CopyVT = RVLocs[0].getValVT(); + TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); + + unsigned ResultReg = createResultReg(DstRC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[0].getLocReg()); + + // Finally update the result. + UpdateValueMap(I, ResultReg); + } + } + + return true; +} + +bool ARMFastISel::SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + if (F.isVarArg()) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (Ret->getNumOperands() > 0) { + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ValLocs; + CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); + + const Value *RV = Ret->getOperand(0); + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. + if (!VA.isRegLoc()) + return false; + // TODO: For now, don't try to handle cases where getLocInfo() + // says Full but the types don't match. + if (TLI.getValueType(RV->getType()) != VA.getValVT()) + return false; + + // Make the copy. + unsigned SrcReg = Reg + VA.getValNo(); + unsigned DstReg = VA.getLocReg(); + const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + // Avoid a cross-class copy. This is very unlikely. + if (!SrcRC->contains(DstReg)) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + DstReg).addReg(SrcReg); + + // Mark the register as live out of the function. + MRI.addLiveOut(VA.getLocReg()); + } + + unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(RetOpc))); + return true; +} + +unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { + + // Depend our opcode for thumb on whether or not we're targeting an + // externally callable function. For libcalls we'll just pass a NULL GV + // in here. + bool isExternal = false; + if (!GV || GV->hasExternalLinkage()) isExternal = true; + + // Darwin needs the r9 versions of the opcodes. + bool isDarwin = Subtarget->isTargetDarwin(); + if (isThumb && isExternal) { + return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; + } else if (isThumb) { + return isDarwin ? ARM::tBLr9 : ARM::tBL; + } else { + return isDarwin ? ARM::BLr9 : ARM::BL; + } +} + +// A quick function that will emit a call for a named libcall in F with the +// vector of passed arguments for the Instruction in I. We can assume that we +// can emit a call for any libcall we can produce. This is an abridged version +// of the full call infrastructure since we won't need to worry about things +// like computed function pointers or strange arguments at call sites. +// TODO: Try to unify this and the normal call bits for ARM, then try to unify +// with X86. 
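ARMSelectCallOp above is a small decision table; restated as a self-contained C++ function for reference. The string names stand in for the real opcode enums (illustrative only, not LLVM API):

#include <cassert>
#include <string>

std::string selectCallOp(bool isThumb, bool isExternal, bool isDarwin) {
  if (isThumb && isExternal) return isDarwin ? "tBLXi_r9" : "tBLXi";
  if (isThumb)               return isDarwin ? "tBLr9"    : "tBL";
  return isDarwin ? "BLr9" : "BL";
}

int main() {
  // A libcall (NULL GV) counts as external, so Thumb code uses a BLX form.
  assert(selectCallOp(true, true, true) == "tBLXi_r9");
  assert(selectCallOp(false, false, false) == "BL");
  return 0;
}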
+bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { + CallingConv::ID CC = TLI.getLibcallCallingConv(Call); + + // Handle *simple* calls for now. + const Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT)) + return false; + + // For now we're using BLX etc on the assumption that we have v5t ops. + if (!Subtarget->hasV5TOps()) return false; + + // TODO: For now if we have long calls specified we don't handle the call. + if (EnableARMLongCalls) return false; + + // Set up the argument vectors. + SmallVector Args; + SmallVector ArgRegs; + SmallVector ArgVTs; + SmallVector ArgFlags; + Args.reserve(I->getNumOperands()); + ArgRegs.reserve(I->getNumOperands()); + ArgVTs.reserve(I->getNumOperands()); + ArgFlags.reserve(I->getNumOperands()); + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + Value *Op = I->getOperand(i); + unsigned Arg = getRegForValue(Op); + if (Arg == 0) return false; + + const Type *ArgTy = Op->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT)) return false; + + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(Op); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Handle the arguments now that we've gotten them. + SmallVector RegArgs; + unsigned NumBytes; + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + return false; + + // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // TODO: Turn this into the table of arm call ops. + MachineInstrBuilder MIB; + unsigned CallOpc = ARMSelectCallOp(NULL); + if(isThumb) + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc))) + .addExternalSymbol(TLI.getLibcallName(Call)); + else + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addExternalSymbol(TLI.getLibcallName(Call))); + + // Add implicit physical register uses to the call. + for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) + MIB.addReg(RegArgs[i]); + + // Finish off the call including any return values. + SmallVector UsedRegs; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + + // Set all unused physreg defs as dead. + static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; +} + +bool ARMFastISel::SelectCall(const Instruction *I) { + const CallInst *CI = cast(I); + const Value *Callee = CI->getCalledValue(); + + // Can't handle inline asm or worry about intrinsics yet. + if (isa(Callee) || isa(CI)) return false; + + // Only handle global variable Callees that are direct calls. + const GlobalValue *GV = dyn_cast(Callee); + if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel())) + return false; + + // Check the calling convention. + ImmutableCallSite CS(CI); + CallingConv::ID CC = CS.getCallingConv(); + + // TODO: Avoid some calling conventions? + + // Let SDISel handle vararg functions. + const PointerType *PT = cast(CS.getCalledValue()->getType()); + const FunctionType *FTy = cast(PT->getElementType()); + if (FTy->isVarArg()) + return false; + + // Handle *simple* calls for now. 
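ARMEmitLibcall consumes the RTLIB values chosen in SelectSDiv/SelectSRem earlier: one libcall per legal integer width. A sketch of that width-to-name dispatch; the names shown are the generic libgcc defaults, and AEABI targets remap some of them (e.g. the 32-bit case to __aeabi_idiv), so treat the strings as illustrative:

#include <cassert>
#include <string>

std::string sdivLibcall(unsigned bits) {
  switch (bits) {
  case 8:   return "__divqi3";  // RTLIB::SDIV_I8
  case 16:  return "__divhi3";  // RTLIB::SDIV_I16
  case 32:  return "__divsi3";  // RTLIB::SDIV_I32
  case 64:  return "__divdi3";  // RTLIB::SDIV_I64
  case 128: return "__divti3";  // RTLIB::SDIV_I128
  default:  return "";          // no libcall: the caller must bail out
  }
}

int main() {
  assert(sdivLibcall(32) == "__divsi3");
  assert(sdivLibcall(7).empty());
  return 0;
}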
+ const Type *RetTy = I->getType(); + MVT RetVT; + if (RetTy->isVoidTy()) + RetVT = MVT::isVoid; + else if (!isTypeLegal(RetTy, RetVT)) + return false; + + // For now we're using BLX etc on the assumption that we have v5t ops. + // TODO: Maybe? + if (!Subtarget->hasV5TOps()) return false; + + // TODO: For now if we have long calls specified we don't handle the call. + if (EnableARMLongCalls) return false; + + // Set up the argument vectors. + SmallVector Args; + SmallVector ArgRegs; + SmallVector ArgVTs; + SmallVector ArgFlags; + Args.reserve(CS.arg_size()); + ArgRegs.reserve(CS.arg_size()); + ArgVTs.reserve(CS.arg_size()); + ArgFlags.reserve(CS.arg_size()); + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + unsigned Arg = getRegForValue(*i); + + if (Arg == 0) + return false; + ISD::ArgFlagsTy Flags; + unsigned AttrInd = i - CS.arg_begin() + 1; + if (CS.paramHasAttr(AttrInd, Attribute::SExt)) + Flags.setSExt(); + if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) + Flags.setZExt(); + + // FIXME: Only handle *easy* calls for now. + if (CS.paramHasAttr(AttrInd, Attribute::InReg) || + CS.paramHasAttr(AttrInd, Attribute::StructRet) || + CS.paramHasAttr(AttrInd, Attribute::Nest) || + CS.paramHasAttr(AttrInd, Attribute::ByVal)) + return false; + + const Type *ArgTy = (*i)->getType(); + MVT ArgVT; + if (!isTypeLegal(ArgTy, ArgVT)) + return false; + unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); + Flags.setOrigAlign(OriginalAlignment); + + Args.push_back(*i); + ArgRegs.push_back(Arg); + ArgVTs.push_back(ArgVT); + ArgFlags.push_back(Flags); + } + + // Handle the arguments now that we've gotten them. + SmallVector RegArgs; + unsigned NumBytes; + if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) + return false; + + // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // TODO: Turn this into the table of arm call ops. + MachineInstrBuilder MIB; + unsigned CallOpc = ARMSelectCallOp(GV); + // Explicitly adding the predicate here. + if(isThumb) + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc))) + .addGlobalAddress(GV, 0, 0); + else + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addGlobalAddress(GV, 0, 0)); + + // Add implicit physical register uses to the call. + for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) + MIB.addReg(RegArgs[i]); + + // Finish off the call including any return values. + SmallVector UsedRegs; + if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; + + // Set all unused physreg defs as dead. + static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + + return true; + +} + +// TODO: SoftFP support. 
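SelectCall above screens out every argument attribute FastISel cannot model yet and punts those calls back to SelectionDAG. A compact, standalone restatement of that screening (plain C++, hypothetical names, not LLVM API):

#include <cassert>

struct ArgAttrs { bool inReg, structRet, nest, byVal; };

bool fastISelCanHandle(const ArgAttrs &a) {
  // Mirrors the FIXME above: only "easy" arguments are handled for now.
  return !(a.inReg || a.structRet || a.nest || a.byVal);
}

int main() {
  ArgAttrs easy = {false, false, false, false};
  ArgAttrs sret = {false, true, false, false};
  assert(fastISelCanHandle(easy));
  assert(!fastISelCanHandle(sret));  // sret punts to SelectionDAG
  return 0;
}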
+bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { + + switch (I->getOpcode()) { + case Instruction::Load: + return SelectLoad(I); + case Instruction::Store: + return SelectStore(I); + case Instruction::Br: + return SelectBranch(I); + case Instruction::ICmp: + case Instruction::FCmp: + return SelectCmp(I); + case Instruction::FPExt: + return SelectFPExt(I); + case Instruction::FPTrunc: + return SelectFPTrunc(I); + case Instruction::SIToFP: + return SelectSIToFP(I); + case Instruction::FPToSI: + return SelectFPToSI(I); + case Instruction::FAdd: + return SelectBinaryOp(I, ISD::FADD); + case Instruction::FSub: + return SelectBinaryOp(I, ISD::FSUB); + case Instruction::FMul: + return SelectBinaryOp(I, ISD::FMUL); + case Instruction::SDiv: + return SelectSDiv(I); + case Instruction::SRem: + return SelectSRem(I); + case Instruction::Call: + return SelectCall(I); + case Instruction::Select: + return SelectSelect(I); + case Instruction::Ret: + return SelectRet(I); + default: break; + } + return false; +} + +namespace llvm { + llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { + // Completely untested on non-darwin. + const TargetMachine &TM = funcInfo.MF->getTarget(); + + // Darwin and thumb1 only for now. + const ARMSubtarget *Subtarget = &TM.getSubtarget(); + if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() && + !DisableARMFastISel) + return new ARMFastISel(funcInfo); + return 0; + } +} diff --git a/final/lib/Target/ARM/ARMFixupKinds.h b/final/lib/Target/ARM/ARMFixupKinds.h new file mode 100644 index 00000000000..3d175e38690 --- /dev/null +++ b/final/lib/Target/ARM/ARMFixupKinds.h @@ -0,0 +1,97 @@ +//===-- ARM/ARMFixupKinds.h - ARM Specific Fixup Entries --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ARM_ARMFIXUPKINDS_H +#define LLVM_ARM_ARMFIXUPKINDS_H + +#include "llvm/MC/MCFixup.h" + +namespace llvm { +namespace ARM { +enum Fixups { + // fixup_arm_ldst_pcrel_12 - 12-bit PC relative relocation for symbol + // addresses + fixup_arm_ldst_pcrel_12 = FirstTargetFixupKind, + + // fixup_t2_ldst_pcrel_12 - Equivalent to fixup_arm_ldst_pcrel_12, with + // the 16-bit halfwords reordered. + fixup_t2_ldst_pcrel_12, + + // fixup_arm_pcrel_10 - 10-bit PC relative relocation for symbol addresses + // used in VFP instructions where the lower 2 bits are not encoded + // (so it's encoded as an 8-bit immediate). + fixup_arm_pcrel_10, + // fixup_t2_pcrel_10 - Equivalent to fixup_arm_pcrel_10, accounting for + // the short-swapped encoding of Thumb2 instructions. + fixup_t2_pcrel_10, + // fixup_thumb_adr_pcrel_10 - 10-bit PC relative relocation for symbol + // addresses where the lower 2 bits are not encoded (so it's encoded as an + // 8-bit immediate). + fixup_thumb_adr_pcrel_10, + // fixup_arm_adr_pcrel_12 - 12-bit PC relative relocation for the ADR + // instruction. + fixup_arm_adr_pcrel_12, + // fixup_t2_adr_pcrel_12 - 12-bit PC relative relocation for the ADR + // instruction. + fixup_t2_adr_pcrel_12, + // fixup_arm_condbranch - 24-bit PC relative relocation for conditional branch + // instructions. + fixup_arm_condbranch, + // fixup_arm_uncondbranch - 24-bit PC relative relocation for + // branch instructions. 
fixup_arm_uncondbranch, + // fixup_t2_condbranch - 20-bit PC relative relocation for Thumb2 direct + // conditional branch instructions. + fixup_t2_condbranch, + // fixup_t2_uncondbranch - 20-bit PC relative relocation for Thumb2 direct + // unconditional branch instructions. + fixup_t2_uncondbranch, + + // fixup_arm_thumb_br - 12-bit fixup for Thumb B instructions. + fixup_arm_thumb_br, + + // fixup_arm_thumb_bl - Fixup for Thumb BL instructions. + fixup_arm_thumb_bl, + + // fixup_arm_thumb_blx - Fixup for Thumb BLX instructions. + fixup_arm_thumb_blx, + + // fixup_arm_thumb_cb - Fixup for Thumb branch instructions. + fixup_arm_thumb_cb, + + // fixup_arm_thumb_cp - Fixup for Thumb load/store from constant pool instrs. + fixup_arm_thumb_cp, + + // fixup_arm_thumb_bcc - Fixup for Thumb conditional branching instructions. + fixup_arm_thumb_bcc, + + // The next four are for the movt/movw pairs; + // the 16-bit imm field is split into imm{15-12} and imm{11-0} + fixup_arm_movt_hi16, // :upper16: + fixup_arm_movw_lo16, // :lower16: + fixup_t2_movt_hi16, // :upper16: + fixup_t2_movw_lo16, // :lower16: + + // It is possible to create an "immediate" that happens to be pcrel. + // movw r0, :lower16:Foo-(Bar+8) and movt r0, :upper16:Foo-(Bar+8) + // result in different reloc tags than the above four. + // Needed to support ELF::R_ARM_MOVT_PREL and ELF::R_ARM_MOVW_PREL_NC + fixup_arm_movt_hi16_pcrel, // :upper16: + fixup_arm_movw_lo16_pcrel, // :lower16: + fixup_t2_movt_hi16_pcrel, // :upper16: + fixup_t2_movw_lo16_pcrel, // :lower16: + + // Marker + LastTargetFixupKind, + NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind +}; +} +} + +#endif diff --git a/final/lib/Target/ARM/ARMFrameLowering.cpp b/final/lib/Target/ARM/ARMFrameLowering.cpp new file mode 100644 index 00000000000..4ade29c2d15 --- /dev/null +++ b/final/lib/Target/ARM/ARMFrameLowering.cpp @@ -0,0 +1,1041 @@ +//=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of TargetFrameLowering class. +// +//===----------------------------------------------------------------------===// + +#include "ARMFrameLowering.h" +#include "ARMAddressingModes.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +/// hasFP - Return true if the specified function should have a dedicated frame +/// pointer register. This is true if the function has variable sized allocas +/// or if frame pointer elimination is disabled. +bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + + // Mac OS X requires FP not to be clobbered for backtracing purposes. + if (STI.isTargetDarwin()) + return true; + + const MachineFrameInfo *MFI = MF.getFrameInfo(); + // Always eliminate non-leaf frame pointers.
+ return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || + RegInfo->needsStackRealignment(MF) || + MFI->hasVarSizedObjects() || + MFI->isFrameAddressTaken()); +} + +/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is +/// not required, we reserve argument space for call sites in the function +/// immediately on entry to the current function. This eliminates the need for +/// add/sub sp brackets around call sites. Returns true if the call frame is +/// included as part of the stack frame. +bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + unsigned CFSize = FFI->getMaxCallFrameSize(); + // It's not always a good idea to include the call frame as part of the + // stack frame. ARM (especially Thumb) has small immediate offset to + // address the stack frame. So a large call frame can cause poor codegen + // and may even makes it impossible to scavenge a register. + if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 + return false; + + return !MF.getFrameInfo()->hasVarSizedObjects(); +} + +/// canSimplifyCallFramePseudos - If there is a reserved call frame, the +/// call frame pseudos can be simplified. Unlike most targets, having a FP +/// is not sufficient here since we still may reference some objects via SP +/// even when FP is available in Thumb2 mode. +bool +ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { + return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); +} + +static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + +static bool isCSRestore(MachineInstr *MI, + const ARMBaseInstrInfo &TII, + const unsigned *CSRegs) { + // Integer spill area is handled with "pop". + if (MI->getOpcode() == ARM::LDMIA_RET || + MI->getOpcode() == ARM::t2LDMIA_RET || + MI->getOpcode() == ARM::LDMIA_UPD || + MI->getOpcode() == ARM::t2LDMIA_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD) { + // The first two operands are predicates. The last two are + // imp-def and imp-use of SP. Check everything in between. 
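The hasReservedCallFrame heuristic above is worth a worked restatement: the call frame is folded into the fixed stack frame only while the resulting offsets stay comfortably inside an ARM imm12. A self-contained sketch of the same test (plain C++, not LLVM API):

#include <cassert>

bool reserveCallFrame(unsigned maxCallFrameSize, bool hasVarSizedObjects) {
  if (maxCallFrameSize >= ((1u << 12) - 1) / 2)  // half of imm12 (2047)
    return false;  // big call frames would push locals out of range
  return !hasVarSizedObjects;
}

int main() {
  assert(reserveCallFrame(64, false));     // small frame: reserve it
  assert(!reserveCallFrame(4000, false));  // too big: emit sp adjustments
  assert(!reserveCallFrame(64, true));     // VLAs: never reserved
  return 0;
}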
+ for (int i = 5, e = MI->getNumOperands(); i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + if ((MI->getOpcode() == ARM::LDR_POST || + MI->getOpcode() == ARM::t2LDR_POST) && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && + MI->getOperand(1).getReg() == ARM::SP) + return true; + + return false; +} + +static void +emitSPUpdate(bool isARM, + MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, const ARMBaseInstrInfo &TII, + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + ARMCC::AL, 0, TII, MIFlags); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + ARMCC::AL, 0, TII, MIFlags); +} + +void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo(); + const ARMBaseRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + assert(!AFI->isThumb1OnlyFunction() && + "This emitPrologue does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + unsigned NumBytes = MFI->getStackSize(); + const std::vector &CSI = MFI->getCalleeSavedInfo(); + DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + // Determine the sizes of each callee-save spill areas and record which frame + // belongs to which callee-save spill areas. + unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + int FramePtrSpillFI = 0; + + // Allocate the vararg register save area. This is not counted in NumBytes. + if (VARegSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + MachineInstr::FrameSetup); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); + return; + } + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + int FI = CSI[i].getFrameIdx(); + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + break; + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + if (STI.isTargetDarwin()) { + AFI->addGPRCalleeSavedArea2Frame(FI); + GPRCS2Size += 4; + } else { + AFI->addGPRCalleeSavedArea1Frame(FI); + GPRCS1Size += 4; + } + break; + default: + AFI->addDPRCalleeSavedAreaFrame(FI); + DPRCSSize += 8; + } + } + + // Move past area 1. + if (GPRCS1Size > 0) MBBI++; + + // Set FP to point to the stack slot that contains the previous FP. + // For Darwin, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not Darwin, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it is + // now safe to emit this assignment. + bool HasFP = hasFP(MF); + if (HasFP) { + unsigned ADDriOpc = !AFI->isThumbFunction() ? 
ARM::ADDri : ARM::t2ADDri; + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlag(MachineInstr::FrameSetup); + AddDefaultCC(AddDefaultPred(MIB)); + } + + // Move past area 2. + if (GPRCS2Size > 0) MBBI++; + + // Determine starting offsets of spill areas. + unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); + unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; + unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; + if (HasFP) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); + AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); + AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); + AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + + // Move past area 3. + if (DPRCSSize > 0) { + MBBI++; + // Since vpush register list cannot have gaps, there may be multiple vpush + // instructions in the prologue. + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + MBBI++; + } + + NumBytes = DPRCSOffset; + if (NumBytes) { + // Adjust SP after all the callee-save spills. + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); + if (HasFP && isARM) + // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 + // Note it's not safe to do this in Thumb2 mode because it would have + // taken two instructions: + // mov sp, r7 + // sub sp, #24 + // If an interrupt is taken between the two instructions, then sp is in + // an inconsistent state (pointing to the middle of callee-saved area). + // The interrupt handler can end up clobbering the registers. + AFI->setShouldRestoreSPFromFP(true); + } + + if (STI.isTargetELF() && hasFP(MF)) + MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - + AFI->getFramePtrSpillOffset()); + + AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); + AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + + // If we need dynamic stack realignment, do it here. Be paranoid and make + // sure if we also have VLAs, we have a base pointer for frame access. + if (RegInfo->needsStackRealignment(MF)) { + unsigned MaxAlign = MFI->getMaxAlignment(); + assert (!AFI->isThumb1OnlyFunction()); + if (!AFI->isThumbFunction()) { + // Emit bic sp, sp, MaxAlign + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::BICri), ARM::SP) + .addReg(ARM::SP, RegState::Kill) + .addImm(MaxAlign-1))); + } else { + // We cannot use sp as source/dest register here, thus we're emitting the + // following sequence: + // mov r4, sp + // bic r4, r4, MaxAlign + // mov sp, r4 + // FIXME: It will be better just to find spare register here. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) + .addReg(ARM::SP, RegState::Kill); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, + TII.get(ARM::t2BICri), ARM::R4) + .addReg(ARM::R4, RegState::Kill) + .addImm(MaxAlign-1))); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) + .addReg(ARM::R4, RegState::Kill); + } + + AFI->setShouldRestoreSPFromFP(true); + } + + // If we need a base pointer, set it up here. It's whatever the value + // of the stack pointer is at this point. Any variable size objects + // will be allocated after this, so we can still use the base pointer + // to reference locals. + // FIXME: Clarify FrameSetup flags here. 
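A worked example of the spill-area offset math in emitPrologue above: areas are laid out from the incoming SP downward in the order GPR area 1, GPR area 2, then the DPR (VFP) area, with locals below that. The numbers here are hypothetical; the arithmetic is the code's:

#include <cassert>

int main() {
  unsigned NumBytes   = 64;  // total frame size, made up for this example
  unsigned GPRCS1Size = 20;  // e.g. r4-r7 + lr
  unsigned GPRCS2Size = 8;   // e.g. r8, r10 (Darwin split)
  unsigned DPRCSSize  = 16;  // e.g. d8, d9

  unsigned DPRCSOffset  = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize);
  unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize;
  unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;

  assert(DPRCSOffset == 20);   // locals occupy the remaining 20 bytes
  assert(GPRCS2Offset == 36);
  assert(GPRCS1Offset == 44);  // area 1 sits closest to the incoming SP
  return 0;
}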
+ if (RegInfo->hasBasePointer(MF)) { + if (isARM) + BuildMI(MBB, MBBI, dl, + TII.get(ARM::MOVr), RegInfo->getBaseRegister()) + .addReg(ARM::SP) + .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, + TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) + .addReg(ARM::SP); + } + + // If the frame has variable sized objects then the epilogue must restore + // the sp from fp. We can assume there's an FP here since hasFP already + // checks for hasVarSizedObjects. + if (MFI->hasVarSizedObjects()) + AFI->setShouldRestoreSPFromFP(true); +} + +void ARMFrameLowering::emitEpilogue(MachineFunction &MF, + MachineBasicBlock &MBB) const { + MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); + assert(MBBI->getDesc().isReturn() && + "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); + DebugLoc dl = MBBI->getDebugLoc(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + assert(!AFI->isThumb1OnlyFunction() && + "This emitEpilogue does not support Thumb1!"); + bool isARM = !AFI->isThumbFunction(); + + unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); + int NumBytes = (int)MFI->getStackSize(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + if (!AFI->hasStackFrame()) { + if (NumBytes != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + } else { + // Unwind MBBI to point to first LDR / VLDRD. + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + if (MBBI != MBB.begin()) { + do + --MBBI; + while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); + if (!isCSRestore(MBBI, TII, CSRegs)) + ++MBBI; + } + + // Move SP to start of FP callee save spill area. + NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea2Size() + + AFI->getDPRCalleeSavedAreaSize()); + + // Reset SP based on frame pointer only if the stack frame extends beyond + // frame pointer stack slot or target is ELF and the function has FP. + if (AFI->shouldRestoreSPFromFP()) { + NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; + if (NumBytes) { + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else { + // It's not possible to restore SP from FP in a single instruction. + // For Darwin, this looks like: + // mov sp, r7 + // sub sp, #24 + // This is bad, if an interrupt is taken after the mov, sp is in an + // inconsistent state. + // Use the first callee-saved register as a scratch register. + assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && + "No scratch register to restore SP from FP!"); + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(ARM::R4); + } + } else { + // Thumb2 or ARM. + if (isARM) + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) + .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + else + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) + .addReg(FramePtr); + } + } else if (NumBytes) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + + // Increment past our save areas. + if (AFI->getDPRCalleeSavedAreaSize()) { + MBBI++; + // Since vpop register list cannot have gaps, there may be multiple vpop + // instructions in the epilogue. 
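The "register lists cannot have gaps" rule mentioned above (for vpush in the prologue, and for vpop below) means consecutive D registers are grouped into runs, one instruction per run. A minimal sketch of that grouping, assuming C++11:

#include <cassert>
#include <vector>

std::vector<std::vector<int>> splitIntoRuns(const std::vector<int> &regs) {
  std::vector<std::vector<int>> runs;
  for (int r : regs) {
    if (runs.empty() || runs.back().back() + 1 != r)
      runs.push_back({r});       // gap: start a new run (a new instruction)
    else
      runs.back().push_back(r);  // consecutive: extend the current run
  }
  return runs;
}

int main() {
  // vpush {d8, d10, d11} -> vpush {d8}; vpush {d10, d11}
  std::vector<std::vector<int>> runs = splitIntoRuns({8, 10, 11});
  assert(runs.size() == 2);
  assert((runs[0] == std::vector<int>{8}));
  assert((runs[1] == std::vector<int>{10, 11}));
  return 0;
}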
+ while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) + MBBI++; + } + if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; + if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; + } + + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = MBB.getLastNonDebugInstr(); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { + unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) + ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) + : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); + MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); + if (JumpTarget.isGlobal()) + MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + else { + assert(JumpTarget.isSymbol()); + MIB.addExternalSymbol(JumpTarget.getSymbolName(), + JumpTarget.getTargetFlags()); + } + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } else if (RetOpcode == ARM::TCRETURNriND) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. + MBB.erase(MBBI); + } + + if (VARegSaveSize) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); +} + +/// getFrameIndexReference - Provide a base+offset reference to an FI slot for +/// debug info. It's the same as what we use for resolving the code-gen +/// references for now. FIXME: This can go wrong when references are +/// SP-relative and simple call frames aren't used. +int +ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, + unsigned &FrameReg) const { + return ResolveFrameIndexReference(MF, FI, FrameReg, 0); +} + +int +ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, + int FI, + unsigned &FrameReg, + int SPAdj) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMBaseRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + const ARMFunctionInfo *AFI = MF.getInfo(); + int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + FrameReg = ARM::SP; + Offset += SPAdj; + if (AFI->isGPRCalleeSavedArea1Frame(FI)) + return Offset - AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FI)) + return Offset - AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FI)) + return Offset - AFI->getDPRCalleeSavedAreaOffset(); + + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack/base pointer for locals. + if (RegInfo->needsStackRealignment(MF)) { + assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + if (isFixed) { + FrameReg = RegInfo->getFrameRegister(MF); + Offset = FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(RegInfo->hasBasePointer(MF) && + "VLAs and dynamic stack alignment, but missing base pointer!"); + FrameReg = RegInfo->getBaseRegister(); + } + return Offset; + } + + // If there is a frame pointer, use it when we can. 
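ResolveFrameIndexReference below keeps choosing between FP and SP as the base register; the recurring -255 bound models Thumb2's limited negative immediate range (t2 i8 offsets). A simplified, self-contained model of that choice (plain C++, deliberately ignoring the ARM-mode cases):

#include <cassert>
#include <string>

std::string pickBaseReg(int fpOffset, bool isThumb2) {
  if (isThumb2)
    return (fpOffset >= -255 && fpOffset < 0) ? "fp" : "sp";
  return "fp";  // ARM-mode negative range is wider (simplified here)
}

int main() {
  assert(pickBaseReg(-40, true) == "fp");   // fits the t2 negative range
  assert(pickBaseReg(-300, true) == "sp");  // out of range: use SP instead
  return 0;
}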
+ if (hasFP(MF) && AFI->hasStackFrame()) { + // Use frame pointer to reference fixed objects. Use it for locals if + // there are VLAs (and thus the SP isn't reliable as a base). + if (isFixed || (MFI->hasVarSizedObjects() && + !RegInfo->hasBasePointer(MF))) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } else if (MFI->hasVarSizedObjects()) { + assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); + // Try to use the frame pointer if we can, else use the base pointer + // since it's available. This is handy for the emergency spill slot, in + // particular. + if (AFI->isThumb2Function()) { + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } else + FrameReg = RegInfo->getBaseRegister(); + } else if (AFI->isThumb2Function()) { + // In Thumb2 mode, the negative offset is very limited. Try to avoid + // out of range references. + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { + // Otherwise, use SP or FP, whichever is closer to the stack slot. + FrameReg = RegInfo->getFrameRegister(MF); + return FPOffset; + } + } + // Use the base pointer if we have one. + if (RegInfo->hasBasePointer(MF)) + FrameReg = RegInfo->getBaseRegister(); + return Offset; +} + +int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, + int FI) const { + unsigned FrameReg; + return getFrameIndexReference(MF, FI, FrameReg); +} + +void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + unsigned StmOpc, unsigned StrOpc, + bool NoGap, + bool(*Func)(unsigned, bool), + unsigned MIFlags) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + DebugLoc DL; + if (MI != MBB.end()) DL = MI->getDebugLoc(); + + SmallVector, 4> Regs; + unsigned i = CSI.size(); + while (i != 0) { + unsigned LastReg = 0; + for (; i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (!(Func)(Reg, STI.isTargetDarwin())) continue; + + // Add the callee-saved register as live-in unless it's LR and + // @llvm.returnaddress is called. If LR is returned for + // @llvm.returnaddress then it's already added to the function and + // entry block live-in sets. + bool isKill = true; + if (Reg == ARM::LR) { + if (MF.getFrameInfo()->isReturnAddressTaken() && + MF.getRegInfo().isLiveIn(Reg)) + isKill = false; + } + + if (isKill) + MBB.addLiveIn(Reg); + + // If NoGap is true, push consecutive registers and then leave the rest + // for other instructions. e.g. + // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} + if (NoGap && LastReg && LastReg != Reg-1) + break; + LastReg = Reg; + Regs.push_back(std::make_pair(Reg, isKill)); + } + + if (Regs.empty()) + continue; + if (Regs.size() > 1 || StrOpc== 0) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) + .addReg(ARM::SP).setMIFlags(MIFlags)); + for (unsigned i = 0, e = Regs.size(); i < e; ++i) + MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); + } else if (Regs.size() == 1) { + MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), + ARM::SP) + .addReg(Regs[0].first, getKillRegState(Regs[0].second)) + .addReg(ARM::SP).setMIFlags(MIFlags); + // ARM mode needs an extra reg0 here due to addrmode2. Will go away once + // that refactoring is complete (eventually). 
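emitPushInst and emitPopInst below fall back to a single-register spill/restore pair: STR_PRE is a pre-decrement store (a one-register push) and LDR_POST a post-increment load (a one-register pop). The same semantics in plain C++, as an illustrative sketch:

#include <cassert>

int main() {
  unsigned stack[4] = {0, 0, 0, 0};
  unsigned *sp = stack + 4;   // stacks grow down; sp starts past the end

  unsigned lr = 0xdeadbeef;
  *--sp = lr;                 // str lr, [sp, #-4]!  (STR_PRE)
  unsigned restored = *sp++;  // ldr lr, [sp], #4    (LDR_POST)

  assert(restored == lr && sp == stack + 4);
  return 0;
}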
+ if (StrOpc == ARM::STR_PRE) { + MIB.addReg(0); + MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); + } else + MIB.addImm(-4); + AddDefaultPred(MIB); + } + Regs.clear(); + } +} + +void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + unsigned LdmOpc, unsigned LdrOpc, + bool isVarArg, bool NoGap, + bool(*Func)(unsigned, bool)) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + ARMFunctionInfo *AFI = MF.getInfo(); + DebugLoc DL = MI->getDebugLoc(); + unsigned RetOpcode = MI->getOpcode(); + bool isTailCall = (RetOpcode == ARM::TCRETURNdi || + RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || + RetOpcode == ARM::TCRETURNriND); + + SmallVector Regs; + unsigned i = CSI.size(); + while (i != 0) { + unsigned LastReg = 0; + bool DeleteRet = false; + for (; i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (!(Func)(Reg, STI.isTargetDarwin())) continue; + + if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + Reg = ARM::PC; + LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; + // Fold the return instruction into the LDM. + DeleteRet = true; + } + + // If NoGap is true, pop consecutive registers and then leave the rest + // for other instructions. e.g. + // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} + if (NoGap && LastReg && LastReg != Reg-1) + break; + + LastReg = Reg; + Regs.push_back(Reg); + } + + if (Regs.empty()) + continue; + if (Regs.size() > 1 || LdrOpc == 0) { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP)); + for (unsigned i = 0, e = Regs.size(); i < e; ++i) + MIB.addReg(Regs[i], getDefRegState(true)); + if (DeleteRet) + MI->eraseFromParent(); + MI = MIB; + } else if (Regs.size() == 1) { + // If we adjusted the reg to PC from LR above, switch it back here. We + // only do that for LDM. + if (Regs[0] == ARM::PC) + Regs[0] = ARM::LR; + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + // ARM mode needs an extra reg0 here due to addrmode2. Will go away once + // that refactoring is complete (eventually). + if (LdrOpc == ARM::LDR_POST) { + MIB.addReg(0); + MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); + } else + MIB.addImm(4); + AddDefaultPred(MIB); + } + Regs.clear(); + } +} + +bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + + unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; + unsigned PushOneOpc = AFI->isThumbFunction() ? 
ARM::t2STR_PRE : ARM::STR_PRE; + unsigned FltOpc = ARM::VSTMDDB_UPD; + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + MachineInstr::FrameSetup); + + return true; +} + +bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + const std::vector &CSI, + const TargetRegisterInfo *TRI) const { + if (CSI.empty()) + return false; + + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo(); + bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; + + unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; + unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST; + unsigned FltOpc = ARM::VLDMDIA_UPD; + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea2Register); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea1Register); + + return true; +} + +// FIXME: Make generic? +static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, + const ARMBaseInstrInfo &TII) { + unsigned FnSize = 0; + for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + const MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); + I != E; ++I) + FnSize += TII.GetInstSizeInBytes(I); + } + return FnSize; +} + +/// estimateStackSize - Estimate and return the size of the frame. +/// FIXME: Make generic? +static unsigned estimateStackSize(MachineFunction &MF) { + const MachineFrameInfo *FFI = MF.getFrameInfo(); + int Offset = 0; + for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -FFI->getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { + if (FFI->isDeadObjectIndex(i)) + continue; + Offset += FFI->getObjectSize(i); + unsigned Align = FFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + } + return (unsigned)Offset; +} + +/// estimateRSStackSizeLimit - Look at each instruction that references stack +/// frames and return the stack size limit beyond which some of these +/// instructions will require a scratch register during their expansion later. +// FIXME: Move to TII? +static unsigned estimateRSStackSizeLimit(MachineFunction &MF, + const TargetFrameLowering *TFI) { + const ARMFunctionInfo *AFI = MF.getInfo(); + unsigned Limit = (1 << 12) - 1; + for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + if (!I->getOperand(i).isFI()) continue; + + // When using ADDri to get the address of a stack object, 255 is the + // largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == ARM::ADDri) { + Limit = std::min(Limit, (1U << 8) - 1); + break; + } + + // Otherwise check the addressing mode. 
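estimateRSStackSizeLimit below clamps the scavenging threshold per addressing mode; this sketch tabulates the same limits as plain data for reference (the integer tags are hypothetical stand-ins for the ARMII::AddrMode* enums):

#include <cassert>

unsigned offsetLimit(int addrMode) {
  switch (addrMode) {
  case 3:  return (1u << 8) - 1;        // AddrMode3 / T2_i8: up to 255
  case 5:  return ((1u << 8) - 1) * 4;  // AddrMode5 / T2_i8s4: scaled by 4
  case 4:
  case 6:  return 0;                    // AddrMode4/6: no immediate at all
  default: return (1u << 12) - 1;       // base case: ARM imm12
  }
}

int main() {
  assert(offsetLimit(3) == 255);
  assert(offsetLimit(5) == 1020);
  assert(offsetLimit(6) == 0);
  assert(offsetLimit(0) == 4095);
  return 0;
}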
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { + case ARMII::AddrMode3: + case ARMII::AddrModeT2_i8: + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode5: + case ARMII::AddrModeT2_i8s4: + Limit = std::min(Limit, ((1U << 8) - 1) * 4); + break; + case ARMII::AddrModeT2_i12: + // i12 supports only positive offset so these will be converted to + // i8 opcodes. See llvm::rewriteT2FrameIndex. + if (TFI->hasFP(MF) && AFI->hasStackFrame()) + Limit = std::min(Limit, (1U << 8) - 1); + break; + case ARMII::AddrMode4: + case ARMII::AddrMode6: + // Addressing modes 4 & 6 (load/store) instructions can't encode an + // immediate offset for stack references. + return 0; + default: + break; + } + break; // At most one FI per instruction + } + } + } + + return Limit; +} + +void +ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, + RegScavenger *RS) const { + // This tells PEI to spill the FP as if it is any other callee-save register + // to take advantage the eliminateFrameIndex machinery. This also ensures it + // is spilled in the order specified by getCalleeSavedRegs() to make it easier + // to combine multiple loads / stores. + bool CanEliminateFrame = true; + bool CS1Spilled = false; + bool LRSpilled = false; + unsigned NumGPRSpills = 0; + SmallVector UnspilledCS1GPRs; + SmallVector UnspilledCS2GPRs; + const ARMBaseRegisterInfo *RegInfo = + static_cast(MF.getTarget().getRegisterInfo()); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); + ARMFunctionInfo *AFI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned FramePtr = RegInfo->getFrameRegister(MF); + + // Spill R4 if Thumb2 function requires stack realignment - it will be used as + // scratch register. Also spill R4 if Thumb2 function has varsized objects, + // since it's not always possible to restore sp from fp in a single + // instruction. + // FIXME: It will be better just to find spare register here. + if (AFI->isThumb2Function() && + (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) + MF.getRegInfo().setPhysRegUsed(ARM::R4); + + if (AFI->isThumb1OnlyFunction()) { + // Spill LR if Thumb1 function uses variable length argument lists. + if (AFI->getVarArgsRegSaveSize() > 0) + MF.getRegInfo().setPhysRegUsed(ARM::LR); + + // Spill R4 if Thumb1 epilogue has to restore SP from FP since + // FIXME: It will be better just to find spare register here. + if (MFI->hasVarSizedObjects()) + MF.getRegInfo().setPhysRegUsed(ARM::R4); + } + + // Spill the BasePtr if it's used. + if (RegInfo->hasBasePointer(MF)) + MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); + + // Don't spill FP if the frame can be eliminated. This is determined + // by scanning the callee-save registers to see if any is used. + const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + bool Spilled = false; + if (MF.getRegInfo().isPhysRegUsed(Reg)) { + Spilled = true; + CanEliminateFrame = false; + } else { + // Check alias registers too. 
+ for (const unsigned *Aliases = + RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { + if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { + Spilled = true; + CanEliminateFrame = false; + } + } + } + + if (!ARM::GPRRegisterClass->contains(Reg)) + continue; + + if (Spilled) { + NumGPRSpills++; + + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) + LRSpilled = true; + CS1Spilled = true; + continue; + } + + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: case ARM::R5: + case ARM::R6: case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: case ARM::R5: + case ARM::R6: case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; + } + } + } + + bool ForceLRSpill = false; + if (!LRSpilled && AFI->isThumb1OnlyFunction()) { + unsigned FnSize = GetFunctionSizeInBytes(MF, TII); + // Force LR to be spilled if the Thumb function size is > 2048. This enables + // use of BL to implement far jump. If it turns out that it's not needed + // then the branch fix up path will undo it. + if (FnSize >= (1 << 11)) { + CanEliminateFrame = false; + ForceLRSpill = true; + } + } + + // If any of the stack slot references may be out of range of an immediate + // offset, make sure a register (or a spill slot) is available for the + // register scavenger. Note that if we're indexing off the frame pointer, the + // effective stack size is 4 bytes larger since the FP points to the stack + // slot of the previous FP. Also, if we have variable sized objects in the + // function, stack slot references will often be negative, and some of + // our instructions are positive-offset only, so conservatively consider + // that case to want a spill slot (or register) as well. Similarly, if + // the function adjusts the stack pointer during execution and the + // adjustments aren't already part of our stack size estimate, our offset + // calculations may be off, so be conservative. + // FIXME: We could add logic to be more precise about negative offsets + // and which instructions will need a scratch register for them. Is it + // worth the effort and added fragility? + bool BigStack = + (RS && + (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= + estimateRSStackSizeLimit(MF, this))) + || MFI->hasVarSizedObjects() + || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); + + bool ExtraCSSpill = false; + if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { + AFI->setHasStackFrame(true); + + // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. + // Spill LR as well so we can fold BX_RET to the registers restore (LDM). + if (!LRSpilled && CS1Spilled) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + NumGPRSpills++; + UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), + UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + ForceLRSpill = false; + ExtraCSSpill = true; + } + + if (hasFP(MF)) { + MF.getRegInfo().setPhysRegUsed(FramePtr); + NumGPRSpills++; + } + + // If stack and double are 8-byte aligned and we are spilling an odd number + // of GPRs, spill one extra callee save GPR so we won't have to pad between + // the integer and double callee save areas. 
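A tiny worked example of the parity rule stated in the comment above: with 8-byte stack alignment, an odd number of 4-byte GPR spills would leave the following D-register area misaligned unless one more GPR is pushed. Plain C++, same test as the code:

#include <cassert>

int main() {
  unsigned numGPRSpills = 3;                  // 12 bytes: not 8-byte aligned
  bool needsExtra = (numGPRSpills & 1) != 0;  // the (NumGPRSpills & 1) test
  if (needsExtra)
    ++numGPRSpills;                           // spill one extra callee save
  assert(numGPRSpills * 4 % 8 == 0);          // DPR area now stays aligned
  return 0;
}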
+ unsigned TargetAlign = getStackAlignment(); + if (TargetAlign == 8 && (NumGPRSpills & 1)) { + if (CS1Spilled && !UnspilledCS1GPRs.empty()) { + for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { + unsigned Reg = UnspilledCS1GPRs[i]; + // Don't spill high register if the function is thumb1 + if (!AFI->isThumb1OnlyFunction() || + isARMLowRegister(Reg) || Reg == ARM::LR) { + MF.getRegInfo().setPhysRegUsed(Reg); + if (!RegInfo->isReservedReg(MF, Reg)) + ExtraCSSpill = true; + break; + } + } + } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { + unsigned Reg = UnspilledCS2GPRs.front(); + MF.getRegInfo().setPhysRegUsed(Reg); + if (!RegInfo->isReservedReg(MF, Reg)) + ExtraCSSpill = true; + } + } + + // Estimate if we might need to scavenge a register at some point in order + // to materialize a stack offset. If so, either spill one additional + // callee-saved register or reserve a special spill slot to facilitate + // register scavenging. Thumb1 needs a spill slot for stack pointer + // adjustments also, even when the frame itself is small. + if (BigStack && !ExtraCSSpill) { + // If any non-reserved CS register isn't spilled, just spill one or two + // extra. That should take care of it! + unsigned NumExtras = TargetAlign / 4; + SmallVector Extras; + while (NumExtras && !UnspilledCS1GPRs.empty()) { + unsigned Reg = UnspilledCS1GPRs.back(); + UnspilledCS1GPRs.pop_back(); + if (!RegInfo->isReservedReg(MF, Reg) && + (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || + Reg == ARM::LR)) { + Extras.push_back(Reg); + NumExtras--; + } + } + // For non-Thumb1 functions, also check for hi-reg CS registers + if (!AFI->isThumb1OnlyFunction()) { + while (NumExtras && !UnspilledCS2GPRs.empty()) { + unsigned Reg = UnspilledCS2GPRs.back(); + UnspilledCS2GPRs.pop_back(); + if (!RegInfo->isReservedReg(MF, Reg)) { + Extras.push_back(Reg); + NumExtras--; + } + } + } + if (Extras.size() && NumExtras == 0) { + for (unsigned i = 0, e = Extras.size(); i != e; ++i) { + MF.getRegInfo().setPhysRegUsed(Extras[i]); + } + } else if (!AFI->isThumb1OnlyFunction()) { + // note: Thumb1 functions spill to R12, not the stack. Reserve a slot + // closest to SP or frame pointer. + const TargetRegisterClass *RC = ARM::GPRRegisterClass; + RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), + RC->getAlignment(), + false)); + } + } + } + + if (ForceLRSpill) { + MF.getRegInfo().setPhysRegUsed(ARM::LR); + AFI->setLRIsSpilledForFarJump(true); + } +} diff --git a/final/lib/Target/ARM/ARMFrameLowering.h b/final/lib/Target/ARM/ARMFrameLowering.h new file mode 100644 index 00000000000..a7b7f15449d --- /dev/null +++ b/final/lib/Target/ARM/ARMFrameLowering.h @@ -0,0 +1,75 @@ +//==-- ARMTargetFrameLowering.h - Define frame lowering for ARM --*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the ARM implementation of the TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARM_FRAMEINFO_H
+#define ARM_FRAMEINFO_H
+
+#include "ARM.h"
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetFrameLowering.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class ARMFrameLowering : public TargetFrameLowering {
+protected:
+  const ARMSubtarget &STI;
+
+public:
+  explicit ARMFrameLowering(const ARMSubtarget &sti)
+    : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
+      STI(sti) {
+  }
+
+  /// emitProlog/emitEpilog - These methods insert prolog and epilog code into
+  /// the function.
+  void emitPrologue(MachineFunction &MF) const;
+  void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+  bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                 MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI,
+                                 const TargetRegisterInfo *TRI) const;
+
+  bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MI,
+                                   const std::vector<CalleeSavedInfo> &CSI,
+                                   const TargetRegisterInfo *TRI) const;
+
+  bool hasFP(const MachineFunction &MF) const;
+  bool hasReservedCallFrame(const MachineFunction &MF) const;
+  bool canSimplifyCallFramePseudos(const MachineFunction &MF) const;
+  int getFrameIndexReference(const MachineFunction &MF, int FI,
+                             unsigned &FrameReg) const;
+  int ResolveFrameIndexReference(const MachineFunction &MF, int FI,
+                                 unsigned &FrameReg, int SPAdj) const;
+  int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+  void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
+                                            RegScavenger *RS) const;
+
+private:
+  void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                    const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
+                    unsigned StrOpc, bool NoGap,
+                    bool(*Func)(unsigned, bool),
+                    unsigned MIFlags = 0) const;
+  void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                   const std::vector<CalleeSavedInfo> &CSI, unsigned LdmOpc,
+                   unsigned LdrOpc, bool isVarArg, bool NoGap,
+                   bool(*Func)(unsigned, bool)) const;
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/final/lib/Target/ARM/ARMGlobalMerge.cpp b/final/lib/Target/ARM/ARMGlobalMerge.cpp
new file mode 100644
index 00000000000..ab6c00e6e1a
--- /dev/null
+++ b/final/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -0,0 +1,223 @@
+//===-- ARMGlobalMerge.cpp - Internal globals merging --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass merges globals with internal linkage into one. This way all the
+// globals which were merged into the biggest one can be addressed using
+// offsets from a single base pointer (there is no need for a separate base
+// pointer for each of the globals). Such a transformation can significantly
+// reduce the register pressure when many globals are involved.
+//
+// For example, consider the code which touches several global variables at
+// once:
+//
+// static int foo[N], bar[N], baz[N];
+//
+// for (i = 0; i < N; ++i) {
+//   foo[i] = bar[i] * baz[i];
+// }
+//
+// On ARM the addresses of all 3 arrays have to be kept in registers, so this
+// code has quite high register pressure (loop body):
+//
+// ldr r1, [r5], #4
+// ldr r2, [r6], #4
+// mul r1, r2, r1
+// str r1, [r0], #4
+//
+// The pass converts the code to something like:
+//
+// static struct {
+//   int foo[N];
+//   int bar[N];
+//   int baz[N];
+// } merged;
+//
+// for (i = 0; i < N; ++i) {
+//   merged.foo[i] = merged.bar[i] * merged.baz[i];
+// }
+//
+// and in ARM code this becomes:
+//
+// ldr r0, [r5, #40]
+// ldr r1, [r5, #80]
+// mul r0, r1, r0
+// str r0, [r5], #4
+//
+// Note that we saved 2 registers here almost "for free".
+//===---------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-global-merge"
+#include "ARM.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Attributes.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+using namespace llvm;
+
+namespace {
+  class ARMGlobalMerge : public FunctionPass {
+    /// TLI - Keep a pointer to a TargetLowering to consult for determining
+    /// target type sizes.
+    const TargetLowering *TLI;
+
+    bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                 Module &M, bool isConst) const;
+
+  public:
+    static char ID;  // Pass identification, replacement for typeid.
+    explicit ARMGlobalMerge(const TargetLowering *tli)
+      : FunctionPass(ID), TLI(tli) {}
+
+    virtual bool doInitialization(Module &M);
+    virtual bool runOnFunction(Function &F);
+
+    const char *getPassName() const {
+      return "Merge internal globals";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      FunctionPass::getAnalysisUsage(AU);
+    }
+
+    struct GlobalCmp {
+      const TargetData *TD;
+
+      GlobalCmp(const TargetData *td) : TD(td) { }
+
+      bool operator()(const GlobalVariable *GV1, const GlobalVariable *GV2) {
+        const Type *Ty1 = cast<PointerType>(GV1->getType())->getElementType();
+        const Type *Ty2 = cast<PointerType>(GV2->getType())->getElementType();
+
+        return (TD->getTypeAllocSize(Ty1) < TD->getTypeAllocSize(Ty2));
+      }
+    };
+  };
+} // end anonymous namespace
+
+char ARMGlobalMerge::ID = 0;
+
+bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
+                             Module &M, bool isConst) const {
+  const TargetData *TD = TLI->getTargetData();
+
+  // FIXME: Infer the maximum possible offset depending on the actual users
+  // (these max offsets are different for the users inside Thumb or ARM
+  // functions).
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+
+  // FIXME: Find better heuristics.
+  std::stable_sort(Globals.begin(), Globals.end(), GlobalCmp(TD));
+
+  const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+
+  for (size_t i = 0, e = Globals.size(); i != e; ) {
+    size_t j = 0;
+    uint64_t MergedSize = 0;
+    std::vector<const Type*> Tys;
+    std::vector<Constant*> Inits;
+    for (j = i; j != e; ++j) {
+      const Type *Ty = Globals[j]->getType()->getElementType();
+      MergedSize += TD->getTypeAllocSize(Ty);
+      if (MergedSize > MaxOffset) {
+        break;
+      }
+      Tys.push_back(Ty);
+      Inits.push_back(Globals[j]->getInitializer());
+    }
+
+    StructType *MergedTy = StructType::get(M.getContext(), Tys);
+    Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
+    GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst,
+                                                  GlobalValue::InternalLinkage,
+                                                  MergedInit, "_MergedGlobals");
+    for (size_t k = i; k < j; ++k) {
+      Constant *Idx[2] = {
+        ConstantInt::get(Int32Ty, 0),
+        ConstantInt::get(Int32Ty, k-i)
+      };
+      Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx, 2);
+      Globals[k]->replaceAllUsesWith(GEP);
+      Globals[k]->eraseFromParent();
+    }
+    i = j;
+  }
+
+  return true;
+}
+
+
+bool ARMGlobalMerge::doInitialization(Module &M) {
+  SmallVector<GlobalVariable*, 16> Globals, ConstGlobals, BSSGlobals;
+  const TargetData *TD = TLI->getTargetData();
+  unsigned MaxOffset = TLI->getMaximalGlobalOffset();
+  bool Changed = false;
+
+  // Disable this pass on Darwin. The debugger is not yet ready to extract
+  // variable info from a merged global.
+  if (TLI->getTargetMachine().getSubtarget<ARMSubtarget>().isTargetDarwin())
+    return false;
+
+  // Grab all non-const globals.
+  for (Module::global_iterator I = M.global_begin(),
+         E = M.global_end(); I != E; ++I) {
+    // Merging is safe only for "normal" internal globals.
+    if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection())
+      continue;
+
+    // Ignore fancy-aligned globals for now.
+    if (I->getAlignment() != 0)
+      continue;
+
+    // Ignore all 'special' globals.
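
doMerge above packs the size-sorted globals greedily, cutting a new merged struct whenever the running size would exceed MaxOffset. The same grouping strategy in isolation, as a sketch over plain sizes (the helper name and the index-pair output are inventions for illustration; the global scan continues below):

#include <cstdint>
#include <utility>
#include <vector>

// Greedy grouping as in doMerge above: inputs are pre-sorted by allocation
// size and packed left to right; a group is cut as soon as the next member
// would push the accumulated size past MaxOffset. Returns [begin, end)
// index pairs, one per merged struct.
static std::vector<std::pair<size_t, size_t> >
groupBySize(const std::vector<uint64_t> &Sizes, uint64_t MaxOffset) {
  std::vector<std::pair<size_t, size_t> > Groups;
  for (size_t i = 0, e = Sizes.size(); i != e;) {
    uint64_t MergedSize = 0;
    size_t j = i;
    for (; j != e; ++j) {
      MergedSize += Sizes[j];
      if (MergedSize > MaxOffset)
        break;
    }
    if (j == i)  // oversized element (filtered out upstream): keep it alone
      ++j;
    Groups.push_back(std::make_pair(i, j));
    i = j;
  }
  return Groups;
}
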
+ if (I->getName().startswith("llvm.") || + I->getName().startswith(".llvm.")) + continue; + + if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) { + const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering(); + if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal()) + BSSGlobals.push_back(I); + else if (I->isConstant()) + ConstGlobals.push_back(I); + else + Globals.push_back(I); + } + } + + if (Globals.size() > 1) + Changed |= doMerge(Globals, M, false); + if (BSSGlobals.size() > 1) + Changed |= doMerge(BSSGlobals, M, false); + + // FIXME: This currently breaks the EH processing due to way how the + // typeinfo detection works. We might want to detect the TIs and ignore + // them in the future. + // if (ConstGlobals.size() > 1) + // Changed |= doMerge(ConstGlobals, M, true); + + return Changed; +} + +bool ARMGlobalMerge::runOnFunction(Function &F) { + return false; +} + +FunctionPass *llvm::createARMGlobalMergePass(const TargetLowering *tli) { + return new ARMGlobalMerge(tli); +} diff --git a/final/lib/Target/ARM/ARMHazardRecognizer.cpp b/final/lib/Target/ARM/ARMHazardRecognizer.cpp new file mode 100644 index 00000000000..e97ce50bc42 --- /dev/null +++ b/final/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -0,0 +1,118 @@ +//===-- ARMHazardRecognizer.cpp - ARM postra hazard recognizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARMHazardRecognizer.h" +#include "ARMBaseInstrInfo.h" +#include "ARMBaseRegisterInfo.h" +#include "ARMSubtarget.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, + const TargetRegisterInfo &TRI) { + // FIXME: Detect integer instructions properly. + const TargetInstrDesc &TID = MI->getDesc(); + unsigned Domain = TID.TSFlags & ARMII::DomainMask; + if (TID.mayStore()) + return false; + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + return false; +} + +ScheduleHazardRecognizer::HazardType +ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + assert(Stalls == 0 && "ARM hazards don't support scoreboard lookahead"); + + MachineInstr *MI = SU->getInstr(); + + if (!MI->isDebugValue()) { + if (ITBlockSize && MI != ITBlockMIs[ITBlockSize-1]) + return Hazard; + + // Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following + // a VMLA / VMLS will cause 4 cycle stall. + const TargetInstrDesc &TID = MI->getDesc(); + if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { + MachineInstr *DefMI = LastMI; + const TargetInstrDesc &LastTID = LastMI->getDesc(); + // Skip over one non-VFP / NEON instruction. + if (!LastTID.isBarrier() && + (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { + MachineBasicBlock::iterator I = LastMI; + if (I != LastMI->getParent()->begin()) { + I = llvm::prior(I); + DefMI = &*I; + } + } + + if (TII.isFpMLxInstruction(DefMI->getOpcode()) && + (TII.canCauseFpMLxStall(MI->getOpcode()) || + hasRAWHazard(DefMI, MI, TRI))) { + // Try to schedule another instruction for the next 4 cycles. 
+ if (FpMLxStalls == 0) + FpMLxStalls = 4; + return Hazard; + } + } + } + + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void ARMHazardRecognizer::Reset() { + LastMI = 0; + FpMLxStalls = 0; + ITBlockSize = 0; + ScoreboardHazardRecognizer::Reset(); +} + +void ARMHazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr *MI = SU->getInstr(); + unsigned Opcode = MI->getOpcode(); + if (ITBlockSize) { + --ITBlockSize; + } else if (Opcode == ARM::t2IT) { + unsigned Mask = MI->getOperand(1).getImm(); + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + ITBlockSize = 4 - NumTZ; + MachineBasicBlock::iterator I = MI; + for (unsigned i = 0; i < ITBlockSize; ++i) { + // Advance to the next instruction, skipping any dbg_value instructions. + do { + ++I; + } while (I->isDebugValue()); + ITBlockMIs[ITBlockSize-1-i] = &*I; + } + } + + if (!MI->isDebugValue()) { + LastMI = MI; + FpMLxStalls = 0; + } + + ScoreboardHazardRecognizer::EmitInstruction(SU); +} + +void ARMHazardRecognizer::AdvanceCycle() { + if (FpMLxStalls && --FpMLxStalls == 0) + // Stalled for 4 cycles but still can't schedule any other instructions. + LastMI = 0; + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void ARMHazardRecognizer::RecedeCycle() { + llvm_unreachable("reverse ARM hazard checking unsupported"); +} diff --git a/final/lib/Target/ARM/ARMHazardRecognizer.h b/final/lib/Target/ARM/ARMHazardRecognizer.h new file mode 100644 index 00000000000..2bc218d8566 --- /dev/null +++ b/final/lib/Target/ARM/ARMHazardRecognizer.h @@ -0,0 +1,54 @@ +//===-- ARMHazardRecognizer.h - ARM Hazard Recognizers ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling ARM functions. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMHAZARDRECOGNIZER_H +#define ARMHAZARDRECOGNIZER_H + +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" + +namespace llvm { + +class ARMBaseInstrInfo; +class ARMBaseRegisterInfo; +class ARMSubtarget; +class MachineInstr; + +class ARMHazardRecognizer : public ScoreboardHazardRecognizer { + const ARMBaseInstrInfo &TII; + const ARMBaseRegisterInfo &TRI; + const ARMSubtarget &STI; + + MachineInstr *LastMI; + unsigned FpMLxStalls; + unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. 
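
ITBlockSize, declared just above, is recomputed in EmitInstruction from the t2IT instruction's 4-bit mask operand, where the lowest set bit terminates the then/else pattern. A standalone sketch of that decoding (the function name is hypothetical; the real code uses CountTrailingZeros_32):

#include <cassert>

// An IT block covers 4 - CountTrailingZeros(Mask) instructions, because the
// 4-bit mask carries a single trailing 1 bit after the last then/else slot.
static unsigned itBlockLength(unsigned Mask) {
  unsigned NumTZ = 0;
  while (NumTZ < 4 && !(Mask & (1u << NumTZ)))
    ++NumTZ;
  return 4 - NumTZ;
}

int main() {
  assert(itBlockLength(0x8) == 1); // IT    : mask 1000
  assert(itBlockLength(0x4) == 2); // ITx   : mask x100
  assert(itBlockLength(0x2) == 3); // ITxy  : mask xy10
  assert(itBlockLength(0x1) == 4); // ITxyz : mask xyz1
}
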
+ MachineInstr *ITBlockMIs[4]; + +public: + ARMHazardRecognizer(const InstrItineraryData *ItinData, + const ARMBaseInstrInfo &tii, + const ARMBaseRegisterInfo &tri, + const ARMSubtarget &sti, + const ScheduleDAG *DAG) : + ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), + TRI(tri), STI(sti), LastMI(0), ITBlockSize(0) {} + + virtual HazardType getHazardType(SUnit *SU, int Stalls); + virtual void Reset(); + virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void RecedeCycle(); +}; + +} // end namespace llvm + +#endif // ARMHAZARDRECOGNIZER_H diff --git a/final/lib/Target/ARM/ARMISelDAGToDAG.cpp b/final/lib/Target/ARM/ARMISelDAGToDAG.cpp new file mode 100644 index 00000000000..edf74387b15 --- /dev/null +++ b/final/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -0,0 +1,2869 @@ +//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines an instruction selector for the ARM target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-isel" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "ARMAddressingModes.h" +#include "ARMTargetMachine.h" +#include "llvm/CallingConv.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Intrinsics.h" +#include "llvm/LLVMContext.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static cl::opt +DisableShifterOp("disable-shifter-op", cl::Hidden, + cl::desc("Disable isel of shifter-op"), + cl::init(false)); + +static cl::opt +CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, + cl::desc("Check fp vmla / vmls hazard at isel time"), + cl::init(false)); + +//===--------------------------------------------------------------------===// +/// ARMDAGToDAGISel - ARM specific code to select ARM machine +/// instructions for SelectionDAG operations. +/// +namespace { + +enum AddrMode2Type { + AM2_BASE, // Simple AM2 (+-imm12) + AM2_SHOP // Shifter-op AM2 +}; + +class ARMDAGToDAGISel : public SelectionDAGISel { + ARMBaseTargetMachine &TM; + const ARMBaseInstrInfo *TII; + + /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can + /// make the right decision when generating code for different targets. + const ARMSubtarget *Subtarget; + +public: + explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, + CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel), TM(tm), + TII(static_cast(TM.getInstrInfo())), + Subtarget(&TM.getSubtarget()) { + } + + virtual const char *getPassName() const { + return "ARM Instruction Selection"; + } + + /// getI32Imm - Return a target constant of type i32 with the specified + /// value. 
+ inline SDValue getI32Imm(unsigned Imm) { + return CurDAG->getTargetConstant(Imm, MVT::i32); + } + + SDNode *Select(SDNode *N); + + + bool hasNoVMLxHazardUse(SDNode *N) const; + bool isShifterOpProfitable(const SDValue &Shift, + ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); + bool SelectShifterOperandReg(SDValue N, SDValue &A, + SDValue &B, SDValue &C); + bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, + SDValue &B, SDValue &C); + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); + + AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE; + } + + bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP; + } + + bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + SelectAddrMode2Worker(N, Base, Offset, Opc); +// return SelectAddrMode2ShOp(N, Base, Offset, Opc); + // This always matches one way or another. + return true; + } + + bool SelectAddrMode2Offset(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3(SDValue N, SDValue &Base, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode3Offset(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc); + bool SelectAddrMode5(SDValue N, SDValue &Base, + SDValue &Offset); + bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); + bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); + + bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); + + // Thumb Addressing Modes: + bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI(SDValue N, SDValue &Base, SDValue &Offset, + unsigned Scale); + bool SelectThumbAddrModeRI5S1(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI5S2(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeRI5S4(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm); + + // Thumb 2 Addressing Modes: + bool SelectT2ShifterOperandReg(SDValue N, + SDValue &BaseReg, SDValue &Opc); + bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); + bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, + SDValue &OffImm); + bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, + SDValue &OffImm); + bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, + SDValue &OffReg, SDValue &ShImm); + + inline bool is_so_imm(unsigned Imm) const { + return ARM_AM::getSOImmVal(Imm) != -1; + } + + inline bool is_so_imm_not(unsigned Imm) const { + return ARM_AM::getSOImmVal(~Imm) != -1; + } + + inline bool is_t2_so_imm(unsigned Imm) const { + return ARM_AM::getT2SOImmVal(Imm) != -1; + } + + inline bool is_t2_so_imm_not(unsigned Imm) const { + return ARM_AM::getT2SOImmVal(~Imm) != -1; + } + + inline bool Pred_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast(inN); + return 
is_so_imm(N->getZExtValue()); + } + + inline bool Pred_t2_so_imm(SDNode *inN) const { + ConstantSDNode *N = cast(inN); + return is_t2_so_imm(N->getZExtValue()); + } + + // Include the pieces autogenerated from the target description. +#include "ARMGenDAGISel.inc" + +private: + /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for + /// ARM. + SDNode *SelectARMIndexedLoad(SDNode *N); + SDNode *SelectT2IndexedLoad(SDNode *N); + + /// SelectVLD - Select NEON load intrinsics. NumVecs should be + /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for + /// loads of D registers and even subregs and odd subregs of Q registers. + /// For NumVecs <= 2, QOpcodes1 is not used. + SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, + unsigned *QOpcodes0, unsigned *QOpcodes1); + + /// SelectVST - Select NEON store intrinsics. NumVecs should + /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for + /// stores of D registers and even subregs and odd subregs of Q registers. + /// For NumVecs <= 2, QOpcodes1 is not used. + SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, + unsigned *QOpcodes0, unsigned *QOpcodes1); + + /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should + /// be 2, 3 or 4. The opcode arrays specify the instructions used for + /// load/store of D registers and Q registers. + SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, + bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes); + + /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs + /// should be 2, 3 or 4. The opcode array specifies the instructions used + /// for loading D registers. (Q registers are not supported.) + SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + unsigned *Opcodes); + + /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, + /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be + /// generated to force the table registers to be consecutive. + SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); + + /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. + SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); + + /// SelectCMOVOp - Select CMOV instructions for ARM. + SDNode *SelectCMOVOp(SDNode *N); + SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + + SDNode *SelectConcatVector(SDNode *N); + + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for + /// inline asm expressions. + virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, + char ConstraintCode, + std::vector &OutOps); + + // Form pairs of consecutive S, D, or Q registers. + SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); + SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); + SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); + + // Form sequences of 4 consecutive S, D, or Q registers. 
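
The is_so_imm / is_t2_so_imm predicates above defer to ARM_AM::getSOImmVal, which implements the ARM rule that a data-processing immediate must be an 8-bit value rotated right by an even amount. A self-contained sketch of that encodability rule, stated from general ARM knowledge rather than from this patch (the register-sequence declarations announced by the comment above continue right after):

#include <cassert>
#include <cstdint>

// A 32-bit value is an ARM "so_imm" iff it is an 8-bit constant rotated
// right by an even amount (0, 2, ..., 30). Standalone check; the in-tree
// routine is ARM_AM::getSOImmVal.
static bool isSOImmEncodable(uint32_t V) {
  for (unsigned Rot = 0; Rot < 32; Rot += 2) {
    // Rotating left by Rot undoes a rotate-right-by-Rot encoding.
    uint32_t Undone = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
    if (Undone <= 0xFF)
      return true;
  }
  return false;
}

int main() {
  assert(isSOImmEncodable(0xFF));       // 8-bit value, rotation 0
  assert(isSOImmEncodable(0xFF000000)); // 0xFF rotated right by 8
  assert(!isSOImmEncodable(0x101));     // needs 9 significant bits
}
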
+  SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+  SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+  SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+  // Get the alignment operand for a NEON VLD or VST instruction.
+  SDValue GetVLDSTAlign(SDValue Align, unsigned NumVecs, bool is64BitVector);
+};
+}
+
+/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
+/// operand. If so Imm will receive the 32-bit value.
+static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
+  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
+    Imm = cast<ConstantSDNode>(N)->getZExtValue();
+    return true;
+  }
+  return false;
+}
+
+// isInt32Immediate - This method tests to see if the operand is a 32-bit
+// constant. If so Imm will receive the 32 bit value.
+static bool isInt32Immediate(SDValue N, unsigned &Imm) {
+  return isInt32Immediate(N.getNode(), Imm);
+}
+
+// isOpcWithIntImmediate - This method tests to see if the node is a specific
+// opcode and that it has an immediate integer right operand.
+// If so Imm will receive the 32 bit value.
+static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
+  return N->getOpcode() == Opc &&
+         isInt32Immediate(N->getOperand(1).getNode(), Imm);
+}
+
+/// \brief Check whether a particular node is a constant value representable as
+/// (N * Scale), where N is in [\arg RangeMin, \arg RangeMax).
+///
+/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
+static bool isScaledConstantInRange(SDValue Node, unsigned Scale,
+                                    int RangeMin, int RangeMax,
+                                    int &ScaledConstant) {
+  assert(Scale && "Invalid scale!");
+
+  // Check that this is a constant.
+  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
+  if (!C)
+    return false;
+
+  ScaledConstant = (int) C->getZExtValue();
+  if ((ScaledConstant % Scale) != 0)
+    return false;
+
+  ScaledConstant /= Scale;
+  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
+}
+
+/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA /
+/// MLS node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards
+/// (at least on current ARM implementations) which should be avoided.
+bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
+  if (OptLevel == CodeGenOpt::None)
+    return true;
+
+  if (!CheckVMLxHazard)
+    return true;
+
+  if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9())
+    return true;
+
+  if (!N->hasOneUse())
+    return false;
+
+  SDNode *Use = *N->use_begin();
+  if (Use->getOpcode() == ISD::CopyToReg)
+    return true;
+  if (Use->isMachineOpcode()) {
+    const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
+    if (TID.mayStore())
+      return true;
+    unsigned Opcode = TID.getOpcode();
+    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
+      return true;
+    // vmlx feeding into another vmlx. We actually want to unfold
+    // the use later in the MLxExpansion pass. e.g.
+    //   vmla
+    //   vmla (stall 8 cycles)
+    // or
+    //   vmul (5 cycles)
+    //   vadd (5 cycles)
+    //   vmla
+    // adds up to about 18 - 19 cycles, whereas
+    //   vmla
+    //   vmul (stall 4 cycles)
+    //   vadd
+    // adds up to about 14 cycles.
+    return TII->isFpMLxInstruction(Opcode);
+  }
+
+  return false;
+}
+
+bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
+                                            ARM_AM::ShiftOpc ShOpcVal,
+                                            unsigned ShAmt) {
+  if (!Subtarget->isCortexA9())
+    return true;
+  if (Shift.hasOneUse())
+    return true;
+  // R << 2 is free.
+ return ShOpcVal == ARM_AM::lsl && ShAmt == 2; +} + +bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + if (DisableShifterOp) + return false; + + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + return false; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N, + SDValue &BaseReg, + SDValue &ShReg, + SDValue &Opc) { + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + // Do not check isShifterOpProfitable. This must return true. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + ShReg = CurDAG->getRegister(0, MVT::i32); + ShImmVal = RHS->getZExtValue() & 31; + } else { + ShReg = N.getOperand(1); + } + Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, + SDValue &Base, + SDValue &OffImm) { + // Match simple R + imm12 operands. + + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) { + if (N.getOpcode() == ISD::FrameIndex) { + // Match frame index. + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } else + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + + + +bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL && + (!Subtarget->isCortexA9() || N.hasOneUse())) { + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. 
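
The fold named in the comment above (and carried out in the lines that follow, both here in SelectLdStSOReg and again in SelectAddrMode2Worker) rewrites a multiply by an odd constant of the form +/-(2^k + 1) as a base-plus-shifted-offset address. A minimal sketch of the decomposition (hypothetical helper; assumes a two's-complement int, as the real code does):

#include <cassert>

// X * C with C = +/-(2^k + 1) becomes X +/- (X << k), which addressing
// mode 2 expresses as reg +/- (reg << imm).
static bool mulToShiftAdd(int C, unsigned &ShAmt, bool &IsSub) {
  if ((C & 1) == 0)
    return false;              // only odd multipliers are handled
  C &= ~1;                     // X*C = X + X*(C-1); this also works for C < 0
  IsSub = C < 0;
  unsigned Mag = IsSub ? -(unsigned)C : (unsigned)C;
  if (Mag == 0 || (Mag & (Mag - 1)) != 0)
    return false;              // C-1 must be a single power of two
  for (ShAmt = 0; (1u << ShAmt) != Mag; ++ShAmt)
    ;
  return true;
}

int main() {
  unsigned Sh; bool Sub;
  assert(mulToShiftAdd(5, Sh, Sub) && Sh == 2 && !Sub);  // X*5  = X + (X<<2)
  assert(mulToShiftAdd(9, Sh, Sub) && Sh == 3 && !Sub);  // X*9  = X + (X<<3)
  assert(mulToShiftAdd(-3, Sh, Sub) && Sh == 2 && Sub);  // X*-3 = X - (X<<2)
  assert(!mulToShiftAdd(6, Sh, Sub));                    // even: not handled
}
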
+ int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return true; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + // ISD::OR that is equivalent to an ISD::ADD. + !CurDAG->isBaseWithConstantOffset(N)) + return false; + + // Leave simple R +/- imm12 operands for LDRi12 + if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, + -0x1000+1, 0x1000, RHSC)) // 12 bits. + return false; + } + + if (Subtarget->isCortexA9() && !N.hasOneUse()) + // Compute R +/- (R << N) and reuse it. + return false; + + // Otherwise this is R +/- [possibly shifted] R. + ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) + Offset = N.getOperand(1).getOperand(0); + else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + // Try matching (R shl C) + (R). + if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && + !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (!Subtarget->isCortexA9() || + (N.hasOneUse() && + isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return true; +} + + + + +//----- + +AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, + SDValue &Base, + SDValue &Offset, + SDValue &Opc) { + if (N.getOpcode() == ISD::MUL && + (!Subtarget->isCortexA9() || N.hasOneUse())) { + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + // X * [3,5,9] -> X + X * [2,4,8] etc. + int RHSC = (int)RHS->getZExtValue(); + if (RHSC & 1) { + RHSC = RHSC & ~1; + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + if (isPowerOf2_32(RHSC)) { + unsigned ShAmt = Log2_32(RHSC); + Base = Offset = N.getOperand(0); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, + ARM_AM::lsl), + MVT::i32); + return AM2_SHOP; + } + } + } + } + + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + // ISD::OR that is equivalent to an ADD. 
+ !CurDAG->isBaseWithConstantOffset(N)) { + Base = N; + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + + // Match simple R +/- imm12 operands. + if (N.getOpcode() != ISD::SUB) { + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, + -0x1000+1, 0x1000, RHSC)) { // 12 bits. + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + Offset = CurDAG->getRegister(0, MVT::i32); + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = - RHSC; + } + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + } + + if (Subtarget->isCortexA9() && !N.hasOneUse()) { + // Compute R +/- (R << N) and reuse it. + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0, + ARM_AM::no_shift), + MVT::i32); + return AM2_BASE; + } + + // Otherwise this is R +/- [possibly shifted] R. + ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub; + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(1)); + unsigned ShAmt = 0; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = + dyn_cast(N.getOperand(1).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) + Offset = N.getOperand(1).getOperand(0); + else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + // Try matching (R shl C) + (R). + if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && + !(Subtarget->isCortexA9() || N.getOperand(0).hasOneUse())) { + ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0)); + if (ShOpcVal != ARM_AM::no_shift) { + // Check to see if the RHS of the shift is a constant, if not, we can't + // fold it. + if (ConstantSDNode *Sh = + dyn_cast(N.getOperand(0).getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (!Subtarget->isCortexA9() || + (N.hasOneUse() && + isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt))) { + Offset = N.getOperand(0).getOperand(0); + Base = N.getOperand(1); + } else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + } + + Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), + MVT::i32); + return AM2_SHOP; +} + +bool ARMDAGToDAGISel::SelectAddrMode2Offset(SDNode *Op, SDValue N, + SDValue &Offset, SDValue &Opc) { + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) + ? ARM_AM::add : ARM_AM::sub; + int Val; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
+                                                      ARM_AM::no_shift),
+                                    MVT::i32);
+    return true;
+  }
+
+  Offset = N;
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+  unsigned ShAmt = 0;
+  if (ShOpcVal != ARM_AM::no_shift) {
+    // Check to see if the RHS of the shift is a constant; if not, we can't
+    // fold it.
+    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      ShAmt = Sh->getZExtValue();
+      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
+        Offset = N.getOperand(0);
+      else {
+        ShAmt = 0;
+        ShOpcVal = ARM_AM::no_shift;
+      }
+    } else {
+      ShOpcVal = ARM_AM::no_shift;
+    }
+  }
+
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
+                                  MVT::i32);
+  return true;
+}
+
+
+bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
+                                      SDValue &Base, SDValue &Offset,
+                                      SDValue &Opc) {
+  if (N.getOpcode() == ISD::SUB) {
+    // X - C is canonicalized to X + -C, no need to handle it here.
+    Base = N.getOperand(0);
+    Offset = N.getOperand(1);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), MVT::i32);
+    return true;
+  }
+
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    }
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+    return true;
+  }
+
+  // If the RHS is +/- imm8, fold into addr mode.
+  int RHSC;
+  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
+                              -256 + 1, 256, RHSC)) { // 8 bits.
+    Base = N.getOperand(0);
+    if (Base.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    }
+    Offset = CurDAG->getRegister(0, MVT::i32);
+
+    ARM_AM::AddrOpc AddSub = ARM_AM::add;
+    if (RHSC < 0) {
+      AddSub = ARM_AM::sub;
+      RHSC = -RHSC;
+    }
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), MVT::i32);
+    return true;
+  }
+
+  Base = N.getOperand(0);
+  Offset = N.getOperand(1);
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
+                                            SDValue &Offset, SDValue &Opc) {
+  unsigned Opcode = Op->getOpcode();
+  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
+    ? cast<LoadSDNode>(Op)->getAddressingMode()
+    : cast<StoreSDNode>(Op)->getAddressingMode();
+  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
+    ? ARM_AM::add : ARM_AM::sub;
+  int Val;
+  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
+    Offset = CurDAG->getRegister(0, MVT::i32);
+    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), MVT::i32);
+    return true;
+  }
+
+  Offset = N;
+  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), MVT::i32);
+  return true;
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+                                      SDValue &Base, SDValue &Offset) {
+  if (!CurDAG->isBaseWithConstantOffset(N)) {
+    Base = N;
+    if (N.getOpcode() == ISD::FrameIndex) {
+      int FI = cast<FrameIndexSDNode>(N)->getIndex();
+      Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy());
+    } else if (N.getOpcode() == ARMISD::Wrapper &&
+               !(Subtarget->useMovt() &&
+                 N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) {
+      Base = N.getOperand(0);
+    }
+    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+                                       MVT::i32);
+    return true;
+  }
+
+  // If the RHS is +/- imm8, fold into addr mode.
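
The addressing-mode-5 fold announced by the comment above (performed in the lines that follow) accepts a VFP load/store offset only when it is a multiple of 4 whose quarter fits in 8 bits, matching the isScaledConstantInRange(..., /*Scale=*/4, -256 + 1, 256, ...) call below. A standalone sketch of that range check (hypothetical name):

#include <cassert>

// A byte offset is usable as a VLDR/VSTR immediate iff it is a multiple of
// 4 and offset/4 lies in (-256, 256); the sign selects add vs. sub.
static bool fitsAddrMode5(int ByteOffset, int &Imm8, bool &IsSub) {
  if (ByteOffset % 4 != 0)
    return false;
  int Scaled = ByteOffset / 4;
  if (Scaled <= -256 || Scaled >= 256)
    return false;
  IsSub = Scaled < 0;
  Imm8 = IsSub ? -Scaled : Scaled;
  return true;
}

int main() {
  int Imm; bool Sub;
  assert(fitsAddrMode5(1020, Imm, Sub) && Imm == 255 && !Sub);
  assert(fitsAddrMode5(-1020, Imm, Sub) && Imm == 255 && Sub);
  assert(!fitsAddrMode5(1024, Imm, Sub)); // 256 * 4: scaled value too large
  assert(!fitsAddrMode5(2, Imm, Sub));    // not a multiple of 4
}
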
+ int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, + -256 + 1, 256, RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (RHSC < 0) { + AddSub = ARM_AM::sub; + RHSC = -RHSC; + } + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), + MVT::i32); + return true; + } + + Base = N; + Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), + MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, + SDValue &Align) { + Addr = N; + + unsigned Alignment = 0; + if (LSBaseSDNode *LSN = dyn_cast(Parent)) { + // This case occurs only for VLD1-lane/dup and VST1-lane instructions. + // The maximum alignment is equal to the memory size being referenced. + unsigned LSNAlign = LSN->getAlignment(); + unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8; + if (LSNAlign > MemSize && MemSize > 1) + Alignment = MemSize; + } else { + // All other uses of addrmode6 are for intrinsics. For now just record + // the raw alignment value; it will be refined later based on the legal + // alignment operands for the intrinsic. + Alignment = cast(Parent)->getAlignment(); + } + + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, + SDValue &Offset) { + LSBaseSDNode *LdSt = cast(Op); + ISD::MemIndexedMode AM = LdSt->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + Offset = N; + if (ConstantSDNode *NC = dyn_cast(N)) { + if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) + Offset = CurDAG->getRegister(0, MVT::i32); + } + return true; +} + +bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, + SDValue &Offset, SDValue &Label) { + if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { + Offset = N.getOperand(0); + SDValue N1 = N.getOperand(1); + Label = CurDAG->getTargetConstant(cast(N1)->getZExtValue(), + MVT::i32); + return true; + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Thumb Addressing Modes +//===----------------------------------------------------------------------===// + +bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, + SDValue &Base, SDValue &Offset){ + if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { + ConstantSDNode *NC = dyn_cast(N); + if (!NC || !NC->isNullValue()) + return false; + + Base = Offset = N; + return true; + } + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI(SDValue N, SDValue &Base, + SDValue &Offset, unsigned Scale) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (!CurDAG->isBaseWithConstantOffset(N)) + return false; + + // Thumb does not have [sp, r] address mode. 
+ RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) + return false; + + // FIXME: Why do we explicitly check for a match here and then return false? + // Presumably to allow something else to match, but shouldn't this be + // documented? + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) + return false; + + Base = N.getOperand(0); + Offset = N.getOperand(1); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S1(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 1); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S2(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 2); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeRI5S4(SDValue N, + SDValue &Base, + SDValue &Offset) { + return SelectThumbAddrModeRI(N, Base, Offset, 4); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, + SDValue &Base, SDValue &OffImm) { + if (Scale == 4) { + SDValue TmpBase, TmpOffImm; + if (SelectThumbAddrModeSP(N, TmpBase, TmpOffImm)) + return false; // We want to select tLDRspi / tSTRspi instead. + + if (N.getOpcode() == ARMISD::Wrapper && + N.getOperand(0).getOpcode() == ISD::TargetConstantPool) + return false; // We want to select tLDRpci instead. + } + + if (!CurDAG->isBaseWithConstantOffset(N)) { + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } else { + Base = N; + } + + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); + RegisterSDNode *RHSR = dyn_cast(N.getOperand(1)); + if ((LHSR && LHSR->getReg() == ARM::SP) || + (RHSR && RHSR->getReg() == ARM::SP)) { + ConstantSDNode *LHS = dyn_cast(N.getOperand(0)); + ConstantSDNode *RHS = dyn_cast(N.getOperand(1)); + unsigned LHSC = LHS ? LHS->getZExtValue() : 0; + unsigned RHSC = RHS ? RHS->getZExtValue() : 0; + + // Thumb does not have [sp, #imm5] address mode for non-zero imm5. + if (LHSC != 0 || RHSC != 0) return false; + + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + // If the RHS is + imm5 * scale, fold into addr mode. 
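
The imm5 fold mentioned in the comment above (and deliberately rejected in SelectThumbAddrModeRI earlier so other patterns can match) accepts offsets of the form imm5 * scale with imm5 in [0, 32), mirroring the isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, ...) call just below. A standalone sketch (hypothetical name):

#include <cassert>

// A byte offset folds into a Thumb tLDR/tSTR immediate iff it is a multiple
// of the access size and the scaled value fits the 5-bit field.
static bool fitsThumbImm5(int ByteOffset, int Scale, int &Imm5) {
  if (Scale <= 0 || ByteOffset % Scale != 0)
    return false;
  Imm5 = ByteOffset / Scale;
  return Imm5 >= 0 && Imm5 < 32;
}

int main() {
  int Imm;
  assert(fitsThumbImm5(124, 4, Imm) && Imm == 31); // word: up to 124 bytes
  assert(!fitsThumbImm5(128, 4, Imm));             // 32 does not fit 5 bits
  assert(!fitsThumbImm5(30, 4, Imm));              // not word-aligned
  assert(fitsThumbImm5(62, 2, Imm) && Imm == 31);  // halfword scale
}
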
+ int RHSC; + if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + + Base = N.getOperand(0); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); +} + +bool +ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, + SDValue &OffImm) { + return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); +} + +bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, + SDValue &Base, SDValue &OffImm) { + if (N.getOpcode() == ISD::FrameIndex) { + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (!CurDAG->isBaseWithConstantOffset(N)) + return false; + + RegisterSDNode *LHSR = dyn_cast(N.getOperand(0)); + if (N.getOperand(0).getOpcode() == ISD::FrameIndex || + (LHSR && LHSR->getReg() == ARM::SP)) { + // If the RHS is + imm8 * scale, fold into addr mode. + int RHSC; + if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + return false; +} + + +//===----------------------------------------------------------------------===// +// Thumb 2 Addressing Modes +//===----------------------------------------------------------------------===// + + +bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue N, SDValue &BaseReg, + SDValue &Opc) { + if (DisableShifterOp) + return false; + + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); + + // Don't match base register only case. That is matched to a separate + // lower complexity pattern with explicit register operand. + if (ShOpcVal == ARM_AM::no_shift) return false; + + BaseReg = N.getOperand(0); + unsigned ShImmVal = 0; + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + ShImmVal = RHS->getZExtValue() & 31; + Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal)); + return true; + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, + SDValue &Base, SDValue &OffImm) { + // Match simple R + imm12 operands. + + // Base only. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) { + if (N.getOpcode() == ISD::FrameIndex) { + // Match frame index. + int FI = cast(N)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::TargetConstantPool) + return false; // We want to select t2LDRpci instead. + } else + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; + } + + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + if (SelectT2AddrModeImm8(N, Base, OffImm)) + // Let t2LDRi8 handle (R - imm8). 
+ return false; + + int RHSC = (int)RHS->getZExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + // Base only. + Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + return true; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, + SDValue &Base, SDValue &OffImm) { + // Match simple R - imm8 operands. + if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && + !CurDAG->isBaseWithConstantOffset(N)) + return false; + + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getSExtValue(); + if (N.getOpcode() == ISD::SUB) + RHSC = -RHSC; + + if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + } + OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); + return true; + } + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, + SDValue &OffImm){ + unsigned Opcode = Op->getOpcode(); + ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) + ? cast(Op)->getAddressingMode() + : cast(Op)->getAddressingMode(); + int RHSC; + if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. + OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) + ? CurDAG->getTargetConstant(RHSC, MVT::i32) + : CurDAG->getTargetConstant(-RHSC, MVT::i32); + return true; + } + + return false; +} + +bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, + SDValue &Base, + SDValue &OffReg, SDValue &ShImm) { + // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. + if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) + return false; + + // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { + int RHSC = (int)RHS->getZExtValue(); + if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) + return false; + else if (RHSC < 0 && RHSC >= -255) // 8 bits + return false; + } + + if (Subtarget->isCortexA9() && !N.hasOneUse()) { + // Compute R + (R << [1,2,3]) and reuse it. + Base = N; + return false; + } + + // Look for (R + R) or (R + (R << [1,2,3])). + unsigned ShAmt = 0; + Base = N.getOperand(0); + OffReg = N.getOperand(1); + + // Swap if it is ((R << c) + R). + ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg); + if (ShOpcVal != ARM_AM::lsl) { + ShOpcVal = ARM_AM::getShiftOpcForNode(Base); + if (ShOpcVal == ARM_AM::lsl) + std::swap(Base, OffReg); + } + + if (ShOpcVal == ARM_AM::lsl) { + // Check to see if the RHS of the shift is a constant, if not, we can't fold + // it. + if (ConstantSDNode *Sh = dyn_cast(OffReg.getOperand(1))) { + ShAmt = Sh->getZExtValue(); + if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) + OffReg = OffReg.getOperand(0); + else { + ShAmt = 0; + ShOpcVal = ARM_AM::no_shift; + } + } else { + ShOpcVal = ARM_AM::no_shift; + } + } + + ShImm = CurDAG->getTargetConstant(ShAmt, MVT::i32); + + return true; +} + +//===--------------------------------------------------------------------===// + +/// getAL - Returns a ARMCC::AL immediate node. 
+static inline SDValue getAL(SelectionDAG *CurDAG) { + return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32); +} + +SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + EVT LoadedVT = LD->getMemoryVT(); + SDValue Offset, AMOpc; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (LoadedVT == MVT::i32 && + SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { + Opcode = isPre ? ARM::LDR_PRE : ARM::LDR_POST; + Match = true; + } else if (LoadedVT == MVT::i16 && + SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) + ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) + : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); + } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { + if (LD->getExtensionType() == ISD::SEXTLOAD) { + if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST; + } + } else { + if (SelectAddrMode2Offset(N, LD->getOffset(), Offset, AMOpc)) { + Match = true; + Opcode = isPre ? ARM::LDRB_PRE : ARM::LDRB_POST; + } + } + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 6); + } + + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { + LoadSDNode *LD = cast(N); + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM == ISD::UNINDEXED) + return NULL; + + EVT LoadedVT = LD->getMemoryVT(); + bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; + SDValue Offset; + bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); + unsigned Opcode = 0; + bool Match = false; + if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { + switch (LoadedVT.getSimpleVT().SimpleTy) { + case MVT::i32: + Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; + break; + case MVT::i16: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; + else + Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; + break; + case MVT::i8: + case MVT::i1: + if (isSExtLd) + Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; + else + Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST; + break; + default: + return NULL; + } + Match = true; + } + + if (Match) { + SDValue Chain = LD->getChain(); + SDValue Base = LD->getBasePtr(); + SDValue Ops[]= { Base, Offset, getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), Chain }; + return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, + MVT::Other, Ops, 5); + } + + return NULL; +} + +/// PairSRegs - Form a D register from a pair of S registers. +/// +SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + +/// PairDRegs - Form a quad register from a pair of D registers. 
+/// +SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + +/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. +/// +SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + +/// QuadSRegs - Form 4 consecutive S registers. +/// +SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + +/// QuadDRegs - Form 4 consecutive D registers. +/// +SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + +/// QuadQRegs - Form 4 consecutive Q registers. +/// +SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + +/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand +/// of a NEON VLD or VST instruction. The supported values depend on the +/// number of registers being loaded. 
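+//
+// A worked example of the clamping below (values illustrative): a vld2 of
+// two 64-bit vectors touches NumRegs == 2 D registers, so a requested
+// alignment of 32 is clamped to 16, while an alignment of 4 is below the
+// 8-byte minimum and becomes 0 (meaning "use the default encoding").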
+SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs,
+                                       bool is64BitVector) {
+  unsigned NumRegs = NumVecs;
+  if (!is64BitVector && NumVecs < 3)
+    NumRegs *= 2;
+
+  unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
+  if (Alignment >= 32 && NumRegs == 4)
+    Alignment = 32;
+  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
+    Alignment = 16;
+  else if (Alignment >= 8)
+    Alignment = 8;
+  else
+    Alignment = 0;
+
+  return CurDAG->getTargetConstant(Alignment, MVT::i32);
+}
+
+SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
+                                   unsigned *DOpcodes, unsigned *QOpcodes0,
+                                   unsigned *QOpcodes1) {
+  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue MemAddr, Align;
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+    return NULL;
+
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  bool is64BitVector = VT.is64BitVector();
+  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+  unsigned OpcodeIndex;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled vld type");
+    // Double-register operations:
+  case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4i16: OpcodeIndex = 1; break;
+  case MVT::v2f32:
+  case MVT::v2i32: OpcodeIndex = 2; break;
+  case MVT::v1i64: OpcodeIndex = 3; break;
+    // Quad-register operations:
+  case MVT::v16i8: OpcodeIndex = 0; break;
+  case MVT::v8i16: OpcodeIndex = 1; break;
+  case MVT::v4f32:
+  case MVT::v4i32: OpcodeIndex = 2; break;
+  case MVT::v2i64: OpcodeIndex = 3;
+    assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+    break;
+  }
+
+  EVT ResTy;
+  if (NumVecs == 1)
+    ResTy = VT;
+  else {
+    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
+    if (!is64BitVector)
+      ResTyElts *= 2;
+    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
+  }
+  std::vector<EVT> ResTys;
+  ResTys.push_back(ResTy);
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
+
+  SDValue Pred = getAL(CurDAG);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+  SDNode *VLd;
+  SmallVector<SDValue, 7> Ops;
+
+  // Double registers and VLD1/VLD2 quad registers are directly supported.
+  if (is64BitVector || NumVecs <= 2) {
+    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
+                    QOpcodes0[OpcodeIndex]);
+    Ops.push_back(MemAddr);
+    Ops.push_back(Align);
+    if (isUpdating) {
+      SDValue Inc = N->getOperand(AddrOpIdx + 1);
+      Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
+    }
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0);
+    Ops.push_back(Chain);
+    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size());
+
+  } else {
+    // Otherwise, quad registers are loaded with two separate instructions,
+    // where one loads the even registers and the other loads the odd
+    // registers.
+    EVT AddrTy = MemAddr.getValueType();
+
+    // Load the even subregs. This is always an updating load, so that it
+    // provides the address to the second load for the odd subregs.
+    SDValue ImplDef =
+      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
+    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
+    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
+                                          ResTy, AddrTy, MVT::Other, OpsA, 7);
+    Chain = SDValue(VLdA, 2);
+
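+    // For example (illustrative registers only), a quad-register vld3.32
+    // becomes "vld3.32 {d0,d2,d4}, [r0]!" for the even halves followed by
+    // "vld3.32 {d1,d3,d5}, [r0]" for the odd halves, with the first load
+    // post-incrementing the address consumed by the second.
+
+    // Load the odd subregs.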
+    Ops.push_back(SDValue(VLdA, 1));
+    Ops.push_back(Align);
+    if (isUpdating) {
+      SDValue Inc = N->getOperand(AddrOpIdx + 1);
+      assert(isa<ConstantSDNode>(Inc.getNode()) &&
+             "only constant post-increment update allowed for VLD3/4");
+      (void)Inc;
+      Ops.push_back(Reg0);
+    }
+    Ops.push_back(SDValue(VLdA, 0));
+    Ops.push_back(Pred);
+    Ops.push_back(Reg0);
+    Ops.push_back(Chain);
+    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
+                                 Ops.data(), Ops.size());
+  }
+
+  if (NumVecs == 1)
+    return VLd;
+
+  // Extract out the subregisters.
+  SDValue SuperReg = SDValue(VLd, 0);
+  assert(ARM::dsub_7 == ARM::dsub_0+7 &&
+         ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
+  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+    ReplaceUses(SDValue(N, Vec),
+                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
+  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
+  if (isUpdating)
+    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
+  return NULL;
+}
+
+SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
+                                   unsigned *DOpcodes, unsigned *QOpcodes0,
+                                   unsigned *QOpcodes1) {
+  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+  DebugLoc dl = N->getDebugLoc();
+
+  SDValue MemAddr, Align;
+  unsigned AddrOpIdx = isUpdating ? 1 : 2;
+  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
+  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
+    return NULL;
+
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getOperand(Vec0Idx).getValueType();
+  bool is64BitVector = VT.is64BitVector();
+  Align = GetVLDSTAlign(Align, NumVecs, is64BitVector);
+
+  unsigned OpcodeIndex;
+  switch (VT.getSimpleVT().SimpleTy) {
+  default: llvm_unreachable("unhandled vst type");
+    // Double-register operations:
+  case MVT::v8i8:  OpcodeIndex = 0; break;
+  case MVT::v4i16: OpcodeIndex = 1; break;
+  case MVT::v2f32:
+  case MVT::v2i32: OpcodeIndex = 2; break;
+  case MVT::v1i64: OpcodeIndex = 3; break;
+    // Quad-register operations:
+  case MVT::v16i8: OpcodeIndex = 0; break;
+  case MVT::v8i16: OpcodeIndex = 1; break;
+  case MVT::v4f32:
+  case MVT::v4i32: OpcodeIndex = 2; break;
+  case MVT::v2i64: OpcodeIndex = 3;
+    assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+    break;
+  }
+
+  std::vector<EVT> ResTys;
+  if (isUpdating)
+    ResTys.push_back(MVT::i32);
+  ResTys.push_back(MVT::Other);
+
+  SDValue Pred = getAL(CurDAG);
+  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+  SmallVector<SDValue, 7> Ops;
+
+  // Double registers and VST1/VST2 quad registers are directly supported.
+  if (is64BitVector || NumVecs <= 2) {
+    SDValue SrcReg;
+    if (NumVecs == 1) {
+      SrcReg = N->getOperand(Vec0Idx);
+    } else if (is64BitVector) {
+      // Form a REG_SEQUENCE to force register allocation.
+      SDValue V0 = N->getOperand(Vec0Idx + 0);
+      SDValue V1 = N->getOperand(Vec0Idx + 1);
+      if (NumVecs == 2)
+        SrcReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+      else {
+        SDValue V2 = N->getOperand(Vec0Idx + 2);
+        // If it's a vst3, form a quad D-register and leave the last part as
+        // an undef.
+        SDValue V3 = (NumVecs == 3)
+          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+          : N->getOperand(Vec0Idx + 3);
+        SrcReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+      }
+    } else {
+      // Form a QQ register.
+      SDValue Q0 = N->getOperand(Vec0Idx);
+      SDValue Q1 = N->getOperand(Vec0Idx + 1);
+      SrcReg = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+    }
+
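+    // Note (a sketch of the design): the v2i64/v4i64 types above are just
+    // carriers wide enough for the concatenated registers; REG_SEQUENCE only
+    // needs the total bit width to pick the D-pair or D-quad register class.
+
+    unsigned Opc = (is64BitVector ?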
DOpcodes[OpcodeIndex] : + QOpcodes0[OpcodeIndex]); + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + } + Ops.push_back(SrcReg); + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + } + + // Otherwise, quad registers are stored with two separate instructions, + // where one stores the even registers and the other stores the odd registers. + + // Form the QQQQ REG_SEQUENCE. + SDValue V0 = N->getOperand(Vec0Idx + 0); + SDValue V1 = N->getOperand(Vec0Idx + 1); + SDValue V2 = N->getOperand(Vec0Idx + 2); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(Vec0Idx + 3); + SDValue RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + + // Store the even D registers. This is always an updating store, so that it + // provides the address to the second store for the odd subregs. + const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; + SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, + MemAddr.getValueType(), + MVT::Other, OpsA, 7); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops.push_back(SDValue(VStA, 0)); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + assert(isa(Inc.getNode()) && + "only constant post-increment update allowed for VST3/4"); + (void)Inc; + Ops.push_back(Reg0); + } + Ops.push_back(RegSeq); + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); +} + +SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, + bool isUpdating, unsigned NumVecs, + unsigned *DOpcodes, + unsigned *QOpcodes) { + assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + + SDValue MemAddr, Align; + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) + if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) + return NULL; + + SDValue Chain = N->getOperand(0); + unsigned Lane = + cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); + EVT VT = N->getOperand(Vec0Idx).getValueType(); + bool is64BitVector = VT.is64BitVector(); + + unsigned Alignment = 0; + if (NumVecs != 3) { + Alignment = cast(Align)->getZExtValue(); + unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + if (Alignment > NumBytes) + Alignment = NumBytes; + if (Alignment < 8 && Alignment < NumBytes) + Alignment = 0; + // Alignment must be a power of two; make sure of that. + Alignment = (Alignment & -Alignment); + if (Alignment == 1) + Alignment = 0; + } + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld/vst lane type"); + // Double-register operations: + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + // Quad-register operations: + case MVT::v8i16: OpcodeIndex = 0; break; + case MVT::v4f32: + case MVT::v4i32: OpcodeIndex = 1; break; + } + + std::vector ResTys; + if (IsLoad) { + unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; + if (!is64BitVector) + ResTyElts *= 2; + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), + MVT::i64, ResTyElts)); + } + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + + SDValue Pred = getAL(CurDAG); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + + SmallVector Ops; + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + } + + SDValue SuperReg; + SDValue V0 = N->getOperand(Vec0Idx + 0); + SDValue V1 = N->getOperand(Vec0Idx + 1); + if (NumVecs == 2) { + if (is64BitVector) + SuperReg = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); + else + SuperReg = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); + } else { + SDValue V2 = N->getOperand(Vec0Idx + 2); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(Vec0Idx + 3); + if (is64BitVector) + SuperReg = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + else + SuperReg = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); + } + Ops.push_back(SuperReg); + Ops.push_back(getI32Imm(Lane)); + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + + unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : + QOpcodes[OpcodeIndex]); + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, + Ops.data(), Ops.size()); + if (!IsLoad) + return VLdLn; + + // Extract the subregisters. + SuperReg = SDValue(VLdLn, 0); + assert(ARM::dsub_7 == ARM::dsub_0+7 && + ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, + unsigned NumVecs, unsigned *Opcodes) { + assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) + return NULL; + + SDValue Chain = N->getOperand(0); + EVT VT = N->getValueType(0); + + unsigned Alignment = 0; + if (NumVecs != 3) { + Alignment = cast(Align)->getZExtValue(); + unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8; + if (Alignment > NumBytes) + Alignment = NumBytes; + if (Alignment < 8 && Alignment < NumBytes) + Alignment = 0; + // Alignment must be a power of two; make sure of that. + Alignment = (Alignment & -Alignment); + if (Alignment == 1) + Alignment = 0; + } + Align = CurDAG->getTargetConstant(Alignment, MVT::i32); + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vld-dup type"); + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + } + + SDValue Pred = getAL(CurDAG); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue SuperReg; + unsigned Opc = Opcodes[OpcodeIndex]; + SmallVector Ops; + Ops.push_back(MemAddr); + Ops.push_back(Align); + if (isUpdating) { + SDValue Inc = N->getOperand(2); + Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + } + Ops.push_back(Pred); + Ops.push_back(Reg0); + Ops.push_back(Chain); + + unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; + std::vector ResTys; + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts)); + if (isUpdating) + ResTys.push_back(MVT::i32); + ResTys.push_back(MVT::Other); + SDNode *VLdDup = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SuperReg = SDValue(VLdDup, 0); + + // Extract the subregisters. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + unsigned SubIdx = ARM::dsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); + return NULL; +} + +SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, + unsigned Opc) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + unsigned FirstTblReg = IsExt ? 2 : 1; + + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + SDValue V0 = N->getOperand(FirstTblReg + 0); + SDValue V1 = N->getOperand(FirstTblReg + 1); + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + else { + SDValue V2 = N->getOperand(FirstTblReg + 2); + // If it's a vtbl3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(FirstTblReg + 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + SmallVector Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); + Ops.push_back(getAL(CurDAG)); // predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register + return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); +} + +SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, + bool isSigned) { + if (!Subtarget->hasV6T2Ops()) + return NULL; + + unsigned Opc = isSigned ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) + : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); + + + // For unsigned extracts, check for a shift right and mask + unsigned And_imm = 0; + if (N->getOpcode() == ISD::AND) { + if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { + + // The immediate is a mask of the low bits iff imm & (imm+1) == 0 + if (And_imm & (And_imm + 1)) + return NULL; + + unsigned Srl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, + Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + + unsigned Width = CountTrailingOnes_32(And_imm); + unsigned LSB = Srl_imm; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; + } + + // Otherwise, we're looking for a shift of a shift + unsigned Shl_imm = 0; + if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { + assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); + unsigned Srl_imm = 0; + if (isInt32Immediate(N->getOperand(1), Srl_imm)) { + assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); + unsigned Width = 32 - Srl_imm; + int LSB = Srl_imm - Shl_imm; + if (LSB < 0) + return NULL; + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0).getOperand(0), + CurDAG->getTargetConstant(LSB, MVT::i32), + CurDAG->getTargetConstant(Width, MVT::i32), + getAL(CurDAG), Reg0 }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + return NULL; +} + +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { + unsigned SOVal = cast(CPTmp1)->getZExtValue(); + unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); + unsigned Opc = 0; + switch (SOShOp) { + case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; + case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; + case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; + case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; + default: + llvm_unreachable("Unknown so_reg opcode!"); + break; + } + SDValue SOShImm = + CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectShifterOperandReg(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, ARM::MOVCCs, MVT::i32, Ops, 7); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast(TrueVal); + if (!T) + return 0; + + unsigned Opc = 0; + unsigned TrueImm = T->getZExtValue(); + if (is_t2_so_imm(TrueImm)) { + Opc = ARM::t2MOVCCi; + } else if (TrueImm <= 0xffff) { + Opc = ARM::t2MOVCCi16; + } else if (is_t2_so_imm_not(TrueImm)) { + TrueImm = ~TrueImm; + Opc = 
ARM::t2MVNCCi; + } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { + // Large immediate. + Opc = ARM::t2MOVCCi32imm; + } + + if (Opc) { + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast(TrueVal); + if (!T) + return 0; + + unsigned Opc = 0; + unsigned TrueImm = T->getZExtValue(); + bool isSoImm = is_so_imm(TrueImm); + if (isSoImm) { + Opc = ARM::MOVCCi; + } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { + Opc = ARM::MOVCCi16; + } else if (is_so_imm_not(TrueImm)) { + TrueImm = ~TrueImm; + Opc = ARM::MVNCCi; + } else if (TrueVal.getNode()->hasOneUse() && + (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { + // Large immediate. + Opc = ARM::MOVCCi32imm; + } + + if (Opc) { + SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + + return 0; +} + +SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue FalseVal = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue CC = N->getOperand(2); + SDValue CCR = N->getOperand(3); + SDValue InFlag = N->getOperand(4); + assert(CC.getOpcode() == ISD::Constant); + assert(CCR.getOpcode() == ISD::Register); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast(CC)->getZExtValue(); + + if (!Subtarget->isThumb1Only() && VT == MVT::i32) { + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + // + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, 
(imm:i32):$cc) + // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 11 size = 0 + // + // Also FCPYScc and FCPYDcc. + SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::i32: + Opc = Subtarget->isThumb() + ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) + : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::VMOVScc; + break; + case MVT::f64: + Opc = ARM::VMOVDcc; + break; + } + return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); +} + +SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { + // The only time a CONCAT_VECTORS operation can have legal types is when + // two 64-bit vectors are concatenated to a 128-bit vector. + EVT VT = N->getValueType(0); + if (!VT.is128BitVector() || N->getNumOperands() != 2) + llvm_unreachable("unexpected CONCAT_VECTORS"); + return PairDRegs(VT, N->getOperand(0), N->getOperand(1)); +} + +SDNode *ARMDAGToDAGISel::Select(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + + if (N->isMachineOpcode()) + return NULL; // Already selected. + + switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + unsigned Val = cast(N)->getZExtValue(); + bool UseCP = true; + if (Subtarget->hasThumb2()) + // Thumb2-aware targets have the MOVT instruction, so all immediates can + // be done with MOV + MOVT, at worst. + UseCP = 0; + else { + if (Subtarget->isThumb()) { + UseCP = (Val > 255 && // MOV + ~Val > 255 && // MOV + MVN + !ARM_AM::isThumbImmShiftedVal(Val)); // MOV + LSL + } else + UseCP = (ARM_AM::getSOImmVal(Val) == -1 && // MOV + ARM_AM::getSOImmVal(~Val) == -1 && // MVN + !ARM_AM::isSOImmTwoPartVal(Val)); // two instrs. + } + + if (UseCP) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get( + Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI.getPointerTy()); + + SDNode *ResNode; + if (Subtarget->isThumb1Only()) { + SDValue Pred = getAL(CurDAG); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; + ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, + Ops, 4); + } else { + SDValue Ops[] = { + CPIdx, + CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32), + CurDAG->getEntryNode() + }; + ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, + Ops, 5); + } + ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0)); + return NULL; + } + + // Other cases are autogenerated. + break; + } + case ISD::FrameIndex: { + // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. + int FI = cast(N)->getIndex(); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + if (Subtarget->isThumb1Only()) { + return CurDAG->SelectNodeTo(N, ARM::tADDrSPi, MVT::i32, TFI, + CurDAG->getTargetConstant(0, MVT::i32)); + } else { + unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 
+ ARM::t2ADDri : ARM::ADDri); + SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); + } + } + case ISD::SRL: + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + break; + case ISD::SRA: + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true)) + return I; + break; + case ISD::MUL: + if (Subtarget->isThumb1Only()) + break; + if (ConstantSDNode *C = dyn_cast(N->getOperand(1))) { + unsigned RHSV = C->getZExtValue(); + if (!RHSV) break; + if (isPowerOf2_32(RHSV-1)) { // 2^n+1? + unsigned ShImm = Log2_32(RHSV-1); + if (ShImm >= 32) + break; + SDValue V = N->getOperand(0); + ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); + SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + if (Subtarget->isThumb()) { + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops, 6); + } else { + SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::ADDrs, MVT::i32, Ops, 7); + } + } + if (isPowerOf2_32(RHSV+1)) { // 2^n-1? + unsigned ShImm = Log2_32(RHSV+1); + if (ShImm >= 32) + break; + SDValue V = N->getOperand(0); + ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); + SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); + if (Subtarget->isThumb()) { + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); + } else { + SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); + } + } + } + break; + case ISD::AND: { + // Check for unsigned bitfield extract + if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false)) + return I; + + // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits + // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits + // are entirely contributed by c2 and lower 16-bits are entirely contributed + // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). + // Select it to: "movt x, ((c1 & 0xffff) >> 16) + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + break; + unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) + ? ARM::t2MOVTi16 + : (Subtarget->hasV6T2Ops() ? 
ARM::MOVTi16 : 0); + if (!Opc) + break; + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + ConstantSDNode *N1C = dyn_cast(N1); + if (!N1C) + break; + if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { + SDValue N2 = N0.getOperand(1); + ConstantSDNode *N2C = dyn_cast(N2); + if (!N2C) + break; + unsigned N1CVal = N1C->getZExtValue(); + unsigned N2CVal = N2C->getZExtValue(); + if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && + (N1CVal & 0xffffU) == 0xffffU && + (N2CVal & 0xffffU) == 0x0U) { + SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, + MVT::i32); + SDValue Ops[] = { N0.getOperand(0), Imm16, + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Opc, dl, VT, Ops, 4); + } + } + break; + } + case ARMISD::VMOVRRD: + return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, + N->getOperand(0), getAL(CurDAG), + CurDAG->getRegister(0, MVT::i32)); + case ISD::UMUL_LOHI: { + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4); + } else { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::UMULL : ARM::UMULLv5, + dl, MVT::i32, MVT::i32, Ops, 5); + } + } + case ISD::SMUL_LOHI: { + if (Subtarget->isThumb1Only()) + break; + if (Subtarget->isThumb()) { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); + } else { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), + CurDAG->getRegister(0, MVT::i32) }; + return CurDAG->getMachineNode(Subtarget->hasV6Ops() ? + ARM::SMULL : ARM::SMULLv5, + dl, MVT::i32, MVT::i32, Ops, 5); + } + } + case ISD::LOAD: { + SDNode *ResNode = 0; + if (Subtarget->isThumb() && Subtarget->hasThumb2()) + ResNode = SelectT2IndexedLoad(N); + else + ResNode = SelectARMIndexedLoad(N); + if (ResNode) + return ResNode; + // Other cases are autogenerated. + break; + } + case ARMISD::BRCOND: { + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) + // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + + unsigned Opc = Subtarget->isThumb() ? + ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue InFlag = N->getOperand(4); + assert(N1.getOpcode() == ISD::BasicBlock); + assert(N2.getOpcode() == ISD::Constant); + assert(N3.getOpcode() == ISD::Register); + + SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) + cast(N2)->getZExtValue()), + MVT::i32); + SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; + SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, + MVT::Glue, Ops, 5); + Chain = SDValue(ResNode, 0); + if (N->getNumValues() == 2) { + InFlag = SDValue(ResNode, 1); + ReplaceUses(SDValue(N, 1), InFlag); + } + ReplaceUses(SDValue(N, 0), + SDValue(Chain.getNode(), Chain.getResNo())); + return NULL; + } + case ARMISD::CMOV: + return SelectCMOVOp(N); + case ARMISD::VZIP: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VZIPd8; break; + case MVT::v4i16: Opc = ARM::VZIPd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VZIPd32; break; + case MVT::v16i8: Opc = ARM::VZIPq8; break; + case MVT::v8i16: Opc = ARM::VZIPq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VZIPq32; break; + } + SDValue Pred = getAL(CurDAG); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + } + case ARMISD::VUZP: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VUZPd8; break; + case MVT::v4i16: Opc = ARM::VUZPd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VUZPd32; break; + case MVT::v16i8: Opc = ARM::VUZPq8; break; + case MVT::v8i16: Opc = ARM::VUZPq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VUZPq32; break; + } + SDValue Pred = getAL(CurDAG); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + } + case ARMISD::VTRN: { + unsigned Opc = 0; + EVT VT = N->getValueType(0); + switch (VT.getSimpleVT().SimpleTy) { + default: return NULL; + case MVT::v8i8: Opc = ARM::VTRNd8; break; + case MVT::v4i16: Opc = ARM::VTRNd16; break; + case MVT::v2f32: + case MVT::v2i32: Opc = ARM::VTRNd32; break; + case MVT::v16i8: Opc = ARM::VTRNq8; break; + case MVT::v8i16: Opc = ARM::VTRNq16; break; + case MVT::v4f32: + case MVT::v4i32: Opc = ARM::VTRNq32; break; + } + SDValue Pred = getAL(CurDAG); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); + } + case ARMISD::BUILD_VECTOR: { + EVT VecVT = N->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + unsigned NumElts = VecVT.getVectorNumElements(); + if (EltVT == MVT::f64) { + assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); + return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + } + assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); + if (NumElts == 2) + return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); + return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } + + case ARMISD::VLD2DUP: { + unsigned Opcodes[] 
= { ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd16Pseudo, + ARM::VLD2DUPd32Pseudo }; + return SelectVLDDup(N, false, 2, Opcodes); + } + + case ARMISD::VLD3DUP: { + unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, + ARM::VLD3DUPd32Pseudo }; + return SelectVLDDup(N, false, 3, Opcodes); + } + + case ARMISD::VLD4DUP: { + unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, + ARM::VLD4DUPd32Pseudo }; + return SelectVLDDup(N, false, 4, Opcodes); + } + + case ARMISD::VLD2DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd16Pseudo_UPD, + ARM::VLD2DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 2, Opcodes); + } + + case ARMISD::VLD3DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd16Pseudo_UPD, + ARM::VLD3DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 3, Opcodes); + } + + case ARMISD::VLD4DUP_UPD: { + unsigned Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd16Pseudo_UPD, + ARM::VLD4DUPd32Pseudo_UPD }; + return SelectVLDDup(N, true, 4, Opcodes); + } + + case ARMISD::VLD1_UPD: { + unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD, + ARM::VLD1d32_UPD, ARM::VLD1d64_UPD }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD, + ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VLD2_UPD: { + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, + ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, + ARM::VLD2q32Pseudo_UPD }; + return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VLD3_UPD: { + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d16Pseudo_UPD, + ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, + ARM::VLD3q16oddPseudo_UPD, + ARM::VLD3q32oddPseudo_UPD }; + return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VLD4_UPD: { + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, + ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD, + ARM::VLD4q16oddPseudo_UPD, + ARM::VLD4q32oddPseudo_UPD }; + return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VLD2LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd16Pseudo_UPD, + ARM::VLD2LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, + ARM::VLD2LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); + } + + case ARMISD::VLD3LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd16Pseudo_UPD, + ARM::VLD3LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, + ARM::VLD3LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); + } + + case ARMISD::VLD4LN_UPD: { + unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd16Pseudo_UPD, + ARM::VLD4LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, + ARM::VLD4LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); + } + + case ARMISD::VST1_UPD: { + unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD, + ARM::VST1d32_UPD, ARM::VST1d64_UPD }; + unsigned 
QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD, + ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VST2_UPD: { + unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, + ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, + ARM::VST2q32Pseudo_UPD }; + return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); + } + + case ARMISD::VST3_UPD: { + unsigned DOpcodes[] = { ARM::VST3d8Pseudo_UPD, ARM::VST3d16Pseudo_UPD, + ARM::VST3d32Pseudo_UPD, ARM::VST1d64TPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, + ARM::VST3q16oddPseudo_UPD, + ARM::VST3q32oddPseudo_UPD }; + return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VST4_UPD: { + unsigned DOpcodes[] = { ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, + ARM::VST4d32Pseudo_UPD, ARM::VST1d64QPseudo_UPD }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD, + ARM::VST4q16oddPseudo_UPD, + ARM::VST4q32oddPseudo_UPD }; + return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case ARMISD::VST2LN_UPD: { + unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd16Pseudo_UPD, + ARM::VST2LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, + ARM::VST2LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); + } + + case ARMISD::VST3LN_UPD: { + unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd16Pseudo_UPD, + ARM::VST3LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, + ARM::VST3LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); + } + + case ARMISD::VST4LN_UPD: { + unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd16Pseudo_UPD, + ARM::VST4LNd32Pseudo_UPD }; + unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, + ARM::VST4LNq32Pseudo_UPD }; + return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); + } + + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vld1: { + unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, + ARM::VLD1d32, ARM::VLD1d64 }; + unsigned QOpcodes[] = { ARM::VLD1q8Pseudo, ARM::VLD1q16Pseudo, + ARM::VLD1q32Pseudo, ARM::VLD1q64Pseudo }; + return SelectVLD(N, false, 1, DOpcodes, QOpcodes, 0); + } + + case Intrinsic::arm_neon_vld2: { + unsigned DOpcodes[] = { ARM::VLD2d8Pseudo, ARM::VLD2d16Pseudo, + ARM::VLD2d32Pseudo, ARM::VLD1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, + ARM::VLD2q32Pseudo }; + return SelectVLD(N, false, 2, DOpcodes, QOpcodes, 0); + } + + case Intrinsic::arm_neon_vld3: { + unsigned DOpcodes[] = { ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, + ARM::VLD3d32Pseudo, ARM::VLD1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, + ARM::VLD3q16Pseudo_UPD, + ARM::VLD3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8oddPseudo, + ARM::VLD3q16oddPseudo, + ARM::VLD3q32oddPseudo }; + return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld4: { + unsigned DOpcodes[] = { ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, + ARM::VLD4d32Pseudo, ARM::VLD1d64QPseudo }; + 
unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, + ARM::VLD4q16Pseudo_UPD, + ARM::VLD4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8oddPseudo, + ARM::VLD4q16oddPseudo, + ARM::VLD4q32oddPseudo }; + return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vld2lane: { + unsigned DOpcodes[] = { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo, + ARM::VLD2LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); + } + + case Intrinsic::arm_neon_vld3lane: { + unsigned DOpcodes[] = { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo, + ARM::VLD3LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); + } + + case Intrinsic::arm_neon_vld4lane: { + unsigned DOpcodes[] = { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo, + ARM::VLD4LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo }; + return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); + } + + case Intrinsic::arm_neon_vst1: { + unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, + ARM::VST1d32, ARM::VST1d64 }; + unsigned QOpcodes[] = { ARM::VST1q8Pseudo, ARM::VST1q16Pseudo, + ARM::VST1q32Pseudo, ARM::VST1q64Pseudo }; + return SelectVST(N, false, 1, DOpcodes, QOpcodes, 0); + } + + case Intrinsic::arm_neon_vst2: { + unsigned DOpcodes[] = { ARM::VST2d8Pseudo, ARM::VST2d16Pseudo, + ARM::VST2d32Pseudo, ARM::VST1q64Pseudo }; + unsigned QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, + ARM::VST2q32Pseudo }; + return SelectVST(N, false, 2, DOpcodes, QOpcodes, 0); + } + + case Intrinsic::arm_neon_vst3: { + unsigned DOpcodes[] = { ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, + ARM::VST3d32Pseudo, ARM::VST1d64TPseudo }; + unsigned QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, + ARM::VST3q16Pseudo_UPD, + ARM::VST3q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8oddPseudo, + ARM::VST3q16oddPseudo, + ARM::VST3q32oddPseudo }; + return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vst4: { + unsigned DOpcodes[] = { ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, + ARM::VST4d32Pseudo, ARM::VST1d64QPseudo }; + unsigned QOpcodes0[] = { ARM::VST4q8Pseudo_UPD, + ARM::VST4q16Pseudo_UPD, + ARM::VST4q32Pseudo_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8oddPseudo, + ARM::VST4q16oddPseudo, + ARM::VST4q32oddPseudo }; + return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); + } + + case Intrinsic::arm_neon_vst2lane: { + unsigned DOpcodes[] = { ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo, + ARM::VST2LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); + } + + case Intrinsic::arm_neon_vst3lane: { + unsigned DOpcodes[] = { ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo, + ARM::VST3LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); + } + + case Intrinsic::arm_neon_vst4lane: { + unsigned DOpcodes[] = { ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo, + ARM::VST4LNd32Pseudo }; + unsigned QOpcodes[] = { ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo }; + return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); + } + } + break; + } + + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vtbl2: + 
+      return SelectVTBL(N, false, 2, ARM::VTBL2Pseudo);
+    case Intrinsic::arm_neon_vtbl3:
+      return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
+    case Intrinsic::arm_neon_vtbl4:
+      return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
+
+    case Intrinsic::arm_neon_vtbx2:
+      return SelectVTBL(N, true, 2, ARM::VTBX2Pseudo);
+    case Intrinsic::arm_neon_vtbx3:
+      return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
+    case Intrinsic::arm_neon_vtbx4:
+      return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
+    }
+    break;
+  }
+
+  case ISD::CONCAT_VECTORS:
+    return SelectConcatVector(N);
+  }
+
+  return SelectCode(N);
+}
+
+bool ARMDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+                             std::vector<SDValue> &OutOps) {
+  assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+  // Require the address to be in a register. That is safe for all ARM
+  // variants and it is hard to do anything much smarter without knowing
+  // how the operand is used.
+  OutOps.push_back(Op);
+  return false;
+}
+
+/// createARMISelDag - This pass converts a legalized DAG into a
+/// ARM-specific DAG, ready for instruction scheduling.
+///
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
+                                     CodeGenOpt::Level OptLevel) {
+  return new ARMDAGToDAGISel(TM, OptLevel);
+}
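+// A minimal usage sketch (assuming a configured ARMBaseTargetMachine `TM`
+// and a pass manager `PM`):
+//   PM.add(createARMISelDag(TM, CodeGenOpt::Default));
+// The target adds this pass from its addInstSelector() hook when building
+// the codegen pipeline.
diff --git a/final/lib/Target/ARM/ARMISelLowering.cpp b/final/lib/Target/ARM/ARMISelLowering.cpp
new file mode 100644
index 00000000000..82e37c7075c
--- /dev/null
+++ b/final/lib/Target/ARM/ARMISelLowering.cpp
@@ -0,0 +1,7055 @@
+//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.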
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-isel"
+#include "ARM.h"
+#include "ARMAddressingModes.h"
+#include "ARMCallingConv.h"
+#include "ARMConstantPoolValue.h"
+#include "ARMISelLowering.h"
+#include "ARMMachineFunctionInfo.h"
+#include "ARMPerfectShuffle.h"
+#include "ARMRegisterInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "ARMTargetObjectFile.h"
+#include "llvm/CallingConv.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instruction.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include <sstream>
+using namespace llvm;
+
+STATISTIC(NumTailCalls, "Number of tail calls");
+STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+  cl::init(false));
+
+cl::opt<bool>
+EnableARMLongCalls("arm-long-calls", cl::Hidden,
+  cl::desc("Generate calls via indirect call instructions"),
+  cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+  cl::desc("Enable / disable ARM interworking (for debugging only)"),
+  cl::init(true));
+
+void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
+                                       EVT PromotedBitwiseVT) {
+  if (VT != PromotedLdStVT) {
+    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
+    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
+                       PromotedLdStVT.getSimpleVT());
+
+    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
+    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
+                       PromotedLdStVT.getSimpleVT());
+  }
+
+  EVT ElemTy = VT.getVectorElementType();
+  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
+  if (ElemTy != MVT::i32) {
+    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
+    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
+    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
+    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
+  }
+  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
+  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
+  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
+  if (VT.isInteger()) {
+    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
+    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
+    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction(VT.getSimpleVT(),
+                          (MVT::SimpleValueType)InnerVT, Expand);
+  }
+  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);
+
+  // Promote all bit-wise operations.
+  if (VT.isInteger() && VT != PromotedBitwiseVT) {
+    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
+    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
+                       PromotedBitwiseVT.getSimpleVT());
+    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
+    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
+                       PromotedBitwiseVT.getSimpleVT());
+    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
+    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
+                       PromotedBitwiseVT.getSimpleVT());
+  }
+
+  // Neon does not support vector divide/remainder operations.
+  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
+  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
+}
+
+void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
+  addRegisterClass(VT, ARM::DPRRegisterClass);
+  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
+}
+
+void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
+  addRegisterClass(VT, ARM::QPRRegisterClass);
+  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
+}
+
+static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
+  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
+    return new TargetLoweringObjectFileMachO();
+
+  return new ARMElfTargetObjectFile();
+}
+
+ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
+    : TargetLowering(TM, createTLOF(TM)) {
+  Subtarget = &TM.getSubtarget<ARMSubtarget>();
+  RegInfo = TM.getRegisterInfo();
+  Itins = TM.getInstrItineraryData();
+
+  if (Subtarget->isTargetDarwin()) {
+    // Uses VFP for Thumb libfuncs if available.
+    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
+      // Single-precision floating-point arithmetic.
+      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
+      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
+      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
+      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
+
+      // Double-precision floating-point arithmetic.
+      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
+      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
+      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
+      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
+
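+      // The *vfp comparison helpers below return a nonzero value when the
+      // tested relation holds, so most are paired with ISD::SETNE via the
+      // setCmpLibcallCC calls: the compare lowers to "call; test result != 0".
+      // A sketch of the expansion for an ordered-less-than f32 compare:
+      //   bl  __ltsf2vfp      ; r0 = (a < b) ? 1 : 0
+      //   cmp r0, #0
+      //   bne taken
+
+      // Single-precision comparisons.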
+ setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); + setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); + setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); + setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); + setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); + setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); + setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); + setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); + + // Double-precision comparisons. + setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); + setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); + setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); + setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); + setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); + setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); + setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); + setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); + + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + + // Floating-point to integer conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); + + // Conversions between floating types. + setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); + + // Integer to floating-point conversions. + // i64 conversions are done via library routines even when generating VFP + // instructions, so use the same ones. + // FIXME: There appears to be some naming inconsistency in ARM libgcc: + // e.g., __floatunsidf vs. __floatunssidfvfp. + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); + } + } + + // These libcalls are not available in 32-bit. 
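+ // Passing a null name below marks a routine as unavailable, so the + // legalizer must expand i128 shifts inline instead of emitting a call.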
+ setLibcallName(RTLIB::SHL_I128, 0); + setLibcallName(RTLIB::SRL_I128, 0); + setLibcallName(RTLIB::SRA_I128, 0); + + if (Subtarget->isAAPCS_ABI()) { + // Double-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 2 + setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); + setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); + setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); + setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); + setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); + + // Double-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 3 + setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); + setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); + setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); + setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); + setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); + setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); + setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); + setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); + setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); + setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); + setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); + setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); + setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); + setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); + setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); + setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); + setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); + + // Single-precision floating-point arithmetic helper functions + // RTABI chapter 4.1.2, Table 4 + setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); + setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv"); + setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); + setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); + setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); + + // Single-precision floating-point comparison helper functions + // RTABI chapter 4.1.2, Table 5 + setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); + setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); + setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); + setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); + setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); + setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); + setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); + setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); + setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); + setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); + setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); + setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); + setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); + setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); + setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); + setCmpLibcallCC(RTLIB::O_F32, 
ISD::SETEQ); + setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); + + // Floating-point to integer conversions. + // RTABI chapter 4.1.2, Table 6 + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); + setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); + + // Conversions between floating types. + // RTABI chapter 4.1.2, Table 7 + setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); + setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); + setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); + + // Integer to floating-point conversions. 
+ // RTABI chapter 4.1.2, Table 8 + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); + + // Long long helper functions + // RTABI chapter 4.2, Table 9 + setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); + setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); + setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); + setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); + setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); + setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); + + // Integer division functions + // RTABI chapter 4.3.1 + setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); + setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); + setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); + setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); + setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); + } + + if (Subtarget->isThumb1Only()) + addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); + else + addRegisterClass(MVT::i32, ARM::GPRRegisterClass); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + addRegisterClass(MVT::f32, ARM::SPRRegisterClass); + if (!Subtarget->isFPOnlySP()) + addRegisterClass(MVT::f64, ARM::DPRRegisterClass); + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + } + + if (Subtarget->hasNEON()) { + addDRTypeForNEON(MVT::v2f32); + addDRTypeForNEON(MVT::v8i8); + addDRTypeForNEON(MVT::v4i16); + addDRTypeForNEON(MVT::v2i32); + addDRTypeForNEON(MVT::v1i64); + + addQRTypeForNEON(MVT::v4f32); + addQRTypeForNEON(MVT::v2f64); + addQRTypeForNEON(MVT::v16i8); + addQRTypeForNEON(MVT::v8i16); + addQRTypeForNEON(MVT::v4i32); + addQRTypeForNEON(MVT::v2i64); + + // v2f64 is legal so that QR subregs can be extracted as f64 elements, but + // neither Neon nor VFP support any 
arithmetic operations on it. + setOperationAction(ISD::FADD, MVT::v2f64, Expand); + setOperationAction(ISD::FSUB, MVT::v2f64, Expand); + setOperationAction(ISD::FMUL, MVT::v2f64, Expand); + setOperationAction(ISD::FDIV, MVT::v2f64, Expand); + setOperationAction(ISD::FREM, MVT::v2f64, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); + setOperationAction(ISD::FNEG, MVT::v2f64, Expand); + setOperationAction(ISD::FABS, MVT::v2f64, Expand); + setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); + setOperationAction(ISD::FSIN, MVT::v2f64, Expand); + setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); + setOperationAction(ISD::FPOW, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); + setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP, MVT::v2f64, Expand); + setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); + setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); + setOperationAction(ISD::FRINT, MVT::v2f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); + + setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); + + // Neon does not support some operations on v1i64 and v2i64 types. + setOperationAction(ISD::MUL, MVT::v1i64, Expand); + // Custom handling for some quad-vector types to detect VMULL. + setOperationAction(ISD::MUL, MVT::v8i16, Custom); + setOperationAction(ISD::MUL, MVT::v4i32, Custom); + setOperationAction(ISD::MUL, MVT::v2i64, Custom); + // Custom handling for some vector types to avoid expensive expansions + setOperationAction(ISD::SDIV, MVT::v4i16, Custom); + setOperationAction(ISD::SDIV, MVT::v8i8, Custom); + setOperationAction(ISD::UDIV, MVT::v4i16, Custom); + setOperationAction(ISD::UDIV, MVT::v8i8, Custom); + setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); + setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + + setTargetDAGCombine(ISD::INTRINSIC_VOID); + setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SIGN_EXTEND); + setTargetDAGCombine(ISD::ZERO_EXTEND); + setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::BUILD_VECTOR); + setTargetDAGCombine(ISD::VECTOR_SHUFFLE); + setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setTargetDAGCombine(ISD::STORE); + } + + computeRegisterProperties(); + + // ARM does not have f32 extending load. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // ARM does not have i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + + // ARM supports all 4 flavors of integer indexed load / store. + if (!Subtarget->isThumb1Only()) { + for (unsigned im = (unsigned)ISD::PRE_INC; + im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { + setIndexedLoadAction(im, MVT::i1, Legal); + setIndexedLoadAction(im, MVT::i8, Legal); + setIndexedLoadAction(im, MVT::i16, Legal); + setIndexedLoadAction(im, MVT::i32, Legal); + setIndexedStoreAction(im, MVT::i1, Legal); + setIndexedStoreAction(im, MVT::i8, Legal); + setIndexedStoreAction(im, MVT::i16, Legal); + setIndexedStoreAction(im, MVT::i32, Legal); + } + } + + // i64 operation support. 
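+ // Roughly: i64 operations marked Expand below are rebuilt from 32-bit + // pieces, e.g. an i64 multiply becomes a UMULL plus MLA sequence on ARM, + // or a libcall where even those pieces are unavailable.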
+ if (Subtarget->isThumb1Only()) { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + } else { + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + } + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL, MVT::i64, Custom); + setOperationAction(ISD::SRA, MVT::i64, Custom); + + // ARM does not have ROTL. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::CTTZ, MVT::i32, Custom); + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) + setOperationAction(ISD::CTLZ, MVT::i32, Expand); + + // Only ARMv6 has BSWAP. + if (!Subtarget->hasV6Ops()) + setOperationAction(ISD::BSWAP, MVT::i32, Expand); + + // These are expanded into libcalls. + if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { + // v7M has a hardware divider + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + } + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::ConstantPool, MVT::i32, Custom); + setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); + setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + + setOperationAction(ISD::TRAP, MVT::Other, Legal); + + // Use the default implementation. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setExceptionPointerRegister(ARM::R0); + setExceptionSelectorRegister(ARM::R1); + + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use + // the default expansion. + if (Subtarget->hasDataBarrier() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { + // membarrier needs custom lowering; the rest are legal and handled + // normally. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + } else { + // Set them all for expansion, which will force libcalls. 
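+ // For example, an expanded i32 ISD::ATOMIC_LOAD_ADD becomes a call to the + // __sync_fetch_and_add_4 runtime routine.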
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + // Since the libcalls include locking, fold in the fences + setShouldFoldAtomicFences(true); + } + // 64-bit versions are always libcalls (for now) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. + if (!Subtarget->hasV6Ops()) { + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); + } + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR + // iff target supports vfp2. + setOperationAction(ISD::BITCAST, MVT::i64, Custom); + setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); + } + + // We want to custom lower some of our intrinsics. 
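+ // e.g. the Darwin-only SJLJ exception-handling nodes below get custom + // lowering rather than the default expansion.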
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + if (Subtarget->isTargetDarwin()) { + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); + } + + setOperationAction(ISD::SETCC, MVT::i32, Expand); + setOperationAction(ISD::SETCC, MVT::f32, Expand); + setOperationAction(ISD::SETCC, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); + + setOperationAction(ISD::BRCOND, MVT::Other, Expand); + setOperationAction(ISD::BR_CC, MVT::i32, Custom); + setOperationAction(ISD::BR_CC, MVT::f32, Custom); + setOperationAction(ISD::BR_CC, MVT::f64, Custom); + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + + // We don't support sin/cos/fmod/copysign/pow + setOperationAction(ISD::FSIN, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f64, Expand); + setOperationAction(ISD::FREM, MVT::f32, Expand); + if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); + setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); + } + setOperationAction(ISD::FPOW, MVT::f64, Expand); + setOperationAction(ISD::FPOW, MVT::f32, Expand); + + // Various VFP goodness + if (!UseSoftFloat && !Subtarget->isThumb1Only()) { + // int <-> fp are custom expanded into bit_convert + ARMISD ops. + if (Subtarget->hasVFP2()) { + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); + } + // Special handling for half-precision FP. + if (!Subtarget->hasFP16()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); + } + } + + // We have target-specific dag combine patterns for the following nodes: + // ARMISD::VMOVRRD - No need to call setTargetDAGCombine + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::SUB); + setTargetDAGCombine(ISD::MUL); + + if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) + setTargetDAGCombine(ISD::OR); + if (Subtarget->hasNEON()) + setTargetDAGCombine(ISD::AND); + + setStackPointerRegisterToSaveRestore(ARM::SP); + + if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) + setSchedulingPreference(Sched::RegPressure); + else + setSchedulingPreference(Sched::Hybrid); + + //// temporary - rewrite interface to use type + maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; + + // On ARM arguments smaller than 4 bytes are extended, so all arguments + // are at least 4 bytes aligned. + setMinStackArgumentAlignment(4); + + benefitFromCodePlacementOpt = true; +} + +// FIXME: It might make sense to define the representative register class as the +// nearest super-register that has a non-null superset. For example, DPR_VFP2 is +// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently, +// SPR's representative would be DPR_VFP2. 
This should work well if register +// pressure tracking were modified such that a register use would increment the +// pressure of the register class's representative and all of its super- +// classes' representatives transitively. We have not implemented this because +// of the difficulty prior to coalescing of modeling operand register classes +// due to the common occurrence of cross class copies and subregister insertions +// and extractions. +std::pair<const TargetRegisterClass*, uint8_t> +ARMTargetLowering::findRepresentativeClass(EVT VT) const { + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + // Use DPR as representative register class for all floating point + // and vector types. Since there are 32 SPR registers and 32 DPR registers, + // the cost is 1 for both f32 and f64. + case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: + case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: + RRC = ARM::DPRRegisterClass; + // When NEON is used for SP, only half of the register file is available + // because operations that define both SP and DP results will be constrained + // to the VFP2 class (D0-D15). We currently model this constraint prior to + // coalescing by double-counting the SP regs. See the FIXME above. + if (Subtarget->useNEONForSinglePrecisionFP()) + Cost = 2; + break; + case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: + case MVT::v4f32: case MVT::v2f64: + RRC = ARM::DPRRegisterClass; + Cost = 2; + break; + case MVT::v4i64: + RRC = ARM::DPRRegisterClass; + Cost = 4; + break; + case MVT::v8i64: + RRC = ARM::DPRRegisterClass; + Cost = 8; + break; + } + return std::make_pair(RRC, Cost); +} + +const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { + switch (Opcode) { + default: return 0; + case ARMISD::Wrapper: return "ARMISD::Wrapper"; + case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; + case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; + case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; + case ARMISD::CALL: return "ARMISD::CALL"; + case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; + case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; + case ARMISD::tCALL: return "ARMISD::tCALL"; + case ARMISD::BRCOND: return "ARMISD::BRCOND"; + case ARMISD::BR_JT: return "ARMISD::BR_JT"; + case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; + case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; + case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; + case ARMISD::CMP: return "ARMISD::CMP"; + case ARMISD::CMPZ: return "ARMISD::CMPZ"; + case ARMISD::CMPFP: return "ARMISD::CMPFP"; + case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; + case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; + case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; + case ARMISD::CMOV: return "ARMISD::CMOV"; + + case ARMISD::RBIT: return "ARMISD::RBIT"; + + case ARMISD::FTOSI: return "ARMISD::FTOSI"; + case ARMISD::FTOUI: return "ARMISD::FTOUI"; + case ARMISD::SITOF: return "ARMISD::SITOF"; + case ARMISD::UITOF: return "ARMISD::UITOF"; + + case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; + case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; + case ARMISD::RRX: return "ARMISD::RRX"; + + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; + + case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; + case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; + + case ARMISD::TC_RETURN: 
return "ARMISD::TC_RETURN"; + + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + + case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + + case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; + case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; + + case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + + case ARMISD::VCEQ: return "ARMISD::VCEQ"; + case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; + case ARMISD::VCGE: return "ARMISD::VCGE"; + case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; + case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; + case ARMISD::VCGEU: return "ARMISD::VCGEU"; + case ARMISD::VCGT: return "ARMISD::VCGT"; + case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; + case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; + case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::VTST: return "ARMISD::VTST"; + + case ARMISD::VSHL: return "ARMISD::VSHL"; + case ARMISD::VSHRs: return "ARMISD::VSHRs"; + case ARMISD::VSHRu: return "ARMISD::VSHRu"; + case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; + case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; + case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; + case ARMISD::VSHRN: return "ARMISD::VSHRN"; + case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; + case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; + case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; + case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; + case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; + case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; + case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; + case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; + case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; + case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; + case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; + case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; + case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; + case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; + case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; + case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; + case ARMISD::VDUP: return "ARMISD::VDUP"; + case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; + case ARMISD::VEXT: return "ARMISD::VEXT"; + case ARMISD::VREV64: return "ARMISD::VREV64"; + case ARMISD::VREV32: return "ARMISD::VREV32"; + case ARMISD::VREV16: return "ARMISD::VREV16"; + case ARMISD::VZIP: return "ARMISD::VZIP"; + case ARMISD::VUZP: return "ARMISD::VUZP"; + case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VMULLs: return "ARMISD::VMULLs"; + case ARMISD::VMULLu: return "ARMISD::VMULLu"; + case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; + case ARMISD::FMAX: return "ARMISD::FMAX"; + case ARMISD::FMIN: return "ARMISD::FMIN"; + case ARMISD::BFI: return "ARMISD::BFI"; + case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; + case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; + case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; + case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; + case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; + case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; + case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; + case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; + case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; + case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; + case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; + case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; + case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; + case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; + case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; + case ARMISD::VST2_UPD: return 
"ARMISD::VST2_UPD"; + case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; + case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; + case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; + case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; + case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; + } +} + +/// getRegClassFor - Return the register class that should be used for the +/// specified value type. +TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { + // Map v4i64 to QQ registers but do not make the type legal. Similarly map + // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to + // load / store 4 to 8 consecutive D registers. + if (Subtarget->hasNEON()) { + if (VT == MVT::v4i64) + return ARM::QQPRRegisterClass; + else if (VT == MVT::v8i64) + return ARM::QQQQPRRegisterClass; + } + return TargetLowering::getRegClassFor(VT); +} + +// Create a fast isel object. +FastISel * +ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return ARM::createFastISel(funcInfo); +} + +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { + return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; +} + +/// getMaximalGlobalOffset - Returns the maximal possible offset which can +/// be used for loads / stores from the global. +unsigned ARMTargetLowering::getMaximalGlobalOffset() const { + return (Subtarget->isThumb1Only() ? 127 : 4095); +} + +Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { + unsigned NumVals = N->getNumValues(); + if (!NumVals) + return Sched::RegPressure; + + for (unsigned i = 0; i != NumVals; ++i) { + EVT VT = N->getValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (VT.isFloatingPoint() || VT.isVector()) + return Sched::Latency; + } + + if (!N->isMachineOpcode()) + return Sched::RegPressure; + + // Load are scheduled for latency even if there instruction itinerary + // is not available. + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + + if (TID.getNumDefs() == 0) + return Sched::RegPressure; + if (!Itins->isEmpty() && + Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) + return Sched::Latency; + + return Sched::RegPressure; +} + +//===----------------------------------------------------------------------===// +// Lowering Code +//===----------------------------------------------------------------------===// + +/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC +static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown condition code!"); + case ISD::SETNE: return ARMCC::NE; + case ISD::SETEQ: return ARMCC::EQ; + case ISD::SETGT: return ARMCC::GT; + case ISD::SETGE: return ARMCC::GE; + case ISD::SETLT: return ARMCC::LT; + case ISD::SETLE: return ARMCC::LE; + case ISD::SETUGT: return ARMCC::HI; + case ISD::SETUGE: return ARMCC::HS; + case ISD::SETULT: return ARMCC::LO; + case ISD::SETULE: return ARMCC::LS; + } +} + +/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
+static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + ARMCC::CondCodes &CondCode2) { + CondCode2 = ARMCC::AL; + switch (CC) { + default: llvm_unreachable("Unknown FP condition!"); + case ISD::SETEQ: + case ISD::SETOEQ: CondCode = ARMCC::EQ; break; + case ISD::SETGT: + case ISD::SETOGT: CondCode = ARMCC::GT; break; + case ISD::SETGE: + case ISD::SETOGE: CondCode = ARMCC::GE; break; + case ISD::SETOLT: CondCode = ARMCC::MI; break; + case ISD::SETOLE: CondCode = ARMCC::LS; break; + case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; + case ISD::SETO: CondCode = ARMCC::VC; break; + case ISD::SETUO: CondCode = ARMCC::VS; break; + case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; + case ISD::SETUGT: CondCode = ARMCC::HI; break; + case ISD::SETUGE: CondCode = ARMCC::PL; break; + case ISD::SETLT: + case ISD::SETULT: CondCode = ARMCC::LT; break; + case ISD::SETLE: + case ISD::SETULE: CondCode = ARMCC::LE; break; + case ISD::SETNE: + case ISD::SETUNE: CondCode = ARMCC::NE; break; + } +} + +//===----------------------------------------------------------------------===// +// Calling Convention Implementation +//===----------------------------------------------------------------------===// + +#include "ARMGenCallingConv.inc" + +/// CCAssignFnForNode - Selects the correct CCAssignFn for the +/// given CallingConvention value. +CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, + bool Return, + bool isVarArg) const { + switch (CC) { + default: + llvm_unreachable("Unsupported calling convention"); + case CallingConv::Fast: + if (Subtarget->hasVFP2() && !isVarArg) { + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); + // For AAPCS ABI targets, just use VFP variant of the calling convention. + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + } + // Fallthrough + case CallingConv::C: { + // Use target triple & subtarget features to do actual dispatch. + if (!Subtarget->isAAPCS_ABI()) + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + else if (Subtarget->hasVFP2() && + FloatABIType == FloatABI::Hard && !isVarArg) + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + } + case CallingConv::ARM_AAPCS_VFP: + return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); + case CallingConv::ARM_AAPCS: + return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); + case CallingConv::ARM_APCS: + return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); + } +} + +/// LowerCallResult - Lower the result values of a call into the +/// appropriate copies out of appropriate physical registers. +SDValue +ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, + CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, + CCAssignFnForNode(CallConv, /* Return*/ true, + isVarArg)); + + // Copy all of the result registers out of their specified physreg. + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign VA = RVLocs[i]; + + SDValue Val; + if (VA.needsCustom()) { + // Handle f64 or half of a v2f64. 
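+ // The two i32 halves arrive in consecutive locations as glued CopyFromReg + // nodes; VMOVDRR then reassembles them into a single f64 value.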
+ SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, + InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + + if (VA.getLocVT() == MVT::v2f64) { + SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(0, MVT::i32)); + + VA = RVLocs[++i]; // skip ahead to next loc + Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Lo.getValue(1); + InFlag = Lo.getValue(2); + VA = RVLocs[++i]; // skip ahead to next loc + Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); + Chain = Hi.getValue(1); + InFlag = Hi.getValue(2); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, + DAG.getConstant(1, MVT::i32)); + } + } else { + Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), + InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + } + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); + break; + } + + InVals.push_back(Val); + } + + return Chain; +} + +/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified +/// by "Src" to address "Dst" of size "Size". Alignment information is +/// specified by the specific parameter attribute. The copy will be passed as +/// a byval function parameter. +/// Sometimes what we are copying is the end of a larger object, the part that +/// does not fit in registers. +static SDValue +CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, + ISD::ArgFlagsTy Flags, SelectionDAG &DAG, + DebugLoc dl) { + SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); + return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); +} + +/// LowerMemOpCallTo - Store the argument to the stack. 
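+/// The slot address is formed as SP plus the location's stack offset; byval +/// arguments are copied with a memcpy node instead of a plain store.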
+SDValue +ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, + SDValue StackPtr, SDValue Arg, + DebugLoc dl, SelectionDAG &DAG, + const CCValAssign &VA, + ISD::ArgFlagsTy Flags) const { + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); + PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); + if (Flags.isByVal()) + return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); + + return DAG.getStore(Chain, dl, Arg, PtrOff, + MachinePointerInfo::getStack(LocMemOffset), + false, false, 0); +} + +void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, + SDValue Chain, SDValue &Arg, + RegsToPassVector &RegsToPass, + CCValAssign &VA, CCValAssign &NextVA, + SDValue &StackPtr, + SmallVector<SDValue, 8> &MemOpChains, + ISD::ArgFlagsTy Flags) const { + + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Arg); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); + + if (NextVA.isRegLoc()) + RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); + else { + assert(NextVA.isMemLoc()); + if (StackPtr.getNode() == 0) + StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), + dl, DAG, NextVA, + Flags)); + } +} + +/// LowerCall - Lowering a call into a callseq_start <- +/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter +/// nodes. +SDValue +ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, + CallingConv::ID CallConv, bool isVarArg, + bool &isTailCall, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl<SDValue> &InVals) const { + MachineFunction &MF = DAG.getMachineFunction(); + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsSibCall = false; + // Temporarily disable tail calls so things don't break. + if (!EnableARMTailCalls) + isTailCall = false; + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + // We don't support GuaranteedTailCallOpt for ARM, only automatically + // detected sibcalls. + if (isTailCall) { + ++NumTailCalls; + IsSibCall = true; + } + } + + // Analyze operands of the call, assigning locations to each operand. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, + CCAssignFnForNode(CallConv, /* Return*/ false, + isVarArg)); + + // Get a count of how many bytes are to be pushed on the stack. + unsigned NumBytes = CCInfo.getNextStackOffset(); + + // For tail calls, memory operands are available in our caller's stack. + if (IsSibCall) + NumBytes = 0; + + // Adjust the stack pointer for the new arguments... + // These operations are automatically eliminated by the prolog/epilog pass + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + + SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); + + RegsToPassVector RegsToPass; + SmallVector<SDValue, 8> MemOpChains; + + // Walk the register/memloc assignments, inserting copies/loads. In the case + // of tail call optimization, arguments are handled later. 
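+ // e.g. an f64 register argument is split by VMOVRRD into two i32 halves + // that travel in a GPR pair (see PassF64ArgInRegs above).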
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + SDValue Arg = OutVals[realArgIdx]; + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + bool isByVal = Flags.isByVal(); + + // Promote the value if needed. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::SExt: + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::ZExt: + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::AExt: + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + // f64 and v2f64 might be passed in i32 pairs and must be split into pieces + if (VA.needsCustom()) { + if (VA.getLocVT() == MVT::v2f64) { + SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + + PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + + VA = ArgLocs[++i]; // skip ahead to next loc + if (VA.isRegLoc()) { + PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, + VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); + } else { + assert(VA.isMemLoc()); + + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, + dl, DAG, VA, Flags)); + } + } else { + PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], + StackPtr, MemOpChains, Flags); + } + } else if (VA.isRegLoc()) { + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (!IsSibCall || isByVal) { + assert(VA.isMemLoc()); + + MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, + dl, DAG, VA, Flags)); + } + } + + if (!MemOpChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOpChains[0], MemOpChains.size()); + + // Build a sequence of copy-to-reg nodes chained together with token chain + // and flag operands which copy the outgoing args into the appropriate regs. + SDValue InFlag; + // Tail call byval lowering might overwrite argument registers so in case of + // tail call optimization the copies to registers are lowered later. + if (!isTailCall) + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // For tail calls lower the arguments to the 'real' stack slot. + if (isTailCall) { + // Force all the incoming stack arguments to be loaded from the stack + // before any new outgoing arguments are stored to the stack, because the + // outgoing stack slots may alias the incoming argument stack slots, and + // the alias isn't otherwise explicit. This is slightly more conservative + // than necessary, because it means that each store effectively depends + // on every argument instead of just those arguments it would clobber. + + // Do not flag preceding copytoreg stuff together with the following stuff. 
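+ // (Assigning an empty SDValue to InFlag breaks the glue chain, detaching + // these copies from the stores emitted above.)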
+ InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag = SDValue(); + } + + // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every + // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol + // node so that legalize doesn't hack it. + bool isDirect = false; + bool isARMFunc = false; + bool isLocalARMFunc = false; + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + + if (EnableARMLongCalls) { + assert (getTargetMachine().getRelocationModel() == Reloc::Static + && "long-calls with non-static relocation model!"); + // Handle a global address or an external symbol. If it's not one of + // those, the target's already in a register, so we don't need to do + // anything extra. + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, 0); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + const char *Sym = S->getSymbol(); + + // Create a constant pool entry for the callee address + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 0); + // Get the address of the callee into a register + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, 0); + } + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + isDirect = true; + bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); + bool isStub = (isExt && Subtarget->isTargetDarwin()) && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // ARM call to a local ARM function is predicable. + isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); + // tBX takes a register source operand. 
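+ // So for pre-v5T Thumb the callee address is materialized through a + // constant-pool load plus a PIC_ADD, making the call indirect.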
+ if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, + ARMPCLabelIndex, + ARMCP::CPValue, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else { + // On ELF targets for PIC code, direct calls should go through the PLT + unsigned OpFlags = 0; + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); + } + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + isDirect = true; + bool isStub = Subtarget->isTargetDarwin() && + getTargetMachine().getRelocationModel() != Reloc::Static; + isARMFunc = !Subtarget->isThumb() || isStub; + // tBX takes a register source operand. + const char *Sym = S->getSymbol(); + if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), + Sym, ARMPCLabelIndex, 4); + SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + Callee = DAG.getLoad(getPointerTy(), dl, + DAG.getEntryNode(), CPAddr, + MachinePointerInfo::getConstantPool(), + false, false, 0); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); + Callee = DAG.getNode(ARMISD::PIC_ADD, dl, + getPointerTy(), Callee, PICLabel); + } else { + unsigned OpFlags = 0; + // On ELF targets for PIC code, direct calls should go through the PLT + if (Subtarget->isTargetELF() && + getTargetMachine().getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); + } + } + + // FIXME: handle tail calls differently. + unsigned CallOpc; + if (Subtarget->isThumb()) { + if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) + CallOpc = ARMISD::CALL_NOLINK; + else + CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; + } else { + CallOpc = (isDirect || Subtarget->hasV5TOps()) + ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) + : ARMISD::CALL_NOLINK; + } + + std::vector<SDValue> Ops; + Ops.push_back(Chain); + Ops.push_back(Callee); + + // Add argument registers to the end of the list so that they are known live + // into the call. + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) + Ops.push_back(DAG.getRegister(RegsToPass[i].first, + RegsToPass[i].second.getValueType())); + + if (InFlag.getNode()) + Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + if (isTailCall) + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + + // Returns a chain and a flag for retval copy to use. + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), + DAG.getIntPtrConstant(0, true), InFlag); + if (!Ins.empty()) + InFlag = Chain.getValue(1); + + // Handle result values, copying them out of physregs into vregs that we + // return. 
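+ // LowerCallResult (defined above) emits one CopyFromReg per location in + // RVLocs and stitches split f64/v2f64 results back together.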
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, + dl, DAG, InVals); +} + +/// HandleByVal - Every parameter *after* a byval parameter is passed +/// on the stack. Confiscate all the parameter registers to ensure +/// this. +void +llvm::ARMTargetLowering::HandleByVal(CCState *State) const { + static const unsigned RegList1[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 + }; + do {} while (State->AllocateReg(RegList1, 4)); +} + +/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. +static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const ARMInstrInfo *TII) { + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + return false; + } + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } else + return false; + + assert(FI != INT_MAX); + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); +} + +/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. +bool +ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. + + // Do not sibcall optimize vararg calls unless the call site passes no + // arguments. + if (isVarArg && !Outs.empty()) + return false; + + // Also avoid sibcall optimization if either caller or callee uses struct + // return semantics. + if (isCalleeStructRet || isCallerStructRet) + return false; + + // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: + // emitEpilogue is not ready for them. + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instruction, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. 
+
+  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+  // but we need to make sure there are enough registers; the only valid
+  // registers are the 4 used for parameters. We don't currently do this
+  // case.
+  if (Subtarget->isThumb1Only())
+    return false;
+
+  // If the calling conventions do not match, then we'd better make sure the
+  // results are returned in the same way as what the caller expects.
+  if (!CCMatch) {
+    SmallVector<CCValAssign, 16> RVLocs1;
+    CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+                    RVLocs1, *DAG.getContext());
+    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+    SmallVector<CCValAssign, 16> RVLocs2;
+    CCState CCInfo2(CallerCC, false, getTargetMachine(),
+                    RVLocs2, *DAG.getContext());
+    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+    if (RVLocs1.size() != RVLocs2.size())
+      return false;
+    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+        return false;
+      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+        return false;
+      if (RVLocs1[i].isRegLoc()) {
+        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+          return false;
+      } else {
+        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+          return false;
+      }
+    }
+  }
+
+  // If the callee takes no arguments then go on to check the results of the
+  // call.
+  if (!Outs.empty()) {
+    // Check if stack adjustment is needed. For now, do not do this if any
+    // argument is passed on the stack.
+    SmallVector<CCValAssign, 16> ArgLocs;
+    CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+                   ArgLocs, *DAG.getContext());
+    CCInfo.AnalyzeCallOperands(Outs,
+                               CCAssignFnForNode(CalleeCC, false, isVarArg));
+    if (CCInfo.getNextStackOffset()) {
+      MachineFunction &MF = DAG.getMachineFunction();
+
+      // Check if the arguments are already laid out in the right way as
+      // the caller's fixed stack objects.
+      MachineFrameInfo *MFI = MF.getFrameInfo();
+      const MachineRegisterInfo *MRI = &MF.getRegInfo();
+      const ARMInstrInfo *TII =
+        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+           i != e;
+           ++i, ++realArgIdx) {
+        CCValAssign &VA = ArgLocs[i];
+        EVT RegVT = VA.getLocVT();
+        SDValue Arg = OutVals[realArgIdx];
+        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+        if (VA.getLocInfo() == CCValAssign::Indirect)
+          return false;
+        if (VA.needsCustom()) {
+          // f64 and vector types are split into multiple registers or
+          // register/stack-slot combinations. The types will not match
+          // the registers; give up on memory f64 refs until we figure
+          // out what to do about this.
+          if (!VA.isRegLoc())
+            return false;
+          if (!ArgLocs[++i].isRegLoc())
+            return false;
+          if (RegVT == MVT::v2f64) {
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+          }
+        } else if (!VA.isRegLoc()) {
+          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+                                   MFI, MRI, TII))
+            return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
+SDValue
+ARMTargetLowering::LowerReturn(SDValue Chain,
+                               CallingConv::ID CallConv, bool isVarArg,
+                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                               const SmallVectorImpl<SDValue> &OutVals,
+                               DebugLoc dl, SelectionDAG &DAG) const {
+
+  // CCValAssign - represent the assignment of the return value to a location.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slots.
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
+                 *DAG.getContext());
+
+  // Analyze outgoing return values.
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, + isVarArg)); + + // If this is the first return lowered for this function, add + // the regs to the liveout set for the function. + if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { + for (unsigned i = 0; i != RVLocs.size(); ++i) + if (RVLocs[i].isRegLoc()) + DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); + } + + SDValue Flag; + + // Copy the result values into the output registers. + for (unsigned i = 0, realRVLocIdx = 0; + i != RVLocs.size(); + ++i, ++realRVLocIdx) { + CCValAssign &VA = RVLocs[i]; + assert(VA.isRegLoc() && "Can only return in registers!"); + + SDValue Arg = OutVals[realRVLocIdx]; + + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); + break; + } + + if (VA.needsCustom()) { + if (VA.getLocVT() == MVT::v2f64) { + // Extract the first half and return it in two registers. + SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(0, MVT::i32)); + SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), Half); + + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + HalfGPRs.getValue(1), Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + + // Extract the 2nd half and fall through to handle it as an f64 value. + Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, + DAG.getConstant(1, MVT::i32)); + } + // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is + // available. + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); + Flag = Chain.getValue(1); + VA = RVLocs[++i]; // skip ahead to next loc + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), + Flag); + } else + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); + + // Guarantee that all emitted copies are + // stuck together, avoiding something bad. + Flag = Chain.getValue(1); + } + + SDValue result; + if (Flag.getNode()) + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); + else // Return Void + result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); + + return result; +} + +bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { + if (N->getNumValues() != 1) + return false; + if (!N->hasNUsesOfValue(1, 0)) + return false; + + unsigned NumCopies = 0; + SDNode* Copies[2]; + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::CopyToReg) { + Copies[NumCopies++] = Use; + } else if (Use->getOpcode() == ARMISD::VMOVRRD) { + // f64 returned in a pair of GPRs. + for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); + UI != UE; ++UI) { + if (UI->getOpcode() != ISD::CopyToReg) + return false; + Copies[UI.getUse().getResNo()] = *UI; + ++NumCopies; + } + } else if (Use->getOpcode() == ISD::BITCAST) { + // f32 returned in a single GPR. 
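+    // i.e. the DAG shape is (CopyToReg Rn, (bitcast f32 %x to i32)) feeding a
+    // RET_FLAG; we look through the bitcast to that single CopyToReg below.
+    // (The destination register is illustrative; it comes from the CC.)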
+    if (!Use->hasNUsesOfValue(1, 0))
+      return false;
+    Use = *Use->use_begin();
+    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
+      return false;
+    Copies[NumCopies++] = Use;
+  } else {
+    return false;
+  }
+
+  if (NumCopies != 1 && NumCopies != 2)
+    return false;
+
+  bool HasRet = false;
+  for (unsigned i = 0; i < NumCopies; ++i) {
+    SDNode *Copy = Copies[i];
+    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
+         UI != UE; ++UI) {
+      if (UI->getOpcode() == ISD::CopyToReg) {
+        SDNode *Use = *UI;
+        if (Use == Copies[0] || Use == Copies[1])
+          continue;
+        return false;
+      }
+      if (UI->getOpcode() != ARMISD::RET_FLAG)
+        return false;
+      HasRet = true;
+    }
+  }
+
+  return HasRet;
+}
+
+bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+  if (!EnableARMTailCalls)
+    return false;
+
+  if (!CI->isTailCall())
+    return false;
+
+  return !Subtarget->isThumb1Only();
+}
+
+// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
+// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
+// one of the above mentioned nodes. It has to be wrapped because otherwise
+// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
+// be used to form addressing mode. These wrapped nodes will be selected
+// into MOVi.
+static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+  EVT PtrVT = Op.getValueType();
+  // FIXME there is no actual debug info here
+  DebugLoc dl = Op.getDebugLoc();
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  SDValue Res;
+  if (CP->isMachineConstantPoolEntry())
+    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+                                    CP->getAlignment());
+  else
+    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+                                    CP->getAlignment());
+  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
+}
+
+unsigned ARMTargetLowering::getJumpTableEncoding() const {
+  return MachineJumpTableInfo::EK_Inline;
+}
+
+SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
+                                             SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned ARMPCLabelIndex = 0;
+  DebugLoc DL = Op.getDebugLoc();
+  EVT PtrVT = getPointerTy();
+  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+  SDValue CPAddr;
+  if (RelocM == Reloc::Static) {
+    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
+  } else {
+    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+    ARMPCLabelIndex = AFI->createPICLabelUId();
+    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
+                                                         ARMCP::CPBlockAddress,
+                                                         PCAdj);
+    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+  }
+  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
+  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
+                               MachinePointerInfo::getConstantPool(),
+                               false, false, 0);
+  if (RelocM == Reloc::Static)
+    return Result;
+  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
+}
+
+// Lower ISD::GlobalTLSAddress using the "general dynamic" model
+SDValue
+ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+                                                 SelectionDAG &DAG) const {
+  DebugLoc dl = GA->getDebugLoc();
+  EVT PtrVT = getPointerTy();
+  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+  ARMConstantPoolValue *CPV =
+    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+                             ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
+  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
+  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
+                         MachinePointerInfo::getConstantPool(),
+                         false, false, 0);
+  SDValue Chain = Argument.getValue(1);
+
+  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
+
+  // call __tls_get_addr.
+  ArgListTy Args;
+  ArgListEntry Entry;
+  Entry.Node = Argument;
+  Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext());
+  Args.push_back(Entry);
+  // FIXME: is there useful debug info available here?
+  std::pair<SDValue, SDValue> CallResult =
+    LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
+                false, false, false, false,
+                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
+                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+  return CallResult.first;
+}
+
+// Lower ISD::GlobalTLSAddress using the "initial exec" or
+// "local exec" model.
+SDValue
+ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
+                                        SelectionDAG &DAG) const {
+  const GlobalValue *GV = GA->getGlobal();
+  DebugLoc dl = GA->getDebugLoc();
+  SDValue Offset;
+  SDValue Chain = DAG.getEntryNode();
+  EVT PtrVT = getPointerTy();
+  // Get the Thread Pointer
+  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+
+  if (GV->isDeclaration()) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+    // Initial exec model.
+    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+    ARMConstantPoolValue *CPV =
+      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
+                               ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
+    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+                         MachinePointerInfo::getConstantPool(),
+                         false, false, 0);
+    Chain = Offset.getValue(1);
+
+    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
+
+    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+                         MachinePointerInfo::getConstantPool(),
+                         false, false, 0);
+  } else {
+    // local exec model
+    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
+    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
+    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
+                         MachinePointerInfo::getConstantPool(),
+                         false, false, 0);
+  }
+
+  // The address of the thread local variable is the add of the thread
+  // pointer with the offset of the variable.
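+  // That is, for both the initial-exec and local-exec models the result is
+  //   address = THREAD_POINTER + Offset
+  // with Offset materialized from the constant pool (a GOTTPOFF or TPOFF
+  // entry, respectively).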
+  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+}
+
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+  // TODO: implement the "local dynamic" model
+  assert(Subtarget->isTargetELF() &&
+         "TLS not implemented for non-ELF targets");
+  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
+  // If the relocation model is PIC, use the "General Dynamic" TLS Model,
+  // otherwise use the "Local Exec" TLS Model
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
+    return LowerToTLSGeneralDynamicModel(GA, DAG);
+  else
+    return LowerToTLSExecModels(GA, DAG);
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+  if (RelocM == Reloc::PIC_) {
+    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
+    ARMConstantPoolValue *CPV =
+      new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
+    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
+                                 CPAddr,
+                                 MachinePointerInfo::getConstantPool(),
+                                 false, false, 0);
+    SDValue Chain = Result.getValue(1);
+    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
+    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
+    if (!UseGOTOFF)
+      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
+                           MachinePointerInfo::getGOT(), false, false, 0);
+    return Result;
+  }
+
+  // If we have T2 ops, we can materialize the address directly via movt/movw
+  // pair. This is always cheaper.
+  if (Subtarget->useMovt()) {
+    ++NumMovwMovt;
+    // FIXME: Once remat is capable of dealing with instructions with register
+    // operands, expand this into two nodes.
+    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+  } else {
+    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                       MachinePointerInfo::getConstantPool(),
+                       false, false, 0);
+  }
+}
+
+SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  if (Subtarget->useMovt()) {
+    ++NumMovwMovt;
+    // FIXME: Once remat is capable of dealing with instructions with register
+    // operands, expand this into two nodes.
+    if (RelocM == Reloc::Static)
+      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
+                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+
+    unsigned Wrapper = (RelocM == Reloc::PIC_)
+      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
+    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
+                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
+    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
+                           MachinePointerInfo::getGOT(), false, false, 0);
+    return Result;
+  }
+
+  unsigned ARMPCLabelIndex = 0;
+  SDValue CPAddr;
+  if (RelocM == Reloc::Static) {
+    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
+  } else {
+    ARMPCLabelIndex = AFI->createPICLabelUId();
+    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
+    ARMConstantPoolValue *CPV =
+      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
+    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+  }
+  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+
+  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                               MachinePointerInfo::getConstantPool(),
+                               false, false, 0);
+  SDValue Chain = Result.getValue(1);
+
+  if (RelocM == Reloc::PIC_) {
+    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+  }
+
+  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
+    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
+                         false, false, 0);
+
+  return Result;
+}
+
+SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
+                                                    SelectionDAG &DAG) const {
+  assert(Subtarget->isTargetELF() &&
+         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+  EVT PtrVT = getPointerTy();
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
+                                                       "_GLOBAL_OFFSET_TABLE_",
+                                                       ARMPCLabelIndex, PCAdj);
+  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                               MachinePointerInfo::getConstantPool(),
+                               false, false, 0);
+  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
+  const {
+  DebugLoc dl = Op.getDebugLoc();
+  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+                     Op.getOperand(0), Op.getOperand(1));
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Val = DAG.getConstant(0, MVT::i32);
+  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+                     Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
+ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+                                           const ARMSubtarget *Subtarget) const {
+  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  DebugLoc dl = Op.getDebugLoc();
+  switch (IntNo) {
+  default: return SDValue();    // Don't custom lower most intrinsics.
+  case Intrinsic::arm_thread_pointer: {
+    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
+  }
+  case Intrinsic::eh_sjlj_lsda: {
+    MachineFunction &MF = DAG.getMachineFunction();
+    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
+    EVT PtrVT = getPointerTy();
+    DebugLoc dl = Op.getDebugLoc();
+    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
+    SDValue CPAddr;
+    unsigned PCAdj = (RelocM != Reloc::PIC_)
+      ? 0 : (Subtarget->isThumb() ? 4 : 8);
+    ARMConstantPoolValue *CPV =
+      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
+                               ARMCP::CPLSDA, PCAdj);
+    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+    SDValue Result =
+      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
+                  MachinePointerInfo::getConstantPool(),
+                  false, false, 0);
+
+    if (RelocM == Reloc::PIC_) {
+      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
+      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
+    }
+    return Result;
+  }
+  }
+}
+
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+                               const ARMSubtarget *Subtarget) {
+  DebugLoc dl = Op.getDebugLoc();
+  if (!Subtarget->hasDataBarrier()) {
+    // Some ARMv6 cpus can support data barriers with an mcr instruction.
+    // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
+    // here.
+    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
+           "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+    return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
+                       DAG.getConstant(0, MVT::i32));
+  }
+
+  SDValue Op5 = Op.getOperand(5);
+  bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
+  unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+  bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
+
+  ARM_MB::MemBOpt DMBOpt;
+  if (isDeviceBarrier)
+    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
+  else
+    DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
+  return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+                     DAG.getConstant(DMBOpt, MVT::i32));
+}
+
+static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
+                             const ARMSubtarget *Subtarget) {
+  // ARM pre-v5TE and Thumb1 do not have preload instructions.
+  if (!(Subtarget->isThumb2() ||
+        (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
+    // Just preserve the chain.
+    return Op.getOperand(0);
+
+  DebugLoc dl = Op.getDebugLoc();
+  unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
+  if (!isRead &&
+      (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
+    // ARMv7 with MP extension has PLDW.
+    return Op.getOperand(0);
+
+  if (Subtarget->isThumb())
+    // Invert the bits.
+    isRead = ~isRead & 1;
+  unsigned isData = Subtarget->isThumb() ? 0 : 1;
+
+  // Currently there is no intrinsic that matches pli.
+  return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
+                     Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
+                     DAG.getConstant(isData, MVT::i32));
+}
+
+static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
+
+  // vastart just stores the address of the VarArgsFrameIndex slot into the
+  // memory location argument.
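+  // Sketched as a DAG (names illustrative): with FR = FrameIndex(VarArgsFI),
+  // this emits
+  //   store FR, <the va_list pointer in operand 1>
+  // so later va_arg lowering can walk the register-save area from there.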
+  DebugLoc dl = Op.getDebugLoc();
+  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
+  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
+                      MachinePointerInfo(SV), false, false, 0);
+}
+
+SDValue
+ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+                                        SDValue &Root, SelectionDAG &DAG,
+                                        DebugLoc dl) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  TargetRegisterClass *RC;
+  if (AFI->isThumb1OnlyFunction())
+    RC = ARM::tGPRRegisterClass;
+  else
+    RC = ARM::GPRRegisterClass;
+
+  // Transform the arguments stored in physical registers into virtual ones.
+  unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+  SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+
+  SDValue ArgValue2;
+  if (NextVA.isMemLoc()) {
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+
+    // Create load node to retrieve arguments from the stack.
+    SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+    ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
+                            MachinePointerInfo::getFixedStack(FI),
+                            false, false, 0);
+  } else {
+    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+    ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
+  }
+
+  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
+}
+
+SDValue
+ARMTargetLowering::LowerFormalArguments(SDValue Chain,
+                                        CallingConv::ID CallConv, bool isVarArg,
+                                        const SmallVectorImpl<ISD::InputArg>
+                                          &Ins,
+                                        DebugLoc dl, SelectionDAG &DAG,
+                                        SmallVectorImpl<SDValue> &InVals)
+                                          const {
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+  // Assign locations to all of the incoming arguments.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+                 *DAG.getContext());
+  CCInfo.AnalyzeFormalArguments(Ins,
+                                CCAssignFnForNode(CallConv, /* Return*/ false,
+                                                  isVarArg));
+
+  SmallVector<SDValue, 16> ArgValues;
+  int lastInsIndex = -1;
+
+  SDValue ArgValue;
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+
+    // Arguments stored in registers.
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+
+      if (VA.needsCustom()) {
+        // f64 and vector types are split up into multiple registers or
+        // combinations of registers and stack slots.
+ if (VA.getLocVT() == MVT::v2f64) { + SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + VA = ArgLocs[++i]; // skip ahead to next loc + SDValue ArgValue2; + if (VA.isMemLoc()) { + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0); + } else { + ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], + Chain, DAG, dl); + } + ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); + ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, + ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); + } else + ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); + + } else { + TargetRegisterClass *RC; + + if (RegVT == MVT::f32) + RC = ARM::SPRRegisterClass; + else if (RegVT == MVT::f64) + RC = ARM::DPRRegisterClass; + else if (RegVT == MVT::v2f64) + RC = ARM::QPRRegisterClass; + else if (RegVT == MVT::i32) + RC = (AFI->isThumb1OnlyFunction() ? + ARM::tGPRRegisterClass : ARM::GPRRegisterClass); + else + llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); + + // Transform the arguments in physical registers into virtual ones. + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); + ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); + } + + // If this is an 8 or 16-bit value, it is really passed promoted + // to 32 bits. Insert an assert[sz]ext to capture this, then + // truncate to the right size. + switch (VA.getLocInfo()) { + default: llvm_unreachable("Unknown loc info!"); + case CCValAssign::Full: break; + case CCValAssign::BCvt: + ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::SExt: + ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + case CCValAssign::ZExt: + ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, + DAG.getValueType(VA.getValVT())); + ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + break; + } + + InVals.push_back(ArgValue); + + } else { // VA.isRegLoc() + + // sanity check + assert(VA.isMemLoc()); + assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); + + int index = ArgLocs[i].getValNo(); + + // Some Ins[] entries become multiple ArgLoc[] entries. + // Process them only once. + if (index != lastInsIndex) + { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + // FIXME: For now, all byval parameter objects are marked mutable. This can be + // changed with more analysis. + // In case of tail call optimization mark all arguments mutable. Since they + // could be overwritten by lowering of arguments in case of a tail call. + if (Flags.isByVal()) { + int FI = MFI->CreateFixedObject(Flags.getByValSize(), + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack. 
+          SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+          InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                                       MachinePointerInfo::getFixedStack(FI),
+                                       false, false, 0));
+        }
+        lastInsIndex = index;
+      }
+    }
+  }
+
+  // varargs
+  if (isVarArg) {
+    static const unsigned GPRArgRegs[] = {
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3
+    };
+
+    unsigned NumGPRs = CCInfo.getFirstUnallocated
+      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
+
+    unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
+    unsigned VARegSize = (4 - NumGPRs) * 4;
+    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
+    unsigned ArgOffset = CCInfo.getNextStackOffset();
+    if (VARegSaveSize) {
+      // If this function is vararg, store any remaining integer argument regs
+      // to their spots on the stack so that they may be loaded by dereferencing
+      // the result of va_next.
+      AFI->setVarArgsRegSaveSize(VARegSaveSize);
+      AFI->setVarArgsFrameIndex(
+        MFI->CreateFixedObject(VARegSaveSize,
+                               ArgOffset + VARegSaveSize - VARegSize,
+                               false));
+      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
+                                      getPointerTy());
+
+      SmallVector<SDValue, 4> MemOps;
+      for (; NumGPRs < 4; ++NumGPRs) {
+        TargetRegisterClass *RC;
+        if (AFI->isThumb1OnlyFunction())
+          RC = ARM::tGPRRegisterClass;
+        else
+          RC = ARM::GPRRegisterClass;
+
+        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
+        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
+        SDValue Store =
+          DAG.getStore(Val.getValue(1), dl, Val, FIN,
+                       MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
+                       false, false, 0);
+        MemOps.push_back(Store);
+        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
+                          DAG.getConstant(4, getPointerTy()));
+      }
+      if (!MemOps.empty())
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+                            &MemOps[0], MemOps.size());
+    } else
+      // This will point to the next argument passed via stack.
+      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
+  }
+
+  return Chain;
+}
+
+/// isFloatingPointZero - Return true if this is +0.0.
+static bool isFloatingPointZero(SDValue Op) {
+  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
+    return CFP->getValueAPF().isPosZero();
+  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
+    // Maybe this has already been legalized into the constant pool?
+    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
+      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
+      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
+        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
+          return CFP->getValueAPF().isPosZero();
+    }
+  }
+  return false;
+}
+
+/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
+/// the given operands.
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                             SDValue &ARMcc, SelectionDAG &DAG,
+                             DebugLoc dl) const {
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
+    unsigned C = RHSC->getZExtValue();
+    if (!isLegalICmpImmediate(C)) {
+      // Constant does not fit, try adjusting it by one?
+      switch (CC) {
+      default: break;
+      case ISD::SETLT:
+      case ISD::SETGE:
+        if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
+          CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
+          RHS = DAG.getConstant(C-1, MVT::i32);
+        }
+        break;
+      case ISD::SETULT:
+      case ISD::SETUGE:
+        if (C != 0 && isLegalICmpImmediate(C-1)) {
+          CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
+          RHS = DAG.getConstant(C-1, MVT::i32);
+        }
+        break;
+      case ISD::SETLE:
+      case ISD::SETGT:
+        if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
+          CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
+          RHS = DAG.getConstant(C+1, MVT::i32);
+        }
+        break;
+      case ISD::SETULE:
+      case ISD::SETUGT:
+        if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
+          CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
+          RHS = DAG.getConstant(C+1, MVT::i32);
+        }
+        break;
+      }
+    }
+  }
+
+  ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+  ARMISD::NodeType CompareType;
+  switch (CondCode) {
+  default:
+    CompareType = ARMISD::CMP;
+    break;
+  case ARMCC::EQ:
+  case ARMCC::NE:
+    // Uses only Z Flag
+    CompareType = ARMISD::CMPZ;
+    break;
+  }
+  ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
+}
+
+/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
+                             DebugLoc dl) const {
+  SDValue Cmp;
+  if (!isFloatingPointZero(RHS))
+    Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
+  else
+    Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
+  return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
+}
+
+/// duplicateCmp - Glue values can have only one use, so this function
+/// duplicates a comparison node.
+SDValue
+ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
+  unsigned Opc = Cmp.getOpcode();
+  DebugLoc DL = Cmp.getDebugLoc();
+  if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
+    return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+
+  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
+  Cmp = Cmp.getOperand(0);
+  Opc = Cmp.getOpcode();
+  if (Opc == ARMISD::CMPFP)
+    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
+  else {
+    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
+    Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
+  }
+  return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
+}
+
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Cond = Op.getOperand(0);
+  SDValue SelectTrue = Op.getOperand(1);
+  SDValue SelectFalse = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Convert:
+  //
+  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+  //
+  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+    const ConstantSDNode *CMOVTrue =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+    const ConstantSDNode *CMOVFalse =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+    if (CMOVTrue && CMOVFalse) {
+      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+      SDValue True;
+      SDValue False;
+      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+        True = SelectTrue;
+        False = SelectFalse;
+      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+        True = SelectFalse;
+        False = SelectTrue;
+      }
+
+      if (True.getNode() && False.getNode()) {
+        EVT VT = Cond.getValueType();
+        SDValue ARMcc = Cond.getOperand(2);
+        SDValue CCR = Cond.getOperand(3);
+        SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
+        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+      }
+    }
+  }
+
+  return DAG.getSelectCC(dl, Cond,
+                         DAG.getConstant(0, Cond.getValueType()),
+                         SelectTrue, SelectFalse, ISD::SETNE);
+}
+
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+  EVT VT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+  SDValue TrueVal = Op.getOperand(2);
+  SDValue FalseVal = Op.getOperand(3);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (LHS.getValueType() == MVT::i32) {
+    SDValue ARMcc;
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+  }
+
+  ARMCC::CondCodes CondCode, CondCode2;
+  FPCCToARMCC(CC, CondCode, CondCode2);
+
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+                               ARMcc, CCR, Cmp);
+  if (CondCode2 != ARMCC::AL) {
+    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
+    // FIXME: Needs another CMP because flag can have but one use.
+    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
+                         Result, TrueVal, ARMcc2, CCR, Cmp2);
+  }
+  return Result;
+}
+
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+                           const ARMSubtarget *Subtarget) {
+  SDNode *N = Op.getNode();
+  if (!N->hasOneUse())
+    // Otherwise it requires moving the value from fp to integer registers.
+    return false;
+  if (!N->getNumValues())
+    return false;
+  EVT VT = Op.getValueType();
+  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+    // vmrs are very slow, e.g. cortex-a8.
+    return false;
+
+  if (isFloatingPointZero(Op)) {
+    SeenZero = true;
+    return true;
+  }
+  return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+  if (isFloatingPointZero(Op))
+    return DAG.getConstant(0, MVT::i32);
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                       Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
+                       Ld->isVolatile(), Ld->isNonTemporal(),
+                       Ld->getAlignment());
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+                           SDValue &RetVal1, SDValue &RetVal2) {
+  if (isFloatingPointZero(Op)) {
+    RetVal1 = DAG.getConstant(0, MVT::i32);
+    RetVal2 = DAG.getConstant(0, MVT::i32);
+    return;
+  }
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+    SDValue Ptr = Ld->getBasePtr();
+    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), Ptr,
+                          Ld->getPointerInfo(),
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          Ld->getAlignment());
+
+    EVT PtrType = Ptr.getValueType();
+    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
+    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), NewPtr,
+                          Ld->getPointerInfo().getWithOffset(4),
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          NewAlign);
+    return;
+  }
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  bool SeenZero = false;
+  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+      canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operands is zero, it's safe to ignore the NaN case since
+      // we only care about equality comparisons.
+      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
+    // If unsafe fp math optimization is enabled and there are no other uses of
+    // the CMP operands, and the condition code is EQ or NE, we can optimize it
+    // to an integer comparison.
+    if (CC == ISD::SETOEQ)
+      CC = ISD::SETEQ;
+    else if (CC == ISD::SETUNE)
+      CC = ISD::SETNE;
+
+    SDValue ARMcc;
+    if (LHS.getValueType() == MVT::f32) {
+      LHS = bitcastf32Toi32(LHS, DAG);
+      RHS = bitcastf32Toi32(RHS, DAG);
+      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                         Chain, Dest, ARMcc, CCR, Cmp);
+    }
+
+    SDValue LHS1, LHS2;
+    SDValue RHS1, RHS2;
+    expandf64Toi32(LHS, DAG, LHS1, LHS2);
+    expandf64Toi32(RHS, DAG, RHS1, RHS2);
+    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+    ARMcc = DAG.getConstant(CondCode, MVT::i32);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+  }
+
+  return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (LHS.getValueType() == MVT::i32) {
+    SDValue ARMcc;
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                       Chain, Dest, ARMcc, CCR, Cmp);
+  }
+
+  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+  if (UnsafeFPMath &&
+      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+       CC == ISD::SETNE || CC == ISD::SETUNE)) {
+    SDValue Result = OptimizeVFPBrcond(Op, DAG);
+    if (Result.getNode())
+      return Result;
+  }
+
+  ARMCC::CondCodes CondCode, CondCode2;
+  FPCCToARMCC(CC, CondCode, CondCode2);
+
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
+  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
+  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+  if (CondCode2 != ARMCC::AL) {
+    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
+    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
+  }
+  return Res;
+}
+
+SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  SDValue Table = Op.getOperand(1);
+  SDValue Index = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  EVT PTy = getPointerTy();
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+  ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
+  SDValue UId =
DAG.getConstant(AFI->createJumpTableUId(), PTy); + SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); + Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); + SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); + if (Subtarget->isThumb2()) { + // Thumb2 uses a two-level jump. That is, it jumps into the jump table + // which does another jump to the destination. This also makes it easier + // to translate it to TBB / TBH later. + // FIXME: This might not work if the function is extremely large. + return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, + Addr, Op.getOperand(2), JTI, UId); + } + if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { + Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(), + false, false, 0); + Chain = Addr.getValue(1); + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } else { + Addr = DAG.getLoad(PTy, dl, Chain, Addr, + MachinePointerInfo::getJumpTable(), false, false, 0); + Chain = Addr.getValue(1); + return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); + } +} + +static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::FP_TO_SINT: + Opc = ARMISD::FTOSI; + break; + case ISD::FP_TO_UINT: + Opc = ARMISD::FTOUI; + break; + } + Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); +} + +static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Opc; + + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::SINT_TO_FP: + Opc = ARMISD::SITOF; + break; + case ISD::UINT_TO_FP: + Opc = ARMISD::UITOF; + break; + } + + Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { + // Implement fcopysign with a fabs and a conditional fneg. + SDValue Tmp0 = Op.getOperand(0); + SDValue Tmp1 = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + EVT VT = Op.getValueType(); + EVT SrcVT = Tmp1.getValueType(); + bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || + Tmp0.getOpcode() == ARMISD::VMOVDRR; + bool UseNEON = !InGPR && Subtarget->hasNEON(); + + if (UseNEON) { + // Use VBSL to copy the sign bit. + unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); + SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, + DAG.getTargetConstant(EncodedVal, MVT::i32)); + EVT OpVT = (VT == MVT::f32) ? 
MVT::v2i32 : MVT::v1i64; + if (VT == MVT::f64) + Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), + DAG.getConstant(32, MVT::i32)); + else /*if (VT == MVT::f32)*/ + Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); + if (SrcVT == MVT::f32) { + Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); + if (VT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), + DAG.getConstant(32, MVT::i32)); + } + Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); + + SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), + MVT::i32); + AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); + SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, + DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); + + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, + DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), + DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); + if (VT == MVT::f32) { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, + DAG.getConstant(0, MVT::i32)); + } else { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); + } + + return Res; + } + + // Bitcast operand 1 to i32. + if (SrcVT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp1, 1).getValue(1); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); + + // Or in the signbit with integer operations. + SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); + SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); + Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); + if (VT == MVT::f32) { + Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); + } + + // f64: Or the high part with signbit and then combine two parts. + Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp0, 1); + SDValue Lo = Tmp0.getValue(0); + SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); + Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); +} + +SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(4, MVT::i32); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, 0); + } + + // Return LR, which contains the return address. Mark it an implicit live-in. 
+ unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); +} + +SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) + ? ARM::R7 : ARM::R11; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, 0); + return FrameAddr; +} + +/// ExpandBITCAST - If the target supports VFP, this function is called to +/// expand a bit convert where either the source or destination type is i64 to +/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 +/// operand type is illegal (e.g., v2f32 for a target that doesn't support +/// vectors), since the legalizer won't know what to do with that. +static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + DebugLoc dl = N->getDebugLoc(); + SDValue Op = N->getOperand(0); + + // This function is only supposed to be called for i64 types, either as the + // source or destination of the bit convert. + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && + "ExpandBITCAST called for non-i64 type"); + + // Turn i64->f64 into VMOVDRR. + if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, + DAG.getConstant(1, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, DstVT, + DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); + } + + // Turn f64->i64 into VMOVRRD. + if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, + DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); + // Merge the pieces into a single i64 value. + return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); + } + + return SDValue(); +} + +/// getZeroVector - Returns a vector of specified type with all zero elements. +/// Zero vectors are used to represent vector negation and in those cases +/// will be implemented with the NEON VNEG instruction. However, VNEG does +/// not support i64 elements, so sometimes the zero vectors will need to be +/// explicitly constructed. Regardless, use a canonical VMOV to create the +/// zero vector. +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { + assert(VT.isVector() && "Expected a vector type"); + // The canonical modified immediate encoding of a zero vector is....0! + SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); + EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); + return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); +} + +/// LowerShiftRightParts - Lower SRA_PARTS, which returns two +/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMcc; + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; + + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, + ARMcc, DAG, dl); + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, + CCR, Cmp); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + +/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two +/// i32 values and take a 2 x i32 value to shift plus a shift amount. +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + DebugLoc dl = Op.getDebugLoc(); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + SDValue ARMcc; + + assert(Op.getOpcode() == ISD::SHL_PARTS); + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); + SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, + ARMcc, DAG, dl); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, + CCR, Cmp); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, 2, dl); +} + +SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, + SelectionDAG &DAG) const { + // The rounding mode is in bits 23:22 of the FPSCR. + // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 + // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) + // so that the shift + and get folded into a bitfield extract. 
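+  // For example, with FPSCR[23:22] = 0b11 (round-toward-zero):
+  //   ((3 << 22) + (1 << 22)) >> 22 = 4, and 4 & 3 = 0,
+  // which is the FLT_ROUNDS encoding for round-toward-zero.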
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
+                              DAG.getConstant(Intrinsic::arm_get_fpscr,
+                                              MVT::i32));
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
+                                  DAG.getConstant(1U << 22, MVT::i32));
+  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+                              DAG.getConstant(22, MVT::i32));
+  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+                     DAG.getConstant(3, MVT::i32));
+}
+
+static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
+                         const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (!ST->hasV6T2Ops())
+    return SDValue();
+
+  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
+  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
+}
+
+static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
+                          const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  if (!VT.isVector())
+    return SDValue();
+
+  // Lower vector shifts on NEON to use VSHL.
+  assert(ST->hasNEON() && "unexpected vector shift");
+
+  // Left shifts translate directly to the vshiftu intrinsic.
+  if (N->getOpcode() == ISD::SHL)
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
+                       N->getOperand(0), N->getOperand(1));
+
+  assert((N->getOpcode() == ISD::SRA ||
+          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
+
+  // NEON uses the same intrinsics for both left and right shifts. For
+  // right shifts, the shift amounts are negative, so negate the vector of
+  // shift amounts.
+  EVT ShiftVT = N->getOperand(1).getValueType();
+  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
+                                     getZeroVector(ShiftVT, DAG, dl),
+                                     N->getOperand(1));
+  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
+                             Intrinsic::arm_neon_vshifts :
+                             Intrinsic::arm_neon_vshiftu);
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+                     DAG.getConstant(vshiftInt, MVT::i32),
+                     N->getOperand(0), NegatedCount);
+}
+
+static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
+                                const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // We can get here for a node like i32 = ISD::SHL i32, i64
+  if (VT != MVT::i64)
+    return SDValue();
+
+  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
+         "Unknown shift to lower!");
+
+  // We only lower SRA, SRL of 1 here, all others use generic lowering.
+  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
+    return SDValue();
+
+  // If we are in thumb mode, we don't have RRX.
+  if (ST->isThumb1Only()) return SDValue();
+
+  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+                           DAG.getConstant(0, MVT::i32));
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
+                           DAG.getConstant(1, MVT::i32));
+
+  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+  // captures the result into a carry flag.
+  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
+
+  // The low part is an ARMISD::RRX operand, which shifts the carry in.
+  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
+
+  // Merge the pieces into a single i64 value.
+static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
+  SDValue TmpOp0, TmpOp1;
+  bool Invert = false;
+  bool Swap = false;
+  unsigned Opc = 0;
+
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  SDValue CC = Op.getOperand(2);
+  EVT VT = Op.getValueType();
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Op.getOperand(1).getValueType().isFloatingPoint()) {
+    switch (SetCCOpcode) {
+    default: llvm_unreachable("Illegal FP comparison"); break;
+    case ISD::SETUNE:
+    case ISD::SETNE:  Invert = true; // Fallthrough
+    case ISD::SETOEQ:
+    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
+    case ISD::SETOLT:
+    case ISD::SETLT:  Swap = true; // Fallthrough
+    case ISD::SETOGT:
+    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
+    case ISD::SETOLE:
+    case ISD::SETLE:  Swap = true; // Fallthrough
+    case ISD::SETOGE:
+    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
+    case ISD::SETUGE: Swap = true; // Fallthrough
+    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
+    case ISD::SETUGT: Swap = true; // Fallthrough
+    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
+    case ISD::SETUEQ: Invert = true; // Fallthrough
+    case ISD::SETONE:
+      // Expand this to (OLT | OGT).
+      TmpOp0 = Op0;
+      TmpOp1 = Op1;
+      Opc = ISD::OR;
+      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
+      break;
+    case ISD::SETUO: Invert = true; // Fallthrough
+    case ISD::SETO:
+      // Expand this to (OLT | OGE).
+      TmpOp0 = Op0;
+      TmpOp1 = Op1;
+      Opc = ISD::OR;
+      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
+      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
+      break;
+    }
+  } else {
+    // Integer comparisons.
+    switch (SetCCOpcode) {
+    default: llvm_unreachable("Illegal integer comparison"); break;
+    case ISD::SETNE:  Invert = true;
+    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
+    case ISD::SETLT:  Swap = true;
+    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
+    case ISD::SETLE:  Swap = true;
+    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
+    case ISD::SETULT: Swap = true;
+    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+    case ISD::SETULE: Swap = true;
+    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+    }
+
+    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
+    if (Opc == ARMISD::VCEQ) {
+
+      SDValue AndOp;
+      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+        AndOp = Op0;
+      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
+        AndOp = Op1;
+
+      // Ignore bitconvert.
+      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
+        AndOp = AndOp.getOperand(0);
+
+      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
+        Opc = ARMISD::VTST;
+        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
+        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
+        Invert = !Invert;
+      }
+    }
+  }
+
+  if (Swap)
+    std::swap(Op0, Op1);
+
+  // If one of the operands is a constant vector zero, attempt to fold the
+  // comparison to a specialized compare-against-zero form.
+  SDValue SingleOp;
+  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
+    SingleOp = Op0;
+  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
+    if (Opc == ARMISD::VCGE)
+      Opc = ARMISD::VCLEZ;
+    else if (Opc == ARMISD::VCGT)
+      Opc = ARMISD::VCLTZ;
+    SingleOp = Op1;
+  }
+
+  SDValue Result;
+  if (SingleOp.getNode()) {
+    switch (Opc) {
+    case ARMISD::VCEQ:
+      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGE:
+      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLEZ:
+      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
+    case ARMISD::VCGT:
+      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
+    case ARMISD::VCLTZ:
+      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
+    default:
+      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+    }
+  } else {
+    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
+  }
+
+  if (Invert)
+    Result = DAG.getNOT(dl, Result, VT);
+
+  return Result;
+}
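NEON only has ordered compares, which is why SETONE and SETO are expanded to ORs of VCGT/VCGE above. A scalar restatement of the two expansions (a sketch only; it relies on IEEE comparisons with a NaN operand being false):

    #include <cassert>
    #include <cmath>

    static bool setone(float a, float b) { return (a < b) || (a > b); }  // OLT | OGT
    static bool seto(float a, float b)   { return (a < b) || (a >= b); } // OLT | OGE

    int main() {
      float nan = std::nanf("");
      assert(setone(1.0f, 2.0f) && !setone(1.0f, 1.0f) && !setone(nan, 1.0f));
      assert(seto(1.0f, 1.0f) && !seto(nan, 1.0f));
    }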
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV).  If so, return the encoded value.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+                                 unsigned SplatBitSize, SelectionDAG &DAG,
+                                 EVT &VT, bool is128Bits, NEONModImmType type) {
+  unsigned OpCmode, Imm;
+
+  // SplatBitSize is set to the smallest size that splats the vector, so a
+  // zero vector will always have SplatBitSize == 8.  However, NEON modified
+  // immediate instructions other than VMOV do not support the 8-bit encoding
+  // of a zero vector, and the default encoding of zero is supposed to be the
+  // 32-bit version.
+  if (SplatBits == 0)
+    SplatBitSize = 32;
+
+  switch (SplatBitSize) {
+  case 8:
+    if (type != VMOVModImm)
+      return SDValue();
+    // Any 1-byte value is OK.  Op=0, Cmode=1110.
+    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
+    OpCmode = 0xe;
+    Imm = SplatBits;
+    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
+    break;
+
+  case 16:
+    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
+    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
+    if ((SplatBits & ~0xff) == 0) {
+      // Value = 0x00nn: Op=x, Cmode=100x.
+      OpCmode = 0x8;
+      Imm = SplatBits;
+      break;
+    }
+    if ((SplatBits & ~0xff00) == 0) {
+      // Value = 0xnn00: Op=x, Cmode=101x.
+      OpCmode = 0xa;
+      Imm = SplatBits >> 8;
+      break;
+    }
+    return SDValue();
+
+  case 32:
+    // NEON's 32-bit VMOV supports splat values where:
+    // * only one byte is nonzero, or
+    // * the least significant byte is 0xff and the second byte is nonzero, or
+    // * the least significant 2 bytes are 0xff and the third is nonzero.
+    VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
+    if ((SplatBits & ~0xff) == 0) {
+      // Value = 0x000000nn: Op=x, Cmode=000x.
+      OpCmode = 0;
+      Imm = SplatBits;
+      break;
+    }
+    if ((SplatBits & ~0xff00) == 0) {
+      // Value = 0x0000nn00: Op=x, Cmode=001x.
+      OpCmode = 0x2;
+      Imm = SplatBits >> 8;
+      break;
+    }
+    if ((SplatBits & ~0xff0000) == 0) {
+      // Value = 0x00nn0000: Op=x, Cmode=010x.
+      OpCmode = 0x4;
+      Imm = SplatBits >> 16;
+      break;
+    }
+    if ((SplatBits & ~0xff000000) == 0) {
+      // Value = 0xnn000000: Op=x, Cmode=011x.
+      OpCmode = 0x6;
+      Imm = SplatBits >> 24;
+      break;
+    }
+
+    // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
+    if (type == OtherModImm) return SDValue();
+
+    if ((SplatBits & ~0xffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+      // Value = 0x0000nnff: Op=x, Cmode=1100.
+      OpCmode = 0xc;
+      Imm = SplatBits >> 8;
+      SplatBits |= 0xff;
+      break;
+    }
+
+    if ((SplatBits & ~0xffffff) == 0 &&
+        ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+      // Value = 0x00nnffff: Op=x, Cmode=1101.
+      OpCmode = 0xd;
+      Imm = SplatBits >> 16;
+      SplatBits |= 0xffff;
+      break;
+    }
+
+    // Note: there are a few 32-bit splat values (specifically: 00ffff00,
+    // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
+    // VMOV.I32.  A (very) minor optimization would be to replicate the value
+    // and fall through here to test for a valid 64-bit splat.  But, then the
+    // caller would also need to check and handle the change in size.
+    return SDValue();
+
+  case 64: {
+    if (type != VMOVModImm)
+      return SDValue();
+    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+    uint64_t BitMask = 0xff;
+    uint64_t Val = 0;
+    unsigned ImmMask = 1;
+    Imm = 0;
+    for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
+      if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
+        Val |= BitMask;
+        Imm |= ImmMask;
+      } else if ((SplatBits & BitMask) != 0) {
+        return SDValue();
+      }
+      BitMask <<= 8;
+      ImmMask <<= 1;
+    }
+    // Op=1, Cmode=1110.
+    OpCmode = 0x1e;
+    SplatBits = Val;
+    VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
+    break;
+  }
+
+  default:
+    llvm_unreachable("unexpected size for isNEONModifiedImm");
+    return SDValue();
+  }
+
+  unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+  return DAG.getTargetConstant(EncodedVal, MVT::i32);
+}
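For the 32-bit case the accepted splat values reduce to a small set of byte patterns. A hypothetical stand-alone predicate capturing the VMOV-only subset checked above (it ignores the SplatUndef refinement):

    #include <cstdint>

    static bool isValid32BitVMOVImm(uint32_t V) {
      for (unsigned Shift = 0; Shift < 32; Shift += 8)
        if ((V & ~(0xffu << Shift)) == 0)
          return true;                        // one nonzero byte (cmode 000x-011x)
      if ((V & ~0xffffu) == 0 && (V & 0xff) == 0xff)
        return true;                          // 0x0000nnff (cmode 1100)
      if ((V & ~0xffffffu) == 0 && (V & 0xffff) == 0xffff)
        return true;                          // 0x00nnffff (cmode 1101)
      return false;
    }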
+static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
+                       bool &ReverseVEXT, unsigned &Imm) {
+  unsigned NumElts = VT.getVectorNumElements();
+  ReverseVEXT = false;
+
+  // Assume that the first shuffle index is not UNDEF.  Fail if it is.
+  if (M[0] < 0)
+    return false;
+
+  Imm = M[0];
+
+  // If this is a VEXT shuffle, the immediate value is the index of the first
+  // element.  The other shuffle indices must be the successive elements after
+  // the first one.
+  unsigned ExpectedElt = Imm;
+  for (unsigned i = 1; i < NumElts; ++i) {
+    // Increment the expected index.  If it wraps around, it may still be
+    // a VEXT but the source vectors must be swapped.
+    ExpectedElt += 1;
+    if (ExpectedElt == NumElts * 2) {
+      ExpectedElt = 0;
+      ReverseVEXT = true;
+    }
+
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if (ExpectedElt != static_cast<unsigned>(M[i]))
+      return false;
+  }
+
+  // Adjust the index value if the source operands will be swapped.
+  if (ReverseVEXT)
+    Imm -= NumElts;
+
+  return true;
+}
+
+/// isVREVMask - Check if a vector shuffle corresponds to a VREV
+/// instruction with the specified blocksize.  (The order of the elements
+/// within each block of the vector is reversed.)
+static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned BlockSize) {
+  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
+         "Only possible block sizes for VREV are: 16, 32, 64");
+
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned BlockElts = M[0] + 1;
+  // If the first shuffle index is UNDEF, be optimistic.
+  if (M[0] < 0)
+    BlockElts = BlockSize / EltSz;
+
+  if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
+    return false;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
+      return false;
+  }
+
+  return true;
+}
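As a concrete instance of the index formula in isVREVMask, take v8i16 with BlockSize 64: BlockElts is 4, and the only accepted fully-defined mask reverses each 4-element block:

    #include <cassert>

    int main() {
      int M[8] = {3, 2, 1, 0, 7, 6, 5, 4};    // VREV64.16 on a v8i16
      unsigned BlockElts = 4;
      for (unsigned i = 0; i != 8; ++i)
        assert((unsigned)M[i] ==
               (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts));
    }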
+static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
+      return false;
+  }
+  return true;
+}
+
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
+      return false;
+  }
+  return true;
+}
+
+static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i != NumElts; ++i) {
+    if (M[i] < 0) continue; // ignore UNDEF indices
+    if ((unsigned) M[i] != 2 * i + WhichResult)
+      return false;
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned Half = VT.getVectorNumElements() / 2;
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned j = 0; j != 2; ++j) {
+    unsigned Idx = WhichResult;
+    for (unsigned i = 0; i != Half; ++i) {
+      int MIdx = M[i + j * Half];
+      if (MIdx >= 0 && (unsigned) MIdx != Idx)
+        return false;
+      Idx += 2;
+    }
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
+                       unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
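On a 4-element vector the first-result masks these predicates accept are VTRN <0,4,2,6>, VUZP <0,2,4,6> and VZIP <0,4,1,5>; the second results start one lane or one half later. A check of the VZIP arithmetic above, as a sketch:

    #include <cassert>

    int main() {
      int M[4] = {0, 4, 1, 5};                // VZIP, first result
      unsigned NumElts = 4, WhichResult = (M[0] == 0 ? 0 : 1);
      unsigned Idx = WhichResult * NumElts / 2;
      for (unsigned i = 0; i != NumElts; i += 2) {
        assert((unsigned)M[i] == Idx && (unsigned)M[i + 1] == Idx + NumElts);
        Idx += 1;
      }
    }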
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
+        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+// If N is an integer constant that can be moved into a register in one
+// instruction, return an SDValue of such a constant (will become a MOV
+// instruction).  Otherwise return null.
+static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
+                                     const ARMSubtarget *ST, DebugLoc dl) {
+  uint64_t Val;
+  if (!isa<ConstantSDNode>(N))
+    return SDValue();
+  Val = cast<ConstantSDNode>(N)->getZExtValue();
+
+  if (ST->isThumb1Only()) {
+    if (Val <= 255 || ~Val <= 255)
+      return DAG.getConstant(Val, MVT::i32);
+  } else {
+    if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
+      return DAG.getConstant(Val, MVT::i32);
+  }
+  return SDValue();
+}
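IsSingleInstrConstant defers to ARM_AM::getSOImmVal for the ARM-mode test; the encoding it checks is an 8-bit value rotated right by an even amount. A rough model (a hypothetical helper, not the real ARM_AM routine):

    #include <cstdint>

    static bool isARMSOImm(uint32_t V) {
      // Undo every even rotate-right: rotating V left must leave only 8 bits.
      for (unsigned Rot = 0; Rot < 32; Rot += 2) {
        uint32_t Rotated = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
        if ((Rotated & ~0xffu) == 0)
          return true;
      }
      return false;
    }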
+// If this is a case we can't handle, return null and let the default
+// expansion code take care of it.
+SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                                             const ARMSubtarget *ST) const {
+  BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+    if (SplatBitSize <= 64) {
+      // Check if an immediate VMOV works.
+      EVT VmovVT;
+      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VmovVT, VT.is128BitVector(),
+                                      VMOVModImm);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+      }
+
+      // Try an immediate VMVN.
+      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+                             ((1LL << SplatBitSize) - 1));
+      Val = isNEONModifiedImm(NegatedImm,
+                              SplatUndef.getZExtValue(), SplatBitSize,
+                              DAG, VmovVT, VT.is128BitVector(),
+                              VMVNModImm);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
+      }
+    }
+  }
+
+  // Scan through the operands to see if only one value is used.
+  unsigned NumElts = VT.getVectorNumElements();
+  bool isOnlyLowElement = true;
+  bool usesOnlyOneValue = true;
+  bool isConstant = true;
+  SDValue Value;
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = Op.getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    if (i > 0)
+      isOnlyLowElement = false;
+    if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+      isConstant = false;
+
+    if (!Value.getNode())
+      Value = V;
+    else if (V != Value)
+      usesOnlyOneValue = false;
+  }
+
+  if (!Value.getNode())
+    return DAG.getUNDEF(VT);
+
+  if (isOnlyLowElement)
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+
+  // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
+  // i32 and try again.
+  if (usesOnlyOneValue && EltSize <= 32) {
+    if (!isConstant)
+      return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+    if (VT.getVectorElementType().isFloatingPoint()) {
+      SmallVector<SDValue, 8> Ops;
+      for (unsigned i = 0; i < NumElts; ++i)
+        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                                  Op.getOperand(i)));
+      EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+      SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+      Val = LowerBUILD_VECTOR(Val, DAG, ST);
+      if (Val.getNode())
+        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+    }
+    SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
+    if (Val.getNode())
+      return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
+  }
+
+  // If all elements are constants and the case above didn't get hit, fall back
+  // to the default expansion, which will generate a load from the constant
+  // pool.
+  if (isConstant)
+    return SDValue();
+
+  // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
+  if (NumElts >= 4) {
+    SDValue shuffle = ReconstructShuffle(Op, DAG);
+    if (shuffle != SDValue())
+      return shuffle;
+  }
+
+  // Vectors with 32- or 64-bit elements can be built by directly assigning
+  // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
+  // will be legalized.
+  if (EltSize >= 32) {
+    // Do the expansion with floating-point types, since that is what the VFP
+    // registers are defined to use, and since i64 is not legal.
+    EVT EltVT = EVT::getFloatingPointVT(EltSize);
+    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0; i < NumElts; ++i)
+      Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
+    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+  }
+
+  return SDValue();
+}
+// Gather data to see if the operation can be modelled as a
+// shuffle in combination with VEXTs.
+SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+
+  SmallVector<SDValue, 2> SourceVecs;
+  SmallVector<unsigned, 2> MinElts;
+  SmallVector<unsigned, 2> MaxElts;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = Op.getOperand(i);
+    if (V.getOpcode() == ISD::UNDEF)
+      continue;
+    else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
+      // A shuffle can only come from building a vector from various
+      // elements of other vectors.
+      return SDValue();
+    }
+
+    // Record this extraction against the appropriate vector if possible...
+    SDValue SourceVec = V.getOperand(0);
+    unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
+    bool FoundSource = false;
+    for (unsigned j = 0; j < SourceVecs.size(); ++j) {
+      if (SourceVecs[j] == SourceVec) {
+        if (MinElts[j] > EltNo)
+          MinElts[j] = EltNo;
+        if (MaxElts[j] < EltNo)
+          MaxElts[j] = EltNo;
+        FoundSource = true;
+        break;
+      }
+    }
+
+    // Or record a new source if not...
+    if (!FoundSource) {
+      SourceVecs.push_back(SourceVec);
+      MinElts.push_back(EltNo);
+      MaxElts.push_back(EltNo);
+    }
+  }
+
+  // Currently only do something sane when at most two source vectors
+  // involved.
+  if (SourceVecs.size() > 2)
+    return SDValue();
+
+  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
+  int VEXTOffsets[2] = {0, 0};
+
+  // This loop extracts the usage patterns of the source vectors
+  // and prepares appropriate SDValues for a shuffle if possible.
+  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
+    if (SourceVecs[i].getValueType() == VT) {
+      // No VEXT necessary
+      ShuffleSrcs[i] = SourceVecs[i];
+      VEXTOffsets[i] = 0;
+      continue;
+    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
+      // It probably isn't worth padding out a smaller vector just to
+      // break it down again in a shuffle.
+      return SDValue();
+    }
+
+    // Since only 64-bit and 128-bit vectors are legal on ARM and
+    // we've eliminated the other cases...
+    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
+           "unexpected vector sizes in ReconstructShuffle");
+
+    if (MaxElts[i] - MinElts[i] >= NumElts) {
+      // Span too large for a VEXT to cope
+      return SDValue();
+    }
+
+    if (MinElts[i] >= NumElts) {
+      // The extraction can just take the second half
+      VEXTOffsets[i] = NumElts;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                   SourceVecs[i],
+                                   DAG.getIntPtrConstant(NumElts));
+    } else if (MaxElts[i] < NumElts) {
+      // The extraction can just take the first half
+      VEXTOffsets[i] = 0;
+      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                   SourceVecs[i],
+                                   DAG.getIntPtrConstant(0));
+    } else {
+      // An actual VEXT is needed
+      VEXTOffsets[i] = MinElts[i];
+      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                     SourceVecs[i],
+                                     DAG.getIntPtrConstant(0));
+      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
+                                     SourceVecs[i],
+                                     DAG.getIntPtrConstant(NumElts));
+      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
+                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
+    }
+  }
+
+  SmallVector<int, 8> Mask;
+
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue Entry = Op.getOperand(i);
+    if (Entry.getOpcode() == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    SDValue ExtractVec = Entry.getOperand(0);
+    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
+                                          .getOperand(1))->getSExtValue();
+    if (ExtractVec == SourceVecs[0]) {
+      Mask.push_back(ExtractElt - VEXTOffsets[0]);
+    } else {
+      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
+    }
+  }
+
+  // Final check before we try to produce nonsense...
+  if (isShuffleMaskLegal(Mask, VT))
+    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
+                                &Mask[0]);
+
+  return SDValue();
+}
+/// isShuffleMaskLegal - Targets can use this to indicate that they only
+/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
+/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
+/// are assumed to be legal.
+bool
+ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
+                                      EVT VT) const {
+  if (VT.getVectorNumElements() == 4 &&
+      (VT.is128BitVector() || VT.is64BitVector())) {
+    unsigned PFIndexes[4];
+    for (unsigned i = 0; i != 4; ++i) {
+      if (M[i] < 0)
+        PFIndexes[i] = 8;
+      else
+        PFIndexes[i] = M[i];
+    }
+
+    // Compute the index in the perfect shuffle table.
+    unsigned PFTableIndex =
+      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+    unsigned Cost = (PFEntry >> 30);
+
+    if (Cost <= 4)
+      return true;
+  }
+
+  bool ReverseVEXT;
+  unsigned Imm, WhichResult;
+
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+  return (EltSize >= 32 ||
+          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+          isVREVMask(M, VT, 64) ||
+          isVREVMask(M, VT, 32) ||
+          isVREVMask(M, VT, 16) ||
+          isVEXTMask(M, VT, ReverseVEXT, Imm) ||
+          isVTRNMask(M, VT, WhichResult) ||
+          isVUZPMask(M, VT, WhichResult) ||
+          isVZIPMask(M, VT, WhichResult) ||
+          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+          isVZIP_v_undef_Mask(M, VT, WhichResult));
+}
+
+/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
+/// the specified operations to build the shuffle.
+static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
+                                      SDValue RHS, SelectionDAG &DAG,
+                                      DebugLoc dl) {
+  unsigned OpNum = (PFEntry >> 26) & 0x0F;
+  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
+  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
+
+  enum {
+    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
+    OP_VREV,
+    OP_VDUP0,
+    OP_VDUP1,
+    OP_VDUP2,
+    OP_VDUP3,
+    OP_VEXT1,
+    OP_VEXT2,
+    OP_VEXT3,
+    OP_VUZPL, // VUZP, left result
+    OP_VUZPR, // VUZP, right result
+    OP_VZIPL, // VZIP, left result
+    OP_VZIPR, // VZIP, right result
+    OP_VTRNL, // VTRN, left result
+    OP_VTRNR  // VTRN, right result
+  };
+
+  if (OpNum == OP_COPY) {
+    if (LHSID == (1*9+2)*9+3) return LHS;
+    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
+    return RHS;
+  }
+
+  SDValue OpLHS, OpRHS;
+  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
+  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
+  EVT VT = OpLHS.getValueType();
+
+  switch (OpNum) {
+  default: llvm_unreachable("Unknown shuffle opcode!");
+  case OP_VREV:
+    return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
+  case OP_VDUP0:
+  case OP_VDUP1:
+  case OP_VDUP2:
+  case OP_VDUP3:
+    return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
+                       OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
+  case OP_VEXT1:
+  case OP_VEXT2:
+  case OP_VEXT3:
+    return DAG.getNode(ARMISD::VEXT, dl, VT,
+                       OpLHS, OpRHS,
+                       DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
+  case OP_VUZPL:
+  case OP_VUZPR:
+    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                       OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
+  case OP_VZIPL:
+  case OP_VZIPR:
+    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                       OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
+  case OP_VTRNL:
+  case OP_VTRNR:
+    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                       OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
+  }
+}
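The table addressing used by both isShuffleMaskLegal and LowerVECTOR_SHUFFLE treats each of the four lane indices (0..7, or 8 for UNDEF) as a base-9 digit, so a 4-lane mask maps to one of 9^4 = 6561 PerfectShuffleTable entries; the entry's top two bits hold the cost. A small check of the indexing, for illustration:

    #include <cassert>

    int main() {
      unsigned PFIndexes[4] = {1, 0, 3, 2};   // e.g. a <1,0,3,2> swap mask
      unsigned PFTableIndex = PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 +
                              PFIndexes[2]*9 + PFIndexes[3];
      assert(PFTableIndex == ((1*9 + 0)*9 + 3)*9 + 2);
    }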
+static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+  SDValue V1 = Op.getOperand(0);
+  SDValue V2 = Op.getOperand(1);
+  DebugLoc dl = Op.getDebugLoc();
+  EVT VT = Op.getValueType();
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+  SmallVector<int, 8> ShuffleMask;
+
+  // Convert shuffles that are directly supported on NEON to target-specific
+  // DAG nodes, instead of keeping them as shuffles and matching them again
+  // during code selection.  This is more efficient and avoids the possibility
+  // of inconsistencies between legalization and selection.
+  // FIXME: floating-point vectors should be canonicalized to integer vectors
+  // of the same type so that they get CSEd properly.
+  SVN->getMask(ShuffleMask);
+
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+  if (EltSize <= 32) {
+    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+      int Lane = SVN->getSplatIndex();
+      // If this is undef splat, generate it via "just" vdup, if possible.
+      if (Lane == -1) Lane = 0;
+
+      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+      }
+      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+                         DAG.getConstant(Lane, MVT::i32));
+    }
+
+    bool ReverseVEXT;
+    unsigned Imm;
+    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+      if (ReverseVEXT)
+        std::swap(V1, V2);
+      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+                         DAG.getConstant(Imm, MVT::i32));
+    }
+
+    if (isVREVMask(ShuffleMask, VT, 64))
+      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+    if (isVREVMask(ShuffleMask, VT, 32))
+      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+    if (isVREVMask(ShuffleMask, VT, 16))
+      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+    // Check for Neon shuffles that modify both input vectors in place.
+    // If both results are used, i.e., if there are two shuffles with the same
+    // source operands and with masks corresponding to both results of one of
+    // these operations, DAG memoization will ensure that a single node is
+    // used for both shuffles.
+    unsigned WhichResult;
+    if (isVTRNMask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                         V1, V2).getValue(WhichResult);
+    if (isVUZPMask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                         V1, V2).getValue(WhichResult);
+    if (isVZIPMask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                         V1, V2).getValue(WhichResult);
+
+    if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                         V1, V1).getValue(WhichResult);
+    if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                         V1, V1).getValue(WhichResult);
+    if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+      return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                         V1, V1).getValue(WhichResult);
+  }
+
+  // If the shuffle is not directly supported and it has 4 elements, use
+  // the PerfectShuffle-generated table to synthesize it from other shuffles.
+  unsigned NumElts = VT.getVectorNumElements();
+  if (NumElts == 4) {
+    unsigned PFIndexes[4];
+    for (unsigned i = 0; i != 4; ++i) {
+      if (ShuffleMask[i] < 0)
+        PFIndexes[i] = 8;
+      else
+        PFIndexes[i] = ShuffleMask[i];
+    }
+
+    // Compute the index in the perfect shuffle table.
+    unsigned PFTableIndex =
+      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
+    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
+    unsigned Cost = (PFEntry >> 30);
+
+    if (Cost <= 4)
+      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
+  }
+
+  // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
+  if (EltSize >= 32) {
+    // Do the expansion with floating-point types, since that is what the VFP
+    // registers are defined to use, and since i64 is not legal.
+    EVT EltVT = EVT::getFloatingPointVT(EltSize);
+    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+    V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
+    V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
+    SmallVector<SDValue, 8> Ops;
+    for (unsigned i = 0; i < NumElts; ++i) {
+      if (ShuffleMask[i] < 0)
+        Ops.push_back(DAG.getUNDEF(EltVT));
+      else
+        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+                                  ShuffleMask[i] < (int)NumElts ? V1 : V2,
+                                  DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+                                                  MVT::i32)));
+    }
+    SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
+    return DAG.getNode(ISD::BITCAST, dl, VT, Val);
+  }
+
+  return SDValue();
+}
+
+static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
+  // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
+  SDValue Lane = Op.getOperand(1);
+  if (!isa<ConstantSDNode>(Lane))
+    return SDValue();
+
+  SDValue Vec = Op.getOperand(0);
+  if (Op.getValueType() == MVT::i32 &&
+      Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+    DebugLoc dl = Op.getDebugLoc();
+    return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
+  }
+
+  return Op;
+}
+
+static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
+  // The only time a CONCAT_VECTORS operation can have legal types is when
+  // two 64-bit vectors are concatenated to a 128-bit vector.
+  assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
+         "unexpected CONCAT_VECTORS");
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Val = DAG.getUNDEF(MVT::v2f64);
+  SDValue Op0 = Op.getOperand(0);
+  SDValue Op1 = Op.getOperand(1);
+  if (Op0.getOpcode() != ISD::UNDEF)
+    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
+                      DAG.getIntPtrConstant(0));
+  if (Op1.getOpcode() != ISD::UNDEF)
+    Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
+                      DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
+                      DAG.getIntPtrConstant(1));
+  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
+}
+
+/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
+/// element has been zero/sign-extended, depending on the isSigned parameter,
+/// from an integer type half its size.
+static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
+                                   bool isSigned) {
+  // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
+  EVT VT = N->getValueType(0);
+  if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
+    SDNode *BVN = N->getOperand(0).getNode();
+    if (BVN->getValueType(0) != MVT::v4i32 ||
+        BVN->getOpcode() != ISD::BUILD_VECTOR)
+      return false;
+    unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+    unsigned HiElt = 1 - LoElt;
+    ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
+    ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
+    ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
+    ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
+    if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
+      return false;
+    if (isSigned) {
+      if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
+          Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
+        return true;
+    } else {
+      if (Hi0->isNullValue() && Hi1->isNullValue())
+        return true;
+    }
+    return false;
+  }
+
+  if (N->getOpcode() != ISD::BUILD_VECTOR)
+    return false;
+
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+    SDNode *Elt = N->getOperand(i).getNode();
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
+      unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+      unsigned HalfSize = EltSize / 2;
+      if (isSigned) {
+        int64_t SExtVal = C->getSExtValue();
+        if ((SExtVal >> HalfSize) != (SExtVal >> EltSize))
+          return false;
+      } else {
+        if ((C->getZExtValue() >> HalfSize) != 0)
+          return false;
+      }
+      continue;
+    }
+    return false;
+  }
+
+  return true;
+}
+
+/// isSignExtended - Check if a node is a vector value that is sign-extended
+/// or a constant BUILD_VECTOR with sign-extended elements.
+static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
+    return true;
+  if (isExtendedBUILD_VECTOR(N, DAG, true))
+    return true;
+  return false;
+}
+
+/// isZeroExtended - Check if a node is a vector value that is zero-extended
+/// or a constant BUILD_VECTOR with zero-extended elements.
+static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
+    return true;
+  if (isExtendedBUILD_VECTOR(N, DAG, false))
+    return true;
+  return false;
+}
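The per-element constant test above can be read as: shifting out the low half must leave only copies of the element's sign. A scalar restatement (a sketch assuming EltSize < 64, so both shifts are defined on int64_t):

    #include <cassert>
    #include <cstdint>

    static bool fitsSignedHalf(int64_t SExtVal, unsigned EltSize) {
      unsigned HalfSize = EltSize / 2;
      return (SExtVal >> HalfSize) == (SExtVal >> EltSize);
    }

    int main() {
      assert(fitsSignedHalf(-1, 32) && fitsSignedHalf(0x7fff, 32));
      assert(!fitsSignedHalf(0x12345678, 32)); // high half carries real bits
    }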
+/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending
+/// load, or BUILD_VECTOR with extended elements, return the unextended value.
+static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) {
+  if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
+    return N->getOperand(0);
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+    return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(),
+                       LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
+                       LD->isNonTemporal(), LD->getAlignment());
+  // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
+  // have been legalized as a BITCAST from v4i32.
+  if (N->getOpcode() == ISD::BITCAST) {
+    SDNode *BVN = N->getOperand(0).getNode();
+    assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
+           BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
+    unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
+    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
+                       BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
+  }
+  // Construct a new BUILD_VECTOR with elements truncated to half the size.
+  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
+  EVT VT = N->getValueType(0);
+  unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+  unsigned NumElts = VT.getVectorNumElements();
+  MVT TruncVT = MVT::getIntegerVT(EltSize);
+  SmallVector<SDValue, 8> Ops;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
+    const APInt &CInt = C->getAPIntValue();
+    Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT));
+  }
+  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+                     MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
+}
+
+static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
+  // Multiplications are only custom-lowered for 128-bit vectors so that
+  // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
+  EVT VT = Op.getValueType();
+  assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL");
+  SDNode *N0 = Op.getOperand(0).getNode();
+  SDNode *N1 = Op.getOperand(1).getNode();
+  unsigned NewOpc = 0;
+  if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG))
+    NewOpc = ARMISD::VMULLs;
+  else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG))
+    NewOpc = ARMISD::VMULLu;
+  else if (VT == MVT::v2i64)
+    // Fall through to expand this.  It is not legal.
+    return SDValue();
+  else
+    // Other vector multiplications are legal.
+    return Op;
+
+  // Legalize to a VMULL instruction.
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue Op0 = SkipExtension(N0, DAG);
+  SDValue Op1 = SkipExtension(N1, DAG);
+
+  assert(Op0.getValueType().is64BitVector() &&
+         Op1.getValueType().is64BitVector() &&
+         "unexpected types for extended operands to VMULL");
+  return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+}
+
+static SDValue
+LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
+  // Convert to float
+  // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
+  // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
+  X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
+  Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
+  X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
+  Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
+  // Get reciprocal estimate.
+  // float4 recip = vrecpeq_f32(yf);
+  Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+                  DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
+  // Because char has a smaller range than uchar, we can actually get away
+  // without any newton steps.  This requires that we use a weird bias
+  // of 0xb000, however (again, this has been exhaustively tested).
+  // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
+  X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
+  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
+  Y = DAG.getConstant(0xb000, MVT::i32);
+  Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
+  X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
+  X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
+  // Convert back to short.
+  X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
+  X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
+  return X;
+}
+
+static SDValue
+LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
+  SDValue N2;
+  // Convert to float.
+ // float4 yf = vcvt_f32_s32(vmovl_s16(y)); + // float4 xf = vcvt_f32_s32(vmovl_s16(x)); + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); + N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); + N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); + + // Use reciprocal estimate and one refinement step. + // float4 recip = vrecpeq_f32(yf); + // recip *= vrecpsq_f32(yf, recip); + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), + N1, N2); + N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); + // Because short has a smaller range than ushort, we can actually get away + // with only a single newton step. This requires that we use a weird bias + // of 89, however (again, this has been exhaustively tested). + // float4 result = as_float4(as_int4(xf*recip) + 89); + N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); + N1 = DAG.getConstant(89, MVT::i32); + N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); + N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); + N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); + // Convert back to integer and return. + // return vmovn_s32(vcvt_s32_f32(result)); + N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); + return N0; +} + +static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + assert((VT == MVT::v4i16 || VT == MVT::v8i8) && + "unexpected type for custom-lowering ISD::SDIV"); + + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2, N3; + + if (VT == MVT::v8i8) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); + + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(4)); + N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(4)); + N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(0)); + N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(0)); + + N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 + N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 + + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); + N0 = LowerCONCAT_VECTORS(N0, DAG); + + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); + return N0; + } + return LowerSDIV_v4i16(N0, N1, dl, DAG); +} + +static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + assert((VT == MVT::v4i16 || VT == MVT::v8i8) && + "unexpected type for custom-lowering ISD::UDIV"); + + DebugLoc dl = Op.getDebugLoc(); + SDValue N0 = Op.getOperand(0); + SDValue N1 = Op.getOperand(1); + SDValue N2, N3; + + if (VT == MVT::v8i8) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); + + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(4)); + N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(4)); + N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, + DAG.getIntPtrConstant(0)); + N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, + DAG.getIntPtrConstant(0)); + + N0 = 
LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
+    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
+
+    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
+    N0 = LowerCONCAT_VECTORS(N0, DAG);
+
+    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
+                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
+                     N0);
+    return N0;
+  }
+
+  // v4i16 udiv ... Convert to float.
+  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
+  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
+  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
+  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
+  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
+  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
+
+  // Use reciprocal estimate and two refinement steps.
+  // float4 recip = vrecpeq_f32(yf);
+  // recip *= vrecpsq_f32(yf, recip);
+  // recip *= vrecpsq_f32(yf, recip);
+  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+                   N1, N2);
+  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
+                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
+                   N1, N2);
+  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
+  // Simply multiplying by the reciprocal estimate can leave us a few ulps
+  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
+  // and that it will never cause us to return an answer too large).
+  // float4 result = as_float4(as_int4(xf*recip) + 2);
+  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
+  N1 = DAG.getConstant(2, MVT::i32);
+  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
+  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
+  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
+  // Convert back to integer and return.
+  // return vmovn_u32(vcvt_s32_f32(result));
+  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
+  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
+  return N0;
+}
+
+SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+  switch (Op.getOpcode()) {
+  default: llvm_unreachable("Don't know how to custom lower this!");
+  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
+  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
+  case ISD::GlobalAddress:
+    return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : + LowerGlobalAddressELF(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); + case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); + case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); + case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); + case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); + case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); + case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); + case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, + Subtarget); + case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); + case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); + case ISD::VSETCC: return LowerVSETCC(Op, DAG); + case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); + case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::MUL: return LowerMUL(Op, DAG); + case ISD::SDIV: return LowerSDIV(Op, DAG); + case ISD::UDIV: return LowerUDIV(Op, DAG); + } + return SDValue(); +} + +/// ReplaceNodeResults - Replace the results of node with an illegal result +/// type with new values built out of custom code. 
+void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
+                                           SmallVectorImpl<SDValue> &Results,
+                                           SelectionDAG &DAG) const {
+  SDValue Res;
+  switch (N->getOpcode()) {
+  default:
+    llvm_unreachable("Don't know how to custom expand this!");
+    break;
+  case ISD::BITCAST:
+    Res = ExpandBITCAST(N, DAG);
+    break;
+  case ISD::SRL:
+  case ISD::SRA:
+    Res = Expand64BitShift(N, DAG, Subtarget);
+    break;
+  }
+  if (Res.getNode())
+    Results.push_back(Res);
+}
+
+//===----------------------------------------------------------------------===//
+//                           ARM Scheduler Hooks
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+                                     MachineBasicBlock *BB,
+                                     unsigned Size) const {
+  unsigned dest    = MI->getOperand(0).getReg();
+  unsigned ptr     = MI->getOperand(1).getReg();
+  unsigned oldval  = MI->getOperand(2).getReg();
+  unsigned newval  = MI->getOperand(3).getReg();
+  unsigned scratch = BB->getParent()->getRegInfo()
+    .createVirtualRegister(ARM::GPRRegisterClass);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  bool isThumb2 = Subtarget->isThumb2();
+
+  unsigned ldrOpc, strOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    break;
+  }
+
+  MachineFunction *MF = BB->getParent();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It; // insert the new blocks after the current block
+
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB->addSuccessor(loop1MBB);
+
+  // loop1MBB:
+  //   ldrex dest, [ptr]
+  //   cmp dest, oldval
+  //   bne exitMBB
+  BB = loop1MBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                 .addReg(dest).addReg(oldval));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  BB->addSuccessor(loop2MBB);
+  BB->addSuccessor(exitMBB);
+
+  // loop2MBB:
+  //   strex scratch, newval, [ptr]
+  //   cmp scratch, #0
+  //   bne loop1MBB
+  BB = loop2MBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  BB->addSuccessor(loop1MBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+
+  return BB;
+}
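The loop1MBB/loop2MBB diamond above is the classic load-exclusive/store-exclusive retry loop. A portable analogue of its semantics (illustrative only, not the generated code) using std::atomic:

    #include <atomic>

    // Returns the value found at Ptr; the store happened iff that equals OldVal.
    static int atomicCmpSwap(std::atomic<int> &Ptr, int OldVal, int NewVal) {
      int Expected = OldVal;
      while (!Ptr.compare_exchange_weak(Expected, NewVal)) {
        if (Expected != OldVal)
          return Expected;   // loop1MBB: cmp dest, oldval failed -> exitMBB
        Expected = OldVal;   // loop2MBB: strex failed spuriously -> retry
      }
      return OldVal;
    }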
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+                                    unsigned Size, unsigned BinOpcode) const {
+  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *MF = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned ptr = MI->getOperand(1).getReg();
+  unsigned incr = MI->getOperand(2).getReg();
+  DebugLoc dl = MI->getDebugLoc();
+
+  bool isThumb2 = Subtarget->isThumb2();
+  unsigned ldrOpc, strOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicBinary!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    break;
+  }
+
+  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loopMBB);
+  MF->insert(It, exitMBB);
+
+  // Transfer the remainder of BB and its successor edges to exitMBB.
+  exitMBB->splice(exitMBB->begin(), BB,
+                  llvm::next(MachineBasicBlock::iterator(MI)),
+                  BB->end());
+  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
+  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+  unsigned scratch2 = (!BinOpcode) ? incr :
+    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  //  loopMBB:
+  //   ldrex dest, ptr
+  //   <binop> scratch2, dest, incr
+  //   strex scratch, scratch2, ptr
+  //   cmp scratch, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  BB = loopMBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  if (BinOpcode) {
+    // operand order needs to go the other way for NAND
+    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+                     addReg(incr).addReg(dest)).addReg(0);
+    else
+      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+                     addReg(dest).addReg(incr)).addReg(0);
+  }
+
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+
+  MI->eraseFromParent();   // The instruction is gone now.
+ + return BB; +} + +static +MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { + for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) + if (*I != Succ) + return *I; + llvm_unreachable("Expecting a BB with two successors!"); +} + +MachineBasicBlock * +ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + switch (MI->getOpcode()) { + default: + MI->dump(); + llvm_unreachable("Unexpected instr type to insert"); + + case ARM::ATOMIC_LOAD_ADD_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + case ARM::ATOMIC_LOAD_ADD_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); + + case ARM::ATOMIC_LOAD_AND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + case ARM::ATOMIC_LOAD_AND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); + + case ARM::ATOMIC_LOAD_OR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + case ARM::ATOMIC_LOAD_OR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); + + case ARM::ATOMIC_LOAD_XOR_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + case ARM::ATOMIC_LOAD_XOR_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); + + case ARM::ATOMIC_LOAD_NAND_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + case ARM::ATOMIC_LOAD_NAND_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); + + case ARM::ATOMIC_LOAD_SUB_I8: + return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I16: + return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_SUB_I32: + return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); + + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); + case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); + case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); + + case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); + case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); + case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + + case ARM::tMOVCCr_pseudo: { + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; + + // thisMBB: + // ... + // TrueVal = ... 
+ // cmpTY ccX, r1, r2 + // bCC copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] + // ... + BB = sinkMBB; + BuildMI(*BB, BB->begin(), dl, + TII->get(ARM::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) + .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + + case ARM::BCCi64: + case ARM::BCCZi64: { + // If there is an unconditional branch to the other successor, remove it. + BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); + + // Compare both parts that make up the double comparison separately for + // equality. + bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; + + unsigned LHS1 = MI->getOperand(1).getReg(); + unsigned LHS2 = MI->getOperand(2).getReg(); + if (RHSisZero) { + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS1).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) + .addReg(LHS2).addImm(0) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } else { + unsigned RHS1 = MI->getOperand(3).getReg(); + unsigned RHS2 = MI->getOperand(4).getReg(); + AddDefaultPred(BuildMI(BB, dl, + TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS1).addReg(RHS1)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(LHS2).addReg(RHS2) + .addImm(ARMCC::EQ).addReg(ARM::CPSR); + } + + MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); + MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); + if (MI->getOperand(0).getImm() == ARMCC::NE) + std::swap(destMBB, exitMBB); + + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) + .addMBB(exitMBB); + + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Optimization Hooks +//===----------------------------------------------------------------------===// + +static +SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, + TargetLowering::DAGCombinerInfo &DCI) { + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = N->getValueType(0); + unsigned Opc = N->getOpcode(); + bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; + SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); + SDValue RHS = isSlctCC ? 
Slct.getOperand(3) : Slct.getOperand(2);
+  ISD::CondCode CC = ISD::SETCC_INVALID;
+
+  if (isSlctCC) {
+    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
+  } else {
+    SDValue CCOp = Slct.getOperand(0);
+    if (CCOp.getOpcode() == ISD::SETCC)
+      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
+  }
+
+  bool DoXform = false;
+  bool InvCC = false;
+  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
+          "Bad input!");
+
+  if (LHS.getOpcode() == ISD::Constant &&
+      cast<ConstantSDNode>(LHS)->isNullValue()) {
+    DoXform = true;
+  } else if (CC != ISD::SETCC_INVALID &&
+             RHS.getOpcode() == ISD::Constant &&
+             cast<ConstantSDNode>(RHS)->isNullValue()) {
+    std::swap(LHS, RHS);
+    SDValue Op0 = Slct.getOperand(0);
+    EVT OpVT = isSlctCC ? Op0.getValueType() :
+                          Op0.getOperand(0).getValueType();
+    bool isInt = OpVT.isInteger();
+    CC = ISD::getSetCCInverse(CC, isInt);
+
+    if (!TLI.isCondCodeLegal(CC, OpVT))
+      return SDValue();         // Inverse operator isn't legal.
+
+    DoXform = true;
+    InvCC = true;
+  }
+
+  if (DoXform) {
+    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
+    if (isSlctCC)
+      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
+                             Slct.getOperand(0), Slct.getOperand(1), CC);
+    SDValue CCOp = Slct.getOperand(0);
+    if (InvCC)
+      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
+                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
+    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
+                       CCOp, OtherOp, Result);
+  }
+  return SDValue();
+}
+
+/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
+/// operands N0 and N1.  This is a helper for PerformADDCombine that is
+/// called with the default operands, and if that fails, with commuted
+/// operands.
+static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
+                                         TargetLowering::DAGCombinerInfo &DCI) {
+  // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
+  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
+    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
+    if (Result.getNode()) return Result;
+  }
+  return SDValue();
+}
+
+/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
+///
+static SDValue PerformADDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // First try with the default operand order.
+  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+  if (Result.getNode())
+    return Result;
+
+  // If that didn't work, try again with the operands commuted.
+  return PerformADDCombineWithOperands(N, N1, N0, DCI);
+}
+
+/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
+///
+static SDValue PerformSUBCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
+  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
+    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
+    if (Result.getNode()) return Result;
+  }
+
+  return SDValue();
+}
+
+static SDValue PerformMULCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const ARMSubtarget *Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+
+  if (Subtarget->isThumb1Only())
+    return SDValue();
+
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return SDValue();
+
+  uint64_t MulAmt = C->getZExtValue();
+  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+  ShiftAmt = ShiftAmt & (32 - 1);
+  SDValue V = N->getOperand(0);
+  DebugLoc DL = N->getDebugLoc();
+
+  SDValue Res;
+  MulAmt >>= ShiftAmt;
+  if (isPowerOf2_32(MulAmt - 1)) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    Res = DAG.getNode(ISD::ADD, DL, VT,
+                      V, DAG.getNode(ISD::SHL, DL, VT,
+                                     V, DAG.getConstant(Log2_32(MulAmt-1),
+                                                        MVT::i32)));
+  } else if (isPowerOf2_32(MulAmt + 1)) {
+    // (mul x, 2^N - 1) => (sub (shl x, N), x)
+    Res = DAG.getNode(ISD::SUB, DL, VT,
+                      DAG.getNode(ISD::SHL, DL, VT,
+                                  V, DAG.getConstant(Log2_32(MulAmt+1),
+                                                     MVT::i32)),
+                      V);
+  } else
+    return SDValue();
+
+  if (ShiftAmt != 0)
+    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+                      DAG.getConstant(ShiftAmt, MVT::i32));
+
+  // Do not add new nodes to DAG combiner worklist.
+  DCI.CombineTo(N, Res, false);
+  return SDValue();
+}
+
+static SDValue PerformANDCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  // Attempt to use immediate-form VBIC
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (BVN &&
+      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+    if (SplatBitSize <= 64) {
+      EVT VbicVT;
+      SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VbicVT, VT.is128BitVector(),
+                                      OtherModImm);
+      if (Val.getNode()) {
+        SDValue Input =
+          DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
+        SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const ARMSubtarget *Subtarget) {
+  // Attempt to use immediate-form VORR
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
+  DebugLoc dl = N->getDebugLoc();
+  EVT VT = N->getValueType(0);
+  SelectionDAG &DAG = DCI.DAG;
+
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (BVN && Subtarget->hasNEON() &&
+      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+    if (SplatBitSize <= 64) {
+      EVT VorrVT;
+      SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+                                      SplatUndef.getZExtValue(), SplatBitSize,
+                                      DAG, VorrVT, VT.is128BitVector(),
+                                      OtherModImm);
+      if (Val.getNode()) {
+        SDValue Input =
+          DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
+        SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
+      }
+    }
+  }
+
+  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
+  // reasonable.
+
+  // BFI is only available on V6T2+
+  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  DebugLoc DL = N->getDebugLoc();
+  // 1) or (and A, mask), val => ARMbfi A, val, mask
+  //      iff (val & mask) == val
+  //
+  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
+  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
+  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
+  //  (i.e., copy a bitfield value into another bitfield of the same width)
+  if (N0.getOpcode() != ISD::AND)
+    return SDValue();
+
+  if (VT != MVT::i32)
+    return SDValue();
+
+  SDValue N00 = N0.getOperand(0);
+
+  // The value and the mask need to be constants so we can verify this is
+  // actually a bitfield set.  If the mask is 0xffff, we can do better
+  // via a movt instruction, so don't use BFI in that case.
+  SDValue MaskOp = N0.getOperand(1);
+  ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
+  if (!MaskC)
+    return SDValue();
+  unsigned Mask = MaskC->getZExtValue();
+  if (Mask == 0xffff)
+    return SDValue();
+  SDValue Res;
+  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  if (N1C) {
+    unsigned Val = N1C->getZExtValue();
+    if ((Val & ~Mask) != Val)
+      return SDValue();
+
+    if (ARM::isBitFieldInvertedMask(Mask)) {
+      Val >>= CountTrailingZeros_32(~Mask);
+
+      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
+                        DAG.getConstant(Val, MVT::i32),
+                        DAG.getConstant(Mask, MVT::i32));
+
+      // Do not add new nodes to DAG combiner worklist.
+      DCI.CombineTo(N, Res, false);
+      return SDValue();
+    }
+  } else if (N1.getOpcode() == ISD::AND) {
+    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C)
+      return SDValue();
+    unsigned Mask2 = N11C->getZExtValue();
+
+    if (ARM::isBitFieldInvertedMask(Mask) &&
+        ARM::isBitFieldInvertedMask(~Mask2) &&
+        (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
+      // The pack halfword instruction works better for masks that fit it,
+      // so use that when it's available.
+      if (Subtarget->hasT2ExtractPack() &&
+          (Mask == 0xffff || Mask == 0xffff0000))
+        return SDValue();
+      // 2a
+      unsigned lsb = CountTrailingZeros_32(Mask2);
+      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
+                        DAG.getConstant(lsb, MVT::i32));
+      Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
+                        DAG.getConstant(Mask, MVT::i32));
+      // Do not add new nodes to DAG combiner worklist.
+      DCI.CombineTo(N, Res, false);
+      return SDValue();
+    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
+               ARM::isBitFieldInvertedMask(Mask2) &&
+               (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
+      // The pack halfword instruction works better for masks that fit it,
+      // so use that when it's available.
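+      // (The pack halfword instructions on v6T2+ are PKHBT/PKHTB; they cover
+      // exactly the 0xffff / 0xffff0000 masks rejected just below.)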
+      if (Subtarget->hasT2ExtractPack() &&
+          (Mask2 == 0xffff || Mask2 == 0xffff0000))
+        return SDValue();
+      // 2b
+      unsigned lsb = CountTrailingZeros_32(Mask);
+      Res = DAG.getNode(ISD::SRL, DL, VT, N00,
+                        DAG.getConstant(lsb, MVT::i32));
+      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
+                        DAG.getConstant(Mask2, MVT::i32));
+      // Do not add new nodes to DAG combiner worklist.
+      DCI.CombineTo(N, Res, false);
+      return SDValue();
+    }
+  }
+
+  if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
+      N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
+      ARM::isBitFieldInvertedMask(~Mask)) {
+    // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
+    // where lsb(mask) == #shamt and masked bits of B are known zero.
+    SDValue ShAmt = N00.getOperand(1);
+    unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
+    unsigned LSB = CountTrailingZeros_32(Mask);
+    if (ShAmtC != LSB)
+      return SDValue();
+
+    Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
+                      DAG.getConstant(~Mask, MVT::i32));
+
+    // Do not add new nodes to DAG combiner worklist.
+    DCI.CombineTo(N, Res, false);
+  }
+
+  return SDValue();
+}
+
+/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
+/// C1 & C2 == C1.
+static SDValue PerformBFICombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue N1 = N->getOperand(1);
+  if (N1.getOpcode() == ISD::AND) {
+    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+    if (!N11C)
+      return SDValue();
+    unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+    unsigned Mask2 = N11C->getZExtValue();
+    if ((Mask & Mask2) == Mask2)
+      return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
+                             N->getOperand(0), N1.getOperand(0),
+                             N->getOperand(2));
+  }
+  return SDValue();
+}
+
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI) {
+  // vmovrrd(vmovdrr x, y) -> x,y
+  SDValue InDouble = N->getOperand(0);
+  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
+    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
+  return SDValue();
+}
+
+/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
+/// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
+static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
+  // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() == ISD::BITCAST)
+    Op0 = Op0.getOperand(0);
+  if (Op1.getOpcode() == ISD::BITCAST)
+    Op1 = Op1.getOperand(0);
+  if (Op0.getOpcode() == ARMISD::VMOVRRD &&
+      Op0.getNode() == Op1.getNode() &&
+      Op0.getResNo() == 0 && Op1.getResNo() == 1)
+    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
+                       N->getValueType(0), Op0.getOperand(0));
+  return SDValue();
+}
+
+/// PerformSTORECombine - Target-specific dag combine xforms for
+/// ISD::STORE.
+static SDValue PerformSTORECombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI) {
+  // Bitcast an i64 store extracted from a vector to f64.
+  // Otherwise, the i64 value will be legalized to a pair of i32 values.
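+  // For example (sketch): store (i64 extractelt (v2i64 V), n) is rewritten
+  // below as store (f64 extractelt (v2f64 bitcast V), n), so the element can
+  // go to memory straight from a NEON/VFP register instead of a GPR pair.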
+  StoreSDNode *St = cast<StoreSDNode>(N);
+  SDValue StVal = St->getValue();
+  if (!ISD::isNormalStore(St) || St->isVolatile() ||
+      StVal.getValueType() != MVT::i64 ||
+      StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc dl = StVal.getDebugLoc();
+  SDValue IntVec = StVal.getOperand(0);
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+                                 IntVec.getValueType().getVectorNumElements());
+  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
+  SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
+                               Vec, StVal.getOperand(1));
+  dl = N->getDebugLoc();
+  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
+  // Make the DAGCombiner fold the bitcasts.
+  DCI.AddToWorklist(Vec.getNode());
+  DCI.AddToWorklist(ExtElt.getNode());
+  DCI.AddToWorklist(V.getNode());
+  return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
+                      St->getPointerInfo(), St->isVolatile(),
+                      St->isNonTemporal(), St->getAlignment(),
+                      St->getTBAAInfo());
+}
+
+/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
+/// are normal, non-volatile loads.  If so, it is profitable to bitcast an
+/// i64 vector to have f64 elements, since the value can then be loaded
+/// directly into a VFP register.
+static bool hasNormalLoadOperand(SDNode *N) {
+  unsigned NumElts = N->getValueType(0).getVectorNumElements();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDNode *Elt = N->getOperand(i).getNode();
+    if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
+      return true;
+  }
+  return false;
+}
+
+/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
+/// ISD::BUILD_VECTOR.
+static SDValue PerformBUILD_VECTORCombine(SDNode *N,
+                                          TargetLowering::DAGCombinerInfo &DCI){
+  // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
+  // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
+  // into a pair of GPRs, which is fine when the value is used as a scalar,
+  // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
+  SelectionDAG &DAG = DCI.DAG;
+  if (N->getNumOperands() == 2) {
+    SDValue RV = PerformVMOVDRRCombine(N, DAG);
+    if (RV.getNode())
+      return RV;
+  }
+
+  // Load i64 elements as f64 values so that type legalization does not split
+  // them up into i32 values.
+  EVT VT = N->getValueType(0);
+  if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
+    return SDValue();
+  DebugLoc dl = N->getDebugLoc();
+  SmallVector<SDValue, 8> Ops;
+  unsigned NumElts = VT.getVectorNumElements();
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
+    Ops.push_back(V);
+    // Make the DAGCombiner fold the bitcast.
+    DCI.AddToWorklist(V.getNode());
+  }
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
+  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
+  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
+}
+
+/// PerformInsertEltCombine - Target-specific dag combine xforms for
+/// ISD::INSERT_VECTOR_ELT.
+static SDValue PerformInsertEltCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI) {
+  // Bitcast an i64 load inserted into a vector to f64.
+  // Otherwise, the i64 value will be legalized to a pair of i32 values.
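+  // For example (sketch): insertelt (v2i64 V), (i64 load p), n becomes an
+  // f64 insert into a bitcast v2f64 vector, keeping the loaded value out of
+  // the GPR pair that i64 legalization would otherwise require.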
+  EVT VT = N->getValueType(0);
+  SDNode *Elt = N->getOperand(1).getNode();
+  if (VT.getVectorElementType() != MVT::i64 ||
+      !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  DebugLoc dl = N->getDebugLoc();
+  EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
+                                 VT.getVectorNumElements());
+  SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
+  SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
+  // Make the DAGCombiner fold the bitcasts.
+  DCI.AddToWorklist(Vec.getNode());
+  DCI.AddToWorklist(V.getNode());
+  SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
+                               Vec, V, N->getOperand(2));
+  return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
+}
+
+/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
+/// ISD::VECTOR_SHUFFLE.
+static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
+  // The LLVM shufflevector instruction does not require the shuffle mask
+  // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
+  // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
+  // operands do not match the mask length, they are extended by concatenating
+  // them with undef vectors.  That is probably the right thing for other
+  // targets, but for NEON it is better to concatenate two double-register
+  // size vector operands into a single quad-register size vector.  Do that
+  // transformation here:
+  //   shuffle(concat(v1, undef), concat(v2, undef)) ->
+  //   shuffle(concat(v1, v2), undef)
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
+      Op1.getOpcode() != ISD::CONCAT_VECTORS ||
+      Op0.getNumOperands() != 2 ||
+      Op1.getNumOperands() != 2)
+    return SDValue();
+  SDValue Concat0Op1 = Op0.getOperand(1);
+  SDValue Concat1Op1 = Op1.getOperand(1);
+  if (Concat0Op1.getOpcode() != ISD::UNDEF ||
+      Concat1Op1.getOpcode() != ISD::UNDEF)
+    return SDValue();
+  // Skip the transformation if any of the types are illegal.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  EVT VT = N->getValueType(0);
+  if (!TLI.isTypeLegal(VT) ||
+      !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
+      !TLI.isTypeLegal(Concat1Op1.getValueType()))
+    return SDValue();
+
+  SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
+                                  Op0.getOperand(0), Op1.getOperand(0));
+  // Translate the shuffle mask.
+  SmallVector<int, 8> NewMask;
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned HalfElts = NumElts/2;
+  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+  for (unsigned n = 0; n < NumElts; ++n) {
+    int MaskElt = SVN->getMaskElt(n);
+    int NewElt = -1;
+    if (MaskElt < (int)HalfElts)
+      NewElt = MaskElt;
+    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
+      NewElt = HalfElts + MaskElt - NumElts;
+    NewMask.push_back(NewElt);
+  }
+  return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
+                              DAG.getUNDEF(VT), NewMask.data());
+}
+
+/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
+/// NEON load/store intrinsics to merge base address updates.
+static SDValue CombineBaseUpdate(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI) {
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
+                      N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
+  unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
+  SDValue Addr = N->getOperand(AddrOpIdx);
+
+  // Search for a use of the address operand that is an increment.
+  for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
+         UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User->getOpcode() != ISD::ADD ||
+        UI.getUse().getResNo() != Addr.getResNo())
+      continue;
+
+    // Check that the add is independent of the load/store.  Otherwise, folding
+    // it would create a cycle.
+    if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
+      continue;
+
+    // Find the new opcode for the updating load/store.
+    bool isLoad = true;
+    bool isLaneOp = false;
+    unsigned NewOpc = 0;
+    unsigned NumVecs = 0;
+    if (isIntrinsic) {
+      unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+      switch (IntNo) {
+      default: assert(0 && "unexpected intrinsic for Neon base update");
+      case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
+        NumVecs = 1; break;
+      case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
+        NumVecs = 2; break;
+      case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
+        NumVecs = 3; break;
+      case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
+        NumVecs = 4; break;
+      case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
+        NumVecs = 2; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
+        NumVecs = 3; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
+        NumVecs = 4; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
+        NumVecs = 1; isLoad = false; break;
+      case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
+        NumVecs = 2; isLoad = false; break;
+      case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
+        NumVecs = 3; isLoad = false; break;
+      case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
+        NumVecs = 4; isLoad = false; break;
+      case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
+        NumVecs = 2; isLoad = false; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
+        NumVecs = 3; isLoad = false; isLaneOp = true; break;
+      case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
+        NumVecs = 4; isLoad = false; isLaneOp = true; break;
+      }
+    } else {
+      isLaneOp = true;
+      switch (N->getOpcode()) {
+      default: assert(0 && "unexpected opcode for Neon base update");
+      case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
+      case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
+      case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
+      }
+    }
+
+    // Find the size of memory referenced by the load/store.
+    EVT VecTy;
+    if (isLoad)
+      VecTy = N->getValueType(0);
+    else
+      VecTy = N->getOperand(AddrOpIdx+1).getValueType();
+    unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
+    if (isLaneOp)
+      NumBytes /= VecTy.getVectorNumElements();
+
+    // If the increment is a constant, it must match the memory ref size.
+    SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
+    if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
+      uint64_t IncVal = CInc->getZExtValue();
+      if (IncVal != NumBytes)
+        continue;
+    } else if (NumBytes >= 3 * 16) {
+      // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
+      // separate instructions that make it harder to use a non-constant update.
+      continue;
+    }
+
+    // Create the new updating load/store node.
+    EVT Tys[6];
+    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+    unsigned n;
+    for (n = 0; n < NumResultVecs; ++n)
+      Tys[n] = VecTy;
+    Tys[n++] = MVT::i32;
+    Tys[n] = MVT::Other;
+    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(N->getOperand(0)); // incoming chain
+    Ops.push_back(N->getOperand(AddrOpIdx));
+    Ops.push_back(Inc);
+    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+      Ops.push_back(N->getOperand(i));
+    }
+    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
+                                           Ops.data(), Ops.size(),
+                                           MemInt->getMemoryVT(),
+                                           MemInt->getMemOperand());
+
+    // Update the uses.
+    std::vector<SDValue> NewResults;
+    for (unsigned i = 0; i < NumResultVecs; ++i) {
+      NewResults.push_back(SDValue(UpdN.getNode(), i));
+    }
+    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
+    DCI.CombineTo(N, NewResults);
+    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+    break;
+  }
+  return SDValue();
+}
+
+/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
+/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
+/// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
+/// return true.
+static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  // vldN-dup instructions only support 64-bit vectors for N > 1.
+  if (!VT.is64BitVector())
+    return false;
+
+  // Check if the VDUPLANE operand is a vldN-dup intrinsic.
+  SDNode *VLD = N->getOperand(0).getNode();
+  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return false;
+  unsigned NumVecs = 0;
+  unsigned NewOpc = 0;
+  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::arm_neon_vld2lane) {
+    NumVecs = 2;
+    NewOpc = ARMISD::VLD2DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+    NumVecs = 3;
+    NewOpc = ARMISD::VLD3DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+    NumVecs = 4;
+    NewOpc = ARMISD::VLD4DUP;
+  } else {
+    return false;
+  }
+
+  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+  // numbers match the load.
+  unsigned VLDLaneNo =
+    cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    // Ignore uses of the chain result.
+    if (UI.getUse().getResNo() == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    if (User->getOpcode() != ARMISD::VDUPLANE ||
+        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+      return false;
+  }
+
+  // Create the vldN-dup node.
+  EVT Tys[5];
+  unsigned n;
+  for (n = 0; n < NumVecs; ++n)
+    Tys[n] = VT;
+  Tys[n] = MVT::Other;
+  SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
+  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
+                                           Ops, 2, VLDMemInt->getMemoryVT(),
+                                           VLDMemInt->getMemOperand());
+
+  // Update the uses.
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    unsigned ResNo = UI.getUse().getResNo();
+    // Ignore uses of the chain result.
+    if (ResNo == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+  }
+
+  // Now the vldN-lane intrinsic is dead except for its chain result.
+  // Update uses of the chain.
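+  // (Handing CombineTo the full result list below also redirects the chain
+  // uses of the dead intrinsic to the new VLDnDUP node.)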
+  std::vector<SDValue> VLDDupResults;
+  for (unsigned n = 0; n < NumVecs; ++n)
+    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+  DCI.CombineTo(VLD, VLDDupResults);
+
+  return true;
+}
+
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+                                      TargetLowering::DAGCombinerInfo &DCI) {
+  SDValue Op = N->getOperand(0);
+
+  // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
+  // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
+  if (CombineVLDDUP(N, DCI))
+    return SDValue(N, 0);
+
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.  Ignore bit_converts for now; element sizes are checked below.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+    return SDValue();
+
+  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+  // The canonical VMOV for a zero vector uses a 32-bit element size.
+  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  unsigned EltBits;
+  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+    EltSize = 8;
+  EVT VT = N->getValueType(0);
+  if (EltSize > VT.getVectorElementType().getSizeInBits())
+    return SDValue();
+
+  return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
+}
+
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift operation, where all the elements of the
+/// build_vector must have the same constant integer value.
+static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
+  // Ignore bit_converts.
+  while (Op.getOpcode() == ISD::BITCAST)
+    Op = Op.getOperand(0);
+  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
+  APInt SplatBits, SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
+                                      HasAnyUndefs, ElementBits) ||
+      SplatBitSize > ElementBits)
+    return false;
+  Cnt = SplatBits.getSExtValue();
+  return true;
+}
+
+/// isVShiftLImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift left operation.  That value must be in the range:
+///   0 <= Value < ElementBits for a left shift; or
+///   0 <= Value <= ElementBits for a long left shift.
+static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  if (! getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
+}
+
+/// isVShiftRImm - Check if this is a valid build_vector for the immediate
+/// operand of a vector shift right operation.  For a shift opcode, the value
+/// is positive, but for an intrinsic the value count must be negative.  The
+/// absolute value must be in the range:
+///   1 <= |Value| <= ElementBits for a right shift; or
+///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
+static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
+                         int64_t &Cnt) {
+  assert(VT.isVector() && "vector shift count is not a vector type");
+  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
+  if (! getVShiftImm(Op, ElementBits, Cnt))
+    return false;
+  if (isIntrinsic)
+    Cnt = -Cnt;
+  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
+}
+
+/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
+static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
+  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+  switch (IntNo) {
+  default:
+    // Don't do anything for most intrinsics.
+    break;
+
+  // Vector shifts: check for immediate versions and lower them.
+  // Note: This is done during DAG combining instead of DAG legalizing because
+  // the build_vectors for 64-bit vector element shift counts are generally
+  // not legal, and it is hard to see their values after they get legalized to
+  // loads from a constant pool.
+  case Intrinsic::arm_neon_vshifts:
+  case Intrinsic::arm_neon_vshiftu:
+  case Intrinsic::arm_neon_vshiftls:
+  case Intrinsic::arm_neon_vshiftlu:
+  case Intrinsic::arm_neon_vshiftn:
+  case Intrinsic::arm_neon_vrshifts:
+  case Intrinsic::arm_neon_vrshiftu:
+  case Intrinsic::arm_neon_vrshiftn:
+  case Intrinsic::arm_neon_vqshifts:
+  case Intrinsic::arm_neon_vqshiftu:
+  case Intrinsic::arm_neon_vqshiftsu:
+  case Intrinsic::arm_neon_vqshiftns:
+  case Intrinsic::arm_neon_vqshiftnu:
+  case Intrinsic::arm_neon_vqshiftnsu:
+  case Intrinsic::arm_neon_vqrshiftns:
+  case Intrinsic::arm_neon_vqrshiftnu:
+  case Intrinsic::arm_neon_vqrshiftnsu: {
+    EVT VT = N->getOperand(1).getValueType();
+    int64_t Cnt;
+    unsigned VShiftOpc = 0;
+
+    switch (IntNo) {
+    case Intrinsic::arm_neon_vshifts:
+    case Intrinsic::arm_neon_vshiftu:
+      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
+        VShiftOpc = ARMISD::VSHL;
+        break;
+      }
+      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
+        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
+                     ARMISD::VSHRs : ARMISD::VSHRu);
+        break;
+      }
+      return SDValue();
+
+    case Intrinsic::arm_neon_vshiftls:
+    case Intrinsic::arm_neon_vshiftlu:
+      if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
+        break;
+      llvm_unreachable("invalid shift count for vshll intrinsic");
+
+    case Intrinsic::arm_neon_vrshifts:
+    case Intrinsic::arm_neon_vrshiftu:
+      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
+        break;
+      return SDValue();
+
+    case Intrinsic::arm_neon_vqshifts:
+    case Intrinsic::arm_neon_vqshiftu:
+      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+        break;
+      return SDValue();
+
+    case Intrinsic::arm_neon_vqshiftsu:
+      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
+        break;
+      llvm_unreachable("invalid shift count for vqshlu intrinsic");
+
+    case Intrinsic::arm_neon_vshiftn:
+    case Intrinsic::arm_neon_vrshiftn:
+    case Intrinsic::arm_neon_vqshiftns:
+    case Intrinsic::arm_neon_vqshiftnu:
+    case Intrinsic::arm_neon_vqshiftnsu:
+    case Intrinsic::arm_neon_vqrshiftns:
+    case Intrinsic::arm_neon_vqrshiftnu:
+    case Intrinsic::arm_neon_vqrshiftnsu:
+      // Narrowing shifts require an immediate right shift.
+      if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
+        break;
+      llvm_unreachable("invalid shift count for narrowing vector shift "
+                       "intrinsic");
+
+    default:
+      llvm_unreachable("unhandled vector shift");
+    }
+
+    switch (IntNo) {
+    case Intrinsic::arm_neon_vshifts:
+    case Intrinsic::arm_neon_vshiftu:
+      // Opcode already set above.
+      break;
+    case Intrinsic::arm_neon_vshiftls:
+    case Intrinsic::arm_neon_vshiftlu:
+      if (Cnt == VT.getVectorElementType().getSizeInBits())
+        VShiftOpc = ARMISD::VSHLLi;
+      else
+        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
+                     ARMISD::VSHLLs : ARMISD::VSHLLu);
+      break;
+    case Intrinsic::arm_neon_vshiftn:
+      VShiftOpc = ARMISD::VSHRN; break;
+    case Intrinsic::arm_neon_vrshifts:
+      VShiftOpc = ARMISD::VRSHRs; break;
+    case Intrinsic::arm_neon_vrshiftu:
+      VShiftOpc = ARMISD::VRSHRu; break;
+    case Intrinsic::arm_neon_vrshiftn:
+      VShiftOpc = ARMISD::VRSHRN; break;
+    case Intrinsic::arm_neon_vqshifts:
+      VShiftOpc = ARMISD::VQSHLs; break;
+    case Intrinsic::arm_neon_vqshiftu:
+      VShiftOpc = ARMISD::VQSHLu; break;
+    case Intrinsic::arm_neon_vqshiftsu:
+      VShiftOpc = ARMISD::VQSHLsu; break;
+    case Intrinsic::arm_neon_vqshiftns:
+      VShiftOpc = ARMISD::VQSHRNs; break;
+    case Intrinsic::arm_neon_vqshiftnu:
+      VShiftOpc = ARMISD::VQSHRNu; break;
+    case Intrinsic::arm_neon_vqshiftnsu:
+      VShiftOpc = ARMISD::VQSHRNsu; break;
+    case Intrinsic::arm_neon_vqrshiftns:
+      VShiftOpc = ARMISD::VQRSHRNs; break;
+    case Intrinsic::arm_neon_vqrshiftnu:
+      VShiftOpc = ARMISD::VQRSHRNu; break;
+    case Intrinsic::arm_neon_vqrshiftnsu:
+      VShiftOpc = ARMISD::VQRSHRNsu; break;
+    }
+
+    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+                       N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
+  }
+
+  case Intrinsic::arm_neon_vshiftins: {
+    EVT VT = N->getOperand(1).getValueType();
+    int64_t Cnt;
+    unsigned VShiftOpc = 0;
+
+    if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
+      VShiftOpc = ARMISD::VSLI;
+    else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
+      VShiftOpc = ARMISD::VSRI;
+    else {
+      llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
+    }
+
+    return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2),
+                       DAG.getConstant(Cnt, MVT::i32));
+  }
+
+  case Intrinsic::arm_neon_vqrshifts:
+  case Intrinsic::arm_neon_vqrshiftu:
+    // No immediate versions of these to check for.
+    break;
+  }
+
+  return SDValue();
+}
+
+/// PerformShiftCombine - Checks for immediate versions of vector shifts and
+/// lowers them.  As with the vector shift intrinsics, this is done during DAG
+/// combining instead of DAG legalizing because the build_vectors for 64-bit
+/// vector element shift counts are generally not legal, and it is hard to see
+/// their values after they get legalized to loads from a constant pool.
+static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
+                                   const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+
+  // Nothing to be done for scalar shifts.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!VT.isVector() || !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  assert(ST->hasNEON() && "unexpected vector shift");
+  int64_t Cnt;
+
+  switch (N->getOpcode()) {
+  default: llvm_unreachable("unexpected shift opcode");
+
+  case ISD::SHL:
+    if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
+      return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
+                         DAG.getConstant(Cnt, MVT::i32));
+    break;
+
+  case ISD::SRA:
+  case ISD::SRL:
+    if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
+      unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
+                            ARMISD::VSHRs : ARMISD::VSHRu);
+      return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
+                         DAG.getConstant(Cnt, MVT::i32));
+    }
+  }
+  return SDValue();
+}
+
+/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
+/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
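+/// For example (sketch): (i32 (zext (i8 extractelt (v8i8 V), n))) can be
+/// selected as a single VGETLANEu ("vmov.u8") instead of an extract followed
+/// by a separate zero-extend.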
+static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
+                                    const ARMSubtarget *ST) {
+  SDValue N0 = N->getOperand(0);
+
+  // Check for sign- and zero-extensions of vector extract operations of 8-
+  // and 16-bit vector elements.  NEON supports these directly.  They are
+  // handled during DAG combining because type legalization will promote them
+  // to 32-bit types and it is messy to recognize the operations after that.
+  if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+    SDValue Vec = N0.getOperand(0);
+    SDValue Lane = N0.getOperand(1);
+    EVT VT = N->getValueType(0);
+    EVT EltVT = N0.getValueType();
+    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+    if (VT == MVT::i32 &&
+        (EltVT == MVT::i8 || EltVT == MVT::i16) &&
+        TLI.isTypeLegal(Vec.getValueType()) &&
+        isa<ConstantSDNode>(Lane)) {
+
+      unsigned Opc = 0;
+      switch (N->getOpcode()) {
+      default: llvm_unreachable("unexpected opcode");
+      case ISD::SIGN_EXTEND:
+        Opc = ARMISD::VGETLANEs;
+        break;
+      case ISD::ZERO_EXTEND:
+      case ISD::ANY_EXTEND:
+        Opc = ARMISD::VGETLANEu;
+        break;
+      }
+      return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
+    }
+  }
+
+  return SDValue();
+}
+
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+                                       const ARMSubtarget *ST) {
+  // If the target supports NEON, try to use vmax/vmin instructions for f32
+  // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
+  // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
+  // a NaN; only do the transformation when it matches that behavior.
+
+  // For now only do this when using NEON for FP operations; if using VFP, it
+  // is not obvious that the benefit outweighs the cost of switching to the
+  // NEON pipeline.
+  if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+      N->getValueType(0) != MVT::f32)
+    return SDValue();
+
+  SDValue CondLHS = N->getOperand(0);
+  SDValue CondRHS = N->getOperand(1);
+  SDValue LHS = N->getOperand(2);
+  SDValue RHS = N->getOperand(3);
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+  unsigned Opcode = 0;
+  bool IsReversed;
+  if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+    IsReversed = false; // x CC y ? x : y
+  } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+    IsReversed = true ; // x CC y ? y : x
+  } else {
+    return SDValue();
+  }
+
+  bool IsUnordered;
+  switch (CC) {
+  default: break;
+  case ISD::SETOLT:
+  case ISD::SETOLE:
+  case ISD::SETLT:
+  case ISD::SETLE:
+  case ISD::SETULT:
+  case ISD::SETULE:
+    // If LHS is NaN, an ordered comparison will be false and the result will
+    // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
+    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
+    IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+      break;
+    // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+    // will return -0, so vmin can only be used for unsafe math or if one of
+    // the operands is known to be nonzero.
+    if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+        !UnsafeFPMath &&
+        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+      break;
+    Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+    break;
+
+  case ISD::SETOGT:
+  case ISD::SETOGE:
+  case ISD::SETGT:
+  case ISD::SETGE:
+  case ISD::SETUGT:
+  case ISD::SETUGE:
+    // If LHS is NaN, an ordered comparison will be false and the result will
+    // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
+    // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
+    IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+    if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+      break;
+    // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+    // will return +0, so vmax can only be used for unsafe math or if one of
+    // the operands is known to be nonzero.
+    if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+        !UnsafeFPMath &&
+        !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+      break;
+    Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+    break;
+  }
+
+  if (!Opcode)
+    return SDValue();
+  return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
+SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
+                                             DAGCombinerInfo &DCI) const {
+  switch (N->getOpcode()) {
+  default: break;
+  case ISD::ADD:        return PerformADDCombine(N, DCI);
+  case ISD::SUB:        return PerformSUBCombine(N, DCI);
+  case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
+  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
+  case ISD::AND:        return PerformANDCombine(N, DCI);
+  case ARMISD::BFI:     return PerformBFICombine(N, DCI);
+  case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+  case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
+  case ISD::STORE:      return PerformSTORECombine(N, DCI);
+  case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
+  case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
+  case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
+  case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+  case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
+  case ISD::SIGN_EXTEND:
+  case ISD::ZERO_EXTEND:
+  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+  case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+  case ARMISD::VLD2DUP:
+  case ARMISD::VLD3DUP:
+  case ARMISD::VLD4DUP:
+    return CombineBaseUpdate(N, DCI);
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    case Intrinsic::arm_neon_vld1:
+    case Intrinsic::arm_neon_vld2:
+    case Intrinsic::arm_neon_vld3:
+    case Intrinsic::arm_neon_vld4:
+    case Intrinsic::arm_neon_vld2lane:
+    case Intrinsic::arm_neon_vld3lane:
+    case Intrinsic::arm_neon_vld4lane:
+    case Intrinsic::arm_neon_vst1:
+    case Intrinsic::arm_neon_vst2:
+    case Intrinsic::arm_neon_vst3:
+    case Intrinsic::arm_neon_vst4:
+    case Intrinsic::arm_neon_vst2lane:
+    case Intrinsic::arm_neon_vst3lane:
+    case Intrinsic::arm_neon_vst4lane:
+      return CombineBaseUpdate(N, DCI);
+    default: break;
+    }
+    break;
+  }
+  return SDValue();
+}
+
+bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
+                                                          EVT VT) const {
+  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
+}
+
+bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
+  if (!Subtarget->allowsUnalignedMem())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  default:
+    return false;
+  case MVT::i8:
+  case MVT::i16:
+  case 
MVT::i32: + return true; + // FIXME: VLD1 etc with standard alignment is legal. + } +} + +static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { + if (V < 0) + return false; + + unsigned Scale = 1; + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + // Scale == 1; + break; + case MVT::i16: + // Scale == 2; + Scale = 2; + break; + case MVT::i32: + // Scale == 4; + Scale = 4; + break; + } + + if ((V & (Scale - 1)) != 0) + return false; + V /= Scale; + return V == (V & ((1LL << 5) - 1)); +} + +static bool isLegalT2AddressImmediate(int64_t V, EVT VT, + const ARMSubtarget *Subtarget) { + bool isNeg = false; + if (V < 0) { + isNeg = true; + V = - V; + } + + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + // + imm12 or - imm8 + if (isNeg) + return V == (V & ((1LL << 8) - 1)); + return V == (V & ((1LL << 12) - 1)); + case MVT::f32: + case MVT::f64: + // Same as ARM mode. FIXME: NEON? + if (!Subtarget->hasVFP2()) + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + +/// isLegalAddressImmediate - Return true if the integer value can be used +/// as the offset of the target addressing mode for load / store of the +/// given type. +static bool isLegalAddressImmediate(int64_t V, EVT VT, + const ARMSubtarget *Subtarget) { + if (V == 0) + return true; + + if (!VT.isSimple()) + return false; + + if (Subtarget->isThumb1Only()) + return isLegalT1AddressImmediate(V, VT); + else if (Subtarget->isThumb2()) + return isLegalT2AddressImmediate(V, VT, Subtarget); + + // ARM mode. + if (V < 0) + V = - V; + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i32: + // +- imm12 + return V == (V & ((1LL << 12) - 1)); + case MVT::i16: + // +- imm8 + return V == (V & ((1LL << 8) - 1)); + case MVT::f32: + case MVT::f64: + if (!Subtarget->hasVFP2()) // FIXME: NEON? + return false; + if ((V & 3) != 0) + return false; + V >>= 2; + return V == (V & ((1LL << 8) - 1)); + } +} + +bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, + EVT VT) const { + int Scale = AM.Scale; + if (Scale < 0) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + default: return false; + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + if (Scale == 1) + return true; + // r + r << imm + Scale = Scale & ~1; + return Scale == 2 || Scale == 4 || Scale == 8; + case MVT::i64: + // r + r + if (((unsigned)AM.HasBaseReg + Scale) <= 2) + return true; + return false; + case MVT::isVoid: + // Note, we allow "void" uses (basically, uses that aren't loads or + // stores), because arm allows folding a scale into many arithmetic + // operations. This should be made more precise and revisited later. + + // Allow r << imm, but the imm has to be a multiple of two. + if (Scale & 1) return false; + return isPowerOf2_32(Scale); + } +} + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, + const Type *Ty) const { + EVT VT = getValueType(Ty, true); + if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) + return false; + + // Can never fold addr of global into load/store. + if (AM.BaseGV) + return false; + + switch (AM.Scale) { + case 0: // no scale reg, must be "r+i" or "r", or "i". 
+    break;
+  case 1:
+    if (Subtarget->isThumb1Only())
+      return false;
+    // FALL THROUGH.
+  default:
+    // ARM doesn't support any R+R*scale+imm addr modes.
+    if (AM.BaseOffs)
+      return false;
+
+    if (!VT.isSimple())
+      return false;
+
+    if (Subtarget->isThumb2())
+      return isLegalT2ScaledAddressingMode(AM, VT);
+
+    int Scale = AM.Scale;
+    switch (VT.getSimpleVT().SimpleTy) {
+    default: return false;
+    case MVT::i1:
+    case MVT::i8:
+    case MVT::i32:
+      if (Scale < 0) Scale = -Scale;
+      if (Scale == 1)
+        return true;
+      // r + r << imm
+      return isPowerOf2_32(Scale & ~1);
+    case MVT::i16:
+    case MVT::i64:
+      // r + r
+      if (((unsigned)AM.HasBaseReg + Scale) <= 2)
+        return true;
+      return false;
+
+    case MVT::isVoid:
+      // Note, we allow "void" uses (basically, uses that aren't loads or
+      // stores), because arm allows folding a scale into many arithmetic
+      // operations.  This should be made more precise and revisited later.
+
+      // Allow r << imm, but the imm has to be a multiple of two.
+      if (Scale & 1) return false;
+      return isPowerOf2_32(Scale);
+    }
+    break;
+  }
+  return true;
+}
+
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+  if (!Subtarget->isThumb())
+    return ARM_AM::getSOImmVal(Imm) != -1;
+  if (Subtarget->isThumb2())
+    return ARM_AM::getT2SOImmVal(Imm) != -1;
+  return Imm >= 0 && Imm <= 255;
+}
+
+static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
+                                      bool isSEXTLoad, SDValue &Base,
+                                      SDValue &Offset, bool &isInc,
+                                      SelectionDAG &DAG) {
+  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+    return false;
+
+  if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
+    // AddressingMode 3
+    Base = Ptr->getOperand(0);
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (RHSC < 0 && RHSC > -256) {
+        assert(Ptr->getOpcode() == ISD::ADD);
+        isInc = false;
+        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+        return true;
+      }
+    }
+    isInc = (Ptr->getOpcode() == ISD::ADD);
+    Offset = Ptr->getOperand(1);
+    return true;
+  } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
+    // AddressingMode 2
+    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+      int RHSC = (int)RHS->getZExtValue();
+      if (RHSC < 0 && RHSC > -0x1000) {
+        assert(Ptr->getOpcode() == ISD::ADD);
+        isInc = false;
+        Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+        Base = Ptr->getOperand(0);
+        return true;
+      }
+    }
+
+    if (Ptr->getOpcode() == ISD::ADD) {
+      isInc = true;
+      ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0));
+      if (ShOpcVal != ARM_AM::no_shift) {
+        Base = Ptr->getOperand(1);
+        Offset = Ptr->getOperand(0);
+      } else {
+        Base = Ptr->getOperand(0);
+        Offset = Ptr->getOperand(1);
+      }
+      return true;
+    }
+
+    isInc = (Ptr->getOpcode() == ISD::ADD);
+    Base = Ptr->getOperand(0);
+    Offset = Ptr->getOperand(1);
+    return true;
+  }
+
+  // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
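+  // (f32/f64 fall through to here: VLDR/VSTR have no writeback forms, which
+  // is what the FIXME above is about.)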
+  return false;
+}
+
+static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
+                                     bool isSEXTLoad, SDValue &Base,
+                                     SDValue &Offset, bool &isInc,
+                                     SelectionDAG &DAG) {
+  if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
+    return false;
+
+  Base = Ptr->getOperand(0);
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
+    int RHSC = (int)RHS->getZExtValue();
+    if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
+      assert(Ptr->getOpcode() == ISD::ADD);
+      isInc = false;
+      Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
+      return true;
+    } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
+      isInc = Ptr->getOpcode() == ISD::ADD;
+      Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/// getPreIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool
+ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                             SDValue &Offset,
+                                             ISD::MemIndexedMode &AM,
+                                             SelectionDAG &DAG) const {
+  if (Subtarget->isThumb1Only())
+    return false;
+
+  EVT VT;
+  SDValue Ptr;
+  bool isSEXTLoad = false;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    Ptr = LD->getBasePtr();
+    VT  = LD->getMemoryVT();
+    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    Ptr = ST->getBasePtr();
+    VT  = ST->getMemoryVT();
+  } else
+    return false;
+
+  bool isInc;
+  bool isLegal = false;
+  if (Subtarget->isThumb2())
+    isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+                                       Offset, isInc, DAG);
+  else
+    isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
+                                        Offset, isInc, DAG);
+  if (!isLegal)
+    return false;
+
+  AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
+  return true;
+}
+
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                                   SDValue &Base,
+                                                   SDValue &Offset,
+                                                   ISD::MemIndexedMode &AM,
+                                                   SelectionDAG &DAG) const {
+  if (Subtarget->isThumb1Only())
+    return false;
+
+  EVT VT;
+  SDValue Ptr;
+  bool isSEXTLoad = false;
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+    VT  = LD->getMemoryVT();
+    Ptr = LD->getBasePtr();
+    isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+    VT  = ST->getMemoryVT();
+    Ptr = ST->getBasePtr();
+  } else
+    return false;
+
+  bool isInc;
+  bool isLegal = false;
+  if (Subtarget->isThumb2())
+    isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                       isInc, DAG);
+  else
+    isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
+                                        isInc, DAG);
+  if (!isLegal)
+    return false;
+
+  if (Ptr != Base) {
+    // Swap base ptr and offset to catch more post-index load / store when
+    // it's legal.  In Thumb2 mode, offset must be an immediate.
+    if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+        !Subtarget->isThumb2())
+      std::swap(Base, Offset);
+
+    // Post-indexed load / store update the base pointer.
+    if (Ptr != Base)
+      return false;
+  }
+
+  AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
ISD::POST_INC : ISD::POST_DEC; + return true; +} + +void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, + const APInt &Mask, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); + switch (Op.getOpcode()) { + default: break; + case ARMISD::CMOV: { + // Bits are known zero/one if known on the LHS and RHS. + DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); + if (KnownZero == 0 && KnownOne == 0) return; + + APInt KnownZeroRHS, KnownOneRHS; + DAG.ComputeMaskedBits(Op.getOperand(1), Mask, + KnownZeroRHS, KnownOneRHS, Depth+1); + KnownZero &= KnownZeroRHS; + KnownOne &= KnownOneRHS; + return; + } + } +} + +//===----------------------------------------------------------------------===// +// ARM Inline Assembly Support +//===----------------------------------------------------------------------===// + +bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { + // Looking for "rev" which is V6+. + if (!Subtarget->hasV6Ops()) + return false; + + InlineAsm *IA = cast(CI->getCalledValue()); + std::string AsmStr = IA->getAsmString(); + SmallVector AsmPieces; + SplitString(AsmStr, AsmPieces, ";\n"); + + switch (AsmPieces.size()) { + default: return false; + case 1: + AsmStr = AsmPieces[0]; + AsmPieces.clear(); + SplitString(AsmStr, AsmPieces, " \t,"); + + // rev $0, $1 + if (AsmPieces.size() == 3 && + AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && + IA->getConstraintString().compare(0, 4, "=l,l") == 0) { + const IntegerType *Ty = dyn_cast(CI->getType()); + if (Ty && Ty->getBitWidth() == 32) + return IntrinsicLowering::LowerToByteSwap(CI); + } + break; + } + + return false; +} + +/// getConstraintType - Given a constraint letter, return the type of +/// constraint it is for this target. +ARMTargetLowering::ConstraintType +ARMTargetLowering::getConstraintType(const std::string &Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 'l': return C_RegisterClass; + case 'w': return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight +ARMTargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (CallOperandVal == NULL) + return CW_Default; + const Type *type = CallOperandVal->getType(); + // Look at the constraint type. 
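[Editorial note: ExpandInlineAsm above pattern-matches exactly one idiom. A small illustration — mine, not from the patch — of the GCC-style inline asm it targets; with the "=l,l" constraint string and a 32-bit integer type, the call is rewritten to the byte-swap intrinsic:]

  #include <stdint.h>

  // Candidate for ExpandInlineAsm: a bare "rev" with "=l"/"l" constraints.
  // The hook replaces the asm call with llvm.bswap.i32, which the compiler
  // can then fold and optimize like ordinary IR.
  uint32_t host_to_be(uint32_t x) {
    uint32_t r;
    asm("rev %0, %1" : "=l"(r) : "l"(x));  // byte-reverse a register
    return r;
  }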
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+ARMTargetLowering::ConstraintType
+ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
+  if (Constraint.size() == 1) {
+    switch (Constraint[0]) {
+    default:  break;
+    case 'l': return C_RegisterClass;
+    case 'w': return C_RegisterClass;
+    }
+  }
+  return TargetLowering::getConstraintType(Constraint);
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ARMTargetLowering::getSingleConstraintMatchWeight(
+    AsmOperandInfo &info, const char *constraint) const {
+  ConstraintWeight weight = CW_Invalid;
+  Value *CallOperandVal = info.CallOperandVal;
+  // If we don't have a value, we can't do a match,
+  // but allow it at the lowest weight.
+  if (CallOperandVal == NULL)
+    return CW_Default;
+  const Type *type = CallOperandVal->getType();
+  // Look at the constraint type.
+  switch (*constraint) {
+  default:
+    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+    break;
+  case 'l':
+    if (type->isIntegerTy()) {
+      if (Subtarget->isThumb())
+        weight = CW_SpecificReg;
+      else
+        weight = CW_Register;
+    }
+    break;
+  case 'w':
+    if (type->isFloatingPointTy())
+      weight = CW_Register;
+    break;
+  }
+  return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass*>
+ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
+                                                EVT VT) const {
+  if (Constraint.size() == 1) {
+    // GCC ARM Constraint Letters
+    switch (Constraint[0]) {
+    case 'l':
+      if (Subtarget->isThumb())
+        return std::make_pair(0U, ARM::tGPRRegisterClass);
+      else
+        return std::make_pair(0U, ARM::GPRRegisterClass);
+    case 'r':
+      return std::make_pair(0U, ARM::GPRRegisterClass);
+    case 'w':
+      if (VT == MVT::f32)
+        return std::make_pair(0U, ARM::SPRRegisterClass);
+      if (VT.getSizeInBits() == 64)
+        return std::make_pair(0U, ARM::DPRRegisterClass);
+      if (VT.getSizeInBits() == 128)
+        return std::make_pair(0U, ARM::QPRRegisterClass);
+      break;
+    }
+  }
+  if (StringRef("{cc}").equals_lower(Constraint))
+    return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
+
+  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+std::vector<unsigned> ARMTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                  EVT VT) const {
+  if (Constraint.size() != 1)
+    return std::vector<unsigned>();
+
+  switch (Constraint[0]) {      // GCC ARM Constraint Letters
+  default: break;
+  case 'l':
+    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+                                 0);
+  case 'r':
+    return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+                                 ARM::R4, ARM::R5, ARM::R6, ARM::R7,
+                                 ARM::R8, ARM::R9, ARM::R10, ARM::R11,
+                                 ARM::R12, ARM::LR, 0);
+  case 'w':
+    if (VT == MVT::f32)
+      return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
+                                   ARM::S4, ARM::S5, ARM::S6, ARM::S7,
+                                   ARM::S8, ARM::S9, ARM::S10, ARM::S11,
+                                   ARM::S12,ARM::S13,ARM::S14,ARM::S15,
+                                   ARM::S16,ARM::S17,ARM::S18,ARM::S19,
+                                   ARM::S20,ARM::S21,ARM::S22,ARM::S23,
+                                   ARM::S24,ARM::S25,ARM::S26,ARM::S27,
+                                   ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
+    if (VT.getSizeInBits() == 64)
+      return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
+                                   ARM::D4, ARM::D5, ARM::D6, ARM::D7,
+                                   ARM::D8, ARM::D9, ARM::D10,ARM::D11,
+                                   ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+    if (VT.getSizeInBits() == 128)
+      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
+    break;
+  }
+
+  return std::vector<unsigned>();
+}
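[Editorial note: to ground the register-class mapping above, a hedged example — my own, not part of the patch — of how the 'l' and 'w' letters reach these hooks from user code:]

  // "l" pins an integer operand to the low registers r0-r7 (tGPR on Thumb);
  // "w" asks for a VFP register: s0-s31 for f32, d0-d15 for 64-bit values.
  float my_abs(float x) {
    float r;
    asm("vabs.f32 %0, %1" : "=w"(r) : "w"(x));  // 'w' -> SPR class for f32
    return r;
  }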
+/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+/// vector.  If it is invalid, don't add anything to Ops.
+void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+                                                     char Constraint,
+                                                     std::vector<SDValue>&Ops,
+                                                     SelectionDAG &DAG) const {
+  SDValue Result(0, 0);
+
+  switch (Constraint) {
+  default: break;
+  case 'I': case 'J': case 'K': case 'L':
+  case 'M': case 'N': case 'O':
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+    if (!C)
+      return;
+
+    int64_t CVal64 = C->getSExtValue();
+    int CVal = (int) CVal64;
+    // None of these constraints allow values larger than 32 bits.  Check
+    // that the value fits in an int.
+    if (CVal != CVal64)
+      return;
+
+    switch (Constraint) {
+      case 'I':
+        if (Subtarget->isThumb1Only()) {
+          // This must be a constant between 0 and 255, for ADD
+          // immediates.
+          if (CVal >= 0 && CVal <= 255)
+            break;
+        } else if (Subtarget->isThumb2()) {
+          // A constant that can be used as an immediate value in a
+          // data-processing instruction.
+          if (ARM_AM::getT2SOImmVal(CVal) != -1)
+            break;
+        } else {
+          // A constant that can be used as an immediate value in a
+          // data-processing instruction.
+          if (ARM_AM::getSOImmVal(CVal) != -1)
+            break;
+        }
+        return;
+
+      case 'J':
+        if (Subtarget->isThumb()) {  // FIXME thumb2
+          // This must be a constant between -255 and -1, for negated ADD
+          // immediates. This can be used in GCC with an "n" modifier that
+          // prints the negated value, for use with SUB instructions. It is
+          // not useful otherwise but is implemented for compatibility.
+          if (CVal >= -255 && CVal <= -1)
+            break;
+        } else {
+          // This must be a constant between -4095 and 4095. It is not clear
+          // what this constraint is intended for. Implemented for
+          // compatibility with GCC.
+          if (CVal >= -4095 && CVal <= 4095)
+            break;
+        }
+        return;
+
+      case 'K':
+        if (Subtarget->isThumb1Only()) {
+          // A 32-bit value where only one byte has a nonzero value. Exclude
+          // zero to match GCC. This constraint is used by GCC internally for
+          // constants that can be loaded with a move/shift combination.
+          // It is not useful otherwise but is implemented for compatibility.
+          if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
+            break;
+        } else if (Subtarget->isThumb2()) {
+          // A constant whose bitwise inverse can be used as an immediate
+          // value in a data-processing instruction. This can be used in GCC
+          // with a "B" modifier that prints the inverted value, for use with
+          // BIC and MVN instructions. It is not useful otherwise but is
+          // implemented for compatibility.
+          if (ARM_AM::getT2SOImmVal(~CVal) != -1)
+            break;
+        } else {
+          // A constant whose bitwise inverse can be used as an immediate
+          // value in a data-processing instruction. This can be used in GCC
+          // with a "B" modifier that prints the inverted value, for use with
+          // BIC and MVN instructions. It is not useful otherwise but is
+          // implemented for compatibility.
+          if (ARM_AM::getSOImmVal(~CVal) != -1)
+            break;
+        }
+        return;
+
+      case 'L':
+        if (Subtarget->isThumb1Only()) {
+          // This must be a constant between -7 and 7,
+          // for 3-operand ADD/SUB immediate instructions.
+          if (CVal >= -7 && CVal < 7)
+            break;
+        } else if (Subtarget->isThumb2()) {
+          // A constant whose negation can be used as an immediate value in a
+          // data-processing instruction. This can be used in GCC with an "n"
+          // modifier that prints the negated value, for use with SUB
+          // instructions. It is not useful otherwise but is implemented for
+          // compatibility.
+          if (ARM_AM::getT2SOImmVal(-CVal) != -1)
+            break;
+        } else {
+          // A constant whose negation can be used as an immediate value in a
+          // data-processing instruction. This can be used in GCC with an "n"
+          // modifier that prints the negated value, for use with SUB
+          // instructions. It is not useful otherwise but is implemented for
+          // compatibility.
+          if (ARM_AM::getSOImmVal(-CVal) != -1)
+            break;
+        }
+        return;
+
+      case 'M':
+        if (Subtarget->isThumb()) { // FIXME thumb2
+          // This must be a multiple of 4 between 0 and 1020, for
+          // ADD sp + immediate.
+          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
+            break;
+        } else {
+          // A power of two or a constant between 0 and 32.  This is used in
+          // GCC for the shift amount on shifted register operands, but it is
+          // useful in general for any shift amounts.
+          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
+            break;
+        }
+        return;
+
+      case 'N':
+        if (Subtarget->isThumb()) {  // FIXME thumb2
+          // This must be a constant between 0 and 31, for shift amounts.
+          if (CVal >= 0 && CVal <= 31)
+            break;
+        }
+        return;
+
+      case 'O':
+        if (Subtarget->isThumb()) {  // FIXME thumb2
+          // This must be a multiple of 4 between -508 and 508, for
+          // ADD/SUB sp = sp + immediate.
+          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
+            break;
+        }
+        return;
+    }
+    Result = DAG.getTargetConstant(CVal, Op.getValueType());
+    break;
+  }
+
+  if (Result.getNode()) {
+    Ops.push_back(Result);
+    return;
+  }
+  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
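[Editorial note: an illustrative use of the immediate constraints validated above — my own example, not from the patch. 'I' accepts an ARM modified immediate (an 8-bit value rotated right by an even amount), so 0xFF000000 qualifies; values that fail these checks are materialized in a register instead:]

  unsigned clear_top_byte(unsigned x) {
    unsigned r;
    // 0xFF000000 = 0xFF rotated right by 8, so it passes getSOImmVal.
    asm("bic %0, %1, %2" : "=r"(r) : "r"(x), "I"(0xFF000000u));
    return r;
  }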
+bool
+ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+  // The ARM target isn't yet aware of offsets.
+  return false;
+}
+
+int ARM::getVFPf32Imm(const APFloat &FPImm) {
+  APInt Imm = FPImm.bitcastToAPInt();
+  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
+  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
+  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits
+
+  // We can handle 4 bits of mantissa.
+  // mantissa = (16+UInt(e:f:g:h))/16.
+  if (Mantissa & 0x7ffff)
+    return -1;
+  Mantissa >>= 19;
+  if ((Mantissa & 0xf) != Mantissa)
+    return -1;
+
+  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+  if (Exp < -3 || Exp > 4)
+    return -1;
+  Exp = ((Exp+3) & 0x7) ^ 4;
+
+  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+int ARM::getVFPf64Imm(const APFloat &FPImm) {
+  APInt Imm = FPImm.bitcastToAPInt();
+  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
+  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
+  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;
+
+  // We can handle 4 bits of mantissa.
+  // mantissa = (16+UInt(e:f:g:h))/16.
+  if (Mantissa & 0xffffffffffffLL)
+    return -1;
+  Mantissa >>= 48;
+  if ((Mantissa & 0xf) != Mantissa)
+    return -1;
+
+  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
+  if (Exp < -3 || Exp > 4)
+    return -1;
+  Exp = ((Exp+3) & 0x7) ^ 4;
+
+  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
+}
+
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+  if (v == 0xffffffff)
+    return 0;
+  // there can be 1's on either or both "outsides", all the "inside"
+  // bits must be 0's
+  unsigned int lsb = 0, msb = 31;
+  while (v & (1 << msb)) --msb;
+  while (v & (1 << lsb)) ++lsb;
+  for (unsigned int i = lsb; i <= msb; ++i) {
+    if (v & (1 << i))
+      return 0;
+  }
+  return 1;
+}
+
+/// isFPImmLegal - Returns true if the target can instruction select the
+/// specified FP immediate natively. If false, the legalizer will
+/// materialize the FP immediate as a load from a constant pool.
+bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+  if (!Subtarget->hasVFP3())
+    return false;
+  if (VT == MVT::f32)
+    return ARM::getVFPf32Imm(Imm) != -1;
+  if (VT == MVT::f64)
+    return ARM::getVFPf64Imm(Imm) != -1;
+  return false;
+}
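[Editorial note: a worked instance of the VFP immediate encoding above may help. This standalone sketch is mine, not from the patch; it re-derives the 8-bit VMOV.f32 encoding for 0.5f with the same arithmetic as getVFPf32Imm, using only std::memcpy to read the float's bits:]

  #include <cstdint>
  #include <cstdio>
  #include <cstring>

  // Mirror of getVFPf32Imm's arithmetic: returns the 8-bit immediate for
  // VMOV.f32, or -1 if the value is not encodable.
  static int vfpF32Imm(float F) {
    uint32_t Bits;
    std::memcpy(&Bits, &F, sizeof Bits);
    uint32_t Sign = (Bits >> 31) & 1;
    int32_t  Exp  = (int32_t)((Bits >> 23) & 0xff) - 127;
    uint32_t Mant = Bits & 0x7fffff;
    if (Mant & 0x7ffff) return -1;       // only 4 mantissa bits allowed
    Mant >>= 19;
    if (Exp < -3 || Exp > 4) return -1;  // only 3 exponent bits allowed
    uint32_t E = (((uint32_t)(Exp + 3)) & 7) ^ 4;
    return (int)((Sign << 7) | (E << 4) | Mant);
  }

  int main() {
    // 0.5f = 1.0 * 2^-1: Sign 0, Exp -1 -> E = 6, mantissa bits 0 -> 0x60.
    std::printf("%#x\n", vfpF32Imm(0.5f));  // prints 0x60
    std::printf("%d\n", vfpF32Imm(0.1f));   // prints -1 (not encodable)
  }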
+/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
+/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
+/// specified in the intrinsic calls.
+bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                           const CallInst &I,
+                                           unsigned Intrinsic) const {
+  switch (Intrinsic) {
+  case Intrinsic::arm_neon_vld1:
+  case Intrinsic::arm_neon_vld2:
+  case Intrinsic::arm_neon_vld3:
+  case Intrinsic::arm_neon_vld4:
+  case Intrinsic::arm_neon_vld2lane:
+  case Intrinsic::arm_neon_vld3lane:
+  case Intrinsic::arm_neon_vld4lane: {
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    // Conservatively set memVT to the entire set of vectors loaded.
+    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile loads with NEON intrinsics not supported
+    Info.readMem = true;
+    Info.writeMem = false;
+    return true;
+  }
+  case Intrinsic::arm_neon_vst1:
+  case Intrinsic::arm_neon_vst2:
+  case Intrinsic::arm_neon_vst3:
+  case Intrinsic::arm_neon_vst4:
+  case Intrinsic::arm_neon_vst2lane:
+  case Intrinsic::arm_neon_vst3lane:
+  case Intrinsic::arm_neon_vst4lane: {
+    Info.opc = ISD::INTRINSIC_VOID;
+    // Conservatively set memVT to the entire set of vectors stored.
+    unsigned NumElts = 0;
+    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
+      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
+      if (!ArgTy->isVectorTy())
+        break;
+      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
+    }
+    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
+    Info.ptrVal = I.getArgOperand(0);
+    Info.offset = 0;
+    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
+    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
+    Info.vol = false; // volatile stores with NEON intrinsics not supported
+    Info.readMem = false;
+    Info.writeMem = true;
+    return true;
+  }
+  default:
+    break;
+  }
+
+  return false;
+}
diff --git a/final/lib/Target/ARM/ARMISelLowering.h b/final/lib/Target/ARM/ARMISelLowering.h
new file mode 100644
index 00000000000..9ab2bdbf86c
--- /dev/null
+++ b/final/lib/Target/ARM/ARMISelLowering.h
@@ -0,0 +1,490 @@
+//===-- ARMISelLowering.h - ARM DAG Lowering Interface ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that ARM uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMISELLOWERING_H
+#define ARMISELLOWERING_H
+
+#include "ARMSubtarget.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include <vector>
+
+namespace llvm {
+  class ARMConstantPoolValue;
+
+  namespace ARMISD {
+    // ARM Specific DAG Nodes
+    enum NodeType {
+      // Start the numbering where the builtin ops and target ops leave off.
+      FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+      Wrapper,      // Wrapper - A wrapper node for TargetConstantPool,
+                    // TargetExternalSymbol, and TargetGlobalAddress.
+      WrapperDYN,   // WrapperDYN - A wrapper node for TargetGlobalAddress in
+                    // DYN mode.
+      WrapperPIC,   // WrapperPIC - A wrapper node for TargetGlobalAddress in
+                    // PIC mode.
+      WrapperJT,    // WrapperJT - A wrapper node for TargetJumpTable
+
+      CALL,         // Function call.
+      CALL_PRED,    // Function call that's predicable.
+      CALL_NOLINK,  // Function call with branch not branch-and-link.
+      tCALL,        // Thumb function call.
+      BRCOND,       // Conditional branch.
+      BR_JT,        // Jumptable branch.
+      BR2_JT,       // Jumptable branch (2 level - jumptable entry is a jump).
+      RET_FLAG,     // Return with a flag operand.
+
+      PIC_ADD,      // Add with a PC operand and a PIC label.
+
+      CMP,          // ARM compare instructions.
+      CMPZ,         // ARM compare that sets only Z flag.
+      CMPFP,        // ARM VFP compare instruction, sets FPSCR.
+      CMPFPw0,      // ARM VFP compare against zero instruction, sets FPSCR.
+      FMSTAT,       // ARM fmstat instruction.
+      CMOV,         // ARM conditional move instructions.
+
+      BCC_i64,
+
+      RBIT,         // ARM bitreverse instruction
+
+      FTOSI,        // FP to sint within a FP register.
+      FTOUI,        // FP to uint within a FP register.
+      SITOF,        // sint to FP within a FP register.
+      UITOF,        // uint to FP within a FP register.
+
+      SRL_FLAG,     // V,Flag = srl_flag X -> srl X, 1 + save carry out.
+      SRA_FLAG,     // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+      RRX,          // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
+
+      VMOVRRD,      // double to two gprs.
+      VMOVDRR,      // Two gprs to double.
+
+      EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
+      EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
+      EH_SJLJ_DISPATCHSETUP,  // SjLj exception handling dispatch setup.
+
+      TC_RETURN,    // Tail call return pseudo.
+
+      THREAD_POINTER,
+
+      DYN_ALLOC,    // Dynamic allocation on the stack.
+
+      MEMBARRIER,   // Memory barrier (DMB)
+      MEMBARRIER_MCR, // Memory barrier (MCR)
+
+      PRELOAD,      // Preload
+
+      VCEQ,         // Vector compare equal.
+      VCEQZ,        // Vector compare equal to zero.
+      VCGE,         // Vector compare greater than or equal.
+      VCGEZ,        // Vector compare greater than or equal to zero.
+      VCLEZ,        // Vector compare less than or equal to zero.
+      VCGEU,        // Vector compare unsigned greater than or equal.
+      VCGT,         // Vector compare greater than.
+      VCGTZ,        // Vector compare greater than zero.
+      VCLTZ,        // Vector compare less than zero.
+      VCGTU,        // Vector compare unsigned greater than.
+      VTST,         // Vector test bits.
+
+      // Vector shift by immediate:
+      VSHL,         // ...left
+      VSHRs,        // ...right (signed)
+      VSHRu,        // ...right (unsigned)
+      VSHLLs,       // ...left long (signed)
+      VSHLLu,       // ...left long (unsigned)
+      VSHLLi,       // ...left long (with maximum shift count)
+      VSHRN,        // ...right narrow
+
+      // Vector rounding shift by immediate:
+      VRSHRs,       // ...right (signed)
+      VRSHRu,       // ...right (unsigned)
+      VRSHRN,       // ...right narrow
+
+      // Vector saturating shift by immediate:
+      VQSHLs,       // ...left (signed)
+      VQSHLu,       // ...left (unsigned)
+      VQSHLsu,      // ...left (signed to unsigned)
+      VQSHRNs,      // ...right narrow (signed)
+      VQSHRNu,      // ...right narrow (unsigned)
+      VQSHRNsu,     // ...right narrow (signed to unsigned)
+
+      // Vector saturating rounding shift by immediate:
+      VQRSHRNs,     // ...right narrow (signed)
+      VQRSHRNu,     // ...right narrow (unsigned)
+      VQRSHRNsu,    // ...right narrow (signed to unsigned)
+
+      // Vector shift and insert:
+      VSLI,         // ...left
+      VSRI,         // ...right
+
+      // Vector get lane (VMOV scalar to ARM core register)
+      // (These are used for 8- and 16-bit element types only.)
+      VGETLANEu,    // zero-extend vector extract element
+      VGETLANEs,    // sign-extend vector extract element
+
+      // Vector move immediate and move negated immediate:
+      VMOVIMM,
+      VMVNIMM,
+
+      // Vector duplicate:
+      VDUP,
+      VDUPLANE,
+
+      // Vector shuffles:
+      VEXT,         // extract
+      VREV64,       // reverse elements within 64-bit doublewords
+      VREV32,       // reverse elements within 32-bit words
+      VREV16,       // reverse elements within 16-bit halfwords
+      VZIP,         // zip (interleave)
+      VUZP,         // unzip (deinterleave)
+      VTRN,         // transpose
+
+      // Vector multiply long:
+      VMULLs,       // ...signed
+      VMULLu,       // ...unsigned
+
+      // Operands of the standard BUILD_VECTOR node are not legalized, which
+      // is fine if BUILD_VECTORs are always lowered to shuffles or other
+      // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+      // operands need to be legalized.  Define an ARM-specific version of
+      // BUILD_VECTOR for this purpose.
+      BUILD_VECTOR,
+
+      // Floating-point max and min:
+      FMAX,
+      FMIN,
+
+      // Bit-field insert
+      BFI,
+
+      // Vector OR with immediate
+      VORRIMM,
+      // Vector AND with NOT of immediate
+      VBICIMM,
+
+      // Vector load N-element structure to all lanes:
+      VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+      VLD3DUP,
+      VLD4DUP,
+
+      // NEON loads with post-increment base updates:
+      VLD1_UPD,
+      VLD2_UPD,
+      VLD3_UPD,
+      VLD4_UPD,
+      VLD2LN_UPD,
+      VLD3LN_UPD,
+      VLD4LN_UPD,
+      VLD2DUP_UPD,
+      VLD3DUP_UPD,
+      VLD4DUP_UPD,
+
+      // NEON stores with post-increment base updates:
+      VST1_UPD,
+      VST2_UPD,
+      VST3_UPD,
+      VST4_UPD,
+      VST2LN_UPD,
+      VST3LN_UPD,
+      VST4LN_UPD
+    };
+  }
+
+  /// Define some predicates that are used for node matching.
+  namespace ARM {
+    /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
+    /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
+    /// instruction, returns its 8-bit integer representation. Otherwise,
+    /// returns -1.
+    int getVFPf32Imm(const APFloat &FPImm);
+    int getVFPf64Imm(const APFloat &FPImm);
+    bool isBitFieldInvertedMask(unsigned v);
+  }
+
+  //===--------------------------------------------------------------------===//
+  //  ARMTargetLowering - ARM Implementation of the TargetLowering interface
+
+  class ARMTargetLowering : public TargetLowering {
+  public:
+    explicit ARMTargetLowering(TargetMachine &TM);
+
+    virtual unsigned getJumpTableEncoding(void) const;
+
+    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
+
+    /// ReplaceNodeResults - Replace the results of node with an illegal result
+    /// type with new values built out of custom code.
+    ///
+    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
+                                    SelectionDAG &DAG) const;
+
+    virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+    virtual MachineBasicBlock *
+      EmitInstrWithCustomInserter(MachineInstr *MI,
+                                  MachineBasicBlock *MBB) const;
+
+    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+    bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
+
+    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
+    /// unaligned memory accesses of the specified type.
+    /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON?
+    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const;
+
+    /// isLegalAddressingMode - Return true if the addressing mode represented
+    /// by AM is legal for this target, for a load/store of the specified type.
+    virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
+    bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+
+    /// isLegalICmpImmediate - Return true if the specified immediate is a
+    /// legal icmp immediate, that is the target has icmp instructions which
+    /// can compare a register against the immediate without having to
+    /// materialize the immediate into a register.
+    virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
+    /// getPreIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if the node's address
+    /// can be legally represented as pre-indexed load / store address.
+    virtual bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+                                           SDValue &Offset,
+                                           ISD::MemIndexedMode &AM,
+                                           SelectionDAG &DAG) const;
+
+    /// getPostIndexedAddressParts - returns true by value, base pointer and
+    /// offset pointer and addressing mode by reference if this node can be
+    /// combined with a load / store to form a post-indexed load / store.
+    virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+                                            SDValue &Base, SDValue &Offset,
+                                            ISD::MemIndexedMode &AM,
+                                            SelectionDAG &DAG) const;
+
+    virtual void computeMaskedBitsForTargetNode(const SDValue Op,
+                                                const APInt &Mask,
+                                                APInt &KnownZero,
+                                                APInt &KnownOne,
+                                                const SelectionDAG &DAG,
+                                                unsigned Depth) const;
+
+
+    virtual bool ExpandInlineAsm(CallInst *CI) const;
+
+    ConstraintType getConstraintType(const std::string &Constraint) const;
+
+    /// Examine constraint string and operand type and determine a weight
+    /// value. The operand object must already have been set up with the
+    /// operand type.
+    ConstraintWeight getSingleConstraintMatchWeight(
+      AsmOperandInfo &info, const char *constraint) const;
+
+    std::pair<unsigned, const TargetRegisterClass*>
+      getRegForInlineAsmConstraint(const std::string &Constraint,
+                                   EVT VT) const;
+    std::vector<unsigned>
+      getRegClassForInlineAsmConstraint(const std::string &Constraint,
+                                        EVT VT) const;
+
+    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
+    /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
+    /// true it means one of the asm constraint of the inline asm instruction
+    /// being processed is 'm'.
+    virtual void LowerAsmOperandForConstraint(SDValue Op,
+                                              char ConstraintLetter,
+                                              std::vector<SDValue> &Ops,
+                                              SelectionDAG &DAG) const;
+
+    const ARMSubtarget* getSubtarget() const {
+      return Subtarget;
+    }
+
+    /// getRegClassFor - Return the register class that should be used for the
+    /// specified value type.
+    virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
+    /// getFunctionAlignment - Return the Log2 alignment of this function.
+    virtual unsigned getFunctionAlignment(const Function *F) const;
+
+    /// getMaximalGlobalOffset - Returns the maximal possible offset which can
+    /// be used for loads / stores from the global.
+    virtual unsigned getMaximalGlobalOffset() const;
+
+    /// createFastISel - This method returns a target specific FastISel object,
+    /// or null if the target does not support "fast" ISel.
+    virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
+
+    Sched::Preference getSchedulingPreference(SDNode *N) const;
+
+    bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
+    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+    /// isFPImmLegal - Returns true if the target can instruction select the
+    /// specified FP immediate natively. If false, the legalizer will
+    /// materialize the FP immediate as a load from a constant pool.
+    virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+
+    virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                    const CallInst &I,
+                                    unsigned Intrinsic) const;
+  protected:
+    std::pair<const TargetRegisterClass*, uint8_t>
+    findRepresentativeClass(EVT VT) const;
+
+  private:
+    /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+    /// make the right decision when generating code for different targets.
+    const ARMSubtarget *Subtarget;
+
+    const TargetRegisterInfo *RegInfo;
+
+    const InstrItineraryData *Itins;
+
+    /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
+    ///
+    unsigned ARMPCLabelIndex;
+
+    void addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT);
+    void addDRTypeForNEON(EVT VT);
+    void addQRTypeForNEON(EVT VT);
+
+    typedef SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPassVector;
+    void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
+                          SDValue Chain, SDValue &Arg,
+                          RegsToPassVector &RegsToPass,
+                          CCValAssign &VA, CCValAssign &NextVA,
+                          SDValue &StackPtr,
+                          SmallVector<SDValue, 8> &MemOpChains,
+                          ISD::ArgFlagsTy Flags) const;
+    SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
+                                 SDValue &Root, SelectionDAG &DAG,
+                                 DebugLoc dl) const;
+
+    CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+                                  bool isVarArg) const;
+    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
+                             DebugLoc dl, SelectionDAG &DAG,
+                             const CCValAssign &VA,
+                             ISD::ArgFlagsTy Flags) const;
+    SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
+                                    const ARMSubtarget *Subtarget) const;
+    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
+                                          SelectionDAG &DAG) const;
+    SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
+                                 SelectionDAG &DAG) const;
+    SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
+                              const ARMSubtarget *ST) const;
+
+    SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
+
+    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+                            CallingConv::ID CallConv, bool isVarArg,
+                            const SmallVectorImpl<ISD::InputArg> &Ins,
+                            DebugLoc dl, SelectionDAG &DAG,
+                            SmallVectorImpl<SDValue> &InVals) const;
+
+    virtual SDValue
+      LowerFormalArguments(SDValue Chain,
+                           CallingConv::ID CallConv, bool isVarArg,
+                           const SmallVectorImpl<ISD::InputArg> &Ins,
+                           DebugLoc dl, SelectionDAG &DAG,
+                           SmallVectorImpl<SDValue> &InVals) const;
+
+    virtual SDValue
+      LowerCall(SDValue Chain, SDValue Callee,
+                CallingConv::ID CallConv, bool isVarArg,
+                bool &isTailCall,
+                const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
+                const SmallVectorImpl<ISD::InputArg> &Ins,
+                DebugLoc dl, SelectionDAG &DAG,
+                SmallVectorImpl<SDValue> &InVals) const;
+
+    /// HandleByVal - Target-specific cleanup for ByVal support.
+    virtual void HandleByVal(CCState *) const;
+
+    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+    /// for tail call optimization. Targets which want to do tail call
+    /// optimization should implement this function.
+    bool IsEligibleForTailCallOptimization(SDValue Callee,
+                                           CallingConv::ID CalleeCC,
+                                           bool isVarArg,
+                                           bool isCalleeStructRet,
+                                           bool isCallerStructRet,
+                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                    const SmallVectorImpl<SDValue> &OutVals,
+                                    const SmallVectorImpl<ISD::InputArg> &Ins,
+                                           SelectionDAG& DAG) const;
+    virtual SDValue
+      LowerReturn(SDValue Chain,
+                  CallingConv::ID CallConv, bool isVarArg,
+                  const SmallVectorImpl<ISD::OutputArg> &Outs,
+                  const SmallVectorImpl<SDValue> &OutVals,
+                  DebugLoc dl, SelectionDAG &DAG) const;
+
+    virtual bool isUsedByReturnOnly(SDNode *N) const;
+
+    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+    SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+                      SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+                      SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const;
+
+    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
+    MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+                                         MachineBasicBlock *BB,
+                                         unsigned Size) const;
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+                                        MachineBasicBlock *BB,
+                                        unsigned Size,
+                                        unsigned BinOpcode) const;
+
+  };
+
+  enum NEONModImmType {
+    VMOVModImm,
+    VMVNModImm,
+    OtherModImm
+  };
+
+
+  namespace ARM {
+    FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
+  }
+}
+
+#endif  // ARMISELLOWERING_H
diff --git a/final/lib/Target/ARM/ARMInstrFormats.td b/final/lib/Target/ARM/ARMInstrFormats.td
new file mode 100644
index 00000000000..1c3476c0a92
--- /dev/null
+++ b/final/lib/Target/ARM/ARMInstrFormats.td
@@ -0,0 +1,1815 @@
+//===- ARMInstrFormats.td - ARM Instruction Formats ----------*- tablegen -*-=//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// ARM Instruction Format Definitions.
+//
+
+// Format specifies the encoding used by the instruction.  This is part of the
+// ad-hoc solution used to emit machine instruction encodings by our machine
+// code emitter.
+class Format<bits<6> val> {
+  bits<6> Value = val;
+}
+
+def Pseudo        : Format<0>;
+def MulFrm        : Format<1>;
+def BrFrm         : Format<2>;
+def BrMiscFrm     : Format<3>;
+
+def DPFrm         : Format<4>;
+def DPSoRegFrm    : Format<5>;
+
+def LdFrm         : Format<6>;
+def StFrm         : Format<7>;
+def LdMiscFrm     : Format<8>;
+def StMiscFrm     : Format<9>;
+def LdStMulFrm    : Format<10>;
+
+def LdStExFrm     : Format<11>;
+
+def ArithMiscFrm  : Format<12>;
+def SatFrm        : Format<13>;
+def ExtFrm        : Format<14>;
+
+def VFPUnaryFrm   : Format<15>;
+def VFPBinaryFrm  : Format<16>;
+def VFPConv1Frm   : Format<17>;
+def VFPConv2Frm   : Format<18>;
+def VFPConv3Frm   : Format<19>;
+def VFPConv4Frm   : Format<20>;
+def VFPConv5Frm   : Format<21>;
+def VFPLdStFrm    : Format<22>;
+def VFPLdStMulFrm : Format<23>;
+def VFPMiscFrm    : Format<24>;
+
+def ThumbFrm      : Format<25>;
+def MiscFrm       : Format<26>;
+
+def NGetLnFrm     : Format<27>;
+def NSetLnFrm     : Format<28>;
+def NDupFrm       : Format<29>;
+def NLdStFrm      : Format<30>;
+def N1RegModImmFrm: Format<31>;
+def N2RegFrm      : Format<32>;
+def NVCVTFrm      : Format<33>;
+def NVDupLnFrm    : Format<34>;
+def N2RegVShLFrm  : Format<35>;
+def N2RegVShRFrm  : Format<36>;
+def N3RegFrm      : Format<37>;
+def N3RegVShFrm   : Format<38>;
+def NVExtFrm      : Format<39>;
+def NVMulSLFrm    : Format<40>;
+def NVTBLFrm      : Format<41>;
+
+// Misc flags.
+
+// The instruction has an Rn register operand.
+// UnaryDP - Indicates this is a unary data processing instruction, i.e.
+// it doesn't have a Rn operand.
+class UnaryDP    { bit isUnaryDataProc = 1; }
+
+// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
+// a 16-bit Thumb instruction if certain conditions are met.
+class Xform16Bit { bit canXformTo16Bit = 1; }
+
+//===----------------------------------------------------------------------===//
+// ARM Instruction flags.  These need to match ARMBaseInstrInfo.h.
+//
+
+// FIXME: Once the JIT is MC-ized, these can go away.
+// Addressing mode.
+class AddrMode<bits<5> val> {
+  bits<5> Value = val;
+}
+def AddrModeNone    : AddrMode<0>;
+def AddrMode1       : AddrMode<1>;
+def AddrMode2       : AddrMode<2>;
+def AddrMode3       : AddrMode<3>;
+def AddrMode4       : AddrMode<4>;
+def AddrMode5       : AddrMode<5>;
+def AddrMode6       : AddrMode<6>;
+def AddrModeT1_1    : AddrMode<7>;
+def AddrModeT1_2    : AddrMode<8>;
+def AddrModeT1_4    : AddrMode<9>;
+def AddrModeT1_s    : AddrMode<10>;
+def AddrModeT2_i12  : AddrMode<11>;
+def AddrModeT2_i8   : AddrMode<12>;
+def AddrModeT2_so   : AddrMode<13>;
+def AddrModeT2_pc   : AddrMode<14>;
+def AddrModeT2_i8s4 : AddrMode<15>;
+def AddrMode_i12    : AddrMode<16>;
+
+// Instruction size.
+class SizeFlagVal<bits<3> val> {
+  bits<3> Value = val;
+}
+def SizeInvalid : SizeFlagVal<0>;  // Unset.
+def SizeSpecial : SizeFlagVal<1>;  // Pseudo or special.
+def Size8Bytes  : SizeFlagVal<2>;
+def Size4Bytes  : SizeFlagVal<3>;
+def Size2Bytes  : SizeFlagVal<4>;
+
+// Load / store index mode.
+class IndexMode<bits<2> val> {
+  bits<2> Value = val;
+}
+def IndexModeNone : IndexMode<0>;
+def IndexModePre  : IndexMode<1>;
+def IndexModePost : IndexMode<2>;
+def IndexModeUpd  : IndexMode<3>;
+
+// Instruction execution domain.
+class Domain<bits<3> val> {
+  bits<3> Value = val;
+}
+def GenericDomain   : Domain<0>;
+def VFPDomain       : Domain<1>; // Instructions in VFP domain only
+def NeonDomain      : Domain<2>; // Instructions in Neon domain only
+def VFPNeonDomain   : Domain<3>; // Instructions in both VFP & Neon domains
+def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8
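[Editorial note: the comment above says these values must match ARMBaseInstrInfo.h. A hedged sketch of the C++ side of that contract — the authoritative copy lives in that header, so treat this as illustrative only; it mainly shows why AddrMode needs bits<5> (17 modes need 5 bits):]

  namespace ARMII {
    enum AddrMode {
      AddrModeNone    = 0,
      AddrMode1       = 1,
      AddrMode2       = 2,
      AddrMode3       = 3,
      AddrMode4       = 4,
      AddrMode5       = 5,
      AddrMode6       = 6,
      AddrModeT1_1    = 7,
      AddrModeT1_2    = 8,
      AddrModeT1_4    = 9,
      AddrModeT1_s    = 10,
      AddrModeT2_i12  = 11,
      AddrModeT2_i8   = 12,
      AddrModeT2_so   = 13,
      AddrModeT2_pc   = 14,
      AddrModeT2_i8s4 = 15,
      AddrMode_i12    = 16
    };
  }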
+//===----------------------------------------------------------------------===//
+// ARM special operands.
+//
+
+def CondCodeOperand : AsmOperandClass {
+  let Name = "CondCode";
+  let SuperClasses = [];
+}
+
+def CCOutOperand : AsmOperandClass {
+  let Name = "CCOut";
+  let SuperClasses = [];
+}
+
+def MemBarrierOptOperand : AsmOperandClass {
+  let Name = "MemBarrierOpt";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMemBarrierOptOperand";
+}
+
+def ProcIFlagsOperand : AsmOperandClass {
+  let Name = "ProcIFlags";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseProcIFlagsOperand";
+}
+
+def MSRMaskOperand : AsmOperandClass {
+  let Name = "MSRMask";
+  let SuperClasses = [];
+  let ParserMethod = "tryParseMSRMaskOperand";
+}
+
+// ARM imod and iflag operands, used only by the CPS instruction.
+def imod_op : Operand<i32> {
+  let PrintMethod = "printCPSIMod";
+}
+
+def iflags_op : Operand<i32> {
+  let PrintMethod = "printCPSIFlag";
+  let ParserMatchClass = ProcIFlagsOperand;
+}
+
+// ARM Predicate operand. Default to 14 = always (AL). Second part is CC
+// register whose default is 0 (no register).
+def pred : PredicateOperand<OtherVT, (ops i32imm, CCR),
+                                     (ops (i32 14), (i32 zero_reg))> {
+  let PrintMethod = "printPredicateOperand";
+  let ParserMatchClass = CondCodeOperand;
+}
+
+// Conditional code result for instructions whose 's' bit is set, e.g. subs.
+def cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 zero_reg))> {
+  let EncoderMethod = "getCCOutOpValue";
+  let PrintMethod = "printSBitModifierOperand";
+  let ParserMatchClass = CCOutOperand;
+}
+
+// Same as cc_out except it defaults to setting CPSR.
+def s_cc_out : OptionalDefOperand<OtherVT, (ops CCR), (ops (i32 CPSR))> {
+  let EncoderMethod = "getCCOutOpValue";
+  let PrintMethod = "printSBitModifierOperand";
+  let ParserMatchClass = CCOutOperand;
+}
+
+// ARM special operands for disassembly only.
+//
+def setend_op : Operand<i32> {
+  let PrintMethod = "printSetendOperand";
+}
+
+def cps_opt : Operand<i32> {
+  let PrintMethod = "printCPSOptionOperand";
+}
+
+def msr_mask : Operand<i32> {
+  let PrintMethod = "printMSRMaskOperand";
+  let ParserMatchClass = MSRMaskOperand;
+}
+
+// A8.6.117, A8.6.118.  Different instructions are generated for #0 and #-0.
+// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc.
+def neg_zero : Operand<i32> {
+  let PrintMethod = "printNegZeroOperand";
+}
+
+// Shift Right Immediate - A shift right immediate is encoded differently from
+// other shift immediates. The imm6 field is encoded like so:
+//
+//    Offset    Encoding
+//     8        imm6<5:3> = '001', 8 - <imm> is encoded in imm6<2:0>
+//     16       imm6<5:4> = '01',  16 - <imm> is encoded in imm6<3:0>
+//     32       imm6<5>   = '1',   32 - <imm> is encoded in imm6<4:0>
+//     64       64 - <imm> is encoded in imm6<5:0>
+def shr_imm8 : Operand<i32> {
+  let EncoderMethod = "getShiftRight8Imm";
+}
+def shr_imm16 : Operand<i32> {
+  let EncoderMethod = "getShiftRight16Imm";
+}
+def shr_imm32 : Operand<i32> {
+  let EncoderMethod = "getShiftRight32Imm";
+}
+def shr_imm64 : Operand<i32> {
+  let EncoderMethod = "getShiftRight64Imm";
+}
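[Editorial note: a quick sanity check of the imm6 table above, as a standalone snippet of my own — not part of the patch. The field reduces to 2*width - shift for 8/16/32-bit elements and 64 - shift for 64-bit ones:]

  #include <cassert>

  // Encode a shift-right immediate into the imm6 field per the table above;
  // width is the element width in bits, shift is 1..width.
  static unsigned encodeShrImm6(unsigned width, unsigned shift) {
    assert(shift >= 1 && shift <= width);
    return width == 64 ? 64 - shift : 2 * width - shift;
  }

  int main() {
    assert(encodeShrImm6(8, 8)   == 0x08);  // imm6<5:3> = '001', low bits 0
    assert(encodeShrImm6(16, 1)  == 0x1f);  // imm6<5:4> = '01',  low bits 15
    assert(encodeShrImm6(32, 32) == 0x20);  // imm6<5>   = '1',   low bits 0
    return 0;
  }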
+//===----------------------------------------------------------------------===//
+// ARM Instruction templates.
+//
+
+class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
+                   Format f, Domain d, string cstr, InstrItinClass itin>
+  : Instruction {
+  let Namespace = "ARM";
+
+  AddrMode AM = am;
+  SizeFlagVal SZ = sz;
+  IndexMode IM = im;
+  bits<2> IndexModeBits = IM.Value;
+  Format F = f;
+  bits<6> Form = F.Value;
+  Domain D = d;
+  bit isUnaryDataProc = 0;
+  bit canXformTo16Bit = 0;
+
+  // If this is a pseudo instruction, mark it isCodeGenOnly.
+  let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
+
+  // The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
+  let TSFlags{4-0}   = AM.Value;
+  let TSFlags{7-5}   = SZ.Value;
+  let TSFlags{9-8}   = IndexModeBits;
+  let TSFlags{15-10} = Form;
+  let TSFlags{16}    = isUnaryDataProc;
+  let TSFlags{17}    = canXformTo16Bit;
+  let TSFlags{20-18} = D.Value;
+
+  let Constraints = cstr;
+  let Itinerary = itin;
+}
+
+class Encoding {
+  field bits<32> Inst;
+}
+
+class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+              Format f, Domain d, string cstr, InstrItinClass itin>
+  : InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
+
+// This Encoding-less class is used by Thumb1 to specify the encoding bits
+// later on by adding flavors to specific instructions.
+class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
+                Format f, Domain d, string cstr, InstrItinClass itin>
+  : InstTemplate<am, sz, im, f, d, cstr, itin>;
+
+class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
+  // FIXME: This really should derive from InstTemplate instead, as pseudos
+  //        don't need encoding information. TableGen doesn't like that
+  //        currently. Need to figure out why and fix it.
+  : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
+            "", itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let Pattern = pattern;
+}
+
+// PseudoInst that's ARM-mode only.
+class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                    list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsARM];
+}
+
+// PseudoInst that's Thumb-mode only.
+class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                  list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsThumb];
+}
+
+// PseudoInst that's Thumb2-mode only.
+class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+                   list<dag> pattern>
+  : PseudoInst<oops, iops, itin, pattern> {
+  let SZ = sz;
+  list<Predicate> Predicates = [IsThumb2];
+}
+// Almost all ARM instructions are predicable.
+class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+        IndexMode im, Format f, InstrItinClass itin,
+        string opc, string asm, string cstr,
+        list<dag> pattern>
+  : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  bits<4> p;
+  let Inst{31-28} = p;
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let AsmString = !strconcat(opc, "${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsARM];
+}
+
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+           IndexMode im, Format f, InstrItinClass itin,
+           string opc, string asm, string cstr,
+           list<dag> pattern>
+  : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = !strconcat(opc, asm);
+  let Pattern = pattern;
+  let isPredicable = 0;
+  list<Predicate> Predicates = [IsARM];
+}
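[Editorial note: the TSFlags layout fixed by InstTemplate above is consumed from C++. A hedged sketch of the corresponding bit extraction — struct and function names are mine; the real masks live in ARMBaseInstrInfo.h:]

  #include <cstdint>

  // Decode the TSFlags layout declared in InstTemplate above:
  //   [4:0] AddrMode, [7:5] Size, [9:8] IndexMode, [15:10] Form,
  //   [16] UnaryDP, [17] Xform16Bit, [20:18] Domain.
  struct ARMTSFlags {
    unsigned AddrMode, Size, IndexMode, Form, Domain;
    bool UnaryDP, Xform16Bit;
  };

  static ARMTSFlags decodeTSFlags(uint64_t TSFlags) {
    ARMTSFlags F;
    F.AddrMode   =  TSFlags        & 0x1f;
    F.Size       = (TSFlags >> 5)  & 0x7;
    F.IndexMode  = (TSFlags >> 8)  & 0x3;
    F.Form       = (TSFlags >> 10) & 0x3f;
    F.UnaryDP    = (TSFlags >> 16) & 1;
    F.Xform16Bit = (TSFlags >> 17) & 1;
    F.Domain     = (TSFlags >> 18) & 0x7;
    return F;
  }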
+// Same as I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+         IndexMode im, Format f, InstrItinClass itin,
+         string opc, string asm, string cstr,
+         list<dag> pattern>
+  : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  bits<4> p; // Predicate operand
+  bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+  let Inst{31-28} = p;
+  let Inst{20} = s;
+
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsARM];
+}
+
+// Special cases
+class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+         IndexMode im, Format f, InstrItinClass itin,
+         string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsARM];
+}
+
+class AI<dag oops, dag iops, Format f, InstrItinClass itin,
+         string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern>;
+class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+       opc, asm, "", pattern>;
+class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
+          string asm, list<dag> pattern>
+  : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+       asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+         opc, asm, "", pattern>;
+
+// Ctrl flow instructions
+class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+      opc, asm, "", pattern> {
+  let Inst{27-24} = opcod;
+}
+class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
+           string asm, list<dag> pattern>
+  : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+       asm, "", pattern> {
+  let Inst{27-24} = opcod;
+}
+
+// BR_JT instructions
+class JTI<dag oops, dag iops, InstrItinClass itin,
+          string asm, list<dag> pattern>
+  : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
+       asm, "", pattern>;
+
+// Atomic load/store instructions
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{27-23} = 0b00011;
+  let Inst{22-21} = opcod;
+  let Inst{20}    = 1;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
+  let Inst{11-0}  = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{27-23} = 0b00011;
+  let Inst{22-21} = opcod;
+  let Inst{20}    = 0;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{11-4}  = 0b11111001;
+  let Inst{3-0}   = Rt;
+}
+class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
+  : AI<oops, iops, MiscFrm, NoItinerary, opc, "\t$Rt, $Rt2, [$Rn]", pattern> {
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<4> Rn;
+  let Inst{27-23} = 0b00010;
+  let Inst{22}    = b;
+  let Inst{21-20} = 0b00;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;
+  let Inst{11-4}  = 0b00001001;
+  let Inst{3-0}   = Rt2;
+}
+
+// addrmode1 instructions
+class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern> {
+  let Inst{24-21} = opcod;
+  let Inst{27-26} = 0b00;
+}
+class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+       opc, asm, "", pattern> {
+  let Inst{24-21} = opcod;
+  let Inst{27-26} = 0b00;
+}
+class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
+           string asm, list<dag> pattern>
+  : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+       asm, "", pattern> {
+  let Inst{24-21} = opcod;
+  let Inst{27-26} = 0b00;
+}
+
+// loads
+
+// LDR/LDRB/STR/STRB/...
+class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
+              Format f, InstrItinClass itin, string opc, string asm,
+              list<dag> pattern>
+  : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern> {
+  let Inst{27-25} = op;
+  let Inst{24}    = 1;      // 24 == P
+  // 23 == U
+  let Inst{22}    = isByte;
+  let Inst{21}    = 0;      // 21 == W
+  let Inst{20}    = isLd;
+}
+// Indexed load/stores
+class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
+                 IndexMode im, Format f, InstrItinClass itin, string opc,
+                 string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
+      opc, asm, cstr, pattern> {
+  bits<4> Rt;
+  let Inst{27-26} = 0b01;
+  let Inst{24}    = isPre;  // P bit
+  let Inst{22}    = isByte; // B bit
+  let Inst{21}    = isPre;  // W bit
+  let Inst{20}    = isLd;   // L bit
+  let Inst{15-12} = Rt;
+}
+class AI2stridx<bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM2 store w/ two operands: (GPR, am2offset)
+  // {13}     1 == Rm, 0 == imm12
+  // {12}     isAdd
+  // {11-0}   imm12/Rm
+  bits<14> offset;
+  bits<4> Rn;
+  let Inst{25}    = offset{13};
+  let Inst{23}    = offset{12};
+  let Inst{19-16} = Rn;
+  let Inst{11-0}  = offset{11-0};
+}
+
+// addrmode3 instructions
+class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
+            InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern> {
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{27-25} = 0b000;
+  let Inst{24}    = 1;            // P bit
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{21}    = 0;            // W bit
+  let Inst{20}    = op20;         // L bit
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+}
+
+class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
+                 IndexMode im, Format f, InstrItinClass itin, string opc,
+                 string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+      opc, asm, cstr, pattern> {
+  bits<4> Rt;
+  let Inst{27-25} = 0b000;
+  let Inst{24}    = isPre;        // P bit
+  let Inst{21}    = isPre;        // W bit
+  let Inst{20}    = op20;         // L bit
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{7-4}   = op;
+}
+class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
+                IndexMode im, Format f, InstrItinClass itin, string opc,
+                string asm, string cstr, list<dag> pattern>
+  : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr,
+               pattern> {
+  // AM3 store w/ two operands: (GPR, am3offset)
+  bits<14> offset;
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{27-25} = 0b000;
+  let Inst{23}    = offset{8};
+  let Inst{22}    = offset{9};
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = offset{7-4};  // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = offset{3-0};  // imm3_0/Rm
+}
+
+// stores
+class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
+             string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern> {
+  bits<14> addr;
+  bits<4> Rt;
+  let Inst{27-25} = 0b000;
+  let Inst{24}    = 1;            // P bit
+  let Inst{23}    = addr{8};      // U bit
+  let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+  let Inst{21}    = 0;            // W bit
+  let Inst{20}    = 0;            // L bit
+  let Inst{19-16} = addr{12-9};   // Rn
+  let Inst{15-12} = Rt;           // Rt
+  let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+  let Inst{7-4}   = op;
+  let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+}
+
+// Pre-indexed stores
+class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+      opc, asm, cstr, pattern> {
+  let Inst{4}     = 1;
+  let Inst{5}     = 1;            // H bit
+  let Inst{6}     = 0;            // S bit
+  let Inst{7}     = 1;
+  let Inst{20}    = 0;            // L bit
+  let Inst{21}    = 1;            // W bit
+  let Inst{24}    = 1;            // P bit
+  let Inst{27-25} = 0b000;
+}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+      opc, asm, cstr, pattern> {
+  let Inst{4}     = 1;
+  let Inst{5}     = 1;            // H bit
+  let Inst{6}     = 1;            // S bit
+  let Inst{7}     = 1;
+  let Inst{20}    = 0;            // L bit
+  let Inst{21}    = 1;            // W bit
+  let Inst{24}    = 1;            // P bit
+  let Inst{27-25} = 0b000;
+}
+
+// Post-indexed stores
+class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+      opc, asm, cstr, pattern> {
+  let Inst{4}     = 1;
+  let Inst{5}     = 1;            // H bit
+  let Inst{6}     = 0;            // S bit
+  let Inst{7}     = 1;
+  let Inst{20}    = 0;            // L bit
+  let Inst{21}    = 0;            // W bit
+  let Inst{24}    = 0;            // P bit
+  let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+      opc, asm, cstr, pattern> {
+  let Inst{4}     = 1;
+  let Inst{5}     = 1;            // H bit
+  let Inst{6}     = 1;            // S bit
+  let Inst{7}     = 1;
+  let Inst{20}    = 0;            // L bit
+  let Inst{21}    = 0;            // W bit
+  let Inst{24}    = 0;            // P bit
+  let Inst{27-25} = 0b000;
+}
+
+// addrmode4 instructions
+class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
+           string asm, string cstr, list<dag> pattern>
+  : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+  bits<4>  p;
+  bits<16> regs;
+  bits<4>  Rn;
+  let Inst{31-28} = p;
+  let Inst{27-25} = 0b100;
+  let Inst{22}    = 0; // S bit
+  let Inst{19-16} = Rn;
+  let Inst{15-0}  = regs;
+}
+
+// Unsigned multiply, multiply-accumulate instructions.
+class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+             string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+      opc, asm, "", pattern> {
+  let Inst{7-4}   = 0b1001;
+  let Inst{20}    = 0; // S bit
+  let Inst{27-21} = opcod;
+}
+class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+  : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+       opc, asm, "", pattern> {
+  let Inst{7-4}   = 0b1001;
+  let Inst{27-21} = opcod;
+}
+
+// Most significant word multiply
+class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+             InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{7-4}   = opc7_4;
+  let Inst{20}    = 1;
+  let Inst{27-21} = opcod;
+  let Inst{19-16} = Rd;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+// MSW multiple w/ Ra operand
+class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMul2I<opcod, opc7_4, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+
+// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
+class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+                  InstrItinClass itin, string opc, string asm,
+                  list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{4}     = 0;
+  let Inst{7}     = 1;
+  let Inst{20}    = 0;
+  let Inst{27-21} = opcod;
+  let Inst{6-5}   = bit6_5;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AMulxyI<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  let Inst{19-16} = Rd;
+}
+
+// AMulxyI with Ra operand
+class AMulxyIa<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+               InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyI<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+// SMLAL*
+class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
+                InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : AMulxyIbase<opcod, bit6_5, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
+}
+
+// Extend instructions.
+class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
+      opc, asm, "", pattern> {
+  // All AExtI instructions have Rd and Rm register operands.
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{15-12} = Rd;
+  let Inst{3-0}   = Rm;
+  let Inst{7-4}   = 0b0111;
+  let Inst{9-8}   = 0b00;
+  let Inst{27-20} = opcod;
+}
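[Editorial note: the AM2 offset packing documented in AI2stridx earlier (bit 13 selects register vs. immediate, bit 12 the direction, bits 11:0 the payload) can be sanity-checked in isolation. A small sketch with my own helper name, not from the patch:]

  #include <cstdint>

  // Pack an addrmode2 offset the way AI2stridx splits it:
  //   {13}   1 == register offset, 0 == imm12
  //   {12}   isAdd
  //   {11-0} imm12 or Rm
  static uint16_t packAM2Offset(bool isReg, bool isAdd, uint16_t payload) {
    return (uint16_t)((isReg << 13) | (isAdd << 12) | (payload & 0xfff));
  }
  // e.g. "str r0, [r1], #-4" -> packAM2Offset(false, false, 4) == 0x0004
  // e.g. "str r0, [r1], r2"  -> packAM2Offset(true,  true,  2) == 0x3002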
+// Misc Arithmetic instructions.
+class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
+               InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{27-20} = opcod;
+  let Inst{19-16} = 0b1111;
+  let Inst{15-12} = Rd;
+  let Inst{11-8}  = 0b1111;
+  let Inst{7-4}   = opc7_4;
+  let Inst{3-0}   = Rm;
+}
+
+// PKH instructions
+class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+      opc, asm, "", pattern> {
+  bits<4> Rd;
+  bits<4> Rn;
+  bits<4> Rm;
+  bits<8> sh;
+  let Inst{27-20} = opcod;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{11-7}  = sh{7-3};
+  let Inst{6}     = tb;
+  let Inst{5-4}   = 0b01;
+  let Inst{3-0}   = Rm;
+}
+
+//===----------------------------------------------------------------------===//
+
+// ARMPat - Same as Pat<>, but requires that the compiler be in ARM mode.
+class ARMPat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsARM];
+}
+class ARMV5TEPat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsARM, HasV5TE];
+}
+class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsARM, HasV6];
+}
+
+//===----------------------------------------------------------------------===//
+// Thumb Instruction Format Definitions.
+//
+
+class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+             InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+  : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb];
+}
+
+// TI - Thumb instruction.
+class TI<dag oops, dag iops, InstrItinClass itin, string asm,
+         list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+
+// Two-address instructions
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+          list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+           pattern>;
+
+// tBL, tBX 32-bit instructions
+class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
+           dag oops, dag iops, InstrItinClass itin, string asm,
+           list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+    Encoding {
+  let Inst{31-27} = opcod1;
+  let Inst{15-14} = opcod2;
+  let Inst{12}    = opcod3;
+}
+
+// Move to/from coprocessor instructions
+class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "",
+           pattern>,
+    Encoding, Requires<[IsThumb, HasV6]> {
+  let Inst{31-28} = 0b1110;
+}
+
+// BR_JT instructions
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+           list<dag> pattern>
+  : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+
+// Thumb1 only
+class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+              InstrItinClass itin, string asm, string cstr, list<dag> pattern>
+  : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1I<dag oops, dag iops, InstrItinClass itin, string asm,
+          list<dag> pattern>
+  : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+class T1Ix2<dag oops, dag iops, InstrItinClass itin, string asm,
+            list<dag> pattern>
+  : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+
+// Two-address instructions
+class T1It<dag oops, dag iops, InstrItinClass itin, string asm,
+           list<dag> pattern>
+  : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+            asm, "$lhs = $dst", pattern>;
+
+// Thumb1 instruction that can either be predicated or set CPSR.
+class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = !con(oops, (outs s_cc_out:$s));
+  let InOperandList = !con(iops, (ins pred:$p));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1sI<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "",
+             pattern>;
+
+// Two-address instructions
+class T1sIt<dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+             "$Rn = $Rdn", pattern>;
+
+// Thumb1 instruction that can be predicated.
+class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let AsmString = !strconcat(opc, "${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T1pI<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "",
+             pattern>;
+
+// Two-address instructions
+class T1pIt<dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+             "$Rn = $Rdn", pattern>;
+
+class T1pIs<dag oops, dag iops,
+            InstrItinClass itin, string opc, string asm, list<dag> pattern>
+  : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "",
+             pattern>;
+
+class Encoding16 : Encoding {
+  let Inst{31-16} = 0x0000;
+}
+
+// A6.2 16-bit Thumb instruction encoding
+class T1Encoding<bits<6> opcode> : Encoding16 {
+  let Inst{15-10} = opcode;
+}
+
+// A6.2.1 Shift (immediate), add, subtract, move, and compare encoding.
+class T1General<bits<5> opcode> : Encoding16 {
+  let Inst{15-14} = 0b00;
+  let Inst{13-9} = opcode;
+}
+
+// A6.2.2 Data-processing encoding.
+class T1DataProcessing<bits<4> opcode> : Encoding16 {
+  let Inst{15-10} = 0b010000;
+  let Inst{9-6} = opcode;
+}
+
+// A6.2.3 Special data instructions and branch and exchange encoding.
+class T1Special<bits<4> opcode> : Encoding16 {
+  let Inst{15-10} = 0b010001;
+  let Inst{9-6} = opcode;
+}
+
+// A6.2.4 Load/store single data item encoding.
+class T1LoadStore<bits<4> opA, bits<3> opB> : Encoding16 {
+  let Inst{15-12} = opA;
+  let Inst{11-9} = opB;
+}
+class T1LdStSP<bits<3> opB> : T1LoadStore<0b1001, opB>; // SP relative
+
+// Helper classes to encode Thumb1 loads and stores. For immediates, the
+// following bits are used for "opA" (see A6.2.4):
+//
+//   0b0110 => Immediate, 4 bytes
+//   0b1000 => Immediate, 2 bytes
+//   0b0111 => Immediate, 1 byte
+class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
+                     InstrItinClass itin, string opc, string asm,
+                     list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<0b0101, opcode> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{8-6} = addr{5-3};    // Rm
+  let Inst{5-3} = addr{2-0};    // Rn
+  let Inst{2-0} = Rt;
+}
+class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
+                        InstrItinClass itin, string opc, string asm,
+                        list<dag> pattern>
+  : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+    T1LoadStore<opA, {opB,?,?}> {
+  bits<3> Rt;
+  bits<8> addr;
+  let Inst{10-6} = addr{7-3};   // imm5
+  let Inst{5-3}  = addr{2-0};   // Rn
+  let Inst{2-0}  = Rt;
+}
+
+// A6.2.5 Miscellaneous 16-bit instructions encoding.
+class T1Misc<bits<7> opcode> : Encoding16 {
+  let Inst{15-12} = 0b1011;
+  let Inst{11-5} = opcode;
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+              InstrItinClass itin,
+              string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let AsmString = !strconcat(opc, "${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as an
+// input operand since by default it's a zero register. It will become an
+// implicit def once it's "flipped".
+//
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               InstrItinClass itin,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  bits<1> s; // condition-code set flag ('1' if the insn should set the flags)
+  let Inst{20} = s;
+
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
+  let AsmString = !strconcat(opc, "${s}${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb2];
+}
+
+// Special cases
+class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               InstrItinClass itin,
+               string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb2];
+}
+
+class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+              InstrItinClass itin,
+              string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+class T2I<dag oops, dag iops, InstrItinClass itin,
+          string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+class T2Ii12<dag oops, dag iops, InstrItinClass itin,
+             string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",
+            pattern>;
+class T2Ii8<dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "",
+            pattern>;
+class T2Iso<dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "",
+            pattern>;
+class T2Ipc<dag oops, dag iops, InstrItinClass itin,
+            string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "",
+            pattern>;
+class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops,
+              InstrItinClass itin, string opc, string asm, string cstr,
+              list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, cstr,
+            pattern> {
+  bits<4> Rt;
+  bits<4> Rt2;
+  bits<13> addr;
+  let Inst{31-25} = 0b1110100;
+  let Inst{24}    = P;
+  let Inst{23}    = addr{8};
+  let Inst{22}    = 1;
+  let Inst{21}    = W;
+  let Inst{20}    = isLoad;
+  let Inst{19-16} = addr{12-9};
+  let Inst{15-12} = Rt{3-0};
+  let Inst{11-8}  = Rt2{3-0};
+  let Inst{7-0}   = addr{7-0};
+}
+
+class T2sI<dag oops, dag iops, InstrItinClass itin,
+           string opc, string asm, list<dag> pattern>
+  : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "",
+             pattern>;
+
+class T2XI<dag oops, dag iops, InstrItinClass itin,
+           string asm, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+class T2JTI<dag oops, dag iops, InstrItinClass itin,
+            string asm, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+
+// Move to/from coprocessor instructions
+class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
+  : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
+  let Inst{31-28} = 0b1111;
+}
+
+// Two-address instructions
+class T2XIt<dag oops, dag iops, InstrItinClass itin,
+            string asm, string cstr, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>;
+
+// T2Iidxldst - Thumb2 indexed load / store instructions.
+class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
+                 dag oops, dag iops,
+                 AddrMode am, IndexMode im, InstrItinClass itin,
+                 string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ins pred:$p));
+  let AsmString = !strconcat(opc, "${p}", asm);
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb2];
+  let Inst{31-27} = 0b11111;
+  let Inst{26-25} = 0b00;
+  let Inst{24}    = signed;
+  let Inst{23}    = 0;
+  let Inst{22-21} = opcod;
+  let Inst{20}    = load;
+  let Inst{11}    = 1;
+  // (P, W) = (1, 1) Pre-indexed or (0, 1) Post-indexed
+  let Inst{10}    = pre; // The P bit.
+  let Inst{8}     = 1;   // The W bit.
+
+  bits<9> addr;
+  let Inst{7-0} = addr{7-0};
+  let Inst{9}   = addr{8}; // Sign bit
+
+  bits<4> Rt;
+  bits<4> Rn;
+  let Inst{15-12} = Rt{3-0};
+  let Inst{19-16} = Rn{3-0};
+}
+
+// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only, HasV5T];
+}
+
+// T1Pat - Same as Pat<>, but requires that the compiler be in Thumb1 mode.
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb, IsThumb1Only];
+}
+
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb2];
+}
+
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// ARM VFP Instruction templates.
+//
+
+// Almost all VFP instructions are predicable.
+class VFPI pattern> + : InstARM { + bits<4> p; + let Inst{31-28} = p; + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", asm); + let Pattern = pattern; + let PostEncoderMethod = "VFPThumb2PostEncoder"; + list Predicates = [HasVFP2]; +} + +// Special cases +class VFPXI pattern> + : InstARM { + bits<4> p; + let Inst{31-28} = p; + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = asm; + let Pattern = pattern; + let PostEncoderMethod = "VFPThumb2PostEncoder"; + list Predicates = [HasVFP2]; +} + +class VFPAI pattern> + : VFPI { + let PostEncoderMethod = "VFPThumb2PostEncoder"; +} + +// ARM VFP addrmode5 loads and stores +class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + // Instruction operands. + bits<5> Dd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Dd{4}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Dd{3-0}; + let Inst{7-0} = addr{7-0}; // imm8 + + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + +class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPI { + // Instruction operands. + bits<5> Sd; + bits<13> addr; + + // Encode instruction operands. + let Inst{23} = addr{8}; // U (add = (U == '1')) + let Inst{22} = Sd{0}; + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Sd{4-1}; + let Inst{7-0} = addr{7-0}; // imm8 + + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-24} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + + // Loads & stores operate on both NEON and VFP pipelines. + let D = VFPNeonDomain; +} + +// VFP Load / store multiple pseudo instructions. +class PseudoVFPLdStM pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let Pattern = pattern; + list Predicates = [HasVFP2]; +} + +// Load / store multiple +class AXDI4 pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = regs{12}; + let Inst{15-12} = regs{11-8}; + let Inst{7-0} = regs{7-0}; + + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-25} = 0b110; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision +} + +class AXSI4 pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = regs{8}; + let Inst{15-12} = regs{12-9}; + let Inst{7-0} = regs{7-0}; + + // TODO: Mark the instructions with the appropriate subtarget info. + let Inst{27-25} = 0b110; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision +} + +// Double precision, unary +class ADuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + // Encode instruction operands. 
+ let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Double precision, binary +class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : VFPAI { + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Single precision, unary +class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + +// Single precision unary, if no NEON. Same as ASuI except not available if +// NEON is enabled. +class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, + string asm, list pattern> + : ASuI { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + +// Single precision, binary +class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : VFPAI { + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = op6; + let Inst{4} = op4; +} + +// Single precision binary, if no NEON. Same as ASbI except not available if +// NEON is enabled. +class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, + dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : ASbI { + list Predicates = [HasVFP2,DontUseNEONForFP]; + + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + // Encode instruction operands. 
+ let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; +} + +// VFP conversion instructions +class AVConv1I opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : VFPAI { + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-8} = opcod4; + let Inst{6} = 1; + let Inst{4} = 0; +} + +// VFP conversion between floating-point and fixed-point +class AVConv1XI op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5, + dag oops, dag iops, InstrItinClass itin, string opc, string asm, + list pattern> + : AVConv1I { + // size (fixed-point number): sx == 0 ? 16 : 32 + let Inst{7} = op5; // sx +} + +// VFP conversion instructions, if no NEON +class AVConv1In opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AVConv1I { + list Predicates = [HasVFP2,DontUseNEONForFP]; +} + +class AVConvXI opcod1, bits<4> opcod2, dag oops, dag iops, Format f, + InstrItinClass itin, + string opc, string asm, list pattern> + : VFPAI { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{4} = 1; +} + +class AVConv2I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv3I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv4I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, + InstrItinClass itin, string opc, string asm, list pattern> + : AVConvXI; + +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM NEON Instruction templates. +// + +class NeonI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; +} + +// Same as NeonI except it does not have a "data type" specifier. 
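+
+// [Editorial aside on NeonI above] Its AsmString is assembled as
+// opc + "${p}" + "." + dt + "\t" + asm, so e.g. opc = "vadd", dt = "i32",
+// asm = "$Vd, $Vn, $Vm" prints as "vadd.i32<tab>$Vd, $Vn, $Vm" ("${p}"
+// expands to the predicate suffix, empty for AL).  Equivalent C++ sketch:
+//
+//   std::string S = std::string("vadd") + "${p}" + "." + "i32" + "\t"
+//                   + "$Vd, $Vn, $Vm";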
+class NeonXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; +} + +class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NeonI { + let Inst{31-24} = 0b11110100; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{7-4} = op7_4; + + let PostEncoderMethod = "NEONThumb2LoadStorePostEncoder"; + + bits<5> Vd; + bits<6> Rn; + bits<4> Rm; + + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Rn{3-0}; + let Inst{3-0} = Rm{3-0}; +} + +class NLdStLn op21_20, bits<4> op11_8, bits<4> op7_4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NLdSt { + bits<3> lane; +} + +class PseudoNLdSt + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + list Predicates = [HasNEON]; +} + +class PseudoNeonI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let Pattern = pattern; + list Predicates = [HasNEON]; +} + +class NDataI pattern> + : NeonI { + let Inst{31-25} = 0b1111001; + let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; +} + +class NDataXI pattern> + : NeonXI { + let Inst{31-25} = 0b1111001; + let PostEncoderMethod = "NEONThumb2DataIPostEncoder"; +} + +// NEON "one register and a modified immediate" format. +class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, + bit op5, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, + list pattern> + : NDataI { + let Inst{23} = op23; + let Inst{21-19} = op21_19; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{5} = op5; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<13> SIMM; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{24} = SIMM{7}; + let Inst{18-16} = SIMM{6-4}; + let Inst{3-0} = SIMM{3-0}; +} + +// NEON 2 vector register format. +class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// Same as N2V except it doesn't have a datatype suffix. +class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { + let Inst{24-23} = op24_23; + let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11-7} = op11_7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// NEON 2 vector register with immediate. 
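+
+// [Editorial aside on N1ModImm above] The 8-bit modified immediate
+// "abcdefgh" is scattered across the word: a -> Inst{24}, bcd -> Inst{18-16},
+// efgh -> Inst{3-0}.  Hypothetical C++ sketch (not LLVM API):
+//
+//   static void splitNEONModImm(unsigned imm8, unsigned &a, unsigned &bcd,
+//                               unsigned &efgh) {
+//     a    = (imm8 >> 7) & 0x1;
+//     bcd  = (imm8 >> 4) & 0x7;
+//     efgh = imm8 & 0xF;
+//   }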
+class N2VImm op11_8, bit op7, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{11-8} = op11_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vm; + bits<6> SIMM; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; + let Inst{21-16} = SIMM{5-0}; +} + +// NEON 3 vector register format. +class N3V op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// Same as N3V except it doesn't have a data type suffix. +class N3VX op21_20, bits<4> op11_8, bit op6, + bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { + let Inst{24} = op24; + let Inst{23} = op23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = Vm{4}; +} + +// NEON VMOVs between scalar and core registers. 
+class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : InstARM { + let Inst{27-20} = opcod1; + let Inst{11-8} = opcod2; + let Inst{6-5} = opcod3; + let Inst{4} = 1; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ins pred:$p)); + let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + + let PostEncoderMethod = "NEONThumb2DupPostEncoder"; + + bits<5> V; + bits<4> R; + bits<4> p; + bits<4> lane; + + let Inst{31-28} = p{3-0}; + let Inst{7} = V{4}; + let Inst{19-16} = V{3-0}; + let Inst{15-12} = R{3-0}; +} +class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; +class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; +class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, + dag oops, dag iops, InstrItinClass itin, + string opc, string dt, string asm, list pattern> + : NVLaneOp; + +// Vector Duplicate Lane (from scalar to all elements) +class NVDupLane op19_16, bit op6, dag oops, dag iops, + InstrItinClass itin, string opc, string dt, string asm, + list pattern> + : NDataI { + let Inst{24-23} = 0b11; + let Inst{21-20} = 0b11; + let Inst{19-16} = op19_16; + let Inst{11-7} = 0b11000; + let Inst{6} = op6; + let Inst{4} = 0; + + bits<5> Vd; + bits<5> Vm; + bits<4> lane; + + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; +} + +// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON +// for single-precision FP. +class NEONFPPat : Pat { + list Predicates = [HasNEON,UseNEONForFP]; +} diff --git a/final/lib/Target/ARM/ARMInstrInfo.cpp b/final/lib/Target/ARM/ARMInstrInfo.cpp new file mode 100644 index 00000000000..6f48d967f91 --- /dev/null +++ b/final/lib/Target/ARM/ARMInstrInfo.cpp @@ -0,0 +1,61 @@ +//===- ARMInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "ARMInstrInfo.h" +#include "ARM.h" +#include "ARMAddressingModes.h" +#include "ARMGenInstrInfo.inc" +#include "ARMMachineFunctionInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/MC/MCAsmInfo.h" +using namespace llvm; + +ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) + : ARMBaseInstrInfo(STI), RI(*this, STI) { +} + +unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { + switch (Opc) { + default: break; + case ARM::LDR_PRE: + case ARM::LDR_POST: + return ARM::LDRi12; + case ARM::LDRH_PRE: + case ARM::LDRH_POST: + return ARM::LDRH; + case ARM::LDRB_PRE: + case ARM::LDRB_POST: + return ARM::LDRBi12; + case ARM::LDRSH_PRE: + case ARM::LDRSH_POST: + return ARM::LDRSH; + case ARM::LDRSB_PRE: + case ARM::LDRSB_POST: + return ARM::LDRSB; + case ARM::STR_PRE: + case ARM::STR_POST: + return ARM::STRi12; + case ARM::STRH_PRE: + case ARM::STRH_POST: + return ARM::STRH; + case ARM::STRB_PRE: + case ARM::STRB_POST: + return ARM::STRBi12; + } + + return 0; +} diff --git a/final/lib/Target/ARM/ARMInstrInfo.h b/final/lib/Target/ARM/ARMInstrInfo.h new file mode 100644 index 00000000000..f2c7bdc31be --- /dev/null +++ b/final/lib/Target/ARM/ARMInstrInfo.h @@ -0,0 +1,44 @@ +//===- ARMInstrInfo.h - ARM Instruction Information -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the ARM implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMINSTRUCTIONINFO_H +#define ARMINSTRUCTIONINFO_H + +#include "llvm/Target/TargetInstrInfo.h" +#include "ARMBaseInstrInfo.h" +#include "ARMRegisterInfo.h" +#include "ARMSubtarget.h" +#include "ARM.h" + +namespace llvm { + class ARMSubtarget; + +class ARMInstrInfo : public ARMBaseInstrInfo { + ARMRegisterInfo RI; +public: + explicit ARMInstrInfo(const ARMSubtarget &STI); + + // Return the non-pre/post incrementing version of 'Opc'. Return 0 + // if there is not such an opcode. + unsigned getUnindexedOpcode(unsigned Opc) const; + + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As + /// such, whenever a client has an instance of instruction info, it should + /// always be able to get register info as well (through this method). + /// + const ARMRegisterInfo &getRegisterInfo() const { return RI; } +}; + +} + +#endif diff --git a/final/lib/Target/ARM/ARMInstrInfo.td b/final/lib/Target/ARM/ARMInstrInfo.td new file mode 100644 index 00000000000..e0de65b865f --- /dev/null +++ b/final/lib/Target/ARM/ARMInstrInfo.td @@ -0,0 +1,3940 @@ +//===- ARMInstrInfo.td - Target Description for ARM Target -*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM instructions in TableGen format. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// ARM specific DAG Nodes. +// + +// Type profiles. +def SDT_ARMCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; +def SDT_ARMCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; + +def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>; + +def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; + +def SDT_ARMCMov : SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisVT<3, i32>]>; + +def SDT_ARMBrcond : SDTypeProfile<0, 2, + [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>; + +def SDT_ARMBrJT : SDTypeProfile<0, 3, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMBr2JT : SDTypeProfile<0, 4, + [SDTCisPtrTy<0>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +def SDT_ARMBCC_i64 : SDTypeProfile<0, 6, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, i32>, + SDTCisVT<3, i32>, SDTCisVT<4, i32>, + SDTCisVT<5, OtherVT>]>; + +def SDT_ARMAnd : SDTypeProfile<1, 2, + [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>]>; + +def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; + +def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, + SDTCisInt<2>]>; +def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; + +def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + +def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; + +def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + +def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, + SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; + +// Node definitions. 
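+
+// [Editorial aside] An SDTypeProfile<R, N, [...]> above reads "R results, N
+// operands, plus the listed constraints"; e.g. SDT_ARMCMov is one result and
+// three operands, with the result and first two operands sharing a type and
+// the third (the condition code) fixed to i32.  Hypothetical C++ sketch of
+// that constraint (names are illustrative, not LLVM API):
+//
+//   static bool cmovTypesOK(EVT Res, EVT Op1, EVT Op2, EVT Op3) {
+//     return Res == Op1 && Res == Op2 && Op3 == MVT::i32;
+//   }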
+def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; +def ARMWrapperDYN : SDNode<"ARMISD::WrapperDYN", SDTIntUnaryOp>; +def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>; +def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; + +def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart, + [SDNPHasChain, SDNPOutGlue]>; +def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; + +def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; +def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + +def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; + +def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, + [SDNPInGlue]>; + +def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, + [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; + +def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, + [SDNPHasChain]>; +def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, + [SDNPHasChain]>; + +def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, + [SDNPHasChain]>; + +def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp, + [SDNPOutGlue]>; + +def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, + [SDNPOutGlue, SDNPCommutative]>; + +def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>; + +def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; +def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>; +def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>; + +def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>; +def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", + SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>; +def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", + SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>; +def ARMeh_sjlj_dispatchsetup: SDNode<"ARMISD::EH_SJLJ_DISPATCHSETUP", + SDT_ARMEH_SJLJ_DispatchSetup, [SDNPHasChain]>; + + +def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, + [SDNPHasChain]>; +def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + +def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; + +def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; + + +def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; + +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. 
+//
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
+def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
+def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
+def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
+def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
+def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
+ AssemblerPredicate;
+def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
+ AssemblerPredicate;
+def HasMP : Predicate<"Subtarget->hasMPExtension()">,
+ AssemblerPredicate;
+def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
+def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
+def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
+def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
+def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+
+// FIXME: Eventually this will be just "hasV6T2Ops".
+def UseMovt : Predicate<"Subtarget->useMovt()">;
+def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">;
+
+//===----------------------------------------------------------------------===//
+// ARM Flag Definitions.
+
+class RegConstraint<string C> {
+  string Constraints = C;
+}
+
+//===----------------------------------------------------------------------===//
+// ARM specific transformation functions and pattern fragments.
+//
+
+// so_imm_neg_XFORM - Return a so_imm value packed into the format described for
+// so_imm_neg def below.
+def so_imm_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+// so_imm_not_XFORM - Return a so_imm value packed into the format described for
+// so_imm_not def below.
+def so_imm_not_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~(int)N->getZExtValue(), MVT::i32);
+}]>;
+
+/// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15].
+def imm1_15 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16;
+}]>;
+
+/// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31].
+def imm16_31 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
+def so_imm_neg :
+  PatLeaf<(imm), [{
+    return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1;
+  }], so_imm_neg_XFORM>;
+
+def so_imm_not :
+  PatLeaf<(imm), [{
+    return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1;
+  }], so_imm_not_XFORM>;
+
+// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
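+
+// [Editorial aside] so_imm_neg/so_imm_not above accept a constant only when
+// its negation/complement fits ARM's shifter-operand immediate: an 8-bit
+// value rotated right by an even amount.  A self-contained sketch of that
+// test (hypothetical helper, not ARM_AM::getSOImmVal itself; assumes 32-bit
+// unsigned):
+//
+//   static bool isSOImm(unsigned V) {
+//     for (unsigned Rot = 0; Rot < 32; Rot += 2) {
+//       // Rotate V left by Rot to undo a rotate-right encoding.
+//       unsigned R = Rot ? ((V << Rot) | (V >> (32 - Rot))) : V;
+//       if ((R & ~0xFFu) == 0) return true;
+//     }
+//     return false;
+//   }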
+def sext_16_node : PatLeaf<(i32 GPR:$a), [{
+  return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+}]>;
+
+/// Split a 32-bit immediate into two 16 bit parts.
+def hi16 : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((uint32_t)N->getZExtValue() >> 16, MVT::i32);
+}]>;
+
+def lo16AllZero : PatLeaf<(i32 imm), [{
+  // Returns true if all low 16-bits are 0.
+  return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
+}], hi16>;
+
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// [0,65535].
+def imm0_65535 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 65536;
+}]>;
+
+class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
+class UnOpFrag<dag res> : PatFrag<(ops node:$Src), res>;
+
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+  PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+          [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+  PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+          [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+  PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+          [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+  PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+          [{return N->hasAnyUseOfValue(1);}]>;
+
+// An 'and' node with a single use.
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'xor' node with a single use.
+def xor_su : PatFrag<(ops node:$lhs, node:$rhs), (xor node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
+
+// An 'fmul' node with a single use.
+def fmul_su : PatFrag<(ops node:$lhs, node:$rhs), (fmul node:$lhs, node:$rhs),[{
+  return N->hasOneUse();
+}]>;
+
+// An 'fadd' node which checks for a single non-hazardous use.
+def fadd_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
+// An 'fsub' node which checks for a single non-hazardous use.
+def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
+  return hasNoVMLxHazardUse(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Operand Definitions.
+//
+
+// Branch target.
+// FIXME: rename brtarget to t2_brtarget
+def brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// FIXME: get rid of this one?
+def uncondbrtarget : Operand<OtherVT> {
+  let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+}
+
+// Branch target for ARM. Handles conditional/unconditional.
+def br_target : Operand<OtherVT> {
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+// Call target.
+// FIXME: rename bltarget to t2_bl_target?
+def bltarget : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getBranchTargetOpValue";
+}
+
+// Call target for ARM. Handles conditional/unconditional.
+// FIXME: rename bl_target to t2_bltarget?
+def bl_target : Operand<i32> {
+  // Encoded the same as branch targets.
+  let EncoderMethod = "getARMBranchTargetOpValue";
+}
+
+
+// A list of registers separated by comma. Used by load/store multiple.
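+
+// [Editorial aside] hi16/lo16AllZero above support the movw/movt way of
+// materializing a 32-bit constant: movw writes the low half (zeroing the
+// top), movt writes the high half (keeping the low).  Hypothetical sketch
+// (assumes 32-bit unsigned):
+//
+//   static void splitImm32(unsigned V, unsigned &Lo16, unsigned &Hi16) {
+//     Lo16 = V & 0xFFFF;  // movw Rd, #Lo16
+//     Hi16 = V >> 16;     // movt Rd, #Hi16
+//   }
+//   // lo16AllZero matches constants whose low half is zero, in which case
+//   // only the hi16 half is interesting.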
+def RegListAsmOperand : AsmOperandClass { + let Name = "RegList"; + let SuperClasses = []; +} + +def DPRRegListAsmOperand : AsmOperandClass { + let Name = "DPRRegList"; + let SuperClasses = []; +} + +def SPRRegListAsmOperand : AsmOperandClass { + let Name = "SPRRegList"; + let SuperClasses = []; +} + +def reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = RegListAsmOperand; + let PrintMethod = "printRegisterList"; +} + +def dpr_reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = DPRRegListAsmOperand; + let PrintMethod = "printRegisterList"; +} + +def spr_reglist : Operand { + let EncoderMethod = "getRegisterListOpValue"; + let ParserMatchClass = SPRRegListAsmOperand; + let PrintMethod = "printRegisterList"; +} + +// An operand for the CONSTPOOL_ENTRY pseudo-instruction. +def cpinst_operand : Operand { + let PrintMethod = "printCPInstOperand"; +} + +// Local PC labels. +def pclabel : Operand { + let PrintMethod = "printPCLabel"; +} + +// ADR instruction labels. +def adrlabel : Operand { + let EncoderMethod = "getAdrLabelOpValue"; +} + +def neon_vcvt_imm32 : Operand { + let EncoderMethod = "getNEONVcvtImm32OpValue"; +} + +// rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. +def rot_imm : Operand, PatLeaf<(i32 imm), [{ + int32_t v = (int32_t)N->getZExtValue(); + return v == 8 || v == 16 || v == 24; }]> { + let EncoderMethod = "getRotImmOpValue"; +} + +// shift_imm: An integer that encodes a shift amount and the type of shift +// (currently either asr or lsl) using the same encoding used for the +// immediates in so_reg operands. +def shift_imm : Operand { + let PrintMethod = "printShiftImmOperand"; +} + +// shifter_operand operands: so_reg and so_imm. +def so_reg : Operand, // reg reg imm + ComplexPattern { + let EncoderMethod = "getSORegOpValue"; + let PrintMethod = "printSORegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} +def shift_so_reg : Operand, // reg reg imm + ComplexPattern { + let EncoderMethod = "getSORegOpValue"; + let PrintMethod = "printSORegOperand"; + let MIOperandInfo = (ops GPR, GPR, i32imm); +} + +// so_imm - Match a 32-bit shifter_operand immediate operand, which is an +// 8-bit immediate rotated by an arbitrary number of bits. +def so_imm : Operand, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { + let EncoderMethod = "getSOImmOpValue"; + let PrintMethod = "printSOImmOperand"; +} + +// Break so_imm's up into two pieces. This handles immediates with up to 16 +// bits set in them. This uses so_imm2part to match and so_imm2part_[12] to +// get the first/second pieces. +def so_imm2part : PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); +}]>; + +/// arm_i32imm - True for +V6T2, or true only if so_imm2part is true. +/// +def arm_i32imm : PatLeaf<(imm), [{ + if (Subtarget->hasV6T2Ops()) + return true; + return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); +}]>; + +/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. +def imm0_31 : Operand, PatLeaf<(imm), [{ + return (int32_t)N->getZExtValue() < 32; +}]>; + +/// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. +def imm0_31_m1 : Operand, PatLeaf<(imm), [{ + return (int32_t)N->getZExtValue() < 32; +}]> { + let EncoderMethod = "getImmMinusOneOpValue"; +} + +// i32imm_hilo16 - For movt/movw - sets the MC Encoder method. 
+// The imm is split into imm{15-12}, imm{11-0} +// +def i32imm_hilo16 : Operand { + let EncoderMethod = "getHiLo16ImmOpValue"; +} + +/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield +/// e.g., 0xf000ffff +def bf_inv_mask_imm : Operand, + PatLeaf<(imm), [{ + return ARM::isBitFieldInvertedMask(N->getZExtValue()); +}] > { + let EncoderMethod = "getBitfieldInvertedMaskOpValue"; + let PrintMethod = "printBitfieldInvMaskImmOperand"; +} + +/// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p +def lsb_pos_imm : Operand, PatLeaf<(imm), [{ + return isInt<5>(N->getSExtValue()); +}]>; + +/// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p +def width_imm : Operand, PatLeaf<(imm), [{ + return N->getSExtValue() > 0 && N->getSExtValue() <= 32; +}] > { + let EncoderMethod = "getMsbOpValue"; +} + +// Define ARM specific addressing modes. + + +// addrmode_imm12 := reg +/- imm12 +// +def addrmode_imm12 : Operand, + ComplexPattern { + // 12-bit immediate operand. Note that instructions using this encode + // #0 and #-0 differently. We flag #-0 as the magic value INT32_MIN. All other + // immediate values are as normal. + + let EncoderMethod = "getAddrModeImm12OpValue"; + let PrintMethod = "printAddrModeImm12Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); +} +// ldst_so_reg := reg +/- reg shop imm +// +def ldst_so_reg : Operand, + ComplexPattern { + let EncoderMethod = "getLdStSORegOpValue"; + // FIXME: Simplify the printer + let PrintMethod = "printAddrMode2Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +// addrmode2 := reg +/- imm12 +// := reg +/- reg shop imm +// +def addrmode2 : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode2OpValue"; + let PrintMethod = "printAddrMode2Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am2offset : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode2OffsetOpValue"; + let PrintMethod = "printAddrMode2OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// addrmode3 := reg +/- reg +// addrmode3 := reg +/- imm8 +// +def addrmode3 : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode3OpValue"; + let PrintMethod = "printAddrMode3Operand"; + let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); +} + +def am3offset : Operand, + ComplexPattern { + let EncoderMethod = "getAddrMode3OffsetOpValue"; + let PrintMethod = "printAddrMode3OffsetOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +// ldstm_mode := {ia, ib, da, db} +// +def ldstm_mode : OptionalDefOperand { + let EncoderMethod = "getLdStmModeOpValue"; + let PrintMethod = "printLdStmModeOperand"; +} + +def MemMode5AsmOperand : AsmOperandClass { + let Name = "MemMode5"; + let SuperClasses = []; +} + +// addrmode5 := reg +/- imm8*4 +// +def addrmode5 : Operand, + ComplexPattern { + let PrintMethod = "printAddrMode5Operand"; + let MIOperandInfo = (ops GPR:$base, i32imm); + let ParserMatchClass = MemMode5AsmOperand; + let EncoderMethod = "getAddrMode5OpValue"; +} + +// addrmode6 := reg with optional alignment +// +def addrmode6 : Operand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6AddressOpValue"; +} + +def am6offset : Operand, + ComplexPattern { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); + let EncoderMethod = "getAddrMode6OffsetOpValue"; +} + +// Special version of 
addrmode6 to handle alignment encoding for VLD-dup +// instructions, specifically VLD4-dup. +def addrmode6dup : Operand, + ComplexPattern{ + let PrintMethod = "printAddrMode6Operand"; + let MIOperandInfo = (ops GPR:$addr, i32imm); + let EncoderMethod = "getAddrMode6DupAddressOpValue"; +} + +// addrmodepc := pc + reg +// +def addrmodepc : Operand, + ComplexPattern { + let PrintMethod = "printAddrModePCOperand"; + let MIOperandInfo = (ops GPR, i32imm); +} + +def nohash_imm : Operand { + let PrintMethod = "printNoHashImmediate"; +} + +def CoprocNumAsmOperand : AsmOperandClass { + let Name = "CoprocNum"; + let SuperClasses = []; + let ParserMethod = "tryParseCoprocNumOperand"; +} + +def CoprocRegAsmOperand : AsmOperandClass { + let Name = "CoprocReg"; + let SuperClasses = []; + let ParserMethod = "tryParseCoprocRegOperand"; +} + +def p_imm : Operand { + let PrintMethod = "printPImmediate"; + let ParserMatchClass = CoprocNumAsmOperand; +} + +def c_imm : Operand { + let PrintMethod = "printCImmediate"; + let ParserMatchClass = CoprocRegAsmOperand; +} + +//===----------------------------------------------------------------------===// + +include "ARMInstrFormats.td" + +//===----------------------------------------------------------------------===// +// Multiclass helpers... +// + +/// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a +/// binop that produces a value. +multiclass AsI1_bin_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + // The register-immediate version is re-materializable. This is useful + // in particular for taking the address of a local. + let isReMaterializable = 1 in { + def ri : AsI1 { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + } + def rr : AsI1 { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + def rs : AsI1 { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = shift; + } +} + +/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the +/// instruction modifies the CPSR register. +let isCodeGenOnly = 1, Defs = [CPSR] in { +multiclass AI1_bin_s_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AI1 { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; + } + def rr : AI1 { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let isCommutable = Commutable; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + def rs : AI1 { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-0} = shift; + } +} +} + +/// AI1_cmp_irs - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test +/// patterns. Similar to AsI1_bin_irs except the instruction does not produce +/// a explicit result, only implicitly set CPSR. 
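+
+// [Editorial aside] The ri/rr/rs triples above (and in AI1_cmp_irs below)
+// map onto the classic A32 data-processing word, with Inst{25} (the "I" bit)
+// choosing immediate vs. register operand2.  Worked sketch (hypothetical
+// helper, not LLVM API), using the layout cond|00|I|opcod|S|Rn|Rd|operand2:
+//
+//   static unsigned dpWord(unsigned cond, unsigned I, unsigned opcod,
+//                          unsigned S, unsigned Rn, unsigned Rd,
+//                          unsigned op2) {
+//     return (cond << 28) | (I << 25) | (opcod << 21) | (S << 20)
+//            | (Rn << 16) | (Rd << 12) | op2;
+//   }
+//   // "add r0, r1, #1" (AL): dpWord(0xE, 1, 0x4, 0, 1, 0, 1) == 0xE2810001.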
+let isCompare = 1, Defs = [CPSR] in { +multiclass AI1_cmp_irs opcod, string opc, + InstrItinClass iii, InstrItinClass iir, InstrItinClass iis, + PatFrag opnode, bit Commutable = 0> { + def ri : AI1 { + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-0} = imm; + } + def rr : AI1 { + bits<4> Rn; + bits<4> Rm; + let isCommutable = Commutable; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; + } + def rs : AI1 { + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{19-16} = Rn; + let Inst{15-12} = 0b0000; + let Inst{11-0} = shift; + } +} +} + +/// AI_ext_rrot - A unary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +/// FIXME: Remove the 'r' variant. Its rot_imm is zero. +multiclass AI_ext_rrot opcod, string opc, PatFrag opnode> { + def r : AExtI, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-10} = 0b00; + let Inst{3-0} = Rm; + } + def r_rot : AExtI, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{3-0} = Rm; + } +} + +multiclass AI_ext_rrot_np opcod, string opc> { + def r : AExtI, + Requires<[IsARM, HasV6]> { + let Inst{19-16} = 0b1111; + let Inst{11-10} = 0b00; + } + def r_rot : AExtI, + Requires<[IsARM, HasV6]> { + bits<2> rot; + let Inst{19-16} = 0b1111; + let Inst{11-10} = rot; + } +} + +/// AI_exta_rrot - A binary operation with two forms: one whose operand is a +/// register and one whose operand is a register rotated by 8/16/24. +multiclass AI_exta_rrot opcod, string opc, PatFrag opnode> { + def rr : AExtI, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = 0b00; + let Inst{9-4} = 0b000111; + let Inst{3-0} = Rm; + } + def rr_rot : AExtI, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rm; + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = rot; + let Inst{9-4} = 0b000111; + let Inst{3-0} = Rm; + } +} + +// For disassembly only. +multiclass AI_exta_rrot_np opcod, string opc> { + def rr : AExtI, + Requires<[IsARM, HasV6]> { + let Inst{11-10} = 0b00; + } + def rr_rot : AExtI, + Requires<[IsARM, HasV6]> { + bits<4> Rn; + bits<2> rot; + let Inst{19-16} = Rn; + let Inst{11-10} = rot; + } +} + +/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube. 
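+
+// [Editorial aside] In AI_ext_rrot/AI_exta_rrot above, the two-bit rot field
+// (Inst{11-10}) encodes a rotate-right of the source by rot*8, which is how
+// sxtb/uxtb and friends reach any byte of the word.  Hypothetical sketch:
+//
+//   static unsigned rotField(unsigned rotateBy) {  // rotateBy is 8, 16 or 24
+//     return rotateBy / 8;                         // -> Inst{11-10}
+//   }
+//   // e.g. "uxtb r0, r1, ror #16" uses rot == 2.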
+let Uses = [CPSR] in { +multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, + bit Commutable = 0> { + def ri : AsI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + } + def rr : AsI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let isCommutable = Commutable; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } + def rs : AsI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + } +} +// Carry setting variants +let isCodeGenOnly = 1, Defs = [CPSR] in { +multiclass AI1_adde_sube_s_irs opcod, string opc, PatFrag opnode, + bit Commutable = 0> { + def Sri : AXI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; + let Inst{20} = 1; + let Inst{25} = 1; + } + def Srr : AXI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let isCommutable = Commutable; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{20} = 1; + let Inst{25} = 0; + } + def Srs : AXI1, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{20} = 1; + let Inst{25} = 0; + } +} +} +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { +multiclass AI_ldr1 { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. + def i12: AI2ldst<0b010, 1, isByte, (outs GPR:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, iii, opc, "\t$Rt, $addr", + [(set GPR:$Rt, (opnode addrmode_imm12:$addr))]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 1, isByte, (outs GPR:$Rt), (ins ldst_so_reg:$shift), + AddrModeNone, LdFrm, iir, opc, "\t$Rt, $shift", + [(set GPR:$Rt, (opnode ldst_so_reg:$shift))]> { + bits<4> Rt; + bits<17> shift; + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +} + +multiclass AI_str1 { + // Note: We use the complex addrmode_imm12 rather than just an input + // GPR and a constrained immediate so that we can use this to match + // frame index references and avoid matching constant pool references. 
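+  // [Editorial aside] In the i12 forms here the 17-bit addr bundle decodes
+  // as addr{16-13} = Rn, addr{12} = U (1 = add, 0 = subtract) and
+  // addr{11-0} = imm12, matching the A32 load/store word.  Hypothetical
+  // packing sketch (not LLVM API):
+  //   static unsigned ldstWord(unsigned cond, unsigned P, unsigned U,
+  //                            unsigned B, unsigned W, unsigned L,
+  //                            unsigned Rn, unsigned Rt, unsigned imm12) {
+  //     return (cond << 28) | (1u << 26) | (P << 24) | (U << 23) | (B << 22)
+  //            | (W << 21) | (L << 20) | (Rn << 16) | (Rt << 12) | imm12;
+  //   }
+  //   // ldstWord(0xE,1,1,0,0,1, 1,0, 4) == 0xE5910004  ("ldr r0, [r1, #4]").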
+ def i12 : AI2ldst<0b010, 0, isByte, (outs), + (ins GPR:$Rt, addrmode_imm12:$addr), + AddrMode_i12, StFrm, iii, opc, "\t$Rt, $addr", + [(opnode GPR:$Rt, addrmode_imm12:$addr)]> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 + } + def rs : AI2ldst<0b011, 0, isByte, (outs), (ins GPR:$Rt, ldst_so_reg:$shift), + AddrModeNone, StFrm, iir, opc, "\t$Rt, $shift", + [(opnode GPR:$Rt, ldst_so_reg:$shift)]> { + bits<4> Rt; + bits<17> shift; + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = Rt; + let Inst{11-0} = shift{11-0}; + } +} +//===----------------------------------------------------------------------===// +// Instructions +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Miscellaneous Instructions. +// + +/// CONSTPOOL_ENTRY - This instruction represents a floating constant pool in +/// the function. The first operand is the ID# for this instruction, the second +/// is the index into the MachineConstantPool that this is, the third is the +/// size in bytes of this constant pool entry. +let neverHasSideEffects = 1, isNotDuplicable = 1 in +def CONSTPOOL_ENTRY : +PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, + i32imm:$size), NoItinerary, []>; + +// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE +// from removing one half of the matched pairs. That breaks PEI, which assumes +// these will always be in pairs, and asserts if it finds otherwise. Better way? +let Defs = [SP], Uses = [SP], hasSideEffects = 1 in { +def ADJCALLSTACKUP : +PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary, + [(ARMcallseq_end timm:$amt1, timm:$amt2)]>; + +def ADJCALLSTACKDOWN : +PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, + [(ARMcallseq_start timm:$amt)]>; +} + +def NOP : AI<(outs), (ins), MiscFrm, NoItinerary, "nop", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{15-8} = 0b11110000; + let Inst{7-0} = 0b00000000; +} + +def YIELD : AI<(outs), (ins), MiscFrm, NoItinerary, "yield", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{15-8} = 0b11110000; + let Inst{7-0} = 0b00000001; +} + +def WFE : AI<(outs), (ins), MiscFrm, NoItinerary, "wfe", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{15-8} = 0b11110000; + let Inst{7-0} = 0b00000010; +} + +def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{15-8} = 0b11110000; + let Inst{7-0} = 0b00000011; +} + +def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel", + "\t$dst, $a, $b", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{27-20} = 0b01101000; + let Inst{7-4} = 0b1011; + let Inst{11-8} = 0b1111; +} + +def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "", + [/* For disassembly only; pattern left 
blank */]>, + Requires<[IsARM, HasV6T2]> { + let Inst{27-16} = 0b001100100000; + let Inst{15-8} = 0b11110000; + let Inst{7-0} = 0b00000100; +} + +// The i32imm operand $val can be used by a debugger to store more information +// about the breakpoint. +def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010010; + let Inst{7-4} = 0b0111; +} + +// Change Processor State is a system instruction -- for disassembly and +// parsing only. +// FIXME: Since the asm parser has currently no clean way to handle optional +// operands, create 3 versions of the same instruction. Once there's a clean +// framework to represent optional operands, change this behavior. +class CPS + : AXI<(outs), iops, MiscFrm, NoItinerary, !strconcat("cps", asm_ops), + [/* For disassembly only; pattern left blank */]>, Requires<[IsARM]> { + bits<2> imod; + bits<3> iflags; + bits<5> mode; + bit M; + + let Inst{31-28} = 0b1111; + let Inst{27-20} = 0b00010000; + let Inst{19-18} = imod; + let Inst{17} = M; // Enabled if mode is set; + let Inst{16} = 0; + let Inst{8-6} = iflags; + let Inst{5} = 0; + let Inst{4-0} = mode; +} + +let M = 1 in + def CPS3p : CPS<(ins imod_op:$imod, iflags_op:$iflags, i32imm:$mode), + "$imod\t$iflags, $mode">; +let mode = 0, M = 0 in + def CPS2p : CPS<(ins imod_op:$imod, iflags_op:$iflags), "$imod\t$iflags">; + +let imod = 0, iflags = 0, M = 1 in + def CPS1p : CPS<(ins i32imm:$mode), "\t$mode">; + +// Preload signals the memory system of possible future data/instruction access. +// These are for disassembly only. +multiclass APreLoad read, bits<1> data, string opc> { + + def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, + !strconcat(opc, "\t$addr"), + [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> { + bits<4> Rt; + bits<17> addr; + let Inst{31-26} = 0b111101; + let Inst{25} = 0; // 0 for immediate form + let Inst{24} = data; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{19-16} = addr{16-13}; // Rn + let Inst{15-12} = 0b1111; + let Inst{11-0} = addr{11-0}; // imm12 + } + + def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, + !strconcat(opc, "\t$shift"), + [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> { + bits<17> shift; + let Inst{31-26} = 0b111101; + let Inst{25} = 1; // 1 for register form + let Inst{24} = data; + let Inst{23} = shift{12}; // U (add = ('U' == 1)) + let Inst{22} = read; + let Inst{21-20} = 0b01; + let Inst{19-16} = shift{16-13}; // Rn + let Inst{15-12} = 0b1111; + let Inst{11-0} = shift{11-0}; + } +} + +defm PLD : APreLoad<1, 1, "pld">, Requires<[IsARM]>; +defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; +defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; + +def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary, + "setend\t$end", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM]> { + bits<1> end; + let Inst{31-10} = 0b1111000100000001000000; + let Inst{9} = end; + let Inst{8-0} = 0; +} + +def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt", + [/* For disassembly only; pattern left blank */]>, + Requires<[IsARM, HasV7]> { + bits<4> opt; + let Inst{27-4} = 0b001100100000111100001111; + let Inst{3-0} = opt; +} + +// A5.4 Permanently UNDEFINED instructions. 
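+
+// [Editorial aside] BKPT above splits its 16-bit operand around the fixed
+// 0b0111 nibble: val{15-4} -> Inst{19-8} and val{3-0} -> Inst{3-0}.
+// Hypothetical packing sketch (not LLVM API):
+//
+//   static unsigned bkptWord(unsigned cond, unsigned val16) {
+//     return (cond << 28) | (0x12u << 20) | ((val16 >> 4) << 8)
+//            | (0x7u << 4) | (val16 & 0xF);
+//   }
+//   // bkptWord(0xE, 0) == 0xE1200070  ("bkpt #0").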
+let isBarrier = 1, isTerminator = 1 in +def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, + "trap", [(trap)]>, + Requires<[IsARM]> { + let Inst = 0xe7ffdefe; +} + +// Address computation and loads and stores in PIC mode. +let isNotDuplicable = 1 in { +def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), + Size4Bytes, IIC_iALUr, + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + +let AddedComplexity = 10 in { +def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iLoad_r, + [(set GPR:$dst, (load addrmodepc:$addr))]>; + +def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iLoad_bh_r, + [(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>; + +def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iLoad_bh_r, + [(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>; + +def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iLoad_bh_r, + [(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>; + +def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iLoad_bh_r, + [(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>; +} +let AddedComplexity = 10 in { +def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>; + +def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src, + addrmodepc:$addr)]>; + +def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), + Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; +} +} // isNotDuplicable = 1 + + +// LEApcrel - Load a pc-relative address into a register without offending the +// assembler. +let neverHasSideEffects = 1, isReMaterializable = 1 in +// The 'adr' mnemonic encodes differently if the label is before or after +// the instruction. The {24-21} opcode bits are set by the fixup, as we don't +// know until then which form of the instruction will be used. +def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label), + MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { + bits<4> Rd; + bits<12> label; + let Inst{27-25} = 0b001; + let Inst{20} = 0; + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rd; + let Inst{11-0} = label; +} +def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), + Size4Bytes, IIC_iALUi, []>; + +def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), + (ins i32imm:$label, nohash_imm:$id, pred:$p), + Size4Bytes, IIC_iALUi, []>; + +//===----------------------------------------------------------------------===// +// Control Flow Instructions. 
+// + +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { + // ARMV4T and above + def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "bx", "\tlr", [(ARMretflag)]>, + Requires<[IsARM, HasV4T]> { + let Inst{27-0} = 0b0001001011111111111100011110; + } + + // ARMV4 only + def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, + "mov", "\tpc, lr", [(ARMretflag)]>, + Requires<[IsARM, NoV4T]> { + let Inst{27-0} = 0b0001101000001111000000001110; + } +} + +// Indirect branches +let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { + // ARMV4T and above + def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", + [(brind GPR:$dst)]>, + Requires<[IsARM, HasV4T]> { + bits<4> dst; + let Inst{31-4} = 0b1110000100101111111111110001; + let Inst{3-0} = dst; + } + + // ARMV4 only + // FIXME: We would really like to define this as a vanilla ARMPat like: + // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)> + // With that, however, we can't set isBranch, isTerminator, etc.. + def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst), + Size4Bytes, IIC_Br, [(brind GPR:$dst)]>, + Requires<[IsARM, NoV4T]>; +} + +// All calls clobber the non-callee saved registers. SP is marked as +// a use to prevent stack-pointer assignments that appear immediately +// before calls from potentially appearing dead. +let isCall = 1, + // On non-Darwin platforms R9 is callee-saved. + Defs = [R0, R1, R2, R3, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], + Uses = [SP] in { + def BL : ABXI<0b1011, (outs), (ins bl_target:$func, variable_ops), + IIC_Br, "bl\t$func", + [(ARMcall tglobaladdr:$func)]>, + Requires<[IsARM, IsNotDarwin]> { + let Inst{31-28} = 0b1110; + bits<24> func; + let Inst{23-0} = func; + } + + def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func, variable_ops), + IIC_Br, "bl", "\t$func", + [(ARMcall_pred tglobaladdr:$func)]>, + Requires<[IsARM, IsNotDarwin]> { + bits<24> func; + let Inst{23-0} = func; + } + + // ARMv5T and above + def BLX : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx\t$func", + [(ARMcall GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { + bits<4> func; + let Inst{31-4} = 0b1110000100101111111111110011; + let Inst{3-0} = func; + } + + def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; + } + + // ARMv4T + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsNotDarwin]>; + + // ARMv4 + def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsNotDarwin]>; +} + +let isCall = 1, + // On Darwin R9 is call-clobbered. + // R7 is marked as a use to prevent frame-pointer assignments from being + // moved above / below calls. 
+ Defs = [R0, R1, R2, R3, R9, R12, LR, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, + D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], + Uses = [R7, SP] in { + def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops), + IIC_Br, "bl\t$func", + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> { + let Inst{31-28} = 0b1110; + bits<24> func; + let Inst{23-0} = func; + } + + def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops), + IIC_Br, "bl", "\t$func", + [(ARMcall_pred tglobaladdr:$func)]>, + Requires<[IsARM, IsDarwin]> { + bits<24> func; + let Inst{23-0} = func; + } + + // ARMv5T and above + def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx\t$func", + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { + bits<4> func; + let Inst{31-4} = 0b1110000100101111111111110011; + let Inst{3-0} = func; + } + + def BLXr9_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsDarwin]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; + } + + // ARMv4T + // Note: Restrict $func to the tGPR regclass to prevent it being in LR. + def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, HasV4T, IsDarwin]>; + + // ARMv4 + def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops), + Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, + Requires<[IsARM, NoV4T, IsDarwin]>; +} + +// Tail calls. + +// FIXME: These should probably be xformed into the non-TC versions of the +// instructions as part of MC lowering. +// FIXME: These seem to be used for both Thumb and ARM instruction selection. +// Thumb should have its own version since the instruction is actually +// different, even though the mnemonic is the same. +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin versions. + let Defs = [R0, R1, R2, R3, R9, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), + IIC_Br, []>, Requires<[IsDarwin]>; + + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + IIC_Br, []>, Requires<[IsDarwin]>; + + def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsARM, IsDarwin]>; + + def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsThumb, IsDarwin]>; + + def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]> { + bits<4> dst; + let Inst{31-4} = 0b1110000100101111111111110001; + let Inst{3-0} = dst; + } + } + + // Non-Darwin versions (the difference is R9). 
+ let Defs = [R0, R1, R2, R3, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops), + IIC_Br, []>, Requires<[IsNotDarwin]>; + + def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + IIC_Br, []>, Requires<[IsNotDarwin]>; + + def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsARM, IsNotDarwin]>; + + def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsThumb, IsNotDarwin]>; + + def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsNotDarwin]> { + bits<4> dst; + let Inst{31-4} = 0b1110000100101111111111110001; + let Inst{3-0} = dst; + } + } +} + +let isBranch = 1, isTerminator = 1 in { + // B is "predicable" since it can be xformed into a Bcc. + let isBarrier = 1 in { + let isPredicable = 1 in + def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, + "b\t$target", [(br bb:$target)]> { + bits<24> target; + let Inst{31-28} = 0b1110; + let Inst{23-0} = target; + } + + let isNotDuplicable = 1, isIndirectBranch = 1 in { + def BR_JTr : ARMPseudoInst<(outs), + (ins GPR:$target, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + // FIXME: This shouldn't use the generic "addrmode2," but rather be split + // into i12 and rs suffixed versions. + def BR_JTm : ARMPseudoInst<(outs), + (ins addrmode2:$target, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, + [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, + imm:$id)]>; + def BR_JTadd : ARMPseudoInst<(outs), + (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id), + SizeSpecial, IIC_Br, + [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, + imm:$id)]>; + } // isNotDuplicable = 1, isIndirectBranch = 1 + } // isBarrier = 1 + + // FIXME: should be able to write a pattern for ARMBrcond, but can't use + // a two-value operand where a dag node expects two operands. :( + def Bcc : ABI<0b1010, (outs), (ins br_target:$target), + IIC_Br, "b", "\t$target", + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { + bits<24> target; + let Inst{23-0} = target; + } +} + +// Branch and Exchange Jazelle -- for disassembly only +def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-20} = 0b0010; + //let Inst{19-8} = 0xfff; + let Inst{7-4} = 0b0010; +} + +// Secure Monitor Call is a system instruction -- for disassembly only +def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt", + [/* For disassembly only; pattern left blank */]> { + bits<4> opt; + let Inst{23-4} = 0b01100000000000000111; + let Inst{3-0} = opt; +} + +// Supervisor Call (Software Interrupt) -- for disassembly only +let isCall = 1, Uses = [SP] in { +def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", + [/* For disassembly only; pattern left blank */]> { + bits<24> svc; + let Inst{23-0} = svc; +} +} + +// Store Return State is a system instruction -- for disassembly only +let isCodeGenOnly = 1 in { // FIXME: This should not use submode! 
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), + NoItinerary, "srs${amode}\tsp!, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b110; // W = 1 +} + +def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), + NoItinerary, "srs${amode}\tsp, $mode", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b100; // W = 0 +} + +// Return From Exception is a system instruction -- for disassembly only +def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), + NoItinerary, "rfe${amode}\t$base!", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b011; // W = 1 +} + +def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), + NoItinerary, "rfe${amode}\t$base", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{22-20} = 0b001; // W = 0 +} +} // isCodeGenOnly = 1 + +//===----------------------------------------------------------------------===// +// Load / store Instructions. +// + +// Load + + +defm LDR : AI_ldr1<0, "ldr", IIC_iLoad_r, IIC_iLoad_si, + UnOpFrag<(load node:$Src)>>; +defm LDRB : AI_ldr1<1, "ldrb", IIC_iLoad_bh_r, IIC_iLoad_bh_si, + UnOpFrag<(zextloadi8 node:$Src)>>; +defm STR : AI_str1<0, "str", IIC_iStore_r, IIC_iStore_si, + BinOpFrag<(store node:$LHS, node:$RHS)>>; +defm STRB : AI_str1<1, "strb", IIC_iStore_bh_r, IIC_iStore_bh_si, + BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; + +// Special LDR for loads from non-pc-relative constpools. +let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1, + isReMaterializable = 1 in +def LDRcp : AI2ldst<0b010, 1, 0, (outs GPR:$Rt), (ins addrmode_imm12:$addr), + AddrMode_i12, LdFrm, IIC_iLoad_r, "ldr", "\t$Rt, $addr", + []> { + bits<4> Rt; + bits<17> addr; + let Inst{23} = addr{12}; // U (add = ('U' == 1)) + let Inst{19-16} = 0b1111; + let Inst{15-12} = Rt; + let Inst{11-0} = addr{11-0}; // imm12 +} + +// Loads with zero extension +def LDRH : AI3ld<0b1011, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrh", "\t$Rt, $addr", + [(set GPR:$Rt, (zextloadi16 addrmode3:$addr))]>; + +// Loads with sign extension +def LDRSH : AI3ld<0b1111, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", + [(set GPR:$Rt, (sextloadi16 addrmode3:$addr))]>; + +def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, + IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", + [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, + isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring? +// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1) +// how to represent that such that tblgen is happy and we don't +// mark this codegen only? 
+// Load doubleword
+def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2),
+                 (ins addrmode3:$addr), LdMiscFrm,
+                 IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr",
+                 []>, Requires<[IsARM, HasV5TE]>;
+}
+
+// Indexed loads
+multiclass AI2_ldridx<bit isByte, string opc, InstrItinClass itin> {
+  def _PRE : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins addrmode2:$addr), IndexModePre, LdFrm, itin,
+                        opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    // {17-14}  Rn
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<18> addr;
+    let Inst{25} = addr{13};
+    let Inst{23} = addr{12};
+    let Inst{19-16} = addr{17-14};
+    let Inst{11-0} = addr{11-0};
+  }
+  def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                         (ins GPR:$Rn, am2offset:$offset),
+                         IndexModePost, LdFrm, itin,
+                         opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    // {13}     1 == Rm, 0 == imm12
+    // {12}     isAdd
+    // {11-0}   imm12/Rm
+    bits<14> offset;
+    bits<4> Rn;
+    let Inst{25} = offset{13};
+    let Inst{23} = offset{12};
+    let Inst{19-16} = Rn;
+    let Inst{11-0} = offset{11-0};
+  }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDR  : AI2_ldridx<0, "ldr", IIC_iLoad_ru>;
+defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>;
+}
+
+multiclass AI3_ldridx<bits<4> op, bit op20, string opc, InstrItinClass itin> {
+  def _PRE : AI3ldstidx<op, op20, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb),
+                        (ins addrmode3:$addr), IndexModePre,
+                        LdMiscFrm, itin,
+                        opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> {
+    bits<14> addr;
+    let Inst{23}    = addr{8};      // U bit
+    let Inst{22}    = addr{13};     // 1 == imm8, 0 == Rm
+    let Inst{19-16} = addr{12-9};   // Rn
+    let Inst{11-8}  = addr{7-4};    // imm7_4/zero
+    let Inst{3-0}   = addr{3-0};    // imm3_0/Rm
+  }
+  def _POST : AI3ldstidx<op, op20, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb),
+                         (ins GPR:$Rn, am3offset:$offset), IndexModePost,
+                         LdMiscFrm, itin,
+                         opc, "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", []> {
+    bits<10> offset;
+    bits<4> Rn;
+    let Inst{23}    = offset{8};    // U bit
+    let Inst{22}    = offset{9};    // 1 == imm8, 0 == Rm
+    let Inst{19-16} = Rn;
+    let Inst{11-8}  = offset{7-4};  // imm7_4/zero
+    let Inst{3-0}   = offset{3-0};  // imm3_0/Rm
+  }
+}
+
+let mayLoad = 1, neverHasSideEffects = 1 in {
+defm LDRH  : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>;
+defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>;
+defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>;
+let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
+defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>;
+} // mayLoad = 1, neverHasSideEffects = 1
+
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
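Aside on the indexed loads above: the pre-indexed AI2 forms fold the base register, the register/immediate selector, the U (add/subtract) bit, and the 12-bit offset into a single 18-bit addr operand. A hedged C++ sketch of that field mapping (encodeAI2PreAddr is a hypothetical helper for illustration, not an LLVM API):

    #include <cstdint>

    // Scatter the 18-bit "addr" operand into an AI2 pre-indexed load word,
    // following the {17-14}/{13}/{12}/{11-0} comments in AI2_ldridx above.
    uint32_t encodeAI2PreAddr(uint32_t Inst, uint32_t addr) {
      Inst |= ((addr >> 13) & 0x1u) << 25;  // Inst{25}    = addr{13} (1 = Rm form)
      Inst |= ((addr >> 12) & 0x1u) << 23;  // Inst{23}    = addr{12} (U: 1 = add)
      Inst |= ((addr >> 14) & 0xFu) << 16;  // Inst{19-16} = addr{17-14} (Rn)
      Inst |= addr & 0xFFFu;                // Inst{11-0}  = imm12 or shifted Rm
      return Inst;
    }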
+let mayLoad = 1, neverHasSideEffects = 1 in { +def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), IndexModeNone, + LdFrm, IIC_iLoad_ru, + "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} +def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am2offset:$offset), IndexModeNone, + LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} +def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} +def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_bh_ru, + "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} +def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), + (ins GPR:$base, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { + let Inst{21} = 1; // overwrite +} +} + +// Store + +// Stores with truncate +def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, + IIC_iStore_bh_r, "strh", "\t$Rt, $addr", + [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; + +// Store doubleword +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1, + isCodeGenOnly = 1 in // $src2 doesn't exist in asm string +def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), + StMiscFrm, IIC_iStore_d_r, + "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + +// Indexed stores +def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), + IndexModePre, StFrm, IIC_iStore_ru, + "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, + (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; + +def STR_POST : AI2stridx<0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), + IndexModePost, StFrm, IIC_iStore_ru, + "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, + (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; + +def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), + IndexModePre, StFrm, IIC_iStore_bh_ru, + "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, + GPR:$Rn, am2offset:$offset))]>; +def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, + GPR:$Rn, am2offset:$offset))]>; + +def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), + IndexModePre, StMiscFrm, IIC_iStore_ru, + "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, + (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; + +def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), + (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), + IndexModePost, StMiscFrm, IIC_iStore_bh_ru, + "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, + GPR:$Rn, am3offset:$offset))]>; + +// For disassembly only +def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), + 
(ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+                StMiscFrm, IIC_iStore_d_ru,
+                "strd", "\t$src1, $src2, [$base, $offset]!",
+                "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+               (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+               StMiscFrm, IIC_iStore_d_ru,
+               "strd", "\t$src1, $src2, [$base], $offset",
+               "$base = $base_wb", []>;
+
+// STRT, STRBT, and STRHT are for disassembly only.
+
+def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb),
+                     (ins GPR:$Rt, GPR:$Rn,am2offset:$offset),
+                     IndexModeNone, StFrm, IIC_iStore_ru,
+                     "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                     [/* For disassembly only; pattern left blank */]> {
+  let Inst{21} = 1; // overwrite
+}
+
+def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb),
+                      (ins GPR:$Rt, GPR:$Rn, am2offset:$offset),
+                      IndexModeNone, StFrm, IIC_iStore_bh_ru,
+                      "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb",
+                      [/* For disassembly only; pattern left blank */]> {
+  let Inst{21} = 1; // overwrite
+}
+
+def STRHT: AI3sthpo<(outs GPR:$base_wb),
+                    (ins GPR:$src, GPR:$base,am3offset:$offset),
+                    StMiscFrm, IIC_iStore_bh_ru,
+                    "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+                    [/* For disassembly only; pattern left blank */]> {
+  let Inst{21} = 1; // overwrite
+}
+
+//===----------------------------------------------------------------------===//
+// Load / store multiple Instructions.
+//
+
+multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
+                         InstrItinClass itin, InstrItinClass itin_upd> {
+  def IA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def IA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b01;       // Increment After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "da${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DA_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "da${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b00;       // Decrement After
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def DB :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "db${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def DB_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let Inst{24-23} = 0b10;       // Decrement Before
+    let Inst{21}    = 1;          // Writeback
+    let Inst{20}    = L_bit;
+  }
+  def IB :
+    AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeNone, f, itin,
+         !strconcat(asm, "ib${p}\t$Rn, $regs"), "", []> {
+    let Inst{24-23} = 0b11;       // Increment Before
+    let Inst{21}    = 0;          // No writeback
+    let Inst{20}    = L_bit;
+  }
+  def IB_UPD :
+    AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
+         IndexModeUpd, f, itin_upd,
+         !strconcat(asm, "ib${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+    let
Inst{24-23} = 0b11; // Increment Before + let Inst{21} = 1; // Writeback + let Inst{20} = L_bit; + } +} + +let neverHasSideEffects = 1 in { + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in +defm LDM : arm_ldst_mult<"ldm", 1, LdStMulFrm, IIC_iLoad_m, IIC_iLoad_mu>; + +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in +defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>; + +} // neverHasSideEffects + +// Load / Store Multiple Mnemonic Aliases +def : MnemonicAlias<"ldm", "ldmia">; +def : MnemonicAlias<"stm", "stmia">; + +// FIXME: remove when we have a way to marking a MI with these properties. +// FIXME: Should pc be an implicit operand like PICADD, etc? +let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, + hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in +// FIXME: Should be a pseudo-instruction. +def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr, + "ldmia${p}\t$Rn!, $regs", + "$Rn = $wb", []> { + let Inst{24-23} = 0b01; // Increment After + let Inst{21} = 1; // Writeback + let Inst{20} = 1; // Load +} + +//===----------------------------------------------------------------------===// +// Move Instructions. +// + +let neverHasSideEffects = 1 in +def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, + "mov", "\t$Rd, $Rm", []>, UnaryDP { + bits<4> Rd; + bits<4> Rm; + + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; +} + +// A version for the smaller set of tail call registers. +let neverHasSideEffects = 1 in +def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, + IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP { + bits<4> Rd; + bits<4> Rm; + + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; +} + +def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), + DPSoRegFrm, IIC_iMOVsr, + "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg:$src)]>, + UnaryDP { + bits<4> Rd; + bits<12> src; + let Inst{15-12} = Rd; + let Inst{11-0} = src; + let Inst{25} = 0; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, + "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP { + bits<4> Rd; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; + let Inst{11-0} = imm; +} + +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins i32imm_hilo16:$imm), + DPFrm, IIC_iMOVi, + "movw", "\t$Rd, $imm", + [(set GPR:$Rd, imm0_65535:$imm)]>, + Requires<[IsARM, HasV6T2]>, UnaryDP { + bits<4> Rd; + bits<16> imm; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; + let Inst{19-16} = imm{15-12}; + let Inst{20} = 0; + let Inst{25} = 1; +} + +def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + +let Constraints = "$src = $Rd" in { +def MOVTi16 : AI1<0b1010, (outs GPR:$Rd), (ins GPR:$src, i32imm_hilo16:$imm), + DPFrm, IIC_iMOVi, + "movt", "\t$Rd, $imm", + [(set GPR:$Rd, + (or (and GPR:$src, 0xffff), + lo16AllZero:$imm))]>, UnaryDP, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<16> imm; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; + let Inst{19-16} = imm{15-12}; + let Inst{20} = 0; + let Inst{25} = 1; +} + +def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + +} // 
Constraints + +def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, + Requires<[IsARM, HasV6T2]>; + +let Uses = [CPSR] in +def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, + [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, + Requires<[IsARM]>; + +// These aren't really mov instructions, but we have to define them this way +// due to flag operands. + +let Defs = [CPSR] in { +def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, + Requires<[IsARM]>; +def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, + [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, + Requires<[IsARM]>; +} + +//===----------------------------------------------------------------------===// +// Extend Instructions. +// + +// Sign extenders + +defm SXTB : AI_ext_rrot<0b01101010, + "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; +defm SXTH : AI_ext_rrot<0b01101011, + "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; + +defm SXTAB : AI_exta_rrot<0b01101010, + "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; +defm SXTAH : AI_exta_rrot<0b01101011, + "sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>; + +// For disassembly only +defm SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">; + +// For disassembly only +defm SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">; + +// Zero extenders + +let AddedComplexity = 16 in { +defm UXTB : AI_ext_rrot<0b01101110, + "uxtb" , UnOpFrag<(and node:$Src, 0x000000FF)>>; +defm UXTH : AI_ext_rrot<0b01101111, + "uxth" , UnOpFrag<(and node:$Src, 0x0000FFFF)>>; +defm UXTB16 : AI_ext_rrot<0b01101100, + "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; + +// FIXME: This pattern incorrectly assumes the shl operator is a rotate. +// The transformation should probably be done as a combiner action +// instead so we can include a check for masking back in the upper +// eight bits of the source into the lower eight bits of the result. +//def : ARMV6Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), +// (UXTB16r_rot GPR:$Src, 24)>; +def : ARMV6Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), + (UXTB16r_rot GPR:$Src, 8)>; + +defm UXTAB : AI_exta_rrot<0b01101110, "uxtab", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; +defm UXTAH : AI_exta_rrot<0b01101111, "uxtah", + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; +} + +// This isn't safe in general, the add is two 16-bit units, not a 32-bit add. +// For disassembly only +defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">; + + +def SBFX : I<(outs GPR:$Rd), + (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<5> lsb; + bits<5> width; + let Inst{27-21} = 0b0111101; + let Inst{6-4} = 0b101; + let Inst{20-16} = width; + let Inst{15-12} = Rd; + let Inst{11-7} = lsb; + let Inst{3-0} = Rn; +} + +def UBFX : I<(outs GPR:$Rd), + (ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<5> lsb; + bits<5> width; + let Inst{27-21} = 0b0111111; + let Inst{6-4} = 0b101; + let Inst{20-16} = width; + let Inst{15-12} = Rd; + let Inst{11-7} = lsb; + let Inst{3-0} = Rn; +} + +//===----------------------------------------------------------------------===// +// Arithmetic Instructions. 
+// + +defm ADD : AsI1_bin_irs<0b0100, "add", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(add node:$LHS, node:$RHS)>, 1>; +defm SUB : AsI1_bin_irs<0b0010, "sub", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(sub node:$LHS, node:$RHS)>>; + +// ADD and SUB with 's' bit set. +defm ADDS : AI1_bin_s_irs<0b0100, "adds", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AI1_bin_s_irs<0b0010, "subs", + IIC_iALUi, IIC_iALUr, IIC_iALUsr, + BinOpFrag<(subc node:$LHS, node:$RHS)>>; + +defm ADC : AI1_adde_sube_irs<0b0101, "adc", + BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; +defm SBC : AI1_adde_sube_irs<0b0110, "sbc", + BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; + +// ADC and SUBC with 's' bit set. +defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; + +def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, (sub so_imm:$imm, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; +} + +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. +def RSBrr : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + IIC_iALUr, "rsb", "\t$Rd, $Rn, $Rm", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} + +def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + DPSoRegFrm, IIC_iALUsr, "rsb", "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (sub so_reg:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} + +// RSB with 's' bit set. 
+let isCodeGenOnly = 1, Defs = [CPSR] in { +def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, + IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; +} +def RSBSrr : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, + IIC_iALUr, "rsbs", "\t$Rd, $Rn, $Rm", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} +def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} +} + +let Uses = [CPSR] in { +def RSCri : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + DPFrm, IIC_iALUi, "rsc", "\t$Rd, $Rn, $imm", + [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; +} +// The reg/reg form is only defined for the disassembler; for codegen it is +// equivalent to SUBrr. +def RSCrr : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + DPFrm, IIC_iALUr, "rsc", "\t$Rd, $Rn, $Rm", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; + let Inst{3-0} = Rm; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} +def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + DPSoRegFrm, IIC_iALUsr, "rsc", "\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} +} + +// FIXME: Allow these to be predicated. +let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in { +def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm", + [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 1; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; + let Inst{11-0} = imm; +} +def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift", + [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, + Requires<[IsARM]> { + bits<4> Rd; + bits<4> Rn; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 1; + let Inst{11-0} = shift; + let Inst{15-12} = Rd; + let Inst{19-16} = Rn; +} +} + +// (sub X, imm) gets canonicalized to (add X, -imm). Match this form. +// The assume-no-carry-in form uses the negation of the input since add/sub +// assume opposite meanings of the carry flag (i.e., carry == !borrow). +// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory +// details. 
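Aside: the AddWithCarry() referenced in the comment above can be sketched in C++ as follows; this is an illustrative transcription of the ARM ARM A2.2.1 pseudocode, not code from this patch. SUBS Rd, Rn, #imm computes AddWithCarry(Rn, ~imm, 1), so carry-out equals NOT borrow, which is why the patterns below pair adde with so_imm_not rather than so_imm_neg:

    #include <cstdint>
    #include <tuple>

    // Returns (result, carry_out, overflow) as in ARM ARM A2.2.1.
    std::tuple<uint32_t, bool, bool>
    AddWithCarry(uint32_t x, uint32_t y, bool carry_in) {
      uint64_t usum = (uint64_t)x + y + carry_in;
      int64_t  ssum = (int64_t)(int32_t)x + (int32_t)y + carry_in;
      uint32_t result = (uint32_t)usum;
      bool carry_out = (uint64_t)result != usum;          // unsigned wrap
      bool overflow  = (int64_t)(int32_t)result != ssum;  // signed wrap
      return {result, carry_out, overflow};
    }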
+def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
+             (SUBri  GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc   GPR:$src, so_imm_neg:$imm),
+             (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde   GPR:$src, so_imm_not:$imm),
+             (SBCri  GPR:$src, so_imm_not:$imm)>;
+
+// Note: These are implemented in C++ code, because they have to generate
+// ADD/SUBrs instructions, which use a complex pattern that a xform function
+// cannot produce.
+// (mul X, 2^n+1) -> (add (X << n), X)
+// (mul X, 2^n-1) -> (rsb X, (X << n))
+
+// ARM Arithmetic Instruction -- for disassembly only
+// GPR:$dst = GPR:$a op GPR:$b
+class AAI<bits<8> op27_20, bits<8> op11_4, string opc,
+          list<dag> pattern = [/* For disassembly only; pattern left blank */],
+          dag iops = (ins GPR:$Rn, GPR:$Rm), string asm = "\t$Rd, $Rn, $Rm">
+  : AI<(outs GPR:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> {
+  bits<4> Rn;
+  bits<4> Rd;
+  bits<4> Rm;
+  let Inst{27-20} = op27_20;
+  let Inst{11-4} = op11_4;
+  let Inst{19-16} = Rn;
+  let Inst{15-12} = Rd;
+  let Inst{3-0} = Rm;
+}
+
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b00000101, "qadd",
+               [(set GPR:$Rd, (int_arm_qadd GPR:$Rm, GPR:$Rn))],
+               (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QSUB : AAI<0b00010010, 0b00000101, "qsub",
+               [(set GPR:$Rd, (int_arm_qsub GPR:$Rm, GPR:$Rn))],
+               (ins GPR:$Rm, GPR:$Rn), "\t$Rd, $Rm, $Rn">;
+def QDADD : AAI<0b00010100, 0b00000101, "qdadd", [], (ins GPR:$Rm, GPR:$Rn),
+                "\t$Rd, $Rm, $Rn">;
+def QDSUB : AAI<0b00010110, 0b00000101, "qdsub", [], (ins GPR:$Rm, GPR:$Rn),
+                "\t$Rd, $Rm, $Rn">;
+
+def QADD16  : AAI<0b01100010, 0b11110001, "qadd16">;
+def QADD8   : AAI<0b01100010, 0b11111001, "qadd8">;
+def QASX    : AAI<0b01100010, 0b11110011, "qasx">;
+def QSAX    : AAI<0b01100010, 0b11110101, "qsax">;
+def QSUB16  : AAI<0b01100010, 0b11110111, "qsub16">;
+def QSUB8   : AAI<0b01100010, 0b11111111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b11110001, "uqadd16">;
+def UQADD8  : AAI<0b01100110, 0b11111001, "uqadd8">;
+def UQASX   : AAI<0b01100110, 0b11110011, "uqasx">;
+def UQSAX   : AAI<0b01100110, 0b11110101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b11110111, "uqsub16">;
+def UQSUB8  : AAI<0b01100110, 0b11111111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX   : AAI<0b01100001, 0b11110011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b11110001, "sadd16">;
+def SADD8  : AAI<0b01100001, 0b11111001, "sadd8">;
+def SSAX   : AAI<0b01100001, 0b11110101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b11110111, "ssub16">;
+def SSUB8  : AAI<0b01100001, 0b11111111, "ssub8">;
+def UASX   : AAI<0b01100101, 0b11110011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b11110001, "uadd16">;
+def UADD8  : AAI<0b01100101, 0b11111001, "uadd8">;
+def USAX   : AAI<0b01100101, 0b11110101, "usax">;
+def USUB16 : AAI<0b01100101, 0b11110111, "usub16">;
+def USUB8  : AAI<0b01100101, 0b11111111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX   : AAI<0b01100011, 0b11110011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b11110001, "shadd16">;
+def SHADD8  : AAI<0b01100011, 0b11111001, "shadd8">;
+def SHSAX   : AAI<0b01100011, 0b11110101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b11110111, "shsub16">;
+def SHSUB8  : AAI<0b01100011, 0b11111111, "shsub8">;
+def UHASX   : AAI<0b01100111, 0b11110011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b11110001, "uhadd16">;
+def
UHADD8 : AAI<0b01100111, 0b11111001, "uhadd8">; +def UHSAX : AAI<0b01100111, 0b11110101, "uhsax">; +def UHSUB16 : AAI<0b01100111, 0b11110111, "uhsub16">; +def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; + +// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only + +def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + MulFrm /* for convenience */, NoItinerary, "usad8", + "\t$Rd, $Rn, $Rm", []>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + let Inst{27-20} = 0b01111000; + let Inst{15-12} = 0b1111; + let Inst{7-4} = 0b0001; + let Inst{19-16} = Rd; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} +def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), + MulFrm /* for convenience */, NoItinerary, "usada8", + "\t$Rd, $Rn, $Rm, $Ra", []>, + Requires<[IsARM, HasV6]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + bits<4> Ra; + let Inst{27-20} = 0b01111000; + let Inst{7-4} = 0b0001; + let Inst{19-16} = Rd; + let Inst{15-12} = Ra; + let Inst{11-8} = Rm; + let Inst{3-0} = Rn; +} + +// Signed/Unsigned saturate -- for disassembly only + +def SSAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "ssat", "\t$Rd, $sat_imm, $a$sh", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<5> sat_imm; + bits<4> Rn; + bits<8> sh; + let Inst{27-21} = 0b0110101; + let Inst{5-4} = 0b01; + let Inst{20-16} = sat_imm; + let Inst{15-12} = Rd; + let Inst{11-7} = sh{7-3}; + let Inst{6} = sh{0}; + let Inst{3-0} = Rn; +} + +def SSAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$Rn), SatFrm, + NoItinerary, "ssat16", "\t$Rd, $sat_imm, $Rn", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<4> sat_imm; + bits<4> Rn; + let Inst{27-20} = 0b01101010; + let Inst{11-4} = 0b11110011; + let Inst{15-12} = Rd; + let Inst{19-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def USAT : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a, shift_imm:$sh), + SatFrm, NoItinerary, "usat", "\t$Rd, $sat_imm, $a$sh", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<5> sat_imm; + bits<4> Rn; + bits<8> sh; + let Inst{27-21} = 0b0110111; + let Inst{5-4} = 0b01; + let Inst{15-12} = Rd; + let Inst{11-7} = sh{7-3}; + let Inst{6} = sh{0}; + let Inst{20-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def USAT16 : AI<(outs GPR:$Rd), (ins i32imm:$sat_imm, GPR:$a), SatFrm, + NoItinerary, "usat16", "\t$Rd, $sat_imm, $a", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + bits<4> sat_imm; + bits<4> Rn; + let Inst{27-20} = 0b01101110; + let Inst{11-4} = 0b11110011; + let Inst{15-12} = Rd; + let Inst{19-16} = sat_imm; + let Inst{3-0} = Rn; +} + +def : ARMV6Pat<(int_arm_ssat GPR:$a, imm:$pos), (SSAT imm:$pos, GPR:$a, 0)>; +def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>; + +//===----------------------------------------------------------------------===// +// Bitwise Instructions. 
+// + +defm AND : AsI1_bin_irs<0b0000, "and", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, + BinOpFrag<(and node:$LHS, node:$RHS)>, 1>; +defm ORR : AsI1_bin_irs<0b1100, "orr", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, + BinOpFrag<(or node:$LHS, node:$RHS)>, 1>; +defm EOR : AsI1_bin_irs<0b0001, "eor", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, + BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>; +defm BIC : AsI1_bin_irs<0b1110, "bic", + IIC_iBITi, IIC_iBITr, IIC_iBITsr, + BinOpFrag<(and node:$LHS, (not node:$RHS))>>; + +def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfc", "\t$Rd, $imm", "$src = $Rd", + [(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<10> imm; + let Inst{27-21} = 0b0111110; + let Inst{6-0} = 0b0011111; + let Inst{15-12} = Rd; + let Inst{11-7} = imm{4-0}; // lsb + let Inst{20-16} = imm{9-5}; // width +} + +// A8.6.18 BFI - Bitfield insert (Encoding A1) +def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$Rd, $Rn, $imm", "$src = $Rd", + [(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn, + bf_inv_mask_imm:$imm))]>, + Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<10> imm; + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 + let Inst{15-12} = Rd; + let Inst{11-7} = imm{4-0}; // lsb + let Inst{20-16} = imm{9-5}; // width + let Inst{3-0} = Rn; +} + +// GNU as only supports this form of bfi (w/ 4 arguments) +let isAsmParserOnly = 1 in +def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, + lsb_pos_imm:$lsb, width_imm:$width), + AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi, + "bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd", + []>, Requires<[IsARM, HasV6T2]> { + bits<4> Rd; + bits<4> Rn; + bits<5> lsb; + bits<5> width; + let Inst{27-21} = 0b0111110; + let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15 + let Inst{15-12} = Rd; + let Inst{11-7} = lsb; + let Inst{20-16} = width; // Custom encoder => lsb+width-1 + let Inst{3-0} = Rn; +} + +def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, + "mvn", "\t$Rd, $Rm", + [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP { + bits<4> Rd; + bits<4> Rm; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{11-4} = 0b00000000; + let Inst{15-12} = Rd; + let Inst{3-0} = Rm; +} +def MVNs : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg:$shift), DPSoRegFrm, + IIC_iMVNsr, "mvn", "\t$Rd, $shift", + [(set GPR:$Rd, (not so_reg:$shift))]>, UnaryDP { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-0} = shift; +} +let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in +def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, + IIC_iMVNi, "mvn", "\t$Rd, $imm", + [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP { + bits<4> Rd; + bits<12> imm; + let Inst{25} = 1; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; +} + +def : ARMPat<(and GPR:$src, so_imm_not:$imm), + (BICri GPR:$src, so_imm_not:$imm)>; + +//===----------------------------------------------------------------------===// +// Multiply Instructions. 
+//
+class AsMul1I32<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+                string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = Rd;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
+                string opc, string asm, list<dag> pattern>
+  : AsMul1I<opcod, oops, iops, itin, opc, asm, pattern> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $Rd" in
+def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+                          pred:$p, cc_out:$s),
+                         Size4Bytes, IIC_iMUL32,
+                         [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+           Requires<[IsARM, NoV6]>;
+
+def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
+                    [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
+          Requires<[IsARM, HasV6]>;
+}
+
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
+                         (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+                         Size4Bytes, IIC_iMAC32,
+                         [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+           Requires<[IsARM, NoV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+          Requires<[IsARM, HasV6]> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+
+def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                 IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
+                 [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
+          Requires<[IsARM, HasV6T2]> {
+  bits<4> Rd;
+  bits<4> Rm;
+  bits<4> Rn;
+  bits<4> Ra;
+  let Inst{19-16} = Rd;
+  let Inst{15-12} = Ra;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+
+// Extra precision multiplies with low / high results
+
+let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMUL64, []>,
+              Requires<[IsARM, NoV6]>;
+
+def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMUL64, []>,
+              Requires<[IsARM, NoV6]>;
+}
+
+def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                      "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+            Requires<[IsARM, HasV6]>;
+
+def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+                      "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+            Requires<[IsARM, HasV6]>;
+}
+
+// Multiply + accumulate
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+              Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+              Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+                            (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+                            Size4Bytes, IIC_iMAC64, []>,
+              Requires<[IsARM, NoV6]>;
+
+}
+
+def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                      "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+            Requires<[IsARM, HasV6]>;
+def UMLAL : AsMul1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                      "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+            Requires<[IsARM, HasV6]>;
+
+def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
+                    (ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
+                    "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+            Requires<[IsARM, HasV6]> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  bits<4> Rm;
+  bits<4> Rn;
+  let Inst{19-16} = RdLo;
+  let Inst{15-12} = RdHi;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+} // neverHasSideEffects
+
+// Most significant word multiply
+def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm",
+                    [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>,
+            Requires<[IsARM, HasV6]> {
+  let Inst{15-12} = 0b1111;
+}
+
+def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                     IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm",
+                     [/* For disassembly only; pattern left blank */]>,
+             Requires<[IsARM, HasV6]> {
+  let Inst{15-12} = 0b1111;
+}
+
+def SMMLA : AMul2Ia <0b0111010, 0b0001, (outs GPR:$Rd),
+                     (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                     IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra",
+                     [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
+            Requires<[IsARM, HasV6]>;
+
+def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd),
+                      (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                      IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra",
+                      [/* For disassembly only; pattern left blank */]>,
+             Requires<[IsARM, HasV6]>;
+
+def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd),
+                     (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                     IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra",
+                     [(set GPR:$Rd, (sub GPR:$Ra, (mulhs GPR:$Rn, GPR:$Rm)))]>,
+            Requires<[IsARM, HasV6]>;
+
+def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd),
+                      (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                      IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra",
+                      [/* For disassembly only; pattern left blank */]>,
+             Requires<[IsARM, HasV6]>;
+
+multiclass AI_smul<string opc, PatFrag opnode> {
+  def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                          (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (opnode (sext_inreg GPR:$Rn, i16),
+                                          (sra GPR:$Rm, (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                          (sext_inreg GPR:$Rm, i16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (opnode (sra GPR:$Rn, (i32 16)),
+                                          (sra GPR:$Rm, (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                        (sext_inreg GPR:$Rm, i16)), (i32 16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                   IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
+                   [(set GPR:$Rd, (sra (opnode GPR:$Rn,
+                                        (sra GPR:$Rm, (i32 16))), (i32 16)))]>,
+           Requires<[IsARM, HasV5TE]>;
+}
+
+
+multiclass AI_smla<string opc, PatFrag opnode> {
+  def BB : AMulxyIa<0b0001000, 0b00, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra,
+                                    (opnode (sext_inreg GPR:$Rn, i16),
+                                            (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def BT : AMulxyIa<0b0001000, 0b10, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra, (opnode (sext_inreg GPR:$Rn, i16),
+                                                         (sra GPR:$Rm, (i32 16)))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TB : AMulxyIa<0b0001000, 0b01, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                         (sext_inreg GPR:$Rm, i16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def TT : AMulxyIa<0b0001000, 0b11, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra, (opnode (sra GPR:$Rn, (i32 16)),
+                                                         (sra GPR:$Rm, (i32 16)))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WB : AMulxyIa<0b0001001, 0b00, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                          (sext_inreg GPR:$Rm, i16)), (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+
+  def WT : AMulxyIa<0b0001001, 0b10, (outs GPR:$Rd),
+                    (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                    IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
+                    [(set GPR:$Rd, (add GPR:$Ra, (sra (opnode GPR:$Rn,
+                                          (sra GPR:$Rm, (i32 16))), (i32 16))))]>,
+           Requires<[IsARM, HasV5TE]>;
+}
+
+defm SMUL : AI_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+defm SMLA : AI_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
+
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPR:$RdLo, GPR:$RdHi),
+                        (ins GPR:$Rn, GPR:$Rm),
+                        IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm",
+                        [/* For disassembly only; pattern left blank */]>,
+              Requires<[IsARM, HasV5TE]>;
+
+def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPR:$RdLo, GPR:$RdHi),
+                        (ins GPR:$Rn, GPR:$Rm),
+                        IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm",
+                        [/* For disassembly only; pattern left blank */]>,
+              Requires<[IsARM, HasV5TE]>;
+
+def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPR:$RdLo, GPR:$RdHi),
+                        (ins GPR:$Rn, GPR:$Rm),
+                        IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm",
+                        [/* For disassembly only; pattern left blank */]>,
+              Requires<[IsARM, HasV5TE]>;
+
+def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPR:$RdLo, GPR:$RdHi),
+                        (ins GPR:$Rn, GPR:$Rm),
+                        IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm",
+                        [/* For disassembly only; pattern left blank */]>,
+              Requires<[IsARM, HasV5TE]>;
+
+// Helper class for AI_smld -- for disassembly only
+class AMulDualIbase<bit long, bit sub, bit swap, dag oops, dag iops,
+                    InstrItinClass itin, string opc, string asm>
+  : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+  bits<4> Rn;
+  bits<4> Rm;
+  let Inst{4}     = 1;
+  let Inst{5}     = swap;
+  let Inst{6}     = sub;
+  let Inst{7}     = 0;
+  let Inst{21-20} = 0b00;
+  let Inst{22}    = long;
+  let Inst{27-23} = 0b01110;
+  let Inst{11-8}  = Rm;
+  let Inst{3-0}   = Rn;
+}
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+                InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Rd;
+  let Inst{15-12} = 0b1111;
+  let Inst{19-16} = Rd;
+}
+class AMulDualIa<bit long, bit sub, bit swap, dag oops, dag iops,
+                 InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> Ra;
+  let Inst{15-12} = Ra;
+}
+class AMulDualI64<bit long, bit sub, bit swap, dag oops, dag iops,
+                  InstrItinClass itin, string opc, string asm>
+  : AMulDualIbase<long, sub, swap, oops, iops, itin, opc, asm> {
+  bits<4> RdLo;
+  bits<4> RdHi;
+  let Inst{19-16} = RdHi;
+  let Inst{15-12} = RdLo;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+  def D : AMulDualIa<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                     NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+  def DX: AMulDualIa<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
+                     NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+
+  def LD: AMulDualI64<1, sub, 0, (outs GPR:$RdLo,GPR:$RdHi),
+                      (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                      !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+
+  def LDX : AMulDualI64<1, sub, 1, (outs GPR:$RdLo,GPR:$RdHi),
+                        (ins GPR:$Rn, GPR:$Rm), NoItinerary,
+                        !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+  def D : AMulDualI<0, sub, 0, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                    NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+  def DX : AMulDualI<0, sub, 1, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
+                     NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
+
+//===----------------------------------------------------------------------===//
+// Misc. Arithmetic Instructions.
+//
+
+def CLZ  : AMiscA1I<0b000010110, 0b0001, (outs GPR:$Rd), (ins GPR:$Rm),
+                    IIC_iUNAr, "clz", "\t$Rd, $Rm",
+                    [(set GPR:$Rd, (ctlz GPR:$Rm))]>, Requires<[IsARM, HasV5T]>;
+
+def RBIT : AMiscA1I<0b01101111, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+                    IIC_iUNAr, "rbit", "\t$Rd, $Rm",
+                    [(set GPR:$Rd, (ARMrbit GPR:$Rm))]>,
+           Requires<[IsARM, HasV6T2]>;
+
+def REV  : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
+                    IIC_iUNAr, "rev", "\t$Rd, $Rm",
+                    [(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+
+def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+                     IIC_iUNAr, "rev16", "\t$Rd, $Rm",
+                     [(set GPR:$Rd,
+                        (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
+                            (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
+                                (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
+                                    (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+            Requires<[IsARM, HasV6]>;
+
+def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
+                     IIC_iUNAr, "revsh", "\t$Rd, $Rm",
+                     [(set GPR:$Rd,
+                        (sext_inreg
+                          (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
+                              (shl GPR:$Rm, (i32 8))), i16))]>,
+            Requires<[IsARM, HasV6]>;
+
+def lsl_shift_imm : SDNodeXForm<imm, [{
+  unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
+  return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def lsl_amt : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() < 32);
+}], lsl_shift_imm>;
+
+def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd),
+                  (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+                  IIC_iALUsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
+                  [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF),
+                                     (and (shl GPR:$Rm, lsl_amt:$sh),
+                                          0xFFFF0000)))]>,
+            Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHBT where identities eliminate some nodes.
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (and GPR:$Rm, 0xFFFF0000)),
+               (PKHBT GPR:$Rn, GPR:$Rm, 0)>;
+def : ARMV6Pat<(or (and GPR:$Rn, 0xFFFF), (shl GPR:$Rm, imm16_31:$sh)),
+               (PKHBT GPR:$Rn, GPR:$Rm, (lsl_shift_imm imm16_31:$sh))>;
+
+def asr_shift_imm : SDNodeXForm<imm, [{
+  unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::asr, N->getZExtValue());
+  return CurDAG->getTargetConstant(Sh, MVT::i32);
+}]>;
+
+def asr_amt : PatLeaf<(i32 imm), [{
+  return (N->getZExtValue() <= 32);
+}], asr_shift_imm>;
+
+// Note: Shifts of 1-15 bits will be transformed to srl instead of sra and
+// will match the pattern below.
+def PKHTB : APKHI<0b01101000, 1, (outs GPR:$Rd),
+                  (ins GPR:$Rn, GPR:$Rm, shift_imm:$sh),
+                  IIC_iBITsi, "pkhtb", "\t$Rd, $Rn, $Rm$sh",
+                  [(set GPR:$Rd, (or (and GPR:$Rn, 0xFFFF0000),
+                                     (and (sra GPR:$Rm, asr_amt:$sh),
+                                          0xFFFF)))]>,
+            Requires<[IsARM, HasV6]>;
+
+// Alternate cases for PKHTB where identities eliminate some nodes. Note that
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, imm16_31:$sh)),
+               (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm16_31:$sh))>;
+def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF0000),
+                   (and (srl GPR:$src2, imm1_15:$sh), 0xFFFF)),
+               (PKHTB GPR:$src1, GPR:$src2, (asr_shift_imm imm1_15:$sh))>;
+
+//===----------------------------------------------------------------------===//
+//  Comparison Instructions...
+//
+
+defm CMP  : AI1_cmp_irs<0b1010, "cmp",
+                        IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+                        BinOpFrag<(ARMcmp node:$LHS, node:$RHS)>>;
+
+// ARMcmpZ can re-use the above instruction definitions.
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm:$imm),
+             (CMPri   GPR:$src, so_imm:$imm)>;
+def : ARMPat<(ARMcmpZ GPR:$src, GPR:$rhs),
+             (CMPrr   GPR:$src, GPR:$rhs)>;
+def : ARMPat<(ARMcmpZ GPR:$src, so_reg:$rhs),
+             (CMPrs   GPR:$src, so_reg:$rhs)>;
+
+// FIXME: We have to be careful when using the CMN instruction and comparison
+// with 0. One would expect these two pieces of code to give identical
+// results:
+//
+//   rsbs r1, r1, 0
+//   cmp  r0, r1
+//   mov  r0, #0
+//   it   ls
+//   mov  r0, #1
+//
+// and:
+//
+//   cmn  r0, r1
+//   mov  r0, #0
+//   it   ls
+//   mov  r0, #1
+//
+// However, the CMN gives the *opposite* result when r1 is 0. This is because
+// the carry flag is set in the CMP case but not in the CMN case. For CMP,
+// AddWithCarry computes r0 + NOT(0) + 1 (the "carry bit" parameter to
+// AddWithCarry is defined as 1 in this case), which always produces a carry
+// out, so the carry flag is always set. For CMN, the operand is not
+// complemented: AddWithCarry computes r0 + 0 + 0 (the "carry bit" parameter
+// is defined as 0), so a carry out never occurs.
+//
+// When x is 0 and unsigned:
+//
+//    x = 0
+//   ~x = 0xFFFF FFFF
+//   ~x + 1 = 0x1 0000 0000
+//   (-x = 0) != (0x1 0000 0000 = ~x + 1)
+//
+// Therefore, we should disable CMN when comparing against zero, until we can
+// limit when the CMN instruction is used (when we know that the RHS is not 0
+// or when it's a comparison which doesn't look at the 'carry' flag).
+//
+// (See the ARM docs for the "AddWithCarry" pseudo-code.)
+//
+// This is related to .
+//
+//defm CMN : AI1_cmp_irs<0b1011, "cmn",
+//                       BinOpFrag<(ARMcmp node:$LHS,(ineg node:$RHS))>>;
+
+// Note that TST/TEQ don't set all the same flags that CMP does!
+defm TST  : AI1_cmp_irs<0b1000, "tst",
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                        BinOpFrag<(ARMcmpZ (and_su node:$LHS, node:$RHS), 0)>, 1>;
+defm TEQ  : AI1_cmp_irs<0b1001, "teq",
+                        IIC_iTSTi, IIC_iTSTr, IIC_iTSTsr,
+                        BinOpFrag<(ARMcmpZ (xor_su node:$LHS, node:$RHS), 0)>, 1>;
+
+defm CMNz : AI1_cmp_irs<0b1011, "cmn",
+                        IIC_iCMPi, IIC_iCMPr, IIC_iCMPsr,
+                        BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))>>;
+
+//def : ARMPat<(ARMcmp GPR:$src, so_imm_neg:$imm),
+//             (CMNri  GPR:$src, so_imm_neg:$imm)>;
+
+def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
+             (CMNzri  GPR:$src, so_imm_neg:$imm)>;
+
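To make the carry-flag asymmetry above concrete, here is a minimal standalone C++ sketch of the ARM ARM's AddWithCarry pseudo-code; the Flags struct and helper names are illustrative, not LLVM or ARM APIs. With a zero RHS, CMP (a + ~b + 1) always produces a carry out, while CMN (a + b + 0) never does, so a condition like LS (C clear or Z set) flips between the two sequences:

#include <cstdint>
#include <cstdio>

struct Flags { bool N, Z, C, V; };

// AddWithCarry(x, y, carry_in) as in the ARM ARM pseudo-code: a 33-bit
// unsigned sum decides C, a 33-bit signed sum decides V.
static Flags AddWithCarry(uint32_t x, uint32_t y, uint32_t carry_in) {
  uint64_t usum = (uint64_t)x + y + carry_in;
  int64_t ssum = (int64_t)(int32_t)x + (int32_t)y + (int64_t)carry_in;
  uint32_t result = (uint32_t)usum;
  return Flags{(result >> 31) != 0, result == 0,
               usum != result, ssum != (int32_t)result};
}

int main() {
  uint32_t r0 = 5, r1 = 0;
  Flags cmp = AddWithCarry(r0, ~r1, 1); // CMP r0, r1 computes r0 - r1
  Flags cmn = AddWithCarry(r0, r1, 0);  // CMN r0, r1 computes r0 + r1
  // Prints CMP: C=1 Z=0 (LS false) versus CMN: C=0 Z=0 (LS true).
  printf("CMP: C=%d Z=%d   CMN: C=%d Z=%d\n", cmp.C, cmp.Z, cmn.C, cmn.Z);
}

+// Pseudo i64 compares for some floating point compares.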
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, + Defs = [CPSR] in { +def BCCi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), + IIC_Br, + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; + +def BCCZi64 : PseudoInst<(outs), + (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>; +} // usesCustomInserter + + +// Conditional moves +// FIXME: should be able to write a pattern for ARMcmov, but can't use +// a two-value operand where a dag node expects two operands. :( +// FIXME: These should all be pseudo-instructions that get expanded to +// the normal MOV instructions. That would fix the dependency on +// special casing them in tblgen. +let neverHasSideEffects = 1 in { +def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm, + IIC_iCMOVr, "mov", "\t$Rd, $Rm", + [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">, UnaryDP { + bits<4> Rd; + bits<4> Rm; + let Inst{25} = 0; + let Inst{20} = 0; + let Inst{15-12} = Rd; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rm; +} + +def MOVCCs : AI1<0b1101, (outs GPR:$Rd), + (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr, + "mov", "\t$Rd, $shift", + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">, UnaryDP { + bits<4> Rd; + bits<12> shift; + let Inst{25} = 0; + let Inst{20} = 0; + let Inst{19-16} = 0; + let Inst{15-12} = Rd; + let Inst{11-0} = shift; +} + +let isMoveImm = 1 in +def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm), + DPFrm, IIC_iMOVi, + "movw", "\t$Rd, $imm", + []>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, + UnaryDP { + bits<4> Rd; + bits<16> imm; + let Inst{25} = 1; + let Inst{20} = 0; + let Inst{19-16} = imm{15-12}; + let Inst{15-12} = Rd; + let Inst{11-0} = imm{11-0}; +} + +let isMoveImm = 1 in +def MOVCCi : AI1<0b1101, (outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, + "mov", "\t$Rd, $imm", + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">, UnaryDP { + bits<4> Rd; + bits<12> imm; + let Inst{25} = 1; + let Inst{20} = 0; + let Inst{19-16} = 0b0000; + let Inst{15-12} = Rd; + let Inst{11-0} = imm; +} + +// Two instruction predicate mov immediate. 
+let isMoveImm = 1 in
+def MOVCCi32imm : PseudoInst<(outs GPR:$Rd),
+                 (ins GPR:$false, i32imm:$src, pred:$p),
+                 IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+
+let isMoveImm = 1 in
+def MVNCCi : AI1<0b1111, (outs GPR:$Rd),
+                 (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi,
+                 "mvn", "\t$Rd, $imm",
+ [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
+                RegConstraint<"$false = $Rd">, UnaryDP {
+  bits<4> Rd;
+  bits<12> imm;
+  let Inst{25} = 1;
+  let Inst{20} = 0;
+  let Inst{19-16} = 0b0000;
+  let Inst{15-12} = Rd;
+  let Inst{11-0} = imm;
+}
+} // neverHasSideEffects
+
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+def memb_opt : Operand<i32> {
+  let PrintMethod = "printMemBOption";
+  let ParserMatchClass = MemBarrierOptOperand;
+}
+
+// Memory barriers protect the atomic sequences.
+let hasSideEffects = 1 in {
+def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf57ff05;
+  let Inst{3-0} = opt;
+}
+
+def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary,
+                    "mcr", "\tp15, 0, $zero, c7, c10, 5",
+                    [(ARMMemBarrierMCR GPR:$zero)]>,
+                    Requires<[IsARM, HasV6]> {
+  // FIXME: add encoding
+}
+}
+
+def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+                "dsb", "\t$opt",
+                [/* For disassembly only; pattern left blank */]>,
+                Requires<[IsARM, HasDB]> {
+  bits<4> opt;
+  let Inst{31-4} = 0xf57ff04;
+  let Inst{3-0} = opt;
+}
+
+// ISB has only full system option -- for disassembly only
+def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+          Requires<[IsARM, HasDB]> {
+  let Inst{31-4} = 0xf57ff06;
+  let Inst{3-0} = 0b1111;
+}
+
+let usesCustomInserter = 1 in {
+  let Uses = [CPSR] in {
+    def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+
+    def ATOMIC_SWAP_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+    def ATOMIC_SWAP_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+    def ATOMIC_SWAP_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+    def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+    def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+    def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrexb", "\t$Rt, [$Rn]",
+                    []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrexh", "\t$Rt, [$Rn]",
+                    []>;
+def LDREX  : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary,
+                    "ldrex", "\t$Rt, [$Rn]",
+                    []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn),
+                    NoItinerary,
+                    "ldrexd", "\t$Rt, $Rt2, [$Rn]",
+                    []>;
+}
+
+let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
+def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn),
+                    NoItinerary,
+                    "strexb", "\t$Rd, $src, [$Rn]",
+                    []>;
+def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
+                    NoItinerary,
+                    "strexh", "\t$Rd, $Rt, [$Rn]",
+                    []>;
+def STREX  : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn),
+                    NoItinerary,
+                    "strex", "\t$Rd, $Rt, [$Rn]",
+                    []>;
+def STREXD : AIstrex<0b01, (outs GPR:$Rd),
+                    (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn),
+                    NoItinerary,
+                    "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]",
+                    []>;
+}
+
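At the MI level, these ATOMIC_* pseudos are expanded by the custom inserter into retry loops built from the exclusive loads and stores declared above. As a rough sketch only (GNU inline-asm syntax, assuming compilation for an ARMv6+ target; this is not LLVM's actual expansion code), ATOMIC_LOAD_ADD_I32 behaves like:

#include <cstdint>

// Returns the value *ptr held before the update, matching the result of
// the atomic_load_add_32 node.
uint32_t atomic_load_add_32(uint32_t *ptr, uint32_t incr) {
  uint32_t old, newval, failed;
  do {
    asm volatile("ldrex %0, [%1]" : "=r"(old) : "r"(ptr) : "memory");
    newval = old + incr;
    asm volatile("strex %0, %1, [%2]"
                 : "=&r"(failed)
                 : "r"(newval), "r"(ptr)
                 : "memory");
  } while (failed != 0); // strex writes 1 if the reservation was lost
  return old;
}

The status register written by strex is what drives the retry loop, which is why the STREX definitions above mark $Rd with @earlyclobber.

+// Clear-Exclusive is for disassembly only.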
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+                [/* For disassembly only; pattern left blank */]>,
+            Requires<[IsARM, HasV7]> {
+  let Inst{31-0} = 0b11110101011111111111000000011111;
+}
+
+// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
+let mayLoad = 1 in {
+def SWP  : AIswp<0, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swp",
+                 [/* For disassembly only; pattern left blank */]>;
+def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
+                 [/* For disassembly only; pattern left blank */]>;
+}
+
+//===----------------------------------------------------------------------===//
+// TLS Instructions
+//
+
+// __aeabi_read_tp preserves the registers r1-r3.
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the aeabi_read_tp function.
+let isCall = 1,
+  Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
+  def TPsoft : PseudoInst<(outs), (ins), IIC_Br,
+               [(set R0, ARMthread_pointer)]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SJLJ Exception handling intrinsics
+//   eh_sjlj_setjmp() is an instruction sequence to store the return
+//   address and save #0 in R0 for the non-longjmp case.
+//   Since by its nature we may be coming from some other function to get
+//   here, and we're using the stack frame for the containing function to
+//   save/restore registers, we can't keep anything live in regs across
+//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+//   when we get here from a longjmp(). We force everything out of registers
+//   except for our own input by listing the relevant registers in Defs. By
+//   doing so, we also cause the prologue/epilogue code to actively preserve
+//   all of the callee-saved registers, which is exactly what we want.
+//   A constant value is passed in $val, and we use the location as a scratch.
+//
+// These are pseudo-instructions and are lowered to individual MC-insts, so
+// no encoding information is necessary.
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR,  D0,
+    D1,  D2,  D3,  D4,  D5,  D6,  D7,  D8,  D9,  D10, D11, D12, D13, D14, D15,
+    D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
+    D31 ], hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                               NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                           Requires<[IsARM, HasVFP2]>;
+}
+
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7,  R8,  R9,  R10, R11, R12, LR ],
+  hasSideEffects = 1, isBarrier = 1 in {
+  def Int_eh_sjlj_setjmp_nofp : PseudoInst<(outs), (ins GPR:$src, GPR:$val),
+                                   NoItinerary,
+                         [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
+                                Requires<[IsARM, NoVFP]>;
+}
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+    Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch),
+                             NoItinerary,
+                         [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+                                Requires<[IsARM, IsDarwin]>;
+}
+
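The register-clobbering scheme described in the comment above mirrors the rule C imposes on setjmp/longjmp users: anything kept in a register across the setjmp may be stale on the second return. A small standalone C++ illustration of that rule (unrelated to the LLVM definitions themselves):

#include <csetjmp>
#include <cstdio>

static jmp_buf buf;

int main() {
  volatile int on_stack = 0; // volatile forces a memory home, as Defs does
  int maybe_in_reg = 0;      // indeterminate after longjmp per the C standard
  if (setjmp(buf) == 0) {    // first return: 0 (the "save #0 in R0" path)
    on_stack = 1;
    maybe_in_reg = 1;
    longjmp(buf, 1);         // second return from setjmp, with value 1
  }
  printf("on_stack=%d maybe_in_reg=%d (latter is unreliable)\n",
         on_stack, maybe_in_reg);
}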
+// eh.sjlj.dispatchsetup pseudo-instruction.
+// This pseudo is used for ARM, Thumb1 and Thumb2. Any differences are
+// handled when the pseudo is expanded (which happens before any passes
+// that need the instruction size).
+let isBarrier = 1, hasSideEffects = 1 in
+def Int_eh_sjlj_dispatchsetup :
+                 PseudoInst<(outs), (ins GPR:$src), NoItinerary,
+                   [(ARMeh_sjlj_dispatchsetup GPR:$src)]>,
+                     Requires<[IsDarwin]>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//
+
+// Large immediate handling.
+
+// 32-bit immediate using two-piece so_imms or movw + movt.
+// This is a single pseudo instruction; the benefit is that it can be remat'd
+// as a single unit instead of having to handle reg inputs.
+// FIXME: Remove this when we can do generalized remat.
+let isReMaterializable = 1, isMoveImm = 1 in
+def MOVi32imm : PseudoInst<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVix2,
+                           [(set GPR:$dst, (arm_i32imm:$src))]>,
+                           Requires<[IsARM]>;
+
+// Pseudo instruction that combines movw + movt + add pc (if PIC).
+// It also makes it possible to rematerialize the instructions.
+// FIXME: Remove this when we can do generalized remat and when machine licm
+// can properly hoist the instructions.
+let isReMaterializable = 1 in {
+def MOV_ga_pcrel : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                              IIC_iMOVix2addpc,
+                        [(set GPR:$dst, (ARMWrapperPIC tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+def MOV_ga_dyn : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                            IIC_iMOVix2,
+                        [(set GPR:$dst, (ARMWrapperDYN tglobaladdr:$addr))]>,
+                        Requires<[IsARM, UseMovt]>;
+
+let AddedComplexity = 10 in
+def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
+                                  IIC_iMOVix2ld,
+                   [(set GPR:$dst, (load (ARMWrapperPIC tglobaladdr:$addr)))]>,
+                   Requires<[IsARM, UseMovt]>;
+} // isReMaterializable
+
+// ConstantPool, GlobalAddress, and JumpTable
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>,
+            Requires<[IsARM, DontUseMovt]>;
+def : ARMPat<(ARMWrapper  tconstpool  :$dst), (LEApcrel tconstpool  :$dst)>;
+def : ARMPat<(ARMWrapper  tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
+            Requires<[IsARM, UseMovt]>;
+def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+             (LEApcrelJT tjumptable:$dst, imm:$id)>;
+
+// TODO: add,sub,and, 3-instr forms?
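As a concrete illustration of the movw + movt case named above (plain C++ with made-up values, not LLVM's expansion code): movw writes the low halfword and zero-extends, and movt replaces the high halfword, so any 32-bit immediate is materialized in two instructions.

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t imm = 0xDEADBEEF;            // immediate to materialize
  uint16_t lo = imm & 0xFFFF;           // movw Rd, #0xBEEF -> Rd = 0x0000BEEF
  uint16_t hi = imm >> 16;              // movt Rd, #0xDEAD -> Rd = 0xDEADBEEF
  uint32_t rd = lo;                     // movw zero-extends into Rd
  rd = (rd & 0xFFFFu) | ((uint32_t)hi << 16); // movt preserves the low half
  printf("movw #0x%04X ; movt #0x%04X -> 0x%08X\n", lo, hi, rd);
}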
+ +// Tail calls +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + +// Direct calls +def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, + Requires<[IsARM, IsNotDarwin]>; +def : ARMPat<(ARMcall texternalsym:$func), (BLr9 texternalsym:$func)>, + Requires<[IsARM, IsDarwin]>; + +// zextload i1 -> zextload i8 +def : ARMPat<(zextloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(zextloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; + +// extload -> zextload +def : ARMPat<(extloadi1 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi1 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; +def : ARMPat<(extloadi8 addrmode_imm12:$addr), (LDRBi12 addrmode_imm12:$addr)>; +def : ARMPat<(extloadi8 ldst_so_reg:$addr), (LDRBrs ldst_so_reg:$addr)>; + +def : ARMPat<(extloadi16 addrmode3:$addr), (LDRH addrmode3:$addr)>; + +def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>; +def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>; + +// smul* and smla* +def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), + (SMULBB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16))), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), + (SMULBT GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16))), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), + (SMULTB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; +def : ARMV5TEPat<(sra (mul GPR:$a, sext_16_node:$b), (i32 16)), + (SMULWB GPR:$a, GPR:$b)>; + +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, sext_16_node:$b)), + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra (shl GPR:$a, (i32 16)), (i32 16)), + (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), + (sra (shl GPR:$b, (i32 16)), (i32 16)))), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, (sra (shl GPR:$b, (i32 16)), (i32 16))), + (i32 16))), + (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; +def : ARMV5TEPat<(add GPR:$acc, + (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), + (SMLAWB GPR:$a, 
GPR:$b, GPR:$acc)>; + +//===----------------------------------------------------------------------===// +// Thumb Support +// + +include "ARMInstrThumb.td" + +//===----------------------------------------------------------------------===// +// Thumb2 Support +// + +include "ARMInstrThumb2.td" + +//===----------------------------------------------------------------------===// +// Floating Point Support +// + +include "ARMInstrVFP.td" + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "ARMInstrNEON.td" + +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. For disassembly only. +// + +def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +class ACI + : I { + let Inst{27-25} = 0b110; +} + +multiclass LdStCop op31_28, bit load, string opc> { + + def _OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + opc, "\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + opc, "\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), + opc, "\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def L_OFFSET : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_PRE : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), + !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = 
load; + } + + def L_POST : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_OPTION : ACI<(outs), + (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), + !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } +} + +defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; +defm LDC2 : LdStCop<0b1111, 1, "ldc2">; +defm STC : LdStCop<{?,?,?,?}, 0, "stc">; +defm STC2 : LdStCop<0b1111, 0, "stc2">; + +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register -- for disassembly only +// + +class MovRCopro + : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; + +class MovRCopro2 + : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>; + +class MovRRCopro + : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + [/* For disassembly only; pattern left blank */]> { + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; + +class MovRRCopro2 + : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; 
+} + +def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>; +def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +// Move to ARM core register from Special Register +def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + let Inst{23-16} = 0b00001111; + let Inst{15-12} = Rd; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + let Inst{23-16} = 0b01001111; + let Inst{15-12} = Rd; + let Inst{7-4} = 0b0000; +} + +// Move from ARM core register to Special Register +// +// No need to have both system and application versions, the encodings are the +// same and the assembly parser has no way to distinguish between them. The mask +// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains +// the mask with the fields to be accessed in the special register. +def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rn; +} + +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<12> a; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-0} = a; +} diff --git a/final/lib/Target/ARM/ARMInstrNEON.td b/final/lib/Target/ARM/ARMInstrNEON.td new file mode 100644 index 00000000000..b979232d466 --- /dev/null +++ b/final/lib/Target/ARM/ARMInstrNEON.td @@ -0,0 +1,4852 @@ +//===- ARMInstrNEON.td - NEON support for ARM -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the ARM NEON instruction set. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific DAG Nodes. 
+//===----------------------------------------------------------------------===// + +def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; + +def NEONvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; +def NEONvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; +def NEONvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; +def NEONvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; +def NEONvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; +def NEONvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; +def NEONvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; +def NEONvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; +def NEONvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; +def NEONvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; + +// Types for vector shift by immediates. The "SHX" version is for long and +// narrow operations where the source and destination vectors have different +// types. The "SHINS" version is for shift and insert operations. +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHX : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def SDTARMVSHINS : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>; + +def NEONvshl : SDNode<"ARMISD::VSHL", SDTARMVSH>; +def NEONvshrs : SDNode<"ARMISD::VSHRs", SDTARMVSH>; +def NEONvshru : SDNode<"ARMISD::VSHRu", SDTARMVSH>; +def NEONvshlls : SDNode<"ARMISD::VSHLLs", SDTARMVSHX>; +def NEONvshllu : SDNode<"ARMISD::VSHLLu", SDTARMVSHX>; +def NEONvshlli : SDNode<"ARMISD::VSHLLi", SDTARMVSHX>; +def NEONvshrn : SDNode<"ARMISD::VSHRN", SDTARMVSHX>; + +def NEONvrshrs : SDNode<"ARMISD::VRSHRs", SDTARMVSH>; +def NEONvrshru : SDNode<"ARMISD::VRSHRu", SDTARMVSH>; +def NEONvrshrn : SDNode<"ARMISD::VRSHRN", SDTARMVSHX>; + +def NEONvqshls : SDNode<"ARMISD::VQSHLs", SDTARMVSH>; +def NEONvqshlu : SDNode<"ARMISD::VQSHLu", SDTARMVSH>; +def NEONvqshlsu : SDNode<"ARMISD::VQSHLsu", SDTARMVSH>; +def NEONvqshrns : SDNode<"ARMISD::VQSHRNs", SDTARMVSHX>; +def NEONvqshrnu : SDNode<"ARMISD::VQSHRNu", SDTARMVSHX>; +def NEONvqshrnsu : SDNode<"ARMISD::VQSHRNsu", SDTARMVSHX>; + +def NEONvqrshrns : SDNode<"ARMISD::VQRSHRNs", SDTARMVSHX>; +def NEONvqrshrnu : SDNode<"ARMISD::VQRSHRNu", SDTARMVSHX>; +def NEONvqrshrnsu : SDNode<"ARMISD::VQRSHRNsu", SDTARMVSHX>; + +def NEONvsli : SDNode<"ARMISD::VSLI", SDTARMVSHINS>; +def NEONvsri : SDNode<"ARMISD::VSRI", SDTARMVSHINS>; + +def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>, + SDTCisVT<2, i32>]>; +def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>; +def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>; + +def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>; +def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>; +def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>; + +def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; +def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; + +def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; + +// VDUPLANE can produce a quad-register result from a double-register source, +// so the result is not constrained to match the source. 
+def NEONvduplane : SDNode<"ARMISD::VDUPLANE",
+                          SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                               SDTCisVT<2, i32>]>>;
+
+def SDTARMVEXT   : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<0, 2>, SDTCisVT<3, i32>]>;
+def NEONvext     : SDNode<"ARMISD::VEXT", SDTARMVEXT>;
+
+def SDTARMVSHUF  : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+def NEONvrev64   : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+def NEONvrev32   : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+def NEONvrev16   : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+
+def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<0, 2>,
+                                        SDTCisSameAs<0, 3>]>;
+def NEONzip      : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
+def NEONuzp      : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
+def NEONtrn      : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+
+def SDTARMVMULL  : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
+                                        SDTCisSameAs<1, 2>]>;
+def NEONvmulls   : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
+def NEONvmullu   : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
+
+def SDTARMFMAX   : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+                                        SDTCisSameAs<0, 2>]>;
+def NEONfmax     : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin     : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits = 0;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+  ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+  unsigned EltBits = 0;
+  uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+  return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// NEON operand definitions
+//===----------------------------------------------------------------------===//
+
+def nModImm : Operand<i32> {
+  let PrintMethod = "printNEONModImmOperand";
+}
+
+//===----------------------------------------------------------------------===//
+// NEON load / store instructions
+//===----------------------------------------------------------------------===//
+
+// Use VLDM to load a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VLDMD after reg alloc.
+def VLDMQIA
+  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+                    IIC_fpLoad_m, "",
+                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+def VLDMQDB
+  : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn),
+                    IIC_fpLoad_m, "",
+                   [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>;
+
+// Use VSTM to store a Q register as a D register pair.
+// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
+def VSTMQIA
+  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+                    IIC_fpStore_m, "",
+                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+def VSTMQDB
+  : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn),
+                    IIC_fpStore_m, "",
+                   [(store (v2f64 QPR:$src), GPR:$Rn)]>;
+
+// Classes for VLD* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VLDQPseudo + : PseudoNLdSt<(outs QPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), itin, + "$addr.addr = $wb">; +class VLDQQPseudo + : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; +class VLDQQWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), itin, + "$addr.addr = $wb">; +class VLDQQQQPseudo + : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">; +class VLDQQQQWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin, + "$addr.addr = $wb, $src = $dst">; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { + +// VLD1 : Vector Load (multiple single elements) +class VLD1D op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), + (ins addrmode6:$Rn), IIC_VLD1, + "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} +class VLD1Q op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD1x2, + "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} + +def VLD1d8 : VLD1D<{0,0,0,?}, "8">; +def VLD1d16 : VLD1D<{0,1,0,?}, "16">; +def VLD1d32 : VLD1D<{1,0,0,?}, "32">; +def VLD1d64 : VLD1D<{1,1,0,?}, "64">; + +def VLD1q8 : VLD1Q<{0,0,?,?}, "8">; +def VLD1q16 : VLD1Q<{0,1,?,?}, "16">; +def VLD1q32 : VLD1Q<{1,0,?,?}, "32">; +def VLD1q64 : VLD1Q<{1,1,?,?}, "64">; + +def VLD1q8Pseudo : VLDQPseudo; +def VLD1q16Pseudo : VLDQPseudo; +def VLD1q32Pseudo : VLDQPseudo; +def VLD1q64Pseudo : VLDQPseudo; + +// ...with address register writeback: +class VLD1DWB op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, + "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} +class VLD1QWB op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, + "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; +def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; +def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; +def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; + +def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; +def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; +def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; +def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; + +def VLD1q8Pseudo_UPD : VLDQWBPseudo; +def VLD1q16Pseudo_UPD : VLDQWBPseudo; +def VLD1q32Pseudo_UPD : VLDQWBPseudo; +def VLD1q64Pseudo_UPD : VLDQWBPseudo; + +// ...with 3 registers (some of these are only for the disassembler): +class VLD1D3 op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, + "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} +class VLD1D3WB op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, + "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; +def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; +def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; +def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; + +def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; +def VLD1d16T_UPD : 
VLD1D3WB<{0,1,0,?}, "16">; +def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; +def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; + +def VLD1d64TPseudo : VLDQQPseudo; +def VLD1d64TPseudo_UPD : VLDQQWBPseudo; + +// ...with 4 registers (some of these are only for the disassembler): +class VLD1D4 op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, + "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} +class VLD1D4WB op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, + "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", + []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; +def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; +def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; +def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; + +def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">; +def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">; +def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; +def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; + +def VLD1d64QPseudo : VLDQQPseudo; +def VLD1d64QPseudo_UPD : VLDQQWBPseudo; + +// VLD2 : Vector Load (multiple 2-element structures) +class VLD2D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn), IIC_VLD2, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} +class VLD2Q op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD2x2, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} + +def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; +def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; +def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; + +def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; +def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; +def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; + +def VLD2d8Pseudo : VLDQPseudo; +def VLD2d16Pseudo : VLDQPseudo; +def VLD2d32Pseudo : VLDQPseudo; + +def VLD2q8Pseudo : VLDQQPseudo; +def VLD2q16Pseudo : VLDQQPseudo; +def VLD2q32Pseudo : VLDQQPseudo; + +// ...with address register writeback: +class VLD2DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, + "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} +class VLD2QWB op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, + "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; + +def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; +def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; +def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; + +def VLD2d8Pseudo_UPD : VLDQWBPseudo; +def VLD2d16Pseudo_UPD : VLDQWBPseudo; +def VLD2d32Pseudo_UPD : VLDQWBPseudo; + +def VLD2q8Pseudo_UPD : VLDQQWBPseudo; +def VLD2q16Pseudo_UPD : VLDQQWBPseudo; +def VLD2q32Pseudo_UPD : VLDQQWBPseudo; + +// ...with double-spaced registers (for disassembly only): +def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; +def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; 
+def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; + +// VLD3 : Vector Load (multiple 3-element structures) +class VLD3D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn), IIC_VLD3, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} + +def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; +def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; +def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; + +def VLD3d8Pseudo : VLDQQPseudo; +def VLD3d16Pseudo : VLDQQPseudo; +def VLD3d32Pseudo : VLDQQPseudo; + +// ...with address register writeback: +class VLD3DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; + +def VLD3d8Pseudo_UPD : VLDQQWBPseudo; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo; + +// ...with double-spaced registers: +def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; +def VLD3q16 : VLD3D<0b0101, {0,1,0,?}, "16">; +def VLD3q32 : VLD3D<0b0101, {1,0,0,?}, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; + +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8oddPseudo : VLDQQQQPseudo; +def VLD3q16oddPseudo : VLDQQQQPseudo; +def VLD3q32oddPseudo : VLDQQQQPseudo; + +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; + +// VLD4 : Vector Load (multiple 4-element structures) +class VLD4D op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn), IIC_VLD4, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + let Rm = 0b1111; + let Inst{5-4} = Rn{5-4}; +} + +def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; +def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; +def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; + +def VLD4d8Pseudo : VLDQQPseudo; +def VLD4d16Pseudo : VLDQQPseudo; +def VLD4d32Pseudo : VLDQQPseudo; + +// ...with address register writeback: +class VLD4DWB op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; +} + +def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; + +def VLD4d8Pseudo_UPD : VLDQQWBPseudo; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo; + +// ...with double-spaced registers: +def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; +def VLD4q16 : VLD4D<0b0001, {0,1,?,?}, "16">; +def VLD4q32 : VLD4D<0b0001, {1,0,?,?}, "32">; +def VLD4q8_UPD : 
VLD4DWB<0b0001, {0,0,?,?}, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; + +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8oddPseudo : VLDQQQQPseudo; +def VLD4q16oddPseudo : VLDQQQQPseudo; +def VLD4q32oddPseudo : VLDQQQQPseudo; + +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; + +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 + +// Classes for VLD*LN pseudo-instructions with multi-register operands. +// These are expanded to real instructions after register allocation. +class VLDQLNPseudo + : PseudoNLdSt<(outs QPR:$dst), + (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQLNWBPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQLNPseudo + : PseudoNLdSt<(outs QQPR:$dst), + (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQLNWBPseudo + : PseudoNLdSt<(outs QQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; +class VLDQQQQLNPseudo + : PseudoNLdSt<(outs QQQQPR:$dst), + (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane), + itin, "$src = $dst">; +class VLDQQQQLNWBPseudo + : PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src, + nohash_imm:$lane), itin, "$addr.addr = $wb, $src = $dst">; + +// VLD1LN : Vector Load (single element to one lane) +class VLD1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag LoadOp> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd), + (ins addrmode6:$Rn, DPR:$src, nohash_imm:$lane), + IIC_VLD1ln, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn", + "$src = $Vd", + [(set DPR:$Vd, (vector_insert (Ty DPR:$src), + (i32 (LoadOp addrmode6:$Rn)), + imm:$lane))]> { + let Rm = 0b1111; +} +class VLD1QLNPseudo : VLDQLNPseudo { + let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), + (i32 (LoadOp addrmode6:$addr)), + imm:$lane))]; +} + +def VLD1LNd8 : VLD1LN<0b0000, {?,?,?,0}, "8", v8i8, extloadi8> { + let Inst{7-5} = lane{2-0}; +} +def VLD1LNd16 : VLD1LN<0b0100, {?,?,0,?}, "16", v4i16, extloadi16> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VLD1LNd32 : VLD1LN<0b1000, {?,0,?,?}, "32", v2i32, load> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{4}; + let Inst{4} = Rn{4}; +} + +def VLD1LNq8Pseudo : VLD1QLNPseudo; +def VLD1LNq16Pseudo : VLD1QLNPseudo; +def VLD1LNq32Pseudo : VLD1QLNPseudo; + +def : Pat<(vector_insert (v2f32 DPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>; +def : Pat<(vector_insert (v4f32 QPR:$src), + (f32 (load addrmode6:$addr)), imm:$lane), + (VLD1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { + +// ...with address register writeback: +class VLD1LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, + "\\{$Vd[$lane]\\}, $Rn$Rm", + "$src = $Vd, $Rn.addr = $wb", []>; + +def VLD1LNd8_UPD : VLD1LNWB<0b0000, {?,?,?,0}, "8"> { + let 
Inst{7-5} = lane{2-0}; +} +def VLD1LNd16_UPD : VLD1LNWB<0b0100, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; + let Inst{4} = Rn{4}; +} +def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{4}; + let Inst{4} = Rn{4}; +} + +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo; + +// VLD2LN : Vector Load (single 2-element structure to one lane) +class VLD2LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} + +def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNd8Pseudo : VLDQLNPseudo; +def VLD2LNd16Pseudo : VLDQLNPseudo; +def VLD2LNd32Pseudo : VLDQLNPseudo; + +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNq16Pseudo : VLDQQLNPseudo; +def VLD2LNq32Pseudo : VLDQQLNPseudo; + +// ...with address register writeback: +class VLD2LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2lnu, "vld2", Dt, + "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn$Rm", + "$src1 = $Vd, $src2 = $dst2, $Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; +} + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo; +def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo; +def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo; + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo; +def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo; + +// VLD3LN : Vector Load (single 3-element structure to one lane) +class VLD3LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { + let Rm = 0b1111; +} + +def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNd8Pseudo : VLDQQLNPseudo; +def VLD3LNd16Pseudo : VLDQQLNPseudo; +def VLD3LNd32Pseudo : VLDQQLNPseudo; + +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNq16Pseudo : VLDQQQQLNPseudo; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo; + +// ...with 
address register writeback: +class VLD3LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3lnu, "vld3", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", + []>; + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo; + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { + let Inst{7} = lane{0}; +} + +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; + +// VLD4LN : Vector Load (single 4-element structure to one lane) +class VLD4LN op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, + "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { + let Rm = 0b1111; + let Inst{4} = Rn{4}; +} + +def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNd8Pseudo : VLDQQLNPseudo; +def VLD4LNd16Pseudo : VLDQQLNPseudo; +def VLD4LNd32Pseudo : VLDQQLNPseudo; + +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNq16Pseudo : VLDQQQQLNPseudo; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo; + +// ...with address register writeback: +class VLD4LNWB op11_8, bits<4> op7_4, string Dt> + : NLdStLn<1, 0b10, op11_8, op7_4, + (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$Rn, am6offset:$Rm, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4lnu, "vld4", Dt, +"\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn$Rm", +"$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $Rn.addr = $wb", + []> { + let Inst{4} = Rn{4}; +} + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8"> { + let Inst{7-5} = lane{2-0}; +} +def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { + let Inst{7-6} = lane{1-0}; +} +def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { + let Inst{7} = lane{0}; + let Inst{5} = Rn{5}; +} + +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; + +} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 + +// 
+// VLD1DUP : Vector Load (single element to all lanes)
+class VLD1DUP<bits<4> op7_4, string Dt, ValueType Ty, PatFrag LoadOp>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd), (ins addrmode6dup:$Rn),
+          IIC_VLD1dup, "vld1", Dt, "\\{$Vd[]\\}, $Rn", "",
+          [(set DPR:$Vd, (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$Rn)))))]> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPPseudo<ValueType Ty, PatFrag LoadOp> : VLDQPseudo<IIC_VLD1dup> {
+  let Pattern = [(set QPR:$dst,
+                      (Ty (NEONvdup (i32 (LoadOp addrmode6dup:$addr)))))];
+}
+
+def VLD1DUPd8  : VLD1DUP<{0,0,0,?}, "8", v8i8, extloadi8>;
+def VLD1DUPd16 : VLD1DUP<{0,1,0,?}, "16", v4i16, extloadi16>;
+def VLD1DUPd32 : VLD1DUP<{1,0,0,?}, "32", v2i32, load>;
+
+def VLD1DUPq8Pseudo  : VLD1QDUPPseudo<v16i8, extloadi8>;
+def VLD1DUPq16Pseudo : VLD1QDUPPseudo<v8i16, extloadi16>;
+def VLD1DUPq32Pseudo : VLD1QDUPPseudo<v4i32, load>;
+
+def : Pat<(v2f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPd32 addrmode6:$addr)>;
+def : Pat<(v4f32 (NEONvdup (f32 (load addrmode6dup:$addr)))),
+          (VLD1DUPq32Pseudo addrmode6:$addr)>;
+
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
+
+class VLD1QDUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD1dup,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPq8  : VLD1QDUP<{0,0,1,0}, "8">;
+def VLD1DUPq16 : VLD1QDUP<{0,1,1,?}, "16">;
+def VLD1DUPq32 : VLD1QDUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD1DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+class VLD1QDUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1100, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD1dupu,
+          "vld1", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD1DUPd8_UPD  : VLD1DUPWB<{0,0,0,0}, "8">;
+def VLD1DUPd16_UPD : VLD1DUPWB<{0,1,0,?}, "16">;
+def VLD1DUPd32_UPD : VLD1DUPWB<{1,0,0,?}, "32">;
+
+def VLD1DUPq8_UPD  : VLD1QDUPWB<{0,0,1,0}, "8">;
+def VLD1DUPq16_UPD : VLD1QDUPWB<{0,1,1,?}, "16">;
+def VLD1DUPq32_UPD : VLD1QDUPWB<{1,0,1,?}, "32">;
+
+def VLD1DUPq8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq16Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+def VLD1DUPq32Pseudo_UPD : VLDQWBPseudo<IIC_VLD1dupu>;
+
+// VLD2DUP : Vector Load (single 2-element structure to all lanes)
+class VLD2DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2),
+          (ins addrmode6dup:$Rn), IIC_VLD2dup,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8  : VLD2DUP<{0,0,0,?}, "8">;
+def VLD2DUPd16 : VLD2DUP<{0,1,0,?}, "16">;
+def VLD2DUPd32 : VLD2DUP<{1,0,0,?}, "32">;
+
+def VLD2DUPd8Pseudo  : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd16Pseudo : VLDQPseudo<IIC_VLD2dup>;
+def VLD2DUPd32Pseudo : VLDQPseudo<IIC_VLD2dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD2DUPd8x2  : VLD2DUP<{0,0,1,?}, "8">;
+def VLD2DUPd16x2 : VLD2DUP<{0,1,1,?}, "16">;
+def VLD2DUPd32x2 : VLD2DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD2DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1101, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD2dupu,
+          "vld2", Dt, "\\{$Vd[], $dst2[]\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD2DUPd8_UPD  : VLD2DUPWB<{0,0,0,0}, "8">;
+def VLD2DUPd16_UPD : VLD2DUPWB<{0,1,0,?}, "16">;
+def VLD2DUPd32_UPD : VLD2DUPWB<{1,0,0,?}, "32">;
+
+def VLD2DUPd8x2_UPD  : VLD2DUPWB<{0,0,1,0}, "8">;
+def VLD2DUPd16x2_UPD : VLD2DUPWB<{0,1,1,?}, "16">;
+def VLD2DUPd32x2_UPD : VLD2DUPWB<{1,0,1,?}, "32">;
+
+def VLD2DUPd8Pseudo_UPD  : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd16Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+def VLD2DUPd32Pseudo_UPD : VLDQWBPseudo<IIC_VLD2dupu>;
+
+// VLD3DUP : Vector Load (single 3-element structure to all lanes)
+class VLD3DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3),
+          (ins addrmode6dup:$Rn), IIC_VLD3dup,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8  : VLD3DUP<{0,0,0,?}, "8">;
+def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">;
+def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">;
+
+def VLD3DUPd8Pseudo  : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd16Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+def VLD3DUPd32Pseudo : VLDQQPseudo<IIC_VLD3dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD3DUPd8x2  : VLD3DUP<{0,0,1,?}, "8">;
+def VLD3DUPd16x2 : VLD3DUP<{0,1,1,?}, "16">;
+def VLD3DUPd32x2 : VLD3DUP<{1,0,1,?}, "32">;
+
+// ...with address register writeback:
+class VLD3DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1110, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD3dupu,
+          "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD3DUPd8_UPD  : VLD3DUPWB<{0,0,0,0}, "8">;
+def VLD3DUPd16_UPD : VLD3DUPWB<{0,1,0,?}, "16">;
+def VLD3DUPd32_UPD : VLD3DUPWB<{1,0,0,?}, "32">;
+
+def VLD3DUPd8x2_UPD  : VLD3DUPWB<{0,0,1,0}, "8">;
+def VLD3DUPd16x2_UPD : VLD3DUPWB<{0,1,1,?}, "16">;
+def VLD3DUPd32x2_UPD : VLD3DUPWB<{1,0,1,?}, "32">;
+
+def VLD3DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD3dupu>;
+
+// VLD4DUP : Vector Load (single 4-element structure to all lanes)
+class VLD4DUP<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+          (ins addrmode6dup:$Rn), IIC_VLD4dup,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8  : VLD4DUP<{0,0,0,?}, "8">;
+def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">;
+def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo  : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd16Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+def VLD4DUPd32Pseudo : VLDQQPseudo<IIC_VLD4dup>;
+
+// ...with double-spaced registers (not used for codegen):
+def VLD4DUPd8x2  : VLD4DUP<{0,0,1,?}, "8">;
+def VLD4DUPd16x2 : VLD4DUP<{0,1,1,?}, "16">;
+def VLD4DUPd32x2 : VLD4DUP<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+// ...with address register writeback:
+class VLD4DUPWB<bits<4> op7_4, string Dt>
+  : NLdSt<1, 0b10, 0b1111, op7_4,
+          (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
+          (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu,
+          "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VLD4DUPd8_UPD  : VLD4DUPWB<{0,0,0,0}, "8">;
+def VLD4DUPd16_UPD : VLD4DUPWB<{0,1,0,?}, "16">;
+def VLD4DUPd32_UPD : VLD4DUPWB<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8x2_UPD  : VLD4DUPWB<{0,0,1,0}, "8">;
+def VLD4DUPd16x2_UPD : VLD4DUPWB<{0,1,1,?}, "16">;
+def VLD4DUPd32x2_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; }
+
+def VLD4DUPd8Pseudo_UPD  : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo<IIC_VLD4dupu>;
+
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// Classes for VST* pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src), itin, "">;
+class VSTQWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin,
+                "$addr.addr = $wb">;
+class VSTQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">;
+class VSTQQWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src), itin,
+                "$addr.addr = $wb">;
+class VSTQQQQPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src), itin, "">;
+class VSTQQQQWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
+                "$addr.addr = $wb">;
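These pseudos give the register allocator a single QPR/QQPR/QQQQPR operand to work with; only after allocation does the backend rewrite each pseudo into the real multi-D-register store. A rough C++ sketch of the underlying register arithmetic (hypothetical names; the actual rewriting is done by the ARM pseudo-expansion code, not this snippet):

// On ARM, Q register qN overlaps D registers d(2N) and d(2N+1), so a
// pseudo store of one Q register expands to a vst1 of its two D halves.
#include <cstdio>

struct DRegPair { unsigned d0, d1; };

inline DRegPair dSubRegs(unsigned q) { return { 2 * q, 2 * q + 1 }; }

void expandVST1qPseudo(unsigned q) {
  DRegPair d = dSubRegs(q);
  std::printf("vst1.8 {d%u, d%u}, [rN]\n", d.d0, d.d1);  // expanded form
}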
+
+// VST1 : Vector Store (multiple single elements)
+class VST1D<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd),
+          IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+class VST1Q<bits<4> op7_4, string Dt>
+  : NLdSt<0,0b00,0b1010,op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2,
+          "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8  : VST1D<{0,0,0,?}, "8">;
+def VST1d16 : VST1D<{0,1,0,?}, "16">;
+def VST1d32 : VST1D<{1,0,0,?}, "32">;
+def VST1d64 : VST1D<{1,1,0,?}, "64">;
+
+def VST1q8  : VST1Q<{0,0,?,?}, "8">;
+def VST1q16 : VST1Q<{0,1,?,?}, "16">;
+def VST1q32 : VST1Q<{1,0,?,?}, "32">;
+def VST1q64 : VST1Q<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo  : VSTQPseudo<IIC_VST1x2>;
+def VST1q16Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q32Pseudo : VSTQPseudo<IIC_VST1x2>;
+def VST1q64Pseudo : VSTQPseudo<IIC_VST1x2>;
+
+// ...with address register writeback:
+class VST1DWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd), IIC_VST1u,
+          "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+class VST1QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+          IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8_UPD  : VST1DWB<{0,0,0,?}, "8">;
+def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">;
+def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">;
+def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">;
+
+def VST1q8_UPD  : VST1QWB<{0,0,?,?}, "8">;
+def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">;
+def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">;
+def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">;
+
+def VST1q8Pseudo_UPD  : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q16Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q32Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+def VST1q64Pseudo_UPD : VSTQWBPseudo<IIC_VST1x2u>;
+
+// ...with 3 registers (some of these are only for the disassembler):
+class VST1D3<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3),
+          IIC_VST1x3, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+class VST1D3WB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3),
+          IIC_VST1x3u, "vst1", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST1d8T  : VST1D3<{0,0,0,?}, "8">;
+def VST1d16T : VST1D3<{0,1,0,?}, "16">;
+def VST1d32T : VST1D3<{1,0,0,?}, "32">;
+def VST1d64T : VST1D3<{1,1,0,?}, "64">;
+
+def VST1d8T_UPD  : VST1D3WB<{0,0,0,?}, "8">;
+def VST1d16T_UPD : VST1D3WB<{0,1,0,?}, "16">;
+def VST1d32T_UPD : VST1D3WB<{1,0,0,?}, "32">;
+def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">;
+
+def VST1d64TPseudo     : VSTQQPseudo<IIC_VST1x3>;
+def VST1d64TPseudo_UPD : VSTQQWBPseudo<IIC_VST1x3u>;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST1x4, "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", "",
+          []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
+class VST1D4WB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST1x4u,
+          "vst1", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1d8Q  : VST1D4<{0,0,?,?}, "8">;
+def VST1d16Q : VST1D4<{0,1,?,?}, "16">;
+def VST1d32Q : VST1D4<{1,0,?,?}, "32">;
+def VST1d64Q : VST1D4<{1,1,?,?}, "64">;
+
+def VST1d8Q_UPD  : VST1D4WB<{0,0,?,?}, "8">;
+def VST1d16Q_UPD : VST1D4WB<{0,1,?,?}, "16">;
+def VST1d32Q_UPD : VST1D4WB<{1,0,?,?}, "32">;
+def VST1d64Q_UPD : VST1D4WB<{1,1,?,?}, "64">;
+
+def VST1d64QPseudo     : VSTQQPseudo<IIC_VST1x4>;
+def VST1d64QPseudo_UPD : VSTQQWBPseudo<IIC_VST1x4u>;
+
+// VST2 : Vector Store (multiple 2-element structures)
+class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2),
+          IIC_VST2, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
+class VST2Q<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0011, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST2x2, "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8  : VST2D<0b1000, {0,0,?,?}, "8">;
+def VST2d16 : VST2D<0b1000, {0,1,?,?}, "16">;
+def VST2d32 : VST2D<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8  : VST2Q<{0,0,?,?}, "8">;
+def VST2q16 : VST2Q<{0,1,?,?}, "16">;
+def VST2q32 : VST2Q<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo  : VSTQPseudo<IIC_VST2>;
+def VST2d16Pseudo : VSTQPseudo<IIC_VST2>;
+def VST2d32Pseudo : VSTQPseudo<IIC_VST2>;
+
+def VST2q8Pseudo  : VSTQQPseudo<IIC_VST2x2>;
+def VST2q16Pseudo : VSTQQPseudo<IIC_VST2x2>;
+def VST2q32Pseudo : VSTQQPseudo<IIC_VST2x2>;
+
+// ...with address register writeback:
+class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2),
+          IIC_VST2u, "vst2", Dt, "\\{$Vd, $src2\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+class VST2QWB<bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST2x2u,
+          "vst2", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST2d8_UPD  : VST2DWB<0b1000, {0,0,?,?}, "8">;
+def VST2d16_UPD : VST2DWB<0b1000, {0,1,?,?}, "16">;
+def VST2d32_UPD : VST2DWB<0b1000, {1,0,?,?}, "32">;
+
+def VST2q8_UPD  : VST2QWB<{0,0,?,?}, "8">;
+def VST2q16_UPD : VST2QWB<{0,1,?,?}, "16">;
+def VST2q32_UPD : VST2QWB<{1,0,?,?}, "32">;
+
+def VST2d8Pseudo_UPD  : VSTQWBPseudo<IIC_VST2u>;
+def VST2d16Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+def VST2d32Pseudo_UPD : VSTQWBPseudo<IIC_VST2u>;
+
+def VST2q8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q16Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+def VST2q32Pseudo_UPD : VSTQQWBPseudo<IIC_VST2x2u>;
+
+// ...with double-spaced registers (for disassembly only):
+def VST2b8  : VST2D<0b1001, {0,0,?,?}, "8">;
+def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">;
+def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">;
+def VST2b8_UPD  : VST2DWB<0b1001, {0,0,?,?}, "8">;
+def VST2b16_UPD : VST2DWB<0b1001, {0,1,?,?}, "16">;
+def VST2b32_UPD : VST2DWB<0b1001, {1,0,?,?}, "32">;
+
+// VST3 : Vector Store (multiple 3-element structures)
+class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3,
+          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST3d8  : VST3D<0b0100, {0,0,0,?}, "8">;
+def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">;
+def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo  : VSTQQPseudo<IIC_VST3>;
+def VST3d16Pseudo : VSTQQPseudo<IIC_VST3>;
+def VST3d32Pseudo : VSTQQPseudo<IIC_VST3>;
+
+// ...with address register writeback:
+class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u,
+          "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST3d8_UPD  : VST3DWB<0b0100, {0,0,0,?}, "8">;
+def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">;
+def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">;
+
+def VST3d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+def VST3d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST3u>;
+
+// ...with double-spaced registers:
+def VST3q8  : VST3D<0b0101, {0,0,0,?}, "8">;
+def VST3q16 : VST3D<0b0101, {0,1,0,?}, "16">;
+def VST3q32 : VST3D<0b0101, {1,0,0,?}, "32">;
+def VST3q8_UPD  : VST3DWB<0b0101, {0,0,0,?}, "8">;
+def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">;
+def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">;
+
+def VST3q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST3q8oddPseudo  : VSTQQQQPseudo<IIC_VST3>;
+def VST3q16oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+def VST3q32oddPseudo : VSTQQQQPseudo<IIC_VST3>;
+
+def VST3q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST3u>;
+
+// VST4 : Vector Store (multiple 4-element structures)
+class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs),
+          (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4),
+          IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn",
+          "", []> {
+  let Rm = 0b1111;
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8  : VST4D<0b0000, {0,0,?,?}, "8">;
+def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">;
+def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo  : VSTQQPseudo<IIC_VST4>;
+def VST4d16Pseudo : VSTQQPseudo<IIC_VST4>;
+def VST4d32Pseudo : VSTQQPseudo<IIC_VST4>;
+
+// ...with address register writeback:
+class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb),
+          (ins addrmode6:$Rn, am6offset:$Rm,
+           DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u,
+          "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm",
+          "$Rn.addr = $wb", []> {
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST4d8_UPD  : VST4DWB<0b0000, {0,0,?,?}, "8">;
+def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">;
+def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">;
+
+def VST4d8Pseudo_UPD  : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d16Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+def VST4d32Pseudo_UPD : VSTQQWBPseudo<IIC_VST4u>;
+
+// ...with double-spaced registers:
+def VST4q8  : VST4D<0b0001, {0,0,?,?}, "8">;
+def VST4q16 : VST4D<0b0001, {0,1,?,?}, "16">;
+def VST4q32 : VST4D<0b0001, {1,0,?,?}, "32">;
+def VST4q8_UPD  : VST4DWB<0b0001, {0,0,?,?}, "8">;
+def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">;
+def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">;
+
+def VST4q8Pseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32Pseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+// ...alternate versions to be allocated odd register numbers:
+def VST4q8oddPseudo  : VSTQQQQPseudo<IIC_VST4>;
+def VST4q16oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+def VST4q32oddPseudo : VSTQQQQPseudo<IIC_VST4>;
+
+def VST4q8oddPseudo_UPD  : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo<IIC_VST4u>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+// Classes for VST*LN pseudo-instructions with multi-register operands.
+// These are expanded to real instructions after register allocation.
+class VSTQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
+class VSTQQQQLNPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQQQPR:$src, nohash_imm:$lane),
+                itin, "">;
+class VSTQQQQLNWBPseudo<InstrItinClass itin>
+  : PseudoNLdSt<(outs GPR:$wb),
+                (ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src,
+                 nohash_imm:$lane), itin, "$addr.addr = $wb">;
+
+// VST1LN : Vector Store (single element from one lane)
+class VST1LN<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+             PatFrag StoreOp, SDNode ExtractOp>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+            (ins addrmode6:$Rn, DPR:$Vd, nohash_imm:$lane),
+            IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
+            [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6:$Rn)]> {
+  let Rm = 0b1111;
+}
+class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNPseudo<IIC_VST1ln> {
+  let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                          addrmode6:$addr)];
+}
+
+def VST1LNd8  : VST1LN<0b0000, {?,?,?,0}, "8", v8i8, truncstorei8,
+                       NEONvgetlaneu> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16 : VST1LN<0b0100, {?,?,0,?}, "16", v4i16, truncstorei16,
+                       NEONvgetlaneu> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{5};
+}
+def VST1LNd32 : VST1LN<0b1000, {?,0,?,?}, "32", v2i32, store, extractelt> {
+  let Inst{7}   = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo  : VST1QLNPseudo<v16i8, truncstorei8, NEONvgetlaneu>;
+def VST1LNq16Pseudo : VST1QLNPseudo<v8i16, truncstorei16, NEONvgetlaneu>;
+def VST1LNq32Pseudo : VST1QLNPseudo<v4i32, store, extractelt>;
+
+def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNd32 addrmode6:$addr, DPR:$src, imm:$lane)>;
+def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr),
+          (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>;
+
+// ...with address register writeback:
+class VST1LNWB<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
+               PatFrag StoreOp, SDNode ExtractOp>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+            (ins addrmode6:$Rn, am6offset:$Rm,
+             DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt,
+            "\\{$Vd[$lane]\\}, $Rn$Rm",
+            "$Rn.addr = $wb",
+            [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane),
+                                    addrmode6:$Rn, am6offset:$Rm))]>;
+class VST1QLNWBPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
+  : VSTQLNWBPseudo<IIC_VST1lnu> {
+  let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane),
+                                        addrmode6:$addr, am6offset:$offset))];
+}
+
+def VST1LNd8_UPD  : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8,
+                             NEONvgetlaneu> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16,
+                             NEONvgetlaneu> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{5};
+}
+def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store,
+                             extractelt> {
+  let Inst{7}   = lane{0};
+  let Inst{5-4} = Rn{5-4};
+}
+
+def VST1LNq8Pseudo_UPD  : VST1QLNWBPseudo<v16i8, post_truncsti8, NEONvgetlaneu>;
+def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo<v8i16, post_truncsti16, NEONvgetlaneu>;
+def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo<v4i32, post_store, extractelt>;
+
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
+
+// VST2LN : Vector Store (single 2-element structure from one lane)
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane),
+            IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn",
+            "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST2LNd8  : VST2LN<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo  : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd16Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+def VST2LNd32Pseudo : VSTQLNPseudo<IIC_VST2ln>;
+
+// ...with double-spaced registers:
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+  let Inst{4}   = Rn{4};
+}
+def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7}   = lane{0};
+  let Inst{4}   = Rn{4};
+}
+
+def VST2LNq16Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+def VST2LNq32Pseudo : VSTQQLNPseudo<IIC_VST2ln>;
+
+// ...with address register writeback:
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+            (ins addrmode6:$addr, am6offset:$offset,
+             DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST2lnu, "vst2", Dt,
+            "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
+            "$addr.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST2LNd8_UPD  : VST2LNWB<0b0001, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST2LNd8Pseudo_UPD  : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo<IIC_VST2lnu>;
+
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST2lnu>;
+
+// VST3LN : Vector Store (single 3-element structure from one lane)
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3,
+             nohash_imm:$lane), IIC_VST3ln, "vst3", Dt,
+            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> {
+  let Rm = 0b1111;
+}
+
+def VST3LNd8  : VST3LN<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo  : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd16Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+def VST3LNd32Pseudo : VSTQQLNPseudo<IIC_VST3ln>;
+
+// ...with double-spaced registers:
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+def VST3LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST3ln>;
+
+// ...with address register writeback:
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+            (ins addrmode6:$Rn, am6offset:$Rm,
+             DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane),
+            IIC_VST3lnu, "vst3", Dt,
+            "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn$Rm",
+            "$Rn.addr = $wb", []>;
+
+def VST3LNd8_UPD  : VST3LNWB<0b0010, {?,?,?,0}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST3lnu>;
+
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> {
+  let Inst{7} = lane{0};
+}
+
+def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST3lnu>;
+
+// VST4LN : Vector Store (single 4-element structure from one lane)
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs),
+            (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4,
+             nohash_imm:$lane), IIC_VST4ln, "vst4", Dt,
+            "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn",
+            "", []> {
+  let Rm = 0b1111;
+  let Inst{4} = Rn{4};
+}
+
+def VST4LNd8  : VST4LN<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo  : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd16Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+def VST4LNd32Pseudo : VSTQQLNPseudo<IIC_VST4ln>;
+
+// ...with double-spaced registers:
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+def VST4LNq32Pseudo : VSTQQQQLNPseudo<IIC_VST4ln>;
+
+// ...with address register writeback:
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+  : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
+            (ins addrmode6:$Rn, am6offset:$Rm,
+             DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
+            IIC_VST4lnu, "vst4", Dt,
+  "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn$Rm",
+            "$Rn.addr = $wb", []> {
+  let Inst{4} = Rn{4};
+}
+
+def VST4LNd8_UPD  : VST4LNWB<0b0011, {?,?,?,?}, "8"> {
+  let Inst{7-5} = lane{2-0};
+}
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNd8Pseudo_UPD  : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo<IIC_VST4lnu>;
+
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> {
+  let Inst{7-6} = lane{1-0};
+}
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> {
+  let Inst{7} = lane{0};
+  let Inst{5} = Rn{5};
+}
+
+def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo<IIC_VST4lnu>;
+
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
+
+
+//===----------------------------------------------------------------------===//
+// NEON pattern fragments
+//===----------------------------------------------------------------------===//
+
+// Extract D sub-registers of Q registers.
+def DSubReg_i8_reg  : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
+}]>;
+def DSubReg_i16_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
+}]>;
+def DSubReg_i32_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
+}]>;
+def DSubReg_f64_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Extract S sub-registers of Q/D registers.
+def SSubReg_f32_reg : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
+}]>;
+
+// Translate lane numbers from Q registers to D subregs.
+def SubReg_i8_lane  : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 7, MVT::i32);
+}]>;
+def SubReg_i16_lane : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 3, MVT::i32);
+}]>;
+def SubReg_i32_lane : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(N->getZExtValue() & 1, MVT::i32);
+}]>;
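Taken together, these transforms implement the Q-to-D lane split used by the per-lane patterns: dividing a Q-register lane index selects the D subregister, and masking yields the lane within it. The same arithmetic for 8-bit lanes in C++ (helper names are illustrative only):

// Mirror of DSubReg_i8_reg (/8) and SubReg_i8_lane (&7): a v16i8 lane
// index splits into a D subregister index and a lane inside that D reg.
#include <cassert>

struct DLane { unsigned dsub, lane; };

inline DLane splitQLane8(unsigned qLane) {
  assert(qLane < 16 && "v16i8 has 16 lanes");
  return { qLane / 8, qLane & 7 };
}
// e.g. splitQLane8(11) == {1, 3}: lane 11 of q0 is lane 3 of d1.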
+
+//===----------------------------------------------------------------------===//
+// Instruction Classes
+//===----------------------------------------------------------------------===//
+
+// Basic 2-register operations: double- and quad-register.
+class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+  : N2V;
+class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+           bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+           string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+  : N2V;
+
+// Basic 2-register intrinsics, both double- and quad-register.
+class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+
+// Narrow 2-register operations.
+class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType TyD, ValueType TyQ, SDNode OpNode>
+  : N2V;
+
+// Narrow 2-register intrinsics.
+class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyD, ValueType TyQ, Intrinsic IntOp>
+  : N2V;
+
+// Long 2-register operations (currently only used for VMOVL).
+class N2VL<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+           bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType TyQ, ValueType TyD, SDNode OpNode>
+  : N2V;
+
+// Long 2-register intrinsics.
+class N2VLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+              bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+  : N2V;
+
+// 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register.
+class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$Vd, DPR:$Vm),
+        (ins DPR:$src1, DPR:$src2), IIC_VPERMD,
+        OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
+class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
+                  InstrItinClass itin, string OpcodeStr, string Dt>
+  : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$Vd, QPR:$Vm),
+        (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$Vd, $Vm",
+        "$src1 = $Vd, $src2 = $Vm", []>;
+
+// Basic 3-register operations: double- and quad-register.
+class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+// Same as N3VD but no data type.
+class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+            InstrItinClass itin, string OpcodeStr,
+            ValueType ResTy, ValueType OpTy,
+            SDNode OpNode, bit Commutable>
+  : N3VX{
+  let isCommutable = Commutable;
+}
+
+class N3VDSL<bits<2> op21_20, bits<4> op11_8,
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$Vn),
+                        (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
+               string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$Vn),
+                        (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+
+class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+            InstrItinClass itin, string OpcodeStr,
+            ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
+  : N3VX{
+  let isCommutable = Commutable;
+}
+class N3VQSL<bits<2> op21_20, bits<4> op11_8,
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType ResTy, ValueType OpTy, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$Vn),
+                           (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                                                imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
+               ValueType ResTy, ValueType OpTy, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$Vn),
+                           (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                                                imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+
+// Basic 3-register intrinsics, both double- and quad-register.
+class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (IntOp (Ty DPR:$Vn),
+                         (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+                                           imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (Ty DPR:$Vd),
+              (Ty (IntOp (Ty DPR:$Vn),
+                         (Ty (NEONvduplane (Ty DPR_8:$Vm), imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VDIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V {
+  let isCommutable = 0;
+}
+
+class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (ResTy QPR:$Vn),
+                            (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                                                 imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, string Dt,
+                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (IntOp (ResTy QPR:$Vn),
+                            (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                                                 imm:$lane)))))]> {
+  let isCommutable = 0;
+}
+class N3VQIntSh<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V {
+  let isCommutable = 0;
+}
+
+// Multiply-Add/Sub operations: double- and quad-register.
+class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType Ty, SDPatternOperator MulOp, SDPatternOperator OpNode>
+  : N3V;
+
+class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, string Dt,
+                  ValueType Ty, SDPatternOperator MulOp, SDPatternOperator ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_VFP2:$Vm),
+                                                     imm:$lane)))))))]>;
+class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                    string OpcodeStr, string Dt,
+                    ValueType Ty, SDNode MulOp, SDNode ShOp>
+  : N3V<0, 1, op21_20, op11_8, 1, 0,
+        (outs DPR:$Vd),
+        (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (Ty DPR:$Vd),
+              (Ty (ShOp (Ty DPR:$src1),
+                        (Ty (MulOp DPR:$Vn,
+                                   (Ty (NEONvduplane (Ty DPR_8:$Vm),
+                                                     imm:$lane)))))))]>;
+
+class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
+                SDPatternOperator MulOp, SDPatternOperator OpNode>
+  : N3V;
+class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                  string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+                  SDPatternOperator MulOp, SDPatternOperator ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (MulOp QPR:$Vn,
+                                         (ResTy (NEONvduplane (OpTy DPR_VFP2:$Vm),
+                                                              imm:$lane)))))))]>;
+class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                    string OpcodeStr, string Dt,
+                    ValueType ResTy, ValueType OpTy,
+                    SDNode MulOp, SDNode ShOp>
+  : N3V<1, 1, op21_20, op11_8, 1, 0,
+        (outs QPR:$Vd),
+        (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane),
+        NVMulSLFrm, itin,
+        OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "$src1 = $Vd",
+        [(set (ResTy QPR:$Vd),
+              (ResTy (ShOp (ResTy QPR:$src1),
+                           (ResTy (MulOp QPR:$Vn,
+                                         (ResTy (NEONvduplane (OpTy DPR_8:$Vm),
+                                                              imm:$lane)))))))]>;
+
+// Neon Intrinsic-Op instructions (VABA): double- and quad-register.
+class N3VDIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+  : N3V;
+class N3VQIntOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType Ty, Intrinsic IntOp, SDNode OpNode>
+  : N3V;
+
+// Neon 3-argument intrinsics, both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+               InstrItinClass itin, string OpcodeStr, string Dt,
+               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+               InstrItinClass itin, string OpcodeStr, string Dt,
+               ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+
+// Long Multiply-Add/Sub operations.
+class N3VLMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                InstrItinClass itin, string OpcodeStr, string Dt,
+                ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+  : N3V;
+class N3VLMulOpSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+                  InstrItinClass itin, string OpcodeStr, string Dt,
+                  ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+  : N3V;
+class N3VLMulOpSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+                    InstrItinClass itin, string OpcodeStr, string Dt,
+                    ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode>
+  : N3V;
+
+// Long Intrinsic-Op vector operations with explicit extend (VABAL).
+class N3VLIntExtOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                   InstrItinClass itin, string OpcodeStr, string Dt,
+                   ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+                   SDNode OpNode>
+  : N3V;
+
+// Neon Long 3-argument intrinsic.  The destination register is
+// a quad-register and is also used as the first source operand register.
+class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+               InstrItinClass itin, string OpcodeStr, string Dt,
+               ValueType TyQ, ValueType TyD, Intrinsic IntOp>
+  : N3V;
+class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                 string OpcodeStr, string Dt,
+                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+                   InstrItinClass itin, string OpcodeStr, string Dt,
+                   ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+
+// Narrowing 3-register intrinsics.
+class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
+              Intrinsic IntOp, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+
+// Long 3-register operations.
+class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+           InstrItinClass itin, string OpcodeStr, string Dt,
+           ValueType TyQ, ValueType TyD, SDNode OpNode, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8,
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType TyQ, ValueType TyD, SDNode OpNode>
+  : N3V;
+class N3VLSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+               InstrItinClass itin, string OpcodeStr, string Dt,
+               ValueType TyQ, ValueType TyD, SDNode OpNode>
+  : N3V;
+
+// Long 3-register operations with explicitly extended operands.
+class N3VLExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp,
+              bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics with explicit extend (VABDL).
+class N3VLIntExt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+                 InstrItinClass itin, string OpcodeStr, string Dt,
+                 ValueType TyQ, ValueType TyD, Intrinsic IntOp, SDNode ExtOp,
+                 bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+
+// Long 3-register intrinsics.
+class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+              InstrItinClass itin, string OpcodeStr, string Dt,
+              ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
+                string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+                  InstrItinClass itin, string OpcodeStr, string Dt,
+                  ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N3V;
+
+// Wide 3-register operations.
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+           string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
+           SDNode OpNode, SDNode ExtOp, bit Commutable>
+  : N3V {
+  let isCommutable = Commutable;
+}
+
+// Pairwise long 2-register intrinsics, both double- and quad-register.
+class N2VDPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+                bits<2> op17_16, bits<5> op11_7, bit op4,
+                string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+class N2VQPLInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+                bits<2> op17_16, bits<5> op11_7, bit op4,
+                string OpcodeStr, string Dt,
+                ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+
+// Pairwise long 2-register accumulate intrinsics,
+// both double- and quad-register.
+// The destination register is also used as the first source operand register.
+class N2VDPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+                 bits<2> op17_16, bits<5> op11_7, bit op4,
+                 string OpcodeStr, string Dt,
+                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+                 bits<2> op17_16, bits<5> op11_7, bit op4,
+                 string OpcodeStr, string Dt,
+                 ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
+  : N2V;
+
+// Shift by immediate,
+// both double- and quad-register.
+class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode OpNode>
+  : N2VImm;
+class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+             Format f, InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType Ty, SDNode OpNode>
+  : N2VImm;
+
+// Long shift by immediate.
+class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+             string OpcodeStr, string Dt,
+             ValueType ResTy, ValueType OpTy, SDNode OpNode>
+  : N2VImm;
+
+// Narrow shift by immediate.
+class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
+             InstrItinClass itin, string OpcodeStr, string Dt,
+             ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+  : N2VImm;
+
+// Shift right by immediate and accumulate,
+// both double- and quad-register.
+class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+  : N2VImm;
+class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+                string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+  : N2VImm;
+
+// Shift by immediate and insert,
+// both double- and quad-register.
+class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+  : N2VImm;
+class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+                Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
+  : N2VImm;
+
+// Convert, with fractional bits immediate,
+// both double- and quad-register.
+class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+              Intrinsic IntOp>
+  : N2VImm;
+class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
+              string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+              Intrinsic IntOp>
+  : N2VImm;
+
+//===----------------------------------------------------------------------===//
+// Multiclasses
+//===----------------------------------------------------------------------===//
+
+// Abbreviations used in multiclass suffixes:
+//   Q = quarter int (8 bit) elements
+//   H = half int (16 bit) elements
+//   S = single int (32 bit) elements
+//   D = double int (64 bit) elements
+
+// Neon 2-register vector operations and intrinsics.
+
+// Neon 2-register comparisons.
+// source operand element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                       bits<5> op11_7, bit op4, string opc, string Dt,
+                       string asm, SDNode OpNode> {
+  // 64-bit vector types.
+  def v8i8  : N2V;
+  def v4i16 : N2V;
+  def v2i32 : N2V;
+  def v2f32 : N2V {
+    let Inst{10} = 1; // overwrite F = 1
+  }
+
+  // 128-bit vector types.
+  def v16i8 : N2V;
+  def v8i16 : N2V;
+  def v4i32 : N2V;
+  def v4f32 : N2V {
+    let Inst{10} = 1; // overwrite F = 1
+  }
+}
+
+
+// Neon 2-register vector intrinsics,
+// element sizes of 8, 16 and 32 bits:
+multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                      bits<5> op11_7, bit op4,
+                      InstrItinClass itinD, InstrItinClass itinQ,
+                      string OpcodeStr, string Dt, Intrinsic IntOp> {
+  // 64-bit vector types.
+  def v8i8  : N2VDInt;
+  def v4i16 : N2VDInt;
+  def v2i32 : N2VDInt;
+
+  // 128-bit vector types.
+  def v16i8 : N2VQInt;
+  def v8i16 : N2VQInt;
+  def v4i32 : N2VQInt;
+}
+
+
+// Neon Narrowing 2-register vector operations,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                    bits<5> op11_7, bit op6, bit op4,
+                    InstrItinClass itin, string OpcodeStr, string Dt,
+                    SDNode OpNode> {
+  def v8i8  : N2VN;
+  def v4i16 : N2VN;
+  def v2i32 : N2VN;
+}
+
+// Neon Narrowing 2-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+                       bits<5> op11_7, bit op6, bit op4,
+                       InstrItinClass itin, string OpcodeStr, string Dt,
+                       Intrinsic IntOp> {
+  def v8i8  : N2VNInt;
+  def v4i16 : N2VNInt;
+  def v2i32 : N2VNInt;
+}
+
+
+// Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL).
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N2VL_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
+                    string OpcodeStr, string Dt, SDNode OpNode> {
+  def v8i16 : N2VL;
+  def v4i32 : N2VL;
+  def v2i64 : N2VL;
+}
+
+
+// Neon 3-register vector operations.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                   InstrItinClass itinD16, InstrItinClass itinD32,
+                   InstrItinClass itinQ16, InstrItinClass itinQ32,
+                   string OpcodeStr, string Dt,
+                   SDNode OpNode, bit Commutable = 0> {
+  // 64-bit vector types.
+  def v8i8  : N3VD;
+  def v4i16 : N3VD;
+  def v2i32 : N3VD;
+
+  // 128-bit vector types.
+  def v16i8 : N3VQ;
+  def v8i16 : N3VQ;
+  def v4i32 : N3VQ;
+}
+
+multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
+  def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+                       v4i16, ShOp>;
+  def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"),
+                     v2i32, ShOp>;
+  def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"),
+                       v8i16, v4i16, ShOp>;
+  def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"),
+                     v4i32, v2i32, ShOp>;
+}
+
+// ....then also with element size 64 bits:
+multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                    InstrItinClass itinD, InstrItinClass itinQ,
+                    string OpcodeStr, string Dt,
+                    SDNode OpNode, bit Commutable = 0>
+  : N3V_QHS {
+  def v1i64 : N3VD;
+  def v2i64 : N3VQ;
+}
+
+
+// Neon 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                     InstrItinClass itinD16, InstrItinClass itinD32,
+                     InstrItinClass itinQ16, InstrItinClass itinQ32,
+                     string OpcodeStr, string Dt,
+                     Intrinsic IntOp, bit Commutable = 0> {
+  // 64-bit vector types.
+  def v4i16 : N3VDInt;
+  def v2i32 : N3VDInt;
+
+  // 128-bit vector types.
+  def v8i16 : N3VQInt;
+  def v4i32 : N3VQInt;
+}
+multiclass N3VInt_HSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp> {
+  // 64-bit vector types.
+  def v4i16 : N3VDIntSh;
+  def v2i32 : N3VDIntSh;
+
+  // 128-bit vector types.
+  def v8i16 : N3VQIntSh;
+  def v4i32 : N3VQIntSh;
+}
+
+multiclass N3VIntSL_HS<bits<4> op11_8,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, string Dt, Intrinsic IntOp> {
+  def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16,
+                          OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>;
+  def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
+                        OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
+  def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
+                          OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+  def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
+                        OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                      InstrItinClass itinD16, InstrItinClass itinD32,
+                      InstrItinClass itinQ16, InstrItinClass itinQ32,
+                      string OpcodeStr, string Dt,
+                      Intrinsic IntOp, bit Commutable = 0>
+  : N3VInt_HS {
+  def v8i8  : N3VDInt;
+  def v16i8 : N3VQInt;
+}
+multiclass N3VInt_QHSSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                        InstrItinClass itinD16, InstrItinClass itinD32,
+                        InstrItinClass itinQ16, InstrItinClass itinQ32,
+                        string OpcodeStr, string Dt,
+                        Intrinsic IntOp>
+  : N3VInt_HSSh {
+  def v8i8  : N3VDIntSh;
+  def v16i8 : N3VQIntSh;
+}
+
+
+// ....then also with element size of 64 bits:
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                       InstrItinClass itinD16, InstrItinClass itinD32,
+                       InstrItinClass itinQ16, InstrItinClass itinQ32,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0>
+  : N3VInt_QHS {
+  def v1i64 : N3VDInt;
+  def v2i64 : N3VQInt;
+}
+multiclass N3VInt_QHSDSh<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
+                         InstrItinClass itinD16, InstrItinClass itinD32,
+                         InstrItinClass itinQ16, InstrItinClass itinQ32,
+                         string OpcodeStr, string Dt,
+                         Intrinsic IntOp>
+  : N3VInt_QHSSh {
+  def v1i64 : N3VDIntSh;
+  def v2i64 : N3VQIntSh;
+}
+
+// Neon Narrowing 3-register vector intrinsics,
+// source operand element sizes of 16, 32 and 64 bits:
+multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0> {
+  def v8i8  : N3VNInt;
+  def v4i16 : N3VNInt;
+  def v2i32 : N3VNInt;
+}
+
+
+// Neon Long 3-register vector operations.
+
+multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                    InstrItinClass itin16, InstrItinClass itin32,
+                    string OpcodeStr, string Dt,
+                    SDNode OpNode, bit Commutable = 0> {
+  def v8i16 : N3VL;
+  def v4i32 : N3VL;
+  def v2i64 : N3VL;
+}
+
+multiclass N3VLSL_HS<bit op24, bits<4> op11_8,
+                     InstrItinClass itin, string OpcodeStr, string Dt,
+                     SDNode OpNode> {
+  def v4i16 : N3VLSL16;
+  def v2i32 : N3VLSL;
+}
+
+multiclass N3VLExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itin16, InstrItinClass itin32,
+                       string OpcodeStr, string Dt,
+                       SDNode OpNode, SDNode ExtOp, bit Commutable = 0> {
+  def v8i16 : N3VLExt;
+  def v4i32 : N3VLExt;
+  def v2i64 : N3VLExt;
+}
+
+// Neon Long 3-register vector intrinsics.
+
+// First with only element sizes of 16 and 32 bits:
+multiclass N3VLInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                      InstrItinClass itin16, InstrItinClass itin32,
+                      string OpcodeStr, string Dt,
+                      Intrinsic IntOp, bit Commutable = 0> {
+  def v4i32 : N3VLInt;
+  def v2i64 : N3VLInt;
+}
+
+multiclass N3VLIntSL_HS<bit op24, bits<4> op11_8,
+                        InstrItinClass itin, string OpcodeStr, string Dt,
+                        Intrinsic IntOp> {
+  def v4i16 : N3VLIntSL16;
+  def v2i32 : N3VLIntSL;
+}
+
+// ....then also with element size of 8 bits:
+multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                       InstrItinClass itin16, InstrItinClass itin32,
+                       string OpcodeStr, string Dt,
+                       Intrinsic IntOp, bit Commutable = 0>
+  : N3VLInt_HS {
+  def v8i16 : N3VLInt;
+}
+
+// ....with explicit extend (VABDL).
+multiclass N3VLIntExt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+                          InstrItinClass itin, string OpcodeStr, string Dt,
+                          Intrinsic IntOp, SDNode ExtOp, bit Commutable = 0> {
+  def v8i16 : N3VLIntExt;
+  def v4i32 : N3VLIntExt;
+  def v2i64 : N3VLIntExt;
+}
+
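The "explicit extend" classes model instructions like VABDL, where a D-register intrinsic result is widened into a Q register by a separate extend node in the pattern: the narrow operation runs first, then every lane is zero-extended. One lane of vabdl.u8 in C++, as an illustration (the helper name is made up):

// Per-lane model of vabdl.u8: absolute difference of two 8-bit lanes,
// then zero-extension into the corresponding 16-bit lane of the result.
#include <cstdint>

inline uint16_t vabdl_u8_lane(uint8_t a, uint8_t b) {
  uint8_t diff = a > b ? a - b : b - a;  // the narrow vabd.u8 step
  return static_cast<uint16_t>(diff);    // the explicit zext step
}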
+ def v16i8 : N3VQInt3; + def v8i16 : N3VQInt3; + def v4i32 : N3VQInt3; +} + + +// Neon Long Multiply-Op vector operations, +// element sizes of 8, 16 and 32 bits: +multiclass N3VLMulOp_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, SDNode MulOp, + SDNode OpNode> { + def v8i16 : N3VLMulOp; + def v4i32 : N3VLMulOp; + def v2i64 : N3VLMulOp; +} + +multiclass N3VLMulOpSL_HS op11_8, string OpcodeStr, + string Dt, SDNode MulOp, SDNode OpNode> { + def v4i16 : N3VLMulOpSL16; + def v2i32 : N3VLMulOpSL; +} + + +// Neon Long 3-argument intrinsics. + +// First with only element sizes of 16 and 32 bits: +multiclass N3VLInt3_HS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp> { + def v4i32 : N3VLInt3; + def v2i64 : N3VLInt3; +} + +multiclass N3VLInt3SL_HS op11_8, + string OpcodeStr, string Dt, Intrinsic IntOp> { + def v4i16 : N3VLInt3SL16; + def v2i32 : N3VLInt3SL; +} + +// ....then also with element size of 8 bits: +multiclass N3VLInt3_QHS op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, Intrinsic IntOp> + : N3VLInt3_HS { + def v8i16 : N3VLInt3; +} + +// ....with explicit extend (VABAL). +multiclass N3VLIntExtOp_QHS op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp, SDNode ExtOp, SDNode OpNode> { + def v8i16 : N3VLIntExtOp; + def v4i32 : N3VLIntExtOp; + def v2i64 : N3VLIntExtOp; +} + + +// Neon Pairwise long 2-register intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N2VDPLInt; + def v4i16 : N2VDPLInt; + def v2i32 : N2VDPLInt; + + // 128-bit vector types. + def v16i8 : N2VQPLInt; + def v8i16 : N2VQPLInt; + def v4i32 : N2VQPLInt; +} + + +// Neon Pairwise long 2-register accumulate intrinsics, +// element sizes of 8, 16 and 32 bits: +multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, + bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, Intrinsic IntOp> { + // 64-bit vector types. + def v8i8 : N2VDPLInt2; + def v4i16 : N2VDPLInt2; + def v2i32 : N2VDPLInt2; + + // 128-bit vector types. + def v16i8 : N2VQPLInt2; + def v8i16 : N2VQPLInt2; + def v4i32 : N2VQPLInt2; +} + + +// Neon 2-register vector shift by immediate, +// with f of either N2RegVShLFrm or N2RegVShRFrm +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VSh_QHSD op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode, Format f> { + // 64-bit vector types. + def v8i8 : N2VDSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh; + // imm6 = xxxxxx +} + +// Neon Shift-Accumulate vector operations, +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShAdd_QHSD op11_8, bit op4, + string OpcodeStr, string Dt, SDNode ShOp> { + // 64-bit vector types. 
+ def v8i8 : N2VDShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShAdd; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShAdd { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShAdd { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShAdd { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShAdd; + // imm6 = xxxxxx +} + + +// Neon Shift-Insert vector operations, +// with f of either N2RegVShLFrm or N2RegVShRFrm +// element sizes of 8, 16, 32 and 64 bits: +multiclass N2VShIns_QHSD op11_8, bit op4, + string OpcodeStr, SDNode ShOp, + Format f> { + // 64-bit vector types. + def v8i8 : N2VDShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns; + // imm6 = xxxxxx +} + +// Neon Shift Long operations, +// element sizes of 8, 16, 32 bits: +multiclass N2VLSh_QHS op11_8, bit op7, bit op6, + bit op4, string OpcodeStr, string Dt, SDNode OpNode> { + def v8i16 : N2VLSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i32 : N2VLSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i64 : N2VLSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +// Neon Shift Narrow operations, +// element sizes of 16, 32, 64 bits: +multiclass N2VNSh_HSD op11_8, bit op7, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + def v8i8 : N2VNSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VNSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VNSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } +} + +//===----------------------------------------------------------------------===// +// Instruction Definitions. +//===----------------------------------------------------------------------===// + +// Vector Add Operations. 
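+//
+// Besides the plain add, this group defines halving, rounding-halving and
+// saturating variants.  Roughly, per s16 lane (C-like pseudocode for the
+// architectural behavior; sat16 is shorthand for clamping to i16 range):
+//   vhadd.s16 : r = (int16_t)(((int32_t)a + b) >> 1);
+//   vrhadd.s16: r = (int16_t)(((int32_t)a + b + 1) >> 1);
+//   vqadd.s16 : r = sat16((int32_t)a + b);
+// and vaddhn adds two quad registers, keeping only the high half of each
+// element of the sum.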
+ +// VADD : Vector Add (integer and floating-point) +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", + add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", + v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", + v4f32, v4f32, fadd, 1>; +// VADDL : Vector Add Long (Q = D + D) +defm VADDLs : N3VLExt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VLExt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; +// VADDW : Vector Add Wide (Q = Q + D) +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; +// VHADD : Vector Halving Add +defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "s", int_arm_neon_vhadds, 1>; +defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vhadd", "u", int_arm_neon_vhaddu, 1>; +// VRHADD : Vector Rounding Halving Add +defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "s", int_arm_neon_vrhadds, 1>; +defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vrhadd", "u", int_arm_neon_vrhaddu, 1>; +// VQADD : Vector Saturating Add +defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "s", int_arm_neon_vqadds, 1>; +defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, + IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, + "vqadd", "u", int_arm_neon_vqaddu, 1>; +// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", + int_arm_neon_vaddhn, 1>; +// VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) +defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", + int_arm_neon_vraddhn, 1>; + +// Vector Multiply Operations. 
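+//
+// Two per-lane semantics in this group are worth spelling out (s16
+// pseudocode; sat16 is a shorthand, not a real helper):
+//   vqdmulh.s16 : r = sat16(((int32_t)a * b * 2) >> 16);  // doubling, high half
+//   vmul.p8     : carry-less multiply; partial products are combined with
+//                 XOR rather than addition (polynomial arithmetic).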
+ +// VMUL : Vector Multiply (integer, polynomial and floating-point) +defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul", + "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul", + "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>; +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VFMULD, "vmul", "f32", + v2f32, v2f32, fmul, 1>; +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VFMULQ, "vmul", "f32", + v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, + v2f32, fmul>; + +def : Pat<(v8i16 (mul (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (mul (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (VMULslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), + (v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))), + (v4f32 (VMULslfq (v4f32 QPR:$src1), + (v2f32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +// VQDMULH : Vector Saturating Doubling Multiply Returning High Half +defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; +defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqdmulh", "s", int_arm_neon_vqdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), + (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), + (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half +defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm, + IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q, + "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; +defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, + "vqrdmulh", "s", int_arm_neon_vqrdmulh>; +def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), + (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), + (v4i16 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), + (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), + (v2i32 (EXTRACT_SUBREG QPR:$src2, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) +defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; +defm VMULLu : 
N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; +defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; +defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; + +// VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, IIC_VMULi32D, + "vqdmull", "s", int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, + "vqdmull", "s", int_arm_neon_vqdmull>; + +// Vector Multiply-Accumulate and Multiply-Subtract Operations. + +// VMLA : Vector Multiply Accumulate (integer and floating-point) +defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", + v4f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", + v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", + v4f32, v2f32, fmul_su, fadd_mlx>, + Requires<[HasNEON, UseFPVMLx]>; + +def : Pat<(v8i16 (add (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (add (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fadd_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLAslfq (v4f32 QPR:$src1), + (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; + +// VMLAL : Vector Multiply Accumulate Long (Q += D * D) +defm VMLALs : N3VLMulOp_QHS<0,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "s", NEONvmulls, add>; +defm VMLALu : N3VLMulOp_QHS<1,1,0b1000,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlal", "u", NEONvmullu, add>; + +defm VMLALsls : N3VLMulOpSL_HS<0, 0b0010, "vmlal", "s", NEONvmulls, add>; +defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; + +// VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlal", "s", int_arm_neon_vqdmlal>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + +// VMLS : Vector Multiply Subtract (integer and floating-point) +defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", + 
v4f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", + v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", + v4f32, v2f32, fmul_su, fsub_mlx>, + Requires<[HasNEON, UseFPVMLx]>; + +def : Pat<(v8i16 (sub (v8i16 QPR:$src1), + (mul (v8i16 QPR:$src2), + (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))), + (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2), + (v4i16 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; + +def : Pat<(v4i32 (sub (v4i32 QPR:$src1), + (mul (v4i32 QPR:$src2), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), + (v2i32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v4f32 (fsub_mlx (v4f32 QPR:$src1), + (fmul_su (v4f32 QPR:$src2), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), + (v2f32 (EXTRACT_SUBREG QPR:$src3, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>, + Requires<[HasNEON, UseFPVMLx]>; + +// VMLSL : Vector Multiply Subtract Long (Q -= D * D) +defm VMLSLs : N3VLMulOp_QHS<0,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLu : N3VLMulOp_QHS<1,1,0b1010,0, IIC_VMACi16D, IIC_VMACi32D, + "vmlsl", "u", NEONvmullu, sub>; + +defm VMLSLsls : N3VLMulOpSL_HS<0, 0b0110, "vmlsl", "s", NEONvmulls, sub>; +defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; + +// VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, + "vqdmlsl", "s", int_arm_neon_vqdmlsl>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + +// Vector Subtract Operations. 
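+//
+// The *hn forms narrow by keeping the high half of each wide difference.
+// For the i32 -> i16 case, roughly:
+//   vsubhn : r = (int16_t)((a - b) >> 16);
+//   vrsubhn: r = (int16_t)((a - b + 0x8000) >> 16);   // rounding constant added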
+
+// VSUB : Vector Subtract (integer and floating-point)
+defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ,
+                     "vsub", "i", sub, 0>;
+def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32",
+                  v2f32, v2f32, fsub, 0>;
+def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32",
+                  v4f32, v4f32, fsub, 0>;
+// VSUBL : Vector Subtract Long (Q = D - D)
+defm VSUBLs : N3VLExt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+                          "vsubl", "s", sub, sext, 0>;
+defm VSUBLu : N3VLExt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD,
+                          "vsubl", "u", sub, zext, 0>;
+// VSUBW : Vector Subtract Wide (Q = Q - D)
+defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>;
+defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>;
+// VHSUB : Vector Halving Subtract
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+                         "vhsub", "s", int_arm_neon_vhsubs, 0>;
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+                         IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+                         "vhsub", "u", int_arm_neon_vhsubu, 0>;
+// VQSUB : Vector Saturating Subtract
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+                          "vqsub", "s", int_arm_neon_vqsubs, 0>;
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+                          IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
+                          "vqsub", "u", int_arm_neon_vqsubu, 0>;
+// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
+defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
+                          int_arm_neon_vsubhn, 0>;
+// VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q)
+defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i",
+                           int_arm_neon_vrsubhn, 0>;
+
+// Vector Comparisons.
+
+// VCEQ : Vector Compare Equal
+defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                    IIC_VSUBi4Q, "vceq", "i", NEONvceq, 1>;
+def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
+                  NEONvceq, 1>;
+def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
+                  NEONvceq, 1>;
+
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+                         "$Vd, $Vm, #0", NEONvceqz>;
+
+// VCGE : Vector Compare Greater Than or Equal
+defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                     IIC_VSUBi4Q, "vcge", "s", NEONvcge, 0>;
+defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                     IIC_VSUBi4Q, "vcge", "u", NEONvcgeu, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+                  NEONvcge, 0>;
+def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
+                  NEONvcge, 0>;
+
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+                         "$Vd, $Vm, #0", NEONvcgez>;
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+                         "$Vd, $Vm, #0", NEONvclez>;
+
+// VCGT : Vector Compare Greater Than
+defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                     IIC_VSUBi4Q, "vcgt", "s", NEONvcgt, 0>;
+defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
+                     IIC_VSUBi4Q, "vcgt", "u", NEONvcgtu, 0>;
+def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
+                  NEONvcgt, 0>;
+def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
+                  NEONvcgt, 0>;
+
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+                         "$Vd, $Vm, #0", NEONvcgtz>;
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+                         "$Vd, $Vm, #0", NEONvcltz>;
+
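+// Each compare above sets a result lane to all ones when the condition
+// holds and to all zeros otherwise, so a compare result can feed vbsl
+// directly as a per-lane select mask; e.g. for vcgt.s32:
+//   r[i] = (a[i] > b[i]) ? 0xFFFFFFFF : 0x00000000;
+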
+// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge", + "f32", v2i32, v2f32, int_arm_neon_vacged, 0>; +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge", + "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>; +// VACGT : Vector Absolute Compare Greater Than (aka VCAGT) +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt", + "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>; +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt", + "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>; +// VTST : Vector Test Bits +defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, + IIC_VBINi4Q, "vtst", "", NEONvtst, 1>; + +// Vector Bitwise Operations. + +def vnotd : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>; +def vnotq : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>; + + +// VAND : Vector Bitwise AND +def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", + v2i32, v2i32, and, 1>; +def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", + v4i32, v4i32, and, 1>; + +// VEOR : Vector Bitwise Exclusive OR +def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", + v2i32, v2i32, xor, 1>; +def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", + v4i32, v4i32, xor, 1>; + +// VORR : Vector Bitwise OR +def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", + v2i32, v2i32, or, 1>; +def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", + v4i32, v4i32, or, 1>; + +def VORRiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 0, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VORRiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VORRiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 0, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vorr", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + + +// VBIC : Vector Bitwise Bit Clear (AND NOT) +def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vbic", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (and DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vbic", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (and QPR:$Vn, + (vnotq QPR:$Vm))))]>; + +def VBICiv4i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv2i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 0, 1, 1, + (outs DPR:$Vd), (ins nModImm:$SIMM, DPR:$src), + IIC_VMOVImm, + "vbic", "i32", 
"$Vd, $SIMM", "$src = $Vd", + [(set DPR:$Vd, + (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +def VBICiv8i16 : N1ModImm<1, 0b000, {1,0,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i16", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VBICiv4i32 : N1ModImm<1, 0b000, {0,?,?,1}, 0, 1, 1, 1, + (outs QPR:$Vd), (ins nModImm:$SIMM, QPR:$src), + IIC_VMOVImm, + "vbic", "i32", "$Vd, $SIMM", "$src = $Vd", + [(set QPR:$Vd, + (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> { + let Inst{10-9} = SIMM{10-9}; +} + +// VORN : Vector Bitwise OR NOT +def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, + "vorn", "$Vd, $Vn, $Vm", "", + [(set DPR:$Vd, (v2i32 (or DPR:$Vn, + (vnotd DPR:$Vm))))]>; +def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, + "vorn", "$Vd, $Vn, $Vm", "", + [(set QPR:$Vd, (v4i32 (or QPR:$Vn, + (vnotq QPR:$Vm))))]>; + +// VMVN : Vector Bitwise NOT (Immediate) + +let isReMaterializable = 1 in { + +def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmvn", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmvnImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} +} + +// VMVN : Vector Bitwise NOT +def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, + (outs DPR:$Vd), (ins DPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set DPR:$Vd, (v2i32 (vnotd DPR:$Vm)))]>; +def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, + (outs QPR:$Vd), (ins QPR:$Vm), IIC_VSUBiD, + "vmvn", "$Vd, $Vm", "", + [(set QPR:$Vd, (v4i32 (vnotq QPR:$Vm)))]>; +def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>; +def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>; + +// VBSL : Vector Bitwise Select +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), + (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VCNTiD, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set DPR:$Vd, + (v2i32 (or (and DPR:$Vn, DPR:$src1), + (and DPR:$Vm, (vnotd DPR:$src1)))))]>; +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), + (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VCNTiQ, + "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [(set QPR:$Vd, + (v4i32 (or (and QPR:$Vn, QPR:$src1), + (and QPR:$Vm, (vnotq QPR:$src1)))))]>; + +// VBIF : Vector Bitwise Insert if False +// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. 
+def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [/* For disassembly only; pattern left blank */]>; +def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [/* For disassembly only; pattern left blank */]>; + +// VBIT : Vector Bitwise Insert if True +// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", +// FIXME: This instruction's encoding MAY NOT BE correct. +def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, + (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), + N3RegFrm, IIC_VBINiD, + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [/* For disassembly only; pattern left blank */]>; +def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, + (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), + N3RegFrm, IIC_VBINiQ, + "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", + [/* For disassembly only; pattern left blank */]>; + +// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking +// for equivalent operations with different register constraints; it just +// inserts copies. + +// Vector Absolute Differences. + +// VABD : Vector Absolute Difference +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vabd", "s", int_arm_neon_vabds, 1>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vabd", "u", int_arm_neon_vabdu, 1>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 1>; + +// VABDL : Vector Absolute Difference Long (Q = | D - D |) +defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "s", int_arm_neon_vabds, zext, 1>; +defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, + "vabdl", "u", int_arm_neon_vabdu, zext, 1>; + +// VABA : Vector Absolute Difference and Accumulate +defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "s", int_arm_neon_vabds, add>; +defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, + "vaba", "u", int_arm_neon_vabdu, add>; + +// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) +defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, + "vabal", "s", int_arm_neon_vabds, zext, add>; +defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, + "vabal", "u", int_arm_neon_vabdu, zext, add>; + +// Vector Maximum and Minimum. 
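+//
+// These are simple per-lane selects; for integer vmax, roughly:
+//   r[i] = (a[i] >= b[i]) ? a[i] : b[i];
+// The pairwise variants (vpmax/vpmin, under "Vector Pairwise Operations"
+// below) instead reduce adjacent pairs of lanes.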
+ +// VMAX : Vector Maximum +defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "s", int_arm_neon_vmaxs, 1>; +defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmax", "u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; + +// VMIN : Vector Minimum +defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "s", int_arm_neon_vmins, 1>; +defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm, + IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, + "vmin", "u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, + "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, + "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; + +// Vector Pairwise Operations. + +// VPADD : Vector Pairwise Add +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VSHLiD, + "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, + IIC_VPBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; + +// VPADDL : Vector Pairwise Add Long +defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", + int_arm_neon_vpaddls>; +defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", + int_arm_neon_vpaddlu>; + +// VPADAL : Vector Pairwise Add and Accumulate Long +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", + int_arm_neon_vpadals>; +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u", + int_arm_neon_vpadalu>; + +// VPMAX : Vector Pairwise Maximum +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VSUBi4D, "vpmax", + "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmax", + "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>; + +// VPMIN : Vector Pairwise Minimum +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 
0b00, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VSUBi4D, "vpmin", + "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VPBIND, "vpmin", + "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>; + +// Vector Reciprocal and Reciprocal Square Root Estimate and Step. + +// VRECPE : Vector Reciprocal Estimate +def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAD, "vrecpe", "u32", + v2i32, v2i32, int_arm_neon_vrecpe>; +def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, + IIC_VUNAQ, "vrecpe", "u32", + v4i32, v4i32, int_arm_neon_vrecpe>; +def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAD, "vrecpe", "f32", + v2f32, v2f32, int_arm_neon_vrecpe>; +def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, + IIC_VUNAQ, "vrecpe", "f32", + v4f32, v4f32, int_arm_neon_vrecpe>; + +// VRECPS : Vector Reciprocal Step +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrecps", "f32", + v2f32, v2f32, int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrecps", "f32", + v4f32, v4f32, int_arm_neon_vrecps, 1>; + +// VRSQRTE : Vector Reciprocal Square Root Estimate +def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAD, "vrsqrte", "u32", + v2i32, v2i32, int_arm_neon_vrsqrte>; +def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, + IIC_VUNAQ, "vrsqrte", "u32", + v4i32, v4i32, int_arm_neon_vrsqrte>; +def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAD, "vrsqrte", "f32", + v2f32, v2f32, int_arm_neon_vrsqrte>; +def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, + IIC_VUNAQ, "vrsqrte", "f32", + v4f32, v4f32, int_arm_neon_vrsqrte>; + +// VRSQRTS : Vector Reciprocal Square Root Step +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, + IIC_VRECSD, "vrsqrts", "f32", + v2f32, v2f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm, + IIC_VRECSQ, "vrsqrts", "f32", + v4f32, v4f32, int_arm_neon_vrsqrts, 1>; + +// Vector Shifts. 
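+//
+// For the register forms (vshl, vrshl, vqshl, ...) the bottom byte of each
+// lane of the second operand is a *signed* shift count, so a negative count
+// shifts right; per lane, roughly:
+//   r[i] = (cnt >= 0) ? (a[i] << cnt) : (a[i] >> -cnt);
+// The immediate forms pack both the element size and the shift amount into
+// the imm6 field, which is why the shift multiclasses above pin Inst{21-19}
+// and friends per element size.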
+
+// VSHL : Vector Shift
+defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm,
+                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+                           "vshl", "s", int_arm_neon_vshifts>;
+defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm,
+                           IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+                           "vshl", "u", int_arm_neon_vshiftu>;
+// VSHL : Vector Shift Left (Immediate)
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
+                        N2RegVShLFrm>;
+// VSHR : Vector Shift Right (Immediate)
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
+                        N2RegVShRFrm>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
+                        N2RegVShRFrm>;
+
+// VSHLL : Vector Shift Left Long
+defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
+defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>;
+
+// VSHLL : Vector Shift Left Long (with maximum shift count)
+class N2VLShMax<bit op24, bit op23, bits<6> op21_16, bits<4> op11_8, bit op7,
+                bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy,
+                ValueType OpTy, SDNode OpNode>
+  : N2VLSh<op24, op23, op11_8, op7, op6, op4, OpcodeStr, Dt,
+           ResTy, OpTy, OpNode> {
+  let Inst{21-16} = op21_16;
+}
+def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8",
+                        v8i16, v8i8, NEONvshlli>;
+def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16",
+                         v4i32, v4i16, NEONvshlli>;
+def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
+                         v2i64, v2i32, NEONvshlli>;
+
+// VSHRN : Vector Shift Right and Narrow
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+                        NEONvshrn>;
+
+// VRSHL : Vector Rounding Shift
+defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
+                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+                            "vrshl", "s", int_arm_neon_vrshifts>;
+defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm,
+                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+                            "vrshl", "u", int_arm_neon_vrshiftu>;
+// VRSHR : Vector Rounding Shift Right
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
+                         N2RegVShRFrm>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
+                         N2RegVShRFrm>;
+
+// VRSHRN : Vector Rounding Shift Right and Narrow
+defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
+                         NEONvrshrn>;
+
+// VQSHL : Vector Saturating Shift
+defm VQSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 1, N3RegVShFrm,
+                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+                            "vqshl", "s", int_arm_neon_vqshifts>;
+defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm,
+                            IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+                            "vqshl", "u", int_arm_neon_vqshiftu>;
+// VQSHL : Vector Saturating Shift Left (Immediate)
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls,
+                          N2RegVShLFrm>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu,
+                          N2RegVShLFrm>;
+// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu,
+                          N2RegVShLFrm>;
+
+// VQSHRN : Vector Saturating Shift Right and Narrow
+defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
+                          NEONvqshrns>;
+defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u",
+                          NEONvqshrnu>;
+
+// VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned)
+defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
+                          NEONvqshrnsu>;
+
+// VQRSHL : Vector Saturating Rounding Shift
+defm VQRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "s", int_arm_neon_vqrshifts>; +defm VQRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 1, N3RegVShFrm, + IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, + "vqrshl", "u", int_arm_neon_vqrshiftu>; + +// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", + NEONvqrshrns>; +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", + NEONvqrshrnu>; + +// VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", + NEONvqrshrnsu>; + +// VSRA : Vector Shift Right and Accumulate +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; +// VRSRA : Vector Rounding Shift Right and Accumulate +defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; + +// VSLI : Vector Shift Left and Insert +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; +// VSRI : Vector Shift Right and Insert +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; + +// Vector Absolute and Saturating Absolute. + +// VABS : Vector Absolute Value +defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", + int_arm_neon_vabs>; +def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + IIC_VUNAD, "vabs", "f32", + v2f32, v2f32, int_arm_neon_vabs>; +def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, + IIC_VUNAQ, "vabs", "f32", + v4f32, v4f32, int_arm_neon_vabs>; + +// VQABS : Vector Saturating Absolute Value +defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", + int_arm_neon_vqabs>; + +// Vector Negate. 
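+//
+// Integer negation reaches instruction selection as a subtract from the
+// all-zeros vector, so the vnegd/vnegq fragments below match (sub 0, x)
+// and the accompanying patterns map that onto VNEG.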
+
+def vnegd : PatFrag<(ops node:$in),
+                    (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+                    (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
+
+class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm),
+        IIC_VSHLiD, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (vnegd DPR:$Vm)))]>;
+class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$Vd), (ins QPR:$Vm),
+        IIC_VSHLiQ, OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (vnegq QPR:$Vm)))]>;
+
+// VNEG : Vector Negate (integer)
+def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
+def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
+def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
+def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>;
+def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
+def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
+
+// VNEG : Vector Negate (floating-point)
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+                 (outs DPR:$Vd), (ins DPR:$Vm), IIC_VUNAD,
+                 "vneg", "f32", "$Vd, $Vm", "",
+                 [(set DPR:$Vd, (v2f32 (fneg DPR:$Vm)))]>;
+def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
+                   (outs QPR:$Vd), (ins QPR:$Vm), IIC_VUNAQ,
+                   "vneg", "f32", "$Vd, $Vm", "",
+                   [(set QPR:$Vd, (v4f32 (fneg QPR:$Vm)))]>;
+
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
+
+// VQNEG : Vector Saturating Negate
+defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
+                        IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s",
+                        int_arm_neon_vqneg>;
+
+// Vector Bit Counting Operations.
+
+// VCLS : Vector Count Leading Sign Bits
+defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0,
+                       IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s",
+                       int_arm_neon_vcls>;
+// VCLZ : Vector Count Leading Zeros
+defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0,
+                       IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i",
+                       int_arm_neon_vclz>;
+// VCNT : Vector Count One Bits
+def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+                    IIC_VCNTiD, "vcnt", "8",
+                    v8i8, v8i8, int_arm_neon_vcnt>;
+def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
+                    IIC_VCNTiQ, "vcnt", "8",
+                    v16i8, v16i8, int_arm_neon_vcnt>;
+
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+                 (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary,
+                 "vswp", "$Vd, $Vm", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+                 (outs QPR:$Vd), (ins QPR:$Vm), NoItinerary,
+                 "vswp", "$Vd, $Vm", "", []>;
+
+// Vector Move Operations.
+
+// VMOV : Vector Move (Register)
+
+let neverHasSideEffects = 1 in {
+def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
+                    N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+  let Vn{4-0} = Vm{4-0};
+}
+def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
+                 N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
+  let Vn{4-0} = Vm{4-0};
+}
+
+// Pseudo vector move instructions for QQ and QQQQ registers. This should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src), + NoItinerary, []>; + +def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), + NoItinerary, []>; +} // neverHasSideEffects + +// VMOV : Vector Move (Immediate) + +let isReMaterializable = 1 in { +def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i8", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v8i8 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i8", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v16i8 (NEONvmovImm timm:$SIMM)))]>; + +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i16", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v4i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i16", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v8i16 (NEONvmovImm timm:$SIMM)))]> { + let Inst{9} = SIMM{9}; +} + +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i32", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v2i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i32", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v4i32 (NEONvmovImm timm:$SIMM)))]> { + let Inst{11-8} = SIMM{11-8}; +} + +def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i64", "$Vd, $SIMM", "", + [(set DPR:$Vd, (v1i64 (NEONvmovImm timm:$SIMM)))]>; +def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$Vd), + (ins nModImm:$SIMM), IIC_VMOVImm, + "vmov", "i64", "$Vd, $SIMM", "", + [(set QPR:$Vd, (v2i64 (NEONvmovImm timm:$SIMM)))]>; +} // isReMaterializable + +// VMOV : Vector Get Lane (move scalar to ARM core register) + +def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "s8", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlanes (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "s16", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlanes (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "u8", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlaneu (v8i8 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "u16", "$R, $V[$lane]", + [(set GPR:$R, (NEONvgetlaneu (v4i16 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, + (outs GPR:$R), (ins DPR:$V, nohash_imm:$lane), + IIC_VMOVSI, "vmov", "32", "$R, $V[$lane]", + [(set GPR:$R, (extractelt (v2i32 DPR:$V), + imm:$lane))]> { + let Inst{21} = lane{0}; +} +// def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td +def : Pat<(NEONvgetlanes (v16i8 QPR:$src), imm:$lane), + (VGETLNs8 
(v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlanes (v8i16 QPR:$src), imm:$lane), + (VGETLNs16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v16i8 QPR:$src), imm:$lane), + (VGETLNu8 (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane))>; +def : Pat<(NEONvgetlaneu (v8i16 QPR:$src), imm:$lane), + (VGETLNu16 (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane))>; +def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane), + (VGETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane))>; +def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2), + (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)), + (SSubReg_f32_reg imm:$src2))>; +def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)), + (SSubReg_f32_reg imm:$src2))>; +//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2), +// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; +def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), + (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>; + + +// VMOV : Vector Set Lane (move ARM core register to scalar) + +let Constraints = "$src1 = $V" in { +def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "8", "$V[$lane], $R", + [(set DPR:$V, (vector_insert (v8i8 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{2}; + let Inst{6-5} = lane{1-0}; +} +def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "16", "$V[$lane], $R", + [(set DPR:$V, (vector_insert (v4i16 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{1}; + let Inst{6} = lane{0}; +} +def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$V), + (ins DPR:$src1, GPR:$R, nohash_imm:$lane), + IIC_VMOVISL, "vmov", "32", "$V[$lane], $R", + [(set DPR:$V, (insertelt (v2i32 DPR:$src1), + GPR:$R, imm:$lane))]> { + let Inst{21} = lane{0}; +} +} +def : Pat<(vector_insert (v16i8 QPR:$src1), GPR:$src2, imm:$lane), + (v16i8 (INSERT_SUBREG QPR:$src1, + (v8i8 (VSETLNi8 (v8i8 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i8_reg imm:$lane))), + GPR:$src2, (SubReg_i8_lane imm:$lane))), + (DSubReg_i8_reg imm:$lane)))>; +def : Pat<(vector_insert (v8i16 QPR:$src1), GPR:$src2, imm:$lane), + (v8i16 (INSERT_SUBREG QPR:$src1, + (v4i16 (VSETLNi16 (v4i16 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i16_reg imm:$lane))), + GPR:$src2, (SubReg_i16_lane imm:$lane))), + (DSubReg_i16_reg imm:$lane)))>; +def : Pat<(insertelt (v4i32 QPR:$src1), GPR:$src2, imm:$lane), + (v4i32 (INSERT_SUBREG QPR:$src1, + (v2i32 (VSETLNi32 (v2i32 (EXTRACT_SUBREG QPR:$src1, + (DSubReg_i32_reg imm:$lane))), + GPR:$src2, (SubReg_i32_lane imm:$lane))), + (DSubReg_i32_reg imm:$lane)))>; + +def : Pat<(v2f32 (insertelt DPR:$src1, SPR:$src2, imm:$src3)), + (INSERT_SUBREG (v2f32 (COPY_TO_REGCLASS DPR:$src1, DPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; +def : Pat<(v4f32 (insertelt QPR:$src1, SPR:$src2, imm:$src3)), + (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS QPR:$src1, QPR_VFP2)), + SPR:$src2, (SSubReg_f32_reg imm:$src3))>; + +//def : Pat<(v2i64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), +// (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg 
imm:$src3))>; +def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)), + (INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>; + +def : Pat<(v2f32 (scalar_to_vector SPR:$src)), + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; +def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>; +def : Pat<(v4f32 (scalar_to_vector SPR:$src)), + (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>; + +def : Pat<(v8i8 (scalar_to_vector GPR:$src)), + (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; +def : Pat<(v4i16 (scalar_to_vector GPR:$src)), + (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; +def : Pat<(v2i32 (scalar_to_vector GPR:$src)), + (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0))>; + +def : Pat<(v16i8 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), + (VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; +def : Pat<(v8i16 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), + (VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; +def : Pat<(v4i32 (scalar_to_vector GPR:$src)), + (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), + (VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)), + dsub_0)>; + +// VDUP : Vector Duplicate (from ARM core register to all elements) + +class VDUPD opcod1, bits<2> opcod3, string Dt, ValueType Ty> + : NVDup; +class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> + : NVDup; + +def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; + +def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", "32", "$V, $R", + [(set DPR:$V, (v2f32 (NEONvdup + (f32 (bitconvert GPR:$R)))))]>; +def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R), + IIC_VMOVIS, "vdup", "32", "$V, $R", + [(set QPR:$V, (v4f32 (NEONvdup + (f32 (bitconvert GPR:$R)))))]>; + +// VDUP : Vector Duplicate Lane (from scalar to all elements) + +class VDUPLND op19_16, string OpcodeStr, string Dt, + ValueType Ty> + : NVDupLane; + +class VDUPLNQ op19_16, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy> + : NVDupLane; + +// Inst{19-16} is partially specified depending on the element size. 
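+// Concretely, the lane number shares the field with a size marker:
+//   i8  lane: Inst{19-16} = {lane{2-0}, 1}
+//   i16 lane: Inst{19-16} = {lane{1-0}, 1, 0}
+//   i32 lane: Inst{19-16} = {lane{0},   1, 0, 0}
+// which is what the per-size 'let Inst{...} = lane{...}' overrides below
+// fill in.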
+ +def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8> { + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { + let Inst{19} = lane{0}; +} +def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> { + let Inst{19} = lane{0}; +} +def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { + let Inst{19-17} = lane{2-0}; +} +def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { + let Inst{19-18} = lane{1-0}; +} +def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { + let Inst{19} = lane{0}; +} +def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> { + let Inst{19} = lane{0}; +} + +def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), + (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i8_reg imm:$lane))), + (SubReg_i8_lane imm:$lane)))>; +def : Pat<(v8i16 (NEONvduplane (v8i16 QPR:$src), imm:$lane)), + (v8i16 (VDUPLN16q (v4i16 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i16_reg imm:$lane))), + (SubReg_i16_lane imm:$lane)))>; +def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), + (v4i32 (VDUPLN32q (v2i32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; +def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), + (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src, + (DSubReg_i32_reg imm:$lane))), + (SubReg_i32_lane imm:$lane)))>; + +def VDUPfdf : PseudoNeonI<(outs DPR:$dst), (ins SPR:$src), IIC_VMOVD, "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; +def VDUPfqf : PseudoNeonI<(outs QPR:$dst), (ins SPR:$src), IIC_VMOVD, "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; + +// VMOVN : Vector Narrowing Move +defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVN, + "vmovn", "i", trunc>; +// VQMOVN : Vector Saturating Narrowing Move +defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, + "vqmovn", "s", int_arm_neon_vqmovns>; +defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, + "vqmovn", "u", int_arm_neon_vqmovnu>; +defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, + "vqmovun", "s", int_arm_neon_vqmovnsu>; +// VMOVL : Vector Lengthening Move +defm VMOVLs : N2VL_QHS<0b01,0b10100,0,1, "vmovl", "s", sext>; +defm VMOVLu : N2VL_QHS<0b11,0b10100,0,1, "vmovl", "u", zext>; + +// Vector Conversions. + +// VCVT : Vector Convert Between Floating-Point and Integers +def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v2i32, v2f32, fp_to_sint>; +def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v2i32, v2f32, fp_to_uint>; +def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v2f32, v2i32, sint_to_fp>; +def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v2f32, v2i32, uint_to_fp>; + +def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", + v4i32, v4f32, fp_to_sint>; +def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", + v4i32, v4f32, fp_to_uint>; +def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", + v4f32, v4i32, sint_to_fp>; +def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", + v4f32, v4i32, uint_to_fp>; + +// VCVT : Vector Convert Between Floating-Point and Fixed-Point. 
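+// These take an extra fractional-bits operand (1-32); converting f32 to
+// s32 with #8, for example, computes per lane roughly
+//   r[i] = sat32(trunc(a[i] * 256.0));
+// (round toward zero, saturating), while the fixed-to-f32 direction
+// divides by 2^fbits instead.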
+def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+                        v2i32, v2f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+                        v2i32, v2f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+                        v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+                        v2f32, v2i32, int_arm_neon_vcvtfxu2fp>;
+
+def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32",
+                        v4i32, v4f32, int_arm_neon_vcvtfp2fxs>;
+def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32",
+                        v4i32, v4f32, int_arm_neon_vcvtfp2fxu>;
+def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
+                        v4f32, v4i32, int_arm_neon_vcvtfxs2fp>;
+def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32",
+                        v4f32, v4i32, int_arm_neon_vcvtfxu2fp>;
+
+// VCVT : Vector Convert Between Half-Precision and Single-Precision.
+def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0,
+                      IIC_VUNAQ, "vcvt", "f16.f32",
+                      v4i16, v4f32, int_arm_neon_vcvtfp2hf>,
+              Requires<[HasNEON, HasFP16]>;
+def VCVTh2f : N2VLInt<0b11, 0b11, 0b01, 0b10, 0b01110, 0, 0,
+                      IIC_VUNAQ, "vcvt", "f32.f16",
+                      v4f32, v4i16, int_arm_neon_vcvthf2fp>,
+              Requires<[HasNEON, HasFP16]>;
+
+// Vector Reverse.
+
+// VREV64 : Vector Reverse elements within 64-bit doublewords
+
+class VREV64D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev64 (Ty DPR:$Vm))))]>;
+class VREV64Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev64 (Ty QPR:$Vm))))]>;
+
+def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>;
+def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>;
+def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>;
+def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>;
+
+def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>;
+def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>;
+def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>;
+def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>;
+
+// VREV32 : Vector Reverse elements within 32-bit words
+
+class VREV32D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev32 (Ty DPR:$Vm))))]>;
+class VREV32Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev32 (Ty QPR:$Vm))))]>;
+
+def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>;
+def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>;
+
+def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>;
+def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>;
+
+// VREV16 : Vector Reverse elements within 16-bit halfwords
+
+class VREV16D<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$Vd),
+        (ins DPR:$Vm), IIC_VMOVD,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set DPR:$Vd, (Ty (NEONvrev16 (Ty DPR:$Vm))))]>;
+class VREV16Q<bits<2> op19_18, string OpcodeStr, string Dt, ValueType Ty>
+  : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$Vd),
+        (ins QPR:$Vm), IIC_VMOVQ,
+        OpcodeStr, Dt, "$Vd, $Vm", "",
+        [(set QPR:$Vd, (Ty (NEONvrev16 (Ty QPR:$Vm))))]>;
+
+def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>;
+def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
+
+// Other Vector Shuffles.
+
+// Aligned extractions: really just dropping registers
+
+class AlignedVEXTq<ValueType DestTy, ValueType SrcTy, SDNodeXForm LaneCVT>
+      : Pat<(DestTy (vector_extract_subvec (SrcTy QPR:$src), (i32 imm:$start))),
+            (EXTRACT_SUBREG (SrcTy QPR:$src), (LaneCVT imm:$start))>;
+
+def : AlignedVEXTq<v8i8, v16i8, DSubReg_i8_reg>;
+
+def : AlignedVEXTq<v4i16, v8i16, DSubReg_i16_reg>;
+
+def : AlignedVEXTq<v2i32, v4i32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v2f32, v4f32, DSubReg_i32_reg>;
+
+def : AlignedVEXTq<v1i64, v2i64, DSubReg_f64_reg>;
+
+
+// VEXT : Vector Extract
+
+class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
+  : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$Vm, i32imm:$index), NVExtFrm,
+        IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn),
+                                     (Ty DPR:$Vm), imm:$index)))]> {
+  bits<4> index;
+  let Inst{11-8} = index{3-0};
+}
+
+class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
+  : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$Vd),
+        (ins QPR:$Vn, QPR:$Vm, i32imm:$index), NVExtFrm,
+        IIC_VEXTQ, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "",
+        [(set QPR:$Vd, (Ty (NEONvext (Ty QPR:$Vn),
+                                     (Ty QPR:$Vm), imm:$index)))]> {
+  bits<4> index;
+  let Inst{11-8} = index{3-0};
+}
+
+def VEXTd8 : VEXTd<"vext", "8", v8i8> {
+  let Inst{11-8} = index{3-0};
+}
+def VEXTd16 : VEXTd<"vext", "16", v4i16> {
+  let Inst{11-9} = index{2-0};
+  let Inst{8} = 0b0;
+}
+def VEXTd32 : VEXTd<"vext", "32", v2i32> {
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8} = 0b00;
+}
+def VEXTdf : VEXTd<"vext", "32", v2f32> {
+  let Inst{11} = index{0};
+  let Inst{10-8} = 0b000;
+}
+
+def VEXTq8 : VEXTq<"vext", "8", v16i8> {
+  let Inst{11-8} = index{3-0};
+}
+def VEXTq16 : VEXTq<"vext", "16", v8i16> {
+  let Inst{11-9} = index{2-0};
+  let Inst{8} = 0b0;
+}
+def VEXTq32 : VEXTq<"vext", "32", v4i32> {
+  let Inst{11-10} = index{1-0};
+  let Inst{9-8} = 0b00;
+}
+def VEXTqf : VEXTq<"vext", "32", v4f32> {
+  let Inst{11} = index{0};
+  let Inst{10-8} = 0b000;
+}
+
+// VTRN : Vector Transpose
+
+def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">;
+def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">;
+def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn", "32">;
+
+def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">;
+def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">;
+def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">;
+
+// VUZP : Vector Unzip (Deinterleave)
+
+def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">;
+def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">;
+def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">;
+
+def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">;
+def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">;
+def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">;
+
+// VZIP : Vector Zip (Interleave)
+
+def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">;
+def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">;
+def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">;
+
+def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">;
+def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">;
+def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
+
+// Vector Table Lookup and Table Extension.
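+//
+// Each byte of the index operand selects a byte from the table registers;
+// out-of-range indices yield zero for VTBL, while VTBX leaves the
+// corresponding destination byte unchanged. A sketch of IR that the VTBL1
+// pattern below matches (intrinsic declaration assumed for illustration):
+//   %r = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %tbl, <8 x i8> %idx)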
+
+// VTBL : Vector Table Lookup
+def VTBL1
+  : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTB1,
+        "vtbl", "8", "$Vd, \\{$Vn\\}, $Vm", "",
+        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbl1 DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBL2
+  : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTB2,
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "", []>;
+def VTBL3
+  : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm), NVTBLFrm, IIC_VTB3,
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm", "", []>;
+def VTBL4
+  : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$Vd),
+        (ins DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm),
+        NVTBLFrm, IIC_VTB4,
+        "vtbl", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm", "", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBL2Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QPR:$tbl, DPR:$src), IIC_VTB2, "", []>;
+def VTBL3Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB3, "", []>;
+def VTBL4Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins QQPR:$tbl, DPR:$src), IIC_VTB4, "", []>;
+
+// VTBX : Vector Table Extension
+def VTBX1
+  : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1,
+        "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd",
+        [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1
+                               DPR:$orig, DPR:$Vn, DPR:$Vm)))]>;
+let hasExtraSrcRegAllocReq = 1 in {
+def VTBX2
+  : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$Vm), NVTBLFrm, IIC_VTBX2,
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2\\}, $Vm", "$orig = $Vd", []>;
+def VTBX3
+  : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$Vd),
+        (ins DPR:$orig, DPR:$Vn, DPR:$tbl2, DPR:$tbl3, DPR:$Vm),
+        NVTBLFrm, IIC_VTBX3,
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3\\}, $Vm",
+        "$orig = $Vd", []>;
+def VTBX4
+  : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$Vd), (ins DPR:$orig, DPR:$Vn,
+        DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$Vm), NVTBLFrm, IIC_VTBX4,
+        "vtbx", "8", "$Vd, \\{$Vn, $tbl2, $tbl3, $tbl4\\}, $Vm",
+        "$orig = $Vd", []>;
+} // hasExtraSrcRegAllocReq = 1
+
+def VTBX2Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QPR:$tbl, DPR:$src),
+                IIC_VTBX2, "$orig = $dst", []>;
+def VTBX3Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+                IIC_VTBX3, "$orig = $dst", []>;
+def VTBX4Pseudo
+  : PseudoNeonI<(outs DPR:$dst), (ins DPR:$orig, QQPR:$tbl, DPR:$src),
+                IIC_VTBX4, "$orig = $dst", []>;
+
+//===----------------------------------------------------------------------===//
+// NEON instructions for single-precision FP math
+//===----------------------------------------------------------------------===//
+
+class N2VSPat<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a)),
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+  : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+              (EXTRACT_SUBREG
+               (v2f32 (COPY_TO_REGCLASS (Inst
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$acc, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$a, ssub_0),
+                (INSERT_SUBREG
+                 (v2f32 (COPY_TO_REGCLASS (v2f32 (IMPLICIT_DEF)), DPR_VFP2)),
+                 SPR:$b, ssub_0)), DPR_VFP2)), ssub_0)>;
+
+def : N3VSPat<fadd, VADDfd>;
+def : N3VSPat<fsub, VSUBfd>;
+def : N3VSPat<fmul, VMULfd>;
+def : N3VSMulOpPat<fmul, fadd, VMLAfd>,
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N3VSMulOpPat<fmul, fsub, VMLSfd>,
+      Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>;
+def : N2VSPat<fabs, VABSfd>;
+def : N2VSPat<fneg, VNEGfd>;
+def : N3VSPat<NEONfmax, VMAXfd>;
+def : N3VSPat<NEONfmin, VMINfd>;
+def : N2VSPat<arm_ftosi, VCVTf2sd>;
+def : N2VSPat<arm_ftoui, VCVTf2ud>;
+def : N2VSPat<arm_sitof, VCVTs2fd>;
+def : N2VSPat<arm_uitof, VCVTu2fd>;
+
+//===----------------------------------------------------------------------===//
+// Non-Instruction Patterns
+//===----------------------------------------------------------------------===//
+
+// bit_convert
+def : Pat<(v1i64 (bitconvert (v2i32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v4i16 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v8i8 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (f64 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v1i64 (bitconvert (v2f32 DPR:$src))), (v1i64 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v1i64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v4i16 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v8i8 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (f64 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v2i32 (bitconvert (v2f32 DPR:$src))), (v2i32 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v1i64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2i32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v8i8 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (f64 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v4i16 (bitconvert (v2f32 DPR:$src))), (v4i16 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v1i64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2i32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v4i16 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (f64 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(v8i8 (bitconvert (v2f32 DPR:$src))), (v8i8 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v1i64 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2i32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v4i16 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v8i8 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(f64 (bitconvert (v2f32 DPR:$src))), (f64 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (f64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v1i64 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v2i32 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v4i16 DPR:$src))), (v2f32 DPR:$src)>;
+def : Pat<(v2f32 (bitconvert (v8i8 DPR:$src))), (v2f32 DPR:$src)>;
+
+def : Pat<(v2i64 (bitconvert (v4i32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v8i16 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v16i8 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v2f64 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v2i64 (bitconvert (v4f32 QPR:$src))), (v2i64 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2i64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v8i16 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v16i8 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v2f64 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v4i32 (bitconvert (v4f32 QPR:$src))), (v4i32 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2i64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4i32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v16i8 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v2f64 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v8i16 (bitconvert (v4f32 QPR:$src))), (v8i16 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2i64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4i32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v8i16 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v2f64 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v16i8 (bitconvert (v4f32 QPR:$src))), (v16i8 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2i64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v4i32 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v8i16 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v16i8 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v4f32 (bitconvert (v2f64 QPR:$src))), (v4f32 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v2i64 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>;
diff --git a/final/lib/Target/ARM/ARMInstrThumb.td b/final/lib/Target/ARM/ARMInstrThumb.td
new file mode 100644
index 00000000000..826ef46bcdb
--- /dev/null
+++ b/final/lib/Target/ARM/ARMInstrThumb.td
@@ -0,0 +1,1550 @@
+//===- ARMInstrThumb.td - Thumb support for ARM ------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Thumb instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Thumb specific DAG Nodes.
+//
+
+def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall,
+                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                       SDNPVariadic]>;
+
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32);
+}]>;
+def imm_comp_XFORM : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
+}]>;
+
+/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
+def imm0_7 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 8;
+}]>;
+def imm0_7_neg : PatLeaf<(i32 imm), [{
+  return (uint32_t)-N->getZExtValue() < 8;
+}], imm_neg_XFORM>;
+
+def imm0_255 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm0_255_comp : PatLeaf<(i32 imm), [{
+  return ~((uint32_t)N->getZExtValue()) < 256;
+}]>;
+
+def imm8_255 : PatLeaf<(i32 imm), [{
+  return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256;
+}]>;
+def imm8_255_neg : PatLeaf<(i32 imm), [{
+  unsigned Val = -N->getZExtValue();
+  return Val >= 8 && Val < 256;
+}], imm_neg_XFORM>;
+
+// Break imm's up into two pieces: an immediate + a left shift. This uses
+// thumb_immshifted to match and thumb_immshifted_val and thumb_immshifted_shamt
+// to get the val/shift pieces.
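+//
+// Worked example (illustrative, not from the original source): 0x00AB0000
+// does not fit an 8-bit Thumb immediate, but it equals 0xAB << 16, so
+// thumb_immshifted matches it and the transforms below recover val = 0xAB
+// and shamt = 16, which a register move plus a left shift can materialize.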
+def thumb_immshifted : PatLeaf<(imm), [{
+  return ARM_AM::isThumbImmShiftedVal((unsigned)N->getZExtValue());
+}]>;
+
+def thumb_immshifted_val : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmNonShiftedVal((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+def thumb_immshifted_shamt : SDNodeXForm<imm, [{
+  unsigned V = ARM_AM::getThumbImmValShift((unsigned)N->getZExtValue());
+  return CurDAG->getTargetConstant(V, MVT::i32);
+}]>;
+
+// ADR instruction labels.
+def t_adrlabel : Operand<i32> {
+  let EncoderMethod = "getThumbAdrLabelOpValue";
+}
+
+// Scaled 4 immediate.
+def t_imm_s4 : Operand<i32> {
+  let PrintMethod = "printThumbS4ImmOperand";
+}
+
+// Define Thumb specific addressing modes.
+
+def t_brtarget : Operand<OtherVT> {
+  let EncoderMethod = "getThumbBRTargetOpValue";
+}
+
+def t_bcctarget : Operand<i32> {
+  let EncoderMethod = "getThumbBCCTargetOpValue";
+}
+
+def t_cbtarget : Operand<i32> {
+  let EncoderMethod = "getThumbCBTargetOpValue";
+}
+
+def t_bltarget : Operand<i32> {
+  let EncoderMethod = "getThumbBLTargetOpValue";
+}
+
+def t_blxtarget : Operand<i32> {
+  let EncoderMethod = "getThumbBLXTargetOpValue";
+}
+
+def MemModeRegThumbAsmOperand : AsmOperandClass {
+  let Name = "MemModeRegThumb";
+  let SuperClasses = [];
+}
+
+def MemModeImmThumbAsmOperand : AsmOperandClass {
+  let Name = "MemModeImmThumb";
+  let SuperClasses = [];
+}
+
+// t_addrmode_rr := reg + reg
+//
+def t_addrmode_rr : Operand<i32>,
+                    ComplexPattern<i32, 2, "SelectThumbAddrModeRR", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+}
+
+// t_addrmode_rrs := reg + reg
+//
+def t_addrmode_rrs1 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S1", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs2 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S2", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+def t_addrmode_rrs4 : Operand<i32>,
+                      ComplexPattern<i32, 2, "SelectThumbAddrModeRI5S4", []> {
+  let EncoderMethod = "getThumbAddrModeRegRegOpValue";
+  let PrintMethod = "printThumbAddrModeRROperand";
+  let MIOperandInfo = (ops tGPR:$base, tGPR:$offsreg);
+  let ParserMatchClass = MemModeRegThumbAsmOperand;
+}
+
+// t_addrmode_is4 := reg + imm5 * 4
+//
+def t_addrmode_is4 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S4", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S4Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is2 := reg + imm5 * 2
+//
+def t_addrmode_is2 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S2", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S2Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_is1 := reg + imm5
+//
+def t_addrmode_is1 : Operand<i32>,
+                     ComplexPattern<i32, 2, "SelectThumbAddrModeImm5S1", []> {
+  let EncoderMethod = "getAddrModeISOpValue";
+  let PrintMethod = "printThumbAddrModeImm5S1Operand";
+  let MIOperandInfo = (ops tGPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_sp := sp + imm8 * 4
+//
+def t_addrmode_sp : Operand<i32>,
+                    ComplexPattern<i32, 2, "SelectThumbAddrModeSP", []> {
+  let EncoderMethod = "getAddrModeThumbSPOpValue";
+  let PrintMethod = "printThumbAddrModeSPOperand";
+  let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm);
+  let ParserMatchClass = MemModeImmThumbAsmOperand;
+}
+
+// t_addrmode_pc := <label> => pc + imm8 * 4
} + +define void @getAndMoveToFrontDecode() { + br label %endif.2 + +endif.2: ; preds = %loopexit.5, %0 + br i1 false, label %loopentry.5, label %UnifiedExitNode + +loopentry.5: ; preds = %loopexit.6, %endif.2 + br i1 false, label %loopentry.6, label %UnifiedExitNode + +loopentry.6: ; preds = %loopentry.7, %loopentry.5 + br i1 false, label %loopentry.7, label %loopexit.6 + +loopentry.7: ; preds = %loopentry.7, %loopentry.6 + br i1 false, label %loopentry.7, label %loopentry.6 + +loopexit.6: ; preds = %loopentry.6 + br i1 false, label %loopentry.5, label %loopexit.5 + +loopexit.5: ; preds = %loopexit.6 + br i1 false, label %endif.2, label %UnifiedExitNode + +UnifiedExitNode: ; preds = %loopexit.5, %loopentry.5, %endif.2 + ret void +} diff --git a/final/test/Analysis/LoopInfo/dg.exp b/final/test/Analysis/LoopInfo/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ b/final/test/Analysis/LoopInfo/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/final/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/final/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll new file mode 100644 index 00000000000..b73b7f03f7e --- /dev/null +++ b/final/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll @@ -0,0 +1,97 @@ +; RUN: opt < %s -analyze -postdomfrontier \ +; RUN: -disable-verify +; ModuleID = '2006-09-26-PostDominanceFrontier.bc' +target datalayout = "e-p:64:64" +target triple = "alphaev67-unknown-linux-gnu" + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } +@TOP = external global i64* ; [#uses=1] +@BOT = external global i64* ; [#uses=1] +@str = external global [2 x i8] ; <[2 x i8]*> [#uses=0] + +declare void @fopen() + +define void @main(i8** %argv) { +entry: + %netSelect.i507 = alloca i64, align 8 ; [#uses=0] + %topStart.i = alloca i64, align 8 ; [#uses=0] + %topEnd.i = alloca i64, align 8 ; [#uses=0] + %botStart.i = alloca i64, align 8 ; [#uses=0] + %botEnd.i = alloca i64, align 8 ; [#uses=0] + %c1.i154 = alloca i32, align 4 ; [#uses=0] + %b1.i155 = alloca i32, align 4 ; [#uses=0] + %t1.i156 = alloca i32, align 4 ; [#uses=0] + %c1.i = alloca i32, align 4 ; [#uses=0] + %b1.i = alloca i32, align 4 ; [#uses=0] + %t1.i = alloca i32, align 4 ; [#uses=0] + %netSelect.i5 = alloca i64, align 8 ; [#uses=0] + %netSelect.i = alloca i64, align 8 ; [#uses=0] + %tmp2.i = getelementptr i8** %argv, i32 1 ; [#uses=1] + %tmp3.i4 = load i8** %tmp2.i ; [#uses=0] + call void @fopen( ) + br i1 false, label %DimensionChannel.exit, label %bb.backedge.i + +bb.backedge.i: ; preds = %entry + ret void + +DimensionChannel.exit: ; preds = %entry + %tmp13.i137 = malloc i64, i32 0 ; [#uses=1] + %tmp610.i = malloc i64, i32 0 ; [#uses=1] + br label %cond_true.i143 + +cond_true.i143: ; preds = %cond_true.i143, %DimensionChannel.exit + %tmp9.i140 = getelementptr i64* %tmp13.i137, i64 0 ; [#uses=0] + %tmp12.i = getelementptr i64* %tmp610.i, i64 0 ; [#uses=0] + br i1 false, label %bb18.i144, label %cond_true.i143 + +bb18.i144: ; preds = %cond_true.i143 + call void @fopen( ) + %tmp76.i105 = malloc i64, i32 0 ; [#uses=3] + %tmp674.i = malloc i64, i32 0 ; [#uses=2] + %tmp1072.i = malloc i64, i32 0 ; [#uses=2] + %tmp1470.i = malloc i64, i32 0 ; [#uses=1] + br label %cond_true.i114 + 
+cond_true.i114: ; preds = %cond_true.i114, %bb18.i144 + %tmp17.i108 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] + %tmp20.i = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] + %tmp23.i111 = getelementptr i64* %tmp1470.i, i64 0 ; [#uses=0] + br i1 false, label %cond_true40.i, label %cond_true.i114 + +cond_true40.i: ; preds = %cond_true40.i, %cond_true.i114 + %tmp33.i115 = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] + br i1 false, label %bb142.i, label %cond_true40.i + +cond_next54.i: ; preds = %cond_true76.i + %tmp57.i = getelementptr i64* %tmp55.i, i64 0 ; [#uses=0] + br i1 false, label %bb64.i, label %bb69.i + +bb64.i: ; preds = %cond_true76.i, %cond_next54.i + %tmp67.i117 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] + br i1 false, label %bb114.i, label %cond_true111.i + +bb69.i: ; preds = %cond_next54.i + br i1 false, label %bb79.i, label %cond_true76.i + +cond_true76.i: ; preds = %bb142.i, %bb69.i + %tmp48.i = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] + br i1 false, label %bb64.i, label %cond_next54.i + +bb79.i: ; preds = %bb69.i + br i1 false, label %bb114.i, label %cond_true111.i + +cond_true111.i: ; preds = %bb79.i, %bb64.i + %tmp84.i127 = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] + ret void + +bb114.i: ; preds = %bb142.i, %bb79.i, %bb64.i + %tmp117.i = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] + %tmp132.i131 = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] + %tmp122.i = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] + ret void + +bb142.i: ; preds = %cond_true40.i + %tmp46.i = load i64** @BOT ; [#uses=2] + %tmp55.i = load i64** @TOP ; [#uses=1] + br i1 false, label %bb114.i, label %cond_true76.i +} diff --git a/final/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/final/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll new file mode 100644 index 00000000000..1ec056bc34e --- /dev/null +++ b/final/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll @@ -0,0 +1,692 @@ +; RUN: opt < %s -postdomfrontier -disable-output + +define void @SManager() { +entry: + br label %bb.outer + +bb.outer: ; preds = %bb193, %entry + br label %bb.outer156 + +bb.loopexit: ; preds = %bb442 + br label %bb.outer156 + +bb.outer156: ; preds = %bb.loopexit, %bb.outer + br label %bb + +bb: ; preds = %bb.backedge, %bb.outer156 + br i1 false, label %cond_true, label %bb.cond_next_crit_edge + +bb.cond_next_crit_edge: ; preds = %bb + br label %cond_next + +cond_true: ; preds = %bb + br label %cond_next + +cond_next: ; preds = %cond_true, %bb.cond_next_crit_edge + br i1 false, label %cond_next.bb.backedge_crit_edge, label %cond_next107 + +cond_next.bb.backedge_crit_edge: ; preds = %cond_next + br label %bb.backedge + +bb.backedge: ; preds = %cond_true112.bb.backedge_crit_edge, %cond_next.bb.backedge_crit_edge + br label %bb + +cond_next107: ; preds = %cond_next + br i1 false, label %cond_true112, label %cond_next197 + +cond_true112: ; preds = %cond_next107 + br i1 false, label %cond_true118, label %cond_true112.bb.backedge_crit_edge + +cond_true112.bb.backedge_crit_edge: ; preds = %cond_true112 + br label %bb.backedge + +cond_true118: ; preds = %cond_true112 + br i1 false, label %bb123.preheader, label %cond_true118.bb148_crit_edge + +cond_true118.bb148_crit_edge: ; preds = %cond_true118 + br label %bb148 + +bb123.preheader: ; preds = %cond_true118 + br label %bb123 + +bb123: ; preds = %bb142.bb123_crit_edge, %bb123.preheader + br i1 false, label %bb123.bb142_crit_edge, label %cond_next.i57 + +bb123.bb142_crit_edge: ; preds = %bb123 + 
br label %bb142 + +cond_next.i57: ; preds = %bb123 + br i1 false, label %cond_true135, label %cond_next.i57.bb142_crit_edge + +cond_next.i57.bb142_crit_edge: ; preds = %cond_next.i57 + br label %bb142 + +cond_true135: ; preds = %cond_next.i57 + br label %bb142 + +bb142: ; preds = %cond_true135, %cond_next.i57.bb142_crit_edge, %bb123.bb142_crit_edge + br i1 false, label %bb148.loopexit, label %bb142.bb123_crit_edge + +bb142.bb123_crit_edge: ; preds = %bb142 + br label %bb123 + +bb148.loopexit: ; preds = %bb142 + br label %bb148 + +bb148: ; preds = %bb148.loopexit, %cond_true118.bb148_crit_edge + br i1 false, label %bb151.preheader, label %bb148.bb177_crit_edge + +bb148.bb177_crit_edge: ; preds = %bb148 + br label %bb177 + +bb151.preheader: ; preds = %bb148 + br label %bb151 + +bb151: ; preds = %bb171.bb151_crit_edge, %bb151.preheader + br i1 false, label %bb151.bb171_crit_edge, label %cond_next.i49 + +bb151.bb171_crit_edge: ; preds = %bb151 + br label %bb171 + +cond_next.i49: ; preds = %bb151 + br i1 false, label %cond_true164, label %cond_next.i49.bb171_crit_edge + +cond_next.i49.bb171_crit_edge: ; preds = %cond_next.i49 + br label %bb171 + +cond_true164: ; preds = %cond_next.i49 + br label %bb171 + +bb171: ; preds = %cond_true164, %cond_next.i49.bb171_crit_edge, %bb151.bb171_crit_edge + br i1 false, label %bb177.loopexit, label %bb171.bb151_crit_edge + +bb171.bb151_crit_edge: ; preds = %bb171 + br label %bb151 + +bb177.loopexit: ; preds = %bb171 + br label %bb177 + +bb177: ; preds = %bb177.loopexit, %bb148.bb177_crit_edge + br i1 false, label %bb180.preheader, label %bb177.bb193_crit_edge + +bb177.bb193_crit_edge: ; preds = %bb177 + br label %bb193 + +bb180.preheader: ; preds = %bb177 + br label %bb180 + +bb180: ; preds = %bb180.bb180_crit_edge, %bb180.preheader + br i1 false, label %bb193.loopexit, label %bb180.bb180_crit_edge + +bb180.bb180_crit_edge: ; preds = %bb180 + br label %bb180 + +bb193.loopexit: ; preds = %bb180 + br label %bb193 + +bb193: ; preds = %bb193.loopexit, %bb177.bb193_crit_edge + br label %bb.outer + +cond_next197: ; preds = %cond_next107 + br i1 false, label %cond_next210, label %cond_true205 + +cond_true205: ; preds = %cond_next197 + br i1 false, label %cond_true205.bb213_crit_edge, label %cond_true205.bb299_crit_edge + +cond_true205.bb299_crit_edge: ; preds = %cond_true205 + br label %bb299 + +cond_true205.bb213_crit_edge: ; preds = %cond_true205 + br label %bb213 + +cond_next210: ; preds = %cond_next197 + br label %bb293 + +bb213: ; preds = %bb293.bb213_crit_edge, %cond_true205.bb213_crit_edge + br i1 false, label %bb213.cond_next290_crit_edge, label %cond_true248 + +bb213.cond_next290_crit_edge: ; preds = %bb213 + br label %cond_next290 + +cond_true248: ; preds = %bb213 + br i1 false, label %cond_true248.cond_next290_crit_edge, label %cond_true255 + +cond_true248.cond_next290_crit_edge: ; preds = %cond_true248 + br label %cond_next290 + +cond_true255: ; preds = %cond_true248 + br i1 false, label %cond_true266, label %cond_true255.cond_next271_crit_edge + +cond_true255.cond_next271_crit_edge: ; preds = %cond_true255 + br label %cond_next271 + +cond_true266: ; preds = %cond_true255 + br label %cond_next271 + +cond_next271: ; preds = %cond_true266, %cond_true255.cond_next271_crit_edge + br label %cond_next290 + +cond_next290: ; preds = %cond_next271, %cond_true248.cond_next290_crit_edge, %bb213.cond_next290_crit_edge + br label %bb293 + +bb293: ; preds = %cond_next290, %cond_next210 + br i1 false, label %bb293.bb213_crit_edge, label %bb293.bb299_crit_edge + 
+bb293.bb299_crit_edge: ; preds = %bb293 + br label %bb299 + +bb293.bb213_crit_edge: ; preds = %bb293 + br label %bb213 + +bb299: ; preds = %bb293.bb299_crit_edge, %cond_true205.bb299_crit_edge + br i1 false, label %bb302.preheader, label %bb299.bb390_crit_edge + +bb299.bb390_crit_edge: ; preds = %bb299 + br label %bb390 + +bb302.preheader: ; preds = %bb299 + br label %bb302 + +bb302: ; preds = %bb384.bb302_crit_edge, %bb302.preheader + br i1 false, label %bb302.bb384_crit_edge, label %cond_true339 + +bb302.bb384_crit_edge: ; preds = %bb302 + br label %bb384 + +cond_true339: ; preds = %bb302 + br i1 false, label %cond_true339.bb384_crit_edge, label %cond_true346 + +cond_true339.bb384_crit_edge: ; preds = %cond_true339 + br label %bb384 + +cond_true346: ; preds = %cond_true339 + br i1 false, label %cond_true357, label %cond_true346.cond_next361_crit_edge + +cond_true346.cond_next361_crit_edge: ; preds = %cond_true346 + br label %cond_next361 + +cond_true357: ; preds = %cond_true346 + br label %cond_next361 + +cond_next361: ; preds = %cond_true357, %cond_true346.cond_next361_crit_edge + br label %bb384 + +bb384: ; preds = %cond_next361, %cond_true339.bb384_crit_edge, %bb302.bb384_crit_edge + br i1 false, label %bb390.loopexit, label %bb384.bb302_crit_edge + +bb384.bb302_crit_edge: ; preds = %bb384 + br label %bb302 + +bb390.loopexit: ; preds = %bb384 + br label %bb390 + +bb390: ; preds = %bb390.loopexit, %bb299.bb390_crit_edge + br i1 false, label %bb391.preheader, label %bb390.bb442.preheader_crit_edge + +bb390.bb442.preheader_crit_edge: ; preds = %bb390 + br label %bb442.preheader + +bb391.preheader: ; preds = %bb390 + br label %bb391 + +bb391: ; preds = %bb413.bb391_crit_edge, %bb391.preheader + br i1 false, label %bb391.bb413_crit_edge, label %cond_next404 + +bb391.bb413_crit_edge: ; preds = %bb391 + br label %bb413 + +cond_next404: ; preds = %bb391 + br i1 false, label %cond_next404.HWrite.exit_crit_edge, label %cond_next.i13 + +cond_next404.HWrite.exit_crit_edge: ; preds = %cond_next404 + br label %HWrite.exit + +cond_next.i13: ; preds = %cond_next404 + br i1 false, label %cond_next.i13.cond_next13.i_crit_edge, label %cond_true12.i + +cond_next.i13.cond_next13.i_crit_edge: ; preds = %cond_next.i13 + br label %cond_next13.i + +cond_true12.i: ; preds = %cond_next.i13 + br label %cond_next13.i + +cond_next13.i: ; preds = %cond_true12.i, %cond_next.i13.cond_next13.i_crit_edge + br i1 false, label %cond_next13.i.bb.i22_crit_edge, label %cond_next43.i + +cond_next13.i.bb.i22_crit_edge: ; preds = %cond_next13.i + br label %bb.i22 + +cond_next43.i: ; preds = %cond_next13.i + br i1 false, label %cond_next43.i.bb.i22_crit_edge, label %bb60.i + +cond_next43.i.bb.i22_crit_edge: ; preds = %cond_next43.i + br label %bb.i22 + +bb.i22: ; preds = %cond_next43.i.bb.i22_crit_edge, %cond_next13.i.bb.i22_crit_edge + br label %bb413 + +bb60.i: ; preds = %cond_next43.i + br i1 false, label %bb60.i.HWrite.exit_crit_edge, label %cond_true81.i + +bb60.i.HWrite.exit_crit_edge: ; preds = %bb60.i + br label %HWrite.exit + +cond_true81.i: ; preds = %bb60.i + br label %bb413 + +HWrite.exit: ; preds = %bb60.i.HWrite.exit_crit_edge, %cond_next404.HWrite.exit_crit_edge + br label %bb413 + +bb413: ; preds = %HWrite.exit, %cond_true81.i, %bb.i22, %bb391.bb413_crit_edge + br i1 false, label %bb442.preheader.loopexit, label %bb413.bb391_crit_edge + +bb413.bb391_crit_edge: ; preds = %bb413 + br label %bb391 + +bb442.preheader.loopexit: ; preds = %bb413 + br label %bb442.preheader + +bb442.preheader: ; preds = 
%bb442.preheader.loopexit, %bb390.bb442.preheader_crit_edge + br label %bb442.outer + +bb420: ; preds = %bb442 + br i1 false, label %bb439.loopexit, label %cond_next433 + +cond_next433: ; preds = %bb420 + br i1 false, label %cond_next433.HRead.exit.loopexit_crit_edge, label %cond_next.i + +cond_next433.HRead.exit.loopexit_crit_edge: ; preds = %cond_next433 + br label %HRead.exit.loopexit + +cond_next.i: ; preds = %cond_next433 + br i1 false, label %cond_true9.i, label %cond_false223.i + +cond_true9.i: ; preds = %cond_next.i + switch i32 0, label %cond_false.i [ + i32 1, label %cond_true9.i.cond_true15.i_crit_edge + i32 5, label %cond_true9.i.cond_true15.i_crit_edge9 + ] + +cond_true9.i.cond_true15.i_crit_edge9: ; preds = %cond_true9.i + br label %cond_true15.i + +cond_true9.i.cond_true15.i_crit_edge: ; preds = %cond_true9.i + br label %cond_true15.i + +cond_true15.i: ; preds = %cond_true9.i.cond_true15.i_crit_edge, %cond_true9.i.cond_true15.i_crit_edge9 + br i1 false, label %cond_true15.i.cond_true44.i_crit_edge, label %cond_true15.i.cond_false49.i_crit_edge + +cond_true15.i.cond_false49.i_crit_edge: ; preds = %cond_true15.i + br label %cond_false49.i + +cond_true15.i.cond_true44.i_crit_edge: ; preds = %cond_true15.i + br label %cond_true44.i + +cond_false.i: ; preds = %cond_true9.i + br i1 false, label %cond_false.i.cond_next39.i_crit_edge, label %cond_true30.i + +cond_false.i.cond_next39.i_crit_edge: ; preds = %cond_false.i + br label %cond_next39.i + +cond_true30.i: ; preds = %cond_false.i + br label %cond_next39.i + +cond_next39.i: ; preds = %cond_true30.i, %cond_false.i.cond_next39.i_crit_edge + br i1 false, label %cond_next39.i.cond_true44.i_crit_edge, label %cond_next39.i.cond_false49.i_crit_edge + +cond_next39.i.cond_false49.i_crit_edge: ; preds = %cond_next39.i + br label %cond_false49.i + +cond_next39.i.cond_true44.i_crit_edge: ; preds = %cond_next39.i + br label %cond_true44.i + +cond_true44.i: ; preds = %cond_next39.i.cond_true44.i_crit_edge, %cond_true15.i.cond_true44.i_crit_edge + br i1 false, label %cond_true44.i.cond_next70.i_crit_edge, label %cond_true44.i.cond_true61.i_crit_edge + +cond_true44.i.cond_true61.i_crit_edge: ; preds = %cond_true44.i + br label %cond_true61.i + +cond_true44.i.cond_next70.i_crit_edge: ; preds = %cond_true44.i + br label %cond_next70.i + +cond_false49.i: ; preds = %cond_next39.i.cond_false49.i_crit_edge, %cond_true15.i.cond_false49.i_crit_edge + br i1 false, label %cond_false49.i.cond_next70.i_crit_edge, label %cond_false49.i.cond_true61.i_crit_edge + +cond_false49.i.cond_true61.i_crit_edge: ; preds = %cond_false49.i + br label %cond_true61.i + +cond_false49.i.cond_next70.i_crit_edge: ; preds = %cond_false49.i + br label %cond_next70.i + +cond_true61.i: ; preds = %cond_false49.i.cond_true61.i_crit_edge, %cond_true44.i.cond_true61.i_crit_edge + br i1 false, label %cond_true61.i.cond_next70.i_crit_edge, label %cond_true67.i + +cond_true61.i.cond_next70.i_crit_edge: ; preds = %cond_true61.i + br label %cond_next70.i + +cond_true67.i: ; preds = %cond_true61.i + br label %cond_next70.i + +cond_next70.i: ; preds = %cond_true67.i, %cond_true61.i.cond_next70.i_crit_edge, %cond_false49.i.cond_next70.i_crit_edge, %cond_true44.i.cond_next70.i_crit_edge + br i1 false, label %cond_true77.i, label %cond_next81.i + +cond_true77.i: ; preds = %cond_next70.i + br label %bb442.outer.backedge + +cond_next81.i: ; preds = %cond_next70.i + br i1 false, label %cond_true87.i, label %cond_false94.i + +cond_true87.i: ; preds = %cond_next81.i + br i1 false, label 
%cond_true87.i.cond_true130.i_crit_edge, label %cond_true87.i.cond_next135.i_crit_edge + +cond_true87.i.cond_next135.i_crit_edge: ; preds = %cond_true87.i + br label %cond_next135.i + +cond_true87.i.cond_true130.i_crit_edge: ; preds = %cond_true87.i + br label %cond_true130.i + +cond_false94.i: ; preds = %cond_next81.i + switch i32 0, label %cond_false94.i.cond_next125.i_crit_edge [ + i32 1, label %cond_false94.i.cond_true100.i_crit_edge + i32 5, label %cond_false94.i.cond_true100.i_crit_edge10 + ] + +cond_false94.i.cond_true100.i_crit_edge10: ; preds = %cond_false94.i + br label %cond_true100.i + +cond_false94.i.cond_true100.i_crit_edge: ; preds = %cond_false94.i + br label %cond_true100.i + +cond_false94.i.cond_next125.i_crit_edge: ; preds = %cond_false94.i + br label %cond_next125.i + +cond_true100.i: ; preds = %cond_false94.i.cond_true100.i_crit_edge, %cond_false94.i.cond_true100.i_crit_edge10 + br i1 false, label %cond_true107.i, label %cond_true100.i.cond_next109.i_crit_edge + +cond_true100.i.cond_next109.i_crit_edge: ; preds = %cond_true100.i + br label %cond_next109.i + +cond_true107.i: ; preds = %cond_true100.i + br label %cond_next109.i + +cond_next109.i: ; preds = %cond_true107.i, %cond_true100.i.cond_next109.i_crit_edge + br i1 false, label %cond_next109.i.cond_next125.i_crit_edge, label %cond_true116.i + +cond_next109.i.cond_next125.i_crit_edge: ; preds = %cond_next109.i + br label %cond_next125.i + +cond_true116.i: ; preds = %cond_next109.i + br label %cond_next125.i + +cond_next125.i: ; preds = %cond_true116.i, %cond_next109.i.cond_next125.i_crit_edge, %cond_false94.i.cond_next125.i_crit_edge + br i1 false, label %cond_next125.i.cond_true130.i_crit_edge, label %cond_next125.i.cond_next135.i_crit_edge + +cond_next125.i.cond_next135.i_crit_edge: ; preds = %cond_next125.i + br label %cond_next135.i + +cond_next125.i.cond_true130.i_crit_edge: ; preds = %cond_next125.i + br label %cond_true130.i + +cond_true130.i: ; preds = %cond_next125.i.cond_true130.i_crit_edge, %cond_true87.i.cond_true130.i_crit_edge + br label %cond_next135.i + +cond_next135.i: ; preds = %cond_true130.i, %cond_next125.i.cond_next135.i_crit_edge, %cond_true87.i.cond_next135.i_crit_edge + br i1 false, label %cond_true142.i, label %cond_next135.i.cond_next149.i_crit_edge + +cond_next135.i.cond_next149.i_crit_edge: ; preds = %cond_next135.i + br label %cond_next149.i + +cond_true142.i: ; preds = %cond_next135.i + br label %cond_next149.i + +cond_next149.i: ; preds = %cond_true142.i, %cond_next135.i.cond_next149.i_crit_edge + br i1 false, label %cond_true156.i, label %cond_next149.i.cond_next163.i_crit_edge + +cond_next149.i.cond_next163.i_crit_edge: ; preds = %cond_next149.i + br label %cond_next163.i + +cond_true156.i: ; preds = %cond_next149.i + br label %cond_next163.i + +cond_next163.i: ; preds = %cond_true156.i, %cond_next149.i.cond_next163.i_crit_edge + br i1 false, label %cond_true182.i, label %cond_next163.i.cond_next380.i_crit_edge + +cond_next163.i.cond_next380.i_crit_edge: ; preds = %cond_next163.i + br label %cond_next380.i + +cond_true182.i: ; preds = %cond_next163.i + br i1 false, label %cond_true182.i.cond_next380.i_crit_edge, label %cond_true196.i + +cond_true182.i.cond_next380.i_crit_edge: ; preds = %cond_true182.i + br label %cond_next380.i + +cond_true196.i: ; preds = %cond_true182.i + br i1 false, label %cond_true210.i, label %cond_true196.i.cond_next380.i_crit_edge + +cond_true196.i.cond_next380.i_crit_edge: ; preds = %cond_true196.i + br label %cond_next380.i + +cond_true210.i: ; preds = 
%cond_true196.i + br i1 false, label %cond_true216.i, label %cond_true210.i.cond_next380.i_crit_edge + +cond_true210.i.cond_next380.i_crit_edge: ; preds = %cond_true210.i + br label %cond_next380.i + +cond_true216.i: ; preds = %cond_true210.i + br label %cond_next380.i + +cond_false223.i: ; preds = %cond_next.i + br i1 false, label %cond_true229.i, label %cond_false355.i + +cond_true229.i: ; preds = %cond_false223.i + br i1 false, label %cond_true229.i.HRead.exit.loopexit_crit_edge, label %cond_next243.i + +cond_true229.i.HRead.exit.loopexit_crit_edge: ; preds = %cond_true229.i + br label %HRead.exit.loopexit + +cond_next243.i: ; preds = %cond_true229.i + br i1 false, label %cond_true248.i, label %cond_false255.i + +cond_true248.i: ; preds = %cond_next243.i + br label %cond_next260.i + +cond_false255.i: ; preds = %cond_next243.i + br label %cond_next260.i + +cond_next260.i: ; preds = %cond_false255.i, %cond_true248.i + br i1 false, label %cond_true267.i, label %cond_next273.i + +cond_true267.i: ; preds = %cond_next260.i + br label %bb442.backedge + +bb442.backedge: ; preds = %bb.i, %cond_true267.i + br label %bb442 + +cond_next273.i: ; preds = %cond_next260.i + br i1 false, label %cond_true281.i, label %cond_next273.i.cond_next288.i_crit_edge + +cond_next273.i.cond_next288.i_crit_edge: ; preds = %cond_next273.i + br label %cond_next288.i + +cond_true281.i: ; preds = %cond_next273.i + br label %cond_next288.i + +cond_next288.i: ; preds = %cond_true281.i, %cond_next273.i.cond_next288.i_crit_edge + br i1 false, label %cond_true295.i, label %cond_next288.i.cond_next302.i_crit_edge + +cond_next288.i.cond_next302.i_crit_edge: ; preds = %cond_next288.i + br label %cond_next302.i + +cond_true295.i: ; preds = %cond_next288.i + br label %cond_next302.i + +cond_next302.i: ; preds = %cond_true295.i, %cond_next288.i.cond_next302.i_crit_edge + br i1 false, label %cond_next302.i.cond_next380.i_crit_edge, label %cond_true328.i + +cond_next302.i.cond_next380.i_crit_edge: ; preds = %cond_next302.i + br label %cond_next380.i + +cond_true328.i: ; preds = %cond_next302.i + br i1 false, label %cond_true343.i, label %cond_true328.i.cond_next380.i_crit_edge + +cond_true328.i.cond_next380.i_crit_edge: ; preds = %cond_true328.i + br label %cond_next380.i + +cond_true343.i: ; preds = %cond_true328.i + br i1 false, label %cond_true349.i, label %cond_true343.i.cond_next380.i_crit_edge + +cond_true343.i.cond_next380.i_crit_edge: ; preds = %cond_true343.i + br label %cond_next380.i + +cond_true349.i: ; preds = %cond_true343.i + br label %cond_next380.i + +cond_false355.i: ; preds = %cond_false223.i + br i1 false, label %cond_false355.i.bb.i_crit_edge, label %cond_next363.i + +cond_false355.i.bb.i_crit_edge: ; preds = %cond_false355.i + br label %bb.i + +cond_next363.i: ; preds = %cond_false355.i + br i1 false, label %bb377.i, label %cond_next363.i.bb.i_crit_edge + +cond_next363.i.bb.i_crit_edge: ; preds = %cond_next363.i + br label %bb.i + +bb.i: ; preds = %cond_next363.i.bb.i_crit_edge, %cond_false355.i.bb.i_crit_edge + br label %bb442.backedge + +bb377.i: ; preds = %cond_next363.i + br label %cond_next380.i + +cond_next380.i: ; preds = %bb377.i, %cond_true349.i, %cond_true343.i.cond_next380.i_crit_edge, %cond_true328.i.cond_next380.i_crit_edge, %cond_next302.i.cond_next380.i_crit_edge, %cond_true216.i, %cond_true210.i.cond_next380.i_crit_edge, %cond_true196.i.cond_next380.i_crit_edge, %cond_true182.i.cond_next380.i_crit_edge, %cond_next163.i.cond_next380.i_crit_edge + br i1 false, label 
%cond_next380.i.HRead.exit_crit_edge, label %cond_true391.i + +cond_next380.i.HRead.exit_crit_edge: ; preds = %cond_next380.i + br label %HRead.exit + +cond_true391.i: ; preds = %cond_next380.i + br label %bb442.outer.backedge + +bb442.outer.backedge: ; preds = %bb439, %cond_true391.i, %cond_true77.i + br label %bb442.outer + +HRead.exit.loopexit: ; preds = %cond_true229.i.HRead.exit.loopexit_crit_edge, %cond_next433.HRead.exit.loopexit_crit_edge + br label %HRead.exit + +HRead.exit: ; preds = %HRead.exit.loopexit, %cond_next380.i.HRead.exit_crit_edge + br label %bb439 + +bb439.loopexit: ; preds = %bb420 + br label %bb439 + +bb439: ; preds = %bb439.loopexit, %HRead.exit + br label %bb442.outer.backedge + +bb442.outer: ; preds = %bb442.outer.backedge, %bb442.preheader + br label %bb442 + +bb442: ; preds = %bb442.outer, %bb442.backedge + br i1 false, label %bb420, label %bb.loopexit +} + +define void @Invalidate() { +entry: + br i1 false, label %cond_false, label %cond_true + +cond_true: ; preds = %entry + br i1 false, label %cond_true40, label %cond_true.cond_next_crit_edge + +cond_true.cond_next_crit_edge: ; preds = %cond_true + br label %cond_next + +cond_true40: ; preds = %cond_true + br label %cond_next + +cond_next: ; preds = %cond_true40, %cond_true.cond_next_crit_edge + br i1 false, label %cond_true68, label %cond_next.cond_next73_crit_edge + +cond_next.cond_next73_crit_edge: ; preds = %cond_next + br label %cond_next73 + +cond_true68: ; preds = %cond_next + br label %cond_next73 + +cond_next73: ; preds = %cond_true68, %cond_next.cond_next73_crit_edge + br i1 false, label %cond_true91, label %cond_next73.cond_next96_crit_edge + +cond_next73.cond_next96_crit_edge: ; preds = %cond_next73 + br label %cond_next96 + +cond_true91: ; preds = %cond_next73 + br label %cond_next96 + +cond_next96: ; preds = %cond_true91, %cond_next73.cond_next96_crit_edge + br i1 false, label %cond_next96.cond_next112_crit_edge, label %cond_true105 + +cond_next96.cond_next112_crit_edge: ; preds = %cond_next96 + br label %cond_next112 + +cond_true105: ; preds = %cond_next96 + br label %cond_next112 + +cond_next112: ; preds = %cond_true105, %cond_next96.cond_next112_crit_edge + br i1 false, label %cond_next112.cond_next127_crit_edge, label %cond_true119 + +cond_next112.cond_next127_crit_edge: ; preds = %cond_next112 + br label %cond_next127 + +cond_true119: ; preds = %cond_next112 + br label %cond_next127 + +cond_next127: ; preds = %cond_true119, %cond_next112.cond_next127_crit_edge + br i1 false, label %cond_next141, label %cond_true134 + +cond_true134: ; preds = %cond_next127 + br i1 false, label %cond_true134.bb161_crit_edge, label %cond_true134.bb_crit_edge + +cond_true134.bb_crit_edge: ; preds = %cond_true134 + br label %bb + +cond_true134.bb161_crit_edge: ; preds = %cond_true134 + br label %bb161 + +cond_next141: ; preds = %cond_next127 + br label %bb154 + +bb: ; preds = %bb154.bb_crit_edge, %cond_true134.bb_crit_edge + br label %bb154 + +bb154: ; preds = %bb, %cond_next141 + br i1 false, label %bb154.bb161_crit_edge, label %bb154.bb_crit_edge + +bb154.bb_crit_edge: ; preds = %bb154 + br label %bb + +bb154.bb161_crit_edge: ; preds = %bb154 + br label %bb161 + +bb161: ; preds = %bb154.bb161_crit_edge, %cond_true134.bb161_crit_edge + br i1 false, label %bb161.cond_next201_crit_edge, label %cond_true198 + +bb161.cond_next201_crit_edge: ; preds = %bb161 + br label %cond_next201 + +cond_true198: ; preds = %bb161 + br label %cond_next201 + +cond_next201: ; preds = %cond_true198, %bb161.cond_next201_crit_edge + br 
i1 false, label %cond_next212, label %cond_true206 + +cond_true206: ; preds = %cond_next201 + br label %UnifiedReturnBlock + +cond_false: ; preds = %entry + br label %UnifiedReturnBlock + +cond_next212: ; preds = %cond_next201 + br label %UnifiedReturnBlock + +UnifiedReturnBlock: ; preds = %cond_next212, %cond_false, %cond_true206 + ret void +} diff --git a/final/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/final/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll new file mode 100644 index 00000000000..767e5db94ce --- /dev/null +++ b/final/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -postdomfrontier -disable-output + +define void @args_out_of_range() { +entry: + br label %bb + +bb: ; preds = %bb, %entry + br label %bb +} + +define void @args_out_of_range_3() { +entry: + br label %bb + +bb: ; preds = %bb, %entry + br label %bb +} + +define void @Feq() { +entry: + br i1 false, label %cond_true, label %cond_next + +cond_true: ; preds = %entry + unreachable + +cond_next: ; preds = %entry + unreachable +} diff --git a/final/test/Analysis/PostDominators/dg.exp b/final/test/Analysis/PostDominators/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ b/final/test/Analysis/PostDominators/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/final/test/Analysis/PostDominators/pr1098.ll b/final/test/Analysis/PostDominators/pr1098.ll new file mode 100644 index 00000000000..afb47769ee4 --- /dev/null +++ b/final/test/Analysis/PostDominators/pr1098.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -postdomtree -analyze | grep entry +; PR932 + +define void @foo(i1 %x) { +entry: + br i1 %x, label %bb1, label %bb0 +bb0: ; preds = %entry, bb0 + br label %bb0 +bb1: ; preds = %entry + br label %bb2 +bb2: ; preds = %bb1 + ret void +} + diff --git a/final/test/Analysis/PostDominators/pr6047_a.ll b/final/test/Analysis/PostDominators/pr6047_a.ll new file mode 100644 index 00000000000..ec1455b46fe --- /dev/null +++ b/final/test/Analysis/PostDominators/pr6047_a.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -postdomtree -analyze | FileCheck %s +define internal void @f() { +entry: + br i1 undef, label %bb35, label %bb3.i + +bb3.i: + br label %bb3.i + +bb35.loopexit3: + br label %bb35 + +bb35: + ret void +} +; CHECK: [3] %entry diff --git a/final/test/Analysis/PostDominators/pr6047_b.ll b/final/test/Analysis/PostDominators/pr6047_b.ll new file mode 100644 index 00000000000..7bd2c86b737 --- /dev/null +++ b/final/test/Analysis/PostDominators/pr6047_b.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -postdomtree -analyze | FileCheck %s +define internal void @f() { +entry: + br i1 undef, label %a, label %bb3.i + +a: + br i1 undef, label %bb35, label %bb3.i + +bb3.i: + br label %bb3.i + + +bb35.loopexit3: + br label %bb35 + +bb35: + ret void +} +; CHECK: [4] %entry diff --git a/final/test/Analysis/PostDominators/pr6047_c.ll b/final/test/Analysis/PostDominators/pr6047_c.ll new file mode 100644 index 00000000000..08c9551f156 --- /dev/null +++ b/final/test/Analysis/PostDominators/pr6047_c.ll @@ -0,0 +1,147 @@ +; RUN: opt < %s -postdomtree -analyze | FileCheck %s +define internal void @f() { +entry: + br i1 undef, label %bb35, label %bb3.i + +bb3.i: + br label %bb3.i + +bb: + br label %bb35 + +bb.i: + br label %bb35 + +_float32_unpack.exit: + br label %bb35 + +bb.i5: + br label %bb35 + +_float32_unpack.exit8: + br label %bb35 + +bb32.preheader: + br label %bb35 + +bb3: + br label %bb35 + 
+bb3.split.us: + br label %bb35 + +bb.i4.us: + br label %bb35 + +bb7.i.us: + br label %bb35 + +bb.i4.us.backedge: + br label %bb35 + +bb1.i.us: + br label %bb35 + +bb6.i.us: + br label %bb35 + +bb4.i.us: + br label %bb35 + +bb8.i.us: + br label %bb35 + +bb3.i.loopexit.us: + br label %bb35 + +bb.nph21: + br label %bb35 + +bb4: + br label %bb35 + +bb5: + br label %bb35 + +bb14.preheader: + br label %bb35 + +bb.nph18: + br label %bb35 + +bb8.us.preheader: + br label %bb35 + +bb8.preheader: + br label %bb35 + +bb8.us: + br label %bb35 + +bb8: + br label %bb35 + +bb15.loopexit: + br label %bb35 + +bb15.loopexit2: + br label %bb35 + +bb15: + br label %bb35 + +bb16: + br label %bb35 + +bb17.loopexit.split: + br label %bb35 + +bb.nph14: + br label %bb35 + +bb19: + br label %bb35 + +bb20: + br label %bb35 + +bb29.preheader: + br label %bb35 + +bb.nph: + br label %bb35 + +bb23.us.preheader: + br label %bb35 + +bb23.preheader: + br label %bb35 + +bb23.us: + br label %bb35 + +bb23: + br label %bb35 + +bb30.loopexit: + br label %bb35 + +bb30.loopexit1: + br label %bb35 + +bb30: + br label %bb35 + +bb31: + br label %bb35 + +bb35.loopexit: + br label %bb35 + +bb35.loopexit3: + br label %bb35 + +bb35: + ret void +} +; CHECK: [3] %entry diff --git a/final/test/Analysis/PostDominators/pr6047_d.ll b/final/test/Analysis/PostDominators/pr6047_d.ll new file mode 100644 index 00000000000..4cfa88029ae --- /dev/null +++ b/final/test/Analysis/PostDominators/pr6047_d.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -postdomtree -analyze | FileCheck %s +define internal void @f() { +entry: + br i1 1, label %a, label %b + +a: +br label %c + +b: +br label %c + +c: + br i1 undef, label %bb35, label %bb3.i + +bb3.i: + br label %bb3.i + +bb35.loopexit3: + br label %bb35 + +bb35: + ret void +} +; CHECK: [4] %entry diff --git a/final/test/Analysis/Profiling/dg.exp b/final/test/Analysis/Profiling/dg.exp new file mode 100644 index 00000000000..1eb4755c410 --- /dev/null +++ b/final/test/Analysis/Profiling/dg.exp @@ -0,0 +1,4 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] + diff --git a/final/test/Analysis/Profiling/edge-profiling.ll b/final/test/Analysis/Profiling/edge-profiling.ll new file mode 100644 index 00000000000..cbaf47617fb --- /dev/null +++ b/final/test/Analysis/Profiling/edge-profiling.ll @@ -0,0 +1,139 @@ +; Test the edge profiling instrumentation. 
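+; Edge profiling gives each instrumented CFG edge its own slot in a
+; module-level counter array and increments that slot with a load/add/store
+; sequence whenever the edge executes; the CHECK lines below verify both the
+; @EdgeProfCounters array (19 i32 counters here) and the inserted code.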
+; RUN: opt < %s -insert-edge-profiling -S | FileCheck %s + +; ModuleID = '' + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] +@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] +; CHECK:@EdgeProfCounters +; CHECK:[19 x i32] +; CHECK:zeroinitializer + +define void @oneblock() nounwind { +entry: +; CHECK:entry: +; CHECK:%OldFuncCounter +; CHECK:load +; CHECK:getelementptr +; CHECK:@EdgeProfCounters +; CHECK:i32 0 +; CHECK:i32 0 +; CHECK:%NewFuncCounter +; CHECK:add +; CHECK:%OldFuncCounter +; CHECK:store +; CHECK:%NewFuncCounter +; CHECK:getelementptr +; CHECK:@EdgeProfCounters + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; [#uses=0] + ret void +} + +declare i32 @puts(i8*) + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: +; CHECK:entry: + %argc_addr = alloca i32 ; [#uses=4] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32 ; [#uses=2] + %j = alloca i32 ; [#uses=4] + %i = alloca i32 ; [#uses=4] + %0 = alloca i32 ; [#uses=2] +; CHECK:call +; CHECK:@llvm_start_edge_profiling +; CHECK:@EdgeProfCounters + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %i, align 4 + br label %bb10 + +bb: ; preds = %bb10 +; CHECK:bb: + %1 = load i32* %argc_addr, align 4 ; [#uses=1] + %2 = icmp sgt i32 %1, 1 ; [#uses=1] + br i1 %2, label %bb1, label %bb8 + +bb1: ; preds = %bb +; CHECK:bb1: + store i32 0, i32* %j, align 4 + br label %bb6 + +bb2: ; preds = %bb6 +; CHECK:bb2: + %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; [#uses=0] + %4 = load i32* %argc_addr, align 4 ; [#uses=1] + %5 = icmp sgt i32 %4, 2 ; [#uses=1] + br i1 %5, label %bb3, label %bb4 + +bb3: ; preds = %bb2 +; CHECK:bb3: + %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb5 + +bb4: ; preds = %bb2 +; CHECK:bb4: + %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb11 + +bb5: ; preds = %bb3 +; CHECK:bb5: + %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; [#uses=0] + %9 = load i32* %j, align 4 ; [#uses=1] + %10 = add nsw i32 %9, 1 ; [#uses=1] + store i32 %10, i32* %j, align 4 + br label %bb6 + +bb6: ; preds = %bb5, %bb1 +; CHECK:bb6: + %11 = load i32* %j, align 4 ; [#uses=1] + %12 = load i32* %argc_addr, align 4 ; [#uses=1] + %13 = icmp slt i32 %11, %12 ; [#uses=1] + br i1 %13, label %bb2, label %bb7 + +bb7: ; preds = %bb6 +; CHECK:bb7: + br label %bb9 + +bb8: ; preds = %bb +; CHECK:bb8: + %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb9 + +bb9: ; preds = %bb8, %bb7 +; CHECK:bb9: + %15 = load i32* %i, align 4 ; [#uses=1] + %16 = add nsw i32 %15, 1 ; [#uses=1] + store i32 %16, i32* %i, align 4 + br label %bb10 + +bb10: ; preds = %bb9, %entry +; CHECK:bb10: + %17 = load i32* %i, align 4 ; [#uses=1] + %18 = icmp ne i32 %17, 3 ; [#uses=1] + br i1 %18, label %bb, label %bb11 +; CHECK:br +; CHECK:label 
%bb10.bb11_crit_edge + +; CHECK:bb10.bb11_crit_edge: +; CHECK:br +; CHECK:label %bb11 + +bb11: ; preds = %bb10, %bb4 +; CHECK:bb11: + call void @oneblock() nounwind + store i32 0, i32* %0, align 4 + %19 = load i32* %0, align 4 ; [#uses=1] + store i32 %19, i32* %retval, align 4 + br label %return + +return: ; preds = %bb11 +; CHECK:return: + %retval12 = load i32* %retval ; [#uses=1] + ret i32 %retval12 +} diff --git a/final/test/Analysis/Profiling/profiling-tool-chain.ll b/final/test/Analysis/Profiling/profiling-tool-chain.ll new file mode 100644 index 00000000000..9135a85dc3a --- /dev/null +++ b/final/test/Analysis/Profiling/profiling-tool-chain.ll @@ -0,0 +1,212 @@ +; RUN: llvm-as %s -o %t1 + +; FIXME: The RUX parts of the test are disabled for now; they aren't working on +; llvm-gcc-x86_64-darwin10-selfhost. + +; Test the optimal edge profiling instrumentation. +; RUN: opt %t1 -insert-optimal-edge-profiling -o %t2 +; RUX: llvm-dis < %t2 | FileCheck --check-prefix=INST %s + +; Test the creation, reading, and displaying of a profile. +; RUX: rm -f llvmprof.out +; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 +; RUX: lli -load %llvmshlibdir/profile_rt%shlibext %t2 1 2 +; RUX: llvm-prof -print-all-code %t1 | FileCheck --check-prefix=PROF %s + +; Also test the loaded profile with the verifier. +; RUX: opt %t1 -profile-loader -profile-verifier -o %t3 + +; Test the profile estimator. +; RUN: opt %t1 -profile-estimator -profile-verifier -o %t3 + +; PROF: 1. 2/4 oneblock +; PROF: 2. 2/4 main +; PROF: 1. 15.7895% 12/76 main() - bb6 +; PROF: 2. 11.8421% 9/76 main() - bb2 +; PROF: 3. 11.8421% 9/76 main() - bb3 +; PROF: 4. 11.8421% 9/76 main() - bb5 +; PROF: 5. 10.5263% 8/76 main() - bb10 +; PROF: 6. 7.89474% 6/76 main() - bb +; PROF: 7. 7.89474% 6/76 main() - bb9 +; PROF: 8. 3.94737% 3/76 main() - bb1 +; PROF: 9. 3.94737% 3/76 main() - bb7 +; PROF: 10. 3.94737% 3/76 main() - bb8 +; PROF: 11. 2.63158% 2/76 oneblock() - entry +; PROF: 12. 2.63158% 2/76 main() - entry +; PROF: 13. 2.63158% 2/76 main() - bb11 +; PROF: 14. 2.63158% 2/76 main() - return + +; ModuleID = '' + +@.str = private constant [12 x i8] c"hello world\00", align 1 ; <[12 x i8]*> [#uses=1] +@.str1 = private constant [6 x i8] c"franz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str2 = private constant [9 x i8] c"argc > 2\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str3 = private constant [9 x i8] c"argc = 1\00", align 1 ; <[9 x i8]*> [#uses=1] +@.str4 = private constant [6 x i8] c"fritz\00", align 1 ; <[6 x i8]*> [#uses=1] +@.str5 = private constant [10 x i8] c"argc <= 1\00", align 1 ; <[10 x i8]*> [#uses=1] +; INST:@OptEdgeProfCounters +; INST:[21 x i32] +; INST:[i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1, +; INST:i32 0, +; INST:i32 -1, +; INST:i32 -1] + +; PROF:;;; %oneblock called 2 times. +; PROF:;;; +define void @oneblock() nounwind { +entry: +; PROF:entry: +; PROF: ;;; Basic block executed 2 times. + %0 = call i32 @puts(i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0)) nounwind ; [#uses=0] + ret void +} + +declare i32 @puts(i8*) + +; PROF:;;; %main called 2 times. +; PROF:;;; +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: +; PROF:entry: +; PROF: ;;; Basic block executed 2 times.
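+; The expected counts come from the two profiled runs in the RUX lines above +; (one with no arguments, one with two): main executes twice, its outer loop +; body %bb three times per call, and the j-loop only in the argc > 1 run. In +; @OptEdgeProfCounters, the slots initialized to 0 are the counters that are +; actually updated; the -1 slots appear to mark edges the optimal +; instrumentation leaves uncounted, their values being recomputed from the +; counted edges when the profile is read.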
+ %argc_addr = alloca i32 ; [#uses=4] + %argv_addr = alloca i8** ; [#uses=1] + %retval = alloca i32 ; [#uses=2] + %j = alloca i32 ; [#uses=4] + %i = alloca i32 ; [#uses=4] + %0 = alloca i32 ; [#uses=2] +; INST:call +; INST:@llvm_start_opt_edge_profiling +; INST:@OptEdgeProfCounters + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 %argc, i32* %argc_addr + store i8** %argv, i8*** %argv_addr + store i32 0, i32* %i, align 4 + br label %bb10 +; PROF: ;;; Out-edge counts: [2.000000e+00 -> bb10] + +bb: ; preds = %bb10 +; PROF:bb: +; PROF: ;;; Basic block executed 6 times. + %1 = load i32* %argc_addr, align 4 ; [#uses=1] + %2 = icmp sgt i32 %1, 1 ; [#uses=1] + br i1 %2, label %bb1, label %bb8 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb1] [3.000000e+00 -> bb8] + +bb1: ; preds = %bb +; PROF:bb1: +; PROF: ;;; Basic block executed 3 times. + store i32 0, i32* %j, align 4 + br label %bb6 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb6] + +bb2: ; preds = %bb6 +; PROF:bb2: +; PROF: ;;; Basic block executed 9 times. + %3 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i64 0, i64 0)) nounwind ; [#uses=0] + %4 = load i32* %argc_addr, align 4 ; [#uses=1] + %5 = icmp sgt i32 %4, 2 ; [#uses=1] + br i1 %5, label %bb3, label %bb4 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb3] + +bb3: ; preds = %bb2 +; PROF:bb3: +; PROF: ;;; Basic block executed 9 times. + %6 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str2, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb5 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb5] + +bb4: ; preds = %bb2 +; PROF:bb4: +; PROF: ;;; Never executed! + %7 = call i32 @puts(i8* getelementptr inbounds ([9 x i8]* @.str3, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb11 + +bb5: ; preds = %bb3 +; PROF:bb5: +; PROF: ;;; Basic block executed 9 times. + %8 = call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0)) nounwind ; [#uses=0] + %9 = load i32* %j, align 4 ; [#uses=1] + %10 = add nsw i32 %9, 1 ; [#uses=1] + store i32 %10, i32* %j, align 4 + br label %bb6 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb6] + +bb6: ; preds = %bb5, %bb1 +; PROF:bb6: +; PROF: ;;; Basic block executed 12 times. + %11 = load i32* %j, align 4 ; [#uses=1] + %12 = load i32* %argc_addr, align 4 ; [#uses=1] + %13 = icmp slt i32 %11, %12 ; [#uses=1] + br i1 %13, label %bb2, label %bb7 +; PROF: ;;; Out-edge counts: [9.000000e+00 -> bb2] [3.000000e+00 -> bb7] + +bb7: ; preds = %bb6 +; PROF:bb7: +; PROF: ;;; Basic block executed 3 times. + br label %bb9 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] + +bb8: ; preds = %bb +; PROF:bb8: +; PROF: ;;; Basic block executed 3 times. + %14 = call i32 @puts(i8* getelementptr inbounds ([10 x i8]* @.str5, i64 0, i64 0)) nounwind ; [#uses=0] + br label %bb9 +; PROF: ;;; Out-edge counts: [3.000000e+00 -> bb9] + +bb9: ; preds = %bb8, %bb7 +; PROF:bb9: +; PROF: ;;; Basic block executed 6 times. + %15 = load i32* %i, align 4 ; [#uses=1] + %16 = add nsw i32 %15, 1 ; [#uses=1] + store i32 %16, i32* %i, align 4 + br label %bb10 +; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb10] + +bb10: ; preds = %bb9, %entry +; PROF:bb10: +; PROF: ;;; Basic block executed 8 times. 
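+; Flow is conserved at this block: 2 executions enter from entry and 6 from +; bb9, matching the count of 8 above, while 6 leave toward bb and 2 toward +; bb11; this per-block consistency is what the -profile-verifier invocations +; in the RUN lines above check.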
+ %17 = load i32* %i, align 4 ; [#uses=1] + %18 = icmp ne i32 %17, 3 ; [#uses=1] + br i1 %18, label %bb, label %bb11 +; INST:br +; INST:label %bb10.bb11_crit_edge +; PROF: ;;; Out-edge counts: [6.000000e+00 -> bb] [2.000000e+00 -> bb11] + +; INST:bb10.bb11_crit_edge: +; INST:br +; INST:label %bb11 + +bb11: ; preds = %bb10, %bb4 +; PROF:bb11: +; PROF: ;;; Basic block executed 2 times. + call void @oneblock() nounwind + store i32 0, i32* %0, align 4 + %19 = load i32* %0, align 4 ; [#uses=1] + store i32 %19, i32* %retval, align 4 + br label %return +; PROF: ;;; Out-edge counts: [2.000000e+00 -> return] + +return: ; preds = %bb11 +; PROF:return: +; PROF: ;;; Basic block executed 2 times. + %retval12 = load i32* %retval ; [#uses=1] + ret i32 %retval12 +} diff --git a/final/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll b/final/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll new file mode 100644 index 00000000000..218b4375f70 --- /dev/null +++ b/final/test/Analysis/RegionInfo/20100809_bb_not_in_domtree.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions %s +define i32 @main() nounwind { +entry: + br label %for.cond + +test: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + br i1 true, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + br label %for.cond + +for.end: ; preds = %for.cond + ret i32 0 +} diff --git a/final/test/Analysis/RegionInfo/block_sort.ll b/final/test/Analysis/RegionInfo/block_sort.ll new file mode 100644 index 00000000000..faec45a911f --- /dev/null +++ b/final/test/Analysis/RegionInfo/block_sort.ll @@ -0,0 +1,42 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats -analyze < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @BZ2_blockSort() nounwind { +start: + br label %while + +while: + br label %while.body134.i.i + +while.body134.i.i: + br i1 1, label %end, label %w + +w: + br label %if.end140.i.i + +if.end140.i.i: + br i1 1, label %while.end186.i.i, label %if.end183.i.i + +if.end183.i.i: + br label %while.body134.i.i + +while.end186.i.i: + br label %while + +end: + ret void +} +; CHECK-NOT: => +; CHECK: [0] start => +; CHECK: [1] while => end + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: start, while, while.body134.i.i, end, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, +; BBIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, + +; RNIT: start, while => end, end, +; RNIT: while, while.body134.i.i, w, if.end140.i.i, while.end186.i.i, if.end183.i.i, diff --git a/final/test/Analysis/RegionInfo/cond_loop.ll b/final/test/Analysis/RegionInfo/cond_loop.ll new file mode 100644 index 00000000000..2ce57c3c5f3 --- /dev/null +++ b/final/test/Analysis/RegionInfo/cond_loop.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +5: + br label %"0" + +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + ret void +3: + br i1 1, label %"1", label %"4" +4: + br label %"0" +} + +; CHECK-NOT: => +; 
CHECK: [0] 5 => +; CHECK: [1] 0 => 2 + +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 5, 0, 1, 2, 3, 4, +; BBIT: 0, 1, 3, 4, + +; RNIT: 5, 0 => 2, 2, +; RNIT: 0, 1, 3, 4, diff --git a/final/test/Analysis/RegionInfo/condition_complicated.ll b/final/test/Analysis/RegionInfo/condition_complicated.ll new file mode 100644 index 00000000000..7ca5c7c7b53 --- /dev/null +++ b/final/test/Analysis/RegionInfo/condition_complicated.ll @@ -0,0 +1,60 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +end165: + br i1 1, label %false239, label %true181 + +true181: + br i1 1, label %then187, label %else232 + +then187: + br label %end265 + +else232: + br i1 1, label %false239, label %then245 + +false239: + br i1 1, label %then245, label %else259 + +then245: + br i1 1, label %then251, label %end253 + +then251: + br label %end253 + +end253: + br label %end265 + +else259: + br label %end265 + +end265: + br i1 1, label %then291, label %end298 + +then291: + br label %end298 + +end298: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] end165 => +; CHECK-NEXT: [1] end165 => end265 +; CHECK-NEXT: [2] then245 => end253 +; CHECK-NEXT: [1] end265 => end298 + +; STAT: 4 region - The # of regions + +; BBIT: end165, false239, then245, then251, end253, end265, then291, end298, else259, true181, then187, else232, +; BBIT: end165, false239, then245, then251, end253, else259, true181, then187, else232, +; BBIT: then245, then251, +; BBIT: end265, then291, + +; RNIT: end165 => end265, end265 => end298, end298, +; RNIT: end165, false239, then245 => end253, end253, else259, true181, then187, else232, +; RNIT: then245, then251, +; RNIT: end265, then291, diff --git a/final/test/Analysis/RegionInfo/condition_complicated_2.ll b/final/test/Analysis/RegionInfo/condition_complicated_2.ll new file mode 100644 index 00000000000..5fa940a61ef --- /dev/null +++ b/final/test/Analysis/RegionInfo/condition_complicated_2.ll @@ -0,0 +1,44 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc void @compress() nounwind { +end33: + br i1 1, label %end124, label %lor.lhs.false95 + +lor.lhs.false95: + br i1 1, label %then107, label %end172 + +then107: + br i1 1, label %end124, label %then113 + +then113: + br label %end124 + +end124: + br label %exit + +end172: + br label %exit + + +exit: + unreachable + + +} +; CHECK-NOT: => +; CHECK: [0] end33 => +; CHECK-NEXT: [1] end33 => exit +; CHECK-NEXT: [2] then107 => end124 + +; STAT: 3 region - The # of regions + +; BBIT: end33, end124, exit, lor.lhs.false95, then107, then113, end172, +; BBIT: end33, end124, lor.lhs.false95, then107, then113, end172, +; BBIT: then107, then113, + +; RNIT: end33 => exit, exit, +; RNIT: end33, end124, lor.lhs.false95, then107 => end124, end172, +; RNIT: then107, then113, diff --git a/final/test/Analysis/RegionInfo/condition_forward_edge.ll b/final/test/Analysis/RegionInfo/condition_forward_edge.ll new file mode 100644 index 
00000000000..098c9b6b461 --- /dev/null +++ b/final/test/Analysis/RegionInfo/condition_forward_edge.ll @@ -0,0 +1,26 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"3" +3: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK: [1] 1 => 3 + +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, + +; RNIT: 0, 1 => 3, 3, +; RNIT: 1, 2, diff --git a/final/test/Analysis/RegionInfo/condition_same_exit.ll b/final/test/Analysis/RegionInfo/condition_same_exit.ll new file mode 100644 index 00000000000..1b88596c0f8 --- /dev/null +++ b/final/test/Analysis/RegionInfo/condition_same_exit.ll @@ -0,0 +1,31 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br i1 1, label %"1", label %"4" + +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 0 => 4 +; CHECK-NEXT: [2] 1 => 4 +; STAT: 3 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 0, 1, 2, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0 => 4, 4, +; RNIT: 0, 1 => 4, +; RNIT: 1, 2, 3, diff --git a/final/test/Analysis/RegionInfo/condition_simple.ll b/final/test/Analysis/RegionInfo/condition_simple.ll new file mode 100644 index 00000000000..19b154b6476 --- /dev/null +++ b/final/test/Analysis/RegionInfo/condition_simple.ll @@ -0,0 +1,28 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"4" +3: + br label %"4" +4: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 1 => 4 +; STAT: 2 region - The # of regions + +; BBIT: 0, 1, 2, 4, 3, +; BBIT: 1, 2, 3, + +; RNIT: 0, 1 => 4, 4, +; RNIT: 1, 2, 3, diff --git a/final/test/Analysis/RegionInfo/dg.exp b/final/test/Analysis/RegionInfo/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ b/final/test/Analysis/RegionInfo/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/final/test/Analysis/RegionInfo/exit_in_condition.ll b/final/test/Analysis/RegionInfo/exit_in_condition.ll new file mode 100644 index 00000000000..3b152d2f565 --- /dev/null +++ b/final/test/Analysis/RegionInfo/exit_in_condition.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc 
zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %body.i, label %if.end + +body.i: + br i1 1, label %end, label %if.end + +if.end: + br label %if.then64 + +if.then64: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK-NEXT: [1] outer => end +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, body.i, end, if.end, if.then64, +; BBIT: outer, body, body.i, if.end, if.then64, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, body.i, if.end, if.then64, diff --git a/final/test/Analysis/RegionInfo/infinite_loop.ll b/final/test/Analysis/RegionInfo/infinite_loop.ll new file mode 100644 index 00000000000..59cead49261 --- /dev/null +++ b/final/test/Analysis/RegionInfo/infinite_loop.ll @@ -0,0 +1,20 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK: [1] 1 => 4 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions diff --git a/final/test/Analysis/RegionInfo/infinite_loop_2.ll b/final/test/Analysis/RegionInfo/infinite_loop_2.ll new file mode 100644 index 00000000000..80c69b7ab2e --- /dev/null +++ b/final/test/Analysis/RegionInfo/infinite_loop_2.ll @@ -0,0 +1,36 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK: [1] 1 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 5, 11, 6, 12, 3, 4, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 1 => 3, 3, 4, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/final/test/Analysis/RegionInfo/infinite_loop_3.ll b/final/test/Analysis/RegionInfo/infinite_loop_3.ll new file mode 100644 index 00000000000..74ceafb8495 --- /dev/null +++ b/final/test/Analysis/RegionInfo/infinite_loop_3.ll @@ -0,0 +1,52 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br label %"2" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 1 => 3 +; CHECK-NEXT: [1] 7 => 1 +; STAT: 3 region - The # of regions +; STAT: 2 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 12, 3, 4, 8, 9, 13, 10, 14, 
+; BBIT: 7, 8, 9, 13, 10, 14, +; BBIT: 1, 2, 5, 11, 6, 12, + +; RNIT: 0, 7 => 1, 1 => 3, 3, 4, +; RNIT: 7, 8, 9, 13, 10, 14, +; RNIT: 1, 2, 5, 11, 6, 12, diff --git a/final/test/Analysis/RegionInfo/infinite_loop_4.ll b/final/test/Analysis/RegionInfo/infinite_loop_4.ll new file mode 100644 index 00000000000..fd56af1d3b8 --- /dev/null +++ b/final/test/Analysis/RegionInfo/infinite_loop_4.ll @@ -0,0 +1,48 @@ +; RUN: opt -regions -analyze < %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"7" +7: + br i1 1, label %"1", label %"8" +1: + br i1 1, label %"2", label %"3" +2: + br label %"5" +5: + br i1 1, label %"11", label %"12" +11: + br label %"6" +12: + br label %"6" +6: + br i1 1, label %"2", label %"10" +8: + br label %"9" +9: + br i1 1, label %"13", label %"14" +13: + br label %"10" +14: + br label %"10" +10: + br label %"8" +3: + br label %"4" +4: + ret void +} +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 7 => 3 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, 3, 4, +; BBIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, + +; RNIT: 0, 7 => 3, 3, 4, +; RNIT: 7, 1, 2, 5, 11, 6, 10, 8, 9, 13, 14, 12, diff --git a/final/test/Analysis/RegionInfo/loop_with_condition.ll b/final/test/Analysis/RegionInfo/loop_with_condition.ll new file mode 100644 index 00000000000..d1d68982eec --- /dev/null +++ b/final/test/Analysis/RegionInfo/loop_with_condition.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition() nounwind { +0: + br label %"1" +1: + br i1 1, label %"6", label %"2" +2: + br i1 1, label %"3", label %"4" +3: + br label %"5" +4: + br label %"5" +5: + br label %"8" +8: + br i1 1, label %"7", label %"9" +9: + br label %"2" +7: + br label %"6" +6: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 1 => 6 +; CHECK-NEXT: [2] 2 => 7 +; CHECK-NEXT: [3] 2 => 5 +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 6, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 1, 2, 3, 5, 8, 7, 9, 4, +; BBIT: 2, 3, 5, 8, 9, 4, +; BBIT: 2, 3, 4, + +; RNIT: 0, 1 => 6, 6, +; RNIT: 1, 2 => 7, 7, +; RNIT: 2 => 5, 5, 8, 9, +; RNIT: 2, 3, 4, diff --git a/final/test/Analysis/RegionInfo/loops_1.ll b/final/test/Analysis/RegionInfo/loops_1.ll new file mode 100644 index 00000000000..d4bf3cc5011 --- /dev/null +++ b/final/test/Analysis/RegionInfo/loops_1.ll @@ -0,0 +1,40 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @loops_1() nounwind { +entry: + br i1 1, label %outer , label %a + +a: + br label %body + +outer: + br label %body + +body: + br i1 1, label %land, label %if + +land: + br i1 1, label %exit, label %end + +exit: + br i1 1, label 
%if, label %end + +if: + br label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK-NEXT: [1] entry => end +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, land, exit, if, end, a, +; BBIT: entry, outer, body, land, exit, if, a, + +; RNIT: entry => end, end, +; RNIT: entry, outer, body, land, exit, if, a, diff --git a/final/test/Analysis/RegionInfo/loops_2.ll b/final/test/Analysis/RegionInfo/loops_2.ll new file mode 100644 index 00000000000..07aa7c31101 --- /dev/null +++ b/final/test/Analysis/RegionInfo/loops_2.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @meread_() nounwind { +entry: + br label %bb23 + +bb23: + br label %bb.i + +bb.i: ; preds = %bb.i, %bb54 + br label %pflini_.exit + +pflini_.exit: ; preds = %bb.i + br label %bb58thread-split + +bb58thread-split: ; preds = %bb64, %bb61, %pflini_.exit + br label %bb58 + +bb58: ; preds = %bb60, %bb58thread-split + br i1 1, label %bb59, label %bb23 + +bb59: ; preds = %bb58 + switch i32 1, label %bb60 [ + i32 1, label %l98 + ] + +bb60: ; preds = %bb59 + br i1 1, label %bb61, label %bb58 + +bb61: ; preds = %bb60 + br label %bb58thread-split + +l98: ; preds = %bb69, %bb59 + ret void +} +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK: [1] bb23 => l98 +; STAT: 2 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, l98, +; BBIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, + +; RNIT: entry, bb23 => l98, l98, +; RNIT: bb23, bb.i, pflini_.exit, bb58thread-split, bb58, bb59, bb60, bb61, diff --git a/final/test/Analysis/RegionInfo/mix_1.ll b/final/test/Analysis/RegionInfo/mix_1.ll new file mode 100644 index 00000000000..829c157c2c6 --- /dev/null +++ b/final/test/Analysis/RegionInfo/mix_1.ll @@ -0,0 +1,69 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + + br i1 1, label %"1", label %"15" +1: + switch i32 0, label %"2" [ i32 0, label %"3" + i32 1, label %"7"] +2: + br label %"4" +3: + br label %"5" +4: + br label %"6" +5: + br label %"6" +6: + br label %"7" +7: + br label %"15" +15: + br label %"8" +8: + br label %"16" +16: + br label %"9" +9: + br i1 1, label %"10", label %"11" +11: + br i1 1, label %"13", label %"12" +13: + br label %"14" +12: + br label %"14" +14: + br label %"8" +10: + br label %"17" +17: + br label %"18" +18: + ret void +} + +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 0 => 15 +; CHECK-NEXT: [2] 1 => 7 +; CHECK-NEXT: [1] 8 => 10 +; CHECK-NEXT: [2] 11 => 14 +; STAT: 5 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 4, 6, 7, 15, 8, 16, 9, 10, 17, 18, 11, 13, 14, 12, 3, 5, +; BBIT: 0, 1, 2, 4, 6, 7, 3, 5, +; BBIT: 1, 2, 4, 6, 3, 5, +; BBIT: 8, 16, 9, 11, 13, 14, 12, +; BBIT: 11, 13, 12, + +; RNIT: 0 => 15, 15, 8 => 10, 10, 17, 18, +; RNIT: 0, 1 => 7, 7, +; RNIT: 1, 2, 4, 6, 3, 5, +; RNIT: 8, 16, 9, 11 => 14, 14, +; 
RNIT: 11, 13, 12, diff --git a/final/test/Analysis/RegionInfo/multiple_exiting_edge.ll b/final/test/Analysis/RegionInfo/multiple_exiting_edge.ll new file mode 100644 index 00000000000..7bc0e4607d6 --- /dev/null +++ b/final/test/Analysis/RegionInfo/multiple_exiting_edge.ll @@ -0,0 +1,38 @@ +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @normal_condition_0() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb42 [ + i32 67, label %bb42 + i32 90, label %bb41 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb42, bb41, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, + +define void @normal_condition_1() nounwind { +bb38: ; preds = %bb34, %bb34, %bb37 + switch i32 undef, label %bb41 [ + i32 67, label %bb42 + i32 90, label %bb42 + ] +bb41: ; preds = %bb38 + br label %bb42 +bb42: ; preds = %bb38, %bb38, %bb41 + ret void +} + +; BBIT: bb38, bb41, bb42, +; BBIT: bb38, bb41, + +; RNIT: bb38 => bb42, bb42, +; RNIT: bb38, bb41, diff --git a/final/test/Analysis/RegionInfo/nested_loops.ll b/final/test/Analysis/RegionInfo/nested_loops.ll new file mode 100644 index 00000000000..9d8c4558f04 --- /dev/null +++ b/final/test/Analysis/RegionInfo/nested_loops.ll @@ -0,0 +1,33 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s + +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %exit172, label %end + +exit172: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK-NEXT: [1] outer => end + +; STAT: 2 region - The # of regions + +; BBIT: entry, outer, body, exit172, end, +; BBIT: outer, body, exit172, + +; RNIT: entry, outer => end, end, +; RNIT: outer, body, exit172, diff --git a/final/test/Analysis/RegionInfo/next.ll b/final/test/Analysis/RegionInfo/next.ll new file mode 100644 index 00000000000..d986387099c --- /dev/null +++ b/final/test/Analysis/RegionInfo/next.ll @@ -0,0 +1,49 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @MAIN__() nounwind { +entry: + br label %__label_002001.outer + +__label_002001.outer: ; preds = %bb236, %bb92 + br label %__label_002001 + +__label_002001: ; preds = %bb229, %__label_002001.outer + br i1 1, label %bb93, label %__label_000020 + +bb93: ; preds = %__label_002001 + br i1 1, label %__label_000020, label %bb197 + +bb197: ; preds = %bb193 + br i1 1, label %bb229, label %bb224 + +bb224: ; preds = %bb223, %bb227 + br i1 1, label %bb229, label %bb224 + +bb229: ; preds = %bb227, %bb223 + br i1 1, label %__label_002001, label %__label_002001.outer + +__label_000020: ; preds = %__label_002001, %bb194 + ret void +} + +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK-NEXT: [1] __label_002001.outer => __label_000020 +; CHECK-NEXT: [2] bb197 => bb229 +; CHECK-NEXT:
[3] bb224 => bb229 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, __label_002001.outer, __label_002001, bb93, __label_000020, bb197, bb229, bb224, +; BBIT: __label_002001.outer, __label_002001, bb93, bb197, bb229, bb224, +; BBIT: bb197, bb224, +; BBIT: bb224, + +; RNIT: entry, __label_002001.outer => __label_000020, __label_000020, +; RNIT: __label_002001.outer, __label_002001, bb93, bb197 => bb229, bb229, +; RNIT: bb197, bb224 => bb229, +; RNIT: bb224, diff --git a/final/test/Analysis/RegionInfo/paper.ll b/final/test/Analysis/RegionInfo/paper.ll new file mode 100644 index 00000000000..00b544bc691 --- /dev/null +++ b/final/test/Analysis/RegionInfo/paper.ll @@ -0,0 +1,55 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define void @a_linear_impl_fig_1() nounwind { +0: + br label %"1" +1: + br label %"2" +2: + br label %"3" +3: + br i1 1, label %"13", label %"4" +4: + br i1 1, label %"5", label %"1" +5: + br i1 1, label %"8", label %"6" +6: + br i1 1, label %"7", label %"4" +7: + ret void +8: + br i1 1, label %"9", label %"1" +9: + br label %"10" +10: + br i1 1, label %"12", label %"11" +11: + br i1 1, label %"9", label %"8" +13: + br i1 1, label %"2", label %"1" +12: + switch i32 0, label %"1" [ i32 0, label %"9" + i32 1, label %"8"] +} + +; CHECK-NOT: => +; CHECK: [0] 0 => +; CHECK-NEXT: [1] 1 => 7 +; CHECK-NEXT: [2] 1 => 4 +; CHECK-NEXT: [2] 8 => 1 + +; STAT: 4 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: 0, 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, 7, +; BBIT: 1, 2, 3, 13, 4, 5, 8, 9, 10, 12, 11, 6, +; BBIT: 1, 2, 3, 13, +; BBIT: 8, 9, 10, 12, 11, + +; RNIT: 0, 1 => 7, 7, +; RNIT: 1 => 4, 4, 5, 8 => 1, 6, +; RNIT: 1, 2, 3, 13, +; RNIT: 8, 9, 10, 12, 11, diff --git a/final/test/Analysis/RegionInfo/two_loops_same_header.ll b/final/test/Analysis/RegionInfo/two_loops_same_header.ll new file mode 100644 index 00000000000..a97182b81a2 --- /dev/null +++ b/final/test/Analysis/RegionInfo/two_loops_same_header.ll @@ -0,0 +1,46 @@ +; RUN: opt -regions -analyze < %s | FileCheck %s +; RUN: opt -regions -stats < %s |& FileCheck -check-prefix=STAT %s +; RUN: opt -regions -print-region-style=bb -analyze < %s |& FileCheck -check-prefix=BBIT %s +; RUN: opt -regions -print-region-style=rn -analyze < %s |& FileCheck -check-prefix=RNIT %s + +define internal fastcc zeroext i8 @handle_compress() nounwind { +entry: + br label %outer + +outer: + br label %body + +body: + br i1 1, label %else, label %true77 + +true77: + br i1 1, label %then83, label %else + +then83: + br label %outer + +else: + br label %else106 + +else106: + br i1 1, label %end, label %outer + +end: + ret i8 1 +} + +; CHECK-NOT: => +; CHECK: [0] entry => +; CHECK-NEXT: [1] outer => end +; CHECK-NEXT: [2] outer => else + +; STAT: 3 region - The # of regions +; STAT: 1 region - The # of simple regions + +; BBIT: entry, outer, body, else, else106, end, true77, then83, +; BBIT: outer, body, else, else106, true77, then83, +; BBIT: outer, body, true77, then83, + +; RNIT: entry, outer => end, end, +; RNIT: outer => else, else, else106, +; RNIT: outer, body, true77, then83, diff --git a/final/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll 
b/final/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll new file mode 100644 index 00000000000..7ff130f201b --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-07-15-NegativeStride.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {Loop %bb: backedge-taken count is 100} +; PR1533 + +@array = weak global [101 x i32] zeroinitializer, align 32 ; <[100 x i32]*> [#uses=1] + +define void @loop(i32 %x) { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %i.01.0 = phi i32 [ 100, %entry ], [ %tmp4, %bb ] ; [#uses=2] + %tmp1 = getelementptr [101 x i32]* @array, i32 0, i32 %i.01.0 ; [#uses=1] + store i32 %x, i32* %tmp1 + %tmp4 = add i32 %i.01.0, -1 ; [#uses=2] + %tmp7 = icmp sgt i32 %tmp4, -1 ; [#uses=1] + br i1 %tmp7, label %bb, label %return + +return: ; preds = %bb + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll b/final/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll new file mode 100644 index 00000000000..e67e4d00d62 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-08-06-MisinterpretBranch.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -indvars -adce -simplifycfg -S | grep "icmp s" +; PR1598 + +define i32 @f(i32 %a, i32 %b, i32 %x, i32 %y) { +entry: + %tmp3 = icmp eq i32 %a, %b ; [#uses=1] + br i1 %tmp3, label %return, label %bb + +bb: ; preds = %bb, %entry + %x_addr.0 = phi i32 [ %tmp6, %bb ], [ %x, %entry ] ; [#uses=1] + %tmp6 = add i32 %x_addr.0, 1 ; [#uses=3] + %tmp9 = icmp slt i32 %tmp6, %y ; [#uses=1] + br i1 %tmp9, label %bb, label %return + +return: ; preds = %bb, %entry + %x_addr.1 = phi i32 [ %x, %entry ], [ %tmp6, %bb ] ; [#uses=1] + ret i32 %x_addr.1 +} diff --git a/final/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll b/final/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll new file mode 100644 index 00000000000..ab96243ef1f --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-08-06-Unsigned.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb: backedge-taken count is (-1 + (-1 \\* %x) + %y)} +; PR1597 + +define i32 @f(i32 %x, i32 %y) { +entry: + %tmp63 = icmp ult i32 %x, %y ; [#uses=1] + br i1 %tmp63, label %bb.preheader, label %bb8 + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb3, %bb.preheader + %x_addr.0 = phi i32 [ %tmp2, %bb3 ], [ %x, %bb.preheader ] ; [#uses=1] + %tmp2 = add i32 %x_addr.0, 1 ; [#uses=3] + br label %bb3 + +bb3: ; preds = %bb + %tmp6 = icmp ult i32 %tmp2, %y ; [#uses=1] + br i1 %tmp6, label %bb, label %bb8.loopexit + +bb8.loopexit: ; preds = %bb3 + br label %bb8 + +bb8: ; preds = %bb8.loopexit, %entry + %x_addr.1 = phi i32 [ %x, %entry ], [ %tmp2, %bb8.loopexit ] ; [#uses=1] + br label %return + +return: ; preds = %bb8 + ret i32 %x_addr.1 +} diff --git a/final/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll b/final/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll new file mode 100644 index 00000000000..b678fee22cb --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-09-27-LargeStepping.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 13} +; PR1706 + +define i32 @f() { +entry: + br label %bb5 + +bb: ; preds = %bb5 + %tmp2 = shl i32 %j.0, 1 ; [#uses=1] + %tmp4 = add i32 %i.0, 268435456 ; [#uses=1] + br label %bb5 + +bb5: ; preds = %bb, %entry + %j.0 = phi i32 [ 1, %entry ], [ %tmp2, %bb ] ; [#uses=2] + %i.0 = phi i32 [ 
-1879048192, %entry ], [ %tmp4, %bb ] ; [#uses=2] + %tmp7 = icmp slt i32 %i.0, 1610612736 ; [#uses=1] + br i1 %tmp7, label %bb, label %return + +return: ; preds = %bb5 + ret i32 %j.0 +} diff --git a/final/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll b/final/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll new file mode 100644 index 00000000000..514920f0f6f --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -indvars -S | grep printd | grep 1206807378 +; PR1798 + +declare void @printd(i32) + +define i32 @test() { +entry: + br label %bb6 + +bb: ; preds = %bb6 + %tmp3 = add i32 %x.0, %i.0 ; [#uses=1] + %tmp5 = add i32 %i.0, 1 ; [#uses=1] + br label %bb6 + +bb6: ; preds = %bb, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp5, %bb ] ; [#uses=3] + %x.0 = phi i32 [ 0, %entry ], [ %tmp3, %bb ] ; [#uses=3] + %tmp8 = icmp slt i32 %i.0, 123456789 ; [#uses=1] + br i1 %tmp8, label %bb, label %bb10 + +bb10: ; preds = %bb6 + call void @printd(i32 %x.0) + ret i32 0 +} diff --git a/final/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll b/final/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll new file mode 100644 index 00000000000..c12721d82f0 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2007-11-18-OrInstruction.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s +; PR1810 + +define void @fun() { +entry: + br label %header +header: + %i = phi i32 [ 1, %entry ], [ %i.next, %body ] + %cond = icmp eq i32 %i, 10 + br i1 %cond, label %exit, label %body +body: + %a = mul i32 %i, 5 + %b = or i32 %a, 1 + %i.next = add i32 %i, 1 + br label %header +exit: + ret void +} + +; CHECK: --> %b + diff --git a/final/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll b/final/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll new file mode 100644 index 00000000000..fe3a7f4191d --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-02-11-ReversedCondition.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %header: backedge-taken count is (0 smax %n)} + +define void @foo(i32 %n) { +entry: + br label %header +header: + %i = phi i32 [ 0, %entry ], [ %i.inc, %next ] + %cond = icmp sgt i32 %n, %i + br i1 %cond, label %next, label %return +next: + %i.inc = add i32 %i, 1 + br label %header +return: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll b/final/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll new file mode 100644 index 00000000000..4f14a0d9a5e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-02-12-SMAXTripCount.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %loop: backedge-taken count is (100 + (-100 smax %n))} +; PR2002 + +define void @foo(i8 %n) { +entry: + br label %loop +loop: + %i = phi i8 [ -100, %entry ], [ %i.inc, %next ] + %cond = icmp slt i8 %i, %n + br i1 %cond, label %next, label %return +next: + %i.inc = add i8 %i, 1 + br label %loop +return: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll b/final/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll new file mode 100644 index 00000000000..52c7985045d --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-02-15-UMax.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep umax +; PR2003 + +define i32 @foo(i32 %n) { +entry: + br label %header +header: + %i = phi i32 [ 100, %entry ], [ %i.inc, %next ] + %cond = icmp ult i32 %i, %n + br 
i1 %cond, label %next, label %return +next: + %i.inc = add i32 %i, 1 + br label %header +return: + ret i32 %i +} + diff --git a/final/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll b/final/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll new file mode 100644 index 00000000000..bcc124d1ecd --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-05-25-NegativeStepToZero.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 61} +; PR2364 + +define i32 @func_6() nounwind { +entry: + br label %bb5 + +bb: ; preds = %bb5 + %tmp2 = add i32 %i.0, 1 ; [#uses=1] + %tmp4 = add i8 %x.0, -4 ; [#uses=1] + br label %bb5 + +bb5: ; preds = %bb, %entry + %x.0 = phi i8 [ 0, %entry ], [ %tmp4, %bb ] ; [#uses=2] + %i.0 = phi i32 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=2] + %tmp7 = icmp eq i8 %x.0, 12 ; [#uses=1] + br i1 %tmp7, label %return, label %bb + +return: ; preds = %bb5 + ret i32 %i.0 +} diff --git a/final/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll b/final/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll new file mode 100644 index 00000000000..d503329292c --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-06-12-BinomialInt64.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -analyze -scalar-evolution 2>/dev/null +; PR2433 + +define i32 @main1(i32 %argc, i8** %argv) nounwind { +entry: + br i1 false, label %bb10, label %bb23 + +bb10: ; preds = %bb10, %entry + %accum.03 = phi i64 [ %tmp14, %bb10 ], [ 0, %entry ] ; [#uses=1] + %i.02 = phi i32 [ %tmp16, %bb10 ], [ 0, %entry ] ; [#uses=1] + %d.1.01 = phi i64 [ %tmp5.i, %bb10 ], [ 0, %entry ] ; [#uses=1] + %tmp5.i = add i64 %d.1.01, 1 ; [#uses=2] + %tmp14 = add i64 %accum.03, %tmp5.i ; [#uses=2] + %tmp16 = add i32 %i.02, 1 ; [#uses=2] + %tmp20 = icmp slt i32 %tmp16, 0 ; [#uses=1] + br i1 %tmp20, label %bb10, label %bb23 + +bb23: ; preds = %bb10, %entry + %accum.0.lcssa = phi i64 [ 0, %entry ], [ %tmp14, %bb10 ] ; [#uses=0] + ret i32 0 +} + +define i32 @main2(i32 %argc, i8** %argv) { +entry: + %tmp8 = tail call i32 @atoi( i8* null ) nounwind readonly ; [#uses=1] + br i1 false, label %bb9, label %bb21 + +bb9: ; preds = %bb9, %entry + %accum.03 = phi i64 [ %tmp12, %bb9 ], [ 0, %entry ] ; [#uses=1] + %i.02 = phi i32 [ %tmp14, %bb9 ], [ 0, %entry ] ; [#uses=1] + %d.1.01 = phi i64 [ %tmp4.i, %bb9 ], [ 0, %entry ] ; [#uses=1] + %tmp4.i = add i64 %d.1.01, 1 ; [#uses=2] + %tmp12 = add i64 %accum.03, %tmp4.i ; [#uses=2] + %tmp14 = add i32 %i.02, 1 ; [#uses=2] + %tmp18 = icmp slt i32 %tmp14, %tmp8 ; [#uses=1] + br i1 %tmp18, label %bb9, label %bb21 + +bb21: ; preds = %bb9, %entry + %accum.0.lcssa = phi i64 [ 0, %entry ], [ %tmp12, %bb9 ] ; [#uses=0] + ret i32 0 +} + +declare i32 @atoi(i8*) nounwind readonly diff --git a/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll b/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll new file mode 100644 index 00000000000..9db9b71c7c7 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect1.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -analyze -scalar-evolution |& not grep smax +; PR2261 + +@lut = common global [256 x i8] zeroinitializer, align 32 ; <[256 x i8]*> [#uses=1] + +define void @foo(i32 %count, i32* %srcptr, i32* %dstptr) nounwind { +entry: + icmp sgt i32 %count, 0 ; :0 [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %j.01 = phi i32 [ %8, %bb1 ], [ 0, %bb.nph 
] ; [#uses=1] + load i32* %srcptr, align 4 ; :1 [#uses=2] + and i32 %1, 255 ; :2 [#uses=1] + and i32 %1, -256 ; :3 [#uses=1] + getelementptr [256 x i8]* @lut, i32 0, i32 %2 ; :4 [#uses=1] + load i8* %4, align 1 ; :5 [#uses=1] + zext i8 %5 to i32 ; :6 [#uses=1] + or i32 %6, %3 ; :7 [#uses=1] + store i32 %7, i32* %dstptr, align 4 + add i32 %j.01, 1 ; :8 [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + icmp slt i32 %8, %count ; :9 [#uses=1] + br i1 %9, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll b/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll new file mode 100644 index 00000000000..18476655525 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-12-UnneededSelect2.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution |& not grep smax +; PR2070 + +define i32 @a(i32 %x) nounwind { +entry: + icmp sgt i32 %x, 1 ; :0 [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %z.02 = phi i32 [ %1, %bb1 ], [ 1, %bb.nph ] ; [#uses=1] + %i.01 = phi i32 [ %2, %bb1 ], [ 1, %bb.nph ] ; [#uses=2] + mul i32 %z.02, %i.01 ; :1 [#uses=2] + add i32 %i.01, 1 ; :2 [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + icmp slt i32 %2, %x ; :3 [#uses=1] + br i1 %3, label %bb, label %bb1.bb2_crit_edge + +bb1.bb2_crit_edge: ; preds = %bb1 + %.lcssa = phi i32 [ %1, %bb1 ] ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %z.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ 1, %entry ] ; [#uses=1] + ret i32 %z.0.lcssa +} diff --git a/final/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll b/final/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll new file mode 100644 index 00000000000..1865c059a99 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-19-InfiniteLoop.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep Unpredictable +; PR2088 + +define void @fun() { +entry: + br label %loop +loop: + %i = phi i8 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i8 %i, 4 + %cond = icmp ne i8 %i.next, 6 + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll b/final/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll new file mode 100644 index 00000000000..86e07ec41b9 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-19-WrappingIV.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 113} +; PR2088 + +define void @fun() { +entry: + br label %loop +loop: + %i = phi i8 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i8 %i, 18 + %cond = icmp ne i8 %i.next, 4 + br i1 %cond, label %loop, label %exit +exit: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll b/final/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll new file mode 100644 index 00000000000..75bd634b3ef --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-29-SGTTripCount.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; PR2607 + +define i32 @_Z1aj(i32 %j) nounwind { +entry: + icmp sgt i32 0, %j ; :0 [#uses=1] + br i1 %0, label %bb.preheader, 
label %return + +bb.preheader: ; preds = %entry + br label %bb + +bb: ; preds = %bb, %bb.preheader + %i.01 = phi i32 [ %1, %bb ], [ 0, %bb.preheader ] ; [#uses=1] + add i32 %i.01, -1 ; :1 [#uses=3] + icmp sgt i32 %1, %j ; :2 [#uses=1] + br i1 %2, label %bb, label %return.loopexit + +return.loopexit: ; preds = %bb + br label %return + +return: ; preds = %return.loopexit, %entry + %i.0.lcssa = phi i32 [ 0, %entry ], [ %1, %return.loopexit ] ; [#uses=1] + ret i32 %i.0.lcssa +} + +; CHECK: backedge-taken count is (-1 + (-1 * %j)) + diff --git a/final/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/final/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll new file mode 100644 index 00000000000..3542ad2a41e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; PR2607 + +define i32 @b(i32 %x, i32 %y) nounwind { +entry: + %cmp2 = icmp slt i32 %y, %x + %cond3 = select i1 %cmp2, i32 %y, i32 %x + %cmp54 = icmp slt i32 %cond3, -2147483632 + br i1 %cmp54, label %forinc, label %afterfor + +forinc: ; preds = %forinc, %entry + %j.01 = phi i32 [ %dec, %forinc ], [ -2147483632, %entry ] + %dec = add i32 %j.01, -1 + %cmp = icmp slt i32 %y, %x + %cond = select i1 %cmp, i32 %y, i32 %x + %cmp5 = icmp sgt i32 %dec, %cond + br i1 %cmp5, label %forinc, label %afterfor + +afterfor: ; preds = %forinc, %entry + %j.0.lcssa = phi i32 [ -2147483632, %entry ], [ %dec, %forinc ] + ret i32 %j.0.lcssa +} + +; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) + diff --git a/final/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll b/final/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll new file mode 100644 index 00000000000..3b31d797cf4 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-08-04-IVOverflow.ll @@ -0,0 +1,27 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; PR2621 + +define i32 @a() nounwind { +entry: + br label %bb1 + +bb: + trunc i32 %i.0 to i16 + add i16 %0, %x16.0 + add i32 %i.0, 1 + br label %bb1 + +bb1: + %i.0 = phi i32 [ 0, %entry ], [ %2, %bb ] + %x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ] + icmp ult i32 %i.0, 888888 + br i1 %3, label %bb, label %bb2 + +bb2: + zext i16 %x16.0 to i32 + ret i32 %4 +} + +; CHECK: Exits: 20028 + diff --git a/final/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll b/final/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll new file mode 100644 index 00000000000..b296a19716c --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-08-04-LongAddRec.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | FileCheck %s +; PR2621 + +define i32 @a() nounwind { +entry: + br label %bb1 + +bb: ; preds = %bb1 + add i16 %x17.0, 1 ; :0 [#uses=2] + add i16 %0, %x16.0 ; :1 [#uses=2] + add i16 %1, %x15.0 ; :2 [#uses=2] + add i16 %2, %x14.0 ; :3 [#uses=2] + add i16 %3, %x13.0 ; :4 [#uses=2] + add i16 %4, %x12.0 ; :5 [#uses=2] + add i16 %5, %x11.0 ; :6 [#uses=2] + add i16 %6, %x10.0 ; :7 [#uses=2] + add i16 %7, %x9.0 ; :8 [#uses=2] + add i16 %8, %x8.0 ; :9 [#uses=2] + add i16 %9, %x7.0 ; :10 [#uses=2] + add i16 %10, %x6.0 ; :11 [#uses=2] + add i16 %11, %x5.0 ; :12 [#uses=2] + add i16 %12, %x4.0 ; :13 [#uses=2] + add i16 %13, %x3.0 ; :14 [#uses=2] + add i16 %14, %x2.0 ; :15 [#uses=2] + add i16 %15, %x1.0 ; :16 [#uses=1] + add i32 %i.0, 1 ; :17 
[#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %x2.0 = phi i16 [ 0, %entry ], [ %15, %bb ] ; [#uses=1] + %x3.0 = phi i16 [ 0, %entry ], [ %14, %bb ] ; [#uses=1] + %x4.0 = phi i16 [ 0, %entry ], [ %13, %bb ] ; [#uses=1] + %x5.0 = phi i16 [ 0, %entry ], [ %12, %bb ] ; [#uses=1] + %x6.0 = phi i16 [ 0, %entry ], [ %11, %bb ] ; [#uses=1] + %x7.0 = phi i16 [ 0, %entry ], [ %10, %bb ] ; [#uses=1] + %x8.0 = phi i16 [ 0, %entry ], [ %9, %bb ] ; [#uses=1] + %x9.0 = phi i16 [ 0, %entry ], [ %8, %bb ] ; [#uses=1] + %x10.0 = phi i16 [ 0, %entry ], [ %7, %bb ] ; [#uses=1] + %x11.0 = phi i16 [ 0, %entry ], [ %6, %bb ] ; [#uses=1] + %x12.0 = phi i16 [ 0, %entry ], [ %5, %bb ] ; [#uses=1] + %x13.0 = phi i16 [ 0, %entry ], [ %4, %bb ] ; [#uses=1] + %x14.0 = phi i16 [ 0, %entry ], [ %3, %bb ] ; [#uses=1] + %x15.0 = phi i16 [ 0, %entry ], [ %2, %bb ] ; [#uses=1] + %x16.0 = phi i16 [ 0, %entry ], [ %1, %bb ] ; [#uses=1] + %x17.0 = phi i16 [ 0, %entry ], [ %0, %bb ] ; [#uses=1] + %i.0 = phi i32 [ 0, %entry ], [ %17, %bb ] ; [#uses=2] + %x1.0 = phi i16 [ 0, %entry ], [ %16, %bb ] ; [#uses=2] + icmp ult i32 %i.0, 8888 ; :18 [#uses=1] + br i1 %18, label %bb, label %bb2 + +bb2: ; preds = %bb1 + zext i16 %x1.0 to i32 ; :19 [#uses=1] + ret i32 %19 +} + +; CHECK: Exits: -19168 + diff --git a/final/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll b/final/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll new file mode 100644 index 00000000000..7722122117d --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-11-02-QuadraticCrash.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -analyze -scalar-evolution +; PR1827 + +declare void @use(i32) + +define void @foo() { +entry: + br label %loop_1 + +loop_1: ; preds = %loop_1, %entry + %a = phi i32 [ 2, %entry ], [ %b, %loop_1 ] ; [#uses=2] + %c = phi i32 [ 5, %entry ], [ %d, %loop_1 ] ; [#uses=1] + %b = add i32 %a, 1 ; [#uses=1] + %d = add i32 %c, %a ; [#uses=3] + %A = icmp ult i32 %d, 50 ; [#uses=1] + br i1 %A, label %loop_1, label %endloop + +endloop: ; preds = %loop_1 + call void @use(i32 %d) + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll b/final/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll new file mode 100644 index 00000000000..2e2aabc475a --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-11-15-CubicOOM.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -analyze -scalar-evolution +; PR2602 + +define i32 @a() nounwind { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %w.0 = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; [#uses=2] + %e.0 = phi i32 [ 0, %entry ], [ %e.1, %bb ] ; [#uses=2] + %w.1 = add i32 0, %w.0 ; :0 [#uses=1] + %tmp = add i32 %e.0, %w.0 ; :1 [#uses=1] + %e.1 = add i32 %e.0, 1 ; :2 [#uses=1] + %cond = icmp eq i32 %w.1, -1 ; :3 [#uses=1] + br i1 %cond, label %return, label %bb + +return: ; preds = %bb + ret i32 undef +} diff --git a/final/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll b/final/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll new file mode 100644 index 00000000000..335bbaf9ad0 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-11-18-LessThanOrEqual.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -analyze -scalar-evolution |& \ +; RUN: grep {Loop %bb: backedge-taken count is (7 + (-1 \\* %argc))} + +define i32 @main(i32 %argc, i8** %argv) nounwind { +entry: + %0 = icmp ugt i32 %argc, 7 ; [#uses=1] + br i1 %0, label %bb2, label %bb.nph + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %indvar = phi i32 [ 0, %bb.nph ], [ 
%indvar.next, %bb1 ] ; [#uses=2] + %argc_addr.04 = add i32 %indvar, %argc ; [#uses=1] + tail call void (...)* @Test() nounwind + %1 = add i32 %argc_addr.04, 1 ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb + %phitmp = icmp ugt i32 %1, 7 ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %phitmp, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb1 + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + ret i32 0 +} + +declare void @Test(...) diff --git a/final/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll b/final/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll new file mode 100644 index 00000000000..db527fefa97 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-11-18-Stride1.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | grep {Loop %bb: Unpredictable backedge-taken count\\.} + +; ScalarEvolution can't compute a trip count because it doesn't know if +; dividing by the stride will have a remainder. This could theoretically +; be handled by teaching it how to use a more elaborate trip count computation. + +define i32 @f(i32 %x) nounwind readnone { +entry: + %0 = icmp ugt i32 %x, 4 ; [#uses=1] + br i1 %0, label %bb.nph, label %bb2 + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; [#uses=2] + %tmp = mul i32 %indvar, -3 ; [#uses=1] + %x_addr.04 = add i32 %tmp, %x ; [#uses=1] + %1 = add i32 %x_addr.04, -3 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %2 = icmp ugt i32 %1, 4 ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %2, label %bb, label %bb1.bb2_crit_edge + +bb1.bb2_crit_edge: ; preds = %bb1 + %.lcssa = phi i32 [ %1, %bb1 ] ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %x_addr.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ %x, %entry ] ; [#uses=1] + ret i32 %x_addr.0.lcssa +} diff --git a/final/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll b/final/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll new file mode 100644 index 00000000000..fa9f21af371 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-11-18-Stride2.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 3} +; XFAIL: * + +; This is a tricky testcase for unsigned wrap detection which ScalarEvolution +; doesn't yet know how to do.
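+; Concretely: for %x <= 999 the loop body computes %x+3, %x+6, ... and exits +; once the value exceeds 999, so it runs ((999 - %x) /u 3) + 1 times; the grep +; above pins down just the /u 3 division. The missing piece is proving that +; the i32 additions cannot wrap before crossing 999, hence the XFAIL.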
+ +define i32 @f(i32 %x) nounwind readnone { +entry: + %0 = icmp ugt i32 %x, 999 ; [#uses=1] + br i1 %0, label %bb2, label %bb.nph + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb1 ] ; [#uses=2] + %tmp = mul i32 %indvar, 3 ; [#uses=1] + %x_addr.04 = add i32 %tmp, %x ; [#uses=1] + %1 = add i32 %x_addr.04, 3 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %2 = icmp ugt i32 %1, 999 ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %2, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb1 + %.lcssa = phi i32 [ %1, %bb1 ] ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + %x_addr.0.lcssa = phi i32 [ %.lcssa, %bb1.bb2_crit_edge ], [ %x, %entry ] ; [#uses=1] + ret i32 %x_addr.0.lcssa +} diff --git a/final/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll b/final/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll new file mode 100644 index 00000000000..25a0434b29e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-12-08-FiniteSGE.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep {backedge-taken count is 255} + +define i32 @foo(i32 %x, i32 %y, i32* %lam, i32* %alp) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ] ; [#uses=4] + %i.0.reg2mem.0 = sub i32 255, %indvar ; [#uses=2] + %0 = getelementptr i32* %alp, i32 %i.0.reg2mem.0 ; [#uses=1] + %1 = load i32* %0, align 4 ; [#uses=1] + %2 = getelementptr i32* %lam, i32 %i.0.reg2mem.0 ; [#uses=1] + store i32 %1, i32* %2, align 4 + %3 = sub i32 254, %indvar ; [#uses=1] + %4 = icmp slt i32 %3, 0 ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %4, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + %tmp10 = mul i32 %indvar, %x ; [#uses=1] + %z.0.reg2mem.0 = add i32 %tmp10, %y ; [#uses=1] + %5 = add i32 %z.0.reg2mem.0, %x ; [#uses=1] + ret i32 %5 +} diff --git a/final/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll b/final/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll new file mode 100644 index 00000000000..12254e37dcc --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-12-11-SMaxOverflow.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: @f +; CHECK: Loop %bb16.preheader: backedge-taken count is (-1 + %c.idx.val) + +define i32 @f(i32 %c.idx.val) { + +bb2: + %k.018 = add i32 %c.idx.val, -1 ; [#uses=2] + %a14 = icmp slt i32 %k.018, 0 ; [#uses=1] + br i1 %a14, label %bb19, label %bb16.preheader + +bb16.preheader: + %k.019 = phi i32 [ %k.0, %bb18 ], [ %k.018, %bb2 ] ; [#uses=5] + %x = phi i32 [ 0, %bb2 ], [ %x.1, %bb18] + br label %bb18 + +bb18: ; preds = %bb18.loopexit + %x.1 = add i32 %x, 1 + %k.0 = add i32 %k.019, -1 ; [#uses=2] + %a107 = icmp slt i32 %k.0, 0 ; [#uses=1] + br i1 %a107, label %bb18.bb19_crit_edge, label %bb16.preheader + +bb18.bb19_crit_edge: + ret i32 %x + +bb19: + ret i32 0 + +} diff --git a/final/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll b/final/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll new file mode 100644 index 00000000000..8152e988ffc --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-12-14-StrideAndSigned.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -analyze -scalar-evolution |& \ +; RUN: grep {(((-1 \\* %i0) + (100005 smax %i0)) /u 5)} +; XFAIL: * + +define i32 @foo0(i32 %i0) nounwind { +entry: + br label %bb1 + +bb: ; preds = %bb1 + %0 = add i32 
%j.0, 1 ; [#uses=1] + %1 = add i32 %i.0, 5 ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %j.0 = phi i32 [ 0, %entry ], [ %0, %bb ] ; [#uses=2] + %i.0 = phi i32 [ %i0, %entry ], [ %1, %bb ] ; [#uses=2] + %2 = icmp sgt i32 %i.0, 100000 ; [#uses=1] + br i1 %2, label %return, label %bb + +return: ; preds = %bb1 + ret i32 %j.0 +} diff --git a/final/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll b/final/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll new file mode 100644 index 00000000000..3eaa49212e7 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2008-12-15-DontUseSDiv.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -analyze -scalar-evolution |& grep {/u 5} +; XFAIL: * + +define i8 @foo0(i8 %i0) nounwind { +entry: + br label %bb1 + +bb: ; preds = %bb1 + %0 = add i8 %j.0, 1 ; [#uses=1] + %1 = add i8 %i.0, 5 ; [#uses=1] + br label %bb1 + +bb1: ; preds = %bb, %entry + %j.0 = phi i8 [ 0, %entry ], [ %0, %bb ] ; [#uses=2] + %i.0 = phi i8 [ %i0, %entry ], [ %1, %bb ] ; [#uses=2] + %2 = icmp sgt i8 %i.0, 100 ; [#uses=1] + br i1 %2, label %return, label %bb + +return: ; preds = %bb1 + ret i8 %j.0 +} diff --git a/final/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll b/final/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll new file mode 100644 index 00000000000..cc2a2e42bc9 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2009-01-02-SignedNegativeStride.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -analyze -scalar-evolution | not grep {/u -1} +; PR3275 + +@g_16 = external global i16 ; [#uses=3] +@.str = external constant [4 x i8] ; <[4 x i8]*> [#uses=0] + +define void @func_15() nounwind { +entry: + %0 = load i16* @g_16, align 2 ; [#uses=1] + %1 = icmp sgt i16 %0, 0 ; [#uses=1] + br i1 %1, label %bb2, label %bb.nph + +bb.nph: ; preds = %entry + %g_16.promoted = load i16* @g_16 ; [#uses=1] + br label %bb + +bb: ; preds = %bb1, %bb.nph + %g_16.tmp.0 = phi i16 [ %g_16.promoted, %bb.nph ], [ %2, %bb1 ] ; [#uses=1] + %2 = add i16 %g_16.tmp.0, -1 ; [#uses=3] + br label %bb1 + +bb1: ; preds = %bb + %3 = icmp sgt i16 %2, 0 ; [#uses=1] + br i1 %3, label %bb1.bb2_crit_edge, label %bb + +bb1.bb2_crit_edge: ; preds = %bb1 + store i16 %2, i16* @g_16 + br label %bb2 + +bb2: ; preds = %bb1.bb2_crit_edge, %entry + br label %return + +return: ; preds = %bb2 + ret void +} + +declare i32 @main() nounwind + +declare i32 @printf(i8*, ...) 
nounwind + diff --git a/final/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll b/final/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll new file mode 100644 index 00000000000..c2e108aa9c0 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2009-04-22-TruncCast.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep {(trunc i} | not grep ext + +define i16 @test1(i8 %x) { + %A = sext i8 %x to i32 + %B = trunc i32 %A to i16 + ret i16 %B +} + +define i8 @test2(i16 %x) { + %A = sext i16 %x to i32 + %B = trunc i32 %A to i8 + ret i8 %B +} + +define i16 @test3(i16 %x) { + %A = sext i16 %x to i32 + %B = trunc i32 %A to i16 + ret i16 %B +} + +define i16 @test4(i8 %x) { + %A = zext i8 %x to i32 + %B = trunc i32 %A to i16 + ret i16 %B +} + +define i8 @test5(i16 %x) { + %A = zext i16 %x to i32 + %B = trunc i32 %A to i8 + ret i8 %B +} + +define i16 @test6(i16 %x) { + %A = zext i16 %x to i32 + %B = trunc i32 %A to i16 + ret i16 %B +} diff --git a/final/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll b/final/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll new file mode 100644 index 00000000000..dc7bd29c57e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2009-05-09-PointerEdgeCount.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep {count is 2} +; PR3171 +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + + %struct.Foo = type { i32 } + %struct.NonPod = type { [2 x %struct.Foo] } + +define void @_Z3foov() nounwind { +entry: + %x = alloca %struct.NonPod, align 8 ; <%struct.NonPod*> [#uses=2] + %0 = getelementptr %struct.NonPod* %x, i32 0, i32 0 ; <[2 x %struct.Foo]*> [#uses=1] + %1 = getelementptr [2 x %struct.Foo]* %0, i32 1, i32 0 ; <%struct.Foo*> [#uses=1] + br label %bb1.i + +bb1.i: ; preds = %bb2.i, %entry + %.0.i = phi %struct.Foo* [ %1, %entry ], [ %4, %bb2.i ] ; <%struct.Foo*> [#uses=2] + %2 = getelementptr %struct.NonPod* %x, i32 0, i32 0, i32 0 ; <%struct.Foo*> [#uses=1] + %3 = icmp eq %struct.Foo* %.0.i, %2 ; [#uses=1] + br i1 %3, label %_ZN6NonPodD1Ev.exit, label %bb2.i + +bb2.i: ; preds = %bb1.i + %4 = getelementptr %struct.Foo* %.0.i, i32 -1 ; <%struct.Foo*> [#uses=1] + br label %bb1.i + +_ZN6NonPodD1Ev.exit: ; preds = %bb1.i + ret void +} + diff --git a/final/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll b/final/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll new file mode 100644 index 00000000000..a4358aa6321 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2009-07-04-GroupConstantsWidthMismatch.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -analyze -scalar-evolution +; PR4501 + +define void @test() { +entry: + %0 = load i16* undef, align 1 + %1 = lshr i16 %0, 8 + %2 = and i16 %1, 3 + %3 = zext i16 %2 to i32 + %4 = load i8* undef, align 1 + %5 = lshr i8 %4, 4 + %6 = and i8 %5, 1 + %7 = zext i8 %6 to i32 + %t1 = add i32 %3, %7 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll b/final/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll new file mode 100644 index 00000000000..aba0ce74678 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/2010-09-03-RequiredTransitive.ll @@ -0,0 +1,24 @@ +; RUN: opt -indvars -scalar-evolution -analyze %s +; This test checks if the SCEV analysis is printed out at all. +; It failed once as the RequiredTransitive option was not implemented +; correctly. 
+ +define i32 @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %indvar1 = phi i64 [ %indvar.next2, %for.inc ], [ 0, %entry ] ; [#uses=3] + %exitcond = icmp ne i64 %indvar1, 1024 ; [#uses=1] + br i1 %exitcond, label %for.body, label %for.end + +for.body: ; preds = %for.cond + br label %for.inc + +for.inc: ; preds = %for.body + %indvar.next2 = add i64 %indvar1, 1 ; [#uses=1] + br label %for.cond + +for.end: ; preds = %for.cond + ret i32 0 +} diff --git a/final/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/final/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll new file mode 100644 index 00000000000..9573aed1d73 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 100} +; PR1101 + +@A = weak global [1000 x i32] zeroinitializer, align 32 + + +define void @test(i32 %N) { +entry: + "alloca point" = bitcast i32 0 to i32 ; [#uses=0] + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32]* @A, i32 0, i32 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i32 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i32 [ 2, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %SQ = mul i32 %i.0, %i.0 + %tmp4 = mul i32 %i.0, 2 + %tmp5 = sub i32 %SQ, %tmp4 + %tmp3 = icmp sle i32 %tmp5, 9999 ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/and-xor.ll b/final/test/Analysis/ScalarEvolution/and-xor.ll new file mode 100644 index 00000000000..17725735a7c --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/and-xor.ll @@ -0,0 +1,8 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep {\\--> (zext} | count 2 + +define i32 @foo(i32 %x) { + %n = and i32 %x, 255 + %y = xor i32 %n, 255 + ret i32 %y +} diff --git a/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll b/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll new file mode 100644 index 00000000000..7eeb3084958 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-0.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution +; PR4537 + +; ModuleID = 'b.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @test() { +entry: + %0 = load i32** undef, align 8 ; [#uses=1] + %1 = ptrtoint i32* %0 to i64 ; [#uses=1] + %2 = sub i64 undef, %1 ; [#uses=1] + %3 = lshr i64 %2, 3 ; [#uses=1] + %4 = trunc i64 %3 to i32 ; [#uses=2] + br i1 undef, label %bb10, label %bb4.i + +bb4.i: ; preds = %bb4.i, %entry + %i.0.i6 = phi i32 [ %8, %bb4.i ], [ 0, %entry ] ; [#uses=2] + %5 = sub i32 %4, %i.0.i6 ; [#uses=1] + %6 = sext i32 %5 to i64 ; [#uses=1] + %7 = udiv i64 undef, %6 ; [#uses=1] + %8 = add i32 %i.0.i6, 1 ; [#uses=2] + %phitmp = icmp eq i64 %7, 0 ; [#uses=1] + %.not.i = icmp sge i32 %8, %4 ; [#uses=1] + %or.cond.i = or i1 %phitmp, %.not.i ; [#uses=1] + br i1 %or.cond.i, label %bb10, label %bb4.i + +bb10: ; preds = %bb4.i, %entry + unreachable +} diff --git a/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll new file mode 100644 index 00000000000..31b95e1470b --- /dev/null +++ 
b/final/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll @@ -0,0 +1,354 @@ +; RUN: opt < %s -iv-users +; PR4538 + +; ModuleID = 'bugpoint-reduced-simplified.bc' +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-freebsd8.0" +module asm ".ident\09\22$FreeBSD: head/sys/kern/vfs_subr.c 195285 2009-07-02 14:19:33Z jamie $\22" +module asm ".section set_pcpu, \22aw\22, @progbits" +module asm ".previous" + type <{ [40 x i8] }> ; type %0 + type <{ %struct.vm_object*, %struct.vm_object** }> ; type %1 + type <{ %struct.vm_object* }> ; type %2 + type <{ %struct.vm_page*, %struct.vm_page** }> ; type %3 + type <{ %struct.pv_entry*, %struct.pv_entry** }> ; type %4 + type <{ %struct.vm_reserv* }> ; type %5 + type <{ %struct.bufobj*, %struct.bufobj** }> ; type %6 + type <{ %struct.proc*, %struct.proc** }> ; type %7 + type <{ %struct.thread*, %struct.thread** }> ; type %8 + type <{ %struct.prison*, %struct.prison** }> ; type %9 + type <{ %struct.prison* }> ; type %10 + type <{ %struct.task* }> ; type %11 + type <{ %struct.osd*, %struct.osd** }> ; type %12 + type <{ %struct.proc* }> ; type %13 + type <{ %struct.ksiginfo*, %struct.ksiginfo** }> ; type %14 + type <{ %struct.pv_chunk*, %struct.pv_chunk** }> ; type %15 + type <{ %struct.pgrp*, %struct.pgrp** }> ; type %16 + type <{ %struct.knote*, %struct.knote** }> ; type %17 + type <{ %struct.ktr_request*, %struct.ktr_request** }> ; type %18 + type <{ %struct.mqueue_notifier* }> ; type %19 + type <{ %struct.turnstile* }> ; type %20 + type <{ %struct.namecache* }> ; type %21 + type <{ %struct.namecache*, %struct.namecache** }> ; type %22 + type <{ %struct.lockf*, %struct.lockf** }> ; type %23 + type <{ %struct.lockf_entry*, %struct.lockf_entry** }> ; type %24 + type <{ %struct.lockf_edge*, %struct.lockf_edge** }> ; type %25 + %struct.__siginfo = type <{ i32, i32, i32, i32, i32, i32, i8*, %union.sigval, %0 }> + %struct.__sigset = type <{ [4 x i32] }> + %struct.acl = type <{ i32, i32, [4 x i32], [254 x %struct.acl_entry] }> + %struct.acl_entry = type <{ i32, i32, i32, i16, i16 }> + %struct.au_mask = type <{ i32, i32 }> + %struct.au_tid_addr = type <{ i32, i32, [4 x i32] }> + %struct.auditinfo_addr = type <{ i32, %struct.au_mask, %struct.au_tid_addr, i32, i64 }> + %struct.bintime = type <{ i64, i64 }> + %struct.buf = type <{ %struct.bufobj*, i64, i8*, i8*, i32, i8, i8, i8, i8, i64, i64, void (%struct.buf*)*, i64, i64, %struct.buflists, %struct.buf*, %struct.buf*, i32, i8, i8, i8, i8, %struct.buflists, i16, i8, i8, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.lock, i64, i64, i8*, i32, i8, i8, i8, i8, i64, %struct.vnode*, i32, i32, %struct.ucred*, %struct.ucred*, i8*, %union.pager_info, i8, i8, i8, i8, %union.anon, [32 x %struct.vm_page*], i32, i8, i8, i8, i8, %struct.workhead, i8*, i8*, i8*, i32, i8, i8, i8, i8 }> + %struct.buf_ops = type <{ i8*, i32 (%struct.buf*)*, void (%struct.bufobj*, %struct.buf*)*, i32 (%struct.bufobj*, i32)*, void (%struct.bufobj*, %struct.buf*)* }> + %struct.buflists = type <{ %struct.buf*, %struct.buf** }> + %struct.bufobj = type <{ %struct.mtx, %struct.bufv, %struct.bufv, i64, i32, i8, i8, i8, i8, %struct.buf_ops*, i32, i8, i8, i8, i8, %struct.vm_object*, %6, i8*, %struct.vnode* }> + %struct.bufv = type <{ %struct.buflists, %struct.buf*, i32, i8, i8, i8, i8 }> + %struct.callout = type <{ %union.anon, i32, i8, i8, i8, i8, i8*, void (i8*)*, %struct.lock_object*, i32, i32 }> + 
%struct.cdev_privdata = type opaque + %struct.cluster_save = type <{ i64, i64, i8*, i32, i8, i8, i8, i8, %struct.buf** }> + %struct.componentname = type <{ i64, i64, %struct.thread*, %struct.ucred*, i32, i8, i8, i8, i8, i8*, i8*, i64, i64 }> + %struct.cpuset = type opaque + %struct.cv = type <{ i8*, i32, i8, i8, i8, i8 }> + %struct.fid = type <{ i16, i16, [16 x i8] }> + %struct.file = type <{ i8*, %struct.fileops*, %struct.ucred*, %struct.vnode*, i16, i16, i32, i32, i32, i64, %struct.cdev_privdata*, i64, i8* }> + %struct.filedesc = type opaque + %struct.filedesc_to_leader = type opaque + %struct.fileops = type <{ i32 (%struct.file*, %struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, %struct.uio*, %struct.ucred*, i32, %struct.thread*)*, i32 (%struct.file*, i64, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i64, i8*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, i32, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.knote*)*, i32 (%struct.file*, %struct.stat*, %struct.ucred*, %struct.thread*)*, i32 (%struct.file*, %struct.thread*)*, i32, i8, i8, i8, i8 }> + %struct.filterops = type <{ i32, i8, i8, i8, i8, i32 (%struct.knote*)*, void (%struct.knote*)*, i32 (%struct.knote*, i64)* }> + %struct.flock = type <{ i64, i64, i32, i16, i16, i32, i8, i8, i8, i8 }> + %struct.freelst = type <{ %struct.vnode*, %struct.vnode** }> + %struct.fsid = type <{ [2 x i32] }> + %struct.in6_addr = type opaque + %struct.in_addr = type opaque + %struct.inode = type opaque + %struct.iovec = type <{ i8*, i64 }> + %struct.itimers = type opaque + %struct.itimerval = type <{ %struct.bintime, %struct.bintime }> + %struct.kaioinfo = type opaque + %struct.kaudit_record = type opaque + %struct.kdtrace_proc = type opaque + %struct.kdtrace_thread = type opaque + %struct.kevent = type <{ i64, i16, i16, i32, i64, i8* }> + %struct.klist = type <{ %struct.knote* }> + %struct.knlist = type <{ %struct.klist, void (i8*)*, void (i8*)*, void (i8*)*, void (i8*)*, i8* }> + %struct.knote = type <{ %struct.klist, %struct.klist, %struct.knlist*, %17, %struct.kqueue*, %struct.kevent, i32, i32, i64, %union.sigval, %struct.filterops*, i8* }> + %struct.kqueue = type opaque + %struct.ksiginfo = type <{ %14, %struct.__siginfo, i32, i8, i8, i8, i8, %struct.sigqueue* }> + %struct.ktr_request = type opaque + %struct.label = type opaque + %struct.lock = type <{ %struct.lock_object, i64, i32, i32 }> + %struct.lock_list_entry = type opaque + %struct.lock_object = type <{ i8*, i32, i32, %struct.witness* }> + %struct.lock_owner = type opaque + %struct.lock_profile_object = type opaque + %struct.lockf = type <{ %23, %struct.mtx, %struct.lockf_entry_list, %struct.lockf_entry_list, i32, i8, i8, i8, i8 }> + %struct.lockf_edge = type <{ %25, %25, %struct.lockf_entry*, %struct.lockf_entry* }> + %struct.lockf_edge_list = type <{ %struct.lockf_edge* }> + %struct.lockf_entry = type <{ i16, i16, i8, i8, i8, i8, i64, i64, %struct.lock_owner*, %struct.vnode*, %struct.inode*, %struct.task*, %24, %struct.lockf_edge_list, %struct.lockf_edge_list, i32, i8, i8, i8, i8 }> + %struct.lockf_entry_list = type <{ %struct.lockf_entry* }> + %struct.lpohead = type <{ %struct.lock_profile_object* }> + %struct.md_page = type <{ %4 }> + %struct.mdproc = type <{ %struct.cv*, %struct.system_segment_descriptor }> + %struct.mdthread = type <{ i32, i8, i8, i8, i8, i64 }> + %struct.mntarg = type opaque + %struct.mntlist = type <{ %struct.mount*, %struct.mount** }> + %struct.mount = type <{ %struct.mtx, i32, i8, i8, i8, i8, 
%struct.mntlist, %struct.vfsops*, %struct.vfsconf*, %struct.vnode*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.freelst, i32, i32, i32, i32, i32, i32, %struct.vfsoptlist*, %struct.vfsoptlist*, i32, i8, i8, i8, i8, %struct.statfs, %struct.ucred*, i8*, i64, i32, i8, i8, i8, i8, %struct.netexport*, %struct.label*, i32, i32, i32, i32, %struct.thread*, i8*, %struct.lock }> + %struct.mqueue_notifier = type opaque + %struct.mtx = type <{ %struct.lock_object, i64 }> + %struct.namecache = type opaque + %struct.netexport = type opaque + %struct.nlminfo = type opaque + %struct.osd = type <{ i32, i8, i8, i8, i8, i8**, %12 }> + %struct.p_sched = type opaque + %struct.pargs = type <{ i32, i32, [1 x i8], i8, i8, i8 }> + %struct.pcb = type opaque + %struct.pgrp = type <{ %16, %13, %struct.session*, %struct.sigiolst, i32, i32, %struct.mtx }> + %struct.plimit = type opaque + %struct.pmap = type <{ %struct.mtx, i64*, %15, i32, i8, i8, i8, i8, %struct.bintime, %struct.vm_page* }> + %struct.prison = type <{ %9, i32, i32, i32, i32, %10, %9, %struct.prison*, %struct.mtx, %struct.task, %struct.osd, %struct.cpuset*, %struct.vnet*, %struct.vnode*, i32, i32, %struct.in_addr*, %struct.in6_addr*, [4 x i8*], i32, i32, i32, i32, i32, [5 x i32], i64, [256 x i8], [1024 x i8], [256 x i8], [256 x i8], [64 x i8] }> + %struct.proc = type <{ %7, %8, %struct.mtx, %struct.ucred*, %struct.filedesc*, %struct.filedesc_to_leader*, %struct.pstats*, %struct.plimit*, %struct.callout, %struct.sigacts*, i32, i32, i32, i8, i8, i8, i8, %7, %7, %struct.proc*, %7, %13, %struct.mtx, %struct.ksiginfo*, %struct.sigqueue, i32, i8, i8, i8, i8, %struct.vmspace*, i32, i8, i8, i8, i8, %struct.itimerval, %struct.rusage, %struct.rusage_ext, %struct.rusage_ext, i32, i32, i32, i8, i8, i8, i8, %struct.vnode*, %struct.ucred*, %struct.vnode*, i32, i8, i8, i8, i8, %struct.sigiolst, i32, i32, i64, i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %struct.nlminfo*, %struct.kaioinfo*, %struct.thread*, i32, i8, i8, i8, i8, %struct.thread*, i32, i32, %struct.itimers*, i32, i32, [20 x i8], i8, i8, i8, i8, %struct.pgrp*, %struct.sysentvec*, %struct.pargs*, i64, i8, i8, i8, i8, i32, i16, i8, i8, i8, i8, i8, i8, %struct.knlist, i32, i8, i8, i8, i8, %struct.mdproc, %struct.callout, i16, i8, i8, i8, i8, i8, i8, %struct.proc*, %struct.proc*, i8*, %struct.label*, %struct.p_sched*, %18, %19, %struct.kdtrace_proc*, %struct.cv }> + %struct.pstats = type opaque + %struct.pv_chunk = type <{ %struct.pmap*, %15, [3 x i64], [2 x i64], [168 x %struct.pv_entry] }> + %struct.pv_entry = type <{ i64, %4 }> + %struct.rusage = type <{ %struct.bintime, %struct.bintime, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 }> + %struct.rusage_ext = type <{ i64, i64, i64, i64, i64, i64, i64 }> + %struct.selfd = type opaque + %struct.selfdlist = type <{ %struct.selfd*, %struct.selfd** }> + %struct.selinfo = type <{ %struct.selfdlist, %struct.knlist, %struct.mtx* }> + %struct.seltd = type opaque + %struct.session = type <{ i32, i8, i8, i8, i8, %struct.proc*, %struct.vnode*, %struct.tty*, i32, [24 x i8], i8, i8, i8, i8, %struct.mtx }> + %struct.shmmap_state = type opaque + %struct.sigacts = type <{ [128 x void (i32)*], [128 x %struct.__sigset], %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, %struct.__sigset, i32, i32, %struct.mtx }> + %struct.sigaltstack = type <{ i8*, i64, i32, i8, i8, i8, i8 }> + %struct.sigio = type <{ %union.sigval, %struct.sigiolst, 
%struct.sigio**, %struct.ucred*, i32, i8, i8, i8, i8 }> + %struct.sigiolst = type <{ %struct.sigio* }> + %struct.sigqueue = type <{ %struct.__sigset, %struct.__sigset, %14, %struct.proc*, i32, i8, i8, i8, i8 }> + %struct.sleepqueue = type opaque + %struct.sockaddr = type opaque + %struct.stat = type <{ i32, i32, i16, i16, i32, i32, i32, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i32, i32, i32, %struct.bintime }> + %struct.statfs = type <{ i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, [10 x i64], i32, i32, %struct.fsid, [80 x i8], [16 x i8], [88 x i8], [88 x i8] }> + %struct.sysctl_req = type <{ %struct.thread*, i32, i8, i8, i8, i8, i8*, i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i8*, i64, i64, i32 (%struct.sysctl_req*, i8*, i64)*, i64, i32, i8, i8, i8, i8 }> + %struct.sysentvec = type opaque + %struct.system_segment_descriptor = type <{ i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct.task = type <{ %11, i16, i16, i8, i8, i8, i8, void (i8*, i32)*, i8* }> + %struct.td_sched = type opaque + %struct.thread = type <{ %struct.mtx*, %struct.proc*, %8, %8, %8, %8, %struct.cpuset*, %struct.seltd*, %struct.sleepqueue*, %struct.turnstile*, %struct.umtx_q*, i32, i8, i8, i8, i8, %struct.sigqueue, i32, i32, i32, i32, i32, i8, i8, i8, i8, i8*, i8*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, %struct.turnstile*, i8*, %20, %struct.lock_list_entry*, i32, i32, %struct.ucred*, i32, i32, %struct.rusage, i64, i64, i32, i32, i32, i32, i32, %struct.__sigset, %struct.__sigset, i32, %struct.sigaltstack, i32, i8, i8, i8, i8, i64, i32, [20 x i8], %struct.file*, i32, i32, %struct.osd, i8, i8, i8, i8, i8, i8, i8, i8, %struct.pcb*, i32, i8, i8, i8, i8, [2 x i64], %struct.callout, %struct.trapframe*, %struct.vm_object*, i64, i32, i8, i8, i8, i8, %struct.vm_object*, i64, i32, i32, %struct.mdthread, %struct.td_sched*, %struct.kaudit_record*, i32, i8, i8, i8, i8, [2 x %struct.lpohead], %struct.kdtrace_thread*, i32, i8, i8, i8, i8, %struct.vnet*, i8* }> + %struct.trapframe = type <{ i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i16, i16, i64, i32, i16, i16, i64, i64, i64, i64, i64, i64 }> + %struct.tty = type opaque + %struct.turnstile = type opaque + %struct.ucred = type <{ i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, %struct.uidinfo*, %struct.uidinfo*, %struct.prison*, %struct.vimage*, i32, i8, i8, i8, i8, [2 x i8*], %struct.label*, %struct.auditinfo_addr, i32*, i32, i8, i8, i8, i8 }> + %struct.uidinfo = type opaque + %struct.uio = type <{ %struct.iovec*, i32, i8, i8, i8, i8, i64, i64, i32, i32, %struct.thread* }> + %struct.umtx_q = type opaque + %struct.vattr = type <{ i32, i16, i16, i32, i32, i32, i8, i8, i8, i8, i64, i64, i64, %struct.bintime, %struct.bintime, %struct.bintime, %struct.bintime, i64, i64, i32, i8, i8, i8, i8, i64, i64, i32, i8, i8, i8, i8, i64 }> + %struct.vfsconf = type <{ i32, [16 x i8], i8, i8, i8, i8, %struct.vfsops*, i32, i32, i32, i8, i8, i8, i8, %struct.vfsoptdecl*, %struct.vfsconfhead }> + %struct.vfsconfhead = type <{ %struct.vfsconf*, %struct.vfsconf** }> + %struct.vfsops = type <{ i32 (%struct.mount*)*, i32 (%struct.mntarg*, i8*, i32)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, %struct.vnode**)*, i32 (%struct.mount*, i32, i32, i8*)*, i32 (%struct.mount*, %struct.statfs*)*, i32 (%struct.mount*, i32)*, i32 (%struct.mount*, i32, i32, %struct.vnode**)*, i32 (%struct.mount*, %struct.fid*, %struct.vnode**)*, i32 (%struct.mount*, %struct.sockaddr*, i32*, 
%struct.ucred**, i32*, i32**)*, i32 (%struct.vfsconf*)*, i32 (%struct.vfsconf*)*, i32 (%struct.mount*, i32, %struct.vnode*, i32, i8*)*, i32 (%struct.mount*, i32, %struct.sysctl_req*)*, void (%struct.mount*)* }> + %struct.vfsopt = type <{ %struct.vfsoptlist, i8*, i8*, i32, i32, i32, i8, i8, i8, i8 }> + %struct.vfsoptdecl = type opaque + %struct.vfsoptlist = type <{ %struct.vfsopt*, %struct.vfsopt** }> + %struct.vimage = type opaque + %struct.vm_map = type <{ %struct.vm_map_entry, %struct.mtx, %struct.mtx, i32, i8, i8, i8, i8, i64, i32, i8, i8, i8, i8, %struct.vm_map_entry*, %struct.pmap*, %struct.vm_map_entry* }> + %struct.vm_map_entry = type <{ %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, %struct.vm_map_entry*, i64, i64, i64, i64, i64, %union.sigval, i64, i32, i8, i8, i8, i8, i32, i8, i8, i8, i8, i64, %struct.uidinfo* }> + %struct.vm_object = type <{ %struct.mtx, %1, %2, %1, %3, %struct.vm_page*, i64, i32, i32, i32, i8, i8, i16, i16, i16, i32, %struct.vm_object*, i64, %1, %5, %struct.vm_page*, i8*, %union.anon, %struct.uidinfo*, i64 }> + %struct.vm_page = type <{ %3, %3, %struct.vm_page*, %struct.vm_page*, %struct.vm_object*, i64, i64, %struct.md_page, i8, i8, i16, i8, i8, i16, i32, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }> + %struct.vm_reserv = type opaque + %struct.vmspace = type <{ %struct.vm_map, %struct.shmmap_state*, i64, i64, i64, i64, i8*, i8*, i8*, i32, i8, i8, i8, i8, %struct.pmap }> + %struct.vnet = type opaque + %struct.vnode = type <{ i32, i8, i8, i8, i8, i8*, %struct.vop_vector*, i8*, %struct.mount*, %struct.freelst, %union.sigval, %struct.freelst, i32, i8, i8, i8, i8, %21, %22, %struct.namecache*, i64, i64, i64, i32, i8, i8, i8, i8, %struct.lock, %struct.mtx, %struct.lock*, i32, i32, i64, i64, i32, i8, i8, i8, i8, %struct.freelst, %struct.bufobj, %struct.vpollinfo*, %struct.label*, %struct.lockf* }> + %struct.vnodeop_desc = type <{ i8*, i32, i8, i8, i8, i8, i32 (%struct.vop_generic_args*)*, i32*, i32, i32, i32, i32 }> + %struct.vop_access_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }> + %struct.vop_aclcheck_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.acl*, %struct.ucred*, %struct.thread* }> + %struct.vop_advlock_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8 }> + %struct.vop_advlockasync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i8*, i32, i8, i8, i8, i8, %struct.flock*, i32, i8, i8, i8, i8, %struct.task*, i8** }> + %struct.vop_bmap_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, %struct.bufobj**, i64*, i32*, i32* }> + %struct.vop_cachedlookup_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname* }> + %struct.vop_create_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr* }> + %struct.vop_deleteextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.ucred*, %struct.thread* }> + %struct.vop_fsync_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.thread* }> + %struct.vop_generic_args = type <{ %struct.vnodeop_desc* }> + %struct.vop_getattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vattr*, %struct.ucred* }> + %struct.vop_getextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, i64*, 
%struct.ucred*, %struct.thread* }> + %struct.vop_getpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i64 }> + %struct.vop_getwritemount_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.mount** }> + %struct.vop_inactive_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.thread* }> + %struct.vop_ioctl_args = type <{ %struct.vop_generic_args, %struct.vnode*, i64, i8*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread* }> + %struct.vop_islocked_args = type <{ %struct.vop_generic_args, %struct.vnode* }> + %struct.vop_kqfilter_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.knote* }> + %struct.vop_link_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode*, %struct.componentname* }> + %struct.vop_listextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.uio*, i64*, %struct.ucred*, %struct.thread* }> + %struct.vop_lock1_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, i32, i8, i8, i8, i8 }> + %struct.vop_open_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, %struct.ucred*, %struct.thread*, %struct.file* }> + %struct.vop_openextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.ucred*, %struct.thread* }> + %struct.vop_pathconf_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i64* }> + %struct.vop_putpages_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vm_page**, i32, i32, i32*, i64 }> + %struct.vop_read_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, i32, i8, i8, i8, i8, %struct.ucred* }> + %struct.vop_readdir_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred*, i32*, i32*, i64** }> + %struct.vop_readlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.uio*, %struct.ucred* }> + %struct.vop_reallocblks_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.cluster_save* }> + %struct.vop_rename_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode*, %struct.componentname*, %struct.vnode*, %struct.vnode*, %struct.componentname* }> + %struct.vop_revoke_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8 }> + %struct.vop_setextattr_args = type <{ %struct.vop_generic_args, %struct.vnode*, i32, i8, i8, i8, i8, i8*, %struct.uio*, %struct.ucred*, %struct.thread* }> + %struct.vop_setlabel_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.label*, %struct.ucred*, %struct.thread* }> + %struct.vop_strategy_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.buf* }> + %struct.vop_symlink_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.componentname*, %struct.vattr*, i8* }> + %struct.vop_vector = type <{ %struct.vop_vector*, i32 (%struct.vop_generic_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_cachedlookup_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_whiteout_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_open_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_getattr_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_read_args*)*, i32 (%struct.vop_ioctl_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_kqfilter_args*)*, 
i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_fsync_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_rename_args*)*, i32 (%struct.vop_create_args*)*, i32 (%struct.vop_link_args*)*, i32 (%struct.vop_symlink_args*)*, i32 (%struct.vop_readdir_args*)*, i32 (%struct.vop_readlink_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_inactive_args*)*, i32 (%struct.vop_lock1_args*)*, i32 (%struct.vop_revoke_args*)*, i32 (%struct.vop_bmap_args*)*, i32 (%struct.vop_strategy_args*)*, i32 (%struct.vop_getwritemount_args*)*, i32 (%struct.vop_islocked_args*)*, i32 (%struct.vop_pathconf_args*)*, i32 (%struct.vop_advlock_args*)*, i32 (%struct.vop_advlockasync_args*)*, i32 (%struct.vop_reallocblks_args*)*, i32 (%struct.vop_getpages_args*)*, i32 (%struct.vop_putpages_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_aclcheck_args*)*, i32 (%struct.vop_access_args*)*, i32 (%struct.vop_getextattr_args*)*, i32 (%struct.vop_listextattr_args*)*, i32 (%struct.vop_openextattr_args*)*, i32 (%struct.vop_deleteextattr_args*)*, i32 (%struct.vop_setextattr_args*)*, i32 (%struct.vop_setlabel_args*)*, i32 (%struct.vop_vptofh_args*)*, i32 (%struct.vop_vptocnp_args*)* }> + %struct.vop_vptocnp_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.vnode**, %struct.ucred*, i8*, i32* }> + %struct.vop_vptofh_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.fid* }> + %struct.vop_whiteout_args = type <{ %struct.vop_generic_args, %struct.vnode*, %struct.componentname*, i32, i8, i8, i8, i8 }> + %struct.vpollinfo = type <{ %struct.mtx, %struct.selinfo, i16, i16, i8, i8, i8, i8 }> + %struct.witness = type opaque + %struct.workhead = type <{ %struct.worklist* }> + %struct.worklist = type opaque + %union.anon = type <{ [16 x i8] }> + %union.pager_info = type <{ [4 x i8] }> + %union.sigval = type <{ [8 x i8] }> + +define i32 @vlrureclaim(%struct.mount* %mp) nounwind { +entry: + br i1 undef, label %if.then11, label %do.end + +if.then11: ; preds = %entry + br label %do.end + +do.end: ; preds = %if.then11, %entry + br label %while.cond.outer + +while.cond.outer: ; preds = %while.cond.outer.backedge, %do.end + %count.0.ph = phi i32 [ undef, %do.end ], [ undef, %while.cond.outer.backedge ] ; [#uses=1] + br label %while.cond + +while.cond: ; preds = %next_iter, %while.cond.outer + %count.0 = phi i32 [ %dec, %next_iter ], [ %count.0.ph, %while.cond.outer ] ; [#uses=2] + %cmp21 = icmp eq i32 %count.0, 0 ; [#uses=1] + br i1 %cmp21, label %do.body288.loopexit4, label %while.body + +while.body: ; preds = %while.cond + br label %while.cond27 + +while.cond27: ; preds = %while.body36, %while.body + br i1 undef, label %do.body288.loopexit, label %land.rhs + +land.rhs: ; preds = %while.cond27 + br i1 undef, label %while.body36, label %while.end + +while.body36: ; preds = %land.rhs + br label %while.cond27 + +while.end: ; preds = %land.rhs + br i1 undef, label %do.body288.loopexit4, label %do.body46 + +do.body46: ; preds = %while.end + br i1 undef, label %if.else64, label %if.then53 + +if.then53: ; preds = %do.body46 + br label %if.end72 + +if.else64: ; preds = %do.body46 + br label %if.end72 + +if.end72: ; preds = %if.else64, %if.then53 + %dec = add i32 %count.0, -1 ; [#uses=2] + br i1 undef, label %next_iter, label %if.end111 + +if.end111: ; preds = %if.end72 + br i1 undef, label %lor.lhs.false, label %do.body145 + +lor.lhs.false: ; preds = %if.end111 + br i1 undef, label %lor.lhs.false122, label %do.body145 + +lor.lhs.false122: ; 
preds = %lor.lhs.false + br i1 undef, label %lor.lhs.false128, label %do.body145 + +lor.lhs.false128: ; preds = %lor.lhs.false122 + br i1 undef, label %do.body162, label %land.lhs.true + +land.lhs.true: ; preds = %lor.lhs.false128 + br i1 undef, label %do.body145, label %do.body162 + +do.body145: ; preds = %land.lhs.true, %lor.lhs.false122, %lor.lhs.false, %if.end111 + br i1 undef, label %if.then156, label %next_iter + +if.then156: ; preds = %do.body145 + br label %next_iter + +do.body162: ; preds = %land.lhs.true, %lor.lhs.false128 + br i1 undef, label %if.then173, label %do.end177 + +if.then173: ; preds = %do.body162 + br label %do.end177 + +do.end177: ; preds = %if.then173, %do.body162 + br i1 undef, label %do.body185, label %if.then182 + +if.then182: ; preds = %do.end177 + br label %next_iter_mntunlocked + +do.body185: ; preds = %do.end177 + br i1 undef, label %if.then196, label %do.end202 + +if.then196: ; preds = %do.body185 + br label %do.end202 + +do.end202: ; preds = %if.then196, %do.body185 + br i1 undef, label %lor.lhs.false207, label %if.then231 + +lor.lhs.false207: ; preds = %do.end202 + br i1 undef, label %lor.lhs.false214, label %if.then231 + +lor.lhs.false214: ; preds = %lor.lhs.false207 + br i1 undef, label %do.end236, label %land.lhs.true221 + +land.lhs.true221: ; preds = %lor.lhs.false214 + br i1 undef, label %if.then231, label %do.end236 + +if.then231: ; preds = %land.lhs.true221, %lor.lhs.false207, %do.end202 + br label %next_iter_mntunlocked + +do.end236: ; preds = %land.lhs.true221, %lor.lhs.false214 + br label %next_iter_mntunlocked + +next_iter_mntunlocked: ; preds = %do.end236, %if.then231, %if.then182 + br i1 undef, label %yield, label %do.body269 + +next_iter: ; preds = %if.then156, %do.body145, %if.end72 + %rem2482 = and i32 %dec, 255 ; [#uses=1] + %cmp249 = icmp eq i32 %rem2482, 0 ; [#uses=1] + br i1 %cmp249, label %do.body253, label %while.cond + +do.body253: ; preds = %next_iter + br i1 undef, label %if.then264, label %yield + +if.then264: ; preds = %do.body253 + br label %yield + +yield: ; preds = %if.then264, %do.body253, %next_iter_mntunlocked + br label %do.body269 + +do.body269: ; preds = %yield, %next_iter_mntunlocked + br i1 undef, label %if.then280, label %while.cond.outer.backedge + +if.then280: ; preds = %do.body269 + br label %while.cond.outer.backedge + +while.cond.outer.backedge: ; preds = %if.then280, %do.body269 + br label %while.cond.outer + +do.body288.loopexit: ; preds = %while.cond27 + br label %do.body288 + +do.body288.loopexit4: ; preds = %while.end, %while.cond + br label %do.body288 + +do.body288: ; preds = %do.body288.loopexit4, %do.body288.loopexit + br i1 undef, label %if.then299, label %do.end303 + +if.then299: ; preds = %do.body288 + br label %do.end303 + +do.end303: ; preds = %if.then299, %do.body288 + ret i32 undef +} diff --git a/final/test/Analysis/ScalarEvolution/avoid-smax-0.ll b/final/test/Analysis/ScalarEvolution/avoid-smax-0.ll new file mode 100644 index 00000000000..24275f9fc4a --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/avoid-smax-0.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -scalar-evolution -analyze | grep {Loop %bb3: backedge-taken count is (-1 + %n)} + +; We don't want to use a max in the trip count expression in +; this testcase. 
+ +define void @foo(i32 %n, i32* %p, i32* %q) nounwind { +entry: + icmp sgt i32 %n, 0 + br i1 %0, label %bb, label %return + +bb: + load i32* %q, align 4 + icmp eq i32 %1, 0 + br i1 %2, label %return, label %bb3.preheader + +bb3.preheader: + br label %bb3 + +bb3: + %i.0 = phi i32 [ %7, %bb3 ], [ 0, %bb3.preheader ] + getelementptr i32* %p, i32 %i.0 + load i32* %3, align 4 + add i32 %4, 1 + getelementptr i32* %p, i32 %i.0 + store i32 %5, i32* %6, align 4 + add i32 %i.0, 1 + icmp slt i32 %7, %n + br i1 %8, label %bb3, label %return.loopexit + +return.loopexit: + br label %return + +return: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/final/test/Analysis/ScalarEvolution/avoid-smax-1.ll new file mode 100644 index 00000000000..89e8b983c0c --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/avoid-smax-1.ll @@ -0,0 +1,236 @@ +; RUN: opt < %s -indvars -S > %t +; RUN: grep select %t | count 2 +; RUN: grep {icmp ne i32.\* } %t + +; Indvars should be able to insert a canonical induction variable +; for the bb6 loop without using a maximum calculation (icmp, select) +; because it should be able to prove that the comparison is guarded +; by an appropriate conditional branch. Unfortunately, indvars is +; not yet able to find the comparison for the other two loops in +; this testcase. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9" + +define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind { +entry: + %0 = mul i32 %x, %w ; [#uses=2] + %1 = mul i32 %x, %w ; [#uses=1] + %2 = sdiv i32 %1, 4 ; [#uses=1] + %.sum2 = add i32 %2, %0 ; [#uses=2] + %cond = icmp eq i32 %d, 1 ; [#uses=1] + br i1 %cond, label %bb29, label %bb10.preheader + +bb10.preheader: ; preds = %entry + %3 = icmp sgt i32 %x, 0 ; [#uses=1] + br i1 %3, label %bb.nph9, label %bb18.loopexit + +bb.nph7: ; preds = %bb7.preheader + %4 = mul i32 %y.08, %w ; [#uses=1] + %5 = mul i32 %y.08, %s ; [#uses=1] + %6 = add i32 %5, 1 ; [#uses=1] + br label %bb6 + +bb6: ; preds = %bb7, %bb.nph7 + %x.06 = phi i32 [ %13, %bb7 ], [ 0, %bb.nph7 ] ; [#uses=3] + %7 = add i32 %x.06, %4 ; [#uses=1] + %8 = shl i32 %x.06, 1 ; [#uses=1] + %9 = add i32 %6, %8 ; [#uses=1] + %10 = getelementptr i8* %r, i32 %9 ; [#uses=1] + %11 = load i8* %10, align 1 ; [#uses=1] + %12 = getelementptr i8* %j, i32 %7 ; [#uses=1] + store i8 %11, i8* %12, align 1 + %13 = add i32 %x.06, 1 ; [#uses=2] + br label %bb7 + +bb7: ; preds = %bb6 + %14 = icmp slt i32 %13, %w ; [#uses=1] + br i1 %14, label %bb6, label %bb7.bb9_crit_edge + +bb7.bb9_crit_edge: ; preds = %bb7 + br label %bb9 + +bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge + %15 = add i32 %y.08, 1 ; [#uses=2] + br label %bb10 + +bb10: ; preds = %bb9 + %16 = icmp slt i32 %15, %x ; [#uses=1] + br i1 %16, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge + +bb10.bb18.loopexit_crit_edge: ; preds = %bb10 + br label %bb10.bb18.loopexit_crit_edge.split + +bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge + br label %bb18.loopexit + +bb.nph9: ; preds = %bb10.preheader + %17 = icmp sgt i32 %w, 0 ; [#uses=1] + br i1 %17, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split + +bb.nph9.split: ; preds = %bb.nph9 + br label %bb7.preheader + +bb7.preheader: ; preds = %bb.nph9.split, %bb10 + %y.08 = phi i32 [ %15, %bb10 ], [ 0, %bb.nph9.split ] ; [#uses=3] + br i1 true, label %bb.nph7, label %bb9 + +bb.nph5: ; 
preds = %bb18.loopexit + %18 = sdiv i32 %w, 2 ; [#uses=1] + %19 = icmp slt i32 %w, 2 ; [#uses=1] + %20 = sdiv i32 %x, 2 ; [#uses=1] + br i1 %19, label %bb18.bb20_crit_edge.split, label %bb.nph5.split + +bb.nph5.split: ; preds = %bb.nph5 + br label %bb13 + +bb13: ; preds = %bb18, %bb.nph5.split + %y.14 = phi i32 [ %42, %bb18 ], [ 0, %bb.nph5.split ] ; [#uses=4] + %21 = mul i32 %18, %y.14 ; [#uses=2] + %22 = shl i32 %y.14, 1 ; [#uses=1] + %23 = srem i32 %y.14, 2 ; [#uses=1] + %24 = add i32 %23, %22 ; [#uses=1] + %25 = mul i32 %24, %s ; [#uses=2] + br i1 true, label %bb.nph3, label %bb17 + +bb.nph3: ; preds = %bb13 + %26 = add i32 %21, %0 ; [#uses=1] + %27 = add i32 %21, %.sum2 ; [#uses=1] + %28 = sdiv i32 %w, 2 ; [#uses=1] + br label %bb14 + +bb14: ; preds = %bb15, %bb.nph3 + %x.12 = phi i32 [ %40, %bb15 ], [ 0, %bb.nph3 ] ; [#uses=5] + %29 = shl i32 %x.12, 2 ; [#uses=1] + %30 = add i32 %29, %25 ; [#uses=1] + %31 = getelementptr i8* %r, i32 %30 ; [#uses=1] + %32 = load i8* %31, align 1 ; [#uses=1] + %.sum = add i32 %26, %x.12 ; [#uses=1] + %33 = getelementptr i8* %j, i32 %.sum ; [#uses=1] + store i8 %32, i8* %33, align 1 + %34 = shl i32 %x.12, 2 ; [#uses=1] + %35 = or i32 %34, 2 ; [#uses=1] + %36 = add i32 %35, %25 ; [#uses=1] + %37 = getelementptr i8* %r, i32 %36 ; [#uses=1] + %38 = load i8* %37, align 1 ; [#uses=1] + %.sum6 = add i32 %27, %x.12 ; [#uses=1] + %39 = getelementptr i8* %j, i32 %.sum6 ; [#uses=1] + store i8 %38, i8* %39, align 1 + %40 = add i32 %x.12, 1 ; [#uses=2] + br label %bb15 + +bb15: ; preds = %bb14 + %41 = icmp sgt i32 %28, %40 ; [#uses=1] + br i1 %41, label %bb14, label %bb15.bb17_crit_edge + +bb15.bb17_crit_edge: ; preds = %bb15 + br label %bb17 + +bb17: ; preds = %bb15.bb17_crit_edge, %bb13 + %42 = add i32 %y.14, 1 ; [#uses=2] + br label %bb18 + +bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader + %43 = icmp slt i32 %x, 2 ; [#uses=1] + br i1 %43, label %bb20, label %bb.nph5 + +bb18: ; preds = %bb17 + %44 = icmp sgt i32 %20, %42 ; [#uses=1] + br i1 %44, label %bb13, label %bb18.bb20_crit_edge + +bb18.bb20_crit_edge: ; preds = %bb18 + br label %bb18.bb20_crit_edge.split + +bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5 + br label %bb20 + +bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit + switch i32 %d, label %return [ + i32 3, label %bb22 + i32 1, label %bb29 + ] + +bb22: ; preds = %bb20 + %45 = mul i32 %x, %w ; [#uses=1] + %46 = sdiv i32 %45, 4 ; [#uses=1] + %.sum3 = add i32 %46, %.sum2 ; [#uses=2] + %47 = add i32 %x, 15 ; [#uses=1] + %48 = and i32 %47, -16 ; [#uses=1] + %49 = add i32 %w, 15 ; [#uses=1] + %50 = and i32 %49, -16 ; [#uses=1] + %51 = mul i32 %48, %s ; [#uses=1] + %52 = icmp sgt i32 %x, 0 ; [#uses=1] + br i1 %52, label %bb.nph, label %bb26 + +bb.nph: ; preds = %bb22 + br label %bb23 + +bb23: ; preds = %bb24, %bb.nph + %y.21 = phi i32 [ %57, %bb24 ], [ 0, %bb.nph ] ; [#uses=3] + %53 = mul i32 %y.21, %50 ; [#uses=1] + %.sum1 = add i32 %53, %51 ; [#uses=1] + %54 = getelementptr i8* %r, i32 %.sum1 ; [#uses=1] + %55 = mul i32 %y.21, %w ; [#uses=1] + %.sum5 = add i32 %55, %.sum3 ; [#uses=1] + %56 = getelementptr i8* %j, i32 %.sum5 ; [#uses=1] + tail call void @llvm.memcpy.i32(i8* %56, i8* %54, i32 %w, i32 1) + %57 = add i32 %y.21, 1 ; [#uses=2] + br label %bb24 + +bb24: ; preds = %bb23 + %58 = icmp slt i32 %57, %x ; [#uses=1] + br i1 %58, label %bb23, label %bb24.bb26_crit_edge + +bb24.bb26_crit_edge: ; preds = %bb24 + br label %bb26 + +bb26: ; preds = %bb24.bb26_crit_edge, %bb22 + %59 = mul i32 %x, %w 
; [#uses=1] + %.sum4 = add i32 %.sum3, %59 ; [#uses=1] + %60 = getelementptr i8* %j, i32 %.sum4 ; [#uses=1] + %61 = mul i32 %x, %w ; [#uses=1] + %62 = sdiv i32 %61, 2 ; [#uses=1] + tail call void @llvm.memset.i32(i8* %60, i8 -128, i32 %62, i32 1) + ret void + +bb29: ; preds = %bb20, %entry + %63 = add i32 %w, 15 ; [#uses=1] + %64 = and i32 %63, -16 ; [#uses=1] + %65 = icmp sgt i32 %x, 0 ; [#uses=1] + br i1 %65, label %bb.nph11, label %bb33 + +bb.nph11: ; preds = %bb29 + br label %bb30 + +bb30: ; preds = %bb31, %bb.nph11 + %y.310 = phi i32 [ %70, %bb31 ], [ 0, %bb.nph11 ] ; [#uses=3] + %66 = mul i32 %y.310, %64 ; [#uses=1] + %67 = getelementptr i8* %r, i32 %66 ; [#uses=1] + %68 = mul i32 %y.310, %w ; [#uses=1] + %69 = getelementptr i8* %j, i32 %68 ; [#uses=1] + tail call void @llvm.memcpy.i32(i8* %69, i8* %67, i32 %w, i32 1) + %70 = add i32 %y.310, 1 ; [#uses=2] + br label %bb31 + +bb31: ; preds = %bb30 + %71 = icmp slt i32 %70, %x ; [#uses=1] + br i1 %71, label %bb30, label %bb31.bb33_crit_edge + +bb31.bb33_crit_edge: ; preds = %bb31 + br label %bb33 + +bb33: ; preds = %bb31.bb33_crit_edge, %bb29 + %72 = mul i32 %x, %w ; [#uses=1] + %73 = getelementptr i8* %j, i32 %72 ; [#uses=1] + %74 = mul i32 %x, %w ; [#uses=1] + %75 = sdiv i32 %74, 2 ; [#uses=1] + tail call void @llvm.memset.i32(i8* %73, i8 -128, i32 %75, i32 1) + ret void + +return: ; preds = %bb20 + ret void +} + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind + +declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind diff --git a/final/test/Analysis/ScalarEvolution/dg.exp b/final/test/Analysis/ScalarEvolution/dg.exp new file mode 100644 index 00000000000..b65a2503ac6 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.ll]] diff --git a/final/test/Analysis/ScalarEvolution/div-overflow.ll b/final/test/Analysis/ScalarEvolution/div-overflow.ll new file mode 100644 index 00000000000..4f6f1e2a300 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/div-overflow.ll @@ -0,0 +1,10 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep {\\--> ((-128 \\* %a) /u -128)} + +; Don't let ScalarEvolution fold this div away. 
+ +define i8 @foo(i8 %a) { + %t0 = shl i8 %a, 7 + %t1 = lshr i8 %t0, 7 + ret i8 %t1 +} diff --git a/final/test/Analysis/ScalarEvolution/do-loop.ll b/final/test/Analysis/ScalarEvolution/do-loop.ll new file mode 100644 index 00000000000..6e3295a920b --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/do-loop.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep smax +; PR1614 + +define i32 @f(i32 %x, i32 %y) { +entry: + br label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] + %x_addr.0 = add i32 %indvar, %x ; [#uses=1] + %tmp2 = add i32 %x_addr.0, 1 ; [#uses=2] + %tmp5 = icmp slt i32 %tmp2, %y ; [#uses=1] + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br i1 %tmp5, label %bb, label %bb7 + +bb7: ; preds = %bb + ret i32 %tmp2 +} diff --git a/final/test/Analysis/ScalarEvolution/fold.ll b/final/test/Analysis/ScalarEvolution/fold.ll new file mode 100644 index 00000000000..4e2adf187e8 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/fold.ll @@ -0,0 +1,62 @@ +; RUN: opt -analyze -scalar-evolution %s -S | FileCheck %s + +define i16 @test1(i8 %x) { + %A = zext i8 %x to i12 + %B = sext i12 %A to i16 +; CHECK: zext i8 %x to i16 + ret i16 %B +} + +define i8 @test2(i8 %x) { + %A = zext i8 %x to i16 + %B = add i16 %A, 1025 + %C = trunc i16 %B to i8 +; CHECK: (1 + %x) + ret i8 %C +} + +define i8 @test3(i8 %x) { + %A = zext i8 %x to i16 + %B = mul i16 %A, 1027 + %C = trunc i16 %B to i8 +; CHECK: (3 * %x) + ret i8 %C +} + +define void @test4(i32 %x, i32 %y) { +entry: + %Y = and i32 %y, 3 + br label %loop +loop: + %A = phi i32 [0, %entry], [%I, %loop] + %rand1 = icmp sgt i32 %A, %Y + %Z1 = select i1 %rand1, i32 %A, i32 %Y + %rand2 = icmp ugt i32 %A, %Z1 + %Z2 = select i1 %rand2, i32 %A, i32 %Z1 +; CHECK: %Z2 = +; CHECK-NEXT: --> ([[EXPR:.*]]){{ +}}Exits: 20 + %B = trunc i32 %Z2 to i16 + %C = sext i16 %B to i30 +; CHECK: %C = +; CHECK-NEXT: (trunc i32 ([[EXPR]]) to i30) + %D = sext i16 %B to i32 +; CHECK: %D = +; CHECK-NEXT: ([[EXPR]]) + %E = sext i16 %B to i34 +; CHECK: %E = +; CHECK-NEXT: (zext i32 ([[EXPR]]) to i34) + %F = zext i16 %B to i30 +; CHECK: %F = +; CHECK-NEXT: (trunc i32 ([[EXPR]]) to i30 + %G = zext i16 %B to i32 +; CHECK: %G = +; CHECK-NEXT: ([[EXPR]]) + %H = zext i16 %B to i34 +; CHECK: %H = +; CHECK-NEXT: (zext i32 ([[EXPR]]) to i34) + %I = add i32 %A, 1 + %0 = icmp ne i32 %A, 20 + br i1 %0, label %loop, label %exit +exit: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/max-trip-count.ll b/final/test/Analysis/ScalarEvolution/max-trip-count.ll new file mode 100644 index 00000000000..843fb073087 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/max-trip-count.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; ScalarEvolution should be able to understand the loop and eliminate the casts. 
+ +; CHECK: {%d,+,sizeof(i32)} + +define void @foo(i32* nocapture %d, i32 %n) nounwind { +entry: + %0 = icmp sgt i32 %n, 0 ; [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %i.02 = phi i32 [ %5, %bb1 ], [ 0, %bb.nph ] ; [#uses=2] + %p.01 = phi i8 [ %4, %bb1 ], [ -1, %bb.nph ] ; [#uses=2] + %1 = sext i8 %p.01 to i32 ; [#uses=1] + %2 = sext i32 %i.02 to i64 ; [#uses=1] + %3 = getelementptr i32* %d, i64 %2 ; [#uses=1] + store i32 %1, i32* %3, align 4 + %4 = add i8 %p.01, 1 ; [#uses=1] + %5 = add i32 %i.02, 1 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %6 = icmp slt i32 %5, %n ; [#uses=1] + br i1 %6, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} + +; ScalarEvolution should be able to find the maximum tripcount +; of this multiple-exit loop, and if it doesn't know the exact +; count, it should say so. + +; PR7845 +; CHECK: Loop %for.cond: Unpredictable backedge-taken count. +; CHECK: Loop %for.cond: max backedge-taken count is 5 + +@.str = private constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %g_4.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] ; [#uses=5] + %cmp = icmp slt i32 %g_4.0, 5 ; [#uses=1] + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %conv = trunc i32 %g_4.0 to i16 ; [#uses=1] + %tobool.not = icmp eq i16 %conv, 0 ; [#uses=1] + %tobool3 = icmp ne i32 %g_4.0, 0 ; [#uses=1] + %or.cond = and i1 %tobool.not, %tobool3 ; [#uses=1] + br i1 %or.cond, label %for.end, label %for.inc + +for.inc: ; preds = %for.body + %add = add nsw i32 %g_4.0, 1 ; [#uses=1] + br label %for.cond + +for.end: ; preds = %for.body, %for.cond + %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %g_4.0) nounwind ; [#uses=0] + ret i32 0 +} + +declare i32 @printf(i8*, ...) diff --git a/final/test/Analysis/ScalarEvolution/nsw-offset.ll b/final/test/Analysis/ScalarEvolution/nsw-offset.ll new file mode 100644 index 00000000000..4cd9a6de48c --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -S -analyze -scalar-evolution | FileCheck %s + +; ScalarEvolution should be able to fold away the sign-extensions +; on this loop with a primary induction variable incremented with +; a nsw add of 2. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define void @foo(i32 %no, double* nocapture %d, double* nocapture %q) nounwind { +entry: + %n = and i32 %no, 4294967294 + %0 = icmp sgt i32 %n, 0 ; [#uses=1] + br i1 %0, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb.nph, %bb1 + %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; [#uses=5] + +; CHECK: %1 = sext i32 %i.01 to i64 +; CHECK: --> {0,+,2}<%bb> + %1 = sext i32 %i.01 to i64 ; [#uses=1] + +; CHECK: %2 = getelementptr inbounds double* %d, i64 %1 +; CHECK: --> {%d,+,16}<%bb> + %2 = getelementptr inbounds double* %d, i64 %1 ; [#uses=1] + + %3 = load double* %2, align 8 ; [#uses=1] + %4 = sext i32 %i.01 to i64 ; [#uses=1] + %5 = getelementptr inbounds double* %q, i64 %4 ; [#uses=1] + %6 = load double* %5, align 8 ; [#uses=1] + %7 = or i32 %i.01, 1 ; [#uses=1] + +; CHECK: %8 = sext i32 %7 to i64 +; CHECK: --> {1,+,2}<%bb> + %8 = sext i32 %7 to i64 ; [#uses=1] + +; CHECK: %9 = getelementptr inbounds double* %q, i64 %8 +; CHECK: {(8 + %q),+,16}<%bb> + %9 = getelementptr inbounds double* %q, i64 %8 ; [#uses=1] + +; Artificially repeat the above three instructions, this time using +; add nsw instead of or. + %t7 = add nsw i32 %i.01, 1 ; [#uses=1] + +; CHECK: %t8 = sext i32 %t7 to i64 +; CHECK: --> {1,+,2}<%bb> + %t8 = sext i32 %t7 to i64 ; [#uses=1] + +; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8 +; CHECK: {(8 + %q),+,16}<%bb> + %t9 = getelementptr inbounds double* %q, i64 %t8 ; [#uses=1] + + %10 = load double* %9, align 8 ; [#uses=1] + %11 = fadd double %6, %10 ; [#uses=1] + %12 = fadd double %11, 3.200000e+00 ; [#uses=1] + %13 = fmul double %3, %12 ; [#uses=1] + %14 = sext i32 %i.01 to i64 ; [#uses=1] + %15 = getelementptr inbounds double* %d, i64 %14 ; [#uses=1] + store double %13, double* %15, align 8 + %16 = add nsw i32 %i.01, 2 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %17 = icmp slt i32 %16, %n ; [#uses=1] + br i1 %17, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} + +; CHECK: Loop %bb: backedge-taken count is ((-1 + %n) /u 2) +; CHECK: Loop %bb: max backedge-taken count is 1073741822 diff --git a/final/test/Analysis/ScalarEvolution/nsw.ll b/final/test/Analysis/ScalarEvolution/nsw.ll new file mode 100644 index 00000000000..9d8e2b62a9d --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/nsw.ll @@ -0,0 +1,106 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; The addrecs in this loop are analyzable only by using nsw information. 
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +; CHECK: Classifying expressions for: @test1 +define void @test1(double* %p) nounwind { +entry: + %tmp = load double* %p, align 8 ; [#uses=1] + %tmp1 = fcmp ogt double %tmp, 2.000000e+00 ; [#uses=1] + br i1 %tmp1, label %bb.nph, label %return + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ] ; [#uses=3] +; CHECK: %i.01 +; CHECK-NEXT: --> {0,+,1}<%bb> + %tmp2 = sext i32 %i.01 to i64 ; [#uses=1] + %tmp3 = getelementptr double* %p, i64 %tmp2 ; [#uses=1] + %tmp4 = load double* %tmp3, align 8 ; [#uses=1] + %tmp5 = fmul double %tmp4, 9.200000e+00 ; [#uses=1] + %tmp6 = sext i32 %i.01 to i64 ; [#uses=1] + %tmp7 = getelementptr double* %p, i64 %tmp6 ; [#uses=1] +; CHECK: %tmp7 +; CHECK-NEXT: --> {%p,+,8}<%bb> + store double %tmp5, double* %tmp7, align 8 + %tmp8 = add nsw i32 %i.01, 1 ; [#uses=2] +; CHECK: %tmp8 +; CHECK-NEXT: --> {1,+,1}<%bb> + br label %bb1 + +bb1: ; preds = %bb + %phitmp = sext i32 %tmp8 to i64 ; [#uses=1] +; CHECK: %phitmp +; CHECK-NEXT: --> {1,+,1}<%bb> + %tmp9 = getelementptr double* %p, i64 %phitmp ; [#uses=1] +; CHECK: %tmp9 +; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> + %tmp10 = load double* %tmp9, align 8 ; [#uses=1] + %tmp11 = fcmp ogt double %tmp10, 2.000000e+00 ; [#uses=1] + br i1 %tmp11, label %bb, label %bb1.return_crit_edge + +bb1.return_crit_edge: ; preds = %bb1 + br label %return + +return: ; preds = %bb1.return_crit_edge, %entry + ret void +} + +; CHECK: Classifying expressions for: @test2 +define void @test2(i32* %begin, i32* %end) ssp { +entry: + %cmp1.i.i = icmp eq i32* %begin, %end + br i1 %cmp1.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.lr.ph.i.i + +for.body.lr.ph.i.i: ; preds = %entry + br label %for.body.i.i + +for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i + %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ] +; CHECK: %__first.addr.02.i.i +; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> + store i32 0, i32* %__first.addr.02.i.i, align 4 + %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1 +; CHECK: %ptrincdec.i.i +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> + %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end + br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i + +for.cond.for.end_crit_edge.i.i: ; preds = %for.body.i.i + br label %_ZSt4fillIPiiEvT_S1_RKT0_.exit + +_ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %entry, %for.cond.for.end_crit_edge.i.i + ret void +} + +; Various checks for inbounds geps. 
+define void @test3(i32* %begin, i32* %end) nounwind ssp { +entry: + %cmp7.i.i = icmp eq i32* %begin, %end + br i1 %cmp7.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i + +for.body.i.i: ; preds = %entry, %for.body.i.i + %indvar.i.i = phi i64 [ %tmp, %for.body.i.i ], [ 0, %entry ] +; CHECK: %indvar.i.i +; CHECK: {0,+,1}<%for.body.i.i> + %tmp = add nsw i64 %indvar.i.i, 1 +; CHECK: %tmp = +; CHECK: {1,+,1}<%for.body.i.i> + %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp +; CHECK: %ptrincdec.i.i = +; CHECK: {(4 + %begin),+,4}<%for.body.i.i> + %__first.addr.08.i.i = getelementptr inbounds i32* %begin, i64 %indvar.i.i +; CHECK: %__first.addr.08.i.i +; CHECK: {%begin,+,4}<%for.body.i.i> + store i32 0, i32* %__first.addr.08.i.i, align 4 + %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end + br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i +; CHECK: Loop %for.body.i.i: Unpredictable backedge-taken count. +; CHECK: Loop %for.body.i.i: Unpredictable max backedge-taken count. +_ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry + ret void +} \ No newline at end of file diff --git a/final/test/Analysis/ScalarEvolution/pointer-sign-bits.ll b/final/test/Analysis/ScalarEvolution/pointer-sign-bits.ll new file mode 100644 index 00000000000..b2cec2d9fc8 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/pointer-sign-bits.ll @@ -0,0 +1,220 @@ +; RUN: opt < %s -analyze -scalar-evolution + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32" + %JavaObject = type { [0 x i32 (...)*]*, i8* } + +define void @JnJVM_antlr_CSharpCodeGenerator_genBitSet__Lantlr_collections_impl_BitSet_2I(%JavaObject*, %JavaObject*, i32) { +start: + br i1 undef, label %"stack overflow", label %"no stack overflow" + +"GOTO or IF*2": ; preds = %"true verifyAndComputePtr89", %verifyNullCont84 + unreachable + +"GOTO or IF*5": ; preds = %"true verifyAndComputePtr127", %"GOTO or IF*6" + unreachable + +"GOTO or IF*6": ; preds = %"true verifyAndComputePtr131.GOTO or IF*6_crit_edge", %"true verifyAndComputePtr89" + %indvar = phi i32 [ %indvar.next, %"true verifyAndComputePtr131.GOTO or IF*6_crit_edge" ], [ 0, %"true verifyAndComputePtr89" ] ; [#uses=2] + %.0.in = add i32 %indvar, 0 ; [#uses=1] + %.0 = add i32 %.0.in, 1 ; [#uses=1] + %3 = icmp slt i32 %.0, %4 ; [#uses=1] + br i1 %3, label %verifyNullCont126, label %"GOTO or IF*5" + +end: ; preds = %"no exception block35" + ret void + +"stack overflow": ; preds = %start + ret void + +"no stack overflow": ; preds = %start + br i1 undef, label %verifyNullCont, label %"no stack overflow.end_crit_edge" + +"no stack overflow.end_crit_edge": ; preds = %"no stack overflow" + ret void + +verifyNullCont: ; preds = %"no stack overflow" + br i1 undef, label %verifyNullCont9, label %verifyNullCont.end_crit_edge + +verifyNullCont.end_crit_edge: ; preds = %verifyNullCont + ret void + +verifyNullCont9: ; preds = %verifyNullCont + br i1 undef, label %verifyNullCont12, label %verifyNullCont9.end_crit_edge + +verifyNullCont9.end_crit_edge: ; preds = %verifyNullCont9 + ret void + +verifyNullCont12: ; preds = %verifyNullCont9 + br i1 undef, label %"no exception block13", label %verifyNullCont12.end_crit_edge + +verifyNullCont12.end_crit_edge: ; preds = %verifyNullCont12 + ret void + +"no exception block13": ; preds = %verifyNullCont12 + br i1 undef, label %verifyNullExit14, label %verifyNullCont15 + +verifyNullExit14: ; preds = %"no exception block13" + ret void + 
+verifyNullCont15: ; preds = %"no exception block13" + br i1 undef, label %"no exception block16", label %verifyNullCont15.end_crit_edge + +verifyNullCont15.end_crit_edge: ; preds = %verifyNullCont15 + ret void + +"no exception block16": ; preds = %verifyNullCont15 + br i1 undef, label %verifyNullExit17, label %verifyNullCont18 + +verifyNullExit17: ; preds = %"no exception block16" + ret void + +verifyNullCont18: ; preds = %"no exception block16" + br i1 undef, label %"no exception block19", label %verifyNullCont18.end_crit_edge + +verifyNullCont18.end_crit_edge: ; preds = %verifyNullCont18 + ret void + +"no exception block19": ; preds = %verifyNullCont18 + br i1 undef, label %verifyNullExit20, label %verifyNullCont21 + +verifyNullExit20: ; preds = %"no exception block19" + ret void + +verifyNullCont21: ; preds = %"no exception block19" + br i1 undef, label %verifyNullCont24, label %verifyNullCont21.end_crit_edge + +verifyNullCont21.end_crit_edge: ; preds = %verifyNullCont21 + ret void + +verifyNullCont24: ; preds = %verifyNullCont21 + br i1 undef, label %verifyNullCont27, label %verifyNullCont24.end_crit_edge + +verifyNullCont24.end_crit_edge: ; preds = %verifyNullCont24 + ret void + +verifyNullCont27: ; preds = %verifyNullCont24 + br i1 undef, label %verifyNullCont32, label %verifyNullCont27.end_crit_edge + +verifyNullCont27.end_crit_edge: ; preds = %verifyNullCont27 + ret void + +verifyNullCont32: ; preds = %verifyNullCont27 + br i1 undef, label %verifyNullExit33, label %verifyNullCont34 + +verifyNullExit33: ; preds = %verifyNullCont32 + ret void + +verifyNullCont34: ; preds = %verifyNullCont32 + br i1 undef, label %"no exception block35", label %verifyNullCont34.end_crit_edge + +verifyNullCont34.end_crit_edge: ; preds = %verifyNullCont34 + ret void + +"no exception block35": ; preds = %verifyNullCont34 + br i1 undef, label %end, label %verifyNullCont60 + +verifyNullCont60: ; preds = %"no exception block35" + br i1 undef, label %verifyNullCont63, label %verifyNullCont60.end_crit_edge + +verifyNullCont60.end_crit_edge: ; preds = %verifyNullCont60 + ret void + +verifyNullCont63: ; preds = %verifyNullCont60 + br i1 undef, label %"no exception block64", label %verifyNullCont63.end_crit_edge + +verifyNullCont63.end_crit_edge: ; preds = %verifyNullCont63 + ret void + +"no exception block64": ; preds = %verifyNullCont63 + br i1 undef, label %verifyNullExit65, label %verifyNullCont66 + +verifyNullExit65: ; preds = %"no exception block64" + ret void + +verifyNullCont66: ; preds = %"no exception block64" + br i1 undef, label %"no exception block67", label %verifyNullCont66.end_crit_edge + +verifyNullCont66.end_crit_edge: ; preds = %verifyNullCont66 + ret void + +"no exception block67": ; preds = %verifyNullCont66 + br i1 undef, label %verifyNullExit68, label %verifyNullCont69 + +verifyNullExit68: ; preds = %"no exception block67" + ret void + +verifyNullCont69: ; preds = %"no exception block67" + br i1 undef, label %"no exception block70", label %verifyNullCont69.end_crit_edge + +verifyNullCont69.end_crit_edge: ; preds = %verifyNullCont69 + ret void + +"no exception block70": ; preds = %verifyNullCont69 + br i1 undef, label %verifyNullExit71, label %verifyNullCont72 + +verifyNullExit71: ; preds = %"no exception block70" + ret void + +verifyNullCont72: ; preds = %"no exception block70" + br i1 undef, label %verifyNullCont75, label %verifyNullCont72.end_crit_edge + +verifyNullCont72.end_crit_edge: ; preds = %verifyNullCont72 + ret void + +verifyNullCont75: ; preds = %verifyNullCont72 + br i1 undef, 
label %verifyNullCont78, label %verifyNullCont75.end_crit_edge + +verifyNullCont75.end_crit_edge: ; preds = %verifyNullCont75 + ret void + +verifyNullCont78: ; preds = %verifyNullCont75 + br i1 undef, label %"verifyNullCont78.GOTO or IF*4_crit_edge", label %verifyNullCont78.end_crit_edge + +"verifyNullCont78.GOTO or IF*4_crit_edge": ; preds = %verifyNullCont78 + br i1 undef, label %verifyNullExit80, label %verifyNullCont81 + +verifyNullCont78.end_crit_edge: ; preds = %verifyNullCont78 + ret void + +verifyNullExit80: ; preds = %"verifyNullCont78.GOTO or IF*4_crit_edge" + ret void + +verifyNullCont81: ; preds = %"verifyNullCont78.GOTO or IF*4_crit_edge" + %4 = ptrtoint i8* undef to i32 ; [#uses=2] + %5 = icmp slt i32 0, %4 ; [#uses=1] + br i1 %5, label %verifyNullCont84, label %verifyNullCont172 + +verifyNullCont84: ; preds = %verifyNullCont81 + br i1 undef, label %"GOTO or IF*2", label %verifyNullCont86 + +verifyNullCont86: ; preds = %verifyNullCont84 + br i1 undef, label %"true verifyAndComputePtr", label %"false verifyAndComputePtr" + +"true verifyAndComputePtr": ; preds = %verifyNullCont86 + br i1 undef, label %"true verifyAndComputePtr89", label %"false verifyAndComputePtr90" + +"false verifyAndComputePtr": ; preds = %verifyNullCont86 + ret void + +"true verifyAndComputePtr89": ; preds = %"true verifyAndComputePtr" + br i1 undef, label %"GOTO or IF*6", label %"GOTO or IF*2" + +"false verifyAndComputePtr90": ; preds = %"true verifyAndComputePtr" + ret void + +verifyNullCont126: ; preds = %"GOTO or IF*6" + br i1 undef, label %"true verifyAndComputePtr127", label %"false verifyAndComputePtr128" + +"true verifyAndComputePtr127": ; preds = %verifyNullCont126 + br i1 undef, label %"true verifyAndComputePtr131.GOTO or IF*6_crit_edge", label %"GOTO or IF*5" + +"false verifyAndComputePtr128": ; preds = %verifyNullCont126 + ret void + +"true verifyAndComputePtr131.GOTO or IF*6_crit_edge": ; preds = %"true verifyAndComputePtr127" + %indvar.next = add i32 %indvar, 1 ; [#uses=1] + br label %"GOTO or IF*6" + +verifyNullCont172: ; preds = %verifyNullCont81 + unreachable +} diff --git a/final/test/Analysis/ScalarEvolution/pr3909.ll b/final/test/Analysis/ScalarEvolution/pr3909.ll new file mode 100644 index 00000000000..10e328ddf7e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/pr3909.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -indvars -disable-output +; PR 3909 + + + type { i32, %1* } ; type %0 + type { i32, i8* } ; type %1 + +define x86_stdcallcc i32 @_Dmain(%0 %unnamed) { +entry: + br label %whilebody + +whilebody: ; preds = %endwhile5, %entry + %i.0 = phi i64 [ 0, %entry ], [ %tmp11, %endwhile5 ] ; [#uses=1] + %m.0 = phi i64 [ 0, %entry ], [ %tmp11, %endwhile5 ] ; [#uses=2] + %tmp2 = mul i64 %m.0, %m.0 ; [#uses=1] + br label %whilecond3 + +whilecond3: ; preds = %whilebody4, %whilebody + %j.0 = phi i64 [ %tmp2, %whilebody ], [ %tmp9, %whilebody4 ] ; [#uses=2] + %tmp7 = icmp ne i64 %j.0, 0 ; [#uses=1] + br i1 %tmp7, label %whilebody4, label %endwhile5 + +whilebody4: ; preds = %whilecond3 + %tmp9 = add i64 %j.0, 1 ; [#uses=1] + br label %whilecond3 + +endwhile5: ; preds = %whilecond3 + %tmp11 = add i64 %i.0, 1 ; [#uses=2] + br label %whilebody +} diff --git a/final/test/Analysis/ScalarEvolution/scev-aa.ll b/final/test/Analysis/ScalarEvolution/scev-aa.ll new file mode 100644 index 00000000000..dd5a66ccb44 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/scev-aa.ll @@ -0,0 +1,215 @@ +; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \ +; RUN: |& FileCheck %s + +; At the time of this 
writing, -basicaa misses the example of the form +; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references, +; and the example of the form A[0] != A[i+1], where i+1 is known to be positive. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +; p[i] and p[i+1] don't alias. + +; CHECK: Function: loop: 3 pointers, 0 call sites +; CHECK: NoAlias: double* %pi, double* %pi.next + +define void @loop(double* nocapture %p, i64 %n) nounwind { +entry: + %j = icmp sgt i64 %n, 0 + br i1 %j, label %bb, label %return + +bb: + %i = phi i64 [ 0, %entry ], [ %i.next, %bb ] + %pi = getelementptr double* %p, i64 %i + %i.next = add i64 %i, 1 + %pi.next = getelementptr double* %p, i64 %i.next + %x = load double* %pi + %y = load double* %pi.next + %z = fmul double %x, %y + store double %z, double* %pi + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %return, label %bb + +return: + ret void +} + +; Slightly more involved: p[j][i], p[j][i+1], and p[j+1][i] don't alias. + +; CHECK: Function: nestedloop: 4 pointers, 0 call sites +; CHECK: NoAlias: double* %pi.j, double* %pi.next.j +; CHECK: NoAlias: double* %pi.j, double* %pi.j.next +; CHECK: NoAlias: double* %pi.j.next, double* %pi.next.j + +define void @nestedloop(double* nocapture %p, i64 %m) nounwind { +entry: + %k = icmp sgt i64 %m, 0 + br i1 %k, label %guard, label %return + +guard: + %l = icmp sgt i64 91, 0 + br i1 %l, label %outer.loop, label %return + +outer.loop: + %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ] + br label %bb + +bb: + %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ] + %i.next = add i64 %i, 1 + + %e = add i64 %i, %j + %pi.j = getelementptr double* %p, i64 %e + %f = add i64 %i.next, %j + %pi.next.j = getelementptr double* %p, i64 %f + %x = load double* %pi.j + %y = load double* %pi.next.j + %z = fmul double %x, %y + store double %z, double* %pi.j + + %o = add i64 %j, 91 + %g = add i64 %i, %o + %pi.j.next = getelementptr double* %p, i64 %g + %a = load double* %pi.j.next + %b = fmul double %x, %a + store double %b, double* %pi.j.next + + %exitcond = icmp eq i64 %i.next, 91 + br i1 %exitcond, label %outer.latch, label %bb + +outer.latch: + %j.next = add i64 %j, 91 + %h = icmp eq i64 %j.next, %m + br i1 %h, label %return, label %outer.loop + +return: + ret void +} + +; Even more involved: same as nestedloop, but with a variable extent. +; When n is 1, p[j+1][i] does alias p[j][i+1], and there's no way to +; prove whether n will be greater than 1, so that relation will always +; be MayAlias. The loop is guarded by an n > 0 test though, so +; p[j+1][i] and p[j][i] can theoretically be determined to be NoAlias; +; however, the analysis currently doesn't do that. +; TODO: Make the analysis smarter and turn that MayAlias into a NoAlias.
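+;
+; Concretely, the two stores in the loop body address p[i + (j + n)] and
+; p[(i + 1) + j], and those subscripts are equal exactly when n == 1, so
+; without a bound on n the only safe answer for that pair is MayAlias.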
+ +; CHECK: Function: nestedloop_more: 4 pointers, 0 call sites +; CHECK: NoAlias: double* %pi.j, double* %pi.next.j +; CHECK: MayAlias: double* %pi.j, double* %pi.j.next + +define void @nestedloop_more(double* nocapture %p, i64 %n, i64 %m) nounwind { +entry: + %k = icmp sgt i64 %m, 0 + br i1 %k, label %guard, label %return + +guard: + %l = icmp sgt i64 %n, 0 + br i1 %l, label %outer.loop, label %return + +outer.loop: + %j = phi i64 [ 0, %guard ], [ %j.next, %outer.latch ] + br label %bb + +bb: + %i = phi i64 [ 0, %outer.loop ], [ %i.next, %bb ] + %i.next = add i64 %i, 1 + + %e = add i64 %i, %j + %pi.j = getelementptr double* %p, i64 %e + %f = add i64 %i.next, %j + %pi.next.j = getelementptr double* %p, i64 %f + %x = load double* %pi.j + %y = load double* %pi.next.j + %z = fmul double %x, %y + store double %z, double* %pi.j + + %o = add i64 %j, %n + %g = add i64 %i, %o + %pi.j.next = getelementptr double* %p, i64 %g + %a = load double* %pi.j.next + %b = fmul double %x, %a + store double %b, double* %pi.j.next + + %exitcond = icmp eq i64 %i.next, %n + br i1 %exitcond, label %outer.latch, label %bb + +outer.latch: + %j.next = add i64 %j, %n + %h = icmp eq i64 %j.next, %m + br i1 %h, label %return, label %outer.loop + +return: + ret void +} + +; ScalarEvolution expands field offsets into constants, which allows it to +; do aggressive analysis. Contrast this with BasicAA, which works by +; recognizing GEP idioms. + +%struct.A = type { %struct.B, i32, i32 } +%struct.B = type { double } + +; CHECK: Function: foo: 7 pointers, 0 call sites +; CHECK: NoAlias: %struct.B* %B, i32* %Z +; CHECK: NoAlias: %struct.B* %B, %struct.B* %C +; CHECK: MustAlias: %struct.B* %C, i32* %Z +; CHECK: NoAlias: %struct.B* %B, i32* %X +; CHECK: MustAlias: i32* %X, i32* %Z +; CHECK: MustAlias: %struct.B* %C, i32* %Y +; CHECK: MustAlias: i32* %X, i32* %Y + +define void @foo() { +entry: + %A = alloca %struct.A + %B = getelementptr %struct.A* %A, i32 0, i32 0 + %Q = bitcast %struct.B* %B to %struct.A* + %Z = getelementptr %struct.A* %Q, i32 0, i32 1 + %C = getelementptr %struct.B* %B, i32 1 + %X = bitcast %struct.B* %C to i32* + %Y = getelementptr %struct.A* %A, i32 0, i32 1 + ret void +} + +; CHECK: Function: bar: 7 pointers, 0 call sites +; CHECK: NoAlias: %struct.B* %N, i32* %P +; CHECK: NoAlias: %struct.B* %N, %struct.B* %R +; CHECK: MustAlias: %struct.B* %R, i32* %P +; CHECK: NoAlias: %struct.B* %N, i32* %W +; CHECK: MustAlias: i32* %P, i32* %W +; CHECK: MustAlias: %struct.B* %R, i32* %V +; CHECK: MustAlias: i32* %V, i32* %W + +define void @bar() { + %M = alloca %struct.A + %N = getelementptr %struct.A* %M, i32 0, i32 0 + %O = bitcast %struct.B* %N to %struct.A* + %P = getelementptr %struct.A* %O, i32 0, i32 1 + %R = getelementptr %struct.B* %N, i32 1 + %W = bitcast %struct.B* %R to i32* + %V = getelementptr %struct.A* %M, i32 0, i32 1 + ret void +} + +; CHECK: Function: nonnegative: 2 pointers, 0 call sites +; CHECK: NoAlias: i64* %arrayidx, i64* %p + +define void @nonnegative(i64* %p) nounwind { +entry: + br label %for.body + +for.body: ; preds = %entry, %for.body + %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] ; [#uses=2] + %inc = add nsw i64 %i, 1 ; [#uses=2] + %arrayidx = getelementptr inbounds i64* %p, i64 %inc + store i64 0, i64* %arrayidx + %tmp6 = load i64* %p ; [#uses=1] + %cmp = icmp slt i64 %inc, %tmp6 ; [#uses=1] + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body, %entry + ret void +} + +; CHECK: 14 no alias responses +; CHECK: 26 may alias responses +; CHECK: 18 must alias 
responses diff --git a/final/test/Analysis/ScalarEvolution/sext-inreg.ll b/final/test/Analysis/ScalarEvolution/sext-inreg.ll new file mode 100644 index 00000000000..23e1210dba5 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/sext-inreg.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -analyze -scalar-evolution > %t +; RUN: grep {sext i57 \{0,+,199\}<%bb> to i64} %t | count 1 +; RUN: grep {sext i59 \{0,+,199\}<%bb> to i64} %t | count 1 + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + +define i64 @foo(i64* nocapture %x, i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 ; [#uses=1] + br i1 %t0, label %bb, label %return + +bb: ; preds = %bb, %entry + %i.01 = phi i64 [ 0, %entry ], [ %indvar.next, %bb ] ; [#uses=2] + %t1 = shl i64 %i.01, 7 ; [#uses=1] + %t2 = ashr i64 %t1, 7 ; [#uses=1] + %s1 = shl i64 %i.01, 5 ; [#uses=1] + %s2 = ashr i64 %s1, 5 ; [#uses=1] + %t3 = getelementptr i64* %x, i64 %i.01 ; [#uses=1] + store i64 0, i64* %t3, align 1 + %indvar.next = add i64 %i.01, 199 ; [#uses=2] + %exitcond = icmp eq i64 %indvar.next, %n ; [#uses=1] + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + %p = phi i64 [ 0, %entry ], [ %t2, %bb ] + %q = phi i64 [ 0, %entry ], [ %s2, %bb ] + %v = xor i64 %p, %q + ret i64 %v +} diff --git a/final/test/Analysis/ScalarEvolution/sext-iv-0.ll b/final/test/Analysis/ScalarEvolution/sext-iv-0.ll new file mode 100644 index 00000000000..2af794fbbc3 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep { --> \{-128,+,1\}<%bb1> Exits: 127} | count 5 + +; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the +; trip count is within range where this is safe. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo(double* nocapture %x) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] + %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] + %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] + %2 = sext i9 %1 to i64 ; [#uses=1] + %3 = getelementptr double* %x, i64 %2 ; [#uses=1] + %4 = load double* %3, align 8 ; [#uses=1] + %5 = fmul double %4, 3.900000e+00 ; [#uses=1] + %6 = sext i8 %0 to i64 ; [#uses=1] + %7 = getelementptr double* %x, i64 %6 ; [#uses=1] + store double %5, double* %7, align 8 + %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] + %9 = icmp sgt i64 %8, 127 ; [#uses=1] + br i1 %9, label %return, label %bb1 + +return: ; preds = %bb1 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/sext-iv-1.ll b/final/test/Analysis/ScalarEvolution/sext-iv-1.ll new file mode 100644 index 00000000000..9063cbb22a7 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/sext-iv-1.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep { --> (sext i. \{.\*,+,.\*\}<%bb1> to i64)} | count 5 + +; Don't convert (sext {...,+,...}) to {sext(...),+,sext(...)} in cases +; where the trip count is not within range. 
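+
+; For example, in @foo1 below the induction variable runs from -128 up to
+; 128 inclusive (the exit test is sgt 128 on the incremented value); 128 is
+; not representable in i8, so the truncated i8 recurrence wraps and the
+; sext must be preserved.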
+ +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +define void @foo0(double* nocapture %x) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] + %0 = trunc i64 %i.0.reg2mem.0 to i7 ; [#uses=1] + %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] + %2 = sext i9 %1 to i64 ; [#uses=1] + %3 = getelementptr double* %x, i64 %2 ; [#uses=1] + %4 = load double* %3, align 8 ; [#uses=1] + %5 = fmul double %4, 3.900000e+00 ; [#uses=1] + %6 = sext i7 %0 to i64 ; [#uses=1] + %7 = getelementptr double* %x, i64 %6 ; [#uses=1] + store double %5, double* %7, align 8 + %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] + %9 = icmp sgt i64 %8, 127 ; [#uses=1] + br i1 %9, label %return, label %bb1 + +return: ; preds = %bb1 + ret void +} + +define void @foo1(double* nocapture %x) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] + %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] + %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] + %2 = sext i9 %1 to i64 ; [#uses=1] + %3 = getelementptr double* %x, i64 %2 ; [#uses=1] + %4 = load double* %3, align 8 ; [#uses=1] + %5 = fmul double %4, 3.900000e+00 ; [#uses=1] + %6 = sext i8 %0 to i64 ; [#uses=1] + %7 = getelementptr double* %x, i64 %6 ; [#uses=1] + store double %5, double* %7, align 8 + %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] + %9 = icmp sgt i64 %8, 128 ; [#uses=1] + br i1 %9, label %return, label %bb1 + +return: ; preds = %bb1 + ret void +} + +define void @foo2(double* nocapture %x) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i64 [ -129, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] + %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] + %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] + %2 = sext i9 %1 to i64 ; [#uses=1] + %3 = getelementptr double* %x, i64 %2 ; [#uses=1] + %4 = load double* %3, align 8 ; [#uses=1] + %5 = fmul double %4, 3.900000e+00 ; [#uses=1] + %6 = sext i8 %0 to i64 ; [#uses=1] + %7 = getelementptr double* %x, i64 %6 ; [#uses=1] + store double %5, double* %7, align 8 + %8 = add i64 %i.0.reg2mem.0, 1 ; [#uses=2] + %9 = icmp sgt i64 %8, 127 ; [#uses=1] + br i1 %9, label %return, label %bb1 + +return: ; preds = %bb1 + ret void +} + +define void @foo3(double* nocapture %x) nounwind { +bb1.thread: + br label %bb1 + +bb1: ; preds = %bb1, %bb1.thread + %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] + %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] + %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] + %2 = sext i9 %1 to i64 ; [#uses=1] + %3 = getelementptr double* %x, i64 %2 ; [#uses=1] + %4 = load double* %3, align 8 ; [#uses=1] + %5 = fmul double %4, 3.900000e+00 ; [#uses=1] + %6 = sext i8 %0 to i64 ; [#uses=1] + %7 = getelementptr double* %x, i64 %6 ; [#uses=1] + store double %5, double* %7, align 8 + %8 = add i64 %i.0.reg2mem.0, -1 ; [#uses=2] + %9 = icmp sgt i64 %8, 127 ; [#uses=1] + br i1 %9, label %return, label %bb1 + +return: ; preds = %bb1 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/sext-iv-2.ll b/final/test/Analysis/ScalarEvolution/sext-iv-2.ll new file mode 100644 index 00000000000..97e252c1fb3 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/sext-iv-2.ll @@ -0,0 +1,74 @@ +; RUN: opt < %s -analyze 
-scalar-evolution | FileCheck %s + +; CHECK: %tmp3 = sext i8 %tmp2 to i32 +; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32) Exits: -1 +; CHECK: %tmp4 = mul i32 %tmp3, %i.02 +; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>) Exits: {0,+,-1}<%bb> + +; These sexts are not foldable. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" + +@table = common global [32 x [256 x i32]] zeroinitializer, align 32 ; <[32 x [256 x i32]]*> [#uses=2] + +define i32 @main() nounwind { +entry: + br i1 false, label %bb5, label %bb.nph3 + +bb.nph3: ; preds = %entry + br label %bb + +bb: ; preds = %bb4, %bb.nph3 + %i.02 = phi i32 [ %tmp10, %bb4 ], [ 0, %bb.nph3 ] ; [#uses=3] + br i1 false, label %bb3, label %bb.nph + +bb.nph: ; preds = %bb + br label %bb1 + +bb1: ; preds = %bb2, %bb.nph + %j.01 = phi i32 [ %tmp8, %bb2 ], [ 0, %bb.nph ] ; [#uses=3] + %tmp2 = trunc i32 %j.01 to i8 ; [#uses=1] + %tmp3 = sext i8 %tmp2 to i32 ; [#uses=1] + %tmp4 = mul i32 %tmp3, %i.02 ; [#uses=1] + %tmp5 = sext i32 %i.02 to i64 ; [#uses=1] + %tmp6 = sext i32 %j.01 to i64 ; [#uses=1] + %tmp7 = getelementptr [32 x [256 x i32]]* @table, i64 0, i64 %tmp5, i64 %tmp6 ; [#uses=1] + store i32 %tmp4, i32* %tmp7, align 4 + %tmp8 = add i32 %j.01, 1 ; [#uses=2] + br label %bb2 + +bb2: ; preds = %bb1 + %phitmp1 = icmp sgt i32 %tmp8, 255 ; [#uses=1] + br i1 %phitmp1, label %bb2.bb3_crit_edge, label %bb1 + +bb2.bb3_crit_edge: ; preds = %bb2 + br label %bb3 + +bb3: ; preds = %bb2.bb3_crit_edge, %bb + %tmp10 = add i32 %i.02, 1 ; [#uses=2] + br label %bb4 + +bb4: ; preds = %bb3 + %phitmp = icmp sgt i32 %tmp10, 31 ; [#uses=1] + br i1 %phitmp, label %bb4.bb5_crit_edge, label %bb + +bb4.bb5_crit_edge: ; preds = %bb4 + br label %bb5 + +bb5: ; preds = %bb4.bb5_crit_edge, %entry + %tmp12 = load i32* getelementptr ([32 x [256 x i32]]* @table, i64 0, i64 9, i64 132), align 16 ; [#uses=1] + %tmp13 = icmp eq i32 %tmp12, -1116 ; [#uses=1] + br i1 %tmp13, label %bb7, label %bb6 + +bb6: ; preds = %bb5 + call void @abort() noreturn nounwind + unreachable + +bb7: ; preds = %bb5 + br label %return + +return: ; preds = %bb7 + ret i32 0 +} + +declare void @abort() noreturn nounwind diff --git a/final/test/Analysis/ScalarEvolution/sle.ll b/final/test/Analysis/ScalarEvolution/sle.ll new file mode 100644 index 00000000000..f38f6b63dce --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/sle.ll @@ -0,0 +1,27 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; ScalarEvolution should be able to use nsw information to prove that +; this loop has a finite trip count. 
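+;
+; With nsw on the increment, %i.next cannot wrap around the top of the i64
+; range, so the %i.next > %n exit test must eventually succeed: the loop
+; runs for i = 0 .. %n inclusive and takes the backedge %n times, which is
+; the count checked below.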
+ +; CHECK: @le +; CHECK: Loop %for.body: backedge-taken count is %n +; CHECK: Loop %for.body: max backedge-taken count is 9223372036854775807 + +define void @le(i64 %n, double* nocapture %p) nounwind { +entry: + %cmp6 = icmp slt i64 %n, 0 ; [#uses=1] + br i1 %cmp6, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ] ; [#uses=2] + %arrayidx = getelementptr double* %p, i64 %i ; [#uses=2] + %t4 = load double* %arrayidx ; [#uses=1] + %mul = fmul double %t4, 2.200000e+00 ; [#uses=1] + store double %mul, double* %arrayidx + %i.next = add nsw i64 %i, 1 ; [#uses=2] + %cmp = icmp sgt i64 %i.next, %n ; [#uses=1] + br i1 %cmp, label %for.end, label %for.body + +for.end: ; preds = %for.body, %entry + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/smax.ll b/final/test/Analysis/ScalarEvolution/smax.ll new file mode 100644 index 00000000000..15dd744c8fd --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/smax.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -analyze -scalar-evolution | grep smax | count 2 +; RUN: opt < %s -analyze -scalar-evolution | grep \ +; RUN: {%. smax %. smax %.} +; PR1614 + +define i32 @x(i32 %a, i32 %b, i32 %c) { + %A = icmp sgt i32 %a, %b + %B = select i1 %A, i32 %a, i32 %b + %C = icmp sle i32 %c, %B + %D = select i1 %C, i32 %B, i32 %c + ret i32 %D +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count.ll b/final/test/Analysis/ScalarEvolution/trip-count.ll new file mode 100644 index 00000000000..d750d4a1f37 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count.ll @@ -0,0 +1,29 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: -scalar-evolution-max-iterations=0 | grep {backedge-taken count is 10000} +; PR1101 + +@A = weak global [1000 x i32] zeroinitializer, align 32 + + +define void @test(i32 %N) { +entry: + "alloca point" = bitcast i32 0 to i32 ; [#uses=0] + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32]* @A, i32 0, i32 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp2 = add i32 %i.0, 1 ; [#uses=1] + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %tmp3 = icmp sle i32 %i.0, 9999 ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count10.ll b/final/test/Analysis/ScalarEvolution/trip-count10.ll new file mode 100644 index 00000000000..546e1dc7d8f --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count10.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; Trip counts with trivial exit conditions. + +; CHECK: Determining loop execution counts for: @a +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. + +; CHECK: Determining loop execution counts for: @b +; CHECK: Loop %loop: backedge-taken count is false +; CHECK: Loop %loop: max backedge-taken count is false + +; CHECK: Determining loop execution counts for: @c +; CHECK: Loop %loop: backedge-taken count is false +; CHECK: Loop %loop: max backedge-taken count is false + +; CHECK: Determining loop execution counts for: @d +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. 
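+;
+; In @b and @c below the latch branch is constant, so the backedge is never
+; taken and the backedge-taken count is the constant false (i.e. zero); in
+; @a and @d the constant branch always goes back to the loop, leaving no
+; exit whose count can be computed.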
+ +define void @a(i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 false, label %return, label %loop + +return: + ret void +} +define void @b(i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 true, label %return, label %loop + +return: + ret void +} +define void @c(i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 false, label %loop, label %return + +return: + ret void +} +define void @d(i64 %n) nounwind { +entry: + %t0 = icmp sgt i64 %n, 0 + br i1 %t0, label %loop, label %return + +loop: + %i = phi i64 [ %i.next, %loop ], [ 0, %entry ] + %i.next = add nsw i64 %i, 1 + %exitcond = icmp eq i64 %i.next, %n + br i1 true, label %loop, label %return + +return: + ret void +} + +; Trip counts for non-polynomial iterations. It's theoretically possible +; to compute a maximum count for these, but short of that, ScalarEvolution +; should return unknown. + +; PR7416 +; CHECK: Determining loop execution counts for: @nonpolynomial +; CHECK-NEXT: Loop %loophead: Unpredictable backedge-taken count +; CHECK-NEXT: Loop %loophead: Unpredictable max backedge-taken count + +declare i1 @g() nounwind + +define void @nonpolynomial() { +entry: + br label %loophead +loophead: + %x = phi i32 [0, %entry], [%x.1, %bb1], [%x.2, %bb2] + %y = icmp slt i32 %x, 100 + br i1 %y, label %loopbody, label %retbb +loopbody: + %z = call i1 @g() + br i1 %z, label %bb1, label %bb2 +bb1: + %x.1 = add i32 %x, 2 + br label %loophead +bb2: + %x.2 = add i32 %x, 3 + br label %loophead +retbb: + ret void +} + +; PHI nodes with all constant operands. 
+ +; CHECK: Determining loop execution counts for: @constant_phi_operands +; CHECK: Loop %loop: backedge-taken count is 1 +; CHECK: Loop %loop: max backedge-taken count is 1 + +define void @constant_phi_operands() nounwind { +entry: + br label %loop + +loop: + %i = phi i64 [ 1, %loop ], [ 0, %entry ] + %exitcond = icmp eq i64 %i, 1 + br i1 %exitcond, label %return, label %loop + +return: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count2.ll b/final/test/Analysis/ScalarEvolution/trip-count2.ll new file mode 100644 index 00000000000..79f31619fd8 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count2.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -analyze -scalar-evolution | \ +; RUN: grep {backedge-taken count is 4} +; PR1101 + +@A = weak global [1000 x i32] zeroinitializer, align 32 + + +define void @test(i32 %N) { +entry: + "alloca point" = bitcast i32 0 to i32 ; [#uses=0] + br label %bb3 + +bb: ; preds = %bb3 + %tmp = getelementptr [1000 x i32]* @A, i32 0, i32 %i.0 ; [#uses=1] + store i32 123, i32* %tmp + %tmp4 = mul i32 %i.0, 4 ; [#uses=1] + %tmp5 = or i32 %tmp4, 1 + %tmp61 = xor i32 %tmp5, -2147483648 + %tmp6 = trunc i32 %tmp61 to i16 + %tmp71 = shl i16 %tmp6, 2 + %tmp7 = zext i16 %tmp71 to i32 + %tmp2 = add i32 %tmp7, %i.0 + br label %bb3 + +bb3: ; preds = %bb, %entry + %i.0 = phi i32 [ 0, %entry ], [ %tmp2, %bb ] ; [#uses=3] + %tmp3 = icmp sle i32 %i.0, 9999 ; [#uses=1] + br i1 %tmp3, label %bb, label %bb5 + +bb5: ; preds = %bb3 + br label %return + +return: ; preds = %bb5 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count3.ll b/final/test/Analysis/ScalarEvolution/trip-count3.ll new file mode 100644 index 00000000000..10b798b5067 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count3.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep {Loop %bb3\\.i: Unpredictable backedge-taken count\\.} + +; ScalarEvolution can't compute a trip count because it doesn't know if +; dividing by the stride will have a remainder. This could theoretically +; be fixed by teaching it how to use a more elaborate trip count computation. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } + %struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] } + %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } +@_2E_str = external constant [26 x i8] ; <[26 x i8]*> [#uses=0] +@stdin = external global %struct.FILE* ; <%struct.FILE**> [#uses=0] +@_2E_str1 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0] +@_2E_str12 = external constant [30 x i8] ; <[30 x i8]*> [#uses=0] + +declare void @sha_init(%struct.SHA_INFO* nocapture) nounwind + +declare fastcc void @sha_transform(%struct.SHA_INFO* nocapture) nounwind + +declare void @sha_print(%struct.SHA_INFO* nocapture) nounwind + +declare i32 @printf(i8* nocapture, ...)
nounwind + +declare void @sha_final(%struct.SHA_INFO* nocapture) nounwind + +declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind + +declare void @sha_update(%struct.SHA_INFO* nocapture, i8* nocapture, i32) nounwind + +declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind + +declare i64 @fread(i8* noalias nocapture, i64, i64, %struct.FILE* noalias nocapture) nounwind + +declare i32 @main(i32, i8** nocapture) nounwind + +declare noalias %struct.FILE* @fopen(i8* noalias nocapture, i8* noalias nocapture) nounwind + +declare i32 @fclose(%struct.FILE* nocapture) nounwind + +declare void @sha_stream(%struct.SHA_INFO* nocapture, %struct.FILE* nocapture) nounwind + +define void @sha_stream_bb3_2E_i(%struct.SHA_INFO* %sha_info, i8* %data1, i32, i8** %buffer_addr.0.i.out, i32* %count_addr.0.i.out) nounwind { +newFuncRoot: + br label %bb3.i + +sha_update.exit.exitStub: ; preds = %bb3.i + store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out + store i32 %count_addr.0.i, i32* %count_addr.0.i.out + ret void + +bb2.i: ; preds = %bb3.i + %1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3 ; <[16 x i32]*> [#uses=1] + %2 = bitcast [16 x i32]* %1 to i8* ; [#uses=1] + call void @llvm.memcpy.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1) nounwind + %3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0 ; [#uses=1] + %4 = bitcast i32* %3 to i8* ; [#uses=1] + br label %codeRepl + +codeRepl: ; preds = %bb2.i + call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4) + br label %byte_reverse.exit.i + +byte_reverse.exit.i: ; preds = %codeRepl + call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind + %5 = getelementptr i8* %buffer_addr.0.i, i64 64 ; [#uses=1] + %6 = add i32 %count_addr.0.i, -64 ; [#uses=1] + br label %bb3.i + +bb3.i: ; preds = %byte_reverse.exit.i, %newFuncRoot + %buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ] ; [#uses=3] + %count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ] ; [#uses=3] + %7 = icmp sgt i32 %count_addr.0.i, 63 ; [#uses=1] + br i1 %7, label %bb2.i, label %sha_update.exit.exitStub +} + +declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind diff --git a/final/test/Analysis/ScalarEvolution/trip-count4.ll b/final/test/Analysis/ScalarEvolution/trip-count4.ll new file mode 100644 index 00000000000..116f62dbdbf --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count4.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | grep {sext.*trunc.*Exits: 11} + +; ScalarEvolution should be able to compute a loop exit value for %indvar.i8. 
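+;
+; The shl-by-8 / ashr-by-8 pair below is a sign-extend-in-register idiom:
+; %indvar.i8 is the low 56 bits of %indvar sign-extended back to i64, which
+; is why the RUN line greps for a sext of a trunc in the exit value.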
+ +define void @another_count_down_signed(double* %d, i64 %n) nounwind { +entry: + br label %loop + +loop: ; preds = %loop, %entry + %indvar = phi i64 [ %n, %entry ], [ %indvar.next, %loop ] ; [#uses=4] + %s0 = shl i64 %indvar, 8 ; [#uses=1] + %indvar.i8 = ashr i64 %s0, 8 ; [#uses=1] + %t0 = getelementptr double* %d, i64 %indvar.i8 ; [#uses=2] + %t1 = load double* %t0 ; [#uses=1] + %t2 = fmul double %t1, 1.000000e-01 ; [#uses=1] + store double %t2, double* %t0 + %indvar.next = sub i64 %indvar, 1 ; [#uses=2] + %exitcond = icmp eq i64 %indvar.next, 10 ; [#uses=1] + br i1 %exitcond, label %return, label %loop + +return: ; preds = %loop + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count5.ll b/final/test/Analysis/ScalarEvolution/trip-count5.ll new file mode 100644 index 00000000000..1194a1da66e --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count5.ll @@ -0,0 +1,48 @@ +; RUN: opt < %s -analyze -scalar-evolution > %t +; RUN: grep sext %t | count 2 +; RUN: not grep {(sext} %t + +; ScalarEvolution should be able to compute a maximum trip count +; value sufficient to fold away both sext casts. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define float @t(float* %pTmp1, float* %peakWeight, float* %nrgReducePeakrate, i32 %bim) nounwind { +entry: + %tmp3 = load float* %peakWeight, align 4 ; [#uses=2] + %tmp2538 = icmp sgt i32 %bim, 0 ; [#uses=1] + br i1 %tmp2538, label %bb.nph, label %bb4 + +bb.nph: ; preds = %entry + br label %bb + +bb: ; preds = %bb1, %bb.nph + %distERBhi.036 = phi float [ %tmp10, %bb1 ], [ 0.000000e+00, %bb.nph ] ; [#uses=1] + %hiPart.035 = phi i32 [ %tmp12, %bb1 ], [ 0, %bb.nph ] ; [#uses=2] + %peakCount.034 = phi float [ %tmp19, %bb1 ], [ %tmp3, %bb.nph ] ; [#uses=1] + %tmp6 = sext i32 %hiPart.035 to i64 ; [#uses=1] + %tmp7 = getelementptr float* %pTmp1, i64 %tmp6 ; [#uses=1] + %tmp8 = load float* %tmp7, align 4 ; [#uses=1] + %tmp10 = fadd float %tmp8, %distERBhi.036 ; [#uses=3] + %tmp12 = add i32 %hiPart.035, 1 ; [#uses=3] + %tmp15 = sext i32 %tmp12 to i64 ; [#uses=1] + %tmp16 = getelementptr float* %peakWeight, i64 %tmp15 ; [#uses=1] + %tmp17 = load float* %tmp16, align 4 ; [#uses=1] + %tmp19 = fadd float %tmp17, %peakCount.034 ; [#uses=2] + br label %bb1 + +bb1: ; preds = %bb + %tmp21 = fcmp olt float %tmp10, 2.500000e+00 ; [#uses=1] + %tmp25 = icmp slt i32 %tmp12, %bim ; [#uses=1] + %tmp27 = and i1 %tmp21, %tmp25 ; [#uses=1] + br i1 %tmp27, label %bb, label %bb1.bb4_crit_edge + +bb1.bb4_crit_edge: ; preds = %bb1 + br label %bb4 + +bb4: ; preds = %bb1.bb4_crit_edge, %entry + %distERBhi.0.lcssa = phi float [ %tmp10, %bb1.bb4_crit_edge ], [ 0.000000e+00, %entry ] ; [#uses=1] + %peakCount.0.lcssa = phi float [ %tmp19, %bb1.bb4_crit_edge ], [ %tmp3, %entry ] ; [#uses=1] + %tmp31 = fdiv float %peakCount.0.lcssa, %distERBhi.0.lcssa ; [#uses=1] + ret float %tmp31 +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count6.ll b/final/test/Analysis/ScalarEvolution/trip-count6.ll new file mode 100644 index 00000000000..956fb81b0ed --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count6.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | grep {max backedge-taken count is 1\$} + +@mode_table = global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1] + +define i8 @f() { +entry: + tail call i32 @fegetround( ) ; :0 [#uses=1] + br label %bb + +bb: ; preds = %bb4, %entry + %mode.0 = phi i8 [ 0, %entry ], [ 
%indvar.next, %bb4 ] ; [#uses=4] + zext i8 %mode.0 to i32 ; :1 [#uses=1] + getelementptr [4 x i32]* @mode_table, i32 0, i32 %1 ; :2 [#uses=1] + load i32* %2, align 4 ; :3 [#uses=1] + icmp eq i32 %3, %0 ; :4 [#uses=1] + br i1 %4, label %bb1, label %bb2 + +bb1: ; preds = %bb + ret i8 %mode.0 + +bb2: ; preds = %bb + icmp eq i8 %mode.0, 1 ; :5 [#uses=1] + br i1 %5, label %bb5, label %bb4 + +bb4: ; preds = %bb2 + %indvar.next = add i8 %mode.0, 1 ; [#uses=1] + br label %bb + +bb5: ; preds = %bb2 + tail call void @raise_exception( ) noreturn + unreachable +} + +declare i32 @fegetround() + +declare void @raise_exception() noreturn diff --git a/final/test/Analysis/ScalarEvolution/trip-count7.ll b/final/test/Analysis/ScalarEvolution/trip-count7.ll new file mode 100644 index 00000000000..a8b797e142f --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count7.ll @@ -0,0 +1,150 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | grep {Loop %bb7.i: Unpredictable backedge-taken count\\.} + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + + %struct.complex = type { float, float } + %struct.element = type { i32, i32 } + %struct.node = type { %struct.node*, %struct.node*, i32 } +@seed = external global i64 ; [#uses=0] +@_2E_str = external constant [18 x i8], align 1 ; <[18 x i8]*> [#uses=0] +@_2E_str1 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=0] +@value = external global float ; [#uses=0] +@fixed = external global float ; [#uses=0] +@floated = external global float ; [#uses=0] +@permarray = external global [11 x i32], align 32 ; <[11 x i32]*> [#uses=0] +@pctr = external global i32 ; [#uses=0] +@tree = external global %struct.node* ; <%struct.node**> [#uses=0] +@stack = external global [4 x i32], align 16 ; <[4 x i32]*> [#uses=0] +@cellspace = external global [19 x %struct.element], align 32 ; <[19 x %struct.element]*> [#uses=0] +@freelist = external global i32 ; [#uses=0] +@movesdone = external global i32 ; [#uses=0] +@ima = external global [41 x [41 x i32]], align 32 ; <[41 x [41 x i32]]*> [#uses=0] +@imb = external global [41 x [41 x i32]], align 32 ; <[41 x [41 x i32]]*> [#uses=0] +@imr = external global [41 x [41 x i32]], align 32 ; <[41 x [41 x i32]]*> [#uses=0] +@rma = external global [41 x [41 x float]], align 32 ; <[41 x [41 x float]]*> [#uses=0] +@rmb = external global [41 x [41 x float]], align 32 ; <[41 x [41 x float]]*> [#uses=0] +@rmr = external global [41 x [41 x float]], align 32 ; <[41 x [41 x float]]*> [#uses=0] +@piececount = external global [4 x i32], align 16 ; <[4 x i32]*> [#uses=0] +@class = external global [13 x i32], align 32 ; <[13 x i32]*> [#uses=0] +@piecemax = external global [13 x i32], align 32 ; <[13 x i32]*> [#uses=0] +@puzzl = external global [512 x i32], align 32 ; <[512 x i32]*> [#uses=0] +@p = external global [13 x [512 x i32]], align 32 ; <[13 x [512 x i32]]*> [#uses=0] +@n = external global i32 ; [#uses=0] +@kount = external global i32 ; [#uses=0] +@sortlist = external global [5001 x i32], align 32 ; <[5001 x i32]*> [#uses=0] +@biggest = external global i32 ; [#uses=0] +@littlest = external global i32 ; [#uses=0] +@top = external global i32 ; [#uses=0] +@z = external global [257 x %struct.complex], align 32 ; <[257 x %struct.complex]*> [#uses=0] +@w = external global [257 x %struct.complex], align 32 ; <[257 x %struct.complex]*> [#uses=0] +@e = external global [130 x %struct.complex], align 32 ; <[130 x %struct.complex]*> [#uses=0] 
+@zr = external global float ; [#uses=0] +@zi = external global float ; [#uses=0] + +declare void @Initrand() nounwind + +declare i32 @Rand() nounwind + +declare void @Try(i32, i32*, i32*, i32*, i32*, i32*) nounwind + +declare i32 @puts(i8* nocapture) nounwind + +declare void @Queens(i32) nounwind + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare i32 @main() nounwind + +declare void @Doit() nounwind + +declare void @Doit_bb7([15 x i32]*, [17 x i32]*, [9 x i32]*) nounwind + +define void @Doit_bb7_2E_i([9 x i32]* %x1, [15 x i32]* %c, [17 x i32]* %b, [9 x i32]* %a, i32* %q, i32* %x1.sub, i32* %b9, i32* %a10, i32* %c11) nounwind { +newFuncRoot: + br label %bb7.i + +Try.exit.exitStub: ; preds = %bb7.i + ret void + +bb.i: ; preds = %bb7.i + %tmp = add i32 %j.0.i, 1 ; [#uses=5] + store i32 0, i32* %q, align 4 + %tmp1 = sext i32 %tmp to i64 ; [#uses=1] + %tmp2 = getelementptr [9 x i32]* %a, i64 0, i64 %tmp1 ; [#uses=1] + %tmp3 = load i32* %tmp2, align 4 ; [#uses=1] + %tmp4 = icmp eq i32 %tmp3, 0 ; [#uses=1] + br i1 %tmp4, label %bb.i.bb7.i.backedge_crit_edge, label %bb1.i + +bb1.i: ; preds = %bb.i + %tmp5 = add i32 %j.0.i, 2 ; [#uses=1] + %tmp6 = sext i32 %tmp5 to i64 ; [#uses=1] + %tmp7 = getelementptr [17 x i32]* %b, i64 0, i64 %tmp6 ; [#uses=1] + %tmp8 = load i32* %tmp7, align 4 ; [#uses=1] + %tmp9 = icmp eq i32 %tmp8, 0 ; [#uses=1] + br i1 %tmp9, label %bb1.i.bb7.i.backedge_crit_edge, label %bb2.i + +bb2.i: ; preds = %bb1.i + %tmp10 = sub i32 7, %j.0.i ; [#uses=1] + %tmp11 = sext i32 %tmp10 to i64 ; [#uses=1] + %tmp12 = getelementptr [15 x i32]* %c, i64 0, i64 %tmp11 ; [#uses=1] + %tmp13 = load i32* %tmp12, align 4 ; [#uses=1] + %tmp14 = icmp eq i32 %tmp13, 0 ; [#uses=1] + br i1 %tmp14, label %bb2.i.bb7.i.backedge_crit_edge, label %bb3.i + +bb3.i: ; preds = %bb2.i + %tmp15 = getelementptr [9 x i32]* %x1, i64 0, i64 1 ; [#uses=1] + store i32 %tmp, i32* %tmp15, align 4 + %tmp16 = sext i32 %tmp to i64 ; [#uses=1] + %tmp17 = getelementptr [9 x i32]* %a, i64 0, i64 %tmp16 ; [#uses=1] + store i32 0, i32* %tmp17, align 4 + %tmp18 = add i32 %j.0.i, 2 ; [#uses=1] + %tmp19 = sext i32 %tmp18 to i64 ; [#uses=1] + %tmp20 = getelementptr [17 x i32]* %b, i64 0, i64 %tmp19 ; [#uses=1] + store i32 0, i32* %tmp20, align 4 + %tmp21 = sub i32 7, %j.0.i ; [#uses=1] + %tmp22 = sext i32 %tmp21 to i64 ; [#uses=1] + %tmp23 = getelementptr [15 x i32]* %c, i64 0, i64 %tmp22 ; [#uses=1] + store i32 0, i32* %tmp23, align 4 + call void @Try(i32 2, i32* %q, i32* %b9, i32* %a10, i32* %c11, i32* %x1.sub) nounwind + %tmp24 = load i32* %q, align 4 ; [#uses=1] + %tmp25 = icmp eq i32 %tmp24, 0 ; [#uses=1] + br i1 %tmp25, label %bb5.i, label %bb3.i.bb7.i.backedge_crit_edge + +bb5.i: ; preds = %bb3.i + %tmp26 = sext i32 %tmp to i64 ; [#uses=1] + %tmp27 = getelementptr [9 x i32]* %a, i64 0, i64 %tmp26 ; [#uses=1] + store i32 1, i32* %tmp27, align 4 + %tmp28 = add i32 %j.0.i, 2 ; [#uses=1] + %tmp29 = sext i32 %tmp28 to i64 ; [#uses=1] + %tmp30 = getelementptr [17 x i32]* %b, i64 0, i64 %tmp29 ; [#uses=1] + store i32 1, i32* %tmp30, align 4 + %tmp31 = sub i32 7, %j.0.i ; [#uses=1] + %tmp32 = sext i32 %tmp31 to i64 ; [#uses=1] + %tmp33 = getelementptr [15 x i32]* %c, i64 0, i64 %tmp32 ; [#uses=1] + store i32 1, i32* %tmp33, align 4 + br label %bb7.i.backedge + +bb7.i.backedge: ; preds = %bb3.i.bb7.i.backedge_crit_edge, %bb2.i.bb7.i.backedge_crit_edge, %bb1.i.bb7.i.backedge_crit_edge, %bb.i.bb7.i.backedge_crit_edge, %bb5.i + br label %bb7.i + +bb7.i: ; preds = %bb7.i.backedge, %newFuncRoot + %j.0.i = phi i32 [ 0, 
%newFuncRoot ], [ %tmp, %bb7.i.backedge ] ; [#uses=8] + %tmp34 = load i32* %q, align 4 ; [#uses=1] + %tmp35 = icmp eq i32 %tmp34, 0 ; [#uses=1] + %tmp36 = icmp ne i32 %j.0.i, 8 ; [#uses=1] + %tmp37 = and i1 %tmp35, %tmp36 ; [#uses=1] + br i1 %tmp37, label %bb.i, label %Try.exit.exitStub + +bb.i.bb7.i.backedge_crit_edge: ; preds = %bb.i + br label %bb7.i.backedge + +bb1.i.bb7.i.backedge_crit_edge: ; preds = %bb1.i + br label %bb7.i.backedge + +bb2.i.bb7.i.backedge_crit_edge: ; preds = %bb2.i + br label %bb7.i.backedge + +bb3.i.bb7.i.backedge_crit_edge: ; preds = %bb3.i + br label %bb7.i.backedge +} diff --git a/final/test/Analysis/ScalarEvolution/trip-count8.ll b/final/test/Analysis/ScalarEvolution/trip-count8.ll new file mode 100644 index 00000000000..ac5ee607ec4 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count8.ll @@ -0,0 +1,37 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | grep {Loop %for\\.body: backedge-taken count is (-1 + \[%\]ecx)} +; PR4599 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" + +define i32 @foo(i32 %ecx) nounwind { +entry: + %cmp2 = icmp eq i32 %ecx, 0 ; [#uses=1] + br i1 %cmp2, label %for.end, label %bb.nph + +for.cond: ; preds = %for.inc + %cmp = icmp ult i32 %inc, %ecx ; [#uses=1] + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.cond + %phitmp = add i32 %i.01, 2 ; [#uses=1] + br label %for.end + +bb.nph: ; preds = %entry + br label %for.body + +for.body: ; preds = %bb.nph, %for.cond + %i.01 = phi i32 [ %inc, %for.cond ], [ 0, %bb.nph ] ; [#uses=3] + %call = call i32 @bar(i32 %i.01) nounwind ; [#uses=0] + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i32 %i.01, 1 ; [#uses=2] + br label %for.cond + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %i.0.lcssa = phi i32 [ %phitmp, %for.cond.for.end_crit_edge ], [ 1, %entry ] ; [#uses=1] + ret i32 %i.0.lcssa +} + +declare i32 @bar(i32) diff --git a/final/test/Analysis/ScalarEvolution/trip-count9.ll b/final/test/Analysis/ScalarEvolution/trip-count9.ll new file mode 100644 index 00000000000..9180f2b8dd7 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/trip-count9.ll @@ -0,0 +1,408 @@ +; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s + +; Every combination of +; - starting at 0, 1, or %x +; - stepping by 1 or 2 +; - stopping at %n or %n*2 +; - using nsw, or not + +; Some of these represent missed opportunities. + +; CHECK: Determining loop execution counts for: @foo +; CHECK: Loop %loop: backedge-taken count is (-1 + %n) +; CHECK: Loop %loop: max backedge-taken count is 6 +define void @foo(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+define void @step2(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @start1 +; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n)) +; CHECK: Loop %loop: max backedge-taken count is 5 +define void @start1(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @start1_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +define void @start1_step2(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @startx +; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax %n)) +; CHECK: Loop %loop: max backedge-taken count is -1 +define void @startx(i4 %n, i4 %x) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @startx_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +define void @startx_step2(i4 %n, i4 %x) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @nsw +; CHECK: Loop %loop: backedge-taken count is (-1 + %n) +; CHECK: Loop %loop: max backedge-taken count is 6 +define void @nsw(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; Be careful with this one. If %n is INT4_MAX, %i.next will wrap. The nsw bit +; says that the result is undefined, but ScalarEvolution must respect that +; subsequent passes may resolve the undefined behavior in predictable ways. +; CHECK: Determining loop execution counts for: @nsw_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count.
+define void @nsw_step2(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @nsw_start1 +; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax %n)) +; CHECK: Loop %loop: max backedge-taken count is 5 +define void @nsw_start1(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @nsw_start1_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +define void @nsw_start1_step2(i4 %n) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @nsw_startx +; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax %n)) +; CHECK: Loop %loop: max backedge-taken count is -1 +define void @nsw_startx(i4 %n, i4 %x) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @nsw_startx_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: Unpredictable max backedge-taken count. +define void @nsw_startx_step2(i4 %n, i4 %x) { +entry: + %s = icmp sgt i4 %n, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %n + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even +; CHECK: Loop %loop: backedge-taken count is (-1 + (2 * %n)) +; CHECK: Loop %loop: max backedge-taken count is 5 +define void @even(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: max backedge-taken count is 2 +define void @even_step2(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_start1 +; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax (2 * %n))) +; CHECK: Loop %loop: max backedge-taken count is 4 +define void @even_start1(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_start1_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +; CHECK: Loop %loop: max backedge-taken count is 2 +define void @even_start1_step2(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_startx +; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n))) +; CHECK: Loop %loop: max backedge-taken count is -1 +define void @even_startx(i4 %n, i4 %x) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_startx_step2 +; CHECK: Loop %loop: Unpredictable backedge-taken count. 
+; CHECK: Loop %loop: max backedge-taken count is 7 +define void @even_startx_step2(i4 %n, i4 %x) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw +; CHECK: Loop %loop: backedge-taken count is (-1 + (2 * %n)) +; CHECK: Loop %loop: max backedge-taken count is 5 +define void @even_nsw(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw_step2 +; CHECK: Loop %loop: backedge-taken count is ((-1 + (2 * %n)) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 2 +define void @even_nsw_step2(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 0, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw_start1 +; CHECK: Loop %loop: backedge-taken count is (-2 + (2 smax (2 * %n))) +; CHECK: Loop %loop: max backedge-taken count is 4 +define void @even_nsw_start1(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw_start1_step2 +; CHECK: Loop %loop: backedge-taken count is ((-2 + (3 smax (2 * %n))) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 2 +define void @even_nsw_start1_step2(i4 %n) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ 1, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw_startx +; CHECK: Loop %loop: backedge-taken count is (-1 + (-1 * %x) + ((1 + %x) smax (2 * %n))) +; CHECK: Loop %loop: max backedge-taken count is -1 +define void @even_nsw_startx(i4 %n, i4 %x) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 1 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} + +; CHECK: Determining loop execution counts for: @even_nsw_startx_step2 +; CHECK: Loop %loop: backedge-taken count is ((-1 + (-1 * %x) + ((2 + %x) smax (2 * %n))) /u 2) +; CHECK: Loop %loop: max backedge-taken count is 7 +define void @even_nsw_startx_step2(i4 %n, i4 %x) { +entry: + %m = shl i4 %n, 1 + %s = icmp sgt i4 %m, 0 + br i1 %s, label %loop, label %exit +loop: + %i = phi i4 [ %x, %entry ], [ %i.next, %loop ] + %i.next = add nsw i4 %i, 2 + %t = icmp slt i4 %i.next, %m + br i1 %t, label %loop, label %exit +exit: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/undefined.ll b/final/test/Analysis/ScalarEvolution/undefined.ll new file mode 100644 index 00000000000..b1f44460af6 --- 
/dev/null +++ b/final/test/Analysis/ScalarEvolution/undefined.ll @@ -0,0 +1,39 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; ScalarEvolution shouldn't attempt to interpret expressions which have +; undefined results. + +define void @foo(i64 %x) { + + %a = udiv i64 %x, 0 +; CHECK: --> (%x /u 0) + + %B = shl i64 %x, 64 +; CHECK: --> %B + + %b = ashr i64 %B, 64 +; CHECK: --> %b + + %c = lshr i64 %x, 64 +; CHECK: --> %c + + %d = shl i64 %x, 64 +; CHECK: --> %d + + %E = shl i64 %x, -1 +; CHECK: --> %E + + %e = ashr i64 %E, -1 +; CHECK: --> %e + + %f = lshr i64 %x, -1 +; CHECK: --> %f + + %g = shl i64 %x, -1 +; CHECK: --> %g + + %h = bitcast i64 undef to i64 +; CHECK: --> undef + + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/unreachable-code.ll b/final/test/Analysis/ScalarEvolution/unreachable-code.ll new file mode 100644 index 00000000000..51d93981800 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/unreachable-code.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -analyze -scalar-evolution | FileCheck %s + +; CHECK: %t = add i64 %t, 1 +; CHECK: --> %t + +define void @foo() { +entry: + ret void + +dead: + %t = add i64 %t, 1 + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/unsimplified-loop.ll b/final/test/Analysis/ScalarEvolution/unsimplified-loop.ll new file mode 100644 index 00000000000..a3175077b68 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/unsimplified-loop.ll @@ -0,0 +1,29 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +; This loop has no preheader, multiple backedges, etc., but ScalarEvolution +; should still be able to analyze it. + +; CHECK: %i = phi i64 [ 5, %entry ], [ 5, %alt ], [ %i.next, %loop.a ], [ %i.next, %loop.b ] +; CHECK-NEXT: --> {5,+,1}<%loop> + +define void @foo(i1 %p, i1 %q, i1 %s, i1 %u) { +entry: + br i1 %p, label %loop, label %alt + +alt: + br i1 %s, label %loop, label %exit + +loop: + %i = phi i64 [ 5, %entry ], [ 5, %alt ], [ %i.next, %loop.a ], [ %i.next, %loop.b ] + %i.next = add i64 %i, 1 + br i1 %q, label %loop.a, label %loop.b + +loop.a: + br label %loop + +loop.b: + br i1 %u, label %loop, label %exit + +exit: + ret void +} diff --git a/final/test/Analysis/ScalarEvolution/xor-and.ll b/final/test/Analysis/ScalarEvolution/xor-and.ll new file mode 100644 index 00000000000..c0530bbc3cc --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/xor-and.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -scalar-evolution -analyze \ +; RUN: | grep {\\--> (zext i4 (-8 + (trunc i64 (8 \\* %x) to i4)) to i64)} + +; ScalarEvolution shouldn't try to analyze %z into something like +; --> (zext i4 (-1 + (-1 * (trunc i64 (8 * %x) to i4))) to i64) + +define i64 @foo(i64 %x) { + %a = shl i64 %x, 3 + %t = and i64 %a, 8 + %z = xor i64 %t, 8 + ret i64 %z +} diff --git a/final/test/Analysis/ScalarEvolution/zext-wrap.ll b/final/test/Analysis/ScalarEvolution/zext-wrap.ll new file mode 100644 index 00000000000..38d15ffbd88 --- /dev/null +++ b/final/test/Analysis/ScalarEvolution/zext-wrap.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -analyze -scalar-evolution \ +; RUN: | FileCheck %s +; PR4569 + +define i16 @main() nounwind { +entry: + br label %bb.i + +bb.i: ; preds = %bb1.i, %bb.nph + %l_95.0.i1 = phi i8 [ %tmp1, %bb.i ], [ 0, %entry ] + +; This cast shouldn't be folded into the addrec. 
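+; Folding would be wrong because {0,+,-1}<%bb.i> wraps in i8: it takes the
+; values 0, 255, 254, ..., and zero-extending those to i16 does not produce
+; an i16 recurrence with step -1 (that would be 0, 65535, 65534, ...).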
+; CHECK: %tmp = zext i8 %l_95.0.i1 to i16
+; CHECK: --> (zext i8 {0,+,-1}<%bb.i> to i16) Exits: 2
+
+ %tmp = zext i8 %l_95.0.i1 to i16
+
+ %tmp1 = add i8 %l_95.0.i1, -1
+ %phitmp = icmp eq i8 %tmp1, 1
+ br i1 %phitmp, label %bb1.i.func_36.exit_crit_edge, label %bb.i
+
+bb1.i.func_36.exit_crit_edge:
+ ret i16 %tmp
+}
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll b/final/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
new file mode 100644
index 00000000000..d59e3924acd
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/aliastest.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -tbaa -basicaa -gvn -S | FileCheck %s
+
+; Test that basic alias queries work.
+
+; CHECK: @test0_yes
+; CHECK: add i8 %x, %x
+define i8 @test0_yes(i8* %a, i8* %b) nounwind {
+ %x = load i8* %a, !tbaa !1
+ store i8 0, i8* %b, !tbaa !2
+ %y = load i8* %a, !tbaa !1
+ %z = add i8 %x, %y
+ ret i8 %z
+}
+
+; CHECK: @test0_no
+; CHECK: add i8 %x, %y
+define i8 @test0_no(i8* %a, i8* %b) nounwind {
+ %x = load i8* %a, !tbaa !3
+ store i8 0, i8* %b, !tbaa !4
+ %y = load i8* %a, !tbaa !3
+ %z = add i8 %x, %y
+ ret i8 %z
+}
+
+; Test that basic invariant-memory queries work.
+
+; CHECK: @test1_yes
+; CHECK: add i8 %x, %x
+define i8 @test1_yes(i8* %a, i8* %b) nounwind {
+ %x = load i8* %a, !tbaa !5
+ store i8 0, i8* %b
+ %y = load i8* %a, !tbaa !5
+ %z = add i8 %x, %y
+ ret i8 %z
+}
+
+; CHECK: @test1_no
+; CHECK: add i8 %x, %y
+define i8 @test1_no(i8* %a, i8* %b) nounwind {
+ %x = load i8* %a, !tbaa !6
+ store i8 0, i8* %b
+ %y = load i8* %a, !tbaa !6
+ %z = add i8 %x, %y
+ ret i8 %z
+}
+
+; Root node.
+!0 = metadata !{ }
+; Some type.
+!1 = metadata !{ metadata !"foo", metadata !0 }
+; Some other non-aliasing type.
+!2 = metadata !{ metadata !"bar", metadata !0 }
+
+; Some type.
+!3 = metadata !{ metadata !"foo", metadata !0 }
+; Some type in a different type system.
+!4 = metadata !{ metadata !"bar", metadata !"different" }
+
+; Invariant memory.
+!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+; Not invariant memory.
+!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll b/final/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
new file mode 100644
index 00000000000..3b5211e5999
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/argument-promotion.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -tbaa -basicaa -argpromotion -mem2reg -S | not grep alloca
+
+target datalayout = "E-p:64:64:64"
+
+define internal i32 @test(i32* %X, i32* %Y, i32* %Q) {
+ store i32 77, i32* %Q, !tbaa !2
+ %A = load i32* %X, !tbaa !1
+ %B = load i32* %Y, !tbaa !1
+ %C = add i32 %A, %B
+ ret i32 %C
+}
+
+define internal i32 @caller(i32* %B, i32* %Q) {
+ %A = alloca i32
+ store i32 78, i32* %Q, !tbaa !2
+ store i32 1, i32* %A, !tbaa !1
+ %C = call i32 @test(i32* %A, i32* %B, i32* %Q)
+ ret i32 %C
+}
+
+define i32 @callercaller(i32* %Q) {
+ %B = alloca i32
+ store i32 2, i32* %B, !tbaa !1
+ store i32 79, i32* %Q, !tbaa !2
+ %X = call i32 @caller(i32* %B, i32* %Q)
+ ret i32 %X
+}
+
+!0 = metadata !{metadata !"test"}
+!1 = metadata !{metadata !"green", metadata !0}
+!2 = metadata !{metadata !"blue", metadata !0}
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/dg.exp b/final/test/Analysis/TypeBasedAliasAnalysis/dg.exp
new file mode 100644
index 00000000000..f2005891a59
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/dse.ll b/final/test/Analysis/TypeBasedAliasAnalysis/dse.ll
new file mode 100644
index 00000000000..6b44eb63842
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/dse.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -tbaa -basicaa -dse -S | FileCheck %s
+
+; DSE should make use of TBAA.
+
+; CHECK: @test0_yes
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test0_yes(i8* %a, i8* %b) nounwind {
+ store i8 0, i8* %a, !tbaa !1
+ %y = load i8* %b, !tbaa !2
+ store i8 1, i8* %a, !tbaa !1
+ ret i8 %y
+}
+
+; CHECK: @test0_no
+; CHECK-NEXT: store i8 0, i8* %a
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test0_no(i8* %a, i8* %b) nounwind {
+ store i8 0, i8* %a, !tbaa !3
+ %y = load i8* %b, !tbaa !4
+ store i8 1, i8* %a, !tbaa !3
+ ret i8 %y
+}
+
+; CHECK: @test1_yes
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test1_yes(i8* %a, i8* %b) nounwind {
+ store i8 0, i8* %a
+ %y = load i8* %b, !tbaa !5
+ store i8 1, i8* %a
+ ret i8 %y
+}
+
+; CHECK: @test1_no
+; CHECK-NEXT: store i8 0, i8* %a
+; CHECK-NEXT: load i8* %b
+; CHECK-NEXT: store i8 1, i8* %a
+; CHECK-NEXT: ret i8 %y
+define i8 @test1_no(i8* %a, i8* %b) nounwind {
+ store i8 0, i8* %a
+ %y = load i8* %b, !tbaa !6
+ store i8 1, i8* %a
+ ret i8 %y
+}
+
+; Root node.
+!0 = metadata !{ }
+; Some type.
+!1 = metadata !{ metadata !"foo", metadata !0 }
+; Some other non-aliasing type.
+!2 = metadata !{ metadata !"bar", metadata !0 }
+
+; Some type.
+!3 = metadata !{ metadata !"foo", metadata !0 }
+; Some type in a different type system.
+!4 = metadata !{ metadata !"bar", metadata !"different" }
+
+; Invariant memory.
+!5 = metadata !{ metadata !"qux", metadata !0, i1 1 }
+; Not invariant memory.
+!6 = metadata !{ metadata !"qux", metadata !0, i1 0 }
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/final/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
new file mode 100644
index 00000000000..8fb5ffffbae
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -0,0 +1,81 @@
+; RUN: opt < %s -tbaa -basicaa -functionattrs -S | FileCheck %s
+
+; FunctionAttrs should make use of TBAA.
+
+; Add the readnone attribute, since the only access is a store which TBAA
+; says is to constant memory.
+;
+; It's unusual to see a store to constant memory, but it isn't necessarily
+; invalid, as it's possible that this only happens after optimization on a
+; code path which isn't ever executed.
+
+; CHECK: define void @test0_yes(i32* nocapture %p) nounwind readnone {
+define void @test0_yes(i32* %p) nounwind {
+ store i32 0, i32* %p, !tbaa !1
+ ret void
+}
+
+; CHECK: define void @test0_no(i32* nocapture %p) nounwind {
+define void @test0_no(i32* %p) nounwind {
+ store i32 0, i32* %p, !tbaa !2
+ ret void
+}
+
+; Add the readonly attribute, since there's just a call to a function which
+; TBAA says doesn't modify any memory.
+
+; CHECK: define void @test1_yes(i32* %p) nounwind readonly {
+define void @test1_yes(i32* %p) nounwind {
+ call void @callee(i32* %p), !tbaa !1
+ ret void
+}
+
+; CHECK: define void @test1_no(i32* %p) nounwind {
+define void @test1_no(i32* %p) nounwind {
+ call void @callee(i32* %p), !tbaa !2
+ ret void
+}
+
+; Add the readonly attribute, as above, but this time BasicAA will say
+; that the function accesses memory through its arguments, while TBAA
+; still says that the function doesn't write to memory.
+;
+; This is unusual, since the function is memcpy, but as above, this
+; isn't necessarily invalid.
+
+; CHECK: define void @test2_yes(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind readnone {
+define void @test2_yes(i8* %p, i8* %q, i64 %n) nounwind {
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !1
+ ret void
+}
+
+; CHECK: define void @test2_no(i8* nocapture %p, i8* nocapture %q, i64 %n) nounwind {
+define void @test2_no(i8* %p, i8* %q, i64 %n) nounwind {
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 %n, i32 1, i1 false), !tbaa !2
+ ret void
+}
+
+; Similar to the others, va_arg only accesses memory through its operand.
+
+; CHECK: define i32 @test3_yes(i8* nocapture %p) nounwind readnone {
+define i32 @test3_yes(i8* %p) nounwind {
+ %t = va_arg i8* %p, i32, !tbaa !1
+ ret i32 %t
+}
+
+; CHECK: define i32 @test3_no(i8* nocapture %p) nounwind {
+define i32 @test3_no(i8* %p) nounwind {
+ %t = va_arg i8* %p, i32, !tbaa !2
+ ret i32 %t
+}
+
+declare void @callee(i32* %p) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) nounwind
+
+; Root node.
+!0 = metadata !{ }
+
+; Invariant memory.
+!1 = metadata !{ metadata !"foo", metadata !0, i1 1 }
+; Not invariant memory.
+!2 = metadata !{ metadata !"foo", metadata !0, i1 0 }
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll b/final/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
new file mode 100644
index 00000000000..eceaa2cf02d
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/gvn-nonlocal-type-mismatch.ll
@@ -0,0 +1,91 @@
+; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+; GVN should ignore the store to p1 to see that the load from p is
+; fully redundant.
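+; (!1 "red" and !2 "blu" below are distinct leaf nodes under the common root
+; !0, so TBAA treats accesses tagged with them as non-aliasing; that is what
+; allows GVN to forward the stored value past the intervening store.)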
+
+; CHECK: @yes
+; CHECK: if.then:
+; CHECK-NEXT: store i32 0, i32* %q
+; CHECK-NEXT: ret void
+
+define void @yes(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+ store i32 0, i32* %p, !tbaa !1
+ store i32 1, i32* %p1, !tbaa !2
+ br i1 %c, label %if.else, label %if.then
+
+if.then:
+ %t = load i32* %p, !tbaa !1
+ store i32 %t, i32* %q
+ ret void
+
+if.else:
+ ret void
+}
+
+; GVN should ignore the store to p1 to see that the first load from p is
+; fully redundant. However, the second load uses a different type. Theoretically
+; the other type could be unified with the first type; however, for now, GVN
+; should just be conservative.
+
+; CHECK: @watch_out_for_type_change
+; CHECK: if.then:
+; CHECK: %t = load i32* %p
+; CHECK: store i32 %t, i32* %q
+; CHECK: ret void
+; CHECK: if.else:
+; CHECK: %u = load i32* %p
+; CHECK: store i32 %u, i32* %q
+
+define void @watch_out_for_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+ store i32 0, i32* %p, !tbaa !1
+ store i32 1, i32* %p1, !tbaa !2
+ br i1 %c, label %if.else, label %if.then
+
+if.then:
+ %t = load i32* %p, !tbaa !4
+ store i32 %t, i32* %q
+ ret void
+
+if.else:
+ %u = load i32* %p, !tbaa !3
+ store i32 %u, i32* %q
+ ret void
+}
+
+; As before, but the types are swapped. This time GVN does manage to
+; eliminate one of the loads before noticing the type mismatch.
+
+; CHECK: @watch_out_for_another_type_change
+; CHECK: if.then:
+; CHECK: %t = load i32* %p
+; CHECK: store i32 %t, i32* %q
+; CHECK: ret void
+; CHECK: if.else:
+; CHECK: store i32 0, i32* %q
+
+define void @watch_out_for_another_type_change(i1 %c, i32* %p, i32* %p1, i32* %q) nounwind {
+entry:
+ store i32 0, i32* %p, !tbaa !1
+ store i32 1, i32* %p1, !tbaa !2
+ br i1 %c, label %if.else, label %if.then
+
+if.then:
+ %t = load i32* %p, !tbaa !3
+ store i32 %t, i32* %q
+ ret void
+
+if.else:
+ %u = load i32* %p, !tbaa !4
+ store i32 %u, i32* %q
+ ret void
+}
+
+!0 = metadata !{}
+!1 = metadata !{metadata !"red", metadata !0}
+!2 = metadata !{metadata !"blu", metadata !0}
+!3 = metadata !{metadata !"outer space"}
+!4 = metadata !{metadata !"brick red", metadata !1}
diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/licm.ll b/final/test/Analysis/TypeBasedAliasAnalysis/licm.ll
new file mode 100644
index 00000000000..12a9c1dc564
--- /dev/null
+++ b/final/test/Analysis/TypeBasedAliasAnalysis/licm.ll
@@ -0,0 +1,61 @@
+; RUN: opt -tbaa -licm -S < %s | FileCheck %s
+
+; LICM should be able to hoist the address load out of the loop
+; by using TBAA information.
+
+; CHECK: @foo
+; CHECK: entry:
+; CHECK-NEXT: %tmp3 = load double** @P, !tbaa !0
+; CHECK-NEXT: br label %for.body
+
+@P = common global double* null
+
+define void @foo(i64 %n) nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %tmp3 = load double** @P, !tbaa !1
+ %scevgep = getelementptr double* %tmp3, i64 %i.07
+ %tmp4 = load double* %scevgep, !tbaa !2
+ %mul = fmul double %tmp4, 2.300000e+00
+ store double %mul, double* %scevgep, !tbaa !2
+ %inc = add i64 %i.07, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+!0 = metadata !{metadata !"root", null}
+!1 = metadata !{metadata !"pointer", metadata !0}
+!2 = metadata !{metadata !"double", metadata !0}
+
+; LICM shouldn't hoist anything here.
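+; In @bar, %q is simply a bitcast of %p, so the loads and stores in the loop
+; may all touch the same memory; in addition, the tags !4 ("char") and !5
+; (the root) are related by ancestry, so TBAA cannot disprove aliasing here.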
+ +; CHECK: @bar +; CHECK: loop: +; CHECK: load +; CHECK: store +; CHECK: load +; CHECK: store +; CHECK: br label %loop + +define void @bar(i8** %p) nounwind { +entry: + %q = bitcast i8** %p to i8* + br label %loop + +loop: + %tmp51 = load i8** %p, !tbaa !4 + store i8* %tmp51, i8** %p + %tmp40 = load i8* %q, !tbaa !5 + store i8 %tmp40, i8* %q + br label %loop +} + +!3 = metadata !{metadata !"pointer", metadata !4} +!4 = metadata !{metadata !"char", metadata !5} +!5 = metadata !{metadata !"root", null} diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll b/final/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll new file mode 100644 index 00000000000..c2407dfd4c8 --- /dev/null +++ b/final/test/Analysis/TypeBasedAliasAnalysis/memcpyopt.ll @@ -0,0 +1,23 @@ +; RUN: opt -S -tbaa -basicaa -memcpyopt -instcombine < %s | FileCheck %s + +target datalayout = "e-p:64:64:64" + +; The second memcpy is redundant and can be deleted. There's an intervening store, but +; it has a TBAA tag which declares that it is unrelated. + +; CHECK: @foo +; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !0 +; CHECK-NEXT: store i8 2, i8* %s, align 1, !tbaa !2 +; CHECK-NEXT: ret void +define void @foo(i8* nocapture %p, i8* nocapture %q, i8* nocapture %s) nounwind { + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %p, i8* %q, i64 16, i32 1, i1 false), !tbaa !2 + store i8 2, i8* %s, align 1, !tbaa !1 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %p, i64 16, i32 1, i1 false), !tbaa !2 + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind + +!0 = metadata !{metadata !"tbaa root", null} +!1 = metadata !{metadata !"A", metadata !0} +!2 = metadata !{metadata !"B", metadata !0} diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/precedence.ll b/final/test/Analysis/TypeBasedAliasAnalysis/precedence.ll new file mode 100644 index 00000000000..47cb5f2256d --- /dev/null +++ b/final/test/Analysis/TypeBasedAliasAnalysis/precedence.ll @@ -0,0 +1,46 @@ +; RUN: opt -basicaa -tbaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=TBAA +; RUN: opt -tbaa -basicaa -gvn -instcombine -S < %s | FileCheck %s --check-prefix=BASICAA + +; According to the TBAA metadata the load and store don't alias. However, +; according to the actual code, they do. The order of the alias analysis +; passes should determine which of these takes precedence. + +target datalayout = "e-p:64:64:64" + +; Test for simple MustAlias aliasing. + +; TBAA: @trouble +; TBAA: ret i32 0 +; BASICAA: @trouble +; BASICAA: ret i32 1075000115 +define i32 @trouble(i32* %x) nounwind { +entry: + store i32 0, i32* %x, !tbaa !0 + %0 = bitcast i32* %x to float* + store float 0x4002666660000000, float* %0, !tbaa !3 + %tmp3 = load i32* %x, !tbaa !0 + ret i32 %tmp3 +} + +; Test for PartialAlias aliasing. GVN doesn't yet eliminate the load +; in the BasicAA case. 
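+; In @offset, the i8 store writes byte 1 of the i64 at %x, so BasicAA can
+; only report PartialAlias and GVN cannot forward the stored i64 value. With
+; TBAA queried first, !4 ("long") and !5 ("small") are unrelated leaves, the
+; byte store is treated as non-aliasing, and the load folds to 0.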
+ +; TBAA: @offset +; TBAA: ret i64 0 +; BASICAA: @offset +; BASICAA: ret i64 %tmp3 +define i64 @offset(i64* %x) nounwind { +entry: + store i64 0, i64* %x, !tbaa !4 + %0 = bitcast i64* %x to i8* + %1 = getelementptr i8* %0, i64 1 + store i8 1, i8* %1, !tbaa !5 + %tmp3 = load i64* %x, !tbaa !4 + ret i64 %tmp3 +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"simple"} +!3 = metadata !{metadata !"float", metadata !1} +!4 = metadata !{metadata !"long", metadata !1} +!5 = metadata !{metadata !"small", metadata !1} diff --git a/final/test/Analysis/TypeBasedAliasAnalysis/sink.ll b/final/test/Analysis/TypeBasedAliasAnalysis/sink.ll new file mode 100644 index 00000000000..fd32d6a7a58 --- /dev/null +++ b/final/test/Analysis/TypeBasedAliasAnalysis/sink.ll @@ -0,0 +1,20 @@ +; RUN: opt -tbaa -sink -S < %s | FileCheck %s + +; CHECK: a: +; CHECK: %f = load float* %p, !tbaa !2 +; CHECK: store float %f, float* %q + +define void @foo(float* %p, i1 %c, float* %q, float* %r) { + %f = load float* %p, !tbaa !0 + store float 0.0, float* %r, !tbaa !1 + br i1 %c, label %a, label %b +a: + store float %f, float* %q + br label %b +b: + ret void +} + +!0 = metadata !{metadata !"A", metadata !2} +!1 = metadata !{metadata !"B", metadata !2} +!2 = metadata !{metadata !"test"} diff --git a/final/test/Archive/GNU.a b/final/test/Archive/GNU.a new file mode 100644 index 0000000000000000000000000000000000000000..4c09881eb39dc513184462952d414396b3f4a0c0 GIT binary patch literal 4210 zcma)92V9d!8^4zfLrB07P{5$CLO@g!0%3@w3=xp2fJH__7BeK61Q2T#L{zXY(28gQ zw^9dG>%jJNfFcTtKtXA7wHBpAq}p2Sr@ae#CE9+!ug{P7o_YVf=egtVGI6F@Elr(G z=i_gaK;Yuy<>n@w?tM{p$MS=!QF&}BZ;0PSLcZoDrJgDlBbnRRWi9KIZYuKDaDy`o-|iv2SD-q>lLpd6|A_QP!tEA zyuO>I4b=koRl@v8N`!e=@0p`#zo?DGm~^qQymJ4c%cuD=*}jfj@>M#cXeILxbN_FezSx$V*V+`I4s84E zJ~J~tb3?GCf>uHEOuy65EjnK9o-wbnHfYb{X|ufT{3IdwI6c$6gqGL#b$0%D(YGD` z3qxycG74k34@106zY~A^)5_q2t3C}uf_!(YT}9pZv>VfJGXz!#n8JF`eY;Bs<4D^X zqqvckeW8iP7C4(McP@Lf@kVfmPgHeeG~br&Q=Vj0cQiJB^TkyyL4BcBAEw1moPMLE zmGuKNaEkTS;>IzrqV|`U6bfJ7zLpx_z-`=@a4AHpWDK+o)vR3F*JQ)JCg~ivb#%+6 z#BrRH&0>zl=NSUalFO3Zi1F*r)UhsW)>Sd?uMW#FeNMk6=`$I-k}Q7S!Q`m3B%Zm? zH45%S$HZHtsi{A7pC7&1LdHt-&F{`yx_;TIFEMf{eP_$5__g;NEd4E>4}H`1%=!EB zhyNLKd79Iy#C=J=rUhZ!TpKvEwfktb=0UOj4ce6IgFQT}hJ&vQ27B>0$; zN6E8q%#FDFXG3}8!ofe9<~bxS^mIzMjP(xjYq{HNl2u>e;#qLj`2*#IO*tDm3Eo~O zo~E6Xm$la1jTa}48H#>%@xqQ1Gq;B3Q+uW<+EZiku9LeRN^B!Z2QN)4w|t%Z>-aey zT?ySbzkl36_oD1gNEwSUsH{1v8_DPv{e?9E|<4;$;%uXjRzw=3r4Rgj7YdSkJ zVSd0=c}so!&9f$chYG`1N`pm}!dE?=_+rnq;9^(7x# z-8yb{H8=BFx|}iA{0cq#&;qA9KL=0VUK(BIT0Dtm7C3OZEa<%NI{Q6Km}MW%KS1Z! 
zCzN(u=C@Ndt|?#M;MI0_1M2x$Yf|{KGxaW8*RbYX-+jD%!qZ&)s3K!y$CuWd=(IefQFroo)o-}ONoyuS`umt8Hrubpu zPqcLbv|_UTqd(#8(A=;|XS3=Jp!Fuc=nZ($%OO0lJI2CBQ_W$^rLj#&Gkm7SHPQfv9cWtV1U0VDJ{z-6jzi9!w-WTP~t7LX}M(p0e; zU{oW$gGMIZgJU98fZ3AIKpE3v0BpiL@0Dbbj=U5U52!>G{!WxkAC-rqfKjqSC5GdY zM;XKXHtK8uz`_A6hL#M+xQBu^(||D=*#ds30?;P|7>*!}HZK7{gaih&;1feWEl(of7Ep?~PA}Tfj-b{(z^+%hZY` zkZaY$Je82tas^K-U!ny(nOG|ZJc&jFMY$w9MI@F;)bbp_Lm|VKG-a~NP*0BOqe6Seb%MN|jd5gVS`@XvNYDz>}&nGv#oX{cD;4BhhkD^*w}; z8&-m-&p2o|*2f?g3;$iwdg=BN8PK0gB)x%=UVb^t+yI2#%a^GJo0@PRgz2dV*)#{wu5eV_T)oM6Ha%@55Nix9)uHe??;2X;qmU<9Y>h&Tj98Dp>vC=(2=72o;z4`yPY`cGYegKTK?^2Q~lT^}7eP zy9dYBgLP)bEa!`OpY}@y*B5b4iR%{%7YZWo)5ers|9s&YC|&^NxrJ1{x#4e@o?Q^h z2|O7D$%6VZB~LH)w-Ri^73Rj9E{@1_IT&wPS6+*#pw?E zd*$X}5_Ma$;5nUsEMCrO?1v*Uc_(xDo_hy-5>?D2-j|R~f+6Na{k5Gc9Zx2PT3asD zn`6A>HL841R5OdZ&M0x(-9dL=8H@IJIcs`z&IoNos$b++?r2DF_+_ro2+f?o3NB@x2?H#CVm(x|QJ6?R}oZj!03uL%C832~l^s61~%CDw)CuimNjKc!A^`mJ01 zp1)38>h;dJANC;}{cAPVgXQdjgJV8M4nSsXPzGjV*eh~0kY^zOJ>!35dE})2lB2Z9T+**s$MWotV?9wen<@OfF7ISIEUm zNtT=^O$G`@ZnD5PE=|PuSz;fxq2_6?4m+E6VJtriwW zk|WG|`p+Ia_gQ@;#-K@rmDL9hp9rn0XdSHH+kNZ6&fwZR-qxU_>oNe?NNsm_MomRn z)bI|6t?xt@GAM-tXs)Fc0&8NFt#4S^T<_K=^zM!hAVeJ$0-`rN4HBiWnSpHvlqz5i z8KB$w`v@yH5dDX3=ZA%D6#@p|+KM6Uu(rYuY_Qm}HHyYdIKRAVSQ)o?&Q8;B_<=2( zJ|;Tt2A!_^eK^&ti3M)YThGUre@PW~TC`io9trJI4!x+!EBuKsj=!a`W}k81&d}W5 zKyr7ee?8+usbg%!>425ezOClJ{Zt-Yu-&k{6dlC?vmd7+D#dE=zPnA3}K`D{ypWxal{?;aoou2fzZTKa}J9n zcdB^0>1J@3cT{a;w6hJ#yE2K;a4a@{%ca$AK?9*R@2AI4^}Jcu&isxMIK%2%Y4e2F zQ3oo^iiNN4Tu+N{;x_M3xEzwKqz`qB)U8@R&|=NKF72MQZG7A1#7XQ^trE8RrRy->Eh0lbGC&RQu<~oI@4k~J*W2Am)S%T4_%&BY4IlS=gIzVJqf+mzkWD4|B~!n zxrW@bk=4jM^>IYc2-!2Nq}uUp1esGqc-B7klabp6A6{fBwtDZXZy9>t5M9c(GFzDa z!J@11NbEfN755>lb6VEDpO1Kb|HCy8(=&+$cR#ALX3V~7MPo%KEDV?_Z)@zlb_jdNSg#c<~!JD+)ZLm-WhVJ=l0zgWH$Y5T=t_(@v4q5 zvCq0LKB|2Bi>;}9&h(awzzz0Yth6N~rj&fzyP?5!U;)fPOzETIAE@gCsHG&k$G^ju zqP1zW&Su#kKXm7%UPO}u8=wNc%zyk zUFsZ_lbx+nYn?~in4Fyr2IBu6oTGCB@yq#H4!Sn zY)EIJjOj1{Hbop4E<_HI&Jd6SGLV;y;sFJZ;`}8_rjN=;QGk%5P)Xprq;W>DzD+tC z0I+cYi=n2#HSVLJEmUBHRyKzp>Hze~0LDQOge}Ve5F-HuA`kY;ga$|5;lTcNTL2jA zi>}XPUDJt^vE=U41O@~I;~_fwgMk5T1(i-Bt%GqWADmFmJ|@%>f-i%N+Yz@iuup^X z=x*A>*-xN0+DcUWs02-b{Wgk(R2|{_u2Ta2{Jc2R;Sq4u??2#a^0TxODdbx9C{M#B zwOql|%9m*YPbSey08gsXKv6EuNfk?^Qnfr6@KDI8C0&`KGSn66O1Yt)qK5y$3=P>i zQuMbBLXLq*)DmSXGAUJBIS;1kq|r)}GXXDIm6aujr|e(T6c~xNgQA}ygxs(bcz-5A zyOBNyu~_(XMf;^YN06g9XaZdLAw2!{0Ax@$Bph!hG@|(b7v}}KFsvgDCgBZjs1DQvypBas#^*p0jE9FR83}sAec@0y#5)A>P+bTQ zaU4$yWqb}4ffnNVBSB9n4$*t?GsLThARK5zc<}0ai>HCIA>J8?XAf;S;XZVm8{%~s zU@SDEdx8N!lpo3$)nJHc3F|;-11Ef4T*tYg zodlk}!R?6u`GrKf;R;B{cX@Oc5s(e1Lq9r!V?@-QAVVFUfH5Khw1Zg zBcA_* z;py%U=OgrhEutg_tfAD^FD}*H#dnE))ROX*4Lbl(y!~qBTS!GKA0(8-fv0cpWobe+ zzghdVSaM=1@WFd=I=nBoXv_fD_j!B059ylPk(w|MqW z({Eh@TQ+?{cG?X(-S+$9)vL(`9?x6P$CrOa7j{~-TgM&=?NScCsL3n**+mq8OKr_P zBiPPT-`zm*bf|wb?Lw(zY{cn+m6E=#=D+>CEvofptCM(0=6ce7BUNg<+^LG4@Kt&k zK1aU2oSU*^J+JJ`bY{sa*6(KiU$=a|Jukn#E#B5E=1BIS!_!=gWfq+#$CY&_F1g2d zyvVdNU;dfPf~EqW|`xvYJ9ZkfhHgR}-{tq>qtG{`s%B?xL{nH1m%=FBS z!ICO^72Pxa?jWz^M6G+qg68_5y-R1z@wWAogxu%$&F~UhT;Jc_{huY@bonn1t+UQ3 zj^RBD@iO^V{LPQ6f{U*CGzGa7x?Aop>AkPnlzxZlVtJ4yZ1mi}r+he$yn{K07g;?J znpkR%b0~7Bil>`y26y>H)ka1OY$!gJNu-8jvGH3jt!@h%2(9@rEq?OMn`Q0n?^%J< ztge+dk9!k!pt7u3`1;QE)c7V|^Zta(AyOrCsN+@Ls^tSM*1YSI?g`t*wp~t~z&+I} z=9+(&;bKvCMUodWasAl__7(N|8s>vFVHqYb7`G(@#^YB}#4oy7-0UogXWk37g7?TF z@iuvS>i4}D#%?i}vD17Dd$X2rSaJFbj8e|n)pk05-Ge3zfAbfwzV3PM^j+nne~-H| z!|`oY$C~rF3+g6c$@dj#CaY)3BA_8 zeKa`#lI&c$n%c6F)5t&dNkq>p>ddf`YR9t?RJ?}ttbOvQuWlE7bdjUj>a(l9W$1ZB zbScluY+=^F7F~r~V&~DXc@H_AQ!?-Ua>Vf^&3fy^ 
z+?|y11N({|PQD^KvzivN^sHZl^C{z9tM*Mj$hG>OxzH#@ZRf$^?W<$4-(RpuT9EUE zw0Eg-?p2}uv7c~wGgoj-Z1Vi|q4;Ud1w{`in_PX(cW}>mn#7#EGvrXu>$@??Zu-Tz z>?fJxb)AdYKJ&WhsPgHrwx*srQ(GzmH`sS^QkT3ktvq6r-aJQgFDUSM@|Fo6S0^rG zhfmAMbve3DReHqFyFZZ4{FlnyHs@qg!|w$)yF+d@N-hLlxoETNWkmFGJnLoc%=cAQ zAHSbJ&T7T|&w54sG=2OYukQxiBu@gGsWY9*QiL+ZwGtxxMjvpSFeC#moqo<+; zW(?cK*<|5v6E1I0*WPE3<9@)$uX8tc<}c({M+*Cv4xd_GTzGQ+jh?W`4YRf#dwoAD z`VM{j&2!!LnMGpkiyIa34=ojL(}UccY|J=oYcB4!54SB*G#ss%=H44NkbHPe`-C;M zyv*n6a^`rmtBmNwiyY_u6g+iDd31$q=@hnU;Lw$dpbNh1?e;EXReZScAcNPKP~K}% z*hy2nrhIXeU*FpcXcuCw$l)u_Hac%x%bs^*&xy)O&+_b|N{oygUR!NuXfHm#>L1@e zuC8ACdm@RkG9Yw5hGFQ;)e;8CU{nL(0$FF*p2}b<@U^&m}ytJI2Pw(#&AXqjQYOba;2$0W#g#c&;%3 z^?@lcE%<1FMA0Vbq?G{(43EM6r%%V-_(Hxjpa`%TN&?)5ma7=^> zFdNEQC}Ua-fK7Vm&5{h#k(Y|%0gZ^l--(jxqw-M{ASEkQVmK~kj1kOllhy_REF8dM z=*e)5`zUA&9T=gJ&EbbC0DUrm;Ru4XWf=e>B!EEV!Csp%V5{96*gq}{0Hbx$)|sj; zI(Z@{?LJLnK|nATqN7Iz2Cx-$CWW#N#-aHTgy!sPLN6itv8aR{aVrD+6e#Of(;oJI z0ySMJQSLekngDksih@)v5$mp10{#8H@u_eNIO^9Q@YVU58nFa&O}37w5>mEY!Pm%_ zX#ig))`$ULqEC9 zf39e~wEGBh6bDU!2S0?TKOTS#%7#SXO@~Gl|95fDFb3ffgz(l73gm>(&p<}*LHMB^ zKoB9$g$BbsQsE?gpbh1LYCz-x|3ZvlLlKOHn@WlVJrQ`}P&dRo1o2Q_2oG@rPXcA4 z4-|n0;>|;Xo=_a358!8rR}VpOXhV4L=o-OOL)j4T48*gCHi8JB3K|UYIt=Cq&(IOP zwNNJdAdI^Z4@ZKY2wySO4e@S6JYpZ1Kp&Cs$51BvAc!{*&yWb;0ca#-Lj&{H(kpWu4Q6dAh0;5FMW@@Oj0lZDM zg!YpetUNji2|{&2zhB}^MSc&$2|4l`oZ|1?I`9v+C{3*tEC1r3w%!A~qctGGX*wVd z0a3;nECb2}Lu-XNTtGAj#E13#S3Ng@3xMoqdLFH}TOtnE5aomX&U#&_*R|uohTz0F F{{`)U>mUFC literal 0 HcmV?d00001 diff --git a/final/test/Archive/dg.exp b/final/test/Archive/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ b/final/test/Archive/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/final/test/Archive/evenlen b/final/test/Archive/evenlen new file mode 100644 index 00000000000..59ee8d552e3 --- /dev/null +++ b/final/test/Archive/evenlen @@ -0,0 +1 @@ +evenlen diff --git a/final/test/Archive/extract.ll b/final/test/Archive/extract.ll new file mode 100644 index 00000000000..714c5f1ed98 --- /dev/null +++ b/final/test/Archive/extract.ll @@ -0,0 +1,16 @@ +; This isn't really an assembly file, its just here to run the test. + +; This test just makes sure that llvm-ar can extract bytecode members +; from various style archives. + +; RUN: llvm-ar x %p/GNU.a very_long_bytecode_file_name.bc +; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc + +; RUN: llvm-ar x %p/MacOSX.a very_long_bytecode_file_name.bc +; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc + +; RUN: llvm-ar x %p/SVR4.a very_long_bytecode_file_name.bc +; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc + +; RUN: llvm-ar x %p/xpg4.a very_long_bytecode_file_name.bc +; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc diff --git a/final/test/Archive/oddlen b/final/test/Archive/oddlen new file mode 100644 index 00000000000..8cf5bd181b1 --- /dev/null +++ b/final/test/Archive/oddlen @@ -0,0 +1 @@ +oddlen diff --git a/final/test/Archive/toc_GNU.ll b/final/test/Archive/toc_GNU.ll new file mode 100644 index 00000000000..9ed7d8eb8cb --- /dev/null +++ b/final/test/Archive/toc_GNU.ll @@ -0,0 +1,8 @@ +;This isn't really an assembly file, its just here to run the test. 
+;This test just makes sure that llvm-ar can generate a table of contents for
+;GNU style archives
+;RUN: llvm-ar t %p/GNU.a | FileCheck %s
+;CHECK: evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/final/test/Archive/toc_MacOSX.ll b/final/test/Archive/toc_MacOSX.ll
new file mode 100644
index 00000000000..6dbc9d2ea4a
--- /dev/null
+++ b/final/test/Archive/toc_MacOSX.ll
@@ -0,0 +1,9 @@
+;This isn't really an assembly file, it's just here to run the test.
+;This test just makes sure that llvm-ar can generate a table of contents for
+;MacOSX style archives
+;RUN: llvm-ar t %p/MacOSX.a | FileCheck %s
+;CHECK: __.SYMDEF SORTED
+;CHECK-NEXT: evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/final/test/Archive/toc_SVR4.ll b/final/test/Archive/toc_SVR4.ll
new file mode 100644
index 00000000000..d447b921999
--- /dev/null
+++ b/final/test/Archive/toc_SVR4.ll
@@ -0,0 +1,8 @@
+;This isn't really an assembly file, it's just here to run the test.
+;This test just makes sure that llvm-ar can generate a table of contents for
+;SVR4 style archives
+;RUN: llvm-ar t %p/SVR4.a | FileCheck %s
+;CHECK: evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/final/test/Archive/toc_xpg4.ll b/final/test/Archive/toc_xpg4.ll
new file mode 100644
index 00000000000..fd875eebdaa
--- /dev/null
+++ b/final/test/Archive/toc_xpg4.ll
@@ -0,0 +1,8 @@
+;This isn't really an assembly file, it's just here to run the test.
+;This test just makes sure that llvm-ar can generate a table of contents for
+;xpg4 style archives
+;RUN: llvm-ar t %p/xpg4.a | FileCheck %s
+;CHECK: evenlen
+;CHECK-NEXT: oddlen
+;CHECK-NEXT: very_long_bytecode_file_name.bc
+;CHECK-NEXT: IsNAN.o
diff --git a/final/test/Archive/very_long_bytecode_file_name.bc b/final/test/Archive/very_long_bytecode_file_name.bc
new file mode 100644
index 0000000000000000000000000000000000000000..f7fce249020a07869e8259f81ea74739b0b4c06c
GIT binary patch
literal 1465
zcmV;q1xEU8Y<6QZLRx4wF+o`-Q(2OKv6%n>Z2#4N|NsC0fq&s#g<1gb|KD(QOmqM>
zgfxIq1y2&z-MOy2wXjeD4GLj2n4YGovrL+k$?Be?)X}D!Nub$48UWDK#Q*>S`k>U*
zYM+x$Q}UT7ls`~_4F-TR0004?27mw>15+bHO-(&O$>|K#(9i%h00000GynrY9B620
zXa<194FQk<2ATjG0gwOy3?xKBG?O-+k2sF|a%^Fy&Ow(@g^`Ccu
z?lE6m(Q_IcxG$FtbKZ>vHz%;SWmY_e`4B{6!-3xQ_1+rEn)CvGPE_01!E(6
zEt(Nr)oM*3J!O}Y9A}w}C5pXj426OX`4U6h$F-MtdCkmQBREy8a_T4Vb`^LP(hjd~
zsWi6BMkIzZdKvSSlR4*L_K$u_xv94HKk-~(`NaswSdudxCInRpC5#|j?
zDjCX{TjrkIL@H2c}hXj{W2#HC!tOJi;f@X-3If_mr28YRnO)Eku$%*9`NYYz1a*kb1k7lH&~_0tU;a&TfdclC2@CFLhw
z*aItQ=GVXwkrZSRX+eP3aC(h=x4Z!ak_w#6w_Awe!xcjm_4?b^^eyC=<-QiiEGf5L
zrC>o0flZe(!yGsPx-PbzPuBVcIZcJ0DY;JtAfE#9%^|D$*nW(`6Ks?~oUq66^s!WlB^eS?aD7t9
zp4{=9t=lEy6Vf7W-}9_G>+H%n54c@^-j=Ew4Lisg3KLLVQAI0d%(~Liy9+|8gH3{C
zNejJ+UN4VUbJB9@@(G15wHeaSb&G7z<8eMjq1Q+L|f#M;?U$}$@
zOL4hEu`sp^oPwk%ryLpP3{eSZb0jtsC8Vki5m!o5V}5n%0;6#YcE~hk=t4B|kQ^_r
zVGi{Bs9P$;eu3cxk1|0NnH;z`4pp?-@F%b(+Ro|`!}AM@?PP5Gsxe_EZOdP+ZtwOZ
z4>xrv$CF2mCessWS^5u{tQ~5^J!0QTN3Uj)8#c%uco9x2Yj!cMh;NCkLOR_?5e$8A
z6eD%CV6pjr9idCnx?;dc#=#w&_)k=?AaMAgIahC*mRETe8GPWf*I%S`+Ah{OgoA*zM$XnvP>nm4uKVCqRM`lZ&%4yHZGm9{yv~zb;uvf*D@Zdf
z9TF3VpTVOhP9ueEv8|ITHP=n&W~qkF8-}1IZS-no3>Ol}3RJ05DMal_D4CK}lQM}S
z5f4Z3#*;|EL5n1#cny<#QKt$ex?Gai6@bzPb24YV&n~dn*8m2tD0gEgR*;qq@B!E~Sf`|&%4YVR!
zz+I^eRO^E6a{)yZ6oG=$;?`P}E+W;|YM;KDkPE4=-)qnJ-8t+0Gv~}?W-@uUBukbu zmm$FaOhTcvv!|<@tEU@|JL_DAEsl#kiTFeY0DuI*m__Il$A*cN%A5?hAWfK0n12#@ z#~_Enf_Db{yRlr|N5{%hWaWvKYE`mWnx|FB)N+M5DOITut0Wl;zEmb)KvLu=R7!?$ohjut@eJ`SbMJsTD<{DZ7uqH>^`i6$i@oss->}qcZBDh|#A$xOEAkhk%SlDJj zs|41N0jAwNA5p~y^1LD2`Jtg(MSvx=wql9et*x*F8!WbLjb!lS&o8eWQpGNwz0>qt zVL{Z&)XtfUbZ+%2IVd%-8NEXw#%I=+3{bch2nD* z+snAgJJ$0`zf5BmuVVdf=J$2;=i77h+FIjmJ)@6g4LCf_u~=r&VRBqmd*YH?T>Fa* zEA!=_2^Tcxdna8MFdq4>d)UiZ?dNDBF|~=srOVr@8e^1#^EO`fW3@@UpKO~{V z9OqCJPUTNG-3;pVj;x7@64+3@D-ucd$715PTw2{4*dJ2;VQSpunKw(@*x$1PrdeGp zX&Uz?@<2stk?8fE>nU-Kyr%u}mxE<0=3x7)+EvT@o2_}*rCk%YjcvW0FoAojMZz`z zEL~_(dPSNWK5_lodiE8~`fBEbHKFMyFBrF_{l?>0Q6w)qS=_8lsYmV$jgt4sA>lT8 zddl}b7shTem$OrS3VJe^Z&-2q3yf06*wuPEZry`M3qSK0ufFbn?(|*7qkoUPGQ;t7 z!u~`blfuyLE{)u|+Wqu8v%r{vMs0G zqp_lC@zC$h3+xjYdpM?9#CQe!w%+SA&TJ@j_9(pO^nq&9=A2F3crVYB&r;7T%G+x1 z#Yqy!y^4B#>Eh0lv$us5(0XSmJ5pls?o)g0OKl>^hc3^kuy~vM%fxx^-SIuvzkM_? z|C0P%nTFcDk<-9G^+|a5E9%V9;ws0p;Z(eu^sH_2r>}13e{_+f-0Honu6gizeN+k0 z%4}igzZPAETVm(YuXztS9aA#y{c^zx#2mHEY&YD+VVbexd(#MQcOH zt#ih{hl@g2$%2f(Ecrn2Nql0?AFfVQYJ0n*i?U~}p$cy?o8NO6ADG)V_-4IzLe5Ug z`2Kx`4<}y{pIJ={UV7HI-uaaAu2uV{9^_hm&s=Dftg&6t- z`MOppvCp_JKB{{9tF5U=_SEL`fDQJYoRlT6Oe>Dqq&3Zv-U|#kp0s6x`_+ld*kRMs zbA(6NsY{Oddi4dcng3Fo+h(6ktp7dVW_R$d2I+;sD;I5cy$p{!j%U8CnfboD^5ggO z##ybn|5=ZCpSG9Z?fKn6tMrL~6ZHljT~!@CD`9rCX?m)7+3~~ulaC#yee_hkz>HzL zIEyUWZNla4>D>G5aqJKH_;qf^&isYkst8f<(xFqUiwaK8ztJ5Uv0>J>W3TT=M%|%r zzj>~!E~8L_eQ~2a?xCg9b$XzylZ_c?ZS}>y_F=Zg%KD?_)7*MO`;!i@X`8U7hL`a? zO~D**c9juzc#-40pMs|DD2pn0DVf4H4H&#q9(ciLz1`krtnv>R9%S$u;>&t03OZ;S zm*g*Q^6Pqf0PRAI6*+9h*#_rrYuWQ|>^V^}=~=E_WU-Nv!)vR}4Bf@YSO4SN$Hm1{ ze@`SbR{Dp`$1n_?xjMoC8H{QGTp$nIb*D0z3M_ybh%R|l^do(}KfQ!v_xKNZJG3-z z*4Zrk{ONrOFZ=vo_HhXh?2fUqu{1N-^5`66G9BLCc7RMbHlAw?0C^^camaKp<5^Il z@S*Y^b68#LO%LMCW&%K^f*=k6G(Pq}t}&=X0Edm4VczgjK&L`n1Qo;~LDh(ljrzb8 zm==6AK%(dpbkfNH1cXK7e$%Jpu6z;S8Bhe7S?V-Ft~x7SfL;{>X?ChoF5vTrHD#() z5Sg8ssm{_0hRZI?%mgI#Jiz7ZY^hQKm=sc~N((4dk}O4%1uz;(@1Rr2_u-fb6<{`$ zvrxu#7yz5}&YL9(q#-XA#RD1I{ik_4Zauh_8 zB~c|KlS-{s@ZmI_G+K!)9q?u9j0^?bW&fBa!AP_mG<^>dN5e_jr1{y#lW8{ zS})x`f*i#`6X3xQ;pvYDAcwLc5qQ&~5yk&qoHL9;cmyH5HG~2=;qx_+k$V7sXa^8P zh;yOAFpm^C32$gad7v5)dBDFABiK*`W8tQfAwf?BUKrF3@eV;elo!H7oWPSpndk#W zpoMtzkf0|Nhv)r zi9QJ9F2uu;peMpt1a(8a+Ypb~2PV)*3&7q;0a}4kBI`0W)Y$;uraD6V z$qZH=orDCTx}e`Lai$`_JK=;J`3+9-cWxc{2V0z~QAt#P@lRXtf!)y>kl-{O5Ql&$ zV+@u7WrCr#LL4q2ngilPdj6}Po4^G?b~8PX(%UT&hii!PL4IewF4F6|abQDmVx0d1 D&r$0x literal 0 HcmV?d00001 diff --git a/final/test/Assembler/2002-01-24-BadSymbolTableAssert.ll b/final/test/Assembler/2002-01-24-BadSymbolTableAssert.ll new file mode 100644 index 00000000000..7c49e2bd993 --- /dev/null +++ b/final/test/Assembler/2002-01-24-BadSymbolTableAssert.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as %s -o /dev/null + +; This testcase failed due to a bad assertion in SymbolTable.cpp, removed in +; the 1.20 revision. Basically the symbol table assumed that if there was an +; abstract type in the symbol table, [in this case for the entry %foo of type +; void(opaque)* ], that there should have also been named types by now. This +; was obviously not the case here, and this is valid. Assertion disabled. 
+
+%bb = type i32
+
+declare void @foo(i32)
diff --git a/final/test/Assembler/2002-01-24-ValueRefineAbsType.ll b/final/test/Assembler/2002-01-24-ValueRefineAbsType.ll
new file mode 100644
index 00000000000..6e49674a32f
--- /dev/null
+++ b/final/test/Assembler/2002-01-24-ValueRefineAbsType.ll
@@ -0,0 +1,23 @@
+; RUN: llvm-as %s -o /dev/null
+
+; This testcase used to fail due to a lack of this diff in Value.cpp:
+; diff -r1.16 Value.cpp
+; 11c11
+; < #include "llvm/Type.h"
+; ---
+; > #include "llvm/DerivedTypes.h"
+; 74c74,76
+; < assert(Ty.get() == (const Type*)OldTy &&"Can't refine anything but my type!");
+; ---
+; > assert(Ty.get() == OldTy &&"Can't refine anything but my type!");
+; > if (OldTy == NewTy && !OldTy->isAbstract())
+; > Ty.removeUserFromConcrete();
+;
+; This was causing an assertion failure, due to the "foo" Method object never
+; releasing its reference to the opaque %bb value.
+;
+
+%bb = type i32
+%exception_descriptor = type i32
+
+declare void @foo(i32)
diff --git a/final/test/Assembler/2002-02-19-TypeParsing.ll b/final/test/Assembler/2002-02-19-TypeParsing.ll
new file mode 100644
index 00000000000..0df67849784
--- /dev/null
+++ b/final/test/Assembler/2002-02-19-TypeParsing.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-as %s -o /dev/null
+
+%Hosp = type { i32, i32, i32, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* } }
diff --git a/final/test/Assembler/2002-03-08-NameCollision.ll b/final/test/Assembler/2002-03-08-NameCollision.ll
new file mode 100644
index 00000000000..b49789b2902
--- /dev/null
+++ b/final/test/Assembler/2002-03-08-NameCollision.ll
@@ -0,0 +1,15 @@
+; RUN: llvm-as %s -o /dev/null
+
+; Method arguments were being checked for collisions at the global scope before
+; the method object was created by the parser. Because of this, false
+; collisions could occur that would cause the following error message to be
+; produced:
+;
+; Redefinition of value named 'X' in the 'int *' type plane!
+;
+; Fixed by delaying binding of variable names until _after_ the method symtab is
+; created.
+;
+@X = global i32 4 ; [#uses=0]
+
+declare i32 @xxx(i32*)
diff --git a/final/test/Assembler/2002-03-08-NameCollision2.ll b/final/test/Assembler/2002-03-08-NameCollision2.ll
new file mode 100644
index 00000000000..1f7a4e16f8b
--- /dev/null
+++ b/final/test/Assembler/2002-03-08-NameCollision2.ll
@@ -0,0 +1,12 @@
+; RUN: llvm-as %s -o /dev/null
+
+; Another name collision problem. Here the problem was that if a forward
+; declaration for a method was found, this would cause spurious conflicts
+; to be detected between locals and globals.
+;
+@Var = external global i32 ; [#uses=0]
+
+define void @foo() {
+ %Var = alloca i32 ; [#uses=0]
+ ret void
+}
diff --git a/final/test/Assembler/2002-04-04-PureVirtMethCall.ll b/final/test/Assembler/2002-04-04-PureVirtMethCall.ll
new file mode 100644
index 00000000000..29aed55a3a9
--- /dev/null
+++ b/final/test/Assembler/2002-04-04-PureVirtMethCall.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as %s -o /dev/null
+
+ type { { \2 *, \4 ** },
+ { \2 *, \4 ** }
+ }
+
diff --git a/final/test/Assembler/2002-04-04-PureVirtMethCall2.ll b/final/test/Assembler/2002-04-04-PureVirtMethCall2.ll
new file mode 100644
index 00000000000..a0968999a92
--- /dev/null
+++ b/final/test/Assembler/2002-04-04-PureVirtMethCall2.ll
@@ -0,0 +1,5 @@
+; RUN: llvm-as %s -o /dev/null
+
+%t = type { { \2*, \2 },
+ { \2*, \2 }
+ }
diff --git a/final/test/Assembler/2002-04-05-TypeParsing.ll b/final/test/Assembler/2002-04-05-TypeParsing.ll
new file mode 100644
index 00000000000..f725944b921
--- /dev/null
+++ b/final/test/Assembler/2002-04-05-TypeParsing.ll
@@ -0,0 +1,3 @@
+; RUN: llvm-as %s -o /dev/null
+
+ %Hosp = type { { \2*, { \2, %Hosp }* }, { \2*, { \2, %Hosp }* } }
diff --git a/final/test/Assembler/2002-04-07-HexFloatConstants.ll b/final/test/Assembler/2002-04-07-HexFloatConstants.ll
new file mode 100644
index 00000000000..b0d7cc0e43a
--- /dev/null
+++ b/final/test/Assembler/2002-04-07-HexFloatConstants.ll
@@ -0,0 +1,16 @@
+; This testcase checks to make sure that the assembler can handle floating
+; point constants in IEEE hex format. This also checks that the disassembler,
+; when presented with a FP constant that cannot be represented exactly in
+; exponential form, outputs it correctly in hex format. This is a distillation
+; of the bug that was causing the Olden Health benchmark to output incorrect
+; results!
+;
+; RUN: opt -constprop -S > %t.1 < %s
+; RUN: llvm-as < %s | llvm-dis | llvm-as | opt -constprop | \
+; RUN: llvm-dis > %t.2
+; RUN: diff %t.1 %t.2
+
+define double @test() {
+ %tmp = fmul double 7.200000e+101, 0x427F4000 ; [#uses=1]
+ ret double %tmp
+}
diff --git a/final/test/Assembler/2002-04-07-InfConstant.ll b/final/test/Assembler/2002-04-07-InfConstant.ll
new file mode 100644
index 00000000000..71837c94378
--- /dev/null
+++ b/final/test/Assembler/2002-04-07-InfConstant.ll
@@ -0,0 +1,9 @@
+; The output formatter prints out 1.0e100 as Inf!
+;
+; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | grep 0x7FF0000000000000
+
+define float @test() {
+ %tmp = fmul float 0x7FF0000000000000, 1.000000e+01 ; [#uses=1]
+ ret float %tmp
+}
+
diff --git a/final/test/Assembler/2002-04-29-NameBinding.ll b/final/test/Assembler/2002-04-29-NameBinding.ll
new file mode 100644
index 00000000000..7960c20ddce
--- /dev/null
+++ b/final/test/Assembler/2002-04-29-NameBinding.ll
@@ -0,0 +1,18 @@
+; There should be NO references to the global v1. The local v1 should
+; have all of the references!
+;
+; Check by running globaldce, which will remove the constant if there are
+; no references to it!
+;
+; RUN: opt < %s -globaldce -S | \
+; RUN: not grep constant
+;
+
+@v1 = internal constant i32 5
+
+define i32 @createtask() {
+ %v1 = alloca i32 ;; Alloca should have one use!
+ %reg112 = load i32* %v1 ;; This load should not use the global!
+ ret i32 %reg112
+}
+
diff --git a/final/test/Assembler/2002-05-02-InvalidForwardRef.ll b/final/test/Assembler/2002-05-02-InvalidForwardRef.ll
new file mode 100644
index 00000000000..234545c2936
--- /dev/null
+++ b/final/test/Assembler/2002-05-02-InvalidForwardRef.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as %s -o /dev/null
+; It looks like the assembler is not forward resolving the function declaration
+; correctly.
+
+define void @test() {
+ call void @foo( )
+ ret void
+}
+
+declare void @foo()
diff --git a/final/test/Assembler/2002-05-02-ParseError.ll b/final/test/Assembler/2002-05-02-ParseError.ll
new file mode 100644
index 00000000000..5a9817c1eaa
--- /dev/null
+++ b/final/test/Assembler/2002-05-02-ParseError.ll
@@ -0,0 +1,7 @@
+; RUN: llvm-as %s -o /dev/null
+
+%T = type i32 *
+
+define %T @test() {
+ ret %T null
+}
diff --git a/final/test/Assembler/2002-07-08-HugePerformanceProblem.ll b/final/test/Assembler/2002-07-08-HugePerformanceProblem.ll
new file mode 100644
index 00000000000..52c90af18c8
--- /dev/null
+++ b/final/test/Assembler/2002-07-08-HugePerformanceProblem.ll
@@ -0,0 +1,67 @@
+; This file takes about 48 __MINUTES__ to assemble using as. This is WAY too
+; long. The type resolution code needs to be sped up a lot.
+; RUN: llvm-as %s -o /dev/null
+ %ALL_INTERSECTIONS_METHOD = type i32 (%OBJECT*, %RAY*, %ISTACK*)*
+ %BBOX = type { %BBOX_VECT, %BBOX_VECT }
+ %BBOX_TREE = type { i16, i16, %BBOX, %BBOX_TREE** }
+ %BBOX_VECT = type [3 x float]
+ %BLEND_MAP = type { i16, i16, i16, i32, %BLEND_MAP_ENTRY* }
+ %BLEND_MAP_ENTRY = type { float, i8, { %COLOUR, %PIGMENT*, %TNORMAL*, %TEXTURE*, %UV_VECT } }
+ %CAMERA = type { %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, double, double, i32, double, double, i32, double, %TNORMAL* }
+ %COLOUR = type [5 x float]
+ %COPY_METHOD = type i8* (%OBJECT*)*
+ %COUNTER = type { i32, i32 }
+ %DENSITY_FILE = type { i32, %DENSITY_FILE_DATA* }
+ %DENSITY_FILE_DATA = type { i32, i8*, i32, i32, i32, i8*** }
+ %DESTROY_METHOD = type void (%OBJECT*)*
+ %FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
+ %FILE_HANDLE = type { i8*, i32, i32, i32, i32, i8*, %FILE*, i32, i32 (%FILE_HANDLE*, i8*, i32*, i32*, i32, i32)*, void (%FILE_HANDLE*, %COLOUR*, i32)*, i32 (%FILE_HANDLE*, %COLOUR*, i32*)*, void (%IMAGE*, i8*)*, void (%FILE_HANDLE*)* }
+ %FINISH = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, %BBOX_VECT, %BBOX_VECT }
+ %FOG = type { i32, double, double, double, %COLOUR, %VECTOR, %TURB*, float, %FOG* }
+ %FRAME = type { %CAMERA*, i32, i32, i32, %LIGHT_SOURCE*, %OBJECT*, double, double, %COLOUR, %COLOUR, %COLOUR, %IMEDIA*, %FOG*, %RAINBOW*, %SKYSPHERE* }
+ %FRAMESEQ = type { i32, double, i32, i32, double, i32, i32, double, i32, double, i32, double, i32, i32 }
+ %IMAGE = type { i32, i32, i32, i32, i32, i16, i16, %VECTOR, float, float, i32, i32, i16, %IMAGE_COLOUR*, { %IMAGE_LINE*, i8** } }
+ %IMAGE_COLOUR = type { i16, i16, i16, i16, i16 }
+ %IMAGE_LINE = type { i8*, i8*, i8*, i8* }
+ %IMEDIA = type { i32, i32, i32, i32, i32, double, double, i32, i32, i32, i32, %COLOUR, %COLOUR, %COLOUR, %COLOUR, double, double, double, double*, %PIGMENT*, %IMEDIA* }
+ %INSIDE_METHOD = type i32 (double*, %OBJECT*)*
+ %INTERIOR = type { i32, i32, float, float, float, float, float, %IMEDIA* }
+ %INTERSECTION = type { double, %VECTOR, %VECTOR, %OBJECT*, i32, i32, double, double, i8* }
+ %INVERT_METHOD = type void (%OBJECT*)*
+ %ISTACK = type { %ISTACK*, %INTERSECTION*, i32 }
+ %LIGHT_SOURCE = type { %METHODS*,
i32, %OBJECT*, %TEXTURE*, %INTERIOR*, %OBJECT*, %OBJECT*, %BBOX, i32, %OBJECT*, %COLOUR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, double, double, double, double, double, %LIGHT_SOURCE*, i8, i8, i8, i8, i32, i32, i32, i32, i32, %COLOUR**, %OBJECT*, [6 x %PROJECT_TREE_NODE*] } + %MATRIX = type [4 x %VECTOR_4D] + %METHODS = type { %ALL_INTERSECTIONS_METHOD, %INSIDE_METHOD, %NORMAL_METHOD, %COPY_METHOD, %ROTATE_METHOD, %ROTATE_METHOD, %ROTATE_METHOD, %TRANSFORM_METHOD, %DESTROY_METHOD, %DESTROY_METHOD } + %NORMAL_METHOD = type void (double*, %OBJECT*, %INTERSECTION*)* + %OBJECT = type { %METHODS*, i32, %OBJECT*, %TEXTURE*, %INTERIOR*, %OBJECT*, %OBJECT*, %BBOX, i32 } + %Opts = type { i32, i32, i8, i8, i8, i32, [150 x i8], [150 x i8], [150 x i8], [150 x i8], [150 x i8], double, double, i32, i32, double, double, i32, [25 x i8*], i32, i32, i32, double, double, i32, i32, double, double, double, i32, i32, i32, i32, i32, %FRAMESEQ, double, i32, double, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [150 x i8], %SHELLDATA*, [150 x i8], i32, i32 } + %PIGMENT = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, %COLOUR } + %PRIORITY_QUEUE = type { i32, i32, %QELEM* } + %PROJECT = type { i32, i32, i32, i32 } + %PROJECT_QUEUE = type { i32, i32, %PROJECT_TREE_NODE** } + %PROJECT_TREE_NODE = type { i16, %BBOX_TREE*, %PROJECT, i16, %PROJECT_TREE_NODE** } + %QELEM = type { double, %BBOX_TREE* } + %RAINBOW = type { double, double, double, double, double, double, double, %VECTOR, %VECTOR, %VECTOR, %PIGMENT*, %RAINBOW* } + %RAY = type { %VECTOR, %VECTOR, i32, [100 x %INTERIOR*] } + %RAYINFO = type { %VECTOR, %VECTOR, %VECTORI, %VECTORI } + %RGB = type [3 x float] + %ROTATE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)* + %SCALE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)* + %SHELLDATA = type { i32, i32, [250 x i8] } + %SKYSPHERE = type { i32, %PIGMENT**, %TRANSFORM* } + %SNGL_VECT = type [3 x float] + %TEXTURE = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, %TEXTURE*, %PIGMENT*, %TNORMAL*, %FINISH*, %TEXTURE*, i32 } + %TNORMAL = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, float } + %TPATTERN = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float } } + %TRANSFORM = type { %MATRIX, %MATRIX } + %TRANSFORM_METHOD = type void (%OBJECT*, %TRANSFORM*)* + %TRANSLATE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)* + %TURB = type { i16, %WARP*, %VECTOR, i32, float, float } + %UV_VECT = type [2 x double] + %VECTOR = type [3 x double] + %VECTORI = type [3 x i32] + %VECTOR_4D = type [4 x double] + %WARP = type { i16, %WARP* } + %__FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 } + %_h_val = type { [2 x i32], double } + %complex.float = type { float, float } diff --git a/final/test/Assembler/2002-07-14-InternalLossage.ll b/final/test/Assembler/2002-07-14-InternalLossage.ll new file mode 100644 index 00000000000..f93f1c4a5fb --- /dev/null +++ b/final/test/Assembler/2002-07-14-InternalLossage.ll @@ -0,0 +1,9 @@ +; Test to make sure that the 
'internal' tag is not lost! +; +; RUN: llvm-as < %s | llvm-dis | grep internal + +declare void @foo() + +define internal void @foo() { + ret void +} diff --git a/final/test/Assembler/2002-07-14-OpaqueType.ll b/final/test/Assembler/2002-07-14-OpaqueType.ll new file mode 100644 index 00000000000..662fb0f31c1 --- /dev/null +++ b/final/test/Assembler/2002-07-14-OpaqueType.ll @@ -0,0 +1,10 @@ +; Test that opaque types are preserved correctly +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis +; + +%Ty = type opaque + +define %Ty* @func() { + ret %Ty* null +} + diff --git a/final/test/Assembler/2002-07-25-ParserAssertionFailure.ll b/final/test/Assembler/2002-07-25-ParserAssertionFailure.ll new file mode 100644 index 00000000000..3c5c5546b43 --- /dev/null +++ b/final/test/Assembler/2002-07-25-ParserAssertionFailure.ll @@ -0,0 +1,13 @@ +; Make sure we don't get an assertion failure, even though this is a parse +; error +; RUN: not llvm-as %s -o /dev/null |& grep {'@foo' defined with} + +%ty = type void (i32) + +declare %ty* @foo() + +define void @test() { + call %ty* @foo( ) ; <%ty*>:0 [#uses=0] + ret void +} + diff --git a/final/test/Assembler/2002-07-25-QuoteInString.ll b/final/test/Assembler/2002-07-25-QuoteInString.ll new file mode 100644 index 00000000000..facc5bdae62 --- /dev/null +++ b/final/test/Assembler/2002-07-25-QuoteInString.ll @@ -0,0 +1,5 @@ +; Test double quotes in strings work correctly! +; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis +; +@str = internal global [6 x i8] c"\22foo\22\00" ; <[6 x i8]*> [#uses=0] + diff --git a/final/test/Assembler/2002-07-25-ReturnPtrFunction.ll b/final/test/Assembler/2002-07-25-ReturnPtrFunction.ll new file mode 100644 index 00000000000..515d105c14c --- /dev/null +++ b/final/test/Assembler/2002-07-25-ReturnPtrFunction.ll @@ -0,0 +1,15 @@ +; Test that returning a pointer to a function causes the disassembler to print +; the right thing. 
+; +; RUN: llvm-as < %s | llvm-dis | llvm-as + +%ty = type void (i32) + +declare %ty* @foo() + +define void @test() { + call %ty* ()* @foo( ) ; <%ty*>:1 [#uses=0] + ret void +} + + diff --git a/final/test/Assembler/2002-07-31-SlashInString.ll b/final/test/Assembler/2002-07-31-SlashInString.ll new file mode 100644 index 00000000000..ff48258870d --- /dev/null +++ b/final/test/Assembler/2002-07-31-SlashInString.ll @@ -0,0 +1,5 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as + +; Make sure that \\ works in a string initializer +@Slashtest = internal global [8 x i8] c"\5Cbegin{\00" + diff --git a/final/test/Assembler/2002-08-15-CastAmbiguity.ll b/final/test/Assembler/2002-08-15-CastAmbiguity.ll new file mode 100644 index 00000000000..c71652446d6 --- /dev/null +++ b/final/test/Assembler/2002-08-15-CastAmbiguity.ll @@ -0,0 +1,6 @@ +; RUN: llvm-as %s -o /dev/null + +define void @test(i32 %X) { + call void @test( i32 6 ) + ret void +} diff --git a/final/test/Assembler/2002-08-15-ConstantExprProblem.ll b/final/test/Assembler/2002-08-15-ConstantExprProblem.ll new file mode 100644 index 00000000000..02b9ea9adb8 --- /dev/null +++ b/final/test/Assembler/2002-08-15-ConstantExprProblem.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as %s -o /dev/null + +@.LC0 = internal global [12 x i8] c"hello world\00" ; <[12 x i8]*> [#uses=1] + +define i8* @test() { +; +}; diff --git a/final/test/FrontendC++/2009-09-04-modify-crash.cpp b/final/test/FrontendC++/2009-09-04-modify-crash.cpp new file mode 100644 index 00000000000..89274e09c7e --- /dev/null +++ b/final/test/FrontendC++/2009-09-04-modify-crash.cpp @@ -0,0 +1,7 @@ +// RUN: %llvmgxx %s -fapple-kext -S -o - +// The extra check in 71555 caused this to crash on Darwin X86 +// in an assert build. +class foo { + virtual ~foo (); +}; +foo::~foo(){} diff --git a/final/test/FrontendC++/2009-09-09-packed-layout.cpp b/final/test/FrontendC++/2009-09-09-packed-layout.cpp new file mode 100644 index 00000000000..921aad79f73 --- /dev/null +++ b/final/test/FrontendC++/2009-09-09-packed-layout.cpp @@ -0,0 +1,18 @@ +// RUN: %llvmgxx -S -m32 %s -o /dev/null +class X { + public: + virtual ~X(); + short y; +}; +#pragma pack(push, 1) +class Z : public X { + public: enum { foo = ('x') }; + virtual int y() const; +}; +#pragma pack(pop) +class Y : public X { +public: enum { foo = ('y'), bar = 0 }; +}; +X x; +Y y; +Z z; diff --git a/final/test/FrontendC++/2009-10-27-crash.cpp b/final/test/FrontendC++/2009-10-27-crash.cpp new file mode 100644 index 00000000000..da73988b697 --- /dev/null +++ b/final/test/FrontendC++/2009-10-27-crash.cpp @@ -0,0 +1,43 @@ +// RUN: %llvmgxx -S %s -o /dev/null +// Radar 7328944 + +typedef struct +{ + unsigned short a : 1; + unsigned short b : 2; + unsigned short c : 1; + unsigned short d : 1; + unsigned short e : 1; + unsigned short f : 1; + unsigned short g : 2; + unsigned short : 7; + union + { + struct + { + unsigned char h : 1; + unsigned char i : 1; + unsigned char j : 1; + unsigned char : 5; + }; + struct + { + unsigned char k : 3; + unsigned char : 5; + }; + }; + unsigned char : 8; +} tt; + +typedef struct +{ + unsigned char s; + tt t; + unsigned int u; +} ttt; + +ttt X = { + 4, + { 0 }, + 55, +}; diff --git a/final/test/FrontendC++/2009-12-23-MissingSext.cpp b/final/test/FrontendC++/2009-12-23-MissingSext.cpp new file mode 100644 index 00000000000..ee978812c29 --- /dev/null +++ b/final/test/FrontendC++/2009-12-23-MissingSext.cpp @@ -0,0 +1,16 @@ +// RUN: %llvmgxx %s -S -o - | FileCheck %s +// The store of p.y into the temporary was not +// getting extended to 32 
bits, so uninitialized +// bits of the temporary were used. 7366161. +struct foo { + char x:8; + signed int y:24; +}; +int bar(struct foo p, int x) { +// CHECK: bar +// CHECK: sext +// CHECK: sext + x = (p.y > x ? x : p.y); + return x; +// CHECK: return +} diff --git a/final/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp b/final/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp new file mode 100644 index 00000000000..ff45412b441 --- /dev/null +++ b/final/test/FrontendC++/2010-02-17-DbgArtificialArg.cpp @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -g -S %s -o - | FileCheck %s +// Here, the second-to-last argument "i32 64" indicates that the artificial flag is set. +// Test that the artificial attribute is attached to the "this" pointer type. +// Radar 7655792 and 7655002 + +class A { +public: + int fn1(int i) const { return i + 2; }; +}; + +int foo() { + A a; + // Matching "i32 64, metadata !} ; [ DW_TAG_pointer_type ]" + // CHECK: i32 64, metadata {{![0-9]+\} ; \[ DW_TAG_pointer_type \]}} + return a.fn1(1); +} diff --git a/final/test/FrontendC++/2010-03-22-empty-baseclass.cpp b/final/test/FrontendC++/2010-03-22-empty-baseclass.cpp new file mode 100644 index 00000000000..bb741c42c84 --- /dev/null +++ b/final/test/FrontendC++/2010-03-22-empty-baseclass.cpp @@ -0,0 +1,134 @@ +// RUN: %llvmgxx -S %s -o - -O2 | FileCheck %s +namespace boost { + namespace detail { + template struct cv_traits_imp {}; + template struct cv_traits_imp {typedef T unqualified_type;}; + } +} +namespace mpl_ {} +namespace boost { + namespace mpl {using namespace mpl_;} + template< typename T > struct remove_cv {typedef typename boost::detail::cv_traits_imp::unqualified_type type;}; + namespace type_traits { + typedef char yes_type; + struct no_type {char padding[8];}; + } +} +namespace mpl_ { + template< bool C_ > struct bool_; + typedef bool_ true_; + typedef bool_ false_; + template< bool C_ > struct bool_ {static const bool value = C_;}; + template< typename T, T N > struct integral_c; +} +namespace boost{ + template struct integral_constant : + public mpl::integral_c {}; + template<> struct integral_constant : public mpl::true_ {}; + template<> struct integral_constant : public mpl::false_ {}; + namespace type_traits { + template struct ice_or; + template + struct ice_or {static const bool value = true; }; + template <> struct ice_or + {static const bool value = false;}; + template struct ice_and; + template + struct ice_and {static const bool value = false;}; + template <> struct ice_and + {static const bool value = true;}; + template struct ice_not {static const bool value = true;}; + }; + namespace detail { + template struct is_union_impl {static const bool value = false;}; + } + template< typename T > struct is_union : + ::boost::integral_constant::value> {}; + namespace detail { + template ::boost::type_traits::yes_type is_class_tester(void(U::*)(void)); + template ::boost::type_traits::no_type is_class_tester(...); + template struct is_class_impl { + static const bool value = (::boost::type_traits::ice_and< sizeof(is_class_tester(0)) + == sizeof(::boost::type_traits::yes_type), + ::boost::type_traits::ice_not< ::boost::is_union::value >::value >::value);}; +} + template struct is_class: + ::boost::integral_constant::value> { }; +namespace detail { + template struct empty_helper_t1: public T {int i[256];}; + struct empty_helper_t2 {int i[256];}; + template struct empty_helper + {static const bool value = false;}; + template struct empty_helper + {static const bool value = (sizeof(empty_helper_t1) == sizeof(empty_helper_t2));}; + template
struct is_empty_impl { + typedef typename remove_cv::type cvt; + static const bool value = (::boost::type_traits::ice_or< ::boost::detail::empty_helper + ::value>::value, false>::value); + }; +} +template struct is_empty: +::boost::integral_constant::value> {}; +template struct is_same: +::boost::integral_constant {}; +template struct call_traits {typedef T& reference;}; +namespace details { + template + struct compressed_pair_switch; + template + struct compressed_pair_switch + {static const int value = 1;}; + template class compressed_pair_imp; + template class compressed_pair_imp: + protected ::boost::remove_cv::type { + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + first_reference first() {return *this;} + second_reference second() {return second_;} + second_type second_; + }; +} +template class compressed_pair: + private ::boost::details::compressed_pair_imp::type, + typename remove_cv::type>::value, + ::boost::is_empty::value, ::boost::is_empty::value>::value> + { + private: + typedef details::compressed_pair_imp::type, + typename remove_cv::type>::value, + ::boost::is_empty::value, ::boost::is_empty::value>::value> base; + public: + typedef T1 first_type; + typedef T2 second_type; + typedef typename call_traits::reference first_reference; + typedef typename call_traits::reference second_reference; + first_reference first() {return base::first();} + second_reference second() {return base::second();} + }; +} +struct empty_base_t {}; +struct empty_t : empty_base_t {}; +typedef boost::compressed_pair data_t; +extern "C" {int printf(const char * , ...);} +extern "C" {void abort(void);} +int main (int argc, char * const argv[]) { + data_t x; + x.second() = -3; + // This store should be elided: + x.first() = empty_t(); + // If x.second() has been clobbered by the elided store, fail. 
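The check above relies on a layout guarantee worth spelling out: assigning an empty class object writes no bytes, so even when the empty base subobject shares storage with adjacent data (the empty base optimization), the elided store may not clobber the neighbouring member. A self-contained sketch of that invariant, with hypothetical names:

    struct empty_base {};                 // no data members
    struct pair_like : empty_base {       // EBO: the base contributes no storage
      int second_;
    };
    void invariant_check() {
      pair_like p;
      p.second_ = -3;
      static_cast<empty_base&>(p) = empty_base();  // assigning an empty object is a no-op
      // p.second_ must still be -3 here; a hypothetical mirror of the test's check
    }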
+ if (x.second() != -3) { + printf("x.second() was clobbered\n"); + // CHECK-NOT: x.second() was clobbered + abort(); + } + return 0; +} +// CHECK: ret i32 diff --git a/final/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp b/final/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp new file mode 100644 index 00000000000..761c0dc097a --- /dev/null +++ b/final/test/FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -g -S -O2 %s -o - | FileCheck %s + +class foo { +public: + int bar(int x); + static int baz(int x); +}; + +int foo::bar(int x) { + // CHECK: {{i32 [0-9]+, i1 true(, i[0-9]+ [^\}]+[}]|[}]) ; \[ DW_TAG_subprogram \]}} + return x*4 + 1; +} + +int foo::baz(int x) { + // CHECK: {{i32 [0-9]+, i1 true(, i[0-9]+ [^\},]+[}]|[}]) ; \[ DW_TAG_subprogram \]}} + return x*4 + 1; +} + diff --git a/final/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp b/final/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp new file mode 100644 index 00000000000..e6165c0cce2 --- /dev/null +++ b/final/test/FrontendC++/2010-05-10-Var-DbgInfo.cpp @@ -0,0 +1,43 @@ +// RUN: %llvmgcc -S -O0 -g %s -o /dev/null +// PR 7104 + +struct A { + int Ai; +}; + +struct B : public A {}; +struct C : public B {}; + +const char * f(int C::*){ return ""; } +int f(int B::*) { return 1; } + +struct D : public C {}; + +const char * g(int B::*){ return ""; } +int g(int D::*) { return 1; } + +void test() +{ + int i = f(&A::Ai); + + const char * str = g(&A::Ai); +} + +// conversion of B::* to C::* is better than conversion of A::* to C::* +typedef void (A::*pmfa)(); +typedef void (B::*pmfb)(); +typedef void (C::*pmfc)(); + +struct X { + operator pmfa(); + operator pmfb(); +}; + + +void g(pmfc); + +void test2(X x) +{ + g(x); +} + diff --git a/final/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp b/final/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp new file mode 100644 index 00000000000..9203dbd0bd9 --- /dev/null +++ b/final/test/FrontendC++/2010-05-11-alwaysinlineinstantiation.cpp @@ -0,0 +1,31 @@ +// RUN: %llvmgxx -xc++ %s -S -o - | not grep ZN12basic_stringIcEC1Ev +// RUN: %llvmgxx -xc++ %s -S -o - | grep ZN12basic_stringIcED1Ev | count 2 + +template <class charT> +class basic_string +{ +public: + basic_string(); + ~basic_string(); +}; + +template <class charT> +__attribute__ ((__visibility__("hidden"), __always_inline__)) inline +basic_string<charT>::basic_string() +{ +} + +template <class charT> +inline +basic_string<charT>::~basic_string() +{ +} + +typedef basic_string<char> string; + +extern template class basic_string<char>; + +int main() +{ + string s; +} diff --git a/final/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp b/final/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp new file mode 100644 index 00000000000..c2d6abe97fc --- /dev/null +++ b/final/test/FrontendC++/2010-05-12-PtrToMember-Dbg.cpp @@ -0,0 +1,17 @@ +//RUN: %llvmgxx -O0 -S -g -o - %s | grep DW_TAG_auto_variable +class Foo +{ + public: + int x; + int y; + Foo (int i, int j) { x = i; y = j; } +}; + + +Foo foo(10, 11); + +int main() { + int Foo::* pmi = &Foo::y; + return foo.*pmi; +} + diff --git a/final/test/FrontendC++/2010-06-21-LocalVarDbg.cpp b/final/test/FrontendC++/2010-06-21-LocalVarDbg.cpp new file mode 100644 index 00000000000..48d821508dd --- /dev/null +++ b/final/test/FrontendC++/2010-06-21-LocalVarDbg.cpp @@ -0,0 +1,13 @@ +// RUN: %llvmgxx -g -Os -S %s -o - | llvm-as -disable-output +// Do not use function name to create named metadata used to hold +// local variable info. For example, llvm.dbg.lv.~A is an invalid name.
+class A { +public: + ~A() { int i = 0; i++; } +}; + +int foo(int i) { + A a; + return 0; +} + diff --git a/final/test/FrontendC++/2010-06-22-BitfieldInit.cpp b/final/test/FrontendC++/2010-06-22-BitfieldInit.cpp new file mode 100644 index 00000000000..8dceb78bfc6 --- /dev/null +++ b/final/test/FrontendC++/2010-06-22-BitfieldInit.cpp @@ -0,0 +1,20 @@ +// RUN: %llvmgxx -g -S %s +struct TEST2 +{ + int subid:32; + int :0; +}; + +typedef struct _TEST3 +{ + TEST2 foo; + TEST2 foo2; +} TEST3; + +TEST3 test = + { + {0}, + {0} + }; + +int main() { return 0; } diff --git a/final/test/FrontendC++/2010-06-22-ZeroBitfield.cpp b/final/test/FrontendC++/2010-06-22-ZeroBitfield.cpp new file mode 100644 index 00000000000..9c4f2629f74 --- /dev/null +++ b/final/test/FrontendC++/2010-06-22-ZeroBitfield.cpp @@ -0,0 +1,5 @@ +// RUN: %llvmgxx -g -S %s +struct s8_0 { unsigned : 0; }; +struct s8_1 { double x; }; +struct s8 { s8_0 a; s8_1 b; }; +s8 f8() { return s8(); } diff --git a/final/test/FrontendC++/2010-07-19-nowarn.cpp b/final/test/FrontendC++/2010-07-19-nowarn.cpp new file mode 100644 index 00000000000..a61a84ff28b --- /dev/null +++ b/final/test/FrontendC++/2010-07-19-nowarn.cpp @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -S -m32 -fasm-blocks -o /dev/null +// This should not warn about unreferenced label. 8195660. +// XFAIL: * +// XTARGET: x86,i386,i686 + +void quarterAsm(int array[], int len) +{ + __asm + { + mov esi, array; + mov ecx, len; + shr ecx, 2; +loop: + movdqa xmm0, [esi]; + psrad xmm0, 2; + movdqa [esi], xmm0; + add esi, 16; + sub ecx, 1; + jnz loop; + } +} diff --git a/final/test/FrontendC++/2010-07-23-DeclLoc.cpp b/final/test/FrontendC++/2010-07-23-DeclLoc.cpp new file mode 100644 index 00000000000..9bf432beb72 --- /dev/null +++ b/final/test/FrontendC++/2010-07-23-DeclLoc.cpp @@ -0,0 +1,86 @@ +// RUN: %llvmgxx -S -g %s -o - | FileCheck %s +// Require the template function declaration refer to the correct filename. 
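A note on the mechanism this test depends on: the preprocessor linemarkers embedded in its body (# 16 "decl_should_be_here.hpp" and # 97 "wrong_place_for_decl.cpp") reassign the source location the compiler records for whatever follows them, which is how the test pins the template declaration to one file and the call site to another. A minimal sketch of the same trick, with a hypothetical function name:

    # 16 "decl_should_be_here.hpp"      // declarations below report this file/line
    template <typename T> void tmpl_fn(T) {}
    # 97 "wrong_place_for_decl.cpp"     // code below reports this file instead
    void caller() { tmpl_fn(0); }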
+// First, locate the function decl in metadata, and pluck out the file handle: +// CHECK: {{extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*extract_dwarf_data_from_header.*[^ ]+", metadata !}}[[filehandle:[0-9]+]], +// Second: Require that filehandle refer to the correct filename: +// CHECK: {{^!}}[[filehandle]] = metadata {{![{].*}} metadata !"decl_should_be_here.hpp", +typedef long unsigned int __darwin_size_t; +typedef __darwin_size_t size_t; +typedef unsigned char uint8_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; +namespace std { + template class auto_ptr { + _Tp* _M_ptr; + public: + typedef _Tp element_type; + auto_ptr(element_type* __p = 0) throw() : _M_ptr(__p) { } + element_type& operator*() const throw() { } + }; +} +class Pointer32 { +public: + typedef uint32_t ptr_t; + typedef uint32_t size_t; +}; +class Pointer64 { +public: + typedef uint64_t ptr_t; + typedef uint64_t size_t; +}; +class BigEndian {}; +class LittleEndian {}; +template class SizeAndEndianness { +public: + typedef _SIZE SIZE; +}; +typedef SizeAndEndianness ISA32Little; +typedef SizeAndEndianness ISA32Big; +typedef SizeAndEndianness ISA64Little; +typedef SizeAndEndianness ISA64Big; +template class TRange { +protected: + typename SIZE::ptr_t _location; + typename SIZE::size_t _length; + TRange(typename SIZE::ptr_t location, typename SIZE::size_t length) : _location(location), _length(length) { } +}; +template class TRangeValue : public TRange { + T _value; +public: + TRangeValue(typename SIZE::ptr_t location, typename SIZE::size_t length, T value) : TRange(location, length), _value(value) {}; +}; +template class TAddressRelocator {}; +class CSCppSymbolOwner{}; +class CSCppSymbolOwnerData{}; +template class TRawSymbolOwnerData +{ + TRangeValue< SIZE, uint8_t* > _TEXT_text_section; + const char* _dsym_path; + uint32_t _dylib_current_version; + uint32_t _dylib_compatibility_version; +public: + TRawSymbolOwnerData() : + _TEXT_text_section(0, 0, __null), _dsym_path(__null), _dylib_current_version(0), _dylib_compatibility_version(0) {} +}; +template class TExtendedMachOHeader {}; +# 16 "decl_should_be_here.hpp" +template void extract_dwarf_data_from_header(TExtendedMachOHeader& header, + TRawSymbolOwnerData& symbol_owner_data, + TAddressRelocator* address_relocator) {} +struct CSCppSymbolOwnerHashFunctor { + size_t operator()(const CSCppSymbolOwner& symbol_owner) const { +# 97 "wrong_place_for_decl.cpp" + } +}; +template CSCppSymbolOwnerData* create_symbol_owner_data_arch_specific(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + typedef typename SIZE_AND_ENDIANNESS::SIZE SIZE; + std::auto_ptr< TRawSymbolOwnerData > data(new TRawSymbolOwnerData()); + std::auto_ptr< TExtendedMachOHeader > header; + extract_dwarf_data_from_header(*header, *data, (TAddressRelocator*)__null); +} +CSCppSymbolOwnerData* create_symbol_owner_data2(CSCppSymbolOwner* symbol_owner, const char* dsym_path) { + create_symbol_owner_data_arch_specific< ISA32Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA32Big >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Little >(symbol_owner, dsym_path); + create_symbol_owner_data_arch_specific< ISA64Big >(symbol_owner, dsym_path); +} diff --git a/final/test/FrontendC++/2010-08-31-ByValArg.cpp b/final/test/FrontendC++/2010-08-31-ByValArg.cpp new file mode 100644 index 00000000000..be0d354b1d9 --- /dev/null +++ b/final/test/FrontendC++/2010-08-31-ByValArg.cpp @@ -0,0 +1,53 @@ +// This regression test 
checks byval arguments' debug info. +// Radar 8367011 +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe +// RUN: echo {break get\nrun\np missing_arg.b} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RUN: grep {1 = 4242} + +// XTARGET: x86_64-apple-darwin + +class EVT { +public: + int a; + int b; + int c; +}; + +class VAL { +public: + int x; + int y; +}; +void foo(EVT e); +EVT bar(); + +void get(int *i, unsigned dl, VAL v, VAL *p, unsigned n, EVT missing_arg) { +//CHECK: .ascii "missing_arg" + EVT e = bar(); + if (dl == n) + foo(missing_arg); +} + + +EVT bar() { + EVT e; + return e; +} + +void foo(EVT e) {} + +int main(){ + VAL v; + EVT ma; + ma.a = 1; + ma.b = 4242; + ma.c = 3; + int i = 42; + get (&i, 1, v, &v, 2, ma); + return 0; +} + diff --git a/final/test/FrontendC++/alignstack.cpp b/final/test/FrontendC++/alignstack.cpp new file mode 100644 index 00000000000..4f993d669bf --- /dev/null +++ b/final/test/FrontendC++/alignstack.cpp @@ -0,0 +1,23 @@ +// RUN: %llvmgxx %s -fasm-blocks -S -o - | FileCheck %s +// Complicated expression as jump target +// XFAIL: * +// XTARGET: x86,i386,i686,darwin + +void Method3() +{ +// CHECK: Method3 +// CHECK-NOT: alignstack + asm("foo:"); +// CHECK: return +} + +void Method4() +{ +// CHECK: Method4 +// CHECK: alignstack + asm { + bar: + } +// CHECK: return +} + diff --git a/final/test/FrontendC++/dg.exp b/final/test/FrontendC++/dg.exp new file mode 100644 index 00000000000..fc852e30acf --- /dev/null +++ b/final/test/FrontendC++/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if [ llvm_gcc_supports c++ ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/final/test/FrontendC++/integration-O2.cpp b/final/test/FrontendC++/integration-O2.cpp new file mode 100644 index 00000000000..bb65ac21033 --- /dev/null +++ b/final/test/FrontendC++/integration-O2.cpp @@ -0,0 +1,19 @@ +// RUN: %llvmgxx %s -O2 -S -o - | FileCheck %s + +// This test verifies that we get expected codegen out of the -O2 optimization +// level from the full optimizer. + + + +// Verify that ipsccp is running and can eliminate globals. +static int test1g = 42; +void test1f1() { + if (test1g == 0) test1g = 0; +} +int test1f2() { + return test1g; +} + +// CHECK: @_Z7test1f2v() +// CHECK: entry: +// CHECK-NEXT: ret i32 42 diff --git a/final/test/FrontendC++/m64-ptr.cpp b/final/test/FrontendC++/m64-ptr.cpp new file mode 100644 index 00000000000..f91e2f4f858 --- /dev/null +++ b/final/test/FrontendC++/m64-ptr.cpp @@ -0,0 +1,19 @@ +// RUN: %llvmgxx %s -S -o - | FileCheck %s +// XFAIL: powerpc-apple-darwin + +// Make sure pointers are passed as pointers, not converted to int. +// The first load should be of type i8** in either 32 or 64 bit mode. +// This formerly happened on x86-64, 7375899. 
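In miniature, the property being checked is that a pointer member reached through a reference is loaded as a pointer (load i8** in the 2.9 IR syntax) rather than as an integer that is then cast back; the reduced shape below is illustrative only, not part of the patch:

    struct Ref { const char *Data; long Len; };
    const char *first_byte(Ref &R) {
      return R.Data;   // expect a pointer-typed load, never load i64 plus inttoptr
    }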
+ +class StringRef { +public: + const char *Data; + long Len; +}; +void foo(StringRef X); +void bar(StringRef &A) { +// CHECK: @_Z3barR9StringRef +// CHECK: load i8** + foo(A); +// CHECK: ret void +} diff --git a/final/test/FrontendC++/member-alignment.cpp b/final/test/FrontendC++/member-alignment.cpp new file mode 100644 index 00000000000..c5b20b27932 --- /dev/null +++ b/final/test/FrontendC++/member-alignment.cpp @@ -0,0 +1,20 @@ +// RUN: %llvmgxx -S %s -o - | FileCheck %s +// XFAIL: arm,powerpc + +// rdar://7268289 + +class t { +public: + virtual void foo(void); + void bar(void); +}; + +void +t::bar(void) { +// CHECK: _ZN1t3barEv{{.*}} align 2 +} + +void +t::foo(void) { +// CHECK: _ZN1t3fooEv{{.*}} align 2 +} diff --git a/final/test/FrontendC++/ptr-to-method-devirt.cpp b/final/test/FrontendC++/ptr-to-method-devirt.cpp new file mode 100644 index 00000000000..a5ca5c76559 --- /dev/null +++ b/final/test/FrontendC++/ptr-to-method-devirt.cpp @@ -0,0 +1,14 @@ +// PR1602 +// RUN: %llvmgxx -S %s -o - -O3 | not grep ptrtoint +// RUN: %llvmgxx -S %s -o - -O3 | grep getelementptr | count 1 + + +struct S { virtual void f(); }; + +typedef void (S::*P)(void); + +const P p = &S::f; + +void g(S s) { + (s.*p)(); + } diff --git a/final/test/FrontendC++/thunk-linkonce-odr.cpp b/final/test/FrontendC++/thunk-linkonce-odr.cpp new file mode 100644 index 00000000000..ad72e64aa86 --- /dev/null +++ b/final/test/FrontendC++/thunk-linkonce-odr.cpp @@ -0,0 +1,33 @@ +// RUN: %llvmgxx %s -S -o - | FileCheck %s +// & + +struct A { + virtual int f() { return 1; } +}; + +struct B { + virtual int f() { return 2; } +}; + +struct C : A, B { + virtual int f() { return 3; } +}; + +struct D : C { + virtual int f() { return 4; } +}; + +static int f(D* d) { + B* b = d; + return b->f(); +}; + +int g() { + D d; + return f(&d); +} + +// Thunks should be marked as "linkonce ODR" not "weak". +// +// CHECK: define linkonce_odr i32 @_ZThn{{[48]}}_N1C1fEv +// CHECK: define linkonce_odr i32 @_ZThn{{[48]}}_N1D1fEv diff --git a/final/test/FrontendC++/varargs.cpp b/final/test/FrontendC++/varargs.cpp new file mode 100644 index 00000000000..c4de76acc30 --- /dev/null +++ b/final/test/FrontendC++/varargs.cpp @@ -0,0 +1,19 @@ +// RUN: %llvmgxx -S %s -o - | FileCheck %s +// rdar://7309675 +// PR4678 + +// test1 should be compiled to be a varargs function in the IR even +// though there is no way to do a va_begin. Otherwise, the optimizer +// will warn about 'dropped arguments' at the call site. + +// CHECK: define i32 @_Z5test1z(...) +int test1(...) { + return -1; +} + +// CHECK: call i32 (...)* @_Z5test1z(i32 0) +void test() { + test1(0); +} + + diff --git a/final/test/FrontendC++/weak-external.cpp b/final/test/FrontendC++/weak-external.cpp new file mode 100644 index 00000000000..f4f0ba19ef3 --- /dev/null +++ b/final/test/FrontendC++/weak-external.cpp @@ -0,0 +1,17 @@ +// RUN: %llvmgxx %s -S -O2 -o - | not grep {_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag} +// PR4262 + +// The "basic_string" extern template instantiation declaration is supposed to +// suppress the implicit instantiation of non-inline member functions. Make sure +// that we suppress the implicit instantiation of non-inline member functions +// defined out-of-line; that is, that we aren't instantiating the basic_string +// constructor when we shouldn't be. Such an instantiation forces the implicit +// instantiation of _S_construct.
Since _S_construct is a member +// template, its instantiation is *not* suppressed (despite being in +// basic_string), so we would emit it as a weak definition. + +#include <stdexcept> + +void dummysymbol() { + throw(std::runtime_error("string")); +} diff --git a/final/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp b/final/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp new file mode 100644 index 00000000000..f81854e0cb9 --- /dev/null +++ b/final/test/FrontendC++/x86-64-abi-sret-vs-2word-struct-param.cpp @@ -0,0 +1,27 @@ +// RUN: %llvmgxx -S %s -o - | grep byval | count 2 +// XTARGET: x86 +// PR4242 +// (PR 4242 bug is on 64-bit only, test passes on x86-32 as well) + +struct S { + void* data[3]; +}; + +struct T { + void* data[2]; +}; + +extern "C" S fail(int, int, int, int, T t, void* p) { + S s; + s.data[0] = t.data[0]; + s.data[1] = t.data[1]; + s.data[2] = p; + return s; +} + +extern "C" S* succeed(S* sret, int, int, int, int, T t, void* p) { + sret->data[0] = t.data[0]; + sret->data[1] = t.data[1]; + sret->data[2] = p; + return sret; +} diff --git a/final/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c b/final/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c new file mode 100644 index 00000000000..1779a99942e --- /dev/null +++ b/final/test/FrontendC/2002-01-23-LoadQISIReloadFailure.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* Regression test. Just compile .c -> .ll to test */ +int foo(void) { + unsigned char *pp; + unsigned w_cnt; + + w_cnt += *pp; + + return w_cnt; +} diff --git a/final/test/FrontendC/2002-01-24-ComplexSpaceInType.c b/final/test/FrontendC/2002-01-24-ComplexSpaceInType.c new file mode 100644 index 00000000000..13d92c7306e --- /dev/null +++ b/final/test/FrontendC/2002-01-24-ComplexSpaceInType.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +// This caused generation of the following type name: +// %Array = uninitialized global [10 x %complex int] +// +// which caused problems because of the space in the complex int type +// + +struct { int X, Y; } Array[10]; + +void foo() {} diff --git a/final/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c b/final/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c new file mode 100644 index 00000000000..e619cf46992 --- /dev/null +++ b/final/test/FrontendC/2002-01-24-HandleCallInsnSEGV.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void *dlclose(void*); + +void ap_os_dso_unload(void *handle) +{ + dlclose(handle); + return; /* This return triggers the bug: Weird */ +} diff --git a/final/test/FrontendC/2002-02-13-ConditionalInCall.c b/final/test/FrontendC/2002-02-13-ConditionalInCall.c new file mode 100644 index 00000000000..f361088c1cf --- /dev/null +++ b/final/test/FrontendC/2002-02-13-ConditionalInCall.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* Test problem where bad code was generated when a ?: statement was + in a function call argument */ + +void foo(int, double, float); + +void bar(int x) { + foo(x, x ? 1.0 : 12.5, 1.0f); +} + diff --git a/final/test/FrontendC/2002-02-13-ReloadProblem.c b/final/test/FrontendC/2002-02-13-ReloadProblem.c new file mode 100644 index 00000000000..2ae97b72276 --- /dev/null +++ b/final/test/FrontendC/2002-02-13-ReloadProblem.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* This triggered a problem in reload, fixed by disabling most of the + * steps of compilation in GCC.
Before this change, the code went through + * the entire backend of GCC, even though it was unnecessary for LLVM output; + * now it is skipped entirely, and since reload doesn't run, it can't cause + * a problem. + */ + +extern int tolower(int); + +const char *rangematch(const char *pattern, int test, int c) { + + if ((c <= test) | (tolower(c) <= tolower((unsigned char)test))) + return 0; + + return pattern; +} diff --git a/final/test/FrontendC/2002-02-13-TypeVarNameCollision.c b/final/test/FrontendC/2002-02-13-TypeVarNameCollision.c new file mode 100644 index 00000000000..2dede68a38d --- /dev/null +++ b/final/test/FrontendC/2002-02-13-TypeVarNameCollision.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* This testcase causes a symbol table collision. Type names and variable + * names should be in distinct namespaces + */ + +typedef struct foo { + int X, Y; +} FOO; + +static FOO foo[100]; + +int test() { + return foo[4].Y; +} + diff --git a/final/test/FrontendC/2002-02-13-UnnamedLocal.c b/final/test/FrontendC/2002-02-13-UnnamedLocal.c new file mode 100644 index 00000000000..85aa615205c --- /dev/null +++ b/final/test/FrontendC/2002-02-13-UnnamedLocal.c @@ -0,0 +1,21 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* Testcase for a problem where GCC allocated xqic to a register, + * and did not have a VAR_DECL that explained the stack slot to LLVM. + * Now the LLVM code synthesizes a stack slot if one is presented that + * has not been previously recognized. This is where allocas named + * 'local' come from now. + */ + +typedef struct { + short x; +} foostruct; + +int foo(foostruct ic); + +void test() { + foostruct xqic; + foo(xqic); +} + + diff --git a/final/test/FrontendC/2002-02-14-EntryNodePreds.c b/final/test/FrontendC/2002-02-14-EntryNodePreds.c new file mode 100644 index 00000000000..851af912174 --- /dev/null +++ b/final/test/FrontendC/2002-02-14-EntryNodePreds.c @@ -0,0 +1,37 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC used to generate code that contained a branch to the entry node of + * the do_merge function. This is illegal LLVM code. To fix this, GCC now + * inserts an entry node regardless of whether or not it has to insert allocas. + */ + +struct edge_rec +{ + struct VERTEX *v; + struct edge_rec *next; + int wasseen; + int more_data; +}; + +typedef struct edge_rec *QUAD_EDGE; + +typedef struct { + QUAD_EDGE left, right; +} EDGE_PAIR; + +struct EDGE_STACK { + int ptr; + QUAD_EDGE *elts; + int stack_size; +}; + +int do_merge(QUAD_EDGE ldo, QUAD_EDGE rdo) { + int lvalid; + QUAD_EDGE basel,rcand; + while (1) { + if (!lvalid) { + return (int)basel->next; + } + } +} + diff --git a/final/test/FrontendC/2002-02-16-RenamingTest.c b/final/test/FrontendC/2002-02-16-RenamingTest.c new file mode 100644 index 00000000000..6042b67dc0c --- /dev/null +++ b/final/test/FrontendC/2002-02-16-RenamingTest.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* test that locals are renamed with .
notation */ + +void abc(void *); + +void Test5(double X) { + abc(&X); + { + int X; + abc(&X); + { + float X; + abc(&X); + } + } +} + diff --git a/final/test/FrontendC/2002-02-17-ArgumentAddress.c b/final/test/FrontendC/2002-02-17-ArgumentAddress.c new file mode 100644 index 00000000000..acd7e37a756 --- /dev/null +++ b/final/test/FrontendC/2002-02-17-ArgumentAddress.c @@ -0,0 +1,39 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int test(int X) { + return X; +} + +void abc(int *X); +int def(int Y, int Z) { + abc(&Z); + return Y; +} + +struct Test { short X, x; int Y, Z; }; + +int Testing(struct Test *A) { + return A->X+A->Y; +} + +int Test2(int X, struct Test A, int Y) { + return X+Y+A.X+A.Y; +} +int Test3(struct Test A, struct Test B) { + return A.X+A.Y+B.Y+B.Z; +} + +struct Test Test4(struct Test A) { + return A; +} + +int Test6() { + int B[200]; + return B[4]; +} + +struct STest2 { int X; short Y[4]; double Z; }; + +struct STest2 Test7(struct STest2 X) { + return X; +} diff --git a/final/test/FrontendC/2002-02-18-64bitConstant.c b/final/test/FrontendC/2002-02-18-64bitConstant.c new file mode 100644 index 00000000000..a88587a960d --- /dev/null +++ b/final/test/FrontendC/2002-02-18-64bitConstant.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC wasn't handling 64 bit constants right; fixed */ + +#include <stdio.h> + +int main() { + long long Var = 123455678902ll; + printf("%lld\n", Var); +} diff --git a/final/test/FrontendC/2002-02-18-StaticData.c b/final/test/FrontendC/2002-02-18-StaticData.c new file mode 100644 index 00000000000..76cb0e670a7 --- /dev/null +++ b/final/test/FrontendC/2002-02-18-StaticData.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +double FOO = 17; +double BAR = 12.0; +float XX = 12.0f; + +static char *procnames[] = { + "EXIT" +}; + +void *Data[] = { &FOO, &BAR, &XX }; + diff --git a/final/test/FrontendC/2002-03-11-LargeCharInString.c b/final/test/FrontendC/2002-03-11-LargeCharInString.c new file mode 100644 index 00000000000..b383d03f799 --- /dev/null +++ b/final/test/FrontendC/2002-03-11-LargeCharInString.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <string.h> + +int test(char *X) { + /* LLVM-GCC used to emit: + %.LC0 = internal global [3 x sbyte] c"\1F\FFFFFF8B\00" + */ + return strcmp(X, "\037\213"); +} diff --git a/final/test/FrontendC/2002-03-12-ArrayInitialization.c b/final/test/FrontendC/2002-03-12-ArrayInitialization.c new file mode 100644 index 00000000000..1997a3cd0d9 --- /dev/null +++ b/final/test/FrontendC/2002-03-12-ArrayInitialization.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC would generate bad code if not enough initializers are + specified for an array.
+ */ + +int a[10] = { 0, 2}; + +char str[10] = "x"; + +void *Arr[5] = { 0, 0 }; + +float F[12] = { 1.23f, 34.7f }; + +struct Test { int X; double Y; }; + +struct Test Array[10] = { { 2, 12.0 }, { 3, 24.0 } }; + +int B[4][4] = { { 1, 2, 3, 4}, { 5, 6, 7 }, { 8, 9 } }; diff --git a/final/test/FrontendC/2002-03-12-StructInitialize.c b/final/test/FrontendC/2002-03-12-StructInitialize.c new file mode 100644 index 00000000000..9eb11e187a1 --- /dev/null +++ b/final/test/FrontendC/2002-03-12-StructInitialize.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +typedef struct Connection_Type { + long to; + char type[10]; + long length; +} Connection; + +Connection link[3] += { {1, "link1", 10}, + {2, "link2", 20}, + {3, "link3", 30} }; + diff --git a/final/test/FrontendC/2002-03-12-StructInitializer.c b/final/test/FrontendC/2002-03-12-StructInitializer.c new file mode 100644 index 00000000000..fa333b78a95 --- /dev/null +++ b/final/test/FrontendC/2002-03-12-StructInitializer.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC was not emitting string constants of the correct length when + * embedded into a structure field like this. It thought the strlength + * was -1. + */ + +typedef struct Connection_Type { + long to; + char type[10]; + long length; +} Connection; + +Connection link[3] += { {1, "link1", 10}, + {2, "link2", 20}, + {3, "link3", 30} }; + diff --git a/final/test/FrontendC/2002-03-14-BrokenPHINode.c b/final/test/FrontendC/2002-03-14-BrokenPHINode.c new file mode 100644 index 00000000000..48d9ab705a7 --- /dev/null +++ b/final/test/FrontendC/2002-03-14-BrokenPHINode.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC was generating PHI nodes with an arity < #pred of the basic block the + * PHI node lived in. This was breaking LLVM because the number of entries + * in a PHI node must equal the number of predecessors for a basic block. + */ + +int trys(char *s, int x) +{ + int asa; + double Val; + int LLS; + if (x) { + asa = LLS + asa; + } else { + } + return asa+(int)Val; +} + diff --git a/final/test/FrontendC/2002-03-14-BrokenSSA.c b/final/test/FrontendC/2002-03-14-BrokenSSA.c new file mode 100644 index 00000000000..9dc674aea27 --- /dev/null +++ b/final/test/FrontendC/2002-03-14-BrokenSSA.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* This code used to break GCC's SSA computation code. It would create + uses of B & C that are not dominated by their definitions. 
See: + http://gcc.gnu.org/ml/gcc/2002-03/msg00697.html + */ +int bar(); +int foo() +{ + int a,b,c; + + a = b + c; + b = bar(); + c = bar(); + return a + b + c; +} + diff --git a/final/test/FrontendC/2002-03-14-QuotesInStrConst.c b/final/test/FrontendC/2002-03-14-QuotesInStrConst.c new file mode 100644 index 00000000000..63eaeef46a4 --- /dev/null +++ b/final/test/FrontendC/2002-03-14-QuotesInStrConst.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC was not escaping quotes in string constants correctly, so this would + * get emitted: + * %.LC1 = internal global [32 x sbyte] c"*** Word "%s" on line %d is not\00" + */ + +const char *Foo() { + return "*** Word \"%s\" on line %d is not"; +} diff --git a/final/test/FrontendC/2002-04-07-SwitchStmt.c b/final/test/FrontendC/2002-04-07-SwitchStmt.c new file mode 100644 index 00000000000..33e9c3d7a78 --- /dev/null +++ b/final/test/FrontendC/2002-04-07-SwitchStmt.c @@ -0,0 +1,22 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int printf(const char *, ...); +int foo(); + +int main() { + while (foo()) { + switch (foo()) { + case 0: + case 1: + case 2: + case 3: + printf("3"); + case 4: printf("4"); + case 5: + case 6: + default: + break; + } + } + return 0; +} diff --git a/final/test/FrontendC/2002-04-08-LocalArray.c b/final/test/FrontendC/2002-04-08-LocalArray.c new file mode 100644 index 00000000000..1dc51a09284 --- /dev/null +++ b/final/test/FrontendC/2002-04-08-LocalArray.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* GCC is not outputting the static array to the LLVM backend, so bad things + * happen. Note that if this is defined static, everything seems fine. + */ +double test(unsigned X) { + double student_t[30]={0.0 , 12.706 , 4.303 , 3.182 , 2.776 , 2.571 , + 2.447 , 2.365 , 2.306 , 2.262 , 2.228 , + 2.201 , 2.179 , 2.160 , 2.145 , 2.131 , + 2.120 , 2.110 , 2.101 , 2.093 , 2.086 , + 2.080 , 2.074 , 2.069 , 2.064 , 2.060 , + 2.056 , 2.052 , 2.048 , 2.045 }; + return student_t[X]; +} diff --git a/final/test/FrontendC/2002-04-09-StructRetVal.c b/final/test/FrontendC/2002-04-09-StructRetVal.c new file mode 100644 index 00000000000..de3b6fc26e0 --- /dev/null +++ b/final/test/FrontendC/2002-04-09-StructRetVal.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct S { + int i; + short s1, s2; +}; + +struct S func_returning_struct(void); + +void loop(void) { + func_returning_struct(); +} diff --git a/final/test/FrontendC/2002-04-10-StructParameters.c b/final/test/FrontendC/2002-04-10-StructParameters.c new file mode 100644 index 00000000000..aaaba2abdde --- /dev/null +++ b/final/test/FrontendC/2002-04-10-StructParameters.c @@ -0,0 +1,25 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +typedef struct { + char p; + short q; + char r; + int X; + short Y, Z; + int Q; +} foo; + +int test(foo X, float); +int testE(char,short,char,int,int,float); +void test3(foo *X) { + X->q = 1; +} + +void test2(foo Y) { + testE(Y.p, Y.q, Y.r, Y.X, Y.Y, 0.1f); + test(Y, 0.1f); + test2(Y); + test3(&Y); +} + diff --git a/final/test/FrontendC/2002-05-23-StaticValues.c b/final/test/FrontendC/2002-05-23-StaticValues.c new file mode 100644 index 00000000000..a5753b95f16 --- /dev/null +++ b/final/test/FrontendC/2002-05-23-StaticValues.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* Make sure the frontend is correctly marking static stuff as internal! 
*/ + +int X; +static int Y = 12; + +static void foo(int Z) { + Y = Z; +} + +void *test() { + foo(12); + return &Y; +} diff --git a/final/test/FrontendC/2002-05-23-TypeNameCollision.c b/final/test/FrontendC/2002-05-23-TypeNameCollision.c new file mode 100644 index 00000000000..25d114965d4 --- /dev/null +++ b/final/test/FrontendC/2002-05-23-TypeNameCollision.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* Testcase for when struct tag conflicts with typedef name... grr */ + +typedef struct foo { + struct foo *X; + int Y; +} * foo; + +foo F1; +struct foo *F2; + +enum bar { test1, test2 }; + +typedef float bar; + +enum bar B1; +bar B2; + diff --git a/final/test/FrontendC/2002-05-24-Alloca.c b/final/test/FrontendC/2002-05-24-Alloca.c new file mode 100644 index 00000000000..128bc8b7cfc --- /dev/null +++ b/final/test/FrontendC/2002-05-24-Alloca.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <alloca.h> +#include <string.h> +#include <stdio.h> + +int main(int argc, char **argv) { + char *C = (char*)alloca(argc); + strcpy(C, argv[0]); + puts(C); +} diff --git a/final/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c b/final/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c new file mode 100644 index 00000000000..4380dc7b227 --- /dev/null +++ b/final/test/FrontendC/2002-06-25-FWriteInterfaceFailure.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <stdio.h> + +void test() { + fprintf(stderr, "testing\n"); +} diff --git a/final/test/FrontendC/2002-07-14-MiscListTests.c b/final/test/FrontendC/2002-07-14-MiscListTests.c new file mode 100644 index 00000000000..4a5459ad713 --- /dev/null +++ b/final/test/FrontendC/2002-07-14-MiscListTests.c @@ -0,0 +1,71 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +// Test list stuff + +void *malloc(unsigned); + +// Test opaque structure support. The list type is defined later +struct list; + +struct list *PassThroughList(struct list *L) { + return L; +} + + +// Recursive data structure tests... + +typedef struct list { + int Data; + struct list *Next; +} list; + +list *Data; + +void foo() { + static int Foo = 0; // Test static local variable + Foo += 1; // Increment static variable + + Data = (list*)malloc(12); // This is not a proper list allocation +} + +extern list ListNode1; +list ListNode3 = { 4, 0 }; +list ListNode2 = { 3, &ListNode3 }; +list ListNode0 = { 1, &ListNode1 }; +list ListNode1 = { 2, &ListNode2 }; + + +list ListArray[10]; + +// Iterative insert fn +void InsertIntoListTail(list **L, int Data) { + while (*L) + L = &(*L)->Next; + *L = (list*)malloc(sizeof(list)); + (*L)->Data = Data; + (*L)->Next = 0; +} + +// Recursive list search fn +list *FindData(list *L, int Data) { + if (L == 0) return 0; + if (L->Data == Data) return L; + return FindData(L->Next, Data); +} + +void foundIt(void); + +// Driver fn...
+void DoListStuff() { + list *MyList = 0; + InsertIntoListTail(&MyList, 100); + InsertIntoListTail(&MyList, 12); + InsertIntoListTail(&MyList, 42); + InsertIntoListTail(&MyList, 1123); + InsertIntoListTail(&MyList, 1213); + + if (FindData(MyList, 75)) foundIt(); + if (FindData(MyList, 42)) foundIt(); + if (FindData(MyList, 700)) foundIt(); +} + diff --git a/final/test/FrontendC/2002-07-14-MiscTests.c b/final/test/FrontendC/2002-07-14-MiscTests.c new file mode 100644 index 00000000000..57c412083a6 --- /dev/null +++ b/final/test/FrontendC/2002-07-14-MiscTests.c @@ -0,0 +1,57 @@ +// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null + +/* These are random tests that I used when working on the GCC frontend + originally. */ + +// test floating point comparison! +int floatcomptest(double *X, double *Y, float *x, float *y) { + return *X < *Y || *x < *y; +} + +extern void *malloc(unsigned); + +// Exposed a bug +void *memset_impl(void *dstpp, int c, unsigned len) { + long long int dstp = (long long int) dstpp; + + while (dstp % 4 != 0) + { + ((unsigned char *) dstp)[0] = c; + dstp += 1; + len -= 1; + } + return dstpp; +} + +// TEST problem with signed/unsigned versions of the same constants being shared +// incorrectly! +// +static char *temp; +static int remaining; +static char *localmalloc(int size) { + char *blah; + + if (size>remaining) + { + temp = (char *) malloc(32768); + remaining = 32768; + return temp; + } + return 0; +} + +typedef struct { double X; double Y; int Z; } PBVTest; + +PBVTest testRetStruct(float X, double Y, int Z) { + PBVTest T = { X, Y, Z }; + return T; +} +PBVTest testRetStruct2(void); // external func no inlining + + +double CallRetStruct(float X, double Y, int Z) { + PBVTest T = testRetStruct2(); + return T.X+X+Y+Z; +} + + diff --git a/final/test/FrontendC/2002-07-14-MiscTests2.c b/final/test/FrontendC/2002-07-14-MiscTests2.c new file mode 100644 index 00000000000..f2c7c81c4da --- /dev/null +++ b/final/test/FrontendC/2002-07-14-MiscTests2.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +// Test ?: in function calls +extern fp(int, char*); +char *Ext; +void +__bb_exit_func (void) +{ + fp (12, Ext ? 
Ext : ""); +} + + diff --git a/final/test/FrontendC/2002-07-14-MiscTests3.c b/final/test/FrontendC/2002-07-14-MiscTests3.c new file mode 100644 index 00000000000..7ef7e232d99 --- /dev/null +++ b/final/test/FrontendC/2002-07-14-MiscTests3.c @@ -0,0 +1,187 @@ +// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null + + + +void *malloc(unsigned); + +//#include +int puts(const char *s); + +struct FunStructTest { + int Test1; + char *Pointer; + int Array[12]; +}; + +struct SubStruct { + short X, Y; +}; + +struct Quad { + int w; + struct SubStruct SS; + struct SubStruct *SSP; + char c; + int y; +}; + +struct Quad GlobalQuad = { 4, {1, 2}, 0, 3, 156 }; + +typedef int (*FuncPtr)(int); + +unsigned PtrFunc(int (*Func)(int), int X) { + return Func(X); +} + +char PtrFunc2(FuncPtr FuncTab[30], int Num) { + return FuncTab[Num]('b'); +} + +extern char SmallArgs2(char w, char x, long long Zrrk, char y, char z); +extern int SomeFunc(void); +char SmallArgs(char w, char x, char y, char z) { + SomeFunc(); + return SmallArgs2(w-1, x+1, y, z, w); +} + +static int F0(struct Quad Q, int i) { /* Pass Q by value */ + struct Quad R; + if (i) R.SS = Q.SS; + Q.SSP = &R.SS; + Q.w = Q.y = Q.c = 1; + return Q.SS.Y + i + R.y - Q.c; +} + +int F1(struct Quad *Q, int i) { /* Pass Q by address */ + struct Quad R; +#if 0 + if (i) R.SS = Q->SS; +#else + if (i) R = *Q; +#endif + Q->w = Q->y = Q->c = 1; + return Q->SS.Y+i+R.y-Q->c; +} + + +int BadFunc(float Val) { + int Result; + if (Val > 12.345) Result = 4; + return Result; /* Test use of undefined value */ +} + +int RealFunc(void) { + return SomeUndefinedFunction(1, 4, 5); +} + +extern int EF1(int *, char *, int *); + +int Func(int Param, long long Param2) { + int Result = Param; + + {{{{ + char c; int X; + EF1(&Result, &c, &X); + }}} + + { // c & X are duplicate names! 
+ char c; int X; + EF1(&Result, &c, &X); + } + + } + return Result; +} + + +short FunFunc(long long x, char z) { + return x+z; +} + +unsigned castTest(int X) { return X; } + +double TestAdd(double X, float Y) { + return X+Y+.5; +} + +int func(int i, int j) { + while (i != 20) + i += 2; + + j += func(2, i); + return (i * 3 + j*2)*j; +} + +int SumArray(int Array[], int Num) { + int i, Result = 0; + for (i = 0; i < Num; ++i) + Result += Array[i]; + + return Result; +} + +int ArrayParam(int Values[100]) { + return EF1((int*)Values[50], (char*)1, &Values[50]); +} + +int ArrayToSum(void) { + int A[100], i; + for (i = 0; i < 100; ++i) + A[i] = i*4; + + return A[A[0]]; //SumArray(A, 100); +} + + +int ExternFunc(long long, unsigned*, short, unsigned char); + +int main(int argc, char *argv[]) { + unsigned i; + puts("Hello world!\n"); + + ExternFunc(-1, 0, (short)argc, 2); + //func(argc, argc); + + for (i = 0; i < 10; i++) + puts(argv[3]); + return 0; +} + +double MathFunc(double X, double Y, double Z, + double AA, double BB, double CC, double DD, + double EE, double FF, double GG, double HH, + double aAA, double aBB, double aCC, double aDD, + double aEE, double aFF) { + return X + Y + Z + AA + BB + CC + DD + EE + FF + GG + HH + + aAA + aBB + aCC + aDD + aEE + aFF; +} + + + +void strcpy(char *s1, char *s2) { + while (*s1++ = *s2++); +} + +void strcat(char *s1, char *s2) { + while (*s1++); + s1--; + while (*s1++ = *s2++); +} + +int strcmp(char *s1, char *s2) { + while (*s1++ == *s2++); + if (*s1 == 0) { + if (*s2 == 0) { + return 0; + } else { + return -1; + } + } else { + if (*s2 == 0) { + return 1; + } else { + return (*(--s1) - *(--s2)); + } + } +} + diff --git a/final/test/FrontendC/2002-07-16-HardStringInit.c b/final/test/FrontendC/2002-07-16-HardStringInit.c new file mode 100644 index 00000000000..2785e5189d9 --- /dev/null +++ b/final/test/FrontendC/2002-07-16-HardStringInit.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + char auto_kibitz_list[100][20] = { + {"diepx"}, + {"ferret"}, + {"knightc"}, + {"knightcap"}}; + diff --git a/final/test/FrontendC/2002-07-17-StringConstant.c b/final/test/FrontendC/2002-07-17-StringConstant.c new file mode 100644 index 00000000000..9ba0c25213d --- /dev/null +++ b/final/test/FrontendC/2002-07-17-StringConstant.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +char * foo() { return "\\begin{"; } diff --git a/final/test/FrontendC/2002-07-29-Casts.c b/final/test/FrontendC/2002-07-29-Casts.c new file mode 100644 index 00000000000..44bb6101955 --- /dev/null +++ b/final/test/FrontendC/2002-07-29-Casts.c @@ -0,0 +1,86 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include +#include +#include + +int +main(int argc, char** argv) +{ + char c1; + short s1, ssf1, ssd1; + unsigned char ubs0; + signed char bs0; + unsigned char ubc0, uc2; + unsigned short us2, usf1, usd1; + int ic3, is3, sif1, sid1; + unsigned int uic4, uis4, uif1, uid1; + long slf1, sld1; + unsigned long ulf1, uld1; + float f1; + double d1; + + /* Test integer to integer conversions */ + + c1 = (char) (argc >= 2)? atoi(argv[1]) : 0xff64; /* 100 = 'd' */ + s1 = (short) (argc >= 3)? 
atoi(argv[2]) : -769; /* 0xf7ff = -769 */ + + ubc0 = (unsigned char) c1; /* 100 = 'd' */ + ubs0 = (unsigned char) s1; /* 0xff = 255 */ + bs0 = (signed char) s1; /* 0xff = -1 */ + + uc2 = (unsigned char) c1; /* 100 = 'd' */ + us2 = (unsigned short) s1; /* 0xf7ff = 64767 */ + + ic3 = (int) c1; /* 100 = 'd' */ + is3 = (int) s1; /* 0xfffff7ff = -769 */ + + uic4 = (unsigned int) c1; /* 100 = 'd' */ + uis4 = (unsigned int) s1; /* 0xfffff7ff = 4294966527 */ + + printf("ubc0 = '%c'\n", ubc0); + printf("ubs0 = %u\n", ubs0); + printf("bs0 = %d\n", bs0); + printf("c1 = '%c'\n", c1); + printf("s1 = %d\n", s1); + printf("uc2 = '%c'\n", uc2); + printf("us2 = %u\n", us2); + printf("ic3 = '%c'\n", ic3); + printf("is3 = %d\n", is3); + printf("uic4 = '%c'\n", uic4); + printf("uis4 = %u\n", uis4); + + /* Test floating-point to integer conversions */ + f1 = (float) (argc >= 4)? atof(argv[3]) : 1.0; + d1 = (argc >= 5)? atof(argv[4]) : 2.0; + + usf1 = (unsigned short) f1; + usd1 = (unsigned short) d1; + uif1 = (unsigned int) f1; + uid1 = (unsigned int) d1; + ulf1 = (unsigned long) f1; + uld1 = (unsigned long) d1; + + ssf1 = (short) f1; + ssd1 = (short) d1; + sif1 = (int) f1; + sid1 = (int) d1; + slf1 = (long) f1; + sld1 = (long) d1; + + printf("usf1 = %u\n", usf1); + printf("usd1 = %u\n", usd1); + printf("uif1 = %u\n", uif1); + printf("uid1 = %u\n", uid1); + printf("ulf1 = %u\n", ulf1); + printf("uld1 = %u\n", uld1); + + printf("ssf1 = %d\n", ssf1); + printf("ssd1 = %d\n", ssd1); + printf("sif1 = %d\n", sif1); + printf("sid1 = %d\n", sid1); + printf("slf1 = %d\n", slf1); + printf("sld1 = %d\n", sld1); + + return 0; +} diff --git a/final/test/FrontendC/2002-07-30-SubregSetAssertion.c b/final/test/FrontendC/2002-07-30-SubregSetAssertion.c new file mode 100644 index 00000000000..af72eda6524 --- /dev/null +++ b/final/test/FrontendC/2002-07-30-SubregSetAssertion.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +union X { + void *B; +}; + +union X foo() { + union X A; + A.B = (void*)123; + return A; +} diff --git a/final/test/FrontendC/2002-07-30-UnionTest.c b/final/test/FrontendC/2002-07-30-UnionTest.c new file mode 100644 index 00000000000..c931b8024f0 --- /dev/null +++ b/final/test/FrontendC/2002-07-30-UnionTest.c @@ -0,0 +1,22 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +union X; +struct Empty {}; +union F {}; +union Q { union Q *X; }; +union X { + char C; + int A, Z; + long long B; + void *b1; + struct { int A; long long Z; } Q; +}; + +union X foo(union X A) { + A.C = 123; + A.A = 39249; + //A.B = (void*)123040123321; + A.B = 12301230123123LL; + A.Z = 1; + return A; +} diff --git a/final/test/FrontendC/2002-07-30-VarArgsCallFailure.c b/final/test/FrontendC/2002-07-30-VarArgsCallFailure.c new file mode 100644 index 00000000000..5d93947a727 --- /dev/null +++ b/final/test/FrontendC/2002-07-30-VarArgsCallFailure.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int tcount; +void test(char *, const char*, int); +void foo() { + char Buf[10]; + test(Buf, "n%%%d", tcount++); +} diff --git a/final/test/FrontendC/2002-07-31-BadAssert.c b/final/test/FrontendC/2002-07-31-BadAssert.c new file mode 100644 index 00000000000..5c3d74cfb6b --- /dev/null +++ b/final/test/FrontendC/2002-07-31-BadAssert.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct +{ + unsigned char type; /* Indicates, NORMAL, SUBNORMAL, etc. 
*/ +} InternalFPF; + + +static void SetInternalFPFZero(InternalFPF *dest) { + dest->type=0; +} + +void denormalize(InternalFPF *ptr) { + SetInternalFPFZero(ptr); +} + diff --git a/final/test/FrontendC/2002-07-31-SubregFailure.c b/final/test/FrontendC/2002-07-31-SubregFailure.c new file mode 100644 index 00000000000..72fcb496cb0 --- /dev/null +++ b/final/test/FrontendC/2002-07-31-SubregFailure.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +typedef union { + long (*ap)[4]; +} ptrs; + +void DoAssignIteration() { + ptrs abase; + abase.ap+=27; + Assignment(*abase.ap); +} + + diff --git a/final/test/FrontendC/2002-08-02-UnionTest.c b/final/test/FrontendC/2002-08-02-UnionTest.c new file mode 100644 index 00000000000..e2b8c3dd401 --- /dev/null +++ b/final/test/FrontendC/2002-08-02-UnionTest.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* In this testcase, the return value of foo() is being promoted to a register + * which breaks stuff + */ +#include <stdio.h> + +union X { char X; void *B; int a, b, c, d;}; + +union X foo() { + union X Global; + Global.B = (void*)123; /* Interesting part */ + return Global; +} + +int main() { + union X test = foo(); + printf("0x%p", test.B); +} diff --git a/final/test/FrontendC/2002-08-19-RecursiveLocals.c b/final/test/FrontendC/2002-08-19-RecursiveLocals.c new file mode 100644 index 00000000000..59220ac9b0d --- /dev/null +++ b/final/test/FrontendC/2002-08-19-RecursiveLocals.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* This testcase doesn't actually test a bug, it's just the result of me + * figuring out the syntax for forward declaring a static variable. */ +struct list { + int x; + struct list *Next; +}; + +static struct list B; /* Forward declare static */ +static struct list A = { 7, &B }; +static struct list B = { 8, &A }; + +extern struct list D; /* forward declare normal var */ + +struct list C = { 7, &D }; +struct list D = { 8, &C }; + diff --git a/final/test/FrontendC/2002-09-08-PointerShifts.c b/final/test/FrontendC/2002-09-08-PointerShifts.c new file mode 100644 index 00000000000..86ff2f98afc --- /dev/null +++ b/final/test/FrontendC/2002-09-08-PointerShifts.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +int foo(int *A, unsigned X) { + return A[X]; +} diff --git a/final/test/FrontendC/2002-09-18-UnionProblem.c b/final/test/FrontendC/2002-09-18-UnionProblem.c new file mode 100644 index 00000000000..54588f12142 --- /dev/null +++ b/final/test/FrontendC/2002-09-18-UnionProblem.c @@ -0,0 +1,26 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +struct DWstruct { + char high, low; +}; + +typedef union { + struct DWstruct s; + short ll; +} DWunion; + +short __udivmodhi4 (char n1, char bm) { + DWunion rr; + + if (bm == 0) + { + rr.s.high = n1; + } + else + { + rr.s.high = bm; + } + + return rr.ll; +} diff --git a/final/test/FrontendC/2002-09-19-StarInLabel.c b/final/test/FrontendC/2002-09-19-StarInLabel.c new file mode 100644 index 00000000000..171acca2f11 --- /dev/null +++ b/final/test/FrontendC/2002-09-19-StarInLabel.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +extern void start() __asm__("start"); +extern void _start() __asm__("_start"); +extern void __start() __asm__("__start"); +void start() {} +void _start() {} +void __start() {} + diff --git a/final/test/FrontendC/2002-10-12-TooManyArguments.c b/final/test/FrontendC/2002-10-12-TooManyArguments.c new file mode 100644 index 00000000000..73c267ad30d --- /dev/null +++ 
b/final/test/FrontendC/2002-10-12-TooManyArguments.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +void foo() {} + +void bar() { + foo(1, 2, 3); /* Too many arguments passed */ +} diff --git a/final/test/FrontendC/2002-12-15-GlobalBoolTest.c b/final/test/FrontendC/2002-12-15-GlobalBoolTest.c new file mode 100644 index 00000000000..c27a23abc6e --- /dev/null +++ b/final/test/FrontendC/2002-12-15-GlobalBoolTest.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +_Bool X = 0; + diff --git a/final/test/FrontendC/2002-12-15-GlobalConstantTest.c b/final/test/FrontendC/2002-12-15-GlobalConstantTest.c new file mode 100644 index 00000000000..26de48fbb77 --- /dev/null +++ b/final/test/FrontendC/2002-12-15-GlobalConstantTest.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +const char *W = "foo"; +const int X = 7; +int Y = 8; +const char * const Z = "bar"; + diff --git a/final/test/FrontendC/2002-12-15-GlobalRedefinition.c b/final/test/FrontendC/2002-12-15-GlobalRedefinition.c new file mode 100644 index 00000000000..3b76953b094 --- /dev/null +++ b/final/test/FrontendC/2002-12-15-GlobalRedefinition.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +extern char algbrfile[9]; +char algbrfile[9] = "abcdefgh"; + diff --git a/final/test/FrontendC/2002-12-15-StructParameters.c b/final/test/FrontendC/2002-12-15-StructParameters.c new file mode 100644 index 00000000000..90ab1ff4404 --- /dev/null +++ b/final/test/FrontendC/2002-12-15-StructParameters.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct +{ + void *stack; + unsigned size; + unsigned avail; +} compile_stack_type; + +void foo(void*); +void bar(compile_stack_type T, unsigned); + +void test() { + compile_stack_type CST; + foo(&CST); + + bar(CST, 12); +} diff --git a/final/test/FrontendC/2003-01-30-UnionInit.c b/final/test/FrontendC/2003-01-30-UnionInit.c new file mode 100644 index 00000000000..576958442ae --- /dev/null +++ b/final/test/FrontendC/2003-01-30-UnionInit.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o /dev/null + +union foo { + struct { char A, B; } X; + int C; +}; + +union foo V = { {1, 2} }; diff --git a/final/test/FrontendC/2003-03-03-DeferredType.c b/final/test/FrontendC/2003-03-03-DeferredType.c new file mode 100644 index 00000000000..9e60df6f6a0 --- /dev/null +++ b/final/test/FrontendC/2003-03-03-DeferredType.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + + + +struct foo A; + +struct foo { + int x; +double D; +}; + diff --git a/final/test/FrontendC/2003-06-22-UnionCrash.c b/final/test/FrontendC/2003-06-22-UnionCrash.c new file mode 100644 index 00000000000..54d8dc6dda9 --- /dev/null +++ b/final/test/FrontendC/2003-06-22-UnionCrash.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct Blend_Map_Entry { + union { + float Colour[5]; + double Point_Slope[2]; + } Vals; +}; + +void test(struct Blend_Map_Entry* Foo) +{ +} + diff --git a/final/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c b/final/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c new file mode 100644 index 00000000000..80562c8849b --- /dev/null +++ b/final/test/FrontendC/2003-06-23-GCC-fold-infinite-recursion.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +double Test(double A, double B, double C, double D) { + return -(A-B) - (C-D); +} + diff --git a/final/test/FrontendC/2003-06-26-CFECrash.c b/final/test/FrontendC/2003-06-26-CFECrash.c new file mode 
100644 index 00000000000..10a7ed44458 --- /dev/null +++ b/final/test/FrontendC/2003-06-26-CFECrash.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct min_info { + long offset; + unsigned file_attr; +} min_info; + +typedef struct Globals { + char answerbuf; + min_info info[1]; + min_info *pInfo; +} Uz_Globs; + +extern Uz_Globs G; + +int extract_or_test_files() { + G.pInfo = G.info; +} + diff --git a/final/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c b/final/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c new file mode 100644 index 00000000000..be042cedf9f --- /dev/null +++ b/final/test/FrontendC/2003-06-29-MultipleFunctionDefinition.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +/* This is apparently legal C. + */ +extern __inline__ void test() { } + +void test() { +} diff --git a/final/test/FrontendC/2003-07-22-ArrayAccessTypeSafety.c b/final/test/FrontendC/2003-07-22-ArrayAccessTypeSafety.c new file mode 100644 index 00000000000..51e66c9b839 --- /dev/null +++ b/final/test/FrontendC/2003-07-22-ArrayAccessTypeSafety.c @@ -0,0 +1,7 @@ +/* RUN: %llvmgcc -xc %s -S -o - | grep -v alloca | not grep bitcast + */ + +void test(int* array, long long N) { + array[N] = N[array] = 33; +} + diff --git a/final/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c b/final/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c new file mode 100644 index 00000000000..12b4f7b9332 --- /dev/null +++ b/final/test/FrontendC/2003-08-06-BuiltinSetjmpLongjmp.c @@ -0,0 +1,14 @@ +/* RUN: %llvmgcc -xc %s -S -o - | not grep __builtin_ + * + * __builtin_longjmp/setjmp should get transformed into llvm.setjmp/longjmp + * just like explicit setjmp/longjmp calls are. + */ + +void jumpaway(int *ptr) { + __builtin_longjmp(ptr,1); +} + +int main(void) { + __builtin_setjmp(0); + jumpaway(0); +} diff --git a/final/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c b/final/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c new file mode 100644 index 00000000000..9ae633ee081 --- /dev/null +++ b/final/test/FrontendC/2003-08-17-DeadCodeShortCircuit.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -xc %s -S -o %t.o + +int test(_Bool pos, _Bool color) { + return 0; + return (pos && color); +} + diff --git a/final/test/FrontendC/2003-08-18-SigSetJmp.c b/final/test/FrontendC/2003-08-18-SigSetJmp.c new file mode 100644 index 00000000000..fc0d7659de6 --- /dev/null +++ b/final/test/FrontendC/2003-08-18-SigSetJmp.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +#include <setjmp.h> + +sigjmp_buf B; +int foo() { + sigsetjmp(B, 1); + bar(); +} diff --git a/final/test/FrontendC/2003-08-18-StructAsValue.c b/final/test/FrontendC/2003-08-18-StructAsValue.c new file mode 100644 index 00000000000..26cb78a4d24 --- /dev/null +++ b/final/test/FrontendC/2003-08-18-StructAsValue.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +typedef struct { + int op; +} event_t; + +event_t test(int X) { + event_t foo = { 1 }, bar = { 2 }; + return X ? 
foo : bar; +} diff --git a/final/test/FrontendC/2003-08-20-BadBitfieldRef.c b/final/test/FrontendC/2003-08-20-BadBitfieldRef.c new file mode 100644 index 00000000000..ef54d8ad9c0 --- /dev/null +++ b/final/test/FrontendC/2003-08-20-BadBitfieldRef.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void foo() +{ + char *ap; + ap[1] == '-' && ap[2] == 0; +} + diff --git a/final/test/FrontendC/2003-08-20-PrototypeMismatch.c b/final/test/FrontendC/2003-08-20-PrototypeMismatch.c new file mode 100644 index 00000000000..85c89f694c5 --- /dev/null +++ b/final/test/FrontendC/2003-08-20-PrototypeMismatch.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + + +static int foo(int); + +static int foo(C) +char C; +{ + return C; +} + +void test() { + foo(7); +} diff --git a/final/test/FrontendC/2003-08-20-vfork-bug.c b/final/test/FrontendC/2003-08-20-vfork-bug.c new file mode 100644 index 00000000000..cfe316162ad --- /dev/null +++ b/final/test/FrontendC/2003-08-20-vfork-bug.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +extern int vfork(void); +test() { + vfork(); +} diff --git a/final/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c b/final/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c new file mode 100644 index 00000000000..a1d4574dcdb --- /dev/null +++ b/final/test/FrontendC/2003-08-21-BinOp-Type-Mismatch.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct bar; + +void foo() +{ + unsigned int frame, focus; + (struct bar *) focus == (focus ? ((struct bar *) frame) : 0); +} + diff --git a/final/test/FrontendC/2003-08-21-StmtExpr.c b/final/test/FrontendC/2003-08-21-StmtExpr.c new file mode 100644 index 00000000000..7f7d22ea9d7 --- /dev/null +++ b/final/test/FrontendC/2003-08-21-StmtExpr.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +typedef struct { + unsigned long val; +} structty; + +void bar(structty new_mask); +static void foo() { + bar(({ structty mask; mask; })); +} + diff --git a/final/test/FrontendC/2003-08-21-WideString.c b/final/test/FrontendC/2003-08-21-WideString.c new file mode 100644 index 00000000000..bf67a21896b --- /dev/null +++ b/final/test/FrontendC/2003-08-21-WideString.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <stddef.h> + +struct { + wchar_t *name; +} syms = { L"NUL" }; diff --git a/final/test/FrontendC/2003-08-23-LocalUnionTest.c b/final/test/FrontendC/2003-08-23-LocalUnionTest.c new file mode 100644 index 00000000000..987accca1cc --- /dev/null +++ b/final/test/FrontendC/2003-08-23-LocalUnionTest.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + + +union foo { int X; }; + +int test(union foo* F) { + { + union foo { float X; } A; + } +} diff --git a/final/test/FrontendC/2003-08-29-BitFieldStruct.c b/final/test/FrontendC/2003-08-29-BitFieldStruct.c new file mode 100644 index 00000000000..57273cd8639 --- /dev/null +++ b/final/test/FrontendC/2003-08-29-BitFieldStruct.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct Word { + short bar; + short baz; + int final:1; + short quux; +} *word_limit; + +void foo () +{ + word_limit->final = (word_limit->final && word_limit->final); +} diff --git a/final/test/FrontendC/2003-08-29-HugeCharConst.c b/final/test/FrontendC/2003-08-29-HugeCharConst.c new file mode 100644 index 00000000000..236eb2e2748 --- /dev/null +++ b/final/test/FrontendC/2003-08-29-HugeCharConst.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void 
foo() { + unsigned char int_latin1[] = "f\200\372b\200\343\200\340"; +} diff --git a/final/test/FrontendC/2003-08-29-StructLayoutBug.c b/final/test/FrontendC/2003-08-29-StructLayoutBug.c new file mode 100644 index 00000000000..16731945b77 --- /dev/null +++ b/final/test/FrontendC/2003-08-29-StructLayoutBug.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct foo { + unsigned int I:1; + unsigned char J[1]; + unsigned int K:1; + }; + +void test(struct foo *X) {} + diff --git a/final/test/FrontendC/2003-08-30-AggregateInitializer.c b/final/test/FrontendC/2003-08-30-AggregateInitializer.c new file mode 100644 index 00000000000..58c77b6aa04 --- /dev/null +++ b/final/test/FrontendC/2003-08-30-AggregateInitializer.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S %s -o /dev/null + +struct istruct { + unsigned char C; +}; + +struct foo { + unsigned int I:1; + struct istruct J; + unsigned char L[1]; + unsigned int K:1; +}; + +struct foo F = { 1, { 7 }, { 123 } , 1 }; + + diff --git a/final/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c b/final/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c new file mode 100644 index 00000000000..e1ca88cdc6f --- /dev/null +++ b/final/test/FrontendC/2003-08-30-LargeIntegerBitfieldMember.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct foo { + unsigned int I:1; + unsigned char J[1][123]; + unsigned int K:1; + }; + +struct foo F; diff --git a/final/test/FrontendC/2003-09-18-BitfieldTests.c b/final/test/FrontendC/2003-09-18-BitfieldTests.c new file mode 100644 index 00000000000..2d74cb401dc --- /dev/null +++ b/final/test/FrontendC/2003-09-18-BitfieldTests.c @@ -0,0 +1,30 @@ +// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null + + +typedef struct BF { + int A : 1; + char B; + int C : 13; +} BF; + +char *test1(BF *b) { + return &b->B; // Must be able to address non-bitfield +} + +void test2(BF *b) { // Increment and decrement operators + b->A++; + --b->C; +} + +void test3(BF *b) { + b->C = 12345; // Store +} + +int test4(BF *b) { + return b->C; // Load +} + +void test5(BF *b, int i) { // array ref + b[i].C = 12345; +} + diff --git a/final/test/FrontendC/2003-09-30-StructLayout.c b/final/test/FrontendC/2003-09-30-StructLayout.c new file mode 100644 index 00000000000..177d1f49b2f --- /dev/null +++ b/final/test/FrontendC/2003-09-30-StructLayout.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +enum En { + ENUM_VAL +}; + +struct St { + unsigned char A; + enum En B; + unsigned char C; + enum En D; + float E; +}; + + +void func(struct St* A) { + A->D = ENUM_VAL; +} diff --git a/final/test/FrontendC/2003-10-02-UnionLValueError.c b/final/test/FrontendC/2003-10-02-UnionLValueError.c new file mode 100644 index 00000000000..a4d17a4a0ba --- /dev/null +++ b/final/test/FrontendC/2003-10-02-UnionLValueError.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <stdio.h> + +union U{ + int i[8]; + char s[80]; +}; + +void format_message(char *buffer, union U *u) { + sprintf(buffer, u->s); +} + diff --git a/final/test/FrontendC/2003-10-06-NegateExprType.c b/final/test/FrontendC/2003-10-06-NegateExprType.c new file mode 100644 index 00000000000..fb8329b344b --- /dev/null +++ b/final/test/FrontendC/2003-10-06-NegateExprType.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +extern int A[10]; +void Func(int *B) { + B - &A[5]; +} + diff --git a/final/test/FrontendC/2003-10-09-UnionInitializerBug.c b/final/test/FrontendC/2003-10-09-UnionInitializerBug.c new file 
mode 100644 index 00000000000..57e113a7cc2 --- /dev/null +++ b/final/test/FrontendC/2003-10-09-UnionInitializerBug.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct Foo { + unsigned a; + unsigned b; + unsigned c; +}; + +struct Bar { + union { + void **a; + struct Foo b; + }u; +}; + +struct Bar test = {0}; + diff --git a/final/test/FrontendC/2003-10-28-ident.c b/final/test/FrontendC/2003-10-28-ident.c new file mode 100644 index 00000000000..06cacf87a90 --- /dev/null +++ b/final/test/FrontendC/2003-10-28-ident.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +#ident "foo" diff --git a/final/test/FrontendC/2003-10-29-AsmRename.c b/final/test/FrontendC/2003-10-29-AsmRename.c new file mode 100644 index 00000000000..d07ccf7fd2c --- /dev/null +++ b/final/test/FrontendC/2003-10-29-AsmRename.c @@ -0,0 +1,22 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +struct foo { int X; }; +struct bar { int Y; }; + +extern int Func(struct foo*) __asm__("Func64"); +extern int Func64(struct bar*); + +int Func(struct foo *F) { + return 1; +} + +int Func64(struct bar* B) { + return 0; +} + + +int test() { + Func(0); /* should be renamed to call Func64 */ + Func64(0); +} diff --git a/final/test/FrontendC/2003-11-01-C99-CompoundLiteral.c b/final/test/FrontendC/2003-11-01-C99-CompoundLiteral.c new file mode 100644 index 00000000000..2912c97c546 --- /dev/null +++ b/final/test/FrontendC/2003-11-01-C99-CompoundLiteral.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct { int foo; } spinlock_t; +typedef struct wait_queue_head_t { spinlock_t lock; } wait_queue_head_t; +void call_usermodehelper(void) { + struct wait_queue_head_t work = { lock: (spinlock_t) { 0 }, }; +} + diff --git a/final/test/FrontendC/2003-11-01-EmptyStructCrash.c b/final/test/FrontendC/2003-11-01-EmptyStructCrash.c new file mode 100644 index 00000000000..c1161195daf --- /dev/null +++ b/final/test/FrontendC/2003-11-01-EmptyStructCrash.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct { } the_coolest_struct_in_the_world; +extern the_coolest_struct_in_the_world xyzzy; +void *foo() { return &xyzzy; } + diff --git a/final/test/FrontendC/2003-11-01-GlobalUnionInit.c b/final/test/FrontendC/2003-11-01-GlobalUnionInit.c new file mode 100644 index 00000000000..7cd707348ca --- /dev/null +++ b/final/test/FrontendC/2003-11-01-GlobalUnionInit.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +union bdflush_param { + struct { int x; } b_un; + int y[1]; +} bdf_prm = {{30}}; + diff --git a/final/test/FrontendC/2003-11-03-AddrArrayElement.c b/final/test/FrontendC/2003-11-03-AddrArrayElement.c new file mode 100644 index 00000000000..4337da7d1e4 --- /dev/null +++ b/final/test/FrontendC/2003-11-03-AddrArrayElement.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr + +// This should be turned into a tasty getelementptr instruction, not a nasty +// series of casts and address arithmetic. 
+ +char Global[100]; + +char *test1(unsigned i) { + return &Global[i]; +} + diff --git a/final/test/FrontendC/2003-11-04-EmptyStruct.c b/final/test/FrontendC/2003-11-04-EmptyStruct.c new file mode 100644 index 00000000000..b4f37befffa --- /dev/null +++ b/final/test/FrontendC/2003-11-04-EmptyStruct.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct { } rwlock_t; +struct fs_struct { rwlock_t lock; int umask; }; +void __copy_fs_struct(struct fs_struct *fs) { fs->lock = (rwlock_t) { }; } + diff --git a/final/test/FrontendC/2003-11-04-OutOfMemory.c b/final/test/FrontendC/2003-11-04-OutOfMemory.c new file mode 100644 index 00000000000..40cb6c2e21e --- /dev/null +++ b/final/test/FrontendC/2003-11-04-OutOfMemory.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void schedule_timeout(signed long timeout) +{ + switch (timeout) + { + case ((long)(~0UL>>1)): break; + } +} diff --git a/final/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c b/final/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c new file mode 100644 index 00000000000..58f9f82e154 --- /dev/null +++ b/final/test/FrontendC/2003-11-08-PointerSubNotGetelementptr.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr + +char *test(char* C) { + return C-1; // Should turn into a GEP +} + +int *test2(int* I) { + return I-1; +} diff --git a/final/test/FrontendC/2003-11-12-VoidString.c b/final/test/FrontendC/2003-11-12-VoidString.c new file mode 100644 index 00000000000..5770b3661a9 --- /dev/null +++ b/final/test/FrontendC/2003-11-12-VoidString.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void query_newnamebuf(void) { ((void)"query_newnamebuf"); } + diff --git a/final/test/FrontendC/2003-11-13-TypeSafety.c b/final/test/FrontendC/2003-11-13-TypeSafety.c new file mode 100644 index 00000000000..9b76bb11c98 --- /dev/null +++ b/final/test/FrontendC/2003-11-13-TypeSafety.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep getelementptr + +int *test(int *X, int Y) { + return X + Y; +} diff --git a/final/test/FrontendC/2003-11-16-StaticArrayInit.c b/final/test/FrontendC/2003-11-16-StaticArrayInit.c new file mode 100644 index 00000000000..eb83b3ad0c6 --- /dev/null +++ b/final/test/FrontendC/2003-11-16-StaticArrayInit.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +void bar () { + static char x[10]; + static char *xend = x + 10; +} + + diff --git a/final/test/FrontendC/2003-11-18-CondExprLValue.c b/final/test/FrontendC/2003-11-18-CondExprLValue.c new file mode 100644 index 00000000000..68ee622c641 --- /dev/null +++ b/final/test/FrontendC/2003-11-18-CondExprLValue.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +typedef struct { unsigned long pgprot; } pgprot_t; + +void split_large_page(unsigned long addr, pgprot_t prot) +{ + (addr ? 
prot : ((pgprot_t) { 0x001 } )).pgprot; +} + diff --git a/final/test/FrontendC/2003-11-19-AddressOfRegister.c b/final/test/FrontendC/2003-11-19-AddressOfRegister.c new file mode 100644 index 00000000000..69dc54d9289 --- /dev/null +++ b/final/test/FrontendC/2003-11-19-AddressOfRegister.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -xc %s -S -o /dev/null |& not grep warning + +struct item { + short delta[4]; +}; + +int TEST(int nt) { + register struct item *aa; + aa[nt].delta; + return 1; +} + diff --git a/final/test/FrontendC/2003-11-19-BitFieldArray.c b/final/test/FrontendC/2003-11-19-BitFieldArray.c new file mode 100644 index 00000000000..250268a3b85 --- /dev/null +++ b/final/test/FrontendC/2003-11-19-BitFieldArray.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct _GIOChannel { + int write_buf; + char partial_write_buf[6]; + int d :1; +}; + +void g_io_channel_init (struct _GIOChannel *channel) { + channel->partial_write_buf[0]; +} + diff --git a/final/test/FrontendC/2003-11-20-Bitfields.c b/final/test/FrontendC/2003-11-20-Bitfields.c new file mode 100644 index 00000000000..4be9942ccf3 --- /dev/null +++ b/final/test/FrontendC/2003-11-20-Bitfields.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct face_cachel { + unsigned int reverse :1; + unsigned char font_specified[1]; +}; + +void +ensure_face_cachel_contains_charset (struct face_cachel *cachel) { + cachel->font_specified[0] = 0; +} + diff --git a/final/test/FrontendC/2003-11-20-ComplexDivision.c b/final/test/FrontendC/2003-11-20-ComplexDivision.c new file mode 100644 index 00000000000..172de8c0e19 --- /dev/null +++ b/final/test/FrontendC/2003-11-20-ComplexDivision.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int test() { + __complex__ double C; + double D; + C / D; +} diff --git a/final/test/FrontendC/2003-11-20-UnionBitfield.c b/final/test/FrontendC/2003-11-20-UnionBitfield.c new file mode 100644 index 00000000000..f999c207772 --- /dev/null +++ b/final/test/FrontendC/2003-11-20-UnionBitfield.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct printf_spec { + unsigned int minus_flag:1; + char converter; +}; + +void parse_doprnt_spec () { + struct printf_spec spec; + spec.minus_flag = 1; +} + diff --git a/final/test/FrontendC/2003-11-26-PointerShift.c b/final/test/FrontendC/2003-11-26-PointerShift.c new file mode 100644 index 00000000000..6b5205a6e7e --- /dev/null +++ b/final/test/FrontendC/2003-11-26-PointerShift.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +unsigned long do_csum(const unsigned char *buff, int len, unsigned long result) { + if (2 & (unsigned long) buff) result += 1; + return result; +} diff --git a/final/test/FrontendC/2003-11-27-ConstructorCast.c b/final/test/FrontendC/2003-11-27-ConstructorCast.c new file mode 100644 index 00000000000..15eb7694795 --- /dev/null +++ b/final/test/FrontendC/2003-11-27-ConstructorCast.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct i387_soft_struct { + long cwd; +}; +union i387_union { + struct i387_soft_struct soft; +}; +struct thread_struct { + union i387_union i387; +}; +void _init_task_union(void) { + struct thread_struct thread = (struct thread_struct) { {{0}} }; +} diff --git a/final/test/FrontendC/2003-11-27-UnionCtorInitialization.c b/final/test/FrontendC/2003-11-27-UnionCtorInitialization.c new file mode 100644 index 00000000000..e3ae1e96a68 --- /dev/null +++ 
b/final/test/FrontendC/2003-11-27-UnionCtorInitialization.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +struct i387_soft_struct { + long cwd; + long twd; + long fip; +}; +union i387_union { + struct i387_soft_struct soft; +}; +struct thread_struct { + union i387_union i387; +}; +void _init_task_union(void) { + struct thread_struct thread = (struct thread_struct) { {{0}} }; +} diff --git a/final/test/FrontendC/2003-12-14-ExternInlineSupport.c b/final/test/FrontendC/2003-12-14-ExternInlineSupport.c new file mode 100644 index 00000000000..a45eb98dca2 --- /dev/null +++ b/final/test/FrontendC/2003-12-14-ExternInlineSupport.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -xc %s -S -o - | not grep dead_function + +extern __inline__ void dead_function() {} diff --git a/final/test/FrontendC/2004-01-01-UnknownInitSize.c b/final/test/FrontendC/2004-01-01-UnknownInitSize.c new file mode 100644 index 00000000000..b26b6cd8756 --- /dev/null +++ b/final/test/FrontendC/2004-01-01-UnknownInitSize.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o /dev/null + +/* + * This regression test ensures that the C front end can compile initializers + * even when it cannot determine the size (as below). +*/ +struct one +{ + int a; + int values []; +}; + +struct one hobbit = {5, {1, 2, 3}}; + diff --git a/final/test/FrontendC/2004-01-08-ExternInlineRedefine.c b/final/test/FrontendC/2004-01-08-ExternInlineRedefine.c new file mode 100644 index 00000000000..4366b9b5659 --- /dev/null +++ b/final/test/FrontendC/2004-01-08-ExternInlineRedefine.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +extern __inline long int +__strtol_l (int a) +{ + return 0; +} + +long int +__strtol_l (int a) +{ + return 0; +} diff --git a/final/test/FrontendC/2004-02-12-LargeAggregateCopy.c b/final/test/FrontendC/2004-02-12-LargeAggregateCopy.c new file mode 100644 index 00000000000..93b7fe44bf6 --- /dev/null +++ b/final/test/FrontendC/2004-02-12-LargeAggregateCopy.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep llvm.memcpy + +struct X { int V[10000]; }; +struct X Global1, Global2; +void test() { + Global2 = Global1; +} + diff --git a/final/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c b/final/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c new file mode 100644 index 00000000000..f115b5a5f01 --- /dev/null +++ b/final/test/FrontendC/2004-02-13-BuiltinFrameReturnAddress.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep llvm.*address | count 4 + +void *test1() { + return __builtin_return_address(1); +} +void *test2() { + return __builtin_frame_address(0); +} diff --git a/final/test/FrontendC/2004-02-13-IllegalVararg.c b/final/test/FrontendC/2004-02-13-IllegalVararg.c new file mode 100644 index 00000000000..0d003c8033c --- /dev/null +++ b/final/test/FrontendC/2004-02-13-IllegalVararg.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -xc %s -w -S -o - | llc +// XFAIL: * +// See PR2452 + +#include <stdarg.h> + +float test(int X, ...) 
{ + va_list ap; + float F; + va_start(ap, X); + F = va_arg(ap, float); + return F; +} diff --git a/final/test/FrontendC/2004-02-13-Memset.c b/final/test/FrontendC/2004-02-13-Memset.c new file mode 100644 index 00000000000..fb6ed2352ea --- /dev/null +++ b/final/test/FrontendC/2004-02-13-Memset.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep llvm.memset | count 3 + +void *memset(void*, int, long); +void bzero(void*, long); + +void test(int* X, char *Y) { + memset(X, 4, 1000); + bzero(Y, 100); +} diff --git a/final/test/FrontendC/2004-02-14-ZeroInitializer.c b/final/test/FrontendC/2004-02-14-ZeroInitializer.c new file mode 100644 index 00000000000..bede9078741 --- /dev/null +++ b/final/test/FrontendC/2004-02-14-ZeroInitializer.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep zeroinitializer + +int X[1000]; + diff --git a/final/test/FrontendC/2004-02-20-Builtins.c b/final/test/FrontendC/2004-02-20-Builtins.c new file mode 100644 index 00000000000..c056a8405f7 --- /dev/null +++ b/final/test/FrontendC/2004-02-20-Builtins.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -O3 -xc %s -S -o - | not grep builtin + +#include <math.h> + +void zsqrtxxx(float num) { + num = sqrt(num); +} + diff --git a/final/test/FrontendC/2004-03-07-ComplexDivEquals.c b/final/test/FrontendC/2004-03-07-ComplexDivEquals.c new file mode 100644 index 00000000000..c6c805a7b32 --- /dev/null +++ b/final/test/FrontendC/2004-03-07-ComplexDivEquals.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +void test(__complex__ double D, double X) { + D /= X; +} diff --git a/final/test/FrontendC/2004-03-07-ExternalConstant.c b/final/test/FrontendC/2004-03-07-ExternalConstant.c new file mode 100644 index 00000000000..4a9094bdf34 --- /dev/null +++ b/final/test/FrontendC/2004-03-07-ExternalConstant.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep constant + +extern const int a[]; // 'a' should be marked constant even though it's external! 
+int foo () { + return a[0]; +} + diff --git a/final/test/FrontendC/2004-03-09-LargeArrayInitializers.c b/final/test/FrontendC/2004-03-09-LargeArrayInitializers.c new file mode 100644 index 00000000000..265206fabb6 --- /dev/null +++ b/final/test/FrontendC/2004-03-09-LargeArrayInitializers.c @@ -0,0 +1,32 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +// Test that these initializers are handled efficiently + +int test(int x) { + const int XX[1000] = { 0, 0 }; + const char S [1000] = "foo"; + + const int array[] = { + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, 17, 23, 123, 123, 49, + }; + return array[x]; +} diff --git a/final/test/FrontendC/2004-03-15-SimpleIndirectGoto.c b/final/test/FrontendC/2004-03-15-SimpleIndirectGoto.c new file mode 100644 index 00000000000..a3f27b2a330 --- /dev/null +++ b/final/test/FrontendC/2004-03-15-SimpleIndirectGoto.c @@ -0,0 +1,23 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int code[]={0,0,0,0,1}; +void foo(int x) { + volatile int b; + b = 0xffffffff; +} +void bar(int *pc) { + static const void *l[] = {&&lab0, &&end}; + + foo(0); + goto *l[*pc]; + lab0: + foo(0); + pc++; + goto *l[*pc]; + end: + return; +} +int main() { + bar(code); + return 0; +} diff --git a/final/test/FrontendC/2004-03-16-AsmRegisterCrash.c b/final/test/FrontendC/2004-03-16-AsmRegisterCrash.c new file mode 100644 index 00000000000..f13368c2562 --- /dev/null +++ b/final/test/FrontendC/2004-03-16-AsmRegisterCrash.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int foo() { +#ifdef __ppc__ + register int X __asm__("r1"); +#else + register int X __asm__("ebx"); +#endif + return X; +} diff --git a/final/test/FrontendC/2004-05-07-VarArrays.c b/final/test/FrontendC/2004-05-07-VarArrays.c new file mode 100644 index 00000000000..3a39c4fe63a --- /dev/null +++ b/final/test/FrontendC/2004-05-07-VarArrays.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +int foo(int len, char arr[][len], int X) { + return arr[X][0]; +} diff --git a/final/test/FrontendC/2004-05-21-IncompleteEnum.c b/final/test/FrontendC/2004-05-21-IncompleteEnum.c new file mode 100644 index 00000000000..958a8d1c0ea --- /dev/null +++ b/final/test/FrontendC/2004-05-21-IncompleteEnum.c @@ -0,0 +1,5 
@@ +// RUN: %llvmgcc -w -S %s -o - | llvm-as -o /dev/null + +void test(enum foo *X) { +} + diff --git a/final/test/FrontendC/2004-06-08-OpaqueStructArg.c b/final/test/FrontendC/2004-06-08-OpaqueStructArg.c new file mode 100644 index 00000000000..5dfdd83c9e2 --- /dev/null +++ b/final/test/FrontendC/2004-06-08-OpaqueStructArg.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + struct fu; + void foo(struct fu); + void bar() { + foo; + } diff --git a/final/test/FrontendC/2004-06-17-UnorderedBuiltins.c b/final/test/FrontendC/2004-06-17-UnorderedBuiltins.c new file mode 100644 index 00000000000..02780f0f057 --- /dev/null +++ b/final/test/FrontendC/2004-06-17-UnorderedBuiltins.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +_Bool A, B, C, D, E, F, G, H; +void TestF(float X, float Y) { + A = __builtin_isgreater(X, Y); + B = __builtin_isgreaterequal(X, Y); + C = __builtin_isless(X, Y); + D = __builtin_islessequal(X, Y); + E = __builtin_islessgreater(X, Y); + F = __builtin_isunordered(X, Y); + //G = __builtin_isordered(X, Y); // Our current snapshot of GCC doesn't include this builtin + H = __builtin_isunordered(X, Y); +} +void TestD(double X, double Y) { + A = __builtin_isgreater(X, Y); + B = __builtin_isgreaterequal(X, Y); + C = __builtin_isless(X, Y); + D = __builtin_islessequal(X, Y); + E = __builtin_islessgreater(X, Y); + F = __builtin_isunordered(X, Y); + //G = __builtin_isordered(X, Y); // Our current snapshot doesn't include this builtin. FIXME + H = __builtin_isunordered(X, Y); +} diff --git a/final/test/FrontendC/2004-06-17-UnorderedCompares.c b/final/test/FrontendC/2004-06-17-UnorderedCompares.c new file mode 100644 index 00000000000..286e7bc7cf7 --- /dev/null +++ b/final/test/FrontendC/2004-06-17-UnorderedCompares.c @@ -0,0 +1,21 @@ +// RUN: %llvmgcc -xc -std=c99 %s -S -o - | grep -v llvm.isunordered | not grep call + +#include <math.h> + +_Bool A, B, C, D, E, F; +void TestF(float X, float Y) { + A = __builtin_isgreater(X, Y); + B = __builtin_isgreaterequal(X, Y); + C = __builtin_isless(X, Y); + D = __builtin_islessequal(X, Y); + E = __builtin_islessgreater(X, Y); + F = __builtin_isunordered(X, Y); +} +void TestD(double X, double Y) { + A = __builtin_isgreater(X, Y); + B = __builtin_isgreaterequal(X, Y); + C = __builtin_isless(X, Y); + D = __builtin_islessequal(X, Y); + E = __builtin_islessgreater(X, Y); + F = __builtin_isunordered(X, Y); +} diff --git a/final/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c b/final/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c new file mode 100644 index 00000000000..3e450a4b936 --- /dev/null +++ b/final/test/FrontendC/2004-06-18-VariableLengthArrayOfStructures.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + + +struct S { }; + +int xxxx(int a) { + struct S comps[a]; + comps[0]; +} + diff --git a/final/test/FrontendC/2004-07-06-FunctionCast.c b/final/test/FrontendC/2004-07-06-FunctionCast.c new file mode 100644 index 00000000000..6d80f86fa1e --- /dev/null +++ b/final/test/FrontendC/2004-07-06-FunctionCast.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +static int unused_func(void) { + return 1; +} + +int foo(void) { + (void)unused_func; /* avoid compiler warning */ + return 2; +} diff --git a/final/test/FrontendC/2004-08-06-LargeStructTest.c b/final/test/FrontendC/2004-08-06-LargeStructTest.c new file mode 100644 index 00000000000..8fbb7f8368c --- /dev/null +++ b/final/test/FrontendC/2004-08-06-LargeStructTest.c @@ -0,0 +1,19 @@ +// RUN: 
%llvmgcc -S %s -o - | llvm-as -o /dev/null + + +#define A(X) int X; +#define B(X) A(X##0) A(X##1) A(X##2) A(X##3) A(X##4) A(X##5) A(X##6) A(X##7) \ + A(X##8) A(X##9) A(X##A) A(X##B) A(X##C) A(X##D) A(X##E) A(X##F) +#define C(X) B(X##0) B(X##1) B(X##2) B(X##3) B(X##4) B(X##5) B(X##6) B(X##7) \ + B(X##8) B(X##9) B(X##A) B(X##B) B(X##C) B(X##D) B(X##E) B(X##F) + +struct foo { + C(x); // 256 + C(y); // 256 + C(z); +}; + + +int test(struct foo *F) { + return F->xA1 + F->yFF + F->zC4; +} diff --git a/final/test/FrontendC/2004-11-25-UnnamedBitfieldPadding.c b/final/test/FrontendC/2004-11-25-UnnamedBitfieldPadding.c new file mode 100644 index 00000000000..b3f4a829a9e --- /dev/null +++ b/final/test/FrontendC/2004-11-25-UnnamedBitfieldPadding.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o /dev/null +// This is a testcase for PR461 +typedef struct { + unsigned min_align: 1; + unsigned : 1; +} addr_diff_vec_flags; + +addr_diff_vec_flags X; diff --git a/final/test/FrontendC/2004-11-27-InvalidConstantExpr.c b/final/test/FrontendC/2004-11-27-InvalidConstantExpr.c new file mode 100644 index 00000000000..ee8642fa3aa --- /dev/null +++ b/final/test/FrontendC/2004-11-27-InvalidConstantExpr.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -o - | not grep {foo\\* sub} +// This should not produce a subtract constantexpr of a pointer +struct foo { + int Y; + char X[100]; +} F; + +int test(char *Y) { + return Y - F.X; +} diff --git a/final/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c b/final/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c new file mode 100644 index 00000000000..994ac8f8436 --- /dev/null +++ b/final/test/FrontendC/2004-11-27-StaticFunctionRedeclare.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -S %s -o - | \ +// RUN: opt -std-compile-opts -S | not grep {declare i32.*func} + +// There should not be an unresolved reference to func here. Believe it or not, +// the "expected result" is a function named 'func' which is internal and +// referenced by bar(). + +// This is PR244 + +static int func(); +void bar() { + int func(); + foo(func); +} +static int func(char** A, char ** B) {} diff --git a/final/test/FrontendC/2004-11-27-VariableSizeInStructure.c b/final/test/FrontendC/2004-11-27-VariableSizeInStructure.c new file mode 100644 index 00000000000..bd63ae3b012 --- /dev/null +++ b/final/test/FrontendC/2004-11-27-VariableSizeInStructure.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o /dev/null + +// GCC allows variable sized arrays in structures, crazy! + +// This is PR360. + +int sub1(int i, char *pi) { + typedef int foo[i]; + struct bar {foo f1; int f2;} *p = (struct bar *) pi; + return p->f2; +} diff --git a/final/test/FrontendC/2005-01-02-ConstantInits.c b/final/test/FrontendC/2005-01-02-ConstantInits.c new file mode 100644 index 00000000000..735278e0f93 --- /dev/null +++ b/final/test/FrontendC/2005-01-02-ConstantInits.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc %s -S -o - + +// This tests all kinds of hard cases with initializers and +// array subscripts. This corresponds to PR487. 
+ +struct X { int a[2]; }; + +int test() { + static int i23 = (int) &(((struct X *)0)->a[1]); + return i23; +} + +int i = (int) &( ((struct X *)0) -> a[1]); + +int Arr[100]; + +int foo(int i) { return bar(&Arr[49])+bar(&Arr[i]); } +int foo2(int i) { + static const int *X = &Arr[49]; + static int i23 = (int) &( ((struct X *)0) -> a[0]); + int *P = Arr; + ++P; + return bar(Arr+i); +} diff --git a/final/test/FrontendC/2005-01-02-PointerDifference.c b/final/test/FrontendC/2005-01-02-PointerDifference.c new file mode 100644 index 00000000000..2c108e5f6ca --- /dev/null +++ b/final/test/FrontendC/2005-01-02-PointerDifference.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep -v div + +int Diff(int *P, int *Q) { return P-Q; } diff --git a/final/test/FrontendC/2005-01-02-VAArgError-ICE.c b/final/test/FrontendC/2005-01-02-VAArgError-ICE.c new file mode 100644 index 00000000000..db825584627 --- /dev/null +++ b/final/test/FrontendC/2005-01-02-VAArgError-ICE.c @@ -0,0 +1,10 @@ +// This file is erroneous, but should not cause the compiler to ICE. +// PR481 +// RUN: %llvmgcc %s -S -o /dev/null |& not grep {internal compiler error} + +#include <stdarg.h> +int flags(int a, int b, ...) { + va_list args; + va_start(args,a); // not the last named arg + foo(args); +} diff --git a/final/test/FrontendC/2005-02-20-AggregateSAVEEXPR.c b/final/test/FrontendC/2005-02-20-AggregateSAVEEXPR.c new file mode 100644 index 00000000000..7a955330331 --- /dev/null +++ b/final/test/FrontendC/2005-02-20-AggregateSAVEEXPR.c @@ -0,0 +1,19 @@ +// RUN: %llvmgcc %s -o /dev/null -S +// Note: +// We fail this on Sparc because the C library seems to be missing complex.h +// and the corresponding C99 complex support. +// +// We could modify the test to use only GCC extensions, but I don't know if +// that would change the nature of the test. +// +// XFAIL: sparc + +#ifdef __CYGWIN__ + #include <mingw/complex.h> +#else + #include <complex.h> +#endif + +int foo(complex float c) { + return creal(c); +} diff --git a/final/test/FrontendC/2005-02-27-MarkGlobalConstant.c b/final/test/FrontendC/2005-02-27-MarkGlobalConstant.c new file mode 100644 index 00000000000..6806c94c10b --- /dev/null +++ b/final/test/FrontendC/2005-02-27-MarkGlobalConstant.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -xc %s -S -o - | grep {private unnamed_addr constant } + +// The synthetic global made by the CFE for big initializer should be marked +// constant. 
+ +void bar(); +void foo() { + char Blah[] = "asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd"; + bar(Blah); +} diff --git a/final/test/FrontendC/2005-03-05-OffsetOfHack.c b/final/test/FrontendC/2005-03-05-OffsetOfHack.c new file mode 100644 index 00000000000..8df7231df6a --- /dev/null +++ b/final/test/FrontendC/2005-03-05-OffsetOfHack.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o - + +struct s { + unsigned long int field[0]; +}; + +#define OFFS \ + (((char *) &((struct s *) 0)->field[0]) - (char *) 0) + +int foo[OFFS]; + + diff --git a/final/test/FrontendC/2005-03-06-OffsetOfStructCrash.c b/final/test/FrontendC/2005-03-06-OffsetOfStructCrash.c new file mode 100644 index 00000000000..91e68628023 --- /dev/null +++ b/final/test/FrontendC/2005-03-06-OffsetOfStructCrash.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc %s -S -o - + +struct Y {}; +struct XXX { + struct Y F; +}; + +void test1() { + (int)&((struct XXX*)(((void *)0)))->F; +} + +void test2() { + &((struct XXX*)(((void *)0)))->F; +} diff --git a/final/test/FrontendC/2005-03-11-Prefetch.c b/final/test/FrontendC/2005-03-11-Prefetch.c new file mode 100644 index 00000000000..bf7965304f1 --- /dev/null +++ b/final/test/FrontendC/2005-03-11-Prefetch.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - | llvm-as | llvm-dis | grep llvm.prefetch + +void foo(int *P) { + __builtin_prefetch(P); + __builtin_prefetch(P, 1); +} diff --git a/final/test/FrontendC/2005-04-09-ComplexOps.c b/final/test/FrontendC/2005-04-09-ComplexOps.c new file mode 100644 index 00000000000..2962b745534 --- /dev/null +++ b/final/test/FrontendC/2005-04-09-ComplexOps.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - + +#include <complex.h> +#define I 1.0iF + +double __complex test(double X) { return ~-(X*I); } + +_Bool EQ(double __complex A, double __complex B) { return A == B; } +_Bool NE(double __complex A, double __complex B) { return A != B; } diff --git a/final/test/FrontendC/2005-05-06-CountBuiltins.c b/final/test/FrontendC/2005-05-06-CountBuiltins.c new file mode 100644 index 00000000000..da40a142ae9 --- /dev/null +++ b/final/test/FrontendC/2005-05-06-CountBuiltins.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc %s -S -o - | llvm-as | llvm-dis | not grep call.*__builtin + +int G, H, I; +void foo(int P) { + G = __builtin_clz(P); + H = __builtin_ctz(P); + I = __builtin_popcount(P); +} + +long long g, h, i; +void fooll(float P) { + g = __builtin_clzll(P); + g = __builtin_clzll(P); + h = __builtin_ctzll(P); + i = __builtin_popcountll(P); +} + diff --git a/final/test/FrontendC/2005-05-10-GlobalUnionInit.c b/final/test/FrontendC/2005-05-10-GlobalUnionInit.c new file mode 100644 index 00000000000..443064c921d --- /dev/null +++ b/final/test/FrontendC/2005-05-10-GlobalUnionInit.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - + +union A { // { uint } + union B { double *C; } D; +} E = { { (double*)12312 } }; + diff --git a/final/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c b/final/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c new file mode 100644 index 00000000000..0f076c9bf79 --- /dev/null +++ b/final/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -std=c99 %s -S -o - | \ +// RUN: opt -std-compile-opts -disable-output +// PR580 + +int X, Y; +int foo() { + int i; + for (i=0; i<100; i++ ) + { + break; + i = ( X || Y ) ; + } +} + diff --git a/final/test/FrontendC/2005-07-20-SqrtNoErrno.c b/final/test/FrontendC/2005-07-20-SqrtNoErrno.c new file mode 100644 index 00000000000..a321a3884e8 --- /dev/null +++ b/final/test/FrontendC/2005-07-20-SqrtNoErrno.c @@ 
-0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o - -fno-math-errno | FileCheck %s +// llvm.sqrt has undefined behavior on negative inputs, so it is +// inappropriate to translate C/C++ sqrt to this. +#include <math.h> + +float foo(float X) { +// CHECK: foo +// CHECK: sqrtf(float %1) nounwind readonly + // Check that this is marked readonly when errno is ignored. + return sqrtf(X); +} diff --git a/final/test/FrontendC/2005-07-26-UnionInitCrash.c b/final/test/FrontendC/2005-07-26-UnionInitCrash.c new file mode 100644 index 00000000000..563278a9c68 --- /dev/null +++ b/final/test/FrontendC/2005-07-26-UnionInitCrash.c @@ -0,0 +1,3 @@ +// PR607 +// RUN: %llvmgcc %s -S -o - +union { char bytes[8]; double alignment; }EQ1 = {0,0,0,0,0,0,0,0}; diff --git a/final/test/FrontendC/2005-07-28-IncorrectWeakGlobal.c b/final/test/FrontendC/2005-07-28-IncorrectWeakGlobal.c new file mode 100644 index 00000000000..1a8c409439c --- /dev/null +++ b/final/test/FrontendC/2005-07-28-IncorrectWeakGlobal.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc %s -S -o - | grep TheGlobal | not grep weak + +extern int TheGlobal; +int foo() { return TheGlobal; } +int TheGlobal = 1; diff --git a/final/test/FrontendC/2005-09-20-ComplexConstants.c b/final/test/FrontendC/2005-09-20-ComplexConstants.c new file mode 100644 index 00000000000..209adc502fa --- /dev/null +++ b/final/test/FrontendC/2005-09-20-ComplexConstants.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc %s -S -o - | llvm-as -o /dev/null + +const double _Complex x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + diff --git a/final/test/FrontendC/2005-09-24-AsmUserPrefix.c b/final/test/FrontendC/2005-09-24-AsmUserPrefix.c new file mode 100644 index 00000000000..952c7b3c35d --- /dev/null +++ b/final/test/FrontendC/2005-09-24-AsmUserPrefix.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | llc | \ +// RUN: not grep _foo2 + +void foo() __asm__("foo2"); + +void bar() { + foo(); +} diff --git a/final/test/FrontendC/2005-09-24-BitFieldCrash.c b/final/test/FrontendC/2005-09-24-BitFieldCrash.c new file mode 100644 index 00000000000..b4c85ffb2d8 --- /dev/null +++ b/final/test/FrontendC/2005-09-24-BitFieldCrash.c @@ -0,0 +1,33 @@ +// RUN: %llvmgcc %s -S -o - + +struct tree_common {}; + +struct tree_int_cst { + struct tree_common common; + struct tree_int_cst_lowhi { + unsigned long long low; + long long high; + } int_cst; +}; + +enum XXX { yyy }; + +struct tree_function_decl { + struct tree_common common; + long long locus, y; + __extension__ enum XXX built_in_class : 2; + +}; + + +union tree_node { + struct tree_int_cst int_cst; + struct tree_function_decl function_decl; +}; + + +void foo (union tree_node * decl) { + decl->function_decl.built_in_class != 0; +} + + diff --git a/final/test/FrontendC/2005-10-18-VariableSizedElementCrash.c b/final/test/FrontendC/2005-10-18-VariableSizedElementCrash.c new file mode 100644 index 00000000000..b9166621db4 --- /dev/null +++ b/final/test/FrontendC/2005-10-18-VariableSizedElementCrash.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - + +int sub1(int i, char *pi) { + typedef int foo[i]; + struct bar {foo f1; int f2:3; int f3:4;} *p = (struct bar *) pi; + xxx(p->f1); + return p->f3; +} + diff --git a/final/test/FrontendC/2005-12-04-AttributeUsed.c b/final/test/FrontendC/2005-12-04-AttributeUsed.c new file mode 100644 index 00000000000..f47e977f486 --- /dev/null +++ b/final/test/FrontendC/2005-12-04-AttributeUsed.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o - | llvm-as | llvm-dis | \ +// RUN: grep llvm.used | grep foo | grep X + +int X __attribute__((used)); +int Y; + 
+__attribute__((used)) void foo() {} + diff --git a/final/test/FrontendC/2005-12-04-DeclarationLineNumbers.c b/final/test/FrontendC/2005-12-04-DeclarationLineNumbers.c new file mode 100644 index 00000000000..f3f69ddb0bc --- /dev/null +++ b/final/test/FrontendC/2005-12-04-DeclarationLineNumbers.c @@ -0,0 +1,23 @@ +// RUN: %llvmgcc %s -S -g -o - | grep DW_TAG_compile_unit | count 1 +// PR664: ensure that line #'s are emitted for declarations + + +short test(short br_data_0, +short br_data_1, +short br_data_2, +short br_data_3, +short br_data_4, +short br_data_5, +short br_data_6, +short br_data_7) { + +short sm07 = br_data_0 + br_data_7; +short sm16 = br_data_1 + br_data_6; +short sm25 = br_data_2 + br_data_5; +short sm34 = br_data_3 + br_data_4; +short s0734 = sm07 + sm34; +short s1625 = sm16 + sm25; + +return s0734 + s1625; +} + diff --git a/final/test/FrontendC/2006-01-13-Includes.c b/final/test/FrontendC/2006-01-13-Includes.c new file mode 100644 index 00000000000..7fa0b3b5a6d --- /dev/null +++ b/final/test/FrontendC/2006-01-13-Includes.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -g -S -o - | grep {test/FrontendC} +// PR676 + +#include <stdio.h> + +void test() { + printf("Hello World\n"); +} diff --git a/final/test/FrontendC/2006-01-13-StackSave.c b/final/test/FrontendC/2006-01-13-StackSave.c new file mode 100644 index 00000000000..ae8d9082101 --- /dev/null +++ b/final/test/FrontendC/2006-01-13-StackSave.c @@ -0,0 +1,11 @@ +// PR691 +// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \ +// RUN: llvm-dis | grep llvm.stacksave + +void test(int N) { + int i; + for (i = 0; i < N; ++i) { + int VLA[i]; + external(VLA); + } +} diff --git a/final/test/FrontendC/2006-01-16-BitCountIntrinsicsUnsigned.c b/final/test/FrontendC/2006-01-16-BitCountIntrinsicsUnsigned.c new file mode 100644 index 00000000000..eafcb62814b --- /dev/null +++ b/final/test/FrontendC/2006-01-16-BitCountIntrinsicsUnsigned.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | grep {llvm.ctlz.i32(i32} | count 3 +// RUN: %llvmgcc -S %s -o - | grep {llvm.ctlz.i32(i32} | grep declare | count 1 + +unsigned t2(unsigned X) { + return __builtin_clz(X); +} +int t1(int X) { + return __builtin_clz(X); +} diff --git a/final/test/FrontendC/2006-01-23-FileScopeAsm.c b/final/test/FrontendC/2006-01-23-FileScopeAsm.c new file mode 100644 index 00000000000..80e71955623 --- /dev/null +++ b/final/test/FrontendC/2006-01-23-FileScopeAsm.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \ +// RUN: llvm-dis | grep {foo\[12345\]} | count 5 + +__asm__ ("foo1"); +__asm__ ("foo2"); +__asm__ ("foo3"); +__asm__ ("foo4"); +__asm__ ("foo5"); diff --git a/final/test/FrontendC/2006-03-03-MissingInitializer.c b/final/test/FrontendC/2006-03-03-MissingInitializer.c new file mode 100644 index 00000000000..5e027b1894a --- /dev/null +++ b/final/test/FrontendC/2006-03-03-MissingInitializer.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \ +// RUN: llvm-dis | grep {@nate.*internal unnamed_addr global i32 0} + +struct X { int *XX; int Y;}; + +void foo() { + static int nate = 0; + struct X bob = { &nate, 14 }; + bar(&bob); +} + diff --git a/final/test/FrontendC/2006-03-16-VectorCtor.c b/final/test/FrontendC/2006-03-16-VectorCtor.c new file mode 100644 index 00000000000..b95593b1214 --- /dev/null +++ b/final/test/FrontendC/2006-03-16-VectorCtor.c @@ -0,0 +1,10 @@ +// Test that basic generic vector support works +// RUN: %llvmgcc %s -S -o - + +typedef int v4si __attribute__ ((__vector_size__ (16))); +void test(v4si *P, v4si *Q, float X) { + 
*P = (v4si){ X, X, X, X } * *Q; +} + +v4si G = (v4si){ 0.1, 1.2, 4.2, 17.2 }; + diff --git a/final/test/FrontendC/2006-03-17-KnRMismatch.c b/final/test/FrontendC/2006-03-17-KnRMismatch.c new file mode 100644 index 00000000000..19391122fca --- /dev/null +++ b/final/test/FrontendC/2006-03-17-KnRMismatch.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o - + +void regnode(int op); + +void regnode(op) +char op; +{ +} diff --git a/final/test/FrontendC/2006-05-01-AppleAlignmentPragma.c b/final/test/FrontendC/2006-05-01-AppleAlignmentPragma.c new file mode 100644 index 00000000000..c9050aa9e4e --- /dev/null +++ b/final/test/FrontendC/2006-05-01-AppleAlignmentPragma.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o - + +#ifdef __APPLE__ +/* test that X is laid out correctly when this pragma is used. */ +#pragma options align=mac68k +#endif + +struct S { + unsigned A; + unsigned short B; +} X; + diff --git a/final/test/FrontendC/2006-05-19-SingleEltReturn.c b/final/test/FrontendC/2006-05-19-SingleEltReturn.c new file mode 100644 index 00000000000..70c94c62052 --- /dev/null +++ b/final/test/FrontendC/2006-05-19-SingleEltReturn.c @@ -0,0 +1,23 @@ +// Test returning a single element aggregate value containing a double. +// RUN: %llvmgcc %s -S -o - + +struct X { + double D; +}; + +struct Y { + struct X x; +}; + +struct Y bar(); + +void foo(struct Y *P) { + *P = bar(); +} + +struct Y bar() { + struct Y a; + a.x.D = 0; + return a; +} + diff --git a/final/test/FrontendC/2006-07-31-PR854.c b/final/test/FrontendC/2006-07-31-PR854.c new file mode 100644 index 00000000000..3802de8fc45 --- /dev/null +++ b/final/test/FrontendC/2006-07-31-PR854.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -w %s -S -o - +// PR854 + struct kernel_symbol { + unsigned long value; + }; + unsigned long loops_per_jiffy = (1<<12); + static const char __kstrtab_loops_per_jiffy[] +__attribute__((section("__ksymtab_strings"))) = "loops_per_jiffy"; + static const struct kernel_symbol __ksymtab_loops_per_jiffy +__attribute__((__used__)) __attribute__((section("__ksymtab"))) = { (unsigned +long)&loops_per_jiffy, __kstrtab_loops_per_jiffy }; diff --git a/final/test/FrontendC/2006-09-11-BitfieldRefCrash.c b/final/test/FrontendC/2006-09-11-BitfieldRefCrash.c new file mode 100644 index 00000000000..d06cc3afbf3 --- /dev/null +++ b/final/test/FrontendC/2006-09-11-BitfieldRefCrash.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o - +// PR906 + +struct state_struct { + unsigned long long phys_frame: 50; + unsigned valid : 2; +} s; + +int mem_access(struct state_struct *p) { + return p->valid; +} + diff --git a/final/test/FrontendC/2006-09-18-fwrite-cast-crash.c b/final/test/FrontendC/2006-09-18-fwrite-cast-crash.c new file mode 100644 index 00000000000..a693c5666f2 --- /dev/null +++ b/final/test/FrontendC/2006-09-18-fwrite-cast-crash.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -S -o /dev/null +// PR910 +// XFAIL: * +// See PR2452 + +struct l_struct_2E_FILE { char x; }; +unsigned fwrite(signed char *, unsigned , unsigned , signed char *); +static signed char str301[39]; +static void Usage(signed char *ltmp_611_6) { + struct l_struct_2E_FILE *ltmp_6202_16; + unsigned ltmp_6203_92; + ltmp_6203_92 = /*tail*/ ((unsigned (*) (signed char *, unsigned , unsigned , +struct l_struct_2E_FILE *))(void*)fwrite)((&(str301[0u])), 38u, 1u, ltmp_6202_16); +} + diff --git a/final/test/FrontendC/2006-09-21-IncompleteElementType.c b/final/test/FrontendC/2006-09-21-IncompleteElementType.c new file mode 100644 index 00000000000..a5091821cb6 --- /dev/null +++ 
b/final/test/FrontendC/2006-09-21-IncompleteElementType.c @@ -0,0 +1,3 @@ +// RUN: not %llvmgcc %s -S -o /dev/null |& not grep {internal compiler error} + +struct A X[(927 - 37) / sizeof(struct A)]; diff --git a/final/test/FrontendC/2006-09-25-DebugFilename.c b/final/test/FrontendC/2006-09-25-DebugFilename.c new file mode 100644 index 00000000000..eea52ba7608 --- /dev/null +++ b/final/test/FrontendC/2006-09-25-DebugFilename.c @@ -0,0 +1,6 @@ +// RUN: not %llvmgcc -xc %s -S -o /dev/null |& \ +// RUN: grep fluffy | grep 2006-09-25-DebugFilename.c +#include "2006-09-25-DebugFilename.h" +int func1() { return hfunc1(); } +int func2() { fluffy; return hfunc1(); } + diff --git a/final/test/FrontendC/2006-09-25-DebugFilename.h b/final/test/FrontendC/2006-09-25-DebugFilename.h new file mode 100644 index 00000000000..9b03666b3c2 --- /dev/null +++ b/final/test/FrontendC/2006-09-25-DebugFilename.h @@ -0,0 +1,6 @@ +extern int exfunc(int a); + +static inline int hfunc1() +{ + return exfunc(1); +} diff --git a/final/test/FrontendC/2006-09-28-SimpleAsm.c b/final/test/FrontendC/2006-09-28-SimpleAsm.c new file mode 100644 index 00000000000..e3040200a60 --- /dev/null +++ b/final/test/FrontendC/2006-09-28-SimpleAsm.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -o - | grep {ext: xorl %eax, eax; movl} +// RUN: %llvmgcc %s -S -o - | grep {nonext: xorl %eax, %eax; mov} +// PR924 + +void bar() { + // Extended asm + asm volatile ("ext: xorl %%eax, eax; movl eax, fs; movl eax, gs %%blah %= %% " : : "r"(1)); + // Non-extended asm. + asm volatile ("nonext: xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs %%blah %= %% "); +} diff --git a/final/test/FrontendC/2006-10-30-ArrayCrash.c b/final/test/FrontendC/2006-10-30-ArrayCrash.c new file mode 100644 index 00000000000..09464dd3a06 --- /dev/null +++ b/final/test/FrontendC/2006-10-30-ArrayCrash.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -O3 -S -o - %s +// PR954, PR911 + +extern void foo(); + +struct S { + short f1[3]; + unsigned int f2 : 1; +}; + +void bar() +{ + struct S *A; + + if (A->f2) + foo(); +} diff --git a/final/test/FrontendC/2006-12-14-ordered_expr.c b/final/test/FrontendC/2006-12-14-ordered_expr.c new file mode 100644 index 00000000000..8ff2eb60721 --- /dev/null +++ b/final/test/FrontendC/2006-12-14-ordered_expr.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -O3 -S %s -o - | grep {fcmp ord float %X, %Y} + +int test2(float X, float Y) { + return !__builtin_isunordered(X, Y); +} + diff --git a/final/test/FrontendC/2007-01-06-KNR-Proto.c b/final/test/FrontendC/2007-01-06-KNR-Proto.c new file mode 100644 index 00000000000..6aa74d4cb25 --- /dev/null +++ b/final/test/FrontendC/2007-01-06-KNR-Proto.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S -o - %s +// PR1083 + +int svc_register (void (*dispatch) (int)); + +int svc_register (dispatch) + void (*dispatch) (); +{ +} + diff --git a/final/test/FrontendC/2007-01-20-VectorICE.c b/final/test/FrontendC/2007-01-20-VectorICE.c new file mode 100644 index 00000000000..c2dcdef1944 --- /dev/null +++ b/final/test/FrontendC/2007-01-20-VectorICE.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o - + +typedef float __m128 __attribute__((__vector_size__(16))); +typedef long long __v2di __attribute__((__vector_size__(16))); +typedef int __v4si __attribute__((__vector_size__(16))); + +__v2di bar(void); +void foo(int X, __v4si *P) { + *P = X == 2 ? 
bar() : bar(); +} + diff --git a/final/test/FrontendC/2007-01-24-InlineAsmCModifier.c b/final/test/FrontendC/2007-01-24-InlineAsmCModifier.c new file mode 100644 index 00000000000..c601ccf2eb1 --- /dev/null +++ b/final/test/FrontendC/2007-01-24-InlineAsmCModifier.c @@ -0,0 +1,10 @@ +// Verify that the %c modifier works and strips off any prefixes from +// immediates. +// RUN: %llvmgcc -S %s -o - | llc | grep {pickANumber: 789514} + +void foo() { + __asm__ volatile("/* " "pickANumber" ": %c0 */"::"i"(0xC0C0A)); + + // Check that non-c modifiers work also (not grepped for above). + __asm__ volatile("/* " "pickANumber2 " ": %0 */"::"i"(123)); +} diff --git a/final/test/FrontendC/2007-02-04-AddrLValue-2.c b/final/test/FrontendC/2007-02-04-AddrLValue-2.c new file mode 100644 index 00000000000..fa20faff3e1 --- /dev/null +++ b/final/test/FrontendC/2007-02-04-AddrLValue-2.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc %s -O3 -S -o - +// PR1173 + +struct S { char s; }; +struct T { struct S t; }; + +struct S *const p = &((struct T * const) (0x4000))->t; + +void +foo (void) +{ + p->s = 0; +} diff --git a/final/test/FrontendC/2007-02-04-AddrLValue.c b/final/test/FrontendC/2007-02-04-AddrLValue.c new file mode 100644 index 00000000000..214fce7747c --- /dev/null +++ b/final/test/FrontendC/2007-02-04-AddrLValue.c @@ -0,0 +1,23 @@ +// RUN: %llvmgcc %s -O3 -S -o - +// PR1176 + +typedef struct +{ + char *key; + char *value; +} T1; + +typedef struct +{ + long type; + char *value; +} T3; + +T1 a[] = +{ + { + "", + ((char *)&((T3) {1, (char *) 1})) + } +}; + diff --git a/final/test/FrontendC/2007-02-04-EmptyStruct.c b/final/test/FrontendC/2007-02-04-EmptyStruct.c new file mode 100644 index 00000000000..5ad2c705cce --- /dev/null +++ b/final/test/FrontendC/2007-02-04-EmptyStruct.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -O3 -S -o - +// PR1175 + +struct empty { }; + +void foo(struct empty *p) { + p++; +} + diff --git a/final/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c b/final/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c new file mode 100644 index 00000000000..d5a9fbb0ecc --- /dev/null +++ b/final/test/FrontendC/2007-02-04-WITH_SIZE_EXPR.c @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -O3 -S -o - +// PR1174 + +void zzz (char *s1, char *s2, int len, int *q) +{ + int z = 5; + unsigned int i, b; + struct { char a[z]; } x; + + for (i = 0; i < len; i++) + s1[i] = s2[i]; + + b = z & 0x3; + + len += (b == 0 ? 
0 : 1) + z; + + *q = len; + + foo (x, x); +} + diff --git a/final/test/FrontendC/2007-02-05-nested.c b/final/test/FrontendC/2007-02-05-nested.c new file mode 100644 index 00000000000..bd6d30695ba --- /dev/null +++ b/final/test/FrontendC/2007-02-05-nested.c @@ -0,0 +1,54 @@ +// RUN: %llvmgcc -S -fnested-functions -O0 -o - %s +// PR915 + +extern void abort(void); + +void nest(int n) +{ + int a = 0; + int b = 5; + int c = 0; + int d = 7; + + void o(int i, int j) + { + if (i!=j) + abort(); + } + + void f(x) + int x; /* K&R style */ + { + int e = 0; + int f = 2; + int g = 0; + + void y(void) + { + c = n; + e = 1; + g = x; + } + + void z(void) + { + a = 4; + g = 3; + } + + a = 5; + y(); + c = x; + z(); + o(1,e); + o(2,f); + o(3,g); + } + + c = 2; + f(6); + o(4,a); + o(5,b); + o(6,c); + o(7,d); +} diff --git a/final/test/FrontendC/2007-02-07-AddrLabel.c b/final/test/FrontendC/2007-02-07-AddrLabel.c new file mode 100644 index 00000000000..03ed4c987e4 --- /dev/null +++ b/final/test/FrontendC/2007-02-07-AddrLabel.c @@ -0,0 +1,10 @@ +// PR947 +// RUN: %llvmgcc %s -S -o - + +void foo() { + void *ptr; + label: + ptr = &&label; + + goto *ptr; + } diff --git a/final/test/FrontendC/2007-02-16-VariableSizeStructArg.c b/final/test/FrontendC/2007-02-16-VariableSizeStructArg.c new file mode 100644 index 00000000000..ec6971acdb1 --- /dev/null +++ b/final/test/FrontendC/2007-02-16-VariableSizeStructArg.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S -w %s -o - +// PR1170 +int f(int a, struct {int b[a];} c) { return c.b[0]; } + +int g(struct {int b[1];} c) { + return c.b[0]; +} diff --git a/final/test/FrontendC/2007-02-16-VoidPtrDiff.c b/final/test/FrontendC/2007-02-16-VoidPtrDiff.c new file mode 100644 index 00000000000..15df28cae3f --- /dev/null +++ b/final/test/FrontendC/2007-02-16-VoidPtrDiff.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc %s -S -o - + +void foo(void *ptr, int test) { + (ptr - ((void *) test + 0x2000)); +} diff --git a/final/test/FrontendC/2007-02-16-WritableStrings.c b/final/test/FrontendC/2007-02-16-WritableStrings.c new file mode 100644 index 00000000000..8fa7f15dc6c --- /dev/null +++ b/final/test/FrontendC/2007-02-16-WritableStrings.c @@ -0,0 +1,7 @@ +// Test the -fwritable-strings option. 
+ +// RUN: %llvmgcc -O3 -S -o - -fwritable-strings %s | \ +// RUN: grep {internal unnamed_addr global} +// RUN: %llvmgcc -O3 -S -o - %s | grep {private unnamed_addr constant} + +char *X = "foo"; diff --git a/final/test/FrontendC/2007-02-25-C-DotDotDot.c b/final/test/FrontendC/2007-02-25-C-DotDotDot.c new file mode 100644 index 00000000000..3f96fd1f9e1 --- /dev/null +++ b/final/test/FrontendC/2007-02-25-C-DotDotDot.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -O0 -S -o - -fno-inline -fno-unit-at-a-time %s | \ +// RUN: grep {call float @foo} + +// Make sure the call to foo is compiled as: +// call float @foo() +// not +// call float (...)* bitcast (float ()* @foo to float (...)*)( ) + +static float foo() { return 0.0; } +float bar() { return foo()*10.0;} + + diff --git a/final/test/FrontendC/2007-03-01-VarSizeArrayIdx.c b/final/test/FrontendC/2007-03-01-VarSizeArrayIdx.c new file mode 100644 index 00000000000..6ebe79672f5 --- /dev/null +++ b/final/test/FrontendC/2007-03-01-VarSizeArrayIdx.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -O3 -S -o - | grep mul +// PR1233 + +float foo(int w, float A[][w], int g, int h) { + return A[g][0]; +} + diff --git a/final/test/FrontendC/2007-03-05-DataLayout.c b/final/test/FrontendC/2007-03-05-DataLayout.c new file mode 100644 index 00000000000..18819f16908 --- /dev/null +++ b/final/test/FrontendC/2007-03-05-DataLayout.c @@ -0,0 +1,53 @@ +// Testcase for PR1242 +// RUN: %llvmgcc -S %s -o - | grep datalayout | \ +// RUN: not grep {"\[Ee\]-p:\[36\]\[24\]:\[36\]\[24\]"} +// END. +#include <stdlib.h> +#define NDIM 3 +#define BODY 01 +typedef double vector[NDIM]; +typedef struct bnode* bodyptr; +// { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x +// double], double, \2 *, \2 * } +struct bnode { + short int type; + double mass; + vector pos; + int proc; + int new_proc; + vector vel; + vector acc; + vector new_acc; + double phi; + bodyptr next; + bodyptr proc_next; +} body; + +#define Type(x) ((x)->type) +#define Mass(x) ((x)->mass) +#define Pos(x) ((x)->pos) +#define Proc(x) ((x)->proc) +#define New_Proc(x) ((x)->new_proc) +#define Vel(x) ((x)->vel) +#define Acc(x) ((x)->acc) +#define New_Acc(x) ((x)->new_acc) +#define Phi(x) ((x)->phi) +#define Next(x) ((x)->next) +#define Proc_Next(x) ((x)->proc_next) + +bodyptr ubody_alloc(int p) +{ + register bodyptr tmp; + tmp = (bodyptr)malloc(sizeof(body)); + + Type(tmp) = BODY; + Proc(tmp) = p; + Proc_Next(tmp) = NULL; + New_Proc(tmp) = p; + return tmp; +} + +int main(int argc, char** argv) { + bodyptr b = ubody_alloc(17); + return 0; +} diff --git a/final/test/FrontendC/2007-03-06-VarSizeInStruct1.c b/final/test/FrontendC/2007-03-06-VarSizeInStruct1.c new file mode 100644 index 00000000000..b4ae5654931 --- /dev/null +++ b/final/test/FrontendC/2007-03-06-VarSizeInStruct1.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -w -S -o - +void* p (int n) { + struct f { + char w; char x[n]; char z[]; + } F; + F.x[0]='x'; + return &F; +} diff --git a/final/test/FrontendC/2007-03-06-VarSizeInStruct2.c b/final/test/FrontendC/2007-03-06-VarSizeInStruct2.c new file mode 100644 index 00000000000..13bc3aaf9ae --- /dev/null +++ b/final/test/FrontendC/2007-03-06-VarSizeInStruct2.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o - +char p (int n) { + struct f { + char w; char x[n]; char y[n]; + } F; + + return F.x[0]; +} diff --git a/final/test/FrontendC/2007-03-26-BitfieldAfterZeroWidth.c b/final/test/FrontendC/2007-03-26-BitfieldAfterZeroWidth.c new file mode 100644 index 00000000000..9b6a8690a33 --- /dev/null +++
b/final/test/FrontendC/2007-03-26-BitfieldAfterZeroWidth.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - +struct W {}; +struct Y { + struct W w; + int i:1; +} __attribute__ ((packed)) y; diff --git a/final/test/FrontendC/2007-03-26-ZeroWidthBitfield.c b/final/test/FrontendC/2007-03-26-ZeroWidthBitfield.c new file mode 100644 index 00000000000..89bfb8e1cb0 --- /dev/null +++ b/final/test/FrontendC/2007-03-26-ZeroWidthBitfield.c @@ -0,0 +1,2 @@ +// RUN: %llvmgcc %s -S -o - +struct Z { int :0; } z; diff --git a/final/test/FrontendC/2007-03-27-ArrayCompatible.c b/final/test/FrontendC/2007-03-27-ArrayCompatible.c new file mode 100644 index 00000000000..fa3d2db23cc --- /dev/null +++ b/final/test/FrontendC/2007-03-27-ArrayCompatible.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S %s -O2 -o - | grep {ret i8 0} +static char c(int n) { + char x[2][n]; + x[1][0]=0; + return *(n+(char *)x); +} + +char d(void) { + return c(2); +} diff --git a/final/test/FrontendC/2007-03-27-VarLengthArray.c b/final/test/FrontendC/2007-03-27-VarLengthArray.c new file mode 100644 index 00000000000..b555690068d --- /dev/null +++ b/final/test/FrontendC/2007-03-27-VarLengthArray.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | grep {getelementptr inbounds \\\[0 x i32\\\]} +extern void f(int *); +int e(int m, int n) { + int x[n]; + f(x); + return x[m]; +} diff --git a/final/test/FrontendC/2007-04-05-PackedBitFields-2.c b/final/test/FrontendC/2007-04-05-PackedBitFields-2.c new file mode 100644 index 00000000000..d9db4206c16 --- /dev/null +++ b/final/test/FrontendC/2007-04-05-PackedBitFields-2.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -o - + +# define pck __attribute__((packed)) + + +struct pck F { + unsigned long long i : 12, + j : 23, + k : 27, + l; +}; +struct F f1; + +void foo() { + f1.l = 5; +} diff --git a/final/test/FrontendC/2007-04-05-PackedBitFields.c b/final/test/FrontendC/2007-04-05-PackedBitFields.c new file mode 100644 index 00000000000..f9de35639b0 --- /dev/null +++ b/final/test/FrontendC/2007-04-05-PackedBitFields.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -o - + +# define pck __attribute__((packed)) + + +struct pck E { + unsigned long long l, + i : 12, + j : 23, + k : 29; }; + +struct E e1; + +void foo() { + e1.k = 5; +} diff --git a/final/test/FrontendC/2007-04-05-PackedStruct.c b/final/test/FrontendC/2007-04-05-PackedStruct.c new file mode 100644 index 00000000000..0d524c489ea --- /dev/null +++ b/final/test/FrontendC/2007-04-05-PackedStruct.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc %s -S -o - + +#pragma pack(push, 2) + +enum { + tA = 0, + tB = 1 +}; + +struct MyStruct { + unsigned long A; + char C; + void * B; +}; + +void bar(){ +struct MyStruct MS = { tB, 0 }; +} diff --git a/final/test/FrontendC/2007-04-05-PadBeforeZeroLengthField.c b/final/test/FrontendC/2007-04-05-PadBeforeZeroLengthField.c new file mode 100644 index 00000000000..acc38219925 --- /dev/null +++ b/final/test/FrontendC/2007-04-05-PadBeforeZeroLengthField.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - +struct c__ { unsigned int type:4; }; +union A { struct c__ c; } __attribute__((aligned(8))); +struct B { + unsigned int retainCount; + union A objects[]; +}; +void foo(union A * objects, struct B *array, unsigned long k) +{ array->objects[k] = objects[k]; } diff --git a/final/test/FrontendC/2007-04-05-UnPackedStruct.c b/final/test/FrontendC/2007-04-05-UnPackedStruct.c new file mode 100644 index 00000000000..9e168ed34fe --- /dev/null +++ b/final/test/FrontendC/2007-04-05-UnPackedStruct.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -o - + + +enum { + tA = 0, + tB 
= 1 +}; + +struct MyStruct { + unsigned long A; + void * B; +}; + +void bar(){ +struct MyStruct MS = { tB, 0 }; +} diff --git a/final/test/FrontendC/2007-04-11-InlineAsmStruct.c b/final/test/FrontendC/2007-04-11-InlineAsmStruct.c new file mode 100644 index 00000000000..6c6c1509903 --- /dev/null +++ b/final/test/FrontendC/2007-04-11-InlineAsmStruct.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - | llc + +struct V { short X, Y; }; +int bar() { + struct V bar; + __asm__ volatile("foo %0\n" : "=r"(bar)); + return bar.X; +} + diff --git a/final/test/FrontendC/2007-04-11-InlineAsmUnion.c b/final/test/FrontendC/2007-04-11-InlineAsmUnion.c new file mode 100644 index 00000000000..014470102d3 --- /dev/null +++ b/final/test/FrontendC/2007-04-11-InlineAsmUnion.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - | llc + +union U { int x; float p; }; +void foo() { + union U bar; + __asm__ volatile("foo %0\n" : "=r"(bar)); +} diff --git a/final/test/FrontendC/2007-04-11-InlineStorageClassC89.c b/final/test/FrontendC/2007-04-11-InlineStorageClassC89.c new file mode 100644 index 00000000000..834fb07a262 --- /dev/null +++ b/final/test/FrontendC/2007-04-11-InlineStorageClassC89.c @@ -0,0 +1,46 @@ +// RUN: %llvmgcc %s -S -O0 -o - | grep define | grep xglobWeak | \ +// RUN: grep weak | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | grep xextWeak | \ +// RUN: grep weak | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | \ +// RUN: grep xWeaknoinline | grep weak | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | \ +// RUN: grep xWeakextnoinline | grep weak | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | \ +// RUN: grep xglobnoWeak | grep -v internal | grep -v weak | \ +// RUN: grep -v linkonce | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | \ +// RUN: grep xstatnoWeak | grep internal | count 1 +// RUN: %llvmgcc %s -S -O0 -o - | grep define | \ +// RUN: grep xextnoWeak | grep available_externally | grep -v weak | \ +// RUN: grep -v linkonce | count 1 +inline int xglobWeak(int) __attribute__((weak)); +inline int xglobWeak (int i) { + return i*2; +} +inline int xextWeak(int) __attribute__((weak)); +extern inline int xextWeak (int i) { + return i*4; +} +int xWeaknoinline(int) __attribute__((weak)); +int xWeaknoinline(int i) { + return i*8; +} +int xWeakextnoinline(int) __attribute__((weak)); +extern int xWeakextnoinline(int i) { + return i*16; +} +inline int xglobnoWeak (int i) { + return i*32; +} +static inline int xstatnoWeak (int i) { + return i*64; +} +extern inline int xextnoWeak (int i) { + return i*128; +} +int j(int y) { + return xglobnoWeak(y)+xstatnoWeak(y)+xextnoWeak(y)+ + xglobWeak(y)+xextWeak(y)+ + xWeakextnoinline(y)+xWeaknoinline(y); +} diff --git a/final/test/FrontendC/2007-04-11-InlineStorageClassC99.c b/final/test/FrontendC/2007-04-11-InlineStorageClassC99.c new file mode 100644 index 00000000000..6031071e346 --- /dev/null +++ b/final/test/FrontendC/2007-04-11-InlineStorageClassC99.c @@ -0,0 +1,46 @@ +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep declare | \ +// RUN: grep xglobWeak | grep extern_weak | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xextWeak | grep weak | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xWeaknoinline | grep weak | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xWeakextnoinline | grep weak | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xglobnoWeak | grep available_externally | grep -v 
weak | \ +// RUN: grep -v linkonce | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xstatnoWeak | grep internal | count 1 +// RUN: %llvmgcc -std=c99 %s -S -O0 -o - | grep define | \ +// RUN: grep xextnoWeak | grep -v available_externally | grep -v weak | \ +// RUN: grep -v linkonce | count 1 +inline int xglobWeak(int) __attribute__((weak)); +inline int xglobWeak (int i) { + return i*2; +} +inline int xextWeak(int) __attribute__((weak)); +extern inline int xextWeak (int i) { + return i*4; +} +int xWeaknoinline(int) __attribute__((weak)); +int xWeaknoinline(int i) { + return i*8; +} +int xWeakextnoinline(int) __attribute__((weak)); +extern int xWeakextnoinline(int i) { + return i*16; +} +inline int xglobnoWeak (int i) { + return i*32; +} +static inline int xstatnoWeak (int i) { + return i*64; +} +extern inline int xextnoWeak (int i) { + return i*128; +} +int j(int y) { + return xglobnoWeak(y)+xstatnoWeak(y)+xextnoWeak(y)+ + xglobWeak(y)+xextWeak(y)+ + xWeakextnoinline(y)+xWeaknoinline(y); +} diff --git a/final/test/FrontendC/2007-04-11-PR1321.c b/final/test/FrontendC/2007-04-11-PR1321.c new file mode 100644 index 00000000000..f391329a0f1 --- /dev/null +++ b/final/test/FrontendC/2007-04-11-PR1321.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o /dev/null + +struct X { + unsigned int e0 : 17; + unsigned int e1 : 17; + unsigned int e2 : 17; + unsigned int e3 : 17; + unsigned int e4 : 17; + unsigned int e5 : 17; + unsigned int e6 : 17; + unsigned int e7 : 17; +} __attribute__((packed)) x; diff --git a/final/test/FrontendC/2007-04-13-InlineAsmStruct2.c b/final/test/FrontendC/2007-04-13-InlineAsmStruct2.c new file mode 100644 index 00000000000..44ddeb3f95d --- /dev/null +++ b/final/test/FrontendC/2007-04-13-InlineAsmStruct2.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - | grep {call void asm} + +struct V { short X, Y; }; +int bar() { + struct V bar; + __asm__ volatile("foo %0\n" :: "r"(bar)); + return bar.X; +} + diff --git a/final/test/FrontendC/2007-04-13-InlineAsmUnion2.c b/final/test/FrontendC/2007-04-13-InlineAsmUnion2.c new file mode 100644 index 00000000000..a0944a7b640 --- /dev/null +++ b/final/test/FrontendC/2007-04-13-InlineAsmUnion2.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - | grep {call void asm} + +union U { int x; char* p; }; +void foo() { + union U bar; + __asm__ volatile("foo %0\n" :: "r"(bar)); +} diff --git a/final/test/FrontendC/2007-04-14-FNoBuiltin.c b/final/test/FrontendC/2007-04-14-FNoBuiltin.c new file mode 100644 index 00000000000..88bf0e01430 --- /dev/null +++ b/final/test/FrontendC/2007-04-14-FNoBuiltin.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -O2 -fno-builtin -o - | grep call.*printf +// Check that -fno-builtin is honored. 
+ +extern int printf(const char*, ...); +void foo(const char *msg) { + printf("%s\n",msg); +} diff --git a/final/test/FrontendC/2007-04-17-ZeroSizeBitFields.c b/final/test/FrontendC/2007-04-17-ZeroSizeBitFields.c new file mode 100644 index 00000000000..ec7b7ea273f --- /dev/null +++ b/final/test/FrontendC/2007-04-17-ZeroSizeBitFields.c @@ -0,0 +1,4 @@ +// PR 1332 +// RUN: %llvmgcc %s -S -o /dev/null + +struct Z { int a:1; int :0; int c:1; } z; diff --git a/final/test/FrontendC/2007-04-24-VolatileStructCopy.c b/final/test/FrontendC/2007-04-24-VolatileStructCopy.c new file mode 100644 index 00000000000..d49e75e0254 --- /dev/null +++ b/final/test/FrontendC/2007-04-24-VolatileStructCopy.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -O3 -S -o - %s | grep {volatile store} +// PR1352 + +struct foo { + int x; +}; + +void copy(volatile struct foo *p, struct foo *q) { + *p = *q; +} diff --git a/final/test/FrontendC/2007-04-24-bit-not-expr.c b/final/test/FrontendC/2007-04-24-bit-not-expr.c new file mode 100644 index 00000000000..fab0b90bb15 --- /dev/null +++ b/final/test/FrontendC/2007-04-24-bit-not-expr.c @@ -0,0 +1,7 @@ +// PR 1346 +// RUN: %llvmgcc -S %s -o /dev/null +extern bar(void *); + +void f(void *cd) { + bar(((void *)((unsigned long)(cd) ^ -1))); +} diff --git a/final/test/FrontendC/2007-04-24-str-const.c b/final/test/FrontendC/2007-04-24-str-const.c new file mode 100644 index 00000000000..3c3dab372ab --- /dev/null +++ b/final/test/FrontendC/2007-04-24-str-const.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o /dev/null +static char *str; + +static const struct { + const char *name; + unsigned type; +} scan_special[] = { + {"shift", 1}, + {0, 0} +}; + +static void +sb(void) +{ + while (*str == ' ' || *str == '\t') + str++; +} diff --git a/final/test/FrontendC/2007-05-07-NestedStructReturn.c b/final/test/FrontendC/2007-05-07-NestedStructReturn.c new file mode 100644 index 00000000000..aea58e3ae8b --- /dev/null +++ b/final/test/FrontendC/2007-05-07-NestedStructReturn.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc %s -S -fnested-functions -o - | grep {sret *%agg.result} + +struct X { long m, n, o, p; }; + +struct X p(int n) { + struct X c(int m) { + struct X x; + x.m = m; + x.n = n; + return x; + } + return c(n); +} diff --git a/final/test/FrontendC/2007-05-07-PaddingElements.c b/final/test/FrontendC/2007-05-07-PaddingElements.c new file mode 100644 index 00000000000..1e4f4d0a751 --- /dev/null +++ b/final/test/FrontendC/2007-05-07-PaddingElements.c @@ -0,0 +1,12 @@ +// PR 1278 +// RUN: %llvmgcc %s -S -O0 -o - | grep {struct.s} | not grep "4 x i8] zeroinitializer" +// RUN: %llvmgcc %s -S -O0 -o - | not grep "i32 0, i32 2" +struct s { + double d1; + int s1; +}; + +struct s foo(void) { + struct s S = {1.1, 2}; + return S; +} diff --git a/final/test/FrontendC/2007-05-08-PCH.c b/final/test/FrontendC/2007-05-08-PCH.c new file mode 100644 index 00000000000..aa277ece99e --- /dev/null +++ b/final/test/FrontendC/2007-05-08-PCH.c @@ -0,0 +1,7 @@ +// PR 1400 +// RUN: %llvmgcc -x c-header %s -o /dev/null + +int main() { + return 0; +} + diff --git a/final/test/FrontendC/2007-05-11-str-const.c b/final/test/FrontendC/2007-05-11-str-const.c new file mode 100644 index 00000000000..46a74c19e01 --- /dev/null +++ b/final/test/FrontendC/2007-05-11-str-const.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S -g %s -o /dev/null + +static unsigned char out[]={0,1}; +static const unsigned char str1[]="1"; + diff --git a/final/test/FrontendC/2007-05-15-PaddingElement.c b/final/test/FrontendC/2007-05-15-PaddingElement.c new file mode 100644 index 
00000000000..bad6a11dae8 --- /dev/null +++ b/final/test/FrontendC/2007-05-15-PaddingElement.c @@ -0,0 +1,23 @@ +// PR 1419 + +// RUN: %llvmgcc -xc -O2 %s -S -o - | grep "ret i32 1" +struct A { + short x; + long long :0; +}; + +struct B { + char a; + char b; + unsigned char i; +}; + +union X { struct A a; struct B b; }; + +int check(void) { + union X x, y; + + y.b.i = 0xff; + x = y; + return (x.b.i == 0xff); +} diff --git a/final/test/FrontendC/2007-05-16-EmptyStruct.c b/final/test/FrontendC/2007-05-16-EmptyStruct.c new file mode 100644 index 00000000000..7b2ab61bcca --- /dev/null +++ b/final/test/FrontendC/2007-05-16-EmptyStruct.c @@ -0,0 +1,5 @@ +// PR 1417 + +// RUN: %llvmgcc -xc %s -S -o - | grep "struct.anon = type \{\}" + +struct { } *X; diff --git a/final/test/FrontendC/2007-05-29-UnionCopy.c b/final/test/FrontendC/2007-05-29-UnionCopy.c new file mode 100644 index 00000000000..95ab388c842 --- /dev/null +++ b/final/test/FrontendC/2007-05-29-UnionCopy.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S -o - %s | grep memcpy +// PR1421 + +struct A { + char c; + int i; +}; + +struct B { + int c; + unsigned char x; +}; + +union U { struct A a; struct B b; }; + +void check(union U *u, union U *v) { + *u = *v; +} diff --git a/final/test/FrontendC/2007-06-05-NoInlineAttribute.c b/final/test/FrontendC/2007-06-05-NoInlineAttribute.c new file mode 100644 index 00000000000..9543538fb1b --- /dev/null +++ b/final/test/FrontendC/2007-06-05-NoInlineAttribute.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -O2 -S %s -o - | grep call + +static int bar(int x, int y) __attribute__((noinline)); + +static int bar(int x, int y) +{ + return x + y; +} + +int foo(int a, int b) { + return bar(b, a); +} + diff --git a/final/test/FrontendC/2007-06-15-AnnotateAttribute.c b/final/test/FrontendC/2007-06-15-AnnotateAttribute.c new file mode 100644 index 00000000000..115c3f73b90 --- /dev/null +++ b/final/test/FrontendC/2007-06-15-AnnotateAttribute.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc -S %s -o - | grep llvm.global.annotations +// RUN: %llvmgcc -S %s -o - | grep llvm.var.annotation | count 3 + +#include <stdio.h> + +/* Global variable with attribute */ +int X __attribute__((annotate("GlobalValAnnotation"))); + +/* Function with attribute */ +int foo(int y) __attribute__((annotate("GlobalValAnnotation"))) + __attribute__((noinline)); + +int foo(int y __attribute__((annotate("LocalValAnnotation")))) { + int x __attribute__((annotate("LocalValAnnotation"))); + x = 34; + return y + x; +} + +int main() { + static int a __attribute__((annotate("GlobalValAnnotation"))); + a = foo(2); + printf("hello world%d\n", a); + return 0; +} diff --git a/final/test/FrontendC/2007-06-18-SextAttrAggregate.c b/final/test/FrontendC/2007-06-18-SextAttrAggregate.c new file mode 100644 index 00000000000..c395db220dc --- /dev/null +++ b/final/test/FrontendC/2007-06-18-SextAttrAggregate.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -o - -S -O3 | grep {i8 signext} +// PR1513 + +struct s{ +long a; +long b; +}; + +void f(struct s a, char *b, signed char C) { + +} diff --git a/final/test/FrontendC/2007-07-29-RestrictPtrArg.c b/final/test/FrontendC/2007-07-29-RestrictPtrArg.c new file mode 100644 index 00000000000..5925d972b26 --- /dev/null +++ b/final/test/FrontendC/2007-07-29-RestrictPtrArg.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - | grep noalias + +void foo(int * __restrict myptr1, int * myptr2) { + myptr1[0] = 0; + myptr2[0] = 0; +} diff --git a/final/test/FrontendC/2007-08-01-LoadStoreAlign.c b/final/test/FrontendC/2007-08-01-LoadStoreAlign.c new file mode 100644 index
00000000000..5365c06c257 --- /dev/null +++ b/final/test/FrontendC/2007-08-01-LoadStoreAlign.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -O3 -S -o - %s | grep {align 1} | count 2 +// RUN: %llvmgcc -O3 -S -o - %s | llc + +struct p { + char a; + int b; +} __attribute__ ((packed)); + +struct p t = { 1, 10 }; +struct p u; + +int main () { + int tmp = t.b; + u.b = tmp; + return tmp; + +} diff --git a/final/test/FrontendC/2007-08-21-ComplexCst.c b/final/test/FrontendC/2007-08-21-ComplexCst.c new file mode 100644 index 00000000000..ebdee14bba3 --- /dev/null +++ b/final/test/FrontendC/2007-08-21-ComplexCst.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -O2 -S %s -o /dev/null +void f(_Complex float z); +void g() { f(1.0i); } diff --git a/final/test/FrontendC/2007-08-22-CTTZ.c b/final/test/FrontendC/2007-08-22-CTTZ.c new file mode 100644 index 00000000000..9e74f24cdcb --- /dev/null +++ b/final/test/FrontendC/2007-08-22-CTTZ.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -O2 -S -o - %s | grep {llvm.cttz.i64} | count 2 +// RUN: %llvmgcc -O2 -S -o - %s | not grep {lshr} + +int bork(unsigned long long x) { + return __builtin_ctzll(x); +} diff --git a/final/test/FrontendC/2007-09-05-ConstCtor.c b/final/test/FrontendC/2007-09-05-ConstCtor.c new file mode 100644 index 00000000000..adae4a69b10 --- /dev/null +++ b/final/test/FrontendC/2007-09-05-ConstCtor.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -xc -Os -S %s -o /dev/null +// PR1641 + +struct A { + unsigned long l; +}; + +void bar(struct A *a); + +void bork() { + const unsigned long vcgt = 1234; + struct A a = { vcgt }; + bar(&a); +} diff --git a/final/test/FrontendC/2007-09-12-PragmaPack.c b/final/test/FrontendC/2007-09-12-PragmaPack.c new file mode 100644 index 00000000000..4fc7f48be01 --- /dev/null +++ b/final/test/FrontendC/2007-09-12-PragmaPack.c @@ -0,0 +1,30 @@ +// RUN: %llvmgcc -O3 -S -o - %s | grep {18} + +#include <stdint.h> + +#pragma pack(push, 1) +typedef struct +{ + uint32_t a; +} foo; + +typedef struct { + uint8_t major; + uint8_t minor; + uint16_t build; +} VERSION; + +typedef struct { + uint8_t a[5]; + VERSION version; + uint8_t b; + foo d; + uint32_t guard; +} bar; +#pragma pack(pop) + + +unsigned barsize(void) { + return sizeof(bar); +} + diff --git a/final/test/FrontendC/2007-09-14-NegatePointer.c b/final/test/FrontendC/2007-09-14-NegatePointer.c new file mode 100644 index 00000000000..cb49e46ddb1 --- /dev/null +++ b/final/test/FrontendC/2007-09-14-NegatePointer.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - +// PR1662 + +int foo(unsigned char *test) { + return 0U - (unsigned int )test; +} + diff --git a/final/test/FrontendC/2007-09-17-WeakRef.c b/final/test/FrontendC/2007-09-17-WeakRef.c new file mode 100644 index 00000000000..6c420ea38a7 --- /dev/null +++ b/final/test/FrontendC/2007-09-17-WeakRef.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -O1 -S %s -o - | grep icmp +// PR1678 + +extern void B (void); +static __typeof(B) A __attribute__ ((__weakref__("B"))); +int active (void) +{ + static void *const p = __extension__ (void *) &A; + return p != 0; +} diff --git a/final/test/FrontendC/2007-09-20-GcrootAttribute.c b/final/test/FrontendC/2007-09-20-GcrootAttribute.c new file mode 100644 index 00000000000..b67b474c4c1 --- /dev/null +++ b/final/test/FrontendC/2007-09-20-GcrootAttribute.c @@ -0,0 +1,29 @@ +// RUN: %llvmgcc -S %s -o - | grep llvm.gcroot +// RUN: %llvmgcc -S %s -o - | grep llvm.gcroot | count 6 +// RUN: %llvmgcc -S %s -o - | llvm-as + +typedef struct foo_s +{ + int a; +} foo, __attribute__ ((gcroot)) *foo_p; + +foo my_foo; + +int alpha () +{ + foo my_foo2 = my_foo; + + return
my_foo2.a; +} + +int bar (foo a) +{ + foo_p b; + return b->a; +} + +foo_p baz (foo_p a, foo_p b, foo_p *c) +{ + a = b = *c; + return a; +} diff --git a/final/test/FrontendC/2007-09-26-Alignment.c b/final/test/FrontendC/2007-09-26-Alignment.c new file mode 100644 index 00000000000..1638fed0587 --- /dev/null +++ b/final/test/FrontendC/2007-09-26-Alignment.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | grep {align 16} +extern p(int *); +int q(void) { + int x __attribute__ ((aligned (16))); + p(&x); + return x; +} diff --git a/final/test/FrontendC/2007-09-27-ComplexIntCompare.c b/final/test/FrontendC/2007-09-27-ComplexIntCompare.c new file mode 100644 index 00000000000..50626e548c6 --- /dev/null +++ b/final/test/FrontendC/2007-09-27-ComplexIntCompare.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o - +// PR1708 + +#include <stdlib.h> + +struct s { _Complex unsigned short x; }; +struct s gs = { 100 + 200i }; +struct s __attribute__((noinline)) foo (void) { return gs; } + +int main () +{ + if (foo ().x != gs.x) + abort (); + exit (0); +} + + diff --git a/final/test/FrontendC/2007-09-28-PackedUnionMember.c b/final/test/FrontendC/2007-09-28-PackedUnionMember.c new file mode 100644 index 00000000000..79f48ceaeea --- /dev/null +++ b/final/test/FrontendC/2007-09-28-PackedUnionMember.c @@ -0,0 +1,38 @@ +// RUN: %llvmgcc %s -S -o - + +#pragma pack(push, 2) +struct H { + unsigned long f1; + unsigned long f2; + union { + struct opaque1 *f3; + struct opaque2 *f4; + struct { + struct opaque3 *f5; + unsigned short f6; + } f7; + } f8; +}; +#pragma pack(pop) + +struct E { + unsigned long f1; + unsigned long f2; +}; + +typedef long (*FuncPtr) (); + +extern long bork(FuncPtr handler, const struct E *list); + +static long hndlr() +{ + struct H cmd = { 4, 412 }; + return 0; +} +void foo(void *inWindow) { + static const struct E events[] = { + { 123124, 1 } + }; + bork(hndlr, events); +} + diff --git a/final/test/FrontendC/2007-10-01-BuildArrayRef.c b/final/test/FrontendC/2007-10-01-BuildArrayRef.c new file mode 100644 index 00000000000..e87a5b63054 --- /dev/null +++ b/final/test/FrontendC/2007-10-01-BuildArrayRef.c @@ -0,0 +1,20 @@ +// RUN: not %llvmgcc_only -c %s -o /dev/null |& FileCheck %s +// PR 1603 +void func() +{ + const int *arr; + arr[0] = 1; // CHECK: error: assignment of read-only location +} + +struct foo { + int bar; +}; +struct foo sfoo = { 0 }; + +int func2() +{ + const struct foo *fp; + fp = &sfoo; + fp[0].bar = 1; // CHECK: error: assignment of read-only member 'bar' + return sfoo.bar; +} diff --git a/final/test/FrontendC/2007-10-02-VolatileArray.c b/final/test/FrontendC/2007-10-02-VolatileArray.c new file mode 100644 index 00000000000..7e8bf24a84b --- /dev/null +++ b/final/test/FrontendC/2007-10-02-VolatileArray.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | grep volatile +// PR1647 + +void foo(volatile int *p) +{ +p[0] = 0; +} diff --git a/final/test/FrontendC/2007-10-15-VoidPtr.c b/final/test/FrontendC/2007-10-15-VoidPtr.c new file mode 100644 index 00000000000..c5948b93e16 --- /dev/null +++ b/final/test/FrontendC/2007-10-15-VoidPtr.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o /dev/null +void bork(void **data) { + (*(unsigned short *) (&(data[37])[927]) = 0); +} diff --git a/final/test/FrontendC/2007-10-30-Volatile.c b/final/test/FrontendC/2007-10-30-Volatile.c new file mode 100644 index 00000000000..7a75b05d5d2 --- /dev/null +++ b/final/test/FrontendC/2007-10-30-Volatile.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o /dev/null -Wall -Werror +void bork() { + char * volatile p; + volatile int cc; + p +=
cc; +} diff --git a/final/test/FrontendC/2007-11-07-AlignedMemcpy.c b/final/test/FrontendC/2007-11-07-AlignedMemcpy.c new file mode 100644 index 00000000000..eb9d22c6252 --- /dev/null +++ b/final/test/FrontendC/2007-11-07-AlignedMemcpy.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o /dev/null +void bork() { + int Qux[33] = {0}; +} diff --git a/final/test/FrontendC/2007-11-07-CopyAggregateAlign.c b/final/test/FrontendC/2007-11-07-CopyAggregateAlign.c new file mode 100644 index 00000000000..8bd94b00a78 --- /dev/null +++ b/final/test/FrontendC/2007-11-07-CopyAggregateAlign.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -S %s -o - | grep "align 2" | count 6 +struct A { char s, t, u, v; short a; }; +void q() { struct A a, b; a = b; } diff --git a/final/test/FrontendC/2007-11-07-ZeroAggregateAlign.c b/final/test/FrontendC/2007-11-07-ZeroAggregateAlign.c new file mode 100644 index 00000000000..424120d6c21 --- /dev/null +++ b/final/test/FrontendC/2007-11-07-ZeroAggregateAlign.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -S %s -o - | grep "align 2" +struct A { short s; short t; int i; }; +void q() { struct A a = {0}; } diff --git a/final/test/FrontendC/2007-11-27-SExtZExt.c b/final/test/FrontendC/2007-11-27-SExtZExt.c new file mode 100644 index 00000000000..8ea4786af36 --- /dev/null +++ b/final/test/FrontendC/2007-11-27-SExtZExt.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | grep "signext" | count 4 + +signed char foo1() { return 1; } + +void foo2(signed short a) { } + +signed char foo3(void) { return 1; } + +void foo4(a) signed short a; { } + + + diff --git a/final/test/FrontendC/2007-11-28-GlobalInitializer.c b/final/test/FrontendC/2007-11-28-GlobalInitializer.c new file mode 100644 index 00000000000..c8c7a594d0b --- /dev/null +++ b/final/test/FrontendC/2007-11-28-GlobalInitializer.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s -o - +// PR1744 +typedef struct foo { int x; char *p; } FOO; +extern FOO yy[]; + +int *y = &((yy + 1)->x); +void *z = &((yy + 1)->x); + diff --git a/final/test/FrontendC/2007-12-16-AsmNoUnwind.c b/final/test/FrontendC/2007-12-16-AsmNoUnwind.c new file mode 100644 index 00000000000..b080e6a511e --- /dev/null +++ b/final/test/FrontendC/2007-12-16-AsmNoUnwind.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc %s -S -o - | grep nounwind + +void bar() { asm (""); } diff --git a/final/test/FrontendC/2007-12-VarArrayDebug.c b/final/test/FrontendC/2007-12-VarArrayDebug.c new file mode 100644 index 00000000000..966789eef30 --- /dev/null +++ b/final/test/FrontendC/2007-12-VarArrayDebug.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S -g -O %s -o - | llc +// RUN: %llvmgcc -S -g %s -o - | llc + +extern void foo (void); + +static +void baz (int i) +{ + foo (); + typedef char A[i]; + struct { A b; } *x = 0; +} + +void +bar (i) +{ + baz (i); +} diff --git a/final/test/FrontendC/2008-01-04-WideBitfield.c b/final/test/FrontendC/2008-01-04-WideBitfield.c new file mode 100644 index 00000000000..a0045a40276 --- /dev/null +++ b/final/test/FrontendC/2008-01-04-WideBitfield.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -S -o - %s +// PR1386 +#include <stdint.h> + +struct X { + unsigned char pad : 4; + uint64_t a : 64; +} __attribute__((packed)) x; + +uint64_t f(void) +{ + return x.a; +} diff --git a/final/test/FrontendC/2008-01-07-UnusualIntSize.c b/final/test/FrontendC/2008-01-07-UnusualIntSize.c new file mode 100644 index 00000000000..91beaf3528d --- /dev/null +++ b/final/test/FrontendC/2008-01-07-UnusualIntSize.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o - -O | grep i33 +// PR1721 + +struct s { + unsigned long long u33: 33; +} a, b; + +// This should turn
into a real 33-bit add, not a 64-bit add. +_Bool test(void) { + return a.u33 + b.u33 != 0; +} diff --git a/final/test/FrontendC/2008-01-11-ChainConsistency.c b/final/test/FrontendC/2008-01-11-ChainConsistency.c new file mode 100644 index 00000000000..13e48a34ac5 --- /dev/null +++ b/final/test/FrontendC/2008-01-11-ChainConsistency.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc -S %s -o - -fnested-functions | not grep nest + +void n1(void) { void a(void) { a(); } a(); } diff --git a/final/test/FrontendC/2008-01-21-PackedBitFields.c b/final/test/FrontendC/2008-01-21-PackedBitFields.c new file mode 100644 index 00000000000..4c38dee408c --- /dev/null +++ b/final/test/FrontendC/2008-01-21-PackedBitFields.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - + +typedef double Al1Double __attribute__((aligned(1))); +struct x { int a:23; Al1Double v; }; +struct x X = { 5, 3.0 }; +double foo() { return X.v; } + diff --git a/final/test/FrontendC/2008-01-21-PackedStructField.c b/final/test/FrontendC/2008-01-21-PackedStructField.c new file mode 100644 index 00000000000..9cc1731063f --- /dev/null +++ b/final/test/FrontendC/2008-01-21-PackedStructField.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc %s -S -o - + +struct X { long double b; unsigned char c; double __attribute__((packed)) d; }; +struct X x = { 3.0L, 5, 3.0 }; + + +struct S2504 { + int e:17; + __attribute__((packed)) unsigned long long int f; +} ; +int fails; + extern struct S2504 s2504; +void check2504va (int z) { + struct S2504 arg, *p; + long long int i = 0; + arg.f = i; +} + diff --git a/final/test/FrontendC/2008-01-24-StructAlignAndBitFields.c b/final/test/FrontendC/2008-01-24-StructAlignAndBitFields.c new file mode 100644 index 00000000000..380a7ef77ca --- /dev/null +++ b/final/test/FrontendC/2008-01-24-StructAlignAndBitFields.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc %s -S -o - + +struct U { char a; short b; int c:25; char d; } u; + diff --git a/final/test/FrontendC/2008-01-25-ByValReadNone.c b/final/test/FrontendC/2008-01-25-ByValReadNone.c new file mode 100644 index 00000000000..4cb1a6394ea --- /dev/null +++ b/final/test/FrontendC/2008-01-25-ByValReadNone.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -O3 -S -o - %s | not grep readonly +// RUN: %llvmgcc -O3 -S -o - %s | not grep readnone + + +// The struct being passed byval means that we cannot mark the +// function readnone. Readnone would allow stores to the arg to +// be deleted in the caller. We also don't allow readonly since +// the callee might write to the byval parameter. The inliner +// would have to assume the worse and introduce an explicit +// temporary when inlining such a function, which is costly for +// the common case in which the byval argument is not written. +struct S { int A[1000]; }; +int __attribute__ ((const)) f(struct S x) { x.A[1] = 0; return x.A[0]; } +int g(struct S x) __attribute__ ((pure)); +int h(struct S x) { return g(x); } diff --git a/final/test/FrontendC/2008-01-25-ZeroSizedAggregate.c b/final/test/FrontendC/2008-01-25-ZeroSizedAggregate.c new file mode 100644 index 00000000000..643caffb6d2 --- /dev/null +++ b/final/test/FrontendC/2008-01-25-ZeroSizedAggregate.c @@ -0,0 +1,39 @@ +// RUN: %llvmgcc %s -S -o - + +// Aggregates of size zero should be dropped from argument list. +typedef long int Tlong; +struct S2411 { + __attribute__((aligned)) Tlong:0; +}; + +extern struct S2411 a2411[5]; +extern void checkx2411(struct S2411); +void test2411(void) { + checkx2411(a2411[0]); +} + +// Proper handling of zero sized fields during type conversion. 
+typedef unsigned long long int Tal2ullong __attribute__((aligned(2))); +struct S2525 { + Tal2ullong: 0; + struct { + } e; +}; +struct S2525 s2525; + +struct { + signed char f; + char :0; + struct{}h; + char * i[5]; +} data; + +// Taking address of a zero sized field. +struct Z {}; +struct Y { + int i; + struct Z z; +}; +void *f(struct Y *y) { + return &y->z; +} diff --git a/final/test/FrontendC/2008-01-28-PragmaMark.c b/final/test/FrontendC/2008-01-28-PragmaMark.c new file mode 100644 index 00000000000..6a4b5b52ff2 --- /dev/null +++ b/final/test/FrontendC/2008-01-28-PragmaMark.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -Werror -S %s -o /dev/null +#pragma mark LLVM's world +#ifdef DO_ERROR +#error LLVM's world +#endif +int i; diff --git a/final/test/FrontendC/2008-01-28-UnionSize.c b/final/test/FrontendC/2008-01-28-UnionSize.c new file mode 100644 index 00000000000..ea2c863b184 --- /dev/null +++ b/final/test/FrontendC/2008-01-28-UnionSize.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc %s -S -o - +// PR 1861 + +typedef unsigned char __u8; +typedef unsigned int __u32; +typedef unsigned short u16; +typedef __u32 __le32; +struct bcm43xx_plcp_hdr6 { + union { + __le32 data; + __u8 raw[6]; + } + __attribute__((__packed__)); +} + __attribute__((__packed__)); +struct bcm43xx_txhdr { + union { + struct { + struct bcm43xx_plcp_hdr6 plcp; + }; + }; +} + __attribute__((__packed__)); +static void bcm43xx_generate_rts(struct bcm43xx_txhdr *txhdr ) { } diff --git a/final/test/FrontendC/2008-02-11-AnnotateBuiltin.c b/final/test/FrontendC/2008-02-11-AnnotateBuiltin.c new file mode 100644 index 00000000000..32bc7a82745 --- /dev/null +++ b/final/test/FrontendC/2008-02-11-AnnotateBuiltin.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - | llvm-as | llvm-dis | grep llvm.annotation + +int main() { + int x = 0; + return __builtin_annotation(x, "annotate"); +} + diff --git a/final/test/FrontendC/2008-03-03-CtorAttrType.c b/final/test/FrontendC/2008-03-03-CtorAttrType.c new file mode 100644 index 00000000000..96648f4ec5a --- /dev/null +++ b/final/test/FrontendC/2008-03-03-CtorAttrType.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - | grep llvm.global_ctors +int __attribute__((constructor)) foo(void) { + return 0; +} +void __attribute__((constructor)) bar(void) {} + diff --git a/final/test/FrontendC/2008-03-05-syncPtr.c b/final/test/FrontendC/2008-03-05-syncPtr.c new file mode 100644 index 00000000000..7b271f7ee74 --- /dev/null +++ b/final/test/FrontendC/2008-03-05-syncPtr.c @@ -0,0 +1,27 @@ +// RUN: %llvmgcc %s -S -o - | grep llvm.atomic +// XFAIL: sparc-sun-solaris2|arm +// Feature currently implemented only for x86, alpha, powerpc. 
+ +int* foo(int** a, int* b, int* c) { +return __sync_val_compare_and_swap (a, b, c); +} + +int foo2(int** a, int* b, int* c) { +return __sync_bool_compare_and_swap (a, b, c); +} + +int* foo3(int** a, int b) { + return __sync_fetch_and_add (a, b); +} + +int* foo4(int** a, int b) { + return __sync_fetch_and_sub (a, b); +} + +int* foo5(int** a, int* b) { + return __sync_lock_test_and_set (a, b); +} + +int* foo6(int** a, int*** b) { + return __sync_lock_test_and_set (a, b); +} diff --git a/final/test/FrontendC/2008-03-24-BitField-And-Alloca.c b/final/test/FrontendC/2008-03-24-BitField-And-Alloca.c new file mode 100644 index 00000000000..641bcf1dbeb --- /dev/null +++ b/final/test/FrontendC/2008-03-24-BitField-And-Alloca.c @@ -0,0 +1,89 @@ +// RUN: %llvmgcc -O2 -S %s -o - | not grep alloca +// RUN: %llvmgcc -m32 -O2 -S %s -o - | not grep {store } + +enum { + PP_C, + PP_D, + PP_R, + PP_2D, + PP_1D, + PP_SR, + PP_S2D, + PP_S1D, + PP_SC +}; + +enum { + G_VP, + G_FP, + G_VS, + G_GS, + G_FS +}; + +enum { + G_NONE, + G_B, + G_R +}; + +typedef union _Key { + struct { + unsigned int count : 2; + unsigned int Aconst : 1; + unsigned int Bconst : 1; + unsigned int Cconst : 1; + unsigned int Xused : 1; + unsigned int Yused : 1; + unsigned int Zused : 1; + unsigned int Wused : 1; + unsigned int ttype : 3; + unsigned int scalar : 1; + unsigned int AType : 4; + unsigned int BType : 4; + unsigned int CType : 4; + unsigned int RType : 4; + unsigned int Size : 2; + unsigned int prec : 1; + + unsigned int ASize : 2; + unsigned int BSize : 2; + unsigned int CSize : 2; + unsigned int tTex : 4; + unsigned int proj : 1; + unsigned int lod : 2; + unsigned int dvts : 1; + unsigned int uipad : 18; + } key_io; + struct { + unsigned int key0; + unsigned int key1; + } key; + unsigned long long lkey; +} Key; + +static void foo(const Key iospec, int* ret) +{ + *ret=0; + if(((iospec.key_io.lod == G_B) && + (iospec.key_io.ttype != G_VS) && + (iospec.key_io.ttype != G_GS) && + (iospec.key_io.ttype != G_FS)) || + + (((iospec.key_io.tTex == PP_C) || + (iospec.key_io.tTex == PP_SC)) && + ((iospec.key_io.tTex == PP_SR) || + (iospec.key_io.tTex == PP_S2D) || + (iospec.key_io.tTex == PP_S1D) || + (iospec.key_io.tTex == PP_SC)))) + *ret=1; +} + + +extern int bar(unsigned long long key_token2) +{ + int ret; + __attribute__ ((unused)) Key iospec = (Key) key_token2; + foo(iospec, &ret); + return ret; +} diff --git a/final/test/FrontendC/2008-03-26-PackedBitFields.c b/final/test/FrontendC/2008-03-26-PackedBitFields.c new file mode 100644 index 00000000000..7214281d020 --- /dev/null +++ b/final/test/FrontendC/2008-03-26-PackedBitFields.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - + + +struct S1757 { + long double c; + long int __attribute__((packed)) e:28; +} x; diff --git a/final/test/FrontendC/2008-04-08-NoExceptions.c b/final/test/FrontendC/2008-04-08-NoExceptions.c new file mode 100644 index 00000000000..257fee23b06 --- /dev/null +++ b/final/test/FrontendC/2008-04-08-NoExceptions.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S -o - %s | grep nounwind | count 2 +// RUN: %llvmgcc -S -o - %s | not grep {declare.*nounwind} + +void f(void); +void g(void) { + f(); +} diff --git a/final/test/FrontendC/2008-05-06-CFECrash.c b/final/test/FrontendC/2008-05-06-CFECrash.c new file mode 100644 index 00000000000..94d556c1ec2 --- /dev/null +++ b/final/test/FrontendC/2008-05-06-CFECrash.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S -O2 %s -o /dev/null +// PR2292. 
+__inline__ __attribute__ ((__pure__)) int g (void) {} +void f (int k) { k = g (); } diff --git a/final/test/FrontendC/2008-05-12-TempUsedBeforeDef.c b/final/test/FrontendC/2008-05-12-TempUsedBeforeDef.c new file mode 100644 index 00000000000..21724c1c830 --- /dev/null +++ b/final/test/FrontendC/2008-05-12-TempUsedBeforeDef.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -w -S -o /dev/null %s +// PR2264. +unsigned foo = 8L; +unsigned bar = 0L; +volatile unsigned char baz = 6L; +int test() { + char qux = 1L; + for (; baz >= -29; baz--) + bork(bar && foo, qux); +} diff --git a/final/test/FrontendC/2008-05-19-AlwaysInline.c b/final/test/FrontendC/2008-05-19-AlwaysInline.c new file mode 100644 index 00000000000..8dcb57b1862 --- /dev/null +++ b/final/test/FrontendC/2008-05-19-AlwaysInline.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -fno-unit-at-a-time -O0 -o - | not grep sabrina +// RUN: %llvmgcc %s -S -funit-at-a-time -O0 -o - | not grep sabrina + +static inline int sabrina (void) __attribute__((always_inline)); +static inline int sabrina (void) +{ + return 13; +} +int bar (void) +{ + return sabrina () + 68; +} diff --git a/final/test/FrontendC/2008-07-08-FAbsAttributes.c b/final/test/FrontendC/2008-07-08-FAbsAttributes.c new file mode 100644 index 00000000000..1eb01dcedbe --- /dev/null +++ b/final/test/FrontendC/2008-07-08-FAbsAttributes.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o - | grep readnone +// PR2520 +#include <math.h> +double f(double *x, double *y) { return fabs(*x + *y); } diff --git a/final/test/FrontendC/2008-07-29-EHLabel.ll b/final/test/FrontendC/2008-07-29-EHLabel.ll new file mode 100644 index 00000000000..7577bc980ec --- /dev/null +++ b/final/test/FrontendC/2008-07-29-EHLabel.ll @@ -0,0 +1,282 @@ +; RUN: llc %s -o - | %llvmgcc -xassembler -c -o /dev/null - +; PR2609 + %struct..0._11 = type { i32 } + %struct..1__pthread_mutex_s = type { i32, i32, i32, i32, i32, %struct..0._11 } + %struct.pthread_attr_t = type { i32, [32 x i8] } + %struct.pthread_mutex_t = type { %struct..1__pthread_mutex_s } + %"struct.std::__ctype_abstract_base" = type { %"struct.std::locale::facet" } + %"struct.std::basic_ios >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream >"*, i8, i8, %"struct.std::basic_streambuf >"*, %"struct.std::ctype"*, %"struct.std::__ctype_abstract_base"*, %"struct.std::__ctype_abstract_base"* } + %"struct.std::basic_istream >" = type { i32 (...)**, i32, %"struct.std::basic_ios >" } + %"struct.std::basic_istream >::sentry" = type { i8 } + %"struct.std::basic_ostream >" = type { i32 (...)**, %"struct.std::basic_ios >" } + %"struct.std::basic_streambuf >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } + %"struct.std::ctype" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + +@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32
(i32*, void ()*)* @pthread_once ; [#uses=0] +@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; [#uses=0] +@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; [#uses=0] +@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; [#uses=0] +@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i32)* @pthread_cancel ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; [#uses=0] +@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; [#uses=0] +@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; [#uses=0] +@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct..0._11*)* @pthread_mutex_init ; [#uses=0] +@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; [#uses=0] +@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; [#uses=0] +@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_init ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct..0._11*, i32)* @pthread_mutexattr_settype ; [#uses=0] +@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_destroy ; [#uses=0] + +define %"struct.std::basic_istream >"* @_ZNSi7getlineEPcic(%"struct.std::basic_istream >"* %this, i8* %__s, i32 %__n, i8 signext %__delim) { +entry: + %__cerb = alloca %"struct.std::basic_istream >::sentry" ; <%"struct.std::basic_istream >::sentry"*> [#uses=2] + getelementptr %"struct.std::basic_istream >"* %this, i32 0, i32 1 ; :0 [#uses=7] + store i32 0, i32* %0, align 4 + call void @_ZNSi6sentryC1ERSib( %"struct.std::basic_istream >::sentry"* %__cerb, %"struct.std::basic_istream >"* %this, i8 zeroext 1 ) + getelementptr %"struct.std::basic_istream >::sentry"* %__cerb, i32 0, i32 0 ; :1 [#uses=1] + load i8* %1, align 8 ; :2 [#uses=1] + %toBool = icmp eq i8 %2, 0 ; [#uses=1] + br i1 %toBool, label %bb162, label %bb + +bb: ; preds = %entry + zext i8 %__delim to i32 ; :3 [#uses=1] + getelementptr %"struct.std::basic_istream >"* %this, i32 0, i32 0 ; :4 [#uses=1] + load i32 (...)*** %4, align 4 ; :5 [#uses=1] + getelementptr i32 (...)** %5, i32 -3 ; :6 [#uses=1] + bitcast i32 (...)** %6 to i32* ; :7 [#uses=1] + load i32* %7, align 4 ; :8 [#uses=1] + bitcast %"struct.std::basic_istream >"* %this to i8* ; :9 [#uses=1] + %ctg2186 = getelementptr i8* %9, i32 %8 ; [#uses=1] + bitcast i8* %ctg2186 to %"struct.std::basic_ios >"* ; <%"struct.std::basic_ios >"*>:10 [#uses=1] + getelementptr %"struct.std::basic_ios >"* %10, i32 0, i32 4 ; <%"struct.std::basic_streambuf >"**>:11 [#uses=1] + load %"struct.std::basic_streambuf >"** %11, align 4 ; <%"struct.std::basic_streambuf >"*>:12 [#uses=9] + getelementptr %"struct.std::basic_streambuf >"* %12, i32 0, i32 2 ; :13 [#uses=10] + load i8** %13, align 4 ; :14 [#uses=2] + getelementptr %"struct.std::basic_streambuf >"* %12, i32 0, i32 3 ; :15 [#uses=6] + load i8** %15, align 4 ; :16 [#uses=1] + icmp ult i8* %14, %16 ; :17 [#uses=1] + br i1 %17, label %bb81, label %bb82 + +bb81: ; preds = %bb + load i8* %14, align 1 ; :18 
[#uses=1] + zext i8 %18 to i32 ; :19 [#uses=1] + %.pre = getelementptr %"struct.std::basic_streambuf >"* %12, i32 0, i32 0 ; [#uses=1] + br label %bb119.preheader + +bb82: ; preds = %bb + getelementptr %"struct.std::basic_streambuf >"* %12, i32 0, i32 0 ; :20 [#uses=2] + load i32 (...)*** %20, align 4 ; :21 [#uses=1] + getelementptr i32 (...)** %21, i32 9 ; :22 [#uses=1] + load i32 (...)** %22, align 4 ; :23 [#uses=1] + bitcast i32 (...)* %23 to i32 (%"struct.std::basic_streambuf >"*)* ; >"*)*>:24 [#uses=1] + invoke i32 %24( %"struct.std::basic_streambuf >"* %12 ) + to label %bb119.preheader unwind label %lpad ; :25 [#uses=1] + +bb119.preheader: ; preds = %bb82, %bb81 + %.pre-phi = phi i32 (...)*** [ %.pre, %bb81 ], [ %20, %bb82 ] ; [#uses=4] + %__c79.0.ph = phi i32 [ %19, %bb81 ], [ %25, %bb82 ] ; [#uses=1] + sext i8 %__delim to i32 ; :26 [#uses=1] + br label %bb119 + +bb84: ; preds = %bb119 + sub i32 %__n, %82 ; :27 [#uses=1] + add i32 %27, -1 ; :28 [#uses=2] + load i8** %15, align 4 ; :29 [#uses=1] + ptrtoint i8* %29 to i32 ; :30 [#uses=1] + load i8** %13, align 4 ; :31 [#uses=3] + ptrtoint i8* %31 to i32 ; :32 [#uses=2] + sub i32 %30, %32 ; :33 [#uses=2] + icmp slt i32 %28, %33 ; :34 [#uses=1] + select i1 %34, i32 %28, i32 %33 ; :35 [#uses=3] + icmp sgt i32 %35, 1 ; :36 [#uses=1] + br i1 %36, label %bb90, label %bb99 + +bb90: ; preds = %bb84 + call i8* @memchr( i8* %31, i32 %26, i32 %35 ) nounwind readonly ; :37 [#uses=2] + icmp eq i8* %37, null ; :38 [#uses=1] + br i1 %38, label %bb93, label %bb92 + +bb92: ; preds = %bb90 + ptrtoint i8* %37 to i32 ; :39 [#uses=1] + sub i32 %39, %32 ; :40 [#uses=1] + br label %bb93 + +bb93: ; preds = %bb92, %bb90 + %__size.0 = phi i32 [ %40, %bb92 ], [ %35, %bb90 ] ; [#uses=4] + call void @llvm.memcpy.i32( i8* %__s_addr.0, i8* %31, i32 %__size.0, i32 1 ) + getelementptr i8* %__s_addr.0, i32 %__size.0 ; :41 [#uses=3] + load i8** %13, align 4 ; :42 [#uses=1] + getelementptr i8* %42, i32 %__size.0 ; :43 [#uses=1] + store i8* %43, i8** %13, align 4 + load i32* %0, align 4 ; :44 [#uses=1] + add i32 %44, %__size.0 ; :45 [#uses=1] + store i32 %45, i32* %0, align 4 + load i8** %13, align 4 ; :46 [#uses=2] + load i8** %15, align 4 ; :47 [#uses=1] + icmp ult i8* %46, %47 ; :48 [#uses=1] + br i1 %48, label %bb95, label %bb96 + +bb95: ; preds = %bb93 + load i8* %46, align 1 ; :49 [#uses=1] + zext i8 %49 to i32 ; :50 [#uses=1] + br label %bb119 + +bb96: ; preds = %bb93 + load i32 (...)*** %.pre-phi, align 4 ; :51 [#uses=1] + getelementptr i32 (...)** %51, i32 9 ; :52 [#uses=1] + load i32 (...)** %52, align 4 ; :53 [#uses=1] + bitcast i32 (...)* %53 to i32 (%"struct.std::basic_streambuf >"*)* ; >"*)*>:54 [#uses=1] + invoke i32 %54( %"struct.std::basic_streambuf >"* %12 ) + to label %bb119 unwind label %lpad ; :55 [#uses=1] + +bb99: ; preds = %bb84 + trunc i32 %__c79.0 to i8 ; :56 [#uses=1] + store i8 %56, i8* %__s_addr.0, align 1 + getelementptr i8* %__s_addr.0, i32 1 ; :57 [#uses=5] + load i32* %0, align 4 ; :58 [#uses=1] + add i32 %58, 1 ; :59 [#uses=1] + store i32 %59, i32* %0, align 4 + load i8** %13, align 4 ; :60 [#uses=3] + load i8** %15, align 4 ; :61 [#uses=1] + icmp ult i8* %60, %61 ; :62 [#uses=1] + br i1 %62, label %bb101, label %bb102 + +bb101: ; preds = %bb99 + load i8* %60, align 1 ; :63 [#uses=1] + zext i8 %63 to i32 ; :64 [#uses=1] + getelementptr i8* %60, i32 1 ; :65 [#uses=1] + store i8* %65, i8** %13, align 4 + br label %bb104 + +bb102: ; preds = %bb99 + load i32 (...)*** %.pre-phi, align 4 ; :66 [#uses=1] + getelementptr i32 (...)** %66, i32 10 
; :67 [#uses=1] + load i32 (...)** %67, align 4 ; :68 [#uses=1] + bitcast i32 (...)* %68 to i32 (%"struct.std::basic_streambuf >"*)* ; >"*)*>:69 [#uses=1] + invoke i32 %69( %"struct.std::basic_streambuf >"* %12 ) + to label %bb104 unwind label %lpad ; :70 [#uses=1] + +bb104: ; preds = %bb102, %bb101 + %__ret44.0 = phi i32 [ %64, %bb101 ], [ %70, %bb102 ] ; [#uses=1] + icmp eq i32 %__ret44.0, -1 ; :71 [#uses=1] + br i1 %71, label %bb119, label %bb112 + +bb112: ; preds = %bb104 + load i8** %13, align 4 ; :72 [#uses=2] + load i8** %15, align 4 ; :73 [#uses=1] + icmp ult i8* %72, %73 ; :74 [#uses=1] + br i1 %74, label %bb114, label %bb115 + +bb114: ; preds = %bb112 + load i8* %72, align 1 ; :75 [#uses=1] + zext i8 %75 to i32 ; :76 [#uses=1] + br label %bb119 + +bb115: ; preds = %bb112 + load i32 (...)*** %.pre-phi, align 4 ; :77 [#uses=1] + getelementptr i32 (...)** %77, i32 9 ; :78 [#uses=1] + load i32 (...)** %78, align 4 ; :79 [#uses=1] + bitcast i32 (...)* %79 to i32 (%"struct.std::basic_streambuf >"*)* ; >"*)*>:80 [#uses=1] + invoke i32 %80( %"struct.std::basic_streambuf >"* %12 ) + to label %bb119 unwind label %lpad ; :81 [#uses=1] + +bb119: ; preds = %bb115, %bb114, %bb104, %bb96, %bb95, %bb119.preheader + %__c79.0 = phi i32 [ %__c79.0.ph, %bb119.preheader ], [ %50, %bb95 ], [ %76, %bb114 ], [ %55, %bb96 ], [ -1, %bb104 ], [ %81, %bb115 ] ; [#uses=3] + %__s_addr.0 = phi i8* [ %__s, %bb119.preheader ], [ %41, %bb95 ], [ %57, %bb114 ], [ %41, %bb96 ], [ %57, %bb104 ], [ %57, %bb115 ] ; [#uses=5] + load i32* %0, align 4 ; :82 [#uses=2] + add i32 %82, 1 ; :83 [#uses=2] + %.not = icmp sge i32 %83, %__n ; [#uses=1] + icmp eq i32 %__c79.0, -1 ; :84 [#uses=3] + icmp eq i32 %__c79.0, %3 ; :85 [#uses=2] + %or.cond = or i1 %84, %85 ; [#uses=1] + %or.cond188 = or i1 %or.cond, %.not ; [#uses=1] + br i1 %or.cond188, label %bb141, label %bb84 + +bb141: ; preds = %bb119 + %.not194 = xor i1 %85, true ; [#uses=1] + %brmerge = or i1 %84, %.not194 ; [#uses=1] + %.mux = select i1 %84, i32 2, i32 4 ; [#uses=0] + br i1 %brmerge, label %bb162, label %bb146 + +bb146: ; preds = %bb141 + store i32 %83, i32* %0, align 4 + load i8** %13, align 4 ; :86 [#uses=2] + load i8** %15, align 4 ; :87 [#uses=1] + icmp ult i8* %86, %87 ; :88 [#uses=1] + br i1 %88, label %bb148, label %bb149 + +bb148: ; preds = %bb146 + getelementptr i8* %86, i32 1 ; :89 [#uses=1] + store i8* %89, i8** %13, align 4 + ret %"struct.std::basic_istream >"* %this + +bb149: ; preds = %bb146 + load i32 (...)*** %.pre-phi, align 4 ; :90 [#uses=1] + getelementptr i32 (...)** %90, i32 10 ; :91 [#uses=1] + load i32 (...)** %91, align 4 ; :92 [#uses=1] + bitcast i32 (...)* %92 to i32 (%"struct.std::basic_streambuf >"*)* ; >"*)*>:93 [#uses=1] + invoke i32 %93( %"struct.std::basic_streambuf >"* %12 ) + to label %bb162 unwind label %lpad ; :94 [#uses=0] + +bb162: ; preds = %bb149, %bb141, %entry + ret %"struct.std::basic_istream >"* %this + +lpad: ; preds = %bb149, %bb115, %bb102, %bb96, %bb82 + %__s_addr.1 = phi i8* [ %__s, %bb82 ], [ %__s_addr.0, %bb149 ], [ %41, %bb96 ], [ %57, %bb102 ], [ %57, %bb115 ] ; [#uses=0] + call void @__cxa_rethrow( ) noreturn + unreachable +} + +declare i8* @__cxa_begin_catch(i8*) nounwind + +declare i8* @llvm.eh.exception() nounwind + +declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind + +declare void @__cxa_rethrow() noreturn + +declare void @__cxa_end_catch() + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @_ZNSi6sentryC1ERSib(%"struct.std::basic_istream >::sentry"*, %"struct.std::basic_istream >"*, i8 zeroext ) + +declare i8* @memchr(i8*, i32, i32) nounwind readonly + +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind + +declare void @_ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate(%"struct.std::basic_ios >"*, i32) + +declare extern_weak i32 @pthread_once(i32*, void ()*) + +declare extern_weak i8* @pthread_getspecific(i32) + +declare extern_weak i32 @pthread_setspecific(i32, i8*) + +declare extern_weak i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*) + +declare extern_weak i32 @pthread_cancel(i32) + +declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*) + +declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*) + +declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*) + +declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct..0._11*) + +declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*) + +declare extern_weak i32 @pthread_key_delete(i32) + +declare extern_weak i32 @pthread_mutexattr_init(%struct..0._11*) + +declare extern_weak i32 @pthread_mutexattr_settype(%struct..0._11*, i32) + +declare extern_weak i32 @pthread_mutexattr_destroy(%struct..0._11*) diff --git a/final/test/FrontendC/2008-08-07-AlignPadding1.c b/final/test/FrontendC/2008-08-07-AlignPadding1.c new file mode 100644 index 00000000000..6be9fe4ed3b --- /dev/null +++ b/final/test/FrontendC/2008-08-07-AlignPadding1.c @@ -0,0 +1,29 @@ +/* RUN: %llvmgcc %s -S -o - -O0 | grep {zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer.*zeroinitializer} + +The FE must generate padding here both at the end of each PyG_Head and +between array elements. Reduced from Python. */ + +typedef union _gc_head { + struct { + union _gc_head *gc_next; + union _gc_head *gc_prev; + long gc_refs; + } gc; + int dummy __attribute__((aligned(16))); +} PyGC_Head; + +struct gc_generation { + PyGC_Head head; + int threshold; + int count; +}; + +#define GEN_HEAD(n) (&generations[n].head) + +/* linked lists of container objects */ +static struct gc_generation generations[3] = { + /* PyGC_Head, threshold, count */ + {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0}, + {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0}, + {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0}, +}; diff --git a/final/test/FrontendC/2008-08-07-AlignPadding2.c b/final/test/FrontendC/2008-08-07-AlignPadding2.c new file mode 100644 index 00000000000..51135ba633a --- /dev/null +++ b/final/test/FrontendC/2008-08-07-AlignPadding2.c @@ -0,0 +1,18 @@ +/* RUN: %llvmgcc %s -S -o - -O0 | grep zeroinitializer | count 1 + +The FE must not generate padding here between array elements. PR 2533. 
*/ + +typedef struct { + const char *name; + int flags; + union { + int x; + } u; +} OptionDef; + +const OptionDef options[] = { + /* main options */ + { "a", 0, {3} }, + { "b", 0, {4} }, + { 0, }, +}; diff --git a/final/test/FrontendC/2008-08-07-GEPIntToPtr.c b/final/test/FrontendC/2008-08-07-GEPIntToPtr.c new file mode 100644 index 00000000000..3ef3b66b88f --- /dev/null +++ b/final/test/FrontendC/2008-08-07-GEPIntToPtr.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - | grep {i8 1} +// PR2603 + +struct A { + char num_fields; +}; + +struct B { + char a, b[1]; +}; + +const struct A Foo = { + (char *)(&( (struct B *)(16) )->b[0]) - (char *)(16) +}; diff --git a/final/test/FrontendC/2008-09-03-WeakAlias.c b/final/test/FrontendC/2008-09-03-WeakAlias.c new file mode 100644 index 00000000000..2e5f3dae8a0 --- /dev/null +++ b/final/test/FrontendC/2008-09-03-WeakAlias.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S -O1 -o - %s | grep icmp +// PR1678 +extern void B (void); +static __typeof(B) A __attribute__ ((__weakref__("B"))); +int active (void) +{ + static void *const p = __extension__ (void *) &A; + return p != 0; +} diff --git a/final/test/FrontendC/2008-10-13-FrontendCrash.c b/final/test/FrontendC/2008-10-13-FrontendCrash.c new file mode 100644 index 00000000000..c9731e34ced --- /dev/null +++ b/final/test/FrontendC/2008-10-13-FrontendCrash.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -o - +// PR2797 + +unsigned int +func_48 (signed char p_49) +{ + signed char l_340; + func_44 (1&((1 ^ 1 == (lshift_u_s (1)) != (l_340 < 1)) & 1L)); +} diff --git a/final/test/FrontendC/2008-10-30-ZeroPlacement.c b/final/test/FrontendC/2008-10-30-ZeroPlacement.c new file mode 100644 index 00000000000..d73442dca8b --- /dev/null +++ b/final/test/FrontendC/2008-10-30-ZeroPlacement.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s +// PR2987 +struct S2045 +{ + unsigned short int a; + union { } b; + union __attribute__ ((aligned (4))) { } c[0]; +}; +struct S2045 s2045; diff --git a/final/test/FrontendC/2008-11-02-WeakAlias.c b/final/test/FrontendC/2008-11-02-WeakAlias.c new file mode 100644 index 00000000000..d10e57f5efe --- /dev/null +++ b/final/test/FrontendC/2008-11-02-WeakAlias.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S -o - %s | grep weak +// PR2691 + +void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); +void native_init_IRQ(void) {} diff --git a/final/test/FrontendC/2008-11-08-InstCombineSelect.c b/final/test/FrontendC/2008-11-08-InstCombineSelect.c new file mode 100644 index 00000000000..b850d3ff6f2 --- /dev/null +++ b/final/test/FrontendC/2008-11-08-InstCombineSelect.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc %s -S -O2 -o - +// PR3028 + +int g_187; +int g_204; +int g_434; + +int func_89 (void) +{ + return 1; +} + +void func_20 (int p_22) +{ + if (1 & p_22 | g_204 & (1 < g_187) - func_89 ()) + g_434 = 1; +} diff --git a/final/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c b/final/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c new file mode 100644 index 00000000000..8af59d54f75 --- /dev/null +++ b/final/test/FrontendC/2008-11-11-AnnotateStructFieldAttribute.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | grep llvm.ptr.annotation | count 3 + +#include + +/* Struct with element X being annotated */ +struct foo { + int X __attribute__((annotate("StructAnnotation"))); + int Y; + int Z; +}; + + +void test(struct foo *F) { + F->X = 42; + F->Z = 1; + F->Y = F->X; +} + diff --git a/final/test/FrontendC/2008-12-23-AsmIntPointerTie.c b/final/test/FrontendC/2008-12-23-AsmIntPointerTie.c new file mode 100644 index 
00000000000..57061422b8f --- /dev/null +++ b/final/test/FrontendC/2008-12-23-AsmIntPointerTie.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -O1 -o - + +#include <stdint.h> + +int test(void *b) { + intptr_t a; + __asm__ __volatile__ ("%0 %1 " : "=r" (a): "0" (b)); + return a; +} diff --git a/final/test/FrontendC/2009-01-05-BlockInlining.c b/final/test/FrontendC/2009-01-05-BlockInlining.c new file mode 100644 index 00000000000..8fb6e54514a --- /dev/null +++ b/final/test/FrontendC/2009-01-05-BlockInlining.c @@ -0,0 +1,28 @@ +// RUN: %llvmgcc %s -S -O2 -o %t.s +// RUN: grep {call i32 .*printf.*argc} %t.s | count 3 +// RUN: not grep __block_holder_tmp %t.s +// rdar://5865221 + +// All of these should be inlined equivalently into a single printf call. + +static int fun(int x) { + return x+1; +} + +static int block(int x) { + return (^(int x){return x+1;})(x); +} + +static void print(int result) { + printf("%d\n", result); +} + +int main (int argc, const char * argv[]) { + int x = argc-1; + print(fun(x)); + print(block(x)); + int (^block_inline)(int) = ^(int x){return x+1;}; + print(block_inline(x)); + return 0; +} + diff --git a/final/test/FrontendC/2009-01-20-k8.c b/final/test/FrontendC/2009-01-20-k8.c new file mode 100644 index 00000000000..2cd15387390 --- /dev/null +++ b/final/test/FrontendC/2009-01-20-k8.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc %s -S -march=k8 -o /dev/null +// XFAIL: * +// XTARGET: x86,i386,i686 +long double x; diff --git a/final/test/FrontendC/2009-01-21-InvalidIterator.c b/final/test/FrontendC/2009-01-21-InvalidIterator.c new file mode 100644 index 00000000000..6ac61f8a748 --- /dev/null +++ b/final/test/FrontendC/2009-01-21-InvalidIterator.c @@ -0,0 +1,74 @@ +// RUN: %llvmgcc %s -S -g -o /dev/null + +typedef long unsigned int size_t; +typedef unsigned short int uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long int uint64_t; +typedef uint16_t Elf64_Half; +typedef uint32_t Elf64_Word; +typedef uint64_t Elf64_Xword; +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; +typedef struct +{ + Elf64_Word p_type; + Elf64_Off p_offset; + Elf64_Addr p_vaddr; + Elf64_Xword p_align; +} +Elf64_Phdr; +struct dl_phdr_info +{ + const char *dlpi_name; + const Elf64_Phdr *dlpi_phdr; + Elf64_Half dlpi_phnum; + unsigned long long int dlpi_adds; +}; +typedef unsigned _Unwind_Ptr; +struct object +{ + union + { + const struct dwarf_fde *single; + struct dwarf_fde **array; + struct fde_vector *sort; + } + u; + union + { + struct + { + } + b; + } + s; + struct object *next; +}; +typedef int sword; +typedef unsigned int uword; +struct dwarf_fde +{ + uword length; + sword CIE_delta; + unsigned char pc_begin[]; +}; +typedef struct dwarf_fde fde; +struct unw_eh_callback_data +{ + const fde *ret; + struct frame_hdr_cache_element *link; +} +frame_hdr_cache[8]; + +_Unwind_Ptr +base_from_cb_data (struct unw_eh_callback_data *data) +{ +} + +void +_Unwind_IteratePhdrCallback (struct dl_phdr_info *info, size_t size, void *ptr) +{ + const unsigned char *p; + const struct unw_eh_frame_hdr *hdr; + struct object ob; +} diff --git a/final/test/FrontendC/2009-02-13-zerosize-union-field-ppc.c b/final/test/FrontendC/2009-02-13-zerosize-union-field-ppc.c new file mode 100644 index 00000000000..947166d54ac --- /dev/null +++ b/final/test/FrontendC/2009-02-13-zerosize-union-field-ppc.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc %s -m32 -S -o - | grep {i32 32} | count 3 +// XFAIL: * +// XTARGET: powerpc +// Every printf has 'i32 0' for the GEP of the string; no point counting those. 
+typedef unsigned int Foo __attribute__((aligned(32))); +typedef union{Foo:0;}a; +typedef union{int x; Foo:0;}b; +extern int printf(const char*, ...); +main() { + printf("%ld\n", sizeof(a)); + printf("%ld\n", __alignof__(a)); + printf("%ld\n", sizeof(b)); + printf("%ld\n", __alignof__(b)); +} diff --git a/final/test/FrontendC/2009-02-13-zerosize-union-field.c b/final/test/FrontendC/2009-02-13-zerosize-union-field.c new file mode 100644 index 00000000000..ad335583d46 --- /dev/null +++ b/final/test/FrontendC/2009-02-13-zerosize-union-field.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc %s -m32 -S -o - | grep {i32 1} | count 1 +// RUN: %llvmgcc %s -m32 -S -o - | grep {i32 4} | count 2 +// XFAIL: powerpc +// Every printf has 'i32 0' for the GEP of the string; no point counting those. +typedef unsigned int Foo __attribute__((aligned(32))); +typedef union{Foo:0;}a; +typedef union{int x; Foo:0;}b; +extern int printf(const char*, ...); +main() { + printf("%ld\n", sizeof(a)); + printf("%ld\n", __alignof__(a)); + printf("%ld\n", sizeof(b)); + printf("%ld\n", __alignof__(b)); +} diff --git a/final/test/FrontendC/2009-02-17-BitField-dbg.c b/final/test/FrontendC/2009-02-17-BitField-dbg.c new file mode 100644 index 00000000000..80ccc4a0ea3 --- /dev/null +++ b/final/test/FrontendC/2009-02-17-BitField-dbg.c @@ -0,0 +1,14 @@ +// Check bitfields. +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o 2009-02-17-BitField-dbg.s +// RUN: %compile_c 2009-02-17-BitField-dbg.s -o 2009-02-17-BitField-dbg.o +// RUN: echo {ptype mystruct} > %t2 +// RUN: gdb -q -batch -n -x %t2 2009-02-17-BitField-dbg.o | \ +// RUN: tee 2009-02-17-BitField-dbg.out | grep "int a : 4" +// + +struct { + int a:4; + int b:2; +} mystruct; + diff --git a/final/test/FrontendC/2009-03-01-MallocNoAlias.c b/final/test/FrontendC/2009-03-01-MallocNoAlias.c new file mode 100644 index 00000000000..22ff6cb5c30 --- /dev/null +++ b/final/test/FrontendC/2009-03-01-MallocNoAlias.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc %s -S -o - | grep noalias + +void * __attribute__ ((malloc)) foo (void) { return 0; } diff --git a/final/test/FrontendC/2009-03-08-ZeroEltStructCrash.c b/final/test/FrontendC/2009-03-08-ZeroEltStructCrash.c new file mode 100644 index 00000000000..454e0fb79c6 --- /dev/null +++ b/final/test/FrontendC/2009-03-08-ZeroEltStructCrash.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S %s -o - +// PR3744 +struct Empty {}; +struct Union { + union { + int zero_arr[0]; + } contents; +}; +static inline void Foo(struct Union *u) { + int *array = u->contents.zero_arr; +} +static void Bar(struct Union *u) { + Foo(u); +} diff --git a/final/test/FrontendC/2009-03-09-WeakDeclarations-1.c b/final/test/FrontendC/2009-03-09-WeakDeclarations-1.c new file mode 100644 index 00000000000..13ea84f7bae --- /dev/null +++ b/final/test/FrontendC/2009-03-09-WeakDeclarations-1.c @@ -0,0 +1,22 @@ +// RUN: %llvmgcc_only %s -c -o /dev/null |& \ +// RUN: egrep {(14|15|22): warning:} | \ +// RUN: wc -l | grep --quiet 3 +// XTARGET: darwin,linux +// XFAIL: * +// END. +// Insist upon warnings for inappropriate weak attributes. +// Note the line numbers (14|15|22) embedded in the check. + +// O.K. +extern int ext_weak_import __attribute__ ((__weak_import__)); + +// These are inappropriate, and should generate warnings: +int decl_weak_import __attribute__ ((__weak_import__)); +int decl_initialized_weak_import __attribute__ ((__weak_import__)) = 13; + +// O.K. 
+extern int ext_f(void) __attribute__ ((__weak_import__)); + +// These are inappropriate, and should generate warnings: +int def_f(void) __attribute__ ((__weak_import__)); +int __attribute__ ((__weak_import__)) decl_f(void) {return 0;}; diff --git a/final/test/FrontendC/2009-03-13-dbg.c b/final/test/FrontendC/2009-03-13-dbg.c new file mode 100644 index 00000000000..46abd3a9638 --- /dev/null +++ b/final/test/FrontendC/2009-03-13-dbg.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc %s -S -g -o /dev/null +// XTARGET: darwin,linux +// XFAIL: * +void foo() {} + diff --git a/final/test/FrontendC/2009-04-22-UnknownSize.c b/final/test/FrontendC/2009-04-22-UnknownSize.c new file mode 100644 index 00000000000..7db9c0730c8 --- /dev/null +++ b/final/test/FrontendC/2009-04-22-UnknownSize.c @@ -0,0 +1,4 @@ +// RUN: not %llvmgcc -O1 %s -S -o /dev/null |& grep {error: storage size} +// PR2958 +static struct foo s; +struct foo *p = &s; diff --git a/final/test/FrontendC/2009-04-28-UnionArrayCrash.c b/final/test/FrontendC/2009-04-28-UnionArrayCrash.c new file mode 100644 index 00000000000..75851d0f5c0 --- /dev/null +++ b/final/test/FrontendC/2009-04-28-UnionArrayCrash.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S %s -o - +// PR4082 +union U { + int I; + double F; +}; + +union U arr[] = { { .I = 4 }, { .F = 123.} }; +union U *P = &arr[0]; + + diff --git a/final/test/FrontendC/2009-05-04-EnumInreg.c b/final/test/FrontendC/2009-05-04-EnumInreg.c new file mode 100644 index 00000000000..fb0c03e439e --- /dev/null +++ b/final/test/FrontendC/2009-05-04-EnumInreg.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S -m32 -mregparm=3 %s -o - | grep {inreg %action} +// XFAIL: * +// XTARGET: x86,i386,i686 +// PR3967 + +enum kobject_action { + KOBJ_ADD, + KOBJ_REMOVE, + KOBJ_CHANGE, + KOBJ_MOVE, + KOBJ_ONLINE, + KOBJ_OFFLINE, + KOBJ_MAX +}; + +struct kobject; + +int kobject_uevent(struct kobject *kobj, enum kobject_action action) {} diff --git a/final/test/FrontendC/2009-05-17-AlwaysInline.c b/final/test/FrontendC/2009-05-17-AlwaysInline.c new file mode 100644 index 00000000000..a93fabe0525 --- /dev/null +++ b/final/test/FrontendC/2009-05-17-AlwaysInline.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -O0 -o - -mllvm -disable-llvm-optzns | grep bar +// Check that the gcc inliner is turned off. 
+ +#include <stdio.h> +static __inline__ __attribute__ ((always_inline)) + int bar (int x) +{ + return 4; +} + +void +foo () +{ + long long b = 1; + int Y = bar (4); + printf ("%d\n", Y); +} diff --git a/final/test/FrontendC/2009-06-14-HighlyAligned.c b/final/test/FrontendC/2009-06-14-HighlyAligned.c new file mode 100644 index 00000000000..227db74f47a --- /dev/null +++ b/final/test/FrontendC/2009-06-14-HighlyAligned.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc %s -S -o /dev/null +// PR4332 + +static int highly_aligned __attribute__((aligned(4096))); + +int f() { + return highly_aligned; +} diff --git a/final/test/FrontendC/2009-06-18-StaticInitTailPadPack.c b/final/test/FrontendC/2009-06-18-StaticInitTailPadPack.c new file mode 100644 index 00000000000..17f35c04a9e --- /dev/null +++ b/final/test/FrontendC/2009-06-18-StaticInitTailPadPack.c @@ -0,0 +1,26 @@ +// RUN: %llvmgcc %s -S -o - +// rdar://6983634 + + typedef struct A *Foo; +#pragma pack(push, 2) + struct Bar { + Foo f1; + unsigned short f2; + float f3; + }; + struct Baz { + struct Bar f1; + struct Bar f2; + }; + struct Qux { + unsigned long f1; + struct Baz f2; + }; +extern const struct Qux Bork; +const struct Qux Bork = { + 0, + { + {0}, + {0} + } +}; diff --git a/final/test/FrontendC/2009-07-14-VoidPtr.c b/final/test/FrontendC/2009-07-14-VoidPtr.c new file mode 100644 index 00000000000..8001c56ad52 --- /dev/null +++ b/final/test/FrontendC/2009-07-14-VoidPtr.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - +// PR4556 + +extern void foo; +void *bar = &foo; + diff --git a/final/test/FrontendC/2009-07-15-pad-wchar_t-array.c b/final/test/FrontendC/2009-07-15-pad-wchar_t-array.c new file mode 100644 index 00000000000..41bdef25ecc --- /dev/null +++ b/final/test/FrontendC/2009-07-15-pad-wchar_t-array.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null + +#include <wchar.h> +signed short _iodbcdm_sqlerror( ) +{ + wchar_t _sqlState[6] = { L"\0" }; +} diff --git a/final/test/FrontendC/2009-07-17-VoidParameter.c b/final/test/FrontendC/2009-07-17-VoidParameter.c new file mode 100644 index 00000000000..d5769524386 --- /dev/null +++ b/final/test/FrontendC/2009-07-17-VoidParameter.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc -S %s -o - +// PR4214 +typedef void vt; +void (*func_ptr)(vt my_vt); diff --git a/final/test/FrontendC/2009-07-22-StructLayout.c b/final/test/FrontendC/2009-07-22-StructLayout.c new file mode 100644 index 00000000000..74904da33e0 --- /dev/null +++ b/final/test/FrontendC/2009-07-22-StructLayout.c @@ -0,0 +1,34 @@ +// RUN: %llvmgcc %s -S -o /dev/null +// PR4590 + +typedef unsigned char __u8; +typedef unsigned int __le32; +typedef unsigned int __u32; +typedef unsigned short __le16; +typedef unsigned short __u16; + +struct usb_cdc_ether_desc { + __u8 bLength; + __u8 bDescriptorType; + __u8 bDescriptorSubType; + + __u8 iMACAddress; + __le32 bmEthernetStatistics; + __le16 wMaxSegmentSize; + __le16 wNumberMCFilters; + __u8 bNumberPowerFilters; +} __attribute__ ((packed)); + + +static struct usb_cdc_ether_desc ecm_desc __attribute__ ((__section__(".init.data"))) = { + .bLength = sizeof ecm_desc, + .bDescriptorType = ((0x01 << 5) | 0x04), + .bDescriptorSubType = 0x0f, + + + + .bmEthernetStatistics = (( __le32)(__u32)(0)), + .wMaxSegmentSize = (( __le16)(__u16)(1514)), + .wNumberMCFilters = (( __le16)(__u16)(0)), + .bNumberPowerFilters = 0, +}; diff --git a/final/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c b/final/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c new file mode 100644 index 00000000000..e141c9a16c8 --- /dev/null +++ 
b/final/test/FrontendC/2009-08-11-AsmBlocksComplexJumpTarget.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -fasm-blocks -S -o - | grep {\\\*1192} +// Complicated expression as jump target +// XFAIL: * +// XTARGET: x86,i386,i686 + +asm void Method3() +{ + mov eax,[esp+4] + jmp [eax+(299-1)*4] +} diff --git a/final/test/FrontendC/2009-09-24-SqrtErrno.c b/final/test/FrontendC/2009-09-24-SqrtErrno.c new file mode 100644 index 00000000000..09fc8764ea5 --- /dev/null +++ b/final/test/FrontendC/2009-09-24-SqrtErrno.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o - -fmath-errno | FileCheck %s +// llvm.sqrt has undefined behavior on negative inputs, so it is +// inappropriate to translate C/C++ sqrt to this. +#include <math.h> + +float foo(float X) { +// CHECK: foo +// CHECK-NOT: readonly +// CHECK: return + // Check that this is not marked readonly when errno is used. + return sqrtf(X); +} diff --git a/final/test/FrontendC/2009-12-07-BitFieldAlignment.c b/final/test/FrontendC/2009-12-07-BitFieldAlignment.c new file mode 100644 index 00000000000..02ff8bce182 --- /dev/null +++ b/final/test/FrontendC/2009-12-07-BitFieldAlignment.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -m32 %s -S -o - | FileCheck %s +// Set alignment on bitfield accesses. + +struct S { + int a, b; + void *c; + unsigned d : 8; + unsigned e : 8; +}; + +void f0(struct S *a) { +// CHECK: load {{.*}}, align 4 +// CHECK: store {{.*}}, align 4 + a->e = 0; +} diff --git a/final/test/FrontendC/2010-01-05-LinkageName.c b/final/test/FrontendC/2010-01-05-LinkageName.c new file mode 100644 index 00000000000..9c1a2155032 --- /dev/null +++ b/final/test/FrontendC/2010-01-05-LinkageName.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc -O2 -S -g %s -o - | llc -o 2010-01-05-LinkageName.s -O0 +// RUN: %compile_c 2010-01-05-LinkageName.s -o 2010-01-05-LinkageName.s + +struct tm {}; +long mktime(struct tm *) __asm("_mktime$UNIX2003"); +tzload(name, sp, doextend){} +long mktime(tmp) + struct tm *const tmp; +{ + tzset(); +} +timelocal(tmp) { + return mktime(tmp); +} + diff --git a/final/test/FrontendC/2010-01-13-MemBarrier.c b/final/test/FrontendC/2010-01-13-MemBarrier.c new file mode 100644 index 00000000000..a540e59c6ca --- /dev/null +++ b/final/test/FrontendC/2010-01-13-MemBarrier.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// XFAIL: sparc +// rdar://7536390 + +unsigned t(unsigned *ptr, unsigned val) { + // CHECK: @t + // CHECK: call void @llvm.memory.barrier + // CHECK-NEXT: call i32 @llvm.atomic.swap.i32 + // CHECK-NEXT: call void @llvm.memory.barrier + return __sync_lock_test_and_set(ptr, val); +} diff --git a/final/test/FrontendC/2010-01-14-FnType-DebugInfo.c b/final/test/FrontendC/2010-01-14-FnType-DebugInfo.c new file mode 100644 index 00000000000..beaad91330b --- /dev/null +++ b/final/test/FrontendC/2010-01-14-FnType-DebugInfo.c @@ -0,0 +1,4 @@ +// RUN: %llvmgcc %s -S -g -o /dev/null +typedef void (*sigcatch_t)( struct sigcontext *); +sigcatch_t sigcatch[50] = {(sigcatch_t) 0}; + diff --git a/final/test/FrontendC/2010-01-14-StaticVariable.c b/final/test/FrontendC/2010-01-14-StaticVariable.c new file mode 100644 index 00000000000..80dd4d45259 --- /dev/null +++ b/final/test/FrontendC/2010-01-14-StaticVariable.c @@ -0,0 +1,12 @@ +// This is a regression test on debug info to make sure that llvm emitted +// debug info does not crash gdb. 
+// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: echo {quit\n} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.o > /dev/null + +int foo() { + static int i = 42; + return i; +} diff --git a/final/test/FrontendC/2010-01-18-Inlined-Debug.c b/final/test/FrontendC/2010-01-18-Inlined-Debug.c new file mode 100644 index 00000000000..4aec7b26476 --- /dev/null +++ b/final/test/FrontendC/2010-01-18-Inlined-Debug.c @@ -0,0 +1,12 @@ +// PR: 6058 +// RUN: %llvmgcc -g -S %s -o - | llc -O0 -o /dev/null + +static inline int foo(double) __attribute__ ((always_inline)); +static inline int foo(double __x) { return __x; } + +void bar(double x) { + foo(x); +} + + + diff --git a/final/test/FrontendC/2010-02-10-PointerName.c b/final/test/FrontendC/2010-02-10-PointerName.c new file mode 100644 index 00000000000..7880fa8345e --- /dev/null +++ b/final/test/FrontendC/2010-02-10-PointerName.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -g -o - | grep DW_TAG_pointer_type | grep -v char + +char i = 1; +void foo() { + char *cp = &i; +} + diff --git a/final/test/FrontendC/2010-02-15-DbgStaticVar.c b/final/test/FrontendC/2010-02-15-DbgStaticVar.c new file mode 100644 index 00000000000..7827d96ce61 --- /dev/null +++ b/final/test/FrontendC/2010-02-15-DbgStaticVar.c @@ -0,0 +1,13 @@ +// RUN: %llvmgcc -g -S %s -o - | grep "metadata ..b., metadata ..b., metadata ...," +// Test to check intentionally empty linkage name for a static variable. +// Radar 7651244. +static int foo(int a) +{ + static int b = 1; + return b+a; +} + +int main() { + int j = foo(1); + return 0; +} diff --git a/final/test/FrontendC/2010-02-16-DbgVarScope.c b/final/test/FrontendC/2010-02-16-DbgVarScope.c new file mode 100644 index 00000000000..1d912d0f5e5 --- /dev/null +++ b/final/test/FrontendC/2010-02-16-DbgVarScope.c @@ -0,0 +1,30 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe +// RUN: echo {break 24\nrun\np loc\n} > %t.in +// RN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RN: grep {$1 = 1} + +int g1 = 1; +int g2 = 2; + +int __attribute__((always_inline)) bar() { + return g2 - g1; +} +void foobar() {} + +void foo(int s) { + unsigned loc = 0; + if (s) { + loc = 1; + foobar(); + } else { + loc = bar(); + foobar(); + } +} + +int main() { + foo(0); +} diff --git a/final/test/FrontendC/2010-02-18-Dbg-VectorType.c b/final/test/FrontendC/2010-02-18-Dbg-VectorType.c new file mode 100644 index 00000000000..d34031f09a1 --- /dev/null +++ b/final/test/FrontendC/2010-02-18-Dbg-VectorType.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | grep DW_TAG_typedef | grep float4 +typedef float float4 __attribute__((vector_size(16))); + +int main(){ + volatile float4 x = (float4) { 0.0f, 1.0f, 2.0f, 3.0f }; + x += x; + return 0; +} + diff --git a/final/test/FrontendC/2010-03-10-arm-asmreg.c b/final/test/FrontendC/2010-03-10-arm-asmreg.c new file mode 100644 index 00000000000..70d3681ea40 --- /dev/null +++ b/final/test/FrontendC/2010-03-10-arm-asmreg.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// pr6552 + +// XFAIL: * +// XTARGET: arm + +extern void bar(unsigned int ip); + +// CHECK: mov r0, r12 +void foo(void) +{ + register unsigned int ip __asm ("ip"); + bar(ip); +} + diff --git a/final/test/FrontendC/2010-03-5-LexicalScope.c b/final/test/FrontendC/2010-03-5-LexicalScope.c new file mode 100644 index 00000000000..93a841a8f29 
--- /dev/null +++ b/final/test/FrontendC/2010-03-5-LexicalScope.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | grep DW_TAG_lexical_block | count 3 +int foo(int i) { + if (i) { + int j = 2; + } + else { + int j = 3; + } + return i; +} diff --git a/final/test/FrontendC/2010-05-14-Optimized-VarType.c b/final/test/FrontendC/2010-05-14-Optimized-VarType.c new file mode 100644 index 00000000000..2aa85b5846e --- /dev/null +++ b/final/test/FrontendC/2010-05-14-Optimized-VarType.c @@ -0,0 +1,23 @@ +// RUN: %llvmgcc %s -Os -S -g -o - | grep DW_TAG_structure_type | count 1 +// Variable 'a' is optimized but the debug info should preserve its type info. +#include <stdlib.h> + +struct foo { + int Attribute; +}; + +void *getfoo(void) __attribute__((noinline)); + +void *getfoo(void) +{ + int *x = malloc(sizeof(int)); + *x = 42; + return (void *)x; +} + +int main(int argc, char *argv[]) { + struct foo *a = (struct foo *)getfoo(); + + return a->Attribute; +} + diff --git a/final/test/FrontendC/2010-05-18-asmsched.c b/final/test/FrontendC/2010-05-18-asmsched.c new file mode 100644 index 00000000000..33b87703220 --- /dev/null +++ b/final/test/FrontendC/2010-05-18-asmsched.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc %s -S -O3 -o - | llc -march=x86-64 -mtriple=x86_64-apple-darwin | FileCheck %s +// r9 used to be clobbered before its value was moved to r10. 7993104. + +void foo(int x, int y) { +// CHECK: bar +// CHECK: movq %r9, %r10 +// CHECK: movq %rdi, %r9 +// CHECK: bar + register int lr9 asm("r9") = x; + register int lr10 asm("r10") = y; + int foo; + asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10)); + foo = lr9; + lr9 = x; + lr10 = foo; + asm volatile("bar" : "=r"(lr9) : "r"(lr9), "r"(lr10)); +} diff --git a/final/test/FrontendC/2010-05-18-palignr.c b/final/test/FrontendC/2010-05-18-palignr.c new file mode 100644 index 00000000000..0b78eed0dcd --- /dev/null +++ b/final/test/FrontendC/2010-05-18-palignr.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc -mssse3 -S -o - %s | llc -mtriple=x86_64-apple-darwin | FileCheck %s +// XFAIL: * +// XTARGET: x86,i386,i686 + +#include <tmmintrin.h> + +int main () +{ +#if defined( __SSSE3__ ) + +#define vec_rld_epi16( _a, _i ) ({ vSInt16 _t = _a; _t = _mm_alignr_epi8( _t, _t, _i ); /*return*/ _t; }) + typedef int16_t vSInt16 __attribute__ ((__vector_size__ (16))); + + short dtbl[] = {1,2,3,4,5,6,7,8}; + vSInt16 *vdtbl = (vSInt16*) dtbl; + + vSInt16 v0; + v0 = *vdtbl; + // CHECK: pshufd $57 + v0 = vec_rld_epi16( v0, 4 ); + + return 0; +#endif +} diff --git a/final/test/FrontendC/2010-05-26-AsmSideEffect.c b/final/test/FrontendC/2010-05-26-AsmSideEffect.c new file mode 100644 index 00000000000..acc38b783ba --- /dev/null +++ b/final/test/FrontendC/2010-05-26-AsmSideEffect.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Radar 8026855 + +int test (void *src) { + register int w0 asm ("0"); + // CHECK: call i32 asm sideeffect + asm ("ldr %0, [%1]": "=r" (w0): "r" (src)); + // The asm to read the value of w0 has a sideeffect for a different reason + // (see 2010-05-18-asmsched.c) but that's not what this is testing for. 
+ + // CHECK: call i32 asm + return w0; +} diff --git a/final/test/FrontendC/2010-05-31-palignr.c b/final/test/FrontendC/2010-05-31-palignr.c new file mode 100644 index 00000000000..9da3145153f --- /dev/null +++ b/final/test/FrontendC/2010-05-31-palignr.c @@ -0,0 +1,24 @@ +// RUN: not %llvmgcc -mssse3 -S -o /dev/null %s |& grep "error: mask must be an immediate" +// XFAIL: * +// XTARGET: x86,i386,i686 + +#include <tmmintrin.h> + +extern int i; + +int main () +{ +#if defined( __SSSE3__ ) + + typedef int16_t vSInt16 __attribute__ ((__vector_size__ (16))); + + short dtbl[] = {1,2,3,4,5,6,7,8}; + vSInt16 *vdtbl = (vSInt16*) dtbl; + + vSInt16 v0; + v0 = *vdtbl; + v0 = _mm_alignr_epi8(v0, v0, i); + + return 0; +#endif +} diff --git a/final/test/FrontendC/2010-06-11-SaveExpr.c b/final/test/FrontendC/2010-06-11-SaveExpr.c new file mode 100644 index 00000000000..d1c122d79b4 --- /dev/null +++ b/final/test/FrontendC/2010-06-11-SaveExpr.c @@ -0,0 +1,8 @@ +// RUN: %llvmgcc -S %s +// Test case by Eric Postpischil! +void foo(void) +{ + char a[1]; + int t = 1; + ((char (*)[t]) a)[0][0] = 0; +} diff --git a/final/test/FrontendC/2010-06-17-asmcrash.c b/final/test/FrontendC/2010-06-17-asmcrash.c new file mode 100644 index 00000000000..5063054fd46 --- /dev/null +++ b/final/test/FrontendC/2010-06-17-asmcrash.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S -o - %s | llc -mtriple=x86_64-apple-darwin | FileCheck %s +// XFAIL: * +// XTARGET: x86,i386,i686 + +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef int64_t x86_reg; + +void avg_pixels8_mmx2(uint8_t *block, const uint8_t *pixels, int line_size, int h) +{ + __asm__ volatile("# %0 %1 %2 %3" + :"+g"(h), "+S"(pixels), "+D"(block) + :"r" ((x86_reg)line_size) + :"%""rax", "memory"); +// CHECK: # %ecx %rsi %rdi %rdx + } diff --git a/final/test/FrontendC/2010-06-28-DbgLocalVar.c b/final/test/FrontendC/2010-06-28-DbgLocalVar.c new file mode 100644 index 00000000000..e5df8856c0d --- /dev/null +++ b/final/test/FrontendC/2010-06-28-DbgLocalVar.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S -O2 -g %s -o - | llc -O2 -o %t.s +// RUN: grep DW_TAG_structure_type %t.s | count 2 +// Radar 8122864 + +// Code is not generated for function foo, but preserve type information of +// local variable xyz. +static foo() { + struct X { int a; int b; } xyz; +} + +int bar() { + foo(); + return 1; +} diff --git a/final/test/FrontendC/2010-06-28-nowarn.c b/final/test/FrontendC/2010-06-28-nowarn.c new file mode 100644 index 00000000000..3db8df10c18 --- /dev/null +++ b/final/test/FrontendC/2010-06-28-nowarn.c @@ -0,0 +1,21 @@ +// RUN: %llvmgcc %s -S -m32 -fasm-blocks -o /dev/null +// This should not warn about an unreferenced label. 7729514. +// XFAIL: * +// XTARGET: x86,i386,i686 + +void quarterAsm(int array[], int len) +{ + __asm + { + mov esi, array; + mov ecx, len; + shr ecx, 2; +loop: + movdqa xmm0, [esi]; + psrad xmm0, 2; + movdqa [esi], xmm0; + add esi, 16; + sub ecx, 1; + jnz loop; + } +} diff --git a/final/test/FrontendC/2010-07-08-DeclDebugLineNo.c b/final/test/FrontendC/2010-07-08-DeclDebugLineNo.c new file mode 100644 index 00000000000..491b7dbe749 --- /dev/null +++ b/final/test/FrontendC/2010-07-08-DeclDebugLineNo.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | FileCheck %s +// Ensure that dbg.declare lines for locals refer to correct line number records. +// Radar 8152866. 
+void foo() { + int l = 0; // line #4: CHECK: {{call.*llvm.dbg.declare.*%l.*\!dbg }}[[variable_l:![0-9]+]] + int p = 0; // line #5: CHECK: {{call.*llvm.dbg.declare.*%p.*\!dbg }}[[variable_p:![0-9]+]] +} +// Now match the line number records: +// CHECK: {{^}}[[variable_l]]{{ = metadata ![{]i32 5,}} +// CHECK: {{^}}[[variable_p]]{{ = metadata ![{]i32 6,}} diff --git a/final/test/FrontendC/2010-07-14-overconservative-align.c b/final/test/FrontendC/2010-07-14-overconservative-align.c new file mode 100644 index 00000000000..c4a9caac666 --- /dev/null +++ b/final/test/FrontendC/2010-07-14-overconservative-align.c @@ -0,0 +1,14 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// PR 5995 +struct s { + int word; + struct { + int filler __attribute__ ((aligned (8))); + }; +}; + +void func (struct s *s) +{ +// CHECK: load %struct.s** %s_addr, align {{[48]}} + s->word = 0; +} diff --git a/final/test/FrontendC/2010-07-14-ref-off-end.c b/final/test/FrontendC/2010-07-14-ref-off-end.c new file mode 100644 index 00000000000..c7fdd95a7aa --- /dev/null +++ b/final/test/FrontendC/2010-07-14-ref-off-end.c @@ -0,0 +1,27 @@ +// RUN: %llvmgcc %s -S -m32 -o - | FileCheck %s +// Formerly this generated code that did a load past the end of the structure. +// That was fixed by 46726, but that patch had bad side effects and was +// reverted. This has been fixed another way in the meantime. +extern void abort(); +extern void exit(int); +struct T +{ +unsigned i:8; +unsigned c:24; +}; +f(struct T t) +{ +struct T s[1]; +s[0]=t; +return(char)s->c; +} +main() +{ +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 +// CHECK: getelementptr inbounds %struct.T* %t, i32 0, i32 0 +struct T t; +t.i=0xff; +t.c=0xffff11; +if(f(t)!=0x11)abort(); +exit(0); +} diff --git a/final/test/FrontendC/2010-07-27-MinNoFoldConst.c b/final/test/FrontendC/2010-07-27-MinNoFoldConst.c new file mode 100644 index 00000000000..7cd8b4c4376 --- /dev/null +++ b/final/test/FrontendC/2010-07-27-MinNoFoldConst.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - | FileCheck %s +extern int printf(const char *, ...); +static void bad(unsigned int v1, unsigned int v2) { + printf("%u\n", 1631381461u * (((v2 - 1273463329u <= v1 - 1273463329u) ? v2 : v1) - 1273463329u) + 121322179u); +} +// Radar 8198362 +// GCC FE wants to convert the above to +// 1631381461u * MIN(v2 - 1273463329u, v1 - 1273463329u) +// and then to +// MIN(1631381461u * v2 - 4047041419, 1631381461u * v1 - 4047041419) +// +// 1631381461u * 1273463329u = 2077504466193943669, but 32-bit overflow clips +// this to 4047041419. This breaks the comparison implicit in the MIN(). +// Two multiply operations suggest the bad optimization is happening; +// one multiplication, after the MIN(), is correct. +// CHECK: mul +// CHECK-NOT: mul +// CHECK: ret diff --git a/final/test/FrontendC/2010-08-12-asm-aggr-arg.c b/final/test/FrontendC/2010-08-12-asm-aggr-arg.c new file mode 100644 index 00000000000..81ec14b2882 --- /dev/null +++ b/final/test/FrontendC/2010-08-12-asm-aggr-arg.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// Radar 8288710: A small aggregate can be passed as an integer. Make sure +// we don't get an error with "input constraint with a matching output +// constraint of incompatible type!" 
+ +struct wrapper { + int i; +}; + +// CHECK: xyz +int test(int i) { + struct wrapper w; + w.i = i; + __asm__("xyz" : "=r" (w) : "0" (w)); + return w.i; +} diff --git a/final/test/FrontendC/2010-11-16-asmblock.c b/final/test/FrontendC/2010-11-16-asmblock.c new file mode 100644 index 00000000000..c2642235cfc --- /dev/null +++ b/final/test/FrontendC/2010-11-16-asmblock.c @@ -0,0 +1,16 @@ +// RUN: %llvmgcc -S %s -fasm-blocks -o - | FileCheck %s +// XFAIL: * +// XTARGET: x86,i386,i686 +// 84282548 + +void foo() +{ +// CHECK: %0 = call i32 asm sideeffect "", "={ecx}"() nounwind +// CHECK: %asmtmp = call i32 asm sideeffect alignstack "sall $$3, $0", "={ecx},{ecx},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %0) nounwind +// CHECK: store i32 %asmtmp, i32* %"%ecx" + __asm { + sal ecx, 3; + add esi, ecx; + add edi, ecx; + } +} diff --git a/final/test/FrontendC/2010-12-01-CommonGlobal.c b/final/test/FrontendC/2010-12-01-CommonGlobal.c new file mode 100644 index 00000000000..3f6d7e88580 --- /dev/null +++ b/final/test/FrontendC/2010-12-01-CommonGlobal.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -S %s -o - | llvm-as -o /dev/null +// Don't crash on a common-linkage constant global. +extern const int kABSourceTypeProperty; +int foo(void) { + return kABSourceTypeProperty; +} +const int kABSourceTypeProperty; diff --git a/final/test/FrontendC/2011-02-21-DATA-common.c b/final/test/FrontendC/2011-02-21-DATA-common.c new file mode 100644 index 00000000000..650ae7edddb --- /dev/null +++ b/final/test/FrontendC/2011-02-21-DATA-common.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o /dev/null +struct rtxc_snapshot { + int a, b, c, d; +}; +__attribute__ ((section("__DATA, __common"))) static struct rtxc_snapshot rtxc_log_A[4]; diff --git a/final/test/FrontendC/Atomics-no64bit.c b/final/test/FrontendC/Atomics-no64bit.c new file mode 100644 index 00000000000..6fb61098110 --- /dev/null +++ b/final/test/FrontendC/Atomics-no64bit.c @@ -0,0 +1,190 @@ +// Test frontend handling of __sync builtins. +// Modified from a gcc testcase. +// RUN: %llvmgcc -S %s -o - | grep atomic | count 150 +// RUN: %llvmgcc -S %s -o - | grep p0i8 | count 50 +// RUN: %llvmgcc -S %s -o - | grep p0i16 | count 50 +// RUN: %llvmgcc -S %s -o - | grep p0i32 | count 50 +// RUN: %llvmgcc -S %s -o - | grep volatile | count 6 + +// Currently this is implemented only for Alpha, X86, PowerPC. +// Add your target here if it doesn't work. +// This version of the test does not include long long. 
+// XFAIL: sparc,arm + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; + +void test_op_ignore (void) +{ + (void) __sync_fetch_and_add (&sc, 1); + (void) __sync_fetch_and_add (&uc, 1); + (void) __sync_fetch_and_add (&ss, 1); + (void) __sync_fetch_and_add (&us, 1); + (void) __sync_fetch_and_add (&si, 1); + (void) __sync_fetch_and_add (&ui, 1); + + (void) __sync_fetch_and_sub (&sc, 1); + (void) __sync_fetch_and_sub (&uc, 1); + (void) __sync_fetch_and_sub (&ss, 1); + (void) __sync_fetch_and_sub (&us, 1); + (void) __sync_fetch_and_sub (&si, 1); + (void) __sync_fetch_and_sub (&ui, 1); + + (void) __sync_fetch_and_or (&sc, 1); + (void) __sync_fetch_and_or (&uc, 1); + (void) __sync_fetch_and_or (&ss, 1); + (void) __sync_fetch_and_or (&us, 1); + (void) __sync_fetch_and_or (&si, 1); + (void) __sync_fetch_and_or (&ui, 1); + + (void) __sync_fetch_and_xor (&sc, 1); + (void) __sync_fetch_and_xor (&uc, 1); + (void) __sync_fetch_and_xor (&ss, 1); + (void) __sync_fetch_and_xor (&us, 1); + (void) __sync_fetch_and_xor (&si, 1); + (void) __sync_fetch_and_xor (&ui, 1); + + (void) __sync_fetch_and_and (&sc, 1); + (void) __sync_fetch_and_and (&uc, 1); + (void) __sync_fetch_and_and (&ss, 1); + (void) __sync_fetch_and_and (&us, 1); + (void) __sync_fetch_and_and (&si, 1); + (void) __sync_fetch_and_and (&ui, 1); + + (void) __sync_fetch_and_nand (&sc, 1); + (void) __sync_fetch_and_nand (&uc, 1); + (void) __sync_fetch_and_nand (&ss, 1); + (void) __sync_fetch_and_nand (&us, 1); + (void) __sync_fetch_and_nand (&si, 1); + (void) __sync_fetch_and_nand (&ui, 1); +} + +void test_fetch_and_op (void) +{ + sc = __sync_fetch_and_add (&sc, 11); + uc = __sync_fetch_and_add (&uc, 11); + ss = __sync_fetch_and_add (&ss, 11); + us = __sync_fetch_and_add (&us, 11); + si = __sync_fetch_and_add (&si, 11); + ui = __sync_fetch_and_add (&ui, 11); + + sc = __sync_fetch_and_sub (&sc, 11); + uc = __sync_fetch_and_sub (&uc, 11); + ss = __sync_fetch_and_sub (&ss, 11); + us = __sync_fetch_and_sub (&us, 11); + si = __sync_fetch_and_sub (&si, 11); + ui = __sync_fetch_and_sub (&ui, 11); + + sc = __sync_fetch_and_or (&sc, 11); + uc = __sync_fetch_and_or (&uc, 11); + ss = __sync_fetch_and_or (&ss, 11); + us = __sync_fetch_and_or (&us, 11); + si = __sync_fetch_and_or (&si, 11); + ui = __sync_fetch_and_or (&ui, 11); + + sc = __sync_fetch_and_xor (&sc, 11); + uc = __sync_fetch_and_xor (&uc, 11); + ss = __sync_fetch_and_xor (&ss, 11); + us = __sync_fetch_and_xor (&us, 11); + si = __sync_fetch_and_xor (&si, 11); + ui = __sync_fetch_and_xor (&ui, 11); + + sc = __sync_fetch_and_and (&sc, 11); + uc = __sync_fetch_and_and (&uc, 11); + ss = __sync_fetch_and_and (&ss, 11); + us = __sync_fetch_and_and (&us, 11); + si = __sync_fetch_and_and (&si, 11); + ui = __sync_fetch_and_and (&ui, 11); + + sc = __sync_fetch_and_nand (&sc, 11); + uc = __sync_fetch_and_nand (&uc, 11); + ss = __sync_fetch_and_nand (&ss, 11); + us = __sync_fetch_and_nand (&us, 11); + si = __sync_fetch_and_nand (&si, 11); + ui = __sync_fetch_and_nand (&ui, 11); +} + +void test_op_and_fetch (void) +{ + sc = __sync_add_and_fetch (&sc, uc); + uc = __sync_add_and_fetch (&uc, uc); + ss = __sync_add_and_fetch (&ss, uc); + us = __sync_add_and_fetch (&us, uc); + si = __sync_add_and_fetch (&si, uc); + ui = __sync_add_and_fetch (&ui, uc); + + sc = __sync_sub_and_fetch (&sc, uc); + uc = __sync_sub_and_fetch (&uc, uc); + ss = __sync_sub_and_fetch (&ss, uc); + us = __sync_sub_and_fetch (&us, uc); + si = __sync_sub_and_fetch (&si, uc); + ui = 
__sync_sub_and_fetch (&ui, uc); + + sc = __sync_or_and_fetch (&sc, uc); + uc = __sync_or_and_fetch (&uc, uc); + ss = __sync_or_and_fetch (&ss, uc); + us = __sync_or_and_fetch (&us, uc); + si = __sync_or_and_fetch (&si, uc); + ui = __sync_or_and_fetch (&ui, uc); + + sc = __sync_xor_and_fetch (&sc, uc); + uc = __sync_xor_and_fetch (&uc, uc); + ss = __sync_xor_and_fetch (&ss, uc); + us = __sync_xor_and_fetch (&us, uc); + si = __sync_xor_and_fetch (&si, uc); + ui = __sync_xor_and_fetch (&ui, uc); + + sc = __sync_and_and_fetch (&sc, uc); + uc = __sync_and_and_fetch (&uc, uc); + ss = __sync_and_and_fetch (&ss, uc); + us = __sync_and_and_fetch (&us, uc); + si = __sync_and_and_fetch (&si, uc); + ui = __sync_and_and_fetch (&ui, uc); + + sc = __sync_nand_and_fetch (&sc, uc); + uc = __sync_nand_and_fetch (&uc, uc); + ss = __sync_nand_and_fetch (&ss, uc); + us = __sync_nand_and_fetch (&us, uc); + si = __sync_nand_and_fetch (&si, uc); + ui = __sync_nand_and_fetch (&ui, uc); +} + +void test_compare_and_swap (void) +{ + sc = __sync_val_compare_and_swap (&sc, uc, sc); + uc = __sync_val_compare_and_swap (&uc, uc, sc); + ss = __sync_val_compare_and_swap (&ss, uc, sc); + us = __sync_val_compare_and_swap (&us, uc, sc); + si = __sync_val_compare_and_swap (&si, uc, sc); + ui = __sync_val_compare_and_swap (&ui, uc, sc); + + ui = __sync_bool_compare_and_swap (&sc, uc, sc); + ui = __sync_bool_compare_and_swap (&uc, uc, sc); + ui = __sync_bool_compare_and_swap (&ss, uc, sc); + ui = __sync_bool_compare_and_swap (&us, uc, sc); + ui = __sync_bool_compare_and_swap (&si, uc, sc); + ui = __sync_bool_compare_and_swap (&ui, uc, sc); +} + +void test_lock (void) +{ + sc = __sync_lock_test_and_set (&sc, 1); + uc = __sync_lock_test_and_set (&uc, 1); + ss = __sync_lock_test_and_set (&ss, 1); + us = __sync_lock_test_and_set (&us, 1); + si = __sync_lock_test_and_set (&si, 1); + ui = __sync_lock_test_and_set (&ui, 1); + + __sync_synchronize (); + + __sync_lock_release (&sc); + __sync_lock_release (&uc); + __sync_lock_release (&ss); + __sync_lock_release (&us); + __sync_lock_release (&si); + __sync_lock_release (&ui); +} diff --git a/final/test/FrontendC/Atomics.c b/final/test/FrontendC/Atomics.c new file mode 100644 index 00000000000..2b96ae0f629 --- /dev/null +++ b/final/test/FrontendC/Atomics.c @@ -0,0 +1,236 @@ +// Test frontend handling of __sync builtins. +// Modified from a gcc testcase. +// RUN: %llvmgcc -S %s -o - | grep atomic | count 200 +// RUN: %llvmgcc -S %s -o - | grep p0i8 | count 50 +// RUN: %llvmgcc -S %s -o - | grep p0i16 | count 50 +// RUN: %llvmgcc -S %s -o - | grep p0i32 | count 50 +// RUN: %llvmgcc -S %s -o - | grep volatile | count 8 + +// Currently this is implemented only for Alpha, X86, PowerPC. +// Add your target here if it doesn't work. +// PPC32 does not translate the long long variants, so fails this test. 
+// XFAIL: sparc,arm,powerpc + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; + +void test_op_ignore (void) +{ + (void) __sync_fetch_and_add (&sc, 1); + (void) __sync_fetch_and_add (&uc, 1); + (void) __sync_fetch_and_add (&ss, 1); + (void) __sync_fetch_and_add (&us, 1); + (void) __sync_fetch_and_add (&si, 1); + (void) __sync_fetch_and_add (&ui, 1); + (void) __sync_fetch_and_add (&sll, 1); + (void) __sync_fetch_and_add (&ull, 1); + + (void) __sync_fetch_and_sub (&sc, 1); + (void) __sync_fetch_and_sub (&uc, 1); + (void) __sync_fetch_and_sub (&ss, 1); + (void) __sync_fetch_and_sub (&us, 1); + (void) __sync_fetch_and_sub (&si, 1); + (void) __sync_fetch_and_sub (&ui, 1); + (void) __sync_fetch_and_sub (&sll, 1); + (void) __sync_fetch_and_sub (&ull, 1); + + (void) __sync_fetch_and_or (&sc, 1); + (void) __sync_fetch_and_or (&uc, 1); + (void) __sync_fetch_and_or (&ss, 1); + (void) __sync_fetch_and_or (&us, 1); + (void) __sync_fetch_and_or (&si, 1); + (void) __sync_fetch_and_or (&ui, 1); + (void) __sync_fetch_and_or (&sll, 1); + (void) __sync_fetch_and_or (&ull, 1); + + (void) __sync_fetch_and_xor (&sc, 1); + (void) __sync_fetch_and_xor (&uc, 1); + (void) __sync_fetch_and_xor (&ss, 1); + (void) __sync_fetch_and_xor (&us, 1); + (void) __sync_fetch_and_xor (&si, 1); + (void) __sync_fetch_and_xor (&ui, 1); + (void) __sync_fetch_and_xor (&sll, 1); + (void) __sync_fetch_and_xor (&ull, 1); + + (void) __sync_fetch_and_and (&sc, 1); + (void) __sync_fetch_and_and (&uc, 1); + (void) __sync_fetch_and_and (&ss, 1); + (void) __sync_fetch_and_and (&us, 1); + (void) __sync_fetch_and_and (&si, 1); + (void) __sync_fetch_and_and (&ui, 1); + (void) __sync_fetch_and_and (&sll, 1); + (void) __sync_fetch_and_and (&ull, 1); + + (void) __sync_fetch_and_nand (&sc, 1); + (void) __sync_fetch_and_nand (&uc, 1); + (void) __sync_fetch_and_nand (&ss, 1); + (void) __sync_fetch_and_nand (&us, 1); + (void) __sync_fetch_and_nand (&si, 1); + (void) __sync_fetch_and_nand (&ui, 1); + (void) __sync_fetch_and_nand (&sll, 1); + (void) __sync_fetch_and_nand (&ull, 1); +} + +void test_fetch_and_op (void) +{ + sc = __sync_fetch_and_add (&sc, 11); + uc = __sync_fetch_and_add (&uc, 11); + ss = __sync_fetch_and_add (&ss, 11); + us = __sync_fetch_and_add (&us, 11); + si = __sync_fetch_and_add (&si, 11); + ui = __sync_fetch_and_add (&ui, 11); + sll = __sync_fetch_and_add (&sll, 11); + ull = __sync_fetch_and_add (&ull, 11); + + sc = __sync_fetch_and_sub (&sc, 11); + uc = __sync_fetch_and_sub (&uc, 11); + ss = __sync_fetch_and_sub (&ss, 11); + us = __sync_fetch_and_sub (&us, 11); + si = __sync_fetch_and_sub (&si, 11); + ui = __sync_fetch_and_sub (&ui, 11); + sll = __sync_fetch_and_sub (&sll, 11); + ull = __sync_fetch_and_sub (&ull, 11); + + sc = __sync_fetch_and_or (&sc, 11); + uc = __sync_fetch_and_or (&uc, 11); + ss = __sync_fetch_and_or (&ss, 11); + us = __sync_fetch_and_or (&us, 11); + si = __sync_fetch_and_or (&si, 11); + ui = __sync_fetch_and_or (&ui, 11); + sll = __sync_fetch_and_or (&sll, 11); + ull = __sync_fetch_and_or (&ull, 11); + + sc = __sync_fetch_and_xor (&sc, 11); + uc = __sync_fetch_and_xor (&uc, 11); + ss = __sync_fetch_and_xor (&ss, 11); + us = __sync_fetch_and_xor (&us, 11); + si = __sync_fetch_and_xor (&si, 11); + ui = __sync_fetch_and_xor (&ui, 11); + sll = __sync_fetch_and_xor (&sll, 11); + ull = __sync_fetch_and_xor (&ull, 11); + + sc = __sync_fetch_and_and (&sc, 11); + uc = __sync_fetch_and_and (&uc, 11); + ss = 
__sync_fetch_and_and (&ss, 11); + us = __sync_fetch_and_and (&us, 11); + si = __sync_fetch_and_and (&si, 11); + ui = __sync_fetch_and_and (&ui, 11); + sll = __sync_fetch_and_and (&sll, 11); + ull = __sync_fetch_and_and (&ull, 11); + + sc = __sync_fetch_and_nand (&sc, 11); + uc = __sync_fetch_and_nand (&uc, 11); + ss = __sync_fetch_and_nand (&ss, 11); + us = __sync_fetch_and_nand (&us, 11); + si = __sync_fetch_and_nand (&si, 11); + ui = __sync_fetch_and_nand (&ui, 11); + sll = __sync_fetch_and_nand (&sll, 11); + ull = __sync_fetch_and_nand (&ull, 11); +} + +void test_op_and_fetch (void) +{ + sc = __sync_add_and_fetch (&sc, uc); + uc = __sync_add_and_fetch (&uc, uc); + ss = __sync_add_and_fetch (&ss, uc); + us = __sync_add_and_fetch (&us, uc); + si = __sync_add_and_fetch (&si, uc); + ui = __sync_add_and_fetch (&ui, uc); + sll = __sync_add_and_fetch (&sll, uc); + ull = __sync_add_and_fetch (&ull, uc); + + sc = __sync_sub_and_fetch (&sc, uc); + uc = __sync_sub_and_fetch (&uc, uc); + ss = __sync_sub_and_fetch (&ss, uc); + us = __sync_sub_and_fetch (&us, uc); + si = __sync_sub_and_fetch (&si, uc); + ui = __sync_sub_and_fetch (&ui, uc); + sll = __sync_sub_and_fetch (&sll, uc); + ull = __sync_sub_and_fetch (&ull, uc); + + sc = __sync_or_and_fetch (&sc, uc); + uc = __sync_or_and_fetch (&uc, uc); + ss = __sync_or_and_fetch (&ss, uc); + us = __sync_or_and_fetch (&us, uc); + si = __sync_or_and_fetch (&si, uc); + ui = __sync_or_and_fetch (&ui, uc); + sll = __sync_or_and_fetch (&sll, uc); + ull = __sync_or_and_fetch (&ull, uc); + + sc = __sync_xor_and_fetch (&sc, uc); + uc = __sync_xor_and_fetch (&uc, uc); + ss = __sync_xor_and_fetch (&ss, uc); + us = __sync_xor_and_fetch (&us, uc); + si = __sync_xor_and_fetch (&si, uc); + ui = __sync_xor_and_fetch (&ui, uc); + sll = __sync_xor_and_fetch (&sll, uc); + ull = __sync_xor_and_fetch (&ull, uc); + + sc = __sync_and_and_fetch (&sc, uc); + uc = __sync_and_and_fetch (&uc, uc); + ss = __sync_and_and_fetch (&ss, uc); + us = __sync_and_and_fetch (&us, uc); + si = __sync_and_and_fetch (&si, uc); + ui = __sync_and_and_fetch (&ui, uc); + sll = __sync_and_and_fetch (&sll, uc); + ull = __sync_and_and_fetch (&ull, uc); + + sc = __sync_nand_and_fetch (&sc, uc); + uc = __sync_nand_and_fetch (&uc, uc); + ss = __sync_nand_and_fetch (&ss, uc); + us = __sync_nand_and_fetch (&us, uc); + si = __sync_nand_and_fetch (&si, uc); + ui = __sync_nand_and_fetch (&ui, uc); + sll = __sync_nand_and_fetch (&sll, uc); + ull = __sync_nand_and_fetch (&ull, uc); +} + +void test_compare_and_swap (void) +{ + sc = __sync_val_compare_and_swap (&sc, uc, sc); + uc = __sync_val_compare_and_swap (&uc, uc, sc); + ss = __sync_val_compare_and_swap (&ss, uc, sc); + us = __sync_val_compare_and_swap (&us, uc, sc); + si = __sync_val_compare_and_swap (&si, uc, sc); + ui = __sync_val_compare_and_swap (&ui, uc, sc); + sll = __sync_val_compare_and_swap (&sll, uc, sc); + ull = __sync_val_compare_and_swap (&ull, uc, sc); + + ui = __sync_bool_compare_and_swap (&sc, uc, sc); + ui = __sync_bool_compare_and_swap (&uc, uc, sc); + ui = __sync_bool_compare_and_swap (&ss, uc, sc); + ui = __sync_bool_compare_and_swap (&us, uc, sc); + ui = __sync_bool_compare_and_swap (&si, uc, sc); + ui = __sync_bool_compare_and_swap (&ui, uc, sc); + ui = __sync_bool_compare_and_swap (&sll, uc, sc); + ui = __sync_bool_compare_and_swap (&ull, uc, sc); +} + +void test_lock (void) +{ + sc = __sync_lock_test_and_set (&sc, 1); + uc = __sync_lock_test_and_set (&uc, 1); + ss = __sync_lock_test_and_set (&ss, 1); + us = __sync_lock_test_and_set 
(&us, 1); + si = __sync_lock_test_and_set (&si, 1); + ui = __sync_lock_test_and_set (&ui, 1); + sll = __sync_lock_test_and_set (&sll, 1); + ull = __sync_lock_test_and_set (&ull, 1); + + __sync_synchronize (); + + __sync_lock_release (&sc); + __sync_lock_release (&uc); + __sync_lock_release (&ss); + __sync_lock_release (&us); + __sync_lock_release (&si); + __sync_lock_release (&ui); + __sync_lock_release (&sll); + __sync_lock_release (&ull); +} diff --git a/final/test/FrontendC/BasicInstrs.c b/final/test/FrontendC/BasicInstrs.c new file mode 100644 index 00000000000..ceed17c2ba9 --- /dev/null +++ b/final/test/FrontendC/BasicInstrs.c @@ -0,0 +1,26 @@ +// This file can be used to see what a native C compiler is generating for a +// variety of interesting operations. +// +// RUN: %llvmgcc -S %s -o - | llc + +unsigned int udiv(unsigned int X, unsigned int Y) { + return X/Y; +} +int sdiv(int X, int Y) { + return X/Y; +} +unsigned int urem(unsigned int X, unsigned int Y) { + return X%Y; +} +int srem(int X, int Y) { + return X%Y; +} + +_Bool setlt(int X, int Y) { + return X < Y; +} + +_Bool setgt(int X, int Y) { + return X > Y; +} + diff --git a/final/test/FrontendC/alignstack.c b/final/test/FrontendC/alignstack.c new file mode 100644 index 00000000000..30c00ff88e4 --- /dev/null +++ b/final/test/FrontendC/alignstack.c @@ -0,0 +1,23 @@ +// RUN: %llvmgcc %s -fasm-blocks -S -o - | FileCheck %s +// Only MS-style asm blocks should get the alignstack attribute. +// XFAIL: * +// XTARGET: x86,i386,i686,darwin + +void Method3() +{ +// CHECK: Method3 +// CHECK-NOT: alignstack + asm("foo:"); +// CHECK: return +} + +void Method4() +{ +// CHECK: Method4 +// CHECK: alignstack + asm { + bar: + } +// CHECK: return +} + diff --git a/final/test/FrontendC/always-inline.c b/final/test/FrontendC/always-inline.c new file mode 100644 index 00000000000..22f6c7a20ef --- /dev/null +++ b/final/test/FrontendC/always-inline.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - | grep call | not grep foo + +void bar() { +} + +inline void __attribute__((__always_inline__)) foo() { + bar(); +} + +void i_want_bar() { + foo(); +} diff --git a/final/test/FrontendC/arrayderef.c b/final/test/FrontendC/arrayderef.c new file mode 100644 index 00000000000..66c2e0ba416 --- /dev/null +++ b/final/test/FrontendC/arrayderef.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc %s -S -O -o - | FileCheck %s +// The load here was getting lost because this code was close +// enough to the traditional (wrong) implementation of offsetof +// to confuse the gcc FE. 8629268. + +struct foo { + int x; + int *y; +}; + +struct foo Foo[1]; + +int * bar(unsigned int ix) { +// CHECK: load + return &Foo->y[ix]; +} + diff --git a/final/test/FrontendC/asm-reg-var-local.c b/final/test/FrontendC/asm-reg-var-local.c new file mode 100644 index 00000000000..22bd43c076d --- /dev/null +++ b/final/test/FrontendC/asm-reg-var-local.c @@ -0,0 +1,32 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +// Exercise various use cases for local asm "register variables". 
+// XFAIL: * +// XTARGET: x86_64,i686,i386 + +int foo() { +// CHECK: %a = alloca i32 + + register int a asm("rsi")=5; +// CHECK: store i32 5, i32* %a, align 4 + + asm volatile("; %0 This asm defines rsi" : "=r"(a)); +// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi} +// CHECK: store i32 %asmtmp, i32* %a + + a = 42; +// CHECK: store i32 42, i32* %a, align 4 + + asm volatile("; %0 This asm uses rsi" : : "r"(a)); +// CHECK: %1 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind +// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2) + + return a; +// CHECK: %3 = load i32* %a, align 4 +// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind +// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind +// CHECK: store i32 %4, i32* %0, align 4 +// CHECK: %5 = load i32* %0, align 4 +// CHECK: store i32 %5, i32* %retval, align 4 +} diff --git a/final/test/FrontendC/attribute_constructor.c b/final/test/FrontendC/attribute_constructor.c new file mode 100644 index 00000000000..da17a37e260 --- /dev/null +++ b/final/test/FrontendC/attribute_constructor.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - | grep llvm.global_ctors + +void foo() __attribute__((constructor)); +void foo() { + bar(); +} diff --git a/final/test/FrontendC/block-copy.c b/final/test/FrontendC/block-copy.c new file mode 100644 index 00000000000..c088f2dc195 --- /dev/null +++ b/final/test/FrontendC/block-copy.c @@ -0,0 +1,20 @@ +/* RUN: %llvmgcc %s -S -o - -O3 | grep {call.*memcpy} + + This should compile into a memcpy from a global, not 128 stores. */ + + + +void foo(); + +float bar() { + float lookupTable[] = {-1,-1,-1,0, -1,-1,0,-1, -1,-1,0,1, -1,-1,1,0, + -1,0,-1,-1, -1,0,-1,1, -1,0,1,-1, -1,0,1,1, + -1,1,-1,0, -1,1,0,-1, -1,1,0,1, -1,1,1,0, + 0,-1,-1,-1, 0,-1,-1,1, 0,-1,1,-1, 0,-1,1,1, + 1,-1,-1,0, 1,-1,0,-1, 1,-1,0,1, 1,-1,1,0, + 1,0,-1,-1, 1,0,-1,1, 1,0,1,-1, 1,0,1,1, + 1,1,-1,0, 1,1,0,-1, 1,1,0,1, 1,1,1,0, + 0,1,-1,-1, 0,1,-1,1, 0,1,1,-1, 0,1,1,1}; + foo(lookupTable); +} + diff --git a/final/test/FrontendC/crash-invalid-array.c b/final/test/FrontendC/crash-invalid-array.c new file mode 100644 index 00000000000..d602f785458 --- /dev/null +++ b/final/test/FrontendC/crash-invalid-array.c @@ -0,0 +1,17 @@ +// RUN: not %llvmgcc -O1 %s -S |& grep {error: invalid use of array with unspecified bounds} +// PR6913 + +#include + +int main() +{ + int x[10][10]; + int (*p)[] = x; // <-- this line is what triggered it + + int i; + + for(i = 0; i < 10; ++i) + { + p[i][i] = i; + } +} diff --git a/final/test/FrontendC/cstring-align.c b/final/test/FrontendC/cstring-align.c new file mode 100644 index 00000000000..544c9f3d3fb --- /dev/null +++ b/final/test/FrontendC/cstring-align.c @@ -0,0 +1,11 @@ +// RUN: %llvmgcc %s -S -Os -o - | llc -march=x86 -mtriple=i386-apple-darwin10 | FileCheck %s + +extern void func(const char *, const char *); + +void long_function_name() { + func("%s: the function name", __func__); +} + +// CHECK: .align 4 +// CHECK: ___func__. 
+// CHECK: .asciz "long_function_name" diff --git a/final/test/FrontendC/dg.exp b/final/test/FrontendC/dg.exp new file mode 100644 index 00000000000..a9be28a63cf --- /dev/null +++ b/final/test/FrontendC/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if [ llvm_gcc_supports c ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] +} diff --git a/final/test/FrontendC/exact-div-expr.c b/final/test/FrontendC/exact-div-expr.c new file mode 100644 index 00000000000..9dce922f953 --- /dev/null +++ b/final/test/FrontendC/exact-div-expr.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - -O1 | grep ashr +// RUN: %llvmgcc -S %s -o - -O1 | not grep sdiv + +long long test(int *A, int *B) { + return A-B; +} diff --git a/final/test/FrontendC/extern-weak.c b/final/test/FrontendC/extern-weak.c new file mode 100644 index 00000000000..73b59cc48c4 --- /dev/null +++ b/final/test/FrontendC/extern-weak.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -O3 -S -o - %s | grep extern_weak +// RUN: %llvmgcc -O3 -S -o - %s | llc + +#if !defined(__linux__) && !defined(__FreeBSD__) && \ + !defined(__OpenBSD__) && !defined(__CYGWIN__) && !defined(__DragonFly__) +void foo() __attribute__((weak_import)); +#else +void foo() __attribute__((weak)); +#endif + +void bar() { foo(); } + diff --git a/final/test/FrontendC/fp-logical.c b/final/test/FrontendC/fp-logical.c new file mode 100644 index 00000000000..60404f67022 --- /dev/null +++ b/final/test/FrontendC/fp-logical.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -S -o - | grep bitcast | count 14 + +typedef float vFloat __attribute__ ((__vector_size__ (16))); +typedef unsigned int vUInt32 __attribute__ ((__vector_size__ (16))); +void foo(vFloat *X) { + vFloat NoSignBit = (vFloat) ~ (vUInt32) (vFloat) { -0.f, -0.f, -0.f, -0.f }; + vFloat ExtremeValue = *X & NoSignBit; + *X = ExtremeValue; +} + +void bar(vFloat *X) { + vFloat NoSignBit = (vFloat) ~ (vUInt32) (vFloat) { -0.f, -0.f, -0.f, -0.f }; + vFloat ExtremeValue = *X & ~NoSignBit; + *X = ExtremeValue; +} diff --git a/final/test/FrontendC/func-aligned.c b/final/test/FrontendC/func-aligned.c new file mode 100644 index 00000000000..477e82418ae --- /dev/null +++ b/final/test/FrontendC/func-aligned.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s + +// rdar://7270273 +void foo() __attribute__((aligned (64))); +void foo() { +// CHECK: define void @foo() {{.*}} align 64 +} diff --git a/final/test/FrontendC/funccall.c b/final/test/FrontendC/funccall.c new file mode 100644 index 00000000000..9735e347057 --- /dev/null +++ b/final/test/FrontendC/funccall.c @@ -0,0 +1,17 @@ + +static int q; + +void foo() { + int t = q; + q = t + 1; +} +int main() { + q = 0; + foo(); + q = q - 1; + + return q; +} + +// This is the source that corresponds to funccall.ll +// RUN: echo foo diff --git a/final/test/FrontendC/hidden-visibility.c b/final/test/FrontendC/hidden-visibility.c new file mode 100644 index 00000000000..589bb53453f --- /dev/null +++ b/final/test/FrontendC/hidden-visibility.c @@ -0,0 +1,3 @@ +// RUN: %llvmgcc %s -S -o - | grep {hidden unnamed_addr global} + +int X __attribute__ ((__visibility__ ("hidden"))) = 123; diff --git a/final/test/FrontendC/implicit-arg.c b/final/test/FrontendC/implicit-arg.c new file mode 100644 index 00000000000..a6cb8bce7ed --- /dev/null +++ b/final/test/FrontendC/implicit-arg.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S -O0 -o - +// RUN: %llvmgcc %s -S -O1 -o - +// rdar://6518089 + +static int bar(); +void foo() { + int a = bar(); +} +int bar(unsigned a) { +} diff --git 
a/final/test/FrontendC/inline-asm-function.c b/final/test/FrontendC/inline-asm-function.c new file mode 100644 index 00000000000..e5848409865 --- /dev/null +++ b/final/test/FrontendC/inline-asm-function.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -fasm-blocks -o - -O | grep naked +// 7533078 (partial). + +asm int f() { + xyz +} diff --git a/final/test/FrontendC/inline-asm-mrv.c b/final/test/FrontendC/inline-asm-mrv.c new file mode 100644 index 00000000000..6d1df67af1b --- /dev/null +++ b/final/test/FrontendC/inline-asm-mrv.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -o - -O | not grep alloca +// PR2094 + +int sad16_sse2(void *v, unsigned char *blk2, unsigned char *blk1, + int stride, int h) { + int ret; + asm volatile( "%0 %1 %2 %3" + : "+r" (h), "+r" (blk1), "+r" (blk2) + : "r" ((long)stride)); + asm volatile("set %0 %1" : "=r"(ret) : "r"(blk1)); + return ret; +} diff --git a/final/test/FrontendC/libcalls-d.c b/final/test/FrontendC/libcalls-d.c new file mode 100644 index 00000000000..d92208d89ed --- /dev/null +++ b/final/test/FrontendC/libcalls-d.c @@ -0,0 +1,14 @@ +// llvm-gcc -O1+ should run simplify libcalls; -O0 shouldn't, +// and -fno-builtin shouldn't. +// -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not. +// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\.f64} +// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2} +// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp} +// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2} + +double exp2(double); + +double t4(unsigned char x) { + return exp2(x); +} + diff --git a/final/test/FrontendC/libcalls-ld.c b/final/test/FrontendC/libcalls-ld.c new file mode 100644 index 00000000000..cf71d19eaa3 --- /dev/null +++ b/final/test/FrontendC/libcalls-ld.c @@ -0,0 +1,17 @@ +// llvm-gcc -O1+ should run simplify libcalls; -O0 shouldn't, +// and -fno-builtin shouldn't. +// -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not. +// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\..*f} +// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2l} +// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp} +// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2l} + +// If this fails for you because your target doesn't support long double, +// please xfail the test. + +long double exp2l(long double); + +long double t4(unsigned char x) { + return exp2l(x); +} + diff --git a/final/test/FrontendC/libcalls.c b/final/test/FrontendC/libcalls.c new file mode 100644 index 00000000000..60e22e7e690 --- /dev/null +++ b/final/test/FrontendC/libcalls.c @@ -0,0 +1,14 @@ +// llvm-gcc -O1+ should run simplify libcalls; -O0 shouldn't, +// and -fno-builtin shouldn't. +// -fno-math-errno should emit an llvm intrinsic, -fmath-errno should not. +// RUN: %llvmgcc %s -S -fno-math-errno -O0 -o - | grep {call.*exp2\\.f32} +// RUN: %llvmgcc %s -S -fmath-errno -O0 -o - | grep {call.*exp2f} +// RUN: %llvmgcc %s -S -O1 -o - | grep {call.*ldexp} +// RUN: %llvmgcc %s -S -O3 -fno-builtin -o - | grep {call.*exp2f} + +float exp2f(float); + +float t4(unsigned char x) { + return exp2f(x); +} + diff --git a/final/test/FrontendC/misaligned-param.c b/final/test/FrontendC/misaligned-param.c new file mode 100644 index 00000000000..b4fcfe312f5 --- /dev/null +++ b/final/test/FrontendC/misaligned-param.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -m32 -S -o - | FileCheck %s +// Misaligned parameter must be memcpy'd to correctly aligned temporary.
+// XFAIL: * +// XTARGET: i386-apple-darwin,i686-apple-darwin,x86_64-apple-darwin + +struct s { int x; long double y; }; +long double foo(struct s x, int i, struct s y) { +// CHECK: foo +// CHECK: %x_addr = alloca %struct.s, align 16 +// CHECK: %y_addr = alloca %struct.s, align 16 +// CHECK: memcpy +// CHECK: memcpy +// CHECK: bar + return bar(&x, &y); +} diff --git a/final/test/FrontendC/nested-functions.c b/final/test/FrontendC/nested-functions.c new file mode 100644 index 00000000000..bccbef3dbdd --- /dev/null +++ b/final/test/FrontendC/nested-functions.c @@ -0,0 +1,18 @@ +// RUN: %llvmgcc -S %s -o - -fnested-functions +// PR1274 + +void Bork() { + void Fork(const int *src, int size) { + int i = 1; + int x; + + while (i < size) + x = src[i]; + } +} + +void foo(void *a){ + inline void foo_bar() { + a += 1; + } +} diff --git a/final/test/FrontendC/pr2394.c b/final/test/FrontendC/pr2394.c new file mode 100644 index 00000000000..ca8b046f72f --- /dev/null +++ b/final/test/FrontendC/pr2394.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc %s -S -o - | FileCheck %s +struct __attribute((packed)) x {int a : 24;}; +int a(struct x* g) { + // CHECK: load i24 + return g->a; +} diff --git a/final/test/FrontendC/pr3518.c b/final/test/FrontendC/pr3518.c new file mode 100644 index 00000000000..112394a651b --- /dev/null +++ b/final/test/FrontendC/pr3518.c @@ -0,0 +1,24 @@ +// RUN: %llvmgcc %s -S -O0 -o - | grep {= internal unnamed_addr global} | count 4 +// PR 3518 +// Some of the objects were coming out as uninitialized (external) before 3518 +// was fixed. Internal names are different between llvm-gcc and clang so they +// are not tested. + +extern void abort (void); + +struct A { int i; int j; }; +struct B { struct A *a; struct A *b; }; +struct C { struct B *c; struct A *d; }; +struct C e = { &(struct B) { &(struct A) { 1, 2 }, &(struct A) { 3, 4 } }, &(struct A) { 5, 6 } }; + +int +main (void) +{ + if (e.c->a->i != 1 || e.c->a->j != 2) + abort (); + if (e.c->b->i != 3 || e.c->b->j != 4) + abort (); + if (e.d->i != 5 || e.d->j != 6) + abort (); + return 0; +} diff --git a/final/test/FrontendC/pr4349.c b/final/test/FrontendC/pr4349.c new file mode 100644 index 00000000000..24acd9c950f --- /dev/null +++ b/final/test/FrontendC/pr4349.c @@ -0,0 +1,38 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// PR 4349 + +union reg +{ + unsigned char b[2][2]; + unsigned short w[2]; + unsigned int d; +}; +struct cpu +{ + union reg pc; +}; +extern struct cpu cpu; +struct svar +{ + void *ptr; +}; +// CHECK: @svars1 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (%struct.cpu* @cpu to i8*) }] +struct svar svars1[] = +{ + { &((cpu.pc).w[0]) } +}; +// CHECK: @svars2 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x i8]* bitcast (%struct.cpu* @cpu to [2 x i8]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) }] +struct svar svars2[] = +{ + { &((cpu.pc).b[0][1]) } +}; +// CHECK: @svars3 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* bitcast (i16* getelementptr ([2 x i16]* bitcast (%struct.cpu* @cpu to [2 x i16]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1) to i8*) }] +struct svar svars3[] = +{ + { &((cpu.pc).w[1]) } +}; +// CHECK: @svars4 = unnamed_addr global [1 x %struct.svar] [%struct.svar { i8* getelementptr ([2 x [2 x i8]]* bitcast (%struct.cpu* @cpu to [2 x [2 x i8]]*), i{{[0-9]+}} 0, i{{[0-9]+}} 1, i{{[0-9]+}} 1) }] +struct svar svars4[] = +{ + { &((cpu.pc).b[1][1]) } +}; diff --git a/final/test/FrontendC/pr5406.c b/final/test/FrontendC/pr5406.c new file mode 100644 index
00000000000..0b1f277592f --- /dev/null +++ b/final/test/FrontendC/pr5406.c @@ -0,0 +1,20 @@ +// RUN: %llvmgcc %s -S -O0 -o - | FileCheck %s +// PR 5406 + +// XFAIL: * +// XTARGET: arm + +typedef struct { char x[3]; } A0; +void foo (int i, ...); + + +// CHECK: call void (i32, ...)* @foo(i32 1, i32 {{.*}}) nounwind +int main (void) +{ + A0 a3; + a3.x[0] = 0; + a3.x[0] = 0; + a3.x[2] = 26; + foo (1, a3 ); + return 0; +} diff --git a/final/test/FrontendC/ptr-rotate.c b/final/test/FrontendC/ptr-rotate.c new file mode 100644 index 00000000000..36d9755dd67 --- /dev/null +++ b/final/test/FrontendC/ptr-rotate.c @@ -0,0 +1,7 @@ +// RUN: %llvmgcc %s -S -m32 -o /dev/null +// RUN: %llvmgcc %s -S -O1 -m32 -o - | llc -march=x86 -mtriple=i386-apple-darwin9.7 | FileCheck %s -check-prefix=DARWIN + +unsigned int func(void *A) { + // DARWIN: roll $27 + return ((((unsigned long long) A) >> 5) | (((unsigned long long) A) << 27)); +} diff --git a/final/test/FrontendC/redef-ext-inline.c b/final/test/FrontendC/redef-ext-inline.c new file mode 100644 index 00000000000..240beb1f6f6 --- /dev/null +++ b/final/test/FrontendC/redef-ext-inline.c @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S %s -o - +// rdar://7208839 + +extern inline int f1 (void) {return 1;} +int f3 (void) {return f1();} +int f1 (void) {return 0;} diff --git a/final/test/FrontendC/sret.c b/final/test/FrontendC/sret.c new file mode 100644 index 00000000000..42666917a8d --- /dev/null +++ b/final/test/FrontendC/sret.c @@ -0,0 +1,15 @@ +// RUN: %llvmgcc %s -S -O0 -o - | grep sret | count 5 + +struct abc { + long a; + long b; + long c; +}; + +struct abc foo1(void); +struct abc foo2(); + +void bar() { + struct abc dummy1 = foo1(); + struct abc dummy2 = foo2(); +} diff --git a/final/test/FrontendC/sret2.c b/final/test/FrontendC/sret2.c new file mode 100644 index 00000000000..0f35b1c2586 --- /dev/null +++ b/final/test/FrontendC/sret2.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -O0 -o - | grep sret | count 2 + +struct abc { + long a; + long b; + long c; +}; + +struct abc foo2(){} diff --git a/final/test/FrontendC/unaligned-memcpy.c b/final/test/FrontendC/unaligned-memcpy.c new file mode 100644 index 00000000000..8fb84e4f515 --- /dev/null +++ b/final/test/FrontendC/unaligned-memcpy.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc %s -S -o - | llc + +void bork() { + char Qux[33] = {0}; +} diff --git a/final/test/FrontendC/union-align.c b/final/test/FrontendC/union-align.c new file mode 100644 index 00000000000..f99a7608056 --- /dev/null +++ b/final/test/FrontendC/union-align.c @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o - | grep load | grep "4 x float" | not grep "align 4" +// RUN: %llvmgcc -S %s -o - | grep load | grep "4 x float" | grep "align 16" +// PR3432 +// rdar://6536377 + +typedef float __m128 __attribute__ ((__vector_size__ (16))); + +typedef union +{ + int i[4]; + float f[4]; + __m128 v; +} u_t; + +__m128 t(u_t *a) { + return a->v; +} diff --git a/final/test/FrontendC/vla-1.c b/final/test/FrontendC/vla-1.c new file mode 100644 index 00000000000..77f78a5e3af --- /dev/null +++ b/final/test/FrontendC/vla-1.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc_only -std=gnu99 %s -S |& grep {warning: alignment for} +// ppc does not support this feature, and gets a fatal error at runtime. 
+// XFAIL: powerpc + +int foo(int a) +{ + int var[a] __attribute__((__aligned__(32))); + return 4; +} diff --git a/final/test/FrontendC/vla-2.c b/final/test/FrontendC/vla-2.c new file mode 100644 index 00000000000..555cfc78925 --- /dev/null +++ b/final/test/FrontendC/vla-2.c @@ -0,0 +1,10 @@ +// RUN: %llvmgcc -std=gnu99 %s -S -o - | grep ".*alloca.*align 16" + +extern void bar(int[]); + +void foo(int a) +{ + int var[a] __attribute__((__aligned__(16))); + bar(var); + return; +} diff --git a/final/test/FrontendC/wchar-const.c b/final/test/FrontendC/wchar-const.c new file mode 100644 index 00000000000..7cf3322e8cf --- /dev/null +++ b/final/test/FrontendC/wchar-const.c @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -o - | grep {constant \\\[18 x} | grep { 84, } +// This should pass for any endianness combination of host and target. +#include <wchar.h> +extern void foo(const wchar_t* p); +int main (int argc, const char * argv[]) +{ + foo(L"This is some text"); + return 0; +} diff --git a/final/test/FrontendC/weak_constant.c b/final/test/FrontendC/weak_constant.c new file mode 100644 index 00000000000..53379482cb4 --- /dev/null +++ b/final/test/FrontendC/weak_constant.c @@ -0,0 +1,12 @@ +// RUN: %llvmgcc -S %s -O1 -o - | grep {ret.*123} +// Check for bug compatibility with gcc. + +const int x __attribute((weak)) = 123; + +int* f(void) { + return &x; +} + +int g(void) { + return *f(); +} diff --git a/final/test/FrontendFortran/2008-11-03-OptionOverride.f90 b/final/test/FrontendFortran/2008-11-03-OptionOverride.f90 new file mode 100644 index 00000000000..d65ba9b4736 --- /dev/null +++ b/final/test/FrontendFortran/2008-11-03-OptionOverride.f90 @@ -0,0 +1,4 @@ +! RUN: %llvmgcc -S %s -march=k8 +! XTARGET: x86 +! Note: this file intentionally left blank; the problem itself is in +! frontend initialization routines and the -march flag! diff --git a/final/test/FrontendFortran/2009-02-09-FloorDivExpr.f90 b/final/test/FrontendFortran/2009-02-09-FloorDivExpr.f90 new file mode 100644 index 00000000000..ddd05c54949 --- /dev/null +++ b/final/test/FrontendFortran/2009-02-09-FloorDivExpr.f90 @@ -0,0 +1,32 @@ +! RUN: %llvmgcc -S %s +! PR2437 +program main + implicit none + call build (77) +contains + subroutine build (order) + integer :: order, i, j + + + call test (1, order, 3, (/ (i, i = 1, order, 3) /)) + call test (order, 1, -3, (/ (i, i = order, 1, -3) /)) + + do j = -10, 10 + call test (order + j, order, 5, (/ (i, i = order + j, order, 5) /)) + call test (order + j, order, -5, (/ (i, i = order + j, order, -5) /)) + end do + + end subroutine build + + subroutine test (from, to, step, values) + integer, dimension (:) :: values + integer :: from, to, step, last, i + + last = 0 + do i = from, to, step + last = last + 1 + if (values (last) .ne. i) call abort + end do + if (size (values, dim = 1) .ne. last) call abort + end subroutine test +end program main diff --git a/final/test/FrontendFortran/cpow.f90 b/final/test/FrontendFortran/cpow.f90 new file mode 100644 index 00000000000..25156fd5897 --- /dev/null +++ b/final/test/FrontendFortran/cpow.f90 @@ -0,0 +1,18 @@ +! RUN: %llvmgcc -S %s +! PR2443 + +! Program to test the power (**) operator +program testpow + implicit none + real(kind=4) r, s, two + real(kind=8) :: q + complex(kind=4) :: c, z + real, parameter :: del = 0.0001 + integer i, j + + two = 2.0 + + c = (2.0, 3.0) + c = c ** two + if (abs(c - (-5.0, 12.0)) .gt.
del) call abort +end program diff --git a/final/test/FrontendFortran/dg.exp b/final/test/FrontendFortran/dg.exp new file mode 100644 index 00000000000..45bffc6fdcb --- /dev/null +++ b/final/test/FrontendFortran/dg.exp @@ -0,0 +1,6 @@ +load_lib llvm.exp + +if [ llvm_gcc_supports fortran ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{f,f90}]] +} + diff --git a/final/test/FrontendObjC++/2007-10-03-MetadataPointers.mm b/final/test/FrontendObjC++/2007-10-03-MetadataPointers.mm new file mode 100644 index 00000000000..2ab76c1db59 --- /dev/null +++ b/final/test/FrontendObjC++/2007-10-03-MetadataPointers.mm @@ -0,0 +1,7 @@ +// RUN: %llvmgcc -w -x objective-c++ -S %s -o /dev/null + +@class NSImage; +void bork() { + NSImage *nsimage; + [nsimage release]; +} diff --git a/final/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm b/final/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm new file mode 100644 index 00000000000..da47ed0c122 --- /dev/null +++ b/final/test/FrontendObjC++/2010-08-02-NonPODObjectValue.mm @@ -0,0 +1,27 @@ +// RUN: not %llvmgcc %s -S -o - |& FileCheck %s +// This tests for a specific diagnostic in LLVM-GCC. +// Clang compiles this correctly with no diagnostic, +// ergo this test will fail with a Clang-based front-end. +class TFENodeVector { +public: + TFENodeVector(const TFENodeVector& inNodeVector); + TFENodeVector(); +}; + +@interface TWindowHistoryEntry {} +@property (assign, nonatomic) TFENodeVector targetPath; +@end + +@implementation TWindowHistoryEntry +@synthesize targetPath; +- (void) initWithWindowController { + TWindowHistoryEntry* entry; + TFENodeVector newPath; + // CHECK: setting a C++ non-POD object value is not implemented +#ifdef __clang__ +#error setting a C++ non-POD object value is not implemented +#endif + entry.targetPath = newPath; + [entry setTargetPath:newPath]; +} +@end diff --git a/final/test/FrontendObjC++/2010-08-04-Template.mm b/final/test/FrontendObjC++/2010-08-04-Template.mm new file mode 100644 index 00000000000..2ebfd3e17ce --- /dev/null +++ b/final/test/FrontendObjC++/2010-08-04-Template.mm @@ -0,0 +1,10 @@ +// RUN: %llvmgcc %s -S +struct TRunSoon { + template static void Post() {} +}; + +@implementation TPrivsTableViewMainController +- (void) applyToEnclosed { + TRunSoon::Post(); +} +@end diff --git a/final/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm b/final/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm new file mode 100644 index 00000000000..986094c0723 --- /dev/null +++ b/final/test/FrontendObjC++/2010-08-06-X.Y-syntax.mm @@ -0,0 +1,16 @@ +// RUN: %llvmgcc %s -S +struct TFENode { + TFENode(const TFENode& inNode); +}; + +@interface TIconViewController +- (const TFENode&) target; +@end + +void sortAllChildrenForNode(const TFENode&node); + +@implementation TIconViewController +- (void) setArrangeBy { + sortAllChildrenForNode(self.target); +} +@end diff --git a/final/test/FrontendObjC++/dg.exp b/final/test/FrontendObjC++/dg.exp new file mode 100644 index 00000000000..41c3db2af09 --- /dev/null +++ b/final/test/FrontendObjC++/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if [ llvm_gcc_supports obj-c++ ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{mm}]] +} diff --git a/final/test/FrontendObjC/2007-04-03-ObjcEH.m b/final/test/FrontendObjC/2007-04-03-ObjcEH.m new file mode 100644 index 00000000000..ae744c78500 --- /dev/null +++ b/final/test/FrontendObjC/2007-04-03-ObjcEH.m @@ -0,0 +1,29 @@ +// RUN: %llvmgcc -S %s -o /dev/null + +@interface B +-(int)bar; +@end + +@interface A +-(void) Foo:(int) state; +@end 
+ +@implementation A +- (void) Foo:(int) state { + + int wasResponded = 0; + @try { + if (state) { + B * b = 0; + @try { } + @finally { + wasResponded = ![b bar]; + } + } + } + @finally { + } +} +@end + + diff --git a/final/test/FrontendObjC/2007-05-02-Strong.m b/final/test/FrontendObjC/2007-05-02-Strong.m new file mode 100644 index 00000000000..34b41ad964f --- /dev/null +++ b/final/test/FrontendObjC/2007-05-02-Strong.m @@ -0,0 +1,23 @@ +// RUN: %llvmgcc -S %s -fobjc-gc -o /dev/null +typedef int NSInteger; +typedef struct _NSRect { + int origin; + int size; +} NSRect; + +__attribute__((objc_gc(strong))) NSRect *_cachedRectArray; +extern const NSRect NSZeroRect; +@interface A{ +} +-(void)bar:(NSInteger *)rectCount; +@end + +@implementation A + +-(void)bar:(NSInteger *)rectCount { + NSRect appendRect = NSZeroRect; + + _cachedRectArray[*rectCount - 1] = NSZeroRect; +} + +@end diff --git a/final/test/FrontendObjC/2007-09-25-EH.m b/final/test/FrontendObjC/2007-09-25-EH.m new file mode 100644 index 00000000000..d625584a6c5 --- /dev/null +++ b/final/test/FrontendObjC/2007-09-25-EH.m @@ -0,0 +1,27 @@ +// RUN: %llvmgcc -S -w -m64 -mmacosx-version-min=10.5 %s -o /dev/null +// XFAIL: * +// XTARGET: darwin +@class NSDictionary, DSoBuffer, DSoDirectory, NSMutableArray; +@interface NSException {} +@end +@interface DSoNode { + DSoDirectory *mDirectory; +} +@end +@implementation DSoNode +- (void) _findRecordsOfTypes { + DSoBuffer *dbData; + void *recInfo; + NSMutableArray *results; + @try { + dsGetRecordEntry([dbData dsDataBuffer], (void**)&recInfo); + @try { + [results addObject:37]; + } @finally { + dsDeallocRecordEntry([mDirectory dsDirRef], recInfo); + } + } @catch(NSException * exception) { + } +} + + diff --git a/final/test/FrontendObjC/2007-10-17-SJLJExceptions.m b/final/test/FrontendObjC/2007-10-17-SJLJExceptions.m new file mode 100644 index 00000000000..970207e0d8a --- /dev/null +++ b/final/test/FrontendObjC/2007-10-17-SJLJExceptions.m @@ -0,0 +1,24 @@ +// RUN: %llvmgcc -m32 -x objective-c %s -pipe -std=gnu99 -O2 -fexceptions -S -o - | not grep Unwind_Resume +#import <stdio.h> + +@interface Foo { + char c; + short s; + int i; + long l; + float f; + double d; +} +-(Foo*)retain; +@end + +struct Foo *bork(Foo *FooArray) { + struct Foo *result = 0; + @try { + result = [FooArray retain]; + } @catch(id any) { + printf("hello world\n"); + } + + return result; +} diff --git a/final/test/FrontendObjC/2007-10-18-ProDescriptor.m b/final/test/FrontendObjC/2007-10-18-ProDescriptor.m new file mode 100644 index 00000000000..220fdd2c232 --- /dev/null +++ b/final/test/FrontendObjC/2007-10-18-ProDescriptor.m @@ -0,0 +1,19 @@ +// RUN: %llvmgcc -x objective-c -S %s -o /dev/null +@protocol O +@end +@interface O < O > { +} +@end +struct A { +}; +@protocol AB +- (unsigned) ver; +@end +@interface AGy:O < AB > { +} +@end +@implementation AGy +- (unsigned) ver { +} +@end + diff --git a/final/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m b/final/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m new file mode 100644 index 00000000000..4bbe4407bed --- /dev/null +++ b/final/test/FrontendObjC/2007-10-23-GC-WriteBarrier.m @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -x objective-c -S %s -o /dev/null -fobjc-gc +// rdar://5541393 + +typedef unsigned int NSUInteger; +__attribute__((objc_gc(strong))) float *_scores; + +void foo(int i, float f) { + _scores[i] = f; +} diff --git a/final/test/FrontendObjC/2008-10-3-EhValue.m b/final/test/FrontendObjC/2008-10-3-EhValue.m new file mode 100644 index 00000000000..c7aabe271ee --- /dev/null +++
b/final/test/FrontendObjC/2008-10-3-EhValue.m @@ -0,0 +1,50 @@ +// RUN: %llvmgcc -w -x objective-c -S %s -o /dev/null + +@interface Object { +@public + Class isa; +} ++initialize; ++alloc; ++new; ++free; +-free; ++(Class)class; +-(Class)class; +-init; +-superclass; +-(const char *)name; +@end + +@interface Frob: Object +@end + +@implementation Frob: Object +@end + +static Frob* _connection = ((void *)0); + +extern void abort(void); + +void test (Object* sendPort) +{ + int cleanupPorts = 1; + Frob* receivePort = ((void *)0); + + @try { + receivePort = (Frob *) -1; + _connection = (Frob *) -1; + receivePort = ((void *)0); + sendPort = ((void *)0); + cleanupPorts = 0; + @throw [Object new]; + } + @catch(Frob *obj) { + if(!(0)) abort(); + } + @catch(id exc) { + if(!(!receivePort)) abort(); + if(!(!sendPort)) abort(); + if(!(!cleanupPorts)) abort(); + } +} diff --git a/final/test/FrontendObjC/2008-11-12-Metadata.m b/final/test/FrontendObjC/2008-11-12-Metadata.m new file mode 100644 index 00000000000..be8ee41e77a --- /dev/null +++ b/final/test/FrontendObjC/2008-11-12-Metadata.m @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -x objective-c -m64 -S %s -o /dev/null + +@interface A +@end +@protocol P +@end +@interface B : A
<P>
+{ +} +@end +@implementation B +- (void)test { +} +@end diff --git a/final/test/FrontendObjC/2008-11-24-ConstCFStrings.m b/final/test/FrontendObjC/2008-11-24-ConstCFStrings.m new file mode 100644 index 00000000000..976adc47f12 --- /dev/null +++ b/final/test/FrontendObjC/2008-11-24-ConstCFStrings.m @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -x objective-c -m64 -S %s -o - | grep {L_unnamed_cfstring_} + +@class NSString; + +@interface A +- (void)bork:(NSString*)msg; +@end + +void func(A *a) { + [a bork:@"Hello world!"]; +} diff --git a/final/test/FrontendObjC/2008-11-25-Blocks.m b/final/test/FrontendObjC/2008-11-25-Blocks.m new file mode 100644 index 00000000000..c5cd3d2a0b2 --- /dev/null +++ b/final/test/FrontendObjC/2008-11-25-Blocks.m @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -S %s -o /dev/null +// rdar://6394879 + +@interface bork +- (id)B:(void (^)())blk; +- (void)C; +@end +@implementation bork +- (id)B:(void (^)())blk { + __attribute__((__blocks__(byref))) bork* new = ((void *)0); + blk(); +} +- (void)C { + __attribute__((__blocks__(byref))) id var; + [self B:^() {}]; +} +@end diff --git a/final/test/FrontendObjC/2009-01-26-WriteBarrier-2.m b/final/test/FrontendObjC/2009-01-26-WriteBarrier-2.m new file mode 100644 index 00000000000..32833a81e16 --- /dev/null +++ b/final/test/FrontendObjC/2009-01-26-WriteBarrier-2.m @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -x objective-c -S %s -fobjc-gc -o - | grep objc_assign_strongCast +// rdar://5541393 + +typedef struct { + void (^ivarBlock)(void); +} StructWithBlock_t; + +int main(char *argc, char *argv[]) { + StructWithBlock_t *swbp = (StructWithBlock_t *)malloc(sizeof(StructWithBlock_t*)); + __block int i = 10; + // assigning a Block into an struct slot should elicit a write-barrier under GC + swbp->ivarBlock = ^ { ++i; }; + return 0; +} diff --git a/final/test/FrontendObjC/2009-02-05-VolatileProp.m b/final/test/FrontendObjC/2009-02-05-VolatileProp.m new file mode 100644 index 00000000000..1deef739bee --- /dev/null +++ b/final/test/FrontendObjC/2009-02-05-VolatileProp.m @@ -0,0 +1,11 @@ +/* RUN: %llvmgcc -w -x objective-c -S %s -o /dev/null -pedantic-errors + rdar://6551276 */ + +void foo(const unsigned short *); +void bar() { + unsigned short *s[3]; + int i; + @try { } @catch (id anException) { } + foo(2+s[i]); +} + diff --git a/final/test/FrontendObjC/2009-04-14-AsmSection.m b/final/test/FrontendObjC/2009-04-14-AsmSection.m new file mode 100644 index 00000000000..aefe0887671 --- /dev/null +++ b/final/test/FrontendObjC/2009-04-14-AsmSection.m @@ -0,0 +1,9 @@ +// RUN: %llvmgcc -S %s -fobjc-abi-version=2 -o %t +// RUN: grep {OBJC_CLASS_\\\$_A.*section.*__DATA, __objc_data.*align} %t +// XTARGET: darwin + +@interface A +@end + +@implementation A +@end diff --git a/final/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m b/final/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m new file mode 100644 index 00000000000..cada8438bc9 --- /dev/null +++ b/final/test/FrontendObjC/2009-04-27-bitfield-vs-ivar.m @@ -0,0 +1,44 @@ +// RUN: %llvmgcc -S -x objective-c -m64 -fobjc-abi-version=2 %s -o %t +// RUN: grep {OBJC_CLASS_RO_\\\$_I4} %t | grep {i32 0, i32 1, i32 2, i32 0} +// RUN: grep {OBJC_CLASS_RO_\\\$_I2} %t | grep {i32 0, i32 1, i32 1, i32 0} +// RUN: grep {OBJC_CLASS_RO_\\\$_I5} %t | grep {i32 0, i32 0, i32 0, i32 0} +// XTARGET: darwin + +// Test instance variable sizing when base class ends in bitfield +@interface I3 { + unsigned int _iv2 :1; +} +@end + +@interface I4 : I3 { + char _iv4; +} +@end + +// Test case with no instance variables in derived class +@interface I1 { + 
unsigned int _iv2 :1; +} +@end + +@interface I2 : I1 { +} +@end + +// Test case with no instance variables anywhere +@interface I6 { +} +@end + +@interface I5 : I6 { +} +@end + +@implementation I4 +@end + +@implementation I2 +@end + +@implementation I5 +@end diff --git a/final/test/FrontendObjC/2009-04-28-bitfield-vs-vbc.m b/final/test/FrontendObjC/2009-04-28-bitfield-vs-vbc.m new file mode 100644 index 00000000000..8306fcc7e07 --- /dev/null +++ b/final/test/FrontendObjC/2009-04-28-bitfield-vs-vbc.m @@ -0,0 +1,127 @@ +// RUN: %llvmgcc -S -x objective-c -m32 %s -o %t +// This used to crash, 6831493. +#include <stdlib.h> + +struct s0 { + double x; +}; + +@interface I2 { + struct s0 _iv1; +} +@end + +@interface I3 : I2 { + unsigned int _iv2 :1; + unsigned : 0; + unsigned int _iv3 : 3; +} +@end + +@interface I4 : I3 { + char _iv4; +} +@end + +@interface I5 : I4 { + char _iv5; + int _iv6; + int _iv7; +} + +@property int P1; +@end + +@implementation I2 +@end + +@implementation I3 +@end + +@implementation I4 +@end + +@interface I5 () +@property int P2; +@end + +#if 0 +int g2 = sizeof(I2); +int g3 = sizeof(I3); +int g4 = sizeof(I4); +int g5_0 = sizeof(I5); +#endif + +@implementation I5 +#ifdef __x86_64 +@synthesize P1 = _MadeUpName; +@synthesize P2 = _AnotherMadeUpName; +#else +@synthesize P1 = _iv6; +@synthesize P2 = _iv7; +#endif +@end + +#if 0 +int g5_1 = sizeof(I5); +#endif + +@interface T0_I0 { + double iv_A_0; + char iv_A_1; +} +@end + +@interface T0_I1 : T0_I0 { + char iv_B_0; +} +@end + +@interface T0_I2 : T0_I1 { + char iv_C_0; +} +@end + +#if 0 +int g6 = sizeof(T0_I0); +int g7 = sizeof(T0_I1); +int g8 = sizeof(T0_I2); +#endif + +@implementation T0_I0 @end +@implementation T0_I1 @end +@implementation T0_I2 @end + +void f0(I2*i2,I3*i3,I4*i4,I5*i5,T0_I0*t0_i0,T0_I1*t0_i1,T0_I2*t0_i2) { +} + +// Thomas Wang's ui32 hash. +unsigned hash_ui32_to_ui32(unsigned a) { + a = (a ^ 61) ^ (a >> 16); + a = a + (a << 3); + a = a ^ (a >> 4); + a = a * 0x27d4eb2d; + a = a ^ (a >> 15); + return a; +} + +unsigned char hash_ui32_to_ui8(unsigned ui) { + ui = hash_ui32_to_ui32(ui); + ui ^= ui>>8; + ui ^= ui>>8; + ui ^= ui>>8; + return (unsigned char) ui; +} + +void *init() { + unsigned i, N = 1024; + unsigned char *p = malloc(N); + for (i=0; i != N; ++i) + p[i] = hash_ui32_to_ui8(i); + return p; +} + +int main(){ + void *p = init(); + f0(p,p,p,p,p,p,p); +} diff --git a/final/test/FrontendObjC/2009-08-05-utf16.m b/final/test/FrontendObjC/2009-08-05-utf16.m new file mode 100644 index 00000000000..df3745c4870 --- /dev/null +++ b/final/test/FrontendObjC/2009-08-05-utf16.m @@ -0,0 +1,5 @@ +/* RUN: %llvmgcc -w -x objective-c -S %s -o - | grep {__utf16_string_1} | grep {internal unnamed_addr constant} | grep {12 x i8} + rdar://7095855 rdar://7115749 */ + +void *P = @"iPod™"; + diff --git a/final/test/FrontendObjC/2009-08-17-DebugInfo.m b/final/test/FrontendObjC/2009-08-17-DebugInfo.m new file mode 100644 index 00000000000..8ed7c24dc13 --- /dev/null +++ b/final/test/FrontendObjC/2009-08-17-DebugInfo.m @@ -0,0 +1,28 @@ +// This is a regression test on debug info to make sure that we can set a +// breakpoint on an Objective-C message.
+// RUN: %llvmgcc -S -O0 -g %s -o - | llc -o %t.s -O0 +// RUN: %compile_c %t.s -o %t.o +// RUN: %link %t.o -o %t.exe -framework Foundation +// RUN: echo {break randomFunc\n} > %t.in +// RUN: gdb -q -batch -n -x %t.in %t.exe | tee %t.out | \ +// RUN: grep {Breakpoint 1 at 0x.*: file .*2009-08-17-DebugInfo.m, line 21} +// XTARGET: darwin +@interface MyClass +{ + int my; +} ++ init; +- randomFunc; +@end + +@implementation MyClass ++ init { +} +- randomFunc { my = 42; } +@end + +int main() { + id o = [MyClass init]; + [o randomFunc]; + return 0; +} diff --git a/final/test/FrontendObjC/2009-11-30-Objc-ID.m b/final/test/FrontendObjC/2009-11-30-Objc-ID.m new file mode 100644 index 00000000000..787bf72efe3 --- /dev/null +++ b/final/test/FrontendObjC/2009-11-30-Objc-ID.m @@ -0,0 +1,14 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | \ +// RUN: llc --disable-fp-elim -o %t.s -O0 +// RUN: grep id %t.s | grep DW_AT_name +@interface A +-(id) blah; +@end + +@implementation A +-(id)blah { + int i = 1; + i++; + return i; +} +@end diff --git a/final/test/FrontendObjC/2010-02-01-utf16-with-null.m b/final/test/FrontendObjC/2010-02-01-utf16-with-null.m new file mode 100644 index 00000000000..86e46376bd2 --- /dev/null +++ b/final/test/FrontendObjC/2010-02-01-utf16-with-null.m @@ -0,0 +1,5 @@ +/* RUN: %llvmgcc -w -x objective-c -S %s -o - | not grep {__ustring} + rdar://7589850 */ + +void *P = @"good\0bye"; + diff --git a/final/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m b/final/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m new file mode 100644 index 00000000000..bb00f6a1e2c --- /dev/null +++ b/final/test/FrontendObjC/2010-02-11-fwritable-stringsBug.m @@ -0,0 +1,17 @@ +// RUN: %llvmgcc -x objective-c -fwritable-strings -S %s -o - | FileCheck %s +// CHECK: @.str = private unnamed_addr constant +// CHECK: @.str1 = internal unnamed_addr global + +// rdar://7634471 + +@class NSString; + +@interface A +- (void)foo:(NSString*)msg; +- (void)bar:(const char*)msg; +@end + +void func(A *a) { + [a foo:@"Hello world!"]; + [a bar:"Goodbye world!"]; +} diff --git a/final/test/FrontendObjC/2010-02-23-DbgInheritance.m b/final/test/FrontendObjC/2010-02-23-DbgInheritance.m new file mode 100644 index 00000000000..7e1cf67b475 --- /dev/null +++ b/final/test/FrontendObjC/2010-02-23-DbgInheritance.m @@ -0,0 +1,9 @@ +// RUN: %llvmgcc %s -S -g -o - | grep -v DW_TAG_member +// Interface P should not be a member of interface I in debug info. +@interface P +@end + +@interface I : P +@end + +void fn(I *iptr) {} diff --git a/final/test/FrontendObjC/2010-03-17-StructRef.m b/final/test/FrontendObjC/2010-03-17-StructRef.m new file mode 100644 index 00000000000..3594684a194 --- /dev/null +++ b/final/test/FrontendObjC/2010-03-17-StructRef.m @@ -0,0 +1,43 @@ +// RUN: %llvmgcc %s -m64 -S -o - | FileCheck %s +// Bitfield references must not touch memory outside of the enclosing +// struct. 
Radar 7639995 +typedef signed char BOOL; +@protocol NSObject +- (id)init; +@end +@interface NSObject <NSObject> {} +@end +@interface IMAVChatParticipant : NSObject { + int _ardRole; + int _state; + int _avRelayStatus; + int _chatEndedReason; + int _chatError; + unsigned _sendingAudio:1; + unsigned _sendingVideo:1; + unsigned _sendingAuxVideo:1; + unsigned _audioMuted:1; + unsigned _videoPaused:1; + unsigned _networkStalled:1; + unsigned _isInitiator:1; + unsigned _isAOLInterop:1; + unsigned _isRecording:1; + unsigned _isUsingICE:1; +} +@end +@implementation IMAVChatParticipant +- (id) init { + self = [super init]; + if ( self ) { + BOOL blah = (BOOL)1; + // We expect these three bitfield assignments to generate i8 stores. + _sendingAudio = (BOOL)1; + _isUsingICE = (BOOL)1; + _isUsingICE = blah; + // CHECK: store i8 + // CHECK: store i8 + // CHECK: store i8 + } + return self; +} +@end diff --git a/final/test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m b/final/test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m new file mode 100644 index 00000000000..bded9ea2a40 --- /dev/null +++ b/final/test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m @@ -0,0 +1,6 @@ +// RUN: %llvmgcc -S -O0 -g %s -o - | grep DW_TAG_variable | count 1 + +// Do not emit debug info for unnamed builtin CFString variable. +@interface Foo +@end +Foo *FooName = @"FooBar"; diff --git a/final/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m b/final/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m new file mode 100644 index 00000000000..a5bd2b73bf0 --- /dev/null +++ b/final/test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m @@ -0,0 +1,11 @@ +// RUN: %llvmgcc -S -w -m64 -mmacosx-version-min=10.5 %s -o - | \ +// RUN: llc --disable-fp-elim -o - | FileCheck %s +// XFAIL: * +// XTARGET: darwin + +@interface Foo +@end +Foo *FooName = @"FooBar"; + +// CHECK: .section __TEXT,__cstring,cstring_literals +// CHECK-NEXT: L_.str: diff --git a/final/test/FrontendObjC/dg.exp b/final/test/FrontendObjC/dg.exp new file mode 100644 index 00000000000..18f73a79787 --- /dev/null +++ b/final/test/FrontendObjC/dg.exp @@ -0,0 +1,5 @@ +load_lib llvm.exp + +if [ llvm_gcc_supports objc ] then { + RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{m}]] +} diff --git a/final/test/Integer/2007-01-19-TruncSext.ll b/final/test/Integer/2007-01-19-TruncSext.ll new file mode 100644 index 00000000000..3fee6bc8b6e --- /dev/null +++ b/final/test/Integer/2007-01-19-TruncSext.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll +; RUN: llvm-as < %s | lli --force-interpreter=true | grep -- -255 + +@ARRAY = global [ 20 x i17 ] zeroinitializer +@FORMAT = constant [ 4 x i8 ] c"%d\0A\00" + +declare i32 @printf(i8* %format, ...)
+ +define void @multiply(i32 %index, i32 %X, i32 %Y) { + %Z = mul i32 %X, %Y + %P = getelementptr [20 x i17]* @ARRAY, i32 0, i32 %index + %Result = trunc i32 %Z to i17 + store i17 %Result, i17* %P + ret void +} + +define i32 @main(i32 %argc, i8** %argv) { + %i = bitcast i32 0 to i32 + call void @multiply(i32 %i, i32 -1, i32 255) + %P = getelementptr [20 x i17]* @ARRAY, i32 0, i32 0 + %X = load i17* %P + %result = sext i17 %X to i32 + %fmt = getelementptr [4 x i8]* @FORMAT, i32 0, i32 0 + call i32 (i8*,...)* @printf(i8* %fmt, i32 %result) + ret i32 0 +} + diff --git a/final/test/Integer/BitArith.ll b/final/test/Integer/BitArith.ll new file mode 100644 index 00000000000..350a9849947 --- /dev/null +++ b/final/test/Integer/BitArith.ll @@ -0,0 +1,25 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + + +declare void @"foo"(i31 %i, i63 %j, i10 %k) + + +; foo test basic arith operations +define void @"foo"(i31 %i, i63 %j, i10 %k) +begin + %t1 = trunc i63 %j to i31 + %t2 = add i31 %t1, %i + %t20 = add i31 3, %t1 + %t3 = zext i31 %i to i63 + %t4 = sub i63 %t3, %j + %t40 = sub i63 %j, -100 + %t5 = mul i10 %k, 7 + %t6 = sdiv i63 %j, -2 + %t7 = udiv i63 %j, %t3 + %t8 = urem i10 %k, 10 + %t9 = srem i10 %k, -10 + ret void +end + diff --git a/final/test/Integer/BitBit.ll b/final/test/Integer/BitBit.ll new file mode 100644 index 00000000000..420bbe5a5fc --- /dev/null +++ b/final/test/Integer/BitBit.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +declare void @"foo"(i31 %i, i33 %j) + + +; foo test basic bitwise operations +define void @"foo"(i31 %i, i33 %j) +begin + %t1 = trunc i33 %j to i31 + %t2 = and i31 %t1, %i + %t3 = sext i31 %i to i33 + %t4 = or i33 %t3, %j + %t5 = xor i31 %t2, 7 + %t6 = shl i31 %i, 2 + %t7 = trunc i31 %i to i8 + %t8 = shl i8 %t7, 3 + %t9 = lshr i33 %j, 31 + %t7z = zext i8 %t7 to i33 + %t10 = ashr i33 %j, %t7z + ret void +end + diff --git a/final/test/Integer/BitCast.ll b/final/test/Integer/BitCast.ll new file mode 100644 index 00000000000..0bef0230e95 --- /dev/null +++ b/final/test/Integer/BitCast.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +declare void @"foo"(i31 %i, i1280 %j, i1 %k, float %f) + + +; foo test basic arith operations +define void @"foo"(i31 %i, i1280 %j, i1 %k, float %f) +begin + %t1 = trunc i1280 %j to i31 + %t2 = trunc i31 %t1 to i1 + + %t3 = zext i31 %i to i1280 + %t4 = sext i31 %i to i1280 + + %t5 = fptoui float 0x400921FA00000000 to i31 + %t6 = uitofp i31 %t5 to double + + %t7 = fptosi double 0xC0934A456D5CFAAD to i28 + %t8 = sitofp i8 -1 to double + %t9 = uitofp i8 255 to double + + ret void +end + diff --git a/final/test/Integer/BitIcmp.ll b/final/test/Integer/BitIcmp.ll new file mode 100644 index 00000000000..c22461224d0 --- /dev/null +++ b/final/test/Integer/BitIcmp.ll @@ -0,0 +1,43 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + + +define i55 @"simpleIcmp"(i55 %i0, i55 %j0) +begin + %t1 = icmp eq i55 %i0, %j0 + %t2 = icmp ne i55 %i0, %j0 + %t3 = icmp ult i55 %i0, %j0 + %t4 = icmp sgt i55 %i0, %j0 + %t5 = icmp ule i55 %i0, %j0 + %t6 = icmp sge i55 %i0, %j0 + + %t7 = icmp eq i55 %i0, 1098765432 + %t8 = icmp ne i55 %i0, -31415926 + + %t9 = icmp ult i55 10000, %j0 + %t10 = icmp sgt i55 -10000, %j0 + + ret i55 %i0 +end + 
+define i31 @"phitest"(i12 %i) +begin + +HasArg: + %n1 = add i12 1, %i + br label %Continue + +Continue: + %n = phi i12 [%n1, %HasArg], [%next, %Continue] + %next = add i12 1, %n + br label %Continue +end + +define i18 @"select"(i18 %i) +begin + %t = icmp sgt i18 %i, 100 + %k = select i1 %t, i18 %i, i18 999 + ret i18 %k +end + diff --git a/final/test/Integer/BitMem.ll b/final/test/Integer/BitMem.ll new file mode 100644 index 00000000000..2c093bc9902 --- /dev/null +++ b/final/test/Integer/BitMem.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +declare void @"foo"() + + +; foo test basic arith operations +define void @"foo"() { + %t1 = malloc i31, i32 4 + %t2 = malloc i31, i32 7, align 1024 + %t3 = malloc [4 x i15] + + %idx = getelementptr [4 x i15]* %t3, i64 0, i64 2 + store i15 -123, i15* %idx + + free [4 x i15]* %t3 + free i31* %t2 + free i31* %t1 + + %t4 = alloca i12, i32 100 + free i12* %t4 + + %t5 = alloca i31 + store i31 -123, i31* %t5 + + free i31* %t5 + ret void +} diff --git a/final/test/Integer/BitMisc.ll b/final/test/Integer/BitMisc.ll new file mode 100644 index 00000000000..8ce4d4add7d --- /dev/null +++ b/final/test/Integer/BitMisc.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + + +@MyVar = external global i19 +@MyIntList = external global { i39 *, i19 } + external global i19 ; i19*:0 + +@AConst = constant i19 -123 + +@AString = constant [4 x i8] c"test" + +@ZeroInit = global { [100 x i19 ], [40 x float ] } { [100 x i19] zeroinitializer, + [40 x float] zeroinitializer } + + +define i19 @"foo"(i19 %blah) +begin + store i19 5, i19* @MyVar + %idx = getelementptr { i39 *, i19 } * @MyIntList, i64 0, i32 1 + store i19 12, i19* %idx + ret i19 %blah +end diff --git a/final/test/Integer/BitPacked.ll b/final/test/Integer/BitPacked.ll new file mode 100644 index 00000000000..e6e453ab0c2 --- /dev/null +++ b/final/test/Integer/BitPacked.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +@foo1 = external global <4 x float> +@foo2 = external global <2 x i10> + + +define void @main() +{ + store <4 x float> , <4 x float>* @foo1 + store <2 x i10> , <2 x i10>* @foo2 + %l1 = load <4 x float>* @foo1 + %l2 = load <2 x i10>* @foo2 + %r1 = extractelement <2 x i10> %l2, i32 1 + %r2 = extractelement <2 x i10> %l2, i32 0 + %t = mul i10 %r1, %r2 + %r3 = insertelement <2 x i10> %l2, i10 %t, i32 0 + store <2 x i10> %r3, <2 x i10>* @foo2 + ret void +} diff --git a/final/test/Integer/alignment_bt.ll b/final/test/Integer/alignment_bt.ll new file mode 100644 index 00000000000..3a9d0511d7f --- /dev/null +++ b/final/test/Integer/alignment_bt.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + +@X = global i19 4, align 16 + +define i19 *@test() align 32 { + %X = alloca i19, align 4 + %Y = alloca i51, i32 42, align 16 + %Z = alloca i32, align 1 + ret i19 *%X +} + +define i19 *@test2() { + %X = malloc i19, align 4 + %Y = malloc i51, i32 42, align 16 + %Z = malloc i32, align 1 + ret i19 *%X +} + + diff --git a/final/test/Integer/basictest_bt.ll b/final/test/Integer/basictest_bt.ll new file mode 100644 index 00000000000..5c98856d06b --- /dev/null +++ b/final/test/Integer/basictest_bt.ll @@ -0,0 +1,31 @@ +; RUN: llvm-as %s -o - | llvm-dis > %t1.ll +; RUN: 
llvm-as %t1.ll -o - | llvm-dis > %t2.ll +; RUN: diff %t1.ll %t2.ll + + +; Test "stripped" format where nothing is symbolic... this is how the bytecode +; format looks anyways (except for negative vs positive offsets)... +; +define void @void(i39, i39) { + add i39 0, 0 ; <i39>:3 [#uses=2] + sub i39 0, 4 ; <i39>:4 [#uses=2] + br label %5 + +;<label>:5 + add i39 %3, %3 + sub i39 %4, %4 + ret void
} +; +define void @test() { + br label %Top + +Top: ; preds = %Out, %0 + br label %LoopHeader + +Next: ; preds = %LoopHeader + br i1 false, label %Inner, label %Out + +Inner: ; preds = %Inner, %Next + br i1 false, label %Inner, label %LoopHeader + +LoopHeader: ; preds = %Inner, %Top + br label %Next + +Out: ; preds = %Next + br i1 false, label %Top, label %Done + +Done: ; preds = %Out + ret void +} + diff --git a/final/test/Other/2004-08-16-PackedConstantInlineStore.ll b/final/test/Other/2004-08-16-PackedConstantInlineStore.ll new file mode 100644 index 00000000000..36ac4fd5af4 --- /dev/null +++ b/final/test/Other/2004-08-16-PackedConstantInlineStore.ll @@ -0,0 +1,8 @@ +; RUN: llvm-as < %s | llvm-dis +@bar = external global <2 x i32> ; <<2 x i32>*> [#uses=1] + +define void @main() { + store <2 x i32> < i32 0, i32 1 >, <2 x i32>* @bar + ret void +} + diff --git a/final/test/Other/2004-08-16-PackedGlobalConstant.ll b/final/test/Other/2004-08-16-PackedGlobalConstant.ll new file mode 100644 index 00000000000..9130ccba173 --- /dev/null +++ b/final/test/Other/2004-08-16-PackedGlobalConstant.ll @@ -0,0 +1,11 @@ +; RUN: llvm-as < %s | llvm-dis + +@foo = global <2 x i32> < i32 0, i32 1 > ; <<2 x i32>*> [#uses=1] +@bar = external global <2 x i32> ; <<2 x i32>*> [#uses=1] + +define void @main() { + %t0 = load <2 x i32>* @foo ; <<2 x i32>> [#uses=1] + store <2 x i32> %t0, <2 x i32>* @bar + ret void +} + diff --git a/final/test/Other/2004-08-16-PackedSelect.ll b/final/test/Other/2004-08-16-PackedSelect.ll new file mode 100644 index 00000000000..c1d6214dc63 --- /dev/null +++ b/final/test/Other/2004-08-16-PackedSelect.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llvm-dis + +@foo = external global <4 x float> ; <<4 x float>*> [#uses=1] +@bar = external global <4 x float> ; <<4 x float>*> [#uses=1] + +define void @main() { + %t0 = load <4 x float>* @foo ; <<4 x float>> [#uses=3] + %t1 = fadd <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1] + %t2 = select i1 true, <4 x float> %t0, <4 x float> %t1 ; <<4 x float>> [#uses=1] + store <4 x float> %t2, <4 x float>* @bar + ret void +} + diff --git a/final/test/Other/2004-08-16-PackedSimple.ll b/final/test/Other/2004-08-16-PackedSimple.ll new file mode 100644 index 00000000000..81cecd4235f --- /dev/null +++ b/final/test/Other/2004-08-16-PackedSimple.ll @@ -0,0 +1,13 @@ +; RUN: llvm-as < %s | llvm-dis + +@foo = external global <4 x float> ; <<4 x float>*> [#uses=1] +@bar = external global <4 x float> ; <<4 x float>*> [#uses=1] + +define void @main() { + %t0 = load <4 x float>* @foo ; <<4 x float>> [#uses=3] + %t2 = fadd <4 x float> %t0, %t0 ; <<4 x float>> [#uses=1] + %t3 = select i1 false, <4 x float> %t0, <4 x float> %t2 ; <<4 x float>> [#uses=1] + store <4 x float> %t3, <4 x float>* @bar + ret void +} + diff --git a/final/test/Other/2004-08-20-PackedControlFlow.ll b/final/test/Other/2004-08-20-PackedControlFlow.ll new file mode 100644 index 00000000000..39435706934 --- /dev/null +++ b/final/test/Other/2004-08-20-PackedControlFlow.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as < %s | llvm-dis | llvm-as > /dev/null + + %v4f = type <4 x float> +@foo = external global %v4f ; <%v4f*> [#uses=1] +@bar = external global %v4f ; <%v4f*> [#uses=1] + +define void @main() { + br label %A + +C: ; preds = %B + store %v4f %t2, %v4f* @bar + ret void + +B: ; preds = %A + %t2 = fadd %v4f %t0, %t0 ; <%v4f> [#uses=1] + br label %C + +A: ; preds = %0 + %t0 = load %v4f* @foo ; <%v4f> [#uses=2] + br label %B +} + diff --git a/final/test/Other/2006-02-05-PassManager.ll 
b/final/test/Other/2006-02-05-PassManager.ll new file mode 100644 index 00000000000..0ab5411aa19 --- /dev/null +++ b/final/test/Other/2006-02-05-PassManager.ll @@ -0,0 +1,5 @@ +; RUN: opt < %s -domtree -gvn -domtree -constmerge -disable-output + +define i32 @test1() { + unreachable +} diff --git a/final/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll b/final/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll new file mode 100644 index 00000000000..c436e07a9ca --- /dev/null +++ b/final/test/Other/2007-04-24-eliminate-mostly-empty-blocks.ll @@ -0,0 +1,309 @@ +;RUN: opt < %s -codegenprepare -disable-output + +define void @foo() { +entry: + br i1 false, label %cond_next31, label %cond_true + +cond_true: ; preds = %entry + br i1 false, label %cond_true19, label %cond_next31 + +cond_true19: ; preds = %cond_true + br i1 false, label %bb510, label %cond_next31 + +cond_next31: ; preds = %cond_true19, %cond_true, %entry + br i1 false, label %cond_true61, label %cond_next78 + +cond_true61: ; preds = %cond_next31 + br label %cond_next78 + +cond_next78: ; preds = %cond_true61, %cond_next31 + br i1 false, label %cond_true93, label %bb.preheader + +cond_true93: ; preds = %cond_next78 + br label %bb.preheader + +bb.preheader: ; preds = %cond_true93, %cond_next78 + %iftmp.11.0.ph.ph = phi i16 [ 0, %cond_true93 ], [ 0, %cond_next78 ] ; [#uses=1] + br label %bb + +bb: ; preds = %cond_next499, %bb.preheader + %n.1 = phi i16 [ %iftmp.11.0.ph.ph, %cond_next499 ], [ 0, %bb.preheader ] ; [#uses=0] + br i1 false, label %bb148.preheader, label %bb493 + +bb148.preheader: ; preds = %bb + br label %bb148 + +bb148: ; preds = %cond_next475, %bb148.preheader + br i1 false, label %cond_next175, label %bb184 + +cond_next175: ; preds = %bb148 + br i1 false, label %bb184, label %bb185 + +bb184: ; preds = %cond_next175, %bb148 + br label %bb185 + +bb185: ; preds = %bb184, %cond_next175 + br i1 false, label %bb420.preheader, label %cond_true198 + +bb420.preheader: ; preds = %bb185 + br label %bb420 + +cond_true198: ; preds = %bb185 + br i1 false, label %bb294, label %cond_next208 + +cond_next208: ; preds = %cond_true198 + br i1 false, label %cond_next249, label %cond_true214 + +cond_true214: ; preds = %cond_next208 + br i1 false, label %bb294, label %cond_next262 + +cond_next249: ; preds = %cond_next208 + br i1 false, label %bb294, label %cond_next262 + +cond_next262: ; preds = %cond_next249, %cond_true214 + br label %bb269 + +bb269: ; preds = %cond_next285, %cond_next262 + br i1 false, label %cond_next285, label %cond_true279 + +cond_true279: ; preds = %bb269 + br label %cond_next285 + +cond_next285: ; preds = %cond_true279, %bb269 + br i1 false, label %bb269, label %cond_next446.loopexit + +bb294: ; preds = %cond_next249, %cond_true214, %cond_true198 + br i1 false, label %cond_next336, label %cond_true301 + +cond_true301: ; preds = %bb294 + br i1 false, label %cond_false398, label %cond_true344 + +cond_next336: ; preds = %bb294 + br i1 false, label %cond_false398, label %cond_true344 + +cond_true344: ; preds = %cond_next336, %cond_true301 + br i1 false, label %cond_false381, label %cond_true351 + +cond_true351: ; preds = %cond_true344 + br label %cond_next387 + +cond_false381: ; preds = %cond_true344 + br label %cond_next387 + +cond_next387: ; preds = %cond_false381, %cond_true351 + br label %cond_next401 + +cond_false398: ; preds = %cond_next336, %cond_true301 + br label %cond_next401 + +cond_next401: ; preds = %cond_false398, %cond_next387 + br i1 false, label %cond_next475, label %cond_true453 + +bb420: ; preds 
= %cond_next434, %bb420.preheader + br i1 false, label %cond_next434, label %cond_true428 + +cond_true428: ; preds = %bb420 + br label %cond_next434 + +cond_next434: ; preds = %cond_true428, %bb420 + br i1 false, label %bb420, label %cond_next446.loopexit1 + +cond_next446.loopexit: ; preds = %cond_next285 + br label %cond_next446 + +cond_next446.loopexit1: ; preds = %cond_next434 + br label %cond_next446 + +cond_next446: ; preds = %cond_next446.loopexit1, %cond_next446.loopexit + br i1 false, label %cond_next475, label %cond_true453 + +cond_true453: ; preds = %cond_next446, %cond_next401 + br i1 false, label %cond_true458, label %cond_next475 + +cond_true458: ; preds = %cond_true453 + br label %cond_next475 + +cond_next475: ; preds = %cond_true458, %cond_true453, %cond_next446, %cond_next401 + br i1 false, label %bb493.loopexit, label %bb148 + +bb493.loopexit: ; preds = %cond_next475 + br label %bb493 + +bb493: ; preds = %bb493.loopexit, %bb + br i1 false, label %cond_next499, label %bb510.loopexit + +cond_next499: ; preds = %bb493 + br label %bb + +bb510.loopexit: ; preds = %bb493 + br label %bb510 + +bb510: ; preds = %bb510.loopexit, %cond_true19 + br i1 false, label %cond_next524, label %cond_true517 + +cond_true517: ; preds = %bb510 + br label %cond_next524 + +cond_next524: ; preds = %cond_true517, %bb510 + br i1 false, label %cond_next540, label %cond_true533 + +cond_true533: ; preds = %cond_next524 + br label %cond_next540 + +cond_next540: ; preds = %cond_true533, %cond_next524 + br i1 false, label %cond_true554, label %cond_next560 + +cond_true554: ; preds = %cond_next540 + br label %cond_next560 + +cond_next560: ; preds = %cond_true554, %cond_next540 + br i1 false, label %cond_true566, label %cond_next572 + +cond_true566: ; preds = %cond_next560 + br label %cond_next572 + +cond_next572: ; preds = %cond_true566, %cond_next560 + br i1 false, label %bb608.preheader, label %bb791.preheader + +bb608.preheader: ; preds = %cond_next797.us, %cond_next572 + br label %bb608 + +bb608: ; preds = %cond_next771, %bb608.preheader + br i1 false, label %cond_false627, label %cond_true613 + +cond_true613: ; preds = %bb608 + br label %cond_next640 + +cond_false627: ; preds = %bb608 + br label %cond_next640 + +cond_next640: ; preds = %cond_false627, %cond_true613 + br i1 false, label %cond_true653, label %cond_next671 + +cond_true653: ; preds = %cond_next640 + br label %cond_next671 + +cond_next671: ; preds = %cond_true653, %cond_next640 + br i1 false, label %cond_true683, label %cond_next724 + +cond_true683: ; preds = %cond_next671 + br i1 false, label %cond_next724, label %L1 + +cond_next724: ; preds = %cond_true683, %cond_next671 + br i1 false, label %cond_true735, label %L1 + +cond_true735: ; preds = %cond_next724 + br label %L1 + +L1: ; preds = %cond_true735, %cond_next724, %cond_true683 + br i1 false, label %cond_true745, label %cond_next771 + +cond_true745: ; preds = %L1 + br label %cond_next771 + +cond_next771: ; preds = %cond_true745, %L1 + br i1 false, label %bb608, label %bb791.preheader.loopexit + +bb791.preheader.loopexit: ; preds = %cond_next771 + br label %bb791.preheader + +bb791.preheader: ; preds = %bb791.preheader.loopexit, %cond_next572 + br i1 false, label %cond_next797.us, label %bb809.split + +cond_next797.us: ; preds = %bb791.preheader + br label %bb608.preheader + +bb809.split: ; preds = %bb791.preheader + br i1 false, label %cond_next827, label %cond_true820 + +cond_true820: ; preds = %bb809.split + br label %cond_next827 + +cond_next827: ; preds = %cond_true820, %bb809.split 
+ br i1 false, label %cond_true833, label %cond_next840 + +cond_true833: ; preds = %cond_next827 + br label %cond_next840 + +cond_next840: ; preds = %cond_true833, %cond_next827 + br i1 false, label %bb866, label %bb1245 + +bb866: ; preds = %bb1239, %cond_next840 + br i1 false, label %cond_true875, label %bb911 + +cond_true875: ; preds = %bb866 + br label %cond_next1180 + +bb911: ; preds = %bb866 + switch i32 0, label %bb1165 [ + i32 0, label %bb915 + i32 1, label %bb932 + i32 2, label %bb941 + i32 3, label %bb1029 + i32 4, label %bb1036 + i32 5, label %bb1069 + i32 6, label %L3 + ] + +bb915: ; preds = %cond_next1171, %bb911 + br i1 false, label %cond_next1171, label %cond_next1180 + +bb932: ; preds = %cond_next1171, %bb911 + br label %L1970 + +bb941: ; preds = %cond_next1171, %bb911 + br label %L1970 + +L1970: ; preds = %bb941, %bb932 + br label %bb1165 + +bb1029: ; preds = %cond_next1171, %bb911 + br label %L4 + +bb1036: ; preds = %cond_next1171, %bb911 + br label %L4 + +bb1069: ; preds = %cond_next1171, %bb911 + br i1 false, label %cond_next1121, label %cond_true1108 + +L3: ; preds = %cond_next1171, %bb911 + br i1 false, label %cond_next1121, label %cond_true1108 + +cond_true1108: ; preds = %L3, %bb1069 + br label %L4 + +cond_next1121: ; preds = %L3, %bb1069 + br label %L4 + +L4: ; preds = %cond_next1121, %cond_true1108, %bb1036, %bb1029 + br label %bb1165 + +bb1165: ; preds = %cond_next1171, %L4, %L1970, %bb911 + br i1 false, label %cond_next1171, label %cond_next1180 + +cond_next1171: ; preds = %bb1165, %bb915 + switch i32 0, label %bb1165 [ + i32 0, label %bb915 + i32 1, label %bb932 + i32 2, label %bb941 + i32 3, label %bb1029 + i32 4, label %bb1036 + i32 5, label %bb1069 + i32 6, label %L3 + ] + +cond_next1180: ; preds = %bb1165, %bb915, %cond_true875 + br label %bb1239 + +bb1239: ; preds = %cond_next1251, %cond_next1180 + br i1 false, label %bb866, label %bb1245 + +bb1245: ; preds = %bb1239, %cond_next840 + br i1 false, label %cond_next1251, label %bb1257 + +cond_next1251: ; preds = %bb1245 + br label %bb1239 + +bb1257: ; preds = %bb1245 + ret void +} diff --git a/final/test/Other/2007-06-05-PassID.ll b/final/test/Other/2007-06-05-PassID.ll new file mode 100644 index 00000000000..2554b8b9edd --- /dev/null +++ b/final/test/Other/2007-06-05-PassID.ll @@ -0,0 +1,11 @@ +;RUN: opt < %s -analyze -dot-cfg-only 2>/dev/null +;PR 1497 + +define void @foo() { +entry: + br label %return + +return: ; preds = %entry + ret void +} + diff --git a/final/test/Other/2007-06-28-PassManager.ll b/final/test/Other/2007-06-28-PassManager.ll new file mode 100644 index 00000000000..0ed275948db --- /dev/null +++ b/final/test/Other/2007-06-28-PassManager.ll @@ -0,0 +1,7 @@ +; RUN: opt < %s -analyze -inline +; PR1526 +; RUN: opt < %s -analyze -indvars +; PR1539 +define i32 @test1() { + ret i32 0 +} diff --git a/final/test/Other/2007-09-10-PassManager.ll b/final/test/Other/2007-09-10-PassManager.ll new file mode 100644 index 00000000000..ded15e56951 --- /dev/null +++ b/final/test/Other/2007-09-10-PassManager.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -loop-unswitch -indvars -disable-output +; Require SCEV before LCSSA. 
+define void @foo() { +entry: + %i = alloca i32, align 4 ; [#uses=5] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + store i32 0, i32* %i, align 4 + br label %bb3 + +bb: ; preds = %bb3 + %tmp = load i32* %i, align 4 ; [#uses=1] + call void @bar( i32 %tmp ) + %tmp1 = load i32* %i, align 4 ; [#uses=1] + %tmp2 = add i32 %tmp1, 1 ; [#uses=1] + store i32 %tmp2, i32* %i, align 4 + br label %bb3 + +bb3: ; preds = %bb, %entry + %tmp4 = load i32* %i, align 4 ; [#uses=1] + %tmp5 = icmp sle i32 %tmp4, 9 ; [#uses=1] + %tmp56 = zext i1 %tmp5 to i8 ; [#uses=1] + %toBool = icmp ne i8 %tmp56, 0 ; [#uses=1] + br i1 %toBool, label %bb, label %bb7 + +bb7: ; preds = %bb3 + br label %return + +return: ; preds = %bb7 + ret void +} + +declare void @bar(i32) diff --git a/final/test/Other/2008-02-14-PassManager.ll b/final/test/Other/2008-02-14-PassManager.ll new file mode 100644 index 00000000000..bdaf9330d23 --- /dev/null +++ b/final/test/Other/2008-02-14-PassManager.ll @@ -0,0 +1,5 @@ +; RUN: opt < %s -loop-unroll -loop-rotate -simplifycfg -disable-output +; PR2028 +define i32 @test1() { + ret i32 0 +} diff --git a/final/test/Other/2008-06-04-FieldSizeInPacked.ll b/final/test/Other/2008-06-04-FieldSizeInPacked.ll new file mode 100644 index 00000000000..d90209f6af3 --- /dev/null +++ b/final/test/Other/2008-06-04-FieldSizeInPacked.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -instcombine -S | grep true + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + %packed = type <{ x86_fp80, i8 }> + %unpacked = type { x86_fp80, i8 } + +define i1 @q() nounwind { +entry: + %char_p = getelementptr %packed* null, i32 0, i32 1 ; [#uses=1] + %char_u = getelementptr %unpacked* null, i32 0, i32 1 ; [#uses=1] + %res = icmp eq i8* %char_p, %char_u ; [#uses=1] + ret i1 %res +} diff --git a/final/test/Other/2008-10-06-RemoveDeadPass.ll b/final/test/Other/2008-10-06-RemoveDeadPass.ll new file mode 100644 index 00000000000..7cec2c57c2f --- /dev/null +++ b/final/test/Other/2008-10-06-RemoveDeadPass.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -inline -internalize -disable-output +define void @foo() nounwind { + ret void +} + +define void @main(...) 
nounwind { + call void @foo() + ret void +} + + diff --git a/final/test/Other/2008-10-15-MissingSpace.ll b/final/test/Other/2008-10-15-MissingSpace.ll new file mode 100644 index 00000000000..a61fa614213 --- /dev/null +++ b/final/test/Other/2008-10-15-MissingSpace.ll @@ -0,0 +1,7 @@ +; RUN: llvm-as < %s | llvm-dis | not grep {void@} +; PR2894 +declare void @g() +define void @f() { + invoke void @g() to label %c unwind label %c + c: ret void +} diff --git a/final/test/Other/2009-03-31-CallGraph.ll b/final/test/Other/2009-03-31-CallGraph.ll new file mode 100644 index 00000000000..d6653ecbe8e --- /dev/null +++ b/final/test/Other/2009-03-31-CallGraph.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -inline -prune-eh -disable-output +define void @f2() { + invoke void @f6() + to label %ok1 unwind label %lpad1 + +ok1: + ret void + +lpad1: + invoke void @f4() + to label %ok2 unwind label %lpad2 + +ok2: + call void @f8() + unreachable + +lpad2: + unreachable +} + +declare void @f3() + +define void @f4() { + call void @f3() + ret void +} + +declare void @f6() nounwind + +declare void @f8() + diff --git a/final/test/Other/2009-06-05-no-implicit-float.ll b/final/test/Other/2009-06-05-no-implicit-float.ll new file mode 100644 index 00000000000..3f071703167 --- /dev/null +++ b/final/test/Other/2009-06-05-no-implicit-float.ll @@ -0,0 +1,4 @@ + +; RUN: opt < %s -verify -S | grep noimplicitfloat +declare void @f() noimplicitfloat + diff --git a/final/test/Other/2009-09-14-function-elements.ll b/final/test/Other/2009-09-14-function-elements.ll new file mode 100644 index 00000000000..883d76d1075 --- /dev/null +++ b/final/test/Other/2009-09-14-function-elements.ll @@ -0,0 +1,6 @@ +; RUN: not llvm-as %s -disable-output 2>/dev/null + +; Arrays and structures with function types (not function pointers) are illegal. + +@foo = external global [4 x i32 (i32)] +@bar = external global { i32 (i32) } diff --git a/final/test/Other/2010-05-06-Printer.ll b/final/test/Other/2010-05-06-Printer.ll new file mode 100644 index 00000000000..1cbe78dab7d --- /dev/null +++ b/final/test/Other/2010-05-06-Printer.ll @@ -0,0 +1,6 @@ +; RUN: llc -O2 -print-after-all < %s 2>/dev/null + +define void @tester(){ + ret void +} + diff --git a/final/test/Other/close-stderr.ll b/final/test/Other/close-stderr.ll new file mode 100644 index 00000000000..40a01cc3143 --- /dev/null +++ b/final/test/Other/close-stderr.ll @@ -0,0 +1,11 @@ +; RUN: sh -c "\ +; RUN: opt --reject-this-option 2>&-; echo \$?; \ +; RUN: opt -o /dev/null /dev/null 2>&-; echo \$?; \ +; RUN: " | FileCheck %s +; CHECK: {{^1$}} +; CHECK: {{^0$}} +; XFAIL: vg_leak +; REQUIRES: shell + +; Test that the error handling when writing to stderr fails exits the +; program cleanly rather than aborting. diff --git a/final/test/Other/constant-fold-gep.ll b/final/test/Other/constant-fold-gep.ll new file mode 100644 index 00000000000..926bdbc1b46 --- /dev/null +++ b/final/test/Other/constant-fold-gep.ll @@ -0,0 +1,459 @@ +; "PLAIN" - No optimizations. This tests the target-independent +; constant folder. +; RUN: opt -S -o - < %s | FileCheck --check-prefix=PLAIN %s + +; "OPT" - Optimizations but no targetdata. This tests target-independent +; folding in the optimizers. +; RUN: opt -S -o - -instcombine -globalopt < %s | FileCheck --check-prefix=OPT %s + +; "TO" - Optimizations and targetdata. This tests target-dependent +; folding in the optimizers. 
+; RUN: opt -S -o - -instcombine -globalopt -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" < %s | FileCheck --check-prefix=TO %s + +; "SCEV" - ScalarEvolution but no targetdata. +; RUN: opt -analyze -scalar-evolution < %s | FileCheck --check-prefix=SCEV %s + +; ScalarEvolution with targetdata isn't interesting on these testcases +; because ScalarEvolution doesn't attempt to duplicate all of instcombine's +; and the constant folders' folding. + +; PLAIN: %0 = type { i1, double } +; PLAIN: %1 = type { double, float, double, double } +; PLAIN: %2 = type { i1, i1* } +; PLAIN: %3 = type { i64, i64 } +; PLAIN: %4 = type { i32, i32 } +; OPT: %0 = type { i1, double } +; OPT: %1 = type { double, float, double, double } +; OPT: %2 = type { i1, i1* } +; OPT: %3 = type { i64, i64 } +; OPT: %4 = type { i32, i32 } + +; The automatic constant folder in opt does not have targetdata access, so +; it can't fold gep arithmetic, in general. However, the constant folder run +; from instcombine and global opt can use targetdata. + +; PLAIN: @G8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) +; PLAIN: @G1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) +; PLAIN: @F8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) +; PLAIN: @F1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) +; PLAIN: @H8 = global i8* getelementptr (i8* null, i32 -1) +; PLAIN: @H1 = global i1* getelementptr (i1* null, i32 -1) +; OPT: @G8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) +; OPT: @G1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) +; OPT: @F8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) +; OPT: @F1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) +; OPT: @H8 = global i8* getelementptr (i8* null, i32 -1) +; OPT: @H1 = global i1* getelementptr (i1* null, i32 -1) +; TO: @G8 = global i8* null +; TO: @G1 = global i1* null +; TO: @F8 = global i8* inttoptr (i64 -1 to i8*) +; TO: @F1 = global i1* inttoptr (i64 -1 to i1*) +; TO: @H8 = global i8* inttoptr (i64 -1 to i8*) +; TO: @H1 = global i1* inttoptr (i64 -1 to i1*) + +@G8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) +@G1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) +@F8 = global i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) +@F1 = global i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) +@H8 = global i8* getelementptr (i8* inttoptr (i32 0 to i8*), i32 -1) +@H1 = global i1* getelementptr (i1* inttoptr (i32 0 to i1*), i32 -1) + +; The target-independent folder should be able to do some clever +; simplifications on sizeof, alignof, and offsetof expressions. The +; target-dependent folder should fold these down to constants. 
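+;
+; For illustration (a hypothetical global, not one of the checked cases): the
+; idiom used throughout this file spells the size of a type T, with no
+; targetdata available, as
+;   ptrtoint (T* getelementptr (T* null, i32 1) to i64)
+; e.g. a layout with "f64:64:64" folds the double variant down to i64 8.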
+ +; PLAIN: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) +; PLAIN: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; PLAIN: @c = constant i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) +; PLAIN: @d = constant i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) +; PLAIN: @e = constant i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) +; PLAIN: @f = constant i64 1 +; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) +; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) +; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) +; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) +; OPT: @d = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) +; OPT: @e = constant i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) +; OPT: @f = constant i64 1 +; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) +; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) +; TO: @a = constant i64 18480 +; TO: @b = constant i64 8 +; TO: @c = constant i64 16 +; TO: @d = constant i64 88 +; TO: @e = constant i64 16 +; TO: @f = constant i64 1 +; TO: @g = constant i64 8 +; TO: @h = constant i64 8 +; TO: @i = constant i64 8 + +@a = constant i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) +@b = constant i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64) +@c = constant i64 ptrtoint (double* getelementptr ({double, double, double, double}* null, i64 0, i32 2) to i64) +@d = constant i64 ptrtoint (double* getelementptr ([13 x double]* null, i64 0, i32 11) to i64) +@e = constant i64 ptrtoint (double* getelementptr ({double, float, double, double}* null, i64 0, i32 2) to i64) +@f = constant i64 ptrtoint (<{ i16, i128 }>* getelementptr ({i1, <{ i16, i128 }>}* null, i64 0, i32 1) to i64) +@g = constant i64 ptrtoint ({double, double}* getelementptr ({i1, {double, double}}* null, i64 0, i32 1) to i64) +@h = constant i64 ptrtoint (double** getelementptr (double** null, i64 1) to i64) +@i = constant i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) + +; The target-dependent folder should cast GEP indices to integer-sized pointers. 
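+;
+; That is, under the 64-bit data layout used in the TO run, an i32 index as in
+;   getelementptr (i64* null, i32 1)
+; is widened to i64 before being scaled, which is why the TO lines below
+; expect inttoptr (i64 8 to i64*).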
+ +; PLAIN: @M = constant i64* getelementptr (i64* null, i32 1) +; PLAIN: @N = constant i64* getelementptr (%3* null, i32 0, i32 1) +; PLAIN: @O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1) +; OPT: @M = constant i64* getelementptr (i64* null, i32 1) +; OPT: @N = constant i64* getelementptr (%3* null, i32 0, i32 1) +; OPT: @O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1) +; TO: @M = constant i64* inttoptr (i64 8 to i64*) +; TO: @N = constant i64* inttoptr (i64 8 to i64*) +; TO: @O = constant i64* inttoptr (i64 8 to i64*) + +@M = constant i64* getelementptr (i64* null, i32 1) +@N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) +@O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1) + +; Fold GEP of a GEP. Theoretically some of these cases could be folded +; without using targetdata, however that's not implemented yet. + +; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) +; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) +; TO: @Z = global i32* getelementptr inbounds ([3 x %0]* @ext, i64 0, i64 1, i32 1) + +@ext = external global [3 x { i32, i32 }] +@Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) + +; Duplicate all of the above as function return values rather than +; global initializers. + +; PLAIN: define i8* @goo8() nounwind { +; PLAIN: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) to i8* +; PLAIN: ret i8* %t +; PLAIN: } +; PLAIN: define i1* @goo1() nounwind { +; PLAIN: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) to i1* +; PLAIN: ret i1* %t +; PLAIN: } +; PLAIN: define i8* @foo8() nounwind { +; PLAIN: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) to i8* +; PLAIN: ret i8* %t +; PLAIN: } +; PLAIN: define i1* @foo1() nounwind { +; PLAIN: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) to i1* +; PLAIN: ret i1* %t +; PLAIN: } +; PLAIN: define i8* @hoo8() nounwind { +; PLAIN: %t = bitcast i8* getelementptr (i8* null, i32 -1) to i8* +; PLAIN: ret i8* %t +; PLAIN: } +; PLAIN: define i1* @hoo1() nounwind { +; PLAIN: %t = bitcast i1* getelementptr (i1* null, i32 -1) to i1* +; PLAIN: ret i1* %t +; PLAIN: } +; OPT: define i8* @goo8() nounwind { +; OPT: ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) +; OPT: } +; OPT: define i1* @goo1() nounwind { +; OPT: ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) +; OPT: } +; OPT: define i8* @foo8() nounwind { +; OPT: ret i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) +; OPT: } +; OPT: define i1* @foo1() nounwind { +; OPT: ret i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) +; OPT: } +; OPT: define i8* @hoo8() nounwind { +; OPT: ret i8* getelementptr (i8* null, i32 -1) +; OPT: } +; OPT: define i1* @hoo1() nounwind { +; OPT: ret i1* getelementptr (i1* null, i32 -1) +; OPT: } +; TO: define i8* @goo8() nounwind { +; TO: ret i8* null +; TO: } +; TO: define i1* @goo1() nounwind { +; TO: ret i1* null +; TO: } +; TO: define i8* @foo8() nounwind { +; TO: ret i8* inttoptr (i64 -1 to i8*) +; TO: } +; TO: define i1* @foo1() nounwind { +; TO: ret i1* inttoptr (i64 -1 to i1*) +; TO: } +; TO: define i8* @hoo8() nounwind { +; TO: ret i8* inttoptr (i64 -1 to i8*) +; TO: } +; TO: define i1* @hoo1() nounwind { +; TO: ret i1* inttoptr (i64 -1 to i1*) +; TO: } +; SCEV: Classifying 
expressions for: @goo8 +; SCEV: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) to i8* +; SCEV: --> ((-1 * sizeof(i8)) + inttoptr (i32 1 to i8*)) +; SCEV: Classifying expressions for: @goo1 +; SCEV: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) to i1* +; SCEV: --> ((-1 * sizeof(i1)) + inttoptr (i32 1 to i1*)) +; SCEV: Classifying expressions for: @foo8 +; SCEV: %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) to i8* +; SCEV: --> ((-2 * sizeof(i8)) + inttoptr (i32 1 to i8*)) +; SCEV: Classifying expressions for: @foo1 +; SCEV: %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) to i1* +; SCEV: --> ((-2 * sizeof(i1)) + inttoptr (i32 1 to i1*)) +; SCEV: Classifying expressions for: @hoo8 +; SCEV: --> (-1 * sizeof(i8)) +; SCEV: Classifying expressions for: @hoo1 +; SCEV: --> (-1 * sizeof(i1)) + +define i8* @goo8() nounwind { + %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -1) to i8* + ret i8* %t +} +define i1* @goo1() nounwind { + %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -1) to i1* + ret i1* %t +} +define i8* @foo8() nounwind { + %t = bitcast i8* getelementptr (i8* inttoptr (i32 1 to i8*), i32 -2) to i8* + ret i8* %t +} +define i1* @foo1() nounwind { + %t = bitcast i1* getelementptr (i1* inttoptr (i32 1 to i1*), i32 -2) to i1* + ret i1* %t +} +define i8* @hoo8() nounwind { + %t = bitcast i8* getelementptr (i8* inttoptr (i32 0 to i8*), i32 -1) to i8* + ret i8* %t +} +define i1* @hoo1() nounwind { + %t = bitcast i1* getelementptr (i1* inttoptr (i32 0 to i1*), i32 -1) to i1* + ret i1* %t +} + +; PLAIN: define i64 @fa() nounwind { +; PLAIN: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fb() nounwind { +; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fc() nounwind { +; PLAIN: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fd() nounwind { +; PLAIN: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fe() nounwind { +; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @ff() nounwind { +; PLAIN: %t = bitcast i64 1 to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fg() nounwind { +; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fh() nounwind { +; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; PLAIN: define i64 @fi() nounwind { +; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 +; PLAIN: ret i64 %t +; PLAIN: } +; OPT: define i64 @fa() nounwind { +; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) +; OPT: } +; OPT: define i64 @fb() nounwind { +; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; OPT: } +; OPT: define i64 @fc() nounwind { +; OPT: ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) +; 
OPT: } +; OPT: define i64 @fd() nounwind { +; OPT: ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) +; OPT: } +; OPT: define i64 @fe() nounwind { +; OPT: ret i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) +; OPT: } +; OPT: define i64 @ff() nounwind { +; OPT: ret i64 1 +; OPT: } +; OPT: define i64 @fg() nounwind { +; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) +; OPT: } +; OPT: define i64 @fh() nounwind { +; OPT: ret i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) +; OPT: } +; OPT: define i64 @fi() nounwind { +; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) +; OPT: } +; TO: define i64 @fa() nounwind { +; TO: ret i64 18480 +; TO: } +; TO: define i64 @fb() nounwind { +; TO: ret i64 8 +; TO: } +; TO: define i64 @fc() nounwind { +; TO: ret i64 16 +; TO: } +; TO: define i64 @fd() nounwind { +; TO: ret i64 88 +; TO: } +; TO: define i64 @fe() nounwind { +; TO: ret i64 16 +; TO: } +; TO: define i64 @ff() nounwind { +; TO: ret i64 1 +; TO: } +; TO: define i64 @fg() nounwind { +; TO: ret i64 8 +; TO: } +; TO: define i64 @fh() nounwind { +; TO: ret i64 8 +; TO: } +; TO: define i64 @fi() nounwind { +; TO: ret i64 8 +; TO: } +; SCEV: Classifying expressions for: @fa +; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64 +; SCEV: --> (2310 * sizeof(double)) +; SCEV: Classifying expressions for: @fb +; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 +; SCEV: --> alignof(double) +; SCEV: Classifying expressions for: @fc +; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) to i64 +; SCEV: --> (2 * sizeof(double)) +; SCEV: Classifying expressions for: @fd +; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) to i64 +; SCEV: --> (11 * sizeof(double)) +; SCEV: Classifying expressions for: @fe +; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) to i64 +; SCEV: --> offsetof({ double, float, double, double }, 2) +; SCEV: Classifying expressions for: @ff +; SCEV: %t = bitcast i64 1 to i64 +; SCEV: --> 1 +; SCEV: Classifying expressions for: @fg +; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64 +; SCEV: --> alignof(double) +; SCEV: Classifying expressions for: @fh +; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) to i64 +; SCEV: --> sizeof(i1*) +; SCEV: Classifying expressions for: @fi +; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64 +; SCEV: --> alignof(i1*) + +define i64 @fa() nounwind { + %t = bitcast i64 mul (i64 3, i64 mul (i64 ptrtoint ({[7 x double], [7 x double]}* getelementptr ({[7 x double], [7 x double]}* null, i64 11) to i64), i64 5)) to i64 + ret i64 %t +} +define i64 @fb() nounwind { + %t = bitcast i64 ptrtoint ([13 x double]* getelementptr ({i1, [13 x double]}* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fc() nounwind { + %t = bitcast i64 ptrtoint (double* getelementptr ({double, double, double, double}* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @fd() nounwind { + %t = bitcast i64 ptrtoint (double* getelementptr ([13 x double]* null, i64 0, i32 11) to i64) to i64 + ret i64 %t +} +define i64 @fe() nounwind { + %t = bitcast i64 ptrtoint (double* 
getelementptr ({double, float, double, double}* null, i64 0, i32 2) to i64) to i64 + ret i64 %t +} +define i64 @ff() nounwind { + %t = bitcast i64 ptrtoint (<{ i16, i128 }>* getelementptr ({i1, <{ i16, i128 }>}* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fg() nounwind { + %t = bitcast i64 ptrtoint ({double, double}* getelementptr ({i1, {double, double}}* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fh() nounwind { + %t = bitcast i64 ptrtoint (double** getelementptr (double** null, i32 1) to i64) to i64 + ret i64 %t +} +define i64 @fi() nounwind { + %t = bitcast i64 ptrtoint (double** getelementptr ({i1, double*}* null, i64 0, i32 1) to i64) to i64 + ret i64 %t +} + +; PLAIN: define i64* @fM() nounwind { +; PLAIN: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; PLAIN: define i64* @fN() nounwind { +; PLAIN: %t = bitcast i64* getelementptr (%3* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; PLAIN: define i64* @fO() nounwind { +; PLAIN: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* +; PLAIN: ret i64* %t +; PLAIN: } +; OPT: define i64* @fM() nounwind { +; OPT: ret i64* getelementptr (i64* null, i32 1) +; OPT: } +; OPT: define i64* @fN() nounwind { +; OPT: ret i64* getelementptr (%3* null, i32 0, i32 1) +; OPT: } +; OPT: define i64* @fO() nounwind { +; OPT: ret i64* getelementptr ([2 x i64]* null, i32 0, i32 1) +; OPT: } +; TO: define i64* @fM() nounwind { +; TO: ret i64* inttoptr (i64 8 to i64*) +; TO: } +; TO: define i64* @fN() nounwind { +; TO: ret i64* inttoptr (i64 8 to i64*) +; TO: } +; TO: define i64* @fO() nounwind { +; TO: ret i64* inttoptr (i64 8 to i64*) +; TO: } +; SCEV: Classifying expressions for: @fM +; SCEV: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* +; SCEV: --> sizeof(i64) +; SCEV: Classifying expressions for: @fN +; SCEV: %t = bitcast i64* getelementptr (%3* null, i32 0, i32 1) to i64* +; SCEV: --> sizeof(i64) +; SCEV: Classifying expressions for: @fO +; SCEV: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* +; SCEV: --> sizeof(i64) + +define i64* @fM() nounwind { + %t = bitcast i64* getelementptr (i64* null, i32 1) to i64* + ret i64* %t +} +define i64* @fN() nounwind { + %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64* + ret i64* %t +} +define i64* @fO() nounwind { + %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64* + ret i64* %t +} + +; PLAIN: define i32* @fZ() nounwind { +; PLAIN: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) to i32* +; PLAIN: ret i32* %t +; PLAIN: } +; OPT: define i32* @fZ() nounwind { +; OPT: ret i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) +; OPT: } +; TO: define i32* @fZ() nounwind { +; TO: ret i32* getelementptr inbounds ([3 x %0]* @ext, i64 0, i64 1, i32 1) +; TO: } +; SCEV: Classifying expressions for: @fZ +; SCEV: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) to i32* +; SCEV: --> ((3 * sizeof(i32)) + @ext) + +define i32* @fZ() nounwind { + %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32* + ret i32* %t +} diff --git a/final/test/Other/dg.exp b/final/test/Other/dg.exp new file mode 100644 index 00000000000..f2005891a59 --- /dev/null +++ 
b/final/test/Other/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]] diff --git a/final/test/Other/extract.ll b/final/test/Other/extract.ll new file mode 100644 index 00000000000..57573ed76f9 --- /dev/null +++ b/final/test/Other/extract.ll @@ -0,0 +1,27 @@ +; RUN: llvm-extract -func foo -S < %s | FileCheck %s +; RUN: llvm-extract -delete -func foo -S < %s | FileCheck --check-prefix=DELETE %s +; RUN: llvm-as < %s > %t +; RUN: llvm-extract -func foo -S %t | FileCheck %s +; RUN: llvm-extract -delete -func foo -S %t | FileCheck --check-prefix=DELETE %s + +; llvm-extract uses lazy bitcode loading, so make sure it correctly reads +; from bitcode files in addition to assembly files. + +; CHECK: define void @foo() { +; CHECK: ret void +; CHECK: } + +; The linkonce_odr linkage for foo() should be changed to external linkage. +; DELETE: declare void @foo() +; DELETE: define void @bar() { +; DELETE: call void @foo() +; DELETE: ret void +; DELETE: } + +define linkonce_odr void @foo() { + ret void +} +define void @bar() { + call void @foo() + ret void +} diff --git a/final/test/Other/inline-asm-newline-terminator.ll b/final/test/Other/inline-asm-newline-terminator.ll new file mode 100644 index 00000000000..af93cc0dd2a --- /dev/null +++ b/final/test/Other/inline-asm-newline-terminator.ll @@ -0,0 +1,6 @@ +; RUN: llc -filetype=obj -o - < %s + +; ModuleID = 't.c' +target triple = "x86_64-apple-darwin10.0.0" + +module asm ".desc _f0, 0x10" diff --git a/final/test/Other/invalid-commandline-option.ll b/final/test/Other/invalid-commandline-option.ll new file mode 100644 index 00000000000..60840fa010a --- /dev/null +++ b/final/test/Other/invalid-commandline-option.ll @@ -0,0 +1,3 @@ +; RUN: not opt --foo |& grep {Unknown command line argument} + +; there is no --foo diff --git a/final/test/Other/lint.ll b/final/test/Other/lint.ll new file mode 100644 index 00000000000..4aa984e2e1b --- /dev/null +++ b/final/test/Other/lint.ll @@ -0,0 +1,167 @@ +; RUN: opt -basicaa -lint -disable-output < %s |& FileCheck %s +target datalayout = "e-p:64:64:64" + +declare fastcc void @bar() +declare void @llvm.stackrestore(i8*) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind +declare void @has_sret(i8* sret %p) +declare void @has_noaliases(i32* noalias %p, i32* %q) +declare void @one_arg(i32) + +@CG = constant i32 7 + +define i32 @foo() noreturn { +; CHECK: Caller and callee calling convention differ + call void @bar() +; CHECK: Null pointer dereference + store i32 0, i32* null +; CHECK: Null pointer dereference + %t = load i32* null +; CHECK: Undef pointer dereference + store i32 0, i32* undef +; CHECK: Undef pointer dereference + %u = load i32* undef +; CHECK: All-ones pointer dereference + store i32 0, i32* inttoptr (i64 -1 to i32*) +; CHECK: Address one pointer dereference + store i32 0, i32* inttoptr (i64 1 to i32*) +; CHECK: Memory reference address is misaligned + %x = inttoptr i32 1 to i32* + load i32* %x, align 4 +; CHECK: Division by zero + %sd = sdiv i32 2, 0 +; CHECK: Division by zero + %ud = udiv i32 2, 0 +; CHECK: Division by zero + %sr = srem i32 2, 0 +; CHECK: Division by zero + %ur = urem i32 2, 0 +; CHECK: extractelement index out of range + %ee = extractelement <4 x i32> zeroinitializer, i32 4 +; CHECK: insertelement index out of range + %ie = insertelement <4 x i32> zeroinitializer, i32 0, i32 4 +; CHECK: Shift count out of range + %r = lshr i32 0, 32 +; CHECK: Shift count out of range + %q = ashr i32 0, 
32 +; CHECK: Shift count out of range + %l = shl i32 0, 32 +; CHECK: xor(undef, undef) + %xx = xor i32 undef, undef +; CHECK: sub(undef, undef) + %xs = sub i32 undef, undef + +; CHECK: Write to read-only memory + store i32 8, i32* @CG +; CHECK: Write to text section + store i32 8, i32* bitcast (i32()* @foo to i32*) +; CHECK: Load from block address + %lb = load i32* bitcast (i8* blockaddress(@foo, %next) to i32*) +; CHECK: Call to block address + call void()* bitcast (i8* blockaddress(@foo, %next) to void()*)() +; CHECK: Undefined behavior: Null pointer dereference + call void @llvm.stackrestore(i8* null) +; CHECK: Undefined behavior: Null pointer dereference + call void @has_sret(i8* null) +; CHECK: Unusual: noalias argument aliases another argument + call void @has_noaliases(i32* @CG, i32* @CG) +; CHECK: Call argument count mismatches callee argument count + call void (i32, i32)* bitcast (void (i32)* @one_arg to void (i32, i32)*)(i32 0, i32 0) +; CHECK: Call argument count mismatches callee argument count + call void ()* bitcast (void (i32)* @one_arg to void ()*)() +; CHECK: Call argument type mismatches callee parameter type + call void (float)* bitcast (void (i32)* @one_arg to void (float)*)(float 0.0) + +; CHECK: Write to read-only memory + call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (i32* @CG to i8*), i8* bitcast (i32* @CG to i8*), i64 1, i32 1, i1 0) + + br label %next + +next: +; CHECK: Static alloca outside of entry block + %a = alloca i32 +; CHECK: Return statement in function with noreturn attribute + ret i32 0 + +foo: + %z = add i32 0, 0 +; CHECK: unreachable immediately preceded by instruction without side effects + unreachable +} + +; CHECK: Unnamed function with non-local linkage +define void @0() nounwind { + ret void +} + +; CHECK: va_start called in a non-varargs function +declare void @llvm.va_start(i8*) +define void @not_vararg(i8* %p) nounwind { + call void @llvm.va_start(i8* %p) + ret void +} + +; CHECK: Undefined behavior: Branch to non-blockaddress +define void @use_indbr() { + indirectbr i8* bitcast (i32()* @foo to i8*), [label %block] +block: + unreachable +} + +; CHECK: Undefined behavior: Call with "tail" keyword references alloca +declare void @tailcallee(i8*) +define void @use_tail(i8* %valist) { + %t = alloca i8 + tail call void @tailcallee(i8* %t) + ret void +} + +; CHECK: Unusual: Returning alloca value +define i8* @return_local(i32 %n, i32 %m) { + %t = alloca i8, i32 %n + %s = getelementptr i8* %t, i32 %m + ret i8* %s +} + +; CHECK: Unusual: Returning alloca value +define i32* @return_obscured_local() { +entry: + %retval = alloca i32* + %x = alloca i32 + store i32* %x, i32** %retval + br label %next +next: + %t0 = load i32** %retval + %t1 = insertvalue { i32, i32, i32* } zeroinitializer, i32* %t0, 2 + %t2 = extractvalue { i32, i32, i32* } %t1, 2 + br label %exit +exit: + %t3 = phi i32* [ %t2, %next ] + %t4 = bitcast i32* %t3 to i32* + %t5 = ptrtoint i32* %t4 to i64 + %t6 = add i64 %t5, 0 + %t7 = inttoptr i64 %t6 to i32* + ret i32* %t7 +} + +; CHECK: Undefined behavior: Undef pointer dereference +define i32* @self_reference() { +entry: + unreachable +exit: + %t3 = phi i32* [ %t4, %exit ] + %t4 = bitcast i32* %t3 to i32* + %x = volatile load i32* %t3 + br label %exit +} + +; CHECK: Call return type mismatches callee return type +%struct = type { double, double } +declare i32 @nonstruct_callee() nounwind +define void @struct_caller() nounwind { +entry: + call %struct bitcast (i32 ()* @foo to %struct ()*)() + + ; CHECK: Undefined behavior: indirectbr with 
no destinations + indirectbr i8* null, [] +} diff --git a/final/test/Scripts/README.txt b/final/test/Scripts/README.txt new file mode 100644 index 00000000000..b0b11050375 --- /dev/null +++ b/final/test/Scripts/README.txt @@ -0,0 +1,2 @@ +This directory contains scripts which are used by the TestRunner style +tests, which allows them to be simpler and more direct. diff --git a/final/test/Scripts/coff-dump.py b/final/test/Scripts/coff-dump.py new file mode 100755 index 00000000000..36ec53932c6 --- /dev/null +++ b/final/test/Scripts/coff-dump.py @@ -0,0 +1,590 @@ +#!/usr/bin/env python +#===-- coff-dump.py - COFF object file dump utility-------------------------===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +# +# COFF File Definition +# + +def string_table_entry (offset): + return ('ptr', '+ + PointerToSymbolTable * NumberOfSymbols 18 %s' % offset, ('scalar', 'cstr', '%s')) + +def secname(value): + if value[0] == '/': + return string_table_entry(value[1:].rstrip('\0')) + else: + return '%s' + +def symname(value): + parts = struct.unpack("<2L", value) + if parts[0] == 0: + return string_table_entry(parts[1]) + else: + return '%s' + +file = ('struct', [ + ('MachineType', ('enum', '> 4 + ('ComplexType', ('enum', '>> & _Type 240 4', '%d', { + 0: 'IMAGE_SYM_DTYPE_NULL', + 1: 'IMAGE_SYM_DTYPE_POINTER', + 2: 'IMAGE_SYM_DTYPE_FUNCTION', + 3: 'IMAGE_SYM_DTYPE_ARRAY', + })), + ('StorageClass', ('enum', ' 0) + Input.seek(Stack[0]) + del Stack[0] + +def print_binary_data(size): + value = "" + while size > 0: + if size >= 16: + data = Input.read(16) + size -= 16 + else: + data = Input.read(size) + size = 0 + value += data + bytes = "" + text = "" + for index in xrange(16): + if index < len(data): + if index == 8: + bytes += "- " + ch = ord(data[index]) + bytes += "%02X " % ch + if ch >= 0x20 and ch <= 0x7F: + text += data[index] + else: + text += "." 
+ else: + if index == 8: + bytes += " " + bytes += " " + + write("%s|%s|\n" % (bytes, text)) + return value + +idlit = re.compile("[a-zA-Z_][a-zA-Z0-9_-]*") +numlit = re.compile("[0-9]+") + +def read_value(expr): + + input = iter(expr.split()) + + def eval(): + + token = input.next() + + if expr == 'cstr': + return read_cstr() + if expr == 'true': + return True + if expr == 'false': + return False + + if token == '+': + return eval() + eval() + if token == '-': + return eval() - eval() + if token == '*': + return eval() * eval() + if token == '/': + return eval() / eval() + if token == '&': + return eval() & eval() + if token == '|': + return eval() | eval() + if token == '>>': + return eval() >> eval() + if token == '<<': + return eval() << eval() + + if len(token) > 1 and token[0] in ('=', '@', '<', '!', '>'): + val = read(expr) + assert(len(val) == 1) + return val[0] + + if idlit.match(token): + return Fields[token] + if numlit.match(token): + return int(token) + + raise RuntimeError("unexpected token %s" % repr(token)) + + value = eval() + + try: + input.next() + except StopIteration: + return value + raise RuntimeError("unexpected input at end of expression") + +def write_value(format,value): + format_type = type(format) + if format_type is types.StringType: + write(format % value) + elif format_type is types.FunctionType: + write_value(format(value), value) + elif format_type is types.TupleType: + Fields['this'] = value + handle_element(format) + elif format_type is types.NoneType: + pass + else: + raise RuntimeError("unexpected type: %s" % repr(format_type)) + +def handle_scalar(entry): + iformat = entry[1] + oformat = entry[2] + + value = read_value(iformat) + + write_value(oformat, value) + + return value + +def handle_enum(entry): + iformat = entry[1] + oformat = entry[2] + definitions = entry[3] + + value = read_value(iformat) + + if type(definitions) is types.TupleType: + selector = read_value(definitions[0]) + definitions = definitions[1][selector] + + if value in definitions: + description = definitions[value] + else: + description = "unknown" + + write("%s (" % description) + write_value(oformat, value) + write(")") + + return value + +def handle_flags(entry): + iformat = entry[1] + oformat = entry[2] + definitions = entry[3] + + value = read_value(iformat) + + write_value(oformat, value) + + indent() + for entry in definitions: + mask = entry[0] + name = entry[1] + if len (entry) == 3: + map = entry[2] + selection = value & mask + if selection in map: + write("\n%s" % map[selection]) + else: + write("\n%s <%d>" % (name, selection)) + elif len(entry) == 2: + if value & mask != 0: + write("\n%s" % name) + dedent() + + return value + +def handle_struct(entry): + global Fields + members = entry[1] + + newFields = {} + + write("{\n"); + indent() + + for member in members: + name = member[0] + type = member[1] + + if name[0] != "_": + write("%s = " % name.ljust(24)) + + value = handle_element(type) + + if name[0] != "_": + write("\n") + + Fields[name] = value + newFields[name] = value + + dedent() + write("}") + + return newFields + +def handle_array(entry): + start_index = entry[1] + length = entry[2] + element = entry[3] + + newItems = [] + + write("[\n") + indent() + + start_index = read_value(start_index) + value = read_value(length) + + for index in xrange(value): + write("%d = " % (index + start_index)) + value = handle_element(element) + write("\n") + newItems.append(value) + + dedent() + write("]") + + return newItems + +def handle_byte_array(entry): + ent_size = entry[1] 
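+ # entry is ('byte-array', entry-size-expr, length-expr, element); unlike
+ # handle_array below, the index advances by bytes-consumed / entry-size.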
+ length = entry[2] + element = entry[3] + + newItems = [] + + write("[\n") + indent() + + item_size = read_value(ent_size) + value = read_value(length) + end_of_array = Input.tell() + value + + prev_loc = Input.tell() + index = 0 + while Input.tell() < end_of_array: + write("%d = " % index) + value = handle_element(element) + write("\n") + newItems.append(value) + index += (Input.tell() - prev_loc) / item_size + prev_loc = Input.tell() + + dedent() + write("]") + + return newItems + +def handle_ptr(entry): + offset = entry[1] + element = entry[2] + + value = None + offset = read_value(offset) + + if offset != 0: + + push_pos(offset) + + value = handle_element(element) + + pop_pos() + + else: + write("None") + + return value + +def handle_blob(entry): + length = entry[1] + + write("\n") + indent() + + value = print_binary_data(read_value(length)) + + dedent() + + return value + +def handle_element(entry): + handlers = { + 'struct': handle_struct, + 'scalar': handle_scalar, + 'enum': handle_enum, + 'flags': handle_flags, + 'ptr': handle_ptr, + 'blob': handle_blob, + 'array': handle_array, + 'byte-array': handle_byte_array, + } + + if not entry[0] in handlers: + raise RuntimeError ("unexpected type '%s'" % str (entry[0])) + + return handlers[entry[0]](entry) + +if len(sys.argv) <= 1 or sys.argv[1] == '-': + import StringIO + Input = StringIO.StringIO(sys.stdin.read()) +else: + Input = open (sys.argv[1], "rb") + +try: + handle_element(file) +finally: + Input.close() + Input = None diff --git a/final/test/Scripts/coff-dump.py.bat b/final/test/Scripts/coff-dump.py.bat new file mode 100644 index 00000000000..56428e1a605 --- /dev/null +++ b/final/test/Scripts/coff-dump.py.bat @@ -0,0 +1,7 @@ +@echo off + +@rem We need to set -u to treat stdin as binary. Python 3 has support for doing +@rem this in code, but I haven't found a way to do this in 2.6 yet. + +%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\coff-dump.py %1 %2 %3 %4 %5 %6 %7 %8 %9 + diff --git a/final/test/Scripts/common_dump.py b/final/test/Scripts/common_dump.py new file mode 100644 index 00000000000..3d69c3fb27f --- /dev/null +++ b/final/test/Scripts/common_dump.py @@ -0,0 +1,46 @@ +def dataToHex(d): + """ Convert the raw data in 'd' to an hex string with a space every 4 bytes. + """ + bytes = [] + for i,c in enumerate(d): + byte = ord(c) + hex_byte = hex(byte)[2:] + if byte <= 0xf: + hex_byte = '0' + hex_byte + if i % 4 == 3: + hex_byte += ' ' + bytes.append(hex_byte) + return ''.join(bytes).strip() + +def dataToHexUnified(d): + """ Convert the raw data in 'd' to an hex string with a space every 4 bytes. + Each 4byte number is prefixed with 0x for easy sed/rx + Fixme: convert all MC tests to use this routine instead of the above + """ + bytes = [] + for i,c in enumerate(d): + byte = ord(c) + hex_byte = hex(byte)[2:] + if byte <= 0xf: + hex_byte = '0' + hex_byte + if i % 4 == 0: + hex_byte = '0x' + hex_byte + if i % 4 == 3: + hex_byte += ' ' + bytes.append(hex_byte) + return ''.join(bytes).strip() + + +def HexDump(val, numBits=32): + """ + 1. do not print 'L' + 2. Handle negatives and large numbers by mod (2^numBits) + 3. print fixed length, prepend with zeros. + Length is exactly 2+(numBits/4) + 4. Do print 0x Why? 
+ so that they can be easily distinguished using sed/rx + """ + val = val & (( 1 << numBits) - 1) + newFmt = "0x%0" + "%d" % (numBits / 4) + "x" + return newFmt % val + diff --git a/final/test/Scripts/elf-dump b/final/test/Scripts/elf-dump new file mode 100755 index 00000000000..76cdbf91c73 --- /dev/null +++ b/final/test/Scripts/elf-dump @@ -0,0 +1,231 @@ +#!/usr/bin/env python + +import struct +import sys +import StringIO + +import common_dump + +class Reader: + def __init__(self, path): + if path == "-": + # Snarf all the data so we can seek. + self.file = StringIO.StringIO(sys.stdin.read()) + else: + self.file = open(path, "rb") + self.isLSB = None + self.is64Bit = None + + def seek(self, pos): + self.file.seek(pos) + + def read(self, N): + data = self.file.read(N) + if len(data) != N: + raise ValueError, "Out of data!" + return data + + def read8(self): + return ord(self.read(1)) + + def read16(self): + return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0] + + def read32(self): + return struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0] + + def read32S(self): + return struct.unpack('><'[self.isLSB] + 'i', self.read(4))[0] + + def read64(self): + return struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0] + + def read64S(self): + return struct.unpack('><'[self.isLSB] + 'q', self.read(8))[0] + + def readWord(self): + if self.is64Bit: + return self.read64() + else: + return self.read32() + + def readWordS(self): + if self.is64Bit: + return self.read64S() + else: + return self.read32S() + +class StringTable: + def __init__(self, strings): + self.string_table = strings + + def __getitem__(self, index): + end = self.string_table.index('\x00', index) + return self.string_table[index:end] + +class Section: + def __init__(self, f): + self.sh_name = f.read32() + self.sh_type = f.read32() + self.sh_flags = f.readWord() + self.sh_addr = f.readWord() + self.sh_offset = f.readWord() + self.sh_size = f.readWord() + self.sh_link = f.read32() + self.sh_info = f.read32() + self.sh_addralign = f.readWord() + self.sh_entsize = f.readWord() + + def dump(self, shstrtab, f, strtab, dumpdata): + print " (('sh_name', %s)" % common_dump.HexDump(self.sh_name), "# %r" % shstrtab[self.sh_name] + print " ('sh_type', %s)" % common_dump.HexDump(self.sh_type) + print " ('sh_flags', %s)" % common_dump.HexDump(self.sh_flags) + print " ('sh_addr', %s)" % common_dump.HexDump(self.sh_addr) + print " ('sh_offset', %s)" % common_dump.HexDump(self.sh_offset) + print " ('sh_size', %s)" % common_dump.HexDump(self.sh_size) + print " ('sh_link', %s)" % common_dump.HexDump(self.sh_link) + print " ('sh_info', %s)" % common_dump.HexDump(self.sh_info) + print " ('sh_addralign', %s)" % common_dump.HexDump(self.sh_addralign) + print " ('sh_entsize', %s)" % common_dump.HexDump(self.sh_entsize) + if self.sh_type == 2: # SHT_SYMTAB + print " ('_symbols', [" + dumpSymtab(f, self, strtab) + print " ])" + elif self.sh_type == 4 or self.sh_type == 9: # SHT_RELA / SHT_REL + print " ('_relocations', [" + dumpRel(f, self, self.sh_type == 4) + print " ])" + elif dumpdata: + f.seek(self.sh_offset) + if self.sh_type != 8: # != SHT_NOBITS + data = f.read(self.sh_size) + print " ('_section_data', '%s')" % common_dump.dataToHex(data) + else: + print " ('_section_data', '')" + print " )," + +def dumpSymtab(f, section, strtab): + entries = section.sh_size // section.sh_entsize + + for index in range(entries): + f.seek(section.sh_offset + index * section.sh_entsize) + print " # Symbol %s" % common_dump.HexDump(index) + name = f.read32() + 
print " (('st_name', %s)" % common_dump.HexDump(name), "# %r" % strtab[name] + if not f.is64Bit: + print " ('st_value', %s)" % common_dump.HexDump(f.read32()) + print " ('st_size', %s)" % common_dump.HexDump(f.read32()) + st_info = f.read8() + print " ('st_bind', %s)" % common_dump.HexDump((st_info >> 4)) + print " ('st_type', %s)" % common_dump.HexDump((st_info & 0xf)) + print " ('st_other', %s)" % common_dump.HexDump(f.read8()) + print " ('st_shndx', %s)" % common_dump.HexDump(f.read16()) + if f.is64Bit: + print " ('st_value', %s)" % common_dump.HexDump(f.read64(), 64) + print " ('st_size', %s)" % common_dump.HexDump(f.read64(), 64) + print " )," + +def dumpRel(f, section, dumprela = False): + entries = section.sh_size // section.sh_entsize + + for index in range(entries): + f.seek(section.sh_offset + index * section.sh_entsize) + print " # Relocation %s" % common_dump.HexDump(index) + print " (('r_offset', %s)" % common_dump.HexDump(f.readWord()) + r_info = f.readWord() + if f.is64Bit: + print " ('r_sym', %s)" % common_dump.HexDump((r_info >> 32)) + print " ('r_type', %s)" % common_dump.HexDump((r_info & 0xffffffff)) + else: + print " ('r_sym', %s)" % common_dump.HexDump((r_info >> 8)) + print " ('r_type', %s)" % common_dump.HexDump((r_info & 0xff)) + if dumprela: + print " ('r_addend', %s)" % common_dump.HexDump(f.readWordS()) + print " )," + +def dumpELF(path, opts): + f = Reader(path) + + magic = f.read(4) + assert magic == '\x7FELF' + + fileclass = f.read8() + if fileclass == 1: # ELFCLASS32 + f.is64Bit = False + elif fileclass == 2: # ELFCLASS64 + f.is64Bit = True + else: + raise ValueError, "Unknown file class %s" % common_dump.HexDump(fileclass) + print "('e_indent[EI_CLASS]', %s)" % common_dump.HexDump(fileclass) + + byteordering = f.read8() + if byteordering == 1: # ELFDATA2LSB + f.isLSB = True + elif byteordering == 2: # ELFDATA2MSB + f.isLSB = False + else: + raise ValueError, "Unknown byte ordering %s" % common_dump.HexDump(byteordering) + print "('e_indent[EI_DATA]', %s)" % common_dump.HexDump(byteordering) + + print "('e_indent[EI_VERSION]', %s)" % common_dump.HexDump(f.read8()) + print "('e_indent[EI_OSABI]', %s)" % common_dump.HexDump(f.read8()) + print "('e_indent[EI_ABIVERSION]', %s)" % common_dump.HexDump(f.read8()) + + f.seek(16) # Seek to end of e_ident. 
+ + print "('e_type', %s)" % common_dump.HexDump(f.read16()) + print "('e_machine', %s)" % common_dump.HexDump(f.read16()) + print "('e_version', %s)" % common_dump.HexDump(f.read32()) + print "('e_entry', %s)" % common_dump.HexDump(f.readWord()) + print "('e_phoff', %s)" % common_dump.HexDump(f.readWord()) + e_shoff = f.readWord() + print "('e_shoff', %s)" % common_dump.HexDump(e_shoff) + print "('e_flags', %s)" % common_dump.HexDump(f.read32()) + print "('e_ehsize', %s)" % common_dump.HexDump(f.read16()) + print "('e_phentsize', %s)" % common_dump.HexDump(f.read16()) + print "('e_phnum', %s)" % common_dump.HexDump(f.read16()) + e_shentsize = f.read16() + print "('e_shentsize', %s)" % common_dump.HexDump(e_shentsize) + e_shnum = f.read16() + print "('e_shnum', %s)" % common_dump.HexDump(e_shnum) + e_shstrndx = f.read16() + print "('e_shstrndx', %s)" % common_dump.HexDump(e_shstrndx) + + # Read all section headers + sections = [] + for index in range(e_shnum): + f.seek(e_shoff + index * e_shentsize) + s = Section(f) + sections.append(s) + + # Read .shstrtab so we can resolve section names + f.seek(sections[e_shstrndx].sh_offset) + shstrtab = StringTable(f.read(sections[e_shstrndx].sh_size)) + + # Get the symbol string table + strtab = None + for section in sections: + if shstrtab[section.sh_name] == ".strtab": + f.seek(section.sh_offset) + strtab = StringTable(f.read(section.sh_size)) + break + + print "('_sections', [" + for index in range(e_shnum): + print " # Section %s" % common_dump.HexDump(index) + sections[index].dump(shstrtab, f, strtab, opts.dumpSectionData) + print "])" + +if __name__ == "__main__": + from optparse import OptionParser, OptionGroup + parser = OptionParser("usage: %prog [options] {files}") + parser.add_option("", "--dump-section-data", dest="dumpSectionData", + help="Dump the contents of sections", + action="store_true", default=False) + (opts, args) = parser.parse_args() + + if not args: + args.append('-') + + for arg in args: + dumpELF(arg, opts) diff --git a/final/test/Scripts/elf-dump.bat b/final/test/Scripts/elf-dump.bat new file mode 100644 index 00000000000..9c708083b30 --- /dev/null +++ b/final/test/Scripts/elf-dump.bat @@ -0,0 +1,7 @@ +@echo off + +@rem We need to set -u to treat stdin as binary. Python 3 has support for doing +@rem this in code, but I haven't found a way to do this in 2.6 yet. + +%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\elf-dump %1 %2 %3 %4 %5 %6 %7 %8 %9 + diff --git a/final/test/Scripts/ignore b/final/test/Scripts/ignore new file mode 100755 index 00000000000..865ae4df1bd --- /dev/null +++ b/final/test/Scripts/ignore @@ -0,0 +1,10 @@ +#!/bin/sh +# +# Program: ignore +# +# Synopsis: Ignore the result code of the command and always return 0 +# +# Syntax: ignore command + +"$@" || exit 0 && exit 0 +exit 0 diff --git a/final/test/Scripts/macho-dumpx b/final/test/Scripts/macho-dumpx new file mode 100755 index 00000000000..71e06d837b9 --- /dev/null +++ b/final/test/Scripts/macho-dumpx @@ -0,0 +1,294 @@ +#!/usr/bin/env python + +import struct +import sys +import StringIO + +import common_dump + +class Reader: + def __init__(self, path): + if path == '-': + # Snarf all the data so we can seek. 
+ self.file = StringIO.StringIO(sys.stdin.read()) + else: + self.file = open(path,'rb') + self.isLSB = None + self.is64Bit = None + + self.string_table = None + + def tell(self): + return self.file.tell() + + def seek(self, pos): + self.file.seek(pos) + + def read(self, N): + data = self.file.read(N) + if len(data) != N: + raise ValueError,"Out of data!" + return data + + def read8(self): + return ord(self.read(1)) + + def read16(self): + return struct.unpack('><'[self.isLSB] + 'H', self.read(2))[0] + + def read32(self): + # Force to 32-bit, if possible; otherwise these might be long ints on a + # big-endian platform. FIXME: Why??? + Value = struct.unpack('><'[self.isLSB] + 'I', self.read(4))[0] + return int(Value) + + def read64(self): + Value = struct.unpack('><'[self.isLSB] + 'Q', self.read(8))[0] + if Value == int(Value): + Value = int(Value) + return Value + + def registerStringTable(self, strings): + if self.string_table is not None: + raise ValueError,"%s: warning: multiple string tables" % sys.argv[0] + + self.string_table = strings + + def getString(self, index): + if self.string_table is None: + raise ValueError,"%s: warning: no string table registered" % sys.argv[0] + + end = self.string_table.index('\x00', index) + return self.string_table[index:end] + +def dumpmacho(path, opts): + f = Reader(path) + + magic = f.read(4) + if magic == '\xFE\xED\xFA\xCE': + f.isLSB, f.is64Bit = False, False + elif magic == '\xCE\xFA\xED\xFE': + f.isLSB, f.is64Bit = True, False + elif magic == '\xFE\xED\xFA\xCF': + f.isLSB, f.is64Bit = False, True + elif magic == '\xCF\xFA\xED\xFE': + f.isLSB, f.is64Bit = True, True + else: + raise ValueError,"Not a Mach-O object file: %r (bad magic)" % path + + print "('cputype', %r)" % f.read32() + print "('cpusubtype', %r)" % f.read32() + filetype = f.read32() + print "('filetype', %r)" % filetype + + numLoadCommands = f.read32() + print "('num_load_commands', %r)" % numLoadCommands + + loadCommandsSize = f.read32() + print "('load_commands_size', %r)" % loadCommandsSize + + print "('flag', %r)" % f.read32() + + if f.is64Bit: + print "('reserved', %r)" % f.read32() + + start = f.tell() + + print "('load_commands', [" + for i in range(numLoadCommands): + dumpLoadCommand(f, i, opts) + print "])" + + if f.tell() - start != loadCommandsSize: + raise ValueError,"%s: warning: invalid load commands size: %r" % ( + sys.argv[0], loadCommandsSize) + +def dumpLoadCommand(f, i, opts): + start = f.tell() + + print " # Load Command %r" % i + cmd = f.read32() + print " (('command', %r)" % cmd + cmdSize = f.read32() + print " ('size', %r)" % cmdSize + + if cmd == 1: + dumpSegmentLoadCommand(f, opts, False) + elif cmd == 2: + dumpSymtabCommand(f, opts) + elif cmd == 11: + dumpDysymtabCommand(f, opts) + elif cmd == 25: + dumpSegmentLoadCommand(f, opts, True) + elif cmd == 27: + import uuid + print " ('uuid', %s)" % uuid.UUID(bytes=f.read(16)) + else: + print >>sys.stderr,"%s: warning: unknown load command: %r" % ( + sys.argv[0], cmd) + f.read(cmdSize - 8) + print " )," + + if f.tell() - start != cmdSize: + raise ValueError,"%s: warning: invalid load command size: %r" % ( + sys.argv[0], cmdSize) + +def dumpSegmentLoadCommand(f, opts, is64Bit): + print " ('segment_name', %r)" % f.read(16) + if is64Bit: + print " ('vm_addr', %r)" % f.read64() + print " ('vm_size', %r)" % f.read64() + print " ('file_offset', %r)" % f.read64() + print " ('file_size', %r)" % f.read64() + else: + print " ('vm_addr', %r)" % f.read32() + print " ('vm_size', %r)" % f.read32() + print " ('file_offset', 
%r)" % f.read32() + print " ('file_size', %r)" % f.read32() + print " ('maxprot', %r)" % f.read32() + print " ('initprot', %r)" % f.read32() + numSections = f.read32() + print " ('num_sections', %r)" % numSections + print " ('flags', %r)" % f.read32() + + print " ('sections', [" + for i in range(numSections): + dumpSection(f, i, opts, is64Bit) + print " ])" + +def dumpSymtabCommand(f, opts): + symoff = f.read32() + print " ('symoff', %r)" % symoff + nsyms = f.read32() + print " ('nsyms', %r)" % nsyms + stroff = f.read32() + print " ('stroff', %r)" % stroff + strsize = f.read32() + print " ('strsize', %r)" % strsize + + prev_pos = f.tell() + + f.seek(stroff) + string_data = f.read(strsize) + print " ('_string_data', %r)" % string_data + + f.registerStringTable(string_data) + + f.seek(symoff) + print " ('_symbols', [" + for i in range(nsyms): + dumpNlist32(f, i, opts) + print " ])" + + f.seek(prev_pos) + +def dumpNlist32(f, i, opts): + print " # Symbol %r" % i + n_strx = f.read32() + print " (('n_strx', %r)" % n_strx + n_type = f.read8() + print " ('n_type', %#x)" % n_type + n_sect = f.read8() + print " ('n_sect', %r)" % n_sect + n_desc = f.read16() + print " ('n_desc', %r)" % n_desc + if f.is64Bit: + n_value = f.read64() + print " ('n_value', %r)" % n_value + else: + n_value = f.read32() + print " ('n_value', %r)" % n_value + print " ('_string', %r)" % f.getString(n_strx) + print " )," + +def dumpDysymtabCommand(f, opts): + print " ('ilocalsym', %r)" % f.read32() + print " ('nlocalsym', %r)" % f.read32() + print " ('iextdefsym', %r)" % f.read32() + print " ('nextdefsym', %r)" % f.read32() + print " ('iundefsym', %r)" % f.read32() + print " ('nundefsym', %r)" % f.read32() + print " ('tocoff', %r)" % f.read32() + print " ('ntoc', %r)" % f.read32() + print " ('modtaboff', %r)" % f.read32() + print " ('nmodtab', %r)" % f.read32() + print " ('extrefsymoff', %r)" % f.read32() + print " ('nextrefsyms', %r)" % f.read32() + indirectsymoff = f.read32() + print " ('indirectsymoff', %r)" % indirectsymoff + nindirectsyms = f.read32() + print " ('nindirectsyms', %r)" % nindirectsyms + print " ('extreloff', %r)" % f.read32() + print " ('nextrel', %r)" % f.read32() + print " ('locreloff', %r)" % f.read32() + print " ('nlocrel', %r)" % f.read32() + + prev_pos = f.tell() + + f.seek(indirectsymoff) + print " ('_indirect_symbols', [" + for i in range(nindirectsyms): + print " # Indirect Symbol %r" % i + print " (('symbol_index', %#x),)," % f.read32() + print " ])" + + f.seek(prev_pos) + +def dumpSection(f, i, opts, is64Bit): + print " # Section %r" % i + print " (('section_name', %r)" % f.read(16) + print " ('segment_name', %r)" % f.read(16) + if is64Bit: + print " ('address', %r)" % f.read64() + size = f.read64() + print " ('size', %r)" % size + else: + print " ('address', %r)" % f.read32() + size = f.read32() + print " ('size', %r)" % size + offset = f.read32() + print " ('offset', %r)" % offset + print " ('alignment', %r)" % f.read32() + reloc_offset = f.read32() + print " ('reloc_offset', %r)" % reloc_offset + num_reloc = f.read32() + print " ('num_reloc', %r)" % num_reloc + print " ('flags', %#x)" % f.read32() + print " ('reserved1', %r)" % f.read32() + print " ('reserved2', %r)" % f.read32() + if is64Bit: + print " ('reserved3', %r)" % f.read32() + print " )," + + prev_pos = f.tell() + + f.seek(reloc_offset) + print " ('_relocations', [" + for i in range(num_reloc): + print " # Relocation %r" % i + print " (('word-0', %#x)," % f.read32() + print " ('word-1', %#x))," % f.read32() + print " ])" + + if 
opts.dumpSectionData:
+ f.seek(offset)
+ print " ('_section_data', '%s')" % common_dump.dataToHex(f.read(size))
+
+ f.seek(prev_pos)
+
+def main():
+ from optparse import OptionParser, OptionGroup
+ parser = OptionParser("usage: %prog [options] {files}")
+ parser.add_option("", "--dump-section-data", dest="dumpSectionData",
+ help="Dump the contents of sections",
+ action="store_true", default=False)
+ (opts, args) = parser.parse_args()
+
+ if not args:
+ args.append('-')
+
+ for arg in args:
+ dumpmacho(arg, opts)
+
+if __name__ == '__main__':
+ main()
diff --git a/final/test/Scripts/macho-dumpx.bat b/final/test/Scripts/macho-dumpx.bat
new file mode 100644
index 00000000000..81484f67d70
--- /dev/null
+++ b/final/test/Scripts/macho-dumpx.bat
@@ -0,0 +1,7 @@
+@echo off
+
+@rem We need to set -u to treat stdin as binary. Python 3 has support for doing
+@rem this in code, but I haven't found a way to do this in 2.6 yet.
+
+%PYTHON_EXECUTABLE% -u %LLVM_SRC_ROOT%\test\Scripts\macho-dump %1 %2 %3 %4 %5 %6 %7 %8 %9
+
diff --git a/final/test/TableGen/2003-08-03-PassCode.td b/final/test/TableGen/2003-08-03-PassCode.td
new file mode 100644
index 00000000000..c02f499b382
--- /dev/null
+++ b/final/test/TableGen/2003-08-03-PassCode.td
@@ -0,0 +1,8 @@
+// RUN: tblgen %s
+// XFAIL: vg_leak
+
+class test<code C> {
+ code Code = C;
+}
+
+def foo : test<[{ hello world! }]>;
diff --git a/final/test/TableGen/2006-09-18-LargeInt.td b/final/test/TableGen/2006-09-18-LargeInt.td
new file mode 100644
index 00000000000..194699acc63
--- /dev/null
+++ b/final/test/TableGen/2006-09-18-LargeInt.td
@@ -0,0 +1,6 @@
+// RUN: tblgen %s | grep -- 4294901760
+// XFAIL: vg_leak
+
+def X {
+ int Y = 0xFFFF0000;
+}
diff --git a/final/test/TableGen/2010-03-24-PrematureDefaults.td b/final/test/TableGen/2010-03-24-PrematureDefaults.td
new file mode 100644
index 00000000000..2ff2d42d273
--- /dev/null
+++ b/final/test/TableGen/2010-03-24-PrematureDefaults.td
@@ -0,0 +1,44 @@
+// RUN: tblgen %s | FileCheck %s
+// XFAIL: vg_leak
+
+class A<int k, bits<2> x = 1> {
+ int K = k;
+ bits<2> Bits = x;
+}
+
+// CHECK: def a1
+// CHECK: Bits = { 0, 1 }
+def a1 : A<12>;
+
+// CHECK: def a2
+// CHECK: Bits = { 1, 0 }
+def a2 : A<13, 2>;
+
+// Here was the bug: X.Bits would get resolved to the default a1.Bits while
+// resolving the first template argument. When the second template argument
+// was processed, X would be set correctly, but Bits retained the default
+// value.
+class B<int i, A x = a1> {
+ A X = x;
+ bits<2> Bits = X.Bits;
+}
+
+// CHECK: def b1
+// CHECK: Bits = { 0, 1 }
def b1 : B<27>;
+
+// CHECK: def b2
+// CHECK: Bits = { 1, 0 }
+def b2 : B<28, a2>;
+
+class C<A x = a1> {
+ bits<2> Bits = x.Bits;
+}
+
+// CHECK: def c1
+// CHECK: Bits = { 0, 1 }
+def c1 : C;
+
+// CHECK: def c2
+// CHECK: Bits = { 1, 0 }
+def c2 : C<a2>;
diff --git a/final/test/TableGen/AnonDefinitionOnDemand.td b/final/test/TableGen/AnonDefinitionOnDemand.td
new file mode 100644
index 00000000000..b10ad5870de
--- /dev/null
+++ b/final/test/TableGen/AnonDefinitionOnDemand.td
@@ -0,0 +1,13 @@
+// RUN: tblgen < %s
+// XFAIL: vg_leak
+
+class foo<int X> { int THEVAL = X; }
+def foo_imp : foo<1>;
+
+def x {
+ foo Y = foo_imp; // This works.
+}
+
+def X {
+ foo Y = foo<1>; // This should work too, synthesizing a new foo<1>.
+} diff --git a/final/test/TableGen/BitsInitOverflow.td b/final/test/TableGen/BitsInitOverflow.td new file mode 100644 index 00000000000..076b3f6f73e --- /dev/null +++ b/final/test/TableGen/BitsInitOverflow.td @@ -0,0 +1,5 @@ +// RUN: not tblgen %s 2> /dev/null + +def { + bits<2> X = 5; // bitfield is too small, reject +} diff --git a/final/test/TableGen/CStyleComment.td b/final/test/TableGen/CStyleComment.td new file mode 100644 index 00000000000..703ae6837eb --- /dev/null +++ b/final/test/TableGen/CStyleComment.td @@ -0,0 +1,14 @@ +// Test that multiline, nested, comments work correctly. +// +// RUN: tblgen < %s + +/* Foo + bar + /* + blah + */ + + stuff + */ + +def x; diff --git a/final/test/TableGen/Dag.td b/final/test/TableGen/Dag.td new file mode 100644 index 00000000000..d3481a550c3 --- /dev/null +++ b/final/test/TableGen/Dag.td @@ -0,0 +1,71 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak + +//===----------------------------------------------------------------------===// +// Substitution of an int. +def X1; + +class C1 { + dag d = (X1 N); +} + +def VAL1 : C1<13>; + +// CHECK: def VAL1 { +// CHECK-NEXT: dag d = (X1 13) + + +//===----------------------------------------------------------------------===// +// Substitution of a DAG. +def X2; + +class yclass; +def Y2 : yclass; + +class C2 { + dag d = (X2 N); + dag e = (N X2); +} + +def VAL2 : C2; + +// CHECK: def VAL2 { +// CHECK-NEXT: dag d = (X2 Y2) +// CHECK-NEXT: dag e = (Y2 X2) + + +//===----------------------------------------------------------------------===// +// Complex dag operator (F.TheOp). + +class operator; +def somedef1 : operator; +def somedef2 : operator; + +class foo { + operator TheOp = a; +} + +class bar { + dag Dag1 = (somedef1 1); + dag Dag2 = (a 2); + dag Dag3 = (F.TheOp 2); +} + +def foo1 : foo; +def foo2 : foo; + +def VAL3 : bar; + +// CHECK: def VAL3 { // bar +// CHECK-NEXT: dag Dag1 = (somedef1 1); +// CHECK-NEXT: dag Dag2 = (somedef1 2); +// CHECK-NEXT: dag Dag3 = (somedef1 2); +// CHECK-NEXT: } + + +def VAL4 : bar; +// CHECK: def VAL4 { +// CHECK-NEXT: dag Dag1 = (somedef1 1); +// CHECK-NEXT: dag Dag2 = (somedef2 2); +// CHECK-NEXT: dag Dag3 = (somedef2 2); +// CHECK-NEXT: } diff --git a/final/test/TableGen/DefmInherit.td b/final/test/TableGen/DefmInherit.td new file mode 100644 index 00000000000..9e166705269 --- /dev/null +++ b/final/test/TableGen/DefmInherit.td @@ -0,0 +1,33 @@ +// RUN: tblgen %s | grep {zing = 4} | count 4 +// XFAIL: vg_leak + +class C1 { + int bar = A; + string thestr = B; + int zing; +} + +def T : C1<4, "blah">; + +multiclass t { + def S1 : C1 { + int foo = 4; + let bar = 1; + } + def S2 : C1; +} + +multiclass s { + def S3 : C1 { + int moo = 3; + let bar = 1; + } + def S4 : C1; +} + +defm FOO : t<42>, s<24>; + +def T4 : C1<6, "foo">; + +let zing = 4 in + defm BAZ : t<3>, s<4>; diff --git a/final/test/TableGen/DefmInsideMultiClass.td b/final/test/TableGen/DefmInsideMultiClass.td new file mode 100644 index 00000000000..68cc12d5681 --- /dev/null +++ b/final/test/TableGen/DefmInsideMultiClass.td @@ -0,0 +1,25 @@ +// RUN: tblgen %s | grep ADDPSrr | count 1 +// XFAIL: vg_leak + +class Instruction opc, string Name> { + bits<4> opcode = opc; + string name = Name; +} + +multiclass basic_r opc> { + def rr : Instruction; + def rm : Instruction; +} + +multiclass basic_s opc> { + defm SS : basic_r; + defm SD : basic_r; +} + +multiclass basic_p opc> { + defm PS : basic_r; + defm PD : basic_r; +} + +defm ADD : basic_s<0xf>, basic_p<0xf>; +defm SUB : basic_s<0xe>, basic_p<0xe>; diff --git 
a/final/test/TableGen/FieldAccess.td b/final/test/TableGen/FieldAccess.td new file mode 100644 index 00000000000..8b4dc83e0a5 --- /dev/null +++ b/final/test/TableGen/FieldAccess.td @@ -0,0 +1,16 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +class Bla +{ + string blu = t; +} + +class Bli +{ + Bla bla = t; +} + +def a : Bli>; +def b : Bla(a.bla).blu>; // works +def c : Bla; // doesn't work: Cannot access field 'blu' of value 'a.bla' diff --git a/final/test/TableGen/ForwardRef.td b/final/test/TableGen/ForwardRef.td new file mode 100644 index 00000000000..955cc14248f --- /dev/null +++ b/final/test/TableGen/ForwardRef.td @@ -0,0 +1,16 @@ +// RUN: tblgen %s -o - +// XFAIL: vg_leak + +class bar { + list x; +} + +class foo; +class foo; + +class baz { list y; } + +class foo { + +} + diff --git a/final/test/TableGen/GeneralList.td b/final/test/TableGen/GeneralList.td new file mode 100644 index 00000000000..ca92a213b22 --- /dev/null +++ b/final/test/TableGen/GeneralList.td @@ -0,0 +1,9 @@ +// RUN: tblgen %s +// XFAIL: vg_leak +// +// Test to make sure that lists work with any data-type + +class foo { + list Test = [1, 2, 3]; + list Test2 = ["abc", "xyz", "gtq"]; +} diff --git a/final/test/TableGen/Include.inc b/final/test/TableGen/Include.inc new file mode 100644 index 00000000000..876bf47ca96 --- /dev/null +++ b/final/test/TableGen/Include.inc @@ -0,0 +1,4 @@ +// This is used by the Include.td test +def InInclude; + + diff --git a/final/test/TableGen/Include.td b/final/test/TableGen/Include.td new file mode 100644 index 00000000000..29ed5150c62 --- /dev/null +++ b/final/test/TableGen/Include.td @@ -0,0 +1,7 @@ +// RUN: tblgen -I %p %s +def BeforeInclude; + +include "Include.inc" + +def AfterInclude; + diff --git a/final/test/TableGen/IntBitInit.td b/final/test/TableGen/IntBitInit.td new file mode 100644 index 00000000000..16ac9c8f912 --- /dev/null +++ b/final/test/TableGen/IntBitInit.td @@ -0,0 +1,6 @@ +// RUN: tblgen %s +// XFAIL: vg_leak +def { + bit A = 1; + int B = A; +} diff --git a/final/test/TableGen/LazyChange.td b/final/test/TableGen/LazyChange.td new file mode 100644 index 00000000000..fa53562b8c2 --- /dev/null +++ b/final/test/TableGen/LazyChange.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s | grep {int Y = 3} +// XFAIL: vg_leak + +class C { + int X = 4; + int Y = X; +} + +let X = 3 in +def D : C; // Y should be 3 too! 
+ diff --git a/final/test/TableGen/LetInsideMultiClasses.td b/final/test/TableGen/LetInsideMultiClasses.td new file mode 100644 index 00000000000..9238bf42d9b --- /dev/null +++ b/final/test/TableGen/LetInsideMultiClasses.td @@ -0,0 +1,29 @@ +// RUN: tblgen %s | grep "bit IsDouble = 1;" | count 3 +// XFAIL: vg_leak + +class Instruction opc, string Name> { + bits<4> opcode = opc; + string name = Name; + bit IsDouble = 0; +} + +multiclass basic_r opc> { + let name = "newname" in { + def rr : Instruction; + def rm : Instruction; + } + + let name = "othername" in + def rx : Instruction; +} + +multiclass basic_ss opc> { + let IsDouble = 0 in + defm SS : basic_r; + + let IsDouble = 1 in + defm SD : basic_r; +} + +defm ADD : basic_ss<0xf>; + diff --git a/final/test/TableGen/ListArgs.td b/final/test/TableGen/ListArgs.td new file mode 100644 index 00000000000..a513db6da3c --- /dev/null +++ b/final/test/TableGen/ListArgs.td @@ -0,0 +1,12 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +class B v> { + list vals = v; +} + +class BB> vals> : B; +class BBB> vals> : BB; + +def OneB : BBB<[[1,2,3]]>; +def TwoB : BBB<[[1,2,3],[4,5,6]]>; diff --git a/final/test/TableGen/ListArgsSimple.td b/final/test/TableGen/ListArgsSimple.td new file mode 100644 index 00000000000..f7caed69a99 --- /dev/null +++ b/final/test/TableGen/ListArgsSimple.td @@ -0,0 +1,9 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +class B { + int val = v; +} + +class BB vals> : B; +class BBB vals> : BB; diff --git a/final/test/TableGen/ListConversion.td b/final/test/TableGen/ListConversion.td new file mode 100644 index 00000000000..222b6140564 --- /dev/null +++ b/final/test/TableGen/ListConversion.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s +// XFAIL: vg_leak +class A; +class B : A; + +def b : B; + +def { + list X = [b]; + list Y = X; +} diff --git a/final/test/TableGen/ListManip.td b/final/test/TableGen/ListManip.td new file mode 100644 index 00000000000..6b1e491cd25 --- /dev/null +++ b/final/test/TableGen/ListManip.td @@ -0,0 +1,12 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +class Bli +{ + string t = _t; +} + +class Bla _bli> +: Bli +{ +} diff --git a/final/test/TableGen/ListSlices.td b/final/test/TableGen/ListSlices.td new file mode 100644 index 00000000000..5848a4e4870 --- /dev/null +++ b/final/test/TableGen/ListSlices.td @@ -0,0 +1,19 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +def A { + list B = [10, 20, 30, 4, 1, 1231, 20]; +} + +def B { + list X = [10, 20, 30, 4, 1, 1231, 20] [2-4,2,2,0-6]; + + list Y = X[4,5]; + int Z = X[4]; + + list C = A.B[1-4]; + + list> AA = [X, Y]; + + int BB = AA[0][1]; +} diff --git a/final/test/TableGen/MultiClass.td b/final/test/TableGen/MultiClass.td new file mode 100644 index 00000000000..9f92b73dba6 --- /dev/null +++ b/final/test/TableGen/MultiClass.td @@ -0,0 +1,26 @@ +// RUN: tblgen %s | grep {zing = 4} | count 2 +// XFAIL: vg_leak + +class C1 { + int bar = A; + string thestr = B; + int zing; +} + +def T : C1<4, "blah">; + +multiclass t { + def S1 : C1 { + int foo = 4; + let bar = 1; + } + def S2 : C1; +} + +defm FOO : t<42>; + +def T4 : C1<6, "foo">; + +let zing = 4 in + defm BAZ : t<3>; + diff --git a/final/test/TableGen/MultiClassDefName.td b/final/test/TableGen/MultiClassDefName.td new file mode 100644 index 00000000000..138c93d9bb0 --- /dev/null +++ b/final/test/TableGen/MultiClassDefName.td @@ -0,0 +1,13 @@ +// RUN: tblgen %s | grep WorldHelloCC | count 1 +// XFAIL: vg_leak + +class C { + string name = n; +} + +multiclass Names { + def CC : C; + def World#NAME#CC : C; +} + +defm Hello : Names<"hello", "world">; 
diff --git a/final/test/TableGen/MultiClassInherit.td b/final/test/TableGen/MultiClassInherit.td new file mode 100644 index 00000000000..9da80bad2d7 --- /dev/null +++ b/final/test/TableGen/MultiClassInherit.td @@ -0,0 +1,65 @@ +// RUN: tblgen %s | grep {zing = 4} | count 28 +// XFAIL: vg_leak + +class C1 { + int bar = A; + string thestr = B; + int zing; +} + +def T : C1<4, "blah">; + +multiclass t1 { + def S1 : C1 { + int foo = 4; + let bar = 1; + } + def S2 : C1; +} + +multiclass t2 { + def S3 : C1 { + int foo = 4; + let bar = 1; + } + def S4 : C1; +} + +multiclass s1 : t1 { + def S5 : C1 { + int moo = 3; + let bar = 1; + } + def S6 : C1; +} + +multiclass s2 : t1, t2; + +multiclass s3 : t1, t2 { + def S7 : C1 { + int moo = 3; + let bar = 1; + } + def S8 : C1; +} + +let zing = 4 in +defm FOO1 : s1<42, 24>; + +let zing = 4 in +defm FOO2 : s2<99>; + +let zing = 4 in +defm FOO3 : s3<84, 48>; + +def T4 : C1<6, "foo">; + +let zing = 4 in + defm BAZ1 : s1<3, 4>; + +let zing = 4 in + defm BAZ2 : s2<5>; + +let zing = 4 in + defm BAZ3 : s3<6, 7>; + diff --git a/final/test/TableGen/Slice.td b/final/test/TableGen/Slice.td new file mode 100644 index 00000000000..13d9da2b9fd --- /dev/null +++ b/final/test/TableGen/Slice.td @@ -0,0 +1,88 @@ +// RUN: tblgen %s | grep {\\\[(set} | count 2 +// RUN: tblgen %s | grep {\\\[\\\]} | count 2 +// XFAIL: vg_leak + +class ValueType { + int Size = size; + int Value = value; +} + +def f32 : ValueType<32, 1>; // 2 x i64 vector value + +class Intrinsic { + string Name = name; +} + +class Inst opcode, dag oopnds, dag iopnds, string asmstr, + list pattern> { + bits<8> Opcode = opcode; + dag OutOperands = oopnds; + dag InOperands = iopnds; + string AssemblyString = asmstr; + list Pattern = pattern; +} + +def ops; +def outs; +def ins; + +def set; + +// Define registers +class Register { + string Name = n; +} + +class RegisterClass regTypes, list regList> { + list RegTypes = regTypes; + list MemberList = regList; +} + +def XMM0: Register<"xmm0">; +def XMM1: Register<"xmm1">; +def XMM2: Register<"xmm2">; +def XMM3: Register<"xmm3">; +def XMM4: Register<"xmm4">; +def XMM5: Register<"xmm5">; +def XMM6: Register<"xmm6">; +def XMM7: Register<"xmm7">; +def XMM8: Register<"xmm8">; +def XMM9: Register<"xmm9">; +def XMM10: Register<"xmm10">; +def XMM11: Register<"xmm11">; +def XMM12: Register<"xmm12">; +def XMM13: Register<"xmm13">; +def XMM14: Register<"xmm14">; +def XMM15: Register<"xmm15">; + +def FR32 : RegisterClass<[f32], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]>; + +class SDNode {} +def not : SDNode; + +multiclass scalar opcode, string asmstr = "", list> patterns = []> { + def SSrr : Inst,patterns[0])>; + def SSrm : Inst,!if(!empty(!tail(patterns)),patterns[0],patterns[1]))>; +} + +multiclass vscalar opcode, string asmstr = "", list> patterns = []> { + def V#NAME#SSrr : Inst,patterns[0])>; + def V#NAME#SSrm : Inst,!if(!empty(!tail(patterns)),patterns[0],patterns[1]))>; +} + +multiclass myscalar opcode, string asmstr = "", list> patterns = []> : + scalar, + vscalar; + +defm NOT : myscalar<0x10, "not", [[], [(set FR32:$dst, (f32 (not FR32:$src)))]]>; diff --git a/final/test/TableGen/String.td b/final/test/TableGen/String.td new file mode 100644 index 00000000000..fc0f5b8eb54 --- /dev/null +++ b/final/test/TableGen/String.td @@ -0,0 +1,6 @@ +// RUN: tblgen %s +// XFAIL: vg_leak +class x { + string y = "missing terminating '\"' character"; +} + diff --git a/final/test/TableGen/SuperSubclassSameName.td 
b/final/test/TableGen/SuperSubclassSameName.td new file mode 100644 index 00000000000..304c883417f --- /dev/null +++ b/final/test/TableGen/SuperSubclassSameName.td @@ -0,0 +1,21 @@ +// RUN: tblgen < %s +// XFAIL: vg_leak +// Test for template arguments that have the same name as superclass template +// arguments. + + +class Arg { int a; } +def TheArg : Arg { let a = 1; } + + +class Super { + int X = F.a; +} +class Sub : Super; +def inst : Sub; + + +class Super2 { + int X = F; +} +class Sub2 : Super2; diff --git a/final/test/TableGen/TargetInstrInfo.td b/final/test/TableGen/TargetInstrInfo.td new file mode 100644 index 00000000000..146ef6fd768 --- /dev/null +++ b/final/test/TableGen/TargetInstrInfo.td @@ -0,0 +1,148 @@ +// This test describes how we eventually want to describe instructions in +// the target independent code generators. +// RUN: tblgen %s +// XFAIL: vg_leak + +// Target indep stuff. +class Instruction { // Would have other stuff eventually + bit isTwoAddress = 0; + string AssemblyString; +} +class RegisterClass; + +class RTLNode; + +def ops; // Marker for operand list. + +// Various expressions used in RTL descriptions. +def imm8 : RTLNode; +def imm32 : RTLNode; +def addr : RTLNode; + +def set : RTLNode; +def signext : RTLNode; +def zeroext : RTLNode; +def plus : RTLNode; +def and : RTLNode; +def xor : RTLNode; +def shl : RTLNode; +def load : RTLNode; +def store : RTLNode; +def unspec : RTLNode; + +// Start of X86 specific stuff. + +def R8 : RegisterClass; +def R16 : RegisterClass; +def R32 : RegisterClass; + +def CL; // As are currently defined +def AL; +def AX; +def EDX; + +class Format val> { + bits<5> Value = val; +} + +def Pseudo : Format<0>; def RawFrm : Format<1>; +def AddRegFrm : Format<2>; def MRMDestReg : Format<3>; +def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>; +def MRMSrcMem : Format<6>; +def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>; +def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>; +def MRM6r : Format<22>; def MRM7r : Format<23>; +def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>; +def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>; +def MRM6m : Format<30>; def MRM7m : Format<31>; + + +class Inst opcode, + Format f, list rtl> : Instruction { + dag Operands = opnds; + string AssemblyString = asmstr; + bits<8> Opcode = opcode; + Format Format = f; + list RTL = rtl; +} + + +// Start of instruction definitions, the real point of this file. +// +// Note that these patterns show a couple of important things: +// 1. The order and contents of the operands of the MachineInstr are +// described here. Eventually we can do away with this when everything +// is generated from the description. +// 2. The asm string is captured here, which makes it possible to get rid of +// a ton of hacks in the various printers and a bunch of flags. +// 3. Target specific properties (e.g. Format) can still be captured as +// needed. +// 4. We capture the behavior of the instruction with a simplified RTL-like +// expression. +// 5. The use/def properties for each operand are automatically inferred from +// the pattern. +// 6. Address expressions should become first-class entities. + +// Simple copy instruction. +def MOV8rr : Inst<(ops R8:$dst, R8:$src), + "mov $dst, $src", 0x88, MRMDestReg, + [(set R8:$dst, R8:$src)]>; + +// Simple immediate initialization. 
+def MOV8ri : Inst<(ops R8:$dst, imm8:$src), + "mov $dst, $src", 0xB0, AddRegFrm, + [(set R8:$dst, imm8:$src)]>; + +// Two address instructions are described as three-addr instructions, with +// the special target-independent isTwoAddress flag set. The asm pattern +// should not refer to $src1; this would be enforced by the +// TargetInstrInfo tablegen backend. +let isTwoAddress = 1 in +def AND8rr : Inst<(ops R8:$dst, R8:$src1, R8:$src2), + "and $dst, $src2", 0x20, MRMDestReg, + [(set R8:$dst, (and R8:$src1, R8:$src2))]>; + +// Instructions that have explicit uses/defs make them explicit in the RTL. +// Instructions that need extra stuff emitted in the assembly can do so trivially. +let isTwoAddress = 1 in +def SHL32rCL : Inst<(ops R32:$dst, R32:$src), + "shl $dst, CL", 0xD2, MRM4r, + [(set R32:$dst, (shl R32:$src, CL))]>; + +// The RTL list is a list, allowing complex instructions to be defined easily. +// Temporary 'internal' registers can be used to break instructions apart. +let isTwoAddress = 1 in +def XOR32mi : Inst<(ops addr:$addr, imm32:$imm), + "xor $dst, $src2", 0x81, MRM6m, + [(set R32:$tmp1, (load addr:$addr)), + (set R32:$tmp2, (xor R32:$tmp1, imm32:$imm)), + (store addr:$addr, R32:$tmp2)]>; + +// Alternatively, if each temporary register is only used once, the instruction +// can just be described in nested form. This would be the canonical +// representation the target generator would convert the above into. Pick your +// favorite indentation scheme. +let isTwoAddress = 1 in +def AND32mr : Inst<(ops addr:$addr, R32:$src), + "xor $dst, $src2", 0x81, MRM6m, + [(store addr:$addr, + (and + (load addr:$addr), + R32:$src) + ) + ]>; + +// Describing complex instructions is not too hard! Note how implicit uses/defs +// become explicit here. +def CBW : Inst<(ops), + "cbw", 0x98, RawFrm, + [(set AX, (signext AL))]>; + +// Noop, does nothing. +def NOOP : Inst<(ops), "nop", 0x90, RawFrm, []>; + + +// Instructions that don't expect optimization can use unspec. 
+def IN8rr : Inst<(ops), "in AL, EDX", 0xEC, RawFrm, + [(set AL, (unspec EDX))]>; + diff --git a/final/test/TableGen/TargetInstrSpec.td b/final/test/TableGen/TargetInstrSpec.td new file mode 100644 index 00000000000..a7ca9022f84 --- /dev/null +++ b/final/test/TableGen/TargetInstrSpec.td @@ -0,0 +1,98 @@ +// RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_pd VR128:\$src1, VR128:\$src2))\\\]} | count 1 +// RUN: tblgen %s | grep {\\\[(set VR128:\$dst, (int_x86_sse2_add_ps VR128:\$src1, VR128:\$src2))\\\]} | count 1 +// XFAIL: vg_leak + +class ValueType { + int Size = size; + int Value = value; +} + +def v2i64 : ValueType<128, 22>; // 2 x i64 vector value +def v2f64 : ValueType<128, 28>; // 2 x f64 vector value + +class Intrinsic { + string Name = name; +} + +class Inst opcode, dag oopnds, dag iopnds, string asmstr, + list pattern> { + bits<8> Opcode = opcode; + dag OutOperands = oopnds; + dag InOperands = iopnds; + string AssemblyString = asmstr; + list Pattern = pattern; +} + +def ops; +def outs; +def ins; + +def set; + +// Define registers +class Register { + string Name = n; +} + +class RegisterClass regTypes, list regList> { + list RegTypes = regTypes; + list MemberList = regList; +} + +def XMM0: Register<"xmm0">; +def XMM1: Register<"xmm1">; +def XMM2: Register<"xmm2">; +def XMM3: Register<"xmm3">; +def XMM4: Register<"xmm4">; +def XMM5: Register<"xmm5">; +def XMM6: Register<"xmm6">; +def XMM7: Register<"xmm7">; +def XMM8: Register<"xmm8">; +def XMM9: Register<"xmm9">; +def XMM10: Register<"xmm10">; +def XMM11: Register<"xmm11">; +def XMM12: Register<"xmm12">; +def XMM13: Register<"xmm13">; +def XMM14: Register<"xmm14">; +def XMM15: Register<"xmm15">; + +def VR128 : RegisterClass<[v2i64, v2f64], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]>; + +// Dummy for subst +def REGCLASS : RegisterClass<[], []>; + +class decls { + // Dummy for foreach + dag pattern; + int operand; +} + +def Decls : decls; + +// Define intrinsics +def int_x86_sse2_add_ps : Intrinsic<"addps">; +def int_x86_sse2_add_pd : Intrinsic<"addpd">; +def INTRINSIC : Intrinsic<"Dummy">; + +multiclass arith opcode, string asmstr, string intr, list patterns> { + def PS : Inst(!subst("SUFFIX", "_ps", intr)), + !subst(REGCLASS, VR128, Decls.operand))))>; + + def PD : Inst(!subst("SUFFIX", "_pd", intr)), + !subst(REGCLASS, VR128, Decls.operand))))>; +} + +defm ADD : arith<0x58, "add", "int_x86_sse2_addSUFFIX", + [(set REGCLASS:$dst, (INTRINSIC REGCLASS:$src1, REGCLASS:$src2))]>; + diff --git a/final/test/TableGen/TemplateArgRename.td b/final/test/TableGen/TemplateArgRename.td new file mode 100644 index 00000000000..ee5d2cf7752 --- /dev/null +++ b/final/test/TableGen/TemplateArgRename.td @@ -0,0 +1,18 @@ +// RUN: tblgen %s +// XFAIL: vg_leak + +// Make sure there is no collision between XX and XX. +def S; + +class Before; +class After : Before<4> { + dag XX = (S); +} + + + +class C1 { + int Y = X; +} +class C2 : C1; + diff --git a/final/test/TableGen/Tree.td b/final/test/TableGen/Tree.td new file mode 100644 index 00000000000..2796cfd3586 --- /dev/null +++ b/final/test/TableGen/Tree.td @@ -0,0 +1,19 @@ +// This tests to make sure we can parse tree patterns. 
+// RUN: tblgen %s +// XFAIL: vg_leak + +class TreeNode; +class RegisterClass; + +def set : TreeNode; +def plus : TreeNode; +def imm : TreeNode; +def R32 : RegisterClass; + +class Inst { + dag Pattern = T; +} + +def ADDrr32 : Inst<(set R32, (plus R32, R32))>; // a = b + c +def ADDri32 : Inst<(set R32, (plus R32, imm))>; // a = b + imm + diff --git a/final/test/TableGen/TreeNames.td b/final/test/TableGen/TreeNames.td new file mode 100644 index 00000000000..ccdeb88dd02 --- /dev/null +++ b/final/test/TableGen/TreeNames.td @@ -0,0 +1,18 @@ +// This tests to make sure we can parse tree patterns with names. +// RUN: tblgen %s +// XFAIL: vg_leak + +class TreeNode; +class RegisterClass; + +def set : TreeNode; +def plus : TreeNode; +def imm : TreeNode; +def R32 : RegisterClass; + +class Inst { + dag Pattern = T; +} + +def ADDrr32 : Inst<(set R32, (plus R32:$A, R32:$def))>; + diff --git a/final/test/TableGen/UnsetBitInit.td b/final/test/TableGen/UnsetBitInit.td new file mode 100644 index 00000000000..ff7010868bc --- /dev/null +++ b/final/test/TableGen/UnsetBitInit.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s +// XFAIL: vg_leak +class x { + field bits<32> A; +} + +class y B> : x { + let A{21-20} = B; +} + +def z : y<{0,?}>; diff --git a/final/test/TableGen/UnterminatedComment.td b/final/test/TableGen/UnterminatedComment.td new file mode 100644 index 00000000000..158cede2607 --- /dev/null +++ b/final/test/TableGen/UnterminatedComment.td @@ -0,0 +1,6 @@ +// RUN: not tblgen < %s >& /dev/null + +def x; + +/* /* /* */ + diff --git a/final/test/TableGen/cast.td b/final/test/TableGen/cast.td new file mode 100644 index 00000000000..8164e74eae4 --- /dev/null +++ b/final/test/TableGen/cast.td @@ -0,0 +1,91 @@ +// RUN: tblgen %s | grep {add_ps} | count 3 +// XFAIL: vg_leak + +class ValueType { + int Size = size; + int Value = value; +} + +def v2i64 : ValueType<128, 22>; // 2 x i64 vector value +def v2f64 : ValueType<128, 28>; // 2 x f64 vector value + +class Intrinsic { + string Name = name; +} + +class Inst opcode, dag oopnds, dag iopnds, string asmstr, + list pattern> { + bits<8> Opcode = opcode; + dag OutOperands = oopnds; + dag InOperands = iopnds; + string AssemblyString = asmstr; + list Pattern = pattern; +} + +def ops; +def outs; +def ins; + +def set; + +// Define registers +class Register { + string Name = n; +} + +class RegisterClass regTypes, list regList> { + list RegTypes = regTypes; + list MemberList = regList; +} + +def XMM0: Register<"xmm0">; +def XMM1: Register<"xmm1">; +def XMM2: Register<"xmm2">; +def XMM3: Register<"xmm3">; +def XMM4: Register<"xmm4">; +def XMM5: Register<"xmm5">; +def XMM6: Register<"xmm6">; +def XMM7: Register<"xmm7">; +def XMM8: Register<"xmm8">; +def XMM9: Register<"xmm9">; +def XMM10: Register<"xmm10">; +def XMM11: Register<"xmm11">; +def XMM12: Register<"xmm12">; +def XMM13: Register<"xmm13">; +def XMM14: Register<"xmm14">; +def XMM15: Register<"xmm15">; + +def VR128 : RegisterClass<[v2i64, v2f64], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, + XMM12, XMM13, XMM14, XMM15]>; + +// Define intrinsics +def int_x86_sse2_add_ps : Intrinsic<"addps">; +def int_x86_sse2_add_pd : Intrinsic<"addpd">; + +multiclass arith opcode, string asmstr, string Intr> { + def PS : Inst(!strconcat(Intr, "_ps")) VR128:$src1, VR128:$src2))]>; + + def PD : Inst(!strconcat(Intr, "_pd")) VR128:$src1, VR128:$src2))]>; +} + +defm ADD : arith<0x58, "add", "int_x86_sse2_add">; + +class IntInst opcode, string asmstr, Intrinsic Intr> : + Inst; + + +multiclass arith_int opcode, 
string asmstr, string Intr> { + def PS_Int : IntInst(!strconcat(Intr, "_ps"))>; + + def PD_Int : IntInst(!strconcat(Intr, "_pd"))>; +} + +defm ADD : arith_int<0x58, "add", "int_x86_sse2_add">; diff --git a/final/test/TableGen/defmclass.td b/final/test/TableGen/defmclass.td new file mode 100644 index 00000000000..57972b6dae5 --- /dev/null +++ b/final/test/TableGen/defmclass.td @@ -0,0 +1,50 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak + +class XD { bits<4> Prefix = 11; } +// CHECK: Prefix = { 1, 1, 0, 0 }; +class XS { bits<4> Prefix = 12; } +class VEX { bit hasVEX_4VPrefix = 1; } + +def xd : XD; + +class BaseI { + bits<4> Prefix = 0; + bit hasVEX_4VPrefix = 0; +} + +class I op> : BaseI { + bits<4> opcode = op; + int val = !if(!eq(Prefix, xd.Prefix), 7, 21); + int check = !if(hasVEX_4VPrefix, 0, 10); +} + +multiclass R { + def rr : I<4>; +} + +multiclass M { + def rm : I<2>; +} + +multiclass Y { + defm SS : R, M, XD; +// CHECK: Prefix = { 1, 1, 0, 0 }; +// CHECK: Prefix = { 1, 1, 0, 0 }; + defm SD : R, M, XS; +} + +// CHECK: int check = 0; +defm Instr : Y, VEX; + + +// Anonymous defm. + +multiclass SomeAnonymous { + def rm; + def mr; +} + +// These multiclasses shouldn't conflict. +defm : SomeAnonymous<1>; +defm : SomeAnonymous<2>; \ No newline at end of file diff --git a/final/test/TableGen/dg.exp b/final/test/TableGen/dg.exp new file mode 100644 index 00000000000..f7d275ad8cb --- /dev/null +++ b/final/test/TableGen/dg.exp @@ -0,0 +1,3 @@ +load_lib llvm.exp + +RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{td}]] diff --git a/final/test/TableGen/eq.td b/final/test/TableGen/eq.td new file mode 100644 index 00000000000..518a80ac0d2 --- /dev/null +++ b/final/test/TableGen/eq.td @@ -0,0 +1,14 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak +// CHECK: Value = 0 +// CHECK: Value = 1 + +class Base { + int Value = V; +} + +class Derived : + Base; + +def TRUE : Derived<"true">; +def FALSE : Derived<"false">; diff --git a/final/test/TableGen/eqbit.td b/final/test/TableGen/eqbit.td new file mode 100644 index 00000000000..3953252c417 --- /dev/null +++ b/final/test/TableGen/eqbit.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak +// CHECK: a = 6 +// CHECK: a = 5 + +class A { + int a = !if(!eq(b, 1), 5, 6); +} + +def X : A<0>; +def Y : A; diff --git a/final/test/TableGen/foreach.td b/final/test/TableGen/foreach.td new file mode 100644 index 00000000000..d4d81f829ed --- /dev/null +++ b/final/test/TableGen/foreach.td @@ -0,0 +1,32 @@ +// RUN: tblgen %s | grep {Jr} | count 2 +// RUN: tblgen %s | grep {Sr} | count 2 +// RUN: tblgen %s | grep {NAME} | count 1 +// XFAIL: vg_leak + +// Variables for foreach +class decls { + string name; +} + +def Decls : decls; + +class A names> { + list Names = names; +} + +class B names> : A; + +class C names> : A; + +class D names> : A; + +class Names { + list values = ["Ken Griffey", "Seymour Cray"]; +} + +def People : Names; + +def Seniors : B; +def Juniors : C; +def Smiths : D<["NAME", "Jane Smith"]>; +def Unprocessed : D; diff --git a/final/test/TableGen/if.td b/final/test/TableGen/if.td new file mode 100644 index 00000000000..c4d953ea224 --- /dev/null +++ b/final/test/TableGen/if.td @@ -0,0 +1,45 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak + +// Support for an `!if' operator as part of a `let' statement. 
+// CHECK: class C +// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, !if({ C:x{2} }, 0, 1), !if({ C:x{2} }, 1, 1), !if({ C:x{2} }, 0, 0), !if({ C:x{1} }, C:y{3}, 0), !if({ C:x{1} }, C:y{2}, 1), !if({ C:x{0} }, C:y{3}, C:z), !if({ C:x{0} }, C:y{2}, C:y{2}), !if({ C:x{0} }, C:y{1}, C:y{1}), !if({ C:x{0} }, C:y{0}, C:y{0}) }; +class C x, bits<4> y, bit z> { + bits<16> n; + + let n{8-6} = !if(x{2}, 0b010, 0b110); + let n{5-4} = !if(x{1}, y{3-2}, {0, 1}); + let n{3-0} = !if(x{0}, y{3-0}, {z, y{2}, y{1}, y{0}}); +} + +// CHECK: def One +// CHECK-NEXT: list first = [1, 2, 3]; +// CHECK-NEXT: list rest = [1, 2, 3]; + +// CHECK: def OneB +// CHECK-NEXT: list vals = [1, 2, 3]; + +// CHECK: def Two +// CHECK-NEXT: list first = [1, 2, 3]; +// CHECK-NEXT: list rest = [4, 5, 6]; + +// CHECK: def TwoB +// CHECK-NEXT: list vals = [4, 5, 6]; + +class A> vals> { + list first = vals[0]; + list rest = !if(!empty(!tail(vals)), vals[0], vals[1]); +} + +def One : A<[[1,2,3]]>; +def Two : A<[[1,2,3], [4,5,6]]>; + +class B v> { + list vals = v; +} + +class BB> vals> : B; +class BBB> vals> : BB; + +def OneB : BBB<[[1,2,3]]>; +def TwoB : BBB<[[1,2,3],[4,5,6]]>; diff --git a/final/test/TableGen/ifbit.td b/final/test/TableGen/ifbit.td new file mode 100644 index 00000000000..3b0349e19b4 --- /dev/null +++ b/final/test/TableGen/ifbit.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak +// CHECK: a = 6 +// CHECK: a = 5 + +class A { + int a = !if(b, 5, 6); +} + +def X : A<0>; +def Y : A; diff --git a/final/test/TableGen/lisp.td b/final/test/TableGen/lisp.td new file mode 100644 index 00000000000..bbed8690dd5 --- /dev/null +++ b/final/test/TableGen/lisp.td @@ -0,0 +1,22 @@ +// RUN: tblgen %s | grep {} +// XFAIL: vg_leak + +class List n> { + list names = n; +} + +class CAR { + string element = e; +} + +class CDR r, int n> { + list rest = r; + int null = n; +} + +class NameList Names> : + List, CAR, CDR; + +def Three : NameList<["Tom", "Dick", "Harry"]>; + +def One : NameList<["Jeffrey Sinclair"]>; diff --git a/final/test/TableGen/nested-comment.td b/final/test/TableGen/nested-comment.td new file mode 100644 index 00000000000..68e29581bc7 --- /dev/null +++ b/final/test/TableGen/nested-comment.td @@ -0,0 +1,12 @@ +// RUN: tblgen < %s + +/* foo + +/ foo + + /*NoReg*/, baz + + + */ + +def X; diff --git a/final/test/TableGen/strconcat.td b/final/test/TableGen/strconcat.td new file mode 100644 index 00000000000..38409a99dc4 --- /dev/null +++ b/final/test/TableGen/strconcat.td @@ -0,0 +1,11 @@ +// RUN: tblgen %s | grep fufoo +// XFAIL: vg_leak + +class Y { + string T = !strconcat(S, "foo"); + + // String values concatenate lexically, as in C. 
+ string S = "foo" "bar"; +} + +def Z : Y<"fu">; diff --git a/final/test/TableGen/subst.td b/final/test/TableGen/subst.td new file mode 100644 index 00000000000..05d424f6835 --- /dev/null +++ b/final/test/TableGen/subst.td @@ -0,0 +1,30 @@ +// RUN: tblgen %s | grep {Smith} | count 7 +// RUN: tblgen %s | grep {Johnson} | count 2 +// RUN: tblgen %s | grep {FIRST} | count 1 +// RUN: tblgen %s | grep {LAST} | count 1 +// RUN: tblgen %s | grep {TVAR} | count 2 +// RUN: tblgen %s | grep {Bogus} | count 1 +// XFAIL: vg_leak + +class Honorific { + string honorific = t; +} + +def Mr : Honorific<"Mr.">; +def Ms : Honorific<"Ms.">; +def Mrs : Honorific<"Mrs.">; +def TVAR : Honorific<"Bogus">; + +class Name { + string name = n; + Honorific honorific = t; +} + +class AName : + Name; + +def JohnSmith : AName<"FIRST LAST", TVAR>; +def JaneSmith : AName<"Jane LAST", Ms>; +def JohnSmithJones : AName<"FIRST LAST-Jones", Mr>; +def JimmyJohnson : AName<"Jimmy Johnson", Mr>; diff --git a/final/test/TableGen/subst2.td b/final/test/TableGen/subst2.td new file mode 100644 index 00000000000..584266ef233 --- /dev/null +++ b/final/test/TableGen/subst2.td @@ -0,0 +1,16 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak +// CHECK: No subst +// CHECK: No foo +// CHECK: RECURSE foo + +class Recurse { + string Text = t; +} + +class Text : + Recurse; + +def Ok1 : Text<"No subst">; +def Ok2 : Text<"No NORECURSE">; +def Trouble : Text<"RECURSE NORECURSE">; diff --git a/final/test/TableGen/usevalname.td b/final/test/TableGen/usevalname.td new file mode 100644 index 00000000000..1b31c8f1506 --- /dev/null +++ b/final/test/TableGen/usevalname.td @@ -0,0 +1,24 @@ +// RUN: tblgen %s | FileCheck %s +// XFAIL: vg_leak + +class Instr pat> { + list Pattern = pat; +} + +class Reg { + int a = 3; +} + +def VR128 : Reg; +def mem_frag; +def set; +def addr; +def shufp : Reg; + +multiclass shuffle { + def rri : Instr<[(set RC:$dst, (shufp:$src3 + RC:$src1, RC:$src2))]>; +} + +// CHECK: shufp:src3 +defm ADD : shuffle; diff --git a/final/test/TestRunner.sh b/final/test/TestRunner.sh new file mode 100755 index 00000000000..ab50856af11 --- /dev/null +++ b/final/test/TestRunner.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# Deprecated, use 'llvm-lit'. + +echo "warning: '$0' is deprecated, use 'llvm-lit' instead." +exec llvm-lit "$@" diff --git a/final/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll b/final/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll new file mode 100644 index 00000000000..43462faa47f --- /dev/null +++ b/final/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll @@ -0,0 +1,13 @@ +; RUN: opt < %s -adce + +define i32 @"main"(i32 %argc) +begin + br label %2 + + %retval = phi i32 [ %argc, %2 ] ; [#uses=2] + %two = add i32 %retval, %retval ; [#uses=1] + ret i32 %two + + br label %1 +end + diff --git a/final/test/Transforms/ADCE/2002-05-22-PHITest.ll b/final/test/Transforms/ADCE/2002-05-22-PHITest.ll new file mode 100644 index 00000000000..0095be1f5a1 --- /dev/null +++ b/final/test/Transforms/ADCE/2002-05-22-PHITest.ll @@ -0,0 +1,16 @@ +; It is illegal to remove BB1 because it will mess up the PHI node! +; +; RUN: opt < %s -adce -S | grep BB1 + +define i32 @test(i1 %C, i32 %A, i32 %B) { +;

operation never modifies the archive. + +=item q[Rfz] + +Quickly append files to the end of the archive. The F<R>, F<f>, and F<z> +modifiers apply to this operation. This operation quickly adds the +F<files> to the archive without checking for duplicates that should be +removed first. If no F<files> are specified, the archive is not modified. +Because of the way that B<llvm-ar> constructs the archive file, it's dubious +whether the F<q> operation is any faster than the F<r> operation. + +=item r[Rabfuz] + +Replace or insert file members. The F<R>, F<a>, F<b>, F<f>, F<u>, and F<z> +modifiers apply to this operation. This operation will replace existing +F<files> or insert them at the end of the archive if they do not exist. If no +F<files> are specified, the archive is not modified. + +=item t[v] + +Print the table of contents. Without any modifiers, this operation just prints +the names of the members to the standard output. With the F<v> modifier, +B<llvm-ar> also prints out the file type (B=bitcode, Z=compressed, S=symbol +table, blank=regular file), the permission mode, the owner and group, the +size, and the date. If any F<files> are specified, the listing is only for +those files. If no F<files> are specified, the table of contents for the +whole archive is printed. + +=item x[oP] + +Extract archive members back to files. The F<o> modifier applies to this +operation. This operation retrieves the indicated F<files> from the archive +and writes them back to the operating system's file system. If no +F<files> are specified, the entire archive is extracted. + +=back + +=head2 Modifiers (operation specific) + +The modifiers below are specific to certain operations. See the Operations +section (above) to determine which modifiers are applicable to which operations. + +=over + +=item [a] + +When inserting or moving member files, this option specifies the destination of +the new files as being C<a>fter the F<relpos> member. If F<relpos> is not found, +the files are placed at the end of the archive. + +=item [b] + +When inserting or moving member files, this option specifies the destination of +the new files as being C<b>efore the F<relpos> member. If F<relpos> is not +found, the files are placed at the end of the archive. This modifier is +identical to the F<i> modifier. + +=item [f] + +Normally, B<llvm-ar> stores the full path name to a file as presented to it on +the command line. With this option, truncated (15 characters max) names are +used. This ensures name compatibility with older versions of C<ar> but may also +thwart correct extraction of the files (duplicates may overwrite). If used with +the F<R> option, the directory recursion will be performed but the file names +will all be C<f>lattened to simple file names. + +=item [i] + +A synonym for the F<b> option. + +=item [k] + +Normally, B<llvm-ar> will not print the contents of bitcode files when the +F

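For orientation, here are a few illustrative invocations of the operations documented above. This is only a sketch; the archive and member names (libfoo.a, a.bc, and so on) are hypothetical, not taken from this patch:

    llvm-ar r libfoo.a a.bc b.bc   # replace or insert a.bc and b.bc into libfoo.a
    llvm-ar q libfoo.a c.bc        # quickly append c.bc without duplicate checking
    llvm-ar tv libfoo.a            # print a verbose table of contents
    llvm-ar x libfoo.a a.bc        # extract a.bc back to the file system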
+
+  void %llvm.dbg.declare(metadata, metadata)
+
+ +

This intrinsic provides information about a local element (e.g. a variable). The + first argument is metadata holding the alloca for the variable. The + second argument is metadata containing a description of the variable.
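As a minimal sketch of how a front end might emit this intrinsic (the variable, its type, and the metadata numbering below are hypothetical, not taken from this patch):

    %x.addr = alloca i32                                   ; stack slot for the source variable
    call void @llvm.dbg.declare(metadata !{i32* %x.addr},  ; first argument: the alloca wrapped in metadata
                                metadata !1)               ; second argument: the variable's debug descriptor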

+